Repository: onyx-dot-app/onyx
Branch: main
Commit: d4a96d70f32f
Files: 4225
Total size: 25.4 MB

Directory structure:
gitextract_2880u6lc/

├── .git-blame-ignore-revs
├── .github/
│   ├── CODEOWNERS
│   ├── actionlint.yml
│   ├── actions/
│   │   ├── build-backend-image/
│   │   │   └── action.yml
│   │   ├── build-integration-image/
│   │   │   └── action.yml
│   │   ├── build-model-server-image/
│   │   │   └── action.yml
│   │   ├── run-nightly-provider-chat-test/
│   │   │   └── action.yml
│   │   ├── setup-playwright/
│   │   │   └── action.yml
│   │   ├── setup-python-and-install-dependencies/
│   │   │   └── action.yml
│   │   └── slack-notify/
│   │       ├── action.yml
│   │       └── user-mappings.json
│   ├── dependabot.yml
│   ├── pull_request_template.md
│   ├── runs-on.yml
│   └── workflows/
│       ├── deployment.yml
│       ├── docker-tag-beta.yml
│       ├── docker-tag-latest.yml
│       ├── helm-chart-releases.yml
│       ├── merge-group.yml
│       ├── nightly-close-stale-issues.yml
│       ├── nightly-llm-provider-chat.yml
│       ├── post-merge-beta-cherry-pick.yml
│       ├── pr-database-tests.yml
│       ├── pr-desktop-build.yml
│       ├── pr-external-dependency-unit-tests.yml
│       ├── pr-golang-tests.yml
│       ├── pr-helm-chart-testing.yml
│       ├── pr-integration-tests.yml
│       ├── pr-jest-tests.yml
│       ├── pr-labeler.yml
│       ├── pr-linear-check.yml
│       ├── pr-playwright-tests.yml
│       ├── pr-python-checks.yml
│       ├── pr-python-connector-tests.yml
│       ├── pr-python-model-tests.yml
│       ├── pr-python-tests.yml
│       ├── pr-quality-checks.yml
│       ├── preview.yml
│       ├── release-cli.yml
│       ├── release-devtools.yml
│       ├── reusable-nightly-llm-provider-chat.yml
│       ├── sandbox-deployment.yml
│       ├── storybook-deploy.yml
│       ├── sync_foss.yml
│       ├── tag-nightly.yml
│       └── zizmor.yml
├── .gitignore
├── .greptile/
│   ├── config.json
│   ├── files.json
│   └── rules.md
├── .pre-commit-config.yaml
├── .prettierignore
├── .vscode/
│   ├── env.web_template.txt
│   ├── env_template.txt
│   ├── launch.json
│   └── tasks.template.jsonc
├── AGENTS.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── backend/
│   ├── .dockerignore
│   ├── .gitignore
│   ├── .trivyignore
│   ├── Dockerfile
│   ├── Dockerfile.model_server
│   ├── alembic/
│   │   ├── README.md
│   │   ├── env.py
│   │   ├── run_multitenant_migrations.py
│   │   ├── script.py.mako
│   │   └── versions/
│   │       ├── 01f8e6d95a33_populate_flow_mapping_data.py
│   │       ├── 027381bce97c_add_shortcut_option_for_users.py
│   │       ├── 03bf8be6b53a_rework_kg_config.py
│   │       ├── 03d085c5c38d_backfill_account_type.py
│   │       ├── 03d710ccf29c_add_permission_sync_attempt_tables.py
│   │       ├── 0568ccf46a6b_add_thread_specific_model_selection.py
│   │       ├── 05c07bf07c00_add_search_doc_relevance_details.py
│   │       ├── 07b98176f1de_code_interpreter_seed.py
│   │       ├── 0816326d83aa_add_federated_connector_tables.py
│   │       ├── 08a1eda20fe1_add_earliest_indexing_to_connector.py
│   │       ├── 09995b8811eb_add_theme_preference_to_user.py
│   │       ├── 0a2b51deb0b8_add_starter_prompts.py
│   │       ├── 0a98909f2757_enable_encrypted_fields.py
│   │       ├── 0bb4558f35df_add_scim_username_to_scim_user_mapping.py
│   │       ├── 0cd424f32b1d_user_file_data_preparation_and_backfill.py
│   │       ├── 0ebb1d516877_add_ccpair_deletion_failure_message.py
│   │       ├── 0f7ff6d75b57_add_index_to_index_attempt_time_created.py
│   │       ├── 114a638452db_add_default_app_mode_to_user.py
│   │       ├── 12635f6655b7_drive_canonical_ids.py
│   │       ├── 15326fcec57e_introduce_onyx_apis.py
│   │       ├── 16c37a30adf2_user_file_relationship_migration.py
│   │       ├── 173cae5bba26_port_config_store.py
│   │       ├── 175ea04c7087_add_user_preferences.py
│   │       ├── 177de57c21c9_display_custom_llm_models.py
│   │       ├── 18b5b2524446_add_is_clarification_to_chat_message.py
│   │       ├── 19c0ccb01687_migrate_to_contextual_rag_model.py
│   │       ├── 1a03d2c2856b_add_indexes_to_document__tag.py
│   │       ├── 1b10e1fda030_add_additional_data_to_notifications.py
│   │       ├── 1b8206b29c5d_add_user_delete_cascades.py
│   │       ├── 1d78c0ca7853_remove_voice_provider_deleted_column.py
│   │       ├── 1f2a3b4c5d6e_add_internet_search_and_content_providers.py
│   │       ├── 1f60f60c3401_embedding_model_search_settings.py
│   │       ├── 2020d417ec84_single_onyx_craft_migration.py
│   │       ├── 213fd978c6d8_notifications.py
│   │       ├── 238b84885828_add_foreign_key_to_user__external_user_.py
│   │       ├── 23957775e5f5_remove_feedback_foreignkey_constraint.py
│   │       ├── 25a5501dc766_group_permissions_phase1.py
│   │       ├── 2664261bfaab_add_cache_store_table.py
│   │       ├── 2666d766cb9b_google_oauth2.py
│   │       ├── 26b931506ecb_default_chosen_assistants_to_none.py
│   │       ├── 27c6ecc08586_permission_framework.py
│   │       ├── 27fb147a843f_add_timestamps_to_user_table.py
│   │       ├── 2955778aa44c_add_chunk_count_to_document.py
│   │       ├── 2a391f840e85_add_last_refreshed_at_mcp_server.py
│   │       ├── 2acdef638fc2_add_switchover_type_field.py
│   │       ├── 2b75d0a8ffcb_user_file_schema_cleanup.py
│   │       ├── 2b90f3af54b8_usage_limits.py
│   │       ├── 2c2430828bdf_add_unique_constraint_to_inputprompt_.py
│   │       ├── 2cdeff6d8c93_set_built_in_to_default.py
│   │       ├── 2d2304e27d8c_add_above_below_to_persona.py
│   │       ├── 2daa494a0851_add_group_sync_time.py
│   │       ├── 2f80c6a2550f_add_chat_session_specific_temperature_.py
│   │       ├── 2f95e36923e6_add_indexing_coordination.py
│   │       ├── 30c1d5744104_persona_datetime_aware.py
│   │       ├── 325975216eb3_add_icon_color_and_icon_shape_to_persona.py
│   │       ├── 33cb72ea4d80_single_tool_call_per_message.py
│   │       ├── 33ea50e88f24_foreign_key_input_prompts.py
│   │       ├── 351faebd379d_add_curator_fields.py
│   │       ├── 35e518e0ddf4_properly_cascade.py
│   │       ├── 35e6853a51d5_server_default_chosen_assistants.py
│   │       ├── 369644546676_add_composite_index_for_index_attempt_.py
│   │       ├── 36e9220ab794_update_kg_trigger_functions.py
│   │       ├── 3781a5eb12cb_add_chunk_stats_table.py
│   │       ├── 3879338f8ba1_add_tool_table.py
│   │       ├── 38eda64af7fe_add_chat_session_sharing.py
│   │       ├── 3934b1bc7b62_update_github_connector_repo_name_to_.py
│   │       ├── 3a7802814195_add_alternate_assistant_to_chat_message.py
│   │       ├── 3a78dba1080a_user_file_legacy_data_cleanup.py
│   │       ├── 3b25685ff73c_move_is_public_to_cc_pair.py
│   │       ├── 3bd4c84fe72f_improved_index.py
│   │       ├── 3c5e35aa9af0_polling_document_count.py
│   │       ├── 3c6531f32351_add_back_input_prompts.py
│   │       ├── 3c9a65f1207f_seed_exa_provider_from_env.py
│   │       ├── 3d1cca026fe8_add_oauth_config_and_user_tokens.py
│   │       ├── 3fc5d75723b3_add_doc_metadata_field_in_document_model.py
│   │       ├── 401c1ac29467_add_tables_for_ui_based_llm_.py
│   │       ├── 40926a4dab77_reset_userfile_document_id_migrated_.py
│   │       ├── 41fa44bef321_remove_default_prompt_shortcuts.py
│   │       ├── 43cbbb3f5e6a_rename_index_origin_to_index_recursively.py
│   │       ├── 44f856ae2a4a_add_cloud_embedding_model.py
│   │       ├── 4505fd7302e1_added_is_internet_to_dbdoc.py
│   │       ├── 465f78d9b7f9_larger_access_tokens_for_oauth.py
│   │       ├── 46625e4745d4_remove_native_enum.py
│   │       ├── 46b7a812670f_fix_user__external_user_group_id_fk.py
│   │       ├── 4738e4b3bae1_pg_file_store.py
│   │       ├── 473a1a7ca408_add_display_model_names_to_llm_provider.py
│   │       ├── 47433d30de82_create_indexattempt_table.py
│   │       ├── 475fcefe8826_add_name_to_api_key.py
│   │       ├── 4794bc13e484_update_prompt_length.py
│   │       ├── 47a07e1a38f1_fix_invalid_model_configurations_state.py
│   │       ├── 47e5bef3a1d7_add_persona_categories.py
│   │       ├── 48d14957fe80_add_support_for_custom_tools.py
│   │       ├── 495cb26ce93e_create_knowlege_graph_tables.py
│   │       ├── 4a1e4b1c89d2_add_indexing_to_userfilestatus.py
│   │       ├── 4a951134c801_moved_status_to_connector_credential_.py
│   │       ├── 4b08d97e175a_change_default_prune_freq.py
│   │       ├── 4cebcbc9b2ae_add_tab_index_to_tool_call.py
│   │       ├── 4d58345da04a_lowercase_user_emails.py
│   │       ├── 4ea2c93919c1_add_type_to_credentials.py
│   │       ├── 4ee1287bd26a_add_multiple_slack_bot_support.py
│   │       ├── 4f8a2b3c1d9e_add_open_url_tool.py
│   │       ├── 503883791c39_add_effective_permissions.py
│   │       ├── 505c488f6662_merge_default_assistants_into_unified.py
│   │       ├── 50b683a8295c_add_additional_retrieval_controls_to_.py
│   │       ├── 52a219fb5233_add_last_synced_and_last_modified_to_document_table.py
│   │       ├── 54a74a0417fc_danswerbot_onyxbot.py
│   │       ├── 55546a7967ee_assistant_rework.py
│   │       ├── 570282d33c49_track_onyxbot_explicitly.py
│   │       ├── 57122d037335_add_python_tool_on_default.py
│   │       ├── 57b53544726e_add_document_set_tables.py
│   │       ├── 5809c0787398_add_chat_sessions.py
│   │       ├── 58c50ef19f08_add_stale_column_to_user__external_user_.py
│   │       ├── 5ae8240accb3_add_research_agent_database_tables_and_.py
│   │       ├── 5b29123cd710_nullable_search_settings_for_historic_.py
│   │       ├── 5c3dca366b35_backend_driven_notification_details.py
│   │       ├── 5c448911b12f_add_content_type_to_userfile.py
│   │       ├── 5c7fdadae813_match_any_keywords_flag_for_standard_.py
│   │       ├── 5d12a446f5c0_add_api_version_and_deployment_name_to_.py
│   │       ├── 5e1c073d48a3_add_personal_access_token_table.py
│   │       ├── 5e6f7a8b9c0d_update_default_persona_prompt.py
│   │       ├── 5e84129c8be3_add_docs_indexed_column_to_index_.py
│   │       ├── 5f4b8568a221_add_removed_documents_to_index_attempt.py
│   │       ├── 5fc1f54cc252_hybrid_enum.py
│   │       ├── 61ff3651add4_add_permission_syncing.py
│   │       ├── 62c3a055a141_add_file_names_to_file_connector_config.py
│   │       ├── 631fd2504136_add_approx_chunk_count_in_vespa_to_.py
│   │       ├── 6436661d5b65_add_created_at_in_project_userfile.py
│   │       ├── 643a84a42a33_add_user_configured_names_to_llmprovider.py
│   │       ├── 64bd5677aeb6_add_image_input_support_to_model_config.py
│   │       ├── 65bc6e0f8500_remove_kg_subtype_from_db.py
│   │       ├── 6756efa39ada_id_uuid_for_chat_session.py
│   │       ├── 689433b0d8de_add_hook_and_hook_execution_log_tables.py
│   │       ├── 699221885109_nullify_default_task_prompt.py
│   │       ├── 6a804aeb4830_duplicated_no_harm_user_file_migration.py
│   │       ├── 6b3b4083c5aa_persona_cleanup_and_featured.py
│   │       ├── 6d387b3196c2_basic_auth.py
│   │       ├── 6d562f86c78b_remove_default_bot.py
│   │       ├── 6f4f86aef280_add_queries_and_is_web_fetch_to_.py
│   │       ├── 6fc7886d665d_make_categories_labels_and_many_to_many.py
│   │       ├── 703313b75876_add_tokenratelimit_tables.py
│   │       ├── 70f00c45c0f2_more_descriptive_filestore.py
│   │       ├── 7206234e012a_add_image_generation_config_table.py
│   │       ├── 72aa7de2e5cf_make_processing_mode_default_all_caps.py
│   │       ├── 72bdc9929a46_permission_auto_sync_framework.py
│   │       ├── 73e9983e5091_add_search_query_table.py
│   │       ├── 7477a5f5d728_added_model_defaults_for_users.py
│   │       ├── 7547d982db8f_chat_folders.py
│   │       ├── 7616121f6e97_add_enterprise_fields_to_scim_user_mapping.py
│   │       ├── 767f1c2a00eb_count_chat_tokens.py
│   │       ├── 76b60d407dfb_cc_pair_name_not_unique.py
│   │       ├── 776b3bbe9092_remove_remaining_enums.py
│   │       ├── 77d07dffae64_forcibly_remove_more_enum_types_from_.py
│   │       ├── 78dbe7e38469_task_tracking.py
│   │       ├── 78ebc66946a0_remove_reranking_from_search_settings.py
│   │       ├── 795b20b85b4b_add_llm_group_permissions_control.py
│   │       ├── 797089dfb4d2_persona_start_date.py
│   │       ├── 79acd316403a_add_api_key_table.py
│   │       ├── 7a70b7664e37_add_model_configuration_table.py
│   │       ├── 7aea705850d5_added_slack_auto_filter.py
│   │       ├── 7b9b952abdf6_update_entities.py
│   │       ├── 7bd55f264e1b_add_display_name_to_model_configuration.py
│   │       ├── 7cb492013621_code_interpreter_server_model.py
│   │       ├── 7cc3fcc116c1_user_file_uuid_primary_key_swap.py
│   │       ├── 7ccea01261f6_store_chat_retrieval_docs.py
│   │       ├── 7da0ae5ad583_add_description_to_persona.py
│   │       ├── 7da543f5672f_add_slackbotconfig_table.py
│   │       ├── 7e490836d179_nullify_default_system_prompt.py
│   │       ├── 7ed603b64d5a_add_mcp_server_and_connection_config_.py
│   │       ├── 7f726bad5367_slack_followup.py
│   │       ├── 7f99be1cb9f5_add_index_for_getting_documents_just_by_.py
│   │       ├── 800f48024ae9_add_id_to_connectorcredentialpair.py
│   │       ├── 80696cf850ae_add_chat_session_to_query_event.py
│   │       ├── 8188861f4e92_csv_to_tabular_chat_file_type.py
│   │       ├── 81c22b1e2e78_hierarchy_nodes_v1.py
│   │       ├── 8405ca81cc83_notifications_constraint.py
│   │       ├── 849b21c732f8_add_demo_data_enabled_to_build_session.py
│   │       ├── 87c52ec39f84_update_default_system_prompt.py
│   │       ├── 8818cf73fa1a_drop_include_citations.py
│   │       ├── 891cd83c87a8_add_is_visible_to_persona.py
│   │       ├── 8987770549c0_add_full_exception_stack_trace.py
│   │       ├── 8a87bd6ec550_associate_index_attempts_with_ccpair.py
│   │       ├── 8aabb57f3b49_restructure_document_indices.py
│   │       ├── 8b5ce697290e_add_discord_bot_tables.py
│   │       ├── 8e1ac4f39a9f_enable_contextual_retrieval.py
│   │       ├── 8e26726b7683_chat_context_addition.py
│   │       ├── 8f43500ee275_add_index.py
│   │       ├── 8ffcc2bcfc11_add_needs_persona_sync_to_user_file.py
│   │       ├── 904451035c9b_store_tool_details.py
│   │       ├── 904e5138fffb_tags.py
│   │       ├── 9087b548dd69_seed_default_image_gen_config.py
│   │       ├── 90b409d06e50_add_chat_compression_fields.py
│   │       ├── 90e3b9af7da4_tag_fix.py
│   │       ├── 91a0a4d62b14_milestone.py
│   │       ├── 91fd3b470d1a_remove_documentsource_from_tag.py
│   │       ├── 91ffac7e65b3_add_expiry_time.py
│   │       ├── 93560ba1b118_add_web_ui_option_to_slack_config.py
│   │       ├── 93a2e195e25c_add_voice_provider_and_user_voice_prefs.py
│   │       ├── 93c15d6a6fbb_add_chunk_error_and_vespa_count_columns_.py
│   │       ├── 949b4a92a401_remove_rt.py
│   │       ├── 94dc3d0236f8_make_document_set_description_optional.py
│   │       ├── 96a5702df6aa_mcp_tool_enabled.py
│   │       ├── 977e834c1427_seed_default_groups.py
│   │       ├── 97dbb53fa8c8_add_syncrecord.py
│   │       ├── 98a5008d8711_agent_tracking.py
│   │       ├── 9a0296d7421e_add_is_auto_mode_to_llm_provider.py
│   │       ├── 9aadf32dfeb4_add_user_files.py
│   │       ├── 9b66d3156fc6_user_file_schema_additions.py
│   │       ├── 9c00a2bccb83_chat_message_agentic.py
│   │       ├── 9c54986124c6_add_scim_tables.py
│   │       ├── 9cf5c00f72fe_add_creator_to_cc_pair.py
│   │       ├── 9d1543a37106_add_processing_duration_seconds_to_chat_.py
│   │       ├── 9d97fecfab7f_added_retrieved_docs_to_query_event.py
│   │       ├── 9drpiiw74ljy_add_config_to_federated_connector.py
│   │       ├── 9f696734098f_combine_search_and_chat.py
│   │       ├── a01bf2971c5d_update_default_tool_descriptions.py
│   │       ├── a1b2c3d4e5f6_add_license_table.py
│   │       ├── a1b2c3d4e5f7_drop_agent_search_metrics_table.py
│   │       ├── a2b3c4d5e6f7_remove_fast_default_model_name.py
│   │       ├── a3795dce87be_migration_confluence_to_be_explicit.py
│   │       ├── a3b8d9e2f1c4_make_scim_external_id_nullable.py
│   │       ├── a3bfd0d64902_add_chosen_assistants_to_user_table.py
│   │       ├── a3c1a7904cd0_remove_userfile_related_deprecated_.py
│   │       ├── a3f8b2c1d4e5_add_preferred_response_id_to_chat_message.py
│   │       ├── a4f23d6b71c8_add_llm_provider_persona_restrictions.py
│   │       ├── a570b80a5f20_usergroup_tables.py
│   │       ├── a6df6b88ef81_remove_recent_assistants.py
│   │       ├── a7688ab35c45_add_public_external_user_group_table.py
│   │       ├── a852cbe15577_new_chat_history.py
│   │       ├── a8c2065484e6_add_auto_scroll_to_user_model.py
│   │       ├── abbfec3a5ac5_merge_prompt_into_persona.py
│   │       ├── abe7378b8217_add_indexing_trigger_to_cc_pair.py
│   │       ├── ac5eaac849f9_add_last_pruned_to_connector_table.py
│   │       ├── acaab4ef4507_remove_inactive_ccpair_status_on_.py
│   │       ├── ae62505e3acc_add_saml_accounts.py
│   │       ├── aeda5f2df4f6_add_pinned_assistants.py
│   │       ├── b082fec533f0_make_last_attempt_status_nullable.py
│   │       ├── b156fa702355_chat_reworked.py
│   │       ├── b30353be4eec_add_mcp_auth_performer.py
│   │       ├── b329d00a9ea6_adding_assistant_specific_user_.py
│   │       ├── b388730a2899_nullable_preferences.py
│   │       ├── b4b7e1028dfd_grant_basic_to_existing_groups.py
│   │       ├── b4ef3ae0bf6e_add_user_oauth_token_to_slack_bot.py
│   │       ├── b51c6844d1df_seed_memory_tool.py
│   │       ├── b558f51620b4_pause_finished_user_file_connectors.py
│   │       ├── b5c4d7e8f9a1_add_hierarchy_node_cc_pair_table.py
│   │       ├── b728689f45b1_rename_persona_is_visible_to_is_listed_.py
│   │       ├── b72ed7a5db0e_remove_description_from_starter_messages.py
│   │       ├── b7a7eee5aa15_add_checkpointing_failure_handling.py
│   │       ├── b7bcc991d722_assign_users_to_default_groups.py
│   │       ├── b7c2b63c4a03_add_background_reindex_enabled_field.py
│   │       ├── b7ec9b5b505f_adjust_prompt_length.py
│   │       ├── b85f02ec1308_fix_file_type_migration.py
│   │       ├── b896bbd0d5a7_backfill_is_internet_data_to_false.py
│   │       ├── b8c9d0e1f2a3_drop_milestone_table.py
│   │       ├── ba98eba0f66a_add_support_for_litellm_proxy_in_.py
│   │       ├── baf71f781b9e_add_llm_model_version_override_to_.py
│   │       ├── bc9771dccadf_create_usage_reports_table.py
│   │       ├── bceb1e139447_add_base_url_to_cloudembeddingprovider.py
│   │       ├── bd2921608c3a_non_nullable_default_persona.py
│   │       ├── bd7c3bf8beba_migrate_agent_responses_to_research_.py
│   │       ├── be2ab2aa50ee_fix_capitalization.py
│   │       ├── be87a654d5af_persona_new_default_model_configuration_.py
│   │       ├── bf7a81109301_delete_input_prompts.py
│   │       ├── c0aab6edb6dd_delete_workspace.py
│   │       ├── c0c937d5c9e5_llm_provider_deprecate_fields.py
│   │       ├── c0fd6e4da83a_add_recent_assistants.py
│   │       ├── c18cdf4b497e_add_standard_answer_tables.py
│   │       ├── c1d2e3f4a5b6_add_deep_research_tool.py
│   │       ├── c5b692fa265c_add_index_attempt_errors_table.py
│   │       ├── c5eae4a75a1b_add_chat_message__standard_answer_table.py
│   │       ├── c7bf5721733e_add_has_been_indexed_to_.py
│   │       ├── c7e9f4a3b2d1_add_python_tool.py
│   │       ├── c7f2e1b4a9d3_add_sharing_scope_to_build_session.py
│   │       ├── c8a93a2af083_personalization_user_info.py
│   │       ├── c99d76fcd298_add_nullable_to_persona_id_in_chat_.py
│   │       ├── c9e2cd766c29_add_s3_file_store_table.py
│   │       ├── ca04500b9ee8_add_cascade_deletes_to_agent_tables.py
│   │       ├── cbc03e08d0f3_add_opensearch_migration_tables.py
│   │       ├── cec7ec36c505_kgentity_parent.py
│   │       ├── cf90764725d8_larger_refresh_tokens.py
│   │       ├── d09fc20a3c66_seed_builtin_tools.py
│   │       ├── d1b637d7050a_sync_exa_api_key_to_content_provider.py
│   │       ├── d25168c2beee_tool_name_consistency.py
│   │       ├── d3fd499c829c_add_file_reader_tool.py
│   │       ├── d5645c915d0e_remove_deletion_attempt_table.py
│   │       ├── d56ffa94ca32_add_file_content.py
│   │       ├── d5c86e2c6dc6_add_cascade_delete_to_search_query_user_.py
│   │       ├── d61e513bef0a_add_total_docs_for_index_attempt.py
│   │       ├── d7111c1238cd_remove_document_ids.py
│   │       ├── d716b0791ddd_combined_slack_id_fields.py
│   │       ├── d8cdfee5df80_add_skipped_to_userfilestatus.py
│   │       ├── d929f0c1c6af_feedback_feature.py
│   │       ├── d961aca62eb3_update_status_length.py
│   │       ├── d9ec13955951_remove__dim_suffix_from_model_name.py
│   │       ├── da42808081e3_migrate_jira_connectors_to_new_format.py
│   │       ├── da4c21c69164_chosen_assistants_changed_to_jsonb.py
│   │       ├── dab04867cd88_add_composite_index_to_document_by_.py
│   │       ├── dba7f71618f5_onyx_custom_tool_flow.py
│   │       ├── dbaa756c2ccf_embedding_models.py
│   │       ├── df0c7ad8a076_added_deletion_attempt_table.py
│   │       ├── df46c75b714e_add_default_vision_provider_to_llm_.py
│   │       ├── dfbe9e93d3c7_extended_role_for_non_web.py
│   │       ├── e0a68a81d434_add_chat_feedback.py
│   │       ├── e1392f05e840_added_input_prompts.py
│   │       ├── e209dc5a8156_added_prune_frequency.py
│   │       ├── e4334d5b33ba_add_deployment_name_to_llmprovider.py
│   │       ├── e50154680a5c_no_source_enum.py
│   │       ├── e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py
│   │       ├── e7f8a9b0c1d2_create_anonymous_user.py
│   │       ├── e86866a9c78a_add_persona_to_chat_session.py
│   │       ├── e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
│   │       ├── e91df4e935ef_private_personas_documentsets.py
│   │       ├── eaa3b5593925_add_default_slack_channel_config.py
│   │       ├── ec3ec2eabf7b_index_from_beginning.py
│   │       ├── ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py
│   │       ├── ecab2b3f1a3b_add_overrides_to_the_chat_session.py
│   │       ├── ed9e44312505_add_icon_name_field.py
│   │       ├── ee3f4b47fad5_added_alternate_model_to_chat_message.py
│   │       ├── ef7da92f7213_add_files_to_chatmessage.py
│   │       ├── efb35676026c_standard_answer_match_regex_flag.py
│   │       ├── f11b408e39d3_force_lowercase_all_users.py
│   │       ├── f13db29f3101_add_composite_index_for_last_modified_.py
│   │       ├── f17bf3b0d9f1_embedding_provider_by_provider_type.py
│   │       ├── f1c6478c3fd8_add_pre_defined_feedback.py
│   │       ├── f1ca58b2f2ec_add_passthrough_auth_to_tool.py
│   │       ├── f220515df7b4_add_flow_mapping_table.py
│   │       ├── f32615f71aeb_add_custom_headers_to_tools.py
│   │       ├── f39c5794c10a_add_background_errors_table.py
│   │       ├── f5437cc136c5_delete_non_search_assistants.py
│   │       ├── f71470ba9274_add_prompt_length_limit.py
│   │       ├── f7505c5b0284_updated_constraints_for_ccpairs.py
│   │       ├── f7a894b06d02_non_nullbale_slack_bot_id_in_channel_.py
│   │       ├── f7ca3e2f45d9_migrate_no_auth_data_to_placeholder.py
│   │       ├── f7e58d357687_add_has_web_column_to_user.py
│   │       ├── f8a9b2c3d4e5_add_research_answer_purpose_to_chat_message.py
│   │       ├── f9b8c7d6e5a4_update_parent_question_id_foreign_key_to_research_agent_iteration.py
│   │       ├── fad14119fb92_delete_tags_with_wrong_enum.py
│   │       ├── fb80bdd256de_add_chat_background_to_user.py
│   │       ├── fcd135795f21_add_slack_bot_display_type.py
│   │       ├── febe9eaa0644_add_document_set_persona_relationship_.py
│   │       ├── fec3db967bf7_add_time_updated_to_usergroup_and_.py
│   │       ├── feead2911109_add_opensearch_tenant_migration_columns.py
│   │       └── ffc707a226b4_basic_document_metadata.py
│   ├── alembic.ini
│   ├── alembic_tenants/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── env.py
│   │   ├── script.py.mako
│   │   └── versions/
│   │       ├── 14a83a331951_create_usertenantmapping_table.py
│   │       ├── 34e3630c7f32_lowercase_multi_tenant_user_auth.py
│   │       ├── 3b45e0018bf1_add_new_available_tenant_table.py
│   │       ├── 3b9f09038764_add_read_only_kg_user.py
│   │       ├── a4f6ee863c47_mapping_for_anonymous_user_path.py
│   │       └── ac842f85f932_new_column_user_tenant_mapping.py
│   ├── assets/
│   │   └── .gitignore
│   ├── ee/
│   │   ├── LICENSE
│   │   ├── __init__.py
│   │   └── onyx/
│   │       ├── __init__.py
│   │       ├── access/
│   │       │   ├── access.py
│   │       │   └── hierarchy_access.py
│   │       ├── auth/
│   │       │   ├── __init__.py
│   │       │   └── users.py
│   │       ├── background/
│   │       │   ├── celery/
│   │       │   │   ├── apps/
│   │       │   │   │   ├── heavy.py
│   │       │   │   │   ├── light.py
│   │       │   │   │   ├── monitoring.py
│   │       │   │   │   └── primary.py
│   │       │   │   └── tasks/
│   │       │   │       ├── beat_schedule.py
│   │       │   │       ├── cleanup/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── cloud/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── doc_permission_syncing/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── external_group_syncing/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   ├── group_sync_utils.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── hooks/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── query_history/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── tenant_provisioning/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── ttl_management/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       ├── usage_reporting/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── tasks.py
│   │       │   │       └── vespa/
│   │       │   │           ├── __init__.py
│   │       │   │           └── tasks.py
│   │       │   ├── celery_utils.py
│   │       │   └── task_name_builders.py
│   │       ├── configs/
│   │       │   ├── __init__.py
│   │       │   ├── app_configs.py
│   │       │   └── license_enforcement_config.py
│   │       ├── connectors/
│   │       │   └── perm_sync_valid.py
│   │       ├── db/
│   │       │   ├── __init__.py
│   │       │   ├── analytics.py
│   │       │   ├── connector.py
│   │       │   ├── connector_credential_pair.py
│   │       │   ├── document.py
│   │       │   ├── document_set.py
│   │       │   ├── external_perm.py
│   │       │   ├── hierarchy.py
│   │       │   ├── license.py
│   │       │   ├── persona.py
│   │       │   ├── query_history.py
│   │       │   ├── saml.py
│   │       │   ├── scim.py
│   │       │   ├── search.py
│   │       │   ├── standard_answer.py
│   │       │   ├── token_limit.py
│   │       │   ├── usage_export.py
│   │       │   └── user_group.py
│   │       ├── document_index/
│   │       │   └── vespa/
│   │       │       └── app_config/
│   │       │           └── cloud-services.xml.jinja
│   │       ├── external_permissions/
│   │       │   ├── __init__.py
│   │       │   ├── confluence/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── constants.py
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── group_sync.py
│   │       │   │   ├── page_access.py
│   │       │   │   └── space_access.py
│   │       │   ├── github/
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── group_sync.py
│   │       │   │   └── utils.py
│   │       │   ├── gmail/
│   │       │   │   └── doc_sync.py
│   │       │   ├── google_drive/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── folder_retrieval.py
│   │       │   │   ├── group_sync.py
│   │       │   │   ├── models.py
│   │       │   │   └── permission_retrieval.py
│   │       │   ├── jira/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── group_sync.py
│   │       │   │   ├── models.py
│   │       │   │   └── page_access.py
│   │       │   ├── perm_sync_types.py
│   │       │   ├── post_query_censoring.py
│   │       │   ├── salesforce/
│   │       │   │   ├── postprocessing.py
│   │       │   │   └── utils.py
│   │       │   ├── sharepoint/
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── group_sync.py
│   │       │   │   └── permission_utils.py
│   │       │   ├── slack/
│   │       │   │   ├── channel_access.py
│   │       │   │   ├── doc_sync.py
│   │       │   │   ├── group_sync.py
│   │       │   │   └── utils.py
│   │       │   ├── sync_params.py
│   │       │   ├── teams/
│   │       │   │   └── doc_sync.py
│   │       │   └── utils.py
│   │       ├── feature_flags/
│   │       │   ├── __init__.py
│   │       │   ├── factory.py
│   │       │   └── posthog_provider.py
│   │       ├── hooks/
│   │       │   ├── __init__.py
│   │       │   └── executor.py
│   │       ├── main.py
│   │       ├── onyxbot/
│   │       │   └── slack/
│   │       │       └── handlers/
│   │       │           ├── __init__.py
│   │       │           └── handle_standard_answers.py
│   │       ├── prompts/
│   │       │   ├── __init__.py
│   │       │   ├── query_expansion.py
│   │       │   └── search_flow_classification.py
│   │       ├── search/
│   │       │   └── process_search_query.py
│   │       ├── secondary_llm_flows/
│   │       │   ├── __init__.py
│   │       │   ├── query_expansion.py
│   │       │   └── search_flow_classification.py
│   │       ├── server/
│   │       │   ├── __init__.py
│   │       │   ├── analytics/
│   │       │   │   └── api.py
│   │       │   ├── auth_check.py
│   │       │   ├── billing/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── api.py
│   │       │   │   ├── models.py
│   │       │   │   └── service.py
│   │       │   ├── documents/
│   │       │   │   └── cc_pair.py
│   │       │   ├── enterprise_settings/
│   │       │   │   ├── api.py
│   │       │   │   ├── models.py
│   │       │   │   └── store.py
│   │       │   ├── evals/
│   │       │   │   ├── __init__.py
│   │       │   │   └── api.py
│   │       │   ├── features/
│   │       │   │   ├── __init__.py
│   │       │   │   └── hooks/
│   │       │   │       ├── __init__.py
│   │       │   │       └── api.py
│   │       │   ├── license/
│   │       │   │   ├── api.py
│   │       │   │   └── models.py
│   │       │   ├── manage/
│   │       │   │   └── standard_answer.py
│   │       │   ├── middleware/
│   │       │   │   ├── license_enforcement.py
│   │       │   │   └── tenant_tracking.py
│   │       │   ├── oauth/
│   │       │   │   ├── api.py
│   │       │   │   ├── api_router.py
│   │       │   │   ├── confluence_cloud.py
│   │       │   │   ├── google_drive.py
│   │       │   │   └── slack.py
│   │       │   ├── query_and_chat/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── models.py
│   │       │   │   ├── query_backend.py
│   │       │   │   ├── search_backend.py
│   │       │   │   ├── streaming_models.py
│   │       │   │   └── token_limit.py
│   │       │   ├── query_history/
│   │       │   │   ├── api.py
│   │       │   │   └── models.py
│   │       │   ├── reporting/
│   │       │   │   ├── usage_export_api.py
│   │       │   │   ├── usage_export_generation.py
│   │       │   │   └── usage_export_models.py
│   │       │   ├── scim/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── api.py
│   │       │   │   ├── auth.py
│   │       │   │   ├── filtering.py
│   │       │   │   ├── models.py
│   │       │   │   ├── patch.py
│   │       │   │   ├── providers/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   ├── base.py
│   │       │   │   │   ├── entra.py
│   │       │   │   │   └── okta.py
│   │       │   │   └── schema_definitions.py
│   │       │   ├── seeding.py
│   │       │   ├── settings/
│   │       │   │   ├── __init__.py
│   │       │   │   └── api.py
│   │       │   ├── tenant_usage_limits.py
│   │       │   ├── tenants/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── access.py
│   │       │   │   ├── admin_api.py
│   │       │   │   ├── anonymous_user_path.py
│   │       │   │   ├── anonymous_users_api.py
│   │       │   │   ├── api.py
│   │       │   │   ├── billing.py
│   │       │   │   ├── billing_api.py
│   │       │   │   ├── models.py
│   │       │   │   ├── product_gating.py
│   │       │   │   ├── provisioning.py
│   │       │   │   ├── proxy.py
│   │       │   │   ├── schema_management.py
│   │       │   │   ├── team_membership_api.py
│   │       │   │   ├── tenant_management_api.py
│   │       │   │   ├── user_invitations_api.py
│   │       │   │   └── user_mapping.py
│   │       │   ├── token_rate_limits/
│   │       │   │   └── api.py
│   │       │   ├── usage_limits.py
│   │       │   └── user_group/
│   │       │       ├── api.py
│   │       │       └── models.py
│   │       └── utils/
│   │           ├── __init__.py
│   │           ├── encryption.py
│   │           ├── license.py
│   │           ├── posthog_client.py
│   │           └── telemetry.py
│   ├── generated/
│   │   └── README.md
│   ├── keys/
│   │   └── license_public_key.pem
│   ├── model_server/
│   │   ├── __init__.py
│   │   ├── constants.py
│   │   ├── encoders.py
│   │   ├── legacy/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── custom_models.py
│   │   │   ├── onyx_torch_model.py
│   │   │   └── reranker.py
│   │   ├── main.py
│   │   ├── management_endpoints.py
│   │   └── utils.py
│   ├── onyx/
│   │   ├── __init__.py
│   │   ├── access/
│   │   │   ├── __init__.py
│   │   │   ├── access.py
│   │   │   ├── hierarchy_access.py
│   │   │   ├── models.py
│   │   │   └── utils.py
│   │   ├── auth/
│   │   │   ├── __init__.py
│   │   │   ├── anonymous_user.py
│   │   │   ├── api_key.py
│   │   │   ├── captcha.py
│   │   │   ├── constants.py
│   │   │   ├── disposable_email_validator.py
│   │   │   ├── email_utils.py
│   │   │   ├── invited_users.py
│   │   │   ├── jwt.py
│   │   │   ├── oauth_refresher.py
│   │   │   ├── oauth_token_manager.py
│   │   │   ├── pat.py
│   │   │   ├── permissions.py
│   │   │   ├── schemas.py
│   │   │   ├── users.py
│   │   │   └── utils.py
│   │   ├── background/
│   │   │   ├── README.md
│   │   │   ├── celery/
│   │   │   │   ├── apps/
│   │   │   │   │   ├── app_base.py
│   │   │   │   │   ├── beat.py
│   │   │   │   │   ├── client.py
│   │   │   │   │   ├── docfetching.py
│   │   │   │   │   ├── docprocessing.py
│   │   │   │   │   ├── heavy.py
│   │   │   │   │   ├── light.py
│   │   │   │   │   ├── monitoring.py
│   │   │   │   │   ├── primary.py
│   │   │   │   │   ├── task_formatters.py
│   │   │   │   │   └── user_file_processing.py
│   │   │   │   ├── celery_k8s_probe.py
│   │   │   │   ├── celery_redis.py
│   │   │   │   ├── celery_utils.py
│   │   │   │   ├── configs/
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── beat.py
│   │   │   │   │   ├── client.py
│   │   │   │   │   ├── docfetching.py
│   │   │   │   │   ├── docprocessing.py
│   │   │   │   │   ├── heavy.py
│   │   │   │   │   ├── light.py
│   │   │   │   │   ├── monitoring.py
│   │   │   │   │   ├── primary.py
│   │   │   │   │   └── user_file_processing.py
│   │   │   │   ├── memory_monitoring.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── beat_schedule.py
│   │   │   │   │   ├── connector_deletion/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── docfetching/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── task_creation_utils.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── docprocessing/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── heartbeat.py
│   │   │   │   │   │   ├── tasks.py
│   │   │   │   │   │   └── utils.py
│   │   │   │   │   ├── evals/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── hierarchyfetching/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── llm_model_update/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   ├── monitoring/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── opensearch_migration/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── constants.py
│   │   │   │   │   │   ├── tasks.py
│   │   │   │   │   │   └── transformer.py
│   │   │   │   │   ├── periodic/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── pruning/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── shared/
│   │   │   │   │   │   ├── RetryDocumentIndex.py
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   ├── user_file_processing/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tasks.py
│   │   │   │   │   └── vespa/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── document_sync.py
│   │   │   │   │       └── tasks.py
│   │   │   │   └── versioned_apps/
│   │   │   │       ├── beat.py
│   │   │   │       ├── client.py
│   │   │   │       ├── docfetching.py
│   │   │   │       ├── docprocessing.py
│   │   │   │       ├── heavy.py
│   │   │   │       ├── light.py
│   │   │   │       ├── monitoring.py
│   │   │   │       ├── primary.py
│   │   │   │       └── user_file_processing.py
│   │   │   ├── error_logging.py
│   │   │   ├── indexing/
│   │   │   │   ├── checkpointing_utils.py
│   │   │   │   ├── dask_utils.py
│   │   │   │   ├── index_attempt_utils.py
│   │   │   │   ├── job_client.py
│   │   │   │   ├── memory_tracer.py
│   │   │   │   ├── models.py
│   │   │   │   └── run_docfetching.py
│   │   │   ├── periodic_poller.py
│   │   │   └── task_utils.py
│   │   ├── cache/
│   │   │   ├── factory.py
│   │   │   ├── interface.py
│   │   │   ├── postgres_backend.py
│   │   │   └── redis_backend.py
│   │   ├── chat/
│   │   │   ├── COMPRESSION.md
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── chat_processing_checker.py
│   │   │   ├── chat_state.py
│   │   │   ├── chat_utils.py
│   │   │   ├── citation_processor.py
│   │   │   ├── citation_utils.py
│   │   │   ├── compression.py
│   │   │   ├── emitter.py
│   │   │   ├── llm_loop.py
│   │   │   ├── llm_step.py
│   │   │   ├── models.py
│   │   │   ├── process_message.py
│   │   │   ├── prompt_utils.py
│   │   │   ├── save_chat.py
│   │   │   ├── stop_signal_checker.py
│   │   │   └── tool_call_args_streaming.py
│   │   ├── configs/
│   │   │   ├── __init__.py
│   │   │   ├── agent_configs.py
│   │   │   ├── app_configs.py
│   │   │   ├── chat_configs.py
│   │   │   ├── constants.py
│   │   │   ├── embedding_configs.py
│   │   │   ├── kg_configs.py
│   │   │   ├── llm_configs.py
│   │   │   ├── model_configs.py
│   │   │   ├── onyxbot_configs.py
│   │   │   ├── research_configs.py
│   │   │   ├── saml_config/
│   │   │   │   └── template.settings.json
│   │   │   └── tool_configs.py
│   │   ├── connectors/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── airtable/
│   │   │   │   └── airtable_connector.py
│   │   │   ├── asana/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── asana_api.py
│   │   │   │   └── connector.py
│   │   │   ├── axero/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── bitbucket/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   └── utils.py
│   │   │   ├── blob/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── bookstack/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   └── connector.py
│   │   │   ├── canvas/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── access.py
│   │   │   │   ├── client.py
│   │   │   │   └── connector.py
│   │   │   ├── clickup/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── coda/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── confluence/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── access.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── models.py
│   │   │   │   ├── onyx_confluence.py
│   │   │   │   ├── user_profile_override.py
│   │   │   │   └── utils.py
│   │   │   ├── connector_runner.py
│   │   │   ├── credentials_provider.py
│   │   │   ├── cross_connector_utils/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── miscellaneous_utils.py
│   │   │   │   └── rate_limit_wrapper.py
│   │   │   ├── discord/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── discourse/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── document360/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   └── utils.py
│   │   │   ├── dropbox/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── drupal_wiki/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── models.py
│   │   │   │   └── utils.py
│   │   │   ├── egnyte/
│   │   │   │   └── connector.py
│   │   │   ├── exceptions.py
│   │   │   ├── factory.py
│   │   │   ├── file/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── fireflies/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── freshdesk/
│   │   │   │   ├── __init__,py
│   │   │   │   └── connector.py
│   │   │   ├── gitbook/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── github/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── models.py
│   │   │   │   ├── rate_limit_utils.py
│   │   │   │   └── utils.py
│   │   │   ├── gitlab/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── gmail/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── gong/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── google_drive/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── doc_conversion.py
│   │   │   │   ├── file_retrieval.py
│   │   │   │   ├── models.py
│   │   │   │   └── section_extraction.py
│   │   │   ├── google_site/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── google_utils/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── google_auth.py
│   │   │   │   ├── google_kv.py
│   │   │   │   ├── google_utils.py
│   │   │   │   ├── resources.py
│   │   │   │   └── shared_constants.py
│   │   │   ├── guru/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── highspot/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── connector.py
│   │   │   │   └── utils.py
│   │   │   ├── hubspot/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   └── rate_limit.py
│   │   │   ├── imap/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   └── models.py
│   │   │   ├── interfaces.py
│   │   │   ├── jira/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── access.py
│   │   │   │   ├── connector.py
│   │   │   │   └── utils.py
│   │   │   ├── linear/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── loopio/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── mediawiki/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── family.py
│   │   │   │   └── wiki.py
│   │   │   ├── microsoft_graph_env.py
│   │   │   ├── mock_connector/
│   │   │   │   └── connector.py
│   │   │   ├── models.py
│   │   │   ├── notion/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── outline/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   └── connector.py
│   │   │   ├── productboard/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── registry.py
│   │   │   ├── requesttracker/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── salesforce/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── blacklist.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── doc_conversion.py
│   │   │   │   ├── onyx_salesforce.py
│   │   │   │   ├── salesforce_calls.py
│   │   │   │   ├── shelve_stuff/
│   │   │   │   │   ├── old_test_salesforce_shelves.py
│   │   │   │   │   ├── shelve_functions.py
│   │   │   │   │   ├── shelve_utils.py
│   │   │   │   │   └── test_salesforce_shelves.py
│   │   │   │   ├── sqlite_functions.py
│   │   │   │   └── utils.py
│   │   │   ├── sharepoint/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   └── connector_utils.py
│   │   │   ├── slab/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── slack/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── access.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── models.py
│   │   │   │   ├── onyx_retry_handler.py
│   │   │   │   ├── onyx_slack_web_client.py
│   │   │   │   └── utils.py
│   │   │   ├── teams/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── models.py
│   │   │   │   └── utils.py
│   │   │   ├── testrail/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── web/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── wikipedia/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── xenforo/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   ├── zendesk/
│   │   │   │   ├── __init__.py
│   │   │   │   └── connector.py
│   │   │   └── zulip/
│   │   │       ├── __init__.py
│   │   │       ├── connector.py
│   │   │       ├── schemas.py
│   │   │       └── utils.py
│   │   ├── context/
│   │   │   └── search/
│   │   │       ├── __init__.py
│   │   │       ├── enums.py
│   │   │       ├── federated/
│   │   │       │   ├── models.py
│   │   │       │   ├── slack_search.py
│   │   │       │   └── slack_search_utils.py
│   │   │       ├── models.py
│   │   │       ├── pipeline.py
│   │   │       ├── preprocessing/
│   │   │       │   └── access_filters.py
│   │   │       ├── retrieval/
│   │   │       │   └── search_runner.py
│   │   │       └── utils.py
│   │   ├── db/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── _deprecated/
│   │   │   │   └── pg_file_store.py
│   │   │   ├── api_key.py
│   │   │   ├── auth.py
│   │   │   ├── background_error.py
│   │   │   ├── chat.py
│   │   │   ├── chat_search.py
│   │   │   ├── chunk.py
│   │   │   ├── code_interpreter.py
│   │   │   ├── connector.py
│   │   │   ├── connector_credential_pair.py
│   │   │   ├── constants.py
│   │   │   ├── credentials.py
│   │   │   ├── dal.py
│   │   │   ├── deletion_attempt.py
│   │   │   ├── discord_bot.py
│   │   │   ├── document.py
│   │   │   ├── document_access.py
│   │   │   ├── document_set.py
│   │   │   ├── engine/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── async_sql_engine.py
│   │   │   │   ├── connection_warmup.py
│   │   │   │   ├── iam_auth.py
│   │   │   │   ├── sql_engine.py
│   │   │   │   ├── tenant_utils.py
│   │   │   │   └── time_utils.py
│   │   │   ├── entities.py
│   │   │   ├── entity_type.py
│   │   │   ├── enums.py
│   │   │   ├── federated.py
│   │   │   ├── feedback.py
│   │   │   ├── file_content.py
│   │   │   ├── file_record.py
│   │   │   ├── hierarchy.py
│   │   │   ├── hook.py
│   │   │   ├── image_generation.py
│   │   │   ├── index_attempt.py
│   │   │   ├── indexing_coordination.py
│   │   │   ├── input_prompt.py
│   │   │   ├── kg_config.py
│   │   │   ├── kg_temp_view.py
│   │   │   ├── llm.py
│   │   │   ├── mcp.py
│   │   │   ├── memory.py
│   │   │   ├── models.py
│   │   │   ├── notification.py
│   │   │   ├── oauth_config.py
│   │   │   ├── opensearch_migration.py
│   │   │   ├── pat.py
│   │   │   ├── permission_sync_attempt.py
│   │   │   ├── permissions.py
│   │   │   ├── persona.py
│   │   │   ├── projects.py
│   │   │   ├── pydantic_type.py
│   │   │   ├── relationships.py
│   │   │   ├── release_notes.py
│   │   │   ├── rotate_encryption_key.py
│   │   │   ├── saml.py
│   │   │   ├── search_settings.py
│   │   │   ├── seeding/
│   │   │   │   └── chat_history_seeding.py
│   │   │   ├── slack_bot.py
│   │   │   ├── slack_channel_config.py
│   │   │   ├── swap_index.py
│   │   │   ├── sync_record.py
│   │   │   ├── tag.py
│   │   │   ├── tasks.py
│   │   │   ├── token_limit.py
│   │   │   ├── tools.py
│   │   │   ├── usage.py
│   │   │   ├── user_file.py
│   │   │   ├── user_preferences.py
│   │   │   ├── users.py
│   │   │   ├── utils.py
│   │   │   ├── voice.py
│   │   │   └── web_search.py
│   │   ├── deep_research/
│   │   │   ├── __init__.py
│   │   │   ├── dr_loop.py
│   │   │   ├── dr_mock_tools.py
│   │   │   ├── models.py
│   │   │   └── utils.py
│   │   ├── document_index/
│   │   │   ├── FILTER_SEMANTICS.md
│   │   │   ├── __init__.py
│   │   │   ├── chunk_content_enrichment.py
│   │   │   ├── disabled.py
│   │   │   ├── document_index_utils.py
│   │   │   ├── factory.py
│   │   │   ├── interfaces.py
│   │   │   ├── interfaces_new.py
│   │   │   ├── opensearch/
│   │   │   │   ├── README.md
│   │   │   │   ├── client.py
│   │   │   │   ├── cluster_settings.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── opensearch_document_index.py
│   │   │   │   ├── schema.py
│   │   │   │   ├── search.py
│   │   │   │   └── string_filtering.py
│   │   │   ├── vespa/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── app_config/
│   │   │   │   │   ├── schemas/
│   │   │   │   │   │   └── danswer_chunk.sd.jinja
│   │   │   │   │   ├── services.xml.jinja
│   │   │   │   │   └── validation-overrides.xml.jinja
│   │   │   │   ├── chunk_retrieval.py
│   │   │   │   ├── deletion.py
│   │   │   │   ├── index.py
│   │   │   │   ├── indexing_utils.py
│   │   │   │   ├── kg_interactions.py
│   │   │   │   ├── shared_utils/
│   │   │   │   │   ├── utils.py
│   │   │   │   │   └── vespa_request_builders.py
│   │   │   │   └── vespa_document_index.py
│   │   │   └── vespa_constants.py
│   │   ├── error_handling/
│   │   │   ├── __init__.py
│   │   │   ├── error_codes.py
│   │   │   └── exceptions.py
│   │   ├── evals/
│   │   │   ├── README.md
│   │   │   ├── eval.py
│   │   │   ├── eval_cli.py
│   │   │   ├── models.py
│   │   │   ├── one_off/
│   │   │   │   └── create_braintrust_dataset.py
│   │   │   ├── provider.py
│   │   │   └── providers/
│   │   │       ├── braintrust.py
│   │   │       └── local.py
│   │   ├── feature_flags/
│   │   │   ├── __init__.py
│   │   │   ├── factory.py
│   │   │   ├── feature_flags_keys.py
│   │   │   ├── flags.py
│   │   │   └── interface.py
│   │   ├── federated_connectors/
│   │   │   ├── __init__.py
│   │   │   ├── factory.py
│   │   │   ├── federated_retrieval.py
│   │   │   ├── interfaces.py
│   │   │   ├── models.py
│   │   │   ├── oauth_utils.py
│   │   │   ├── registry.py
│   │   │   └── slack/
│   │   │       ├── __init__.py
│   │   │       ├── federated_connector.py
│   │   │       └── models.py
│   │   ├── file_processing/
│   │   │   ├── __init__.py
│   │   │   ├── enums.py
│   │   │   ├── extract_file_text.py
│   │   │   ├── file_types.py
│   │   │   ├── html_utils.py
│   │   │   ├── image_summarization.py
│   │   │   ├── image_utils.py
│   │   │   ├── password_validation.py
│   │   │   └── unstructured.py
│   │   ├── file_store/
│   │   │   ├── README.md
│   │   │   ├── constants.py
│   │   │   ├── document_batch_storage.py
│   │   │   ├── file_store.py
│   │   │   ├── models.py
│   │   │   ├── postgres_file_store.py
│   │   │   ├── s3_key_utils.py
│   │   │   └── utils.py
│   │   ├── hooks/
│   │   │   ├── __init__.py
│   │   │   ├── api_dependencies.py
│   │   │   ├── executor.py
│   │   │   ├── models.py
│   │   │   ├── points/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── document_ingestion.py
│   │   │   │   └── query_processing.py
│   │   │   └── registry.py
│   │   ├── httpx/
│   │   │   └── httpx_pool.py
│   │   ├── image_gen/
│   │   │   ├── __init__.py
│   │   │   ├── exceptions.py
│   │   │   ├── factory.py
│   │   │   ├── interfaces.py
│   │   │   └── providers/
│   │   │       ├── azure_img_gen.py
│   │   │       ├── openai_img_gen.py
│   │   │       └── vertex_img_gen.py
│   │   ├── indexing/
│   │   │   ├── __init__.py
│   │   │   ├── adapters/
│   │   │   │   ├── document_indexing_adapter.py
│   │   │   │   └── user_file_indexing_adapter.py
│   │   │   ├── chunk_batch_store.py
│   │   │   ├── chunker.py
│   │   │   ├── content_classification.py
│   │   │   ├── embedder.py
│   │   │   ├── indexing_heartbeat.py
│   │   │   ├── indexing_pipeline.py
│   │   │   ├── models.py
│   │   │   └── vector_db_insertion.py
│   │   ├── key_value_store/
│   │   │   ├── __init__.py
│   │   │   ├── factory.py
│   │   │   ├── interface.py
│   │   │   └── store.py
│   │   ├── kg/
│   │   │   ├── clustering/
│   │   │   │   ├── clustering.py
│   │   │   │   └── normalizations.py
│   │   │   ├── extractions/
│   │   │   │   └── extraction_processing.py
│   │   │   ├── models.py
│   │   │   ├── resets/
│   │   │   │   ├── reset_index.py
│   │   │   │   ├── reset_source.py
│   │   │   │   └── reset_vespa.py
│   │   │   ├── setup/
│   │   │   │   └── kg_default_entity_definitions.py
│   │   │   ├── utils/
│   │   │   │   ├── embeddings.py
│   │   │   │   ├── extraction_utils.py
│   │   │   │   ├── formatting_utils.py
│   │   │   │   └── lock_utils.py
│   │   │   └── vespa/
│   │   │       └── vespa_interactions.py
│   │   ├── llm/
│   │   │   ├── __init__.py
│   │   │   ├── constants.py
│   │   │   ├── cost.py
│   │   │   ├── factory.py
│   │   │   ├── interfaces.py
│   │   │   ├── litellm_singleton/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.py
│   │   │   │   └── monkey_patches.py
│   │   │   ├── model_metadata_enrichments.json
│   │   │   ├── model_name_parser.py
│   │   │   ├── model_response.py
│   │   │   ├── models.py
│   │   │   ├── multi_llm.py
│   │   │   ├── override_models.py
│   │   │   ├── prompt_cache/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cache_manager.py
│   │   │   │   ├── models.py
│   │   │   │   ├── processor.py
│   │   │   │   ├── providers/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── noop.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   └── vertex.py
│   │   │   │   └── utils.py
│   │   │   ├── request_context.py
│   │   │   ├── utils.py
│   │   │   └── well_known_providers/
│   │   │       ├── auto_update_models.py
│   │   │       ├── auto_update_service.py
│   │   │       ├── constants.py
│   │   │       ├── llm_provider_options.py
│   │   │       ├── models.py
│   │   │       └── recommended-models.json
│   │   ├── main.py
│   │   ├── mcp_server/
│   │   │   ├── README.md
│   │   │   ├── api.py
│   │   │   ├── auth.py
│   │   │   ├── mcp.json.template
│   │   │   ├── resources/
│   │   │   │   ├── __init__.py
│   │   │   │   └── indexed_sources.py
│   │   │   ├── tools/
│   │   │   │   ├── __init__.py
│   │   │   │   └── search.py
│   │   │   └── utils.py
│   │   ├── mcp_server_main.py
│   │   ├── natural_language_processing/
│   │   │   ├── __init__.py
│   │   │   ├── constants.py
│   │   │   ├── english_stopwords.py
│   │   │   ├── exceptions.py
│   │   │   ├── search_nlp_models.py
│   │   │   └── utils.py
│   │   ├── onyxbot/
│   │   │   ├── discord/
│   │   │   │   ├── DISCORD_MULTITENANT_README.md
│   │   │   │   ├── api_client.py
│   │   │   │   ├── cache.py
│   │   │   │   ├── client.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── handle_commands.py
│   │   │   │   ├── handle_message.py
│   │   │   │   └── utils.py
│   │   │   └── slack/
│   │   │       ├── blocks.py
│   │   │       ├── config.py
│   │   │       ├── constants.py
│   │   │       ├── formatting.py
│   │   │       ├── handlers/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── handle_buttons.py
│   │   │       │   ├── handle_message.py
│   │   │       │   ├── handle_regular_answer.py
│   │   │       │   ├── handle_standard_answers.py
│   │   │       │   └── utils.py
│   │   │       ├── icons.py
│   │   │       ├── listener.py
│   │   │       ├── models.py
│   │   │       └── utils.py
│   │   ├── prompts/
│   │   │   ├── __init__.py
│   │   │   ├── basic_memory.py
│   │   │   ├── chat_prompts.py
│   │   │   ├── chat_tools.py
│   │   │   ├── compression_prompts.py
│   │   │   ├── constants.py
│   │   │   ├── contextual_retrieval.py
│   │   │   ├── deep_research/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dr_tool_prompts.py
│   │   │   │   ├── orchestration_layer.py
│   │   │   │   └── research_agent.py
│   │   │   ├── federated_search.py
│   │   │   ├── filter_extration.py
│   │   │   ├── image_analysis.py
│   │   │   ├── kg_prompts.py
│   │   │   ├── prompt_template.py
│   │   │   ├── prompt_utils.py
│   │   │   ├── search_prompts.py
│   │   │   ├── tool_prompts.py
│   │   │   └── user_info.py
│   │   ├── redis/
│   │   │   ├── iam_auth.py
│   │   │   ├── redis_connector.py
│   │   │   ├── redis_connector_delete.py
│   │   │   ├── redis_connector_doc_perm_sync.py
│   │   │   ├── redis_connector_ext_group_sync.py
│   │   │   ├── redis_connector_index.py
│   │   │   ├── redis_connector_prune.py
│   │   │   ├── redis_connector_stop.py
│   │   │   ├── redis_connector_utils.py
│   │   │   ├── redis_document_set.py
│   │   │   ├── redis_hierarchy.py
│   │   │   ├── redis_object_helper.py
│   │   │   ├── redis_pool.py
│   │   │   ├── redis_usergroup.py
│   │   │   └── redis_utils.py
│   │   ├── secondary_llm_flows/
│   │   │   ├── __init__.py
│   │   │   ├── chat_session_naming.py
│   │   │   ├── document_filter.py
│   │   │   ├── memory_update.py
│   │   │   ├── query_expansion.py
│   │   │   ├── source_filter.py
│   │   │   └── time_filter.py
│   │   ├── seeding/
│   │   │   └── __init__.py
│   │   ├── server/
│   │   │   ├── __init__.py
│   │   │   ├── api_key/
│   │   │   │   ├── api.py
│   │   │   │   └── models.py
│   │   │   ├── api_key_usage.py
│   │   │   ├── auth_check.py
│   │   │   ├── documents/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cc_pair.py
│   │   │   │   ├── connector.py
│   │   │   │   ├── credential.py
│   │   │   │   ├── document.py
│   │   │   │   ├── document_utils.py
│   │   │   │   ├── models.py
│   │   │   │   ├── private_key_types.py
│   │   │   │   └── standard_oauth.py
│   │   │   ├── evals/
│   │   │   │   ├── __init__.py
│   │   │   │   └── models.py
│   │   │   ├── features/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── build/
│   │   │   │   │   ├── .gitignore
│   │   │   │   │   ├── AGENTS.template.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api/
│   │   │   │   │   │   ├── api.py
│   │   │   │   │   │   ├── messages_api.py
│   │   │   │   │   │   ├── models.py
│   │   │   │   │   │   ├── packet_logger.py
│   │   │   │   │   │   ├── packets.py
│   │   │   │   │   │   ├── rate_limit.py
│   │   │   │   │   │   ├── sessions_api.py
│   │   │   │   │   │   ├── subscription_check.py
│   │   │   │   │   │   ├── templates/
│   │   │   │   │   │   │   ├── webapp_hmr_fixer.js
│   │   │   │   │   │   │   └── webapp_offline.html
│   │   │   │   │   │   └── user_library.py
│   │   │   │   │   ├── configs.py
│   │   │   │   │   ├── db/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── build_session.py
│   │   │   │   │   │   ├── rate_limit.py
│   │   │   │   │   │   ├── sandbox.py
│   │   │   │   │   │   └── user_library.py
│   │   │   │   │   ├── indexing/
│   │   │   │   │   │   └── persistent_document_writer.py
│   │   │   │   │   ├── s3/
│   │   │   │   │   │   └── s3_client.py
│   │   │   │   │   ├── sandbox/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── base.py
│   │   │   │   │   │   ├── kubernetes/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── docker/
│   │   │   │   │   │   │   │   ├── Dockerfile
│   │   │   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   │   │   ├── generate_agents_md.py
│   │   │   │   │   │   │   │   ├── initial-requirements.txt
│   │   │   │   │   │   │   │   ├── run-test.sh
│   │   │   │   │   │   │   │   ├── skills/
│   │   │   │   │   │   │   │   │   ├── image-generation/
│   │   │   │   │   │   │   │   │   │   ├── SKILL.md
│   │   │   │   │   │   │   │   │   │   └── scripts/
│   │   │   │   │   │   │   │   │   │       └── generate.py
│   │   │   │   │   │   │   │   │   └── pptx/
│   │   │   │   │   │   │   │   │       ├── SKILL.md
│   │   │   │   │   │   │   │   │       ├── editing.md
│   │   │   │   │   │   │   │   │       ├── pptxgenjs.md
│   │   │   │   │   │   │   │   │       └── scripts/
│   │   │   │   │   │   │   │   │           ├── __init__.py
│   │   │   │   │   │   │   │   │           ├── add_slide.py
│   │   │   │   │   │   │   │   │           ├── clean.py
│   │   │   │   │   │   │   │   │           ├── office/
│   │   │   │   │   │   │   │   │           │   ├── helpers/
│   │   │   │   │   │   │   │   │           │   │   ├── __init__.py
│   │   │   │   │   │   │   │   │           │   │   ├── merge_runs.py
│   │   │   │   │   │   │   │   │           │   │   └── simplify_redlines.py
│   │   │   │   │   │   │   │   │           │   ├── pack.py
│   │   │   │   │   │   │   │   │           │   ├── schemas/
│   │   │   │   │   │   │   │   │           │   │   ├── ISO-IEC29500-4_2016/
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-chart.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-chartDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-diagram.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-lockedCanvas.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-main.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-picture.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-spreadsheetDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── dml-wordprocessingDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── pml.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-additionalCharacteristics.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-bibliography.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-commonSimpleTypes.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-customXmlDataProperties.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-customXmlSchemaProperties.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-documentPropertiesCustom.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-documentPropertiesExtended.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-documentPropertiesVariantTypes.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-math.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── shared-relationshipReference.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── sml.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── vml-main.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── vml-officeDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── vml-presentationDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── vml-spreadsheetDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── vml-wordprocessingDrawing.xsd
│   │   │   │   │   │   │   │   │           │   │   │   ├── wml.xsd
│   │   │   │   │   │   │   │   │           │   │   │   └── xml.xsd
│   │   │   │   │   │   │   │   │           │   │   ├── ecma/
│   │   │   │   │   │   │   │   │           │   │   │   └── fouth-edition/
│   │   │   │   │   │   │   │   │           │   │   │       ├── opc-contentTypes.xsd
│   │   │   │   │   │   │   │   │           │   │   │       ├── opc-coreProperties.xsd
│   │   │   │   │   │   │   │   │           │   │   │       ├── opc-digSig.xsd
│   │   │   │   │   │   │   │   │           │   │   │       └── opc-relationships.xsd
│   │   │   │   │   │   │   │   │           │   │   ├── mce/
│   │   │   │   │   │   │   │   │           │   │   │   └── mc.xsd
│   │   │   │   │   │   │   │   │           │   │   └── microsoft/
│   │   │   │   │   │   │   │   │           │   │       ├── wml-2010.xsd
│   │   │   │   │   │   │   │   │           │   │       ├── wml-2012.xsd
│   │   │   │   │   │   │   │   │           │   │       ├── wml-2018.xsd
│   │   │   │   │   │   │   │   │           │   │       ├── wml-cex-2018.xsd
│   │   │   │   │   │   │   │   │           │   │       ├── wml-cid-2016.xsd
│   │   │   │   │   │   │   │   │           │   │       ├── wml-sdtdatahash-2020.xsd
│   │   │   │   │   │   │   │   │           │   │       └── wml-symex-2015.xsd
│   │   │   │   │   │   │   │   │           │   ├── soffice.py
│   │   │   │   │   │   │   │   │           │   ├── unpack.py
│   │   │   │   │   │   │   │   │           │   ├── validate.py
│   │   │   │   │   │   │   │   │           │   └── validators/
│   │   │   │   │   │   │   │   │           │       ├── __init__.py
│   │   │   │   │   │   │   │   │           │       ├── base.py
│   │   │   │   │   │   │   │   │           │       ├── docx.py
│   │   │   │   │   │   │   │   │           │       ├── pptx.py
│   │   │   │   │   │   │   │   │           │       └── redlining.py
│   │   │   │   │   │   │   │   │           ├── preview.py
│   │   │   │   │   │   │   │   │           └── thumbnail.py
│   │   │   │   │   │   │   │   ├── templates/
│   │   │   │   │   │   │   │   │   └── outputs/
│   │   │   │   │   │   │   │   │       └── web/
│   │   │   │   │   │   │   │   │           ├── .gitignore
│   │   │   │   │   │   │   │   │           ├── AGENTS.md
│   │   │   │   │   │   │   │   │           ├── app/
│   │   │   │   │   │   │   │   │           │   ├── globals.css
│   │   │   │   │   │   │   │   │           │   ├── layout.tsx
│   │   │   │   │   │   │   │   │           │   ├── page.tsx
│   │   │   │   │   │   │   │   │           │   └── site.webmanifest
│   │   │   │   │   │   │   │   │           ├── components/
│   │   │   │   │   │   │   │   │           │   ├── component-example.tsx
│   │   │   │   │   │   │   │   │           │   ├── example.tsx
│   │   │   │   │   │   │   │   │           │   └── ui/
│   │   │   │   │   │   │   │   │           │       ├── accordion.tsx
│   │   │   │   │   │   │   │   │           │       ├── alert-dialog.tsx
│   │   │   │   │   │   │   │   │           │       ├── alert.tsx
│   │   │   │   │   │   │   │   │           │       ├── aspect-ratio.tsx
│   │   │   │   │   │   │   │   │           │       ├── avatar.tsx
│   │   │   │   │   │   │   │   │           │       ├── badge.tsx
│   │   │   │   │   │   │   │   │           │       ├── breadcrumb.tsx
│   │   │   │   │   │   │   │   │           │       ├── button-group.tsx
│   │   │   │   │   │   │   │   │           │       ├── button.tsx
│   │   │   │   │   │   │   │   │           │       ├── calendar.tsx
│   │   │   │   │   │   │   │   │           │       ├── card.tsx
│   │   │   │   │   │   │   │   │           │       ├── carousel.tsx
│   │   │   │   │   │   │   │   │           │       ├── chart.tsx
│   │   │   │   │   │   │   │   │           │       ├── checkbox.tsx
│   │   │   │   │   │   │   │   │           │       ├── collapsible.tsx
│   │   │   │   │   │   │   │   │           │       ├── combobox.tsx
│   │   │   │   │   │   │   │   │           │       ├── command.tsx
│   │   │   │   │   │   │   │   │           │       ├── context-menu.tsx
│   │   │   │   │   │   │   │   │           │       ├── dialog.tsx
│   │   │   │   │   │   │   │   │           │       ├── drawer.tsx
│   │   │   │   │   │   │   │   │           │       ├── dropdown-menu.tsx
│   │   │   │   │   │   │   │   │           │       ├── empty.tsx
│   │   │   │   │   │   │   │   │           │       ├── field.tsx
│   │   │   │   │   │   │   │   │           │       ├── hover-card.tsx
│   │   │   │   │   │   │   │   │           │       ├── input-group.tsx
│   │   │   │   │   │   │   │   │           │       ├── input.tsx
│   │   │   │   │   │   │   │   │           │       ├── item.tsx
│   │   │   │   │   │   │   │   │           │       ├── kbd.tsx
│   │   │   │   │   │   │   │   │           │       ├── label.tsx
│   │   │   │   │   │   │   │   │           │       ├── menubar.tsx
│   │   │   │   │   │   │   │   │           │       ├── native-select.tsx
│   │   │   │   │   │   │   │   │           │       ├── navigation-menu.tsx
│   │   │   │   │   │   │   │   │           │       ├── pagination.tsx
│   │   │   │   │   │   │   │   │           │       ├── popover.tsx
│   │   │   │   │   │   │   │   │           │       ├── progress.tsx
│   │   │   │   │   │   │   │   │           │       ├── radio-group.tsx
│   │   │   │   │   │   │   │   │           │       ├── resizable.tsx
│   │   │   │   │   │   │   │   │           │       ├── scroll-area.tsx
│   │   │   │   │   │   │   │   │           │       ├── select.tsx
│   │   │   │   │   │   │   │   │           │       ├── separator.tsx
│   │   │   │   │   │   │   │   │           │       ├── sheet.tsx
│   │   │   │   │   │   │   │   │           │       ├── sidebar.tsx
│   │   │   │   │   │   │   │   │           │       ├── skeleton.tsx
│   │   │   │   │   │   │   │   │           │       ├── slider.tsx
│   │   │   │   │   │   │   │   │           │       ├── sonner.tsx
│   │   │   │   │   │   │   │   │           │       ├── spinner.tsx
│   │   │   │   │   │   │   │   │           │       ├── switch.tsx
│   │   │   │   │   │   │   │   │           │       ├── table.tsx
│   │   │   │   │   │   │   │   │           │       ├── tabs.tsx
│   │   │   │   │   │   │   │   │           │       ├── textarea.tsx
│   │   │   │   │   │   │   │   │           │       ├── toggle-group.tsx
│   │   │   │   │   │   │   │   │           │       ├── toggle.tsx
│   │   │   │   │   │   │   │   │           │       └── tooltip.tsx
│   │   │   │   │   │   │   │   │           ├── components.json
│   │   │   │   │   │   │   │   │           ├── eslint.config.mjs
│   │   │   │   │   │   │   │   │           ├── hooks/
│   │   │   │   │   │   │   │   │           │   └── use-mobile.ts
│   │   │   │   │   │   │   │   │           ├── lib/
│   │   │   │   │   │   │   │   │           │   └── utils.ts
│   │   │   │   │   │   │   │   │           ├── next.config.ts
│   │   │   │   │   │   │   │   │           ├── package.json
│   │   │   │   │   │   │   │   │           ├── postcss.config.mjs
│   │   │   │   │   │   │   │   │           └── tsconfig.json
│   │   │   │   │   │   │   │   └── test-job.yaml
│   │   │   │   │   │   │   ├── internal/
│   │   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   │   └── acp_exec_client.py
│   │   │   │   │   │   │   └── kubernetes_sandbox_manager.py
│   │   │   │   │   │   ├── local/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── agent_client.py
│   │   │   │   │   │   │   ├── local_sandbox_manager.py
│   │   │   │   │   │   │   ├── process_manager.py
│   │   │   │   │   │   │   ├── test_agent_client.py
│   │   │   │   │   │   │   └── test_manager.py
│   │   │   │   │   │   ├── manager/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── directory_manager.py
│   │   │   │   │   │   │   ├── snapshot_manager.py
│   │   │   │   │   │   │   └── test_directory_manager.py
│   │   │   │   │   │   ├── models.py
│   │   │   │   │   │   ├── tasks/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   └── tasks.py
│   │   │   │   │   │   └── util/
│   │   │   │   │   │       ├── __init__.py
│   │   │   │   │   │       ├── agent_instructions.py
│   │   │   │   │   │       ├── build_venv_template.py
│   │   │   │   │   │       ├── opencode_config.py
│   │   │   │   │   │       └── persona_mapping.py
│   │   │   │   │   ├── session/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── manager.py
│   │   │   │   │   │   └── prompts.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── default_assistant/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── document_set/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── hierarchy/
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── constants.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── hooks/
│   │   │   │   │   └── __init__.py
│   │   │   │   ├── input_prompt/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── mcp/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── notifications/
│   │   │   │   │   └── api.py
│   │   │   │   ├── oauth_config/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── password/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── persona/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── constants.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── projects/
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── projects_file_utils.py
│   │   │   │   ├── release_notes/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── constants.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── tool/
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── tool_visibility.py
│   │   │   │   ├── user_oauth_token/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── api.py
│   │   │   │   └── web_search/
│   │   │   │       ├── api.py
│   │   │   │       └── models.py
│   │   │   ├── federated/
│   │   │   │   ├── api.py
│   │   │   │   └── models.py
│   │   │   ├── kg/
│   │   │   │   ├── api.py
│   │   │   │   └── models.py
│   │   │   ├── manage/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── administrative.py
│   │   │   │   ├── code_interpreter/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── discord_bot/
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── embedding/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── get_state.py
│   │   │   │   ├── image_generation/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── llm/
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── models.py
│   │   │   │   ├── opensearch_migration/
│   │   │   │   │   ├── api.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── search_settings.py
│   │   │   │   ├── slack_bot.py
│   │   │   │   ├── users.py
│   │   │   │   ├── validate_tokens.py
│   │   │   │   ├── voice/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── api.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   ├── user_api.py
│   │   │   │   │   └── websocket_api.py
│   │   │   │   └── web_search/
│   │   │   │       ├── api.py
│   │   │   │       └── models.py
│   │   │   ├── metrics/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── celery_task_metrics.py
│   │   │   │   ├── indexing_pipeline.py
│   │   │   │   ├── indexing_pipeline_setup.py
│   │   │   │   ├── indexing_task_metrics.py
│   │   │   │   ├── metrics_server.py
│   │   │   │   ├── opensearch_search.py
│   │   │   │   ├── per_tenant.py
│   │   │   │   ├── postgres_connection_pool.py
│   │   │   │   ├── prometheus_setup.py
│   │   │   │   └── slow_requests.py
│   │   │   ├── middleware/
│   │   │   │   ├── latency_logging.py
│   │   │   │   └── rate_limiting.py
│   │   │   ├── models.py
│   │   │   ├── onyx_api/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ingestion.py
│   │   │   │   └── models.py
│   │   │   ├── pat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── models.py
│   │   │   ├── query_and_chat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── chat_backend.py
│   │   │   │   ├── chat_utils.py
│   │   │   │   ├── models.py
│   │   │   │   ├── placement.py
│   │   │   │   ├── query_backend.py
│   │   │   │   ├── session_loading.py
│   │   │   │   ├── streaming_models.py
│   │   │   │   └── token_limit.py
│   │   │   ├── runtime/
│   │   │   │   └── onyx_runtime.py
│   │   │   ├── saml.py
│   │   │   ├── settings/
│   │   │   │   ├── api.py
│   │   │   │   ├── models.py
│   │   │   │   └── store.py
│   │   │   ├── tenant_usage_limits.py
│   │   │   ├── token_rate_limits/
│   │   │   │   ├── api.py
│   │   │   │   └── models.py
│   │   │   ├── usage_limits.py
│   │   │   ├── utils.py
│   │   │   └── utils_vector_db.py
│   │   ├── setup.py
│   │   ├── tools/
│   │   │   ├── built_in_tools.py
│   │   │   ├── constants.py
│   │   │   ├── fake_tools/
│   │   │   │   ├── __init__.py
│   │   │   │   └── research_agent.py
│   │   │   ├── interface.py
│   │   │   ├── models.py
│   │   │   ├── tool_constructor.py
│   │   │   ├── tool_implementations/
│   │   │   │   ├── custom/
│   │   │   │   │   ├── base_tool_types.py
│   │   │   │   │   ├── custom_tool.py
│   │   │   │   │   └── openapi_parsing.py
│   │   │   │   ├── file_reader/
│   │   │   │   │   └── file_reader_tool.py
│   │   │   │   ├── images/
│   │   │   │   │   ├── image_generation_tool.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── knowledge_graph/
│   │   │   │   │   └── knowledge_graph_tool.py
│   │   │   │   ├── mcp/
│   │   │   │   │   ├── mcp_client.py
│   │   │   │   │   └── mcp_tool.py
│   │   │   │   ├── memory/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── memory_tool.py
│   │   │   │   │   └── models.py
│   │   │   │   ├── open_url/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── firecrawl.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   ├── onyx_web_crawler.py
│   │   │   │   │   ├── open_url_tool.py
│   │   │   │   │   ├── snippet_matcher.py
│   │   │   │   │   ├── url_normalization.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── python/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── code_interpreter_client.py
│   │   │   │   │   └── python_tool.py
│   │   │   │   ├── search/
│   │   │   │   │   ├── constants.py
│   │   │   │   │   ├── search_tool.py
│   │   │   │   │   └── search_utils.py
│   │   │   │   ├── search_like_tool_utils.py
│   │   │   │   ├── utils.py
│   │   │   │   └── web_search/
│   │   │   │       ├── clients/
│   │   │   │       │   ├── brave_client.py
│   │   │   │       │   ├── exa_client.py
│   │   │   │       │   ├── google_pse_client.py
│   │   │   │       │   ├── searxng_client.py
│   │   │   │       │   └── serper_client.py
│   │   │   │       ├── models.py
│   │   │   │       ├── providers.py
│   │   │   │       ├── utils.py
│   │   │   │       └── web_search_tool.py
│   │   │   ├── tool_runner.py
│   │   │   └── utils.py
│   │   ├── tracing/
│   │   │   ├── braintrust_tracing_processor.py
│   │   │   ├── framework/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── _error_tracing.py
│   │   │   │   ├── create.py
│   │   │   │   ├── processor_interface.py
│   │   │   │   ├── provider.py
│   │   │   │   ├── scope.py
│   │   │   │   ├── setup.py
│   │   │   │   ├── span_data.py
│   │   │   │   ├── spans.py
│   │   │   │   ├── traces.py
│   │   │   │   └── util.py
│   │   │   ├── langfuse_tracing_processor.py
│   │   │   ├── llm_utils.py
│   │   │   ├── masking.py
│   │   │   └── setup.py
│   │   ├── utils/
│   │   │   ├── __init__.py
│   │   │   ├── b64.py
│   │   │   ├── batching.py
│   │   │   ├── callbacks.py
│   │   │   ├── encryption.py
│   │   │   ├── error_handling.py
│   │   │   ├── errors.py
│   │   │   ├── file.py
│   │   │   ├── gpu_utils.py
│   │   │   ├── headers.py
│   │   │   ├── jsonriver/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── parse.py
│   │   │   │   └── tokenize.py
│   │   │   ├── logger.py
│   │   │   ├── long_term_log.py
│   │   │   ├── memory_logger.py
│   │   │   ├── middleware.py
│   │   │   ├── object_size_check.py
│   │   │   ├── postgres_sanitization.py
│   │   │   ├── pydantic_util.py
│   │   │   ├── retry_wrapper.py
│   │   │   ├── search_nlp_models_utils.py
│   │   │   ├── sensitive.py
│   │   │   ├── sitemap.py
│   │   │   ├── special_types.py
│   │   │   ├── subclasses.py
│   │   │   ├── supervisord_watchdog.py
│   │   │   ├── telemetry.py
│   │   │   ├── tenant.py
│   │   │   ├── text_processing.py
│   │   │   ├── threadpool_concurrency.py
│   │   │   ├── timing.py
│   │   │   ├── url.py
│   │   │   ├── variable_functionality.py
│   │   │   └── web_content.py
│   │   └── voice/
│   │       ├── __init__.py
│   │       ├── factory.py
│   │       ├── interface.py
│   │       └── providers/
│   │           ├── __init__.py
│   │           ├── azure.py
│   │           ├── elevenlabs.py
│   │           └── openai.py
│   ├── pyproject.toml
│   ├── pytest.ini
│   ├── requirements/
│   │   ├── README.md
│   │   ├── combined.txt
│   │   ├── default.txt
│   │   ├── dev.txt
│   │   ├── ee.txt
│   │   └── model_server.txt
│   ├── scripts/
│   │   ├── __init__.py
│   │   ├── add_connector_creation_script.py
│   │   ├── api_inference_sample.py
│   │   ├── celery_purge_queue.py
│   │   ├── chat_feedback_dump.py
│   │   ├── chat_history_seeding.py
│   │   ├── chat_loadtest.py
│   │   ├── debugging/
│   │   │   ├── debug_usage_limits.py
│   │   │   ├── litellm/
│   │   │   │   ├── README
│   │   │   │   ├── call_litellm.py
│   │   │   │   ├── directly_hit_azure_api.py
│   │   │   │   └── payload.json
│   │   │   ├── onyx_db.py
│   │   │   ├── onyx_list_tenants.py
│   │   │   ├── onyx_redis.py
│   │   │   ├── onyx_vespa_schemas.py
│   │   │   └── opensearch/
│   │   │       ├── benchmark_retrieval.py
│   │   │       ├── constants.py
│   │   │       ├── embed_and_save.py
│   │   │       ├── embedding_io.py
│   │   │       ├── opensearch_debug.py
│   │   │       └── query_hierarchy_debug.py
│   │   ├── decrypt.py
│   │   ├── dev_run_background_jobs.py
│   │   ├── docker_memory_tracking.sh
│   │   ├── force_delete_connector_by_id.py
│   │   ├── get_wikidocs.py
│   │   ├── hard_delete_chats.py
│   │   ├── lib/
│   │   │   └── logger.py
│   │   ├── make_foss_repo.sh
│   │   ├── onyx_openapi_schema.py
│   │   ├── orphan_doc_cleanup_script.py
│   │   ├── query_time_check/
│   │   │   ├── seed_dummy_docs.py
│   │   │   └── test_query_times.py
│   │   ├── reencrypt_secrets.py
│   │   ├── reset_indexes.py
│   │   ├── reset_postgres.py
│   │   ├── restart_containers.sh
│   │   ├── resume_paused_connectors.py
│   │   ├── run_industryrag_bench_questions.py
│   │   ├── save_load_state.py
│   │   ├── setup_craft_templates.sh
│   │   ├── sources_selection_analysis.py
│   │   ├── supervisord_entrypoint.sh
│   │   ├── tenant_cleanup/
│   │   │   ├── QUICK_START_NO_BASTION.md
│   │   │   ├── README.md
│   │   │   ├── analyze_current_tenants.py
│   │   │   ├── check_no_bastion_setup.py
│   │   │   ├── cleanup_tenants.py
│   │   │   ├── cleanup_utils.py
│   │   │   ├── mark_connectors_for_deletion.py
│   │   │   ├── no_bastion_analyze_tenants.py
│   │   │   ├── no_bastion_cleanup_tenants.py
│   │   │   ├── no_bastion_cleanup_utils.py
│   │   │   ├── no_bastion_mark_connectors.py
│   │   │   └── on_pod_scripts/
│   │   │       ├── check_documents_deleted.py
│   │   │       ├── cleanup_tenant_schema.py
│   │   │       ├── execute_connector_deletion.py
│   │   │       ├── get_tenant_connectors.py
│   │   │       ├── get_tenant_index_name.py
│   │   │       ├── get_tenant_users.py
│   │   │       └── understand_tenants.py
│   │   ├── test-openapi-key.py
│   │   ├── transform_openapi_for_docs.py
│   │   └── upload_files_as_connectors.py
│   ├── shared_configs/
│   │   ├── __init__.py
│   │   ├── configs.py
│   │   ├── contextvars.py
│   │   ├── enums.py
│   │   ├── model_server_models.py
│   │   └── utils.py
│   ├── slackbot_images/
│   │   └── README.md
│   ├── supervisord.conf
│   └── tests/
│       ├── README.md
│       ├── __init__.py
│       ├── api/
│       │   └── test_api.py
│       ├── conftest.py
│       ├── daily/
│       │   ├── conftest.py
│       │   ├── connectors/
│       │   │   ├── airtable/
│       │   │   │   └── test_airtable_basic.py
│       │   │   ├── bitbucket/
│       │   │   │   ├── conftest.py
│       │   │   │   ├── test_bitbucket_checkpointed.py
│       │   │   │   └── test_bitbucket_slim_connector.py
│       │   │   ├── blob/
│       │   │   │   └── test_blob_connector.py
│       │   │   ├── coda/
│       │   │   │   ├── README.md
│       │   │   │   └── test_coda_connector.py
│       │   │   ├── confluence/
│       │   │   │   ├── models.py
│       │   │   │   ├── test_confluence_basic.py
│       │   │   │   ├── test_confluence_permissions_basic.py
│       │   │   │   └── test_confluence_user_email_overrides.py
│       │   │   ├── conftest.py
│       │   │   ├── discord/
│       │   │   │   └── test_discord_connector.py
│       │   │   ├── file/
│       │   │   │   └── test_file_connector.py
│       │   │   ├── fireflies/
│       │   │   │   ├── test_fireflies_connector.py
│       │   │   │   └── test_fireflies_data.json
│       │   │   ├── gitbook/
│       │   │   │   └── test_gitbook_connector.py
│       │   │   ├── github/
│       │   │   │   └── test_github_basic.py
│       │   │   ├── gitlab/
│       │   │   │   └── test_gitlab_basic.py
│       │   │   ├── gmail/
│       │   │   │   ├── conftest.py
│       │   │   │   └── test_gmail_connector.py
│       │   │   ├── gong/
│       │   │   │   └── test_gong.py
│       │   │   ├── google_drive/
│       │   │   │   ├── conftest.py
│       │   │   │   ├── consts_and_utils.py
│       │   │   │   ├── drive_id_mapping.json
│       │   │   │   ├── test_admin_oauth.py
│       │   │   │   ├── test_drive_perm_sync.py
│       │   │   │   ├── test_link_visibility_filter.py
│       │   │   │   ├── test_map_test_ids.py
│       │   │   │   ├── test_sections.py
│       │   │   │   ├── test_service_acct.py
│       │   │   │   └── test_user_1_oauth.py
│       │   │   ├── highspot/
│       │   │   │   ├── test_highspot_connector.py
│       │   │   │   └── test_highspot_data.json
│       │   │   ├── hubspot/
│       │   │   │   └── test_hubspot_connector.py
│       │   │   ├── imap/
│       │   │   │   ├── models.py
│       │   │   │   └── test_imap_connector.py
│       │   │   ├── jira/
│       │   │   │   └── test_jira_basic.py
│       │   │   ├── notion/
│       │   │   │   └── test_notion_connector.py
│       │   │   ├── outline/
│       │   │   │   └── test_outline_connector.py
│       │   │   ├── salesforce/
│       │   │   │   ├── test_salesforce_connector.py
│       │   │   │   └── test_salesforce_data.json
│       │   │   ├── sharepoint/
│       │   │   │   └── test_sharepoint_connector.py
│       │   │   ├── slab/
│       │   │   │   ├── test_slab_connector.py
│       │   │   │   └── test_slab_data.json
│       │   │   ├── slack/
│       │   │   │   ├── conftest.py
│       │   │   │   ├── test_slack_connector.py
│       │   │   │   └── test_slack_perm_sync.py
│       │   │   ├── teams/
│       │   │   │   ├── models.py
│       │   │   │   └── test_teams_connector.py
│       │   │   ├── utils.py
│       │   │   ├── web/
│       │   │   │   └── test_web_connector.py
│       │   │   └── zendesk/
│       │   │       ├── test_zendesk_connector.py
│       │   │       └── test_zendesk_data.json
│       │   ├── embedding/
│       │   │   └── test_embeddings.py
│       │   └── llm/
│       │       └── test_bedrock.py
│       ├── external_dependency_unit/
│       │   ├── answer/
│       │   │   ├── conftest.py
│       │   │   ├── stream_test_assertions.py
│       │   │   ├── stream_test_builder.py
│       │   │   ├── stream_test_utils.py
│       │   │   ├── test_answer_without_openai.py
│       │   │   ├── test_current_datetime_replacement.py
│       │   │   ├── test_stream_chat_message.py
│       │   │   └── test_stream_chat_message_objects.py
│       │   ├── background/
│       │   │   ├── test_periodic_task_claim.py
│       │   │   └── test_startup_recovery.py
│       │   ├── cache/
│       │   │   ├── conftest.py
│       │   │   ├── test_cache_backend_parity.py
│       │   │   ├── test_kv_store_cache_layer.py
│       │   │   └── test_postgres_cache_backend.py
│       │   ├── celery/
│       │   │   ├── test_docfetching_priority.py
│       │   │   ├── test_docprocessing_priority.py
│       │   │   ├── test_persona_file_sync.py
│       │   │   ├── test_pruning_hierarchy_nodes.py
│       │   │   ├── test_user_file_delete_queue.py
│       │   │   ├── test_user_file_indexing_adapter.py
│       │   │   └── test_user_file_processing_queue.py
│       │   ├── chat/
│       │   │   └── test_user_reminder_message_type.py
│       │   ├── conftest.py
│       │   ├── connectors/
│       │   │   ├── confluence/
│       │   │   │   ├── conftest.py
│       │   │   │   └── test_confluence_group_sync.py
│       │   │   ├── google_drive/
│       │   │   │   └── test_google_drive_group_sync.py
│       │   │   └── jira/
│       │   │       ├── conftest.py
│       │   │       ├── test_jira_doc_sync.py
│       │   │       └── test_jira_group_sync.py
│       │   ├── constants.py
│       │   ├── craft/
│       │   │   ├── conftest.py
│       │   │   ├── test_build_packet_storage.py
│       │   │   ├── test_file_upload.py
│       │   │   ├── test_kubernetes_sandbox.py
│       │   │   └── test_persistent_document_writer.py
│       │   ├── db/
│       │   │   ├── __init__.py
│       │   │   ├── conftest.py
│       │   │   ├── test_chat_session_eager_load.py
│       │   │   ├── test_credential_sensitive_value.py
│       │   │   ├── test_rotate_encryption_key.py
│       │   │   ├── test_tag_race_condition.py
│       │   │   └── test_user_account_type.py
│       │   ├── discord_bot/
│       │   │   ├── conftest.py
│       │   │   └── test_discord_events.py
│       │   ├── document_index/
│       │   │   ├── conftest.py
│       │   │   ├── test_document_index.py
│       │   │   └── test_document_index_old.py
│       │   ├── feature_flags/
│       │   │   ├── __init__.py
│       │   │   └── test_feature_flag_provider_factory.py
│       │   ├── file_store/
│       │   │   ├── test_file_store_non_mocked.py
│       │   │   └── test_postgres_file_store_non_mocked.py
│       │   ├── full_setup.py
│       │   ├── hierarchy/
│       │   │   ├── __init__.py
│       │   │   └── test_hierarchy_access_filter.py
│       │   ├── llm/
│       │   │   ├── test_llm_provider.py
│       │   │   ├── test_llm_provider_api_base.py
│       │   │   ├── test_llm_provider_auto_mode.py
│       │   │   ├── test_llm_provider_called.py
│       │   │   ├── test_llm_provider_default_model_protection.py
│       │   │   └── test_prompt_caching.py
│       │   ├── mock_content_provider.py
│       │   ├── mock_image_provider.py
│       │   ├── mock_llm.py
│       │   ├── mock_search_pipeline.py
│       │   ├── mock_search_provider.py
│       │   ├── opensearch/
│       │   │   ├── test_assistant_knowledge_filter.py
│       │   │   └── test_opensearch_client.py
│       │   ├── opensearch_migration/
│       │   │   └── test_opensearch_migration_tasks.py
│       │   ├── permission_sync/
│       │   │   ├── test_doc_permission_sync_attempt.py
│       │   │   └── test_external_group_permission_sync_attempt.py
│       │   ├── search_settings/
│       │   │   └── test_search_settings.py
│       │   ├── slack_bot/
│       │   │   ├── __init__.py
│       │   │   ├── test_slack_bot_crud.py
│       │   │   └── test_slack_bot_federated_search.py
│       │   ├── tools/
│       │   │   ├── data/
│       │   │   │   └── financial-sample.xlsx
│       │   │   ├── test_image_generation_tool.py
│       │   │   ├── test_mcp_passthrough_oauth.py
│       │   │   ├── test_memory_tool_integration.py
│       │   │   ├── test_oauth_config_crud.py
│       │   │   ├── test_oauth_token_manager.py
│       │   │   ├── test_oauth_tool_integration.py
│       │   │   ├── test_python_tool.py
│       │   │   └── test_python_tool_server_enabled.py
│       │   └── tracing/
│       │       ├── __init__.py
│       │       └── test_llm_span_recording.py
│       ├── integration/
│       │   ├── Dockerfile
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── common_utils/
│       │   │   ├── chat.py
│       │   │   ├── config.py
│       │   │   ├── constants.py
│       │   │   ├── document_acl.py
│       │   │   ├── managers/
│       │   │   │   ├── api_key.py
│       │   │   │   ├── cc_pair.py
│       │   │   │   ├── chat.py
│       │   │   │   ├── connector.py
│       │   │   │   ├── credential.py
│       │   │   │   ├── discord_bot.py
│       │   │   │   ├── document.py
│       │   │   │   ├── document_search.py
│       │   │   │   ├── document_set.py
│       │   │   │   ├── file.py
│       │   │   │   ├── image_generation.py
│       │   │   │   ├── index_attempt.py
│       │   │   │   ├── llm_provider.py
│       │   │   │   ├── pat.py
│       │   │   │   ├── persona.py
│       │   │   │   ├── project.py
│       │   │   │   ├── query_history.py
│       │   │   │   ├── scim_client.py
│       │   │   │   ├── scim_token.py
│       │   │   │   ├── settings.py
│       │   │   │   ├── tenant.py
│       │   │   │   ├── tool.py
│       │   │   │   ├── user.py
│       │   │   │   └── user_group.py
│       │   │   ├── reset.py
│       │   │   ├── test_document_utils.py
│       │   │   ├── test_file_utils.py
│       │   │   ├── test_files/
│       │   │   │   └── three_images.docx
│       │   │   ├── test_models.py
│       │   │   ├── timeout.py
│       │   │   └── vespa.py
│       │   ├── conftest.py
│       │   ├── connector_job_tests/
│       │   │   ├── github/
│       │   │   │   ├── conftest.py
│       │   │   │   ├── test_github_permission_sync.py
│       │   │   │   └── utils.py
│       │   │   ├── google/
│       │   │   │   ├── google_drive_api_utils.py
│       │   │   │   └── test_google_drive_permission_sync.py
│       │   │   ├── jira/
│       │   │   │   ├── conftest.py
│       │   │   │   └── test_jira_permission_sync_full.py
│       │   │   ├── sharepoint/
│       │   │   │   ├── conftest.py
│       │   │   │   └── test_sharepoint_permissions.py
│       │   │   └── slack/
│       │   │       ├── conftest.py
│       │   │       ├── slack_api_utils.py
│       │   │       ├── test_permission_sync.py
│       │   │       └── test_prune.py
│       │   ├── mock_services/
│       │   │   ├── docker-compose.mock-it-services.yml
│       │   │   ├── mcp_test_server/
│       │   │   │   ├── run_mcp_server_api_key.py
│       │   │   │   ├── run_mcp_server_google_oauth.py
│       │   │   │   ├── run_mcp_server_no_auth.py
│       │   │   │   ├── run_mcp_server_oauth.py
│       │   │   │   └── run_mcp_server_per_user_key.py
│       │   │   └── mock_connector_server/
│       │   │       ├── Dockerfile
│       │   │       └── main.py
│       │   ├── multitenant_tests/
│       │   │   ├── discord_bot/
│       │   │   │   └── test_discord_bot_multitenant.py
│       │   │   ├── invitation/
│       │   │   │   └── test_user_invitation.py
│       │   │   ├── migrations/
│       │   │   │   └── test_run_multitenant_migrations.py
│       │   │   ├── syncing/
│       │   │   │   └── test_search_permissions.py
│       │   │   ├── tenants/
│       │   │   │   ├── test_tenant_creation.py
│       │   │   │   └── test_tenant_provisioning_rollback.py
│       │   │   └── test_get_schemas_needing_migration.py
│       │   └── tests/
│       │       ├── anonymous_user/
│       │       │   └── test_anonymous_user.py
│       │       ├── api_key/
│       │       │   └── test_api_key.py
│       │       ├── auth/
│       │       │   └── test_saml_user_conversion.py
│       │       ├── chat/
│       │       │   ├── test_chat_deletion.py
│       │       │   └── test_chat_session_access.py
│       │       ├── chat_retention/
│       │       │   └── test_chat_retention.py
│       │       ├── code_interpreter/
│       │       │   ├── conftest.py
│       │       │   └── test_code_interpreter_api.py
│       │       ├── connector/
│       │       │   ├── test_connector_creation.py
│       │       │   ├── test_connector_deletion.py
│       │       │   └── test_last_indexed_time.py
│       │       ├── discord_bot/
│       │       │   ├── test_discord_bot_api.py
│       │       │   └── test_discord_bot_db.py
│       │       ├── document_set/
│       │       │   └── test_syncing.py
│       │       ├── image_generation/
│       │       │   ├── test_image_generation_config.py
│       │       │   └── test_image_generation_tool_visibility.py
│       │       ├── image_indexing/
│       │       │   └── test_indexing_images.py
│       │       ├── index_attempt/
│       │       │   └── test_index_attempt_pagination.py
│       │       ├── indexing/
│       │       │   ├── conftest.py
│       │       │   ├── file_connector/
│       │       │   │   ├── test_file_connector_zip_metadata.py
│       │       │   │   └── test_files/
│       │       │   │       ├── .onyx_metadata.json
│       │       │   │       ├── sample1.txt
│       │       │   │       └── sample2.txt
│       │       │   ├── test_checkpointing.py
│       │       │   ├── test_initial_permission_sync.py
│       │       │   ├── test_polling.py
│       │       │   └── test_repeated_error_state.py
│       │       ├── ingestion/
│       │       │   └── test_ingestion_api.py
│       │       ├── kg/
│       │       │   └── test_kg_api.py
│       │       ├── llm_auto_update/
│       │       │   └── test_auto_llm_update.py
│       │       ├── llm_provider/
│       │       │   ├── test_llm_provider.py
│       │       │   ├── test_llm_provider_access_control.py
│       │       │   └── test_llm_provider_persona_access.py
│       │       ├── llm_workflows/
│       │       │   ├── test_mock_llm_tool_calls.py
│       │       │   ├── test_nightly_provider_chat_workflow.py
│       │       │   └── test_tool_policy_enforcement.py
│       │       ├── mcp/
│       │       │   ├── test_mcp_client_no_auth_flow.py
│       │       │   ├── test_mcp_server_auth.py
│       │       │   └── test_mcp_server_search.py
│       │       ├── migrations/
│       │       │   ├── conftest.py
│       │       │   ├── test_alembic_main.py
│       │       │   ├── test_alembic_tenants.py
│       │       │   ├── test_assistant_consolidation_migration.py
│       │       │   ├── test_migrations.py
│       │       │   └── test_tool_seeding.py
│       │       ├── no_vectordb/
│       │       │   ├── conftest.py
│       │       │   ├── test_no_vectordb_chat.py
│       │       │   ├── test_no_vectordb_endpoints.py
│       │       │   └── test_no_vectordb_file_lifecycle.py
│       │       ├── opensearch_migration/
│       │       │   └── test_opensearch_migration_api.py
│       │       ├── pat/
│       │       │   └── test_pat_api.py
│       │       ├── permissions/
│       │       │   ├── test_auth_permission_propagation.py
│       │       │   ├── test_cc_pair_permissions.py
│       │       │   ├── test_connector_permissions.py
│       │       │   ├── test_credential_permissions.py
│       │       │   ├── test_doc_set_permissions.py
│       │       │   ├── test_file_connector_permissions.py
│       │       │   ├── test_persona_permissions.py
│       │       │   ├── test_user_file_permissions.py
│       │       │   ├── test_user_role_permissions.py
│       │       │   └── test_whole_curator_flow.py
│       │       ├── personalization/
│       │       │   └── test_personalization_flow.py
│       │       ├── personas/
│       │       │   ├── test_persona_categories.py
│       │       │   ├── test_persona_creation.py
│       │       │   ├── test_persona_file_context.py
│       │       │   ├── test_persona_label_updates.py
│       │       │   ├── test_persona_pagination.py
│       │       │   └── test_unified_assistant.py
│       │       ├── projects/
│       │       │   └── test_projects.py
│       │       ├── pruning/
│       │       │   ├── test_pruning.py
│       │       │   └── website/
│       │       │       ├── about.html
│       │       │       ├── contact.html
│       │       │       ├── courses.html
│       │       │       ├── css/
│       │       │       │   ├── animate.css
│       │       │       │   ├── custom-fonts.css
│       │       │       │   ├── fancybox/
│       │       │       │   │   └── jquery.fancybox.css
│       │       │       │   ├── font-awesome.css
│       │       │       │   └── style.css
│       │       │       ├── fonts/
│       │       │       │   └── fontawesome.otf
│       │       │       ├── index.html
│       │       │       ├── js/
│       │       │       │   ├── animate.js
│       │       │       │   ├── custom.js
│       │       │       │   ├── flexslider/
│       │       │       │   │   ├── jquery.flexslider.js
│       │       │       │   │   └── setting.js
│       │       │       │   ├── google-code-prettify/
│       │       │       │   │   ├── prettify.css
│       │       │       │   │   └── prettify.js
│       │       │       │   ├── jquery.easing.1.3.js
│       │       │       │   ├── jquery.fancybox-media.js
│       │       │       │   ├── jquery.fancybox.pack.js
│       │       │       │   ├── jquery.flexslider.js
│       │       │       │   ├── jquery.js
│       │       │       │   ├── portfolio/
│       │       │       │   │   ├── jquery.quicksand.js
│       │       │       │   │   └── setting.js
│       │       │       │   ├── quicksand/
│       │       │       │   │   ├── jquery.quicksand.js
│       │       │       │   │   └── setting.js
│       │       │       │   └── validate.js
│       │       │       ├── portfolio.html
│       │       │       ├── pricing.html
│       │       │       └── readme.txt
│       │       ├── query_history/
│       │       │   ├── test_query_history.py
│       │       │   ├── test_query_history_pagination.py
│       │       │   ├── test_usage_reports.py
│       │       │   └── utils.py
│       │       ├── reporting/
│       │       │   └── test_usage_export_api.py
│       │       ├── scim/
│       │       │   ├── test_scim_groups.py
│       │       │   ├── test_scim_tokens.py
│       │       │   └── test_scim_users.py
│       │       ├── search_settings/
│       │       │   └── test_search_settings.py
│       │       ├── streaming_endpoints/
│       │       │   ├── test_chat_file_attachment.py
│       │       │   └── test_chat_stream.py
│       │       ├── tags/
│       │       │   └── test_tags.py
│       │       ├── tools/
│       │       │   ├── test_force_tool_use.py
│       │       │   └── test_image_generation_streaming.py
│       │       ├── usergroup/
│       │       │   ├── test_add_users_to_group.py
│       │       │   ├── test_group_membership_updates_user_permissions.py
│       │       │   ├── test_new_group_gets_basic_permission.py
│       │       │   ├── test_user_group_deletion.py
│       │       │   └── test_usergroup_syncing.py
│       │       ├── users/
│       │       │   ├── test_default_group_assignment.py
│       │       │   ├── test_password_signup_upgrade.py
│       │       │   ├── test_reactivation_groups.py
│       │       │   ├── test_seat_limit.py
│       │       │   ├── test_slack_user_deactivation.py
│       │       │   └── test_user_pagination.py
│       │       └── web_search/
│       │           └── test_web_search_api.py
│       ├── load_env_vars.py
│       ├── regression/
│       │   ├── answer_quality/
│       │   │   ├── README.md
│       │   │   ├── __init__.py
│       │   │   ├── api_utils.py
│       │   │   ├── cli_utils.py
│       │   │   ├── file_uploader.py
│       │   │   ├── launch_eval_env.py
│       │   │   └── search_test_config.yaml.template
│       │   └── search_quality/
│       │       ├── README.md
│       │       ├── models.py
│       │       ├── run_search_eval.py
│       │       ├── test_queries.json.template
│       │       └── utils.py
│       └── unit/
│           ├── __init__.py
│           ├── build/
│           │   └── test_rewrite_asset_paths.py
│           ├── ee/
│           │   ├── conftest.py
│           │   └── onyx/
│           │       ├── db/
│           │       │   ├── test_license.py
│           │       │   └── test_user_group_rename.py
│           │       ├── external_permissions/
│           │       │   ├── salesforce/
│           │       │   │   └── test_postprocessing.py
│           │       │   └── sharepoint/
│           │       │       └── test_permission_utils.py
│           │       ├── hooks/
│           │       │   ├── __init__.py
│           │       │   └── test_executor.py
│           │       ├── server/
│           │       │   ├── __init__.py
│           │       │   ├── billing/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── conftest.py
│           │       │   │   ├── test_billing_api.py
│           │       │   │   ├── test_billing_service.py
│           │       │   │   └── test_proxy.py
│           │       │   ├── features/
│           │       │   │   ├── __init__.py
│           │       │   │   └── hooks/
│           │       │   │       ├── __init__.py
│           │       │   │       └── test_api.py
│           │       │   ├── license/
│           │       │   │   └── test_api.py
│           │       │   ├── middleware/
│           │       │   │   └── test_license_enforcement.py
│           │       │   ├── settings/
│           │       │   │   └── test_license_enforcement_settings.py
│           │       │   └── tenants/
│           │       │       ├── test_billing_api.py
│           │       │       ├── test_product_gating.py
│           │       │       ├── test_proxy.py
│           │       │       └── test_schema_management.py
│           │       └── utils/
│           │           ├── test_encryption.py
│           │           └── test_license_utils.py
│           ├── federated_connector/
│           │   └── slack/
│           │       └── test_slack_federated_connnector.py
│           ├── file_store/
│           │   ├── test_file_store.py
│           │   └── test_postgres_file_store.py
│           ├── model_server/
│           │   └── test_embedding.py
│           ├── onyx/
│           │   ├── __init__.py
│           │   ├── access/
│           │   │   └── test_user_file_access.py
│           │   ├── auth/
│           │   │   ├── conftest.py
│           │   │   ├── test_disposable_email_validator.py
│           │   │   ├── test_email.py
│           │   │   ├── test_is_same_origin.py
│           │   │   ├── test_jwt_provisioning.py
│           │   │   ├── test_oauth_refresher.py
│           │   │   ├── test_oidc_pkce.py
│           │   │   ├── test_permissions.py
│           │   │   ├── test_single_tenant_jwt_strategy.py
│           │   │   ├── test_user_create_schema.py
│           │   │   ├── test_user_default_pins.py
│           │   │   ├── test_user_registration.py
│           │   │   ├── test_verify_auth_setting.py
│           │   │   ├── test_verify_email_domain.py
│           │   │   └── test_verify_email_invite.py
│           │   ├── background/
│           │   │   └── celery/
│           │   │       ├── tasks/
│           │   │       │   ├── tenant_provisioning/
│           │   │       │   │   ├── __init__.py
│           │   │       │   │   └── test_check_available_tenants.py
│           │   │       │   ├── test_hierarchyfetching_queue.py
│           │   │       │   ├── test_user_file_impl_redis_locking.py
│           │   │       │   ├── test_user_file_processing_no_vectordb.py
│           │   │       │   └── test_user_file_project_sync_queue.py
│           │   │       └── test_celery_redis.py
│           │   ├── chat/
│           │   │   ├── test_argument_delta_streaming.py
│           │   │   ├── test_chat_utils.py
│           │   │   ├── test_citation_processor.py
│           │   │   ├── test_citation_utils.py
│           │   │   ├── test_compression.py
│           │   │   ├── test_context_files.py
│           │   │   ├── test_emitter.py
│           │   │   ├── test_llm_loop.py
│           │   │   ├── test_llm_step.py
│           │   │   ├── test_multi_model_streaming.py
│           │   │   ├── test_multi_model_types.py
│           │   │   ├── test_process_message.py
│           │   │   ├── test_process_message_mock_llm.py
│           │   │   ├── test_save_chat.py
│           │   │   └── test_stop_signal_checker.py
│           │   ├── connectors/
│           │   │   ├── airtable/
│           │   │   │   └── test_airtable_index_all.py
│           │   │   ├── asana/
│           │   │   │   └── test_asana_connector.py
│           │   │   ├── canvas/
│           │   │   │   └── test_canvas_connector.py
│           │   │   ├── confluence/
│           │   │   │   ├── test_confluence_checkpointing.py
│           │   │   │   ├── test_onyx_confluence.py
│           │   │   │   └── test_rate_limit_handler.py
│           │   │   ├── cross_connector_utils/
│           │   │   │   ├── test_html_utils.py
│           │   │   │   ├── test_rate_limit.py
│           │   │   │   └── test_table.html
│           │   │   ├── discord/
│           │   │   │   └── test_discord_validation.py
│           │   │   ├── github/
│           │   │   │   └── test_github_checkpointing.py
│           │   │   ├── gmail/
│           │   │   │   ├── test_connector.py
│           │   │   │   └── thread.json
│           │   │   ├── google_utils/
│           │   │   │   └── test_rate_limit_detection.py
│           │   │   ├── jira/
│           │   │   │   ├── conftest.py
│           │   │   │   ├── test_jira_bulk_fetch.py
│           │   │   │   ├── test_jira_checkpointing.py
│           │   │   │   ├── test_jira_error_handling.py
│           │   │   │   ├── test_jira_large_ticket_handling.py
│           │   │   │   └── test_jira_permission_sync.py
│           │   │   ├── mediawiki/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_mediawiki_family.py
│           │   │   │   └── test_wiki.py
│           │   │   ├── notion/
│           │   │   │   └── test_notion_datasource.py
│           │   │   ├── salesforce/
│           │   │   │   ├── test_salesforce_custom_config.py
│           │   │   │   ├── test_salesforce_sqlite.py
│           │   │   │   └── test_yield_doc_batches.py
│           │   │   ├── sharepoint/
│           │   │   │   ├── test_delta_checkpointing.py
│           │   │   │   ├── test_denylist.py
│           │   │   │   ├── test_drive_matching.py
│           │   │   │   ├── test_fetch_site_pages.py
│           │   │   │   ├── test_hierarchy_helpers.py
│           │   │   │   ├── test_rest_client_context_caching.py
│           │   │   │   └── test_url_parsing.py
│           │   │   ├── slab/
│           │   │   │   └── test_slab_validation.py
│           │   │   ├── slack/
│           │   │   │   └── test_message_filtering.py
│           │   │   ├── teams/
│           │   │   │   └── test_collect_teams.py
│           │   │   ├── test_connector_factory.py
│           │   │   ├── test_document_metadata_coercion.py
│           │   │   ├── test_microsoft_graph_env.py
│           │   │   ├── utils.py
│           │   │   └── zendesk/
│           │   │       ├── test_zendesk_checkpointing.py
│           │   │       └── test_zendesk_rate_limit.py
│           │   ├── context/
│           │   │   └── search/
│           │   │       └── federated/
│           │   │           ├── test_slack_query_construction.py
│           │   │           └── test_slack_thread_context.py
│           │   ├── db/
│           │   │   ├── __init__.py
│           │   │   ├── conftest.py
│           │   │   ├── test_assign_default_groups.py
│           │   │   ├── test_chat_sessions.py
│           │   │   ├── test_dal.py
│           │   │   ├── test_delete_user.py
│           │   │   ├── test_llm_sync.py
│           │   │   ├── test_persona_display_priority.py
│           │   │   ├── test_projects_upload_task_expiry.py
│           │   │   ├── test_scim_dal.py
│           │   │   ├── test_tools.py
│           │   │   ├── test_usage.py
│           │   │   └── test_voice.py
│           │   ├── document_index/
│           │   │   ├── opensearch/
│           │   │   │   ├── test_get_doc_chunk_id.py
│           │   │   │   └── test_opensearch_batch_flush.py
│           │   │   ├── test_disabled_document_index.py
│           │   │   └── vespa/
│           │   │       ├── shared_utils/
│           │   │       │   └── test_utils.py
│           │   │       └── test_vespa_batch_flush.py
│           │   ├── error_handling/
│           │   │   ├── __init__.py
│           │   │   └── test_exceptions.py
│           │   ├── federated_connectors/
│           │   │   ├── test_federated_connector_factory.py
│           │   │   └── test_oauth_utils.py
│           │   ├── file_processing/
│           │   │   ├── __init__.py
│           │   │   ├── test_image_summarization_errors.py
│           │   │   ├── test_image_summarization_litellm_errors.py
│           │   │   ├── test_pdf.py
│           │   │   └── test_xlsx_to_text.py
│           │   ├── hooks/
│           │   │   ├── __init__.py
│           │   │   ├── test_api_dependencies.py
│           │   │   ├── test_base_spec.py
│           │   │   ├── test_models.py
│           │   │   ├── test_query_processing_spec.py
│           │   │   └── test_registry.py
│           │   ├── image_gen/
│           │   │   └── test_provider_building.py
│           │   ├── indexing/
│           │   │   ├── conftest.py
│           │   │   ├── test_censoring.py
│           │   │   ├── test_chunker.py
│           │   │   ├── test_embed_chunks_in_batches.py
│           │   │   ├── test_embedder.py
│           │   │   ├── test_indexing_pipeline.py
│           │   │   ├── test_personas_in_chunks.py
│           │   │   └── test_vespa.py
│           │   ├── lazy_handling/
│           │   │   └── __init__.py
│           │   ├── llm/
│           │   │   ├── conftest.py
│           │   │   ├── test_bedrock_token_limit.py
│           │   │   ├── test_factory.py
│           │   │   ├── test_formatting_reenabled.py
│           │   │   ├── test_litellm_monkey_patches.py
│           │   │   ├── test_llm_provider_options.py
│           │   │   ├── test_model_is_reasoning.py
│           │   │   ├── test_model_map.py
│           │   │   ├── test_model_name_parser.py
│           │   │   ├── test_model_response.py
│           │   │   ├── test_multi_llm.py
│           │   │   ├── test_reasoning_effort_mapping.py
│           │   │   ├── test_request_context.py
│           │   │   ├── test_true_openai_model.py
│           │   │   └── test_vision_model_selection_logging.py
│           │   ├── natural_language_processing/
│           │   │   └── test_search_nlp_models.py
│           │   ├── onyxbot/
│           │   │   ├── discord/
│           │   │   │   ├── conftest.py
│           │   │   │   ├── test_api_client.py
│           │   │   │   ├── test_cache_manager.py
│           │   │   │   ├── test_context_builders.py
│           │   │   │   ├── test_discord_utils.py
│           │   │   │   ├── test_message_utils.py
│           │   │   │   └── test_should_respond.py
│           │   │   ├── test_handle_regular_answer.py
│           │   │   ├── test_slack_blocks.py
│           │   │   ├── test_slack_channel_config.py
│           │   │   ├── test_slack_formatting.py
│           │   │   └── test_slack_gating.py
│           │   ├── prompts/
│           │   │   └── test_prompt_utils.py
│           │   ├── redis_ca.pem
│           │   ├── server/
│           │   │   ├── __init__.py
│           │   │   ├── features/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── hierarchy/
│           │   │   │   │   └── test_user_access_info.py
│           │   │   │   └── hooks/
│           │   │   │       └── __init__.py
│           │   │   ├── manage/
│           │   │   │   ├── embedding/
│           │   │   │   │   └── test_embedding_api.py
│           │   │   │   ├── llm/
│           │   │   │   │   ├── test_fetch_models_api.py
│           │   │   │   │   └── test_llm_provider_utils.py
│           │   │   │   ├── test_bulk_invite_limit.py
│           │   │   │   └── voice/
│           │   │   │       └── test_voice_api_validation.py
│           │   │   ├── scim/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── conftest.py
│           │   │   │   ├── test_admin.py
│           │   │   │   ├── test_auth.py
│           │   │   │   ├── test_entra.py
│           │   │   │   ├── test_filtering.py
│           │   │   │   ├── test_group_endpoints.py
│           │   │   │   ├── test_patch.py
│           │   │   │   ├── test_providers.py
│           │   │   │   └── test_user_endpoints.py
│           │   │   ├── test_full_user_snapshot.py
│           │   │   ├── test_pool_metrics.py
│           │   │   ├── test_projects_file_utils.py
│           │   │   ├── test_prometheus_instrumentation.py
│           │   │   ├── test_settings_store.py
│           │   │   └── test_upload_files.py
│           │   ├── test_redis.py
│           │   ├── test_startup_validation.py
│           │   ├── tools/
│           │   │   ├── __init__.py
│           │   │   ├── custom/
│           │   │   │   └── test_custom_tools.py
│           │   │   ├── test_construct_tools_no_vectordb.py
│           │   │   ├── test_file_reader_tool.py
│           │   │   ├── test_no_vectordb.py
│           │   │   ├── test_python_tool_availability.py
│           │   │   ├── test_search_utils.py
│           │   │   ├── test_tool_runner.py
│           │   │   ├── test_tool_runner_chat_files.py
│           │   │   ├── test_tool_utils.py
│           │   │   └── tool_implementations/
│           │   │       ├── open_url/
│           │   │       │   ├── data/
│           │   │       │   │   └── test_snippet_finding_data.json
│           │   │       │   ├── test_onyx_web_crawler.py
│           │   │       │   ├── test_snippet_matcher.py
│           │   │       │   └── test_url_normalization.py
│           │   │       ├── python/
│           │   │       │   ├── __init__.py
│           │   │       │   ├── test_code_interpreter_client.py
│           │   │       │   └── test_python_tool_upload_cache.py
│           │   │       └── websearch/
│           │   │           ├── data/
│           │   │           │   └── tartan.txt
│           │   │           ├── test_brave_client.py
│           │   │           ├── test_web_search_providers.py
│           │   │           ├── test_web_search_tool_run.py
│           │   │           └── test_websearch_utils.py
│           │   ├── tracing/
│           │   │   ├── __init__.py
│           │   │   └── test_tracing_setup.py
│           │   ├── utils/
│           │   │   ├── test_gpu_utils.py
│           │   │   ├── test_json_river.py
│           │   │   ├── test_postgres_sanitization.py
│           │   │   ├── test_sensitive.py
│           │   │   ├── test_sensitive_typing.py
│           │   │   ├── test_telemetry.py
│           │   │   ├── test_threadpool_concurrency.py
│           │   │   ├── test_threadpool_contextvars.py
│           │   │   ├── test_url_ssrf.py
│           │   │   ├── test_vespa_query.py
│           │   │   └── test_vespa_tasks.py
│           │   └── voice/
│           │       └── providers/
│           │           ├── test_azure_provider.py
│           │           ├── test_azure_ssml.py
│           │           ├── test_elevenlabs_provider.py
│           │           └── test_openai_provider.py
│           ├── scripts/
│           │   └── __init__.py
│           ├── server/
│           │   └── metrics/
│           │       ├── test_celery_task_metrics.py
│           │       ├── test_indexing_pipeline_collectors.py
│           │       ├── test_indexing_pipeline_setup.py
│           │       ├── test_indexing_task_metrics.py
│           │       ├── test_metrics_server.py
│           │       ├── test_opensearch_search_metrics.py
│           │       └── test_worker_health.py
│           └── tools/
│               ├── __init__.py
│               └── test_memory_tool_packets.py
├── contributor_ip_assignment/
│   └── EE_Contributor_IP_Assignment_Agreement.md
├── ct.yaml
├── cubic.yaml
├── deployment/
│   ├── .gitignore
│   ├── README.md
│   ├── aws_ecs_fargate/
│   │   └── cloudformation/
│   │       ├── README.md
│   │       ├── deploy.sh
│   │       ├── onyx_acm_template.yaml
│   │       ├── onyx_cluster_template.yaml
│   │       ├── onyx_config.jsonl
│   │       ├── onyx_efs_template.yaml
│   │       ├── services/
│   │       │   ├── onyx_backend_api_server_service_template.yaml
│   │       │   ├── onyx_backend_background_server_service_template.yaml
│   │       │   ├── onyx_model_server_indexing_service_template.yaml
│   │       │   ├── onyx_model_server_inference_service_template.yaml
│   │       │   ├── onyx_nginx_service_template.yaml
│   │       │   ├── onyx_postgres_service_template.yaml
│   │       │   ├── onyx_redis_service_template.yaml
│   │       │   ├── onyx_vespaengine_service_template.yaml
│   │       │   └── onyx_web_server_service_template.yaml
│   │       └── uninstall.sh
│   ├── data/
│   │   └── nginx/
│   │       ├── app.conf.template
│   │       ├── app.conf.template.no-letsencrypt
│   │       ├── app.conf.template.prod
│   │       ├── mcp.conf.inc.template
│   │       ├── mcp_upstream.conf.inc.template
│   │       └── run-nginx.sh
│   ├── docker_compose/
│   │   ├── README.md
│   │   ├── docker-compose.dev.yml
│   │   ├── docker-compose.mcp-api-key-test.yml
│   │   ├── docker-compose.mcp-oauth-test.yml
│   │   ├── docker-compose.multitenant-dev.yml
│   │   ├── docker-compose.onyx-lite.yml
│   │   ├── docker-compose.prod-cloud.yml
│   │   ├── docker-compose.prod-no-letsencrypt.yml
│   │   ├── docker-compose.prod.yml
│   │   ├── docker-compose.resources.yml
│   │   ├── docker-compose.search-testing.yml
│   │   ├── docker-compose.yml
│   │   ├── env.nginx.template
│   │   ├── env.prod.template
│   │   ├── env.template
│   │   ├── init-letsencrypt.sh
│   │   ├── install.ps1
│   │   └── install.sh
│   ├── helm/
│   │   ├── README.md
│   │   └── charts/
│   │       └── onyx/
│   │           ├── .gitignore
│   │           ├── .helmignore
│   │           ├── Chart.yaml
│   │           ├── ci/
│   │           │   └── ct-values.yaml
│   │           ├── dashboards/
│   │           │   └── indexing-pipeline.json
│   │           ├── templates/
│   │           │   ├── _helpers.tpl
│   │           │   ├── api-deployment.yaml
│   │           │   ├── api-hpa.yaml
│   │           │   ├── api-scaledobject.yaml
│   │           │   ├── api-service.yaml
│   │           │   ├── auth-secrets.yaml
│   │           │   ├── celery-beat.yaml
│   │           │   ├── celery-worker-docfetching-hpa.yaml
│   │           │   ├── celery-worker-docfetching-metrics-service.yaml
│   │           │   ├── celery-worker-docfetching-scaledobject.yaml
│   │           │   ├── celery-worker-docfetching.yaml
│   │           │   ├── celery-worker-docprocessing-hpa.yaml
│   │           │   ├── celery-worker-docprocessing-metrics-service.yaml
│   │           │   ├── celery-worker-docprocessing-scaledobject.yaml
│   │           │   ├── celery-worker-docprocessing.yaml
│   │           │   ├── celery-worker-heavy-hpa.yaml
│   │           │   ├── celery-worker-heavy-scaledobject.yaml
│   │           │   ├── celery-worker-heavy.yaml
│   │           │   ├── celery-worker-light-hpa.yaml
│   │           │   ├── celery-worker-light-scaledobject.yaml
│   │           │   ├── celery-worker-light.yaml
│   │           │   ├── celery-worker-monitoring-hpa.yaml
│   │           │   ├── celery-worker-monitoring-metrics-service.yaml
│   │           │   ├── celery-worker-monitoring-scaledobject.yaml
│   │           │   ├── celery-worker-monitoring.yaml
│   │           │   ├── celery-worker-primary-hpa.yaml
│   │           │   ├── celery-worker-primary-scaledobject.yaml
│   │           │   ├── celery-worker-primary.yaml
│   │           │   ├── celery-worker-servicemonitors.yaml
│   │           │   ├── celery-worker-user-file-processing-hpa.yaml
│   │           │   ├── celery-worker-user-file-processing-scaledobject.yaml
│   │           │   ├── celery-worker-user-file-processing.yaml
│   │           │   ├── configmap.yaml
│   │           │   ├── discordbot.yaml
│   │           │   ├── grafana-dashboards.yaml
│   │           │   ├── indexing-model-deployment.yaml
│   │           │   ├── indexing-model-service.yaml
│   │           │   ├── inference-model-deployment.yaml
│   │           │   ├── inference-model-service.yaml
│   │           │   ├── ingress-api.yaml
│   │           │   ├── ingress-mcp.yaml
│   │           │   ├── ingress-webserver.yaml
│   │           │   ├── lets-encrypt.yaml
│   │           │   ├── mcp-server-deployment.yaml
│   │           │   ├── mcp-server-service.yaml
│   │           │   ├── nginx-conf.yaml
│   │           │   ├── serviceaccount.yaml
│   │           │   ├── slackbot.yaml
│   │           │   ├── tests/
│   │           │   │   └── test-connection.yaml
│   │           │   ├── tooling-pginto-configmap.yaml
│   │           │   ├── webserver-deployment.yaml
│   │           │   ├── webserver-hpa.yaml
│   │           │   ├── webserver-scaledobject.yaml
│   │           │   └── webserver-service.yaml
│   │           ├── templates_disabled/
│   │           │   ├── background-deployment.yaml
│   │           │   ├── background-hpa.yaml
│   │           │   └── onyx-secret.yaml
│   │           ├── values-lite.yaml
│   │           └── values.yaml
│   └── terraform/
│       └── modules/
│           └── aws/
│               ├── README.md
│               ├── eks/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   └── variables.tf
│               ├── onyx/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   ├── variables.tf
│               │   └── versions.tf
│               ├── opensearch/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   └── variables.tf
│               ├── postgres/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   └── variables.tf
│               ├── redis/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   └── variables.tf
│               ├── s3/
│               │   ├── main.tf
│               │   └── variables.tf
│               ├── vpc/
│               │   ├── main.tf
│               │   ├── outputs.tf
│               │   └── variables.tf
│               └── waf/
│                   ├── main.tf
│                   ├── outputs.tf
│                   └── variables.tf
├── desktop/
│   ├── .gitignore
│   ├── README.md
│   ├── package.json
│   ├── scripts/
│   │   └── generate-icons.sh
│   ├── src/
│   │   ├── index.html
│   │   └── titlebar.js
│   └── src-tauri/
│       ├── Cargo.toml
│       ├── build.rs
│       ├── gen/
│       │   └── schemas/
│       │       ├── acl-manifests.json
│       │       ├── capabilities.json
│       │       ├── desktop-schema.json
│       │       └── macOS-schema.json
│       ├── icons/
│       │   ├── android/
│       │   │   ├── mipmap-anydpi-v26/
│       │   │   │   └── ic_launcher.xml
│       │   │   └── values/
│       │   │       └── ic_launcher_background.xml
│       │   └── icon.icns
│       ├── src/
│       │   └── main.rs
│       └── tauri.conf.json
├── docker-bake.hcl
├── docs/
│   └── METRICS.md
├── examples/
│   ├── assistants-api/
│   │   └── topics_analyzer.py
│   └── widget/
│       ├── .eslintrc.json
│       ├── .gitignore
│       ├── README.md
│       ├── next.config.mjs
│       ├── package.json
│       ├── postcss.config.mjs
│       ├── src/
│       │   └── app/
│       │       ├── globals.css
│       │       ├── layout.tsx
│       │       ├── page.tsx
│       │       └── widget/
│       │           └── Widget.tsx
│       ├── tailwind.config.ts
│       └── tsconfig.json
├── extensions/
│   └── chrome/
│       ├── LICENSE
│       ├── README.md
│       ├── manifest.json
│       ├── service_worker.js
│       └── src/
│           ├── pages/
│           │   ├── onyx_home.html
│           │   ├── onyx_home.js
│           │   ├── options.html
│           │   ├── options.js
│           │   ├── panel.html
│           │   ├── panel.js
│           │   ├── popup.html
│           │   ├── popup.js
│           │   ├── welcome.html
│           │   └── welcome.js
│           ├── styles/
│           │   ├── selection-icon.css
│           │   └── shared.css
│           └── utils/
│               ├── constants.js
│               ├── content.js
│               ├── error-modal.js
│               ├── selection-icon.js
│               └── storage.js
├── profiling/
│   └── grafana/
│       └── dashboards/
│           └── onyx/
│               └── opensearch-search-latency.json
├── pyproject.toml
├── web/
│   ├── .dockerignore
│   ├── .eslintrc.json
│   ├── .gitignore
│   ├── .prettierignore
│   ├── .prettierrc.json
│   ├── .storybook/
│   │   ├── Introduction.mdx
│   │   ├── README.md
│   │   ├── main.ts
│   │   ├── mocks/
│   │   │   ├── next-image.tsx
│   │   │   ├── next-link.tsx
│   │   │   └── next-navigation.tsx
│   │   ├── preview-head.html
│   │   └── preview.ts
│   ├── @types/
│   │   ├── favicon-fetch.d.ts
│   │   └── images.d.ts
│   ├── AGENTS.md
│   ├── Dockerfile
│   ├── README.md
│   ├── components.json
│   ├── jest.config.js
│   ├── lib/
│   │   └── opal/
│   │       ├── README.md
│   │       ├── package.json
│   │       ├── scripts/
│   │       │   ├── README.md
│   │       │   ├── convert-svg.sh
│   │       │   └── icon-template.js
│   │       ├── src/
│   │       │   ├── components/
│   │       │   │   ├── README.md
│   │       │   │   ├── buttons/
│   │       │   │   │   ├── Button/
│   │       │   │   │   │   └── Button.stories.tsx
│   │       │   │   │   ├── button/
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   └── components.tsx
│   │       │   │   │   ├── chevron.css
│   │       │   │   │   ├── chevron.tsx
│   │       │   │   │   ├── filter-button/
│   │       │   │   │   │   ├── FilterButton.stories.tsx
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   └── components.tsx
│   │       │   │   │   ├── icon-wrapper.tsx
│   │       │   │   │   ├── line-item-button/
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   └── components.tsx
│   │       │   │   │   ├── open-button/
│   │       │   │   │   │   ├── OpenButton.stories.tsx
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   └── components.tsx
│   │       │   │   │   ├── select-button/
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   ├── components.tsx
│   │       │   │   │   │   └── styles.css
│   │       │   │   │   └── sidebar-tab/
│   │       │   │   │       ├── README.md
│   │       │   │   │       ├── SidebarTab.stories.tsx
│   │       │   │   │       └── components.tsx
│   │       │   │   ├── cards/
│   │       │   │   │   ├── card/
│   │       │   │   │   │   ├── Card.stories.tsx
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   ├── components.tsx
│   │       │   │   │   │   └── styles.css
│   │       │   │   │   ├── empty-message-card/
│   │       │   │   │   │   ├── EmptyMessageCard.stories.tsx
│   │       │   │   │   │   ├── README.md
│   │       │   │   │   │   └── components.tsx
│   │       │   │   │   └── select-card/
│   │       │   │   │       ├── README.md
│   │       │   │   │       ├── SelectCard.stories.tsx
│   │       │   │   │       ├── components.tsx
│   │       │   │   │       └── styles.css
│   │       │   │   ├── index.ts
│   │       │   │   ├── pagination/
│   │       │   │   │   ├── Pagination.stories.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   └── components.tsx
│   │       │   │   ├── table/
│   │       │   │   │   ├── ActionsContainer.tsx
│   │       │   │   │   ├── ColumnSortabilityPopover.tsx
│   │       │   │   │   ├── ColumnVisibilityPopover.tsx
│   │       │   │   │   ├── DragOverlayRow.tsx
│   │       │   │   │   ├── Footer.tsx
│   │       │   │   │   ├── QualifierContainer.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   ├── Table.stories.tsx
│   │       │   │   │   ├── TableBody.tsx
│   │       │   │   │   ├── TableCell.tsx
│   │       │   │   │   ├── TableElement.tsx
│   │       │   │   │   ├── TableHead.tsx
│   │       │   │   │   ├── TableHeader.tsx
│   │       │   │   │   ├── TableQualifier.tsx
│   │       │   │   │   ├── TableRow.tsx
│   │       │   │   │   ├── TableSizeContext.tsx
│   │       │   │   │   ├── columns.ts
│   │       │   │   │   ├── components.tsx
│   │       │   │   │   ├── hooks/
│   │       │   │   │   │   ├── useColumnWidths.ts
│   │       │   │   │   │   ├── useDataTable.ts
│   │       │   │   │   │   └── useDraggableRows.ts
│   │       │   │   │   ├── styles.css
│   │       │   │   │   └── types.ts
│   │       │   │   ├── tag/
│   │       │   │   │   ├── README.md
│   │       │   │   │   ├── Tag.stories.tsx
│   │       │   │   │   ├── components.tsx
│   │       │   │   │   └── styles.css
│   │       │   │   ├── text/
│   │       │   │   │   ├── InlineMarkdown.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   ├── Text.stories.tsx
│   │       │   │   │   └── components.tsx
│   │       │   │   └── tooltip.css
│   │       │   ├── core/
│   │       │   │   ├── README.md
│   │       │   │   ├── animations/
│   │       │   │   │   ├── Hoverable.stories.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   ├── components.tsx
│   │       │   │   │   └── styles.css
│   │       │   │   ├── disabled/
│   │       │   │   │   ├── components.tsx
│   │       │   │   │   └── styles.css
│   │       │   │   ├── index.ts
│   │       │   │   └── interactive/
│   │       │   │       ├── Interactive.stories.tsx
│   │       │   │       ├── README.md
│   │       │   │       ├── container/
│   │       │   │       │   ├── README.md
│   │       │   │       │   └── components.tsx
│   │       │   │       ├── foldable/
│   │       │   │       │   ├── README.md
│   │       │   │       │   ├── components.tsx
│   │       │   │       │   └── styles.css
│   │       │   │       ├── shared.css
│   │       │   │       ├── simple/
│   │       │   │       │   └── components.tsx
│   │       │   │       ├── stateful/
│   │       │   │       │   ├── README.md
│   │       │   │       │   ├── components.tsx
│   │       │   │       │   └── styles.css
│   │       │   │       ├── stateless/
│   │       │   │       │   ├── README.md
│   │       │   │       │   ├── components.tsx
│   │       │   │       │   └── styles.css
│   │       │   │       └── utils.ts
│   │       │   ├── icons/
│   │       │   │   ├── DiscordMono.tsx
│   │       │   │   ├── actions.tsx
│   │       │   │   ├── activity-small.tsx
│   │       │   │   ├── activity.tsx
│   │       │   │   ├── add-lines.tsx
│   │       │   │   ├── alert-circle.tsx
│   │       │   │   ├── alert-triangle.tsx
│   │       │   │   ├── arrow-down-dot.tsx
│   │       │   │   ├── arrow-exchange.tsx
│   │       │   │   ├── arrow-left-dot.tsx
│   │       │   │   ├── arrow-left.tsx
│   │       │   │   ├── arrow-right-circle.tsx
│   │       │   │   ├── arrow-right-dot.tsx
│   │       │   │   ├── arrow-right.tsx
│   │       │   │   ├── arrow-up-circle.tsx
│   │       │   │   ├── arrow-up-dot.tsx
│   │       │   │   ├── arrow-up-down.tsx
│   │       │   │   ├── arrow-up-right.tsx
│   │       │   │   ├── arrow-up.tsx
│   │       │   │   ├── arrow-wall-right.tsx
│   │       │   │   ├── audio-eq-small.tsx
│   │       │   │   ├── audio.tsx
│   │       │   │   ├── aws.tsx
│   │       │   │   ├── azure.tsx
│   │       │   │   ├── bar-chart-small.tsx
│   │       │   │   ├── bar-chart.tsx
│   │       │   │   ├── bell.tsx
│   │       │   │   ├── bifrost.tsx
│   │       │   │   ├── blocks.tsx
│   │       │   │   ├── book-open.tsx
│   │       │   │   ├── bookmark.tsx
│   │       │   │   ├── books-line-small.tsx
│   │       │   │   ├── books-stack-small.tsx
│   │       │   │   ├── bracket-curly.tsx
│   │       │   │   ├── branch.tsx
│   │       │   │   ├── bubble-text.tsx
│   │       │   │   ├── calendar.tsx
│   │       │   │   ├── check-circle.tsx
│   │       │   │   ├── check-small.tsx
│   │       │   │   ├── check-square.tsx
│   │       │   │   ├── check.tsx
│   │       │   │   ├── chevron-down-small.tsx
│   │       │   │   ├── chevron-down.tsx
│   │       │   │   ├── chevron-left.tsx
│   │       │   │   ├── chevron-right.tsx
│   │       │   │   ├── chevron-up-small.tsx
│   │       │   │   ├── chevron-up.tsx
│   │       │   │   ├── circle.tsx
│   │       │   │   ├── claude.tsx
│   │       │   │   ├── clipboard.tsx
│   │       │   │   ├── clock-hands-small.tsx
│   │       │   │   ├── clock.tsx
│   │       │   │   ├── cloud.tsx
│   │       │   │   ├── code.tsx
│   │       │   │   ├── column.tsx
│   │       │   │   ├── copy.tsx
│   │       │   │   ├── corner-right-up-dot.tsx
│   │       │   │   ├── cpu.tsx
│   │       │   │   ├── credit-card.tsx
│   │       │   │   ├── curate.tsx
│   │       │   │   ├── dashboard.tsx
│   │       │   │   ├── dev-kit.tsx
│   │       │   │   ├── download-cloud.tsx
│   │       │   │   ├── download.tsx
│   │       │   │   ├── edit-big.tsx
│   │       │   │   ├── edit.tsx
│   │       │   │   ├── empty.tsx
│   │       │   │   ├── expand.tsx
│   │       │   │   ├── external-link.tsx
│   │       │   │   ├── eye-closed.tsx
│   │       │   │   ├── eye-off.tsx
│   │       │   │   ├── eye.tsx
│   │       │   │   ├── file-braces.tsx
│   │       │   │   ├── file-broadcast.tsx
│   │       │   │   ├── file-chart-pie.tsx
│   │       │   │   ├── file-small.tsx
│   │       │   │   ├── file-text.tsx
│   │       │   │   ├── files.tsx
│   │       │   │   ├── filter-plus.tsx
│   │       │   │   ├── filter.tsx
│   │       │   │   ├── fold.tsx
│   │       │   │   ├── folder-in.tsx
│   │       │   │   ├── folder-open.tsx
│   │       │   │   ├── folder-partial-open.tsx
│   │       │   │   ├── folder-plus.tsx
│   │       │   │   ├── folder.tsx
│   │       │   │   ├── gemini.tsx
│   │       │   │   ├── globe.tsx
│   │       │   │   ├── handle.tsx
│   │       │   │   ├── hard-drive.tsx
│   │       │   │   ├── hash-small.tsx
│   │       │   │   ├── hash.tsx
│   │       │   │   ├── headset-mic.tsx
│   │       │   │   ├── history.tsx
│   │       │   │   ├── hourglass.tsx
│   │       │   │   ├── image-small.tsx
│   │       │   │   ├── image.tsx
│   │       │   │   ├── import-icon.tsx
│   │       │   │   ├── index.ts
│   │       │   │   ├── info-small.tsx
│   │       │   │   ├── info.tsx
│   │       │   │   ├── key.tsx
│   │       │   │   ├── keystroke.tsx
│   │       │   │   ├── lightbulb-simple.tsx
│   │       │   │   ├── line-chart-up.tsx
│   │       │   │   ├── link.tsx
│   │       │   │   ├── linked-dots.tsx
│   │       │   │   ├── litellm.tsx
│   │       │   │   ├── lm-studio.tsx
│   │       │   │   ├── loader.tsx
│   │       │   │   ├── lock.tsx
│   │       │   │   ├── log-out.tsx
│   │       │   │   ├── maximize-2.tsx
│   │       │   │   ├── mcp.tsx
│   │       │   │   ├── menu.tsx
│   │       │   │   ├── microphone-off.tsx
│   │       │   │   ├── microphone.tsx
│   │       │   │   ├── minus-circle.tsx
│   │       │   │   ├── minus.tsx
│   │       │   │   ├── moon.tsx
│   │       │   │   ├── more-horizontal.tsx
│   │       │   │   ├── music-small.tsx
│   │       │   │   ├── network-graph.tsx
│   │       │   │   ├── notification-bubble.tsx
│   │       │   │   ├── ollama.tsx
│   │       │   │   ├── onyx-logo-typed.tsx
│   │       │   │   ├── onyx-logo.tsx
│   │       │   │   ├── onyx-octagon.tsx
│   │       │   │   ├── onyx-typed.tsx
│   │       │   │   ├── openai.tsx
│   │       │   │   ├── openrouter.tsx
│   │       │   │   ├── organization.tsx
│   │       │   │   ├── paint-brush.tsx
│   │       │   │   ├── paperclip.tsx
│   │       │   │   ├── pause-circle.tsx
│   │       │   │   ├── pen-small.tsx
│   │       │   │   ├── pencil-ruler.tsx
│   │       │   │   ├── pie-chart.tsx
│   │       │   │   ├── pin.tsx
│   │       │   │   ├── pinned.tsx
│   │       │   │   ├── play-circle.tsx
│   │       │   │   ├── plug.tsx
│   │       │   │   ├── plus-circle.tsx
│   │       │   │   ├── plus.tsx
│   │       │   │   ├── progress-bars.tsx
│   │       │   │   ├── progress-circle.tsx
│   │       │   │   ├── question-mark-small.tsx
│   │       │   │   ├── quote-end.tsx
│   │       │   │   ├── quote-start.tsx
│   │       │   │   ├── refresh-cw.tsx
│   │       │   │   ├── revert.tsx
│   │       │   │   ├── search-menu.tsx
│   │       │   │   ├── search-small.tsx
│   │       │   │   ├── search.tsx
│   │       │   │   ├── server.tsx
│   │       │   │   ├── settings.tsx
│   │       │   │   ├── share-webhook.tsx
│   │       │   │   ├── share.tsx
│   │       │   │   ├── shield.tsx
│   │       │   │   ├── sidebar.tsx
│   │       │   │   ├── slack.tsx
│   │       │   │   ├── slash.tsx
│   │       │   │   ├── sliders-small.tsx
│   │       │   │   ├── sliders.tsx
│   │       │   │   ├── sort-order.tsx
│   │       │   │   ├── sort.tsx
│   │       │   │   ├── sparkle.tsx
│   │       │   │   ├── star-off.tsx
│   │       │   │   ├── star.tsx
│   │       │   │   ├── step1.tsx
│   │       │   │   ├── step2.tsx
│   │       │   │   ├── step3-end.tsx
│   │       │   │   ├── step3.tsx
│   │       │   │   ├── stop-circle.tsx
│   │       │   │   ├── stop.tsx
│   │       │   │   ├── sun.tsx
│   │       │   │   ├── tag.tsx
│   │       │   │   ├── terminal-small.tsx
│   │       │   │   ├── terminal.tsx
│   │       │   │   ├── text-lines-small.tsx
│   │       │   │   ├── text-lines.tsx
│   │       │   │   ├── thumbs-down.tsx
│   │       │   │   ├── thumbs-up.tsx
│   │       │   │   ├── trash.tsx
│   │       │   │   ├── two-line-small.tsx
│   │       │   │   ├── unplug.tsx
│   │       │   │   ├── upload-cloud.tsx
│   │       │   │   ├── user-check.tsx
│   │       │   │   ├── user-edit.tsx
│   │       │   │   ├── user-key.tsx
│   │       │   │   ├── user-manage.tsx
│   │       │   │   ├── user-minus.tsx
│   │       │   │   ├── user-plus.tsx
│   │       │   │   ├── user-shield.tsx
│   │       │   │   ├── user-speaker.tsx
│   │       │   │   ├── user-sync.tsx
│   │       │   │   ├── user-x.tsx
│   │       │   │   ├── user.tsx
│   │       │   │   ├── users.tsx
│   │       │   │   ├── volume-off.tsx
│   │       │   │   ├── volume.tsx
│   │       │   │   ├── wallet.tsx
│   │       │   │   ├── workflow.tsx
│   │       │   │   ├── x-circle.tsx
│   │       │   │   ├── x-octagon.tsx
│   │       │   │   ├── x.tsx
│   │       │   │   ├── zoom-in.tsx
│   │       │   │   └── zoom-out.tsx
│   │       │   ├── illustrations/
│   │       │   │   ├── broken-key.tsx
│   │       │   │   ├── connect.tsx
│   │       │   │   ├── connected.tsx
│   │       │   │   ├── disconnected.tsx
│   │       │   │   ├── empty.tsx
│   │       │   │   ├── end-of-line.tsx
│   │       │   │   ├── index.ts
│   │       │   │   ├── limit-alert.tsx
│   │       │   │   ├── long-wait.tsx
│   │       │   │   ├── no-access.tsx
│   │       │   │   ├── no-result.tsx
│   │       │   │   ├── not-found.tsx
│   │       │   │   ├── overflow.tsx
│   │       │   │   ├── plug-broken.tsx
│   │       │   │   ├── timeout.tsx
│   │       │   │   ├── un-plugged.tsx
│   │       │   │   └── usage-alert.tsx
│   │       │   ├── layouts/
│   │       │   │   ├── README.md
│   │       │   │   ├── cards/
│   │       │   │   │   └── header-layout/
│   │       │   │   │       ├── CardHeaderLayout.stories.tsx
│   │       │   │   │       ├── README.md
│   │       │   │   │       └── components.tsx
│   │       │   │   ├── content/
│   │       │   │   │   ├── Content.stories.tsx
│   │       │   │   │   ├── ContentLg.tsx
│   │       │   │   │   ├── ContentMd.tsx
│   │       │   │   │   ├── ContentSm.tsx
│   │       │   │   │   ├── ContentXl.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   ├── components.tsx
│   │       │   │   │   └── styles.css
│   │       │   │   ├── content-action/
│   │       │   │   │   ├── ContentAction.stories.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   └── components.tsx
│   │       │   │   ├── illustration-content/
│   │       │   │   │   ├── IllustrationContent.stories.tsx
│   │       │   │   │   ├── README.md
│   │       │   │   │   └── components.tsx
│   │       │   │   └── index.ts
│   │       │   ├── shared.ts
│   │       │   ├── types.ts
│   │       │   └── utils.ts
│   │       └── tsconfig.json
│   ├── next.config.js
│   ├── package.json
│   ├── playwright.config.ts
│   ├── postcss.config.js
│   ├── public/
│   │   └── fonts/
│   │       └── KHTeka-Medium.otf
│   ├── sentry.edge.config.ts
│   ├── sentry.server.config.ts
│   ├── src/
│   │   ├── app/
│   │   │   ├── PostHogPageView.tsx
│   │   │   ├── admin/
│   │   │   │   ├── actions/
│   │   │   │   │   ├── edit/
│   │   │   │   │   │   └── [toolId]/
│   │   │   │   │   │       └── page.tsx
│   │   │   │   │   ├── edit-mcp/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── mcp/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── new/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── open-api/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── add-connector/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── agents/
│   │   │   │   │   ├── CollapsibleSection.tsx
│   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   ├── lib.ts
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── billing/
│   │   │   │   │   ├── BillingDetailsView.tsx
│   │   │   │   │   ├── CheckoutView.tsx
│   │   │   │   │   ├── LicenseActivationCard.tsx
│   │   │   │   │   ├── PlansView.tsx
│   │   │   │   │   ├── billing.css
│   │   │   │   │   ├── page.test.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── bots/
│   │   │   │   │   ├── SlackBotCreationForm.tsx
│   │   │   │   │   ├── SlackBotTable.tsx
│   │   │   │   │   ├── SlackBotUpdateForm.tsx
│   │   │   │   │   ├── SlackTokensForm.tsx
│   │   │   │   │   ├── [bot-id]/
│   │   │   │   │   │   ├── SlackChannelConfigsTable.tsx
│   │   │   │   │   │   ├── channels/
│   │   │   │   │   │   │   ├── SlackChannelConfigCreationForm.tsx
│   │   │   │   │   │   │   ├── SlackChannelConfigFormFields.tsx
│   │   │   │   │   │   │   ├── [id]/
│   │   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   │   └── new/
│   │   │   │   │   │   │       └── page.tsx
│   │   │   │   │   │   ├── hooks.ts
│   │   │   │   │   │   ├── lib.ts
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── new/
│   │   │   │   │   │   ├── lib.ts
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── configuration/
│   │   │   │   │   ├── chat-preferences/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── code-interpreter/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── document-processing/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── image-generation/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── llm/
│   │   │   │   │   │   ├── ModelConfigurationField.tsx
│   │   │   │   │   │   ├── ProviderIcon.tsx
│   │   │   │   │   │   ├── page.tsx
│   │   │   │   │   │   └── utils.ts
│   │   │   │   │   ├── search/
│   │   │   │   │   │   ├── UpgradingPage.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── voice/
│   │   │   │   │   │   ├── VoiceProviderSetupModal.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── web-search/
│   │   │   │   │       └── page.tsx
│   │   │   │   ├── connector/
│   │   │   │   │   └── [ccPairId]/
│   │   │   │   │       ├── ConfigDisplay.tsx
│   │   │   │   │       ├── DeletionErrorStatus.tsx
│   │   │   │   │       ├── IndexAttemptErrorsModal.tsx
│   │   │   │   │       ├── IndexAttemptsTable.tsx
│   │   │   │   │       ├── InlineFileManagement.tsx
│   │   │   │   │       ├── ReIndexModal.tsx
│   │   │   │   │       ├── lib.ts
│   │   │   │   │       ├── page.tsx
│   │   │   │   │       ├── types.ts
│   │   │   │   │       └── useStatusChange.tsx
│   │   │   │   ├── connectors/
│   │   │   │   │   └── [connector]/
│   │   │   │   │       ├── AddConnectorPage.tsx
│   │   │   │   │       ├── ConnectorWrapper.tsx
│   │   │   │   │       ├── NavigationRow.tsx
│   │   │   │   │       ├── auth/
│   │   │   │   │       │   └── callback/
│   │   │   │   │       │       └── route.ts
│   │   │   │   │       ├── oauth/
│   │   │   │   │       │   ├── callback/
│   │   │   │   │       │   │   └── page.tsx
│   │   │   │   │       │   └── finalize/
│   │   │   │   │       │       └── page.tsx
│   │   │   │   │       ├── page.tsx
│   │   │   │   │       └── pages/
│   │   │   │   │           ├── Advanced.tsx
│   │   │   │   │           ├── ConnectorInput/
│   │   │   │   │           │   ├── FileInput.tsx
│   │   │   │   │           │   ├── ListInput.tsx
│   │   │   │   │           │   ├── NumberInput.tsx
│   │   │   │   │           │   └── SelectInput.tsx
│   │   │   │   │           ├── DynamicConnectorCreationForm.tsx
│   │   │   │   │           ├── FieldRendering.tsx
│   │   │   │   │           ├── gdrive/
│   │   │   │   │           │   ├── Credential.tsx
│   │   │   │   │           │   └── GoogleDrivePage.tsx
│   │   │   │   │           ├── gmail/
│   │   │   │   │           │   ├── Credential.tsx
│   │   │   │   │           │   └── GmailPage.tsx
│   │   │   │   │           └── utils/
│   │   │   │   │               ├── files.ts
│   │   │   │   │               ├── google_site.ts
│   │   │   │   │               └── hooks.ts
│   │   │   │   ├── debug/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── discord-bot/
│   │   │   │   │   ├── BotConfigCard.tsx
│   │   │   │   │   ├── DiscordGuildsTable.tsx
│   │   │   │   │   ├── [guild-id]/
│   │   │   │   │   │   ├── DiscordChannelsTable.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── hooks.ts
│   │   │   │   │   ├── lib.ts
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── types.ts
│   │   │   │   ├── document-index-migration/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── documents/
│   │   │   │   │   ├── ScoreEditor.tsx
│   │   │   │   │   ├── explorer/
│   │   │   │   │   │   ├── DocumentExplorerPage.tsx
│   │   │   │   │   │   ├── Explorer.tsx
│   │   │   │   │   │   ├── lib.ts
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── feedback/
│   │   │   │   │   │   ├── DocumentFeedbackTable.tsx
│   │   │   │   │   │   ├── constants.ts
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── lib.ts
│   │   │   │   │   └── sets/
│   │   │   │   │       ├── DocumentSetCreationForm.tsx
│   │   │   │   │       ├── [documentSetId]/
│   │   │   │   │       │   └── page.tsx
│   │   │   │   │       ├── hooks.tsx
│   │   │   │   │       ├── lib.ts
│   │   │   │   │       ├── new/
│   │   │   │   │       │   └── page.tsx
│   │   │   │   │       └── page.tsx
│   │   │   │   ├── embeddings/
│   │   │   │   │   ├── EmbeddingModelSelectionForm.tsx
│   │   │   │   │   ├── RerankingFormPage.tsx
│   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   ├── modals/
│   │   │   │   │   │   ├── AlreadyPickedModal.tsx
│   │   │   │   │   │   ├── ChangeCredentialsModal.tsx
│   │   │   │   │   │   ├── DeleteCredentialsModal.tsx
│   │   │   │   │   │   ├── InstantSwitchConfirmModal.tsx
│   │   │   │   │   │   ├── ModelSelectionModal.tsx
│   │   │   │   │   │   ├── ProviderCreationModal.tsx
│   │   │   │   │   │   └── SelectModelModal.tsx
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── pages/
│   │   │   │   │       ├── AdvancedEmbeddingFormPage.tsx
│   │   │   │   │       ├── CloudEmbeddingPage.tsx
│   │   │   │   │       ├── EmbeddingFormPage.tsx
│   │   │   │   │       ├── OpenEmbeddingPage.tsx
│   │   │   │   │       └── utils.ts
│   │   │   │   ├── federated/
│   │   │   │   │   └── [id]/
│   │   │   │   │       ├── page.tsx
│   │   │   │   │       └── useFederatedConnector.ts
│   │   │   │   ├── groups/
│   │   │   │   │   ├── [id]/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── create/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── groups2/
│   │   │   │   │   ├── [id]/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── create/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── hooks/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── indexing/
│   │   │   │   │   └── status/
│   │   │   │   │       ├── CCPairIndexingStatusTable.tsx
│   │   │   │   │       ├── ConnectorRowSkeleton.tsx
│   │   │   │   │       ├── FilterComponent.tsx
│   │   │   │   │       ├── SearchAndFilterControls.tsx
│   │   │   │   │       └── page.tsx
│   │   │   │   ├── kg/
│   │   │   │   │   ├── KGEntityTypes.tsx
│   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── utils.ts
│   │   │   │   ├── layout.tsx
│   │   │   │   ├── scim/
│   │   │   │   │   ├── ScimModal.tsx
│   │   │   │   │   ├── ScimSyncCard.tsx
│   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── svc.ts
│   │   │   │   ├── service-accounts/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── systeminfo/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── token-rate-limits/
│   │   │   │   │   ├── CreateRateLimitModal.tsx
│   │   │   │   │   ├── TokenRateLimitTables.tsx
│   │   │   │   │   ├── lib.ts
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── types.ts
│   │   │   │   └── users/
│   │   │   │       └── page.tsx
│   │   │   ├── anonymous/
│   │   │   │   └── [id]/
│   │   │   │       ├── AnonymousPage.tsx
│   │   │   │       └── page.tsx
│   │   │   ├── api/
│   │   │   │   ├── [...path]/
│   │   │   │   │   └── route.ts
│   │   │   │   └── chat/
│   │   │   │       └── mcp/
│   │   │   │           └── oauth/
│   │   │   │               └── callback/
│   │   │   │                   └── route.ts
│   │   │   ├── app/
│   │   │   │   ├── agents/
│   │   │   │   │   ├── create/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── edit/
│   │   │   │   │   │   └── [id]/
│   │   │   │   │   │       └── page.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── components/
│   │   │   │   │   ├── AgentDescription.tsx
│   │   │   │   │   ├── AppPopup.tsx
│   │   │   │   │   ├── WelcomeMessage.tsx
│   │   │   │   │   ├── files/
│   │   │   │   │   │   ├── InputBarPreview.tsx
│   │   │   │   │   │   └── images/
│   │   │   │   │   │       ├── FullImageModal.tsx
│   │   │   │   │   │       ├── InMessageImage.tsx
│   │   │   │   │   │       ├── InputBarPreviewImage.tsx
│   │   │   │   │   │       └── utils.ts
│   │   │   │   │   ├── folders/
│   │   │   │   │   │   ├── FolderDropdown.tsx
│   │   │   │   │   │   └── interfaces.ts
│   │   │   │   │   ├── modifiers/
│   │   │   │   │   │   └── SelectedDocuments.tsx
│   │   │   │   │   ├── projects/
│   │   │   │   │   │   ├── ProjectChatSessionList.tsx
│   │   │   │   │   │   ├── ProjectContextPanel.tsx
│   │   │   │   │   │   └── project_utils.ts
│   │   │   │   │   └── tools/
│   │   │   │   │       ├── GeneratingImageDisplay.tsx
│   │   │   │   │       └── constants.ts
│   │   │   │   ├── interfaces.ts
│   │   │   │   ├── layout.tsx
│   │   │   │   ├── message/
│   │   │   │   │   ├── BlinkingBar.tsx
│   │   │   │   │   ├── CodeBlock.tsx
│   │   │   │   │   ├── FileDisplay.tsx
│   │   │   │   │   ├── HumanMessage.tsx
│   │   │   │   │   ├── MemoizedTextComponents.tsx
│   │   │   │   │   ├── MessageSwitcher.tsx
│   │   │   │   │   ├── Resubmit.tsx
│   │   │   │   │   ├── codeUtils.test.ts
│   │   │   │   │   ├── codeUtils.ts
│   │   │   │   │   ├── copyingUtils.tsx
│   │   │   │   │   ├── custom-code-styles.css
│   │   │   │   │   ├── errorHelpers.tsx
│   │   │   │   │   ├── hooks.ts
│   │   │   │   │   ├── messageComponents/
│   │   │   │   │   │   ├── AgentMessage.tsx
│   │   │   │   │   │   ├── CustomToolAuthCard.tsx
│   │   │   │   │   │   ├── MessageToolbar.tsx
│   │   │   │   │   │   ├── TTSButton.tsx
│   │   │   │   │   │   ├── constants.ts
│   │   │   │   │   │   ├── hooks/
│   │   │   │   │   │   │   ├── useAuthErrors.ts
│   │   │   │   │   │   │   ├── useMessageSwitching.ts
│   │   │   │   │   │   │   └── usePacketAnimationAndCollapse.ts
│   │   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   │   ├── markdownUtils.tsx
│   │   │   │   │   │   ├── renderMessageComponent.tsx
│   │   │   │   │   │   ├── renderers/
│   │   │   │   │   │   │   ├── CustomToolRenderer.tsx
│   │   │   │   │   │   │   ├── ImageToolRenderer.tsx
│   │   │   │   │   │   │   └── MessageTextRenderer.tsx
│   │   │   │   │   │   ├── timeline/
│   │   │   │   │   │   │   ├── AgentTimeline.tsx
│   │   │   │   │   │   │   ├── CollapsedStreamingContent.tsx
│   │   │   │   │   │   │   ├── ExpandedTimelineContent.tsx
│   │   │   │   │   │   │   ├── ParallelTimelineTabs.tsx
│   │   │   │   │   │   │   ├── StepContainer.tsx
│   │   │   │   │   │   │   ├── TimelineRendererComponent.tsx
│   │   │   │   │   │   │   ├── TimelineStepComposer.tsx
│   │   │   │   │   │   │   ├── headers/
│   │   │   │   │   │   │   │   ├── CompletedHeader.tsx
│   │   │   │   │   │   │   │   ├── ParallelStreamingHeader.tsx
│   │   │   │   │   │   │   │   ├── StoppedHeader.tsx
│   │   │   │   │   │   │   │   └── StreamingHeader.tsx
│   │   │   │   │   │   │   ├── hooks/
│   │   │   │   │   │   │   │   ├── __tests__/
│   │   │   │   │   │   │   │   │   └── testHelpers.ts
│   │   │   │   │   │   │   │   ├── packetProcessor.test.ts
│   │   │   │   │   │   │   │   ├── packetProcessor.ts
│   │   │   │   │   │   │   │   ├── usePacedTurnGroups.test.tsx
│   │   │   │   │   │   │   │   ├── usePacedTurnGroups.ts
│   │   │   │   │   │   │   │   ├── usePacketProcessor.test.tsx
│   │   │   │   │   │   │   │   ├── usePacketProcessor.ts
│   │   │   │   │   │   │   │   ├── useStreamingDuration.ts
│   │   │   │   │   │   │   │   ├── useTimelineExpansion.ts
│   │   │   │   │   │   │   │   ├── useTimelineHeader.ts
│   │   │   │   │   │   │   │   ├── useTimelineMetrics.ts
│   │   │   │   │   │   │   │   ├── useTimelineStepState.ts
│   │   │   │   │   │   │   │   └── useTimelineUIState.ts
│   │   │   │   │   │   │   ├── packetHelpers.ts
│   │   │   │   │   │   │   ├── primitives/
│   │   │   │   │   │   │   │   ├── TimelineHeaderRow.tsx
│   │   │   │   │   │   │   │   ├── TimelineIconColumn.tsx
│   │   │   │   │   │   │   │   ├── TimelineRoot.tsx
│   │   │   │   │   │   │   │   ├── TimelineRow.tsx
│   │   │   │   │   │   │   │   ├── TimelineStepContent.tsx
│   │   │   │   │   │   │   │   ├── TimelineSurface.tsx
│   │   │   │   │   │   │   │   ├── TimelineTopSpacer.tsx
│   │   │   │   │   │   │   │   └── tokens.ts
│   │   │   │   │   │   │   ├── renderers/
│   │   │   │   │   │   │   │   ├── code/
│   │   │   │   │   │   │   │   │   └── PythonToolRenderer.tsx
│   │   │   │   │   │   │   │   ├── deepresearch/
│   │   │   │   │   │   │   │   │   ├── DeepResearchPlanRenderer.tsx
│   │   │   │   │   │   │   │   │   └── ResearchAgentRenderer.tsx
│   │   │   │   │   │   │   │   ├── fetch/
│   │   │   │   │   │   │   │   │   ├── FetchToolRenderer.tsx
│   │   │   │   │   │   │   │   │   └── fetchStateUtils.ts
│   │   │   │   │   │   │   │   ├── filereader/
│   │   │   │   │   │   │   │   │   └── FileReaderToolRenderer.tsx
│   │   │   │   │   │   │   │   ├── memory/
│   │   │   │   │   │   │   │   │   ├── MemoryToolRenderer.tsx
│   │   │   │   │   │   │   │   │   └── memoryStateUtils.ts
│   │   │   │   │   │   │   │   ├── reasoning/
│   │   │   │   │   │   │   │   │   └── ReasoningRenderer.tsx
│   │   │   │   │   │   │   │   ├── search/
│   │   │   │   │   │   │   │   │   ├── InternalSearchToolRenderer.tsx
│   │   │   │   │   │   │   │   │   ├── SearchChipList.tsx
│   │   │   │   │   │   │   │   │   ├── WebSearchToolRenderer.tsx
│   │   │   │   │   │   │   │   │   └── searchStateUtils.ts
│   │   │   │   │   │   │   │   └── sharedMarkdownComponents.tsx
│   │   │   │   │   │   │   └── transformers.ts
│   │   │   │   │   │   ├── timing.ts
│   │   │   │   │   │   └── toolDisplayHelpers.tsx
│   │   │   │   │   └── thinkingBox/
│   │   │   │   │       └── ThinkingBox.css
│   │   │   │   ├── page.tsx
│   │   │   │   ├── projects/
│   │   │   │   │   └── projectsService.ts
│   │   │   │   ├── services/
│   │   │   │   │   ├── actionUtils.ts
│   │   │   │   │   ├── currentMessageFIFO.ts
│   │   │   │   │   ├── fileUtils.ts
│   │   │   │   │   ├── lib.tsx
│   │   │   │   │   ├── messageTree.ts
│   │   │   │   │   ├── packetUtils.test.ts
│   │   │   │   │   ├── packetUtils.ts
│   │   │   │   │   ├── searchParams.ts
│   │   │   │   │   ├── streamingModels.ts
│   │   │   │   │   └── thinkingTokens.ts
│   │   │   │   ├── settings/
│   │   │   │   │   ├── accounts-access/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── chat-preferences/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── connectors/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── general/
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── layout.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── shared/
│   │   │   │   │   └── [chatId]/
│   │   │   │   │       ├── SharedChatDisplay.tsx
│   │   │   │   │       └── page.tsx
│   │   │   │   └── stores/
│   │   │   │       └── useChatSessionStore.ts
│   │   │   ├── auth/
│   │   │   │   ├── create-account/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── error/
│   │   │   │   │   ├── AuthErrorContent.tsx
│   │   │   │   │   ├── layout.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── forgot-password/
│   │   │   │   │   ├── page.tsx
│   │   │   │   │   └── utils.ts
│   │   │   │   ├── impersonate/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── join/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── lib.ts
│   │   │   │   ├── libSS.ts
│   │   │   │   ├── login/
│   │   │   │   │   ├── EmailPasswordForm.test.tsx
│   │   │   │   │   ├── EmailPasswordForm.tsx
│   │   │   │   │   ├── LoginPage.tsx
│   │   │   │   │   ├── LoginText.tsx
│   │   │   │   │   ├── SignInButton.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── logout/
│   │   │   │   │   └── route.ts
│   │   │   │   ├── oauth/
│   │   │   │   │   └── callback/
│   │   │   │   │       └── route.ts
│   │   │   │   ├── oidc/
│   │   │   │   │   └── callback/
│   │   │   │   │       └── route.ts
│   │   │   │   ├── reset-password/
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── saml/
│   │   │   │   │   └── callback/
│   │   │   │   │       └── route.ts
│   │   │   │   ├── signup/
│   │   │   │   │   ├── ReferralSourceSelector.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── verify-email/
│   │   │   │   │   ├── Verify.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   └── waiting-on-verification/
│   │   │   │       ├── RequestNewVerificationEmail.tsx
│   │   │   │       └── page.tsx
│   │   │   ├── components/
│   │   │   │   └── nrf/
│   │   │   │       └── SettingsPanel.tsx
│   │   │   ├── config/
│   │   │   │   └── timeRange.tsx
│   │   │   ├── connector/
│   │   │   │   └── oauth/
│   │   │   │       └── callback/
│   │   │   │           └── [source]/
│   │   │   │               └── route.tsx
│   │   │   ├── craft/
│   │   │   │   ├── README.md
│   │   │   │   ├── components/
│   │   │   │   │   ├── BigButton.tsx
│   │   │   │   │   ├── BuildLLMPopover.tsx
│   │   │   │   │   ├── BuildMessageList.tsx
│   │   │   │   │   ├── BuildWelcome.tsx
│   │   │   │   │   ├── ChatPanel.tsx
│   │   │   │   │   ├── ConnectDataBanner.tsx
│   │   │   │   │   ├── ConnectorBannersRow.tsx
│   │   │   │   │   ├── CraftingLoader.tsx
│   │   │   │   │   ├── DiffView.tsx
│   │   │   │   │   ├── FileBrowser.tsx
│   │   │   │   │   ├── FilePreviewModal.tsx
│   │   │   │   │   ├── InputBar.tsx
│   │   │   │   │   ├── IntroBackground.tsx
│   │   │   │   │   ├── IntroContent.tsx
│   │   │   │   │   ├── OutputPanel.tsx
│   │   │   │   │   ├── RawOutputBlock.tsx
│   │   │   │   │   ├── SandboxStatusIndicator.tsx
│   │   │   │   │   ├── ShareButton.tsx
│   │   │   │   │   ├── SideBar.tsx
│   │   │   │   │   ├── SuggestedPrompts.tsx
│   │   │   │   │   ├── SuggestionBubbles.tsx
│   │   │   │   │   ├── TextChunk.tsx
│   │   │   │   │   ├── ThinkingCard.tsx
│   │   │   │   │   ├── TodoListCard.tsx
│   │   │   │   │   ├── ToggleWarningModal.tsx
│   │   │   │   │   ├── ToolCallPill.tsx
│   │   │   │   │   ├── TypewriterText.tsx
│   │   │   │   │   ├── UpgradePlanModal.tsx
│   │   │   │   │   ├── UserMessage.tsx
│   │   │   │   │   ├── WorkingLine.tsx
│   │   │   │   │   ├── WorkingPill.tsx
│   │   │   │   │   └── output-panel/
│   │   │   │   │       ├── ArtifactsTab.tsx
│   │   │   │   │       ├── FilePreviewContent.tsx
│   │   │   │   │       ├── FilesTab.tsx
│   │   │   │   │       ├── ImagePreview.tsx
│   │   │   │   │       ├── MarkdownFilePreview.tsx
│   │   │   │   │       ├── PdfPreview.tsx
│   │   │   │   │       ├── PptxPreview.tsx
│   │   │   │   │       ├── PreviewTab.tsx
│   │   │   │   │       └── UrlBar.tsx
│   │   │   │   ├── constants/
│   │   │   │   │   └── exampleBuildPrompts.ts
│   │   │   │   ├── constants.ts
│   │   │   │   ├── contexts/
│   │   │   │   │   ├── BuildContext.tsx
│   │   │   │   │   └── UploadFilesContext.tsx
│   │   │   │   ├── hooks/
│   │   │   │   │   ├── useBuildConnectors.ts
│   │   │   │   │   ├── useBuildLlmSelection.ts
│   │   │   │   │   ├── useBuildSessionController.ts
│   │   │   │   │   ├── useBuildSessionStore.ts
│   │   │   │   │   ├── useBuildStreaming.ts
│   │   │   │   │   ├── usePreProvisionPolling.ts
│   │   │   │   │   └── useUsageLimits.ts
│   │   │   │   ├── layout.tsx
│   │   │   │   ├── onboarding/
│   │   │   │   │   ├── BuildOnboardingProvider.tsx
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── BuildOnboardingModal.tsx
│   │   │   │   │   │   ├── NoLlmProvidersModal.tsx
│   │   │   │   │   │   ├── NotAllowedModal.tsx
│   │   │   │   │   │   ├── OnboardingInfoPages.tsx
│   │   │   │   │   │   ├── OnboardingLlmSetup.tsx
│   │   │   │   │   │   └── OnboardingUserInfo.tsx
│   │   │   │   │   ├── constants.ts
│   │   │   │   │   ├── hooks/
│   │   │   │   │   │   └── useOnboardingModal.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── page.tsx
│   │   │   │   ├── services/
│   │   │   │   │   ├── apiServices.ts
│   │   │   │   │   └── searchParams.ts
│   │   │   │   ├── types/
│   │   │   │   │   ├── displayTypes.ts
│   │   │   │   │   ├── streamingTypes.ts
│   │   │   │   │   └── user-library.ts
│   │   │   │   ├── utils/
│   │   │   │   │   ├── packetTypes.ts
│   │   │   │   │   ├── parsePacket.ts
│   │   │   │   │   ├── pathSanitizer.test.ts
│   │   │   │   │   ├── pathSanitizer.ts
│   │   │   │   │   └── streamItemHelpers.ts
│   │   │   │   └── v1/
│   │   │   │       ├── configure/
│   │   │   │       │   ├── components/
│   │   │   │       │   │   ├── ComingSoonConnectors.tsx
│   │   │   │       │   │   ├── ConfigureConnectorModal.tsx
│   │   │   │       │   │   ├── ConfigureOverlays.tsx
│   │   │   │       │   │   ├── ConnectorCard.tsx
│   │   │   │       │   │   ├── ConnectorConfigStep.tsx
│   │   │   │       │   │   ├── CreateCredentialInline.tsx
│   │   │   │       │   │   ├── CredentialStep.tsx
│   │   │   │       │   │   ├── DemoDataConfirmModal.tsx
│   │   │   │       │   │   ├── RequestConnectorModal.tsx
│   │   │   │       │   │   └── UserLibraryModal.tsx
│   │   │   │       │   ├── page.tsx
│   │   │   │       │   └── utils/
│   │   │   │       │       └── createBuildConnector.ts
│   │   │   │       ├── constants.ts
│   │   │   │       ├── layout.tsx
│   │   │   │       └── page.tsx
│   │   │   ├── css/
│   │   │   │   ├── attachment-button.css
│   │   │   │   ├── button.css
│   │   │   │   ├── card.css
│   │   │   │   ├── code.css
│   │   │   │   ├── color-swatch.css
│   │   │   │   ├── colors.css
│   │   │   │   ├── divider.css
│   │   │   │   ├── general-layouts.css
│   │   │   │   ├── inputs.css
│   │   │   │   ├── knowledge-table.css
│   │   │   │   ├── line-item.css
│   │   │   │   ├── sizes.css
│   │   │   │   ├── square-button.css
│   │   │   │   ├── switch.css
│   │   │   │   └── z-index.css
│   │   │   ├── ee/
│   │   │   │   ├── EEFeatureRedirect.tsx
│   │   │   │   ├── LICENSE
│   │   │   │   ├── admin/
│   │   │   │   │   ├── billing/
│   │   │   │   │   │   ├── BillingAlerts.tsx
│   │   │   │   │   │   ├── BillingInformationPage.tsx
│   │   │   │   │   │   ├── InfoItem.tsx
│   │   │   │   │   │   ├── SubscriptionSummary.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── groups/
│   │   │   │   │   │   ├── [id]/
│   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   ├── create/
│   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   ├── layout.tsx
│   │   │   │   │   ├── performance/
│   │   │   │   │   │   ├── custom-analytics/
│   │   │   │   │   │   │   ├── CustomAnalyticsUpdateForm.tsx
│   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   ├── lib.ts
│   │   │   │   │   │   ├── query-history/
│   │   │   │   │   │   │   ├── FeedbackBadge.tsx
│   │   │   │   │   │   │   ├── KickoffCSVExport.tsx
│   │   │   │   │   │   │   ├── QueryHistoryTable.tsx
│   │   │   │   │   │   │   ├── [id]/
│   │   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   │   ├── constants.ts
│   │   │   │   │   │   │   ├── page.tsx
│   │   │   │   │   │   │   ├── types.ts
│   │   │   │   │   │   │   └── utils.ts
│   │   │   │   │   │   └── usage/
│   │   │   │   │   │       ├── FeedbackChart.tsx
│   │   │   │   │   │       ├── OnyxBotChart.tsx
│   │   │   │   │   │       ├── PersonaMessagesChart.tsx
│   │   │   │   │   │       ├── QueryPerformanceChart.tsx
│   │   │   │   │   │       ├── UsageReports.tsx
│   │   │   │   │   │       ├── page.tsx
│   │   │   │   │   │       └── types.ts
│   │   │   │   │   ├── standard-answer/
│   │   │   │   │   │   ├── StandardAnswerCreationForm.tsx
│   │   │   │   │   │   ├── [id]/
│   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   ├── hooks.ts
│   │   │   │   │   │   ├── lib.ts
│   │   │   │   │   │   ├── new/
│   │   │   │   │   │   │   └── page.tsx
│   │   │   │   │   │   └── page.tsx
│   │   │   │   │   └── theme/
│   │   │   │   │       ├── AppearanceThemeSettings.tsx
│   │   │   │   │       ├── Preview.tsx
│   │   │   │   │       └── page.tsx
│   │   │   │   ├── agents/
│   │   │   │   │   └── stats/
│   │   │   │   │       └── [id]/
│   │   │   │   │           ├── AgentStats.tsx
│   │   │   │   │           └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── federated/
│   │   │   │   └── oauth/
│   │   │   │       └── callback/
│   │   │   │           └── page.tsx
│   │   │   ├── global-error.tsx
│   │   │   ├── globals.css
│   │   │   ├── layout.tsx
│   │   │   ├── mcp/
│   │   │   │   ├── [[...path]]/
│   │   │   │   │   └── route.ts
│   │   │   │   └── oauth/
│   │   │   │       └── callback/
│   │   │   │           └── page.tsx
│   │   │   ├── not-found.tsx
│   │   │   ├── nrf/
│   │   │   │   ├── (main)/
│   │   │   │   │   ├── layout.tsx
│   │   │   │   │   └── page.tsx
│   │   │   │   ├── NRFChrome.tsx
│   │   │   │   ├── NRFPage.tsx
│   │   │   │   ├── layout.tsx
│   │   │   │   └── side-panel/
│   │   │   │       ├── SidePanelHeader.tsx
│   │   │   │       └── page.tsx
│   │   │   ├── oauth-config/
│   │   │   │   └── callback/
│   │   │   │       └── page.tsx
│   │   │   ├── page.tsx
│   │   │   ├── providers.tsx
│   │   │   └── web-vitals.tsx
│   │   ├── ce.tsx
│   │   ├── components/
│   │   │   ├── AdvancedOptionsToggle.tsx
│   │   │   ├── AgentsMultiSelect.tsx
│   │   │   ├── BasicClickable.tsx
│   │   │   ├── Bubble.tsx
│   │   │   ├── CollapsibleCard.tsx
│   │   │   ├── ConnectorMultiSelect.tsx
│   │   │   ├── DeleteButton.tsx
│   │   │   ├── Dropdown.tsx
│   │   │   ├── EditableStringFieldDisplay.tsx
│   │   │   ├── EditableValue.tsx
│   │   │   ├── ErrorCallout.tsx
│   │   │   ├── FederatedConnectorSelector.tsx
│   │   │   ├── Field.tsx
│   │   │   ├── FormErrorHelpers.tsx
│   │   │   ├── GatedContentWrapper.tsx
│   │   │   ├── GenericMultiSelect.tsx
│   │   │   ├── GroupsMultiSelect.tsx
│   │   │   ├── HoverPopup.tsx
│   │   │   ├── IsPublicGroupSelector.tsx
│   │   │   ├── Loading.tsx
│   │   │   ├── MetadataBadge.tsx
│   │   │   ├── MultiSelectDropdown.tsx
│   │   │   ├── NonSelectableConnectors.tsx
│   │   │   ├── OnyxInitializingLoader.tsx
│   │   │   ├── PageSelector.tsx
│   │   │   ├── RichTextSubtext.tsx
│   │   │   ├── SSRAutoRefresh.tsx
│   │   │   ├── SearchResultIcon.tsx
│   │   │   ├── SourceIcon.tsx
│   │   │   ├── SourceTile.tsx
│   │   │   ├── Spinner.tsx
│   │   │   ├── Status.tsx
│   │   │   ├── WebResultIcon.tsx
│   │   │   ├── admin/
│   │   │   │   ├── CardSection.tsx
│   │   │   │   ├── ClientLayout.tsx
│   │   │   │   ├── Layout.tsx
│   │   │   │   ├── Title.tsx
│   │   │   │   ├── connectors/
│   │   │   │   │   ├── AccessTypeForm.tsx
│   │   │   │   │   ├── AccessTypeGroupSelector.tsx
│   │   │   │   │   ├── AutoSyncOptions.tsx
│   │   │   │   │   ├── BasicTable.tsx
│   │   │   │   │   ├── ConnectorDocsLink.tsx
│   │   │   │   │   ├── ConnectorTitle.tsx
│   │   │   │   │   ├── CredentialForm.tsx
│   │   │   │   │   ├── FileUpload.tsx
│   │   │   │   │   └── types.ts
│   │   │   │   ├── federated/
│   │   │   │   │   └── FederatedConnectorForm.tsx
│   │   │   │   └── users/
│   │   │   │       ├── BulkAdd.tsx
│   │   │   │       ├── CenteredPageSelector.tsx
│   │   │   │       ├── InvitedUserTable.tsx
│   │   │   │       ├── PendingUsersTable.tsx
│   │   │   │       ├── ResetPasswordModal.tsx
│   │   │   │       ├── SignedUpUserTable.tsx
│   │   │   │       └── buttons/
│   │   │   │           ├── DeactivateUserButton.tsx
│   │   │   │           ├── DeleteUserButton.tsx
│   │   │   │           ├── InviteUserButton.tsx
│   │   │   │           ├── LeaveOrganizationButton.tsx
│   │   │   │           └── UserRoleDropdown.tsx
│   │   │   ├── auth/
│   │   │   │   ├── AuthErrorDisplay.tsx
│   │   │   │   └── AuthFlowContainer.tsx
│   │   │   ├── chat/
│   │   │   │   ├── DynamicBottomSpacer.tsx
│   │   │   │   ├── FederatedOAuthModal.tsx
│   │   │   │   ├── MCPApiKeyModal.tsx
│   │   │   │   ├── MinimalMarkdown.test.tsx
│   │   │   │   ├── MinimalMarkdown.tsx
│   │   │   │   ├── ProviderContext.tsx
│   │   │   │   └── ScrollContainerContext.tsx
│   │   │   ├── context/
│   │   │   │   ├── EmbeddingContext.tsx
│   │   │   │   ├── FormContext.tsx
│   │   │   │   ├── ModalContext.tsx
│   │   │   │   └── NRFPreferencesContext.tsx
│   │   │   ├── credentials/
│   │   │   │   ├── CredentialFields.tsx
│   │   │   │   ├── CredentialSection.tsx
│   │   │   │   ├── actions/
│   │   │   │   │   ├── CreateCredential.tsx
│   │   │   │   │   ├── CreateStdOAuthCredential.tsx
│   │   │   │   │   ├── CredentialFieldsRenderer.tsx
│   │   │   │   │   ├── EditCredential.tsx
│   │   │   │   │   └── ModifyCredential.tsx
│   │   │   │   ├── lib.ts
│   │   │   │   └── types.ts
│   │   │   ├── dateRangeSelectors/
│   │   │   │   ├── AdminDateRangeSelector.tsx
│   │   │   │   ├── SearchDateRangeSelector.tsx
│   │   │   │   └── dateUtils.ts
│   │   │   ├── dev/
│   │   │   │   ├── StatsOverlay.tsx
│   │   │   │   └── StatsOverlayLoader.tsx
│   │   │   ├── embedding/
│   │   │   │   ├── CustomEmbeddingModelForm.tsx
│   │   │   │   ├── CustomModelForm.tsx
│   │   │   │   ├── FailedReIndexAttempts.tsx
│   │   │   │   ├── ModelSelector.tsx
│   │   │   │   ├── ReindexingProgressTable.tsx
│   │   │   │   └── interfaces.tsx
│   │   │   ├── errorPages/
│   │   │   │   ├── AccessRestrictedPage.tsx
│   │   │   │   ├── CloudErrorPage.tsx
│   │   │   │   ├── ErrorPage.tsx
│   │   │   │   └── ErrorPageLayout.tsx
│   │   │   ├── filters/
│   │   │   │   ├── SourceSelector.tsx
│   │   │   │   └── TimeRangeSelector.tsx
│   │   │   ├── header/
│   │   │   │   ├── AnnouncementBanner.tsx
│   │   │   │   └── HeaderTitle.tsx
│   │   │   ├── icons/
│   │   │   │   ├── DynamicFaIcon.tsx
│   │   │   │   ├── icons.test.tsx
│   │   │   │   └── icons.tsx
│   │   │   ├── llm/
│   │   │   │   └── LLMSelector.tsx
│   │   │   ├── loading.css
│   │   │   ├── modals/
│   │   │   │   ├── AddInstructionModal.tsx
│   │   │   │   ├── ConfirmEntityModal.tsx
│   │   │   │   ├── CreateProjectModal.tsx
│   │   │   │   ├── EditPropertyModal.tsx
│   │   │   │   ├── GenericConfirmModal.tsx
│   │   │   │   ├── MoveCustomAgentChatModal.tsx
│   │   │   │   ├── NewTeamModal.tsx
│   │   │   │   ├── NoAgentModal.tsx
│   │   │   │   ├── ProviderModal.tsx
│   │   │   │   └── UserFilesModal.tsx
│   │   │   ├── oauth/
│   │   │   │   └── OAuthCallbackPage.tsx
│   │   │   ├── resizable/
│   │   │   │   └── constants.ts
│   │   │   ├── search/
│   │   │   │   ├── DocumentDisplay.tsx
│   │   │   │   ├── DocumentFeedbackBlock.tsx
│   │   │   │   ├── DocumentUpdatedAtBadge.tsx
│   │   │   │   ├── filtering/
│   │   │   │   │   └── FilterDropdown.tsx
│   │   │   │   └── results/
│   │   │   │       ├── Citation.tsx
│   │   │   │       └── ResponseSection.tsx
│   │   │   ├── settings/
│   │   │   │   ├── lib.ts
│   │   │   │   └── usePaidEnterpriseFeaturesEnabled.ts
│   │   │   ├── sidebar/
│   │   │   │   ├── ChatSessionMorePopup.tsx
│   │   │   │   └── types.ts
│   │   │   ├── spinner.css
│   │   │   ├── standardAnswers/
│   │   │   │   ├── StandardAnswerCategoryDropdown.tsx
│   │   │   │   └── getStandardAnswerCategoriesIfEE.tsx
│   │   │   ├── table/
│   │   │   │   ├── DragHandle.tsx
│   │   │   │   ├── DraggableRow.tsx
│   │   │   │   ├── DraggableTable.tsx
│   │   │   │   └── interfaces.ts
│   │   │   ├── theme/
│   │   │   │   └── ThemeProvider.tsx
│   │   │   ├── tools/
│   │   │   │   ├── CSVContent.tsx
│   │   │   │   ├── ExpandableContentWrapper.tsx
│   │   │   │   └── parseCSV.test.ts
│   │   │   ├── tooltip/
│   │   │   │   └── CustomTooltip.tsx
│   │   │   ├── ui/
│   │   │   │   ├── RadioGroupItemField.tsx
│   │   │   │   ├── accordion.tsx
│   │   │   │   ├── alert.tsx
│   │   │   │   ├── areaChart.tsx
│   │   │   │   ├── badge.tsx
│   │   │   │   ├── callout.tsx
│   │   │   │   ├── card.tsx
│   │   │   │   ├── dialog.tsx
│   │   │   │   ├── dropdown-menu-with-tooltip.tsx
│   │   │   │   ├── dropdown-menu.tsx
│   │   │   │   ├── input.tsx
│   │   │   │   ├── radio-group.tsx
│   │   │   │   ├── scroll-area.tsx
│   │   │   │   ├── select.tsx
│   │   │   │   ├── slider.tsx
│   │   │   │   ├── table.tsx
│   │   │   │   ├── title.tsx
│   │   │   │   └── tooltip.tsx
│   │   │   └── voice/
│   │   │       └── Waveform.tsx
│   │   ├── ee/
│   │   │   ├── LICENSE
│   │   │   ├── hooks/
│   │   │   │   ├── useHookExecutionLogs.ts
│   │   │   │   ├── useHookSpecs.ts
│   │   │   │   └── useHooks.ts
│   │   │   ├── lib/
│   │   │   │   └── search/
│   │   │   │       └── svc.ts
│   │   │   ├── providers/
│   │   │   │   └── QueryControllerProvider.tsx
│   │   │   ├── refresh-pages/
│   │   │   │   └── admin/
│   │   │   │       └── HooksPage/
│   │   │   │           ├── HookFormModal.tsx
│   │   │   │           ├── HookLogsModal.tsx
│   │   │   │           ├── HookStatusPopover.tsx
│   │   │   │           ├── index.tsx
│   │   │   │           ├── interfaces.ts
│   │   │   │           └── svc.ts
│   │   │   └── sections/
│   │   │       ├── SearchCard.tsx
│   │   │       └── SearchUI.tsx
│   │   ├── hooks/
│   │   │   ├── __tests__/
│   │   │   │   └── useShowOnboarding.test.tsx
│   │   │   ├── appNavigation.ts
│   │   │   ├── formHooks.ts
│   │   │   ├── useAdminPersonas.ts
│   │   │   ├── useAdminUsers.ts
│   │   │   ├── useAgentController.ts
│   │   │   ├── useAgentPreferences.ts
│   │   │   ├── useAgents.ts
│   │   │   ├── useAppFocus.ts
│   │   │   ├── useAuthTypeMetadata.ts
│   │   │   ├── useAvailableTools.ts
│   │   │   ├── useBillingInformation.ts
│   │   │   ├── useBoundingBox.ts
│   │   │   ├── useBrowserInfo.ts
│   │   │   ├── useCCPairs.ts
│   │   │   ├── useChatController.ts
│   │   │   ├── useChatSessionController.ts
│   │   │   ├── useChatSessions.ts
│   │   │   ├── useClickOutside.ts
│   │   │   ├── useCloudSubscription.ts
│   │   │   ├── useCodeInterpreter.ts
│   │   │   ├── useContainerCenter.ts
│   │   │   ├── useContentSize.ts
│   │   │   ├── useCurrentUser.ts
│   │   │   ├── useDeepResearchToggle.ts
│   │   │   ├── useFederatedOAuthStatus.ts
│   │   │   ├── useFeedbackController.ts
│   │   │   ├── useFilter.ts
│   │   │   ├── useGroups.ts
│   │   │   ├── useImageDropzone.ts
│   │   │   ├── useIsDefaultAgent.ts
│   │   │   ├── useKeyPress.ts
│   │   │   ├── useLLMProviders.ts
│   │   │   ├── useLicense.ts
│   │   │   ├── useMcpServers.ts
│   │   │   ├── useMcpServersForAgentEditor.ts
│   │   │   ├── useMemoryManager.ts
│   │   │   ├── useOnMount.ts
│   │   │   ├── useOpenApiTools.ts
│   │   │   ├── usePaginatedFetch.ts
│   │   │   ├── usePromptShortcuts.ts
│   │   │   ├── useScimToken.ts
│   │   │   ├── useScreenSize.ts
│   │   │   ├── useServerTools.ts
│   │   │   ├── useSettings.test.ts
│   │   │   ├── useSettings.ts
│   │   │   ├── useShareableGroups.ts
│   │   │   ├── useShareableUsers.ts
│   │   │   ├── useShowOnboarding.ts
│   │   │   ├── useTags.ts
│   │   │   ├── useToast.ts
│   │   │   ├── useTokenRefresh.ts
│   │   │   ├── useUserCounts.ts
│   │   │   ├── useUserPersonalization.ts
│   │   │   ├── useUsers.ts
│   │   │   ├── useVoicePlayback.ts
│   │   │   ├── useVoiceProviders.ts
│   │   │   ├── useVoiceRecorder.ts
│   │   │   ├── useVoiceStatus.ts
│   │   │   └── useWebSocket.ts
│   │   ├── instrumentation-client.ts
│   │   ├── instrumentation.ts
│   │   ├── interfaces/
│   │   │   ├── llm.ts
│   │   │   ├── onboarding.ts
│   │   │   └── settings.ts
│   │   ├── layouts/
│   │   │   ├── actions-layouts.tsx
│   │   │   ├── app-layouts.tsx
│   │   │   ├── expandable-card-layouts.tsx
│   │   │   ├── general-layouts.tsx
│   │   │   ├── input-layouts.tsx
│   │   │   ├── settings-layouts.tsx
│   │   │   └── table-layouts.tsx
│   │   ├── lib/
│   │   │   ├── admin/
│   │   │   │   ├── users/
│   │   │   │   │   └── userMutationFetcher.ts
│   │   │   │   └── voice/
│   │   │   │       └── svc.ts
│   │   │   ├── admin-routes.ts
│   │   │   ├── agents.ts
│   │   │   ├── agentsSS.ts
│   │   │   ├── analytics.ts
│   │   │   ├── appSidebarSS.ts
│   │   │   ├── auth/
│   │   │   │   ├── redirectValidation.ts
│   │   │   │   └── requireAuth.ts
│   │   │   ├── azureTargetUri.ts
│   │   │   ├── billing/
│   │   │   │   ├── index.ts
│   │   │   │   ├── interfaces.ts
│   │   │   │   ├── svc.test.ts
│   │   │   │   └── svc.ts
│   │   │   ├── browserUtilities.tsx
│   │   │   ├── build/
│   │   │   │   └── client.ts
│   │   │   ├── ccPair.ts
│   │   │   ├── chat/
│   │   │   │   ├── fetchAgentData.ts
│   │   │   │   ├── fetchBackendChatSessionSS.ts
│   │   │   │   ├── greetingMessages.ts
│   │   │   │   └── svc.ts
│   │   │   ├── clipboard.test.ts
│   │   │   ├── clipboard.ts
│   │   │   ├── connector.ts
│   │   │   ├── connectors/
│   │   │   │   ├── AutoSyncOptionFields.tsx
│   │   │   │   ├── connectors.tsx
│   │   │   │   ├── credentials.ts
│   │   │   │   ├── fileTypes.ts
│   │   │   │   └── oauth.ts
│   │   │   ├── constants/
│   │   │   │   └── chatBackgrounds.ts
│   │   │   ├── constants.ts
│   │   │   ├── contains.ts
│   │   │   ├── credential.ts
│   │   │   ├── dateUtils.ts
│   │   │   ├── documentDeletion.ts
│   │   │   ├── documentUtils.ts
│   │   │   ├── download.ts
│   │   │   ├── drag/
│   │   │   │   └── constants.ts
│   │   │   ├── error.ts
│   │   │   ├── extension/
│   │   │   │   ├── constants.ts
│   │   │   │   └── utils.ts
│   │   │   ├── fetchUtils.ts
│   │   │   ├── fetcher.ts
│   │   │   ├── fileConnector.ts
│   │   │   ├── filters.ts
│   │   │   ├── generated/
│   │   │   │   └── README.md
│   │   │   ├── gmail.ts
│   │   │   ├── googleConnector.ts
│   │   │   ├── googleDrive.ts
│   │   │   ├── headers/
│   │   │   │   └── fetchHeaderDataSS.ts
│   │   │   ├── hierarchy/
│   │   │   │   ├── interfaces.ts
│   │   │   │   └── svc.ts
│   │   │   ├── hooks/
│   │   │   │   ├── useCaptcha.ts
│   │   │   │   ├── useCustomAnalyticsEnabled.ts
│   │   │   │   ├── useDocumentSets.ts
│   │   │   │   ├── useForcedTools.ts
│   │   │   │   ├── useLLMProviderOptions.ts
│   │   │   │   ├── useLLMProviders.test.ts
│   │   │   │   ├── useProjects.ts
│   │   │   │   └── useToolOAuthStatus.ts
│   │   │   ├── hooks.llmResolver.test.ts
│   │   │   ├── hooks.ts
│   │   │   ├── indexAttempt.ts
│   │   │   ├── languages.test.ts
│   │   │   ├── languages.ts
│   │   │   ├── llmConfig/
│   │   │   │   ├── cache.ts
│   │   │   │   ├── constants.ts
│   │   │   │   ├── providers.ts
│   │   │   │   ├── svc.ts
│   │   │   │   ├── utils.ts
│   │   │   │   └── visionLLM.ts
│   │   │   ├── oauth/
│   │   │   │   └── api.ts
│   │   │   ├── oauth_utils.ts
│   │   │   ├── redirectSS.ts
│   │   │   ├── search/
│   │   │   │   ├── interfaces.ts
│   │   │   │   ├── streamingUtils.ts
│   │   │   │   ├── utils.ts
│   │   │   │   └── utilsSS.ts
│   │   │   ├── sources.ts
│   │   │   ├── streamingTTS.ts
│   │   │   ├── swr-keys.ts
│   │   │   ├── time.ts
│   │   │   ├── tools/
│   │   │   │   ├── fetchTools.ts
│   │   │   │   ├── interfaces.ts
│   │   │   │   ├── mcpService.ts
│   │   │   │   ├── mcpUtils.tsx
│   │   │   │   └── openApiService.ts
│   │   │   ├── types.ts
│   │   │   ├── typingUtils.ts
│   │   │   ├── updateSlackBotField.ts
│   │   │   ├── urlBuilder.ts
│   │   │   ├── user.test.ts
│   │   │   ├── user.ts
│   │   │   ├── userSS.ts
│   │   │   ├── userSettings.ts
│   │   │   ├── utils.test.ts
│   │   │   ├── utils.ts
│   │   │   ├── utilsSS.ts
│   │   │   └── version.ts
│   │   ├── providers/
│   │   │   ├── AppBackgroundProvider.tsx
│   │   │   ├── AppProvider.tsx
│   │   │   ├── AppSidebarProvider.tsx
│   │   │   ├── CustomAnalyticsScript.tsx
│   │   │   ├── DynamicMetadata.tsx
│   │   │   ├── ProductGatingWrapper.tsx
│   │   │   ├── ProjectsContext.tsx
│   │   │   ├── QueryControllerProvider.tsx
│   │   │   ├── SWRConfigProvider.tsx
│   │   │   ├── SettingsProvider.tsx
│   │   │   ├── ToastProvider.tsx
│   │   │   ├── UserProvider.tsx
│   │   │   ├── VoiceModeProvider.tsx
│   │   │   └── __tests__/
│   │   │       └── ProjectsContext.test.tsx
│   │   ├── proxy.ts
│   │   ├── refresh-components/
│   │   │   ├── Attachment.stories.tsx
│   │   │   ├── Attachment.tsx
│   │   │   ├── Calendar.stories.tsx
│   │   │   ├── Calendar.tsx
│   │   │   ├── CharacterCount.stories.tsx
│   │   │   ├── CharacterCount.tsx
│   │   │   ├── Chip.stories.tsx
│   │   │   ├── Chip.tsx
│   │   │   ├── Code.stories.tsx
│   │   │   ├── Code.tsx
│   │   │   ├── Collapsible.stories.tsx
│   │   │   ├── Collapsible.tsx
│   │   │   ├── ColorSwatch.stories.tsx
│   │   │   ├── ColorSwatch.tsx
│   │   │   ├── ConnectionProviderIcon.stories.tsx
│   │   │   ├── ConnectionProviderIcon.tsx
│   │   │   ├── Divider.stories.tsx
│   │   │   ├── Divider.tsx
│   │   │   ├── EmptyMessage.stories.tsx
│   │   │   ├── EmptyMessage.tsx
│   │   │   ├── EnabledCount.stories.tsx
│   │   │   ├── EnabledCount.tsx
│   │   │   ├── FadingEdgeContainer.stories.tsx
│   │   │   ├── FadingEdgeContainer.tsx
│   │   │   ├── FrostedDiv.stories.tsx
│   │   │   ├── FrostedDiv.tsx
│   │   │   ├── InlineExternalLink.stories.tsx
│   │   │   ├── InlineExternalLink.tsx
│   │   │   ├── Logo.tsx
│   │   │   ├── Modal.stories.tsx
│   │   │   ├── Modal.tsx
│   │   │   ├── OverflowDiv.stories.tsx
│   │   │   ├── OverflowDiv.tsx
│   │   │   ├── Popover.stories.tsx
│   │   │   ├── Popover.tsx
│   │   │   ├── PreviewImage.stories.tsx
│   │   │   ├── PreviewImage.tsx
│   │   │   ├── ScrollIndicatorDiv.stories.tsx
│   │   │   ├── ScrollIndicatorDiv.tsx
│   │   │   ├── Separator.stories.tsx
│   │   │   ├── Separator.tsx
│   │   │   ├── ShadowDiv.stories.tsx
│   │   │   ├── ShadowDiv.tsx
│   │   │   ├── SimpleCollapsible.stories.tsx
│   │   │   ├── SimpleCollapsible.tsx
│   │   │   ├── SimplePopover.stories.tsx
│   │   │   ├── SimplePopover.tsx
│   │   │   ├── SimpleTabs.stories.tsx
│   │   │   ├── SimpleTabs.tsx
│   │   │   ├── SimpleTooltip.stories.tsx
│   │   │   ├── SimpleTooltip.tsx
│   │   │   ├── Spacer.stories.tsx
│   │   │   ├── Spacer.tsx
│   │   │   ├── Tabs.stories.tsx
│   │   │   ├── Tabs.tsx
│   │   │   ├── TextSeparator.stories.tsx
│   │   │   ├── TextSeparator.tsx
│   │   │   ├── avatars/
│   │   │   │   ├── AgentAvatar.tsx
│   │   │   │   ├── CustomAgentAvatar.stories.tsx
│   │   │   │   ├── CustomAgentAvatar.tsx
│   │   │   │   └── UserAvatar.tsx
│   │   │   ├── buttons/
│   │   │   │   ├── AttachmentButton.stories.tsx
│   │   │   │   ├── AttachmentButton.tsx
│   │   │   │   ├── BackButton.stories.tsx
│   │   │   │   ├── BackButton.tsx
│   │   │   │   ├── Button.stories.tsx
│   │   │   │   ├── Button.tsx
│   │   │   │   ├── ButtonRenaming.stories.tsx
│   │   │   │   ├── ButtonRenaming.tsx
│   │   │   │   ├── CopyIconButton.stories.tsx
│   │   │   │   ├── CopyIconButton.tsx
│   │   │   │   ├── CreateButton.stories.tsx
│   │   │   │   ├── CreateButton.tsx
│   │   │   │   ├── IconButton.stories.tsx
│   │   │   │   ├── IconButton.tsx
│   │   │   │   ├── LineItem.stories.tsx
│   │   │   │   ├── LineItem.tsx
│   │   │   │   ├── SelectButton.stories.tsx
│   │   │   │   ├── SelectButton.tsx
│   │   │   │   ├── SquareButton.stories.tsx
│   │   │   │   ├── SquareButton.tsx
│   │   │   │   ├── Tag.stories.tsx
│   │   │   │   ├── Tag.tsx
│   │   │   │   └── source-tag/
│   │   │   │       ├── SourceTag.tsx
│   │   │   │       ├── SourceTagDetailsCard.tsx
│   │   │   │       ├── index.ts
│   │   │   │       └── sourceTagUtils.ts
│   │   │   ├── cards/
│   │   │   │   ├── Card.stories.tsx
│   │   │   │   ├── Card.tsx
│   │   │   │   └── index.ts
│   │   │   ├── commandmenu/
│   │   │   │   ├── CommandMenu.stories.tsx
│   │   │   │   ├── CommandMenu.test.tsx
│   │   │   │   ├── CommandMenu.tsx
│   │   │   │   └── types.ts
│   │   │   ├── contexts/
│   │   │   │   └── ModalContext.tsx
│   │   │   ├── form/
│   │   │   │   ├── CheckboxField.tsx
│   │   │   │   ├── FieldContext.tsx
│   │   │   │   ├── FormField.stories.tsx
│   │   │   │   ├── FormField.tsx
│   │   │   │   ├── FormikField.tsx
│   │   │   │   ├── FormikFields.stories.tsx
│   │   │   │   ├── InputDatePickerField.tsx
│   │   │   │   ├── InputSelectField.tsx
│   │   │   │   ├── InputTextAreaField.tsx
│   │   │   │   ├── InputTypeInElementField.tsx
│   │   │   │   ├── InputTypeInField.tsx
│   │   │   │   ├── Label.stories.tsx
│   │   │   │   ├── Label.tsx
│   │   │   │   ├── LabeledCheckboxField.tsx
│   │   │   │   ├── PasswordInputTypeInField.tsx
│   │   │   │   ├── SwitchField.tsx
│   │   │   │   └── types.ts
│   │   │   ├── inputs/
│   │   │   │   ├── Checkbox.stories.tsx
│   │   │   │   ├── Checkbox.test.tsx
│   │   │   │   ├── Checkbox.tsx
│   │   │   │   ├── InputAvatar.stories.tsx
│   │   │   │   ├── InputAvatar.tsx
│   │   │   │   ├── InputChipField.stories.tsx
│   │   │   │   ├── InputChipField.tsx
│   │   │   │   ├── InputComboBox/
│   │   │   │   │   ├── InputComboBox.stories.tsx
│   │   │   │   │   ├── InputComboBox.test.tsx
│   │   │   │   │   ├── InputComboBox.tsx
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── ComboBoxDropdown.tsx
│   │   │   │   │   │   ├── OptionItem.tsx
│   │   │   │   │   │   └── OptionsList.tsx
│   │   │   │   │   ├── hooks.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── types.ts
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── aria.ts
│   │   │   │   │       └── validation.ts
│   │   │   │   ├── InputDatePicker.stories.tsx
│   │   │   │   ├── InputDatePicker.tsx
│   │   │   │   ├── InputFile.stories.tsx
│   │   │   │   ├── InputFile.tsx
│   │   │   │   ├── InputImage.stories.tsx
│   │   │   │   ├── InputImage.tsx
│   │   │   │   ├── InputKeyValue.stories.tsx
│   │   │   │   ├── InputKeyValue.tsx
│   │   │   │   ├── InputNumber.stories.tsx
│   │   │   │   ├── InputNumber.tsx
│   │   │   │   ├── InputSearch.stories.tsx
│   │   │   │   ├── InputSearch.tsx
│   │   │   │   ├── InputSelect.stories.tsx
│   │   │   │   ├── InputSelect.tsx
│   │   │   │   ├── InputTextArea.stories.tsx
│   │   │   │   ├── InputTextArea.tsx
│   │   │   │   ├── InputTypeIn.stories.tsx
│   │   │   │   ├── InputTypeIn.tsx
│   │   │   │   ├── ListFieldInput.stories.tsx
│   │   │   │   ├── ListFieldInput.tsx
│   │   │   │   ├── PasswordInputTypeIn.stories.tsx
│   │   │   │   ├── PasswordInputTypeIn.test.ts
│   │   │   │   ├── PasswordInputTypeIn.tsx
│   │   │   │   ├── Switch.stories.tsx
│   │   │   │   ├── Switch.tsx
│   │   │   │   └── styles.ts
│   │   │   ├── layouts/
│   │   │   │   ├── ConfirmationModalLayout.stories.tsx
│   │   │   │   └── ConfirmationModalLayout.tsx
│   │   │   ├── loaders/
│   │   │   │   ├── SimpleLoader.stories.tsx
│   │   │   │   └── SimpleLoader.tsx
│   │   │   ├── messages/
│   │   │   │   ├── FieldMessage.stories.tsx
│   │   │   │   ├── FieldMessage.tsx
│   │   │   │   ├── InfoBlock.stories.tsx
│   │   │   │   ├── InfoBlock.tsx
│   │   │   │   ├── Message.stories.tsx
│   │   │   │   └── Message.tsx
│   │   │   ├── modals/
│   │   │   │   └── MemoriesModal.tsx
│   │   │   ├── popovers/
│   │   │   │   ├── ActionsPopover/
│   │   │   │   │   ├── ActionLineItem.tsx
│   │   │   │   │   ├── MCPLineItem.tsx
│   │   │   │   │   ├── SwitchList.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── FilePickerPopover.tsx
│   │   │   │   ├── LLMPopover.test.tsx
│   │   │   │   ├── LLMPopover.tsx
│   │   │   │   └── interfaces.ts
│   │   │   ├── skeletons/
│   │   │   │   ├── ChatSessionSkeleton.stories.tsx
│   │   │   │   ├── ChatSessionSkeleton.tsx
│   │   │   │   ├── SidebarTabSkeleton.stories.tsx
│   │   │   │   └── SidebarTabSkeleton.tsx
│   │   │   ├── texts/
│   │   │   │   ├── ExpandableTextDisplay.stories.tsx
│   │   │   │   ├── ExpandableTextDisplay.tsx
│   │   │   │   ├── Text.stories.tsx
│   │   │   │   ├── Text.tsx
│   │   │   │   ├── Truncated.stories.tsx
│   │   │   │   └── Truncated.tsx
│   │   │   └── tiles/
│   │   │       ├── ButtonTile.stories.tsx
│   │   │       ├── ButtonTile.tsx
│   │   │       ├── FileTile.stories.tsx
│   │   │       └── FileTile.tsx
│   │   ├── refresh-pages/
│   │   │   ├── AgentEditorPage.tsx
│   │   │   ├── AgentsNavigationPage.tsx
│   │   │   ├── AppPage.tsx
│   │   │   ├── SettingsPage.tsx
│   │   │   └── admin/
│   │   │       ├── AgentsPage/
│   │   │       │   ├── AgentRowActions.tsx
│   │   │       │   ├── AgentsTable.tsx
│   │   │       │   ├── interfaces.ts
│   │   │       │   └── svc.ts
│   │   │       ├── AgentsPage.tsx
│   │   │       ├── ChatPreferencesPage.tsx
│   │   │       ├── CodeInterpreterPage/
│   │   │       │   ├── index.tsx
│   │   │       │   └── svc.ts
│   │   │       ├── GroupsPage/
│   │   │       │   ├── CreateGroupPage.tsx
│   │   │       │   ├── EditGroupPage.tsx
│   │   │       │   ├── GroupCard.tsx
│   │   │       │   ├── GroupsList.tsx
│   │   │       │   ├── SharedGroupResources/
│   │   │       │   │   ├── ResourceContent.tsx
│   │   │       │   │   ├── ResourcePopover.tsx
│   │   │       │   │   ├── index.tsx
│   │   │       │   │   └── interfaces.ts
│   │   │       │   ├── TokenLimitSection.tsx
│   │   │       │   ├── index.tsx
│   │   │       │   ├── interfaces.ts
│   │   │       │   ├── shared.tsx
│   │   │       │   ├── svc.ts
│   │   │       │   └── utils.ts
│   │   │       ├── ImageGenerationPage/
│   │   │       │   ├── ImageGenerationContent.tsx
│   │   │       │   ├── constants.ts
│   │   │       │   ├── forms/
│   │   │       │   │   ├── AzureImageGenForm.tsx
│   │   │       │   │   ├── ImageGenFormWrapper.tsx
│   │   │       │   │   ├── OpenAIImageGenForm.tsx
│   │   │       │   │   ├── VertexImageGenForm.tsx
│   │   │       │   │   ├── getImageGenForm.tsx
│   │   │       │   │   ├── index.ts
│   │   │       │   │   └── types.ts
│   │   │       │   ├── index.tsx
│   │   │       │   └── svc.ts
│   │   │       ├── LLMConfigurationPage.tsx
│   │   │       ├── ServiceAccountsPage/
│   │   │       │   ├── ApiKeyFormModal.tsx
│   │   │       │   ├── index.tsx
│   │   │       │   ├── interfaces.ts
│   │   │       │   └── svc.ts
│   │   │       ├── UsersPage/
│   │   │       │   ├── EditUserModal.tsx
│   │   │       │   ├── GroupsCell.tsx
│   │   │       │   ├── InviteUsersModal.tsx
│   │   │       │   ├── UserActionModals.tsx
│   │   │       │   ├── UserFilters.tsx
│   │   │       │   ├── UserRoleCell.tsx
│   │   │       │   ├── UserRowActions.tsx
│   │   │       │   ├── UsersSummary.tsx
│   │   │       │   ├── UsersTable.tsx
│   │   │       │   ├── index.tsx
│   │   │       │   ├── interfaces.ts
│   │   │       │   └── svc.ts
│   │   │       ├── VoiceConfigurationPage.tsx
│   │   │       └── WebSearchPage/
│   │   │           ├── WebProviderModalReducer.ts
│   │   │           ├── WebProviderSetupModal.tsx
│   │   │           ├── connectProviderFlow.ts
│   │   │           ├── contentProviderUtils.ts
│   │   │           ├── index.tsx
│   │   │           ├── interfaces.ts
│   │   │           ├── searchProviderUtils.ts
│   │   │           └── svc.ts
│   │   ├── sections/
│   │   │   ├── AppHealthBanner.tsx
│   │   │   ├── Suggestions.tsx
│   │   │   ├── actions/
│   │   │   │   ├── ActionCard.tsx
│   │   │   │   ├── ActionCardContext.tsx
│   │   │   │   ├── ActionCardHeader.tsx
│   │   │   │   ├── Actions.tsx
│   │   │   │   ├── MCPActionCard.tsx
│   │   │   │   ├── MCPPageContent.tsx
│   │   │   │   ├── OpenApiActionCard.tsx
│   │   │   │   ├── OpenApiPageContent.tsx
│   │   │   │   ├── PerUserAuthConfig.tsx
│   │   │   │   ├── ToolItem.tsx
│   │   │   │   ├── ToolsList.tsx
│   │   │   │   ├── ToolsSection.tsx
│   │   │   │   ├── modals/
│   │   │   │   │   ├── AddMCPServerModal.tsx
│   │   │   │   │   ├── AddOpenAPIActionModal.tsx
│   │   │   │   │   ├── DisconnectEntityModal.tsx
│   │   │   │   │   ├── MCPAuthenticationModal.tsx
│   │   │   │   │   └── OpenAPIAuthenticationModal.tsx
│   │   │   │   └── skeleton/
│   │   │   │       ├── ActionCardSkeleton.tsx
│   │   │   │       └── ToolItemSkeleton.tsx
│   │   │   ├── admin/
│   │   │   │   ├── AdminListHeader.tsx
│   │   │   │   └── ProviderCard.tsx
│   │   │   ├── cards/
│   │   │   │   ├── AgentCard.tsx
│   │   │   │   ├── DocumentSetCard.tsx
│   │   │   │   ├── FileCard.tsx
│   │   │   │   └── README.md
│   │   │   ├── chat/
│   │   │   │   ├── ChatScrollContainer.tsx
│   │   │   │   └── ChatUI.tsx
│   │   │   ├── document-sidebar/
│   │   │   │   ├── ChatDocumentDisplay.tsx
│   │   │   │   └── DocumentsSidebar.tsx
│   │   │   ├── input/
│   │   │   │   ├── AppInputBar.tsx
│   │   │   │   ├── MicrophoneButton.tsx
│   │   │   │   └── SharedAppInputBar.tsx
│   │   │   ├── knowledge/
│   │   │   │   ├── AgentKnowledgePane.tsx
│   │   │   │   └── SourceHierarchyBrowser.tsx
│   │   │   ├── modals/
│   │   │   │   ├── AgentViewerModal.tsx
│   │   │   │   ├── FeedbackModal.tsx
│   │   │   │   ├── NewTenantModal.tsx
│   │   │   │   ├── PreviewModal/
│   │   │   │   │   ├── ExceptionTraceModal.tsx
│   │   │   │   │   ├── FloatingFooter.tsx
│   │   │   │   │   ├── PreviewModal.tsx
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── interfaces.ts
│   │   │   │   │   └── variants/
│   │   │   │   │       ├── CodePreview.tsx
│   │   │   │   │       ├── codeVariant.tsx
│   │   │   │   │       ├── csvVariant.tsx
│   │   │   │   │       ├── dataVariant.tsx
│   │   │   │   │       ├── docxVariant.tsx
│   │   │   │   │       ├── imageVariant.tsx
│   │   │   │   │       ├── index.ts
│   │   │   │   │       ├── markdownVariant.tsx
│   │   │   │   │       ├── pdfVariant.tsx
│   │   │   │   │       ├── shared.tsx
│   │   │   │   │       ├── textVariant.tsx
│   │   │   │   │       └── unsupportedVariant.tsx
│   │   │   │   ├── ShareAgentModal.test.tsx
│   │   │   │   ├── ShareAgentModal.tsx
│   │   │   │   ├── ShareChatSessionModal.tsx
│   │   │   │   └── llmConfig/
│   │   │   │       ├── AnthropicModal.tsx
│   │   │   │       ├── AzureModal.tsx
│   │   │   │       ├── BedrockModal.tsx
│   │   │   │       ├── BifrostModal.tsx
│   │   │   │       ├── CustomModal.test.tsx
│   │   │   │       ├── CustomModal.tsx
│   │   │   │       ├── LMStudioForm.tsx
│   │   │   │       ├── LiteLLMProxyModal.tsx
│   │   │   │       ├── OllamaModal.tsx
│   │   │   │       ├── OpenAIModal.tsx
│   │   │   │       ├── OpenRouterModal.tsx
│   │   │   │       ├── VertexAIModal.tsx
│   │   │   │       ├── getModal.tsx
│   │   │   │       ├── shared.tsx
│   │   │   │       ├── svc.ts
│   │   │   │       └── utils.ts
│   │   │   ├── onboarding/
│   │   │   │   ├── OnboardingFlow.tsx
│   │   │   │   ├── __tests__/
│   │   │   │   │   └── onboardingReducer.test.ts
│   │   │   │   ├── components/
│   │   │   │   │   ├── LLMProviderCard.tsx
│   │   │   │   │   ├── NonAdminStep.tsx
│   │   │   │   │   └── OnboardingHeader.tsx
│   │   │   │   ├── constants.ts
│   │   │   │   ├── forms/
│   │   │   │   │   └── getOnboardingForm.tsx
│   │   │   │   ├── reducer.ts
│   │   │   │   └── steps/
│   │   │   │       ├── FinalStep.tsx
│   │   │   │       ├── LLMStep.tsx
│   │   │   │       └── NameStep.tsx
│   │   │   ├── settings/
│   │   │   │   └── Memories.tsx
│   │   │   └── sidebar/
│   │   │       ├── AdminSidebar.tsx
│   │   │       ├── AgentButton.tsx
│   │   │       ├── AppSidebar.tsx
│   │   │       ├── ChatButton.tsx
│   │   │       ├── ChatSearchCommandMenu.tsx
│   │   │       ├── CreateConnectorSidebar.tsx
│   │   │       ├── NotificationsPopover.tsx
│   │   │       ├── ProjectFolderButton.tsx
│   │   │       ├── SidebarBody.tsx
│   │   │       ├── SidebarSection.tsx
│   │   │       ├── SidebarWrapper.tsx
│   │   │       ├── StepSidebarWrapper.tsx
│   │   │       ├── UpsertEmbeddingSidebar.tsx
│   │   │       ├── UserAvatarPopover.tsx
│   │   │       ├── chatSearchUtils.ts
│   │   │       ├── constants.ts
│   │   │       ├── sidebarUtils.ts
│   │   │       └── useChatSearchOptimistic.ts
│   │   └── types.ts
│   ├── tailwind-themes/
│   │   └── tailwind.config.js
│   ├── tailwind.config.js
│   ├── tests/
│   │   ├── README.md
│   │   ├── e2e/
│   │   │   ├── admin/
│   │   │   │   ├── admin_auth.setup.ts
│   │   │   │   ├── admin_oauth_redirect_uri.spec.ts
│   │   │   │   ├── admin_pages.spec.ts
│   │   │   │   ├── code-interpreter/
│   │   │   │   │   └── code_interpreter.spec.ts
│   │   │   │   ├── default-agent.spec.ts
│   │   │   │   ├── disable_default_agent.spec.ts
│   │   │   │   ├── discord-bot/
│   │   │   │   │   ├── admin-workflows.spec.ts
│   │   │   │   │   ├── bot-config.spec.ts
│   │   │   │   │   ├── channel-config.spec.ts
│   │   │   │   │   ├── fixtures.ts
│   │   │   │   │   └── guilds-list.spec.ts
│   │   │   │   ├── ee_feature_redirect.spec.ts
│   │   │   │   ├── groups/
│   │   │   │   │   ├── GroupsAdminPage.ts
│   │   │   │   │   ├── fixtures.ts
│   │   │   │   │   └── groups.spec.ts
│   │   │   │   ├── image-generation/
│   │   │   │   │   ├── disconnect-provider.spec.ts
│   │   │   │   │   └── image-generation-content.spec.ts
│   │   │   │   ├── llm_provider_setup.spec.ts
│   │   │   │   ├── oauth_config/
│   │   │   │   │   └── test_tool_oauth.spec.ts
│   │   │   │   ├── scim/
│   │   │   │   │   ├── fixtures.ts
│   │   │   │   │   └── scim.spec.ts
│   │   │   │   ├── theme/
│   │   │   │   │   └── appearance_theme_settings.spec.ts
│   │   │   │   ├── users/
│   │   │   │   │   ├── UsersAdminPage.ts
│   │   │   │   │   ├── fixtures.ts
│   │   │   │   │   └── users.spec.ts
│   │   │   │   ├── voice/
│   │   │   │   │   └── disconnect-provider.spec.ts
│   │   │   │   └── web-search/
│   │   │   │       ├── disconnect-provider.spec.ts
│   │   │   │       ├── svc.ts
│   │   │   │       ├── web_content_providers.spec.ts
│   │   │   │       └── web_search_providers.spec.ts
│   │   │   ├── agents/
│   │   │   │   ├── create_and_edit_agent.spec.ts
│   │   │   │   ├── llm_provider_rbac.spec.ts
│   │   │   │   └── user_file_attachment.spec.ts
│   │   │   ├── auth/
│   │   │   │   ├── email_verification.spec.ts
│   │   │   │   ├── login.spec.ts
│   │   │   │   ├── password_managements.spec.ts
│   │   │   │   ├── pat_management.spec.ts
│   │   │   │   └── signup.spec.ts
│   │   │   ├── chat/
│   │   │   │   ├── actions_popover.spec.ts
│   │   │   │   ├── chat-search-command-menu.spec.ts
│   │   │   │   ├── chat_message_rendering.spec.ts
│   │   │   │   ├── chat_session_not_found.spec.ts
│   │   │   │   ├── current_agent.spec.ts
│   │   │   │   ├── default_agent.spec.ts
│   │   │   │   ├── default_app_mode.spec.ts
│   │   │   │   ├── file_preview_modal.spec.ts
│   │   │   │   ├── input_focus_retention.spec.ts
│   │   │   │   ├── live_agent.spec.ts
│   │   │   │   ├── llm_ordering.spec.ts
│   │   │   │   ├── llm_runtime_selection.spec.ts
│   │   │   │   ├── message_edit_regenerate.spec.ts
│   │   │   │   ├── message_feedback.spec.ts
│   │   │   │   ├── project_files_visual_regression.spec.ts
│   │   │   │   ├── scroll_behavior.spec.ts
│   │   │   │   ├── share_chat.spec.ts
│   │   │   │   └── welcome_page.spec.ts
│   │   │   ├── connectors/
│   │   │   │   ├── federated_slack.spec.ts
│   │   │   │   └── inlineFileManagement.spec.ts
│   │   │   ├── constants.ts
│   │   │   ├── fixtures/
│   │   │   │   ├── eeFeatures.ts
│   │   │   │   ├── llmProvider.ts
│   │   │   │   └── three_images.docx
│   │   │   ├── global-setup.ts
│   │   │   ├── mcp/
│   │   │   │   ├── default-agent-mcp.spec.ts
│   │   │   │   └── mcp_oauth_flow.spec.ts
│   │   │   ├── onboarding/
│   │   │   │   └── onboarding_flow.spec.ts
│   │   │   ├── settings/
│   │   │   │   └── settings_pages.spec.ts
│   │   │   └── utils/
│   │   │       ├── agentUtils.ts
│   │   │       ├── auth.ts
│   │   │       ├── chatActions.ts
│   │   │       ├── chatStream.ts
│   │   │       ├── dragUtils.ts
│   │   │       ├── mcpServer.ts
│   │   │       ├── onyxApiClient.ts
│   │   │       ├── pageStateLogger.ts
│   │   │       ├── theme.ts
│   │   │       ├── tools.ts
│   │   │       └── visualRegression.ts
│   │   └── setup/
│   │       ├── fileMock.js
│   │       ├── jest.setup.ts
│   │       ├── llmProviderTestUtils.ts
│   │       ├── mocks/
│   │       │   ├── README.md
│   │       │   ├── components/
│   │       │   │   └── UserProvider.tsx
│   │       │   └── cssMock.js
│   │       └── test-utils.tsx
│   ├── tsconfig.json
│   ├── tsconfig.types.json
│   └── types/
│       ├── assets.d.ts
│       └── favicon-fetch.d.ts
└── widget/
    ├── .gitignore
    ├── README.md
    ├── index.html
    ├── package.json
    ├── src/
    │   ├── assets/
    │   │   └── logo.ts
    │   ├── config/
    │   │   └── config.ts
    │   ├── index.ts
    │   ├── services/
    │   │   ├── api-service.ts
    │   │   └── stream-parser.ts
    │   ├── styles/
    │   │   ├── colors.ts
    │   │   ├── theme.ts
    │   │   └── widget-styles.ts
    │   ├── types/
    │   │   ├── api-types.ts
    │   │   └── widget-types.ts
    │   ├── utils/
    │   │   └── storage.ts
    │   └── widget.ts
    ├── tsconfig.json
    └── vite.config.ts

================================================
FILE CONTENTS
================================================

================================================
FILE: .git-blame-ignore-revs
================================================
# Exclude these commits from git blame (e.g. mass reformatting).
# These are ignored by GitHub automatically.
# To enable this locally, run:
#
#    git config blame.ignoreRevsFile .git-blame-ignore-revs

3134e5f840c12c8f32613ce520101a047c89dcc2  # refactor(whitespace): rm temporary react fragments (#7161)
ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4  # refactor(whitespace): rm react fragment #7190
7b927e79c25f4ddfd18a067f489e122acd2c89de  # chore(format): format files where `ruff` and `black` agree (#9339)


================================================
FILE: .github/CODEOWNERS
================================================
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara

# Web standards updates
/web/STANDARDS.md @raunakab @Weves

# Agent context files
/CLAUDE.md @Weves
/AGENTS.md @Weves

# Beta cherry-pick workflow owners
/.github/workflows/post-merge-beta-cherry-pick.yml @justin-tahara @jmelahman


================================================
FILE: .github/actionlint.yml
================================================
self-hosted-runner:
  # Labels of self-hosted runner in array of strings.
  labels:
    - extras=ecr-cache
    - extras=s3-cache
    - hdd=256
    - runs-on
    - runner=1cpu-linux-arm64
    - runner=1cpu-linux-x64
    - runner=2cpu-linux-arm64
    - runner=2cpu-linux-x64
    - runner=4cpu-linux-arm64
    - runner=4cpu-linux-x64
    - runner=8cpu-linux-arm64
    - runner=8cpu-linux-x64
    - runner=16cpu-linux-arm64
    - runner=16cpu-linux-x64
    - ubuntu-slim # Currently in public preview
    - volume=40gb
    - volume=50gb

# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.
# Empty array means no configuration variable is allowed.
config-variables: null

# Configuration for file paths. The keys are glob patterns to match to file
# paths relative to the repository root. The values are the configurations for
# the file paths. Note that the path separator is always '/'.
# The following configurations are available.
#
# "ignore" is an array of regular expression patterns. Matched error messages
# are ignored. This is similar to the "-ignore" command line option.
paths:
  # Glob pattern relative to the repository root for matching files. The path separator is always '/'.
  # This example configures any YAML file under the '.github/workflows/' directory.
  .github/workflows/**/*.{yml,yaml}:
    # TODO: These are real and should be fixed eventually.
    ignore:
      - 'shellcheck reported issue in this script: SC2038:.+'
      - 'shellcheck reported issue in this script: SC2046:.+'
      - 'shellcheck reported issue in this script: SC2086:.+'
      - 'shellcheck reported issue in this script: SC2193:.+'


================================================
FILE: .github/actions/build-backend-image/action.yml
================================================
name: "Build Backend Image"
description: "Builds and pushes the backend Docker image with cache reuse"
inputs:
  runs-on-ecr-cache:
    description: "ECR cache registry from runs-on/action"
    required: true
  ref-name:
    description: "Git ref name used for cache suffix fallback"
    required: true
  pr-number:
    description: "Optional PR number for cache suffix"
    required: false
    default: ""
  github-sha:
    description: "Commit SHA used for cache keys"
    required: true
  run-id:
    description: "GitHub run ID used in output image tag"
    required: true
  docker-username:
    description: "Docker Hub username"
    required: true
  docker-token:
    description: "Docker Hub token"
    required: true
  docker-no-cache:
    description: "Set to 'true' to disable docker build cache"
    required: false
    default: "false"
runs:
  using: "composite"
  steps:
    - name: Format branch name for cache
      id: format-branch
      shell: bash
      env:
        PR_NUMBER: ${{ inputs.pr-number }}
        REF_NAME: ${{ inputs.ref-name }}
      run: |
        if [ -n "${PR_NUMBER}" ]; then
          CACHE_SUFFIX="${PR_NUMBER}"
        else
          # shellcheck disable=SC2001
          CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
        fi
        echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
      with:
        username: ${{ inputs.docker-username }}
        password: ${{ inputs.docker-token }}

    - name: Build and push Backend Docker image
      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
      with:
        context: ./backend
        file: ./backend/Dockerfile
        push: true
        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-backend-${{ inputs.run-id }}
        cache-from: |
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }}
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache
          type=registry,ref=onyxdotapp/onyx-backend:latest
        cache-to: |
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }},mode=max
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache,mode=max
        no-cache: ${{ inputs.docker-no-cache == 'true' }}


================================================
FILE: .github/actions/build-integration-image/action.yml
================================================
name: "Build Integration Image"
description: "Builds and pushes the integration test image with docker bake"
inputs:
  runs-on-ecr-cache:
    description: "ECR cache registry from runs-on/action"
    required: true
  ref-name:
    description: "Git ref name used for cache suffix fallback"
    required: true
  pr-number:
    description: "Optional PR number for cache suffix"
    required: false
    default: ""
  github-sha:
    description: "Commit SHA used for cache keys"
    required: true
  run-id:
    description: "GitHub run ID used in output image tag"
    required: true
  docker-username:
    description: "Docker Hub username"
    required: true
  docker-token:
    description: "Docker Hub token"
    required: true
runs:
  using: "composite"
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
      with:
        username: ${{ inputs.docker-username }}
        password: ${{ inputs.docker-token }}

    - name: Format branch name for cache
      id: format-branch
      shell: bash
      env:
        PR_NUMBER: ${{ inputs.pr-number }}
        REF_NAME: ${{ inputs.ref-name }}
      run: |
        if [ -n "${PR_NUMBER}" ]; then
          CACHE_SUFFIX="${PR_NUMBER}"
        else
          # shellcheck disable=SC2001
          CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
        fi
        echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"

    - name: Build and push integration test image with Docker Bake
      shell: bash
      env:
        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
        INTEGRATION_REPOSITORY: ${{ inputs.runs-on-ecr-cache }}
        TAG: nightly-llm-it-${{ inputs.run-id }}
        CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
        HEAD_SHA: ${{ inputs.github-sha }}
      run: |
        docker buildx bake --push \
          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
          --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
          integration


================================================
FILE: .github/actions/build-model-server-image/action.yml
================================================
name: "Build Model Server Image"
description: "Builds and pushes the model server Docker image with cache reuse"
inputs:
  runs-on-ecr-cache:
    description: "ECR cache registry from runs-on/action"
    required: true
  ref-name:
    description: "Git ref name used for cache suffix fallback"
    required: true
  pr-number:
    description: "Optional PR number for cache suffix"
    required: false
    default: ""
  github-sha:
    description: "Commit SHA used for cache keys"
    required: true
  run-id:
    description: "GitHub run ID used in output image tag"
    required: true
  docker-username:
    description: "Docker Hub username"
    required: true
  docker-token:
    description: "Docker Hub token"
    required: true
runs:
  using: "composite"
  steps:
    - name: Format branch name for cache
      id: format-branch
      shell: bash
      env:
        PR_NUMBER: ${{ inputs.pr-number }}
        REF_NAME: ${{ inputs.ref-name }}
      run: |
        if [ -n "${PR_NUMBER}" ]; then
          CACHE_SUFFIX="${PR_NUMBER}"
        else
          # shellcheck disable=SC2001
          CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
        fi
        echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
      with:
        username: ${{ inputs.docker-username }}
        password: ${{ inputs.docker-token }}

    - name: Build and push Model Server Docker image
      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
      with:
        context: ./backend
        file: ./backend/Dockerfile.model_server
        push: true
        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-model-server-${{ inputs.run-id }}
        cache-from: |
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }}
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache
          type=registry,ref=onyxdotapp/onyx-model-server:latest
        cache-to: |
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }},mode=max
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache,mode=max


================================================
FILE: .github/actions/run-nightly-provider-chat-test/action.yml
================================================
name: "Run Nightly Provider Chat Test"
description: "Starts required compose services and runs nightly provider integration test"
inputs:
  provider:
    description: "Provider slug for NIGHTLY_LLM_PROVIDER"
    required: true
  models:
    description: "Comma-separated model list for NIGHTLY_LLM_MODELS"
    required: true
  provider-api-key:
    description: "API key for NIGHTLY_LLM_API_KEY"
    required: false
    default: ""
  strict:
    description: "String true/false for NIGHTLY_LLM_STRICT"
    required: true
  api-base:
    description: "Optional NIGHTLY_LLM_API_BASE"
    required: false
    default: ""
  api-version:
    description: "Optional NIGHTLY_LLM_API_VERSION"
    required: false
    default: ""
  deployment-name:
    description: "Optional NIGHTLY_LLM_DEPLOYMENT_NAME"
    required: false
    default: ""
  custom-config-json:
    description: "Optional NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
    required: false
    default: ""
  runs-on-ecr-cache:
    description: "ECR cache registry from runs-on/action"
    required: true
  run-id:
    description: "GitHub run ID used in image tags"
    required: true
  docker-username:
    description: "Docker Hub username"
    required: true
  docker-token:
    description: "Docker Hub token"
    required: true
runs:
  using: "composite"
  steps:
    - name: Login to Docker Hub
      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
      with:
        username: ${{ inputs.docker-username }}
        password: ${{ inputs.docker-token }}

    - name: Create .env file for Docker Compose
      shell: bash
      env:
        ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
        RUN_ID: ${{ inputs.run-id }}
      run: |
        cat <<EOF2 > deployment/docker_compose/.env
        COMPOSE_PROFILES=s3-filestore
        ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
        LICENSE_ENFORCEMENT_ENABLED=false
        AUTH_TYPE=basic
        POSTGRES_POOL_PRE_PING=true
        POSTGRES_USE_NULL_POOL=true
        REQUIRE_EMAIL_VERIFICATION=false
        DISABLE_TELEMETRY=true
        INTEGRATION_TESTS_MODE=true
        AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
        AWS_REGION_NAME=us-west-2
        ONYX_BACKEND_IMAGE=${ECR_CACHE}:nightly-llm-it-backend-${RUN_ID}
        ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:nightly-llm-it-model-server-${RUN_ID}
        EOF2

    - name: Start Docker containers
      shell: bash
      run: |
        cd deployment/docker_compose
        docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait \
          relational_db \
          index \
          cache \
          minio \
          api_server \
          inference_model_server

    - name: Run nightly provider integration test
      uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
      env:
        MODELS: ${{ inputs.models }}
        NIGHTLY_LLM_PROVIDER: ${{ inputs.provider }}
        NIGHTLY_LLM_API_KEY: ${{ inputs.provider-api-key }}
        NIGHTLY_LLM_API_BASE: ${{ inputs.api-base }}
        NIGHTLY_LLM_API_VERSION: ${{ inputs.api-version }}
        NIGHTLY_LLM_DEPLOYMENT_NAME: ${{ inputs.deployment-name }}
        NIGHTLY_LLM_CUSTOM_CONFIG_JSON: ${{ inputs.custom-config-json }}
        NIGHTLY_LLM_STRICT: ${{ inputs.strict }}
        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
        RUN_ID: ${{ inputs.run-id }}
      with:
        timeout_minutes: 20
        max_attempts: 2
        retry_wait_seconds: 10
        command: |
          docker run --rm --network onyx_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e POSTGRES_DB=postgres \
            -e DB_READONLY_USER=db_readonly_user \
            -e DB_READONLY_PASSWORD=password \
            -e POSTGRES_POOL_PRE_PING=true \
            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
            -e TEST_WEB_HOSTNAME=test-runner \
            -e AWS_REGION_NAME=us-west-2 \
            -e NIGHTLY_LLM_PROVIDER="${NIGHTLY_LLM_PROVIDER}" \
            -e NIGHTLY_LLM_MODELS="${MODELS}" \
            -e NIGHTLY_LLM_API_KEY="${NIGHTLY_LLM_API_KEY}" \
            -e NIGHTLY_LLM_API_BASE="${NIGHTLY_LLM_API_BASE}" \
            -e NIGHTLY_LLM_API_VERSION="${NIGHTLY_LLM_API_VERSION}" \
            -e NIGHTLY_LLM_DEPLOYMENT_NAME="${NIGHTLY_LLM_DEPLOYMENT_NAME}" \
            -e NIGHTLY_LLM_CUSTOM_CONFIG_JSON="${NIGHTLY_LLM_CUSTOM_CONFIG_JSON}" \
            -e NIGHTLY_LLM_STRICT="${NIGHTLY_LLM_STRICT}" \
            ${RUNS_ON_ECR_CACHE}:nightly-llm-it-${RUN_ID} \
            /app/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py


================================================
FILE: .github/actions/setup-playwright/action.yml
================================================
name: "Setup Playwright"
description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
runs:
  using: "composite"
  steps:
    - name: Cache playwright cache
      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
      with:
        path: ~/.cache/ms-playwright
        key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
        restore-keys: |
          ${{ runner.os }}-${{ runner.arch }}-playwright-

    - name: Install playwright
      shell: bash
      run: |
        playwright install chromium --with-deps


================================================
FILE: .github/actions/setup-python-and-install-dependencies/action.yml
================================================
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
inputs:
  requirements:
    description: "Newline-separated list of requirement files to install (relative to repo root)"
    required: true
runs:
  using: "composite"
  steps:
    - name: Compute requirements hash
      id: req-hash
      shell: bash
      env:
        REQUIREMENTS: ${{ inputs.requirements }}
      run: |
        # Hash the contents of the specified requirement files
        hash=""
        while IFS= read -r req; do
          if [ -n "$req" ] && [ -f "$req" ]; then
            hash="$hash$(sha256sum "$req")"
          fi
        done <<< "$REQUIREMENTS"
        echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"

    # NOTE: This comes before Setup uv since clean-ups run in reverse chronological order
    # such that Setup uv's prune-cache is able to prune the cache before we upload.
    - name: Cache uv cache directory
      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
      with:
        path: ~/.cache/uv
        key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
        restore-keys: |
          ${{ runner.os }}-uv-

    - name: Setup uv
      uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
      with:
        version: "0.9.9"
      # TODO: Enable caching once there is a uv.lock file checked in.
      # with:
      #   enable-cache: true

    - name: Setup Python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Create virtual environment
      shell: bash
      env:
        VENV_DIR: ${{ runner.temp }}/venv
      run: | # zizmor: ignore[github-env]
        uv venv "$VENV_DIR"
        # Validate path before adding to GITHUB_PATH to prevent code injection
        if [ -d "$VENV_DIR/bin" ]; then
          realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
        else
          echo "Error: $VENV_DIR/bin does not exist"
          exit 1
        fi

    - name: Install Python dependencies with uv
      shell: bash
      env:
        REQUIREMENTS: ${{ inputs.requirements }}
      run: |
        # Build the uv pip install command with each requirement file as array elements
        cmd=("uv" "pip" "install")
        while IFS= read -r req; do
          # Skip empty lines
          if [ -n "$req" ]; then
            cmd+=("-r" "$req")
          fi
        done <<< "$REQUIREMENTS"
        echo "Running: ${cmd[*]}"
        "${cmd[@]}"


================================================
FILE: .github/actions/slack-notify/action.yml
================================================
name: "Slack Notify"
description: "Sends a Slack notification for workflow events"
inputs:
  webhook-url:
    description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
    required: false
  details:
    description: "Additional message body content"
    required: false
  failed-jobs:
    description: "Deprecated alias for details"
    required: false
  mention:
    description: "GitHub username to resolve to a Slack @-mention. Replaces {mention} in details."
    required: false
  title:
    description: "Title for the notification"
    required: false
    default: "🚨 Workflow Failed"
  ref-name:
    description: "Git ref name (tag/branch)"
    required: false
runs:
  using: "composite"
  steps:
    - name: Send Slack notification
      shell: bash
      env:
        SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
        DETAILS: ${{ inputs.details }}
        FAILED_JOBS: ${{ inputs.failed-jobs }}
        MENTION_USER: ${{ inputs.mention }}
        TITLE: ${{ inputs.title }}
        REF_NAME: ${{ inputs.ref-name }}
        REPO: ${{ github.repository }}
        WORKFLOW: ${{ github.workflow }}
        RUN_NUMBER: ${{ github.run_number }}
        RUN_ID: ${{ github.run_id }}
        SERVER_URL: ${{ github.server_url }}
        GITHUB_REF_NAME: ${{ github.ref_name }}
      run: |
        if [ -z "$SLACK_WEBHOOK_URL" ]; then
          echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
          exit 0
        fi

        # Build workflow URL
        WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"

        # Use ref_name from input or fall back to github.ref_name
        if [ -z "$REF_NAME" ]; then
          REF_NAME="$GITHUB_REF_NAME"
        fi

        if [ -z "$DETAILS" ]; then
          DETAILS="$FAILED_JOBS"
        fi

        # Resolve {mention} placeholder if a GitHub username was provided.
        # Looks up the username in user-mappings.json (co-located with this action)
        # and replaces {mention} with <@SLACK_ID> for a Slack @-mention.
        # Falls back to the plain GitHub username if not found in the mapping.
        if [ -n "$MENTION_USER" ]; then
          MAPPINGS_FILE="${GITHUB_ACTION_PATH}/user-mappings.json"
          slack_id="$(jq -r --arg gh "$MENTION_USER" 'to_entries[] | select(.value | ascii_downcase == ($gh | ascii_downcase)) | .key' "$MAPPINGS_FILE" 2>/dev/null | head -1)"

          if [ -n "$slack_id" ]; then
            mention_text="<@${slack_id}>"
          else
            mention_text="${MENTION_USER}"
          fi

          DETAILS="${DETAILS//\{mention\}/$mention_text}"
          TITLE="${TITLE//\{mention\}/}"
        else
          DETAILS="${DETAILS//\{mention\}/}"
          TITLE="${TITLE//\{mention\}/}"
        fi

        normalize_multiline() {
          printf '%s' "$1" | awk 'BEGIN { ORS=""; first=1 } { if (!first) printf "\\n"; printf "%s", $0; first=0 }'
        }

        DETAILS="$(normalize_multiline "$DETAILS")"
        REF_NAME="$(normalize_multiline "$REF_NAME")"
        TITLE="$(normalize_multiline "$TITLE")"

        # Escape JSON special characters
        escape_json() {
          local input="$1"
          # Escape backslashes first (but preserve \n sequences)
          # Protect \n sequences temporarily
          input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
          # Escape remaining backslashes
          input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
          # Restore \n sequences (single backslash, will be correct in JSON)
          input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
          # Escape quotes
          printf '%s' "$input" | sed 's/"/\\"/g'
        }

        REF_NAME_ESC=$(escape_json "$REF_NAME")
        DETAILS_ESC=$(escape_json "$DETAILS")
        WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
        TITLE_ESC=$(escape_json "$TITLE")

        # Build JSON payload piece by piece
        # Note: DETAILS_ESC already contains \n sequences that should remain as \n in JSON
        PAYLOAD="{"
        PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
        PAYLOAD="${PAYLOAD}\"blocks\":[{"
        PAYLOAD="${PAYLOAD}\"type\":\"header\","
        PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
        PAYLOAD="${PAYLOAD}},{"
        PAYLOAD="${PAYLOAD}\"type\":\"section\","
        PAYLOAD="${PAYLOAD}\"fields\":["
        if [ -n "$REF_NAME" ]; then
          PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
        fi
        PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
        PAYLOAD="${PAYLOAD}]"
        PAYLOAD="${PAYLOAD}}"
        if [ -n "$DETAILS" ]; then
          PAYLOAD="${PAYLOAD},{"
          PAYLOAD="${PAYLOAD}\"type\":\"section\","
          PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"${DETAILS_ESC}\"}"
          PAYLOAD="${PAYLOAD}}"
        fi
        PAYLOAD="${PAYLOAD},{"
        PAYLOAD="${PAYLOAD}\"type\":\"actions\","
        PAYLOAD="${PAYLOAD}\"elements\":[{"
        PAYLOAD="${PAYLOAD}\"type\":\"button\","
        PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
        PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
        PAYLOAD="${PAYLOAD}}]"
        PAYLOAD="${PAYLOAD}}"
        PAYLOAD="${PAYLOAD}]"
        PAYLOAD="${PAYLOAD}}"

        curl -X POST -H 'Content-type: application/json' \
          --data "$PAYLOAD" \
          "$SLACK_WEBHOOK_URL"


================================================
FILE: .github/actions/slack-notify/user-mappings.json
================================================
{
  "U05SAGZPEA1": "yuhongsun96",
  "U05SAH6UGUD": "Weves",
  "U07PWEQB7A5": "evan-onyx",
  "U07V1SM68KF": "joachim-danswer",
  "U08JZ9N3QNN": "raunakab",
  "U08L24NCLJE": "Subash-Mohan",
  "U090B9M07B2": "wenxi-onyx",
  "U094RASDP0Q": "duo-onyx",
  "U096L8ZQ85B": "justin-tahara",
  "U09AHV8UBQX": "jessicasingh7",
  "U09KAL5T3C2": "nmgarza5",
  "U09KPGVQ70R": "acaprau",
  "U09QR8KTSJH": "rohoswagger",
  "U09RB4NTXA4": "jmelahman",
  "U0A6K9VCY6A": "Danelegend",
  "U0AGC4KH71A": "Bo-Onyx"
}


================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    cooldown:
      default-days: 7
    open-pull-requests-limit: 3
    assignees:
      - "jmelahman"
    labels:
      - "dependabot:actions"
  - package-ecosystem: "pip"
    directory: "/backend"
    schedule:
      interval: "weekly"
    cooldown:
      default-days: 7
    open-pull-requests-limit: 3
    assignees:
      - "jmelahman"
    labels:
      - "dependabot:python"


================================================
FILE: .github/pull_request_template.md
================================================
## Description

<!--- Provide a brief description of the changes in this PR --->

## How Has This Been Tested?

<!--- Describe the tests you ran to verify your changes --->

## Additional Options

- [ ] [Optional] Please cherry-pick this PR to the latest release version.
- [ ] [Optional] Override Linear Check


================================================
FILE: .github/runs-on.yml
================================================
_extend: .github-private


================================================
FILE: .github/workflows/deployment.yml
================================================
name: Build and Push Docker Images on Tag

on:
  push:
    tags:
      - "*"
  workflow_dispatch:

# Set restrictive default permissions for all jobs. Jobs that need more permissions
# should explicitly declare them.
permissions:
  # Required for OIDC authentication with AWS
  id-token: write # zizmor: ignore[excessive-permissions]

env:
  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}

jobs:
  # Determine which components to build based on the tag
  determine-builds:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 90
    outputs:
      build-desktop: ${{ steps.check.outputs.build-desktop }}
      build-web: ${{ steps.check.outputs.build-web }}
      build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}
      build-backend: ${{ steps.check.outputs.build-backend }}
      build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}
      build-model-server: ${{ steps.check.outputs.build-model-server }}
      is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
      is-beta: ${{ steps.check.outputs.is-beta }}
      is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
      is-latest: ${{ steps.check.outputs.is-latest }}
      is-test-run: ${{ steps.check.outputs.is-test-run }}
      sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
      short-sha: ${{ steps.check.outputs.short-sha }}
    steps:
      - name: Checkout (for git tags)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
          fetch-depth: 0
          fetch-tags: true

      - name: Setup uv
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          version: "0.9.9"
          enable-cache: false

      - name: Check which components to build and version info
        id: check
        env:
          EVENT_NAME: ${{ github.event_name }}
        run: |
          set -eo pipefail
          TAG="${GITHUB_REF_NAME}"
          # Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
          SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
          SHORT_SHA="${GITHUB_SHA::7}"

          # Initialize all flags to false
          IS_CLOUD=false
          IS_NIGHTLY=false
          IS_VERSION_TAG=false
          IS_STABLE=false
          IS_BETA=false
          IS_BETA_STANDALONE=false
          IS_LATEST=false
          IS_PROD_TAG=false
          IS_TEST_RUN=false
          BUILD_DESKTOP=false
          BUILD_WEB=false
          BUILD_WEB_CLOUD=false
          BUILD_BACKEND=true
          BUILD_BACKEND_CRAFT=false
          BUILD_MODEL_SERVER=true

          # Determine tag type based on pattern matching (do regex checks once)
          if [[ "$TAG" == *cloud* ]]; then
            IS_CLOUD=true
          fi
          if [[ "$TAG" == nightly* ]]; then
            IS_NIGHTLY=true
          fi
          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]; then
            IS_VERSION_TAG=true
          fi
          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            IS_STABLE=true
          fi
          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta(\.[0-9]+)?$ ]]; then
            IS_BETA=true
          fi

          # Determine what to build based on tag type
          if [[ "$IS_CLOUD" == "true" ]]; then
            BUILD_WEB_CLOUD=true
          else
            BUILD_WEB=true
            # Only build desktop for semver tags (excluding beta)
            if [[ "$IS_VERSION_TAG" == "true" ]] && [[ "$IS_BETA" != "true" ]]; then
              BUILD_DESKTOP=true
            fi
          fi

          # Standalone version checks (for backend/model-server - version excluding cloud tags)
          if [[ "$IS_BETA" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
            IS_BETA_STANDALONE=true
          fi

          # Determine if this tag should get the "latest" Docker tag.
          # Only the highest semver stable tag (vX.Y.Z exactly) gets "latest".
          if [[ "$IS_STABLE" == "true" ]]; then
            HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {
              echo "::error::Failed to determine highest stable tag via 'ods latest-stable-tag'"
              exit 1
            }
            if [[ "$TAG" == "$HIGHEST_STABLE" ]]; then
              IS_LATEST=true
            fi
          fi

          # Build craft-latest backend alongside the regular latest.
          if [[ "$IS_LATEST" == "true" ]]; then
            BUILD_BACKEND_CRAFT=true
          fi

          # Determine if this is a production tag
          # Production tags are: version tags (v1.2.3*) or nightly tags
          if [[ "$IS_VERSION_TAG" == "true" ]] || [[ "$IS_NIGHTLY" == "true" ]]; then
            IS_PROD_TAG=true
          fi

          # Determine if this is a test run (workflow_dispatch on non-production ref)
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]] && [[ "$IS_PROD_TAG" != "true" ]]; then
            IS_TEST_RUN=true
          fi
          {
            echo "build-desktop=$BUILD_DESKTOP"
            echo "build-web=$BUILD_WEB"
            echo "build-web-cloud=$BUILD_WEB_CLOUD"
            echo "build-backend=$BUILD_BACKEND"
            echo "build-backend-craft=$BUILD_BACKEND_CRAFT"
            echo "build-model-server=$BUILD_MODEL_SERVER"
            echo "is-cloud-tag=$IS_CLOUD"
            echo "is-beta=$IS_BETA"
            echo "is-beta-standalone=$IS_BETA_STANDALONE"
            echo "is-latest=$IS_LATEST"
            echo "is-test-run=$IS_TEST_RUN"
            echo "sanitized-tag=$SANITIZED_TAG"
            echo "short-sha=$SHORT_SHA"
          } >> "$GITHUB_OUTPUT"

  check-version-tag:
    runs-on: ubuntu-slim
    timeout-minutes: 10
    if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Setup uv
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          version: "0.9.9"
          # NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
          enable-cache: false

      - name: Validate tag is versioned correctly
        run: |
          uv run --no-sync --with release-tag tag --check

  notify-slack-on-tag-check-failure:
    needs:
      - check-version-tag
    if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'
    runs-on: ubuntu-slim
    timeout-minutes: 10
    environment: release
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Send Slack notification
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
          failed-jobs: "• check-version-tag"
          title: "🚨 Version Tag Check Failed"
          ref-name: ${{ github.ref_name }}

  # Create GitHub release first, before desktop builds start.
  # This ensures all desktop matrix jobs upload to the same release instead of
  # racing to create duplicate releases.
  create-release:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-desktop == 'true'
    runs-on: ubuntu-slim
    timeout-minutes: 10
    permissions:
      contents: write
    outputs:
      release-id: ${{ steps.create-release.outputs.id }}
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Determine release tag
        id: release-tag
        env:
          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}
          SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}
        run: |
          if [ "${IS_TEST_RUN}" == "true" ]; then
            echo "tag=v0.0.0-dev+${SHORT_SHA}" >> "$GITHUB_OUTPUT"
          else
            echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
          fi

      - name: Create GitHub Release
        id: create-release
        uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # ratchet:softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.release-tag.outputs.tag }}
          name: ${{ steps.release-tag.outputs.tag }}
          body: "See the assets to download this version and install."
          draft: true
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  build-desktop:
    needs:
      - determine-builds
      - create-release
    if: needs.determine-builds.outputs.build-desktop == 'true'
    permissions:
      id-token: write
      contents: write
      actions: read
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: "macos-latest" # Build a universal image for macOS.
            args: "--target universal-apple-darwin"
          - platform: "ubuntu-24.04"
            args: "--bundles deb,rpm"
          - platform: "ubuntu-24.04-arm" # Only available in public repos.
            args: "--bundles deb,rpm"
          - platform: "windows-latest"
            args: ""

    runs-on: ${{ matrix.platform }}
    timeout-minutes: 90
    environment: release
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
        with:
          # NOTE: persist-credentials is needed for tauri-action to upload assets to GitHub releases.
          persist-credentials: true # zizmor: ignore[artipacked]

      - name: Configure AWS credentials
        if: startsWith(matrix.platform, 'macos-')
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        if: startsWith(matrix.platform, 'macos-')
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            APPLE_ID, deploy/apple-id
            APPLE_PASSWORD, deploy/apple-password
            APPLE_CERTIFICATE, deploy/apple-certificate
            APPLE_CERTIFICATE_PASSWORD, deploy/apple-certificate-password
            KEYCHAIN_PASSWORD, deploy/keychain-password
            APPLE_TEAM_ID, deploy/apple-team-id
          parse-json-secrets: true

      - name: install dependencies (ubuntu only)
        if: startsWith(matrix.platform, 'ubuntu-')
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            libglib2.0-dev \
            libgirepository1.0-dev \
            libgtk-3-dev \
            libjavascriptcoregtk-4.1-dev \
            libwebkit2gtk-4.1-dev \
            libayatana-appindicator3-dev \
            gobject-introspection \
            pkg-config \
            curl \
            xdg-utils

      - name: setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6.3.0
        with:
          node-version: 24
          package-manager-cache: false

      - name: install Rust stable
        uses: dtolnay/rust-toolchain@6d9817901c499d6b02debbb57edb38d33daa680b # zizmor: ignore[impostor-commit]
        with:
          # Those targets are only used on macos runners so it's in an `if` to slightly speed up windows and linux builds.
          targets: ${{ matrix.platform == 'macos-latest' && 'aarch64-apple-darwin,x86_64-apple-darwin' || '' }}

      - name: install frontend dependencies
        working-directory: ./desktop
        run: npm install

      - name: Inject version (Unix)
        if: runner.os != 'Windows'
        working-directory: ./desktop
        env:
          SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}
          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}
        run: |
          if [ "${IS_TEST_RUN}" == "true" ]; then
            VERSION="0.0.0-dev+${SHORT_SHA}"
          else
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          echo "Injecting version: $VERSION"

          # Update Cargo.toml
          sed "s/^version = .*/version = \"$VERSION\"/" src-tauri/Cargo.toml > src-tauri/Cargo.toml.tmp
          mv src-tauri/Cargo.toml.tmp src-tauri/Cargo.toml

          # Update tauri.conf.json
          jq --arg v "$VERSION" '.version = $v' src-tauri/tauri.conf.json > src-tauri/tauri.conf.json.tmp
          mv src-tauri/tauri.conf.json.tmp src-tauri/tauri.conf.json

          # Update package.json
          jq --arg v "$VERSION" '.version = $v' package.json > package.json.tmp
          mv package.json.tmp package.json

          echo "Versions set to: $VERSION"

      - name: Inject version (Windows)
        if: runner.os == 'Windows'
        working-directory: ./desktop
        shell: pwsh
        env:
          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}
        run: |
          # Windows MSI requires numeric-only build metadata, so we skip the SHA suffix
          if ($env:IS_TEST_RUN -eq "true") {
            $VERSION = "0.0.0"
          } else {
            # Strip 'v' prefix and any pre-release suffix (e.g., -beta.13) for MSI compatibility
            $VERSION = "$env:GITHUB_REF_NAME" -replace '^v', '' -replace '-.*$', ''
          }
          Write-Host "Injecting version: $VERSION"

          # Update Cargo.toml
          $cargo = Get-Content src-tauri/Cargo.toml -Raw
          $cargo = $cargo -replace '(?m)^version = .*', "version = `"$VERSION`""
          Set-Content src-tauri/Cargo.toml $cargo -NoNewline

          # Update tauri.conf.json
          $json = Get-Content src-tauri/tauri.conf.json | ConvertFrom-Json
          $json.version = $VERSION
          $json | ConvertTo-Json -Depth 100 | Set-Content src-tauri/tauri.conf.json

          # Update package.json
          $pkg = Get-Content package.json | ConvertFrom-Json
          $pkg.version = $VERSION
          $pkg | ConvertTo-Json -Depth 100 | Set-Content package.json

          Write-Host "Versions set to: $VERSION"

      - name: Import Apple Developer Certificate
        if: startsWith(matrix.platform, 'macos-')
        run: |
          echo $APPLE_CERTIFICATE | base64 --decode > certificate.p12
          security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
          security default-keychain -s build.keychain
          security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
          security set-keychain-settings -t 3600 -u build.keychain
          security import certificate.p12 -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign
          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain
          security find-identity -v -p codesigning build.keychain

      - name: Verify Certificate
        if: startsWith(matrix.platform, 'macos-')
        run: |
          CERT_INFO=$(security find-identity -v -p codesigning build.keychain | grep -E "(Developer ID Application|Apple Distribution|Apple Development)" | head -n 1)
          CERT_ID=$(echo "$CERT_INFO" | awk -F'"' '{print $2}')
          echo "CERT_ID=$CERT_ID" >> $GITHUB_ENV
          echo "Certificate imported."

      - uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          APPLE_ID: ${{ env.APPLE_ID }}
          APPLE_PASSWORD: ${{ env.APPLE_PASSWORD }}
          APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}
          APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}
        with:
          # Use the release created by the create-release job to avoid race conditions
          # when multiple matrix jobs try to create/update the same release simultaneously
          releaseId: ${{ needs.create-release.outputs.release-id }}
          assetNamePattern: "[name]_[arch][ext]"
          args: ${{ matrix.args }}

  build-web-amd64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-web == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-x64
      - run-id=${{ github.run_id }}-web-amd64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            NODE_OPTIONS=--max-old-space-size=8192
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-web-arm64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-web == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - run-id=${{ github.run_id }}-web-arm64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            NODE_OPTIONS=--max-old-space-size=8192
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  merge-web:
    needs:
      - determine-builds
      - build-web-amd64
      - build-web-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-web
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-web-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-web-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES

  build-web-cloud-amd64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-web-cloud == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-x64
      - run-id=${{ github.run_id }}-web-cloud-amd64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            NEXT_PUBLIC_CLOUD_ENABLED=true
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
            NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}
            NEXT_PUBLIC_GTM_ENABLED=true
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
            SENTRY_RELEASE=${{ github.sha }}
          secrets: |
            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-web-cloud-arm64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-web-cloud == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - run-id=${{ github.run_id }}-web-cloud-arm64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            NEXT_PUBLIC_CLOUD_ENABLED=true
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
            NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}
            NEXT_PUBLIC_GTM_ENABLED=true
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
            SENTRY_RELEASE=${{ github.sha }}
          secrets: |
            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  merge-web-cloud:
    needs:
      - determine-builds
      - build-web-cloud-amd64
      - build-web-cloud-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-web-cloud
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-cloud-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-web-cloud-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-web-cloud-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES

  build-backend-amd64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-backend == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-backend-amd64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-backend-arm64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-backend == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - run-id=${{ github.run_id }}-backend-arm64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  merge-backend:
    needs:
      - determine-builds
      - build-backend-amd64
      - build-backend-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-backend
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-backend-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-backend-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES

  build-backend-craft-amd64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-backend-craft == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-backend-craft-amd64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-backend
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            ENABLE_CRAFT=true
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64,mode=max
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-backend-craft-arm64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-backend-craft == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - run-id=${{ github.run_id }}-backend-craft-arm64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-backend
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
            ENABLE_CRAFT=true
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64,mode=max
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  merge-backend-craft:
    needs:
      - determine-builds
      - build-backend-craft-amd64
      - build-backend-craft-arm64
    if: needs.determine-builds.outputs.build-backend-craft == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-backend-craft
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-backend
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=craft-latest

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-backend-craft-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-backend-craft-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES

  build-model-server-amd64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-model-server == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-model-server-amd64
      - volume=40gb
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
        with:
          buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        env:
          DEBUG: ${{ vars.DOCKER_DEBUG == 'true' && 1 || 0 }}
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ env.EDGE_TAG != 'true' && vars.MODEL_SERVER_NO_CACHE == 'true' }}
          provenance: false
          sbom: false

  build-model-server-arm64:
    needs: determine-builds
    if: needs.determine-builds.outputs.build-model-server == 'true'
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - run-id=${{ github.run_id }}-model-server-arm64
      - volume=40gb
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
        with:
          buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        env:
          DEBUG: ${{ vars.DOCKER_DEBUG == 'true' && 1 || 0 }}
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64,mode=max
          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          no-cache: ${{ env.EDGE_TAG != 'true' && vars.MODEL_SERVER_NO_CACHE == 'true' }}
          provenance: false
          sbom: false

  merge-model-server:
    needs:
      - determine-builds
      - build-model-server-amd64
      - build-model-server-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-model-server
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    env:
      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-model-server-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-model-server-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES

  trivy-scan:
    needs:
      - determine-builds
      - merge-web
      - merge-web-cloud
      - merge-backend
      - merge-model-server
    if: >-
      always() && !cancelled() &&
      (needs.merge-web.result == 'success' ||
       needs.merge-web-cloud.result == 'success' ||
       needs.merge-backend.result == 'success' ||
       needs.merge-model-server.result == 'success')
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - run-id=${{ github.run_id }}-trivy-scan-${{ matrix.component }}
      - extras=ecr-cache
    permissions:
      security-events: write # needed for SARIF uploads
    timeout-minutes: 10
    strategy:
      fail-fast: false
      matrix:
        include:
          - component: web
            registry-image: onyxdotapp/onyx-web-server
          - component: web-cloud
            registry-image: onyxdotapp/onyx-web-server-cloud
          - component: backend
            registry-image: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
            trivyignore: backend/.trivyignore
          - component: model-server
            registry-image: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
    steps:
      - name: Check if this scan should run
        id: should-run
        run: |
          case "$COMPONENT" in
            web) RESULT="$MERGE_WEB" ;;
            web-cloud) RESULT="$MERGE_WEB_CLOUD" ;;
            backend) RESULT="$MERGE_BACKEND" ;;
            model-server) RESULT="$MERGE_MODEL_SERVER" ;;
          esac
          if [ "$RESULT" == "success" ]; then
            echo "run=true" >> "$GITHUB_OUTPUT"
          else
            echo "run=false" >> "$GITHUB_OUTPUT"
          fi
        env:
          COMPONENT: ${{ matrix.component }}
          MERGE_WEB: ${{ needs.merge-web.result }}
          MERGE_WEB_CLOUD: ${{ needs.merge-web-cloud.result }}
          MERGE_BACKEND: ${{ needs.merge-backend.result }}
          MERGE_MODEL_SERVER: ${{ needs.merge-model-server.result }}

      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
        if: steps.should-run.outputs.run == 'true'

      - name: Checkout
        if: steps.should-run.outputs.run == 'true' && matrix.trivyignore != ''
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Determine scan image
        if: steps.should-run.outputs.run == 'true'
        id: scan-image
        run: |
          if [ "$IS_TEST_RUN" == "true" ]; then
            echo "image=${RUNS_ON_ECR_CACHE}:${TAG_PREFIX}-${SANITIZED_TAG}" >> "$GITHUB_OUTPUT"
          else
            echo "image=docker.io/${REGISTRY_IMAGE}:${REF_NAME}" >> "$GITHUB_OUTPUT"
          fi
        env:
          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}
          TAG_PREFIX: ${{ matrix.component }}
          SANITIZED_TAG: ${{ needs.determine-builds.outputs.sanitized-tag }}
          REGISTRY_IMAGE: ${{ matrix.registry-image }}
          REF_NAME: ${{ github.ref_name }}

      - name: Run Trivy vulnerability scanner
        if: steps.should-run.outputs.run == 'true'
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # ratchet:aquasecurity/trivy-action@v0.35.0
        with:
          image-ref: ${{ steps.scan-image.outputs.image }}
          severity: CRITICAL,HIGH
          format: "sarif"
          output: "trivy-results.sarif"
          trivyignores: ${{ matrix.trivyignore }}
        env:
          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}

      - name: Upload Trivy scan results to GitHub Security tab
        if: steps.should-run.outputs.run == 'true'
        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab
        with:
          sarif_file: "trivy-results.sarif"

  notify-slack-on-failure:
    needs:
      - determine-builds
      - build-desktop
      - build-web-amd64
      - build-web-arm64
      - merge-web
      - build-web-cloud-amd64
      - build-web-cloud-arm64
      - merge-web-cloud
      - build-backend-amd64
      - build-backend-arm64
      - merge-backend
      - build-backend-craft-amd64
      - build-backend-craft-arm64
      - merge-backend-craft
      - build-model-server-amd64
      - build-model-server-arm64
      - merge-model-server
    if: always() && (needs.build-desktop.result == 'failure' || needs.build-web-amd64.result == 'failure' || needs.build-web-arm64.result == 'failure' || needs.merge-web.result == 'failure' || needs.build-web-cloud-amd64.result == 'failure' || needs.build-web-cloud-arm64.result == 'failure' || needs.merge-web-cloud.result == 'failure' || needs.build-backend-amd64.result == 'failure' || needs.build-backend-arm64.result == 'failure' || needs.merge-backend.result == 'failure' || (needs.determine-builds.outputs.build-backend-craft == 'true' && (needs.build-backend-craft-amd64.result == 'failure' || needs.build-backend-craft-arm64.result == 'failure' || needs.merge-backend-craft.result == 'failure')) || needs.build-model-server-amd64.result == 'failure' || needs.build-model-server-arm64.result == 'failure' || needs.merge-model-server.result == 'failure') && needs.determine-builds.outputs.is-test-run != 'true'
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 90
    environment: release
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Determine failed jobs
        id: failed-jobs
        shell: bash
        run: |
          FAILED_JOBS=""
          if [ "${NEEDS_BUILD_DESKTOP_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-desktop\\n"
          fi
          if [ "${NEEDS_BUILD_WEB_AMD64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-web-amd64\\n"
          fi
          if [ "${NEEDS_BUILD_WEB_ARM64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-web-arm64\\n"
          fi
          if [ "${NEEDS_MERGE_WEB_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• merge-web\\n"
          fi
          if [ "${NEEDS_BUILD_WEB_CLOUD_AMD64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-web-cloud-amd64\\n"
          fi
          if [ "${NEEDS_BUILD_WEB_CLOUD_ARM64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-web-cloud-arm64\\n"
          fi
          if [ "${NEEDS_MERGE_WEB_CLOUD_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• merge-web-cloud\\n"
          fi
          if [ "${NEEDS_BUILD_BACKEND_AMD64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-backend-amd64\\n"
          fi
          if [ "${NEEDS_BUILD_BACKEND_ARM64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-backend-arm64\\n"
          fi
          if [ "${NEEDS_MERGE_BACKEND_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• merge-backend\\n"
          fi
          if [ "${NEEDS_BUILD_MODEL_SERVER_AMD64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-model-server-amd64\\n"
          fi
          if [ "${NEEDS_BUILD_MODEL_SERVER_ARM64_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• build-model-server-arm64\\n"
          fi
          if [ "${NEEDS_MERGE_MODEL_SERVER_RESULT}" == "failure" ]; then
            FAILED_JOBS="${FAILED_JOBS}• merge-model-server\\n"
          fi
          # Remove trailing \n and set output
          FAILED_JOBS=$(printf '%s' "$FAILED_JOBS" | sed 's/\\n$//')
          echo "jobs=$FAILED_JOBS" >> "$GITHUB_OUTPUT"
        env:
          NEEDS_BUILD_DESKTOP_RESULT: ${{ needs.build-desktop.result }}
          NEEDS_BUILD_WEB_AMD64_RESULT: ${{ needs.build-web-amd64.result }}
          NEEDS_BUILD_WEB_ARM64_RESULT: ${{ needs.build-web-arm64.result }}
          NEEDS_MERGE_WEB_RESULT: ${{ needs.merge-web.result }}
          NEEDS_BUILD_WEB_CLOUD_AMD64_RESULT: ${{ needs.build-web-cloud-amd64.result }}
          NEEDS_BUILD_WEB_CLOUD_ARM64_RESULT: ${{ needs.build-web-cloud-arm64.result }}
          NEEDS_MERGE_WEB_CLOUD_RESULT: ${{ needs.merge-web-cloud.result }}
          NEEDS_BUILD_BACKEND_AMD64_RESULT: ${{ needs.build-backend-amd64.result }}
          NEEDS_BUILD_BACKEND_ARM64_RESULT: ${{ needs.build-backend-arm64.result }}
          NEEDS_MERGE_BACKEND_RESULT: ${{ needs.merge-backend.result }}
          NEEDS_BUILD_MODEL_SERVER_AMD64_RESULT: ${{ needs.build-model-server-amd64.result }}
          NEEDS_BUILD_MODEL_SERVER_ARM64_RESULT: ${{ needs.build-model-server-arm64.result }}
          NEEDS_MERGE_MODEL_SERVER_RESULT: ${{ needs.merge-model-server.result }}

      - name: Send Slack notification
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
          failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
          title: "🚨 Deployment Workflow Failed"
          ref-name: ${{ github.ref_name }}


================================================
FILE: .github/workflows/docker-tag-beta.yml
================================================
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "beta".

name: Tag Beta Version

on:
  workflow_dispatch:
    inputs:
      version:
        description: "The version (ie v1.0.0-beta.0) to tag as beta"
        required: true

permissions:
  contents: read

jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
    timeout-minutes: 45
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Enable Docker CLI experimental features
        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

      - name: Pull, Tag and Push Web Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}

      - name: Pull, Tag and Push API Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}

      - name: Pull, Tag and Push Model Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}


================================================
FILE: .github/workflows/docker-tag-latest.yml
================================================
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "latest".

name: Tag Latest Version

on:
  workflow_dispatch:
    inputs:
      version:
        description: "The version (ie v0.0.1) to tag as latest"
        required: true

permissions:
  contents: read

jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
    timeout-minutes: 45
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Enable Docker CLI experimental features
        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

      - name: Pull, Tag and Push Web Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}

      - name: Pull, Tag and Push API Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}

      - name: Pull, Tag and Push Model Server Image
        env:
          VERSION: ${{ github.event.inputs.version }}
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}


================================================
FILE: .github/workflows/helm-chart-releases.yml
================================================
name: Release Onyx Helm Charts

on:
  push:
    branches:
      - main

permissions: write-all

jobs:
  release:
    permissions:
      contents: write
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Install Helm CLI
        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
        with:
          version: v3.12.1

      - name: Add required Helm repositories
        run: |
          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
          helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
          helm repo add opensearch https://opensearch-project.github.io/helm-charts
          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
          helm repo add minio https://charts.min.io/
          helm repo add code-interpreter https://onyx-dot-app.github.io/python-sandbox/
          helm repo update

      - name: Build chart dependencies
        run: |
          set -euo pipefail
          for chart_dir in deployment/helm/charts/*; do
            if [ -f "$chart_dir/Chart.yaml" ]; then
              echo "Building dependencies for $chart_dir"
              helm dependency build "$chart_dir"
            fi
          done

      - name: Publish Helm charts to gh-pages
        # NOTE: HEAD of https://github.com/stefanprodan/helm-gh-pages/pull/43
        uses: stefanprodan/helm-gh-pages@ad32ad3b8720abfeaac83532fd1e9bdfca5bbe27 # zizmor: ignore[impostor-commit]
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
          branch: gh-pages
          commit_username: ${{ github.actor }}
          commit_email: ${{ github.actor }}@users.noreply.github.com


================================================
FILE: .github/workflows/merge-group.yml
================================================
name: Merge Group-Specific

on:
  merge_group:

permissions:
  contents: read

jobs:
  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
  # There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
  # integration tests. That job runs on both pull_request and merge_group events, and this job
  # exists solely to provide a fast-passing check with the same name for branch protection.
  # The actual tests remain enforced on presubmit (pull_request events).
  required:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Success
        run: echo "Success"
  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
  # There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
  # the actual playwright tests. That job runs on both pull_request and merge_group events, and
  # this job exists solely to provide a fast-passing check with the same name for branch protection.
  # The actual tests remain enforced on presubmit (pull_request events).
  playwright-required:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Success
        run: echo "Success"


================================================
FILE: .github/workflows/nightly-close-stale-issues.yml
================================================
name: 'Nightly - Close stale issues and PRs'
on:
  schedule:
    - cron: '0 11 * * *' # Runs every day at 3 AM PST / 4 AM PDT / 11 AM UTC

permissions:
  # contents: write # only for delete-branch option
  issues: write
  pull-requests: write

jobs:
  stale:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          close-issue-message: 'This issue was closed because it has been stalled for 90 days with no activity.'
          close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
          days-before-stale: 75
#           days-before-close: 90  # uncomment after we test stale behavior


================================================
FILE: .github/workflows/nightly-llm-provider-chat.yml
================================================
name: Nightly LLM Provider Chat Tests
concurrency:
  group: Nightly-LLM-Provider-Chat-${{ github.workflow }}-${{ github.ref_name }}
  cancel-in-progress: true

on:
  schedule:
    # Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)
    - cron: "30 10 * * *"
  workflow_dispatch:

permissions:
  contents: read

jobs:
  provider-chat-test:
    uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
    secrets:
      AWS_OIDC_ROLE_ARN: ${{ secrets.AWS_OIDC_ROLE_ARN }}
    permissions:
      contents: read
      id-token: write
    with:
      openai_models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
      anthropic_models: ${{ vars.NIGHTLY_LLM_ANTHROPIC_MODELS }}
      bedrock_models: ${{ vars.NIGHTLY_LLM_BEDROCK_MODELS }}
      vertex_ai_models: ${{ vars.NIGHTLY_LLM_VERTEX_AI_MODELS }}
      azure_models: ${{ vars.NIGHTLY_LLM_AZURE_MODELS }}
      azure_api_base: ${{ vars.NIGHTLY_LLM_AZURE_API_BASE }}
      ollama_models: ${{ vars.NIGHTLY_LLM_OLLAMA_MODELS }}
      openrouter_models: ${{ vars.NIGHTLY_LLM_OPENROUTER_MODELS }}
      strict: true

  notify-slack-on-failure:
    needs: [provider-chat-test]
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    environment: ci-protected
    timeout-minutes: 5
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Send Slack notification
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.SLACK_WEBHOOK }}
          failed-jobs: provider-chat-test
          title: "🚨 Scheduled LLM Provider Chat Tests failed!"
          ref-name: ${{ github.ref_name }}


================================================
FILE: .github/workflows/post-merge-beta-cherry-pick.yml
================================================
name: Post-Merge Beta Cherry-Pick

on:
  pull_request_target:
    types:
      - closed

# SECURITY NOTE:
# This workflow intentionally uses pull_request_target so post-merge automation can
# use base-repo credentials. Do not checkout PR head refs in this workflow
# (e.g. github.event.pull_request.head.sha). Only trusted base refs are allowed.
permissions:
  contents: read

jobs:
  resolve-cherry-pick-request:
    if: >-
      github.event.pull_request.merged == true
      && github.event.pull_request.base.ref == 'main'
      && github.event.pull_request.head.repo.full_name == github.repository
    outputs:
      should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}
      pr_number: ${{ steps.gate.outputs.pr_number }}
      merge_commit_sha: ${{ steps.gate.outputs.merge_commit_sha }}
      merged_by: ${{ steps.gate.outputs.merged_by }}
      gate_error: ${{ steps.gate.outputs.gate_error }}
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Resolve merged PR and checkbox state
        id: gate
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          # SECURITY: keep PR body in env/plain-text handling; avoid directly
          # inlining github.event.pull_request.body into shell commands.
          PR_BODY: ${{ github.event.pull_request.body }}
          MERGE_COMMIT_SHA: ${{ github.event.pull_request.merge_commit_sha }}
          MERGED_BY: ${{ github.event.pull_request.merged_by.login }}
          # Explicit merger allowlist used because pull_request_target runs with
          # the default GITHUB_TOKEN, which cannot reliably read org/team
          # membership for this repository context.
          ALLOWED_MERGERS: |
            acaprau
            bo-onyx
            danelegend
            duo-onyx
            evan-onyx
            jessicasingh7
            jmelahman
            joachim-danswer
            justin-tahara
            nmgarza5
            raunakab
            rohoswagger
            subash-mohan
            trial2onyx
            wenxi-onyx
            weves
            yuhongsun96
        run: |
          echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"
          echo "merged_by=${MERGED_BY}" >> "$GITHUB_OUTPUT"

          if ! echo "${PR_BODY}" | grep -qiE "\\[x\\][[:space:]]*(\\[[^]]+\\][[:space:]]*)?Please cherry-pick this PR to the latest release version"; then
            echo "should_cherrypick=false" >> "$GITHUB_OUTPUT"
            echo "Cherry-pick checkbox not checked for PR #${PR_NUMBER}. Skipping."
            exit 0
          fi

          # Keep should_cherrypick output before any possible exit 1 below so
          # notify-slack can still gate on this output even if this job fails.
          echo "should_cherrypick=true" >> "$GITHUB_OUTPUT"
          echo "Cherry-pick checkbox checked for PR #${PR_NUMBER}."

          if [ -z "${MERGE_COMMIT_SHA}" ] || [ "${MERGE_COMMIT_SHA}" = "null" ]; then
            echo "gate_error=missing-merge-commit-sha" >> "$GITHUB_OUTPUT"
            echo "::error::PR #${PR_NUMBER} requested cherry-pick, but merge_commit_sha is missing."
            exit 1
          fi

          echo "merge_commit_sha=${MERGE_COMMIT_SHA}" >> "$GITHUB_OUTPUT"

          normalized_merged_by="$(printf '%s' "${MERGED_BY}" | tr '[:upper:]' '[:lower:]')"
          normalized_allowed_mergers="$(printf '%s\n' "${ALLOWED_MERGERS}" | tr '[:upper:]' '[:lower:]')"
          if ! printf '%s\n' "${normalized_allowed_mergers}" | grep -Fxq "${normalized_merged_by}"; then
            echo "gate_error=not-allowed-merger" >> "$GITHUB_OUTPUT"
            echo "::error::${MERGED_BY} is not in the explicit cherry-pick merger allowlist. Failing cherry-pick gate."
            exit 1
          fi

          exit 0

  cherry-pick-to-latest-release:
    needs:
      - resolve-cherry-pick-request
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success'
    permissions:
      contents: write
      pull-requests: write
    outputs:
      cherry_pick_pr_url: ${{ steps.run_cherry_pick.outputs.pr_url }}
      cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}
      cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Checkout repository
        # SECURITY: keep checkout pinned to trusted base branch; do not switch to PR head refs.
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: true
          ref: main

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"

      - name: Configure git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Create cherry-pick PR to latest release
        id: run_cherry_pick
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_TOKEN: ${{ github.token }}
          CHERRY_PICK_ASSIGNEE: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
        run: |
          output_file="$(mktemp)"
          set +e
          uv run --no-sync --with onyx-devtools ods cherry-pick "${MERGE_COMMIT_SHA}" --yes --no-verify 2>&1 | tee "$output_file"
          pipe_statuses=("${PIPESTATUS[@]}")
          exit_code="${pipe_statuses[0]}"
          tee_exit="${pipe_statuses[1]:-0}"
          set -e
          if [ "${tee_exit}" -ne 0 ]; then
            echo "status=failure" >> "$GITHUB_OUTPUT"
            echo "reason=output-capture-failed" >> "$GITHUB_OUTPUT"
            echo "::error::tee failed to capture cherry-pick output (exit ${tee_exit}); cannot classify result."
            exit 1
          fi

          if [ "${exit_code}" -eq 0 ]; then
            pr_url="$(sed -n 's/^.*PR created successfully: \(https:\/\/github\.com\/[^[:space:]]\+\/pull\/[0-9]\+\).*$/\1/p' "$output_file" | tail -n 1)"
            echo "status=success" >> "$GITHUB_OUTPUT"
            if [ -n "${pr_url}" ]; then
              echo "pr_url=${pr_url}" >> "$GITHUB_OUTPUT"
            fi
            exit 0
          fi

          echo "status=failure" >> "$GITHUB_OUTPUT"

          reason="command-failed"
          if grep -qiE "merge conflict during cherry-pick|CONFLICT|could not apply|cherry-pick in progress with staged changes" "$output_file"; then
            reason="merge-conflict"
          fi
          echo "reason=${reason}" >> "$GITHUB_OUTPUT"

          {
            echo "details<<EOF"
            tail -n 40 "$output_file"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Mark workflow as failed if cherry-pick failed
        if: steps.run_cherry_pick.outputs.status == 'failure'
        env:
          CHERRY_PICK_REASON: ${{ steps.run_cherry_pick.outputs.reason }}
        run: |
          echo "::error::Automated cherry-pick failed (${CHERRY_PICK_REASON})."
          exit 1

  notify-slack-on-cherry-pick-success:
    needs:
      - resolve-cherry-pick-request
      - cherry-pick-to-latest-release
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
    runs-on: ubuntu-slim
    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Fail if Slack webhook secret is missing
        env:
          CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
        run: |
          if [ -z "${CHERRY_PICK_PRS_WEBHOOK}" ]; then
            echo "::error::CHERRY_PICK_PRS_WEBHOOK is not configured."
            exit 1
          fi

      - name: Build cherry-pick success summary
        id: success-summary
        env:
          SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}
          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
          CHERRY_PICK_PR_URL: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_pr_url }}
        run: |
          source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
          details="*Cherry-pick PR opened successfully.*\\n• author: {mention}\\n• source PR: ${source_pr_url}"
          if [ -n "${CHERRY_PICK_PR_URL}" ]; then
            details="${details}\\n• cherry-pick PR: ${CHERRY_PICK_PR_URL}"
          fi
          if [ -n "${MERGE_COMMIT_SHA}" ]; then
            details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
          fi

          echo "details=${details}" >> "$GITHUB_OUTPUT"

      - name: Notify #cherry-pick-prs about cherry-pick success
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
          details: ${{ steps.success-summary.outputs.details }}
          title: "✅ Automated Cherry-Pick PR Opened"
          ref-name: ${{ github.event.pull_request.base.ref }}

  notify-slack-on-cherry-pick-failure:
    needs:
      - resolve-cherry-pick-request
      - cherry-pick-to-latest-release
    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
    runs-on: ubuntu-slim
    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Fail if Slack webhook secret is missing
        env:
          CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
        run: |
          if [ -z "${CHERRY_PICK_PRS_WEBHOOK}" ]; then
            echo "::error::CHERRY_PICK_PRS_WEBHOOK is not configured."
            exit 1
          fi

      - name: Build cherry-pick failure summary
        id: failure-summary
        env:
          SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}
          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
          GATE_ERROR: ${{ needs.resolve-cherry-pick-request.outputs.gate_error }}
          CHERRY_PICK_REASON: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_reason }}
          CHERRY_PICK_DETAILS: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_details }}
        run: |
          source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"

          reason_text="cherry-pick command failed"
          if [ "${GATE_ERROR}" = "missing-merge-commit-sha" ]; then
            reason_text="requested cherry-pick but merge commit SHA was missing"
          elif [ "${GATE_ERROR}" = "not-allowed-merger" ]; then
            reason_text="merger is not in the explicit cherry-pick allowlist"
          elif [ "${CHERRY_PICK_REASON}" = "output-capture-failed" ]; then
            reason_text="failed to capture cherry-pick output for classification"
          elif [ "${CHERRY_PICK_REASON}" = "merge-conflict" ]; then
            reason_text="merge conflict during cherry-pick"
          fi

          details_excerpt="$(printf '%s' "${CHERRY_PICK_DETAILS}" | tail -n 8 | tr '\n' ' ' | sed "s/[[:space:]]\\+/ /g" | sed "s/\"/'/g" | cut -c1-350)"
          if [ -n "${GATE_ERROR}" ]; then
            failed_job_label="resolve-cherry-pick-request"
          else
            failed_job_label="cherry-pick-to-latest-release"
          fi
          details="• author: {mention}\\n• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
          if [ -n "${MERGE_COMMIT_SHA}" ]; then
            details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
          fi
          if [ -n "${details_excerpt}" ]; then
            details="${details}\\n• excerpt: ${details_excerpt}"
          fi

          echo "details=${details}" >> "$GITHUB_OUTPUT"

      - name: Notify #cherry-pick-prs about cherry-pick failure
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
          details: ${{ steps.failure-summary.outputs.details }}
          title: "🚨 Automated Cherry-Pick Failed"
          ref-name: ${{ github.event.pull_request.base.ref }}


================================================
FILE: .github/workflows/pr-database-tests.yml
================================================
name: Database Tests
concurrency:
  group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  database-tests:
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - "run-id=${{ github.run_id }}-database-tests"
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
        with:
          requirements: |
            backend/requirements/default.txt
            backend/requirements/dev.txt

      - name: Generate OpenAPI schema and Python client
        shell: bash
        # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
        env:
          LICENSE_ENFORCEMENT_ENABLED: "false"
        run: |
          ods openapi all

      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Start Docker containers
        working-directory: ./deployment/docker_compose
        run: |
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
            relational_db

      - name: Run Database Tests
        working-directory: ./backend
        run: pytest -m alembic tests/integration/tests/migrations/


================================================
FILE: .github/workflows/pr-desktop-build.yml
================================================
name: Build Desktop App
concurrency:
  group: Build-Desktop-App-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
    paths:
      - "desktop/**"
      - ".github/workflows/pr-desktop-build.yml"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  build-desktop:
    name: Build Desktop (${{ matrix.platform }})
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: linux
            os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            args: "--bundles deb,rpm"
          # TODO: Fix and enable the macOS build.
          #- platform: macos
          #  os: macos-latest
          #  target: universal-apple-darwin
          #  args: "--target universal-apple-darwin"
          # TODO: Fix and enable the Windows build.
          #- platform: windows
          #  os: windows-latest
          #  target: x86_64-pc-windows-msvc
          #  args: ""

    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f
        with:
          node-version: 24
          cache: "npm" # zizmor: ignore[cache-poisoning]
          cache-dependency-path: ./desktop/package-lock.json

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
        with:
          toolchain: stable
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            desktop/src-tauri/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('desktop/src-tauri/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Install Linux dependencies
        if: matrix.platform == 'linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            libglib2.0-dev \
            libgirepository1.0-dev \
            libgtk-3-dev \
            libjavascriptcoregtk-4.1-dev \
            libwebkit2gtk-4.1-dev \
            libayatana-appindicator3-dev \
            gobject-introspection \
            pkg-config \
            curl \
            xdg-utils

      - name: Install npm dependencies
        working-directory: ./desktop
        run: npm ci

      - name: Build desktop app
        working-directory: ./desktop
        run: npx tauri build ${{ matrix.args }}
        env:
          TAURI_SIGNING_PRIVATE_KEY: ""
          TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ""

      - name: Upload build artifacts
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
          path: |
            desktop/src-tauri/target/release/bundle/
          retention-days: 7
          if-no-files-found: ignore


================================================
FILE: .github/workflows/pr-external-dependency-unit-tests.yml
================================================
name: External Dependency Unit Tests
concurrency:
  group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches: [main]
    paths:
      - "backend/**"
      - "pyproject.toml"
      - "uv.lock"
      - ".github/workflows/pr-external-dependency-unit-tests.yml"
      - ".github/actions/setup-python-and-install-dependencies/**"
      - ".github/actions/setup-playwright/**"
      - "deployment/docker_compose/docker-compose.yml"
      - "deployment/docker_compose/docker-compose.dev.yml"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

env:
  # AWS credentials for S3-specific test
  S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
  S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}

  # MinIO
  S3_ENDPOINT_URL: "http://localhost:9004"
  S3_AWS_ACCESS_KEY_ID: "minioadmin"
  S3_AWS_SECRET_ACCESS_KEY: "minioadmin"

  # Confluence
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
  CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}

  # Jira
  JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}

  # LLMs
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
  VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}

  # Code Interpreter
  # TODO: debug why this is failing and enable
  CODE_INTERPRETER_BASE_URL: http://localhost:8000

jobs:
  discover-test-dirs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Discover test directories
        id: set-matrix
        run: |
          # Find all subdirectories in backend/tests/external_dependency_unit
          dirs=$(find backend/tests/external_dependency_unit -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-dirs=$dirs" >> $GITHUB_OUTPUT

  external-dependency-unit-tests:
    needs: discover-test-dirs
    # Use larger runner with more resources for Vespa
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
      - extras=s3-cache
    timeout-minutes: 45
    strategy:
      fail-fast: false
      matrix:
        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}

    env:
      PYTHONPATH: ./backend
      MODEL_SERVER_HOST: "disabled"
      DISABLE_TELEMETRY: "true"

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
        with:
          requirements: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/ee.txt

      - name: Setup Playwright
        uses: ./.github/actions/setup-playwright

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Create .env file for Docker Compose
        run: |
          cat <<EOF > deployment/docker_compose/.env
          COMPOSE_PROFILES=s3-filestore,opensearch-enabled
          DISABLE_TELEMETRY=true
          OPENSEARCH_FOR_ONYX_ENABLED=true
          EOF

      - name: Set up Standard Dependencies
        run: |
          cd deployment/docker_compose
          docker compose \
            -f docker-compose.yml \
            -f docker-compose.dev.yml \
            up -d \
            minio \
            relational_db \
            cache \
            index \
            opensearch \
            code-interpreter

      - name: Run migrations
        run: |
          cd backend
          # Run migrations to head
          alembic upgrade head
          alembic heads --verbose

      - name: Run Tests for ${{ matrix.test-dir }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        env:
          TEST_DIR: ${{ matrix.test-dir }}
        run: |
          py.test \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/external_dependency_unit/${TEST_DIR}

      - name: Collect Docker logs on failure
        if: failure()
        run: |
          mkdir -p docker-logs
          cd deployment/docker_compose

          # Get list of running containers
          containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)

          # Collect logs from each container
          for container in $containers; do
            container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
            echo "Collecting logs from $container_name..."
            docker logs $container > ../../docker-logs/${container_name}.log 2>&1
          done

          cd ../..
          echo "Docker logs collected in docker-logs directory"

      - name: Upload Docker logs
        if: failure()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-${{ matrix.test-dir }}
          path: docker-logs/
          retention-days: 7


================================================
FILE: .github/workflows/pr-golang-tests.yml
================================================
name: Golang Tests
concurrency:
  group: Golang-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"

permissions: {}

env:
  GO_VERSION: "1.26"

jobs:
  detect-modules:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    outputs:
      modules: ${{ steps.set-modules.outputs.modules }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: false
      - id: set-modules
        run: echo "modules=$(find . -name 'go.mod' -exec dirname {} \; | jq -Rc '[.,inputs]')" >> "$GITHUB_OUTPUT"

  golang:
    needs: detect-modules
    runs-on: ubuntu-latest
    timeout-minutes: 10
    strategy:
      matrix:
        modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
      - uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]
        with:
          go-version: ${{ env.GO_VERSION }}
          cache-dependency-path: "**/go.sum"

      - run: go mod tidy
        working-directory: ${{ matrix.modules }}
      - run: git diff --exit-code go.mod go.sum
        working-directory: ${{ matrix.modules }}

      - run: go test ./...
        working-directory: ${{ matrix.modules }}


================================================
FILE: .github/workflows/pr-helm-chart-testing.yml
================================================
name: Helm - Lint and Test Charts
concurrency:
  group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches: [main]
  push:
    tags:
      - "v*.*.*"
  workflow_dispatch: # Allows manual triggering

permissions:
  contents: read

jobs:
  helm-chart-check:
    # See https://runs-on.com/runners/linux/
    runs-on:
      [
        runs-on,
        runner=8cpu-linux-x64,
        hdd=256,
        "run-id=${{ github.run_id }}-helm-chart-check",
      ]
    timeout-minutes: 45

    # fetch-depth 0 is required for helm/chart-testing-action
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Set up Helm
        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
        with:
          version: v3.19.0

      - name: Set up chart-testing
        uses: helm/chart-testing-action@2e2940618cb426dce2999631d543b53cdcfc8527
        with:
          uv_version: "0.9.9"

      # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
      - name: Run chart-testing (list-changed)
        id: list-changed
        env:
          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
        run: |
          echo "default_branch: ${DEFAULT_BRANCH}"
          changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
          echo "list-changed output: $changed"
          if [[ -n "$changed" ]]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      # uncomment to force run chart-testing
      #     - name: Force run chart-testing (list-changed)
      #       id: list-changed
      #       run: echo "changed=true" >> $GITHUB_OUTPUT
      # lint all charts if any changes were detected
      - name: Run chart-testing (lint)
        if: steps.list-changed.outputs.changed == 'true'
        run: ct lint --config ct.yaml --all
        # the following would lint only changed charts, but linting isn't expensive
        # run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}

      - name: Create kind cluster
        if: steps.list-changed.outputs.changed == 'true'
        uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # ratchet:helm/kind-action@v1.14.0

      - name: Pre-install cluster status check
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Pre-install Cluster Status ==="
          kubectl get nodes -o wide
          kubectl get pods --all-namespaces
          kubectl get storageclass

      - name: Add Helm repositories and update
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Adding Helm repositories ==="
          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
          helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
          helm repo add opensearch https://opensearch-project.github.io/helm-charts
          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
          helm repo add minio https://charts.min.io/
          helm repo add code-interpreter https://onyx-dot-app.github.io/python-sandbox/
          helm repo update

      - name: Install Redis operator
        if: steps.list-changed.outputs.changed == 'true'
        shell: bash
        run: |
          echo "=== Installing redis-operator CRDs ==="
          helm upgrade --install redis-operator ot-container-kit/redis-operator \
            --namespace redis-operator --create-namespace --wait --timeout 300s

      - name: Pre-pull required images
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Pre-pulling required images to avoid timeout ==="
          KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
          echo "Kind cluster: $KIND_CLUSTER"

          IMAGES=(
            "ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
            "quay.io/opstree/redis:v7.0.15"
            "docker.io/onyxdotapp/onyx-web-server:latest"
          )

          for image in "${IMAGES[@]}"; do
            echo "Pre-pulling $image"
            if docker pull "$image"; then
              kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
            else
              echo "Failed to pull $image"
            fi
          done

          echo "=== Images loaded into Kind cluster ==="
          docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."

      - name: Validate chart dependencies
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Validating chart dependencies ==="
          cd deployment/helm/charts/onyx
          helm dependency update
          helm lint . --set auth.userauth.values.user_auth_secret=placeholder

      - name: Run chart-testing (install) with enhanced monitoring
        timeout-minutes: 25
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Starting chart installation with monitoring ==="

          # Function to monitor cluster state
          monitor_cluster() {
            while true; do
              echo "=== Cluster Status Check at $(date) ==="
              # Only show non-running pods to reduce noise
              NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
              if [ "$NON_RUNNING_PODS" -gt 0 ]; then
                echo "Non-running pods:"
                kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
              else
                echo "All pods running successfully"
              fi
              # Only show recent events if there are issues
              RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
              if [ -n "$RECENT_EVENTS" ]; then
                echo "Recent warnings/errors:"
                echo "$RECENT_EVENTS"
              fi
              sleep 60
            done
          }

          # Start monitoring in background
          monitor_cluster &
          MONITOR_PID=$!

          # Set up cleanup
          cleanup() {
            echo "=== Cleaning up monitoring process ==="
            kill $MONITOR_PID 2>/dev/null || true
            echo "=== Final cluster state ==="
            kubectl get pods --all-namespaces
            kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
          }

          # Trap cleanup on exit
          trap cleanup EXIT

          # Run the actual installation with detailed logging
          # Note that opensearch.enabled is true whereas others in this install
          # are false. There is some work that needs to be done to get this
          # entire step working in CI, enabling opensearch here is a small step
          # in that direction. If this is causing issues, disabling it in this
          # step should be ok in the short term.
          echo "=== Starting ct install ==="
          set +e
          ct install --all \
            --helm-extra-set-args="\
              --set=nginx.enabled=false \
              --set=minio.enabled=false \
              --set=vespa.enabled=false \
              --set=opensearch.enabled=true \
              --set=auth.opensearch.enabled=true \
              --set=auth.userauth.values.user_auth_secret=test-secret \
              --set=slackbot.enabled=false \
              --set=postgresql.enabled=true \
              --set=postgresql.cluster.storage.storageClass=standard \
              --set=redis.enabled=true \
              --set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
              --set=webserver.replicaCount=1 \
              --set=api.replicaCount=0 \
              --set=inferenceCapability.replicaCount=0 \
              --set=indexCapability.replicaCount=0 \
              --set=celery_beat.replicaCount=0 \
              --set=celery_worker_heavy.replicaCount=0 \
              --set=celery_worker_docfetching.replicaCount=0 \
              --set=celery_worker_docprocessing.replicaCount=0 \
              --set=celery_worker_light.replicaCount=0 \
              --set=celery_worker_monitoring.replicaCount=0 \
              --set=celery_worker_primary.replicaCount=0 \
              --set=celery_worker_user_file_processing.replicaCount=0 \
              --set=celery_worker_user_files_indexing.replicaCount=0" \
            --helm-extra-args="--timeout 900s --debug" \
            --debug --config ct.yaml
          CT_EXIT=$?
          set -e

          if [[ $CT_EXIT -ne 0 ]]; then
            echo "ct install failed with exit code $CT_EXIT"
            exit $CT_EXIT
          else
            echo "=== Installation completed successfully ==="
          fi

          kubectl get pods --all-namespaces

      - name: Post-install verification
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Post-install verification ==="
          if ! kubectl cluster-info >/dev/null 2>&1; then
            echo "ERROR: Kubernetes cluster is not reachable after install"
            exit 1
          fi
          kubectl get pods --all-namespaces
          kubectl get services --all-namespaces
          # Only show issues if they exist
          kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"

      - name: Cleanup on failure
        if: failure() && steps.list-changed.outputs.changed == 'true'
        run: |
          echo "=== Cleanup on failure ==="
          if ! kubectl cluster-info >/dev/null 2>&1; then
            echo "Skipping failure cleanup: Kubernetes cluster is not reachable"
            exit 0
          fi
          echo "=== Final cluster state ==="
          kubectl get pods --all-namespaces
          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10

          echo "=== Pod descriptions for debugging ==="
          kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"

          echo "=== Recent logs for debugging ==="
          kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"

          echo "=== Helm releases ==="
          helm list --all-namespaces
        # the following would install only changed charts, but we only have one chart so
        # don't worry about that for now
        # run: ct install --target-branch ${{ github.event.repository.default_branch }}


================================================
FILE: .github/workflows/pr-integration-tests.yml
================================================
name: Run Integration Tests v2
concurrency:
  group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

env:
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  SLACK_BOT_TOKEN_TEST_SPACE: ${{ secrets.SLACK_BOT_TOKEN_TEST_SPACE }}
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
  JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
  PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
  GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
  GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
  GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}

jobs:
  discover-test-dirs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
      editions: ${{ steps.set-editions.outputs.editions }}
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Discover test directories
        id: set-matrix
        run: |
          # Find all leaf-level directories in both test directories
          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" ! -name "no_vectordb" -exec basename {} \; | sort)
          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)

          # Create JSON array with directory info
          all_dirs=""
          for dir in $tests_dirs; do
            all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
          done
          for dir in $connector_dirs; do
            all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
          done

          # Remove trailing comma and wrap in array
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

      - name: Determine editions to test
        id: set-editions
        run: |
          # On PRs, only run EE tests. On merge_group and tags, run both EE and MIT.
          if [ "${{ github.event_name }}" = "pull_request" ]; then
            echo 'editions=["ee"]' >> $GITHUB_OUTPUT
          else
            echo 'editions=["ee","mit"]' >> $GITHUB_OUTPUT
          fi

  build-backend-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-backend-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Backend Docker image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
            type=registry,ref=onyxdotapp/onyx-backend:latest
          cache-to: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-model-server-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Model Server Docker image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
            type=registry,ref=onyxdotapp/onyx-model-server:latest
          cache-to: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max

  build-integration-image:
    runs-on:
      [
        runs-on,
        runner=2cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-integration-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling openapitools/openapi-generator-cli
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Build and push integration test image with Docker Bake
        env:
          INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
          TAG: integration-test-${{ github.run_id }}
          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
          docker buildx bake --push \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
            --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
            integration

  integration-tests:
    needs:
      [
        discover-test-dirs,
        build-backend-image,
        build-model-server-image,
        build-integration-image,
      ]
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - ${{ format('run-id={0}-integration-tests-{1}-job-{2}', github.run_id, matrix.edition, strategy['job-index']) }}
      - extras=ecr-cache
    timeout-minutes: 45

    strategy:
      fail-fast: false
      matrix:
        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
        edition: ${{ fromJson(needs.discover-test-dirs.outputs.editions) }}

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      # NOTE: don't need web server for integration tests
      - name: Create .env file for Docker Compose
        env:
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
          EDITION: ${{ matrix.edition }}
        run: |
          # Base config shared by both editions
          cat <<EOF > deployment/docker_compose/.env
          COMPOSE_PROFILES=s3-filestore
          OPENSEARCH_FOR_ONYX_ENABLED=false
          AUTH_TYPE=basic
          POSTGRES_POOL_PRE_PING=true
          POSTGRES_USE_NULL_POOL=true
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          MCP_SERVER_ENABLED=true
          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
          EOF

          # EE-only config
          if [ "$EDITION" = "ee" ]; then
            cat <<EOF >> deployment/docker_compose/.env
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
          LICENSE_ENFORCEMENT_ENABLED=false
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
          EOF
          fi

      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
            relational_db \
            index \
            cache \
            minio \
            api_server \
            inference_model_server \
            indexing_model_server \
            background \
            -d
        id: start_docker

      - name: Wait for services to be ready
        run: |
          echo "Starting wait-for-service script..."

          wait_for_service() {
            local url=$1
            local label=$2
            local timeout=${3:-300}  # default 5 minutes
            local start_time
            start_time=$(date +%s)

            while true; do
              local current_time
              current_time=$(date +%s)
              local elapsed_time=$((current_time - start_time))

              if [ $elapsed_time -ge $timeout ]; then
                echo "Timeout reached. ${label} did not become ready in $timeout seconds."
                exit 1
              fi

              local response
              response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")

              if [ "$response" = "200" ]; then
                echo "${label} is ready!"
                break
              elif [ "$response" = "curl_error" ]; then
                echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
              else
                echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
              fi

              sleep 5
            done
          }

          wait_for_service "http://localhost:8080/health" "API server"
          echo "Finished waiting for services."

      - name: Start Mock Services
        run: |
          cd backend/tests/integration/mock_services
          docker compose -f docker-compose.mock-it-services.yml \
            -p mock-it-services-stack up -d

      - name: Run Integration Tests (${{ matrix.edition }}) for ${{ matrix.test-dir.name }}
        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
        with:
          timeout_minutes: 20
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            echo "Running ${{ matrix.edition }} integration tests for ${{ matrix.test-dir.path }}..."
            docker run --rm --network onyx_default \
              --name test-runner \
              -e POSTGRES_HOST=relational_db \
              -e POSTGRES_USER=postgres \
              -e POSTGRES_PASSWORD=password \
              -e POSTGRES_DB=postgres \
              -e DB_READONLY_USER=db_readonly_user \
              -e DB_READONLY_PASSWORD=password \
              -e POSTGRES_POOL_PRE_PING=true \
              -e POSTGRES_USE_NULL_POOL=true \
              -e VESPA_HOST=index \
              -e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \
              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
              -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
              -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
              -e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
              -e JIRA_BASE_URL=${JIRA_BASE_URL} \
              -e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
              -e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
              -e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
              -e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
              -e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
              -e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
              -e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
              -e ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${{ matrix.edition == 'ee' && 'true' || 'false' }} \
              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/${{ matrix.test-dir.path }}

      # ------------------------------------------------------------
      # Always gather logs BEFORE "down":
      - name: Dump API server logs
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

  onyx-lite-tests:
    needs: [build-backend-image, build-integration-image]
    runs-on:
      [
        runs-on,
        runner=4cpu-linux-arm64,
        "run-id=${{ github.run_id }}-onyx-lite-tests",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Create .env file for Onyx Lite Docker Compose
        env:
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          cat <<EOF > deployment/docker_compose/.env
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          LICENSE_ENFORCEMENT_ENABLED=false
          AUTH_TYPE=basic
          POSTGRES_POOL_PRE_PING=true
          POSTGRES_USE_NULL_POOL=true
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          EOF

      # Start only the services needed for Onyx Lite (Postgres + API server)
      - name: Start Docker containers (onyx-lite)
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up \
            relational_db \
            api_server \
            -d
        id: start_docker_onyx_lite

      - name: Wait for services to be ready
        run: |
          echo "Starting wait-for-service script (onyx-lite)..."
          start_time=$(date +%s)
          timeout=300
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in $timeout seconds."
              exit 1
            fi
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
            if [ "$response" = "200" ]; then
              echo "API server is ready!"
              break
            elif [ "$response" = "curl_error" ]; then
              echo "Curl encountered an error; retrying..."
            else
              echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
            fi
            sleep 5
          done

      - name: Run Onyx Lite Integration Tests
        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
        with:
          timeout_minutes: 20
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            echo "Running onyx-lite integration tests..."
            docker run --rm --network onyx_default \
              --name test-runner \
              -e POSTGRES_HOST=relational_db \
              -e POSTGRES_USER=postgres \
              -e POSTGRES_PASSWORD=password \
              -e POSTGRES_DB=postgres \
              -e DB_READONLY_USER=db_readonly_user \
              -e DB_READONLY_PASSWORD=password \
              -e POSTGRES_POOL_PRE_PING=true \
              -e POSTGRES_USE_NULL_POOL=true \
              -e API_SERVER_HOST=api_server \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e TEST_WEB_HOSTNAME=test-runner \
              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/tests/no_vectordb

      - name: Dump API server logs (onyx-lite)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
            logs --no-color api_server > $GITHUB_WORKSPACE/api_server_onyx_lite.log || true

      - name: Dump all-container logs (onyx-lite)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
            logs --no-color > $GITHUB_WORKSPACE/docker-compose-onyx-lite.log || true

      - name: Upload logs (onyx-lite)
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-onyx-lite
          path: ${{ github.workspace }}/docker-compose-onyx-lite.log

      - name: Stop Docker containers (onyx-lite)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml down -v

  multitenant-tests:
    needs:
      [build-backend-image, build-model-server-image, build-integration-image]
    runs-on:
      [
        runs-on,
        runner=8cpu-linux-arm64,
        "run-id=${{ github.run_id }}-multitenant-tests",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Start Docker containers for multi-tenant tests
        env:
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          LICENSE_ENFORCEMENT_ENABLED=false \
          MULTI_TENANT=true \
          AUTH_TYPE=cloud \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
          DEV_MODE=true \
          OPENSEARCH_FOR_ONYX_ENABLED=false \
          docker compose -f docker-compose.multitenant-dev.yml up \
            relational_db \
            index \
            cache \
            minio \
            api_server \
            inference_model_server \
            indexing_model_server \
            background \
            -d
        id: start_docker_multi_tenant

      - name: Wait for service to be ready (multi-tenant)
        run: |
          echo "Starting wait-for-service script for multi-tenant..."
          docker logs -f onyx-api_server-1 &
          start_time=$(date +%s)
          timeout=300
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
            elif [ "$response" = "curl_error" ]; then
              echo "Curl encountered an error; retrying..."
            else
              echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
            fi
            sleep 5
          done
          echo "Finished waiting for service."

      - name: Run Multi-Tenant Integration Tests
        env:
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          echo "Running multi-tenant integration tests..."
          docker run --rm --network onyx_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e DB_READONLY_USER=db_readonly_user \
            -e DB_READONLY_PASSWORD=password \
            -e POSTGRES_DB=postgres \
            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
            -e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
            -e EXA_API_KEY=${EXA_API_KEY} \
            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
            -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
            -e TEST_WEB_HOSTNAME=test-runner \
            -e AUTH_TYPE=cloud \
            -e MULTI_TENANT=true \
            -e SKIP_RESET=true \
            -e REQUIRE_EMAIL_VERIFICATION=false \
            -e DISABLE_TELEMETRY=true \
            -e DEV_MODE=true \
            ${ECR_CACHE}:integration-test-${RUN_ID} \
            /app/tests/integration/multitenant_tests

      - name: Dump API server logs (multi-tenant)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true

      - name: Dump all-container logs (multi-tenant)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true

      - name: Upload logs (multi-tenant)
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-multitenant
          path: ${{ github.workspace }}/docker-compose-multitenant.log

      - name: Stop multi-tenant Docker containers
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.multitenant-dev.yml down -v

  required:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
    needs: [integration-tests, onyx-lite-tests, multitenant-tests]
    if: ${{ always() }}
    steps:
      - name: Check job status
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
        run: exit 1


================================================
FILE: .github/workflows/pr-jest-tests.yml
================================================
name: Run Jest Tests
concurrency:
  group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  jest-tests:
    name: Jest Tests
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning] test-only workflow; no deploy artifacts
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Run Jest tests
        working-directory: ./web
        run: npm test -- --ci --coverage --maxWorkers=50%

      - name: Upload coverage reports
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
          retention-days: 7


================================================
FILE: .github/workflows/pr-labeler.yml
================================================
name: PR Labeler

on:
  pull_request:
    branches:
      - main
    types:
      - opened
      - reopened
      - synchronize
      - edited

permissions:
  contents: read

jobs:
  validate_pr_title:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Check PR title for Conventional Commits
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          echo "PR Title: $PR_TITLE"
          if [[ ! "$PR_TITLE" =~ ^(feat|fix|docs|test|ci|refactor|perf|chore|revert|build)(\(.+\))?:\ .+ ]]; then
            echo "::error::❌ Your PR title does not follow the Conventional Commits format.
              This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.

              Please update your PR title to follow the Conventional Commits style.
              Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits

              **Here are some examples of valid PR titles:**
              - feat: add user authentication
              - fix(login): handle null password error
              - docs(readme): update installation instructions"
            exit 1
          fi


================================================
FILE: .github/workflows/pr-linear-check.yml
================================================
name: Ensure PR references Linear
concurrency:
  group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  pull_request:
    types: [opened, edited, reopened, synchronize]

permissions:
  contents: read

jobs:
  linear-check:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - name: Check PR body for Linear link or override
        env:
          PR_BODY: ${{ github.event.pull_request.body }}
        run: |
          # Looking for "https://linear.app" in the body
          if echo "$PR_BODY" | grep -qE "https://linear\.app"; then
            echo "Found a Linear link. Check passed."
            exit 0
          fi

          # Looking for a checked override: "[x] Override Linear Check"
          if echo "$PR_BODY" | grep -q "\[x\].*Override Linear Check"; then
            echo "Override box is checked. Check passed."
            exit 0
          fi

          # Otherwise, fail the run
          echo "No Linear link or override found in the PR description."
          exit 1


================================================
FILE: .github/workflows/pr-playwright-tests.yml
================================================
name: Run Playwright Tests
concurrency:
  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"
    # TODO: Remove this if we enable merge-queues for release branches.
    branches:
      - "release/**"

permissions:
  contents: read

env:
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
  FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }}
  GOOGLE_PSE_API_KEY: ${{ secrets.GOOGLE_PSE_API_KEY }}
  GOOGLE_PSE_SEARCH_ENGINE_ID: ${{ secrets.GOOGLE_PSE_SEARCH_ENGINE_ID }}

  # for federated slack tests
  SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
  SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}

  # for MCP Oauth tests
  MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
  MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
  MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
  MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
  MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
  MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}

  # for MCP API Key tests
  MCP_API_KEY: test-api-key-12345
  MCP_API_KEY_TEST_PORT: 8005
  MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
  MCP_API_KEY_SERVER_HOST: 0.0.0.0
  MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal

  MOCK_LLM_RESPONSE: true
  MCP_TEST_SERVER_PORT: 8004
  MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
  MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
  MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
  MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
  MCP_SERVER_HOST: 0.0.0.0
  MCP_SERVER_PUBLIC_HOST: host.docker.internal
  MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp

  # Visual regression S3 bucket (shared across all jobs)
  PLAYWRIGHT_S3_BUCKET: onyx-playwright-artifacts

jobs:
  build-web-image:
    runs-on:
      [
        runs-on,
        runner=4cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-web-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Web Docker image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
          push: true
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
            type=registry,ref=onyxdotapp/onyx-web-server:latest
          cache-to: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-backend-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-backend-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Backend Docker image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
          push: true
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
            type=registry,ref=onyxdotapp/onyx-backend:latest
          cache-to: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-model-server-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Model Server Docker image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
          push: true
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
            type=registry,ref=onyxdotapp/onyx-model-server:latest
          cache-to: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  playwright-tests:
    needs: [build-web-image, build-backend-image, build-model-server-image]
    name: Playwright Tests (${{ matrix.project }})
    permissions:
      id-token: write # Required for OIDC-based AWS credential exchange (S3 access)
      contents: read
    runs-on:
      - runs-on
      - runner=8cpu-linux-arm64
      - "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
      - "extras=ecr-cache"
      - volume=50gb
    timeout-minutes: 45
    strategy:
      fail-fast: false
      matrix:
        project: [admin, exclusive]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup node
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning]
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-playwright-npm-

      - name: Install playwright browsers
        working-directory: ./web
        run: npx playwright install --with-deps

      - name: Create .env file for Docker Compose
        env:
          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
          EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          cat <<EOF > deployment/docker_compose/.env
          COMPOSE_PROFILES=s3-filestore
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
          LICENSE_ENFORCEMENT_ENABLED=false
          AUTH_TYPE=basic
          INTEGRATION_TESTS_MODE=true
          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
          EXA_API_KEY=${EXA_API_KEY_VALUE}
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
          EOF

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

          docker logs -f onyx-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds

          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))

            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi

            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")

            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
            elif [ "$response" = "curl_error" ]; then
              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi

            sleep 5
          done
          echo "Finished waiting for service."

      - name: Wait for MCP OAuth mock server
        run: |
          echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
          start_time=$(date +%s)
          timeout=120

          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))

            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
              exit 1
            fi

            if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
              echo "MCP OAuth mock server is ready!"
              break
            fi

            sleep 3
          done

      - name: Wait for MCP API Key mock server
        run: |
          echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
          start_time=$(date +%s)
          timeout=120

          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))

            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
              exit 1
            fi

            if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
              echo "MCP API Key mock server is ready!"
              break
            fi

            sleep 3
          done

      - name: Wait for web server to be ready
        run: |
          echo "Waiting for web server on port 3000..."
          start_time=$(date +%s)
          timeout=120

          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))

            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Web server did not become ready in ${timeout}s."
              exit 1
            fi

            if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
               curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
              echo "Web server is ready!"
              break
            fi

            echo "Web server not ready yet. Retrying in 3 seconds..."
            sleep 3
          done

      - name: Run Playwright tests
        working-directory: ./web
        env:
          PROJECT: ${{ matrix.project }}
        run: |
          npx playwright test --project ${PROJECT}

      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          # Includes test results and trace.zip files
          name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
          path: ./web/output/playwright/
          retention-days: 30

      - name: Upload screenshots
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
          path: ./web/output/screenshots/
          retention-days: 30

      # --- Visual Regression Diff ---
      - name: Configure AWS credentials
        if: always()
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Install the latest version of uv
        if: always()
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"

      - name: Determine baseline revision
        if: always()
        id: baseline-rev
        env:
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
          MERGE_GROUP_BASE_REF: ${{ github.event.merge_group.base_ref }}
          GH_REF: ${{ github.ref }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ "${EVENT_NAME}" = "pull_request" ]; then
            # PRs compare against the base branch (e.g. main, release/2.5)
            echo "rev=${BASE_REF}" >> "$GITHUB_OUTPUT"
          elif [ "${EVENT_NAME}" = "merge_group" ]; then
            # Merge queue compares against the target branch (e.g. refs/heads/main -> main)
            echo "rev=${MERGE_GROUP_BASE_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
          elif [[ "${GH_REF}" == refs/tags/* ]]; then
            # Tag builds compare against the tag name
            echo "rev=${REF_NAME}" >> "$GITHUB_OUTPUT"
          else
            # Push builds (main, release/*) compare against the branch name
            echo "rev=${REF_NAME}" >> "$GITHUB_OUTPUT"
          fi

      - name: Generate screenshot diff report
        if: always()
        env:
          PROJECT: ${{ matrix.project }}
          PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
          BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
        run: |
          uv run --no-sync --with onyx-devtools ods screenshot-diff compare \
            --project "${PROJECT}" \
            --rev "${BASELINE_REV}"

      - name: Upload visual diff report to S3
        if: always()
        env:
          PROJECT: ${{ matrix.project }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          RUN_ID: ${{ github.run_id }}
        run: |
          SUMMARY_FILE="web/output/screenshot-diff/${PROJECT}/summary.json"
          if [ ! -f "${SUMMARY_FILE}" ]; then
            echo "No summary file found — skipping S3 upload."
            exit 0
          fi

          HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")
          if [ "${HAS_DIFF}" != "true" ]; then
            echo "No visual differences for ${PROJECT} — skipping S3 upload."
            exit 0
          fi

          aws s3 sync "web/output/screenshot-diff/${PROJECT}/" \
            "s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/"

      - name: Upload visual diff summary
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: screenshot-diff-summary-${{ matrix.project }}
          path: ./web/output/screenshot-diff/${{ matrix.project }}/summary.json
          if-no-files-found: ignore
          retention-days: 5

      - name: Upload visual diff report artifact
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
          path: ./web/output/screenshot-diff/${{ matrix.project }}/
          if-no-files-found: ignore
          retention-days: 30

      - name: Update S3 baselines
        if: >-
          success() && (
            github.ref == 'refs/heads/main' ||
            startsWith(github.ref, 'refs/heads/release/') ||
            startsWith(github.ref, 'refs/tags/v') ||
            (
              github.event_name == 'merge_group' && (
                github.event.merge_group.base_ref == 'refs/heads/main' ||
                startsWith(github.event.merge_group.base_ref, 'refs/heads/release/')
              )
            )
          )
        env:
          PROJECT: ${{ matrix.project }}
          PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
          BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
        run: |
          if [ -d "web/output/screenshots/" ] && [ "$(ls -A web/output/screenshots/)" ]; then
            uv run --no-sync --with onyx-devtools ods screenshot-diff upload-baselines \
              --project "${PROJECT}" \
              --rev "${BASELINE_REV}" \
              --delete
          else
            echo "No screenshots to upload for ${PROJECT} — skipping baseline update."
          fi

      # save before stopping the containers so the logs can be captured
      - name: Save Docker logs
        if: success() || failure()
        env:
          WORKSPACE: ${{ github.workspace }}
        run: |
          cd deployment/docker_compose
          docker compose logs > docker-compose.log
          mv docker-compose.log ${WORKSPACE}/docker-compose.log

      - name: Upload logs
        if: success() || failure()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log

  playwright-tests-lite:
    needs: [build-web-image, build-backend-image]
    name: Playwright Tests (lite)
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-playwright-tests-lite"
      - "extras=ecr-cache"
    timeout-minutes: 30
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup node
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning]
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-playwright-npm-

      - name: Install playwright browsers
        working-directory: ./web
        run: npx playwright install --with-deps

      - name: Create .env file for Docker Compose
        env:
          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          cat <<EOF > deployment/docker_compose/.env
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          LICENSE_ENFORCEMENT_ENABLED=false
          AUTH_TYPE=basic
          INTEGRATION_TESTS_MODE=true
          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
          MOCK_LLM_RESPONSE=true
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
          EOF

      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Start Docker containers (lite)
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up -d
        id: start_docker

      - name: Run Playwright tests (lite)
        working-directory: ./web
        run: npx playwright test --project lite

      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: playwright-test-results-lite-${{ github.run_id }}
          path: ./web/output/playwright/
          retention-days: 30

      - name: Save Docker logs
        if: success() || failure()
        env:
          WORKSPACE: ${{ github.workspace }}
        run: |
          cd deployment/docker_compose
          docker compose logs > docker-compose.log
          mv docker-compose.log ${WORKSPACE}/docker-compose.log

      - name: Upload logs
        if: success() || failure()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-lite-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log

  # Post a single combined visual regression comment after all matrix jobs finish
  visual-regression-comment:
    needs: [playwright-tests]
    if: >-
      always() &&
      github.event_name == 'pull_request' &&
      needs.playwright-tests.result != 'cancelled'
    runs-on: ubuntu-slim
    timeout-minutes: 5
    permissions:
      pull-requests: write
    steps:
      - name: Download visual diff summaries
        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3
        with:
          pattern: screenshot-diff-summary-*
          path: summaries/

      - name: Post combined PR comment
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          RUN_ID: ${{ github.run_id }}
          REPO: ${{ github.repository }}
          S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
        run: |
          MARKER="<!-- visual-regression-report -->"

          # Build the markdown table from all summary files
          TABLE_HEADER="| Project | Changed | Added | Removed | Unchanged | Report |"
          TABLE_DIVIDER="|---------|---------|-------|---------|-----------|--------|"
          TABLE_ROWS=""
          HAS_ANY_SUMMARY=false

          for SUMMARY_DIR in summaries/screenshot-diff-summary-*/; do
            SUMMARY_FILE="${SUMMARY_DIR}summary.json"
            if [ ! -f "${SUMMARY_FILE}" ]; then
              continue
            fi

            HAS_ANY_SUMMARY=true
            PROJECT=$(jq -r '.project' "${SUMMARY_FILE}")
            CHANGED=$(jq -r '.changed' "${SUMMARY_FILE}")
            ADDED=$(jq -r '.added' "${SUMMARY_FILE}")
            REMOVED=$(jq -r '.removed' "${SUMMARY_FILE}")
            UNCHANGED=$(jq -r '.unchanged' "${SUMMARY_FILE}")
            TOTAL=$(jq -r '.total' "${SUMMARY_FILE}")
            HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")

            if [ "${TOTAL}" = "0" ]; then
              REPORT_LINK="_No screenshots_"
            elif [ "${HAS_DIFF}" = "true" ]; then
              REPORT_URL="https://${S3_BUCKET}.s3.us-east-2.amazonaws.com/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/index.html"
              REPORT_LINK="[View Report](${REPORT_URL})"
            else
              REPORT_LINK="✅ No changes"
            fi

            TABLE_ROWS="${TABLE_ROWS}| \`${PROJECT}\` | ${CHANGED} | ${ADDED} | ${REMOVED} | ${UNCHANGED} | ${REPORT_LINK} |\n"
          done

          if [ "${HAS_ANY_SUMMARY}" = "false" ]; then
            echo "No visual diff summaries found — skipping PR comment."
            exit 0
          fi

          BODY=$(printf '%s\n' \
            "${MARKER}" \
            "### 🖼️ Visual Regression Report" \
            "" \
            "${TABLE_HEADER}" \
            "${TABLE_DIVIDER}" \
            "$(printf '%b' "${TABLE_ROWS}")")

          # Upsert: find existing comment with the marker, or create a new one
          EXISTING_COMMENT_ID=$(gh api \
            "repos/${REPO}/issues/${PR_NUMBER}/comments" \
            --jq ".[] | select(.body | startswith(\"${MARKER}\")) | .id" \
            2>/dev/null | head -1)

          if [ -n "${EXISTING_COMMENT_ID}" ]; then
            gh api \
              --method PATCH \
              "repos/${REPO}/issues/comments/${EXISTING_COMMENT_ID}" \
              -f body="${BODY}"
          else
            gh api \
              --method POST \
              "repos/${REPO}/issues/${PR_NUMBER}/comments" \
              -f body="${BODY}"
          fi

  playwright-required:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
    needs: [playwright-tests, playwright-tests-lite]
    if: ${{ always() }}
    steps:
      - name: Check job status
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
        run: exit 1


================================================
FILE: .github/workflows/pr-python-checks.yml
================================================
name: Python Checks
concurrency:
  group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  mypy-check:
    # See https://runs-on.com/runners/linux/
    # Note: Mypy seems quite optimized for x64 compared to arm64.
    # Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
    runs-on:
      [
        runs-on,
        runner=2cpu-linux-x64,
        "run-id=${{ github.run_id }}-mypy-check",
        "extras=s3-cache",
      ]
    timeout-minutes: 45

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
        with:
          requirements: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/model_server.txt
            backend/requirements/ee.txt

      - name: Generate OpenAPI schema and Python client
        shell: bash
        # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
        env:
          LICENSE_ENFORCEMENT_ENABLED: "false"
        run: |
          ods openapi all

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
          restore-keys: |
            mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
            mypy-${{ runner.os }}-

      - name: Run MyPy
        env:
          MYPY_FORCE_COLOR: 1
          TERM: xterm-256color
        run: mypy .


================================================
FILE: .github/workflows/pr-python-connector-tests.yml
================================================
name: Connector Tests
concurrency:
  group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches: [main]
    paths:
      - "backend/**"
      - "pyproject.toml"
      - "uv.lock"
      - ".github/workflows/pr-python-connector-tests.yml"
      - ".github/actions/setup-python-and-install-dependencies/**"
      - ".github/actions/setup-playwright/**"
  push:
    tags:
      - "v*.*.*"
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"

permissions:
  id-token: write # Required for OIDC-based AWS credential exchange
  contents: read

env:
  PYTHONPATH: ./backend
  DISABLE_TELEMETRY: "true"
  R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS: ${{ vars.R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS }}
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  SF_USERNAME: ${{ vars.SF_USERNAME }}
  IMAP_HOST: ${{ vars.IMAP_HOST }}
  IMAP_USERNAME: ${{ vars.IMAP_USERNAME }}
  IMAP_MAILBOXES: ${{ vars.IMAP_MAILBOXES }}
  AIRTABLE_TEST_BASE_ID: ${{ vars.AIRTABLE_TEST_BASE_ID }}
  AIRTABLE_TEST_TABLE_ID: ${{ vars.AIRTABLE_TEST_TABLE_ID }}
  AIRTABLE_TEST_TABLE_NAME: ${{ vars.AIRTABLE_TEST_TABLE_NAME }}
  SHAREPOINT_CLIENT_ID: ${{ vars.SHAREPOINT_CLIENT_ID }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}
  BITBUCKET_EMAIL: ${{ vars.BITBUCKET_EMAIL }}

jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
    runs-on:
      [
        runs-on,
        runner=8cpu-linux-x64,
        "run-id=${{ github.run_id }}-connectors-check",
        "extras=s3-cache",
      ]
    timeout-minutes: 45
    environment: ci-protected

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
        with:
          requirements: |
            backend/requirements/default.txt
            backend/requirements/dev.txt

      - name: Setup Playwright
        uses: ./.github/actions/setup-playwright

      - name: Detect Connector changes
        id: changes
        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
        with:
          filters: |
            hubspot:
              - 'backend/onyx/connectors/hubspot/**'
              - 'backend/tests/daily/connectors/hubspot/**'
              - 'uv.lock'
            salesforce:
              - 'backend/onyx/connectors/salesforce/**'
              - 'backend/tests/daily/connectors/salesforce/**'
              - 'uv.lock'
            github:
              - 'backend/onyx/connectors/github/**'
              - 'backend/tests/daily/connectors/github/**'
              - 'uv.lock'
            file_processing:
              - 'backend/onyx/file_processing/**'
              - 'uv.lock'

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get connector test secrets from AWS Secrets Manager
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2
        with:
          parse-json-secrets: false
          secret-ids: |
            AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/aws-access-key-id
            AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/aws-secret-access-key
            R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/r2-access-key-id
            R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/r2-secret-access-key
            GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/gcs-access-key-id
            GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/gcs-secret-access-key
            CONFLUENCE_ACCESS_TOKEN, test/confluence-access-token
            CONFLUENCE_ACCESS_TOKEN_SCOPED, test/confluence-access-token-scoped
            JIRA_BASE_URL, test/jira-base-url
            JIRA_USER_EMAIL, test/jira-user-email
            JIRA_API_TOKEN, test/jira-api-token
            JIRA_API_TOKEN_SCOPED, test/jira-api-token-scoped
            GONG_ACCESS_KEY, test/gong-access-key
            GONG_ACCESS_KEY_SECRET, test/gong-access-key-secret
            GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR, test/google-drive-service-account-json
            GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1, test/google-drive-oauth-creds-test-user-1
            GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR, test/google-drive-oauth-creds
            GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR, test/google-gmail-service-account-json
            GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR, test/google-gmail-oauth-creds
            SLAB_BOT_TOKEN, test/slab-bot-token
            ZENDESK_SUBDOMAIN, test/zendesk-subdomain
            ZENDESK_EMAIL, test/zendesk-email
            ZENDESK_TOKEN, test/zendesk-token
            SF_PASSWORD, test/sf-password
            SF_SECURITY_TOKEN, test/sf-security-token
            HUBSPOT_ACCESS_TOKEN, test/hubspot-access-token
            IMAP_PASSWORD, test/imap-password
            AIRTABLE_ACCESS_TOKEN, test/airtable-access-token
            SHAREPOINT_CLIENT_SECRET, test/sharepoint-client-secret
            PERM_SYNC_SHAREPOINT_CLIENT_ID, test/perm-sync-sharepoint-client-id
            PERM_SYNC_SHAREPOINT_PRIVATE_KEY, test/perm-sync-sharepoint-private-key
            PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD, test/perm-sync-sharepoint-cert-password
            PERM_SYNC_SHAREPOINT_DIRECTORY_ID, test/perm-sync-sharepoint-directory-id
            ACCESS_TOKEN_GITHUB, test/github-access-token
            GITLAB_ACCESS_TOKEN, test/gitlab-access-token
            GITBOOK_SPACE_ID, test/gitbook-space-id
            GITBOOK_API_KEY, test/gitbook-api-key
            NOTION_INTEGRATION_TOKEN, test/notion-integration-token
            HIGHSPOT_KEY, test/highspot-key
            HIGHSPOT_SECRET, test/highspot-secret
            SLACK_BOT_TOKEN, test/slack-bot-token
            DISCORD_CONNECTOR_BOT_TOKEN, test/discord-bot-token
            TEAMS_APPLICATION_ID, test/teams-application-id
            TEAMS_DIRECTORY_ID, test/teams-directory-id
            TEAMS_SECRET, test/teams-secret
            BITBUCKET_WORKSPACE, test/bitbucket-workspace
            BITBUCKET_API_TOKEN, test/bitbucket-api-token
            FIREFLIES_API_KEY, test/fireflies-api-key

      - name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
            -n 8 \
            --dist loadfile \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/daily/connectors \
            --ignore backend/tests/daily/connectors/hubspot \
            --ignore backend/tests/daily/connectors/salesforce \
            --ignore backend/tests/daily/connectors/github \
            --ignore backend/tests/daily/connectors/coda

      - name: Run HubSpot Connector Tests
        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
            -n 8 \
            --dist loadfile \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/daily/connectors/hubspot

      - name: Run Salesforce Connector Tests
        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.salesforce == 'true' || steps.changes.outputs.file_processing == 'true' }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
            -n 8 \
            --dist loadfile \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/daily/connectors/salesforce

      - name: Run GitHub Connector Tests
        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.github == 'true' || steps.changes.outputs.file_processing == 'true' }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
            -n 8 \
            --dist loadfile \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/daily/connectors/github

      - name: Alert on Failure
        if: failure() && github.event_name == 'schedule'
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          REPO: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
            --data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
            $SLACK_WEBHOOK


================================================
FILE: .github/workflows/pr-python-model-tests.yml
================================================
name: Model Server Tests

on:
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"
  workflow_dispatch:

permissions:
  contents: read

env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AWS_REGION_NAME: ${{ vars.AWS_REGION_NAME }}

  # API keys for testing
  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
  AZURE_API_URL: ${{ vars.AZURE_API_URL }}

jobs:
  model-check:
    # See https://runs-on.com/runners/linux/
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-model-check"
      - "extras=ecr-cache"
    environment: ci-protected
    timeout-minutes: 45

    env:
      PYTHONPATH: ./backend

    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
        with:
          requirements: |
            backend/requirements/default.txt
            backend/requirements/dev.txt

      - name: Format branch name for cache
        id: format-branch
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ -n "${PR_NUMBER}" ]; then
            CACHE_SUFFIX="${PR_NUMBER}"
          else
            # shellcheck disable=SC2001
            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
          fi
          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f

      - name: Build and load
        uses: docker/bake-action@82490499d2e5613fcead7e128237ef0b0ea210f7 # ratchet:docker/bake-action@v7.0.0
        env:
          TAG: model-server-${{ github.run_id }}
        with:
          load: true
          targets: model-server
          set: |
            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
            model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max

      - name: Start Docker containers
        id: start_docker
        env:
          IMAGE_TAG: model-server-${{ github.run_id }}
        run: |
          cd deployment/docker_compose
          docker compose \
            -f docker-compose.yml \
            -f docker-compose.dev.yml \
            up -d --wait \
            inference_model_server

      - name: Run Tests
        run: |
          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding

      - name: Alert on Failure
        if: failure() && github.event_name == 'schedule'
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.SLACK_WEBHOOK }}
          failed-jobs: model-check
          title: "🚨 Scheduled Model Tests failed!"
          ref-name: ${{ github.ref_name }}

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs
          path: ${{ github.workspace }}/docker-compose.log


================================================
FILE: .github/workflows/pr-python-tests.yml
================================================
name: Python Unit Tests
concurrency:
  group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - 'release/**'
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  backend-check:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
    timeout-minutes: 45


    env:
      PYTHONPATH: ./backend
      REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
      DISABLE_TELEMETRY: "true"
      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
      LICENSE_ENFORCEMENT_ENABLED: "false"

    steps:
    - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
      with:
        persist-credentials: false

    - name: Setup Python and Install Dependencies
      uses: ./.github/actions/setup-python-and-install-dependencies
      with:
        requirements: |
          backend/requirements/default.txt
          backend/requirements/dev.txt
          backend/requirements/model_server.txt
          backend/requirements/ee.txt

    - name: Run Tests
      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
      run: py.test -o junit_family=xunit2 -xv --ff backend/tests/unit


================================================
FILE: .github/workflows/pr-quality-checks.yml
================================================
name: Quality Checks PR
concurrency:
  group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request: null
  push:
    branches:
      - main
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  quality-checks:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # ratchet:hashicorp/setup-terraform@v4.0.0
      - name: Setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with: # zizmor: ignore[cache-poisoning]
          node-version: 22
          cache: "npm"
          cache-dependency-path: ./web/package-lock.json
      - name: Install node dependencies
        working-directory: ./web
        run: npm ci
      - uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1
        with:
          prek-version: '0.3.4'
          extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
      - name: Check Actions
        uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
        with:
          check_permissions: false
          check_versions: false


================================================
FILE: .github/workflows/preview.yml
================================================
name: Preview Deployment
env:
  VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
  VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
  VERCEL_CLI: vercel@50.14.1
on:
  push:
    branches-ignore:
      - main
    paths:
      - "web/**"
permissions:
  contents: read
  pull-requests: write
jobs:
  Deploy-Preview:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
          cache: "npm"
          cache-dependency-path: ./web/package-lock.json

      - name: Pull Vercel Environment Information
        run: npx --yes ${{ env.VERCEL_CLI }} pull --yes --environment=preview --token=${{ secrets.VERCEL_TOKEN }}

      - name: Build Project Artifacts
        run: npx --yes ${{ env.VERCEL_CLI }} build --token=${{ secrets.VERCEL_TOKEN }}

      - name: Deploy Project Artifacts to Vercel
        id: deploy
        run: |
          DEPLOYMENT_URL=$(npx --yes ${{ env.VERCEL_CLI }} deploy --prebuilt --token=${{ secrets.VERCEL_TOKEN }})
          echo "url=$DEPLOYMENT_URL" >> "$GITHUB_OUTPUT"

      - name: Update PR comment with deployment URL
        if: always() && steps.deploy.outputs.url
        env:
          GH_TOKEN: ${{ github.token }}
          DEPLOYMENT_URL: ${{ steps.deploy.outputs.url }}
        run: |
          # Find the PR for this branch
          PR_NUMBER=$(gh pr list --head "$GITHUB_REF_NAME" --json number --jq '.[0].number')
          if [ -z "$PR_NUMBER" ]; then
            echo "No open PR found for branch $GITHUB_REF_NAME, skipping comment."
            exit 0
          fi

          COMMENT_MARKER="<!-- preview-deployment -->"
          COMMENT_BODY="$COMMENT_MARKER
          **Preview Deployment**

          | Status | Preview | Commit | Updated |
          | --- | --- | --- | --- |
          | ✅ |  $DEPLOYMENT_URL | \`${GITHUB_SHA::7}\` | $(date -u '+%Y-%m-%d %H:%M:%S UTC') |"

          # Find existing comment by marker
          EXISTING_COMMENT_ID=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \
            --jq ".[] | select(.body | startswith(\"$COMMENT_MARKER\")) | .id" | head -1)

          if [ -n "$EXISTING_COMMENT_ID" ]; then
            gh api "repos/$GITHUB_REPOSITORY/issues/comments/$EXISTING_COMMENT_ID" \
              --method PATCH --field body="$COMMENT_BODY"
          else
            gh pr comment "$PR_NUMBER" --body "$COMMENT_BODY"
          fi


================================================
FILE: .github/workflows/release-cli.yml
================================================
name: Release CLI

on:
  push:
    tags:
      - "cli/v*.*.*"

jobs:
  pypi:
    runs-on: ubuntu-latest
    environment:
      name: release-cli
    permissions:
      id-token: write
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
      - run: |
          for goos in linux windows darwin; do
            for goarch in amd64 arm64; do
              GOOS="$goos" GOARCH="$goarch" uv build --wheel
            done
          done
        working-directory: cli
      - run: uv publish
        working-directory: cli

  docker-amd64:
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-cli-amd64
      - extras=ecr-cache
    environment: deploy
    permissions:
      id-token: write
    timeout-minutes: 30
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-cli
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4

      - name: Login to Docker Hub
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7
        with:
          context: ./cli
          file: ./cli/Dockerfile
          platforms: linux/amd64
          cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: type=inline
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

  docker-arm64:
    runs-on:
      - runs-on
      - runner=2cpu-linux-arm64
      - run-id=${{ github.run_id }}-cli-arm64
      - extras=ecr-cache
    environment: deploy
    permissions:
      id-token: write
    timeout-minutes: 30
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-cli
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4

      - name: Login to Docker Hub
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7
        with:
          context: ./cli
          file: ./cli/Dockerfile
          platforms: linux/arm64
          cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: type=inline
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

  merge-docker:
    needs:
      - docker-amd64
      - docker-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-cli-merge
    environment: deploy
    permissions:
      id-token: write
    timeout-minutes: 10
    env:
      REGISTRY_IMAGE: onyxdotapp/onyx-cli
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4

      - name: Login to Docker Hub
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Create and push manifest
        env:
          AMD64_DIGEST: ${{ needs.docker-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.docker-arm64.outputs.digest }}
          TAG: ${{ github.ref_name }}
        run: |
          SANITIZED_TAG="${TAG#cli/}"
          IMAGES=(
            "${REGISTRY_IMAGE}@${AMD64_DIGEST}"
            "${REGISTRY_IMAGE}@${ARM64_DIGEST}"
          )

          if [[ "$TAG" =~ ^cli/v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            docker buildx imagetools create \
              -t "${REGISTRY_IMAGE}:${SANITIZED_TAG}" \
              -t "${REGISTRY_IMAGE}:latest" \
              "${IMAGES[@]}"
          else
            docker buildx imagetools create \
              -t "${REGISTRY_IMAGE}:${SANITIZED_TAG}" \
              "${IMAGES[@]}"
          fi


================================================
FILE: .github/workflows/release-devtools.yml
================================================
name: Release Devtools

on:
  push:
    tags:
      - "ods/v*.*.*"

jobs:
  pypi:
    runs-on: ubuntu-latest
    environment:
      name: release-devtools
    permissions:
      id-token: write
    timeout-minutes: 10
    strategy:
      matrix:
        os-arch:
          - { goos: "linux", goarch: "amd64" }
          - { goos: "linux", goarch: "arm64" }
          - { goos: "windows", goarch: "amd64" }
          - { goos: "windows", goarch: "arm64" }
          - { goos: "darwin", goarch: "amd64" }
          - { goos: "darwin", goarch: "arm64" }
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
      - run: |
          GOOS="${{ matrix.os-arch.goos }}" \
          GOARCH="${{ matrix.os-arch.goarch }}" \
          uv build --wheel
        working-directory: tools/ods
      - run: uv publish
        working-directory: tools/ods


================================================
FILE: .github/workflows/reusable-nightly-llm-provider-chat.yml
================================================
name: Reusable Nightly LLM Provider Chat Tests

on:
  workflow_call:
    inputs:
      openai_models:
        description: "Comma-separated models for openai"
        required: false
        default: ""
        type: string
      anthropic_models:
        description: "Comma-separated models for anthropic"
        required: false
        default: ""
        type: string
      bedrock_models:
        description: "Comma-separated models for bedrock"
        required: false
        default: ""
        type: string
      vertex_ai_models:
        description: "Comma-separated models for vertex_ai"
        required: false
        default: ""
        type: string
      azure_models:
        description: "Comma-separated models for azure"
        required: false
        default: ""
        type: string
      ollama_models:
        description: "Comma-separated models for ollama_chat"
        required: false
        default: ""
        type: string
      openrouter_models:
        description: "Comma-separated models for openrouter"
        required: false
        default: ""
        type: string
      azure_api_base:
        description: "API base for azure provider"
        required: false
        default: ""
        type: string
      strict:
        description: "Default NIGHTLY_LLM_STRICT passed to tests"
        required: false
        default: true
        type: boolean
    secrets:
      AWS_OIDC_ROLE_ARN:
        description: "AWS role ARN for OIDC auth"
        required: true

permissions:
  contents: read
  id-token: write

jobs:
  build-backend-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-backend-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    environment: ci-protected
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, test/docker-username
            DOCKER_TOKEN, test/docker-token

      - name: Build backend image
        uses: ./.github/actions/build-backend-image
        with:
          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
          ref-name: ${{ github.ref_name }}
          pr-number: ${{ github.event.pull_request.number }}
          github-sha: ${{ github.sha }}
          run-id: ${{ github.run_id }}
          docker-username: ${{ env.DOCKER_USERNAME }}
          docker-token: ${{ env.DOCKER_TOKEN }}
          docker-no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' && 'true' || 'false' }}

  build-model-server-image:
    runs-on:
      [
        runs-on,
        runner=1cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-model-server-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    environment: ci-protected
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, test/docker-username
            DOCKER_TOKEN, test/docker-token

      - name: Build model server image
        uses: ./.github/actions/build-model-server-image
        with:
          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
          ref-name: ${{ github.ref_name }}
          pr-number: ${{ github.event.pull_request.number }}
          github-sha: ${{ github.sha }}
          run-id: ${{ github.run_id }}
          docker-username: ${{ env.DOCKER_USERNAME }}
          docker-token: ${{ env.DOCKER_TOKEN }}

  build-integration-image:
    runs-on:
      [
        runs-on,
        runner=2cpu-linux-arm64,
        "run-id=${{ github.run_id }}-build-integration-image",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
    environment: ci-protected
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, test/docker-username
            DOCKER_TOKEN, test/docker-token

      - name: Build integration image
        uses: ./.github/actions/build-integration-image
        with:
          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
          ref-name: ${{ github.ref_name }}
          pr-number: ${{ github.event.pull_request.number }}
          github-sha: ${{ github.sha }}
          run-id: ${{ github.run_id }}
          docker-username: ${{ env.DOCKER_USERNAME }}
          docker-token: ${{ env.DOCKER_TOKEN }}

  provider-chat-test:
    needs:
      [
        build-backend-image,
        build-model-server-image,
        build-integration-image,
      ]
    strategy:
      fail-fast: false
      matrix:
        include:
          - provider: openai
            models: ${{ inputs.openai_models }}
            api_key_env: OPENAI_API_KEY
            custom_config_env: ""
            api_base: ""
            api_version: ""
            deployment_name: ""
            required: true
          - provider: anthropic
            models: ${{ inputs.anthropic_models }}
            api_key_env: ANTHROPIC_API_KEY
            custom_config_env: ""
            api_base: ""
            api_version: ""
            deployment_name: ""
            required: true
          - provider: bedrock
            models: ${{ inputs.bedrock_models }}
            api_key_env: BEDROCK_API_KEY
            custom_config_env: ""
            api_base: ""
            api_version: ""
            deployment_name: ""
            required: false
          - provider: vertex_ai
            models: ${{ inputs.vertex_ai_models }}
            api_key_env: ""
            custom_config_env: NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON
            api_base: ""
            api_version: ""
            deployment_name: ""
            required: false
          - provider: azure
            models: ${{ inputs.azure_models }}
            api_key_env: AZURE_API_KEY
            custom_config_env: ""
            api_base: ${{ inputs.azure_api_base }}
            api_version: "2025-04-01-preview"
            deployment_name: ""
            required: false
          - provider: ollama_chat
            models: ${{ inputs.ollama_models }}
            api_key_env: OLLAMA_API_KEY
            custom_config_env: ""
            api_base: "https://ollama.com"
            api_version: ""
            deployment_name: ""
            required: false
          - provider: openrouter
            models: ${{ inputs.openrouter_models }}
            api_key_env: OPENROUTER_API_KEY
            custom_config_env: ""
            api_base: "https://openrouter.ai/api/v1"
            api_version: ""
            deployment_name: ""
            required: false
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-nightly-${{ matrix.provider }}-provider-chat-test"
      - extras=ecr-cache
    timeout-minutes: 45
    environment: ci-protected
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          # Keep JSON values unparsed so vertex custom config is passed as raw JSON.
          parse-json-secrets: false
          secret-ids: |
            DOCKER_USERNAME, test/docker-username
            DOCKER_TOKEN, test/docker-token
            OPENAI_API_KEY, test/openai-api-key
            ANTHROPIC_API_KEY, test/anthropic-api-key
            BEDROCK_API_KEY, test/bedrock-api-key
            NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON, test/nightly-llm-vertex-ai-custom-config-json
            AZURE_API_KEY, test/azure-api-key
            OLLAMA_API_KEY, test/ollama-api-key
            OPENROUTER_API_KEY, test/openrouter-api-key

      - name: Run nightly provider chat test
        uses: ./.github/actions/run-nightly-provider-chat-test
        with:
          provider: ${{ matrix.provider }}
          models: ${{ matrix.models }}
          provider-api-key: ${{ matrix.api_key_env && env[matrix.api_key_env] || '' }}
          strict: ${{ inputs.strict && 'true' || 'false' }}
          api-base: ${{ matrix.api_base }}
          api-version: ${{ matrix.api_version }}
          deployment-name: ${{ matrix.deployment_name }}
          custom-config-json: ${{ matrix.custom_config_env && env[matrix.custom_config_env] || '' }}
          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
          run-id: ${{ github.run_id }}
          docker-username: ${{ env.DOCKER_USERNAME }}
          docker-token: ${{ env.DOCKER_TOKEN }}

      - name: Dump API server logs
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
          path: |
            ${{ github.workspace }}/api_server.log
            ${{ github.workspace }}/docker-compose.log

      - name: Stop Docker containers
        if: always()
        run: |
          cd deployment/docker_compose
          docker compose down -v


================================================
FILE: .github/workflows/sandbox-deployment.yml
================================================
name: Build and Push Sandbox Image on Tag

on:
  push:
    tags:
      - "experimental-cc4a.*"

# Restrictive defaults; jobs declare what they need.
permissions: {}

jobs:
  check-sandbox-changes:
    runs-on: ubuntu-slim
    timeout-minutes: 10
    permissions:
      contents: read
    outputs:
      sandbox-changed: ${{ steps.check.outputs.sandbox-changed }}
      new-version: ${{ steps.version.outputs.new-version }}
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Check for sandbox-relevant file changes
        id: check
        run: |
          # Get the previous tag to diff against
          CURRENT_TAG="${GITHUB_REF_NAME}"
          PREVIOUS_TAG=$(git tag --sort=-creatordate | grep '^experimental-cc4a\.' | grep -v "^${CURRENT_TAG}$" | head -n 1)

          if [ -z "$PREVIOUS_TAG" ]; then
            echo "No previous experimental-cc4a tag found, building unconditionally"
            echo "sandbox-changed=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "Comparing ${PREVIOUS_TAG}..${CURRENT_TAG}"

          # Check if any sandbox-relevant files changed
          SANDBOX_PATHS=(
            "backend/onyx/server/features/build/sandbox/"
          )

          CHANGED=false
          for path in "${SANDBOX_PATHS[@]}"; do
            if git diff --name-only "${PREVIOUS_TAG}..${CURRENT_TAG}" -- "$path" | grep -q .; then
              echo "Changes detected in: $path"
              CHANGED=true
              break
            fi
          done

          echo "sandbox-changed=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Determine new sandbox version
        id: version
        if: steps.check.outputs.sandbox-changed == 'true'
        run: |
          # Query Docker Hub for the latest versioned tag
          LATEST_TAG=$(curl -s "https://hub.docker.com/v2/repositories/onyxdotapp/sandbox/tags?page_size=100" \
            | jq -r '.results[].name' \
            | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
            | sort -V \
            | tail -n 1)

          if [ -z "$LATEST_TAG" ]; then
            echo "No existing version tags found on Docker Hub, starting at 0.1.1"
            NEW_VERSION="0.1.1"
          else
            CURRENT_VERSION="${LATEST_TAG#v}"
            echo "Latest version on Docker Hub: $CURRENT_VERSION"

            # Increment patch version
            MAJOR=$(echo "$CURRENT_VERSION" | cut -d. -f1)
            MINOR=$(echo "$CURRENT_VERSION" | cut -d. -f2)
            PATCH=$(echo "$CURRENT_VERSION" | cut -d. -f3)
            NEW_PATCH=$((PATCH + 1))
            NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          fi

          echo "New version: $NEW_VERSION"
          echo "new-version=$NEW_VERSION" >> "$GITHUB_OUTPUT"

  build-sandbox-amd64:
    needs: check-sandbox-changes
    if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-x64
      - run-id=${{ github.run_id }}-sandbox-amd64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    permissions:
      contents: read
      id-token: write
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/sandbox
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push AMD64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker
          file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile
          platforms: linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: |
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

  build-sandbox-arm64:
    needs: check-sandbox-changes
    if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'
    runs-on:
      - runs-on
      - runner=4cpu-linux-arm64
      - run-id=${{ github.run_id }}-sandbox-arm64
      - extras=ecr-cache
    timeout-minutes: 90
    environment: release
    permissions:
      contents: read
      id-token: write
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    env:
      REGISTRY_IMAGE: onyxdotapp/sandbox
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Build and push ARM64
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker
          file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: |
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
          cache-to: |
            type=inline
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

  merge-sandbox:
    needs:
      - check-sandbox-changes
      - build-sandbox-amd64
      - build-sandbox-arm64
    runs-on:
      - runs-on
      - runner=2cpu-linux-x64
      - run-id=${{ github.run_id }}-merge-sandbox
      - extras=ecr-cache
    timeout-minutes: 30
    environment: release
    permissions:
      id-token: write
    env:
      REGISTRY_IMAGE: onyxdotapp/sandbox
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: us-east-2

      - name: Get AWS Secrets
        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
        with:
          secret-ids: |
            DOCKER_USERNAME, deploy/docker-username
            DOCKER_TOKEN, deploy/docker-token
          parse-json-secrets: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
        with:
          username: ${{ env.DOCKER_USERNAME }}
          password: ${{ env.DOCKER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=v${{ needs.check-sandbox-changes.outputs.new-version }}
            type=raw,value=latest

      - name: Create and push manifest
        env:
          IMAGE_REPO: ${{ env.REGISTRY_IMAGE }}
          AMD64_DIGEST: ${{ needs.build-sandbox-amd64.outputs.digest }}
          ARM64_DIGEST: ${{ needs.build-sandbox-arm64.outputs.digest }}
          META_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
          docker buildx imagetools create \
            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
            $IMAGES


================================================
FILE: .github/workflows/storybook-deploy.yml
================================================
name: Storybook Deploy
env:
  VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
  VERCEL_PROJECT_ID: prj_sG49mVsA25UsxIPhN2pmBJlikJZM
  VERCEL_CLI: vercel@50.14.1
  VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }}

concurrency:
  group: storybook-deploy-production
  cancel-in-progress: true

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "web/lib/opal/**"
      - "web/src/refresh-components/**"
      - "web/.storybook/**"
      - "web/package.json"
      - "web/package-lock.json"
permissions:
  contents: read
jobs:
  Deploy-Storybook:
    runs-on: ubuntu-latest
    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
        with:
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
          cache: "npm"
          cache-dependency-path: ./web/package-lock.json

      - name: Install dependencies
        working-directory: web
        run: npm ci

      - name: Build Storybook
        working-directory: web
        run: npm run storybook:build

      - name: Deploy to Vercel (Production)
        working-directory: web
        run: npx --yes "$VERCEL_CLI" deploy storybook-static/ --prod --yes --token="$VERCEL_TOKEN"

  notify-slack-on-failure:
    needs: Deploy-Storybook
    if: always() && needs.Deploy-Storybook.result == 'failure'
    runs-on: ubuntu-latest
    environment: ci-protected
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
        with:
          persist-credentials: false
          sparse-checkout: .github/actions/slack-notify

      - name: Send Slack notification
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
          failed-jobs: "• Deploy-Storybook"
          title: "🚨 Storybook Deploy Failed"


================================================
FILE: .github/workflows/sync_foss.yml
================================================
name: Sync FOSS Repo

on:
  schedule:
    # Run daily at 3am PT (11am UTC during PST)
    - cron: '0 11 * * *'
  workflow_dispatch:

jobs:
  sync-foss:
    runs-on: ubuntu-latest
    environment: ci-protected
    timeout-minutes: 45
    permissions:
      contents: read
    steps:
      - name: Checkout main Onyx repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Install git-filter-repo
        run: |
          sudo apt-get update && sudo apt-get install -y git-filter-repo

      - name: Configure SSH for deploy key
        env:
          FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}
        run: |
          mkdir -p ~/.ssh
          echo "$FOSS_REPO_DEPLOY_KEY" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          ssh-keyscan github.com >> ~/.ssh/known_hosts

      - name: Set Git config
        run: |
          git config --global user.name "onyx-bot"
          git config --global user.email "bot@onyx.app"

      - name: Build FOSS version
        run: bash backend/scripts/make_foss_repo.sh

      - name: Push to FOSS repo
        env:
          FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git
        run: |
          cd /tmp/foss_repo
          git remote add public "$FOSS_REPO_URL"
          git push --force public main


================================================
FILE: .github/workflows/tag-nightly.yml
================================================
name: Nightly Tag Push

on:
  schedule:
    - cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
  workflow_dispatch:

permissions:
  contents: write # Allows pushing tags to the repository

jobs:
  create-and-push-tag:
    runs-on: ubuntu-slim
    environment: ci-protected
    timeout-minutes: 45

    steps:
      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
      # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
      # implement here which needs an actual user's deploy key
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          ssh-key: "${{ secrets.DEPLOY_KEY }}"
          persist-credentials: true

      - name: Set up Git user
        run: |
          git config user.name "Onyx Bot [bot]"
          git config user.email "onyx-bot[bot]@onyx.app"

      - name: Check for existing nightly tag
        id: check_tag
        run: |
          if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
            echo "A tag starting with 'nightly-latest' already exists on HEAD."
            echo "tag_exists=true" >> $GITHUB_OUTPUT
          else
            echo "No tag starting with 'nightly-latest' exists on HEAD."
            echo "tag_exists=false" >> $GITHUB_OUTPUT
          fi

      # don't tag again if HEAD already has a nightly-latest tag on it
      - name: Create Nightly Tag
        if: steps.check_tag.outputs.tag_exists == 'false'
        env:
          DATE: ${{ github.run_id }}
        run: |
          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
          echo "Creating tag: $TAG_NAME"
          git tag $TAG_NAME

      - name: Push Tag
        if: steps.check_tag.outputs.tag_exists == 'false'
        run: |
          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
          git push origin $TAG_NAME

      - name: Send Slack notification
        if: failure()
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
          title: "🚨 Nightly Tag Push Failed"
          ref-name: ${{ github.ref_name }}
          failed-jobs: "create-and-push-tag"


================================================
FILE: .github/workflows/zizmor.yml
================================================
name: Run Zizmor

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["**"]
    paths:
      - ".github/**"

permissions: {}

jobs:
  zizmor:
    name: zizmor
    runs-on: ubuntu-slim
    timeout-minutes: 45
    permissions:
      security-events: write # needed for SARIF uploads
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
        with:
          persist-credentials: false

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"

      - name: Run zizmor
        run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
        with:
          sarif_file: results.sarif
          category: zizmor


================================================
FILE: .gitignore
================================================
# editors
.vscode/*
!/.vscode/env_template.txt
!/.vscode/env.web_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor
!/.cursor/mcp.json
!/.cursor/skills/

# macos
.DS_store

# python
.venv
.mypy_cache
.idea

# testing
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
*.csv

# secret files
.env
jira_test_env
settings.json

# others
/deployment/data/nginx/app.conf
/deployment/data/nginx/mcp.conf.inc
/deployment/data/nginx/mcp_upstream.conf.inc
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info

# Local .terraform directories
**/.terraform/*

# Local .tfstate files
*.tfstate
*.tfstate.*

# Local .terraform.lock.hcl file
.terraform.lock.hcl

node_modules

# MCP configs
.playwright-mcp

# plans
plans/


================================================
FILE: .greptile/config.json
================================================
{
    "labels": [],
    "comment": "",
    "fixWithAI": true,
    "hideFooter": false,
    "strictness": 3,
    "statusCheck": true,
    "commentTypes": [
      "logic",
      "syntax",
      "style"
    ],
    "instructions": "",
    "disabledLabels": [],
    "excludeAuthors": [
      "dependabot[bot]",
      "renovate[bot]"
    ],
    "ignoreKeywords": "",
    "ignorePatterns": "",
    "includeAuthors": [],
    "summarySection": {
      "included": true,
      "collapsible": false,
      "defaultOpen": false
    },
    "excludeBranches": [],
    "fileChangeLimit": 300,
    "includeBranches": [],
    "includeKeywords": "",
    "triggerOnUpdates": true,
    "updateExistingSummaryComment": true,
    "updateSummaryOnly": false,
    "issuesTableSection": {
      "included": true,
      "collapsible": false,
      "defaultOpen": false
    },
    "statusCommentsEnabled": true,
    "confidenceScoreSection": {
      "included": true,
      "collapsible": false
    },
    "sequenceDiagramSection": {
      "included": true,
      "collapsible": false,
      "defaultOpen": false
    },
    "shouldUpdateDescription": false,
    "rules": [
      {
        "scope": ["web/**"],
        "rule": "In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs."
      },
      {
        "scope": ["web/**"],
        "rule": "In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors."
      },
      {
        "scope": ["backend/**/*.py"],
        "rule": "Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`."
      }
    ]
}


================================================
FILE: .greptile/files.json
================================================
[
  {
    "scope": [],
    "path": "contributing_guides/best_practices.md",
    "description": "Best practices for contributing to the codebase"
  },
  {
    "scope": ["web/**"],
    "path": "web/AGENTS.md",
    "description": "Frontend coding standards for the web directory"
  },
  {
    "scope": ["web/**"],
    "path": "web/tests/README.md",
    "description": "Frontend testing guide and conventions"
  },
  {
    "scope": ["web/**"],
    "path": "web/CLAUDE.md",
    "description": "Single source of truth for frontend coding standards"
  },
  {
    "scope": ["web/**"],
    "path": "web/lib/opal/README.md",
    "description": "Opal component library usage guide"
  },
  {
    "scope": ["backend/**"],
    "path": "backend/tests/README.md",
    "description": "Backend testing guide covering all 4 test types, fixtures, and conventions"
  },
  {
    "scope": ["backend/onyx/connectors/**"],
    "path": "backend/onyx/connectors/README.md",
    "description": "Connector development guide covering design, interfaces, and required changes"
  },
  {
    "scope": [],
    "path": "CLAUDE.md",
    "description": "Project instructions and coding standards"
  },
  {
    "scope": [],
    "path": "backend/alembic/README.md",
    "description": "Migration guidance, including multi-tenant migration behavior"
  },
  {
    "scope": [],
    "path": "deployment/helm/charts/onyx/values-lite.yaml",
    "description": "Lite deployment Helm values and service assumptions"
  },
  {
    "scope": [],
    "path": "deployment/docker_compose/docker-compose.onyx-lite.yml",
    "description": "Lite deployment Docker Compose overlay and disabled service behavior"
  }
]


================================================
FILE: .greptile/rules.md
================================================
# Greptile Review Rules

## Type Annotations

Use explicit type annotations for variables to enhance code clarity, especially when moving type hints around in the code.

## Best Practices

Use the "Engineering Best Practices" section of `CONTRIBUTING.md` as core review context. Prefer consistency with existing patterns, fix issues in code you touch, avoid tacking new features onto muddy interfaces, fail loudly instead of silently swallowing errors, keep code strictly typed, preserve clear state boundaries, remove duplicate or dead logic, break up overly long functions, avoid hidden import-time side effects, respect module boundaries, and favor correctness-by-construction over relying on callers to use an API correctly.

## TODOs

Whenever a TODO is added, there must always be an associated name or ticket with that TODO in the style of `TODO(name): ...` or `TODO(1234): ...`

## Debugging Code

Remove temporary debugging code before merging to production, especially tenant-specific debugging logs.

## Hardcoded Booleans

When hardcoding a boolean variable to a constant value, remove the variable entirely and clean up all places where it's used rather than just setting it to a constant.

## Multi-tenant vs Single-tenant

Code changes must consider both multi-tenant and single-tenant deployments. In multi-tenant mode, preserve tenant isolation, ensure tenant context is propagated correctly, and avoid assumptions that only hold for a single shared schema or globally shared state. In single-tenant mode, avoid introducing unnecessary tenant-specific requirements or cloud-only control-plane dependencies.

## Nginx Routing — New Backend Routes

Whenever a new backend route is added that does NOT start with `/api`, it must also be explicitly added to ALL nginx configs:

- `deployment/helm/charts/onyx/templates/nginx-conf.yaml` (Helm/k8s)
- `deployment/data/nginx/app.conf.template` (docker-compose dev)
- `deployment/data/nginx/app.conf.template.prod` (docker-compose prod)
- `deployment/data/nginx/app.conf.template.no-letsencrypt` (docker-compose no-letsencrypt)

Routes not starting with `/api` are not caught by the existing `^/(api|openapi\.json)` location block and will fall through to `location /`, which proxies to the Next.js web server and returns an HTML 404. The new location block must be placed before the `/api` block. Examples of routes that need this treatment: `/scim`, `/mcp`.

## Full vs Lite Deployments

Code changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments.

## SWR Cache Keys — Always Use SWR_KEYS Registry

All `useSWR()` calls and `mutate()` calls in the frontend must reference the centralized `SWR_KEYS` registry in `web/src/lib/swr-keys.ts` instead of inline endpoint strings or local string constants. Never write `useSWR("/api/some/endpoint", ...)` or `mutate("/api/some/endpoint")` — always use the corresponding `SWR_KEYS.someEndpoint` constant. If the endpoint does not yet exist in the registry, add it there first. This applies to all variants of an endpoint (e.g. query-string variants like `?get_editable=true` must also be registered as their own key).


================================================
FILE: .pre-commit-config.yaml
================================================
default_install_hook_types:
  - pre-commit
  - post-checkout
  - post-merge
  - post-rewrite
repos:
  - repo: https://github.com/astral-sh/uv-pre-commit
    # From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
    rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
    hooks:
      - id: uv-sync
        args: ["--locked", "--all-extras"]
      - id: uv-lock
      - id: uv-export
        name: uv-export default.txt
        args:
          [
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
            "--extra",
            "backend",
            "-o",
            "backend/requirements/default.txt",
          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export dev.txt
        args:
          [
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
            "--extra",
            "dev",
            "-o",
            "backend/requirements/dev.txt",
          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export ee.txt
        args:
          [
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
            "--extra",
            "ee",
            "-o",
            "backend/requirements/ee.txt",
          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export model_server.txt
        args:
          [
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
            "--extra",
            "model_server",
            "-o",
            "backend/requirements/model_server.txt",
          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-run
        name: Check lazy imports
        args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
        pass_filenames: true
        files: ^backend/(?!\.venv/|scripts/).*\.py$
      # NOTE: This takes ~6s on a single, large module which is prohibitively slow.
      # - id: uv-run
      #   name: mypy
      #   args: ["--all-extras", "mypy"]
      #   pass_filenames: true
      #   files: ^backend/.*\.py$

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
    hooks:
      - id: check-added-large-files
        name: Check for added large files
        args: ["--maxkb=1500"]

  - repo: https://github.com/rhysd/actionlint
    rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
    hooks:
      - id: actionlint

  - repo: https://github.com/psf/black
    rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
    hooks:
      - id: black
        language_version: python3.11

  # this is a fork which keeps compatibility with black
  - repo: https://github.com/wimglenn/reorder-python-imports-black
    rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
    hooks:
      - id: reorder-python-imports
        args: ["--py311-plus", "--application-directories=backend/"]
        # need to ignore alembic files, since reorder-python-imports gets confused
        # and thinks that alembic is a local package since there is a folder
        # in the backend directory called `alembic`
        exclude: ^backend/alembic/

  # These settings will remove unused imports with side effects
  # Note: The repo currently does not and should not have imports with side effects
  - repo: https://github.com/PyCQA/autoflake
    rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
    hooks:
      - id: autoflake
        args:
          [
            "--remove-all-unused-imports",
            "--remove-unused-variables",
            "--in-place",
            "--recursive",
          ]

  - repo: https://github.com/golangci/golangci-lint
    rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
    hooks:
      - id: golangci-lint
        language_version: "1.26.1"
        entry: bash -c "find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"

  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
    hooks:
      - id: ruff

  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
    hooks:
      - id: prettier
        types_or: [html, css, javascript, ts, tsx]
        language_version: system

  - repo: https://github.com/sirwart/ripsecrets
    rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
    hooks:
      - id: ripsecrets
        args:
          - --additional-pattern
          - ^sk-[A-Za-z0-9_\-]{20,}$

  - repo: local
    hooks:
      - id: terraform-fmt
        name: terraform fmt
        entry: terraform fmt -recursive
        language: system
        pass_filenames: false
        files: \.tf$

      - id: npm-install
        name: npm install
        description: "Automatically run 'npm install' after a checkout, pull or rebase"
        language: system
        entry: bash -c 'cd web && npm install --no-save'
        pass_filenames: false
        files: ^web/package(-lock)?\.json$
        stages: [post-checkout, post-merge, post-rewrite]
      - id: npm-install-check
        name: npm install --package-lock-only
        description: "Check the 'web/package-lock.json' is updated"
        language: system
        entry: bash -c 'cd web && npm install --package-lock-only'
        pass_filenames: false
        files: ^web/package(-lock)?\.json$

      # Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
      # This is a preview package - if it breaks:
      #   1. Try updating: cd web && npm update @typescript/native-preview
      #   2. Or fallback to tsc: replace 'tsgo' with 'tsc' below
      - id: typescript-check
        name: TypeScript type check
        entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
        language: system
        pass_filenames: false
        files: ^web/.*\.(ts|tsx)$


================================================
FILE: .prettierignore
================================================
backend/tests/integration/tests/pruning/website


================================================
FILE: .vscode/env.web_template.txt
================================================
# Copy this file to .env.web in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed
# Web Server specific environment variables
# Minimal set needed for Next.js dev server

# Auth
AUTH_TYPE=basic
DEV_MODE=true

# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false

# Enable Onyx Craft
ENABLE_CRAFT=true


================================================
FILE: .vscode/env_template.txt
================================================
# Copy this file to .env in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed; it is recommended to set the
# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.
# Also check out onyx/backend/scripts/restart_containers.sh for a script to
# restart the containers which Onyx relies on outside of VSCode/Cursor
# processes.


AUTH_TYPE=basic
# Recommended for basic auth - used for signing password reset and verification tokens
# Generate a secure value with: openssl rand -hex 32
USER_AUTH_SECRET=""
DEV_MODE=true


# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
LOG_ONYX_MODEL_INTERACTIONS=False
# More verbose logging
LOG_LEVEL=debug


# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
OPENID_CONFIG_URL=<REPLACE THIS>
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config


# Generally not useful for dev, we don't generally want to set up an SMTP server
# for dev.
REQUIRE_EMAIL_VERIFICATION=False


# Set these so if you wipe the DB, you don't end up having to go through the UI
# every time.
GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
GEN_AI_MODEL_VERSION=gpt-4o


# Python stuff
PYTHONPATH=../backend
PYTHONUNBUFFERED=1


# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False


# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin


# Show extra/uncommon connectors.
SHOW_EXTRA_CONNECTORS=True


# Local langsmith tracing
LANGSMITH_TRACING="true"
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<REPLACE_THIS>
LANGSMITH_PROJECT=<REPLACE_THIS>


# Local Confluence OAuth testing
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
# NEXT_PUBLIC_TEST_ENV=True


# OpenSearch
# Arbitrary password is fine for local development.
OPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>


================================================
FILE: .vscode/launch.json
================================================
{
  // Use IntelliSense to learn about possible attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "compounds": [
    {
      // Dummy entry used to label the group
      "name": "--- Compound ---",
      "configurations": ["--- Individual ---"],
      "presentation": {
        "group": "1"
      }
    },
    {
      "name": "Run All Onyx Services",
      "configurations": [
        "Web Server",
        "Model Server",
        "API Server",
        "MCP Server",
        "Slack Bot",
        "Celery primary",
        "Celery light",
        "Celery heavy",
        "Celery docfetching",
        "Celery docprocessing",
        "Celery user_file_processing",
        "Celery beat"
      ],
      "presentation": {
        "group": "1"
      }
    },
    {
      "name": "Web / Model / API",
      "configurations": ["Web Server", "Model Server", "API Server"],
      "presentation": {
        "group": "1"
      }
    },
    {
      "name": "Celery",
      "configurations": [
        "Celery primary",
        "Celery light",
        "Celery heavy",
        "Celery kg_processing",
        "Celery monitoring",
        "Celery user_file_processing",
        "Celery docfetching",
        "Celery docprocessing",
        "Celery beat"
      ],
      "presentation": {
        "group": "1"
      },
      "stopAll": true
    }
  ],
  "configurations": [
    {
      // Dummy entry used to label the group
      "name": "--- Individual ---",
      "type": "node",
      "request": "launch",
      "presentation": {
        "group": "2",
        "order": 0
      }
    },
    {
      "name": "Web Server",
      "type": "node",
      "request": "launch",
      "cwd": "${workspaceRoot}/web",
      "runtimeExecutable": "npm",
      "envFile": "${workspaceFolder}/.vscode/.env.web",
      "runtimeArgs": ["run", "dev"],
      "presentation": {
        "group": "2"
      },
      "console": "integratedTerminal",
      "consoleTitle": "Web Server Console"
    },
    {
      "name": "Model Server",
      "consoleName": "Model Server",
      "type": "debugpy",
      "request": "launch",
      "module": "uvicorn",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1"
      },
      "args": ["model_server.main:app", "--reload", "--port", "9000"],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Model Server Console"
    },
    {
      "name": "API Server",
      "consoleName": "API Server",
      "type": "debugpy",
      "request": "launch",
      "module": "uvicorn",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1"
      },
      "args": ["onyx.main:app", "--reload", "--port", "8080"],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "API Server Console",
      "justMyCode": false
    },
    {
      "name": "Slack Bot",
      "consoleName": "Slack Bot",
      "type": "debugpy",
      "request": "launch",
      "program": "onyx/onyxbot/slack/listener.py",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Slack Bot Console"
    },
    {
      "name": "Discord Bot",
      "consoleName": "Discord Bot",
      "type": "debugpy",
      "request": "launch",
      "program": "onyx/onyxbot/discord/client.py",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Discord Bot Console"
    },
    {
      "name": "MCP Server",
      "consoleName": "MCP Server",
      "type": "debugpy",
      "request": "launch",
      "module": "uvicorn",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "MCP_SERVER_ENABLED": "true",
        "MCP_SERVER_PORT": "8090",
        "MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1"
      },
      "args": [
        "onyx.mcp_server.api:mcp_app",
        "--reload",
        "--port",
        "8090",
        "--timeout-graceful-shutdown",
        "0"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "MCP Server Console"
    },
    {
      "name": "Celery primary",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.primary",
        "worker",
        "--pool=threads",
        "--concurrency=4",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=primary@%n",
        "-Q",
        "celery"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery primary Console"
    },
    {
      "name": "Celery light",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.light",
        "worker",
        "--pool=threads",
        "--concurrency=64",
        "--prefetch-multiplier=8",
        "--loglevel=INFO",
        "--hostname=light@%n",
        "-Q",
        "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup,opensearch_migration"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery light Console"
    },
    {
      "name": "Celery heavy",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.heavy",
        "worker",
        "--pool=threads",
        "--concurrency=4",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=heavy@%n",
        "-Q",
        "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery heavy Console",
      "justMyCode": false
    },
    {
      "name": "Celery kg_processing",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.kg_processing",
        "worker",
        "--pool=threads",
        "--concurrency=2",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=kg_processing@%n",
        "-Q",
        "kg_processing"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery kg_processing Console"
    },
    {
      "name": "Celery monitoring",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.monitoring",
        "worker",
        "--pool=threads",
        "--concurrency=1",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=monitoring@%n",
        "-Q",
        "monitoring"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery monitoring Console"
    },
    {
      "name": "Celery user_file_processing",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.user_file_processing",
        "worker",
        "--pool=threads",
        "--concurrency=2",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=user_file_processing@%n",
        "-Q",
        "user_file_processing,user_file_project_sync,user_file_delete"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery user_file_processing Console",
      "justMyCode": false
    },
    {
      "name": "Celery docfetching",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.docfetching",
        "worker",
        "--pool=threads",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=docfetching@%n",
        "-Q",
        "connector_doc_fetching"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery docfetching Console",
      "justMyCode": false
    },
    {
      "name": "Celery docprocessing",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "ENABLE_MULTIPASS_INDEXING": "false",
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.docprocessing",
        "worker",
        "--pool=threads",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=docprocessing@%n",
        "-Q",
        "docprocessing"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery docprocessing Console",
      "justMyCode": false
    },
    {
      "name": "Celery beat",
      "type": "debugpy",
      "request": "launch",
      "module": "celery",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-A",
        "onyx.background.celery.versioned_apps.beat",
        "beat",
        "--loglevel=INFO"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Celery beat Console"
    },
    {
      "name": "Pytest",
      "consoleName": "Pytest",
      "type": "debugpy",
      "request": "launch",
      "module": "pytest",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "-v"
        // Specify a specific module/test to run or provide nothing to run all tests
        // "tests/unit/onyx/llm/answering/test_prune_and_merge.py"
      ],
      "presentation": {
        "group": "2"
      },
      "consoleTitle": "Pytest Console"
    },
    {
      // Dummy entry used to label the group
      "name": "--- Tasks ---",
      "type": "node",
      "request": "launch",
      "presentation": {
        "group": "3",
        "order": 0
      }
    },
    {
      "name": "Clear and Restart External Volumes and Containers",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "bash",
      "runtimeArgs": [
        "${workspaceFolder}/backend/scripts/restart_containers.sh"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "3"
      }
    },
    {
      "name": "Eval CLI",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/backend/onyx/evals/eval_cli.py",
      "cwd": "${workspaceFolder}/backend",
      "console": "integratedTerminal",
      "justMyCode": false,
      "envFile": "${workspaceFolder}/.vscode/.env",
      "presentation": {
        "group": "3"
      },
      "env": {
        "LOG_LEVEL": "INFO",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": ["--verbose"],
      "consoleTitle": "Eval CLI Console"
    },
    {
      // Celery jobs launched through a single background script (legacy)
      // Recommend using the "Celery (all)" compound launch instead.
      "name": "Background Jobs",
      "consoleName": "Background Jobs",
      "type": "debugpy",
      "request": "launch",
      "program": "scripts/dev_run_background_jobs.py",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      }
    },
    {
      "name": "Install Python Requirements",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "sync",
        "--all-extras"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "3"
      }
    },
    {
      "name": "Build Sandbox Templates",
      "type": "debugpy",
      "request": "launch",
      "module": "onyx.server.features.build.sandbox.build_templates",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "console": "integratedTerminal",
      "presentation": {
        "group": "3"
      },
      "consoleTitle": "Build Sandbox Templates"
    },
    {
      // Dummy entry used to label the group
      "name": "--- Database ---",
      "type": "node",
      "request": "launch",
      "presentation": {
        "group": "4",
        "order": 0
      }
    },
    {
      "name": "Restore seeded database dump",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "run",
        "--with",
        "onyx-devtools",
        "ods",
        "db",
        "restore",
        "--fetch-seeded",
        "--yes"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "4"
      }
    },
    {
      "name": "Clean restore seeded database dump (destructive)",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "run",
        "--with",
        "onyx-devtools",
        "ods",
        "db",
        "restore",
        "--fetch-seeded",
        "--clean",
        "--yes"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "4"
      }
    },
    {
      "name": "Create database snapshot",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "run",
        "--with",
        "onyx-devtools",
        "ods",
        "db",
        "dump",
        "backup.dump"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "4"
      }
    },
    {
      "name": "Clean restore database snapshot (destructive)",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "run",
        "--with",
        "onyx-devtools",
        "ods",
        "db",
        "restore",
        "--clean",
        "--yes",
        "backup.dump"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "4"
      }
    },
    {
      "name": "Upgrade database to head revision",
      "type": "node",
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
        "run",
        "--with",
        "onyx-devtools",
        "ods",
        "db",
        "upgrade"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "presentation": {
        "group": "4"
      }
    },
    {
      // script to generate the openapi schema
      "name": "Onyx OpenAPI Schema Generator",
      "type": "debugpy",
      "request": "launch",
      "program": "backend/scripts/onyx_openapi_schema.py",
      "cwd": "${workspaceFolder}",
      "envFile": "${workspaceFolder}/.env",
      "env": {
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "backend"
      },
      "args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
    },
    {
      // script to debug multi tenant db issues
      "name": "Onyx DB Manager (Top Chunks)",
      "type": "debugpy",
      "request": "launch",
      "program": "scripts/debugging/onyx_db.py",
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.env",
      "env": {
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
      "args": [
        "--password",
        "your_password_here",
        "--port",
        "5433",
        "--report",
        "top-chunks",
        "--filename",
        "generated/tenants_by_num_docs.csv"
      ]
    },
    {
      "name": "Debug React Web App in Chrome",
      "type": "chrome",
      "request": "launch",
      "url": "http://localhost:3000",
      "webRoot": "${workspaceFolder}/web"
    }
  ]
}


================================================
FILE: .vscode/tasks.template.jsonc
================================================
{
    "version": "2.0.0",
    "tasks": [
        {
            "type": "austin",
            "label": "Profile celery beat",
            "envFile": "${workspaceFolder}/.env",
            "options": {
              "cwd": "${workspaceFolder}/backend"
            },
            "command": [
                "sudo",
                "-E"
            ],
            "args": [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.beat",
              "beat",
              "--loglevel=INFO"
            ]
        },
        {
            "type": "shell",
            "label": "Generate Onyx OpenAPI Python client",
            "cwd": "${workspaceFolder}/backend",
            "envFile": "${workspaceFolder}/.env",
            "options": {
              "cwd": "${workspaceFolder}/backend"
            },
            "command": [
                "openapi-generator"
            ],
            "args": [
                "generate",
                "-i",
                "generated/openapi.json",
                "-g",
                "python",
                "-o",
                "generated/onyx_openapi_client",
                "--package-name",
                "onyx_openapi_client",
            ]
        },
        {
            "type": "shell",
            "label": "Generate Typescript Fetch client (openapi-generator)",
            "envFile": "${workspaceFolder}/.env",
            "options": {
              "cwd": "${workspaceFolder}"
            },
            "command": [
                "openapi-generator"
            ],
            "args": [
                "generate",
                "-i",
                "backend/generated/openapi.json",
                "-g",
                "typescript-fetch",
                "-o",
                "${workspaceFolder}/web/src/lib/generated/onyx_api",
                "--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
            ]
        },
        {
            "type": "shell",
            "label": "Generate TypeScript Client (openapi-ts)",
            "envFile": "${workspaceFolder}/.env",
            "options": {
              "cwd": "${workspaceFolder}/web"
            },
            "command": [
                "npx"
            ],
            "args": [
                "openapi-typescript",
                "../backend/generated/openapi.json",
                "--output",
                "./src/lib/generated/onyx-schema.ts",
            ]
        },
        {
            "type": "shell",
            "label": "Generate TypeScript Client (orval)",
            "envFile": "${workspaceFolder}/.env",
            "options": {
              "cwd": "${workspaceFolder}/web"
            },
            "command": [
                "npx"
            ],
            "args": [
            	"orval",
                "--config",
                "orval.config.js",
            ]
        }
    ]
}


================================================
FILE: AGENTS.md
================================================
# PROJECT KNOWLEDGE BASE

This file provides guidance to AI agents when working with code in this repository.

## KEY NOTES

- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
  to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
  `a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
  make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
  outside of those directories.

## Project Overview

**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.

### Background Workers (Celery)

Onyx uses Celery for asynchronous task processing with multiple specialized workers:

#### Worker Types

1. **Primary Worker** (`celery_app.py`)
   - Coordinates core background tasks and system-wide operations
   - Handles connector management, document sync, pruning, and periodic checks
   - Runs with 4 threads concurrency
   - Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync

2. **Docfetching Worker** (`docfetching`)
   - Fetches documents from external data sources (connectors)
   - Spawns docprocessing tasks for each document batch
   - Implements watchdog monitoring for stuck connectors
   - Configurable concurrency (default from env)

3. **Docprocessing Worker** (`docprocessing`)
   - Processes fetched documents through the indexing pipeline:
     - Upserts documents to PostgreSQL
     - Chunks documents and adds contextual information
     - Embeds chunks via model server
     - Writes chunks to Vespa vector database
     - Updates document metadata
   - Configurable concurrency (default from env)

4. **Light Worker** (`light`)
   - Handles lightweight, fast operations
   - Tasks: vespa operations, document permissions sync, external group sync
   - Higher concurrency for quick tasks

5. **Heavy Worker** (`heavy`)
   - Handles resource-intensive operations
   - Primary task: document pruning operations
   - Runs with 4 threads concurrency

6. **KG Processing Worker** (`kg_processing`)
   - Handles Knowledge Graph processing and clustering
   - Builds relationships between documents
   - Runs clustering algorithms
   - Configurable concurrency

7. **Monitoring Worker** (`monitoring`)
   - System health monitoring and metrics collection
   - Monitors Celery queues, process memory, and system status
   - Single thread (monitoring doesn't need parallelism)
   - Cloud-specific monitoring tasks

8. **User File Processing Worker** (`user_file_processing`)
   - Processes user-uploaded files
   - Handles user file indexing and project synchronization
   - Configurable concurrency

9. **Beat Worker** (`beat`)
   - Celery's scheduler for periodic tasks
   - Uses DynamicTenantScheduler for multi-tenant support
   - Schedules tasks like:
     - Indexing checks (every 15 seconds)
     - Connector deletion checks (every 20 seconds)
     - Vespa sync checks (every 20 seconds)
     - Pruning checks (every 20 seconds)
     - KG processing (every 60 seconds)
     - Monitoring tasks (every 5 minutes)
     - Cleanup tasks (hourly)

#### Key Features

- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
  middleware layer that automatically finds the appropriate tenant ID when sending tasks
  via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL

#### Important Notes

**Defining Tasks**:

- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
  sending tasks, either from the beat schedule or directly from another task. It
  should never be acceptable to submit code which enqueues tasks without an
  expiration, as doing so can lead to unbounded task queue growth.

**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.

**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.

**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently 
disabled and won't work. Timeout logic must be implemented within the task itself.

### Code Quality

```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```

NOTE: Always make sure everything is strictly typed (both in Python and Typescript).

## Architecture Overview

### Technology Stack

- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models

### Directory Structure

```
backend/
├── onyx/
│   ├── auth/                    # Authentication & authorization
│   ├── chat/                    # Chat functionality & LLM interactions
│   ├── connectors/              # Data source connectors
│   ├── db/                      # Database models & operations
│   ├── document_index/          # Vespa integration
│   ├── federated_connectors/    # External search connectors
│   ├── llm/                     # LLM provider integrations
│   └── server/                  # API endpoints & routers
├── ee/                          # Enterprise Edition features
├── alembic/                     # Database migrations
└── tests/                       # Test suites

web/
├── src/app/                     # Next.js app router pages
├── src/components/              # Reusable React components
└── src/lib/                     # Utilities & business logic
```

## Frontend Standards

Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.

## Database & Migrations

### Running Migrations

```bash
# Standard migrations
alembic upgrade head

# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```

### Creating Migrations

```bash
# Create migration
alembic revision -m "description"

# Multi-tenant migration
alembic -n schema_private revision -m "description"
```

Write the migration manually and place it in the file that alembic creates when running the above command.

## Testing Strategy

First, you must activate the virtual environment with `source .venv/bin/activate`.

There are 4 main types of tests within Onyx:

### Unit Tests

These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.

To run them:

```bash
pytest -xv backend/tests/unit
```

### External Dependency Unit Tests

These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).

However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.

The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).

A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.

To run them:

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```

### Integration Tests

Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.

Tests are parallelized at a directory level.

When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).

A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.

To run them:

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```

### Playwright (E2E) Tests

These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, _including_ the Web Server.

Use these tests for anything that requires significant frontend <-> backend coordination.

Tests are located at `web/tests/e2e`. Tests are written in TypeScript.

To run them:

```bash
npx playwright test <TEST_NAME>
```

For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.

## Logs

When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.

## Security Considerations

- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection

## AI/LLM Integration

- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions

## Creating a Plan

When creating a plan in the `plans` directory, make sure to include at least these elements:

**Issues to Address**
What the change is meant to do.

**Important Notes**
Things you come across in your research that are important to the implementation.

**Implementation strategy**
How you are going to make the changes happen. High level approach.

**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.

Do NOT include these: _Timeline_, _Rollback plan_

This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.

Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

## Error Handling

**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**

A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.

```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError

# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")

# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)

# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)

# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")

# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```

Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.

**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:

```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```

## Best Practices

In addition to the other content in this file, best practices for contributing
to the codebase can be found in the "Engineering Best Practices" section of
`CONTRIBUTING.md`. Understand its contents and follow them.


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Onyx

Hey there! We are so excited that you're interested in Onyx.

## Table of Contents

- [Contribution Opportunities](#contribution-opportunities)
- [Contribution Process](#contribution-process)
- [Development Setup](#development-setup)
  - [Prerequisites](#prerequisites)
  - [Backend: Python Requirements](#backend-python-requirements)
  - [Frontend: Node Dependencies](#frontend-node-dependencies)
  - [Formatting and Linting](#formatting-and-linting)
- [Running the Application](#running-the-application)
  - [VSCode Debugger (Recommended)](#vscode-debugger-recommended)
  - [Manually Running for Development](#manually-running-for-development)
  - [Running in Docker](#running-in-docker)
- [macOS-Specific Notes](#macos-specific-notes)
- [Engineering Best Practices](#engineering-best-practices)
  - [Principles and Collaboration](#principles-and-collaboration)
  - [Style and Maintainability](#style-and-maintainability)
  - [Performance and Correctness](#performance-and-correctness)
  - [Repository Conventions](#repository-conventions)
- [Release Process](#release-process)
- [Getting Help](#getting-help)
- [Enterprise Edition Contributions](#enterprise-edition-contributions)

---

## Contribution Opportunities

The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.

If you have your own feature that you would like to build, please create an issue and community members can provide feedback and upvote if they feel a common need.

---

## Contribution Process

To contribute, please follow the
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.

### 1. Get the feature or enhancement approved

Create a GitHub issue and see if there are upvotes. If you feel the feature is sufficiently value-additive and you would like approval to contribute it to the repo, tag [Yuhong](https://github.com/yuhongsun96) to review.

If you do not get a response within a week, feel free to email yuhong@onyx.app and include the issue in the message.

Not all small features and enhancements will be accepted as there is a balance between feature richness and bloat. We strive to provide the best user experience possible so we have to be intentional about what we include in the app.

### 2. Get the design approved

The Onyx team will either provide a design doc and PRD for the feature or request one from you, the contributor. The scope and detail of the design will depend on the individual feature.

### 3. IP attribution for EE contributions

If you are contributing features to Onyx Enterprise Edition, you are required to sign the [IP Assignment Agreement](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md).

### 4. Review and testing

Your features must pass all tests and all comments must be addressed prior to merging.

### Implicit agreements

If we approve an issue, we are promising you the following:
- Your work will receive timely attention and we will put aside other important items to ensure you are not blocked.
- You will receive necessary coaching on eng quality, system design, etc. to ensure the feature is completed well.
- The Onyx team will pull resources and bandwidth from design, PM, and engineering to ensure that you have all the resources to build the feature to the quality required for merging.

Because this is a large investment from our team, we ask that you:
- Thoroughly read all the requirements of the design docs, engineering best practices, and try to minimize overhead for the Onyx team.
- Complete the feature in a timely manner to reduce context switching and an ongoing resource pull from the Onyx team.

---

## Development Setup

Onyx being a fully functional app, relies on some external software, specifically:

- [Postgres](https://www.postgresql.org/) (Relational DB)
- [OpenSearch](https://opensearch.org/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)

> **Note:**
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software.
> We believe this combination is easier for development purposes. If you prefer to use pre-built container images, see [Running in Docker](#running-in-docker) below.

### Prerequisites

- **Python 3.11** — If using a lower version, modifications will have to be made to the code. Higher versions may have library compatibility issues.
- **Docker** — Required for running external services (Postgres, OpenSearch, Redis, MinIO).
- **Node.js v22** — We recommend using [nvm](https://github.com/nvm-sh/nvm) to manage Node installations.

### Backend: Python Requirements

We use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).

```bash
uv venv .venv --python 3.11
source .venv/bin/activate
```

_For Windows, activate the virtual environment using Command Prompt:_

```bash
.venv\Scripts\activate
```

If using PowerShell, the command slightly differs:

```powershell
.venv\Scripts\Activate.ps1
```

Install the required Python dependencies:

```bash
uv sync --all-extras
```

Install Playwright for Python (headless browser required by the Web Connector):

```bash
uv run playwright install
```

### Frontend: Node Dependencies

```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```

Navigate to `onyx/web` and run:

```bash
npm i
```

### Formatting and Linting

#### Backend

Set up pre-commit hooks (black / reorder-python-imports):

```bash
uv run pre-commit install
```

We also use `mypy` for static type checking. Onyx is fully type-annotated, and we want to keep it that way! To run the mypy checks manually:

```bash
uv run mypy .  # from onyx/backend
```

#### Frontend

We use `prettier` for formatting. The desired version will be installed via `npm i` from the `onyx/web` directory. To run the formatter:

```bash
npx prettier --write .  # from onyx/web
```

Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail. Re-stage your changes and commit again.

---

## Running the Application

### VSCode Debugger (Recommended)

We highly recommend using VSCode's debugger for development.

#### Initial Setup

1. Copy `.vscode/env_template.txt` to `.vscode/.env`
2. Fill in the necessary environment variables in `.vscode/.env`

#### Using the Debugger

Before starting, make sure the Docker Daemon is running.

1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)
2. From the dropdown at the top, select "Clear and Restart External Volumes and Containers" and press the green play button
3. From the dropdown at the top, select "Run All Onyx Services" and press the green play button
4. Navigate to http://localhost:3000 in your browser to start using the app
5. Set breakpoints by clicking to the left of line numbers to help debug while the app is running
6. Use the debug toolbar to step through code, inspect variables, etc.

> **Note:** "Clear and Restart External Volumes and Containers" will reset your Postgres and OpenSearch (relational-db and index). Only run this if you are okay with wiping your data.

**Features:**
- Hot reload is enabled for the web server and API servers
- Python debugging is configured with debugpy
- Environment variables are loaded from `.vscode/.env`
- Console output is organized in the integrated terminal with labeled tabs

### Manually Running for Development

#### Docker containers for external software

You will need Docker installed to run these containers.

Navigate to `onyx/deployment/docker_compose`, then start up Postgres/OpenSearch/Redis/MinIO with:

```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
```

(index refers to OpenSearch, relational_db refers to Postgres, and cache refers to Redis)

#### Running Onyx locally

To start the frontend, navigate to `onyx/web` and run:

```bash
npm run dev
```

Next, start the model server which runs the local NLP models. Navigate to `onyx/backend` and run:

```bash
uvicorn model_server.main:app --reload --port 9000
```

_For Windows (for compatibility with both PowerShell and Command Prompt):_

```bash
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```

The first time running Onyx, you will need to run the DB migrations for Postgres. After the first time, this is no longer required unless the DB models change.

Navigate to `onyx/backend` and with the venv active, run:

```bash
alembic upgrade head
```

Next, start the task queue which orchestrates the background jobs. Still in `onyx/backend`, run:

```bash
python ./scripts/dev_run_background_jobs.py
```

To run the backend API server, navigate back to `onyx/backend` and run:

```bash
AUTH_TYPE=basic uvicorn onyx.main:app --reload --port 8080
```

_For Windows (for compatibility with both PowerShell and Command Prompt):_

```bash
powershell -Command "
    $env:AUTH_TYPE='basic'
    uvicorn onyx.main:app --reload --port 8080
"
```

> **Note:** If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.

#### Wrapping up

You should now have 4 servers running:

- Web server
- Backend API
- Model server
- Background jobs

Now, visit http://localhost:3000 in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.

You've successfully set up a local Onyx instance!

### Running in Docker

You can run the full Onyx application stack from pre-built images including all external software dependencies.

Navigate to `onyx/deployment/docker_compose` and run:

```bash
docker compose up -d
```

After Docker pulls and starts these containers, navigate to http://localhost:3000 to use Onyx.

If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes:

```bash
docker compose up -d --build
```

---

## macOS-Specific Notes

### Setting up Python

Ensure [Homebrew](https://brew.sh/) is already set up, then install Python 3.11:

```bash
brew install python@3.11
```

Add Python 3.11 to your path by adding the following line to `~/.zshrc`:

```
export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
```

> **Note:** You will need to open a new terminal for the path change above to take effect.

### Setting up Docker

On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and ensure it is running before continuing with the docker commands.

### Formatting and Linting

macOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly. After installing pre-commit, run the following command:

```bash
sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit
```

---

## Engineering Best Practices

> These are also what we adhere to as a team internally, we love to build in the open and to uplevel our community and each other through being transparent.

### Principles and Collaboration

- **Use 1-way vs 2-way doors.** For 2-way doors, move faster and iterate. For 1-way doors, be more deliberate.
- **Consistency > being "right."** Prefer consistent patterns across the codebase. If something is truly bad, fix it everywhere.
- **Fix what you touch (selectively).**
  - Don't feel obligated to fix every best-practice issue you notice.
  - Don't introduce new bad practices.
  - If your change touches code that violates best practices, fix it as part of the change.
- **Don't tack features on.** When adding functionality, restructure logically as needed to avoid muddying interfaces and accumulating tech debt.

### Style and Maintainability

#### Comments and readability
Add clear comments:
- At logical boundaries (e.g., interfaces) so the reader doesn't need to dig 10 layers deeper.
- Wherever assumptions are made or something non-obvious/unexpected is done.
- For complicated flows/functions.
- Wherever it saves time (e.g., nontrivial regex patterns).

#### Errors and exceptions
- **Fail loudly** rather than silently skipping work.
  - Example: raise and let exceptions propagate instead of silently dropping a document.
- **Don't overuse `try/except`.**
  - Put `try/except` at the correct logical level.
  - Do not mask exceptions unless it is clearly appropriate.

#### Typing
- Everything should be **as strictly typed as possible**.
- Use `cast` for annoying/loose-typed interfaces (e.g., results of `run_functions_tuples_in_parallel`).
  - Only `cast` when the type checker sees `Any` or types are too loose.
- Prefer types that are easy to read.
  - Avoid dense types like `dict[tuple[str, str], list[list[float]]]`.
  - Prefer domain models, e.g.:
    - `EmbeddingModel(provider_name, model_name)` as a Pydantic model
    - `dict[EmbeddingModel, list[EmbeddingVector]]`

#### State, objects, and boundaries
- Keep **clear logical boundaries** for state containers and objects.
- A **config** object should never contain things like a `db_session`.
- Avoid state containers that are overly nested, or huge + flat (use judgment).
- Prefer **composition and functional style** over inheritance/OOP.
- Prefer **no mutation** unless there's a strong reason.
- State objects should be **intentional and explicit**, ideally nonmutating.
- Use interfaces/objects to create clear separation of responsibility.
- Prefer simplicity when there's no clear gain.
  - Avoid overcomplicated mechanisms like semaphores.
  - Prefer **hash maps (dicts)** over tree structures unless there's a strong reason.

#### Naming
- Name variables carefully and intentionally.
- Prefer long, explicit names when undecided.
- Avoid single-character variables except for small, self-contained utilities (or not at all).
- Keep the same object/name consistent through the call stack and within functions when reasonable.
  - Good: `for token in tokens:`
  - Bad: `for msg in tokens:` (if iterating tokens)
- Function names should bias toward **long + descriptive** for codebase search.
  - IntelliSense can miss call sites; search works best with unique names.

#### Correctness by construction
- Prefer self-contained correctness — don't rely on callers to "use it right" if you can make misuse hard.
- Avoid redundancies: if a function takes an arg, it shouldn't also take a state object that contains that same arg.
- No dead code (unless there's a very good reason).
- No commented-out code in main or feature branches (unless there's a very good reason).
- No duplicate logic:
  - Don't copy/paste into branches when shared logic can live above the conditional.
  - If you're afraid to touch the original, you don't understand it well enough.
  - LLMs often create subtle duplicate logic — review carefully and remove it.
  - Avoid "nearly identical" objects that confuse when to use which.
- Avoid extremely long functions with chained logic:
  - Encapsulate steps into helpers for readability, even if not reused.
  - "Pythonic" multi-step expressions are OK in moderation; don't trade clarity for cleverness.

### Performance and Correctness

- Avoid holding resources for extended periods (DB sessions, locks/semaphores).
- Validate objects on creation and right before use.
- Connector code (data to Onyx documents):
  - Any in-memory structure that can grow without bound based on input must be periodically size-checked.
  - If a connector is OOMing (often shows up as "missing celery tasks"), this is a top thing to check retroactively.
- Async and event loops:
  - Never introduce new async/event loop Python code, and try to make existing async code synchronous when possible if it makes sense.
  - Writing async code without 100% understanding the code and having a concrete reason to do so is likely to introduce bugs and not add any meaningful performance gains.

### Repository Conventions

#### Where code lives
- Pydantic + data models: `models.py` files.
- DB interface functions (excluding lazy loading): `db/` directory.
- LLM prompts: `prompts/` directory, roughly mirroring the code layout that uses them.
- API routes: `server/` directory.

#### Pydantic and modeling
- Prefer **Pydantic** over dataclasses.
- If absolutely required, use `allow_arbitrary_types`.

#### Data conventions
- Prefer explicit `None` over sentinel empty strings (usually; depends on intent).
- Prefer explicit identifiers: use string enums instead of integer codes.
- Avoid magic numbers (co-location is good when necessary). **Always avoid magic strings.**

#### Logging
- Log messages where they are created.
- Don't propagate log messages around just to log them elsewhere.

#### Encapsulation
- Don't use private attributes/methods/properties from other classes/modules.
- "Private" is private — respect that boundary.

#### SQLAlchemy guidance
- Lazy loading is often bad at scale, especially across multiple list relationships.
- Be careful when accessing SQLAlchemy object attributes:
  - It can help avoid redundant DB queries,
  - but it can also fail if accessed outside an active session,
  - and lazy loading can add hidden DB dependencies to otherwise "simple" functions.
- Reference: https://www.reddit.com/r/SQLAlchemy/comments/138f248/joinedload_vs_selectinload/

#### Trunk-based development and feature flags
- **PRs should contain no more than 500 lines of real change.**
- **Merge to main frequently.** Avoid long-lived feature branches — they create merge conflicts and integration pain.
- **Use feature flags for incremental rollout.**
  - Large features should be merged in small, shippable increments behind a flag.
  - This allows continuous integration without exposing incomplete functionality.
- **Keep flags short-lived.** Once a feature is fully rolled out, remove the flag and dead code paths promptly.
- **Flag at the right level.** Prefer flagging at API/UI entry points rather than deep in business logic.
- **Test both flag states.** Ensure the codebase works correctly with the flag on and off.

#### Miscellaneous
- Any TODOs you add in the code must be accompanied by either the name/username of the owner of that TODO, or an issue number for an issue referencing that piece of work.
- Avoid module-level logic that runs on import, which leads to import-time side effects. Essentially every piece of meaningful logic should exist within some function that has to be explicitly invoked. Acceptable exceptions may include loading environment variables or setting up loggers.
  - If you find yourself needing something like this, you may want that logic to exist in a file dedicated for manual execution (contains `if __name__ == "__main__":`) which should not be imported by anything else.
- Do not conflate Python scripts you intend to run from the command line (contains `if __name__ == "__main__":`) with modules you intend to import from elsewhere. If for some unlikely reason they have to be the same file, any logic specific to executing the file (including imports) should be contained in the `if __name__ == "__main__":` block.
  - Generally these executable files exist in `backend/scripts/`.

---

## Release Process

Onyx loosely follows the SemVer versioning standard.
A set of Docker containers will be pushed automatically to DockerHub with every tag.
You can see the containers [here](https://hub.docker.com/search?q=onyx%2F).

---

## Getting Help

We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).

See you there!

---

## Enterprise Edition Contributions

If you are contributing features to Onyx Enterprise Edition (code under any `ee/` directory), you are required to sign the [IP Assignment Agreement](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md) ([PDF version](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.pdf)).


================================================
FILE: LICENSE
================================================
Copyright (c) 2023-present DanswerAI, Inc.

Portions of this software are licensed as follows:

- All content that resides under "ee" directories of this repository is licensed under the Onyx Enterprise License. Each ee directory contains an identical copy of this license at its root:
  - backend/ee/LICENSE
  - web/src/app/ee/LICENSE
  - web/src/ee/LICENSE
- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.
- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<a name="readme-top"></a>

<h2 align="center">
    <a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
</h2>

<p align="center">
    <a href="https://discord.gg/TDJ59cGV2X" target="_blank">
        <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
    </a>
    <a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
        <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
    </a>
    <a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
        <img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation" />
    </a>
    <a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
        <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
    </a>
</p>

<p align="center">
  <a href="https://trendshift.io/repositories/12516" target="_blank">
    <img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
  </a>
</p>

# Onyx - The Open Source AI Platform

**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is the application layer for LLMs - bringing a feature-rich interface that can be easily hosted by anyone.
Onyx enables LLMs through advanced capabilities like RAG, web search, code execution, file creation, deep research and more.

Connect your applications with over 50+ indexing based connectors provided out of the box or via MCP.

> [!TIP]
> Deploy with a single command:
> ```
> curl -fsSL https://onyx.app/install_onyx.sh | bash
> ```

![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v3.0.0/Onyx.gif)

---

## ⭐ Features

- **🔍 Agentic RAG:** Get best in class search and answer quality based on hybrid index + AI Agents for information retrieval
  - Benchmark to release soon!
- **🔬 Deep Research:** Get in depth reports with a multi-step research flow.
  - Top of [leaderboard](https://github.com/onyx-dot-app/onyx_deep_research_bench) as of Feb 2026.
- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge, and actions.
- **🌍 Web Search:** Browse the web to get up to date information.
  - Supports Serper, Google PSE, Brave, SearXNG, and others.
  - Comes with an in house web crawler and support for Firecrawl/Exa.
- **📄 Artifacts:** Generate documents, graphics, and other downloadable artifacts.
- **▶️ Actions & MCP:** Let Onyx agents interact with external applications, comes with flexible Auth options.
- **💻 Code Execution:** Execute code in a sandbox to analyze data, render graphs, or modify files.
- **🎙️ Voice Mode:** Chat with Onyx via text-to-speech and speech-to-text.
- **🎨 Image Generation:** Generate images based on user prompts.

Onyx supports all major LLM providers, both self-hosted (like Ollama, LiteLLM, vLLM, etc.) and proprietary (like Anthropic, OpenAI, Gemini, etc.).

To learn more - check out our [docs](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!

---

## 🚀 Deployment Modes

> Onyx supports deployments in Docker, Kubernetes, Helm/Terraform and provides guides for major cloud providers.
> Detailed deployment guides found [here](https://docs.onyx.app/deployment/overview).

Onyx supports two separate deployment options: standard and lite.

#### Onyx Lite

The Lite mode can be thought of as a lightweight Chat UI. It requires less resources (under 1GB memory) and runs a less complex stack.
It is great for users who want to test out Onyx quickly or for teams who are only interested in the Chat UI and Agents functionalities.

#### Standard Onyx

The complete feature set of Onyx which is recommended for serious users and larger teams. Additional components not included in Lite mode:
- Vector + Keyword index for RAG.
- Background containers to run job queues and workers for syncing knowledge from connectors.
- AI model inference servers to run deep learning models used during indexing and inference.
- Performance optimizations for large scale use via in memory cache (Redis) and blob store (MinIO).

> [!TIP]  
> **To try Onyx for free without deploying, visit [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.

---

## 🏢 Onyx for Enterprise

Onyx is built for teams of all sizes, from individual users to the largest global enterprises:
- 👥 Collaboration: Share chats and agents with other members of your organization.
- 🔐 Single Sign On: SSO via Google OAuth, OIDC, or SAML. Group syncing and user provisioning via SCIM.
- 🛡️ Role Based Access Control: RBAC for sensitive resources like access to agents, actions, etc.
- 📊 Analytics: Usage graphs broken down by teams, LLMs, or agents.
- 🕵️ Query History: Audit usage to ensure safe adoption of AI in your organization.
- 💻 Custom code: Run custom code to remove PII, reject sensitive queries, or to run custom analysis.
- 🎨 Whitelabeling: Customize the look and feel of Onyx with custom naming, icons, banners, and more.

## 📚 Licensing

There are two editions of Onyx:

- Onyx Community Edition (CE) is available freely under the MIT license and covers all of the core features for Chat, RAG, Agents, and Actions.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.

For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).

## 👪 Community

Join our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!

## 💡 Contributing

Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.


================================================
FILE: backend/.dockerignore
================================================
**/__pycache__
venv/
env/
*.egg-info
.cache
.git/
.svn/
.vscode/
.idea/
*.log
log/
.env
secrets.yaml
build/
dist/
.coverage
htmlcov/
model_server/legacy/

# Craft: demo_data directory should be unzipped at container startup, not copied
**/demo_data/
# Craft: templates/outputs/venv is created at container startup
**/templates/outputs/venv


================================================
FILE: backend/.gitignore
================================================
__pycache__/
.mypy_cache
.idea/
site_crawls/
.ipynb_checkpoints/
api_keys.py
*ipynb
.env*
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*
onyx/connectors/salesforce/data/
.test.env
/generated


================================================
FILE: backend/.trivyignore
================================================
# https://github.com/madler/zlib/issues/868
# Pulled in with base Debian image, it's part of the contrib folder but unused
# zlib1g is fine
# Will be gone with Debian image upgrade
# No impact in our settings
CVE-2023-45853

# krb5 related, worst case is denial of service by resource exhaustion
# Accept the risk
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462

# Specific to Firefox which we do not use
# No impact in our settings
CVE-2024-0743

# bind9 related, worst case is denial of service by CPU resource exhaustion
# Accept the risk
CVE-2023-50387
CVE-2023-50868
CVE-2023-50387
CVE-2023-50868

# libexpat1, XML parsing resource exhaustion
# We don't parse any user provided XMLs
# No impact in our settings
CVE-2023-52425
CVE-2024-28757

# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193


================================================
FILE: backend/Dockerfile
================================================
FROM python:3.11.7-slim-bookworm

LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is the web/frontend container of Onyx which \
contains code for both the Community and Enterprise editions of Onyx. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"

# Build argument for Craft support (disabled by default)
# Use --build-arg ENABLE_CRAFT=true to include Node.js and opencode CLI
ARG ENABLE_CRAFT=false

# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV DANSWER_RUNNING_IN_DOCKER="true" \
    DO_NOT_TRACK="true" \
    PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
    mkdir -p /var/log/onyx && \
    chmod 755 /var/log/onyx && \
    chown onyx:onyx /var/log/onyx

COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
# curl included just for users' convenience
# zip for Vespa step futher down
# ca-certificates for HTTPS
RUN apt-get update && \
    apt-get install -y \
        cmake \
        curl \
        zip \
        ca-certificates \
        libgnutls30 \
        libblkid1 \
        libmount1 \
        libsmartcols1 \
        libuuid1 \
        libxmlsec1-dev \
        pkg-config \
        gcc \
        nano \
        vim \
        # Install procps so kubernetes exec sessions can use ps aux for debugging
        procps \
        libjemalloc2 \
        && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

# Conditionally install Node.js 20 for Craft (required for Next.js)
# Only installed when ENABLE_CRAFT=true
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
        echo "Installing Node.js 20 for Craft support..." && \
        curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
        apt-get install -y nodejs && \
        rm -rf /var/lib/apt/lists/*; \
    fi

# Conditionally install opencode CLI for Craft agent functionality
# Only installed when ENABLE_CRAFT=true
# TODO: download a specific, versioned release of the opencode CLI
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
        echo "Installing opencode CLI for Craft support..." && \
        curl -fsSL https://opencode.ai/install | bash; \
    fi
ENV PATH="/root/.opencode/bin:${PATH}"

# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
        -r /tmp/requirements.txt \
        -r /tmp/ee-requirements.txt && \
    pip uninstall -y py && \
    playwright install chromium && \
    playwright install-deps chromium && \
    chown -R onyx:onyx /app && \
    ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
    # Cleanup for CVEs and size reduction
    # https://github.com/tornadoweb/tornado/issues/3107
    # xserver-common and xvfb included by playwright installation but not needed after
    # perl-base is part of the base Python Debian image but not needed for Onyx functionality
    # perl-base could only be removed with --allow-remove-essential
    apt-get update && \
    apt-get remove -y --allow-remove-essential \
        perl-base \
        xserver-common \
        xvfb \
        cmake \
        libldap-2.5-0 \
        libxmlsec1-dev \
        pkg-config \
        gcc && \
    # Install here to avoid some packages being cleaned up above
    apt-get install -y \
        libxmlsec1-openssl \
        # Install postgresql-client for easy manual tests
        postgresql-client && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf ~/.cache/uv /tmp/*.txt && \
    rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key

# Pre-downloading models for setups with limited egress
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"

# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
    nltk.download('stopwords', quiet=True); \
    nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed

# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"

# Set up application files
WORKDIR /app

# Enterprise Version Files
COPY --chown=onyx:onyx ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Set up application files
COPY --chown=onyx:onyx ./onyx /app/onyx
COPY --chown=onyx:onyx ./shared_configs /app/shared_configs
COPY --chown=onyx:onyx ./alembic /app/alembic
COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./keys /app/keys

# Escape hatch scripts
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
COPY --chown=onyx:onyx ./scripts/setup_craft_templates.sh /app/scripts/setup_craft_templates.sh
COPY --chown=onyx:onyx ./scripts/reencrypt_secrets.py /app/scripts/reencrypt_secrets.py
RUN chmod +x /app/scripts/supervisord_entrypoint.sh /app/scripts/setup_craft_templates.sh

# Run Craft template setup at build time when ENABLE_CRAFT=true
# This pre-bakes demo data, Python venv, and npm dependencies into the image
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
        echo "Running Craft template setup at build time..." && \
        ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \
    fi

# Set Craft template paths to the in-image locations
# These match the paths where setup_craft_templates.sh creates the templates
ENV OUTPUTS_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs
ENV VENV_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv

# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

ENV PYTHONPATH=/app

# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}

# Use jemalloc instead of glibc malloc to reduce memory fragmentation
# in long-running Python processes (API server, Celery workers).
# The soname is architecture-independent; the dynamic linker resolves
# the correct path from standard library directories.
# Placed after all RUN steps so build-time processes are unaffected.
ENV LD_PRELOAD=libjemalloc.so.2

# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]


================================================
FILE: backend/Dockerfile.model_server
================================================
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base

ENV DANSWER_RUNNING_IN_DOCKER="true" \
    HF_HOME=/app/.cache/huggingface

COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

RUN mkdir -p /app/.cache/huggingface

COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
        -r /tmp/requirements.txt && \
    rm -rf ~/.cache/uv /tmp/*.txt

# Stage for downloading embedding models
FROM base AS embedding-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download('nomic-ai/nomic-embed-text-v1');"

# Initialize SentenceTransformer to cache the custom architecture
RUN python -c "from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"

# Final stage - combine all downloads
FROM base AS final

LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
AI models for Onyx. This container and all the code is MIT Licensed and free for all to use. \
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."

# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
    mkdir -p /var/log/onyx && \
    chmod 755 /var/log/onyx && \
    chown onyx:onyx /var/log/onyx

# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface

WORKDIR /app

# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
COPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py

# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py

# Shared between Onyx Backend and Model Server
COPY ./shared_configs /app/shared_configs

# Model Server main code
COPY ./model_server /app/model_server

ENV PYTHONPATH=/app

# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}

CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]


================================================
FILE: backend/alembic/README.md
================================================
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->

# Alembic DB Migrations

These files are for creating/updating the tables in the Relational DB (Postgres).
Onyx migrations use a generic single-database configuration with an async dbapi.

## To generate new migrations:

From onyx/backend, run:
`alembic revision -m <DESCRIPTION_OF_MIGRATION>`

Note: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.

Manually populate the upgrade and downgrade in your new migration.

More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html

## Running migrations

To run all un-applied migrations:
`alembic upgrade head`

To undo migrations:
`alembic downgrade -X`
where X is the number of migrations you want to undo from the current state

### Multi-tenant migrations

For multi-tenant deployments, you can use additional options:

**Upgrade all tenants:**
```bash
alembic -x upgrade_all_tenants=true upgrade head
```

**Upgrade specific schemas:**
```bash
# Single schema
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012 upgrade head

# Multiple schemas (comma-separated)
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012,public,another_tenant upgrade head
```

**Upgrade tenants within an alphabetical range:**
```bash
# Upgrade tenants 100-200 when sorted alphabetically (positions 100 to 200)
alembic -x upgrade_all_tenants=true -x tenant_range_start=100 -x tenant_range_end=200 upgrade head

# Upgrade tenants starting from position 1000 alphabetically
alembic -x upgrade_all_tenants=true -x tenant_range_start=1000 upgrade head

# Upgrade first 500 tenants alphabetically
alembic -x upgrade_all_tenants=true -x tenant_range_end=500 upgrade head
```

**Continue on error (for batch operations):**
```bash
alembic -x upgrade_all_tenants=true -x continue=true upgrade head
```

The tenant range filtering works by:
1. Sorting tenant IDs alphabetically
2. Using 1-based position numbers (1st, 2nd, 3rd tenant, etc.)
3. Filtering to the specified range of positions
4. Non-tenant schemas (like 'public') are always included


================================================
FILE: backend/alembic/env.py
================================================
from typing import Any, Literal
from onyx.db.engine.iam_auth import get_iam_auth_token
from onyx.configs.app_configs import USE_IAM_AUTH
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import AWS_REGION_NAME
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy import text
from sqlalchemy.engine.base import Connection
import os
import ssl
import asyncio
import logging
from logging.config import fileConfig

from alembic import context
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.sql.schema import SchemaItem
from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import (
    MULTI_TENANT,
    POSTGRES_DEFAULT_SCHEMA,
    TENANT_ID_PREFIX,
)
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase  # type: ignore
from onyx.db.engine.sql_engine import SqlEngine

# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)

# Alembic Config object
config = context.config

if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
):
    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
    fileConfig(config.config_file_name, disable_existing_loggers=False)

target_metadata = [Base.metadata, ResultModelBase.metadata]

EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}

logger = logging.getLogger(__name__)

ssl_context: ssl.SSLContext | None = None
if USE_IAM_AUTH:
    if not os.path.exists(SSL_CERT_FILE):
        raise FileNotFoundError(f"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.")
    ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)


def include_object(
    object: SchemaItem,  # noqa: ARG001
    name: str | None,
    type_: Literal[
        "schema",
        "table",
        "column",
        "index",
        "unique_constraint",
        "foreign_key_constraint",
    ],
    reflected: bool,  # noqa: ARG001
    compare_to: SchemaItem | None,  # noqa: ARG001
) -> bool:
    if type_ == "table" and name in EXCLUDE_TABLES:
        return False
    return True


def filter_tenants_by_range(
    tenant_ids: list[str], start_range: int | None = None, end_range: int | None = None
) -> list[str]:
    """
    Filter tenant IDs by alphabetical position range.

    Args:
        tenant_ids: List of tenant IDs to filter
        start_range: Starting position in alphabetically sorted list (1-based, inclusive)
        end_range: Ending position in alphabetically sorted list (1-based, inclusive)

    Returns:
        Filtered list of tenant IDs in their original order
    """
    if start_range is None and end_range is None:
        return tenant_ids

    # Separate tenant IDs from non-tenant schemas
    tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
    non_tenant_schemas = [
        tid for tid in tenant_ids if not tid.startswith(TENANT_ID_PREFIX)
    ]

    # Sort tenant schemas alphabetically.
    # NOTE: can cause missed schemas if a schema is created in between workers
    # fetching of all tenant IDs. We accept this risk for now. Just re-running
    # the migration will fix the issue.
    sorted_tenant_schemas = sorted(tenant_schemas)

    # Apply range filtering (0-based indexing)
    start_idx = start_range if start_range is not None else 0
    end_idx = end_range if end_range is not None else len(sorted_tenant_schemas)

    # Ensure indices are within bounds
    start_idx = max(0, start_idx)
    end_idx = min(len(sorted_tenant_schemas), end_idx)

    # Get the filtered tenant schemas
    filtered_tenant_schemas = sorted_tenant_schemas[start_idx:end_idx]

    # Combine with non-tenant schemas and preserve original order
    filtered_tenants = []
    for tenant_id in tenant_ids:
        if tenant_id in filtered_tenant_schemas or tenant_id in non_tenant_schemas:
            filtered_tenants.append(tenant_id)

    return filtered_tenants


def get_schema_options() -> (
    tuple[bool, bool, bool, int | None, int | None, list[str] | None]
):
    x_args_raw = context.get_x_argument()
    x_args = {}
    for arg in x_args_raw:
        if "=" in arg:
            key, value = arg.split("=", 1)
            x_args[key.strip()] = value.strip()
        else:
            raise ValueError(f"Invalid argument: {arg}")

    create_schema = x_args.get("create_schema", "true").lower() == "true"
    upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"

    # continue on error with individual tenant
    # only applies to online migrations
    continue_on_error = x_args.get("continue", "false").lower() == "true"

    # Tenant range filtering
    tenant_range_start = None
    tenant_range_end = None

    if "tenant_range_start" in x_args:
        try:
            tenant_range_start = int(x_args["tenant_range_start"])
        except ValueError:
            raise ValueError(
                f"Invalid tenant_range_start value: {x_args['tenant_range_start']}. Must be an integer."
            )

    if "tenant_range_end" in x_args:
        try:
            tenant_range_end = int(x_args["tenant_range_end"])
        except ValueError:
            raise ValueError(
                f"Invalid tenant_range_end value: {x_args['tenant_range_end']}. Must be an integer."
            )

    # Validate range
    if tenant_range_start is not None and tenant_range_end is not None:
        if tenant_range_start > tenant_range_end:
            raise ValueError(
                f"tenant_range_start ({tenant_range_start}) cannot be greater than tenant_range_end ({tenant_range_end})"
            )

    # Specific schema names filtering (replaces both schema_name and the old tenant_ids approach)
    schemas = None
    if "schemas" in x_args:
        schema_names_str = x_args["schemas"].strip()
        if schema_names_str:
            # Split by comma and strip whitespace
            schemas = [
                name.strip() for name in schema_names_str.split(",") if name.strip()
            ]
            if schemas:
                logger.info(f"Specific schema names specified: {schemas}")

    # Validate that only one method is used at a time
    range_filtering = tenant_range_start is not None or tenant_range_end is not None
    specific_filtering = schemas is not None and len(schemas) > 0

    if range_filtering and specific_filtering:
        raise ValueError(
            "Cannot use both tenant range filtering (tenant_range_start/tenant_range_end) "
            "and specific schema filtering (schemas) at the same time. "
            "Please use only one filtering method."
        )

    if upgrade_all_tenants and specific_filtering:
        raise ValueError(
            "Cannot use both upgrade_all_tenants=true and schemas at the same time. "
            "Use either upgrade_all_tenants=true for all tenants, or schemas for specific schemas."
        )

    # If any filtering parameters are specified, we're not doing the default single schema migration
    if range_filtering:
        upgrade_all_tenants = True

    # Validate multi-tenant requirements
    if MULTI_TENANT and not upgrade_all_tenants and not specific_filtering:
        raise ValueError(
            "In multi-tenant mode, you must specify either upgrade_all_tenants=true "
            "or provide schemas. Cannot run default migration."
        )

    return (
        create_schema,
        upgrade_all_tenants,
        continue_on_error,
        tenant_range_start,
        tenant_range_end,
        schemas,
    )


def do_run_migrations(
    connection: Connection, schema_name: str, create_schema: bool
) -> None:
    if create_schema:
        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))

    connection.execute(text(f'SET search_path TO "{schema_name}"'))

    context.configure(
        connection=connection,
        target_metadata=target_metadata,  # type: ignore
        include_object=include_object,
        version_table_schema=schema_name,
        include_schemas=True,
        compare_type=True,
        compare_server_default=True,
        script_location=config.get_main_option("script_location"),
    )

    with context.begin_transaction():
        context.run_migrations()


def provide_iam_token_for_alembic(
    dialect: Any,  # noqa: ARG001
    conn_rec: Any,  # noqa: ARG001
    cargs: Any,  # noqa: ARG001
    cparams: Any,
) -> None:
    if USE_IAM_AUTH:
        # Database connection settings
        region = AWS_REGION_NAME
        host = POSTGRES_HOST
        port = POSTGRES_PORT
        user = POSTGRES_USER

        # Get IAM authentication token
        token = get_iam_auth_token(host, port, user, region)

        # For Alembic / SQLAlchemy in this context, set SSL and password
        cparams["password"] = token
        cparams["ssl"] = ssl_context


async def run_async_migrations() -> None:
    (
        create_schema,
        upgrade_all_tenants,
        continue_on_error,
        tenant_range_start,
        tenant_range_end,
        schemas,
    ) = get_schema_options()

    if not schemas and not MULTI_TENANT:
        schemas = [POSTGRES_DEFAULT_SCHEMA]

    # without init_engine, subsequent engine calls fail hard intentionally
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    engine = create_async_engine(
        build_connection_string(),
        poolclass=pool.NullPool,
    )

    if USE_IAM_AUTH:

        @event.listens_for(engine.sync_engine, "do_connect")
        def event_provide_iam_token_for_alembic(
            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
        ) -> None:
            provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)

    if schemas:
        # Use specific schema names directly without fetching all tenants
        logger.info(f"Migrating specific schema names: {schemas}")

        i_schema = 0
        num_schemas = len(schemas)
        for schema in schemas:
            i_schema += 1
            logger.info(
                f"Migrating schema: index={i_schema} num_schemas={num_schemas} schema={schema}"
            )
            try:
                async with engine.connect() as connection:
                    await connection.run_sync(
                        do_run_migrations,
                        schema_name=schema,
                        create_schema=create_schema,
                    )
                    await connection.commit()
            except Exception as e:
                logger.error(f"Error migrating schema {schema}: {e}")
                if not continue_on_error:
                    logger.error("--continue=true is not set, raising exception!")
                    raise

                logger.warning("--continue=true is set, continuing to next schema.")

    elif upgrade_all_tenants:
        tenant_schemas = get_all_tenant_ids()

        filtered_tenant_schemas = filter_tenants_by_range(
            tenant_schemas, tenant_range_start, tenant_range_end
        )

        if tenant_range_start is not None or tenant_range_end is not None:
            logger.info(
                f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
            )
            logger.info(
                f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
            )

        i_tenant = 0
        num_tenants = len(filtered_tenant_schemas)
        for schema in filtered_tenant_schemas:
            i_tenant += 1
            logger.info(
                f"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}"
            )
            try:
                async with engine.connect() as connection:
                    await connection.run_sync(
                        do_run_migrations,
                        schema_name=schema,
                        create_schema=create_schema,
                    )
                    await connection.commit()
            except Exception as e:
                logger.error(f"Error migrating schema {schema}: {e}")
                if not continue_on_error:
                    logger.error("--continue=true is not set, raising exception!")
                    raise

                logger.warning("--continue=true is set, continuing to next schema.")

    else:
        # This should not happen in the new design since we require either
        # upgrade_all_tenants=true or schemas in multi-tenant mode
        # and for non-multi-tenant mode, we should use schemas with the default schema
        raise ValueError(
            "No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas."
        )

    await engine.dispose()


def run_migrations_offline() -> None:
    """
    NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
    instead of migrating the db live via an open connection

    Not clear on when this would be used by us or if it even works.

    If it is offline, then why are there calls to the db engine?

    This doesn't really get used when we migrate in the cloud."""

    logger.info("run_migrations_offline starting.")

    # without init_engine, subsequent engine calls fail hard intentionally
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    (
        create_schema,
        upgrade_all_tenants,
        continue_on_error,
        tenant_range_start,
        tenant_range_end,
        schemas,
    ) = get_schema_options()
    url = build_connection_string()

    if schemas:
        # Use specific schema names directly without fetching all tenants
        logger.info(f"Migrating specific schema names: {schemas}")

        for schema in schemas:
            logger.info(f"Migrating schema: {schema}")
            context.configure(
                url=url,
                target_metadata=target_metadata,  # type: ignore
                literal_binds=True,
                include_object=include_object,
                version_table_schema=schema,
                include_schemas=True,
                script_location=config.get_main_option("script_location"),
                dialect_opts={"paramstyle": "named"},
            )

            with context.begin_transaction():
                context.run_migrations()

    elif upgrade_all_tenants:
        engine = create_async_engine(url)

        if USE_IAM_AUTH:

            @event.listens_for(engine.sync_engine, "do_connect")
            def event_provide_iam_token_for_alembic_offline(
                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
            ) -> None:
                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)

        tenant_schemas = get_all_tenant_ids()
        engine.sync_engine.dispose()

        filtered_tenant_schemas = filter_tenants_by_range(
            tenant_schemas, tenant_range_start, tenant_range_end
        )

        if tenant_range_start is not None or tenant_range_end is not None:
            logger.info(
                f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
            )
            logger.info(
                f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
            )

        for schema in filtered_tenant_schemas:
            logger.info(f"Migrating schema: {schema}")
            context.configure(
                url=url,
                target_metadata=target_metadata,  # type: ignore
                literal_binds=True,
                include_object=include_object,
                version_table_schema=schema,
                include_schemas=True,
                script_location=config.get_main_option("script_location"),
                dialect_opts={"paramstyle": "named"},
            )

            with context.begin_transaction():
                context.run_migrations()
    else:
        # This should not happen in the new design
        raise ValueError(
            "No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas."
        )


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Supports pytest-alembic by checking for a pre-configured connection
    in context.config.attributes["connection"]. If present, uses that
    connection/engine directly instead of creating a new async engine.
    """
    # Check if pytest-alembic is providing a connection/engine
    connectable = context.config.attributes.get("connection", None)

    if connectable is not None:
        # pytest-alembic is providing an engine - use it directly
        logger.debug("run_migrations_online starting (pytest-alembic mode).")

        # For pytest-alembic, we use the default schema (public)
        schema_name = context.config.attributes.get(
            "schema_name", POSTGRES_DEFAULT_SCHEMA
        )

        # pytest-alembic passes an Engine, we need to get a connection from it
        with connectable.connect() as connection:
            # Set search path for the schema
            connection.execute(text(f'SET search_path TO "{schema_name}"'))

            context.configure(
                connection=connection,
                target_metadata=target_metadata,  # type: ignore
                include_object=include_object,
                version_table_schema=schema_name,
                include_schemas=True,
                compare_type=True,
                compare_server_default=True,
                script_location=config.get_main_option("script_location"),
            )

            with context.begin_transaction():
                context.run_migrations()

            # Commit the transaction to ensure changes are visible to next migration
            connection.commit()
    else:
        # Normal operation - use async migrations
        logger.info("run_migrations_online starting.")
        asyncio.run(run_async_migrations())


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()


================================================
FILE: backend/alembic/run_multitenant_migrations.py
================================================
#!/usr/bin/env python3
"""Parallel Alembic Migration Runner

Upgrades tenant schemas to head in batched, parallel alembic subprocesses.
Each subprocess handles a batch of schemas (via ``-x schemas=a,b,c``),
reducing per-process overhead compared to one-schema-per-process.

Usage examples::

    # defaults: 6 workers, 50 schemas/batch
    python alembic/run_multitenant_migrations.py

    # custom settings
    python alembic/run_multitenant_migrations.py -j 8 -b 100
"""

from __future__ import annotations

import argparse
import subprocess
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import NamedTuple

from alembic.config import Config
from alembic.script import ScriptDirectory

from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.engine.tenant_utils import get_schemas_needing_migration
from shared_configs.configs import TENANT_ID_PREFIX


# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------


class Args(NamedTuple):
    jobs: int
    batch_size: int


class BatchResult(NamedTuple):
    schemas: list[str]
    success: bool
    output: str
    elapsed_sec: float


# ---------------------------------------------------------------------------
# Core functions
# ---------------------------------------------------------------------------


def run_alembic_for_batch(schemas: list[str]) -> BatchResult:
    """Run ``alembic upgrade head`` for a batch of schemas in one subprocess.

    If the batch fails, it is automatically retried with ``-x continue=true``
    so that the remaining schemas in the batch still get migrated.  The retry
    output (which contains alembic's per-schema error messages) is returned
    for diagnosis.
    """
    csv = ",".join(schemas)
    base_cmd = ["alembic", "-x", f"schemas={csv}"]

    start = time.monotonic()
    result = subprocess.run(
        [*base_cmd, "upgrade", "head"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    if result.returncode == 0:
        elapsed = time.monotonic() - start
        return BatchResult(schemas, True, result.stdout or "", elapsed)

    # At least one schema failed.  Print the initial error output, then
    # re-run with continue=true so the remaining schemas still get migrated.
    if result.stdout:
        print(f"Initial error output:\n{result.stdout}", file=sys.stderr, flush=True)
    print(
        f"Batch failed (exit {result.returncode}), retrying with 'continue=true'...",
        file=sys.stderr,
        flush=True,
    )

    retry = subprocess.run(
        [*base_cmd, "-x", "continue=true", "upgrade", "head"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    elapsed = time.monotonic() - start
    return BatchResult(schemas, False, retry.stdout or "", elapsed)


def get_head_revision() -> str | None:
    """Get the head revision from the alembic script directory."""
    alembic_cfg = Config("alembic.ini")
    script = ScriptDirectory.from_config(alembic_cfg)
    return script.get_current_head()


def run_migrations_parallel(
    schemas: list[str],
    max_workers: int,
    batch_size: int,
) -> bool:
    """Chunk *schemas* into batches and run them in parallel.

    A background monitor thread prints a status line every 60 s listing
    which batches are still in-flight, making it easy to spot hung tenants.
    """
    batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]
    total_batches = len(batches)
    print(
        f"{len(schemas)} schemas in {total_batches} batch(es) with {max_workers} workers (batch size: {batch_size})...",
        flush=True,
    )
    all_success = True

    # Thread-safe tracking of in-flight batches for the monitor thread.
    in_flight: dict[int, list[str]] = {}
    prev_in_flight: set[int] = set()
    lock = threading.Lock()
    stop_event = threading.Event()

    def _monitor() -> None:
        """Print a status line every 60 s listing batches still in-flight.

        Only prints batches that were also present in the previous tick,
        making it easy to spot batches that are stuck.
        """
        nonlocal prev_in_flight
        while not stop_event.wait(60):
            with lock:
                if not in_flight:
                    prev_in_flight = set()
                    continue
                current = set(in_flight)
                stuck = current & prev_in_flight
                prev_in_flight = current

                if not stuck:
                    continue

                schemas = [s for idx in sorted(stuck) for s in in_flight[idx]]
                print(
                    f"⏳ batch(es) still running since last check "
                    f"({', '.join(str(i + 1) for i in sorted(stuck))}): "
                    + ", ".join(schemas),
                    flush=True,
                )

    monitor_thread = threading.Thread(target=_monitor, daemon=True)
    monitor_thread.start()

    try:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:

            def _run(batch_idx: int, batch: list[str]) -> BatchResult:
                with lock:
                    in_flight[batch_idx] = batch
                print(
                    f"Batch {batch_idx + 1}/{total_batches} started ({len(batch)} schemas): {', '.join(batch)}",
                    flush=True,
                )
                result = run_alembic_for_batch(batch)
                with lock:
                    in_flight.pop(batch_idx, None)
                return result

            future_to_idx = {
                executor.submit(_run, i, b): i for i, b in enumerate(batches)
            }

            for future in as_completed(future_to_idx):
                batch_idx = future_to_idx[future]
                try:
                    result = future.result()
                    status = "✓" if result.success else "✗"

                    print(
                        f"Batch {batch_idx + 1}/{total_batches} "
                        f"{status} {len(result.schemas)} schemas "
                        f"in {result.elapsed_sec:.1f}s",
                        flush=True,
                    )

                    if not result.success:
                        # Print last 20 lines of retry output for diagnosis
                        tail = result.output.strip().splitlines()[-20:]
                        for line in tail:
                            print(f"    {line}", flush=True)
                        all_success = False

                except Exception as e:
                    print(
                        f"Batch {batch_idx + 1}/{total_batches} ✗ exception: {e}",
                        flush=True,
                    )
                    all_success = False
    finally:
        stop_event.set()
        monitor_thread.join(timeout=2)

    return all_success


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def parse_args() -> Args:
    parser = argparse.ArgumentParser(
        description="Run alembic migrations for all tenant schemas in parallel"
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=6,
        metavar="N",
        help="Number of parallel alembic processes (default: 6)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=50,
        metavar="N",
        help="Schemas per alembic process (default: 50)",
    )
    args = parser.parse_args()
    if args.jobs < 1:
        parser.error("--jobs must be >= 1")
    if args.batch_size < 1:
        parser.error("--batch-size must be >= 1")
    return Args(jobs=args.jobs, batch_size=args.batch_size)


def main() -> int:
    args = parse_args()

    head_rev = get_head_revision()
    if head_rev is None:
        print("Could not determine head revision.", file=sys.stderr)
        return 1

    with SqlEngine.scoped_engine(pool_size=5, max_overflow=2):
        tenant_ids = get_all_tenant_ids()
        tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]

        if not tenant_schemas:
            print(
                "No tenant schemas found. Is MULTI_TENANT=true set?",
                file=sys.stderr,
            )
            return 1

        schemas_to_migrate = get_schemas_needing_migration(tenant_schemas, head_rev)

    if not schemas_to_migrate:
        print(
            f"All {len(tenant_schemas)} tenants are already at head revision ({head_rev})."
        )
        return 0

    print(
        f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need migration (head: {head_rev})."
    )

    success = run_migrations_parallel(
        schemas_to_migrate,
        max_workers=args.jobs,
        batch_size=args.batch_size,
    )

    print(f"\n{'All migrations successful' if success else 'Some migrations failed'}")
    return 0 if success else 1


if __name__ == "__main__":
    raise SystemExit(main())


================================================
FILE: backend/alembic/script.py.mako
================================================
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}


================================================
FILE: backend/alembic/versions/01f8e6d95a33_populate_flow_mapping_data.py
================================================
"""Populate flow mapping data

Revision ID: 01f8e6d95a33
Revises: d5c86e2c6dc6
Create Date: 2026-01-31 17:37:10.485558

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "d5c86e2c6dc6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add each model config to the conversation flow, setting the global default if it exists
    # Exclude models that are part of ImageGenerationConfig
    op.execute(
        """
        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
        SELECT
            'CHAT' AS llm_model_flow_type,
            COALESCE(
                (lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
                FALSE
            ) AS is_default,
            mc.id AS model_configuration_id
        FROM model_configuration mc
        LEFT JOIN llm_provider lp
            ON lp.id = mc.llm_provider_id
        WHERE NOT EXISTS (
            SELECT 1 FROM image_generation_config igc
            WHERE igc.model_configuration_id = mc.id
        );
        """
    )

    # Add models with supports_image_input to the vision flow
    op.execute(
        """
        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
        SELECT
            'VISION' AS llm_model_flow_type,
            COALESCE(
                (lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
                FALSE
            ) AS is_default,
            mc.id AS model_configuration_id
        FROM model_configuration mc
        LEFT JOIN llm_provider lp
            ON lp.id = mc.llm_provider_id
        WHERE mc.supports_image_input IS TRUE;
        """
    )


def downgrade() -> None:
    # Populate vision defaults from model_flow
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET
            is_default_vision_provider = TRUE,
            default_vision_model = mc.name
        FROM llm_model_flow mf
        JOIN model_configuration mc ON mc.id = mf.model_configuration_id
        WHERE mf.llm_model_flow_type = 'VISION'
          AND mf.is_default = TRUE
          AND mc.llm_provider_id = lp.id;
        """
    )

    # Populate conversation defaults from model_flow
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET
            is_default_provider = TRUE,
            default_model_name = mc.name
        FROM llm_model_flow mf
        JOIN model_configuration mc ON mc.id = mf.model_configuration_id
        WHERE mf.llm_model_flow_type = 'CHAT'
          AND mf.is_default = TRUE
          AND mc.llm_provider_id = lp.id;
        """
    )

    # For providers that have conversation flow mappings but aren't the default,
    # we still need a default_model_name (it was NOT NULL originally)
    # Pick the first visible model or any model for that provider
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET default_model_name = (
            SELECT mc.name
            FROM model_configuration mc
            JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
            WHERE mc.llm_provider_id = lp.id
              AND mf.llm_model_flow_type = 'CHAT'
            ORDER BY mc.is_visible DESC, mc.id ASC
            LIMIT 1
        )
        WHERE lp.default_model_name IS NULL;
        """
    )

    # Delete all model_flow entries (reverse the inserts from upgrade)
    op.execute("DELETE FROM llm_model_flow;")


================================================
FILE: backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py
================================================
"""add shortcut option for users

Revision ID: 027381bce97c
Revises: 6fc7886d665d
Create Date: 2025-01-14 12:14:00.814390

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "027381bce97c"
down_revision = "6fc7886d665d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "shortcut_enabled", sa.Boolean(), nullable=False, server_default="false"
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "shortcut_enabled")


================================================
FILE: backend/alembic/versions/03bf8be6b53a_rework_kg_config.py
================================================
"""rework-kg-config

Revision ID: 03bf8be6b53a
Revises: 65bc6e0f8500
Create Date: 2025-06-16 10:52:34.815335

"""

import json


from datetime import datetime
from datetime import timedelta
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "03bf8be6b53a"
down_revision = "65bc6e0f8500"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # get current config
    current_configs = (
        op.get_bind()
        .execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
        .all()
    )
    current_config_dict = {
        config.kg_variable_name: (
            config.kg_variable_values[0]
            if config.kg_variable_name
            not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
            else config.kg_variable_values
        )
        for config in current_configs
        if config.kg_variable_values
    }

    # not using the KGConfigSettings model here in case it changes in the future
    kg_config_settings = json.dumps(
        {
            "KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
            "KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
            "KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
            "KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
            "KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
                "KG_IGNORE_EMAIL_DOMAINS", []
            ),
            "KG_COVERAGE_START": current_config_dict.get(
                "KG_COVERAGE_START",
                (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
            ),
            "KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
            "KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
                "KG_MAX_PARENT_RECURSION_DEPTH", 2
            ),
            "KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
        }
    )
    op.execute(
        f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
    )

    # drop kg config table
    op.drop_table("kg_config")


def downgrade() -> None:
    # get current config
    current_config_dict = {
        "KG_EXPOSED": False,
        "KG_ENABLED": False,
        "KG_VENDOR": [],
        "KG_VENDOR_DOMAINS": [],
        "KG_IGNORE_EMAIL_DOMAINS": [],
        "KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
        "KG_MAX_COVERAGE_DAYS": 90,
        "KG_MAX_PARENT_RECURSION_DEPTH": 2,
    }
    current_configs = (
        op.get_bind()
        .execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
        .one_or_none()
    )
    if current_configs is not None:
        current_config_dict.update(current_configs[0])
    insert_values = [
        {
            "kg_variable_name": name,
            "kg_variable_values": (
                [str(val).lower() if isinstance(val, bool) else str(val)]
                if not isinstance(val, list)
                else val
            ),
        }
        for name, val in current_config_dict.items()
    ]

    op.create_table(
        "kg_config",
        sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
        sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
        sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
        sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
    )
    op.bulk_insert(
        sa.table(
            "kg_config",
            sa.column("kg_variable_name", sa.String),
            sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
        ),
        insert_values,
    )

    op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")


================================================
FILE: backend/alembic/versions/03d085c5c38d_backfill_account_type.py
================================================
"""backfill_account_type

Revision ID: 03d085c5c38d
Revises: 977e834c1427
Create Date: 2026-03-25 16:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "03d085c5c38d"
down_revision = "977e834c1427"
branch_labels = None
depends_on = None

_STANDARD = "STANDARD"
_BOT = "BOT"
_EXT_PERM_USER = "EXT_PERM_USER"
_SERVICE_ACCOUNT = "SERVICE_ACCOUNT"
_ANONYMOUS = "ANONYMOUS"

# Well-known anonymous user UUID
ANONYMOUS_USER_ID = "00000000-0000-0000-0000-000000000002"

# Email pattern for API key virtual users
API_KEY_EMAIL_PATTERN = r"API\_KEY\_\_%"

# Reflect the table structure for use in DML
user_table = sa.table(
    "user",
    sa.column("id", sa.Uuid),
    sa.column("email", sa.String),
    sa.column("role", sa.String),
    sa.column("account_type", sa.String),
)


def upgrade() -> None:
    # ------------------------------------------------------------------
    # Step 1: Backfill account_type from role.
    # Order matters — most-specific matches first so the final catch-all
    # only touches rows that haven't been classified yet.
    # ------------------------------------------------------------------

    # 1a. API key virtual users → SERVICE_ACCOUNT
    op.execute(
        sa.update(user_table)
        .where(
            user_table.c.email.ilike(API_KEY_EMAIL_PATTERN),
            user_table.c.account_type.is_(None),
        )
        .values(account_type=_SERVICE_ACCOUNT)
    )

    # 1b. Anonymous user → ANONYMOUS
    op.execute(
        sa.update(user_table)
        .where(
            user_table.c.id == ANONYMOUS_USER_ID,
            user_table.c.account_type.is_(None),
        )
        .values(account_type=_ANONYMOUS)
    )

    # 1c. SLACK_USER role → BOT
    op.execute(
        sa.update(user_table)
        .where(
            user_table.c.role == "SLACK_USER",
            user_table.c.account_type.is_(None),
        )
        .values(account_type=_BOT)
    )

    # 1d. EXT_PERM_USER role → EXT_PERM_USER
    op.execute(
        sa.update(user_table)
        .where(
            user_table.c.role == "EXT_PERM_USER",
            user_table.c.account_type.is_(None),
        )
        .values(account_type=_EXT_PERM_USER)
    )

    # 1e. Everything else → STANDARD
    op.execute(
        sa.update(user_table)
        .where(user_table.c.account_type.is_(None))
        .values(account_type=_STANDARD)
    )

    # ------------------------------------------------------------------
    # Step 2: Set account_type to NOT NULL now that every row is filled.
    # ------------------------------------------------------------------
    op.alter_column(
        "user",
        "account_type",
        nullable=False,
        server_default="STANDARD",
    )


def downgrade() -> None:
    op.alter_column("user", "account_type", nullable=True, server_default=None)
    op.execute(sa.update(user_table).values(account_type=None))


================================================
FILE: backend/alembic/versions/03d710ccf29c_add_permission_sync_attempt_tables.py
================================================
"""add permission sync attempt tables

Revision ID: 03d710ccf29c
Revises: 96a5702df6aa
Create Date: 2025-09-11 13:30:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "03d710ccf29c"  # Generate a new unique ID
down_revision = "96a5702df6aa"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create the permission sync status enum
    permission_sync_status_enum = sa.Enum(
        "not_started",
        "in_progress",
        "success",
        "canceled",
        "failed",
        "completed_with_errors",
        name="permissionsyncstatus",
        native_enum=False,
    )

    # Create doc_permission_sync_attempt table
    op.create_table(
        "doc_permission_sync_attempt",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
        sa.Column("status", permission_sync_status_enum, nullable=False),
        sa.Column("total_docs_synced", sa.Integer(), nullable=True),
        sa.Column("docs_with_permission_errors", sa.Integer(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
        sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["connector_credential_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    # Create indexes for doc_permission_sync_attempt
    op.create_index(
        "ix_doc_permission_sync_attempt_time_created",
        "doc_permission_sync_attempt",
        ["time_created"],
        unique=False,
    )
    op.create_index(
        "ix_permission_sync_attempt_latest_for_cc_pair",
        "doc_permission_sync_attempt",
        ["connector_credential_pair_id", "time_created"],
        unique=False,
    )
    op.create_index(
        "ix_permission_sync_attempt_status_time",
        "doc_permission_sync_attempt",
        ["status", sa.text("time_finished DESC")],
        unique=False,
    )

    # Create external_group_permission_sync_attempt table
    # connector_credential_pair_id is nullable - group syncs can be global (e.g., Confluence)
    op.create_table(
        "external_group_permission_sync_attempt",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=True),
        sa.Column("status", permission_sync_status_enum, nullable=False),
        sa.Column("total_users_processed", sa.Integer(), nullable=True),
        sa.Column("total_groups_processed", sa.Integer(), nullable=True),
        sa.Column("total_group_memberships_synced", sa.Integer(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
        sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["connector_credential_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    # Create indexes for external_group_permission_sync_attempt
    op.create_index(
        "ix_external_group_permission_sync_attempt_time_created",
        "external_group_permission_sync_attempt",
        ["time_created"],
        unique=False,
    )
    op.create_index(
        "ix_group_sync_attempt_cc_pair_time",
        "external_group_permission_sync_attempt",
        ["connector_credential_pair_id", "time_created"],
        unique=False,
    )
    op.create_index(
        "ix_group_sync_attempt_status_time",
        "external_group_permission_sync_attempt",
        ["status", sa.text("time_finished DESC")],
        unique=False,
    )


def downgrade() -> None:
    # Drop indexes
    op.drop_index(
        "ix_group_sync_attempt_status_time",
        table_name="external_group_permission_sync_attempt",
    )
    op.drop_index(
        "ix_group_sync_attempt_cc_pair_time",
        table_name="external_group_permission_sync_attempt",
    )
    op.drop_index(
        "ix_external_group_permission_sync_attempt_time_created",
        table_name="external_group_permission_sync_attempt",
    )
    op.drop_index(
        "ix_permission_sync_attempt_status_time",
        table_name="doc_permission_sync_attempt",
    )
    op.drop_index(
        "ix_permission_sync_attempt_latest_for_cc_pair",
        table_name="doc_permission_sync_attempt",
    )
    op.drop_index(
        "ix_doc_permission_sync_attempt_time_created",
        table_name="doc_permission_sync_attempt",
    )

    # Drop tables
    op.drop_table("external_group_permission_sync_attempt")
    op.drop_table("doc_permission_sync_attempt")


================================================
FILE: backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py
================================================
"""Add thread specific model selection

Revision ID: 0568ccf46a6b
Revises: e209dc5a8156
Create Date: 2024-06-19 14:25:36.376046

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "0568ccf46a6b"
down_revision = "e209dc5a8156"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_session",
        sa.Column("current_alternate_model", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_session", "current_alternate_model")


================================================
FILE: backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
================================================
"""add search doc relevance details

Revision ID: 05c07bf07c00
Revises: b896bbd0d5a7
Create Date: 2024-07-10 17:48:15.886653

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "05c07bf07c00"
down_revision = "b896bbd0d5a7"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "search_doc",
        sa.Column("is_relevant", sa.Boolean(), nullable=True),
    )
    op.add_column(
        "search_doc",
        sa.Column("relevance_explanation", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("search_doc", "relevance_explanation")
    op.drop_column("search_doc", "is_relevant")


================================================
FILE: backend/alembic/versions/07b98176f1de_code_interpreter_seed.py
================================================
"""code interpreter seed

Revision ID: 07b98176f1de
Revises: 7cb492013621
Create Date: 2026-02-23 15:55:07.606784

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "07b98176f1de"
down_revision = "7cb492013621"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Seed the single instance of code_interpreter_server
    # NOTE: There should only exist at most and at minimum 1 code_interpreter_server row
    op.execute(
        sa.text("INSERT INTO code_interpreter_server (server_enabled) VALUES (true)")
    )


def downgrade() -> None:
    op.execute(sa.text("DELETE FROM code_interpreter_server"))


================================================
FILE: backend/alembic/versions/0816326d83aa_add_federated_connector_tables.py
================================================
"""add federated connector tables

Revision ID: 0816326d83aa
Revises: 12635f6655b7
Create Date: 2025-06-29 14:09:45.109518

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "0816326d83aa"
down_revision = "12635f6655b7"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create federated_connector table
    op.create_table(
        "federated_connector",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("source", sa.String(), nullable=False),
        sa.Column("credentials", sa.LargeBinary(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )

    # Create federated_connector_oauth_token table
    op.create_table(
        "federated_connector_oauth_token",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("federated_connector_id", sa.Integer(), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("token", sa.LargeBinary(), nullable=False),
        sa.Column("expires_at", sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(
            ["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    # Create federated_connector__document_set table
    op.create_table(
        "federated_connector__document_set",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("federated_connector_id", sa.Integer(), nullable=False),
        sa.Column("document_set_id", sa.Integer(), nullable=False),
        sa.Column("entities", postgresql.JSONB(), nullable=False),
        sa.ForeignKeyConstraint(
            ["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(
            ["document_set_id"], ["document_set.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "federated_connector_id",
            "document_set_id",
            name="uq_federated_connector_document_set",
        ),
    )


def downgrade() -> None:
    # Drop tables in reverse order due to foreign key dependencies
    op.drop_table("federated_connector__document_set")
    op.drop_table("federated_connector_oauth_token")
    op.drop_table("federated_connector")


================================================
FILE: backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py
================================================
"""add_indexing_start_to_connector

Revision ID: 08a1eda20fe1
Revises: 8a87bd6ec550
Create Date: 2024-07-23 11:12:39.462397

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "08a1eda20fe1"
down_revision = "8a87bd6ec550"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "connector", sa.Column("indexing_start", sa.DateTime(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("connector", "indexing_start")


================================================
FILE: backend/alembic/versions/09995b8811eb_add_theme_preference_to_user.py
================================================
"""add theme_preference to user

Revision ID: 09995b8811eb
Revises: 3d1cca026fe8
Create Date: 2025-10-24 08:58:50.246949

"""

from alembic import op
import sqlalchemy as sa
from onyx.db.enums import ThemePreference


# revision identifiers, used by Alembic.
revision = "09995b8811eb"
down_revision = "3d1cca026fe8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "theme_preference",
            sa.Enum(ThemePreference, native_enum=False),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "theme_preference")


================================================
FILE: backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py
================================================
"""Add starter prompts

Revision ID: 0a2b51deb0b8
Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "0a2b51deb0b8"
down_revision = "5f4b8568a221"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column(
            "starter_messages",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("persona", "starter_messages")


================================================
FILE: backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py
================================================
"""Enable Encrypted Fields

Revision ID: 0a98909f2757
Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table
from sqlalchemy.dialects import postgresql
import json

from onyx.utils.encryption import encrypt_string_to_bytes

# revision identifiers, used by Alembic.
revision = "0a98909f2757"
down_revision = "570282d33c49"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    connection = op.get_bind()

    op.alter_column("key_value_store", "value", nullable=True)
    op.add_column(
        "key_value_store",
        sa.Column(
            "encrypted_value",
            sa.LargeBinary,
            nullable=True,
        ),
    )

    # Need a temporary column to translate the JSONB to binary
    op.add_column("credential", sa.Column("temp_column", sa.LargeBinary()))

    creds_table = table(
        "credential",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "credential_json",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column(
            "temp_column",
            sa.LargeBinary(),
            nullable=False,
        ),
    )

    results = connection.execute(sa.select(creds_table))

    # This uses the MIT encrypt which does not actually encrypt the credentials
    # In other words, this upgrade does not apply the encryption. Porting existing sensitive data
    # and key rotation currently is not supported and will come out in the future
    for row_id, creds, _ in results:
        creds_binary = encrypt_string_to_bytes(json.dumps(creds))
        connection.execute(
            creds_table.update()
            .where(creds_table.c.id == row_id)
            .values(temp_column=creds_binary)
        )

    op.drop_column("credential", "credential_json")
    op.alter_column("credential", "temp_column", new_column_name="credential_json")

    op.add_column("llm_provider", sa.Column("temp_column", sa.LargeBinary()))

    llm_table = table(
        "llm_provider",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "api_key",
            sa.String(),
            nullable=False,
        ),
        sa.Column(
            "temp_column",
            sa.LargeBinary(),
            nullable=False,
        ),
    )
    results = connection.execute(sa.select(llm_table))

    for row_id, api_key, _ in results:
        llm_key = encrypt_string_to_bytes(api_key)
        connection.execute(
            llm_table.update()
            .where(llm_table.c.id == row_id)
            .values(temp_column=llm_key)
        )

    op.drop_column("llm_provider", "api_key")
    op.alter_column("llm_provider", "temp_column", new_column_name="api_key")


def downgrade() -> None:
    # Some information loss but this is ok. Should not allow decryption via downgrade.
    op.drop_column("credential", "credential_json")
    op.drop_column("llm_provider", "api_key")

    op.add_column("llm_provider", sa.Column("api_key", sa.String()))
    op.add_column(
        "credential",
        sa.Column("credential_json", postgresql.JSONB(astext_type=sa.Text())),
    )

    op.execute("DELETE FROM key_value_store WHERE value IS NULL")
    op.alter_column("key_value_store", "value", nullable=False)
    op.drop_column("key_value_store", "encrypted_value")


================================================
FILE: backend/alembic/versions/0bb4558f35df_add_scim_username_to_scim_user_mapping.py
================================================
"""add scim_username to scim_user_mapping

Revision ID: 0bb4558f35df
Revises: 631fd2504136
Create Date: 2026-02-20 10:45:30.340188

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "0bb4558f35df"
down_revision = "631fd2504136"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "scim_user_mapping",
        sa.Column("scim_username", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("scim_user_mapping", "scim_username")


================================================
FILE: backend/alembic/versions/0cd424f32b1d_user_file_data_preparation_and_backfill.py
================================================
"""Migration 2: User file data preparation and backfill

Revision ID: 0cd424f32b1d
Revises: 9b66d3156fc6
Create Date: 2025-09-22 09:44:42.727034

This migration populates the new columns added in migration 1.
It prepares data for the UUID transition and relationship migration.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "0cd424f32b1d"
down_revision = "9b66d3156fc6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Populate new columns with data."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # === Step 1: Populate user_file.new_id ===
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
    has_new_id = "new_id" in user_file_columns

    if has_new_id:
        logger.info("Populating user_file.new_id with UUIDs...")

        # Count rows needing UUIDs
        null_count = bind.execute(
            text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
        ).scalar_one()

        if null_count > 0:
            logger.info(f"Generating UUIDs for {null_count} user_file records...")

            # Populate in batches to avoid long locks
            batch_size = 10000
            total_updated = 0

            while True:
                result = bind.execute(
                    text(
                        """
                    UPDATE user_file
                    SET new_id = gen_random_uuid()
                    WHERE new_id IS NULL
                    AND id IN (
                        SELECT id FROM user_file
                        WHERE new_id IS NULL
                        LIMIT :batch_size
                    )
                """
                    ),
                    {"batch_size": batch_size},
                )

                updated = result.rowcount
                total_updated += updated

                if updated < batch_size:
                    break

                logger.info(f"  Updated {total_updated}/{null_count} records...")

            logger.info(f"Generated UUIDs for {total_updated} user_file records")

        # Verify all records have UUIDs
        remaining_null = bind.execute(
            text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
        ).scalar_one()

        if remaining_null > 0:
            raise Exception(
                f"Failed to populate all user_file.new_id values ({remaining_null} NULL)"
            )

        # Lock down the column
        op.alter_column("user_file", "new_id", nullable=False)
        op.alter_column("user_file", "new_id", server_default=None)
        logger.info("Locked down user_file.new_id column")

    # === Step 2: Populate persona__user_file.user_file_id_uuid ===
    persona_user_file_columns = [
        col["name"] for col in inspector.get_columns("persona__user_file")
    ]

    if has_new_id and "user_file_id_uuid" in persona_user_file_columns:
        logger.info("Populating persona__user_file.user_file_id_uuid...")

        # Count rows needing update
        null_count = bind.execute(
            text(
                """
            SELECT COUNT(*) FROM persona__user_file
            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
        """
            )
        ).scalar_one()

        if null_count > 0:
            logger.info(f"Updating {null_count} persona__user_file records...")

            # Update in batches
            batch_size = 10000
            total_updated = 0

            while True:
                result = bind.execute(
                    text(
                        """
                    UPDATE persona__user_file p
                    SET user_file_id_uuid = uf.new_id
                    FROM user_file uf
                    WHERE p.user_file_id = uf.id
                    AND p.user_file_id_uuid IS NULL
                    AND p.persona_id IN (
                        SELECT persona_id
                        FROM persona__user_file
                        WHERE user_file_id_uuid IS NULL
                        LIMIT :batch_size
                    )
                """
                    ),
                    {"batch_size": batch_size},
                )

                updated = result.rowcount
                total_updated += updated

                if updated < batch_size:
                    break

                logger.info(f"  Updated {total_updated}/{null_count} records...")

            logger.info(f"Updated {total_updated} persona__user_file records")

        # Verify all records are populated
        remaining_null = bind.execute(
            text(
                """
            SELECT COUNT(*) FROM persona__user_file
            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
        """
            )
        ).scalar_one()

        if remaining_null > 0:
            raise Exception(
                f"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)"
            )

        op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
        logger.info("Locked down persona__user_file.user_file_id_uuid column")

    # === Step 3: Create user_project records from chat_folder ===
    if "chat_folder" in inspector.get_table_names():
        logger.info("Creating user_project records from chat_folder...")

        result = bind.execute(
            text(
                """
            INSERT INTO user_project (user_id, name)
            SELECT cf.user_id, cf.name
            FROM chat_folder cf
            WHERE NOT EXISTS (
                SELECT 1
                FROM user_project up
                WHERE up.user_id = cf.user_id AND up.name = cf.name
            )
        """
            )
        )

        logger.info(f"Created {result.rowcount} user_project records from chat_folder")

    # === Step 4: Populate chat_session.project_id ===
    chat_session_columns = [
        col["name"] for col in inspector.get_columns("chat_session")
    ]

    if "folder_id" in chat_session_columns and "project_id" in chat_session_columns:
        logger.info("Populating chat_session.project_id...")

        # Count sessions needing update
        null_count = bind.execute(
            text(
                """
            SELECT COUNT(*) FROM chat_session
            WHERE project_id IS NULL AND folder_id IS NOT NULL
        """
            )
        ).scalar_one()

        if null_count > 0:
            logger.info(f"Updating {null_count} chat_session records...")

            result = bind.execute(
                text(
                    """
                UPDATE chat_session cs
                SET project_id = up.id
                FROM chat_folder cf
                JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name
                WHERE cs.folder_id = cf.id AND cs.project_id IS NULL
            """
                )
            )

            logger.info(f"Updated {result.rowcount} chat_session records")

        # Verify all records are populated
        remaining_null = bind.execute(
            text(
                """
            SELECT COUNT(*) FROM chat_session
            WHERE project_id IS NULL AND folder_id IS NOT NULL
        """
            )
        ).scalar_one()

        if remaining_null > 0:
            logger.warning(
                f"Warning: {remaining_null} chat_session records could not be mapped to projects"
            )

    # === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===
    # Prior to UUID migration, plaintext cache files were stored with file_id like 'plain_text_<int_id>'.
    # After migration, we use 'plaintext_<uuid>' (note the name change to 'plaintext_').
    # This step remaps existing FileRecord rows to the new naming while preserving object_key/bucket.
    logger.info("Updating plaintext FileRecord ids and display names to UUID scheme...")

    # Count legacy plaintext records that can be mapped to UUID user_file ids
    count_query = text(
        """
        SELECT COUNT(*)
        FROM file_record fr
        JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)
        WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
        """
    )
    legacy_count = bind.execute(count_query).scalar_one()

    if legacy_count and legacy_count > 0:
        logger.info(f"Found {legacy_count} legacy plaintext file records to update")

        # Update display_name first for readability (safe regardless of rename)
        bind.execute(
            text(
                """
                UPDATE file_record fr
                SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)
                FROM user_file uf
                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)
                """
            )
        )

        # Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id
        # Use a single UPDATE ... WHERE file_id LIKE 'plain_text_%'
        # and ensure it aligns to existing user_file ids to avoid renaming unrelated rows
        result = bind.execute(
            text(
                """
                UPDATE file_record fr
                SET file_id = CONCAT('plaintext_', uf.new_id::text)
                FROM user_file uf
                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)
                """
            )
        )
        logger.info(
            f"Updated {result.rowcount} plaintext file_record ids to UUID scheme"
        )

    # === Step 6: Ensure document_id_migrated default TRUE and backfill existing FALSE ===
    # New records should default to migrated=True so the migration task won't run for them.
    # Existing rows that had a legacy document_id should be marked as not migrated to be processed.

    # Backfill existing records: if document_id is not null, set to FALSE
    bind.execute(
        text(
            """
            UPDATE user_file
            SET document_id_migrated = FALSE
            WHERE document_id IS NOT NULL
            """
        )
    )

    # === Step 7: Backfill user_file.status from index_attempt ===
    logger.info("Backfilling user_file.status from index_attempt...")

    # Update user_file status based on latest index attempt
    # Using CTEs instead of temp tables for asyncpg compatibility
    result = bind.execute(
        text(
            """
        WITH latest_attempt AS (
            SELECT DISTINCT ON (ia.connector_credential_pair_id)
                ia.connector_credential_pair_id,
                ia.status
            FROM index_attempt ia
            ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC
        ),
        uf_to_ccp AS (
            SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id
            FROM user_file uf
            JOIN document_by_connector_credential_pair dcc
                ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')
            JOIN connector_credential_pair ccp
                ON ccp.connector_id = dcc.connector_id
                AND ccp.credential_id = dcc.credential_id
        )
        UPDATE user_file uf
        SET status = CASE
            WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'
            WHEN la.status = 'SUCCESS' THEN 'COMPLETED'
            ELSE 'FAILED'
        END
        FROM uf_to_ccp ufc
        LEFT JOIN latest_attempt la
            ON la.connector_credential_pair_id = ufc.cc_pair_id
        WHERE uf.id = ufc.uf_id
        AND uf.status = 'PROCESSING'
    """
        )
    )

    logger.info(f"Updated status for {result.rowcount} user_file records")

    logger.info("Migration 2 (data preparation) completed successfully")


def downgrade() -> None:
    """Reset populated data to allow clean downgrade of schema."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting downgrade of data preparation...")

    # Reset user_file columns to allow nulls before data removal
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]

        if "new_id" in columns:
            op.alter_column(
                "user_file",
                "new_id",
                nullable=True,
                server_default=sa.text("gen_random_uuid()"),
            )
            # Optionally clear the data
            # bind.execute(text("UPDATE user_file SET new_id = NULL"))
            logger.info("Reset user_file.new_id to nullable")

    # Reset persona__user_file.user_file_id_uuid
    if "persona__user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("persona__user_file")]

        if "user_file_id_uuid" in columns:
            op.alter_column("persona__user_file", "user_file_id_uuid", nullable=True)
            # Optionally clear the data
            # bind.execute(text("UPDATE persona__user_file SET user_file_id_uuid = NULL"))
            logger.info("Reset persona__user_file.user_file_id_uuid to nullable")

    # Note: We don't delete user_project records or reset chat_session.project_id
    # as these might be in use and can be handled by the schema downgrade

    # Reset user_file.status to default
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]
        if "status" in columns:
            bind.execute(text("UPDATE user_file SET status = 'PROCESSING'"))
            logger.info("Reset user_file.status to default")

    logger.info("Downgrade completed successfully")


================================================
FILE: backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py
================================================
"""add ccpair deletion failure message

Revision ID: 0ebb1d516877
Revises: 52a219fb5233
Create Date: 2024-09-10 15:03:48.233926

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "0ebb1d516877"
down_revision = "52a219fb5233"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column("deletion_failure_message", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "deletion_failure_message")


================================================
FILE: backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py
================================================
"""add index to index_attempt.time_created

Revision ID: 0f7ff6d75b57
Revises: 369644546676
Create Date: 2025-01-10 14:01:14.067144

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "0f7ff6d75b57"
down_revision = "fec3db967bf7"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_index(
        op.f("ix_index_attempt_status"),
        "index_attempt",
        ["status"],
        unique=False,
    )

    op.create_index(
        op.f("ix_index_attempt_time_created"),
        "index_attempt",
        ["time_created"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(op.f("ix_index_attempt_time_created"), table_name="index_attempt")

    op.drop_index(op.f("ix_index_attempt_status"), table_name="index_attempt")


================================================
FILE: backend/alembic/versions/114a638452db_add_default_app_mode_to_user.py
================================================
"""add default_app_mode to user

Revision ID: 114a638452db
Revises: feead2911109
Create Date: 2026-02-09 18:57:08.274640

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "114a638452db"
down_revision = "feead2911109"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "default_app_mode",
            sa.String(),
            nullable=False,
            server_default="CHAT",
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "default_app_mode")


================================================
FILE: backend/alembic/versions/12635f6655b7_drive_canonical_ids.py
================================================
"""drive-canonical-ids

Revision ID: 12635f6655b7
Revises: 58c50ef19f08
Create Date: 2025-06-20 14:44:54.241159

"""

from alembic import op
import sqlalchemy as sa
from urllib.parse import urlparse, urlunparse
from httpx import HTTPStatusError
import httpx
from onyx.db.search_settings import SearchSettings
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.utils.logger import setup_logger
import os

logger = setup_logger()

# revision identifiers, used by Alembic.
revision = "12635f6655b7"
down_revision = "58c50ef19f08"
branch_labels = None
depends_on = None

SKIP_CANON_DRIVE_IDS = os.environ.get("SKIP_CANON_DRIVE_IDS", "true").lower() == "true"


def active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:
    result = op.get_bind().execute(
        sa.text(
            """
        SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1
        """
        )
    )
    search_settings_fetch = result.fetchall()
    search_settings = (
        SearchSettings(**search_settings_fetch[0]._asdict())
        if search_settings_fetch
        else None
    )

    result2 = op.get_bind().execute(
        sa.text(
            """
        SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1
        """
        )
    )
    search_settings_future_fetch = result2.fetchall()
    search_settings_future = (
        SearchSettings(**search_settings_future_fetch[0]._asdict())
        if search_settings_future_fetch
        else None
    )

    if not isinstance(search_settings, SearchSettings):
        raise RuntimeError(
            "current search settings is of type " + str(type(search_settings))
        )
    if (
        not isinstance(search_settings_future, SearchSettings)
        and search_settings_future is not None
    ):
        raise RuntimeError(
            "future search settings is of type " + str(type(search_settings_future))
        )

    return search_settings, search_settings_future


def normalize_google_drive_url(url: str) -> str:
    """Remove query parameters from Google Drive URLs to create canonical document IDs.
    NOTE: copied from drive doc_conversion.py
    """
    parsed_url = urlparse(url)
    parsed_url = parsed_url._replace(query="")
    spl_path = parsed_url.path.split("/")
    if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
        spl_path.pop()
        parsed_url = parsed_url._replace(path="/".join(spl_path))
    # Remove query parameters and reconstruct URL
    return urlunparse(parsed_url)


def get_google_drive_documents_from_database() -> list[dict]:
    """Get all Google Drive documents from the database."""
    bind = op.get_bind()
    result = bind.execute(
        sa.text(
            """
            SELECT d.id
            FROM document d
            JOIN document_by_connector_credential_pair dcc ON d.id = dcc.id
            JOIN connector_credential_pair cc ON dcc.connector_id = cc.connector_id
                AND dcc.credential_id = cc.credential_id
            JOIN connector c ON cc.connector_id = c.id
            WHERE c.source = 'GOOGLE_DRIVE'
        """
        )
    )

    documents = []
    for row in result:
        documents.append({"document_id": row.id})

    return documents


def update_document_id_in_database(
    old_doc_id: str, new_doc_id: str, index_name: str
) -> None:
    """Update document IDs in all relevant database tables using copy-and-swap approach."""
    bind = op.get_bind()

    # print(f"Updating database tables for document {old_doc_id} -> {new_doc_id}")

    # Check if new document ID already exists
    result = bind.execute(
        sa.text("SELECT COUNT(*) FROM document WHERE id = :new_id"),
        {"new_id": new_doc_id},
    )
    row = result.fetchone()
    if row and row[0] > 0:
        # print(f"Document with ID {new_doc_id} already exists, deleting old one")
        delete_document_from_db(old_doc_id, index_name)
        return

    # Step 1: Create a new document row with the new ID (copy all fields from old row)
    # Use a conservative approach to handle columns that might not exist in all installations
    try:
        bind.execute(
            sa.text(
                """
                INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
                                    link, doc_updated_at, primary_owners, secondary_owners,
                                    external_user_emails, external_user_group_ids, is_public,
                                    chunk_count, last_modified, last_synced, kg_stage, kg_processing_time)
                SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
                       link, doc_updated_at, primary_owners, secondary_owners,
                       external_user_emails, external_user_group_ids, is_public,
                       chunk_count, last_modified, last_synced, kg_stage, kg_processing_time
                FROM document
                WHERE id = :old_id
            """
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated database tables for document {old_doc_id} -> {new_doc_id}")
    except Exception as e:
        # If the full INSERT fails, try a more basic version with only core columns
        logger.warning(f"Full INSERT failed, trying basic version: {e}")
        bind.execute(
            sa.text(
                """
                INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
                                    link, doc_updated_at, primary_owners, secondary_owners)
                SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
                       link, doc_updated_at, primary_owners, secondary_owners
                FROM document
                WHERE id = :old_id
            """
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )

    # Step 2: Update all foreign key references to point to the new ID

    # Update document_by_connector_credential_pair table
    bind.execute(
        sa.text(
            "UPDATE document_by_connector_credential_pair SET id = :new_id WHERE id = :old_id"
        ),
        {"new_id": new_doc_id, "old_id": old_doc_id},
    )
    # print(f"Successfully updated document_by_connector_credential_pair table for document {old_doc_id} -> {new_doc_id}")

    # Update search_doc table (stores search results for chat replay)
    # This is critical for agent functionality
    bind.execute(
        sa.text(
            "UPDATE search_doc SET document_id = :new_id WHERE document_id = :old_id"
        ),
        {"new_id": new_doc_id, "old_id": old_doc_id},
    )
    # print(f"Successfully updated search_doc table for document {old_doc_id} -> {new_doc_id}")
    # Update document_retrieval_feedback table (user feedback on documents)
    bind.execute(
        sa.text(
            "UPDATE document_retrieval_feedback SET document_id = :new_id WHERE document_id = :old_id"
        ),
        {"new_id": new_doc_id, "old_id": old_doc_id},
    )
    # print(f"Successfully updated document_retrieval_feedback table for document {old_doc_id} -> {new_doc_id}")
    # Update document__tag table (document-tag relationships)
    bind.execute(
        sa.text(
            "UPDATE document__tag SET document_id = :new_id WHERE document_id = :old_id"
        ),
        {"new_id": new_doc_id, "old_id": old_doc_id},
    )
    # print(f"Successfully updated document__tag table for document {old_doc_id} -> {new_doc_id}")
    # Update user_file table (user uploaded files linked to documents)
    bind.execute(
        sa.text(
            "UPDATE user_file SET document_id = :new_id WHERE document_id = :old_id"
        ),
        {"new_id": new_doc_id, "old_id": old_doc_id},
    )
    # print(f"Successfully updated user_file table for document {old_doc_id} -> {new_doc_id}")
    # Update KG and chunk_stats tables (these may not exist in all installations)
    try:
        # Update kg_entity table
        bind.execute(
            sa.text(
                "UPDATE kg_entity SET document_id = :new_id WHERE document_id = :old_id"
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated kg_entity table for document {old_doc_id} -> {new_doc_id}")
        # Update kg_entity_extraction_staging table
        bind.execute(
            sa.text(
                "UPDATE kg_entity_extraction_staging SET document_id = :new_id WHERE document_id = :old_id"
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated kg_entity_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
        # Update kg_relationship table
        bind.execute(
            sa.text(
                "UPDATE kg_relationship SET source_document = :new_id WHERE source_document = :old_id"
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated kg_relationship table for document {old_doc_id} -> {new_doc_id}")
        # Update kg_relationship_extraction_staging table
        bind.execute(
            sa.text(
                "UPDATE kg_relationship_extraction_staging SET source_document = :new_id WHERE source_document = :old_id"
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated kg_relationship_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
        # Update chunk_stats table
        bind.execute(
            sa.text(
                "UPDATE chunk_stats SET document_id = :new_id WHERE document_id = :old_id"
            ),
            {"new_id": new_doc_id, "old_id": old_doc_id},
        )
        # print(f"Successfully updated chunk_stats table for document {old_doc_id} -> {new_doc_id}")
        # Update chunk_stats ID field which includes document_id
        bind.execute(
            sa.text(
                """
                UPDATE chunk_stats
                SET id = REPLACE(id, :old_id, :new_id)
                WHERE id LIKE :old_id_pattern
            """
            ),
            {
                "new_id": new_doc_id,
                "old_id": old_doc_id,
                "old_id_pattern": f"{old_doc_id}__%",
            },
        )
        # print(f"Successfully updated chunk_stats ID field for document {old_doc_id} -> {new_doc_id}")
    except Exception as e:
        logger.warning(f"Some KG/chunk tables may not exist or failed to update: {e}")

    # Step 3: Delete the old document row (this should now be safe since all FKs point to new row)
    bind.execute(
        sa.text("DELETE FROM document WHERE id = :old_id"), {"old_id": old_doc_id}
    )
    # print(f"Successfully deleted document {old_doc_id} from database")


def _visit_chunks(
    *,
    http_client: httpx.Client,
    index_name: str,
    selection: str,
    continuation: str | None = None,
) -> tuple[list[dict], str | None]:
    """Helper that calls the /document/v1 visit API once and returns (docs, next_token)."""

    # Use the same URL as the document API, but with visit-specific params
    base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)

    params: dict[str, str] = {
        "selection": selection,
        "wantedDocumentCount": "1000",
    }
    if continuation:
        params["continuation"] = continuation

    # print(f"Visiting chunks for selection '{selection}' with params {params}")
    resp = http_client.get(base_url, params=params, timeout=None)
    # print(f"Visited chunks for document {selection}")
    resp.raise_for_status()

    payload = resp.json()
    return payload.get("documents", []), payload.get("continuation")


def delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:
    """Delete all chunks for *doc_id* from Vespa using continuation-token paging (no offset)."""

    total_deleted = 0
    # Use exact match instead of contains - Document Selector Language doesn't support contains
    selection = f'{index_name}.document_id=="{doc_id}"'

    with get_vespa_http_client() as http_client:
        continuation: str | None = None
        while True:
            docs, continuation = _visit_chunks(
                http_client=http_client,
                index_name=index_name,
                selection=selection,
                continuation=continuation,
            )

            if not docs:
                break

            for doc in docs:
                vespa_full_id = doc.get("id")
                if not vespa_full_id:
                    continue

                vespa_doc_uuid = vespa_full_id.split("::")[-1]
                delete_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"

                try:
                    resp = http_client.delete(delete_url)
                    resp.raise_for_status()
                    total_deleted += 1
                except Exception as e:
                    print(f"Failed to delete chunk {vespa_doc_uuid}: {e}")

            if not continuation:
                break


def update_document_id_in_vespa(
    index_name: str, old_doc_id: str, new_doc_id: str
) -> None:
    """Update all chunks' document_id field from *old_doc_id* to *new_doc_id* using continuation paging."""

    clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)

    # Use exact match instead of contains - Document Selector Language doesn't support contains
    selection = f'{index_name}.document_id=="{old_doc_id}"'

    with get_vespa_http_client() as http_client:
        continuation: str | None = None
        while True:
            # print(f"Visiting chunks for document {old_doc_id} -> {new_doc_id}")
            docs, continuation = _visit_chunks(
                http_client=http_client,
                index_name=index_name,
                selection=selection,
                continuation=continuation,
            )

            if not docs:
                break

            for doc in docs:
                vespa_full_id = doc.get("id")
                if not vespa_full_id:
                    continue

                vespa_doc_uuid = vespa_full_id.split("::")[-1]
                vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"

                update_request = {
                    "fields": {"document_id": {"assign": clean_new_doc_id}}
                }

                try:
                    resp = http_client.put(vespa_url, json=update_request)
                    resp.raise_for_status()
                except Exception as e:
                    print(f"Failed to update chunk {vespa_doc_uuid}: {e}")
                    raise

            if not continuation:
                break


def delete_document_from_db(current_doc_id: str, index_name: str) -> None:
    # Delete all foreign key references first, then delete the document
    try:
        bind = op.get_bind()

        # Delete from agent-related tables first (order matters due to foreign keys)
        # Delete from agent__sub_query__search_doc first since it references search_doc
        bind.execute(
            sa.text(
                """
                DELETE FROM agent__sub_query__search_doc
                WHERE search_doc_id IN (
                    SELECT id FROM search_doc WHERE document_id = :doc_id
                )
                """
            ),
            {"doc_id": current_doc_id},
        )

        # Delete from chat_message__search_doc
        bind.execute(
            sa.text(
                """
                DELETE FROM chat_message__search_doc
                WHERE search_doc_id IN (
                    SELECT id FROM search_doc WHERE document_id = :doc_id
                )
                """
            ),
            {"doc_id": current_doc_id},
        )

        # Now we can safely delete from search_doc
        bind.execute(
            sa.text("DELETE FROM search_doc WHERE document_id = :doc_id"),
            {"doc_id": current_doc_id},
        )

        # Delete from document_by_connector_credential_pair
        bind.execute(
            sa.text(
                "DELETE FROM document_by_connector_credential_pair WHERE id = :doc_id"
            ),
            {"doc_id": current_doc_id},
        )

        # Delete from other tables that reference this document
        bind.execute(
            sa.text(
                "DELETE FROM document_retrieval_feedback WHERE document_id = :doc_id"
            ),
            {"doc_id": current_doc_id},
        )

        bind.execute(
            sa.text("DELETE FROM document__tag WHERE document_id = :doc_id"),
            {"doc_id": current_doc_id},
        )

        bind.execute(
            sa.text("DELETE FROM user_file WHERE document_id = :doc_id"),
            {"doc_id": current_doc_id},
        )

        # Delete from KG tables if they exist
        try:
            bind.execute(
                sa.text("DELETE FROM kg_entity WHERE document_id = :doc_id"),
                {"doc_id": current_doc_id},
            )

            bind.execute(
                sa.text(
                    "DELETE FROM kg_entity_extraction_staging WHERE document_id = :doc_id"
                ),
                {"doc_id": current_doc_id},
            )

            bind.execute(
                sa.text("DELETE FROM kg_relationship WHERE source_document = :doc_id"),
                {"doc_id": current_doc_id},
            )

            bind.execute(
                sa.text(
                    "DELETE FROM kg_relationship_extraction_staging WHERE source_document = :doc_id"
                ),
                {"doc_id": current_doc_id},
            )

            bind.execute(
                sa.text("DELETE FROM chunk_stats WHERE document_id = :doc_id"),
                {"doc_id": current_doc_id},
            )

            bind.execute(
                sa.text("DELETE FROM chunk_stats WHERE id LIKE :doc_id_pattern"),
                {"doc_id_pattern": f"{current_doc_id}__%"},
            )

        except Exception as e:
            logger.warning(
                f"Some KG/chunk tables may not exist or failed to delete from: {e}"
            )

        # Finally delete the document itself
        bind.execute(
            sa.text("DELETE FROM document WHERE id = :doc_id"),
            {"doc_id": current_doc_id},
        )

        # Delete chunks from vespa
        delete_document_chunks_from_vespa(index_name, current_doc_id)

    except Exception as e:
        print(f"Failed to delete duplicate document {current_doc_id}: {e}")
        # Continue with other documents instead of failing the entire migration


def upgrade() -> None:
    if SKIP_CANON_DRIVE_IDS:
        return
    current_search_settings, _ = active_search_settings()

    # Get the index name
    if hasattr(current_search_settings, "index_name"):
        index_name = current_search_settings.index_name
    else:
        # Default index name if we can't get it from the document_index
        index_name = "danswer_index"

    # Get all Google Drive documents from the database (this is faster and more reliable)
    gdrive_documents = get_google_drive_documents_from_database()

    if not gdrive_documents:
        return

    # Track normalized document IDs to detect duplicates
    all_normalized_doc_ids = set()
    updated_count = 0

    for doc_info in gdrive_documents:
        current_doc_id = doc_info["document_id"]
        normalized_doc_id = normalize_google_drive_url(current_doc_id)

        print(f"Processing document {current_doc_id} -> {normalized_doc_id}")
        # Check for duplicates
        if normalized_doc_id in all_normalized_doc_ids:
            # print(f"Deleting duplicate document {current_doc_id}")
            delete_document_from_db(current_doc_id, index_name)
            continue

        all_normalized_doc_ids.add(normalized_doc_id)

        # If the document ID already doesn't have query parameters, skip it
        if current_doc_id == normalized_doc_id:
            # print(f"Skipping document {current_doc_id} -> {normalized_doc_id} because it already has no query parameters")
            continue

        try:
            # Update both database and Vespa in order
            # Database first to ensure consistency
            update_document_id_in_database(
                current_doc_id, normalized_doc_id, index_name
            )

            # For Vespa, we can now use the original document IDs since we're using contains matching
            update_document_id_in_vespa(index_name, current_doc_id, normalized_doc_id)
            updated_count += 1
            # print(f"Finished updating document {current_doc_id} -> {normalized_doc_id}")
        except Exception as e:
            print(f"Failed to update document {current_doc_id}: {e}")

            if isinstance(e, HTTPStatusError):
                print(f"HTTPStatusError: {e}")
                print(f"Response: {e.response.text}")
                print(f"Status: {e.response.status_code}")
                print(f"Headers: {e.response.headers}")
                print(f"Request: {e.request.url}")
                print(f"Request headers: {e.request.headers}")
            # Note: Rollback is complex with copy-and-swap approach since the old document is already deleted
            # In case of failure, manual intervention may be required
            # Continue with other documents instead of failing the entire migration
            continue

    logger.info(f"Migration complete. Updated {updated_count} Google Drive documents")


def downgrade() -> None:
    # this is a one way migration, so no downgrade.
    # It wouldn't make sense to store the extra query parameters
    # and duplicate documents to allow a reversal.
    pass


================================================
FILE: backend/alembic/versions/15326fcec57e_introduce_onyx_apis.py
================================================
"""Introduce Onyx APIs

Revision ID: 15326fcec57e
Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999

"""

from alembic import op
import sqlalchemy as sa

from onyx.configs.constants import DocumentSource

# revision identifiers, used by Alembic.
revision = "15326fcec57e"
down_revision = "77d07dffae64"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column("credential", "is_admin", new_column_name="admin_public")
    op.add_column(
        "document",
        sa.Column("from_ingestion_api", sa.Boolean(), nullable=True),
    )
    op.alter_column(
        "connector",
        "source",
        type_=sa.String(length=50),
        existing_type=sa.Enum(DocumentSource, native_enum=False),
        existing_nullable=False,
    )


def downgrade() -> None:
    op.drop_column("document", "from_ingestion_api")
    op.alter_column("credential", "admin_public", new_column_name="is_admin")


================================================
FILE: backend/alembic/versions/16c37a30adf2_user_file_relationship_migration.py
================================================
"""Migration 3: User file relationship migration

Revision ID: 16c37a30adf2
Revises: 0cd424f32b1d
Create Date: 2025-09-22 09:47:34.175596

This migration converts folder-based relationships to project-based relationships.
It migrates persona__user_folder to persona__user_file and populates project__user_file.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "16c37a30adf2"
down_revision = "0cd424f32b1d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Migrate folder-based relationships to project-based relationships."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # === Step 1: Migrate persona__user_folder to persona__user_file ===
    table_names = inspector.get_table_names()

    if "persona__user_folder" in table_names and "user_file" in table_names:
        user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
        has_new_id = "new_id" in user_file_columns

        if has_new_id and "folder_id" in user_file_columns:
            logger.info(
                "Migrating persona__user_folder relationships to persona__user_file..."
            )

            # Count relationships to migrate (asyncpg-compatible)
            count_query = text(
                """
                SELECT COUNT(*)
                FROM (
                    SELECT DISTINCT puf.persona_id, uf.id
                    FROM persona__user_folder puf
                    JOIN user_file uf ON uf.folder_id = puf.user_folder_id
                    WHERE NOT EXISTS (
                        SELECT 1
                        FROM persona__user_file p2
                        WHERE p2.persona_id = puf.persona_id
                        AND p2.user_file_id = uf.id
                    )
                ) AS distinct_pairs
            """
            )
            to_migrate = bind.execute(count_query).scalar_one()

            if to_migrate > 0:
                logger.info(f"Creating {to_migrate} persona-file relationships...")

                # Migrate in batches to avoid memory issues
                batch_size = 10000
                total_inserted = 0

                while True:
                    # Insert batch directly using subquery (asyncpg compatible)
                    result = bind.execute(
                        text(
                            """
                        INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)
                        SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id
                        FROM persona__user_folder puf
                        JOIN user_file uf ON uf.folder_id = puf.user_folder_id
                        WHERE NOT EXISTS (
                            SELECT 1
                            FROM persona__user_file p2
                            WHERE p2.persona_id = puf.persona_id
                            AND p2.user_file_id = uf.id
                        )
                        LIMIT :batch_size
                    """
                        ),
                        {"batch_size": batch_size},
                    )

                    inserted = result.rowcount
                    total_inserted += inserted

                    if inserted < batch_size:
                        break

                    logger.info(
                        f"  Migrated {total_inserted}/{to_migrate} relationships..."
                    )

                logger.info(
                    f"Created {total_inserted} persona__user_file relationships"
                )

    # === Step 2: Add foreign key for chat_session.project_id ===
    chat_session_fks = inspector.get_foreign_keys("chat_session")
    fk_exists = any(
        fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
    )

    if not fk_exists:
        logger.info("Adding foreign key constraint for chat_session.project_id...")
        op.create_foreign_key(
            "fk_chat_session_project_id",
            "chat_session",
            "user_project",
            ["project_id"],
            ["id"],
        )
        logger.info("Added foreign key constraint")

    # === Step 3: Populate project__user_file from user_file.folder_id ===
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
    has_new_id = "new_id" in user_file_columns

    if has_new_id and "folder_id" in user_file_columns:
        logger.info("Populating project__user_file from folder relationships...")

        # Count relationships to create
        count_query = text(
            """
            SELECT COUNT(*)
            FROM user_file uf
            WHERE uf.folder_id IS NOT NULL
            AND NOT EXISTS (
                SELECT 1
                FROM project__user_file puf
                WHERE puf.project_id = uf.folder_id
                AND puf.user_file_id = uf.new_id
            )
        """
        )
        to_create = bind.execute(count_query).scalar_one()

        if to_create > 0:
            logger.info(f"Creating {to_create} project-file relationships...")

            # Insert in batches
            batch_size = 10000
            total_inserted = 0

            while True:
                result = bind.execute(
                    text(
                        """
                    INSERT INTO project__user_file (project_id, user_file_id)
                    SELECT uf.folder_id, uf.new_id
                    FROM user_file uf
                    WHERE uf.folder_id IS NOT NULL
                    AND NOT EXISTS (
                        SELECT 1
                        FROM project__user_file puf
                        WHERE puf.project_id = uf.folder_id
                        AND puf.user_file_id = uf.new_id
                    )
                    LIMIT :batch_size
                    ON CONFLICT (project_id, user_file_id) DO NOTHING
                """
                    ),
                    {"batch_size": batch_size},
                )

                inserted = result.rowcount
                total_inserted += inserted

                if inserted < batch_size:
                    break

                logger.info(f"  Created {total_inserted}/{to_create} relationships...")

            logger.info(f"Created {total_inserted} project__user_file relationships")

    # === Step 4: Create index on chat_session.project_id ===
    try:
        indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
    except Exception:
        indexes = []

    if "ix_chat_session_project_id" not in indexes:
        logger.info("Creating index on chat_session.project_id...")
        op.create_index(
            "ix_chat_session_project_id", "chat_session", ["project_id"], unique=False
        )
        logger.info("Created index")

    logger.info("Migration 3 (relationship migration) completed successfully")


def downgrade() -> None:
    """Remove migrated relationships and constraints."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting downgrade of relationship migration...")

    # Drop index on chat_session.project_id
    try:
        indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
        if "ix_chat_session_project_id" in indexes:
            op.drop_index("ix_chat_session_project_id", "chat_session")
            logger.info("Dropped index on chat_session.project_id")
    except Exception:
        pass

    # Drop foreign key constraint
    try:
        chat_session_fks = inspector.get_foreign_keys("chat_session")
        fk_exists = any(
            fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
        )
        if fk_exists:
            op.drop_constraint(
                "fk_chat_session_project_id", "chat_session", type_="foreignkey"
            )
            logger.info("Dropped foreign key constraint on chat_session.project_id")
    except Exception:
        pass

    # Clear project__user_file relationships (but keep the table for migration 1 to handle)
    if "project__user_file" in inspector.get_table_names():
        result = bind.execute(text("DELETE FROM project__user_file"))
        logger.info(f"Cleared {result.rowcount} records from project__user_file")

    # Remove migrated persona__user_file relationships
    # Only remove those that came from folder relationships
    if all(
        table in inspector.get_table_names()
        for table in ["persona__user_file", "persona__user_folder", "user_file"]
    ):
        user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
        if "folder_id" in user_file_columns:
            result = bind.execute(
                text(
                    """
                DELETE FROM persona__user_file puf
                WHERE EXISTS (
                    SELECT 1
                    FROM user_file uf
                    JOIN persona__user_folder puf2
                        ON puf2.user_folder_id = uf.folder_id
                    WHERE puf.persona_id = puf2.persona_id
                    AND puf.user_file_id = uf.id
                )
            """
                )
            )
            logger.info(
                f"Removed {result.rowcount} migrated persona__user_file relationships"
            )

    logger.info("Downgrade completed successfully")


================================================
FILE: backend/alembic/versions/173cae5bba26_port_config_store.py
================================================
"""Port Config Store

Revision ID: 173cae5bba26
Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "173cae5bba26"
down_revision = "e50154680a5c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "key_value_store",
        sa.Column("key", sa.String(), nullable=False),
        sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.PrimaryKeyConstraint("key"),
    )


def downgrade() -> None:
    op.drop_table("key_value_store")


================================================
FILE: backend/alembic/versions/175ea04c7087_add_user_preferences.py
================================================
"""add_user_preferences

Revision ID: 175ea04c7087
Revises: d56ffa94ca32
Create Date: 2026-02-04 18:16:24.830873

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "175ea04c7087"
down_revision = "d56ffa94ca32"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("user_preferences", sa.Text(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("user", "user_preferences")


================================================
FILE: backend/alembic/versions/177de57c21c9_display_custom_llm_models.py
================================================
"""display custom llm models

Revision ID: 177de57c21c9
Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import and_

revision = "177de57c21c9"
down_revision = "4ee1287bd26a"
branch_labels = None
depends_on = None
depends_on = None


def upgrade() -> None:
    conn = op.get_bind()
    llm_provider = sa.table(
        "llm_provider",
        sa.column("id", sa.Integer),
        sa.column("provider", sa.String),
        sa.column("model_names", postgresql.ARRAY(sa.String)),
        sa.column("display_model_names", postgresql.ARRAY(sa.String)),
    )

    excluded_providers = ["openai", "bedrock", "anthropic", "azure"]

    providers_to_update = sa.select(
        llm_provider.c.id,
        llm_provider.c.model_names,
        llm_provider.c.display_model_names,
    ).where(
        and_(
            ~llm_provider.c.provider.in_(excluded_providers),
            llm_provider.c.model_names.isnot(None),
        )
    )

    results = conn.execute(providers_to_update).fetchall()

    for provider_id, model_names, display_model_names in results:
        if display_model_names is None:
            display_model_names = []

        combined_model_names = list(set(display_model_names + model_names))
        update_stmt = (
            llm_provider.update()
            .where(llm_provider.c.id == provider_id)
            .values(display_model_names=combined_model_names)
        )
        conn.execute(update_stmt)


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/18b5b2524446_add_is_clarification_to_chat_message.py
================================================
"""add is_clarification to chat_message

Revision ID: 18b5b2524446
Revises: 87c52ec39f84
Create Date: 2025-01-16

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "18b5b2524446"
down_revision = "87c52ec39f84"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column(
            "is_clarification", sa.Boolean(), nullable=False, server_default="false"
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "is_clarification")


================================================
FILE: backend/alembic/versions/19c0ccb01687_migrate_to_contextual_rag_model.py
================================================
"""Migrate to contextual rag model

Revision ID: 19c0ccb01687
Revises: 9c54986124c6
Create Date: 2026-02-12 11:21:41.798037

"""

import sqlalchemy as sa
from alembic import op


# revision identifiers, used by Alembic.
revision = "19c0ccb01687"
down_revision = "9c54986124c6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Widen the column to fit 'CONTEXTUAL_RAG' (15 chars); was varchar(10)
    # when the table was created with only CHAT/VISION values.
    op.alter_column(
        "llm_model_flow",
        "llm_model_flow_type",
        type_=sa.String(length=20),
        existing_type=sa.String(length=10),
        existing_nullable=False,
    )

    # For every search_settings row that has contextual rag configured,
    # create an llm_model_flow entry. is_default is TRUE if the row
    # belongs to the PRESENT search settings, FALSE otherwise.
    op.execute(
        """
        INSERT INTO llm_model_flow (llm_model_flow_type, model_configuration_id, is_default)
        SELECT DISTINCT
            'CONTEXTUAL_RAG',
            mc.id,
            (ss.status = 'PRESENT')
        FROM search_settings ss
        JOIN llm_provider lp
            ON lp.name = ss.contextual_rag_llm_provider
        JOIN model_configuration mc
            ON mc.llm_provider_id = lp.id
            AND mc.name = ss.contextual_rag_llm_name
        WHERE ss.enable_contextual_rag = TRUE
            AND ss.contextual_rag_llm_name IS NOT NULL
            AND ss.contextual_rag_llm_provider IS NOT NULL
        ON CONFLICT (llm_model_flow_type, model_configuration_id)
            DO UPDATE SET is_default = EXCLUDED.is_default
            WHERE EXCLUDED.is_default = TRUE
        """
    )


def downgrade() -> None:
    op.execute(
        """
        DELETE FROM llm_model_flow
        WHERE llm_model_flow_type = 'CONTEXTUAL_RAG'
        """
    )

    op.alter_column(
        "llm_model_flow",
        "llm_model_flow_type",
        type_=sa.String(length=10),
        existing_type=sa.String(length=20),
        existing_nullable=False,
    )


================================================
FILE: backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
================================================
"""Add indexes to document__tag

Revision ID: 1a03d2c2856b
Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "1a03d2c2856b"
down_revision = "9c00a2bccb83"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_index(
        op.f("ix_document__tag_tag_id"),
        "document__tag",
        ["tag_id"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")


================================================
FILE: backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py
================================================
"""add additional data to notifications

Revision ID: 1b10e1fda030
Revises: 6756efa39ada
Create Date: 2024-10-15 19:26:44.071259

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "1b10e1fda030"
down_revision = "6756efa39ada"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "notification", sa.Column("additional_data", postgresql.JSONB(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("notification", "additional_data")


================================================
FILE: backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py
================================================
"""add_user_delete_cascades

Revision ID: 1b8206b29c5d
Revises: 35e6853a51d5
Create Date: 2024-09-18 11:48:59.418726

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "1b8206b29c5d"
down_revision = "35e6853a51d5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
    op.create_foreign_key(
        "credential_user_id_fkey",
        "credential",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
    op.create_foreign_key(
        "chat_session_user_id_fkey",
        "chat_session",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
    op.create_foreign_key(
        "chat_folder_user_id_fkey",
        "chat_folder",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
    op.create_foreign_key(
        "prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"], ondelete="CASCADE"
    )

    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
    op.create_foreign_key(
        "notification_user_id_fkey",
        "notification",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
    op.create_foreign_key(
        "inputprompt_user_id_fkey",
        "inputprompt",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
    op.create_foreign_key(
        "credential_user_id_fkey", "credential", "user", ["user_id"], ["id"]
    )

    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
    op.create_foreign_key(
        "chat_session_user_id_fkey", "chat_session", "user", ["user_id"], ["id"]
    )

    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
    op.create_foreign_key(
        "chat_folder_user_id_fkey", "chat_folder", "user", ["user_id"], ["id"]
    )

    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
    op.create_foreign_key("prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"])

    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
    op.create_foreign_key(
        "notification_user_id_fkey", "notification", "user", ["user_id"], ["id"]
    )

    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
    op.create_foreign_key(
        "inputprompt_user_id_fkey", "inputprompt", "user", ["user_id"], ["id"]
    )


================================================
FILE: backend/alembic/versions/1d78c0ca7853_remove_voice_provider_deleted_column.py
================================================
"""remove voice_provider deleted column

Revision ID: 1d78c0ca7853
Revises: a3f8b2c1d4e5
Create Date: 2026-03-26 11:30:53.883127

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "1d78c0ca7853"
down_revision = "a3f8b2c1d4e5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Hard-delete any soft-deleted rows before dropping the column
    op.execute("DELETE FROM voice_provider WHERE deleted = true")
    op.drop_column("voice_provider", "deleted")


def downgrade() -> None:
    op.add_column(
        "voice_provider",
        sa.Column(
            "deleted",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )


================================================
FILE: backend/alembic/versions/1f2a3b4c5d6e_add_internet_search_and_content_providers.py
================================================
"""add internet search and content provider tables

Revision ID: 1f2a3b4c5d6e
Revises: 9drpiiw74ljy
Create Date: 2025-11-10 19:45:00.000000

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "1f2a3b4c5d6e"
down_revision = "9drpiiw74ljy"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "internet_search_provider",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False, unique=True),
        sa.Column("provider_type", sa.String(), nullable=False),
        sa.Column("api_key", sa.LargeBinary(), nullable=True),
        sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column(
            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
    )
    op.create_index(
        "ix_internet_search_provider_is_active",
        "internet_search_provider",
        ["is_active"],
    )

    op.create_table(
        "internet_content_provider",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False, unique=True),
        sa.Column("provider_type", sa.String(), nullable=False),
        sa.Column("api_key", sa.LargeBinary(), nullable=True),
        sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column(
            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
    )
    op.create_index(
        "ix_internet_content_provider_is_active",
        "internet_content_provider",
        ["is_active"],
    )


def downgrade() -> None:
    op.drop_index(
        "ix_internet_content_provider_is_active", table_name="internet_content_provider"
    )
    op.drop_table("internet_content_provider")
    op.drop_index(
        "ix_internet_search_provider_is_active", table_name="internet_search_provider"
    )
    op.drop_table("internet_search_provider")


================================================
FILE: backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
================================================
"""embedding model -> search settings

Revision ID: 1f60f60c3401
Revises: f17bf3b0d9f1
Create Date: 2024-08-25 12:39:51.731632

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "1f60f60c3401"
down_revision = "f17bf3b0d9f1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_constraint(
        "index_attempt__embedding_model_fk", "index_attempt", type_="foreignkey"
    )
    # Rename the table
    op.rename_table("embedding_model", "search_settings")

    # Add new columns
    op.add_column(
        "search_settings",
        sa.Column(
            "multipass_indexing", sa.Boolean(), nullable=False, server_default="false"
        ),
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "multilingual_expansion",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "disable_rerank_for_streaming",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "num_rerank",
            sa.Integer(),
            nullable=False,
            server_default=str(20),
        ),
    )

    # Add the new column as nullable initially
    op.add_column(
        "index_attempt", sa.Column("search_settings_id", sa.Integer(), nullable=True)
    )

    # Populate the new column with data from the existing embedding_model_id
    op.execute("UPDATE index_attempt SET search_settings_id = embedding_model_id")

    # Create the foreign key constraint
    op.create_foreign_key(
        "fk_index_attempt_search_settings",
        "index_attempt",
        "search_settings",
        ["search_settings_id"],
        ["id"],
    )

    # Make the new column non-nullable
    op.alter_column("index_attempt", "search_settings_id", nullable=False)

    # Drop the old embedding_model_id column
    op.drop_column("index_attempt", "embedding_model_id")


def downgrade() -> None:
    # Add back the embedding_model_id column
    op.add_column(
        "index_attempt", sa.Column("embedding_model_id", sa.Integer(), nullable=True)
    )

    # Populate the old column with data from search_settings_id
    op.execute("UPDATE index_attempt SET embedding_model_id = search_settings_id")

    # Make the old column non-nullable
    op.alter_column("index_attempt", "embedding_model_id", nullable=False)

    # Drop the foreign key constraint
    op.drop_constraint(
        "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
    )

    # Drop the new search_settings_id column
    op.drop_column("index_attempt", "search_settings_id")

    # Rename the table back
    op.rename_table("search_settings", "embedding_model")

    # Remove added columns
    op.drop_column("embedding_model", "num_rerank")
    op.drop_column("embedding_model", "rerank_api_key")
    op.drop_column("embedding_model", "rerank_provider_type")
    op.drop_column("embedding_model", "rerank_model_name")
    op.drop_column("embedding_model", "disable_rerank_for_streaming")
    op.drop_column("embedding_model", "multilingual_expansion")
    op.drop_column("embedding_model", "multipass_indexing")

    op.create_foreign_key(
        "index_attempt__embedding_model_fk",
        "index_attempt",
        "embedding_model",
        ["embedding_model_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/2020d417ec84_single_onyx_craft_migration.py
================================================
"""single onyx craft migration

Consolidates all buildmode/onyx craft tables into a single migration.

Tables created:
- build_session: User build sessions with status tracking
- sandbox: User-owned containerized environments (one per user)
- artifact: Build output files (web apps, documents, images)
- snapshot: Sandbox filesystem snapshots
- build_message: Conversation messages for build sessions

Existing table modified:
- connector_credential_pair: Added processing_mode column

Revision ID: 2020d417ec84
Revises: 41fa44bef321
Create Date: 2026-01-26 14:43:54.641405

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "2020d417ec84"
down_revision = "41fa44bef321"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # ==========================================================================
    # ENUMS
    # ==========================================================================

    # Build session status enum
    build_session_status_enum = sa.Enum(
        "active",
        "idle",
        name="buildsessionstatus",
        native_enum=False,
    )

    # Sandbox status enum
    sandbox_status_enum = sa.Enum(
        "provisioning",
        "running",
        "idle",
        "sleeping",
        "terminated",
        "failed",
        name="sandboxstatus",
        native_enum=False,
    )

    # Artifact type enum
    artifact_type_enum = sa.Enum(
        "web_app",
        "pptx",
        "docx",
        "markdown",
        "excel",
        "image",
        name="artifacttype",
        native_enum=False,
    )

    # ==========================================================================
    # BUILD_SESSION TABLE
    # ==========================================================================

    op.create_table(
        "build_session",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("user.id", ondelete="CASCADE"),
            nullable=True,
        ),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column(
            "status",
            build_session_status_enum,
            nullable=False,
            server_default="active",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "last_activity_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("nextjs_port", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_index(
        "ix_build_session_user_created",
        "build_session",
        ["user_id", sa.text("created_at DESC")],
        unique=False,
    )
    op.create_index(
        "ix_build_session_status",
        "build_session",
        ["status"],
        unique=False,
    )

    # ==========================================================================
    # SANDBOX TABLE (user-owned, one per user)
    # ==========================================================================

    op.create_table(
        "sandbox",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("user.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("container_id", sa.String(), nullable=True),
        sa.Column(
            "status",
            sandbox_status_enum,
            nullable=False,
            server_default="provisioning",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("user_id", name="sandbox_user_id_key"),
    )

    op.create_index(
        "ix_sandbox_status",
        "sandbox",
        ["status"],
        unique=False,
    )
    op.create_index(
        "ix_sandbox_container_id",
        "sandbox",
        ["container_id"],
        unique=False,
    )

    # ==========================================================================
    # ARTIFACT TABLE
    # ==========================================================================

    op.create_table(
        "artifact",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("type", artifact_type_enum, nullable=False),
        sa.Column("path", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_index(
        "ix_artifact_session_created",
        "artifact",
        ["session_id", sa.text("created_at DESC")],
        unique=False,
    )
    op.create_index(
        "ix_artifact_type",
        "artifact",
        ["type"],
        unique=False,
    )

    # ==========================================================================
    # SNAPSHOT TABLE
    # ==========================================================================

    op.create_table(
        "snapshot",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("storage_path", sa.String(), nullable=False),
        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_index(
        "ix_snapshot_session_created",
        "snapshot",
        ["session_id", sa.text("created_at DESC")],
        unique=False,
    )

    # ==========================================================================
    # BUILD_MESSAGE TABLE
    # ==========================================================================

    op.create_table(
        "build_message",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "session_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("build_session.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "turn_index",
            sa.Integer(),
            nullable=False,
        ),
        sa.Column(
            "type",
            sa.Enum(
                "SYSTEM",
                "USER",
                "ASSISTANT",
                "DANSWER",
                name="messagetype",
                create_type=False,
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column(
            "message_metadata",
            postgresql.JSONB(),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_index(
        "ix_build_message_session_turn",
        "build_message",
        ["session_id", "turn_index", sa.text("created_at ASC")],
        unique=False,
    )

    # ==========================================================================
    # CONNECTOR_CREDENTIAL_PAIR MODIFICATION
    # ==========================================================================

    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "processing_mode",
            sa.String(),
            nullable=False,
            server_default="regular",
        ),
    )


def downgrade() -> None:
    # ==========================================================================
    # CONNECTOR_CREDENTIAL_PAIR MODIFICATION
    # ==========================================================================

    op.drop_column("connector_credential_pair", "processing_mode")

    # ==========================================================================
    # BUILD_MESSAGE TABLE
    # ==========================================================================

    op.drop_index("ix_build_message_session_turn", table_name="build_message")
    op.drop_table("build_message")

    # ==========================================================================
    # SNAPSHOT TABLE
    # ==========================================================================

    op.drop_index("ix_snapshot_session_created", table_name="snapshot")
    op.drop_table("snapshot")

    # ==========================================================================
    # ARTIFACT TABLE
    # ==========================================================================

    op.drop_index("ix_artifact_type", table_name="artifact")
    op.drop_index("ix_artifact_session_created", table_name="artifact")
    op.drop_table("artifact")
    sa.Enum(name="artifacttype").drop(op.get_bind(), checkfirst=True)

    # ==========================================================================
    # SANDBOX TABLE
    # ==========================================================================

    op.drop_index("ix_sandbox_container_id", table_name="sandbox")
    op.drop_index("ix_sandbox_status", table_name="sandbox")
    op.drop_table("sandbox")
    sa.Enum(name="sandboxstatus").drop(op.get_bind(), checkfirst=True)

    # ==========================================================================
    # BUILD_SESSION TABLE
    # ==========================================================================

    op.drop_index("ix_build_session_status", table_name="build_session")
    op.drop_index("ix_build_session_user_created", table_name="build_session")
    op.drop_table("build_session")
    sa.Enum(name="buildsessionstatus").drop(op.get_bind(), checkfirst=True)


================================================
FILE: backend/alembic/versions/213fd978c6d8_notifications.py
================================================
"""notifications

Revision ID: 213fd978c6d8
Revises: 5fc1f54cc252
Create Date: 2024-08-10 11:13:36.070790

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "213fd978c6d8"
down_revision = "5fc1f54cc252"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "notification",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "notif_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column(
            "user_id",
            sa.UUID(),
            nullable=True,
        ),
        sa.Column("dismissed", sa.Boolean(), nullable=False),
        sa.Column("last_shown", sa.DateTime(timezone=True), nullable=False),
        sa.Column("first_shown", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("notification")


================================================
FILE: backend/alembic/versions/238b84885828_add_foreign_key_to_user__external_user_.py
================================================
"""Add foreign key to user__external_user_group_id

Revision ID: 238b84885828
Revises: a7688ab35c45
Create Date: 2025-05-19 17:15:33.424584

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "238b84885828"
down_revision = "a7688ab35c45"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # First, clean up any entries that don't have a valid cc_pair_id
    op.execute(
        """
        DELETE FROM user__external_user_group_id
        WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
        """
    )

    # Add foreign key constraint with cascade delete
    op.create_foreign_key(
        "fk_user__external_user_group_id_cc_pair_id",
        "user__external_user_group_id",
        "connector_credential_pair",
        ["cc_pair_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Drop the foreign key constraint
    op.drop_constraint(
        "fk_user__external_user_group_id_cc_pair_id",
        "user__external_user_group_id",
        type_="foreignkey",
    )


================================================
FILE: backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
================================================
"""remove-feedback-foreignkey-constraint

Revision ID: 23957775e5f5
Revises: bc9771dccadf
Create Date: 2024-06-27 16:04:51.480437

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "23957775e5f5"
down_revision = "bc9771dccadf"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_constraint(
        "chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_feedback__chat_message_fk",
        "chat_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
        ondelete="SET NULL",
    )
    op.alter_column(
        "chat_feedback", "chat_message_id", existing_type=sa.Integer(), nullable=True
    )
    op.drop_constraint(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
        ondelete="SET NULL",
    )
    op.alter_column(
        "document_retrieval_feedback",
        "chat_message_id",
        existing_type=sa.Integer(),
        nullable=True,
    )


def downgrade() -> None:
    op.alter_column(
        "chat_feedback", "chat_message_id", existing_type=sa.Integer(), nullable=False
    )
    op.drop_constraint(
        "chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_feedback__chat_message_fk",
        "chat_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )

    op.alter_column(
        "document_retrieval_feedback",
        "chat_message_id",
        existing_type=sa.Integer(),
        nullable=False,
    )
    op.drop_constraint(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/25a5501dc766_group_permissions_phase1.py
================================================
"""group_permissions_phase1

Revision ID: 25a5501dc766
Revises: b728689f45b1
Create Date: 2026-03-23 11:41:25.557442

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

from onyx.db.enums import AccountType
from onyx.db.enums import GrantSource
from onyx.db.enums import Permission


# revision identifiers, used by Alembic.
revision = "25a5501dc766"
down_revision = "b728689f45b1"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1. Add account_type column to user table (nullable for now).
    #    TODO(subash): backfill account_type for existing rows and add NOT NULL.
    op.add_column(
        "user",
        sa.Column(
            "account_type",
            sa.Enum(AccountType, native_enum=False),
            nullable=True,
        ),
    )

    # 2. Add is_default column to user_group table
    op.add_column(
        "user_group",
        sa.Column(
            "is_default",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )

    # 3. Create permission_grant table
    op.create_table(
        "permission_grant",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("group_id", sa.Integer(), nullable=False),
        sa.Column(
            "permission",
            sa.Enum(Permission, native_enum=False),
            nullable=False,
        ),
        sa.Column(
            "grant_source",
            sa.Enum(GrantSource, native_enum=False),
            nullable=False,
        ),
        sa.Column(
            "granted_by",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column(
            "granted_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column(
            "is_deleted",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["group_id"],
            ["user_group.id"],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["granted_by"],
            ["user.id"],
            ondelete="SET NULL",
        ),
        sa.UniqueConstraint(
            "group_id", "permission", name="uq_permission_grant_group_permission"
        ),
    )

    # 4. Index on user__user_group(user_id) — existing composite PK
    #    has user_group_id as leading column; user-filtered queries need this
    op.create_index(
        "ix_user__user_group_user_id",
        "user__user_group",
        ["user_id"],
    )


def downgrade() -> None:
    op.drop_index("ix_user__user_group_user_id", table_name="user__user_group")
    op.drop_table("permission_grant")
    op.drop_column("user_group", "is_default")
    op.drop_column("user", "account_type")


================================================
FILE: backend/alembic/versions/2664261bfaab_add_cache_store_table.py
================================================
"""add cache_store table

Revision ID: 2664261bfaab
Revises: 4a1e4b1c89d2
Create Date: 2026-02-27 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "2664261bfaab"
down_revision = "4a1e4b1c89d2"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "cache_store",
        sa.Column("key", sa.String(), nullable=False),
        sa.Column("value", sa.LargeBinary(), nullable=True),
        sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("key"),
    )
    op.create_index(
        "ix_cache_store_expires",
        "cache_store",
        ["expires_at"],
        postgresql_where=sa.text("expires_at IS NOT NULL"),
    )


def downgrade() -> None:
    op.drop_index("ix_cache_store_expires", table_name="cache_store")
    op.drop_table("cache_store")


================================================
FILE: backend/alembic/versions/2666d766cb9b_google_oauth2.py
================================================
"""Google OAuth2

Revision ID: 2666d766cb9b
Revises: 6d387b3196c2
Create Date: 2023-05-05 15:49:35.716016

"""

import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op


# revision identifiers, used by Alembic.
revision = "2666d766cb9b"
down_revision = "6d387b3196c2"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "oauth_account",
        sa.Column("id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("oauth_name", sa.String(length=100), nullable=False),
        sa.Column("access_token", sa.String(length=1024), nullable=False),
        sa.Column("expires_at", sa.Integer(), nullable=True),
        sa.Column("refresh_token", sa.String(length=1024), nullable=True),
        sa.Column("account_id", sa.String(length=320), nullable=False),
        sa.Column("account_email", sa.String(length=320), nullable=False),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="cascade"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_oauth_account_account_id"),
        "oauth_account",
        ["account_id"],
        unique=False,
    )
    op.create_index(
        op.f("ix_oauth_account_oauth_name"),
        "oauth_account",
        ["oauth_name"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(op.f("ix_oauth_account_oauth_name"), table_name="oauth_account")
    op.drop_index(op.f("ix_oauth_account_account_id"), table_name="oauth_account")
    op.drop_table("oauth_account")


================================================
FILE: backend/alembic/versions/26b931506ecb_default_chosen_assistants_to_none.py
================================================
"""default chosen assistants to none

Revision ID: 26b931506ecb
Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "26b931506ecb"
down_revision = "2daa494a0851"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user", sa.Column("chosen_assistants_new", postgresql.JSONB(), nullable=True)
    )

    op.execute(
        """
    UPDATE "user"
    SET chosen_assistants_new =
        CASE
            WHEN chosen_assistants = '[-2, -1, 0]' THEN NULL
            ELSE chosen_assistants
        END
    """
    )

    op.drop_column("user", "chosen_assistants")

    op.alter_column(
        "user", "chosen_assistants_new", new_column_name="chosen_assistants"
    )


def downgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "chosen_assistants_old",
            postgresql.JSONB(),
            nullable=False,
            server_default="[-2, -1, 0]",
        ),
    )

    op.execute(
        """
    UPDATE "user"
    SET chosen_assistants_old =
        CASE
            WHEN chosen_assistants IS NULL THEN '[-2, -1, 0]'::jsonb
            ELSE chosen_assistants
        END
    """
    )

    op.drop_column("user", "chosen_assistants")

    op.alter_column(
        "user", "chosen_assistants_old", new_column_name="chosen_assistants"
    )


================================================
FILE: backend/alembic/versions/27c6ecc08586_permission_framework.py
================================================
"""Permission Framework

Revision ID: 27c6ecc08586
Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495

"""

import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "27c6ecc08586"
down_revision = "2666d766cb9b"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute("TRUNCATE TABLE index_attempt")
    op.create_table(
        "connector",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "source",
            sa.Enum(
                "SLACK",
                "WEB",
                "GOOGLE_DRIVE",
                "GITHUB",
                "CONFLUENCE",
                name="documentsource",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column(
            "input_type",
            sa.Enum(
                "LOAD_STATE",
                "POLL",
                "EVENT",
                name="inputtype",
                native_enum=False,
            ),
            nullable=True,
        ),
        sa.Column(
            "connector_specific_config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column("refresh_freq", sa.Integer(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("disabled", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "credential",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "credential_json",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("public_doc", sa.Boolean(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "connector_credential_pair",
        sa.Column("connector_id", sa.Integer(), nullable=False),
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["connector_id"],
            ["connector.id"],
        ),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
        ),
        sa.PrimaryKeyConstraint("connector_id", "credential_id"),
    )
    op.add_column(
        "index_attempt",
        sa.Column("connector_id", sa.Integer(), nullable=True),
    )
    op.add_column(
        "index_attempt",
        sa.Column("credential_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "fk_index_attempt_credential_id",
        "index_attempt",
        "credential",
        ["credential_id"],
        ["id"],
    )
    op.create_foreign_key(
        "fk_index_attempt_connector_id",
        "index_attempt",
        "connector",
        ["connector_id"],
        ["id"],
    )
    op.drop_column("index_attempt", "connector_specific_config")
    op.drop_column("index_attempt", "source")
    op.drop_column("index_attempt", "input_type")


def downgrade() -> None:
    op.execute("TRUNCATE TABLE index_attempt")
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    existing_columns = {col["name"] for col in inspector.get_columns("index_attempt")}

    if "input_type" not in existing_columns:
        op.add_column(
            "index_attempt",
            sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
        )

    if "source" not in existing_columns:
        op.add_column(
            "index_attempt",
            sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
        )

    if "connector_specific_config" not in existing_columns:
        op.add_column(
            "index_attempt",
            sa.Column(
                "connector_specific_config",
                postgresql.JSONB(astext_type=sa.Text()),
                autoincrement=False,
                nullable=False,
            ),
        )

    # Check if the constraint exists before dropping
    constraints = inspector.get_foreign_keys("index_attempt")

    if any(
        constraint["name"] == "fk_index_attempt_credential_id"
        for constraint in constraints
    ):
        op.drop_constraint(
            "fk_index_attempt_credential_id", "index_attempt", type_="foreignkey"
        )

    if any(
        constraint["name"] == "fk_index_attempt_connector_id"
        for constraint in constraints
    ):
        op.drop_constraint(
            "fk_index_attempt_connector_id", "index_attempt", type_="foreignkey"
        )

    if "credential_id" in existing_columns:
        op.drop_column("index_attempt", "credential_id")

    if "connector_id" in existing_columns:
        op.drop_column("index_attempt", "connector_id")

    op.execute("DROP TABLE IF EXISTS connector_credential_pair CASCADE")
    op.execute("DROP TABLE IF EXISTS credential CASCADE")
    op.execute("DROP TABLE IF EXISTS connector CASCADE")


================================================
FILE: backend/alembic/versions/27fb147a843f_add_timestamps_to_user_table.py
================================================
"""add timestamps to user table

Revision ID: 27fb147a843f
Revises: b5c4d7e8f9a1
Create Date: 2026-03-08 17:18:40.828644

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "27fb147a843f"
down_revision = "b5c4d7e8f9a1"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )
    op.add_column(
        "user",
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "updated_at")
    op.drop_column("user", "created_at")


================================================
FILE: backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py
================================================
"""add chunk count to document

Revision ID: 2955778aa44c
Revises: c0aab6edb6dd
Create Date: 2025-01-04 11:39:43.268612

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "2955778aa44c"
down_revision = "c0aab6edb6dd"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column("document", sa.Column("chunk_count", sa.Integer(), nullable=True))


def downgrade() -> None:
    op.drop_column("document", "chunk_count")


================================================
FILE: backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py
================================================
"""add last refreshed at mcp server

Revision ID: 2a391f840e85
Revises: 4cebcbc9b2ae
Create Date: 2025-12-06 15:19:59.766066

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembi.
revision = "2a391f840e85"
down_revision = "4cebcbc9b2ae"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "mcp_server",
        sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("mcp_server", "last_refreshed_at")


================================================
FILE: backend/alembic/versions/2acdef638fc2_add_switchover_type_field.py
================================================
"""add switchover_type field and remove background_reindex_enabled

Revision ID: 2acdef638fc2
Revises: a4f23d6b71c8
Create Date: 2025-01-XX XX:XX:XX.XXXXXX

"""

from alembic import op
import sqlalchemy as sa

from onyx.db.enums import SwitchoverType


# revision identifiers, used by Alembic.
revision = "2acdef638fc2"
down_revision = "a4f23d6b71c8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add switchover_type column with default value of REINDEX
    op.add_column(
        "search_settings",
        sa.Column(
            "switchover_type",
            sa.Enum(SwitchoverType, native_enum=False),
            nullable=False,
            server_default=SwitchoverType.REINDEX.value,
        ),
    )

    # Migrate existing data: set switchover_type based on background_reindex_enabled
    # REINDEX where background_reindex_enabled=True, INSTANT where False
    op.execute(
        """
        UPDATE search_settings
        SET switchover_type = CASE
            WHEN background_reindex_enabled = true THEN 'REINDEX'
            ELSE 'INSTANT'
        END
        """
    )

    # Remove the background_reindex_enabled column (replaced by switchover_type)
    op.drop_column("search_settings", "background_reindex_enabled")


def downgrade() -> None:
    # Re-add the background_reindex_enabled column with default value of True
    op.add_column(
        "search_settings",
        sa.Column(
            "background_reindex_enabled",
            sa.Boolean(),
            nullable=False,
            server_default="true",
        ),
    )
    # Set background_reindex_enabled based on switchover_type
    op.execute(
        """
        UPDATE search_settings
        SET background_reindex_enabled = CASE
            WHEN switchover_type = 'INSTANT' THEN false
            ELSE true
        END
        """
    )
    # Remove the switchover_type column
    op.drop_column("search_settings", "switchover_type")


================================================
FILE: backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py
================================================
"""Migration 6: User file schema cleanup

Revision ID: 2b75d0a8ffcb
Revises: 3a78dba1080a
Create Date: 2025-09-22 10:09:26.375377

This migration removes legacy columns and tables after data migration is complete.
It should only be run after verifying all data has been successfully migrated.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging
import fastapi_users_db_sqlalchemy

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "2b75d0a8ffcb"
down_revision = "3a78dba1080a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Remove legacy columns and tables."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting schema cleanup...")

    # === Step 1: Verify data migration is complete ===
    logger.info("Verifying data migration completion...")

    # Check if any chat sessions still have folder_id references
    chat_session_columns = [
        col["name"] for col in inspector.get_columns("chat_session")
    ]
    if "folder_id" in chat_session_columns:
        orphaned_count = bind.execute(
            text(
                """
            SELECT COUNT(*) FROM chat_session
            WHERE folder_id IS NOT NULL AND project_id IS NULL
        """
            )
        ).scalar_one()

        if orphaned_count > 0:
            logger.warning(
                f"WARNING: {orphaned_count} chat_session records still have folder_id without project_id. Proceeding anyway."
            )

    # === Step 2: Drop chat_session.folder_id ===
    if "folder_id" in chat_session_columns:
        logger.info("Dropping chat_session.folder_id...")

        # Drop foreign key constraint first
        op.execute(
            "ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk"
        )
        op.execute(
            "ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
        )

        # Drop the column
        op.drop_column("chat_session", "folder_id")
        logger.info("Dropped chat_session.folder_id")

    # === Step 3: Drop persona__user_folder table ===
    if "persona__user_folder" in inspector.get_table_names():
        logger.info("Dropping persona__user_folder table...")

        # Check for any remaining data
        remaining = bind.execute(
            text("SELECT COUNT(*) FROM persona__user_folder")
        ).scalar_one()

        if remaining > 0:
            logger.warning(
                f"WARNING: Dropping persona__user_folder with {remaining} records"
            )

        op.drop_table("persona__user_folder")
        logger.info("Dropped persona__user_folder table")

    # === Step 4: Drop chat_folder table ===
    if "chat_folder" in inspector.get_table_names():
        logger.info("Dropping chat_folder table...")

        # Check for any remaining data
        remaining = bind.execute(text("SELECT COUNT(*) FROM chat_folder")).scalar_one()

        if remaining > 0:
            logger.warning(f"WARNING: Dropping chat_folder with {remaining} records")

        op.drop_table("chat_folder")
        logger.info("Dropped chat_folder table")

    # === Step 5: Drop user_file legacy columns ===
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]

    # Drop folder_id
    if "folder_id" in user_file_columns:
        logger.info("Dropping user_file.folder_id...")
        op.drop_column("user_file", "folder_id")
        logger.info("Dropped user_file.folder_id")

    # Drop cc_pair_id (already handled in migration 5, but be sure)
    if "cc_pair_id" in user_file_columns:
        logger.info("Dropping user_file.cc_pair_id...")

        # Drop any remaining foreign key constraints
        bind.execute(
            text(
                """
            DO $$
            DECLARE r RECORD;
            BEGIN
              FOR r IN (
                SELECT conname
                FROM pg_constraint c
                JOIN pg_class t ON c.conrelid = t.oid
                WHERE c.contype = 'f'
                  AND t.relname = 'user_file'
                  AND EXISTS (
                    SELECT 1 FROM pg_attribute a
                    WHERE a.attrelid = t.oid
                    AND a.attname = 'cc_pair_id'
                  )
              ) LOOP
                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
              END LOOP;
            END$$;
        """
            )
        )

        op.drop_column("user_file", "cc_pair_id")
        logger.info("Dropped user_file.cc_pair_id")

    # === Step 6: Clean up any remaining constraints ===
    logger.info("Cleaning up remaining constraints...")

    # Drop any unique constraints on removed columns
    op.execute(
        "ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key"
    )

    logger.info("Migration 6 (schema cleanup) completed successfully")
    logger.info("Legacy schema has been fully removed")


def downgrade() -> None:
    """Recreate dropped columns and tables (structure only, no data)."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.warning("Downgrading schema cleanup - recreating structure only, no data!")

    # Recreate user_file columns
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]

        if "cc_pair_id" not in columns:
            op.add_column(
                "user_file", sa.Column("cc_pair_id", sa.Integer(), nullable=True)
            )

        if "folder_id" not in columns:
            op.add_column(
                "user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
            )

    # Recreate persona__user_folder table
    if "persona__user_folder" not in inspector.get_table_names():
        op.create_table(
            "persona__user_folder",
            sa.Column("persona_id", sa.Integer(), nullable=False),
            sa.Column("user_folder_id", sa.Integer(), nullable=False),
            sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
            sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
            sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
        )

    # Recreate chat_folder table and related structures
    if "chat_folder" not in inspector.get_table_names():
        op.create_table(
            "chat_folder",
            sa.Column("id", sa.Integer(), nullable=False),
            sa.Column(
                "user_id",
                fastapi_users_db_sqlalchemy.generics.GUID(),
                nullable=True,
            ),
            sa.Column("name", sa.String(), nullable=True),
            sa.Column("display_priority", sa.Integer(), nullable=False),
            sa.ForeignKeyConstraint(
                ["user_id"],
                ["user.id"],
                name="chat_folder_user_id_fkey",
            ),
            sa.PrimaryKeyConstraint("id"),
        )

    # Add folder_id back to chat_session
    if "chat_session" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("chat_session")]
        if "folder_id" not in columns:
            op.add_column(
                "chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)
            )

            # Add foreign key if chat_folder exists
            if "chat_folder" in inspector.get_table_names():
                op.create_foreign_key(
                    "chat_session_chat_folder_fk",
                    "chat_session",
                    "chat_folder",
                    ["folder_id"],
                    ["id"],
                )

    logger.info("Downgrade completed - structure recreated but data is lost")


================================================
FILE: backend/alembic/versions/2b90f3af54b8_usage_limits.py
================================================
"""usage_limits

Revision ID: 2b90f3af54b8
Revises: 9a0296d7421e
Create Date: 2026-01-03 16:55:30.449692

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "2b90f3af54b8"
down_revision = "9a0296d7421e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "tenant_usage",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "window_start", sa.DateTime(timezone=True), nullable=False, index=True
        ),
        sa.Column("llm_cost_cents", sa.Float(), nullable=False, server_default="0.0"),
        sa.Column("chunks_indexed", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("api_calls", sa.Integer(), nullable=False, server_default="0"),
        sa.Column(
            "non_streaming_api_calls", sa.Integer(), nullable=False, server_default="0"
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=True,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("window_start", name="uq_tenant_usage_window"),
    )


def downgrade() -> None:
    op.drop_index("ix_tenant_usage_window_start", table_name="tenant_usage")
    op.drop_table("tenant_usage")


================================================
FILE: backend/alembic/versions/2c2430828bdf_add_unique_constraint_to_inputprompt_.py
================================================
"""add_unique_constraint_to_inputprompt_prompt_user_id

Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create unique constraint on (prompt, user_id) for user-owned prompts
    # This ensures each user can only have one shortcut with a given name
    op.create_unique_constraint(
        "uq_inputprompt_prompt_user_id",
        "inputprompt",
        ["prompt", "user_id"],
    )

    # Create partial unique index for public prompts (where user_id IS NULL)
    # PostgreSQL unique constraints don't enforce uniqueness for NULL values,
    # so we need a partial index to ensure public prompt names are also unique
    op.execute(
        """
        CREATE UNIQUE INDEX uq_inputprompt_prompt_public
        ON inputprompt (prompt)
        WHERE user_id IS NULL
        """
    )


def downgrade() -> None:
    op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
    op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")


================================================
FILE: backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py
================================================
"""set built in to default

Revision ID: 2cdeff6d8c93
Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "2cdeff6d8c93"
down_revision = "f5437cc136c5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Prior to this migration / point in the codebase history,
    # built in personas were implicitly treated as default personas (with no option to change this)
    # This migration makes that explicit
    op.execute(
        """
        UPDATE persona
        SET is_default_persona = TRUE
        WHERE builtin_persona = TRUE
    """
    )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py
================================================
"""Add Above Below to Persona

Revision ID: 2d2304e27d8c
Revises: 4b08d97e175a
Create Date: 2024-08-21 19:15:15.762948

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "2d2304e27d8c"
down_revision = "4b08d97e175a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("persona", sa.Column("chunks_above", sa.Integer(), nullable=True))
    op.add_column("persona", sa.Column("chunks_below", sa.Integer(), nullable=True))

    op.execute(
        "UPDATE persona SET chunks_above = 1, chunks_below = 1 WHERE chunks_above IS NULL AND chunks_below IS NULL"
    )

    op.alter_column("persona", "chunks_above", nullable=False)
    op.alter_column("persona", "chunks_below", nullable=False)


def downgrade() -> None:
    op.drop_column("persona", "chunks_below")
    op.drop_column("persona", "chunks_above")


================================================
FILE: backend/alembic/versions/2daa494a0851_add_group_sync_time.py
================================================
"""add-group-sync-time

Revision ID: 2daa494a0851
Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "2daa494a0851"
down_revision = "c0fd6e4da83a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_time_external_group_sync",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "last_time_external_group_sync")


================================================
FILE: backend/alembic/versions/2f80c6a2550f_add_chat_session_specific_temperature_.py
================================================
"""add chat session specific temperature override

Revision ID: 2f80c6a2550f
Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "2f80c6a2550f"
down_revision = "33ea50e88f24"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "chat_session", sa.Column("temperature_override", sa.Float(), nullable=True)
    )
    op.add_column(
        "user",
        sa.Column(
            "temperature_override_enabled",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_session", "temperature_override")
    op.drop_column("user", "temperature_override_enabled")


================================================
FILE: backend/alembic/versions/2f95e36923e6_add_indexing_coordination.py
================================================
"""add_indexing_coordination

Revision ID: 2f95e36923e6
Revises: 0816326d83aa
Create Date: 2025-07-10 16:17:57.762182

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "2f95e36923e6"
down_revision = "0816326d83aa"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add database-based coordination fields (replacing Redis fencing)
    op.add_column(
        "index_attempt", sa.Column("celery_task_id", sa.String(), nullable=True)
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "cancellation_requested",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )

    # Add batch coordination fields (replacing FileStore state)
    op.add_column(
        "index_attempt", sa.Column("total_batches", sa.Integer(), nullable=True)
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "completed_batches", sa.Integer(), nullable=False, server_default="0"
        ),
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "total_failures_batch_level",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
    op.add_column(
        "index_attempt",
        sa.Column("total_chunks", sa.Integer(), nullable=False, server_default="0"),
    )

    # Progress tracking for stall detection
    op.add_column(
        "index_attempt",
        sa.Column("last_progress_time", sa.DateTime(timezone=True), nullable=True),
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "last_batches_completed_count",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )

    # Heartbeat tracking for worker liveness detection
    op.add_column(
        "index_attempt",
        sa.Column(
            "heartbeat_counter", sa.Integer(), nullable=False, server_default="0"
        ),
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "last_heartbeat_value", sa.Integer(), nullable=False, server_default="0"
        ),
    )
    op.add_column(
        "index_attempt",
        sa.Column("last_heartbeat_time", sa.DateTime(timezone=True), nullable=True),
    )

    # Add index for coordination queries
    op.create_index(
        "ix_index_attempt_active_coordination",
        "index_attempt",
        ["connector_credential_pair_id", "search_settings_id", "status"],
    )


def downgrade() -> None:
    # Remove the new index
    op.drop_index("ix_index_attempt_active_coordination", table_name="index_attempt")

    # Remove the new columns
    op.drop_column("index_attempt", "last_batches_completed_count")
    op.drop_column("index_attempt", "last_progress_time")
    op.drop_column("index_attempt", "last_heartbeat_time")
    op.drop_column("index_attempt", "last_heartbeat_value")
    op.drop_column("index_attempt", "heartbeat_counter")
    op.drop_column("index_attempt", "total_chunks")
    op.drop_column("index_attempt", "total_failures_batch_level")
    op.drop_column("index_attempt", "completed_batches")
    op.drop_column("index_attempt", "total_batches")
    op.drop_column("index_attempt", "cancellation_requested")
    op.drop_column("index_attempt", "celery_task_id")


================================================
FILE: backend/alembic/versions/30c1d5744104_persona_datetime_aware.py
================================================
"""Persona Datetime Aware

Revision ID: 30c1d5744104
Revises: 7f99be1cb9f5
Create Date: 2023-10-16 23:21:01.283424

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "30c1d5744104"
down_revision = "7f99be1cb9f5"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("persona", sa.Column("datetime_aware", sa.Boolean(), nullable=True))
    op.execute("UPDATE persona SET datetime_aware = TRUE")
    op.alter_column("persona", "datetime_aware", nullable=False)
    op.create_index(
        "_default_persona_name_idx",
        "persona",
        ["name"],
        unique=True,
        postgresql_where=sa.text("default_persona = true"),
    )


def downgrade() -> None:
    op.drop_index(
        "_default_persona_name_idx",
        table_name="persona",
        postgresql_where=sa.text("default_persona = true"),
    )
    op.drop_column("persona", "datetime_aware")


================================================
FILE: backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py
================================================
"""Add icon_color and icon_shape to Persona

Revision ID: 325975216eb3
Revises: 91ffac7e65b3
Create Date: 2024-07-24 21:29:31.784562

"""

import random
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column, select

# revision identifiers, used by Alembic.
revision = "325975216eb3"
down_revision = "91ffac7e65b3"
branch_labels: None = None
depends_on: None = None


colorOptions = [
    "#FF6FBF",
    "#6FB1FF",
    "#B76FFF",
    "#FFB56F",
    "#6FFF8D",
    "#FF6F6F",
    "#6FFFFF",
]


# Function to generate a random shape ensuring at least 3 of the middle 4 squares are filled
def generate_random_shape() -> int:
    center_squares = [12, 10, 6, 14, 13, 11, 7, 15]
    center_fill = random.choice(center_squares)
    remaining_squares = [i for i in range(16) if not (center_fill & (1 << i))]
    random.shuffle(remaining_squares)
    for i in range(10 - bin(center_fill).count("1")):
        center_fill |= 1 << remaining_squares[i]
    return center_fill


def upgrade() -> None:
    op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
    op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))
    op.add_column("persona", sa.Column("uploaded_image_id", sa.String(), nullable=True))

    persona = table(
        "persona",
        column("id", sa.Integer),
        column("icon_color", sa.String),
        column("icon_shape", sa.Integer),
    )

    conn = op.get_bind()
    personas = conn.execute(select(persona.c.id))

    for persona_id in personas:
        random_color = random.choice(colorOptions)
        random_shape = generate_random_shape()
        conn.execute(
            persona.update()
            .where(persona.c.id == persona_id[0])
            .values(icon_color=random_color, icon_shape=random_shape)
        )


def downgrade() -> None:
    op.drop_column("persona", "icon_shape")
    op.drop_column("persona", "uploaded_image_id")
    op.drop_column("persona", "icon_color")


================================================
FILE: backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py
================================================
"""single tool call per message

Revision ID: 33cb72ea4d80
Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "33cb72ea4d80"
down_revision = "5b29123cd710"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Step 1: Delete extraneous ToolCall entries
    # Keep only the ToolCall with the smallest 'id' for each 'message_id'
    op.execute(
        sa.text(
            """
            DELETE FROM tool_call
            WHERE id NOT IN (
                SELECT MIN(id)
                FROM tool_call
                WHERE message_id IS NOT NULL
                GROUP BY message_id
            );
        """
        )
    )

    # Step 2: Add a unique constraint on message_id
    op.create_unique_constraint(
        constraint_name="uq_tool_call_message_id",
        table_name="tool_call",
        columns=["message_id"],
    )


def downgrade() -> None:
    # Step 1: Drop the unique constraint on message_id
    op.drop_constraint(
        constraint_name="uq_tool_call_message_id",
        table_name="tool_call",
        type_="unique",
    )


================================================
FILE: backend/alembic/versions/33ea50e88f24_foreign_key_input_prompts.py
================================================
"""foreign key input prompts

Revision ID: 33ea50e88f24
Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "33ea50e88f24"
down_revision = "a6df6b88ef81"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Safely drop constraints if exists
    op.execute(
        """
        ALTER TABLE inputprompt__user
        DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey
        """
    )
    op.execute(
        """
        ALTER TABLE inputprompt__user
        DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
        """
    )

    # Recreate with ON DELETE CASCADE
    op.create_foreign_key(
        "inputprompt__user_input_prompt_id_fkey",
        "inputprompt__user",
        "inputprompt",
        ["input_prompt_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.create_foreign_key(
        "inputprompt__user_user_id_fkey",
        "inputprompt__user",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Drop the new FKs with ondelete
    op.drop_constraint(
        "inputprompt__user_input_prompt_id_fkey",
        "inputprompt__user",
        type_="foreignkey",
    )
    op.drop_constraint(
        "inputprompt__user_user_id_fkey",
        "inputprompt__user",
        type_="foreignkey",
    )

    # Recreate them without cascading
    op.create_foreign_key(
        "inputprompt__user_input_prompt_id_fkey",
        "inputprompt__user",
        "inputprompt",
        ["input_prompt_id"],
        ["id"],
    )
    op.create_foreign_key(
        "inputprompt__user_user_id_fkey",
        "inputprompt__user",
        "user",
        ["user_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/351faebd379d_add_curator_fields.py
================================================
"""Add curator fields

Revision ID: 351faebd379d
Revises: ee3f4b47fad5
Create Date: 2024-08-15 22:37:08.397052

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "351faebd379d"
down_revision = "ee3f4b47fad5"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Add is_curator column to User__UserGroup table
    op.add_column(
        "user__user_group",
        sa.Column("is_curator", sa.Boolean(), nullable=False, server_default="false"),
    )

    # Use batch mode to modify the enum type
    with op.batch_alter_table("user", schema=None) as batch_op:
        batch_op.alter_column(  # type: ignore[attr-defined]
            "role",
            type_=sa.Enum(
                "BASIC",
                "ADMIN",
                "CURATOR",
                "GLOBAL_CURATOR",
                name="userrole",
                native_enum=False,
            ),
            existing_type=sa.Enum("BASIC", "ADMIN", name="userrole", native_enum=False),
            existing_nullable=False,
        )
    # Create the association table
    op.create_table(
        "credential__user_group",
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("credential_id", "user_group_id"),
    )
    op.add_column(
        "credential",
        sa.Column(
            "curator_public", sa.Boolean(), nullable=False, server_default="false"
        ),
    )


def downgrade() -> None:
    # Update existing records to ensure they fit within the BASIC/ADMIN roles
    op.execute(
        "UPDATE \"user\" SET role = 'ADMIN' WHERE role IN ('CURATOR', 'GLOBAL_CURATOR')"
    )

    # Remove is_curator column from User__UserGroup table
    op.drop_column("user__user_group", "is_curator")

    with op.batch_alter_table("user", schema=None) as batch_op:
        batch_op.alter_column(  # type: ignore[attr-defined]
            "role",
            type_=sa.Enum(
                "BASIC", "ADMIN", name="userrole", native_enum=False, length=20
            ),
            existing_type=sa.Enum(
                "BASIC",
                "ADMIN",
                "CURATOR",
                "GLOBAL_CURATOR",
                name="userrole",
                native_enum=False,
            ),
            existing_nullable=False,
        )
    # Drop the association table
    op.drop_table("credential__user_group")
    op.drop_column("credential", "curator_public")


================================================
FILE: backend/alembic/versions/35e518e0ddf4_properly_cascade.py
================================================
"""properly_cascade

Revision ID: 35e518e0ddf4
Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "35e518e0ddf4"
down_revision = "91a0a4d62b14"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Update chat_message foreign key constraint
    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Update chat_message__search_doc foreign key constraints
    op.drop_constraint(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )

    op.create_foreign_key(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        "chat_message",
        ["chat_message_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        "search_doc",
        ["search_doc_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Add CASCADE delete for tool_call foreign key
    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
    op.create_foreign_key(
        "tool_call_message_id_fkey",
        "tool_call",
        "chat_message",
        ["message_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Revert chat_message foreign key constraint
    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["id"],
    )

    # Revert chat_message__search_doc foreign key constraints
    op.drop_constraint(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )

    op.create_foreign_key(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )
    op.create_foreign_key(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        "search_doc",
        ["search_doc_id"],
        ["id"],
    )

    # Revert tool_call foreign key constraint
    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
    op.create_foreign_key(
        "tool_call_message_id_fkey",
        "tool_call",
        "chat_message",
        ["message_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py
================================================
"""server default chosen assistants

Revision ID: 35e6853a51d5
Revises: c99d76fcd298
Create Date: 2024-09-13 13:20:32.885317

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "35e6853a51d5"
down_revision = "c99d76fcd298"
branch_labels = None
depends_on = None

DEFAULT_ASSISTANTS = [-2, -1, 0]


def upgrade() -> None:
    # Step 1: Update any NULL values to the default value
    # This upgrades existing users without ordered assistant
    # to have default assistants set to visible assistants which are
    # accessible by them.
    op.execute(
        """
        UPDATE "user" u
        SET chosen_assistants = (
            SELECT jsonb_agg(
                p.id ORDER BY
                    COALESCE(p.display_priority, 2147483647) ASC,
                    p.id ASC
            )
            FROM persona p
            LEFT JOIN persona__user pu ON p.id = pu.persona_id AND pu.user_id = u.id
            WHERE p.is_visible = true
            AND (p.is_public = true OR pu.user_id IS NOT NULL)
        )
        WHERE chosen_assistants IS NULL
        OR chosen_assistants = 'null'
        OR jsonb_typeof(chosen_assistants) = 'null'
        OR (jsonb_typeof(chosen_assistants) = 'string' AND chosen_assistants = '"null"')
    """
    )

    # Step 2: Alter the column to make it non-nullable
    op.alter_column(
        "user",
        "chosen_assistants",
        type_=postgresql.JSONB(astext_type=sa.Text()),
        nullable=False,
        server_default=sa.text(f"'{DEFAULT_ASSISTANTS}'::jsonb"),
    )


def downgrade() -> None:
    op.alter_column(
        "user",
        "chosen_assistants",
        type_=postgresql.JSONB(astext_type=sa.Text()),
        nullable=True,
        server_default=None,
    )


================================================
FILE: backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py
================================================
"""add composite index for index attempt time updated

Revision ID: 369644546676
Revises: 2955778aa44c
Create Date: 2025-01-08 15:38:17.224380

"""

from alembic import op
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision = "369644546676"
down_revision = "2955778aa44c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_index(
        "ix_index_attempt_ccpair_search_settings_time_updated",
        "index_attempt",
        [
            "connector_credential_pair_id",
            "search_settings_id",
            text("time_updated DESC"),
        ],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(
        "ix_index_attempt_ccpair_search_settings_time_updated",
        table_name="index_attempt",
    )


================================================
FILE: backend/alembic/versions/36e9220ab794_update_kg_trigger_functions.py
================================================
"""update_kg_trigger_functions

Revision ID: 36e9220ab794
Revises: c9e2cd766c29
Create Date: 2025-06-22 17:33:25.833733

"""

from alembic import op
from sqlalchemy.orm import Session
from sqlalchemy import text
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

# revision identifiers, used by Alembic.
revision = "36e9220ab794"
down_revision = "c9e2cd766c29"
branch_labels = None
depends_on = None


def _get_tenant_contextvar(session: Session) -> str:
    """Get the current schema for the migration"""
    current_tenant = session.execute(text("SELECT current_schema()")).scalar()
    if isinstance(current_tenant, str):
        return current_tenant
    else:
        raise ValueError("Current tenant is not a string")


def upgrade() -> None:

    bind = op.get_bind()
    session = Session(bind=bind)

    # Create kg_entity trigger to update kg_entity.name and its trigrams
    tenant_id = _get_tenant_contextvar(session)
    alphanum_pattern = r"[^a-z0-9]+"
    truncate_length = 1000
    function = "update_kg_entity_name"
    op.execute(
        text(
            f"""
            CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
            RETURNS TRIGGER AS $$
            DECLARE
                name text;
                cleaned_name text;
            BEGIN
                -- Set name to semantic_id if document_id is not NULL
                IF NEW.document_id IS NOT NULL THEN
                    SELECT lower(semantic_id) INTO name
                    FROM "{tenant_id}".document
                    WHERE id = NEW.document_id;
                ELSE
                    name = lower(NEW.name);
                END IF;

                -- Clean name and truncate if too long
                cleaned_name = regexp_replace(
                    name,
                    '{alphanum_pattern}', '', 'g'
                );
                IF length(cleaned_name) > {truncate_length} THEN
                    cleaned_name = left(cleaned_name, {truncate_length});
                END IF;

                -- Set name and name trigrams
                NEW.name = name;
                NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
            """
        )
    )
    trigger = f"{function}_trigger"
    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".kg_entity')
    op.execute(
        f"""
        CREATE TRIGGER {trigger}
            BEFORE INSERT OR UPDATE OF name
            ON "{tenant_id}".kg_entity
            FOR EACH ROW
            EXECUTE FUNCTION "{tenant_id}".{function}();
        """
    )

    # Create kg_entity trigger to update kg_entity.name and its trigrams
    function = "update_kg_entity_name_from_doc"
    op.execute(
        text(
            f"""
            CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
            RETURNS TRIGGER AS $$
            DECLARE
                doc_name text;
                cleaned_name text;
            BEGIN
                doc_name = lower(NEW.semantic_id);

                -- Clean name and truncate if too long
                cleaned_name = regexp_replace(
                    doc_name,
                    '{alphanum_pattern}', '', 'g'
                );
                IF length(cleaned_name) > {truncate_length} THEN
                    cleaned_name = left(cleaned_name, {truncate_length});
                END IF;

                -- Set name and name trigrams for all entities referencing this document
                UPDATE "{tenant_id}".kg_entity
                SET
                    name = doc_name,
                    name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)
                WHERE document_id = NEW.id;
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
            """
        )
    )
    trigger = f"{function}_trigger"
    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".document')
    op.execute(
        f"""
        CREATE TRIGGER {trigger}
            AFTER UPDATE OF semantic_id
            ON "{tenant_id}".document
            FOR EACH ROW
            EXECUTE FUNCTION "{tenant_id}".{function}();
        """
    )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/3781a5eb12cb_add_chunk_stats_table.py
================================================
"""add chunk stats table

Revision ID: 3781a5eb12cb
Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "3781a5eb12cb"
down_revision = "df46c75b714e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "chunk_stats",
        sa.Column("id", sa.String(), primary_key=True, index=True),
        sa.Column(
            "document_id",
            sa.String(),
            sa.ForeignKey("document.id"),
            nullable=False,
            index=True,
        ),
        sa.Column("chunk_in_doc_id", sa.Integer(), nullable=False),
        sa.Column("information_content_boost", sa.Float(), nullable=True),
        sa.Column(
            "last_modified",
            sa.DateTime(timezone=True),
            nullable=False,
            index=True,
            server_default=sa.func.now(),
        ),
        sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True, index=True),
        sa.UniqueConstraint(
            "document_id", "chunk_in_doc_id", name="uq_chunk_stats_doc_chunk"
        ),
    )

    op.create_index(
        "ix_chunk_sync_status", "chunk_stats", ["last_modified", "last_synced"]
    )


def downgrade() -> None:
    op.drop_index("ix_chunk_sync_status", table_name="chunk_stats")
    op.drop_table("chunk_stats")


================================================
FILE: backend/alembic/versions/3879338f8ba1_add_tool_table.py
================================================
"""Add tool table

Revision ID: 3879338f8ba1
Revises: f1c6478c3fd8
Create Date: 2024-05-11 16:11:23.718084

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "3879338f8ba1"
down_revision = "f1c6478c3fd8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "tool",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column("in_code_tool_id", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "persona__tool",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("tool_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["tool_id"],
            ["tool.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "tool_id"),
    )


def downgrade() -> None:
    op.drop_table("persona__tool")
    op.drop_table("tool")


================================================
FILE: backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py
================================================
"""Add chat session sharing

Revision ID: 38eda64af7fe
Revises: 776b3bbe9092
Create Date: 2024-03-27 19:41:29.073594

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "38eda64af7fe"
down_revision = "776b3bbe9092"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_session",
        sa.Column(
            "shared_status",
            sa.Enum(
                "PUBLIC",
                "PRIVATE",
                name="chatsessionsharedstatus",
                native_enum=False,
            ),
            nullable=True,
        ),
    )
    op.execute("UPDATE chat_session SET shared_status='PRIVATE'")
    op.alter_column(
        "chat_session",
        "shared_status",
        nullable=False,
    )


def downgrade() -> None:
    op.drop_column("chat_session", "shared_status")


================================================
FILE: backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
================================================
"""Update GitHub connector repo_name to repositories

Revision ID: 3934b1bc7b62
Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962

"""

from alembic import op
import sqlalchemy as sa
import json
import logging

# revision identifiers, used by Alembic.
revision = "3934b1bc7b62"
down_revision = "b7c2b63c4a03"
branch_labels = None
depends_on = None

logger = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    # First get all GitHub connectors
    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    # Update each connector's config
    updated_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                logger.warning(f"Connector {connector_id} has no config, skipping")
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repo_name" not in config:
                continue

            # Create new config with repositories instead of repo_name
            new_config = dict(config)
            repo_name_value = new_config.pop("repo_name")
            new_config["repositories"] = repo_name_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
            )
            updated_count += 1
        except Exception as e:
            logger.error(f"Error updating connector {connector_id}: {str(e)}")


def downgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    logger.debug(
        "Starting rollback of GitHub connectors from repositories to repo_name"
    )

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")

    # Revert each GitHub connector to use repo_name instead of repositories
    reverted_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repositories" not in config:
                continue

            # Create new config with repo_name instead of repositories
            new_config = dict(config)
            repositories_value = new_config.pop("repositories")
            new_config["repo_name"] = repositories_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"new_config": json.dumps(new_config), "connector_id": connector_id},
            )
            reverted_count += 1
        except Exception as e:
            logger.error(f"Error reverting connector {connector_id}: {str(e)}")


================================================
FILE: backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py
================================================
"""add alternate assistant to chat message

Revision ID: 3a7802814195
Revises: 23957775e5f5
Create Date: 2024-06-05 11:18:49.966333

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "3a7802814195"
down_revision = "23957775e5f5"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
    )
    op.create_foreign_key(
        "fk_chat_message_persona",
        "chat_message",
        "persona",
        ["alternate_assistant_id"],
        ["id"],
    )


def downgrade() -> None:
    op.drop_constraint("fk_chat_message_persona", "chat_message", type_="foreignkey")
    op.drop_column("chat_message", "alternate_assistant_id")


================================================
FILE: backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py
================================================
"""Migration 5: User file legacy data cleanup

Revision ID: 3a78dba1080a
Revises: 7cc3fcc116c1
Create Date: 2025-09-22 10:04:27.986294

This migration removes legacy user-file documents and connector_credential_pairs.
It performs bulk deletions of obsolete data after the UUID migration.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
from sqlalchemy import text
import logging
from typing import List
import uuid

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "3a78dba1080a"
down_revision = "7cc3fcc116c1"
branch_labels = None
depends_on = None


def batch_delete(
    bind: sa.engine.Connection,
    table_name: str,
    id_column: str,
    ids: List[str | int | uuid.UUID],
    batch_size: int = 1000,
    id_type: str = "int",
) -> int:
    """Delete records in batches to avoid memory issues and timeouts."""
    total_count = len(ids)
    if total_count == 0:
        return 0

    logger.info(
        f"Starting batch deletion of {total_count} records from {table_name}..."
    )

    # Determine appropriate ARRAY type
    if id_type == "uuid":
        array_type = psql.ARRAY(psql.UUID(as_uuid=True))
    elif id_type == "int":
        array_type = psql.ARRAY(sa.Integer())
    else:
        array_type = psql.ARRAY(sa.String())

    total_deleted = 0
    failed_batches = []

    for i in range(0, total_count, batch_size):
        batch_ids = ids[i : i + batch_size]
        try:
            stmt = text(
                f"DELETE FROM {table_name} WHERE {id_column} = ANY(:ids)"
            ).bindparams(sa.bindparam("ids", value=batch_ids, type_=array_type))
            result = bind.execute(stmt)
            total_deleted += result.rowcount

            # Log progress every 10 batches or at completion
            batch_num = (i // batch_size) + 1
            if batch_num % 10 == 0 or i + batch_size >= total_count:
                logger.info(
                    f"  Deleted {min(i + batch_size, total_count)}/{total_count} records "
                    f"({total_deleted} actual) from {table_name}"
                )
        except Exception as e:
            logger.error(f"Failed to delete batch {(i // batch_size) + 1}: {e}")
            failed_batches.append((i, min(i + batch_size, total_count)))

    if failed_batches:
        logger.warning(
            f"Failed to delete {len(failed_batches)} batches from {table_name}. Total deleted: {total_deleted}/{total_count}"
        )
        # Fail the migration to avoid silently succeeding on partial cleanup
        raise RuntimeError(
            f"Batch deletion failed for {table_name}: "
            f"{len(failed_batches)} failed batches out of "
            f"{(total_count + batch_size - 1) // batch_size}."
        )

    return total_deleted


def upgrade() -> None:
    """Remove legacy user-file documents and connector_credential_pairs."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting legacy data cleanup...")

    # === Step 1: Identify and delete user-file documents ===
    logger.info("Identifying user-file documents to delete...")

    # Get document IDs to delete
    doc_rows = bind.execute(
        text(
            """
        SELECT DISTINCT dcc.id AS document_id
        FROM document_by_connector_credential_pair dcc
        JOIN connector_credential_pair u
          ON u.connector_id = dcc.connector_id
         AND u.credential_id = dcc.credential_id
        WHERE u.is_user_file IS TRUE
    """
        )
    ).fetchall()

    doc_ids = [r[0] for r in doc_rows]

    if doc_ids:
        logger.info(f"Found {len(doc_ids)} user-file documents to delete")

        # Delete dependent rows first
        tables_to_clean = [
            ("document_retrieval_feedback", "document_id"),
            ("document__tag", "document_id"),
            ("chunk_stats", "document_id"),
        ]

        for table_name, column_name in tables_to_clean:
            if table_name in inspector.get_table_names():
                # document_id is a string in these tables
                deleted = batch_delete(
                    bind, table_name, column_name, doc_ids, id_type="str"
                )
                logger.info(f"Deleted {deleted} records from {table_name}")

        # Delete document_by_connector_credential_pair entries
        deleted = batch_delete(
            bind, "document_by_connector_credential_pair", "id", doc_ids, id_type="str"
        )
        logger.info(f"Deleted {deleted} document_by_connector_credential_pair records")

        # Delete documents themselves
        deleted = batch_delete(bind, "document", "id", doc_ids, id_type="str")
        logger.info(f"Deleted {deleted} document records")
    else:
        logger.info("No user-file documents found to delete")

    # === Step 2: Clean up user-file connector_credential_pairs ===
    logger.info("Cleaning up user-file connector_credential_pairs...")

    # Get cc_pair IDs
    cc_pair_rows = bind.execute(
        text(
            """
        SELECT id AS cc_pair_id
        FROM connector_credential_pair
        WHERE is_user_file IS TRUE
    """
        )
    ).fetchall()

    cc_pair_ids = [r[0] for r in cc_pair_rows]

    if cc_pair_ids:
        logger.info(
            f"Found {len(cc_pair_ids)} user-file connector_credential_pairs to clean up"
        )

        # Delete related records
        # Clean child tables first to satisfy foreign key constraints,
        # then the parent tables
        tables_to_clean = [
            ("index_attempt_errors", "connector_credential_pair_id"),
            ("index_attempt", "connector_credential_pair_id"),
            ("background_error", "cc_pair_id"),
            ("document_set__connector_credential_pair", "connector_credential_pair_id"),
            ("user_group__connector_credential_pair", "cc_pair_id"),
        ]

        for table_name, column_name in tables_to_clean:
            if table_name in inspector.get_table_names():
                deleted = batch_delete(
                    bind, table_name, column_name, cc_pair_ids, id_type="int"
                )
                logger.info(f"Deleted {deleted} records from {table_name}")

    # === Step 3: Identify connectors and credentials to delete ===
    logger.info("Identifying orphaned connectors and credentials...")

    # Get connectors used only by user-file cc_pairs
    connector_rows = bind.execute(
        text(
            """
        SELECT DISTINCT ccp.connector_id
        FROM connector_credential_pair ccp
        WHERE ccp.is_user_file IS TRUE
          AND ccp.connector_id != 0  -- Exclude system default
          AND NOT EXISTS (
            SELECT 1
            FROM connector_credential_pair c2
            WHERE c2.connector_id = ccp.connector_id
              AND c2.is_user_file IS NOT TRUE
          )
    """
        )
    ).fetchall()

    userfile_only_connector_ids = [r[0] for r in connector_rows]

    # Get credentials used only by user-file cc_pairs
    credential_rows = bind.execute(
        text(
            """
        SELECT DISTINCT ccp.credential_id
        FROM connector_credential_pair ccp
        WHERE ccp.is_user_file IS TRUE
          AND ccp.credential_id != 0  -- Exclude public/default
          AND NOT EXISTS (
            SELECT 1
            FROM connector_credential_pair c2
            WHERE c2.credential_id = ccp.credential_id
              AND c2.is_user_file IS NOT TRUE
          )
    """
        )
    ).fetchall()

    userfile_only_credential_ids = [r[0] for r in credential_rows]

    # === Step 4: Delete the cc_pairs themselves ===
    if cc_pair_ids:
        # Remove FK dependency from user_file first
        bind.execute(
            text(
                """
            DO $$
            DECLARE r RECORD;
            BEGIN
              FOR r IN (
                SELECT conname
                FROM pg_constraint c
                JOIN pg_class t ON c.conrelid = t.oid
                JOIN pg_class ft ON c.confrelid = ft.oid
                WHERE c.contype = 'f'
                  AND t.relname = 'user_file'
                  AND ft.relname = 'connector_credential_pair'
              ) LOOP
                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
              END LOOP;
            END$$;
        """
            )
        )

        # Delete cc_pairs
        deleted = batch_delete(
            bind, "connector_credential_pair", "id", cc_pair_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} connector_credential_pair records")

    # === Step 5: Delete orphaned connectors ===
    if userfile_only_connector_ids:
        deleted = batch_delete(
            bind, "connector", "id", userfile_only_connector_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} orphaned connector records")

    # === Step 6: Delete orphaned credentials ===
    if userfile_only_credential_ids:
        # Clean up credential__user_group mappings first
        deleted = batch_delete(
            bind,
            "credential__user_group",
            "credential_id",
            userfile_only_credential_ids,
            id_type="int",
        )
        logger.info(f"Deleted {deleted} credential__user_group records")

        # Delete credentials
        deleted = batch_delete(
            bind, "credential", "id", userfile_only_credential_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} orphaned credential records")

    logger.info("Migration 5 (legacy data cleanup) completed successfully")


def downgrade() -> None:
    """Cannot restore deleted data - requires backup restoration."""

    logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
    logger.error("Data restoration requires backup files or database backup.")

    # raise NotImplementedError(
    #     "Downgrade of legacy data cleanup is not supported. "
    #     "Deleted data must be restored from backups."
    # )


================================================
FILE: backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py
================================================
"""Move is_public to cc_pair

Revision ID: 3b25685ff73c
Revises: e0a68a81d434
Create Date: 2023-10-05 18:47:09.582849

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "3b25685ff73c"
down_revision = "e0a68a81d434"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column("is_public", sa.Boolean(), nullable=True),
    )
    # fill in is_public for existing rows
    op.execute(
        "UPDATE connector_credential_pair SET is_public = true WHERE is_public IS NULL"
    )
    op.alter_column("connector_credential_pair", "is_public", nullable=False)

    op.add_column(
        "credential",
        sa.Column("is_admin", sa.Boolean(), nullable=True),
    )
    op.execute("UPDATE credential SET is_admin = true WHERE is_admin IS NULL")
    op.alter_column("credential", "is_admin", nullable=False)

    op.drop_column("credential", "public_doc")


def downgrade() -> None:
    op.add_column(
        "credential",
        sa.Column("public_doc", sa.Boolean(), nullable=True),
    )
    # setting public_doc to false for all existing rows to be safe
    # NOTE: this is likely not the correct state of the world but it's the best we can do
    op.execute("UPDATE credential SET public_doc = false WHERE public_doc IS NULL")
    op.alter_column("credential", "public_doc", nullable=False)
    op.drop_column("connector_credential_pair", "is_public")
    op.drop_column("credential", "is_admin")


================================================
FILE: backend/alembic/versions/3bd4c84fe72f_improved_index.py
================================================
"""improved index

Revision ID: 3bd4c84fe72f
Revises: 8f43500ee275
Create Date: 2025-02-26 13:07:56.217791

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "3bd4c84fe72f"
down_revision = "8f43500ee275"
branch_labels = None
depends_on = None


# NOTE:
# This migration addresses issues with the previous migration (8f43500ee275) which caused
# an outage by creating an index without using CONCURRENTLY. This migration:
#
# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes
# 2. Adds indexes to both chat_message and chat_session tables for comprehensive search
# 3. Note: CONCURRENTLY was removed due to operational issues


def upgrade() -> None:
    # First, drop any existing indexes to avoid conflicts
    op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
    op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")
    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")

    # Drop existing columns if they exist
    op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
    op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")

    # Create a GIN index for full-text search on chat_message.message
    op.execute(
        """
        ALTER TABLE chat_message
        ADD COLUMN message_tsv tsvector
        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
        """
    )

    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_chat_message_tsv
        ON chat_message
        USING GIN (message_tsv)
        """
    )

    # Also add a stored tsvector column for chat_session.description
    op.execute(
        """
        ALTER TABLE chat_session
        ADD COLUMN description_tsv tsvector
        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
        """
    )

    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_chat_session_desc_tsv
        ON chat_session
        USING GIN (description_tsv)
        """
    )


def downgrade() -> None:
    # Drop the indexes first
    op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
    op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")

    # Then drop the columns
    op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
    op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")

    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")


================================================
FILE: backend/alembic/versions/3c5e35aa9af0_polling_document_count.py
================================================
"""Polling Document Count

Revision ID: 3c5e35aa9af0
Revises: 27c6ecc08586
Create Date: 2023-06-14 23:45:51.760440

"""

import sqlalchemy as sa
from alembic import op


# revision identifiers, used by Alembic.
revision = "3c5e35aa9af0"
down_revision = "27c6ecc08586"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_successful_index_time",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
    )
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_attempt_status",
            sa.Enum(
                "NOT_STARTED",
                "IN_PROGRESS",
                "SUCCESS",
                "FAILED",
                name="indexingstatus",
                native_enum=False,
            ),
            nullable=False,
        ),
    )
    op.add_column(
        "connector_credential_pair",
        sa.Column("total_docs_indexed", sa.Integer(), nullable=False),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "total_docs_indexed")
    op.drop_column("connector_credential_pair", "last_attempt_status")
    op.drop_column("connector_credential_pair", "last_successful_index_time")


================================================
FILE: backend/alembic/versions/3c6531f32351_add_back_input_prompts.py
================================================
"""add back input prompts

Revision ID: 3c6531f32351
Revises: aeda5f2df4f6
Create Date: 2025-01-13 12:49:51.705235

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy

# revision identifiers, used by Alembic.
revision = "3c6531f32351"
down_revision = "aeda5f2df4f6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "inputprompt",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("prompt", sa.String(), nullable=False),
        sa.Column("content", sa.String(), nullable=False),
        sa.Column("active", sa.Boolean(), nullable=False),
        sa.Column("is_public", sa.Boolean(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "inputprompt__user",
        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False
        ),
        sa.Column("disabled", sa.Boolean(), nullable=False, default=False),
        sa.ForeignKeyConstraint(
            ["input_prompt_id"],
            ["inputprompt.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
    )


def downgrade() -> None:
    op.drop_table("inputprompt__user")
    op.drop_table("inputprompt")


================================================
FILE: backend/alembic/versions/3c9a65f1207f_seed_exa_provider_from_env.py
================================================
"""seed_exa_provider_from_env

Revision ID: 3c9a65f1207f
Revises: 1f2a3b4c5d6e
Create Date: 2025-11-20 19:18:00.000000

"""

from __future__ import annotations

import os

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from dotenv import load_dotenv, find_dotenv

from onyx.utils.encryption import encrypt_string_to_bytes

revision = "3c9a65f1207f"
down_revision = "1f2a3b4c5d6e"
branch_labels = None
depends_on = None


EXA_PROVIDER_NAME = "Exa"


def _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:
    return sa.Table(
        "internet_search_provider",
        metadata,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String),
        sa.Column("provider_type", sa.String),
        sa.Column("api_key", sa.LargeBinary),
        sa.Column("config", postgresql.JSONB),
        sa.Column("is_active", sa.Boolean),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
    )


def upgrade() -> None:
    load_dotenv(find_dotenv())

    exa_api_key = os.environ.get("EXA_API_KEY")
    if not exa_api_key:
        return

    bind = op.get_bind()
    metadata = sa.MetaData()
    table = _get_internet_search_table(metadata)

    existing = bind.execute(
        sa.select(table.c.id).where(table.c.name == EXA_PROVIDER_NAME)
    ).first()
    if existing:
        return

    encrypted_key = encrypt_string_to_bytes(exa_api_key)

    has_active_provider = bind.execute(
        sa.select(table.c.id).where(table.c.is_active.is_(True))
    ).first()

    bind.execute(
        table.insert().values(
            name=EXA_PROVIDER_NAME,
            provider_type="exa",
            api_key=encrypted_key,
            config=None,
            is_active=not bool(has_active_provider),
        )
    )


def downgrade() -> None:
    return


================================================
FILE: backend/alembic/versions/3d1cca026fe8_add_oauth_config_and_user_tokens.py
================================================
"""add_oauth_config_and_user_tokens

Revision ID: 3d1cca026fe8
Revises: c8a93a2af083
Create Date: 2025-10-21 13:27:34.274721

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "3d1cca026fe8"
down_revision = "c8a93a2af083"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create oauth_config table
    op.create_table(
        "oauth_config",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("authorization_url", sa.Text(), nullable=False),
        sa.Column("token_url", sa.Text(), nullable=False),
        sa.Column("client_id", sa.LargeBinary(), nullable=False),
        sa.Column("client_secret", sa.LargeBinary(), nullable=False),
        sa.Column("scopes", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column(
            "additional_params",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    # Create oauth_user_token table
    op.create_table(
        "oauth_user_token",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("oauth_config_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("token_data", sa.LargeBinary(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["oauth_config_id"], ["oauth_config.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("oauth_config_id", "user_id", name="uq_oauth_user_token"),
    )

    # Create index on user_id for efficient user-based token lookups
    # Note: unique constraint on (oauth_config_id, user_id) already creates
    # an index for config-based lookups
    op.create_index(
        "ix_oauth_user_token_user_id",
        "oauth_user_token",
        ["user_id"],
    )

    # Add oauth_config_id column to tool table
    op.add_column("tool", sa.Column("oauth_config_id", sa.Integer(), nullable=True))

    # Create foreign key from tool to oauth_config
    op.create_foreign_key(
        "tool_oauth_config_fk",
        "tool",
        "oauth_config",
        ["oauth_config_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    # Drop foreign key from tool to oauth_config
    op.drop_constraint("tool_oauth_config_fk", "tool", type_="foreignkey")

    # Drop oauth_config_id column from tool table
    op.drop_column("tool", "oauth_config_id")

    # Drop index on user_id
    op.drop_index("ix_oauth_user_token_user_id", table_name="oauth_user_token")

    # Drop oauth_user_token table (will cascade delete tokens)
    op.drop_table("oauth_user_token")

    # Drop oauth_config table
    op.drop_table("oauth_config")


================================================
FILE: backend/alembic/versions/3fc5d75723b3_add_doc_metadata_field_in_document_model.py
================================================
"""add_doc_metadata_field_in_document_model

Revision ID: 3fc5d75723b3
Revises: 2f95e36923e6
Create Date: 2025-07-28 18:45:37.985406

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "3fc5d75723b3"
down_revision = "2f95e36923e6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "document",
        sa.Column(
            "doc_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
    )


def downgrade() -> None:
    op.drop_column("document", "doc_metadata")


================================================
FILE: backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py
================================================
"""Add tables for UI-based LLM configuration

Revision ID: 401c1ac29467
Revises: 703313b75876
Create Date: 2024-04-13 18:07:29.153817

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "401c1ac29467"
down_revision = "703313b75876"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "llm_provider",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("api_key", sa.String(), nullable=True),
        sa.Column("api_base", sa.String(), nullable=True),
        sa.Column("api_version", sa.String(), nullable=True),
        sa.Column(
            "custom_config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        sa.Column("default_model_name", sa.String(), nullable=False),
        sa.Column("fast_default_model_name", sa.String(), nullable=True),
        sa.Column("is_default_provider", sa.Boolean(), unique=True, nullable=True),
        sa.Column("model_names", postgresql.ARRAY(sa.String()), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    op.add_column(
        "persona",
        sa.Column("llm_model_provider_override", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("persona", "llm_model_provider_override")

    op.drop_table("llm_provider")


================================================
FILE: backend/alembic/versions/40926a4dab77_reset_userfile_document_id_migrated_.py
================================================
"""reset userfile document_id_migrated field

Revision ID: 40926a4dab77
Revises: 64bd5677aeb6
Create Date: 2025-10-06 16:10:32.898668

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "40926a4dab77"
down_revision = "64bd5677aeb6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Set all existing records to not migrated
    op.execute(
        "UPDATE user_file SET document_id_migrated = FALSE WHERE document_id_migrated IS DISTINCT FROM FALSE;"
    )


def downgrade() -> None:
    # No-op
    pass


================================================
FILE: backend/alembic/versions/41fa44bef321_remove_default_prompt_shortcuts.py
================================================
"""remove default prompt shortcuts

Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Delete any user associations for the default prompts first (foreign key constraint)
    op.execute(
        "DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
    )
    # Delete the pre-seeded default prompt shortcuts (they have negative IDs)
    op.execute("DELETE FROM inputprompt WHERE id < 0")


def downgrade() -> None:
    # We don't restore the default prompts on downgrade
    pass


================================================
FILE: backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py
================================================
"""Rename index_origin to index_recursively

Revision ID: 1d6ad76d1f37
Revises: e1392f05e840
Create Date: 2024-08-01 12:38:54.466081

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "1d6ad76d1f37"
down_revision = "e1392f05e840"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE connector
        SET connector_specific_config = jsonb_set(
            connector_specific_config,
            '{index_recursively}',
            'true'::jsonb
        ) - 'index_origin'
        WHERE connector_specific_config ? 'index_origin'
    """
    )


def downgrade() -> None:
    op.execute(
        """
        UPDATE connector
        SET connector_specific_config = jsonb_set(
            connector_specific_config,
            '{index_origin}',
            connector_specific_config->'index_recursively'
        ) - 'index_recursively'
        WHERE connector_specific_config ? 'index_recursively'
    """
    )


================================================
FILE: backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py
================================================
"""add cloud embedding model and update embedding_model

Revision ID: 44f856ae2a4a
Revises: d716b0791ddd
Create Date: 2024-06-28 20:01:05.927647

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "44f856ae2a4a"
down_revision = "d716b0791ddd"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Create embedding_provider table
    op.create_table(
        "embedding_provider",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("api_key", sa.LargeBinary(), nullable=True),
        sa.Column("default_model_id", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    # Add cloud_provider_id to embedding_model table
    op.add_column(
        "embedding_model", sa.Column("cloud_provider_id", sa.Integer(), nullable=True)
    )

    # Add foreign key constraints
    op.create_foreign_key(
        "fk_embedding_model_cloud_provider",
        "embedding_model",
        "embedding_provider",
        ["cloud_provider_id"],
        ["id"],
    )
    op.create_foreign_key(
        "fk_embedding_provider_default_model",
        "embedding_provider",
        "embedding_model",
        ["default_model_id"],
        ["id"],
    )


def downgrade() -> None:
    # Remove foreign key constraints
    op.drop_constraint(
        "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey"
    )
    op.drop_constraint(
        "fk_embedding_provider_default_model", "embedding_provider", type_="foreignkey"
    )

    # Remove cloud_provider_id column
    op.drop_column("embedding_model", "cloud_provider_id")

    # Drop embedding_provider table
    op.drop_table("embedding_provider")


================================================
FILE: backend/alembic/versions/4505fd7302e1_added_is_internet_to_dbdoc.py
================================================
"""added is_internet to DBDoc

Revision ID: 4505fd7302e1
Revises: c18cdf4b497e
Create Date: 2024-06-18 20:46:09.095034

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "4505fd7302e1"
down_revision = "c18cdf4b497e"


def upgrade() -> None:
    op.add_column("search_doc", sa.Column("is_internet", sa.Boolean(), nullable=True))
    op.add_column("tool", sa.Column("display_name", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("tool", "display_name")
    op.drop_column("search_doc", "is_internet")


================================================
FILE: backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py
================================================
"""Larger Access Tokens for OAUTH

Revision ID: 465f78d9b7f9
Revises: 3c5e35aa9af0
Create Date: 2023-07-18 17:33:40.365034

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "465f78d9b7f9"
down_revision = "3c5e35aa9af0"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column("oauth_account", "access_token", type_=sa.Text())


def downgrade() -> None:
    op.alter_column("oauth_account", "access_token", type_=sa.String(length=1024))


================================================
FILE: backend/alembic/versions/46625e4745d4_remove_native_enum.py
================================================
"""Remove Native Enum

Revision ID: 46625e4745d4
Revises: 9d97fecfab7f
Create Date: 2023-10-27 11:38:33.803145

"""

from alembic import op
from sqlalchemy import String

# revision identifiers, used by Alembic.
revision = "46625e4745d4"
down_revision = "9d97fecfab7f"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # At this point, we directly changed some previous migrations,
    # https://github.com/onyx-dot-app/onyx/pull/637
    # Due to using Postgres native Enums, it caused some complications for first time users.
    # To remove those complications, all Enums are only handled application side moving forward.
    # This migration exists to ensure that existing users don't run into upgrade issues.
    op.alter_column("index_attempt", "status", type_=String)
    op.alter_column("connector_credential_pair", "last_attempt_status", type_=String)
    op.execute("DROP TYPE IF EXISTS indexingstatus")


def downgrade() -> None:
    # We don't want Native Enums, do nothing
    pass


================================================
FILE: backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py
================================================
"""fix_user__external_user_group_id_fk

Revision ID: 46b7a812670f
Revises: f32615f71aeb
Create Date: 2024-09-23 12:58:03.894038

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "46b7a812670f"
down_revision = "f32615f71aeb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop the existing primary key
    op.drop_constraint(
        "user__external_user_group_id_pkey",
        "user__external_user_group_id",
        type_="primary",
    )

    # Add the new composite primary key
    op.create_primary_key(
        "user__external_user_group_id_pkey",
        "user__external_user_group_id",
        ["user_id", "external_user_group_id", "cc_pair_id"],
    )


def downgrade() -> None:
    # Drop the composite primary key
    op.drop_constraint(
        "user__external_user_group_id_pkey",
        "user__external_user_group_id",
        type_="primary",
    )
    # Delete all entries from the table
    op.execute("DELETE FROM user__external_user_group_id")

    # Recreate the original primary key on user_id
    op.create_primary_key(
        "user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"]
    )


================================================
FILE: backend/alembic/versions/4738e4b3bae1_pg_file_store.py
================================================
"""PG File Store

Revision ID: 4738e4b3bae1
Revises: e91df4e935ef
Create Date: 2024-03-20 18:53:32.461518

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "4738e4b3bae1"
down_revision = "e91df4e935ef"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "file_store",
        sa.Column("file_name", sa.String(), nullable=False),
        sa.Column("lobj_oid", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("file_name"),
    )


def downgrade() -> None:
    op.drop_table("file_store")


================================================
FILE: backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py
================================================
"""Add display_model_names to llm_provider

Revision ID: 473a1a7ca408
Revises: 325975216eb3
Create Date: 2024-07-25 14:31:02.002917

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "473a1a7ca408"
down_revision = "325975216eb3"
branch_labels: None = None
depends_on: None = None

default_models_by_provider = {
    "openai": ["gpt-4", "gpt-4o", "gpt-4o-mini"],
    "bedrock": [
        "meta.llama3-1-70b-instruct-v1:0",
        "meta.llama3-1-8b-instruct-v1:0",
        "anthropic.claude-3-opus-20240229-v1:0",
        "mistral.mistral-large-2402-v1:0",
        "anthropic.claude-3-5-sonnet-20240620-v1:0",
    ],
    "anthropic": ["claude-3-opus-20240229", "claude-3-5-sonnet-20240620"],
}


def upgrade() -> None:
    op.add_column(
        "llm_provider",
        sa.Column("display_model_names", postgresql.ARRAY(sa.String()), nullable=True),
    )

    connection = op.get_bind()
    for provider, models in default_models_by_provider.items():
        connection.execute(
            sa.text(
                "UPDATE llm_provider SET display_model_names = :models WHERE provider = :provider"
            ),
            {"models": models, "provider": provider},
        )


def downgrade() -> None:
    op.drop_column("llm_provider", "display_model_names")


================================================
FILE: backend/alembic/versions/47433d30de82_create_indexattempt_table.py
================================================
"""Create IndexAttempt table

Revision ID: 47433d30de82
Revises:
Create Date: 2023-05-04 00:55:32.971991

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "47433d30de82"
down_revision: None = None
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "index_attempt",
        sa.Column("id", sa.Integer(), nullable=False),
        # String type since python enum will change often
        sa.Column(
            "source",
            sa.String(),
            nullable=False,
        ),
        # String type to easily accomodate new ways of pulling
        # in documents
        sa.Column(
            "input_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column(
            "connector_specific_config",
            postgresql.JSONB(),
            nullable=False,
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            server_onupdate=sa.text("now()"),  # type: ignore
            nullable=True,
        ),
        sa.Column(
            "status",
            sa.Enum(
                "NOT_STARTED",
                "IN_PROGRESS",
                "SUCCESS",
                "FAILED",
                name="indexingstatus",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column("document_ids", postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column("error_msg", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("index_attempt")


================================================
FILE: backend/alembic/versions/475fcefe8826_add_name_to_api_key.py
================================================
"""Add name to api_key

Revision ID: 475fcefe8826
Revises: ecab2b3f1a3b
Create Date: 2024-04-11 11:05:18.414438

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "475fcefe8826"
down_revision = "ecab2b3f1a3b"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("api_key", sa.Column("name", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("api_key", "name")


================================================
FILE: backend/alembic/versions/4794bc13e484_update_prompt_length.py
================================================
"""update prompt length

Revision ID: 4794bc13e484
Revises: f7505c5b0284
Create Date: 2025-04-02 11:26:36.180328

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "4794bc13e484"
down_revision = "f7505c5b0284"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column(
        "prompt",
        "system_prompt",
        existing_type=sa.TEXT(),
        type_=sa.String(length=5000000),
        existing_nullable=False,
    )
    op.alter_column(
        "prompt",
        "task_prompt",
        existing_type=sa.TEXT(),
        type_=sa.String(length=5000000),
        existing_nullable=False,
    )


def downgrade() -> None:
    op.alter_column(
        "prompt",
        "system_prompt",
        existing_type=sa.String(length=5000000),
        type_=sa.TEXT(),
        existing_nullable=False,
    )
    op.alter_column(
        "prompt",
        "task_prompt",
        existing_type=sa.String(length=5000000),
        type_=sa.TEXT(),
        existing_nullable=False,
    )


================================================
FILE: backend/alembic/versions/47a07e1a38f1_fix_invalid_model_configurations_state.py
================================================
"""Fix invalid model-configurations state

Revision ID: 47a07e1a38f1
Revises: 7a70b7664e37
Create Date: 2025-04-23 15:39:43.159504

"""

from alembic import op
from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from onyx.llm.well_known_providers.llm_provider_options import (
    fetch_model_names_for_provider_as_set,
    fetch_visible_model_names_for_provider_as_set,
)


# revision identifiers, used by Alembic.
revision = "47a07e1a38f1"
down_revision = "7a70b7664e37"
branch_labels = None
depends_on = None


class _SimpleModelConfiguration(BaseModel):
    # Configure model to read from attributes
    model_config = ConfigDict(from_attributes=True)

    id: int
    llm_provider_id: int
    name: str
    is_visible: bool
    max_input_tokens: int | None


def upgrade() -> None:
    llm_provider_table = sa.sql.table(
        "llm_provider",
        sa.column("id", sa.Integer),
        sa.column("provider", sa.String),
        sa.column("model_names", postgresql.ARRAY(sa.String)),
        sa.column("display_model_names", postgresql.ARRAY(sa.String)),
        sa.column("default_model_name", sa.String),
        sa.column("fast_default_model_name", sa.String),
    )
    model_configuration_table = sa.sql.table(
        "model_configuration",
        sa.column("id", sa.Integer),
        sa.column("llm_provider_id", sa.Integer),
        sa.column("name", sa.String),
        sa.column("is_visible", sa.Boolean),
        sa.column("max_input_tokens", sa.Integer),
    )

    connection = op.get_bind()

    llm_providers = connection.execute(
        sa.select(
            llm_provider_table.c.id,
            llm_provider_table.c.provider,
        )
    ).fetchall()

    for llm_provider in llm_providers:
        llm_provider_id, provider_name = llm_provider

        default_models = fetch_model_names_for_provider_as_set(provider_name)
        display_models = fetch_visible_model_names_for_provider_as_set(
            provider_name=provider_name
        )

        # if `fetch_model_names_for_provider_as_set` returns `None`, then
        # that means that `provider_name` is not a well-known llm provider.
        if not default_models:
            continue

        if not display_models:
            raise RuntimeError(
                "If `default_models` is non-None, `display_models` must be non-None too."
            )

        model_configurations = [
            _SimpleModelConfiguration.model_validate(model_configuration)
            for model_configuration in connection.execute(
                sa.select(
                    model_configuration_table.c.id,
                    model_configuration_table.c.llm_provider_id,
                    model_configuration_table.c.name,
                    model_configuration_table.c.is_visible,
                    model_configuration_table.c.max_input_tokens,
                ).where(model_configuration_table.c.llm_provider_id == llm_provider_id)
            ).fetchall()
        ]

        if model_configurations:
            at_least_one_is_visible = any(
                [
                    model_configuration.is_visible
                    for model_configuration in model_configurations
                ]
            )

            # If there is at least one model which is public, this is a valid state.
            # Therefore, don't touch it and move on to the next one.
            if at_least_one_is_visible:
                continue

            existing_visible_model_names: set[str] = set(
                [
                    model_configuration.name
                    for model_configuration in model_configurations
                    if model_configuration.is_visible
                ]
            )

            difference = display_models.difference(existing_visible_model_names)

            for model_name in difference:
                if not model_name:
                    continue

                insert_statement = postgresql.insert(model_configuration_table).values(
                    llm_provider_id=llm_provider_id,
                    name=model_name,
                    is_visible=True,
                    max_input_tokens=None,
                )

                connection.execute(
                    insert_statement.on_conflict_do_update(
                        index_elements=["llm_provider_id", "name"],
                        set_={"is_visible": insert_statement.excluded.is_visible},
                    )
                )
        else:
            for model_name in default_models:
                connection.execute(
                    model_configuration_table.insert().values(
                        llm_provider_id=llm_provider_id,
                        name=model_name,
                        is_visible=model_name in display_models,
                        max_input_tokens=None,
                    )
                )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/47e5bef3a1d7_add_persona_categories.py
================================================
"""add persona categories

Revision ID: 47e5bef3a1d7
Revises: dfbe9e93d3c7
Create Date: 2024-11-05 18:55:02.221064

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "47e5bef3a1d7"
down_revision = "dfbe9e93d3c7"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create the persona_category table
    op.create_table(
        "persona_category",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    # Add category_id to persona table
    op.add_column("persona", sa.Column("category_id", sa.Integer(), nullable=True))
    op.create_foreign_key(
        "fk_persona_category",
        "persona",
        "persona_category",
        ["category_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    op.drop_constraint("persona_category_id_fkey", "persona", type_="foreignkey")
    op.drop_column("persona", "category_id")
    op.drop_table("persona_category")


================================================
FILE: backend/alembic/versions/48d14957fe80_add_support_for_custom_tools.py
================================================
"""Add support for custom tools

Revision ID: 48d14957fe80
Revises: b85f02ec1308
Create Date: 2024-06-09 14:58:19.946509

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "48d14957fe80"
down_revision = "b85f02ec1308"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "tool",
        sa.Column(
            "openapi_schema",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    op.add_column(
        "tool",
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
    )
    op.create_foreign_key("tool_user_fk", "tool", "user", ["user_id"], ["id"])

    op.create_table(
        "tool_call",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("tool_id", sa.Integer(), nullable=False),
        sa.Column("tool_name", sa.String(), nullable=False),
        sa.Column(
            "tool_arguments", postgresql.JSONB(astext_type=sa.Text()), nullable=False
        ),
        sa.Column(
            "tool_result", postgresql.JSONB(astext_type=sa.Text()), nullable=False
        ),
        sa.Column(
            "message_id", sa.Integer(), sa.ForeignKey("chat_message.id"), nullable=False
        ),
    )


def downgrade() -> None:
    op.drop_table("tool_call")

    op.drop_constraint("tool_user_fk", "tool", type_="foreignkey")
    op.drop_column("tool", "user_id")
    op.drop_column("tool", "openapi_schema")


================================================
FILE: backend/alembic/versions/495cb26ce93e_create_knowlege_graph_tables.py
================================================
"""create knowledge graph tables

Revision ID: 495cb26ce93e
Revises: ca04500b9ee8
Create Date: 2025-03-19 08:51:14.341989

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from datetime import datetime, timedelta

from onyx.configs.app_configs import DB_READONLY_USER
from onyx.configs.app_configs import DB_READONLY_PASSWORD
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA


# revision identifiers, used by Alembic.
revision = "495cb26ce93e"
down_revision = "ca04500b9ee8"
branch_labels = None
depends_on = None


def upgrade() -> None:

    # Create a new permission-less user to be later used for knowledge graph queries.
    # The user will later get temporary read privileges for a specific view that will be
    # ad hoc generated specific to a knowledge graph query.
    #
    # Note: in order for the migration to run, the DB_READONLY_USER and DB_READONLY_PASSWORD
    # environment variables MUST be set. Otherwise, an exception will be raised.

    if not MULTI_TENANT:
        # Enable pg_trgm extension if not already enabled
        op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

        # Create read-only db user here only in single tenant mode. For multi-tenant mode,
        # the user is created in the alembic_tenants migration.
        if not (DB_READONLY_USER and DB_READONLY_PASSWORD):
            raise Exception("DB_READONLY_USER or DB_READONLY_PASSWORD is not set")

        op.execute(
            text(
                f"""
                DO $$
                BEGIN
                    -- Check if the read-only user already exists
                    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                        -- Create the read-only user with the specified password
                        EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');
                        -- First revoke all privileges to ensure a clean slate
                        EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
                        -- Grant only the CONNECT privilege to allow the user to connect to the database
                        -- but not perform any operations without additional specific grants
                        EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');
                    END IF;
                END
                $$;
                """
            )
        )

    # Grant usage on current schema to readonly user
    op.execute(
        text(
            f"""
            DO $$
            BEGIN
                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                    EXECUTE format('GRANT USAGE ON SCHEMA %I TO %I', current_schema(), '{DB_READONLY_USER}');
                END IF;
            END
            $$;
            """
        )
    )

    op.execute("DROP TABLE IF EXISTS kg_config CASCADE")
    op.create_table(
        "kg_config",
        sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
        sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
        sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
        sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
    )

    # Insert initial data into kg_config table
    op.bulk_insert(
        sa.table(
            "kg_config",
            sa.column("kg_variable_name", sa.String),
            sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
        ),
        [
            {"kg_variable_name": "KG_EXPOSED", "kg_variable_values": ["false"]},
            {"kg_variable_name": "KG_ENABLED", "kg_variable_values": ["false"]},
            {"kg_variable_name": "KG_VENDOR", "kg_variable_values": []},
            {"kg_variable_name": "KG_VENDOR_DOMAINS", "kg_variable_values": []},
            {"kg_variable_name": "KG_IGNORE_EMAIL_DOMAINS", "kg_variable_values": []},
            {
                "kg_variable_name": "KG_EXTRACTION_IN_PROGRESS",
                "kg_variable_values": ["false"],
            },
            {
                "kg_variable_name": "KG_CLUSTERING_IN_PROGRESS",
                "kg_variable_values": ["false"],
            },
            {
                "kg_variable_name": "KG_COVERAGE_START",
                "kg_variable_values": [
                    (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d")
                ],
            },
            {"kg_variable_name": "KG_MAX_COVERAGE_DAYS", "kg_variable_values": ["90"]},
            {
                "kg_variable_name": "KG_MAX_PARENT_RECURSION_DEPTH",
                "kg_variable_values": ["2"],
            },
        ],
    )

    op.execute("DROP TABLE IF EXISTS kg_entity_type CASCADE")
    op.create_table(
        "kg_entity_type",
        sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("grounding", sa.String(), nullable=False),
        sa.Column(
            "attributes",
            postgresql.JSONB,
            nullable=False,
            server_default="{}",
        ),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column("active", sa.Boolean(), nullable=False, default=False),
        sa.Column("deep_extraction", sa.Boolean(), nullable=False, default=False),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
        ),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.Column("grounded_source_name", sa.String(), nullable=True),
        sa.Column("entity_values", postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column(
            "clustering",
            postgresql.JSONB,
            nullable=False,
            server_default="{}",
        ),
    )

    op.execute("DROP TABLE IF EXISTS kg_relationship_type CASCADE")
    # Create KGRelationshipType table
    op.create_table(
        "kg_relationship_type",
        sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column("name", sa.String(), nullable=False, index=True),
        sa.Column(
            "source_entity_type_id_name", sa.String(), nullable=False, index=True
        ),
        sa.Column(
            "target_entity_type_id_name", sa.String(), nullable=False, index=True
        ),
        sa.Column("definition", sa.Boolean(), nullable=False, default=False),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column("type", sa.String(), nullable=False, index=True),
        sa.Column("active", sa.Boolean(), nullable=False, default=True),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
        ),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.Column(
            "clustering",
            postgresql.JSONB,
            nullable=False,
            server_default="{}",
        ),
        sa.ForeignKeyConstraint(
            ["source_entity_type_id_name"], ["kg_entity_type.id_name"]
        ),
        sa.ForeignKeyConstraint(
            ["target_entity_type_id_name"], ["kg_entity_type.id_name"]
        ),
    )

    op.execute("DROP TABLE IF EXISTS kg_relationship_type_extraction_staging CASCADE")
    # Create KGRelationshipTypeExtractionStaging table
    op.create_table(
        "kg_relationship_type_extraction_staging",
        sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column("name", sa.String(), nullable=False, index=True),
        sa.Column(
            "source_entity_type_id_name", sa.String(), nullable=False, index=True
        ),
        sa.Column(
            "target_entity_type_id_name", sa.String(), nullable=False, index=True
        ),
        sa.Column("definition", sa.Boolean(), nullable=False, default=False),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column("type", sa.String(), nullable=False, index=True),
        sa.Column("active", sa.Boolean(), nullable=False, default=True),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.Column(
            "clustering",
            postgresql.JSONB,
            nullable=False,
            server_default="{}",
        ),
        sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
        sa.ForeignKeyConstraint(
            ["source_entity_type_id_name"], ["kg_entity_type.id_name"]
        ),
        sa.ForeignKeyConstraint(
            ["target_entity_type_id_name"], ["kg_entity_type.id_name"]
        ),
    )

    op.execute("DROP TABLE IF EXISTS kg_entity CASCADE")

    # Create KGEntity table
    op.create_table(
        "kg_entity",
        sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column("name", sa.String(), nullable=False, index=True),
        sa.Column("entity_class", sa.String(), nullable=True, index=True),
        sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
        sa.Column("entity_key", sa.String(), nullable=True, index=True),
        sa.Column("name_trigrams", postgresql.ARRAY(sa.String(3)), nullable=True),
        sa.Column("document_id", sa.String(), nullable=True, index=True),
        sa.Column(
            "alternative_names",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column(
            "keywords",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column(
            "acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
        ),
        sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
        ),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
        sa.UniqueConstraint(
            "name",
            "entity_type_id_name",
            "document_id",
            name="uq_kg_entity_name_type_doc",
        ),
    )
    op.create_index("ix_entity_type_acl", "kg_entity", ["entity_type_id_name", "acl"])
    op.create_index(
        "ix_entity_name_search", "kg_entity", ["name", "entity_type_id_name"]
    )

    op.execute("DROP TABLE IF EXISTS kg_entity_extraction_staging CASCADE")
    # Create KGEntityExtractionStaging table
    op.create_table(
        "kg_entity_extraction_staging",
        sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column("name", sa.String(), nullable=False, index=True),
        sa.Column("document_id", sa.String(), nullable=True, index=True),
        sa.Column(
            "alternative_names",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column(
            "keywords",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column(
            "acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
        ),
        sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("transferred_id_name", sa.String(), nullable=True, default=None),
        sa.Column("entity_class", sa.String(), nullable=True, index=True),
        sa.Column("entity_key", sa.String(), nullable=True, index=True),
        sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
        sa.Column("parent_key", sa.String(), nullable=True, index=True),
        sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
    )
    op.create_index(
        "ix_entity_extraction_staging_acl",
        "kg_entity_extraction_staging",
        ["entity_type_id_name", "acl"],
    )
    op.create_index(
        "ix_entity_extraction_staging_name_search",
        "kg_entity_extraction_staging",
        ["name", "entity_type_id_name"],
    )

    op.execute("DROP TABLE IF EXISTS kg_relationship CASCADE")
    # Create KGRelationship table
    op.create_table(
        "kg_relationship",
        sa.Column("id_name", sa.String(), nullable=False, index=True),
        sa.Column("source_node", sa.String(), nullable=False, index=True),
        sa.Column("target_node", sa.String(), nullable=False, index=True),
        sa.Column("source_node_type", sa.String(), nullable=False, index=True),
        sa.Column("target_node_type", sa.String(), nullable=False, index=True),
        sa.Column("source_document", sa.String(), nullable=True, index=True),
        sa.Column("type", sa.String(), nullable=False, index=True),
        sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
        ),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.ForeignKeyConstraint(["source_node"], ["kg_entity.id_name"]),
        sa.ForeignKeyConstraint(["target_node"], ["kg_entity.id_name"]),
        sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
        sa.ForeignKeyConstraint(
            ["relationship_type_id_name"], ["kg_relationship_type.id_name"]
        ),
        sa.UniqueConstraint(
            "source_node",
            "target_node",
            "type",
            name="uq_kg_relationship_source_target_type",
        ),
        sa.PrimaryKeyConstraint("id_name", "source_document"),
    )
    op.create_index(
        "ix_kg_relationship_nodes", "kg_relationship", ["source_node", "target_node"]
    )

    op.execute("DROP TABLE IF EXISTS kg_relationship_extraction_staging CASCADE")
    # Create KGRelationshipExtractionStaging table
    op.create_table(
        "kg_relationship_extraction_staging",
        sa.Column("id_name", sa.String(), nullable=False, index=True),
        sa.Column("source_node", sa.String(), nullable=False, index=True),
        sa.Column("target_node", sa.String(), nullable=False, index=True),
        sa.Column("source_node_type", sa.String(), nullable=False, index=True),
        sa.Column("target_node_type", sa.String(), nullable=False, index=True),
        sa.Column("source_document", sa.String(), nullable=True, index=True),
        sa.Column("type", sa.String(), nullable=False, index=True),
        sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
        sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
        sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
        sa.ForeignKeyConstraint(
            ["source_node"], ["kg_entity_extraction_staging.id_name"]
        ),
        sa.ForeignKeyConstraint(
            ["target_node"], ["kg_entity_extraction_staging.id_name"]
        ),
        sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
        sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
        sa.ForeignKeyConstraint(
            ["relationship_type_id_name"],
            ["kg_relationship_type_extraction_staging.id_name"],
        ),
        sa.UniqueConstraint(
            "source_node",
            "target_node",
            "type",
            name="uq_kg_relationship_extraction_staging_source_target_type",
        ),
        sa.PrimaryKeyConstraint("id_name", "source_document"),
    )
    op.create_index(
        "ix_kg_relationship_extraction_staging_nodes",
        "kg_relationship_extraction_staging",
        ["source_node", "target_node"],
    )

    op.execute("DROP TABLE IF EXISTS kg_term CASCADE")
    # Create KGTerm table
    op.create_table(
        "kg_term",
        sa.Column("id_term", sa.String(), primary_key=True, nullable=False, index=True),
        sa.Column(
            "entity_types",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
        ),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
        ),
    )
    op.create_index("ix_search_term_entities", "kg_term", ["entity_types"])
    op.create_index("ix_search_term_term", "kg_term", ["id_term"])

    op.add_column(
        "document",
        sa.Column("kg_stage", sa.String(), nullable=True, index=True),
    )
    op.add_column(
        "document",
        sa.Column("kg_processing_time", sa.DateTime(timezone=True), nullable=True),
    )
    op.add_column(
        "connector",
        sa.Column(
            "kg_processing_enabled",
            sa.Boolean(),
            nullable=True,
            server_default="false",
        ),
    )

    op.add_column(
        "connector",
        sa.Column(
            "kg_coverage_days",
            sa.Integer(),
            nullable=True,
            server_default=None,
        ),
    )

    # Create GIN index for clustering and normalization
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_kg_entity_clustering_trigrams "
        f"ON kg_entity USING GIN (name {POSTGRES_DEFAULT_SCHEMA}.gin_trgm_ops)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_kg_entity_normalization_trigrams ON kg_entity USING GIN (name_trigrams)"
    )

    # Create kg_entity trigger to update kg_entity.name and its trigrams
    alphanum_pattern = r"[^a-z0-9]+"
    truncate_length = 1000
    function = "update_kg_entity_name"
    op.execute(
        text(
            f"""
            CREATE OR REPLACE FUNCTION {function}()
            RETURNS TRIGGER AS $$
            DECLARE
                name text;
                cleaned_name text;
            BEGIN
                -- Set name to semantic_id if document_id is not NULL
                IF NEW.document_id IS NOT NULL THEN
                    SELECT lower(semantic_id) INTO name
                    FROM document
                    WHERE id = NEW.document_id;
                ELSE
                    name = lower(NEW.name);
                END IF;

                -- Clean name and truncate if too long
                cleaned_name = regexp_replace(
                    name,
                    '{alphanum_pattern}', '', 'g'
                );
                IF length(cleaned_name) > {truncate_length} THEN
                    cleaned_name = left(cleaned_name, {truncate_length});
                END IF;

                -- Set name and name trigrams
                NEW.name = name;
                NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
            """
        )
    )
    trigger = f"{function}_trigger"
    op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON kg_entity")
    op.execute(
        f"""
        CREATE TRIGGER {trigger}
            BEFORE INSERT OR UPDATE OF name
            ON kg_entity
            FOR EACH ROW
            EXECUTE FUNCTION {function}();
        """
    )

    # Create kg_entity trigger to update kg_entity.name and its trigrams
    function = "update_kg_entity_name_from_doc"
    op.execute(
        text(
            f"""
            CREATE OR REPLACE FUNCTION {function}()
            RETURNS TRIGGER AS $$
            DECLARE
                doc_name text;
                cleaned_name text;
            BEGIN
                doc_name = lower(NEW.semantic_id);

                -- Clean name and truncate if too long
                cleaned_name = regexp_replace(
                    doc_name,
                    '{alphanum_pattern}', '', 'g'
                );
                IF length(cleaned_name) > {truncate_length} THEN
                    cleaned_name = left(cleaned_name, {truncate_length});
                END IF;

                -- Set name and name trigrams for all entities referencing this document
                UPDATE kg_entity
                SET
                    name = doc_name,
                    name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)
                WHERE document_id = NEW.id;
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
            """
        )
    )
    trigger = f"{function}_trigger"
    op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON document")
    op.execute(
        f"""
        CREATE TRIGGER {trigger}
            AFTER UPDATE OF semantic_id
            ON document
            FOR EACH ROW
            EXECUTE FUNCTION {function}();
        """
    )


def downgrade() -> None:

    #  Drop all views that start with 'kg_'
    op.execute(
        """
                DO $$
                DECLARE
                    view_name text;
                BEGIN
                    FOR view_name IN
                        SELECT c.relname
                        FROM pg_catalog.pg_class c
                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                        WHERE c.relkind = 'v'
                        AND n.nspname = current_schema()
                        AND c.relname LIKE 'kg_relationships_with_access%'
                    LOOP
                        EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
                    END LOOP;
                END $$;
            """
    )

    op.execute(
        """
                DO $$
                DECLARE
                    view_name text;
                BEGIN
                    FOR view_name IN
                        SELECT c.relname
                        FROM pg_catalog.pg_class c
                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                        WHERE c.relkind = 'v'
                        AND n.nspname = current_schema()
                        AND c.relname LIKE 'allowed_docs%'
                    LOOP
                        EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
                    END LOOP;
                END $$;
            """
    )

    for table, function in (
        ("kg_entity", "update_kg_entity_name"),
        ("document", "update_kg_entity_name_from_doc"),
    ):
        op.execute(f"DROP TRIGGER IF EXISTS {function}_trigger ON {table}")
        op.execute(f"DROP FUNCTION IF EXISTS {function}()")

    # Drop index
    op.execute("DROP INDEX IF EXISTS idx_kg_entity_clustering_trigrams")
    op.execute("DROP INDEX IF EXISTS idx_kg_entity_normalization_trigrams")

    # Drop tables in reverse order of creation to handle dependencies
    op.drop_table("kg_term")
    op.drop_table("kg_relationship")
    op.drop_table("kg_entity")
    op.drop_table("kg_relationship_type")
    op.drop_table("kg_relationship_extraction_staging")
    op.drop_table("kg_relationship_type_extraction_staging")
    op.drop_table("kg_entity_extraction_staging")
    op.drop_table("kg_entity_type")
    op.drop_column("connector", "kg_processing_enabled")
    op.drop_column("connector", "kg_coverage_days")
    op.drop_column("document", "kg_stage")
    op.drop_column("document", "kg_processing_time")
    op.drop_table("kg_config")

    # Revoke usage on current schema for the readonly user
    op.execute(
        text(
            f"""
            DO $$
            BEGIN
                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                    EXECUTE format('REVOKE ALL ON SCHEMA %I FROM %I', current_schema(), '{DB_READONLY_USER}');
                END IF;
            END
            $$;
            """
        )
    )

    if not MULTI_TENANT:
        # Drop read-only db user here only in single tenant mode. For multi-tenant mode,
        # the user is dropped in the alembic_tenants migration.

        op.execute(
            text(
                f"""
            DO $$
            BEGIN
                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                    -- First revoke all privileges from the database
                    EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
                    -- Then drop the user
                    EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');
                END IF;
            END
            $$;
        """
            )
        )
        op.execute(text("DROP EXTENSION IF EXISTS pg_trgm"))


================================================
FILE: backend/alembic/versions/4a1e4b1c89d2_add_indexing_to_userfilestatus.py
================================================
"""Add INDEXING to UserFileStatus

Revision ID: 4a1e4b1c89d2
Revises: 6b3b4083c5aa
Create Date: 2026-02-28 00:00:00.000000

"""

import sqlalchemy as sa
from alembic import op

revision = "4a1e4b1c89d2"
down_revision = "6b3b4083c5aa"
branch_labels = None
depends_on = None

TABLE = "user_file"
COLUMN = "status"
CONSTRAINT_NAME = "ck_user_file_status"

OLD_VALUES = ("PROCESSING", "COMPLETED", "FAILED", "CANCELED", "DELETING")
NEW_VALUES = ("PROCESSING", "INDEXING", "COMPLETED", "FAILED", "CANCELED", "DELETING")


def _drop_status_check_constraint() -> None:
    """Drop the existing CHECK constraint on user_file.status.

    The constraint name is auto-generated by SQLAlchemy and unknown,
    so we look it up via the inspector.
    """
    inspector = sa.inspect(op.get_bind())
    for constraint in inspector.get_check_constraints(TABLE):
        if COLUMN in constraint.get("sqltext", ""):
            constraint_name = constraint["name"]
            if constraint_name is not None:
                op.drop_constraint(constraint_name, TABLE, type_="check")


def upgrade() -> None:
    _drop_status_check_constraint()
    in_clause = ", ".join(f"'{v}'" for v in NEW_VALUES)
    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")


def downgrade() -> None:
    op.execute(
        f"UPDATE {TABLE} SET {COLUMN} = 'PROCESSING' WHERE {COLUMN} = 'INDEXING'"
    )
    op.drop_constraint(CONSTRAINT_NAME, TABLE, type_="check")
    in_clause = ", ".join(f"'{v}'" for v in OLD_VALUES)
    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")


================================================
FILE: backend/alembic/versions/4a951134c801_moved_status_to_connector_credential_.py
================================================
"""Moved status to connector credential pair

Revision ID: 4a951134c801
Revises: 7477a5f5d728
Create Date: 2024-08-10 19:20:34.527559

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "4a951134c801"
down_revision = "7477a5f5d728"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "status",
            sa.Enum(
                "ACTIVE",
                "PAUSED",
                "DELETING",
                name="connectorcredentialpairstatus",
                native_enum=False,
            ),
            nullable=True,
        ),
    )

    # Update status of connector_credential_pair based on connector's disabled status
    op.execute(
        """
        UPDATE connector_credential_pair
        SET status = CASE
            WHEN (
                SELECT disabled
                FROM connector
                WHERE connector.id = connector_credential_pair.connector_id
            ) = FALSE THEN 'ACTIVE'
            ELSE 'PAUSED'
        END
        """
    )

    # Make the status column not nullable after setting values
    op.alter_column("connector_credential_pair", "status", nullable=False)

    op.drop_column("connector", "disabled")


def downgrade() -> None:
    op.add_column(
        "connector",
        sa.Column("disabled", sa.BOOLEAN(), autoincrement=False, nullable=True),
    )

    # Update disabled status of connector based on connector_credential_pair's status
    op.execute(
        """
        UPDATE connector
        SET disabled = CASE
            WHEN EXISTS (
                SELECT 1
                FROM connector_credential_pair
                WHERE connector_credential_pair.connector_id = connector.id
                AND connector_credential_pair.status = 'ACTIVE'
            ) THEN FALSE
            ELSE TRUE
        END
        """
    )

    # Make the disabled column not nullable after setting values
    op.alter_column("connector", "disabled", nullable=False)

    op.drop_column("connector_credential_pair", "status")


================================================
FILE: backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py
================================================
"""change default prune_freq

Revision ID: 4b08d97e175a
Revises: d9ec13955951
Create Date: 2024-08-20 15:28:52.993827

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "4b08d97e175a"
down_revision = "d9ec13955951"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE connector
        SET prune_freq = 2592000
        WHERE prune_freq = 86400
        """
    )


def downgrade() -> None:
    op.execute(
        """
        UPDATE connector
        SET prune_freq = 86400
        WHERE prune_freq = 2592000
        """
    )


================================================
FILE: backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py
================================================
"""add tab_index to tool_call

Revision ID: 4cebcbc9b2ae
Revises: a1b2c3d4e5f6
Create Date: 2025-12-16

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "4cebcbc9b2ae"
down_revision = "a1b2c3d4e5f6"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "tool_call",
        sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
    )


def downgrade() -> None:
    op.drop_column("tool_call", "tab_index")


================================================
FILE: backend/alembic/versions/4d58345da04a_lowercase_user_emails.py
================================================
"""lowercase_user_emails

Revision ID: 4d58345da04a
Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041

"""

import logging
from typing import cast
from alembic import op
from sqlalchemy.exc import IntegrityError
from sqlalchemy.sql import text


# revision identifiers, used by Alembic.
revision = "4d58345da04a"
down_revision = "f1ca58b2f2ec"
branch_labels = None
depends_on = None

logger = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    """Conflicts on lowercasing will result in the uppercased email getting a
    unique integer suffix when converted to lowercase."""

    connection = op.get_bind()

    # Fetch all user emails that are not already lowercase
    user_emails = connection.execute(
        text('SELECT id, email FROM "user" WHERE email != LOWER(email)')
    ).fetchall()

    for user_id, email in user_emails:
        email = cast(str, email)
        username, domain = email.rsplit("@", 1)
        new_email = f"{username.lower()}@{domain.lower()}"
        attempt = 1

        while True:
            try:
                # Try updating the email
                connection.execute(
                    text('UPDATE "user" SET email = :new_email WHERE id = :user_id'),
                    {"new_email": new_email, "user_id": user_id},
                )
                break  # Success, exit loop
            except IntegrityError:
                next_email = f"{username.lower()}_{attempt}@{domain.lower()}"
                # Email conflict occurred, append `_1`, `_2`, etc., to the username
                logger.warning(
                    f"Conflict while lowercasing email: old_email={email} conflicting_email={new_email} next_email={next_email}"
                )
                new_email = next_email
                attempt += 1


def downgrade() -> None:
    # Cannot restore original case of emails
    pass


================================================
FILE: backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py
================================================
"""Add type to credentials

Revision ID: 4ea2c93919c1
Revises: 473a1a7ca408
Create Date: 2024-07-18 13:07:13.655895

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "4ea2c93919c1"
down_revision = "473a1a7ca408"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Add the new 'source' column to the 'credential' table
    op.add_column(
        "credential",
        sa.Column(
            "source",
            sa.String(length=100),  # Use String instead of Enum
            nullable=True,  # Initially allow NULL values
        ),
    )
    op.add_column(
        "credential",
        sa.Column(
            "name",
            sa.String(),
            nullable=True,
        ),
    )

    # Create a temporary table that maps each credential to a single connector source.
    # This is needed because a credential can be associated with multiple connectors,
    # but we want to assign a single source to each credential.
    # We use DISTINCT ON to ensure we only get one row per credential_id.
    op.execute(
        """
    CREATE TEMPORARY TABLE temp_connector_credential AS
    SELECT DISTINCT ON (cc.credential_id)
        cc.credential_id,
        c.source AS connector_source
    FROM connector_credential_pair cc
    JOIN connector c ON cc.connector_id = c.id
    """
    )

    # Update the 'source' column in the 'credential' table
    op.execute(
        """
    UPDATE credential cred
    SET source = COALESCE(
        (SELECT connector_source
         FROM temp_connector_credential temp
         WHERE cred.id = temp.credential_id),
        'NOT_APPLICABLE'
    )
    """
    )

    # Drop the temporary table to avoid conflicts if migration runs again
    # (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
    op.execute("DROP TABLE IF EXISTS temp_connector_credential")

    # If no exception was raised, alter the column
    op.alter_column("credential", "source", nullable=True)  # TODO modify
    # # ### end Alembic commands ###


def downgrade() -> None:
    op.drop_column("credential", "source")
    op.drop_column("credential", "name")


================================================
FILE: backend/alembic/versions/4ee1287bd26a_add_multiple_slack_bot_support.py
================================================
"""add_multiple_slack_bot_support

Revision ID: 4ee1287bd26a
Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644

"""

from typing import cast
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
from onyx.key_value_store.factory import get_kv_store
from onyx.db.models import SlackBot
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "4ee1287bd26a"
down_revision = "47e5bef3a1d7"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Create new slack_bot table
    op.create_table(
        "slack_bot",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("enabled", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("bot_token", sa.LargeBinary(), nullable=False),
        sa.Column("app_token", sa.LargeBinary(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("bot_token"),
        sa.UniqueConstraint("app_token"),
    )

    # # Create new slack_channel_config table
    op.create_table(
        "slack_channel_config",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("slack_bot_id", sa.Integer(), nullable=True),
        sa.Column("persona_id", sa.Integer(), nullable=True),
        sa.Column("channel_config", postgresql.JSONB(), nullable=False),
        sa.Column("response_type", sa.String(), nullable=False),
        sa.Column(
            "enable_auto_filters", sa.Boolean(), nullable=False, server_default="false"
        ),
        sa.ForeignKeyConstraint(
            ["slack_bot_id"],
            ["slack_bot.id"],
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    # Handle existing Slack bot tokens first
    bot_token = None
    app_token = None
    first_row_id = None

    try:
        tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
    except Exception:
        tokens = {}

    bot_token = tokens.get("bot_token")
    app_token = tokens.get("app_token")

    if bot_token and app_token:
        session = Session(bind=op.get_bind())
        new_slack_bot = SlackBot(
            name="Slack Bot (Migrated)",
            enabled=True,
            bot_token=bot_token,
            app_token=app_token,
        )
        session.add(new_slack_bot)
        session.commit()
        first_row_id = new_slack_bot.id

    # Create a default bot if none exists
    # This is in case there are no slack tokens but there are channels configured
    op.execute(
        sa.text(
            """
            INSERT INTO slack_bot (name, enabled, bot_token, app_token)
            SELECT 'Default Bot', true, '', ''
            WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
            RETURNING id;
            """
        )
    )

    # Get the bot ID to use (either from existing migration or newly created)
    bot_id_query = sa.text(
        """
        SELECT COALESCE(
            :first_row_id,
            (SELECT id FROM slack_bot ORDER BY id ASC LIMIT 1)
        ) as bot_id;
        """
    )
    result = op.get_bind().execute(bot_id_query, {"first_row_id": first_row_id})
    bot_id = result.scalar()

    # CTE (Common Table Expression) that transforms the old slack_bot_config table data
    # This splits up the channel_names into their own rows
    channel_names_cte = """
        WITH channel_names AS (
            SELECT
                sbc.id as config_id,
                sbc.persona_id,
                sbc.response_type,
                sbc.enable_auto_filters,
                jsonb_array_elements_text(sbc.channel_config->'channel_names') as channel_name,
                sbc.channel_config->>'respond_tag_only' as respond_tag_only,
                sbc.channel_config->>'respond_to_bots' as respond_to_bots,
                sbc.channel_config->'respond_member_group_list' as respond_member_group_list,
                sbc.channel_config->'answer_filters' as answer_filters,
                sbc.channel_config->'follow_up_tags' as follow_up_tags
            FROM slack_bot_config sbc
        )
    """

    # Insert the channel names into the new slack_channel_config table
    insert_statement = """
        INSERT INTO slack_channel_config (
            slack_bot_id,
            persona_id,
            channel_config,
            response_type,
            enable_auto_filters
        )
        SELECT
            :bot_id,
            channel_name.persona_id,
            jsonb_build_object(
                'channel_name', channel_name.channel_name,
                'respond_tag_only',
                COALESCE((channel_name.respond_tag_only)::boolean, false),
                'respond_to_bots',
                COALESCE((channel_name.respond_to_bots)::boolean, false),
                'respond_member_group_list',
                COALESCE(channel_name.respond_member_group_list, '[]'::jsonb),
                'answer_filters',
                COALESCE(channel_name.answer_filters, '[]'::jsonb),
                'follow_up_tags',
                COALESCE(channel_name.follow_up_tags, '[]'::jsonb)
            ),
            channel_name.response_type,
            channel_name.enable_auto_filters
        FROM channel_names channel_name;
    """

    op.execute(sa.text(channel_names_cte + insert_statement).bindparams(bot_id=bot_id))

    # Clean up old tokens if they existed
    try:
        if bot_token and app_token:
            get_kv_store().delete("slack_bot_tokens_config_key")
    except Exception:
        pass
    # Rename the table
    op.rename_table(
        "slack_bot_config__standard_answer_category",
        "slack_channel_config__standard_answer_category",
    )

    # Rename the column
    op.alter_column(
        "slack_channel_config__standard_answer_category",
        "slack_bot_config_id",
        new_column_name="slack_channel_config_id",
    )

    # Drop the table with CASCADE to handle dependent objects
    op.execute("DROP TABLE slack_bot_config CASCADE")


def downgrade() -> None:
    # Recreate the old slack_bot_config table
    op.create_table(
        "slack_bot_config",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("persona_id", sa.Integer(), nullable=True),
        sa.Column("channel_config", postgresql.JSONB(), nullable=False),
        sa.Column("response_type", sa.String(), nullable=False),
        sa.Column("enable_auto_filters", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    # Migrate data back to the old format
    # Group by persona_id to combine channel names back into arrays
    op.execute(
        sa.text(
            """
            INSERT INTO slack_bot_config (
                persona_id,
                channel_config,
                response_type,
                enable_auto_filters
            )
            SELECT DISTINCT ON (persona_id)
                persona_id,
                jsonb_build_object(
                    'channel_names', (
                        SELECT jsonb_agg(c.channel_config->>'channel_name')
                        FROM slack_channel_config c
                        WHERE c.persona_id = scc.persona_id
                    ),
                    'respond_tag_only', (channel_config->>'respond_tag_only')::boolean,
                    'respond_to_bots', (channel_config->>'respond_to_bots')::boolean,
                    'respond_member_group_list', channel_config->'respond_member_group_list',
                    'answer_filters', channel_config->'answer_filters',
                    'follow_up_tags', channel_config->'follow_up_tags'
                ),
                response_type,
                enable_auto_filters
            FROM slack_channel_config scc
            WHERE persona_id IS NOT NULL;
            """
        )
    )

    # Rename the table back
    op.rename_table(
        "slack_channel_config__standard_answer_category",
        "slack_bot_config__standard_answer_category",
    )

    # Rename the column back
    op.alter_column(
        "slack_bot_config__standard_answer_category",
        "slack_channel_config_id",
        new_column_name="slack_bot_config_id",
    )

    # Try to save the first bot's tokens back to KV store
    try:
        first_bot = (
            op.get_bind()
            .execute(
                sa.text(
                    "SELECT bot_token, app_token FROM slack_bot ORDER BY id LIMIT 1"
                )
            )
            .first()
        )
        if first_bot and first_bot.bot_token and first_bot.app_token:
            tokens = {
                "bot_token": first_bot.bot_token,
                "app_token": first_bot.app_token,
            }
            get_kv_store().store("slack_bot_tokens_config_key", tokens)
    except Exception:
        pass

    # Drop the new tables in reverse order
    op.drop_table("slack_channel_config")
    op.drop_table("slack_bot")


================================================
FILE: backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py
================================================
"""add_open_url_tool

Revision ID: 4f8a2b3c1d9e
Revises: a852cbe15577
Create Date: 2025-11-24 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "4f8a2b3c1d9e"
down_revision = "a852cbe15577"
branch_labels = None
depends_on = None


OPEN_URL_TOOL = {
    "name": "OpenURLTool",
    "display_name": "Open URL",
    "description": (
        "The Open URL Action allows the agent to fetch and read contents of web pages."
    ),
    "in_code_tool_id": "OpenURLTool",
    "enabled": True,
}


def upgrade() -> None:
    conn = op.get_bind()

    # Check if tool already exists
    existing = conn.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        {"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
    ).fetchone()

    if existing:
        tool_id = existing[0]
        # Update existing tool
        conn.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :name,
                    display_name = :display_name,
                    description = :description
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            OPEN_URL_TOOL,
        )
    else:
        # Insert new tool
        conn.execute(
            sa.text(
                """
                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
                """
            ),
            OPEN_URL_TOOL,
        )
        # Get the newly inserted tool's id
        result = conn.execute(
            sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
            {"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
        ).fetchone()
        tool_id = result[0]  # type: ignore

    # Associate the tool with all existing personas
    # Get all persona IDs
    persona_ids = conn.execute(sa.text("SELECT id FROM persona")).fetchall()

    for (persona_id,) in persona_ids:
        # Check if association already exists
        exists = conn.execute(
            sa.text(
                """
                SELECT 1 FROM persona__tool
                WHERE persona_id = :persona_id AND tool_id = :tool_id
                """
            ),
            {"persona_id": persona_id, "tool_id": tool_id},
        ).fetchone()

        if not exists:
            conn.execute(
                sa.text(
                    """
                    INSERT INTO persona__tool (persona_id, tool_id)
                    VALUES (:persona_id, :tool_id)
                    """
                ),
                {"persona_id": persona_id, "tool_id": tool_id},
            )


def downgrade() -> None:
    # We don't remove the tool on downgrade since it's fine to have it around.
    # If we upgrade again, it will be a no-op.
    pass


================================================
FILE: backend/alembic/versions/503883791c39_add_effective_permissions.py
================================================
"""add_effective_permissions

Adds a JSONB column `effective_permissions` to the user table to store
directly granted permissions (e.g. ["admin"] or ["basic"]). Implied
permissions are expanded at read time, not stored.

Backfill: joins user__user_group → permission_grant to collect each
user's granted permissions into a JSON array. Users without group
memberships keep the default [].

Revision ID: 503883791c39
Revises: b4b7e1028dfd
Create Date: 2026-03-30 14:49:22.261748

"""

from collections.abc import Sequence

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "503883791c39"
down_revision = "b4b7e1028dfd"
branch_labels: str | None = None
depends_on: str | Sequence[str] | None = None

user_table = sa.table(
    "user",
    sa.column("id", sa.Uuid),
    sa.column("effective_permissions", postgresql.JSONB),
)

user_user_group = sa.table(
    "user__user_group",
    sa.column("user_id", sa.Uuid),
    sa.column("user_group_id", sa.Integer),
)

permission_grant = sa.table(
    "permission_grant",
    sa.column("group_id", sa.Integer),
    sa.column("permission", sa.String),
    sa.column("is_deleted", sa.Boolean),
)


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "effective_permissions",
            postgresql.JSONB(),
            nullable=False,
            server_default=sa.text("'[]'::jsonb"),
        ),
    )

    conn = op.get_bind()

    # Deduplicated permissions per user
    deduped = (
        sa.select(
            user_user_group.c.user_id,
            permission_grant.c.permission,
        )
        .select_from(
            user_user_group.join(
                permission_grant,
                sa.and_(
                    permission_grant.c.group_id == user_user_group.c.user_group_id,
                    permission_grant.c.is_deleted == sa.false(),
                ),
            )
        )
        .distinct()
        .subquery("deduped")
    )

    # Aggregate into JSONB array per user (order is not guaranteed;
    # consumers read this as a set so ordering does not matter)
    perms_per_user = (
        sa.select(
            deduped.c.user_id,
            sa.func.jsonb_agg(
                deduped.c.permission,
                type_=postgresql.JSONB,
            ).label("perms"),
        )
        .group_by(deduped.c.user_id)
        .subquery("sub")
    )

    conn.execute(
        user_table.update()
        .where(user_table.c.id == perms_per_user.c.user_id)
        .values(effective_permissions=perms_per_user.c.perms)
    )


def downgrade() -> None:
    op.drop_column("user", "effective_permissions")


================================================
FILE: backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py
================================================
"""merge_default_assistants_into_unified

Revision ID: 505c488f6662
Revises: d09fc20a3c66
Create Date: 2025-09-09 19:00:56.816626

"""

import json
from typing import Any
from typing import NamedTuple
from uuid import UUID

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "505c488f6662"
down_revision = "d09fc20a3c66"
branch_labels = None
depends_on = None

# Constants for the unified assistant
UNIFIED_ASSISTANT_NAME = "Assistant"
UNIFIED_ASSISTANT_DESCRIPTION = (
    "Your AI assistant with search, web browsing, and image generation capabilities."
)
UNIFIED_ASSISTANT_NUM_CHUNKS = 25
UNIFIED_ASSISTANT_DISPLAY_PRIORITY = 0
UNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION = True
UNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER = False
UNIFIED_ASSISTANT_RECENCY_BIAS = "AUTO"  # NOTE: needs to be capitalized
UNIFIED_ASSISTANT_CHUNKS_ABOVE = 0
UNIFIED_ASSISTANT_CHUNKS_BELOW = 0
UNIFIED_ASSISTANT_DATETIME_AWARE = True

# NOTE: tool specific prompts are handled on the fly and automatically injected
# into the prompt before passing to the LLM.
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the \
user's intent, ask clarifying questions when needed, think step-by-step through complex problems, \
provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always \
prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]]

You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make \
your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, \
symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use Markdown horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".strip()


INSERT_DICT: dict[str, Any] = {
    "name": UNIFIED_ASSISTANT_NAME,
    "description": UNIFIED_ASSISTANT_DESCRIPTION,
    "system_prompt": DEFAULT_SYSTEM_PROMPT,
    "num_chunks": UNIFIED_ASSISTANT_NUM_CHUNKS,
    "display_priority": UNIFIED_ASSISTANT_DISPLAY_PRIORITY,
    "llm_filter_extraction": UNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION,
    "llm_relevance_filter": UNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER,
    "recency_bias": UNIFIED_ASSISTANT_RECENCY_BIAS,
    "chunks_above": UNIFIED_ASSISTANT_CHUNKS_ABOVE,
    "chunks_below": UNIFIED_ASSISTANT_CHUNKS_BELOW,
    "datetime_aware": UNIFIED_ASSISTANT_DATETIME_AWARE,
}

GENERAL_ASSISTANT_ID = -1
ART_ASSISTANT_ID = -3


class UserRow(NamedTuple):
    """Typed representation of user row from database query."""

    id: UUID
    chosen_assistants: list[int] | None
    visible_assistants: list[int] | None
    hidden_assistants: list[int] | None
    pinned_assistants: list[int] | None


def upgrade() -> None:
    conn = op.get_bind()

    # Step 1: Create or update the unified assistant (ID 0)
    search_assistant = conn.execute(
        sa.text("SELECT * FROM persona WHERE id = 0")
    ).fetchone()

    if search_assistant:
        # Update existing Search assistant to be the unified assistant
        conn.execute(
            sa.text(
                """
                UPDATE persona
                SET name = :name,
                    description = :description,
                    system_prompt = :system_prompt,
                    num_chunks = :num_chunks,
                    is_default_persona = true,
                    is_visible = true,
                    deleted = false,
                    display_priority = :display_priority,
                    llm_filter_extraction = :llm_filter_extraction,
                    llm_relevance_filter = :llm_relevance_filter,
                    recency_bias = :recency_bias,
                    chunks_above = :chunks_above,
                    chunks_below = :chunks_below,
                    datetime_aware = :datetime_aware,
                    starter_messages = null
                WHERE id = 0
            """
            ),
            INSERT_DICT,
        )
    else:
        # Create new unified assistant with ID 0
        conn.execute(
            sa.text(
                """
                INSERT INTO persona (
                    id, name, description, system_prompt, num_chunks,
                    is_default_persona, is_visible, deleted, display_priority,
                    llm_filter_extraction, llm_relevance_filter, recency_bias,
                    chunks_above, chunks_below, datetime_aware, starter_messages,
                    builtin_persona
                ) VALUES (
                    0, :name, :description, :system_prompt, :num_chunks,
                    true, true, false, :display_priority, :llm_filter_extraction,
                    :llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
                    :datetime_aware, null, true
                )
            """
            ),
            INSERT_DICT,
        )

    # Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET deleted = true, is_visible = false, is_default_persona = false
            WHERE builtin_persona = true AND id != 0
        """
        )
    )

    # Step 3: Add all built-in tools to the unified assistant
    # First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
    search_tool = conn.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
    ).fetchone()

    if not search_tool:
        raise ValueError(
            "SearchTool not found in database. Ensure tools migration has run first."
        )

    image_gen_tool = conn.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
    ).fetchone()

    if not image_gen_tool:
        raise ValueError(
            "ImageGenerationTool not found in database. Ensure tools migration has run first."
        )

    # WebSearchTool is optional - may not be configured
    web_search_tool = conn.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
    ).fetchone()

    # Clear existing tool associations for persona 0
    conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))

    # Add tools to the unified assistant
    conn.execute(
        sa.text(
            """
            INSERT INTO persona__tool (persona_id, tool_id)
            VALUES (0, :tool_id)
            ON CONFLICT DO NOTHING
        """
        ),
        {"tool_id": search_tool[0]},
    )

    conn.execute(
        sa.text(
            """
            INSERT INTO persona__tool (persona_id, tool_id)
            VALUES (0, :tool_id)
            ON CONFLICT DO NOTHING
        """
        ),
        {"tool_id": image_gen_tool[0]},
    )

    if web_search_tool:
        conn.execute(
            sa.text(
                """
                INSERT INTO persona__tool (persona_id, tool_id)
                VALUES (0, :tool_id)
                ON CONFLICT DO NOTHING
            """
            ),
            {"tool_id": web_search_tool[0]},
        )

    # Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
    conn.execute(
        sa.text(
            """
            UPDATE chat_session
            SET persona_id = 0
            WHERE persona_id IN (
                SELECT id FROM persona WHERE builtin_persona = true AND id != 0
            )
        """
        )
    )

    # Step 5: Migrate user preferences - remove references to all builtin assistants
    # First, get all builtin assistant IDs (except 0)
    builtin_assistants_result = conn.execute(
        sa.text(
            """
            SELECT id FROM persona
            WHERE builtin_persona = true AND id != 0
        """
        )
    ).fetchall()
    builtin_assistant_ids = [row[0] for row in builtin_assistants_result]

    # Get all users with preferences
    users_result = conn.execute(
        sa.text(
            """
            SELECT id, chosen_assistants, visible_assistants,
                   hidden_assistants, pinned_assistants
            FROM "user"
        """
        )
    ).fetchall()

    for user_row in users_result:
        user = UserRow(*user_row)
        user_id: UUID = user.id
        updates: dict[str, Any] = {}

        # Remove all builtin assistants from chosen_assistants
        if user.chosen_assistants:
            new_chosen: list[int] = [
                assistant_id
                for assistant_id in user.chosen_assistants
                if assistant_id not in builtin_assistant_ids
            ]
            if new_chosen != user.chosen_assistants:
                updates["chosen_assistants"] = json.dumps(new_chosen)

        # Remove all builtin assistants from visible_assistants
        if user.visible_assistants:
            new_visible: list[int] = [
                assistant_id
                for assistant_id in user.visible_assistants
                if assistant_id not in builtin_assistant_ids
            ]
            if new_visible != user.visible_assistants:
                updates["visible_assistants"] = json.dumps(new_visible)

        # Add all builtin assistants to hidden_assistants
        if user.hidden_assistants:
            new_hidden: list[int] = list(user.hidden_assistants)
            for old_id in builtin_assistant_ids:
                if old_id not in new_hidden:
                    new_hidden.append(old_id)
            if new_hidden != user.hidden_assistants:
                updates["hidden_assistants"] = json.dumps(new_hidden)
        else:
            updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)

        # Remove all builtin assistants from pinned_assistants
        if user.pinned_assistants:
            new_pinned: list[int] = [
                assistant_id
                for assistant_id in user.pinned_assistants
                if assistant_id not in builtin_assistant_ids
            ]
            if new_pinned != user.pinned_assistants:
                updates["pinned_assistants"] = json.dumps(new_pinned)

        # Apply updates if any
        if updates:
            set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
            updates["user_id"] = str(user_id)  # Convert UUID to string for SQL
            conn.execute(
                sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
                updates,
            )


def downgrade() -> None:
    conn = op.get_bind()

    # Only restore General (ID -1) and Art (ID -3) assistants
    # Step 1: Keep Search assistant (ID 0) as default but restore original state
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET is_default_persona = true,
                is_visible = true,
                deleted = false
            WHERE id = 0
        """
        )
    )

    # Step 2: Restore General assistant (ID -1)
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET deleted = false,
                is_visible = true,
                is_default_persona = true
            WHERE id = :general_assistant_id
        """
        ),
        {"general_assistant_id": GENERAL_ASSISTANT_ID},
    )

    # Step 3: Restore Art assistant (ID -3)
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET deleted = false,
                is_visible = true,
                is_default_persona = true
            WHERE id = :art_assistant_id
        """
        ),
        {"art_assistant_id": ART_ASSISTANT_ID},
    )

    # Note: We don't restore the original tool associations, names, or descriptions
    # as those would require more complex logic to determine original state.
    # We also cannot restore original chat session persona_ids as we don't
    # have the original mappings.
    # Other builtin assistants remain deleted as per the requirement.


================================================
FILE: backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py
================================================
"""Add additional retrieval controls to Persona

Revision ID: 50b683a8295c
Revises: 7da0ae5ad583
Create Date: 2023-11-27 17:23:29.668422

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "50b683a8295c"
down_revision = "7da0ae5ad583"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("persona", sa.Column("num_chunks", sa.Integer(), nullable=True))
    op.add_column(
        "persona",
        sa.Column("apply_llm_relevance_filter", sa.Boolean(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("persona", "apply_llm_relevance_filter")
    op.drop_column("persona", "num_chunks")


================================================
FILE: backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py
================================================
"""Add last synced and last modified to document table

Revision ID: 52a219fb5233
Revises: f7e58d357687
Create Date: 2024-08-28 17:40:46.077470

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import func

# revision identifiers, used by Alembic.
revision = "52a219fb5233"
down_revision = "f7e58d357687"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # last modified represents the last time anything needing syncing to vespa changed
    # including row metadata and the document itself. This obviously does not include
    # the last_synced column.
    op.add_column(
        "document",
        sa.Column(
            "last_modified",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=func.now(),
        ),
    )

    # last synced represents the last time this document was synced to Vespa
    op.add_column(
        "document",
        sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True),
    )

    # Set last_synced to the same value as last_modified for existing rows
    op.execute(
        """
        UPDATE document
        SET last_synced = last_modified
        """
    )

    op.create_index(
        op.f("ix_document_last_modified"),
        "document",
        ["last_modified"],
        unique=False,
    )

    op.create_index(
        op.f("ix_document_last_synced"),
        "document",
        ["last_synced"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(op.f("ix_document_last_synced"), table_name="document")
    op.drop_index(op.f("ix_document_last_modified"), table_name="document")
    op.drop_column("document", "last_synced")
    op.drop_column("document", "last_modified")


================================================
FILE: backend/alembic/versions/54a74a0417fc_danswerbot_onyxbot.py
================================================
"""danswerbot -> onyxbot

Revision ID: 54a74a0417fc
Revises: 94dc3d0236f8
Create Date: 2024-12-11 18:05:05.490737

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "54a74a0417fc"
down_revision = "94dc3d0236f8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("chat_session", "danswerbot_flow", new_column_name="onyxbot_flow")


def downgrade() -> None:
    op.alter_column("chat_session", "onyxbot_flow", new_column_name="danswerbot_flow")


================================================
FILE: backend/alembic/versions/55546a7967ee_assistant_rework.py
================================================
"""assistant_rework

Revision ID: 55546a7967ee
Revises: 61ff3651add4
Create Date: 2024-09-18 17:00:23.755399

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "55546a7967ee"
down_revision = "61ff3651add4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Reworking persona and user tables for new assistant features
    # keep track of user's chosen assistants separate from their `ordering`
    op.add_column("persona", sa.Column("builtin_persona", sa.Boolean(), nullable=True))
    op.execute("UPDATE persona SET builtin_persona = default_persona")
    op.alter_column("persona", "builtin_persona", nullable=False)
    op.drop_index("_default_persona_name_idx", table_name="persona")
    op.create_index(
        "_builtin_persona_name_idx",
        "persona",
        ["name"],
        unique=True,
        postgresql_where=sa.text("builtin_persona = true"),
    )

    op.add_column(
        "user", sa.Column("visible_assistants", postgresql.JSONB(), nullable=True)
    )
    op.add_column(
        "user", sa.Column("hidden_assistants", postgresql.JSONB(), nullable=True)
    )
    op.execute(
        "UPDATE \"user\" SET visible_assistants = '[]'::jsonb, hidden_assistants = '[]'::jsonb"
    )
    op.alter_column(
        "user",
        "visible_assistants",
        nullable=False,
        server_default=sa.text("'[]'::jsonb"),
    )
    op.alter_column(
        "user",
        "hidden_assistants",
        nullable=False,
        server_default=sa.text("'[]'::jsonb"),
    )
    op.drop_column("persona", "default_persona")
    op.add_column(
        "persona", sa.Column("is_default_persona", sa.Boolean(), nullable=True)
    )


def downgrade() -> None:
    # Reverting changes made in upgrade
    op.drop_column("user", "hidden_assistants")
    op.drop_column("user", "visible_assistants")
    op.drop_index("_builtin_persona_name_idx", table_name="persona")

    op.drop_column("persona", "is_default_persona")
    op.add_column("persona", sa.Column("default_persona", sa.Boolean(), nullable=True))
    op.execute("UPDATE persona SET default_persona = builtin_persona")
    op.alter_column("persona", "default_persona", nullable=False)
    op.drop_column("persona", "builtin_persona")
    op.create_index(
        "_default_persona_name_idx",
        "persona",
        ["name"],
        unique=True,
        postgresql_where=sa.text("default_persona = true"),
    )


================================================
FILE: backend/alembic/versions/570282d33c49_track_onyxbot_explicitly.py
================================================
"""Track Onyxbot Explicitly

Revision ID: 570282d33c49
Revises: 7547d982db8f
Create Date: 2024-05-04 17:49:28.568109

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "570282d33c49"
down_revision = "7547d982db8f"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_session", sa.Column("danswerbot_flow", sa.Boolean(), nullable=True)
    )
    op.execute("UPDATE chat_session SET danswerbot_flow = one_shot")
    op.alter_column("chat_session", "danswerbot_flow", nullable=False)


def downgrade() -> None:
    op.drop_column("chat_session", "danswerbot_flow")


================================================
FILE: backend/alembic/versions/57122d037335_add_python_tool_on_default.py
================================================
"""add python tool on default

Revision ID: 57122d037335
Revises: c0c937d5c9e5
Create Date: 2026-02-27 10:10:40.124925

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "57122d037335"
down_revision = "c0c937d5c9e5"
branch_labels = None
depends_on = None


PYTHON_TOOL_NAME = "python"


def upgrade() -> None:
    conn = op.get_bind()

    # Look up the PythonTool id
    result = conn.execute(
        sa.text("SELECT id FROM tool WHERE name = :name"),
        {"name": PYTHON_TOOL_NAME},
    ).fetchone()

    if not result:
        return

    tool_id = result[0]

    # Attach to the default persona (id=0) if not already attached
    conn.execute(
        sa.text(
            """
            INSERT INTO persona__tool (persona_id, tool_id)
            VALUES (0, :tool_id)
            ON CONFLICT DO NOTHING
            """
        ),
        {"tool_id": tool_id},
    )


def downgrade() -> None:
    conn = op.get_bind()

    result = conn.execute(
        sa.text("SELECT id FROM tool WHERE name = :name"),
        {"name": PYTHON_TOOL_NAME},
    ).fetchone()

    if not result:
        return

    conn.execute(
        sa.text(
            """
            DELETE FROM persona__tool
            WHERE persona_id = 0 AND tool_id = :tool_id
            """
        ),
        {"tool_id": result[0]},
    )


================================================
FILE: backend/alembic/versions/57b53544726e_add_document_set_tables.py
================================================
"""Add document set tables

Revision ID: 57b53544726e
Revises: 800f48024ae9
Create Date: 2023-09-20 16:59:39.097177

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "57b53544726e"
down_revision = "800f48024ae9"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "document_set",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("is_up_to_date", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )
    op.create_table(
        "document_set__connector_credential_pair",
        sa.Column("document_set_id", sa.Integer(), nullable=False),
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
        sa.Column("is_current", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["connector_credential_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.ForeignKeyConstraint(
            ["document_set_id"],
            ["document_set.id"],
        ),
        sa.PrimaryKeyConstraint(
            "document_set_id", "connector_credential_pair_id", "is_current"
        ),
    )


def downgrade() -> None:
    op.drop_table("document_set__connector_credential_pair")
    op.drop_table("document_set")


================================================
FILE: backend/alembic/versions/5809c0787398_add_chat_sessions.py
================================================
"""Add Chat Sessions

Revision ID: 5809c0787398
Revises: d929f0c1c6af
Create Date: 2023-09-04 15:29:44.002164

"""

import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5809c0787398"
down_revision = "d929f0c1c6af"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "chat_session",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("description", sa.Text(), nullable=False),
        sa.Column("deleted", sa.Boolean(), nullable=False),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "chat_message",
        sa.Column("chat_session_id", sa.Integer(), nullable=False),
        sa.Column("message_number", sa.Integer(), nullable=False),
        sa.Column("edit_number", sa.Integer(), nullable=False),
        sa.Column("parent_edit_number", sa.Integer(), nullable=True),
        sa.Column("latest", sa.Boolean(), nullable=False),
        sa.Column("message", sa.Text(), nullable=False),
        sa.Column(
            "message_type",
            sa.Enum(
                "SYSTEM",
                "USER",
                "ASSISTANT",
                "DANSWER",
                name="messagetype",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column(
            "time_sent",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["chat_session_id"],
            ["chat_session.id"],
        ),
        sa.PrimaryKeyConstraint("chat_session_id", "message_number", "edit_number"),
    )


def downgrade() -> None:
    op.drop_table("chat_message")
    op.drop_table("chat_session")


================================================
FILE: backend/alembic/versions/58c50ef19f08_add_stale_column_to_user__external_user_.py
================================================
"""add stale column to external user group tables

Revision ID: 58c50ef19f08
Revises: 7b9b952abdf6
Create Date: 2025-06-25 14:08:14.162380

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "58c50ef19f08"
down_revision = "7b9b952abdf6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add the stale column with default value False to user__external_user_group_id
    op.add_column(
        "user__external_user_group_id",
        sa.Column("stale", sa.Boolean(), nullable=False, server_default="false"),
    )

    # Create index for efficient querying of stale rows by cc_pair_id
    op.create_index(
        "ix_user__external_user_group_id_cc_pair_id_stale",
        "user__external_user_group_id",
        ["cc_pair_id", "stale"],
        unique=False,
    )

    # Create index for efficient querying of all stale rows
    op.create_index(
        "ix_user__external_user_group_id_stale",
        "user__external_user_group_id",
        ["stale"],
        unique=False,
    )

    # Add the stale column with default value False to public_external_user_group
    op.add_column(
        "public_external_user_group",
        sa.Column("stale", sa.Boolean(), nullable=False, server_default="false"),
    )

    # Create index for efficient querying of stale rows by cc_pair_id
    op.create_index(
        "ix_public_external_user_group_cc_pair_id_stale",
        "public_external_user_group",
        ["cc_pair_id", "stale"],
        unique=False,
    )

    # Create index for efficient querying of all stale rows
    op.create_index(
        "ix_public_external_user_group_stale",
        "public_external_user_group",
        ["stale"],
        unique=False,
    )


def downgrade() -> None:
    # Drop the indices for public_external_user_group first
    op.drop_index(
        "ix_public_external_user_group_stale", table_name="public_external_user_group"
    )
    op.drop_index(
        "ix_public_external_user_group_cc_pair_id_stale",
        table_name="public_external_user_group",
    )

    # Drop the stale column from public_external_user_group
    op.drop_column("public_external_user_group", "stale")

    # Drop the indices for user__external_user_group_id
    op.drop_index(
        "ix_user__external_user_group_id_stale",
        table_name="user__external_user_group_id",
    )
    op.drop_index(
        "ix_user__external_user_group_id_cc_pair_id_stale",
        table_name="user__external_user_group_id",
    )

    # Drop the stale column from user__external_user_group_id
    op.drop_column("user__external_user_group_id", "stale")


================================================
FILE: backend/alembic/versions/5ae8240accb3_add_research_agent_database_tables_and_.py
================================================
"""add research agent database tables and chat message research fields

Revision ID: 5ae8240accb3
Revises: b558f51620b4
Create Date: 2025-08-06 14:29:24.691388

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "5ae8240accb3"
down_revision = "b558f51620b4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add research_type and research_plan columns to chat_message table
    op.add_column(
        "chat_message",
        sa.Column("research_type", sa.String(), nullable=True),
    )
    op.add_column(
        "chat_message",
        sa.Column("research_plan", postgresql.JSONB(), nullable=True),
    )

    # Create research_agent_iteration table
    op.create_table(
        "research_agent_iteration",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column(
            "primary_question_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column("purpose", sa.String(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "primary_question_id",
            "iteration_nr",
            name="_research_agent_iteration_unique_constraint",
        ),
    )

    # Create research_agent_iteration_sub_step table
    op.create_table(
        "research_agent_iteration_sub_step",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column(
            "primary_question_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "parent_question_id",
            sa.Integer(),
            sa.ForeignKey("research_agent_iteration_sub_step.id", ondelete="CASCADE"),
            nullable=True,
        ),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column("sub_step_instructions", sa.String(), nullable=True),
        sa.Column(
            "sub_step_tool_id",
            sa.Integer(),
            sa.ForeignKey("tool.id"),
            nullable=True,
        ),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.Column("sub_answer", sa.String(), nullable=True),
        sa.Column("cited_doc_results", postgresql.JSONB(), nullable=True),
        sa.Column("claims", postgresql.JSONB(), nullable=True),
        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
        sa.Column("additional_data", postgresql.JSONB(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["primary_question_id", "iteration_nr"],
            [
                "research_agent_iteration.primary_question_id",
                "research_agent_iteration.iteration_nr",
            ],
            ondelete="CASCADE",
        ),
    )


def downgrade() -> None:
    # Drop tables in reverse order
    op.drop_table("research_agent_iteration_sub_step")
    op.drop_table("research_agent_iteration")

    # Remove columns from chat_message table
    op.drop_column("chat_message", "research_plan")
    op.drop_column("chat_message", "research_type")


================================================
FILE: backend/alembic/versions/5b29123cd710_nullable_search_settings_for_historic_.py
================================================
"""nullable search settings for historic index attempts

Revision ID: 5b29123cd710
Revises: 949b4a92a401
Create Date: 2024-10-30 19:37:59.630704

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5b29123cd710"
down_revision = "949b4a92a401"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop the existing foreign key constraint
    op.drop_constraint(
        "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
    )

    # Modify the column to be nullable
    op.alter_column(
        "index_attempt", "search_settings_id", existing_type=sa.INTEGER(), nullable=True
    )

    # Add back the foreign key with ON DELETE SET NULL
    op.create_foreign_key(
        "fk_index_attempt_search_settings",
        "index_attempt",
        "search_settings",
        ["search_settings_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    # Warning: This will delete all index attempts that don't have search settings
    op.execute(
        """
        DELETE FROM index_attempt
        WHERE search_settings_id IS NULL
    """
    )

    # Drop foreign key constraint
    op.drop_constraint(
        "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
    )

    # Modify the column to be not nullable
    op.alter_column(
        "index_attempt",
        "search_settings_id",
        existing_type=sa.INTEGER(),
        nullable=False,
    )

    # Add back the foreign key without ON DELETE SET NULL
    op.create_foreign_key(
        "fk_index_attempt_search_settings",
        "index_attempt",
        "search_settings",
        ["search_settings_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/5c3dca366b35_backend_driven_notification_details.py
================================================
"""backend driven notification details

Revision ID: 5c3dca366b35
Revises: 9087b548dd69
Create Date: 2026-01-06 16:03:11.413724

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5c3dca366b35"
down_revision = "9087b548dd69"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "notification",
        sa.Column(
            "title", sa.String(), nullable=False, server_default="New Notification"
        ),
    )
    op.add_column(
        "notification",
        sa.Column("description", sa.String(), nullable=True, server_default=""),
    )


def downgrade() -> None:
    op.drop_column("notification", "title")
    op.drop_column("notification", "description")


================================================
FILE: backend/alembic/versions/5c448911b12f_add_content_type_to_userfile.py
================================================
"""Add content type to UserFile

Revision ID: 5c448911b12f
Revises: 47a07e1a38f1
Create Date: 2025-04-25 16:59:48.182672

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "5c448911b12f"
down_revision = "47a07e1a38f1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("user_file", sa.Column("content_type", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("user_file", "content_type")


================================================
FILE: backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py
================================================
"""match_any_keywords flag for standard answers

Revision ID: 5c7fdadae813
Revises: efb35676026c
Create Date: 2024-09-13 18:52:59.256478

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "5c7fdadae813"
down_revision = "efb35676026c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "standard_answer",
        sa.Column(
            "match_any_keywords",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("standard_answer", "match_any_keywords")
    # ### end Alembic commands ###


================================================
FILE: backend/alembic/versions/5d12a446f5c0_add_api_version_and_deployment_name_to_.py
================================================
"""add api_version and deployment_name to search settings

Revision ID: 5d12a446f5c0
Revises: e4334d5b33ba
Create Date: 2024-10-08 15:56:07.975636

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5d12a446f5c0"
down_revision = "e4334d5b33ba"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "embedding_provider", sa.Column("api_version", sa.String(), nullable=True)
    )
    op.add_column(
        "embedding_provider", sa.Column("deployment_name", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("embedding_provider", "deployment_name")
    op.drop_column("embedding_provider", "api_version")


================================================
FILE: backend/alembic/versions/5e1c073d48a3_add_personal_access_token_table.py
================================================
"""add_personal_access_token_table

Revision ID: 5e1c073d48a3
Revises: 09995b8811eb
Create Date: 2025-10-30 17:30:24.308521

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "5e1c073d48a3"
down_revision = "09995b8811eb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create personal_access_token table
    op.create_table(
        "personal_access_token",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("hashed_token", sa.String(length=64), nullable=False),
        sa.Column("token_display", sa.String(), nullable=False),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            nullable=False,
        ),
        sa.Column(
            "expires_at",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "last_used_at",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
        sa.Column(
            "is_revoked",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("hashed_token"),
    )

    # Create indexes
    op.create_index(
        "ix_personal_access_token_expires_at",
        "personal_access_token",
        ["expires_at"],
        unique=False,
    )
    op.create_index(
        "ix_pat_user_created",
        "personal_access_token",
        ["user_id", sa.text("created_at DESC")],
        unique=False,
    )


def downgrade() -> None:
    # Drop indexes first
    op.drop_index("ix_pat_user_created", table_name="personal_access_token")
    op.drop_index(
        "ix_personal_access_token_expires_at", table_name="personal_access_token"
    )

    # Drop table
    op.drop_table("personal_access_token")


================================================
FILE: backend/alembic/versions/5e6f7a8b9c0d_update_default_persona_prompt.py
================================================
"""update_default_persona_prompt

Revision ID: 5e6f7a8b9c0d
Revises: 4f8a2b3c1d9e
Create Date: 2025-11-30 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5e6f7a8b9c0d"
down_revision = "4f8a2b3c1d9e"
branch_labels = None
depends_on = None


DEFAULT_PERSONA_ID = 0

# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.

The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}

# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end


def upgrade() -> None:
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET system_prompt = :system_prompt
            WHERE id = :persona_id
            """
        ),
        {"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
    )


def downgrade() -> None:
    # We don't revert the system prompt on downgrade since we don't know
    # what the previous value was. The new prompt is a reasonable default.
    pass


================================================
FILE: backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py
================================================
"""Add docs_indexed_column + time_started to index_attempt table

Revision ID: 5e84129c8be3
Revises: e6a4bbc13fe4
Create Date: 2023-08-10 21:43:09.069523

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5e84129c8be3"
down_revision = "e6a4bbc13fe4"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "index_attempt",
        sa.Column("num_docs_indexed", sa.Integer()),
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "time_started",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("index_attempt", "time_started")
    op.drop_column("index_attempt", "num_docs_indexed")


================================================
FILE: backend/alembic/versions/5f4b8568a221_add_removed_documents_to_index_attempt.py
================================================
"""add removed documents to index_attempt

Revision ID: 5f4b8568a221
Revises: dbaa756c2ccf
Create Date: 2024-02-16 15:02:03.319907

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "5f4b8568a221"
down_revision = "8987770549c0"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "index_attempt",
        sa.Column("docs_removed_from_index", sa.Integer()),
    )
    op.execute("UPDATE index_attempt SET docs_removed_from_index = 0")


def downgrade() -> None:
    op.drop_column("index_attempt", "docs_removed_from_index")


================================================
FILE: backend/alembic/versions/5fc1f54cc252_hybrid_enum.py
================================================
"""hybrid-enum

Revision ID: 5fc1f54cc252
Revises: 1d6ad76d1f37
Create Date: 2024-08-06 15:35:40.278485

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "5fc1f54cc252"
down_revision = "1d6ad76d1f37"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("persona", "search_type")


def downgrade() -> None:
    op.add_column("persona", sa.Column("search_type", sa.String(), nullable=True))
    op.execute("UPDATE persona SET search_type = 'SEMANTIC'")
    op.alter_column("persona", "search_type", nullable=False)


================================================
FILE: backend/alembic/versions/61ff3651add4_add_permission_syncing.py
================================================
"""Add Permission Syncing

Revision ID: 61ff3651add4
Revises: 1b8206b29c5d
Create Date: 2024-09-05 13:57:11.770413

"""

import fastapi_users_db_sqlalchemy

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "61ff3651add4"
down_revision = "1b8206b29c5d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Admin user who set up connectors will lose access to the docs temporarily
    # only way currently to give back access is to rerun from beginning
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "access_type",
            sa.String(),
            nullable=True,
        ),
    )
    op.execute(
        "UPDATE connector_credential_pair SET access_type = 'PUBLIC' WHERE is_public = true"
    )
    op.execute(
        "UPDATE connector_credential_pair SET access_type = 'PRIVATE' WHERE is_public = false"
    )
    op.alter_column("connector_credential_pair", "access_type", nullable=False)

    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "auto_sync_options",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    op.add_column(
        "connector_credential_pair",
        sa.Column("last_time_perm_sync", sa.DateTime(timezone=True), nullable=True),
    )
    op.drop_column("connector_credential_pair", "is_public")

    op.add_column(
        "document",
        sa.Column("external_user_emails", postgresql.ARRAY(sa.String()), nullable=True),
    )
    op.add_column(
        "document",
        sa.Column(
            "external_user_group_ids", postgresql.ARRAY(sa.String()), nullable=True
        ),
    )
    op.add_column(
        "document",
        sa.Column("is_public", sa.Boolean(), nullable=True),
    )

    op.create_table(
        "user__external_user_group_id",
        sa.Column(
            "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False
        ),
        sa.Column("external_user_group_id", sa.String(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("user_id"),
    )

    op.drop_column("external_permission", "user_id")
    op.drop_column("email_to_external_user_cache", "user_id")
    op.drop_table("permission_sync_run")
    op.drop_table("external_permission")
    op.drop_table("email_to_external_user_cache")


def downgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column("is_public", sa.BOOLEAN(), nullable=True),
    )
    op.execute(
        "UPDATE connector_credential_pair SET is_public = (access_type = 'PUBLIC')"
    )
    op.alter_column("connector_credential_pair", "is_public", nullable=False)

    op.drop_column("connector_credential_pair", "auto_sync_options")
    op.drop_column("connector_credential_pair", "access_type")
    op.drop_column("connector_credential_pair", "last_time_perm_sync")
    op.drop_column("document", "external_user_emails")
    op.drop_column("document", "external_user_group_ids")
    op.drop_column("document", "is_public")

    op.drop_table("user__external_user_group_id")

    # Drop the enum type at the end of the downgrade
    op.create_table(
        "permission_sync_run",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "source_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column("update_type", sa.String(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
        sa.Column(
            "status",
            sa.String(),
            nullable=False,
        ),
        sa.Column("error_msg", sa.Text(), nullable=True),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "external_permission",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=True),
        sa.Column("user_email", sa.String(), nullable=False),
        sa.Column(
            "source_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column("external_permission_group", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "email_to_external_user_cache",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("external_user_id", sa.String(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=True),
        sa.Column("user_email", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


================================================
FILE: backend/alembic/versions/62c3a055a141_add_file_names_to_file_connector_config.py
================================================
"""add file names to file connector config

Revision ID: 62c3a055a141
Revises: 3fc5d75723b3
Create Date: 2025-07-30 17:01:24.417551

"""

from alembic import op
import sqlalchemy as sa
import json
import os
import logging


# revision identifiers, used by Alembic.
revision = "62c3a055a141"
down_revision = "3fc5d75723b3"
branch_labels = None
depends_on = None

SKIP_FILE_NAME_MIGRATION = (
    os.environ.get("SKIP_FILE_NAME_MIGRATION", "true").lower() == "true"
)

logger = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    if SKIP_FILE_NAME_MIGRATION:
        logger.info(
            "Skipping file name migration. Hint: set SKIP_FILE_NAME_MIGRATION=false to run this migration"
        )
        return
    logger.info("Running file name migration")
    # Get connection
    conn = op.get_bind()

    # Get all FILE connectors with their configs
    file_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'FILE'
        """
        )
    ).fetchall()

    for connector_id, config in file_connectors:
        # Parse config if it's a string
        if isinstance(config, str):
            config = json.loads(config)

        # Get file_locations list
        file_locations = config.get("file_locations", [])

        # Get display names for each file_id
        file_names = []
        for file_id in file_locations:
            result = conn.execute(
                sa.text(
                    """
                    SELECT display_name
                    FROM file_record
                    WHERE file_id = :file_id
                """
                ),
                {"file_id": file_id},
            ).fetchone()

            if result:
                file_names.append(result[0])
            else:
                file_names.append(file_id)  # Should not happen

        # Add file_names to config
        new_config = dict(config)
        new_config["file_names"] = file_names

        # Update the connector
        conn.execute(
            sa.text(
                """
                UPDATE connector
                SET connector_specific_config = :new_config
                WHERE id = :connector_id
            """
            ),
            {"connector_id": connector_id, "new_config": json.dumps(new_config)},
        )


def downgrade() -> None:
    # Get connection
    conn = op.get_bind()

    # Remove file_names from all FILE connectors
    file_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'FILE'
        """
        )
    ).fetchall()

    for connector_id, config in file_connectors:
        # Parse config if it's a string
        if isinstance(config, str):
            config = json.loads(config)

        # Remove file_names if it exists
        if "file_names" in config:
            new_config = dict(config)
            del new_config["file_names"]

            # Update the connector
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                """
                ),
                {
                    "connector_id": connector_id,
                    "new_config": json.dumps(new_config),
                },
            )


================================================
FILE: backend/alembic/versions/631fd2504136_add_approx_chunk_count_in_vespa_to_.py
================================================
"""add approx_chunk_count_in_vespa to opensearch tenant migration

Revision ID: 631fd2504136
Revises: c7f2e1b4a9d3
Create Date: 2026-02-18 21:07:52.831215

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "631fd2504136"
down_revision = "c7f2e1b4a9d3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "approx_chunk_count_in_vespa",
            sa.Integer(),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("opensearch_tenant_migration_record", "approx_chunk_count_in_vespa")


================================================
FILE: backend/alembic/versions/6436661d5b65_add_created_at_in_project_userfile.py
================================================
"""add_created_at_in_project_userfile

Revision ID: 6436661d5b65
Revises: c7e9f4a3b2d1
Create Date: 2025-11-24 11:50:24.536052

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "6436661d5b65"
down_revision = "c7e9f4a3b2d1"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add created_at column to project__user_file table
    op.add_column(
        "project__user_file",
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
    )
    # Add composite index on (project_id, created_at DESC)
    op.create_index(
        "ix_project__user_file_project_id_created_at",
        "project__user_file",
        ["project_id", sa.text("created_at DESC")],
    )


def downgrade() -> None:
    # Remove composite index on (project_id, created_at)
    op.drop_index(
        "ix_project__user_file_project_id_created_at", table_name="project__user_file"
    )
    # Remove created_at column from project__user_file table
    op.drop_column("project__user_file", "created_at")


================================================
FILE: backend/alembic/versions/643a84a42a33_add_user_configured_names_to_llmprovider.py
================================================
"""Add user-configured names to LLMProvider

Revision ID: 643a84a42a33
Revises: 0a98909f2757
Create Date: 2024-05-07 14:54:55.493100

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "643a84a42a33"
down_revision = "0a98909f2757"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("llm_provider", sa.Column("provider", sa.String(), nullable=True))
    # move "name" -> "provider" to match the new schema
    op.execute("UPDATE llm_provider SET provider = name")
    # pretty up display name
    op.execute("UPDATE llm_provider SET name = 'OpenAI' WHERE name = 'openai'")
    op.execute("UPDATE llm_provider SET name = 'Anthropic' WHERE name = 'anthropic'")
    op.execute("UPDATE llm_provider SET name = 'Azure OpenAI' WHERE name = 'azure'")
    op.execute("UPDATE llm_provider SET name = 'AWS Bedrock' WHERE name = 'bedrock'")

    # update personas to use the new provider names
    op.execute(
        "UPDATE persona SET llm_model_provider_override = 'OpenAI' WHERE llm_model_provider_override = 'openai'"
    )
    op.execute(
        "UPDATE persona SET llm_model_provider_override = 'Anthropic' WHERE llm_model_provider_override = 'anthropic'"
    )
    op.execute(
        "UPDATE persona SET llm_model_provider_override = 'Azure OpenAI' WHERE llm_model_provider_override = 'azure'"
    )
    op.execute(
        "UPDATE persona SET llm_model_provider_override = 'AWS Bedrock' WHERE llm_model_provider_override = 'bedrock'"
    )


def downgrade() -> None:
    op.execute("UPDATE llm_provider SET name = provider")
    op.drop_column("llm_provider", "provider")


================================================
FILE: backend/alembic/versions/64bd5677aeb6_add_image_input_support_to_model_config.py
================================================
"""Add image input support to model config

Revision ID: 64bd5677aeb6
Revises: b30353be4eec
Create Date: 2025-09-28 15:48:12.003612

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "64bd5677aeb6"
down_revision = "b30353be4eec"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "model_configuration",
        sa.Column("supports_image_input", sa.Boolean(), nullable=True),
    )

    # Seems to be left over from when model visibility was introduced and a nullable field.
    # Set any null is_visible values to False
    connection = op.get_bind()
    connection.execute(
        sa.text(
            "UPDATE model_configuration SET is_visible = false WHERE is_visible IS NULL"
        )
    )


def downgrade() -> None:
    op.drop_column("model_configuration", "supports_image_input")


================================================
FILE: backend/alembic/versions/65bc6e0f8500_remove_kg_subtype_from_db.py
================================================
"""remove kg subtype from db

Revision ID: 65bc6e0f8500
Revises: cec7ec36c505
Create Date: 2025-06-13 10:04:27.705976

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "65bc6e0f8500"
down_revision = "cec7ec36c505"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_column("kg_entity", "entity_class")
    op.drop_column("kg_entity", "entity_subtype")
    op.drop_column("kg_entity_extraction_staging", "entity_class")
    op.drop_column("kg_entity_extraction_staging", "entity_subtype")


def downgrade() -> None:
    op.add_column(
        "kg_entity_extraction_staging",
        sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
    )
    op.add_column(
        "kg_entity_extraction_staging",
        sa.Column("entity_class", sa.String(), nullable=True, index=True),
    )
    op.add_column(
        "kg_entity", sa.Column("entity_subtype", sa.String(), nullable=True, index=True)
    )
    op.add_column(
        "kg_entity", sa.Column("entity_class", sa.String(), nullable=True, index=True)
    )


================================================
FILE: backend/alembic/versions/6756efa39ada_id_uuid_for_chat_session.py
================================================
"""Migrate chat_session and chat_message tables to use UUID primary keys

Revision ID: 6756efa39ada
Revises: 5d12a446f5c0
Create Date: 2024-10-15 17:47:44.108537

"""

from alembic import op
import sqlalchemy as sa

revision = "6756efa39ada"
down_revision = "5d12a446f5c0"
branch_labels = None
depends_on = None

"""
This script:
1. Adds UUID columns to chat_session and chat_message
2. Populates new columns with UUIDs
3. Updates foreign key relationships
4. Removes old integer ID columns

Note: Downgrade will assign new integer IDs, not restore original ones.
"""


def upgrade() -> None:
    op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto;")

    op.add_column(
        "chat_session",
        sa.Column(
            "new_id",
            sa.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
    )

    op.execute("UPDATE chat_session SET new_id = gen_random_uuid();")

    op.add_column(
        "chat_message",
        sa.Column("new_chat_session_id", sa.UUID(as_uuid=True), nullable=True),
    )

    op.execute(
        """
        UPDATE chat_message
        SET new_chat_session_id = cs.new_id
        FROM chat_session cs
        WHERE chat_message.chat_session_id = cs.id;
        """
    )

    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )

    op.drop_column("chat_message", "chat_session_id")
    op.alter_column(
        "chat_message", "new_chat_session_id", new_column_name="chat_session_id"
    )

    op.drop_constraint("chat_session_pkey", "chat_session", type_="primary")
    op.drop_column("chat_session", "id")
    op.alter_column("chat_session", "new_id", new_column_name="id")

    op.create_primary_key("chat_session_pkey", "chat_session", ["id"])

    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )

    op.add_column(
        "chat_session",
        sa.Column("old_id", sa.Integer, autoincrement=True, nullable=True),
    )

    op.execute("CREATE SEQUENCE chat_session_old_id_seq OWNED BY chat_session.old_id;")
    op.execute(
        "ALTER TABLE chat_session ALTER COLUMN old_id SET DEFAULT nextval('chat_session_old_id_seq');"
    )

    op.execute(
        "UPDATE chat_session SET old_id = nextval('chat_session_old_id_seq') WHERE old_id IS NULL;"
    )

    op.alter_column("chat_session", "old_id", nullable=False)

    op.drop_constraint("chat_session_pkey", "chat_session", type_="primary")
    op.create_primary_key("chat_session_pkey", "chat_session", ["old_id"])

    op.add_column(
        "chat_message",
        sa.Column("old_chat_session_id", sa.Integer, nullable=True),
    )

    op.execute(
        """
        UPDATE chat_message
        SET old_chat_session_id = cs.old_id
        FROM chat_session cs
        WHERE chat_message.chat_session_id = cs.id;
        """
    )

    op.drop_column("chat_message", "chat_session_id")
    op.alter_column(
        "chat_message", "old_chat_session_id", new_column_name="chat_session_id"
    )

    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["old_id"],
        ondelete="CASCADE",
    )

    op.drop_column("chat_session", "id")
    op.alter_column("chat_session", "old_id", new_column_name="id")

    op.alter_column(
        "chat_session",
        "id",
        type_=sa.Integer(),
        existing_type=sa.Integer(),
        existing_nullable=False,
        existing_server_default=False,
    )

    # Rename the sequence
    op.execute("ALTER SEQUENCE chat_session_old_id_seq RENAME TO chat_session_id_seq;")

    # Update the default value to use the renamed sequence
    op.alter_column(
        "chat_session",
        "id",
        server_default=sa.text("nextval('chat_session_id_seq'::regclass)"),
    )


================================================
FILE: backend/alembic/versions/689433b0d8de_add_hook_and_hook_execution_log_tables.py
================================================
"""add_hook_and_hook_execution_log_tables

Revision ID: 689433b0d8de
Revises: 93a2e195e25c
Create Date: 2026-03-13 11:25:06.547474

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID as PGUUID


# revision identifiers, used by Alembic.
revision = "689433b0d8de"
down_revision = "93a2e195e25c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "hook",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "hook_point",
            sa.Enum("document_ingestion", "query_processing", native_enum=False),
            nullable=False,
        ),
        sa.Column("endpoint_url", sa.Text(), nullable=True),
        sa.Column("api_key", sa.LargeBinary(), nullable=True),
        sa.Column("is_reachable", sa.Boolean(), nullable=True),
        sa.Column(
            "fail_strategy",
            sa.Enum("hard", "soft", native_enum=False),
            nullable=False,
        ),
        sa.Column("timeout_seconds", sa.Float(), nullable=False),
        sa.Column(
            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column(
            "deleted", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column("creator_id", PGUUID(as_uuid=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(["creator_id"], ["user.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "ix_hook_one_non_deleted_per_point",
        "hook",
        ["hook_point"],
        unique=True,
        postgresql_where=sa.text("deleted = false"),
    )

    op.create_table(
        "hook_execution_log",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("hook_id", sa.Integer(), nullable=False),
        sa.Column(
            "is_success",
            sa.Boolean(),
            nullable=False,
        ),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("status_code", sa.Integer(), nullable=True),
        sa.Column("duration_ms", sa.Integer(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(["hook_id"], ["hook.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_hook_execution_log_hook_id", "hook_execution_log", ["hook_id"])
    op.create_index(
        "ix_hook_execution_log_created_at", "hook_execution_log", ["created_at"]
    )


def downgrade() -> None:
    op.drop_index("ix_hook_execution_log_created_at", table_name="hook_execution_log")
    op.drop_index("ix_hook_execution_log_hook_id", table_name="hook_execution_log")
    op.drop_table("hook_execution_log")

    op.drop_index("ix_hook_one_non_deleted_per_point", table_name="hook")
    op.drop_table("hook")


================================================
FILE: backend/alembic/versions/699221885109_nullify_default_task_prompt.py
================================================
"""nullify_default_task_prompt

Revision ID: 699221885109
Revises: 7e490836d179
Create Date: 2025-12-30 10:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "699221885109"
down_revision = "7e490836d179"
branch_labels = None
depends_on = None

DEFAULT_PERSONA_ID = 0


def upgrade() -> None:
    # Make task_prompt column nullable
    # Note: The model had nullable=True but the DB column was NOT NULL until this point
    op.alter_column(
        "persona",
        "task_prompt",
        nullable=True,
    )

    # Set task_prompt to NULL for the default persona
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET task_prompt = NULL
            WHERE id = :persona_id
            """
        ),
        {"persona_id": DEFAULT_PERSONA_ID},
    )


def downgrade() -> None:
    # Restore task_prompt to empty string for the default persona
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET task_prompt = ''
            WHERE id = :persona_id AND task_prompt IS NULL
            """
        ),
        {"persona_id": DEFAULT_PERSONA_ID},
    )

    # Set any remaining NULL task_prompts to empty string before making non-nullable
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET task_prompt = ''
            WHERE task_prompt IS NULL
            """
        )
    )

    # Revert task_prompt column to not nullable
    op.alter_column(
        "persona",
        "task_prompt",
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/6a804aeb4830_duplicated_no_harm_user_file_migration.py
================================================
"""duplicated no-harm user file migration

Revision ID: 6a804aeb4830
Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362

"""

# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
down_revision = "8e1ac4f39a9f"
branch_labels = None
depends_on = None


# Leaving this around only because some people might be on this migration
# originally was a duplicate of the user files migration
def upgrade() -> None:
    pass


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/6b3b4083c5aa_persona_cleanup_and_featured.py
================================================
"""persona cleanup and featured

Revision ID: 6b3b4083c5aa
Revises: 57122d037335
Create Date: 2026-02-26 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "6b3b4083c5aa"
down_revision = "57122d037335"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add featured column with nullable=True first
    op.add_column("persona", sa.Column("featured", sa.Boolean(), nullable=True))

    # Migrate data from is_default_persona to featured
    op.execute("UPDATE persona SET featured = is_default_persona")

    # Make featured non-nullable with default=False
    op.alter_column(
        "persona",
        "featured",
        existing_type=sa.Boolean(),
        nullable=False,
        server_default=sa.false(),
    )

    # Drop is_default_persona column
    op.drop_column("persona", "is_default_persona")

    # Drop unused columns
    op.drop_column("persona", "num_chunks")
    op.drop_column("persona", "chunks_above")
    op.drop_column("persona", "chunks_below")
    op.drop_column("persona", "llm_relevance_filter")
    op.drop_column("persona", "llm_filter_extraction")
    op.drop_column("persona", "recency_bias")


def downgrade() -> None:
    # Add back recency_bias column
    op.add_column(
        "persona",
        sa.Column(
            "recency_bias",
            sa.VARCHAR(),
            nullable=False,
            server_default="base_decay",
        ),
    )

    # Add back llm_filter_extraction column
    op.add_column(
        "persona",
        sa.Column(
            "llm_filter_extraction",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )

    # Add back llm_relevance_filter column
    op.add_column(
        "persona",
        sa.Column(
            "llm_relevance_filter",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )

    # Add back chunks_below column
    op.add_column(
        "persona",
        sa.Column("chunks_below", sa.Integer(), nullable=False, server_default="0"),
    )

    # Add back chunks_above column
    op.add_column(
        "persona",
        sa.Column("chunks_above", sa.Integer(), nullable=False, server_default="0"),
    )

    # Add back num_chunks column
    op.add_column("persona", sa.Column("num_chunks", sa.Float(), nullable=True))

    # Add back is_default_persona column
    op.add_column(
        "persona",
        sa.Column(
            "is_default_persona",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )

    # Migrate data from featured to is_default_persona
    op.execute("UPDATE persona SET is_default_persona = featured")

    # Drop featured column
    op.drop_column("persona", "featured")


================================================
FILE: backend/alembic/versions/6d387b3196c2_basic_auth.py
================================================
"""Basic Auth

Revision ID: 6d387b3196c2
Revises: 47433d30de82
Create Date: 2023-05-05 14:40:10.242502

"""

import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "6d387b3196c2"
down_revision = "47433d30de82"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "user",
        sa.Column("id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False),
        sa.Column("email", sa.String(length=320), nullable=False),
        sa.Column("hashed_password", sa.String(length=1024), nullable=False),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("is_superuser", sa.Boolean(), nullable=False),
        sa.Column("is_verified", sa.Boolean(), nullable=False),
        sa.Column(
            "role",
            sa.Enum("BASIC", "ADMIN", name="userrole", native_enum=False),
            default="BASIC",
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_user_email"), "user", ["email"], unique=True)
    op.create_table(
        "accesstoken",
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("token", sa.String(length=43), nullable=False),
        sa.Column(
            "created_at",
            fastapi_users_db_sqlalchemy.generics.TIMESTAMPAware(timezone=True),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="cascade"),
        sa.PrimaryKeyConstraint("token"),
    )
    op.create_index(
        op.f("ix_accesstoken_created_at"),
        "accesstoken",
        ["created_at"],
        unique=False,
    )
    op.alter_column(
        "index_attempt",
        "time_created",
        existing_type=postgresql.TIMESTAMP(timezone=True),
        nullable=False,
        existing_server_default=sa.text("now()"),  # type: ignore
    )
    op.alter_column(
        "index_attempt",
        "time_updated",
        existing_type=postgresql.TIMESTAMP(timezone=True),
        nullable=False,
    )


def downgrade() -> None:
    op.alter_column(
        "index_attempt",
        "time_updated",
        existing_type=postgresql.TIMESTAMP(timezone=True),
        nullable=True,
    )
    op.alter_column(
        "index_attempt",
        "time_created",
        existing_type=postgresql.TIMESTAMP(timezone=True),
        nullable=True,
        existing_server_default=sa.text("now()"),  # type: ignore
    )
    op.drop_index(op.f("ix_accesstoken_created_at"), table_name="accesstoken")
    op.drop_table("accesstoken")
    op.drop_index(op.f("ix_user_email"), table_name="user")
    op.drop_table("user")


================================================
FILE: backend/alembic/versions/6d562f86c78b_remove_default_bot.py
================================================
"""remove default bot

Revision ID: 6d562f86c78b
Revises: 177de57c21c9
Create Date: 2024-11-22 11:51:29.331336

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "6d562f86c78b"
down_revision = "177de57c21c9"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        sa.text(
            """
            DELETE FROM slack_bot
            WHERE name = 'Default Bot'
            AND bot_token = ''
            AND app_token = ''
            AND NOT EXISTS (
                SELECT 1 FROM slack_channel_config
                WHERE slack_channel_config.slack_bot_id = slack_bot.id
            )
            """
        )
    )


def downgrade() -> None:
    op.execute(
        sa.text(
            """
            INSERT INTO slack_bot (name, enabled, bot_token, app_token)
            SELECT 'Default Bot', true, '', ''
            WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
            RETURNING id;
            """
        )
    )


================================================
FILE: backend/alembic/versions/6f4f86aef280_add_queries_and_is_web_fetch_to_.py
================================================
"""add queries and is web fetch to iteration answer

Revision ID: 6f4f86aef280
Revises: 03d710ccf29c
Create Date: 2025-10-14 18:08:30.920123

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "6f4f86aef280"
down_revision = "03d710ccf29c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add is_web_fetch column
    op.add_column(
        "research_agent_iteration_sub_step",
        sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
    )

    # Add queries column
    op.add_column(
        "research_agent_iteration_sub_step",
        sa.Column("queries", postgresql.JSONB(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("research_agent_iteration_sub_step", "queries")
    op.drop_column("research_agent_iteration_sub_step", "is_web_fetch")


================================================
FILE: backend/alembic/versions/6fc7886d665d_make_categories_labels_and_many_to_many.py
================================================
"""make categories labels and many to many

Revision ID: 6fc7886d665d
Revises: 3c6531f32351
Create Date: 2025-01-13 18:12:18.029112

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "6fc7886d665d"
down_revision = "3c6531f32351"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Rename persona_category table to persona_label
    op.rename_table("persona_category", "persona_label")

    # Create the new association table
    op.create_table(
        "persona__persona_label",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("persona_label_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["persona_label_id"],
            ["persona_label.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("persona_id", "persona_label_id"),
    )

    # Copy existing relationships to the new table
    op.execute(
        """
        INSERT INTO persona__persona_label (persona_id, persona_label_id)
        SELECT id, category_id FROM persona WHERE category_id IS NOT NULL
    """
    )

    # Remove the old category_id column from persona table
    op.drop_column("persona", "category_id")


def downgrade() -> None:
    # Rename persona_label table back to persona_category
    op.rename_table("persona_label", "persona_category")

    # Add back the category_id column to persona table
    op.add_column("persona", sa.Column("category_id", sa.Integer(), nullable=True))
    op.create_foreign_key(
        "persona_category_id_fkey",
        "persona",
        "persona_category",
        ["category_id"],
        ["id"],
    )

    # Copy the first label relationship back to the persona table
    op.execute(
        """
        UPDATE persona
        SET category_id = (
            SELECT persona_label_id
            FROM persona__persona_label
            WHERE persona__persona_label.persona_id = persona.id
            LIMIT 1
        )
    """
    )

    # Drop the association table
    op.drop_table("persona__persona_label")


================================================
FILE: backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py
================================================
"""Add TokenRateLimit Tables

Revision ID: 703313b75876
Revises: fad14119fb92
Create Date: 2024-04-15 01:36:02.952809

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "703313b75876"
down_revision = "fad14119fb92"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "token_rate_limit",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("enabled", sa.Boolean(), nullable=False),
        sa.Column("token_budget", sa.Integer(), nullable=False),
        sa.Column("period_hours", sa.Integer(), nullable=False),
        sa.Column(
            "scope",
            sa.String(length=10),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "token_rate_limit__user_group",
        sa.Column("rate_limit_id", sa.Integer(), nullable=False),
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["rate_limit_id"],
            ["token_rate_limit.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("rate_limit_id", "user_group_id"),
    )

    # NOTE: rate limit settings used to be stored in the "token_budget_settings" key in the
    # KeyValueStore. This will now be lost. The KV store works differently than it used to
    # so the migration is fairly complicated and likely not worth it to support (pretty much
    # nobody will have it set)


def downgrade() -> None:
    op.drop_table("token_rate_limit__user_group")
    op.drop_table("token_rate_limit")


================================================
FILE: backend/alembic/versions/70f00c45c0f2_more_descriptive_filestore.py
================================================
"""More Descriptive Filestore

Revision ID: 70f00c45c0f2
Revises: 3879338f8ba1
Create Date: 2024-05-17 17:51:41.926893

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "70f00c45c0f2"
down_revision = "3879338f8ba1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("file_store", sa.Column("display_name", sa.String(), nullable=True))
    op.add_column(
        "file_store",
        sa.Column(
            "file_origin",
            sa.String(),
            nullable=False,
            server_default="connector",  # Default to connector
        ),
    )
    op.add_column(
        "file_store",
        sa.Column(
            "file_type", sa.String(), nullable=False, server_default="text/plain"
        ),
    )
    op.add_column(
        "file_store",
        sa.Column(
            "file_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )

    op.execute(
        """
        UPDATE file_store
        SET file_origin = CASE
            WHEN file_name LIKE 'chat__%' THEN 'chat_upload'
            ELSE 'connector'
        END,
        file_name = CASE
            WHEN file_name LIKE 'chat__%' THEN SUBSTR(file_name, 7)
            ELSE file_name
        END,
        file_type = CASE
            WHEN file_name LIKE 'chat__%' THEN 'image/png'
            ELSE 'text/plain'
        END
    """
    )


def downgrade() -> None:
    op.drop_column("file_store", "file_metadata")
    op.drop_column("file_store", "file_type")
    op.drop_column("file_store", "file_origin")
    op.drop_column("file_store", "display_name")


================================================
FILE: backend/alembic/versions/7206234e012a_add_image_generation_config_table.py
================================================
"""add image generation config table

Revision ID: 7206234e012a
Revises: 699221885109
Create Date: 2025-12-21 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "7206234e012a"
down_revision = "699221885109"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "image_generation_config",
        sa.Column("image_provider_id", sa.String(), primary_key=True),
        sa.Column("model_configuration_id", sa.Integer(), nullable=False),
        sa.Column("is_default", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["model_configuration_id"],
            ["model_configuration.id"],
            ondelete="CASCADE",
        ),
    )
    op.create_index(
        "ix_image_generation_config_is_default",
        "image_generation_config",
        ["is_default"],
        unique=False,
    )
    op.create_index(
        "ix_image_generation_config_model_configuration_id",
        "image_generation_config",
        ["model_configuration_id"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(
        "ix_image_generation_config_model_configuration_id",
        table_name="image_generation_config",
    )
    op.drop_index(
        "ix_image_generation_config_is_default", table_name="image_generation_config"
    )
    op.drop_table("image_generation_config")


================================================
FILE: backend/alembic/versions/72aa7de2e5cf_make_processing_mode_default_all_caps.py
================================================
"""make processing mode default all caps

Revision ID: 72aa7de2e5cf
Revises: 2020d417ec84
Create Date: 2026-01-26 18:58:47.705253

This migration fixes the ProcessingMode enum value mismatch:
- SQLAlchemy's Enum with native_enum=False uses enum member NAMES as valid values
- The original migration stored lowercase VALUES ('regular', 'file_system')
- This converts existing data to uppercase NAMES ('REGULAR', 'FILE_SYSTEM')
- Also drops any spurious native PostgreSQL enum type that may have been auto-created
"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "72aa7de2e5cf"
down_revision = "2020d417ec84"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Convert existing lowercase values to uppercase to match enum member names
    op.execute(
        "UPDATE connector_credential_pair SET processing_mode = 'REGULAR' WHERE processing_mode = 'regular'"
    )
    op.execute(
        "UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' WHERE processing_mode = 'file_system'"
    )

    # Update the server default to use uppercase
    op.alter_column(
        "connector_credential_pair",
        "processing_mode",
        server_default="REGULAR",
    )


def downgrade() -> None:
    # State prior to this was broken, so we don't want to revert back to it
    pass


================================================
FILE: backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py
================================================
"""Permission Auto Sync Framework

Revision ID: 72bdc9929a46
Revises: 475fcefe8826
Create Date: 2024-04-14 21:15:28.659634

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "72bdc9929a46"
down_revision = "475fcefe8826"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "email_to_external_user_cache",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("external_user_id", sa.String(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=True),
        sa.Column("user_email", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "external_permission",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=True),
        sa.Column("user_email", sa.String(), nullable=False),
        sa.Column(
            "source_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column("external_permission_group", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "permission_sync_run",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "source_type",
            sa.String(),
            nullable=False,
        ),
        sa.Column("update_type", sa.String(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
        sa.Column(
            "status",
            sa.String(),
            nullable=False,
        ),
        sa.Column("error_msg", sa.Text(), nullable=True),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("permission_sync_run")
    op.drop_table("external_permission")
    op.drop_table("email_to_external_user_cache")


================================================
FILE: backend/alembic/versions/73e9983e5091_add_search_query_table.py
================================================
"""add_search_query_table

Revision ID: 73e9983e5091
Revises: d1b637d7050a
Create Date: 2026-01-14 14:16:52.837489

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "73e9983e5091"
down_revision = "d1b637d7050a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "search_query",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("user.id"),
            nullable=False,
        ),
        sa.Column("query", sa.String(), nullable=False),
        sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )

    op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
    op.create_index("ix_search_query_created_at", "search_query", ["created_at"])


def downgrade() -> None:
    op.drop_index("ix_search_query_created_at", table_name="search_query")
    op.drop_index("ix_search_query_user_id", table_name="search_query")
    op.drop_table("search_query")


================================================
FILE: backend/alembic/versions/7477a5f5d728_added_model_defaults_for_users.py
================================================
"""Added model defaults for users

Revision ID: 7477a5f5d728
Revises: 213fd978c6d8
Create Date: 2024-08-04 19:00:04.512634

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "7477a5f5d728"
down_revision = "213fd978c6d8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("user", sa.Column("default_model", sa.Text(), nullable=True))


def downgrade() -> None:
    op.drop_column("user", "default_model")


================================================
FILE: backend/alembic/versions/7547d982db8f_chat_folders.py
================================================
"""Chat Folders

Revision ID: 7547d982db8f
Revises: ef7da92f7213
Create Date: 2024-05-02 15:18:56.573347

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy

# revision identifiers, used by Alembic.
revision = "7547d982db8f"
down_revision = "ef7da92f7213"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "chat_folder",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("display_priority", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.add_column("chat_session", sa.Column("folder_id", sa.Integer(), nullable=True))
    op.create_foreign_key(
        "chat_session_chat_folder_fk",
        "chat_session",
        "chat_folder",
        ["folder_id"],
        ["id"],
    )


def downgrade() -> None:
    bind = op.get_bind()
    inspector = sa.inspect(bind)

    if "chat_session" in inspector.get_table_names():
        chat_session_fks = {
            fk.get("name") for fk in inspector.get_foreign_keys("chat_session")
        }
        if "chat_session_chat_folder_fk" in chat_session_fks:
            op.drop_constraint(
                "chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
            )

        chat_session_columns = {
            col["name"] for col in inspector.get_columns("chat_session")
        }
        if "folder_id" in chat_session_columns:
            op.drop_column("chat_session", "folder_id")

    if "chat_folder" in inspector.get_table_names():
        op.drop_table("chat_folder")


================================================
FILE: backend/alembic/versions/7616121f6e97_add_enterprise_fields_to_scim_user_mapping.py
================================================
"""add enterprise and name fields to scim_user_mapping

Revision ID: 7616121f6e97
Revises: 07b98176f1de
Create Date: 2026-02-23 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "7616121f6e97"
down_revision = "07b98176f1de"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "scim_user_mapping",
        sa.Column("department", sa.String(), nullable=True),
    )
    op.add_column(
        "scim_user_mapping",
        sa.Column("manager", sa.String(), nullable=True),
    )
    op.add_column(
        "scim_user_mapping",
        sa.Column("given_name", sa.String(), nullable=True),
    )
    op.add_column(
        "scim_user_mapping",
        sa.Column("family_name", sa.String(), nullable=True),
    )
    op.add_column(
        "scim_user_mapping",
        sa.Column("scim_emails_json", sa.Text(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("scim_user_mapping", "scim_emails_json")
    op.drop_column("scim_user_mapping", "family_name")
    op.drop_column("scim_user_mapping", "given_name")
    op.drop_column("scim_user_mapping", "manager")
    op.drop_column("scim_user_mapping", "department")


================================================
FILE: backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py
================================================
"""Count Chat Tokens

Revision ID: 767f1c2a00eb
Revises: dba7f71618f5
Create Date: 2023-09-21 10:03:21.509899

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "767f1c2a00eb"
down_revision = "dba7f71618f5"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_message", sa.Column("token_count", sa.Integer(), nullable=False)
    )


def downgrade() -> None:
    op.drop_column("chat_message", "token_count")


================================================
FILE: backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py
================================================
"""CC-Pair Name not Unique

Revision ID: 76b60d407dfb
Revises: b156fa702355
Create Date: 2023-12-22 21:42:10.018804

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "76b60d407dfb"
down_revision = "b156fa702355"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute("DELETE FROM connector_credential_pair WHERE name IS NULL")
    op.drop_constraint(
        "connector_credential_pair__name__key",
        "connector_credential_pair",
        type_="unique",
    )
    op.alter_column(
        "connector_credential_pair", "name", existing_type=sa.String(), nullable=False
    )


def downgrade() -> None:
    op.create_unique_constraint(
        "connector_credential_pair__name__key", "connector_credential_pair", ["name"]
    )
    op.alter_column(
        "connector_credential_pair", "name", existing_type=sa.String(), nullable=True
    )


================================================
FILE: backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py
================================================
"""Remove Remaining Enums

Revision ID: 776b3bbe9092
Revises: 4738e4b3bae1
Create Date: 2024-03-22 21:34:27.629444

"""

from alembic import op
import sqlalchemy as sa

from onyx.db.models import IndexModelStatus
from onyx.context.search.enums import RecencyBiasSetting, SearchType

# revision identifiers, used by Alembic.
revision = "776b3bbe9092"
down_revision = "4738e4b3bae1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column(
        "persona",
        "search_type",
        type_=sa.String,
        existing_type=sa.Enum(SearchType, native_enum=False),
        existing_nullable=False,
    )
    op.alter_column(
        "persona",
        "recency_bias",
        type_=sa.String,
        existing_type=sa.Enum(RecencyBiasSetting, native_enum=False),
        existing_nullable=False,
    )

    # Because the indexmodelstatus enum does not have a mapping to a string type
    # we need this workaround instead of directly changing the type
    op.add_column("embedding_model", sa.Column("temp_status", sa.String))
    op.execute("UPDATE embedding_model SET temp_status = status::text")
    op.drop_column("embedding_model", "status")
    op.alter_column("embedding_model", "temp_status", new_column_name="status")

    op.execute("DROP TYPE IF EXISTS searchtype")
    op.execute("DROP TYPE IF EXISTS recencybiassetting")
    op.execute("DROP TYPE IF EXISTS indexmodelstatus")


def downgrade() -> None:
    op.alter_column(
        "persona",
        "search_type",
        type_=sa.Enum(SearchType, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )
    op.alter_column(
        "persona",
        "recency_bias",
        type_=sa.Enum(RecencyBiasSetting, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )
    op.alter_column(
        "embedding_model",
        "status",
        type_=sa.Enum(IndexModelStatus, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )


================================================
FILE: backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py
================================================
"""forcibly remove more enum types from postgres

Revision ID: 77d07dffae64
Revises: d61e513bef0a
Create Date: 2023-11-01 12:33:01.999617

"""

from alembic import op
from sqlalchemy import String


# revision identifiers, used by Alembic.
revision = "77d07dffae64"
down_revision = "d61e513bef0a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # In a PR:
    # https://github.com/onyx-dot-app/onyx/pull/397/files#diff-f05fb341f6373790b91852579631b64ca7645797a190837156a282b67e5b19c2
    # we directly changed some previous migrations. This caused some users to have native enums
    # while others wouldn't. This has caused some issues when adding new fields to these enums.
    # This migration manually changes the enum types to ensure that nobody uses native enums.
    op.alter_column("query_event", "selected_search_flow", type_=String)
    op.alter_column("query_event", "feedback", type_=String)
    op.alter_column("document_retrieval_feedback", "feedback", type_=String)
    op.execute("DROP TYPE IF EXISTS searchtype")
    op.execute("DROP TYPE IF EXISTS qafeedbacktype")
    op.execute("DROP TYPE IF EXISTS searchfeedbacktype")


def downgrade() -> None:
    # We don't want Native Enums, do nothing
    pass


================================================
FILE: backend/alembic/versions/78dbe7e38469_task_tracking.py
================================================
"""Task Tracking

Revision ID: 78dbe7e38469
Revises: 7ccea01261f6
Create Date: 2023-10-15 23:40:50.593262

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "78dbe7e38469"
down_revision = "7ccea01261f6"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "task_queue_jobs",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("task_id", sa.String(), nullable=False),
        sa.Column("task_name", sa.String(), nullable=False),
        sa.Column(
            "status",
            sa.Enum(
                "PENDING",
                "STARTED",
                "SUCCESS",
                "FAILURE",
                name="taskstatus",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column("start_time", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "register_time",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("task_queue_jobs")


================================================
FILE: backend/alembic/versions/78ebc66946a0_remove_reranking_from_search_settings.py
================================================
"""remove reranking from search_settings

Revision ID: 78ebc66946a0
Revises: 849b21c732f8
Create Date: 2026-01-28

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "78ebc66946a0"
down_revision = "849b21c732f8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("search_settings", "disable_rerank_for_streaming")
    op.drop_column("search_settings", "rerank_model_name")
    op.drop_column("search_settings", "rerank_provider_type")
    op.drop_column("search_settings", "rerank_api_key")
    op.drop_column("search_settings", "rerank_api_url")
    op.drop_column("search_settings", "num_rerank")


def downgrade() -> None:
    op.add_column(
        "search_settings",
        sa.Column(
            "disable_rerank_for_streaming",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "num_rerank",
            sa.Integer(),
            nullable=False,
            server_default=str(20),
        ),
    )


================================================
FILE: backend/alembic/versions/795b20b85b4b_add_llm_group_permissions_control.py
================================================
"""add_llm_group_permissions_control

Revision ID: 795b20b85b4b
Revises: 05c07bf07c00
Create Date: 2024-07-19 11:54:35.701558

"""

from alembic import op
import sqlalchemy as sa


revision = "795b20b85b4b"
down_revision = "05c07bf07c00"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "llm_provider__user_group",
        sa.Column("llm_provider_id", sa.Integer(), nullable=False),
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["llm_provider_id"],
            ["llm_provider.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("llm_provider_id", "user_group_id"),
    )
    op.add_column(
        "llm_provider",
        sa.Column("is_public", sa.Boolean(), nullable=False, server_default="true"),
    )


def downgrade() -> None:
    op.drop_table("llm_provider__user_group")
    op.drop_column("llm_provider", "is_public")


================================================
FILE: backend/alembic/versions/797089dfb4d2_persona_start_date.py
================================================
"""persona_start_date

Revision ID: 797089dfb4d2
Revises: 55546a7967ee
Create Date: 2024-09-11 14:51:49.785835

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "797089dfb4d2"
down_revision = "55546a7967ee"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("search_start_date", sa.DateTime(timezone=True), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("persona", "search_start_date")


================================================
FILE: backend/alembic/versions/79acd316403a_add_api_key_table.py
================================================
"""Add api_key table

Revision ID: 79acd316403a
Revises: 904e5138fffb
Create Date: 2024-01-11 17:56:37.934381

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "79acd316403a"
down_revision = "904e5138fffb"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "api_key",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("hashed_api_key", sa.String(), nullable=False),
        sa.Column("api_key_display", sa.String(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column(
            "owner_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("api_key_display"),
        sa.UniqueConstraint("hashed_api_key"),
    )


def downgrade() -> None:
    op.drop_table("api_key")


================================================
FILE: backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py
================================================
"""Add model-configuration table

Revision ID: 7a70b7664e37
Revises: d961aca62eb3
Create Date: 2025-04-10 15:00:35.984669

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from onyx.llm.well_known_providers.llm_provider_options import (
    fetch_model_names_for_provider_as_set,
    fetch_visible_model_names_for_provider_as_set,
)

# revision identifiers, used by Alembic.
revision = "7a70b7664e37"
down_revision = "d961aca62eb3"
branch_labels = None
depends_on = None


def _resolve(
    provider_name: str,
    model_names: list[str] | None,
    display_model_names: list[str] | None,
    default_model_name: str,
    fast_default_model_name: str | None,
) -> set[tuple[str, bool]]:
    models = set(model_names) if model_names else None
    display_models = set(display_model_names) if display_model_names else None

    # If both are defined, we need to make sure that `model_names` is a superset of `display_model_names`.
    if models and display_models:
        models = display_models.union(models)

    # If only `model_names` is defined, then:
    #   - If default-model-names are available for the `provider_name`, then set `display_model_names` to it
    #     and set `model_names` to the union of those default-model-names with itself.
    #   - If no default-model-names are available, then set `display_models` to `models`.
    #
    # This preserves the invariant that `display_models` is a subset of `models`.
    elif models and not display_models:
        visible_default_models = fetch_visible_model_names_for_provider_as_set(
            provider_name=provider_name
        )
        if visible_default_models:
            display_models = set(visible_default_models)
            models = display_models.union(models)
        else:
            display_models = set(models)

    # If only the `display_model_names` are defined, then set `models` to the union of `display_model_names`
    # and the default-model-names for that provider.
    #
    # This will also preserve the invariant that `display_models` is a subset of `models`.
    elif not models and display_models:
        default_models = fetch_model_names_for_provider_as_set(
            provider_name=provider_name
        )
        if default_models:
            models = display_models.union(default_models)
        else:
            models = set(display_models)

    # If neither are defined, then set `models` and `display_models` to the default-model-names for the given provider.
    #
    # This will also preserve the invariant that `display_models` is a subset of `models`.
    else:
        default_models = fetch_model_names_for_provider_as_set(
            provider_name=provider_name
        )
        visible_default_models = fetch_visible_model_names_for_provider_as_set(
            provider_name=provider_name
        )

        if default_models:
            if not visible_default_models:
                raise RuntimeError
                raise RuntimeError(
                    "If `default_models` is non-None, `visible_default_models` must be non-None too."
                )
            models = default_models
            display_models = visible_default_models

        # This is not a well-known llm-provider; we can't provide any model suggestions.
        # Therefore, we set to the empty set and continue
        else:
            models = set()
            display_models = set()

    # It is possible that `default_model_name` is not in `models` and is not in `display_models`.
    # It is also possible that `fast_default_model_name` is not in `models` and is not in `display_models`.
    models.add(default_model_name)
    if fast_default_model_name:
        models.add(fast_default_model_name)
    display_models.add(default_model_name)
    if fast_default_model_name:
        display_models.add(fast_default_model_name)

    return set([(model, model in display_models) for model in models])


def upgrade() -> None:
    op.create_table(
        "model_configuration",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("llm_provider_id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("is_visible", sa.Boolean(), nullable=False),
        sa.Column("max_input_tokens", sa.Integer(), nullable=True),
        sa.ForeignKeyConstraint(
            ["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("llm_provider_id", "name"),
    )

    # Create temporary sqlalchemy references to tables for data migration
    llm_provider_table = sa.sql.table(
        "llm_provider",
        sa.column("id", sa.Integer),
        sa.column("provider", sa.Integer),
        sa.column("model_names", postgresql.ARRAY(sa.String)),
        sa.column("display_model_names", postgresql.ARRAY(sa.String)),
        sa.column("default_model_name", sa.String),
        sa.column("fast_default_model_name", sa.String),
    )
    model_configuration_table = sa.sql.table(
        "model_configuration",
        sa.column("id", sa.Integer),
        sa.column("llm_provider_id", sa.Integer),
        sa.column("name", sa.String),
        sa.column("is_visible", sa.Boolean),
        sa.column("max_input_tokens", sa.Integer),
    )
    connection = op.get_bind()
    llm_providers = connection.execute(
        sa.select(
            llm_provider_table.c.id,
            llm_provider_table.c.provider,
            llm_provider_table.c.model_names,
            llm_provider_table.c.display_model_names,
            llm_provider_table.c.default_model_name,
            llm_provider_table.c.fast_default_model_name,
        )
    ).fetchall()

    for llm_provider in llm_providers:
        provider_id = llm_provider[0]
        provider_name = llm_provider[1]
        model_names = llm_provider[2]
        display_model_names = llm_provider[3]
        default_model_name = llm_provider[4]
        fast_default_model_name = llm_provider[5]

        model_configurations = _resolve(
            provider_name=provider_name,
            model_names=model_names,
            display_model_names=display_model_names,
            default_model_name=default_model_name,
            fast_default_model_name=fast_default_model_name,
        )

        for model_name, is_visible in model_configurations:
            connection.execute(
                model_configuration_table.insert().values(
                    llm_provider_id=provider_id,
                    name=model_name,
                    is_visible=is_visible,
                    max_input_tokens=None,
                )
            )

    op.drop_column("llm_provider", "model_names")
    op.drop_column("llm_provider", "display_model_names")


def downgrade() -> None:
    llm_provider = sa.table(
        "llm_provider",
        sa.column("id", sa.Integer),
        sa.column("model_names", postgresql.ARRAY(sa.String)),
        sa.column("display_model_names", postgresql.ARRAY(sa.String)),
    )

    model_configuration = sa.table(
        "model_configuration",
        sa.column("id", sa.Integer),
        sa.column("llm_provider_id", sa.Integer),
        sa.column("name", sa.String),
        sa.column("is_visible", sa.Boolean),
        sa.column("max_input_tokens", sa.Integer),
    )
    op.add_column(
        "llm_provider",
        sa.Column(
            "model_names",
            postgresql.ARRAY(sa.VARCHAR()),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.add_column(
        "llm_provider",
        sa.Column(
            "display_model_names",
            postgresql.ARRAY(sa.VARCHAR()),
            autoincrement=False,
            nullable=True,
        ),
    )

    connection = op.get_bind()
    provider_ids = connection.execute(sa.select(llm_provider.c.id)).fetchall()

    for (provider_id,) in provider_ids:
        # Get all models for this provider
        models = connection.execute(
            sa.select(
                model_configuration.c.name, model_configuration.c.is_visible
            ).where(model_configuration.c.llm_provider_id == provider_id)
        ).fetchall()

        all_models = [model[0] for model in models]
        visible_models = [model[0] for model in models if model[1]]

        # Update provider with arrays
        op.execute(
            llm_provider.update()
            .where(llm_provider.c.id == provider_id)
            .values(model_names=all_models, display_model_names=visible_models)
        )

    op.drop_table("model_configuration")


================================================
FILE: backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py
================================================
"""added slack_auto_filter

Revision ID: 7aea705850d5
Revises: 4505fd7302e1
Create Date: 2024-07-10 11:01:23.581015

"""

from alembic import op
import sqlalchemy as sa

revision = "7aea705850d5"
down_revision = "4505fd7302e1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "slack_bot_config",
        sa.Column("enable_auto_filters", sa.Boolean(), nullable=True),
    )
    op.execute(
        "UPDATE slack_bot_config SET enable_auto_filters = FALSE WHERE enable_auto_filters IS NULL"
    )
    op.alter_column(
        "slack_bot_config",
        "enable_auto_filters",
        existing_type=sa.Boolean(),
        nullable=False,
        server_default=sa.false(),
    )


def downgrade() -> None:
    op.drop_column("slack_bot_config", "enable_auto_filters")


================================================
FILE: backend/alembic/versions/7b9b952abdf6_update_entities.py
================================================
"""update-entities

Revision ID: 7b9b952abdf6
Revises: 36e9220ab794
Create Date: 2025-06-23 20:24:08.139201

"""

import json

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "7b9b952abdf6"
down_revision = "36e9220ab794"
branch_labels = None
depends_on = None


def upgrade() -> None:
    conn = op.get_bind()

    # new entity type metadata_attribute_conversion
    new_entity_type_conversion = {
        "LINEAR": {
            "team": {"name": "team", "keep": True, "implication_property": None},
            "state": {"name": "state", "keep": True, "implication_property": None},
            "priority": {
                "name": "priority",
                "keep": True,
                "implication_property": None,
            },
            "estimate": {
                "name": "estimate",
                "keep": True,
                "implication_property": None,
            },
            "created_at": {
                "name": "created_at",
                "keep": True,
                "implication_property": None,
            },
            "started_at": {
                "name": "started_at",
                "keep": True,
                "implication_property": None,
            },
            "completed_at": {
                "name": "completed_at",
                "keep": True,
                "implication_property": None,
            },
            "due_date": {
                "name": "due_date",
                "keep": True,
                "implication_property": None,
            },
            "creator": {
                "name": "creator",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_creator_of",
                },
            },
            "assignee": {
                "name": "assignee",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_assignee_of",
                },
            },
        },
        "JIRA": {
            "issuetype": {
                "name": "subtype",
                "keep": True,
                "implication_property": None,
            },
            "status": {"name": "status", "keep": True, "implication_property": None},
            "priority": {
                "name": "priority",
                "keep": True,
                "implication_property": None,
            },
            "project_name": {
                "name": "project",
                "keep": True,
                "implication_property": None,
            },
            "created": {
                "name": "created_at",
                "keep": True,
                "implication_property": None,
            },
            "updated": {
                "name": "updated_at",
                "keep": True,
                "implication_property": None,
            },
            "resolution_date": {
                "name": "completed_at",
                "keep": True,
                "implication_property": None,
            },
            "duedate": {"name": "due_date", "keep": True, "implication_property": None},
            "reporter_email": {
                "name": "creator",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_creator_of",
                },
            },
            "assignee_email": {
                "name": "assignee",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_assignee_of",
                },
            },
            "key": {"name": "key", "keep": True, "implication_property": None},
            "parent": {"name": "parent", "keep": True, "implication_property": None},
        },
        "GITHUB_PR": {
            "repo": {"name": "repository", "keep": True, "implication_property": None},
            "state": {"name": "state", "keep": True, "implication_property": None},
            "num_commits": {
                "name": "num_commits",
                "keep": True,
                "implication_property": None,
            },
            "num_files_changed": {
                "name": "num_files_changed",
                "keep": True,
                "implication_property": None,
            },
            "labels": {"name": "labels", "keep": True, "implication_property": None},
            "merged": {"name": "merged", "keep": True, "implication_property": None},
            "merged_at": {
                "name": "merged_at",
                "keep": True,
                "implication_property": None,
            },
            "closed_at": {
                "name": "closed_at",
                "keep": True,
                "implication_property": None,
            },
            "created_at": {
                "name": "created_at",
                "keep": True,
                "implication_property": None,
            },
            "updated_at": {
                "name": "updated_at",
                "keep": True,
                "implication_property": None,
            },
            "user": {
                "name": "creator",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_creator_of",
                },
            },
            "assignees": {
                "name": "assignees",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_assignee_of",
                },
            },
        },
        "GITHUB_ISSUE": {
            "repo": {"name": "repository", "keep": True, "implication_property": None},
            "state": {"name": "state", "keep": True, "implication_property": None},
            "labels": {"name": "labels", "keep": True, "implication_property": None},
            "closed_at": {
                "name": "closed_at",
                "keep": True,
                "implication_property": None,
            },
            "created_at": {
                "name": "created_at",
                "keep": True,
                "implication_property": None,
            },
            "updated_at": {
                "name": "updated_at",
                "keep": True,
                "implication_property": None,
            },
            "user": {
                "name": "creator",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_creator_of",
                },
            },
            "assignees": {
                "name": "assignees",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "from_email",
                    "implied_relationship_name": "is_assignee_of",
                },
            },
        },
        "FIREFLIES": {},
        "ACCOUNT": {},
        "OPPORTUNITY": {
            "name": {"name": "name", "keep": True, "implication_property": None},
            "stage_name": {"name": "stage", "keep": True, "implication_property": None},
            "type": {"name": "type", "keep": True, "implication_property": None},
            "amount": {"name": "amount", "keep": True, "implication_property": None},
            "fiscal_year": {
                "name": "fiscal_year",
                "keep": True,
                "implication_property": None,
            },
            "fiscal_quarter": {
                "name": "fiscal_quarter",
                "keep": True,
                "implication_property": None,
            },
            "is_closed": {
                "name": "is_closed",
                "keep": True,
                "implication_property": None,
            },
            "close_date": {
                "name": "close_date",
                "keep": True,
                "implication_property": None,
            },
            "probability": {
                "name": "close_probability",
                "keep": True,
                "implication_property": None,
            },
            "created_date": {
                "name": "created_at",
                "keep": True,
                "implication_property": None,
            },
            "last_modified_date": {
                "name": "updated_at",
                "keep": True,
                "implication_property": None,
            },
            "account": {
                "name": "account",
                "keep": False,
                "implication_property": {
                    "implied_entity_type": "ACCOUNT",
                    "implied_relationship_name": "is_account_of",
                },
            },
        },
        "VENDOR": {},
        "EMPLOYEE": {},
    }

    current_entity_types = conn.execute(
        sa.text("SELECT id_name, attributes from kg_entity_type")
    ).all()
    for entity_type, attributes in current_entity_types:
        # delete removed entity types
        if entity_type not in new_entity_type_conversion:
            op.execute(
                sa.text(f"DELETE FROM kg_entity_type WHERE id_name = '{entity_type}'")
            )
            continue

        # update entity type attributes
        if "metadata_attributes" in attributes:
            del attributes["metadata_attributes"]
        attributes["metadata_attribute_conversion"] = new_entity_type_conversion[
            entity_type
        ]
        attributes_str = json.dumps(attributes).replace("'", "''")
        op.execute(
            sa.text(
                f"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'"
            ),
        )


def downgrade() -> None:
    conn = op.get_bind()

    current_entity_types = conn.execute(
        sa.text("SELECT id_name, attributes from kg_entity_type")
    ).all()
    for entity_type, attributes in current_entity_types:
        conversion = {}
        if "metadata_attribute_conversion" in attributes:
            conversion = attributes.pop("metadata_attribute_conversion")
        attributes["metadata_attributes"] = {
            attr: prop["name"] for attr, prop in conversion.items() if prop["keep"]
        }

        attributes_str = json.dumps(attributes).replace("'", "''")
        op.execute(
            sa.text(
                f"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'"
            ),
        )


================================================
FILE: backend/alembic/versions/7bd55f264e1b_add_display_name_to_model_configuration.py
================================================
"""Add display_name to model_configuration

Revision ID: 7bd55f264e1b
Revises: e8f0d2a38171
Create Date: 2025-12-04

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "7bd55f264e1b"
down_revision = "e8f0d2a38171"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "model_configuration",
        sa.Column("display_name", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("model_configuration", "display_name")


================================================
FILE: backend/alembic/versions/7cb492013621_code_interpreter_server_model.py
================================================
"""code interpreter server model

Revision ID: 7cb492013621
Revises: 0bb4558f35df
Create Date: 2026-02-22 18:54:54.007265

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "7cb492013621"
down_revision = "0bb4558f35df"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "code_interpreter_server",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column(
            "server_enabled", sa.Boolean, nullable=False, server_default=sa.true()
        ),
    )


def downgrade() -> None:
    op.drop_table("code_interpreter_server")


================================================
FILE: backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py
================================================
"""Migration 4: User file UUID primary key swap

Revision ID: 7cc3fcc116c1
Revises: 16c37a30adf2
Create Date: 2025-09-22 09:54:38.292952

This migration performs the critical UUID primary key swap on user_file table.
It updates all foreign key references to use UUIDs instead of integers.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
import logging

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "7cc3fcc116c1"
down_revision = "16c37a30adf2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Swap user_file primary key from integer to UUID."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # Verify we're in the expected state
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
    if "new_id" not in user_file_columns:
        logger.warning(
            "user_file.new_id not found - migration may have already been applied"
        )
        return

    logger.info("Starting UUID primary key swap...")

    # === Step 1: Update persona__user_file foreign key to UUID ===
    logger.info("Updating persona__user_file foreign key...")

    # Drop existing foreign key constraints
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_uuid_fkey"
    )
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
    )

    # Create new foreign key to user_file.new_id
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id_uuid"],
        remote_cols=["new_id"],
    )

    # Drop the old integer column and rename UUID column
    op.execute("ALTER TABLE persona__user_file DROP COLUMN IF EXISTS user_file_id")
    op.alter_column(
        "persona__user_file",
        "user_file_id_uuid",
        new_column_name="user_file_id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )

    # Recreate composite primary key
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_pkey"
    )
    op.execute(
        "ALTER TABLE persona__user_file ADD PRIMARY KEY (persona_id, user_file_id)"
    )

    logger.info("Updated persona__user_file to use UUID foreign key")

    # === Step 2: Perform the primary key swap on user_file ===
    logger.info("Swapping user_file primary key to UUID...")

    # Drop the primary key constraint
    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_pkey")

    # Drop the old id column and rename new_id to id
    op.execute("ALTER TABLE user_file DROP COLUMN IF EXISTS id")
    op.alter_column(
        "user_file",
        "new_id",
        new_column_name="id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )

    # Set default for new inserts
    op.alter_column(
        "user_file",
        "id",
        existing_type=psql.UUID(as_uuid=True),
        server_default=sa.text("gen_random_uuid()"),
    )

    # Create new primary key
    op.execute("ALTER TABLE user_file ADD PRIMARY KEY (id)")

    logger.info("Swapped user_file primary key to UUID")

    # === Step 3: Update foreign key constraints ===
    logger.info("Updating foreign key constraints...")

    # Recreate persona__user_file foreign key to point to user_file.id
    # Drop existing FK first to break dependency on the unique constraint
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
    )
    # Drop the unique constraint on (formerly) new_id BEFORE recreating the FK,
    # so the FK will bind to the primary key instead of the unique index.
    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS uq_user_file_new_id")
    # Now recreate FK to the primary key column
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id"],
        remote_cols=["id"],
    )

    # Add foreign keys for project__user_file
    existing_fks = inspector.get_foreign_keys("project__user_file")

    has_user_file_fk = any(
        fk.get("referred_table") == "user_file"
        and fk.get("constrained_columns") == ["user_file_id"]
        for fk in existing_fks
    )

    if not has_user_file_fk:
        op.create_foreign_key(
            "fk_project__user_file_user_file_id",
            "project__user_file",
            "user_file",
            ["user_file_id"],
            ["id"],
        )
        logger.info("Added project__user_file -> user_file foreign key")

    has_project_fk = any(
        fk.get("referred_table") == "user_project"
        and fk.get("constrained_columns") == ["project_id"]
        for fk in existing_fks
    )

    if not has_project_fk:
        op.create_foreign_key(
            "fk_project__user_file_project_id",
            "project__user_file",
            "user_project",
            ["project_id"],
            ["id"],
        )
        logger.info("Added project__user_file -> user_project foreign key")

    # === Step 4: Mark files for document_id migration ===
    logger.info("Marking files for background document_id migration...")

    logger.info("Migration 4 (UUID primary key swap) completed successfully")
    logger.info(
        "NOTE: Background task will update document IDs in Vespa and search_doc"
    )


def downgrade() -> None:
    """Revert UUID primary key back to integer (data destructive!)."""

    logger.error("CRITICAL: Downgrading UUID primary key swap is data destructive!")
    logger.error(
        "This will break all UUID-based references created after the migration."
    )
    logger.error("Only proceed if absolutely necessary and have backups.")

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # Capture existing primary key definitions so we can restore them after swaps
    persona_pk = inspector.get_pk_constraint("persona__user_file") or {}
    persona_pk_name = persona_pk.get("name")
    persona_pk_cols = persona_pk.get("constrained_columns") or []

    project_pk = inspector.get_pk_constraint("project__user_file") or {}
    project_pk_name = project_pk.get("name")
    project_pk_cols = project_pk.get("constrained_columns") or []

    # Drop foreign keys that reference the UUID primary key
    op.drop_constraint(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        type_="foreignkey",
    )
    op.drop_constraint(
        "fk_project__user_file_user_file_id",
        "project__user_file",
        type_="foreignkey",
    )

    # Drop primary keys that rely on the UUID column so we can replace it
    if persona_pk_name:
        op.drop_constraint(persona_pk_name, "persona__user_file", type_="primary")
    if project_pk_name:
        op.drop_constraint(project_pk_name, "project__user_file", type_="primary")

    # Rebuild integer IDs on user_file using a sequence-backed column
    op.execute("CREATE SEQUENCE IF NOT EXISTS user_file_id_seq")
    op.add_column(
        "user_file",
        sa.Column(
            "id_int",
            sa.Integer(),
            server_default=sa.text("nextval('user_file_id_seq')"),
            nullable=False,
        ),
    )
    op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id_int")

    # Prepare integer foreign key columns on referencing tables
    op.add_column(
        "persona__user_file",
        sa.Column("user_file_id_int", sa.Integer(), nullable=True),
    )
    op.add_column(
        "project__user_file",
        sa.Column("user_file_id_int", sa.Integer(), nullable=True),
    )

    # Populate the new integer foreign key columns by mapping from the UUID IDs
    op.execute(
        """
        UPDATE persona__user_file AS p
        SET user_file_id_int = uf.id_int
        FROM user_file AS uf
        WHERE p.user_file_id = uf.id
        """
    )
    op.execute(
        """
        UPDATE project__user_file AS p
        SET user_file_id_int = uf.id_int
        FROM user_file AS uf
        WHERE p.user_file_id = uf.id
        """
    )

    op.alter_column(
        "persona__user_file",
        "user_file_id_int",
        existing_type=sa.Integer(),
        nullable=False,
    )
    op.alter_column(
        "project__user_file",
        "user_file_id_int",
        existing_type=sa.Integer(),
        nullable=False,
    )

    # Remove the UUID foreign key columns and rename the integer replacements
    op.drop_column("persona__user_file", "user_file_id")
    op.alter_column(
        "persona__user_file",
        "user_file_id_int",
        new_column_name="user_file_id",
        existing_type=sa.Integer(),
        nullable=False,
    )

    op.drop_column("project__user_file", "user_file_id")
    op.alter_column(
        "project__user_file",
        "user_file_id_int",
        new_column_name="user_file_id",
        existing_type=sa.Integer(),
        nullable=False,
    )

    # Swap the user_file primary key back to the integer column
    op.drop_constraint("user_file_pkey", "user_file", type_="primary")
    op.drop_column("user_file", "id")
    op.alter_column(
        "user_file",
        "id_int",
        new_column_name="id",
        existing_type=sa.Integer(),
    )
    op.alter_column(
        "user_file",
        "id",
        existing_type=sa.Integer(),
        nullable=False,
        server_default=sa.text("nextval('user_file_id_seq')"),
    )
    op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id")
    op.execute(
        """
        SELECT setval(
            'user_file_id_seq',
            GREATEST(COALESCE(MAX(id), 1), 1),
            MAX(id) IS NOT NULL
        )
        FROM user_file
        """
    )
    op.create_primary_key("user_file_pkey", "user_file", ["id"])

    # Restore primary keys on referencing tables
    if persona_pk_cols:
        op.create_primary_key(
            "persona__user_file_pkey", "persona__user_file", persona_pk_cols
        )
    if project_pk_cols:
        op.create_primary_key(
            "project__user_file_pkey",
            "project__user_file",
            project_pk_cols,
        )

    # Recreate foreign keys pointing at the integer primary key
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        ["user_file_id"],
        ["id"],
    )
    op.create_foreign_key(
        "fk_project__user_file_user_file_id",
        "project__user_file",
        "user_file",
        ["user_file_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py
================================================
"""Store Chat Retrieval Docs

Revision ID: 7ccea01261f6
Revises: a570b80a5f20
Create Date: 2023-10-15 10:39:23.317453

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "7ccea01261f6"
down_revision = "a570b80a5f20"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column(
            "reference_docs",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "reference_docs")


================================================
FILE: backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py
================================================
"""Add description to persona

Revision ID: 7da0ae5ad583
Revises: e86866a9c78a
Create Date: 2023-11-27 00:16:19.959414

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "7da0ae5ad583"
down_revision = "e86866a9c78a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("persona", sa.Column("description", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("persona", "description")


================================================
FILE: backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py
================================================
"""Add SlackBotConfig table

Revision ID: 7da543f5672f
Revises: febe9eaa0644
Create Date: 2023-09-24 16:34:17.526128

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "7da543f5672f"
down_revision = "febe9eaa0644"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "slack_bot_config",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("persona_id", sa.Integer(), nullable=True),
        sa.Column(
            "channel_config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("slack_bot_config")


================================================
FILE: backend/alembic/versions/7e490836d179_nullify_default_system_prompt.py
================================================
"""nullify_default_system_prompt

Revision ID: 7e490836d179
Revises: c1d2e3f4a5b6
Create Date: 2025-12-29 16:54:36.635574

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "7e490836d179"
down_revision = "c1d2e3f4a5b6"
branch_labels = None
depends_on = None


# This is the default system prompt from the previous migration (87c52ec39f84)
# ruff: noqa: E501, W605 start
PREVIOUS_DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.

The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]

# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end


def upgrade() -> None:
    # Make system_prompt column nullable (model already has nullable=True but DB doesn't)
    op.alter_column(
        "persona",
        "system_prompt",
        nullable=True,
    )

    # Set system_prompt to NULL where it matches the previous default
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET system_prompt = NULL
            WHERE system_prompt = :previous_default
            """
        ),
        {"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
    )


def downgrade() -> None:
    # Restore the default system prompt for personas that have NULL
    # Note: This may restore the prompt to personas that originally had NULL
    # before this migration, but there's no way to distinguish them
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET system_prompt = :previous_default
            WHERE system_prompt IS NULL
            """
        ),
        {"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
    )

    # Revert system_prompt column to not nullable
    op.alter_column(
        "persona",
        "system_prompt",
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py
================================================
"""add_mcp_server_and_connection_config_models

Revision ID: 7ed603b64d5a
Revises: b329d00a9ea6
Create Date: 2025-07-28 17:35:59.900680

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.db.enums import MCPAuthenticationType

# revision identifiers, used by Alembic.
revision = "7ed603b64d5a"
down_revision = "b329d00a9ea6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create tables and columns for MCP Server support"""

    # 1. MCP Server main table (no FK constraints yet to avoid circular refs)
    op.create_table(
        "mcp_server",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("owner", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("server_url", sa.String(), nullable=False),
        sa.Column(
            "auth_type",
            sa.Enum(
                MCPAuthenticationType,
                name="mcp_authentication_type",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column("admin_connection_config_id", sa.Integer(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
    )

    # 2. MCP Connection Config table (can reference mcp_server now that it exists)
    op.create_table(
        "mcp_connection_config",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("mcp_server_id", sa.Integer(), nullable=True),
        sa.Column("user_email", sa.String(), nullable=False, default=""),
        sa.Column("config", sa.LargeBinary(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["mcp_server_id"], ["mcp_server.id"], ondelete="CASCADE"
        ),
    )

    # Helpful indexes
    op.create_index(
        "ix_mcp_connection_config_server_user",
        "mcp_connection_config",
        ["mcp_server_id", "user_email"],
    )
    op.create_index(
        "ix_mcp_connection_config_user_email",
        "mcp_connection_config",
        ["user_email"],
    )

    # 3. Add the back-references from mcp_server to connection configs
    op.create_foreign_key(
        "mcp_server_admin_config_fk",
        "mcp_server",
        "mcp_connection_config",
        ["admin_connection_config_id"],
        ["id"],
        ondelete="SET NULL",
    )

    # 4. Association / access-control tables
    op.create_table(
        "mcp_server__user",
        sa.Column("mcp_server_id", sa.Integer(), primary_key=True),
        sa.Column("user_id", sa.UUID(), primary_key=True),
        sa.ForeignKeyConstraint(
            ["mcp_server_id"], ["mcp_server.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
    )

    op.create_table(
        "mcp_server__user_group",
        sa.Column("mcp_server_id", sa.Integer(), primary_key=True),
        sa.Column("user_group_id", sa.Integer(), primary_key=True),
        sa.ForeignKeyConstraint(
            ["mcp_server_id"], ["mcp_server.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["user_group_id"], ["user_group.id"]),
    )

    # 5. Update existing `tool` table – allow tools to belong to an MCP server
    op.add_column(
        "tool",
        sa.Column("mcp_server_id", sa.Integer(), nullable=True),
    )
    # Add column for MCP tool input schema
    op.add_column(
        "tool",
        sa.Column("mcp_input_schema", postgresql.JSONB(), nullable=True),
    )
    op.create_foreign_key(
        "tool_mcp_server_fk",
        "tool",
        "mcp_server",
        ["mcp_server_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # 6. Update persona__tool foreign keys to cascade delete
    # This ensures that when a tool is deleted (including via MCP server deletion),
    # the corresponding persona__tool rows are also deleted
    op.drop_constraint(
        "persona__tool_tool_id_fkey", "persona__tool", type_="foreignkey"
    )
    op.drop_constraint(
        "persona__tool_persona_id_fkey", "persona__tool", type_="foreignkey"
    )

    op.create_foreign_key(
        "persona__tool_persona_id_fkey",
        "persona__tool",
        "persona",
        ["persona_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "persona__tool_tool_id_fkey",
        "persona__tool",
        "tool",
        ["tool_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # 7. Update research_agent_iteration_sub_step foreign key to SET NULL on delete
    # This ensures that when a tool is deleted, the sub_step_tool_id is set to NULL
    # instead of causing a foreign key constraint violation
    op.drop_constraint(
        "research_agent_iteration_sub_step_sub_step_tool_id_fkey",
        "research_agent_iteration_sub_step",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "research_agent_iteration_sub_step_sub_step_tool_id_fkey",
        "research_agent_iteration_sub_step",
        "tool",
        ["sub_step_tool_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    """Drop all MCP-related tables / columns"""

    # # # 1. Drop FK & columns from tool
    # op.drop_constraint("tool_mcp_server_fk", "tool", type_="foreignkey")
    op.execute("DELETE FROM tool WHERE mcp_server_id IS NOT NULL")

    op.drop_constraint(
        "research_agent_iteration_sub_step_sub_step_tool_id_fkey",
        "research_agent_iteration_sub_step",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "research_agent_iteration_sub_step_sub_step_tool_id_fkey",
        "research_agent_iteration_sub_step",
        "tool",
        ["sub_step_tool_id"],
        ["id"],
    )

    # Restore original persona__tool foreign keys (without CASCADE)
    op.drop_constraint(
        "persona__tool_persona_id_fkey", "persona__tool", type_="foreignkey"
    )
    op.drop_constraint(
        "persona__tool_tool_id_fkey", "persona__tool", type_="foreignkey"
    )

    op.create_foreign_key(
        "persona__tool_persona_id_fkey",
        "persona__tool",
        "persona",
        ["persona_id"],
        ["id"],
    )
    op.create_foreign_key(
        "persona__tool_tool_id_fkey",
        "persona__tool",
        "tool",
        ["tool_id"],
        ["id"],
    )
    op.drop_column("tool", "mcp_input_schema")
    op.drop_column("tool", "mcp_server_id")

    # 2. Drop association tables
    op.drop_table("mcp_server__user_group")
    op.drop_table("mcp_server__user")

    # 3. Drop FK from mcp_server to connection configs
    op.drop_constraint("mcp_server_admin_config_fk", "mcp_server", type_="foreignkey")

    # 4. Drop connection config indexes & table
    op.drop_index(
        "ix_mcp_connection_config_user_email", table_name="mcp_connection_config"
    )
    op.drop_index(
        "ix_mcp_connection_config_server_user", table_name="mcp_connection_config"
    )
    op.drop_table("mcp_connection_config")

    # 5. Finally drop mcp_server table
    op.drop_table("mcp_server")


================================================
FILE: backend/alembic/versions/7f726bad5367_slack_followup.py
================================================
"""Slack Followup

Revision ID: 7f726bad5367
Revises: 79acd316403a
Create Date: 2024-01-15 00:19:55.991224

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "7f726bad5367"
down_revision = "79acd316403a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_feedback",
        sa.Column("required_followup", sa.Boolean(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_feedback", "required_followup")


================================================
FILE: backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py
================================================
"""Add index for getting documents just by connector id / credential id

Revision ID: 7f99be1cb9f5
Revises: 78dbe7e38469
Create Date: 2023-10-15 22:48:15.487762

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "7f99be1cb9f5"
down_revision = "78dbe7e38469"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_index(
        op.f(
            "ix_document_by_connector_credential_pair_pkey__connector_id__credential_id"
        ),
        "document_by_connector_credential_pair",
        ["connector_id", "credential_id"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(
        op.f(
            "ix_document_by_connector_credential_pair_pkey__connector_id__credential_id"
        ),
        table_name="document_by_connector_credential_pair",
    )


================================================
FILE: backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py
================================================
"""Add ID to ConnectorCredentialPair

Revision ID: 800f48024ae9
Revises: 767f1c2a00eb
Create Date: 2023-09-19 16:13:42.299715

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.schema import Sequence, CreateSequence

# revision identifiers, used by Alembic.
revision = "800f48024ae9"
down_revision = "767f1c2a00eb"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    sequence = Sequence("connector_credential_pair_id_seq")
    op.execute(CreateSequence(sequence))  # type: ignore
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "id", sa.Integer(), nullable=True, server_default=sequence.next_value()
        ),
    )
    op.add_column(
        "connector_credential_pair",
        sa.Column("name", sa.String(), nullable=True),
    )

    # fill in IDs for existing rows
    op.execute(
        "UPDATE connector_credential_pair SET id = nextval('connector_credential_pair_id_seq') WHERE id IS NULL"
    )
    op.alter_column("connector_credential_pair", "id", nullable=False)

    op.create_unique_constraint(
        "connector_credential_pair__name__key", "connector_credential_pair", ["name"]
    )
    op.create_unique_constraint(
        "connector_credential_pair__id__key", "connector_credential_pair", ["id"]
    )


def downgrade() -> None:
    op.drop_constraint(
        "connector_credential_pair__name__key",
        "connector_credential_pair",
        type_="unique",
    )
    op.drop_constraint(
        "connector_credential_pair__id__key",
        "connector_credential_pair",
        type_="unique",
    )
    op.drop_column("connector_credential_pair", "name")
    op.drop_column("connector_credential_pair", "id")
    op.execute("DROP SEQUENCE connector_credential_pair_id_seq")


================================================
FILE: backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py
================================================
"""Add chat session to query_event

Revision ID: 80696cf850ae
Revises: 15326fcec57e
Create Date: 2023-11-26 02:38:35.008070

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "80696cf850ae"
down_revision = "15326fcec57e"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "query_event",
        sa.Column("chat_session_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "fk_query_event_chat_session_id",
        "query_event",
        "chat_session",
        ["chat_session_id"],
        ["id"],
    )


def downgrade() -> None:
    op.drop_constraint(
        "fk_query_event_chat_session_id", "query_event", type_="foreignkey"
    )
    op.drop_column("query_event", "chat_session_id")


================================================
FILE: backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py
================================================
"""csv to tabular chat file type

Revision ID: 8188861f4e92
Revises: d8cdfee5df80
Create Date: 2026-03-31 19:23:05.753184

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "8188861f4e92"
down_revision = "d8cdfee5df80"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE chat_message
        SET files = (
            SELECT jsonb_agg(
                CASE
                    WHEN elem->>'type' = 'csv'
                    THEN jsonb_set(elem, '{type}', '"tabular"')
                    ELSE elem
                END
            )
            FROM jsonb_array_elements(files) AS elem
        )
        WHERE files::text LIKE '%"type": "csv"%'
        """
    )


def downgrade() -> None:
    op.execute(
        """
        UPDATE chat_message
        SET files = (
            SELECT jsonb_agg(
                CASE
                    WHEN elem->>'type' = 'tabular'
                    THEN jsonb_set(elem, '{type}', '"csv"')
                    ELSE elem
                END
            )
            FROM jsonb_array_elements(files) AS elem
        )
        WHERE files::text LIKE '%"type": "tabular"%'
        """
    )


================================================
FILE: backend/alembic/versions/81c22b1e2e78_hierarchy_nodes_v1.py
================================================
"""hierarchy_nodes_v1

Revision ID: 81c22b1e2e78
Revises: 72aa7de2e5cf
Create Date: 2026-01-13 18:10:01.021451

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from onyx.configs.constants import DocumentSource


# revision identifiers, used by Alembic.
revision = "81c22b1e2e78"
down_revision = "72aa7de2e5cf"
branch_labels = None
depends_on = None


# Human-readable display names for each source
SOURCE_DISPLAY_NAMES: dict[str, str] = {
    "ingestion_api": "Ingestion API",
    "slack": "Slack",
    "web": "Web",
    "google_drive": "Google Drive",
    "gmail": "Gmail",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "gitbook": "GitBook",
    "gitlab": "GitLab",
    "guru": "Guru",
    "bookstack": "BookStack",
    "outline": "Outline",
    "confluence": "Confluence",
    "jira": "Jira",
    "slab": "Slab",
    "productboard": "Productboard",
    "file": "File",
    "coda": "Coda",
    "notion": "Notion",
    "zulip": "Zulip",
    "linear": "Linear",
    "hubspot": "HubSpot",
    "document360": "Document360",
    "gong": "Gong",
    "google_sites": "Google Sites",
    "zendesk": "Zendesk",
    "loopio": "Loopio",
    "dropbox": "Dropbox",
    "sharepoint": "SharePoint",
    "teams": "Teams",
    "salesforce": "Salesforce",
    "discourse": "Discourse",
    "axero": "Axero",
    "clickup": "ClickUp",
    "mediawiki": "MediaWiki",
    "wikipedia": "Wikipedia",
    "asana": "Asana",
    "s3": "S3",
    "r2": "R2",
    "google_cloud_storage": "Google Cloud Storage",
    "oci_storage": "OCI Storage",
    "xenforo": "XenForo",
    "not_applicable": "Not Applicable",
    "discord": "Discord",
    "freshdesk": "Freshdesk",
    "fireflies": "Fireflies",
    "egnyte": "Egnyte",
    "airtable": "Airtable",
    "highspot": "Highspot",
    "drupal_wiki": "Drupal Wiki",
    "imap": "IMAP",
    "bitbucket": "Bitbucket",
    "testrail": "TestRail",
    "mock_connector": "Mock Connector",
    "user_file": "User File",
}


def upgrade() -> None:
    # 1. Create hierarchy_node table
    op.create_table(
        "hierarchy_node",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("raw_node_id", sa.String(), nullable=False),
        sa.Column("display_name", sa.String(), nullable=False),
        sa.Column("link", sa.String(), nullable=True),
        sa.Column("source", sa.String(), nullable=False),
        sa.Column("node_type", sa.String(), nullable=False),
        sa.Column("document_id", sa.String(), nullable=True),
        sa.Column("parent_id", sa.Integer(), nullable=True),
        # Permission fields - same pattern as Document table
        sa.Column(
            "external_user_emails",
            postgresql.ARRAY(sa.String()),
            nullable=True,
        ),
        sa.Column(
            "external_user_group_ids",
            postgresql.ARRAY(sa.String()),
            nullable=True,
        ),
        sa.Column("is_public", sa.Boolean(), nullable=False, server_default="false"),
        sa.PrimaryKeyConstraint("id"),
        # When document is deleted, just unlink (node can exist without document)
        sa.ForeignKeyConstraint(["document_id"], ["document.id"], ondelete="SET NULL"),
        # When parent node is deleted, orphan children (cleanup via pruning)
        sa.ForeignKeyConstraint(
            ["parent_id"], ["hierarchy_node.id"], ondelete="SET NULL"
        ),
        sa.UniqueConstraint(
            "raw_node_id", "source", name="uq_hierarchy_node_raw_id_source"
        ),
    )
    op.create_index("ix_hierarchy_node_parent_id", "hierarchy_node", ["parent_id"])
    op.create_index(
        "ix_hierarchy_node_source_type", "hierarchy_node", ["source", "node_type"]
    )

    # Add partial unique index to ensure only one SOURCE-type node per source
    # This prevents duplicate source root nodes from being created
    # NOTE: node_type stores enum NAME ('SOURCE'), not value ('source')
    op.execute(
        sa.text(
            """
            CREATE UNIQUE INDEX uq_hierarchy_node_one_source_per_type
            ON hierarchy_node (source)
            WHERE node_type = 'SOURCE'
            """
        )
    )

    # 2. Create hierarchy_fetch_attempt table
    op.create_table(
        "hierarchy_fetch_attempt",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
        sa.Column("status", sa.String(), nullable=False),
        sa.Column("nodes_fetched", sa.Integer(), nullable=True, server_default="0"),
        sa.Column("nodes_updated", sa.Integer(), nullable=True, server_default="0"),
        sa.Column("error_msg", sa.Text(), nullable=True),
        sa.Column("full_exception_trace", sa.Text(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["connector_credential_pair_id"],
            ["connector_credential_pair.id"],
            ondelete="CASCADE",
        ),
    )
    op.create_index(
        "ix_hierarchy_fetch_attempt_status", "hierarchy_fetch_attempt", ["status"]
    )
    op.create_index(
        "ix_hierarchy_fetch_attempt_time_created",
        "hierarchy_fetch_attempt",
        ["time_created"],
    )
    op.create_index(
        "ix_hierarchy_fetch_attempt_cc_pair",
        "hierarchy_fetch_attempt",
        ["connector_credential_pair_id"],
    )

    # 3. Insert SOURCE-type hierarchy nodes for each DocumentSource
    # We insert these so every existing document can have a parent hierarchy node
    # NOTE: SQLAlchemy's Enum with native_enum=False stores the enum NAME (e.g., 'GOOGLE_DRIVE'),
    # not the VALUE (e.g., 'google_drive'). We must use .name for source and node_type columns.
    # SOURCE nodes are always public since they're just categorical roots.
    for source in DocumentSource:
        source_name = (
            source.name
        )  # e.g., 'GOOGLE_DRIVE' - what SQLAlchemy stores/expects
        source_value = source.value  # e.g., 'google_drive' - the raw_node_id
        display_name = SOURCE_DISPLAY_NAMES.get(
            source_value, source_value.replace("_", " ").title()
        )
        op.execute(
            sa.text(
                """
                INSERT INTO hierarchy_node (raw_node_id, display_name, source, node_type, parent_id, is_public)
                VALUES (:raw_node_id, :display_name, :source, 'SOURCE', NULL, true)
                ON CONFLICT (raw_node_id, source) DO NOTHING
                """
            ).bindparams(
                raw_node_id=source_value,  # Use .value for raw_node_id (human-readable identifier)
                display_name=display_name,
                source=source_name,  # Use .name for source column (SQLAlchemy enum storage)
            )
        )

    # 4. Add parent_hierarchy_node_id column to document table
    op.add_column(
        "document",
        sa.Column("parent_hierarchy_node_id", sa.Integer(), nullable=True),
    )
    # When hierarchy node is deleted, just unlink the document (SET NULL)
    op.create_foreign_key(
        "fk_document_parent_hierarchy_node",
        "document",
        "hierarchy_node",
        ["parent_hierarchy_node_id"],
        ["id"],
        ondelete="SET NULL",
    )
    op.create_index(
        "ix_document_parent_hierarchy_node_id",
        "document",
        ["parent_hierarchy_node_id"],
    )

    # 5. Set all existing documents' parent_hierarchy_node_id to their source's SOURCE node
    # For documents with multiple connectors, we pick one source deterministically (MIN connector_id)
    # NOTE: Both connector.source and hierarchy_node.source store enum NAMEs (e.g., 'GOOGLE_DRIVE')
    # because SQLAlchemy Enum(native_enum=False) uses the enum name for storage.
    op.execute(
        sa.text(
            """
            UPDATE document d
            SET parent_hierarchy_node_id = hn.id
            FROM (
                -- Get the source for each document (pick MIN connector_id for determinism)
                SELECT DISTINCT ON (dbcc.id)
                    dbcc.id as doc_id,
                    c.source as source
                FROM document_by_connector_credential_pair dbcc
                JOIN connector c ON dbcc.connector_id = c.id
                ORDER BY dbcc.id, dbcc.connector_id
            ) doc_source
            JOIN hierarchy_node hn ON hn.source = doc_source.source AND hn.node_type = 'SOURCE'
            WHERE d.id = doc_source.doc_id
            """
        )
    )

    # Create the persona__hierarchy_node association table
    op.create_table(
        "persona__hierarchy_node",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("hierarchy_node_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["hierarchy_node_id"],
            ["hierarchy_node.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("persona_id", "hierarchy_node_id"),
    )

    # Add index for efficient lookups
    op.create_index(
        "ix_persona__hierarchy_node_hierarchy_node_id",
        "persona__hierarchy_node",
        ["hierarchy_node_id"],
    )

    # Create the persona__document association table for attaching individual
    # documents directly to assistants
    op.create_table(
        "persona__document",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["document.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("persona_id", "document_id"),
    )

    # Add index for efficient lookups by document_id
    op.create_index(
        "ix_persona__document_document_id",
        "persona__document",
        ["document_id"],
    )

    # 6. Add last_time_hierarchy_fetch column to connector_credential_pair table
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_time_hierarchy_fetch", sa.DateTime(timezone=True), nullable=True
        ),
    )


def downgrade() -> None:
    # Remove last_time_hierarchy_fetch from connector_credential_pair
    op.drop_column("connector_credential_pair", "last_time_hierarchy_fetch")

    # Drop persona__document table
    op.drop_index("ix_persona__document_document_id", table_name="persona__document")
    op.drop_table("persona__document")

    # Drop persona__hierarchy_node table
    op.drop_index(
        "ix_persona__hierarchy_node_hierarchy_node_id",
        table_name="persona__hierarchy_node",
    )
    op.drop_table("persona__hierarchy_node")

    # Remove parent_hierarchy_node_id from document
    op.drop_index("ix_document_parent_hierarchy_node_id", table_name="document")
    op.drop_constraint(
        "fk_document_parent_hierarchy_node", "document", type_="foreignkey"
    )
    op.drop_column("document", "parent_hierarchy_node_id")

    # Drop hierarchy_fetch_attempt table
    op.drop_index(
        "ix_hierarchy_fetch_attempt_cc_pair", table_name="hierarchy_fetch_attempt"
    )
    op.drop_index(
        "ix_hierarchy_fetch_attempt_time_created", table_name="hierarchy_fetch_attempt"
    )
    op.drop_index(
        "ix_hierarchy_fetch_attempt_status", table_name="hierarchy_fetch_attempt"
    )
    op.drop_table("hierarchy_fetch_attempt")

    # Drop hierarchy_node table
    op.drop_index("uq_hierarchy_node_one_source_per_type", table_name="hierarchy_node")
    op.drop_index("ix_hierarchy_node_source_type", table_name="hierarchy_node")
    op.drop_index("ix_hierarchy_node_parent_id", table_name="hierarchy_node")
    op.drop_table("hierarchy_node")


================================================
FILE: backend/alembic/versions/8405ca81cc83_notifications_constraint.py
================================================
"""notifications constraint, sort index, and cleanup old notifications

Revision ID: 8405ca81cc83
Revises: a3c1a7904cd0
Create Date: 2026-01-07 16:43:44.855156

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "8405ca81cc83"
down_revision = "a3c1a7904cd0"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create unique index for notification deduplication.
    # This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
    #
    # Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
    # in unique constraints, but we want NULL == NULL for deduplication).
    # The '{}' represents an empty JSONB object as the NULL replacement.

    # Clean up legacy notifications first
    op.execute("DELETE FROM notification WHERE title = 'New Notification'")

    op.execute(
        """
        CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
        ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
        """
    )

    # Create index for efficient notification sorting by user
    # Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS ix_notification_user_sort
        ON notification (user_id, dismissed, first_shown DESC)
        """
    )


def downgrade() -> None:
    op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
    op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")


================================================
FILE: backend/alembic/versions/849b21c732f8_add_demo_data_enabled_to_build_session.py
================================================
"""add demo_data_enabled to build_session

Revision ID: 849b21c732f8
Revises: 81c22b1e2e78
Create Date: 2026-01-28 10:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "849b21c732f8"
down_revision = "81c22b1e2e78"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "build_session",
        sa.Column(
            "demo_data_enabled",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
        ),
    )


def downgrade() -> None:
    op.drop_column("build_session", "demo_data_enabled")


================================================
FILE: backend/alembic/versions/87c52ec39f84_update_default_system_prompt.py
================================================
"""update_default_system_prompt

Revision ID: 87c52ec39f84
Revises: 7bd55f264e1b
Create Date: 2025-12-05 15:54:06.002452

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "87c52ec39f84"
down_revision = "7bd55f264e1b"
branch_labels = None
depends_on = None


DEFAULT_PERSONA_ID = 0

# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.

The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]

# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end


def upgrade() -> None:
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            UPDATE persona
            SET system_prompt = :system_prompt
            WHERE id = :persona_id
            """
        ),
        {"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
    )


def downgrade() -> None:
    # We don't revert the system prompt on downgrade since we don't know
    # what the previous value was. The new prompt is a reasonable default.
    pass


================================================
FILE: backend/alembic/versions/8818cf73fa1a_drop_include_citations.py
================================================
"""drop include citations

Revision ID: 8818cf73fa1a
Revises: 7ed603b64d5a
Create Date: 2025-09-02 19:43:50.060680

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "8818cf73fa1a"
down_revision = "7ed603b64d5a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_column("prompt", "include_citations")


def downgrade() -> None:
    op.add_column(
        "prompt",
        sa.Column(
            "include_citations",
            sa.BOOLEAN(),
            autoincrement=False,
            nullable=True,
        ),
    )
    # Set include_citations based on prompt name: FALSE for ImageGeneration, TRUE for others
    op.execute(
        sa.text(
            "UPDATE prompt SET include_citations = CASE WHEN name = 'ImageGeneration' THEN FALSE ELSE TRUE END"
        )
    )


================================================
FILE: backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py
================================================
"""Add is_visible to Persona

Revision ID: 891cd83c87a8
Revises: 76b60d407dfb
Create Date: 2023-12-21 11:55:54.132279

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "891cd83c87a8"
down_revision = "76b60d407dfb"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("is_visible", sa.Boolean(), nullable=True),
    )
    op.execute("UPDATE persona SET is_visible = true")
    op.alter_column("persona", "is_visible", nullable=False)

    op.add_column(
        "persona",
        sa.Column("display_priority", sa.Integer(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("persona", "is_visible")
    op.drop_column("persona", "display_priority")


================================================
FILE: backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py
================================================
"""Add full exception stack trace

Revision ID: 8987770549c0
Revises: ec3ec2eabf7b
Create Date: 2024-02-10 19:31:28.339135

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "8987770549c0"
down_revision = "ec3ec2eabf7b"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "index_attempt", sa.Column("full_exception_trace", sa.Text(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("index_attempt", "full_exception_trace")


================================================
FILE: backend/alembic/versions/8a87bd6ec550_associate_index_attempts_with_ccpair.py
================================================
"""associate index attempts with ccpair

Revision ID: 8a87bd6ec550
Revises: 4ea2c93919c1
Create Date: 2024-07-22 15:15:52.558451

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "8a87bd6ec550"
down_revision = "4ea2c93919c1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Add the new connector_credential_pair_id column
    op.add_column(
        "index_attempt",
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=True),
    )

    # Create a foreign key constraint to the connector_credential_pair table
    op.create_foreign_key(
        "fk_index_attempt_connector_credential_pair_id",
        "index_attempt",
        "connector_credential_pair",
        ["connector_credential_pair_id"],
        ["id"],
    )

    # Populate the new connector_credential_pair_id column using existing connector_id and credential_id
    op.execute(
        """
        UPDATE index_attempt ia
        SET connector_credential_pair_id = (
            SELECT id FROM connector_credential_pair ccp
            WHERE
                (ia.connector_id IS NULL OR ccp.connector_id = ia.connector_id)
                AND (ia.credential_id IS NULL OR ccp.credential_id = ia.credential_id)
            LIMIT 1
        )
        WHERE ia.connector_id IS NOT NULL OR ia.credential_id IS NOT NULL
        """
    )

    # For good measure
    op.execute(
        """
        DELETE FROM index_attempt
        WHERE connector_credential_pair_id IS NULL
        """
    )

    # Make the new connector_credential_pair_id column non-nullable
    op.alter_column("index_attempt", "connector_credential_pair_id", nullable=False)

    # Drop the old connector_id and credential_id columns
    op.drop_column("index_attempt", "connector_id")
    op.drop_column("index_attempt", "credential_id")

    # Update the index to use connector_credential_pair_id
    op.create_index(
        "ix_index_attempt_latest_for_connector_credential_pair",
        "index_attempt",
        ["connector_credential_pair_id", "time_created"],
    )


def downgrade() -> None:
    # Add back the old connector_id and credential_id columns
    op.add_column(
        "index_attempt", sa.Column("connector_id", sa.Integer(), nullable=True)
    )
    op.add_column(
        "index_attempt", sa.Column("credential_id", sa.Integer(), nullable=True)
    )

    # Populate the old connector_id and credential_id columns using the connector_credential_pair_id
    op.execute(
        """
        UPDATE index_attempt ia
        SET connector_id = ccp.connector_id, credential_id = ccp.credential_id
        FROM connector_credential_pair ccp
        WHERE ia.connector_credential_pair_id = ccp.id
        """
    )

    # Make the old connector_id and credential_id columns non-nullable
    op.alter_column("index_attempt", "connector_id", nullable=False)
    op.alter_column("index_attempt", "credential_id", nullable=False)

    # Drop the new connector_credential_pair_id column
    op.drop_constraint(
        "fk_index_attempt_connector_credential_pair_id",
        "index_attempt",
        type_="foreignkey",
    )
    op.drop_column("index_attempt", "connector_credential_pair_id")

    op.create_index(
        "ix_index_attempt_latest_for_connector_credential_pair",
        "index_attempt",
        ["connector_id", "credential_id", "time_created"],
    )


================================================
FILE: backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py
================================================
"""Restructure Document Indices

Revision ID: 8aabb57f3b49
Revises: 5e84129c8be3
Create Date: 2023-08-18 21:15:57.629515

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "8aabb57f3b49"
down_revision = "5e84129c8be3"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_table("chunk")
    op.execute("DROP TYPE IF EXISTS documentstoretype")


def downgrade() -> None:
    op.create_table(
        "chunk",
        sa.Column("id", sa.VARCHAR(), autoincrement=False, nullable=False),
        sa.Column(
            "document_store_type",
            postgresql.ENUM("VECTOR", "KEYWORD", name="documentstoretype"),
            autoincrement=False,
            nullable=False,
        ),
        sa.Column("document_id", sa.VARCHAR(), autoincrement=False, nullable=False),
        sa.ForeignKeyConstraint(
            ["document_id"], ["document.id"], name="chunk_document_id_fkey"
        ),
        sa.PrimaryKeyConstraint("id", "document_store_type", name="chunk_pkey"),
    )


================================================
FILE: backend/alembic/versions/8b5ce697290e_add_discord_bot_tables.py
================================================
"""Add Discord bot tables

Revision ID: 8b5ce697290e
Revises: a1b2c3d4e5f7
Create Date: 2025-01-14

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "8b5ce697290e"
down_revision = "a1b2c3d4e5f7"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # DiscordBotConfig (singleton table - one per tenant)
    op.create_table(
        "discord_bot_config",
        sa.Column(
            "id",
            sa.String(),
            primary_key=True,
            server_default=sa.text("'SINGLETON'"),
        ),
        sa.Column("bot_token", sa.LargeBinary(), nullable=False),  # EncryptedString
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
    )

    # DiscordGuildConfig
    op.create_table(
        "discord_guild_config",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
        sa.Column("guild_name", sa.String(), nullable=True),
        sa.Column("registration_key", sa.String(), nullable=False, unique=True),
        sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "default_persona_id",
            sa.Integer(),
            sa.ForeignKey("persona.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column(
            "enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
        ),
    )

    # DiscordChannelConfig
    op.create_table(
        "discord_channel_config",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column(
            "guild_config_id",
            sa.Integer(),
            sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("channel_id", sa.BigInteger(), nullable=False),
        sa.Column("channel_name", sa.String(), nullable=False),
        sa.Column(
            "channel_type",
            sa.String(20),
            server_default=sa.text("'text'"),
            nullable=False,
        ),
        sa.Column(
            "is_private",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=False,
        ),
        sa.Column(
            "thread_only_mode",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=False,
        ),
        sa.Column(
            "require_bot_invocation",
            sa.Boolean(),
            server_default=sa.text("true"),
            nullable=False,
        ),
        sa.Column(
            "persona_override_id",
            sa.Integer(),
            sa.ForeignKey("persona.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column(
            "enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
        ),
    )

    # Unique constraint: one config per channel per guild
    op.create_unique_constraint(
        "uq_discord_channel_guild_channel",
        "discord_channel_config",
        ["guild_config_id", "channel_id"],
    )


def downgrade() -> None:
    op.drop_table("discord_channel_config")
    op.drop_table("discord_guild_config")
    op.drop_table("discord_bot_config")


================================================
FILE: backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py
================================================
"""enable contextual retrieval

Revision ID: 8e1ac4f39a9f
Revises: 9aadf32dfeb4
Create Date: 2024-12-20 13:29:09.918661

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "8e1ac4f39a9f"
down_revision = "9aadf32dfeb4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "search_settings",
        sa.Column(
            "enable_contextual_rag",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "contextual_rag_llm_name",
            sa.String(),
            nullable=True,
        ),
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "contextual_rag_llm_provider",
            sa.String(),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("search_settings", "enable_contextual_rag")
    op.drop_column("search_settings", "contextual_rag_llm_name")
    op.drop_column("search_settings", "contextual_rag_llm_provider")


================================================
FILE: backend/alembic/versions/8e26726b7683_chat_context_addition.py
================================================
"""Chat Context Addition

Revision ID: 8e26726b7683
Revises: 5809c0787398
Create Date: 2023-09-13 18:34:31.327944

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "8e26726b7683"
down_revision = "5809c0787398"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "persona",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("system_text", sa.Text(), nullable=True),
        sa.Column("tools_text", sa.Text(), nullable=True),
        sa.Column("hint_text", sa.Text(), nullable=True),
        sa.Column("default_persona", sa.Boolean(), nullable=False),
        sa.Column("deleted", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.add_column("chat_message", sa.Column("persona_id", sa.Integer(), nullable=True))
    op.create_foreign_key(
        "fk_chat_message_persona_id", "chat_message", "persona", ["persona_id"], ["id"]
    )


def downgrade() -> None:
    op.drop_constraint("fk_chat_message_persona_id", "chat_message", type_="foreignkey")
    op.drop_column("chat_message", "persona_id")
    op.drop_table("persona")


================================================
FILE: backend/alembic/versions/8f43500ee275_add_index.py
================================================
"""add index

Revision ID: 8f43500ee275
Revises: da42808081e3
Create Date: 2025-02-24 17:35:33.072714

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "8f43500ee275"
down_revision = "da42808081e3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create a basic index on the lowercase message column for direct text matching
    # Limit to 1500 characters to stay well under the 2856 byte limit of btree version 4
    # op.execute(
    #     """
    #     CREATE INDEX idx_chat_message_message_lower
    #     ON chat_message (LOWER(substring(message, 1, 1500)))
    #     """
    # )
    pass


def downgrade() -> None:
    # Drop the index
    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")


================================================
FILE: backend/alembic/versions/8ffcc2bcfc11_add_needs_persona_sync_to_user_file.py
================================================
"""add needs_persona_sync to user_file

Revision ID: 8ffcc2bcfc11
Revises: 7616121f6e97
Create Date: 2026-02-23 10:48:48.343826

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "8ffcc2bcfc11"
down_revision = "7616121f6e97"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user_file",
        sa.Column(
            "needs_persona_sync",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )


def downgrade() -> None:
    op.drop_column("user_file", "needs_persona_sync")


================================================
FILE: backend/alembic/versions/904451035c9b_store_tool_details.py
================================================
"""Store Tool Details

Revision ID: 904451035c9b
Revises: 3b25685ff73c
Create Date: 2023-10-05 12:29:26.620000

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "904451035c9b"
down_revision = "3b25685ff73c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("tools", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
    )
    op.drop_column("persona", "tools_text")


def downgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("tools_text", sa.TEXT(), autoincrement=False, nullable=True),
    )
    op.drop_column("persona", "tools")


================================================
FILE: backend/alembic/versions/904e5138fffb_tags.py
================================================
"""Tags

Revision ID: 904e5138fffb
Revises: 891cd83c87a8
Create Date: 2024-01-01 10:44:43.733974

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "904e5138fffb"
down_revision = "891cd83c87a8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "tag",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("tag_key", sa.String(), nullable=False),
        sa.Column("tag_value", sa.String(), nullable=False),
        sa.Column("source", sa.String(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "tag_key", "tag_value", "source", name="_tag_key_value_source_uc"
        ),
    )
    op.create_table(
        "document__tag",
        sa.Column("document_id", sa.String(), nullable=False),
        sa.Column("tag_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["document.id"],
        ),
        sa.ForeignKeyConstraint(
            ["tag_id"],
            ["tag.id"],
        ),
        sa.PrimaryKeyConstraint("document_id", "tag_id"),
    )

    op.add_column(
        "search_doc",
        sa.Column(
            "doc_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    op.execute("UPDATE search_doc SET doc_metadata = '{}' WHERE doc_metadata IS NULL")
    op.alter_column("search_doc", "doc_metadata", nullable=False)


def downgrade() -> None:
    op.drop_table("document__tag")
    op.drop_table("tag")
    op.drop_column("search_doc", "doc_metadata")


================================================
FILE: backend/alembic/versions/9087b548dd69_seed_default_image_gen_config.py
================================================
"""seed_default_image_gen_config

Revision ID: 9087b548dd69
Revises: 2b90f3af54b8
Create Date: 2026-01-05 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "9087b548dd69"
down_revision = "2b90f3af54b8"
branch_labels = None
depends_on = None

# Constants for default image generation config
# Source: web/src/app/admin/configuration/image-generation/constants.ts
IMAGE_PROVIDER_ID = "openai_gpt_image_1"
MODEL_NAME = "gpt-image-1"
PROVIDER_NAME = "openai"


def upgrade() -> None:
    conn = op.get_bind()

    # Check if image_generation_config table already has records
    existing_configs = (
        conn.execute(sa.text("SELECT COUNT(*) FROM image_generation_config")).scalar()
        or 0
    )

    if existing_configs > 0:
        # Skip if configs already exist - user may have configured manually
        return

    # Find the first OpenAI LLM provider
    openai_provider = conn.execute(
        sa.text(
            """
            SELECT id, api_key
            FROM llm_provider
            WHERE provider = :provider
            ORDER BY id
            LIMIT 1
            """
        ),
        {"provider": PROVIDER_NAME},
    ).fetchone()

    if not openai_provider:
        # No OpenAI provider found - nothing to do
        return

    source_provider_id, api_key = openai_provider

    # Create new LLM provider for image generation (clone only api_key)
    result = conn.execute(
        sa.text(
            """
            INSERT INTO llm_provider (
                name, provider, api_key, api_base, api_version,
                deployment_name, default_model_name, is_public,
                is_default_provider, is_default_vision_provider, is_auto_mode
            )
            VALUES (
                :name, :provider, :api_key, NULL, NULL,
                NULL, :default_model_name, :is_public,
                NULL, NULL, :is_auto_mode
            )
            RETURNING id
            """
        ),
        {
            "name": f"Image Gen - {IMAGE_PROVIDER_ID}",
            "provider": PROVIDER_NAME,
            "api_key": api_key,
            "default_model_name": MODEL_NAME,
            "is_public": True,
            "is_auto_mode": False,
        },
    )
    new_provider_id = result.scalar()

    # Create model configuration
    result = conn.execute(
        sa.text(
            """
            INSERT INTO model_configuration (
                llm_provider_id, name, is_visible, max_input_tokens,
                supports_image_input, display_name
            )
            VALUES (
                :llm_provider_id, :name, :is_visible, :max_input_tokens,
                :supports_image_input, :display_name
            )
            RETURNING id
            """
        ),
        {
            "llm_provider_id": new_provider_id,
            "name": MODEL_NAME,
            "is_visible": True,
            "max_input_tokens": None,
            "supports_image_input": False,
            "display_name": None,
        },
    )
    model_config_id = result.scalar()

    # Create image generation config
    conn.execute(
        sa.text(
            """
            INSERT INTO image_generation_config (
                image_provider_id, model_configuration_id, is_default
            )
            VALUES (
                :image_provider_id, :model_configuration_id, :is_default
            )
            """
        ),
        {
            "image_provider_id": IMAGE_PROVIDER_ID,
            "model_configuration_id": model_config_id,
            "is_default": True,
        },
    )


def downgrade() -> None:
    # We don't remove the config on downgrade since it's safe to keep around
    # If we upgrade again, it will be a no-op due to the existing records check
    pass


================================================
FILE: backend/alembic/versions/90b409d06e50_add_chat_compression_fields.py
================================================
"""add_chat_compression_fields

Revision ID: 90b409d06e50
Revises: f220515df7b4
Create Date: 2026-01-26 09:13:09.635427

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "90b409d06e50"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add last_summarized_message_id to chat_message
    # This field marks a message as a summary and indicates the last message it covers.
    # Summaries are branch-aware via their parent_message_id pointing to the branch.
    op.add_column(
        "chat_message",
        sa.Column(
            "last_summarized_message_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "last_summarized_message_id")


================================================
FILE: backend/alembic/versions/90e3b9af7da4_tag_fix.py
================================================
"""tag-fix

Revision ID: 90e3b9af7da4
Revises: 62c3a055a141
Create Date: 2025-08-01 20:58:14.607624

"""

import json
import logging
import os

from typing import cast
from typing import Generator

from alembic import op
import sqlalchemy as sa

from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.db.search_settings import SearchSettings
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.constants import AuthType
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client

logger = logging.getLogger("alembic.runtime.migration")


# revision identifiers, used by Alembic.
revision = "90e3b9af7da4"
down_revision = "62c3a055a141"
branch_labels = None
depends_on = None

SKIP_TAG_FIX = os.environ.get("SKIP_TAG_FIX", "true").lower() == "true"

# override for cloud
if AUTH_TYPE == AuthType.CLOUD:
    SKIP_TAG_FIX = True


def set_is_list_for_known_tags() -> None:
    """
    Sets is_list to true for all tags that are known to be lists.
    """
    LIST_METADATA: list[tuple[str, str]] = [
        ("CLICKUP", "tags"),
        ("CONFLUENCE", "labels"),
        ("DISCOURSE", "tags"),
        ("FRESHDESK", "emails"),
        ("GITHUB", "assignees"),
        ("GITHUB", "labels"),
        ("GURU", "tags"),
        ("GURU", "folders"),
        ("HUBSPOT", "associated_contact_ids"),
        ("HUBSPOT", "associated_company_ids"),
        ("HUBSPOT", "associated_deal_ids"),
        ("HUBSPOT", "associated_ticket_ids"),
        ("JIRA", "labels"),
        ("MEDIAWIKI", "categories"),
        ("ZENDESK", "labels"),
        ("ZENDESK", "content_tags"),
    ]

    bind = op.get_bind()
    for source, key in LIST_METADATA:
        bind.execute(
            sa.text(
                f"""
                UPDATE tag
                SET is_list = true
                WHERE tag_key = '{key}'
                AND source = '{source}'
                """
            )
        )


def set_is_list_for_list_tags() -> None:
    """
    Sets is_list to true for all tags which have multiple values for a given
    document, key, and source triplet. This only works if we remove old tags
    from the database.
    """
    bind = op.get_bind()
    bind.execute(
        sa.text(
            """
            UPDATE tag
            SET is_list = true
            FROM (
                SELECT DISTINCT tag.tag_key, tag.source
                FROM tag
                JOIN document__tag ON tag.id = document__tag.tag_id
                GROUP BY tag.tag_key, tag.source, document__tag.document_id
                HAVING count(*) > 1
            ) AS list_tags
            WHERE tag.tag_key = list_tags.tag_key
            AND tag.source = list_tags.source
            """
        )
    )


def log_list_tags() -> None:
    bind = op.get_bind()
    result = bind.execute(
        sa.text(
            """
            SELECT DISTINCT source, tag_key
            FROM tag
            WHERE is_list
            ORDER BY source, tag_key
            """
        )
    ).fetchall()
    logger.info(
        "List tags:\n" + "\n".join(f"  {source}: {key}" for source, key in result)
    )


def remove_old_tags() -> None:
    """
    Removes old tags from the database.
    Previously, there was a bug where if a document got indexed with a tag and then
    the document got reindexed, the old tag would not be removed.
    This function removes those old tags by comparing it against the tags in vespa.
    """
    current_search_settings, _ = active_search_settings()

    # Get the index name
    if hasattr(current_search_settings, "index_name"):
        index_name = current_search_settings.index_name
    else:
        # Default index name if we can't get it from the document_index
        index_name = "danswer_index"

    for batch in _get_batch_documents_with_multiple_tags():
        n_deleted = 0

        for document_id in batch:
            true_metadata = _get_vespa_metadata(document_id, index_name)
            tags = _get_document_tags(document_id)

            # identify document__tags to delete
            to_delete: list[str] = []
            for tag_id, tag_key, tag_value in tags:
                true_val = true_metadata.get(tag_key, "")
                if (isinstance(true_val, list) and tag_value not in true_val) or (
                    isinstance(true_val, str) and tag_value != true_val
                ):
                    to_delete.append(str(tag_id))

            if not to_delete:
                continue

            # delete old document__tags
            bind = op.get_bind()
            result = bind.execute(
                sa.text(
                    f"""
                    DELETE FROM document__tag
                    WHERE document_id = '{document_id}'
                    AND tag_id IN ({",".join(to_delete)})
                    """
                )
            )
            n_deleted += result.rowcount
        logger.info(f"Processed {len(batch)} documents and deleted {n_deleted} tags")


def active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:
    result = op.get_bind().execute(
        sa.text(
            """
        SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1
        """
        )
    )
    search_settings_fetch = result.fetchall()
    search_settings = (
        SearchSettings(**search_settings_fetch[0]._asdict())
        if search_settings_fetch
        else None
    )

    result2 = op.get_bind().execute(
        sa.text(
            """
        SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1
        """
        )
    )
    search_settings_future_fetch = result2.fetchall()
    search_settings_future = (
        SearchSettings(**search_settings_future_fetch[0]._asdict())
        if search_settings_future_fetch
        else None
    )

    if not isinstance(search_settings, SearchSettings):
        raise RuntimeError(
            "current search settings is of type " + str(type(search_settings))
        )
    if (
        not isinstance(search_settings_future, SearchSettings)
        and search_settings_future is not None
    ):
        raise RuntimeError(
            "future search settings is of type " + str(type(search_settings_future))
        )

    return search_settings, search_settings_future


def _get_batch_documents_with_multiple_tags(
    batch_size: int = 128,
) -> Generator[list[str], None, None]:
    """
    Returns a list of document ids which contain a one to many tag.
    The document may either contain a list metadata value, or may contain leftover
    old tags from reindexing.
    """
    offset_clause = ""
    bind = op.get_bind()

    while True:
        batch = bind.execute(
            sa.text(
                f"""
                SELECT DISTINCT document__tag.document_id
                FROM tag
                JOIN document__tag ON tag.id = document__tag.tag_id
                GROUP BY tag.tag_key, tag.source, document__tag.document_id
                HAVING count(*) > 1 {offset_clause}
                ORDER BY document__tag.document_id
                LIMIT {batch_size}
                """
            )
        ).fetchall()
        if not batch:
            break
        doc_ids = [document_id for (document_id,) in batch]
        yield doc_ids
        offset_clause = f"AND document__tag.document_id > '{doc_ids[-1]}'"


def _get_vespa_metadata(
    document_id: str, index_name: str
) -> dict[str, str | list[str]]:
    url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)

    # Document-Selector language
    selection = (
        f"{index_name}.document_id=='{document_id}' and {index_name}.chunk_id==0"
    )

    params: dict[str, str | int] = {
        "selection": selection,
        "wantedDocumentCount": 1,
        "fieldSet": f"{index_name}:metadata",
    }

    with get_vespa_http_client() as client:
        resp = client.get(url, params=params)
        resp.raise_for_status()

    docs = resp.json().get("documents", [])
    if not docs:
        raise RuntimeError(f"No chunk-0 found for document {document_id}")

    # for some reason, metadata is a string
    metadata = docs[0]["fields"]["metadata"]
    return json.loads(metadata)


def _get_document_tags(document_id: str) -> list[tuple[int, str, str]]:
    bind = op.get_bind()
    result = bind.execute(
        sa.text(
            f"""
            SELECT tag.id, tag.tag_key, tag.tag_value
            FROM tag
            JOIN document__tag ON tag.id = document__tag.tag_id
            WHERE document__tag.document_id = '{document_id}'
            """
        )
    ).fetchall()
    return cast(list[tuple[int, str, str]], result)


def upgrade() -> None:
    op.add_column(
        "tag",
        sa.Column("is_list", sa.Boolean(), nullable=False, server_default="false"),
    )
    op.drop_constraint(
        constraint_name="_tag_key_value_source_uc",
        table_name="tag",
        type_="unique",
    )
    op.create_unique_constraint(
        constraint_name="_tag_key_value_source_list_uc",
        table_name="tag",
        columns=["tag_key", "tag_value", "source", "is_list"],
    )
    set_is_list_for_known_tags()

    if SKIP_TAG_FIX:
        logger.warning(
            "Skipping removal of old tags. "
            "This can cause issues when using the knowledge graph, or "
            "when filtering for documents by tags."
        )
        log_list_tags()
        return

    remove_old_tags()
    set_is_list_for_list_tags()

    # debug
    log_list_tags()


def downgrade() -> None:
    # the migration adds and populates the is_list column, and removes old bugged tags
    # there isn't a point in adding back the bugged tags, so we just drop the column
    op.drop_constraint(
        constraint_name="_tag_key_value_source_list_uc",
        table_name="tag",
        type_="unique",
    )
    op.create_unique_constraint(
        constraint_name="_tag_key_value_source_uc",
        table_name="tag",
        columns=["tag_key", "tag_value", "source"],
    )
    op.drop_column("tag", "is_list")


================================================
FILE: backend/alembic/versions/91a0a4d62b14_milestone.py
================================================
"""Milestone

Revision ID: 91a0a4d62b14
Revises: dab04867cd88
Create Date: 2024-12-13 19:03:30.947551

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "91a0a4d62b14"
down_revision = "dab04867cd88"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "milestone",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("tenant_id", sa.String(), nullable=True),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("event_type", sa.String(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
    )


def downgrade() -> None:
    op.drop_table("milestone")


================================================
FILE: backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py
================================================
"""Remove DocumentSource from Tag

Revision ID: 91fd3b470d1a
Revises: 173cae5bba26
Create Date: 2024-03-21 12:05:23.956734

"""

from alembic import op
import sqlalchemy as sa
from onyx.configs.constants import DocumentSource

# revision identifiers, used by Alembic.
revision = "91fd3b470d1a"
down_revision = "173cae5bba26"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column(
        "tag",
        "source",
        type_=sa.String(length=50),
        existing_type=sa.Enum(DocumentSource, native_enum=False),
        existing_nullable=False,
    )


def downgrade() -> None:
    op.alter_column(
        "tag",
        "source",
        type_=sa.Enum(DocumentSource, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )


================================================
FILE: backend/alembic/versions/91ffac7e65b3_add_expiry_time.py
================================================
"""add expiry time

Revision ID: 91ffac7e65b3
Revises: bc9771dccadf
Create Date: 2024-06-24 09:39:56.462242

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "91ffac7e65b3"
down_revision = "795b20b85b4b"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "user", sa.Column("oidc_expiry", sa.DateTime(timezone=True), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("user", "oidc_expiry")


================================================
FILE: backend/alembic/versions/93560ba1b118_add_web_ui_option_to_slack_config.py
================================================
"""add web ui option to slack config

Revision ID: 93560ba1b118
Revises: 6d562f86c78b
Create Date: 2024-11-24 06:36:17.490612

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "93560ba1b118"
down_revision = "6d562f86c78b"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add show_continue_in_web_ui with default False to all existing channel_configs
    op.execute(
        """
        UPDATE slack_channel_config
        SET channel_config = channel_config || '{"show_continue_in_web_ui": false}'::jsonb
        WHERE NOT channel_config ? 'show_continue_in_web_ui'
        """
    )


def downgrade() -> None:
    # Remove show_continue_in_web_ui from all channel_configs
    op.execute(
        """
        UPDATE slack_channel_config
        SET channel_config = channel_config - 'show_continue_in_web_ui'
        """
    )


================================================
FILE: backend/alembic/versions/93a2e195e25c_add_voice_provider_and_user_voice_prefs.py
================================================
"""add_voice_provider_and_user_voice_prefs

Revision ID: 93a2e195e25c
Revises: 27fb147a843f
Create Date: 2026-02-23 15:16:39.507304

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import column
from sqlalchemy import true
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "93a2e195e25c"
down_revision = "27fb147a843f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create voice_provider table
    op.create_table(
        "voice_provider",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("name", sa.String(), unique=True, nullable=False),
        sa.Column("provider_type", sa.String(), nullable=False),
        sa.Column("api_key", sa.LargeBinary(), nullable=True),
        sa.Column("api_base", sa.String(), nullable=True),
        sa.Column("custom_config", postgresql.JSONB(), nullable=True),
        sa.Column("stt_model", sa.String(), nullable=True),
        sa.Column("tts_model", sa.String(), nullable=True),
        sa.Column("default_voice", sa.String(), nullable=True),
        sa.Column(
            "is_default_stt", sa.Boolean(), nullable=False, server_default="false"
        ),
        sa.Column(
            "is_default_tts", sa.Boolean(), nullable=False, server_default="false"
        ),
        sa.Column("deleted", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            onupdate=sa.func.now(),
            nullable=False,
        ),
    )

    # Add partial unique indexes to enforce only one default STT/TTS provider
    op.create_index(
        "ix_voice_provider_one_default_stt",
        "voice_provider",
        ["is_default_stt"],
        unique=True,
        postgresql_where=column("is_default_stt") == true(),
    )
    op.create_index(
        "ix_voice_provider_one_default_tts",
        "voice_provider",
        ["is_default_tts"],
        unique=True,
        postgresql_where=column("is_default_tts") == true(),
    )

    # Add voice preference columns to user table
    op.add_column(
        "user",
        sa.Column(
            "voice_auto_send",
            sa.Boolean(),
            default=False,
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "user",
        sa.Column(
            "voice_auto_playback",
            sa.Boolean(),
            default=False,
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "user",
        sa.Column(
            "voice_playback_speed",
            sa.Float(),
            default=1.0,
            nullable=False,
            server_default="1.0",
        ),
    )


def downgrade() -> None:
    # Remove user voice preference columns
    op.drop_column("user", "voice_playback_speed")
    op.drop_column("user", "voice_auto_playback")
    op.drop_column("user", "voice_auto_send")

    op.drop_index("ix_voice_provider_one_default_tts", table_name="voice_provider")
    op.drop_index("ix_voice_provider_one_default_stt", table_name="voice_provider")

    # Drop voice_provider table
    op.drop_table("voice_provider")


================================================
FILE: backend/alembic/versions/93c15d6a6fbb_add_chunk_error_and_vespa_count_columns_.py
================================================
"""add chunk error and vespa count columns to opensearch tenant migration

Revision ID: 93c15d6a6fbb
Revises: d3fd499c829c
Create Date: 2026-02-11 23:07:34.576725

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "93c15d6a6fbb"
down_revision = "d3fd499c829c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "total_chunks_errored",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "total_chunks_in_vespa",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )


def downgrade() -> None:
    op.drop_column("opensearch_tenant_migration_record", "total_chunks_in_vespa")
    op.drop_column("opensearch_tenant_migration_record", "total_chunks_errored")


================================================
FILE: backend/alembic/versions/949b4a92a401_remove_rt.py
================================================
"""remove rt

Revision ID: 949b4a92a401
Revises: 1b10e1fda030
Create Date: 2024-10-26 13:06:06.937969

"""

from alembic import op
from sqlalchemy.orm import Session
from sqlalchemy import text

# Import your models and constants
from onyx.db.models import (
    Connector,
    ConnectorCredentialPair,
    Credential,
    IndexAttempt,
)


# revision identifiers, used by Alembic.
revision = "949b4a92a401"
down_revision = "1b10e1fda030"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Deletes all RequestTracker connectors and associated data
    bind = op.get_bind()
    session = Session(bind=bind)

    # Get connectors using raw SQL
    result = bind.execute(
        text("SELECT id FROM connector WHERE source = 'requesttracker'")
    )
    connector_ids = [row[0] for row in result]

    if connector_ids:
        cc_pairs_to_delete = (
            session.query(ConnectorCredentialPair)
            .filter(ConnectorCredentialPair.connector_id.in_(connector_ids))
            .all()
        )

        cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs_to_delete]

        if cc_pair_ids:
            session.query(IndexAttempt).filter(
                IndexAttempt.connector_credential_pair_id.in_(cc_pair_ids)
            ).delete(synchronize_session=False)

            session.query(ConnectorCredentialPair).filter(
                ConnectorCredentialPair.id.in_(cc_pair_ids)
            ).delete(synchronize_session=False)

        credential_ids = [cc_pair.credential_id for cc_pair in cc_pairs_to_delete]
        if credential_ids:
            session.query(Credential).filter(Credential.id.in_(credential_ids)).delete(
                synchronize_session=False
            )

        session.query(Connector).filter(Connector.id.in_(connector_ids)).delete(
            synchronize_session=False
        )

    session.commit()


def downgrade() -> None:
    # No-op downgrade as we cannot restore deleted data
    pass


================================================
FILE: backend/alembic/versions/94dc3d0236f8_make_document_set_description_optional.py
================================================
"""make document set description optional

Revision ID: 94dc3d0236f8
Revises: bf7a81109301
Create Date: 2024-12-11 11:26:10.616722

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "94dc3d0236f8"
down_revision = "bf7a81109301"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Make document_set.description column nullable
    op.alter_column(
        "document_set", "description", existing_type=sa.String(), nullable=True
    )


def downgrade() -> None:
    # Revert document_set.description column to non-nullable
    op.alter_column(
        "document_set", "description", existing_type=sa.String(), nullable=False
    )


================================================
FILE: backend/alembic/versions/96a5702df6aa_mcp_tool_enabled.py
================================================
"""mcp_tool_enabled

Revision ID: 96a5702df6aa
Revises: 40926a4dab77
Create Date: 2025-10-09 12:10:21.733097

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "96a5702df6aa"
down_revision = "40926a4dab77"
branch_labels = None
depends_on = None


DELETE_DISABLED_TOOLS_SQL = "DELETE FROM tool WHERE enabled = false"


def upgrade() -> None:
    op.add_column(
        "tool",
        sa.Column(
            "enabled",
            sa.Boolean(),
            nullable=False,
            server_default=sa.true(),
        ),
    )
    op.create_index(
        "ix_tool_mcp_server_enabled",
        "tool",
        ["mcp_server_id", "enabled"],
    )
    # Remove the server default so application controls defaulting
    op.alter_column("tool", "enabled", server_default=None)


def downgrade() -> None:
    op.execute(DELETE_DISABLED_TOOLS_SQL)
    op.drop_index("ix_tool_mcp_server_enabled", table_name="tool")
    op.drop_column("tool", "enabled")


================================================
FILE: backend/alembic/versions/977e834c1427_seed_default_groups.py
================================================
"""seed_default_groups

Revision ID: 977e834c1427
Revises: 8188861f4e92
Create Date: 2026-03-25 14:59:41.313091

"""

from typing import Any

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import insert as pg_insert


# revision identifiers, used by Alembic.
revision = "977e834c1427"
down_revision = "8188861f4e92"
branch_labels = None
depends_on = None

# (group_name, permission_value)
DEFAULT_GROUPS = [
    ("Admin", "admin"),
    ("Basic", "basic"),
]

CUSTOM_SUFFIX = "(Custom)"

MAX_RENAME_ATTEMPTS = 100

# Reflect table structures for use in DML
user_group_table = sa.table(
    "user_group",
    sa.column("id", sa.Integer),
    sa.column("name", sa.String),
    sa.column("is_up_to_date", sa.Boolean),
    sa.column("is_up_for_deletion", sa.Boolean),
    sa.column("is_default", sa.Boolean),
)

permission_grant_table = sa.table(
    "permission_grant",
    sa.column("group_id", sa.Integer),
    sa.column("permission", sa.String),
    sa.column("grant_source", sa.String),
)

user__user_group_table = sa.table(
    "user__user_group",
    sa.column("user_group_id", sa.Integer),
    sa.column("user_id", sa.Uuid),
)


def _find_available_name(conn: sa.engine.Connection, base: str) -> str:
    """Return a name like 'Admin (Custom)' or 'Admin (Custom 2)' that is not taken."""
    candidate = f"{base} {CUSTOM_SUFFIX}"
    attempt = 1
    while attempt <= MAX_RENAME_ATTEMPTS:
        exists: Any = conn.execute(
            sa.select(sa.literal(1))
            .select_from(user_group_table)
            .where(user_group_table.c.name == candidate)
            .limit(1)
        ).fetchone()
        if exists is None:
            return candidate
        attempt += 1
        candidate = f"{base} (Custom {attempt})"
    raise RuntimeError(
        f"Could not find an available name for group '{base}' "
        f"after {MAX_RENAME_ATTEMPTS} attempts"
    )


def upgrade() -> None:
    conn = op.get_bind()

    for group_name, permission_value in DEFAULT_GROUPS:
        # Step 1: Rename ALL existing groups that clash with the canonical name.
        conflicting = conn.execute(
            sa.select(user_group_table.c.id, user_group_table.c.name).where(
                user_group_table.c.name == group_name
            )
        ).fetchall()

        for row_id, row_name in conflicting:
            new_name = _find_available_name(conn, row_name)
            op.execute(
                sa.update(user_group_table)
                .where(user_group_table.c.id == row_id)
                .values(name=new_name, is_up_to_date=False)
            )

        # Step 2: Create a fresh default group.
        result = conn.execute(
            user_group_table.insert()
            .values(
                name=group_name,
                is_up_to_date=True,
                is_up_for_deletion=False,
                is_default=True,
            )
            .returning(user_group_table.c.id)
        ).fetchone()
        assert result is not None
        group_id = result[0]

        # Step 3: Upsert permission grant.
        op.execute(
            pg_insert(permission_grant_table)
            .values(
                group_id=group_id,
                permission=permission_value,
                grant_source="SYSTEM",
            )
            .on_conflict_do_nothing(index_elements=["group_id", "permission"])
        )


def downgrade() -> None:
    # Remove the default groups created by this migration.
    # First remove user-group memberships that reference default groups
    # to avoid FK violations, then delete the groups themselves.
    default_group_ids = sa.select(user_group_table.c.id).where(
        user_group_table.c.is_default == True  # noqa: E712
    )
    conn = op.get_bind()
    conn.execute(
        sa.delete(user__user_group_table).where(
            user__user_group_table.c.user_group_id.in_(default_group_ids)
        )
    )
    conn.execute(
        sa.delete(user_group_table).where(
            user_group_table.c.is_default == True  # noqa: E712
        )
    )


================================================
FILE: backend/alembic/versions/97dbb53fa8c8_add_syncrecord.py
================================================
"""Add SyncRecord

Revision ID: 97dbb53fa8c8
Revises: 369644546676
Create Date: 2025-01-11 19:39:50.426302

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "97dbb53fa8c8"
down_revision = "be2ab2aa50ee"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "sync_record",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("entity_id", sa.Integer(), nullable=False),
        sa.Column(
            "sync_type",
            sa.Enum(
                "DOCUMENT_SET",
                "USER_GROUP",
                "CONNECTOR_DELETION",
                name="synctype",
                native_enum=False,
                length=40,
            ),
            nullable=False,
        ),
        sa.Column(
            "sync_status",
            sa.Enum(
                "IN_PROGRESS",
                "SUCCESS",
                "FAILED",
                "CANCELED",
                name="syncstatus",
                native_enum=False,
                length=40,
            ),
            nullable=False,
        ),
        sa.Column("num_docs_synced", sa.Integer(), nullable=False),
        sa.Column("sync_start_time", sa.DateTime(timezone=True), nullable=False),
        sa.Column("sync_end_time", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )

    # Add index for fetch_latest_sync_record query
    op.create_index(
        "ix_sync_record_entity_id_sync_type_sync_start_time",
        "sync_record",
        ["entity_id", "sync_type", "sync_start_time"],
    )

    # Add index for cleanup_sync_records query
    op.create_index(
        "ix_sync_record_entity_id_sync_type_sync_status",
        "sync_record",
        ["entity_id", "sync_type", "sync_status"],
    )


def downgrade() -> None:
    op.drop_index("ix_sync_record_entity_id_sync_type_sync_status")
    op.drop_index("ix_sync_record_entity_id_sync_type_sync_start_time")
    op.drop_table("sync_record")


================================================
FILE: backend/alembic/versions/98a5008d8711_agent_tracking.py
================================================
"""agent_tracking

Revision ID: 98a5008d8711
Revises: 2f80c6a2550f
Create Date: 2025-01-29 17:00:00.000001

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import UUID

# revision identifiers, used by Alembic.
revision = "98a5008d8711"
down_revision = "2f80c6a2550f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "agent__search_metrics",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("persona_id", sa.Integer(), nullable=True),
        sa.Column("agent_type", sa.String(), nullable=False),
        sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
        sa.Column("base_duration_s", sa.Float(), nullable=False),
        sa.Column("full_duration_s", sa.Float(), nullable=False),
        sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
        sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
        sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    # Create sub_question table
    op.create_table(
        "agent__sub_question",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("primary_question_id", sa.Integer, sa.ForeignKey("chat_message.id")),
        sa.Column(
            "chat_session_id", UUID(as_uuid=True), sa.ForeignKey("chat_session.id")
        ),
        sa.Column("sub_question", sa.Text),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
        ),
        sa.Column("sub_answer", sa.Text),
        sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=True),
        sa.Column("level", sa.Integer(), nullable=False),
        sa.Column("level_question_num", sa.Integer(), nullable=False),
    )

    # Create sub_query table
    op.create_table(
        "agent__sub_query",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column(
            "parent_question_id", sa.Integer, sa.ForeignKey("agent__sub_question.id")
        ),
        sa.Column(
            "chat_session_id", UUID(as_uuid=True), sa.ForeignKey("chat_session.id")
        ),
        sa.Column("sub_query", sa.Text),
        sa.Column(
            "time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
        ),
    )

    # Create sub_query__search_doc association table
    op.create_table(
        "agent__sub_query__search_doc",
        sa.Column(
            "sub_query_id",
            sa.Integer,
            sa.ForeignKey("agent__sub_query.id"),
            primary_key=True,
        ),
        sa.Column(
            "search_doc_id",
            sa.Integer,
            sa.ForeignKey("search_doc.id"),
            primary_key=True,
        ),
    )

    op.add_column(
        "chat_message",
        sa.Column(
            "refined_answer_improvement",
            sa.Boolean(),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "refined_answer_improvement")
    op.drop_table("agent__sub_query__search_doc")
    op.drop_table("agent__sub_query")
    op.drop_table("agent__sub_question")
    op.drop_table("agent__search_metrics")


================================================
FILE: backend/alembic/versions/9a0296d7421e_add_is_auto_mode_to_llm_provider.py
================================================
"""add_is_auto_mode_to_llm_provider

Revision ID: 9a0296d7421e
Revises: 7206234e012a
Create Date: 2025-12-17 18:14:29.620981

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "9a0296d7421e"
down_revision = "7206234e012a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "llm_provider",
        sa.Column(
            "is_auto_mode",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )


def downgrade() -> None:
    op.drop_column("llm_provider", "is_auto_mode")


================================================
FILE: backend/alembic/versions/9aadf32dfeb4_add_user_files.py
================================================
"""add user files

Revision ID: 9aadf32dfeb4
Revises: 3781a5eb12cb
Create Date: 2025-01-26 16:08:21.551022

"""

import sqlalchemy as sa
import datetime
from alembic import op


# revision identifiers, used by Alembic.
revision = "9aadf32dfeb4"
down_revision = "3781a5eb12cb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create user_folder table without parent_id
    op.create_table(
        "user_folder",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
        sa.Column("name", sa.String(length=255), nullable=True),
        sa.Column("description", sa.String(length=255), nullable=True),
        sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
        ),
    )

    # Create user_file table with folder_id instead of parent_folder_id
    op.create_table(
        "user_file",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
        sa.Column(
            "folder_id",
            sa.Integer(),
            sa.ForeignKey("user_folder.id"),
            nullable=True,
        ),
        sa.Column("link_url", sa.String(), nullable=True),
        sa.Column("token_count", sa.Integer(), nullable=True),
        sa.Column("file_type", sa.String(), nullable=True),
        sa.Column("file_id", sa.String(length=255), nullable=False),
        sa.Column("document_id", sa.String(length=255), nullable=False),
        sa.Column("name", sa.String(length=255), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(),
            default=datetime.datetime.utcnow,
        ),
        sa.Column(
            "cc_pair_id",
            sa.Integer(),
            sa.ForeignKey("connector_credential_pair.id"),
            nullable=True,
            unique=True,
        ),
    )

    # Create persona__user_file table
    op.create_table(
        "persona__user_file",
        sa.Column(
            "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True
        ),
        sa.Column(
            "user_file_id",
            sa.Integer(),
            sa.ForeignKey("user_file.id"),
            primary_key=True,
        ),
    )

    # Create persona__user_folder table
    op.create_table(
        "persona__user_folder",
        sa.Column(
            "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True
        ),
        sa.Column(
            "user_folder_id",
            sa.Integer(),
            sa.ForeignKey("user_folder.id"),
            primary_key=True,
        ),
    )

    op.add_column(
        "connector_credential_pair",
        sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
    )

    # Update existing records to have is_user_file=False instead of NULL
    op.execute(
        "UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "is_user_file")
    # Drop the persona__user_folder table
    op.drop_table("persona__user_folder")
    # Drop the persona__user_file table
    op.drop_table("persona__user_file")
    # Drop the user_file table
    op.drop_table("user_file")
    # Drop the user_folder table
    op.drop_table("user_folder")


================================================
FILE: backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
================================================
"""Migration 1: User file schema additions

Revision ID: 9b66d3156fc6
Revises: b4ef3ae0bf6e
Create Date: 2025-09-22 09:42:06.086732

This migration adds new columns and tables without modifying existing data.
It is safe to run and can be easily rolled back.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
import logging

logger = logging.getLogger("alembic.runtime.migration")
# revision identifiers, used by Alembic.
revision = "9b66d3156fc6"
down_revision = "b4ef3ae0bf6e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add new columns and tables without modifying existing data."""

    # Enable pgcrypto for UUID generation
    op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto")

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # === USER_FILE: Add new columns ===
    logger.info("Adding new columns to user_file table...")

    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]

    # Check if ID is already UUID (in case of re-run after partial migration)
    id_is_uuid = any(
        col["name"] == "id" and "uuid" in str(col["type"]).lower()
        for col in inspector.get_columns("user_file")
    )

    # Add transitional UUID column only if ID is not already UUID
    if "new_id" not in user_file_columns and not id_is_uuid:
        op.add_column(
            "user_file",
            sa.Column(
                "new_id",
                psql.UUID(as_uuid=True),
                nullable=True,
                server_default=sa.text("gen_random_uuid()"),
            ),
        )
        op.create_unique_constraint("uq_user_file_new_id", "user_file", ["new_id"])
        logger.info("Added new_id column to user_file")

    # Add status column
    if "status" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "status",
                sa.Enum(
                    "PROCESSING",
                    "COMPLETED",
                    "FAILED",
                    "CANCELED",
                    name="userfilestatus",
                    native_enum=False,
                ),
                nullable=False,
                server_default="PROCESSING",
            ),
        )
        logger.info("Added status column to user_file")

    # Add other tracking columns
    if "chunk_count" not in user_file_columns:
        op.add_column(
            "user_file", sa.Column("chunk_count", sa.Integer(), nullable=True)
        )
        logger.info("Added chunk_count column to user_file")

    if "last_accessed_at" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column("last_accessed_at", sa.DateTime(timezone=True), nullable=True),
        )
        logger.info("Added last_accessed_at column to user_file")

    if "needs_project_sync" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "needs_project_sync",
                sa.Boolean(),
                nullable=False,
                server_default=sa.text("false"),
            ),
        )
        logger.info("Added needs_project_sync column to user_file")

    if "last_project_sync_at" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "last_project_sync_at", sa.DateTime(timezone=True), nullable=True
            ),
        )
        logger.info("Added last_project_sync_at column to user_file")

    if "document_id_migrated" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "document_id_migrated",
                sa.Boolean(),
                nullable=False,
                server_default=sa.text("true"),
            ),
        )
        logger.info("Added document_id_migrated column to user_file")

    # === USER_FOLDER -> USER_PROJECT rename ===
    table_names = set(inspector.get_table_names())

    if "user_folder" in table_names:
        logger.info("Updating user_folder table...")
        # Make description nullable first
        op.alter_column("user_folder", "description", nullable=True)

        # Rename table if user_project doesn't exist
        if "user_project" not in table_names:
            op.execute("ALTER TABLE user_folder RENAME TO user_project")
            logger.info("Renamed user_folder to user_project")
    elif "user_project" in table_names:
        # If already renamed, ensure column nullability
        project_cols = [col["name"] for col in inspector.get_columns("user_project")]
        if "description" in project_cols:
            op.alter_column("user_project", "description", nullable=True)

    # Add instructions column to user_project
    inspector = sa.inspect(bind)  # Refresh after rename
    if "user_project" in inspector.get_table_names():
        project_columns = [col["name"] for col in inspector.get_columns("user_project")]
        if "instructions" not in project_columns:
            op.add_column(
                "user_project",
                sa.Column("instructions", sa.String(), nullable=True),
            )
            logger.info("Added instructions column to user_project")

    # === CHAT_SESSION: Add project_id ===
    chat_session_columns = [
        col["name"] for col in inspector.get_columns("chat_session")
    ]
    if "project_id" not in chat_session_columns:
        op.add_column(
            "chat_session",
            sa.Column("project_id", sa.Integer(), nullable=True),
        )
        logger.info("Added project_id column to chat_session")

    # === PERSONA__USER_FILE: Add UUID column ===
    persona_user_file_columns = [
        col["name"] for col in inspector.get_columns("persona__user_file")
    ]
    if "user_file_id_uuid" not in persona_user_file_columns:
        op.add_column(
            "persona__user_file",
            sa.Column("user_file_id_uuid", psql.UUID(as_uuid=True), nullable=True),
        )
        logger.info("Added user_file_id_uuid column to persona__user_file")

    # === PROJECT__USER_FILE: Create new table ===
    if "project__user_file" not in inspector.get_table_names():
        op.create_table(
            "project__user_file",
            sa.Column("project_id", sa.Integer(), nullable=False),
            sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
            sa.PrimaryKeyConstraint("project_id", "user_file_id"),
        )
        logger.info("Created project__user_file table")

    # Only create the index if it doesn't exist
    existing_indexes = [
        ix["name"] for ix in inspector.get_indexes("project__user_file")
    ]
    if "idx_project__user_file_user_file_id" not in existing_indexes:
        op.create_index(
            "idx_project__user_file_user_file_id",
            "project__user_file",
            ["user_file_id"],
        )
        logger.info(
            "Created index idx_project__user_file_user_file_id on project__user_file"
        )

    logger.info("Migration 1 (schema additions) completed successfully")


def downgrade() -> None:
    """Remove added columns and tables."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting downgrade of schema additions...")

    # Drop project__user_file table
    if "project__user_file" in inspector.get_table_names():
        # op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
        op.drop_table("project__user_file")
        logger.info("Dropped project__user_file table")

    # Remove columns from persona__user_file
    if "persona__user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
        if "user_file_id_uuid" in columns:
            op.drop_column("persona__user_file", "user_file_id_uuid")
            logger.info("Dropped user_file_id_uuid from persona__user_file")

    # Remove columns from chat_session
    if "chat_session" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("chat_session")]
        if "project_id" in columns:
            op.drop_column("chat_session", "project_id")
            logger.info("Dropped project_id from chat_session")

    # Rename user_project back to user_folder and remove instructions
    if "user_project" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_project")]
        if "instructions" in columns:
            op.drop_column("user_project", "instructions")
        op.execute("ALTER TABLE user_project RENAME TO user_folder")
        # Update NULL descriptions to empty string before setting NOT NULL constraint
        op.execute("UPDATE user_folder SET description = '' WHERE description IS NULL")
        op.alter_column("user_folder", "description", nullable=False)
        logger.info("Renamed user_project back to user_folder")

    # Remove columns from user_file
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]

        columns_to_drop = [
            "document_id_migrated",
            "last_project_sync_at",
            "needs_project_sync",
            "last_accessed_at",
            "chunk_count",
            "status",
        ]

        for col in columns_to_drop:
            if col in columns:
                op.drop_column("user_file", col)
                logger.info(f"Dropped {col} from user_file")

        if "new_id" in columns:
            op.drop_constraint("uq_user_file_new_id", "user_file", type_="unique")
            op.drop_column("user_file", "new_id")
            logger.info("Dropped new_id from user_file")

    # Drop enum type if no columns use it
    bind.execute(sa.text("DROP TYPE IF EXISTS userfilestatus"))

    logger.info("Downgrade completed successfully")


================================================
FILE: backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
================================================
"""chat_message_agentic

Revision ID: 9c00a2bccb83
Revises: b7a7eee5aa15
Create Date: 2025-02-17 11:15:43.081150

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "9c00a2bccb83"
down_revision = "b7a7eee5aa15"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # First add the column as nullable
    op.add_column("chat_message", sa.Column("is_agentic", sa.Boolean(), nullable=True))

    # Update existing rows based on presence of SubQuestions
    op.execute(
        """
        UPDATE chat_message
        SET is_agentic = EXISTS (
            SELECT 1
            FROM agent__sub_question
            WHERE agent__sub_question.primary_question_id = chat_message.id
        )
        WHERE is_agentic IS NULL
    """
    )

    # Make the column non-nullable with a default value of False
    op.alter_column(
        "chat_message", "is_agentic", nullable=False, server_default=sa.text("false")
    )


def downgrade() -> None:
    op.drop_column("chat_message", "is_agentic")


================================================
FILE: backend/alembic/versions/9c54986124c6_add_scim_tables.py
================================================
"""add_scim_tables

Revision ID: 9c54986124c6
Revises: b51c6844d1df
Create Date: 2026-02-12 20:29:47.448614

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9c54986124c6"
down_revision = "b51c6844d1df"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "scim_token",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("hashed_token", sa.String(length=64), nullable=False),
        sa.Column("token_display", sa.String(), nullable=False),
        sa.Column(
            "created_by_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            server_default=sa.text("true"),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("last_used_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(["created_by_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("hashed_token"),
    )
    op.create_table(
        "scim_group_mapping",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("external_id", sa.String(), nullable=False),
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"], ["user_group.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("user_group_id"),
    )
    op.create_index(
        op.f("ix_scim_group_mapping_external_id"),
        "scim_group_mapping",
        ["external_id"],
        unique=True,
    )
    op.create_table(
        "scim_user_mapping",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("external_id", sa.String(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            onupdate=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("user_id"),
    )
    op.create_index(
        op.f("ix_scim_user_mapping_external_id"),
        "scim_user_mapping",
        ["external_id"],
        unique=True,
    )


def downgrade() -> None:
    op.drop_index(
        op.f("ix_scim_user_mapping_external_id"),
        table_name="scim_user_mapping",
    )
    op.drop_table("scim_user_mapping")
    op.drop_index(
        op.f("ix_scim_group_mapping_external_id"),
        table_name="scim_group_mapping",
    )
    op.drop_table("scim_group_mapping")
    op.drop_table("scim_token")


================================================
FILE: backend/alembic/versions/9cf5c00f72fe_add_creator_to_cc_pair.py
================================================
"""add creator to cc pair

Revision ID: 9cf5c00f72fe
Revises: 26b931506ecb
Create Date: 2024-11-12 15:16:42.682902

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9cf5c00f72fe"
down_revision = "26b931506ecb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "creator_id",
            sa.UUID(as_uuid=True),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "creator_id")


================================================
FILE: backend/alembic/versions/9d1543a37106_add_processing_duration_seconds_to_chat_.py
================================================
"""add processing_duration_seconds to chat_message

Revision ID: 9d1543a37106
Revises: cbc03e08d0f3
Create Date: 2026-01-21 11:42:18.546188

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "cbc03e08d0f3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "processing_duration_seconds")


================================================
FILE: backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py
================================================
"""Added retrieved docs to query event

Revision ID: 9d97fecfab7f
Revises: ffc707a226b4
Create Date: 2023-10-20 12:22:31.930449

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "9d97fecfab7f"
down_revision = "ffc707a226b4"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "query_event",
        sa.Column(
            "retrieved_document_ids",
            postgresql.ARRAY(sa.String()),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("query_event", "retrieved_document_ids")


================================================
FILE: backend/alembic/versions/9drpiiw74ljy_add_config_to_federated_connector.py
================================================
"""add config to federated_connector

Revision ID: 9drpiiw74ljy
Revises: 2acdef638fc2
Create Date: 2025-11-03 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "9drpiiw74ljy"
down_revision = "2acdef638fc2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    connection = op.get_bind()

    # Check if column already exists in current schema
    result = connection.execute(
        sa.text(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = current_schema()
            AND table_name = 'federated_connector'
            AND column_name = 'config'
            """
        )
    )
    column_exists = result.fetchone() is not None

    # Add config column with default empty object (only if it doesn't exist)
    if not column_exists:
        op.add_column(
            "federated_connector",
            sa.Column(
                "config", postgresql.JSONB(), nullable=False, server_default="{}"
            ),
        )

    # Data migration: Single bulk update for all Slack connectors
    connection.execute(
        sa.text(
            """
            WITH connector_configs AS (
                SELECT
                    fc.id as connector_id,
                    CASE
                        WHEN fcds.entities->'channels' IS NOT NULL
                            AND jsonb_typeof(fcds.entities->'channels') = 'array'
                            AND jsonb_array_length(fcds.entities->'channels') > 0
                        THEN
                            jsonb_build_object(
                                'channels', fcds.entities->'channels',
                                'search_all_channels', false
                            ) ||
                            CASE
                                WHEN fcds.entities->'include_dm' IS NOT NULL
                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
                                ELSE '{}'::jsonb
                            END
                        ELSE
                            jsonb_build_object('search_all_channels', true) ||
                            CASE
                                WHEN fcds.entities->'include_dm' IS NOT NULL
                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
                                ELSE '{}'::jsonb
                            END
                    END as config
                FROM federated_connector fc
                LEFT JOIN LATERAL (
                    SELECT entities
                    FROM federated_connector__document_set
                    WHERE federated_connector_id = fc.id
                    AND entities IS NOT NULL
                    ORDER BY id
                    LIMIT 1
                ) fcds ON true
                WHERE fc.source = 'FEDERATED_SLACK'
                AND fcds.entities IS NOT NULL
            )
            UPDATE federated_connector fc
            SET config = cc.config
            FROM connector_configs cc
            WHERE fc.id = cc.connector_id
            """
        )
    )


def downgrade() -> None:
    op.drop_column("federated_connector", "config")


================================================
FILE: backend/alembic/versions/9f696734098f_combine_search_and_chat.py
================================================
"""Combine Search and Chat

Revision ID: 9f696734098f
Revises: a8c2065484e6
Create Date: 2024-11-27 15:32:19.694972

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9f696734098f"
down_revision = "a8c2065484e6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("chat_session", "description", nullable=True)
    op.drop_column("chat_session", "one_shot")
    op.drop_column("slack_channel_config", "response_type")


def downgrade() -> None:
    op.execute("UPDATE chat_session SET description = '' WHERE description IS NULL")
    op.alter_column("chat_session", "description", nullable=False)
    op.add_column(
        "chat_session",
        sa.Column("one_shot", sa.Boolean(), nullable=False, server_default=sa.false()),
    )
    op.add_column(
        "slack_channel_config",
        sa.Column(
            "response_type", sa.String(), nullable=False, server_default="citations"
        ),
    )


================================================
FILE: backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py
================================================
"""update_default_tool_descriptions

Revision ID: a01bf2971c5d
Revises: 87c52ec39f84
Create Date: 2025-12-16 15:21:25.656375

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "a01bf2971c5d"
down_revision = "18b5b2524446"
branch_labels = None
depends_on = None

# new tool descriptions (12/2025)
TOOL_DESCRIPTIONS = {
    "SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
    "ImageGenerationTool": (
        "The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
        "The action will be used when the user asks the agent to generate an image."
    ),
    "WebSearchTool": (
        "The Web Search Action allows the agent to perform internet searches for up-to-date information."
    ),
    "KnowledgeGraphTool": (
        "The Knowledge Graph Search Action allows the agent to search the "
        "Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
        "and it requires the Knowledge Graph to be enabled."
    ),
    "OktaProfileTool": (
        "The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
        "This may include the user's name, email, phone number, address, and other details such as their "
        "manager and direct reports."
    ),
}


def upgrade() -> None:
    conn = op.get_bind()
    for tool_id, description in TOOL_DESCRIPTIONS.items():
        conn.execute(
            sa.text(
                "UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
            ),
            {"description": description, "tool_id": tool_id},
        )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py
================================================
"""add license table

Revision ID: a1b2c3d4e5f6
Revises: a01bf2971c5d
Create Date: 2025-12-04 10:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f6"
down_revision = "a01bf2971c5d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "license",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("license_data", sa.Text(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )

    # Singleton pattern - only ever one row in this table
    op.create_index(
        "idx_license_singleton",
        "license",
        [sa.text("(true)")],
        unique=True,
    )


def downgrade() -> None:
    op.drop_index("idx_license_singleton", table_name="license")
    op.drop_table("license")


================================================
FILE: backend/alembic/versions/a1b2c3d4e5f7_drop_agent_search_metrics_table.py
================================================
"""drop agent_search_metrics table

Revision ID: a1b2c3d4e5f7
Revises: 73e9983e5091
Create Date: 2026-01-17

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f7"
down_revision = "73e9983e5091"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_table("agent__search_metrics")


def downgrade() -> None:
    op.create_table(
        "agent__search_metrics",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=True),
        sa.Column("persona_id", sa.Integer(), nullable=True),
        sa.Column("agent_type", sa.String(), nullable=False),
        sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
        sa.Column("base_duration_s", sa.Float(), nullable=False),
        sa.Column("full_duration_s", sa.Float(), nullable=False),
        sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
        sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
        sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


================================================
FILE: backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py
================================================
"""Remove fast_default_model_name from llm_provider

Revision ID: a2b3c4d5e6f7
Revises: 2a391f840e85
Create Date: 2024-12-17

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a2b3c4d5e6f7"
down_revision = "2a391f840e85"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("llm_provider", "fast_default_model_name")


def downgrade() -> None:
    op.add_column(
        "llm_provider",
        sa.Column("fast_default_model_name", sa.String(), nullable=True),
    )


================================================
FILE: backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py
================================================
"""migration confluence to be explicit

Revision ID: a3795dce87be
Revises: 1f60f60c3401
Create Date: 2024-09-01 13:52:12.006740

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import table, column

revision = "a3795dce87be"
down_revision = "1f60f60c3401"
branch_labels: None = None
depends_on: None = None


def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]:
    from urllib.parse import urlparse

    def _extract_confluence_keys_from_cloud_url(wiki_url: str) -> tuple[str, str, str]:
        parsed_url = urlparse(wiki_url)
        wiki_base = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split('/spaces')[0]}"
        path_parts = parsed_url.path.split("/")
        space = path_parts[3]
        page_id = path_parts[5] if len(path_parts) > 5 else ""
        return wiki_base, space, page_id

    def _extract_confluence_keys_from_datacenter_url(
        wiki_url: str,
    ) -> tuple[str, str, str]:
        DISPLAY = "/display/"
        PAGE = "/pages/"
        parsed_url = urlparse(wiki_url)
        wiki_base = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split(DISPLAY)[0]}"
        space = DISPLAY.join(parsed_url.path.split(DISPLAY)[1:]).split("/")[0]
        page_id = ""
        if (content := parsed_url.path.split(PAGE)) and len(content) > 1:
            page_id = content[1]
        return wiki_base, space, page_id

    is_confluence_cloud = (
        ".atlassian.net/wiki/spaces/" in wiki_url
        or ".jira.com/wiki/spaces/" in wiki_url
    )

    if is_confluence_cloud:
        wiki_base, space, page_id = _extract_confluence_keys_from_cloud_url(wiki_url)
    else:
        wiki_base, space, page_id = _extract_confluence_keys_from_datacenter_url(
            wiki_url
        )

    return wiki_base, space, page_id, is_confluence_cloud


def reconstruct_confluence_url(
    wiki_base: str, space: str, page_id: str, is_cloud: bool
) -> str:
    if is_cloud:
        url = f"{wiki_base}/spaces/{space}"
        if page_id:
            url += f"/pages/{page_id}"
    else:
        url = f"{wiki_base}/display/{space}"
        if page_id:
            url += f"/pages/{page_id}"
    return url


def upgrade() -> None:
    connector = table(
        "connector",
        column("id", sa.Integer),
        column("source", sa.String()),
        column("input_type", sa.String()),
        column("connector_specific_config", postgresql.JSONB),
    )

    # Fetch all Confluence connectors
    connection = op.get_bind()
    confluence_connectors = connection.execute(
        sa.select(connector).where(
            sa.and_(
                connector.c.source == "CONFLUENCE", connector.c.input_type == "POLL"
            )
        )
    ).fetchall()

    for row in confluence_connectors:
        config = row.connector_specific_config
        wiki_page_url = config["wiki_page_url"]
        wiki_base, space, page_id, is_cloud = extract_confluence_keys_from_url(
            wiki_page_url
        )

        new_config = {
            "wiki_base": wiki_base,
            "space": space,
            "page_id": page_id,
            "is_cloud": is_cloud,
        }

        for key, value in config.items():
            if key not in ["wiki_page_url"]:
                new_config[key] = value

        op.execute(
            connector.update()
            .where(connector.c.id == row.id)
            .values(connector_specific_config=new_config)
        )


def downgrade() -> None:
    connector = table(
        "connector",
        column("id", sa.Integer),
        column("source", sa.String()),
        column("input_type", sa.String()),
        column("connector_specific_config", postgresql.JSONB),
    )

    confluence_connectors = (
        op.get_bind()
        .execute(
            sa.select(connector).where(
                connector.c.source == "CONFLUENCE", connector.c.input_type == "POLL"
            )
        )
        .fetchall()
    )

    for row in confluence_connectors:
        config = row.connector_specific_config
        if all(key in config for key in ["wiki_base", "space", "is_cloud"]):
            wiki_page_url = reconstruct_confluence_url(
                config["wiki_base"],
                config["space"],
                config.get("page_id", ""),
                config["is_cloud"],
            )

            new_config = {"wiki_page_url": wiki_page_url}
            new_config.update(
                {
                    k: v
                    for k, v in config.items()
                    if k not in ["wiki_base", "space", "page_id", "is_cloud"]
                }
            )

            op.execute(
                connector.update()
                .where(connector.c.id == row.id)
                .values(connector_specific_config=new_config)
            )


================================================
FILE: backend/alembic/versions/a3b8d9e2f1c4_make_scim_external_id_nullable.py
================================================
"""make scim_user_mapping.external_id nullable

Revision ID: a3b8d9e2f1c4
Revises: 2664261bfaab
Create Date: 2026-03-02

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "a3b8d9e2f1c4"
down_revision = "2664261bfaab"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column(
        "scim_user_mapping",
        "external_id",
        nullable=True,
    )


def downgrade() -> None:
    # Delete any rows where external_id is NULL before re-applying NOT NULL
    op.execute("DELETE FROM scim_user_mapping WHERE external_id IS NULL")
    op.alter_column(
        "scim_user_mapping",
        "external_id",
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/a3bfd0d64902_add_chosen_assistants_to_user_table.py
================================================
"""Add chosen_assistants to User table

Revision ID: a3bfd0d64902
Revises: ec85f2b3c544
Create Date: 2024-05-26 17:22:24.834741

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "a3bfd0d64902"
down_revision = "ec85f2b3c544"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("chosen_assistants", postgresql.ARRAY(sa.Integer()), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("user", "chosen_assistants")


================================================
FILE: backend/alembic/versions/a3c1a7904cd0_remove_userfile_related_deprecated_.py
================================================
"""remove userfile related deprecated fields

Revision ID: a3c1a7904cd0
Revises: 5c3dca366b35
Create Date: 2026-01-06 13:00:30.634396

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a3c1a7904cd0"
down_revision = "5c3dca366b35"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_column("user_file", "document_id")
    op.drop_column("user_file", "document_id_migrated")
    op.drop_column("connector_credential_pair", "is_user_file")


def downgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column("is_user_file", sa.Boolean(), nullable=False, server_default="false"),
    )
    op.add_column(
        "user_file",
        sa.Column("document_id", sa.String(), nullable=True),
    )
    op.add_column(
        "user_file",
        sa.Column(
            "document_id_migrated", sa.Boolean(), nullable=False, server_default="true"
        ),
    )


================================================
FILE: backend/alembic/versions/a3f8b2c1d4e5_add_preferred_response_id_to_chat_message.py
================================================
"""add preferred_response_id and model_display_name to chat_message

Revision ID: a3f8b2c1d4e5
Create Date: 2026-03-22

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a3f8b2c1d4e5"
down_revision = "25a5501dc766"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column(
            "preferred_response_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
            nullable=True,
        ),
    )
    op.add_column(
        "chat_message",
        sa.Column("model_display_name", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "model_display_name")
    op.drop_column("chat_message", "preferred_response_id")


================================================
FILE: backend/alembic/versions/a4f23d6b71c8_add_llm_provider_persona_restrictions.py
================================================
"""add llm provider persona restrictions

Revision ID: a4f23d6b71c8
Revises: 5e1c073d48a3
Create Date: 2025-10-21 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "a4f23d6b71c8"
down_revision = "5e1c073d48a3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "llm_provider__persona",
        sa.Column("llm_provider_id", sa.Integer(), nullable=False),
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["persona_id"], ["persona.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("llm_provider_id", "persona_id"),
    )
    op.create_index(
        "ix_llm_provider__persona_llm_provider_id",
        "llm_provider__persona",
        ["llm_provider_id"],
    )
    op.create_index(
        "ix_llm_provider__persona_persona_id",
        "llm_provider__persona",
        ["persona_id"],
    )
    op.create_index(
        "ix_llm_provider__persona_composite",
        "llm_provider__persona",
        ["persona_id", "llm_provider_id"],
    )


def downgrade() -> None:
    op.drop_index(
        "ix_llm_provider__persona_composite",
        table_name="llm_provider__persona",
    )
    op.drop_index(
        "ix_llm_provider__persona_persona_id",
        table_name="llm_provider__persona",
    )
    op.drop_index(
        "ix_llm_provider__persona_llm_provider_id",
        table_name="llm_provider__persona",
    )
    op.drop_table("llm_provider__persona")


================================================
FILE: backend/alembic/versions/a570b80a5f20_usergroup_tables.py
================================================
"""UserGroup tables

Revision ID: a570b80a5f20
Revises: 904451035c9b
Create Date: 2023-10-02 12:27:10.265725

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a570b80a5f20"
down_revision = "904451035c9b"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "user_group",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("is_up_to_date", sa.Boolean(), nullable=False),
        sa.Column("is_up_for_deletion", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )
    op.create_table(
        "user__user_group",
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("user_group_id", "user_id"),
    )
    op.create_table(
        "user_group__connector_credential_pair",
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=False),
        sa.Column("is_current", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"],
            ["connector_credential_pair.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("user_group_id", "cc_pair_id", "is_current"),
    )


def downgrade() -> None:
    op.drop_table("user_group__connector_credential_pair")
    op.drop_table("user__user_group")
    op.drop_table("user_group")


================================================
FILE: backend/alembic/versions/a6df6b88ef81_remove_recent_assistants.py
================================================
"""remove recent assistants

Revision ID: a6df6b88ef81
Revises: 4d58345da04a
Create Date: 2025-01-29 10:25:52.790407

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "a6df6b88ef81"
down_revision = "4d58345da04a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_column("user", "recent_assistants")


def downgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
        ),
    )


================================================
FILE: backend/alembic/versions/a7688ab35c45_add_public_external_user_group_table.py
================================================
"""Add public_external_user_group table

Revision ID: a7688ab35c45
Revises: 5c448911b12f
Create Date: 2025-05-06 20:55:12.747875

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "a7688ab35c45"
down_revision = "5c448911b12f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "public_external_user_group",
        sa.Column("external_user_group_id", sa.String(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("external_user_group_id", "cc_pair_id"),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"], ["connector_credential_pair.id"], ondelete="CASCADE"
        ),
    )


def downgrade() -> None:
    op.drop_table("public_external_user_group")


================================================
FILE: backend/alembic/versions/a852cbe15577_new_chat_history.py
================================================
"""New Chat History

Revision ID: a852cbe15577
Revises: 6436661d5b65
Create Date: 2025-11-08 15:16:37.781308

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "a852cbe15577"
down_revision = "6436661d5b65"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1. Drop old research/agent tables (CASCADE handles dependencies)
    op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
    op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")
    op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
    op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
    op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")

    # 2. ChatMessage table changes
    # Rename columns and add FKs
    op.alter_column(
        "chat_message", "parent_message", new_column_name="parent_message_id"
    )
    op.create_foreign_key(
        "fk_chat_message_parent_message_id",
        "chat_message",
        "chat_message",
        ["parent_message_id"],
        ["id"],
    )
    op.alter_column(
        "chat_message",
        "latest_child_message",
        new_column_name="latest_child_message_id",
    )
    op.create_foreign_key(
        "fk_chat_message_latest_child_message_id",
        "chat_message",
        "chat_message",
        ["latest_child_message_id"],
        ["id"],
    )

    # Add new column
    op.add_column(
        "chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
    )

    # Drop old columns
    op.drop_column("chat_message", "rephrased_query")
    op.drop_column("chat_message", "alternate_assistant_id")
    op.drop_column("chat_message", "overridden_model")
    op.drop_column("chat_message", "is_agentic")
    op.drop_column("chat_message", "refined_answer_improvement")
    op.drop_column("chat_message", "research_type")
    op.drop_column("chat_message", "research_plan")
    op.drop_column("chat_message", "research_answer_purpose")

    # 3. ToolCall table changes
    # Drop the unique constraint first
    op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")

    # Delete orphaned tool_call rows (those without valid chat_message)
    op.execute(
        "DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)"
    )

    # Add chat_session_id as nullable first, populate, then make NOT NULL
    op.add_column(
        "tool_call",
        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=True),
    )

    # Populate chat_session_id from the related chat_message
    op.execute(
        """
        UPDATE tool_call
        SET chat_session_id = chat_message.chat_session_id
        FROM chat_message
        WHERE tool_call.message_id = chat_message.id
    """
    )

    # Now make it NOT NULL and add FK
    op.alter_column("tool_call", "chat_session_id", nullable=False)
    op.create_foreign_key(
        "fk_tool_call_chat_session_id",
        "tool_call",
        "chat_session",
        ["chat_session_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Rename message_id and make nullable, recreate FK with CASCADE
    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
    op.alter_column(
        "tool_call",
        "message_id",
        new_column_name="parent_chat_message_id",
        nullable=True,
    )
    op.create_foreign_key(
        "fk_tool_call_parent_chat_message_id",
        "tool_call",
        "chat_message",
        ["parent_chat_message_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Add parent_tool_call_id with FK
    op.add_column(
        "tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
    )
    op.create_foreign_key(
        "fk_tool_call_parent_tool_call_id",
        "tool_call",
        "tool_call",
        ["parent_tool_call_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Add other new columns
    op.add_column(
        "tool_call",
        sa.Column("turn_number", sa.Integer(), nullable=False, server_default="0"),
    )
    op.add_column(
        "tool_call",
        sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
    )
    op.add_column("tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True))
    op.add_column(
        "tool_call",
        sa.Column("tool_call_tokens", sa.Integer(), nullable=False, server_default="0"),
    )
    op.add_column(
        "tool_call",
        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
    )

    # Rename columns
    op.alter_column(
        "tool_call", "tool_arguments", new_column_name="tool_call_arguments"
    )
    op.alter_column("tool_call", "tool_result", new_column_name="tool_call_response")

    # Change tool_call_response type from JSONB to Text
    op.execute(
        """
        ALTER TABLE tool_call
        ALTER COLUMN tool_call_response TYPE TEXT
        USING tool_call_response::text
    """
    )

    # Drop old columns
    op.drop_column("tool_call", "tool_name")

    # 4. Create new association table
    op.create_table(
        "tool_call__search_doc",
        sa.Column("tool_call_id", sa.Integer(), nullable=False),
        sa.Column("search_doc_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(
            ["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
    )

    # 5. Persona table change
    op.add_column(
        "persona",
        sa.Column(
            "replace_base_system_prompt",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )


def downgrade() -> None:
    # Reverse persona changes
    op.drop_column("persona", "replace_base_system_prompt")

    # Drop new association table
    op.drop_table("tool_call__search_doc")

    # Reverse ToolCall changes
    op.add_column(
        "tool_call",
        sa.Column("tool_name", sa.String(), nullable=False, server_default=""),
    )

    # Change tool_call_response back to JSONB
    op.execute(
        """
        ALTER TABLE tool_call
        ALTER COLUMN tool_call_response TYPE JSONB
        USING tool_call_response::jsonb
    """
    )

    op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
    op.alter_column(
        "tool_call", "tool_call_arguments", new_column_name="tool_arguments"
    )

    op.drop_column("tool_call", "generated_images")
    op.drop_column("tool_call", "tool_call_tokens")
    op.drop_column("tool_call", "reasoning_tokens")
    op.drop_column("tool_call", "tool_call_id")
    op.drop_column("tool_call", "turn_number")

    op.drop_constraint(
        "fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
    )
    op.drop_column("tool_call", "parent_tool_call_id")

    op.drop_constraint(
        "fk_tool_call_parent_chat_message_id", "tool_call", type_="foreignkey"
    )
    op.alter_column(
        "tool_call",
        "parent_chat_message_id",
        new_column_name="message_id",
        nullable=False,
    )
    op.create_foreign_key(
        "tool_call_message_id_fkey",
        "tool_call",
        "chat_message",
        ["message_id"],
        ["id"],
    )

    op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
    op.drop_column("tool_call", "chat_session_id")

    op.create_unique_constraint("uq_tool_call_message_id", "tool_call", ["message_id"])

    # Reverse ChatMessage changes
    # Note: research_answer_purpose and research_type were originally String columns,
    # not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)
    op.add_column(
        "chat_message",
        sa.Column("research_answer_purpose", sa.String(), nullable=True),
    )
    op.add_column(
        "chat_message", sa.Column("research_plan", postgresql.JSONB(), nullable=True)
    )
    op.add_column(
        "chat_message",
        sa.Column("research_type", sa.String(), nullable=True),
    )
    op.add_column(
        "chat_message",
        sa.Column("refined_answer_improvement", sa.Boolean(), nullable=True),
    )
    op.add_column(
        "chat_message",
        sa.Column("is_agentic", sa.Boolean(), nullable=False, server_default="false"),
    )
    op.add_column(
        "chat_message", sa.Column("overridden_model", sa.String(), nullable=True)
    )
    op.add_column(
        "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
    )
    # Recreate the FK constraint that was implicitly dropped when the column was dropped
    op.create_foreign_key(
        "fk_chat_message_persona",
        "chat_message",
        "persona",
        ["alternate_assistant_id"],
        ["id"],
    )
    op.add_column(
        "chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
    )

    op.drop_column("chat_message", "reasoning_tokens")

    op.drop_constraint(
        "fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
    )
    op.alter_column(
        "chat_message",
        "latest_child_message_id",
        new_column_name="latest_child_message",
    )

    op.drop_constraint(
        "fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
    )
    op.alter_column(
        "chat_message", "parent_message_id", new_column_name="parent_message"
    )

    # Recreate agent sub question and sub query tables
    op.create_table(
        "agent__sub_question",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("sub_question", sa.Text(), nullable=False),
        sa.Column("level", sa.Integer(), nullable=False),
        sa.Column("level_question_num", sa.Integer(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("sub_answer", sa.Text(), nullable=False),
        sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=False),
        sa.ForeignKeyConstraint(
            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_table(
        "agent__sub_query",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("parent_question_id", sa.Integer(), nullable=False),
        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("sub_query", sa.Text(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["parent_question_id"], ["agent__sub_question.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_table(
        "agent__sub_query__search_doc",
        sa.Column("sub_query_id", sa.Integer(), nullable=False),
        sa.Column("search_doc_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["sub_query_id"], ["agent__sub_query.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["search_doc_id"], ["search_doc.id"]),
        sa.PrimaryKeyConstraint("sub_query_id", "search_doc_id"),
    )

    # Recreate research agent tables
    op.create_table(
        "research_agent_iteration",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("purpose", sa.String(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(
            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "primary_question_id",
            "iteration_nr",
            name="_research_agent_iteration_unique_constraint",
        ),
    )

    op.create_table(
        "research_agent_iteration_sub_step",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("sub_step_instructions", sa.String(), nullable=True),
        sa.Column("sub_step_tool_id", sa.Integer(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.Column("sub_answer", sa.String(), nullable=True),
        sa.Column("cited_doc_results", postgresql.JSONB(), nullable=False),
        sa.Column("claims", postgresql.JSONB(), nullable=True),
        sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
        sa.Column("queries", postgresql.JSONB(), nullable=True),
        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
        sa.Column("additional_data", postgresql.JSONB(), nullable=True),
        sa.Column("file_ids", postgresql.JSONB(), nullable=True),
        sa.ForeignKeyConstraint(
            ["primary_question_id", "iteration_nr"],
            [
                "research_agent_iteration.primary_question_id",
                "research_agent_iteration.iteration_nr",
            ],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(["sub_step_tool_id"], ["tool.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )


================================================
FILE: backend/alembic/versions/a8c2065484e6_add_auto_scroll_to_user_model.py
================================================
"""add auto scroll to user model

Revision ID: a8c2065484e6
Revises: abe7378b8217
Create Date: 2024-11-22 17:34:09.690295

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "a8c2065484e6"
down_revision = "abe7378b8217"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("auto_scroll", sa.Boolean(), nullable=True, server_default=None),
    )


def downgrade() -> None:
    op.drop_column("user", "auto_scroll")


================================================
FILE: backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py
================================================
"""merge prompt into persona

Revision ID: abbfec3a5ac5
Revises: 8818cf73fa1a
Create Date: 2024-12-19 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "abbfec3a5ac5"
down_revision = "8818cf73fa1a"
branch_labels = None
depends_on = None


MAX_PROMPT_LENGTH = 5_000_000


def upgrade() -> None:
    """NOTE: Prompts without any Personas will just be lost."""
    # Step 1: Add new columns to persona table (only if they don't exist)

    # Check if columns exist before adding them
    connection = op.get_bind()
    inspector = sa.inspect(connection)
    existing_columns = [col["name"] for col in inspector.get_columns("persona")]

    if "system_prompt" not in existing_columns:
        op.add_column(
            "persona",
            sa.Column(
                "system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True
            ),
        )

    if "task_prompt" not in existing_columns:
        op.add_column(
            "persona",
            sa.Column(
                "task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True
            ),
        )

    if "datetime_aware" not in existing_columns:
        op.add_column(
            "persona",
            sa.Column(
                "datetime_aware", sa.Boolean(), nullable=False, server_default="true"
            ),
        )

    # Step 2: Migrate data from prompt table to persona table (only if tables exist)
    existing_tables = inspector.get_table_names()

    if "prompt" in existing_tables and "persona__prompt" in existing_tables:
        # For personas that have associated prompts, copy the prompt data
        op.execute(
            """
            UPDATE persona
            SET
                system_prompt = p.system_prompt,
                task_prompt = p.task_prompt,
                datetime_aware = p.datetime_aware
            FROM (
                -- Get the first prompt for each persona (in case there are multiple)
                SELECT DISTINCT ON (pp.persona_id)
                    pp.persona_id,
                    pr.system_prompt,
                    pr.task_prompt,
                    pr.datetime_aware
                FROM persona__prompt pp
                JOIN prompt pr ON pp.prompt_id = pr.id
            ) p
            WHERE persona.id = p.persona_id
        """
        )

        # Step 3: Update chat_message references
        # Since chat messages referenced prompt_id, we need to update them to use persona_id
        # This is complex as we need to map from prompt_id to persona_id

        # Check if chat_message has prompt_id column
        chat_message_columns = [
            col["name"] for col in inspector.get_columns("chat_message")
        ]
        if "prompt_id" in chat_message_columns:
            op.execute(
                """
                ALTER TABLE chat_message
                DROP CONSTRAINT IF EXISTS chat_message__prompt_fk
            """
            )
            op.drop_column("chat_message", "prompt_id")

    # Step 4: Handle personas without prompts - set default values if needed (always run this)
    op.execute(
        """
        UPDATE persona
        SET
            system_prompt = COALESCE(system_prompt, ''),
            task_prompt = COALESCE(task_prompt, '')
        WHERE system_prompt IS NULL OR task_prompt IS NULL
    """
    )

    # Step 5: Drop the persona__prompt association table (if it exists)
    if "persona__prompt" in existing_tables:
        op.drop_table("persona__prompt")

    # Step 6: Drop the prompt table (if it exists)
    if "prompt" in existing_tables:
        op.drop_table("prompt")

    # Step 7: Make system_prompt and task_prompt non-nullable after migration (only if they exist)
    op.alter_column(
        "persona",
        "system_prompt",
        existing_type=sa.String(length=MAX_PROMPT_LENGTH),
        nullable=False,
        server_default=None,
    )

    op.alter_column(
        "persona",
        "task_prompt",
        existing_type=sa.String(length=MAX_PROMPT_LENGTH),
        nullable=False,
        server_default=None,
    )


def downgrade() -> None:
    # Step 1: Recreate the prompt table
    op.create_table(
        "prompt",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=False),
        sa.Column("system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),
        sa.Column("task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),
        sa.Column(
            "datetime_aware", sa.Boolean(), nullable=False, server_default="true"
        ),
        sa.Column(
            "default_prompt", sa.Boolean(), nullable=False, server_default="false"
        ),
        sa.Column("deleted", sa.Boolean(), nullable=False, server_default="false"),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    # Step 2: Recreate the persona__prompt association table
    op.create_table(
        "persona__prompt",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("prompt_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["prompt_id"],
            ["prompt.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "prompt_id"),
    )

    # Step 3: Migrate data back from persona to prompt table
    op.execute(
        """
        INSERT INTO prompt (
            name,
            description,
            system_prompt,
            task_prompt,
            datetime_aware,
            default_prompt,
            deleted,
            user_id
        )
        SELECT
            CONCAT('Prompt for ', name),
            description,
            system_prompt,
            task_prompt,
            datetime_aware,
            is_default_persona,
            deleted,
            user_id
        FROM persona
        WHERE system_prompt IS NOT NULL AND system_prompt != ''
        RETURNING id, name
        """
    )

    # Step 4: Re-establish persona__prompt relationships
    op.execute(
        """
        INSERT INTO persona__prompt (persona_id, prompt_id)
        SELECT
            p.id as persona_id,
            pr.id as prompt_id
        FROM persona p
        JOIN prompt pr ON pr.name = CONCAT('Prompt for ', p.name)
        WHERE p.system_prompt IS NOT NULL AND p.system_prompt != ''
    """
    )

    # Step 5: Add prompt_id column back to chat_message
    op.add_column("chat_message", sa.Column("prompt_id", sa.Integer(), nullable=True))

    # Step 6: Re-establish foreign key constraint
    op.create_foreign_key(
        "chat_message__prompt_fk", "chat_message", "prompt", ["prompt_id"], ["id"]
    )

    # Step 7: Remove columns from persona table
    op.drop_column("persona", "datetime_aware")
    op.drop_column("persona", "task_prompt")
    op.drop_column("persona", "system_prompt")


================================================
FILE: backend/alembic/versions/abe7378b8217_add_indexing_trigger_to_cc_pair.py
================================================
"""add indexing trigger to cc_pair

Revision ID: abe7378b8217
Revises: 6d562f86c78b
Create Date: 2024-11-26 19:09:53.481171

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "abe7378b8217"
down_revision = "93560ba1b118"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "indexing_trigger",
            sa.Enum("UPDATE", "REINDEX", name="indexingmode", native_enum=False),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "indexing_trigger")


================================================
FILE: backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py
================================================
"""add last_pruned to the connector_credential_pair table

Revision ID: ac5eaac849f9
Revises: 52a219fb5233
Create Date: 2024-09-10 15:04:26.437118

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ac5eaac849f9"
down_revision = "46b7a812670f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # last pruned represents the last time the connector was pruned
    op.add_column(
        "connector_credential_pair",
        sa.Column("last_pruned", sa.DateTime(timezone=True), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "last_pruned")


================================================
FILE: backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
================================================
"""remove inactive ccpair status on downgrade

Revision ID: acaab4ef4507
Revises: b388730a2899
Create Date: 2025-02-16 18:21:41.330212

"""

from alembic import op
from onyx.db.models import ConnectorCredentialPair
from onyx.db.enums import ConnectorCredentialPairStatus
from sqlalchemy import update

# revision identifiers, used by Alembic.
revision = "acaab4ef4507"
down_revision = "b388730a2899"
branch_labels = None
depends_on = None


def upgrade() -> None:
    pass


def downgrade() -> None:
    op.execute(
        update(ConnectorCredentialPair)
        .where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)
        .values(status=ConnectorCredentialPairStatus.ACTIVE)
    )


================================================
FILE: backend/alembic/versions/ae62505e3acc_add_saml_accounts.py
================================================
"""Add SAML Accounts

Revision ID: ae62505e3acc
Revises: 7da543f5672f
Create Date: 2023-09-26 16:19:30.933183

"""

import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ae62505e3acc"
down_revision = "7da543f5672f"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "saml",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("encrypted_cookie", sa.Text(), nullable=False),
        sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("encrypted_cookie"),
        sa.UniqueConstraint("user_id"),
    )


def downgrade() -> None:
    op.drop_table("saml")


================================================
FILE: backend/alembic/versions/aeda5f2df4f6_add_pinned_assistants.py
================================================
"""add pinned assistants

Revision ID: aeda5f2df4f6
Revises: c5eae4a75a1b
Create Date: 2025-01-09 16:04:10.770636

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "aeda5f2df4f6"
down_revision = "c5eae4a75a1b"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user", sa.Column("pinned_assistants", postgresql.JSONB(), nullable=True)
    )
    op.execute('UPDATE "user" SET pinned_assistants = chosen_assistants')


def downgrade() -> None:
    op.drop_column("user", "pinned_assistants")


================================================
FILE: backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py
================================================
"""Make 'last_attempt_status' nullable

Revision ID: b082fec533f0
Revises: df0c7ad8a076
Create Date: 2023-08-06 12:05:47.087325

"""

from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b082fec533f0"
down_revision = "df0c7ad8a076"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column(
        "connector_credential_pair",
        "last_attempt_status",
        existing_type=postgresql.ENUM(
            "NOT_STARTED",
            "IN_PROGRESS",
            "SUCCESS",
            "FAILED",
            name="indexingstatus",
        ),
        nullable=True,
    )


def downgrade() -> None:
    # First, update any null values to a default value
    op.execute(
        "UPDATE connector_credential_pair SET last_attempt_status = 'NOT_STARTED' WHERE last_attempt_status IS NULL"
    )

    # Then, make the column non-nullable
    op.alter_column(
        "connector_credential_pair",
        "last_attempt_status",
        existing_type=postgresql.ENUM(
            "NOT_STARTED",
            "IN_PROGRESS",
            "SUCCESS",
            "FAILED",
            name="indexingstatus",
        ),
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/b156fa702355_chat_reworked.py
================================================
"""Chat Reworked

Revision ID: b156fa702355
Revises: baf71f781b9e
Create Date: 2023-12-12 00:57:41.823371

"""

import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import ENUM
from onyx.configs.constants import DocumentSource

# revision identifiers, used by Alembic.
revision = "b156fa702355"
down_revision = "baf71f781b9e"
branch_labels: None = None
depends_on: None = None


searchtype_enum = ENUM(
    "KEYWORD", "SEMANTIC", "HYBRID", name="searchtype", create_type=True
)
recencybiassetting_enum = ENUM(
    "FAVOR_RECENT",
    "BASE_DECAY",
    "NO_DECAY",
    "AUTO",
    name="recencybiassetting",
    create_type=True,
)


def upgrade() -> None:
    bind = op.get_bind()
    searchtype_enum.create(bind)
    recencybiassetting_enum.create(bind)

    # This is irrecoverable, whatever
    op.execute("DELETE FROM chat_feedback")
    op.execute("DELETE FROM document_retrieval_feedback")

    op.create_table(
        "search_doc",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.String(), nullable=False),
        sa.Column("chunk_ind", sa.Integer(), nullable=False),
        sa.Column("semantic_id", sa.String(), nullable=False),
        sa.Column("link", sa.String(), nullable=True),
        sa.Column("blurb", sa.String(), nullable=False),
        sa.Column("boost", sa.Integer(), nullable=False),
        sa.Column(
            "source_type",
            sa.Enum(DocumentSource, native=False),
            nullable=False,
        ),
        sa.Column("hidden", sa.Boolean(), nullable=False),
        sa.Column("score", sa.Float(), nullable=False),
        sa.Column("match_highlights", postgresql.ARRAY(sa.String()), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("primary_owners", postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column("secondary_owners", postgresql.ARRAY(sa.String()), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "prompt",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=False),
        sa.Column("system_prompt", sa.Text(), nullable=False),
        sa.Column("task_prompt", sa.Text(), nullable=False),
        sa.Column("include_citations", sa.Boolean(), nullable=False),
        sa.Column("datetime_aware", sa.Boolean(), nullable=False),
        sa.Column("default_prompt", sa.Boolean(), nullable=False),
        sa.Column("deleted", sa.Boolean(), nullable=False),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "persona__prompt",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("prompt_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["prompt_id"],
            ["prompt.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "prompt_id"),
    )

    # Changes to persona first so chat_sessions can have the right persona
    # The empty persona will be overwritten on server startup
    op.add_column(
        "persona",
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
    )
    op.add_column(
        "persona",
        sa.Column(
            "search_type",
            searchtype_enum,
            nullable=True,
        ),
    )
    op.execute("UPDATE persona SET search_type = 'HYBRID'")
    op.alter_column("persona", "search_type", nullable=False)
    op.add_column(
        "persona",
        sa.Column("llm_relevance_filter", sa.Boolean(), nullable=True),
    )
    op.execute("UPDATE persona SET llm_relevance_filter = TRUE")
    op.alter_column("persona", "llm_relevance_filter", nullable=False)
    op.add_column(
        "persona",
        sa.Column("llm_filter_extraction", sa.Boolean(), nullable=True),
    )
    op.execute("UPDATE persona SET llm_filter_extraction = TRUE")
    op.alter_column("persona", "llm_filter_extraction", nullable=False)
    op.add_column(
        "persona",
        sa.Column(
            "recency_bias",
            recencybiassetting_enum,
            nullable=True,
        ),
    )
    op.execute("UPDATE persona SET recency_bias = 'BASE_DECAY'")
    op.alter_column("persona", "recency_bias", nullable=False)
    op.alter_column("persona", "description", existing_type=sa.VARCHAR(), nullable=True)
    op.execute("UPDATE persona SET description = ''")
    op.alter_column("persona", "description", nullable=False)
    op.create_foreign_key("persona__user_fk", "persona", "user", ["user_id"], ["id"])
    op.drop_column("persona", "datetime_aware")
    op.drop_column("persona", "tools")
    op.drop_column("persona", "hint_text")
    op.drop_column("persona", "apply_llm_relevance_filter")
    op.drop_column("persona", "retrieval_enabled")
    op.drop_column("persona", "system_text")

    # Need to create a persona row so fk can work
    result = bind.execute(sa.text("SELECT 1 FROM persona WHERE id = 0"))
    exists = result.fetchone()
    if not exists:
        op.execute(
            sa.text(
                """
                INSERT INTO persona (
                    id, user_id, name, description, search_type, num_chunks,
                    llm_relevance_filter, llm_filter_extraction, recency_bias,
                    llm_model_version_override, default_persona, deleted
                ) VALUES (
                    0, NULL, '', '', 'HYBRID', NULL,
                    TRUE, TRUE, 'BASE_DECAY', NULL, TRUE, FALSE
                )
                """
            )
        )
    delete_statement = sa.text(
        """
        DELETE FROM persona
        WHERE name = 'Danswer' AND default_persona = TRUE AND id != 0
        """
    )

    bind.execute(delete_statement)

    op.add_column(
        "chat_feedback",
        sa.Column("chat_message_id", sa.Integer(), nullable=False),
    )
    op.drop_constraint(
        "chat_feedback_chat_message_chat_session_id_chat_message_me_fkey",
        "chat_feedback",
        type_="foreignkey",
    )
    op.drop_column("chat_feedback", "chat_message_edit_number")
    op.drop_column("chat_feedback", "chat_message_chat_session_id")
    op.drop_column("chat_feedback", "chat_message_message_number")
    op.add_column(
        "chat_message",
        sa.Column(
            "id",
            sa.Integer(),
            primary_key=True,
            autoincrement=True,
            nullable=False,
            unique=True,
        ),
    )
    op.add_column(
        "chat_message",
        sa.Column("parent_message", sa.Integer(), nullable=True),
    )
    op.add_column(
        "chat_message",
        sa.Column("latest_child_message", sa.Integer(), nullable=True),
    )
    op.add_column(
        "chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
    )
    op.add_column("chat_message", sa.Column("prompt_id", sa.Integer(), nullable=True))
    op.add_column(
        "chat_message",
        sa.Column("citations", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
    )
    op.add_column("chat_message", sa.Column("error", sa.Text(), nullable=True))
    op.drop_constraint("fk_chat_message_persona_id", "chat_message", type_="foreignkey")
    op.create_foreign_key(
        "chat_message__prompt_fk", "chat_message", "prompt", ["prompt_id"], ["id"]
    )
    op.drop_column("chat_message", "parent_edit_number")
    op.drop_column("chat_message", "persona_id")
    op.drop_column("chat_message", "reference_docs")
    op.drop_column("chat_message", "edit_number")
    op.drop_column("chat_message", "latest")
    op.drop_column("chat_message", "message_number")
    op.add_column("chat_session", sa.Column("one_shot", sa.Boolean(), nullable=True))
    op.execute("UPDATE chat_session SET one_shot = TRUE")
    op.alter_column("chat_session", "one_shot", nullable=False)
    op.alter_column(
        "chat_session",
        "persona_id",
        existing_type=sa.INTEGER(),
        nullable=True,
    )
    op.execute("UPDATE chat_session SET persona_id = 0")
    op.alter_column("chat_session", "persona_id", nullable=False)
    op.add_column(
        "document_retrieval_feedback",
        sa.Column("chat_message_id", sa.Integer(), nullable=False),
    )
    op.drop_constraint(
        "document_retrieval_feedback_qa_event_id_fkey",
        "document_retrieval_feedback",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )
    op.drop_column("document_retrieval_feedback", "qa_event_id")

    # Relation table must be created after the other tables are correct
    op.create_table(
        "chat_message__search_doc",
        sa.Column("chat_message_id", sa.Integer(), nullable=False),
        sa.Column("search_doc_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["chat_message_id"],
            ["chat_message.id"],
        ),
        sa.ForeignKeyConstraint(
            ["search_doc_id"],
            ["search_doc.id"],
        ),
        sa.PrimaryKeyConstraint("chat_message_id", "search_doc_id"),
    )

    # Needs to be created after chat_message id field is added
    op.create_foreign_key(
        "chat_feedback__chat_message_fk",
        "chat_feedback",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )

    op.drop_table("query_event")


def downgrade() -> None:
    # NOTE: you will lose all chat history. This is to satisfy the non-nullable constraints
    # below
    op.execute("DELETE FROM chat_feedback")
    op.execute("DELETE FROM chat_message__search_doc")
    op.execute("DELETE FROM document_retrieval_feedback")
    op.execute("DELETE FROM document_retrieval_feedback")
    op.execute("DELETE FROM chat_message")
    op.execute("DELETE FROM chat_session")

    op.drop_constraint(
        "chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
    )
    op.drop_constraint(
        "document_retrieval_feedback__chat_message_fk",
        "document_retrieval_feedback",
        type_="foreignkey",
    )
    op.drop_constraint("persona__user_fk", "persona", type_="foreignkey")
    op.drop_constraint("chat_message__prompt_fk", "chat_message", type_="foreignkey")
    op.drop_constraint(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )
    op.add_column(
        "persona",
        sa.Column("system_text", sa.TEXT(), autoincrement=False, nullable=True),
    )
    op.add_column(
        "persona",
        sa.Column(
            "retrieval_enabled",
            sa.BOOLEAN(),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.execute("UPDATE persona SET retrieval_enabled = TRUE")
    op.alter_column("persona", "retrieval_enabled", nullable=False)
    op.add_column(
        "persona",
        sa.Column(
            "apply_llm_relevance_filter",
            sa.BOOLEAN(),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.add_column(
        "persona",
        sa.Column("hint_text", sa.TEXT(), autoincrement=False, nullable=True),
    )
    op.add_column(
        "persona",
        sa.Column(
            "tools",
            postgresql.JSONB(astext_type=sa.Text()),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.add_column(
        "persona",
        sa.Column("datetime_aware", sa.BOOLEAN(), autoincrement=False, nullable=True),
    )
    op.execute("UPDATE persona SET datetime_aware = TRUE")
    op.alter_column("persona", "datetime_aware", nullable=False)
    op.alter_column("persona", "description", existing_type=sa.VARCHAR(), nullable=True)
    op.drop_column("persona", "recency_bias")
    op.drop_column("persona", "llm_filter_extraction")
    op.drop_column("persona", "llm_relevance_filter")
    op.drop_column("persona", "search_type")
    op.drop_column("persona", "user_id")
    op.add_column(
        "document_retrieval_feedback",
        sa.Column("qa_event_id", sa.INTEGER(), autoincrement=False, nullable=False),
    )
    op.drop_column("document_retrieval_feedback", "chat_message_id")
    op.alter_column(
        "chat_session", "persona_id", existing_type=sa.INTEGER(), nullable=True
    )
    op.drop_column("chat_session", "one_shot")
    op.add_column(
        "chat_message",
        sa.Column(
            "message_number",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
            primary_key=True,
        ),
    )
    op.add_column(
        "chat_message",
        sa.Column("latest", sa.BOOLEAN(), autoincrement=False, nullable=False),
    )
    op.add_column(
        "chat_message",
        sa.Column(
            "edit_number",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
            primary_key=True,
        ),
    )
    op.add_column(
        "chat_message",
        sa.Column(
            "reference_docs",
            postgresql.JSONB(astext_type=sa.Text()),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.add_column(
        "chat_message",
        sa.Column("persona_id", sa.INTEGER(), autoincrement=False, nullable=True),
    )
    op.add_column(
        "chat_message",
        sa.Column(
            "parent_edit_number",
            sa.INTEGER(),
            autoincrement=False,
            nullable=True,
        ),
    )
    op.create_foreign_key(
        "fk_chat_message_persona_id",
        "chat_message",
        "persona",
        ["persona_id"],
        ["id"],
    )
    op.drop_column("chat_message", "error")
    op.drop_column("chat_message", "citations")
    op.drop_column("chat_message", "prompt_id")
    op.drop_column("chat_message", "rephrased_query")
    op.drop_column("chat_message", "latest_child_message")
    op.drop_column("chat_message", "parent_message")
    op.drop_column("chat_message", "id")
    op.add_column(
        "chat_feedback",
        sa.Column(
            "chat_message_message_number",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
        ),
    )
    op.add_column(
        "chat_feedback",
        sa.Column(
            "chat_message_chat_session_id",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
            primary_key=True,
        ),
    )
    op.add_column(
        "chat_feedback",
        sa.Column(
            "chat_message_edit_number",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
        ),
    )
    op.drop_column("chat_feedback", "chat_message_id")
    op.create_table(
        "query_event",
        sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
        sa.Column("query", sa.VARCHAR(), autoincrement=False, nullable=False),
        sa.Column(
            "selected_search_flow",
            sa.VARCHAR(),
            autoincrement=False,
            nullable=True,
        ),
        sa.Column("llm_answer", sa.VARCHAR(), autoincrement=False, nullable=True),
        sa.Column("feedback", sa.VARCHAR(), autoincrement=False, nullable=True),
        sa.Column("user_id", sa.UUID(), autoincrement=False, nullable=True),
        sa.Column(
            "time_created",
            postgresql.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            autoincrement=False,
            nullable=False,
        ),
        sa.Column(
            "retrieved_document_ids",
            postgresql.ARRAY(sa.VARCHAR()),
            autoincrement=False,
            nullable=True,
        ),
        sa.Column("chat_session_id", sa.INTEGER(), autoincrement=False, nullable=True),
        sa.ForeignKeyConstraint(
            ["chat_session_id"],
            ["chat_session.id"],
            name="fk_query_event_chat_session_id",
        ),
        sa.ForeignKeyConstraint(
            ["user_id"], ["user.id"], name="query_event_user_id_fkey"
        ),
        sa.PrimaryKeyConstraint("id", name="query_event_pkey"),
    )
    op.drop_table("chat_message__search_doc")
    op.drop_table("persona__prompt")
    op.drop_table("prompt")
    op.drop_table("search_doc")
    op.create_unique_constraint(
        "uq_chat_message_combination",
        "chat_message",
        ["chat_session_id", "message_number", "edit_number"],
    )
    op.create_foreign_key(
        "chat_feedback_chat_message_chat_session_id_chat_message_me_fkey",
        "chat_feedback",
        "chat_message",
        [
            "chat_message_chat_session_id",
            "chat_message_message_number",
            "chat_message_edit_number",
        ],
        ["chat_session_id", "message_number", "edit_number"],
    )
    op.create_foreign_key(
        "document_retrieval_feedback_qa_event_id_fkey",
        "document_retrieval_feedback",
        "query_event",
        ["qa_event_id"],
        ["id"],
    )

    op.execute("DROP TYPE IF EXISTS searchtype")
    op.execute("DROP TYPE IF EXISTS recencybiassetting")
    op.execute("DROP TYPE IF EXISTS documentsource")


================================================
FILE: backend/alembic/versions/b30353be4eec_add_mcp_auth_performer.py
================================================
"""add_mcp_auth_performer

Revision ID: b30353be4eec
Revises: 2b75d0a8ffcb
Create Date: 2025-09-13 14:58:08.413534

"""

from alembic import op
import sqlalchemy as sa
from onyx.db.enums import MCPAuthenticationPerformer, MCPTransport


# revision identifiers, used by Alembic.
revision = "b30353be4eec"
down_revision = "2b75d0a8ffcb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """moving to a better way of handling auth performer and transport"""
    # Add nullable column first for backward compatibility
    op.add_column(
        "mcp_server",
        sa.Column(
            "auth_performer",
            sa.Enum(MCPAuthenticationPerformer, native_enum=False),
            nullable=True,
        ),
    )

    op.add_column(
        "mcp_server",
        sa.Column(
            "transport",
            sa.Enum(MCPTransport, native_enum=False),
            nullable=True,
        ),
    )

    # # Backfill values using existing data and inference rules
    bind = op.get_bind()

    # 1) OAUTH servers are always PER_USER
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server
        SET auth_performer = 'PER_USER'
        WHERE auth_type = 'OAUTH'
        """
        )
    )

    # 2) If there is no admin connection config, mark as ADMIN (and not set yet)
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server
        SET auth_performer = 'ADMIN'
        WHERE admin_connection_config_id IS NULL
          AND auth_performer IS NULL
        """
        )
    )

    # 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server AS ms
        SET auth_performer = 'PER_USER'
        FROM mcp_connection_config AS mcc
        WHERE mcc.mcp_server_id = ms.id
          AND COALESCE(mcc.user_email, '') <> ''
          AND ms.auth_performer IS NULL
        """
        )
    )

    # 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server
        SET auth_performer = 'ADMIN'
        WHERE auth_performer IS NULL
        """
        )
    )

    # Finally, make the column non-nullable
    op.alter_column(
        "mcp_server",
        "auth_performer",
        existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),
        nullable=False,
    )

    # Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server
        SET transport = 'STREAMABLE_HTTP'
        WHERE transport IS NULL
        """
        )
    )

    op.alter_column(
        "mcp_server",
        "transport",
        existing_type=sa.Enum(MCPTransport, native_enum=False),
        nullable=False,
    )


def downgrade() -> None:
    """remove cols"""
    op.drop_column("mcp_server", "transport")
    op.drop_column("mcp_server", "auth_performer")


================================================
FILE: backend/alembic/versions/b329d00a9ea6_adding_assistant_specific_user_.py
================================================
"""Adding assistant-specific user preferences

Revision ID: b329d00a9ea6
Revises: f9b8c7d6e5a4
Create Date: 2025-08-26 23:14:44.592985

"""

from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b329d00a9ea6"
down_revision = "f9b8c7d6e5a4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "assistant__user_specific_config",
        sa.Column("assistant_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("disabled_tool_ids", postgresql.ARRAY(sa.Integer()), nullable=False),
        sa.ForeignKeyConstraint(["assistant_id"], ["persona.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("assistant_id", "user_id"),
    )


def downgrade() -> None:
    op.drop_table("assistant__user_specific_config")


================================================
FILE: backend/alembic/versions/b388730a2899_nullable_preferences.py
================================================
"""nullable preferences

Revision ID: b388730a2899
Revises: 1a03d2c2856b
Create Date: 2025-02-17 18:49:22.643902

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "b388730a2899"
down_revision = "1a03d2c2856b"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("user", "temperature_override_enabled", nullable=True)
    op.alter_column("user", "auto_scroll", nullable=True)


def downgrade() -> None:
    # Ensure no null values before making columns non-nullable
    op.execute(
        'UPDATE "user" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'
    )
    op.execute('UPDATE "user" SET auto_scroll = false WHERE auto_scroll IS NULL')

    op.alter_column("user", "temperature_override_enabled", nullable=False)
    op.alter_column("user", "auto_scroll", nullable=False)


================================================
FILE: backend/alembic/versions/b4b7e1028dfd_grant_basic_to_existing_groups.py
================================================
"""grant_basic_to_existing_groups

Grants the "basic" permission to all existing groups that don't already
have it. Every group should have at least "basic" so that its members
get basic access when effective_permissions is backfilled.

Revision ID: b4b7e1028dfd
Revises: b7bcc991d722
Create Date: 2026-03-30 16:15:17.093498

"""

from collections.abc import Sequence

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b4b7e1028dfd"
down_revision = "b7bcc991d722"
branch_labels: str | None = None
depends_on: str | Sequence[str] | None = None

user_group = sa.table(
    "user_group",
    sa.column("id", sa.Integer),
    sa.column("is_default", sa.Boolean),
)

permission_grant = sa.table(
    "permission_grant",
    sa.column("group_id", sa.Integer),
    sa.column("permission", sa.String),
    sa.column("grant_source", sa.String),
    sa.column("is_deleted", sa.Boolean),
)


def upgrade() -> None:
    conn = op.get_bind()

    already_has_basic = (
        sa.select(sa.literal(1))
        .select_from(permission_grant)
        .where(
            permission_grant.c.group_id == user_group.c.id,
            permission_grant.c.permission == "basic",
        )
        .exists()
    )

    groups_needing_basic = sa.select(
        user_group.c.id,
        sa.literal("basic").label("permission"),
        sa.literal("SYSTEM").label("grant_source"),
        sa.literal(False).label("is_deleted"),
    ).where(
        user_group.c.is_default == sa.false(),
        ~already_has_basic,
    )

    conn.execute(
        permission_grant.insert().from_select(
            ["group_id", "permission", "grant_source", "is_deleted"],
            groups_needing_basic,
        )
    )


def downgrade() -> None:
    conn = op.get_bind()

    non_default_group_ids = sa.select(user_group.c.id).where(
        user_group.c.is_default == sa.false()
    )

    conn.execute(
        permission_grant.delete().where(
            permission_grant.c.permission == "basic",
            permission_grant.c.grant_source == "SYSTEM",
            permission_grant.c.group_id.in_(non_default_group_ids),
        )
    )


================================================
FILE: backend/alembic/versions/b4ef3ae0bf6e_add_user_oauth_token_to_slack_bot.py
================================================
"""add_user_oauth_token_to_slack_bot

Revision ID: b4ef3ae0bf6e
Revises: 505c488f6662
Create Date: 2025-08-26 17:47:41.788462

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b4ef3ae0bf6e"
down_revision = "505c488f6662"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add user_token column to slack_bot table
    op.add_column("slack_bot", sa.Column("user_token", sa.LargeBinary(), nullable=True))


def downgrade() -> None:
    # Remove user_token column from slack_bot table
    op.drop_column("slack_bot", "user_token")


================================================
FILE: backend/alembic/versions/b51c6844d1df_seed_memory_tool.py
================================================
"""seed_memory_tool and add enable_memory_tool to user

Revision ID: b51c6844d1df
Revises: 93c15d6a6fbb
Create Date: 2026-02-11 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b51c6844d1df"
down_revision = "93c15d6a6fbb"
branch_labels = None
depends_on = None


MEMORY_TOOL = {
    "name": "MemoryTool",
    "display_name": "Add Memory",
    "description": "Save memories about the user for future conversations.",
    "in_code_tool_id": "MemoryTool",
    "enabled": True,
}


def upgrade() -> None:
    conn = op.get_bind()

    existing = conn.execute(
        sa.text(
            "SELECT in_code_tool_id FROM tool WHERE in_code_tool_id = :in_code_tool_id"
        ),
        {"in_code_tool_id": MEMORY_TOOL["in_code_tool_id"]},
    ).fetchone()

    if existing:
        conn.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :name,
                    display_name = :display_name,
                    description = :description
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            MEMORY_TOOL,
        )
    else:
        conn.execute(
            sa.text(
                """
                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
                """
            ),
            MEMORY_TOOL,
        )

    op.add_column(
        "user",
        sa.Column(
            "enable_memory_tool",
            sa.Boolean(),
            nullable=False,
            server_default=sa.true(),
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "enable_memory_tool")

    conn = op.get_bind()
    conn.execute(
        sa.text("DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        {"in_code_tool_id": MEMORY_TOOL["in_code_tool_id"]},
    )


================================================
FILE: backend/alembic/versions/b558f51620b4_pause_finished_user_file_connectors.py
================================================
"""Pause finished user file connectors

Revision ID: b558f51620b4
Revises: 90e3b9af7da4
Create Date: 2025-08-15 17:17:02.456704

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "b558f51620b4"
down_revision = "90e3b9af7da4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Set all user file connector credential pairs with ACTIVE status to PAUSED
    # This ensures user files don't continue to run indexing tasks after processing
    op.execute(
        """
        UPDATE connector_credential_pair
        SET status = 'PAUSED'
        WHERE is_user_file = true
        AND status = 'ACTIVE'
        """
    )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/b5c4d7e8f9a1_add_hierarchy_node_cc_pair_table.py
================================================
"""add hierarchy_node_by_connector_credential_pair table

Revision ID: b5c4d7e8f9a1
Revises: a3b8d9e2f1c4
Create Date: 2026-03-04

"""

import sqlalchemy as sa
from alembic import op

revision = "b5c4d7e8f9a1"
down_revision = "a3b8d9e2f1c4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "hierarchy_node_by_connector_credential_pair",
        sa.Column("hierarchy_node_id", sa.Integer(), nullable=False),
        sa.Column("connector_id", sa.Integer(), nullable=False),
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["hierarchy_node_id"],
            ["hierarchy_node.id"],
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["connector_id", "credential_id"],
            [
                "connector_credential_pair.connector_id",
                "connector_credential_pair.credential_id",
            ],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("hierarchy_node_id", "connector_id", "credential_id"),
    )
    op.create_index(
        "ix_hierarchy_node_cc_pair_connector_credential",
        "hierarchy_node_by_connector_credential_pair",
        ["connector_id", "credential_id"],
    )


def downgrade() -> None:
    op.drop_index(
        "ix_hierarchy_node_cc_pair_connector_credential",
        table_name="hierarchy_node_by_connector_credential_pair",
    )
    op.drop_table("hierarchy_node_by_connector_credential_pair")


================================================
FILE: backend/alembic/versions/b728689f45b1_rename_persona_is_visible_to_is_listed_.py
================================================
"""rename persona is_visible to is_listed and featured to is_featured

Revision ID: b728689f45b1
Revises: 689433b0d8de
Create Date: 2026-03-23 12:36:26.607305

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "b728689f45b1"
down_revision = "689433b0d8de"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("persona", "is_visible", new_column_name="is_listed")
    op.alter_column("persona", "featured", new_column_name="is_featured")


def downgrade() -> None:
    op.alter_column("persona", "is_listed", new_column_name="is_visible")
    op.alter_column("persona", "is_featured", new_column_name="featured")


================================================
FILE: backend/alembic/versions/b72ed7a5db0e_remove_description_from_starter_messages.py
================================================
"""remove description from starter messages

Revision ID: b72ed7a5db0e
Revises: 33cb72ea4d80
Create Date: 2024-11-03 15:55:28.944408

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b72ed7a5db0e"
down_revision = "33cb72ea4d80"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        sa.text(
            """
            UPDATE persona
            SET starter_messages = (
                SELECT jsonb_agg(elem - 'description')
                FROM jsonb_array_elements(starter_messages) elem
            )
            WHERE starter_messages IS NOT NULL
              AND jsonb_typeof(starter_messages) = 'array'
            """
        )
    )


def downgrade() -> None:
    op.execute(
        sa.text(
            """
            UPDATE persona
            SET starter_messages = (
                SELECT jsonb_agg(elem || '{"description": ""}')
                FROM jsonb_array_elements(starter_messages) elem
            )
            WHERE starter_messages IS NOT NULL
              AND jsonb_typeof(starter_messages) = 'array'
            """
        )
    )


================================================
FILE: backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py
================================================
"""Add checkpointing/failure handling

Revision ID: b7a7eee5aa15
Revises: f39c5794c10a
Create Date: 2025-01-24 15:17:36.763172

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b7a7eee5aa15"
down_revision = "f39c5794c10a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "index_attempt",
        sa.Column("checkpoint_pointer", sa.String(), nullable=True),
    )
    op.add_column(
        "index_attempt",
        sa.Column("poll_range_start", sa.DateTime(timezone=True), nullable=True),
    )
    op.add_column(
        "index_attempt",
        sa.Column("poll_range_end", sa.DateTime(timezone=True), nullable=True),
    )

    op.create_index(
        "ix_index_attempt_cc_pair_settings_poll",
        "index_attempt",
        [
            "connector_credential_pair_id",
            "search_settings_id",
            "status",
            sa.text("time_updated DESC"),
        ],
    )

    # Drop the old IndexAttemptError table
    op.drop_index("index_attempt_id", table_name="index_attempt_errors")
    op.drop_table("index_attempt_errors")

    # Create the new version of the table
    op.create_table(
        "index_attempt_errors",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("index_attempt_id", sa.Integer(), nullable=False),
        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.String(), nullable=True),
        sa.Column("document_link", sa.String(), nullable=True),
        sa.Column("entity_id", sa.String(), nullable=True),
        sa.Column("failed_time_range_start", sa.DateTime(timezone=True), nullable=True),
        sa.Column("failed_time_range_end", sa.DateTime(timezone=True), nullable=True),
        sa.Column("failure_message", sa.Text(), nullable=False),
        sa.Column("is_resolved", sa.Boolean(), nullable=False, default=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["index_attempt_id"],
            ["index_attempt.id"],
        ),
        sa.ForeignKeyConstraint(
            ["connector_credential_pair_id"],
            ["connector_credential_pair.id"],
        ),
    )


def downgrade() -> None:
    op.execute("SET lock_timeout = '5s'")

    # try a few times to drop the table, this has been observed to fail due to other locks
    # blocking the drop
    NUM_TRIES = 10
    for i in range(NUM_TRIES):
        try:
            op.drop_table("index_attempt_errors")
            break
        except Exception as e:
            if i == NUM_TRIES - 1:
                raise e
            print(f"Error dropping table: {e}. Retrying...")

    op.execute("SET lock_timeout = DEFAULT")

    # Recreate the old IndexAttemptError table
    op.create_table(
        "index_attempt_errors",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("index_attempt_id", sa.Integer(), nullable=True),
        sa.Column("batch", sa.Integer(), nullable=True),
        sa.Column("doc_summaries", postgresql.JSONB(), nullable=False),
        sa.Column("error_msg", sa.Text(), nullable=True),
        sa.Column("traceback", sa.Text(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
        sa.ForeignKeyConstraint(
            ["index_attempt_id"],
            ["index_attempt.id"],
        ),
    )

    op.create_index(
        "index_attempt_id",
        "index_attempt_errors",
        ["time_created"],
    )

    op.drop_index("ix_index_attempt_cc_pair_settings_poll")
    op.drop_column("index_attempt", "checkpoint_pointer")
    op.drop_column("index_attempt", "poll_range_start")
    op.drop_column("index_attempt", "poll_range_end")


================================================
FILE: backend/alembic/versions/b7bcc991d722_assign_users_to_default_groups.py
================================================
"""assign_users_to_default_groups

Revision ID: b7bcc991d722
Revises: 03d085c5c38d
Create Date: 2026-03-25 16:30:39.529301

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import insert as pg_insert


# revision identifiers, used by Alembic.
revision = "b7bcc991d722"
down_revision = "03d085c5c38d"
branch_labels = None
depends_on = None

# The no-auth placeholder user must NOT be assigned to default groups.
# A database trigger (migrate_no_auth_data_to_user) will try to DELETE this
# user when the first real user registers; group membership rows would cause
# an FK violation on that DELETE.
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"

# Reflect table structures for use in DML
user_group_table = sa.table(
    "user_group",
    sa.column("id", sa.Integer),
    sa.column("name", sa.String),
    sa.column("is_default", sa.Boolean),
)

user_table = sa.table(
    "user",
    sa.column("id", sa.Uuid),
    sa.column("role", sa.String),
    sa.column("account_type", sa.String),
    sa.column("is_active", sa.Boolean),
)

user__user_group_table = sa.table(
    "user__user_group",
    sa.column("user_group_id", sa.Integer),
    sa.column("user_id", sa.Uuid),
)


def upgrade() -> None:
    conn = op.get_bind()

    # Look up default group IDs
    admin_row = conn.execute(
        sa.select(user_group_table.c.id).where(
            user_group_table.c.name == "Admin",
            user_group_table.c.is_default == True,  # noqa: E712
        )
    ).fetchone()

    basic_row = conn.execute(
        sa.select(user_group_table.c.id).where(
            user_group_table.c.name == "Basic",
            user_group_table.c.is_default == True,  # noqa: E712
        )
    ).fetchone()

    if admin_row is None:
        raise RuntimeError(
            "Default 'Admin' group not found. "
            "Ensure migration 977e834c1427 (seed_default_groups) ran successfully."
        )

    if basic_row is None:
        raise RuntimeError(
            "Default 'Basic' group not found. "
            "Ensure migration 977e834c1427 (seed_default_groups) ran successfully."
        )

    # Users with role=admin → Admin group
    # Include inactive users so reactivation doesn't require reconciliation.
    # Exclude non-human account types (mirrors assign_user_to_default_groups logic).
    admin_users = sa.select(
        sa.literal(admin_row[0]).label("user_group_id"),
        user_table.c.id.label("user_id"),
    ).where(
        user_table.c.role == "ADMIN",
        user_table.c.account_type.notin_(["BOT", "EXT_PERM_USER", "ANONYMOUS"]),
        user_table.c.id != NO_AUTH_PLACEHOLDER_USER_UUID,
    )
    op.execute(
        pg_insert(user__user_group_table)
        .from_select(["user_group_id", "user_id"], admin_users)
        .on_conflict_do_nothing(index_elements=["user_group_id", "user_id"])
    )

    # STANDARD users (non-admin) and SERVICE_ACCOUNT users (role=basic) → Basic group
    # Include inactive users so reactivation doesn't require reconciliation.
    basic_users = sa.select(
        sa.literal(basic_row[0]).label("user_group_id"),
        user_table.c.id.label("user_id"),
    ).where(
        user_table.c.account_type.notin_(["BOT", "EXT_PERM_USER", "ANONYMOUS"]),
        user_table.c.id != NO_AUTH_PLACEHOLDER_USER_UUID,
        sa.or_(
            sa.and_(
                user_table.c.account_type == "STANDARD",
                user_table.c.role != "ADMIN",
            ),
            sa.and_(
                user_table.c.account_type == "SERVICE_ACCOUNT",
                user_table.c.role == "BASIC",
            ),
        ),
    )
    op.execute(
        pg_insert(user__user_group_table)
        .from_select(["user_group_id", "user_id"], basic_users)
        .on_conflict_do_nothing(index_elements=["user_group_id", "user_id"])
    )


def downgrade() -> None:
    # Group memberships are left in place — removing them risks
    # deleting memberships that existed before this migration.
    pass


================================================
FILE: backend/alembic/versions/b7c2b63c4a03_add_background_reindex_enabled_field.py
================================================
"""add background_reindex_enabled field

Revision ID: b7c2b63c4a03
Revises: f11b408e39d3
Create Date: 2024-03-26 12:34:56.789012

"""

from alembic import op
import sqlalchemy as sa

from onyx.db.enums import EmbeddingPrecision


# revision identifiers, used by Alembic.
revision = "b7c2b63c4a03"
down_revision = "f11b408e39d3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add background_reindex_enabled column with default value of True
    op.add_column(
        "search_settings",
        sa.Column(
            "background_reindex_enabled",
            sa.Boolean(),
            nullable=False,
            server_default="true",
        ),
    )

    # Add embedding_precision column with default value of FLOAT
    op.add_column(
        "search_settings",
        sa.Column(
            "embedding_precision",
            sa.Enum(EmbeddingPrecision, native_enum=False),
            nullable=False,
            server_default=EmbeddingPrecision.FLOAT.name,
        ),
    )

    # Add reduced_dimension column with default value of None
    op.add_column(
        "search_settings",
        sa.Column("reduced_dimension", sa.Integer(), nullable=True),
    )


def downgrade() -> None:
    # Remove the background_reindex_enabled column
    op.drop_column("search_settings", "background_reindex_enabled")
    op.drop_column("search_settings", "embedding_precision")
    op.drop_column("search_settings", "reduced_dimension")


================================================
FILE: backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py
================================================
"""adjust prompt length

Revision ID: b7ec9b5b505f
Revises: abbfec3a5ac5
Create Date: 2025-09-10 18:51:15.629197

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b7ec9b5b505f"
down_revision = "abbfec3a5ac5"
branch_labels = None
depends_on = None


MAX_PROMPT_LENGTH = 5_000_000


def upgrade() -> None:
    # NOTE: need to run this since the previous migration PREVIOUSLY set the length to 8000
    op.alter_column(
        "persona",
        "system_prompt",
        existing_type=sa.String(length=8000),
        type_=sa.String(length=MAX_PROMPT_LENGTH),
        existing_nullable=False,
    )
    op.alter_column(
        "persona",
        "task_prompt",
        existing_type=sa.String(length=8000),
        type_=sa.String(length=MAX_PROMPT_LENGTH),
        existing_nullable=False,
    )


def downgrade() -> None:
    # Downgrade not necessary
    pass


================================================
FILE: backend/alembic/versions/b85f02ec1308_fix_file_type_migration.py
================================================
"""fix-file-type-migration

Revision ID: b85f02ec1308
Revises: a3bfd0d64902
Create Date: 2024-05-31 18:09:26.658164

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "b85f02ec1308"
down_revision = "a3bfd0d64902"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE file_store
        SET file_origin = UPPER(file_origin)
    """
    )


def downgrade() -> None:
    # Let's not break anything on purpose :)
    pass


================================================
FILE: backend/alembic/versions/b896bbd0d5a7_backfill_is_internet_data_to_false.py
================================================
"""backfill is_internet data to False

Revision ID: b896bbd0d5a7
Revises: 44f856ae2a4a
Create Date: 2024-07-16 15:21:05.718571

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "b896bbd0d5a7"
down_revision = "44f856ae2a4a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute("UPDATE search_doc SET is_internet = FALSE WHERE is_internet IS NULL")


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py
================================================
"""Drop milestone table

Revision ID: b8c9d0e1f2a3
Revises: a2b3c4d5e6f7
Create Date: 2025-12-18

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b8c9d0e1f2a3"
down_revision = "a2b3c4d5e6f7"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_table("milestone")


def downgrade() -> None:
    op.create_table(
        "milestone",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("tenant_id", sa.String(), nullable=True),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("event_type", sa.String(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
    )


================================================
FILE: backend/alembic/versions/ba98eba0f66a_add_support_for_litellm_proxy_in_.py
================================================
"""add support for litellm proxy in reranking

Revision ID: ba98eba0f66a
Revises: bceb1e139447
Create Date: 2024-09-06 10:36:04.507332

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "ba98eba0f66a"
down_revision = "bceb1e139447"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("search_settings", "rerank_api_url")


================================================
FILE: backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py
================================================
"""Add llm_model_version_override to Persona

Revision ID: baf71f781b9e
Revises: 50b683a8295c
Create Date: 2023-12-06 21:56:50.286158

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "baf71f781b9e"
down_revision = "50b683a8295c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("llm_model_version_override", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("persona", "llm_model_version_override")


================================================
FILE: backend/alembic/versions/bc9771dccadf_create_usage_reports_table.py
================================================
"""create usage reports table

Revision ID: bc9771dccadf
Revises: 0568ccf46a6b
Create Date: 2024-06-18 10:04:26.800282

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy

# revision identifiers, used by Alembic.
revision = "bc9771dccadf"
down_revision = "0568ccf46a6b"

branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "usage_reports",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("report_name", sa.String(), nullable=False),
        sa.Column(
            "requestor_user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("period_from", sa.DateTime(timezone=True), nullable=True),
        sa.Column("period_to", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["report_name"],
            ["file_store.file_name"],
        ),
        sa.ForeignKeyConstraint(
            ["requestor_user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("usage_reports")


================================================
FILE: backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py
================================================
"""Add base_url to CloudEmbeddingProvider

Revision ID: bceb1e139447
Revises: a3795dce87be
Create Date: 2024-08-28 17:00:52.554580

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "bceb1e139447"
down_revision = "a3795dce87be"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "embedding_provider", sa.Column("api_url", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("embedding_provider", "api_url")


================================================
FILE: backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py
================================================
"""non nullable default persona

Revision ID: bd2921608c3a
Revises: 797089dfb4d2
Create Date: 2024-09-20 10:28:37.992042

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "bd2921608c3a"
down_revision = "797089dfb4d2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Set existing NULL values to False
    op.execute(
        "UPDATE persona SET is_default_persona = FALSE WHERE is_default_persona IS NULL"
    )

    # Alter the column to be not nullable with a default value of False
    op.alter_column(
        "persona",
        "is_default_persona",
        existing_type=sa.Boolean(),
        nullable=False,
        server_default=sa.text("false"),
    )


def downgrade() -> None:
    # Revert the changes
    op.alter_column(
        "persona",
        "is_default_persona",
        existing_type=sa.Boolean(),
        nullable=True,
        server_default=None,
    )


================================================
FILE: backend/alembic/versions/bd7c3bf8beba_migrate_agent_responses_to_research_.py
================================================
"""migrate_agent_sub_questions_to_research_iterations

Revision ID: bd7c3bf8beba
Revises: f8a9b2c3d4e5
Create Date: 2025-08-18 11:33:27.098287

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "bd7c3bf8beba"
down_revision = "f8a9b2c3d4e5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Get connection to execute raw SQL
    connection = op.get_bind()

    # First, insert data into research_agent_iteration table
    # This creates one iteration record per primary_question_id using the earliest time_created
    connection.execute(
        sa.text(
            """
            INSERT INTO research_agent_iteration (primary_question_id, created_at, iteration_nr, purpose, reasoning)
            SELECT
                primary_question_id,
                MIN(time_created) as created_at,
                1 as iteration_nr,
                'Generating and researching subquestions' as purpose,
                '(No previous reasoning)' as reasoning
            FROM agent__sub_question
            JOIN chat_message on agent__sub_question.primary_question_id = chat_message.id
            WHERE primary_question_id IS NOT NULL
                AND chat_message.is_agentic = true
            GROUP BY primary_question_id
            ON CONFLICT DO NOTHING;
        """
        )
    )

    # Then, insert data into research_agent_iteration_sub_step table
    # This migrates each sub-question as a sub-step
    connection.execute(
        sa.text(
            """
            INSERT INTO research_agent_iteration_sub_step (
                primary_question_id,
                iteration_nr,
                iteration_sub_step_nr,
                created_at,
                sub_step_instructions,
                sub_step_tool_id,
                sub_answer,
                cited_doc_results
            )
            SELECT
                primary_question_id,
                1 as iteration_nr,
                level_question_num as iteration_sub_step_nr,
                time_created as created_at,
                sub_question as sub_step_instructions,
                1 as sub_step_tool_id,
                sub_answer,
                sub_question_doc_results as cited_doc_results
            FROM agent__sub_question
            JOIN chat_message on agent__sub_question.primary_question_id = chat_message.id
            WHERE chat_message.is_agentic = true
            AND primary_question_id IS NOT NULL
            ON CONFLICT DO NOTHING;
        """
        )
    )

    # Update chat_message records: set legacy agentic type and answer purpose for existing agentic messages
    connection.execute(
        sa.text(
            """
            UPDATE chat_message
            SET research_answer_purpose = 'ANSWER'
            WHERE is_agentic = true
            AND research_type IS NULL and
                message_type = 'ASSISTANT';
        """
        )
    )
    connection.execute(
        sa.text(
            """
            UPDATE chat_message
            SET research_type = 'LEGACY_AGENTIC'
            WHERE is_agentic = true
            AND research_type IS NULL;
        """
        )
    )


def downgrade() -> None:
    # Get connection to execute raw SQL
    connection = op.get_bind()

    # Note: This downgrade removes all research agent iteration data
    # There's no way to perfectly restore the original agent__sub_question data
    # if it was deleted after this migration

    # Delete all research_agent_iteration_sub_step records that were migrated
    connection.execute(
        sa.text(
            """
            DELETE FROM research_agent_iteration_sub_step
            USING chat_message
            WHERE research_agent_iteration_sub_step.primary_question_id = chat_message.id
            AND chat_message.research_type = 'LEGACY_AGENTIC';
        """
        )
    )

    # Delete all research_agent_iteration records that were migrated
    connection.execute(
        sa.text(
            """
            DELETE FROM research_agent_iteration
            USING chat_message
            WHERE research_agent_iteration.primary_question_id = chat_message.id
            AND chat_message.research_type = 'LEGACY_AGENTIC';
        """
        )
    )

    # Revert chat_message updates: clear research fields for legacy agentic messages
    connection.execute(
        sa.text(
            """
            UPDATE chat_message
            SET research_type = NULL,
                research_answer_purpose = NULL
            WHERE is_agentic = true
            AND research_type = 'LEGACY_AGENTIC'
            AND message_type = 'ASSISTANT';
        """
        )
    )


================================================
FILE: backend/alembic/versions/be2ab2aa50ee_fix_capitalization.py
================================================
"""fix_capitalization

Revision ID: be2ab2aa50ee
Revises: 369644546676
Create Date: 2025-01-10 13:13:26.228960

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "be2ab2aa50ee"
down_revision = "369644546676"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE document
        SET
            external_user_group_ids = ARRAY(
                SELECT LOWER(unnest(external_user_group_ids))
            ),
            last_modified = NOW()
        WHERE
            external_user_group_ids IS NOT NULL
            AND external_user_group_ids::text[] <> ARRAY(
                SELECT LOWER(unnest(external_user_group_ids))
            )::text[]
    """
    )


def downgrade() -> None:
    # No way to cleanly persist the bad state through an upgrade/downgrade
    # cycle, so we just pass
    pass


================================================
FILE: backend/alembic/versions/be87a654d5af_persona_new_default_model_configuration_.py
================================================
"""Persona new default model configuration id column

Revision ID: be87a654d5af
Revises: e7f8a9b0c1d2
Create Date: 2026-01-30 11:14:17.306275

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "be87a654d5af"
down_revision = "e7f8a9b0c1d2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("default_model_configuration_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "fk_persona_default_model_configuration_id",
        "persona",
        "model_configuration",
        ["default_model_configuration_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    op.drop_constraint(
        "fk_persona_default_model_configuration_id", "persona", type_="foreignkey"
    )

    op.drop_column("persona", "default_model_configuration_id")


================================================
FILE: backend/alembic/versions/bf7a81109301_delete_input_prompts.py
================================================
"""delete_input_prompts

Revision ID: bf7a81109301
Revises: f7a894b06d02
Create Date: 2024-12-09 12:00:49.884228

"""

from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy


# revision identifiers, used by Alembic.
revision = "bf7a81109301"
down_revision = "f7a894b06d02"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_table("inputprompt__user")
    op.drop_table("inputprompt")


def downgrade() -> None:
    op.create_table(
        "inputprompt",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("prompt", sa.String(), nullable=False),
        sa.Column("content", sa.String(), nullable=False),
        sa.Column("active", sa.Boolean(), nullable=False),
        sa.Column("is_public", sa.Boolean(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "inputprompt__user",
        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
        sa.Column("user_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["input_prompt_id"],
            ["inputprompt.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["inputprompt.id"],
        ),
        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
    )


================================================
FILE: backend/alembic/versions/c0aab6edb6dd_delete_workspace.py
================================================
"""delete workspace

Revision ID: c0aab6edb6dd
Revises: 35e518e0ddf4
Create Date: 2024-12-17 14:37:07.660631

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "c0aab6edb6dd"
down_revision = "35e518e0ddf4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
    UPDATE connector
    SET connector_specific_config = connector_specific_config - 'workspace'
    WHERE source = 'SLACK'
    """
    )


def downgrade() -> None:
    import json
    from sqlalchemy import text
    from slack_sdk import WebClient

    conn = op.get_bind()

    # Fetch all Slack credentials
    creds_result = conn.execute(
        text("SELECT id, credential_json FROM credential WHERE source = 'SLACK'")
    )
    all_slack_creds = creds_result.fetchall()
    if not all_slack_creds:
        return

    for cred_row in all_slack_creds:
        credential_id, credential_json = cred_row

        credential_json = (
            credential_json.tobytes().decode("utf-8")
            if isinstance(credential_json, memoryview)
            else credential_json.decode("utf-8")
        )
        credential_data = json.loads(credential_json)
        slack_bot_token = credential_data.get("slack_bot_token")
        if not slack_bot_token:
            print(
                f"No slack_bot_token found for credential {credential_id}. "
                "Your Slack connector will not function until you upgrade and provide a valid token."
            )
            continue

        client = WebClient(token=slack_bot_token)
        try:
            auth_response = client.auth_test()
            workspace = auth_response["url"].split("//")[1].split(".")[0]

            # Update only the connectors linked to this credential
            # (and which are Slack connectors).
            op.execute(
                f"""
                UPDATE connector AS c
                SET connector_specific_config = jsonb_set(
                    connector_specific_config,
                    '{{workspace}}',
                    to_jsonb('{workspace}'::text)
                )
                FROM connector_credential_pair AS ccp
                WHERE ccp.connector_id = c.id
                  AND c.source = 'SLACK'
                  AND ccp.credential_id = {credential_id}
            """
            )
        except Exception:
            print(
                f"We were unable to get the workspace url for your Slack Connector with id {credential_id}."
            )
            print("This connector will no longer work until you upgrade.")
            continue


================================================
FILE: backend/alembic/versions/c0c937d5c9e5_llm_provider_deprecate_fields.py
================================================
"""llm provider deprecate fields

Revision ID: c0c937d5c9e5
Revises: 8ffcc2bcfc11
Create Date: 2026-02-25 17:35:46.125102

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "c0c937d5c9e5"
down_revision = "8ffcc2bcfc11"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Make default_model_name nullable (was NOT NULL)
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=True,
    )

    # Drop unique constraint on is_default_provider (defaults now tracked via LLMModelFlow)
    op.drop_constraint(
        "llm_provider_is_default_provider_key",
        "llm_provider",
        type_="unique",
    )

    # Remove server_default from is_default_vision_provider (was server_default=false())
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=None,
    )


def downgrade() -> None:
    # Restore default_model_name to NOT NULL (set empty string for any NULLs first)
    op.execute(
        "UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
    )
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=False,
    )

    # Restore unique constraint on is_default_provider
    op.create_unique_constraint(
        "llm_provider_is_default_provider_key",
        "llm_provider",
        ["is_default_provider"],
    )

    # Restore server_default for is_default_vision_provider
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=sa.false(),
    )


================================================
FILE: backend/alembic/versions/c0fd6e4da83a_add_recent_assistants.py
================================================
"""add recent assistants

Revision ID: c0fd6e4da83a
Revises: b72ed7a5db0e
Create Date: 2024-11-03 17:28:54.916618

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "c0fd6e4da83a"
down_revision = "b72ed7a5db0e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "recent_assistants")


================================================
FILE: backend/alembic/versions/c18cdf4b497e_add_standard_answer_tables.py
================================================
"""Add standard_answer tables

Revision ID: c18cdf4b497e
Revises: 3a7802814195
Create Date: 2024-06-06 15:15:02.000648

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "c18cdf4b497e"
down_revision = "3a7802814195"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "standard_answer",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("keyword", sa.String(), nullable=False),
        sa.Column("answer", sa.String(), nullable=False),
        sa.Column("active", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("keyword"),
    )
    op.create_table(
        "standard_answer_category",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )
    op.create_table(
        "standard_answer__standard_answer_category",
        sa.Column("standard_answer_id", sa.Integer(), nullable=False),
        sa.Column("standard_answer_category_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["standard_answer_category_id"],
            ["standard_answer_category.id"],
        ),
        sa.ForeignKeyConstraint(
            ["standard_answer_id"],
            ["standard_answer.id"],
        ),
        sa.PrimaryKeyConstraint("standard_answer_id", "standard_answer_category_id"),
    )
    op.create_table(
        "slack_bot_config__standard_answer_category",
        sa.Column("slack_bot_config_id", sa.Integer(), nullable=False),
        sa.Column("standard_answer_category_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["slack_bot_config_id"],
            ["slack_bot_config.id"],
        ),
        sa.ForeignKeyConstraint(
            ["standard_answer_category_id"],
            ["standard_answer_category.id"],
        ),
        sa.PrimaryKeyConstraint("slack_bot_config_id", "standard_answer_category_id"),
    )

    op.add_column(
        "chat_session", sa.Column("slack_thread_id", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("chat_session", "slack_thread_id")

    op.drop_table("slack_bot_config__standard_answer_category")
    op.drop_table("standard_answer__standard_answer_category")
    op.drop_table("standard_answer_category")
    op.drop_table("standard_answer")


================================================
FILE: backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
================================================
"""add_deep_research_tool

Revision ID: c1d2e3f4a5b6
Revises: b8c9d0e1f2a3
Create Date: 2025-12-18 16:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "c1d2e3f4a5b6"
down_revision = "b8c9d0e1f2a3"
branch_labels = None
depends_on = None


DEEP_RESEARCH_TOOL = {
    "name": "ResearchAgent",
    "display_name": "Research Agent",
    "description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
    "in_code_tool_id": "ResearchAgent",
}


def upgrade() -> None:
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
            VALUES (:name, :display_name, :description, :in_code_tool_id, false)
            """
        ),
        DEEP_RESEARCH_TOOL,
    )


def downgrade() -> None:
    conn = op.get_bind()
    conn.execute(
        sa.text(
            """
            DELETE FROM tool
            WHERE in_code_tool_id = :in_code_tool_id
            """
        ),
        {"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
    )


================================================
FILE: backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
================================================
"""Add index_attempt_errors table

Revision ID: c5b692fa265c
Revises: 4a951134c801
Create Date: 2024-08-08 14:06:39.581972

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "c5b692fa265c"
down_revision = "4a951134c801"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "index_attempt_errors",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("index_attempt_id", sa.Integer(), nullable=True),
        sa.Column("batch", sa.Integer(), nullable=True),
        sa.Column(
            "doc_summaries",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column("error_msg", sa.Text(), nullable=True),
        sa.Column("traceback", sa.Text(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["index_attempt_id"],
            ["index_attempt.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "index_attempt_id",
        "index_attempt_errors",
        ["time_created"],
        unique=False,
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("index_attempt_id", table_name="index_attempt_errors")
    op.drop_table("index_attempt_errors")
    # ### end Alembic commands ###


================================================
FILE: backend/alembic/versions/c5eae4a75a1b_add_chat_message__standard_answer_table.py
================================================
"""Add chat_message__standard_answer table

Revision ID: c5eae4a75a1b
Revises: 0f7ff6d75b57
Create Date: 2025-01-15 14:08:49.688998

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "c5eae4a75a1b"
down_revision = "0f7ff6d75b57"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "chat_message__standard_answer",
        sa.Column("chat_message_id", sa.Integer(), nullable=False),
        sa.Column("standard_answer_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["chat_message_id"],
            ["chat_message.id"],
        ),
        sa.ForeignKeyConstraint(
            ["standard_answer_id"],
            ["standard_answer.id"],
        ),
        sa.PrimaryKeyConstraint("chat_message_id", "standard_answer_id"),
    )


def downgrade() -> None:
    op.drop_table("chat_message__standard_answer")


================================================
FILE: backend/alembic/versions/c7bf5721733e_add_has_been_indexed_to_.py
================================================
"""Add has_been_indexed to DocumentByConnectorCredentialPair

Revision ID: c7bf5721733e
Revises: fec3db967bf7
Create Date: 2025-01-13 12:39:05.831693

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "c7bf5721733e"
down_revision = "027381bce97c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # assume all existing rows have been indexed, no better approach
    op.add_column(
        "document_by_connector_credential_pair",
        sa.Column("has_been_indexed", sa.Boolean(), nullable=True),
    )
    op.execute(
        "UPDATE document_by_connector_credential_pair SET has_been_indexed = TRUE"
    )
    op.alter_column(
        "document_by_connector_credential_pair",
        "has_been_indexed",
        nullable=False,
    )

    # Add index to optimize get_document_counts_for_cc_pairs query pattern
    op.create_index(
        "idx_document_cc_pair_counts",
        "document_by_connector_credential_pair",
        ["connector_id", "credential_id", "has_been_indexed"],
        unique=False,
    )


def downgrade() -> None:
    # Remove the index first before removing the column
    op.drop_index(
        "idx_document_cc_pair_counts",
        table_name="document_by_connector_credential_pair",
    )
    op.drop_column("document_by_connector_credential_pair", "has_been_indexed")


================================================
FILE: backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py
================================================
"""add_python_tool

Revision ID: c7e9f4a3b2d1
Revises: 3c9a65f1207f
Create Date: 2025-11-08 00:00:00.000000

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "c7e9f4a3b2d1"
down_revision = "3c9a65f1207f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add PythonTool to built-in tools"""
    conn = op.get_bind()

    conn.execute(
        sa.text(
            """
            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
            VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
            """
        ),
        {
            "name": "PythonTool",
            # in the UI, call it `Code Interpreter` since this is a well known term for this tool
            "display_name": "Code Interpreter",
            "description": (
                "The Code Interpreter Action allows the assistant to execute "
                "Python code in a secure, isolated environment for data analysis, "
                "computation, visualization, and file processing."
            ),
            "in_code_tool_id": "PythonTool",
            "enabled": True,
        },
    )

    # needed to store files generated by the python tool
    op.add_column(
        "research_agent_iteration_sub_step",
        sa.Column(
            "file_ids",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )


def downgrade() -> None:
    """Remove PythonTool from built-in tools"""
    conn = op.get_bind()

    conn.execute(
        sa.text(
            """
            DELETE FROM tool
            WHERE in_code_tool_id = :in_code_tool_id
            """
        ),
        {
            "in_code_tool_id": "PythonTool",
        },
    )

    op.drop_column("research_agent_iteration_sub_step", "file_ids")


================================================
FILE: backend/alembic/versions/c7f2e1b4a9d3_add_sharing_scope_to_build_session.py
================================================
"""add sharing_scope to build_session

Revision ID: c7f2e1b4a9d3
Revises: 19c0ccb01687
Create Date: 2026-02-17 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

revision = "c7f2e1b4a9d3"
down_revision = "19c0ccb01687"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "build_session",
        sa.Column(
            "sharing_scope",
            sa.String(),
            nullable=False,
            server_default="private",
        ),
    )


def downgrade() -> None:
    op.drop_column("build_session", "sharing_scope")


================================================
FILE: backend/alembic/versions/c8a93a2af083_personalization_user_info.py
================================================
"""personalization_user_info

Revision ID: c8a93a2af083
Revises: 6f4f86aef280
Create Date: 2025-10-14 15:59:03.577343

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "c8a93a2af083"
down_revision = "6f4f86aef280"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("personal_name", sa.String(), nullable=True),
    )
    op.add_column(
        "user",
        sa.Column("personal_role", sa.String(), nullable=True),
    )
    op.add_column(
        "user",
        sa.Column(
            "use_memories",
            sa.Boolean(),
            nullable=False,
            server_default=sa.true(),
        ),
    )

    op.create_table(
        "memory",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("memory_text", sa.Text(), nullable=False),
        sa.Column("conversation_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("message_id", sa.Integer(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_index("ix_memory_user_id", "memory", ["user_id"])


def downgrade() -> None:
    op.drop_index("ix_memory_user_id", table_name="memory")
    op.drop_table("memory")

    op.drop_column("user", "use_memories")
    op.drop_column("user", "personal_role")
    op.drop_column("user", "personal_name")


================================================
FILE: backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py
================================================
"""add nullable to persona id in Chat Session

Revision ID: c99d76fcd298
Revises: 5c7fdadae813
Create Date: 2024-07-09 19:27:01.579697

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "c99d76fcd298"
down_revision = "5c7fdadae813"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column(
        "chat_session", "persona_id", existing_type=sa.INTEGER(), nullable=True
    )


def downgrade() -> None:
    # Delete chat messages and feedback first since they reference chat sessions
    # Get chat messages from sessions with null persona_id
    chat_messages_query = """
        SELECT id
        FROM chat_message
        WHERE chat_session_id IN (
            SELECT id
            FROM chat_session
            WHERE persona_id IS NULL
        )
    """

    # Delete dependent records first
    op.execute(
        f"""
        DELETE FROM document_retrieval_feedback
        WHERE chat_message_id IN (
            {chat_messages_query}
        )
    """
    )
    op.execute(
        f"""
        DELETE FROM chat_message__search_doc
        WHERE chat_message_id IN (
            {chat_messages_query}
        )
    """
    )

    # Delete chat messages
    op.execute(
        """
        DELETE FROM chat_message
        WHERE chat_session_id IN (
            SELECT id
            FROM chat_session
            WHERE persona_id IS NULL
        )
    """
    )

    # Now we can safely delete the chat sessions
    op.execute(
        """
        DELETE FROM chat_session
        WHERE persona_id IS NULL
    """
    )

    op.alter_column(
        "chat_session",
        "persona_id",
        existing_type=sa.INTEGER(),
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py
================================================
"""modify_file_store_for_external_storage

Revision ID: c9e2cd766c29
Revises: 03bf8be6b53a
Create Date: 2025-06-13 14:02:09.867679

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
from sqlalchemy import text
from typing import cast, Any

from botocore.exceptions import ClientError

from onyx.db._deprecated.pg_file_store import delete_lobj_by_id, read_lobj
from onyx.file_store.file_store import get_s3_file_store
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

# revision identifiers, used by Alembic.
revision = "c9e2cd766c29"
down_revision = "03bf8be6b53a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    try:
        # Modify existing file_store table to support external storage
        op.rename_table("file_store", "file_record")

        # Make lobj_oid nullable (for external storage files)
        op.alter_column("file_record", "lobj_oid", nullable=True)

        # Add external storage columns with generic names
        op.add_column(
            "file_record", sa.Column("bucket_name", sa.String(), nullable=True)
        )
        op.add_column(
            "file_record", sa.Column("object_key", sa.String(), nullable=True)
        )

        # Add timestamps for tracking
        op.add_column(
            "file_record",
            sa.Column(
                "created_at",
                sa.DateTime(timezone=True),
                server_default=sa.func.now(),
                nullable=False,
            ),
        )
        op.add_column(
            "file_record",
            sa.Column(
                "updated_at",
                sa.DateTime(timezone=True),
                server_default=sa.func.now(),
                nullable=False,
            ),
        )

        op.alter_column("file_record", "file_name", new_column_name="file_id")
    except Exception as e:
        if "does not exist" in str(e) or 'relation "file_store" does not exist' in str(
            e
        ):
            print(
                f"Ran into error - {e}. Likely means we had a partial success in the past, continuing..."
            )
        else:
            raise

    print(
        "External storage configured - migrating files from PostgreSQL to external storage..."
    )
    # if we fail midway through this, we'll have a partial success. Running the migration
    # again should allow us to continue.
    _migrate_files_to_external_storage()
    print("File migration completed successfully!")

    # Remove lobj_oid column
    op.drop_column("file_record", "lobj_oid")


def downgrade() -> None:
    """Revert schema changes and migrate files from external storage back to PostgreSQL large objects."""

    print(
        "Reverting to PostgreSQL-backed file store – migrating files from external storage …"
    )

    # 1. Ensure `lobj_oid` exists on the current `file_record` table (nullable for now).
    op.add_column("file_record", sa.Column("lobj_oid", sa.Integer(), nullable=True))

    # 2. Move content from external storage back into PostgreSQL large objects (table is still
    #    called `file_record` so application code continues to work during the copy).
    try:
        _migrate_files_to_postgres()
    except Exception:
        print("Error during downgrade migration, rolling back …")
        op.drop_column("file_record", "lobj_oid")
        raise

    # 3. After migration every row should now have `lobj_oid` populated – mark NOT NULL.
    op.alter_column("file_record", "lobj_oid", nullable=False)

    # 4. Remove columns that are only relevant to external storage.
    op.drop_column("file_record", "updated_at")
    op.drop_column("file_record", "created_at")
    op.drop_column("file_record", "object_key")
    op.drop_column("file_record", "bucket_name")

    # 5. Rename `file_id` back to `file_name` (still on `file_record`).
    op.alter_column("file_record", "file_id", new_column_name="file_name")

    # 6. Finally, rename the table back to its original name expected by the legacy codebase.
    op.rename_table("file_record", "file_store")

    print(
        "Downgrade migration completed – files are now stored inside PostgreSQL again."
    )


# -----------------------------------------------------------------------------
# Helper: migrate from external storage (S3/MinIO) back into PostgreSQL large objects


def _migrate_files_to_postgres() -> None:
    """Move any files whose content lives in external S3-compatible storage back into PostgreSQL.

    The logic mirrors *inverse* of `_migrate_files_to_external_storage` used on upgrade.
    """

    # Obtain DB session from Alembic context
    bind = op.get_bind()
    session = Session(bind=bind)

    # Fetch rows that have external storage pointers (bucket/object_key not NULL)
    result = session.execute(
        text(
            "SELECT file_id, bucket_name, object_key FROM file_record WHERE bucket_name IS NOT NULL AND object_key IS NOT NULL"
        )
    )

    files_to_migrate = [row[0] for row in result.fetchall()]
    total_files = len(files_to_migrate)

    if total_files == 0:
        print("No files found in external storage to migrate back to PostgreSQL.")
        return

    print(f"Found {total_files} files to migrate back to PostgreSQL large objects.")

    _set_tenant_contextvar(session)
    migrated_count = 0

    # only create external store if we have files to migrate. This line
    # makes it so we need to have S3/MinIO configured to run this migration.
    external_store = get_s3_file_store()

    for i, file_id in enumerate(files_to_migrate, 1):
        print(f"Migrating file {i}/{total_files}: {file_id}")

        # Read file content from external storage (always binary)
        try:
            file_io = external_store.read_file(
                file_id=file_id, mode="b", use_tempfile=True
            )
            file_io.seek(0)

            # Import lazily to avoid circular deps at Alembic runtime
            from onyx.db._deprecated.pg_file_store import (
                create_populate_lobj,
            )  # noqa: E402

            # Create new Postgres large object and populate it
            lobj_oid = create_populate_lobj(content=file_io, db_session=session)

            # Update DB row: set lobj_oid, clear bucket/object_key
            session.execute(
                text(
                    "UPDATE file_record SET lobj_oid = :lobj_oid, bucket_name = NULL, object_key = NULL WHERE file_id = :file_id"
                ),
                {"lobj_oid": lobj_oid, "file_id": file_id},
            )
        except ClientError as e:
            if "NoSuchKey" in str(e):
                print(
                    f"File {file_id} not found in external storage. Deleting from database."
                )
                session.execute(
                    text("DELETE FROM file_record WHERE file_id = :file_id"),
                    {"file_id": file_id},
                )
            else:
                raise

        migrated_count += 1
        print(f"✓ Successfully migrated file {i}/{total_files}: {file_id}")

    # Flush the SQLAlchemy session so statements are sent to the DB, but **do not**
    # commit the transaction.  The surrounding Alembic migration will commit once
    # the *entire* downgrade succeeds.  This keeps the whole downgrade atomic and
    # avoids leaving the database in a partially-migrated state if a later schema
    # operation fails.
    session.flush()

    print(
        f"Migration back to PostgreSQL completed: {migrated_count} files staged for commit."
    )


def _migrate_files_to_external_storage() -> None:
    """Migrate files from PostgreSQL large objects to external storage"""
    # Get database session
    bind = op.get_bind()
    session = Session(bind=bind)
    external_store = get_s3_file_store()

    # Find all files currently stored in PostgreSQL (lobj_oid is not null)
    result = session.execute(
        text(
            "SELECT file_id FROM file_record WHERE lobj_oid IS NOT NULL AND bucket_name IS NULL AND object_key IS NULL"
        )
    )

    files_to_migrate = [row[0] for row in result.fetchall()]
    total_files = len(files_to_migrate)

    if total_files == 0:
        print("No files found in PostgreSQL storage to migrate.")
        return

    # might need to move this above the if statement when creating a new multi-tenant
    # system. VERY extreme edge case.
    external_store.initialize()
    print(f"Found {total_files} files to migrate from PostgreSQL to external storage.")

    _set_tenant_contextvar(session)
    migrated_count = 0

    for i, file_id in enumerate(files_to_migrate, 1):
        print(f"Migrating file {i}/{total_files}: {file_id}")

        # Read file record to get metadata
        file_record = session.execute(
            text("SELECT * FROM file_record WHERE file_id = :file_id"),
            {"file_id": file_id},
        ).fetchone()

        if file_record is None:
            print(f"File {file_id} not found in PostgreSQL storage.")
            continue

        lobj_id = cast(int, file_record.lobj_oid)
        file_metadata = cast(Any, file_record.file_metadata)

        # Read file content from PostgreSQL
        try:
            file_content = read_lobj(
                lobj_id, db_session=session, mode="b", use_tempfile=True
            )
        except Exception as e:
            if "large object" in str(e) and "does not exist" in str(e):
                print(f"File {file_id} not found in PostgreSQL storage.")
                continue
            else:
                raise

        # Handle file_metadata type conversion
        file_metadata = None
        if file_metadata is not None:
            if isinstance(file_metadata, dict):
                file_metadata = file_metadata
            else:
                # Convert other types to dict if possible, otherwise None
                try:
                    file_metadata = dict(file_record.file_metadata)
                except (TypeError, ValueError):
                    file_metadata = None

        # Save to external storage (this will handle the database record update and cleanup)
        # NOTE: this WILL .commit() the transaction.
        external_store.save_file(
            file_id=file_id,
            content=file_content,
            display_name=file_record.display_name,
            file_origin=file_record.file_origin,
            file_type=file_record.file_type,
            file_metadata=file_metadata,
        )
        delete_lobj_by_id(lobj_id, db_session=session)

        migrated_count += 1
        print(f"✓ Successfully migrated file {i}/{total_files}: {file_id}")

    # See note above – flush but do **not** commit so the outer Alembic transaction
    # controls atomicity.
    session.flush()

    print(
        f"Migration completed: {migrated_count} files staged for commit to external storage."
    )


def _set_tenant_contextvar(session: Session) -> None:
    """Set the tenant contextvar to the default schema"""
    current_tenant = session.execute(text("SELECT current_schema()")).scalar()
    print(f"Migrating files for tenant: {current_tenant}")
    CURRENT_TENANT_ID_CONTEXTVAR.set(current_tenant)


================================================
FILE: backend/alembic/versions/ca04500b9ee8_add_cascade_deletes_to_agent_tables.py
================================================
"""add_cascade_deletes_to_agent_tables

Revision ID: ca04500b9ee8
Revises: 238b84885828
Create Date: 2025-05-30 16:03:51.112263

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "ca04500b9ee8"
down_revision = "238b84885828"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop existing foreign key constraints
    op.drop_constraint(
        "agent__sub_question_primary_question_id_fkey",
        "agent__sub_question",
        type_="foreignkey",
    )
    op.drop_constraint(
        "agent__sub_query_parent_question_id_fkey",
        "agent__sub_query",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__standard_answer_chat_message_id_fkey",
        "chat_message__standard_answer",
        type_="foreignkey",
    )
    op.drop_constraint(
        "agent__sub_query__search_doc_sub_query_id_fkey",
        "agent__sub_query__search_doc",
        type_="foreignkey",
    )

    # Recreate foreign key constraints with CASCADE delete
    op.create_foreign_key(
        "agent__sub_question_primary_question_id_fkey",
        "agent__sub_question",
        "chat_message",
        ["primary_question_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "agent__sub_query_parent_question_id_fkey",
        "agent__sub_query",
        "agent__sub_question",
        ["parent_question_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "chat_message__standard_answer_chat_message_id_fkey",
        "chat_message__standard_answer",
        "chat_message",
        ["chat_message_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "agent__sub_query__search_doc_sub_query_id_fkey",
        "agent__sub_query__search_doc",
        "agent__sub_query",
        ["sub_query_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Drop CASCADE foreign key constraints
    op.drop_constraint(
        "agent__sub_question_primary_question_id_fkey",
        "agent__sub_question",
        type_="foreignkey",
    )
    op.drop_constraint(
        "agent__sub_query_parent_question_id_fkey",
        "agent__sub_query",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__standard_answer_chat_message_id_fkey",
        "chat_message__standard_answer",
        type_="foreignkey",
    )
    op.drop_constraint(
        "agent__sub_query__search_doc_sub_query_id_fkey",
        "agent__sub_query__search_doc",
        type_="foreignkey",
    )

    # Recreate foreign key constraints without CASCADE delete
    op.create_foreign_key(
        "agent__sub_question_primary_question_id_fkey",
        "agent__sub_question",
        "chat_message",
        ["primary_question_id"],
        ["id"],
    )
    op.create_foreign_key(
        "agent__sub_query_parent_question_id_fkey",
        "agent__sub_query",
        "agent__sub_question",
        ["parent_question_id"],
        ["id"],
    )
    op.create_foreign_key(
        "chat_message__standard_answer_chat_message_id_fkey",
        "chat_message__standard_answer",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )
    op.create_foreign_key(
        "agent__sub_query__search_doc_sub_query_id_fkey",
        "agent__sub_query__search_doc",
        "agent__sub_query",
        ["sub_query_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/cbc03e08d0f3_add_opensearch_migration_tables.py
================================================
"""add_opensearch_migration_tables

Revision ID: cbc03e08d0f3
Revises: be87a654d5af
Create Date: 2026-01-31 17:00:45.176604

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "cbc03e08d0f3"
down_revision = "be87a654d5af"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1. Create opensearch_document_migration_record table.
    op.create_table(
        "opensearch_document_migration_record",
        sa.Column("document_id", sa.String(), nullable=False),
        sa.Column("status", sa.String(), nullable=False, server_default="pending"),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("attempts_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("last_attempt_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("document_id"),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["document.id"],
            ondelete="CASCADE",
        ),
    )
    # 2. Create indices.
    op.create_index(
        "ix_opensearch_document_migration_record_status",
        "opensearch_document_migration_record",
        ["status"],
    )
    op.create_index(
        "ix_opensearch_document_migration_record_attempts_count",
        "opensearch_document_migration_record",
        ["attempts_count"],
    )
    op.create_index(
        "ix_opensearch_document_migration_record_created_at",
        "opensearch_document_migration_record",
        ["created_at"],
    )

    # 3. Create opensearch_tenant_migration_record table (singleton).
    op.create_table(
        "opensearch_tenant_migration_record",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "document_migration_record_table_population_status",
            sa.String(),
            nullable=False,
            server_default="pending",
        ),
        sa.Column(
            "num_times_observed_no_additional_docs_to_populate_migration_table",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
        sa.Column(
            "overall_document_migration_status",
            sa.String(),
            nullable=False,
            server_default="pending",
        ),
        sa.Column(
            "num_times_observed_no_additional_docs_to_migrate",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
        sa.Column(
            "last_updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )

    # 4. Create unique index on constant to enforce singleton pattern.
    op.execute(
        sa.text(
            """
            CREATE UNIQUE INDEX idx_opensearch_tenant_migration_singleton
            ON opensearch_tenant_migration_record ((true))
            """
        )
    )


def downgrade() -> None:
    # Drop opensearch_tenant_migration_record.
    op.drop_index(
        "idx_opensearch_tenant_migration_singleton",
        table_name="opensearch_tenant_migration_record",
    )
    op.drop_table("opensearch_tenant_migration_record")

    # Drop opensearch_document_migration_record.
    op.drop_index(
        "ix_opensearch_document_migration_record_created_at",
        table_name="opensearch_document_migration_record",
    )
    op.drop_index(
        "ix_opensearch_document_migration_record_attempts_count",
        table_name="opensearch_document_migration_record",
    )
    op.drop_index(
        "ix_opensearch_document_migration_record_status",
        table_name="opensearch_document_migration_record",
    )
    op.drop_table("opensearch_document_migration_record")


================================================
FILE: backend/alembic/versions/cec7ec36c505_kgentity_parent.py
================================================
"""kgentity_parent

Revision ID: cec7ec36c505
Revises: 495cb26ce93e
Create Date: 2025-06-07 20:07:46.400770

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "cec7ec36c505"
down_revision = "495cb26ce93e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "kg_entity",
        sa.Column("parent_key", sa.String(), nullable=True, index=True),
    )
    # NOTE: you will have to reindex the KG after this migration as the parent_key will be null


def downgrade() -> None:
    op.drop_column("kg_entity", "parent_key")


================================================
FILE: backend/alembic/versions/cf90764725d8_larger_refresh_tokens.py
================================================
"""larger refresh tokens

Revision ID: cf90764725d8
Revises: 4794bc13e484
Create Date: 2025-04-04 10:56:39.769294

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "cf90764725d8"
down_revision = "4794bc13e484"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("oauth_account", "refresh_token", type_=sa.Text())


def downgrade() -> None:
    op.alter_column("oauth_account", "refresh_token", type_=sa.String(length=1024))


================================================
FILE: backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py
================================================
"""seed_builtin_tools

Revision ID: d09fc20a3c66
Revises: b7ec9b5b505f
Create Date: 2025-09-09 19:32:16.824373

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d09fc20a3c66"
down_revision = "b7ec9b5b505f"
branch_labels = None
depends_on = None


# Tool definitions - core tools that should always be seeded
# Names/in_code_tool_id are the same as the class names in the tool_implementations package
BUILT_IN_TOOLS = [
    {
        "name": "SearchTool",
        "display_name": "Internal Search",
        "description": "The Search Action allows the Assistant to search through connected knowledge to help build an answer.",
        "in_code_tool_id": "SearchTool",
    },
    {
        "name": "ImageGenerationTool",
        "display_name": "Image Generation",
        "description": (
            "The Image Generation Action allows the assistant to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
            "The action will be used when the user asks the assistant to generate an image."
        ),
        "in_code_tool_id": "ImageGenerationTool",
    },
    {
        "name": "WebSearchTool",
        "display_name": "Web Search",
        "description": (
            "The Web Search Action allows the assistant to perform internet searches for up-to-date information."
        ),
        "in_code_tool_id": "WebSearchTool",
    },
    {
        "name": "KnowledgeGraphTool",
        "display_name": "Knowledge Graph Search",
        "description": (
            "The Knowledge Graph Search Action allows the assistant to search the "
            "Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Assistant, "
            "and it requires the Knowledge Graph to be enabled."
        ),
        "in_code_tool_id": "KnowledgeGraphTool",
    },
    {
        "name": "OktaProfileTool",
        "display_name": "Okta Profile",
        "description": (
            "The Okta Profile Action allows the assistant to fetch the current user's information from Okta. "
            "This may include the user's name, email, phone number, address, and other details such as their "
            "manager and direct reports."
        ),
        "in_code_tool_id": "OktaProfileTool",
    },
]


def upgrade() -> None:
    conn = op.get_bind()

    # Get existing tools to check what already exists
    existing_tools = conn.execute(
        sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
    ).fetchall()
    existing_tool_ids = {row[0] for row in existing_tools}

    # Insert or update built-in tools
    for tool in BUILT_IN_TOOLS:
        in_code_id = tool["in_code_tool_id"]

        # Handle historical rename: InternetSearchTool -> WebSearchTool
        if (
            in_code_id == "WebSearchTool"
            and "WebSearchTool" not in existing_tool_ids
            and "InternetSearchTool" in existing_tool_ids
        ):
            # Rename the existing InternetSearchTool row in place and update fields
            conn.execute(
                sa.text(
                    """
                    UPDATE tool
                    SET name = :name,
                        display_name = :display_name,
                        description = :description,
                        in_code_tool_id = :in_code_tool_id
                    WHERE in_code_tool_id = 'InternetSearchTool'
                    """
                ),
                tool,
            )
            # Keep the local view of existing ids in sync to avoid duplicate insert
            existing_tool_ids.discard("InternetSearchTool")
            existing_tool_ids.add("WebSearchTool")
            continue

        if in_code_id in existing_tool_ids:
            # Update existing tool
            conn.execute(
                sa.text(
                    """
                    UPDATE tool
                    SET name = :name,
                        display_name = :display_name,
                        description = :description
                    WHERE in_code_tool_id = :in_code_tool_id
                    """
                ),
                tool,
            )
        else:
            # Insert new tool
            conn.execute(
                sa.text(
                    """
                    INSERT INTO tool (name, display_name, description, in_code_tool_id)
                    VALUES (:name, :display_name, :description, :in_code_tool_id)
                    """
                ),
                tool,
            )


def downgrade() -> None:
    # We don't remove the tools on downgrade since it's totally fine to just
    # have them around. If we upgrade again, it will be a no-op.
    pass


================================================
FILE: backend/alembic/versions/d1b637d7050a_sync_exa_api_key_to_content_provider.py
================================================
"""sync_exa_api_key_to_content_provider

Revision ID: d1b637d7050a
Revises: d25168c2beee
Create Date: 2026-01-09 15:54:15.646249

"""

from alembic import op
from sqlalchemy import text


# revision identifiers, used by Alembic.
revision = "d1b637d7050a"
down_revision = "d25168c2beee"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Exa uses a shared API key between search and content providers.
    # For existing Exa search providers with API keys, create the corresponding
    # content provider if it doesn't exist yet.
    connection = op.get_bind()

    # Check if Exa search provider exists with an API key
    result = connection.execute(
        text(
            """
            SELECT api_key FROM internet_search_provider
            WHERE provider_type = 'exa' AND api_key IS NOT NULL
            LIMIT 1
            """
        )
    )
    row = result.fetchone()

    if row:
        api_key = row[0]
        # Create Exa content provider with the shared key
        connection.execute(
            text(
                """
                INSERT INTO internet_content_provider
                (name, provider_type, api_key, is_active)
                VALUES ('Exa', 'exa', :api_key, false)
                ON CONFLICT (name) DO NOTHING
                """
            ),
            {"api_key": api_key},
        )


def downgrade() -> None:
    # Remove the Exa content provider that was created by this migration
    connection = op.get_bind()
    connection.execute(
        text(
            """
            DELETE FROM internet_content_provider
            WHERE provider_type = 'exa'
            """
        )
    )


================================================
FILE: backend/alembic/versions/d25168c2beee_tool_name_consistency.py
================================================
"""tool_name_consistency

Revision ID: d25168c2beee
Revises: 8405ca81cc83
Create Date: 2026-01-11 17:54:40.135777

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d25168c2beee"
down_revision = "8405ca81cc83"
branch_labels = None
depends_on = None


# Currently the seeded tools have the in_code_tool_id == name
CURRENT_TOOL_NAME_MAPPING = [
    "SearchTool",
    "WebSearchTool",
    "ImageGenerationTool",
    "PythonTool",
    "OpenURLTool",
    "KnowledgeGraphTool",
    "ResearchAgent",
]

# Mapping of in_code_tool_id -> name
# These are the expected names that we want in the database
EXPECTED_TOOL_NAME_MAPPING = {
    "SearchTool": "internal_search",
    "WebSearchTool": "web_search",
    "ImageGenerationTool": "generate_image",
    "PythonTool": "python",
    "OpenURLTool": "open_url",
    "KnowledgeGraphTool": "run_kg_search",
    "ResearchAgent": "research_agent",
}


def upgrade() -> None:
    conn = op.get_bind()

    # Mapping of in_code_tool_id to the NAME constant from each tool class
    # These match the .name property of each tool implementation
    tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING

    # Update the name column for each tool based on its in_code_tool_id
    for in_code_tool_id, expected_name in tool_name_mapping.items():
        conn.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :expected_name
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            {
                "expected_name": expected_name,
                "in_code_tool_id": in_code_tool_id,
            },
        )


def downgrade() -> None:
    conn = op.get_bind()

    # Reverse the migration by setting name back to in_code_tool_id
    # This matches the original pattern where name was the class name
    for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
        conn.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :current_name
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            {
                "current_name": in_code_tool_id,
                "in_code_tool_id": in_code_tool_id,
            },
        )


================================================
FILE: backend/alembic/versions/d3fd499c829c_add_file_reader_tool.py
================================================
"""add_file_reader_tool

Revision ID: d3fd499c829c
Revises: 114a638452db
Create Date: 2026-02-07 19:28:22.452337

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d3fd499c829c"
down_revision = "114a638452db"
branch_labels = None
depends_on = None

FILE_READER_TOOL = {
    "name": "read_file",
    "display_name": "File Reader",
    "description": (
        "Read sections of user-uploaded files by character offset. "
        "Useful for inspecting large files that cannot fit entirely in context."
    ),
    "in_code_tool_id": "FileReaderTool",
    "enabled": True,
}


def upgrade() -> None:
    conn = op.get_bind()

    # Check if tool already exists
    existing = conn.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        {"in_code_tool_id": FILE_READER_TOOL["in_code_tool_id"]},
    ).fetchone()

    if existing:
        # Update existing tool
        conn.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :name,
                    display_name = :display_name,
                    description = :description
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            FILE_READER_TOOL,
        )
        tool_id = existing[0]
    else:
        # Insert new tool
        result = conn.execute(
            sa.text(
                """
                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
                RETURNING id
                """
            ),
            FILE_READER_TOOL,
        )
        tool_id = result.scalar_one()

    # Attach to the default persona (id=0) if not already attached
    conn.execute(
        sa.text(
            """
            INSERT INTO persona__tool (persona_id, tool_id)
            VALUES (0, :tool_id)
            ON CONFLICT DO NOTHING
            """
        ),
        {"tool_id": tool_id},
    )


def downgrade() -> None:
    conn = op.get_bind()
    in_code_tool_id = FILE_READER_TOOL["in_code_tool_id"]

    # Remove persona associations first (FK constraint)
    conn.execute(
        sa.text(
            """
            DELETE FROM persona__tool
            WHERE tool_id IN (
                SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id
            )
            """
        ),
        {"in_code_tool_id": in_code_tool_id},
    )

    conn.execute(
        sa.text("DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        {"in_code_tool_id": in_code_tool_id},
    )


================================================
FILE: backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py
================================================
"""Remove deletion_attempt table

Revision ID: d5645c915d0e
Revises: 8e26726b7683
Create Date: 2023-09-14 15:04:14.444909

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "d5645c915d0e"
down_revision = "8e26726b7683"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_table("deletion_attempt")

    # Remove the DeletionStatus enum
    op.execute("DROP TYPE IF EXISTS deletionstatus;")


def downgrade() -> None:
    op.create_table(
        "deletion_attempt",
        sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
        sa.Column("connector_id", sa.INTEGER(), autoincrement=False, nullable=False),
        sa.Column("credential_id", sa.INTEGER(), autoincrement=False, nullable=False),
        sa.Column(
            "status",
            postgresql.ENUM(
                "NOT_STARTED",
                "IN_PROGRESS",
                "SUCCESS",
                "FAILED",
                name="deletionstatus",
            ),
            autoincrement=False,
            nullable=False,
        ),
        sa.Column(
            "num_docs_deleted",
            sa.INTEGER(),
            autoincrement=False,
            nullable=False,
        ),
        sa.Column("error_msg", sa.VARCHAR(), autoincrement=False, nullable=True),
        sa.Column(
            "time_created",
            postgresql.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            autoincrement=False,
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            postgresql.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            autoincrement=False,
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["connector_id"],
            ["connector.id"],
            name="deletion_attempt_connector_id_fkey",
        ),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
            name="deletion_attempt_credential_id_fkey",
        ),
        sa.PrimaryKeyConstraint("id", name="deletion_attempt_pkey"),
    )


================================================
FILE: backend/alembic/versions/d56ffa94ca32_add_file_content.py
================================================
"""add_file_content

Revision ID: d56ffa94ca32
Revises: 01f8e6d95a33
Create Date: 2026-02-06 15:29:34.192960

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d56ffa94ca32"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "file_content",
        sa.Column(
            "file_id",
            sa.String(),
            sa.ForeignKey("file_record.file_id", ondelete="CASCADE"),
            primary_key=True,
        ),
        sa.Column("lobj_oid", sa.BigInteger(), nullable=False),
        sa.Column("file_size", sa.BigInteger(), nullable=False, server_default="0"),
    )


def downgrade() -> None:
    op.drop_table("file_content")


================================================
FILE: backend/alembic/versions/d5c86e2c6dc6_add_cascade_delete_to_search_query_user_.py
================================================
"""add_cascade_delete_to_search_query_user_id

Revision ID: d5c86e2c6dc6
Revises: 90b409d06e50
Create Date: 2026-02-04 16:05:04.749804

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "d5c86e2c6dc6"
down_revision = "90b409d06e50"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
    op.create_foreign_key(
        "search_query_user_id_fkey",
        "search_query",
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
    op.create_foreign_key(
        "search_query_user_id_fkey", "search_query", "user", ["user_id"], ["id"]
    )


================================================
FILE: backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py
================================================
"""Add Total Docs for Index Attempt

Revision ID: d61e513bef0a
Revises: 46625e4745d4
Create Date: 2023-10-27 23:02:43.369964

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "d61e513bef0a"
down_revision = "46625e4745d4"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "index_attempt",
        sa.Column("new_docs_indexed", sa.Integer(), nullable=True),
    )
    op.alter_column(
        "index_attempt", "num_docs_indexed", new_column_name="total_docs_indexed"
    )


def downgrade() -> None:
    op.alter_column(
        "index_attempt", "total_docs_indexed", new_column_name="num_docs_indexed"
    )
    op.drop_column("index_attempt", "new_docs_indexed")


================================================
FILE: backend/alembic/versions/d7111c1238cd_remove_document_ids.py
================================================
"""Remove Document IDs

Revision ID: d7111c1238cd
Revises: 465f78d9b7f9
Create Date: 2023-07-29 15:06:25.126169

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "d7111c1238cd"
down_revision = "465f78d9b7f9"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("index_attempt", "document_ids")


def downgrade() -> None:
    op.add_column(
        "index_attempt",
        sa.Column(
            "document_ids",
            postgresql.ARRAY(sa.VARCHAR()),
            autoincrement=False,
            nullable=True,
        ),
    )


================================================
FILE: backend/alembic/versions/d716b0791ddd_combined_slack_id_fields.py
================================================
"""combined slack id fields

Revision ID: d716b0791ddd
Revises: 7aea705850d5
Create Date: 2024-07-10 17:57:45.630550

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "d716b0791ddd"
down_revision = "7aea705850d5"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute(
        """
    UPDATE slack_bot_config
    SET channel_config = jsonb_set(
        channel_config,
        '{respond_member_group_list}',
        coalesce(channel_config->'respond_team_member_list', '[]'::jsonb) ||
        coalesce(channel_config->'respond_slack_group_list', '[]'::jsonb)
    ) - 'respond_team_member_list' - 'respond_slack_group_list'
    """
    )


def downgrade() -> None:
    op.execute(
        """
    UPDATE slack_bot_config
    SET channel_config = jsonb_set(
        jsonb_set(
            channel_config - 'respond_member_group_list',
            '{respond_team_member_list}',
            '[]'::jsonb
        ),
        '{respond_slack_group_list}',
        '[]'::jsonb
    )
    """
    )


================================================
FILE: backend/alembic/versions/d8cdfee5df80_add_skipped_to_userfilestatus.py
================================================
"""add skipped to userfilestatus

Revision ID: d8cdfee5df80
Revises: 1d78c0ca7853
Create Date: 2026-04-01 10:47:12.593950

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d8cdfee5df80"
down_revision = "1d78c0ca7853"
branch_labels = None
depends_on = None


TABLE = "user_file"
COLUMN = "status"
CONSTRAINT_NAME = "ck_user_file_status"

OLD_VALUES = ("PROCESSING", "INDEXING", "COMPLETED", "FAILED", "CANCELED", "DELETING")
NEW_VALUES = (
    "PROCESSING",
    "INDEXING",
    "COMPLETED",
    "SKIPPED",
    "FAILED",
    "CANCELED",
    "DELETING",
)


def _drop_status_check_constraint() -> None:
    inspector = sa.inspect(op.get_bind())
    for constraint in inspector.get_check_constraints(TABLE):
        if COLUMN in constraint.get("sqltext", ""):
            constraint_name = constraint["name"]
            if constraint_name is not None:
                op.drop_constraint(constraint_name, TABLE, type_="check")


def upgrade() -> None:
    _drop_status_check_constraint()
    in_clause = ", ".join(f"'{v}'" for v in NEW_VALUES)
    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")


def downgrade() -> None:
    op.execute(f"UPDATE {TABLE} SET {COLUMN} = 'COMPLETED' WHERE {COLUMN} = 'SKIPPED'")
    _drop_status_check_constraint()
    in_clause = ", ".join(f"'{v}'" for v in OLD_VALUES)
    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")


================================================
FILE: backend/alembic/versions/d929f0c1c6af_feedback_feature.py
================================================
"""Feedback Feature

Revision ID: d929f0c1c6af
Revises: 8aabb57f3b49
Create Date: 2023-08-27 13:03:54.274987

"""

import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d929f0c1c6af"
down_revision = "8aabb57f3b49"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "query_event",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("query", sa.String(), nullable=False),
        sa.Column(
            "selected_search_flow",
            sa.Enum("KEYWORD", "SEMANTIC", name="searchtype", native_enum=False),
            nullable=True,
        ),
        sa.Column("llm_answer", sa.String(), nullable=True),
        sa.Column(
            "feedback",
            sa.Enum("LIKE", "DISLIKE", name="qafeedbacktype", native_enum=False),
            nullable=True,
        ),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "document_retrieval_feedback",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("qa_event_id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.String(), nullable=False),
        sa.Column("document_rank", sa.Integer(), nullable=False),
        sa.Column("clicked", sa.Boolean(), nullable=False),
        sa.Column(
            "feedback",
            sa.Enum(
                "ENDORSE",
                "REJECT",
                "HIDE",
                "UNHIDE",
                name="searchfeedbacktype",
                native_enum=False,
            ),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["document.id"],
        ),
        sa.ForeignKeyConstraint(
            ["qa_event_id"],
            ["query_event.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.add_column("document", sa.Column("boost", sa.Integer(), nullable=False))
    op.add_column("document", sa.Column("hidden", sa.Boolean(), nullable=False))
    op.add_column("document", sa.Column("semantic_id", sa.String(), nullable=False))
    op.add_column("document", sa.Column("link", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("document", "link")
    op.drop_column("document", "semantic_id")
    op.drop_column("document", "hidden")
    op.drop_column("document", "boost")
    op.drop_table("document_retrieval_feedback")
    op.drop_table("query_event")


================================================
FILE: backend/alembic/versions/d961aca62eb3_update_status_length.py
================================================
"""Update status length

Revision ID: d961aca62eb3
Revises: cf90764725d8
Create Date: 2025-03-23 16:10:05.683965

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d961aca62eb3"
down_revision = "cf90764725d8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop the existing enum type constraint
    op.execute("ALTER TABLE connector_credential_pair ALTER COLUMN status TYPE varchar")

    # Create new enum type with all values
    op.execute(
        "ALTER TABLE connector_credential_pair ALTER COLUMN status TYPE VARCHAR(20) USING status::varchar(20)"
    )

    # Update the enum type to include all possible values
    op.alter_column(
        "connector_credential_pair",
        "status",
        type_=sa.Enum(
            "SCHEDULED",
            "INITIAL_INDEXING",
            "ACTIVE",
            "PAUSED",
            "DELETING",
            "INVALID",
            name="connectorcredentialpairstatus",
            native_enum=False,
        ),
        existing_type=sa.String(20),
        nullable=False,
    )

    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "in_repeated_error_state", sa.Boolean, default=False, server_default="false"
        ),
    )


def downgrade() -> None:
    # no need to convert back to the old enum type, since we're not using it anymore
    op.drop_column("connector_credential_pair", "in_repeated_error_state")


================================================
FILE: backend/alembic/versions/d9ec13955951_remove__dim_suffix_from_model_name.py
================================================
"""Remove _alt suffix from model_name

Revision ID: d9ec13955951
Revises: da4c21c69164
Create Date: 2024-08-20 16:31:32.955686

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "d9ec13955951"
down_revision = "da4c21c69164"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE embedding_model
        SET model_name = regexp_replace(model_name, '__danswer_alt_index$', '')
        WHERE model_name LIKE '%__danswer_alt_index'
    """
    )


def downgrade() -> None:
    # We can't reliably add the __danswer_alt_index suffix back, so we'll leave this empty
    pass


================================================
FILE: backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py
================================================
"""migrate jira connectors to new format

Revision ID: da42808081e3
Revises: f13db29f3101
Create Date: 2025-02-24 11:24:54.396040

"""

from alembic import op
import sqlalchemy as sa
import json

from onyx.configs.constants import DocumentSource
from onyx.connectors.jira.utils import extract_jira_project


# revision identifiers, used by Alembic.
revision = "da42808081e3"
down_revision = "f13db29f3101"
branch_labels = None
depends_on = None


PRESERVED_CONFIG_KEYS = ["comment_email_blacklist", "batch_size", "labels_to_skip"]


def upgrade() -> None:
    # Get all Jira connectors
    conn = op.get_bind()

    # First get all Jira connectors
    jira_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = :source
            """
        ),
        {"source": DocumentSource.JIRA.value.upper()},
    ).fetchall()

    # Update each connector's config
    for connector_id, old_config in jira_connectors:
        if not old_config:
            continue

        # Extract project key from URL if it exists
        new_config: dict[str, str | None] = {}
        if project_url := old_config.get("jira_project_url"):
            # Parse the URL to get base and project
            try:
                jira_base, project_key = extract_jira_project(project_url)
                new_config = {"jira_base_url": jira_base, "project_key": project_key}
            except ValueError:
                # If URL parsing fails, just use the URL as the base
                new_config = {
                    "jira_base_url": project_url.split("/projects/")[0],
                    "project_key": None,
                }
        else:
            # For connectors without a project URL, we need admin intervention
            # Mark these for review
            print(
                f"WARNING: Jira connector {connector_id} has no project URL configured"
            )
            continue
        for old_key in PRESERVED_CONFIG_KEYS:
            if old_key in old_config:
                new_config[old_key] = old_config[old_key]

        # Update the connector config
        conn.execute(
            sa.text(
                """
                UPDATE connector
                SET connector_specific_config = :new_config
                WHERE id = :id
                """
            ),
            {"id": connector_id, "new_config": json.dumps(new_config)},
        )


def downgrade() -> None:
    # Get all Jira connectors
    conn = op.get_bind()

    # First get all Jira connectors
    jira_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = :source
            """
        ),
        {"source": DocumentSource.JIRA.value.upper()},
    ).fetchall()

    # Update each connector's config back to the old format
    for connector_id, new_config in jira_connectors:
        if not new_config:
            continue

        old_config = {}
        base_url = new_config.get("jira_base_url")
        project_key = new_config.get("project_key")

        if base_url and project_key:
            old_config = {"jira_project_url": f"{base_url}/projects/{project_key}"}
        elif base_url:
            old_config = {"jira_project_url": base_url}
        else:
            continue

        for old_key in PRESERVED_CONFIG_KEYS:
            if old_key in new_config:
                old_config[old_key] = new_config[old_key]

        # Update the connector config
        conn.execute(
            sa.text(
                """
                UPDATE connector
                SET connector_specific_config = :old_config
                WHERE id = :id
                """
            ),
            {"id": connector_id, "old_config": json.dumps(old_config)},
        )


================================================
FILE: backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py
================================================
"""chosen_assistants changed to jsonb

Revision ID: da4c21c69164
Revises: c5b692fa265c
Create Date: 2024-08-18 19:06:47.291491

"""

import json
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "da4c21c69164"
down_revision = "c5b692fa265c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    conn = op.get_bind()
    existing_ids_and_chosen_assistants = conn.execute(
        sa.text('select id, chosen_assistants from "user"')
    )
    op.drop_column(
        "user",
        "chosen_assistants",
    )
    op.add_column(
        "user",
        sa.Column(
            "chosen_assistants",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    for id, chosen_assistants in existing_ids_and_chosen_assistants:
        conn.execute(
            sa.text(
                'update "user" set chosen_assistants = :chosen_assistants where id = :id'
            ),
            {"chosen_assistants": json.dumps(chosen_assistants), "id": id},
        )


def downgrade() -> None:
    conn = op.get_bind()
    existing_ids_and_chosen_assistants = conn.execute(
        sa.text('select id, chosen_assistants from "user"')
    )
    op.drop_column(
        "user",
        "chosen_assistants",
    )
    op.add_column(
        "user",
        sa.Column("chosen_assistants", postgresql.ARRAY(sa.Integer()), nullable=True),
    )
    for id, chosen_assistants in existing_ids_and_chosen_assistants:
        conn.execute(
            sa.text(
                'update "user" set chosen_assistants = :chosen_assistants where id = :id'
            ),
            {"chosen_assistants": chosen_assistants, "id": id},
        )


================================================
FILE: backend/alembic/versions/dab04867cd88_add_composite_index_to_document_by_.py
================================================
"""Add composite index to document_by_connector_credential_pair

Revision ID: dab04867cd88
Revises: 54a74a0417fc
Create Date: 2024-12-13 22:43:20.119990

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "dab04867cd88"
down_revision = "54a74a0417fc"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Composite index on (connector_id, credential_id)
    op.create_index(
        "idx_document_cc_pair_connector_credential",
        "document_by_connector_credential_pair",
        ["connector_id", "credential_id"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(
        "idx_document_cc_pair_connector_credential",
        table_name="document_by_connector_credential_pair",
    )


================================================
FILE: backend/alembic/versions/dba7f71618f5_onyx_custom_tool_flow.py
================================================
"""Onyx Custom Tool Flow

Revision ID: dba7f71618f5
Revises: d5645c915d0e
Create Date: 2023-09-18 15:18:37.370972

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "dba7f71618f5"
down_revision = "d5645c915d0e"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("retrieval_enabled", sa.Boolean(), nullable=True),
    )
    op.execute("UPDATE persona SET retrieval_enabled = true")
    op.alter_column("persona", "retrieval_enabled", nullable=False)


def downgrade() -> None:
    op.drop_column("persona", "retrieval_enabled")


================================================
FILE: backend/alembic/versions/dbaa756c2ccf_embedding_models.py
================================================
"""Embedding Models

Revision ID: dbaa756c2ccf
Revises: 7f726bad5367
Create Date: 2024-01-25 17:12:31.813160

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import table, column, String, Integer, Boolean

from onyx.configs.model_configs import ASYM_PASSAGE_PREFIX
from onyx.configs.model_configs import ASYM_QUERY_PREFIX
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import NORMALIZE_EMBEDDINGS
from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import IndexModelStatus
from onyx.db.search_settings import user_has_overridden_embedding_model
from onyx.indexing.models import IndexingSetting
from onyx.natural_language_processing.search_nlp_models import clean_model_name

# revision identifiers, used by Alembic.
revision = "dbaa756c2ccf"
down_revision = "7f726bad5367"
branch_labels: None = None
depends_on: None = None


def _get_old_default_embedding_model() -> IndexingSetting:
    is_overridden = user_has_overridden_embedding_model()
    return IndexingSetting(
        model_name=(
            DOCUMENT_ENCODER_MODEL
            if is_overridden
            else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
        ),
        model_dim=(
            DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
        ),
        embedding_precision=(EmbeddingPrecision.FLOAT),
        normalize=(
            NORMALIZE_EMBEDDINGS
            if is_overridden
            else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
        ),
        query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""),
        passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""),
        index_name="danswer_chunk",
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )


def _get_new_default_embedding_model() -> IndexingSetting:
    return IndexingSetting(
        model_name=DOCUMENT_ENCODER_MODEL,
        model_dim=DOC_EMBEDDING_DIM,
        embedding_precision=(EmbeddingPrecision.BFLOAT16),
        normalize=NORMALIZE_EMBEDDINGS,
        query_prefix=ASYM_QUERY_PREFIX,
        passage_prefix=ASYM_PASSAGE_PREFIX,
        index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}",
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )


def upgrade() -> None:
    op.create_table(
        "embedding_model",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("model_name", sa.String(), nullable=False),
        sa.Column("model_dim", sa.Integer(), nullable=False),
        sa.Column("normalize", sa.Boolean(), nullable=False),
        sa.Column("query_prefix", sa.String(), nullable=False),
        sa.Column("passage_prefix", sa.String(), nullable=False),
        sa.Column("index_name", sa.String(), nullable=False),
        sa.Column(
            "status",
            sa.Enum(IndexModelStatus, native=False),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # since all index attempts must be associated with an embedding model,
    # need to put something in here to avoid nulls. On server startup,
    # this value will be overriden
    EmbeddingModel = table(
        "embedding_model",
        column("id", Integer),
        column("model_name", String),
        column("model_dim", Integer),
        column("normalize", Boolean),
        column("query_prefix", String),
        column("passage_prefix", String),
        column("index_name", String),
        column(
            "status", sa.Enum(IndexModelStatus, name="indexmodelstatus", native=False)
        ),
    )
    # insert an embedding model row that corresponds to the embedding model
    # the user selected via env variables before this change. This is needed since
    # all index_attempts must be associated with an embedding model, so without this
    # we will run into violations of non-null contraints
    old_embedding_model = _get_old_default_embedding_model()
    op.bulk_insert(
        EmbeddingModel,
        [
            {
                "model_name": old_embedding_model.model_name,
                "model_dim": old_embedding_model.model_dim,
                "normalize": old_embedding_model.normalize,
                "query_prefix": old_embedding_model.query_prefix,
                "passage_prefix": old_embedding_model.passage_prefix,
                "index_name": old_embedding_model.index_name,
                "status": IndexModelStatus.PRESENT,
            }
        ],
    )
    # if the user has not overridden the default embedding model via env variables,
    # insert the new default model into the database to auto-upgrade them
    if not user_has_overridden_embedding_model():
        new_embedding_model = _get_new_default_embedding_model()
        op.bulk_insert(
            EmbeddingModel,
            [
                {
                    "model_name": new_embedding_model.model_name,
                    "model_dim": new_embedding_model.model_dim,
                    "normalize": new_embedding_model.normalize,
                    "query_prefix": new_embedding_model.query_prefix,
                    "passage_prefix": new_embedding_model.passage_prefix,
                    "index_name": new_embedding_model.index_name,
                    "status": IndexModelStatus.FUTURE,
                }
            ],
        )

    op.add_column(
        "index_attempt",
        sa.Column("embedding_model_id", sa.Integer(), nullable=True),
    )
    op.execute(
        "UPDATE index_attempt SET embedding_model_id=1 WHERE embedding_model_id IS NULL"
    )
    op.alter_column(
        "index_attempt",
        "embedding_model_id",
        existing_type=sa.Integer(),
        nullable=False,
    )
    op.create_foreign_key(
        "index_attempt__embedding_model_fk",
        "index_attempt",
        "embedding_model",
        ["embedding_model_id"],
        ["id"],
    )
    op.create_index(
        "ix_embedding_model_present_unique",
        "embedding_model",
        ["status"],
        unique=True,
        postgresql_where=sa.text("status = 'PRESENT'"),
    )
    op.create_index(
        "ix_embedding_model_future_unique",
        "embedding_model",
        ["status"],
        unique=True,
        postgresql_where=sa.text("status = 'FUTURE'"),
    )


def downgrade() -> None:
    op.drop_constraint(
        "index_attempt__embedding_model_fk", "index_attempt", type_="foreignkey"
    )
    op.drop_column("index_attempt", "embedding_model_id")
    op.drop_table("embedding_model")
    op.execute("DROP TYPE IF EXISTS indexmodelstatus;")


================================================
FILE: backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py
================================================
"""Added deletion_attempt table

Revision ID: df0c7ad8a076
Revises: d7111c1238cd
Create Date: 2023-08-05 13:35:39.609619

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "df0c7ad8a076"
down_revision = "d7111c1238cd"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.execute("DROP TABLE IF EXISTS document CASCADE")
    op.create_table(
        "document",
        sa.Column("id", sa.String(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.execute("DROP TABLE IF EXISTS chunk CASCADE")
    op.create_table(
        "chunk",
        sa.Column("id", sa.String(), nullable=False),
        sa.Column(
            "document_store_type",
            sa.Enum(
                "VECTOR",
                "KEYWORD",
                name="documentstoretype",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column("document_id", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["document.id"],
        ),
        sa.PrimaryKeyConstraint("id", "document_store_type"),
    )
    op.execute("DROP TABLE IF EXISTS deletion_attempt CASCADE")
    op.create_table(
        "deletion_attempt",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("connector_id", sa.Integer(), nullable=False),
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.Column(
            "status",
            sa.Enum(
                "NOT_STARTED",
                "IN_PROGRESS",
                "SUCCESS",
                "FAILED",
                name="deletionstatus",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column("num_docs_deleted", sa.Integer(), nullable=False),
        sa.Column("error_msg", sa.String(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["connector_id"],
            ["connector.id"],
        ),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.execute("DROP TABLE IF EXISTS document_by_connector_credential_pair CASCADE")
    op.create_table(
        "document_by_connector_credential_pair",
        sa.Column("id", sa.String(), nullable=False),
        sa.Column("connector_id", sa.Integer(), nullable=False),
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["connector_id"],
            ["connector.id"],
        ),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
        ),
        sa.ForeignKeyConstraint(
            ["id"],
            ["document.id"],
        ),
        sa.PrimaryKeyConstraint("id", "connector_id", "credential_id"),
    )


def downgrade() -> None:
    # upstream tables first
    op.drop_table("document_by_connector_credential_pair")
    op.drop_table("deletion_attempt")
    op.drop_table("chunk")

    # Alembic op.drop_table() has no "cascade" flag – issue raw SQL
    op.execute("DROP TABLE IF EXISTS document CASCADE")


================================================
FILE: backend/alembic/versions/df46c75b714e_add_default_vision_provider_to_llm_.py
================================================
"""add_default_vision_provider_to_llm_provider

Revision ID: df46c75b714e
Revises: 3934b1bc7b62
Create Date: 2025-03-11 16:20:19.038945

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "df46c75b714e"
down_revision = "3934b1bc7b62"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "llm_provider",
        sa.Column(
            "is_default_vision_provider",
            sa.Boolean(),
            nullable=True,
            server_default=sa.false(),
        ),
    )
    op.add_column(
        "llm_provider", sa.Column("default_vision_model", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("llm_provider", "default_vision_model")
    op.drop_column("llm_provider", "is_default_vision_provider")


================================================
FILE: backend/alembic/versions/dfbe9e93d3c7_extended_role_for_non_web.py
================================================
"""extended_role_for_non_web

Revision ID: dfbe9e93d3c7
Revises: 9cf5c00f72fe
Create Date: 2024-11-16 07:54:18.727906

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "dfbe9e93d3c7"
down_revision = "9cf5c00f72fe"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE "user"
        SET role = 'EXT_PERM_USER'
        WHERE has_web_login = false
    """
    )
    op.drop_column("user", "has_web_login")


def downgrade() -> None:
    op.add_column(
        "user",
        sa.Column("has_web_login", sa.Boolean(), nullable=False, server_default="true"),
    )

    op.execute(
        """
        UPDATE "user"
        SET has_web_login = false,
            role = 'BASIC'
        WHERE role IN ('SLACK_USER', 'EXT_PERM_USER')
    """
    )


================================================
FILE: backend/alembic/versions/e0a68a81d434_add_chat_feedback.py
================================================
"""Add Chat Feedback

Revision ID: e0a68a81d434
Revises: ae62505e3acc
Create Date: 2023-10-04 20:22:33.380286

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "e0a68a81d434"
down_revision = "ae62505e3acc"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "chat_feedback",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("chat_message_chat_session_id", sa.Integer(), nullable=False),
        sa.Column("chat_message_message_number", sa.Integer(), nullable=False),
        sa.Column("chat_message_edit_number", sa.Integer(), nullable=False),
        sa.Column("is_positive", sa.Boolean(), nullable=True),
        sa.Column("feedback_text", sa.Text(), nullable=True),
        sa.ForeignKeyConstraint(
            [
                "chat_message_chat_session_id",
                "chat_message_message_number",
                "chat_message_edit_number",
            ],
            [
                "chat_message.chat_session_id",
                "chat_message.message_number",
                "chat_message.edit_number",
            ],
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    op.drop_table("chat_feedback")


================================================
FILE: backend/alembic/versions/e1392f05e840_added_input_prompts.py
================================================
"""Added input prompts

Revision ID: e1392f05e840
Revises: 08a1eda20fe1
Create Date: 2024-07-13 19:09:22.556224

"""

import fastapi_users_db_sqlalchemy

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "e1392f05e840"
down_revision = "08a1eda20fe1"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "inputprompt",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("prompt", sa.String(), nullable=False),
        sa.Column("content", sa.String(), nullable=False),
        sa.Column("active", sa.Boolean(), nullable=False),
        sa.Column("is_public", sa.Boolean(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "inputprompt__user",
        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
        sa.Column("user_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["input_prompt_id"],
            ["inputprompt.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["inputprompt.id"],
        ),
        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
    )


def downgrade() -> None:
    op.drop_table("inputprompt__user")
    op.drop_table("inputprompt")


================================================
FILE: backend/alembic/versions/e209dc5a8156_added_prune_frequency.py
================================================
"""added-prune-frequency

Revision ID: e209dc5a8156
Revises: 48d14957fe80
Create Date: 2024-06-16 16:02:35.273231

"""

from alembic import op
import sqlalchemy as sa

revision = "e209dc5a8156"
down_revision = "48d14957fe80"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column("connector", sa.Column("prune_freq", sa.Integer(), nullable=True))


def downgrade() -> None:
    op.drop_column("connector", "prune_freq")


================================================
FILE: backend/alembic/versions/e4334d5b33ba_add_deployment_name_to_llmprovider.py
================================================
"""add_deployment_name_to_llmprovider

Revision ID: e4334d5b33ba
Revises: ac5eaac849f9
Create Date: 2024-10-04 09:52:34.896867

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "e4334d5b33ba"
down_revision = "ac5eaac849f9"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "llm_provider", sa.Column("deployment_name", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("llm_provider", "deployment_name")


================================================
FILE: backend/alembic/versions/e50154680a5c_no_source_enum.py
================================================
"""No Source Enum

Revision ID: e50154680a5c
Revises: fcd135795f21
Create Date: 2024-03-14 18:06:08.523106

"""

from alembic import op
import sqlalchemy as sa

from onyx.configs.constants import DocumentSource

# revision identifiers, used by Alembic.
revision = "e50154680a5c"
down_revision = "fcd135795f21"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.alter_column(
        "search_doc",
        "source_type",
        type_=sa.String(length=50),
        existing_type=sa.Enum(DocumentSource, native_enum=False),
        existing_nullable=False,
    )
    op.execute("DROP TYPE IF EXISTS documentsource")


def downgrade() -> None:
    op.alter_column(
        "search_doc",
        "source_type",
        type_=sa.Enum(DocumentSource, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )


================================================
FILE: backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py
================================================
"""Add index for retrieving latest index_attempt

Revision ID: e6a4bbc13fe4
Revises: b082fec533f0
Create Date: 2023-08-10 12:37:23.335471

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "e6a4bbc13fe4"
down_revision = "b082fec533f0"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_index(
        op.f("ix_index_attempt_latest_for_connector_credential_pair"),
        "index_attempt",
        ["connector_id", "credential_id", "time_created"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index(
        op.f("ix_index_attempt_latest_for_connector_credential_pair"),
        table_name="index_attempt",
    )


================================================
FILE: backend/alembic/versions/e7f8a9b0c1d2_create_anonymous_user.py
================================================
"""create_anonymous_user

This migration creates a permanent anonymous user in the database.
When anonymous access is enabled, unauthenticated requests will use this user
instead of returning user_id=NULL.

Revision ID: e7f8a9b0c1d2
Revises: f7ca3e2f45d9
Create Date: 2026-01-15 14:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "e7f8a9b0c1d2"
down_revision = "f7ca3e2f45d9"
branch_labels = None
depends_on = None

# Must match constants in onyx/configs/constants.py file
ANONYMOUS_USER_UUID = "00000000-0000-0000-0000-000000000002"
ANONYMOUS_USER_EMAIL = "anonymous@onyx.app"

# Tables with user_id foreign key that may need migration
TABLES_WITH_USER_ID = [
    "chat_session",
    "credential",
    "document_set",
    "persona",
    "tool",
    "notification",
    "inputprompt",
]


def _dedupe_null_notifications(connection: sa.Connection) -> None:
    # Multiple NULL-owned notifications can exist because the unique index treats
    # NULL user_id values as distinct. Before migrating them to the anonymous
    # user, collapse duplicates and remove rows that would conflict with an
    # already-existing anonymous notification.
    result = connection.execute(
        sa.text(
            """
            WITH ranked_null_notifications AS (
                SELECT
                    id,
                    ROW_NUMBER() OVER (
                        PARTITION BY notif_type, COALESCE(additional_data, '{}'::jsonb)
                        ORDER BY first_shown DESC, last_shown DESC, id DESC
                    ) AS row_num
                FROM notification
                WHERE user_id IS NULL
            )
            DELETE FROM notification
            WHERE id IN (
                SELECT id
                FROM ranked_null_notifications
                WHERE row_num > 1
            )
            """
        )
    )
    if result.rowcount > 0:
        print(f"Deleted {result.rowcount} duplicate NULL-owned notifications")

    result = connection.execute(
        sa.text(
            """
            DELETE FROM notification AS null_owned
            USING notification AS anonymous_owned
            WHERE null_owned.user_id IS NULL
              AND anonymous_owned.user_id = :user_id
              AND null_owned.notif_type = anonymous_owned.notif_type
              AND COALESCE(null_owned.additional_data, '{}'::jsonb) =
                  COALESCE(anonymous_owned.additional_data, '{}'::jsonb)
            """
        ),
        {"user_id": ANONYMOUS_USER_UUID},
    )
    if result.rowcount > 0:
        print(
            f"Deleted {result.rowcount} NULL-owned notifications that conflict with existing anonymous-owned notifications"
        )


def upgrade() -> None:
    """
    Create the anonymous user for anonymous access feature.
    Also migrates any remaining user_id=NULL records to the anonymous user.
    """
    connection = op.get_bind()

    # Create the anonymous user (using ON CONFLICT to be idempotent)
    connection.execute(
        sa.text(
            """
            INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
            VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
            ON CONFLICT (id) DO NOTHING
            """
        ),
        {
            "id": ANONYMOUS_USER_UUID,
            "email": ANONYMOUS_USER_EMAIL,
            "hashed_password": "",  # Empty password - user cannot log in directly
            "is_active": True,  # Active so it can be used for anonymous access
            "is_superuser": False,
            "is_verified": True,  # Verified since no email verification needed
            "role": "LIMITED",  # Anonymous users have limited role to restrict access
        },
    )

    # Migrate any remaining user_id=NULL records to anonymous user
    for table in TABLES_WITH_USER_ID:
        # Dedup notifications outside the savepoint so deletions persist
        # even if the subsequent UPDATE rolls back
        if table == "notification":
            _dedupe_null_notifications(connection)

        with connection.begin_nested():
            # Exclude public credential (id=0) which must remain user_id=NULL
            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
            # Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL
            # Exclude system input prompts (is_public=True with user_id=NULL) which must remain user_id=NULL
            if table == "credential":
                condition = "user_id IS NULL AND id != 0"
            elif table == "tool":
                condition = "user_id IS NULL AND in_code_tool_id IS NULL"
            elif table == "persona":
                condition = "user_id IS NULL AND builtin_persona = false"
            elif table == "inputprompt":
                condition = "user_id IS NULL AND is_public = false"
            else:
                condition = "user_id IS NULL"

            result = connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = :user_id
                    WHERE {condition}
                    """
                ),
                {"user_id": ANONYMOUS_USER_UUID},
            )
            if result.rowcount > 0:
                print(f"Updated {result.rowcount} rows in {table} to anonymous user")


def downgrade() -> None:
    """
    Set anonymous user's records back to NULL and delete the anonymous user.

    Note: Duplicate NULL-owned notifications removed during upgrade are not restored.
    """
    connection = op.get_bind()

    # Set records back to NULL
    for table in TABLES_WITH_USER_ID:
        with connection.begin_nested():
            connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = NULL
                    WHERE user_id = :user_id
                    """
                ),
                {"user_id": ANONYMOUS_USER_UUID},
            )

    # Delete the anonymous user
    connection.execute(
        sa.text('DELETE FROM "user" WHERE id = :user_id'),
        {"user_id": ANONYMOUS_USER_UUID},
    )


================================================
FILE: backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py
================================================
"""Add persona to chat_session

Revision ID: e86866a9c78a
Revises: 80696cf850ae
Create Date: 2023-11-26 02:51:47.657357

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "e86866a9c78a"
down_revision = "80696cf850ae"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column("chat_session", sa.Column("persona_id", sa.Integer(), nullable=True))
    op.create_foreign_key(
        "fk_chat_session_persona_id", "chat_session", "persona", ["persona_id"], ["id"]
    )


def downgrade() -> None:
    op.drop_constraint("fk_chat_session_persona_id", "chat_session", type_="foreignkey")
    op.drop_column("chat_session", "persona_id")


================================================
FILE: backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
================================================
"""add status to mcp server and make auth fields nullable

Revision ID: e8f0d2a38171
Revises: ed9e44312505
Create Date: 2025-11-28 11:15:37.667340

"""

from alembic import op
import sqlalchemy as sa
from onyx.db.enums import (
    MCPTransport,
    MCPAuthenticationType,
    MCPAuthenticationPerformer,
    MCPServerStatus,
)

# revision identifiers, used by Alembic.
revision = "e8f0d2a38171"
down_revision = "ed9e44312505"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Make auth fields nullable
    op.alter_column(
        "mcp_server",
        "transport",
        existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
        nullable=True,
    )

    op.alter_column(
        "mcp_server",
        "auth_type",
        existing_type=sa.Enum(
            MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
        ),
        nullable=True,
    )

    op.alter_column(
        "mcp_server",
        "auth_performer",
        existing_type=sa.Enum(
            MCPAuthenticationPerformer,
            name="mcp_authentication_performer",
            native_enum=False,
        ),
        nullable=True,
    )

    # Add status column with default
    op.add_column(
        "mcp_server",
        sa.Column(
            "status",
            sa.Enum(MCPServerStatus, name="mcp_server_status", native_enum=False),
            nullable=False,
            server_default="CREATED",
        ),
    )

    # For existing records, mark status as CONNECTED
    bind = op.get_bind()
    bind.execute(
        sa.text(
            """
        UPDATE mcp_server
        SET status = 'CONNECTED'
        WHERE status != 'CONNECTED'
        and admin_connection_config_id IS NOT NULL
        """
        )
    )


def downgrade() -> None:
    # Remove status column
    op.drop_column("mcp_server", "status")

    # Make auth fields non-nullable (set defaults first)
    op.execute(
        "UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL"
    )
    op.execute("UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL")
    op.execute(
        "UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL"
    )

    op.alter_column(
        "mcp_server",
        "transport",
        existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
        nullable=False,
    )
    op.alter_column(
        "mcp_server",
        "auth_type",
        existing_type=sa.Enum(
            MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
        ),
        nullable=False,
    )
    op.alter_column(
        "mcp_server",
        "auth_performer",
        existing_type=sa.Enum(
            MCPAuthenticationPerformer,
            name="mcp_authentication_performer",
            native_enum=False,
        ),
        nullable=False,
    )


================================================
FILE: backend/alembic/versions/e91df4e935ef_private_personas_documentsets.py
================================================
"""Private Personas DocumentSets

Revision ID: e91df4e935ef
Revises: 91fd3b470d1a
Create Date: 2024-03-17 11:47:24.675881

"""

import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "e91df4e935ef"
down_revision = "91fd3b470d1a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "document_set__user",
        sa.Column("document_set_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["document_set_id"],
            ["document_set.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("document_set_id", "user_id"),
    )
    op.create_table(
        "persona__user",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "user_id"),
    )
    op.create_table(
        "document_set__user_group",
        sa.Column("document_set_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_group_id",
            sa.Integer(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["document_set_id"],
            ["document_set.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("document_set_id", "user_group_id"),
    )
    op.create_table(
        "persona__user_group",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_group_id",
            sa.Integer(),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.ForeignKeyConstraint(
            ["user_group_id"],
            ["user_group.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "user_group_id"),
    )

    op.add_column(
        "document_set",
        sa.Column("is_public", sa.Boolean(), nullable=True),
    )
    # fill in is_public for existing rows
    op.execute("UPDATE document_set SET is_public = true WHERE is_public IS NULL")
    op.alter_column("document_set", "is_public", nullable=False)

    op.add_column(
        "persona",
        sa.Column("is_public", sa.Boolean(), nullable=True),
    )
    # fill in is_public for existing rows
    op.execute("UPDATE persona SET is_public = true WHERE is_public IS NULL")
    op.alter_column("persona", "is_public", nullable=False)


def downgrade() -> None:
    op.drop_column("persona", "is_public")

    op.drop_column("document_set", "is_public")

    op.drop_table("persona__user")
    op.drop_table("document_set__user")
    op.drop_table("persona__user_group")
    op.drop_table("document_set__user_group")


================================================
FILE: backend/alembic/versions/eaa3b5593925_add_default_slack_channel_config.py
================================================
"""add default slack channel config

Revision ID: eaa3b5593925
Revises: 98a5008d8711
Create Date: 2025-02-03 18:07:56.552526

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "eaa3b5593925"
down_revision = "98a5008d8711"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add is_default column
    op.add_column(
        "slack_channel_config",
        sa.Column("is_default", sa.Boolean(), nullable=False, server_default="false"),
    )

    op.create_index(
        "ix_slack_channel_config_slack_bot_id_default",
        "slack_channel_config",
        ["slack_bot_id", "is_default"],
        unique=True,
        postgresql_where=sa.text("is_default IS TRUE"),
    )

    # Create default channel configs for existing slack bots without one
    conn = op.get_bind()
    slack_bots = conn.execute(sa.text("SELECT id FROM slack_bot")).fetchall()

    for slack_bot in slack_bots:
        slack_bot_id = slack_bot[0]
        existing_default = conn.execute(
            sa.text(
                "SELECT id FROM slack_channel_config WHERE slack_bot_id = :bot_id AND is_default = TRUE"
            ),
            {"bot_id": slack_bot_id},
        ).fetchone()

        if not existing_default:
            conn.execute(
                sa.text(
                    """
                    INSERT INTO slack_channel_config (
                        slack_bot_id, persona_id, channel_config, enable_auto_filters, is_default
                    ) VALUES (
                        :bot_id, NULL,
                        '{"channel_name": null, '
                        '"respond_member_group_list": [], '
                        '"answer_filters": [], '
                        '"follow_up_tags": [], '
                        '"respond_tag_only": true}',
                        FALSE, TRUE
                    )
                """
                ),
                {"bot_id": slack_bot_id},
            )


def downgrade() -> None:
    # Delete default slack channel configs
    conn = op.get_bind()
    conn.execute(sa.text("DELETE FROM slack_channel_config WHERE is_default = TRUE"))

    # Remove index
    op.drop_index(
        "ix_slack_channel_config_slack_bot_id_default",
        table_name="slack_channel_config",
    )

    # Remove is_default column
    op.drop_column("slack_channel_config", "is_default")


================================================
FILE: backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py
================================================
"""Index From Beginning

Revision ID: ec3ec2eabf7b
Revises: dbaa756c2ccf
Create Date: 2024-02-06 22:03:28.098158

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ec3ec2eabf7b"
down_revision = "dbaa756c2ccf"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "index_attempt", sa.Column("from_beginning", sa.Boolean(), nullable=True)
    )
    op.execute("UPDATE index_attempt SET from_beginning = False")
    op.alter_column("index_attempt", "from_beginning", nullable=False)


def downgrade() -> None:
    op.drop_column("index_attempt", "from_beginning")


================================================
FILE: backend/alembic/versions/ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py
================================================
"""Remove Last Attempt Status from CC Pair

Revision ID: ec85f2b3c544
Revises: 3879338f8ba1
Create Date: 2024-05-23 21:39:46.126010

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ec85f2b3c544"
down_revision = "70f00c45c0f2"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("connector_credential_pair", "last_attempt_status")


def downgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_attempt_status",
            sa.VARCHAR(),
            autoincrement=False,
            nullable=True,
        ),
    )


================================================
FILE: backend/alembic/versions/ecab2b3f1a3b_add_overrides_to_the_chat_session.py
================================================
"""Add overrides to the chat session

Revision ID: ecab2b3f1a3b
Revises: 38eda64af7fe
Create Date: 2024-04-01 19:08:21.359102

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "ecab2b3f1a3b"
down_revision = "38eda64af7fe"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_session",
        sa.Column(
            "llm_override",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    op.add_column(
        "chat_session",
        sa.Column(
            "prompt_override",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("chat_session", "prompt_override")
    op.drop_column("chat_session", "llm_override")


================================================
FILE: backend/alembic/versions/ed9e44312505_add_icon_name_field.py
================================================
"""Add icon_name field

Revision ID: ed9e44312505
Revises: 5e6f7a8b9c0d
Create Date: 2025-12-03 16:35:07.828393

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ed9e44312505"
down_revision = "5e6f7a8b9c0d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add icon_name column
    op.add_column("persona", sa.Column("icon_name", sa.String(), nullable=True))

    # Remove old icon columns
    op.drop_column("persona", "icon_shape")
    op.drop_column("persona", "icon_color")


def downgrade() -> None:
    # Re-add old icon columns
    op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
    op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))

    # Remove icon_name column
    op.drop_column("persona", "icon_name")


================================================
FILE: backend/alembic/versions/ee3f4b47fad5_added_alternate_model_to_chat_message.py
================================================
"""Added alternate model to chat message

Revision ID: ee3f4b47fad5
Revises: 2d2304e27d8c
Create Date: 2024-08-12 00:11:50.915845

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "ee3f4b47fad5"
down_revision = "2d2304e27d8c"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column("overridden_model", sa.String(length=255), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "overridden_model")


================================================
FILE: backend/alembic/versions/ef7da92f7213_add_files_to_chatmessage.py
================================================
"""Add files to ChatMessage

Revision ID: ef7da92f7213
Revises: 401c1ac29467
Create Date: 2024-04-28 16:59:33.199153

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "ef7da92f7213"
down_revision = "401c1ac29467"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column("files", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "files")


================================================
FILE: backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py
================================================
"""standard answer match_regex flag

Revision ID: efb35676026c
Revises: 0ebb1d516877
Create Date: 2024-09-11 13:55:46.101149

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "efb35676026c"
down_revision = "0ebb1d516877"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "standard_answer",
        sa.Column(
            "match_regex", sa.Boolean(), nullable=False, server_default=sa.false()
        ),
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("standard_answer", "match_regex")
    # ### end Alembic commands ###


================================================
FILE: backend/alembic/versions/f11b408e39d3_force_lowercase_all_users.py
================================================
"""force lowercase all users

Revision ID: f11b408e39d3
Revises: 3bd4c84fe72f
Create Date: 2025-02-26 17:04:55.683500

"""

# revision identifiers, used by Alembic.
revision = "f11b408e39d3"
down_revision = "3bd4c84fe72f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1) Convert all existing user emails to lowercase
    from alembic import op

    op.execute(
        """
        UPDATE "user"
        SET email = LOWER(email)
        """
    )

    # 2) Add a check constraint to ensure emails are always lowercase
    op.create_check_constraint("ensure_lowercase_email", "user", "email = LOWER(email)")


def downgrade() -> None:
    # Drop the check constraint
    from alembic import op

    op.drop_constraint("ensure_lowercase_email", "user", type_="check")


================================================
FILE: backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
================================================
"""Add composite index for last_modified and last_synced to document

Revision ID: f13db29f3101
Revises: b388730a2899
Create Date: 2025-02-18 22:48:11.511389

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "f13db29f3101"
down_revision = "acaab4ef4507"
branch_labels: str | None = None
depends_on: str | None = None


def upgrade() -> None:
    op.create_index(
        "ix_document_sync_status",
        "document",
        ["last_modified", "last_synced"],
        unique=False,
    )


def downgrade() -> None:
    op.drop_index("ix_document_sync_status", table_name="document")


================================================
FILE: backend/alembic/versions/f17bf3b0d9f1_embedding_provider_by_provider_type.py
================================================
"""embedding provider by provider type

Revision ID: f17bf3b0d9f1
Revises: 351faebd379d
Create Date: 2024-08-21 13:13:31.120460

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f17bf3b0d9f1"
down_revision = "351faebd379d"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Add provider_type column to embedding_provider
    op.add_column(
        "embedding_provider",
        sa.Column("provider_type", sa.String(50), nullable=True),
    )

    # Update provider_type with existing name values
    op.execute("UPDATE embedding_provider SET provider_type = UPPER(name)")

    # Make provider_type not nullable
    op.alter_column("embedding_provider", "provider_type", nullable=False)

    # Drop the foreign key constraint in embedding_model table
    op.drop_constraint(
        "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey"
    )

    # Drop the existing primary key constraint
    op.drop_constraint("embedding_provider_pkey", "embedding_provider", type_="primary")

    # Create a new primary key constraint on provider_type
    op.create_primary_key(
        "embedding_provider_pkey", "embedding_provider", ["provider_type"]
    )

    # Add provider_type column to embedding_model
    op.add_column(
        "embedding_model",
        sa.Column("provider_type", sa.String(50), nullable=True),
    )

    # Update provider_type for existing embedding models
    op.execute(
        """
        UPDATE embedding_model
        SET provider_type = (
            SELECT provider_type
            FROM embedding_provider
            WHERE embedding_provider.id = embedding_model.cloud_provider_id
        )
    """
    )

    # Drop the old id column from embedding_provider
    op.drop_column("embedding_provider", "id")

    # Drop the name column from embedding_provider
    op.drop_column("embedding_provider", "name")

    # Drop the default_model_id column from embedding_provider
    op.drop_column("embedding_provider", "default_model_id")

    # Drop the old cloud_provider_id column from embedding_model
    op.drop_column("embedding_model", "cloud_provider_id")

    # Create the new foreign key constraint
    op.create_foreign_key(
        "fk_embedding_model_cloud_provider",
        "embedding_model",
        "embedding_provider",
        ["provider_type"],
        ["provider_type"],
    )


def downgrade() -> None:
    # Drop the foreign key constraint in embedding_model table
    op.drop_constraint(
        "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey"
    )

    # Add back the cloud_provider_id column to embedding_model
    op.add_column(
        "embedding_model", sa.Column("cloud_provider_id", sa.Integer(), nullable=True)
    )
    op.add_column("embedding_provider", sa.Column("id", sa.Integer(), nullable=True))

    # Assign incrementing IDs to embedding providers
    op.execute(
        """
        CREATE SEQUENCE IF NOT EXISTS embedding_provider_id_seq;"""
    )
    op.execute(
        """
        UPDATE embedding_provider SET id = nextval('embedding_provider_id_seq');
    """
    )

    # Update cloud_provider_id based on provider_type
    op.execute(
        """
        UPDATE embedding_model
        SET cloud_provider_id = CASE
            WHEN provider_type IS NULL THEN NULL
            ELSE (
                SELECT id
                FROM embedding_provider
                WHERE embedding_provider.provider_type = embedding_model.provider_type
            )
        END
    """
    )

    # Drop the provider_type column from embedding_model
    op.drop_column("embedding_model", "provider_type")

    # Add back the columns to embedding_provider
    op.add_column("embedding_provider", sa.Column("name", sa.String(50), nullable=True))
    op.add_column(
        "embedding_provider", sa.Column("default_model_id", sa.Integer(), nullable=True)
    )

    # Drop the existing primary key constraint on provider_type
    op.drop_constraint("embedding_provider_pkey", "embedding_provider", type_="primary")

    # Create the original primary key constraint on id
    op.create_primary_key("embedding_provider_pkey", "embedding_provider", ["id"])

    # Update name with existing provider_type values
    op.execute(
        """
        UPDATE embedding_provider
        SET name = CASE
            WHEN provider_type = 'OPENAI' THEN 'OpenAI'
            WHEN provider_type = 'COHERE' THEN 'Cohere'
            WHEN provider_type = 'GOOGLE' THEN 'Google'
            WHEN provider_type = 'VOYAGE' THEN 'Voyage'
            ELSE provider_type
        END
    """
    )

    # Drop the provider_type column from embedding_provider
    op.drop_column("embedding_provider", "provider_type")

    # Recreate the foreign key constraint in embedding_model table
    op.create_foreign_key(
        "fk_embedding_model_cloud_provider",
        "embedding_model",
        "embedding_provider",
        ["cloud_provider_id"],
        ["id"],
    )

    # Recreate the foreign key constraint in embedding_model table
    op.create_foreign_key(
        "fk_embedding_provider_default_model",
        "embedding_provider",
        "embedding_model",
        ["default_model_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/f1c6478c3fd8_add_pre_defined_feedback.py
================================================
"""Add pre-defined feedback

Revision ID: f1c6478c3fd8
Revises: 643a84a42a33
Create Date: 2024-05-09 18:11:49.210667

"""

from alembic import op
import sqlalchemy as sa

revision = "f1c6478c3fd8"
down_revision = "643a84a42a33"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "chat_feedback",
        sa.Column("predefined_feedback", sa.String(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_feedback", "predefined_feedback")


================================================
FILE: backend/alembic/versions/f1ca58b2f2ec_add_passthrough_auth_to_tool.py
================================================
"""add passthrough auth to tool

Revision ID: f1ca58b2f2ec
Revises: c7bf5721733e
Create Date: 2024-03-19

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "f1ca58b2f2ec"
down_revision: Union[str, None] = "c7bf5721733e"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # Add passthrough_auth column to tool table with default value of False
    op.add_column(
        "tool",
        sa.Column(
            "passthrough_auth", sa.Boolean(), nullable=False, server_default=sa.false()
        ),
    )


def downgrade() -> None:
    # Remove passthrough_auth column from tool table
    op.drop_column("tool", "passthrough_auth")


================================================
FILE: backend/alembic/versions/f220515df7b4_add_flow_mapping_table.py
================================================
"""Add flow mapping table

Revision ID: f220515df7b4
Revises: cbc03e08d0f3
Create Date: 2026-01-30 12:21:24.955922

"""

from onyx.db.enums import LLMModelFlowType
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f220515df7b4"
down_revision = "9d1543a37106"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "llm_model_flow",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "llm_model_flow_type",
            sa.Enum(LLMModelFlowType, name="llmmodelflowtype", native_enum=False),
            nullable=False,
        ),
        sa.Column(
            "is_default", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column("model_configuration_id", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["model_configuration_id"], ["model_configuration.id"], ondelete="CASCADE"
        ),
        sa.UniqueConstraint(
            "llm_model_flow_type",
            "model_configuration_id",
            name="uq_model_config_per_llm_model_flow_type",
        ),
    )

    # Partial unique index so that there is at most one default for each flow type
    op.create_index(
        "ix_one_default_per_llm_model_flow",
        "llm_model_flow",
        ["llm_model_flow_type"],
        unique=True,
        postgresql_where=sa.text("is_default IS TRUE"),
    )


def downgrade() -> None:
    # Drop the llm_model_flow table (index is dropped automatically with table)
    op.drop_table("llm_model_flow")


================================================
FILE: backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py
================================================
"""add custom headers to tools

Revision ID: f32615f71aeb
Revises: bd2921608c3a
Create Date: 2024-09-12 20:26:38.932377

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "f32615f71aeb"
down_revision = "bd2921608c3a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "tool", sa.Column("custom_headers", postgresql.JSONB(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("tool", "custom_headers")


================================================
FILE: backend/alembic/versions/f39c5794c10a_add_background_errors_table.py
================================================
"""Add background errors table

Revision ID: f39c5794c10a
Revises: 2cdeff6d8c93
Create Date: 2025-02-12 17:11:14.527876

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "f39c5794c10a"
down_revision = "2cdeff6d8c93"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "background_error",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("message", sa.String(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"],
            ["connector_credential_pair.id"],
            ondelete="CASCADE",
        ),
    )


def downgrade() -> None:
    op.drop_table("background_error")


================================================
FILE: backend/alembic/versions/f5437cc136c5_delete_non_search_assistants.py
================================================
"""delete non-search assistants

Revision ID: f5437cc136c5
Revises: eaa3b5593925
Create Date: 2025-02-04 16:17:15.677256

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "f5437cc136c5"
down_revision = "eaa3b5593925"
branch_labels = None
depends_on = None


def upgrade() -> None:
    pass


def downgrade() -> None:
    # Fix: split the statements into multiple op.execute() calls
    op.execute(
        """
        WITH personas_without_search AS (
            SELECT p.id
            FROM persona p
            LEFT JOIN persona__tool pt ON p.id = pt.persona_id
            LEFT JOIN tool t ON pt.tool_id = t.id
            GROUP BY p.id
            HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
        )
        UPDATE slack_channel_config
        SET persona_id = NULL
        WHERE is_default = TRUE AND persona_id IN (SELECT id FROM personas_without_search)
        """
    )

    op.execute(
        """
        WITH personas_without_search AS (
            SELECT p.id
            FROM persona p
            LEFT JOIN persona__tool pt ON p.id = pt.persona_id
            LEFT JOIN tool t ON pt.tool_id = t.id
            GROUP BY p.id
            HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
        )
        DELETE FROM slack_channel_config
        WHERE is_default = FALSE AND persona_id IN (SELECT id FROM personas_without_search)
        """
    )


================================================
FILE: backend/alembic/versions/f71470ba9274_add_prompt_length_limit.py
================================================
"""add prompt length limit

Revision ID: f71470ba9274
Revises: 6a804aeb4830
Create Date: 2025-04-01 15:07:14.977435

"""

# revision identifiers, used by Alembic.
revision = "f71470ba9274"
down_revision = "6a804aeb4830"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # op.alter_column(
    #     "prompt",
    #     "system_prompt",
    #     existing_type=sa.TEXT(),
    #     type_=sa.String(length=8000),
    #     existing_nullable=False,
    # )
    # op.alter_column(
    #     "prompt",
    #     "task_prompt",
    #     existing_type=sa.TEXT(),
    #     type_=sa.String(length=8000),
    #     existing_nullable=False,
    # )
    pass


def downgrade() -> None:
    # op.alter_column(
    #     "prompt",
    #     "system_prompt",
    #     existing_type=sa.String(length=8000),
    #     type_=sa.TEXT(),
    #     existing_nullable=False,
    # )
    # op.alter_column(
    #     "prompt",
    #     "task_prompt",
    #     existing_type=sa.String(length=8000),
    #     type_=sa.TEXT(),
    #     existing_nullable=False,
    # )
    pass


================================================
FILE: backend/alembic/versions/f7505c5b0284_updated_constraints_for_ccpairs.py
================================================
"""updated constraints for ccpairs

Revision ID: f7505c5b0284
Revises: f71470ba9274
Create Date: 2025-04-01 17:50:42.504818

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "f7505c5b0284"
down_revision = "f71470ba9274"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1) Drop the old foreign-key constraints
    op.drop_constraint(
        "document_by_connector_credential_pair_connector_id_fkey",
        "document_by_connector_credential_pair",
        type_="foreignkey",
    )
    op.drop_constraint(
        "document_by_connector_credential_pair_credential_id_fkey",
        "document_by_connector_credential_pair",
        type_="foreignkey",
    )

    # 2) Re-add them with ondelete='CASCADE'
    op.create_foreign_key(
        "document_by_connector_credential_pair_connector_id_fkey",
        source_table="document_by_connector_credential_pair",
        referent_table="connector",
        local_cols=["connector_id"],
        remote_cols=["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "document_by_connector_credential_pair_credential_id_fkey",
        source_table="document_by_connector_credential_pair",
        referent_table="credential",
        local_cols=["credential_id"],
        remote_cols=["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Reverse the changes for rollback
    op.drop_constraint(
        "document_by_connector_credential_pair_connector_id_fkey",
        "document_by_connector_credential_pair",
        type_="foreignkey",
    )
    op.drop_constraint(
        "document_by_connector_credential_pair_credential_id_fkey",
        "document_by_connector_credential_pair",
        type_="foreignkey",
    )

    # Recreate without CASCADE
    op.create_foreign_key(
        "document_by_connector_credential_pair_connector_id_fkey",
        "document_by_connector_credential_pair",
        "connector",
        ["connector_id"],
        ["id"],
    )
    op.create_foreign_key(
        "document_by_connector_credential_pair_credential_id_fkey",
        "document_by_connector_credential_pair",
        "credential",
        ["credential_id"],
        ["id"],
    )


================================================
FILE: backend/alembic/versions/f7a894b06d02_non_nullbale_slack_bot_id_in_channel_.py
================================================
"""non-nullbale slack bot id in channel config

Revision ID: f7a894b06d02
Revises: 9f696734098f
Create Date: 2024-12-06 12:55:42.845723

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f7a894b06d02"
down_revision = "9f696734098f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Delete all rows with null slack_bot_id
    op.execute("DELETE FROM slack_channel_config WHERE slack_bot_id IS NULL")

    # Make slack_bot_id non-nullable
    op.alter_column(
        "slack_channel_config",
        "slack_bot_id",
        existing_type=sa.Integer(),
        nullable=False,
    )


def downgrade() -> None:
    # Make slack_bot_id nullable again
    op.alter_column(
        "slack_channel_config",
        "slack_bot_id",
        existing_type=sa.Integer(),
        nullable=True,
    )


================================================
FILE: backend/alembic/versions/f7ca3e2f45d9_migrate_no_auth_data_to_placeholder.py
================================================
"""migrate_no_auth_data_to_placeholder

This migration handles the transition from AUTH_TYPE=disabled to requiring
authentication. It creates a placeholder user and assigns all data that was
created without a user (user_id=NULL) to this placeholder.

A database trigger is installed that automatically transfers all data from
the placeholder user to the first real user who registers, then drops itself.

Revision ID: f7ca3e2f45d9
Revises: 78ebc66946a0
Create Date: 2026-01-15 12:49:53.802741

"""

import os

from alembic import op
import sqlalchemy as sa

from shared_configs.configs import MULTI_TENANT


# revision identifiers, used by Alembic.
revision = "f7ca3e2f45d9"
down_revision = "78ebc66946a0"
branch_labels = None
depends_on = None

# Must match constants in onyx/configs/constants.py file
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"
NO_AUTH_PLACEHOLDER_USER_EMAIL = "no-auth-placeholder@onyx.app"

# Trigger and function names
TRIGGER_NAME = "trg_migrate_no_auth_data"
FUNCTION_NAME = "migrate_no_auth_data_to_user"

# Trigger function that migrates data from placeholder to first real user
MIGRATE_NO_AUTH_TRIGGER_FUNCTION = f"""
CREATE OR REPLACE FUNCTION {FUNCTION_NAME}()
RETURNS TRIGGER AS $$
DECLARE
    placeholder_uuid UUID := '00000000-0000-0000-0000-000000000001'::uuid;
    anonymous_uuid UUID := '00000000-0000-0000-0000-000000000002'::uuid;
    placeholder_row RECORD;
    schema_name TEXT;
BEGIN
    -- Skip if this is the placeholder user being inserted
    IF NEW.id = placeholder_uuid THEN
        RETURN NULL;
    END IF;

    -- Skip if this is the anonymous user being inserted (not a real user)
    IF NEW.id = anonymous_uuid THEN
        RETURN NULL;
    END IF;

    -- Skip if the new user is not active
    IF NEW.is_active = FALSE THEN
        RETURN NULL;
    END IF;

    -- Get current schema for self-cleanup
    schema_name := current_schema();

    -- Try to lock the placeholder user row with FOR UPDATE SKIP LOCKED
    -- This ensures only one concurrent transaction can proceed with migration
    -- SKIP LOCKED means if another transaction has the lock, we skip (don't wait)
    SELECT id INTO placeholder_row
    FROM "user"
    WHERE id = placeholder_uuid
    FOR UPDATE SKIP LOCKED;

    IF NOT FOUND THEN
        -- Either placeholder doesn't exist or another transaction has it locked
        -- Either way, drop the trigger and return without making admin
        EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
        EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
        RETURN NULL;
    END IF;

    -- We have exclusive lock on placeholder - proceed with migration
    -- The INSERT has already completed (AFTER INSERT), so NEW.id exists in the table

    -- Migrate chat_session
    UPDATE "chat_session" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate credential (exclude public credential id=0)
    UPDATE "credential" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND id != 0;

    -- Migrate document_set
    UPDATE "document_set" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate persona (exclude builtin personas)
    UPDATE "persona" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND builtin_persona = FALSE;

    -- Migrate tool (exclude builtin tools)
    UPDATE "tool" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND in_code_tool_id IS NULL;

    -- Migrate notification
    UPDATE "notification" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate inputprompt (exclude system/public prompts)
    UPDATE "inputprompt" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND is_public = FALSE;

    -- Make the new user an admin (they had admin access in no-auth mode)
    -- In AFTER INSERT trigger, we must UPDATE the row since it already exists
    UPDATE "user" SET role = 'ADMIN' WHERE id = NEW.id;

    -- Delete the placeholder user (we hold the lock so this is safe)
    DELETE FROM "user" WHERE id = placeholder_uuid;

    -- Drop the trigger and function (self-cleanup)
    EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
    EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
"""

MIGRATE_NO_AUTH_TRIGGER = f"""
CREATE TRIGGER {TRIGGER_NAME}
AFTER INSERT ON "user"
FOR EACH ROW
EXECUTE FUNCTION {FUNCTION_NAME}();
"""


def upgrade() -> None:
    """
    Create a placeholder user and assign all NULL user_id records to it.
    Install a trigger that migrates data to the first real user and self-destructs.
    Only runs if AUTH_TYPE is currently disabled/none.

    Skipped in multi-tenant mode - each tenant starts fresh with no legacy data.
    """
    # Skip in multi-tenant mode - this migration handles single-tenant
    # AUTH_TYPE=disabled -> auth transitions only
    if MULTI_TENANT:
        return

    # Only run if AUTH_TYPE is currently disabled/none
    # If they've already switched to auth-enabled, NULL data is stale anyway
    auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
    if auth_type not in ("disabled", "none", ""):
        print(f"AUTH_TYPE is '{auth_type}', not disabled. Skipping migration.")
        return

    connection = op.get_bind()

    # Check if there are any NULL user_id records that need migration
    tables_to_check = [
        "chat_session",
        "credential",
        "document_set",
        "persona",
        "tool",
        "notification",
        "inputprompt",
    ]

    has_null_records = False
    for table in tables_to_check:
        try:
            result = connection.execute(
                sa.text(f'SELECT 1 FROM "{table}" WHERE user_id IS NULL LIMIT 1')
            )
            if result.fetchone():
                has_null_records = True
                break
        except Exception:
            # Table might not exist
            pass

    if not has_null_records:
        return

    # Create the placeholder user
    connection.execute(
        sa.text(
            """
            INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
            VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
            """
        ),
        {
            "id": NO_AUTH_PLACEHOLDER_USER_UUID,
            "email": NO_AUTH_PLACEHOLDER_USER_EMAIL,
            "hashed_password": "",  # Empty password - user cannot log in
            "is_active": False,  # Inactive - user cannot log in
            "is_superuser": False,
            "is_verified": False,
            "role": "BASIC",
        },
    )

    # Assign NULL user_id records to the placeholder user
    for table in tables_to_check:
        try:
            # Base condition for all tables
            condition = "user_id IS NULL"
            # Exclude public credential (id=0) which must remain user_id=NULL
            if table == "credential":
                condition += " AND id != 0"
            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
            elif table == "tool":
                condition += " AND in_code_tool_id IS NULL"
            # Exclude builtin personas which must remain user_id=NULL
            elif table == "persona":
                condition += " AND builtin_persona = FALSE"
            # Exclude system/public input prompts which must remain user_id=NULL
            elif table == "inputprompt":
                condition += " AND is_public = FALSE"
            result = connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = :user_id
                    WHERE {condition}
                    """
                ),
                {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
            )
            if result.rowcount > 0:
                print(f"Updated {result.rowcount} rows in {table}")
        except Exception as e:
            print(f"Skipping {table}: {e}")

    # Install the trigger function and trigger for automatic migration on first user registration
    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER_FUNCTION))
    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER))
    print("Installed trigger for automatic data migration on first user registration")


def downgrade() -> None:
    """
    Drop trigger and function, set placeholder user's records back to NULL,
    and delete the placeholder user.
    """
    # Skip in multi-tenant mode for consistency with upgrade
    if MULTI_TENANT:
        return

    connection = op.get_bind()

    # Drop trigger and function if they exist (they may have already self-destructed)
    connection.execute(sa.text(f'DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON "user"'))
    connection.execute(sa.text(f"DROP FUNCTION IF EXISTS {FUNCTION_NAME}()"))

    tables_to_update = [
        "chat_session",
        "credential",
        "document_set",
        "persona",
        "tool",
        "notification",
        "inputprompt",
    ]

    # Set records back to NULL
    for table in tables_to_update:
        try:
            connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = NULL
                    WHERE user_id = :user_id
                    """
                ),
                {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
            )
        except Exception:
            pass

    # Delete the placeholder user
    connection.execute(
        sa.text('DELETE FROM "user" WHERE id = :user_id'),
        {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
    )


================================================
FILE: backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py
================================================
"""add has_web_login column to user

Revision ID: f7e58d357687
Revises: ba98eba0f66a
Create Date: 2024-09-07 20:20:54.522620

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "f7e58d357687"
down_revision = "ba98eba0f66a"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("has_web_login", sa.Boolean(), nullable=False, server_default="true"),
    )


def downgrade() -> None:
    op.drop_column("user", "has_web_login")


================================================
FILE: backend/alembic/versions/f8a9b2c3d4e5_add_research_answer_purpose_to_chat_message.py
================================================
"""add research_answer_purpose to chat_message

Revision ID: f8a9b2c3d4e5
Revises: 5ae8240accb3
Create Date: 2025-01-27 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f8a9b2c3d4e5"
down_revision = "5ae8240accb3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add research_answer_purpose column to chat_message table
    op.add_column(
        "chat_message",
        sa.Column("research_answer_purpose", sa.String(), nullable=True),
    )


def downgrade() -> None:
    # Remove research_answer_purpose column from chat_message table
    op.drop_column("chat_message", "research_answer_purpose")


================================================
FILE: backend/alembic/versions/f9b8c7d6e5a4_update_parent_question_id_foreign_key_to_research_agent_iteration.py
================================================
"""remove foreign key constraints from research_agent_iteration_sub_step

Revision ID: f9b8c7d6e5a4
Revises: bd7c3bf8beba
Create Date: 2025-01-27 12:00:00.000000

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f9b8c7d6e5a4"
down_revision = "bd7c3bf8beba"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Drop the existing foreign key constraint for parent_question_id
    op.drop_constraint(
        "research_agent_iteration_sub_step_parent_question_id_fkey",
        "research_agent_iteration_sub_step",
        type_="foreignkey",
    )

    # Drop the parent_question_id column entirely
    op.drop_column("research_agent_iteration_sub_step", "parent_question_id")

    # Drop the foreign key constraint for primary_question_id to chat_message.id
    # (keep the column as it's needed for the composite foreign key)
    op.drop_constraint(
        "research_agent_iteration_sub_step_primary_question_id_fkey",
        "research_agent_iteration_sub_step",
        type_="foreignkey",
    )


def downgrade() -> None:
    # Restore the foreign key constraint for primary_question_id to chat_message.id
    op.create_foreign_key(
        "research_agent_iteration_sub_step_primary_question_id_fkey",
        "research_agent_iteration_sub_step",
        "chat_message",
        ["primary_question_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Add back the parent_question_id column
    op.add_column(
        "research_agent_iteration_sub_step",
        sa.Column(
            "parent_question_id",
            sa.Integer(),
            nullable=True,
        ),
    )

    # Restore the foreign key constraint pointing to research_agent_iteration_sub_step.id
    op.create_foreign_key(
        "research_agent_iteration_sub_step_parent_question_id_fkey",
        "research_agent_iteration_sub_step",
        "research_agent_iteration_sub_step",
        ["parent_question_id"],
        ["id"],
        ondelete="CASCADE",
    )


================================================
FILE: backend/alembic/versions/fad14119fb92_delete_tags_with_wrong_enum.py
================================================
"""Delete Tags with wrong Enum

Revision ID: fad14119fb92
Revises: 72bdc9929a46
Create Date: 2024-04-25 17:05:09.695703

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "fad14119fb92"
down_revision = "72bdc9929a46"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    # Some documents may lose their tags but this is the only way as the enum
    # mapping may have changed since tag switched to string (it will be reindexed anyway)
    op.execute(
        """
        DELETE FROM document__tag
        WHERE tag_id IN (
            SELECT id FROM tag
            WHERE source ~ '^[0-9]+$'
        )
        """
    )

    op.execute(
        """
        DELETE FROM tag
        WHERE source ~ '^[0-9]+$'
        """
    )


def downgrade() -> None:
    pass


================================================
FILE: backend/alembic/versions/fb80bdd256de_add_chat_background_to_user.py
================================================
"""add chat_background to user

Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "chat_background",
            sa.String(),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "chat_background")


================================================
FILE: backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py
================================================
"""Add slack bot display type

Revision ID: fcd135795f21
Revises: 0a2b51deb0b8
Create Date: 2024-03-04 17:03:27.116284

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "fcd135795f21"
down_revision = "0a2b51deb0b8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "slack_bot_config",
        sa.Column(
            "response_type",
            sa.Enum(
                "QUOTES",
                "CITATIONS",
                name="slackbotresponsetype",
                native_enum=False,
            ),
            nullable=True,
        ),
    )
    op.execute(
        "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
    )
    op.alter_column("slack_bot_config", "response_type", nullable=False)


def downgrade() -> None:
    op.drop_column("slack_bot_config", "response_type")


================================================
FILE: backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py
================================================
"""Add document_set / persona relationship table

Revision ID: febe9eaa0644
Revises: 57b53544726e
Create Date: 2023-09-24 13:06:24.018610

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "febe9eaa0644"
down_revision = "57b53544726e"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.create_table(
        "persona__document_set",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("document_set_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["document_set_id"],
            ["document_set.id"],
        ),
        sa.ForeignKeyConstraint(
            ["persona_id"],
            ["persona.id"],
        ),
        sa.PrimaryKeyConstraint("persona_id", "document_set_id"),
    )


def downgrade() -> None:
    op.drop_table("persona__document_set")


================================================
FILE: backend/alembic/versions/fec3db967bf7_add_time_updated_to_usergroup_and_.py
================================================
"""Add time_updated to UserGroup and DocumentSet

Revision ID: fec3db967bf7
Revises: 97dbb53fa8c8
Create Date: 2025-01-12 15:49:02.289100

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "fec3db967bf7"
down_revision = "97dbb53fa8c8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "document_set",
        sa.Column(
            "time_last_modified_by_user",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.add_column(
        "user_group",
        sa.Column(
            "time_last_modified_by_user",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )


def downgrade() -> None:
    op.drop_column("user_group", "time_last_modified_by_user")
    op.drop_column("document_set", "time_last_modified_by_user")


================================================
FILE: backend/alembic/versions/feead2911109_add_opensearch_tenant_migration_columns.py
================================================
"""add_opensearch_tenant_migration_columns

Revision ID: feead2911109
Revises: d56ffa94ca32
Create Date: 2026-02-10 17:46:34.029937

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "feead2911109"
down_revision = "175ea04c7087"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column("vespa_visit_continuation_token", sa.Text(), nullable=True),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "total_chunks_migrated",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "migration_completed_at",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "enable_opensearch_retrieval",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )


def downgrade() -> None:
    op.drop_column("opensearch_tenant_migration_record", "enable_opensearch_retrieval")
    op.drop_column("opensearch_tenant_migration_record", "migration_completed_at")
    op.drop_column("opensearch_tenant_migration_record", "created_at")
    op.drop_column("opensearch_tenant_migration_record", "total_chunks_migrated")
    op.drop_column(
        "opensearch_tenant_migration_record", "vespa_visit_continuation_token"
    )


================================================
FILE: backend/alembic/versions/ffc707a226b4_basic_document_metadata.py
================================================
"""Basic Document Metadata

Revision ID: ffc707a226b4
Revises: 30c1d5744104
Create Date: 2023-10-18 16:52:25.967592

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "ffc707a226b4"
down_revision = "30c1d5744104"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.add_column(
        "document",
        sa.Column("doc_updated_at", sa.DateTime(timezone=True), nullable=True),
    )
    op.add_column(
        "document",
        sa.Column("primary_owners", postgresql.ARRAY(sa.String()), nullable=True),
    )
    op.add_column(
        "document",
        sa.Column("secondary_owners", postgresql.ARRAY(sa.String()), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("document", "secondary_owners")
    op.drop_column("document", "primary_owners")
    op.drop_column("document", "doc_updated_at")


================================================
FILE: backend/alembic.ini
================================================
# A generic, single database configuration.

[DEFAULT]
# path to migration scripts
script_location = alembic

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to alembic/versions.  When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os  
# Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.  See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
hooks = black
black.type = console_scripts
black.entrypoint = black
black.options = -l 79 REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = INFO
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S


[alembic]
script_location = alembic
version_locations = %(script_location)s/versions

[schema_private]
script_location = alembic_tenants
version_locations = %(script_location)s/versions


================================================
FILE: backend/alembic_tenants/README.md
================================================
These files are for public table migrations when operating with multi tenancy.

If you are not a Onyx developer, you can ignore this directory entirely.


================================================
FILE: backend/alembic_tenants/__init__.py
================================================


================================================
FILE: backend/alembic_tenants/env.py
================================================
import asyncio
from logging.config import fileConfig
from typing import Literal

from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.schema import SchemaItem

from alembic import context
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.models import PublicBase

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
):
    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
    fileConfig(config.config_file_name, disable_existing_loggers=False)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = [PublicBase.metadata]

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.

EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}


def include_object(
    object: SchemaItem,  # noqa: ARG001
    name: str | None,
    type_: Literal[
        "schema",
        "table",
        "column",
        "index",
        "unique_constraint",
        "foreign_key_constraint",
    ],
    reflected: bool,  # noqa: ARG001
    compare_to: SchemaItem | None,  # noqa: ARG001
) -> bool:
    if type_ == "table" and name in EXCLUDE_TABLES:
        return False
    return True


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well.  By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    url = build_connection_string()
    context.configure(
        url=url,
        target_metadata=target_metadata,  # type: ignore
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def do_run_migrations(connection: Connection) -> None:
    context.configure(
        connection=connection,
        target_metadata=target_metadata,  # type: ignore[arg-type]
        include_object=include_object,
    )

    with context.begin_transaction():
        context.run_migrations()


async def run_async_migrations() -> None:
    """In this scenario we need to create an Engine
    and associate a connection with the context.

    """

    connectable = create_async_engine(
        build_connection_string(),
        poolclass=pool.NullPool,
    )

    async with connectable.connect() as connection:
        await connection.run_sync(do_run_migrations)

    await connectable.dispose()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Supports pytest-alembic by checking for a pre-configured connection
    in context.config.attributes["connection"]. If present, uses that
    connection/engine directly instead of creating a new async engine.
    """
    # Check if pytest-alembic is providing a connection/engine
    connectable = context.config.attributes.get("connection", None)

    if connectable is not None:
        # pytest-alembic is providing an engine - use it directly
        with connectable.connect() as connection:
            do_run_migrations(connection)
            # Commit to ensure changes are visible to next migration
            connection.commit()
    else:
        # Normal operation - use async migrations
        asyncio.run(run_async_migrations())


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()


================================================
FILE: backend/alembic_tenants/script.py.mako
================================================
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}


================================================
FILE: backend/alembic_tenants/versions/14a83a331951_create_usertenantmapping_table.py
================================================
import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision = "14a83a331951"
down_revision = None
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "user_tenant_mapping",
        sa.Column("email", sa.String(), nullable=False),
        sa.Column("tenant_id", sa.String(), nullable=False),
        sa.UniqueConstraint("email", "tenant_id", name="uq_user_tenant"),
        sa.UniqueConstraint("email", name="uq_email"),
        schema="public",
    )


def downgrade() -> None:
    op.drop_table("user_tenant_mapping", schema="public")


================================================
FILE: backend/alembic_tenants/versions/34e3630c7f32_lowercase_multi_tenant_user_auth.py
================================================
"""lowercase multi-tenant user auth

Revision ID: 34e3630c7f32
Revises: a4f6ee863c47
Create Date: 2025-02-26 15:03:01.211894

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "34e3630c7f32"
down_revision = "a4f6ee863c47"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # 1) Convert all existing rows to lowercase
    op.execute(
        """
        UPDATE user_tenant_mapping
        SET email = LOWER(email)
        """
    )
    # 2) Add a check constraint so that emails cannot be written in uppercase
    op.create_check_constraint(
        "ensure_lowercase_email",
        "user_tenant_mapping",
        "email = LOWER(email)",
        schema="public",
    )


def downgrade() -> None:
    # Drop the check constraint
    op.drop_constraint(
        "ensure_lowercase_email",
        "user_tenant_mapping",
        schema="public",
        type_="check",
    )


================================================
FILE: backend/alembic_tenants/versions/3b45e0018bf1_add_new_available_tenant_table.py
================================================
"""add new available tenant table

Revision ID: 3b45e0018bf1
Revises: ac842f85f932
Create Date: 2025-03-06 09:55:18.229910

"""

import sqlalchemy as sa

from alembic import op


# revision identifiers, used by Alembic.
revision = "3b45e0018bf1"
down_revision = "ac842f85f932"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Create new_available_tenant table
    op.create_table(
        "available_tenant",
        sa.Column("tenant_id", sa.String(), nullable=False),
        sa.Column("alembic_version", sa.String(), nullable=False),
        sa.Column("date_created", sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint("tenant_id"),
    )


def downgrade() -> None:
    # Drop new_available_tenant table
    op.drop_table("available_tenant")


================================================
FILE: backend/alembic_tenants/versions/3b9f09038764_add_read_only_kg_user.py
================================================
"""add_db_readonly_user

Revision ID: 3b9f09038764
Revises: 3b45e0018bf1
Create Date: 2025-05-11 11:05:11.436977

"""

from sqlalchemy import text

from alembic import op
from onyx.configs.app_configs import DB_READONLY_PASSWORD
from onyx.configs.app_configs import DB_READONLY_USER


# revision identifiers, used by Alembic.
revision = "3b9f09038764"
down_revision = "3b45e0018bf1"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Enable pg_trgm extension if not already enabled
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # Create the read-only db user if it does not already exist.
    if not (DB_READONLY_USER and DB_READONLY_PASSWORD):
        raise Exception("DB_READONLY_USER or DB_READONLY_PASSWORD is not set")

    op.execute(
        text(
            f"""
            DO $$
            BEGIN
                -- Check if the read-only user already exists
                IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                    -- Create the read-only user with the specified password
                    EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');
                    -- First revoke all privileges to ensure a clean slate
                    EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
                    -- Grant only the CONNECT privilege to allow the user to connect to the database
                    -- but not perform any operations without additional specific grants
                    EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');
                END IF;
            END
            $$;
            """
        )
    )


def downgrade() -> None:
    op.execute(
        text(
            f"""
        DO $$
        BEGIN
            IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
                -- First revoke all privileges from the database
                EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
                -- Then revoke all privileges from the public schema
                EXECUTE format('REVOKE ALL ON SCHEMA public FROM %I', '{DB_READONLY_USER}');
                -- Then drop the user
                EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');
            END IF;
        END
        $$;
    """
        )
    )
    op.execute(text("DROP EXTENSION IF EXISTS pg_trgm"))


================================================
FILE: backend/alembic_tenants/versions/a4f6ee863c47_mapping_for_anonymous_user_path.py
================================================
"""mapping for anonymous user path

Revision ID: a4f6ee863c47
Revises: 14a83a331951
Create Date: 2025-01-04 14:16:58.697451

"""

import sqlalchemy as sa

from alembic import op


# revision identifiers, used by Alembic.
revision = "a4f6ee863c47"
down_revision = "14a83a331951"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "tenant_anonymous_user_path",
        sa.Column("tenant_id", sa.String(), primary_key=True, nullable=False),
        sa.Column("anonymous_user_path", sa.String(), nullable=False),
        sa.PrimaryKeyConstraint("tenant_id"),
        sa.UniqueConstraint("anonymous_user_path"),
    )


def downgrade() -> None:
    op.drop_table("tenant_anonymous_user_path")


================================================
FILE: backend/alembic_tenants/versions/ac842f85f932_new_column_user_tenant_mapping.py
================================================
"""new column user tenant mapping

Revision ID: ac842f85f932
Revises: 34e3630c7f32
Create Date: 2025-03-03 13:30:14.802874

"""

import sqlalchemy as sa

from alembic import op


# revision identifiers, used by Alembic.
revision = "ac842f85f932"
down_revision = "34e3630c7f32"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add active column with default value of True
    op.add_column(
        "user_tenant_mapping",
        sa.Column(
            "active",
            sa.Boolean(),
            nullable=False,
            server_default="true",
        ),
        schema="public",
    )

    op.drop_constraint("uq_email", "user_tenant_mapping", schema="public")

    # Create a unique index for active=true records
    # This ensures a user can only be active in one tenant at a time
    op.execute(
        "CREATE UNIQUE INDEX uq_user_active_email_idx ON public.user_tenant_mapping (email) WHERE active = true"
    )


def downgrade() -> None:
    # Drop the unique index for active=true records
    op.execute("DROP INDEX IF EXISTS uq_user_active_email_idx")

    op.create_unique_constraint(
        "uq_email", "user_tenant_mapping", ["email"], schema="public"
    )

    # Remove the active column
    op.drop_column("user_tenant_mapping", "active", schema="public")


================================================
FILE: backend/assets/.gitignore
================================================
*
!.gitignore


================================================
FILE: backend/ee/LICENSE
================================================
The Onyx Enterprise License (the "Enterprise License")
Copyright (c) 2023-present DanswerAI, Inc.

With regard to the Onyx Software:

This software and associated documentation files (the "Software") may only be
used in production, if you (and any entity that you represent) have agreed to,
and are in compliance with, the Onyx Subscription Terms of Service, available
at https://www.onyx.app/legal/self-host (the "Enterprise Terms"), or other
agreement governing the use of the Software, as agreed by you and DanswerAI,
and otherwise have a valid Onyx Enterprise License for the
correct number of user seats. Subject to the foregoing sentence, you are free to
modify this Software and publish patches to the Software. You agree that DanswerAI
and/or its licensors (as applicable) retain all right, title and interest in and
to all such modifications and/or patches, and all such modifications and/or
patches may only be used, copied, modified, displayed, distributed, or otherwise
exploited with a valid Onyx Enterprise License for the correct
number of user seats. Notwithstanding the foregoing, you may copy and modify
the Software for development and testing purposes, without requiring a
subscription. You agree that DanswerAI and/or its licensors (as applicable) retain
all right, title and interest in and to all such modifications. You are not
granted any other rights beyond what is expressly stated herein. Subject to the
foregoing, it is forbidden to copy, merge, publish, distribute, sublicense,
and/or sell the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

For all third party components incorporated into the Onyx Software, those
components are licensed under the original license provided by the owner of the
applicable component.


================================================
FILE: backend/ee/__init__.py
================================================


================================================
FILE: backend/ee/onyx/__init__.py
================================================


================================================
FILE: backend/ee/onyx/access/access.py
================================================
from sqlalchemy.orm import Session

from ee.onyx.db.external_perm import fetch_external_groups_for_user
from ee.onyx.db.external_perm import fetch_public_external_group_ids
from ee.onyx.db.user_group import fetch_user_groups_for_documents
from ee.onyx.db.user_group import fetch_user_groups_for_user
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.access.access import (
    _get_access_for_documents as get_access_for_documents_without_groups,
)
from onyx.access.access import _get_acl_for_user as get_acl_for_user_without_groups
from onyx.access.access import collect_user_file_access
from onyx.access.models import DocumentAccess
from onyx.access.utils import prefix_external_group
from onyx.access.utils import prefix_user_group
from onyx.db.document import get_document_sources
from onyx.db.document import get_documents_by_ids
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.user_file import fetch_user_files_with_access_relationships
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _get_access_for_document(
    document_id: str,
    db_session: Session,
) -> DocumentAccess:
    id_to_access = _get_access_for_documents([document_id], db_session)
    if len(id_to_access) == 0:
        return DocumentAccess.build(
            user_emails=[],
            user_groups=[],
            external_user_emails=[],
            external_user_group_ids=[],
            is_public=False,
        )

    return next(iter(id_to_access.values()))


def _get_access_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    non_ee_access_dict = get_access_for_documents_without_groups(
        document_ids=document_ids,
        db_session=db_session,
    )
    user_group_info: dict[str, list[str]] = {
        document_id: group_names
        for document_id, group_names in fetch_user_groups_for_documents(
            db_session=db_session,
            document_ids=document_ids,
        )
    }
    documents = get_documents_by_ids(
        db_session=db_session,
        document_ids=document_ids,
    )
    doc_id_map = {doc.id: doc for doc in documents}

    # Get all sources in one batch
    doc_id_to_source_map = get_document_sources(
        db_session=db_session,
        document_ids=document_ids,
    )

    all_public_ext_u_group_ids = set(fetch_public_external_group_ids(db_session))

    access_map = {}
    for document_id, non_ee_access in non_ee_access_dict.items():
        document = doc_id_map[document_id]
        source = doc_id_to_source_map.get(document_id)
        if source is None:
            logger.error(f"Document {document_id} has no source")
            continue

        perm_sync_config = get_source_perm_sync_config(source)
        is_only_censored = (
            perm_sync_config
            and perm_sync_config.censoring_config is not None
            and perm_sync_config.doc_sync_config is None
        )

        ext_u_emails = (
            set(document.external_user_emails)
            if document.external_user_emails
            else set()
        )

        ext_u_groups = (
            set(document.external_user_group_ids)
            if document.external_user_group_ids
            else set()
        )

        # If the document is determined to be "public" externally (through a SYNC connector)
        # then it's given the same access level as if it were marked public within Onyx
        # If its censored, then it's public anywhere during the search and then permissions are
        # applied after the search
        is_public_anywhere = (
            document.is_public
            or non_ee_access.is_public
            or is_only_censored
            or any(u_group in all_public_ext_u_group_ids for u_group in ext_u_groups)
        )

        # To avoid collisions of group namings between connectors, they need to be prefixed
        access_map[document_id] = DocumentAccess.build(
            user_emails=list(non_ee_access.user_emails),
            user_groups=user_group_info.get(document_id, []),
            is_public=is_public_anywhere,
            external_user_emails=list(ext_u_emails),
            external_user_group_ids=list(ext_u_groups),
        )
    return access_map


def _collect_user_file_group_names(user_file: UserFile) -> set[str]:
    """Extract user-group names from the already-loaded Persona.groups
    relationships on a UserFile (skipping deleted personas)."""
    groups: set[str] = set()
    for persona in user_file.assistants:
        if persona.deleted:
            continue
        for group in persona.groups:
            groups.add(group.name)
    return groups


def get_access_for_user_files_impl(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    """EE version: extends the MIT user file ACL with user group names
    from personas shared via user groups.

    Uses a single DB query (via fetch_user_files_with_access_relationships)
    that eagerly loads both the MIT-needed and EE-needed relationships.

    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
    DO NOT REMOVE."""
    user_files = fetch_user_files_with_access_relationships(
        user_file_ids, db_session, eager_load_groups=True
    )
    return build_access_for_user_files_impl(user_files)


def build_access_for_user_files_impl(
    user_files: list[UserFile],
) -> dict[str, DocumentAccess]:
    """EE version: works on pre-loaded UserFile objects.
    Expects Persona.groups to be eagerly loaded.

    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
    DO NOT REMOVE."""
    result: dict[str, DocumentAccess] = {}
    for user_file in user_files:
        if user_file.user is None:
            result[str(user_file.id)] = DocumentAccess.build(
                user_emails=[],
                user_groups=[],
                is_public=True,
                external_user_emails=[],
                external_user_group_ids=[],
            )
            continue

        emails, is_public = collect_user_file_access(user_file)
        group_names = _collect_user_file_group_names(user_file)
        result[str(user_file.id)] = DocumentAccess.build(
            user_emails=list(emails),
            user_groups=list(group_names),
            is_public=is_public,
            external_user_emails=[],
            external_user_group_ids=[],
        )
    return result


def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
    """Returns a list of ACL entries that the user has access to. This is meant to be
    used downstream to filter out documents that the user does not have access to. The
    user should have access to a document if at least one entry in the document's ACL
    matches one entry in the returned set.

    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
    DO NOT REMOVE."""
    is_anonymous = user.is_anonymous
    db_user_groups = (
        [] if is_anonymous else fetch_user_groups_for_user(db_session, user.id)
    )
    prefixed_user_groups = [
        prefix_user_group(db_user_group.name) for db_user_group in db_user_groups
    ]

    db_external_groups = (
        [] if is_anonymous else fetch_external_groups_for_user(db_session, user.id)
    )
    prefixed_external_groups = [
        prefix_external_group(db_external_group.external_user_group_id)
        for db_external_group in db_external_groups
    ]

    user_acl = set(prefixed_user_groups + prefixed_external_groups)
    user_acl.update(get_acl_for_user_without_groups(user, db_session))

    return user_acl


================================================
FILE: backend/ee/onyx/access/hierarchy_access.py
================================================
from sqlalchemy.orm import Session

from ee.onyx.db.external_perm import fetch_external_groups_for_user
from onyx.db.models import User


def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
    if not user:
        return []
    external_groups = fetch_external_groups_for_user(db_session, user.id)
    return [external_group.external_user_group_id for external_group in external_groups]


================================================
FILE: backend/ee/onyx/auth/__init__.py
================================================


================================================
FILE: backend/ee/onyx/auth/users.py
================================================
import os
from datetime import datetime

import jwt
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import status

from ee.onyx.configs.app_configs import SUPER_CLOUD_API_KEY
from ee.onyx.configs.app_configs import SUPER_USERS
from ee.onyx.server.seeding import get_seed_config
from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.db.models import User
from onyx.utils.logger import setup_logger


logger = setup_logger()


def verify_auth_setting() -> None:
    # All the Auth flows are valid for EE version, but warn about deprecated 'disabled'
    raw_auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
    if raw_auth_type == "disabled":
        logger.warning(
            "AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration."
        )
    logger.notice(f"Using Auth Type: {AUTH_TYPE.value}")


def get_default_admin_user_emails_() -> list[str]:
    seed_config = get_seed_config()
    if seed_config and seed_config.admin_user_emails:
        return seed_config.admin_user_emails
    return []


async def current_cloud_superuser(
    request: Request,
    user: User = Depends(current_admin_user),
) -> User:
    api_key = request.headers.get("Authorization", "").replace("Bearer ", "")
    if api_key != SUPER_CLOUD_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API key")

    if user and user.email not in SUPER_USERS:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied. User must be a cloud superuser to perform this action.",
        )
    return user


def generate_anonymous_user_jwt_token(tenant_id: str) -> str:
    payload = {
        "tenant_id": tenant_id,
        # Token does not expire
        "iat": datetime.utcnow(),  # Issued at time
    }

    return jwt.encode(payload, USER_AUTH_SECRET, algorithm="HS256")


def decode_anonymous_user_jwt_token(token: str) -> dict:
    return jwt.decode(token, USER_AUTH_SECRET, algorithms=["HS256"])


================================================
FILE: backend/ee/onyx/background/celery/apps/heavy.py
================================================
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.heavy import celery_app


celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "ee.onyx.background.celery.tasks.doc_permission_syncing",
            "ee.onyx.background.celery.tasks.external_group_syncing",
            "ee.onyx.background.celery.tasks.cleanup",
            "ee.onyx.background.celery.tasks.query_history",
        ]
    )
)


================================================
FILE: backend/ee/onyx/background/celery/apps/light.py
================================================
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.light import celery_app

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "ee.onyx.background.celery.tasks.doc_permission_syncing",
            "ee.onyx.background.celery.tasks.external_group_syncing",
        ]
    )
)


================================================
FILE: backend/ee/onyx/background/celery/apps/monitoring.py
================================================
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.monitoring import celery_app

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "ee.onyx.background.celery.tasks.tenant_provisioning",
        ]
    )
)


================================================
FILE: backend/ee/onyx/background/celery/apps/primary.py
================================================
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.primary import celery_app


celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "ee.onyx.background.celery.tasks.hooks",
            "ee.onyx.background.celery.tasks.doc_permission_syncing",
            "ee.onyx.background.celery.tasks.external_group_syncing",
            "ee.onyx.background.celery.tasks.cloud",
            "ee.onyx.background.celery.tasks.ttl_management",
            "ee.onyx.background.celery.tasks.usage_reporting",
        ]
    )
)


================================================
FILE: backend/ee/onyx/background/celery/tasks/beat_schedule.py
================================================
from datetime import timedelta
from typing import Any

from ee.onyx.configs.app_configs import CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS
from onyx.background.celery.tasks.beat_schedule import (
    beat_cloud_tasks as base_beat_system_tasks,
)
from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
from onyx.background.celery.tasks.beat_schedule import (
    beat_task_templates as base_beat_task_templates,
)
from onyx.background.celery.tasks.beat_schedule import generate_cloud_tasks
from onyx.background.celery.tasks.beat_schedule import (
    get_tasks_to_schedule as base_get_tasks_to_schedule,
)
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from shared_configs.configs import MULTI_TENANT

ee_beat_system_tasks: list[dict] = []

ee_beat_task_templates: list[dict] = [
    {
        "name": "autogenerate-usage-report",
        "task": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
        "schedule": timedelta(days=30),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-ttl-management",
        "task": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
        "schedule": timedelta(hours=CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "export-query-history-cleanup-task",
        "task": OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,
        "schedule": timedelta(hours=1),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
            "queue": OnyxCeleryQueues.CSV_GENERATION,
        },
    },
]

ee_tasks_to_schedule: list[dict] = []

if not MULTI_TENANT:
    ee_tasks_to_schedule = [
        {
            "name": "hook-execution-log-cleanup",
            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
            "schedule": timedelta(days=1),
            "options": {
                "priority": OnyxCeleryPriority.LOW,
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        },
        {
            "name": "autogenerate-usage-report",
            "task": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
            "schedule": timedelta(days=30),  # TODO: change this to config flag
            "options": {
                "priority": OnyxCeleryPriority.MEDIUM,
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        },
        {
            "name": "check-ttl-management",
            "task": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
            "schedule": timedelta(hours=CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS),
            "options": {
                "priority": OnyxCeleryPriority.MEDIUM,
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        },
        {
            "name": "export-query-history-cleanup-task",
            "task": OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,
            "schedule": timedelta(hours=1),
            "options": {
                "priority": OnyxCeleryPriority.MEDIUM,
                "expires": BEAT_EXPIRES_DEFAULT,
                "queue": OnyxCeleryQueues.CSV_GENERATION,
            },
        },
    ]


def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
    beat_system_tasks = ee_beat_system_tasks + base_beat_system_tasks
    beat_task_templates = ee_beat_task_templates + base_beat_task_templates
    cloud_tasks = generate_cloud_tasks(
        beat_system_tasks, beat_task_templates, beat_multiplier
    )
    return cloud_tasks


def get_tasks_to_schedule() -> list[dict[str, Any]]:
    return ee_tasks_to_schedule + base_get_tasks_to_schedule()


================================================
FILE: backend/ee/onyx/background/celery/tasks/cleanup/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/cleanup/tasks.py
================================================
from datetime import datetime
from datetime import timedelta

from celery import shared_task

from ee.onyx.db.query_history import get_all_query_history_export_tasks
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.enums import TaskStatus
from onyx.db.tasks import delete_task_with_id
from onyx.utils.logger import setup_logger


logger = setup_logger()


@shared_task(
    name=OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
)
def export_query_history_cleanup_task(*, tenant_id: str) -> None:
    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        tasks = get_all_query_history_export_tasks(db_session=db_session)

        for task in tasks:
            if task.status == TaskStatus.SUCCESS:
                delete_task_with_id(db_session=db_session, task_id=task.task_id)
            elif task.status == TaskStatus.FAILURE:
                if task.start_time:
                    deadline = task.start_time + timedelta(hours=24)
                    now = datetime.now()
                    if now < deadline:
                        continue

                logger.error(
                    f"Task with {task.task_id=} failed; it is being deleted now"
                )
                delete_task_with_id(db_session=db_session, task_id=task.task_id)


================================================
FILE: backend/ee/onyx/background/celery/tasks/cloud/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/cloud/tasks.py
================================================
import time

from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis.lock import Lock as RedisLock

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST


@shared_task(
    name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
    ignore_result=True,
    trail=False,
    bind=True,
)
def cloud_beat_task_generator(
    self: Task,
    task_name: str,
    queue: str = OnyxCeleryTask.DEFAULT,
    priority: int = OnyxCeleryPriority.MEDIUM,
    expires: int = BEAT_EXPIRES_DEFAULT,
) -> bool | None:
    """a lightweight task used to kick off individual beat tasks per tenant."""
    time_start = time.monotonic()

    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)

    lock_beat: RedisLock = redis_client.lock(
        f"{OnyxRedisLocks.CLOUD_BEAT_TASK_GENERATOR_LOCK}:{task_name}",
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    last_lock_time = time.monotonic()
    tenant_ids: list[str] = []
    num_processed_tenants = 0

    try:
        tenant_ids = get_all_tenant_ids()

        # NOTE: for now, we are running tasks for gated tenants, since we want to allow
        # connector deletion to run successfully. The new plan is to continously prune
        # the gated tenants set, so we won't have a build up of old, unused gated tenants.
        # Keeping this around in case we want to revert to the previous behavior.
        # gated_tenants = get_gated_tenants()

        for tenant_id in tenant_ids:
            # Same comment here as the above NOTE
            # if tenant_id in gated_tenants:
            #     continue

            current_time = time.monotonic()
            if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4):
                lock_beat.reacquire()
                last_lock_time = current_time

            # needed in the cloud
            if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:
                continue

            self.app.send_task(
                task_name,
                kwargs=dict(
                    tenant_id=tenant_id,
                ),
                queue=queue,
                priority=priority,
                expires=expires,
                ignore_result=True,
            )

            num_processed_tenants += 1
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during cloud_beat_task_generator")
    finally:
        if not lock_beat.owned():
            task_logger.error(
                "cloud_beat_task_generator - Lock not owned on completion"
            )
            redis_lock_dump(lock_beat, redis_client)
        else:
            lock_beat.release()

    time_elapsed = time.monotonic() - time_start
    task_logger.info(
        f"cloud_beat_task_generator finished: "
        f"task={task_name} "
        f"num_processed_tenants={num_processed_tenants} "
        f"num_tenants={len(tenant_ids)} "
        f"elapsed={time_elapsed:.2f}"
    )
    return True


================================================
FILE: backend/ee/onyx/background/celery/tasks/doc_permission_syncing/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
================================================
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from time import sleep
from typing import Any
from typing import cast
from uuid import uuid4

from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import ValidationError
from redis import Redis
from redis.exceptions import LockError
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import retry
from tenacity import retry_if_exception
from tenacity import stop_after_delay
from tenacity import wait_random_exponential

from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.document import upsert_document_external_perms
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import mark_cc_pair_as_permissions_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document import get_documents_for_connector_credential_pair_limited_columns
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import (
    update_hierarchy_node_permissions as db_update_hierarchy_node_permissions,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import mark_doc_permission_sync_attempt_failed
from onyx.db.permission_sync_attempt import (
    mark_doc_permission_sync_attempt_in_progress,
)
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.users import batch_add_ext_perm_user_if_not_exists
from onyx.db.utils import DocumentRow
from onyx.db.utils import is_retryable_sqlalchemy_error
from onyx.db.utils import SortOrder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSyncPayload
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.server.utils import make_short_id
from onyx.utils.logger import doc_permission_sync_ctx
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


DOCUMENT_PERMISSIONS_UPDATE_MAX_RETRIES = 3
DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER = 10 * 60
DOCUMENT_PERMISSIONS_UPDATE_MAX_WAIT = 60


# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT
LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15


def _get_fence_validation_block_expiration() -> int:
    """
    Compute the expiration time for the fence validation block signal.
    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.
    """
    base_expiration = 300  # seconds

    if not MULTI_TENANT:
        return base_expiration

    try:
        beat_multiplier = OnyxRuntime.get_beat_multiplier()
    except Exception:
        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

    return int(base_expiration * beat_multiplier)


"""Jobs / utils for kicking off doc permissions sync tasks."""


def _fail_doc_permission_sync_attempt(attempt_id: int, error_msg: str) -> None:
    """Helper to mark a doc permission sync attempt as failed with an error message."""
    with get_session_with_current_tenant() as db_session:
        mark_doc_permission_sync_attempt_failed(
            attempt_id, db_session, error_message=error_msg
        )


def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if external doc permissions sync is due."""

    if cc_pair.access_type != AccessType.SYNC:
        return False

    # skip doc permissions sync if not active
    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
        return False

    sync_config = get_source_perm_sync_config(cc_pair.connector.source)
    if sync_config is None:
        logger.error(f"No sync config found for {cc_pair.connector.source}")
        return False

    if sync_config.doc_sync_config is None:
        logger.error(f"No doc sync config found for {cc_pair.connector.source}")
        return False

    # if indexing also does perm sync, don't start running doc_sync until at
    # least one indexing is done
    if (
        sync_config.doc_sync_config.initial_index_should_sync
        and cc_pair.last_successful_index_time is None
    ):
        return False

    # If the last sync is None, it has never been run so we run the sync
    last_perm_sync = cc_pair.last_time_perm_sync
    if last_perm_sync is None:
        return True

    source_sync_period = sync_config.doc_sync_config.doc_sync_frequency
    source_sync_period *= int(OnyxRuntime.get_doc_permission_sync_multiplier())

    # If the last sync is greater than the full fetch period, we run the sync
    next_sync = last_perm_sync + timedelta(seconds=source_sync_period)
    if datetime.now(timezone.utc) >= next_sync:
        return True

    return False


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
)
def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None:
    # TODO(rkuo): merge into check function after lookup table for fences is added

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        # get all cc pairs that need to be synced
        cc_pair_ids_to_sync: list[int] = []
        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            for cc_pair in cc_pairs:
                if _is_external_doc_permissions_sync_due(cc_pair):
                    cc_pair_ids_to_sync.append(cc_pair.id)

        lock_beat.reacquire()
        for cc_pair_id in cc_pair_ids_to_sync:
            payload_id = try_creating_permissions_sync_task(
                self.app, cc_pair_id, r, tenant_id
            )
            if not payload_id:
                continue

            task_logger.info(
                f"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}"
            )

        # we want to run this less frequently than the overall task
        lock_beat.reacquire()
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES):
            # clear any permission fences that don't have associated celery tasks in progress
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
                r_celery = celery_get_broker_client(self.app)
                validate_permission_sync_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
            except Exception:
                task_logger.exception(
                    "Exception while validating permission sync fences"
                )

            r.set(
                OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES,
                1,
                ex=_get_fence_validation_block_expiration(),
            )

        # use a lookup table to find active fences. We still have to verify the fence
        # exists since it is an optimization and not the source of truth.
        lock_beat.reacquire()
        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
        for key in keys:
            key_bytes = cast(bytes, key)

            if not r.exists(key_bytes):
                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
                continue

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
                with get_session_with_current_tenant() as db_session:
                    monitor_ccpair_permissions_taskset(
                        tenant_id, key_bytes, r, db_session
                    )
        task_logger.info(f"check_for_doc_permissions_sync finished: tenant={tenant_id}")
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}"
        )
        task_logger.exception(
            f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}"
        )
    finally:
        if lock_beat.owned():
            lock_beat.release()

    return True


def try_creating_permissions_sync_task(
    app: Celery,
    cc_pair_id: int,
    r: Redis,
    tenant_id: str,
) -> str | None:
    """Returns a randomized payload id on success.
    Returns None if no syncing is required."""
    LOCK_TIMEOUT = 30

    payload_id: str | None = None

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    lock: RedisLock = r.lock(
        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_generate_permissions_sync_tasks",
        timeout=LOCK_TIMEOUT,
    )

    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
    if not acquired:
        return None

    try:
        if redis_connector.permissions.fenced:
            return None

        if redis_connector.delete.fenced:
            return None

        if redis_connector.prune.fenced:
            return None

        redis_connector.permissions.generator_clear()
        redis_connector.permissions.taskset_clear()

        custom_task_id = f"{redis_connector.permissions.generator_task_key}_{uuid4()}"

        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
            with get_session_with_current_tenant() as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
                    sync_type=SyncType.EXTERNAL_PERMISSIONS,
                )
        except Exception:
            task_logger.exception("insert_sync_record exceptioned.")

        # set a basic fence to start
        redis_connector.permissions.set_active()
        payload = RedisConnectorPermissionSyncPayload(
            id=make_short_id(),
            submitted=datetime.now(timezone.utc),
            started=None,
            celery_task_id=None,
        )
        redis_connector.permissions.set_fence(payload)

        result = app.send_task(
            OnyxCeleryTask.CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK,
            kwargs=dict(
                cc_pair_id=cc_pair_id,
                tenant_id=tenant_id,
            ),
            queue=OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
            task_id=custom_task_id,
            priority=OnyxCeleryPriority.MEDIUM,
        )

        # fill in the celery task id
        payload.celery_task_id = result.id
        redis_connector.permissions.set_fence(payload)

        payload_id = payload.id
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
        )
        return None
    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
    )
    return payload_id


@shared_task(
    name=OnyxCeleryTask.CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK,
    acks_late=False,
    soft_time_limit=JOB_TIMEOUT,
    track_started=True,
    trail=False,
    bind=True,
)
def connector_permission_sync_generator_task(
    self: Task,
    cc_pair_id: int,
    tenant_id: str,
) -> None:
    """
    Permission sync task that handles document permission syncing for a given connector credential pair
    This task assumes that the task has already been properly fenced
    """

    payload_id: str | None = None

    LoggerContextVars.reset()

    doc_permission_sync_ctx_dict = doc_permission_sync_ctx.get()
    doc_permission_sync_ctx_dict["cc_pair_id"] = cc_pair_id
    doc_permission_sync_ctx_dict["request_id"] = self.request.id
    doc_permission_sync_ctx.set(doc_permission_sync_ctx_dict)

    with get_session_with_current_tenant() as db_session:
        attempt_id = create_doc_permission_sync_attempt(
            connector_credential_pair_id=cc_pair_id,
            db_session=db_session,
        )
        task_logger.info(
            f"Created doc permission sync attempt: {attempt_id} for cc_pair={cc_pair_id}"
        )

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
    # before the primary worker can finalize the fence
    start = time.monotonic()
    while True:
        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
            error_msg = (
                f"connector_permission_sync_generator_task - timed out waiting for fence to be ready: "
                f"fence={redis_connector.permissions.fence_key}"
            )
            _fail_doc_permission_sync_attempt(attempt_id, error_msg)
            raise ValueError(error_msg)

        if not redis_connector.permissions.fenced:  # The fence must exist
            error_msg = f"connector_permission_sync_generator_task - fence not found: fence={redis_connector.permissions.fence_key}"
            _fail_doc_permission_sync_attempt(attempt_id, error_msg)
            raise ValueError(error_msg)

        payload = redis_connector.permissions.payload  # The payload must exist
        if not payload:
            error_msg = (
                "connector_permission_sync_generator_task: payload invalid or not found"
            )
            _fail_doc_permission_sync_attempt(attempt_id, error_msg)
            raise ValueError(error_msg)

        if payload.celery_task_id is None:
            logger.info(
                f"connector_permission_sync_generator_task - Waiting for fence: fence={redis_connector.permissions.fence_key}"
            )
            sleep(1)
            continue

        payload_id = payload.id

        logger.info(
            f"connector_permission_sync_generator_task - Fence found, continuing...: "
            f"fence={redis_connector.permissions.fence_key} "
            f"payload_id={payload.id}"
        )
        break

    lock: RedisLock = r.lock(
        OnyxRedisLocks.CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX
        + f"_{redis_connector.cc_pair_id}",
        timeout=CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT,
        thread_local=False,
    )

    acquired = lock.acquire(blocking=False)
    if not acquired:
        error_msg = (
            f"Permission sync task already running, exiting...: cc_pair={cc_pair_id}"
        )
        task_logger.warning(error_msg)
        _fail_doc_permission_sync_attempt(attempt_id, error_msg)
        return None

    try:
        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
                eager_load_connector=True,
                eager_load_credential=True,
            )
            if cc_pair is None:
                raise ValueError(
                    f"No connector credential pair found for id: {cc_pair_id}"
                )

            try:
                created = validate_ccpair_for_user(
                    cc_pair.connector.id,
                    cc_pair.credential.id,
                    cc_pair.access_type,
                    db_session,
                    enforce_creation=False,
                )
                if not created:
                    task_logger.warning(
                        f"Unable to create connector credential pair for id: {cc_pair_id}"
                    )
            except Exception:
                task_logger.exception(
                    f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
                )
                # TODO: add some notification to the admins here
                raise

            source_type = cc_pair.connector.source
            sync_config = get_source_perm_sync_config(source_type)
            if sync_config is None:
                error_msg = f"No sync config found for {source_type}"
                logger.error(error_msg)
                _fail_doc_permission_sync_attempt(attempt_id, error_msg)
                return None

            if sync_config.doc_sync_config is None:
                if sync_config.censoring_config:
                    error_msg = f"Doc sync config is None but censoring config exists for {source_type}"
                    _fail_doc_permission_sync_attempt(attempt_id, error_msg)
                    return None

                raise ValueError(
                    f"No doc sync func found for {source_type} with cc_pair={cc_pair_id}"
                )

            logger.info(f"Syncing docs for {source_type} with cc_pair={cc_pair_id}")

            mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)

            payload = redis_connector.permissions.payload
            if not payload:
                raise ValueError(f"No fence payload found: cc_pair={cc_pair_id}")

            new_payload = RedisConnectorPermissionSyncPayload(
                id=payload.id,
                submitted=payload.submitted,
                started=datetime.now(timezone.utc),
                celery_task_id=payload.celery_task_id,
            )
            redis_connector.permissions.set_fence(new_payload)

            callback = PermissionSyncCallback(
                redis_connector, lock, r, timeout_seconds=JOB_TIMEOUT
            )

            # pass in the capability to fetch all existing docs for the cc_pair
            # this is can be used to determine documents that are "missing" and thus
            # should no longer be accessible. The decision as to whether we should find
            # every document during the doc sync process is connector-specific.
            def fetch_all_existing_docs_fn(
                sort_order: SortOrder | None = None,
            ) -> list[DocumentRow]:
                result = get_documents_for_connector_credential_pair_limited_columns(
                    db_session=db_session,
                    connector_id=cc_pair.connector.id,
                    credential_id=cc_pair.credential.id,
                    sort_order=sort_order,
                )
                return list(result)

            def fetch_all_existing_docs_ids_fn() -> list[str]:
                result = get_document_ids_for_connector_credential_pair(
                    db_session=db_session,
                    connector_id=cc_pair.connector.id,
                    credential_id=cc_pair.credential.id,
                )
                return result

            doc_sync_func = sync_config.doc_sync_config.doc_sync_func
            document_external_accesses = doc_sync_func(
                cc_pair,
                fetch_all_existing_docs_fn,
                fetch_all_existing_docs_ids_fn,
                callback,
            )

            task_logger.info(
                f"RedisConnector.permissions.generate_tasks starting. cc_pair={cc_pair_id}"
            )

            tasks_generated = 0
            docs_with_errors = 0
            for doc_external_access in document_external_accesses:
                if callback.should_stop():
                    raise RuntimeError(
                        f"Permission sync task timed out or stop signal detected: "
                        f"cc_pair={cc_pair_id} "
                        f"tasks_generated={tasks_generated}"
                    )

                result = redis_connector.permissions.update_db(
                    lock=lock,
                    new_permissions=[doc_external_access],
                    source_string=source_type,
                    connector_id=cc_pair.connector.id,
                    credential_id=cc_pair.credential.id,
                    task_logger=task_logger,
                )
                tasks_generated += result.num_updated
                docs_with_errors += result.num_errors

            task_logger.info(
                f"RedisConnector.permissions.generate_tasks finished. "
                f"cc_pair={cc_pair_id} tasks_generated={tasks_generated} docs_with_errors={docs_with_errors}"
            )

            complete_doc_permission_sync_attempt(
                db_session=db_session,
                attempt_id=attempt_id,
                total_docs_synced=tasks_generated,
                docs_with_permission_errors=docs_with_errors,
            )
            task_logger.info(
                f"Completed doc permission sync attempt {attempt_id}: {tasks_generated} docs, {docs_with_errors} errors"
            )

            redis_connector.permissions.generator_complete = tasks_generated

    except Exception as e:
        error_msg = format_error_for_logging(e)

        task_logger.warning(
            f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}"
        )
        task_logger.exception(
            f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}"
        )

        with get_session_with_current_tenant() as db_session:
            mark_doc_permission_sync_attempt_failed(
                attempt_id, db_session, error_message=error_msg
            )

        redis_connector.permissions.generator_clear()
        redis_connector.permissions.taskset_clear()
        redis_connector.permissions.set_fence(None)
        raise e
    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"Permission sync finished: cc_pair={cc_pair_id} payload_id={payload.id}"
    )


# NOTE(rkuo): this should probably move to the db layer
@retry(
    retry=retry_if_exception(is_retryable_sqlalchemy_error),
    wait=wait_random_exponential(
        multiplier=1, max=DOCUMENT_PERMISSIONS_UPDATE_MAX_WAIT
    ),
    stop=stop_after_delay(DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER),
)
def element_update_permissions(
    tenant_id: str,
    permissions: ElementExternalAccess,
    source_type_str: str,
    connector_id: int,
    credential_id: int,
) -> bool:
    """Update permissions for a document or hierarchy node."""
    start = time.monotonic()
    external_access = permissions.external_access

    # Determine element type and identifier for logging
    if isinstance(permissions, DocExternalAccess):
        element_id = permissions.doc_id
        element_type = "doc"
    else:
        element_id = permissions.raw_node_id
        element_type = "node"

    try:
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Add the users to the DB if they don't exist
            batch_add_ext_perm_user_if_not_exists(
                db_session=db_session,
                emails=list(external_access.external_user_emails),
                continue_on_error=True,
            )

            if isinstance(permissions, DocExternalAccess):
                # Document permission update
                created_new_doc = upsert_document_external_perms(
                    db_session=db_session,
                    doc_id=permissions.doc_id,
                    external_access=external_access,
                    source_type=DocumentSource(source_type_str),
                )

                if created_new_doc:
                    # If a new document was created, we associate it with the cc_pair
                    upsert_document_by_connector_credential_pair(
                        db_session=db_session,
                        connector_id=connector_id,
                        credential_id=credential_id,
                        document_ids=[permissions.doc_id],
                    )
            else:
                # Hierarchy node permission update
                db_update_hierarchy_node_permissions(
                    db_session=db_session,
                    raw_node_id=permissions.raw_node_id,
                    source=DocumentSource(permissions.source),
                    is_public=external_access.is_public,
                    external_user_emails=(
                        list(external_access.external_user_emails)
                        if external_access.external_user_emails
                        else None
                    ),
                    external_user_group_ids=(
                        list(external_access.external_user_group_ids)
                        if external_access.external_user_group_ids
                        else None
                    ),
                )

            elapsed = time.monotonic() - start
            task_logger.info(
                f"{element_type}={element_id} action=update_permissions elapsed={elapsed:.2f}"
            )
    except Exception as e:
        task_logger.exception(
            f"element_update_permissions exceptioned: {element_type}={element_id}, {connector_id=} {credential_id=}"
        )
        raise e
    finally:
        task_logger.info(
            f"element_update_permissions completed: {element_type}={element_id}, {connector_id=} {credential_id=}"
        )

    return True


def validate_permission_sync_fences(
    tenant_id: str,
    r: Redis,
    r_replica: Redis,
    r_celery: Redis,
    lock_beat: RedisLock,
) -> None:
    # building lookup table can be expensive, so we won't bother
    # validating until the queue is small
    PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN = 1024

    queue_len = celery_get_queue_length(
        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
    )
    if queue_len > PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN:
        return

    queued_upsert_tasks = celery_get_queued_task_ids(
        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
    )
    reserved_generator_tasks = celery_get_unacked_task_ids(
        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
    )

    # validate all existing permission sync jobs
    lock_beat.reacquire()
    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
    for key in keys:
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
        if not key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
            continue

        validate_permission_sync_fence(
            tenant_id,
            key_bytes,
            queued_upsert_tasks,
            reserved_generator_tasks,
            r,
            r_celery,
        )

        lock_beat.reacquire()

    return


def validate_permission_sync_fence(
    tenant_id: str,
    key_bytes: bytes,
    queued_tasks: set[str],
    reserved_tasks: set[str],
    r: Redis,
    r_celery: Redis,
) -> None:
    """Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
    This can happen if the indexing worker hard crashes or is terminated.
    Being in this bad state means the fence will never clear without help, so this function
    gives the help.

    How this works:
    1. This function renews the active signal with a 5 minute TTL under the following conditions
    1.2. When the task is seen in the redis queue
    1.3. When the task is seen in the reserved / prefetched list

    2. Externally, the active signal is renewed when:
    2.1. The fence is created
    2.2. The indexing watchdog checks the spawned task.

    3. The TTL allows us to get through the transitions on fence startup
    and when the task starts executing.

    More TTL clarification: it is seemingly impossible to exactly query Celery for
    whether a task is in the queue or currently executing.
    1. An unknown task id is always returned as state PENDING.
    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
    and the time it actually starts on the worker.

    queued_tasks: the celery queue of lightweight permission sync tasks
    reserved_tasks: prefetched tasks for sync task generator
    """
    # if the fence doesn't exist, there's nothing to do
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(
            f"validate_permission_sync_fence - could not parse id from {fence_key}"
        )
        return

    cc_pair_id = int(cc_pair_id_str)
    # parse out metadata and initialize the helper class with it
    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))

    # check to see if the fence/payload exists
    if not redis_connector.permissions.fenced:
        return

    # in the cloud, the payload format may have changed ...
    # it's a little sloppy, but just reset the fence for now if that happens
    # TODO: add intentional cleanup/abort logic
    try:
        payload = redis_connector.permissions.payload
    except ValidationError:
        task_logger.exception(
            "validate_permission_sync_fence - "
            "Resetting fence because fence schema is out of date: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key}"
        )

        redis_connector.permissions.reset()
        return

    if not payload:
        return

    if not payload.celery_task_id:
        return

    # OK, there's actually something for us to validate

    # either the generator task must be in flight or its subtasks must be
    found = celery_find_task(
        payload.celery_task_id,
        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
        r_celery,
    )
    if found:
        # the celery task exists in the redis queue
        redis_connector.permissions.set_active()
        return

    if payload.celery_task_id in reserved_tasks:
        # the celery task was prefetched and is reserved within a worker
        redis_connector.permissions.set_active()
        return

    # look up every task in the current taskset in the celery queue
    # every entry in the taskset should have an associated entry in the celery task queue
    # because we get the celery tasks first, the entries in our own permissions taskset
    # should be roughly a subset of the tasks in celery

    # this check isn't very exact, but should be sufficient over a period of time
    # A single successful check over some number of attempts is sufficient.

    # TODO: if the number of tasks in celery is much lower than than the taskset length
    # we might be able to shortcut the lookup since by definition some of the tasks
    # must not exist in celery.

    tasks_scanned = 0
    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad

    for member in r.sscan_iter(redis_connector.permissions.taskset_key):
        tasks_scanned += 1

        member_bytes = cast(bytes, member)
        member_str = member_bytes.decode("utf-8")
        if member_str in queued_tasks:
            continue

        if member_str in reserved_tasks:
            continue

        tasks_not_in_celery += 1

    task_logger.info(
        f"validate_permission_sync_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
    )

    # we're active if there are still tasks to run and those tasks all exist in celery
    if tasks_scanned > 0 and tasks_not_in_celery == 0:
        redis_connector.permissions.set_active()
        return

    # we may want to enable this check if using the active task list somehow isn't good enough
    # if redis_connector_index.generator_locked():
    #     logger.info(f"{payload.celery_task_id} is currently executing.")

    # if we get here, we didn't find any direct indication that the associated celery tasks exist,
    # but they still might be there due to gaps in our ability to check states during transitions
    # Checking the active signal safeguards us against these transition periods
    # (which has a duration that allows us to bridge those gaps)
    if redis_connector.permissions.active():
        return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
    task_logger.warning(
        "validate_permission_sync_fence - "
        "Resetting fence because no associated celery tasks were found: "
        f"cc_pair={cc_pair_id} "
        f"fence={fence_key} "
        f"payload_id={payload.id}"
    )

    redis_connector.permissions.reset()
    return


class PermissionSyncCallback(IndexingHeartbeatInterface):
    PARENT_CHECK_INTERVAL = 60

    def __init__(
        self,
        redis_connector: RedisConnector,
        redis_lock: RedisLock,
        redis_client: Redis,
        timeout_seconds: int | None = None,
    ):
        super().__init__()
        self.redis_connector: RedisConnector = redis_connector
        self.redis_lock: RedisLock = redis_lock
        self.redis_client = redis_client

        self.started: datetime = datetime.now(timezone.utc)
        self.redis_lock.reacquire()

        self.last_tag: str = "PermissionSyncCallback.__init__"
        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
        self.last_lock_monotonic = time.monotonic()
        self.start_monotonic = time.monotonic()
        self.timeout_seconds = timeout_seconds

    def should_stop(self) -> bool:
        if self.redis_connector.stop.fenced:
            return True

        # Check if the task has exceeded its timeout
        # NOTE: Celery's soft_time_limit does not work with thread pools,
        # so we must enforce timeouts internally.
        if self.timeout_seconds is not None:
            elapsed = time.monotonic() - self.start_monotonic
            if elapsed > self.timeout_seconds:
                logger.warning(
                    f"PermissionSyncCallback - task timeout exceeded: "
                    f"elapsed={elapsed:.0f}s timeout={self.timeout_seconds}s "
                    f"cc_pair={self.redis_connector.cc_pair_id}"
                )
                return True

        return False

    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002
        try:
            self.redis_connector.permissions.set_active()

            current_time = time.monotonic()
            if current_time - self.last_lock_monotonic >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
            ):
                self.redis_lock.reacquire()
                self.last_lock_reacquire = datetime.now(timezone.utc)
                self.last_lock_monotonic = time.monotonic()

            self.last_tag = tag
        except LockError:
            logger.exception(
                f"PermissionSyncCallback - lock.reacquire exceptioned: "
                f"lock_timeout={self.redis_lock.timeout} "
                f"start={self.started} "
                f"last_tag={self.last_tag} "
                f"last_reacquired={self.last_lock_reacquire} "
                f"now={datetime.now(timezone.utc)}"
            )

            redis_lock_dump(self.redis_lock, self.redis_client)
            raise


"""Monitoring CCPair permissions utils"""


def monitor_ccpair_permissions_taskset(
    tenant_id: str,
    key_bytes: bytes,
    r: Redis,  # noqa: ARG001
    db_session: Session,
) -> None:
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(
            f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
        )
        return

    cc_pair_id = int(cc_pair_id_str)

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if not redis_connector.permissions.fenced:
        return

    initial = redis_connector.permissions.generator_complete
    if initial is None:
        return

    try:
        payload = redis_connector.permissions.payload
    except ValidationError:
        task_logger.exception(
            "Permissions sync payload failed to validate. Schema may have been updated."
        )
        return

    if not payload:
        return

    remaining = redis_connector.permissions.get_remaining()
    task_logger.info(
        f"Permissions sync progress: cc_pair={cc_pair_id} id={payload.id} remaining={remaining} initial={initial}"
    )

    # Add telemetry for permission syncing progress
    optional_telemetry(
        record_type=RecordType.PERMISSION_SYNC_PROGRESS,
        data={
            "cc_pair_id": cc_pair_id,
            "total_docs_synced": initial if initial is not None else 0,
            "remaining_docs_to_sync": remaining,
        },
        tenant_id=tenant_id,
    )

    if remaining > 0:
        return

    mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), payload.started)
    task_logger.info(
        f"Permissions sync finished: cc_pair={cc_pair_id} id={payload.id} num_synced={initial}"
    )

    # Add telemetry for permission syncing complete
    optional_telemetry(
        record_type=RecordType.PERMISSION_SYNC_COMPLETE,
        data={"cc_pair_id": cc_pair_id},
        tenant_id=tenant_id,
    )

    update_sync_record_status(
        db_session=db_session,
        entity_id=cc_pair_id,
        sync_type=SyncType.EXTERNAL_PERMISSIONS,
        sync_status=SyncStatus.SUCCESS,
        num_docs_synced=initial,
    )

    redis_connector.permissions.reset()


================================================
FILE: backend/ee/onyx/background/celery/tasks/external_group_syncing/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py
================================================
from sqlalchemy.orm import Session

from ee.onyx.external_permissions.sync_params import (
    source_group_sync_is_cc_pair_agnostic,
)
from onyx.db.connector import mark_cc_pair_as_external_group_synced
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_source
from onyx.db.models import ConnectorCredentialPair


def _get_all_cc_pair_ids_to_mark_as_group_synced(
    db_session: Session, cc_pair: ConnectorCredentialPair
) -> list[int]:
    if not source_group_sync_is_cc_pair_agnostic(cc_pair.connector.source):
        return [cc_pair.id]

    cc_pairs = get_connector_credential_pairs_for_source(
        db_session, cc_pair.connector.source
    )
    return [cc_pair.id for cc_pair in cc_pairs]


def mark_all_relevant_cc_pairs_as_external_group_synced(
    db_session: Session, cc_pair: ConnectorCredentialPair
) -> None:
    """For some source types, one successful group sync run should count for all
    cc pairs of that type. This function handles that case."""
    cc_pair_ids = _get_all_cc_pair_ids_to_mark_as_group_synced(db_session, cc_pair)
    for cc_pair_id in cc_pair_ids:
        mark_cc_pair_as_external_group_synced(db_session, cc_pair_id)


================================================
FILE: backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
================================================
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast
from uuid import uuid4

from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import ValidationError
from redis import Redis
from redis.lock import Lock as RedisLock

from ee.onyx.background.celery.tasks.external_group_syncing.group_sync_utils import (
    mark_all_relevant_cc_pairs_as_external_group_synced,
)
from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.connector_credential_pair import get_cc_pairs_by_source
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.db.external_perm import mark_old_external_groups_as_stale
from ee.onyx.db.external_perm import remove_stale_external_groups
from ee.onyx.db.external_perm import upsert_external_groups
from ee.onyx.external_permissions.sync_params import (
    get_all_cc_pair_agnostic_group_sync_sources,
)
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.permission_sync_attempt import complete_external_group_sync_attempt
from onyx.db.permission_sync_attempt import (
    create_external_group_sync_attempt,
)
from onyx.db.permission_sync_attempt import (
    mark_external_group_sync_attempt_failed,
)
from onyx.db.permission_sync_attempt import (
    mark_external_group_sync_attempt_in_progress,
)
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from onyx.redis.redis_connector_ext_group_sync import (
    RedisConnectorExternalGroupSyncPayload,
)
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.server.utils import make_short_id
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


_EXTERNAL_GROUP_BATCH_SIZE = 100


def _fail_external_group_sync_attempt(attempt_id: int, error_msg: str) -> None:
    """Helper to mark an external group sync attempt as failed with an error message."""
    with get_session_with_current_tenant() as db_session:
        mark_external_group_sync_attempt_failed(
            attempt_id, db_session, error_message=error_msg
        )


def _get_fence_validation_block_expiration() -> int:
    """
    Compute the expiration time for the fence validation block signal.
    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.
    """
    base_expiration = 300  # seconds

    if not MULTI_TENANT:
        return base_expiration

    try:
        beat_multiplier = OnyxRuntime.get_beat_multiplier()
    except Exception:
        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

    return int(base_expiration * beat_multiplier)


def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if external group sync is due."""

    if cc_pair.access_type != AccessType.SYNC:
        task_logger.error(
            f"Received non-sync CC Pair {cc_pair.id} for external group sync. Actual access type: {cc_pair.access_type}"
        )
        return False

    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
        task_logger.debug(
            f"Skipping group sync for CC Pair {cc_pair.id} - CC Pair is being deleted"
        )
        return False

    sync_config = get_source_perm_sync_config(cc_pair.connector.source)
    if sync_config is None:
        task_logger.debug(
            f"Skipping group sync for CC Pair {cc_pair.id} - no sync config found for {cc_pair.connector.source}"
        )
        return False

    # If there is not group sync function for the connector, we don't run the sync
    # This is fine because all sources dont necessarily have a concept of groups
    if sync_config.group_sync_config is None:
        task_logger.debug(
            f"Skipping group sync for CC Pair {cc_pair.id} - no group sync config found for {cc_pair.connector.source}"
        )
        return False

    # If the last sync is None, it has never been run so we run the sync
    last_ext_group_sync = cc_pair.last_time_external_group_sync
    if last_ext_group_sync is None:
        return True

    source_sync_period = sync_config.group_sync_config.group_sync_frequency

    # If the last sync is greater than the full fetch period, we run the sync
    next_sync = last_ext_group_sync + timedelta(seconds=source_sync_period)
    if datetime.now(timezone.utc) >= next_sync:
        return True

    return False


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
)
def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        task_logger.warning(
            f"Failed to acquire beat lock for external group sync: {tenant_id}"
        )
        return None

    try:
        cc_pair_ids_to_sync: list[int] = []
        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            # For some sources, we only want to sync one cc_pair per source type
            for source in get_all_cc_pair_agnostic_group_sync_sources():
                # These are ordered by cc_pair id so the first one is the one we want
                cc_pairs_to_dedupe = get_cc_pairs_by_source(
                    db_session,
                    source,
                    access_type=AccessType.SYNC,
                    status=ConnectorCredentialPairStatus.ACTIVE,
                )
                # dedupe cc_pairs to only keep the first one
                for cc_pair_to_remove in cc_pairs_to_dedupe[1:]:
                    cc_pairs = [
                        cc_pair
                        for cc_pair in cc_pairs
                        if cc_pair.id != cc_pair_to_remove.id
                    ]

            for cc_pair in cc_pairs:
                if _is_external_group_sync_due(cc_pair):
                    cc_pair_ids_to_sync.append(cc_pair.id)

        lock_beat.reacquire()
        for cc_pair_id in cc_pair_ids_to_sync:
            payload_id = try_creating_external_group_sync_task(
                self.app, cc_pair_id, r, tenant_id
            )
            if not payload_id:
                continue

            task_logger.info(
                f"External group sync queued: cc_pair={cc_pair_id} id={payload_id}"
            )

        # we want to run this less frequently than the overall task
        lock_beat.reacquire()
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES):
            # clear fences that don't have associated celery tasks in progress
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
                r_celery = celery_get_broker_client(self.app)
                validate_external_group_sync_fences(
                    tenant_id, self.app, r, r_replica, r_celery, lock_beat
                )
            except Exception:
                task_logger.exception(
                    "Exception while validating external group sync fences"
                )

            r.set(
                OnyxRedisSignals.BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES,
                1,
                ex=_get_fence_validation_block_expiration(),
            )
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}"
        )
        task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
    finally:
        if lock_beat.owned():
            lock_beat.release()

    task_logger.info(f"check_for_external_group_sync finished: tenant={tenant_id}")
    return True


def try_creating_external_group_sync_task(
    app: Celery,
    cc_pair_id: int,
    r: Redis,  # noqa: ARG001
    tenant_id: str,
) -> str | None:
    """Returns an int if syncing is needed. The int represents the number of sync tasks generated.
    Returns None if no syncing is required."""
    payload_id: str | None = None

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    try:
        # Dont kick off a new sync if the previous one is still running
        if redis_connector.external_group_sync.fenced:
            logger.warning(
                f"Skipping external group sync for CC Pair {cc_pair_id} - already running."
            )
            return None

        redis_connector.external_group_sync.generator_clear()
        redis_connector.external_group_sync.taskset_clear()

        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
            with get_session_with_current_tenant() as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
                    sync_type=SyncType.EXTERNAL_GROUP,
                )
        except Exception:
            task_logger.exception("insert_sync_record exceptioned.")

        # Signal active before creating fence
        redis_connector.external_group_sync.set_active()

        payload = RedisConnectorExternalGroupSyncPayload(
            id=make_short_id(),
            submitted=datetime.now(timezone.utc),
            started=None,
            celery_task_id=None,
        )
        redis_connector.external_group_sync.set_fence(payload)

        custom_task_id = f"{redis_connector.external_group_sync.taskset_key}_{uuid4()}"

        result = app.send_task(
            OnyxCeleryTask.CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK,
            kwargs=dict(
                cc_pair_id=cc_pair_id,
                tenant_id=tenant_id,
            ),
            queue=OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,
            task_id=custom_task_id,
            priority=OnyxCeleryPriority.MEDIUM,
        )

        payload.celery_task_id = result.id
        redis_connector.external_group_sync.set_fence(payload)

        payload_id = payload.id
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
        )
        task_logger.exception(
            f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
        )
        return None

    task_logger.info(
        f"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
    )
    return payload_id


@shared_task(
    name=OnyxCeleryTask.CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK,
    acks_late=False,
    soft_time_limit=JOB_TIMEOUT,
    track_started=True,
    trail=False,
    bind=True,
)
def connector_external_group_sync_generator_task(
    self: Task,  # noqa: ARG001
    cc_pair_id: int,
    tenant_id: str,
) -> None:
    """
    External group sync task for a given connector credential pair
    This task assumes that the task has already been properly fenced
    """

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
    # before the primary worker can finalize the fence
    start = time.monotonic()
    while True:
        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
            msg = (
                f"connector_external_group_sync_generator_task - timed out waiting for fence to be ready: "
                f"fence={redis_connector.external_group_sync.fence_key}"
            )
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            raise ValueError(msg)

        if not redis_connector.external_group_sync.fenced:  # The fence must exist
            msg = (
                f"connector_external_group_sync_generator_task - fence not found: "
                f"fence={redis_connector.external_group_sync.fence_key}"
            )
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            raise ValueError(msg)

        payload = redis_connector.external_group_sync.payload  # The payload must exist
        if not payload:
            msg = "connector_external_group_sync_generator_task: payload invalid or not found"
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            raise ValueError(msg)

        if payload.celery_task_id is None:
            logger.info(
                f"connector_external_group_sync_generator_task - Waiting for fence: "
                f"fence={redis_connector.external_group_sync.fence_key}"
            )
            time.sleep(1)
            continue

        logger.info(
            f"connector_external_group_sync_generator_task - Fence found, continuing...: "
            f"fence={redis_connector.external_group_sync.fence_key} "
            f"payload_id={payload.id}"
        )
        break

    lock: RedisLock = r.lock(
        OnyxRedisLocks.CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX
        + f"_{redis_connector.cc_pair_id}",
        timeout=CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT,
    )

    acquired = lock.acquire(blocking=False)
    if not acquired:
        msg = f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
        emit_background_error(msg, cc_pair_id=cc_pair_id)
        task_logger.error(msg)
        return None

    try:
        payload.started = datetime.now(timezone.utc)
        redis_connector.external_group_sync.set_fence(payload)

        _perform_external_group_sync(
            cc_pair_id=cc_pair_id,
            tenant_id=tenant_id,
        )

        with get_session_with_current_tenant() as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.EXTERNAL_GROUP,
                sync_status=SyncStatus.SUCCESS,
            )
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}"
        )
        task_logger.exception(
            f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
        )

        msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
        task_logger.exception(msg)
        emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)

        with get_session_with_current_tenant() as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.EXTERNAL_GROUP,
                sync_status=SyncStatus.FAILED,
            )

        redis_connector.external_group_sync.generator_clear()
        redis_connector.external_group_sync.taskset_clear()
        raise e
    finally:
        # we always want to clear the fence after the task is done or failed so it doesn't get stuck
        redis_connector.external_group_sync.set_fence(None)
        if lock.owned():
            lock.release()

    task_logger.info(
        f"External group sync finished: cc_pair={cc_pair_id} payload_id={payload.id}"
    )


def _perform_external_group_sync(
    cc_pair_id: int,
    tenant_id: str,
    timeout_seconds: int = JOB_TIMEOUT,
) -> None:
    # Create attempt record at the start
    with get_session_with_current_tenant() as db_session:
        attempt_id = create_external_group_sync_attempt(
            connector_credential_pair_id=cc_pair_id,
            db_session=db_session,
        )
        logger.info(
            f"Created external group sync attempt: {attempt_id} for cc_pair={cc_pair_id}"
        )

    with get_session_with_current_tenant() as db_session:
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
            eager_load_credential=True,
        )
        if cc_pair is None:
            raise ValueError(f"No connector credential pair found for id: {cc_pair_id}")

        source_type = cc_pair.connector.source
        sync_config = get_source_perm_sync_config(source_type)
        if sync_config is None:
            msg = f"No sync config found for {source_type} for cc_pair: {cc_pair_id}"
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            _fail_external_group_sync_attempt(attempt_id, msg)
            raise ValueError(msg)

        if sync_config.group_sync_config is None:
            msg = f"No group sync config found for {source_type} for cc_pair: {cc_pair_id}"
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            _fail_external_group_sync_attempt(attempt_id, msg)
            raise ValueError(msg)

        ext_group_sync_func = sync_config.group_sync_config.group_sync_func

        logger.info(
            f"Marking old external groups as stale for {source_type} for cc_pair: {cc_pair_id}"
        )
        mark_old_external_groups_as_stale(db_session, cc_pair_id)

        # Mark attempt as in progress
        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)
        logger.info(f"Marked external group sync attempt {attempt_id} as in progress")

        logger.info(
            f"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}"
        )
        external_user_group_batch: list[ExternalUserGroup] = []
        seen_users: set[str] = set()  # Track unique users across all groups
        total_groups_processed = 0
        total_group_memberships_synced = 0
        start_time = time.monotonic()
        try:
            external_user_group_generator = ext_group_sync_func(tenant_id, cc_pair)
            for external_user_group in external_user_group_generator:
                # Check if the task has exceeded its timeout
                # NOTE: Celery's soft_time_limit does not work with thread pools,
                # so we must enforce timeouts internally.
                elapsed = time.monotonic() - start_time
                if elapsed > timeout_seconds:
                    raise RuntimeError(
                        f"External group sync task timed out: "
                        f"cc_pair={cc_pair_id} "
                        f"elapsed={elapsed:.0f}s "
                        f"timeout={timeout_seconds}s "
                        f"groups_processed={total_groups_processed}"
                    )

                external_user_group_batch.append(external_user_group)

                # Track progress
                total_groups_processed += 1
                total_group_memberships_synced += len(external_user_group.user_emails)
                seen_users = seen_users.union(external_user_group.user_emails)

                if len(external_user_group_batch) >= _EXTERNAL_GROUP_BATCH_SIZE:
                    logger.debug(
                        f"New external user groups: {external_user_group_batch}"
                    )
                    upsert_external_groups(
                        db_session=db_session,
                        cc_pair_id=cc_pair_id,
                        external_groups=external_user_group_batch,
                        source=cc_pair.connector.source,
                    )
                    external_user_group_batch = []

            if external_user_group_batch:
                logger.debug(f"New external user groups: {external_user_group_batch}")
                upsert_external_groups(
                    db_session=db_session,
                    cc_pair_id=cc_pair_id,
                    external_groups=external_user_group_batch,
                    source=cc_pair.connector.source,
                )
        except Exception as e:
            format_error_for_logging(e)

            # Mark as failed (this also updates progress to show partial progress)
            mark_external_group_sync_attempt_failed(
                attempt_id, db_session, error_message=str(e)
            )

            # TODO: add some notification to the admins here
            logger.exception(
                f"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}"
            )
            raise e

        logger.info(
            f"Removing stale external groups for {source_type} for cc_pair: {cc_pair_id}"
        )
        remove_stale_external_groups(db_session, cc_pair_id)

        # Calculate total unique users processed
        total_users_processed = len(seen_users)

        # Complete the sync attempt with final progress
        complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=total_users_processed,
            total_groups_processed=total_groups_processed,
            total_group_memberships_synced=total_group_memberships_synced,
            errors_encountered=0,
        )
        logger.info(
            f"Completed external group sync attempt {attempt_id}: "
            f"{total_groups_processed} groups, {total_users_processed} users, "
            f"{total_group_memberships_synced} memberships"
        )

        mark_all_relevant_cc_pairs_as_external_group_synced(db_session, cc_pair)


def validate_external_group_sync_fences(
    tenant_id: str,
    celery_app: Celery,  # noqa: ARG001
    r: Redis,  # noqa: ARG001
    r_replica: Redis,
    r_celery: Redis,
    lock_beat: RedisLock,
) -> None:
    reserved_tasks = celery_get_unacked_task_ids(
        OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
    )

    # validate all existing external group sync tasks
    lock_beat.reacquire()
    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
    for key in keys:
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
        if not key_str.startswith(RedisConnectorExternalGroupSync.FENCE_PREFIX):
            continue

        validate_external_group_sync_fence(
            tenant_id,
            key_bytes,
            reserved_tasks,
            r_celery,
        )

        lock_beat.reacquire()
    return


def validate_external_group_sync_fence(
    tenant_id: str,
    key_bytes: bytes,
    reserved_tasks: set[str],
    r_celery: Redis,
) -> None:
    """Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
    This can happen if the indexing worker hard crashes or is terminated.
    Being in this bad state means the fence will never clear without help, so this function
    gives the help.

    How this works:
    1. This function renews the active signal with a 5 minute TTL under the following conditions
    1.2. When the task is seen in the redis queue
    1.3. When the task is seen in the reserved / prefetched list

    2. Externally, the active signal is renewed when:
    2.1. The fence is created
    2.2. The indexing watchdog checks the spawned task.

    3. The TTL allows us to get through the transitions on fence startup
    and when the task starts executing.

    More TTL clarification: it is seemingly impossible to exactly query Celery for
    whether a task is in the queue or currently executing.
    1. An unknown task id is always returned as state PENDING.
    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
    and the time it actually starts on the worker.
    """
    # if the fence doesn't exist, there's nothing to do
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        msg = (
            f"validate_external_group_sync_fence - could not parse id from {fence_key}"
        )
        emit_background_error(msg)
        task_logger.error(msg)
        return

    cc_pair_id = int(cc_pair_id_str)

    # parse out metadata and initialize the helper class with it
    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))

    # check to see if the fence/payload exists
    if not redis_connector.external_group_sync.fenced:
        return

    try:
        payload = redis_connector.external_group_sync.payload
    except ValidationError:
        msg = (
            "validate_external_group_sync_fence - "
            "Resetting fence because fence schema is out of date: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key}"
        )
        task_logger.exception(msg)
        emit_background_error(msg, cc_pair_id=cc_pair_id)

        redis_connector.external_group_sync.reset()
        return

    if not payload:
        return

    if not payload.celery_task_id:
        return

    # OK, there's actually something for us to validate
    found = celery_find_task(
        payload.celery_task_id, OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
    )
    if found:
        # the celery task exists in the redis queue
        # redis_connector_index.set_active()
        return

    if payload.celery_task_id in reserved_tasks:
        # the celery task was prefetched and is reserved within the indexing worker
        # redis_connector_index.set_active()
        return

    # we may want to enable this check if using the active task list somehow isn't good enough
    # if redis_connector_index.generator_locked():
    #     logger.info(f"{payload.celery_task_id} is currently executing.")

    # if we get here, we didn't find any direct indication that the associated celery tasks exist,
    # but they still might be there due to gaps in our ability to check states during transitions
    # Checking the active signal safeguards us against these transition periods
    # (which has a duration that allows us to bridge those gaps)
    # if redis_connector_index.active():
    # return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
    emit_background_error(
        message=(
            "validate_external_group_sync_fence - "
            "Resetting fence because no associated celery tasks were found: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key} "
            f"payload_id={payload.id}"
        ),
        cc_pair_id=cc_pair_id,
    )

    redis_connector.external_group_sync.reset()
    return


================================================
FILE: backend/ee/onyx/background/celery/tasks/hooks/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/hooks/tasks.py
================================================
from celery import shared_task

from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.hook import cleanup_old_execution_logs__no_commit
from onyx.utils.logger import setup_logger

logger = setup_logger()

_HOOK_EXECUTION_LOG_RETENTION_DAYS: int = 30


@shared_task(
    name=OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
)
def hook_execution_log_cleanup_task(*, tenant_id: str) -> None:  # noqa: ARG001
    try:
        with get_session_with_current_tenant() as db_session:
            deleted: int = cleanup_old_execution_logs__no_commit(
                db_session=db_session,
                max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS,
            )
            db_session.commit()
            if deleted:
                logger.info(
                    f"Deleted {deleted} hook execution log(s) older than "
                    f"{_HOOK_EXECUTION_LOG_RETENTION_DAYS} days."
                )
    except Exception:
        logger.exception("Failed to clean up hook execution logs")
        raise


================================================
FILE: backend/ee/onyx/background/celery/tasks/query_history/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/query_history/tasks.py
================================================
import csv
import io
from datetime import datetime

from celery import shared_task
from celery import Task

from ee.onyx.server.query_history.api import fetch_and_process_chat_session_history
from ee.onyx.server.query_history.api import ONYX_ANONYMIZED_EMAIL
from ee.onyx.server.query_history.models import QuestionAnswerPairSnapshot
from onyx.background.task_utils import construct_query_history_report_name
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import FileType
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import QueryHistoryType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.tasks import delete_task_with_id
from onyx.db.tasks import mark_task_as_finished_with_id
from onyx.db.tasks import mark_task_as_started_with_id
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger


logger = setup_logger()


@shared_task(
    name=OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
    trail=False,
)
def export_query_history_task(
    self: Task,
    *,
    start: datetime,
    end: datetime,
    start_time: datetime,
    # Need to include the tenant_id since the TenantAwareTask needs this
    tenant_id: str,  # noqa: ARG001
) -> None:
    if not self.request.id:
        raise RuntimeError("No task id defined for this task; cannot identify it")

    task_id = self.request.id
    stream = io.StringIO()
    writer = csv.DictWriter(
        stream,
        fieldnames=list(QuestionAnswerPairSnapshot.model_fields.keys()),
    )
    writer.writeheader()

    with get_session_with_current_tenant() as db_session:
        try:
            mark_task_as_started_with_id(
                db_session=db_session,
                task_id=task_id,
            )

            snapshot_generator = fetch_and_process_chat_session_history(
                db_session=db_session,
                start=start,
                end=end,
            )

            for snapshot in snapshot_generator:
                if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
                    snapshot.user_email = ONYX_ANONYMIZED_EMAIL

                writer.writerows(
                    qa_pair.to_json()
                    for qa_pair in QuestionAnswerPairSnapshot.from_chat_session_snapshot(
                        snapshot
                    )
                )

        except Exception:
            logger.exception(f"Failed to export query history with {task_id=}")
            mark_task_as_finished_with_id(
                db_session=db_session,
                task_id=task_id,
                success=False,
            )
            raise

    report_name = construct_query_history_report_name(task_id)
    with get_session_with_current_tenant() as db_session:
        try:
            stream.seek(0)
            get_default_file_store().save_file(
                content=stream,
                display_name=report_name,
                file_origin=FileOrigin.QUERY_HISTORY_CSV,
                file_type=FileType.CSV,
                file_metadata={
                    "start": start.isoformat(),
                    "end": end.isoformat(),
                    "start_time": start_time.isoformat(),
                },
                file_id=report_name,
            )

            delete_task_with_id(
                db_session=db_session,
                task_id=task_id,
            )
        except Exception:
            logger.exception(
                f"Failed to save query history export file; {report_name=}"
            )
            mark_task_as_finished_with_id(
                db_session=db_session,
                task_id=task_id,
                success=False,
            )
            raise


================================================
FILE: backend/ee/onyx/background/celery/tasks/tenant_provisioning/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
================================================
"""
Periodic tasks for tenant pre-provisioning.
"""

import asyncio
import datetime
import uuid

from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock

from ee.onyx.server.tenants.provisioning import setup_tenant
from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
from ee.onyx.server.tenants.schema_management import get_current_alembic_version
from ee.onyx.server.tenants.schema_management import run_alembic_migrations
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.models import AvailableTenant
from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import TENANT_ID_PREFIX

# Maximum tenants to provision in a single task run.
# Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
_MAX_TENANTS_PER_RUN = 5

# Time limits sized for worst-case: provisioning up to _MAX_TENANTS_PER_RUN new tenants
# (~90s each) plus migrating up to TARGET_AVAILABLE_TENANTS pool tenants (~90s each).
_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 20  # 20 minutes
_TENANT_PROVISIONING_TIME_LIMIT = 60 * 25  # 25 minutes


@shared_task(
    name=OnyxCeleryTask.CLOUD_CHECK_AVAILABLE_TENANTS,
    queue=OnyxCeleryQueues.MONITORING,
    ignore_result=True,
    soft_time_limit=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
    time_limit=_TENANT_PROVISIONING_TIME_LIMIT,
    trail=False,
    bind=True,
)
def check_available_tenants(self: Task) -> None:  # noqa: ARG001
    """
    Check if we have enough pre-provisioned tenants available.
    If not, trigger the pre-provisioning of new tenants.
    """
    task_logger.info("STARTING CHECK_AVAILABLE_TENANTS")
    if not MULTI_TENANT:
        task_logger.info(
            "Multi-tenancy is not enabled, skipping tenant pre-provisioning"
        )
        return

    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    lock_check: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_AVAILABLE_TENANTS_LOCK,
        timeout=_TENANT_PROVISIONING_TIME_LIMIT,
    )

    # These tasks should never overlap
    if not lock_check.acquire(blocking=False):
        task_logger.info(
            "Skipping check_available_tenants task because it is already running"
        )
        return

    try:
        # Get the current count of available tenants
        with get_session_with_shared_schema() as db_session:
            num_available_tenants = db_session.query(AvailableTenant).count()

        # Get the target number of available tenants
        num_minimum_available_tenants = TARGET_AVAILABLE_TENANTS

        # Calculate how many new tenants we need to provision
        if num_available_tenants < num_minimum_available_tenants:
            tenants_to_provision = num_minimum_available_tenants - num_available_tenants
        else:
            tenants_to_provision = 0

        task_logger.info(
            f"Available tenants: {num_available_tenants}, "
            f"Target minimum available tenants: {num_minimum_available_tenants}, "
            f"To provision: {tenants_to_provision}"
        )

        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
        if batch_size < tenants_to_provision:
            task_logger.info(
                f"Capping batch to {batch_size} (need {tenants_to_provision}, will catch up next cycle)"
            )

        provisioned = 0
        for i in range(batch_size):
            task_logger.info(f"Provisioning tenant {i + 1}/{batch_size}")
            try:
                if pre_provision_tenant():
                    provisioned += 1
            except Exception:
                task_logger.exception(
                    f"Failed to provision tenant {i + 1}/{batch_size}, continuing with remaining tenants"
                )

        task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")

        # Migrate any pool tenants that were provisioned before a new migration was deployed
        _migrate_stale_pool_tenants()

    except Exception:
        task_logger.exception("Error in check_available_tenants task")

    finally:
        try:
            lock_check.release()
        except Exception:
            task_logger.warning(
                "Could not release check lock (likely expired), continuing"
            )


def _migrate_stale_pool_tenants() -> None:
    """
    Run alembic upgrade head on all pool tenants. Since alembic upgrade head is
    idempotent, tenants already at head are a fast no-op. This ensures pool
    tenants are always current so that signup doesn't hit schema mismatches
    (e.g. missing columns added after the tenant was pre-provisioned).
    """
    with get_session_with_shared_schema() as db_session:
        pool_tenants = db_session.query(AvailableTenant).all()
        tenant_ids = [t.tenant_id for t in pool_tenants]

    if not tenant_ids:
        return

    task_logger.info(
        f"Checking {len(tenant_ids)} pool tenant(s) for pending migrations"
    )

    for tenant_id in tenant_ids:
        try:
            run_alembic_migrations(tenant_id)
            new_version = get_current_alembic_version(tenant_id)
            with get_session_with_shared_schema() as db_session:
                tenant = (
                    db_session.query(AvailableTenant)
                    .filter_by(tenant_id=tenant_id)
                    .first()
                )
                if tenant and tenant.alembic_version != new_version:
                    task_logger.info(
                        f"Migrated pool tenant {tenant_id}: {tenant.alembic_version} -> {new_version}"
                    )
                    tenant.alembic_version = new_version
                    db_session.commit()
        except Exception:
            task_logger.exception(
                f"Failed to migrate pool tenant {tenant_id}, skipping"
            )


def pre_provision_tenant() -> bool:
    """
    Pre-provision a new tenant and store it in the NewAvailableTenant table.
    This function fully sets up the tenant with all necessary configurations,
    so it's ready to be assigned to a user immediately.

    Returns True if a tenant was successfully provisioned, False otherwise.
    """
    # The MULTI_TENANT check is now done at the caller level (check_available_tenants)
    # rather than inside this function

    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    lock_provision: RedisLock = r.lock(
        OnyxRedisLocks.CLOUD_PRE_PROVISION_TENANT_LOCK,
        timeout=_TENANT_PROVISIONING_TIME_LIMIT,
    )

    # Allow multiple pre-provisioning tasks to run, but ensure they don't overlap
    if not lock_provision.acquire(blocking=False):
        task_logger.warning(
            "Skipping pre_provision_tenant — could not acquire provision lock"
        )
        return False

    tenant_id: str | None = None
    try:
        # Generate a new tenant ID
        tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())
        task_logger.info(f"Pre-provisioning tenant: {tenant_id}")

        # Create the schema for the new tenant
        schema_created = create_schema_if_not_exists(tenant_id)
        if schema_created:
            task_logger.debug(f"Created schema for tenant: {tenant_id}")
        else:
            task_logger.debug(f"Schema already exists for tenant: {tenant_id}")

        # Set up the tenant with all necessary configurations
        task_logger.debug(f"Setting up tenant configuration: {tenant_id}")
        asyncio.run(setup_tenant(tenant_id))
        task_logger.debug(f"Tenant configuration completed: {tenant_id}")

        # Get the current Alembic version
        alembic_version = get_current_alembic_version(tenant_id)
        task_logger.debug(
            f"Tenant {tenant_id} using Alembic version: {alembic_version}"
        )

        # Store the pre-provisioned tenant in the database
        task_logger.debug(f"Storing pre-provisioned tenant in database: {tenant_id}")
        with get_session_with_shared_schema() as db_session:
            # Use a transaction to ensure atomicity
            db_session.begin()
            try:
                new_tenant = AvailableTenant(
                    tenant_id=tenant_id,
                    alembic_version=alembic_version,
                    date_created=datetime.datetime.now(),
                )
                db_session.add(new_tenant)
                db_session.commit()
                task_logger.info(f"Successfully pre-provisioned tenant: {tenant_id}")
                return True
            except Exception:
                db_session.rollback()
                task_logger.error(
                    f"Failed to store pre-provisioned tenant: {tenant_id}",
                    exc_info=True,
                )
                raise

    except Exception:
        task_logger.error("Error in pre_provision_tenant task", exc_info=True)
        # If we have a tenant_id, attempt to rollback any partially completed provisioning
        if tenant_id:
            task_logger.info(
                f"Rolling back failed tenant provisioning for: {tenant_id}"
            )
            try:
                from ee.onyx.server.tenants.provisioning import (
                    rollback_tenant_provisioning,
                )

                asyncio.run(rollback_tenant_provisioning(tenant_id))
            except Exception:
                task_logger.exception(f"Error during rollback for tenant: {tenant_id}")
        return False
    finally:
        try:
            lock_provision.release()
        except Exception:
            task_logger.warning(
                "Could not release provision lock (likely expired), continuing"
            )


================================================
FILE: backend/ee/onyx/background/celery/tasks/ttl_management/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py
================================================
from uuid import UUID

from celery import shared_task
from celery import Task

from ee.onyx.background.celery_utils import should_perform_chat_ttl_check
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.chat import delete_chat_session
from onyx.db.chat import get_chat_sessions_older_than
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger

logger = setup_logger()


@shared_task(
    name=OnyxCeleryTask.PERFORM_TTL_MANAGEMENT_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
    trail=False,
)
def perform_ttl_management_task(
    self: Task, retention_limit_days: int, *, tenant_id: str  # noqa: ARG001
) -> None:
    task_id = self.request.id
    if not task_id:
        raise RuntimeError("No task id defined for this task; cannot identify it")

    user_id: UUID | None = None
    session_id: UUID | None = None
    try:
        with get_session_with_current_tenant() as db_session:

            old_chat_sessions = get_chat_sessions_older_than(
                retention_limit_days, db_session
            )

        for user_id, session_id in old_chat_sessions:
            # one session per delete so that we don't blow up if a deletion fails.
            with get_session_with_current_tenant() as db_session:
                delete_chat_session(
                    user_id,
                    session_id,
                    db_session,
                    include_deleted=True,
                    hard_delete=True,
                )

    except Exception:
        logger.exception(
            f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
        )
        raise


@shared_task(
    name=OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
)
def check_ttl_management_task(*, tenant_id: str) -> None:
    """Runs periodically to check if any ttl tasks should be run and adds them
    to the queue"""

    settings = load_settings()
    retention_limit_days = settings.maximum_chat_retention_days
    with get_session_with_current_tenant() as db_session:
        if should_perform_chat_ttl_check(retention_limit_days, db_session):
            perform_ttl_management_task.apply_async(
                kwargs=dict(
                    retention_limit_days=retention_limit_days, tenant_id=tenant_id
                ),
            )


================================================
FILE: backend/ee/onyx/background/celery/tasks/usage_reporting/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/usage_reporting/tasks.py
================================================
from datetime import datetime
from uuid import UUID

from celery import shared_task
from celery import Task

from ee.onyx.server.reporting.usage_export_generation import create_new_usage_report
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.utils.logger import setup_logger

logger = setup_logger()


@shared_task(
    name=OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
    trail=False,
)
def generate_usage_report_task(
    self: Task,  # noqa: ARG001
    *,
    tenant_id: str,  # noqa: ARG001
    user_id: str | None = None,
    period_from: str | None = None,
    period_to: str | None = None,
) -> None:
    """User-initiated usage report generation task"""
    # Parse period if provided
    period = None
    if period_from and period_to:
        period = (
            datetime.fromisoformat(period_from),
            datetime.fromisoformat(period_to),
        )

    # Generate the report
    with get_session_with_current_tenant() as db_session:
        create_new_usage_report(
            db_session=db_session,
            user_id=UUID(user_id) if user_id else None,
            period=period,
        )


================================================
FILE: backend/ee/onyx/background/celery/tasks/vespa/__init__.py
================================================


================================================
FILE: backend/ee/onyx/background/celery/tasks/vespa/tasks.py
================================================
from typing import cast

from redis import Redis
from sqlalchemy.orm import Session

from ee.onyx.db.user_group import delete_user_group
from ee.onyx.db.user_group import fetch_user_group
from ee.onyx.db.user_group import mark_user_group_as_synced
from ee.onyx.db.user_group import prepare_user_group_for_deletion
from onyx.background.celery.apps.app_base import task_logger
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.sync_record import update_sync_record_status
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger

logger = setup_logger()


def monitor_usergroup_taskset(
    tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
    """This function is likely to move in the worker refactor happening next."""
    fence_key = key_bytes.decode("utf-8")
    usergroup_id_str = RedisUserGroup.get_id_from_fence_key(fence_key)
    if not usergroup_id_str:
        task_logger.warning(f"Could not parse usergroup id from {fence_key}")
        return

    try:
        usergroup_id = int(usergroup_id_str)
    except ValueError:
        task_logger.exception(f"usergroup_id ({usergroup_id_str}) is not an integer!")
        raise

    rug = RedisUserGroup(tenant_id, usergroup_id)
    if not rug.fenced:
        return

    initial_count = rug.payload
    if initial_count is None:
        return

    count = cast(int, r.scard(rug.taskset_key))
    task_logger.info(
        f"User group sync progress: usergroup_id={usergroup_id} remaining={count} initial={initial_count}"
    )
    if count > 0:
        update_sync_record_status(
            db_session=db_session,
            entity_id=usergroup_id,
            sync_type=SyncType.USER_GROUP,
            sync_status=SyncStatus.IN_PROGRESS,
            num_docs_synced=count,
        )
        return

    user_group = fetch_user_group(db_session=db_session, user_group_id=usergroup_id)
    if user_group:
        usergroup_name = user_group.name
        try:
            if user_group.is_up_for_deletion:
                # this prepare should have been run when the deletion was scheduled,
                # but run it again to be sure we're ready to go
                mark_user_group_as_synced(db_session, user_group)
                prepare_user_group_for_deletion(db_session, usergroup_id)
                delete_user_group(db_session=db_session, user_group=user_group)

                update_sync_record_status(
                    db_session=db_session,
                    entity_id=usergroup_id,
                    sync_type=SyncType.USER_GROUP,
                    sync_status=SyncStatus.SUCCESS,
                    num_docs_synced=initial_count,
                )

                task_logger.info(
                    f"Deleted usergroup: name={usergroup_name} id={usergroup_id}"
                )
            else:
                mark_user_group_as_synced(db_session=db_session, user_group=user_group)

                update_sync_record_status(
                    db_session=db_session,
                    entity_id=usergroup_id,
                    sync_type=SyncType.USER_GROUP,
                    sync_status=SyncStatus.SUCCESS,
                    num_docs_synced=initial_count,
                )

                task_logger.info(
                    f"Synced usergroup. name={usergroup_name} id={usergroup_id}"
                )
        except Exception as e:
            update_sync_record_status(
                db_session=db_session,
                entity_id=usergroup_id,
                sync_type=SyncType.USER_GROUP,
                sync_status=SyncStatus.FAILED,
                num_docs_synced=initial_count,
            )
            raise e

    rug.reset()


================================================
FILE: backend/ee/onyx/background/celery_utils.py
================================================
from sqlalchemy.orm import Session

from ee.onyx.background.task_name_builders import name_chat_ttl_task
from onyx.db.tasks import check_task_is_live_and_not_timed_out
from onyx.db.tasks import get_latest_task
from onyx.utils.logger import setup_logger

logger = setup_logger()


def should_perform_chat_ttl_check(
    retention_limit_days: float | None, db_session: Session
) -> bool:
    # TODO: make this a check for None and add behavior for 0 day TTL
    if not retention_limit_days:
        return False

    task_name = name_chat_ttl_task(retention_limit_days)
    latest_task = get_latest_task(task_name, db_session)
    if not latest_task:
        return True

    if check_task_is_live_and_not_timed_out(latest_task, db_session):
        logger.debug(f"{task_name} is already being performed. Skipping.")
        return False
    return True


================================================
FILE: backend/ee/onyx/background/task_name_builders.py
================================================
from datetime import datetime

from onyx.configs.constants import OnyxCeleryTask


QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK


def name_chat_ttl_task(
    retention_limit_days: float,
    tenant_id: str | None = None,  # noqa: ARG001
) -> str:
    return f"chat_ttl_{retention_limit_days}_days"


def query_history_task_name(start: datetime, end: datetime) -> str:
    return f"{QUERY_HISTORY_TASK_NAME_PREFIX}_{start}_{end}"


================================================
FILE: backend/ee/onyx/configs/__init__.py
================================================


================================================
FILE: backend/ee/onyx/configs/app_configs.py
================================================
import json
import os


#####
# Auto Permission Sync
#####
# should generally only be used for sources that support polling of permissions
# e.g. can pull in only permission changes rather than having to go through all
# documents every time
DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)


#####
# Confluence
#####

# In seconds, default is 30 minutes
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
)
# In seconds, default is 30 minutes
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)
# This is a boolean that determines if anonymous access is public
# Default behavior is to not make the page public and instead add a group
# that contains all the users that we found in Confluence
CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
    os.environ.get("CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC", "").lower() == "true"
)


#####
# JIRA
#####

# In seconds, default is 30 minutes
JIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("JIRA_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)
# In seconds, default is 30 minutes
JIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
)


#####
# Google Drive
#####
GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.environ.get("GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)


#####
# GitHub
#####
# In seconds, default is 5 minutes
GITHUB_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("GITHUB_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)
# In seconds, default is 5 minutes
GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.environ.get("GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)


#####
# Slack
#####
SLACK_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("SLACK_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)

NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2)


#####
# Teams
#####
# In seconds, default is 5 minutes
TEAMS_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("TEAMS_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)

#####
# SharePoint
#####
# In seconds, default is 30 minutes
SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)

# In seconds, default is 5 minutes
SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.environ.get("SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)


####
# Celery Job Frequency
####
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(
    os.environ.get("CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS") or 1
)  # float for easier testing


STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")

# JWT Public Key URL
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)


# Super Users
SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", "[]"))
SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")

POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY")
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
POSTHOG_DEBUG_LOGS_ENABLED = (
    os.environ.get("POSTHOG_DEBUG_LOGS_ENABLED", "").lower() == "true"
)

MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")

HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

GATED_TENANTS_KEY = "gated_tenants"

# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
    os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "true").lower() == "true"
)

# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints
# Used when MULTI_TENANT=false (self-hosted mode)
CLOUD_DATA_PLANE_URL = os.environ.get(
    "CLOUD_DATA_PLANE_URL", "https://cloud.onyx.app/api"
)


================================================
FILE: backend/ee/onyx/configs/license_enforcement_config.py
================================================
"""Constants for license enforcement.

This file is the single source of truth for:
1. Paths that bypass license enforcement (always accessible)
2. Paths that require an EE license (EE-only features)

Import these constants in both production code and tests to ensure consistency.
"""

# Paths that are ALWAYS accessible, even when license is expired/gated.
# These enable users to:
#   /auth - Log in/out (users can't fix billing if locked out of auth)
#   /license - Fetch, upload, or check license status
#   /health - Health checks for load balancers/orchestrators
#   /me - Basic user info needed for UI rendering
#   /settings, /enterprise-settings - View app status and branding
#   /billing - Unified billing API
#   /proxy - Self-hosted proxy endpoints (have own license-based auth)
#   /tenants/billing-* - Legacy billing endpoints (backwards compatibility)
#   /manage/users, /users - User management (needed for seat limit resolution)
#   /notifications - Needed for UI to load properly
LICENSE_ENFORCEMENT_ALLOWED_PREFIXES: frozenset[str] = frozenset(
    {
        "/auth",
        "/license",
        "/health",
        "/me",
        "/settings",
        "/enterprise-settings",
        # Billing endpoints (unified API for both MT and self-hosted)
        "/billing",
        "/admin/billing",
        # Proxy endpoints for self-hosted billing (no tenant context)
        "/proxy",
        # Legacy tenant billing endpoints (kept for backwards compatibility)
        "/tenants/billing-information",
        "/tenants/create-customer-portal-session",
        "/tenants/create-subscription-session",
        # User management - needed to remove users when seat limit exceeded
        "/manage/users",
        "/manage/admin/users",
        "/manage/admin/valid-domains",
        "/manage/admin/deactivate-user",
        "/manage/admin/delete-user",
        "/users",
        # Notifications - needed for UI to load properly
        "/notifications",
    }
)

# EE-only paths that require a valid license.
# Users without a license (community edition) cannot access these.
# These are blocked even when user has never subscribed (no license).
EE_ONLY_PATH_PREFIXES: frozenset[str] = frozenset(
    {
        # User groups and access control
        "/manage/admin/user-group",
        # Analytics and reporting
        "/analytics",
        # Query history (admin chat session endpoints)
        "/admin/chat-sessions",
        "/admin/chat-session-history",
        "/admin/query-history",
        # Usage reporting/export
        "/admin/usage-report",
        # Standard answers (canned responses)
        "/manage/admin/standard-answer",
        # Token rate limits
        "/admin/token-rate-limits",
        # Evals
        "/evals",
        # Hook extensions
        "/admin/hooks",
    }
)


================================================
FILE: backend/ee/onyx/connectors/perm_sync_valid.py
================================================
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.interfaces import BaseConnector


def validate_confluence_perm_sync(connector: ConfluenceConnector) -> None:
    """
    Validate that the connector is configured correctly for permissions syncing.
    """


def validate_drive_perm_sync(connector: GoogleDriveConnector) -> None:
    """
    Validate that the connector is configured correctly for permissions syncing.
    """


def validate_perm_sync(connector: BaseConnector) -> None:
    """
    Override this if your connector needs to validate permissions syncing.
    Raise an exception if invalid, otherwise do nothing.

    Default is a no-op (always successful).
    """
    if isinstance(connector, ConfluenceConnector):
        validate_confluence_perm_sync(connector)
    elif isinstance(connector, GoogleDriveConnector):
        validate_drive_perm_sync(connector)


================================================
FILE: backend/ee/onyx/db/__init__.py
================================================


================================================
FILE: backend/ee/onyx/db/analytics.py
================================================
import datetime
from collections.abc import Sequence
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import case
from sqlalchemy import cast
from sqlalchemy import Date
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.constants import MessageType
from onyx.db.models import ChatMessage
from onyx.db.models import ChatMessageFeedback
from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.models import UserRole


def fetch_query_analytics(
    start: datetime.datetime,
    end: datetime.datetime,
    db_session: Session,
) -> Sequence[tuple[int, int, int, datetime.date]]:
    stmt = (
        select(
            func.count(ChatMessage.id),
            func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),
            func.sum(
                case(
                    (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712
                    else_=0,  # noqa: E712
                )
            ),
            cast(ChatMessage.time_sent, Date),
        )
        .join(
            ChatMessageFeedback,
            ChatMessageFeedback.chat_message_id == ChatMessage.id,
            isouter=True,
        )
        .where(
            ChatMessage.time_sent >= start,
        )
        .where(
            ChatMessage.time_sent <= end,
        )
        .where(ChatMessage.message_type == MessageType.ASSISTANT)
        .group_by(cast(ChatMessage.time_sent, Date))
        .order_by(cast(ChatMessage.time_sent, Date))
    )

    return db_session.execute(stmt).all()  # type: ignore


def fetch_per_user_query_analytics(
    start: datetime.datetime,
    end: datetime.datetime,
    db_session: Session,
) -> Sequence[tuple[int, int, int, datetime.date, UUID]]:
    stmt = (
        select(
            func.count(ChatMessage.id),
            func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),
            func.sum(
                case(
                    (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712
                    else_=0,  # noqa: E712
                )
            ),
            cast(ChatMessage.time_sent, Date),
            ChatSession.user_id,
        )
        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)
        # Include chats that have no explicit feedback instead of dropping them
        .join(
            ChatMessageFeedback,
            ChatMessageFeedback.chat_message_id == ChatMessage.id,
            isouter=True,
        )
        .where(
            ChatMessage.time_sent >= start,
        )
        .where(
            ChatMessage.time_sent <= end,
        )
        .where(ChatMessage.message_type == MessageType.ASSISTANT)
        .group_by(cast(ChatMessage.time_sent, Date), ChatSession.user_id)
        .order_by(cast(ChatMessage.time_sent, Date), ChatSession.user_id)
    )

    return db_session.execute(stmt).all()  # type: ignore


def fetch_onyxbot_analytics(
    start: datetime.datetime,
    end: datetime.datetime,
    db_session: Session,
) -> Sequence[tuple[int, int, datetime.date]]:
    """Gets the:
    Date of each set of aggregated statistics
    Number of OnyxBot Queries (Chat Sessions)
    Number of instances of Negative feedback OR Needing additional help
        (only counting the last feedback)
    """
    # Get every chat session in the time range which is a Onyxbot flow
    # along with the first Assistant message which is the response to the user question.
    # Generally there should not be more than one AI message per chat session of this type
    subquery_first_ai_response = (
        db_session.query(
            ChatMessage.chat_session_id.label("chat_session_id"),
            func.min(ChatMessage.id).label("chat_message_id"),
        )
        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)
        .where(
            ChatSession.time_created >= start,
            ChatSession.time_created <= end,
            ChatSession.onyxbot_flow.is_(True),
        )
        .where(
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(ChatMessage.chat_session_id)
        .subquery()
    )

    # Get the chat message ids and most recent feedback for each of those chat messages,
    # not including the messages that have no feedback
    subquery_last_feedback = (
        db_session.query(
            ChatMessageFeedback.chat_message_id.label("chat_message_id"),
            func.max(ChatMessageFeedback.id).label("max_feedback_id"),
        )
        .group_by(ChatMessageFeedback.chat_message_id)
        .subquery()
    )

    results = (
        db_session.query(
            func.count(ChatSession.id).label("total_sessions"),
            # Need to explicitly specify this as False to handle the NULL case so the cases without
            # feedback aren't counted against Onyxbot
            func.sum(
                case(
                    (
                        or_(
                            ChatMessageFeedback.is_positive.is_(False),
                            ChatMessageFeedback.required_followup.is_(True),
                        ),
                        1,
                    ),
                    else_=0,
                )
            ).label("negative_answer"),
            cast(ChatSession.time_created, Date).label("session_date"),
        )
        .join(
            subquery_first_ai_response,
            ChatSession.id == subquery_first_ai_response.c.chat_session_id,
        )
        # Combine the chat sessions with latest feedback to get the latest feedback for the first AI
        # message of the chat session where the chat session is Onyxbot type and within the time
        # range specified. Left/outer join used here to ensure that if no feedback, a null is used
        # for the feedback id
        .outerjoin(
            subquery_last_feedback,
            subquery_first_ai_response.c.chat_message_id
            == subquery_last_feedback.c.chat_message_id,
        )
        # Join the actual feedback table to get the feedback info for the sums
        # Outer join because the "last feedback" may be null
        .outerjoin(
            ChatMessageFeedback,
            ChatMessageFeedback.id == subquery_last_feedback.c.max_feedback_id,
        )
        .group_by(cast(ChatSession.time_created, Date))
        .order_by(cast(ChatSession.time_created, Date))
        .all()
    )

    return [tuple(row) for row in results]


def fetch_persona_message_analytics(
    db_session: Session,
    persona_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
) -> list[tuple[int, datetime.date]]:
    """Gets the daily message counts for a specific persona within the given time range."""
    query = (
        select(
            func.count(ChatMessage.id),
            cast(ChatMessage.time_sent, Date),
        )
        .join(
            ChatSession,
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
            ChatSession.persona_id == persona_id,
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(cast(ChatMessage.time_sent, Date))
        .order_by(cast(ChatMessage.time_sent, Date))
    )

    return [tuple(row) for row in db_session.execute(query).all()]


def fetch_persona_unique_users(
    db_session: Session,
    persona_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
) -> list[tuple[int, datetime.date]]:
    """Gets the daily unique user counts for a specific persona within the given time range."""
    query = (
        select(
            func.count(func.distinct(ChatSession.user_id)),
            cast(ChatMessage.time_sent, Date),
        )
        .join(
            ChatSession,
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
            ChatSession.persona_id == persona_id,
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(cast(ChatMessage.time_sent, Date))
        .order_by(cast(ChatMessage.time_sent, Date))
    )

    return [tuple(row) for row in db_session.execute(query).all()]


def fetch_assistant_message_analytics(
    db_session: Session,
    assistant_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
) -> list[tuple[int, datetime.date]]:
    """
    Gets the daily message counts for a specific assistant in the given time range.
    """
    query = (
        select(
            func.count(ChatMessage.id),
            cast(ChatMessage.time_sent, Date),
        )
        .join(
            ChatSession,
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
            ChatSession.persona_id == assistant_id,
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(cast(ChatMessage.time_sent, Date))
        .order_by(cast(ChatMessage.time_sent, Date))
    )

    return [tuple(row) for row in db_session.execute(query).all()]


def fetch_assistant_unique_users(
    db_session: Session,
    assistant_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
) -> list[tuple[int, datetime.date]]:
    """
    Gets the daily unique user counts for a specific assistant in the given time range.
    """
    query = (
        select(
            func.count(func.distinct(ChatSession.user_id)),
            cast(ChatMessage.time_sent, Date),
        )
        .join(
            ChatSession,
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
            ChatSession.persona_id == assistant_id,
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(cast(ChatMessage.time_sent, Date))
        .order_by(cast(ChatMessage.time_sent, Date))
    )

    return [tuple(row) for row in db_session.execute(query).all()]


def fetch_assistant_unique_users_total(
    db_session: Session,
    assistant_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
) -> int:
    """
    Gets the total number of distinct users who have sent or received messages from
    the specified assistant in the given time range.
    """
    query = (
        select(func.count(func.distinct(ChatSession.user_id)))
        .select_from(ChatMessage)
        .join(
            ChatSession,
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
            ChatSession.persona_id == assistant_id,
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
    )

    result = db_session.execute(query).scalar()
    return result if result else 0


# Users can view assistant stats if they created the persona,
# or if they are an admin
def user_can_view_assistant_stats(
    db_session: Session, user: User, assistant_id: int
) -> bool:
    if user.role == UserRole.ADMIN:
        return True

    # Check if the user created the persona
    stmt = select(Persona).where(
        and_(Persona.id == assistant_id, Persona.user_id == user.id)
    )

    persona = db_session.execute(stmt).scalar_one_or_none()
    return persona is not None


================================================
FILE: backend/ee/onyx/db/connector.py
================================================
from sqlalchemy import distinct
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.models import Connector
from onyx.utils.logger import setup_logger

logger = setup_logger()


def fetch_sources_with_connectors(db_session: Session) -> list[DocumentSource]:
    sources = db_session.query(distinct(Connector.source)).all()  # type: ignore

    document_sources = [source[0] for source in sources]

    return document_sources


================================================
FILE: backend/ee/onyx/db/connector_credential_pair.py
================================================
from sqlalchemy import delete
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _delete_connector_credential_pair_user_groups_relationship__no_commit(
    db_session: Session, connector_id: int, credential_id: int
) -> None:
    cc_pair = get_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
    )
    if cc_pair is None:
        raise ValueError(
            f"ConnectorCredentialPair with connector_id: {connector_id} and credential_id: {credential_id} not found"
        )

    stmt = delete(UserGroup__ConnectorCredentialPair).where(
        UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair.id,
    )
    db_session.execute(stmt)


def get_cc_pairs_by_source(
    db_session: Session,
    source_type: DocumentSource,
    access_type: AccessType | None = None,
    status: ConnectorCredentialPairStatus | None = None,
) -> list[ConnectorCredentialPair]:
    """
    Get all cc_pairs for a given source type with optional filtering by access_type and status
    result is sorted by cc_pair id
    """
    query = (
        db_session.query(ConnectorCredentialPair)
        .join(ConnectorCredentialPair.connector)
        .filter(Connector.source == source_type)
        .order_by(ConnectorCredentialPair.id)
    )

    if access_type is not None:
        query = query.filter(ConnectorCredentialPair.access_type == access_type)

    if status is not None:
        query = query.filter(ConnectorCredentialPair.status == status)

    cc_pairs = query.all()
    return cc_pairs


def get_all_auto_sync_cc_pairs(
    db_session: Session,
) -> list[ConnectorCredentialPair]:
    return (
        db_session.query(ConnectorCredentialPair)
        .where(
            ConnectorCredentialPair.access_type == AccessType.SYNC,
        )
        .all()
    )


================================================
FILE: backend/ee/onyx/db/document.py
================================================
from datetime import datetime
from datetime import timezone

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.db.models import Document as DbDocument


def upsert_document_external_perms__no_commit(
    db_session: Session,
    doc_id: str,
    external_access: ExternalAccess,
    source_type: DocumentSource,
) -> None:
    """
    This sets the permissions for a document in postgres.
    NOTE: this will replace any existing external access, it will not do a union
    """
    document = db_session.scalars(
        select(DbDocument).where(DbDocument.id == doc_id)
    ).first()

    prefixed_external_groups = [
        build_ext_group_name_for_onyx(
            ext_group_name=group_id,
            source=source_type,
        )
        for group_id in external_access.external_user_group_ids
    ]

    if not document:
        # If the document does not exist, still store the external access
        # So that if the document is added later, the external access is already stored
        document = DbDocument(
            id=doc_id,
            semantic_id="",
            external_user_emails=external_access.external_user_emails,
            external_user_group_ids=prefixed_external_groups,
            is_public=external_access.is_public,
        )
        db_session.add(document)
        return

    document.external_user_emails = list(external_access.external_user_emails)
    document.external_user_group_ids = prefixed_external_groups
    document.is_public = external_access.is_public


def upsert_document_external_perms(
    db_session: Session,
    doc_id: str,
    external_access: ExternalAccess,
    source_type: DocumentSource,
) -> bool:
    """
    This sets the permissions for a document in postgres. Returns True if the
    a new document was created, False otherwise.
    NOTE: this will replace any existing external access, it will not do a union
    """
    document = db_session.scalars(
        select(DbDocument).where(DbDocument.id == doc_id)
    ).first()

    prefixed_external_groups: set[str] = {
        build_ext_group_name_for_onyx(
            ext_group_name=group_id,
            source=source_type,
        )
        for group_id in external_access.external_user_group_ids
    }

    if not document:
        # If the document does not exist, still store the external access
        # So that if the document is added later, the external access is already stored
        # The upsert function in the indexing pipeline does not overwrite the permissions fields
        document = DbDocument(
            id=doc_id,
            semantic_id="",
            external_user_emails=external_access.external_user_emails,
            external_user_group_ids=prefixed_external_groups,
            is_public=external_access.is_public,
        )
        db_session.add(document)
        db_session.commit()
        return True

    # If the document exists, we need to check if the external access has changed
    if (
        external_access.external_user_emails != set(document.external_user_emails or [])
        or prefixed_external_groups != set(document.external_user_group_ids or [])
        or external_access.is_public != document.is_public
    ):
        document.external_user_emails = list(external_access.external_user_emails)
        document.external_user_group_ids = list(prefixed_external_groups)
        document.is_public = external_access.is_public
        document.last_modified = datetime.now(timezone.utc)
        db_session.commit()

    return False


================================================
FILE: backend/ee/onyx/db/document_set.py
================================================
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import DocumentSet
from onyx.db.models import DocumentSet__ConnectorCredentialPair
from onyx.db.models import DocumentSet__User
from onyx.db.models import DocumentSet__UserGroup
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup


def make_doc_set_private(
    document_set_id: int,
    user_ids: list[UUID] | None,
    group_ids: list[int] | None,
    db_session: Session,
) -> None:
    db_session.query(DocumentSet__User).filter(
        DocumentSet__User.document_set_id == document_set_id
    ).delete(synchronize_session="fetch")
    db_session.query(DocumentSet__UserGroup).filter(
        DocumentSet__UserGroup.document_set_id == document_set_id
    ).delete(synchronize_session="fetch")

    if user_ids:
        for user_uuid in user_ids:
            db_session.add(
                DocumentSet__User(document_set_id=document_set_id, user_id=user_uuid)
            )

    if group_ids:
        for group_id in group_ids:
            db_session.add(
                DocumentSet__UserGroup(
                    document_set_id=document_set_id, user_group_id=group_id
                )
            )


def delete_document_set_privacy__no_commit(
    document_set_id: int, db_session: Session
) -> None:
    db_session.query(DocumentSet__User).filter(
        DocumentSet__User.document_set_id == document_set_id
    ).delete(synchronize_session="fetch")

    db_session.query(DocumentSet__UserGroup).filter(
        DocumentSet__UserGroup.document_set_id == document_set_id
    ).delete(synchronize_session="fetch")


def fetch_document_sets(
    user_id: UUID | None,
    db_session: Session,
    include_outdated: bool = True,  # Parameter only for versioned implementation, unused  # noqa: ARG001
) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:
    assert user_id is not None

    # Public document sets
    public_document_sets = (
        db_session.query(DocumentSet)
        .filter(DocumentSet.is_public == True)  # noqa
        .all()
    )

    # Document sets via shared user relationships
    shared_document_sets = (
        db_session.query(DocumentSet)
        .join(DocumentSet__User, DocumentSet.id == DocumentSet__User.document_set_id)
        .filter(DocumentSet__User.user_id == user_id)
        .all()
    )

    # Document sets via groups
    # First, find the user groups the user belongs to
    user_groups = (
        db_session.query(UserGroup)
        .join(User__UserGroup, UserGroup.id == User__UserGroup.user_group_id)
        .filter(User__UserGroup.user_id == user_id)
        .all()
    )

    group_document_sets = []
    for group in user_groups:
        group_document_sets.extend(
            db_session.query(DocumentSet)
            .join(
                DocumentSet__UserGroup,
                DocumentSet.id == DocumentSet__UserGroup.document_set_id,
            )
            .filter(DocumentSet__UserGroup.user_group_id == group.id)
            .all()
        )

    # Combine and deduplicate document sets from all sources
    all_document_sets = list(
        set(public_document_sets + shared_document_sets + group_document_sets)
    )

    document_set_with_cc_pairs: list[
        tuple[DocumentSet, list[ConnectorCredentialPair]]
    ] = []

    for document_set in all_document_sets:
        # Fetch the associated ConnectorCredentialPairs
        cc_pairs = (
            db_session.query(ConnectorCredentialPair)
            .join(
                DocumentSet__ConnectorCredentialPair,
                ConnectorCredentialPair.id
                == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,
            )
            .filter(
                DocumentSet__ConnectorCredentialPair.document_set_id == document_set.id,
            )
            .all()
        )

        document_set_with_cc_pairs.append((document_set, cc_pairs))

    return document_set_with_cc_pairs


================================================
FILE: backend/ee/onyx/db/external_perm.py
================================================
from collections.abc import Sequence
from uuid import UUID

from pydantic import BaseModel
from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.db.models import PublicExternalUserGroup
from onyx.db.models import User
from onyx.db.models import User__ExternalUserGroupId
from onyx.db.users import batch_add_ext_perm_user_if_not_exists
from onyx.db.users import get_user_by_email
from onyx.utils.logger import setup_logger

logger = setup_logger()


class ExternalUserGroup(BaseModel):
    id: str
    user_emails: list[str]
    # `True` for cases like a Folder in Google Drive that give domain-wide
    # or "Anyone with link" access to all files in the folder.
    # if this is set, `user_emails` don't really matter.
    # When this is `True`, this `ExternalUserGroup` object doesn't really represent
    # an actual "group" in the source.
    gives_anyone_access: bool = False


def delete_user__ext_group_for_user__no_commit(
    db_session: Session,
    user_id: UUID,
) -> None:
    db_session.execute(
        delete(User__ExternalUserGroupId).where(
            User__ExternalUserGroupId.user_id == user_id
        )
    )


def delete_user__ext_group_for_cc_pair__no_commit(
    db_session: Session,
    cc_pair_id: int,
) -> None:
    db_session.execute(
        delete(User__ExternalUserGroupId).where(
            User__ExternalUserGroupId.cc_pair_id == cc_pair_id
        )
    )


def delete_public_external_group_for_cc_pair__no_commit(
    db_session: Session,
    cc_pair_id: int,
) -> None:
    db_session.execute(
        delete(PublicExternalUserGroup).where(
            PublicExternalUserGroup.cc_pair_id == cc_pair_id
        )
    )


def mark_old_external_groups_as_stale(
    db_session: Session,
    cc_pair_id: int,
) -> None:
    db_session.execute(
        update(User__ExternalUserGroupId)
        .where(User__ExternalUserGroupId.cc_pair_id == cc_pair_id)
        .values(stale=True)
    )
    db_session.execute(
        update(PublicExternalUserGroup)
        .where(PublicExternalUserGroup.cc_pair_id == cc_pair_id)
        .values(stale=True)
    )


def upsert_external_groups(
    db_session: Session,
    cc_pair_id: int,
    external_groups: list[ExternalUserGroup],
    source: DocumentSource,
) -> None:
    """
    Performs a true upsert operation for external user groups:
    - For existing groups (same user_id, external_user_group_id, cc_pair_id), updates the stale flag to False
    - For new groups, inserts them with stale=False
    - For public groups, uses upsert logic as well
    """
    # If there are no groups to add, return early
    if not external_groups:
        return

    # collect all emails from all groups to batch add all users at once for efficiency
    all_group_member_emails = set()
    for external_group in external_groups:
        for user_email in external_group.user_emails:
            all_group_member_emails.add(user_email)

    # batch add users if they don't exist and get their ids
    all_group_members: list[User] = batch_add_ext_perm_user_if_not_exists(
        db_session=db_session,
        # NOTE: this function handles case sensitivity for emails
        emails=list(all_group_member_emails),
    )

    # map emails to ids
    email_id_map = {user.email.lower(): user.id for user in all_group_members}

    # Process each external group
    for external_group in external_groups:
        external_group_id = build_ext_group_name_for_onyx(
            ext_group_name=external_group.id,
            source=source,
        )

        # Handle user-group mappings
        for user_email in external_group.user_emails:
            user_id = email_id_map.get(user_email.lower())
            if user_id is None:
                logger.warning(
                    f"User in group {external_group.id} with email {user_email} not found"
                )
                continue

            # Check if the user-group mapping already exists
            existing_user_group = db_session.scalar(
                select(User__ExternalUserGroupId).where(
                    User__ExternalUserGroupId.user_id == user_id,
                    User__ExternalUserGroupId.external_user_group_id
                    == external_group_id,
                    User__ExternalUserGroupId.cc_pair_id == cc_pair_id,
                )
            )

            if existing_user_group:
                # Update existing record
                existing_user_group.stale = False
            else:
                # Insert new record
                new_user_group = User__ExternalUserGroupId(
                    user_id=user_id,
                    external_user_group_id=external_group_id,
                    cc_pair_id=cc_pair_id,
                    stale=False,
                )
                db_session.add(new_user_group)

        # Handle public group if needed
        if external_group.gives_anyone_access:
            # Check if the public group already exists
            existing_public_group = db_session.scalar(
                select(PublicExternalUserGroup).where(
                    PublicExternalUserGroup.external_user_group_id == external_group_id,
                    PublicExternalUserGroup.cc_pair_id == cc_pair_id,
                )
            )

            if existing_public_group:
                # Update existing record
                existing_public_group.stale = False
            else:
                # Insert new record
                new_public_group = PublicExternalUserGroup(
                    external_user_group_id=external_group_id,
                    cc_pair_id=cc_pair_id,
                    stale=False,
                )
                db_session.add(new_public_group)

    db_session.commit()


def remove_stale_external_groups(
    db_session: Session,
    cc_pair_id: int,
) -> None:
    db_session.execute(
        delete(User__ExternalUserGroupId).where(
            User__ExternalUserGroupId.cc_pair_id == cc_pair_id,
            User__ExternalUserGroupId.stale.is_(True),
        )
    )
    db_session.execute(
        delete(PublicExternalUserGroup).where(
            PublicExternalUserGroup.cc_pair_id == cc_pair_id,
            PublicExternalUserGroup.stale.is_(True),
        )
    )
    db_session.commit()


def fetch_external_groups_for_user(
    db_session: Session,
    user_id: UUID,
) -> Sequence[User__ExternalUserGroupId]:
    return db_session.scalars(
        select(User__ExternalUserGroupId).where(
            User__ExternalUserGroupId.user_id == user_id
        )
    ).all()


def fetch_external_groups_for_user_email_and_group_ids(
    db_session: Session,
    user_email: str,
    group_ids: list[str],
) -> list[User__ExternalUserGroupId]:
    user = get_user_by_email(db_session=db_session, email=user_email)
    if user is None:
        return []
    user_id = user.id
    user_ext_groups = db_session.scalars(
        select(User__ExternalUserGroupId).where(
            User__ExternalUserGroupId.user_id == user_id,
            User__ExternalUserGroupId.external_user_group_id.in_(group_ids),
        )
    ).all()
    return list(user_ext_groups)


def fetch_public_external_group_ids(
    db_session: Session,
) -> list[str]:
    return list(
        db_session.scalars(select(PublicExternalUserGroup.external_user_group_id)).all()
    )


================================================
FILE: backend/ee/onyx/db/hierarchy.py
================================================
"""EE version of hierarchy node access control.

This module provides permission-aware hierarchy node access for Enterprise Edition.
It filters hierarchy nodes based on user email and external group membership.
"""

from sqlalchemy import any_
from sqlalchemy import cast
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement

from onyx.configs.constants import DocumentSource
from onyx.db.models import HierarchyNode


def _build_hierarchy_access_filter(
    user_email: str,
    external_group_ids: list[str],
) -> ColumnElement[bool]:
    """Build SQLAlchemy filter for hierarchy node access.

    A user can access a hierarchy node if any of the following are true:
    - The node is marked as public (is_public=True)
    - The user's email is in the node's external_user_emails list
    - Any of the user's external group IDs overlap with the node's external_user_group_ids
    """
    access_filters: list[ColumnElement[bool]] = [HierarchyNode.is_public.is_(True)]
    if user_email:
        access_filters.append(any_(HierarchyNode.external_user_emails) == user_email)
    if external_group_ids:
        access_filters.append(
            HierarchyNode.external_user_group_ids.overlap(
                cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))
            )
        )
    return or_(*access_filters)


def _get_accessible_hierarchy_nodes_for_source(
    db_session: Session,
    source: DocumentSource,
    user_email: str,
    external_group_ids: list[str],
) -> list[HierarchyNode]:
    """
    EE version: Returns hierarchy nodes filtered by user permissions.

    A user can access a hierarchy node if any of the following are true:
    - The node is marked as public (is_public=True)
    - The user's email is in the node's external_user_emails list
    - Any of the user's external group IDs overlap with the node's external_user_group_ids

    Args:
        db_session: SQLAlchemy session
        source: Document source type
        user_email: User's email for permission checking
        external_group_ids: User's external group IDs for permission checking

    Returns:
        List of HierarchyNode objects the user has access to
    """
    stmt = select(HierarchyNode).where(HierarchyNode.source == source)
    stmt = stmt.where(_build_hierarchy_access_filter(user_email, external_group_ids))
    stmt = stmt.order_by(HierarchyNode.display_name)
    return list(db_session.execute(stmt).scalars().all())


================================================
FILE: backend/ee/onyx/db/license.py
================================================
"""Database and cache operations for the license table."""

from datetime import datetime
from typing import NamedTuple

from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session

from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.cache.factory import get_cache_backend
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

LICENSE_METADATA_KEY = "license:metadata"
LICENSE_CACHE_TTL_SECONDS = 86400  # 24 hours


class SeatAvailabilityResult(NamedTuple):
    """Result of a seat availability check."""

    available: bool
    error_message: str | None = None


# -----------------------------------------------------------------------------
# Database CRUD Operations
# -----------------------------------------------------------------------------


def get_license(db_session: Session) -> License | None:
    """
    Get the current license (singleton pattern - only one row).

    Args:
        db_session: Database session

    Returns:
        License object if exists, None otherwise
    """
    return db_session.execute(select(License)).scalars().first()


def upsert_license(db_session: Session, license_data: str) -> License:
    """
    Insert or update the license (singleton pattern).

    Args:
        db_session: Database session
        license_data: Base64-encoded signed license blob

    Returns:
        The created or updated License object
    """
    existing = get_license(db_session)

    if existing:
        existing.license_data = license_data
        db_session.commit()
        db_session.refresh(existing)
        logger.info("License updated")
        return existing

    new_license = License(license_data=license_data)
    db_session.add(new_license)
    db_session.commit()
    db_session.refresh(new_license)
    logger.info("License created")
    return new_license


def delete_license(db_session: Session) -> bool:
    """
    Delete the current license.

    Args:
        db_session: Database session

    Returns:
        True if deleted, False if no license existed
    """
    existing = get_license(db_session)
    if existing:
        db_session.delete(existing)
        db_session.commit()
        logger.info("License deleted")
        return True
    return False


# -----------------------------------------------------------------------------
# Seat Counting
# -----------------------------------------------------------------------------


def get_used_seats(tenant_id: str | None = None) -> int:
    """
    Get current seat usage directly from database.

    For multi-tenant: counts users in UserTenantMapping for this tenant.
    For self-hosted: counts all active users (excludes EXT_PERM_USER role
    and the anonymous system user).

    TODO: Exclude API key dummy users from seat counting. API keys create
    users with emails like `__DANSWER_API_KEY_*` that should not count toward
    seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
    """
    if MULTI_TENANT:
        from ee.onyx.server.tenants.user_mapping import get_tenant_count

        return get_tenant_count(tenant_id or get_current_tenant_id())
    else:
        from onyx.db.engine.sql_engine import get_session_with_current_tenant

        with get_session_with_current_tenant() as db_session:
            result = db_session.execute(
                select(func.count())
                .select_from(User)
                .where(
                    User.is_active == True,  # type: ignore  # noqa: E712
                    User.role != UserRole.EXT_PERM_USER,
                    User.email != ANONYMOUS_USER_EMAIL,  # type: ignore
                )
            )
            return result.scalar() or 0


# -----------------------------------------------------------------------------
# Redis Cache Operations
# -----------------------------------------------------------------------------


def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
    """
    Get license metadata from cache.

    Args:
        tenant_id: Tenant ID (for multi-tenant deployments)

    Returns:
        LicenseMetadata if cached, None otherwise
    """
    cache = get_cache_backend(tenant_id=tenant_id)
    cached = cache.get(LICENSE_METADATA_KEY)
    if not cached:
        return None

    try:
        cached_str = (
            cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
        )
        return LicenseMetadata.model_validate_json(cached_str)
    except Exception as e:
        logger.warning(f"Failed to parse cached license metadata: {e}")
        return None


def invalidate_license_cache(tenant_id: str | None = None) -> None:
    """
    Invalidate the license metadata cache (not the license itself).

    Deletes the cached LicenseMetadata. The actual license in the database
    is not affected. Delete is idempotent — if the key doesn't exist, this
    is a no-op.

    Args:
        tenant_id: Tenant ID (for multi-tenant deployments)
    """
    cache = get_cache_backend(tenant_id=tenant_id)
    cache.delete(LICENSE_METADATA_KEY)
    logger.info("License cache invalidated")


def update_license_cache(
    payload: LicensePayload,
    source: LicenseSource | None = None,
    grace_period_end: datetime | None = None,
    tenant_id: str | None = None,
) -> LicenseMetadata:
    """
    Update the cache with license metadata.

    We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
    1. Frontend needs status to show appropriate UI/banners
    2. Caching avoids repeated DB + crypto verification on every request
    3. Status enforcement happens at the feature level, not here

    Args:
        payload: Verified license payload
        source: How the license was obtained
        grace_period_end: Optional grace period end time
        tenant_id: Tenant ID (for multi-tenant deployments)

    Returns:
        The cached LicenseMetadata
    """
    from ee.onyx.utils.license import get_license_status

    tenant = tenant_id or get_current_tenant_id()
    cache = get_cache_backend(tenant_id=tenant_id)

    used_seats = get_used_seats(tenant)
    status = get_license_status(payload, grace_period_end)

    metadata = LicenseMetadata(
        tenant_id=payload.tenant_id,
        organization_name=payload.organization_name,
        seats=payload.seats,
        used_seats=used_seats,
        plan_type=payload.plan_type,
        issued_at=payload.issued_at,
        expires_at=payload.expires_at,
        grace_period_end=grace_period_end,
        status=status,
        source=source,
        stripe_subscription_id=payload.stripe_subscription_id,
    )

    cache.set(
        LICENSE_METADATA_KEY,
        metadata.model_dump_json(),
        ex=LICENSE_CACHE_TTL_SECONDS,
    )

    logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
    return metadata


def refresh_license_cache(
    db_session: Session,
    tenant_id: str | None = None,
) -> LicenseMetadata | None:
    """
    Refresh the license cache from the database.

    Args:
        db_session: Database session
        tenant_id: Tenant ID (for multi-tenant deployments)

    Returns:
        LicenseMetadata if license exists, None otherwise
    """
    from ee.onyx.utils.license import verify_license_signature

    license_record = get_license(db_session)
    if not license_record:
        invalidate_license_cache(tenant_id)
        return None

    try:
        payload = verify_license_signature(license_record.license_data)
        # Derive source from payload: manual licenses lack stripe_customer_id
        source: LicenseSource = (
            LicenseSource.AUTO_FETCH
            if payload.stripe_customer_id
            else LicenseSource.MANUAL_UPLOAD
        )
        return update_license_cache(
            payload,
            source=source,
            tenant_id=tenant_id,
        )
    except ValueError as e:
        logger.error(f"Failed to verify license during cache refresh: {e}")
        invalidate_license_cache(tenant_id)
        return None


def get_license_metadata(
    db_session: Session,
    tenant_id: str | None = None,
) -> LicenseMetadata | None:
    """
    Get license metadata, using cache if available.

    Args:
        db_session: Database session
        tenant_id: Tenant ID (for multi-tenant deployments)

    Returns:
        LicenseMetadata if license exists, None otherwise
    """
    # Try cache first
    cached = get_cached_license_metadata(tenant_id)
    if cached:
        return cached

    # Refresh from database
    return refresh_license_cache(db_session, tenant_id)


def check_seat_availability(
    db_session: Session,
    seats_needed: int = 1,
    tenant_id: str | None = None,
) -> SeatAvailabilityResult:
    """
    Check if there are enough seats available to add users.

    Args:
        db_session: Database session
        seats_needed: Number of seats needed (default 1)
        tenant_id: Tenant ID (for multi-tenant deployments)

    Returns:
        SeatAvailabilityResult with available=True if seats are available,
        or available=False with error_message if limit would be exceeded.
        Returns available=True if no license exists (self-hosted = unlimited).
    """
    metadata = get_license_metadata(db_session, tenant_id)

    # No license = no enforcement (self-hosted without license)
    if metadata is None:
        return SeatAvailabilityResult(available=True)

    # Calculate current usage directly from DB (not cache) for accuracy
    current_used = get_used_seats(tenant_id)
    total_seats = metadata.seats

    # Use > (not >=) to allow filling to exactly 100% capacity
    would_exceed_limit = current_used + seats_needed > total_seats
    if would_exceed_limit:
        return SeatAvailabilityResult(
            available=False,
            error_message=f"Seat limit would be exceeded: {current_used} of {total_seats} seats used, "
            f"cannot add {seats_needed} more user(s).",
        )

    return SeatAvailabilityResult(available=True)


================================================
FILE: backend/ee/onyx/db/persona.py
================================================
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.configs.constants import NotificationType
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.db.persona import mark_persona_user_files_for_sync
from onyx.server.features.persona.models import PersonaSharedNotificationData


def update_persona_access(
    persona_id: int,
    creator_user_id: UUID | None,
    db_session: Session,
    is_public: bool | None = None,
    user_ids: list[UUID] | None = None,
    group_ids: list[int] | None = None,
) -> None:
    """Updates the access settings for a persona including public status, user shares,
    and group shares.

    NOTE: This function batches all updates. If we don't dedupe the inputs,
    the commit will exception.

    NOTE: Callers are responsible for committing."""

    needs_sync = False
    if is_public is not None:
        needs_sync = True
        persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
        if persona:
            persona.is_public = is_public

    # NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
    # and a non-empty list means "replace with these shares".

    if user_ids is not None:
        needs_sync = True
        db_session.query(Persona__User).filter(
            Persona__User.persona_id == persona_id
        ).delete(synchronize_session="fetch")

        user_ids_set = set(user_ids)
        for user_id in user_ids_set:
            db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
            if user_id != creator_user_id:
                create_notification(
                    user_id=user_id,
                    notif_type=NotificationType.PERSONA_SHARED,
                    title="A new agent was shared with you!",
                    db_session=db_session,
                    additional_data=PersonaSharedNotificationData(
                        persona_id=persona_id,
                    ).model_dump(),
                )

    if group_ids is not None:
        needs_sync = True
        db_session.query(Persona__UserGroup).filter(
            Persona__UserGroup.persona_id == persona_id
        ).delete(synchronize_session="fetch")

        group_ids_set = set(group_ids)
        for group_id in group_ids_set:
            db_session.add(
                Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
            )

    # When sharing changes, user file ACLs need to be updated in the vector DB
    if needs_sync:
        mark_persona_user_files_for_sync(persona_id, db_session)


================================================
FILE: backend/ee/onyx/db/query_history.py
================================================
from collections.abc import Sequence
from datetime import datetime

from sqlalchemy import asc
from sqlalchemy import BinaryExpression
from sqlalchemy import ColumnElement
from sqlalchemy import desc
from sqlalchemy import distinct
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from sqlalchemy.sql import case
from sqlalchemy.sql import func
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import literal
from sqlalchemy.sql.expression import UnaryExpression

from ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX
from onyx.configs.constants import QAFeedbackType
from onyx.db.models import ChatMessage
from onyx.db.models import ChatMessageFeedback
from onyx.db.models import ChatSession
from onyx.db.models import TaskQueueState
from onyx.db.tasks import get_all_tasks_with_prefix


def _build_filter_conditions(
    start_time: datetime | None,
    end_time: datetime | None,
    feedback_filter: QAFeedbackType | None,
) -> list[ColumnElement]:
    """
    Helper function to build all filter conditions for chat sessions.
    Filters by start and end time, feedback type, and any sessions without messages.
    start_time: Date from which to filter
    end_time: Date to which to filter
    feedback_filter: Feedback type to filter by
    Returns: List of filter conditions
    """
    conditions = []

    if start_time is not None:
        conditions.append(ChatSession.time_created >= start_time)
    if end_time is not None:
        conditions.append(ChatSession.time_created <= end_time)

    if feedback_filter is not None:
        feedback_subq = (
            select(ChatMessage.chat_session_id)
            .join(ChatMessageFeedback)
            .group_by(ChatMessage.chat_session_id)
            .having(
                case(
                    (
                        case(
                            {literal(feedback_filter == QAFeedbackType.LIKE): True},
                            else_=False,
                        ),
                        func.bool_and(ChatMessageFeedback.is_positive),
                    ),
                    (
                        case(
                            {literal(feedback_filter == QAFeedbackType.DISLIKE): True},
                            else_=False,
                        ),
                        func.bool_and(func.not_(ChatMessageFeedback.is_positive)),
                    ),
                    else_=func.bool_or(ChatMessageFeedback.is_positive)
                    & func.bool_or(func.not_(ChatMessageFeedback.is_positive)),
                )
            )
        )
        conditions.append(ChatSession.id.in_(feedback_subq))

    return conditions


def get_total_filtered_chat_sessions_count(
    db_session: Session,
    start_time: datetime | None,
    end_time: datetime | None,
    feedback_filter: QAFeedbackType | None,
) -> int:
    conditions = _build_filter_conditions(start_time, end_time, feedback_filter)
    stmt = (
        select(func.count(distinct(ChatSession.id)))
        .select_from(ChatSession)
        .filter(*conditions)
    )
    return db_session.scalar(stmt) or 0


def get_page_of_chat_sessions(
    start_time: datetime | None,
    end_time: datetime | None,
    db_session: Session,
    page_num: int,
    page_size: int,
    feedback_filter: QAFeedbackType | None = None,
) -> Sequence[ChatSession]:
    conditions = _build_filter_conditions(start_time, end_time, feedback_filter)

    subquery = (
        select(ChatSession.id)
        .filter(*conditions)
        .order_by(desc(ChatSession.time_created), ChatSession.id)
        .limit(page_size)
        .offset(page_num * page_size)
        .subquery()
    )

    stmt = (
        select(ChatSession)
        .join(subquery, ChatSession.id == subquery.c.id)
        .outerjoin(ChatMessage, ChatSession.id == ChatMessage.chat_session_id)
        .options(
            joinedload(ChatSession.user),
            joinedload(ChatSession.persona),
            contains_eager(ChatSession.messages).joinedload(
                ChatMessage.chat_message_feedbacks
            ),
        )
        .order_by(
            desc(ChatSession.time_created),
            ChatSession.id,
            asc(ChatMessage.id),  # Ensure chronological message order
        )
    )

    return db_session.scalars(stmt).unique().all()


def fetch_chat_sessions_eagerly_by_time(
    start: datetime,
    end: datetime,
    db_session: Session,
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> list[ChatSession]:
    """Sorted by oldest to newest, then by message id"""

    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
    message_order: UnaryExpression = asc(ChatMessage.id)

    filters: list[ColumnElement | BinaryExpression] = [
        ChatSession.time_created.between(start, end)
    ]

    if initial_time:
        filters.append(ChatSession.time_created > initial_time)

    subquery = (
        db_session.query(ChatSession.id, ChatSession.time_created)
        .filter(*filters)
        .order_by(asc_time_order)
        .limit(limit)
        .subquery()
    )

    query = (
        db_session.query(ChatSession)
        .join(subquery, ChatSession.id == subquery.c.id)
        .outerjoin(ChatMessage, ChatSession.id == ChatMessage.chat_session_id)
        .options(
            joinedload(ChatSession.user),
            joinedload(ChatSession.persona),
            contains_eager(ChatSession.messages).joinedload(
                ChatMessage.chat_message_feedbacks
            ),
        )
        .order_by(asc_time_order, message_order)
    )

    chat_sessions = query.all()

    return chat_sessions


def get_all_query_history_export_tasks(
    db_session: Session,
) -> list[TaskQueueState]:
    return get_all_tasks_with_prefix(db_session, QUERY_HISTORY_TASK_NAME_PREFIX)


================================================
FILE: backend/ee/onyx/db/saml.py
================================================
import datetime
from typing import cast
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.db.models import SamlAccount


def upsert_saml_account(
    user_id: UUID,
    cookie: str,
    db_session: Session,
    expiration_offset: int = SESSION_EXPIRE_TIME_SECONDS,
) -> datetime.datetime:
    expires_at = func.now() + datetime.timedelta(seconds=expiration_offset)

    existing_saml_acc = (
        db_session.query(SamlAccount)
        .filter(SamlAccount.user_id == user_id)
        .one_or_none()
    )

    if existing_saml_acc:
        existing_saml_acc.encrypted_cookie = cookie
        existing_saml_acc.expires_at = cast(datetime.datetime, expires_at)
        existing_saml_acc.updated_at = func.now()
        saml_acc = existing_saml_acc
    else:
        saml_acc = SamlAccount(
            user_id=user_id,
            encrypted_cookie=cookie,
            expires_at=expires_at,
        )
        db_session.add(saml_acc)

    db_session.commit()

    return saml_acc.expires_at


async def get_saml_account(
    cookie: str, async_db_session: AsyncSession
) -> SamlAccount | None:
    """NOTE: this is async, since it's used during auth
    (which is necessarily async due to FastAPI Users)"""
    stmt = (
        select(SamlAccount)
        .options(selectinload(SamlAccount.user))  # Use selectinload for collections
        .where(
            and_(
                SamlAccount.encrypted_cookie == cookie,
                SamlAccount.expires_at > func.now(),
            )
        )
    )

    result = await async_db_session.execute(stmt)
    return result.scalars().unique().one_or_none()


async def expire_saml_account(
    saml_account: SamlAccount, async_db_session: AsyncSession
) -> None:
    saml_account.expires_at = func.now()
    await async_db_session.commit()


================================================
FILE: backend/ee/onyx/db/scim.py
================================================
"""SCIM Data Access Layer.

All database operations for SCIM provisioning — token management, user
mappings, and group mappings. Extends the base DAL (see ``onyx.db.dal``).

Usage from FastAPI::

    def get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:
        return ScimDAL(db_session)

    @router.post("/tokens")
    def create_token(dal: ScimDAL = Depends(get_scim_dal)) -> ...:
        token = dal.create_token(name=..., hashed_token=..., ...)
        dal.commit()
        return token

Usage from background tasks::

    with ScimDAL.from_tenant("tenant_abc") as dal:
        mapping = dal.create_user_mapping(external_id="idp-123", user_id=uid)
        dal.commit()
"""

from __future__ import annotations

from uuid import UUID

from sqlalchemy import delete as sa_delete
from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import SQLColumnExpression
from sqlalchemy.dialects.postgresql import insert as pg_insert

from ee.onyx.server.scim.filtering import ScimFilter
from ee.onyx.server.scim.filtering import ScimFilterOperator
from ee.onyx.server.scim.models import ScimMappingFields
from onyx.db.dal import DAL
from onyx.db.enums import AccountType
from onyx.db.enums import GrantSource
from onyx.db.enums import Permission
from onyx.db.models import PermissionGrant
from onyx.db.models import ScimGroupMapping
from onyx.db.models import ScimToken
from onyx.db.models import ScimUserMapping
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.utils.logger import setup_logger

logger = setup_logger()


class ScimDAL(DAL):
    """Data Access Layer for SCIM provisioning operations.

    Methods mutate but do NOT commit — call ``dal.commit()`` explicitly
    when you want to persist changes. This follows the existing ``_no_commit``
    convention and lets callers batch multiple operations into one transaction.
    """

    # ------------------------------------------------------------------
    # Token operations
    # ------------------------------------------------------------------

    def create_token(
        self,
        name: str,
        hashed_token: str,
        token_display: str,
        created_by_id: UUID,
    ) -> ScimToken:
        """Create a new SCIM bearer token.

        Only one token is active at a time — this method automatically revokes
        all existing active tokens before creating the new one.
        """
        # Revoke any currently active tokens
        active_tokens = list(
            self._session.scalars(
                select(ScimToken).where(ScimToken.is_active.is_(True))
            ).all()
        )
        for t in active_tokens:
            t.is_active = False

        token = ScimToken(
            name=name,
            hashed_token=hashed_token,
            token_display=token_display,
            created_by_id=created_by_id,
        )
        self._session.add(token)
        self._session.flush()
        return token

    def get_active_token(self) -> ScimToken | None:
        """Return the single currently active token, or None."""
        return self._session.scalar(
            select(ScimToken).where(ScimToken.is_active.is_(True))
        )

    def get_token_by_hash(self, hashed_token: str) -> ScimToken | None:
        """Look up a token by its SHA-256 hash."""
        return self._session.scalar(
            select(ScimToken).where(ScimToken.hashed_token == hashed_token)
        )

    def revoke_token(self, token_id: int) -> None:
        """Deactivate a token by ID.

        Raises:
            ValueError: If the token does not exist.
        """
        token = self._session.get(ScimToken, token_id)
        if not token:
            raise ValueError(f"SCIM token with id {token_id} not found")
        token.is_active = False

    def update_token_last_used(self, token_id: int) -> None:
        """Update the last_used_at timestamp for a token."""
        token = self._session.get(ScimToken, token_id)
        if token:
            token.last_used_at = func.now()  # type: ignore[assignment]

    # ------------------------------------------------------------------
    # User mapping operations
    # ------------------------------------------------------------------

    def create_user_mapping(
        self,
        external_id: str | None,
        user_id: UUID,
        scim_username: str | None = None,
        fields: ScimMappingFields | None = None,
    ) -> ScimUserMapping:
        """Create a SCIM mapping for a user.

        ``external_id`` may be ``None`` when the IdP omits it (RFC 7643
        allows this). The mapping still marks the user as SCIM-managed.
        """
        f = fields or ScimMappingFields()
        mapping = ScimUserMapping(
            external_id=external_id,
            user_id=user_id,
            scim_username=scim_username,
            department=f.department,
            manager=f.manager,
            given_name=f.given_name,
            family_name=f.family_name,
            scim_emails_json=f.scim_emails_json,
        )
        self._session.add(mapping)
        self._session.flush()
        return mapping

    def get_user_mapping_by_external_id(
        self, external_id: str
    ) -> ScimUserMapping | None:
        """Look up a user mapping by the IdP's external identifier."""
        return self._session.scalar(
            select(ScimUserMapping).where(ScimUserMapping.external_id == external_id)
        )

    def get_user_mapping_by_user_id(self, user_id: UUID) -> ScimUserMapping | None:
        """Look up a user mapping by the Onyx user ID."""
        return self._session.scalar(
            select(ScimUserMapping).where(ScimUserMapping.user_id == user_id)
        )

    def list_user_mappings(
        self,
        start_index: int = 1,
        count: int = 100,
    ) -> tuple[list[ScimUserMapping], int]:
        """List user mappings with SCIM-style pagination.

        Args:
            start_index: 1-based start index (SCIM convention).
            count: Maximum number of results to return.

        Returns:
            A tuple of (mappings, total_count).
        """
        total = (
            self._session.scalar(select(func.count()).select_from(ScimUserMapping)) or 0
        )

        offset = max(start_index - 1, 0)
        mappings = list(
            self._session.scalars(
                select(ScimUserMapping)
                .order_by(ScimUserMapping.id)
                .offset(offset)
                .limit(count)
            ).all()
        )

        return mappings, total

    def update_user_mapping_external_id(
        self,
        mapping_id: int,
        external_id: str,
    ) -> ScimUserMapping:
        """Update the external ID on a user mapping.

        Raises:
            ValueError: If the mapping does not exist.
        """
        mapping = self._session.get(ScimUserMapping, mapping_id)
        if not mapping:
            raise ValueError(f"SCIM user mapping with id {mapping_id} not found")
        mapping.external_id = external_id
        return mapping

    def delete_user_mapping(self, mapping_id: int) -> None:
        """Delete a user mapping by ID. No-op if already deleted."""
        mapping = self._session.get(ScimUserMapping, mapping_id)
        if not mapping:
            logger.warning("SCIM user mapping %d not found during delete", mapping_id)
            return
        self._session.delete(mapping)

    # ------------------------------------------------------------------
    # User query operations
    # ------------------------------------------------------------------

    def get_user(self, user_id: UUID) -> User | None:
        """Fetch a user by ID."""
        return self._session.scalar(
            select(User).where(User.id == user_id)  # type: ignore[arg-type]
        )

    def get_user_by_email(self, email: str) -> User | None:
        """Fetch a user by email (case-insensitive)."""
        return self._session.scalar(
            select(User).where(func.lower(User.email) == func.lower(email))
        )

    def add_user(self, user: User) -> None:
        """Add a new user to the session and flush to assign an ID."""
        self._session.add(user)
        self._session.flush()

    def update_user(
        self,
        user: User,
        *,
        email: str | None = None,
        is_active: bool | None = None,
        personal_name: str | None = None,
    ) -> None:
        """Update user attributes. Only sets fields that are provided."""
        if email is not None:
            user.email = email
        if is_active is not None:
            user.is_active = is_active
        if personal_name is not None:
            user.personal_name = personal_name

    def deactivate_user(self, user: User) -> None:
        """Mark a user as inactive."""
        user.is_active = False

    def list_users(
        self,
        scim_filter: ScimFilter | None,
        start_index: int = 1,
        count: int = 100,
    ) -> tuple[list[tuple[User, ScimUserMapping | None]], int]:
        """Query users with optional SCIM filter and pagination.

        Returns:
            A tuple of (list of (user, mapping) pairs, total_count).

        Raises:
            ValueError: If the filter uses an unsupported attribute.
        """
        # Inner-join with ScimUserMapping so only SCIM-managed users appear.
        # Pre-existing system accounts (anonymous, admin, etc.) are excluded
        # unless they were explicitly linked via SCIM provisioning.
        query = (
            select(User)
            .join(ScimUserMapping, ScimUserMapping.user_id == User.id)
            .where(
                User.account_type.notin_([AccountType.BOT, AccountType.EXT_PERM_USER])
            )
        )

        if scim_filter:
            attr = scim_filter.attribute.lower()
            if attr == "username":
                # arg-type: fastapi-users types User.email as str, not a column expression
                # assignment: union return type widens but query is still Select[tuple[User]]
                query = _apply_scim_string_op(query, User.email, scim_filter)  # type: ignore[arg-type, assignment]
            elif attr == "active":
                query = query.where(
                    User.is_active.is_(scim_filter.value.lower() == "true")  # type: ignore[attr-defined]
                )
            elif attr == "externalid":
                mapping = self.get_user_mapping_by_external_id(scim_filter.value)
                if not mapping:
                    return [], 0
                query = query.where(User.id == mapping.user_id)  # type: ignore[arg-type]
            else:
                raise ValueError(
                    f"Unsupported filter attribute: {scim_filter.attribute}"
                )

        # Count total matching rows first, then paginate. SCIM uses 1-based
        # indexing (RFC 7644 §3.4.2), so we convert to a 0-based offset.
        total = (
            self._session.scalar(select(func.count()).select_from(query.subquery()))
            or 0
        )

        offset = max(start_index - 1, 0)
        users = list(
            self._session.scalars(
                query.order_by(User.id).offset(offset).limit(count)  # type: ignore[arg-type]
            )
            .unique()
            .all()
        )

        # Batch-fetch SCIM mappings to avoid N+1 queries
        mapping_map = self._get_user_mappings_batch([u.id for u in users])
        return [(u, mapping_map.get(u.id)) for u in users], total

    def sync_user_external_id(
        self,
        user_id: UUID,
        new_external_id: str | None,
        scim_username: str | None = None,
        fields: ScimMappingFields | None = None,
    ) -> None:
        """Sync the SCIM mapping for a user.

        If a mapping already exists, its fields are updated (including
        setting ``external_id`` to ``None`` when the IdP omits it).
        If no mapping exists and ``new_external_id`` is provided, a new
        mapping is created.  A mapping is never deleted here — SCIM-managed
        users must retain their mapping to remain visible in ``GET /Users``.

        When *fields* is provided, all mapping fields are written
        unconditionally — including ``None`` values — so that a caller can
        clear a previously-set field (e.g. removing a department).
        """
        mapping = self.get_user_mapping_by_user_id(user_id)
        if mapping:
            if mapping.external_id != new_external_id:
                mapping.external_id = new_external_id
            if scim_username is not None:
                mapping.scim_username = scim_username
            if fields is not None:
                mapping.department = fields.department
                mapping.manager = fields.manager
                mapping.given_name = fields.given_name
                mapping.family_name = fields.family_name
                mapping.scim_emails_json = fields.scim_emails_json
        elif new_external_id:
            self.create_user_mapping(
                external_id=new_external_id,
                user_id=user_id,
                scim_username=scim_username,
                fields=fields,
            )

    def _get_user_mappings_batch(
        self, user_ids: list[UUID]
    ) -> dict[UUID, ScimUserMapping]:
        """Batch-fetch SCIM user mappings keyed by user ID."""
        if not user_ids:
            return {}
        mappings = self._session.scalars(
            select(ScimUserMapping).where(ScimUserMapping.user_id.in_(user_ids))
        ).all()
        return {m.user_id: m for m in mappings}

    def get_user_groups(self, user_id: UUID) -> list[tuple[int, str]]:
        """Get groups a user belongs to as ``(group_id, group_name)`` pairs.

        Excludes groups marked for deletion.
        """
        rels = self._session.scalars(
            select(User__UserGroup).where(User__UserGroup.user_id == user_id)
        ).all()

        group_ids = [r.user_group_id for r in rels]
        if not group_ids:
            return []

        groups = self._session.scalars(
            select(UserGroup).where(
                UserGroup.id.in_(group_ids),
                UserGroup.is_up_for_deletion.is_(False),
            )
        ).all()
        return [(g.id, g.name) for g in groups]

    def get_users_groups_batch(
        self, user_ids: list[UUID]
    ) -> dict[UUID, list[tuple[int, str]]]:
        """Batch-fetch group memberships for multiple users.

        Returns a mapping of ``user_id → [(group_id, group_name), ...]``.
        Avoids N+1 queries when building user list responses.
        """
        if not user_ids:
            return {}

        rels = self._session.scalars(
            select(User__UserGroup).where(User__UserGroup.user_id.in_(user_ids))
        ).all()

        group_ids = list({r.user_group_id for r in rels})
        if not group_ids:
            return {}

        groups = self._session.scalars(
            select(UserGroup).where(
                UserGroup.id.in_(group_ids),
                UserGroup.is_up_for_deletion.is_(False),
            )
        ).all()
        groups_by_id = {g.id: g.name for g in groups}

        result: dict[UUID, list[tuple[int, str]]] = {}
        for r in rels:
            if r.user_id and r.user_group_id in groups_by_id:
                result.setdefault(r.user_id, []).append(
                    (r.user_group_id, groups_by_id[r.user_group_id])
                )
        return result

    # ------------------------------------------------------------------
    # Group mapping operations
    # ------------------------------------------------------------------

    def create_group_mapping(
        self,
        external_id: str,
        user_group_id: int,
    ) -> ScimGroupMapping:
        """Create a mapping between a SCIM externalId and an Onyx user group."""
        mapping = ScimGroupMapping(external_id=external_id, user_group_id=user_group_id)
        self._session.add(mapping)
        self._session.flush()
        return mapping

    def get_group_mapping_by_external_id(
        self, external_id: str
    ) -> ScimGroupMapping | None:
        """Look up a group mapping by the IdP's external identifier."""
        return self._session.scalar(
            select(ScimGroupMapping).where(ScimGroupMapping.external_id == external_id)
        )

    def get_group_mapping_by_group_id(
        self, user_group_id: int
    ) -> ScimGroupMapping | None:
        """Look up a group mapping by the Onyx user group ID."""
        return self._session.scalar(
            select(ScimGroupMapping).where(
                ScimGroupMapping.user_group_id == user_group_id
            )
        )

    def list_group_mappings(
        self,
        start_index: int = 1,
        count: int = 100,
    ) -> tuple[list[ScimGroupMapping], int]:
        """List group mappings with SCIM-style pagination.

        Args:
            start_index: 1-based start index (SCIM convention).
            count: Maximum number of results to return.

        Returns:
            A tuple of (mappings, total_count).
        """
        total = (
            self._session.scalar(select(func.count()).select_from(ScimGroupMapping))
            or 0
        )

        offset = max(start_index - 1, 0)
        mappings = list(
            self._session.scalars(
                select(ScimGroupMapping)
                .order_by(ScimGroupMapping.id)
                .offset(offset)
                .limit(count)
            ).all()
        )

        return mappings, total

    def delete_group_mapping(self, mapping_id: int) -> None:
        """Delete a group mapping by ID. No-op if already deleted."""
        mapping = self._session.get(ScimGroupMapping, mapping_id)
        if not mapping:
            logger.warning("SCIM group mapping %d not found during delete", mapping_id)
            return
        self._session.delete(mapping)

    # ------------------------------------------------------------------
    # Group query operations
    # ------------------------------------------------------------------

    def get_group(self, group_id: int) -> UserGroup | None:
        """Fetch a group by ID, returning None if deleted or missing."""
        group = self._session.get(UserGroup, group_id)
        if group and group.is_up_for_deletion:
            return None
        return group

    def get_group_by_name(self, name: str) -> UserGroup | None:
        """Fetch a group by exact name."""
        return self._session.scalar(select(UserGroup).where(UserGroup.name == name))

    def add_group(self, group: UserGroup) -> None:
        """Add a new group to the session and flush to assign an ID."""
        self._session.add(group)
        self._session.flush()

    def add_permission_grant_to_group(
        self,
        group_id: int,
        permission: Permission,
        grant_source: GrantSource,
    ) -> None:
        """Grant a permission to a group and flush."""
        self._session.add(
            PermissionGrant(
                group_id=group_id,
                permission=permission,
                grant_source=grant_source,
            )
        )
        self._session.flush()

    def update_group(
        self,
        group: UserGroup,
        *,
        name: str | None = None,
    ) -> None:
        """Update group attributes and set the modification timestamp."""
        if name is not None:
            group.name = name
        group.time_last_modified_by_user = func.now()

    def delete_group(self, group: UserGroup) -> None:
        """Delete a group from the session."""
        self._session.delete(group)

    def list_groups(
        self,
        scim_filter: ScimFilter | None,
        start_index: int = 1,
        count: int = 100,
    ) -> tuple[list[tuple[UserGroup, str | None]], int]:
        """Query groups with optional SCIM filter and pagination.

        Returns:
            A tuple of (list of (group, external_id) pairs, total_count).

        Raises:
            ValueError: If the filter uses an unsupported attribute.
        """
        query = select(UserGroup).where(UserGroup.is_up_for_deletion.is_(False))

        if scim_filter:
            attr = scim_filter.attribute.lower()
            if attr == "displayname":
                # assignment: union return type widens but query is still Select[tuple[UserGroup]]
                query = _apply_scim_string_op(query, UserGroup.name, scim_filter)  # type: ignore[assignment]
            elif attr == "externalid":
                mapping = self.get_group_mapping_by_external_id(scim_filter.value)
                if not mapping:
                    return [], 0
                query = query.where(UserGroup.id == mapping.user_group_id)
            else:
                raise ValueError(
                    f"Unsupported filter attribute: {scim_filter.attribute}"
                )

        total = (
            self._session.scalar(select(func.count()).select_from(query.subquery()))
            or 0
        )

        offset = max(start_index - 1, 0)
        groups = list(
            self._session.scalars(
                query.order_by(UserGroup.id).offset(offset).limit(count)
            ).all()
        )

        ext_id_map = self._get_group_external_ids([g.id for g in groups])
        return [(g, ext_id_map.get(g.id)) for g in groups], total

    def get_group_members(self, group_id: int) -> list[tuple[UUID, str | None]]:
        """Get group members as (user_id, email) pairs."""
        rels = self._session.scalars(
            select(User__UserGroup).where(User__UserGroup.user_group_id == group_id)
        ).all()

        user_ids = [r.user_id for r in rels if r.user_id]
        if not user_ids:
            return []

        users = (
            self._session.scalars(
                select(User).where(User.id.in_(user_ids))  # type: ignore[attr-defined]
            )
            .unique()
            .all()
        )
        users_by_id = {u.id: u for u in users}

        return [
            (
                r.user_id,
                users_by_id[r.user_id].email if r.user_id in users_by_id else None,
            )
            for r in rels
            if r.user_id
        ]

    def validate_member_ids(self, uuids: list[UUID]) -> list[UUID]:
        """Return the subset of UUIDs that don't exist as users.

        Returns an empty list if all IDs are valid.
        """
        if not uuids:
            return []
        existing_users = (
            self._session.scalars(
                select(User).where(User.id.in_(uuids))  # type: ignore[attr-defined]
            )
            .unique()
            .all()
        )
        existing_ids = {u.id for u in existing_users}
        return [uid for uid in uuids if uid not in existing_ids]

    def upsert_group_members(self, group_id: int, user_ids: list[UUID]) -> None:
        """Add user-group relationships, ignoring duplicates."""
        if not user_ids:
            return
        self._session.execute(
            pg_insert(User__UserGroup)
            .values([{"user_id": uid, "user_group_id": group_id} for uid in user_ids])
            .on_conflict_do_nothing(
                index_elements=[
                    User__UserGroup.user_group_id,
                    User__UserGroup.user_id,
                ]
            )
        )

    def replace_group_members(self, group_id: int, user_ids: list[UUID]) -> None:
        """Replace all members of a group."""
        self._session.execute(
            sa_delete(User__UserGroup).where(User__UserGroup.user_group_id == group_id)
        )
        self.upsert_group_members(group_id, user_ids)

    def remove_group_members(self, group_id: int, user_ids: list[UUID]) -> None:
        """Remove specific members from a group."""
        if not user_ids:
            return
        self._session.execute(
            sa_delete(User__UserGroup).where(
                User__UserGroup.user_group_id == group_id,
                User__UserGroup.user_id.in_(user_ids),
            )
        )

    def delete_group_with_members(self, group: UserGroup) -> None:
        """Remove all member relationships and delete the group."""
        self._session.execute(
            sa_delete(User__UserGroup).where(User__UserGroup.user_group_id == group.id)
        )
        self._session.delete(group)

    def sync_group_external_id(
        self, group_id: int, new_external_id: str | None
    ) -> None:
        """Create, update, or delete the external ID mapping for a group."""
        mapping = self.get_group_mapping_by_group_id(group_id)
        if new_external_id:
            if mapping:
                if mapping.external_id != new_external_id:
                    mapping.external_id = new_external_id
            else:
                self.create_group_mapping(
                    external_id=new_external_id, user_group_id=group_id
                )
        elif mapping:
            self.delete_group_mapping(mapping.id)

    def _get_group_external_ids(self, group_ids: list[int]) -> dict[int, str]:
        """Batch-fetch external IDs for a list of group IDs."""
        if not group_ids:
            return {}
        mappings = self._session.scalars(
            select(ScimGroupMapping).where(
                ScimGroupMapping.user_group_id.in_(group_ids)
            )
        ).all()
        return {m.user_group_id: m.external_id for m in mappings}


# ---------------------------------------------------------------------------
# Module-level helpers (used by DAL methods above)
# ---------------------------------------------------------------------------


def _apply_scim_string_op(
    query: Select[tuple[User]] | Select[tuple[UserGroup]],
    column: SQLColumnExpression[str],
    scim_filter: ScimFilter,
) -> Select[tuple[User]] | Select[tuple[UserGroup]]:
    """Apply a SCIM string filter operator using SQLAlchemy column operators.

    Handles eq (case-insensitive exact), co (contains), and sw (starts with).
    SQLAlchemy's operators handle LIKE-pattern escaping internally.
    """
    val = scim_filter.value
    if scim_filter.operator == ScimFilterOperator.EQUAL:
        return query.where(func.lower(column) == val.lower())
    elif scim_filter.operator == ScimFilterOperator.CONTAINS:
        return query.where(column.icontains(val, autoescape=True))
    elif scim_filter.operator == ScimFilterOperator.STARTS_WITH:
        return query.where(column.istartswith(val, autoescape=True))
    else:
        raise ValueError(f"Unsupported string filter operator: {scim_filter.operator}")


================================================
FILE: backend/ee/onyx/db/search.py
================================================
import uuid
from datetime import timedelta
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import SearchQuery


def create_search_query(
    db_session: Session,
    user_id: UUID,
    query: str,
    query_expansions: list[str] | None = None,
) -> SearchQuery:
    """Create and persist a `SearchQuery` row.

    Notes:
    - `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.
    - `created_at` is filled by the DB (server_default=now()).
    """
    search_query = SearchQuery(
        id=uuid.uuid4(),
        user_id=user_id,
        query=query,
        query_expansions=query_expansions,
    )
    db_session.add(search_query)
    db_session.commit()
    db_session.refresh(search_query)
    return search_query


def fetch_search_queries_for_user(
    db_session: Session,
    user_id: UUID,
    filter_days: int | None = None,
    limit: int | None = None,
) -> list[SearchQuery]:
    """Fetch `SearchQuery` rows for a user.

    Args:
        user_id: User UUID.
        filter_days: Optional time filter. If provided, only rows created within
            the last `filter_days` days are returned.
        limit: Optional max number of rows to return.
    """
    if filter_days is not None and filter_days <= 0:
        raise ValueError("filter_days must be > 0")

    stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)

    if filter_days is not None and filter_days > 0:
        cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)
        stmt = stmt.where(SearchQuery.created_at >= cutoff)

    stmt = stmt.order_by(SearchQuery.created_at.desc())

    if limit is not None:
        stmt = stmt.limit(limit)

    return list(db_session.scalars(stmt).all())


================================================
FILE: backend/ee/onyx/db/standard_answer.py
================================================
import re
import string
from collections.abc import Sequence

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import StandardAnswer
from onyx.db.models import StandardAnswerCategory
from onyx.utils.logger import setup_logger

logger = setup_logger()


def check_category_validity(category_name: str) -> bool:
    """If a category name is too long, it should not be used (it will cause an error in Postgres
    as the unique constraint can only apply to entries that are less than 2704 bytes).

    Additionally, extremely long categories are not really usable / useful."""
    if len(category_name) > 255:
        logger.error(
            f"Category with name '{category_name}' is too long, cannot be used"
        )
        return False

    return True


def insert_standard_answer_category(
    category_name: str, db_session: Session
) -> StandardAnswerCategory:
    if not check_category_validity(category_name):
        raise ValueError(f"Invalid category name: {category_name}")
    standard_answer_category = StandardAnswerCategory(name=category_name)
    db_session.add(standard_answer_category)
    db_session.commit()

    return standard_answer_category


def insert_standard_answer(
    keyword: str,
    answer: str,
    category_ids: list[int],
    match_regex: bool,
    match_any_keywords: bool,
    db_session: Session,
) -> StandardAnswer:
    existing_categories = fetch_standard_answer_categories_by_ids(
        standard_answer_category_ids=category_ids,
        db_session=db_session,
    )
    if len(existing_categories) != len(category_ids):
        raise ValueError(f"Some or all categories with ids {category_ids} do not exist")

    standard_answer = StandardAnswer(
        keyword=keyword,
        answer=answer,
        categories=existing_categories,
        active=True,
        match_regex=match_regex,
        match_any_keywords=match_any_keywords,
    )
    db_session.add(standard_answer)
    db_session.commit()
    return standard_answer


def update_standard_answer(
    standard_answer_id: int,
    keyword: str,
    answer: str,
    category_ids: list[int],
    match_regex: bool,
    match_any_keywords: bool,
    db_session: Session,
) -> StandardAnswer:
    standard_answer = db_session.scalar(
        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)
    )
    if standard_answer is None:
        raise ValueError(f"No standard answer with id {standard_answer_id}")

    existing_categories = fetch_standard_answer_categories_by_ids(
        standard_answer_category_ids=category_ids,
        db_session=db_session,
    )
    if len(existing_categories) != len(category_ids):
        raise ValueError(f"Some or all categories with ids {category_ids} do not exist")

    standard_answer.keyword = keyword
    standard_answer.answer = answer
    standard_answer.categories = list(existing_categories)
    standard_answer.match_regex = match_regex
    standard_answer.match_any_keywords = match_any_keywords

    db_session.commit()

    return standard_answer


def remove_standard_answer(
    standard_answer_id: int,
    db_session: Session,
) -> None:
    standard_answer = db_session.scalar(
        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)
    )
    if standard_answer is None:
        raise ValueError(f"No standard answer with id {standard_answer_id}")

    standard_answer.active = False
    db_session.commit()


def update_standard_answer_category(
    standard_answer_category_id: int,
    category_name: str,
    db_session: Session,
) -> StandardAnswerCategory:
    standard_answer_category = db_session.scalar(
        select(StandardAnswerCategory).where(
            StandardAnswerCategory.id == standard_answer_category_id
        )
    )
    if standard_answer_category is None:
        raise ValueError(
            f"No standard answer category with id {standard_answer_category_id}"
        )

    if not check_category_validity(category_name):
        raise ValueError(f"Invalid category name: {category_name}")

    standard_answer_category.name = category_name

    db_session.commit()

    return standard_answer_category


def fetch_standard_answer_category(
    standard_answer_category_id: int,
    db_session: Session,
) -> StandardAnswerCategory | None:
    return db_session.scalar(
        select(StandardAnswerCategory).where(
            StandardAnswerCategory.id == standard_answer_category_id
        )
    )


def fetch_standard_answer_categories_by_ids(
    standard_answer_category_ids: list[int],
    db_session: Session,
) -> Sequence[StandardAnswerCategory]:
    return db_session.scalars(
        select(StandardAnswerCategory).where(
            StandardAnswerCategory.id.in_(standard_answer_category_ids)
        )
    ).all()


def fetch_standard_answer_categories(
    db_session: Session,
) -> Sequence[StandardAnswerCategory]:
    return db_session.scalars(select(StandardAnswerCategory)).all()


def fetch_standard_answer(
    standard_answer_id: int,
    db_session: Session,
) -> StandardAnswer | None:
    return db_session.scalar(
        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)
    )


def fetch_standard_answers(db_session: Session) -> Sequence[StandardAnswer]:
    return db_session.scalars(
        select(StandardAnswer).where(StandardAnswer.active.is_(True))
    ).all()


def create_initial_default_standard_answer_category(db_session: Session) -> None:
    default_category_id = 0
    default_category_name = "General"
    default_category = fetch_standard_answer_category(
        standard_answer_category_id=default_category_id,
        db_session=db_session,
    )
    if default_category is not None:
        if default_category.name != default_category_name:
            raise ValueError(
                "DB is not in a valid initial state. Default standard answer category does not have expected name."
            )
        return

    standard_answer_category = StandardAnswerCategory(
        id=default_category_id,
        name=default_category_name,
    )
    db_session.add(standard_answer_category)
    db_session.commit()


def fetch_standard_answer_categories_by_names(
    standard_answer_category_names: list[str],
    db_session: Session,
) -> Sequence[StandardAnswerCategory]:
    return db_session.scalars(
        select(StandardAnswerCategory).where(
            StandardAnswerCategory.name.in_(standard_answer_category_names)
        )
    ).all()


def find_matching_standard_answers(
    id_in: list[int],
    query: str,
    db_session: Session,
) -> list[tuple[StandardAnswer, str]]:
    """
    Returns a list of tuples, where each tuple is a StandardAnswer definition matching
    the query and a string representing the match (either the regex match group or the
    set of keywords).

    If `answer_instance.match_regex` is true, the definition is considered "matched"
    if the query matches the `answer_instance.keyword` using `re.search`.

    Otherwise, the definition is considered "matched" if the space-delimited tokens
    in `keyword` exists in `query`, depending on the state of `match_any_keywords`
    """
    stmt = (
        select(StandardAnswer)
        .where(StandardAnswer.active.is_(True))
        .where(StandardAnswer.id.in_(id_in))
    )
    possible_standard_answers: Sequence[StandardAnswer] = db_session.scalars(stmt).all()

    matching_standard_answers: list[tuple[StandardAnswer, str]] = []
    for standard_answer in possible_standard_answers:
        if standard_answer.match_regex:
            maybe_matches = re.search(standard_answer.keyword, query, re.IGNORECASE)
            if maybe_matches is not None:
                match_group = maybe_matches.group(0)
                matching_standard_answers.append((standard_answer, match_group))

        else:
            # Remove punctuation and split the keyword into individual words
            keyword_words = set(
                "".join(
                    char
                    for char in standard_answer.keyword.lower()
                    if char not in string.punctuation
                ).split()
            )

            # Remove punctuation and split the query into individual words
            query_words = "".join(
                char for char in query.lower() if char not in string.punctuation
            ).split()

            # Check if all of the keyword words are in the query words
            if standard_answer.match_any_keywords:
                for word in query_words:
                    if word in keyword_words:
                        matching_standard_answers.append((standard_answer, word))
                        break
            else:
                if all(word in query_words for word in keyword_words):
                    matching_standard_answers.append(
                        (
                            standard_answer,
                            re.sub(r"\s+?", ", ", standard_answer.keyword),
                        )
                    )

    return matching_standard_answers


================================================
FILE: backend/ee/onyx/db/token_limit.py
================================================
from collections.abc import Sequence

from sqlalchemy import exists
from sqlalchemy import Row
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session

from onyx.configs.constants import TokenRateLimitScope
from onyx.db.models import TokenRateLimit
from onyx.db.models import TokenRateLimit__UserGroup
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.models import UserRole
from onyx.server.token_rate_limits.models import TokenRateLimitArgs


def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
    if user.role == UserRole.ADMIN:
        return stmt

    # If anonymous user, only show global/public token_rate_limits
    if user.is_anonymous:
        where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
        return stmt.where(where_clause)

    stmt = stmt.distinct()
    TRLimit_UG = aliased(TokenRateLimit__UserGroup)
    User__UG = aliased(User__UserGroup)

    """
    Here we select token_rate_limits by relation:
    User -> User__UserGroup -> TokenRateLimit__UserGroup ->
    TokenRateLimit
    """
    stmt = stmt.outerjoin(TRLimit_UG).outerjoin(
        User__UG,
        User__UG.user_group_id == TRLimit_UG.user_group_id,
    )

    """
    Filter token_rate_limits by:
    - if the user is in the user_group that owns the token_rate_limit
    - if the user is not a global_curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out token_rate_limits that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all token_rate_limits in the groups the user curates
    """

    where_clause = User__UG.user_id == user.id
    if user.role == UserRole.CURATOR and get_editable:
        where_clause &= User__UG.is_curator == True  # noqa: E712
    if get_editable:
        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)
        if user.role == UserRole.CURATOR:
            user_groups = user_groups.where(
                User__UserGroup.is_curator == True  # noqa: E712
            )
        where_clause &= (
            ~exists()
            .where(TRLimit_UG.rate_limit_id == TokenRateLimit.id)
            .where(~TRLimit_UG.user_group_id.in_(user_groups))
            .correlate(TokenRateLimit)
        )

    return stmt.where(where_clause)


def fetch_all_user_group_token_rate_limits_by_group(
    db_session: Session,
) -> Sequence[Row[tuple[TokenRateLimit, str]]]:
    query = (
        select(TokenRateLimit, UserGroup.name)
        .join(
            TokenRateLimit__UserGroup,
            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,
        )
        .join(UserGroup, UserGroup.id == TokenRateLimit__UserGroup.user_group_id)
    )

    return db_session.execute(query).all()


def insert_user_group_token_rate_limit(
    db_session: Session,
    token_rate_limit_settings: TokenRateLimitArgs,
    group_id: int,
) -> TokenRateLimit:
    token_limit = TokenRateLimit(
        enabled=token_rate_limit_settings.enabled,
        token_budget=token_rate_limit_settings.token_budget,
        period_hours=token_rate_limit_settings.period_hours,
        scope=TokenRateLimitScope.USER_GROUP,
    )
    db_session.add(token_limit)
    db_session.flush()

    rate_limit = TokenRateLimit__UserGroup(
        rate_limit_id=token_limit.id, user_group_id=group_id
    )
    db_session.add(rate_limit)
    db_session.commit()

    return token_limit


def fetch_user_group_token_rate_limits_for_user(
    db_session: Session,
    group_id: int,
    user: User,
    enabled_only: bool = False,
    ordered: bool = True,
    get_editable: bool = True,
) -> Sequence[TokenRateLimit]:
    stmt = (
        select(TokenRateLimit)
        .join(
            TokenRateLimit__UserGroup,
            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,
        )
        .where(TokenRateLimit__UserGroup.user_group_id == group_id)
    )
    stmt = _add_user_filters(stmt, user, get_editable)

    if enabled_only:
        stmt = stmt.where(TokenRateLimit.enabled.is_(True))

    if ordered:
        stmt = stmt.order_by(TokenRateLimit.created_at.desc())

    return db_session.scalars(stmt).all()


================================================
FILE: backend/ee/onyx/db/usage_export.py
================================================
import uuid
from collections.abc import Generator
from datetime import datetime
from typing import IO
from typing import Optional

from fastapi_users_db_sqlalchemy import UUID_ID
from sqlalchemy import cast
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Session

from ee.onyx.db.query_history import fetch_chat_sessions_eagerly_by_time
from ee.onyx.server.reporting.usage_export_models import ChatMessageSkeleton
from ee.onyx.server.reporting.usage_export_models import FlowType
from ee.onyx.server.reporting.usage_export_models import UsageReportMetadata
from onyx.configs.constants import MessageType
from onyx.db.models import UsageReport
from onyx.db.models import User
from onyx.file_store.file_store import get_default_file_store


# Gets skeletons of all messages in the given range
def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
    """Returns a tuple where:
    first element is the most recent timestamp out of the sessions iterated
    - this timestamp can be used to paginate forward in time
    second element is a list of messages belonging to all the sessions iterated

    Only messages of type USER are returned
    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=period[0],
        end=period[1],
        db_session=db_session,
        limit=limit,
        initial_time=initial_time,
    )

    message_skeletons: list[ChatMessageSkeleton] = []
    for chat_session in chat_sessions:
        flow_type = FlowType.SLACK if chat_session.onyxbot_flow else FlowType.CHAT

        for message in chat_session.messages:
            # Only count user messages
            if message.message_type != MessageType.USER:
                continue

            # Get user email
            user_email = chat_session.user.email if chat_session.user else None

            # Get assistant name (from session persona, or alternate if specified)
            assistant_name = None
            if chat_session.persona:
                assistant_name = chat_session.persona.name

            message_skeletons.append(
                ChatMessageSkeleton(
                    message_id=message.id,
                    chat_session_id=chat_session.id,
                    user_id=str(chat_session.user_id) if chat_session.user_id else None,
                    flow_type=flow_type,
                    time_sent=message.time_sent,
                    assistant_name=assistant_name,
                    user_email=user_email,
                    number_of_tokens=message.token_count,
                )
            )
    if len(chat_sessions) == 0:
        return None, []

    return chat_sessions[-1].time_created, message_skeletons


def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
) -> Generator[list[ChatMessageSkeleton], None, None]:
    """period is the range of time over which to fetch messages."""
    initial_time: Optional[datetime] = period[0]
    while True:
        # iterate from oldest to newest
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session,
            period,
            initial_time=initial_time,
        )

        if not message_skeletons:
            return

        yield message_skeletons

        # Update initial_time for the next iteration
        initial_time = time_created


def get_all_usage_reports(db_session: Session) -> list[UsageReportMetadata]:
    # Get the user emails
    usage_reports = db_session.query(UsageReport).all()
    user_ids = {r.requestor_user_id for r in usage_reports if r.requestor_user_id}
    user_emails = {
        user.id: user.email
        for user in db_session.query(User)
        .filter(cast(User.id, UUID).in_(user_ids))
        .all()
    }

    return [
        UsageReportMetadata(
            report_name=r.report_name,
            requestor=(
                user_emails.get(r.requestor_user_id) if r.requestor_user_id else None
            ),
            time_created=r.time_created,
            period_from=r.period_from,
            period_to=r.period_to,
        )
        for r in usage_reports
    ]


def get_usage_report_data(
    report_display_name: str,
) -> IO:
    """
    Get the usage report data from the file store.

    Args:
        db_session: The database session.
        report_display_name: The display name of the usage report. Also assumes
                             that the file is stored with this as the ID in the file store.

    Returns:
        The usage report data.
    """
    file_store = get_default_file_store()
    # usage report may be very large, so don't load it all into memory
    return file_store.read_file(
        file_id=report_display_name, mode="b", use_tempfile=True
    )


def write_usage_report(
    db_session: Session,
    report_name: str,
    user_id: uuid.UUID | UUID_ID | None,
    period: tuple[datetime, datetime] | None,
) -> UsageReport:
    new_report = UsageReport(
        report_name=report_name,
        requestor_user_id=user_id,
        period_from=period[0] if period else None,
        period_to=period[1] if period else None,
    )
    db_session.add(new_report)
    db_session.commit()
    return new_report


================================================
FILE: backend/ee/onyx/db/user_group.py
================================================
from collections.abc import Sequence
from operator import and_
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import GrantSource
from onyx.db.enums import Permission
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import Credential__UserGroup
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import DocumentSet
from onyx.db.models import DocumentSet__UserGroup
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.models import LLMProvider__UserGroup
from onyx.db.models import PermissionGrant
from onyx.db.models import Persona
from onyx.db.models import Persona__UserGroup
from onyx.db.models import TokenRateLimit__UserGroup
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import fetch_user_by_id
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _cleanup_user__user_group_relationships__no_commit(
    db_session: Session,
    user_group_id: int,
    user_ids: list[UUID] | None = None,
) -> None:
    """NOTE: does not commit the transaction."""
    where_clause = User__UserGroup.user_group_id == user_group_id
    if user_ids:
        where_clause &= User__UserGroup.user_id.in_(user_ids)

    user__user_group_relationships = db_session.scalars(
        select(User__UserGroup).where(where_clause)
    ).all()
    for user__user_group_relationship in user__user_group_relationships:
        db_session.delete(user__user_group_relationship)


def _cleanup_credential__user_group_relationships__no_commit(
    db_session: Session,
    user_group_id: int,
) -> None:
    """NOTE: does not commit the transaction."""
    db_session.query(Credential__UserGroup).filter(
        Credential__UserGroup.user_group_id == user_group_id
    ).delete(synchronize_session=False)


def _cleanup_llm_provider__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    db_session.query(LLMProvider__UserGroup).filter(
        LLMProvider__UserGroup.user_group_id == user_group_id
    ).delete(synchronize_session=False)


def _cleanup_persona__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    db_session.query(Persona__UserGroup).filter(
        Persona__UserGroup.user_group_id == user_group_id
    ).delete(synchronize_session=False)


def _cleanup_token_rate_limit__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    token_rate_limit__user_group_relationships = db_session.scalars(
        select(TokenRateLimit__UserGroup).where(
            TokenRateLimit__UserGroup.user_group_id == user_group_id
        )
    ).all()
    for (
        token_rate_limit__user_group_relationship
    ) in token_rate_limit__user_group_relationships:
        db_session.delete(token_rate_limit__user_group_relationship)


def _cleanup_user_group__cc_pair_relationships__no_commit(
    db_session: Session, user_group_id: int, outdated_only: bool
) -> None:
    """NOTE: does not commit the transaction."""
    stmt = select(UserGroup__ConnectorCredentialPair).where(
        UserGroup__ConnectorCredentialPair.user_group_id == user_group_id
    )
    if outdated_only:
        stmt = stmt.where(
            UserGroup__ConnectorCredentialPair.is_current == False  # noqa: E712
        )
    user_group__cc_pair_relationships = db_session.scalars(stmt)
    for user_group__cc_pair_relationship in user_group__cc_pair_relationships:
        db_session.delete(user_group__cc_pair_relationship)


def _cleanup_document_set__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    db_session.execute(
        delete(DocumentSet__UserGroup).where(
            DocumentSet__UserGroup.user_group_id == user_group_id
        )
    )


def validate_object_creation_for_user(
    db_session: Session,
    user: User,
    target_group_ids: list[int] | None = None,
    object_is_public: bool | None = None,
    object_is_perm_sync: bool | None = None,
    object_is_owned_by_user: bool = False,
    object_is_new: bool = False,
) -> None:
    """
    All users can create/edit permission synced objects if they don't specify a group
    All admin actions are allowed.
    Curators and global curators can create public objects.
    Prevents other non-admins from creating/editing:
    - public objects
    - objects with no groups
    - objects that belong to a group they don't curate
    """
    if object_is_perm_sync and not target_group_ids:
        return

    # Admins are allowed
    if user.role == UserRole.ADMIN:
        return

    # Allow curators and global curators to create public objects
    # w/o associated groups IF the object is new/owned by them
    if (
        object_is_public
        and user.role in [UserRole.CURATOR, UserRole.GLOBAL_CURATOR]
        and (object_is_new or object_is_owned_by_user)
    ):
        return

    if object_is_public and user.role == UserRole.BASIC:
        detail = "User does not have permission to create public objects"
        logger.error(detail)
        raise HTTPException(
            status_code=400,
            detail=detail,
        )

    if not target_group_ids:
        detail = "Curators must specify 1+ groups"
        logger.error(detail)
        raise HTTPException(
            status_code=400,
            detail=detail,
        )

    user_curated_groups = fetch_user_groups_for_user(
        db_session=db_session,
        user_id=user.id,
        # Global curators can curate all groups they are member of
        only_curator_groups=user.role != UserRole.GLOBAL_CURATOR,
    )
    user_curated_group_ids = set([group.id for group in user_curated_groups])
    target_group_ids_set = set(target_group_ids)
    if not target_group_ids_set.issubset(user_curated_group_ids):
        detail = "Curators cannot control groups they don't curate"
        logger.error(detail)
        raise HTTPException(
            status_code=400,
            detail=detail,
        )


def fetch_user_group(db_session: Session, user_group_id: int) -> UserGroup | None:
    stmt = select(UserGroup).where(UserGroup.id == user_group_id)
    return db_session.scalar(stmt)


def _add_user_group_snapshot_eager_loads(
    stmt: Select,
) -> Select:
    """Add eager loading options needed by UserGroup.from_model snapshot creation."""
    return stmt.options(
        selectinload(UserGroup.users),
        selectinload(UserGroup.user_group_relationships),
        selectinload(UserGroup.cc_pair_relationships)
        .selectinload(UserGroup__ConnectorCredentialPair.cc_pair)
        .options(
            selectinload(ConnectorCredentialPair.connector),
            selectinload(ConnectorCredentialPair.credential).selectinload(
                Credential.user
            ),
        ),
        selectinload(UserGroup.document_sets).options(
            selectinload(DocumentSet.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSet.users),
            selectinload(DocumentSet.groups),
            selectinload(DocumentSet.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        ),
        selectinload(UserGroup.personas).options(
            selectinload(Persona.tools),
            selectinload(Persona.hierarchy_nodes),
            selectinload(Persona.attached_documents).selectinload(
                Document.parent_hierarchy_node
            ),
            selectinload(Persona.labels),
            selectinload(Persona.document_sets).options(
                selectinload(DocumentSet.connector_credential_pairs).selectinload(
                    ConnectorCredentialPair.connector
                ),
                selectinload(DocumentSet.users),
                selectinload(DocumentSet.groups),
                selectinload(DocumentSet.federated_connectors).selectinload(
                    FederatedConnector__DocumentSet.federated_connector
                ),
            ),
            selectinload(Persona.user),
            selectinload(Persona.user_files),
            selectinload(Persona.users),
            selectinload(Persona.groups),
        ),
    )


def fetch_user_groups(
    db_session: Session,
    only_up_to_date: bool = True,
    eager_load_for_snapshot: bool = False,
    include_default: bool = True,
) -> Sequence[UserGroup]:
    """
    Fetches user groups from the database.

    This function retrieves a sequence of `UserGroup` objects from the database.
    If `only_up_to_date` is set to `True`, it filters the user groups to return only those
    that are marked as up-to-date (`is_up_to_date` is `True`).

    Args:
        db_session (Session): The SQLAlchemy session used to query the database.
        only_up_to_date (bool, optional): Flag to determine whether to filter the results
            to include only up to date user groups. Defaults to `True`.
        eager_load_for_snapshot: If True, adds eager loading for all relationships
            needed by UserGroup.from_model snapshot creation.
        include_default: If False, excludes system default groups (is_default=True).

    Returns:
        Sequence[UserGroup]: A sequence of `UserGroup` objects matching the query criteria.
    """
    stmt = select(UserGroup)
    if only_up_to_date:
        stmt = stmt.where(UserGroup.is_up_to_date == True)  # noqa: E712
    if not include_default:
        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712
    if eager_load_for_snapshot:
        stmt = _add_user_group_snapshot_eager_loads(stmt)
    return db_session.scalars(stmt).unique().all()


def fetch_user_groups_for_user(
    db_session: Session,
    user_id: UUID,
    only_curator_groups: bool = False,
    eager_load_for_snapshot: bool = False,
    include_default: bool = True,
) -> Sequence[UserGroup]:
    stmt = (
        select(UserGroup)
        .join(User__UserGroup, User__UserGroup.user_group_id == UserGroup.id)
        .join(User, User.id == User__UserGroup.user_id)  # type: ignore
        .where(User.id == user_id)  # type: ignore
    )
    if only_curator_groups:
        stmt = stmt.where(User__UserGroup.is_curator == True)  # noqa: E712
    if not include_default:
        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712
    if eager_load_for_snapshot:
        stmt = _add_user_group_snapshot_eager_loads(stmt)
    return db_session.scalars(stmt).unique().all()


def construct_document_id_select_by_usergroup(
    user_group_id: int,
) -> Select:
    """This returns a statement that should be executed using
    .yield_per() to minimize overhead. The primary consumers of this function
    are background processing task generators."""
    stmt = (
        select(Document.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            UserGroup__ConnectorCredentialPair,
            UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,
        )
        .join(
            UserGroup,
            UserGroup__ConnectorCredentialPair.user_group_id == UserGroup.id,
        )
        .where(UserGroup.id == user_group_id)
        .order_by(Document.id)
    )
    stmt = stmt.distinct()
    return stmt


def fetch_documents_for_user_group_paginated(
    db_session: Session,
    user_group_id: int,
    last_document_id: str | None = None,
    limit: int = 100,
) -> tuple[Sequence[Document], str | None]:
    stmt = (
        select(Document)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            UserGroup__ConnectorCredentialPair,
            UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,
        )
        .join(
            UserGroup,
            UserGroup__ConnectorCredentialPair.user_group_id == UserGroup.id,
        )
        .where(UserGroup.id == user_group_id)
        .order_by(Document.id)
        .limit(limit)
    )
    if last_document_id is not None:
        stmt = stmt.where(Document.id > last_document_id)
    stmt = stmt.distinct()

    documents = db_session.scalars(stmt).all()
    return documents, documents[-1].id if documents else None


def fetch_user_groups_for_documents(
    db_session: Session,
    document_ids: list[str],
) -> Sequence[tuple[str, list[str]]]:
    """
    Fetches all user groups that have access to the given documents.

    NOTE: this doesn't include groups if the cc_pair is access type SYNC
    """
    stmt = (
        select(Document.id, func.array_agg(UserGroup.name))
        .join(
            UserGroup__ConnectorCredentialPair,
            UserGroup.id == UserGroup__ConnectorCredentialPair.user_group_id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                ConnectorCredentialPair.id
                == UserGroup__ConnectorCredentialPair.cc_pair_id,
                ConnectorCredentialPair.access_type != AccessType.SYNC,
            ),
        )
        .join(
            DocumentByConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(Document, Document.id == DocumentByConnectorCredentialPair.id)
        .where(Document.id.in_(document_ids))
        .where(UserGroup__ConnectorCredentialPair.is_current == True)  # noqa: E712
        # don't include CC pairs that are being deleted
        # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them
        .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING)
        .group_by(Document.id)
    )

    return db_session.execute(stmt).all()  # type: ignore


def _check_user_group_is_modifiable(user_group: UserGroup) -> None:
    if not user_group.is_up_to_date:
        raise ValueError(
            "Specified user group is currently syncing. Wait until the current sync has finished before editing."
        )


def _add_user__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int, user_ids: list[UUID]
) -> None:
    """NOTE: does not commit the transaction.

    This function is idempotent - it will skip users who are already in the group
    to avoid duplicate key violations during concurrent operations or re-syncs.
    Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.
    """
    if not user_ids:
        return

    insert_stmt = (
        insert(User__UserGroup)
        .values(
            [
                {"user_id": user_id, "user_group_id": user_group_id}
                for user_id in user_ids
            ]
        )
        .on_conflict_do_nothing(
            index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]
        )
    )
    db_session.execute(insert_stmt)


def _add_user_group__cc_pair_relationships__no_commit(
    db_session: Session, user_group_id: int, cc_pair_ids: list[int]
) -> list[UserGroup__ConnectorCredentialPair]:
    """NOTE: does not commit the transaction."""
    relationships = [
        UserGroup__ConnectorCredentialPair(
            user_group_id=user_group_id, cc_pair_id=cc_pair_id
        )
        for cc_pair_id in cc_pair_ids
    ]
    db_session.add_all(relationships)
    return relationships


def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:
    db_user_group = UserGroup(
        name=user_group.name,
        time_last_modified_by_user=func.now(),
        is_up_to_date=DISABLE_VECTOR_DB,
    )
    db_session.add(db_user_group)
    db_session.flush()  # give the group an ID

    # Every group gets the "basic" permission by default
    db_session.add(
        PermissionGrant(
            group_id=db_user_group.id,
            permission=Permission.BASIC_ACCESS,
            grant_source=GrantSource.SYSTEM,
        )
    )
    db_session.flush()

    _add_user__user_group_relationships__no_commit(
        db_session=db_session,
        user_group_id=db_user_group.id,
        user_ids=user_group.user_ids,
    )
    _add_user_group__cc_pair_relationships__no_commit(
        db_session=db_session,
        user_group_id=db_user_group.id,
        cc_pair_ids=user_group.cc_pair_ids,
    )

    recompute_user_permissions__no_commit(user_group.user_ids, db_session)

    db_session.commit()
    return db_user_group


def _mark_user_group__cc_pair_relationships_outdated__no_commit(
    db_session: Session, user_group_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    user_group__cc_pair_relationships = db_session.scalars(
        select(UserGroup__ConnectorCredentialPair).where(
            UserGroup__ConnectorCredentialPair.user_group_id == user_group_id
        )
    )
    for user_group__cc_pair_relationship in user_group__cc_pair_relationships:
        user_group__cc_pair_relationship.is_current = False


def _validate_curator_status__no_commit(
    db_session: Session,
    users: list[User],
) -> None:
    for user in users:
        # Check if the user is a curator in any of their groups
        curator_relationships = (
            db_session.query(User__UserGroup)
            .filter(
                User__UserGroup.user_id == user.id,
                User__UserGroup.is_curator == True,  # noqa: E712
            )
            .all()
        )

        # if the user is a curator in any of their groups, set their role to CURATOR
        # otherwise, set their role to BASIC only if they were previously a CURATOR
        if curator_relationships:
            user.role = UserRole.CURATOR
        elif user.role == UserRole.CURATOR:
            user.role = UserRole.BASIC
        db_session.add(user)


def remove_curator_status__no_commit(db_session: Session, user: User) -> None:
    stmt = (
        update(User__UserGroup)
        .where(User__UserGroup.user_id == user.id)
        .values(is_curator=False)
    )
    db_session.execute(stmt)
    _validate_curator_status__no_commit(db_session, [user])


def _validate_curator_relationship_update_requester(
    db_session: Session,
    user_group_id: int,
    user_making_change: User,
) -> None:
    """
    This function validates that the user making the change has the necessary permissions
    to update the curator relationship for the target user in the given user group.
    """

    # Admins can update curator relationships for any group
    if user_making_change.role == UserRole.ADMIN:
        return

    # check if the user making the change is a curator in the group they are changing the curator relationship for
    user_making_change_curator_groups = fetch_user_groups_for_user(
        db_session=db_session,
        user_id=user_making_change.id,
        # only check if the user making the change is a curator if they are a curator
        # otherwise, they are a global_curator and can update the curator relationship
        # for any group they are a member of
        only_curator_groups=user_making_change.role == UserRole.CURATOR,
    )
    requestor_curator_group_ids = [
        group.id for group in user_making_change_curator_groups
    ]
    if user_group_id not in requestor_curator_group_ids:
        raise ValueError(
            f"user making change {user_making_change.email} is not a curator,"
            f" admin, or global_curator for group '{user_group_id}'"
        )


def _validate_curator_relationship_update_request(
    db_session: Session,
    user_group_id: int,
    target_user: User,
) -> None:
    """
    This function validates that the curator_relationship_update request itself is valid.
    """
    if target_user.role == UserRole.ADMIN:
        raise ValueError(
            f"User '{target_user.email}' is an admin and therefore has all permissions "
            "of a curator. If you'd like this user to only have curator permissions, "
            "you must update their role to BASIC then assign them to be CURATOR in the "
            "appropriate groups."
        )
    elif target_user.role == UserRole.GLOBAL_CURATOR:
        raise ValueError(
            f"User '{target_user.email}' is a global_curator and therefore has all "
            "permissions of a curator for all groups. If you'd like this user to only "
            "have curator permissions for a specific group, you must update their role "
            "to BASIC then assign them to be CURATOR in the appropriate groups."
        )
    elif target_user.role not in [UserRole.CURATOR, UserRole.BASIC]:
        raise ValueError(
            f"This endpoint can only be used to update the curator relationship for "
            "users with the CURATOR or BASIC role. \n"
            f"Target user: {target_user.email} \n"
            f"Target user role: {target_user.role} \n"
        )

    # check if the target user is in the group they are changing the curator relationship for
    requested_user_groups = fetch_user_groups_for_user(
        db_session=db_session,
        user_id=target_user.id,
        only_curator_groups=False,
    )
    group_ids = [group.id for group in requested_user_groups]
    if user_group_id not in group_ids:
        raise ValueError(
            f"target user {target_user.email} is not in group '{user_group_id}'"
        )


def update_user_curator_relationship(
    db_session: Session,
    user_group_id: int,
    set_curator_request: SetCuratorRequest,
    user_making_change: User,
) -> None:
    target_user = fetch_user_by_id(db_session, set_curator_request.user_id)
    if not target_user:
        raise ValueError(f"User with id '{set_curator_request.user_id}' not found")

    _validate_curator_relationship_update_request(
        db_session=db_session,
        user_group_id=user_group_id,
        target_user=target_user,
    )

    _validate_curator_relationship_update_requester(
        db_session=db_session,
        user_group_id=user_group_id,
        user_making_change=user_making_change,
    )

    logger.info(
        f"user_making_change={user_making_change.email if user_making_change else 'None'} is "
        f"updating the curator relationship for user={target_user.email} "
        f"in group={user_group_id} to is_curator={set_curator_request.is_curator}"
    )

    relationship_to_update = (
        db_session.query(User__UserGroup)
        .filter(
            User__UserGroup.user_group_id == user_group_id,
            User__UserGroup.user_id == set_curator_request.user_id,
        )
        .first()
    )

    if relationship_to_update:
        relationship_to_update.is_curator = set_curator_request.is_curator
    else:
        relationship_to_update = User__UserGroup(
            user_group_id=user_group_id,
            user_id=set_curator_request.user_id,
            is_curator=True,
        )
        db_session.add(relationship_to_update)

    _validate_curator_status__no_commit(db_session, [target_user])
    db_session.commit()


def add_users_to_user_group(
    db_session: Session,
    user: User,
    user_group_id: int,
    user_ids: list[UUID],
) -> UserGroup:
    db_user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id)
    if db_user_group is None:
        raise ValueError(f"UserGroup with id '{user_group_id}' not found")

    missing_users = [
        user_id for user_id in user_ids if fetch_user_by_id(db_session, user_id) is None
    ]
    if missing_users:
        raise ValueError(
            f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
        )

    _check_user_group_is_modifiable(db_user_group)

    current_user_ids = [user.id for user in db_user_group.users]
    current_user_ids_set = set(current_user_ids)
    new_user_ids = [
        user_id for user_id in user_ids if user_id not in current_user_ids_set
    ]

    if not new_user_ids:
        return db_user_group

    user_group_update = UserGroupUpdate(
        user_ids=current_user_ids + new_user_ids,
        cc_pair_ids=[cc_pair.id for cc_pair in db_user_group.cc_pairs],
    )

    return update_user_group(
        db_session=db_session,
        user=user,
        user_group_id=user_group_id,
        user_group_update=user_group_update,
    )


def update_user_group(
    db_session: Session,
    user: User,  # noqa: ARG001
    user_group_id: int,
    user_group_update: UserGroupUpdate,
) -> UserGroup:
    """If successful, this can set db_user_group.is_up_to_date = False.
    That will be processed by check_for_vespa_user_groups_sync_task and trigger
    a long running background sync to Vespa.
    """
    stmt = select(UserGroup).where(UserGroup.id == user_group_id)
    db_user_group = db_session.scalar(stmt)
    if db_user_group is None:
        raise ValueError(f"UserGroup with id '{user_group_id}' not found")

    _check_user_group_is_modifiable(db_user_group)

    current_user_ids = set([user.id for user in db_user_group.users])
    updated_user_ids = set(user_group_update.user_ids)
    added_user_ids = list(updated_user_ids - current_user_ids)
    removed_user_ids = list(current_user_ids - updated_user_ids)

    if added_user_ids:
        missing_users = [
            user_id
            for user_id in added_user_ids
            if fetch_user_by_id(db_session, user_id) is None
        ]
        if missing_users:
            raise ValueError(
                f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
            )

    # LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES
    # ACCESS TO DIFFERENT PERMISSIONS
    # if (removed_user_ids or added_user_ids) and (
    #     not user or user.role != UserRole.ADMIN
    # ):
    #     raise ValueError("Only admins can add or remove users from user groups")

    if removed_user_ids:
        _cleanup_user__user_group_relationships__no_commit(
            db_session=db_session,
            user_group_id=user_group_id,
            user_ids=removed_user_ids,
        )

    if added_user_ids:
        _add_user__user_group_relationships__no_commit(
            db_session=db_session,
            user_group_id=user_group_id,
            user_ids=added_user_ids,
        )

    cc_pairs_updated = set([cc_pair.id for cc_pair in db_user_group.cc_pairs]) != set(
        user_group_update.cc_pair_ids
    )
    if cc_pairs_updated:
        _mark_user_group__cc_pair_relationships_outdated__no_commit(
            db_session=db_session, user_group_id=user_group_id
        )
        _add_user_group__cc_pair_relationships__no_commit(
            db_session=db_session,
            user_group_id=db_user_group.id,
            cc_pair_ids=user_group_update.cc_pair_ids,
        )

    if cc_pairs_updated and not DISABLE_VECTOR_DB:
        db_user_group.is_up_to_date = False

    removed_users = db_session.scalars(
        select(User).where(User.id.in_(removed_user_ids))  # type: ignore
    ).unique()

    # Filter out admin and global curator users before validating curator status
    users_to_validate = [
        user
        for user in removed_users
        if user.role not in [UserRole.ADMIN, UserRole.GLOBAL_CURATOR]
    ]

    if users_to_validate:
        _validate_curator_status__no_commit(db_session, users_to_validate)

    # update "time_updated" to now
    db_user_group.time_last_modified_by_user = func.now()

    recompute_user_permissions__no_commit(
        list(set(added_user_ids) | set(removed_user_ids)), db_session
    )

    db_session.commit()
    return db_user_group


def rename_user_group(
    db_session: Session,
    user_group_id: int,
    new_name: str,
) -> UserGroup:
    stmt = select(UserGroup).where(UserGroup.id == user_group_id)
    db_user_group = db_session.scalar(stmt)
    if db_user_group is None:
        raise ValueError(f"UserGroup with id '{user_group_id}' not found")

    _check_user_group_is_modifiable(db_user_group)

    db_user_group.name = new_name
    db_user_group.time_last_modified_by_user = func.now()

    # CC pair documents in Vespa contain the group name, so we need to
    # trigger a sync to update them with the new name.
    _mark_user_group__cc_pair_relationships_outdated__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    if not DISABLE_VECTOR_DB:
        db_user_group.is_up_to_date = False

    db_session.commit()
    return db_user_group


def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> None:
    stmt = select(UserGroup).where(UserGroup.id == user_group_id)
    db_user_group = db_session.scalar(stmt)
    if db_user_group is None:
        raise ValueError(f"UserGroup with id '{user_group_id}' not found")

    _check_user_group_is_modifiable(db_user_group)

    # Collect affected user IDs before cleanup deletes the relationships
    affected_user_ids: list[UUID] = [
        uid
        for uid in db_session.execute(
            select(User__UserGroup.user_id).where(
                User__UserGroup.user_group_id == user_group_id
            )
        )
        .scalars()
        .all()
        if uid is not None
    ]

    _mark_user_group__cc_pair_relationships_outdated__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )

    _cleanup_credential__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    _cleanup_user__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    _cleanup_token_rate_limit__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    _cleanup_document_set__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    _cleanup_persona__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )
    _cleanup_user_group__cc_pair_relationships__no_commit(
        db_session=db_session,
        user_group_id=user_group_id,
        outdated_only=False,
    )
    _cleanup_llm_provider__user_group_relationships__no_commit(
        db_session=db_session, user_group_id=user_group_id
    )

    # Recompute permissions for affected users now that their
    # membership in this group has been removed
    recompute_user_permissions__no_commit(affected_user_ids, db_session)

    db_user_group.is_up_to_date = False
    db_user_group.is_up_for_deletion = True
    db_session.commit()


def delete_user_group(db_session: Session, user_group: UserGroup) -> None:
    """
    This assumes that all the fk cleanup has already been done.
    """
    db_session.delete(user_group)
    db_session.commit()


def mark_user_group_as_synced(db_session: Session, user_group: UserGroup) -> None:
    # cleanup outdated relationships
    _cleanup_user_group__cc_pair_relationships__no_commit(
        db_session=db_session, user_group_id=user_group.id, outdated_only=True
    )
    user_group.is_up_to_date = True
    db_session.commit()


def delete_user_group_cc_pair_relationship__no_commit(
    cc_pair_id: int, db_session: Session
) -> None:
    """Deletes all rows from UserGroup__ConnectorCredentialPair where the
    connector_credential_pair_id matches the given cc_pair_id.

    Should be used very carefully (only for connectors that are being deleted)."""
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    if not cc_pair:
        raise ValueError(f"Connector Credential Pair '{cc_pair_id}' does not exist")

    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:
        raise ValueError(
            f"Connector Credential Pair '{cc_pair_id}' is not in the DELETING state. status={cc_pair.status}"
        )

    delete_stmt = delete(UserGroup__ConnectorCredentialPair).where(
        UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair_id,
    )
    db_session.execute(delete_stmt)


================================================
FILE: backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja
================================================
<?xml version="1.0" encoding="utf-8"?>
<services version="1.0">
    <container id="default" version="1.0">
        <document-api />
        <search />
        <http>
            <server id="default" port="4080" />
        </http>
        <nodes count="[2, 4]">
            <resources vcpu="4.0" memory="16Gb" architecture="arm64" storage-type="remote"
                disk="48Gb" />
        </nodes>


    </container>
    <content id="danswer_index" version="1.0">
        <documents>
            <!-- <document type="danswer_chunk" mode="index" /> -->
{{ document_elements }}
        </documents>
        <nodes count="50">
            <resources vcpu="8.0" memory="128.0Gb" architecture="arm64" storage-type="local"
                disk="475.0Gb" />
        </nodes>
        <engine>
            <proton>
                <tuning>
                    <searchnode>
                        <requestthreads>
                            <persearch>2</persearch>
                        </requestthreads>
                    </searchnode>
                </tuning>
            </proton>
        </engine>

        <config name="vespa.config.search.summary.juniperrc">
            <max_matches>3</max_matches>
            <length>750</length>
            <surround_max>350</surround_max>
            <min_length>300</min_length>
        </config>


        <min-redundancy>2</min-redundancy>

    </content>
</services>


================================================
FILE: backend/ee/onyx/external_permissions/__init__.py
================================================


================================================
FILE: backend/ee/onyx/external_permissions/confluence/__init__.py
================================================


================================================
FILE: backend/ee/onyx/external_permissions/confluence/constants.py
================================================
# This is a group that we use to store all the users that we found in Confluence
# Instead of setting a page to public, we just add this group so that the page
# is only accessible to users who have confluence accounts.
ALL_CONF_EMAILS_GROUP_NAME = "All_Confluence_Users_Found_By_Onyx"

VIEWSPACE_PERMISSION_TYPE = "VIEWSPACE"
REQUEST_PAGINATION_LIMIT = 5000


================================================
FILE: backend/ee/onyx/external_permissions/confluence/doc_sync.py
================================================
"""
Rules defined here:
https://confluence.atlassian.com/conf85/check-who-can-view-a-page-1283360557.html
"""

from collections.abc import Generator

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


CONFLUENCE_DOC_SYNC_LABEL = "confluence_doc_sync"


def confluence_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
    """
    Fetches document permissions from Confluence and yields DocExternalAccess objects.
    Compares fetched documents against existing documents in the DB for the connector.
    If a document exists in the DB but not in the Confluence fetch, it's marked as restricted.
    """
    confluence_connector = ConfluenceConnector(
        **cc_pair.connector.connector_specific_config
    )

    provider = OnyxDBCredentialsProvider(
        get_current_tenant_id(), "confluence", cc_pair.credential_id
    )
    confluence_connector.set_credentials_provider(provider)

    yield from generic_doc_sync(
        cc_pair=cc_pair,
        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        callback=callback,
        doc_source=DocumentSource.CONFLUENCE,
        slim_connector=confluence_connector,
        label=CONFLUENCE_DOC_SYNC_LABEL,
    )


================================================
FILE: backend/ee/onyx/external_permissions/confluence/group_sync.py
================================================
from collections.abc import Generator

from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC
from onyx.connectors.confluence.onyx_confluence import (
    get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.users import get_all_users
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _build_group_member_email_map(
    confluence_client: OnyxConfluence, cc_pair_id: int
) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
    for user in confluence_client.paginated_cql_user_retrieval():
        logger.info(f"Processing groups for user: {user}")

        email = user.email
        if not email:
            # This field is only present in Confluence Server
            user_name = user.username
            # If it is present, try to get the email using a Server-specific method
            if user_name:
                email = get_user_email_from_username__server(
                    confluence_client=confluence_client,
                    user_name=user_name,
                )
            else:
                logger.error(f"user result missing username field: {user}")

        if not email:
            # If we still don't have an email, skip this user
            msg = f"user result missing email field: {user}"
            if user.type == "app":
                logger.warning(msg)
            else:
                emit_background_error(msg, cc_pair_id=cc_pair_id)
                logger.error(msg)
            continue

        all_users_groups: set[str] = set()
        for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
            # group name uniqueness is enforced by Confluence, so we can use it as a group ID
            group_id = group["name"]
            group_member_emails.setdefault(group_id, set()).add(email)
            all_users_groups.add(group_id)

        if not all_users_groups:
            msg = f"No groups found for user with email: {email}"
            emit_background_error(msg, cc_pair_id=cc_pair_id)
            logger.error(msg)
        else:
            logger.debug(f"Found groups {all_users_groups} for user with email {email}")

    if not group_member_emails:
        msg = "No groups found for any users."
        emit_background_error(msg, cc_pair_id=cc_pair_id)
        logger.error(msg)

    return group_member_emails


def _build_group_member_email_map_from_onyx_users(
    confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
    """Hacky, but it's the only way to do this as long as the
    Confluence APIs are broken.

    This is fixed in Confluence Data Center 10.1.0, so first choice
    is to tell users to upgrade to 10.1.0.
    https://jira.atlassian.com/browse/CONFSERVER-95999
    """
    with get_session_with_current_tenant() as db_session:
        # don't include external since they are handled by the "through confluence"
        # user fetching mechanism
        user_emails = [
            user.email for user in get_all_users(db_session, include_external=False)
        ]

    def _infer_username_from_email(email: str) -> str:
        return email.split("@")[0]

    group_member_emails: dict[str, set[str]] = {}
    for email in user_emails:
        logger.info(f"Processing groups for user with email: {email}")
        try:
            user_name = _infer_username_from_email(email)
            response = confluence_client.get_user_details_by_username(user_name)
            user_key = response.get("userKey")
            if not user_key:
                logger.error(f"User key not found for user with email {email}")
                continue

            all_users_groups: set[str] = set()
            for group in confluence_client.paginated_groups_by_user_retrieval(user_key):
                # group name uniqueness is enforced by Confluence, so we can use it as a group ID
                group_id = group["name"]
                group_member_emails.setdefault(group_id, set()).add(email)
                all_users_groups.add(group_id)

            if not all_users_groups:
                msg = f"No groups found for user with email: {email}"
                logger.error(msg)
            else:
                logger.info(
                    f"Found groups {all_users_groups} for user with email {email}"
                )
        except Exception:
            logger.exception(f"Error getting user details for user with email {email}")

    return group_member_emails


def _build_final_group_to_member_email_map(
    confluence_client: OnyxConfluence,
    cc_pair_id: int,
    # if set, will infer confluence usernames from onyx users in addition to using the
    # confluence users API. This is a hacky workaround for the fact that the Confluence
    # users API is broken before Confluence Data Center 10.1.0.
    use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,
) -> dict[str, set[str]]:
    group_to_member_email_map = _build_group_member_email_map(
        confluence_client=confluence_client,
        cc_pair_id=cc_pair_id,
    )
    group_to_member_email_map_from_onyx_users = (
        (
            _build_group_member_email_map_from_onyx_users(
                confluence_client=confluence_client,
            )
        )
        if use_onyx_users
        else {}
    )

    all_group_ids = set(group_to_member_email_map.keys()) | set(
        group_to_member_email_map_from_onyx_users.keys()
    )
    final_group_to_member_email_map = {}
    for group_id in all_group_ids:
        group_member_emails = group_to_member_email_map.get(
            group_id, set()
        ) | group_to_member_email_map_from_onyx_users.get(group_id, set())
        final_group_to_member_email_map[group_id] = group_member_emails

    return final_group_to_member_email_map


def confluence_group_sync(
    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    provider = OnyxDBCredentialsProvider(tenant_id, "confluence", cc_pair.credential_id)
    is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)
    wiki_base: str = cc_pair.connector.connector_specific_config["wiki_base"]
    url = wiki_base.rstrip("/")

    probe_kwargs = {
        "max_backoff_retries": 6,
        "max_backoff_seconds": 10,
    }

    final_kwargs = {
        "max_backoff_retries": 10,
        "max_backoff_seconds": 60,
    }

    confluence_client = OnyxConfluence(is_cloud, url, provider)
    confluence_client._probe_connection(**probe_kwargs)
    confluence_client._initialize_connection(**final_kwargs)

    group_to_member_email_map = _build_final_group_to_member_email_map(
        confluence_client, cc_pair.id
    )

    all_found_emails = set()
    for group_id, group_member_emails in group_to_member_email_map.items():
        yield (
            ExternalUserGroup(
                id=group_id,
                user_emails=list(group_member_emails),
            )
        )
        all_found_emails.update(group_member_emails)

    # This is so that when we find a public confleunce server page, we can
    # give access to all users only in if they have an email in Confluence
    if cc_pair.connector.connector_specific_config.get("is_cloud", False):
        all_found_group = ExternalUserGroup(
            id=ALL_CONF_EMAILS_GROUP_NAME,
            user_emails=list(all_found_emails),
        )
        yield all_found_group


================================================
FILE: backend/ee/onyx/external_permissions/confluence/page_access.py
================================================
from typing import Any

from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
    get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _extract_read_access_restrictions(
    confluence_client: OnyxConfluence, restrictions: dict[str, Any]
) -> tuple[set[str], set[str], bool]:
    """
    Converts a page's restrictions dict into an ExternalAccess object.
    If there are no restrictions, then return None
    """
    read_access = restrictions.get("read", {})
    read_access_restrictions = read_access.get("restrictions", {})

    # Extract the users with read access
    read_access_user = read_access_restrictions.get("user", {})
    read_access_user_jsons = read_access_user.get("results", [])
    # any items found means that there is a restriction
    found_any_restriction = bool(read_access_user_jsons)

    read_access_user_emails = []
    for user in read_access_user_jsons:
        # If the user has an email, then add it to the list
        if user.get("email"):
            read_access_user_emails.append(user["email"])
        # If the user has a username and not an email, then get the email from Confluence
        elif user.get("username"):
            email = get_user_email_from_username__server(
                confluence_client=confluence_client, user_name=user["username"]
            )
            if email:
                read_access_user_emails.append(email)
            else:
                logger.warning(
                    f"Email for user {user['username']} not found in Confluence"
                )
        else:
            if user.get("email") is not None:
                logger.warning(f"Cant find email for user {user.get('displayName')}")
                logger.warning(
                    "This user needs to make their email accessible in Confluence Settings"
                )

            logger.warning(f"no user email or username for {user}")

    # Extract the groups with read access
    read_access_group = read_access_restrictions.get("group", {})
    read_access_group_jsons = read_access_group.get("results", [])
    # any items found means that there is a restriction
    found_any_restriction |= bool(read_access_group_jsons)
    read_access_group_names = [
        group["name"] for group in read_access_group_jsons if group.get("name")
    ]

    return (
        set(read_access_user_emails),
        set(read_access_group_names),
        found_any_restriction,
    )


def get_page_restrictions(
    confluence_client: OnyxConfluence,
    page_id: str,
    page_restrictions: dict[str, Any],
    ancestors: list[dict[str, Any]],
    add_prefix: bool = False,
) -> ExternalAccess | None:
    """
    This function gets the restrictions for a page. In Confluence, a child can have
    at MOST the same level accessibility as its immediate parent.

    If no restrictions are found anywhere, then return None, indicating that the page
    should inherit the space's restrictions.

    add_prefix: When True, prefix group IDs with source type (for indexing path).
               When False (default), leave unprefixed (for permission sync path).
    """
    found_user_emails: set[str] = set()
    found_group_names: set[str] = set()

    # NOTE: need the found_any_restriction, since we can find restrictions
    # but not be able to extract any user emails or group names
    # in this case, we should just give no access
    found_user_emails, found_group_names, found_any_page_level_restriction = (
        _extract_read_access_restrictions(
            confluence_client=confluence_client,
            restrictions=page_restrictions,
        )
    )

    def _maybe_prefix_groups(group_names: set[str]) -> set[str]:
        if add_prefix:
            return {
                build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
                for g in group_names
            }
        return group_names

    # if there are individual page-level restrictions, then this is the accurate
    # restriction for the page. You cannot both have page-level restrictions AND
    # inherit restrictions from the parent.
    if found_any_page_level_restriction:
        return ExternalAccess(
            external_user_emails=found_user_emails,
            external_user_group_ids=_maybe_prefix_groups(found_group_names),
            is_public=False,
        )

    # ancestors seem to be in order from root to immediate parent
    # https://community.atlassian.com/forums/Confluence-questions/Order-of-ancestors-in-REST-API-response-Confluence-Server-amp/qaq-p/2385981
    # we want the restrictions from the immediate parent to take precedence, so we should
    # reverse the list
    for ancestor in reversed(ancestors):
        (
            ancestor_user_emails,
            ancestor_group_names,
            found_any_restrictions_in_ancestor,
        ) = _extract_read_access_restrictions(
            confluence_client=confluence_client,
            restrictions=ancestor.get("restrictions", {}),
        )
        if found_any_restrictions_in_ancestor:
            # if inheriting restrictions from the parent, then the first one we run into
            # should be applied (the reason why we'd traverse more than one ancestor is if
            # the ancestor also is in "inherit" mode.)
            logger.debug(
                f"Found user restrictions {ancestor_user_emails} and group restrictions {ancestor_group_names}"
                f"for document {page_id} based on ancestor {ancestor}"
            )
            return ExternalAccess(
                external_user_emails=ancestor_user_emails,
                external_user_group_ids=_maybe_prefix_groups(ancestor_group_names),
                is_public=False,
            )

    # we didn't find any restrictions, so the page inherits the space's restrictions
    return None


================================================
FILE: backend/ee/onyx/external_permissions/confluence/space_access.py
================================================
from ee.onyx.configs.app_configs import CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from ee.onyx.external_permissions.confluence.constants import REQUEST_PAGINATION_LIMIT
from ee.onyx.external_permissions.confluence.constants import VIEWSPACE_PERMISSION_TYPE
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
    get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _get_server_space_permissions(
    confluence_client: OnyxConfluence, space_key: str
) -> ExternalAccess:
    space_permissions = confluence_client.get_all_space_permissions_server(
        space_key=space_key
    )

    viewspace_permissions = []
    for permission_category in space_permissions:
        if permission_category.get("type") == VIEWSPACE_PERMISSION_TYPE:
            viewspace_permissions.extend(
                permission_category.get("spacePermissions", [])
            )

    is_public = False
    user_names = set()
    group_names = set()
    for permission in viewspace_permissions:
        if user_name := permission.get("userName"):
            user_names.add(user_name)
        if group_name := permission.get("groupName"):
            group_names.add(group_name)

        # It seems that if anonymous access is turned on for the site and space,
        # then the space is publicly accessible.
        # For confluence server, we make a group that contains all users
        # that exist in confluence and then just add that group to the space permissions
        # if anonymous access is turned on for the site and space or we set is_public = True
        # if they set the env variable CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC to True so
        # that we can support confluence server deployments that want anonymous access
        # to be public (we cant test this because its paywalled)
        if user_name is None and group_name is None:
            # Defaults to False
            if CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC:
                is_public = True
            else:
                group_names.add(ALL_CONF_EMAILS_GROUP_NAME)

    user_emails = set()
    for user_name in user_names:
        user_email = get_user_email_from_username__server(confluence_client, user_name)
        if user_email:
            user_emails.add(user_email)
        else:
            logger.warning(f"Email for user {user_name} not found in Confluence")

    if not user_emails and not group_names:
        logger.warning(
            "No user emails or group names found in Confluence space permissions"
            f"\nSpace key: {space_key}"
            f"\nSpace permissions: {space_permissions}"
        )

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_names,
        is_public=is_public,
    )


def _get_cloud_space_permissions(
    confluence_client: OnyxConfluence, space_key: str
) -> ExternalAccess:
    space_permissions_result = confluence_client.get_space(
        space_key=space_key, expand="permissions"
    )
    space_permissions = space_permissions_result.get("permissions", [])

    user_emails = set()
    group_names = set()
    is_externally_public = False
    for permission in space_permissions:
        subs = permission.get("subjects")
        if subs:
            # If there are subjects, then there are explicit users or groups with access
            if email := subs.get("user", {}).get("results", [{}])[0].get("email"):
                user_emails.add(email)
            if group_name := subs.get("group", {}).get("results", [{}])[0].get("name"):
                group_names.add(group_name)
        else:
            # If there are no subjects, then the permission is for everyone
            if permission.get("operation", {}).get(
                "operation"
            ) == "read" and permission.get("anonymousAccess", False):
                # If the permission specifies read access for anonymous users, then
                # the space is publicly accessible
                is_externally_public = True

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_names,
        is_public=is_externally_public,
    )


def get_space_permission(
    confluence_client: OnyxConfluence,
    space_key: str,
    is_cloud: bool,
    add_prefix: bool = False,
) -> ExternalAccess:
    if is_cloud:
        space_permissions = _get_cloud_space_permissions(confluence_client, space_key)
    else:
        space_permissions = _get_server_space_permissions(confluence_client, space_key)

    if (
        not space_permissions.is_public
        and not space_permissions.external_user_emails
        and not space_permissions.external_user_group_ids
    ):
        logger.warning(
            f"No permissions found for space '{space_key}'. This is very unlikely "
            "to be correct and is more likely caused by an access token with "
            "insufficient permissions. Make sure that the access token has Admin "
            f"permissions for space '{space_key}'"
        )

    # Prefix group IDs with source type if requested (for indexing path)
    if add_prefix and space_permissions.external_user_group_ids:
        prefixed_groups = {
            build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
            for g in space_permissions.external_user_group_ids
        }
        return ExternalAccess(
            external_user_emails=space_permissions.external_user_emails,
            external_user_group_ids=prefixed_groups,
            is_public=space_permissions.is_public,
        )

    return space_permissions


def get_all_space_permissions(
    confluence_client: OnyxConfluence,
    is_cloud: bool,
    add_prefix: bool = False,
) -> dict[str, ExternalAccess]:
    """
    Get access permissions for all spaces in Confluence.

    add_prefix: When True, prefix group IDs with source type (for indexing path).
               When False (default), leave unprefixed (for permission sync path).
    """
    logger.debug("Getting space permissions")
    # Gets all the spaces in the Confluence instance
    all_space_keys = [
        key
        for space in confluence_client.retrieve_confluence_spaces(
            limit=REQUEST_PAGINATION_LIMIT,
        )
        if (key := space.get("key"))
    ]

    # Gets the permissions for each space
    logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
    space_permissions_by_space_key: dict[str, ExternalAccess] = {}
    for space_key in all_space_keys:
        space_permissions = get_space_permission(
            confluence_client, space_key, is_cloud, add_prefix
        )

        # Stores the permissions for each space
        space_permissions_by_space_key[space_key] = space_permissions

    return space_permissions_by_space_key


================================================
FILE: backend/ee/onyx/external_permissions/github/doc_sync.py
================================================
import json
from collections.abc import Generator

from github import Github
from github.Repository import Repository

from ee.onyx.external_permissions.github.utils import fetch_repository_team_slugs
from ee.onyx.external_permissions.github.utils import form_collaborators_group_id
from ee.onyx.external_permissions.github.utils import form_organization_group_id
from ee.onyx.external_permissions.github.utils import (
    form_outside_collaborators_group_id,
)
from ee.onyx.external_permissions.github.utils import get_external_access_permission
from ee.onyx.external_permissions.github.utils import get_repository_visibility
from ee.onyx.external_permissions.github.utils import GitHubVisibility
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.github.connector import DocMetadata
from onyx.connectors.github.connector import GithubConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()

GITHUB_DOC_SYNC_LABEL = "github_doc_sync"


def github_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
    """
    Sync GitHub documents with external access permissions.

    This function checks each repository for visibility/team changes and updates
    document permissions accordingly without using checkpoints.
    """
    logger.info(f"Starting GitHub document sync for CC pair ID: {cc_pair.id}")

    # Initialize GitHub connector with credentials
    github_connector: GithubConnector = GithubConnector(
        **cc_pair.connector.connector_specific_config
    )

    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    github_connector.load_credentials(credential_json)
    logger.info("GitHub connector credentials loaded successfully")

    if not github_connector.github_client:
        logger.error("GitHub client initialization failed")
        raise ValueError("github_client is required")

    # Get all repositories from GitHub API
    logger.info("Fetching all repositories from GitHub API")
    try:
        repos = github_connector.fetch_configured_repos()

        logger.info(f"Found {len(repos)} repositories to check")
    except Exception as e:
        logger.error(f"Failed to fetch repositories: {e}")
        raise

    repo_to_doc_list_map: dict[str, list[DocumentRow]] = {}
    # sort order is ascending because we want to get the oldest documents first
    existing_docs: list[DocumentRow] = fetch_all_existing_docs_fn(
        sort_order=SortOrder.ASC
    )
    logger.info(f"Found {len(existing_docs)} documents to check")
    for doc in existing_docs:
        try:
            doc_metadata = DocMetadata.model_validate_json(json.dumps(doc.doc_metadata))
            if doc_metadata.repo not in repo_to_doc_list_map:
                repo_to_doc_list_map[doc_metadata.repo] = []
            repo_to_doc_list_map[doc_metadata.repo].append(doc)
        except Exception as e:
            logger.error(f"Failed to parse doc metadata: {e} for doc {doc.id}")
            continue
    logger.info(f"Found {len(repo_to_doc_list_map)} documents to check")
    # Process each repository individually
    for repo in repos:
        try:
            logger.info(f"Processing repository: {repo.id} (name: {repo.name})")
            repo_doc_list: list[DocumentRow] = repo_to_doc_list_map.get(
                repo.full_name, []
            )
            if not repo_doc_list:
                logger.warning(
                    f"No documents found for repository {repo.id} ({repo.name})"
                )
                continue

            current_external_group_ids = repo_doc_list[0].external_user_group_ids or []
            # Check if repository has any permission changes
            has_changes = _check_repository_for_changes(
                repo=repo,
                github_client=github_connector.github_client,
                current_external_group_ids=current_external_group_ids,
            )

            if has_changes:
                logger.info(
                    f"Repository {repo.id} ({repo.name}) has changes, updating documents"
                )

                # Get new external access permissions for this repository
                new_external_access = get_external_access_permission(
                    repo, github_connector.github_client
                )

                logger.info(
                    f"Found {len(repo_doc_list)} documents for repository {repo.full_name}"
                )

                # Yield updated external access for each document
                for doc in repo_doc_list:
                    if callback:
                        callback.progress(GITHUB_DOC_SYNC_LABEL, 1)

                    yield DocExternalAccess(
                        doc_id=doc.id,
                        external_access=new_external_access,
                    )
            else:
                logger.info(
                    f"Repository {repo.id} ({repo.name}) has no changes, skipping"
                )
        except Exception as e:
            logger.error(f"Error processing repository {repo.id} ({repo.name}): {e}")

    logger.info(f"GitHub document sync completed for CC pair ID: {cc_pair.id}")


def _check_repository_for_changes(
    repo: Repository,
    github_client: Github,
    current_external_group_ids: list[str],
) -> bool:
    """
    Check if repository has any permission changes (visibility or team updates).
    """
    logger.info(f"Checking repository {repo.id} ({repo.name}) for changes")

    # Check for repository visibility changes using the sample document data
    if _is_repo_visibility_changed_from_groups(
        repo=repo,
        current_external_group_ids=current_external_group_ids,
    ):
        logger.info(f"Repository {repo.id} ({repo.name}) has visibility changes")
        return True

    # Check for team membership changes if repository is private
    if get_repository_visibility(
        repo
    ) == GitHubVisibility.PRIVATE and _teams_updated_from_groups(
        repo=repo,
        github_client=github_client,
        current_external_group_ids=current_external_group_ids,
    ):
        logger.info(f"Repository {repo.id} ({repo.name}) has team changes")
        return True

    logger.info(f"Repository {repo.id} ({repo.name}) has no changes")
    return False


def _is_repo_visibility_changed_from_groups(
    repo: Repository,
    current_external_group_ids: list[str],
) -> bool:
    """
    Check if repository visibility has changed by analyzing existing external group IDs.

    Args:
        repo: GitHub repository object
        current_external_group_ids: List of external group IDs from existing document

    Returns:
        True if visibility has changed
    """
    current_repo_visibility = get_repository_visibility(repo)
    logger.info(f"Current repository visibility: {current_repo_visibility.value}")

    # Build expected group IDs for current visibility
    collaborators_group_id = build_ext_group_name_for_onyx(
        source=DocumentSource.GITHUB,
        ext_group_name=form_collaborators_group_id(repo.id),
    )

    org_group_id = None
    if repo.organization:
        org_group_id = build_ext_group_name_for_onyx(
            source=DocumentSource.GITHUB,
            ext_group_name=form_organization_group_id(repo.organization.id),
        )

    # Determine existing visibility from group IDs
    has_collaborators_group = collaborators_group_id in current_external_group_ids
    has_org_group = org_group_id and org_group_id in current_external_group_ids

    if has_collaborators_group:
        existing_repo_visibility = GitHubVisibility.PRIVATE
    elif has_org_group:
        existing_repo_visibility = GitHubVisibility.INTERNAL
    else:
        existing_repo_visibility = GitHubVisibility.PUBLIC

    logger.info(f"Inferred existing visibility: {existing_repo_visibility.value}")

    visibility_changed = existing_repo_visibility != current_repo_visibility
    if visibility_changed:
        logger.info(
            f"Visibility changed for repo {repo.id} ({repo.name}): "
            f"{existing_repo_visibility.value} -> {current_repo_visibility.value}"
        )

    return visibility_changed


def _teams_updated_from_groups(
    repo: Repository,
    github_client: Github,
    current_external_group_ids: list[str],
) -> bool:
    """
    Check if repository team memberships have changed using existing group IDs.
    """
    # Fetch current team slugs for the repository
    current_teams = fetch_repository_team_slugs(repo=repo, github_client=github_client)
    logger.info(
        f"Current teams for repository {repo.id} (name: {repo.name}): {current_teams}"
    )

    # Build group IDs to exclude from team comparison (non-team groups)
    collaborators_group_id = build_ext_group_name_for_onyx(
        source=DocumentSource.GITHUB,
        ext_group_name=form_collaborators_group_id(repo.id),
    )
    outside_collaborators_group_id = build_ext_group_name_for_onyx(
        source=DocumentSource.GITHUB,
        ext_group_name=form_outside_collaborators_group_id(repo.id),
    )
    non_team_group_ids = {collaborators_group_id, outside_collaborators_group_id}

    # Extract existing team IDs from current external group IDs
    existing_team_ids = set()
    for group_id in current_external_group_ids:
        # Skip all non-team groups, keep only team groups
        if group_id not in non_team_group_ids:
            existing_team_ids.add(group_id)

    # Note: existing_team_ids from DB are already prefixed (e.g., "github__team-slug")
    # but current_teams from API are raw team slugs, so we need to add the prefix
    current_team_ids = set()
    for team_slug in current_teams:
        team_group_id = build_ext_group_name_for_onyx(
            source=DocumentSource.GITHUB,
            ext_group_name=team_slug,
        )
        current_team_ids.add(team_group_id)

    logger.info(
        f"Existing team IDs: {existing_team_ids}, Current team IDs: {current_team_ids}"
    )

    # Compare actual team IDs to detect changes
    teams_changed = current_team_ids != existing_team_ids
    if teams_changed:
        logger.info(
            f"Team changes detected for repo {repo.id} (name: {repo.name}): "
            f"existing={existing_team_ids}, current={current_team_ids}"
        )

    return teams_changed


================================================
FILE: backend/ee/onyx/external_permissions/github/group_sync.py
================================================
from collections.abc import Generator

from github import Repository

from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.github.utils import get_external_user_group
from onyx.connectors.github.connector import GithubConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()


def github_group_sync(
    tenant_id: str,  # noqa: ARG001
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    github_connector: GithubConnector = GithubConnector(
        **cc_pair.connector.connector_specific_config
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    github_connector.load_credentials(credential_json)
    if not github_connector.github_client:
        raise ValueError("github_client is required")

    logger.info("Starting GitHub group sync...")
    repos: list[Repository.Repository] = []
    if github_connector.repositories:
        if "," in github_connector.repositories:
            # Multiple repositories specified
            repos = github_connector.get_github_repos(github_connector.github_client)
        else:
            # Single repository (backward compatibility)
            repos = [github_connector.get_github_repo(github_connector.github_client)]
    else:
        # All repositories
        repos = github_connector.get_all_repos(github_connector.github_client)

    for repo in repos:
        try:
            for external_group in get_external_user_group(
                repo, github_connector.github_client
            ):
                logger.info(f"External group: {external_group}")
                yield external_group
        except Exception as e:
            logger.error(f"Error processing repository {repo.id} ({repo.name}): {e}")


================================================
FILE: backend/ee/onyx/external_permissions/github/utils.py
================================================
from collections.abc import Callable
from enum import Enum
from typing import List
from typing import Optional
from typing import Tuple
from typing import TypeVar

from github import Github
from github import RateLimitExceededException
from github.GithubException import GithubException
from github.NamedUser import NamedUser
from github.Organization import Organization
from github.PaginatedList import PaginatedList
from github.Repository import Repository
from github.Team import Team
from pydantic import BaseModel

from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.github.rate_limit_utils import sleep_after_rate_limit_exception
from onyx.utils.logger import setup_logger

logger = setup_logger()


class GitHubVisibility(Enum):
    """GitHub repository visibility options."""

    PUBLIC = "public"
    PRIVATE = "private"
    INTERNAL = "internal"


MAX_RETRY_COUNT = 3

T = TypeVar("T")

# Higher-order function to wrap GitHub operations with retry and exception handling


def _run_with_retry(
    operation: Callable[[], T],
    description: str,
    github_client: Github,
    retry_count: int = 0,
) -> Optional[T]:
    """Execute a GitHub operation with retry on rate limit and exception handling."""
    logger.debug(f"Starting operation '{description}', attempt {retry_count + 1}")
    try:
        result = operation()
        logger.debug(f"Operation '{description}' completed successfully")
        return result
    except RateLimitExceededException:
        if retry_count < MAX_RETRY_COUNT:
            sleep_after_rate_limit_exception(github_client)
            logger.warning(
                f"Rate limit exceeded while {description}. Retrying... (attempt {retry_count + 1}/{MAX_RETRY_COUNT})"
            )
            return _run_with_retry(
                operation, description, github_client, retry_count + 1
            )
        else:
            error_msg = f"Max retries exceeded for {description}"
            logger.exception(error_msg)
            raise RuntimeError(error_msg)
    except GithubException as e:
        logger.warning(f"GitHub API error during {description}: {e}")
        return None
    except Exception as e:
        logger.exception(f"Unexpected error during {description}: {e}")
        return None


class UserInfo(BaseModel):
    """Represents a GitHub user with their basic information."""

    login: str
    name: Optional[str] = None
    email: Optional[str] = None


class TeamInfo(BaseModel):
    """Represents a GitHub team with its members."""

    name: str
    slug: str
    members: List[UserInfo]


def _fetch_organization_members(
    github_client: Github,
    org_name: str,
    retry_count: int = 0,  # noqa: ARG001
) -> List[UserInfo]:
    """Fetch all organization members including owners and regular members."""
    org_members: List[UserInfo] = []
    logger.info(f"Fetching organization members for {org_name}")

    org = _run_with_retry(
        lambda: github_client.get_organization(org_name),
        f"get organization {org_name}",
        github_client,
    )
    if not org:
        logger.error(f"Failed to fetch organization {org_name}")
        raise RuntimeError(f"Failed to fetch organization {org_name}")

    member_objs: PaginatedList[NamedUser] | list[NamedUser] = (
        _run_with_retry(
            lambda: org.get_members(filter_="all"),
            f"get members for organization {org_name}",
            github_client,
        )
        or []
    )

    for member in member_objs:
        user_info = UserInfo(login=member.login, name=member.name, email=member.email)
        org_members.append(user_info)

    logger.info(f"Fetched {len(org_members)} members for organization {org_name}")
    return org_members


def _fetch_repository_teams_detailed(
    repo: Repository,
    github_client: Github,
    retry_count: int = 0,  # noqa: ARG001
) -> List[TeamInfo]:
    """Fetch teams with access to the repository and their members."""
    teams_data: List[TeamInfo] = []
    logger.info(f"Fetching teams for repository {repo.full_name}")

    team_objs: PaginatedList[Team] | list[Team] = (
        _run_with_retry(
            lambda: repo.get_teams(),
            f"get teams for repository {repo.full_name}",
            github_client,
        )
        or []
    )

    for team in team_objs:
        logger.info(
            f"Processing team {team.name} (slug: {team.slug}) for repository {repo.full_name}"
        )

        members: PaginatedList[NamedUser] | list[NamedUser] = (
            _run_with_retry(
                lambda: team.get_members(),
                f"get members for team {team.name}",
                github_client,
            )
            or []
        )

        team_members = []
        for m in members:
            user_info = UserInfo(login=m.login, name=m.name, email=m.email)
            team_members.append(user_info)

        team_info = TeamInfo(name=team.name, slug=team.slug, members=team_members)
        teams_data.append(team_info)
        logger.info(f"Team {team.name} has {len(team_members)} members")

    logger.info(f"Fetched {len(teams_data)} teams for repository {repo.full_name}")
    return teams_data


def fetch_repository_team_slugs(
    repo: Repository,
    github_client: Github,
    retry_count: int = 0,  # noqa: ARG001
) -> List[str]:
    """Fetch team slugs with access to the repository."""
    logger.info(f"Fetching team slugs for repository {repo.full_name}")
    teams_data: List[str] = []

    team_objs: PaginatedList[Team] | list[Team] = (
        _run_with_retry(
            lambda: repo.get_teams(),
            f"get teams for repository {repo.full_name}",
            github_client,
        )
        or []
    )

    for team in team_objs:
        teams_data.append(team.slug)

    logger.info(f"Fetched {len(teams_data)} team slugs for repository {repo.full_name}")
    return teams_data


def _get_collaborators_and_outside_collaborators(
    github_client: Github,
    repo: Repository,
) -> Tuple[List[UserInfo], List[UserInfo]]:
    """Fetch and categorize collaborators into regular and outside collaborators."""
    collaborators: List[UserInfo] = []
    outside_collaborators: List[UserInfo] = []
    logger.info(f"Fetching collaborators for repository {repo.full_name}")

    repo_collaborators: PaginatedList[NamedUser] | list[NamedUser] = (
        _run_with_retry(
            lambda: repo.get_collaborators(),
            f"get collaborators for repository {repo.full_name}",
            github_client,
        )
        or []
    )

    for collaborator in repo_collaborators:
        is_outside = False

        # Check if collaborator is outside the organization
        if repo.organization:
            org: Organization | None = _run_with_retry(
                lambda: github_client.get_organization(repo.organization.login),
                f"get organization {repo.organization.login}",
                github_client,
            )

            if org is not None:
                org_obj = org
                membership = _run_with_retry(
                    lambda: org_obj.has_in_members(collaborator),
                    f"check membership for {collaborator.login} in org {org_obj.login}",
                    github_client,
                )
                is_outside = membership is not None and not membership

        info = UserInfo(
            login=collaborator.login, name=collaborator.name, email=collaborator.email
        )
        if repo.organization and is_outside:
            outside_collaborators.append(info)
        else:
            collaborators.append(info)

    logger.info(
        f"Categorized {len(collaborators)} regular and {len(outside_collaborators)} outside collaborators for {repo.full_name}"
    )
    return collaborators, outside_collaborators


def form_collaborators_group_id(repository_id: int) -> str:
    """Generate group ID for repository collaborators."""
    if not repository_id:
        logger.exception("Repository ID is required to generate collaborators group ID")
        raise ValueError("Repository ID must be set to generate group ID.")
    group_id = f"{repository_id}_collaborators"
    return group_id


def form_organization_group_id(organization_id: int) -> str:
    """Generate group ID for organization using organization ID."""
    if not organization_id:
        logger.exception(
            "Organization ID is required to generate organization group ID"
        )
        raise ValueError("Organization ID must be set to generate group ID.")
    group_id = f"{organization_id}_organization"
    return group_id


def form_outside_collaborators_group_id(repository_id: int) -> str:
    """Generate group ID for outside collaborators."""
    if not repository_id:
        logger.exception(
            "Repository ID is required to generate outside collaborators group ID"
        )
        raise ValueError("Repository ID must be set to generate group ID.")
    group_id = f"{repository_id}_outside_collaborators"
    return group_id


def get_repository_visibility(repo: Repository) -> GitHubVisibility:
    """
    Get the visibility of a repository.
    Returns GitHubVisibility enum member.
    """
    if hasattr(repo, "visibility"):
        visibility = repo.visibility
        logger.info(
            f"Repository {repo.full_name} visibility from attribute: {visibility}"
        )
        try:
            return GitHubVisibility(visibility)
        except ValueError:
            logger.warning(
                f"Unknown visibility '{visibility}' for repo {repo.full_name}, defaulting to private"
            )
            return GitHubVisibility.PRIVATE

    logger.info(f"Repository {repo.full_name} is private")
    return GitHubVisibility.PRIVATE


def get_external_access_permission(
    repo: Repository, github_client: Github, add_prefix: bool = False
) -> ExternalAccess:
    """
    Get the external access permission for a repository.
    Uses group-based permissions for efficiency and scalability.

    add_prefix: When this method is called during the initial permission sync via the connector,
                the group ID isn't prefixed with the source while inserting the document record.
                So in that case, set add_prefix to True, allowing the method itself to handle
                prefixing. However, when the same method is invoked from doc_sync, our system
                already adds the prefix to the group ID while processing the ExternalAccess object.
    """
    # We maintain collaborators, and outside collaborators as two separate groups
    # instead of adding individual user emails to ExternalAccess.external_user_emails for two reasons:
    # 1. Changes in repo collaborators (additions/removals) would require updating all documents.
    # 2. Repo permissions can change without updating the repo's updated_at timestamp,
    #    forcing full permission syncs for all documents every time, which is inefficient.

    repo_visibility = get_repository_visibility(repo)
    logger.info(
        f"Generating ExternalAccess for {repo.full_name}: visibility={repo_visibility.value}"
    )

    if repo_visibility == GitHubVisibility.PUBLIC:
        logger.info(
            f"Repository {repo.full_name} is public - allowing access to all users"
        )
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        )
    elif repo_visibility == GitHubVisibility.PRIVATE:
        logger.info(
            f"Repository {repo.full_name} is private - setting up restricted access"
        )

        collaborators_group_id = form_collaborators_group_id(repo.id)
        outside_collaborators_group_id = form_outside_collaborators_group_id(repo.id)
        if add_prefix:
            collaborators_group_id = build_ext_group_name_for_onyx(
                source=DocumentSource.GITHUB,
                ext_group_name=collaborators_group_id,
            )
            outside_collaborators_group_id = build_ext_group_name_for_onyx(
                source=DocumentSource.GITHUB,
                ext_group_name=outside_collaborators_group_id,
            )
        group_ids = {collaborators_group_id, outside_collaborators_group_id}

        team_slugs = fetch_repository_team_slugs(repo, github_client)
        if add_prefix:
            team_slugs = [
                build_ext_group_name_for_onyx(
                    source=DocumentSource.GITHUB,
                    ext_group_name=slug,
                )
                for slug in team_slugs
            ]
        group_ids.update(team_slugs)

        logger.info(f"ExternalAccess groups for {repo.full_name}: {group_ids}")
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=group_ids,
            is_public=False,
        )
    else:
        # Internal repositories - accessible to organization members
        logger.info(
            f"Repository {repo.full_name} is internal - accessible to org members"
        )
        org_group_id = form_organization_group_id(repo.organization.id)
        if add_prefix:
            org_group_id = build_ext_group_name_for_onyx(
                source=DocumentSource.GITHUB,
                ext_group_name=org_group_id,
            )
        group_ids = {org_group_id}
        logger.info(f"ExternalAccess groups for {repo.full_name}: {group_ids}")
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=group_ids,
            is_public=False,
        )


def get_external_user_group(
    repo: Repository, github_client: Github
) -> list[ExternalUserGroup]:
    """
    Get the external user group for a repository.
    Creates ExternalUserGroup objects with actual user emails for each permission group.
    """
    repo_visibility = get_repository_visibility(repo)
    logger.info(
        f"Generating ExternalUserGroups for {repo.full_name}: visibility={repo_visibility.value}"
    )

    if repo_visibility == GitHubVisibility.PRIVATE:
        logger.info(f"Processing private repository {repo.full_name}")

        collaborators, outside_collaborators = (
            _get_collaborators_and_outside_collaborators(github_client, repo)
        )
        teams = _fetch_repository_teams_detailed(repo, github_client)
        external_user_groups = []

        user_emails = set()
        for collab in collaborators:
            if collab.email:
                user_emails.add(collab.email)
            else:
                logger.error(f"Collaborator {collab.login} has no email")

        if user_emails:
            collaborators_group = ExternalUserGroup(
                id=form_collaborators_group_id(repo.id),
                user_emails=list(user_emails),
            )
            external_user_groups.append(collaborators_group)
            logger.info(f"Created collaborators group with {len(user_emails)} emails")

        # Create group for outside collaborators
        user_emails = set()
        for collab in outside_collaborators:
            if collab.email:
                user_emails.add(collab.email)
            else:
                logger.error(f"Outside collaborator {collab.login} has no email")

        if user_emails:
            outside_collaborators_group = ExternalUserGroup(
                id=form_outside_collaborators_group_id(repo.id),
                user_emails=list(user_emails),
            )
            external_user_groups.append(outside_collaborators_group)
            logger.info(
                f"Created outside collaborators group with {len(user_emails)} emails"
            )

        # Create groups for teams
        for team in teams:
            user_emails = set()
            for member in team.members:
                if member.email:
                    user_emails.add(member.email)
                else:
                    logger.error(f"Team member {member.login} has no email")

            if user_emails:
                team_group = ExternalUserGroup(
                    id=team.slug,
                    user_emails=list(user_emails),
                )
                external_user_groups.append(team_group)
                logger.info(
                    f"Created team group {team.name} with {len(user_emails)} emails"
                )

        logger.info(
            f"Created {len(external_user_groups)} ExternalUserGroups for private repository {repo.full_name}"
        )
        return external_user_groups

    if repo_visibility == GitHubVisibility.INTERNAL:
        logger.info(f"Processing internal repository {repo.full_name}")

        org_group_id = form_organization_group_id(repo.organization.id)
        org_members = _fetch_organization_members(
            github_client, repo.organization.login
        )

        user_emails = set()
        for member in org_members:
            if member.email:
                user_emails.add(member.email)
            else:
                logger.error(f"Org member {member.login} has no email")

        org_group = ExternalUserGroup(
            id=org_group_id,
            user_emails=list(user_emails),
        )
        logger.info(
            f"Created organization group with {len(user_emails)} emails for internal repository {repo.full_name}"
        )
        return [org_group]

    logger.info(f"Repository {repo.full_name} is public - no user groups needed")
    return []


================================================
FILE: backend/ee/onyx/external_permissions/gmail/doc_sync.py
================================================
from collections.abc import Generator
from datetime import datetime
from datetime import timezone

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _get_slim_doc_generator(
    cc_pair: ConnectorCredentialPair,
    gmail_connector: GmailConnector,
    callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
    current_time = datetime.now(timezone.utc)
    start_time = (
        cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp()
        if cc_pair.last_time_perm_sync
        else 0.0
    )

    return gmail_connector.retrieve_all_slim_docs_perm_sync(
        start=start_time,
        end=current_time.timestamp(),
        callback=callback,
    )


def gmail_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
    """
    Adds the external permissions to the documents and hierarchy nodes in postgres.
    If the document doesn't already exist in postgres, we create
    it in postgres so that when it gets created later, the permissions are
    already populated.
    """
    gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    gmail_connector.load_credentials(credential_json)

    slim_doc_generator = _get_slim_doc_generator(
        cc_pair, gmail_connector, callback=callback
    )

    for slim_doc_batch in slim_doc_generator:
        for slim_doc in slim_doc_batch:
            if callback:
                if callback.should_stop():
                    raise RuntimeError("gmail_doc_sync: Stop signal detected")

                callback.progress("gmail_doc_sync", 1)

            if isinstance(slim_doc, HierarchyNode):
                # Yield hierarchy node permissions to be processed in outer layer
                if slim_doc.external_access:
                    yield NodeExternalAccess(
                        external_access=slim_doc.external_access,
                        raw_node_id=slim_doc.raw_node_id,
                        source=DocumentSource.GMAIL.value,
                    )
                continue
            if slim_doc.external_access is None:
                logger.warning(f"No permissions found for document {slim_doc.id}")
                continue

            yield DocExternalAccess(
                doc_id=slim_doc.id,
                external_access=slim_doc.external_access,
            )


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/__init__.py
================================================


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/doc_sync.py
================================================
from collections.abc import Generator
from datetime import datetime
from datetime import timezone

from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
from ee.onyx.external_permissions.google_drive.models import PermissionType
from ee.onyx.external_permissions.google_drive.permission_retrieval import (
    get_permissions_by_ids,
)
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _get_slim_doc_generator(
    cc_pair: ConnectorCredentialPair,
    google_drive_connector: GoogleDriveConnector,
    callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
    current_time = datetime.now(timezone.utc)
    start_time = (
        cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp()
        if cc_pair.last_time_perm_sync
        else 0.0
    )

    return google_drive_connector.retrieve_all_slim_docs_perm_sync(
        start=start_time,
        end=current_time.timestamp(),
        callback=callback,
    )


def _merge_permissions_lists(
    permission_lists: list[list[GoogleDrivePermission]],
) -> list[GoogleDrivePermission]:
    """
    Merge a list of permission lists into a single list of permissions.
    """
    seen_permission_ids: set[str] = set()
    merged_permissions: list[GoogleDrivePermission] = []
    for permission_list in permission_lists:
        for permission in permission_list:
            if permission.id not in seen_permission_ids:
                merged_permissions.append(permission)
                seen_permission_ids.add(permission.id)

    return merged_permissions


def get_external_access_for_raw_gdrive_file(
    file: GoogleDriveFileType,
    company_domain: str,
    retriever_drive_service: GoogleDriveService | None,
    admin_drive_service: GoogleDriveService,
    fallback_user_email: str,
    add_prefix: bool = False,
) -> ExternalAccess:
    """
    Get the external access for a raw Google Drive file.

    Assumes the file we retrieved has EITHER `permissions` or `permission_ids`

    add_prefix: When this method is called during the initial indexing via the connector,
                set add_prefix to True so group IDs are prefixed with the source type.
                When invoked from doc_sync (permission sync), use the default (False)
                since upsert_document_external_perms handles prefixing.
    fallback_user_email: When we cannot retrieve any permission info for a file
                (e.g. externally-owned files where the API returns no permissions
                and permissions.list returns 403), fall back to granting access
                to this user. This is typically the impersonated org user whose
                drive contained the file.
    """
    doc_id = file.get("id")
    if not doc_id:
        raise ValueError("No doc_id found in file")

    permissions = file.get("permissions")
    permission_ids = file.get("permissionIds")
    drive_id = file.get("driveId")

    permissions_list: list[GoogleDrivePermission] = []
    if permissions:
        permissions_list = [
            GoogleDrivePermission.from_drive_permission(p) for p in permissions
        ]
    elif permission_ids:

        def _get_permissions(
            drive_service: GoogleDriveService,
        ) -> list[GoogleDrivePermission]:
            return get_permissions_by_ids(
                drive_service=drive_service,
                doc_id=doc_id,
                permission_ids=permission_ids,
            )

        permissions_list = _get_permissions(
            retriever_drive_service or admin_drive_service
        )
        if len(permissions_list) != len(permission_ids) and retriever_drive_service:
            logger.warning(
                f"Failed to get all permissions for file {doc_id} with retriever service, trying admin service"
            )
            backup_permissions_list = _get_permissions(admin_drive_service)
            permissions_list = _merge_permissions_lists(
                [permissions_list, backup_permissions_list]
            )

    # For externally-owned files, the Drive API may return no permissions
    # and permissions.list may return 403. In this case, fall back to
    # granting access to the user who found the file in their drive.
    # Note, even if other users also have access to this file,
    # they will not be granted access in Onyx.
    # We check permissions_list (the final result after all fetch attempts)
    # rather than the raw fields, because permission_ids may be present
    # but the actual fetch can still return empty due to a 403.
    if not permissions_list:
        logger.info(
            f"No permission info available for file {doc_id} "
            f"(likely owned by a user outside of your organization). "
            f"Falling back to granting access to retriever user: {fallback_user_email}"
        )
        return ExternalAccess(
            external_user_emails={fallback_user_email},
            external_user_group_ids=set(),
            is_public=False,
        )

    folder_ids_to_inherit_permissions_from: set[str] = set()
    user_emails: set[str] = set()
    group_emails: set[str] = set()
    public = False

    for permission in permissions_list:
        # if the permission is inherited, do not add it directly to the file
        # instead, add the folder ID as a group that has access to the file
        # we will then handle mapping that folder to the list of Onyx users
        # in the group sync job
        # NOTE: this doesn't handle the case where a folder initially has no
        # permissioning, but then later that folder is shared with a user or group.
        # We could fetch all ancestors of the file to get the list of folders that
        # might affect the permissions of the file, but this will get replaced with
        # an audit-log based approach in the future so not doing it now.
        if permission.inherited_from:
            folder_ids_to_inherit_permissions_from.add(permission.inherited_from)

        if permission.type == PermissionType.USER:
            if permission.email_address:
                user_emails.add(permission.email_address)
            else:
                logger.error(
                    f"Permission is type `user` but no email address is provided for document {doc_id}\n {permission}"
                )
        elif permission.type == PermissionType.GROUP:
            # groups are represented as email addresses within Drive
            if permission.email_address:
                group_emails.add(permission.email_address)
            else:
                logger.error(
                    f"Permission is type `group` but no email address is provided for document {doc_id}\n {permission}"
                )
        elif permission.type == PermissionType.DOMAIN and company_domain:
            if permission.domain == company_domain:
                public = True
            else:
                logger.warning(
                    f"Permission is type domain but does not match company domain:\n {permission}"
                )
        elif permission.type == PermissionType.ANYONE:
            public = True

    group_ids = (
        group_emails
        | folder_ids_to_inherit_permissions_from
        | ({drive_id} if drive_id is not None else set())
    )

    # Prefix group IDs with source type if requested (for indexing path)
    if add_prefix:
        group_ids = {
            build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
            for group_id in group_ids
        }

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_ids,
        is_public=public,
    )


def get_external_access_for_folder(
    folder: GoogleDriveFileType,
    google_domain: str,
    drive_service: GoogleDriveService,
    add_prefix: bool = False,
) -> ExternalAccess:
    """
    Extract ExternalAccess from a folder's permissions.

    This fetches permissions using the Drive API (via permissionIds) and extracts
    user emails, group emails, and public access status.

    Args:
        folder: The folder metadata from Google Drive API (must include permissionIds field)
        google_domain: The company's Google Workspace domain (e.g., "company.com")
        drive_service: Google Drive service for fetching permission details
        add_prefix: When True, prefix group IDs with source type (for indexing path).
                   When False (default), leave unprefixed (for permission sync path).

    Returns:
        ExternalAccess with extracted permission info
    """
    folder_id = folder.get("id")
    if not folder_id:
        logger.warning("Folder missing ID, returning empty permissions")
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=False,
        )

    # Get permission IDs from folder metadata
    permission_ids = folder.get("permissionIds") or []
    if not permission_ids:
        logger.debug(f"No permissionIds found for folder {folder_id}")
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=False,
        )

    # Fetch full permission objects using the permission IDs
    permissions_list = get_permissions_by_ids(
        drive_service=drive_service,
        doc_id=folder_id,
        permission_ids=permission_ids,
    )

    user_emails: set[str] = set()
    group_emails: set[str] = set()
    is_public = False

    for permission in permissions_list:
        if permission.type == PermissionType.USER:
            if permission.email_address:
                user_emails.add(permission.email_address)
            else:
                logger.warning(f"User permission without email for folder {folder_id}")
        elif permission.type == PermissionType.GROUP:
            # Groups are represented as email addresses in Google Drive
            if permission.email_address:
                group_emails.add(permission.email_address)
            else:
                logger.warning(f"Group permission without email for folder {folder_id}")
        elif permission.type == PermissionType.DOMAIN:
            # Domain permission - check if it matches company domain
            if permission.domain == google_domain:
                # Only public if discoverable (allowFileDiscovery is not False)
                # If allowFileDiscovery is False, it's "link only" access
                is_public = permission.allow_file_discovery is not False
            else:
                logger.debug(
                    f"Domain permission for {permission.domain} does not match "
                    f"company domain {google_domain} for folder {folder_id}"
                )
        elif permission.type == PermissionType.ANYONE:
            # Only public if discoverable (allowFileDiscovery is not False)
            # If allowFileDiscovery is False, it's "link only" access
            is_public = permission.allow_file_discovery is not False

    # Prefix group IDs with source type if requested (for indexing path)
    group_ids: set[str] = group_emails
    if add_prefix:
        group_ids = {
            build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
            for group_id in group_emails
        }

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_ids,
        is_public=is_public,
    )


def gdrive_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
    """
    Adds the external permissions to the documents and hierarchy nodes in postgres.
    If the document doesn't already exist in postgres, we create
    it in postgres so that when it gets created later, the permissions are
    already populated.
    """
    google_drive_connector = GoogleDriveConnector(
        **cc_pair.connector.connector_specific_config
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    google_drive_connector.load_credentials(credential_json)

    slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector)

    total_processed = 0
    for slim_doc_batch in slim_doc_generator:
        logger.info(f"Drive perm sync: Processing {len(slim_doc_batch)} documents")
        for slim_doc in slim_doc_batch:
            if callback:
                if callback.should_stop():
                    raise RuntimeError("gdrive_doc_sync: Stop signal detected")

                callback.progress("gdrive_doc_sync", 1)
            if isinstance(slim_doc, HierarchyNode):
                # Yield hierarchy node permissions to be processed in outer layer
                if slim_doc.external_access:
                    yield NodeExternalAccess(
                        external_access=slim_doc.external_access,
                        raw_node_id=slim_doc.raw_node_id,
                        source=DocumentSource.GOOGLE_DRIVE.value,
                    )
                continue
            if slim_doc.external_access is None:
                raise ValueError(
                    f"Drive perm sync: No external access for document {slim_doc.id}"
                )

            yield DocExternalAccess(
                external_access=slim_doc.external_access,
                doc_id=slim_doc.id,
            )
        total_processed += len(slim_doc_batch)
        logger.info(f"Drive perm sync: Processed {total_processed} total documents")


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/folder_retrieval.py
================================================
from collections.abc import Iterator

from googleapiclient.discovery import Resource  # type: ignore

from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
from ee.onyx.external_permissions.google_drive.permission_retrieval import (
    get_permissions_by_ids,
)
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from onyx.connectors.google_drive.file_retrieval import generate_time_range_filter
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Only include fields we need - folder ID and permissions
# IMPORTANT: must fetch permissionIds, since sometimes the drive API
# seems to miss permissions when requesting them directly
FOLDER_PERMISSION_FIELDS = "nextPageToken, files(id, name, permissionIds, permissions(id, emailAddress, type, domain, permissionDetails))"


def get_folder_permissions_by_ids(
    service: Resource,
    folder_id: str,
    permission_ids: list[str],
) -> list[GoogleDrivePermission]:
    """
    Retrieves permissions for a specific folder filtered by permission IDs.

    Args:
        service: The Google Drive service instance
        folder_id: The ID of the folder to fetch permissions for
        permission_ids: A list of permission IDs to filter by

    Returns:
        A list of permissions matching the provided permission IDs
    """
    return get_permissions_by_ids(
        drive_service=service,
        doc_id=folder_id,
        permission_ids=permission_ids,
    )


def get_modified_folders(
    service: Resource,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    """
    Retrieves all folders that were modified within the specified time range.
    Only includes folder ID and permission information, not any contained files.

    Args:
        service: The Google Drive service instance
        start: The start time as seconds since Unix epoch (inclusive)
        end: The end time as seconds since Unix epoch (inclusive)

    Returns:
        An iterator yielding folder information including ID and permissions
    """
    # Build query for folders
    query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
    query += " and trashed = false"
    query += generate_time_range_filter(start, end)

    # Retrieve and yield folders
    for folder in execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        continue_on_404_or_403=True,
        corpora="allDrives",
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        includePermissionsForView="published",
        fields=FOLDER_PERMISSION_FIELDS,
        q=query,
    ):
        yield folder


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/group_sync.py
================================================
from collections.abc import Generator

from googleapiclient.errors import HttpError  # type: ignore
from pydantic import BaseModel

from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.google_drive.folder_retrieval import (
    get_folder_permissions_by_ids,
)
from ee.onyx.external_permissions.google_drive.folder_retrieval import (
    get_modified_folders,
)
from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
from ee.onyx.external_permissions.google_drive.models import PermissionType
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.resources import AdminService
from onyx.connectors.google_utils.resources import get_admin_service
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()


"""
Folder Permission Sync.

Each folder is treated as a group. Each file has all ancestor folders
as groups.
"""


class FolderInfo(BaseModel):
    id: str
    permissions: list[GoogleDrivePermission]


def _get_all_folders(
    google_drive_connector: GoogleDriveConnector, skip_folders_without_permissions: bool
) -> list[FolderInfo]:
    """Have to get all folders since the group syncing system assumes all groups
    are returned every time.

    TODO: tweak things so we can fetch deltas.
    """
    MAX_FAILED_PERCENTAGE = 0.5

    all_folders: list[FolderInfo] = []
    seen_folder_ids: set[str] = set()

    def _get_all_folders_for_user(
        google_drive_connector: GoogleDriveConnector,
        skip_folders_without_permissions: bool,
        user_email: str,
    ) -> None:
        """Helper to get folders for a specific user + update shared seen_folder_ids"""
        drive_service = get_drive_service(
            google_drive_connector.creds,
            user_email,
        )

        for folder in get_modified_folders(
            service=drive_service,
        ):
            folder_id = folder["id"]
            if folder_id in seen_folder_ids:
                logger.debug(f"Folder {folder_id} has already been seen. Skipping.")
                continue

            seen_folder_ids.add(folder_id)

            # Check if the folder has permission IDs but no permissions
            permission_ids = folder.get("permissionIds", [])
            raw_permissions = folder.get("permissions", [])

            if not raw_permissions and permission_ids:
                # Fetch permissions using the IDs
                permissions = get_folder_permissions_by_ids(
                    drive_service, folder_id, permission_ids
                )
            else:
                permissions = [
                    GoogleDrivePermission.from_drive_permission(permission)
                    for permission in raw_permissions
                ]

            # Don't include inherited permissions, those will be captured
            # by the folder/shared drive itself
            permissions = [
                permission
                for permission in permissions
                if permission.inherited_from is None
            ]

            if not permissions and skip_folders_without_permissions:
                logger.debug(f"Folder {folder_id} has no permissions. Skipping.")
                continue

            all_folders.append(
                FolderInfo(
                    id=folder_id,
                    permissions=permissions,
                )
            )

    failed_count = 0
    user_emails = google_drive_connector._get_all_user_emails()
    for user_email in user_emails:
        try:
            _get_all_folders_for_user(
                google_drive_connector, skip_folders_without_permissions, user_email
            )
        except Exception:
            logger.exception(f"Error getting folders for user {user_email}")
            failed_count += 1

            if failed_count > MAX_FAILED_PERCENTAGE * len(user_emails):
                raise RuntimeError("Too many failed folder fetches during group sync")

    return all_folders


def _drive_folder_to_onyx_group(
    folder: FolderInfo,
    group_email_to_member_emails_map: dict[str, list[str]],
) -> ExternalUserGroup:
    """
    Converts a folder into an Onyx group.
    """
    anyone_can_access = False
    folder_member_emails: set[str] = set()

    for permission in folder.permissions:
        if permission.type == PermissionType.USER:
            if permission.email_address is None:
                logger.warning(
                    f"User email is None for folder {folder.id} permission {permission}"
                )
                continue
            folder_member_emails.add(permission.email_address)
        elif permission.type == PermissionType.GROUP:
            if permission.email_address not in group_email_to_member_emails_map:
                logger.warning(
                    f"Group email {permission.email_address} for folder {folder.id} not found in group_email_to_member_emails_map"
                )
                continue
            folder_member_emails.update(
                group_email_to_member_emails_map[permission.email_address]
            )
        elif permission.type == PermissionType.ANYONE:
            anyone_can_access = True

    return ExternalUserGroup(
        id=folder.id,
        user_emails=list(folder_member_emails),
        gives_anyone_access=anyone_can_access,
    )


"""Individual Shared Drive / My Drive Permission Sync"""


def _get_drive_members(
    google_drive_connector: GoogleDriveConnector,
    admin_service: AdminService,
) -> dict[str, tuple[set[str], set[str]]]:
    """
    This builds a map of drive ids to their members (group and user emails).
    E.g. {
        "drive_id_1": ({"group_email_1"}, {"user_email_1", "user_email_2"}),
        "drive_id_2": ({"group_email_3"}, {"user_email_3"}),
    }
    """

    # fetches shared drives only
    drive_ids = google_drive_connector.get_all_drive_ids()

    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]] = {}
    drive_service = get_drive_service(
        google_drive_connector.creds,
        google_drive_connector.primary_admin_email,
    )

    admin_user_info = (
        admin_service.users()
        .get(userKey=google_drive_connector.primary_admin_email)
        .execute()
    )
    is_admin = admin_user_info.get("isAdmin", False) or admin_user_info.get(
        "isDelegatedAdmin", False
    )

    for drive_id in drive_ids:
        group_emails: set[str] = set()
        user_emails: set[str] = set()

        try:
            for permission in execute_paginated_retrieval(
                drive_service.permissions().list,
                list_key="permissions",
                fileId=drive_id,
                fields="permissions(emailAddress, type),nextPageToken",
                supportsAllDrives=True,
                # can only set `useDomainAdminAccess` to true if the user
                # is an admin
                useDomainAdminAccess=is_admin,
            ):
                # NOTE: don't need to check for PermissionType.ANYONE since
                # you can't share a drive with the internet
                if permission["type"] == PermissionType.GROUP:
                    group_emails.add(permission["emailAddress"])
                elif permission["type"] == PermissionType.USER:
                    user_emails.add(permission["emailAddress"])
        except HttpError as e:
            if e.status_code == 404:
                logger.warning(
                    f"Error getting permissions for drive id {drive_id}. "
                    f"User '{google_drive_connector.primary_admin_email}' likely "
                    f"does not have access to this drive. Exception: {e}"
                )
            else:
                raise e

        drive_id_to_members_map[drive_id] = (group_emails, user_emails)
    return drive_id_to_members_map


def _drive_member_map_to_onyx_groups(
    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],
    group_email_to_member_emails_map: dict[str, list[str]],
) -> Generator[ExternalUserGroup, None, None]:
    """The `user_emails` for the Shared Drive should be all individuals in the
    Shared Drive + the union of all flattened group emails."""
    for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():
        drive_member_emails: set[str] = user_emails
        for group_email in group_emails:
            if group_email not in group_email_to_member_emails_map:
                logger.warning(
                    f"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map"
                )
                continue
            drive_member_emails.update(group_email_to_member_emails_map[group_email])
        yield ExternalUserGroup(
            id=drive_id,
            user_emails=list(drive_member_emails),
        )


def _get_all_google_groups(
    admin_service: AdminService,
    google_domain: str,
) -> set[str]:
    """
    This gets all the group emails.
    """
    group_emails: set[str] = set()
    for group in execute_paginated_retrieval(
        admin_service.groups().list,
        list_key="groups",
        domain=google_domain,
        fields="groups(email),nextPageToken",
    ):
        group_emails.add(group["email"])
    return group_emails


def _google_group_to_onyx_group(
    admin_service: AdminService,
    group_email: str,
) -> ExternalUserGroup:
    """
    This maps google group emails to their member emails.
    """
    group_member_emails: set[str] = set()
    for member in execute_paginated_retrieval(
        admin_service.members().list,
        list_key="members",
        groupKey=group_email,
        fields="members(email),nextPageToken",
    ):
        group_member_emails.add(member["email"])

    return ExternalUserGroup(
        id=group_email,
        user_emails=list(group_member_emails),
    )


def _map_group_email_to_member_emails(
    admin_service: AdminService,
    group_emails: set[str],
) -> dict[str, set[str]]:
    """
    This maps group emails to their member emails.
    """
    group_to_member_map: dict[str, set[str]] = {}
    for group_email in group_emails:
        group_member_emails: set[str] = set()
        for member in execute_paginated_retrieval(
            admin_service.members().list,
            list_key="members",
            groupKey=group_email,
            fields="members(email),nextPageToken",
        ):
            group_member_emails.add(member["email"])

        group_to_member_map[group_email] = group_member_emails
    return group_to_member_map


def _build_onyx_groups(
    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],
    group_email_to_member_emails_map: dict[str, set[str]],
    folder_info: list[FolderInfo],
) -> list[ExternalUserGroup]:
    onyx_groups: list[ExternalUserGroup] = []

    # Convert all drive member definitions to onyx groups
    # This is because having drive level access means you have
    # irrevocable access to all the files in the drive.
    for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():
        drive_member_emails: set[str] = user_emails
        for group_email in group_emails:
            if group_email not in group_email_to_member_emails_map:
                logger.warning(
                    f"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map"
                )
                continue
            drive_member_emails.update(group_email_to_member_emails_map[group_email])
        onyx_groups.append(
            ExternalUserGroup(
                id=drive_id,
                user_emails=list(drive_member_emails),
            )
        )

    # Convert all folder permissions to onyx groups
    for folder in folder_info:
        anyone_can_access = False
        folder_member_emails: set[str] = set()
        for permission in folder.permissions:
            if permission.type == PermissionType.USER:
                if permission.email_address is None:
                    logger.warning(
                        f"User email is None for folder {folder.id} permission {permission}"
                    )
                    continue
                folder_member_emails.add(permission.email_address)
            elif permission.type == PermissionType.GROUP:
                if permission.email_address not in group_email_to_member_emails_map:
                    logger.warning(
                        f"Group email {permission.email_address} for folder {folder.id} "
                        "not found in group_email_to_member_emails_map"
                    )
                    continue
                folder_member_emails.update(
                    group_email_to_member_emails_map[permission.email_address]
                )
            elif permission.type == PermissionType.ANYONE:
                anyone_can_access = True

        onyx_groups.append(
            ExternalUserGroup(
                id=folder.id,
                user_emails=list(folder_member_emails),
                gives_anyone_access=anyone_can_access,
            )
        )

    # Convert all group member definitions to onyx groups
    for group_email, member_emails in group_email_to_member_emails_map.items():
        onyx_groups.append(
            ExternalUserGroup(
                id=group_email,
                user_emails=list(member_emails),
            )
        )

    return onyx_groups


def gdrive_group_sync(
    tenant_id: str,  # noqa: ARG001
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    # Initialize connector and build credential/service objects
    google_drive_connector = GoogleDriveConnector(
        **cc_pair.connector.connector_specific_config
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    google_drive_connector.load_credentials(credential_json)
    admin_service = get_admin_service(
        google_drive_connector.creds, google_drive_connector.primary_admin_email
    )

    # Get all drive members
    drive_id_to_members_map = _get_drive_members(google_drive_connector, admin_service)

    # Get all group emails
    all_group_emails = _get_all_google_groups(
        admin_service, google_drive_connector.google_domain
    )

    # Each google group is an Onyx group, yield those
    group_email_to_member_emails_map: dict[str, list[str]] = {}
    for group_email in all_group_emails:
        onyx_group = _google_group_to_onyx_group(admin_service, group_email)
        group_email_to_member_emails_map[group_email] = onyx_group.user_emails
        yield onyx_group

    # Each drive is a group, yield those
    for onyx_group in _drive_member_map_to_onyx_groups(
        drive_id_to_members_map, group_email_to_member_emails_map
    ):
        yield onyx_group

    # Get all folder permissions
    folder_info = _get_all_folders(
        google_drive_connector=google_drive_connector,
        skip_folders_without_permissions=True,
    )
    for folder in folder_info:
        yield _drive_folder_to_onyx_group(folder, group_email_to_member_emails_map)


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/models.py
================================================
from enum import Enum
from typing import Any

from pydantic import BaseModel


class PermissionType(str, Enum):
    USER = "user"
    GROUP = "group"
    DOMAIN = "domain"
    ANYONE = "anyone"


class GoogleDrivePermissionDetails(BaseModel):
    # this is "file", "member", etc.
    # different from the `type` field within `GoogleDrivePermission`
    # Sometimes can be not, although not sure why...
    permission_type: str | None
    # this is "reader", "writer", "owner", etc.
    role: str
    # this is the id of the parent permission
    inherited_from: str | None


class GoogleDrivePermission(BaseModel):
    id: str
    # groups are also represented as email addresses within Drive
    # will be None for domain/global permissions
    email_address: str | None
    type: PermissionType
    domain: str | None  # only applies to domain permissions
    permission_details: GoogleDrivePermissionDetails | None
    # Whether this permission makes the file discoverable in search
    # False means "anyone with the link" (not searchable/discoverable)
    # Only applicable for domain/anyone permission types
    allow_file_discovery: bool | None

    @classmethod
    def from_drive_permission(
        cls, drive_permission: dict[str, Any]
    ) -> "GoogleDrivePermission":
        # we seem to only get details for permissions that are inherited
        # we can get multiple details if a permission is inherited from multiple
        permission_details_list = drive_permission.get("permissionDetails", [])
        permission_details: dict[str, Any] | None = (
            permission_details_list[0] if permission_details_list else None
        )
        return cls(
            id=drive_permission["id"],
            email_address=drive_permission.get("emailAddress"),
            type=PermissionType(drive_permission["type"]),
            domain=drive_permission.get("domain"),
            allow_file_discovery=drive_permission.get("allowFileDiscovery"),
            permission_details=(
                GoogleDrivePermissionDetails(
                    permission_type=permission_details.get("type"),
                    role=permission_details.get("role", ""),
                    inherited_from=permission_details.get("inheritedFrom"),
                )
                if permission_details
                else None
            ),
        )

    @property
    def inherited_from(self) -> str | None:
        if self.permission_details:
            return self.permission_details.inherited_from
        return None


================================================
FILE: backend/ee/onyx/external_permissions/google_drive/permission_retrieval.py
================================================
from retry import retry

from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.utils.logger import setup_logger

logger = setup_logger()


@retry(tries=3, delay=2, backoff=2)
def get_permissions_by_ids(
    drive_service: GoogleDriveService,
    doc_id: str,
    permission_ids: list[str],
) -> list[GoogleDrivePermission]:
    """
    Fetches permissions for a document based on a list of permission IDs.

    Args:
        drive_service: The Google Drive service instance
        doc_id: The ID of the document to fetch permissions for
        permission_ids: A list of permission IDs to filter by

    Returns:
        A list of GoogleDrivePermission objects matching the provided permission IDs
    """
    if not permission_ids:
        return []

    # Create a set for faster lookup
    permission_id_set = set(permission_ids)

    # Fetch all permissions for the document
    fetched_permissions = execute_paginated_retrieval(
        retrieval_function=drive_service.permissions().list,
        list_key="permissions",
        fileId=doc_id,
        fields="permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails),nextPageToken",
        supportsAllDrives=True,
        continue_on_404_or_403=True,
    )

    # Filter permissions by ID and convert to GoogleDrivePermission objects
    filtered_permissions = []
    for permission in fetched_permissions:
        permission_id = permission.get("id")
        if permission_id in permission_id_set:
            google_drive_permission = GoogleDrivePermission.from_drive_permission(
                permission
            )
            filtered_permissions.append(google_drive_permission)

    # Log if we couldn't find all requested permission IDs
    if len(filtered_permissions) < len(permission_ids):
        missing_ids = permission_id_set - {p.id for p in filtered_permissions if p.id}
        logger.warning(
            f"Could not find all requested permission IDs for document {doc_id}. Missing IDs: {missing_ids}"
        )

    return filtered_permissions


================================================
FILE: backend/ee/onyx/external_permissions/jira/__init__.py
================================================


================================================
FILE: backend/ee/onyx/external_permissions/jira/doc_sync.py
================================================
from collections.abc import Generator

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.jira.connector import JiraConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()

JIRA_DOC_SYNC_TAG = "jira_doc_sync"


def jira_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
    jira_connector = JiraConnector(
        **cc_pair.connector.connector_specific_config,
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    jira_connector.load_credentials(credential_json)

    yield from generic_doc_sync(
        cc_pair=cc_pair,
        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        callback=callback,
        doc_source=DocumentSource.JIRA,
        slim_connector=jira_connector,
        label=JIRA_DOC_SYNC_TAG,
    )


================================================
FILE: backend/ee/onyx/external_permissions/jira/group_sync.py
================================================
from collections.abc import Generator
from typing import Any

from jira import JIRA
from jira.exceptions import JIRAError

from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.jira.utils import build_jira_client
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()

_ATLASSIAN_ACCOUNT_TYPE = "atlassian"
_GROUP_MEMBER_PAGE_SIZE = 50

# The GET /group/member endpoint was introduced in Jira 6.0.
# Jira versions older than 6.0 do not have group management REST APIs at all.
_MIN_JIRA_VERSION_FOR_GROUP_MEMBER = "6.0"


def _fetch_group_member_page(
    jira_client: JIRA,
    group_name: str,
    start_at: int,
) -> dict[str, Any]:
    """Fetch a single page from the non-deprecated GET /group/member endpoint.

    The old GET /group endpoint (used by jira_client.group_members()) is deprecated
    and decommissioned in Jira Server 10.3+. This uses the replacement endpoint
    directly via the library's internal _get_json helper, following the same pattern
    as enhanced_search_ids / bulk_fetch_issues in connector.py.

    There is an open PR to the library to switch to this endpoint since last year:
    https://github.com/pycontribs/jira/pull/2356
    so once it is merged and released, we can switch to using the library function.
    """
    try:
        return jira_client._get_json(
            "group/member",
            params={
                "groupname": group_name,
                "includeInactiveUsers": "false",
                "startAt": start_at,
                "maxResults": _GROUP_MEMBER_PAGE_SIZE,
            },
        )
    except JIRAError as e:
        if e.status_code == 404:
            raise RuntimeError(
                f"GET /group/member returned 404 for group '{group_name}'. "
                f"This endpoint requires Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}+. "
                f"If you are running a self-hosted Jira instance, please upgrade "
                f"to at least Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}."
            ) from e
        raise


def _get_group_member_emails(
    jira_client: JIRA,
    group_name: str,
) -> set[str]:
    """Get all member emails for a single Jira group.

    Uses the non-deprecated GET /group/member endpoint which returns full user
    objects including accountType, so we can filter out app/customer accounts
    without making separate user() calls.
    """
    emails: set[str] = set()
    start_at = 0

    while True:
        try:
            page = _fetch_group_member_page(jira_client, group_name, start_at)
        except Exception as e:
            logger.error(f"Error fetching members for group {group_name}: {e}")
            raise

        members: list[dict[str, Any]] = page.get("values", [])
        for member in members:
            account_type = member.get("accountType")
            # On Jira DC < 9.0, accountType is absent; include those users.
            # On Cloud / DC 9.0+, filter to real user accounts only.
            if account_type is not None and account_type != _ATLASSIAN_ACCOUNT_TYPE:
                continue

            email = member.get("emailAddress")
            if email:
                emails.add(email)
            else:
                logger.warning(
                    f"Atlassian user {member.get('accountId', 'unknown')} in group {group_name} has no visible email address"
                )

        if page.get("isLast", True) or not members:
            break
        start_at += len(members)

    return emails


def jira_group_sync(
    tenant_id: str,  # noqa: ARG001
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    """Sync Jira groups and their members, yielding one group at a time.

    Streams group-by-group rather than accumulating all groups in memory.
    """
    jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
    scoped_token = cc_pair.connector.connector_specific_config.get(
        "scoped_token", False
    )

    if not jira_base_url:
        raise ValueError("No jira_base_url found in connector config")

    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    jira_client = build_jira_client(
        credentials=credential_json,
        jira_base=jira_base_url,
        scoped_token=scoped_token,
    )

    group_names = jira_client.groups()
    if not group_names:
        raise ValueError(f"No groups found for cc_pair_id={cc_pair.id}")

    logger.info(f"Found {len(group_names)} groups in Jira")

    for group_name in group_names:
        if not group_name:
            continue

        member_emails = _get_group_member_emails(
            jira_client=jira_client,
            group_name=group_name,
        )
        if not member_emails:
            logger.debug(f"No members found for group {group_name}")
            continue

        logger.debug(f"Found {len(member_emails)} members for group {group_name}")
        yield ExternalUserGroup(
            id=group_name,
            user_emails=list(member_emails),
        )


================================================
FILE: backend/ee/onyx/external_permissions/jira/models.py
================================================
from typing import Any

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic.alias_generators import to_camel


Holder = dict[str, Any]


class Permission(BaseModel):
    id: int
    permission: str
    holder: Holder | None


class User(BaseModel):
    account_id: str
    email_address: str
    display_name: str
    active: bool

    model_config = ConfigDict(
        alias_generator=to_camel,
    )


================================================
FILE: backend/ee/onyx/external_permissions/jira/page_access.py
================================================
from collections import defaultdict

from jira import JIRA
from jira.resources import PermissionScheme
from pydantic import ValidationError

from ee.onyx.external_permissions.jira.models import Holder
from ee.onyx.external_permissions.jira.models import Permission
from ee.onyx.external_permissions.jira.models import User
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.utils.logger import setup_logger

HolderMap = dict[str, list[Holder]]


logger = setup_logger()


def _get_role_id(holder: Holder) -> str | None:
    return holder.get("value") or holder.get("parameter")


def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
    """
    A "Holder" in JIRA is a person / entity who "holds" the corresponding permission.
    It can have different types. They can be one of (but not limited to):
        - user (an explicitly whitelisted user)
        - projectRole (for project level "roles")
        - reporter (the reporter of an issue)

    A "Holder" usually has following structure:
        - `{ "type": "user", "value": "$USER_ID", "user": { .. }, .. }`
        - `{ "type": "projectRole", "value": "$PROJECT_ID", ..  }`

    When we fetch the PermissionSchema from JIRA, we retrieve a list of "Holder"s.
    The list of "Holder"s can have multiple "Holder"s of the same type in the list (e.g., you can have two `"type": "user"`s in
    there, each corresponding to a different user).
    This function constructs a map of "Holder" types to a list of the "Holder"s which contained that type.

    Returns:
        A dict from the "Holder" type to the actual "Holder" instance.

    Example:
        ```
        {
            "user": [
                { "type": "user", "value": "10000", "user": { .. }, .. },
                { "type": "user", "value": "10001", "user": { .. }, .. },
            ],
            "projectRole": [
                { "type": "projectRole", "value": "10010", ..  },
                { "type": "projectRole", "value": "10011", ..  },
            ],
            "applicationRole": [
                { "type": "applicationRole" },
            ],
            ..
        }
        ```
    """

    holder_map: defaultdict[str, list[Holder]] = defaultdict(list)

    for raw_perm in permissions:
        if not hasattr(raw_perm, "raw"):
            logger.warning(f"Expected a 'raw' field, but none was found: {raw_perm=}")
            continue

        permission = Permission(**raw_perm.raw)

        # We only care about ability to browse through projects + issues (not other permissions such as read/write).
        if permission.permission != "BROWSE_PROJECTS":
            continue

        # In order to associate this permission to some Atlassian entity, we need the "Holder".
        # If this doesn't exist, then we cannot associate this permission to anyone; just skip.
        if not permission.holder:
            logger.warning(
                f"Expected to find a permission holder, but none was found: {permission=}"
            )
            continue

        type = permission.holder.get("type")
        if not type:
            logger.warning(
                f"Expected to find the type of permission holder, but none was found: {permission=}"
            )
            continue

        holder_map[type].append(permission.holder)

    return holder_map


def _get_user_emails(user_holders: list[Holder]) -> list[str]:
    emails = []

    for user_holder in user_holders:
        if "user" not in user_holder:
            continue
        raw_user_dict = user_holder["user"]

        try:
            user_model = User.model_validate(raw_user_dict)
        except ValidationError:
            logger.error(
                "Expected to be able to serialize the raw-user-dict into an instance of `User`, but validation failed;"
                f"{raw_user_dict=}"
            )
            continue

        emails.append(user_model.email_address)

    return emails


def _get_user_emails_and_groups_from_project_roles(
    jira_client: JIRA,
    jira_project: str,
    project_role_holders: list[Holder],
) -> tuple[list[str], list[str]]:
    """
    Get user emails and group names from project roles.
    Returns a tuple of (emails, group_names).
    """
    # Get role IDs - Cloud uses "value", Data Center uses "parameter"
    role_ids = []
    for holder in project_role_holders:
        role_id = _get_role_id(holder)
        if role_id:
            role_ids.append(role_id)
        else:
            logger.warning(f"No value or parameter in projectRole holder: {holder}")

    roles = [
        jira_client.project_role(project=jira_project, id=role_id)
        for role_id in role_ids
    ]

    emails = []
    groups = []

    for role in roles:
        if not hasattr(role, "actors"):
            logger.warning(f"Project role {role} has no actors attribute")
            continue

        for actor in role.actors:
            # Handle group actors
            if hasattr(actor, "actorGroup"):
                group_name = getattr(actor.actorGroup, "name", None) or getattr(
                    actor.actorGroup, "displayName", None
                )
                if group_name:
                    groups.append(group_name)
                continue

            # Handle user actors
            if hasattr(actor, "actorUser"):
                account_id = getattr(actor.actorUser, "accountId", None)
                if not account_id:
                    logger.error(f"No accountId in actorUser: {actor.actorUser}")
                    continue

                user = jira_client.user(id=account_id)
                if not hasattr(user, "accountType") or user.accountType != "atlassian":
                    logger.info(
                        f"Skipping user {account_id} because it is not an atlassian user"
                    )
                    continue

                if not hasattr(user, "emailAddress"):
                    msg = f"User's email address was not able to be retrieved;  {actor.actorUser.accountId=}"
                    if hasattr(user, "displayName"):
                        msg += f" {actor.displayName=}"
                    logger.warning(msg)
                    continue

                emails.append(user.emailAddress)
                continue

            logger.debug(f"Skipping actor type: {actor}")

    return emails, groups


def _build_external_access_from_holder_map(
    jira_client: JIRA, jira_project: str, holder_map: HolderMap
) -> ExternalAccess:
    """
    Build ExternalAccess from the holder map.

    Holder types handled:
        - "anyone": Public project, anyone can access
        - "applicationRole": All users with a Jira license can access (treated as public)
        - "user": Specific users with access
        - "projectRole": Project roles containing users and/or groups
        - "group": Groups directly assigned in the permission scheme
    """
    # Public access - anyone can view
    if "anyone" in holder_map:
        return ExternalAccess(
            external_user_emails=set(), external_user_group_ids=set(), is_public=True
        )

    # applicationRole means all users with a Jira license can access - treat as public
    if "applicationRole" in holder_map:
        return ExternalAccess(
            external_user_emails=set(), external_user_group_ids=set(), is_public=True
        )

    # Get emails from explicit user holders
    user_emails = (
        _get_user_emails(user_holders=holder_map["user"])
        if "user" in holder_map
        else []
    )

    # Get emails and groups from project roles
    project_role_user_emails: list[str] = []
    project_role_groups: list[str] = []
    if "projectRole" in holder_map:
        project_role_user_emails, project_role_groups = (
            _get_user_emails_and_groups_from_project_roles(
                jira_client=jira_client,
                jira_project=jira_project,
                project_role_holders=holder_map["projectRole"],
            )
        )

    # Get groups directly assigned in permission scheme (common in Data Center)
    # Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}
    direct_groups: list[str] = []
    if "group" in holder_map:
        for group_holder in holder_map["group"]:
            group_name = _get_role_id(group_holder)
            if group_name:
                direct_groups.append(group_name)
            else:
                logger.error(f"No parameter/value in group holder: {group_holder}")

    external_user_emails = set(user_emails + project_role_user_emails)
    external_user_group_ids = set(project_role_groups + direct_groups)

    return ExternalAccess(
        external_user_emails=external_user_emails,
        external_user_group_ids=external_user_group_ids,
        is_public=False,
    )


def get_project_permissions(
    jira_client: JIRA,
    jira_project: str,
    add_prefix: bool = False,
) -> ExternalAccess | None:
    """
    Get project permissions from Jira.

    add_prefix: When True, prefix group IDs with source type (for indexing path).
               When False (default), leave unprefixed (for permission sync path).
    """
    project_permissions: PermissionScheme = jira_client.project_permissionscheme(
        project=jira_project
    )

    if not hasattr(project_permissions, "permissions"):
        logger.error(f"Project {jira_project} has no permissions attribute")
        return None

    if not isinstance(project_permissions.permissions, list):
        logger.error(f"Project {jira_project} permissions is not a list")
        return None

    holder_map = _build_holder_map(permissions=project_permissions.permissions)

    external_access = _build_external_access_from_holder_map(
        jira_client=jira_client, jira_project=jira_project, holder_map=holder_map
    )

    # Prefix group IDs with source type if requested (for indexing path)
    if add_prefix and external_access and external_access.external_user_group_ids:
        prefixed_groups = {
            build_ext_group_name_for_onyx(g, DocumentSource.JIRA)
            for g in external_access.external_user_group_ids
        }
        return ExternalAccess(
            external_user_emails=external_access.external_user_emails,
            external_user_group_ids=prefixed_groups,
            is_public=external_access.is_public,
        )

    return external_access


================================================
FILE: backend/ee/onyx/external_permissions/perm_sync_types.py
================================================
from collections.abc import Callable
from collections.abc import Generator
from typing import Optional
from typing import Protocol

from ee.onyx.db.external_perm import ExternalUserGroup  # noqa
from onyx.access.models import DocExternalAccess  # noqa
from onyx.access.models import ElementExternalAccess  # noqa
from onyx.access.models import NodeExternalAccess  # noqa
from onyx.context.search.models import InferenceChunk
from onyx.db.models import ConnectorCredentialPair  # noqa
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface  # noqa


class FetchAllDocumentsFunction(Protocol):
    """Protocol for a function that fetches documents for a connector credential pair.

    This protocol defines the interface for functions that retrieve documents
    from the database, typically used in permission synchronization workflows.
    """

    def __call__(
        self,
        sort_order: SortOrder | None,
    ) -> list[DocumentRow]:
        """
        Fetches documents for a connector credential pair.
        """
        ...


class FetchAllDocumentsIdsFunction(Protocol):
    """Protocol for a function that fetches document IDs for a connector credential pair.

    This protocol defines the interface for functions that retrieve document IDs
    from the database, typically used in permission synchronization workflows.
    """

    def __call__(
        self,
    ) -> list[str]:
        """
        Fetches document IDs for a connector credential pair.
        """
        ...


# Defining the input/output types for the sync functions
DocSyncFuncType = Callable[
    [
        ConnectorCredentialPair,
        FetchAllDocumentsFunction,
        FetchAllDocumentsIdsFunction,
        Optional[IndexingHeartbeatInterface],
    ],
    Generator[ElementExternalAccess, None, None],
]

GroupSyncFuncType = Callable[
    [
        str,  # tenant_id
        ConnectorCredentialPair,  # cc_pair
    ],
    Generator[ExternalUserGroup, None, None],
]

# list of chunks to be censored and the user email. returns censored chunks
CensoringFuncType = Callable[[list[InferenceChunk], str], list[InferenceChunk]]


================================================
FILE: backend/ee/onyx/external_permissions/post_query_censoring.py
================================================
from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.external_permissions.sync_params import get_all_censoring_enabled_sources
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.configs.constants import DocumentSource
from onyx.context.search.pipeline import InferenceChunk
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _get_all_censoring_enabled_sources() -> set[DocumentSource]:
    """
    Returns the set of sources that have censoring enabled.
    This is based on if the access_type is set to sync and the connector
    source has a censoring config.

    NOTE: This means if there is a source has a single cc_pair that is sync,
    all chunks for that source will be censored, even if the connector that
    indexed that chunk is not sync. This was done to avoid getting the cc_pair
    for every single chunk.
    """
    all_censoring_enabled_sources = get_all_censoring_enabled_sources()
    with get_session_with_current_tenant() as db_session:
        enabled_sync_connectors = get_all_auto_sync_cc_pairs(db_session)
        return {
            cc_pair.connector.source
            for cc_pair in enabled_sync_connectors
            if cc_pair.connector.source in all_censoring_enabled_sources
        }


# NOTE: This is only called if ee is enabled.
def _post_query_chunk_censoring(
    chunks: list[InferenceChunk],
    user: User,
) -> list[InferenceChunk]:
    """
    This function checks all chunks to see if they need to be sent to a censoring
    function. If they do, it sends them to the censoring function and returns the
    censored chunks. If they don't, it returns the original chunks.
    """
    sources_to_censor = _get_all_censoring_enabled_sources()

    # Anonymous users can only access public (non-permission-synced) content
    if user.is_anonymous:
        return [chunk for chunk in chunks if chunk.source_type not in sources_to_censor]

    final_chunk_dict: dict[str, InferenceChunk] = {}
    chunks_to_process: dict[DocumentSource, list[InferenceChunk]] = {}
    for chunk in chunks:
        # Separate out chunks that require permission post-processing by source
        if chunk.source_type in sources_to_censor:
            chunks_to_process.setdefault(chunk.source_type, []).append(chunk)
        else:
            final_chunk_dict[chunk.unique_id] = chunk

    # For each source, filter out the chunks using the permission
    # check function for that source
    # TODO: Use a threadpool/multiprocessing to process the sources in parallel
    for source, chunks_for_source in chunks_to_process.items():
        sync_config = get_source_perm_sync_config(source)
        if sync_config is None or sync_config.censoring_config is None:
            raise ValueError(f"No sync config found for {source}")

        censor_chunks_for_source = sync_config.censoring_config.chunk_censoring_func
        try:
            censored_chunks = censor_chunks_for_source(chunks_for_source, user.email)
        except Exception as e:
            logger.exception(
                f"Failed to censor chunks for source {source} so throwing out all chunks for this source and continuing: {e}"
            )
            continue

        for censored_chunk in censored_chunks:
            final_chunk_dict[censored_chunk.unique_id] = censored_chunk

    # IMPORTANT: make sure to retain the same ordering as the original `chunks` passed in
    final_chunk_list: list[InferenceChunk] = []
    for chunk in chunks:
        # only if the chunk is in the final censored chunks, add it to the final list
        # if it is missing, that means it was intentionally left out
        if chunk.unique_id in final_chunk_dict:
            final_chunk_list.append(final_chunk_dict[chunk.unique_id])

    return final_chunk_list


================================================
FILE: backend/ee/onyx/external_permissions/salesforce/postprocessing.py
================================================
import time

from ee.onyx.db.external_perm import fetch_external_groups_for_user_email_and_group_ids
from ee.onyx.external_permissions.salesforce.utils import (
    get_any_salesforce_client_for_doc_id,
)
from ee.onyx.external_permissions.salesforce.utils import get_objects_access_for_user_id
from ee.onyx.external_permissions.salesforce.utils import (
    get_salesforce_user_id_from_email,
)
from onyx.configs.app_configs import BLURB_SIZE
from onyx.context.search.models import InferenceChunk
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.utils.logger import setup_logger

logger = setup_logger()


# Types
ChunkKey = tuple[str, int]  # (doc_id, chunk_id)
ContentRange = tuple[int, int | None]  # (start_index, end_index) None means to the end


# NOTE: Used for testing timing
def _get_dummy_object_access_map(
    object_ids: set[str],
    user_email: str,  # noqa: ARG001
    chunks: list[InferenceChunk],  # noqa: ARG001
) -> dict[str, bool]:
    time.sleep(0.15)
    # return {object_id: True for object_id in object_ids}
    import random

    return {object_id: random.choice([True, False]) for object_id in object_ids}


def _get_objects_access_for_user_email_from_salesforce(
    object_ids: set[str],
    user_email: str,
    chunks: list[InferenceChunk],
) -> dict[str, bool] | None:
    """
    This function wraps the salesforce call as we may want to change how this
    is done in the future. (E.g. replace it with the above function)
    """
    # This is cached in the function so the first query takes an extra 0.1-0.3 seconds
    # but subsequent queries for this source are essentially instant
    first_doc_id = chunks[0].document_id
    with get_session_with_current_tenant() as db_session:
        salesforce_client = get_any_salesforce_client_for_doc_id(
            db_session, first_doc_id
        )

    # This is cached in the function so the first query takes an extra 0.1-0.3 seconds
    # but subsequent queries by the same user are essentially instant
    start_time = time.monotonic()
    user_id = get_salesforce_user_id_from_email(salesforce_client, user_email)
    end_time = time.monotonic()
    logger.info(
        f"Time taken to get Salesforce user ID: {end_time - start_time} seconds"
    )
    if user_id is None:
        logger.warning(f"User '{user_email}' not found in Salesforce")
        return None

    # This is the only query that is not cached in the function
    # so it takes 0.1-0.2 seconds total
    object_id_to_access = get_objects_access_for_user_id(
        salesforce_client, user_id, list(object_ids)
    )
    logger.debug(f"Object ID to access: {object_id_to_access}")
    return object_id_to_access


def _extract_salesforce_object_id_from_url(url: str) -> str:
    return url.split("/")[-1]


def _get_object_ranges_for_chunk(
    chunk: InferenceChunk,
) -> dict[str, list[ContentRange]]:
    """
    Given a chunk, return a dictionary of salesforce object ids and the content ranges
    for that object id in the current chunk
    """
    if chunk.source_links is None:
        return {}

    object_ranges: dict[str, list[ContentRange]] = {}
    end_index = None
    descending_source_links = sorted(
        chunk.source_links.items(), key=lambda x: x[0], reverse=True
    )
    for start_index, url in descending_source_links:
        object_id = _extract_salesforce_object_id_from_url(url)
        if object_id not in object_ranges:
            object_ranges[object_id] = []
        object_ranges[object_id].append((start_index, end_index))
        end_index = start_index
    return object_ranges


def _create_empty_censored_chunk(uncensored_chunk: InferenceChunk) -> InferenceChunk:
    """
    Create a copy of the unfiltered chunk where potentially sensitive content is removed
    to be added later if the user has access to each of the sub-objects
    """
    empty_censored_chunk = InferenceChunk(
        **uncensored_chunk.model_dump(),
    )
    empty_censored_chunk.content = ""
    empty_censored_chunk.blurb = ""
    empty_censored_chunk.source_links = {}
    return empty_censored_chunk


def _update_censored_chunk(
    censored_chunk: InferenceChunk,
    uncensored_chunk: InferenceChunk,
    content_range: ContentRange,
) -> InferenceChunk:
    """
    Update the filtered chunk with the content and source links from the unfiltered chunk using the content ranges
    """
    start_index, end_index = content_range

    # Update the content of the filtered chunk
    permitted_content = uncensored_chunk.content[start_index:end_index]
    permitted_section_start_index = len(censored_chunk.content)
    censored_chunk.content = permitted_content + censored_chunk.content

    # Update the source links of the filtered chunk
    if uncensored_chunk.source_links is not None:
        if censored_chunk.source_links is None:
            censored_chunk.source_links = {}
        link_content = uncensored_chunk.source_links[start_index]
        censored_chunk.source_links[permitted_section_start_index] = link_content

    # Update the blurb of the filtered chunk
    censored_chunk.blurb = censored_chunk.content[:BLURB_SIZE]

    return censored_chunk


# TODO: Generalize this to other sources
def censor_salesforce_chunks(
    chunks: list[InferenceChunk],
    user_email: str,
    # This is so we can provide a mock access map for testing
    access_map: dict[str, bool] | None = None,
) -> list[InferenceChunk]:
    # object_id -> list[((doc_id, chunk_id), (start_index, end_index))]
    object_to_content_map: dict[str, list[tuple[ChunkKey, ContentRange]]] = {}

    # (doc_id, chunk_id) -> chunk
    uncensored_chunks: dict[ChunkKey, InferenceChunk] = {}

    # keep track of all object ids that we have seen to make it easier to get
    # the access for these object ids
    object_ids: set[str] = set()

    for chunk in chunks:
        chunk_key = (chunk.document_id, chunk.chunk_id)
        # create a dictionary to quickly look up the unfiltered chunk
        uncensored_chunks[chunk_key] = chunk

        # for each chunk, get a dictionary of object ids and the content ranges
        # for that object id in the current chunk
        object_ranges_for_chunk = _get_object_ranges_for_chunk(chunk)
        for object_id, ranges in object_ranges_for_chunk.items():
            object_ids.add(object_id)
            for start_index, end_index in ranges:
                object_to_content_map.setdefault(object_id, []).append(
                    (chunk_key, (start_index, end_index))
                )

    # This is so we can provide a mock access map for testing
    if access_map is None:
        access_map = _get_objects_access_for_user_email_from_salesforce(
            object_ids=object_ids,
            user_email=user_email,
            chunks=chunks,
        )
        if access_map is None:
            # If the user is not found in Salesforce, access_map will be None
            # so we should just return an empty list because no chunks will be
            # censored
            return []

    censored_chunks: dict[ChunkKey, InferenceChunk] = {}
    for object_id, content_list in object_to_content_map.items():
        # if the user does not have access to the object, or the object is not in the
        # access_map, do not include its content in the filtered chunks
        if not access_map.get(object_id, False):
            continue

        # if we got this far, the user has access to the object so we can create or update
        # the filtered chunk(s) for this object
        # NOTE: we only create a censored chunk if the user has access to some
        # part of the chunk
        for chunk_key, content_range in content_list:
            if chunk_key not in censored_chunks:
                censored_chunks[chunk_key] = _create_empty_censored_chunk(
                    uncensored_chunks[chunk_key]
                )

            uncensored_chunk = uncensored_chunks[chunk_key]
            censored_chunk = _update_censored_chunk(
                censored_chunk=censored_chunks[chunk_key],
                uncensored_chunk=uncensored_chunk,
                content_range=content_range,
            )
            censored_chunks[chunk_key] = censored_chunk

    return list(censored_chunks.values())


# NOTE: This is not used anywhere.
def _get_objects_access_for_user_email(
    object_ids: set[str], user_email: str
) -> dict[str, bool]:
    with get_session_with_current_tenant() as db_session:
        external_groups = fetch_external_groups_for_user_email_and_group_ids(
            db_session=db_session,
            user_email=user_email,
            # Maybe make a function that adds a salesforce prefix to the group ids
            group_ids=list(object_ids),
        )
        external_group_ids = {group.external_user_group_id for group in external_groups}
        return {group_id: group_id in external_group_ids for group_id in object_ids}


================================================
FILE: backend/ee/onyx/external_permissions/salesforce/utils.py
================================================
from simple_salesforce import Salesforce
from sqlalchemy.orm import Session

from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import get_cc_pairs_for_document
from onyx.utils.logger import setup_logger

logger = setup_logger()

_ANY_SALESFORCE_CLIENT: Salesforce | None = None


def get_any_salesforce_client_for_doc_id(
    db_session: Session, doc_id: str
) -> Salesforce:
    """
    We create a salesforce client for the first cc_pair for the first doc_id where
    salesforce censoring is enabled. After that we just cache and reuse the same
    client for all queries.

    We do this to reduce the number of postgres queries we make at query time.

    This may be problematic if they are using multiple cc_pairs for salesforce.
    E.g. there are 2 different credential sets for 2 different salesforce cc_pairs
    but only one has the permissions to access the permissions needed for the query.
    """

    # NOTE: this global seems very very bad
    global _ANY_SALESFORCE_CLIENT
    if _ANY_SALESFORCE_CLIENT is None:
        cc_pairs = get_cc_pairs_for_document(db_session, doc_id)
        first_cc_pair = cc_pairs[0]
        credential_json = (
            first_cc_pair.credential.credential_json.get_value(apply_mask=False)
            if first_cc_pair.credential.credential_json
            else {}
        )
        _ANY_SALESFORCE_CLIENT = Salesforce(
            username=credential_json["sf_username"],
            password=credential_json["sf_password"],
            security_token=credential_json["sf_security_token"],
        )
    return _ANY_SALESFORCE_CLIENT


def _query_salesforce_user_id(sf_client: Salesforce, user_email: str) -> str | None:
    query = f"SELECT Id FROM User WHERE Username = '{user_email}' AND IsActive = true"
    result = sf_client.query(query)
    if len(result["records"]) > 0:
        return result["records"][0]["Id"]

    # try emails
    query = f"SELECT Id FROM User WHERE Email = '{user_email}' AND IsActive = true"
    result = sf_client.query(query)
    if len(result["records"]) > 0:
        return result["records"][0]["Id"]

    return None


# This contains only the user_ids that we have found in Salesforce.
# If we don't know their user_id, we don't store anything in the cache.
_CACHED_SF_EMAIL_TO_ID_MAP: dict[str, str] = {}


def get_salesforce_user_id_from_email(
    sf_client: Salesforce,
    user_email: str,
) -> str | None:
    """
    We cache this so we don't have to query Salesforce for every query and salesforce
    user IDs never change.
    Memory usage is fine because we just store 2 small strings per user.

    If the email is not in the cache, we check the local salesforce database for the info.
    If the user is not found in the local salesforce database, we query Salesforce.
    Whatever we get back from Salesforce is added to the database.
    If no user_id is found, we add a NULL_ID_STRING to the database for that email so
    we don't query Salesforce again (which is slow) but we still check the local salesforce
    database every query until a user id is found. This is acceptable because the query time
    is quite fast.
    If a user_id is created in Salesforce, it will be added to the local salesforce database
    next time the connector is run. Then that value will be found in this function and cached.

    NOTE: First time this runs, it may be slow if it hasn't already been updated in the local
    salesforce database. (Around 0.1-0.3 seconds)
    If it's cached or stored in the local salesforce database, it's fast (<0.001 seconds).
    """

    # NOTE: this global seems bad
    global _CACHED_SF_EMAIL_TO_ID_MAP
    if user_email in _CACHED_SF_EMAIL_TO_ID_MAP:
        if _CACHED_SF_EMAIL_TO_ID_MAP[user_email] is not None:
            return _CACHED_SF_EMAIL_TO_ID_MAP[user_email]

    # some caching via sqlite existed here before ... check history if interested

    # ...query Salesforce and store the result in the database
    user_id = _query_salesforce_user_id(sf_client, user_email)

    if user_id is None:
        return None

    # If the found user_id is real, cache it
    _CACHED_SF_EMAIL_TO_ID_MAP[user_email] = user_id
    return user_id


_MAX_RECORD_IDS_PER_QUERY = 200


def get_objects_access_for_user_id(
    salesforce_client: Salesforce,
    user_id: str,
    record_ids: list[str],
) -> dict[str, bool]:
    """
    Salesforce has a limit of 200 record ids per query. So we just truncate
    the list of record ids to 200. We only ever retrieve 50 chunks at a time
    so this should be fine (unlikely that we retrieve all 50 chunks contain
    4 unique objects).
    If we decide this isn't acceptable we can use multiple queries but they
    should be in parallel so query time doesn't get too long.
    """
    truncated_record_ids = record_ids[:_MAX_RECORD_IDS_PER_QUERY]
    record_ids_str = "'" + "','".join(truncated_record_ids) + "'"
    access_query = f"""
    SELECT RecordId, HasReadAccess
    FROM UserRecordAccess
    WHERE RecordId IN ({record_ids_str})
    AND UserId = '{user_id}'
    """
    result = salesforce_client.query_all(access_query)
    return {record["RecordId"]: record["HasReadAccess"] for record in result["records"]}


_CC_PAIR_ID_SALESFORCE_CLIENT_MAP: dict[int, Salesforce] = {}
_DOC_ID_TO_CC_PAIR_ID_MAP: dict[str, int] = {}


# NOTE: This is not used anywhere.
def _get_salesforce_client_for_doc_id(db_session: Session, doc_id: str) -> Salesforce:
    """
    Uses a document id to get the cc_pair that indexed that document and uses the credentials
    for that cc_pair to create a Salesforce client.
    Problems:
    - There may be multiple cc_pairs for a document, and we don't know which one to use.
        - right now we just use the first one
    - Building a new Salesforce client for each document is slow.
    - Memory usage could be an issue as we build these dictionaries.
    """
    if doc_id not in _DOC_ID_TO_CC_PAIR_ID_MAP:
        cc_pairs = get_cc_pairs_for_document(db_session, doc_id)
        first_cc_pair = cc_pairs[0]
        _DOC_ID_TO_CC_PAIR_ID_MAP[doc_id] = first_cc_pair.id

    cc_pair_id = _DOC_ID_TO_CC_PAIR_ID_MAP[doc_id]
    if cc_pair_id not in _CC_PAIR_ID_SALESFORCE_CLIENT_MAP:
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
        )
        if cc_pair is None:
            raise ValueError(f"CC pair {cc_pair_id} not found")
        credential_json = (
            cc_pair.credential.credential_json.get_value(apply_mask=False)
            if cc_pair.credential.credential_json
            else {}
        )
        _CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id] = Salesforce(
            username=credential_json["sf_username"],
            password=credential_json["sf_password"],
            security_token=credential_json["sf_security_token"],
        )

    return _CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id]


================================================
FILE: backend/ee/onyx/external_permissions/sharepoint/doc_sync.py
================================================
from collections.abc import Generator

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()

SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"


def sharepoint_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
    sharepoint_connector = SharepointConnector(
        **cc_pair.connector.connector_specific_config,
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    sharepoint_connector.load_credentials(credential_json)

    yield from generic_doc_sync(
        cc_pair=cc_pair,
        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        callback=callback,
        doc_source=DocumentSource.SHAREPOINT,
        slim_connector=sharepoint_connector,
        label=SHAREPOINT_DOC_SYNC_TAG,
    )


================================================
FILE: backend/ee/onyx/external_permissions/sharepoint/group_sync.py
================================================
from collections.abc import Generator

from office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]

from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    get_sharepoint_external_groups,
)
from onyx.configs.app_configs import SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION
from onyx.connectors.sharepoint.connector import acquire_token_for_rest
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()


def sharepoint_group_sync(
    tenant_id: str,  # noqa: ARG001
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    """Sync SharePoint groups and their members"""

    # Get site URLs from connector config
    connector_config = cc_pair.connector.connector_specific_config

    # Create SharePoint connector instance and load credentials
    connector = SharepointConnector(**connector_config)
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    connector.load_credentials(credential_json)

    if not connector.msal_app:
        raise RuntimeError("MSAL app not initialized in connector")

    if not connector.sp_tenant_domain:
        raise RuntimeError("Tenant domain not initialized in connector")

    # Get site descriptors from connector (either configured sites or all sites)
    site_descriptors = connector.site_descriptors or connector.fetch_sites()

    if not site_descriptors:
        raise RuntimeError("No SharePoint sites found for group sync")

    logger.info(f"Processing {len(site_descriptors)} sites for group sync")

    enumerate_all = connector_config.get(
        "exhaustive_ad_enumeration", SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION
    )

    msal_app = connector.msal_app
    sp_tenant_domain = connector.sp_tenant_domain
    sp_domain_suffix = connector.sharepoint_domain_suffix
    for site_descriptor in site_descriptors:
        logger.debug(f"Processing site: {site_descriptor.url}")

        ctx = ClientContext(site_descriptor.url).with_access_token(
            lambda: acquire_token_for_rest(msal_app, sp_tenant_domain, sp_domain_suffix)
        )

        external_groups = get_sharepoint_external_groups(
            ctx,
            connector.graph_client,
            graph_api_base=connector.graph_api_base,
            get_access_token=connector._get_graph_access_token,
            enumerate_all_ad_groups=enumerate_all,
        )

        # Yield each group
        for group in external_groups:
            logger.debug(
                f"Found group: {group.id} with {len(group.user_emails)} members"
            )
            yield group


================================================
FILE: backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
================================================
import re
import time
from collections import deque
from collections.abc import Callable
from collections.abc import Generator
from typing import Any
from urllib.parse import urlparse

import requests as _requests
from office365.graph_client import GraphClient  # type: ignore[import-untyped]
from office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]
from office365.runtime.client_request import ClientRequestException  # type: ignore
from office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]
from office365.sharepoint.permissions.securable_object import RoleAssignmentCollection  # type: ignore[import-untyped]
from pydantic import BaseModel

from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import GRAPH_API_MAX_RETRIES
from onyx.connectors.sharepoint.connector import GRAPH_API_RETRYABLE_STATUSES
from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE
from onyx.connectors.sharepoint.connector import sleep_and_retry
from onyx.utils.logger import setup_logger

logger = setup_logger()


# These values represent different types of SharePoint principals used in permission assignments
USER_PRINCIPAL_TYPE = 1  # Individual user accounts
ANONYMOUS_USER_PRINCIPAL_TYPE = 3  # Anonymous/unauthenticated users (public access)
AZURE_AD_GROUP_PRINCIPAL_TYPE = 4  # Azure Active Directory security groups
SHAREPOINT_GROUP_PRINCIPAL_TYPE = 8  # SharePoint site groups (local to the site)
MICROSOFT_DOMAIN = ".onmicrosoft"
# Limited Access role type, limited access is a travel through permission not a actual permission
LIMITED_ACCESS_ROLE_TYPES = [1, 9]
LIMITED_ACCESS_ROLE_NAMES = ["Limited Access", "Web-Only Limited Access"]


AD_GROUP_ENUMERATION_THRESHOLD = 100_000


def _graph_api_get(
    url: str,
    get_access_token: Callable[[], str],
    params: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Authenticated Graph API GET with retry on transient errors."""
    for attempt in range(GRAPH_API_MAX_RETRIES + 1):
        access_token = get_access_token()
        headers = {"Authorization": f"Bearer {access_token}"}
        try:
            resp = _requests.get(
                url, headers=headers, params=params, timeout=REQUEST_TIMEOUT_SECONDS
            )
            if (
                resp.status_code in GRAPH_API_RETRYABLE_STATUSES
                and attempt < GRAPH_API_MAX_RETRIES
            ):
                wait = min(int(resp.headers.get("Retry-After", str(2**attempt))), 60)
                logger.warning(
                    f"Graph API {resp.status_code} on attempt {attempt + 1}, retrying in {wait}s: {url}"
                )
                time.sleep(wait)
                continue
            resp.raise_for_status()
            return resp.json()
        except (_requests.ConnectionError, _requests.Timeout, _requests.HTTPError):
            if attempt < GRAPH_API_MAX_RETRIES:
                wait = min(2**attempt, 60)
                logger.warning(
                    f"Graph API connection error on attempt {attempt + 1}, retrying in {wait}s: {url}"
                )
                time.sleep(wait)
                continue
            raise
    raise RuntimeError(
        f"Graph API request failed after {GRAPH_API_MAX_RETRIES + 1} attempts: {url}"
    )


def _iter_graph_collection(
    initial_url: str,
    get_access_token: Callable[[], str],
    params: dict[str, str] | None = None,
) -> Generator[dict[str, Any], None, None]:
    """Paginate through a Graph API collection, yielding items one at a time."""
    url: str | None = initial_url
    while url:
        data = _graph_api_get(url, get_access_token, params)
        params = None
        yield from data.get("value", [])
        url = data.get("@odata.nextLink")


def _normalize_email(email: str) -> str:
    if MICROSOFT_DOMAIN in email:
        return email.replace(MICROSOFT_DOMAIN, "")
    return email


class SharepointGroup(BaseModel):
    model_config = {"frozen": True}

    name: str
    login_name: str
    principal_type: int


class GroupsResult(BaseModel):
    groups_to_emails: dict[str, set[str]]
    found_public_group: bool


def _get_azuread_group_guid_by_name(
    graph_client: GraphClient, group_name: str
) -> str | None:
    try:
        # Search for groups by display name
        groups = sleep_and_retry(
            graph_client.groups.filter(f"displayName eq '{group_name}'").get(),
            "get_azuread_group_guid_by_name",
        )

        if groups and len(groups) > 0:
            return groups[0].id

        return None

    except Exception as e:
        logger.error(f"Failed to get Azure AD group GUID for name {group_name}: {e}")
        return None


def _extract_guid_from_claims_token(claims_token: str) -> str | None:

    try:
        # Pattern to match GUID in claims token
        # Claims tokens often have format: c:0o.c|provider|GUID_suffix
        guid_pattern = r"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"

        match = re.search(guid_pattern, claims_token, re.IGNORECASE)
        if match:
            return match.group(1)

        return None

    except Exception as e:
        logger.error(f"Failed to extract GUID from claims token {claims_token}: {e}")
        return None


def _get_group_guid_from_identifier(
    graph_client: GraphClient, identifier: str
) -> str | None:
    try:
        # Check if it's already a GUID
        guid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
        if re.match(guid_pattern, identifier, re.IGNORECASE):
            return identifier

        # Check if it's a SharePoint claims token
        if identifier.startswith("c:0") and "|" in identifier:
            guid = _extract_guid_from_claims_token(identifier)
            if guid:
                logger.info(f"Extracted GUID {guid} from claims token {identifier}")
                return guid

        # Try to search by display name as fallback
        return _get_azuread_group_guid_by_name(graph_client, identifier)

    except Exception as e:
        logger.error(f"Failed to get group GUID from identifier {identifier}: {e}")
        return None


def _get_security_group_owners(graph_client: GraphClient, group_id: str) -> list[str]:
    try:
        # Get group owners using Graph API
        group = graph_client.groups[group_id]
        owners = sleep_and_retry(
            group.owners.get_all(page_loaded=lambda _: None),
            "get_security_group_owners",
        )

        owner_emails: list[str] = []
        logger.info(f"Owners: {owners}")

        for owner in owners:
            owner_data = owner.to_json()

            # Extract email from the JSON data
            mail: str | None = owner_data.get("mail")
            user_principal_name: str | None = owner_data.get("userPrincipalName")

            # Check if owner is a user and has an email
            if mail:
                if MICROSOFT_DOMAIN in mail:
                    mail = mail.replace(MICROSOFT_DOMAIN, "")
                owner_emails.append(mail)
            elif user_principal_name:
                if MICROSOFT_DOMAIN in user_principal_name:
                    user_principal_name = user_principal_name.replace(
                        MICROSOFT_DOMAIN, ""
                    )
                owner_emails.append(user_principal_name)

        logger.info(
            f"Retrieved {len(owner_emails)} owners from security group {group_id}"
        )
        return owner_emails

    except Exception as e:
        logger.error(f"Failed to get security group owners for group {group_id}: {e}")
        return []


def _get_sharepoint_list_item_id(drive_item: DriveItem) -> str | None:

    try:
        # First try to get the list item directly from the drive item
        if hasattr(drive_item, "listItem"):
            list_item = drive_item.listItem
            if list_item:
                # Load the list item properties to get the ID
                sleep_and_retry(list_item.get(), "get_sharepoint_list_item_id")
                if hasattr(list_item, "id") and list_item.id:
                    return str(list_item.id)

        # The SharePoint list item ID is typically available in the sharepointIds property
        sharepoint_ids = getattr(drive_item, "sharepoint_ids", None)
        if sharepoint_ids and hasattr(sharepoint_ids, "listItemId"):
            return sharepoint_ids.listItemId

        # Alternative: try to get it from the properties
        properties = getattr(drive_item, "properties", None)
        if properties:
            # Sometimes the SharePoint list item ID is in the properties
            for prop_name, prop_value in properties.items():
                if "listitemid" in prop_name.lower():
                    return str(prop_value)

        return None
    except Exception as e:
        logger.error(
            f"Error getting SharePoint list item ID for item {drive_item.id}: {e}"
        )
        raise e


def _is_public_item(
    drive_item: DriveItem,
    treat_sharing_link_as_public: bool = False,
) -> bool:
    if not treat_sharing_link_as_public:
        return False

    try:
        permissions = sleep_and_retry(
            drive_item.permissions.get_all(page_loaded=lambda _: None), "is_public_item"
        )
        for permission in permissions:
            if permission.link and permission.link.scope in (
                "anonymous",
                "organization",
            ):
                return True
        return False
    except Exception as e:
        logger.error(f"Failed to check if item {drive_item.id} is public: {e}")
        return False


def _is_public_login_name(login_name: str) -> bool:
    # Patterns that indicate public access
    # This list is derived from the below link
    # https://learn.microsoft.com/en-us/answers/questions/2085339/guid-in-the-loginname-of-site-user-everyone-except
    public_login_patterns: list[str] = [
        "c:0-.f|rolemanager|spo-grid-all-users/",
        "c:0(.s|true",
    ]
    for pattern in public_login_patterns:
        if pattern in login_name:
            logger.info(f"Login name {login_name} is public")
            return True
    return False


# AD groups allows same display name for multiple groups, so we need to add the GUID to the name
def _get_group_name_with_suffix(
    login_name: str, group_name: str, graph_client: GraphClient
) -> str:
    ad_group_suffix = _get_group_guid_from_identifier(graph_client, login_name)
    return f"{group_name}_{ad_group_suffix}"


def _get_sharepoint_groups(
    client_context: ClientContext, group_name: str, graph_client: GraphClient
) -> tuple[set[SharepointGroup], set[str]]:

    groups: set[SharepointGroup] = set()
    user_emails: set[str] = set()

    def process_users(users: list[Any]) -> None:
        nonlocal groups, user_emails

        for user in users:
            logger.debug(f"User: {user.to_json()}")
            if user.principal_type == USER_PRINCIPAL_TYPE and hasattr(
                user, "user_principal_name"
            ):
                if user.user_principal_name:
                    email = user.user_principal_name
                    if MICROSOFT_DOMAIN in email:
                        email = email.replace(MICROSOFT_DOMAIN, "")
                    user_emails.add(email)
                else:
                    logger.warning(
                        f"User don't have a user principal name: {user.login_name}"
                    )
            elif user.principal_type in [
                AZURE_AD_GROUP_PRINCIPAL_TYPE,
                SHAREPOINT_GROUP_PRINCIPAL_TYPE,
            ]:
                name = user.title
                if user.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:
                    name = _get_group_name_with_suffix(
                        user.login_name, name, graph_client
                    )
                groups.add(
                    SharepointGroup(
                        login_name=user.login_name,
                        principal_type=user.principal_type,
                        name=name,
                    )
                )

    group = client_context.web.site_groups.get_by_name(group_name)
    sleep_and_retry(
        group.users.get_all(page_loaded=process_users), "get_sharepoint_groups"
    )

    return groups, user_emails


def _get_azuread_groups(
    graph_client: GraphClient, group_name: str
) -> tuple[set[SharepointGroup], set[str]]:

    group_id = _get_group_guid_from_identifier(graph_client, group_name)
    if not group_id:
        logger.error(f"Failed to get Azure AD group GUID for name {group_name}")
        return set(), set()
    group = graph_client.groups[group_id]
    groups: set[SharepointGroup] = set()
    user_emails: set[str] = set()

    def process_members(members: list[Any]) -> None:
        nonlocal groups, user_emails

        for member in members:
            member_data = member.to_json()
            logger.debug(f"Member: {member_data}")
            # Check for user-specific attributes
            user_principal_name = member_data.get("userPrincipalName")
            mail = member_data.get("mail")
            display_name = member_data.get("displayName") or member_data.get(
                "display_name"
            )

            # Check object attributes directly (if available)
            is_user = False
            is_group = False

            # Users typically have userPrincipalName or mail
            if user_principal_name or (mail and "@" in str(mail)):
                is_user = True
            # Groups typically have displayName but no userPrincipalName
            elif display_name and not user_principal_name:
                # Additional check: try to access group-specific properties
                if (
                    hasattr(member, "groupTypes")
                    or member_data.get("groupTypes") is not None
                ):
                    is_group = True
                # Or check if it has an 'id' field typical for groups
                elif member_data.get("id") and not user_principal_name:
                    is_group = True

            # Check the object type name (fallback)
            if not is_user and not is_group:
                obj_type = type(member).__name__.lower()
                if "user" in obj_type:
                    is_user = True
                elif "group" in obj_type:
                    is_group = True

            # Process based on identification
            if is_user:
                if user_principal_name:
                    email = user_principal_name
                    if MICROSOFT_DOMAIN in email:
                        email = email.replace(MICROSOFT_DOMAIN, "")
                    user_emails.add(email)
                elif mail:
                    email = mail
                    if MICROSOFT_DOMAIN in email:
                        email = email.replace(MICROSOFT_DOMAIN, "")
                    user_emails.add(email)
                logger.info(f"Added user: {user_principal_name or mail}")
            elif is_group:
                if not display_name:
                    logger.error(f"No display name for group: {member_data.get('id')}")
                    continue
                name = _get_group_name_with_suffix(
                    member_data.get("id", ""), display_name, graph_client
                )
                groups.add(
                    SharepointGroup(
                        login_name=member_data.get("id", ""),  # Use ID for groups
                        principal_type=AZURE_AD_GROUP_PRINCIPAL_TYPE,
                        name=name,
                    )
                )
                logger.info(f"Added group: {name}")
            else:
                # Log unidentified members for debugging
                logger.warning(f"Could not identify member type for: {member_data}")

    sleep_and_retry(
        group.members.get_all(page_loaded=process_members), "get_azuread_groups"
    )

    owner_emails = _get_security_group_owners(graph_client, group_id)
    user_emails.update(owner_emails)

    return groups, user_emails


def _get_groups_and_members_recursively(
    client_context: ClientContext,
    graph_client: GraphClient,
    groups: set[SharepointGroup],
    is_group_sync: bool = False,
) -> GroupsResult:
    """
    Get all groups and their members recursively.
    """
    group_queue: deque[SharepointGroup] = deque(groups)
    visited_groups: set[str] = set()
    visited_group_name_to_emails: dict[str, set[str]] = {}
    found_public_group = False
    while group_queue:
        group = group_queue.popleft()
        if group.login_name in visited_groups:
            continue
        visited_groups.add(group.login_name)
        visited_group_name_to_emails[group.name] = set()
        logger.info(
            f"Processing group: {group.name} principal type: {group.principal_type}"
        )
        if group.principal_type == SHAREPOINT_GROUP_PRINCIPAL_TYPE:
            group_info, user_emails = _get_sharepoint_groups(
                client_context, group.login_name, graph_client
            )
            visited_group_name_to_emails[group.name].update(user_emails)
            if group_info:
                group_queue.extend(group_info)
        if group.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:
            try:
                # if the site is public, we have default groups assigned to it, so we return early
                if _is_public_login_name(group.login_name):
                    found_public_group = True
                    if not is_group_sync:
                        return GroupsResult(
                            groups_to_emails={}, found_public_group=True
                        )
                    else:
                        # we don't want to sync public groups, so we skip them
                        continue
                group_info, user_emails = _get_azuread_groups(
                    graph_client, group.login_name
                )
                visited_group_name_to_emails[group.name].update(user_emails)
                if group_info:
                    group_queue.extend(group_info)
            except ClientRequestException as e:
                # If the group is not found, we skip it. There is a chance that group is still referenced
                # in sharepoint but it is removed from Azure AD. There is no actual documentation on this, but based on
                # our testing we have seen this happen.
                if e.response is not None and e.response.status_code == 404:
                    logger.warning(f"Group {group.login_name} not found")
                    continue
                raise e

    return GroupsResult(
        groups_to_emails=visited_group_name_to_emails,
        found_public_group=found_public_group,
    )


def get_external_access_from_sharepoint(
    client_context: ClientContext,
    graph_client: GraphClient,
    drive_name: str | None,
    drive_item: DriveItem | None,
    site_page: dict[str, Any] | None,
    add_prefix: bool = False,
    treat_sharing_link_as_public: bool = False,
) -> ExternalAccess:
    """
    Get external access information from SharePoint.
    """
    groups: set[SharepointGroup] = set()
    user_emails: set[str] = set()
    group_ids: set[str] = set()

    # Add all members to a processing set first
    def add_user_and_group_to_sets(
        role_assignments: RoleAssignmentCollection,
    ) -> None:
        nonlocal user_emails, groups
        for assignment in role_assignments:
            logger.debug(f"Assignment: {assignment.to_json()}")
            if assignment.role_definition_bindings:
                is_limited_access = True
                for role_definition_binding in assignment.role_definition_bindings:
                    if (
                        role_definition_binding.role_type_kind
                        not in LIMITED_ACCESS_ROLE_TYPES
                        or role_definition_binding.name not in LIMITED_ACCESS_ROLE_NAMES
                    ):
                        is_limited_access = False
                        break

                # Skip if the role is only Limited Access, because this is not a actual permission its a travel through permission
                if is_limited_access:
                    logger.info(
                        "Skipping assignment because it has only Limited Access role"
                    )
                    continue
            if assignment.member:
                member = assignment.member
                if member.principal_type == USER_PRINCIPAL_TYPE and hasattr(
                    member, "user_principal_name"
                ):
                    email = member.user_principal_name
                    if MICROSOFT_DOMAIN in email:
                        email = email.replace(MICROSOFT_DOMAIN, "")
                    user_emails.add(email)
                elif member.principal_type in [
                    AZURE_AD_GROUP_PRINCIPAL_TYPE,
                    SHAREPOINT_GROUP_PRINCIPAL_TYPE,
                ]:
                    name = member.title
                    if member.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:
                        name = _get_group_name_with_suffix(
                            member.login_name, name, graph_client
                        )
                    groups.add(
                        SharepointGroup(
                            login_name=member.login_name,
                            principal_type=member.principal_type,
                            name=name,
                        )
                    )

    if drive_item and drive_name:
        is_public = _is_public_item(drive_item, treat_sharing_link_as_public)
        if is_public:
            logger.info(f"Item {drive_item.id} is public")
            return ExternalAccess(
                external_user_emails=set(),
                external_user_group_ids=set(),
                is_public=True,
            )

        item_id = _get_sharepoint_list_item_id(drive_item)

        if not item_id:
            raise RuntimeError(
                f"Failed to get SharePoint list item ID for item {drive_item.id}"
            )

        if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
            drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]

        item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(
            item_id
        )

        sleep_and_retry(
            item.role_assignments.expand(["Member", "RoleDefinitionBindings"]).get_all(
                page_loaded=add_user_and_group_to_sets,
            ),
            "get_external_access_from_sharepoint",
        )
    elif site_page:
        site_url = site_page.get("webUrl")
        # Keep percent-encoding intact so the path matches the encoding
        # used by the Office365 library's SPResPath.create_relative(),
        # which compares against urlparse(context.base_url).path.
        # Decoding (e.g. %27 → ') causes a mismatch that duplicates
        # the site prefix in the constructed URL.
        server_relative_url = urlparse(site_url).path
        file_obj = client_context.web.get_file_by_server_relative_url(
            server_relative_url
        )
        item = file_obj.listItemAllFields

        sleep_and_retry(
            item.role_assignments.expand(["Member", "RoleDefinitionBindings"]).get_all(
                page_loaded=add_user_and_group_to_sets,
            ),
            "get_external_access_from_sharepoint",
        )
    else:
        raise RuntimeError("No drive item or site page provided")

    groups_and_members: GroupsResult = _get_groups_and_members_recursively(
        client_context, graph_client, groups
    )

    # If the site is public, w have default groups assigned to it, so we return early
    if groups_and_members.found_public_group:
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        )

    for group_name, _ in groups_and_members.groups_to_emails.items():
        if add_prefix:
            group_name = build_ext_group_name_for_onyx(
                group_name, DocumentSource.SHAREPOINT
            )
        group_ids.add(group_name.lower())

    logger.info(f"User emails: {len(user_emails)}")
    logger.info(f"Group IDs: {len(group_ids)}")

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_ids,
        is_public=False,
    )


def _enumerate_ad_groups_paginated(
    get_access_token: Callable[[], str],
    already_resolved: set[str],
    graph_api_base: str,
) -> Generator[ExternalUserGroup, None, None]:
    """Paginate through all Azure AD groups and yield ExternalUserGroup for each.

    Skips groups whose suffixed name is already in *already_resolved*.
    Stops early if the number of groups exceeds AD_GROUP_ENUMERATION_THRESHOLD.
    """
    groups_url = f"{graph_api_base}/groups"
    groups_params: dict[str, str] = {"$select": "id,displayName", "$top": "999"}
    total_groups = 0

    for group_json in _iter_graph_collection(
        groups_url, get_access_token, groups_params
    ):
        group_id: str = group_json.get("id", "")
        display_name: str = group_json.get("displayName", "")
        if not group_id or not display_name:
            continue

        total_groups += 1
        if total_groups > AD_GROUP_ENUMERATION_THRESHOLD:
            logger.warning(
                f"Azure AD group enumeration exceeded {AD_GROUP_ENUMERATION_THRESHOLD} "
                "groups — stopping to avoid excessive memory/API usage. "
                "Remaining groups will be resolved from role assignments only."
            )
            return

        name = f"{display_name}_{group_id}"
        if name in already_resolved:
            continue

        member_emails: list[str] = []
        members_url = f"{graph_api_base}/groups/{group_id}/members"
        members_params: dict[str, str] = {
            "$select": "userPrincipalName,mail",
            "$top": "999",
        }
        for member_json in _iter_graph_collection(
            members_url, get_access_token, members_params
        ):
            email = member_json.get("userPrincipalName") or member_json.get("mail")
            if email:
                member_emails.append(_normalize_email(email))

        yield ExternalUserGroup(id=name, user_emails=member_emails)

    logger.info(f"Enumerated {total_groups} Azure AD groups via paginated Graph API")


def get_sharepoint_external_groups(
    client_context: ClientContext,
    graph_client: GraphClient,
    graph_api_base: str,
    get_access_token: Callable[[], str] | None = None,
    enumerate_all_ad_groups: bool = False,
) -> list[ExternalUserGroup]:

    groups: set[SharepointGroup] = set()

    def add_group_to_sets(role_assignments: RoleAssignmentCollection) -> None:
        nonlocal groups
        for assignment in role_assignments:
            if assignment.role_definition_bindings:
                is_limited_access = True
                for role_definition_binding in assignment.role_definition_bindings:
                    if (
                        role_definition_binding.role_type_kind
                        not in LIMITED_ACCESS_ROLE_TYPES
                        or role_definition_binding.name not in LIMITED_ACCESS_ROLE_NAMES
                    ):
                        is_limited_access = False
                        break

                # Skip if the role assignment is only Limited Access, because this is not a actual permission its
                #  a travel through permission
                if is_limited_access:
                    logger.info(
                        "Skipping assignment because it has only Limited Access role"
                    )
                    continue
            if assignment.member:
                member = assignment.member
                if member.principal_type in [
                    AZURE_AD_GROUP_PRINCIPAL_TYPE,
                    SHAREPOINT_GROUP_PRINCIPAL_TYPE,
                ]:
                    name = member.title
                    if member.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:
                        name = _get_group_name_with_suffix(
                            member.login_name, name, graph_client
                        )

                    groups.add(
                        SharepointGroup(
                            login_name=member.login_name,
                            principal_type=member.principal_type,
                            name=name,
                        )
                    )

    sleep_and_retry(
        client_context.web.role_assignments.expand(
            ["Member", "RoleDefinitionBindings"]
        ).get_all(page_loaded=add_group_to_sets),
        "get_sharepoint_external_groups",
    )
    groups_and_members: GroupsResult = _get_groups_and_members_recursively(
        client_context, graph_client, groups, is_group_sync=True
    )

    external_user_groups: list[ExternalUserGroup] = [
        ExternalUserGroup(id=group_name, user_emails=list(emails))
        for group_name, emails in groups_and_members.groups_to_emails.items()
    ]

    if not enumerate_all_ad_groups or get_access_token is None:
        logger.info(
            "Skipping exhaustive Azure AD group enumeration. Only groups found in site role assignments are included."
        )
        return external_user_groups

    already_resolved = set(groups_and_members.groups_to_emails.keys())
    for group in _enumerate_ad_groups_paginated(
        get_access_token, already_resolved, graph_api_base
    ):
        external_user_groups.append(group)

    return external_user_groups


================================================
FILE: backend/ee/onyx/external_permissions/slack/channel_access.py
================================================
from slack_sdk import WebClient

from onyx.access.models import ExternalAccess
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.slack.connector import ChannelType
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.connectors.slack.utils import make_paginated_slack_api_call


def get_channel_access(
    client: WebClient,
    channel: ChannelType,
    user_cache: dict[str, BasicExpertInfo | None],
) -> ExternalAccess:
    """
    Get channel access permissions for a Slack channel.

    Args:
        client: Slack WebClient instance
        channel: Slack channel object containing channel info
        user_cache: Cache of user IDs to BasicExpertInfo objects. May be updated in place.

    Returns:
        ExternalAccess object for the channel.
    """
    channel_is_public = not channel["is_private"]
    if channel_is_public:
        return ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        )

    channel_id = channel["id"]

    # Get all member IDs for the channel
    member_ids = []
    for result in make_paginated_slack_api_call(
        client.conversations_members,
        channel=channel_id,
    ):
        member_ids.extend(result.get("members", []))

    member_emails = set()
    for member_id in member_ids:
        # Try to get user info from cache or fetch it
        user_info = expert_info_from_slack_id(
            user_id=member_id,
            client=client,
            user_cache=user_cache,
        )

        # If we have user info and an email, add it to the set
        if user_info and user_info.email:
            member_emails.add(user_info.email)

    return ExternalAccess(
        external_user_emails=member_emails,
        # NOTE: groups are not used, since adding a group to a channel just adds all
        # users that are in the group.
        external_user_group_ids=set(),
        is_public=False,
    )


================================================
FILE: backend/ee/onyx/external_permissions/slack/doc_sync.py
================================================
from collections.abc import Generator

from slack_sdk import WebClient

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import HierarchyNode
from onyx.connectors.slack.connector import get_channels
from onyx.connectors.slack.connector import make_paginated_slack_api_call
from onyx.connectors.slack.connector import SlackConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id


logger = setup_logger()


def _fetch_workspace_permissions(
    user_id_to_email_map: dict[str, str],
) -> ExternalAccess:
    user_emails = set()
    for email in user_id_to_email_map.values():
        user_emails.add(email)
    return ExternalAccess(
        external_user_emails=user_emails,
        # No group<->document mapping for slack
        external_user_group_ids=set(),
        # No way to determine if slack is invite only without enterprise license
        is_public=False,
    )


def _fetch_channel_permissions(
    slack_client: WebClient,
    workspace_permissions: ExternalAccess,
    user_id_to_email_map: dict[str, str],
) -> dict[str, ExternalAccess]:
    channel_permissions = {}
    public_channels = get_channels(
        client=slack_client,
        get_public=True,
        get_private=False,
    )
    public_channel_ids = [
        channel["id"] for channel in public_channels if "id" in channel
    ]
    for channel_id in public_channel_ids:
        channel_permissions[channel_id] = workspace_permissions

    private_channels = get_channels(
        client=slack_client,
        get_public=False,
        get_private=True,
    )
    private_channel_ids = [
        channel["id"] for channel in private_channels if "id" in channel
    ]

    for channel_id in private_channel_ids:
        # Collect all member ids for the channel pagination calls
        member_ids = []
        for result in make_paginated_slack_api_call(
            slack_client.conversations_members,
            channel=channel_id,
        ):
            member_ids.extend(result.get("members", []))

        # Collect all member emails for the channel
        member_emails = set()
        for member_id in member_ids:
            member_email = user_id_to_email_map.get(member_id)

            if not member_email:
                # If the user is an external user, they wont get returned from the
                # conversations_members call so we need to make a separate call to users_info
                # and add them to the user_id_to_email_map
                member_info = slack_client.users_info(user=member_id)
                member_email = member_info["user"]["profile"].get("email")
                if not member_email:
                    # If no email is found, we skip the user
                    continue
                user_id_to_email_map[member_id] = member_email

            member_emails.add(member_email)

        channel_permissions[channel_id] = ExternalAccess(
            external_user_emails=member_emails,
            # No group<->document mapping for slack
            external_user_group_ids=set(),
            # No way to determine if slack is invite only without enterprise license
            is_public=False,
        )

    return channel_permissions


def _get_slack_document_access(
    slack_connector: SlackConnector,
    channel_permissions: dict[str, ExternalAccess],  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None,
    indexing_start: SecondsSinceUnixEpoch | None = None,
) -> Generator[DocExternalAccess, None, None]:
    slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
        callback=callback,
        start=indexing_start,
    )

    for doc_metadata_batch in slim_doc_generator:
        for doc_metadata in doc_metadata_batch:
            if isinstance(doc_metadata, HierarchyNode):
                # TODO: handle hierarchynodes during sync
                continue
            if doc_metadata.external_access is None:
                raise ValueError(
                    f"No external access for document {doc_metadata.id}. "
                    "Please check to make sure that your Slack bot token has the "
                    "`channels:read` scope"
                )

            yield DocExternalAccess(
                external_access=doc_metadata.external_access,
                doc_id=doc_metadata.id,
            )

        if callback:
            if callback.should_stop():
                raise RuntimeError("_get_slack_document_access: Stop signal detected")

            callback.progress("_get_slack_document_access", 1)


def slack_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
    """
    Adds the external permissions to the documents in postgres
    if the document doesn't already exists in postgres, we create
    it in postgres so that when it gets created later, the permissions are
    already populated
    """
    # Use credentials provider instead of directly loading credentials

    tenant_id = get_current_tenant_id()
    provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
    r = get_redis_client(tenant_id=tenant_id)
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    slack_client = SlackConnector.make_slack_web_client(
        provider.get_provider_key(),
        credential_json["slack_bot_token"],
        SlackConnector.MAX_RETRIES,
        r,
    )

    user_id_to_email_map = fetch_user_id_to_email_map(slack_client)
    if not user_id_to_email_map:
        raise ValueError(
            "No user id to email map found. Please check to make sure that your Slack bot token has the `users:read.email` scope"
        )

    workspace_permissions = _fetch_workspace_permissions(
        user_id_to_email_map=user_id_to_email_map,
    )
    channel_permissions = _fetch_channel_permissions(
        slack_client=slack_client,
        workspace_permissions=workspace_permissions,
        user_id_to_email_map=user_id_to_email_map,
    )

    slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
    slack_connector.set_credentials_provider(provider)
    indexing_start_ts: SecondsSinceUnixEpoch | None = (
        cc_pair.connector.indexing_start.timestamp()
        if cc_pair.connector.indexing_start is not None
        else None
    )

    yield from _get_slack_document_access(
        slack_connector=slack_connector,
        channel_permissions=channel_permissions,
        callback=callback,
        indexing_start=indexing_start_ts,
    )


================================================
FILE: backend/ee/onyx/external_permissions/slack/group_sync.py
================================================
"""
THIS IS NOT USEFUL OR USED FOR PERMISSION SYNCING
WHEN USERGROUPS ARE ADDED TO A CHANNEL, IT JUST RESOLVES ALL THE USERS TO THAT CHANNEL
SO WHEN CHECKING IF A USER CAN ACCESS A DOCUMENT, WE ONLY NEED TO CHECK THEIR EMAIL
THERE IS NO USERGROUP <-> DOCUMENT PERMISSION MAPPING
"""

from slack_sdk import WebClient

from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.slack.connector import SlackConnector
from onyx.connectors.slack.utils import make_paginated_slack_api_call
from onyx.db.models import ConnectorCredentialPair
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _get_slack_group_ids(
    slack_client: WebClient,
) -> list[str]:
    group_ids = []
    for result in make_paginated_slack_api_call(slack_client.usergroups_list):
        for group in result.get("usergroups", []):
            group_ids.append(group.get("id"))
    return group_ids


def _get_slack_group_members_email(
    slack_client: WebClient,
    group_name: str,
    user_id_to_email_map: dict[str, str],
) -> list[str]:
    group_member_emails = []
    for result in make_paginated_slack_api_call(
        slack_client.usergroups_users_list, usergroup=group_name
    ):
        for member_id in result.get("users", []):
            member_email = user_id_to_email_map.get(member_id)
            if not member_email:
                # If the user is an external user, they wont get returned from the
                # conversations_members call so we need to make a separate call to users_info
                member_info = slack_client.users_info(user=member_id)
                member_email = member_info["user"]["profile"].get("email")
                if not member_email:
                    # If no email is found, we skip the user
                    continue
                user_id_to_email_map[member_id] = member_email
            group_member_emails.append(member_email)

    return group_member_emails


def slack_group_sync(
    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
) -> list[ExternalUserGroup]:
    """NOTE: not used atm. All channel access is done at the
    individual user level. Leaving in for now in case we need it later."""

    provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
    r = get_redis_client(tenant_id=tenant_id)
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    slack_client = SlackConnector.make_slack_web_client(
        provider.get_provider_key(),
        credential_json["slack_bot_token"],
        SlackConnector.MAX_RETRIES,
        r,
    )

    user_id_to_email_map = fetch_user_id_to_email_map(slack_client)

    onyx_groups: list[ExternalUserGroup] = []
    for group_name in _get_slack_group_ids(slack_client):
        group_member_emails = _get_slack_group_members_email(
            slack_client=slack_client,
            group_name=group_name,
            user_id_to_email_map=user_id_to_email_map,
        )
        if not group_member_emails:
            continue
        onyx_groups.append(
            ExternalUserGroup(id=group_name, user_emails=group_member_emails)
        )
    return onyx_groups


================================================
FILE: backend/ee/onyx/external_permissions/slack/utils.py
================================================
from slack_sdk import WebClient

from onyx.connectors.slack.utils import make_paginated_slack_api_call


def fetch_user_id_to_email_map(
    slack_client: WebClient,
) -> dict[str, str]:
    user_id_to_email_map = {}
    for user_info in make_paginated_slack_api_call(
        slack_client.users_list,
    ):
        for user in user_info.get("members", []):
            if user.get("profile", {}).get("email"):
                user_id_to_email_map[user.get("id")] = user.get("profile", {}).get(
                    "email"
                )
    return user_id_to_email_map


================================================
FILE: backend/ee/onyx/external_permissions/sync_params.py
================================================
from collections.abc import Generator
from typing import Optional
from typing import TYPE_CHECKING

from pydantic import BaseModel

from ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import TEAMS_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync
from ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync
from ee.onyx.external_permissions.github.doc_sync import github_doc_sync
from ee.onyx.external_permissions.github.group_sync import github_group_sync
from ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync
from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from ee.onyx.external_permissions.perm_sync_types import CensoringFuncType
from ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.perm_sync_types import GroupSyncFuncType
from ee.onyx.external_permissions.salesforce.postprocessing import (
    censor_salesforce_chunks,
)
from ee.onyx.external_permissions.sharepoint.doc_sync import sharepoint_doc_sync
from ee.onyx.external_permissions.sharepoint.group_sync import sharepoint_group_sync
from ee.onyx.external_permissions.slack.doc_sync import slack_doc_sync
from ee.onyx.external_permissions.teams.doc_sync import teams_doc_sync
from onyx.configs.constants import DocumentSource

if TYPE_CHECKING:
    from onyx.access.models import DocExternalAccess  # noqa
    from onyx.db.models import ConnectorCredentialPair  # noqa
    from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface  # noqa


class DocSyncConfig(BaseModel):
    doc_sync_frequency: int
    doc_sync_func: DocSyncFuncType
    initial_index_should_sync: bool


class GroupSyncConfig(BaseModel):
    group_sync_frequency: int
    group_sync_func: GroupSyncFuncType
    group_sync_is_cc_pair_agnostic: bool


class CensoringConfig(BaseModel):
    chunk_censoring_func: CensoringFuncType


class SyncConfig(BaseModel):
    # None means we don't perform a doc_sync
    doc_sync_config: DocSyncConfig | None = None
    # None means we don't perform a group_sync
    group_sync_config: GroupSyncConfig | None = None
    # None means we don't perform a chunk_censoring
    censoring_config: CensoringConfig | None = None


# Mock doc sync function for testing (no-op)
def mock_doc_sync(
    cc_pair: "ConnectorCredentialPair",  # noqa: ARG001
    fetch_all_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001
    callback: Optional["IndexingHeartbeatInterface"],  # noqa: ARG001
) -> Generator["DocExternalAccess", None, None]:
    """Mock doc sync function for testing - returns empty list since permissions are fetched during indexing"""
    yield from []


_SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {
    DocumentSource.GOOGLE_DRIVE: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=gdrive_doc_sync,
            initial_index_should_sync=True,
        ),
        group_sync_config=GroupSyncConfig(
            group_sync_frequency=GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY,
            group_sync_func=gdrive_group_sync,
            group_sync_is_cc_pair_agnostic=False,
        ),
    ),
    DocumentSource.CONFLUENCE: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=confluence_doc_sync,
            initial_index_should_sync=False,
        ),
        group_sync_config=GroupSyncConfig(
            group_sync_frequency=CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY,
            group_sync_func=confluence_group_sync,
            group_sync_is_cc_pair_agnostic=True,
        ),
    ),
    DocumentSource.JIRA: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=JIRA_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=jira_doc_sync,
            initial_index_should_sync=True,
        ),
        group_sync_config=GroupSyncConfig(
            group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,
            group_sync_func=jira_group_sync,
            group_sync_is_cc_pair_agnostic=True,
        ),
    ),
    # Groups are not needed for Slack.
    # All channel access is done at the individual user level.
    DocumentSource.SLACK: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=SLACK_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=slack_doc_sync,
            initial_index_should_sync=True,
        ),
    ),
    DocumentSource.GMAIL: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=gmail_doc_sync,
            initial_index_should_sync=False,
        ),
    ),
    DocumentSource.GITHUB: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=GITHUB_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=github_doc_sync,
            initial_index_should_sync=True,
        ),
        group_sync_config=GroupSyncConfig(
            group_sync_frequency=GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY,
            group_sync_func=github_group_sync,
            group_sync_is_cc_pair_agnostic=False,
        ),
    ),
    DocumentSource.SALESFORCE: SyncConfig(
        censoring_config=CensoringConfig(
            chunk_censoring_func=censor_salesforce_chunks,
        ),
    ),
    DocumentSource.MOCK_CONNECTOR: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=mock_doc_sync,
            initial_index_should_sync=True,
        ),
    ),
    # Groups are not needed for Teams.
    # All channel access is done at the individual user level.
    DocumentSource.TEAMS: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=TEAMS_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=teams_doc_sync,
            initial_index_should_sync=True,
        ),
    ),
    DocumentSource.SHAREPOINT: SyncConfig(
        doc_sync_config=DocSyncConfig(
            doc_sync_frequency=SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY,
            doc_sync_func=sharepoint_doc_sync,
            initial_index_should_sync=True,
        ),
        group_sync_config=GroupSyncConfig(
            group_sync_frequency=SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY,
            group_sync_func=sharepoint_group_sync,
            group_sync_is_cc_pair_agnostic=False,
        ),
    ),
}


def source_requires_doc_sync(source: DocumentSource) -> bool:
    """Checks if the given DocumentSource requires doc syncing."""
    if source not in _SOURCE_TO_SYNC_CONFIG:
        return False
    return _SOURCE_TO_SYNC_CONFIG[source].doc_sync_config is not None


def source_requires_external_group_sync(source: DocumentSource) -> bool:
    """Checks if the given DocumentSource requires external group syncing."""
    if source not in _SOURCE_TO_SYNC_CONFIG:
        return False
    return _SOURCE_TO_SYNC_CONFIG[source].group_sync_config is not None


def get_source_perm_sync_config(source: DocumentSource) -> SyncConfig | None:
    """Returns the frequency of the external group sync for the given DocumentSource."""
    return _SOURCE_TO_SYNC_CONFIG.get(source)


def source_group_sync_is_cc_pair_agnostic(source: DocumentSource) -> bool:
    """Checks if the given DocumentSource requires external group syncing."""
    if source not in _SOURCE_TO_SYNC_CONFIG:
        return False

    group_sync_config = _SOURCE_TO_SYNC_CONFIG[source].group_sync_config
    if group_sync_config is None:
        return False

    return group_sync_config.group_sync_is_cc_pair_agnostic


def get_all_cc_pair_agnostic_group_sync_sources() -> set[DocumentSource]:
    """Returns the set of sources that have external group syncing that is cc_pair agnostic."""
    return {
        source
        for source, sync_config in _SOURCE_TO_SYNC_CONFIG.items()
        if sync_config.group_sync_config is not None
        and sync_config.group_sync_config.group_sync_is_cc_pair_agnostic
    }


def check_if_valid_sync_source(source_type: DocumentSource) -> bool:
    return source_type in _SOURCE_TO_SYNC_CONFIG


def get_all_censoring_enabled_sources() -> set[DocumentSource]:
    """Returns the set of sources that have censoring enabled."""
    return {
        source
        for source, sync_config in _SOURCE_TO_SYNC_CONFIG.items()
        if sync_config.censoring_config is not None
    }


def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> bool:
    """Returns True if the given DocumentSource requires permissions to be fetched during indexing."""
    if source not in _SOURCE_TO_SYNC_CONFIG:
        return False

    doc_sync_config = _SOURCE_TO_SYNC_CONFIG[source].doc_sync_config
    if doc_sync_config is None:
        return False

    return doc_sync_config.initial_index_should_sync


================================================
FILE: backend/ee/onyx/external_permissions/teams/doc_sync.py
================================================
from collections.abc import Generator

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.teams.connector import TeamsConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()


TEAMS_DOC_SYNC_LABEL = "teams_doc_sync"


def teams_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
    teams_connector = TeamsConnector(
        **cc_pair.connector.connector_specific_config,
    )
    credential_json = (
        cc_pair.credential.credential_json.get_value(apply_mask=False)
        if cc_pair.credential.credential_json
        else {}
    )
    teams_connector.load_credentials(credential_json)

    yield from generic_doc_sync(
        cc_pair=cc_pair,
        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        callback=callback,
        doc_source=DocumentSource.TEAMS,
        slim_connector=teams_connector,
        label=TEAMS_DOC_SYNC_LABEL,
    )


================================================
FILE: backend/ee/onyx/external_permissions/utils.py
================================================
from collections.abc import Generator

from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()


def generic_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None,
    doc_source: DocumentSource,
    slim_connector: SlimConnectorWithPermSync,
    label: str,
) -> Generator[ElementExternalAccess, None, None]:
    """
    A convenience function for performing a generic document synchronization.

    Notes:
    A generic doc sync includes:
        - fetching existing docs
        - fetching *all* new (slim) docs
        - yielding external-access permissions for existing docs which do not exist in the newly fetched slim-docs set (with their
        `external_access` set to "private")
        - yielding external-access permissions for newly fetched docs and hierarchy nodes

    Returns:
        A `Generator` which yields existing and newly fetched external-access permissions.
    """

    logger.info(f"Starting {doc_source} doc sync for CC Pair ID: {cc_pair.id}")

    indexing_start: SecondsSinceUnixEpoch | None = (
        cc_pair.connector.indexing_start.timestamp()
        if cc_pair.connector.indexing_start is not None
        else None
    )

    newly_fetched_doc_ids: set[str] = set()

    logger.info(f"Fetching all slim documents from {doc_source}")
    for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(
        start=indexing_start,
        callback=callback,
    ):
        logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")

        if callback:
            if callback.should_stop():
                raise RuntimeError(f"{label}: Stop signal detected")
            callback.progress(label, 1)

        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                # Yield hierarchy node permissions to be processed in outer layer
                if doc.external_access:
                    yield NodeExternalAccess(
                        external_access=doc.external_access,
                        raw_node_id=doc.raw_node_id,
                        source=doc_source.value,
                    )
                continue
            if not doc.external_access:
                raise RuntimeError(
                    f"No external access found for document ID; {cc_pair.id=} {doc_source=} {doc.id=}"
                )

            newly_fetched_doc_ids.add(doc.id)

            yield DocExternalAccess(
                doc_id=doc.id,
                external_access=doc.external_access,
            )

    logger.info(f"Querying existing document IDs for CC Pair ID: {cc_pair.id=}")
    existing_doc_ids: list[str] = fetch_all_existing_docs_ids_fn()

    missing_doc_ids = set(existing_doc_ids) - newly_fetched_doc_ids

    if not missing_doc_ids:
        return

    logger.warning(
        f"Found {len(missing_doc_ids)=} documents that are in the DB but not present in fetch. Making them inaccessible."
    )

    for missing_id in missing_doc_ids:
        logger.warning(f"Removing access for {missing_id=}")
        yield DocExternalAccess(
            doc_id=missing_id,
            external_access=ExternalAccess.empty(),
        )

    logger.info(f"Finished {doc_source} doc sync")


================================================
FILE: backend/ee/onyx/feature_flags/__init__.py
================================================


================================================
FILE: backend/ee/onyx/feature_flags/factory.py
================================================
from ee.onyx.feature_flags.posthog_provider import PostHogFeatureFlagProvider
from onyx.feature_flags.interface import FeatureFlagProvider


def get_posthog_feature_flag_provider() -> FeatureFlagProvider:
    """
    Get the PostHog feature flag provider instance.

    This is the EE implementation that gets loaded by the versioned
    implementation loader.

    Returns:
        PostHogFeatureFlagProvider: The PostHog-based feature flag provider
    """
    return PostHogFeatureFlagProvider()


================================================
FILE: backend/ee/onyx/feature_flags/posthog_provider.py
================================================
from typing import Any
from uuid import UUID

from ee.onyx.utils.posthog_client import posthog
from onyx.feature_flags.interface import FeatureFlagProvider
from onyx.utils.logger import setup_logger

logger = setup_logger()


class PostHogFeatureFlagProvider(FeatureFlagProvider):
    """
    PostHog-based feature flag provider.

    Uses PostHog's feature flag API to determine if features are enabled
    for specific users. Only active in multi-tenant mode.
    """

    def feature_enabled(
        self,
        flag_key: str,
        user_id: UUID,
        user_properties: dict[str, Any] | None = None,
    ) -> bool:
        """
        Check if a feature flag is enabled for a user via PostHog.

        Args:
            flag_key: The identifier for the feature flag to check
            user_id: The unique identifier for the user
            user_properties: Optional dictionary of user properties/attributes
                           that may influence flag evaluation

        Returns:
            True if the feature is enabled for the user, False otherwise.
        """
        if not posthog:
            return False

        try:
            posthog.set(
                distinct_id=user_id,
                properties=user_properties,
            )
            is_enabled = posthog.feature_enabled(
                flag_key,
                str(user_id),
                person_properties=user_properties,
            )

            return bool(is_enabled) if is_enabled is not None else False

        except Exception as e:
            logger.error(
                f"Error checking feature flag {flag_key} for user {user_id}: {e}"
            )
            return False


================================================
FILE: backend/ee/onyx/hooks/__init__.py
================================================


================================================
FILE: backend/ee/onyx/hooks/executor.py
================================================
"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.

Usage (Celery tasks and FastAPI handlers):
    result = execute_hook(
        db_session=db_session,
        hook_point=HookPoint.QUERY_PROCESSING,
        payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
        response_type=QueryProcessingResponse,
    )

    if isinstance(result, HookSkipped):
        # no active hook configured — continue with original behavior
        ...
    elif isinstance(result, HookSoftFailed):
        # hook failed but fail strategy is SOFT — continue with original behavior
        ...
    else:
        # result is a validated Pydantic model instance (response_type)
        ...

is_reachable update policy
--------------------------
``is_reachable`` on the Hook row is updated selectively — only when the outcome
carries meaningful signal about physical reachability:

  NetworkError (DNS, connection refused)  → False  (cannot reach the server)
  HTTP 401 / 403                          → False  (api_key revoked or invalid)
  TimeoutException                        → None   (server may be slow, skip write)
  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)
  Unknown exception                       → None   (no signal, skip write)
  Non-JSON / non-dict response            → None   (server responded, skip write)
  Success (2xx, valid dict)               → True   (confirmed reachable)

None means "leave the current value unchanged" — no DB round-trip is made.

DB session design
-----------------
The executor uses three sessions:

  1. Caller's session (db_session) — used only for the hook lookup read. All
     needed fields are extracted from the Hook object before the HTTP call, so
     the caller's session is not held open during the external HTTP request.

  2. Log session — a separate short-lived session opened after the HTTP call
     completes to write the HookExecutionLog row on failure. Success runs are
     not recorded. Committed independently of everything else.

  3. Reachable session — a second short-lived session to update is_reachable on
     the Hook. Kept separate from the log session so a concurrent hook deletion
     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
     prevent the execution log from being written. This update is best-effort.
"""

import json
import time
from typing import Any
from typing import TypeVar

import httpx
from pydantic import BaseModel
from pydantic import ValidationError
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.db.hook import create_hook_execution_log__no_commit
from onyx.db.hook import get_non_deleted_hook_by_hook_point
from onyx.db.hook import update_hook__no_commit
from onyx.db.models import Hook
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


T = TypeVar("T", bound=BaseModel)


# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------


class _HttpOutcome(BaseModel):
    """Structured result of an HTTP hook call, returned by _process_response."""

    is_success: bool
    updated_is_reachable: (
        bool | None
    )  # True/False = write to DB, None = unchanged (skip write)
    status_code: int | None
    error_message: str | None
    response_payload: dict[str, Any] | None


def _lookup_hook(
    db_session: Session,
    hook_point: HookPoint,
) -> Hook | HookSkipped:
    """Return the active Hook or HookSkipped if hooks are unavailable/unconfigured.

    No HTTP call is made and no DB writes are performed for any HookSkipped path.
    There is nothing to log and no reachability information to update.
    """
    if MULTI_TENANT:
        return HookSkipped()
    hook = get_non_deleted_hook_by_hook_point(
        db_session=db_session, hook_point=hook_point
    )
    if hook is None or not hook.is_active:
        return HookSkipped()
    if not hook.endpoint_url:
        return HookSkipped()
    return hook


def _process_response(
    *,
    response: httpx.Response | None,
    exc: Exception | None,
    timeout: float,
) -> _HttpOutcome:
    """Process the result of an HTTP call and return a structured outcome.

    Called after the client.post() try/except. If post() raised, exc is set and
    response is None. Otherwise response is set and exc is None. Handles
    raise_for_status(), JSON decoding, and the dict shape check.
    """
    if exc is not None:
        if isinstance(exc, httpx.NetworkError):
            msg = f"Hook network error (endpoint unreachable): {exc}"
            logger.warning(msg, exc_info=exc)
            return _HttpOutcome(
                is_success=False,
                updated_is_reachable=False,
                status_code=None,
                error_message=msg,
                response_payload=None,
            )
        if isinstance(exc, httpx.TimeoutException):
            msg = f"Hook timed out after {timeout}s: {exc}"
            logger.warning(msg, exc_info=exc)
            return _HttpOutcome(
                is_success=False,
                updated_is_reachable=None,  # timeout doesn't indicate unreachability
                status_code=None,
                error_message=msg,
                response_payload=None,
            )
        msg = f"Hook call failed: {exc}"
        logger.exception(msg, exc_info=exc)
        return _HttpOutcome(
            is_success=False,
            updated_is_reachable=None,  # unknown error — don't make assumptions
            status_code=None,
            error_message=msg,
            response_payload=None,
        )

    if response is None:
        raise ValueError(
            "exactly one of response or exc must be non-None; both are None"
        )
    status_code = response.status_code

    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
        logger.warning(msg, exc_info=e)
        # 401/403 means the api_key has been revoked or is invalid — mark unreachable
        # so the operator knows to update it. All other HTTP errors keep is_reachable
        # as-is (server is up, the request just failed for application reasons).
        auth_failed = e.response.status_code in (401, 403)
        return _HttpOutcome(
            is_success=False,
            updated_is_reachable=False if auth_failed else None,
            status_code=status_code,
            error_message=msg,
            response_payload=None,
        )

    try:
        response_payload = response.json()
    except (json.JSONDecodeError, httpx.DecodingError) as e:
        msg = f"Hook returned non-JSON response: {e}"
        logger.warning(msg, exc_info=e)
        return _HttpOutcome(
            is_success=False,
            updated_is_reachable=None,  # server responded — reachability unchanged
            status_code=status_code,
            error_message=msg,
            response_payload=None,
        )

    if not isinstance(response_payload, dict):
        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
        logger.warning(msg)
        return _HttpOutcome(
            is_success=False,
            updated_is_reachable=None,  # server responded — reachability unchanged
            status_code=status_code,
            error_message=msg,
            response_payload=None,
        )

    return _HttpOutcome(
        is_success=True,
        updated_is_reachable=True,
        status_code=status_code,
        error_message=None,
        response_payload=response_payload,
    )


def _persist_result(
    *,
    hook_id: int,
    outcome: _HttpOutcome,
    duration_ms: int,
) -> None:
    """Write the execution log on failure and optionally update is_reachable, each
    in its own session so a failure in one does not affect the other."""
    # Only write the execution log on failure — success runs are not recorded.
    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently
    # deleted between the initial lookup and here).
    if not outcome.is_success:
        try:
            with get_session_with_current_tenant() as log_session:
                create_hook_execution_log__no_commit(
                    db_session=log_session,
                    hook_id=hook_id,
                    is_success=False,
                    error_message=outcome.error_message,
                    status_code=outcome.status_code,
                    duration_ms=duration_ms,
                )
                log_session.commit()
        except Exception:
            logger.exception(
                f"Failed to persist hook execution log for hook_id={hook_id}"
            )

    # Update is_reachable separately — best-effort, non-critical.
    # None means the value is unchanged (set by the caller to skip the no-op write).
    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
    # concurrently deleted, so keep this isolated from the log write above.
    if outcome.updated_is_reachable is not None:
        try:
            with get_session_with_current_tenant() as reachable_session:
                update_hook__no_commit(
                    db_session=reachable_session,
                    hook_id=hook_id,
                    is_reachable=outcome.updated_is_reachable,
                )
                reachable_session.commit()
        except Exception:
            logger.warning(f"Failed to update is_reachable for hook_id={hook_id}")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def _execute_hook_inner(
    hook: Hook,
    payload: dict[str, Any],
    response_type: type[T],
) -> T | HookSoftFailed:
    """Make the HTTP call, validate the response, and return a typed model.

    Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.
    """
    timeout = hook.timeout_seconds
    hook_id = hook.id
    fail_strategy = hook.fail_strategy
    endpoint_url = hook.endpoint_url
    current_is_reachable: bool | None = hook.is_reachable

    if not endpoint_url:
        raise ValueError(
            f"hook_id={hook_id} is active but has no endpoint_url — "
            "active hooks without an endpoint_url must be rejected by _lookup_hook"
        )

    start = time.monotonic()
    response: httpx.Response | None = None
    exc: Exception | None = None
    try:
        api_key: str | None = (
            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
        )
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        with httpx.Client(
            timeout=timeout, follow_redirects=False
        ) as client:  # SSRF guard: never follow redirects
            response = client.post(endpoint_url, json=payload, headers=headers)
    except Exception as e:
        exc = e
    duration_ms = int((time.monotonic() - start) * 1000)

    outcome = _process_response(response=response, exc=exc, timeout=timeout)

    # Validate the response payload against response_type.
    # A validation failure downgrades the outcome to a failure so it is logged,
    # is_reachable is left unchanged (server responded — just a bad payload),
    # and fail_strategy is respected below.
    validated_model: T | None = None
    if outcome.is_success and outcome.response_payload is not None:
        try:
            validated_model = response_type.model_validate(outcome.response_payload)
        except ValidationError as e:
            msg = (
                f"Hook response failed validation against {response_type.__name__}: {e}"
            )
            outcome = _HttpOutcome(
                is_success=False,
                updated_is_reachable=None,  # server responded — reachability unchanged
                status_code=outcome.status_code,
                error_message=msg,
                response_payload=None,
            )

    # Skip the is_reachable write when the value would not change — avoids a
    # no-op DB round-trip on every call when the hook is already in the expected state.
    if outcome.updated_is_reachable == current_is_reachable:
        outcome = outcome.model_copy(update={"updated_is_reachable": None})
    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)

    if not outcome.is_success:
        if fail_strategy == HookFailStrategy.HARD:
            raise OnyxError(
                OnyxErrorCode.HOOK_EXECUTION_FAILED,
                outcome.error_message or "Hook execution failed.",
            )
        logger.warning(
            f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
        )
        return HookSoftFailed()

    if validated_model is None:
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            f"validated_model is None for successful hook call (hook_id={hook_id})",
        )
    return validated_model


def _execute_hook_impl(
    *,
    db_session: Session,
    hook_point: HookPoint,
    payload: dict[str, Any],
    response_type: type[T],
) -> T | HookSkipped | HookSoftFailed:
    """EE implementation — loaded by CE's execute_hook via fetch_versioned_implementation.

    Returns HookSkipped if no active hook is configured, HookSoftFailed if the
    hook failed with SOFT fail strategy, or a validated response model on success.
    Raises OnyxError on HARD failure or if the hook is misconfigured.
    """
    hook = _lookup_hook(db_session, hook_point)
    if isinstance(hook, HookSkipped):
        return hook

    fail_strategy = hook.fail_strategy
    hook_id = hook.id

    try:
        return _execute_hook_inner(hook, payload, response_type)
    except Exception:
        if fail_strategy == HookFailStrategy.SOFT:
            logger.exception(
                f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
            )
            return HookSoftFailed()
        raise


================================================
FILE: backend/ee/onyx/main.py
================================================
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager

from fastapi import FastAPI
from httpx_oauth.clients.google import GoogleOAuth2

from ee.onyx.server.analytics.api import router as analytics_router
from ee.onyx.server.auth_check import check_ee_router_auth
from ee.onyx.server.billing.api import router as billing_router
from ee.onyx.server.documents.cc_pair import router as ee_document_cc_pair_router
from ee.onyx.server.enterprise_settings.api import (
    admin_router as enterprise_settings_admin_router,
)
from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
)
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.features.hooks.api import router as hook_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.license_enforcement import (
    add_license_enforcement_middleware,
)
from ee.onyx.server.middleware.tenant_tracking import (
    add_api_server_tenant_id_middleware,
)
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.query_backend import (
    basic_router as ee_query_router,
)
from ee.onyx.server.query_and_chat.search_backend import router as search_router
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
from ee.onyx.server.scim.api import register_scim_exception_handlers
from ee.onyx.server.scim.api import scim_router
from ee.onyx.server.seeding import seed_db
from ee.onyx.server.tenants.api import router as tenants_router
from ee.onyx.server.token_rate_limits.api import (
    router as token_rate_limit_settings_router,
)
from ee.onyx.server.user_group.api import router as user_group_router
from ee.onyx.utils.encryption import test_encryption
from onyx.auth.users import auth_backend
from onyx.auth.users import create_onyx_oauth_router
from onyx.auth.users import fastapi_users
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.main import get_application as get_application_base
from onyx.main import include_auth_router_with_prefix
from onyx.main import include_router_with_global_prefix_prepended
from onyx.main import lifespan as lifespan_base
from onyx.main import use_route_function_names_as_operation_ids
from onyx.server.query_and_chat.query_backend import (
    basic_router as query_router,
)
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Small wrapper around the lifespan of the MIT application.
    Basically just calls the base lifespan, and then adds EE-only
    steps after."""

    async with lifespan_base(app):
        # seed the Onyx environment with LLMs, Assistants, etc. based on an optional
        # environment variable. Used to automate deployment for multiple environments.
        seed_db()

        yield


def get_application() -> FastAPI:
    # Anything that happens at import time is not guaranteed to be running ee-version
    # Anything after the server startup will be running ee version
    global_version.set_ee()

    test_encryption()

    application = get_application_base(lifespan_override=lifespan)

    if MULTI_TENANT:
        add_api_server_tenant_id_middleware(application, logger)
    else:
        # License enforcement middleware for self-hosted deployments only
        # Checks LICENSE_ENFORCEMENT_ENABLED at runtime (can be toggled without restart)
        # MT deployments use control plane gating via is_tenant_gated() instead
        add_license_enforcement_middleware(application, logger)

    if AUTH_TYPE == AuthType.CLOUD:
        # For Google OAuth, refresh tokens are requested by:
        # 1. Adding the right scopes
        # 2. Properly configuring OAuth in Google Cloud Console to allow offline access
        oauth_client = GoogleOAuth2(
            OAUTH_CLIENT_ID,
            OAUTH_CLIENT_SECRET,
            # Use standard scopes that include profile and email
            scopes=["openid", "email", "profile"],
        )
        include_auth_router_with_prefix(
            application,
            create_onyx_oauth_router(
                oauth_client,
                auth_backend,
                USER_AUTH_SECRET,
                associate_by_email=True,
                is_verified_by_default=True,
                # Points the user back to the login page
                redirect_url=f"{WEB_DOMAIN}/auth/oauth/callback",
            ),
            prefix="/auth/oauth",
        )

        # Need basic auth router for `logout` endpoint
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_logout_router(auth_backend),
            prefix="/auth",
        )

    # RBAC / group access control
    include_router_with_global_prefix_prepended(application, user_group_router)
    # Analytics endpoints
    include_router_with_global_prefix_prepended(application, analytics_router)
    include_router_with_global_prefix_prepended(application, query_history_router)
    # EE only backend APIs
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, ee_query_router)
    include_router_with_global_prefix_prepended(application, search_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
    include_router_with_global_prefix_prepended(application, evals_router)
    include_router_with_global_prefix_prepended(application, hook_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
        application, enterprise_settings_admin_router
    )
    # Token rate limit settings
    include_router_with_global_prefix_prepended(
        application, token_rate_limit_settings_router
    )
    include_router_with_global_prefix_prepended(application, enterprise_settings_router)
    include_router_with_global_prefix_prepended(application, usage_export_router)
    # License management
    include_router_with_global_prefix_prepended(application, license_router)

    # Unified billing API - always registered in EE.
    # Each endpoint is protected by the `current_admin_user` dependency (admin auth).
    include_router_with_global_prefix_prepended(application, billing_router)

    if MULTI_TENANT:
        # Tenant management
        include_router_with_global_prefix_prepended(application, tenants_router)

    # SCIM 2.0 — protocol endpoints (unauthenticated by Onyx session auth;
    # they use their own SCIM bearer token auth).
    # Not behind APP_API_PREFIX because IdPs expect /scim/v2/... directly.
    application.include_router(scim_router)
    register_scim_exception_handlers(application)

    # Ensure all routes have auth enabled or are explicitly marked as public
    check_ee_router_auth(application)

    # for debugging discovered routes
    # for route in application.router.routes:
    #     print(f"Path: {route.path}, Methods: {route.methods}")

    use_route_function_names_as_operation_ids(application)

    return application


================================================
FILE: backend/ee/onyx/onyxbot/slack/handlers/__init__.py
================================================


================================================
FILE: backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py
================================================
from slack_sdk import WebClient
from slack_sdk.models.blocks import ActionsBlock
from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import ButtonElement
from slack_sdk.models.blocks import SectionBlock
from sqlalchemy.orm import Session

from ee.onyx.db.standard_answer import fetch_standard_answer_categories_by_names
from ee.onyx.db.standard_answer import find_matching_standard_answers
from onyx.configs.constants import MessageType
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_messages_by_sessions
from onyx.db.chat import get_chat_sessions_by_slack_thread_id
from onyx.db.chat import get_or_create_root_message
from onyx.db.models import SlackChannelConfig
from onyx.db.models import StandardAnswer as StandardAnswerModel
from onyx.onyxbot.slack.blocks import get_restate_blocks
from onyx.onyxbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import update_emote_react
from onyx.server.manage.models import StandardAnswer as PydanticStandardAnswer
from onyx.utils.logger import OnyxLoggingAdapter
from onyx.utils.logger import setup_logger

logger = setup_logger()


def build_standard_answer_blocks(
    answer_message: str,
) -> list[Block]:
    generate_button_block = ButtonElement(
        action_id=GENERATE_ANSWER_BUTTON_ACTION_ID,
        text="Generate Full Answer",
    )
    answer_block = SectionBlock(text=answer_message)
    return [
        answer_block,
        ActionsBlock(
            elements=[generate_button_block],
        ),
    ]


def oneoff_standard_answers(
    message: str,
    slack_bot_categories: list[str],
    db_session: Session,
) -> list[PydanticStandardAnswer]:
    """
    Respond to the user message if it matches any configured standard answers.

    Returns a list of matching StandardAnswers if found, otherwise None.
    """
    configured_standard_answers = {
        standard_answer
        for category in fetch_standard_answer_categories_by_names(
            slack_bot_categories, db_session=db_session
        )
        for standard_answer in category.standard_answers
    }

    matching_standard_answers = find_matching_standard_answers(
        query=message,
        id_in=[answer.id for answer in configured_standard_answers],
        db_session=db_session,
    )

    server_standard_answers = [
        PydanticStandardAnswer.from_model(standard_answer_model)
        for (standard_answer_model, _) in matching_standard_answers
    ]
    return server_standard_answers


def _handle_standard_answers(
    message_info: SlackMessageInfo,
    receiver_ids: list[str] | None,
    slack_channel_config: SlackChannelConfig,
    logger: OnyxLoggingAdapter,
    client: WebClient,
    db_session: Session,
) -> bool:
    """
    Potentially respond to the user message depending on whether the user's message matches
    any of the configured standard answers and also whether those answers have already been
    provided in the current thread.

    Returns True if standard answers are found to match the user's message and therefore,
    we still need to respond to the users.
    """

    slack_thread_id = message_info.thread_to_respond
    configured_standard_answer_categories = (
        slack_channel_config.standard_answer_categories
    )
    configured_standard_answers = set(
        [
            standard_answer
            for standard_answer_category in configured_standard_answer_categories
            for standard_answer in standard_answer_category.standard_answers
        ]
    )
    query_msg = message_info.thread_messages[-1]

    if slack_thread_id is None:
        used_standard_answer_ids = set([])
    else:
        chat_sessions = get_chat_sessions_by_slack_thread_id(
            slack_thread_id=slack_thread_id,
            user_id=None,
            db_session=db_session,
        )
        chat_messages = get_chat_messages_by_sessions(
            chat_session_ids=[chat_session.id for chat_session in chat_sessions],
            user_id=None,
            db_session=db_session,
            skip_permission_check=True,
        )
        used_standard_answer_ids = set(
            [
                standard_answer.id
                for chat_message in chat_messages
                for standard_answer in chat_message.standard_answers
            ]
        )

    usable_standard_answers = configured_standard_answers.difference(
        used_standard_answer_ids
    )

    matching_standard_answers: list[tuple[StandardAnswerModel, str]] = []
    if usable_standard_answers:
        matching_standard_answers = find_matching_standard_answers(
            query=query_msg.message,
            id_in=[standard_answer.id for standard_answer in usable_standard_answers],
            db_session=db_session,
        )

    if matching_standard_answers:
        chat_session = create_chat_session(
            db_session=db_session,
            description="",
            user_id=None,
            persona_id=(
                slack_channel_config.persona.id if slack_channel_config.persona else 0
            ),
            onyxbot_flow=True,
            slack_thread_id=slack_thread_id,
        )

        root_message = get_or_create_root_message(
            chat_session_id=chat_session.id, db_session=db_session
        )

        new_user_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=root_message,
            message=query_msg.message,
            token_count=0,
            message_type=MessageType.USER,
            db_session=db_session,
            commit=True,
        )

        formatted_answers = []
        for standard_answer, match_str in matching_standard_answers:
            since_you_mentioned_pretext = (
                f'Since your question contains "_{match_str}_"'
            )
            block_quotified_answer = ">" + standard_answer.answer.replace("\n", "\n> ")
            formatted_answer = f"{since_you_mentioned_pretext}, I thought this might be useful: \n\n{block_quotified_answer}"
            formatted_answers.append(formatted_answer)
        answer_message = "\n\n".join(formatted_answers)

        chat_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=new_user_message,
            message=answer_message,
            token_count=0,
            message_type=MessageType.ASSISTANT,
            error=None,
            db_session=db_session,
            commit=False,
        )
        # attach the standard answers to the chat message
        chat_message.standard_answers = [
            standard_answer for standard_answer, _ in matching_standard_answers
        ]
        db_session.commit()

        update_emote_react(
            emoji=ONYX_BOT_REACT_EMOJI,
            channel=message_info.channel_to_respond,
            message_ts=message_info.msg_to_respond,
            remove=True,
            client=client,
        )

        restate_question_blocks = get_restate_blocks(
            msg=query_msg.message,
            is_slash_command=message_info.is_slash_command,
        )

        answer_blocks = build_standard_answer_blocks(
            answer_message=answer_message,
        )

        all_blocks = restate_question_blocks + answer_blocks

        try:
            respond_in_thread_or_channel(
                client=client,
                channel=message_info.channel_to_respond,
                receiver_ids=receiver_ids,
                text="Hello! Onyx has some results for you!",
                blocks=all_blocks,
                thread_ts=message_info.msg_to_respond,
                unfurl=False,
            )

            if receiver_ids and slack_thread_id:
                send_team_member_message(
                    client=client,
                    channel=message_info.channel_to_respond,
                    thread_ts=slack_thread_id,
                    receiver_ids=receiver_ids,
                )

            return True
        except Exception as e:
            logger.exception(f"Unable to send standard answer message: {e}")
            return False
    else:
        return False


================================================
FILE: backend/ee/onyx/prompts/__init__.py
================================================


================================================
FILE: backend/ee/onyx/prompts/query_expansion.py
================================================
# Single message is likely most reliable and generally better for this task
# No final reminders at the end since the user query is expected to be short
# If it is not short, it should go into the chat flow so we do not need to account for this.
KEYWORD_EXPANSION_PROMPT = """
Generate a set of keyword-only queries to help find relevant documents for the provided query. \
These queries will be passed to a bm25-based keyword search engine. \
Provide a single query per line (where each query consists of one or more keywords). \
The queries must be purely keywords and not contain any filler natural language. \
The each query should have as few keywords as necessary to represent the user's search intent. \
If there are no useful expansions, simply return the original query with no additional keyword queries. \
CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.

The user query is:
{user_query}
""".strip()


QUERY_TYPE_PROMPT = """
Determine if the provided query is better suited for a keyword search or a semantic search.
Respond with "keyword" or "semantic" literally and nothing else.
Do not provide any additional text or reasoning to your response.

CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".

The user query is:
{user_query}
""".strip()


================================================
FILE: backend/ee/onyx/prompts/search_flow_classification.py
================================================
# ruff: noqa: E501, W605 start
SEARCH_CLASS = "search"
CHAT_CLASS = "chat"

# Will note that with many larger LLMs the latency on running this prompt via third party APIs is as high as 2 seconds which is too slow for many
# use cases.
SEARCH_CHAT_PROMPT = f"""
Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".

# Classification Guidelines:
## {SEARCH_CLASS}
- If the query consists entirely of keywords or query doesn't require any answer from the AI
- If the query is a short statement that seems like a search query rather than a question
- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in a internal document

### Examples of {SEARCH_CLASS} queries:
- Find me the document that goes over the onboarding process for a new hire
- Pull requests since last week
- Sales Runbook AMEA Region
- Procurement process
- Retrieve the PRD for project X

## {CHAT_CLASS}
- If the query is asking a question that requires an answer rather than a document
- If the query is asking for a solution, suggestion, or general help
- If the query is seeking information that is on the web and likely not in a company internal document
- If the query should be answered without any context from additional documents or searches

### Examples of {CHAT_CLASS} queries:
- What led us to win the deal with company X? (seeking answer)
- Google Drive not sync-ing files to my computer (seeking solution)
- Review my email: <whatever the email is> (general help)
- Write me a script to... (general help)
- Cheap flights Europe to Tokyo (information likely found on the web, not internal)

# User Query:
{{user_query}}

REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/ee/onyx/search/process_search_query.py
================================================
from collections.abc import Generator

from sqlalchemy.orm import Session

from ee.onyx.db.search import create_search_query
from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
from onyx.tools.tool_implementations.search.search_utils import (
    weighted_reciprocal_rank_fusion,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()


# This is just a heuristic that also happens to work well for the UI/UX
# Users would not find it useful to see a huge list of suggested docs
# but more than 1 is also likely good as many questions may target more than 1 doc.
TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3


def _run_single_search(
    query: str,
    filters: BaseFilters | None,
    document_index: DocumentIndex,
    user: User,
    db_session: Session,
    num_hits: int | None = None,
    hybrid_alpha: float | None = None,
) -> list[InferenceChunk]:
    """Execute a single search query and return chunks."""
    chunk_search_request = ChunkSearchRequest(
        query=query,
        user_selected_filters=filters,
        limit=num_hits,
        hybrid_alpha=hybrid_alpha,
    )

    return search_pipeline(
        chunk_search_request=chunk_search_request,
        document_index=document_index,
        user=user,
        persona_search_info=None,
        db_session=db_session,
    )


def stream_search_query(
    request: SendSearchQueryRequest,
    user: User,
    db_session: Session,
) -> Generator[
    SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
    None,
    None,
]:
    """
    Core search function that yields streaming packets.
    Used by both streaming and non-streaming endpoints.
    """
    # Get document index.
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)

    # Determine queries to execute
    original_query = request.search_query
    keyword_expansions: list[str] = []

    if request.run_query_expansion:
        try:
            llm = get_default_llm()
            keyword_expansions = expand_keywords(
                user_query=original_query,
                llm=llm,
            )
            if keyword_expansions:
                logger.debug(
                    f"Query expansion generated {len(keyword_expansions)} keyword queries"
                )
        except Exception as e:
            logger.warning(f"Query expansion failed: {e}; using original query only.")
            keyword_expansions = []

    # Build list of all executed queries for tracking
    all_executed_queries = [original_query] + keyword_expansions

    if not user.is_anonymous:
        create_search_query(
            db_session=db_session,
            user_id=user.id,
            query=request.search_query,
            query_expansions=keyword_expansions if keyword_expansions else None,
        )

    # Execute search(es)
    if not keyword_expansions:
        # Single query (original only) - no threading needed
        chunks = _run_single_search(
            query=original_query,
            filters=request.filters,
            document_index=document_index,
            user=user,
            db_session=db_session,
            num_hits=request.num_hits,
            hybrid_alpha=request.hybrid_alpha,
        )
    else:
        # Multiple queries - run in parallel and merge with RRF
        # First query is the original (semantic), rest are keyword expansions
        search_functions = [
            (
                _run_single_search,
                (
                    query,
                    request.filters,
                    document_index,
                    user,
                    db_session,
                    request.num_hits,
                    request.hybrid_alpha,
                ),
            )
            for query in all_executed_queries
        ]

        # Run all searches in parallel
        all_search_results: list[list[InferenceChunk]] = (
            run_functions_tuples_in_parallel(
                search_functions,
                allow_failures=True,
            )
        )

        # Separate original query results from keyword expansion results
        # Note that in rare cases, the original query may have failed and so we may be
        # just overweighting one set of keyword results, should be not a big deal though.
        original_result = all_search_results[0] if all_search_results else []
        keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []

        # Build valid results and weights
        # Original query (semantic): weight 2.0
        # Keyword expansions: weight 1.0 each
        valid_results: list[list[InferenceChunk]] = []
        weights: list[float] = []

        if original_result:
            valid_results.append(original_result)
            weights.append(2.0)

        for keyword_result in keyword_results:
            if keyword_result:
                valid_results.append(keyword_result)
                weights.append(1.0)

        if not valid_results:
            logger.warning("All parallel searches returned empty results")
            chunks = []
        else:
            chunks = weighted_reciprocal_rank_fusion(
                ranked_results=valid_results,
                weights=weights,
                id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
            )

    # Merge chunks into sections
    sections = merge_individual_chunks(chunks)

    # Truncate to the requested number of hits
    sections = sections[: request.num_hits]

    # Apply LLM document selection if requested
    # num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
    # The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
    # llm_selected_doc_ids will be:
    #   - None if LLM selection was not requested or failed
    #   - Empty list if LLM selection ran but selected nothing
    #   - List of doc IDs if LLM selection succeeded
    run_llm_selection = (
        request.num_docs_fed_to_llm_selection is not None
        and request.num_docs_fed_to_llm_selection >= 1
    )
    llm_selected_doc_ids: list[str] | None = None
    llm_selection_failed = False
    if run_llm_selection and sections:
        try:
            llm = get_default_llm()
            sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
            selected_sections, _ = select_sections_for_expansion(
                sections=sections_to_evaluate,
                user_query=original_query,
                llm=llm,
                max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
                try_to_fill_to_max=True,
            )
            # Extract unique document IDs from selected sections (may be empty)
            llm_selected_doc_ids = list(
                dict.fromkeys(
                    section.center_chunk.document_id for section in selected_sections
                )
            )
            logger.debug(
                f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
                f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
            )
        except Exception as e:
            # Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
            logger.warning(f"LLM document selection failed: {e}")
            llm_selection_failed = True
    elif run_llm_selection and not sections:
        # LLM selection requested but no sections to evaluate
        llm_selected_doc_ids = []

    # Convert to SearchDocWithContent list, optionally including content
    search_docs = SearchDocWithContent.from_inference_sections(
        sections,
        include_content=request.include_content,
        is_internet=False,
    )

    # Yield queries packet
    yield SearchQueriesPacket(all_executed_queries=all_executed_queries)

    # Yield docs packet
    yield SearchDocsPacket(search_docs=search_docs)

    # Yield LLM selected docs packet if LLM selection was requested
    # - llm_selected_doc_ids is None if selection failed
    # - llm_selected_doc_ids is empty list if no docs were selected
    # - llm_selected_doc_ids is list of IDs if docs were selected
    if run_llm_selection:
        yield LLMSelectedDocsPacket(
            llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
        )


def gather_search_stream(
    packets: Generator[
        SearchQueriesPacket
        | SearchDocsPacket
        | LLMSelectedDocsPacket
        | SearchErrorPacket,
        None,
        None,
    ],
) -> SearchFullResponse:
    """
    Aggregate all streaming packets into SearchFullResponse.
    """
    all_executed_queries: list[str] = []
    search_docs: list[SearchDocWithContent] = []
    llm_selected_doc_ids: list[str] | None = None
    error: str | None = None

    for packet in packets:
        if isinstance(packet, SearchQueriesPacket):
            all_executed_queries = packet.all_executed_queries
        elif isinstance(packet, SearchDocsPacket):
            search_docs = packet.search_docs
        elif isinstance(packet, LLMSelectedDocsPacket):
            llm_selected_doc_ids = packet.llm_selected_doc_ids
        elif isinstance(packet, SearchErrorPacket):
            error = packet.error

    return SearchFullResponse(
        all_executed_queries=all_executed_queries,
        search_docs=search_docs,
        doc_selection_reasoning=None,
        llm_selected_doc_ids=llm_selected_doc_ids,
        error=error,
    )


================================================
FILE: backend/ee/onyx/secondary_llm_flows/__init__.py
================================================


================================================
FILE: backend/ee/onyx/secondary_llm_flows/query_expansion.py
================================================
import re

from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.
CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')


def _clean_keyword_line(line: str) -> str:
    """Clean a keyword line by removing common LLM artifacts.

    Removes brackets, quotes, and other characters that LLMs may accidentally
    include in their output.
    """
    # Remove common artifacts
    cleaned = CLEANUP_PATTERN.sub("", line)
    # Remove leading list markers like "1.", "2.", "-", "*"
    cleaned = re.sub(r"^\s*(?:\d+[\.\)]\s*|[-*]\s*)", "", cleaned)
    return cleaned.strip()


def expand_keywords(
    user_query: str,
    llm: LLM,
) -> list[str]:
    """Expand a user query into multiple keyword-only queries for BM25 search.

    Uses an LLM to generate keyword-based search queries that capture different
    aspects of the user's search intent. Returns only the expanded queries,
    not the original query.

    Args:
        user_query: The original search query from the user
        llm: Language model to use for keyword expansion

    Returns:
        List of expanded keyword queries (excluding the original query).
        Returns empty list if expansion fails or produces no useful expansions.
    """
    messages: LanguageModelInput = [
        UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
    ]

    try:
        response = llm.invoke(
            prompt=messages,
            reasoning_effort=ReasoningEffort.OFF,
            # Limit output - we only expect a few short keyword queries
            max_tokens=150,
        )

        content = llm_response_to_string(response).strip()

        if not content:
            logger.warning("Keyword expansion returned empty response.")
            return []

        # Parse response - each line is a separate keyword query
        # Clean each line to remove LLM artifacts and drop empty lines
        parsed_queries = []
        for line in content.strip().split("\n"):
            cleaned = _clean_keyword_line(line)
            if cleaned:
                parsed_queries.append(cleaned)

        if not parsed_queries:
            logger.warning("Keyword expansion parsing returned no queries.")
            return []

        # Filter out duplicates and queries that match the original
        expanded_queries: list[str] = []
        seen_lower: set[str] = {user_query.lower()}
        for query in parsed_queries:
            query_lower = query.lower()
            if query_lower not in seen_lower:
                seen_lower.add(query_lower)
                expanded_queries.append(query)

        logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
        return expanded_queries

    except Exception as e:
        logger.warning(f"Keyword expansion failed: {e}")
        return []


================================================
FILE: backend/ee/onyx/secondary_llm_flows/search_flow_classification.py
================================================
from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()


@log_function_time(print_only=True)
def classify_is_search_flow(
    query: str,
    llm: LLM,
) -> bool:
    messages: LanguageModelInput = [
        UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))
    ]
    response = llm.invoke(
        prompt=messages,
        reasoning_effort=ReasoningEffort.OFF,
        # Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout
        timeout_override=2,
        # Well more than necessary but just to ensure completion and in case it succeeds with classifying but
        # ends up rambling
        max_tokens=20,
    )

    content = llm_response_to_string(response).strip().lower()
    if not content:
        logger.warning(
            "Search flow classification returned empty response; defaulting to chat flow."
        )
        return False

    # Prefer chat if both appear.
    if CHAT_CLASS in content:
        return False
    if SEARCH_CLASS in content:
        return True

    logger.warning(
        "Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
        content,
    )
    return False


================================================
FILE: backend/ee/onyx/server/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/analytics/api.py
================================================
import datetime
from collections import defaultdict
from typing import List

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.db.analytics import fetch_assistant_message_analytics
from ee.onyx.db.analytics import fetch_assistant_unique_users
from ee.onyx.db.analytics import fetch_assistant_unique_users_total
from ee.onyx.db.analytics import fetch_onyxbot_analytics
from ee.onyx.db.analytics import fetch_per_user_query_analytics
from ee.onyx.db.analytics import fetch_persona_message_analytics
from ee.onyx.db.analytics import fetch_persona_unique_users
from ee.onyx.db.analytics import fetch_query_analytics
from ee.onyx.db.analytics import user_can_view_assistant_stats
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User

router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)


_DEFAULT_LOOKBACK_DAYS = 30


class QueryAnalyticsResponse(BaseModel):
    total_queries: int
    total_likes: int
    total_dislikes: int
    date: datetime.date


@router.get("/admin/query")
def get_query_analytics(
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[QueryAnalyticsResponse]:
    daily_query_usage_info = fetch_query_analytics(
        start=start
        or (
            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)
        ),  # default is 30d lookback
        end=end or datetime.datetime.utcnow(),
        db_session=db_session,
    )
    return [
        QueryAnalyticsResponse(
            total_queries=total_queries,
            total_likes=total_likes,
            total_dislikes=total_dislikes,
            date=date,
        )
        for total_queries, total_likes, total_dislikes, date in daily_query_usage_info
    ]


class UserAnalyticsResponse(BaseModel):
    total_active_users: int
    date: datetime.date


@router.get("/admin/user")
def get_user_analytics(
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[UserAnalyticsResponse]:
    daily_query_usage_info_per_user = fetch_per_user_query_analytics(
        start=start
        or (
            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)
        ),  # default is 30d lookback
        end=end or datetime.datetime.utcnow(),
        db_session=db_session,
    )

    user_analytics: dict[datetime.date, int] = defaultdict(int)
    for __, ___, ____, date, _____ in daily_query_usage_info_per_user:
        user_analytics[date] += 1
    return [
        UserAnalyticsResponse(
            total_active_users=cnt,
            date=date,
        )
        for date, cnt in user_analytics.items()
    ]


class OnyxbotAnalyticsResponse(BaseModel):
    total_queries: int
    auto_resolved: int
    date: datetime.date


@router.get("/admin/onyxbot")
def get_onyxbot_analytics(
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[OnyxbotAnalyticsResponse]:
    daily_onyxbot_info = fetch_onyxbot_analytics(
        start=start
        or (
            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)
        ),  # default is 30d lookback
        end=end or datetime.datetime.utcnow(),
        db_session=db_session,
    )

    resolution_results = [
        OnyxbotAnalyticsResponse(
            total_queries=total_queries,
            # If it hits negatives, something has gone wrong...
            auto_resolved=max(0, total_queries - total_negatives),
            date=date,
        )
        for total_queries, total_negatives, date in daily_onyxbot_info
    ]

    return resolution_results


class PersonaMessageAnalyticsResponse(BaseModel):
    total_messages: int
    date: datetime.date
    persona_id: int


@router.get("/admin/persona/messages")
def get_persona_messages(
    persona_id: int,
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[PersonaMessageAnalyticsResponse]:
    """Fetch daily message counts for a single persona within the given time range."""
    start = start or (
        datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)
    )
    end = end or datetime.datetime.utcnow()

    persona_message_counts = []
    for count, date in fetch_persona_message_analytics(
        db_session=db_session,
        persona_id=persona_id,
        start=start,
        end=end,
    ):
        persona_message_counts.append(
            PersonaMessageAnalyticsResponse(
                total_messages=count,
                date=date,
                persona_id=persona_id,
            )
        )

    return persona_message_counts


class PersonaUniqueUsersResponse(BaseModel):
    unique_users: int
    date: datetime.date
    persona_id: int


@router.get("/admin/persona/unique-users")
def get_persona_unique_users(
    persona_id: int,
    start: datetime.datetime,
    end: datetime.datetime,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[PersonaUniqueUsersResponse]:
    """Get unique users per day for a single persona."""
    unique_user_counts = []
    daily_counts = fetch_persona_unique_users(
        db_session=db_session,
        persona_id=persona_id,
        start=start,
        end=end,
    )
    for count, date in daily_counts:
        unique_user_counts.append(
            PersonaUniqueUsersResponse(
                unique_users=count,
                date=date,
                persona_id=persona_id,
            )
        )
    return unique_user_counts


class AssistantDailyUsageResponse(BaseModel):
    date: datetime.date
    total_messages: int
    total_unique_users: int


class AssistantStatsResponse(BaseModel):
    daily_stats: List[AssistantDailyUsageResponse]
    total_messages: int
    total_unique_users: int


@router.get("/assistant/{assistant_id}/stats")
def get_assistant_stats(
    assistant_id: int,
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> AssistantStatsResponse:
    """
    Returns daily message and unique user counts for a user's assistant,
    along with the overall total messages and total distinct users.
    """
    start = start or (
        datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)
    )
    end = end or datetime.datetime.utcnow()

    if not user_can_view_assistant_stats(db_session, user, assistant_id):
        raise HTTPException(
            status_code=403, detail="Not allowed to access this assistant's stats."
        )

    # Pull daily usage from the DB calls
    messages_data = fetch_assistant_message_analytics(
        db_session, assistant_id, start, end
    )
    unique_users_data = fetch_assistant_unique_users(
        db_session, assistant_id, start, end
    )

    # Map each day => (messages, unique_users).
    daily_messages_map = {date: count for count, date in messages_data}
    daily_unique_users_map = {date: count for count, date in unique_users_data}
    all_dates = set(daily_messages_map.keys()) | set(daily_unique_users_map.keys())

    # Merge both sets of metrics by date
    daily_results: list[AssistantDailyUsageResponse] = []
    for date in sorted(all_dates):
        daily_results.append(
            AssistantDailyUsageResponse(
                date=date,
                total_messages=daily_messages_map.get(date, 0),
                total_unique_users=daily_unique_users_map.get(date, 0),
            )
        )

    # Now pull a single total distinct user count across the entire time range
    total_msgs = sum(d.total_messages for d in daily_results)
    total_users = fetch_assistant_unique_users_total(
        db_session, assistant_id, start, end
    )

    return AssistantStatsResponse(
        daily_stats=daily_results,
        total_messages=total_msgs,
        total_unique_users=total_users,
    )


================================================
FILE: backend/ee/onyx/server/auth_check.py
================================================
from fastapi import FastAPI

from onyx.server.auth_check import check_router_auth
from onyx.server.auth_check import PUBLIC_ENDPOINT_SPECS


EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
    # SCIM 2.0 service discovery — unauthenticated so IdPs can probe
    # before bearer token configuration is complete
    ("/scim/v2/ServiceProviderConfig", {"GET"}),
    ("/scim/v2/ResourceTypes", {"GET"}),
    ("/scim/v2/Schemas", {"GET"}),
    # needs to be accessible prior to user login
    ("/enterprise-settings", {"GET"}),
    ("/enterprise-settings/logo", {"GET"}),
    ("/enterprise-settings/logotype", {"GET"}),
    ("/enterprise-settings/custom-analytics-script", {"GET"}),
    # Stripe publishable key is safe to expose publicly
    ("/tenants/stripe-publishable-key", {"GET"}),
    ("/admin/billing/stripe-publishable-key", {"GET"}),
    # Proxy endpoints use license-based auth, not user auth
    ("/proxy/create-checkout-session", {"POST"}),
    ("/proxy/claim-license", {"POST"}),
    ("/proxy/create-customer-portal-session", {"POST"}),
    ("/proxy/billing-information", {"GET"}),
    ("/proxy/license/{tenant_id}", {"GET"}),
    ("/proxy/seats/update", {"POST"}),
]


def check_ee_router_auth(
    application: FastAPI,
    public_endpoint_specs: list[tuple[str, set[str]]] = EE_PUBLIC_ENDPOINT_SPECS,
) -> None:
    # similar to the open source version of this function, but checking for the EE-only
    # endpoints as well
    check_router_auth(application, public_endpoint_specs)


================================================
FILE: backend/ee/onyx/server/billing/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/billing/api.py
================================================
"""Unified Billing API endpoints.

These endpoints provide Stripe billing functionality for both cloud and
self-hosted deployments. The service layer routes requests appropriately:

- Self-hosted: Routes through cloud data plane proxy
  Flow: Backend /admin/billing/* → Cloud DP /proxy/* → Control plane

- Cloud (MULTI_TENANT): Routes directly to control plane
  Flow: Backend /admin/billing/* → Control plane

License claiming is handled separately by /license/claim endpoint (self-hosted only).

Migration Note (ENG-3533):
This /admin/billing/* API replaces the older /tenants/* billing endpoints:
- /tenants/billing-information            -> /admin/billing/billing-information
- /tenants/create-customer-portal-session -> /admin/billing/create-customer-portal-session
- /tenants/create-subscription-session    -> /admin/billing/create-checkout-session
- /tenants/stripe-publishable-key         -> /admin/billing/stripe-publishable-key

See: https://linear.app/onyx-app/issue/ENG-3533/migrate-tenantsbilling-adminbilling
"""

import asyncio

import httpx
from fastapi import APIRouter
from fastapi import Depends
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.auth.users import current_admin_user
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionRequest
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
from ee.onyx.server.billing.models import CreateCustomerPortalSessionRequest
from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
from ee.onyx.server.billing.models import SeatUpdateRequest
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import StripePublishableKeyResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.billing.service import (
    create_checkout_session as create_checkout_service,
)
from ee.onyx.server.billing.service import (
    create_customer_portal_session as create_portal_service,
)
from ee.onyx.server.billing.service import (
    get_billing_information as get_billing_service,
)
from ee.onyx.server.billing.service import update_seat_count as update_seat_service
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/admin/billing")

# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()

# Redis key for billing circuit breaker (self-hosted only)
# When set, billing requests to Stripe are disabled until user manually retries
BILLING_CIRCUIT_BREAKER_KEY = "billing_circuit_open"
# Circuit breaker auto-expires after 1 hour (user can manually retry sooner)
BILLING_CIRCUIT_BREAKER_TTL_SECONDS = 3600


def _is_billing_circuit_open() -> bool:
    """Check if the billing circuit breaker is open (self-hosted only)."""
    if MULTI_TENANT:
        return False
    try:
        redis_client = get_shared_redis_client()
        is_open = bool(redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY))
        logger.debug(
            f"Circuit breaker check: key={BILLING_CIRCUIT_BREAKER_KEY}, is_open={is_open}"
        )
        return is_open
    except Exception as e:
        logger.error(f"Failed to check circuit breaker: {e}")
        return False


def _open_billing_circuit() -> None:
    """Open the billing circuit breaker after a failure (self-hosted only)."""
    if MULTI_TENANT:
        return
    try:
        redis_client = get_shared_redis_client()
        redis_client.set(
            BILLING_CIRCUIT_BREAKER_KEY,
            "1",
            ex=BILLING_CIRCUIT_BREAKER_TTL_SECONDS,
        )
        # Verify it was set
        exists = redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY)
        logger.warning(
            f"Billing circuit breaker opened (TTL={BILLING_CIRCUIT_BREAKER_TTL_SECONDS}s, "
            f"verified={exists}). Stripe billing requests are disabled until manually reset."
        )
    except Exception as e:
        logger.error(f"Failed to open circuit breaker: {e}")


def _close_billing_circuit() -> None:
    """Close the billing circuit breaker (re-enable Stripe requests)."""
    if MULTI_TENANT:
        return
    try:
        redis_client = get_shared_redis_client()
        redis_client.delete(BILLING_CIRCUIT_BREAKER_KEY)
        logger.info(
            "Billing circuit breaker closed. Stripe billing requests re-enabled."
        )
    except Exception as e:
        logger.error(f"Failed to close circuit breaker: {e}")


def _get_license_data(db_session: Session) -> str | None:
    """Get license data from database if exists (self-hosted only)."""
    if MULTI_TENANT:
        return None
    license_record = get_license(db_session)
    return license_record.license_data if license_record else None


def _get_tenant_id() -> str | None:
    """Get tenant ID for cloud deployments."""
    if MULTI_TENANT:
        return get_current_tenant_id()
    return None


@router.post("/create-checkout-session")
async def create_checkout_session(
    request: CreateCheckoutSessionRequest | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CreateCheckoutSessionResponse:
    """Create a Stripe checkout session for new subscription or renewal.

    For new customers, no license/tenant is required.
    For renewals, existing license (self-hosted) or tenant_id (cloud) is used.

    After checkout completion:
    - Self-hosted: Use /license/claim to retrieve the license
    - Cloud: Subscription is automatically activated
    """
    license_data = _get_license_data(db_session)
    tenant_id = _get_tenant_id()
    billing_period = request.billing_period if request else "monthly"
    seats = request.seats if request else None
    email = request.email if request else None

    # Validate that requested seats is not less than current used seats
    if seats is not None:
        used_seats = get_used_seats(tenant_id)
        if seats < used_seats:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                f"Cannot subscribe with fewer seats than current usage. "
                f"You have {used_seats} active users/integrations but requested {seats} seats.",
            )

    # Build redirect URL for after checkout completion
    redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"

    return await create_checkout_service(
        billing_period=billing_period,
        seats=seats,
        email=email,
        license_data=license_data,
        redirect_url=redirect_url,
        tenant_id=tenant_id,
    )


@router.post("/create-customer-portal-session")
async def create_customer_portal_session(
    request: CreateCustomerPortalSessionRequest | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CreateCustomerPortalSessionResponse:
    """Create a Stripe customer portal session for managing subscription.

    Requires existing license (self-hosted) or active tenant (cloud).
    """
    license_data = _get_license_data(db_session)
    tenant_id = _get_tenant_id()

    # Self-hosted requires license
    if not MULTI_TENANT and not license_data:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")

    return_url = request.return_url if request else f"{WEB_DOMAIN}/admin/billing"

    return await create_portal_service(
        license_data=license_data,
        return_url=return_url,
        tenant_id=tenant_id,
    )


@router.get("/billing-information")
async def get_billing_information(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> BillingInformationResponse | SubscriptionStatusResponse:
    """Get billing information for the current subscription.

    Returns subscription status and details from Stripe.
    For self-hosted: If the circuit breaker is open (previous failure),
    returns a 503 error without making the request.
    """
    license_data = _get_license_data(db_session)
    tenant_id = _get_tenant_id()

    # Self-hosted without license = no subscription
    if not MULTI_TENANT and not license_data:
        return SubscriptionStatusResponse(subscribed=False)

    # Check circuit breaker (self-hosted only)
    if _is_billing_circuit_open():
        raise OnyxError(
            OnyxErrorCode.SERVICE_UNAVAILABLE,
            "Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
        )

    try:
        return await get_billing_service(
            license_data=license_data,
            tenant_id=tenant_id,
        )
    except OnyxError as e:
        # Open circuit breaker on connection failures (self-hosted only)
        if e.status_code in (
            OnyxErrorCode.BAD_GATEWAY.status_code,
            OnyxErrorCode.SERVICE_UNAVAILABLE.status_code,
            OnyxErrorCode.GATEWAY_TIMEOUT.status_code,
        ):
            _open_billing_circuit()
        raise


@router.post("/seats/update")
async def update_seats(
    request: SeatUpdateRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> SeatUpdateResponse:
    """Update the seat count for the current subscription.

    Handles Stripe proration and license regeneration via control plane.
    For self-hosted, the frontend should call /license/claim after a short delay
    to fetch the regenerated license.
    """
    license_data = _get_license_data(db_session)
    tenant_id = _get_tenant_id()

    # Self-hosted requires license
    if not MULTI_TENANT and not license_data:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")

    # Validate that new seat count is not less than current used seats
    used_seats = get_used_seats(tenant_id)
    if request.new_seat_count < used_seats:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            f"Cannot reduce seats below current usage. "
            f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
        )

    # Note: Don't store license here - the control plane may still be processing
    # the subscription update. The frontend should call /license/claim after a
    # short delay to get the freshly generated license.
    return await update_seat_service(
        new_seat_count=request.new_seat_count,
        license_data=license_data,
        tenant_id=tenant_id,
    )


@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
    """Fetch the Stripe publishable key.

    Priority: env var override (for testing) > S3 bucket (production).
    This endpoint is public (no auth required) since publishable keys are safe to expose.
    The key is cached in memory to avoid hitting S3 on every request.
    """
    global _stripe_publishable_key_cache

    # Fast path: return cached value without lock
    if _stripe_publishable_key_cache:
        return StripePublishableKeyResponse(
            publishable_key=_stripe_publishable_key_cache
        )

    # Use lock to prevent concurrent S3 requests
    async with _stripe_key_lock:
        # Double-check after acquiring lock (another request may have populated cache)
        if _stripe_publishable_key_cache:
            return StripePublishableKeyResponse(
                publishable_key=_stripe_publishable_key_cache
            )

        # Check for env var override first (for local testing with pk_test_* keys)
        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
            if not key.startswith("pk_"):
                raise OnyxError(
                    OnyxErrorCode.INTERNAL_ERROR,
                    "Invalid Stripe publishable key format",
                )
            _stripe_publishable_key_cache = key
            return StripePublishableKeyResponse(publishable_key=key)

        # Fall back to S3 bucket
        if not STRIPE_PUBLISHABLE_KEY_URL:
            raise OnyxError(
                OnyxErrorCode.INTERNAL_ERROR,
                "Stripe publishable key is not configured",
            )

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
                response.raise_for_status()
                key = response.text.strip()

                # Validate key format
                if not key.startswith("pk_"):
                    raise OnyxError(
                        OnyxErrorCode.INTERNAL_ERROR,
                        "Invalid Stripe publishable key format",
                    )

                _stripe_publishable_key_cache = key
                return StripePublishableKeyResponse(publishable_key=key)
        except httpx.HTTPError:
            raise OnyxError(
                OnyxErrorCode.INTERNAL_ERROR,
                "Failed to fetch Stripe publishable key",
            )


class ResetConnectionResponse(BaseModel):
    success: bool
    message: str


@router.post("/reset-connection")
async def reset_stripe_connection(
    _: User = Depends(current_admin_user),
) -> ResetConnectionResponse:
    """Reset the Stripe connection circuit breaker.

    Called when user clicks "Connect to Stripe" to retry after a previous failure.
    This clears the circuit breaker flag, allowing billing requests to proceed again.
    Self-hosted only - cloud deployments don't use the circuit breaker.
    """
    if MULTI_TENANT:
        return ResetConnectionResponse(
            success=True,
            message="Circuit breaker not applicable for cloud deployments",
        )

    _close_billing_circuit()
    return ResetConnectionResponse(
        success=True,
        message="Stripe connection reset. Billing requests re-enabled.",
    )


================================================
FILE: backend/ee/onyx/server/billing/models.py
================================================
"""Pydantic models for the billing API."""

from datetime import datetime
from typing import Literal

from pydantic import BaseModel


class CreateCheckoutSessionRequest(BaseModel):
    """Request to create a Stripe checkout session."""

    billing_period: Literal["monthly", "annual"] = "monthly"
    seats: int | None = None
    email: str | None = None


class CreateCheckoutSessionResponse(BaseModel):
    """Response containing the Stripe checkout session URL."""

    stripe_checkout_url: str


class CreateCustomerPortalSessionRequest(BaseModel):
    """Request to create a Stripe customer portal session."""

    return_url: str | None = None


class CreateCustomerPortalSessionResponse(BaseModel):
    """Response containing the Stripe customer portal URL."""

    stripe_customer_portal_url: str


class BillingInformationResponse(BaseModel):
    """Billing information for the current subscription."""

    tenant_id: str
    status: str | None = None
    plan_type: str | None = None
    seats: int | None = None
    billing_period: str | None = None
    current_period_start: datetime | None = None
    current_period_end: datetime | None = None
    cancel_at_period_end: bool = False
    canceled_at: datetime | None = None
    trial_start: datetime | None = None
    trial_end: datetime | None = None
    payment_method_enabled: bool = False


class SubscriptionStatusResponse(BaseModel):
    """Response when no subscription exists."""

    subscribed: bool = False


class SeatUpdateRequest(BaseModel):
    """Request to update seat count."""

    new_seat_count: int


class SeatUpdateResponse(BaseModel):
    """Response from seat update operation."""

    success: bool
    current_seats: int
    used_seats: int
    message: str | None = None
    license: str | None = None  # Regenerated license (self-hosted stores this)


class StripePublishableKeyResponse(BaseModel):
    """Response containing the Stripe publishable key."""

    publishable_key: str


================================================
FILE: backend/ee/onyx/server/billing/service.py
================================================
"""Service layer for billing operations.

This module provides functions for billing operations that route differently
based on deployment type:

- Self-hosted (not MULTI_TENANT): Routes through cloud data plane proxy
  Flow: Self-hosted backend → Cloud DP /proxy/* → Control plane

- Cloud (MULTI_TENANT): Routes directly to control plane
  Flow: Cloud backend → Control plane
"""

from typing import Literal

import httpx

from ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

# HTTP request timeout for billing service calls
_REQUEST_TIMEOUT = 30.0


def _get_proxy_headers(license_data: str | None) -> dict[str, str]:
    """Build headers for proxy requests (self-hosted).

    Self-hosted instances authenticate with their license.
    """
    headers = {"Content-Type": "application/json"}
    if license_data:
        headers["Authorization"] = f"Bearer {license_data}"
    return headers


def _get_direct_headers() -> dict[str, str]:
    """Build headers for direct control plane requests (cloud).

    Cloud instances authenticate with JWT.
    """
    token = generate_data_plane_token()
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }


def _get_base_url() -> str:
    """Get the base URL based on deployment type."""
    if MULTI_TENANT:
        return CONTROL_PLANE_API_BASE_URL
    return f"{CLOUD_DATA_PLANE_URL}/proxy"


def _get_headers(license_data: str | None) -> dict[str, str]:
    """Get appropriate headers based on deployment type."""
    if MULTI_TENANT:
        return _get_direct_headers()
    return _get_proxy_headers(license_data)


async def _make_billing_request(
    method: Literal["GET", "POST"],
    path: str,
    license_data: str | None = None,
    body: dict | None = None,
    params: dict | None = None,
    error_message: str = "Billing service request failed",
) -> dict:
    """Make an HTTP request to the billing service.

    Consolidates the common HTTP request pattern used by all billing operations.

    Args:
        method: HTTP method (GET or POST)
        path: URL path (appended to base URL)
        license_data: License for authentication (self-hosted)
        body: Request body for POST requests
        params: Query parameters for GET requests
        error_message: Default error message if request fails

    Returns:
        Response JSON as dict

    Raises:
        OnyxError: If request fails
    """

    base_url = _get_base_url()
    url = f"{base_url}{path}"
    headers = _get_headers(license_data)

    try:
        async with httpx.AsyncClient(
            timeout=_REQUEST_TIMEOUT, follow_redirects=True
        ) as client:
            if method == "GET":
                response = await client.get(url, headers=headers, params=params)
            else:
                response = await client.post(url, headers=headers, json=body)

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        detail = error_message
        try:
            error_data = e.response.json()
            detail = error_data.get("detail", detail)
        except Exception:
            pass
        logger.error(f"{error_message}: {e.response.status_code} - {detail}")
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            detail,
            status_code_override=e.response.status_code,
        )

    except httpx.RequestError:
        logger.exception("Failed to connect to billing service")
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY, "Failed to connect to billing service"
        )


async def create_checkout_session(
    billing_period: str = "monthly",
    seats: int | None = None,
    email: str | None = None,
    license_data: str | None = None,
    redirect_url: str | None = None,
    tenant_id: str | None = None,
) -> CreateCheckoutSessionResponse:
    """Create a Stripe checkout session.

    Args:
        billing_period: "monthly" or "annual"
        seats: Number of seats to purchase (optional, uses default if not provided)
        email: Customer email for new subscriptions
        license_data: Existing license for renewals (self-hosted)
        redirect_url: URL to redirect after successful checkout
        tenant_id: Tenant ID (cloud only, for renewals)

    Returns:
        CreateCheckoutSessionResponse with checkout URL
    """
    body: dict = {"billing_period": billing_period}
    if seats is not None:
        body["seats"] = seats
    if email:
        body["email"] = email
    if redirect_url:
        body["redirect_url"] = redirect_url
    if tenant_id and MULTI_TENANT:
        body["tenant_id"] = tenant_id

    data = await _make_billing_request(
        method="POST",
        path="/create-checkout-session",
        license_data=license_data,
        body=body,
        error_message="Failed to create checkout session",
    )
    return CreateCheckoutSessionResponse(stripe_checkout_url=data["url"])


async def create_customer_portal_session(
    license_data: str | None = None,
    return_url: str | None = None,
    tenant_id: str | None = None,
) -> CreateCustomerPortalSessionResponse:
    """Create a Stripe customer portal session.

    Args:
        license_data: License blob for authentication (self-hosted)
        return_url: URL to return to after portal session
        tenant_id: Tenant ID (cloud only)

    Returns:
        CreateCustomerPortalSessionResponse with portal URL
    """
    body: dict = {}
    if return_url:
        body["return_url"] = return_url
    if tenant_id and MULTI_TENANT:
        body["tenant_id"] = tenant_id

    data = await _make_billing_request(
        method="POST",
        path="/create-customer-portal-session",
        license_data=license_data,
        body=body,
        error_message="Failed to create customer portal session",
    )
    return CreateCustomerPortalSessionResponse(stripe_customer_portal_url=data["url"])


async def get_billing_information(
    license_data: str | None = None,
    tenant_id: str | None = None,
) -> BillingInformationResponse | SubscriptionStatusResponse:
    """Fetch billing information.

    Args:
        license_data: License blob for authentication (self-hosted)
        tenant_id: Tenant ID (cloud only)

    Returns:
        BillingInformationResponse or SubscriptionStatusResponse if no subscription
    """
    params = {}
    if tenant_id and MULTI_TENANT:
        params["tenant_id"] = tenant_id

    data = await _make_billing_request(
        method="GET",
        path="/billing-information",
        license_data=license_data,
        params=params or None,
        error_message="Failed to fetch billing information",
    )

    # Check if no subscription
    if isinstance(data, dict) and data.get("subscribed") is False:
        return SubscriptionStatusResponse(subscribed=False)

    return BillingInformationResponse(**data)


async def update_seat_count(
    new_seat_count: int,
    license_data: str | None = None,
    tenant_id: str | None = None,
) -> SeatUpdateResponse:
    """Update the seat count for the current subscription.

    Args:
        new_seat_count: New number of seats
        license_data: License blob for authentication (self-hosted)
        tenant_id: Tenant ID (cloud only)

    Returns:
        SeatUpdateResponse with updated seat information
    """
    body: dict = {"new_seat_count": new_seat_count}
    if tenant_id and MULTI_TENANT:
        body["tenant_id"] = tenant_id

    data = await _make_billing_request(
        method="POST",
        path="/seats/update",
        license_data=license_data,
        body=body,
        error_message="Failed to update seat count",
    )

    return SeatUpdateResponse(
        success=data.get("success", False),
        current_seats=data.get("current_seats", 0),
        used_seats=data.get("used_seats", 0),
        message=data.get("message"),
        license=data.get("license"),
    )


================================================
FILE: backend/ee/onyx/server/documents/cc_pair.py
================================================
from datetime import datetime
from http import HTTPStatus

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from ee.onyx.background.celery.tasks.doc_permission_syncing.tasks import (
    try_creating_permissions_sync_task,
)
from ee.onyx.background.celery.tasks.external_group_syncing.tasks import (
    try_creating_external_group_sync_task,
)
from onyx.auth.users import current_curator_or_admin_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.db.connector_credential_pair import (
    get_connector_credential_pair_from_id_for_user,
)
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import get_redis_client
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()
router = APIRouter(prefix="/manage")


@router.get("/admin/cc-pair/{cc_pair_id}/sync-permissions")
def get_cc_pair_latest_sync(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> datetime | None:
    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="cc_pair not found for current user's permissions",
        )

    return cc_pair.last_time_perm_sync


@router.post("/admin/cc-pair/{cc_pair_id}/sync-permissions")
def sync_cc_pair(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[None]:
    """Triggers permissions sync on a particular cc_pair immediately"""
    tenant_id = get_current_tenant_id()

    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="Connection not found for current user's permissions",
        )

    r = get_redis_client()

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if redis_connector.permissions.fenced:
        raise HTTPException(
            status_code=HTTPStatus.CONFLICT,
            detail="Permissions sync task already in progress.",
        )

    logger.info(
        f"Permissions sync cc_pair={cc_pair_id} "
        f"connector_id={cc_pair.connector_id} "
        f"credential_id={cc_pair.credential_id} "
        f"{cc_pair.connector.name} connector."
    )
    payload_id = try_creating_permissions_sync_task(
        client_app, cc_pair_id, r, tenant_id
    )
    if not payload_id:
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail="Permissions sync task creation failed.",
        )

    logger.info(f"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}")

    return StatusResponse(
        success=True,
        message="Successfully created the permissions sync task.",
    )


@router.get("/admin/cc-pair/{cc_pair_id}/sync-groups")
def get_cc_pair_latest_group_sync(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> datetime | None:
    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="cc_pair not found for current user's permissions",
        )

    return cc_pair.last_time_external_group_sync


@router.post("/admin/cc-pair/{cc_pair_id}/sync-groups")
def sync_cc_pair_groups(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[None]:
    """Triggers group sync on a particular cc_pair immediately"""
    tenant_id = get_current_tenant_id()

    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="Connection not found for current user's permissions",
        )

    r = get_redis_client()

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if redis_connector.external_group_sync.fenced:
        raise HTTPException(
            status_code=HTTPStatus.CONFLICT,
            detail="External group sync task already in progress.",
        )

    logger.info(
        f"External group sync cc_pair={cc_pair_id} "
        f"connector_id={cc_pair.connector_id} "
        f"credential_id={cc_pair.credential_id} "
        f"{cc_pair.connector.name} connector."
    )
    payload_id = try_creating_external_group_sync_task(
        client_app, cc_pair_id, r, tenant_id
    )
    if not payload_id:
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail="External group sync task creation failed.",
        )

    logger.info(f"External group sync queued: cc_pair={cc_pair_id} id={payload_id}")

    return StatusResponse(
        success=True,
        message="Successfully created the external group sync task.",
    )


================================================
FILE: backend/ee/onyx/server/enterprise_settings/api.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any

import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from fastapi import status
from fastapi import UploadFile
from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL
from ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload
from ee.onyx.server.enterprise_settings.models import EnterpriseSettings
from ee.onyx.server.enterprise_settings.store import get_logo_filename
from ee.onyx.server.enterprise_settings.store import get_logotype_filename
from ee.onyx.server.enterprise_settings.store import load_analytics_script
from ee.onyx.server.enterprise_settings.store import load_settings
from ee.onyx.server.enterprise_settings.store import store_analytics_script
from ee.onyx.server.enterprise_settings.store import store_settings
from ee.onyx.server.enterprise_settings.store import upload_logo
from ee.onyx.server.scim.auth import generate_scim_token
from ee.onyx.server.scim.models import ScimTokenCreate
from ee.onyx.server.scim.models import ScimTokenCreatedResponse
from ee.onyx.server.scim.models import ScimTokenResponse
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user_with_expired_token
from onyx.auth.users import get_user_manager
from onyx.auth.users import UserManager
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.file_store.file_store import get_default_file_store
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import get_current_tenant_id

admin_router = APIRouter(prefix="/admin/enterprise-settings")
basic_router = APIRouter(prefix="/enterprise-settings")

logger = setup_logger()


class RefreshTokenData(BaseModel):
    access_token: str
    refresh_token: str
    session: dict = Field(..., description="Contains session information")
    userinfo: dict = Field(..., description="Contains user information")

    def __init__(self, **data: Any) -> None:
        super().__init__(**data)
        if "exp" not in self.session:
            raise ValueError("'exp' must be set in the session dictionary")
        if "userId" not in self.userinfo or "email" not in self.userinfo:
            raise ValueError(
                "'userId' and 'email' must be set in the userinfo dictionary"
            )


@basic_router.post("/refresh-token")
async def refresh_access_token(
    refresh_token: RefreshTokenData,
    user: User = Depends(current_user_with_expired_token),
    user_manager: UserManager = Depends(get_user_manager),
) -> None:
    try:
        logger.debug(f"Received response from Meechum auth URL for user {user.id}")

        # Extract new tokens
        new_access_token = refresh_token.access_token
        new_refresh_token = refresh_token.refresh_token

        new_expiry = datetime.fromtimestamp(
            refresh_token.session["exp"] / 1000, tz=timezone.utc
        )
        expires_at_timestamp = int(new_expiry.timestamp())

        logger.debug(f"Access token has been refreshed for user {user.id}")

        await user_manager.oauth_callback(
            oauth_name="custom",
            access_token=new_access_token,
            account_id=refresh_token.userinfo["userId"],
            account_email=refresh_token.userinfo["email"],
            expires_at=expires_at_timestamp,
            refresh_token=new_refresh_token,
            associate_by_email=True,
        )

        logger.info(f"Successfully refreshed tokens for user {user.id}")

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 401:
            logger.warning(f"Full authentication required for user {user.id}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Full authentication required",
            )
        logger.error(
            f"HTTP error occurred while refreshing token for user {user.id}: {str(e)}"
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to refresh token",
        )
    except Exception as e:
        logger.error(
            f"Unexpected error occurred while refreshing token for user {user.id}: {str(e)}"
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="An unexpected error occurred",
        )


@admin_router.put("")
def admin_ee_put_settings(
    settings: EnterpriseSettings, _: User = Depends(current_admin_user)
) -> None:
    store_settings(settings)


@basic_router.get("")
def ee_fetch_settings() -> EnterpriseSettings:
    if MULTI_TENANT:
        tenant_id = get_current_tenant_id()
        if not tenant_id or tenant_id == POSTGRES_DEFAULT_SCHEMA:
            raise BasicAuthenticationError(detail="User must authenticate")

    return load_settings()


@admin_router.put("/logo")
def put_logo(
    file: UploadFile,
    is_logotype: bool = False,
    _: User = Depends(current_admin_user),
) -> None:
    upload_logo(file=file, is_logotype=is_logotype)


def fetch_logo_helper(db_session: Session) -> Response:  # noqa: ARG001
    try:
        file_store = get_default_file_store()
        onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
        if not onyx_file:
            raise ValueError("get_onyx_file returned None!")
    except Exception:
        logger.exception("Faield to fetch logo file")
        raise HTTPException(
            status_code=404,
            detail="No logo file found",
        )
    else:
        return Response(
            content=onyx_file.data,
            media_type=onyx_file.mime_type,
            headers={"Cache-Control": "no-cache"},
        )


def fetch_logotype_helper(db_session: Session) -> Response:  # noqa: ARG001
    try:
        file_store = get_default_file_store()
        onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())
        if not onyx_file:
            raise ValueError("get_onyx_file returned None!")
    except Exception:
        raise HTTPException(
            status_code=404,
            detail="No logotype file found",
        )
    else:
        return Response(content=onyx_file.data, media_type=onyx_file.mime_type)


@basic_router.get("/logotype")
def fetch_logotype(db_session: Session = Depends(get_session)) -> Response:
    return fetch_logotype_helper(db_session)


@basic_router.get("/logo")
def fetch_logo(
    is_logotype: bool = False, db_session: Session = Depends(get_session)
) -> Response:
    if is_logotype:
        return fetch_logotype_helper(db_session)

    return fetch_logo_helper(db_session)


@admin_router.put("/custom-analytics-script")
def upload_custom_analytics_script(
    script_upload: AnalyticsScriptUpload, _: User = Depends(current_admin_user)
) -> None:
    try:
        store_analytics_script(script_upload)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@basic_router.get("/custom-analytics-script")
def fetch_custom_analytics_script() -> str | None:
    return load_analytics_script()


# ---------------------------------------------------------------------------
# SCIM token management
# ---------------------------------------------------------------------------


def _get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:
    return ScimDAL(db_session)


@admin_router.get("/scim/token")
def get_active_scim_token(
    _: User = Depends(current_admin_user),
    dal: ScimDAL = Depends(_get_scim_dal),
) -> ScimTokenResponse:
    """Return the currently active SCIM token's metadata, or 404 if none."""
    token = dal.get_active_token()
    if not token:
        raise HTTPException(status_code=404, detail="No active SCIM token")

    # Derive the IdP domain from the first synced user as a heuristic.
    idp_domain: str | None = None
    mappings, _total = dal.list_user_mappings(start_index=1, count=1)
    if mappings:
        user = dal.get_user(mappings[0].user_id)
        if user and "@" in user.email:
            idp_domain = user.email.rsplit("@", 1)[1]

    return ScimTokenResponse(
        id=token.id,
        name=token.name,
        token_display=token.token_display,
        is_active=token.is_active,
        created_at=token.created_at,
        last_used_at=token.last_used_at,
        idp_domain=idp_domain,
    )


@admin_router.post("/scim/token", status_code=201)
def create_scim_token(
    body: ScimTokenCreate,
    user: User = Depends(current_admin_user),
    dal: ScimDAL = Depends(_get_scim_dal),
) -> ScimTokenCreatedResponse:
    """Create a new SCIM bearer token.

    Only one token is active at a time — creating a new token automatically
    revokes all previous tokens. The raw token value is returned exactly once
    in the response; it cannot be retrieved again.
    """
    raw_token, hashed_token, token_display = generate_scim_token()
    token = dal.create_token(
        name=body.name,
        hashed_token=hashed_token,
        token_display=token_display,
        created_by_id=user.id,
    )
    dal.commit()

    return ScimTokenCreatedResponse(
        id=token.id,
        name=token.name,
        token_display=token.token_display,
        is_active=token.is_active,
        created_at=token.created_at,
        last_used_at=token.last_used_at,
        raw_token=raw_token,
    )


================================================
FILE: backend/ee/onyx/server/enterprise_settings/models.py
================================================
from enum import Enum
from typing import Any
from typing import List

from pydantic import BaseModel
from pydantic import Field


class NavigationItem(BaseModel):
    link: str
    title: str
    # Right now must be one of the FA icons
    icon: str | None = None
    # NOTE: SVG must not have a width / height specified
    # This is the actual SVG as a string. Done this way to reduce
    # complexity / having to store additional "logos" in Postgres
    svg_logo: str | None = None

    @classmethod
    def model_validate(cls, *args: Any, **kwargs: Any) -> "NavigationItem":
        instance = super().model_validate(*args, **kwargs)
        if bool(instance.icon) == bool(instance.svg_logo):
            raise ValueError("Exactly one of fa_icon or svg_logo must be specified")
        return instance


class LogoDisplayStyle(str, Enum):
    LOGO_AND_NAME = "logo_and_name"
    LOGO_ONLY = "logo_only"
    NAME_ONLY = "name_only"


class EnterpriseSettings(BaseModel):
    """General settings that only apply to the Enterprise Edition of Onyx

    NOTE: don't put anything sensitive in here, as this is accessible without auth."""

    application_name: str | None = None
    use_custom_logo: bool = False
    use_custom_logotype: bool = False
    logo_display_style: LogoDisplayStyle | None = None

    # custom navigation
    custom_nav_items: List[NavigationItem] = Field(default_factory=list)

    # custom Chat components
    two_lines_for_chat_header: bool | None = None
    custom_lower_disclaimer_content: str | None = None
    custom_header_content: str | None = None
    custom_popup_header: str | None = None
    custom_popup_content: str | None = None
    enable_consent_screen: bool | None = None
    consent_screen_prompt: str | None = None
    show_first_visit_notice: bool | None = None
    custom_greeting_message: str | None = None

    def check_validity(self) -> None:
        return


class AnalyticsScriptUpload(BaseModel):
    script: str
    secret_key: str


================================================
FILE: backend/ee/onyx/server/enterprise_settings/store.py
================================================
import os
from io import BytesIO
from typing import Any
from typing import cast
from typing import IO

from fastapi import HTTPException
from fastapi import UploadFile

from ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload
from ee.onyx.server.enterprise_settings.models import EnterpriseSettings
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY
from onyx.configs.constants import KV_ENTERPRISE_SETTINGS_KEY
from onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.utils.logger import setup_logger


logger = setup_logger()

_LOGO_FILENAME = "__logo__"
_LOGOTYPE_FILENAME = "__logotype__"


def load_settings() -> EnterpriseSettings:
    """Loads settings data directly from DB. This should be used primarily
    for checking what is actually in the DB, aka for editing and saving back settings.

    Runtime settings actually used by the application should be checked with
    load_runtime_settings as defaults may be applied at runtime.
    """

    dynamic_config_store = get_kv_store()
    try:
        settings = EnterpriseSettings(
            **cast(dict, dynamic_config_store.load(KV_ENTERPRISE_SETTINGS_KEY))
        )
    except KvKeyNotFoundError:
        settings = EnterpriseSettings()
        dynamic_config_store.store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump())

    return settings


def store_settings(settings: EnterpriseSettings) -> None:
    """Stores settings directly to the kv store / db."""

    get_kv_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump())


def load_runtime_settings() -> EnterpriseSettings:
    """Loads settings from DB and applies any defaults or transformations for use
    at runtime.

    Should not be stored back to the DB.
    """
    enterprise_settings = load_settings()
    if not enterprise_settings.application_name:
        enterprise_settings.application_name = ONYX_DEFAULT_APPLICATION_NAME

    return enterprise_settings


_CUSTOM_ANALYTICS_SECRET_KEY = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY")


def load_analytics_script() -> str | None:
    dynamic_config_store = get_kv_store()
    try:
        return cast(str, dynamic_config_store.load(KV_CUSTOM_ANALYTICS_SCRIPT_KEY))
    except KvKeyNotFoundError:
        return None


def store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> None:
    if (
        not _CUSTOM_ANALYTICS_SECRET_KEY
        or analytics_script_upload.secret_key != _CUSTOM_ANALYTICS_SECRET_KEY
    ):
        raise ValueError("Invalid secret key")

    get_kv_store().store(KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script)


def is_valid_file_type(filename: str) -> bool:
    valid_extensions = (".png", ".jpg", ".jpeg")
    return filename.endswith(valid_extensions)


def guess_file_type(filename: str) -> str:
    if filename.lower().endswith(".png"):
        return "image/png"
    elif filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
        return "image/jpeg"
    return "application/octet-stream"


def upload_logo(file: UploadFile | str, is_logotype: bool = False) -> bool:
    content: IO[Any]

    if isinstance(file, str):
        logger.notice(f"Uploading logo from local path {file}")
        if not os.path.isfile(file) or not is_valid_file_type(file):
            logger.error(
                "Invalid file type- only .png, .jpg, and .jpeg files are allowed"
            )
            return False

        with open(file, "rb") as file_handle:
            file_content = file_handle.read()
        content = BytesIO(file_content)
        display_name = file
        file_type = guess_file_type(file)

    else:
        logger.notice("Uploading logo from uploaded file")
        if not file.filename or not is_valid_file_type(file.filename):
            raise HTTPException(
                status_code=400,
                detail="Invalid file type- only .png, .jpg, and .jpeg files are allowed",
            )
        content = file.file
        display_name = file.filename
        file_type = file.content_type or "image/jpeg"

    file_store = get_default_file_store()
    file_store.save_file(
        content=content,
        display_name=display_name,
        file_origin=FileOrigin.OTHER,
        file_type=file_type,
        file_id=_LOGOTYPE_FILENAME if is_logotype else _LOGO_FILENAME,
    )
    return True


def get_logo_filename() -> str:
    return _LOGO_FILENAME


def get_logotype_filename() -> str:
    return _LOGOTYPE_FILENAME


================================================
FILE: backend/ee/onyx/server/evals/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/evals/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends

from ee.onyx.auth.users import current_cloud_superuser
from onyx.background.celery.apps.client import celery_app as client_app
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.models import User
from onyx.evals.models import EvalConfigurationOptions
from onyx.server.evals.models import EvalRunAck
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/evals")


@router.post("/eval_run", response_model=EvalRunAck)
def eval_run(
    request: EvalConfigurationOptions,
    user: User = Depends(current_cloud_superuser),  # noqa: ARG001
) -> EvalRunAck:
    """
    Run an evaluation with the given message and optional dataset.
    This endpoint requires a valid API key for authentication.
    """
    client_app.send_task(
        OnyxCeleryTask.EVAL_RUN_TASK,
        kwargs={
            "configuration_dict": request.model_dump(),
        },
    )
    return EvalRunAck(success=True)


================================================
FILE: backend/ee/onyx/server/features/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/features/hooks/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/features/hooks/api.py
================================================
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import Query
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import User
from onyx.db.constants import UNSET
from onyx.db.constants import UnsetType
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.hook import create_hook__no_commit
from onyx.db.hook import delete_hook__no_commit
from onyx.db.hook import get_hook_by_id
from onyx.db.hook import get_hook_execution_logs
from onyx.db.hook import get_hooks
from onyx.db.hook import update_hook__no_commit
from onyx.db.models import Hook
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.api_dependencies import require_hook_enabled
from onyx.hooks.models import HookCreateRequest
from onyx.hooks.models import HookExecutionRecord
from onyx.hooks.models import HookPointMetaResponse
from onyx.hooks.models import HookResponse
from onyx.hooks.models import HookUpdateRequest
from onyx.hooks.models import HookValidateResponse
from onyx.hooks.models import HookValidateStatus
from onyx.hooks.registry import get_all_specs
from onyx.hooks.registry import get_hook_point_spec
from onyx.utils.logger import setup_logger
from onyx.utils.url import SSRFException
from onyx.utils.url import validate_outbound_http_url

logger = setup_logger()

# ---------------------------------------------------------------------------
# SSRF protection
# ---------------------------------------------------------------------------


def _check_ssrf_safety(endpoint_url: str) -> None:
    """Raise OnyxError if endpoint_url could be used for SSRF.

    Delegates to validate_outbound_http_url with https_only=True.
    Uses BAD_GATEWAY so the frontend maps the error to the Endpoint URL field.
    """
    try:
        validate_outbound_http_url(endpoint_url, https_only=True)
    except (SSRFException, ValueError) as e:
        raise OnyxError(OnyxErrorCode.BAD_GATEWAY, str(e))


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _hook_to_response(hook: Hook, creator_email: str | None = None) -> HookResponse:
    return HookResponse(
        id=hook.id,
        name=hook.name,
        hook_point=hook.hook_point,
        endpoint_url=hook.endpoint_url,
        api_key_masked=(
            hook.api_key.get_value(apply_mask=True) if hook.api_key else None
        ),
        fail_strategy=hook.fail_strategy,
        timeout_seconds=hook.timeout_seconds,
        is_active=hook.is_active,
        is_reachable=hook.is_reachable,
        creator_email=(
            creator_email
            if creator_email is not None
            else (hook.creator.email if hook.creator else None)
        ),
        created_at=hook.created_at,
        updated_at=hook.updated_at,
    )


def _get_hook_or_404(
    db_session: Session,
    hook_id: int,
    include_creator: bool = False,
) -> Hook:
    hook = get_hook_by_id(
        db_session=db_session,
        hook_id=hook_id,
        include_creator=include_creator,
    )
    if hook is None:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook {hook_id} not found.")
    return hook


def _raise_for_validation_failure(validation: HookValidateResponse) -> None:
    """Raise an appropriate OnyxError for a non-passed validation result."""
    if validation.status == HookValidateStatus.auth_failed:
        raise OnyxError(OnyxErrorCode.CREDENTIAL_INVALID, validation.error_message)
    if validation.status == HookValidateStatus.timeout:
        raise OnyxError(
            OnyxErrorCode.GATEWAY_TIMEOUT,
            f"Endpoint validation failed: {validation.error_message}",
        )
    raise OnyxError(
        OnyxErrorCode.BAD_GATEWAY,
        f"Endpoint validation failed: {validation.error_message}",
    )


def _validate_endpoint(
    endpoint_url: str,
    api_key: str | None,
    timeout_seconds: float,
) -> HookValidateResponse:
    """Check whether endpoint_url is reachable by sending an empty POST request.

    We use POST since hook endpoints expect POST requests. The server will typically
    respond with 4xx (missing/invalid body) — that is fine. Any HTTP response means
    the server is up and routable. A 401/403 response returns auth_failed
    (not reachable — indicates the api_key is invalid).

    Timeout handling:
    - Any httpx.TimeoutException (ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) →
      timeout (operator should consider increasing timeout_seconds).
    - All other exceptions → cannot_connect.
    """
    _check_ssrf_safety(endpoint_url)
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    try:
        with httpx.Client(timeout=timeout_seconds, follow_redirects=False) as client:
            response = client.post(endpoint_url, headers=headers)
        if response.status_code in (401, 403):
            return HookValidateResponse(
                status=HookValidateStatus.auth_failed,
                error_message=f"Authentication failed (HTTP {response.status_code})",
            )
        return HookValidateResponse(status=HookValidateStatus.passed)
    except httpx.TimeoutException as exc:
        # Any timeout (connect, read, or write) means the configured timeout_seconds
        # is too low for this endpoint. Report as timeout so the UI directs the user
        # to increase the timeout setting.
        logger.warning(
            "Hook endpoint validation: timeout for %s",
            endpoint_url,
            exc_info=exc,
        )
        return HookValidateResponse(
            status=HookValidateStatus.timeout,
            error_message="Endpoint timed out — consider increasing timeout_seconds.",
        )
    except Exception as exc:
        logger.warning(
            "Hook endpoint validation: connection error for %s",
            endpoint_url,
            exc_info=exc,
        )
        return HookValidateResponse(
            status=HookValidateStatus.cannot_connect, error_message=str(exc)
        )


# ---------------------------------------------------------------------------
# Routers
# ---------------------------------------------------------------------------

router = APIRouter(prefix="/admin/hooks")


# ---------------------------------------------------------------------------
# Hook endpoints
# ---------------------------------------------------------------------------


@router.get("/specs")
def get_hook_point_specs(
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
) -> list[HookPointMetaResponse]:
    return [
        HookPointMetaResponse(
            hook_point=spec.hook_point,
            display_name=spec.display_name,
            description=spec.description,
            docs_url=spec.docs_url,
            input_schema=spec.input_schema,
            output_schema=spec.output_schema,
            default_timeout_seconds=spec.default_timeout_seconds,
            default_fail_strategy=spec.default_fail_strategy,
            fail_hard_description=spec.fail_hard_description,
        )
        for spec in get_all_specs()
    ]


@router.get("")
def list_hooks(
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> list[HookResponse]:
    hooks = get_hooks(db_session=db_session, include_creator=True)
    return [_hook_to_response(h) for h in hooks]


@router.post("")
def create_hook(
    req: HookCreateRequest,
    user: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookResponse:
    """Create a new hook. The endpoint is validated before persisting — creation fails if
    the endpoint cannot be reached or the api_key is invalid. Hooks are created active.
    """
    spec = get_hook_point_spec(req.hook_point)
    api_key = req.api_key.get_secret_value() if req.api_key else None
    validation = _validate_endpoint(
        endpoint_url=req.endpoint_url,
        api_key=api_key,
        timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,
    )
    if validation.status != HookValidateStatus.passed:
        _raise_for_validation_failure(validation)

    hook = create_hook__no_commit(
        db_session=db_session,
        name=req.name,
        hook_point=req.hook_point,
        endpoint_url=req.endpoint_url,
        api_key=api_key,
        fail_strategy=req.fail_strategy or spec.default_fail_strategy,
        timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,
        is_active=True,
        is_reachable=True,
        creator_id=user.id,
    )
    db_session.commit()
    return _hook_to_response(hook, creator_email=user.email)


@router.get("/{hook_id}")
def get_hook(
    hook_id: int,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookResponse:
    hook = _get_hook_or_404(db_session, hook_id, include_creator=True)
    return _hook_to_response(hook)


@router.patch("/{hook_id}")
def update_hook(
    hook_id: int,
    req: HookUpdateRequest,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookResponse:
    """Update hook fields. If endpoint_url, api_key, or timeout_seconds changes, the
    endpoint is re-validated using the effective values. For active hooks the update is
    rejected on validation failure, keeping live traffic unaffected. For inactive hooks
    the update goes through regardless and is_reachable is updated to reflect the result.

    Note: if an active hook's endpoint is currently down, even a timeout_seconds-only
    increase will be rejected. The recovery flow is: deactivate → update → reactivate.
    """
    # api_key: UNSET = no change, None = clear, value = update
    api_key: str | None | UnsetType
    if "api_key" not in req.model_fields_set:
        api_key = UNSET
    elif req.api_key is None:
        api_key = None
    else:
        api_key = req.api_key.get_secret_value()

    endpoint_url_changing = "endpoint_url" in req.model_fields_set
    api_key_changing = not isinstance(api_key, UnsetType)
    timeout_changing = "timeout_seconds" in req.model_fields_set

    validated_is_reachable: bool | None = None
    if endpoint_url_changing or api_key_changing or timeout_changing:
        existing = _get_hook_or_404(db_session, hook_id)
        effective_url: str = (
            req.endpoint_url if endpoint_url_changing else existing.endpoint_url  # type: ignore[assignment]  # endpoint_url is required on create and cannot be cleared on update
        )
        effective_api_key: str | None = (
            (api_key if not isinstance(api_key, UnsetType) else None)
            if api_key_changing
            else (
                existing.api_key.get_value(apply_mask=False)
                if existing.api_key
                else None
            )
        )
        effective_timeout: float = (
            req.timeout_seconds if timeout_changing else existing.timeout_seconds  # type: ignore[assignment]  # req.timeout_seconds is non-None when timeout_changing (validated by HookUpdateRequest)
        )
        validation = _validate_endpoint(
            endpoint_url=effective_url,
            api_key=effective_api_key,
            timeout_seconds=effective_timeout,
        )
        if existing.is_active and validation.status != HookValidateStatus.passed:
            _raise_for_validation_failure(validation)
        validated_is_reachable = validation.status == HookValidateStatus.passed

    hook = update_hook__no_commit(
        db_session=db_session,
        hook_id=hook_id,
        name=req.name,
        endpoint_url=(req.endpoint_url if endpoint_url_changing else UNSET),
        api_key=api_key,
        fail_strategy=req.fail_strategy,
        timeout_seconds=req.timeout_seconds,
        is_reachable=validated_is_reachable,
        include_creator=True,
    )
    db_session.commit()
    return _hook_to_response(hook)


@router.delete("/{hook_id}")
def delete_hook(
    hook_id: int,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> None:
    delete_hook__no_commit(db_session=db_session, hook_id=hook_id)
    db_session.commit()


@router.post("/{hook_id}/activate")
def activate_hook(
    hook_id: int,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookResponse:
    hook = _get_hook_or_404(db_session, hook_id)
    if not hook.endpoint_url:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT, "Hook has no endpoint URL configured."
        )

    api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None
    validation = _validate_endpoint(
        endpoint_url=hook.endpoint_url,
        api_key=api_key,
        timeout_seconds=hook.timeout_seconds,
    )
    if validation.status != HookValidateStatus.passed:
        # Persist is_reachable=False in a separate session so the request
        # session has no commits on the failure path and the transaction
        # boundary stays clean.
        if hook.is_reachable is not False:
            with get_session_with_current_tenant() as side_session:
                update_hook__no_commit(
                    db_session=side_session, hook_id=hook_id, is_reachable=False
                )
                side_session.commit()
        _raise_for_validation_failure(validation)

    hook = update_hook__no_commit(
        db_session=db_session,
        hook_id=hook_id,
        is_active=True,
        is_reachable=True,
        include_creator=True,
    )
    db_session.commit()
    return _hook_to_response(hook)


@router.post("/{hook_id}/validate")
def validate_hook(
    hook_id: int,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookValidateResponse:
    hook = _get_hook_or_404(db_session, hook_id)
    if not hook.endpoint_url:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT, "Hook has no endpoint URL configured."
        )

    api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None
    validation = _validate_endpoint(
        endpoint_url=hook.endpoint_url,
        api_key=api_key,
        timeout_seconds=hook.timeout_seconds,
    )
    validation_passed = validation.status == HookValidateStatus.passed
    if hook.is_reachable != validation_passed:
        update_hook__no_commit(
            db_session=db_session, hook_id=hook_id, is_reachable=validation_passed
        )
        db_session.commit()
    return validation


@router.post("/{hook_id}/deactivate")
def deactivate_hook(
    hook_id: int,
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> HookResponse:
    hook = update_hook__no_commit(
        db_session=db_session,
        hook_id=hook_id,
        is_active=False,
        include_creator=True,
    )
    db_session.commit()
    return _hook_to_response(hook)


# ---------------------------------------------------------------------------
# Execution log endpoints
# ---------------------------------------------------------------------------


@router.get("/{hook_id}/execution-logs")
def list_hook_execution_logs(
    hook_id: int,
    limit: int = Query(default=10, ge=1, le=100),
    _: User = Depends(current_admin_user),
    _hook_enabled: None = Depends(require_hook_enabled),
    db_session: Session = Depends(get_session),
) -> list[HookExecutionRecord]:
    _get_hook_or_404(db_session, hook_id)
    logs = get_hook_execution_logs(db_session=db_session, hook_id=hook_id, limit=limit)
    return [
        HookExecutionRecord(
            error_message=log.error_message,
            status_code=log.status_code,
            duration_ms=log.duration_ms,
            created_at=log.created_at,
        )
        for log in logs
    ]


================================================
FILE: backend/ee/onyx/server/license/api.py
================================================
"""License API endpoints for self-hosted deployments.

These endpoints allow self-hosted Onyx instances to:
1. Claim a license after Stripe checkout (via cloud data plane proxy)
2. Upload a license file manually (for air-gapped deployments)
3. View license status and seat usage
4. Refresh/delete the local license

NOTE: Cloud (MULTI_TENANT) deployments do NOT use these endpoints.
Cloud licensing is managed via the control plane and gated_tenants Redis key.
"""

import requests
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import UploadFile
from sqlalchemy.orm import Session

from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL
from ee.onyx.db.license import delete_license as db_delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_license_metadata
from ee.onyx.db.license import invalidate_license_cache
from ee.onyx.db.license import refresh_license_cache
from ee.onyx.db.license import update_license_cache
from ee.onyx.db.license import upsert_license
from ee.onyx.server.license.models import LicenseResponse
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import LicenseStatusResponse
from ee.onyx.server.license.models import LicenseUploadResponse
from ee.onyx.server.license.models import SeatUsageResponse
from ee.onyx.utils.license import verify_license_signature
from onyx.auth.users import User
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

router = APIRouter(prefix="/license")

# PEM-style delimiters used in license file format
_PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
_PEM_END = "-----END ONYX LICENSE-----"


def _strip_pem_delimiters(content: str) -> str:
    """Strip PEM-style delimiters from license content if present."""
    content = content.strip()
    if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
        # Remove first and last lines (the delimiters)
        lines = content.split("\n")
        return "\n".join(lines[1:-1]).strip()
    return content


@router.get("")
async def get_license_status(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LicenseStatusResponse:
    """Get current license status and seat usage."""
    metadata = get_license_metadata(db_session)

    if not metadata:
        return LicenseStatusResponse(has_license=False)

    return LicenseStatusResponse(
        has_license=True,
        seats=metadata.seats,
        used_seats=metadata.used_seats,
        plan_type=metadata.plan_type,
        issued_at=metadata.issued_at,
        expires_at=metadata.expires_at,
        grace_period_end=metadata.grace_period_end,
        status=metadata.status,
        source=metadata.source,
    )


@router.get("/seats")
async def get_seat_usage(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> SeatUsageResponse:
    """Get detailed seat usage information."""
    metadata = get_license_metadata(db_session)

    if not metadata:
        return SeatUsageResponse(
            total_seats=0,
            used_seats=0,
            available_seats=0,
        )

    return SeatUsageResponse(
        total_seats=metadata.seats,
        used_seats=metadata.used_seats,
        available_seats=max(0, metadata.seats - metadata.used_seats),
    )


@router.post("/claim")
async def claim_license(
    session_id: str | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LicenseResponse:
    """
    Claim a license from the control plane (self-hosted only).

    Two modes:
    1. With session_id: After Stripe checkout, exchange session_id for license
    2. Without session_id: Re-claim using existing license for auth

    Use without session_id after:
    - Updating seats via the billing API
    - Returning from the Stripe customer portal
    - Any operation that regenerates the license on control plane
    Claim a license from the control plane (self-hosted only).

    Two modes:
    1. With session_id: After Stripe checkout, exchange session_id for license
    2. Without session_id: Re-claim using existing license for auth
    """
    if MULTI_TENANT:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "License claiming is only available for self-hosted deployments",
        )

    try:
        if session_id:
            # Claim license after checkout using session_id
            url = f"{CLOUD_DATA_PLANE_URL}/proxy/claim-license"
            response = requests.post(
                url,
                json={"session_id": session_id},
                headers={"Content-Type": "application/json"},
                timeout=30,
            )
        else:
            # Re-claim using existing license for auth
            metadata = get_license_metadata(db_session)
            if not metadata or not metadata.tenant_id:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    "No license found. Provide session_id after checkout.",
                )

            license_row = get_license(db_session)
            if not license_row or not license_row.license_data:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    "No license found in database",
                )

            url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
            response = requests.get(
                url,
                headers={
                    "Authorization": f"Bearer {license_row.license_data}",
                    "Content-Type": "application/json",
                },
                timeout=30,
            )

        response.raise_for_status()

        data = response.json()
        license_data = data.get("license")

        if not license_data:
            raise OnyxError(OnyxErrorCode.NOT_FOUND, "No license in response")

        # Verify signature before persisting
        payload = verify_license_signature(license_data)

        # Store in DB
        upsert_license(db_session, license_data)

        try:
            update_license_cache(payload, source=LicenseSource.AUTO_FETCH)
        except Exception as cache_error:
            logger.warning(f"Failed to update license cache: {cache_error}")

        logger.info(
            f"License claimed: seats={payload.seats}, expires={payload.expires_at.date()}"
        )
        return LicenseResponse(success=True, license=payload)

    except requests.HTTPError as e:
        status_code = e.response.status_code if e.response is not None else 502
        detail = "Failed to claim license"
        try:
            error_data = e.response.json() if e.response is not None else {}
            detail = error_data.get("detail", detail)
        except Exception:
            pass
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=status_code
        )
    except ValueError as e:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
    except requests.RequestException:
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY, "Failed to connect to license server"
        )


@router.post("/upload")
async def upload_license(
    license_file: UploadFile = File(...),
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LicenseUploadResponse:
    """
    Upload a license file manually (self-hosted only).

    Used for air-gapped deployments where the cloud data plane is not accessible.
    The license file must be cryptographically signed by Onyx.
    """
    if MULTI_TENANT:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "License upload is only available for self-hosted deployments",
        )

    try:
        content = await license_file.read()
        license_data = content.decode("utf-8").strip()
        # Strip PEM-style delimiters if present (used in .lic file format)
        license_data = _strip_pem_delimiters(license_data)
        # Remove any stray whitespace/newlines from user input
        license_data = license_data.strip()
    except UnicodeDecodeError:
        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Invalid license file format")

    # Verify cryptographic signature - this is the only validation needed
    # The license's tenant_id identifies the customer in control plane, not locally
    try:
        payload = verify_license_signature(license_data)
    except ValueError as e:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))

    # Persist to DB and update cache
    upsert_license(db_session, license_data)

    try:
        update_license_cache(payload, source=LicenseSource.MANUAL_UPLOAD)
    except Exception as cache_error:
        logger.warning(f"Failed to update license cache: {cache_error}")

    return LicenseUploadResponse(
        success=True,
        message=f"License uploaded successfully. {payload.seats} seats, expires {payload.expires_at.date()}",
    )


@router.post("/refresh")
async def refresh_license_cache_endpoint(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LicenseStatusResponse:
    """
    Force refresh the license cache from the local database.

    Useful after manual database changes or to verify license validity.
    Does NOT fetch from control plane - use /claim for that.
    """
    metadata = refresh_license_cache(db_session)

    if not metadata:
        return LicenseStatusResponse(has_license=False)

    return LicenseStatusResponse(
        has_license=True,
        seats=metadata.seats,
        used_seats=metadata.used_seats,
        plan_type=metadata.plan_type,
        issued_at=metadata.issued_at,
        expires_at=metadata.expires_at,
        grace_period_end=metadata.grace_period_end,
        status=metadata.status,
        source=metadata.source,
    )


@router.delete("")
async def delete_license(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, bool]:
    """
    Delete the current license.

    Admin only - removes license from database and invalidates cache.
    """
    if MULTI_TENANT:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "License deletion is only available for self-hosted deployments",
        )

    try:
        invalidate_license_cache()
    except Exception as cache_error:
        logger.warning(f"Failed to invalidate license cache: {cache_error}")

    deleted = db_delete_license(db_session)

    return {"deleted": deleted}


================================================
FILE: backend/ee/onyx/server/license/models.py
================================================
from datetime import datetime
from enum import Enum

from pydantic import BaseModel

from onyx.server.settings.models import ApplicationStatus


class PlanType(str, Enum):
    MONTHLY = "monthly"
    ANNUAL = "annual"


class LicenseSource(str, Enum):
    AUTO_FETCH = "auto_fetch"
    MANUAL_UPLOAD = "manual_upload"


class LicensePayload(BaseModel):
    """The payload portion of a signed license."""

    version: str
    tenant_id: str
    organization_name: str | None = None
    issued_at: datetime
    expires_at: datetime
    seats: int
    plan_type: PlanType
    billing_cycle: str | None = None
    grace_period_days: int = 30
    stripe_subscription_id: str | None = None
    stripe_customer_id: str | None = None


class LicenseData(BaseModel):
    """Full signed license structure."""

    payload: LicensePayload
    signature: str


class LicenseMetadata(BaseModel):
    """Cached license metadata stored in Redis."""

    tenant_id: str
    organization_name: str | None = None
    seats: int
    used_seats: int
    plan_type: PlanType
    issued_at: datetime
    expires_at: datetime
    grace_period_end: datetime | None = None
    status: ApplicationStatus
    source: LicenseSource | None = None
    stripe_subscription_id: str | None = None


class LicenseStatusResponse(BaseModel):
    """Response for license status API."""

    has_license: bool
    seats: int = 0
    used_seats: int = 0
    plan_type: PlanType | None = None
    issued_at: datetime | None = None
    expires_at: datetime | None = None
    grace_period_end: datetime | None = None
    status: ApplicationStatus | None = None
    source: LicenseSource | None = None


class LicenseResponse(BaseModel):
    """Response after license fetch/upload."""

    success: bool
    message: str | None = None
    license: LicensePayload | None = None


class LicenseUploadResponse(BaseModel):
    """Response after license upload."""

    success: bool
    message: str | None = None


class SeatUsageResponse(BaseModel):
    """Response for seat usage API."""

    total_seats: int
    used_seats: int
    available_seats: int


================================================
FILE: backend/ee/onyx/server/manage/standard_answer.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from ee.onyx.db.standard_answer import fetch_standard_answer
from ee.onyx.db.standard_answer import fetch_standard_answer_categories
from ee.onyx.db.standard_answer import fetch_standard_answer_category
from ee.onyx.db.standard_answer import fetch_standard_answers
from ee.onyx.db.standard_answer import insert_standard_answer
from ee.onyx.db.standard_answer import insert_standard_answer_category
from ee.onyx.db.standard_answer import remove_standard_answer
from ee.onyx.db.standard_answer import update_standard_answer
from ee.onyx.db.standard_answer import update_standard_answer_category
from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.manage.models import StandardAnswer
from onyx.server.manage.models import StandardAnswerCategory
from onyx.server.manage.models import StandardAnswerCategoryCreationRequest
from onyx.server.manage.models import StandardAnswerCreationRequest

router = APIRouter(prefix="/manage")


@router.post("/admin/standard-answer")
def create_standard_answer(
    standard_answer_creation_request: StandardAnswerCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> StandardAnswer:
    standard_answer_model = insert_standard_answer(
        keyword=standard_answer_creation_request.keyword,
        answer=standard_answer_creation_request.answer,
        category_ids=standard_answer_creation_request.categories,
        match_regex=standard_answer_creation_request.match_regex,
        match_any_keywords=standard_answer_creation_request.match_any_keywords,
        db_session=db_session,
    )
    return StandardAnswer.from_model(standard_answer_model)


@router.get("/admin/standard-answer")
def list_standard_answers(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> list[StandardAnswer]:
    standard_answer_models = fetch_standard_answers(db_session=db_session)
    return [
        StandardAnswer.from_model(standard_answer_model)
        for standard_answer_model in standard_answer_models
    ]


@router.patch("/admin/standard-answer/{standard_answer_id}")
def patch_standard_answer(
    standard_answer_id: int,
    standard_answer_creation_request: StandardAnswerCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> StandardAnswer:
    existing_standard_answer = fetch_standard_answer(
        standard_answer_id=standard_answer_id,
        db_session=db_session,
    )

    if existing_standard_answer is None:
        raise HTTPException(status_code=404, detail="Standard answer not found")

    standard_answer_model = update_standard_answer(
        standard_answer_id=standard_answer_id,
        keyword=standard_answer_creation_request.keyword,
        answer=standard_answer_creation_request.answer,
        category_ids=standard_answer_creation_request.categories,
        match_regex=standard_answer_creation_request.match_regex,
        match_any_keywords=standard_answer_creation_request.match_any_keywords,
        db_session=db_session,
    )
    return StandardAnswer.from_model(standard_answer_model)


@router.delete("/admin/standard-answer/{standard_answer_id}")
def delete_standard_answer(
    standard_answer_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> None:
    return remove_standard_answer(
        standard_answer_id=standard_answer_id,
        db_session=db_session,
    )


@router.post("/admin/standard-answer/category")
def create_standard_answer_category(
    standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> StandardAnswerCategory:
    standard_answer_category_model = insert_standard_answer_category(
        category_name=standard_answer_category_creation_request.name,
        db_session=db_session,
    )
    return StandardAnswerCategory.from_model(standard_answer_category_model)


@router.get("/admin/standard-answer/category")
def list_standard_answer_categories(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> list[StandardAnswerCategory]:
    standard_answer_category_models = fetch_standard_answer_categories(
        db_session=db_session
    )
    return [
        StandardAnswerCategory.from_model(standard_answer_category_model)
        for standard_answer_category_model in standard_answer_category_models
    ]


@router.patch("/admin/standard-answer/category/{standard_answer_category_id}")
def patch_standard_answer_category(
    standard_answer_category_id: int,
    standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> StandardAnswerCategory:
    existing_standard_answer_category = fetch_standard_answer_category(
        standard_answer_category_id=standard_answer_category_id,
        db_session=db_session,
    )

    if existing_standard_answer_category is None:
        raise HTTPException(
            status_code=404, detail="Standard answer category not found"
        )

    standard_answer_category_model = update_standard_answer_category(
        standard_answer_category_id=standard_answer_category_id,
        category_name=standard_answer_category_creation_request.name,
        db_session=db_session,
    )
    return StandardAnswerCategory.from_model(standard_answer_category_model)


================================================
FILE: backend/ee/onyx/server/middleware/license_enforcement.py
================================================
"""Middleware to enforce license status for SELF-HOSTED deployments only.

NOTE: This middleware is NOT used for multi-tenant (cloud) deployments.
Multi-tenant gating is handled separately by the control plane via the
/tenants/product-gating endpoint and is_tenant_gated() checks.

IMPORTANT: Mutual Exclusivity with ENTERPRISE_EDITION_ENABLED
============================================================
This middleware is controlled by LICENSE_ENFORCEMENT_ENABLED env var.
It works alongside the legacy ENTERPRISE_EDITION_ENABLED system:

- LICENSE_ENFORCEMENT_ENABLED=false (default):
  Middleware is disabled. EE features are controlled solely by
  ENTERPRISE_EDITION_ENABLED. This preserves legacy behavior.

- LICENSE_ENFORCEMENT_ENABLED=true:
  Middleware actively enforces license status. EE features require
  a valid license, regardless of ENTERPRISE_EDITION_ENABLED.

Eventually, ENTERPRISE_EDITION_ENABLED will be removed and license
enforcement will be the only mechanism for gating EE features.

License Enforcement States (when enabled)
=========================================
For self-hosted deployments:

1. No license (never subscribed):
   - Allow community features (basic connectors, search, chat)
   - Block EE-only features (analytics, user groups, etc.)

2. GATED_ACCESS (fully expired):
   - Block all routes except billing/auth/license
   - User must renew subscription to continue

3. Valid license (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER):
   - Full access to all EE features
   - Seat limits enforced
   - GRACE_PERIOD/PAYMENT_REMINDER are for notifications only, not blocking
"""

import logging
from collections.abc import Awaitable
from collections.abc import Callable

from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from sqlalchemy.exc import SQLAlchemyError

from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.configs.license_enforcement_config import EE_ONLY_PATH_PREFIXES
from ee.onyx.configs.license_enforcement_config import (
    LICENSE_ENFORCEMENT_ALLOWED_PREFIXES,
)
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
from shared_configs.contextvars import get_current_tenant_id


def _is_path_allowed(path: str) -> bool:
    """Check if path is in allowlist (prefix match)."""
    return any(
        path.startswith(prefix) for prefix in LICENSE_ENFORCEMENT_ALLOWED_PREFIXES
    )


def _is_ee_only_path(path: str) -> bool:
    """Check if path requires EE license (prefix match)."""
    return any(path.startswith(prefix) for prefix in EE_ONLY_PATH_PREFIXES)


def add_license_enforcement_middleware(
    app: FastAPI, logger: logging.LoggerAdapter
) -> None:
    logger.info("License enforcement middleware registered")

    @app.middleware("http")
    async def enforce_license(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        """Block requests when license is expired/gated."""
        if not LICENSE_ENFORCEMENT_ENABLED:
            return await call_next(request)

        path = request.url.path
        if path.startswith("/api"):
            path = path[4:]

        if _is_path_allowed(path):
            return await call_next(request)

        is_gated = False
        tenant_id = get_current_tenant_id()

        try:
            metadata = get_cached_license_metadata(tenant_id)

            # If no cached metadata, check database (cache may have been cleared)
            if not metadata:
                logger.debug(
                    "[license_enforcement] No cached license, checking database..."
                )
                try:
                    with get_session_with_current_tenant() as db_session:
                        metadata = refresh_license_cache(db_session, tenant_id)
                        if metadata:
                            logger.info(
                                "[license_enforcement] Loaded license from database"
                            )
                except SQLAlchemyError as db_error:
                    logger.warning(
                        f"[license_enforcement] Failed to check database for license: {db_error}"
                    )

            if metadata:
                # User HAS a license (current or expired)
                if metadata.status == ApplicationStatus.GATED_ACCESS:
                    # License fully expired - gate the user
                    # Note: GRACE_PERIOD and PAYMENT_REMINDER are for notifications only,
                    # they don't block access
                    is_gated = True
                else:
                    # License is active - check seat limit
                    # used_seats in cache is kept accurate via invalidation
                    # when users are added/removed
                    if metadata.used_seats > metadata.seats:
                        logger.info(
                            f"[license_enforcement] Blocking request: "
                            f"seat limit exceeded ({metadata.used_seats}/{metadata.seats})"
                        )
                        return JSONResponse(
                            status_code=402,
                            content={
                                "detail": {
                                    "error": "seat_limit_exceeded",
                                    "message": f"Seat limit exceeded: {metadata.used_seats} of {metadata.seats} seats used.",
                                    "used_seats": metadata.used_seats,
                                    "seats": metadata.seats,
                                }
                            },
                        )
            else:
                # No license in cache OR database = never subscribed
                # Allow community features, but block EE-only features
                if _is_ee_only_path(path):
                    logger.info(
                        f"[license_enforcement] Blocking EE-only path (no license): {path}"
                    )
                    return JSONResponse(
                        status_code=402,
                        content={
                            "detail": {
                                "error": "enterprise_license_required",
                                "message": "This feature requires an Enterprise license. "
                                "Please upgrade to access this functionality.",
                            }
                        },
                    )
                logger.debug(
                    "[license_enforcement] No license, allowing community features"
                )
                is_gated = False
        except CACHE_TRANSIENT_ERRORS as e:
            logger.warning(f"Failed to check license metadata: {e}")
            # Fail open - don't block users due to cache connectivity issues
            is_gated = False

        if is_gated:
            logger.info(
                f"[license_enforcement] Blocking request (license expired): {path}"
            )

            return JSONResponse(
                status_code=402,
                content={
                    "detail": {
                        "error": "license_expired",
                        "message": "Your subscription has expired. Please update your billing.",
                    }
                },
            )

        return await call_next(request)


================================================
FILE: backend/ee/onyx/server/middleware/tenant_tracking.py
================================================
import logging
from collections.abc import Awaitable
from collections.abc import Callable

from fastapi import FastAPI
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response

from ee.onyx.auth.users import decode_anonymous_user_jwt_token
from onyx.auth.utils import extract_tenant_from_auth_header
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.redis.redis_pool import retrieve_auth_token_data_from_redis
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


def add_api_server_tenant_id_middleware(
    app: FastAPI, logger: logging.LoggerAdapter
) -> None:
    @app.middleware("http")
    async def set_tenant_id(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        """Extracts the tenant id from multiple locations and sets the context var.

        This is very specific to the api server and probably not something you'd want
        to use elsewhere.
        """
        try:
            if MULTI_TENANT:
                tenant_id = await _get_tenant_id_from_request(request, logger)
            else:
                tenant_id = POSTGRES_DEFAULT_SCHEMA

            CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
            return await call_next(request)

        except Exception as e:
            logger.exception(f"Error in tenant ID middleware: {str(e)}")
            raise


async def _get_tenant_id_from_request(
    request: Request, logger: logging.LoggerAdapter
) -> str:
    """
    Attempt to extract tenant_id from:
    1) The API key or PAT (Personal Access Token) header
    2) The Redis-based token (stored in Cookie: fastapiusersauth)
    3) The anonymous user cookie
    Fallback: POSTGRES_DEFAULT_SCHEMA
    """
    # Check for API key or PAT in Authorization header
    tenant_id = extract_tenant_from_auth_header(request)
    if tenant_id is not None:
        return tenant_id

    try:
        # Look up token data in Redis

        token_data = await retrieve_auth_token_data_from_redis(request)

        if token_data:
            tenant_id_from_payload = token_data.get(
                "tenant_id", POSTGRES_DEFAULT_SCHEMA
            )

            tenant_id = (
                str(tenant_id_from_payload)
                if tenant_id_from_payload is not None
                else None
            )

            if tenant_id and not is_valid_schema_name(tenant_id):
                raise HTTPException(status_code=400, detail="Invalid tenant ID format")

        # Check for anonymous user cookie
        anonymous_user_cookie = request.cookies.get(ANONYMOUS_USER_COOKIE_NAME)
        if anonymous_user_cookie:
            try:
                anonymous_user_data = decode_anonymous_user_jwt_token(
                    anonymous_user_cookie
                )
                tenant_id = anonymous_user_data.get(
                    "tenant_id", POSTGRES_DEFAULT_SCHEMA
                )

                if not tenant_id or not is_valid_schema_name(tenant_id):
                    raise HTTPException(
                        status_code=400, detail="Invalid tenant ID format"
                    )

                return tenant_id

            except Exception as e:
                logger.error(f"Error decoding anonymous user cookie: {str(e)}")
                # Continue and attempt to authenticate

        logger.debug(
            "Token data not found or expired in Redis, defaulting to POSTGRES_DEFAULT_SCHEMA"
        )

        # Return POSTGRES_DEFAULT_SCHEMA, so non-authenticated requests are sent to the default schema
        # The CURRENT_TENANT_ID_CONTEXTVAR is initialized with POSTGRES_DEFAULT_SCHEMA,
        # so we maintain consistency by returning it here when no valid tenant is found.
        return POSTGRES_DEFAULT_SCHEMA

    except Exception as e:
        logger.error(f"Unexpected error in _get_tenant_id_from_request: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

    finally:
        if tenant_id:
            return tenant_id

        # As a final step, check for explicit tenant_id cookie
        tenant_id_cookie = request.cookies.get(TENANT_ID_COOKIE_NAME)
        if tenant_id_cookie and is_valid_schema_name(tenant_id_cookie):
            return tenant_id_cookie

        # If we've reached this point, return the default schema
        return POSTGRES_DEFAULT_SCHEMA


================================================
FILE: backend/ee/onyx/server/oauth/api.py
================================================
import base64
import uuid

from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import JSONResponse

from ee.onyx.server.oauth.api_router import router
from ee.onyx.server.oauth.confluence_cloud import ConfluenceCloudOAuth
from ee.onyx.server.oauth.google_drive import GoogleDriveOAuth
from ee.onyx.server.oauth.slack import SlackOAuth
from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.constants import DocumentSource
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


@router.post("/prepare-authorization-request")
def prepare_authorization_request(
    connector: DocumentSource,
    redirect_on_success: str | None,
    user: User = Depends(current_admin_user),
    tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
    """Used by the frontend to generate the url for the user's browser during auth request.

    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
    """

    # create random oauth state param for security and to retrieve user data later
    oauth_uuid = uuid.uuid4()
    oauth_uuid_str = str(oauth_uuid)

    # urlsafe b64 encode the uuid for the oauth url
    oauth_state = (
        base64.urlsafe_b64encode(oauth_uuid.bytes).rstrip(b"=").decode("utf-8")
    )

    session: str | None = None
    if connector == DocumentSource.SLACK:
        if not DEV_MODE:
            oauth_url = SlackOAuth.generate_oauth_url(oauth_state)
        else:
            oauth_url = SlackOAuth.generate_dev_oauth_url(oauth_state)

        session = SlackOAuth.session_dump_json(
            email=user.email, redirect_on_success=redirect_on_success
        )
    elif connector == DocumentSource.CONFLUENCE:
        if not DEV_MODE:
            oauth_url = ConfluenceCloudOAuth.generate_oauth_url(oauth_state)
        else:
            oauth_url = ConfluenceCloudOAuth.generate_dev_oauth_url(oauth_state)
        session = ConfluenceCloudOAuth.session_dump_json(
            email=user.email, redirect_on_success=redirect_on_success
        )
    elif connector == DocumentSource.GOOGLE_DRIVE:
        if not DEV_MODE:
            oauth_url = GoogleDriveOAuth.generate_oauth_url(oauth_state)
        else:
            oauth_url = GoogleDriveOAuth.generate_dev_oauth_url(oauth_state)
        session = GoogleDriveOAuth.session_dump_json(
            email=user.email, redirect_on_success=redirect_on_success
        )
    else:
        oauth_url = None

    if not oauth_url:
        raise HTTPException(
            status_code=404,
            detail=f"The document source type {connector} does not have OAuth implemented",
        )

    if not session:
        raise HTTPException(
            status_code=500,
            detail=f"The document source type {connector} failed to generate an OAuth session.",
        )

    r = get_redis_client(tenant_id=tenant_id)

    # store important session state to retrieve when the user is redirected back
    # 10 min is the max we want an oauth flow to be valid
    r.set(f"da_oauth:{oauth_uuid_str}", session, ex=600)

    return JSONResponse(content={"url": oauth_url})


================================================
FILE: backend/ee/onyx/server/oauth/api_router.py
================================================
from fastapi import APIRouter

router: APIRouter = APIRouter(prefix="/oauth")


================================================
FILE: backend/ee/onyx/server/oauth/confluence_cloud.py
================================================
import base64
import uuid
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast

import requests
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydantic import ValidationError
from sqlalchemy.orm import Session

from ee.onyx.server.oauth.api_router import router
from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.utils import CONFLUENCE_OAUTH_TOKEN_URL
from onyx.db.credentials import create_credential
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.credentials import update_credential_json
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


class ConfluenceCloudOAuth:
    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/

    class OAuthSession(BaseModel):
        """Stored in redis to be looked up on callback"""

        email: str
        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds

    class TokenResponse(BaseModel):
        access_token: str
        expires_in: int
        token_type: str
        refresh_token: str
        scope: str

    class AccessibleResources(BaseModel):
        id: str
        name: str
        url: str
        scopes: list[str]
        avatarUrl: str

    CLIENT_ID = OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
    CLIENT_SECRET = OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
    TOKEN_URL = CONFLUENCE_OAUTH_TOKEN_URL

    ACCESSIBLE_RESOURCE_URL = (
        "https://api.atlassian.com/oauth/token/accessible-resources"
    )

    # All read scopes per https://developer.atlassian.com/cloud/confluence/scopes-for-oauth-2-3LO-and-forge-apps/
    CONFLUENCE_OAUTH_SCOPE = (
        # classic scope
        "read:confluence-space.summary%20"
        "read:confluence-props%20"
        "read:confluence-content.all%20"
        "read:confluence-content.summary%20"
        "read:confluence-content.permission%20"
        "read:confluence-user%20"
        "read:confluence-groups%20"
        "read:space:confluence%20"
        "readonly:content.attachment:confluence%20"
        "search:confluence%20"
        # granular scope
        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
        "read:content-details:confluence%20"  # for permission sync
        "offline_access"
    )

    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/confluence/oauth/callback"
    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"

    # eventually for Confluence Data Center
    # oauth_url = (
    #     f"http://localhost:8090/rest/oauth/v2/authorize?client_id={CONFLUENCE_OAUTH_CLIENT_ID}"
    #     f"&scope={CONFLUENCE_OAUTH_SCOPE_2}"
    #     f"&redirect_uri={redirectme_uri}"
    # )

    @classmethod
    def generate_oauth_url(cls, state: str) -> str:
        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)

    @classmethod
    def generate_dev_oauth_url(cls, state: str) -> str:
        """dev mode workaround for localhost testing
        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
        """
        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)

    @classmethod
    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
        # https://developer.atlassian.com/cloud/jira/platform/oauth-2-3lo-apps/#1--direct-the-user-to-the-authorization-url-to-get-an-authorization-code

        url = (
            "https://auth.atlassian.com/authorize"
            f"?audience=api.atlassian.com"
            f"&client_id={cls.CLIENT_ID}"
            f"&scope={cls.CONFLUENCE_OAUTH_SCOPE}"
            f"&redirect_uri={redirect_uri}"
            f"&state={state}"
            "&response_type=code"
            "&prompt=consent"
        )
        return url

    @classmethod
    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
        """Temporary state to store in redis. to be looked up on auth response.
        Returns a json string.
        """
        session = ConfluenceCloudOAuth.OAuthSession(
            email=email, redirect_on_success=redirect_on_success
        )
        return session.model_dump_json()

    @classmethod
    def parse_session(cls, session_json: str) -> OAuthSession:
        session = ConfluenceCloudOAuth.OAuthSession.model_validate_json(session_json)
        return session

    @classmethod
    def generate_finalize_url(cls, credential_id: int) -> str:
        return f"{WEB_DOMAIN}/admin/connectors/confluence/oauth/finalize?credential={credential_id}"


@router.post("/connector/confluence/callback")
def confluence_oauth_callback(
    code: str,
    state: str,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
    """Handles the backend logic for the frontend page that the user is redirected to
    after visiting the oauth authorization url."""

    if not ConfluenceCloudOAuth.CLIENT_ID or not ConfluenceCloudOAuth.CLIENT_SECRET:
        raise HTTPException(
            status_code=500,
            detail="Confluence Cloud client ID or client secret is not configured.",
        )

    r = get_redis_client(tenant_id=tenant_id)

    # recover the state
    padded_state = state + "=" * (
        -len(state) % 4
    )  # Add padding back (Base64 decoding requires padding)
    uuid_bytes = base64.urlsafe_b64decode(
        padded_state
    )  # Decode the Base64 string back to bytes

    # Convert bytes back to a UUID
    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
    oauth_uuid_str = str(oauth_uuid)

    r_key = f"da_oauth:{oauth_uuid_str}"

    session_json_bytes = cast(bytes, r.get(r_key))
    if not session_json_bytes:
        raise HTTPException(
            status_code=400,
            detail=f"Confluence Cloud OAuth failed - OAuth state key not found: key={r_key}",
        )

    session_json = session_json_bytes.decode("utf-8")
    try:
        session = ConfluenceCloudOAuth.parse_session(session_json)

        if not DEV_MODE:
            redirect_uri = ConfluenceCloudOAuth.REDIRECT_URI
        else:
            redirect_uri = ConfluenceCloudOAuth.DEV_REDIRECT_URI

        # Exchange the authorization code for an access token
        response = requests.post(
            ConfluenceCloudOAuth.TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "client_id": ConfluenceCloudOAuth.CLIENT_ID,
                "client_secret": ConfluenceCloudOAuth.CLIENT_SECRET,
                "code": code,
                "redirect_uri": redirect_uri,
                "grant_type": "authorization_code",
            },
        )

        token_response: ConfluenceCloudOAuth.TokenResponse | None = None

        try:
            token_response = ConfluenceCloudOAuth.TokenResponse.model_validate_json(
                response.text
            )
        except Exception:
            raise RuntimeError(
                "Confluence Cloud OAuth failed during code/token exchange."
            )

        now = datetime.now(timezone.utc)
        expires_at = now + timedelta(seconds=token_response.expires_in)

        credential_info = CredentialBase(
            credential_json={
                "confluence_access_token": token_response.access_token,
                "confluence_refresh_token": token_response.refresh_token,
                "created_at": now.isoformat(),
                "expires_at": expires_at.isoformat(),
                "expires_in": token_response.expires_in,
                "scope": token_response.scope,
            },
            admin_public=True,
            source=DocumentSource.CONFLUENCE,
            name="Confluence Cloud OAuth",
        )

        credential = create_credential(credential_info, user, db_session)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "success": False,
                "message": f"An error occurred during Confluence Cloud OAuth: {str(e)}",
            },
        )
    finally:
        r.delete(r_key)

    # return the result
    return JSONResponse(
        content={
            "success": True,
            "message": "Confluence Cloud OAuth completed successfully.",
            "finalize_url": ConfluenceCloudOAuth.generate_finalize_url(credential.id),
            "redirect_on_success": session.redirect_on_success,
        }
    )


@router.get("/connector/confluence/accessible-resources")
def confluence_oauth_accessible_resources(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str | None = Depends(get_current_tenant_id),  # noqa: ARG001
) -> JSONResponse:
    """Atlassian's API is weird and does not supply us with enough info to be in a
    usable state after authorizing.  All API's require a cloud id. We have to list
    the accessible resources/sites and let the user choose which site to use."""

    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if not credential:
        raise HTTPException(400, f"Credential {credential_id} not found.")

    credential_dict = (
        credential.credential_json.get_value(apply_mask=False)
        if credential.credential_json
        else {}
    )
    access_token = credential_dict["confluence_access_token"]

    try:
        # Exchange the authorization code for an access token
        response = requests.get(
            ConfluenceCloudOAuth.ACCESSIBLE_RESOURCE_URL,
            headers={
                "Authorization": f"Bearer {access_token}",
                "Accept": "application/json",
            },
        )

        response.raise_for_status()
        accessible_resources_data = response.json()

        # Validate the list of AccessibleResources
        try:
            accessible_resources = [
                ConfluenceCloudOAuth.AccessibleResources(**resource)
                for resource in accessible_resources_data
            ]
        except ValidationError as e:
            raise RuntimeError(f"Failed to parse accessible resources: {e}")
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "success": False,
                "message": f"An error occurred retrieving Confluence Cloud accessible resources: {str(e)}",
            },
        )

    # return the result
    return JSONResponse(
        content={
            "success": True,
            "message": "Confluence Cloud get accessible resources completed successfully.",
            "accessible_resources": [
                resource.model_dump() for resource in accessible_resources
            ],
        }
    )


@router.post("/connector/confluence/finalize")
def confluence_oauth_finalize(
    credential_id: int,
    cloud_id: str,
    cloud_name: str,
    cloud_url: str,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str | None = Depends(get_current_tenant_id),  # noqa: ARG001
) -> JSONResponse:
    """Saves the info for the selected cloud site to the credential.
    This is the final step in the confluence oauth flow where after the traditional
    OAuth process, the user has to select a site to associate with the credentials.
    After this, the credential is usable."""

    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if not credential:
        raise HTTPException(
            status_code=400,
            detail=f"Confluence Cloud OAuth failed - credential {credential_id} not found.",
        )

    existing_credential_json = (
        credential.credential_json.get_value(apply_mask=False)
        if credential.credential_json
        else {}
    )
    new_credential_json: dict[str, Any] = dict(existing_credential_json)
    new_credential_json["cloud_id"] = cloud_id
    new_credential_json["cloud_name"] = cloud_name
    new_credential_json["wiki_base"] = cloud_url

    try:
        update_credential_json(credential_id, new_credential_json, user, db_session)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "success": False,
                "message": f"An error occurred during Confluence Cloud OAuth: {str(e)}",
            },
        )

    # return the result
    return JSONResponse(
        content={
            "success": True,
            "message": "Confluence Cloud OAuth finalized successfully.",
            "redirect_url": f"{WEB_DOMAIN}/admin/connectors/confluence",
        }
    )


================================================
FILE: backend/ee/onyx/server/oauth/google_drive.py
================================================
import base64
import json
import uuid
from typing import Any
from typing import cast

import requests
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.server.oauth.api_router import router
from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID
from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_utils.google_auth import get_google_oauth_creds
from onyx.connectors.google_utils.google_auth import sanitize_oauth_credentials
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from shared_configs.contextvars import get_current_tenant_id


class GoogleDriveOAuth:
    # https://developers.google.com/identity/protocols/oauth2
    # https://developers.google.com/identity/protocols/oauth2/web-server

    class OAuthSession(BaseModel):
        """Stored in redis to be looked up on callback"""

        email: str
        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds

    CLIENT_ID = OAUTH_GOOGLE_DRIVE_CLIENT_ID
    CLIENT_SECRET = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET

    TOKEN_URL = "https://oauth2.googleapis.com/token"

    # SCOPE is per https://docs.danswer.dev/connectors/google-drive
    # TODO: Merge with or use google_utils.GOOGLE_SCOPES
    SCOPE = (
        "https://www.googleapis.com/auth/drive.readonly%20"
        "https://www.googleapis.com/auth/drive.metadata.readonly%20"
        "https://www.googleapis.com/auth/admin.directory.user.readonly%20"
        "https://www.googleapis.com/auth/admin.directory.group.readonly"
    )

    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/google-drive/oauth/callback"
    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"

    @classmethod
    def generate_oauth_url(cls, state: str) -> str:
        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)

    @classmethod
    def generate_dev_oauth_url(cls, state: str) -> str:
        """dev mode workaround for localhost testing
        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
        """

        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)

    @classmethod
    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
        # without prompt=consent, a refresh token is only issued the first time the user approves
        url = (
            f"https://accounts.google.com/o/oauth2/v2/auth"
            f"?client_id={cls.CLIENT_ID}"
            f"&redirect_uri={redirect_uri}"
            "&response_type=code"
            f"&scope={cls.SCOPE}"
            "&access_type=offline"
            f"&state={state}"
            "&prompt=consent"
        )
        return url

    @classmethod
    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
        """Temporary state to store in redis. to be looked up on auth response.
        Returns a json string.
        """
        session = GoogleDriveOAuth.OAuthSession(
            email=email, redirect_on_success=redirect_on_success
        )
        return session.model_dump_json()

    @classmethod
    def parse_session(cls, session_json: str) -> OAuthSession:
        session = GoogleDriveOAuth.OAuthSession.model_validate_json(session_json)
        return session


@router.post("/connector/google-drive/callback")
def handle_google_drive_oauth_callback(
    code: str,
    state: str,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
        raise HTTPException(
            status_code=500,
            detail="Google Drive client ID or client secret is not configured.",
        )

    r = get_redis_client(tenant_id=tenant_id)

    # recover the state
    padded_state = state + "=" * (
        -len(state) % 4
    )  # Add padding back (Base64 decoding requires padding)
    uuid_bytes = base64.urlsafe_b64decode(
        padded_state
    )  # Decode the Base64 string back to bytes

    # Convert bytes back to a UUID
    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
    oauth_uuid_str = str(oauth_uuid)

    r_key = f"da_oauth:{oauth_uuid_str}"

    session_json_bytes = cast(bytes, r.get(r_key))
    if not session_json_bytes:
        raise HTTPException(
            status_code=400,
            detail=f"Google Drive OAuth failed - OAuth state key not found: key={r_key}",
        )

    session_json = session_json_bytes.decode("utf-8")
    try:
        session = GoogleDriveOAuth.parse_session(session_json)

        if not DEV_MODE:
            redirect_uri = GoogleDriveOAuth.REDIRECT_URI
        else:
            redirect_uri = GoogleDriveOAuth.DEV_REDIRECT_URI

        # Exchange the authorization code for an access token
        response = requests.post(
            GoogleDriveOAuth.TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "client_id": GoogleDriveOAuth.CLIENT_ID,
                "client_secret": GoogleDriveOAuth.CLIENT_SECRET,
                "code": code,
                "redirect_uri": redirect_uri,
                "grant_type": "authorization_code",
            },
        )

        response.raise_for_status()

        authorization_response: dict[str, Any] = response.json()

        # the connector wants us to store the json in its authorized_user_info format
        # returned from OAuthCredentials.get_authorized_user_info().
        # So refresh immediately via get_google_oauth_creds with the params filled in
        # from fields in authorization_response to get the json we need
        authorized_user_info = {}
        authorized_user_info["client_id"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID
        authorized_user_info["client_secret"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
        authorized_user_info["refresh_token"] = authorization_response["refresh_token"]

        token_json_str = json.dumps(authorized_user_info)
        oauth_creds = get_google_oauth_creds(
            token_json_str=token_json_str, source=DocumentSource.GOOGLE_DRIVE
        )
        if not oauth_creds:
            raise RuntimeError("get_google_oauth_creds returned None.")

        # save off the credentials
        oauth_creds_sanitized_json_str = sanitize_oauth_credentials(oauth_creds)

        credential_dict: dict[str, str] = {}
        credential_dict[DB_CREDENTIALS_DICT_TOKEN_KEY] = oauth_creds_sanitized_json_str
        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = session.email
        credential_dict[DB_CREDENTIALS_AUTHENTICATION_METHOD] = (
            GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value
        )

        credential_info = CredentialBase(
            credential_json=credential_dict,
            admin_public=True,
            source=DocumentSource.GOOGLE_DRIVE,
            name="OAuth (interactive)",
        )

        create_credential(credential_info, user, db_session)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "success": False,
                "message": f"An error occurred during Google Drive OAuth: {str(e)}",
            },
        )
    finally:
        r.delete(r_key)

    # return the result
    return JSONResponse(
        content={
            "success": True,
            "message": "Google Drive OAuth completed successfully.",
            "finalize_url": None,
            "redirect_on_success": session.redirect_on_success,
        }
    )


================================================
FILE: backend/ee/onyx/server/oauth/slack.py
================================================
import base64
import uuid
from typing import cast

import requests
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.server.oauth.api_router import router
from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID
from onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from shared_configs.contextvars import get_current_tenant_id


class SlackOAuth:
    # https://knock.app/blog/how-to-authenticate-users-in-slack-using-oauth
    # Example: https://api.slack.com/authentication/oauth-v2#exchanging

    class OAuthSession(BaseModel):
        """Stored in redis to be looked up on callback"""

        email: str
        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds

    CLIENT_ID = OAUTH_SLACK_CLIENT_ID
    CLIENT_SECRET = OAUTH_SLACK_CLIENT_SECRET

    TOKEN_URL = "https://slack.com/api/oauth.v2.access"

    # SCOPE is per https://docs.danswer.dev/connectors/slack
    BOT_SCOPE = (
        "channels:history,"
        "channels:read,"
        "groups:history,"
        "groups:read,"
        "channels:join,"
        "im:history,"
        "users:read,"
        "users:read.email,"
        "usergroups:read"
    )

    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/slack/oauth/callback"
    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"

    @classmethod
    def generate_oauth_url(cls, state: str) -> str:
        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)

    @classmethod
    def generate_dev_oauth_url(cls, state: str) -> str:
        """dev mode workaround for localhost testing
        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
        """

        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)

    @classmethod
    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
        url = (
            f"https://slack.com/oauth/v2/authorize"
            f"?client_id={cls.CLIENT_ID}"
            f"&redirect_uri={redirect_uri}"
            f"&scope={cls.BOT_SCOPE}"
            f"&state={state}"
        )
        return url

    @classmethod
    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
        """Temporary state to store in redis. to be looked up on auth response.
        Returns a json string.
        """
        session = SlackOAuth.OAuthSession(
            email=email, redirect_on_success=redirect_on_success
        )
        return session.model_dump_json()

    @classmethod
    def parse_session(cls, session_json: str) -> OAuthSession:
        session = SlackOAuth.OAuthSession.model_validate_json(session_json)
        return session


@router.post("/connector/slack/callback")
def handle_slack_oauth_callback(
    code: str,
    state: str,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
        raise HTTPException(
            status_code=500,
            detail="Slack client ID or client secret is not configured.",
        )

    r = get_redis_client(tenant_id=tenant_id)

    # recover the state
    padded_state = state + "=" * (
        -len(state) % 4
    )  # Add padding back (Base64 decoding requires padding)
    uuid_bytes = base64.urlsafe_b64decode(
        padded_state
    )  # Decode the Base64 string back to bytes

    # Convert bytes back to a UUID
    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
    oauth_uuid_str = str(oauth_uuid)

    r_key = f"da_oauth:{oauth_uuid_str}"

    session_json_bytes = cast(bytes, r.get(r_key))
    if not session_json_bytes:
        raise HTTPException(
            status_code=400,
            detail=f"Slack OAuth failed - OAuth state key not found: key={r_key}",
        )

    session_json = session_json_bytes.decode("utf-8")
    try:
        session = SlackOAuth.parse_session(session_json)

        if not DEV_MODE:
            redirect_uri = SlackOAuth.REDIRECT_URI
        else:
            redirect_uri = SlackOAuth.DEV_REDIRECT_URI

        # Exchange the authorization code for an access token
        response = requests.post(
            SlackOAuth.TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "client_id": SlackOAuth.CLIENT_ID,
                "client_secret": SlackOAuth.CLIENT_SECRET,
                "code": code,
                "redirect_uri": redirect_uri,
            },
        )

        response_data = response.json()

        if not response_data.get("ok"):
            raise HTTPException(
                status_code=400,
                detail=f"Slack OAuth failed: {response_data.get('error')}",
            )

        # Extract token and team information
        access_token: str = response_data.get("access_token")
        team_id: str = response_data.get("team", {}).get("id")
        authed_user_id: str = response_data.get("authed_user", {}).get("id")

        credential_info = CredentialBase(
            credential_json={"slack_bot_token": access_token},
            admin_public=True,
            source=DocumentSource.SLACK,
            name="Slack OAuth",
        )

        create_credential(credential_info, user, db_session)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "success": False,
                "message": f"An error occurred during Slack OAuth: {str(e)}",
            },
        )
    finally:
        r.delete(r_key)

    # return the result
    return JSONResponse(
        content={
            "success": True,
            "message": "Slack OAuth completed successfully.",
            "finalize_url": None,
            "redirect_on_success": session.redirect_on_success,
            "team_id": team_id,
            "authed_user_id": authed_user_id,
        }
    )


================================================
FILE: backend/ee/onyx/server/query_and_chat/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/query_and_chat/models.py
================================================
from collections.abc import Sequence
from datetime import datetime

from pydantic import BaseModel
from pydantic import Field

from onyx.context.search.models import BaseFilters
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.server.manage.models import StandardAnswer


class StandardAnswerRequest(BaseModel):
    message: str
    slack_bot_categories: list[str]


class StandardAnswerResponse(BaseModel):
    standard_answers: list[StandardAnswer] = Field(default_factory=list)


class SearchFlowClassificationRequest(BaseModel):
    user_query: str


class SearchFlowClassificationResponse(BaseModel):
    is_search_flow: bool


# NOTE: This model is used for the core flow of the Onyx application, any
# changes to it should be reviewed and approved by an experienced team member.
# It is very important to 1. avoid bloat and 2. that this remains backwards
# compatible across versions.
class SendSearchQueryRequest(BaseModel):
    search_query: str
    filters: BaseFilters | None = None
    num_docs_fed_to_llm_selection: int | None = None
    run_query_expansion: bool = False
    num_hits: int = 30
    hybrid_alpha: float | None = None
    include_content: bool = False
    stream: bool = False


class SearchDocWithContent(SearchDoc):
    # Allows None because this is determined by a flag but the object used in code
    # of the search path uses this type
    content: str | None

    @classmethod
    def from_inference_sections(
        cls,
        sections: Sequence[InferenceSection],
        include_content: bool = False,
        is_internet: bool = False,
    ) -> list["SearchDocWithContent"]:
        """Convert InferenceSections to SearchDocWithContent objects.

        Args:
            sections: Sequence of InferenceSection objects
            include_content: If True, populate content field with combined_content
            is_internet: Whether these are internet search results

        Returns:
            List of SearchDocWithContent with optional content
        """
        if not sections:
            return []

        return [
            cls(
                document_id=(chunk := section.center_chunk).document_id,
                chunk_ind=chunk.chunk_id,
                semantic_identifier=chunk.semantic_identifier or "Unknown",
                link=chunk.source_links[0] if chunk.source_links else None,
                blurb=chunk.blurb,
                source_type=chunk.source_type,
                boost=chunk.boost,
                hidden=chunk.hidden,
                metadata=chunk.metadata,
                score=chunk.score,
                match_highlights=chunk.match_highlights,
                updated_at=chunk.updated_at,
                primary_owners=chunk.primary_owners,
                secondary_owners=chunk.secondary_owners,
                is_internet=is_internet,
                content=section.combined_content if include_content else None,
            )
            for section in sections
        ]


class SearchFullResponse(BaseModel):
    all_executed_queries: list[str]
    search_docs: list[SearchDocWithContent]
    # Reasoning tokens output by the LLM for the document selection
    doc_selection_reasoning: str | None = None
    # This a list of document ids that are in the search_docs list
    llm_selected_doc_ids: list[str] | None = None
    # Error message if the search failed partway through
    error: str | None = None


class SearchQueryResponse(BaseModel):
    query: str
    query_expansions: list[str] | None
    created_at: datetime


class SearchHistoryResponse(BaseModel):
    search_queries: list[SearchQueryResponse]


================================================
FILE: backend/ee/onyx/server/query_and_chat/query_backend.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from ee.onyx.onyxbot.slack.handlers.handle_standard_answers import (
    oneoff_standard_answers,
)
from ee.onyx.server.query_and_chat.models import StandardAnswerRequest
from ee.onyx.server.query_and_chat.models import StandardAnswerResponse
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.utils.logger import setup_logger

logger = setup_logger()

basic_router = APIRouter(prefix="/query")


@basic_router.get("/standard-answer")
def get_standard_answer(
    request: StandardAnswerRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> StandardAnswerResponse:
    try:
        standard_answers = oneoff_standard_answers(
            message=request.message,
            slack_bot_categories=request.slack_bot_categories,
            db_session=db_session,
        )
        return StandardAnswerResponse(standard_answers=standard_answers)
    except Exception as e:
        logger.error(f"Error in get_standard_answer: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail="An internal server error occurred")


================================================
FILE: backend/ee/onyx/server/query_and_chat/search_backend.py
================================================
from collections.abc import Generator

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

from ee.onyx.db.search import fetch_search_queries_for_user
from ee.onyx.search.process_search_query import gather_search_stream
from ee.onyx.search.process_search_query import stream_search_query
from ee.onyx.secondary_llm_flows.search_flow_classification import (
    classify_is_search_flow,
)
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SearchHistoryResponse
from ee.onyx.server.query_and_chat.models import SearchQueryResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from onyx.auth.users import current_user
from onyx.configs.app_configs import ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.llm.factory import get_default_llm
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.server.utils import get_json_line
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/search")


@router.post("/search-flow-classification")
def search_flow_classification(
    request: SearchFlowClassificationRequest,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SearchFlowClassificationResponse:
    query = request.user_query
    # This is a heuristic that if the user is typing a lot of text, it's unlikely they're looking for some specific document
    # Most likely something needs to be done with the text included so we'll just classify it as a chat flow
    if len(query) > 200:
        return SearchFlowClassificationResponse(is_search_flow=False)

    llm = get_default_llm()

    check_llm_cost_limit_for_provider(
        db_session=db_session,
        tenant_id=get_current_tenant_id(),
        llm_provider_api_key=llm.config.api_key,
    )

    try:
        is_search_flow = classify_is_search_flow(query=query, llm=llm)
    except Exception as e:
        logger.exception(
            "Search flow classification failed; defaulting to chat flow",
            exc_info=e,
        )
        is_search_flow = False

    return SearchFlowClassificationResponse(is_search_flow=is_search_flow)


# NOTE: This endpoint is used for the core flow of the Onyx application, any
# changes to it should be reviewed and approved by an experienced team member.
# It is very important to 1. avoid bloat and 2. that this remains backwards
# compatible across versions.
@router.post(
    "/send-search-message",
    response_model=None,
    dependencies=[Depends(require_vector_db)],
)
def handle_send_search_message(
    request: SendSearchQueryRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StreamingResponse | SearchFullResponse:
    """
    Executes a search query with optional streaming.

    If hybrid_alpha is unset and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH
    is True, executes pure keyword search.

    Returns:
        StreamingResponse with SSE if stream=True, otherwise SearchFullResponse.
    """
    logger.debug(f"Received search query: {request.search_query}")

    if request.hybrid_alpha is None and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH:
        request.hybrid_alpha = 0.0

    # Non-streaming path
    if not request.stream:
        try:
            packets = stream_search_query(request, user, db_session)
            return gather_search_stream(packets)
        except NotImplementedError as e:
            return SearchFullResponse(
                all_executed_queries=[],
                search_docs=[],
                error=str(e),
            )

    # Streaming path
    def stream_generator() -> Generator[str, None, None]:
        try:
            with get_session_with_current_tenant() as streaming_db_session:
                for packet in stream_search_query(request, user, streaming_db_session):
                    yield get_json_line(packet.model_dump())
        except NotImplementedError as e:
            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
        except HTTPException:
            raise
        except Exception as e:
            logger.exception("Error in search streaming")
            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())

    return StreamingResponse(stream_generator(), media_type="text/event-stream")


@router.get("/search-history")
def get_search_history(
    limit: int = 100,
    filter_days: int | None = None,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SearchHistoryResponse:
    """
    Fetch past search queries for the authenticated user.

    Args:
        limit: Maximum number of queries to return (default 100)
        filter_days: Only return queries from the last N days (optional)

    Returns:
        SearchHistoryResponse with list of search queries, ordered by most recent first.
    """
    # Validate limit
    if limit <= 0:
        raise HTTPException(
            status_code=400,
            detail="limit must be greater than 0",
        )
    if limit > 1000:
        raise HTTPException(
            status_code=400,
            detail="limit must be at most 1000",
        )

    # Validate filter_days
    if filter_days is not None and filter_days <= 0:
        raise HTTPException(
            status_code=400,
            detail="filter_days must be greater than 0",
        )

    search_queries = fetch_search_queries_for_user(
        db_session=db_session,
        user_id=user.id,
        filter_days=filter_days,
        limit=limit,
    )

    return SearchHistoryResponse(
        search_queries=[
            SearchQueryResponse(
                query=sq.query,
                query_expansions=sq.query_expansions,
                created_at=sq.created_at,
            )
            for sq in search_queries
        ]
    )


================================================
FILE: backend/ee/onyx/server/query_and_chat/streaming_models.py
================================================
from typing import Literal

from pydantic import BaseModel
from pydantic import ConfigDict

from ee.onyx.server.query_and_chat.models import SearchDocWithContent


class SearchQueriesPacket(BaseModel):
    model_config = ConfigDict(frozen=True)

    type: Literal["search_queries"] = "search_queries"
    all_executed_queries: list[str]


class SearchDocsPacket(BaseModel):
    model_config = ConfigDict(frozen=True)

    type: Literal["search_docs"] = "search_docs"
    search_docs: list[SearchDocWithContent]


class SearchErrorPacket(BaseModel):
    model_config = ConfigDict(frozen=True)

    type: Literal["search_error"] = "search_error"
    error: str


class LLMSelectedDocsPacket(BaseModel):
    model_config = ConfigDict(frozen=True)

    type: Literal["llm_selected_docs"] = "llm_selected_docs"
    # None if LLM selection failed, empty list if no docs selected, list of IDs otherwise
    llm_selected_doc_ids: list[str] | None


================================================
FILE: backend/ee/onyx/server/query_and_chat/token_limit.py
================================================
from collections import defaultdict
from collections.abc import Sequence
from datetime import datetime
from itertools import groupby
from typing import Dict
from typing import List
from typing import Tuple
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.api_key import is_api_key_email_address
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import TokenRateLimit
from onyx.db.models import TokenRateLimit__UserGroup
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.token_limit import fetch_all_user_token_rate_limits
from onyx.server.query_and_chat.token_limit import _get_cutoff_time
from onyx.server.query_and_chat.token_limit import _is_rate_limited
from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_global
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel


def _check_token_rate_limits(user: User) -> None:
    # Anonymous users are only rate limited by global settings
    if user.is_anonymous:
        _user_is_rate_limited_by_global()

    elif is_api_key_email_address(user.email):
        # API keys are only rate limited by global settings
        _user_is_rate_limited_by_global()

    else:
        run_functions_tuples_in_parallel(
            [
                (_user_is_rate_limited, (user.id,)),
                (_user_is_rate_limited_by_group, (user.id,)),
                (_user_is_rate_limited_by_global, ()),
            ]
        )


"""
User rate limits
"""


def _user_is_rate_limited(user_id: UUID) -> None:
    with get_session_with_current_tenant() as db_session:
        user_rate_limits = fetch_all_user_token_rate_limits(
            db_session=db_session, enabled_only=True, ordered=False
        )

        if user_rate_limits:
            user_cutoff_time = _get_cutoff_time(user_rate_limits)
            user_usage = _fetch_user_usage(user_id, user_cutoff_time, db_session)

            if _is_rate_limited(user_rate_limits, user_usage):
                raise HTTPException(
                    status_code=429,
                    detail="Token budget exceeded for user. Try again later.",
                )


def _fetch_user_usage(
    user_id: UUID, cutoff_time: datetime, db_session: Session
) -> Sequence[tuple[datetime, int]]:
    """
    Fetch user usage within the cutoff time, grouped by minute
    """
    result = db_session.execute(
        select(
            func.date_trunc("minute", ChatMessage.time_sent),
            func.sum(ChatMessage.token_count),
        )
        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)
        .where(ChatSession.user_id == user_id, ChatMessage.time_sent >= cutoff_time)
        .group_by(func.date_trunc("minute", ChatMessage.time_sent))
    ).all()

    return [(row[0], row[1]) for row in result]


"""
User Group rate limits
"""


def _user_is_rate_limited_by_group(user_id: UUID) -> None:
    with get_session_with_current_tenant() as db_session:
        group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)

        if group_rate_limits:
            # Group cutoff time is the same for all groups.
            # This could be optimized to only fetch the maximum cutoff time for
            # a specific group, but seems unnecessary for now.
            group_cutoff_time = _get_cutoff_time(
                [e for sublist in group_rate_limits.values() for e in sublist]
            )

            user_group_ids = list(group_rate_limits.keys())
            group_usage = _fetch_user_group_usage(
                user_group_ids, group_cutoff_time, db_session
            )

            has_at_least_one_untriggered_limit = False
            for user_group_id, rate_limits in group_rate_limits.items():
                usage = group_usage.get(user_group_id, [])

                if not _is_rate_limited(rate_limits, usage):
                    has_at_least_one_untriggered_limit = True
                    break

            if not has_at_least_one_untriggered_limit:
                raise HTTPException(
                    status_code=429,
                    detail="Token budget exceeded for user's groups. Try again later.",
                )


def _fetch_all_user_group_rate_limits(
    user_id: UUID, db_session: Session
) -> Dict[int, List[TokenRateLimit]]:
    group_limits = (
        select(TokenRateLimit, User__UserGroup.user_group_id)
        .join(
            TokenRateLimit__UserGroup,
            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,
        )
        .join(
            UserGroup,
            UserGroup.id == TokenRateLimit__UserGroup.user_group_id,
        )
        .join(
            User__UserGroup,
            User__UserGroup.user_group_id == UserGroup.id,
        )
        .where(
            User__UserGroup.user_id == user_id,
            TokenRateLimit.enabled.is_(True),
        )
    )

    raw_rate_limits = db_session.execute(group_limits).all()

    group_rate_limits = defaultdict(list)
    for rate_limit, user_group_id in raw_rate_limits:
        group_rate_limits[user_group_id].append(rate_limit)

    return group_rate_limits


def _fetch_user_group_usage(
    user_group_ids: list[int], cutoff_time: datetime, db_session: Session
) -> dict[int, list[Tuple[datetime, int]]]:
    """
    Fetch user group usage within the cutoff time, grouped by minute
    """
    user_group_usage = db_session.execute(
        select(
            func.sum(ChatMessage.token_count),
            func.date_trunc("minute", ChatMessage.time_sent),
            UserGroup.id,
        )
        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)
        .join(User__UserGroup, User__UserGroup.user_id == ChatSession.user_id)
        .join(UserGroup, UserGroup.id == User__UserGroup.user_group_id)
        .filter(UserGroup.id.in_(user_group_ids), ChatMessage.time_sent >= cutoff_time)
        .group_by(func.date_trunc("minute", ChatMessage.time_sent), UserGroup.id)
    ).all()

    return {
        user_group_id: [(usage, time_sent) for time_sent, usage, _ in group_usage]
        for user_group_id, group_usage in groupby(
            user_group_usage, key=lambda row: row[2]
        )
    }


================================================
FILE: backend/ee/onyx/server/query_history/api.py
================================================
import uuid
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

from ee.onyx.background.task_name_builders import query_history_task_name
from ee.onyx.db.query_history import get_all_query_history_export_tasks
from ee.onyx.db.query_history import get_page_of_chat_sessions
from ee.onyx.db.query_history import get_total_filtered_chat_sessions_count
from ee.onyx.server.query_history.models import ChatSessionMinimal
from ee.onyx.server.query_history.models import ChatSessionSnapshot
from ee.onyx.server.query_history.models import MessageSnapshot
from ee.onyx.server.query_history.models import QueryHistoryExport
from onyx.auth.users import current_admin_user
from onyx.auth.users import get_display_email
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.background.task_utils import construct_query_history_report_name
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import FileType
from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import QueryHistoryType
from onyx.configs.constants import SessionType
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import TaskStatus
from onyx.db.file_record import get_query_history_export_files
from onyx.db.models import ChatSession
from onyx.db.models import User
from onyx.db.tasks import get_task_with_id
from onyx.db.tasks import register_task
from onyx.file_store.file_store import get_default_file_store
from onyx.server.documents.models import PaginatedReturn
from onyx.server.query_and_chat.models import ChatSessionDetails
from onyx.server.query_and_chat.models import ChatSessionsResponse
from onyx.utils.threadpool_concurrency import parallel_yield
from shared_configs.contextvars import get_current_tenant_id

router = APIRouter()

ONYX_ANONYMIZED_EMAIL = "anonymous@anonymous.invalid"


def ensure_query_history_is_enabled(
    disallowed: list[QueryHistoryType],
) -> None:
    if ONYX_QUERY_HISTORY_TYPE in disallowed:
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN,
            detail="Query history has been disabled by the administrator.",
        )


def yield_snapshot_from_chat_session(
    chat_session: ChatSession,
    db_session: Session,
) -> Generator[ChatSessionSnapshot | None]:
    yield snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)


def fetch_and_process_chat_session_history(
    db_session: Session,
    start: datetime,
    end: datetime,
    limit: int | None = 500,  # noqa: ARG001
) -> Generator[ChatSessionSnapshot]:
    PAGE_SIZE = 100

    page = 0
    while True:
        paged_chat_sessions = get_page_of_chat_sessions(
            start_time=start,
            end_time=end,
            db_session=db_session,
            page_num=page,
            page_size=PAGE_SIZE,
        )

        if not paged_chat_sessions:
            break

        paged_snapshots = parallel_yield(
            [
                yield_snapshot_from_chat_session(
                    db_session=db_session,
                    chat_session=chat_session,
                )
                for chat_session in paged_chat_sessions
            ]
        )

        for snapshot in paged_snapshots:
            if snapshot:
                yield snapshot

        # If we've fetched *less* than a `PAGE_SIZE` worth
        # of data, we have reached the end of the
        # pagination sequence; break.
        if len(paged_chat_sessions) < PAGE_SIZE:
            break

        page += 1


def snapshot_from_chat_session(
    chat_session: ChatSession,
    db_session: Session,
) -> ChatSessionSnapshot | None:
    try:
        # Older chats may not have the right structure
        messages = create_chat_history_chain(
            chat_session_id=chat_session.id, db_session=db_session
        )
    except RuntimeError:
        return None

    flow_type = SessionType.SLACK if chat_session.onyxbot_flow else SessionType.CHAT

    return ChatSessionSnapshot(
        id=chat_session.id,
        user_email=get_display_email(
            chat_session.user.email if chat_session.user else None
        ),
        name=chat_session.description,
        messages=[
            MessageSnapshot.build(message)
            for message in messages
            if message.message_type != MessageType.SYSTEM
        ],
        assistant_id=chat_session.persona_id,
        assistant_name=chat_session.persona.name if chat_session.persona else None,
        time_created=chat_session.time_created,
        flow_type=flow_type,
    )


@router.get("/admin/chat-sessions")
def admin_get_chat_sessions(
    user_id: UUID,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ChatSessionsResponse:
    # we specifically don't allow this endpoint if "anonymized" since
    # this is a direct query on the user id
    ensure_query_history_is_enabled(
        [
            QueryHistoryType.DISABLED,
            QueryHistoryType.ANONYMIZED,
        ]
    )

    try:
        chat_sessions = get_chat_sessions_by_user(
            user_id=user_id, deleted=False, db_session=db_session, limit=0
        )

    except ValueError:
        raise ValueError("Chat session does not exist or has been deleted")

    return ChatSessionsResponse(
        sessions=[
            ChatSessionDetails(
                id=chat.id,
                name=chat.description,
                persona_id=chat.persona_id,
                time_created=chat.time_created.isoformat(),
                time_updated=chat.time_updated.isoformat(),
                shared_status=chat.shared_status,
                current_alternate_model=chat.current_alternate_model,
            )
            for chat in chat_sessions
        ]
    )


@router.get("/admin/chat-session-history")
def get_chat_session_history(
    page_num: int = Query(0, ge=0),
    page_size: int = Query(10, ge=1),
    feedback_type: QAFeedbackType | None = None,
    start_time: datetime | None = None,
    end_time: datetime | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> PaginatedReturn[ChatSessionMinimal]:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

    page_of_chat_sessions = get_page_of_chat_sessions(
        page_num=page_num,
        page_size=page_size,
        db_session=db_session,
        start_time=start_time,
        end_time=end_time,
        feedback_filter=feedback_type,
    )

    total_filtered_chat_sessions_count = get_total_filtered_chat_sessions_count(
        db_session=db_session,
        start_time=start_time,
        end_time=end_time,
        feedback_filter=feedback_type,
    )

    minimal_chat_sessions: list[ChatSessionMinimal] = []

    for chat_session in page_of_chat_sessions:
        minimal_chat_session = ChatSessionMinimal.from_chat_session(chat_session)
        if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
            minimal_chat_session.user_email = ONYX_ANONYMIZED_EMAIL
        minimal_chat_sessions.append(minimal_chat_session)

    return PaginatedReturn(
        items=minimal_chat_sessions,
        total_items=total_filtered_chat_sessions_count,
    )


@router.get("/admin/chat-session-history/{chat_session_id}")
def get_chat_session_admin(
    chat_session_id: UUID,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ChatSessionSnapshot:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=chat_session_id,
            user_id=None,  # view chat regardless of user
            db_session=db_session,
            include_deleted=True,
        )
    except ValueError:
        raise HTTPException(
            HTTPStatus.BAD_REQUEST,
            f"Chat session with id '{chat_session_id}' does not exist.",
        )
    snapshot = snapshot_from_chat_session(
        chat_session=chat_session, db_session=db_session
    )

    if snapshot is None:
        raise HTTPException(
            HTTPStatus.BAD_REQUEST,
            f"Could not create snapshot for chat session with id '{chat_session_id}'",
        )

    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
        snapshot.user_email = ONYX_ANONYMIZED_EMAIL

    return snapshot


@router.get("/admin/query-history/list")
def list_all_query_history_exports(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[QueryHistoryExport]:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
    try:
        pending_tasks = [
            QueryHistoryExport.from_task(task)
            for task in get_all_query_history_export_tasks(db_session=db_session)
        ]
        generated_files = [
            QueryHistoryExport.from_file(file)
            for file in get_query_history_export_files(db_session=db_session)
        ]
        merged = pending_tasks + generated_files

        # We sort based off of the start-time of the task.
        # We also return it in reverse order since viewing generated reports in most-recent to least-recent is most common.
        merged.sort(key=lambda task: task.start_time, reverse=True)

        return merged
    except Exception as e:
        raise HTTPException(
            HTTPStatus.INTERNAL_SERVER_ERROR, f"Failed to get all tasks: {e}"
        )


@router.post("/admin/query-history/start-export", tags=PUBLIC_API_TAGS)
def start_query_history_export(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
    start: datetime | None = None,
    end: datetime | None = None,
) -> dict[str, str]:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

    start = start or datetime.fromtimestamp(0, tz=timezone.utc)
    end = end or datetime.now(tz=timezone.utc)

    if start >= end:
        raise HTTPException(
            HTTPStatus.BAD_REQUEST,
            f"Start time must come before end time, but instead got the start time coming after; {start=} {end=}",
        )

    task_id_uuid = uuid.uuid4()
    task_id = str(task_id_uuid)
    start_time = datetime.now(tz=timezone.utc)

    register_task(
        db_session=db_session,
        task_name=query_history_task_name(start=start, end=end),
        task_id=task_id,
        status=TaskStatus.PENDING,
        start_time=start_time,
    )

    client_app.send_task(
        OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK,
        task_id=task_id,
        priority=OnyxCeleryPriority.MEDIUM,
        queue=OnyxCeleryQueues.CSV_GENERATION,
        kwargs={
            "start": start,
            "end": end,
            "start_time": start_time,
            "tenant_id": get_current_tenant_id(),
        },
    )

    return {"request_id": task_id}


@router.get("/admin/query-history/export-status", tags=PUBLIC_API_TAGS)
def get_query_history_export_status(
    request_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

    task = get_task_with_id(db_session=db_session, task_id=request_id)

    if task:
        return {"status": task.status}

    # If task is None, then it's possible that the task has already finished processing.
    # Therefore, we should then check if the export file has already been stored inside of the file-store.
    # If that *also* doesn't exist, then we can return a 404.
    file_store = get_default_file_store()

    report_name = construct_query_history_report_name(request_id)
    has_file = file_store.has_file(
        file_id=report_name,
        file_origin=FileOrigin.QUERY_HISTORY_CSV,
        file_type=FileType.CSV,
    )

    if not has_file:
        raise HTTPException(
            HTTPStatus.NOT_FOUND,
            f"No task with {request_id=} was found",
        )

    return {"status": TaskStatus.SUCCESS}


@router.get("/admin/query-history/download", tags=PUBLIC_API_TAGS)
def download_query_history_csv(
    request_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StreamingResponse:
    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

    report_name = construct_query_history_report_name(request_id)
    file_store = get_default_file_store()
    has_file = file_store.has_file(
        file_id=report_name,
        file_origin=FileOrigin.QUERY_HISTORY_CSV,
        file_type=FileType.CSV,
    )

    if has_file:
        try:
            csv_stream = file_store.read_file(report_name)
        except Exception as e:
            raise HTTPException(
                HTTPStatus.INTERNAL_SERVER_ERROR,
                f"Failed to read query history file: {str(e)}",
            )
        csv_stream.seek(0)
        return StreamingResponse(
            iter(csv_stream),
            media_type=FileType.CSV,
            headers={"Content-Disposition": f"attachment;filename={report_name}"},
        )

    # If the file doesn't exist yet, it may still be processing.
    # Therefore, we check the task queue to determine its status, if there is any.
    task = get_task_with_id(db_session=db_session, task_id=request_id)
    if not task:
        raise HTTPException(
            HTTPStatus.NOT_FOUND,
            f"No task with {request_id=} was found",
        )

    if task.status in [TaskStatus.STARTED, TaskStatus.PENDING]:
        raise HTTPException(
            HTTPStatus.ACCEPTED, f"Task with {request_id=} is still being worked on"
        )

    elif task.status == TaskStatus.FAILURE:
        raise HTTPException(
            HTTPStatus.INTERNAL_SERVER_ERROR,
            f"Task with {request_id=} failed to be processed",
        )
    else:
        # This is the final case in which `task.status == SUCCESS`
        raise RuntimeError(
            "The task was marked as success, the file was not found in the file store; this is an internal error..."
        )


================================================
FILE: backend/ee/onyx/server/query_history/models.py
================================================
from datetime import datetime
from uuid import UUID

from pydantic import BaseModel

from ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX
from onyx.auth.users import get_display_email
from onyx.background.task_utils import extract_task_id_from_query_history_report_name
from onyx.configs.constants import MessageType
from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import SessionType
from onyx.db.enums import TaskStatus
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import FileRecord
from onyx.db.models import TaskQueueState


class AbridgedSearchDoc(BaseModel):
    """A subset of the info present in `SearchDoc`"""

    document_id: str
    semantic_identifier: str
    link: str | None


class MessageSnapshot(BaseModel):
    id: int
    message: str
    message_type: MessageType
    documents: list[AbridgedSearchDoc]
    feedback_type: QAFeedbackType | None
    feedback_text: str | None
    time_created: datetime

    @classmethod
    def build(cls, message: ChatMessage) -> "MessageSnapshot":
        latest_messages_feedback_obj = (
            message.chat_message_feedbacks[-1]
            if len(message.chat_message_feedbacks) > 0
            else None
        )
        feedback_type = (
            (
                QAFeedbackType.LIKE
                if latest_messages_feedback_obj.is_positive
                else QAFeedbackType.DISLIKE
            )
            if latest_messages_feedback_obj
            else None
        )
        feedback_text = (
            latest_messages_feedback_obj.feedback_text
            if latest_messages_feedback_obj
            else None
        )
        return cls(
            id=message.id,
            message=message.message,
            message_type=message.message_type,
            documents=[
                AbridgedSearchDoc(
                    document_id=document.document_id,
                    semantic_identifier=document.semantic_id,
                    link=document.link,
                )
                for document in message.search_docs
            ],
            feedback_type=feedback_type,
            feedback_text=feedback_text,
            time_created=message.time_sent,
        )


class ChatSessionMinimal(BaseModel):
    id: UUID
    user_email: str
    name: str | None
    first_user_message: str
    first_ai_message: str
    assistant_id: int | None
    assistant_name: str | None
    time_created: datetime
    feedback_type: QAFeedbackType | None
    flow_type: SessionType
    conversation_length: int

    @classmethod
    def from_chat_session(cls, chat_session: ChatSession) -> "ChatSessionMinimal":
        first_user_message = next(
            (
                message.message
                for message in chat_session.messages
                if message.message_type == MessageType.USER
            ),
            "",
        )
        first_ai_message = next(
            (
                message.message
                for message in chat_session.messages
                if message.message_type == MessageType.ASSISTANT
            ),
            "",
        )

        list_of_message_feedbacks = [
            feedback.is_positive
            for message in chat_session.messages
            for feedback in message.chat_message_feedbacks
        ]
        session_feedback_type = None
        if list_of_message_feedbacks:
            if all(list_of_message_feedbacks):
                session_feedback_type = QAFeedbackType.LIKE
            elif not any(list_of_message_feedbacks):
                session_feedback_type = QAFeedbackType.DISLIKE
            else:
                session_feedback_type = QAFeedbackType.MIXED

        return cls(
            id=chat_session.id,
            user_email=get_display_email(
                chat_session.user.email if chat_session.user else None
            ),
            name=chat_session.description,
            first_user_message=first_user_message,
            first_ai_message=first_ai_message,
            assistant_id=chat_session.persona_id,
            assistant_name=(
                chat_session.persona.name if chat_session.persona else None
            ),
            time_created=chat_session.time_created,
            feedback_type=session_feedback_type,
            flow_type=(
                SessionType.SLACK if chat_session.onyxbot_flow else SessionType.CHAT
            ),
            conversation_length=len(
                [
                    message
                    for message in chat_session.messages
                    if message.message_type != MessageType.SYSTEM
                ]
            ),
        )


class ChatSessionSnapshot(BaseModel):
    id: UUID
    user_email: str
    name: str | None
    messages: list[MessageSnapshot]
    assistant_id: int | None
    assistant_name: str | None
    time_created: datetime
    flow_type: SessionType


class QuestionAnswerPairSnapshot(BaseModel):
    chat_session_id: UUID
    # 1-indexed message number in the chat_session
    # e.g. the first message pair in the chat_session is 1, the second is 2, etc.
    message_pair_num: int
    user_message: str
    ai_response: str
    retrieved_documents: list[AbridgedSearchDoc]
    feedback_type: QAFeedbackType | None
    feedback_text: str | None
    persona_name: str | None
    user_email: str
    time_created: datetime
    flow_type: SessionType

    @classmethod
    def from_chat_session_snapshot(
        cls,
        chat_session_snapshot: ChatSessionSnapshot,
    ) -> list["QuestionAnswerPairSnapshot"]:
        message_pairs: list[tuple[MessageSnapshot, MessageSnapshot]] = []
        for ind in range(1, len(chat_session_snapshot.messages), 2):
            message_pairs.append(
                (
                    chat_session_snapshot.messages[ind - 1],
                    chat_session_snapshot.messages[ind],
                )
            )

        return [
            cls(
                chat_session_id=chat_session_snapshot.id,
                message_pair_num=ind + 1,
                user_message=user_message.message,
                ai_response=ai_message.message,
                retrieved_documents=ai_message.documents,
                feedback_type=ai_message.feedback_type,
                feedback_text=ai_message.feedback_text,
                persona_name=chat_session_snapshot.assistant_name,
                user_email=get_display_email(chat_session_snapshot.user_email),
                time_created=user_message.time_created,
                flow_type=chat_session_snapshot.flow_type,
            )
            for ind, (user_message, ai_message) in enumerate(message_pairs)
        ]

    def to_json(self) -> dict[str, str | None]:
        return {
            "chat_session_id": str(self.chat_session_id),
            "message_pair_num": str(self.message_pair_num),
            "user_message": self.user_message,
            "ai_response": self.ai_response,
            "retrieved_documents": "|".join(
                [
                    doc.link or doc.semantic_identifier
                    for doc in self.retrieved_documents
                ]
            ),
            "feedback_type": self.feedback_type.value if self.feedback_type else "",
            "feedback_text": self.feedback_text or "",
            "persona_name": self.persona_name,
            "user_email": self.user_email,
            "time_created": str(self.time_created),
            "flow_type": self.flow_type,
        }


class QueryHistoryExport(BaseModel):
    task_id: str
    status: TaskStatus
    start: datetime
    end: datetime
    start_time: datetime

    @classmethod
    def from_task(
        cls,
        task_queue_state: TaskQueueState,
    ) -> "QueryHistoryExport":
        start_end = task_queue_state.task_name.removeprefix(
            f"{QUERY_HISTORY_TASK_NAME_PREFIX}_"
        )
        start, end = start_end.split("_")

        if not task_queue_state.start_time:
            raise RuntimeError("The start time of the task must always be present")

        return cls(
            task_id=task_queue_state.task_id,
            status=task_queue_state.status,
            start=datetime.fromisoformat(start),
            end=datetime.fromisoformat(end),
            start_time=task_queue_state.start_time,
        )

    @classmethod
    def from_file(
        cls,
        file: FileRecord,
    ) -> "QueryHistoryExport":
        if not file.file_metadata or not isinstance(file.file_metadata, dict):
            raise RuntimeError(
                "The file metadata must be non-null, and must be of type `dict[str, str]`"
            )

        metadata = QueryHistoryFileMetadata.model_validate(dict(file.file_metadata))
        task_id = extract_task_id_from_query_history_report_name(file.file_id)

        return cls(
            task_id=task_id,
            status=TaskStatus.SUCCESS,
            start=metadata.start,
            end=metadata.end,
            start_time=metadata.start_time,
        )


class QueryHistoryFileMetadata(BaseModel):
    start: datetime
    end: datetime
    start_time: datetime


================================================
FILE: backend/ee/onyx/server/reporting/usage_export_api.py
================================================
from collections.abc import Generator
from datetime import datetime

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.db.usage_export import get_all_usage_reports
from ee.onyx.db.usage_export import get_usage_report_data
from ee.onyx.db.usage_export import UsageReportMetadata
from onyx.auth.users import current_admin_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.file_store.constants import STANDARD_CHUNK_SIZE
from shared_configs.contextvars import get_current_tenant_id

router = APIRouter()


class GenerateUsageReportParams(BaseModel):
    period_from: str | None = None
    period_to: str | None = None


@router.post("/admin/usage-report", status_code=204)
def generate_report(
    params: GenerateUsageReportParams,
    user: User = Depends(current_admin_user),
) -> None:
    # Validate period parameters
    if params.period_from and params.period_to:
        try:
            datetime.fromisoformat(params.period_from)
            datetime.fromisoformat(params.period_to)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e))

    tenant_id = get_current_tenant_id()
    client_app.send_task(
        OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
        kwargs={
            "tenant_id": tenant_id,
            "user_id": str(user.id) if user else None,
            "period_from": params.period_from,
            "period_to": params.period_to,
        },
    )

    return None


@router.get("/admin/usage-report/{report_name}")
def read_usage_report(
    report_name: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),  # noqa: ARG001
) -> Response:
    try:
        file = get_usage_report_data(report_name)
    except (ValueError, RuntimeError) as e:
        raise HTTPException(status_code=404, detail=str(e))

    def iterfile() -> Generator[bytes, None, None]:
        while True:
            chunk = file.read(STANDARD_CHUNK_SIZE)
            if not chunk:
                break
            yield chunk

    return StreamingResponse(
        content=iterfile(),
        media_type="application/zip",
        headers={"Content-Disposition": f"attachment; filename={report_name}"},
    )


@router.get("/admin/usage-report")
def fetch_usage_reports(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[UsageReportMetadata]:
    try:
        return get_all_usage_reports(db_session)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


================================================
FILE: backend/ee/onyx/server/reporting/usage_export_generation.py
================================================
import csv
import tempfile
import uuid
import zipfile
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from fastapi_users_db_sqlalchemy import UUID_ID
from sqlalchemy import cast
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Session

from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
from ee.onyx.db.usage_export import write_usage_report
from ee.onyx.server.reporting.usage_export_models import UsageReportMetadata
from ee.onyx.server.reporting.usage_export_models import UserSkeleton
from onyx.configs.constants import FileOrigin
from onyx.db.models import User
from onyx.db.users import get_all_users
from onyx.file_store.constants import MAX_IN_MEMORY_SIZE
from onyx.file_store.file_store import FileStore
from onyx.file_store.file_store import get_default_file_store


def generate_chat_messages_report(
    db_session: Session,
    file_store: FileStore,
    report_id: str,
    period: tuple[datetime, datetime] | None,
) -> str:
    file_name = f"{report_id}_chat_sessions"

    if period is None:
        period = (
            datetime.fromtimestamp(0, tz=timezone.utc),
            datetime.now(tz=timezone.utc),
        )
    else:
        # time-picker sends a time which is at the beginning of the day
        # so we need to add one day to the end time to make it inclusive
        period = (
            period[0],
            period[1] + timedelta(days=1),
        )

    with tempfile.SpooledTemporaryFile(
        max_size=MAX_IN_MEMORY_SIZE, mode="w+"
    ) as temp_file:
        csvwriter = csv.writer(temp_file, delimiter=",")
        csvwriter.writerow(
            [
                "session_id",
                "user_id",
                "flow_type",
                "time_sent",
                "assistant_name",
                "user_email",
                "number_of_tokens",
            ]
        )
        for chat_message_skeleton_batch in get_all_empty_chat_message_entries(
            db_session, period
        ):
            for chat_message_skeleton in chat_message_skeleton_batch:
                csvwriter.writerow(
                    [
                        chat_message_skeleton.chat_session_id,
                        chat_message_skeleton.user_id,
                        chat_message_skeleton.flow_type,
                        chat_message_skeleton.time_sent.isoformat(),
                        chat_message_skeleton.assistant_name,
                        chat_message_skeleton.user_email,
                        chat_message_skeleton.number_of_tokens,
                    ]
                )

        # after writing seek to beginning of buffer
        temp_file.seek(0)
        file_id = file_store.save_file(
            content=temp_file,
            display_name=file_name,
            file_origin=FileOrigin.GENERATED_REPORT,
            file_type="text/csv",
        )

    return file_id


def generate_user_report(
    db_session: Session,
    file_store: FileStore,
    report_id: str,
) -> str:
    file_name = f"{report_id}_users"

    with tempfile.SpooledTemporaryFile(
        max_size=MAX_IN_MEMORY_SIZE, mode="w+"
    ) as temp_file:
        csvwriter = csv.writer(temp_file, delimiter=",")
        csvwriter.writerow(["user_id", "is_active"])

        users = get_all_users(db_session)
        for user in users:
            user_skeleton = UserSkeleton(
                user_id=str(user.id),
                is_active=user.is_active,
            )
            csvwriter.writerow([user_skeleton.user_id, user_skeleton.is_active])

        temp_file.seek(0)
        file_id = file_store.save_file(
            content=temp_file,
            display_name=file_name,
            file_origin=FileOrigin.GENERATED_REPORT,
            file_type="text/csv",
        )

    return file_id


def create_new_usage_report(
    db_session: Session,
    user_id: UUID_ID | None,  # None = auto-generated
    period: tuple[datetime, datetime] | None,
) -> UsageReportMetadata:
    report_id = str(uuid.uuid4())
    file_store = get_default_file_store()

    messages_file_id = generate_chat_messages_report(
        db_session, file_store, report_id, period
    )
    users_file_id = generate_user_report(db_session, file_store, report_id)

    with tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE) as zip_buffer:
        with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zip_file:
            # write messages
            chat_messages_tmpfile = file_store.read_file(
                messages_file_id, mode="b", use_tempfile=True
            )
            zip_file.writestr(
                "chat_messages.csv",
                chat_messages_tmpfile.read(),
            )

            # write users
            users_tmpfile = file_store.read_file(
                users_file_id, mode="b", use_tempfile=True
            )
            zip_file.writestr("users.csv", users_tmpfile.read())

        zip_buffer.seek(0)

        # store zip blob to file_store
        report_name = f"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d')}_{report_id}_usage_report.zip"
        file_store.save_file(
            content=zip_buffer,
            display_name=report_name,
            file_origin=FileOrigin.GENERATED_REPORT,
            file_type="application/zip",
            file_id=report_name,
        )

    # add report after zip file is written
    new_report = write_usage_report(db_session, report_name, user_id, period)

    # get user email
    requestor_user = (
        db_session.query(User)
        .filter(cast(User.id, UUID) == new_report.requestor_user_id)
        .one_or_none()
        if new_report.requestor_user_id
        else None
    )
    requestor_email = requestor_user.email if requestor_user else None

    return UsageReportMetadata(
        report_name=new_report.report_name,
        requestor=requestor_email,
        time_created=new_report.time_created,
        period_from=new_report.period_from,
        period_to=new_report.period_to,
    )


================================================
FILE: backend/ee/onyx/server/reporting/usage_export_models.py
================================================
from datetime import datetime
from enum import Enum
from uuid import UUID

from pydantic import BaseModel


class FlowType(str, Enum):
    CHAT = "chat"
    SLACK = "slack"


class ChatMessageSkeleton(BaseModel):
    message_id: int
    chat_session_id: UUID
    user_id: str | None
    flow_type: FlowType
    time_sent: datetime
    assistant_name: str | None
    user_email: str | None
    number_of_tokens: int


class UserSkeleton(BaseModel):
    user_id: str
    is_active: bool


class UsageReportMetadata(BaseModel):
    report_name: str
    requestor: str | None
    time_created: datetime
    period_from: datetime | None  # None = All time
    period_to: datetime | None


================================================
FILE: backend/ee/onyx/server/scim/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/scim/api.py
================================================
"""SCIM 2.0 API endpoints (RFC 7644).

This module provides the FastAPI router for SCIM service discovery,
User CRUD, and Group CRUD. Identity providers (Okta, Azure AD) call
these endpoints to provision and manage users and groups.

Service discovery endpoints are unauthenticated — IdPs may probe them
before bearer token configuration is complete. All other endpoints
require a valid SCIM bearer token.
"""

from __future__ import annotations

from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import FastAPI
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from fastapi_users.password import PasswordHelper
from sqlalchemy import func
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL
from ee.onyx.server.scim.auth import ScimAuthError
from ee.onyx.server.scim.auth import verify_scim_token
from ee.onyx.server.scim.filtering import parse_scim_filter
from ee.onyx.server.scim.models import SCIM_LIST_RESPONSE_SCHEMA
from ee.onyx.server.scim.models import ScimError
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimListResponse
from ee.onyx.server.scim.models import ScimMappingFields
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimPatchRequest
from ee.onyx.server.scim.models import ScimServiceProviderConfig
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.patch import apply_group_patch
from ee.onyx.server.scim.patch import apply_user_patch
from ee.onyx.server.scim.patch import ScimPatchError
from ee.onyx.server.scim.providers.base import get_default_provider
from ee.onyx.server.scim.providers.base import ScimProvider
from ee.onyx.server.scim.providers.base import serialize_emails
from ee.onyx.server.scim.schema_definitions import ENTERPRISE_USER_SCHEMA_DEF
from ee.onyx.server.scim.schema_definitions import GROUP_RESOURCE_TYPE
from ee.onyx.server.scim.schema_definitions import GROUP_SCHEMA_DEF
from ee.onyx.server.scim.schema_definitions import SERVICE_PROVIDER_CONFIG
from ee.onyx.server.scim.schema_definitions import USER_RESOURCE_TYPE
from ee.onyx.server.scim.schema_definitions import USER_SCHEMA_DEF
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccountType
from onyx.db.enums import GrantSource
from onyx.db.enums import Permission
from onyx.db.models import ScimToken
from onyx.db.models import ScimUserMapping
from onyx.db.models import User
from onyx.db.models import UserGroup
from onyx.db.models import UserRole
from onyx.db.permissions import recompute_permissions_for_group__no_commit
from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

logger = setup_logger()

# Group names reserved for system default groups (seeded by migration).
_RESERVED_GROUP_NAMES = frozenset({"Admin", "Basic"})


class ScimJSONResponse(JSONResponse):
    """JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1)."""

    media_type = "application/scim+json"


# NOTE: All URL paths in this router (/ServiceProviderConfig, /ResourceTypes,
# /Schemas, /Users, /Groups) are mandated by the SCIM spec (RFC 7643/7644).
# IdPs like Okta and Azure AD hardcode these exact paths, so they cannot be
# changed to kebab-case.


scim_router = APIRouter(prefix="/scim/v2", tags=["SCIM"])

_pw_helper = PasswordHelper()


def register_scim_exception_handlers(app: FastAPI) -> None:
    """Register SCIM-specific exception handlers on the FastAPI app.

    Call this after ``app.include_router(scim_router)`` so that auth
    failures from ``verify_scim_token`` return RFC 7644 §3.12 error
    envelopes (with ``schemas`` and ``status`` fields) instead of
    FastAPI's default ``{"detail": "..."}`` format.
    """

    @app.exception_handler(ScimAuthError)
    async def _handle_scim_auth_error(
        _request: Request, exc: ScimAuthError
    ) -> ScimJSONResponse:
        return _scim_error_response(exc.status_code, exc.detail)


def _get_provider(
    _token: ScimToken = Depends(verify_scim_token),
) -> ScimProvider:
    """Resolve the SCIM provider for the current request.

    Currently returns OktaProvider for all requests. When multi-provider
    support is added (ENG-3652), this will resolve based on token metadata
    or tenant configuration — no endpoint changes required.
    """
    return get_default_provider()


# ---------------------------------------------------------------------------
# Service Discovery Endpoints (unauthenticated)
# ---------------------------------------------------------------------------


@scim_router.get("/ServiceProviderConfig")
def get_service_provider_config() -> ScimServiceProviderConfig:
    """Advertise supported SCIM features (RFC 7643 §5)."""
    return SERVICE_PROVIDER_CONFIG


@scim_router.get("/ResourceTypes")
def get_resource_types() -> ScimJSONResponse:
    """List available SCIM resource types (RFC 7643 §6).

    Wrapped in a ListResponse envelope (RFC 7644 §3.4.2) because IdPs
    like Entra ID expect a JSON object, not a bare array.
    """
    resources = [USER_RESOURCE_TYPE, GROUP_RESOURCE_TYPE]
    return ScimJSONResponse(
        content={
            "schemas": [SCIM_LIST_RESPONSE_SCHEMA],
            "totalResults": len(resources),
            "Resources": [
                r.model_dump(exclude_none=True, by_alias=True) for r in resources
            ],
        }
    )


@scim_router.get("/Schemas")
def get_schemas() -> ScimJSONResponse:
    """Return SCIM schema definitions (RFC 7643 §7).

    Wrapped in a ListResponse envelope (RFC 7644 §3.4.2) because IdPs
    like Entra ID expect a JSON object, not a bare array.
    """
    schemas = [USER_SCHEMA_DEF, GROUP_SCHEMA_DEF, ENTERPRISE_USER_SCHEMA_DEF]
    return ScimJSONResponse(
        content={
            "schemas": [SCIM_LIST_RESPONSE_SCHEMA],
            "totalResults": len(schemas),
            "Resources": [s.model_dump(exclude_none=True) for s in schemas],
        }
    )


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _scim_error_response(status: int, detail: str) -> ScimJSONResponse:
    """Build a SCIM-compliant error response (RFC 7644 §3.12)."""
    logger.warning("SCIM error response: status=%s detail=%s", status, detail)
    body = ScimError(status=str(status), detail=detail)
    return ScimJSONResponse(
        status_code=status,
        content=body.model_dump(exclude_none=True),
    )


def _parse_excluded_attributes(raw: str | None) -> set[str]:
    """Parse the ``excludedAttributes`` query parameter (RFC 7644 §3.4.2.5).

    Returns a set of lowercased attribute names to omit from responses.
    """
    if not raw:
        return set()
    return {attr.strip().lower() for attr in raw.split(",") if attr.strip()}


def _apply_exclusions(
    resource: ScimUserResource | ScimGroupResource,
    excluded: set[str],
) -> dict:
    """Serialize a SCIM resource, omitting attributes the IdP excluded.

    RFC 7644 §3.4.2.5 lets the IdP pass ``?excludedAttributes=groups,emails``
    to reduce response payload size. We strip those fields after serialization
    so the rest of the pipeline doesn't need to know about them.
    """
    data = resource.model_dump(exclude_none=True, by_alias=True)
    for attr in excluded:
        # Match case-insensitively against the camelCase field names
        keys_to_remove = [k for k in data if k.lower() == attr]
        for k in keys_to_remove:
            del data[k]
    return data


def _check_seat_availability(dal: ScimDAL) -> str | None:
    """Return an error message if seat limit is reached, else None."""
    check_fn = fetch_ee_implementation_or_noop(
        "onyx.db.license", "check_seat_availability", None
    )
    if check_fn is None:
        return None
    result = check_fn(dal.session, seats_needed=1)
    if not result.available:
        return result.error_message or "Seat limit reached"
    return None


def _fetch_user_or_404(user_id: str, dal: ScimDAL) -> User | ScimJSONResponse:
    """Parse *user_id* as UUID, look up the user, or return a 404 error."""
    try:
        uid = UUID(user_id)
    except ValueError:
        return _scim_error_response(404, f"User {user_id} not found")
    user = dal.get_user(uid)
    if not user:
        return _scim_error_response(404, f"User {user_id} not found")
    return user


def _scim_name_to_str(name: ScimName | None) -> str | None:
    """Extract a display name string from a SCIM name object.

    Returns None if no name is provided, so the caller can decide
    whether to update the user's personal_name.
    """
    if not name:
        return None
    # If the client explicitly provides ``formatted``, prefer it — the client
    # knows what display string it wants. Otherwise build from components.
    if name.formatted:
        return name.formatted
    parts = " ".join(part for part in [name.givenName, name.familyName] if part)
    return parts or None


def _scim_resource_response(
    resource: ScimUserResource | ScimGroupResource | ScimListResponse,
    status_code: int = 200,
) -> ScimJSONResponse:
    """Serialize a SCIM resource as ``application/scim+json``."""
    content = resource.model_dump(exclude_none=True, by_alias=True)
    return ScimJSONResponse(
        status_code=status_code,
        content=content,
    )


def _build_list_response(
    resources: list[ScimUserResource | ScimGroupResource],
    total: int,
    start_index: int,
    count: int,
    excluded: set[str] | None = None,
) -> ScimListResponse | ScimJSONResponse:
    """Build a SCIM list response, optionally applying attribute exclusions.

    RFC 7644 §3.4.2.5 — IdPs may request certain attributes be omitted via
    the ``excludedAttributes`` query parameter.
    """
    if excluded:
        envelope = ScimListResponse(
            totalResults=total,
            startIndex=start_index,
            itemsPerPage=count,
        )
        data = envelope.model_dump(exclude_none=True)
        data["Resources"] = [_apply_exclusions(r, excluded) for r in resources]
        return ScimJSONResponse(content=data)

    return _scim_resource_response(
        ScimListResponse(
            totalResults=total,
            startIndex=start_index,
            itemsPerPage=count,
            Resources=resources,
        )
    )


def _extract_enterprise_fields(
    resource: ScimUserResource,
) -> tuple[str | None, str | None]:
    """Extract department and manager from enterprise extension."""
    ext = resource.enterprise_extension
    if not ext:
        return None, None
    department = ext.department
    manager = ext.manager.value if ext.manager else None
    return department, manager


def _mapping_to_fields(
    mapping: ScimUserMapping | None,
) -> ScimMappingFields | None:
    """Extract round-trip fields from a SCIM user mapping."""
    if not mapping:
        return None
    return ScimMappingFields(
        department=mapping.department,
        manager=mapping.manager,
        given_name=mapping.given_name,
        family_name=mapping.family_name,
        scim_emails_json=mapping.scim_emails_json,
    )


def _fields_from_resource(resource: ScimUserResource) -> ScimMappingFields:
    """Build mapping fields from an incoming SCIM user resource."""
    department, manager = _extract_enterprise_fields(resource)
    return ScimMappingFields(
        department=department,
        manager=manager,
        given_name=resource.name.givenName if resource.name else None,
        family_name=resource.name.familyName if resource.name else None,
        scim_emails_json=serialize_emails(resource.emails),
    )


# ---------------------------------------------------------------------------
# User CRUD (RFC 7644 §3)
# ---------------------------------------------------------------------------


@scim_router.get("/Users", response_model=None)
def list_users(
    filter: str | None = Query(None),
    excludedAttributes: str | None = None,
    startIndex: int = Query(1, ge=1),
    count: int = Query(100, ge=0, le=500),
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimListResponse | ScimJSONResponse:
    """List users with optional SCIM filter and pagination."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
    dal.commit()

    try:
        scim_filter = parse_scim_filter(filter)
    except ValueError as e:
        return _scim_error_response(400, str(e))

    try:
        users_with_mappings, total = dal.list_users(scim_filter, startIndex, count)
    except ValueError as e:
        return _scim_error_response(400, str(e))

    user_groups_map = dal.get_users_groups_batch([u.id for u, _ in users_with_mappings])
    resources: list[ScimUserResource | ScimGroupResource] = [
        provider.build_user_resource(
            user,
            mapping.external_id if mapping else None,
            groups=user_groups_map.get(user.id, []),
            scim_username=mapping.scim_username if mapping else None,
            fields=_mapping_to_fields(mapping),
        )
        for user, mapping in users_with_mappings
    ]

    return _build_list_response(
        resources,
        total,
        startIndex,
        count,
        excluded=_parse_excluded_attributes(excludedAttributes),
    )


@scim_router.get("/Users/{user_id}", response_model=None)
def get_user(
    user_id: str,
    excludedAttributes: str | None = None,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimUserResource | ScimJSONResponse:
    """Get a single user by ID."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
    dal.commit()

    result = _fetch_user_or_404(user_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    mapping = dal.get_user_mapping_by_user_id(user.id)

    resource = provider.build_user_resource(
        user,
        mapping.external_id if mapping else None,
        groups=dal.get_user_groups(user.id),
        scim_username=mapping.scim_username if mapping else None,
        fields=_mapping_to_fields(mapping),
    )

    # RFC 7644 §3.4.2.5 — IdP may request certain attributes be omitted
    excluded = _parse_excluded_attributes(excludedAttributes)
    if excluded:
        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))

    return _scim_resource_response(resource)


@scim_router.post("/Users", status_code=201, response_model=None)
def create_user(
    user_resource: ScimUserResource,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimUserResource | ScimJSONResponse:
    """Create a new user from a SCIM provisioning request."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    email = user_resource.userName.strip()

    # Check for existing user — if they exist but aren't SCIM-managed yet,
    # link them to the IdP rather than rejecting with 409.
    external_id: str | None = user_resource.externalId
    scim_username: str = user_resource.userName.strip()
    fields: ScimMappingFields = _fields_from_resource(user_resource)

    existing_user = dal.get_user_by_email(email)
    if existing_user:
        existing_mapping = dal.get_user_mapping_by_user_id(existing_user.id)
        if existing_mapping:
            return _scim_error_response(409, f"User with email {email} already exists")

        # Adopt pre-existing user into SCIM management.
        # Reactivating a deactivated user consumes a seat, so enforce the
        # seat limit the same way replace_user does.
        if user_resource.active and not existing_user.is_active:
            seat_error = _check_seat_availability(dal)
            if seat_error:
                return _scim_error_response(403, seat_error)

        personal_name = _scim_name_to_str(user_resource.name)
        dal.update_user(
            existing_user,
            is_active=user_resource.active,
            **({"personal_name": personal_name} if personal_name else {}),
        )

        try:
            dal.create_user_mapping(
                external_id=external_id,
                user_id=existing_user.id,
                scim_username=scim_username,
                fields=fields,
            )
            dal.commit()
        except IntegrityError:
            dal.rollback()
            return _scim_error_response(
                409, f"User with email {email} already has a SCIM mapping"
            )

        return _scim_resource_response(
            provider.build_user_resource(
                existing_user,
                external_id,
                scim_username=scim_username,
                fields=fields,
            ),
            status_code=201,
        )

    # Only enforce seat limit for net-new users — adopting a pre-existing
    # user doesn't consume a new seat.
    seat_error = _check_seat_availability(dal)
    if seat_error:
        return _scim_error_response(403, seat_error)

    # Create user with a random password (SCIM users authenticate via IdP)
    personal_name = _scim_name_to_str(user_resource.name)
    user = User(
        email=email,
        hashed_password=_pw_helper.hash(_pw_helper.generate()),
        role=UserRole.BASIC,
        account_type=AccountType.STANDARD,
        is_active=user_resource.active,
        is_verified=True,
        personal_name=personal_name,
    )

    try:
        dal.add_user(user)
    except IntegrityError:
        dal.rollback()
        return _scim_error_response(409, f"User with email {email} already exists")

    # Always create a SCIM mapping so that the user is marked as
    # SCIM-managed. externalId may be None (RFC 7643 says it's optional).
    try:
        dal.create_user_mapping(
            external_id=external_id,
            user_id=user.id,
            scim_username=scim_username,
            fields=fields,
        )
    except IntegrityError:
        dal.rollback()
        return _scim_error_response(
            409, f"User with email {email} already has a SCIM mapping"
        )

    # Assign user to default group BEFORE commit so everything is atomic.
    # If this fails, the entire user creation rolls back and IdP can retry.
    try:
        assign_user_to_default_groups__no_commit(db_session, user)
    except Exception:
        dal.rollback()
        logger.exception(f"Failed to assign SCIM user {email} to default groups")
        return _scim_error_response(
            500, f"Failed to assign user {email} to default group"
        )

    dal.commit()

    return _scim_resource_response(
        provider.build_user_resource(
            user,
            external_id,
            scim_username=scim_username,
            fields=fields,
        ),
        status_code=201,
    )


@scim_router.put("/Users/{user_id}", response_model=None)
def replace_user(
    user_id: str,
    user_resource: ScimUserResource,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimUserResource | ScimJSONResponse:
    """Replace a user entirely (RFC 7644 §3.5.1)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    # Handle activation (need seat check) / deactivation
    is_reactivation = user_resource.active and not user.is_active
    if is_reactivation:
        seat_error = _check_seat_availability(dal)
        if seat_error:
            return _scim_error_response(403, seat_error)

    personal_name = _scim_name_to_str(user_resource.name)

    dal.update_user(
        user,
        email=user_resource.userName.strip(),
        is_active=user_resource.active,
        personal_name=personal_name,
    )

    # Reconcile default-group membership on reactivation
    if is_reactivation:
        assign_user_to_default_groups__no_commit(
            db_session, user, is_admin=(user.role == UserRole.ADMIN)
        )

    new_external_id = user_resource.externalId
    scim_username = user_resource.userName.strip()
    fields = _fields_from_resource(user_resource)
    dal.sync_user_external_id(
        user.id,
        new_external_id,
        scim_username=scim_username,
        fields=fields,
    )

    dal.commit()

    return _scim_resource_response(
        provider.build_user_resource(
            user,
            new_external_id,
            groups=dal.get_user_groups(user.id),
            scim_username=scim_username,
            fields=fields,
        )
    )


@scim_router.patch("/Users/{user_id}", response_model=None)
def patch_user(
    user_id: str,
    patch_request: ScimPatchRequest,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimUserResource | ScimJSONResponse:
    """Partially update a user (RFC 7644 §3.5.2).

    This is the primary endpoint for user deprovisioning — Okta sends
    ``PATCH {"active": false}`` rather than DELETE.
    """
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    mapping = dal.get_user_mapping_by_user_id(user.id)
    external_id = mapping.external_id if mapping else None
    current_scim_username = mapping.scim_username if mapping else None
    current_fields = _mapping_to_fields(mapping)

    current = provider.build_user_resource(
        user,
        external_id,
        groups=dal.get_user_groups(user.id),
        scim_username=current_scim_username,
        fields=current_fields,
    )

    try:
        patched, ent_data = apply_user_patch(
            patch_request.Operations, current, provider.ignored_patch_paths
        )
    except ScimPatchError as e:
        return _scim_error_response(e.status, e.detail)

    # Apply changes back to the DB model
    is_reactivation = patched.active and not user.is_active
    if patched.active != user.is_active:
        if patched.active:
            seat_error = _check_seat_availability(dal)
            if seat_error:
                return _scim_error_response(403, seat_error)

    # Track the scim_username — if userName was patched, update it
    new_scim_username = patched.userName.strip() if patched.userName else None

    # If displayName was explicitly patched (different from the original), use
    # it as personal_name directly.  Otherwise, derive from name components.
    personal_name: str | None
    if patched.displayName and patched.displayName != current.displayName:
        personal_name = patched.displayName
    else:
        personal_name = _scim_name_to_str(patched.name)

    dal.update_user(
        user,
        email=(
            patched.userName.strip()
            if patched.userName.strip().lower() != user.email.lower()
            else None
        ),
        is_active=patched.active if patched.active != user.is_active else None,
        personal_name=personal_name,
    )

    # Reconcile default-group membership on reactivation
    if is_reactivation:
        assign_user_to_default_groups__no_commit(
            db_session, user, is_admin=(user.role == UserRole.ADMIN)
        )

    # Build updated fields by merging PATCH enterprise data with current values
    cf = current_fields or ScimMappingFields()
    fields = ScimMappingFields(
        department=ent_data.get("department", cf.department),
        manager=ent_data.get("manager", cf.manager),
        given_name=patched.name.givenName if patched.name else cf.given_name,
        family_name=patched.name.familyName if patched.name else cf.family_name,
        scim_emails_json=(
            serialize_emails(patched.emails)
            if patched.emails is not None
            else cf.scim_emails_json
        ),
    )

    dal.sync_user_external_id(
        user.id,
        patched.externalId,
        scim_username=new_scim_username,
        fields=fields,
    )

    dal.commit()

    return _scim_resource_response(
        provider.build_user_resource(
            user,
            patched.externalId,
            groups=dal.get_user_groups(user.id),
            scim_username=new_scim_username,
            fields=fields,
        )
    )


@scim_router.delete("/Users/{user_id}", status_code=204, response_model=None)
def delete_user(
    user_id: str,
    _token: ScimToken = Depends(verify_scim_token),
    db_session: Session = Depends(get_session),
) -> Response | ScimJSONResponse:
    """Delete a user (RFC 7644 §3.6).

    Deactivates the user and removes the SCIM mapping. Note that Okta
    typically uses PATCH active=false instead of DELETE.
    A second DELETE returns 404 per RFC 7644 §3.6.
    """
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    # If no SCIM mapping exists, the user was already deleted from
    # SCIM's perspective — return 404 per RFC 7644 §3.6.
    mapping = dal.get_user_mapping_by_user_id(user.id)
    if not mapping:
        return _scim_error_response(404, f"User {user_id} not found")

    dal.deactivate_user(user)
    dal.delete_user_mapping(mapping.id)

    dal.commit()

    return Response(status_code=204)


# ---------------------------------------------------------------------------
# Group helpers
# ---------------------------------------------------------------------------


def _fetch_group_or_404(group_id: str, dal: ScimDAL) -> UserGroup | ScimJSONResponse:
    """Parse *group_id* as int, look up the group, or return a 404 error."""
    try:
        gid = int(group_id)
    except ValueError:
        return _scim_error_response(404, f"Group {group_id} not found")
    group = dal.get_group(gid)
    if not group:
        return _scim_error_response(404, f"Group {group_id} not found")
    return group


def _parse_member_uuids(
    members: list[ScimGroupMember],
) -> tuple[list[UUID], str | None]:
    """Parse member value strings to UUIDs.

    Returns (uuid_list, error_message). error_message is None on success.
    """
    uuids: list[UUID] = []
    for m in members:
        try:
            uuids.append(UUID(m.value))
        except ValueError:
            return [], f"Invalid member ID: {m.value}"
    return uuids, None


def _validate_and_parse_members(
    members: list[ScimGroupMember], dal: ScimDAL
) -> tuple[list[UUID], str | None]:
    """Parse and validate member UUIDs exist in the database.

    Returns (uuid_list, error_message). error_message is None on success.
    """
    uuids, err = _parse_member_uuids(members)
    if err:
        return [], err

    if uuids:
        missing = dal.validate_member_ids(uuids)
        if missing:
            return [], f"Member(s) not found: {', '.join(str(u) for u in missing)}"

    return uuids, None


# ---------------------------------------------------------------------------
# Group CRUD (RFC 7644 §3)
# ---------------------------------------------------------------------------


@scim_router.get("/Groups", response_model=None)
def list_groups(
    filter: str | None = Query(None),
    excludedAttributes: str | None = None,
    startIndex: int = Query(1, ge=1),
    count: int = Query(100, ge=0, le=500),
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimListResponse | ScimJSONResponse:
    """List groups with optional SCIM filter and pagination."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
    dal.commit()

    try:
        scim_filter = parse_scim_filter(filter)
    except ValueError as e:
        return _scim_error_response(400, str(e))

    try:
        groups_with_ext_ids, total = dal.list_groups(scim_filter, startIndex, count)
    except ValueError as e:
        return _scim_error_response(400, str(e))

    resources: list[ScimUserResource | ScimGroupResource] = [
        provider.build_group_resource(group, dal.get_group_members(group.id), ext_id)
        for group, ext_id in groups_with_ext_ids
    ]

    return _build_list_response(
        resources,
        total,
        startIndex,
        count,
        excluded=_parse_excluded_attributes(excludedAttributes),
    )


@scim_router.get("/Groups/{group_id}", response_model=None)
def get_group(
    group_id: str,
    excludedAttributes: str | None = None,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimGroupResource | ScimJSONResponse:
    """Get a single group by ID."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
    dal.commit()

    result = _fetch_group_or_404(group_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    group = result

    mapping = dal.get_group_mapping_by_group_id(group.id)
    members = dal.get_group_members(group.id)

    resource = provider.build_group_resource(
        group, members, mapping.external_id if mapping else None
    )

    # RFC 7644 §3.4.2.5 — IdP may request certain attributes be omitted
    excluded = _parse_excluded_attributes(excludedAttributes)
    if excluded:
        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))

    return _scim_resource_response(resource)


@scim_router.post("/Groups", status_code=201, response_model=None)
def create_group(
    group_resource: ScimGroupResource,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimGroupResource | ScimJSONResponse:
    """Create a new group from a SCIM provisioning request."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    if group_resource.displayName in _RESERVED_GROUP_NAMES:
        return _scim_error_response(
            409, f"'{group_resource.displayName}' is a reserved group name."
        )

    if dal.get_group_by_name(group_resource.displayName):
        return _scim_error_response(
            409, f"Group with name '{group_resource.displayName}' already exists"
        )

    member_uuids, err = _validate_and_parse_members(group_resource.members, dal)
    if err:
        return _scim_error_response(400, err)

    db_group = UserGroup(
        name=group_resource.displayName,
        is_up_to_date=True,
        time_last_modified_by_user=func.now(),
    )
    try:
        dal.add_group(db_group)
    except IntegrityError:
        dal.rollback()
        return _scim_error_response(
            409, f"Group with name '{group_resource.displayName}' already exists"
        )

    # Every group gets the "basic" permission by default.
    dal.add_permission_grant_to_group(
        group_id=db_group.id,
        permission=Permission.BASIC_ACCESS,
        grant_source=GrantSource.SYSTEM,
    )

    dal.upsert_group_members(db_group.id, member_uuids)

    # Recompute permissions for initial members.
    recompute_user_permissions__no_commit(member_uuids, db_session)

    external_id = group_resource.externalId
    if external_id:
        dal.create_group_mapping(external_id=external_id, user_group_id=db_group.id)

    dal.commit()

    members = dal.get_group_members(db_group.id)
    return _scim_resource_response(
        provider.build_group_resource(db_group, members, external_id),
        status_code=201,
    )


@scim_router.put("/Groups/{group_id}", response_model=None)
def replace_group(
    group_id: str,
    group_resource: ScimGroupResource,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimGroupResource | ScimJSONResponse:
    """Replace a group entirely (RFC 7644 §3.5.1)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    group = result

    if group.name in _RESERVED_GROUP_NAMES and group_resource.displayName != group.name:
        return _scim_error_response(
            409, f"'{group.name}' is a reserved group name and cannot be renamed."
        )

    if (
        group_resource.displayName in _RESERVED_GROUP_NAMES
        and group_resource.displayName != group.name
    ):
        return _scim_error_response(
            409, f"'{group_resource.displayName}' is a reserved group name."
        )

    member_uuids, err = _validate_and_parse_members(group_resource.members, dal)
    if err:
        return _scim_error_response(400, err)

    # Capture old member IDs before replacing so we can recompute their
    # permissions after they are removed from the group.
    old_member_ids = {uid for uid, _ in dal.get_group_members(group.id)}

    dal.update_group(group, name=group_resource.displayName)
    dal.replace_group_members(group.id, member_uuids)
    dal.sync_group_external_id(group.id, group_resource.externalId)

    # Recompute permissions for current members (batch) and removed members.
    recompute_permissions_for_group__no_commit(group.id, db_session)
    removed_ids = list(old_member_ids - set(member_uuids))
    recompute_user_permissions__no_commit(removed_ids, db_session)

    dal.commit()

    members = dal.get_group_members(group.id)
    return _scim_resource_response(
        provider.build_group_resource(group, members, group_resource.externalId)
    )


@scim_router.patch("/Groups/{group_id}", response_model=None)
def patch_group(
    group_id: str,
    patch_request: ScimPatchRequest,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
) -> ScimGroupResource | ScimJSONResponse:
    """Partially update a group (RFC 7644 §3.5.2).

    Handles member add/remove operations from Okta and Azure AD.
    """
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    group = result

    mapping = dal.get_group_mapping_by_group_id(group.id)
    external_id = mapping.external_id if mapping else None

    current_members = dal.get_group_members(group.id)
    current = provider.build_group_resource(group, current_members, external_id)

    try:
        patched, added_ids, removed_ids = apply_group_patch(
            patch_request.Operations, current, provider.ignored_patch_paths
        )
    except ScimPatchError as e:
        return _scim_error_response(e.status, e.detail)

    new_name = patched.displayName if patched.displayName != group.name else None

    if group.name in _RESERVED_GROUP_NAMES and new_name:
        return _scim_error_response(
            409, f"'{group.name}' is a reserved group name and cannot be renamed."
        )

    if new_name and new_name in _RESERVED_GROUP_NAMES:
        return _scim_error_response(409, f"'{new_name}' is a reserved group name.")

    dal.update_group(group, name=new_name)

    affected_uuids: list[UUID] = []

    if added_ids:
        add_uuids = [UUID(mid) for mid in added_ids if _is_valid_uuid(mid)]
        if add_uuids:
            missing = dal.validate_member_ids(add_uuids)
            if missing:
                return _scim_error_response(
                    400,
                    f"Member(s) not found: {', '.join(str(u) for u in missing)}",
                )
            dal.upsert_group_members(group.id, add_uuids)
            affected_uuids.extend(add_uuids)

    if removed_ids:
        remove_uuids = [UUID(mid) for mid in removed_ids if _is_valid_uuid(mid)]
        dal.remove_group_members(group.id, remove_uuids)
        affected_uuids.extend(remove_uuids)

    # Recompute permissions for all users whose group membership changed.
    recompute_user_permissions__no_commit(affected_uuids, db_session)

    dal.sync_group_external_id(group.id, patched.externalId)
    dal.commit()

    members = dal.get_group_members(group.id)
    return _scim_resource_response(
        provider.build_group_resource(group, members, patched.externalId)
    )


@scim_router.delete("/Groups/{group_id}", status_code=204, response_model=None)
def delete_group(
    group_id: str,
    _token: ScimToken = Depends(verify_scim_token),
    db_session: Session = Depends(get_session),
) -> Response | ScimJSONResponse:
    """Delete a group (RFC 7644 §3.6)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
    if isinstance(result, ScimJSONResponse):
        return result
    group = result

    if group.name in _RESERVED_GROUP_NAMES:
        return _scim_error_response(409, f"'{group.name}' is a reserved group name.")

    # Capture member IDs before deletion so we can recompute their permissions.
    affected_user_ids = [uid for uid, _ in dal.get_group_members(group.id)]

    mapping = dal.get_group_mapping_by_group_id(group.id)
    if mapping:
        dal.delete_group_mapping(mapping.id)

    dal.delete_group_with_members(group)

    # Recompute permissions for users who lost this group membership.
    recompute_user_permissions__no_commit(affected_user_ids, db_session)

    dal.commit()

    return Response(status_code=204)


def _is_valid_uuid(value: str) -> bool:
    """Check if a string is a valid UUID."""
    try:
        UUID(value)
        return True
    except ValueError:
        return False


================================================
FILE: backend/ee/onyx/server/scim/auth.py
================================================
"""SCIM bearer token authentication.

SCIM endpoints are authenticated via bearer tokens that admins create in the
Onyx UI. This module provides:

  - ``verify_scim_token``: FastAPI dependency that extracts, hashes, and
    validates the token from the Authorization header.
  - ``generate_scim_token``: Creates a new cryptographically random token
    and returns the raw value, its SHA-256 hash, and a display suffix.

Token format: ``onyx_scim_<random>`` where ``<random>`` is 48 bytes of
URL-safe base64 from ``secrets.token_urlsafe``.

The hash is stored in the ``scim_token`` table; the raw value is shown to
the admin exactly once at creation time.
"""

import hashlib
import secrets

from fastapi import Depends
from fastapi import Request
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL
from onyx.auth.utils import get_hashed_bearer_token_from_request
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import ScimToken


class ScimAuthError(Exception):
    """Raised when SCIM bearer token authentication fails.

    Unlike HTTPException, this carries the status and detail so the SCIM
    exception handler can wrap them in an RFC 7644 §3.12 error envelope
    with ``schemas`` and ``status`` fields.
    """

    def __init__(self, status_code: int, detail: str) -> None:
        self.status_code = status_code
        self.detail = detail
        super().__init__(detail)


SCIM_TOKEN_PREFIX = "onyx_scim_"
SCIM_TOKEN_LENGTH = 48


def _hash_scim_token(token: str) -> str:
    """SHA-256 hash a SCIM token. No salt needed — tokens are random."""
    return hashlib.sha256(token.encode("utf-8")).hexdigest()


def generate_scim_token() -> tuple[str, str, str]:
    """Generate a new SCIM bearer token.

    Returns:
        A tuple of ``(raw_token, hashed_token, token_display)`` where
        ``token_display`` is a masked version showing only the last 4 chars.
    """
    raw_token = SCIM_TOKEN_PREFIX + secrets.token_urlsafe(SCIM_TOKEN_LENGTH)
    hashed_token = _hash_scim_token(raw_token)
    token_display = SCIM_TOKEN_PREFIX + "****" + raw_token[-4:]
    return raw_token, hashed_token, token_display


def _get_hashed_scim_token_from_request(request: Request) -> str | None:
    """Extract and hash a SCIM token from the request Authorization header."""
    return get_hashed_bearer_token_from_request(
        request,
        valid_prefixes=[SCIM_TOKEN_PREFIX],
        hash_fn=_hash_scim_token,
    )


def _get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:
    return ScimDAL(db_session)


def verify_scim_token(
    request: Request,
    dal: ScimDAL = Depends(_get_scim_dal),
) -> ScimToken:
    """FastAPI dependency that authenticates SCIM requests.

    Extracts the bearer token from the Authorization header, hashes it,
    looks it up in the database, and verifies it is active.

    Note:
        This dependency does NOT update ``last_used_at`` — the endpoint
        should do that via ``ScimDAL.update_token_last_used()`` so the
        timestamp write is part of the endpoint's transaction.

    Raises:
        HTTPException(401): If the token is missing, invalid, or inactive.
    """
    hashed = _get_hashed_scim_token_from_request(request)
    if not hashed:
        raise ScimAuthError(401, "Missing or invalid SCIM bearer token")

    token = dal.get_token_by_hash(hashed)

    if not token:
        raise ScimAuthError(401, "Invalid SCIM bearer token")

    if not token.is_active:
        raise ScimAuthError(401, "SCIM token has been revoked")

    return token


================================================
FILE: backend/ee/onyx/server/scim/filtering.py
================================================
"""SCIM filter expression parser (RFC 7644 §3.4.2.2).

Identity providers (Okta, Azure AD, OneLogin, etc.) use filters to look up
resources before deciding whether to create or update them. For example, when
an admin assigns a user to the Onyx app, the IdP first checks whether that
user already exists::

    GET /scim/v2/Users?filter=userName eq "john@example.com"

If zero results come back the IdP creates the user (``POST``); if a match is
found it links to the existing record and uses ``PUT``/``PATCH`` going forward.
The same pattern applies to groups (``displayName eq "Engineering"``).

This module parses the subset of the SCIM filter grammar that identity
providers actually send in practice:

    attribute SP operator SP value

Supported operators: ``eq``, ``co`` (contains), ``sw`` (starts with).
Compound filters (``and`` / ``or``) are not supported; if an IdP sends one
the parser returns ``None`` and the caller falls back to an unfiltered list.
"""

from __future__ import annotations

import re
from dataclasses import dataclass
from enum import Enum


class ScimFilterOperator(str, Enum):
    """Supported SCIM filter operators."""

    EQUAL = "eq"
    CONTAINS = "co"
    STARTS_WITH = "sw"


@dataclass(frozen=True, slots=True)
class ScimFilter:
    """Parsed SCIM filter expression."""

    attribute: str
    operator: ScimFilterOperator
    value: str


# Matches: attribute operator "value" (with or without quotes around value)
# Groups: (attribute) (operator) ("quoted value" | unquoted_value)
_FILTER_RE = re.compile(
    r"^(\S+)\s+(eq|co|sw)\s+"  # attribute + operator
    r'(?:"([^"]*)"'  # quoted value
    r"|'([^']*)')"  # or single-quoted value
    r"$",
    re.IGNORECASE,
)


def parse_scim_filter(filter_string: str | None) -> ScimFilter | None:
    """Parse a simple SCIM filter expression.

    Args:
        filter_string: Raw filter query parameter value, e.g.
            ``'userName eq "john@example.com"'``

    Returns:
        A ``ScimFilter`` if the expression is valid and uses a supported
        operator, or ``None`` if the input is empty / missing.

    Raises:
        ValueError: If the filter string is present but malformed or uses
            an unsupported operator.
    """
    if not filter_string or not filter_string.strip():
        return None

    match = _FILTER_RE.match(filter_string.strip())
    if not match:
        raise ValueError(f"Unsupported or malformed SCIM filter: {filter_string}")

    return _build_filter(match, filter_string)


def _build_filter(match: re.Match[str], raw: str) -> ScimFilter:
    """Extract fields from a regex match and construct a ScimFilter."""
    attribute = match.group(1)
    op_str = match.group(2).lower()
    # Value is in group 3 (double-quoted) or group 4 (single-quoted)
    value = match.group(3) if match.group(3) is not None else match.group(4)

    if value is None:
        raise ValueError(f"Unsupported or malformed SCIM filter: {raw}")

    operator = ScimFilterOperator(op_str)

    return ScimFilter(attribute=attribute, operator=operator, value=value)


================================================
FILE: backend/ee/onyx/server/scim/models.py
================================================
"""Pydantic schemas for SCIM 2.0 provisioning (RFC 7643 / RFC 7644).

SCIM protocol schemas follow the wire format defined in:
  - Core Schema: https://datatracker.ietf.org/doc/html/rfc7643
  - Protocol:    https://datatracker.ietf.org/doc/html/rfc7644

Admin API schemas are internal to Onyx and used for SCIM token management.
"""

from dataclasses import dataclass
from datetime import datetime
from enum import Enum

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import field_validator


# ---------------------------------------------------------------------------
# SCIM Schema URIs (RFC 7643 §8)
# Every SCIM JSON payload includes a "schemas" array identifying its type.
# IdPs like Okta/Azure AD use these URIs to determine how to parse responses.
# ---------------------------------------------------------------------------

SCIM_USER_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:User"
SCIM_GROUP_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:Group"
SCIM_LIST_RESPONSE_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:ListResponse"
SCIM_PATCH_OP_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:PatchOp"
SCIM_ERROR_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:Error"
SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA = (
    "urn:ietf:params:scim:schemas:core:2.0:ServiceProviderConfig"
)
SCIM_RESOURCE_TYPE_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:ResourceType"
SCIM_SCHEMA_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:Schema"
SCIM_ENTERPRISE_USER_SCHEMA = (
    "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
)


# ---------------------------------------------------------------------------
# SCIM Protocol Schemas
# ---------------------------------------------------------------------------


class ScimName(BaseModel):
    """User name components (RFC 7643 §4.1.1)."""

    givenName: str | None = None
    familyName: str | None = None
    formatted: str | None = None


class ScimEmail(BaseModel):
    """Email sub-attribute (RFC 7643 §4.1.2)."""

    value: str
    type: str | None = None
    primary: bool = False


class ScimMeta(BaseModel):
    """Resource metadata (RFC 7643 §3.1)."""

    resourceType: str | None = None
    created: datetime | None = None
    lastModified: datetime | None = None
    location: str | None = None


class ScimUserGroupRef(BaseModel):
    """Group reference within a User resource (RFC 7643 §4.1.2, read-only)."""

    value: str
    display: str | None = None


class ScimManagerRef(BaseModel):
    """Manager sub-attribute for the enterprise extension (RFC 7643 §4.3)."""

    value: str | None = None


class ScimEnterpriseExtension(BaseModel):
    """Enterprise User extension attributes (RFC 7643 §4.3)."""

    department: str | None = None
    manager: ScimManagerRef | None = None


@dataclass
class ScimMappingFields:
    """Stored SCIM mapping fields that need to round-trip through the IdP.

    Entra ID sends structured name components, email metadata, and enterprise
    extension attributes that must be returned verbatim in subsequent GET
    responses. These fields are persisted on ScimUserMapping and threaded
    through the DAL, provider, and endpoint layers.
    """

    department: str | None = None
    manager: str | None = None
    given_name: str | None = None
    family_name: str | None = None
    scim_emails_json: str | None = None


class ScimUserResource(BaseModel):
    """SCIM User resource representation (RFC 7643 §4.1).

    This is the JSON shape that IdPs send when creating/updating a user via
    SCIM, and the shape we return in GET responses. Field names use camelCase
    to match the SCIM wire format (not Python convention).
    """

    model_config = ConfigDict(populate_by_name=True)

    schemas: list[str] = Field(default_factory=lambda: [SCIM_USER_SCHEMA])
    id: str | None = None  # Onyx's internal user ID, set on responses
    externalId: str | None = None  # IdP's identifier for this user
    userName: str  # Typically the user's email address
    name: ScimName | None = None
    displayName: str | None = None
    emails: list[ScimEmail] = Field(default_factory=list)
    active: bool = True
    groups: list[ScimUserGroupRef] = Field(default_factory=list)
    meta: ScimMeta | None = None
    enterprise_extension: ScimEnterpriseExtension | None = Field(
        default=None,
        alias="urn:ietf:params:scim:schemas:extension:enterprise:2.0:User",
    )


class ScimGroupMember(BaseModel):
    """Group member reference (RFC 7643 §4.2).

    Represents a user within a SCIM group. The IdP sends these when adding
    or removing users from groups. ``value`` is the Onyx user ID.
    """

    value: str  # User ID of the group member
    display: str | None = None


class ScimGroupResource(BaseModel):
    """SCIM Group resource representation (RFC 7643 §4.2)."""

    schemas: list[str] = Field(default_factory=lambda: [SCIM_GROUP_SCHEMA])
    id: str | None = None
    externalId: str | None = None
    displayName: str
    members: list[ScimGroupMember] = Field(default_factory=list)
    meta: ScimMeta | None = None


class ScimListResponse(BaseModel):
    """Paginated list response (RFC 7644 §3.4.2)."""

    schemas: list[str] = Field(default_factory=lambda: [SCIM_LIST_RESPONSE_SCHEMA])
    totalResults: int
    startIndex: int = 1
    itemsPerPage: int = 100
    Resources: list[ScimUserResource | ScimGroupResource] = Field(default_factory=list)


class ScimPatchOperationType(str, Enum):
    """Supported PATCH operations (RFC 7644 §3.5.2)."""

    ADD = "add"
    REPLACE = "replace"
    REMOVE = "remove"


class ScimPatchResourceValue(BaseModel):
    """Partial resource dict for path-less PATCH replace operations.

    When an IdP sends a PATCH without a ``path``, the ``value`` is a dict
    of resource attributes to set.  IdPs may include read-only fields
    (``id``, ``schemas``, ``meta``) alongside actual changes — these are
    stripped by the provider's ``ignored_patch_paths`` before processing.

    ``extra="allow"`` lets unknown attributes pass through so the patch
    handler can decide what to do with them (ignore or reject).
    """

    model_config = ConfigDict(extra="allow")

    active: bool | None = None
    userName: str | None = None
    displayName: str | None = None
    externalId: str | None = None
    name: ScimName | None = None
    members: list[ScimGroupMember] | None = None
    id: str | None = None
    schemas: list[str] | None = None
    meta: ScimMeta | None = None


ScimPatchValue = str | bool | list[ScimGroupMember] | ScimPatchResourceValue | None


class ScimPatchOperation(BaseModel):
    """Single PATCH operation (RFC 7644 §3.5.2)."""

    op: ScimPatchOperationType
    path: str | None = None
    value: ScimPatchValue = None

    @field_validator("op", mode="before")
    @classmethod
    def normalize_operation(cls, v: object) -> object:
        """Normalize op to lowercase for case-insensitive matching.

        Some IdPs (e.g. Entra ID) send capitalized ops like ``"Replace"``
        instead of ``"replace"``. This is safe for all providers since the
        enum values are lowercase. If a future provider requires other
        pre-processing quirks, move patch deserialization into the provider
        subclass instead of adding more special cases here.
        """
        return v.lower() if isinstance(v, str) else v


class ScimPatchRequest(BaseModel):
    """PATCH request body (RFC 7644 §3.5.2).

    IdPs use PATCH to make incremental changes — e.g. deactivating a user
    (replace active=false) or adding/removing group members — instead of
    replacing the entire resource with PUT.
    """

    schemas: list[str] = Field(default_factory=lambda: [SCIM_PATCH_OP_SCHEMA])
    Operations: list[ScimPatchOperation]


class ScimError(BaseModel):
    """SCIM error response (RFC 7644 §3.12)."""

    schemas: list[str] = Field(default_factory=lambda: [SCIM_ERROR_SCHEMA])
    status: str
    detail: str | None = None
    scimType: str | None = None


# ---------------------------------------------------------------------------
# Service Provider Configuration (RFC 7643 §5)
# ---------------------------------------------------------------------------


class ScimSupported(BaseModel):
    """Generic supported/not-supported flag used in ServiceProviderConfig."""

    supported: bool


class ScimFilterConfig(BaseModel):
    """Filter configuration within ServiceProviderConfig (RFC 7643 §5)."""

    supported: bool
    maxResults: int = 100


class ScimServiceProviderConfig(BaseModel):
    """SCIM ServiceProviderConfig resource (RFC 7643 §5).

    Served at GET /scim/v2/ServiceProviderConfig. IdPs fetch this during
    initial setup to discover which SCIM features our server supports
    (e.g. PATCH yes, bulk no, filtering yes).
    """

    schemas: list[str] = Field(
        default_factory=lambda: [SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA]
    )
    patch: ScimSupported = ScimSupported(supported=True)
    bulk: ScimSupported = ScimSupported(supported=False)
    filter: ScimFilterConfig = ScimFilterConfig(supported=True)
    changePassword: ScimSupported = ScimSupported(supported=False)
    sort: ScimSupported = ScimSupported(supported=False)
    etag: ScimSupported = ScimSupported(supported=False)
    authenticationSchemes: list[dict[str, str]] = Field(
        default_factory=lambda: [
            {
                "type": "oauthbearertoken",
                "name": "OAuth Bearer Token",
                "description": "Authentication scheme using a SCIM bearer token",
            }
        ]
    )


class ScimSchemaAttribute(BaseModel):
    """Attribute definition within a SCIM Schema (RFC 7643 §7)."""

    name: str
    type: str
    multiValued: bool = False
    required: bool = False
    description: str = ""
    caseExact: bool = False
    mutability: str = "readWrite"
    returned: str = "default"
    uniqueness: str = "none"
    subAttributes: list["ScimSchemaAttribute"] = Field(default_factory=list)


class ScimSchemaDefinition(BaseModel):
    """SCIM Schema definition (RFC 7643 §7).

    Served at GET /scim/v2/Schemas. Describes the attributes available
    on each resource type so IdPs know which fields they can provision.
    """

    schemas: list[str] = Field(default_factory=lambda: [SCIM_SCHEMA_SCHEMA])
    id: str
    name: str
    description: str
    attributes: list[ScimSchemaAttribute] = Field(default_factory=list)


class ScimSchemaExtension(BaseModel):
    """Schema extension reference within ResourceType (RFC 7643 §6)."""

    model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True)

    schema_: str = Field(alias="schema")
    required: bool


class ScimResourceType(BaseModel):
    """SCIM ResourceType resource (RFC 7643 §6).

    Served at GET /scim/v2/ResourceTypes. Tells the IdP which resource
    types are available (Users, Groups) and their respective endpoints.
    """

    model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True)

    schemas: list[str] = Field(default_factory=lambda: [SCIM_RESOURCE_TYPE_SCHEMA])
    id: str
    name: str
    endpoint: str
    description: str | None = None
    schema_: str = Field(alias="schema")
    schemaExtensions: list[ScimSchemaExtension] = Field(default_factory=list)


# ---------------------------------------------------------------------------
# Admin API Schemas (Onyx-internal, for SCIM token management)
# These are NOT part of the SCIM protocol. They power the Onyx admin UI
# where admins create/revoke the bearer tokens that IdPs use to authenticate.
# ---------------------------------------------------------------------------


class ScimTokenCreate(BaseModel):
    """Request to create a new SCIM bearer token."""

    name: str


class ScimTokenResponse(BaseModel):
    """SCIM token metadata returned in list/get responses."""

    id: int
    name: str
    token_display: str
    is_active: bool
    created_at: datetime
    last_used_at: datetime | None = None
    idp_domain: str | None = None


class ScimTokenCreatedResponse(ScimTokenResponse):
    """Response returned when a new SCIM token is created.

    Includes the raw token value which is only available at creation time.
    """

    raw_token: str


================================================
FILE: backend/ee/onyx/server/scim/patch.py
================================================
"""SCIM PATCH operation handler (RFC 7644 §3.5.2).

Identity providers use PATCH to make incremental changes to SCIM resources
instead of replacing the entire resource with PUT. Common operations include:

  - Deactivating a user: ``replace`` ``active`` with ``false``
  - Adding group members: ``add`` to ``members``
  - Removing group members: ``remove`` from ``members[value eq "..."]``

This module applies PATCH operations to Pydantic SCIM resource objects and
returns the modified result. It does NOT touch the database — the caller is
responsible for persisting changes.
"""

from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from dataclasses import field
from typing import Any

from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimPatchResourceValue
from ee.onyx.server.scim.models import ScimPatchValue
from ee.onyx.server.scim.models import ScimUserResource

logger = logging.getLogger(__name__)

# Lowercased enterprise extension URN for case-insensitive matching
_ENTERPRISE_URN_LOWER = SCIM_ENTERPRISE_USER_SCHEMA.lower()

# Pattern for email filter paths, e.g.:
#   emails[primary eq true].value  (Okta)
#   emails[type eq "work"].value   (Azure AD / Entra ID)
_EMAIL_FILTER_RE = re.compile(
    r"^emails\[.+\]\.value$",
    re.IGNORECASE,
)

# Pattern for member removal path: members[value eq "user-id"]
_MEMBER_FILTER_RE = re.compile(
    r'^members\[value\s+eq\s+"([^"]+)"\]$',
    re.IGNORECASE,
)

# ---------------------------------------------------------------------------
# Dispatch tables for user PATCH paths
#
# Maps lowercased SCIM path → (camelCase key, target dict name).
# "data" writes to the top-level resource dict, "name" writes to the
# name sub-object dict. This replaces the elif chains for simple fields.
# ---------------------------------------------------------------------------

_USER_REPLACE_PATHS: dict[str, tuple[str, str]] = {
    "active": ("active", "data"),
    "username": ("userName", "data"),
    "externalid": ("externalId", "data"),
    "name.givenname": ("givenName", "name"),
    "name.familyname": ("familyName", "name"),
    "name.formatted": ("formatted", "name"),
}

_USER_REMOVE_PATHS: dict[str, tuple[str, str]] = {
    "externalid": ("externalId", "data"),
    "name.givenname": ("givenName", "name"),
    "name.familyname": ("familyName", "name"),
    "name.formatted": ("formatted", "name"),
    "displayname": ("displayName", "data"),
}

_GROUP_REPLACE_PATHS: dict[str, tuple[str, str]] = {
    "displayname": ("displayName", "data"),
    "externalid": ("externalId", "data"),
}


class ScimPatchError(Exception):
    """Raised when a PATCH operation cannot be applied."""

    def __init__(self, detail: str, status: int = 400) -> None:
        self.detail = detail
        self.status = status
        super().__init__(detail)


@dataclass
class _UserPatchCtx:
    """Bundles the mutable state for user PATCH operations."""

    data: dict[str, Any]
    name_data: dict[str, Any]
    ent_data: dict[str, str | None] = field(default_factory=dict)


# ---------------------------------------------------------------------------
# User PATCH
# ---------------------------------------------------------------------------


def apply_user_patch(
    operations: list[ScimPatchOperation],
    current: ScimUserResource,
    ignored_paths: frozenset[str] = frozenset(),
) -> tuple[ScimUserResource, dict[str, str | None]]:
    """Apply SCIM PATCH operations to a user resource.

    Args:
        operations: The PATCH operations to apply.
        current: The current user resource state.
        ignored_paths: SCIM attribute paths to silently skip (from provider).

    Returns:
        A tuple of (modified user resource, enterprise extension data dict).
        The enterprise dict has keys ``"department"`` and ``"manager"``
        with values set only when a PATCH operation touched them.

    Raises:
        ScimPatchError: If an operation targets an unsupported path.
    """
    data = current.model_dump()
    ctx = _UserPatchCtx(data=data, name_data=data.get("name") or {})

    for op in operations:
        if op.op in (ScimPatchOperationType.REPLACE, ScimPatchOperationType.ADD):
            _apply_user_replace(op, ctx, ignored_paths)
        elif op.op == ScimPatchOperationType.REMOVE:
            _apply_user_remove(op, ctx, ignored_paths)
        else:
            raise ScimPatchError(
                f"Unsupported operation '{op.op.value}' on User resource"
            )

    ctx.data["name"] = ctx.name_data
    return ScimUserResource.model_validate(ctx.data), ctx.ent_data


def _apply_user_replace(
    op: ScimPatchOperation,
    ctx: _UserPatchCtx,
    ignored_paths: frozenset[str],
) -> None:
    """Apply a replace/add operation to user data."""
    path = (op.path or "").lower()

    if not path:
        # No path — value is a resource dict of top-level attributes to set.
        if isinstance(op.value, ScimPatchResourceValue):
            for key, val in op.value.model_dump(exclude_unset=True).items():
                _set_user_field(key.lower(), val, ctx, ignored_paths, strict=False)
        else:
            raise ScimPatchError("Replace without path requires a dict value")
        return

    _set_user_field(path, op.value, ctx, ignored_paths)


def _apply_user_remove(
    op: ScimPatchOperation,
    ctx: _UserPatchCtx,
    ignored_paths: frozenset[str],
) -> None:
    """Apply a remove operation to user data — clears the target field."""
    path = (op.path or "").lower()
    if not path:
        raise ScimPatchError("Remove operation requires a path")

    if path in ignored_paths:
        return

    entry = _USER_REMOVE_PATHS.get(path)
    if entry:
        key, target = entry
        target_dict = ctx.data if target == "data" else ctx.name_data
        target_dict[key] = None
        return

    raise ScimPatchError(f"Unsupported remove path '{path}' for User PATCH")


def _set_user_field(
    path: str,
    value: ScimPatchValue,
    ctx: _UserPatchCtx,
    ignored_paths: frozenset[str],
    *,
    strict: bool = True,
) -> None:
    """Set a single field on user data by SCIM path.

    Args:
        strict: When ``False`` (path-less replace), unknown attributes are
            silently skipped.  When ``True`` (explicit path), they raise.
    """
    if path in ignored_paths:
        return

    # Simple field writes handled by the dispatch table
    entry = _USER_REPLACE_PATHS.get(path)
    if entry:
        key, target = entry
        target_dict = ctx.data if target == "data" else ctx.name_data
        target_dict[key] = value
        return

    # displayName sets both the top-level field and the name.formatted sub-field
    if path == "displayname":
        ctx.data["displayName"] = value
        ctx.name_data["formatted"] = value
    elif path == "name":
        if isinstance(value, dict):
            for k, v in value.items():
                ctx.name_data[k] = v
    elif path == "emails":
        if isinstance(value, list):
            ctx.data["emails"] = value
    elif _EMAIL_FILTER_RE.match(path):
        _update_primary_email(ctx.data, value)
    elif path.startswith(_ENTERPRISE_URN_LOWER):
        _set_enterprise_field(path, value, ctx.ent_data)
    elif not strict:
        return
    else:
        raise ScimPatchError(f"Unsupported path '{path}' for User PATCH")


def _update_primary_email(data: dict[str, Any], value: ScimPatchValue) -> None:
    """Update the primary email entry via an email filter path."""
    emails: list[dict] = data.get("emails") or []
    for email_entry in emails:
        if email_entry.get("primary"):
            email_entry["value"] = value
            break
    else:
        emails.append({"value": value, "type": "work", "primary": True})
    data["emails"] = emails


def _to_dict(value: ScimPatchValue) -> dict | None:
    """Coerce a SCIM patch value to a plain dict if possible.

    Pydantic may parse raw dicts as ``ScimPatchResourceValue`` (which uses
    ``extra="allow"``), so we also dump those back to a dict.
    """
    if isinstance(value, dict):
        return value
    if isinstance(value, ScimPatchResourceValue):
        return value.model_dump(exclude_unset=True)
    return None


def _set_enterprise_field(
    path: str,
    value: ScimPatchValue,
    ent_data: dict[str, str | None],
) -> None:
    """Handle enterprise extension URN paths or value dicts."""
    # Full URN as key with dict value (path-less PATCH)
    # e.g. key="urn:...:user", value={"department": "Eng", "manager": {...}}
    if path == _ENTERPRISE_URN_LOWER:
        d = _to_dict(value)
        if d is not None:
            if "department" in d:
                ent_data["department"] = d["department"]
            if "manager" in d:
                mgr = d["manager"]
                if isinstance(mgr, dict):
                    ent_data["manager"] = mgr.get("value")
        return

    # Dotted URN path, e.g. "urn:...:user:department"
    suffix = path[len(_ENTERPRISE_URN_LOWER) :].lstrip(":").lower()
    if suffix == "department":
        ent_data["department"] = str(value) if value is not None else None
    elif suffix == "manager":
        d = _to_dict(value)
        if d is not None:
            ent_data["manager"] = d.get("value")
        elif isinstance(value, str):
            ent_data["manager"] = value
    else:
        # Unknown enterprise attributes are silently ignored rather than
        # rejected — IdPs may send attributes we don't model yet.
        logger.warning("Ignoring unknown enterprise extension attribute '%s'", suffix)


# ---------------------------------------------------------------------------
# Group PATCH
# ---------------------------------------------------------------------------


def apply_group_patch(
    operations: list[ScimPatchOperation],
    current: ScimGroupResource,
    ignored_paths: frozenset[str] = frozenset(),
) -> tuple[ScimGroupResource, list[str], list[str]]:
    """Apply SCIM PATCH operations to a group resource.

    Args:
        operations: The PATCH operations to apply.
        current: The current group resource state.
        ignored_paths: SCIM attribute paths to silently skip (from provider).

    Returns:
        A tuple of (modified group, added member IDs, removed member IDs).
        The caller uses the member ID lists to update the database.

    Raises:
        ScimPatchError: If an operation targets an unsupported path.
    """
    data = current.model_dump()
    current_members: list[dict] = list(data.get("members") or [])
    added_ids: list[str] = []
    removed_ids: list[str] = []

    for op in operations:
        if op.op == ScimPatchOperationType.REPLACE:
            _apply_group_replace(
                op, data, current_members, added_ids, removed_ids, ignored_paths
            )
        elif op.op == ScimPatchOperationType.ADD:
            _apply_group_add(op, current_members, added_ids)
        elif op.op == ScimPatchOperationType.REMOVE:
            _apply_group_remove(op, current_members, removed_ids)
        else:
            raise ScimPatchError(
                f"Unsupported operation '{op.op.value}' on Group resource"
            )

    data["members"] = current_members
    group = ScimGroupResource.model_validate(data)
    return group, added_ids, removed_ids


def _apply_group_replace(
    op: ScimPatchOperation,
    data: dict,
    current_members: list[dict],
    added_ids: list[str],
    removed_ids: list[str],
    ignored_paths: frozenset[str],
) -> None:
    """Apply a replace operation to group data."""
    path = (op.path or "").lower()

    if not path:
        if isinstance(op.value, ScimPatchResourceValue):
            dumped = op.value.model_dump(exclude_unset=True)
            for key, val in dumped.items():
                if key.lower() == "members":
                    _replace_members(val, current_members, added_ids, removed_ids)
                else:
                    _set_group_field(key.lower(), val, data, ignored_paths)
        else:
            raise ScimPatchError("Replace without path requires a dict value")
        return

    if path == "members":
        _replace_members(
            _members_to_dicts(op.value), current_members, added_ids, removed_ids
        )
        return

    _set_group_field(path, op.value, data, ignored_paths)


def _members_to_dicts(
    value: str | bool | list[ScimGroupMember] | ScimPatchResourceValue | None,
) -> list[dict]:
    """Convert a member list value to a list of dicts for internal processing."""
    if not isinstance(value, list):
        raise ScimPatchError("Replace members requires a list value")
    return [m.model_dump(exclude_none=True) for m in value]


def _replace_members(
    value: list[dict],
    current_members: list[dict],
    added_ids: list[str],
    removed_ids: list[str],
) -> None:
    """Replace the entire group member list."""
    old_ids = {m["value"] for m in current_members}
    new_ids = {m.get("value", "") for m in value}

    removed_ids.extend(old_ids - new_ids)
    added_ids.extend(new_ids - old_ids)

    current_members[:] = value


def _set_group_field(
    path: str,
    value: ScimPatchValue,
    data: dict,
    ignored_paths: frozenset[str],
) -> None:
    """Set a single field on group data by SCIM path."""
    if path in ignored_paths:
        return

    entry = _GROUP_REPLACE_PATHS.get(path)
    if entry:
        key, _ = entry
        data[key] = value
        return

    raise ScimPatchError(f"Unsupported path '{path}' for Group PATCH")


def _apply_group_add(
    op: ScimPatchOperation,
    members: list[dict],
    added_ids: list[str],
) -> None:
    """Add members to a group."""
    path = (op.path or "").lower()

    if path and path != "members":
        raise ScimPatchError(f"Unsupported add path '{op.path}' for Group")

    if not isinstance(op.value, list):
        raise ScimPatchError("Add members requires a list value")

    member_dicts = [m.model_dump(exclude_none=True) for m in op.value]

    existing_ids = {m["value"] for m in members}
    for member_data in member_dicts:
        member_id = member_data.get("value", "")
        if member_id and member_id not in existing_ids:
            members.append(member_data)
            added_ids.append(member_id)
            existing_ids.add(member_id)


def _apply_group_remove(
    op: ScimPatchOperation,
    members: list[dict],
    removed_ids: list[str],
) -> None:
    """Remove members from a group."""
    if not op.path:
        raise ScimPatchError("Remove operation requires a path")

    match = _MEMBER_FILTER_RE.match(op.path)
    if not match:
        raise ScimPatchError(
            f"Unsupported remove path '{op.path}'. Expected: members[value eq \"user-id\"]"
        )

    target_id = match.group(1)
    original_len = len(members)
    members[:] = [m for m in members if m.get("value") != target_id]

    if len(members) < original_len:
        removed_ids.append(target_id)


================================================
FILE: backend/ee/onyx/server/scim/providers/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/scim/providers/base.py
================================================
"""Base SCIM provider abstraction."""

from __future__ import annotations

import json
import logging
from abc import ABC
from abc import abstractmethod
from uuid import UUID

from pydantic import ValidationError

from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
from ee.onyx.server.scim.models import ScimEmail
from ee.onyx.server.scim.models import ScimEnterpriseExtension
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimManagerRef
from ee.onyx.server.scim.models import ScimMappingFields
from ee.onyx.server.scim.models import ScimMeta
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimUserGroupRef
from ee.onyx.server.scim.models import ScimUserResource
from onyx.db.models import User
from onyx.db.models import UserGroup


logger = logging.getLogger(__name__)

COMMON_IGNORED_PATCH_PATHS: frozenset[str] = frozenset(
    {
        "id",
        "schemas",
        "meta",
    }
)


class ScimProvider(ABC):
    """Base class for provider-specific SCIM behavior.

    Subclass this to handle IdP-specific quirks. The base class provides
    RFC 7643-compliant response builders that populate all standard fields.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier for this provider (e.g. ``"okta"``)."""
        ...

    @property
    @abstractmethod
    def ignored_patch_paths(self) -> frozenset[str]:
        """SCIM attribute paths to silently skip in PATCH value-object dicts.

        IdPs may include read-only or meta fields alongside actual changes
        (e.g. Okta sends ``{"id": "...", "active": false}``). Paths listed
        here are silently dropped instead of raising an error.
        """
        ...

    @property
    def user_schemas(self) -> list[str]:
        """Schema URIs to include in User resource responses.

        Override in subclasses to advertise additional schemas (e.g. the
        enterprise extension for Entra ID).
        """
        return [SCIM_USER_SCHEMA]

    def build_user_resource(
        self,
        user: User,
        external_id: str | None = None,
        groups: list[tuple[int, str]] | None = None,
        scim_username: str | None = None,
        fields: ScimMappingFields | None = None,
    ) -> ScimUserResource:
        """Build a SCIM User response from an Onyx User.

        Args:
            user: The Onyx user model.
            external_id: The IdP's external identifier for this user.
            groups: List of ``(group_id, group_name)`` tuples for the
                ``groups`` read-only attribute. Pass ``None`` or ``[]``
                for newly-created users.
            scim_username: The original-case userName from the IdP. Falls
                back to ``user.email`` (lowercase) when not available.
            fields: Stored mapping fields that the IdP expects round-tripped.
        """
        f = fields or ScimMappingFields()
        group_refs = [
            ScimUserGroupRef(value=str(gid), display=gname)
            for gid, gname in (groups or [])
        ]

        username = scim_username or user.email

        # Build enterprise extension when at least one value is present.
        # Dynamically add the enterprise URN to schemas per RFC 7643 §3.0.
        enterprise_ext: ScimEnterpriseExtension | None = None
        schemas = list(self.user_schemas)
        if f.department is not None or f.manager is not None:
            manager_ref = (
                ScimManagerRef(value=f.manager) if f.manager is not None else None
            )
            enterprise_ext = ScimEnterpriseExtension(
                department=f.department,
                manager=manager_ref,
            )
            if SCIM_ENTERPRISE_USER_SCHEMA not in schemas:
                schemas.append(SCIM_ENTERPRISE_USER_SCHEMA)

        name = self.build_scim_name(user, f)
        emails = _deserialize_emails(f.scim_emails_json, username)

        resource = ScimUserResource(
            schemas=schemas,
            id=str(user.id),
            externalId=external_id,
            userName=username,
            name=name,
            displayName=user.personal_name,
            emails=emails,
            active=user.is_active,
            groups=group_refs,
            meta=ScimMeta(resourceType="User"),
        )
        resource.enterprise_extension = enterprise_ext
        return resource

    def build_group_resource(
        self,
        group: UserGroup,
        members: list[tuple[UUID, str | None]],
        external_id: str | None = None,
    ) -> ScimGroupResource:
        """Build a SCIM Group response from an Onyx UserGroup."""
        scim_members = [
            ScimGroupMember(value=str(uid), display=email) for uid, email in members
        ]
        return ScimGroupResource(
            id=str(group.id),
            externalId=external_id,
            displayName=group.name,
            members=scim_members,
            meta=ScimMeta(resourceType="Group"),
        )

    def build_scim_name(
        self,
        user: User,
        fields: ScimMappingFields,
    ) -> ScimName:
        """Build SCIM name components for the response.

        Round-trips stored ``given_name``/``family_name`` when available (so
        the IdP gets back what it sent). Falls back to splitting
        ``personal_name`` for users provisioned before we stored components.
        Always returns a ScimName — Okta's spec tests expect ``name``
        (with ``givenName``/``familyName``) on every user resource.
        Providers may override for custom behavior.
        """
        if fields.given_name is not None or fields.family_name is not None:
            return ScimName(
                givenName=fields.given_name or "",
                familyName=fields.family_name or "",
                formatted=user.personal_name or "",
            )
        if not user.personal_name:
            # Derive a reasonable name from the email so that SCIM spec tests
            # see non-empty givenName / familyName for every user resource.
            local = user.email.split("@")[0] if user.email else ""
            return ScimName(givenName=local, familyName="", formatted=local)
        parts = user.personal_name.split(" ", 1)
        return ScimName(
            givenName=parts[0],
            familyName=parts[1] if len(parts) > 1 else "",
            formatted=user.personal_name,
        )


def _deserialize_emails(stored_json: str | None, username: str) -> list[ScimEmail]:
    """Deserialize stored email entries or build a default work email."""
    if stored_json:
        try:
            entries = json.loads(stored_json)
            if isinstance(entries, list) and entries:
                return [ScimEmail(**e) for e in entries]
        except (json.JSONDecodeError, TypeError, ValidationError):
            logger.warning(
                "Corrupt scim_emails_json, falling back to default: %s", stored_json
            )
    return [ScimEmail(value=username, type="work", primary=True)]


def serialize_emails(emails: list[ScimEmail]) -> str | None:
    """Serialize SCIM email entries to JSON for storage."""
    if not emails:
        return None
    return json.dumps([e.model_dump(exclude_none=True) for e in emails])


def get_default_provider() -> ScimProvider:
    """Return the default SCIM provider.

    Currently returns ``OktaProvider`` since Okta is the primary supported
    IdP. When provider detection is added (via token metadata or tenant
    config), this can be replaced with dynamic resolution.
    """
    from ee.onyx.server.scim.providers.okta import OktaProvider

    return OktaProvider()


================================================
FILE: backend/ee/onyx/server/scim/providers/entra.py
================================================
"""Entra ID (Azure AD) SCIM provider."""

from __future__ import annotations

from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
from ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS
from ee.onyx.server.scim.providers.base import ScimProvider

_ENTRA_IGNORED_PATCH_PATHS = COMMON_IGNORED_PATCH_PATHS


class EntraProvider(ScimProvider):
    """Entra ID (Azure AD) SCIM provider.

    Entra behavioral notes:
      - Sends capitalized PATCH ops (``"Add"``, ``"Replace"``, ``"Remove"``)
        — handled by ``ScimPatchOperation.normalize_op`` validator.
      - Sends the enterprise extension URN as a key in path-less PATCH value
        dicts — handled by ``_set_enterprise_field`` in ``patch.py`` to
        store department/manager values.
      - Expects the enterprise extension schema in ``schemas`` arrays and
        ``/Schemas`` + ``/ResourceTypes`` discovery endpoints.
    """

    @property
    def name(self) -> str:
        return "entra"

    @property
    def ignored_patch_paths(self) -> frozenset[str]:
        return _ENTRA_IGNORED_PATCH_PATHS

    @property
    def user_schemas(self) -> list[str]:
        return [SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA]


================================================
FILE: backend/ee/onyx/server/scim/providers/okta.py
================================================
"""Okta SCIM provider."""

from __future__ import annotations

from ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS
from ee.onyx.server.scim.providers.base import ScimProvider


class OktaProvider(ScimProvider):
    """Okta SCIM provider.

    Okta behavioral notes:
      - Uses ``PATCH {"active": false}`` for deprovisioning (not DELETE)
      - Sends path-less PATCH with value dicts containing extra fields
        (``id``, ``schemas``)
      - Expects ``displayName`` and ``groups`` in user responses
      - Only uses ``eq`` operator for ``userName`` filter
    """

    @property
    def name(self) -> str:
        return "okta"

    @property
    def ignored_patch_paths(self) -> frozenset[str]:
        return COMMON_IGNORED_PATCH_PATHS


================================================
FILE: backend/ee/onyx/server/scim/schema_definitions.py
================================================
"""Static SCIM service discovery responses (RFC 7643 §5, §6, §7).

Pre-built at import time — these never change at runtime. Separated from
api.py to keep the endpoint module focused on request handling.
"""

from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import SCIM_GROUP_SCHEMA
from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
from ee.onyx.server.scim.models import ScimResourceType
from ee.onyx.server.scim.models import ScimSchemaAttribute
from ee.onyx.server.scim.models import ScimSchemaDefinition
from ee.onyx.server.scim.models import ScimServiceProviderConfig

SERVICE_PROVIDER_CONFIG = ScimServiceProviderConfig()

USER_RESOURCE_TYPE = ScimResourceType.model_validate(
    {
        "id": "User",
        "name": "User",
        "endpoint": "/scim/v2/Users",
        "description": "SCIM User resource",
        "schema": SCIM_USER_SCHEMA,
        "schemaExtensions": [
            {"schema": SCIM_ENTERPRISE_USER_SCHEMA, "required": False}
        ],
    }
)

GROUP_RESOURCE_TYPE = ScimResourceType.model_validate(
    {
        "id": "Group",
        "name": "Group",
        "endpoint": "/scim/v2/Groups",
        "description": "SCIM Group resource",
        "schema": SCIM_GROUP_SCHEMA,
    }
)

USER_SCHEMA_DEF = ScimSchemaDefinition(
    id=SCIM_USER_SCHEMA,
    name="User",
    description="SCIM core User schema",
    attributes=[
        ScimSchemaAttribute(
            name="userName",
            type="string",
            required=True,
            uniqueness="server",
            description="Unique identifier for the user, typically an email address.",
        ),
        ScimSchemaAttribute(
            name="name",
            type="complex",
            description="The components of the user's name.",
            subAttributes=[
                ScimSchemaAttribute(
                    name="givenName",
                    type="string",
                    description="The user's first name.",
                ),
                ScimSchemaAttribute(
                    name="familyName",
                    type="string",
                    description="The user's last name.",
                ),
                ScimSchemaAttribute(
                    name="formatted",
                    type="string",
                    description="The full name, including all middle names and titles.",
                ),
            ],
        ),
        ScimSchemaAttribute(
            name="emails",
            type="complex",
            multiValued=True,
            description="Email addresses for the user.",
            subAttributes=[
                ScimSchemaAttribute(
                    name="value",
                    type="string",
                    description="Email address value.",
                ),
                ScimSchemaAttribute(
                    name="type",
                    type="string",
                    description="Label for this email (e.g. 'work').",
                ),
                ScimSchemaAttribute(
                    name="primary",
                    type="boolean",
                    description="Whether this is the primary email.",
                ),
            ],
        ),
        ScimSchemaAttribute(
            name="active",
            type="boolean",
            description="Whether the user account is active.",
        ),
        ScimSchemaAttribute(
            name="externalId",
            type="string",
            description="Identifier from the provisioning client (IdP).",
            caseExact=True,
        ),
    ],
)

ENTERPRISE_USER_SCHEMA_DEF = ScimSchemaDefinition(
    id=SCIM_ENTERPRISE_USER_SCHEMA,
    name="EnterpriseUser",
    description="Enterprise User extension (RFC 7643 §4.3)",
    attributes=[
        ScimSchemaAttribute(
            name="department",
            type="string",
            description="Department.",
        ),
        ScimSchemaAttribute(
            name="manager",
            type="complex",
            description="The user's manager.",
            subAttributes=[
                ScimSchemaAttribute(
                    name="value",
                    type="string",
                    description="Manager user ID.",
                ),
            ],
        ),
    ],
)

GROUP_SCHEMA_DEF = ScimSchemaDefinition(
    id=SCIM_GROUP_SCHEMA,
    name="Group",
    description="SCIM core Group schema",
    attributes=[
        ScimSchemaAttribute(
            name="displayName",
            type="string",
            required=True,
            description="Human-readable name for the group.",
        ),
        ScimSchemaAttribute(
            name="members",
            type="complex",
            multiValued=True,
            description="Members of the group.",
            subAttributes=[
                ScimSchemaAttribute(
                    name="value",
                    type="string",
                    description="User ID of the group member.",
                ),
                ScimSchemaAttribute(
                    name="display",
                    type="string",
                    mutability="readOnly",
                    description="Display name of the group member.",
                ),
            ],
        ),
        ScimSchemaAttribute(
            name="externalId",
            type="string",
            description="Identifier from the provisioning client (IdP).",
            caseExact=True,
        ),
    ],
)


================================================
FILE: backend/ee/onyx/server/seeding.py
================================================
import json
import os
from copy import deepcopy
from typing import List
from typing import Optional

from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.db.standard_answer import (
    create_initial_default_standard_answer_category,
)
from ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload
from ee.onyx.server.enterprise_settings.models import EnterpriseSettings
from ee.onyx.server.enterprise_settings.models import NavigationItem
from ee.onyx.server.enterprise_settings.store import store_analytics_script
from ee.onyx.server.enterprise_settings.store import (
    store_settings as store_ee_settings,
)
from ee.onyx.server.enterprise_settings.store import upload_logo
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import Tool
from onyx.db.persona import upsert_persona
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.settings.models import Settings
from onyx.server.settings.store import store_settings as store_base_settings
from onyx.utils.logger import setup_logger


class CustomToolSeed(BaseModel):
    name: str
    description: str
    definition_path: str
    custom_headers: Optional[List[dict]] = None
    display_name: Optional[str] = None
    in_code_tool_id: Optional[str] = None
    user_id: Optional[str] = None


logger = setup_logger()

_SEED_CONFIG_ENV_VAR_NAME = "ENV_SEED_CONFIGURATION"


class NavigationItemSeed(BaseModel):
    link: str
    title: str
    # NOTE: SVG at this path must not have a width / height specified
    svg_path: str


class SeedConfiguration(BaseModel):
    llms: list[LLMProviderUpsertRequest] | None = None
    admin_user_emails: list[str] | None = None
    seeded_logo_path: str | None = None
    personas: list[PersonaUpsertRequest] | None = None
    settings: Settings | None = None
    enterprise_settings: EnterpriseSettings | None = None

    # allows for specifying custom navigation items that have your own custom SVG logos
    nav_item_overrides: list[NavigationItemSeed] | None = None

    # Use existing `CUSTOM_ANALYTICS_SECRET_KEY` for reference
    analytics_script_path: str | None = None
    custom_tools: List[CustomToolSeed] | None = None


def _parse_env() -> SeedConfiguration | None:
    seed_config_str = os.getenv(_SEED_CONFIG_ENV_VAR_NAME)
    if not seed_config_str:
        return None
    seed_config = SeedConfiguration.model_validate_json(seed_config_str)
    return seed_config


def _seed_custom_tools(db_session: Session, tools: List[CustomToolSeed]) -> None:
    if tools:
        logger.notice("Seeding Custom Tools")
        for tool in tools:
            try:
                logger.debug(f"Attempting to seed tool: {tool.name}")
                logger.debug(f"Reading definition from: {tool.definition_path}")
                with open(tool.definition_path, "r") as file:
                    file_content = file.read()
                    if not file_content.strip():
                        raise ValueError("File is empty")
                    openapi_schema = json.loads(file_content)
                db_tool = Tool(
                    name=tool.name,
                    description=tool.description,
                    openapi_schema=openapi_schema,
                    custom_headers=tool.custom_headers,
                    display_name=tool.display_name,
                    in_code_tool_id=tool.in_code_tool_id,
                    user_id=tool.user_id,
                )
                db_session.add(db_tool)
                logger.debug(f"Successfully added tool: {tool.name}")
            except FileNotFoundError:
                logger.error(
                    f"Definition file not found for tool {tool.name}: {tool.definition_path}"
                )
            except json.JSONDecodeError as e:
                logger.error(
                    f"Invalid JSON in definition file for tool {tool.name}: {str(e)}"
                )
            except Exception as e:
                logger.error(f"Failed to seed tool {tool.name}: {str(e)}")
        db_session.commit()
        logger.notice(f"Successfully seeded {len(tools)} Custom Tools")


def _seed_llms(
    db_session: Session, llm_upsert_requests: list[LLMProviderUpsertRequest]
) -> None:
    if not llm_upsert_requests:
        return

    logger.notice("Seeding LLMs")
    for request in llm_upsert_requests:
        existing = fetch_existing_llm_provider(name=request.name, db_session=db_session)
        if existing:
            request.id = existing.id
    seeded_providers: list[LLMProviderView] = []
    for llm_upsert_request in llm_upsert_requests:
        try:
            seeded_providers.append(upsert_llm_provider(llm_upsert_request, db_session))
        except ValueError as e:
            logger.warning(
                "Failed to upsert LLM provider '%s' during seeding: %s",
                llm_upsert_request.name,
                e,
            )

    default_provider = next(
        (p for p in seeded_providers if p.model_configurations), None
    )
    if not default_provider:
        return

    visible_configs = [
        mc for mc in default_provider.model_configurations if mc.is_visible
    ]
    default_config = (
        visible_configs[0]
        if visible_configs
        else default_provider.model_configurations[0]
    )
    update_default_provider(
        provider_id=default_provider.id,
        model_name=default_config.name,
        db_session=db_session,
    )


def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:
    if personas:
        logger.notice("Seeding Personas")
        try:
            for persona in personas:
                upsert_persona(
                    user=None,  # Seeding is done as admin
                    name=persona.name,
                    description=persona.description,
                    document_set_ids=persona.document_set_ids,
                    llm_model_provider_override=persona.llm_model_provider_override,
                    llm_model_version_override=persona.llm_model_version_override,
                    starter_messages=persona.starter_messages,
                    is_public=persona.is_public,
                    db_session=db_session,
                    tool_ids=persona.tool_ids,
                    display_priority=persona.display_priority,
                    system_prompt=persona.system_prompt,
                    task_prompt=persona.task_prompt,
                    datetime_aware=persona.datetime_aware,
                    is_featured=persona.is_featured,
                    commit=False,
                )
            db_session.commit()
        except Exception:
            logger.exception("Failed to seed personas.")
            raise


def _seed_settings(settings: Settings) -> None:
    logger.notice("Seeding Settings")
    try:
        store_base_settings(settings)
        logger.notice("Successfully seeded Settings")
    except ValueError as e:
        logger.error(f"Failed to seed Settings: {str(e)}")


def _seed_enterprise_settings(seed_config: SeedConfiguration) -> None:
    if (
        seed_config.enterprise_settings is not None
        or seed_config.nav_item_overrides is not None
    ):
        final_enterprise_settings = (
            deepcopy(seed_config.enterprise_settings)
            if seed_config.enterprise_settings
            else EnterpriseSettings()
        )

        final_nav_items = final_enterprise_settings.custom_nav_items
        if seed_config.nav_item_overrides is not None:
            final_nav_items = []
            for item in seed_config.nav_item_overrides:
                with open(item.svg_path, "r") as file:
                    svg_content = file.read().strip()

                final_nav_items.append(
                    NavigationItem(
                        link=item.link,
                        title=item.title,
                        svg_logo=svg_content,
                    )
                )

        final_enterprise_settings.custom_nav_items = final_nav_items

        logger.notice("Seeding enterprise settings")
        store_ee_settings(final_enterprise_settings)


def _seed_logo(logo_path: str | None) -> None:
    if logo_path:
        logger.notice("Uploading logo")
        upload_logo(file=logo_path)


def _seed_analytics_script(seed_config: SeedConfiguration) -> None:
    custom_analytics_secret_key = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY")
    if seed_config.analytics_script_path and custom_analytics_secret_key:
        logger.notice("Seeding analytics script")
        try:
            with open(seed_config.analytics_script_path, "r") as file:
                script_content = file.read()
            analytics_script = AnalyticsScriptUpload(
                script=script_content, secret_key=custom_analytics_secret_key
            )
            store_analytics_script(analytics_script)
        except FileNotFoundError:
            logger.error(
                f"Analytics script file not found: {seed_config.analytics_script_path}"
            )
        except ValueError as e:
            logger.error(f"Failed to seed analytics script: {str(e)}")


def get_seed_config() -> SeedConfiguration | None:
    return _parse_env()


def seed_db() -> None:
    seed_config = _parse_env()
    if seed_config is None:
        logger.debug("No seeding configuration file passed")
        return

    with get_session_with_current_tenant() as db_session:
        if seed_config.llms is not None:
            _seed_llms(db_session, seed_config.llms)
        if seed_config.personas is not None:
            _seed_personas(db_session, seed_config.personas)
        if seed_config.settings is not None:
            _seed_settings(seed_config.settings)
        if seed_config.custom_tools is not None:
            _seed_custom_tools(db_session, seed_config.custom_tools)

        _seed_logo(seed_config.seeded_logo_path)
        _seed_enterprise_settings(seed_config)
        _seed_analytics_script(seed_config)

        logger.notice("Verifying default standard answer category exists.")
        create_initial_default_standard_answer_category(db_session)


================================================
FILE: backend/ee/onyx/server/settings/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/settings/api.py
================================================
"""EE Settings API - provides license-aware settings override."""

from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError

from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

# Only GATED_ACCESS actually blocks access - other statuses are for notifications
_BLOCKING_STATUS = ApplicationStatus.GATED_ACCESS


def check_ee_features_enabled() -> bool:
    """EE version: checks if EE features should be available.

    Returns True if:
    - LICENSE_ENFORCEMENT_ENABLED is False (legacy/rollout mode)
    - Cloud mode (MULTI_TENANT) - cloud handles its own gating
    - Self-hosted with a valid (non-expired) license

    Returns False if:
    - Self-hosted with no license (never subscribed)
    - Self-hosted with expired license
    """
    if not LICENSE_ENFORCEMENT_ENABLED:
        # License enforcement disabled - allow EE features (legacy behavior)
        return True

    if MULTI_TENANT:
        # Cloud mode - EE features always available (gating handled by is_tenant_gated)
        return True

    # Self-hosted with enforcement - check for valid license
    tenant_id = get_current_tenant_id()
    try:
        metadata = get_cached_license_metadata(tenant_id)
        if not metadata:
            # Cache miss — warm from DB so cold-start doesn't block EE features
            try:
                with get_session_with_current_tenant() as db_session:
                    metadata = refresh_license_cache(db_session, tenant_id)
            except SQLAlchemyError as db_error:
                logger.warning(f"Failed to load license from DB: {db_error}")

        if metadata and metadata.status != _BLOCKING_STATUS:
            # Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
            return True
    except RedisError as e:
        logger.warning(f"Failed to check license for EE features: {e}")
        # Fail closed - if Redis is down, other things will break anyway
        return False

    # No license or GATED_ACCESS - no EE features
    return False


def apply_license_status_to_settings(settings: Settings) -> Settings:
    """EE version: checks license status for self-hosted deployments.

    For self-hosted, looks up license metadata and overrides application_status
    if the license indicates GATED_ACCESS (fully expired).

    Also sets ee_features_enabled based on license status to control
    visibility of EE features in the UI.

    For multi-tenant (cloud), the settings already have the correct status
    from the control plane, so no override is needed.

    If LICENSE_ENFORCEMENT_ENABLED is false, ee_features_enabled is set to True
    (since EE code was loaded via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES).
    """
    if not LICENSE_ENFORCEMENT_ENABLED:
        # License enforcement disabled - EE code is loaded via
        # ENABLE_PAID_ENTERPRISE_EDITION_FEATURES, so EE features are on
        settings.ee_features_enabled = True
        return settings

    if MULTI_TENANT:
        # Cloud mode - EE features always available (gating handled by is_tenant_gated)
        settings.ee_features_enabled = True
        return settings

    tenant_id = get_current_tenant_id()
    try:
        metadata = get_cached_license_metadata(tenant_id)
        if not metadata:
            # Cache miss (e.g. after TTL expiry). Fall back to DB so
            # the /settings request doesn't falsely return GATED_ACCESS
            # while the cache is cold.
            try:
                with get_session_with_current_tenant() as db_session:
                    metadata = refresh_license_cache(db_session, tenant_id)
            except SQLAlchemyError as db_error:
                logger.warning(
                    f"Failed to load license from DB for settings: {db_error}"
                )

        if metadata:
            if metadata.status == _BLOCKING_STATUS:
                settings.application_status = metadata.status
                settings.ee_features_enabled = False
            elif metadata.used_seats > metadata.seats:
                # License is valid but seat limit exceeded
                settings.application_status = ApplicationStatus.SEAT_LIMIT_EXCEEDED
                settings.seat_count = metadata.seats
                settings.used_seats = metadata.used_seats
                settings.ee_features_enabled = True
            else:
                # Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
                settings.ee_features_enabled = True
        else:
            # No license found in cache or DB.
            if ENTERPRISE_EDITION_ENABLED:
                # Legacy EE flag is set → prior EE usage (e.g. permission
                # syncing) means indexed data may need protection.
                settings.application_status = _BLOCKING_STATUS
            settings.ee_features_enabled = False
    except CACHE_TRANSIENT_ERRORS as e:
        logger.warning(f"Failed to check license metadata for settings: {e}")
        # Fail closed - disable EE features if we can't verify license
        settings.ee_features_enabled = False

    return settings


================================================
FILE: backend/ee/onyx/server/tenant_usage_limits.py
================================================
"""Tenant-specific usage limit overrides from the control plane (EE version)."""

import time

import requests

from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
from onyx.server.usage_limits import NO_LIMIT
from onyx.utils.logger import setup_logger

logger = setup_logger()


# In-memory storage for tenant overrides (populated at startup)
_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None
_last_fetch_time: float = 0.0
_FETCH_INTERVAL = 60 * 60 * 24  # 24 hours
_ERROR_FETCH_INTERVAL = 30 * 60  # 30 minutes (if the last fetch failed)


def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:
    """
    Fetch tenant-specific usage limit overrides from the control plane.

    Returns:
        Dictionary mapping tenant_id to their specific limit overrides.
        Returns empty dict on any error (falls back to defaults).
    """
    try:
        token = generate_data_plane_token()
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
        url = f"{CONTROL_PLANE_API_BASE_URL}/usage-limit-overrides"
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        tenant_overrides = response.json()

        # Parse each tenant's overrides
        result: dict[str, TenantUsageLimitOverrides] = {}
        for override_data in tenant_overrides:
            tenant_id = override_data["tenant_id"]
            try:
                result[tenant_id] = TenantUsageLimitOverrides(**override_data)
            except Exception as e:
                logger.warning(
                    f"Failed to parse usage limit overrides for tenant {tenant_id}: {e}"
                )

        return (
            result or None
        )  # if empty dictionary, something went wrong and we shouldn't enforce limits

    except requests.exceptions.RequestException as e:
        logger.warning(f"Failed to fetch usage limit overrides from control plane: {e}")
        return None
    except Exception as e:
        logger.error(f"Error parsing usage limit overrides: {e}")
        return None


def load_usage_limit_overrides() -> None:
    """
    Load tenant usage limit overrides from the control plane.
    """
    global _tenant_usage_limit_overrides
    global _last_fetch_time

    logger.info("Loading tenant usage limit overrides from control plane...")
    overrides = fetch_usage_limit_overrides()

    _last_fetch_time = time.time()

    # use the new result if it exists, otherwise use the old result
    # (prevents us from updating to a failed fetch result)
    _tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides

    if overrides:
        logger.info(f"Loaded usage limit overrides for {len(overrides)} tenants")
    else:
        logger.info("No tenant-specific usage limit overrides found")


def unlimited(tenant_id: str) -> TenantUsageLimitOverrides:
    return TenantUsageLimitOverrides(
        tenant_id=tenant_id,
        llm_cost_cents_trial=NO_LIMIT,
        llm_cost_cents_paid=NO_LIMIT,
        chunks_indexed_trial=NO_LIMIT,
        chunks_indexed_paid=NO_LIMIT,
        api_calls_trial=NO_LIMIT,
        api_calls_paid=NO_LIMIT,
        non_streaming_calls_trial=NO_LIMIT,
        non_streaming_calls_paid=NO_LIMIT,
    )


def get_tenant_usage_limit_overrides(
    tenant_id: str,
) -> TenantUsageLimitOverrides | None:
    """
    Get the usage limit overrides for a specific tenant.

    Args:
        tenant_id: The tenant ID to look up

    Returns:
        TenantUsageLimitOverrides if the tenant has overrides, None otherwise.
    """

    if DEV_MODE:  # in dev mode, we return unlimited limits for all tenants
        return unlimited(tenant_id)

    global _tenant_usage_limit_overrides
    time_since = time.time() - _last_fetch_time
    if (
        _tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL
    ) or (time_since > _FETCH_INTERVAL):
        logger.debug(
            f"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}"
        )

        load_usage_limit_overrides()

    # If we have failed to fetch from the control plane or we're in dev mode, don't usage limit anyone.
    if _tenant_usage_limit_overrides is None or DEV_MODE:
        return unlimited(tenant_id)
    return _tenant_usage_limit_overrides.get(tenant_id)


================================================
FILE: backend/ee/onyx/server/tenants/__init__.py
================================================


================================================
FILE: backend/ee/onyx/server/tenants/access.py
================================================
from datetime import datetime
from datetime import timedelta

import jwt
from fastapi import HTTPException
from fastapi import Request

from onyx.configs.app_configs import DATA_PLANE_SECRET
from onyx.configs.app_configs import EXPECTED_API_KEY
from onyx.configs.app_configs import JWT_ALGORITHM
from onyx.utils.logger import setup_logger

logger = setup_logger()


def generate_data_plane_token() -> str:
    if DATA_PLANE_SECRET is None:
        raise ValueError("DATA_PLANE_SECRET is not set")

    payload = {
        "iss": "data_plane",
        "exp": datetime.utcnow() + timedelta(minutes=5),
        "iat": datetime.utcnow(),
        "scope": "api_access",
    }

    token = jwt.encode(payload, DATA_PLANE_SECRET, algorithm=JWT_ALGORITHM)
    return token


async def control_plane_dep(request: Request) -> None:
    api_key = request.headers.get("X-API-KEY")
    if api_key != EXPECTED_API_KEY:
        logger.warning("Invalid API key")
        raise HTTPException(status_code=401, detail="Invalid API key")

    auth_header = request.headers.get("Authorization")
    if not auth_header or not auth_header.startswith("Bearer "):
        logger.warning("Invalid authorization header")
        raise HTTPException(status_code=401, detail="Invalid authorization header")

    token = auth_header.split(" ")[1]
    try:
        payload = jwt.decode(token, DATA_PLANE_SECRET, algorithms=[JWT_ALGORITHM])
        if payload.get("scope") != "tenant:create":
            logger.warning("Insufficient permissions")
            raise HTTPException(status_code=403, detail="Insufficient permissions")
    except jwt.ExpiredSignatureError:
        logger.warning("Token has expired")
        raise HTTPException(status_code=401, detail="Token has expired")
    except jwt.InvalidTokenError:
        logger.warning("Invalid token")
        raise HTTPException(status_code=401, detail="Invalid token")


================================================
FILE: backend/ee/onyx/server/tenants/admin_api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from fastapi_users import exceptions

from ee.onyx.auth.users import current_cloud_superuser
from ee.onyx.server.tenants.models import ImpersonateRequest
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from onyx.auth.users import auth_backend
from onyx.auth.users import get_redis_strategy
from onyx.auth.users import User
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.users import get_user_by_email
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/tenants")


@router.post("/impersonate")
async def impersonate_user(
    impersonate_request: ImpersonateRequest,
    _: User = Depends(current_cloud_superuser),
) -> Response:
    """Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
    try:
        tenant_id = get_tenant_id_for_email(impersonate_request.email)
    except exceptions.UserNotExists:
        detail = f"User has no tenant mapping: {impersonate_request.email=}"
        logger.warning(detail)
        raise HTTPException(status_code=422, detail=detail)

    with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
        user_to_impersonate = get_user_by_email(
            impersonate_request.email, tenant_session
        )
        if user_to_impersonate is None:
            detail = (
                f"User not found in tenant: {impersonate_request.email=} {tenant_id=}"
            )
            logger.warning(detail)
            raise HTTPException(status_code=422, detail=detail)

        token = await get_redis_strategy().write_token(user_to_impersonate)

    response = await auth_backend.transport.get_login_response(token)
    response.set_cookie(
        key="fastapiusersauth",
        value=token,
        httponly=True,
        secure=True,
        samesite="lax",
    )
    return response


================================================
FILE: backend/ee/onyx/server/tenants/anonymous_user_path.py
================================================
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import TenantAnonymousUserPath


def get_anonymous_user_path(tenant_id: str, db_session: Session) -> str | None:
    result = db_session.execute(
        select(TenantAnonymousUserPath).where(
            TenantAnonymousUserPath.tenant_id == tenant_id
        )
    )
    result_scalar = result.scalar_one_or_none()
    if result_scalar:
        return result_scalar.anonymous_user_path
    else:
        return None


def modify_anonymous_user_path(
    tenant_id: str, anonymous_user_path: str, db_session: Session
) -> None:
    # Enforce lowercase path at DB operation level
    anonymous_user_path = anonymous_user_path.lower()

    existing_entry = (
        db_session.query(TenantAnonymousUserPath).filter_by(tenant_id=tenant_id).first()
    )

    if existing_entry:
        existing_entry.anonymous_user_path = anonymous_user_path

    else:
        new_entry = TenantAnonymousUserPath(
            tenant_id=tenant_id, anonymous_user_path=anonymous_user_path
        )
        db_session.add(new_entry)

    db_session.commit()


def get_tenant_id_for_anonymous_user_path(
    anonymous_user_path: str, db_session: Session
) -> str | None:
    result = db_session.execute(
        select(TenantAnonymousUserPath).where(
            TenantAnonymousUserPath.anonymous_user_path == anonymous_user_path
        )
    )
    result_scalar = result.scalar_one_or_none()
    if result_scalar:
        return result_scalar.tenant_id
    else:
        return None


def validate_anonymous_user_path(path: str) -> None:
    if not path or "/" in path or not path.replace("-", "").isalnum():
        raise ValueError("Invalid path. Use only letters, numbers, and hyphens.")


================================================
FILE: backend/ee/onyx/server/tenants/anonymous_users_api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from sqlalchemy.exc import IntegrityError

from ee.onyx.auth.users import generate_anonymous_user_jwt_token
from ee.onyx.server.tenants.anonymous_user_path import get_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import (
    get_tenant_id_for_anonymous_user_path,
)
from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
from ee.onyx.server.tenants.models import AnonymousUserPath
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import current_admin_user
from onyx.auth.users import User
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/tenants")


@router.get("/anonymous-user-path")
async def get_anonymous_user_path_api(
    _: User = Depends(current_admin_user),
) -> AnonymousUserPath:
    tenant_id = get_current_tenant_id()

    if tenant_id is None:
        raise HTTPException(status_code=404, detail="Tenant not found")

    with get_session_with_shared_schema() as db_session:
        current_path = get_anonymous_user_path(tenant_id, db_session)

    return AnonymousUserPath(anonymous_user_path=current_path)


@router.post("/anonymous-user-path")
async def set_anonymous_user_path_api(
    anonymous_user_path: str,
    _: User = Depends(current_admin_user),
) -> None:
    tenant_id = get_current_tenant_id()
    try:
        validate_anonymous_user_path(anonymous_user_path)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    with get_session_with_shared_schema() as db_session:
        try:
            modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
        except IntegrityError:
            raise HTTPException(
                status_code=409,
                detail="The anonymous user path is already in use. Please choose a different path.",
            )
        except Exception as e:
            logger.exception(f"Failed to modify anonymous user path: {str(e)}")
            raise HTTPException(
                status_code=500,
                detail="An unexpected error occurred while modifying the anonymous user path",
            )


@router.post("/anonymous-user")
async def login_as_anonymous_user(
    anonymous_user_path: str,
) -> Response:
    with get_session_with_shared_schema() as db_session:
        tenant_id = get_tenant_id_for_anonymous_user_path(
            anonymous_user_path, db_session
        )
        if not tenant_id:
            raise HTTPException(status_code=404, detail="Tenant not found")

    if not anonymous_user_enabled(tenant_id=tenant_id):
        raise HTTPException(status_code=403, detail="Anonymous user is not enabled")

    token = generate_anonymous_user_jwt_token(tenant_id)

    response = Response()
    response.delete_cookie(FASTAPI_USERS_AUTH_COOKIE_NAME)
    response.set_cookie(
        key=ANONYMOUS_USER_COOKIE_NAME,
        value=token,
        httponly=True,
        secure=True,
        samesite="strict",
    )
    return response


================================================
FILE: backend/ee/onyx/server/tenants/api.py
================================================
from fastapi import APIRouter

from ee.onyx.server.tenants.admin_api import router as admin_router
from ee.onyx.server.tenants.anonymous_users_api import router as anonymous_users_router
from ee.onyx.server.tenants.billing_api import router as billing_router
from ee.onyx.server.tenants.proxy import router as proxy_router
from ee.onyx.server.tenants.team_membership_api import router as team_membership_router
from ee.onyx.server.tenants.tenant_management_api import (
    router as tenant_management_router,
)
from ee.onyx.server.tenants.user_invitations_api import (
    router as user_invitations_router,
)

# Create a main router to include all sub-routers
# Note: We don't add a prefix here as each router already has the /tenants prefix
router = APIRouter()

# Include all the individual routers
router.include_router(admin_router)
router.include_router(anonymous_users_router)
router.include_router(billing_router)
router.include_router(team_membership_router)
router.include_router(tenant_management_router)
router.include_router(user_invitations_router)
router.include_router(proxy_router)


================================================
FILE: backend/ee/onyx/server/tenants/billing.py
================================================
from typing import cast
from typing import Literal

import requests
import stripe

from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.utils.logger import setup_logger

stripe.api_key = STRIPE_SECRET_KEY

logger = setup_logger()


def fetch_stripe_checkout_session(
    tenant_id: str,
    billing_period: Literal["monthly", "annual"] = "monthly",
    seats: int | None = None,
) -> str:
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
    payload = {
        "tenant_id": tenant_id,
        "billing_period": billing_period,
        "seats": seats,
    }
    response = requests.post(url, headers=headers, json=payload)
    if not response.ok:
        try:
            data = response.json()
            error_msg = (
                data.get("error")
                or f"Request failed with status {response.status_code}"
            )
        except (ValueError, requests.exceptions.JSONDecodeError):
            error_msg = f"Request failed with status {response.status_code}: {response.text[:200]}"
        raise Exception(error_msg)
    data = response.json()
    if data.get("error"):
        raise Exception(data["error"])
    return data["sessionId"]


def fetch_tenant_stripe_information(tenant_id: str) -> dict:
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    url = f"{CONTROL_PLANE_API_BASE_URL}/tenant-stripe-information"
    params = {"tenant_id": tenant_id}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    return response.json()


def fetch_billing_information(
    tenant_id: str,
) -> BillingInformation | SubscriptionStatusResponse:
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    url = f"{CONTROL_PLANE_API_BASE_URL}/billing-information"
    params = {"tenant_id": tenant_id}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()

    response_data = response.json()

    # Check if the response indicates no subscription
    if (
        isinstance(response_data, dict)
        and "subscribed" in response_data
        and not response_data["subscribed"]
    ):
        return SubscriptionStatusResponse(**response_data)

    # Otherwise, parse as BillingInformation
    return BillingInformation(**response_data)


def fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:
    """
    Fetch a Stripe customer portal session URL from the control plane.
    NOTE: This is currently only used for multi-tenant (cloud) deployments.
    Self-hosted proxy endpoints will be added in a future phase.
    """
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    url = f"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session"
    payload = {"tenant_id": tenant_id}
    if return_url:
        payload["return_url"] = return_url
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    return response.json()["url"]


def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
    """
    Update the number of seats for a tenant's subscription.
    Preserves the existing price (monthly, annual, or grandfathered).
    """
    response = fetch_tenant_stripe_information(tenant_id)
    stripe_subscription_id = cast(str, response.get("stripe_subscription_id"))

    subscription = stripe.Subscription.retrieve(stripe_subscription_id)
    subscription_item = subscription["items"]["data"][0]

    # Use existing price to preserve the customer's current plan
    current_price_id = subscription_item.price.id

    updated_subscription = stripe.Subscription.modify(
        stripe_subscription_id,
        items=[
            {
                "id": subscription_item.id,
                "price": current_price_id,
                "quantity": number_of_users,
            }
        ],
        metadata={"tenant_id": str(tenant_id)},
    )
    return updated_subscription


================================================
FILE: backend/ee/onyx/server/tenants/billing_api.py
================================================
"""Billing API endpoints for cloud multi-tenant deployments.

DEPRECATED: These /tenants/* billing endpoints are being replaced by /admin/billing/*
which provides a unified API for both self-hosted and cloud deployments.

TODO(ENG-3533): Migrate frontend to use /admin/billing/* endpoints and remove this file.
https://linear.app/onyx-app/issue/ENG-3533/migrate-tenantsbilling-adminbilling

Current endpoints to migrate:
- GET  /tenants/billing-information     -> GET  /admin/billing/information
- POST /tenants/create-customer-portal-session -> POST /admin/billing/portal-session
- POST /tenants/create-subscription-session    -> POST /admin/billing/checkout-session
- GET  /tenants/stripe-publishable-key  -> (keep as-is, shared endpoint)

Note: /tenants/product-gating/* endpoints are control-plane-to-data-plane calls
and are NOT part of this migration - they stay here.
"""

import asyncio

import httpx
from fastapi import APIRouter
from fastapi import Depends

from ee.onyx.auth.users import current_admin_user
from ee.onyx.server.tenants.access import control_plane_dep
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import CreateCheckoutSessionRequest
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/tenants")

# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()


@router.post("/product-gating")
def gate_product(
    product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
) -> ProductGatingResponse:
    """
    Gating the product means that the product is not available to the tenant.
    They will be directed to the billing page.
    We gate the product when their subscription has ended.
    """
    try:
        store_product_gating(
            product_gating_request.tenant_id, product_gating_request.application_status
        )
        return ProductGatingResponse(updated=True, error=None)

    except Exception as e:
        logger.exception("Failed to gate product")
        return ProductGatingResponse(updated=False, error=str(e))


@router.post("/product-gating/full-sync")
def gate_product_full_sync(
    product_gating_request: ProductGatingFullSyncRequest,
    _: None = Depends(control_plane_dep),
) -> ProductGatingResponse:
    """
    Bulk operation to overwrite the entire gated tenant set.
    This replaces all currently gated tenants with the provided list.
    Gated tenants are not available to access the product and will be
    directed to the billing page when their subscription has ended.
    """
    try:
        overwrite_full_gated_set(product_gating_request.gated_tenant_ids)
        return ProductGatingResponse(updated=True, error=None)

    except Exception as e:
        logger.exception("Failed to gate products during full sync")
        return ProductGatingResponse(updated=False, error=str(e))


@router.get("/billing-information")
async def billing_information(
    _: User = Depends(current_admin_user),
) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
    tenant_id = get_current_tenant_id()
    return fetch_billing_information(tenant_id)


@router.post("/create-customer-portal-session")
async def create_customer_portal_session(
    _: User = Depends(current_admin_user),
) -> dict:
    """Create a Stripe customer portal session via the control plane."""
    tenant_id = get_current_tenant_id()
    return_url = f"{WEB_DOMAIN}/admin/billing"

    try:
        portal_url = fetch_customer_portal_session(tenant_id, return_url)
        return {"stripe_customer_portal_url": portal_url}
    except OnyxError:
        raise
    except Exception:
        logger.exception("Failed to create customer portal session")
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            "Failed to create customer portal session",
        )


@router.post("/create-checkout-session")
async def create_checkout_session(
    request: CreateCheckoutSessionRequest | None = None,
    _: User = Depends(current_admin_user),
) -> dict:
    """Create a Stripe checkout session via the control plane."""
    tenant_id = get_current_tenant_id()
    billing_period = request.billing_period if request else "monthly"
    seats = request.seats if request else None

    try:
        checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
        return {"stripe_checkout_url": checkout_url}
    except OnyxError:
        raise
    except Exception:
        logger.exception("Failed to create checkout session")
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            "Failed to create checkout session",
        )


@router.post("/create-subscription-session")
async def create_subscription_session(
    request: CreateSubscriptionSessionRequest | None = None,
    _: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
    try:
        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        if not tenant_id:
            raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Tenant ID not found")

        billing_period = request.billing_period if request else "monthly"
        session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
        return SubscriptionSessionResponse(sessionId=session_id)

    except OnyxError:
        raise
    except Exception:
        logger.exception("Failed to create subscription session")
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            "Failed to create subscription session",
        )


@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
    """
    Fetch the Stripe publishable key.
    Priority: env var override (for testing) > S3 bucket (production).
    This endpoint is public (no auth required) since publishable keys are safe to expose.
    The key is cached in memory to avoid hitting S3 on every request.
    """
    global _stripe_publishable_key_cache

    # Fast path: return cached value without lock
    if _stripe_publishable_key_cache:
        return StripePublishableKeyResponse(
            publishable_key=_stripe_publishable_key_cache
        )

    # Use lock to prevent concurrent S3 requests
    async with _stripe_key_lock:
        # Double-check after acquiring lock (another request may have populated cache)
        if _stripe_publishable_key_cache:
            return StripePublishableKeyResponse(
                publishable_key=_stripe_publishable_key_cache
            )

        # Check for env var override first (for local testing with pk_test_* keys)
        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
            if not key.startswith("pk_"):
                raise OnyxError(
                    OnyxErrorCode.INTERNAL_ERROR,
                    "Invalid Stripe publishable key format",
                )
            _stripe_publishable_key_cache = key
            return StripePublishableKeyResponse(publishable_key=key)

        # Fall back to S3 bucket
        if not STRIPE_PUBLISHABLE_KEY_URL:
            raise OnyxError(
                OnyxErrorCode.INTERNAL_ERROR,
                "Stripe publishable key is not configured",
            )

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
                response.raise_for_status()
                key = response.text.strip()

                # Validate key format
                if not key.startswith("pk_"):
                    raise OnyxError(
                        OnyxErrorCode.INTERNAL_ERROR,
                        "Invalid Stripe publishable key format",
                    )

                _stripe_publishable_key_cache = key
                return StripePublishableKeyResponse(publishable_key=key)
        except httpx.HTTPError:
            raise OnyxError(
                OnyxErrorCode.INTERNAL_ERROR,
                "Failed to fetch Stripe publishable key",
            )


================================================
FILE: backend/ee/onyx/server/tenants/models.py
================================================
from datetime import datetime
from typing import Literal

from pydantic import BaseModel

from onyx.server.settings.models import ApplicationStatus


class CheckoutSessionCreationRequest(BaseModel):
    quantity: int


class CreateTenantRequest(BaseModel):
    tenant_id: str
    initial_admin_email: str


class ProductGatingRequest(BaseModel):
    tenant_id: str
    application_status: ApplicationStatus


class ProductGatingFullSyncRequest(BaseModel):
    gated_tenant_ids: list[str]


class SubscriptionStatusResponse(BaseModel):
    subscribed: bool


class BillingInformation(BaseModel):
    stripe_subscription_id: str
    status: str
    current_period_start: datetime
    current_period_end: datetime
    number_of_seats: int
    cancel_at_period_end: bool
    canceled_at: datetime | None
    trial_start: datetime | None
    trial_end: datetime | None
    seats: int
    payment_method_enabled: bool


class CreateCheckoutSessionRequest(BaseModel):
    billing_period: Literal["monthly", "annual"] = "monthly"
    seats: int | None = None
    email: str | None = None


class CheckoutSessionCreationResponse(BaseModel):
    id: str


class ImpersonateRequest(BaseModel):
    email: str


class TenantCreationPayload(BaseModel):
    tenant_id: str
    email: str
    referral_source: str | None = None


class TenantDeletionPayload(BaseModel):
    tenant_id: str
    email: str


class AnonymousUserPath(BaseModel):
    anonymous_user_path: str | None


class ProductGatingResponse(BaseModel):
    updated: bool
    error: str | None


class SubscriptionSessionResponse(BaseModel):
    sessionId: str


class CreateSubscriptionSessionRequest(BaseModel):
    """Request to create a subscription checkout session."""

    billing_period: Literal["monthly", "annual"] = "monthly"


class TenantByDomainResponse(BaseModel):
    tenant_id: str
    number_of_users: int
    creator_email: str


class TenantByDomainRequest(BaseModel):
    email: str


class RequestInviteRequest(BaseModel):
    tenant_id: str


class RequestInviteResponse(BaseModel):
    success: bool
    message: str


class PendingUserSnapshot(BaseModel):
    email: str


class ApproveUserRequest(BaseModel):
    email: str


class StripePublishableKeyResponse(BaseModel):
    publishable_key: str


================================================
FILE: backend/ee/onyx/server/tenants/product_gating.py
================================================
from typing import cast

from ee.onyx.configs.app_configs import GATED_TENANTS_KEY
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.store import load_settings
from onyx.server.settings.store import store_settings
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


def update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:
    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)

    # Maintain the GATED_ACCESS set
    if status == ApplicationStatus.GATED_ACCESS:
        redis_client.sadd(GATED_TENANTS_KEY, tenant_id)
    else:
        redis_client.srem(GATED_TENANTS_KEY, tenant_id)


def store_product_gating(tenant_id: str, application_status: ApplicationStatus) -> None:
    try:
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        settings = load_settings()
        settings.application_status = application_status
        store_settings(settings)

        # Store gated tenant information in Redis
        update_tenant_gating(tenant_id, application_status)

        if token is not None:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

    except Exception:
        logger.exception("Failed to gate product")
        raise


def overwrite_full_gated_set(tenant_ids: list[str]) -> None:
    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)

    pipeline = redis_client.pipeline()

    # using pipeline doesn't automatically add the tenant_id prefix
    full_gated_set_key = f"{ONYX_CLOUD_TENANT_ID}:{GATED_TENANTS_KEY}"

    # Clear the existing set
    pipeline.delete(full_gated_set_key)

    # Add all tenant IDs to the set and set their status
    for tenant_id in tenant_ids:
        pipeline.sadd(full_gated_set_key, tenant_id)

    # Execute all commands at once
    pipeline.execute()


def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}


def is_tenant_gated(tenant_id: str) -> bool:
    """Fast O(1) check if tenant is in gated set (multi-tenant only)."""
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    return bool(redis_client.sismember(GATED_TENANTS_KEY, tenant_id))


================================================
FILE: backend/ee/onyx/server/tenants/provisioning.py
================================================
import asyncio
import uuid

import aiohttp  # Async HTTP client
import httpx
import requests
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy import select
from sqlalchemy.orm import Session

from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import TenantByDomainResponse
from ee.onyx.server.tenants.models import TenantCreationPayload
from ee.onyx.server.tenants.models import TenantDeletionPayload
from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
from ee.onyx.server.tenants.schema_management import drop_schema
from ee.onyx.server.tenants.schema_management import run_alembic_migrations
from ee.onyx.server.tenants.user_mapping import add_users_to_tenant
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
from onyx.auth.users import exceptions
from onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.image_generation import create_default_image_gen_config_from_api_key
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_cloud_embedding_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import AvailableTenant
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.db.models import UserTenantMapping
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG
from onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG
from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
from onyx.llm.well_known_providers.llm_provider_options import (
    get_recommendations,
)
from onyx.llm.well_known_providers.llm_provider_options import (
    model_configurations_for_provider,
)
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.setup import setup_onyx
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import TENANT_ID_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.enums import EmbeddingProvider


logger = setup_logger()


async def get_or_provision_tenant(
    email: str,
    referral_source: str | None = None,
    request: Request | None = None,
) -> str:
    """
    Get existing tenant ID for an email or create a new tenant if none exists.
    This function should only be called after we have verified we want this user's tenant to exist.
    It returns the tenant ID associated with the email, creating a new tenant if necessary.
    """
    # Early return for non-multi-tenant mode
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA

    if referral_source and request:
        await submit_to_hubspot(email, referral_source, request)

    # First, check if the user already has a tenant
    tenant_id: str | None = None
    try:
        tenant_id = get_tenant_id_for_email(email)
        return tenant_id
    except exceptions.UserNotExists:
        # User doesn't exist, so we need to create a new tenant or assign an existing one
        pass

    try:
        # Try to get a pre-provisioned tenant
        tenant_id = await get_available_tenant()

        if tenant_id:
            # Run migrations to ensure the pre-provisioned tenant schema is current.
            # Pool tenants may have been created before a new migration was deployed.
            # Capture as a non-optional local so mypy can type the lambda correctly.
            _tenant_id: str = tenant_id
            loop = asyncio.get_running_loop()
            try:
                await loop.run_in_executor(
                    None, lambda: run_alembic_migrations(_tenant_id)
                )
            except Exception:
                # The tenant was already dequeued from the pool — roll it back so
                # it doesn't end up orphaned (schema exists, but not assigned to anyone).
                logger.exception(
                    f"Migration failed for pre-provisioned tenant {_tenant_id}; rolling back"
                )
                try:
                    await rollback_tenant_provisioning(_tenant_id)
                except Exception:
                    logger.exception(f"Failed to rollback orphaned tenant {_tenant_id}")
                raise
            # If we have a pre-provisioned tenant, assign it to the user
            await assign_tenant_to_user(tenant_id, email, referral_source)
            logger.info(f"Assigned pre-provisioned tenant {tenant_id} to user {email}")
        else:
            # If no pre-provisioned tenant is available, create a new one on-demand
            tenant_id = await create_tenant(email, referral_source)

        # Notify control plane if we have created / assigned a new tenant
        if not DEV_MODE:
            await notify_control_plane(tenant_id, email, referral_source)

        return tenant_id

    except Exception as e:
        # If we've encountered an error, log and raise an exception
        error_msg = "Failed to provision tenant"
        logger.error(error_msg, exc_info=e)
        raise HTTPException(
            status_code=500,
            detail="Failed to provision tenant. Please try again later.",
        )


async def create_tenant(
    email: str,
    referral_source: str | None = None,  # noqa: ARG001
) -> str:
    """
    Create a new tenant on-demand when no pre-provisioned tenants are available.
    This is the fallback method when we can't use a pre-provisioned tenant.

    """
    tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())
    logger.info(f"Creating new tenant {tenant_id} for user {email}")

    try:
        # Provision tenant on data plane
        await provision_tenant(tenant_id, email)

    except Exception as e:
        logger.exception(f"Tenant provisioning failed: {str(e)}")
        # Attempt to rollback the tenant provisioning
        try:
            await rollback_tenant_provisioning(tenant_id)
        except Exception:
            logger.exception(f"Failed to rollback tenant provisioning for {tenant_id}")
        raise HTTPException(status_code=500, detail="Failed to provision tenant.")

    return tenant_id


async def provision_tenant(tenant_id: str, email: str) -> None:
    if not MULTI_TENANT:
        raise HTTPException(status_code=403, detail="Multi-tenancy is not enabled")

    if user_owns_a_tenant(email):
        raise HTTPException(
            status_code=409, detail="User already belongs to an organization"
        )

    logger.debug(f"Provisioning tenant {tenant_id} for user {email}")

    try:
        # Create the schema for the tenant
        if not create_schema_if_not_exists(tenant_id):
            logger.debug(f"Created schema for tenant {tenant_id}")
        else:
            logger.debug(f"Schema already exists for tenant {tenant_id}")

        # Set up the tenant with all necessary configurations
        await setup_tenant(tenant_id)

        # Assign the tenant to the user
        await assign_tenant_to_user(tenant_id, email)

    except Exception as e:
        logger.exception(f"Failed to create tenant {tenant_id}")
        raise HTTPException(
            status_code=500, detail=f"Failed to create tenant: {str(e)}"
        )


async def notify_control_plane(
    tenant_id: str, email: str, referral_source: str | None = None
) -> None:
    logger.info("Fetching billing information")
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = TenantCreationPayload(
        tenant_id=tenant_id, email=email, referral_source=referral_source
    )

    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{CONTROL_PLANE_API_BASE_URL}/tenants/create",
            headers=headers,
            json=payload.model_dump(),
        ) as response:
            if response.status != 200:
                error_text = await response.text()
                logger.error(f"Control plane tenant creation failed: {error_text}")
                raise Exception(
                    f"Failed to create tenant on control plane: {error_text}"
                )


async def rollback_tenant_provisioning(tenant_id: str) -> None:
    """
    Logic to rollback tenant provisioning on data plane.
    Handles each step independently to ensure maximum cleanup even if some steps fail.
    """
    logger.info(f"Rolling back tenant provisioning for tenant_id: {tenant_id}")

    # Track if any part of the rollback fails
    rollback_errors = []

    # 1. Try to drop the tenant's schema
    try:
        drop_schema(tenant_id)
        logger.info(f"Successfully dropped schema for tenant {tenant_id}")
    except Exception as e:
        error_msg = f"Failed to drop schema for tenant {tenant_id}: {str(e)}"
        logger.error(error_msg)
        rollback_errors.append(error_msg)

    # 2. Try to remove tenant mapping
    try:
        with get_session_with_shared_schema() as db_session:
            db_session.begin()
            try:
                db_session.query(UserTenantMapping).filter(
                    UserTenantMapping.tenant_id == tenant_id
                ).delete()
                db_session.commit()
                logger.info(
                    f"Successfully removed user mappings for tenant {tenant_id}"
                )
            except Exception as e:
                db_session.rollback()
                raise e
    except Exception as e:
        error_msg = f"Failed to remove user mappings for tenant {tenant_id}: {str(e)}"
        logger.error(error_msg)
        rollback_errors.append(error_msg)

    # 3. If this tenant was in the available tenants table, remove it
    try:
        with get_session_with_shared_schema() as db_session:
            db_session.begin()
            try:
                available_tenant = (
                    db_session.query(AvailableTenant)
                    .filter(AvailableTenant.tenant_id == tenant_id)
                    .first()
                )

                if available_tenant:
                    db_session.delete(available_tenant)
                    db_session.commit()
                    logger.info(
                        f"Removed tenant {tenant_id} from available tenants table"
                    )
            except Exception as e:
                db_session.rollback()
                raise e
    except Exception as e:
        error_msg = f"Failed to remove tenant {tenant_id} from available tenants table: {str(e)}"
        logger.error(error_msg)
        rollback_errors.append(error_msg)

    # Log summary of rollback operation
    if rollback_errors:
        logger.error(f"Tenant rollback completed with {len(rollback_errors)} errors")
    else:
        logger.info(f"Tenant rollback completed successfully for tenant {tenant_id}")


def _build_model_configuration_upsert_requests(
    provider_name: str,
    recommendations: LLMRecommendations,
) -> list[ModelConfigurationUpsertRequest]:
    model_configurations = model_configurations_for_provider(
        provider_name, recommendations
    )
    return [
        ModelConfigurationUpsertRequest(
            name=model_configuration.name,
            is_visible=model_configuration.is_visible,
            max_input_tokens=model_configuration.max_input_tokens,
            supports_image_input=model_configuration.supports_image_input,
        )
        for model_configuration in model_configurations
    ]


def configure_default_api_keys(db_session: Session) -> None:
    """Configure default LLM providers using recommended-models.json for model selection."""
    # Load recommendations from JSON config
    recommendations = get_recommendations()

    has_set_default_provider = False

    def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:
        nonlocal has_set_default_provider
        try:
            existing = fetch_existing_llm_provider(
                name=request.name, db_session=db_session
            )
            if existing:
                request.id = existing.id
            provider = upsert_llm_provider(request, db_session)
            if not has_set_default_provider:
                update_default_provider(provider.id, default_model, db_session)
                has_set_default_provider = True
        except Exception as e:
            logger.error(f"Failed to configure {request.provider} provider: {e}")

    # Configure OpenAI provider
    if OPENAI_DEFAULT_API_KEY:
        default_model = recommendations.get_default_model(OPENAI_PROVIDER_NAME)
        if default_model is None:
            logger.error(
                f"No default model found for {OPENAI_PROVIDER_NAME} in recommendations"
            )
        default_model_name = default_model.name if default_model else "gpt-5.2"

        openai_provider = LLMProviderUpsertRequest(
            name="OpenAI",
            provider=OPENAI_PROVIDER_NAME,
            api_key=OPENAI_DEFAULT_API_KEY,
            model_configurations=_build_model_configuration_upsert_requests(
                OPENAI_PROVIDER_NAME, recommendations
            ),
            api_key_changed=True,
            is_auto_mode=True,
        )
        _upsert(openai_provider, default_model_name)

        # Create default image generation config using the OpenAI API key
        try:
            create_default_image_gen_config_from_api_key(
                db_session, OPENAI_DEFAULT_API_KEY
            )
        except Exception as e:
            logger.error(f"Failed to create default image gen config: {e}")
    else:
        logger.info(
            "OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
        )

    # Configure Anthropic provider
    if ANTHROPIC_DEFAULT_API_KEY:
        default_model = recommendations.get_default_model(ANTHROPIC_PROVIDER_NAME)
        if default_model is None:
            logger.error(
                f"No default model found for {ANTHROPIC_PROVIDER_NAME} in recommendations"
            )
        default_model_name = (
            default_model.name if default_model else "claude-sonnet-4-5"
        )

        anthropic_provider = LLMProviderUpsertRequest(
            name="Anthropic",
            provider=ANTHROPIC_PROVIDER_NAME,
            api_key=ANTHROPIC_DEFAULT_API_KEY,
            model_configurations=_build_model_configuration_upsert_requests(
                ANTHROPIC_PROVIDER_NAME, recommendations
            ),
            api_key_changed=True,
            is_auto_mode=True,
        )
        _upsert(anthropic_provider, default_model_name)
    else:
        logger.info(
            "ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
        )

    # Configure Vertex AI provider
    if VERTEXAI_DEFAULT_CREDENTIALS:
        default_model = recommendations.get_default_model(VERTEXAI_PROVIDER_NAME)
        if default_model is None:
            logger.error(
                f"No default model found for {VERTEXAI_PROVIDER_NAME} in recommendations"
            )
        default_model_name = default_model.name if default_model else "gemini-2.5-pro"

        # Vertex AI uses custom_config for credentials and location
        custom_config = {
            VERTEX_CREDENTIALS_FILE_KWARG: VERTEXAI_DEFAULT_CREDENTIALS,
            VERTEX_LOCATION_KWARG: VERTEXAI_DEFAULT_LOCATION,
        }

        vertexai_provider = LLMProviderUpsertRequest(
            name="Google Vertex AI",
            provider=VERTEXAI_PROVIDER_NAME,
            custom_config=custom_config,
            model_configurations=_build_model_configuration_upsert_requests(
                VERTEXAI_PROVIDER_NAME, recommendations
            ),
            api_key_changed=True,
            is_auto_mode=True,
        )
        _upsert(vertexai_provider, default_model_name)
    else:
        logger.info(
            "VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
        )

    # Configure OpenRouter provider
    if OPENROUTER_DEFAULT_API_KEY:
        default_model = recommendations.get_default_model(OPENROUTER_PROVIDER_NAME)
        if default_model is None:
            logger.error(
                f"No default model found for {OPENROUTER_PROVIDER_NAME} in recommendations"
            )
        default_model_name = default_model.name if default_model else "z-ai/glm-4.7"

        # For OpenRouter, we use the visible models from recommendations as model_configurations
        # since OpenRouter models are dynamic (fetched from their API)
        visible_models = recommendations.get_visible_models(OPENROUTER_PROVIDER_NAME)
        model_configurations = [
            ModelConfigurationUpsertRequest(
                name=model.name,
                is_visible=True,
                max_input_tokens=None,
                display_name=model.display_name,
            )
            for model in visible_models
        ]

        openrouter_provider = LLMProviderUpsertRequest(
            name="OpenRouter",
            provider=OPENROUTER_PROVIDER_NAME,
            api_key=OPENROUTER_DEFAULT_API_KEY,
            model_configurations=model_configurations,
            api_key_changed=True,
            is_auto_mode=True,
        )
        _upsert(openrouter_provider, default_model_name)
    else:
        logger.info(
            "OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
        )

    # Configure Cohere embedding provider
    if COHERE_DEFAULT_API_KEY:
        cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(
            provider_type=EmbeddingProvider.COHERE,
            api_key=COHERE_DEFAULT_API_KEY,
        )

        try:
            logger.info("Attempting to upsert Cohere cloud embedding provider")
            upsert_cloud_embedding_provider(db_session, cloud_embedding_provider)
            logger.info("Successfully upserted Cohere cloud embedding provider")

            logger.info("Updating search settings with Cohere embedding model details")
            query = (
                select(SearchSettings)
                .where(SearchSettings.status == IndexModelStatus.FUTURE)
                .order_by(SearchSettings.id.desc())
            )
            result = db_session.execute(query)
            current_search_settings = result.scalars().first()

            if current_search_settings:
                current_search_settings.model_name = (
                    "embed-english-v3.0"  # Cohere's latest model as of now
                )
                current_search_settings.model_dim = (
                    1024  # Cohere's embed-english-v3.0 dimension
                )
                current_search_settings.provider_type = EmbeddingProvider.COHERE
                current_search_settings.index_name = (
                    "danswer_chunk_cohere_embed_english_v3_0"
                )
                current_search_settings.query_prefix = ""
                current_search_settings.passage_prefix = ""
                db_session.commit()
            else:
                raise RuntimeError(
                    "No search settings specified, DB is not in a valid state"
                )
            logger.info("Fetching updated search settings to verify changes")
            updated_query = (
                select(SearchSettings)
                .where(SearchSettings.status == IndexModelStatus.PRESENT)
                .order_by(SearchSettings.id.desc())
            )
            updated_result = db_session.execute(updated_query)
            updated_result.scalars().first()

        except Exception:
            logger.exception("Failed to configure Cohere embedding provider")
    else:
        logger.info(
            "COHERE_DEFAULT_API_KEY not set, skipping Cohere embedding provider configuration"
        )


async def submit_to_hubspot(
    email: str, referral_source: str | None, request: Request
) -> None:
    if not HUBSPOT_TRACKING_URL:
        logger.info("HUBSPOT_TRACKING_URL not set, skipping HubSpot submission")
        return

    # HubSpot tracking cookie
    hubspot_cookie = request.cookies.get("hubspotutk")

    # IP address
    ip_address = request.client.host if request.client else None

    data = {
        "fields": [
            {"name": "email", "value": email},
            {"name": "referral_source", "value": referral_source or ""},
        ],
        "context": {
            "hutk": hubspot_cookie,
            "ipAddress": ip_address,
            "pageUri": str(request.url),
            "pageName": "User Registration",
        },
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(HUBSPOT_TRACKING_URL, json=data)

    if response.status_code != 200:
        logger.error(f"Failed to submit to HubSpot: {response.text}")


async def delete_user_from_control_plane(tenant_id: str, email: str) -> None:
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = TenantDeletionPayload(tenant_id=tenant_id, email=email)

    async with aiohttp.ClientSession() as session:
        async with session.delete(
            f"{CONTROL_PLANE_API_BASE_URL}/tenants/delete",
            headers=headers,
            json=payload.model_dump(),
        ) as response:
            if response.status != 200:
                error_text = await response.text()
                logger.error(f"Control plane tenant creation failed: {error_text}")
                raise Exception(
                    f"Failed to delete tenant on control plane: {error_text}"
                )


def get_tenant_by_domain_from_control_plane(
    domain: str,
    tenant_id: str,
) -> TenantByDomainResponse | None:
    """
    Fetches tenant information from the control plane based on the email domain.

    Args:
        domain: The email domain to search for (e.g., "example.com")

    Returns:
        A dictionary containing tenant information if found, None otherwise
    """
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.get(
            f"{CONTROL_PLANE_API_BASE_URL}/tenant-by-domain",
            headers=headers,
            json={"domain": domain, "tenant_id": tenant_id},
        )

        if response.status_code != 200:
            logger.error(f"Control plane tenant lookup failed: {response.text}")
            return None

        response_data = response.json()
        if not response_data:
            return None

        return TenantByDomainResponse(
            tenant_id=response_data.get("tenant_id"),
            number_of_users=response_data.get("number_of_users"),
            creator_email=response_data.get("creator_email"),
        )
    except Exception as e:
        logger.error(f"Error fetching tenant by domain: {str(e)}")
        return None


async def get_available_tenant() -> str | None:
    """
    Get an available pre-provisioned tenant from the NewAvailableTenant table.
    Returns the tenant_id if one is available, None otherwise.
    Uses row-level locking to prevent race conditions when multiple processes
    try to get an available tenant simultaneously.
    """
    if not MULTI_TENANT:
        return None

    with get_session_with_shared_schema() as db_session:
        try:
            db_session.begin()

            # Get the oldest available tenant with FOR UPDATE lock to prevent race conditions
            available_tenant = (
                db_session.query(AvailableTenant)
                .order_by(AvailableTenant.date_created)
                .with_for_update(skip_locked=True)  # Skip locked rows to avoid blocking
                .first()
            )

            if available_tenant:
                tenant_id = available_tenant.tenant_id
                # Remove the tenant from the available tenants table
                db_session.delete(available_tenant)
                db_session.commit()
                logger.info(f"Using pre-provisioned tenant {tenant_id}")
                return tenant_id
            else:
                db_session.rollback()
                return None
        except Exception:
            logger.exception("Error getting available tenant")
            db_session.rollback()
            return None


async def setup_tenant(tenant_id: str) -> None:
    """
    Set up a tenant with all necessary configurations.
    This is a centralized function that handles all tenant setup logic.
    """
    token = None
    try:
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        # Run Alembic migrations in a way that isolates it from the current event loop
        # Create a new event loop for this synchronous operation
        loop = asyncio.get_event_loop()
        # Use run_in_executor which properly isolates the thread execution
        await loop.run_in_executor(None, lambda: run_alembic_migrations(tenant_id))

        # Configure the tenant with default settings
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Configure default API keys
            configure_default_api_keys(db_session)

            # Set up Onyx with appropriate settings
            current_search_settings = (
                db_session.query(SearchSettings)
                .filter_by(status=IndexModelStatus.FUTURE)
                .first()
            )
            cohere_enabled = (
                current_search_settings is not None
                and current_search_settings.provider_type == EmbeddingProvider.COHERE
            )
            setup_onyx(db_session, tenant_id, cohere_enabled=cohere_enabled)

    except Exception as e:
        logger.exception(f"Failed to set up tenant {tenant_id}")
        raise e
    finally:
        if token is not None:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


async def assign_tenant_to_user(
    tenant_id: str,
    email: str,
    referral_source: str | None = None,  # noqa: ARG001
) -> None:
    """
    Assign a tenant to a user and perform necessary operations.
    Uses transaction handling to ensure atomicity and includes retry logic
    for control plane notifications.
    """
    # First, add the user to the tenant in a transaction

    try:
        add_users_to_tenant([email], tenant_id)
    except Exception:
        logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
        raise Exception("Failed to assign tenant to user")


================================================
FILE: backend/ee/onyx/server/tenants/proxy.py
================================================
"""Proxy endpoints for billing operations.

These endpoints run on the CLOUD DATA PLANE (cloud.onyx.app) and serve as a proxy
for self-hosted instances to reach the control plane.

Flow:
  Self-hosted backend → Cloud DP /proxy/* (license auth) → Control plane (JWT auth)

Self-hosted instances call these endpoints with their license in the Authorization
header. The cloud data plane validates the license signature and forwards the
request to the control plane using JWT authentication.

Auth levels by endpoint:
- /create-checkout-session: No auth (new customer) or expired license OK (renewal)
- /claim-license: Session ID based (one-time after Stripe payment)
- /create-customer-portal-session: Expired license OK (need portal to fix payment)
- /billing-information: Valid license required
- /license/{tenant_id}: Valid license required
- /seats/update: Valid license required
"""

from typing import Literal

import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import Header
from fastapi import HTTPException
from pydantic import BaseModel

from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.server.billing.models import SeatUpdateRequest
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.utils.license import is_license_valid
from ee.onyx.utils.license import verify_license_signature
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/proxy")


def _check_license_enforcement_enabled() -> None:
    """Ensure LICENSE_ENFORCEMENT_ENABLED is true (proxy endpoints only work on cloud DP)."""
    if not LICENSE_ENFORCEMENT_ENABLED:
        raise HTTPException(
            status_code=501,
            detail="Proxy endpoints are only available on cloud data plane",
        )


def _extract_license_from_header(
    authorization: str | None,
    required: bool = True,
) -> str | None:
    """Extract license data from Authorization header.

    Self-hosted instances authenticate to these proxy endpoints by sending their
    license as a Bearer token: `Authorization: Bearer <base64-encoded-license>`.

    We use the Bearer scheme (RFC 6750) because:
    1. It's the standard HTTP auth scheme for token-based authentication
    2. The license blob is cryptographically signed (RSA), so it's self-validating
    3. No other auth schemes (Basic, Digest, etc.) are supported for license auth

    The license data is the base64-encoded signed blob that contains tenant_id,
    seats, expiration, etc. We verify the signature to authenticate the caller.

    Args:
        authorization: The Authorization header value (e.g., "Bearer <license>")
        required: If True, raise 401 when header is missing/invalid

    Returns:
        License data string (base64-encoded), or None if not required and missing

    Raises:
        HTTPException: 401 if required and header is missing/invalid
    """
    if not authorization or not authorization.startswith("Bearer "):
        if required:
            raise HTTPException(
                status_code=401, detail="Missing or invalid authorization header"
            )
        return None

    return authorization.split(" ", 1)[1]


def verify_license_auth(
    license_data: str,
    allow_expired: bool = False,
) -> LicensePayload:
    """Verify license signature and optionally check expiry.

    Args:
        license_data: Base64-encoded signed license blob
        allow_expired: If True, accept expired licenses (for renewal flows)

    Returns:
        LicensePayload if valid

    Raises:
        HTTPException: If license is invalid or expired (when not allowed)
    """
    _check_license_enforcement_enabled()

    try:
        payload = verify_license_signature(license_data)
    except ValueError as e:
        raise HTTPException(status_code=401, detail=f"Invalid license: {e}")

    if not allow_expired and not is_license_valid(payload):
        raise HTTPException(status_code=401, detail="License has expired")

    return payload


async def get_license_payload(
    authorization: str | None = Header(None, alias="Authorization"),
) -> LicensePayload:
    """Dependency: Require valid (non-expired) license.

    Used for endpoints that require an active subscription.
    """
    license_data = _extract_license_from_header(authorization, required=True)
    # license_data is guaranteed non-None when required=True
    assert license_data is not None
    return verify_license_auth(license_data, allow_expired=False)


async def get_license_payload_allow_expired(
    authorization: str | None = Header(None, alias="Authorization"),
) -> LicensePayload:
    """Dependency: Require license with valid signature, expired OK.

    Used for endpoints needed to fix payment issues (portal, renewal checkout).
    """
    license_data = _extract_license_from_header(authorization, required=True)
    # license_data is guaranteed non-None when required=True
    assert license_data is not None
    return verify_license_auth(license_data, allow_expired=True)


async def get_optional_license_payload(
    authorization: str | None = Header(None, alias="Authorization"),
) -> LicensePayload | None:
    """Dependency: Optional license auth (for checkout - new customers have none).

    Returns None if no license provided, otherwise validates and returns payload.
    Expired licenses are allowed for renewal flows.
    """
    _check_license_enforcement_enabled()

    license_data = _extract_license_from_header(authorization, required=False)
    if license_data is None:
        return None

    return verify_license_auth(license_data, allow_expired=True)


async def forward_to_control_plane(
    method: str,
    path: str,
    body: dict | None = None,
    params: dict | None = None,
) -> dict:
    """Forward a request to the control plane with proper authentication."""
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    url = f"{CONTROL_PLANE_API_BASE_URL}{path}"

    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            if method == "GET":
                response = await client.get(url, headers=headers, params=params)
            elif method == "POST":
                response = await client.post(url, headers=headers, json=body)
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code
        detail = "Control plane request failed"
        try:
            error_data = e.response.json()
            detail = error_data.get("detail", detail)
        except Exception:
            pass
        logger.error(f"Control plane returned {status_code}: {detail}")
        raise HTTPException(status_code=status_code, detail=detail)
    except httpx.RequestError:
        logger.exception("Failed to connect to control plane")
        raise HTTPException(
            status_code=502, detail="Failed to connect to control plane"
        )


# -----------------------------------------------------------------------------
# Endpoints
# -----------------------------------------------------------------------------


class CreateCheckoutSessionRequest(BaseModel):
    billing_period: Literal["monthly", "annual"] = "monthly"
    seats: int | None = None
    email: str | None = None
    # Redirect URL after successful checkout - self-hosted passes their instance URL
    redirect_url: str | None = None
    # Cancel URL when user exits checkout - returns to upgrade page
    cancel_url: str | None = None


class CreateCheckoutSessionResponse(BaseModel):
    url: str


@router.post("/create-checkout-session")
async def proxy_create_checkout_session(
    request_body: CreateCheckoutSessionRequest,
    license_payload: LicensePayload | None = Depends(get_optional_license_payload),
) -> CreateCheckoutSessionResponse:
    """Proxy checkout session creation to control plane.

    Auth: Optional license (new customers don't have one yet).
    If license provided, expired is OK (for renewals).
    """
    # license_payload is None for new customers who don't have a license yet.
    # In that case, tenant_id is omitted from the request body and the control
    # plane will create a new tenant during checkout completion.
    tenant_id = license_payload.tenant_id if license_payload else None

    body: dict = {
        "billing_period": request_body.billing_period,
    }
    if tenant_id:
        body["tenant_id"] = tenant_id
    if request_body.seats is not None:
        body["seats"] = request_body.seats
    if request_body.email:
        body["email"] = request_body.email
    if request_body.redirect_url:
        body["redirect_url"] = request_body.redirect_url
    if request_body.cancel_url:
        body["cancel_url"] = request_body.cancel_url

    result = await forward_to_control_plane(
        "POST", "/create-checkout-session", body=body
    )
    return CreateCheckoutSessionResponse(url=result["url"])


class ClaimLicenseRequest(BaseModel):
    session_id: str


class ClaimLicenseResponse(BaseModel):
    tenant_id: str
    license: str
    message: str | None = None


@router.post("/claim-license")
async def proxy_claim_license(
    request_body: ClaimLicenseRequest,
) -> ClaimLicenseResponse:
    """Claim a license after successful Stripe checkout.

    Auth: Session ID based (one-time use after payment).
    The control plane verifies the session_id is valid and unclaimed.

    Returns the license to the caller. For self-hosted instances, they will
    store the license locally. The cloud DP doesn't need to store it.
    """
    _check_license_enforcement_enabled()

    result = await forward_to_control_plane(
        "POST",
        "/claim-license",
        body={"session_id": request_body.session_id},
    )

    tenant_id = result.get("tenant_id")
    license_data = result.get("license")

    if not tenant_id or not license_data:
        logger.error(f"Control plane returned incomplete claim response: {result}")
        raise HTTPException(
            status_code=502,
            detail="Control plane returned incomplete license data",
        )

    return ClaimLicenseResponse(
        tenant_id=tenant_id,
        license=license_data,
        message="License claimed successfully",
    )


class CreateCustomerPortalSessionRequest(BaseModel):
    return_url: str | None = None


class CreateCustomerPortalSessionResponse(BaseModel):
    url: str


@router.post("/create-customer-portal-session")
async def proxy_create_customer_portal_session(
    request_body: CreateCustomerPortalSessionRequest | None = None,
    license_payload: LicensePayload = Depends(get_license_payload_allow_expired),
) -> CreateCustomerPortalSessionResponse:
    """Proxy customer portal session creation to control plane.

    Auth: License required, expired OK (need portal to fix payment issues).
    """
    # tenant_id is a required field in LicensePayload (Pydantic validates this),
    # but we check explicitly for defense in depth
    if not license_payload.tenant_id:
        raise HTTPException(status_code=401, detail="License missing tenant_id")

    tenant_id = license_payload.tenant_id

    body: dict = {"tenant_id": tenant_id}
    if request_body and request_body.return_url:
        body["return_url"] = request_body.return_url

    result = await forward_to_control_plane(
        "POST", "/create-customer-portal-session", body=body
    )
    return CreateCustomerPortalSessionResponse(url=result["url"])


class BillingInformationResponse(BaseModel):
    tenant_id: str
    status: str | None = None
    plan_type: str | None = None
    seats: int | None = None
    billing_period: str | None = None
    current_period_start: str | None = None
    current_period_end: str | None = None
    cancel_at_period_end: bool = False
    canceled_at: str | None = None
    trial_start: str | None = None
    trial_end: str | None = None
    payment_method_enabled: bool = False
    stripe_subscription_id: str | None = None


@router.get("/billing-information")
async def proxy_billing_information(
    license_payload: LicensePayload = Depends(get_license_payload),
) -> BillingInformationResponse:
    """Proxy billing information request to control plane.

    Auth: Valid (non-expired) license required.
    """
    # tenant_id is a required field in LicensePayload (Pydantic validates this),
    # but we check explicitly for defense in depth
    if not license_payload.tenant_id:
        raise HTTPException(status_code=401, detail="License missing tenant_id")

    tenant_id = license_payload.tenant_id

    result = await forward_to_control_plane(
        "GET", "/billing-information", params={"tenant_id": tenant_id}
    )
    # Add tenant_id from license if not in response (control plane may not include it)
    if "tenant_id" not in result:
        result["tenant_id"] = tenant_id
    return BillingInformationResponse(**result)


class LicenseFetchResponse(BaseModel):
    license: str
    tenant_id: str


@router.get("/license/{tenant_id}")
async def proxy_license_fetch(
    tenant_id: str,
    license_payload: LicensePayload = Depends(get_license_payload),
) -> LicenseFetchResponse:
    """Proxy license fetch to control plane.

    Auth: Valid license required.
    The tenant_id in path must match the authenticated tenant.
    """
    # tenant_id is a required field in LicensePayload (Pydantic validates this),
    # but we check explicitly for defense in depth
    if not license_payload.tenant_id:
        raise HTTPException(status_code=401, detail="License missing tenant_id")

    if tenant_id != license_payload.tenant_id:
        raise HTTPException(
            status_code=403,
            detail="Cannot fetch license for a different tenant",
        )

    result = await forward_to_control_plane("GET", f"/license/{tenant_id}")

    license_data = result.get("license")
    if not license_data:
        logger.error(f"Control plane returned incomplete license response: {result}")
        raise HTTPException(
            status_code=502,
            detail="Control plane returned incomplete license data",
        )

    # Return license to caller - self-hosted instance stores it via /api/license/claim
    return LicenseFetchResponse(license=license_data, tenant_id=tenant_id)


@router.post("/seats/update")
async def proxy_seat_update(
    request_body: SeatUpdateRequest,
    license_payload: LicensePayload = Depends(get_license_payload),
) -> SeatUpdateResponse:
    """Proxy seat update to control plane.

    Auth: Valid (non-expired) license required.
    Handles Stripe proration and license regeneration.
    Returns the regenerated license in the response for the caller to store.
    """
    if not license_payload.tenant_id:
        raise HTTPException(status_code=401, detail="License missing tenant_id")

    tenant_id = license_payload.tenant_id

    result = await forward_to_control_plane(
        "POST",
        "/seats/update",
        body={
            "tenant_id": tenant_id,
            "new_seat_count": request_body.new_seat_count,
        },
    )

    # Return license in response - self-hosted instance stores it via /api/license/claim
    return SeatUpdateResponse(
        success=result.get("success", False),
        current_seats=result.get("current_seats", 0),
        used_seats=result.get("used_seats", 0),
        message=result.get("message"),
        license=result.get("license"),
    )


================================================
FILE: backend/ee/onyx/server/tenants/schema_management.py
================================================
import logging
import os
import re
from types import SimpleNamespace

from sqlalchemy import text
from sqlalchemy.orm import Session
from sqlalchemy.schema import CreateSchema

from alembic import command
from alembic.config import Config
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from shared_configs.configs import TENANT_ID_PREFIX

logger = logging.getLogger(__name__)

# Regex pattern for valid tenant IDs:
# - UUID format: tenant_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
# - AWS instance ID format: tenant_i-xxxxxxxxxxxxxxxxx
# Also useful for not accidentally dropping `public` schema
TENANT_ID_PATTERN = re.compile(
    rf"^{re.escape(TENANT_ID_PREFIX)}("
    r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"  # UUID
    r"|i-[a-f0-9]+"  # AWS instance ID
    r")$"
)


def validate_tenant_id(tenant_id: str) -> bool:
    """Validate that tenant_id matches expected format.

    This is important for SQL injection prevention since schema names
    cannot be parameterized in SQL and must be formatted directly.
    """
    return bool(TENANT_ID_PATTERN.match(tenant_id))


def run_alembic_migrations(schema_name: str) -> None:
    logger.info(f"Starting Alembic migrations for schema: {schema_name}")

    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", "..", ".."))
        alembic_ini_path = os.path.join(root_dir, "alembic.ini")

        # Configure Alembic
        alembic_cfg = Config(alembic_ini_path)
        alembic_cfg.set_main_option("sqlalchemy.url", build_connection_string())
        alembic_cfg.set_main_option(
            "script_location", os.path.join(root_dir, "alembic")
        )

        # Ensure that logging isn't broken
        alembic_cfg.attributes["configure_logger"] = False

        # Mimic command-line options by adding 'cmd_opts' to the config
        alembic_cfg.cmd_opts = SimpleNamespace()  # type: ignore
        alembic_cfg.cmd_opts.x = [f"schemas={schema_name}"]  # type: ignore

        # Run migrations programmatically
        command.upgrade(alembic_cfg, "head")

        # Run migrations programmatically
        logger.info(
            f"Alembic migrations completed successfully for schema: {schema_name}"
        )

    except Exception as e:
        logger.exception(f"Alembic migration failed for schema {schema_name}: {str(e)}")
        raise


def create_schema_if_not_exists(tenant_id: str) -> bool:
    with Session(get_sqlalchemy_engine()) as db_session:
        with db_session.begin():
            result = db_session.execute(
                text(
                    "SELECT schema_name FROM information_schema.schemata WHERE schema_name = :schema_name"
                ),
                {"schema_name": tenant_id},
            )
            schema_exists = result.scalar() is not None
            if not schema_exists:
                stmt = CreateSchema(tenant_id)
                db_session.execute(stmt)
                return True
            return False


def drop_schema(tenant_id: str) -> None:
    """Drop a tenant's schema.

    Uses strict regex validation to reject unexpected formats early,
    preventing SQL injection since schema names cannot be parameterized.
    """
    if not validate_tenant_id(tenant_id):
        raise ValueError(f"Invalid tenant_id format: {tenant_id}")

    with get_sqlalchemy_engine().connect() as connection:
        with connection.begin():
            # Use string formatting with validated tenant_id (safe after validation)
            connection.execute(text(f'DROP SCHEMA IF EXISTS "{tenant_id}" CASCADE'))


def get_current_alembic_version(tenant_id: str) -> str:
    """Get the current Alembic version for a tenant."""
    from alembic.runtime.migration import MigrationContext
    from sqlalchemy import text

    engine = get_sqlalchemy_engine()

    # Set the search path to the tenant's schema
    with engine.connect() as connection:
        connection.execute(text(f'SET search_path TO "{tenant_id}"'))

        # Get the current version from the alembic_version table
        context = MigrationContext.configure(connection)
        current_rev = context.get_current_revision()

    return current_rev or "head"


================================================
FILE: backend/ee/onyx/server/tenants/team_membership_api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
from ee.onyx.server.tenants.user_mapping import remove_users_from_tenant
from onyx.auth.users import current_admin_user
from onyx.auth.users import User
from onyx.db.auth import get_user_count
from onyx.db.engine.sql_engine import get_session
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_user_by_email
from onyx.server.manage.models import UserByEmail
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/tenants")


@router.post("/leave-team")
async def leave_organization(
    user_email: UserByEmail,
    current_user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    tenant_id = get_current_tenant_id()

    if current_user.email != user_email.user_email:
        raise HTTPException(
            status_code=403, detail="You can only leave the organization as yourself"
        )

    user_to_delete = get_user_by_email(user_email.user_email, db_session)
    if user_to_delete is None:
        raise HTTPException(status_code=404, detail="User not found")

    num_admin_users = await get_user_count(only_admin_users=True)

    should_delete_tenant = num_admin_users == 1

    if should_delete_tenant:
        logger.info(
            "Last admin user is leaving the organization. Deleting tenant from control plane."
        )
        try:
            await delete_user_from_control_plane(tenant_id, user_to_delete.email)
            logger.debug("User deleted from control plane")
        except Exception as e:
            logger.exception(
                f"Failed to delete user from control plane for tenant {tenant_id}: {e}"
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to remove user from control plane: {str(e)}",
            )

    db_session.expunge(user_to_delete)
    delete_user_from_db(user_to_delete, db_session)

    if should_delete_tenant:
        remove_all_users_from_tenant(tenant_id)
    else:
        remove_users_from_tenant([user_to_delete.email], tenant_id)


================================================
FILE: backend/ee/onyx/server/tenants/tenant_management_api.py
================================================
from fastapi import APIRouter
from fastapi import Depends

from ee.onyx.server.tenants.models import TenantByDomainResponse
from ee.onyx.server.tenants.provisioning import get_tenant_by_domain_from_control_plane
from onyx.auth.users import current_user
from onyx.auth.users import User
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/tenants")

FORBIDDEN_COMMON_EMAIL_SUBSTRINGS = [
    "gmail",
    "outlook",
    "yahoo",
    "hotmail",
    "icloud",
    "msn",
    "hotmail",
    "hotmail.co.uk",
]


@router.get("/existing-team-by-domain")
def get_existing_tenant_by_domain(
    user: User = Depends(current_user),
) -> TenantByDomainResponse | None:
    domain = user.email.split("@")[1]
    if any(substring in domain for substring in FORBIDDEN_COMMON_EMAIL_SUBSTRINGS):
        return None

    tenant_id = get_current_tenant_id()

    return get_tenant_by_domain_from_control_plane(domain, tenant_id)


================================================
FILE: backend/ee/onyx/server/tenants/user_invitations_api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException

from ee.onyx.server.tenants.models import ApproveUserRequest
from ee.onyx.server.tenants.models import PendingUserSnapshot
from ee.onyx.server.tenants.models import RequestInviteRequest
from ee.onyx.server.tenants.user_mapping import accept_user_invite
from ee.onyx.server.tenants.user_mapping import approve_user_invite
from ee.onyx.server.tenants.user_mapping import deny_user_invite
from ee.onyx.server.tenants.user_mapping import invite_self_to_tenant
from onyx.auth.invited_users import get_pending_users
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import User
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/tenants")


@router.post("/users/invite/request")
async def request_invite(
    invite_request: RequestInviteRequest,
    user: User = Depends(current_admin_user),
) -> None:
    try:
        invite_self_to_tenant(user.email, invite_request.tenant_id)
    except Exception as e:
        logger.exception(
            f"Failed to invite self to tenant {invite_request.tenant_id}: {e}"
        )
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/users/pending")
def list_pending_users(
    _: User = Depends(current_admin_user),
) -> list[PendingUserSnapshot]:
    pending_emails = get_pending_users()
    return [PendingUserSnapshot(email=email) for email in pending_emails]


@router.post("/users/invite/approve")
async def approve_user(
    approve_user_request: ApproveUserRequest,
    _: User = Depends(current_admin_user),
) -> None:
    tenant_id = get_current_tenant_id()
    approve_user_invite(approve_user_request.email, tenant_id)


@router.post("/users/invite/accept")
async def accept_invite(
    invite_request: RequestInviteRequest,
    user: User = Depends(current_user),
) -> None:
    """
    Accept an invitation to join a tenant.
    """
    try:
        accept_user_invite(user.email, invite_request.tenant_id)
    except Exception as e:
        logger.exception(f"Failed to accept invite: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to accept invitation")


@router.post("/users/invite/deny")
async def deny_invite(
    invite_request: RequestInviteRequest,
    user: User = Depends(current_user),
) -> None:
    """
    Deny an invitation to join a tenant.
    """
    try:
        deny_user_invite(user.email, invite_request.tenant_id)
    except Exception as e:
        logger.exception(f"Failed to deny invite: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to deny invitation")


================================================
FILE: backend/ee/onyx/server/tenants/user_mapping.py
================================================
from fastapi_users import exceptions
from sqlalchemy import select

from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import get_pending_users
from onyx.auth.invited_users import write_invited_users
from onyx.auth.invited_users import write_pending_users
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import UserTenantMapping
from onyx.server.manage.models import TenantSnapshot
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


def get_tenant_id_for_email(email: str) -> str:
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA
    # Implement logic to get tenant_id from the mapping table
    try:
        with get_session_with_shared_schema() as db_session:
            # First try to get an active tenant
            result = db_session.execute(
                select(UserTenantMapping).where(
                    UserTenantMapping.email == email,
                    UserTenantMapping.active == True,  # noqa: E712
                )
            )
            mapping = result.scalar_one_or_none()
            tenant_id = mapping.tenant_id if mapping else None

            # If no active tenant found, try to get the first inactive one
            if tenant_id is None:
                result = db_session.execute(
                    select(UserTenantMapping).where(
                        UserTenantMapping.email == email,
                        UserTenantMapping.active == False,  # noqa: E712
                    )
                )
                mapping = result.scalar_one_or_none()
                if mapping:
                    # Mark this mapping as active
                    mapping.active = True
                    db_session.commit()
                    tenant_id = mapping.tenant_id
    except Exception as e:
        logger.exception(f"Error getting tenant id for email {email}: {e}")
        raise exceptions.UserNotExists()

    if tenant_id is None:
        raise exceptions.UserNotExists()
    return tenant_id


def user_owns_a_tenant(email: str) -> bool:
    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        result = (
            db_session.query(UserTenantMapping)
            .filter(UserTenantMapping.email == email)
            .first()
        )
        return result is not None


def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
    """
    Add users to a tenant with proper transaction handling.
    Checks if users already have a tenant mapping to avoid duplicates.

    If a user already has an active mapping to a different tenant, they receive
    an inactive mapping (invitation) to this tenant. They can accept the
    invitation later to switch tenants.

    """
    unique_emails = set(emails)
    if not unique_emails:
        return

    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            # Start a transaction
            db_session.begin()

            # Batch query 1: Get all existing mappings for these emails to this tenant
            # Lock rows to prevent concurrent modifications
            existing_mappings = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.email.in_(unique_emails),
                    UserTenantMapping.tenant_id == tenant_id,
                )
                .with_for_update()
                .all()
            )
            emails_with_mapping = {m.email for m in existing_mappings}

            # Batch query 2: Get all active mappings for these emails (any tenant)
            active_mappings = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.email.in_(unique_emails),
                    UserTenantMapping.active == True,  # noqa: E712
                )
                .all()
            )
            emails_with_active_mapping = {m.email for m in active_mappings}

            # Add mappings for emails that don't already have one to this tenant
            for email in unique_emails:
                if email in emails_with_mapping:
                    continue

                # Create mapping: inactive if user belongs to another tenant (invitation),
                # active otherwise
                db_session.add(
                    UserTenantMapping(
                        email=email,
                        tenant_id=tenant_id,
                        active=email not in emails_with_active_mapping,
                    )
                )

            # Commit the transaction
            db_session.commit()
            logger.info(f"Successfully added users {emails} to tenant {tenant_id}")

        except Exception:
            logger.exception(f"Failed to add users to tenant {tenant_id}")
            db_session.rollback()
            raise


def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            mappings_to_delete = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.email.in_(emails),
                    UserTenantMapping.tenant_id == tenant_id,
                )
                .all()
            )

            for mapping in mappings_to_delete:
                db_session.delete(mapping)

            db_session.commit()
        except Exception as e:
            logger.exception(
                f"Failed to remove users from tenant {tenant_id}: {str(e)}"
            )
            db_session.rollback()


def remove_all_users_from_tenant(tenant_id: str) -> None:
    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        db_session.query(UserTenantMapping).filter(
            UserTenantMapping.tenant_id == tenant_id
        ).delete()
        db_session.commit()


def invite_self_to_tenant(email: str, tenant_id: str) -> None:
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        pending_users = get_pending_users()
        if email in pending_users:
            return
        write_pending_users(pending_users + [email])
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def approve_user_invite(email: str, tenant_id: str) -> None:
    """
    Approve a user invite to a tenant.
    This will delete all existing records for this email and create a new mapping entry for the user in this tenant.
    """
    with get_session_with_shared_schema() as db_session:
        # Delete all existing records for this email
        db_session.query(UserTenantMapping).filter(
            UserTenantMapping.email == email
        ).delete()

        # Create a new mapping entry for the user in this tenant
        new_mapping = UserTenantMapping(email=email, tenant_id=tenant_id, active=True)
        db_session.add(new_mapping)
        db_session.commit()

    # Also remove the user from pending users list
    # Remove from pending users
    pending_users = get_pending_users()
    if email in pending_users:
        pending_users.remove(email)
        write_pending_users(pending_users)

    # Add to invited users
    invited_users = get_invited_users()
    if email not in invited_users:
        invited_users.append(email)
        write_invited_users(invited_users)


def accept_user_invite(email: str, tenant_id: str) -> None:
    """
    Accept an invitation to join a tenant.
    This activates the user's mapping to the tenant.
    """
    with get_session_with_shared_schema() as db_session:
        try:
            # Lock the user's mappings first to prevent race conditions.
            # This ensures no concurrent request can modify this user's mappings.
            active_mapping = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.email == email,
                    UserTenantMapping.active == True,  # noqa: E712
                )
                .with_for_update()
                .first()
            )

            # If an active mapping exists, delete it
            if active_mapping:
                db_session.delete(active_mapping)
                logger.info(
                    f"Deleted existing active mapping for user {email} in tenant {tenant_id}"
                )

            # Find the inactive mapping for this user and tenant
            mapping = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.email == email,
                    UserTenantMapping.tenant_id == tenant_id,
                    UserTenantMapping.active == False,  # noqa: E712
                )
                .first()
            )

            if mapping:
                # Set all other mappings for this user to inactive
                db_session.query(UserTenantMapping).filter(
                    UserTenantMapping.email == email,
                    UserTenantMapping.active == True,  # noqa: E712
                ).update({"active": False})

                # Activate this mapping
                mapping.active = True
                db_session.commit()
                logger.info(f"User {email} accepted invitation to tenant {tenant_id}")
            else:
                logger.warning(
                    f"No invitation found for user {email} in tenant {tenant_id}"
                )

        except Exception as e:
            db_session.rollback()
            logger.exception(
                f"Failed to accept invitation for user {email} to tenant {tenant_id}: {str(e)}"
            )
            raise

    # Remove from invited users list since they've accepted
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        invited_users = get_invited_users()
        if email in invited_users:
            invited_users.remove(email)
            write_invited_users(invited_users)
            logger.info(f"Removed {email} from invited users list after acceptance")
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def deny_user_invite(email: str, tenant_id: str) -> None:
    """
    Deny an invitation to join a tenant.
    This removes the user's mapping to the tenant.
    """
    with get_session_with_shared_schema() as db_session:
        # Delete the mapping for this user and tenant
        result = (
            db_session.query(UserTenantMapping)
            .filter(
                UserTenantMapping.email == email,
                UserTenantMapping.tenant_id == tenant_id,
                UserTenantMapping.active == False,  # noqa: E712
            )
            .delete()
        )

        db_session.commit()
        if result:
            logger.info(f"User {email} denied invitation to tenant {tenant_id}")
        else:
            logger.warning(
                f"No invitation found for user {email} in tenant {tenant_id}"
            )
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        pending_users = get_invited_users()
        if email in pending_users:
            pending_users.remove(email)
            write_invited_users(pending_users)
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def get_tenant_count(tenant_id: str) -> int:
    """
    Get the number of active users for this tenant.

    A user counts toward the seat count if:
    1. They have an active mapping to this tenant (UserTenantMapping.active == True)
    2. AND the User is active (User.is_active == True)
    3. AND the User is not the anonymous system user

    TODO: Exclude API key dummy users from seat counting. API keys create
    users with emails like `__DANSWER_API_KEY_*` that should not count toward
    seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
    """
    from onyx.configs.constants import ANONYMOUS_USER_EMAIL
    from onyx.db.models import User

    # First get all emails with active mappings to this tenant
    with get_session_with_shared_schema() as db_session:
        active_mapping_emails = (
            db_session.query(UserTenantMapping.email)
            .filter(
                UserTenantMapping.tenant_id == tenant_id,
                UserTenantMapping.active == True,  # noqa: E712
                UserTenantMapping.email != ANONYMOUS_USER_EMAIL,
            )
            .all()
        )
        emails = [email for (email,) in active_mapping_emails]

    if not emails:
        return 0

    # Now count how many of those users are actually active in the tenant's User table
    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        user_count = (
            db_session.query(User)
            .filter(
                User.email.in_(emails),  # type: ignore
                User.is_active == True,  # type: ignore  # noqa: E712
            )
            .count()
        )

        return user_count


def get_tenant_invitation(email: str) -> TenantSnapshot | None:
    """
    Get the first tenant invitation for this user
    """
    with get_session_with_shared_schema() as db_session:
        # Get the first tenant invitation for this user
        invitation = (
            db_session.query(UserTenantMapping)
            .filter(
                UserTenantMapping.email == email,
                UserTenantMapping.active == False,  # noqa: E712
            )
            .first()
        )

        if invitation:
            # Get the user count for this tenant
            user_count = (
                db_session.query(UserTenantMapping)
                .filter(
                    UserTenantMapping.tenant_id == invitation.tenant_id,
                    UserTenantMapping.active == True,  # noqa: E712
                )
                .count()
            )
            return TenantSnapshot(
                tenant_id=invitation.tenant_id, number_of_users=user_count
            )

        return None


================================================
FILE: backend/ee/onyx/server/token_rate_limits/api.py
================================================
from collections import defaultdict

from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from ee.onyx.db.token_limit import fetch_all_user_group_token_rate_limits_by_group
from ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user
from ee.onyx.db.token_limit import insert_user_group_token_rate_limit
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.token_limit import fetch_all_user_token_rate_limits
from onyx.db.token_limit import insert_user_token_rate_limit
from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
from onyx.server.token_rate_limits.models import TokenRateLimitDisplay

router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)


"""
Group Token Limit Settings
"""


@router.get("/user-groups")
def get_all_group_token_limit_settings(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, list[TokenRateLimitDisplay]]:
    user_groups_to_token_rate_limits = fetch_all_user_group_token_rate_limits_by_group(
        db_session
    )

    token_rate_limits_by_group = defaultdict(list)
    for token_rate_limit, group_name in user_groups_to_token_rate_limits:
        token_rate_limits_by_group[group_name].append(
            TokenRateLimitDisplay.from_db(token_rate_limit)
        )

    return dict(token_rate_limits_by_group)


@router.get("/user-group/{group_id}")
def get_group_token_limit_settings(
    group_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[TokenRateLimitDisplay]:
    return [
        TokenRateLimitDisplay.from_db(token_rate_limit)
        for token_rate_limit in fetch_user_group_token_rate_limits_for_user(
            db_session=db_session,
            group_id=group_id,
            user=user,
        )
    ]


@router.post("/user-group/{group_id}")
def create_group_token_limit_settings(
    group_id: int,
    token_limit_settings: TokenRateLimitArgs,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
    rate_limit_display = TokenRateLimitDisplay.from_db(
        insert_user_group_token_rate_limit(
            db_session=db_session,
            token_rate_limit_settings=token_limit_settings,
            group_id=group_id,
        )
    )
    # clear cache in case this was the first rate limit created
    any_rate_limit_exists.cache_clear()
    return rate_limit_display


"""
User Token Limit Settings
"""


@router.get("/users")
def get_user_token_limit_settings(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[TokenRateLimitDisplay]:
    return [
        TokenRateLimitDisplay.from_db(token_rate_limit)
        for token_rate_limit in fetch_all_user_token_rate_limits(db_session)
    ]


@router.post("/users")
def create_user_token_limit_settings(
    token_limit_settings: TokenRateLimitArgs,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
    rate_limit_display = TokenRateLimitDisplay.from_db(
        insert_user_token_rate_limit(db_session, token_limit_settings)
    )
    # clear cache in case this was the first rate limit created
    any_rate_limit_exists.cache_clear()
    return rate_limit_display


================================================
FILE: backend/ee/onyx/server/usage_limits.py
================================================
"""EE Usage limits - trial detection via billing information."""

from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def is_tenant_on_trial(tenant_id: str) -> bool:
    """
    Determine if a tenant is currently on a trial subscription.

    In multi-tenant mode, we fetch billing information from the control plane
    to determine if the tenant has an active trial.
    """
    if not MULTI_TENANT:
        return False

    try:
        billing_info = fetch_billing_information(tenant_id)

        # If not subscribed at all, check if we have trial information
        if isinstance(billing_info, SubscriptionStatusResponse):
            # No subscription means they're likely on trial (new tenant)
            return True

        if isinstance(billing_info, BillingInformation):
            return billing_info.status == "trialing"

        return False

    except Exception as e:
        logger.warning(f"Failed to fetch billing info for trial check: {e}")
        # Default to trial limits on error (more restrictive = safer)
        return True


================================================
FILE: backend/ee/onyx/server/user_group/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from ee.onyx.db.persona import update_persona_access
from ee.onyx.db.user_group import add_users_to_user_group
from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
from ee.onyx.db.user_group import fetch_user_group
from ee.onyx.db.user_group import fetch_user_groups
from ee.onyx.db.user_group import fetch_user_groups_for_user
from ee.onyx.db.user_group import insert_user_group
from ee.onyx.db.user_group import prepare_user_group_for_deletion
from ee.onyx.db.user_group import rename_user_group
from ee.onyx.db.user_group import update_user_curator_relationship
from ee.onyx.db.user_group import update_user_group
from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
from ee.onyx.server.user_group.models import MinimalUserGroupSnapshot
from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UpdateGroupAgentsRequest
from ee.onyx.server.user_group.models import UserGroup
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupRename
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.db.persona import get_persona_by_id
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)


@router.get("/admin/user-group")
def list_user_groups(
    include_default: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[UserGroup]:
    if user.role == UserRole.ADMIN:
        user_groups = fetch_user_groups(
            db_session,
            only_up_to_date=False,
            eager_load_for_snapshot=True,
            include_default=include_default,
        )
    else:
        user_groups = fetch_user_groups_for_user(
            db_session=db_session,
            user_id=user.id,
            only_curator_groups=user.role == UserRole.CURATOR,
            eager_load_for_snapshot=True,
            include_default=include_default,
        )
    return [UserGroup.from_model(user_group) for user_group in user_groups]


@router.get("/user-groups/minimal")
def list_minimal_user_groups(
    include_default: bool = False,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[MinimalUserGroupSnapshot]:
    if user.role == UserRole.ADMIN:
        user_groups = fetch_user_groups(
            db_session,
            only_up_to_date=False,
            include_default=include_default,
        )
    else:
        user_groups = fetch_user_groups_for_user(
            db_session=db_session,
            user_id=user.id,
            include_default=include_default,
        )
    return [
        MinimalUserGroupSnapshot.from_model(user_group) for user_group in user_groups
    ]


@router.get("/admin/user-group/{user_group_id}/permissions")
def get_user_group_permissions(
    user_group_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[str]:
    group = fetch_user_group(db_session, user_group_id)
    if group is None:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, "User group not found")
    return [
        grant.permission.value
        for grant in group.permission_grants
        if not grant.is_deleted
    ]


@router.post("/admin/user-group")
def create_user_group(
    user_group: UserGroupCreate,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> UserGroup:
    try:
        db_user_group = insert_user_group(db_session, user_group)
    except IntegrityError:
        raise HTTPException(
            400,
            f"User group with name '{user_group.name}' already exists. Please "
            + "choose a different name.",
        )
    return UserGroup.from_model(db_user_group)


@router.patch("/admin/user-group/rename")
def rename_user_group_endpoint(
    rename_request: UserGroupRename,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> UserGroup:
    group = fetch_user_group(db_session, rename_request.id)
    if group and group.is_default:
        raise OnyxError(OnyxErrorCode.CONFLICT, "Cannot rename a default system group.")
    try:
        return UserGroup.from_model(
            rename_user_group(
                db_session=db_session,
                user_group_id=rename_request.id,
                new_name=rename_request.name,
            )
        )
    except IntegrityError:
        raise OnyxError(
            OnyxErrorCode.DUPLICATE_RESOURCE,
            f"User group with name '{rename_request.name}' already exists.",
        )
    except ValueError as e:
        msg = str(e)
        if "not found" in msg.lower():
            raise OnyxError(OnyxErrorCode.NOT_FOUND, msg)
        raise OnyxError(OnyxErrorCode.CONFLICT, msg)


@router.patch("/admin/user-group/{user_group_id}")
def patch_user_group(
    user_group_id: int,
    user_group_update: UserGroupUpdate,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> UserGroup:
    try:
        return UserGroup.from_model(
            update_user_group(
                db_session=db_session,
                user=user,
                user_group_id=user_group_id,
                user_group_update=user_group_update,
            )
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@router.post("/admin/user-group/{user_group_id}/add-users")
def add_users(
    user_group_id: int,
    add_users_request: AddUsersToUserGroupRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> UserGroup:
    try:
        return UserGroup.from_model(
            add_users_to_user_group(
                db_session=db_session,
                user=user,
                user_group_id=user_group_id,
                user_ids=add_users_request.user_ids,
            )
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@router.post("/admin/user-group/{user_group_id}/set-curator")
def set_user_curator(
    user_group_id: int,
    set_curator_request: SetCuratorRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        update_user_curator_relationship(
            db_session=db_session,
            user_group_id=user_group_id,
            set_curator_request=set_curator_request,
            user_making_change=user,
        )
    except ValueError as e:
        logger.error(f"Error setting user curator: {e}")
        raise HTTPException(status_code=404, detail=str(e))


@router.delete("/admin/user-group/{user_group_id}")
def delete_user_group(
    user_group_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    group = fetch_user_group(db_session, user_group_id)
    if group and group.is_default:
        raise OnyxError(OnyxErrorCode.CONFLICT, "Cannot delete a default system group.")
    try:
        prepare_user_group_for_deletion(db_session, user_group_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    if DISABLE_VECTOR_DB:
        user_group = fetch_user_group(db_session, user_group_id)
        if user_group:
            db_delete_user_group(db_session, user_group)


@router.patch("/admin/user-group/{user_group_id}/agents")
def update_group_agents(
    user_group_id: int,
    request: UpdateGroupAgentsRequest,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    for agent_id in request.added_agent_ids:
        persona = get_persona_by_id(
            persona_id=agent_id, user=user, db_session=db_session
        )
        current_group_ids = [g.id for g in persona.groups]
        if user_group_id not in current_group_ids:
            update_persona_access(
                persona_id=agent_id,
                creator_user_id=user.id,
                db_session=db_session,
                group_ids=current_group_ids + [user_group_id],
            )

    for agent_id in request.removed_agent_ids:
        persona = get_persona_by_id(
            persona_id=agent_id, user=user, db_session=db_session
        )
        current_group_ids = [g.id for g in persona.groups]
        update_persona_access(
            persona_id=agent_id,
            creator_user_id=user.id,
            db_session=db_session,
            group_ids=[gid for gid in current_group_ids if gid != user_group_id],
        )

    db_session.commit()


================================================
FILE: backend/ee/onyx/server/user_group/models.py
================================================
from uuid import UUID

from pydantic import BaseModel

from onyx.db.models import UserGroup as UserGroupModel
from onyx.server.documents.models import ConnectorCredentialPairDescriptor
from onyx.server.documents.models import ConnectorSnapshot
from onyx.server.documents.models import CredentialSnapshot
from onyx.server.features.document_set.models import DocumentSet
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.manage.models import UserInfo
from onyx.server.manage.models import UserPreferences


class UserGroup(BaseModel):
    id: int
    name: str
    users: list[UserInfo]
    curator_ids: list[UUID]
    cc_pairs: list[ConnectorCredentialPairDescriptor]
    document_sets: list[DocumentSet]
    personas: list[PersonaSnapshot]
    is_up_to_date: bool
    is_up_for_deletion: bool
    is_default: bool

    @classmethod
    def from_model(cls, user_group_model: UserGroupModel) -> "UserGroup":
        return cls(
            id=user_group_model.id,
            name=user_group_model.name,
            users=[
                UserInfo(
                    id=str(user.id),
                    email=user.email,
                    is_active=user.is_active,
                    is_superuser=user.is_superuser,
                    is_verified=user.is_verified,
                    role=user.role,
                    preferences=UserPreferences(
                        default_model=user.default_model,
                        chosen_assistants=user.chosen_assistants,
                    ),
                )
                for user in user_group_model.users
            ],
            curator_ids=[
                user.user_id
                for user in user_group_model.user_group_relationships
                if user.is_curator and user.user_id is not None
            ],
            cc_pairs=[
                ConnectorCredentialPairDescriptor(
                    id=cc_pair_relationship.cc_pair.id,
                    name=cc_pair_relationship.cc_pair.name,
                    connector=ConnectorSnapshot.from_connector_db_model(
                        cc_pair_relationship.cc_pair.connector,
                        credential_ids=[cc_pair_relationship.cc_pair.credential_id],
                    ),
                    credential=CredentialSnapshot.from_credential_db_model(
                        cc_pair_relationship.cc_pair.credential
                    ),
                    access_type=cc_pair_relationship.cc_pair.access_type,
                )
                for cc_pair_relationship in user_group_model.cc_pair_relationships
                if cc_pair_relationship.is_current
            ],
            document_sets=[
                DocumentSet.from_model(ds) for ds in user_group_model.document_sets
            ],
            personas=[
                PersonaSnapshot.from_model(persona)
                for persona in user_group_model.personas
                if not persona.deleted
            ],
            is_up_to_date=user_group_model.is_up_to_date,
            is_up_for_deletion=user_group_model.is_up_for_deletion,
            is_default=user_group_model.is_default,
        )


class MinimalUserGroupSnapshot(BaseModel):
    id: int
    name: str
    is_default: bool

    @classmethod
    def from_model(cls, user_group_model: UserGroupModel) -> "MinimalUserGroupSnapshot":
        return cls(
            id=user_group_model.id,
            name=user_group_model.name,
            is_default=user_group_model.is_default,
        )


class UserGroupCreate(BaseModel):
    name: str
    user_ids: list[UUID]
    cc_pair_ids: list[int]


class UserGroupUpdate(BaseModel):
    user_ids: list[UUID]
    cc_pair_ids: list[int]


class AddUsersToUserGroupRequest(BaseModel):
    user_ids: list[UUID]


class UserGroupRename(BaseModel):
    id: int
    name: str


class SetCuratorRequest(BaseModel):
    user_id: UUID
    is_curator: bool


class UpdateGroupAgentsRequest(BaseModel):
    added_agent_ids: list[int]
    removed_agent_ids: list[int]


================================================
FILE: backend/ee/onyx/utils/__init__.py
================================================


================================================
FILE: backend/ee/onyx/utils/encryption.py
================================================
from functools import lru_cache
from os import urandom

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import algorithms
from cryptography.hazmat.primitives.ciphers import Cipher
from cryptography.hazmat.primitives.ciphers import modes

from onyx.configs.app_configs import ENCRYPTION_KEY_SECRET
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()


@lru_cache(maxsize=2)
def _get_trimmed_key(key: str) -> bytes:
    encoded_key = key.encode()
    key_length = len(encoded_key)
    if key_length < 16:
        raise RuntimeError("Invalid ENCRYPTION_KEY_SECRET - too short")

    # Trim to the largest valid AES key size that fits
    valid_lengths = [32, 24, 16]
    for size in valid_lengths:
        if key_length >= size:
            return encoded_key[:size]

    raise AssertionError("unreachable")


def _encrypt_string(input_str: str, key: str | None = None) -> bytes:
    effective_key = key if key is not None else ENCRYPTION_KEY_SECRET
    if not effective_key:
        return input_str.encode()

    trimmed = _get_trimmed_key(effective_key)
    iv = urandom(16)
    padder = padding.PKCS7(algorithms.AES.block_size).padder()
    padded_data = padder.update(input_str.encode()) + padder.finalize()

    cipher = Cipher(algorithms.AES(trimmed), modes.CBC(iv), backend=default_backend())
    encryptor = cipher.encryptor()
    encrypted_data = encryptor.update(padded_data) + encryptor.finalize()

    return iv + encrypted_data


def _decrypt_bytes(input_bytes: bytes, key: str | None = None) -> str:
    effective_key = key if key is not None else ENCRYPTION_KEY_SECRET
    if not effective_key:
        return input_bytes.decode()

    trimmed = _get_trimmed_key(effective_key)
    try:
        iv = input_bytes[:16]
        encrypted_data = input_bytes[16:]

        cipher = Cipher(
            algorithms.AES(trimmed), modes.CBC(iv), backend=default_backend()
        )
        decryptor = cipher.decryptor()
        decrypted_padded_data = decryptor.update(encrypted_data) + decryptor.finalize()

        unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()
        decrypted_data = unpadder.update(decrypted_padded_data) + unpadder.finalize()

        return decrypted_data.decode()
    except (ValueError, UnicodeDecodeError):
        if key is not None:
            # Explicit key was provided — don't fall back silently
            raise
        # Read path: attempt raw UTF-8 decode as a fallback for legacy data.
        # Does NOT handle data encrypted with a different key — that
        # ciphertext is not valid UTF-8 and will raise below.
        logger.warning(
            "AES decryption failed — falling back to raw decode. Run the re-encrypt secrets script to rotate to the current key."
        )
        try:
            return input_bytes.decode()
        except UnicodeDecodeError:
            raise ValueError(
                "Data is not valid UTF-8 — likely encrypted with a different key. "
                "Run the re-encrypt secrets script to rotate to the current key."
            ) from None


def encrypt_string_to_bytes(input_str: str, key: str | None = None) -> bytes:
    versioned_encryption_fn = fetch_versioned_implementation(
        "onyx.utils.encryption", "_encrypt_string"
    )
    return versioned_encryption_fn(input_str, key=key)


def decrypt_bytes_to_string(input_bytes: bytes, key: str | None = None) -> str:
    versioned_decryption_fn = fetch_versioned_implementation(
        "onyx.utils.encryption", "_decrypt_bytes"
    )
    return versioned_decryption_fn(input_bytes, key=key)


def test_encryption() -> None:
    test_string = "Onyx is the BEST!"
    encrypted_bytes = encrypt_string_to_bytes(test_string)
    decrypted_string = decrypt_bytes_to_string(encrypted_bytes)
    if test_string != decrypted_string:
        raise RuntimeError("Encryption decryption test failed")


================================================
FILE: backend/ee/onyx/utils/license.py
================================================
"""RSA-4096 license signature verification utilities."""

import base64
import json
import os
from datetime import datetime
from datetime import timezone
from pathlib import Path

from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey

from ee.onyx.server.license.models import LicenseData
from ee.onyx.server.license.models import LicensePayload
from onyx.server.settings.models import ApplicationStatus
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Path to the license public key file
_LICENSE_PUBLIC_KEY_PATH = (
    Path(__file__).parent.parent.parent.parent / "keys" / "license_public_key.pem"
)


def _get_public_key() -> RSAPublicKey:
    """Load the public key from file, with env var override."""
    # Allow env var override for flexibility
    key_pem = os.environ.get("LICENSE_PUBLIC_KEY_PEM")

    if not key_pem:
        # Read from file
        if not _LICENSE_PUBLIC_KEY_PATH.exists():
            raise ValueError(
                f"License public key not found at {_LICENSE_PUBLIC_KEY_PATH}. "
                "License verification requires the control plane public key."
            )
        key_pem = _LICENSE_PUBLIC_KEY_PATH.read_text()

    key = serialization.load_pem_public_key(key_pem.encode())
    if not isinstance(key, RSAPublicKey):
        raise ValueError("Expected RSA public key")
    return key


def verify_license_signature(license_data: str) -> LicensePayload:
    """
    Verify RSA-4096 signature and return payload if valid.

    Args:
        license_data: Base64-encoded JSON containing payload and signature

    Returns:
        LicensePayload if signature is valid

    Raises:
        ValueError: If license data is invalid or signature verification fails
    """
    try:
        decoded = json.loads(base64.b64decode(license_data))

        # Parse into LicenseData to validate structure
        license_obj = LicenseData(**decoded)

        # IMPORTANT: Use the ORIGINAL payload JSON for signature verification,
        # not re-serialized through Pydantic. Pydantic may format fields differently
        # (e.g., datetime "+00:00" vs "Z") which would break signature verification.
        original_payload = decoded.get("payload", {})
        payload_json = json.dumps(original_payload, sort_keys=True)
        signature_bytes = base64.b64decode(license_obj.signature)

        # Verify signature using PSS padding (modern standard)
        public_key = _get_public_key()

        public_key.verify(
            signature_bytes,
            payload_json.encode(),
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH,
            ),
            hashes.SHA256(),
        )

        return license_obj.payload

    except InvalidSignature:
        logger.error("[verify_license] FAILED: Signature verification failed")
        raise ValueError("Invalid license signature")
    except json.JSONDecodeError as e:
        logger.error(f"[verify_license] FAILED: JSON decode error: {e}")
        raise ValueError("Invalid license format: not valid JSON")
    except (ValueError, KeyError, TypeError) as e:
        logger.error(
            f"[verify_license] FAILED: Validation error: {type(e).__name__}: {e}"
        )
        raise ValueError(f"Invalid license format: {type(e).__name__}: {e}")
    except Exception:
        logger.exception("[verify_license] FAILED: Unexpected error")
        raise ValueError("License verification failed: unexpected error")


def get_license_status(
    payload: LicensePayload,
    grace_period_end: datetime | None = None,
) -> ApplicationStatus:
    """
    Determine current license status based on expiry.

    Args:
        payload: The verified license payload
        grace_period_end: Optional grace period end datetime

    Returns:
        ApplicationStatus indicating current license state
    """
    now = datetime.now(timezone.utc)

    # Check if grace period has expired
    if grace_period_end and now > grace_period_end:
        return ApplicationStatus.GATED_ACCESS

    # Check if license has expired
    if now > payload.expires_at:
        if grace_period_end and now <= grace_period_end:
            return ApplicationStatus.GRACE_PERIOD
        return ApplicationStatus.GATED_ACCESS

    # License is valid
    return ApplicationStatus.ACTIVE


def is_license_valid(payload: LicensePayload) -> bool:
    """Check if a license is currently valid (not expired)."""
    now = datetime.now(timezone.utc)
    return now <= payload.expires_at


================================================
FILE: backend/ee/onyx/utils/posthog_client.py
================================================
import json
from typing import Any
from urllib.parse import unquote

from posthog import Posthog

from ee.onyx.configs.app_configs import MARKETING_POSTHOG_API_KEY
from ee.onyx.configs.app_configs import POSTHOG_API_KEY
from ee.onyx.configs.app_configs import POSTHOG_DEBUG_LOGS_ENABLED
from ee.onyx.configs.app_configs import POSTHOG_HOST
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def posthog_on_error(error: Any, items: Any) -> None:
    """Log any PostHog delivery errors."""
    logger.error(f"PostHog error: {error}, items: {items}")


posthog: Posthog | None = None
if POSTHOG_API_KEY:
    posthog = Posthog(
        project_api_key=POSTHOG_API_KEY,
        host=POSTHOG_HOST,
        debug=POSTHOG_DEBUG_LOGS_ENABLED,
        on_error=posthog_on_error,
    )
elif MULTI_TENANT:
    logger.warning(
        "POSTHOG_API_KEY is not set but MULTI_TENANT is enabled — "
        "PostHog telemetry and feature flags will be disabled"
    )

# For cross referencing between cloud and www Onyx sites
# NOTE: These clients are separate because they are separate posthog projects.
# We should eventually unify them into a single posthog project,
# which would no longer require this workaround
marketing_posthog = None
if MARKETING_POSTHOG_API_KEY:
    marketing_posthog = Posthog(
        project_api_key=MARKETING_POSTHOG_API_KEY,
        host=POSTHOG_HOST,
        debug=POSTHOG_DEBUG_LOGS_ENABLED,
        on_error=posthog_on_error,
    )


def capture_and_sync_with_alternate_posthog(
    alternate_distinct_id: str, event: str, properties: dict[str, Any]
) -> None:
    """
    Identify in both PostHog projects and capture the event in marketing.
    - Marketing keeps the marketing distinct_id (for feature flags).
    - Cloud identify uses the cloud distinct_id
    """
    if not marketing_posthog:
        return

    props = properties.copy()

    try:
        marketing_posthog.identify(distinct_id=alternate_distinct_id, properties=props)
        marketing_posthog.capture(alternate_distinct_id, event, props)
        marketing_posthog.flush()
    except Exception as e:
        logger.error(f"Error capturing marketing posthog event: {e}")

    try:
        if posthog and (cloud_user_id := props.get("onyx_cloud_user_id")):
            cloud_props = props.copy()
            cloud_props.pop("onyx_cloud_user_id", None)

            posthog.identify(
                distinct_id=cloud_user_id,
                properties=cloud_props,
            )
    except Exception as e:
        logger.error(f"Error identifying cloud posthog user: {e}")


def alias_user(distinct_id: str, anonymous_id: str) -> None:
    """Link an anonymous distinct_id to an identified user, merging person profiles.

    No-ops when the IDs match (e.g. returning users whose PostHog cookie
    already contains their identified user ID).
    """
    if not posthog or anonymous_id == distinct_id:
        return

    try:
        posthog.alias(previous_id=anonymous_id, distinct_id=distinct_id)
        posthog.flush()
    except Exception as e:
        logger.error(f"Error aliasing PostHog user: {e}")


def get_anon_id_from_request(request: Any) -> str | None:
    """Extract the anonymous distinct_id from the app PostHog cookie on a request."""
    if not POSTHOG_API_KEY:
        return None

    cookie_name = f"ph_{POSTHOG_API_KEY}_posthog"
    if (cookie_value := request.cookies.get(cookie_name)) and (
        parsed := parse_posthog_cookie(cookie_value)
    ):
        return parsed.get("distinct_id")

    return None


def get_marketing_posthog_cookie_name() -> str | None:
    if not MARKETING_POSTHOG_API_KEY:
        return None
    return f"onyx_custom_ph_{MARKETING_POSTHOG_API_KEY}_posthog"


def parse_posthog_cookie(cookie_value: str) -> dict[str, Any] | None:
    """
    Parse a URL-encoded JSON PostHog cookie

    Expected format (URL-encoded):
    {"distinct_id":"...", "featureFlags":{"landing_page_variant":"..."}, ...}

    Returns:
        Dict with 'distinct_id' explicitly required and all other cookie values
        passed through as-is, or None if parsing fails or distinct_id is missing.
    """
    try:
        decoded_cookie = unquote(cookie_value)
        cookie_data = json.loads(decoded_cookie)

        distinct_id = cookie_data.get("distinct_id")
        if not distinct_id or not isinstance(distinct_id, str):
            return None

        return cookie_data
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
        logger.warning(f"Failed to parse cookie: {e}")
        return None


================================================
FILE: backend/ee/onyx/utils/telemetry.py
================================================
from typing import Any

from ee.onyx.utils.posthog_client import posthog
from onyx.utils.logger import setup_logger

logger = setup_logger()


def event_telemetry(
    distinct_id: str, event: str, properties: dict[str, Any] | None = None
) -> None:
    """Capture and send an event to PostHog, flushing immediately."""
    if not posthog:
        return

    logger.info(f"Capturing PostHog event: {distinct_id} {event} {properties}")
    try:
        posthog.capture(distinct_id, event, properties)
        posthog.flush()
    except Exception as e:
        logger.error(f"Error capturing PostHog event: {e}")


def identify_user(distinct_id: str, properties: dict[str, Any] | None = None) -> None:
    """Create/update a PostHog person profile, flushing immediately."""
    if not posthog:
        return

    try:
        posthog.identify(distinct_id, properties)
        posthog.flush()
    except Exception as e:
        logger.error(f"Error identifying PostHog user: {e}")


================================================
FILE: backend/generated/README.md
================================================
- Generated Files
* Generated files live here. This directory should be git ignored.

================================================
FILE: backend/keys/license_public_key.pem
================================================
-----BEGIN PUBLIC KEY-----
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA5DpchQujdxjCwpc4/RQP
Hej6rc3SS/5ENCXL0I8NAfMogel0fqG6PKRhonyEh/Bt3P4q18y8vYzAShwf4b6Q
aS0WwshbvnkjyWlsK0BY4HLBKPkTpes7kaz8MwmPZDeelvGJ7SNv3FvyJR4QsoSQ
GSoB5iTH7hi63TjzdxtckkXoNG+GdVd/koxVDUv2uWcAoWIFTTcbKWyuq2SS/5Sf
xdVaIArqfAhLpnNbnM9OS7lZ1xP+29ZXpHxDoeluz35tJLMNBYn9u0y+puo1kW1E
TOGizlAq5kmEMsTJ55e9ZuyIV3gZAUaUKe8CxYJPkOGt0Gj6e1jHoHZCBJmaq97Y
stKj//84HNBzajaryEZuEfRecJ94ANEjkD8u9cGmW+9VxRe5544zWguP5WMT/nv1
0Q+jkOBW2hkY5SS0Rug4cblxiB7bDymWkaX6+sC0VWd5g6WXp36EuP2T0v3mYuHU
GDEiWbD44ToREPVwE/M07ny8qhLo/HYk2l8DKFt83hXe7ePBnyQdcsrVbQWOO1na
j43OkoU5gOFyOkrk2RmmtCjA8jSnw+tGCTpRaRcshqoWC1MjZyU+8/kDteXNkmv9
/B5VxzYSyX+abl7yAu5wLiUPW8l+mOazzWu0nPkmiA160ArxnRyxbGnmp4dUIrt5
azYku4tQYLSsSabfhcpeiCsCAwEAAQ==
-----END PUBLIC KEY-----


================================================
FILE: backend/model_server/__init__.py
================================================


================================================
FILE: backend/model_server/constants.py
================================================
MODEL_WARM_UP_STRING = "hi " * 512


class GPUStatus:
    CUDA = "cuda"
    MAC_MPS = "mps"
    NONE = "none"


================================================
FILE: backend/model_server/encoders.py
================================================
import asyncio
import time
from typing import Any
from typing import TYPE_CHECKING

from fastapi import APIRouter
from fastapi import HTTPException
from fastapi import Request

from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import Embedding
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse

if TYPE_CHECKING:
    from sentence_transformers import SentenceTransformer

logger = setup_logger()

router = APIRouter(prefix="/encoder")


_GLOBAL_MODELS_DICT: dict[str, "SentenceTransformer"] = {}


def get_embedding_model(
    model_name: str,
    max_context_length: int,
) -> "SentenceTransformer":
    """
    Loads or returns a cached SentenceTransformer, sets max_seq_length, pins device,
    pre-warms rotary caches once, and wraps encode() with a lock to avoid cache races.
    """
    from sentence_transformers import SentenceTransformer

    def _prewarm_rope(st_model: "SentenceTransformer", target_len: int) -> None:
        """
        Build RoPE cos/sin caches once on the final device/dtype so later forwards only read.
        Works by calling the underlying HF model directly with dummy IDs/attention.
        """
        try:
            # ensure > max seq after tokenization
            # Ideally we would use the saved tokenizer, but whatever it's ok
            # we'll make an assumption about tokenization here
            long_text = "x " * (target_len * 2)
            _ = st_model.encode(
                [long_text],
                batch_size=1,
                convert_to_tensor=True,
                show_progress_bar=False,
                normalize_embeddings=False,
            )
            logger.info("RoPE pre-warm successful")
        except Exception as e:
            logger.warning(f"RoPE pre-warm skipped/failed: {e}")

    global _GLOBAL_MODELS_DICT

    if model_name not in _GLOBAL_MODELS_DICT:
        logger.notice(f"Loading {model_name}")
        model = SentenceTransformer(
            model_name_or_path=model_name,
            trust_remote_code=True,
        )
        model.max_seq_length = max_context_length
        _prewarm_rope(model, max_context_length)
        _GLOBAL_MODELS_DICT[model_name] = model
    else:
        model = _GLOBAL_MODELS_DICT[model_name]
        if max_context_length != model.max_seq_length:
            model.max_seq_length = max_context_length
            prev = getattr(model, "_rope_prewarmed_to", 0)
            if max_context_length > int(prev or 0):
                _prewarm_rope(model, max_context_length)

    return _GLOBAL_MODELS_DICT[model_name]


ENCODING_RETRIES = 3
ENCODING_RETRY_DELAY = 0.1


def _concurrent_embedding(
    texts: list[str], model: "SentenceTransformer", normalize_embeddings: bool
) -> Any:
    """Synchronous wrapper for concurrent_embedding to use with run_in_executor."""
    for _ in range(ENCODING_RETRIES):
        try:
            return model.encode(texts, normalize_embeddings=normalize_embeddings)
        except RuntimeError as e:
            # There is a concurrency bug in the SentenceTransformer library that causes
            # the model to fail to encode texts. It's pretty rare and we want to allow
            # concurrent embedding, hence we retry (the specific error is
            # "RuntimeError: Already borrowed" and occurs in the transformers library)
            logger.warning(f"Error encoding texts, retrying: {e}")
            time.sleep(ENCODING_RETRY_DELAY)
    return model.encode(texts, normalize_embeddings=normalize_embeddings)


@simple_log_function_time()
async def embed_text(
    texts: list[str],
    model_name: str | None,
    max_context_length: int,
    normalize_embeddings: bool,
    prefix: str | None,
    gpu_type: str = "UNKNOWN",
) -> list[Embedding]:
    if not all(texts):
        logger.error("Empty strings provided for embedding")
        raise ValueError("Empty strings are not allowed for embedding.")

    if not texts:
        logger.error("No texts provided for embedding")
        raise ValueError("No texts provided for embedding.")

    start = time.monotonic()

    total_chars = 0
    for text in texts:
        total_chars += len(text)

    # Only local models should call this function now
    # API providers should go directly to API server

    if model_name is not None:
        logger.info(
            f"Embedding {len(texts)} texts with {total_chars} total characters with local model: {model_name}"
        )

        prefixed_texts = [f"{prefix}{text}" for text in texts] if prefix else texts

        local_model = get_embedding_model(
            model_name=model_name, max_context_length=max_context_length
        )
        # Run CPU-bound embedding in a thread pool
        embeddings_vectors = await asyncio.get_event_loop().run_in_executor(
            None,
            lambda: _concurrent_embedding(
                prefixed_texts, local_model, normalize_embeddings
            ),
        )
        embeddings = [
            embedding if isinstance(embedding, list) else embedding.tolist()
            for embedding in embeddings_vectors
        ]

        elapsed = time.monotonic() - start
        logger.info(
            f"Successfully embedded {len(texts)} texts with {total_chars} total characters "
            f"with local model {model_name} in {elapsed:.2f}"
        )
        logger.info(
            f"event=embedding_model "
            f"texts={len(texts)} "
            f"chars={total_chars} "
            f"model={model_name} "
            f"gpu={gpu_type} "
            f"elapsed={elapsed:.2f}"
        )
    else:
        logger.error("Model name not specified for embedding")
        raise ValueError("Model name must be provided to run embeddings.")

    return embeddings


@router.post("/bi-encoder-embed")
async def route_bi_encoder_embed(
    request: Request,
    embed_request: EmbedRequest,
) -> EmbedResponse:
    return await process_embed_request(embed_request, request.app.state.gpu_type)


async def process_embed_request(
    embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
) -> EmbedResponse:
    from litellm.exceptions import RateLimitError

    # Only local models should use this endpoint - API providers should make direct API calls
    if embed_request.provider_type is not None:
        raise ValueError(
            f"Model server embedding endpoint should only be used for local models. "
            f"API provider '{embed_request.provider_type}' should make direct API calls instead."
        )

    if not embed_request.texts:
        raise HTTPException(status_code=400, detail="No texts to be embedded")

    if not all(embed_request.texts):
        raise ValueError("Empty strings are not allowed for embedding.")

    try:
        if embed_request.text_type == EmbedTextType.QUERY:
            prefix = embed_request.manual_query_prefix
        elif embed_request.text_type == EmbedTextType.PASSAGE:
            prefix = embed_request.manual_passage_prefix
        else:
            prefix = None

        embeddings = await embed_text(
            texts=embed_request.texts,
            model_name=embed_request.model_name,
            max_context_length=embed_request.max_context_length,
            normalize_embeddings=embed_request.normalize_embeddings,
            prefix=prefix,
            gpu_type=gpu_type,
        )
        return EmbedResponse(embeddings=embeddings)
    except RateLimitError as e:
        raise HTTPException(
            status_code=429,
            detail=str(e),
        )
    except Exception as e:
        logger.exception(
            f"Error during embedding process: provider={embed_request.provider_type} model={embed_request.model_name}"
        )
        raise HTTPException(
            status_code=500, detail=f"Error during embedding process: {e}"
        )


================================================
FILE: backend/model_server/legacy/README.md
================================================
This directory contains code that was useful and may become useful again in the future.

We stopped using rerankers because the state of the art rerankers are not significantly better than the biencoders and much worse than LLMs which are also capable of acting on a small set of documents for filtering, reranking, etc.

We stopped using the internal query classifier as that's now offloaded to the LLM which does query expansion so we know ahead of time if it's a keyword or semantic query.


================================================
FILE: backend/model_server/legacy/__init__.py
================================================


================================================
FILE: backend/model_server/legacy/custom_models.py
================================================
# from typing import cast
# from typing import Optional
# from typing import TYPE_CHECKING

# import numpy as np
# import torch
# import torch.nn.functional as F
# from fastapi import APIRouter
# from huggingface_hub import snapshot_download
# from pydantic import BaseModel

# from model_server.constants import MODEL_WARM_UP_STRING
# from model_server.legacy.onyx_torch_model import ConnectorClassifier
# from model_server.legacy.onyx_torch_model import HybridClassifier
# from model_server.utils import simple_log_function_time
# from onyx.utils.logger import setup_logger
# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
# from shared_configs.configs import INDEXING_ONLY
# from shared_configs.configs import INTENT_MODEL_TAG
# from shared_configs.configs import INTENT_MODEL_VERSION
# from shared_configs.model_server_models import IntentRequest
# from shared_configs.model_server_models import IntentResponse

# if TYPE_CHECKING:
#     from setfit import SetFitModel  # type: ignore[import-untyped]
#     from transformers import PreTrainedTokenizer, BatchEncoding


# INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi" * 50

# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = 1.0
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN = 0.7
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE = 4.0
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH = 10
# INFORMATION_CONTENT_MODEL_VERSION = "onyx-dot-app/information-content-model"
# INFORMATION_CONTENT_MODEL_TAG: str | None = None


# class ConnectorClassificationRequest(BaseModel):
#     available_connectors: list[str]
#     query: str


# class ConnectorClassificationResponse(BaseModel):
#     connectors: list[str]


# class ContentClassificationPrediction(BaseModel):
#     predicted_label: int
#     content_boost_factor: float


# logger = setup_logger()

# router = APIRouter(prefix="/custom")

# _CONNECTOR_CLASSIFIER_TOKENIZER: Optional["PreTrainedTokenizer"] = None
# _CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None

# _INTENT_TOKENIZER: Optional["PreTrainedTokenizer"] = None
# _INTENT_MODEL: HybridClassifier | None = None

# _INFORMATION_CONTENT_MODEL: Optional["SetFitModel"] = None

# _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = ""  # spec to model version!


# def get_connector_classifier_tokenizer() -> "PreTrainedTokenizer":
#     global _CONNECTOR_CLASSIFIER_TOKENIZER
#     from transformers import AutoTokenizer, PreTrainedTokenizer

#     if _CONNECTOR_CLASSIFIER_TOKENIZER is None:
#         # The tokenizer details are not uploaded to the HF hub since it's just the
#         # unmodified distilbert tokenizer.
#         _CONNECTOR_CLASSIFIER_TOKENIZER = cast(
#             PreTrainedTokenizer,
#             AutoTokenizer.from_pretrained("distilbert-base-uncased"),
#         )
#     return _CONNECTOR_CLASSIFIER_TOKENIZER


# def get_local_connector_classifier(
#     model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,
#     tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,
# ) -> ConnectorClassifier:
#     global _CONNECTOR_CLASSIFIER_MODEL
#     if _CONNECTOR_CLASSIFIER_MODEL is None:
#         try:
#             # Calculate where the cache should be, then load from local if available
#             local_path = snapshot_download(
#                 repo_id=model_name_or_path, revision=tag, local_files_only=True
#             )
#             _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
#                 local_path
#             )
#         except Exception as e:
#             logger.warning(f"Failed to load model directly: {e}")
#             try:
#                 # Attempt to download the model snapshot
#                 logger.info(f"Downloading model snapshot for {model_name_or_path}")
#                 local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
#                 _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
#                     local_path
#                 )
#             except Exception as e:
#                 logger.error(
#                     f"Failed to load model even after attempted snapshot download: {e}"
#                 )
#                 raise
#     return _CONNECTOR_CLASSIFIER_MODEL


# def get_intent_model_tokenizer() -> "PreTrainedTokenizer":
#     from transformers import AutoTokenizer, PreTrainedTokenizer

#     global _INTENT_TOKENIZER
#     if _INTENT_TOKENIZER is None:
#         # The tokenizer details are not uploaded to the HF hub since it's just the
#         # unmodified distilbert tokenizer.
#         _INTENT_TOKENIZER = cast(
#             PreTrainedTokenizer,
#             AutoTokenizer.from_pretrained("distilbert-base-uncased"),
#         )
#     return _INTENT_TOKENIZER


# def get_local_intent_model(
#     model_name_or_path: str = INTENT_MODEL_VERSION,
#     tag: str | None = INTENT_MODEL_TAG,
# ) -> HybridClassifier:
#     global _INTENT_MODEL
#     if _INTENT_MODEL is None:
#         try:
#             # Calculate where the cache should be, then load from local if available
#             logger.notice(f"Loading model from local cache: {model_name_or_path}")
#             local_path = snapshot_download(
#                 repo_id=model_name_or_path, revision=tag, local_files_only=True
#             )
#             _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
#             logger.notice(f"Loaded model from local cache: {local_path}")
#         except Exception as e:
#             logger.warning(f"Failed to load model directly: {e}")
#             try:
#                 # Attempt to download the model snapshot
#                 logger.notice(f"Downloading model snapshot for {model_name_or_path}")
#                 local_path = snapshot_download(
#                     repo_id=model_name_or_path, revision=tag, local_files_only=False
#                 )
#                 _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
#             except Exception as e:
#                 logger.error(
#                     f"Failed to load model even after attempted snapshot download: {e}"
#                 )
#                 raise
#     return _INTENT_MODEL


# def get_local_information_content_model(
#     model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
#     tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
# ) -> "SetFitModel":
#     from setfit import SetFitModel

#     global _INFORMATION_CONTENT_MODEL
#     if _INFORMATION_CONTENT_MODEL is None:
#         try:
#             # Calculate where the cache should be, then load from local if available
#             logger.notice(
#                 f"Loading content information model from local cache: {model_name_or_path}"
#             )
#             local_path = snapshot_download(
#                 repo_id=model_name_or_path, revision=tag, local_files_only=True
#             )
#             _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
#             logger.notice(
#                 f"Loaded content information model from local cache: {local_path}"
#             )
#         except Exception as e:
#             logger.warning(f"Failed to load content information model directly: {e}")
#             try:
#                 # Attempt to download the model snapshot
#                 logger.notice(
#                     f"Downloading content information model snapshot for {model_name_or_path}"
#                 )
#                 local_path = snapshot_download(
#                     repo_id=model_name_or_path, revision=tag, local_files_only=False
#                 )
#                 _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
#             except Exception as e:
#                 logger.error(
#                     f"Failed to load content information model even after attempted snapshot download: {e}"
#                 )
#                 raise

#     return _INFORMATION_CONTENT_MODEL


# def tokenize_connector_classification_query(
#     connectors: list[str],
#     query: str,
#     tokenizer: "PreTrainedTokenizer",
#     connector_token_end_id: int,
# ) -> tuple[torch.Tensor, torch.Tensor]:
#     """
#     Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models

#     The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end
#     token and then the user query.
#     """

#     input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long)

#     for connector in connectors:
#         connector_token_ids = tokenizer(
#             connector,
#             add_special_tokens=False,
#             return_tensors="pt",
#         )

#         input_ids = torch.cat(
#             (
#                 input_ids,
#                 connector_token_ids["input_ids"].squeeze(dim=0),
#                 torch.tensor([connector_token_end_id], dtype=torch.long),
#             ),
#             dim=-1,
#         )
#     query_token_ids = tokenizer(
#         query,
#         add_special_tokens=False,
#         return_tensors="pt",
#     )

#     input_ids = torch.cat(
#         (
#             input_ids,
#             query_token_ids["input_ids"].squeeze(dim=0),
#             torch.tensor([tokenizer.sep_token_id], dtype=torch.long),
#         ),
#         dim=-1,
#     )
#     attention_mask = torch.ones(input_ids.numel(), dtype=torch.long)

#     return input_ids.unsqueeze(0), attention_mask.unsqueeze(0)


# def warm_up_connector_classifier_model() -> None:
#     logger.info(
#         f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}"
#     )
#     connector_classifier_tokenizer = get_connector_classifier_tokenizer()
#     connector_classifier = get_local_connector_classifier()

#     input_ids, attention_mask = tokenize_connector_classification_query(
#         ["GitHub"],
#         "onyx classifier query google doc",
#         connector_classifier_tokenizer,
#         connector_classifier.connector_end_token_id,
#     )
#     input_ids = input_ids.to(connector_classifier.device)
#     attention_mask = attention_mask.to(connector_classifier.device)

#     connector_classifier(input_ids, attention_mask)


# def warm_up_intent_model() -> None:
#     logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
#     intent_tokenizer = get_intent_model_tokenizer()
#     tokens = intent_tokenizer(
#         MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
#     )

#     intent_model = get_local_intent_model()
#     device = intent_model.device
#     intent_model(
#         query_ids=tokens["input_ids"].to(device),
#         query_mask=tokens["attention_mask"].to(device),
#     )


# def warm_up_information_content_model() -> None:
#     logger.notice("Warming up Content Model")  # TODO: add version if needed

#     information_content_model = get_local_information_content_model()
#     information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)


# @simple_log_function_time()
# def run_inference(tokens: "BatchEncoding") -> tuple[list[float], list[float]]:
#     intent_model = get_local_intent_model()
#     device = intent_model.device

#     outputs = intent_model(
#         query_ids=tokens["input_ids"].to(device),
#         query_mask=tokens["attention_mask"].to(device),
#     )

#     token_logits = outputs["token_logits"]
#     intent_logits = outputs["intent_logits"]

#     # Move tensors to CPU before applying softmax and converting to numpy
#     intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]
#     token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]

#     # Extract the probabilities for the positive class (index 1) for each token
#     token_positive_probs = token_probabilities[:, 1].tolist()

#     return intent_probabilities.tolist(), token_positive_probs


# @simple_log_function_time()
# def run_content_classification_inference(
#     text_inputs: list[str],
# ) -> list[ContentClassificationPrediction]:
#     """
#     Assign a score to the segments in question. The model stored in get_local_information_content_model()
#     creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.
#     In the code outside of the model/inference model servers that score will be converted into the actual
#     boost factor.
#     """

#     def _prob_to_score(prob: float) -> float:
#         """
#         Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
#         """
#         _MIN_BASE_SCORE = 0.25
#         _MAX_BASE_SCORE = 0.75
#         if prob < _MIN_BASE_SCORE:
#             raw_score = 0.0
#         elif prob < _MAX_BASE_SCORE:
#             raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
#         else:
#             raw_score = 1.0
#         return (
#             INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
#             + (
#                 INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
#                 - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
#             )
#             * raw_score
#         )

#     _BATCH_SIZE = 32
#     content_model = get_local_information_content_model()

#     # Process inputs in batches
#     all_output_classes: list[int] = []
#     all_base_output_probabilities: list[float] = []

#     for i in range(0, len(text_inputs), _BATCH_SIZE):
#         batch = text_inputs[i : i + _BATCH_SIZE]
#         batch_with_prefix = []
#         batch_indices = []

#         # Pre-allocate results for this batch
#         batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
#         batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)

#         # Pre-process batch to handle long input exceptions
#         for j, text in enumerate(batch):
#             if len(text) == 0:
#                 # if no input, treat as non-informative from the model's perspective
#                 batch_output_classes[j] = np.array(0)
#                 batch_probabilities[j] = np.array(0.0)
#                 logger.warning("Input for Content Information Model is empty")

#             elif (
#                 len(text.split())
#                 <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
#             ):
#                 # if input is short, use the model
#                 batch_with_prefix.append(
#                     _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
#                 )
#                 batch_indices.append(j)
#             else:
#                 # if longer than cutoff, treat as informative (stay with default), but issue warning
#                 logger.warning("Input for Content Information Model too long")

#         if batch_with_prefix:  # Only run model if we have valid inputs
#             # Get predictions for the batch
#             model_output_classes = content_model(batch_with_prefix)
#             model_output_probabilities = content_model.predict_proba(batch_with_prefix)

#             # Place results in the correct positions
#             for idx, batch_idx in enumerate(batch_indices):
#                 batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
#                 batch_probabilities[batch_idx] = model_output_probabilities[idx][
#                     1
#                 ].numpy()  # x[1] is prob of the positive class

#         all_output_classes.extend([int(x) for x in batch_output_classes])
#         all_base_output_probabilities.extend([float(x) for x in batch_probabilities])

#     logits = [
#         np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
#         for p in all_base_output_probabilities
#     ]
#     scaled_logits = [
#         logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
#         for logit in logits
#     ]
#     output_probabilities_with_temp = [
#         np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
#         for scaled_logit in scaled_logits
#     ]

#     prediction_scores = [
#         _prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
#     ]

#     content_classification_predictions = [
#         ContentClassificationPrediction(
#             predicted_label=predicted_label, content_boost_factor=output_score
#         )
#         for predicted_label, output_score in zip(all_output_classes, prediction_scores)
#     ]

#     return content_classification_predictions


# def map_keywords(
#     input_ids: torch.Tensor, tokenizer: "PreTrainedTokenizer", is_keyword: list[bool]
# ) -> list[str]:
#     tokens = tokenizer.convert_ids_to_tokens(input_ids)  # type: ignore

#     if not len(tokens) == len(is_keyword):
#         raise ValueError("Length of tokens and keyword predictions must match")

#     if input_ids[0] == tokenizer.cls_token_id:
#         tokens = tokens[1:]
#         is_keyword = is_keyword[1:]

#     if input_ids[-1] == tokenizer.sep_token_id:
#         tokens = tokens[:-1]
#         is_keyword = is_keyword[:-1]

#     unk_token = tokenizer.unk_token
#     if unk_token in tokens:
#         raise ValueError("Unknown token detected in the input")

#     keywords = []
#     current_keyword = ""

#     for ind, token in enumerate(tokens):
#         if is_keyword[ind]:
#             if token.startswith("##"):
#                 current_keyword += token[2:]
#             else:
#                 if current_keyword:
#                     keywords.append(current_keyword)
#                 current_keyword = token
#         else:
#             # If mispredicted a later token of a keyword, add it to the current keyword
#             # to complete it
#             if current_keyword:
#                 if len(current_keyword) > 2 and current_keyword.startswith("##"):
#                     current_keyword = current_keyword[2:]

#                 else:
#                     keywords.append(current_keyword)
#                     current_keyword = ""

#     if current_keyword:
#         keywords.append(current_keyword)

#     return keywords


# def clean_keywords(keywords: list[str]) -> list[str]:
#     cleaned_words = []
#     for word in keywords:
#         word = word[:-2] if word.endswith("'s") else word
#         word = word.replace("/", " ")
#         word = word.replace("'", "").replace('"', "")
#         cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])
#     return cleaned_words


# def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:
#     tokenizer = get_connector_classifier_tokenizer()
#     model = get_local_connector_classifier()

#     connector_names = req.available_connectors

#     input_ids, attention_mask = tokenize_connector_classification_query(
#         connector_names,
#         req.query,
#         tokenizer,
#         model.connector_end_token_id,
#     )
#     input_ids = input_ids.to(model.device)
#     attention_mask = attention_mask.to(model.device)

#     global_confidence, classifier_confidence = model(input_ids, attention_mask)

#     if global_confidence.item() < 0.5:
#         return []

#     passed_connectors = []

#     for i, connector_name in enumerate(connector_names):
#         if classifier_confidence.view(-1)[i].item() > 0.5:
#             passed_connectors.append(connector_name)

#     return passed_connectors


# def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
#     tokenizer = get_intent_model_tokenizer()
#     model_input = tokenizer(
#         intent_req.query, return_tensors="pt", truncation=False, padding=False
#     )

#     if len(model_input.input_ids[0]) > 512:
#         # If the user text is too long, assume it is semantic and keep all words
#         return True, intent_req.query.split()

#     intent_probs, token_probs = run_inference(model_input)

#     is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold

#     keyword_preds = [
#         token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs
#     ]

#     try:
#         keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
#     except Exception as e:
#         logger.warning(
#             f"Failed to extract keywords for query: {intent_req.query} due to {e}"
#         )
#         # Fallback to keeping all words
#         keywords = intent_req.query.split()

#     cleaned_keywords = clean_keywords(keywords)

#     return is_keyword_sequence, cleaned_keywords


# @router.post("/connector-classification")
# async def process_connector_classification_request(
#     classification_request: ConnectorClassificationRequest,
# ) -> ConnectorClassificationResponse:
#     if INDEXING_ONLY:
#         raise RuntimeError(
#             "Indexing model server should not call connector classification endpoint"
#         )

#     if len(classification_request.available_connectors) == 0:
#         return ConnectorClassificationResponse(connectors=[])

#     connectors = run_connector_classification(classification_request)
#     return ConnectorClassificationResponse(connectors=connectors)


# @router.post("/query-analysis")
# async def process_analysis_request(
#     intent_request: IntentRequest,
# ) -> IntentResponse:
#     if INDEXING_ONLY:
#         raise RuntimeError("Indexing model server should not call intent endpoint")

#     is_keyword, keywords = run_analysis(intent_request)
#     return IntentResponse(is_keyword=is_keyword, keywords=keywords)


# @router.post("/content-classification")
# async def process_content_classification_request(
#     content_classification_requests: list[str],
# ) -> list[ContentClassificationPrediction]:
#     return run_content_classification_inference(content_classification_requests)


================================================
FILE: backend/model_server/legacy/onyx_torch_model.py
================================================
# import json
# import os
# from typing import cast
# from typing import TYPE_CHECKING

# import torch
# import torch.nn as nn


# if TYPE_CHECKING:
#     from transformers import DistilBertConfig


# class HybridClassifier(nn.Module):
#     def __init__(self) -> None:
#         from transformers import DistilBertConfig, DistilBertModel

#         super().__init__()
#         config = DistilBertConfig()
#         self.distilbert = DistilBertModel(config)
#         config = self.distilbert.config  # type: ignore

#         # Keyword tokenwise binary classification layer
#         self.keyword_classifier = nn.Linear(config.dim, 2)

#         # Intent Classifier layers
#         self.pre_classifier = nn.Linear(config.dim, config.dim)
#         self.intent_classifier = nn.Linear(config.dim, 2)

#         self.device = torch.device("cpu")

#     def forward(
#         self,
#         query_ids: torch.Tensor,
#         query_mask: torch.Tensor,
#     ) -> dict[str, torch.Tensor]:
#         outputs = self.distilbert(input_ids=query_ids, attention_mask=query_mask)
#         sequence_output = outputs.last_hidden_state

#         # Intent classification on the CLS token
#         cls_token_state = sequence_output[:, 0, :]
#         pre_classifier_out = self.pre_classifier(cls_token_state)
#         intent_logits = self.intent_classifier(pre_classifier_out)

#         # Keyword classification on all tokens
#         token_logits = self.keyword_classifier(sequence_output)

#         return {"intent_logits": intent_logits, "token_logits": token_logits}

#     @classmethod
#     def from_pretrained(cls, load_directory: str) -> "HybridClassifier":
#         model_path = os.path.join(load_directory, "pytorch_model.bin")
#         config_path = os.path.join(load_directory, "config.json")

#         with open(config_path, "r") as f:
#             config = json.load(f)
#         model = cls(**config)

#         if torch.backends.mps.is_available():
#             # Apple silicon GPU
#             device = torch.device("mps")
#         elif torch.cuda.is_available():
#             device = torch.device("cuda")
#         else:
#             device = torch.device("cpu")

#         model.load_state_dict(torch.load(model_path, map_location=device))
#         model = model.to(device)

#         model.device = device

#         model.eval()
#         # Eval doesn't set requires_grad to False, do it manually to save memory and have faster inference
#         for param in model.parameters():
#             param.requires_grad = False

#         return model


# class ConnectorClassifier(nn.Module):
#     def __init__(self, config: "DistilBertConfig") -> None:
#         from transformers import DistilBertTokenizer, DistilBertModel

#         super().__init__()

#         self.config = config
#         self.distilbert = DistilBertModel(config)
#         config = self.distilbert.config  # type: ignore
#         self.connector_global_classifier = nn.Linear(config.dim, 1)
#         self.connector_match_classifier = nn.Linear(config.dim, 1)
#         self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

#         # Token indicating end of connector name, and on which classifier is used
#         self.connector_end_token_id = self.tokenizer.get_vocab()[
#             self.config.connector_end_token
#         ]

#         self.device = torch.device("cpu")

#     def forward(
#         self,
#         input_ids: torch.Tensor,
#         attention_mask: torch.Tensor,
#     ) -> tuple[torch.Tensor, torch.Tensor]:
#         hidden_states = self.distilbert(
#             input_ids=input_ids, attention_mask=attention_mask
#         ).last_hidden_state

#         cls_hidden_states = hidden_states[
#             :, 0, :
#         ]  # Take leap of faith that first token is always [CLS]
#         global_logits = self.connector_global_classifier(cls_hidden_states).view(-1)
#         global_confidence = torch.sigmoid(global_logits).view(-1)

#         connector_end_position_ids = input_ids == self.connector_end_token_id
#         connector_end_hidden_states = hidden_states[connector_end_position_ids]
#         classifier_output = self.connector_match_classifier(connector_end_hidden_states)
#         classifier_confidence = torch.nn.functional.sigmoid(classifier_output).view(-1)

#         return global_confidence, classifier_confidence

#     @classmethod
#     def from_pretrained(cls, repo_dir: str) -> "ConnectorClassifier":
#         from transformers import DistilBertConfig

#         config = cast(
#             DistilBertConfig,
#             DistilBertConfig.from_pretrained(os.path.join(repo_dir, "config.json")),
#         )
#         device = (
#             torch.device("cuda")
#             if torch.cuda.is_available()
#             else (
#                 torch.device("mps")
#                 if torch.backends.mps.is_available()
#                 else torch.device("cpu")
#             )
#         )
#         state_dict = torch.load(
#             os.path.join(repo_dir, "pytorch_model.pt"),
#             map_location=device,
#             weights_only=True,
#         )

#         model = cls(config)
#         model.load_state_dict(state_dict)
#         model.to(device)
#         model.device = device
#         model.eval()

#         for param in model.parameters():
#             param.requires_grad = False

#         return model


================================================
FILE: backend/model_server/legacy/reranker.py
================================================
# import asyncio
# from typing import Optional
# from typing import TYPE_CHECKING

# from fastapi import APIRouter
# from fastapi import HTTPException

# from model_server.utils import simple_log_function_time
# from onyx.utils.logger import setup_logger
# from shared_configs.configs import INDEXING_ONLY
# from shared_configs.model_server_models import RerankRequest
# from shared_configs.model_server_models import RerankResponse

# if TYPE_CHECKING:
#     from sentence_transformers import CrossEncoder

# logger = setup_logger()

# router = APIRouter(prefix="/encoder")

# _RERANK_MODEL: Optional["CrossEncoder"] = None


# def get_local_reranking_model(
#     model_name: str,
# ) -> "CrossEncoder":
#     global _RERANK_MODEL
#     from sentence_transformers import CrossEncoder

#     if _RERANK_MODEL is None:
#         logger.notice(f"Loading {model_name}")
#         model = CrossEncoder(model_name)
#         _RERANK_MODEL = model
#     return _RERANK_MODEL


# @simple_log_function_time()
# async def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]:
#     cross_encoder = get_local_reranking_model(model_name)
#     # Run CPU-bound reranking in a thread pool
#     return await asyncio.get_event_loop().run_in_executor(
#         None,
#         lambda: cross_encoder.predict([(query, doc) for doc in docs]).tolist(),
#     )


# @router.post("/cross-encoder-scores")
# async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse:
#     """Cross encoders can be purely black box from the app perspective"""
#     # Only local models should use this endpoint - API providers should make direct API calls
#     if rerank_request.provider_type is not None:
#         raise ValueError(
#             f"Model server reranking endpoint should only be used for local models. "
#             f"API provider '{rerank_request.provider_type}' should make direct API calls instead."
#         )

#     if INDEXING_ONLY:
#         raise RuntimeError("Indexing model server should not call reranking endpoint")

#     if not rerank_request.documents or not rerank_request.query:
#         raise HTTPException(
#             status_code=400, detail="Missing documents or query for reranking"
#         )
#     if not all(rerank_request.documents):
#         raise ValueError("Empty documents cannot be reranked.")

#     try:
#         # At this point, provider_type is None, so handle local reranking
#         sim_scores = await local_rerank(
#             query=rerank_request.query,
#             docs=rerank_request.documents,
#             model_name=rerank_request.model_name,
#         )
#         return RerankResponse(scores=sim_scores)

#     except Exception as e:
#         logger.exception(f"Error during reranking process:\n{str(e)}")
#         raise HTTPException(
#             status_code=500, detail="Failed to run Cross-Encoder reranking"
#         )


================================================
FILE: backend/model_server/main.py
================================================
import logging
import os
import shutil
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from pathlib import Path

import sentry_sdk
import torch
import uvicorn
from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from transformers import logging as transformer_logging

from model_server.encoders import router as encoders_router
from model_server.management_endpoints import router as management_router
from model_server.utils import get_gpu_type
from onyx import __version__
from onyx.utils.logger import setup_logger
from onyx.utils.logger import setup_uvicorn_logger
from onyx.utils.middleware import add_onyx_request_id_middleware
from onyx.utils.middleware import add_onyx_tenant_id_middleware
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import MIN_THREADS_ML_MODELS
from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import SENTRY_DSN

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"

HF_CACHE_PATH = Path(".cache/huggingface")
TEMP_HF_CACHE_PATH = Path(".cache/temp_huggingface")

transformer_logging.set_verbosity_error()

logger = setup_logger()

file_handlers = [
    h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)
]

setup_uvicorn_logger(shared_file_handlers=file_handlers)


def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None:
    """
    This moves the files from the temp huggingface cache to the huggingface cache

    We have to move each file individually because the directories might
    have the same name but not the same contents and we dont want to remove
    the files in the existing huggingface cache that don't exist in the temp
    huggingface cache.
    """

    for item in source.iterdir():
        target_path = dest / item.relative_to(source)
        if item.is_dir():
            _move_files_recursively(item, target_path, overwrite)
        else:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            if target_path.exists() and not overwrite:
                continue
            shutil.move(str(item), str(target_path))


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator:
    gpu_type = get_gpu_type()
    logger.notice(f"Torch GPU Detection: gpu_type={gpu_type}")

    app.state.gpu_type = gpu_type

    try:
        if TEMP_HF_CACHE_PATH.is_dir():
            logger.notice("Moving contents of temp_huggingface to huggingface cache.")
            _move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
            shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
            logger.notice("Moved contents of temp_huggingface to huggingface cache.")
    except Exception as e:
        logger.warning(
            f"Error moving contents of temp_huggingface to huggingface cache: {e}. "
            "This is not a critical error and the model server will continue to run."
        )

    torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
    logger.notice(f"Torch Threads: {torch.get_num_threads()}")

    yield


def get_model_app() -> FastAPI:
    application = FastAPI(
        title="Onyx Model Server", version=__version__, lifespan=lifespan
    )
    if SENTRY_DSN:
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
        logger.debug("Sentry DSN not provided, skipping Sentry initialization")

    application.include_router(management_router)
    application.include_router(encoders_router)

    request_id_prefix = "INF"
    if INDEXING_ONLY:
        request_id_prefix = "IDX"

    add_onyx_tenant_id_middleware(application, logger)
    add_onyx_request_id_middleware(application, request_id_prefix, logger)

    # Initialize and instrument the app
    Instrumentator().instrument(application).expose(application)

    return application


app = get_model_app()


if __name__ == "__main__":
    logger.notice(
        f"Starting Onyx Model Server on http://{MODEL_SERVER_ALLOWED_HOST}:{str(MODEL_SERVER_PORT)}/"
    )
    logger.notice(f"Model Server Version: {__version__}")
    uvicorn.run(app, host=MODEL_SERVER_ALLOWED_HOST, port=MODEL_SERVER_PORT)


================================================
FILE: backend/model_server/management_endpoints.py
================================================
from fastapi import APIRouter
from fastapi import Response

from model_server.constants import GPUStatus
from model_server.utils import get_gpu_type

router = APIRouter(prefix="/api")


@router.get("/health")
async def healthcheck() -> Response:
    return Response(status_code=200)


@router.get("/gpu-status")
async def route_gpu_status() -> dict[str, bool | str]:
    gpu_type = get_gpu_type()
    gpu_available = gpu_type != GPUStatus.NONE
    return {"gpu_available": gpu_available, "type": gpu_type}


================================================
FILE: backend/model_server/utils.py
================================================
import asyncio
import time
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from functools import wraps
from typing import Any
from typing import cast
from typing import TypeVar

import torch

from model_server.constants import GPUStatus
from onyx.utils.logger import setup_logger

logger = setup_logger()

F = TypeVar("F", bound=Callable)
FG = TypeVar("FG", bound=Callable[..., Generator | Iterator])


def simple_log_function_time(
    func_name: str | None = None,
    debug_only: bool = False,
    include_args: bool = False,
) -> Callable[[F], F]:
    def decorator(func: F) -> F:
        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def wrapped_async_func(*args: Any, **kwargs: Any) -> Any:
                start_time = time.time()
                result = await func(*args, **kwargs)
                elapsed_time_str = str(time.time() - start_time)
                log_name = func_name or func.__name__
                args_str = f" args={args} kwargs={kwargs}" if include_args else ""
                final_log = f"{log_name}{args_str} took {elapsed_time_str} seconds"
                if debug_only:
                    logger.debug(final_log)
                else:
                    logger.notice(final_log)
                return result

            return cast(F, wrapped_async_func)
        else:

            @wraps(func)
            def wrapped_sync_func(*args: Any, **kwargs: Any) -> Any:
                start_time = time.time()
                result = func(*args, **kwargs)
                elapsed_time_str = str(time.time() - start_time)
                log_name = func_name or func.__name__
                args_str = f" args={args} kwargs={kwargs}" if include_args else ""
                final_log = f"{log_name}{args_str} took {elapsed_time_str} seconds"
                if debug_only:
                    logger.debug(final_log)
                else:
                    logger.notice(final_log)
                return result

            return cast(F, wrapped_sync_func)

    return decorator


def get_gpu_type() -> str:
    if torch.cuda.is_available():
        return GPUStatus.CUDA
    if torch.backends.mps.is_available():
        return GPUStatus.MAC_MPS

    return GPUStatus.NONE


================================================
FILE: backend/onyx/__init__.py
================================================
import os

__version__ = os.environ.get("ONYX_VERSION", "") or "Development"


================================================
FILE: backend/onyx/access/__init__.py
================================================


================================================
FILE: backend/onyx/access/access.py
================================================
from collections.abc import Callable
from typing import cast

from sqlalchemy.orm import Session

from onyx.access.models import DocumentAccess
from onyx.access.utils import prefix_user_email
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.db.document import get_access_info_for_document
from onyx.db.document import get_access_info_for_documents
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.user_file import fetch_user_files_with_access_relationships
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import fetch_versioned_implementation


def _get_access_for_document(
    document_id: str,
    db_session: Session,
) -> DocumentAccess:
    info = get_access_info_for_document(
        db_session=db_session,
        document_id=document_id,
    )

    doc_access = DocumentAccess.build(
        user_emails=info[1] if info and info[1] else [],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=info[2] if info else False,
    )

    return doc_access


def get_access_for_document(
    document_id: str,
    db_session: Session,
) -> DocumentAccess:
    versioned_get_access_for_document_fn = fetch_versioned_implementation(
        "onyx.access.access", "_get_access_for_document"
    )
    return versioned_get_access_for_document_fn(document_id, db_session)


def get_null_document_access() -> DocumentAccess:
    return DocumentAccess.build(
        user_emails=[],
        user_groups=[],
        is_public=False,
        external_user_emails=[],
        external_user_group_ids=[],
    )


def _get_access_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    document_access_info = get_access_info_for_documents(
        db_session=db_session,
        document_ids=document_ids,
    )
    doc_access = {}
    for document_id, user_emails, is_public in document_access_info:
        doc_access[document_id] = DocumentAccess.build(
            user_emails=[email for email in user_emails if email],
            # MIT version will wipe all groups and external groups on update
            user_groups=[],
            is_public=is_public,
            external_user_emails=[],
            external_user_group_ids=[],
        )

    # Sometimes the document has not been indexed by the indexing job yet, in those cases
    # the document does not exist and so we use least permissive. Specifically the EE version
    # checks the MIT version permissions and creates a superset. This ensures that this flow
    # does not fail even if the Document has not yet been indexed.
    for doc_id in document_ids:
        if doc_id not in doc_access:
            doc_access[doc_id] = get_null_document_access()
    return doc_access


def get_access_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    """Fetches all access information for the given documents."""
    versioned_get_access_for_documents_fn = fetch_versioned_implementation(
        "onyx.access.access", "_get_access_for_documents"
    )
    return versioned_get_access_for_documents_fn(document_ids, db_session)


def _get_acl_for_user(
    user: User, db_session: Session  # noqa: ARG001
) -> set[str]:  # noqa: ARG001
    """Returns a list of ACL entries that the user has access to. This is meant to be
    used downstream to filter out documents that the user does not have access to. The
    user should have access to a document if at least one entry in the document's ACL
    matches one entry in the returned set.

    Anonymous users only have access to public documents.
    """
    if user.is_anonymous:
        return {PUBLIC_DOC_PAT}
    return {prefix_user_email(user.email), PUBLIC_DOC_PAT}


def get_acl_for_user(user: User, db_session: Session | None = None) -> set[str]:
    versioned_acl_for_user_fn = fetch_versioned_implementation(
        "onyx.access.access", "_get_acl_for_user"
    )
    return versioned_acl_for_user_fn(user, db_session)


def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> bool:
    _source_should_fetch_permissions_during_indexing_func = cast(
        Callable[[DocumentSource], bool],
        fetch_ee_implementation_or_noop(
            "onyx.external_permissions.sync_params",
            "source_should_fetch_permissions_during_indexing",
            False,
        ),
    )
    return _source_should_fetch_permissions_during_indexing_func(source)


def get_access_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    versioned_fn = fetch_versioned_implementation(
        "onyx.access.access", "get_access_for_user_files_impl"
    )
    return versioned_fn(user_file_ids, db_session)


def get_access_for_user_files_impl(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    user_files = fetch_user_files_with_access_relationships(user_file_ids, db_session)
    return build_access_for_user_files_impl(user_files)


def build_access_for_user_files(
    user_files: list[UserFile],
) -> dict[str, DocumentAccess]:
    """Compute access from pre-loaded UserFile objects (with relationships).
    Callers must ensure UserFile.user, Persona.users, and Persona.user are
    eagerly loaded (and Persona.groups for the EE path)."""
    versioned_fn = fetch_versioned_implementation(
        "onyx.access.access", "build_access_for_user_files_impl"
    )
    return versioned_fn(user_files)


def build_access_for_user_files_impl(
    user_files: list[UserFile],
) -> dict[str, DocumentAccess]:
    result: dict[str, DocumentAccess] = {}
    for user_file in user_files:
        emails, is_public = collect_user_file_access(user_file)
        result[str(user_file.id)] = DocumentAccess.build(
            user_emails=list(emails),
            user_groups=[],
            is_public=is_public,
            external_user_emails=[],
            external_user_group_ids=[],
        )
    return result


def collect_user_file_access(user_file: UserFile) -> tuple[set[str], bool]:
    """Collect all user emails that should have access to this user file.
    Includes the owner plus any users who have access via shared personas.
    Returns (emails, is_public)."""
    emails: set[str] = {user_file.user.email}
    is_public = False
    for persona in user_file.assistants:
        if persona.deleted:
            continue
        if persona.is_public:
            is_public = True
        if persona.user_id is not None and persona.user:
            emails.add(persona.user.email)
        for shared_user in persona.users:
            emails.add(shared_user.email)
    return emails, is_public


================================================
FILE: backend/onyx/access/hierarchy_access.py
================================================
from sqlalchemy.orm import Session

from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation


def _get_user_external_group_ids(
    db_session: Session,  # noqa: ARG001
    user: User,  # noqa: ARG001
) -> list[str]:
    return []


def get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
    versioned_get_user_external_group_ids = fetch_versioned_implementation(
        "onyx.access.hierarchy_access", "_get_user_external_group_ids"
    )
    return versioned_get_user_external_group_ids(db_session, user)


================================================
FILE: backend/onyx/access/models.py
================================================
from dataclasses import dataclass

from onyx.access.utils import prefix_external_group
from onyx.access.utils import prefix_user_email
from onyx.access.utils import prefix_user_group
from onyx.configs.constants import PUBLIC_DOC_PAT


@dataclass(frozen=True)
class ExternalAccess:
    # arbitrary limit to prevent excessively large permissions sets
    # not internally enforced ... the caller can check this before using the instance
    MAX_NUM_ENTRIES = 5000

    # Emails of external users with access to the doc externally
    external_user_emails: set[str]
    # Names or external IDs of groups with access to the doc
    external_user_group_ids: set[str]
    # Whether the document is public in the external system or Onyx
    is_public: bool

    def __str__(self) -> str:
        """Prevent extremely long logs"""

        def truncate_set(s: set[str], max_len: int = 100) -> str:
            s_str = str(s)
            if len(s_str) > max_len:
                return f"{s_str[:max_len]}... ({len(s)} items)"
            return s_str

        return (
            f"ExternalAccess("
            f"external_user_emails={truncate_set(self.external_user_emails)}, "
            f"external_user_group_ids={truncate_set(self.external_user_group_ids)}, "
            f"is_public={self.is_public})"
        )

    @property
    def num_entries(self) -> int:
        return len(self.external_user_emails) + len(self.external_user_group_ids)

    @classmethod
    def public(cls) -> "ExternalAccess":
        return cls(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        )

    @classmethod
    def empty(cls) -> "ExternalAccess":
        """
        A helper function that returns an *empty* set of external user-emails and group-ids, and sets `is_public` to `False`.
        This effectively makes the document in question "private" or inaccessible to anyone else.

        This is especially helpful to use when you are performing permission-syncing, and some document's permissions aren't able
        to be determined (for whatever reason). Setting its `ExternalAccess` to "private" is a feasible fallback.
        """

        return cls(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=False,
        )


@dataclass(frozen=True)
class DocExternalAccess:
    """
    This is just a class to wrap the external access and the document ID
    together. It's used for syncing document permissions to Vespa.
    """

    external_access: ExternalAccess
    # The document ID
    doc_id: str

    def to_dict(self) -> dict:
        return {
            "external_access": {
                "external_user_emails": list(self.external_access.external_user_emails),
                "external_user_group_ids": list(
                    self.external_access.external_user_group_ids
                ),
                "is_public": self.external_access.is_public,
            },
            "doc_id": self.doc_id,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "DocExternalAccess":
        external_access = ExternalAccess(
            external_user_emails=set(
                data["external_access"].get("external_user_emails", [])
            ),
            external_user_group_ids=set(
                data["external_access"].get("external_user_group_ids", [])
            ),
            is_public=data["external_access"]["is_public"],
        )
        return cls(
            external_access=external_access,
            doc_id=data["doc_id"],
        )


@dataclass(frozen=True)
class NodeExternalAccess:
    """
    Wraps external access with a hierarchy node's raw ID.
    Used for syncing hierarchy node permissions (e.g., folder permissions).
    """

    external_access: ExternalAccess
    # The raw node ID from the source system (e.g., Google Drive folder ID)
    raw_node_id: str
    # The source type (e.g., "google_drive")
    source: str

    def to_dict(self) -> dict:
        return {
            "external_access": {
                "external_user_emails": list(self.external_access.external_user_emails),
                "external_user_group_ids": list(
                    self.external_access.external_user_group_ids
                ),
                "is_public": self.external_access.is_public,
            },
            "raw_node_id": self.raw_node_id,
            "source": self.source,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "NodeExternalAccess":
        external_access = ExternalAccess(
            external_user_emails=set(
                data["external_access"].get("external_user_emails", [])
            ),
            external_user_group_ids=set(
                data["external_access"].get("external_user_group_ids", [])
            ),
            is_public=data["external_access"]["is_public"],
        )
        return cls(
            external_access=external_access,
            raw_node_id=data["raw_node_id"],
            source=data["source"],
        )


# Union type for elements that can have permissions synced
ElementExternalAccess = DocExternalAccess | NodeExternalAccess


# TODO(andrei): First refactor this into a pydantic model, then get rid of
# duplicate fields.
@dataclass(frozen=True, init=False)
class DocumentAccess(ExternalAccess):
    # User emails for Onyx users, None indicates admin
    user_emails: set[str | None]

    # Names of user groups associated with this document
    user_groups: set[str]

    external_user_emails: set[str]
    external_user_group_ids: set[str]
    is_public: bool

    def __init__(self) -> None:
        raise TypeError(
            "Use `DocumentAccess.build(...)` instead of creating an instance directly."
        )

    def to_acl(self) -> set[str]:
        """Converts the access state to a set of formatted ACL strings.

        NOTE: When querying for documents, the supplied ACL filter strings must
        be formatted in the same way as this function.
        """
        acl_set: set[str] = set()
        for user_email in self.user_emails:
            if user_email:
                acl_set.add(prefix_user_email(user_email))

        for group_name in self.user_groups:
            acl_set.add(prefix_user_group(group_name))

        for external_user_email in self.external_user_emails:
            acl_set.add(prefix_user_email(external_user_email))

        for external_group_id in self.external_user_group_ids:
            acl_set.add(prefix_external_group(external_group_id))

        if self.is_public:
            acl_set.add(PUBLIC_DOC_PAT)

        return acl_set

    @classmethod
    def build(
        cls,
        user_emails: list[str | None],
        user_groups: list[str],
        external_user_emails: list[str],
        external_user_group_ids: list[str],
        is_public: bool,
    ) -> "DocumentAccess":
        """Don't prefix incoming data wth acl type, prefix on read from to_acl!"""

        obj = object.__new__(cls)
        object.__setattr__(
            obj, "user_emails", {user_email for user_email in user_emails if user_email}
        )
        object.__setattr__(obj, "user_groups", set(user_groups))
        object.__setattr__(
            obj,
            "external_user_emails",
            {external_email for external_email in external_user_emails},
        )
        object.__setattr__(
            obj,
            "external_user_group_ids",
            {external_group_id for external_group_id in external_user_group_ids},
        )
        object.__setattr__(obj, "is_public", is_public)

        return obj


default_public_access = DocumentAccess.build(
    external_user_emails=[],
    external_user_group_ids=[],
    user_emails=[],
    user_groups=[],
    is_public=True,
)


================================================
FILE: backend/onyx/access/utils.py
================================================
from onyx.configs.constants import DocumentSource


def prefix_user_email(user_email: str) -> str:
    """Prefixes a user email to eliminate collision with group names.
    This applies to both a Onyx user and an External user, this is to make the query time
    more efficient"""
    return f"user_email:{user_email}"


def prefix_user_group(user_group_name: str) -> str:
    """Prefixes a user group name to eliminate collision with user emails.
    This assumes that user ids are prefixed with a different prefix."""
    return f"group:{user_group_name}"


def prefix_external_group(ext_group_name: str) -> str:
    """Prefixes an external group name to eliminate collision with user emails / Onyx groups."""
    return f"external_group:{ext_group_name}"


def build_ext_group_name_for_onyx(ext_group_name: str, source: DocumentSource) -> str:
    """
    External groups may collide across sources, every source needs its own prefix.
    NOTE: the name is lowercased to handle case sensitivity for group names
    """
    return f"{source.value}_{ext_group_name}".lower()


================================================
FILE: backend/onyx/auth/__init__.py
================================================


================================================
FILE: backend/onyx/auth/anonymous_user.py
================================================
from collections.abc import Mapping
from typing import Any
from typing import cast

from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import ANONYMOUS_USER_INFO_ID
from onyx.configs.constants import KV_ANONYMOUS_USER_PERSONALIZATION_KEY
from onyx.configs.constants import KV_ANONYMOUS_USER_PREFERENCES_KEY
from onyx.key_value_store.store import KeyValueStore
from onyx.key_value_store.store import KvKeyNotFoundError
from onyx.server.manage.models import UserInfo
from onyx.server.manage.models import UserPersonalization
from onyx.server.manage.models import UserPreferences


def set_anonymous_user_preferences(
    store: KeyValueStore, preferences: UserPreferences
) -> None:
    store.store(KV_ANONYMOUS_USER_PREFERENCES_KEY, preferences.model_dump())


def set_anonymous_user_personalization(
    store: KeyValueStore, personalization: UserPersonalization
) -> None:
    store.store(KV_ANONYMOUS_USER_PERSONALIZATION_KEY, personalization.model_dump())


def load_anonymous_user_preferences(store: KeyValueStore) -> UserPreferences:
    try:
        preferences_data = cast(
            Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PREFERENCES_KEY)
        )
        return UserPreferences(**preferences_data)
    except KvKeyNotFoundError:
        return UserPreferences(
            chosen_assistants=None, default_model=None, auto_scroll=True
        )


def fetch_anonymous_user_info(store: KeyValueStore) -> UserInfo:
    """Fetch a UserInfo object for anonymous users (used for API responses)."""
    personalization = UserPersonalization()
    try:
        personalization_data = cast(
            Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PERSONALIZATION_KEY)
        )
        personalization = UserPersonalization(**personalization_data)
    except KvKeyNotFoundError:
        pass

    return UserInfo(
        id=ANONYMOUS_USER_INFO_ID,
        email=ANONYMOUS_USER_EMAIL,
        is_active=True,
        is_superuser=False,
        is_verified=True,
        role=UserRole.LIMITED,
        preferences=load_anonymous_user_preferences(store),
        personalization=personalization,
        is_anonymous_user=True,
        password_configured=False,
    )


================================================
FILE: backend/onyx/auth/api_key.py
================================================
import hashlib
import secrets
import uuid
from urllib.parse import quote

from fastapi import Request
from passlib.hash import sha256_crypt
from pydantic import BaseModel

from onyx.auth.constants import API_KEY_LENGTH
from onyx.auth.constants import API_KEY_PREFIX
from onyx.auth.constants import DEPRECATED_API_KEY_PREFIX
from onyx.auth.schemas import UserRole
from onyx.auth.utils import get_hashed_bearer_token_from_request
from onyx.configs.app_configs import API_KEY_HASH_ROUNDS
from shared_configs.configs import MULTI_TENANT


class ApiKeyDescriptor(BaseModel):
    api_key_id: int
    api_key_display: str
    api_key: str | None = None  # only present on initial creation
    api_key_name: str | None = None
    api_key_role: UserRole

    user_id: uuid.UUID


def generate_api_key(tenant_id: str | None = None) -> str:
    if not MULTI_TENANT or not tenant_id:
        return API_KEY_PREFIX + secrets.token_urlsafe(API_KEY_LENGTH)

    encoded_tenant = quote(tenant_id)  # URL encode the tenant ID
    return f"{API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(API_KEY_LENGTH)}"


def _deprecated_hash_api_key(api_key: str) -> str:
    return sha256_crypt.hash(api_key, salt="", rounds=API_KEY_HASH_ROUNDS)


def hash_api_key(api_key: str) -> str:
    # NOTE: no salt is needed, as the API key is randomly generated
    # and overlaps are impossible
    if api_key.startswith(API_KEY_PREFIX):
        return hashlib.sha256(api_key.encode("utf-8")).hexdigest()

    if api_key.startswith(DEPRECATED_API_KEY_PREFIX):
        return _deprecated_hash_api_key(api_key)

    raise ValueError(f"Invalid API key prefix: {api_key[:3]}")


def build_displayable_api_key(api_key: str) -> str:
    if api_key.startswith(API_KEY_PREFIX):
        api_key = api_key[len(API_KEY_PREFIX) :]

    return API_KEY_PREFIX + api_key[:4] + "********" + api_key[-4:]


def get_hashed_api_key_from_request(request: Request) -> str | None:
    """Extract and hash API key from Authorization header.

    Accepts both "Bearer <key>" and raw key formats.
    """
    return get_hashed_bearer_token_from_request(
        request,
        valid_prefixes=[API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX],
        hash_fn=hash_api_key,
        allow_non_bearer=True,  # API keys historically support both formats
    )


================================================
FILE: backend/onyx/auth/captcha.py
================================================
"""Captcha verification for user registration."""

import httpx
from pydantic import BaseModel
from pydantic import Field

from onyx.configs.app_configs import CAPTCHA_ENABLED
from onyx.configs.app_configs import RECAPTCHA_SCORE_THRESHOLD
from onyx.configs.app_configs import RECAPTCHA_SECRET_KEY
from onyx.utils.logger import setup_logger

logger = setup_logger()

RECAPTCHA_VERIFY_URL = "https://www.google.com/recaptcha/api/siteverify"


class CaptchaVerificationError(Exception):
    """Raised when captcha verification fails."""


class RecaptchaResponse(BaseModel):
    """Response from Google reCAPTCHA verification API."""

    success: bool
    score: float | None = None  # Only present for reCAPTCHA v3
    action: str | None = None
    challenge_ts: str | None = None
    hostname: str | None = None
    error_codes: list[str] | None = Field(default=None, alias="error-codes")


def is_captcha_enabled() -> bool:
    """Check if captcha verification is enabled."""
    return CAPTCHA_ENABLED and bool(RECAPTCHA_SECRET_KEY)


async def verify_captcha_token(
    token: str,
    expected_action: str = "signup",
) -> None:
    """
    Verify a reCAPTCHA token with Google's API.

    Args:
        token: The reCAPTCHA response token from the client
        expected_action: Expected action name for v3 verification

    Raises:
        CaptchaVerificationError: If verification fails
    """
    if not is_captcha_enabled():
        return

    if not token:
        raise CaptchaVerificationError("Captcha token is required")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                RECAPTCHA_VERIFY_URL,
                data={
                    "secret": RECAPTCHA_SECRET_KEY,
                    "response": token,
                },
                timeout=10.0,
            )
            response.raise_for_status()

            data = response.json()
            result = RecaptchaResponse(**data)

            if not result.success:
                error_codes = result.error_codes or ["unknown-error"]
                logger.warning(f"Captcha verification failed: {error_codes}")
                raise CaptchaVerificationError(
                    f"Captcha verification failed: {', '.join(error_codes)}"
                )

            # For reCAPTCHA v3, also check the score
            if result.score is not None:
                if result.score < RECAPTCHA_SCORE_THRESHOLD:
                    logger.warning(
                        f"Captcha score too low: {result.score} < {RECAPTCHA_SCORE_THRESHOLD}"
                    )
                    raise CaptchaVerificationError(
                        "Captcha verification failed: suspicious activity detected"
                    )

                # Optionally verify the action matches
                if result.action and result.action != expected_action:
                    logger.warning(
                        f"Captcha action mismatch: {result.action} != {expected_action}"
                    )
                    raise CaptchaVerificationError(
                        "Captcha verification failed: action mismatch"
                    )

            logger.debug(
                f"Captcha verification passed: score={result.score}, action={result.action}"
            )

    except httpx.HTTPError as e:
        logger.error(f"Captcha API request failed: {e}")
        # In case of API errors, we might want to allow registration
        # to prevent blocking legitimate users. This is a policy decision.
        raise CaptchaVerificationError("Captcha verification service unavailable")


================================================
FILE: backend/onyx/auth/constants.py
================================================
"""Authentication constants shared across auth modules."""

# API Key constants
API_KEY_PREFIX = "on_"
DEPRECATED_API_KEY_PREFIX = "dn_"
API_KEY_LENGTH = 192

# PAT constants
PAT_PREFIX = "onyx_pat_"
PAT_LENGTH = 192

# Shared header constants
API_KEY_HEADER_NAME = "Authorization"
API_KEY_HEADER_ALTERNATIVE_NAME = "X-Onyx-Authorization"
BEARER_PREFIX = "Bearer "


================================================
FILE: backend/onyx/auth/disposable_email_validator.py
================================================
"""
Utility to validate and block disposable/temporary email addresses.

This module fetches a list of known disposable email domains from a remote source
and caches them for performance. It's used during user registration to prevent
abuse from temporary email services.
"""

import threading
import time
from typing import Set

import httpx

from onyx.configs.app_configs import DISPOSABLE_EMAIL_DOMAINS_URL
from onyx.utils.logger import setup_logger

logger = setup_logger()


class DisposableEmailValidator:
    """
    Thread-safe singleton validator for disposable email domains.

    Fetches and caches the list of disposable domains, with periodic refresh.
    """

    _instance: "DisposableEmailValidator | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "DisposableEmailValidator":
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        # Check if already initialized using a try/except to avoid type issues
        try:
            if self._initialized:
                return
        except AttributeError:
            pass

        self._domains: Set[str] = set()
        self._last_fetch_time: float = 0
        self._fetch_lock = threading.Lock()
        # Cache for 1 hour
        self._cache_duration = 3600
        # Hardcoded fallback list of common disposable domains
        # This ensures we block at least these even if the remote fetch fails
        self._fallback_domains = {
            "trashlify.com",
            "10minutemail.com",
            "guerrillamail.com",
            "mailinator.com",
            "tempmail.com",
            "chat-tempmail.com",
            "throwaway.email",
            "yopmail.com",
            "temp-mail.org",
            "getnada.com",
            "maildrop.cc",
        }
        # Set initialized flag last to prevent race conditions
        self._initialized: bool = True

    def _should_refresh(self) -> bool:
        """Check if the cached domains should be refreshed."""
        return (time.time() - self._last_fetch_time) > self._cache_duration

    def _fetch_domains(self) -> Set[str]:
        """
        Fetch disposable email domains from the configured URL.

        Returns:
            Set of domain strings (lowercased)
        """
        if not DISPOSABLE_EMAIL_DOMAINS_URL:
            logger.debug("DISPOSABLE_EMAIL_DOMAINS_URL not configured")
            return self._fallback_domains.copy()

        try:
            logger.info(
                f"Fetching disposable email domains from {DISPOSABLE_EMAIL_DOMAINS_URL}"
            )
            with httpx.Client(timeout=10.0) as client:
                response = client.get(DISPOSABLE_EMAIL_DOMAINS_URL)
                response.raise_for_status()

                domains_list = response.json()

                if not isinstance(domains_list, list):
                    logger.error(
                        f"Expected list from disposable domains URL, got {type(domains_list)}"
                    )
                    return self._fallback_domains.copy()

                # Convert all to lowercase and create set
                domains = {domain.lower().strip() for domain in domains_list if domain}

                # Always include fallback domains
                domains.update(self._fallback_domains)

                logger.info(
                    f"Successfully fetched {len(domains)} disposable email domains"
                )
                return domains

        except httpx.HTTPError as e:
            logger.warning(f"Failed to fetch disposable domains (HTTP error): {e}")
        except Exception as e:
            logger.warning(f"Failed to fetch disposable domains: {e}")

        # On error, return fallback domains
        return self._fallback_domains.copy()

    def get_domains(self) -> Set[str]:
        """
        Get the cached set of disposable email domains.
        Refreshes the cache if needed.

        Returns:
            Set of disposable domain strings (lowercased)
        """
        # Fast path: return cached domains if still fresh
        if self._domains and not self._should_refresh():
            return self._domains.copy()

        # Slow path: need to refresh
        with self._fetch_lock:
            # Double-check after acquiring lock
            if self._domains and not self._should_refresh():
                return self._domains.copy()

            self._domains = self._fetch_domains()
            self._last_fetch_time = time.time()
            return self._domains.copy()

    def is_disposable(self, email: str) -> bool:
        """
        Check if an email address uses a disposable domain.

        Args:
            email: The email address to check

        Returns:
            True if the email domain is disposable, False otherwise
        """
        if not email or "@" not in email:
            return False

        parts = email.split("@")
        if len(parts) != 2 or not parts[0]:  # Must have user@domain with non-empty user
            return False

        domain = parts[1].lower().strip()
        if not domain:  # Domain part must not be empty
            return False

        disposable_domains = self.get_domains()
        return domain in disposable_domains


# Global singleton instance
_validator = DisposableEmailValidator()


def is_disposable_email(email: str) -> bool:
    """
    Check if an email address uses a disposable/temporary domain.

    This is a convenience function that uses the global validator instance.

    Args:
        email: The email address to check

    Returns:
        True if the email uses a disposable domain, False otherwise
    """
    return _validator.is_disposable(email)


def refresh_disposable_domains() -> None:
    """
    Force a refresh of the disposable domains list.

    This can be called manually if you want to update the list
    without waiting for the cache to expire.
    """
    _validator._last_fetch_time = 0
    _validator.get_domains()


================================================
FILE: backend/onyx/auth/email_utils.py
================================================
import base64
import smtplib
from datetime import datetime
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
from email.utils import make_msgid

import sendgrid  # type: ignore
from sendgrid.helpers.mail import Attachment  # type: ignore
from sendgrid.helpers.mail import Content
from sendgrid.helpers.mail import ContentId
from sendgrid.helpers.mail import Disposition
from sendgrid.helpers.mail import Email
from sendgrid.helpers.mail import FileContent
from sendgrid.helpers.mail import FileName
from sendgrid.helpers.mail import FileType
from sendgrid.helpers.mail import Mail
from sendgrid.helpers.mail import To

from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import EMAIL_FROM
from onyx.configs.app_configs import SENDGRID_API_KEY
from onyx.configs.app_configs import SMTP_PASS
from onyx.configs.app_configs import SMTP_PORT
from onyx.configs.app_configs import SMTP_SERVER
from onyx.configs.app_configs import SMTP_USER
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME
from onyx.configs.constants import ONYX_DISCORD_URL
from onyx.db.models import User
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.url import add_url_params
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

HTML_EMAIL_TEMPLATE = """\
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width" />
  <title>{title}</title>
  <style>
    body, table, td, a {{
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
      text-size-adjust: 100%;
      margin: 0;
      padding: 0;
      -webkit-font-smoothing: antialiased;
      -webkit-text-size-adjust: none;
    }}
    body {{
      background-color: #f7f7f7;
      color: #333;
    }}
    .body-content {{
      color: #333;
    }}
    .email-container {{
      width: 100%;
      max-width: 600px;
      margin: 0 auto;
      background-color: #ffffff;
      border-radius: 6px;
      overflow: hidden;
      border: 1px solid #eaeaea;
    }}
    .header {{
      background-color: #000000;
      padding: 20px;
      text-align: center;
    }}
    .header img {{
      max-width: 140px;
      width: 140px;
      height: auto;
      filter: brightness(1.1) contrast(1.2);
      border-radius: 8px;
      padding: 5px;
    }}
    .body-content {{
      padding: 20px 30px;
    }}
    .title {{
      font-size: 20px;
      font-weight: bold;
      margin: 0 0 10px;
    }}
    .message {{
      font-size: 16px;
      line-height: 1.5;
      margin: 0 0 20px;
    }}
    .cta-button {{
      display: inline-block;
      padding: 14px 24px;
      background-color: #0055FF;
      color: #ffffff !important;
      text-decoration: none;
      border-radius: 4px;
      font-weight: 600;
      font-size: 16px;
      margin-top: 10px;
      box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
      text-align: center;
    }}
    .footer {{
      font-size: 13px;
      color: #6A7280;
      text-align: center;
      padding: 20px;
    }}
    .footer a {{
      color: #6b7280;
      text-decoration: underline;
    }}
  </style>
</head>
<body>
  <table role="presentation" class="email-container" cellpadding="0" cellspacing="0">
    <tr>
      <td class="header">
        <img
          style="background-color: #ffffff; border-radius: 8px;"
          src="cid:logo.png"
          alt="{application_name} Logo"
        >
      </td>
    </tr>
    <tr>
      <td class="body-content">
        <h1 class="title">{heading}</h1>
        <div class="message">
          {message}
        </div>
        {cta_block}
      </td>
    </tr>
    <tr>
      <td class="footer">
        © {year} {application_name}. All rights reserved.
        {community_link_fragment}
      </td>
    </tr>
  </table>
</body>
</html>
"""


def build_html_email(
    application_name: str | None,
    heading: str,
    message: str,
    cta_text: str | None = None,
    cta_link: str | None = None,
) -> str:
    community_link_fragment = ""
    if application_name == ONYX_DEFAULT_APPLICATION_NAME:
        community_link_fragment = f'<br>Have questions? Join our Discord community <a href="{ONYX_DISCORD_URL}">here</a>.'

    if cta_text and cta_link:
        cta_block = f'<a class="cta-button" href="{cta_link}">{cta_text}</a>'
    else:
        cta_block = ""
    return HTML_EMAIL_TEMPLATE.format(
        application_name=application_name,
        title=heading,
        heading=heading,
        message=message,
        cta_block=cta_block,
        community_link_fragment=community_link_fragment,
        year=datetime.now().year,
    )


def send_email(
    user_email: str,
    subject: str,
    html_body: str,
    text_body: str,
    mail_from: str = EMAIL_FROM,
    inline_png: tuple[str, bytes] | None = None,
) -> None:
    if not EMAIL_CONFIGURED:
        raise ValueError("Email is not configured.")

    if SENDGRID_API_KEY:
        send_email_with_sendgrid(
            user_email, subject, html_body, text_body, mail_from, inline_png
        )
        return

    send_email_with_smtplib(
        user_email, subject, html_body, text_body, mail_from, inline_png
    )


def send_email_with_sendgrid(
    user_email: str,
    subject: str,
    html_body: str,
    text_body: str,
    mail_from: str = EMAIL_FROM,
    inline_png: tuple[str, bytes] | None = None,
) -> None:
    from_email = Email(mail_from) if mail_from else Email("noreply@onyx.app")
    to_email = To(user_email)

    mail = Mail(
        from_email=from_email,
        to_emails=to_email,
        subject=subject,
        plain_text_content=Content("text/plain", text_body),
    )

    # Add HTML content
    mail.add_content(Content("text/html", html_body))

    if inline_png:
        image_name, image_data = inline_png

        # Create attachment
        encoded_image = base64.b64encode(image_data).decode()
        attachment = Attachment()
        attachment.file_content = FileContent(encoded_image)
        attachment.file_name = FileName(image_name)
        attachment.file_type = FileType("image/png")
        attachment.disposition = Disposition("inline")
        attachment.content_id = ContentId(image_name)

        mail.add_attachment(attachment)

    # Get a JSON-ready representation of the Mail object
    mail_json = mail.get()

    sg = sendgrid.SendGridAPIClient(api_key=SENDGRID_API_KEY)
    response = sg.client.mail.send.post(request_body=mail_json)  # can raise
    if response.status_code != 202:
        logger.warning(f"Unexpected status code {response.status_code}")


def send_email_with_smtplib(
    user_email: str,
    subject: str,
    html_body: str,
    text_body: str,
    mail_from: str = EMAIL_FROM,
    inline_png: tuple[str, bytes] | None = None,
) -> None:

    # Create a multipart/alternative message - this indicates these are alternative versions of the same content
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["To"] = user_email
    if mail_from:
        msg["From"] = mail_from
    msg["Date"] = formatdate(localtime=True)
    msg["Message-ID"] = make_msgid(domain="onyx.app")

    # Add text part first (lowest priority)
    text_part = MIMEText(text_body, "plain")
    msg.attach(text_part)

    if inline_png:
        # For HTML with images, create a multipart/related container
        related = MIMEMultipart("related")

        # Add the HTML part to the related container
        html_part = MIMEText(html_body, "html")
        related.attach(html_part)

        # Add image with proper Content-ID to the related container
        img = MIMEImage(inline_png[1], _subtype="png")
        img.add_header("Content-ID", f"<{inline_png[0]}>")
        img.add_header("Content-Disposition", "inline", filename=inline_png[0])
        related.attach(img)

        # Add the related part to the message (higher priority than text)
        msg.attach(related)
    else:
        # No images, just add HTML directly (higher priority than text)
        html_part = MIMEText(html_body, "html")
        msg.attach(html_part)

    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:
        s.starttls()
        s.login(SMTP_USER, SMTP_PASS)
        s.send_message(msg)


def send_subscription_cancellation_email(user_email: str) -> None:
    """This is templated but isn't meaningful for whitelabeling."""

    # Example usage of the reusable HTML
    try:
        load_runtime_settings_fn = fetch_versioned_implementation(
            "onyx.server.enterprise_settings.store", "load_runtime_settings"
        )
        settings = load_runtime_settings_fn()
        application_name = settings.application_name
    except ModuleNotFoundError:
        application_name = ONYX_DEFAULT_APPLICATION_NAME

    onyx_file = OnyxRuntime.get_emailable_logo()

    subject = f"Your {application_name} Subscription Has Been Canceled"
    heading = "Subscription Canceled"
    message = (
        "<p>We're sorry to see you go.</p>"
        "<p>Your subscription has been canceled and will end on your next billing date.</p>"
        "<p>If you change your mind, you can always come back!</p>"
    )
    cta_text = "Renew Subscription"
    cta_link = "https://www.onyx.app/pricing"
    html_content = build_html_email(
        application_name,
        heading,
        message,
        cta_text,
        cta_link,
    )
    text_content = (
        "We're sorry to see you go.\n"
        "Your subscription has been canceled and will end on your next billing date.\n"
        "If you change your mind, visit https://www.onyx.app/pricing"
    )
    send_email(
        user_email,
        subject,
        html_content,
        text_content,
        inline_png=("logo.png", onyx_file.data),
    )


def build_user_email_invite(
    from_email: str, to_email: str, application_name: str, auth_type: AuthType
) -> tuple[str, str]:
    heading = "You've Been Invited!"

    # the exact action taken by the user, and thus the message, depends on the auth type
    message = f"<p>You have been invited by {from_email} to join an organization on {application_name}.</p>"
    if auth_type == AuthType.CLOUD:
        message += (
            "<p>To join the organization, please click the button below to set a password "
            "or login with Google and complete your registration.</p>"
        )
    elif auth_type == AuthType.BASIC:
        message += "<p>To join the organization, please click the button below to set a password and complete your registration.</p>"
    elif auth_type == AuthType.GOOGLE_OAUTH:
        message += "<p>To join the organization, please click the button below to login with Google and complete your registration.</p>"
    elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
        message += "<p>To join the organization, please click the button below to complete your registration.</p>"
    else:
        raise ValueError(f"Invalid auth type: {auth_type}")

    cta_text = "Join Organization"
    cta_link = f"{WEB_DOMAIN}/auth/signup?email={to_email}"

    html_content = build_html_email(
        application_name,
        heading,
        message,
        cta_text,
        cta_link,
    )

    # text content is the fallback for clients that don't support HTML
    # not as critical, so not having special cases for each auth type
    text_content = (
        f"You have been invited by {from_email} to join an organization on {application_name}.\n"
        "To join the organization, please visit the following link:\n"
        f"{WEB_DOMAIN}/auth/signup?email={to_email}\n"
    )
    if auth_type == AuthType.CLOUD:
        text_content += "You'll be asked to set a password or login with Google to complete your registration."

    return text_content, html_content


def send_user_email_invite(
    user_email: str, current_user: User, auth_type: AuthType
) -> None:
    try:
        load_runtime_settings_fn = fetch_versioned_implementation(
            "onyx.server.enterprise_settings.store", "load_runtime_settings"
        )
        settings = load_runtime_settings_fn()
        application_name = settings.application_name
    except ModuleNotFoundError:
        application_name = ONYX_DEFAULT_APPLICATION_NAME

    onyx_file = OnyxRuntime.get_emailable_logo()

    subject = f"Invitation to Join {application_name} Organization"

    text_content, html_content = build_user_email_invite(
        current_user.email, user_email, application_name, auth_type
    )

    send_email(
        user_email,
        subject,
        html_content,
        text_content,
        inline_png=("logo.png", onyx_file.data),
    )


def send_forgot_password_email(
    user_email: str,
    token: str,
    tenant_id: str,
    mail_from: str = EMAIL_FROM,
) -> None:
    # Builds a forgot password email with or without fancy HTML
    try:
        load_runtime_settings_fn = fetch_versioned_implementation(
            "onyx.server.enterprise_settings.store", "load_runtime_settings"
        )
        settings = load_runtime_settings_fn()
        application_name = settings.application_name
    except ModuleNotFoundError:
        application_name = ONYX_DEFAULT_APPLICATION_NAME

    onyx_file = OnyxRuntime.get_emailable_logo()

    subject = f"Reset Your {application_name} Password"
    heading = "Reset Your Password"
    tenant_param = f"&tenant={tenant_id}" if tenant_id and MULTI_TENANT else ""
    message = "<p>Please click the button below to reset your password. This link will expire in 24 hours.</p>"
    cta_text = "Reset Password"
    cta_link = f"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}"
    html_content = build_html_email(
        application_name,
        heading,
        message,
        cta_text,
        cta_link,
    )
    text_content = (
        f"Please click the following link to reset your password. This link will expire in 24 hours.\n"
        f"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}"
    )
    send_email(
        user_email,
        subject,
        html_content,
        text_content,
        mail_from,
        inline_png=("logo.png", onyx_file.data),
    )


def send_user_verification_email(
    user_email: str,
    token: str,
    new_organization: bool = False,
    mail_from: str = EMAIL_FROM,
) -> None:
    # Builds a verification email
    try:
        load_runtime_settings_fn = fetch_versioned_implementation(
            "onyx.server.enterprise_settings.store", "load_runtime_settings"
        )
        settings = load_runtime_settings_fn()
        application_name = settings.application_name
    except ModuleNotFoundError:
        application_name = ONYX_DEFAULT_APPLICATION_NAME

    onyx_file = OnyxRuntime.get_emailable_logo()

    subject = f"{application_name} Email Verification"
    link = f"{WEB_DOMAIN}/auth/verify-email?token={token}"
    if new_organization:
        link = add_url_params(link, {"first_user": "true"})
    message = (
        f"<p>Click the following link to verify your email address:</p><p>{link}</p>"
    )
    html_content = build_html_email(
        application_name,
        "Verify Your Email",
        message,
    )
    text_content = f"Click the following link to verify your email address: {link}"
    send_email(
        user_email,
        subject,
        html_content,
        text_content,
        mail_from,
        inline_png=("logo.png", onyx_file.data),
    )


================================================
FILE: backend/onyx/auth/invited_users.py
================================================
from typing import cast

from onyx.configs.constants import KV_PENDING_USERS_KEY
from onyx.configs.constants import KV_USER_STORE_KEY
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.utils.special_types import JSON_ro


def remove_user_from_invited_users(email: str) -> int:
    try:
        store = get_kv_store()
        user_emails = cast(list, store.load(KV_USER_STORE_KEY))
        remaining_users = [user for user in user_emails if user != email]
        store.store(KV_USER_STORE_KEY, cast(JSON_ro, remaining_users))
        return len(remaining_users)
    except KvKeyNotFoundError:
        return 0


def get_invited_users() -> list[str]:
    try:
        store = get_kv_store()
        return cast(list, store.load(KV_USER_STORE_KEY))
    except KvKeyNotFoundError:
        return list()


def write_invited_users(emails: list[str]) -> int:
    store = get_kv_store()
    store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails))
    return len(emails)


def get_pending_users() -> list[str]:
    try:
        store = get_kv_store()
        return cast(list, store.load(KV_PENDING_USERS_KEY))
    except KvKeyNotFoundError:
        return list()


def write_pending_users(emails: list[str]) -> int:
    store = get_kv_store()
    store.store(KV_PENDING_USERS_KEY, cast(JSON_ro, emails))
    return len(emails)


================================================
FILE: backend/onyx/auth/jwt.py
================================================
import json
from enum import Enum
from functools import lru_cache
from typing import Any
from typing import cast

import jwt
import requests
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
from jwt import decode as jwt_decode
from jwt import InvalidTokenError
from jwt import PyJWTError
from jwt.algorithms import RSAAlgorithm

from onyx.configs.app_configs import JWT_PUBLIC_KEY_URL
from onyx.utils.logger import setup_logger


logger = setup_logger()


_PUBLIC_KEY_FETCH_ATTEMPTS = 2


class PublicKeyFormat(Enum):
    JWKS = "jwks"
    PEM = "pem"


@lru_cache()
def _fetch_public_key_payload() -> tuple[str | dict[str, Any], PublicKeyFormat] | None:
    """Fetch and cache the raw JWT verification material."""
    if JWT_PUBLIC_KEY_URL is None:
        logger.error("JWT_PUBLIC_KEY_URL is not set")
        return None

    try:
        response = requests.get(JWT_PUBLIC_KEY_URL)
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.error(f"Failed to fetch JWT public key: {str(exc)}")
        return None
    content_type = response.headers.get("Content-Type", "").lower()
    raw_body = response.text
    body_lstripped = raw_body.lstrip()

    if "application/json" in content_type or body_lstripped.startswith("{"):
        try:
            data = response.json()
        except ValueError:
            logger.error("JWT public key URL returned invalid JSON")
            return None

        if isinstance(data, dict) and "keys" in data:
            return data, PublicKeyFormat.JWKS

        logger.error(
            "JWT public key URL returned JSON but no JWKS 'keys' field was found"
        )
        return None

    body = raw_body.strip()
    if not body:
        logger.error("JWT public key URL returned an empty response")
        return None

    return body, PublicKeyFormat.PEM


def get_public_key(token: str) -> RSAPublicKey | str | None:
    """Return the concrete public key used to verify the provided JWT token."""
    payload = _fetch_public_key_payload()
    if payload is None:
        logger.error("Failed to retrieve public key payload")
        return None

    key_material, key_format = payload

    if key_format is PublicKeyFormat.JWKS:
        jwks_data = cast(dict[str, Any], key_material)
        return _resolve_public_key_from_jwks(token, jwks_data)

    return cast(str, key_material)


def _resolve_public_key_from_jwks(
    token: str, jwks_payload: dict[str, Any]
) -> RSAPublicKey | None:
    try:
        header = jwt.get_unverified_header(token)
    except PyJWTError as e:
        logger.error(f"Unable to parse JWT header: {str(e)}")
        return None

    keys = jwks_payload.get("keys", []) if isinstance(jwks_payload, dict) else []
    if not keys:
        logger.error("JWKS payload did not contain any keys")
        return None

    kid = header.get("kid")
    thumbprint = header.get("x5t")

    candidates = []
    if kid:
        candidates = [k for k in keys if k.get("kid") == kid]
    if not candidates and thumbprint:
        candidates = [k for k in keys if k.get("x5t") == thumbprint]
    if not candidates and len(keys) == 1:
        candidates = keys

    if not candidates:
        logger.warning(
            "No matching JWK found for token header (kid=%s, x5t=%s)", kid, thumbprint
        )
        return None

    if len(candidates) > 1:
        logger.warning(
            "Multiple JWKs matched token header kid=%s; selecting the first occurrence",
            kid,
        )

    jwk = candidates[0]
    try:
        return cast(RSAPublicKey, RSAAlgorithm.from_jwk(json.dumps(jwk)))
    except ValueError as e:
        logger.error(f"Failed to construct RSA key from JWK: {str(e)}")
        return None


async def verify_jwt_token(token: str) -> dict[str, Any] | None:
    for attempt in range(_PUBLIC_KEY_FETCH_ATTEMPTS):
        public_key = get_public_key(token)
        if public_key is None:
            logger.error("Unable to resolve a public key for JWT verification")
            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:
                _fetch_public_key_payload.cache_clear()
                continue
            return None

        try:
            payload = jwt_decode(
                token,
                public_key,
                algorithms=["RS256"],
                options={"verify_aud": False},
            )
        except InvalidTokenError as e:
            logger.error(f"Invalid JWT token: {str(e)}")
            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:
                _fetch_public_key_payload.cache_clear()
                continue
            return None
        except PyJWTError as e:
            logger.error(f"JWT decoding error: {str(e)}")
            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:
                _fetch_public_key_payload.cache_clear()
                continue
            return None

        return payload

    return None


================================================
FILE: backend/onyx/auth/oauth_refresher.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Optional

import httpx
from fastapi_users.manager import BaseUserManager
from sqlalchemy.ext.asyncio import AsyncSession

from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY
from onyx.db.models import OAuthAccount
from onyx.db.models import User
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Standard OAuth refresh token endpoints
REFRESH_ENDPOINTS = {
    "google": "https://oauth2.googleapis.com/token",
}


# NOTE: Keeping this as a utility function for potential future debugging,
# but not using it in production code
async def _test_expire_oauth_token(
    user: User,
    oauth_account: OAuthAccount,
    db_session: AsyncSession,  # noqa: ARG001
    user_manager: BaseUserManager[User, Any],
    expire_in_seconds: int = 10,
) -> bool:
    """
    Utility function for testing - Sets an OAuth token to expire in a short time
    to facilitate testing of the refresh flow.
    Not used in production code.
    """
    try:
        new_expires_at = int(
            (datetime.now(timezone.utc).timestamp() + expire_in_seconds)
        )

        updated_data: Dict[str, Any] = {"expires_at": new_expires_at}

        await user_manager.user_db.update_oauth_account(
            user, cast(Any, oauth_account), updated_data
        )

        return True
    except Exception as e:
        logger.exception(f"Error setting artificial expiration: {str(e)}")
        return False


async def refresh_oauth_token(
    user: User,
    oauth_account: OAuthAccount,
    db_session: AsyncSession,  # noqa: ARG001
    user_manager: BaseUserManager[User, Any],
) -> bool:
    """
    Attempt to refresh an OAuth token that's about to expire or has expired.
    Returns True if successful, False otherwise.
    """
    if not oauth_account.refresh_token:
        logger.warning(
            f"No refresh token available for {user.email}'s {oauth_account.oauth_name} account"
        )
        return False

    provider = oauth_account.oauth_name
    if provider not in REFRESH_ENDPOINTS:
        logger.warning(f"Refresh endpoint not configured for provider: {provider}")
        return False

    try:
        logger.info(f"Refreshing OAuth token for {user.email}'s {provider} account")

        async with httpx.AsyncClient() as client:
            response = await client.post(
                REFRESH_ENDPOINTS[provider],
                data={
                    "client_id": OAUTH_CLIENT_ID,
                    "client_secret": OAUTH_CLIENT_SECRET,
                    "refresh_token": oauth_account.refresh_token,
                    "grant_type": "refresh_token",
                },
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )

            if response.status_code != 200:
                logger.error(
                    f"Failed to refresh OAuth token: Status {response.status_code}"
                )
                return False

            token_data = response.json()

            new_access_token = token_data.get("access_token")
            new_refresh_token = token_data.get(
                "refresh_token", oauth_account.refresh_token
            )
            expires_in = token_data.get("expires_in")

            # Calculate new expiry time if provided
            new_expires_at: Optional[int] = None
            if expires_in:
                new_expires_at = int(
                    (datetime.now(timezone.utc).timestamp() + expires_in)
                )

            # Update the OAuth account
            updated_data: Dict[str, Any] = {
                "access_token": new_access_token,
                "refresh_token": new_refresh_token,
            }

            if new_expires_at:
                updated_data["expires_at"] = new_expires_at

                # Update oidc_expiry in user model if we're tracking it
                if TRACK_EXTERNAL_IDP_EXPIRY:
                    oidc_expiry = datetime.fromtimestamp(
                        new_expires_at, tz=timezone.utc
                    )
                    await user_manager.user_db.update(
                        user, {"oidc_expiry": oidc_expiry}
                    )

            # Update the OAuth account
            await user_manager.user_db.update_oauth_account(
                user, cast(Any, oauth_account), updated_data
            )

            logger.info(f"Successfully refreshed OAuth token for {user.email}")
            return True

    except Exception as e:
        logger.exception(f"Error refreshing OAuth token: {str(e)}")
        return False


async def check_and_refresh_oauth_tokens(
    user: User,
    db_session: AsyncSession,
    user_manager: BaseUserManager[User, Any],
) -> None:
    """
    Check if any OAuth tokens are expired or about to expire and refresh them.
    """
    if not hasattr(user, "oauth_accounts") or not user.oauth_accounts:
        return

    now_timestamp = datetime.now(timezone.utc).timestamp()

    # Buffer time to refresh tokens before they expire (in seconds)
    buffer_seconds = 300  # 5 minutes

    for oauth_account in user.oauth_accounts:
        # Skip accounts without refresh tokens
        if not oauth_account.refresh_token:
            continue

        # If token is about to expire, refresh it
        if (
            oauth_account.expires_at
            and oauth_account.expires_at - now_timestamp < buffer_seconds
        ):
            logger.info(f"OAuth token for {user.email} is about to expire - refreshing")
            success = await refresh_oauth_token(
                user, oauth_account, db_session, user_manager
            )

            if not success:
                logger.warning(
                    "Failed to refresh OAuth token. User may need to re-authenticate."
                )


async def check_oauth_account_has_refresh_token(
    user: User,  # noqa: ARG001
    oauth_account: OAuthAccount,
) -> bool:
    """
    Check if an OAuth account has a refresh token.
    Returns True if a refresh token exists, False otherwise.
    """
    return bool(oauth_account.refresh_token)


async def get_oauth_accounts_requiring_refresh_token(user: User) -> List[OAuthAccount]:
    """
    Returns a list of OAuth accounts for a user that are missing refresh tokens.
    These accounts will need re-authentication to get refresh tokens.
    """
    if not hasattr(user, "oauth_accounts") or not user.oauth_accounts:
        return []

    accounts_needing_refresh = []
    for oauth_account in user.oauth_accounts:
        has_refresh_token = await check_oauth_account_has_refresh_token(
            user, oauth_account
        )
        if not has_refresh_token:
            accounts_needing_refresh.append(oauth_account)

    return accounts_needing_refresh


================================================
FILE: backend/onyx/auth/oauth_token_manager.py
================================================
import time
from typing import Any
from urllib.parse import urlencode
from uuid import UUID

import requests
from sqlalchemy.orm import Session

from onyx.db.models import OAuthConfig
from onyx.db.models import OAuthUserToken
from onyx.db.oauth_config import get_user_oauth_token
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.utils.logger import setup_logger
from onyx.utils.sensitive import SensitiveValue


logger = setup_logger()


class OAuthTokenManager:
    """Manages OAuth token retrieval, refresh, and validation"""

    def __init__(self, oauth_config: OAuthConfig, user_id: UUID, db_session: Session):
        self.oauth_config = oauth_config
        self.user_id = user_id
        self.db_session = db_session

    def get_valid_access_token(self) -> str | None:
        """Get valid access token, refreshing if necessary"""
        user_token = get_user_oauth_token(
            self.oauth_config.id, self.user_id, self.db_session
        )

        if not user_token:
            return None

        if not user_token.token_data:
            return None

        token_data = self._unwrap_token_data(user_token.token_data)

        # Check if token is expired
        if OAuthTokenManager.is_token_expired(token_data):
            # Try to refresh if we have a refresh token
            if "refresh_token" in token_data:
                try:
                    return self.refresh_token(user_token)
                except Exception as e:
                    logger.warning(f"Failed to refresh token: {e}")
                    return None
            else:
                return None

        return token_data.get("access_token")

    def refresh_token(self, user_token: OAuthUserToken) -> str:
        """Refresh access token using refresh token"""
        if not user_token.token_data:
            raise ValueError("No token data available for refresh")

        if (
            self.oauth_config.client_id is None
            or self.oauth_config.client_secret is None
        ):
            raise ValueError(
                "OAuth client_id and client_secret are required for token refresh"
            )

        token_data = self._unwrap_token_data(user_token.token_data)

        data: dict[str, str] = {
            "grant_type": "refresh_token",
            "refresh_token": token_data["refresh_token"],
            "client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
            "client_secret": self._unwrap_sensitive_str(
                self.oauth_config.client_secret
            ),
        }
        response = requests.post(
            self.oauth_config.token_url,
            data=data,
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()

        new_token_data = response.json()

        # Calculate expires_at if expires_in is present
        if "expires_in" in new_token_data:
            new_token_data["expires_at"] = (
                int(time.time()) + new_token_data["expires_in"]
            )

        # Preserve refresh_token if not returned (some providers don't return it)
        if "refresh_token" not in new_token_data and "refresh_token" in token_data:
            new_token_data["refresh_token"] = token_data["refresh_token"]

        # Update token in DB
        upsert_user_oauth_token(
            self.oauth_config.id,
            self.user_id,
            new_token_data,
            self.db_session,
        )

        return new_token_data["access_token"]

    @classmethod
    def token_expiration_time(cls, token_data: dict[str, Any]) -> int | None:
        """Get the token expiration time"""
        expires_at = token_data.get("expires_at")
        if not expires_at:
            return None

        return expires_at

    @classmethod
    def is_token_expired(cls, token_data: dict[str, Any]) -> bool:
        """Check if token is expired (with 60 second buffer)"""
        expires_at = cls.token_expiration_time(token_data)
        if not expires_at:
            return False  # No expiration data, assume valid

        # Add 60 second buffer to avoid race conditions
        return int(time.time()) + 60 >= expires_at

    def exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:
        """Exchange authorization code for access token"""
        if (
            self.oauth_config.client_id is None
            or self.oauth_config.client_secret is None
        ):
            raise ValueError(
                "OAuth client_id and client_secret are required for code exchange"
            )

        data: dict[str, str] = {
            "grant_type": "authorization_code",
            "code": code,
            "client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
            "client_secret": self._unwrap_sensitive_str(
                self.oauth_config.client_secret
            ),
            "redirect_uri": redirect_uri,
        }
        response = requests.post(
            self.oauth_config.token_url,
            data=data,
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()

        token_data = response.json()

        # Calculate expires_at if expires_in is present
        if "expires_in" in token_data:
            token_data["expires_at"] = int(time.time()) + token_data["expires_in"]

        return token_data

    @staticmethod
    def build_authorization_url(
        oauth_config: OAuthConfig, redirect_uri: str, state: str
    ) -> str:
        """Build OAuth authorization URL"""
        if oauth_config.client_id is None:
            raise ValueError("OAuth client_id is required to build authorization URL")

        params: dict[str, Any] = {
            "client_id": OAuthTokenManager._unwrap_sensitive_str(
                oauth_config.client_id
            ),
            "redirect_uri": redirect_uri,
            "response_type": "code",
            "state": state,
        }

        # Add scopes if configured
        if oauth_config.scopes:
            params["scope"] = " ".join(oauth_config.scopes)

        # Add any additional provider-specific parameters
        if oauth_config.additional_params:
            params.update(oauth_config.additional_params)

        # Check if URL already has query parameters
        separator = "&" if "?" in oauth_config.authorization_url else "?"

        return f"{oauth_config.authorization_url}{separator}{urlencode(params)}"

    @staticmethod
    def _unwrap_sensitive_str(value: SensitiveValue[str] | str) -> str:
        if isinstance(value, SensitiveValue):
            return value.get_value(apply_mask=False)
        return value

    @staticmethod
    def _unwrap_token_data(
        token_data: SensitiveValue[dict[str, Any]] | dict[str, Any],
    ) -> dict[str, Any]:
        if isinstance(token_data, SensitiveValue):
            return token_data.get_value(apply_mask=False)
        return token_data


================================================
FILE: backend/onyx/auth/pat.py
================================================
"""Personal Access Token generation and validation."""

import hashlib
import secrets
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from urllib.parse import quote

from fastapi import Request

from onyx.auth.constants import PAT_LENGTH
from onyx.auth.constants import PAT_PREFIX
from onyx.auth.utils import get_hashed_bearer_token_from_request
from shared_configs.configs import MULTI_TENANT


def generate_pat(tenant_id: str | None = None) -> str:
    """Generate cryptographically secure PAT."""
    if MULTI_TENANT and tenant_id:
        encoded_tenant = quote(tenant_id)
        return f"{PAT_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(PAT_LENGTH)}"
    return PAT_PREFIX + secrets.token_urlsafe(PAT_LENGTH)


def hash_pat(token: str) -> str:
    """Hash PAT using SHA256 (no salt needed due to cryptographic randomness)."""
    return hashlib.sha256(token.encode("utf-8")).hexdigest()


def build_displayable_pat(token: str) -> str:
    """Create masked display version: show prefix + first 4 random chars, mask middle, show last 4.

    Example: onyx_pat_abc1****xyz9
    """
    # Show first 12 chars (onyx_pat_ + 4 random chars) and last 4 chars
    return f"{token[:12]}****{token[-4:]}"


def get_hashed_pat_from_request(request: Request) -> str | None:
    """Extract and hash PAT from Authorization header.

    Only accepts "Bearer <token>" format (unlike API keys which support raw format).
    """
    return get_hashed_bearer_token_from_request(
        request,
        valid_prefixes=[PAT_PREFIX],
        hash_fn=hash_pat,
        allow_non_bearer=False,  # PATs require Bearer prefix
    )


def calculate_expiration(days: int | None) -> datetime | None:
    """Calculate expiration at 23:59:59.999999 UTC on the target date. None = no expiration."""
    if days is None:
        return None
    expiry_date = datetime.now(timezone.utc).date() + timedelta(days=days)
    return datetime.combine(expiry_date, datetime.max.time()).replace(
        tzinfo=timezone.utc
    )


================================================
FILE: backend/onyx/auth/permissions.py
================================================
"""
Permission resolution for group-based authorization.

Granted permissions are stored as a JSONB column on the User table and
loaded for free with every auth query. Implied permissions are expanded
at read time — only directly granted permissions are persisted.
"""

from collections.abc import Callable
from collections.abc import Coroutine
from typing import Any

from fastapi import Depends

from onyx.auth.users import current_user
from onyx.db.enums import Permission
from onyx.db.models import User
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger

logger = setup_logger()

ALL_PERMISSIONS: frozenset[str] = frozenset(p.value for p in Permission)

# Implication map: granted permission -> set of permissions it implies.
IMPLIED_PERMISSIONS: dict[str, set[str]] = {
    Permission.ADD_AGENTS.value: {Permission.READ_AGENTS.value},
    Permission.MANAGE_AGENTS.value: {
        Permission.ADD_AGENTS.value,
        Permission.READ_AGENTS.value,
    },
    Permission.MANAGE_DOCUMENT_SETS.value: {
        Permission.READ_DOCUMENT_SETS.value,
        Permission.READ_CONNECTORS.value,
    },
    Permission.ADD_CONNECTORS.value: {Permission.READ_CONNECTORS.value},
    Permission.MANAGE_CONNECTORS.value: {
        Permission.ADD_CONNECTORS.value,
        Permission.READ_CONNECTORS.value,
    },
    Permission.MANAGE_USER_GROUPS.value: {
        Permission.READ_CONNECTORS.value,
        Permission.READ_DOCUMENT_SETS.value,
        Permission.READ_AGENTS.value,
        Permission.READ_USERS.value,
    },
}


def resolve_effective_permissions(granted: set[str]) -> set[str]:
    """Expand granted permissions with their implied permissions.

    If "admin" is present, returns all 19 permissions.
    """
    if Permission.FULL_ADMIN_PANEL_ACCESS.value in granted:
        return set(ALL_PERMISSIONS)

    effective = set(granted)
    changed = True
    while changed:
        changed = False
        for perm in list(effective):
            implied = IMPLIED_PERMISSIONS.get(perm)
            if implied and not implied.issubset(effective):
                effective |= implied
                changed = True
    return effective


def get_effective_permissions(user: User) -> set[Permission]:
    """Read granted permissions from the column and expand implied permissions."""
    granted: set[Permission] = set()
    for p in user.effective_permissions:
        try:
            granted.add(Permission(p))
        except ValueError:
            logger.warning(f"Skipping unknown permission '{p}' for user {user.id}")
    if Permission.FULL_ADMIN_PANEL_ACCESS in granted:
        return set(Permission)
    expanded = resolve_effective_permissions({p.value for p in granted})
    return {Permission(p) for p in expanded}


def require_permission(
    required: Permission,
) -> Callable[..., Coroutine[Any, Any, User]]:
    """FastAPI dependency factory for permission-based access control.

    Usage:
        @router.get("/endpoint")
        def endpoint(user: User = Depends(require_permission(Permission.MANAGE_CONNECTORS))):
            ...
    """

    async def dependency(user: User = Depends(current_user)) -> User:
        effective = get_effective_permissions(user)

        if Permission.FULL_ADMIN_PANEL_ACCESS in effective:
            return user

        if required not in effective:
            raise OnyxError(
                OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
                "You do not have the required permissions for this action.",
            )

        return user

    return dependency


================================================
FILE: backend/onyx/auth/schemas.py
================================================
import uuid
from enum import Enum
from typing import Any

from fastapi_users import schemas
from typing_extensions import override

from onyx.db.enums import AccountType


class UserRole(str, Enum):
    """
    User roles
    - Basic can't perform any admin actions
    - Admin can perform all admin actions
    - Curator can perform admin actions for
        groups they are curators of
    - Global Curator can perform admin actions
        for all groups they are a member of
    - Limited can access a limited set of basic api endpoints
    - Slack are users that have used onyx via slack but dont have a web login
    - External permissioned users that have been picked up during the external permissions sync process but don't have a web login
    """

    LIMITED = "limited"
    BASIC = "basic"
    ADMIN = "admin"
    CURATOR = "curator"
    GLOBAL_CURATOR = "global_curator"
    SLACK_USER = "slack_user"
    EXT_PERM_USER = "ext_perm_user"

    def is_web_login(self) -> bool:
        return self not in [
            UserRole.SLACK_USER,
            UserRole.EXT_PERM_USER,
        ]


class UserRead(schemas.BaseUser[uuid.UUID]):
    role: UserRole


class UserCreate(schemas.BaseUserCreate):
    role: UserRole = UserRole.BASIC
    account_type: AccountType = AccountType.STANDARD
    tenant_id: str | None = None
    # Captcha token for cloud signup protection (optional, only used when captcha is enabled)
    # Excluded from create_update_dict so it never reaches the DB layer
    captcha_token: str | None = None

    @override
    def create_update_dict(self) -> dict[str, Any]:
        d = super().create_update_dict()
        d.pop("captcha_token", None)
        # Force STANDARD for self-registration; only trusted paths
        # (SCIM, API key creation) supply a different account_type directly.
        d["account_type"] = AccountType.STANDARD
        return d

    @override
    def create_update_dict_superuser(self) -> dict[str, Any]:
        d = super().create_update_dict_superuser()
        d.pop("captcha_token", None)
        d.setdefault("account_type", self.account_type)
        return d


class UserUpdate(schemas.BaseUserUpdate):
    """
    Role updates are not allowed through the user update endpoint for security reasons
    Role changes should be handled through a separate, admin-only process
    """


class AuthBackend(str, Enum):
    REDIS = "redis"
    POSTGRES = "postgres"
    JWT = "jwt"


================================================
FILE: backend/onyx/auth/users.py
================================================
import base64
import hashlib
import json
import os
import random
import secrets
import string
import uuid
from collections.abc import AsyncGenerator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
from typing import TypeVar
from urllib.parse import urlparse

import jwt
from email_validator import EmailNotValidError
from email_validator import EmailUndeliverableError
from email_validator import validate_email
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi import status
from fastapi import WebSocket
from fastapi.responses import JSONResponse
from fastapi.responses import RedirectResponse
from fastapi.security import OAuth2PasswordRequestForm
from fastapi_users import BaseUserManager
from fastapi_users import exceptions
from fastapi_users import FastAPIUsers
from fastapi_users import models
from fastapi_users import schemas
from fastapi_users import UUIDIDMixin
from fastapi_users.authentication import AuthenticationBackend
from fastapi_users.authentication import CookieTransport
from fastapi_users.authentication import JWTStrategy
from fastapi_users.authentication import RedisStrategy
from fastapi_users.authentication import Strategy
from fastapi_users.authentication.strategy.db import AccessTokenDatabase
from fastapi_users.authentication.strategy.db import DatabaseStrategy
from fastapi_users.exceptions import UserAlreadyExists
from fastapi_users.jwt import decode_jwt
from fastapi_users.jwt import generate_jwt
from fastapi_users.jwt import SecretType
from fastapi_users.manager import UserManagerDependency
from fastapi_users.openapi import OpenAPIResponseType
from fastapi_users.router.common import ErrorCode
from fastapi_users.router.common import ErrorModel
from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase
from httpx_oauth.integrations.fastapi import OAuth2AuthorizeCallback
from httpx_oauth.oauth2 import BaseOAuth2
from httpx_oauth.oauth2 import GetAccessTokenError
from httpx_oauth.oauth2 import OAuth2Token
from pydantic import BaseModel
from sqlalchemy import nulls_last
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session

from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.disposable_email_validator import is_disposable_email
from onyx.auth.email_utils import send_forgot_password_email
from onyx.auth.email_utils import send_user_verification_email
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import remove_user_from_invited_users
from onyx.auth.jwt import verify_jwt_token
from onyx.auth.pat import get_hashed_pat_from_request
from onyx.auth.schemas import AuthBackend
from onyx.auth.schemas import UserCreate
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import AUTH_BACKEND
from onyx.configs.app_configs import AUTH_COOKIE_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import JWT_PUBLIC_KEY_URL
from onyx.configs.app_configs import PASSWORD_MAX_LENGTH
from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.app_configs import PASSWORD_REQUIRE_DIGIT
from onyx.configs.app_configs import PASSWORD_REQUIRE_LOWERCASE
from onyx.configs.app_configs import PASSWORD_REQUIRE_SPECIAL_CHAR
from onyx.configs.app_configs import PASSWORD_REQUIRE_UPPERCASE
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import REQUIRE_EMAIL_VERIFICATION
from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import ANONYMOUS_USER_UUID
from onyx.configs.constants import AuthType
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from onyx.configs.constants import DANSWER_API_KEY_PREFIX
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import PASSWORD_SPECIAL_CHARS
from onyx.configs.constants import UNNAMED_KEY_PLACEHOLDER
from onyx.db.api_key import fetch_user_for_api_key
from onyx.db.auth import get_access_token_db
from onyx.db.auth import get_default_admin_user_emails
from onyx.db.auth import get_user_count
from onyx.db.auth import get_user_db
from onyx.db.auth import SQLAlchemyUserAdminDB
from onyx.db.engine.async_sql_engine import get_async_session
from onyx.db.engine.async_sql_engine import get_async_session_context_manager
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.enums import AccountType
from onyx.db.models import AccessToken
from onyx.db.models import OAuthAccount
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.pat import fetch_user_for_pat
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.db.users import get_user_by_email
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import log_onyx_error
from onyx.error_handling.exceptions import onyx_error_to_json_response
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import retrieve_ws_token_data
from onyx.server.settings.store import load_settings
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_alias
from onyx.utils.telemetry import mt_cloud_get_anon_id
from onyx.utils.telemetry import mt_cloud_identify
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from onyx.utils.timing import log_function_time
from onyx.utils.url import add_url_params
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.configs import async_return_default_schema
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

REGISTER_INVITE_ONLY_CODE = "REGISTER_INVITE_ONLY"


def is_user_admin(user: User) -> bool:
    return user.role == UserRole.ADMIN


def verify_auth_setting() -> None:
    """Log warnings for AUTH_TYPE issues.

    This only runs on app startup not during migrations/scripts.
    """
    raw_auth_type = (os.environ.get("AUTH_TYPE") or "").lower()

    if raw_auth_type == "cloud":
        raise ValueError(
            "'cloud' is not a valid auth type for self-hosted deployments."
        )
    if raw_auth_type == "disabled":
        logger.warning(
            "AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration."
        )

    logger.notice(f"Using Auth Type: {AUTH_TYPE.value}")


def get_display_email(email: str | None, space_less: bool = False) -> str:
    if email and email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN):
        name = email.split("@")[0]
        if name == DANSWER_API_KEY_PREFIX + UNNAMED_KEY_PLACEHOLDER:
            return "Unnamed API Key"

        if space_less:
            return name

        return name.replace("API_KEY__", "API Key: ")

    return email or ""


def generate_password() -> str:
    lowercase_letters = string.ascii_lowercase
    uppercase_letters = string.ascii_uppercase
    digits = string.digits
    special_characters = string.punctuation

    # Ensure at least one of each required character type
    password = [
        secrets.choice(uppercase_letters),
        secrets.choice(digits),
        secrets.choice(special_characters),
    ]

    # Fill the rest with a mix of characters
    remaining_length = 12 - len(password)
    all_characters = lowercase_letters + uppercase_letters + digits + special_characters
    password.extend(secrets.choice(all_characters) for _ in range(remaining_length))

    # Shuffle the password to randomize the position of the required characters
    random.shuffle(password)

    return "".join(password)


def user_needs_to_be_verified() -> bool:
    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:
        return REQUIRE_EMAIL_VERIFICATION

    # For other auth types, if the user is authenticated it's assumed that
    # the user is already verified via the external IDP
    return False


def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
    from onyx.cache.factory import get_cache_backend

    cache = get_cache_backend(tenant_id=tenant_id)
    value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)

    if value is None:
        return False

    return int(value.decode("utf-8")) == 1


def workspace_invite_only_enabled() -> bool:
    settings = load_settings()
    return settings.invite_only_enabled


def verify_email_is_invited(email: str) -> None:
    if AUTH_TYPE in {AuthType.SAML, AuthType.OIDC}:
        # SSO providers manage membership; allow JIT provisioning regardless of invites
        return

    if not workspace_invite_only_enabled():
        return

    whitelist = get_invited_users()

    if not email:
        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email must be specified")

    try:
        email_info = validate_email(email, check_deliverability=False)
    except EmailUndeliverableError:
        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email is not valid")

    for email_whitelist in whitelist:
        try:
            # normalized emails are now being inserted into the db
            # we can remove this normalization on read after some time has passed
            email_info_whitelist = validate_email(
                email_whitelist, check_deliverability=False
            )
        except EmailNotValidError:
            continue

        # oddly, normalization does not include lowercasing the user part of the
        # email address ... which we want to allow
        if email_info.normalized.lower() == email_info_whitelist.normalized.lower():
            return

    raise OnyxError(
        OnyxErrorCode.UNAUTHORIZED,
        "This workspace is invite-only. Please ask your admin to invite you.",
    )


def verify_email_in_whitelist(email: str, tenant_id: str) -> None:
    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        if not get_user_by_email(email, db_session):
            verify_email_is_invited(email)


def verify_email_domain(email: str, *, is_registration: bool = False) -> None:
    if email.count("@") != 1:
        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email is not valid")

    local_part, domain = email.split("@")
    domain = domain.lower()
    local_part = local_part.lower()

    if AUTH_TYPE == AuthType.CLOUD:
        # Normalize googlemail.com to gmail.com (they deliver to the same inbox)
        if domain == "googlemail.com":
            raise OnyxError(
                OnyxErrorCode.INVALID_INPUT,
                "Please use @gmail.com instead of @googlemail.com.",
            )

        # Only block dotted Gmail on new signups — existing users must still be
        # able to sign in with the address they originally registered with.
        if is_registration and domain == "gmail.com" and "." in local_part:
            raise OnyxError(
                OnyxErrorCode.INVALID_INPUT,
                "Gmail addresses with '.' are not allowed. Please use your base email address.",
            )

        if "+" in local_part and domain != "onyx.app":
            raise OnyxError(
                OnyxErrorCode.INVALID_INPUT,
                "Email addresses with '+' are not allowed. Please use your base email address.",
            )

    # Check if email uses a disposable/temporary domain
    if is_disposable_email(email):
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            "Disposable email addresses are not allowed. Please use a permanent email address.",
        )

    # Check domain whitelist if configured
    if VALID_EMAIL_DOMAINS:
        if domain not in VALID_EMAIL_DOMAINS:
            raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email domain is not valid")


def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
    """Raise HTTPException(402) if adding users would exceed the seat limit.

    No-op for multi-tenant or CE deployments.
    """
    if MULTI_TENANT:
        return

    result = fetch_ee_implementation_or_noop(
        "onyx.db.license", "check_seat_availability", None
    )(db_session, seats_needed=seats_needed)

    if result is not None and not result.available:
        raise OnyxError(OnyxErrorCode.SEAT_LIMIT_EXCEEDED, result.error_message)


class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    reset_password_token_secret = USER_AUTH_SECRET
    verification_token_secret = USER_AUTH_SECRET
    verification_token_lifetime_seconds = AUTH_COOKIE_EXPIRE_TIME_SECONDS
    user_db: SQLAlchemyUserDatabase[User, uuid.UUID]

    async def get_by_email(self, user_email: str) -> User:
        tenant_id = fetch_ee_implementation_or_noop(
            "onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
        )(user_email)
        async with get_async_session_context_manager(tenant_id) as db_session:
            if MULTI_TENANT:
                tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
                    db_session, User, OAuthAccount
                )
                user = await tenant_user_db.get_by_email(user_email)
            else:
                user = await self.user_db.get_by_email(user_email)

        if not user:
            raise exceptions.UserNotExists()

        return user

    async def create(
        self,
        user_create: schemas.UC | UserCreate,
        safe: bool = False,
        request: Optional[Request] = None,
    ) -> User:
        # Verify captcha if enabled (for cloud signup protection)
        from onyx.auth.captcha import CaptchaVerificationError
        from onyx.auth.captcha import is_captcha_enabled
        from onyx.auth.captcha import verify_captcha_token

        if is_captcha_enabled() and request is not None:
            # Get captcha token from request body or headers
            captcha_token = None
            if hasattr(user_create, "captcha_token"):
                captcha_token = getattr(user_create, "captcha_token", None)

            # Also check headers as a fallback
            if not captcha_token:
                captcha_token = request.headers.get("X-Captcha-Token")

            try:
                await verify_captcha_token(
                    captcha_token or "", expected_action="signup"
                )
            except CaptchaVerificationError as e:
                raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))

        # We verify the password here to make sure it's valid before we proceed
        await self.validate_password(
            user_create.password, cast(schemas.UC, user_create)
        )

        # Check for disposable emails BEFORE provisioning tenant
        # This prevents creating tenants for throwaway email addresses
        try:
            verify_email_domain(user_create.email, is_registration=True)
        except OnyxError as e:
            # Log blocked disposable email attempts
            if "Disposable email" in e.detail:
                domain = (
                    user_create.email.split("@")[-1]
                    if "@" in user_create.email
                    else "unknown"
                )
                logger.warning(
                    f"Blocked disposable email registration attempt: {domain}",
                    extra={"email_domain": domain},
                )
            raise

        user_count: int | None = None
        referral_source = (
            request.cookies.get("referral_source", None)
            if request is not None
            else None
        )

        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=user_create.email,
            referral_source=referral_source,
            request=request,
        )
        user: User

        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
        try:
            async with get_async_session_context_manager(tenant_id) as db_session:
                # Check invite list based on deployment mode
                if MULTI_TENANT:
                    # Multi-tenant: Only require invite for existing tenants
                    # New tenant creation (first user) doesn't require an invite
                    user_count = await get_user_count()
                    if user_count > 0:
                        # Tenant already has users - require invite for new users
                        verify_email_is_invited(user_create.email)
                else:
                    # Single-tenant: Check invite list (skips if SAML/OIDC or no list configured)
                    verify_email_is_invited(user_create.email)
                if MULTI_TENANT:
                    tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
                        db_session, User, OAuthAccount
                    )
                    self.user_db = tenant_user_db

                if hasattr(user_create, "role"):
                    user_create.role = UserRole.BASIC

                    user_count = await get_user_count()
                    if (
                        user_count == 0
                        or user_create.email in get_default_admin_user_emails()
                    ):
                        user_create.role = UserRole.ADMIN

                # Check seat availability for new users (single-tenant only)
                with get_session_with_current_tenant() as sync_db:
                    existing = get_user_by_email(user_create.email, sync_db)
                    if existing is None:
                        enforce_seat_limit(sync_db)

                user_created = False
                try:
                    user = await super().create(user_create, safe=safe, request=request)
                    user_created = True
                except IntegrityError as error:
                    # Race condition: another request created the same user after the
                    # pre-insert existence check but before our commit.
                    await self.user_db.session.rollback()
                    logger.warning(
                        "IntegrityError while creating user %s, assuming duplicate: %s",
                        user_create.email,
                        str(error),
                    )
                    try:
                        user = await self.get_by_email(user_create.email)
                    except exceptions.UserNotExists:
                        # Unexpected integrity error, surface it for handling upstream.
                        raise error

                    if MULTI_TENANT:
                        user_by_session = await db_session.get(User, user.id)
                        if user_by_session:
                            user = user_by_session

                    if (
                        user.account_type.is_web_login()
                        or not isinstance(user_create, UserCreate)
                        or not user_create.account_type.is_web_login()
                    ):
                        raise exceptions.UserAlreadyExists()

                    # Cache id before expire — accessing attrs on an expired
                    # object triggers a sync lazy-load which raises MissingGreenlet
                    # in this async context.
                    user_id = user.id
                    self._upgrade_user_to_standard__sync(user_id, user_create)
                    # Expire so the async session re-fetches the row updated by
                    # the sync session above.
                    self.user_db.session.expire(user)
                    user = await self.user_db.get(user_id)  # type: ignore[assignment]
                except exceptions.UserAlreadyExists:
                    user = await self.get_by_email(user_create.email)

                    # we must use the existing user in the session if it matches
                    # the user we just got by email. Note that this only applies
                    # to multi-tenant, due to the overwriting of the user_db
                    if MULTI_TENANT:
                        user_by_session = await db_session.get(User, user.id)
                        if user_by_session:
                            user = user_by_session

                    # Handle case where user has used product outside of web and is now creating an account through web
                    if (
                        user.account_type.is_web_login()
                        or not isinstance(user_create, UserCreate)
                        or not user_create.account_type.is_web_login()
                    ):
                        raise exceptions.UserAlreadyExists()

                    # Cache id before expire — accessing attrs on an expired
                    # object triggers a sync lazy-load which raises MissingGreenlet
                    # in this async context.
                    user_id = user.id
                    self._upgrade_user_to_standard__sync(user_id, user_create)
                    # Expire so the async session re-fetches the row updated by
                    # the sync session above.
                    self.user_db.session.expire(user)
                    user = await self.user_db.get(user_id)  # type: ignore[assignment]
                if user_created:
                    await self._assign_default_pinned_assistants(user, db_session)
                remove_user_from_invited_users(user_create.email)
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
        return user

    async def _assign_default_pinned_assistants(
        self, user: User, db_session: AsyncSession
    ) -> None:
        if user.pinned_assistants is not None:
            return

        result = await db_session.execute(
            select(Persona.id)
            .where(
                Persona.is_featured.is_(True),
                Persona.is_public.is_(True),
                Persona.is_listed.is_(True),
                Persona.deleted.is_(False),
            )
            .order_by(
                nulls_last(Persona.display_priority.asc()),
                Persona.id.asc(),
            )
        )
        default_persona_ids = list(result.scalars().all())
        if not default_persona_ids:
            return

        await self.user_db.update(
            user,
            {"pinned_assistants": default_persona_ids},
        )
        user.pinned_assistants = default_persona_ids

    def _upgrade_user_to_standard__sync(
        self,
        user_id: uuid.UUID,
        user_create: UserCreate,
    ) -> None:
        """Upgrade a non-web user to STANDARD and assign default groups atomically.

        All writes happen in a single sync transaction so neither the field
        update nor the group assignment is visible without the other.
        """
        with get_session_with_current_tenant() as sync_db:
            sync_user = sync_db.query(User).filter(User.id == user_id).first()  # type: ignore[arg-type]
            if sync_user:
                sync_user.hashed_password = self.password_helper.hash(
                    user_create.password
                )
                sync_user.is_verified = user_create.is_verified or False
                sync_user.role = user_create.role
                sync_user.account_type = AccountType.STANDARD
                assign_user_to_default_groups__no_commit(
                    sync_db,
                    sync_user,
                    is_admin=(user_create.role == UserRole.ADMIN),
                )
                sync_db.commit()
            else:
                logger.warning(
                    "User %s not found in sync session during upgrade to standard; "
                    "skipping upgrade",
                    user_id,
                )

    async def validate_password(self, password: str, _: schemas.UC | models.UP) -> None:
        # Validate password according to configurable security policy (defined via environment variables)
        if len(password) < PASSWORD_MIN_LENGTH:
            raise exceptions.InvalidPasswordException(
                reason=f"Password must be at least {PASSWORD_MIN_LENGTH} characters long."
            )
        if len(password) > PASSWORD_MAX_LENGTH:
            raise exceptions.InvalidPasswordException(
                reason=f"Password must not exceed {PASSWORD_MAX_LENGTH} characters."
            )
        if PASSWORD_REQUIRE_UPPERCASE and not any(char.isupper() for char in password):
            raise exceptions.InvalidPasswordException(
                reason="Password must contain at least one uppercase letter."
            )
        if PASSWORD_REQUIRE_LOWERCASE and not any(char.islower() for char in password):
            raise exceptions.InvalidPasswordException(
                reason="Password must contain at least one lowercase letter."
            )
        if PASSWORD_REQUIRE_DIGIT and not any(char.isdigit() for char in password):
            raise exceptions.InvalidPasswordException(
                reason="Password must contain at least one number."
            )
        if PASSWORD_REQUIRE_SPECIAL_CHAR and not any(
            char in PASSWORD_SPECIAL_CHARS for char in password
        ):
            raise exceptions.InvalidPasswordException(
                reason=f"Password must contain at least one special character from the following set: {PASSWORD_SPECIAL_CHARS}."
            )
        return

    @log_function_time(print_only=True)
    async def oauth_callback(
        self,
        oauth_name: str,
        access_token: str,
        account_id: str,
        account_email: str,
        expires_at: Optional[int] = None,
        refresh_token: Optional[str] = None,
        request: Optional[Request] = None,
        *,
        associate_by_email: bool = False,
        is_verified_by_default: bool = False,
    ) -> User:
        referral_source = (
            getattr(request.state, "referral_source", None) if request else None
        )

        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=account_email,
            referral_source=referral_source,
            request=request,
        )

        if not tenant_id:
            raise HTTPException(status_code=401, detail="User not found")

        # Proceed with the tenant context
        token = None
        async with get_async_session_context_manager(tenant_id) as db_session:
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

            verify_email_in_whitelist(account_email, tenant_id)
            verify_email_domain(account_email)

            # NOTE(rkuo): If this UserManager is instantiated per connection
            # should we even be doing this here?
            if MULTI_TENANT:
                tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
                    db_session, User, OAuthAccount
                )
                self.user_db = tenant_user_db

            oauth_account_dict = {
                "oauth_name": oauth_name,
                "access_token": access_token,
                "account_id": account_id,
                "account_email": account_email,
                "expires_at": expires_at,
                "refresh_token": refresh_token,
            }

            user: User | None = None

            try:
                # Attempt to get user by OAuth account
                user = await self.get_by_oauth_account(oauth_name, account_id)

            except exceptions.UserNotExists:
                try:
                    # Attempt to get user by email
                    user = await self.user_db.get_by_email(account_email)
                    if not associate_by_email:
                        raise exceptions.UserAlreadyExists()

                    # Make sure user is not None before adding OAuth account
                    if user is not None:
                        user = await self.user_db.add_oauth_account(
                            user, oauth_account_dict
                        )
                    else:
                        # This shouldn't happen since get_by_email would raise UserNotExists
                        # but adding as a safeguard
                        raise exceptions.UserNotExists()

                except exceptions.UserNotExists:
                    verify_email_domain(account_email, is_registration=True)

                    # Check seat availability before creating (single-tenant only)
                    with get_session_with_current_tenant() as sync_db:
                        enforce_seat_limit(sync_db)

                    password = self.password_helper.generate()
                    user_dict = {
                        "email": account_email,
                        "hashed_password": self.password_helper.hash(password),
                        "is_verified": is_verified_by_default,
                        "account_type": AccountType.STANDARD,
                    }

                    user = await self.user_db.create(user_dict)
                    await self.user_db.add_oauth_account(user, oauth_account_dict)
                    await self._assign_default_pinned_assistants(user, db_session)
                    await self.on_after_register(user, request)

            else:
                # User exists, update OAuth account if needed
                if user is not None:  # Add explicit check
                    for existing_oauth_account in user.oauth_accounts:
                        if (
                            existing_oauth_account.account_id == account_id
                            and existing_oauth_account.oauth_name == oauth_name
                        ):
                            user = await self.user_db.update_oauth_account(
                                user,
                                # NOTE: OAuthAccount DOES implement the OAuthAccountProtocol
                                # but the type checker doesn't know that :(
                                existing_oauth_account,  # type: ignore
                                oauth_account_dict,
                            )

            # NOTE: Most IdPs have very short expiry times, and we don't want to force the user to
            # re-authenticate that frequently, so by default this is disabled
            if expires_at and TRACK_EXTERNAL_IDP_EXPIRY:
                oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc)
                await self.user_db.update(
                    user, update_dict={"oidc_expiry": oidc_expiry}
                )

            # Handle case where user has used product outside of web and is now creating an account through web
            if not user.account_type.is_web_login():
                # We must use the existing user in the session if it matches
                # the user we just got by email/oauth. Note that this only applies
                # to multi-tenant, due to the overwriting of the user_db
                if MULTI_TENANT:
                    if user.id:
                        user_by_session = await db_session.get(User, user.id)
                        if user_by_session:
                            user = user_by_session

                # If the user is inactive, check seat availability before
                # upgrading role — otherwise they'd become an inactive BASIC
                # user who still can't log in.
                if not user.is_active:
                    with get_session_with_current_tenant() as sync_db:
                        enforce_seat_limit(sync_db)

                # Upgrade the user and assign default groups in a single
                # transaction so neither change is visible without the other.
                was_inactive = not user.is_active
                with get_session_with_current_tenant() as sync_db:
                    sync_user = sync_db.query(User).filter(User.id == user.id).first()  # type: ignore[arg-type]
                    if sync_user:
                        sync_user.is_verified = is_verified_by_default
                        sync_user.role = UserRole.BASIC
                        sync_user.account_type = AccountType.STANDARD
                        if was_inactive:
                            sync_user.is_active = True
                        assign_user_to_default_groups__no_commit(sync_db, sync_user)
                        sync_db.commit()

                # Refresh the async user object so downstream code
                # (e.g. oidc_expiry check) sees the updated fields.
                self.user_db.session.expire(user)
                user = await self.user_db.get(user.id)
                assert user is not None

            # this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false`
            # otherwise, the oidc expiry will always be old, and the user will never be able to login
            if user.oidc_expiry is not None and not TRACK_EXTERNAL_IDP_EXPIRY:
                await self.user_db.update(user, {"oidc_expiry": None})
                user.oidc_expiry = None  # type: ignore
            remove_user_from_invited_users(user.email)
            if token:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

            return user

    async def on_after_login(
        self,
        user: User,
        request: Optional[Request] = None,
        response: Optional[Response] = None,
    ) -> None:
        try:
            if response and request and ANONYMOUS_USER_COOKIE_NAME in request.cookies:
                response.delete_cookie(
                    ANONYMOUS_USER_COOKIE_NAME,
                    # Ensure cookie deletion doesn't override other cookies by setting the same path/domain
                    path="/",
                    domain=None,
                    secure=WEB_DOMAIN.startswith("https"),
                )
                logger.debug(f"Deleted anonymous user cookie for user {user.email}")
        except Exception:
            logger.exception("Error deleting anonymous user cookie")

        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()

        # Link the anonymous PostHog session to the identified user so that
        # pre-login session recordings and events merge into one person profile.
        if anon_id := mt_cloud_get_anon_id(request):
            mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)

        mt_cloud_identify(
            distinct_id=str(user.id),
            properties={"email": user.email, "tenant_id": tenant_id},
        )

    async def on_after_register(
        self, user: User, request: Optional[Request] = None
    ) -> None:
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=user.email,
            request=request,
        )

        user_count = None
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
        try:
            user_count = await get_user_count()
            logger.debug(f"Current tenant user count: {user_count}")

            # Link the anonymous PostHog session to the identified user so
            # that pre-signup session recordings merge into one person profile.
            if anon_id := mt_cloud_get_anon_id(request):
                mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)

            # Ensure a PostHog person profile exists for this user.
            mt_cloud_identify(
                distinct_id=str(user.id),
                properties={"email": user.email, "tenant_id": tenant_id},
            )

            mt_cloud_telemetry(
                tenant_id=tenant_id,
                distinct_id=str(user.id),
                event=MilestoneRecordType.USER_SIGNED_UP,
            )

            if user_count == 1:
                mt_cloud_telemetry(
                    tenant_id=tenant_id,
                    distinct_id=str(user.id),
                    event=MilestoneRecordType.TENANT_CREATED,
                )

            # Assign user to the appropriate default group (Admin or Basic).
            # Must happen inside the try block while tenant context is active,
            # otherwise get_session_with_current_tenant() targets the wrong schema.
            is_admin = user_count == 1 or user.email in get_default_admin_user_emails()
            with get_session_with_current_tenant() as db_session:
                assign_user_to_default_groups__no_commit(
                    db_session, user, is_admin=is_admin
                )
                db_session.commit()

        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        # Fetch EE PostHog functions if available
        get_marketing_posthog_cookie_name = fetch_ee_implementation_or_noop(
            module="onyx.utils.posthog_client",
            attribute="get_marketing_posthog_cookie_name",
            noop_return_value=None,
        )
        parse_posthog_cookie = fetch_ee_implementation_or_noop(
            module="onyx.utils.posthog_client",
            attribute="parse_posthog_cookie",
            noop_return_value=None,
        )
        capture_and_sync_with_alternate_posthog = fetch_ee_implementation_or_noop(
            module="onyx.utils.posthog_client",
            attribute="capture_and_sync_with_alternate_posthog",
            noop_return_value=None,
        )

        if (
            request
            and user_count is not None
            and (marketing_cookie_name := get_marketing_posthog_cookie_name())
            and (marketing_cookie_value := request.cookies.get(marketing_cookie_name))
            and (parsed_cookie := parse_posthog_cookie(marketing_cookie_value))
        ):
            marketing_anonymous_id = parsed_cookie["distinct_id"]

            # Technically, USER_SIGNED_UP is only fired from the cloud site when
            # it is the first user in a tenant. However, it is semantically correct
            # for the marketing site and should probably be refactored for the cloud site
            # to also be semantically correct.
            properties = {
                "email": user.email,
                "onyx_cloud_user_id": str(user.id),
                "tenant_id": str(tenant_id) if tenant_id else None,
                "role": user.role.value,
                "is_first_user": user_count == 1,
                "source": "marketing_site_signup",
                "conversion_timestamp": datetime.now(timezone.utc).isoformat(),
            }

            # Add all other values from the marketing cookie (featureFlags, etc.)
            for key, value in parsed_cookie.items():
                if key != "distinct_id":
                    properties.setdefault(key, value)

            capture_and_sync_with_alternate_posthog(
                alternate_distinct_id=marketing_anonymous_id,
                event=MilestoneRecordType.USER_SIGNED_UP,
                properties=properties,
            )

        logger.debug(f"User {user.id} has registered.")
        optional_telemetry(
            record_type=RecordType.SIGN_UP,
            data={"action": "create"},
            user_id=str(user.id),
        )

    async def on_after_forgot_password(
        self,
        user: User,
        token: str,
        request: Optional[Request] = None,  # noqa: ARG002
    ) -> None:
        if not EMAIL_CONFIGURED:
            logger.error(
                "Email is not configured. Please configure email in the admin panel"
            )
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                "Your admin has not enabled this feature.",
            )
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(email=user.email)

        send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)

    async def on_after_request_verify(
        self,
        user: User,
        token: str,
        request: Optional[Request] = None,  # noqa: ARG002
    ) -> None:
        verify_email_domain(user.email)

        logger.notice(
            f"Verification requested for user {user.id}. Verification token: {token}"
        )
        user_count = await get_user_count()
        send_user_verification_email(
            user.email, token, new_organization=user_count == 1
        )

    @log_function_time(print_only=True)
    async def authenticate(
        self, credentials: OAuth2PasswordRequestForm
    ) -> Optional[User]:
        email = credentials.username

        tenant_id: str | None = None
        try:
            tenant_id = fetch_ee_implementation_or_noop(
                "onyx.server.tenants.provisioning",
                "get_tenant_id_for_email",
                POSTGRES_DEFAULT_SCHEMA,
            )(
                email=email,
            )
        except Exception as e:
            logger.warning(
                f"User attempted to login with invalid credentials: {str(e)}"
            )

        if not tenant_id:
            # User not found in mapping
            self.password_helper.hash(credentials.password)
            return None

        # Create a tenant-specific session
        async with get_async_session_context_manager(tenant_id) as tenant_session:
            tenant_user_db: SQLAlchemyUserDatabase = SQLAlchemyUserDatabase(
                tenant_session, User
            )
            self.user_db = tenant_user_db

            # Proceed with authentication
            try:
                user = await self.get_by_email(email)

            except exceptions.UserNotExists:
                self.password_helper.hash(credentials.password)
                return None

            if not user.account_type.is_web_login():
                raise BasicAuthenticationError(
                    detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
                )

            verified, updated_password_hash = self.password_helper.verify_and_update(
                credentials.password, user.hashed_password
            )
            if not verified:
                return None

            if updated_password_hash is not None:
                await self.user_db.update(
                    user, {"hashed_password": updated_password_hash}
                )

            return user

    async def reset_password_as_admin(self, user_id: uuid.UUID) -> str:
        """Admin-only. Generate a random password for a user and return it."""
        user = await self.get(user_id)
        new_password = generate_password()
        await self._update(user, {"password": new_password})
        return new_password

    async def change_password_if_old_matches(
        self, user: User, old_password: str, new_password: str
    ) -> None:
        """
        For normal users to change password if they know the old one.
        Raises 400 if old password doesn't match.
        """
        verified, updated_password_hash = self.password_helper.verify_and_update(
            old_password, user.hashed_password
        )
        if not verified:
            # Raise some HTTPException (or your custom exception) if old password is invalid:
            from fastapi import HTTPException, status

            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid current password",
            )

        # If the hash was upgraded behind the scenes, we can keep it before setting the new password:
        if updated_password_hash:
            user.hashed_password = updated_password_hash

        # Now apply and validate the new password
        await self._update(user, {"password": new_password})


async def get_user_manager(
    user_db: SQLAlchemyUserDatabase = Depends(get_user_db),
) -> AsyncGenerator[UserManager, None]:
    yield UserManager(user_db)


cookie_transport = CookieTransport(
    cookie_max_age=SESSION_EXPIRE_TIME_SECONDS,
    cookie_secure=WEB_DOMAIN.startswith("https"),
    cookie_name=FASTAPI_USERS_AUTH_COOKIE_NAME,
)


T = TypeVar("T", covariant=True)
ID = TypeVar("ID", contravariant=True)


# Protocol for strategies that support token refreshing without inheritance.
class RefreshableStrategy(Protocol):
    """Protocol for authentication strategies that support token refreshing."""

    async def refresh_token(self, token: Optional[str], user: Any) -> str:
        """
        Refresh an existing token by extending its lifetime.
        Returns either the same token with extended expiration or a new token.
        """
        ...


class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
    """
    A custom strategy that fetches the actual async Redis connection inside each method.
    We do NOT pass a synchronous or "coroutine" redis object to the constructor.
    """

    def __init__(
        self,
        lifetime_seconds: Optional[int] = SESSION_EXPIRE_TIME_SECONDS,
        key_prefix: str = REDIS_AUTH_KEY_PREFIX,
    ):
        self.lifetime_seconds = lifetime_seconds
        self.key_prefix = key_prefix

    async def write_token(self, user: User) -> str:
        redis = await get_async_redis_connection()

        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(email=user.email)

        token_data = {
            "sub": str(user.id),
            "tenant_id": tenant_id,
        }
        token = secrets.token_urlsafe()
        await redis.set(
            f"{self.key_prefix}{token}",
            json.dumps(token_data),
            ex=self.lifetime_seconds,
        )
        return token

    async def read_token(
        self, token: Optional[str], user_manager: BaseUserManager[User, uuid.UUID]
    ) -> Optional[User]:
        redis = await get_async_redis_connection()
        token_data_str = await redis.get(f"{self.key_prefix}{token}")
        if not token_data_str:
            return None

        try:
            token_data = json.loads(token_data_str)
            user_id = token_data["sub"]
            parsed_id = user_manager.parse_id(user_id)
            return await user_manager.get(parsed_id)
        except (exceptions.UserNotExists, exceptions.InvalidID, KeyError):
            return None

    async def destroy_token(self, token: str, user: User) -> None:  # noqa: ARG002
        """Properly delete the token from async redis."""
        redis = await get_async_redis_connection()
        await redis.delete(f"{self.key_prefix}{token}")

    async def refresh_token(self, token: Optional[str], user: User) -> str:
        """Refresh a token by extending its expiration time in Redis."""
        if token is None:
            # If no token provided, create a new one
            return await self.write_token(user)

        redis = await get_async_redis_connection()
        token_key = f"{self.key_prefix}{token}"

        # Check if token exists
        token_data_str = await redis.get(token_key)
        if not token_data_str:
            # Token not found, create new one
            return await self.write_token(user)

        # Token exists, extend its lifetime
        token_data = json.loads(token_data_str)
        await redis.set(
            token_key,
            json.dumps(token_data),
            ex=self.lifetime_seconds,
        )

        return token


class RefreshableDatabaseStrategy(DatabaseStrategy[User, uuid.UUID, AccessToken]):
    """Database strategy with token refreshing capabilities."""

    def __init__(
        self,
        access_token_db: AccessTokenDatabase[AccessToken],
        lifetime_seconds: Optional[int] = None,
    ):
        super().__init__(access_token_db, lifetime_seconds)
        self._access_token_db = access_token_db

    async def refresh_token(self, token: Optional[str], user: User) -> str:
        """Refresh a token by updating its expiration time in the database."""
        if token is None:
            return await self.write_token(user)

        # Find the token in database
        access_token = await self._access_token_db.get_by_token(token)

        if access_token is None:
            # Token not found, create new one
            return await self.write_token(user)

        # Update expiration time
        new_expires = datetime.now(timezone.utc) + timedelta(
            seconds=float(self.lifetime_seconds or SESSION_EXPIRE_TIME_SECONDS)
        )
        await self._access_token_db.update(access_token, {"expires": new_expires})

        return token


class SingleTenantJWTStrategy(JWTStrategy[User, uuid.UUID]):
    """Stateless JWT strategy for single-tenant deployments.

    Tokens are self-contained and verified via signature — no Redis or DB
    lookup required per request. An ``iat`` claim is embedded so that
    downstream code can determine when the token was created without
    querying an external store.

    Refresh is implemented by issuing a brand-new JWT (the old one remains
    valid until its natural expiry).  ``destroy_token`` is a no-op because
    JWTs cannot be server-side invalidated.
    """

    def __init__(
        self,
        secret: SecretType,
        lifetime_seconds: int | None = SESSION_EXPIRE_TIME_SECONDS,
        token_audience: list[str] | None = None,
        algorithm: str = "HS256",
        public_key: SecretType | None = None,
    ):
        super().__init__(
            secret=secret,
            lifetime_seconds=lifetime_seconds,
            token_audience=token_audience or ["fastapi-users:auth"],
            algorithm=algorithm,
            public_key=public_key,
        )

    async def write_token(self, user: User) -> str:
        data = {
            "sub": str(user.id),
            "aud": self.token_audience,
            "iat": int(datetime.now(timezone.utc).timestamp()),
        }
        return generate_jwt(
            data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm
        )

    async def destroy_token(self, token: str, user: User) -> None:  # noqa: ARG002
        # JWTs are stateless — nothing to invalidate server-side.
        # NOTE: a compromise that makes JWT auth stateful but revocable
        # is to include a token_version claim in the JWT payload. The token_version
        # is incremented whenever the user logs out (or gets login revoked). Whenever
        # the JWT is used, it is only valid if the token_version claim is the same as the one
        # in the db. If not, the JWT is invalid and the user needs to login again.
        return

    async def refresh_token(
        self,
        token: Optional[str],  # noqa: ARG002
        user: User,  # noqa: ARG002
    ) -> str:
        """Issue a fresh JWT with a new expiry."""
        return await self.write_token(user)


def get_redis_strategy() -> TenantAwareRedisStrategy:
    return TenantAwareRedisStrategy()


def get_database_strategy(
    access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db),
) -> RefreshableDatabaseStrategy:
    return RefreshableDatabaseStrategy(
        access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS
    )


def get_jwt_strategy() -> SingleTenantJWTStrategy:
    return SingleTenantJWTStrategy(
        secret=USER_AUTH_SECRET,
        lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,
    )


if AUTH_BACKEND == AuthBackend.JWT:
    if MULTI_TENANT or AUTH_TYPE == AuthType.CLOUD:
        raise ValueError(
            "JWT auth backend is only supported for single-tenant, self-hosted deployments. Use 'redis' or 'postgres' instead."
        )
    if not USER_AUTH_SECRET:
        raise ValueError("USER_AUTH_SECRET is required for JWT auth backend.")

if AUTH_BACKEND == AuthBackend.REDIS:
    auth_backend = AuthenticationBackend(
        name="redis", transport=cookie_transport, get_strategy=get_redis_strategy
    )
elif AUTH_BACKEND == AuthBackend.POSTGRES:
    auth_backend = AuthenticationBackend(
        name="postgres", transport=cookie_transport, get_strategy=get_database_strategy
    )
elif AUTH_BACKEND == AuthBackend.JWT:
    auth_backend = AuthenticationBackend(
        name="jwt", transport=cookie_transport, get_strategy=get_jwt_strategy
    )
else:
    raise ValueError(f"Invalid auth backend: {AUTH_BACKEND}")


class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
    def get_logout_router(
        self,
        backend: AuthenticationBackend,
        requires_verification: bool = REQUIRE_EMAIL_VERIFICATION,
    ) -> APIRouter:
        """
        Provide a router for logout only for OAuth/OIDC Flows.
        This way the login router does not need to be included
        """
        router = APIRouter()

        get_current_user_token = self.authenticator.current_user_token(
            active=True, verified=requires_verification
        )

        logout_responses: OpenAPIResponseType = {
            **{
                status.HTTP_401_UNAUTHORIZED: {
                    "description": "Missing token or inactive user."
                }
            },
            **backend.transport.get_openapi_logout_responses_success(),
        }

        @router.post(
            "/logout", name=f"auth:{backend.name}.logout", responses=logout_responses
        )
        async def logout(
            user_token: Tuple[models.UP, str] = Depends(get_current_user_token),
            strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),
        ) -> Response:
            user, token = user_token
            return await backend.logout(strategy, user, token)

        return router

    def get_refresh_router(
        self,
        backend: AuthenticationBackend,
        requires_verification: bool = REQUIRE_EMAIL_VERIFICATION,
    ) -> APIRouter:
        """
        Provide a router for session token refreshing.
        """
        # Import the oauth_refresher here to avoid circular imports
        from onyx.auth.oauth_refresher import check_and_refresh_oauth_tokens

        router = APIRouter()

        get_current_user_token = self.authenticator.current_user_token(
            active=True, verified=requires_verification
        )

        refresh_responses: OpenAPIResponseType = {
            **{
                status.HTTP_401_UNAUTHORIZED: {
                    "description": "Missing token or inactive user."
                }
            },
            **backend.transport.get_openapi_login_responses_success(),
        }

        @router.post(
            "/refresh", name=f"auth:{backend.name}.refresh", responses=refresh_responses
        )
        async def refresh(
            user_token: Tuple[models.UP, str] = Depends(get_current_user_token),
            strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),
            user_manager: BaseUserManager[models.UP, models.ID] = Depends(
                get_user_manager
            ),
            db_session: AsyncSession = Depends(get_async_session),
        ) -> Response:
            try:
                user, token = user_token
                logger.info(f"Processing token refresh request for user {user.email}")

                # Check if user has OAuth accounts that need refreshing
                await check_and_refresh_oauth_tokens(
                    user=cast(User, user),
                    db_session=db_session,
                    user_manager=cast(Any, user_manager),
                )

                # Check if strategy supports refreshing
                supports_refresh = hasattr(strategy, "refresh_token") and callable(
                    getattr(strategy, "refresh_token")
                )

                if supports_refresh:
                    try:
                        refresh_method = getattr(strategy, "refresh_token")
                        new_token = await refresh_method(token, user)
                        logger.info(
                            f"Successfully refreshed session token for user {user.email}"
                        )
                        return await backend.transport.get_login_response(new_token)
                    except Exception as e:
                        logger.error(f"Error refreshing session token: {str(e)}")
                        # Fallback to logout and login if refresh fails
                        await backend.logout(strategy, user, token)
                        return await backend.login(strategy, user)

                # Fallback: logout and login again
                logger.info(
                    "Strategy doesn't support refresh - using logout/login flow"
                )
                await backend.logout(strategy, user, token)
                return await backend.login(strategy, user)
            except Exception as e:
                logger.error(f"Unexpected error in refresh endpoint: {str(e)}")
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Token refresh failed: {str(e)}",
                )

        return router


fastapi_users = FastAPIUserWithLogoutRouter[User, uuid.UUID](
    get_user_manager, [auth_backend]
)


# NOTE: verified=REQUIRE_EMAIL_VERIFICATION is not used here since we
# take care of that in `double_check_user` ourself. This is needed, since
# we want the /me endpoint to still return a user even if they are not
# yet verified, so that the frontend knows they exist
optional_fastapi_current_user = fastapi_users.current_user(active=True, optional=True)


_JWT_EMAIL_CLAIM_KEYS = ("email", "preferred_username", "upn")


def _extract_email_from_jwt(payload: dict[str, Any]) -> str | None:
    """Return the best-effort email/username from a decoded JWT payload."""
    for key in _JWT_EMAIL_CLAIM_KEYS:
        value = payload.get(key)
        if isinstance(value, str) and value:
            try:
                email_info = validate_email(value, check_deliverability=False)
            except EmailNotValidError:
                continue
            normalized_email = email_info.normalized or email_info.email
            return normalized_email.lower()
    return None


async def _sync_jwt_oidc_expiry(
    user_manager: UserManager, user: User, payload: dict[str, Any]
) -> None:
    if TRACK_EXTERNAL_IDP_EXPIRY:
        expires_at = payload.get("exp")
        if expires_at is None:
            return
        try:
            expiry_timestamp = int(expires_at)
        except (TypeError, ValueError):
            logger.warning("Invalid exp claim on JWT for user %s", user.email)
            return

        oidc_expiry = datetime.fromtimestamp(expiry_timestamp, tz=timezone.utc)
        if user.oidc_expiry == oidc_expiry:
            return

        await user_manager.user_db.update(user, {"oidc_expiry": oidc_expiry})
        user.oidc_expiry = oidc_expiry
        return

    if user.oidc_expiry is not None:
        await user_manager.user_db.update(user, {"oidc_expiry": None})
        user.oidc_expiry = None  # type: ignore


async def _get_or_create_user_from_jwt(
    payload: dict[str, Any],
    request: Request,
    async_db_session: AsyncSession,
) -> User | None:
    email = _extract_email_from_jwt(payload)
    if email is None:
        logger.warning(
            "JWT token decoded successfully but no email claim found; skipping auth"
        )
        return None

    # Enforce the same allowlist/domain policies as other auth flows
    verify_email_is_invited(email)
    verify_email_domain(email)

    user_db: SQLAlchemyUserAdminDB[User, uuid.UUID] = SQLAlchemyUserAdminDB(
        async_db_session, User, OAuthAccount
    )
    user_manager = UserManager(user_db)

    try:
        user = await user_manager.get_by_email(email)
        if not user.is_active:
            logger.warning("Inactive user %s attempted JWT login; skipping", email)
            return None
        if not user.account_type.is_web_login():
            raise exceptions.UserNotExists()
    except exceptions.UserNotExists:
        logger.info("Provisioning user %s from JWT login", email)
        try:
            user = await user_manager.create(
                UserCreate(
                    email=email,
                    password=generate_password(),
                    is_verified=True,
                ),
                request=request,
            )
        except exceptions.UserAlreadyExists:
            user = await user_manager.get_by_email(email)
            if not user.is_active:
                logger.warning(
                    "Inactive user %s attempted JWT login during provisioning race; skipping",
                    email,
                )
                return None
            if not user.account_type.is_web_login():
                logger.warning(
                    "Non-web-login user %s attempted JWT login during provisioning race; skipping",
                    email,
                )
                return None

    await _sync_jwt_oidc_expiry(user_manager, user, payload)
    return user


async def _check_for_saml_and_jwt(
    request: Request,
    user: User | None,
    async_db_session: AsyncSession,
) -> User | None:
    # If user is None, check for JWT in Authorization header
    if user is None and JWT_PUBLIC_KEY_URL is not None:
        auth_header = request.headers.get("Authorization")
        if auth_header and auth_header.startswith("Bearer "):
            token = auth_header[len("Bearer ") :].strip()
            payload = await verify_jwt_token(token)
            if payload is not None:
                user = await _get_or_create_user_from_jwt(
                    payload, request, async_db_session
                )

    return user


async def optional_user(
    request: Request,
    async_db_session: AsyncSession = Depends(get_async_session),
    user: User | None = Depends(optional_fastapi_current_user),
) -> User | None:

    if user := await _check_for_saml_and_jwt(request, user, async_db_session):
        # If user is already set, _check_for_saml_and_jwt returns the same user object
        return user

    try:
        if hashed_pat := get_hashed_pat_from_request(request):
            user = await fetch_user_for_pat(hashed_pat, async_db_session)
        elif hashed_api_key := get_hashed_api_key_from_request(request):
            user = await fetch_user_for_api_key(hashed_api_key, async_db_session)
    except ValueError:
        logger.warning("Issue with validating authentication token")
        return None

    return user


def get_anonymous_user() -> User:
    """Create anonymous user object."""
    user = User(
        id=uuid.UUID(ANONYMOUS_USER_UUID),
        email=ANONYMOUS_USER_EMAIL,
        hashed_password="",
        is_active=True,
        is_verified=True,
        is_superuser=False,
        role=UserRole.LIMITED,
        account_type=AccountType.ANONYMOUS,
        use_memories=False,
        enable_memory_tool=False,
    )
    return user


async def double_check_user(
    user: User | None,
    include_expired: bool = False,
    allow_anonymous_access: bool = False,
) -> User:
    if user is not None:
        # If user attempted to authenticate, verify them, do not default
        # to anonymous access if it fails.
        if user_needs_to_be_verified() and not user.is_verified:
            raise BasicAuthenticationError(
                detail="Access denied. User is not verified.",
            )

        if (
            user.oidc_expiry
            and user.oidc_expiry < datetime.now(timezone.utc)
            and not include_expired
        ):
            raise BasicAuthenticationError(
                detail="Access denied. User's OIDC token has expired.",
            )

        return user

    if allow_anonymous_access:
        return get_anonymous_user()

    raise BasicAuthenticationError(
        detail="Access denied. User is not authenticated.",
    )


async def current_user_with_expired_token(
    user: User | None = Depends(optional_user),
) -> User:
    return await double_check_user(user, include_expired=True)


async def current_limited_user(
    user: User | None = Depends(optional_user),
) -> User:
    return await double_check_user(user)


async def current_chat_accessible_user(
    user: User | None = Depends(optional_user),
) -> User:
    tenant_id = get_current_tenant_id()

    return await double_check_user(
        user, allow_anonymous_access=anonymous_user_enabled(tenant_id=tenant_id)
    )


async def current_user(
    user: User | None = Depends(optional_user),
) -> User:
    user = await double_check_user(user)

    if user.role == UserRole.LIMITED:
        raise BasicAuthenticationError(
            detail="Access denied. User role is LIMITED. BASIC or higher permissions are required.",
        )
    return user


async def current_curator_or_admin_user(
    user: User = Depends(current_user),
) -> User:
    allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN}
    if user.role not in allowed_roles:
        raise BasicAuthenticationError(
            detail="Access denied. User is not a curator or admin.",
        )

    return user


async def current_admin_user(user: User = Depends(current_user)) -> User:
    if user.role != UserRole.ADMIN:
        raise BasicAuthenticationError(
            detail="Access denied. User must be an admin to perform this action.",
        )

    return user


async def _get_user_from_token_data(token_data: dict) -> User | None:
    """Shared logic: token data dict → User object.

    Args:
        token_data: Decoded token data containing 'sub' (user ID).

    Returns:
        User object if found and active, None otherwise.
    """
    user_id = token_data.get("sub")
    if not user_id:
        return None

    try:
        user_uuid = uuid.UUID(user_id)
    except ValueError:
        return None

    async with get_async_session_context_manager() as async_db_session:
        user = await async_db_session.get(User, user_uuid)
        if user is None or not user.is_active:
            return None
        return user


_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1"})


def _is_same_origin(actual: str, expected: str) -> bool:
    """Compare two origins for the WebSocket CSWSH check.

    Scheme and hostname must match exactly.  Port must also match, except
    when the hostname is a loopback address (localhost / 127.0.0.1 / ::1),
    where port is ignored.  On loopback, all ports belong to the same
    operator, so port differences carry no security significance — the
    CSWSH threat is remote origins, not local ones.
    """
    a = urlparse(actual.rstrip("/"))
    e = urlparse(expected.rstrip("/"))

    if a.scheme != e.scheme or a.hostname != e.hostname:
        return False

    if a.hostname in _LOOPBACK_HOSTNAMES:
        return True

    actual_port = a.port or (443 if a.scheme == "https" else 80)
    expected_port = e.port or (443 if e.scheme == "https" else 80)

    return actual_port == expected_port


async def current_user_from_websocket(
    websocket: WebSocket,
    token: str = Query(..., description="WebSocket authentication token"),
) -> User:
    """
    WebSocket authentication dependency using query parameter.

    Validates the WS token from query param and returns the User.
    Raises BasicAuthenticationError if authentication fails.

    The token must be obtained from POST /voice/ws-token before connecting.
    Tokens are single-use and expire after 60 seconds.

    Usage:
        1. POST /voice/ws-token -> {"token": "xxx"}
        2. Connect to ws://host/path?token=xxx

    This applies the same auth checks as current_user() for HTTP endpoints.
    """
    # Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH).
    # Browsers always send Origin on WebSocket connections.
    origin = websocket.headers.get("origin")
    if not origin:
        logger.warning("WS auth: missing Origin header")
        raise BasicAuthenticationError(detail="Access denied. Missing origin.")

    if not _is_same_origin(origin, WEB_DOMAIN):
        logger.warning(f"WS auth: origin mismatch. Expected {WEB_DOMAIN}, got {origin}")
        raise BasicAuthenticationError(detail="Access denied. Invalid origin.")

    # Validate WS token in Redis (single-use, deleted after retrieval)
    try:
        token_data = await retrieve_ws_token_data(token)
        if token_data is None:
            raise BasicAuthenticationError(
                detail="Access denied. Invalid or expired authentication token."
            )
    except BasicAuthenticationError:
        raise
    except Exception as e:
        logger.error(f"WS auth: error during token validation: {e}")
        raise BasicAuthenticationError(
            detail="Authentication verification failed."
        ) from e

    # Get user from token data
    user = await _get_user_from_token_data(token_data)
    if user is None:
        logger.warning(f"WS auth: user not found for id={token_data.get('sub')}")
        raise BasicAuthenticationError(
            detail="Access denied. User not found or inactive."
        )

    # Apply same checks as HTTP auth (verification, OIDC expiry, role)
    user = await double_check_user(user)

    # Block LIMITED users (same as current_user)
    if user.role == UserRole.LIMITED:
        logger.warning(f"WS auth: user {user.email} has LIMITED role")
        raise BasicAuthenticationError(
            detail="Access denied. User role is LIMITED. BASIC or higher permissions are required.",
        )

    logger.debug(f"WS auth: authenticated {user.email}")
    return user


def get_default_admin_user_emails_() -> list[str]:
    # No default seeding available for Onyx MIT
    return []


STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
PKCE_COOKIE_NAME_PREFIX = "fastapiusersoauthpkce"


class OAuth2AuthorizeResponse(BaseModel):
    authorization_url: str


def generate_state_token(
    data: Dict[str, str],
    secret: SecretType,
    lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
) -> str:
    data["aud"] = STATE_TOKEN_AUDIENCE

    return generate_jwt(data, secret, lifetime_seconds)


def generate_csrf_token() -> str:
    return secrets.token_urlsafe(32)


def _base64url_encode(data: bytes) -> str:
    return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")


def generate_pkce_pair() -> tuple[str, str]:
    verifier = secrets.token_urlsafe(64)
    challenge = _base64url_encode(hashlib.sha256(verifier.encode("ascii")).digest())
    return verifier, challenge


def get_pkce_cookie_name(state: str) -> str:
    state_hash = hashlib.sha256(state.encode("utf-8")).hexdigest()
    return f"{PKCE_COOKIE_NAME_PREFIX}_{state_hash}"


# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
    oauth_client: BaseOAuth2,
    backend: AuthenticationBackend,
    state_secret: SecretType,
    redirect_url: Optional[str] = None,
    associate_by_email: bool = False,
    is_verified_by_default: bool = False,
    enable_pkce: bool = False,
) -> APIRouter:
    return get_oauth_router(
        oauth_client,
        backend,
        get_user_manager,
        state_secret,
        redirect_url,
        associate_by_email,
        is_verified_by_default,
        enable_pkce=enable_pkce,
    )


def get_oauth_router(
    oauth_client: BaseOAuth2,
    backend: AuthenticationBackend,
    get_user_manager: UserManagerDependency[models.UP, models.ID],
    state_secret: SecretType,
    redirect_url: Optional[str] = None,
    associate_by_email: bool = False,
    is_verified_by_default: bool = False,
    *,
    csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
    csrf_token_cookie_path: str = "/",
    csrf_token_cookie_domain: Optional[str] = None,
    csrf_token_cookie_secure: Optional[bool] = None,
    csrf_token_cookie_httponly: bool = True,
    csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
    enable_pkce: bool = False,
) -> APIRouter:
    """Generate a router with the OAuth routes."""
    router = APIRouter()
    callback_route_name = f"oauth:{oauth_client.name}.{backend.name}.callback"

    if redirect_url is not None:
        oauth2_authorize_callback = OAuth2AuthorizeCallback(
            oauth_client,
            redirect_url=redirect_url,
        )
    else:
        oauth2_authorize_callback = OAuth2AuthorizeCallback(
            oauth_client,
            route_name=callback_route_name,
        )

    async def null_access_token_state() -> tuple[OAuth2Token, Optional[str]] | None:
        return None

    access_token_state_dependency = (
        oauth2_authorize_callback if not enable_pkce else null_access_token_state
    )

    if csrf_token_cookie_secure is None:
        csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")

    @router.get(
        "/authorize",
        name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
        response_model=OAuth2AuthorizeResponse,
    )
    async def authorize(
        request: Request,
        response: Response,
        redirect: bool = Query(False),
        scopes: List[str] = Query(None),
    ) -> Response | OAuth2AuthorizeResponse:
        referral_source = request.cookies.get("referral_source", None)

        if redirect_url is not None:
            authorize_redirect_url = redirect_url
        else:
            # Use WEB_DOMAIN instead of request.url_for() to prevent host
            # header poisoning — request.url_for() trusts the Host header.
            callback_path = request.app.url_path_for(callback_route_name)
            authorize_redirect_url = f"{WEB_DOMAIN}{callback_path}"

        next_url = request.query_params.get("next", "/")

        csrf_token = generate_csrf_token()
        state_data: Dict[str, str] = {
            "next_url": next_url,
            "referral_source": referral_source or "default_referral",
            CSRF_TOKEN_KEY: csrf_token,
        }
        state = generate_state_token(state_data, state_secret)
        pkce_cookie: tuple[str, str] | None = None

        if enable_pkce:
            code_verifier, code_challenge = generate_pkce_pair()
            pkce_cookie_name = get_pkce_cookie_name(state)
            pkce_cookie = (pkce_cookie_name, code_verifier)
            authorization_url = await oauth_client.get_authorization_url(
                authorize_redirect_url,
                state,
                scopes,
                code_challenge=code_challenge,
                code_challenge_method="S256",
            )
        else:
            # Get the basic authorization URL
            authorization_url = await oauth_client.get_authorization_url(
                authorize_redirect_url,
                state,
                scopes,
            )

        # For Google OAuth, add parameters to request refresh tokens
        if oauth_client.name == "google":
            authorization_url = add_url_params(
                authorization_url, {"access_type": "offline", "prompt": "consent"}
            )

        def set_oauth_cookie(
            target_response: Response,
            *,
            key: str,
            value: str,
        ) -> None:
            target_response.set_cookie(
                key=key,
                value=value,
                max_age=STATE_TOKEN_LIFETIME_SECONDS,
                path=csrf_token_cookie_path,
                domain=csrf_token_cookie_domain,
                secure=csrf_token_cookie_secure,
                httponly=csrf_token_cookie_httponly,
                samesite=csrf_token_cookie_samesite,
            )

        response_with_cookies: Response
        if redirect:
            response_with_cookies = RedirectResponse(authorization_url, status_code=302)
        else:
            response_with_cookies = response

        set_oauth_cookie(
            response_with_cookies,
            key=csrf_token_cookie_name,
            value=csrf_token,
        )
        if pkce_cookie is not None:
            pkce_cookie_name, code_verifier = pkce_cookie
            set_oauth_cookie(
                response_with_cookies,
                key=pkce_cookie_name,
                value=code_verifier,
            )

        if redirect:
            return response_with_cookies

        return OAuth2AuthorizeResponse(authorization_url=authorization_url)

    @log_function_time(print_only=True)
    @router.get(
        "/callback",
        name=callback_route_name,
        description="The response varies based on the authentication backend used.",
        responses={
            status.HTTP_400_BAD_REQUEST: {
                "model": ErrorModel,
                "content": {
                    "application/json": {
                        "examples": {
                            "INVALID_STATE_TOKEN": {
                                "summary": "Invalid state token.",
                                "value": None,
                            },
                            ErrorCode.LOGIN_BAD_CREDENTIALS: {
                                "summary": "User is inactive.",
                                "value": {"detail": ErrorCode.LOGIN_BAD_CREDENTIALS},
                            },
                        }
                    }
                },
            },
        },
    )
    async def callback(
        request: Request,
        access_token_state: Tuple[OAuth2Token, Optional[str]] | None = Depends(
            access_token_state_dependency
        ),
        code: Optional[str] = None,
        state: Optional[str] = None,
        error: Optional[str] = None,
        user_manager: BaseUserManager[models.UP, models.ID] = Depends(get_user_manager),
        strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),
    ) -> Response:
        pkce_cookie_name: str | None = None

        def delete_pkce_cookie(response: Response) -> None:
            if enable_pkce and pkce_cookie_name:
                response.delete_cookie(
                    key=pkce_cookie_name,
                    path=csrf_token_cookie_path,
                    domain=csrf_token_cookie_domain,
                    secure=csrf_token_cookie_secure,
                    httponly=csrf_token_cookie_httponly,
                    samesite=csrf_token_cookie_samesite,
                )

        def build_error_response(exc: OnyxError) -> JSONResponse:
            log_onyx_error(exc)
            error_response = onyx_error_to_json_response(exc)
            delete_pkce_cookie(error_response)
            return error_response

        def decode_and_validate_state(state_value: str) -> Dict[str, str]:
            try:
                state_data = decode_jwt(
                    state_value, state_secret, [STATE_TOKEN_AUDIENCE]
                )
            except jwt.DecodeError:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    getattr(
                        ErrorCode,
                        "ACCESS_TOKEN_DECODE_ERROR",
                        "ACCESS_TOKEN_DECODE_ERROR",
                    ),
                )
            except jwt.ExpiredSignatureError:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    getattr(
                        ErrorCode,
                        "ACCESS_TOKEN_ALREADY_EXPIRED",
                        "ACCESS_TOKEN_ALREADY_EXPIRED",
                    ),
                )
            except jwt.PyJWTError:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    getattr(
                        ErrorCode,
                        "ACCESS_TOKEN_DECODE_ERROR",
                        "ACCESS_TOKEN_DECODE_ERROR",
                    ),
                )

            cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
            state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
            if (
                not cookie_csrf_token
                or not state_csrf_token
                or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
            ):
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
                )

            return state_data

        token: OAuth2Token
        state_data: Dict[str, str]

        # `code`, `state`, and `error` are read directly only in the PKCE path.
        # In the non-PKCE path, `oauth2_authorize_callback` consumes them.
        if enable_pkce:
            if state is not None:
                pkce_cookie_name = get_pkce_cookie_name(state)

            if error is not None:
                return build_error_response(
                    OnyxError(
                        OnyxErrorCode.VALIDATION_ERROR,
                        "Authorization request failed or was denied",
                    )
                )
            if code is None:
                return build_error_response(
                    OnyxError(
                        OnyxErrorCode.VALIDATION_ERROR,
                        "Missing authorization code in OAuth callback",
                    )
                )
            if state is None:
                return build_error_response(
                    OnyxError(
                        OnyxErrorCode.VALIDATION_ERROR,
                        "Missing state parameter in OAuth callback",
                    )
                )

            state_value = state

            if redirect_url is not None:
                callback_redirect_url = redirect_url
            else:
                callback_path = request.app.url_path_for(callback_route_name)
                callback_redirect_url = f"{WEB_DOMAIN}{callback_path}"

            code_verifier = request.cookies.get(cast(str, pkce_cookie_name))
            if not code_verifier:
                return build_error_response(
                    OnyxError(
                        OnyxErrorCode.VALIDATION_ERROR,
                        "Missing PKCE verifier cookie in OAuth callback",
                    )
                )

            try:
                state_data = decode_and_validate_state(state_value)
            except OnyxError as e:
                return build_error_response(e)

            try:
                token = await oauth_client.get_access_token(
                    code, callback_redirect_url, code_verifier
                )
            except GetAccessTokenError:
                return build_error_response(
                    OnyxError(
                        OnyxErrorCode.VALIDATION_ERROR,
                        "Authorization code exchange failed",
                    )
                )
        else:
            if access_token_state is None:
                raise OnyxError(
                    OnyxErrorCode.INTERNAL_ERROR, "Missing OAuth callback state"
                )
            token, callback_state = access_token_state
            if callback_state is None:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    "Missing state parameter in OAuth callback",
                )
            state_data = decode_and_validate_state(callback_state)

        async def complete_login_flow(
            token: OAuth2Token, state_data: Dict[str, str]
        ) -> RedirectResponse:
            account_id, account_email = await oauth_client.get_id_email(
                token["access_token"]
            )

            if account_email is None:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,
                )

            next_url = state_data.get("next_url", "/")
            referral_source = state_data.get("referral_source", None)
            try:
                tenant_id = fetch_ee_implementation_or_noop(
                    "onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
                )(account_email)
            except exceptions.UserNotExists:
                tenant_id = None

            request.state.referral_source = referral_source

            # Proceed to authenticate or create the user
            try:
                user = await user_manager.oauth_callback(
                    oauth_client.name,
                    token["access_token"],
                    account_id,
                    account_email,
                    token.get("expires_at"),
                    token.get("refresh_token"),
                    request,
                    associate_by_email=associate_by_email,
                    is_verified_by_default=is_verified_by_default,
                )
            except UserAlreadyExists:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    ErrorCode.OAUTH_USER_ALREADY_EXISTS,
                )

            if not user.is_active:
                raise OnyxError(
                    OnyxErrorCode.VALIDATION_ERROR,
                    ErrorCode.LOGIN_BAD_CREDENTIALS,
                )

            # Login user
            response = await backend.login(strategy, user)
            await user_manager.on_after_login(user, request, response)

            # Prepare redirect response
            if tenant_id is None:
                # Use URL utility to add parameters
                redirect_destination = add_url_params(next_url, {"new_team": "true"})
                redirect_response = RedirectResponse(
                    redirect_destination, status_code=302
                )
            else:
                # No parameters to add
                redirect_response = RedirectResponse(next_url, status_code=302)

            # Copy headers from auth response to redirect response, with special handling for Set-Cookie
            for header_name, header_value in response.headers.items():
                header_name_lower = header_name.lower()
                if header_name_lower == "set-cookie":
                    redirect_response.headers.append(header_name, header_value)
                    continue
                if header_name_lower in {"location", "content-length"}:
                    continue
                redirect_response.headers[header_name] = header_value

            return redirect_response

        if enable_pkce:
            try:
                redirect_response = await complete_login_flow(token, state_data)
            except OnyxError as e:
                return build_error_response(e)
            delete_pkce_cookie(redirect_response)
            return redirect_response

        return await complete_login_flow(token, state_data)

    return router


================================================
FILE: backend/onyx/auth/utils.py
================================================
"""Shared authentication utilities for bearer token extraction and validation."""

from collections.abc import Callable
from urllib.parse import unquote

from fastapi import Request

from onyx.auth.constants import API_KEY_HEADER_ALTERNATIVE_NAME
from onyx.auth.constants import API_KEY_HEADER_NAME
from onyx.auth.constants import API_KEY_PREFIX
from onyx.auth.constants import BEARER_PREFIX
from onyx.auth.constants import DEPRECATED_API_KEY_PREFIX
from onyx.auth.constants import PAT_PREFIX


def get_hashed_bearer_token_from_request(
    request: Request,
    valid_prefixes: list[str],
    hash_fn: Callable[[str], str],
    allow_non_bearer: bool = False,
) -> str | None:
    """Generic extraction and hashing of bearer tokens from request headers.

    Args:
        request: The FastAPI request
        valid_prefixes: List of valid token prefixes (e.g., ["on_", "onyx_pat_"])
        hash_fn: Function to hash the token (e.g., hash_api_key or hash_pat)
        allow_non_bearer: If True, accept raw tokens without "Bearer " prefix

    Returns:
        Hashed token if valid format, else None
    """
    auth_header = request.headers.get(
        API_KEY_HEADER_ALTERNATIVE_NAME
    ) or request.headers.get(API_KEY_HEADER_NAME)

    if not auth_header:
        return None

    # Handle bearer format
    if auth_header.startswith(BEARER_PREFIX):
        token = auth_header[len(BEARER_PREFIX) :].strip()
    elif allow_non_bearer:
        token = auth_header
    else:
        return None

    # Check if token starts with any valid prefix
    if valid_prefixes:
        valid = any(token.startswith(prefix) for prefix in valid_prefixes)
        if not valid:
            return None

    return hash_fn(token)


def _extract_tenant_from_bearer_token(
    request: Request, valid_prefixes: list[str]
) -> str | None:
    """Generic tenant extraction from bearer token. Returns None if invalid format.

    Args:
        request: The FastAPI request
        valid_prefixes: List of valid token prefixes (e.g., ["on_", "dn_"])

    Returns:
        Tenant ID if found in format <prefix><tenant>.<random>, else None
    """
    auth_header = request.headers.get(
        API_KEY_HEADER_ALTERNATIVE_NAME
    ) or request.headers.get(API_KEY_HEADER_NAME)

    if not auth_header or not auth_header.startswith(BEARER_PREFIX):
        return None

    token = auth_header[len(BEARER_PREFIX) :].strip()

    # Check if token starts with any valid prefix
    matched_prefix = None
    for prefix in valid_prefixes:
        if token.startswith(prefix):
            matched_prefix = prefix
            break

    if not matched_prefix:
        return None

    # Parse tenant from token format: <prefix><tenant>.<random>
    parts = token[len(matched_prefix) :].split(".", 1)
    if len(parts) != 2:
        return None

    tenant_id = parts[0]
    return unquote(tenant_id) if tenant_id else None


def extract_tenant_from_auth_header(request: Request) -> str | None:
    """Extract tenant ID from API key or PAT header.

    Unified function for extracting tenant from any bearer token (API key or PAT).
    Checks all known token prefixes in order.

    Returns:
        Tenant ID if found, else None
    """
    return _extract_tenant_from_bearer_token(
        request, [API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX, PAT_PREFIX]
    )


================================================
FILE: backend/onyx/background/README.md
================================================
# Overview of Onyx Background Jobs

The background jobs take care of:
1. Pulling/Indexing documents (from connectors)
2. Updating document metadata (from connectors)
3. Cleaning up checkpoints and logic around indexing work (indexing indexing checkpoints and index attempt metadata)
4. Handling user uploaded files and deletions (from the Projects feature and uploads via the Chat)
5. Reporting metrics on things like queue length for monitoring purposes

## Worker → Queue Mapping

| Worker | File | Queues |
|--------|------|--------|
| Primary | `apps/primary.py` | `celery` |
| Light | `apps/light.py` | `vespa_metadata_sync`, `connector_deletion`, `doc_permissions_upsert`, `checkpoint_cleanup`, `index_attempt_cleanup` |
| Heavy | `apps/heavy.py` | `connector_pruning`, `connector_doc_permissions_sync`, `connector_external_group_sync`, `csv_generation`, `sandbox` |
| Docprocessing | `apps/docprocessing.py` | `docprocessing` |
| Docfetching | `apps/docfetching.py` | `connector_doc_fetching` |
| User File Processing | `apps/user_file_processing.py` | `user_file_processing`, `user_file_project_sync`, `user_file_delete` |
| Monitoring | `apps/monitoring.py` | `monitoring` |
| Background (consolidated) | `apps/background.py` | All queues above except `celery` |

## Non-Worker Apps
| App | File | Purpose |
|-----|------|---------|
| **Beat** | `beat.py` | Celery beat scheduler with `DynamicTenantScheduler` that generates per-tenant periodic task schedules |
| **Client** | `client.py` | Minimal app for task submission from non-worker processes (e.g., API server) |

### Shared Module
`app_base.py` provides:
- `TenantAwareTask` - Base task class that sets tenant context
- Signal handlers for logging, cleanup, and lifecycle events
- Readiness probes and health checks


## Worker Details

### Primary (Coordinator and task dispatcher)
It is the single worker which handles tasks from the default celery queue. It is a singleton worker ensured by the `PRIMARY_WORKER` Redis lock
which it touches every `CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8` seconds (using Celery Bootsteps)

On startup:
- waits for redis, postgres, document index to all be healthy
- acquires the singleton lock
- cleans all the redis states associated with background jobs
- mark orphaned index attempts failed

Then it cycles through its tasks as scheduled by Celery Beat:

| Task | Frequency | Description |
|------|-----------|-------------|
| `check_for_indexing` | 15s | Scans for connectors needing indexing → dispatches to `DOCFETCHING` queue |
| `check_for_vespa_sync_task` | 20s | Finds stale documents/document sets → dispatches sync tasks to `VESPA_METADATA_SYNC` queue |
| `check_for_pruning` | 20s | Finds connectors due for pruning → dispatches to `CONNECTOR_PRUNING` queue |
| `check_for_connector_deletion` | 20s | Processes deletion requests → dispatches to `CONNECTOR_DELETION` queue |
| `check_for_user_file_processing` | 20s | Checks for user uploads → dispatches to `USER_FILE_PROCESSING` queue |
| `check_for_checkpoint_cleanup` | 1h | Cleans up old indexing checkpoints |
| `check_for_index_attempt_cleanup` | 30m | Cleans up old index attempts |
| `kombu_message_cleanup_task` | periodic | Cleans orphaned Kombu messages from DB (Kombu being the messaging framework used by Celery) |
| `celery_beat_heartbeat` | 1m | Heartbeat for Beat watchdog |

Watchdog is a separate Python process managed by supervisord which runs alongside celery workers. It checks the ONYX_CELERY_BEAT_HEARTBEAT_KEY in
Redis to ensure Celery Beat is not dead. Beat schedules the celery_beat_heartbeat for Primary to touch the key and share that it's still alive.
See supervisord.conf for watchdog config.


### Light
Fast and short living tasks that are not resource intensive. High concurrency:
Can have 24 concurrent workers, each with a prefetch of 8 for a total of 192 tasks in flight at once.

Tasks it handles:
- Syncs access/permissions, document sets, boosts, hidden state
- Deletes documents that are marked for deletion in Postgres
- Cleanup of checkpoints and index attempts


### Heavy
Long running, resource intensive tasks, handles pruning and sandbox operations. Low concurrency - max concurrency of 4 with 1 prefetch.

Does not interact with the Document Index, it handles the syncs with external systems. Large volume API calls to handle pruning and fetching permissions, etc.

Generates CSV exports which may take a long time with significant data in Postgres.

Sandbox (new feature) for running Next.js, Python virtual env, OpenCode AI Agent, and access to knowledge files


### Docprocessing, Docfetching, User File Processing
Docprocessing and Docfetching are for indexing documents:
- Docfetching runs connectors to pull documents from external APIs (Google Drive, Confluence, etc.), stores batches to file storage, and dispatches docprocessing tasks
- Docprocessing retrieves batches, runs the indexing pipeline (chunking, embedding), and indexes into the Document Index 
User Files come from uploads directly via the input bar


### Monitoring
Observability and metrics collections:
- Queue lengths, connector success/failure, lconnector latencies
- Memory of supervisor managed processes (workers, beat, slack)
- Cloud and multitenant specific monitorings


================================================
FILE: backend/onyx/background/celery/apps/app_base.py
================================================
import logging
import multiprocessing
import os
import time
from typing import Any
from typing import cast

import sentry_sdk
from celery import bootsteps  # type: ignore
from celery import Task
from celery.app import trace
from celery.exceptions import WorkerShutdown
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.states import READY_STATES
from celery.utils.log import get_task_logger
from celery.worker import strategy  # type: ignore
from redis.lock import Lock as RedisLock
from sentry_sdk.integrations.celery import CeleryIntegration
from sqlalchemy import text
from sqlalchemy.orm import Session

from onyx import __version__
from onyx.background.celery.apps.task_formatters import CeleryTaskColoredFormatter
from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
    wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.tracing.setup import setup_tracing
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import PlainFormatter
from onyx.utils.logger import setup_logger
from shared_configs.configs import DEV_LOGGING_ENABLED
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SENTRY_DSN
from shared_configs.configs import TENANT_ID_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()

task_logger = get_task_logger(__name__)

if SENTRY_DSN:
    sentry_sdk.init(
        dsn=SENTRY_DSN,
        integrations=[CeleryIntegration()],
        traces_sample_rate=0.1,
        release=__version__,
    )
    logger.info("Sentry initialized")
else:
    logger.debug("Sentry DSN not provided, skipping Sentry initialization")


class TenantAwareTask(Task):
    """A custom base Task that sets tenant_id in a contextvar before running."""

    abstract = True  # So Celery knows not to register this as a real task.

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        # Grab tenant_id from the kwargs, or fallback to default if missing.
        tenant_id = kwargs.get("tenant_id", None) or POSTGRES_DEFAULT_SCHEMA

        # Set the context var
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        # Actually run the task now
        try:
            return super().__call__(*args, **kwargs)
        finally:
            # Clear or reset after the task runs
            # so it does not leak into any subsequent tasks on the same worker process
            CURRENT_TENANT_ID_CONTEXTVAR.set(None)


@task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,  # noqa: ARG001
    task_id: str | None = None,  # noqa: ARG001
    task: Task | None = None,  # noqa: ARG001
    args: tuple[Any, ...] | None = None,  # noqa: ARG001
    kwargs: dict[str, Any] | None = None,  # noqa: ARG001
    **other_kwargs: Any,  # noqa: ARG001
) -> None:
    # Reset any per-task logging context so that prefixes (e.g. pruning_ctx)
    # from a previous task executed in the same worker process do not leak
    # into the next task's log messages. This fixes incorrect [CC Pair:/Index Attempt]
    # prefixes observed when a pruning task finishes and an indexing task
    # runs in the same process.

    LoggerContextVars.reset()


def on_task_postrun(
    sender: Any | None = None,  # noqa: ARG001
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,  # noqa: ARG001
    kwargs: dict[str, Any] | None = None,
    retval: Any | None = None,  # noqa: ARG001
    state: str | None = None,
    **kwds: Any,  # noqa: ARG001
) -> None:
    """We handle this signal in order to remove completed tasks
    from their respective tasksets. This allows us to track the progress of document set
    and user group syncs.

    This function runs after any task completes (both success and failure)
    Note that this signal does not fire on a task that failed to complete and is going
    to be retried.

    This also does not fire if a worker with acks_late=False crashes (which all of our
    long running workers are)
    """
    if not task:
        return

    task_logger.debug(f"Task {task.name} (ID: {task_id}) completed with state: {state}")

    if state not in READY_STATES:
        return

    if not task_id:
        return

    if task.name.startswith(ONYX_CLOUD_CELERY_TASK_PREFIX):
        # this is a cloud / all tenant task ... no postrun is needed
        return

    # Get tenant_id directly from kwargs- each celery task has a tenant_id kwarg
    if not kwargs:
        logger.error(f"Task {task.name} (ID: {task_id}) is missing kwargs")
        tenant_id = POSTGRES_DEFAULT_SCHEMA
    else:
        tenant_id = cast(str, kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA))

    task_logger.debug(
        f"Task {task.name} (ID: {task_id}) completed with state: {state} {f'for tenant_id={tenant_id}' if tenant_id else ''}"
    )

    r = get_redis_client(tenant_id=tenant_id)

    # NOTE: we want to remove the `Redis*` classes, prefer to just have functions to
    # do these things going forward. In short, things should generally be like the doc
    # sync task rather than the others below
    if task_id.startswith(DOCUMENT_SYNC_PREFIX):
        r.srem(DOCUMENT_SYNC_TASKSET_KEY, task_id)
        return

    if task_id.startswith(RedisDocumentSet.PREFIX):
        document_set_id = RedisDocumentSet.get_id_from_task_id(task_id)
        if document_set_id is not None:
            rds = RedisDocumentSet(tenant_id, int(document_set_id))
            r.srem(rds.taskset_key, task_id)
        return

    if task_id.startswith(RedisUserGroup.PREFIX):
        usergroup_id = RedisUserGroup.get_id_from_task_id(task_id)
        if usergroup_id is not None:
            rug = RedisUserGroup(tenant_id, int(usergroup_id))
            r.srem(rug.taskset_key, task_id)
        return

    if task_id.startswith(RedisConnectorDelete.PREFIX):
        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
        if cc_pair_id is not None:
            RedisConnectorDelete.remove_from_taskset(int(cc_pair_id), task_id, r)
        return

    if task_id.startswith(RedisConnectorPrune.SUBTASK_PREFIX):
        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
        if cc_pair_id is not None:
            RedisConnectorPrune.remove_from_taskset(int(cc_pair_id), task_id, r)
        return

    if task_id.startswith(RedisConnectorPermissionSync.SUBTASK_PREFIX):
        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
        if cc_pair_id is not None:
            RedisConnectorPermissionSync.remove_from_taskset(
                int(cc_pair_id), task_id, r
            )
        return

    if task_id.startswith(RedisConnectorExternalGroupSync.SUBTASK_PREFIX):
        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
        if cc_pair_id is not None:
            RedisConnectorExternalGroupSync.remove_from_taskset(
                int(cc_pair_id), task_id, r
            )
        return


def on_celeryd_init(
    sender: str,  # noqa: ARG001
    conf: Any = None,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> None:
    """The first signal sent on celery worker startup"""

    # NOTE(rkuo): start method "fork" is unsafe and we really need it to be "spawn"
    # But something is blocking set_start_method from working in the cloud unless
    # force=True. so we use force=True as a fallback.

    all_start_methods: list[str] = multiprocessing.get_all_start_methods()
    logger.info(f"Multiprocessing all start methods: {all_start_methods}")

    try:
        multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
    except Exception:
        logger.info(
            "Multiprocessing set_start_method exceptioned. Trying force=True..."
        )
        try:
            multiprocessing.set_start_method(
                "spawn", force=True
            )  # fork is unsafe, set to spawn
        except Exception:
            logger.info(
                "Multiprocessing set_start_method force=True exceptioned even with force=True."
            )

    logger.info(
        f"Multiprocessing selected start method: {multiprocessing.get_start_method()}"
    )

    # Initialize tracing in workers if credentials are available.
    setup_tracing()


def wait_for_redis(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001
    """Waits for redis to become ready subject to a hardcoded timeout.
    Will raise WorkerShutdown to kill the celery worker if the timeout
    is reached."""

    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)

    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60

    ready = False
    time_start = time.monotonic()
    logger.info("Redis: Readiness probe starting.")
    while True:
        try:
            if r.ping():
                ready = True
                break
        except Exception:
            pass

        time_elapsed = time.monotonic() - time_start
        if time_elapsed > WAIT_LIMIT:
            break

        logger.info(
            f"Redis: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
        )

        time.sleep(WAIT_INTERVAL)

    if not ready:
        msg = f"Redis: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting..."
        logger.error(msg)
        raise WorkerShutdown(msg)

    logger.info("Redis: Readiness probe succeeded. Continuing...")
    return


def wait_for_db(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001
    """Waits for the db to become ready subject to a hardcoded timeout.
    Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""

    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60

    ready = False
    time_start = time.monotonic()
    logger.info("Database: Readiness probe starting.")
    while True:
        try:
            with Session(get_sqlalchemy_engine()) as db_session:
                result = db_session.execute(text("SELECT NOW()")).scalar()
                if result:
                    ready = True
                    break
        except Exception:
            pass

        time_elapsed = time.monotonic() - time_start
        if time_elapsed > WAIT_LIMIT:
            break

        logger.info(
            f"Database: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
        )

        time.sleep(WAIT_INTERVAL)

    if not ready:
        msg = f"Database: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting..."
        logger.error(msg)
        raise WorkerShutdown(msg)

    logger.info("Database: Readiness probe succeeded. Continuing...")
    return


def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001
    logger.info(f"Running as a secondary celery worker: pid={os.getpid()}")

    # Set up variables for waiting on primary worker
    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60
    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)
    time_start = time.monotonic()

    logger.info("Waiting for primary worker to be ready...")
    while True:
        if r.exists(OnyxRedisLocks.PRIMARY_WORKER):
            break

        time_elapsed = time.monotonic() - time_start
        logger.info(
            f"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
        )
        if time_elapsed > WAIT_LIMIT:
            msg = f"Primary worker was not ready within the timeout. ({WAIT_LIMIT} seconds). Exiting..."
            logger.error(msg)
            raise WorkerShutdown(msg)

        time.sleep(WAIT_INTERVAL)

    logger.info("Wait for primary worker completed successfully. Continuing...")
    return


def on_worker_ready(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001
    task_logger.info("worker_ready signal received.")

    # file based way to do readiness/liveness probes
    # https://medium.com/ambient-innovation/health-checks-for-celery-in-kubernetes-cf3274a3e106
    # https://github.com/celery/celery/issues/4079#issuecomment-1270085680

    hostname: str = cast(str, sender.hostname)
    path = make_probe_path("readiness", hostname)
    path.touch()
    logger.info(f"Readiness signal touched at {path}.")


def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001
    HttpxPool.close_all()

    hostname: str = cast(str, sender.hostname)
    path = make_probe_path("readiness", hostname)
    path.unlink(missing_ok=True)

    if not celery_is_worker_primary(sender):
        return

    if not hasattr(sender, "primary_worker_lock"):
        # primary_worker_lock will not exist when MULTI_TENANT is True
        return

    if not sender.primary_worker_lock:
        return

    logger.info("Releasing primary worker lock.")
    lock: RedisLock = sender.primary_worker_lock
    try:
        if lock.owned():
            try:
                lock.release()
                sender.primary_worker_lock = None
            except Exception:
                logger.exception("Failed to release primary worker lock")
    except Exception:
        logger.exception("Failed to check if primary worker lock is owned")


def on_setup_logging(
    loglevel: int,
    logfile: str | None,
    format: str,  # noqa: ARG001
    colorize: bool,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> None:
    # TODO: could unhardcode format and colorize and accept these as options from
    # celery's config

    root_logger = logging.getLogger()
    root_logger.handlers = []

    # Define the log format
    log_format = (
        "%(levelname)-8s %(asctime)s %(filename)15s:%(lineno)-4d: %(name)s %(message)s"
    )

    # Set up the root handler
    root_handler = logging.StreamHandler()
    root_formatter = ColoredFormatter(
        log_format,
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )
    root_handler.setFormatter(root_formatter)
    root_logger.addHandler(root_handler)

    if logfile:
        # Truncate log file if DEV_LOGGING_ENABLED (for clean dev experience)
        if DEV_LOGGING_ENABLED and os.path.exists(logfile):
            try:
                open(logfile, "w").close()  # Truncate the file
            except Exception:
                pass  # Ignore errors, just proceed with normal logging

        root_file_handler = logging.FileHandler(logfile)
        root_file_formatter = PlainFormatter(
            log_format,
            datefmt="%m/%d/%Y %I:%M:%S %p",
        )
        root_file_handler.setFormatter(root_file_formatter)
        root_logger.addHandler(root_file_handler)

    root_logger.setLevel(loglevel)

    # Configure the task logger
    task_logger.handlers = []

    task_handler = logging.StreamHandler()
    task_handler.addFilter(TenantContextFilter())
    task_formatter = CeleryTaskColoredFormatter(
        log_format,
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )
    task_handler.setFormatter(task_formatter)
    task_logger.addHandler(task_handler)

    if logfile:
        # No need to truncate again, already done above for root logger
        task_file_handler = logging.FileHandler(logfile)
        task_file_handler.addFilter(TenantContextFilter())
        task_file_formatter = CeleryTaskPlainFormatter(
            log_format,
            datefmt="%m/%d/%Y %I:%M:%S %p",
        )
        task_file_handler.setFormatter(task_file_formatter)
        task_logger.addHandler(task_file_handler)

    task_logger.setLevel(loglevel)
    task_logger.propagate = False

    # hide celery task received spam
    # e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] received"
    strategy.logger.setLevel(logging.WARNING)

    # uncomment this to hide celery task succeeded/failed spam
    # e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] succeeded in 0.03137450001668185s: None"
    trace.logger.setLevel(logging.WARNING)


def set_task_finished_log_level(logLevel: int) -> None:
    """call this to override the setLevel in on_setup_logging. We are interested
    in the task timings in the cloud but it can be spammy for self hosted."""
    trace.logger.setLevel(logLevel)


class TenantContextFilter(logging.Filter):
    """Logging filter to inject tenant ID into the logger's name."""

    def filter(self, record: logging.LogRecord) -> bool:
        if not MULTI_TENANT:
            record.name = ""
            return True

        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        if tenant_id:
            # Match the 8 character tenant abbreviation used in OnyxLoggingAdapter
            tenant_id = tenant_id.split(TENANT_ID_PREFIX)[-1][:8]
            record.name = f"[t:{tenant_id}]"
        else:
            record.name = ""
        return True


@task_postrun.connect
def reset_tenant_id(
    sender: Any | None = None,  # noqa: ARG001
    task_id: str | None = None,  # noqa: ARG001
    task: Task | None = None,  # noqa: ARG001
    args: tuple[Any, ...] | None = None,  # noqa: ARG001
    kwargs: dict[str, Any] | None = None,  # noqa: ARG001
    **other_kwargs: Any,  # noqa: ARG001
) -> None:
    """Signal handler to reset tenant ID in context var after task ends."""
    CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)


def wait_for_vespa_or_shutdown(
    sender: Any,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Waits for Vespa to become ready subject to a timeout.
    Raises WorkerShutdown if the timeout is reached."""

    if DISABLE_VECTOR_DB:
        logger.info(
            "DISABLE_VECTOR_DB is set — skipping Vespa/OpenSearch readiness check."
        )
        return

    if not wait_for_vespa_with_timeout():
        msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
        logger.error(msg)
        raise WorkerShutdown(msg)

    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        if not wait_for_opensearch_with_timeout():
            msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
            logger.error(msg)
            raise WorkerShutdown(msg)


# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):
    requires = {"celery.worker.components:Timer"}

    def __init__(self, worker: Any, **kwargs: Any) -> None:
        super().__init__(worker, **kwargs)
        self.requests: list[Any] = []
        self.task_tref = None
        self.path = make_probe_path("liveness", worker.hostname)

    def start(self, worker: Any) -> None:
        self.task_tref = worker.timer.call_repeatedly(
            15.0,
            self.update_liveness_file,
            (worker,),
            priority=10,
        )

    def stop(self, worker: Any) -> None:  # noqa: ARG002
        self.path.unlink(missing_ok=True)
        if self.task_tref:
            self.task_tref.cancel()

    def update_liveness_file(self, worker: Any) -> None:  # noqa: ARG002
        self.path.touch()


def get_bootsteps() -> list[type]:
    return [LivenessProbe]


# Task modules that require a vector DB (Vespa/OpenSearch).
# When DISABLE_VECTOR_DB is True these are excluded from autodiscover lists.
_VECTOR_DB_TASK_MODULES: set[str] = {
    "onyx.background.celery.tasks.connector_deletion",
    "onyx.background.celery.tasks.docprocessing",
    "onyx.background.celery.tasks.docfetching",
    "onyx.background.celery.tasks.pruning",
    "onyx.background.celery.tasks.vespa",
    "onyx.background.celery.tasks.opensearch_migration",
    "onyx.background.celery.tasks.doc_permission_syncing",
    "onyx.background.celery.tasks.hierarchyfetching",
    # EE modules that are vector-DB-dependent
    "ee.onyx.background.celery.tasks.doc_permission_syncing",
    "ee.onyx.background.celery.tasks.external_group_syncing",
}
# NOTE: "onyx.background.celery.tasks.shared" is intentionally NOT in the set
# above. It contains celery_beat_heartbeat (which only writes to Redis) alongside
# document cleanup tasks. The cleanup tasks won't be invoked in minimal mode
# because the periodic tasks that trigger them are in other filtered modules.


def filter_task_modules(modules: list[str]) -> list[str]:
    """Remove vector-DB-dependent task modules when DISABLE_VECTOR_DB is True."""
    if not DISABLE_VECTOR_DB:
        return modules
    return [m for m in modules if m not in _VECTOR_DB_TASK_MODULES]


================================================
FILE: backend/onyx/background/celery/apps/beat.py
================================================
from datetime import timedelta
from typing import Any

from celery import Celery
from celery import signals
from celery.beat import PersistentScheduler  # type: ignore
from celery.signals import beat_init
from celery.utils.log import get_task_logger

import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
from shared_configs.configs import MULTI_TENANT

task_logger = get_task_logger(__name__)

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.beat")


class DynamicTenantScheduler(PersistentScheduler):
    """This scheduler is useful because we can dynamically adjust task generation rates
    through it."""

    RELOAD_INTERVAL = 60

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)

        self.last_beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

        self._reload_interval = timedelta(
            seconds=DynamicTenantScheduler.RELOAD_INTERVAL
        )
        self._last_reload = self.app.now() - self._reload_interval

        # Let the parent class handle store initialization
        self.setup_schedule()
        task_logger.info(
            f"DynamicTenantScheduler initialized: reload_interval={self._reload_interval}"
        )

        self._liveness_probe_path = make_probe_path("liveness", "beat@hostname")

        # do not set the initial schedule here because we don't have db access yet.
        # do it in beat_init after the db engine is initialized

        # An initial schedule is required ... otherwise, the scheduler will delay
        # for 5 minutes before calling tick()

    def setup_schedule(self) -> None:
        super().setup_schedule()

    def tick(self) -> float:
        retval = super().tick()
        now = self.app.now()
        if (
            self._last_reload is None
            or (now - self._last_reload) > self._reload_interval
        ):
            task_logger.debug("Reload interval reached, initiating task update")
            self._liveness_probe_path.touch()

            try:
                self._try_updating_schedule()
            except (AttributeError, KeyError):
                task_logger.exception("Failed to process task configuration")
            except Exception:
                task_logger.exception("Unexpected error updating tasks")

            self._last_reload = now

        return retval

    def _generate_schedule(
        self, tenant_ids: list[str] | list[None], beat_multiplier: float
    ) -> dict[str, dict[str, Any]]:
        """Given a list of tenant id's, generates a new beat schedule for celery."""
        new_schedule: dict[str, dict[str, Any]] = {}

        if MULTI_TENANT:
            # cloud tasks are system wide and thus only need to be on the beat schedule
            # once for all tenants
            get_cloud_tasks_to_schedule = fetch_versioned_implementation(
                "onyx.background.celery.tasks.beat_schedule",
                "get_cloud_tasks_to_schedule",
            )

            cloud_tasks_to_schedule: list[dict[str, Any]] = get_cloud_tasks_to_schedule(
                beat_multiplier
            )
            for task in cloud_tasks_to_schedule:
                task_name = task["name"]
                cloud_task = {
                    "task": task["task"],
                    "schedule": task["schedule"],
                    "kwargs": task.get("kwargs", {}),
                }
                if options := task.get("options"):
                    task_logger.debug(f"Adding options to task {task_name}: {options}")
                    cloud_task["options"] = options
                new_schedule[task_name] = cloud_task

        # regular task beats are multiplied across all tenants
        # note that currently this just schedules for a single tenant in self hosted
        # and doesn't do anything in the cloud because it's much more scalable
        # to schedule a single cloud beat task to dispatch per tenant tasks.
        get_tasks_to_schedule = fetch_versioned_implementation(
            "onyx.background.celery.tasks.beat_schedule", "get_tasks_to_schedule"
        )

        tasks_to_schedule: list[dict[str, Any]] = get_tasks_to_schedule()

        for tenant_id in tenant_ids:
            if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:
                task_logger.debug(
                    f"Skipping tenant {tenant_id} as it is in the ignored syncing list"
                )
                continue

            for task in tasks_to_schedule:
                task_name = task["name"]
                tenant_task_name = f"{task['name']}-{tenant_id}"

                task_logger.debug(f"Creating task configuration for {tenant_task_name}")
                tenant_task = {
                    "task": task["task"],
                    "schedule": task["schedule"],
                    "kwargs": {"tenant_id": tenant_id},
                }
                if options := task.get("options"):
                    task_logger.debug(
                        f"Adding options to task {tenant_task_name}: {options}"
                    )
                    tenant_task["options"] = options

                new_schedule[tenant_task_name] = tenant_task

        return new_schedule

    def _try_updating_schedule(self) -> None:
        """Only updates the actual beat schedule on the celery app when it changes"""
        do_update = False

        task_logger.debug("_try_updating_schedule starting")

        tenant_ids = get_all_tenant_ids()
        task_logger.debug(f"Found {len(tenant_ids)} IDs")

        # get current schedule and extract current tenants
        current_schedule = self.schedule.items()

        # get potential new state
        try:
            beat_multiplier = OnyxRuntime.get_beat_multiplier()
        except Exception:
            beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

        new_schedule = self._generate_schedule(tenant_ids, beat_multiplier)

        # if the schedule or beat multiplier has changed, update
        while True:
            if beat_multiplier != self.last_beat_multiplier:
                do_update = True
                break

            if not DynamicTenantScheduler._compare_schedules(
                current_schedule, new_schedule
            ):
                do_update = True
                break

            break

        if not do_update:
            # exit early if nothing changed
            task_logger.info(
                f"_try_updating_schedule - Schedule unchanged: tasks={len(new_schedule)} beat_multiplier={beat_multiplier}"
            )
            return

        # schedule needs updating
        task_logger.debug(
            "Schedule update required",
            extra={
                "new_tasks": len(new_schedule),
                "current_tasks": len(current_schedule),
            },
        )

        # Create schedule entries
        entries = {}
        for name, entry in new_schedule.items():
            entries[name] = self.Entry(
                name=name,
                app=self.app,
                task=entry["task"],
                schedule=entry["schedule"],
                options=entry.get("options", {}),
                kwargs=entry.get("kwargs", {}),
            )

        # Update the schedule using the scheduler's methods
        self.schedule.clear()
        self.schedule.update(entries)

        # Ensure changes are persisted
        self.sync()

        task_logger.info(
            f"_try_updating_schedule - Schedule updated: "
            f"prev_num_tasks={len(current_schedule)} "
            f"prev_beat_multiplier={self.last_beat_multiplier} "
            f"tasks={len(new_schedule)} "
            f"beat_multiplier={beat_multiplier}"
        )

        self.last_beat_multiplier = beat_multiplier

    @staticmethod
    def _compare_schedules(schedule1: dict, schedule2: dict) -> bool:
        """Compare schedules by task name only to determine if an update is needed.
        True if equivalent, False if not."""
        current_tasks = set(name for name, _ in schedule1)
        new_tasks = set(schedule2.keys())
        return current_tasks == new_tasks


@beat_init.connect
def on_beat_init(sender: Any, **kwargs: Any) -> None:
    task_logger.info("beat_init signal received.")

    # Celery beat shouldn't touch the db at all. But just setting a low minimum here.
    SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)
    SqlEngine.init_engine(pool_size=2, max_overflow=0)

    app_base.wait_for_redis(sender, **kwargs)
    path = make_probe_path("readiness", "beat@hostname")
    path.touch()
    task_logger.info(f"Readiness signal touched at {path}.")

    # first time init of the scheduler after db has been init'ed
    scheduler: DynamicTenantScheduler = sender.scheduler
    scheduler._try_updating_schedule()


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


celery_app.conf.beat_scheduler = DynamicTenantScheduler
celery_app.conf.task_default_base = app_base.TenantAwareTask


================================================
FILE: backend/onyx/background/celery/apps/client.py
================================================
from celery import Celery

import onyx.background.celery.apps.app_base as app_base

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.client")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


================================================
FILE: backend/onyx/background/celery/apps/docfetching.py
================================================
from typing import Any
from typing import cast

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun
from onyx.server.metrics.metrics_server import start_metrics_server
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.docfetching")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
    on_celery_task_prerun(task_id, task)
    on_indexing_task_prerun(task_id, task, kwargs)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
    on_celery_task_postrun(task_id, task, state)
    on_indexing_task_postrun(task_id, task, kwargs, state)


@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # task_retry signal doesn't pass task_id in kwargs; get it from
    # the sender (the task instance) via sender.request.id.
    task_id = getattr(getattr(sender, "request", None), "id", None)
    on_celery_task_retry(task_id, sender)


@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    task_name = getattr(sender, "name", None) or str(sender)
    on_celery_task_revoked(kwargs.get("task_id"), task_name)


@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # task_rejected sends the Consumer as sender, not the task instance.
    # The task name must be extracted from the Celery message headers.
    message = kwargs.get("message")
    task_name: str | None = None
    if message is not None:
        headers = getattr(message, "headers", None) or {}
        task_name = headers.get("task")
    if task_name is None:
        task_name = "unknown"
    on_celery_task_rejected(None, task_name)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME)
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    start_metrics_server("docfetching")
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.docfetching",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/docprocessing.py
================================================
from typing import Any
from typing import cast

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun
from onyx.server.metrics.metrics_server import start_metrics_server
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.docprocessing")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
    on_celery_task_prerun(task_id, task)
    on_indexing_task_prerun(task_id, task, kwargs)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
    on_celery_task_postrun(task_id, task, state)
    on_indexing_task_postrun(task_id, task, kwargs, state)


@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # task_retry signal doesn't pass task_id in kwargs; get it from
    # the sender (the task instance) via sender.request.id.
    task_id = getattr(getattr(sender, "request", None), "id", None)
    on_celery_task_retry(task_id, sender)


@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    task_name = getattr(sender, "name", None) or str(sender)
    on_celery_task_revoked(kwargs.get("task_id"), task_name)


@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # task_rejected sends the Consumer as sender, not the task instance.
    # The task name must be extracted from the Celery message headers.
    message = kwargs.get("message")
    task_name: str | None = None
    if message is not None:
        headers = getattr(message, "headers", None) or {}
        task_name = headers.get("task")
    if task_name is None:
        task_name = "unknown"
    on_celery_task_rejected(None, task_name)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME)

    # rkuo: Transient errors keep happening in the indexing watchdog threads.
    # "SSL connection has been closed unexpectedly"
    # actually setting the spawn method in the cloud fixes 95% of these.
    # setting pre ping might help even more, but not worrying about that yet
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    start_metrics_server("docprocessing")
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


# Note: worker_process_init only fires in prefork pool mode. Docprocessing uses
# worker_pool="threads" (see configs/docprocessing.py), so this handler is
# effectively a no-op in normal operation. It remains as a safety net in case
# the pool type is ever changed to prefork. Prometheus metrics are safe in
# thread-pool mode since all threads share the same process memory and can
# update the same Counter/Gauge/Histogram objects directly.
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:  # noqa: ARG001
    SqlEngine.reset_engine()


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.docprocessing",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/heavy.py
================================================
from typing import Any
from typing import cast

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.heavy")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.pruning",
            # Sandbox tasks (file sync, cleanup)
            "onyx.server.features.build.sandbox.tasks",
            "onyx.background.celery.tasks.hierarchyfetching",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/light.py
================================================
from typing import Any

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.light")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    EXTRA_CONCURRENCY = 8  # small extra fudge factor for connection limits

    logger.info("worker_init signal received.")

    logger.info(f"Concurrency: {sender.concurrency}")  # type: ignore

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=EXTRA_CONCURRENCY)  # type: ignore

    if MANAGED_VESPA:
        httpx_init_vespa_pool(
            sender.concurrency + EXTRA_CONCURRENCY,  # type: ignore
            ssl_cert=VESPA_CLOUD_CERT_PATH,
            ssl_key=VESPA_CLOUD_KEY_PATH,
        )
    else:
        httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY)  # type: ignore

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.shared",
            "onyx.background.celery.tasks.vespa",
            "onyx.background.celery.tasks.connector_deletion",
            "onyx.background.celery.tasks.doc_permission_syncing",
            "onyx.background.celery.tasks.docprocessing",
            "onyx.background.celery.tasks.opensearch_migration",
            # Sandbox cleanup tasks (isolated in build feature)
            "onyx.server.features.build.sandbox.tasks",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/monitoring.py
================================================
import multiprocessing
from typing import Any

from celery import Celery
from celery import signals
from celery import Task
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_MONITORING_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.monitoring")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


# Set by on_worker_init so on_worker_ready knows whether to start the server.
_prometheus_collectors_ok: bool = False


@worker_init.connect
def on_worker_init(sender: Any, **kwargs: Any) -> None:
    global _prometheus_collectors_ok

    logger.info("worker_init signal received.")
    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_MONITORING_APP_NAME)
    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=3)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)

    _prometheus_collectors_ok = _setup_prometheus_collectors(sender)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


def _setup_prometheus_collectors(sender: Any) -> bool:
    """Register Prometheus collectors that need Redis/DB access.

    Passes the Celery app so the queue depth collector can obtain a fresh
    broker Redis client on each scrape (rather than holding a stale reference).

    Returns True if registration succeeded, False otherwise.
    """
    try:
        from onyx.server.metrics.indexing_pipeline_setup import (
            setup_indexing_pipeline_metrics,
        )

        setup_indexing_pipeline_metrics(sender.app)
        logger.info("Prometheus indexing pipeline collectors registered")
        return True
    except Exception:
        logger.exception("Failed to register Prometheus indexing pipeline collectors")
        return False


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    if _prometheus_collectors_ok:
        from onyx.server.metrics.metrics_server import start_metrics_server

        start_metrics_server("monitoring")
    else:
        logger.warning(
            "Skipping Prometheus metrics server — collector registration failed"
        )
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.monitoring",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/primary.py
================================================
import logging
import os
from typing import Any
from typing import cast

from celery import bootsteps  # type: ignore
from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.exceptions import WorkerShutdown
from celery.result import AsyncResult
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown
from redis.lock import Lock as RedisLock

import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.tasks.vespa.document_sync import reset_document_sync
from onyx.configs.app_configs import CELERY_WORKER_PRIMARY_POOL_OVERFLOW
from onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import mark_attempt_canceled
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_stop import RedisConnectorStop
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.primary")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(
        pool_size=pool_size, max_overflow=CELERY_WORKER_PRIMARY_POOL_OVERFLOW
    )

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    logger.info(f"Running as the primary celery worker: pid={os.getpid()}")

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    # This is singleton work that should be done on startup exactly once
    # by the primary worker. This is unnecessary in the multi tenant scenario
    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)

    # Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
    replication_info: dict[str, Any] = cast(dict, r.info("replication"))
    role: str = cast(str, replication_info.get("role", ""))
    connected_slaves: int = replication_info.get("connected_slaves", 0)

    logger.info(
        f"Redis INFO REPLICATION: role={role} connected_slaves={connected_slaves}"
    )

    memory_info: dict[str, Any] = cast(dict, r.info("memory"))
    maxmemory_policy: str = cast(str, memory_info.get("maxmemory_policy", ""))

    logger.info(f"Redis INFO MEMORY: maxmemory_policy={maxmemory_policy}")

    # For the moment, we're assuming that we are the only primary worker
    # that should be running.
    # TODO: maybe check for or clean up another zombie primary worker if we detect it
    r.delete(OnyxRedisLocks.PRIMARY_WORKER)

    # this process wide lock is taken to help other workers start up in order.
    # it is planned to use this lock to enforce singleton behavior on the primary
    # worker, since the primary worker does redis cleanup on startup, but this isn't
    # implemented yet.

    # set thread_local=False since we don't control what thread the periodic task might
    # reacquire the lock with
    lock: RedisLock = r.lock(
        OnyxRedisLocks.PRIMARY_WORKER,
        timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
        thread_local=False,
    )

    logger.info("Primary worker lock: Acquire starting.")
    acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2)
    if acquired:
        logger.info("Primary worker lock: Acquire succeeded.")
    else:
        logger.error("Primary worker lock: Acquire failed!")
        raise WorkerShutdown("Primary worker lock could not be acquired!")

    # tacking on our own user data to the sender
    sender.primary_worker_lock = lock  # type: ignore

    # As currently designed, when this worker starts as "primary", we reinitialize redis
    # to a clean state (for our purposes, anyway)
    r.delete(OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)

    r.delete(OnyxRedisConstants.ACTIVE_FENCES)

    # NOTE: we want to remove the `Redis*` classes, prefer to just have functions
    # This is the preferred way to do this going forward
    reset_document_sync(r)

    RedisDocumentSet.reset_all(r)
    RedisUserGroup.reset_all(r)
    RedisConnectorDelete.reset_all(r)
    RedisConnectorPrune.reset_all(r)
    RedisConnectorStop.reset_all(r)
    RedisConnectorPermissionSync.reset_all(r)
    RedisConnectorExternalGroupSync.reset_all(r)

    # mark orphaned index attempts as failed
    # This uses database coordination instead of Redis fencing
    with get_session_with_current_tenant() as db_session:
        # Get potentially orphaned attempts (those with active status and task IDs)
        potentially_orphaned_ids = IndexingCoordination.get_orphaned_index_attempt_ids(
            db_session
        )

        for attempt_id in potentially_orphaned_ids:
            attempt = get_index_attempt(db_session, attempt_id)

            # handle case where not started or docfetching is done but indexing is not
            if (
                not attempt
                or not attempt.celery_task_id
                or attempt.total_batches is not None
            ):
                continue

            # Check if the Celery task actually exists
            try:
                result: AsyncResult = AsyncResult(attempt.celery_task_id)

                # If the task is not in PENDING state, it exists in Celery
                if result.state != "PENDING":
                    continue

                # Task is orphaned - mark as failed
                failure_reason = (
                    f"Orphaned index attempt found on startup - Celery task not found: "
                    f"index_attempt={attempt.id} "
                    f"cc_pair={attempt.connector_credential_pair_id} "
                    f"search_settings={attempt.search_settings_id} "
                    f"celery_task_id={attempt.celery_task_id}"
                )
                logger.warning(failure_reason)
                mark_attempt_canceled(attempt.id, db_session, failure_reason)

            except Exception:
                # If we can't check the task status, be conservative and continue
                logger.warning(
                    f"Could not verify Celery task status on startup for attempt {attempt.id}, task_id={attempt.celery_task_id}"
                )


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)

    # this can be spammy, so just enable it in the cloud for now
    if MULTI_TENANT:
        app_base.set_task_finished_log_level(logging.INFO)


class HubPeriodicTask(bootsteps.StartStopStep):
    """Regularly reacquires the primary worker lock outside of the task queue.
    Use the task_logger in this class to avoid double logging.

    This cannot be done inside a regular beat task because it must run on schedule and
    a queue of existing work would starve the task from running.
    """

    # it's unclear to me whether using the hub's timer or the bootstep timer is better
    requires = {"celery.worker.components:Hub"}

    def __init__(self, worker: Any, **kwargs: Any) -> None:  # noqa: ARG002
        self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8  # Interval in seconds
        self.task_tref = None

    def start(self, worker: Any) -> None:
        if not celery_is_worker_primary(worker):
            return

        # Access the worker's event loop (hub)
        hub = worker.consumer.controller.hub

        # Schedule the periodic task
        self.task_tref = hub.call_repeatedly(
            self.interval, self.run_periodic_task, worker
        )
        task_logger.info("Scheduled periodic task with hub.")

    def run_periodic_task(self, worker: Any) -> None:
        try:
            if not celery_is_worker_primary(worker):
                return

            if not hasattr(worker, "primary_worker_lock"):
                return

            lock: RedisLock = worker.primary_worker_lock

            r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)

            if lock.owned():
                task_logger.debug("Reacquiring primary worker lock.")
                lock.reacquire()
            else:
                task_logger.warning(
                    "Full acquisition of primary worker lock. Reasons could be worker restart or lock expiration."
                )
                lock = r.lock(
                    OnyxRedisLocks.PRIMARY_WORKER,
                    timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
                )

                task_logger.info("Primary worker lock: Acquire starting.")
                acquired = lock.acquire(
                    blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2
                )
                if acquired:
                    task_logger.info("Primary worker lock: Acquire succeeded.")
                    worker.primary_worker_lock = lock
                else:
                    task_logger.error("Primary worker lock: Acquire failed!")
                    raise TimeoutError("Primary worker lock could not be acquired!")

        except Exception:
            task_logger.exception("Periodic task failed.")

    def stop(self, worker: Any) -> None:  # noqa: ARG002
        # Cancel the scheduled task when the worker stops
        if self.task_tref:
            self.task_tref.cancel()
            task_logger.info("Canceled periodic task with hub.")


celery_app.steps["worker"].add(HubPeriodicTask)

base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.connector_deletion",
            "onyx.background.celery.tasks.docprocessing",
            "onyx.background.celery.tasks.evals",
            "onyx.background.celery.tasks.hierarchyfetching",
            "onyx.background.celery.tasks.periodic",
            "onyx.background.celery.tasks.pruning",
            "onyx.background.celery.tasks.shared",
            "onyx.background.celery.tasks.vespa",
            "onyx.background.celery.tasks.llm_model_update",
            "onyx.background.celery.tasks.user_file_processing",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/apps/task_formatters.py
================================================
import logging

from celery import current_task

from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import PlainFormatter


class CeleryTaskPlainFormatter(PlainFormatter):
    def format(self, record: logging.LogRecord) -> str:
        task = current_task
        if task and task.request:
            record.__dict__.update(task_id=task.request.id, task_name=task.name)
            record.msg = f"[{task.name}({task.request.id})] {record.msg}"

        return super().format(record)


class CeleryTaskColoredFormatter(ColoredFormatter):
    def format(self, record: logging.LogRecord) -> str:
        task = current_task
        if task and task.request:
            record.__dict__.update(task_id=task.request.id, task_name=task.name)
            record.msg = f"[{task.name}({task.request.id})] {record.msg}"

        return super().format(record)


================================================
FILE: backend/onyx/background/celery/apps/user_file_processing.py
================================================
from typing import Any
from typing import cast

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.user_file_processing")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME)

    # rkuo: Transient errors keep happening in the indexing watchdog threads.
    # "SSL connection has been closed unexpectedly"
    # actually setting the spawn method in the cloud fixes 95% of these.
    # setting pre ping might help even more, but not worrying about that yet
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:  # noqa: ARG001
    SqlEngine.reset_engine()


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
            "onyx.background.celery.tasks.user_file_processing",
        ]
    )
)


================================================
FILE: backend/onyx/background/celery/celery_k8s_probe.py
================================================
# script to use as a kubernetes readiness / liveness probe

import argparse
import sys
import time
from pathlib import Path


def main_readiness(filename: str) -> int:
    """Checks if the file exists."""
    path = Path(filename)
    if not path.is_file():
        return 1

    return 0


def main_liveness(filename: str) -> int:
    """Checks if the file exists AND was recently modified."""
    path = Path(filename)
    if not path.is_file():
        return 1

    stats = path.stat()
    liveness_timestamp = stats.st_mtime
    current_timestamp = time.time()
    time_diff = current_timestamp - liveness_timestamp
    if time_diff > 60:
        return 1

    return 0


if __name__ == "__main__":
    exit_code: int

    parser = argparse.ArgumentParser(description="k8s readiness/liveness probe")
    parser.add_argument(
        "--probe",
        type=str,
        choices=["readiness", "liveness"],
        help="The type of probe",
        required=True,
    )
    parser.add_argument("--filename", help="The filename to watch", required=True)
    args = parser.parse_args()

    if args.probe == "readiness":
        exit_code = main_readiness(args.filename)
    elif args.probe == "liveness":
        exit_code = main_liveness(args.filename)
    else:
        raise ValueError(f"Unknown probe type: {args.probe}")

    sys.exit(exit_code)


================================================
FILE: backend/onyx/background/celery/celery_redis.py
================================================
# These are helper objects for tracking the keys we need to write in redis
import json
import threading
from typing import Any
from typing import cast

from celery import Celery
from redis import Redis

from onyx.background.celery.configs.base import CELERY_SEPARATOR
from onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS


_broker_client: Redis | None = None
_broker_url: str | None = None
_broker_client_lock = threading.Lock()


def celery_get_broker_client(app: Celery) -> Redis:
    """Return a shared Redis client connected to the Celery broker DB.

    Uses a module-level singleton so all tasks on a worker share one
    connection instead of creating a new one per call. The client
    connects directly to the broker Redis DB (parsed from the broker URL).

    Thread-safe via lock — safe for use in Celery thread-pool workers.

    Usage:
        r_celery = celery_get_broker_client(self.app)
        length = celery_get_queue_length(queue, r_celery)
    """
    global _broker_client, _broker_url
    with _broker_client_lock:
        url = app.conf.broker_url
        if _broker_client is not None and _broker_url == url:
            try:
                _broker_client.ping()
                return _broker_client
            except Exception:
                try:
                    _broker_client.close()
                except Exception:
                    pass
                _broker_client = None
        elif _broker_client is not None:
            try:
                _broker_client.close()
            except Exception:
                pass
            _broker_client = None

        _broker_url = url
        _broker_client = Redis.from_url(
            url,
            decode_responses=False,
            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
            socket_keepalive=True,
            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
            retry_on_timeout=True,
        )
        return _broker_client


def celery_get_unacked_length(r: Redis) -> int:
    """Checking the unacked queue is useful because a non-zero length tells us there
    may be prefetched tasks.

    There can be other tasks in here besides indexing tasks, so this is mostly useful
    just to see if the task count is non zero.

    ref: https://blog.hikaru.run/2022/08/29/get-waiting-tasks-count-in-celery.html
    """
    length = cast(int, r.hlen("unacked"))
    return length


def celery_get_unacked_task_ids(queue: str, r: Redis) -> set[str]:
    """Gets the set of task id's matching the given queue in the unacked hash.

    Unacked entries belonging to the indexing queues are "prefetched", so this gives
    us crucial visibility as to what tasks are in that state.
    """
    tasks: set[str] = set()

    for _, v in r.hscan_iter("unacked"):
        v_bytes = cast(bytes, v)
        v_str = v_bytes.decode("utf-8")
        task = json.loads(v_str)

        task_description = task[0]
        task_queue = task[2]

        if task_queue != queue:
            continue

        task_id = task_description.get("headers", {}).get("id")
        if not task_id:
            continue

        # if the queue matches and we see the task_id, add it
        tasks.add(task_id)
    return tasks


def celery_get_queue_length(queue: str, r: Redis) -> int:
    """This is a redis specific way to get the length of a celery queue.
    It is priority aware and knows how to count across the multiple redis lists
    used to implement task prioritization.
    This operation is not atomic."""
    total_length = 0
    for i in range(len(OnyxCeleryPriority)):
        queue_name = queue
        if i > 0:
            queue_name += CELERY_SEPARATOR
            queue_name += str(i)

        length = r.llen(queue_name)
        total_length += cast(int, length)

    return total_length


def celery_find_task(task_id: str, queue: str, r: Redis) -> int:
    """This is a redis specific way to find a task for a particular queue in redis.
    It is priority aware and knows how to look through the multiple redis lists
    used to implement task prioritization.
    This operation is not atomic.

    This is a linear search O(n) ... so be careful using it when the task queues can be larger.

    Returns true if the id is in the queue, False if not.
    """
    for priority in range(len(OnyxCeleryPriority)):
        queue_name = f"{queue}{CELERY_SEPARATOR}{priority}" if priority > 0 else queue

        tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))
        for task in tasks:
            task_dict: dict[str, Any] = json.loads(task.decode("utf-8"))
            if task_dict.get("headers", {}).get("id") == task_id:
                return True

    return False


def celery_get_queued_task_ids(queue: str, r: Redis) -> set[str]:
    """This is a redis specific way to build a list of tasks in a queue and return them
    as a set.

    This helps us read the queue once and then efficiently look for missing tasks
    in the queue.
    """

    task_set: set[str] = set()

    for priority in range(len(OnyxCeleryPriority)):
        queue_name = f"{queue}{CELERY_SEPARATOR}{priority}" if priority > 0 else queue

        tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))
        for task in tasks:
            task_dict: dict[str, Any] = json.loads(task.decode("utf-8"))
            task_id = task_dict.get("headers", {}).get("id")
            if task_id:
                task_set.add(task_id)

    return task_set


def celery_inspect_get_workers(name_filter: str | None, app: Celery) -> list[str]:
    """Returns a list of current workers containing name_filter, or all workers if
    name_filter is None.

    We've empirically discovered that the celery inspect API is potentially unstable
    and may hang or return empty results when celery is under load. Suggest using this
    more to debug and troubleshoot than in production code.
    """
    worker_names: list[str] = []

    # filter for and create an indexing specific inspect object
    inspect = app.control.inspect()
    workers: dict[str, Any] = inspect.ping()  # type: ignore
    if workers:
        for worker_name in list(workers.keys()):
            # if the name filter not set, return all worker names
            if not name_filter:
                worker_names.append(worker_name)
                continue

            # if the name filter is set, return only worker names that contain the name filter
            if name_filter not in worker_name:
                continue

            worker_names.append(worker_name)

    return worker_names


def celery_inspect_get_reserved(worker_names: list[str], app: Celery) -> set[str]:
    """Returns a list of reserved tasks on the specified workers.

    We've empirically discovered that the celery inspect API is potentially unstable
    and may hang or return empty results when celery is under load. Suggest using this
    more to debug and troubleshoot than in production code.
    """
    reserved_task_ids: set[str] = set()

    inspect = app.control.inspect(destination=worker_names)

    # get the list of reserved tasks
    reserved_tasks: dict[str, list] | None = inspect.reserved()  # type: ignore
    if reserved_tasks:
        for _, task_list in reserved_tasks.items():
            for task in task_list:
                reserved_task_ids.add(task["id"])

    return reserved_task_ids


def celery_inspect_get_active(worker_names: list[str], app: Celery) -> set[str]:
    """Returns a list of active tasks on the specified workers.

    We've empirically discovered that the celery inspect API is potentially unstable
    and may hang or return empty results when celery is under load. Suggest using this
    more to debug and troubleshoot than in production code.
    """
    active_task_ids: set[str] = set()

    inspect = app.control.inspect(destination=worker_names)

    # get the list of reserved tasks
    active_tasks: dict[str, list] | None = inspect.active()  # type: ignore
    if active_tasks:
        for _, task_list in active_tasks.items():
            for task in task_list:
                active_task_ids.add(task["id"])

    return active_task_ids


================================================
FILE: backend/onyx/background/celery/celery_utils.py
================================================
from collections.abc import Generator
from collections.abc import Iterator
from collections.abc import Sequence
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Any
from typing import cast
from typing import TypeVar

import httpx
from pydantic import BaseModel

from onyx.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
from onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT
from onyx.connectors.connector_runner import CheckpointOutputWrapper
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import ConnectorCheckpoint
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger


logger = setup_logger()

CT = TypeVar("CT", bound=ConnectorCheckpoint)


class SlimConnectorExtractionResult(BaseModel):
    """Result of extracting document IDs and hierarchy nodes from a connector.

    raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
    Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
    """

    raw_id_to_parent: dict[str, str | None]
    hierarchy_nodes: list[HierarchyNode]


def _checkpointed_batched_items(
    connector: CheckpointedConnector[CT],
    start: float,
    end: float,
) -> Generator[list[Document | HierarchyNode | ConnectorFailure], None, None]:
    """Loop through all checkpoint steps and yield batched items.

    Some checkpointed connectors (e.g. IMAP) are multi-step: the first
    checkpoint call may only initialize internal state without yielding
    any documents. This function loops until checkpoint.has_more is False
    to ensure all items are collected across every step.
    """
    checkpoint = connector.build_dummy_checkpoint()
    while True:
        checkpoint_output = connector.load_from_checkpoint(
            start=start, end=end, checkpoint=checkpoint
        )
        wrapper: CheckpointOutputWrapper[CT] = CheckpointOutputWrapper()
        batch: list[Document | HierarchyNode | ConnectorFailure] = []
        for document, hierarchy_node, failure, next_checkpoint in wrapper(
            checkpoint_output
        ):
            if document is not None:
                batch.append(document)
            elif hierarchy_node is not None:
                batch.append(hierarchy_node)
            elif failure is not None:
                batch.append(failure)

            if next_checkpoint is not None:
                checkpoint = next_checkpoint

        if batch:
            yield batch

        if not checkpoint.has_more:
            break


def _get_failure_id(failure: ConnectorFailure) -> str | None:
    """Extract the document/entity ID from a ConnectorFailure."""
    if failure.failed_document:
        return failure.failed_document.document_id
    if failure.failed_entity:
        return failure.failed_entity.entity_id
    return None


class BatchResult(BaseModel):
    raw_id_to_parent: dict[str, str | None]
    hierarchy_nodes: list[HierarchyNode]


def _extract_from_batch(
    doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
) -> BatchResult:
    """Separate a batch into document IDs (with parent mapping) and hierarchy nodes.

    ConnectorFailure items have their failed document/entity IDs added to the
    ID dict so that failed-to-retrieve documents are not accidentally pruned.
    """
    ids: dict[str, str | None] = {}
    hierarchy_nodes: list[HierarchyNode] = []
    for item in doc_list:
        if isinstance(item, HierarchyNode):
            hierarchy_nodes.append(item)
        elif isinstance(item, ConnectorFailure):
            failed_id = _get_failure_id(item)
            if failed_id:
                ids[failed_id] = None
            logger.warning(
                f"Failed to retrieve document {failed_id}: {item.failure_message}"
            )
        else:
            ids[item.id] = item.parent_hierarchy_raw_node_id
    return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)


def extract_ids_from_runnable_connector(
    runnable_connector: BaseConnector,
    callback: IndexingHeartbeatInterface | None = None,
) -> SlimConnectorExtractionResult:
    """
    Extract document IDs and hierarchy nodes from a runnable connector.

    Hierarchy nodes yielded alongside documents/slim docs are collected and
    returned in the result. ConnectorFailure items have their IDs preserved
    so that failed-to-retrieve documents are not accidentally pruned.

    Optionally, a callback can be passed to handle the length of each document batch.
    """
    all_raw_id_to_parent: dict[str, str | None] = {}
    all_hierarchy_nodes: list[HierarchyNode] = []

    # Sequence (covariant) lets all the specific list[...] iterator types unify here
    raw_batch_generator: (
        Iterator[Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure]]
        | None
    ) = None

    if isinstance(runnable_connector, SlimConnector):
        raw_batch_generator = runnable_connector.retrieve_all_slim_docs()
    elif isinstance(runnable_connector, SlimConnectorWithPermSync):
        raw_batch_generator = runnable_connector.retrieve_all_slim_docs_perm_sync()
    # If the connector isn't slim, fall back to running it normally to get ids
    elif isinstance(runnable_connector, LoadConnector):
        raw_batch_generator = runnable_connector.load_from_state()
    elif isinstance(runnable_connector, PollConnector):
        start = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()
        end = datetime.now(timezone.utc).timestamp()
        raw_batch_generator = runnable_connector.poll_source(start=start, end=end)
    elif isinstance(runnable_connector, CheckpointedConnector):
        start = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()
        end = datetime.now(timezone.utc).timestamp()
        raw_batch_generator = _checkpointed_batched_items(
            runnable_connector, start, end
        )
    else:
        raise RuntimeError("Pruning job could not find a valid runnable_connector.")

    # this function is called per batch for rate limiting
    doc_batch_processing_func = (
        rate_limit_builder(
            max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
        )(lambda x: x)
        if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
        else lambda x: x
    )

    # process raw batches to extract both IDs and hierarchy nodes
    for doc_list in raw_batch_generator:
        if callback and callback.should_stop():
            raise RuntimeError(
                "extract_ids_from_runnable_connector: Stop signal detected"
            )

        batch_result = _extract_from_batch(doc_list)
        batch_ids = batch_result.raw_id_to_parent
        batch_nodes = batch_result.hierarchy_nodes
        doc_batch_processing_func(batch_ids)
        all_raw_id_to_parent.update(batch_ids)
        all_hierarchy_nodes.extend(batch_nodes)

        if callback:
            callback.progress("extract_ids_from_runnable_connector", len(batch_ids))

    return SlimConnectorExtractionResult(
        raw_id_to_parent=all_raw_id_to_parent,
        hierarchy_nodes=all_hierarchy_nodes,
    )


def celery_is_listening_to_queue(worker: Any, name: str) -> bool:
    """Checks to see if we're listening to the named queue"""

    # how to get a list of queues this worker is listening to
    # https://stackoverflow.com/questions/29790523/how-to-determine-which-queues-a-celery-worker-is-consuming-at-runtime
    queue_names = list(worker.app.amqp.queues.consume_from.keys())
    for queue_name in queue_names:
        if queue_name == name:
            return True

    return False


def celery_is_worker_primary(worker: Any) -> bool:
    """There are multiple approaches that could be taken to determine if a celery worker
    is 'primary', as defined by us. But the way we do it is to check the hostname set
    for the celery worker, which can be done on the
    command line with '--hostname'."""
    hostname = worker.hostname
    if hostname.startswith("primary"):
        return True

    return False


def httpx_init_vespa_pool(
    max_keepalive_connections: int,
    timeout: int = VESPA_REQUEST_TIMEOUT,
    ssl_cert: str | None = None,
    ssl_key: str | None = None,
) -> None:
    httpx_cert = None
    httpx_verify = False
    if ssl_cert and ssl_key:
        httpx_cert = cast(tuple[str, str], (ssl_cert, ssl_key))
        httpx_verify = True

    HttpxPool.init_client(
        name="vespa",
        cert=httpx_cert,
        verify=httpx_verify,
        timeout=timeout,
        http2=False,
        limits=httpx.Limits(max_keepalive_connections=max_keepalive_connections),
    )


def make_probe_path(probe: str, hostname: str) -> Path:
    """templates the path for a k8s probe file.

    e.g. /tmp/onyx_k8s_indexing_readiness.txt
    """
    hostname_parts = hostname.split("@")
    if len(hostname_parts) != 2:
        raise ValueError(f"hostname could not be split! {hostname=}")

    name = hostname_parts[0]
    if not name:
        raise ValueError(f"name cannot be empty! {name=}")

    safe_name = "".join(c for c in name if c.isalnum()).rstrip()
    return Path(f"/tmp/onyx_k8s_{safe_name}_{probe}.txt")


================================================
FILE: backend/onyx/background/celery/configs/base.py
================================================
# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html
import urllib.parse

from onyx.configs.app_configs import CELERY_BROKER_POOL_LIMIT
from onyx.configs.app_configs import CELERY_RESULT_EXPIRES
from onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY
from onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND
from onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PASSWORD
from onyx.configs.app_configs import REDIS_PORT
from onyx.configs.app_configs import REDIS_SSL
from onyx.configs.app_configs import REDIS_SSL_CA_CERTS
from onyx.configs.app_configs import REDIS_SSL_CERT_REQS
from onyx.configs.app_configs import USE_REDIS_IAM_AUTH
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS

CELERY_SEPARATOR = ":"

CELERY_PASSWORD_PART = ""
if REDIS_PASSWORD:
    CELERY_PASSWORD_PART = ":" + urllib.parse.quote(REDIS_PASSWORD, safe="") + "@"

REDIS_SCHEME = "redis"

# SSL-specific query parameters for Redis URL
SSL_QUERY_PARAMS = ""
if REDIS_SSL and not USE_REDIS_IAM_AUTH:
    REDIS_SCHEME = "rediss"
    SSL_QUERY_PARAMS = f"?ssl_cert_reqs={REDIS_SSL_CERT_REQS}"
    if REDIS_SSL_CA_CERTS:
        SSL_QUERY_PARAMS += f"&ssl_ca_certs={REDIS_SSL_CA_CERTS}"

# region Broker settings
# example celery_broker_url: "redis://:password@localhost:6379/15"
broker_url = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}{SSL_QUERY_PARAMS}"

broker_connection_retry_on_startup = True
broker_pool_limit = CELERY_BROKER_POOL_LIMIT

# redis broker settings
# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html
broker_transport_options = {
    "priority_steps": list(range(len(OnyxCeleryPriority))),
    "sep": CELERY_SEPARATOR,
    "queue_order_strategy": "priority",
    "retry_on_timeout": True,
    "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
    "socket_keepalive": True,
    "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS,
}
# endregion

# redis backend settings
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings

# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend
redis_socket_keepalive = True
redis_retry_on_timeout = True
redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL


task_default_priority = OnyxCeleryPriority.MEDIUM
task_acks_late = True

# region Task result backend settings
# It's possible we don't even need celery's result backend, in which case all of the optimization below
# might be irrelevant
result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}"
result_expires = CELERY_RESULT_EXPIRES  # 86400 seconds is the default
# endregion

# Leaving this to the default of True may cause double logging since both our own app
# and celery think they are controlling the logger.
# TODO: Configure celery's logger entirely manually and set this to False
# worker_hijack_root_logger = False

# region Notes on serialization performance
# Option 0: Defaults (json serializer, no compression)
# about 1.5 KB per queued task. 1KB in queue, 400B for result, 100 as a child entry in generator result

# Option 1: Reduces generator task result sizes by roughly 20%
# task_compression = "bzip2"
# task_serializer = "pickle"
# result_compression = "bzip2"
# result_serializer = "pickle"
# accept_content=["pickle"]

# Option 2: this significantly reduces the size of the result for generator tasks since the list of children
# can be large. small tasks change very little
# def pickle_bz2_encoder(data):
#     return bz2.compress(pickle.dumps(data))

# def pickle_bz2_decoder(data):
#     return pickle.loads(bz2.decompress(data))

# from kombu import serialization  # To register custom serialization with Celery/Kombu

# serialization.register('pickle-bzip2', pickle_bz2_encoder, pickle_bz2_decoder, 'application/x-pickle-bz2', 'binary')

# task_serializer = "pickle-bzip2"
# result_serializer = "pickle-bzip2"
# accept_content=["pickle", "pickle-bzip2"]
# endregion


================================================
FILE: backend/onyx/background/celery/configs/beat.py
================================================
# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html
import onyx.background.celery.configs.base as shared_config

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default


================================================
FILE: backend/onyx/background/celery/configs/client.py
================================================
import onyx.background.celery.configs.base as shared_config

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late


================================================
FILE: backend/onyx/background/celery/configs/docfetching.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_DOCFETCHING_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

# Docfetching worker configuration
worker_concurrency = CELERY_WORKER_DOCFETCHING_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/configs/docprocessing.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_DOCPROCESSING_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

# Indexing worker specific ... this lets us track the transition to STARTED in redis
# We don't currently rely on this but it has the potential to be useful and
# indexing tasks are not high volume

# we don't turn this on yet because celery occasionally runs tasks more than once
# which means a duplicate run might change the task state unexpectedly
# task_track_started = True

worker_concurrency = CELERY_WORKER_DOCPROCESSING_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/configs/heavy.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_HEAVY_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

worker_concurrency = CELERY_WORKER_HEAVY_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/configs/light.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_LIGHT_CONCURRENCY
from onyx.configs.app_configs import CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

worker_concurrency = CELERY_WORKER_LIGHT_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER


================================================
FILE: backend/onyx/background/celery/configs/monitoring.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_MONITORING_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

# Monitoring worker specific settings
worker_concurrency = CELERY_WORKER_MONITORING_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/configs/primary.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_PRIMARY_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

worker_concurrency = CELERY_WORKER_PRIMARY_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/configs/user_file_processing.py
================================================
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY

broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

# User file processing worker configuration
worker_concurrency = CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
worker_pool = "threads"
worker_prefetch_multiplier = 1


================================================
FILE: backend/onyx/background/celery/memory_monitoring.py
================================================
# backend/onyx/background/celery/memory_monitoring.py
import logging
import os
from logging.handlers import RotatingFileHandler

import psutil

from onyx.utils.logger import is_running_in_container
from onyx.utils.logger import setup_logger

# Regular application logger
logger = setup_logger()

# Only set up memory monitoring in container environment
if is_running_in_container():
    # Set up a dedicated memory monitoring logger
    MEMORY_LOG_DIR = "/var/log/onyx/memory"
    MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
    MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024  # 10MB
    MEMORY_LOG_BACKUP_COUNT = 5  # Keep 5 backup files

    # Ensure log directory exists
    os.makedirs(MEMORY_LOG_DIR, exist_ok=True)

    # Create a dedicated logger for memory monitoring
    memory_logger = logging.getLogger("memory_monitoring")
    memory_logger.setLevel(logging.INFO)

    # Create a rotating file handler
    memory_handler = RotatingFileHandler(
        MEMORY_LOG_FILE,
        maxBytes=MEMORY_LOG_MAX_BYTES,
        backupCount=MEMORY_LOG_BACKUP_COUNT,
    )

    # Create a formatter that includes all relevant information
    memory_formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    memory_handler.setFormatter(memory_formatter)
    memory_logger.addHandler(memory_handler)
else:
    # Create a null logger when not in container
    memory_logger = logging.getLogger("memory_monitoring")
    memory_logger.addHandler(logging.NullHandler())


def emit_process_memory(
    pid: int, process_name: str, additional_metadata: dict[str, str | int]
) -> None:
    # Skip memory monitoring if not in container
    if not is_running_in_container():
        return

    try:
        process = psutil.Process(pid)
        memory_info = process.memory_info()
        cpu_percent = process.cpu_percent(interval=0.1)

        # Build metadata string from additional_metadata dictionary
        metadata_str = " ".join(
            [f"{key}={value}" for key, value in additional_metadata.items()]
        )
        metadata_str = f" {metadata_str}" if metadata_str else ""

        memory_logger.info(
            f"PROCESS_MEMORY process_name={process_name} pid={pid} "
            f"rss_mb={memory_info.rss / (1024 * 1024):.2f} "
            f"vms_mb={memory_info.vms / (1024 * 1024):.2f} "
            f"cpu={cpu_percent:.2f}{metadata_str}"
        )
    except Exception:
        logger.exception("Error monitoring process memory.")


================================================
FILE: backend/onyx/background/celery/tasks/beat_schedule.py
================================================
import copy
from datetime import timedelta
from typing import Any

from celery.schedules import crontab

from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from shared_configs.configs import MULTI_TENANT

# choosing 15 minutes because it roughly gives us enough time to process many tasks
# we might be able to reduce this greatly if we can run a unified
# loop across all tenants rather than tasks per tenant

# we set expires because it isn't necessary to queue up these tasks
# it's only important that they run relatively regularly
BEAT_EXPIRES_DEFAULT = 15 * 60  # 15 minutes (in seconds)

# hack to slow down task dispatch in the cloud until
# we have a better implementation (backpressure, etc)
# Note that DynamicTenantScheduler can adjust the runtime value for this via Redis
CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0
CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT = 1.0

# tasks that run in either self-hosted on cloud
beat_task_templates: list[dict] = [
    {
        "name": "check-for-user-file-processing",
        "task": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-user-file-project-sync",
        "task": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-user-file-delete",
        "task": OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-indexing",
        "task": OnyxCeleryTask.CHECK_FOR_INDEXING,
        "schedule": timedelta(seconds=15),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-checkpoint-cleanup",
        "task": OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
        "schedule": timedelta(hours=1),
        "options": {
            "priority": OnyxCeleryPriority.LOW,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-index-attempt-cleanup",
        "task": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
        "schedule": timedelta(minutes=30),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-connector-deletion",
        "task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-vespa-sync",
        "task": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-pruning",
        "task": OnyxCeleryTask.CHECK_FOR_PRUNING,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.MEDIUM,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "check-for-hierarchy-fetching",
        "task": OnyxCeleryTask.CHECK_FOR_HIERARCHY_FETCHING,
        "schedule": timedelta(hours=1),  # Check hourly, but only fetch once per day
        "options": {
            "priority": OnyxCeleryPriority.LOW,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": "monitor-background-processes",
        "task": OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
        "schedule": timedelta(minutes=5),
        "options": {
            "priority": OnyxCeleryPriority.LOW,
            "expires": BEAT_EXPIRES_DEFAULT,
            "queue": OnyxCeleryQueues.MONITORING,
        },
    },
    # Sandbox cleanup tasks
    {
        "name": "cleanup-idle-sandboxes",
        "task": OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,
        "schedule": timedelta(minutes=1),
        "options": {
            "priority": OnyxCeleryPriority.LOW,
            "expires": BEAT_EXPIRES_DEFAULT,
            "queue": OnyxCeleryQueues.SANDBOX,
        },
    },
    {
        "name": "cleanup-old-snapshots",
        "task": OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,
        "schedule": timedelta(hours=24),
        "options": {
            "priority": OnyxCeleryPriority.LOW,
            "expires": BEAT_EXPIRES_DEFAULT,
            "queue": OnyxCeleryQueues.SANDBOX,
        },
    },
]

if ENTERPRISE_EDITION_ENABLED:
    beat_task_templates.extend(
        [
            {
                "name": "check-for-doc-permissions-sync",
                "task": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
                "schedule": timedelta(seconds=30),
                "options": {
                    "priority": OnyxCeleryPriority.MEDIUM,
                    "expires": BEAT_EXPIRES_DEFAULT,
                },
            },
            {
                "name": "check-for-external-group-sync",
                "task": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
                "schedule": timedelta(seconds=20),
                "options": {
                    "priority": OnyxCeleryPriority.MEDIUM,
                    "expires": BEAT_EXPIRES_DEFAULT,
                },
            },
        ]
    )

# Add the Auto LLM update task if the config URL is set (has a default)
if AUTO_LLM_CONFIG_URL:
    beat_task_templates.append(
        {
            "name": "check-for-auto-llm-update",
            "task": OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,
            "schedule": timedelta(seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS),
            "options": {
                "priority": OnyxCeleryPriority.LOW,
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        }
    )

# Add scheduled eval task if datasets are configured
if SCHEDULED_EVAL_DATASET_NAMES:
    beat_task_templates.append(
        {
            "name": "scheduled-eval-pipeline",
            "task": OnyxCeleryTask.SCHEDULED_EVAL_TASK,
            # run every Sunday at midnight UTC
            "schedule": crontab(
                hour=0,
                minute=0,
                day_of_week=0,
            ),
            "options": {
                "priority": OnyxCeleryPriority.LOW,
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        }
    )

# Add OpenSearch migration task if enabled.
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
    beat_task_templates.append(
        {
            "name": "migrate-chunks-from-vespa-to-opensearch",
            "task": OnyxCeleryTask.MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK,
            # Try to enqueue an invocation of this task with this frequency.
            "schedule": timedelta(seconds=120),  # 2 minutes
            "options": {
                "priority": OnyxCeleryPriority.LOW,
                # If the task was not dequeued in this time, revoke it.
                "expires": BEAT_EXPIRES_DEFAULT,
                "queue": OnyxCeleryQueues.OPENSEARCH_MIGRATION,
            },
        }
    )


# Beat task names that require a vector DB. Filtered out when DISABLE_VECTOR_DB.
_VECTOR_DB_BEAT_TASK_NAMES: set[str] = {
    "check-for-indexing",
    "check-for-connector-deletion",
    "check-for-vespa-sync",
    "check-for-pruning",
    "check-for-hierarchy-fetching",
    "check-for-checkpoint-cleanup",
    "check-for-index-attempt-cleanup",
    "check-for-doc-permissions-sync",
    "check-for-external-group-sync",
    "migrate-chunks-from-vespa-to-opensearch",
}

if DISABLE_VECTOR_DB:
    beat_task_templates = [
        t for t in beat_task_templates if t["name"] not in _VECTOR_DB_BEAT_TASK_NAMES
    ]


def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
    cloud_task: dict[str, Any] = {}

    # constant options for cloud beat task generators
    task_schedule: timedelta = task["schedule"]
    cloud_task["schedule"] = task_schedule
    cloud_task["options"] = {}
    cloud_task["options"]["priority"] = OnyxCeleryPriority.HIGHEST
    cloud_task["options"]["expires"] = BEAT_EXPIRES_DEFAULT

    # settings dependent on the original task
    cloud_task["name"] = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_{task['name']}"
    cloud_task["task"] = OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR
    cloud_task["kwargs"] = {}
    cloud_task["kwargs"]["task_name"] = task["task"]

    optional_fields = ["queue", "priority", "expires"]
    for field in optional_fields:
        if field in task["options"]:
            cloud_task["kwargs"][field] = task["options"][field]

    return cloud_task


# tasks that only run in the cloud and are system wide
# the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud" to be seen
# by the DynamicTenantScheduler as system wide task and not a per tenant task
beat_cloud_tasks: list[dict] = [
    # cloud specific tasks
    {
        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-alembic",
        "task": OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
        "schedule": timedelta(hours=1),
        "options": {
            "queue": OnyxCeleryQueues.MONITORING,
            "priority": OnyxCeleryPriority.HIGH,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-queues",
        "task": OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES,
        "schedule": timedelta(seconds=30),
        "options": {
            "queue": OnyxCeleryQueues.MONITORING,
            "priority": OnyxCeleryPriority.HIGH,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-available-tenants",
        "task": OnyxCeleryTask.CLOUD_CHECK_AVAILABLE_TENANTS,
        "schedule": timedelta(minutes=10),
        "options": {
            "queue": OnyxCeleryQueues.MONITORING,
            "priority": OnyxCeleryPriority.HIGH,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
    {
        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-pidbox",
        "task": OnyxCeleryTask.CLOUD_MONITOR_CELERY_PIDBOX,
        "schedule": timedelta(hours=4),
        "options": {
            "queue": OnyxCeleryQueues.MONITORING,
            "priority": OnyxCeleryPriority.HIGH,
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
]

# tasks that only run self hosted
tasks_to_schedule: list[dict] = []
if not MULTI_TENANT:
    tasks_to_schedule.extend(
        [
            {
                "name": "monitor-celery-queues",
                "task": OnyxCeleryTask.MONITOR_CELERY_QUEUES,
                "schedule": timedelta(seconds=10),
                "options": {
                    "priority": OnyxCeleryPriority.MEDIUM,
                    "expires": BEAT_EXPIRES_DEFAULT,
                    "queue": OnyxCeleryQueues.MONITORING,
                },
            },
            {
                "name": "monitor-process-memory",
                "task": OnyxCeleryTask.MONITOR_PROCESS_MEMORY,
                "schedule": timedelta(minutes=5),
                "options": {
                    "priority": OnyxCeleryPriority.LOW,
                    "expires": BEAT_EXPIRES_DEFAULT,
                    "queue": OnyxCeleryQueues.MONITORING,
                },
            },
            {
                "name": "celery-beat-heartbeat",
                "task": OnyxCeleryTask.CELERY_BEAT_HEARTBEAT,
                "schedule": timedelta(minutes=1),
                "options": {
                    "priority": OnyxCeleryPriority.HIGHEST,
                    "expires": BEAT_EXPIRES_DEFAULT,
                    "queue": OnyxCeleryQueues.PRIMARY,
                },
            },
        ]
    )

    tasks_to_schedule.extend(beat_task_templates)


def generate_cloud_tasks(
    beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float
) -> list[dict[str, Any]]:
    """
    beat_tasks: system wide tasks that can be sent as is
    beat_templates: task templates that will be transformed into per tenant tasks via
    the cloud_beat_task_generator
    beat_multiplier: a multiplier that can be applied on top of the task schedule
    to speed up or slow down the task generation rate. useful in production.

    Returns a list of cloud tasks, which consists of incoming tasks + tasks generated
    from incoming templates.
    """

    if beat_multiplier <= 0:
        raise ValueError("beat_multiplier must be positive!")

    cloud_tasks: list[dict] = []

    # generate our tenant aware cloud tasks from the templates
    for beat_template in beat_templates:
        cloud_task = make_cloud_generator_task(beat_template)
        cloud_tasks.append(cloud_task)

    # factor in the cloud multiplier for the above
    for cloud_task in cloud_tasks:
        cloud_task["schedule"] = cloud_task["schedule"] * beat_multiplier

    # add the fixed cloud/system beat tasks. No multiplier for these.
    cloud_tasks.extend(copy.deepcopy(beat_tasks))
    return cloud_tasks


def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
    return generate_cloud_tasks(beat_cloud_tasks, beat_task_templates, beat_multiplier)


def get_tasks_to_schedule() -> list[dict[str, Any]]:
    return tasks_to_schedule


================================================
FILE: backend/onyx/background/celery/tasks/connector_deletion/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/connector_deletion/tasks.py
================================================
import traceback
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast

from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import ValidationError
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
    delete_connector_credential_pair__no_commit,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import (
    delete_all_documents_by_connector_credential_pair__no_commit,
)
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.index_attempt import delete_index_attempts
from onyx.db.index_attempt import get_recent_attempts_for_cc_pair
from onyx.db.permission_sync_attempt import (
    delete_doc_permission_sync_attempts__no_commit,
)
from onyx.db.permission_sync_attempt import (
    delete_external_group_permission_sync_attempts__no_commit,
)
from onyx.db.search_settings import get_all_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.tag import delete_orphan_tags__no_commit
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback


class TaskDependencyError(RuntimeError):
    """Raised to the caller to indicate dependent tasks are running that would interfere
    with connector deletion."""


def revoke_tasks_blocking_deletion(
    redis_connector: RedisConnector, db_session: Session, app: Celery
) -> None:
    search_settings_list = get_all_search_settings(db_session)
    for search_settings in search_settings_list:
        try:
            recent_index_attempts = get_recent_attempts_for_cc_pair(
                cc_pair_id=redis_connector.cc_pair_id,
                search_settings_id=search_settings.id,
                limit=1,
                db_session=db_session,
            )
            if (
                recent_index_attempts
                and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS
                and recent_index_attempts[0].celery_task_id
            ):
                app.control.revoke(recent_index_attempts[0].celery_task_id)
                task_logger.info(
                    f"Revoked indexing task {recent_index_attempts[0].celery_task_id}."
                )
        except Exception:
            task_logger.exception("Exception while revoking indexing task")

    try:
        permissions_sync_payload = redis_connector.permissions.payload
        if permissions_sync_payload and permissions_sync_payload.celery_task_id:
            app.control.revoke(permissions_sync_payload.celery_task_id)
            task_logger.info(
                f"Revoked permissions sync task {permissions_sync_payload.celery_task_id}."
            )
    except Exception:
        task_logger.exception("Exception while revoking pruning task")

    try:
        prune_payload = redis_connector.prune.payload
        if prune_payload and prune_payload.celery_task_id:
            app.control.revoke(prune_payload.celery_task_id)
            task_logger.info(f"Revoked pruning task {prune_payload.celery_task_id}.")
    except Exception:
        task_logger.exception("Exception while revoking permissions sync task")

    try:
        external_group_sync_payload = redis_connector.external_group_sync.payload
        if external_group_sync_payload and external_group_sync_payload.celery_task_id:
            app.control.revoke(external_group_sync_payload.celery_task_id)
            task_logger.info(
                f"Revoked external group sync task {external_group_sync_payload.celery_task_id}."
            )
    except Exception:
        task_logger.exception("Exception while revoking external group sync task")


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
    bind=True,
)
def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # Prevent this task from overlapping with itself
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        # we want to run this less frequently than the overall task
        lock_beat.reacquire()
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES):
            # clear fences that don't have associated celery tasks in progress
            try:
                r_celery = celery_get_broker_client(self.app)
                validate_connector_deletion_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
            except Exception:
                task_logger.exception(
                    "Exception while validating connector deletion fences"
                )

            r.set(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES, 1, ex=300)

        # collect cc_pair_ids
        cc_pair_ids: list[int] = []
        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_connector_credential_pairs(db_session)
            for cc_pair in cc_pairs:
                cc_pair_ids.append(cc_pair.id)

        # try running cleanup on the cc_pair_ids
        for cc_pair_id in cc_pair_ids:
            with get_session_with_current_tenant() as db_session:
                redis_connector = RedisConnector(tenant_id, cc_pair_id)
                try:
                    try_generate_document_cc_pair_cleanup_tasks(
                        self.app, cc_pair_id, db_session, lock_beat, tenant_id
                    )
                except TaskDependencyError as e:
                    # this means we wanted to start deleting but dependent tasks were running
                    # on the first error, we set a stop signal and revoke the dependent tasks
                    # on subsequent errors, we hard reset blocking fences after our specified timeout
                    # is exceeded
                    task_logger.info(str(e))

                    if not redis_connector.stop.fenced:
                        # one time revoke of celery tasks
                        task_logger.info("Revoking any tasks blocking deletion.")
                        revoke_tasks_blocking_deletion(
                            redis_connector, db_session, self.app
                        )
                        redis_connector.stop.set_fence(True)
                        redis_connector.stop.set_timeout()
                    else:
                        # stop signal already set
                        if redis_connector.stop.timed_out:
                            # waiting too long, just reset blocking fences
                            task_logger.info(
                                "Timed out waiting for tasks blocking deletion. Resetting blocking fences."
                            )

                            redis_connector.prune.reset()
                            redis_connector.permissions.reset()
                            redis_connector.external_group_sync.reset()
                        else:
                            # just wait
                            pass
                else:
                    # clear the stop signal if it exists ... no longer needed
                    redis_connector.stop.set_fence(False)

        lock_beat.reacquire()
        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
        for key in keys:
            key_bytes = cast(bytes, key)

            if not r.exists(key_bytes):
                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
                continue

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
                monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during connector deletion check")
    finally:
        if lock_beat.owned():
            lock_beat.release()

    return True


def try_generate_document_cc_pair_cleanup_tasks(
    app: Celery,
    cc_pair_id: int,
    db_session: Session,
    lock_beat: RedisLock,
    tenant_id: str,
) -> int | None:
    """Returns an int if syncing is needed. The int represents the number of sync tasks generated.
    Note that syncing can still be required even if the number of sync tasks generated is zero.
    Returns None if no syncing is required.

    Will raise TaskDependencyError if dependent tasks such as indexing and pruning are
    still running. In our case, the caller reacts by setting a stop signal in Redis to
    exit those tasks as quickly as possible.
    """

    lock_beat.reacquire()

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    # don't generate sync tasks if tasks are still pending
    if redis_connector.delete.fenced:
        return None

    # we need to load the state of the object inside the fence
    # to avoid a race condition with db.commit/fence deletion
    # at the end of this taskset
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    if not cc_pair:
        return None

    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:
        # there should be no in-progress sync records if this is up to date
        # clean it up just in case things got into a bad state
        cleanup_sync_records(
            db_session=db_session,
            entity_id=cc_pair_id,
            sync_type=SyncType.CONNECTOR_DELETION,
        )
        return None

    # set a basic fence to start
    redis_connector.delete.set_active()
    fence_payload = RedisConnectorDeletePayload(
        num_tasks=None,
        submitted=datetime.now(timezone.utc),
    )

    redis_connector.delete.set_fence(fence_payload)

    try:
        # do not proceed if connector indexing or connector pruning are running
        search_settings_list = get_all_search_settings(db_session)
        for search_settings in search_settings_list:
            recent_index_attempts = get_recent_attempts_for_cc_pair(
                cc_pair_id=cc_pair_id,
                search_settings_id=search_settings.id,
                limit=1,
                db_session=db_session,
            )
            if (
                recent_index_attempts
                and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS
            ):
                raise TaskDependencyError(
                    "Connector deletion - Delayed (indexing in progress): "
                    f"cc_pair={cc_pair_id} "
                    f"search_settings={search_settings.id}"
                )

        if redis_connector.prune.fenced:
            raise TaskDependencyError(
                f"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}"
            )

        if redis_connector.permissions.fenced:
            raise TaskDependencyError(
                f"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}"
            )

        # add tasks to celery and build up the task set to monitor in redis
        redis_connector.delete.taskset_clear()

        # Add all documents that need to be updated into the queue
        task_logger.info(
            f"RedisConnectorDeletion.generate_tasks starting. cc_pair={cc_pair_id}"
        )
        tasks_generated = redis_connector.delete.generate_tasks(
            app, db_session, lock_beat
        )
        if tasks_generated is None:
            raise ValueError("RedisConnectorDeletion.generate_tasks returned None")

        try:
            insert_sync_record(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.CONNECTOR_DELETION,
            )
        except Exception:
            task_logger.exception("insert_sync_record exceptioned.")

    except TaskDependencyError:
        redis_connector.delete.set_fence(None)
        raise
    except Exception:
        task_logger.exception("Unexpected exception")
        redis_connector.delete.set_fence(None)
        return None
    else:
        # Currently we are allowing the sync to proceed with 0 tasks.
        # It's possible for sets/groups to be generated initially with no entries
        # and they still need to be marked as up to date.
        # if tasks_generated == 0:
        #     return 0

        task_logger.info(
            f"RedisConnectorDeletion.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
        )

        # set this only after all tasks have been added
        fence_payload.num_tasks = tasks_generated
        redis_connector.delete.set_fence(fence_payload)

    return tasks_generated


def monitor_connector_deletion_taskset(
    tenant_id: str,
    key_bytes: bytes,
    r: Redis,  # noqa: ARG001
) -> None:
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
        return

    cc_pair_id = int(cc_pair_id_str)

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    fence_data = redis_connector.delete.payload
    if not fence_data:
        task_logger.warning(
            f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
        )
        return

    if fence_data.num_tasks is None:
        # the fence is setting up but isn't ready yet
        return

    remaining = redis_connector.delete.get_remaining()
    task_logger.info(
        f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
    )
    if remaining > 0:
        with get_session_with_current_tenant() as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.CONNECTOR_DELETION,
                sync_status=SyncStatus.IN_PROGRESS,
                num_docs_synced=remaining,
            )
        return

    with get_session_with_current_tenant() as db_session:
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
        )
        credential_id_to_delete: int | None = None
        connector_id_to_delete: int | None = None
        if not cc_pair:
            task_logger.warning(
                f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
            )
            return

        try:
            doc_ids = get_document_ids_for_connector_credential_pair(
                db_session, cc_pair.connector_id, cc_pair.credential_id
            )
            if len(doc_ids) > 0:
                # NOTE(rkuo): if this happens, documents somehow got added while
                # deletion was in progress. Likely a bug gating off pruning and indexing
                # work before deletion starts.
                task_logger.warning(
                    "Connector deletion - documents still found after taskset completion. "
                    "Clearing the current deletion attempt and allowing deletion to restart: "
                    f"cc_pair={cc_pair_id} "
                    f"docs_deleted={fence_data.num_tasks} "
                    f"docs_remaining={len(doc_ids)}"
                )

                # We don't want to waive off why we get into this state, but resetting
                # our attempt and letting the deletion restart is a good way to recover
                redis_connector.delete.reset()
                raise RuntimeError(
                    "Connector deletion - documents still found after taskset completion"
                )

            # clean up the rest of the related Postgres entities
            # index attempts
            delete_index_attempts(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
            )

            # permission sync attempts
            delete_doc_permission_sync_attempts__no_commit(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
            )
            delete_external_group_permission_sync_attempts__no_commit(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
            )

            # document sets
            delete_document_set_cc_pair_relationship__no_commit(
                db_session=db_session,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
            )

            # user groups
            cleanup_user_groups = fetch_versioned_implementation_with_fallback(
                "onyx.db.user_group",
                "delete_user_group_cc_pair_relationship__no_commit",
                noop_fallback,
            )
            cleanup_user_groups(
                cc_pair_id=cc_pair_id,
                db_session=db_session,
            )

            # delete orphan tags
            delete_orphan_tags__no_commit(db_session)

            # Store IDs before potentially expiring cc_pair
            connector_id_to_delete = cc_pair.connector_id
            credential_id_to_delete = cc_pair.credential_id

            # Explicitly delete document by connector credential pair records before deleting the connector
            # This is needed because connector_id is a primary key in that table and cascading deletes won't work
            delete_all_documents_by_connector_credential_pair__no_commit(
                db_session=db_session,
                connector_id=connector_id_to_delete,
                credential_id=credential_id_to_delete,
            )

            # Flush to ensure document deletion happens before connector deletion
            db_session.flush()

            # Expire the cc_pair to ensure SQLAlchemy doesn't try to manage its state
            # related to the deleted DocumentByConnectorCredentialPair during commit
            db_session.expire(cc_pair)

            # finally, delete the cc-pair
            delete_connector_credential_pair__no_commit(
                db_session=db_session,
                connector_id=connector_id_to_delete,
                credential_id=credential_id_to_delete,
            )
            # if there are no credentials left, delete the connector
            connector = fetch_connector_by_id(
                db_session=db_session,
                connector_id=connector_id_to_delete,
            )
            if not connector or not len(connector.credentials):
                task_logger.info(
                    "Connector deletion - Found no credentials left for connector, deleting connector"
                )
                db_session.delete(connector)
            db_session.commit()

            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.CONNECTOR_DELETION,
                sync_status=SyncStatus.SUCCESS,
                num_docs_synced=fence_data.num_tasks,
            )

        except Exception as e:
            db_session.rollback()
            stack_trace = traceback.format_exc()
            error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
            add_deletion_failure_message(db_session, cc_pair_id, error_message)

            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
                sync_type=SyncType.CONNECTOR_DELETION,
                sync_status=SyncStatus.FAILED,
                num_docs_synced=fence_data.num_tasks,
            )

            task_logger.exception(
                f"Connector deletion exceptioned: "
                f"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}"
            )
            raise e

    task_logger.info(
        f"Connector deletion succeeded: "
        f"cc_pair={cc_pair_id} "
        f"connector={connector_id_to_delete} "
        f"credential={credential_id_to_delete} "
        f"docs_deleted={fence_data.num_tasks}"
    )

    redis_connector.delete.reset()


def validate_connector_deletion_fences(
    tenant_id: str,
    r: Redis,
    r_replica: Redis,
    r_celery: Redis,
    lock_beat: RedisLock,
) -> None:
    # building lookup table can be expensive, so we won't bother
    # validating until the queue is small
    CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN = 1024

    queue_len = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
    if queue_len > CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN:
        return

    queued_upsert_tasks = celery_get_queued_task_ids(
        OnyxCeleryQueues.CONNECTOR_DELETION, r_celery
    )

    # validate all existing connector deletion jobs
    lock_beat.reacquire()
    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
    for key in keys:
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
        if not key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
            continue

        validate_connector_deletion_fence(
            tenant_id,
            key_bytes,
            queued_upsert_tasks,
            r,
        )

        lock_beat.reacquire()

    return


def validate_connector_deletion_fence(
    tenant_id: str,
    key_bytes: bytes,
    queued_upsert_tasks: set[str],
    r: Redis,
) -> None:
    """Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
    This can happen if the indexing worker hard crashes or is terminated.
    Being in this bad state means the fence will never clear without help, so this function
    gives the help.

    How this works:
    1. This function renews the active signal with a 5 minute TTL under the following conditions
    1.2. When the task is seen in the redis queue
    1.3. When the task is seen in the reserved / prefetched list

    2. Externally, the active signal is renewed when:
    2.1. The fence is created
    2.2. The indexing watchdog checks the spawned task.

    3. The TTL allows us to get through the transitions on fence startup
    and when the task starts executing.

    More TTL clarification: it is seemingly impossible to exactly query Celery for
    whether a task is in the queue or currently executing.
    1. An unknown task id is always returned as state PENDING.
    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
    and the time it actually starts on the worker.

    queued_tasks: the celery queue of lightweight permission sync tasks
    reserved_tasks: prefetched tasks for sync task generator
    """
    # if the fence doesn't exist, there's nothing to do
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(
            f"validate_connector_deletion_fence - could not parse id from {fence_key}"
        )
        return

    cc_pair_id = int(cc_pair_id_str)
    # parse out metadata and initialize the helper class with it
    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))

    # check to see if the fence/payload exists
    if not redis_connector.delete.fenced:
        return

    # in the cloud, the payload format may have changed ...
    # it's a little sloppy, but just reset the fence for now if that happens
    # TODO: add intentional cleanup/abort logic
    try:
        payload = redis_connector.delete.payload
    except ValidationError:
        task_logger.exception(
            "validate_connector_deletion_fence - "
            "Resetting fence because fence schema is out of date: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key}"
        )

        redis_connector.delete.reset()
        return

    if not payload:
        return

    # OK, there's actually something for us to validate

    # look up every task in the current taskset in the celery queue
    # every entry in the taskset should have an associated entry in the celery task queue
    # because we get the celery tasks first, the entries in our own permissions taskset
    # should be roughly a subset of the tasks in celery

    # this check isn't very exact, but should be sufficient over a period of time
    # A single successful check over some number of attempts is sufficient.

    # TODO: if the number of tasks in celery is much lower than than the taskset length
    # we might be able to shortcut the lookup since by definition some of the tasks
    # must not exist in celery.

    tasks_scanned = 0
    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad

    for member in r.sscan_iter(redis_connector.delete.taskset_key):
        tasks_scanned += 1

        member_bytes = cast(bytes, member)
        member_str = member_bytes.decode("utf-8")
        if member_str in queued_upsert_tasks:
            continue

        tasks_not_in_celery += 1

    task_logger.info(
        f"validate_connector_deletion_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
    )

    # we're active if there are still tasks to run and those tasks all exist in celery
    if tasks_scanned > 0 and tasks_not_in_celery == 0:
        redis_connector.delete.set_active()
        return

    # we may want to enable this check if using the active task list somehow isn't good enough
    # if redis_connector_index.generator_locked():
    #     logger.info(f"{payload.celery_task_id} is currently executing.")

    # if we get here, we didn't find any direct indication that the associated celery tasks exist,
    # but they still might be there due to gaps in our ability to check states during transitions
    # Checking the active signal safeguards us against these transition periods
    # (which has a duration that allows us to bridge those gaps)
    if redis_connector.delete.active():
        return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
    task_logger.warning(
        "validate_connector_deletion_fence - "
        "Resetting fence because no associated celery tasks were found: "
        f"cc_pair={cc_pair_id} "
        f"fence={fence_key}"
    )

    redis_connector.delete.reset()
    return


================================================
FILE: backend/onyx/background/celery/tasks/docfetching/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/docfetching/task_creation_utils.py
================================================
from uuid import uuid4

from celery import Celery
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import SearchSettings


def try_creating_docfetching_task(
    celery_app: Celery,
    cc_pair: ConnectorCredentialPair,
    search_settings: SearchSettings,
    reindex: bool,
    db_session: Session,
    r: Redis,
    tenant_id: str,
) -> int | None:
    """Checks for any conditions that should block the indexing task from being
    created, then creates the task.

    Does not check for scheduling related conditions as this function
    is used to trigger indexing immediately.

    Now uses database-based coordination instead of Redis fencing.
    """

    LOCK_TIMEOUT = 30

    # we need to serialize any attempt to trigger indexing since it can be triggered
    # either via celery beat or manually (API call)
    lock: RedisLock = r.lock(
        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_indexing_task",
        timeout=LOCK_TIMEOUT,
    )

    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
    if not acquired:
        return None

    index_attempt_id = None
    try:
        # Basic status checks
        db_session.refresh(cc_pair)
        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
            return None

        # Generate custom task ID for tracking
        custom_task_id = f"docfetching_{cc_pair.id}_{search_settings.id}_{uuid4()}"

        # Try to create a new index attempt using database coordination
        # This replaces the Redis fencing mechanism
        index_attempt_id = IndexingCoordination.try_create_index_attempt(
            db_session=db_session,
            cc_pair_id=cc_pair.id,
            search_settings_id=search_settings.id,
            celery_task_id=custom_task_id,
            from_beginning=reindex,
        )

        if index_attempt_id is None:
            # Another indexing attempt is already running
            return None

        # Use higher priority for first-time indexing to ensure new connectors
        # get processed before re-indexing of existing connectors
        has_successful_attempt = cc_pair.last_successful_index_time is not None
        priority = (
            OnyxCeleryPriority.MEDIUM
            if has_successful_attempt
            else OnyxCeleryPriority.HIGH
        )

        # Send the task to Celery
        result = celery_app.send_task(
            OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,
            kwargs=dict(
                index_attempt_id=index_attempt_id,
                cc_pair_id=cc_pair.id,
                search_settings_id=search_settings.id,
                tenant_id=tenant_id,
            ),
            queue=OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
            task_id=custom_task_id,
            priority=priority,
        )
        if not result:
            raise RuntimeError("send_task for connector_doc_fetching_task failed.")

        task_logger.info(
            f"Created docfetching task: "
            f"cc_pair={cc_pair.id} "
            f"search_settings={search_settings.id} "
            f"attempt_id={index_attempt_id} "
            f"celery_task_id={custom_task_id}"
        )

        return index_attempt_id

    except Exception:
        task_logger.exception(
            f"try_creating_indexing_task - Unexpected exception: cc_pair={cc_pair.id} search_settings={search_settings.id}"
        )

        # Clean up on failure
        if index_attempt_id is not None:
            mark_attempt_failed(index_attempt_id, db_session)

        return None
    finally:
        if lock.owned():
            lock.release()

    return index_attempt_id


================================================
FILE: backend/onyx/background/celery/tasks/docfetching/tasks.py
================================================
import multiprocessing
import os
import time
import traceback
from time import sleep

import sentry_sdk
from celery import Celery
from celery import shared_task
from celery import Task

from onyx import __version__
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
from onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat
from onyx.background.celery.tasks.docprocessing.tasks import ConnectorIndexingLogBuilder
from onyx.background.celery.tasks.docprocessing.utils import IndexingCallback
from onyx.background.celery.tasks.models import DocProcessingContext
from onyx.background.celery.tasks.models import IndexingWatchdogTerminalStatus
from onyx.background.celery.tasks.models import SimpleJobResult
from onyx.background.indexing.job_client import SimpleJob
from onyx.background.indexing.job_client import SimpleJobClient
from onyx.background.indexing.job_client import SimpleJobException
from onyx.background.indexing.run_docfetching import run_docfetching_entrypoint
from onyx.configs.constants import CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import IndexingStatus
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import mark_attempt_canceled
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.redis.redis_connector import RedisConnector
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import SENTRY_DSN

logger = setup_logger()


def _verify_indexing_attempt(
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
) -> None:
    """
    Verify that the indexing attempt exists and is in the correct state.
    """

    with get_session_with_current_tenant() as db_session:
        attempt = get_index_attempt(db_session, index_attempt_id)

        if not attempt:
            raise SimpleJobException(
                f"docfetching_task - IndexAttempt not found: attempt_id={index_attempt_id}",
                code=IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND.code,
            )

        if attempt.connector_credential_pair_id != cc_pair_id:
            raise SimpleJobException(
                f"docfetching_task - CC pair mismatch: expected={cc_pair_id} actual={attempt.connector_credential_pair_id}",
                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
            )

        if attempt.search_settings_id != search_settings_id:
            raise SimpleJobException(
                f"docfetching_task - Search settings mismatch: expected={search_settings_id} actual={attempt.search_settings_id}",
                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
            )

        if attempt.status not in [
            IndexingStatus.NOT_STARTED,
            IndexingStatus.IN_PROGRESS,
        ]:
            raise SimpleJobException(
                f"docfetching_task - Invalid attempt status: attempt_id={index_attempt_id} status={attempt.status}",
                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
            )

        # Check for cancellation
        if IndexingCoordination.check_cancellation_requested(
            db_session, index_attempt_id
        ):
            raise SimpleJobException(
                f"docfetching_task - Cancellation requested: attempt_id={index_attempt_id}",
                code=IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL.code,
            )

    logger.info(
        f"docfetching_task - IndexAttempt verified: "
        f"attempt_id={index_attempt_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )


def docfetching_task(
    app: Celery,
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
    is_ee: bool,
    tenant_id: str,
) -> None:
    """
    This function is run in a SimpleJob as a new process. It is responsible for validating
    some stuff, but basically it just calls run_indexing_entrypoint.

    NOTE: if an exception is raised out of this task, the primary worker will detect
    that the task transitioned to a "READY" state but the generator_complete_key doesn't exist.
    This will cause the primary worker to abort the indexing attempt and clean up.
    """

    # Start heartbeat for this indexing attempt
    heartbeat_thread, stop_event = start_heartbeat(index_attempt_id)
    try:
        _docfetching_task(
            app, index_attempt_id, cc_pair_id, search_settings_id, is_ee, tenant_id
        )
    finally:
        stop_heartbeat(heartbeat_thread, stop_event)  # Stop heartbeat before exiting


def _docfetching_task(
    app: Celery,
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
    is_ee: bool,
    tenant_id: str,
) -> None:
    # Since connector_indexing_proxy_task spawns a new process using this function as
    # the entrypoint, we init Sentry here.
    if SENTRY_DSN:
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            traces_sample_rate=0.1,
            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
        logger.debug("Sentry DSN not provided, skipping Sentry initialization")

    logger.info(
        f"Indexing spawned task starting: "
        f"attempt={index_attempt_id} "
        f"tenant={tenant_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    # TODO: remove all fences, cause all signals to be set in postgres
    if redis_connector.delete.fenced:
        raise SimpleJobException(
            f"Indexing will not start because connector deletion is in progress: "
            f"attempt={index_attempt_id} "
            f"cc_pair={cc_pair_id} "
            f"fence={redis_connector.delete.fence_key}",
            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION.code,
        )

    if redis_connector.stop.fenced:
        raise SimpleJobException(
            f"Indexing will not start because a connector stop signal was detected: "
            f"attempt={index_attempt_id} "
            f"cc_pair={cc_pair_id} "
            f"fence={redis_connector.stop.fence_key}",
            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL.code,
        )

    # Verify the indexing attempt exists and is valid
    # This replaces the Redis fence payload waiting
    _verify_indexing_attempt(index_attempt_id, cc_pair_id, search_settings_id)

    try:
        with get_session_with_current_tenant() as db_session:
            attempt = get_index_attempt(db_session, index_attempt_id)
            if not attempt:
                raise SimpleJobException(
                    f"Index attempt not found: index_attempt={index_attempt_id}",
                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
                )

            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
            )

            if not cc_pair:
                raise SimpleJobException(
                    f"cc_pair not found: cc_pair={cc_pair_id}",
                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
                )

        # define a callback class
        callback = IndexingCallback(
            redis_connector,
        )

        logger.info(
            f"Indexing spawned task running entrypoint: attempt={index_attempt_id} "
            f"tenant={tenant_id} "
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )

        # This is where the heavy/real work happens
        run_docfetching_entrypoint(
            app,
            index_attempt_id,
            tenant_id,
            cc_pair_id,
            is_ee,
            callback=callback,
        )

    except ConnectorValidationError:
        raise SimpleJobException(
            f"Indexing task failed: attempt={index_attempt_id} "
            f"tenant={tenant_id} "
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}",
            code=IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR.code,
        )

    except Exception as e:
        logger.exception(
            f"Indexing spawned task failed: attempt={index_attempt_id} "
            f"tenant={tenant_id} "
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )

        # special bulletproofing ... truncate long exception messages
        # for exception types that require more args, this will fail
        # thus the try/except
        try:
            sanitized_e = type(e)(str(e)[:1024])
            sanitized_e.__traceback__ = e.__traceback__
            raise sanitized_e
        except Exception:
            raise e

    logger.info(
        f"Indexing spawned task finished: attempt={index_attempt_id} cc_pair={cc_pair_id} search_settings={search_settings_id}"
    )
    os._exit(0)  # ensure process exits cleanly


def process_job_result(
    job: SimpleJob,
    connector_source: str | None,
    index_attempt_id: int,
    log_builder: ConnectorIndexingLogBuilder,
) -> SimpleJobResult:
    result = SimpleJobResult()
    result.connector_source = connector_source

    if job.process:
        result.exit_code = job.process.exitcode

    if job.status != "error":
        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED
        return result

    ignore_exitcode = False

    # In EKS, there is an edge case where successful tasks return exit
    # code 1 in the cloud due to the set_spawn_method not sticking.
    # Workaround: check that the total number of batches is set, since this only
    # happens when docfetching completed successfully
    with get_session_with_current_tenant() as db_session:
        index_attempt = get_index_attempt(db_session, index_attempt_id)
        if index_attempt and index_attempt.total_batches is not None:
            ignore_exitcode = True

    if ignore_exitcode:
        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED
        task_logger.warning(
            log_builder.build(
                "Indexing watchdog - spawned task has non-zero exit code but completion signal is OK. Continuing...",
                exit_code=str(result.exit_code),
            )
        )
    else:
        if result.exit_code is not None:
            result.status = IndexingWatchdogTerminalStatus.from_code(result.exit_code)

        job_level_exception = job.exception()
        result.exception_str = f"Docfetching returned exit code {result.exit_code} with exception: {job_level_exception}"

    return result


@shared_task(
    name=OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,
    bind=True,
    acks_late=False,
    track_started=True,
)
def docfetching_proxy_task(
    self: Task,
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
    tenant_id: str,
) -> None:
    """
    This task is the entrypoint for the full indexing pipeline, which is composed of two tasks:
    docfetching and docprocessing.
    This task is spawned by "try_creating_indexing_task" which is called in the "check_for_indexing" task.

    This task spawns a new process for a new scheduled index attempt. That
    new process (which runs the docfetching_task function) does the following:

    1)  determines parameters of the indexing attempt (which connector indexing function to run,
        start and end time, from prev checkpoint or not), then run that connector. Specifically,
        connectors are responsible for reading data from an outside source and converting it to Onyx documents.
        At the moment these two steps (reading external data and converting to an Onyx document)
        are not parallelized in most connectors; that's a subject for future work.

    Each document batch produced by step 1 is stored in the file store, and a docprocessing task is spawned
    to process it. docprocessing involves the steps listed below.

    2) upserts documents to postgres (index_doc_batch_prepare)
    3) chunks each document (optionally adds context for contextual rag)
    4) embeds chunks (embed_chunks_with_failure_handling) via a call to the model server
    5) write chunks to vespa (write_chunks_to_vector_db_with_backoff)
    6) update document and indexing metadata in postgres
    7) pulls all document IDs from the source and compares those IDs to locally stored documents and deletes
    all locally stored IDs missing from the most recently pulled document ID list

    Some important notes:
    Invariants:
    - docfetching proxy tasks are spawned by check_for_indexing. The proxy then runs the docfetching_task wrapped in a watchdog.
      The watchdog is responsible for monitoring the docfetching_task and marking the index attempt as failed
      if it is not making progress.
    - All docprocessing tasks are spawned by a docfetching task.
    - all docfetching tasks, docprocessing tasks, and document batches in the file store are
      associated with a specific index attempt.
    - the index attempt status is the source of truth for what is currently happening with the index attempt.
      It is coupled with the creation/running of docfetching and docprocessing tasks as much as possible.

    How we deal with failures/ partial indexing:
    - non-checkpointed connectors/ new runs in general => delete the old document batches from the file store and do the new run
    - checkpointed connectors + resuming from checkpoint => reissue the old document batches and do a new run

    Misc:
    - most inter-process communication is handled in postgres, some is still in redis and we're trying to remove it
    - Heartbeat spawned in docfetching and docprocessing is how check_for_indexing monitors liveliness
    - progress based liveliness check: if nothing is done in 3-6 hours, mark the attempt as failed
    - TODO: task level timeouts (i.e. a connector stuck in an infinite loop)


    Comments below are from the old version and some may no longer be valid.
    TODO(rkuo): refactor this so that there is a single return path where we canonically
    log the result of running this function.

    Some more Richard notes:
    celery out of process task execution strategy is pool=prefork, but it uses fork,
    and forking is inherently unstable.

    To work around this, we use pool=threads and proxy our work to a spawned task.

    acks_late must be set to False. Otherwise, celery's visibility timeout will
    cause any task that runs longer than the timeout to be redispatched by the broker.
    There appears to be no good workaround for this, so we need to handle redispatching
    manually.
    NOTE: we try/except all db access in this function because as a watchdog, this function
    needs to be extremely stable.
    """
    # TODO: remove dependence on Redis
    start = time.monotonic()

    result = SimpleJobResult()

    ctx = DocProcessingContext(
        tenant_id=tenant_id,
        cc_pair_id=cc_pair_id,
        search_settings_id=search_settings_id,
        index_attempt_id=index_attempt_id,
    )

    log_builder = ConnectorIndexingLogBuilder(ctx)

    task_logger.info(
        log_builder.build(
            "Indexing watchdog - starting",
            mp_start_method=str(multiprocessing.get_start_method()),
        )
    )

    if not self.request.id:
        task_logger.error("self.request.id is None!")

    client = SimpleJobClient()
    task_logger.info(f"submitting docfetching_task with tenant_id={tenant_id}")

    job = client.submit(
        docfetching_task,
        self.app,
        index_attempt_id,
        cc_pair_id,
        search_settings_id,
        global_version.is_ee_version(),
        tenant_id,
    )

    if not job or not job.process:
        result.status = IndexingWatchdogTerminalStatus.SPAWN_FAILED
        task_logger.info(
            log_builder.build(
                "Indexing watchdog - finished",
                status=str(result.status.value),
                exit_code=str(result.exit_code),
            )
        )
        return

    # Ensure the process has moved out of the starting state
    num_waits = 0
    while True:
        if num_waits > 15:
            result.status = IndexingWatchdogTerminalStatus.SPAWN_NOT_ALIVE
            task_logger.info(
                log_builder.build(
                    "Indexing watchdog - finished",
                    status=str(result.status.value),
                    exit_code=str(result.exit_code),
                )
            )
            job.release()
            return

        if job.process.is_alive() or job.process.exitcode is not None:
            break

        sleep(1)
        num_waits += 1

    task_logger.info(
        log_builder.build(
            "Indexing watchdog - spawn succeeded",
            pid=str(job.process.pid),
        )
    )

    # Track the last time memory info was emitted
    last_memory_emit_time = 0.0

    try:
        with get_session_with_current_tenant() as db_session:
            index_attempt = get_index_attempt(
                db_session=db_session,
                index_attempt_id=index_attempt_id,
                eager_load_cc_pair=True,
            )
            if not index_attempt:
                raise RuntimeError("Index attempt not found")

            result.connector_source = (
                index_attempt.connector_credential_pair.connector.source.value
            )

        while True:
            sleep(5)

            time.monotonic()

            # if the job is done, clean up and break
            if job.done():
                try:
                    result = process_job_result(
                        job, result.connector_source, index_attempt_id, log_builder
                    )
                except Exception:
                    task_logger.exception(
                        log_builder.build(
                            "Indexing watchdog - spawned task exceptioned"
                        )
                    )
                finally:
                    job.release()
                    break

            # log the memory usage for tracking down memory leaks / connector-specific memory issues
            pid = job.process.pid
            if pid is not None:
                # Only emit memory info once per minute (60 seconds)
                current_time = time.monotonic()
                if current_time - last_memory_emit_time >= 60.0:
                    emit_process_memory(
                        pid,
                        "indexing_worker",
                        {
                            "cc_pair_id": cc_pair_id,
                            "search_settings_id": search_settings_id,
                            "index_attempt_id": index_attempt_id,
                        },
                    )
                    last_memory_emit_time = current_time

            # if the spawned task is still running, restart the check once again
            # if the index attempt is not in a finished status
            try:
                with get_session_with_current_tenant() as db_session:
                    index_attempt = get_index_attempt(
                        db_session=db_session, index_attempt_id=index_attempt_id
                    )

                    if not index_attempt:
                        continue

                    if not index_attempt.is_finished():
                        continue

            except Exception:
                task_logger.exception(
                    log_builder.build(
                        "Indexing watchdog - transient exception looking up index attempt"
                    )
                )
                continue

    except Exception as e:
        result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED
        if isinstance(e, ConnectorValidationError):
            # No need to expose full stack trace for validation errors
            result.exception_str = str(e)
        else:
            result.exception_str = traceback.format_exc()

    # handle exit and reporting
    elapsed = time.monotonic() - start
    if result.exception_str is not None:
        # print with exception
        try:
            with get_session_with_current_tenant() as db_session:
                attempt = get_index_attempt(db_session, ctx.index_attempt_id)

                # only mark failures if not already terminal,
                # otherwise we're overwriting potential real stack traces
                if attempt and not attempt.status.is_terminal():
                    failure_reason = (
                        f"Spawned task exceptioned: exit_code={result.exit_code}"
                    )
                    mark_attempt_failed(
                        ctx.index_attempt_id,
                        db_session,
                        failure_reason=failure_reason,
                        full_exception_trace=result.exception_str,
                    )
        except Exception:
            task_logger.exception(
                log_builder.build(
                    "Indexing watchdog - transient exception marking index attempt as failed"
                )
            )

        normalized_exception_str = "None"
        if result.exception_str:
            normalized_exception_str = result.exception_str.replace(
                "\n", "\\n"
            ).replace('"', '\\"')

        task_logger.warning(
            log_builder.build(
                "Indexing watchdog - finished",
                source=result.connector_source,
                status=result.status.value,
                exit_code=str(result.exit_code),
                exception=f'"{normalized_exception_str}"',
                elapsed=f"{elapsed:.2f}s",
            )
        )
        raise RuntimeError(f"Exception encountered: traceback={result.exception_str}")

    # print without exception
    if result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL:
        try:
            with get_session_with_current_tenant() as db_session:
                logger.exception(
                    f"Marking attempt {index_attempt_id} as canceled due to termination signal"
                )
                mark_attempt_canceled(
                    index_attempt_id,
                    db_session,
                    "Connector termination signal detected",
                )
        except Exception:
            task_logger.exception(
                log_builder.build(
                    "Indexing watchdog - transient exception marking index attempt as canceled"
                )
            )

        job.cancel()
    elif result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_ACTIVITY_TIMEOUT:
        try:
            with get_session_with_current_tenant() as db_session:
                mark_attempt_failed(
                    index_attempt_id,
                    db_session,
                    "Indexing watchdog - activity timeout exceeded: "
                    f"attempt={index_attempt_id} "
                    f"timeout={CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s",
                )
        except Exception:
            logger.exception(
                log_builder.build(
                    "Indexing watchdog - transient exception marking index attempt as failed"
                )
            )
        job.cancel()
    else:
        pass

    task_logger.info(
        log_builder.build(
            "Indexing watchdog - finished",
            source=result.connector_source,
            status=str(result.status.value),
            exit_code=str(result.exit_code),
            elapsed=f"{elapsed:.2f}s",
        )
    )


================================================
FILE: backend/onyx/background/celery/tasks/docprocessing/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/docprocessing/heartbeat.py
================================================
import contextvars
import threading

from sqlalchemy import update

from onyx.configs.constants import INDEXING_WORKER_HEARTBEAT_INTERVAL
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import IndexAttempt
from onyx.utils.logger import setup_logger

logger = setup_logger()


def start_heartbeat(index_attempt_id: int) -> tuple[threading.Thread, threading.Event]:
    """Start a heartbeat thread for the given index attempt"""
    stop_event = threading.Event()

    def heartbeat_loop() -> None:
        while not stop_event.wait(INDEXING_WORKER_HEARTBEAT_INTERVAL):
            try:
                with get_session_with_current_tenant() as db_session:
                    db_session.execute(
                        update(IndexAttempt)
                        .where(IndexAttempt.id == index_attempt_id)
                        .values(heartbeat_counter=IndexAttempt.heartbeat_counter + 1)
                    )
                    db_session.commit()
            except Exception:
                logger.exception(
                    "Failed to update heartbeat counter for index attempt %s",
                    index_attempt_id,
                )

    # Ensure contextvars from the outer context are available in the thread
    context = contextvars.copy_context()
    thread = threading.Thread(target=context.run, args=(heartbeat_loop,), daemon=True)
    thread.start()
    return thread, stop_event


def stop_heartbeat(thread: threading.Thread, stop_event: threading.Event) -> None:
    """Stop the heartbeat thread"""
    stop_event.set()
    thread.join(timeout=5)  # Wait up to 5 seconds for clean shutdown


================================================
FILE: backend/onyx/background/celery/tasks/docprocessing/tasks.py
================================================
import gc
import os
import time
import traceback
from collections import defaultdict
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any

from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from fastapi import HTTPException
from pydantic import BaseModel
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy import exists
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.celery.tasks.docfetching.task_creation_utils import (
    try_creating_docfetching_task,
)
from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
from onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat
from onyx.background.celery.tasks.docprocessing.utils import IndexingCallback
from onyx.background.celery.tasks.docprocessing.utils import is_in_repeated_error_state
from onyx.background.celery.tasks.docprocessing.utils import should_index
from onyx.background.celery.tasks.models import DocProcessingContext
from onyx.background.indexing.checkpointing_utils import cleanup_checkpoint
from onyx.background.indexing.checkpointing_utils import (
    get_index_attempts_with_old_checkpoints,
)
from onyx.background.indexing.index_attempt_utils import cleanup_index_attempts
from onyx.background.indexing.index_attempt_utils import get_old_index_attempts
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import AuthType
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import (
    fetch_indexable_standard_connector_credential_pair_ids,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import set_cc_pair_repeated_error_state
from onyx.db.connector_credential_pair import update_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingMode
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SwitchoverType
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import IndexAttemptError
from onyx.db.index_attempt import mark_attempt_canceled
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.index_attempt import mark_attempt_partially_succeeded
from onyx.db.index_attempt import mark_attempt_succeeded
from onyx.db.indexing_coordination import CoordinationStatus
from onyx.db.indexing_coordination import INDEXING_PROGRESS_TIMEOUT_HOURS
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.adapters.document_indexing_adapter import (
    DocumentIndexingBatchAdapter,
)
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_utils import is_fence
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import USAGE_LIMITS_ENABLED
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR

logger = setup_logger()

DOCPROCESSING_STALL_TIMEOUT_MULTIPLIER = 4
DOCPROCESSING_HEARTBEAT_TIMEOUT_MULTIPLIER = 24
# Heartbeat timeout: if no heartbeat received for 30 minutes, consider it dead
# This should be much longer than INDEXING_WORKER_HEARTBEAT_INTERVAL (30s)
HEARTBEAT_TIMEOUT_SECONDS = 30 * 60  # 30 minutes
INDEX_ATTEMPT_BATCH_SIZE = 500


def _get_fence_validation_block_expiration() -> int:
    """
    Compute the expiration time for the fence validation block signal.
    Base expiration is 60 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.
    """
    base_expiration = 60  # seconds

    if not MULTI_TENANT:
        return base_expiration

    try:
        beat_multiplier = OnyxRuntime.get_beat_multiplier()
    except Exception:
        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

    return int(base_expiration * beat_multiplier)


def validate_active_indexing_attempts(
    lock_beat: RedisLock,
) -> None:
    """
    Validates that active indexing attempts are still alive by checking heartbeat.
    If no heartbeat has been received for a certain amount of time, mark the attempt as failed.

    This uses the heartbeat_counter field which is incremented by active worker threads
    every INDEXING_WORKER_HEARTBEAT_INTERVAL seconds.
    """
    logger.info("Validating active indexing attempts")

    with get_session_with_current_tenant() as db_session:
        # Find all active indexing attempts
        active_attempts = (
            db_session.execute(
                select(IndexAttempt).where(
                    IndexAttempt.status.in_([IndexingStatus.IN_PROGRESS]),
                    IndexAttempt.celery_task_id.isnot(None),
                )
            )
            .scalars()
            .all()
        )

        for attempt in active_attempts:
            lock_beat.reacquire()

            # Initialize timeout for each attempt to prevent state pollution
            heartbeat_timeout_seconds = HEARTBEAT_TIMEOUT_SECONDS

            # Double-check the attempt still exists and has the same status
            fresh_attempt = get_index_attempt(db_session, attempt.id)
            if not fresh_attempt or fresh_attempt.status.is_terminal():
                continue

            # Check if this attempt has been updated with heartbeat tracking
            if fresh_attempt.last_heartbeat_time is None:
                # First time seeing this attempt - initialize heartbeat tracking
                fresh_attempt.last_heartbeat_value = fresh_attempt.heartbeat_counter
                fresh_attempt.last_heartbeat_time = datetime.now(timezone.utc)
                db_session.commit()

                task_logger.info(
                    f"Initialized heartbeat tracking for attempt {fresh_attempt.id}: counter={fresh_attempt.heartbeat_counter}"
                )
                continue

            # Check if the heartbeat counter has advanced since last check
            current_counter = fresh_attempt.heartbeat_counter
            last_known_counter = fresh_attempt.last_heartbeat_value
            last_check_time = fresh_attempt.last_heartbeat_time

            task_logger.debug(
                f"Checking heartbeat for attempt {fresh_attempt.id}: "
                f"current_counter={current_counter} "
                f"last_known_counter={last_known_counter} "
                f"last_check_time={last_check_time}"
            )

            if current_counter > last_known_counter:
                # Heartbeat has advanced - worker is alive
                fresh_attempt.last_heartbeat_value = current_counter
                fresh_attempt.last_heartbeat_time = datetime.now(timezone.utc)
                db_session.commit()

                task_logger.debug(
                    f"Heartbeat advanced for attempt {fresh_attempt.id}: new_counter={current_counter}"
                )
                continue

            if fresh_attempt.total_batches and fresh_attempt.completed_batches == 0:
                heartbeat_timeout_seconds = (
                    HEARTBEAT_TIMEOUT_SECONDS
                    * DOCPROCESSING_HEARTBEAT_TIMEOUT_MULTIPLIER
                )
            cutoff_time = datetime.now(timezone.utc) - timedelta(
                seconds=heartbeat_timeout_seconds
            )

            # Heartbeat hasn't advanced - check if it's been too long
            if last_check_time >= cutoff_time:
                task_logger.debug(
                    f"Heartbeat hasn't advanced for attempt {fresh_attempt.id} but still within timeout window"
                )
                continue

            # No heartbeat for too long - mark as failed
            failure_reason = (
                f"No heartbeat received for {heartbeat_timeout_seconds} seconds"
            )

            task_logger.warning(
                f"Heartbeat timeout for attempt {fresh_attempt.id}: "
                f"last_heartbeat_time={last_check_time} "
                f"cutoff_time={cutoff_time} "
                f"counter={current_counter}"
            )

            try:
                mark_attempt_failed(
                    fresh_attempt.id,
                    db_session,
                    failure_reason=failure_reason,
                )

                task_logger.error(
                    f"Marked attempt {fresh_attempt.id} as failed due to heartbeat timeout"
                )

            except Exception:
                task_logger.exception(
                    f"Failed to mark attempt {fresh_attempt.id} as failed due to heartbeat timeout"
                )


class ConnectorIndexingLogBuilder:
    def __init__(self, ctx: DocProcessingContext):
        self.ctx = ctx

    def build(self, msg: str, **kwargs: Any) -> str:
        msg_final = (
            f"{msg}: "
            f"tenant_id={self.ctx.tenant_id} "
            f"attempt={self.ctx.index_attempt_id} "
            f"cc_pair={self.ctx.cc_pair_id} "
            f"search_settings={self.ctx.search_settings_id}"
        )

        # Append extra keyword arguments in logfmt style
        if kwargs:
            extra_logfmt = " ".join(f"{key}={value}" for key, value in kwargs.items())
            msg_final = f"{msg_final} {extra_logfmt}"

        return msg_final


def monitor_indexing_attempt_progress(
    attempt: IndexAttempt, tenant_id: str, db_session: Session, task: Task
) -> None:
    """
    TODO: rewrite this docstring
    Monitor the progress of an indexing attempt using database coordination.
    This replaces the Redis fence-based monitoring.

    Race condition handling:
    - Uses database coordination status to track progress
    - Only updates CC pair status based on confirmed database state
    - Handles concurrent completion gracefully
    """
    if not attempt.celery_task_id:
        # Attempt hasn't been assigned a task yet
        return

    cc_pair = get_connector_credential_pair_from_id(
        db_session, attempt.connector_credential_pair_id
    )
    if not cc_pair:
        task_logger.warning(f"CC pair not found for attempt {attempt.id}")
        return

    # Check if the CC Pair should be moved to INITIAL_INDEXING
    if cc_pair.status == ConnectorCredentialPairStatus.SCHEDULED:
        cc_pair.status = ConnectorCredentialPairStatus.INITIAL_INDEXING
        db_session.commit()

    # Get coordination status to track progress

    coordination_status = IndexingCoordination.get_coordination_status(
        db_session, attempt.id
    )

    current_db_time = get_db_current_time(db_session)
    total_batches: int | str = (
        coordination_status.total_batches
        if coordination_status.total_batches is not None
        else "?"
    )
    if coordination_status.found:
        task_logger.info(
            f"Indexing attempt progress: "
            f"attempt={attempt.id} "
            f"cc_pair={attempt.connector_credential_pair_id} "
            f"search_settings={attempt.search_settings_id} "
            f"completed_batches={coordination_status.completed_batches} "
            f"total_batches={total_batches} "
            f"total_docs={coordination_status.total_docs} "
            f"total_failures={coordination_status.total_failures}"
            f"elapsed={(current_db_time - attempt.time_created).seconds}"
        )

    if coordination_status.cancellation_requested:
        task_logger.info(f"Indexing attempt {attempt.id} has been cancelled")
        mark_attempt_canceled(attempt.id, db_session)
        return

    storage = get_document_batch_storage(
        attempt.connector_credential_pair_id, attempt.id
    )

    # Check task completion using Celery
    try:
        check_indexing_completion(
            attempt.id, coordination_status, storage, tenant_id, task
        )
    except Exception as e:
        logger.exception(
            f"Failed to monitor document processing completion: attempt={attempt.id} error={str(e)}"
        )

        # Mark the attempt as failed if monitoring fails
        try:
            with get_session_with_current_tenant() as db_session:
                mark_attempt_failed(
                    attempt.id,
                    db_session,
                    failure_reason=f"Processing monitoring failed: {str(e)}",
                    full_exception_trace=traceback.format_exc(),
                )

        except Exception:
            logger.exception("Failed to mark attempt as failed")

        # Try to clean up storage
        try:
            logger.info(f"Cleaning up storage after monitoring failure: {storage}")
            storage.cleanup_all_batches()
        except Exception:
            logger.exception("Failed to cleanup storage after monitoring failure")


def _resolve_indexing_entity_errors(
    cc_pair_id: int,
    db_session: Session,
) -> None:
    unresolved_errors = get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair_id,
        unresolved_only=True,
        db_session=db_session,
    )
    for error in unresolved_errors:
        if error.entity_id:
            error.is_resolved = True
            db_session.add(error)
    db_session.commit()


def check_indexing_completion(
    index_attempt_id: int,
    coordination_status: CoordinationStatus,
    storage: DocumentBatchStorage,
    tenant_id: str,
    task: Task,
) -> None:

    logger.info(
        f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
    )

    # Check if indexing is complete and all batches are processed
    batches_total = coordination_status.total_batches
    batches_processed = coordination_status.completed_batches
    indexing_completed = (
        batches_total is not None and batches_processed >= batches_total
    )

    logger.info(
        f"Indexing status: "
        f"indexing_completed={indexing_completed} "
        f"batches_processed={batches_processed}/{batches_total if batches_total is not None else '?'} "
        f"total_docs={coordination_status.total_docs} "
        f"total_chunks={coordination_status.total_chunks} "
        f"total_failures={coordination_status.total_failures}"
    )

    # Update progress tracking and check for stalls
    with get_session_with_current_tenant() as db_session:
        stalled_timeout_hours = INDEXING_PROGRESS_TIMEOUT_HOURS
        # Index attempts that are waiting between docfetching and
        # docprocessing get a generous stalling timeout
        if batches_total is not None and batches_processed == 0:
            stalled_timeout_hours = (
                stalled_timeout_hours * DOCPROCESSING_STALL_TIMEOUT_MULTIPLIER
            )

        timed_out = not IndexingCoordination.update_progress_tracking(
            db_session,
            index_attempt_id,
            batches_processed,
            timeout_hours=stalled_timeout_hours,
        )

        # Check for stalls (3-6 hour timeout). Only applies to in-progress attempts.
        attempt = get_index_attempt(db_session, index_attempt_id)
        if attempt and timed_out:
            if attempt.status == IndexingStatus.IN_PROGRESS:
                logger.error(
                    f"Indexing attempt {index_attempt_id} has been indexing for "
                    f"{stalled_timeout_hours // 2}-{stalled_timeout_hours} hours without progress. "
                    f"Marking it as failed."
                )
                mark_attempt_failed(
                    index_attempt_id, db_session, failure_reason="Stalled indexing"
                )
            elif (
                attempt.status == IndexingStatus.NOT_STARTED and attempt.celery_task_id
            ):
                # Check if the task exists in the celery queue
                # This handles the case where Redis dies after task creation but before task execution
                redis_celery = celery_get_broker_client(task.app)
                task_exists = celery_find_task(
                    attempt.celery_task_id,
                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
                    redis_celery,
                )
                unacked_task_ids = celery_get_unacked_task_ids(
                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, redis_celery
                )

                if not task_exists and attempt.celery_task_id not in unacked_task_ids:
                    # there is a race condition where the docfetching task has been taken off
                    # the queues (i.e. started) but the indexing attempt still has a status of
                    # Not Started because the switch to in progress takes like 0.1 seconds.
                    # sleep a bit and confirm that the attempt is still not in progress.
                    time.sleep(1)
                    attempt = get_index_attempt(db_session, index_attempt_id)
                    if attempt and attempt.status == IndexingStatus.NOT_STARTED:
                        logger.error(
                            f"Task {attempt.celery_task_id} attached to indexing attempt "
                            f"{index_attempt_id} does not exist in the queue. "
                            f"Marking indexing attempt as failed."
                        )
                        mark_attempt_failed(
                            index_attempt_id,
                            db_session,
                            failure_reason="Task not in queue",
                        )
            else:
                logger.info(
                    f"Indexing attempt {index_attempt_id} is {attempt.status}. 3-6 hours without heartbeat "
                    "but task is in the queue. Likely underprovisioned docfetching worker."
                )
                # Update last progress time so we won't time out again for another 3 hours
                IndexingCoordination.update_progress_tracking(
                    db_session,
                    index_attempt_id,
                    batches_processed,
                    force_update_progress=True,
                )

    # check again on the next check_for_indexing task
    # TODO: on the cloud this is currently 25 minutes at most, which
    # is honestly too slow. We should either increase the frequency of
    # this task or change where we check for completion.
    if not indexing_completed:
        return

    # If processing is complete, handle completion
    logger.info(f"Connector indexing finished for index attempt {index_attempt_id}.")

    # All processing is complete
    total_failures = coordination_status.total_failures

    with get_session_with_current_tenant() as db_session:
        if total_failures == 0:
            attempt = mark_attempt_succeeded(index_attempt_id, db_session)
            logger.info(f"Index attempt {index_attempt_id} completed successfully")
        else:
            attempt = mark_attempt_partially_succeeded(index_attempt_id, db_session)
            logger.info(
                f"Index attempt {index_attempt_id} completed with {total_failures} failures"
            )

        # Update CC pair status if successful
        cc_pair = get_connector_credential_pair_from_id(
            db_session, attempt.connector_credential_pair_id
        )
        if cc_pair is None:
            raise RuntimeError(
                f"CC pair {attempt.connector_credential_pair_id} not found in database"
            )

        if attempt.status.is_successful():
            # NOTE: we define the last successful index time as the time the last successful
            # attempt finished. This is distinct from the poll_range_end of the last successful
            # attempt, which is the time up to which documents have been fetched.
            cc_pair.last_successful_index_time = attempt.time_updated
            if cc_pair.status in [
                ConnectorCredentialPairStatus.SCHEDULED,
                ConnectorCredentialPairStatus.INITIAL_INDEXING,
            ]:
                # User file connectors must be paused on success
                # NOTE: _run_indexing doesn't update connectors if the index attempt is the future embedding model
                cc_pair.status = ConnectorCredentialPairStatus.ACTIVE
                db_session.commit()

            mt_cloud_telemetry(
                tenant_id=tenant_id,
                distinct_id=tenant_id,
                event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
            )

            # Clear repeated error state on success
            if cc_pair.in_repeated_error_state:
                cc_pair.in_repeated_error_state = False
                db_session.commit()

            if attempt.status == IndexingStatus.SUCCESS:
                logger.info(
                    f"Resolving indexing entity errors for attempt {index_attempt_id}"
                )
                _resolve_indexing_entity_errors(
                    cc_pair_id=attempt.connector_credential_pair_id,
                    db_session=db_session,
                )

    # Clean up FileStore storage (still needed for document batches during transition)
    try:
        logger.info(f"Cleaning up storage after indexing completion: {storage}")
        storage.cleanup_all_batches()
    except Exception:
        logger.exception("Failed to clean up document batches - continuing")

    logger.info(f"Database coordination completed for attempt {index_attempt_id}")


def active_indexing_attempt(
    cc_pair_id: int,
    search_settings_id: int,
    db_session: Session,
) -> bool:
    """
    Check if there's already an active indexing attempt for this CC pair + search settings.
    This prevents race conditions where multiple indexing attempts could be created.
    We check for any non-terminal status (NOT_STARTED, IN_PROGRESS).

    Returns True if there's an active indexing attempt, False otherwise.
    """
    active_indexing_attempt = db_session.execute(
        select(
            exists().where(
                IndexAttempt.connector_credential_pair_id == cc_pair_id,
                IndexAttempt.search_settings_id == search_settings_id,
                IndexAttempt.status.in_(
                    [
                        IndexingStatus.NOT_STARTED,
                        IndexingStatus.IN_PROGRESS,
                    ]
                ),
            )
        )
    ).scalar()

    if active_indexing_attempt:
        task_logger.debug(
            f"active_indexing_attempt - Skipping due to active indexing attempt: "
            f"cc_pair={cc_pair_id} search_settings={search_settings_id}"
        )

    return bool(active_indexing_attempt)


def _kickoff_indexing_tasks(
    celery_app: Celery,
    db_session: Session,
    search_settings: SearchSettings,
    cc_pair_ids: list[int],
    secondary_index_building: bool,
    redis_client: Redis,
    lock_beat: RedisLock,
    tenant_id: str,
) -> int:
    """Kick off indexing tasks for the given cc_pair_ids and search_settings.

    Returns the number of tasks successfully created.
    """
    tasks_created = 0

    for cc_pair_id in cc_pair_ids:
        lock_beat.reacquire()

        # Lightweight check prior to fetching cc pair
        if active_indexing_attempt(
            cc_pair_id=cc_pair_id,
            search_settings_id=search_settings.id,
            db_session=db_session,
        ):
            continue

        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
        )
        if not cc_pair:
            task_logger.warning(
                f"_kickoff_indexing_tasks - CC pair not found: cc_pair={cc_pair_id}"
            )
            continue

        # Heavyweight check after fetching cc pair
        if not should_index(
            cc_pair=cc_pair,
            search_settings_instance=search_settings,
            secondary_index_building=secondary_index_building,
            db_session=db_session,
        ):
            task_logger.debug(
                f"_kickoff_indexing_tasks - Not indexing cc_pair_id: {cc_pair_id} "
                f"search_settings={search_settings.id}, "
                f"secondary_index_building={secondary_index_building}"
            )
            continue

        task_logger.debug(
            f"_kickoff_indexing_tasks - Will index cc_pair_id: {cc_pair_id} "
            f"search_settings={search_settings.id}, "
            f"secondary_index_building={secondary_index_building}"
        )

        reindex = False
        # the indexing trigger is only checked and cleared with the current search settings
        if search_settings.status.is_current() and cc_pair.indexing_trigger is not None:
            if cc_pair.indexing_trigger == IndexingMode.REINDEX:
                reindex = True

            task_logger.info(
                f"_kickoff_indexing_tasks - Connector indexing manual trigger detected: "
                f"cc_pair={cc_pair.id} "
                f"search_settings={search_settings.id} "
                f"indexing_mode={cc_pair.indexing_trigger}"
            )

            mark_ccpair_with_indexing_trigger(cc_pair.id, None, db_session)

        # using a task queue and only allowing one task per cc_pair/search_setting
        # prevents us from starving out certain attempts
        attempt_id = try_creating_docfetching_task(
            celery_app,
            cc_pair,
            search_settings,
            reindex,
            db_session,
            redis_client,
            tenant_id,
        )

        if attempt_id is not None:
            task_logger.info(
                f"Connector indexing queued: index_attempt={attempt_id} cc_pair={cc_pair.id} search_settings={search_settings.id}"
            )
            tasks_created += 1
        else:
            task_logger.error(
                f"Failed to create indexing task: cc_pair={cc_pair.id} search_settings={search_settings.id}"
            )

    return tasks_created


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_INDEXING,
    soft_time_limit=300,
    bind=True,
)
def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
    """a lightweight task used to kick off the pipeline of indexing tasks.
    Occcasionally does some validation of existing state to clear up error conditions.

    This task is the entrypoint for the full "indexing pipeline", which is composed
    of two tasks: "docfetching" and "docprocessing". More details in
    the docfetching task (OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK).

    For cc pairs that should be indexed (see should_index()), this task
    calls try_creating_docfetching_task, which creates a docfetching task.
    All the logic for determining what state the indexing pipeline is in
    w.r.t previous failed attempt, checkpointing, etc is handled in the docfetching task.
    """

    time_start = time.monotonic()
    task_logger.warning("check_for_indexing - Starting")

    tasks_created = 0
    locked = False
    redis_client = get_redis_client()
    redis_client_replica = get_redis_replica_client()

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
    # redis_client_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_INDEXING_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        locked = True

        # SPECIAL 0/3: sync lookup table for active fences
        # we want to run this less frequently than the overall task
        if not redis_client.exists(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE):
            # build a lookup table of existing fences
            # this is just a migration concern and should be unnecessary once
            # lookup tables are rolled out
            for key_bytes in redis_client_replica.scan_iter(
                count=SCAN_ITER_COUNT_DEFAULT
            ):
                if is_fence(key_bytes) and not redis_client.sismember(
                    OnyxRedisConstants.ACTIVE_FENCES, key_bytes
                ):
                    logger.warning(f"Adding {key_bytes} to the lookup table.")
                    redis_client.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)

            redis_client.set(
                OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE,
                1,
                ex=OnyxRuntime.get_build_fence_lookup_table_interval(),
            )

        # 1/3: KICKOFF

        # check for search settings swap
        with get_session_with_current_tenant() as db_session:
            old_search_settings = check_and_perform_index_swap(db_session=db_session)
            current_search_settings = get_current_search_settings(db_session)
            # So that the first time users aren't surprised by really slow speed of first
            # batch of documents indexed
            if current_search_settings.provider_type is None and not MULTI_TENANT:
                if old_search_settings:
                    embedding_model = EmbeddingModel.from_db_model(
                        search_settings=current_search_settings,
                        server_host=INDEXING_MODEL_SERVER_HOST,
                        server_port=INDEXING_MODEL_SERVER_PORT,
                    )

                    # only warm up if search settings were changed
                    warm_up_bi_encoder(
                        embedding_model=embedding_model,
                    )

        # gather search settings and indexable cc_pair_ids
        # indexable CC pairs include everything for future model and only active cc pairs for current model
        lock_beat.reacquire()
        with get_session_with_current_tenant() as db_session:
            # Get CC pairs for primary search settings
            standard_cc_pair_ids = (
                fetch_indexable_standard_connector_credential_pair_ids(
                    db_session, active_cc_pairs_only=True
                )
            )

            primary_cc_pair_ids = standard_cc_pair_ids

            # Get CC pairs for secondary search settings
            secondary_cc_pair_ids: list[int] = []
            secondary_search_settings = get_secondary_search_settings(db_session)
            if secondary_search_settings:
                # For ACTIVE_ONLY, we skip paused connectors
                include_paused = (
                    secondary_search_settings.switchover_type
                    != SwitchoverType.ACTIVE_ONLY
                )
                standard_cc_pair_ids = (
                    fetch_indexable_standard_connector_credential_pair_ids(
                        db_session, active_cc_pairs_only=not include_paused
                    )
                )

                secondary_cc_pair_ids = standard_cc_pair_ids

        # Flag CC pairs in repeated error state for primary/current search settings
        with get_session_with_current_tenant() as db_session:
            for cc_pair_id in primary_cc_pair_ids:
                lock_beat.reacquire()

                cc_pair = get_connector_credential_pair_from_id(
                    db_session=db_session,
                    cc_pair_id=cc_pair_id,
                )

                # if already in repeated error state, don't do anything
                # this is important so that we don't keep pausing the connector
                # immediately upon a user un-pausing it to manually re-trigger and
                # recover.
                if (
                    cc_pair
                    and not cc_pair.in_repeated_error_state
                    and is_in_repeated_error_state(
                        cc_pair=cc_pair,
                        search_settings_id=current_search_settings.id,
                        db_session=db_session,
                    )
                ):
                    set_cc_pair_repeated_error_state(
                        db_session=db_session,
                        cc_pair_id=cc_pair_id,
                        in_repeated_error_state=True,
                    )
                    # When entering repeated error state, also pause the connector
                    # to prevent continued indexing retry attempts burning through embedding credits.
                    # NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
                    # models. Also, they are more prone to repeated failures -> eventual success.
                    if AUTH_TYPE == AuthType.CLOUD:
                        update_connector_credential_pair_from_id(
                            db_session=db_session,
                            cc_pair_id=cc_pair.id,
                            status=ConnectorCredentialPairStatus.PAUSED,
                        )

        # NOTE: At this point, we haven't done heavy checks on whether or not the CC pairs should actually be indexed
        # Heavy check, should_index(), is called in _kickoff_indexing_tasks
        with get_session_with_current_tenant() as db_session:
            # Primary first
            tasks_created += _kickoff_indexing_tasks(
                celery_app=self.app,
                db_session=db_session,
                search_settings=current_search_settings,
                cc_pair_ids=primary_cc_pair_ids,
                secondary_index_building=secondary_search_settings is not None,
                redis_client=redis_client,
                lock_beat=lock_beat,
                tenant_id=tenant_id,
            )

            # Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)
            if (
                secondary_search_settings
                and secondary_search_settings.switchover_type != SwitchoverType.INSTANT
                and secondary_cc_pair_ids
            ):
                tasks_created += _kickoff_indexing_tasks(
                    celery_app=self.app,
                    db_session=db_session,
                    search_settings=secondary_search_settings,
                    cc_pair_ids=secondary_cc_pair_ids,
                    secondary_index_building=True,
                    redis_client=redis_client,
                    lock_beat=lock_beat,
                    tenant_id=tenant_id,
                )
            elif (
                secondary_search_settings
                and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
            ):
                task_logger.info(
                    f"Skipping secondary indexing: switchover_type=INSTANT for search_settings={secondary_search_settings.id}"
                )

        # 2/3: VALIDATE
        # Check for inconsistent index attempts - active attempts without task IDs
        # This can happen if attempt creation fails partway through
        lock_beat.reacquire()
        with get_session_with_current_tenant() as db_session:
            inconsistent_attempts = (
                db_session.execute(
                    select(IndexAttempt).where(
                        IndexAttempt.status.in_(
                            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
                        ),
                        IndexAttempt.celery_task_id.is_(None),
                    )
                )
                .scalars()
                .all()
            )

            for attempt in inconsistent_attempts:
                lock_beat.reacquire()

                # Double-check the attempt still has the inconsistent state
                fresh_attempt = get_index_attempt(db_session, attempt.id)
                if (
                    not fresh_attempt
                    or fresh_attempt.celery_task_id
                    or fresh_attempt.status.is_terminal()
                ):
                    continue

                failure_reason = (
                    f"Inconsistent index attempt found - active status without Celery task: "
                    f"index_attempt={attempt.id} "
                    f"cc_pair={attempt.connector_credential_pair_id} "
                    f"search_settings={attempt.search_settings_id}"
                )
                task_logger.error(failure_reason)
                mark_attempt_failed(
                    attempt.id, db_session, failure_reason=failure_reason
                )

        lock_beat.reacquire()
        # we want to run this less frequently than the overall task
        if not redis_client.exists(OnyxRedisSignals.BLOCK_VALIDATE_INDEXING_FENCES):
            # Check for orphaned index attempts that have Celery task IDs but no actual running tasks
            # This can happen if workers crash or tasks are terminated unexpectedly
            # We reuse the same Redis signal name for backwards compatibility
            try:
                validate_active_indexing_attempts(lock_beat)
            except Exception:
                task_logger.exception(
                    "Exception while validating active indexing attempts"
                )

            redis_client.set(
                OnyxRedisSignals.BLOCK_VALIDATE_INDEXING_FENCES,
                1,
                ex=_get_fence_validation_block_expiration(),
            )

        # 3/3: FINALIZE - Monitor active indexing attempts using database
        lock_beat.reacquire()
        with get_session_with_current_tenant() as db_session:
            # Monitor all active indexing attempts directly from the database
            # This replaces the Redis fence-based monitoring
            active_attempts = (
                db_session.execute(
                    select(IndexAttempt).where(
                        IndexAttempt.status.in_(
                            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
                        )
                    )
                )
                .scalars()
                .all()
            )

            for attempt in active_attempts:
                try:
                    monitor_indexing_attempt_progress(
                        attempt, tenant_id, db_session, self
                    )
                except Exception:
                    task_logger.exception(f"Error monitoring attempt {attempt.id}")

                lock_beat.reacquire()

    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during indexing check")
    finally:
        if locked:
            if lock_beat.owned():
                lock_beat.release()
            else:
                task_logger.error(
                    f"check_for_indexing - Lock not owned on completion: tenant={tenant_id}"
                )
                redis_lock_dump(lock_beat, redis_client)

    time_elapsed = time.monotonic() - time_start
    task_logger.info(f"check_for_indexing finished: elapsed={time_elapsed:.2f}")
    return tasks_created


# primary
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
    soft_time_limit=300,
    bind=True,
)
def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
    """Clean up old checkpoints that are older than 7 days."""
    locked = False
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock.acquire(blocking=False):
        return None

    try:
        locked = True
        with get_session_with_current_tenant() as db_session:
            old_attempts = get_index_attempts_with_old_checkpoints(db_session)
            for attempt in old_attempts:
                task_logger.info(
                    f"Cleaning up checkpoint for index attempt {attempt.id}"
                )
                self.app.send_task(
                    OnyxCeleryTask.CLEANUP_CHECKPOINT,
                    kwargs={
                        "index_attempt_id": attempt.id,
                        "tenant_id": tenant_id,
                    },
                    queue=OnyxCeleryQueues.CHECKPOINT_CLEANUP,
                    priority=OnyxCeleryPriority.MEDIUM,
                )
    except Exception:
        task_logger.exception("Unexpected exception during checkpoint cleanup")
        return None
    finally:
        if locked:
            if lock.owned():
                lock.release()
            else:
                task_logger.error(
                    f"check_for_checkpoint_cleanup - Lock not owned on completion: tenant={tenant_id}"
                )


# light worker
@shared_task(
    name=OnyxCeleryTask.CLEANUP_CHECKPOINT,
    bind=True,
)
def cleanup_checkpoint_task(
    self: Task,  # noqa: ARG001
    *,
    index_attempt_id: int,
    tenant_id: str | None,
) -> None:
    """Clean up a checkpoint for a given index attempt"""

    start = time.monotonic()

    try:
        with get_session_with_current_tenant() as db_session:
            cleanup_checkpoint(db_session, index_attempt_id)
    finally:
        elapsed = time.monotonic() - start

        task_logger.info(
            f"cleanup_checkpoint_task completed: tenant_id={tenant_id} index_attempt_id={index_attempt_id} elapsed={elapsed:.2f}"
        )


# primary
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
    soft_time_limit=300,
    bind=True,
)
def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
    """Clean up old index attempts that are older than 7 days."""
    locked = False
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock.acquire(blocking=False):
        task_logger.info(
            f"check_for_index_attempt_cleanup - Lock not acquired: tenant={tenant_id}"
        )
        return None

    try:
        locked = True
        batch_size = INDEX_ATTEMPT_BATCH_SIZE
        with get_session_with_current_tenant() as db_session:
            old_attempts = get_old_index_attempts(db_session)
            # We need to batch this because during the initial run, the system might have a large number
            # of index attempts since they were never deleted. After that, the number will be
            # significantly lower.
            if len(old_attempts) == 0:
                task_logger.info(
                    "check_for_index_attempt_cleanup - No index attempts to cleanup"
                )
                return

            for i in range(0, len(old_attempts), batch_size):
                batch = old_attempts[i : i + batch_size]
                task_logger.info(
                    f"check_for_index_attempt_cleanup - Cleaning up index attempts {len(batch)}"
                )
                self.app.send_task(
                    OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,
                    kwargs={
                        "index_attempt_ids": [attempt.id for attempt in batch],
                        "tenant_id": tenant_id,
                    },
                    queue=OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP,
                    priority=OnyxCeleryPriority.MEDIUM,
                )
    except Exception:
        task_logger.exception("Unexpected exception during index attempt cleanup check")
        return None
    finally:
        if locked:
            if lock.owned():
                lock.release()
            else:
                task_logger.error(
                    f"check_for_index_attempt_cleanup - Lock not owned on completion: tenant={tenant_id}"
                )


# light worker
@shared_task(
    name=OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,
    bind=True,
)
def cleanup_index_attempt_task(
    self: Task,  # noqa: ARG001
    *,
    index_attempt_ids: list[int],
    tenant_id: str,
) -> None:
    """Clean up an index attempt"""
    start = time.monotonic()

    try:
        with get_session_with_current_tenant() as db_session:
            cleanup_index_attempts(db_session, index_attempt_ids)

    finally:
        elapsed = time.monotonic() - start

        task_logger.info(
            f"cleanup_index_attempt_task completed: tenant_id={tenant_id} "
            f"index_attempt_ids={index_attempt_ids} "
            f"elapsed={elapsed:.2f}"
        )


class DocumentProcessingBatch(BaseModel):
    """Data structure for a document processing batch."""

    batch_id: str
    index_attempt_id: int
    cc_pair_id: int
    tenant_id: str
    batch_num: int


def _check_failure_threshold(
    total_failures: int,
    document_count: int,
    batch_num: int,
    last_failure: ConnectorFailure | None,
) -> None:
    """Check if we've hit the failure threshold and raise an appropriate exception if so.

    We consider the threshold hit if:
    1. We have more than 3 failures AND
    2. Failures account for more than 10% of processed documents
    """
    failure_ratio = total_failures / (document_count or 1)

    FAILURE_THRESHOLD = 3
    FAILURE_RATIO_THRESHOLD = 0.1
    if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:
        logger.error(
            f"Connector run failed with '{total_failures}' errors after '{batch_num}' batches."
        )
        if last_failure and last_failure.exception:
            raise last_failure.exception from last_failure.exception

        raise RuntimeError(
            f"Connector run encountered too many errors, aborting. Last error: {last_failure}"
        )


def _resolve_indexing_document_errors(
    cc_pair_id: int,
    failures: list[ConnectorFailure],
    document_batch: list[Document],
) -> None:
    with get_session_with_current_tenant() as db_session_temp:
        # get previously unresolved errors
        unresolved_errors = get_index_attempt_errors_for_cc_pair(
            cc_pair_id=cc_pair_id,
            unresolved_only=True,
            db_session=db_session_temp,
        )
        doc_id_to_unresolved_errors: dict[str, list[IndexAttemptError]] = defaultdict(
            list
        )
        for error in unresolved_errors:
            if error.document_id:
                doc_id_to_unresolved_errors[error.document_id].append(error)

        # resolve errors for documents that were successfully indexed
        failed_document_ids = [
            failure.failed_document.document_id
            for failure in failures
            if failure.failed_document
        ]
        successful_document_ids = [
            document.id
            for document in document_batch
            if document.id not in failed_document_ids
        ]
        for document_id in successful_document_ids:
            if document_id not in doc_id_to_unresolved_errors:
                continue

            logger.info(f"Resolving IndexAttemptError for document '{document_id}'")
            for error in doc_id_to_unresolved_errors[document_id]:
                error.is_resolved = True
                db_session_temp.add(error)

        db_session_temp.commit()


@shared_task(
    name=OnyxCeleryTask.DOCPROCESSING_TASK,
    bind=True,
)
def docprocessing_task(
    self: Task,  # noqa: ARG001
    index_attempt_id: int,
    cc_pair_id: int,
    tenant_id: str,
    batch_num: int,
) -> None:
    """Process a batch of documents through the indexing pipeline.

    This task retrieves documents from storage and processes them through
    the indexing pipeline (embedding + vector store indexing).
    """
    # Start heartbeat for this indexing attempt
    heartbeat_thread, stop_event = start_heartbeat(index_attempt_id)
    try:
        # Cannot use the TaskSingleton approach here because the worker is multithreaded
        token = INDEX_ATTEMPT_INFO_CONTEXTVAR.set((cc_pair_id, index_attempt_id))
        _docprocessing_task(index_attempt_id, cc_pair_id, tenant_id, batch_num)
    finally:
        stop_heartbeat(heartbeat_thread, stop_event)  # Stop heartbeat before exiting
        INDEX_ATTEMPT_INFO_CONTEXTVAR.reset(token)


def _check_chunk_usage_limit(tenant_id: str) -> None:
    """Check if chunk indexing usage limit has been exceeded.

    Raises UsageLimitExceededError if the limit is exceeded.
    """
    if not USAGE_LIMITS_ENABLED:
        return

    from onyx.db.usage import UsageType
    from onyx.server.usage_limits import check_usage_and_raise

    with get_session_with_current_tenant() as db_session:
        check_usage_and_raise(
            db_session=db_session,
            usage_type=UsageType.CHUNKS_INDEXED,
            tenant_id=tenant_id,
            pending_amount=0,  # Just check current usage
        )


def _docprocessing_task(
    index_attempt_id: int,
    cc_pair_id: int,
    tenant_id: str,
    batch_num: int,
) -> None:
    start_time = time.monotonic()

    if tenant_id:
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    # Check if chunk indexing usage limit has been exceeded before processing
    if USAGE_LIMITS_ENABLED:
        try:
            _check_chunk_usage_limit(tenant_id)
        except HTTPException as e:
            # Log the error and fail the indexing attempt
            task_logger.error(
                f"Chunk indexing usage limit exceeded for tenant {tenant_id}: {e}"
            )
            with get_session_with_current_tenant() as db_session:
                from onyx.db.index_attempt import mark_attempt_failed

                mark_attempt_failed(
                    index_attempt_id=index_attempt_id,
                    db_session=db_session,
                    failure_reason=str(e),
                )
            raise

    task_logger.info(
        f"Processing document batch: attempt={index_attempt_id} batch_num={batch_num} "
    )

    # Get the document batch storage
    storage = get_document_batch_storage(cc_pair_id, index_attempt_id)

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    r = get_redis_client(tenant_id=tenant_id)

    # 20 is the documented default for httpx max_keepalive_connections
    if MANAGED_VESPA:
        httpx_init_vespa_pool(
            20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
        )
    else:
        httpx_init_vespa_pool(20)

    # dummy lock to satisfy linter
    per_batch_lock: RedisLock | None = None
    try:
        # FIX: Monitor memory before loading documents to track problematic batches
        emit_process_memory(
            os.getpid(),
            "docprocessing",
            {
                "phase": "before_load",
                "tenant_id": tenant_id,
                "cc_pair_id": cc_pair_id,
                "index_attempt_id": index_attempt_id,
                "batch_num": batch_num,
            },
        )

        # Retrieve documents from storage
        documents = storage.get_batch(batch_num)
        if not documents:
            task_logger.error(f"No documents found for batch {batch_num}")
            return

        # FIX: Monitor memory after loading documents
        emit_process_memory(
            os.getpid(),
            "docprocessing",
            {
                "phase": "after_load",
                "tenant_id": tenant_id,
                "cc_pair_id": cc_pair_id,
                "index_attempt_id": index_attempt_id,
                "batch_num": batch_num,
                "doc_count": len(documents),
            },
        )

        with get_session_with_current_tenant() as db_session:
            # matches parts of _run_indexing
            index_attempt = get_index_attempt(
                db_session,
                index_attempt_id,
                eager_load_cc_pair=True,
                eager_load_search_settings=True,
            )
            if not index_attempt:
                raise RuntimeError(f"Index attempt {index_attempt_id} not found")

            if index_attempt.search_settings is None:
                raise ValueError("Search settings must be set for indexing")

            if (
                index_attempt.celery_task_id is None
                or index_attempt.status.is_terminal()
            ):
                raise RuntimeError(
                    f"Index attempt {index_attempt_id} is not running, status {index_attempt.status}"
                )

            cross_batch_db_lock: RedisLock = r.lock(
                redis_connector.db_lock_key(index_attempt.search_settings.id),
                timeout=CELERY_INDEXING_LOCK_TIMEOUT,
                thread_local=False,
            )

            callback = IndexingCallback(
                redis_connector,
            )
            # TODO: right now this is the only thing the callback is used for,
            # probably there is a simpler way to handle pausing
            if callback.should_stop():
                raise RuntimeError("Docprocessing cancelled by connector pausing")

            # Set up indexing pipeline components
            embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
                search_settings=index_attempt.search_settings,
                callback=callback,
            )

            document_indices = get_all_document_indices(
                index_attempt.search_settings,
                None,
                httpx_client=HttpxPool.get("vespa"),
            )

            # Set up metadata for this batch
            index_attempt_metadata = IndexAttemptMetadata(
                attempt_id=index_attempt_id,
                connector_id=index_attempt.connector_credential_pair.connector.id,
                credential_id=index_attempt.connector_credential_pair.credential.id,
                request_id=make_randomized_onyx_request_id("DIP"),
                structured_id=f"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}",
                batch_num=batch_num,
            )

            # Process documents through indexing pipeline
            connector_source = (
                index_attempt.connector_credential_pair.connector.source.value
            )
            task_logger.info(
                f"Processing {len(documents)} documents through indexing pipeline: "
                f"cc_pair_id={cc_pair_id}, source={connector_source}, "
                f"batch_num={batch_num}"
            )

            adapter = DocumentIndexingBatchAdapter(
                db_session=db_session,
                connector_id=index_attempt.connector_credential_pair.connector.id,
                credential_id=index_attempt.connector_credential_pair.credential.id,
                tenant_id=tenant_id,
                index_attempt_metadata=index_attempt_metadata,
            )

            # real work happens here!
            index_pipeline_result = run_indexing_pipeline(
                embedder=embedding_model,
                document_indices=document_indices,
                ignore_time_skip=True,  # Documents are already filtered during extraction
                db_session=db_session,
                tenant_id=tenant_id,
                document_batch=documents,
                request_id=index_attempt_metadata.request_id,
                adapter=adapter,
            )

        # Track chunk indexing usage for cloud usage limits
        if USAGE_LIMITS_ENABLED and index_pipeline_result.total_chunks > 0:
            try:
                from onyx.db.usage import increment_usage
                from onyx.db.usage import UsageType

                with get_session_with_current_tenant() as usage_db_session:
                    increment_usage(
                        db_session=usage_db_session,
                        usage_type=UsageType.CHUNKS_INDEXED,
                        amount=index_pipeline_result.total_chunks,
                    )
                    usage_db_session.commit()
            except Exception as e:
                # Log but don't fail indexing if usage tracking fails
                task_logger.warning(f"Failed to track chunk indexing usage: {e}")

        # Update batch completion and document counts atomically using database coordination

        with get_session_with_current_tenant() as db_session, cross_batch_db_lock:
            IndexingCoordination.update_batch_completion_and_docs(
                db_session=db_session,
                index_attempt_id=index_attempt_id,
                total_docs_indexed=index_pipeline_result.total_docs,
                new_docs_indexed=index_pipeline_result.new_docs,
                total_chunks=index_pipeline_result.total_chunks,
            )

            _resolve_indexing_document_errors(
                cc_pair_id,
                index_pipeline_result.failures,
                documents,
            )

        coordination_status = None
        # Record failures in the database
        if index_pipeline_result.failures:
            with get_session_with_current_tenant() as db_session:
                for failure in index_pipeline_result.failures:
                    create_index_attempt_error(
                        index_attempt_id,
                        cc_pair_id,
                        failure,
                        db_session,
                    )
            # Use database state instead of FileStore for failure checking
            with get_session_with_current_tenant() as db_session:
                coordination_status = IndexingCoordination.get_coordination_status(
                    db_session, index_attempt_id
                )
                _check_failure_threshold(
                    coordination_status.total_failures,
                    coordination_status.total_docs,
                    batch_num,
                    index_pipeline_result.failures[-1],
                )

        # Add telemetry for indexing progress using database coordination status
        # only re-fetch coordination status if necessary
        if coordination_status is None:
            with get_session_with_current_tenant() as db_session:
                coordination_status = IndexingCoordination.get_coordination_status(
                    db_session, index_attempt_id
                )

        optional_telemetry(
            record_type=RecordType.INDEXING_PROGRESS,
            data={
                "index_attempt_id": index_attempt_id,
                "cc_pair_id": cc_pair_id,
                "current_docs_indexed": coordination_status.total_docs,
                "current_chunks_indexed": coordination_status.total_chunks,
                "source": index_attempt.connector_credential_pair.connector.source.value,
                "completed_batches": coordination_status.completed_batches,
                "total_batches": coordination_status.total_batches,
            },
            tenant_id=tenant_id,
        )
        # Clean up this batch after successful processing
        storage.delete_batch_by_num(batch_num)

        # FIX: Explicitly clear document batch from memory and force garbage collection
        # This helps prevent memory accumulation across multiple batches
        # NOTE: Thread-local event loops in embedding threads are cleaned up automatically
        # via the _cleanup_thread_local decorator in search_nlp_models.py
        del documents
        gc.collect()

        # FIX: Log final memory usage to track problematic tenants/CC pairs
        emit_process_memory(
            os.getpid(),
            "docprocessing",
            {
                "phase": "after_processing",
                "tenant_id": tenant_id,
                "cc_pair_id": cc_pair_id,
                "index_attempt_id": index_attempt_id,
                "batch_num": batch_num,
                "chunks_processed": index_pipeline_result.total_chunks,
            },
        )

        elapsed_time = time.monotonic() - start_time
        task_logger.info(
            f"Completed document batch processing: "
            f"index_attempt={index_attempt_id} "
            f"cc_pair={cc_pair_id} "
            f"search_settings={index_attempt.search_settings.id} "
            f"batch_num={batch_num} "
            f"docs={len(index_pipeline_result.failures) + index_pipeline_result.total_docs} "
            f"chunks={index_pipeline_result.total_chunks} "
            f"failures={len(index_pipeline_result.failures)} "
            f"elapsed={elapsed_time:.2f}s"
        )

    except Exception:
        task_logger.exception(
            f"Document batch processing failed: batch_num={batch_num} attempt={index_attempt_id} "
        )

        raise
    finally:
        if per_batch_lock and per_batch_lock.owned():
            per_batch_lock.release()


================================================
FILE: backend/onyx/background/celery/tasks/docprocessing/utils.py
================================================
import time
from datetime import datetime
from datetime import timezone

from redis import Redis
from redis.exceptions import LockError
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import DocumentSource
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.index_attempt import get_last_attempt_for_cc_pair
from onyx.db.index_attempt import get_recent_attempts_for_cc_pair
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import SearchSettings
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.logger import setup_logger

logger = setup_logger()

NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE = 5


class IndexingCallbackBase(IndexingHeartbeatInterface):
    PARENT_CHECK_INTERVAL = 60

    def __init__(
        self,
        parent_pid: int,
        redis_connector: RedisConnector,
        redis_lock: RedisLock,
        redis_client: Redis,
        timeout_seconds: int | None = None,
    ):
        super().__init__()
        self.parent_pid = parent_pid
        self.redis_connector: RedisConnector = redis_connector
        self.redis_lock: RedisLock = redis_lock
        self.redis_client = redis_client
        self.started: datetime = datetime.now(timezone.utc)
        self.redis_lock.reacquire()

        self.last_tag: str = f"{self.__class__.__name__}.__init__"
        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
        self.last_lock_monotonic = time.monotonic()

        self.last_parent_check = time.monotonic()
        self.start_monotonic = time.monotonic()
        self.timeout_seconds = timeout_seconds

    def should_stop(self) -> bool:
        # Check if the associated indexing attempt has been cancelled
        # TODO: Pass index_attempt_id to the callback and check cancellation using the db
        if bool(self.redis_connector.stop.fenced):
            return True

        # Check if the task has exceeded its timeout
        # NOTE: Celery's soft_time_limit does not work with thread pools,
        # so we must enforce timeouts internally.
        if self.timeout_seconds is not None:
            elapsed = time.monotonic() - self.start_monotonic
            if elapsed > self.timeout_seconds:
                logger.warning(
                    f"IndexingCallback Docprocessing - task timeout exceeded: "
                    f"elapsed={elapsed:.0f}s timeout={self.timeout_seconds}s "
                    f"cc_pair={self.redis_connector.cc_pair_id}"
                )
                return True

        return False

    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002
        """Amount isn't used yet."""

        # rkuo: this shouldn't be necessary yet because we spawn the process this runs inside
        # with daemon=True. It seems likely some indexing tasks will need to spawn other processes
        # eventually, which daemon=True prevents, so leave this code in until we're ready to test it.

        # if self.parent_pid:
        #     # check if the parent pid is alive so we aren't running as a zombie
        #     now = time.monotonic()
        #     if now - self.last_parent_check > IndexingCallback.PARENT_CHECK_INTERVAL:
        #         try:
        #             # this is unintuitive, but it checks if the parent pid is still running
        #             os.kill(self.parent_pid, 0)
        #         except Exception:
        #             logger.exception("IndexingCallback - parent pid check exceptioned")
        #             raise
        #         self.last_parent_check = now

        try:
            current_time = time.monotonic()
            if current_time - self.last_lock_monotonic >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
            ):
                self.redis_lock.reacquire()
                self.last_lock_reacquire = datetime.now(timezone.utc)
                self.last_lock_monotonic = time.monotonic()

            self.last_tag = tag
        except LockError:
            logger.exception(
                f"{self.__class__.__name__} - lock.reacquire exceptioned: "
                f"lock_timeout={self.redis_lock.timeout} "
                f"start={self.started} "
                f"last_tag={self.last_tag} "
                f"last_reacquired={self.last_lock_reacquire} "
                f"now={datetime.now(timezone.utc)}"
            )

            redis_lock_dump(self.redis_lock, self.redis_client)
            raise


# NOTE: we're in the process of removing all fences from indexing; this will
# eventually no longer be used. For now, it is used only for connector pausing.
class IndexingCallback(IndexingHeartbeatInterface):
    def __init__(
        self,
        redis_connector: RedisConnector,
    ):
        self.redis_connector = redis_connector

    def should_stop(self) -> bool:
        # Check if the associated indexing attempt has been cancelled
        # TODO: Pass index_attempt_id to the callback and check cancellation using the db
        return bool(self.redis_connector.stop.fenced)

    # included to satisfy old interface
    def progress(self, tag: str, amount: int) -> None:
        pass


# NOTE: The validate_indexing_fence and validate_indexing_fences functions have been removed
# as they are no longer needed with database-based coordination. The new validation is
# handled by validate_active_indexing_attempts in the main indexing tasks module.


def is_in_repeated_error_state(
    cc_pair: ConnectorCredentialPair, search_settings_id: int, db_session: Session
) -> bool:
    """Checks if the cc pair / search setting combination is in a repeated error state."""
    # if the connector doesn't have a refresh_freq, a single failed attempt is enough
    number_of_failed_attempts_in_a_row_needed = (
        NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE
        if cc_pair.connector.refresh_freq is not None
        else 1
    )

    most_recent_index_attempts = get_recent_attempts_for_cc_pair(
        cc_pair_id=cc_pair.id,
        search_settings_id=search_settings_id,
        limit=number_of_failed_attempts_in_a_row_needed,
        db_session=db_session,
    )
    return len(
        most_recent_index_attempts
    ) >= number_of_failed_attempts_in_a_row_needed and all(
        attempt.status == IndexingStatus.FAILED
        for attempt in most_recent_index_attempts
    )


def should_index(
    cc_pair: ConnectorCredentialPair,
    search_settings_instance: SearchSettings,
    secondary_index_building: bool,
    db_session: Session,
) -> bool:
    """Checks various global settings and past indexing attempts to determine if
    we should try to start indexing the cc pair / search setting combination.

    Note that tactical checks such as preventing overlap with a currently running task
    are not handled here.

    Return True if we should try to index, False if not.
    """
    connector = cc_pair.connector
    last_index_attempt = get_last_attempt_for_cc_pair(
        cc_pair_id=cc_pair.id,
        search_settings_id=search_settings_instance.id,
        db_session=db_session,
    )
    all_recent_errored = is_in_repeated_error_state(
        cc_pair=cc_pair,
        search_settings_id=search_settings_instance.id,
        db_session=db_session,
    )

    # uncomment for debugging
    # task_logger.debug(
    #     f"_should_index: "
    #     f"cc_pair={cc_pair.id} "
    #     f"connector={cc_pair.connector_id} "
    #     f"refresh_freq={connector.refresh_freq}"
    # )

    # don't kick off indexing for `NOT_APPLICABLE` sources
    if connector.source == DocumentSource.NOT_APPLICABLE:
        # print(f"Not indexing cc_pair={cc_pair.id}: NOT_APPLICABLE source")
        return False

    # User can still manually create single indexing attempts via the UI for the
    # currently in use index
    if DISABLE_INDEX_UPDATE_ON_SWAP:
        if (
            search_settings_instance.status == IndexModelStatus.PRESENT
            and secondary_index_building
        ):
            # print(
            #     f"Not indexing cc_pair={cc_pair.id}: DISABLE_INDEX_UPDATE_ON_SWAP is True and secondary index building"
            # )
            return False

    # When switching over models, always index at least once
    if search_settings_instance.status == IndexModelStatus.FUTURE:
        if last_index_attempt:
            # No new index if the last index attempt succeeded
            # Once is enough. The model will never be able to swap otherwise.
            if last_index_attempt.status == IndexingStatus.SUCCESS:
                # print(
                #     f"Not indexing cc_pair={cc_pair.id}: FUTURE model with successful last index attempt={last_index.id}"
                # )
                return False

            # No new index if the last index attempt is waiting to start
            if last_index_attempt.status == IndexingStatus.NOT_STARTED:
                # print(
                #     f"Not indexing cc_pair={cc_pair.id}: FUTURE model with NOT_STARTED last index attempt={last_index.id}"
                # )
                return False

            # No new index if the last index attempt is running
            if last_index_attempt.status == IndexingStatus.IN_PROGRESS:
                # print(
                #     f"Not indexing cc_pair={cc_pair.id}: FUTURE model with IN_PROGRESS last index attempt={last_index.id}"
                # )
                return False
        else:
            if (
                connector.id == 0 or connector.source == DocumentSource.INGESTION_API
            ):  # Ingestion API
                # print(
                #     f"Not indexing cc_pair={cc_pair.id}: FUTURE model with Ingestion API source"
                # )
                return False
        return True

    # If the connector is paused or is the ingestion API, don't index
    # NOTE: during an embedding model switch over, the following logic
    # is bypassed by the above check for a future model
    if (
        not cc_pair.status.is_active()
        or connector.id == 0
        or connector.source == DocumentSource.INGESTION_API
    ):
        # print(
        #     f"Not indexing cc_pair={cc_pair.id}: Connector is paused or is Ingestion API"
        # )
        return False

    if search_settings_instance.status.is_current():
        if cc_pair.indexing_trigger is not None:
            # if a manual indexing trigger is on the cc pair, honor it for live search settings
            return True

    # if no attempt has ever occurred, we should index regardless of refresh_freq
    if not last_index_attempt:
        return True

    if connector.refresh_freq is None:
        # print(f"Not indexing cc_pair={cc_pair.id}: refresh_freq is None")
        return False

    # if in the "initial" phase, we should always try and kick-off indexing
    # as soon as possible if there is no ongoing attempt. In other words,
    # no delay UNLESS we're repeatedly failing to index.
    if (
        cc_pair.status == ConnectorCredentialPairStatus.INITIAL_INDEXING
        and not all_recent_errored
    ):
        return True

    current_db_time = get_db_current_time(db_session)
    time_since_index = current_db_time - last_index_attempt.time_updated
    if time_since_index.total_seconds() < connector.refresh_freq:
        # print(
        #     f"Not indexing cc_pair={cc_pair.id}: Last index attempt={last_index_attempt.id} "
        #     f"too recent ({time_since_index.total_seconds()}s < {connector.refresh_freq}s)"
        # )
        return False

    return True


================================================
FILE: backend/onyx/background/celery/tasks/evals/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/evals/tasks.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any

from celery import shared_task
from celery import Task

from onyx.configs.app_configs import BRAINTRUST_API_KEY
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL
from onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT
from onyx.configs.constants import OnyxCeleryTask
from onyx.evals.eval import run_eval
from onyx.evals.models import EvalConfigurationOptions
from onyx.utils.logger import setup_logger

logger = setup_logger()


@shared_task(
    name=OnyxCeleryTask.EVAL_RUN_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
    trail=False,
)
def eval_run_task(
    self: Task,  # noqa: ARG001
    *,
    configuration_dict: dict[str, Any],
) -> None:
    """Background task to run an evaluation with the given configuration"""
    try:
        configuration = EvalConfigurationOptions.model_validate(configuration_dict)
        run_eval(configuration, remote_dataset_name=configuration.dataset_name)
        logger.info("Successfully completed eval run task")

    except Exception:
        logger.error("Failed to run eval task")
        raise


@shared_task(
    name=OnyxCeleryTask.SCHEDULED_EVAL_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT * 5,  # Allow more time for multiple datasets
    bind=True,
    trail=False,
)
def scheduled_eval_task(self: Task, **kwargs: Any) -> None:  # noqa: ARG001
    """
    Scheduled task to run evaluations on configured datasets.
    Runs weekly on Sunday at midnight UTC.

    Configure via environment variables (with defaults):
    - SCHEDULED_EVAL_DATASET_NAMES: Comma-separated list of Braintrust dataset names
    - SCHEDULED_EVAL_PERMISSIONS_EMAIL: Email for search permissions (default: roshan@onyx.app)
    - SCHEDULED_EVAL_PROJECT: Braintrust project name
    """
    if not BRAINTRUST_API_KEY:
        logger.error("BRAINTRUST_API_KEY is not configured, cannot run scheduled evals")
        return

    if not SCHEDULED_EVAL_PROJECT:
        logger.error(
            "SCHEDULED_EVAL_PROJECT is not configured, cannot run scheduled evals"
        )
        return

    if not SCHEDULED_EVAL_DATASET_NAMES:
        logger.info("No scheduled eval datasets configured, skipping")
        return

    if not SCHEDULED_EVAL_PERMISSIONS_EMAIL:
        logger.error("SCHEDULED_EVAL_PERMISSIONS_EMAIL not configured")
        return

    project_name = SCHEDULED_EVAL_PROJECT
    dataset_names = SCHEDULED_EVAL_DATASET_NAMES
    permissions_email = SCHEDULED_EVAL_PERMISSIONS_EMAIL

    # Create a timestamp for the scheduled run
    run_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    logger.info(
        f"Starting scheduled eval pipeline for project '{project_name}' with {len(dataset_names)} dataset(s): {dataset_names}"
    )

    pipeline_start = datetime.now(timezone.utc)
    results: list[dict[str, Any]] = []

    for dataset_name in dataset_names:
        start_time = datetime.now(timezone.utc)
        error_message: str | None = None
        success = False

        # Create informative experiment name for scheduled runs
        experiment_name = f"{dataset_name} - {run_timestamp}"

        try:
            logger.info(
                f"Running scheduled eval for dataset: {dataset_name} (project: {project_name})"
            )

            configuration = EvalConfigurationOptions(
                search_permissions_email=permissions_email,
                dataset_name=dataset_name,
                no_send_logs=False,
                braintrust_project=project_name,
                experiment_name=experiment_name,
            )

            result = run_eval(
                configuration=configuration,
                remote_dataset_name=dataset_name,
            )
            success = result.success
            logger.info(f"Completed eval for {dataset_name}: success={success}")

        except Exception as e:
            logger.exception(f"Failed to run scheduled eval for {dataset_name}")
            error_message = str(e)
            success = False

        end_time = datetime.now(timezone.utc)

        results.append(
            {
                "dataset_name": dataset_name,
                "success": success,
                "start_time": start_time,
                "end_time": end_time,
                "error_message": error_message,
            }
        )

    pipeline_end = datetime.now(timezone.utc)
    total_duration = (pipeline_end - pipeline_start).total_seconds()

    passed_count = sum(1 for r in results if r["success"])
    logger.info(
        f"Scheduled eval pipeline completed: {passed_count}/{len(results)} passed in {total_duration:.1f}s"
    )


================================================
FILE: backend/onyx/background/celery/tasks/hierarchyfetching/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/hierarchyfetching/tasks.py
================================================
"""Celery tasks for hierarchy fetching.

This module provides tasks for fetching hierarchy node information from connectors.
Hierarchy nodes represent structural elements like folders, spaces, and pages that
can be used to filter search results.

The hierarchy fetching pipeline runs once per day per connector and fetches
structural information from the connector source.
"""

import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from uuid import uuid4

from celery import Celery
from celery import shared_task
from celery import Task
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.connectors.factory import ConnectorMissingException
from onyx.connectors.factory import identify_connector_class
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import HierarchyConnector
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
from onyx.db.connector import mark_cc_pair_as_hierarchy_fetched
from onyx.db.connector_credential_pair import (
    fetch_indexable_standard_connector_credential_pair_ids,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Hierarchy fetching runs once per day (24 hours in seconds)
HIERARCHY_FETCH_INTERVAL_SECONDS = 24 * 60 * 60


def _connector_supports_hierarchy_fetching(
    cc_pair: ConnectorCredentialPair,
) -> bool:
    """Return True only for connectors whose class implements HierarchyConnector."""
    try:
        connector_class = identify_connector_class(
            cc_pair.connector.source,
        )
    except ConnectorMissingException as e:
        task_logger.warning(
            "Skipping hierarchy fetching enqueue for source=%s input_type=%s: %s",
            cc_pair.connector.source,
            cc_pair.connector.input_type,
            str(e),
        )
        return False

    return issubclass(connector_class, HierarchyConnector)


def _is_hierarchy_fetching_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if hierarchy fetching is due for this connector.

    Hierarchy fetching should run once per day for active connectors.
    """
    # Skip if not active
    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
        return False

    # Skip if connector has never successfully indexed
    if not cc_pair.last_successful_index_time:
        return False

    # Check if we've fetched hierarchy recently
    last_fetch = cc_pair.last_time_hierarchy_fetch
    if last_fetch is None:
        # Never fetched before - fetch now
        return True

    # Check if enough time has passed since last fetch
    next_fetch_time = last_fetch + timedelta(seconds=HIERARCHY_FETCH_INTERVAL_SECONDS)
    return datetime.now(timezone.utc) >= next_fetch_time


def _try_creating_hierarchy_fetching_task(
    celery_app: Celery,
    cc_pair: ConnectorCredentialPair,
    db_session: Session,
    r: Redis,
    tenant_id: str,
) -> str | None:
    """Try to create a hierarchy fetching task for a connector.

    Returns the task ID if created, None otherwise.
    """
    LOCK_TIMEOUT = 30

    # Serialize task creation attempts
    lock: RedisLock = r.lock(
        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + f"hierarchy_fetching_{cc_pair.id}",
        timeout=LOCK_TIMEOUT,
    )

    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
    if not acquired:
        return None

    try:
        # Refresh to get latest state
        db_session.refresh(cc_pair)
        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
            return None

        # Generate task ID
        custom_task_id = f"hierarchy_fetching_{cc_pair.id}_{uuid4()}"

        # Send the task
        result = celery_app.send_task(
            OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,
            kwargs=dict(
                cc_pair_id=cc_pair.id,
                tenant_id=tenant_id,
            ),
            queue=OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING,
            task_id=custom_task_id,
            priority=OnyxCeleryPriority.LOW,
        )

        if not result:
            raise RuntimeError("send_task for hierarchy_fetching_task failed.")

        task_logger.info(
            f"Created hierarchy fetching task: cc_pair={cc_pair.id} celery_task_id={custom_task_id}"
        )

        return custom_task_id

    except Exception:
        task_logger.exception(
            f"Failed to create hierarchy fetching task: cc_pair={cc_pair.id}"
        )
        return None
    finally:
        if lock.owned():
            lock.release()


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_HIERARCHY_FETCHING,
    soft_time_limit=300,
    bind=True,
)
def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
    """Check for connectors that need hierarchy fetching and spawn tasks.

    This task runs periodically (once per day) and checks all active connectors
    to see if they need hierarchy information fetched.
    """
    time_start = time.monotonic()
    task_logger.info("check_for_hierarchy_fetching - Starting")

    tasks_created = 0
    locked = False
    redis_client = get_redis_client()

    lock_beat: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_HIERARCHY_FETCHING_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # These tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        locked = True

        with get_session_with_current_tenant() as db_session:
            # Get all active connector credential pairs
            cc_pair_ids = fetch_indexable_standard_connector_credential_pair_ids(
                db_session=db_session,
                active_cc_pairs_only=True,
            )

            for cc_pair_id in cc_pair_ids:
                lock_beat.reacquire()
                cc_pair = get_connector_credential_pair_from_id(
                    db_session=db_session,
                    cc_pair_id=cc_pair_id,
                )

                if not cc_pair or not _connector_supports_hierarchy_fetching(cc_pair):
                    continue

                if not _is_hierarchy_fetching_due(cc_pair):
                    continue

                task_id = _try_creating_hierarchy_fetching_task(
                    celery_app=self.app,
                    cc_pair=cc_pair,
                    db_session=db_session,
                    r=redis_client,
                    tenant_id=tenant_id,
                )

                if task_id:
                    tasks_created += 1

    except Exception:
        task_logger.exception("check_for_hierarchy_fetching - Unexpected error")
    finally:
        if locked:
            if lock_beat.owned():
                lock_beat.release()
            else:
                task_logger.error(
                    "check_for_hierarchy_fetching - Lock not owned on completion"
                )

    time_elapsed = time.monotonic() - time_start
    task_logger.info(
        f"check_for_hierarchy_fetching finished: tasks_created={tasks_created} elapsed={time_elapsed:.2f}s"
    )
    return tasks_created


# Batch size for hierarchy node processing
HIERARCHY_NODE_BATCH_SIZE = 100


def _run_hierarchy_extraction(
    db_session: Session,
    cc_pair: ConnectorCredentialPair,
    source: DocumentSource,
    tenant_id: str,
) -> int:
    """
    Run the hierarchy extraction for a connector.

    Instantiates the connector and calls load_hierarchy() if the connector
    implements HierarchyConnector.

    Returns the total number of hierarchy nodes extracted.
    """
    connector = cc_pair.connector
    credential = cc_pair.credential

    # Instantiate the connector using its configured input type
    runnable_connector = instantiate_connector(
        db_session=db_session,
        source=source,
        input_type=connector.input_type,
        connector_specific_config=connector.connector_specific_config,
        credential=credential,
    )

    # Check if the connector supports hierarchy fetching
    if not isinstance(runnable_connector, HierarchyConnector):
        task_logger.debug(
            f"Connector {source} does not implement HierarchyConnector, skipping"
        )
        return 0

    redis_client = get_redis_client(tenant_id=tenant_id)

    # Ensure the SOURCE-type root node exists before processing hierarchy nodes.
    # This is the root of the hierarchy tree - all other nodes for this source
    # should ultimately have this as an ancestor.
    ensure_source_node_exists(redis_client, db_session, source)

    # Determine time range: start from last hierarchy fetch, end at now
    last_fetch = cc_pair.last_time_hierarchy_fetch
    start_time = last_fetch.timestamp() if last_fetch else 0
    end_time = datetime.now(timezone.utc).timestamp()

    # Check if connector is public - all hierarchy nodes from public connectors
    # should be accessible to all users
    is_connector_public = cc_pair.access_type == AccessType.PUBLIC

    total_nodes = 0
    node_batch: list[PydanticHierarchyNode] = []

    def _process_batch() -> int:
        """Process accumulated hierarchy nodes batch."""
        if not node_batch:
            return 0

        upserted_nodes = upsert_hierarchy_nodes_batch(
            db_session=db_session,
            nodes=node_batch,
            source=source,
            commit=True,
            is_connector_public=is_connector_public,
        )

        upsert_hierarchy_node_cc_pair_entries(
            db_session=db_session,
            hierarchy_node_ids=[n.id for n in upserted_nodes],
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
            commit=True,
        )

        # Cache in Redis for fast ancestor resolution
        cache_entries = [
            HierarchyNodeCacheEntry.from_db_model(node) for node in upserted_nodes
        ]
        cache_hierarchy_nodes_batch(
            redis_client=redis_client,
            source=source,
            entries=cache_entries,
        )

        count = len(node_batch)
        node_batch.clear()
        return count

    # Fetch hierarchy nodes from the connector
    for node in runnable_connector.load_hierarchy(start=start_time, end=end_time):
        node_batch.append(node)
        if len(node_batch) >= HIERARCHY_NODE_BATCH_SIZE:
            total_nodes += _process_batch()

    # Process any remaining nodes
    total_nodes += _process_batch()

    return total_nodes


@shared_task(
    name=OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,
    soft_time_limit=3600,  # 1 hour soft limit
    time_limit=3900,  # 1 hour 5 min hard limit
    bind=True,
)
def connector_hierarchy_fetching_task(
    self: Task,  # noqa: ARG001
    *,
    cc_pair_id: int,
    tenant_id: str,
) -> None:
    """Fetch hierarchy information from a connector.

    This task fetches structural information (folders, spaces, pages, etc.)
    from the connector source and stores it in the database.
    """
    task_logger.info(
        f"connector_hierarchy_fetching_task starting: cc_pair={cc_pair_id} tenant={tenant_id}"
    )

    try:
        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
            )

            if not cc_pair:
                task_logger.warning(
                    f"CC pair not found for hierarchy fetching: cc_pair={cc_pair_id}"
                )
                return

            if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
                task_logger.info(
                    f"Skipping hierarchy fetching for deleting connector: cc_pair={cc_pair_id}"
                )
                return

            source = cc_pair.connector.source
            total_nodes = _run_hierarchy_extraction(
                db_session=db_session,
                cc_pair=cc_pair,
                source=source,
                tenant_id=tenant_id,
            )

            task_logger.info(
                f"connector_hierarchy_fetching_task: Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}"
            )

            # Update the last fetch time to prevent re-running until next interval
            mark_cc_pair_as_hierarchy_fetched(db_session, cc_pair_id)

    except Exception:
        task_logger.exception(
            f"connector_hierarchy_fetching_task failed: cc_pair={cc_pair_id}"
        )
        raise

    task_logger.info(
        f"connector_hierarchy_fetching_task completed: cc_pair={cc_pair_id}"
    )


================================================
FILE: backend/onyx/background/celery/tasks/llm_model_update/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/llm_model_update/tasks.py
================================================
from celery import shared_task
from celery import Task

from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.llm.well_known_providers.auto_update_service import (
    sync_llm_models_from_github,
)


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,
    ignore_result=True,
    soft_time_limit=300,  # 5 minute timeout
    trail=False,
    bind=True,
)
def check_for_auto_llm_updates(
    self: Task,  # noqa: ARG001
    *,
    tenant_id: str,  # noqa: ARG001
) -> bool | None:
    """Periodic task to fetch LLM model updates from GitHub
    and sync them to providers in Auto mode.

    This task checks the GitHub-hosted config file and updates all
    providers that have is_auto_mode=True.
    """
    if not AUTO_LLM_CONFIG_URL:
        task_logger.debug("AUTO_LLM_CONFIG_URL not configured, skipping")
        return None

    try:
        # Sync to database
        with get_session_with_current_tenant() as db_session:
            results = sync_llm_models_from_github(db_session)

            if results:
                task_logger.info(f"Auto mode sync results: {results}")
            else:
                task_logger.debug("No model updates applied")

    except Exception:
        task_logger.exception("Error in auto LLM update task")
        raise

    return True


================================================
FILE: backend/onyx/background/celery/tasks/models.py
================================================
from enum import Enum

from pydantic import BaseModel


class DocProcessingContext(BaseModel):
    tenant_id: str
    cc_pair_id: int
    search_settings_id: int
    index_attempt_id: int


class IndexingWatchdogTerminalStatus(str, Enum):
    """The different statuses the watchdog can finish with.

    TODO: create broader success/failure/abort categories
    """

    UNDEFINED = "undefined"

    SUCCEEDED = "succeeded"

    SPAWN_FAILED = "spawn_failed"  # connector spawn failed
    SPAWN_NOT_ALIVE = (
        "spawn_not_alive"  # spawn succeeded but process did not come alive
    )

    BLOCKED_BY_DELETION = "blocked_by_deletion"
    BLOCKED_BY_STOP_SIGNAL = "blocked_by_stop_signal"
    FENCE_NOT_FOUND = "fence_not_found"  # fence does not exist
    FENCE_READINESS_TIMEOUT = (
        "fence_readiness_timeout"  # fence exists but wasn't ready within the timeout
    )
    FENCE_MISMATCH = "fence_mismatch"  # task and fence metadata mismatch
    TASK_ALREADY_RUNNING = "task_already_running"  # task appears to be running already
    INDEX_ATTEMPT_MISMATCH = (
        "index_attempt_mismatch"  # expected index attempt metadata not found in db
    )

    CONNECTOR_VALIDATION_ERROR = (
        "connector_validation_error"  # the connector validation failed
    )
    CONNECTOR_EXCEPTIONED = "connector_exceptioned"  # the connector itself exceptioned
    WATCHDOG_EXCEPTIONED = "watchdog_exceptioned"  # the watchdog exceptioned

    # the watchdog received a termination signal
    TERMINATED_BY_SIGNAL = "terminated_by_signal"

    # the watchdog terminated the task due to no activity
    TERMINATED_BY_ACTIVITY_TIMEOUT = "terminated_by_activity_timeout"

    # NOTE: this may actually be the same as SIGKILL, but parsed differently by python
    # consolidate once we know more
    OUT_OF_MEMORY = "out_of_memory"

    PROCESS_SIGNAL_SIGKILL = "process_signal_sigkill"

    @property
    def code(self) -> int:
        _ENUM_TO_CODE: dict[IndexingWatchdogTerminalStatus, int] = {
            IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL: -9,
            IndexingWatchdogTerminalStatus.OUT_OF_MEMORY: 137,
            IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR: 247,
            IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION: 248,
            IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL: 249,
            IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND: 250,
            IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT: 251,
            IndexingWatchdogTerminalStatus.FENCE_MISMATCH: 252,
            IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING: 253,
            IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH: 254,
            IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED: 255,
        }

        return _ENUM_TO_CODE[self]

    @classmethod
    def from_code(cls, code: int) -> "IndexingWatchdogTerminalStatus":
        _CODE_TO_ENUM: dict[int, IndexingWatchdogTerminalStatus] = {
            -9: IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL,
            137: IndexingWatchdogTerminalStatus.OUT_OF_MEMORY,
            247: IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR,
            248: IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION,
            249: IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL,
            250: IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND,
            251: IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT,
            252: IndexingWatchdogTerminalStatus.FENCE_MISMATCH,
            253: IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING,
            254: IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH,
            255: IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED,
        }

        if code in _CODE_TO_ENUM:
            return _CODE_TO_ENUM[code]

        return IndexingWatchdogTerminalStatus.UNDEFINED


class SimpleJobResult:
    """The data we want to have when the watchdog finishes"""

    def __init__(self) -> None:
        self.status = IndexingWatchdogTerminalStatus.UNDEFINED
        self.connector_source = None
        self.exit_code = None
        self.exception_str = None

    status: IndexingWatchdogTerminalStatus
    connector_source: str | None
    exit_code: int | None
    exception_str: str | None


================================================
FILE: backend/onyx/background/celery/tasks/monitoring/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/monitoring/tasks.py
================================================
import json
import time
from datetime import timedelta
from itertools import islice
from typing import Any
from typing import cast
from typing import Literal

import psutil
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import BaseModel
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy import select
from sqlalchemy import text
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import DocumentSet
from onyx.db.models import IndexAttempt
from onyx.db.models import SyncRecord
from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings_list
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.logger import is_running_in_container
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

_MONITORING_SOFT_TIME_LIMIT = 60 * 5  # 5 minutes
_MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60  # 6 minutes

_CONNECTOR_INDEX_ATTEMPT_START_LATENCY_KEY_FMT = (
    "monitoring_connector_index_attempt_start_latency:{cc_pair_id}:{index_attempt_id}"
)

_CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT = (
    "monitoring_connector_index_attempt_run_success:{cc_pair_id}:{index_attempt_id}"
)

_FINAL_METRIC_KEY_FMT = "sync_final_metrics:{sync_type}:{entity_id}:{sync_record_id}"

_SYNC_START_LATENCY_KEY_FMT = (
    "sync_start_latency:{sync_type}:{entity_id}:{sync_record_id}"
)

_CONNECTOR_START_TIME_KEY_FMT = "connector_start_time:{cc_pair_id}:{index_attempt_id}"
_CONNECTOR_END_TIME_KEY_FMT = "connector_end_time:{cc_pair_id}:{index_attempt_id}"
_SYNC_START_TIME_KEY_FMT = "sync_start_time:{sync_type}:{entity_id}:{sync_record_id}"
_SYNC_END_TIME_KEY_FMT = "sync_end_time:{sync_type}:{entity_id}:{sync_record_id}"


def _mark_metric_as_emitted(redis_std: Redis, key: str) -> None:
    """Mark a metric as having been emitted by setting a Redis key with expiration"""
    redis_std.set(key, "1", ex=24 * 60 * 60)  # Expire after 1 day


def _has_metric_been_emitted(redis_std: Redis, key: str) -> bool:
    """Check if a metric has been emitted by checking for existence of Redis key"""
    return bool(redis_std.exists(key))


class Metric(BaseModel):
    key: (
        str | None
    )  # only required if we need to store that we have emitted this metric
    name: str
    value: Any
    tags: dict[str, str]

    def log(self) -> None:
        """Log the metric in a standardized format"""
        data = {
            "metric": self.name,
            "value": self.value,
            "tags": self.tags,
        }
        task_logger.info(json.dumps(data))

    def emit(self, tenant_id: str) -> None:
        # Convert value to appropriate type based on the input value
        bool_value = None
        float_value = None
        int_value = None
        string_value = None
        # NOTE: have to do bool first, since `isinstance(True, int)` is true
        # e.g. bool is a subclass of int
        if isinstance(self.value, bool):
            bool_value = self.value
        elif isinstance(self.value, int):
            int_value = self.value
        elif isinstance(self.value, float):
            float_value = self.value
        elif isinstance(self.value, str):
            string_value = self.value
        else:
            task_logger.error(
                f"Invalid metric value type: {type(self.value)} ({self.value}) for metric {self.name}."
            )
            return

        # don't send None values over the wire
        data = {
            k: v
            for k, v in {
                "metric_name": self.name,
                "float_value": float_value,
                "int_value": int_value,
                "string_value": string_value,
                "bool_value": bool_value,
                "tags": self.tags,
            }.items()
            if v is not None
        }
        task_logger.info(f"Emitting metric: {data}")
        optional_telemetry(
            record_type=RecordType.METRIC,
            data=data,
            tenant_id=tenant_id,
        )


def _collect_queue_metrics(redis_celery: Redis) -> list[Metric]:
    """Collect metrics about queue lengths for different Celery queues"""
    metrics = []
    queue_mappings = {
        "celery_queue_length": OnyxCeleryQueues.PRIMARY,
        "docprocessing_queue_length": OnyxCeleryQueues.DOCPROCESSING,
        "docfetching_queue_length": OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
        "sync_queue_length": OnyxCeleryQueues.VESPA_METADATA_SYNC,
        "deletion_queue_length": OnyxCeleryQueues.CONNECTOR_DELETION,
        "pruning_queue_length": OnyxCeleryQueues.CONNECTOR_PRUNING,
        "permissions_sync_queue_length": OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
        "external_group_sync_queue_length": OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,
        "permissions_upsert_queue_length": OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT,
        "hierarchy_fetching_queue_length": OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING,
        "llm_model_update_queue_length": OnyxCeleryQueues.LLM_MODEL_UPDATE,
        "checkpoint_cleanup_queue_length": OnyxCeleryQueues.CHECKPOINT_CLEANUP,
        "index_attempt_cleanup_queue_length": OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP,
        "csv_generation_queue_length": OnyxCeleryQueues.CSV_GENERATION,
        "user_file_processing_queue_length": OnyxCeleryQueues.USER_FILE_PROCESSING,
        "user_file_project_sync_queue_length": OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
        "user_file_delete_queue_length": OnyxCeleryQueues.USER_FILE_DELETE,
        "monitoring_queue_length": OnyxCeleryQueues.MONITORING,
        "sandbox_queue_length": OnyxCeleryQueues.SANDBOX,
        "opensearch_migration_queue_length": OnyxCeleryQueues.OPENSEARCH_MIGRATION,
    }

    for name, queue in queue_mappings.items():
        metrics.append(
            Metric(
                key=None,
                name=name,
                value=celery_get_queue_length(queue, redis_celery),
                tags={"queue": name},
            )
        )

    return metrics


def _build_connector_start_latency_metric(
    cc_pair: ConnectorCredentialPair,
    recent_attempt: IndexAttempt,
    second_most_recent_attempt: IndexAttempt | None,
    redis_std: Redis,
) -> Metric | None:
    if not recent_attempt.time_started:
        return None

    # check if we already emitted a metric for this index attempt
    metric_key = _CONNECTOR_INDEX_ATTEMPT_START_LATENCY_KEY_FMT.format(
        cc_pair_id=cc_pair.id,
        index_attempt_id=recent_attempt.id,
    )
    if _has_metric_been_emitted(redis_std, metric_key):
        task_logger.info(
            f"Skipping metric for connector {cc_pair.connector.id} "
            f"index attempt {recent_attempt.id} because it has already been "
            "emitted"
        )
        return None

    # Connector start latency
    # first run case - we should start as soon as it's created
    if not second_most_recent_attempt:
        desired_start_time = cc_pair.connector.time_created
    else:
        if not cc_pair.connector.refresh_freq:
            task_logger.debug(
                "Connector has no refresh_freq and this is a non-initial index attempt. "
                "Assuming user manually triggered indexing, so we'll skip start latency metric."
            )
            return None

        desired_start_time = second_most_recent_attempt.time_updated + timedelta(
            seconds=cc_pair.connector.refresh_freq
        )

    start_latency = (recent_attempt.time_started - desired_start_time).total_seconds()

    task_logger.info(
        f"Start latency for index attempt {recent_attempt.id}: {start_latency:.2f}s "
        f"(desired: {desired_start_time}, actual: {recent_attempt.time_started})"
    )

    job_id = build_job_id("connector", str(cc_pair.id), str(recent_attempt.id))

    return Metric(
        key=metric_key,
        name="connector_start_latency",
        value=start_latency,
        tags={
            "job_id": job_id,
            "connector_id": str(cc_pair.connector.id),
            "source": str(cc_pair.connector.source),
        },
    )


def _build_connector_final_metrics(
    cc_pair: ConnectorCredentialPair,
    recent_attempts: list[IndexAttempt],
    redis_std: Redis,
) -> list[Metric]:
    """
    Final metrics for connector index attempts:
      - Boolean success/fail metric
      - If success, emit:
          * duration (seconds)
          * doc_count
    """
    metrics = []
    for attempt in recent_attempts:
        metric_key = _CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT.format(
            cc_pair_id=cc_pair.id,
            index_attempt_id=attempt.id,
        )
        if _has_metric_been_emitted(redis_std, metric_key):
            task_logger.info(
                f"Skipping final metrics for connector {cc_pair.connector.id} index attempt {attempt.id}, already emitted."
            )
            continue

        # We only emit final metrics if the attempt is in a terminal state
        if attempt.status not in [
            IndexingStatus.SUCCESS,
            IndexingStatus.FAILED,
            IndexingStatus.CANCELED,
        ]:
            # Not finished; skip
            continue

        job_id = build_job_id("connector", str(cc_pair.id), str(attempt.id))
        success = attempt.status == IndexingStatus.SUCCESS
        metrics.append(
            Metric(
                key=metric_key,  # We'll mark the same key for any final metrics
                name="connector_run_succeeded",
                value=success,
                tags={
                    "job_id": job_id,
                    "connector_id": str(cc_pair.connector.id),
                    "source": str(cc_pair.connector.source),
                    "status": attempt.status.value,
                },
            )
        )

        if success:
            # Make sure we have valid time_started
            if attempt.time_started and attempt.time_updated:
                duration_seconds = (
                    attempt.time_updated - attempt.time_started
                ).total_seconds()
                metrics.append(
                    Metric(
                        key=None,  # No need for a new key, or you can reuse the same if you prefer
                        name="connector_index_duration_seconds",
                        value=duration_seconds,
                        tags={
                            "job_id": job_id,
                            "connector_id": str(cc_pair.connector.id),
                            "source": str(cc_pair.connector.source),
                        },
                    )
                )
            else:
                task_logger.error(
                    f"Index attempt {attempt.id} succeeded but has missing time "
                    f"(time_started={attempt.time_started}, time_updated={attempt.time_updated})."
                )

            # For doc counts, choose whichever field is more relevant
            doc_count = attempt.total_docs_indexed or 0
            metrics.append(
                Metric(
                    key=None,
                    name="connector_index_doc_count",
                    value=doc_count,
                    tags={
                        "job_id": job_id,
                        "connector_id": str(cc_pair.connector.id),
                        "source": str(cc_pair.connector.source),
                    },
                )
            )

    return metrics


def _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:
    """Collect metrics about connector runs from the past hour"""
    one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)

    # Get all connector credential pairs
    cc_pairs = db_session.scalars(select(ConnectorCredentialPair)).all()
    # Might be more than one search setting, or just one
    active_search_settings_list = get_active_search_settings_list(db_session)

    metrics = []

    # If you want to process each cc_pair against each search setting:
    for cc_pair in cc_pairs:
        for search_settings in active_search_settings_list:
            recent_attempts = (
                db_session.query(IndexAttempt)
                .filter(
                    IndexAttempt.connector_credential_pair_id == cc_pair.id,
                    IndexAttempt.search_settings_id == search_settings.id,
                )
                .order_by(IndexAttempt.time_created.desc())
                .limit(2)
                .all()
            )

            if not recent_attempts:
                continue

            most_recent_attempt = recent_attempts[0]
            second_most_recent_attempt = (
                recent_attempts[1] if len(recent_attempts) > 1 else None
            )

            if one_hour_ago > most_recent_attempt.time_created:
                continue

            # Build a job_id for correlation
            job_id = build_job_id(
                "connector", str(cc_pair.id), str(most_recent_attempt.id)
            )

            # Add raw start time metric if available
            if most_recent_attempt.time_started:
                start_time_key = _CONNECTOR_START_TIME_KEY_FMT.format(
                    cc_pair_id=cc_pair.id,
                    index_attempt_id=most_recent_attempt.id,
                )
                metrics.append(
                    Metric(
                        key=start_time_key,
                        name="connector_start_time",
                        value=most_recent_attempt.time_started.timestamp(),
                        tags={
                            "job_id": job_id,
                            "connector_id": str(cc_pair.connector.id),
                            "source": str(cc_pair.connector.source),
                        },
                    )
                )

            # Add raw end time metric if available and in terminal state
            if (
                most_recent_attempt.status.is_terminal()
                and most_recent_attempt.time_updated
            ):
                end_time_key = _CONNECTOR_END_TIME_KEY_FMT.format(
                    cc_pair_id=cc_pair.id,
                    index_attempt_id=most_recent_attempt.id,
                )
                metrics.append(
                    Metric(
                        key=end_time_key,
                        name="connector_end_time",
                        value=most_recent_attempt.time_updated.timestamp(),
                        tags={
                            "job_id": job_id,
                            "connector_id": str(cc_pair.connector.id),
                            "source": str(cc_pair.connector.source),
                        },
                    )
                )

            # Connector start latency
            start_latency_metric = _build_connector_start_latency_metric(
                cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std
            )

            if start_latency_metric:
                metrics.append(start_latency_metric)

            # Connector run success/failure
            final_metrics = _build_connector_final_metrics(
                cc_pair, recent_attempts, redis_std
            )
            metrics.extend(final_metrics)

    return metrics


def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:
    """
    Collect metrics for document set and group syncing:
      - Success/failure status
      - Start latency (for doc sets / user groups)
      - Duration & doc count (only if success)
      - Throughput (docs/min) (only if success)
      - Raw start/end times for each sync
    """

    one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)

    # Get all sync records that ended in the last hour
    recent_sync_records = db_session.scalars(
        select(SyncRecord)
        .where(SyncRecord.sync_end_time.isnot(None))
        .where(SyncRecord.sync_end_time >= one_hour_ago)
        .order_by(SyncRecord.sync_end_time.desc())
    ).all()

    task_logger.info(
        f"Collecting sync metrics for {len(recent_sync_records)} sync records"
    )

    metrics = []

    for sync_record in recent_sync_records:
        # Build a job_id for correlation
        job_id = build_job_id("sync_record", str(sync_record.id))

        # Add raw start time metric
        start_time_key = _SYNC_START_TIME_KEY_FMT.format(
            sync_type=sync_record.sync_type,
            entity_id=sync_record.entity_id,
            sync_record_id=sync_record.id,
        )
        metrics.append(
            Metric(
                key=start_time_key,
                name="sync_start_time",
                value=sync_record.sync_start_time.timestamp(),
                tags={
                    "job_id": job_id,
                    "sync_type": str(sync_record.sync_type),
                },
            )
        )

        # Add raw end time metric if available
        if sync_record.sync_end_time:
            end_time_key = _SYNC_END_TIME_KEY_FMT.format(
                sync_type=sync_record.sync_type,
                entity_id=sync_record.entity_id,
                sync_record_id=sync_record.id,
            )
            metrics.append(
                Metric(
                    key=end_time_key,
                    name="sync_end_time",
                    value=sync_record.sync_end_time.timestamp(),
                    tags={
                        "job_id": job_id,
                        "sync_type": str(sync_record.sync_type),
                    },
                )
            )

        # Emit a SUCCESS/FAIL boolean metric
        #    Use a single Redis key to avoid re-emitting final metrics
        final_metric_key = _FINAL_METRIC_KEY_FMT.format(
            sync_type=sync_record.sync_type,
            entity_id=sync_record.entity_id,
            sync_record_id=sync_record.id,
        )
        if not _has_metric_been_emitted(redis_std, final_metric_key):
            # Evaluate success
            sync_succeeded = sync_record.sync_status == SyncStatus.SUCCESS

            metrics.append(
                Metric(
                    key=final_metric_key,
                    name="sync_run_succeeded",
                    value=sync_succeeded,
                    tags={
                        "job_id": job_id,
                        "sync_type": str(sync_record.sync_type),
                        "status": str(sync_record.sync_status),
                    },
                )
            )

            # If successful, emit additional metrics
            if sync_succeeded:
                if sync_record.sync_end_time and sync_record.sync_start_time:
                    duration_seconds = (
                        sync_record.sync_end_time - sync_record.sync_start_time
                    ).total_seconds()
                else:
                    task_logger.error(
                        f"Invalid times for sync record {sync_record.id}: "
                        f"start={sync_record.sync_start_time}, end={sync_record.sync_end_time}"
                    )
                    duration_seconds = None

                doc_count = sync_record.num_docs_synced or 0

                sync_speed = None
                if duration_seconds and duration_seconds > 0:
                    duration_mins = duration_seconds / 60.0
                    sync_speed = (
                        doc_count / duration_mins if duration_mins > 0 else None
                    )

                # Emit duration, doc count, speed
                if duration_seconds is not None:
                    metrics.append(
                        Metric(
                            key=final_metric_key,
                            name="sync_duration_seconds",
                            value=duration_seconds,
                            tags={
                                "job_id": job_id,
                                "sync_type": str(sync_record.sync_type),
                            },
                        )
                    )
                else:
                    task_logger.error(
                        f"Invalid sync record {sync_record.id} with no duration"
                    )

                metrics.append(
                    Metric(
                        key=final_metric_key,
                        name="sync_doc_count",
                        value=doc_count,
                        tags={
                            "job_id": job_id,
                            "sync_type": str(sync_record.sync_type),
                        },
                    )
                )

                if sync_speed is not None:
                    metrics.append(
                        Metric(
                            key=final_metric_key,
                            name="sync_speed_docs_per_min",
                            value=sync_speed,
                            tags={
                                "job_id": job_id,
                                "sync_type": str(sync_record.sync_type),
                            },
                        )
                    )
                else:
                    task_logger.error(
                        f"Invalid sync record {sync_record.id} with no duration"
                    )

        # Emit start latency
        start_latency_key = _SYNC_START_LATENCY_KEY_FMT.format(
            sync_type=sync_record.sync_type,
            entity_id=sync_record.entity_id,
            sync_record_id=sync_record.id,
        )
        if not _has_metric_been_emitted(redis_std, start_latency_key):
            # Get the entity's last update time based on sync type
            entity: DocumentSet | UserGroup | None = None
            if sync_record.sync_type == SyncType.DOCUMENT_SET:
                entity = db_session.scalar(
                    select(DocumentSet).where(DocumentSet.id == sync_record.entity_id)
                )
            elif sync_record.sync_type == SyncType.USER_GROUP:
                entity = db_session.scalar(
                    select(UserGroup).where(UserGroup.id == sync_record.entity_id)
                )
            else:
                # Only user groups and document set sync records have
                #  an associated entity we can use for latency metrics
                continue

            if entity is None:
                task_logger.error(
                    f"Sync record of type {sync_record.sync_type} doesn't have an entity "
                    f"associated with it (id={sync_record.entity_id}). Skipping start latency metric."
                )

            # Calculate start latency in seconds:
            #    (actual sync start) - (last modified time)
            if (
                entity is not None
                and entity.time_last_modified_by_user
                and sync_record.sync_start_time
            ):
                start_latency = (
                    sync_record.sync_start_time - entity.time_last_modified_by_user
                ).total_seconds()

                if start_latency < 0:
                    task_logger.error(
                        f"Negative start latency for sync record {sync_record.id} "
                        f"(start={sync_record.sync_start_time}, entity_modified={entity.time_last_modified_by_user})"
                    )
                    continue

                metrics.append(
                    Metric(
                        key=start_latency_key,
                        name="sync_start_latency_seconds",
                        value=start_latency,
                        tags={
                            "job_id": job_id,
                            "sync_type": str(sync_record.sync_type),
                        },
                    )
                )

    return metrics


def build_job_id(
    job_type: Literal["connector", "sync_record"],
    primary_id: str,
    secondary_id: str | None = None,
) -> str:
    if job_type == "connector":
        if secondary_id is None:
            raise ValueError(
                "secondary_id (attempt_id) is required for connector job_type"
            )
        return f"connector:{primary_id}:attempt:{secondary_id}"
    elif job_type == "sync_record":
        return f"sync_record:{primary_id}"


@shared_task(
    name=OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
    ignore_result=True,
    soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,
    time_limit=_MONITORING_TIME_LIMIT,
    queue=OnyxCeleryQueues.MONITORING,
    bind=True,
)
def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
    """Collect and emit metrics about background processes.
    This task runs periodically to gather metrics about:
    - Queue lengths for different Celery queues
    - Connector run metrics (start latency, success rate)
    - Syncing speed metrics
    - Worker status and task counts
    """
    if tenant_id is not None:
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    task_logger.info("Starting background monitoring")
    r = get_redis_client()

    lock_monitoring: RedisLock = r.lock(
        OnyxRedisLocks.MONITOR_BACKGROUND_PROCESSES_LOCK,
        timeout=_MONITORING_SOFT_TIME_LIMIT,
    )

    # these tasks should never overlap
    if not lock_monitoring.acquire(blocking=False):
        task_logger.info("Skipping monitoring task because it is already running")
        return None

    try:
        redis_std = get_redis_client()

        # Collect queue metrics with broker connection
        r_celery = celery_get_broker_client(self.app)
        queue_metrics = _collect_queue_metrics(r_celery)

        # Collect remaining metrics (no broker connection needed)
        with get_session_with_current_tenant() as db_session:
            all_metrics: list[Metric] = queue_metrics
            all_metrics.extend(_collect_connector_metrics(db_session, redis_std))
            all_metrics.extend(_collect_sync_metrics(db_session, redis_std))

            for metric in all_metrics:
                if metric.key is None or not _has_metric_been_emitted(
                    redis_std, metric.key
                ):
                    metric.log()
                    metric.emit(tenant_id)

                if metric.key is not None:
                    _mark_metric_as_emitted(redis_std, metric.key)

        task_logger.info("Successfully collected background metrics")
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception as e:
        task_logger.exception("Error collecting background process metrics")
        raise e
    finally:
        if lock_monitoring.owned():
            lock_monitoring.release()

        task_logger.info("Background monitoring task finished")


@shared_task(
    name=OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
)
def cloud_check_alembic() -> bool | None:
    """A task to verify that all tenants are on the same alembic revision.

    This check is expected to fail if a cloud alembic migration is currently running
    across all tenants.

    TODO: have the cloud migration script set an activity signal that this check
    uses to know it doesn't make sense to run a check at the present time.
    """

    # Used as a placeholder if the alembic revision cannot be retrieved
    ALEMBIC_NULL_REVISION = "000000000000"

    time_start = time.monotonic()

    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)

    lock_beat: RedisLock = redis_client.lock(
        OnyxRedisLocks.CLOUD_CHECK_ALEMBIC_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    last_lock_time = time.monotonic()

    tenant_to_revision: dict[str, str] = {}
    revision_counts: dict[str, int] = {}
    out_of_date_tenants: dict[str, str] = {}
    top_revision: str = ""
    tenant_ids: list[str] | list[None] = []

    try:
        # map tenant_id to revision (or ALEMBIC_NULL_REVISION if the query fails)
        tenant_ids = get_all_tenant_ids()
        for tenant_id in tenant_ids:
            current_time = time.monotonic()
            if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4):
                lock_beat.reacquire()
                last_lock_time = current_time

            if tenant_id is None:
                continue

            with get_session_with_shared_schema() as session:
                try:
                    result = session.execute(
                        text(f'SELECT * FROM "{tenant_id}".alembic_version LIMIT 1')
                    )
                    result_scalar: str | None = result.scalar_one_or_none()
                    if result_scalar is None:
                        raise ValueError("Alembic version should not be None.")

                    tenant_to_revision[tenant_id] = result_scalar
                except Exception:
                    task_logger.error(f"Tenant {tenant_id} has no revision!")
                    tenant_to_revision[tenant_id] = ALEMBIC_NULL_REVISION

        # get the total count of each revision
        for k, v in tenant_to_revision.items():
            revision_counts[v] = revision_counts.get(v, 0) + 1

        # error if any null revision tenants are found
        if ALEMBIC_NULL_REVISION in revision_counts:
            num_null_revisions = revision_counts[ALEMBIC_NULL_REVISION]
            raise ValueError(f"No revision was found for {num_null_revisions} tenants!")

        # get the revision with the most counts
        sorted_revision_counts = sorted(
            revision_counts.items(), key=lambda item: item[1], reverse=True
        )

        if len(sorted_revision_counts) == 0:
            raise ValueError(
                f"cloud_check_alembic - No revisions found for {len(tenant_ids)} tenant ids!"
            )

        top_revision, _ = sorted_revision_counts[0]

        # build a list of out of date tenants
        for k, v in tenant_to_revision.items():
            if v == top_revision:
                continue

            out_of_date_tenants[k] = v

    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
        raise
    except Exception:
        task_logger.exception("Unexpected exception during cloud alembic check")
        raise
    finally:
        if lock_beat.owned():
            lock_beat.release()
        else:
            task_logger.error("cloud_check_alembic - Lock not owned on completion")
            redis_lock_dump(lock_beat, redis_client)

    if len(out_of_date_tenants) > 0:
        task_logger.error(
            f"Found out of date tenants: "
            f"num_out_of_date_tenants={len(out_of_date_tenants)} "
            f"num_tenants={len(tenant_ids)} "
            f"revision={top_revision}"
        )

        num_to_log = min(5, len(out_of_date_tenants))
        task_logger.info(
            f"Logging {num_to_log}/{len(out_of_date_tenants)} out of date tenants."
        )
        for k, v in islice(out_of_date_tenants.items(), 5):
            task_logger.info(f"Out of date tenant: tenant={k} revision={v}")
    else:
        task_logger.info(
            f"All tenants are up to date: num_tenants={len(tenant_ids)} revision={top_revision}"
        )

    time_elapsed = time.monotonic() - time_start
    task_logger.info(
        f"cloud_check_alembic finished: num_tenants={len(tenant_ids)} elapsed={time_elapsed:.2f}"
    )
    return True


@shared_task(
    name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES, ignore_result=True, bind=True
)
def cloud_monitor_celery_queues(
    self: Task,
) -> None:
    return monitor_celery_queues_helper(self)


@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001
    return monitor_celery_queues_helper(self)


def monitor_celery_queues_helper(
    task: Task,
) -> None:
    """A task to monitor all celery queue lengths."""

    r_celery = celery_get_broker_client(task.app)
    n_celery = celery_get_queue_length(OnyxCeleryQueues.PRIMARY, r_celery)
    n_docfetching = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery
    )
    n_docprocessing = celery_get_queue_length(OnyxCeleryQueues.DOCPROCESSING, r_celery)

    n_user_file_processing = celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
    )
    n_user_file_project_sync = celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, r_celery
    )
    n_user_file_delete = celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_DELETE, r_celery
    )
    n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)
    n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
    n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)
    n_permissions_sync = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
    )
    n_external_group_sync = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
    )
    n_permissions_upsert = celery_get_queue_length(
        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
    )
    n_hierarchy_fetching = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING, r_celery
    )
    n_llm_model_update = celery_get_queue_length(
        OnyxCeleryQueues.LLM_MODEL_UPDATE, r_celery
    )
    n_checkpoint_cleanup = celery_get_queue_length(
        OnyxCeleryQueues.CHECKPOINT_CLEANUP, r_celery
    )
    n_index_attempt_cleanup = celery_get_queue_length(
        OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP, r_celery
    )
    n_csv_generation = celery_get_queue_length(
        OnyxCeleryQueues.CSV_GENERATION, r_celery
    )
    n_monitoring = celery_get_queue_length(OnyxCeleryQueues.MONITORING, r_celery)
    n_sandbox = celery_get_queue_length(OnyxCeleryQueues.SANDBOX, r_celery)
    n_opensearch_migration = celery_get_queue_length(
        OnyxCeleryQueues.OPENSEARCH_MIGRATION, r_celery
    )

    n_docfetching_prefetched = celery_get_unacked_task_ids(
        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery
    )
    n_docprocessing_prefetched = celery_get_unacked_task_ids(
        OnyxCeleryQueues.DOCPROCESSING, r_celery
    )

    task_logger.info(
        f"Queue lengths: celery={n_celery} "
        f"docfetching={n_docfetching} "
        f"docfetching_prefetched={len(n_docfetching_prefetched)} "
        f"docprocessing={n_docprocessing} "
        f"docprocessing_prefetched={len(n_docprocessing_prefetched)} "
        f"user_file_processing={n_user_file_processing} "
        f"user_file_project_sync={n_user_file_project_sync} "
        f"user_file_delete={n_user_file_delete} "
        f"sync={n_sync} "
        f"deletion={n_deletion} "
        f"pruning={n_pruning} "
        f"permissions_sync={n_permissions_sync} "
        f"external_group_sync={n_external_group_sync} "
        f"permissions_upsert={n_permissions_upsert} "
        f"hierarchy_fetching={n_hierarchy_fetching} "
        f"llm_model_update={n_llm_model_update} "
        f"checkpoint_cleanup={n_checkpoint_cleanup} "
        f"index_attempt_cleanup={n_index_attempt_cleanup} "
        f"csv_generation={n_csv_generation} "
        f"monitoring={n_monitoring} "
        f"sandbox={n_sandbox} "
        f"opensearch_migration={n_opensearch_migration} "
    )


"""Memory monitoring"""


def _get_cmdline_for_process(process: psutil.Process) -> str | None:
    try:
        return " ".join(process.cmdline())
    except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
        return None


@shared_task(
    name=OnyxCeleryTask.MONITOR_PROCESS_MEMORY,
    ignore_result=True,
    soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,
    time_limit=_MONITORING_TIME_LIMIT,
    queue=OnyxCeleryQueues.MONITORING,
    bind=True,
)
def monitor_process_memory(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001
    """
    Task to monitor memory usage of supervisor-managed processes.
    This periodically checks the memory usage of processes and logs information
    in a standardized format.

    The task looks for processes managed by supervisor and logs their
    memory usage statistics. This is useful for monitoring memory consumption
    over time and identifying potential memory leaks.
    """
    # don't run this task in multi-tenant mode, have other, better means of monitoring
    if MULTI_TENANT:
        return

    # Skip memory monitoring if not in container
    if not is_running_in_container():
        return

    try:
        # Get all supervisor-managed processes
        supervisor_processes: dict[int, str] = {}

        # Map cmd line elements to more readable process names
        process_type_mapping = {
            "--hostname=primary": "primary",
            "--hostname=light": "light",
            "--hostname=heavy": "heavy",
            "--hostname=indexing": "indexing",
            "--hostname=monitoring": "monitoring",
            "beat": "beat",
            "slack/listener.py": "slack",
        }

        # Find all python processes that are likely celery workers
        for proc in psutil.process_iter():
            cmdline = _get_cmdline_for_process(proc)
            if not cmdline:
                continue

            # Match supervisor-managed processes
            for process_name, process_type in process_type_mapping.items():
                if process_name in cmdline:
                    if process_type in supervisor_processes.values():
                        task_logger.error(
                            f"Duplicate process type for type {process_type} with cmd {cmdline} with pid={proc.pid}."
                        )
                        continue

                    supervisor_processes[proc.pid] = process_type
                    break

        if len(supervisor_processes) != len(process_type_mapping):
            task_logger.error(
                f"Missing processes: {set(process_type_mapping.keys()).symmetric_difference(supervisor_processes.values())}"
            )

        # Log memory usage for each process
        for pid, process_type in supervisor_processes.items():
            try:
                emit_process_memory(pid, process_type, {})
            except psutil.NoSuchProcess:
                # Process may have terminated since we obtained the list
                continue
            except Exception as e:
                task_logger.exception(f"Error monitoring process {pid}: {str(e)}")

    except Exception:
        task_logger.exception("Error in monitor_process_memory task")


@shared_task(
    name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_PIDBOX, ignore_result=True, bind=True
)
def cloud_monitor_celery_pidbox(
    self: Task,
) -> None:
    """
    Celery can leave behind orphaned pidboxes from old workers that are idle and never cleaned up.
    This task removes them based on idle time to avoid Redis clutter and overflowing the instance.
    This is a real issue we've observed in production.

    Note:
    - Setting CELERY_ENABLE_REMOTE_CONTROL = False would prevent pidbox keys entirely,
    but might also disable features like inspect, broadcast, and worker remote control.
    Use with caution.
    """

    num_deleted = 0

    MAX_PIDBOX_IDLE = 24 * 3600  # 1 day in seconds
    r_celery = celery_get_broker_client(self.app)
    for key in r_celery.scan_iter("*.reply.celery.pidbox"):
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
        if key_str.startswith("_kombu"):
            continue

        idletime_raw = r_celery.object("idletime", key)
        if idletime_raw is None:
            continue

        idletime = cast(int, idletime_raw)
        if idletime < MAX_PIDBOX_IDLE:
            continue

        r_celery.delete(key)
        task_logger.info(
            f"Deleted idle pidbox: pidbox={key_str} idletime={idletime} max_idletime={MAX_PIDBOX_IDLE}"
        )
        num_deleted += 1

    # Enable later in case we want some aggregate metrics
    # task_logger.info(f"Deleted idle pidbox: pidbox={key_str}")


================================================
FILE: backend/onyx/background/celery/tasks/opensearch_migration/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/opensearch_migration/constants.py
================================================
# Tasks are expected to cease execution and do cleanup after the soft time
# limit. In principle they are also forceably terminated after the hard time
# limit, in practice this does not happen since we use threadpools for Celery
# task execution, and we simple hope that the total task time plus cleanup does
# not exceed this. Therefore tasks should regularly check their timeout and lock
# status. The lock timeout is the maximum time the lock manager (Redis in this
# case) will enforce the lock, independent of what is happening in the task. To
# reduce the chances that a task is still doing work while a lock has expired,
# make the lock timeout well above the task timeouts. In practice we should
# never see locks be held for this long anyway because a task should release the
# lock after its cleanup which happens at most after its soft timeout.

# Constants corresponding to migrate_documents_from_vespa_to_opensearch_task.
from onyx.configs.app_configs import OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE


MIGRATION_TASK_SOFT_TIME_LIMIT_S = 60 * 5  # 5 minutes.
MIGRATION_TASK_TIME_LIMIT_S = 60 * 6  # 6 minutes.
# The maximum time the lock can be held for. Will automatically be released
# after this time.
MIGRATION_TASK_LOCK_TIMEOUT_S = 60 * 7  # 7 minutes.
assert (
    MIGRATION_TASK_SOFT_TIME_LIMIT_S < MIGRATION_TASK_TIME_LIMIT_S
), "The soft time limit must be less than the time limit."
assert (
    MIGRATION_TASK_TIME_LIMIT_S < MIGRATION_TASK_LOCK_TIMEOUT_S
), "The time limit must be less than the lock timeout."
# Time to wait to acquire the lock.
MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S = 60 * 2  # 2 minutes.

# Constants corresponding to check_for_documents_for_opensearch_migration_task.
CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S = 60  # 60 seconds / 1 minute.
CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S = 90  # 90 seconds.
# The maximum time the lock can be held for. Will automatically be released
# after this time.
CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S = 120  # 120 seconds / 2 minutes.
assert (
    CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S
), "The soft time limit must be less than the time limit."
assert (
    CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S
), "The time limit must be less than the lock timeout."
# Time to wait to acquire the lock.
CHECK_FOR_DOCUMENTS_TASK_LOCK_BLOCKING_TIMEOUT_S = 30  # 30 seconds.

TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE = 15

# WARNING: Do not change these values without knowing what changes also need to
# be made to OpenSearchTenantMigrationRecord.
GET_VESPA_CHUNKS_PAGE_SIZE = OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE
GET_VESPA_CHUNKS_SLICE_COUNT = 4

# String used to indicate in the vespa_visit_continuation_token mapping that the
# slice has finished and there is nothing left to visit.
FINISHED_VISITING_SLICE_CONTINUATION_TOKEN = (
    "FINISHED_VISITING_SLICE_CONTINUATION_TOKEN"
)


================================================
FILE: backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
================================================
"""Celery tasks for migrating documents from Vespa to OpenSearch."""

import time
import traceback

from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.opensearch_migration.constants import (
    FINISHED_VISITING_SLICE_CONTINUATION_TOKEN,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    GET_VESPA_CHUNKS_PAGE_SIZE,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    MIGRATION_TASK_LOCK_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    MIGRATION_TASK_SOFT_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    MIGRATION_TASK_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.transformer import (
    transform_vespa_chunks_to_opensearch_chunks,
)
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping
from onyx.db.opensearch_migration import get_vespa_visit_state
from onyx.db.opensearch_migration import is_migration_completed
from onyx.db.opensearch_migration import (
    mark_migration_completed_time_if_not_set_with_commit,
)
from onyx.db.opensearch_migration import (
    try_insert_opensearch_tenant_migration_record_with_commit,
)
from onyx.db.opensearch_migration import update_vespa_visit_progress_with_commit
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
)
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import IndexingSetting
from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id


def is_continuation_token_done_for_all_slices(
    continuation_token_map: dict[int, str | None],
) -> bool:
    return all(
        continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN
        for continuation_token in continuation_token_map.values()
    )


# shared_task allows this task to be shared across celery app instances.
@shared_task(
    name=OnyxCeleryTask.MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK,
    # Does not store the task's return value in the result backend.
    ignore_result=True,
    # WARNING: This is here just for rigor but since we use threads for Celery
    # this config is not respected and timeout logic must be implemented in the
    # task.
    soft_time_limit=MIGRATION_TASK_SOFT_TIME_LIMIT_S,
    # WARNING: This is here just for rigor but since we use threads for Celery
    # this config is not respected and timeout logic must be implemented in the
    # task.
    time_limit=MIGRATION_TASK_TIME_LIMIT_S,
    # Passed in self to the task to get task metadata.
    bind=True,
)
def migrate_chunks_from_vespa_to_opensearch_task(
    self: Task,  # noqa: ARG001
    *,
    tenant_id: str,
) -> bool | None:
    """
    Periodic task to migrate chunks from Vespa to OpenSearch via the Visit API.

    Uses Vespa's Visit API to iterate through ALL chunks in bulk (not
    per-document), transform them, and index them into OpenSearch. Progress is
    tracked via a continuation token map stored in the
    OpenSearchTenantMigrationRecord.

    The first time we see no continuation token map and non-zero chunks
    migrated, we consider the migration complete and all subsequent invocations
    are no-ops.

    We divide the index into GET_VESPA_CHUNKS_SLICE_COUNT independent slices
    where progress is tracked for each slice.

    Returns:
        None if OpenSearch migration is not enabled, or if the lock could not be
            acquired; effectively a no-op. True if the task completed
            successfully. False if the task errored.
    """
    # 1. Check if we should run the task.
    # 1.a. If OpenSearch indexing is disabled, we don't run the task.
    if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        task_logger.warning(
            "OpenSearch migration is not enabled, skipping chunk migration task."
        )
        return None
    task_logger.info("Starting chunk-level migration from Vespa to OpenSearch.")
    task_start_time = time.monotonic()

    # 1.b. Only one instance per tenant of this task may run concurrently at
    # once. If we fail to acquire a lock, we assume it is because another task
    # has one and we exit.
    r = get_redis_client()
    lock: RedisLock = r.lock(
        name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
        # The maximum time the lock can be held for. Will automatically be
        # released after this time.
        timeout=MIGRATION_TASK_LOCK_TIMEOUT_S,
        # .acquire will block until the lock is acquired.
        blocking=True,
        # Time to wait to acquire the lock.
        blocking_timeout=MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,
    )
    if not lock.acquire():
        task_logger.warning(
            "The OpenSearch migration task timed out waiting for the lock."
        )
        return None
    else:
        task_logger.info(
            f"Acquired the OpenSearch migration lock. Took {time.monotonic() - task_start_time:.3f} seconds. "
            f"Token: {lock.local.token}"
        )

    # 2. Prepare to migrate.
    total_chunks_migrated_this_task = 0
    total_chunks_errored_this_task = 0
    try:
        # 2.a. Double-check that tenant info is correct.
        if tenant_id != get_current_tenant_id():
            err_str = (
                f"Tenant ID mismatch in the OpenSearch migration task: "
                f"{tenant_id} != {get_current_tenant_id()}. This should never happen."
            )
            task_logger.error(err_str)
            return False

        # Do as much as we can with a DB session in one spot to not hold a
        # session during a migration batch.
        with get_session_with_current_tenant() as db_session:
            # 2.b. Immediately check to see if this tenant is done, to save
            # having to do any other work. This function does not require a
            # migration record to necessarily exist.
            if is_migration_completed(db_session):
                return True

            # 2.c. Try to insert the OpenSearchTenantMigrationRecord table if it
            # does not exist.
            try_insert_opensearch_tenant_migration_record_with_commit(db_session)

            # 2.d. Get search settings.
            search_settings = get_current_search_settings(db_session)
            indexing_setting = IndexingSetting.from_db_model(search_settings)

            # 2.e. Build sanitized to original doc ID mapping to check for
            # conflicts in the event we sanitize a doc ID to an
            # already-existing doc ID.
            # We reconstruct this mapping for every task invocation because
            # a document may have been added in the time between two tasks.
            sanitized_doc_start_time = time.monotonic()
            sanitized_to_original_doc_id_mapping = (
                build_sanitized_to_original_doc_id_mapping(db_session)
            )
            task_logger.debug(
                f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
                f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
            )

            # 2.f. Get the current migration state.
            continuation_token_map, total_chunks_migrated = get_vespa_visit_state(
                db_session
            )
            # 2.f.1. Double-check that the migration state does not imply
            # completion. Really we should never have to enter this block as we
            # would expect is_migration_completed to return True, but in the
            # strange event that the migration is complete but the migration
            # completed time was never stamped, we do so here.
            if is_continuation_token_done_for_all_slices(continuation_token_map):
                task_logger.info(
                    f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
                )
                mark_migration_completed_time_if_not_set_with_commit(db_session)
                return True
        task_logger.debug(
            f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
            f"Continuation token map: {continuation_token_map}"
        )

        with get_vespa_http_client(
            timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
        ) as vespa_client:
            # 2.g. Create the OpenSearch and Vespa document indexes.
            tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
            opensearch_document_index = OpenSearchDocumentIndex(
                tenant_state=tenant_state,
                index_name=search_settings.index_name,
                embedding_dim=indexing_setting.final_embedding_dim,
                embedding_precision=indexing_setting.embedding_precision,
            )
            vespa_document_index = VespaDocumentIndex(
                index_name=search_settings.index_name,
                tenant_state=tenant_state,
                large_chunks_enabled=False,
                httpx_client=vespa_client,
            )

            # 2.h. Get the approximate chunk count in Vespa as of this time to
            # update the migration record.
            approx_chunk_count_in_vespa: int | None = None
            get_chunk_count_start_time = time.monotonic()
            try:
                approx_chunk_count_in_vespa = vespa_document_index.get_chunk_count()
            except Exception:
                # This failure should not be blocking.
                task_logger.exception(
                    "Error getting approximate chunk count in Vespa. Moving on..."
                )
            task_logger.debug(
                f"Took {time.monotonic() - get_chunk_count_start_time:.3f} seconds to attempt to get "
                f"approximate chunk count in Vespa. Got {approx_chunk_count_in_vespa}."
            )

            # 3. Do the actual migration in batches until we run out of time.
            while (
                time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S
                and lock.owned()
            ):
                # 3.a. Get the next batch of raw chunks from Vespa.
                get_vespa_chunks_start_time = time.monotonic()
                raw_vespa_chunks, next_continuation_token_map = (
                    vespa_document_index.get_all_raw_document_chunks_paginated(
                        continuation_token_map=continuation_token_map,
                        page_size=GET_VESPA_CHUNKS_PAGE_SIZE,
                    )
                )
                task_logger.debug(
                    f"Read {len(raw_vespa_chunks)} chunks from Vespa in {time.monotonic() - get_vespa_chunks_start_time:.3f} "
                    f"seconds. Next continuation token map: {next_continuation_token_map}"
                )

                # 3.b. Transform the raw chunks to OpenSearch chunks in memory.
                opensearch_document_chunks, errored_chunks = (
                    transform_vespa_chunks_to_opensearch_chunks(
                        raw_vespa_chunks,
                        tenant_state,
                        sanitized_to_original_doc_id_mapping,
                    )
                )
                if len(opensearch_document_chunks) != len(raw_vespa_chunks):
                    task_logger.error(
                        f"Migration task error: Number of candidate chunks to migrate ({len(opensearch_document_chunks)}) does "
                        f"not match number of chunks in Vespa ({len(raw_vespa_chunks)}). {len(errored_chunks)} chunks "
                        "errored."
                    )

                # 3.c. Index the OpenSearch chunks into OpenSearch.
                index_opensearch_chunks_start_time = time.monotonic()
                opensearch_document_index.index_raw_chunks(
                    chunks=opensearch_document_chunks
                )
                task_logger.debug(
                    f"Indexed {len(opensearch_document_chunks)} chunks into OpenSearch in "
                    f"{time.monotonic() - index_opensearch_chunks_start_time:.3f} seconds."
                )

                total_chunks_migrated_this_task += len(opensearch_document_chunks)
                total_chunks_errored_this_task += len(errored_chunks)

                # Do as much as we can with a DB session in one spot to not hold a
                # session during a migration batch.
                with get_session_with_current_tenant() as db_session:
                    # 3.d. Update the migration state.
                    update_vespa_visit_progress_with_commit(
                        db_session,
                        continuation_token_map=next_continuation_token_map,
                        chunks_processed=len(opensearch_document_chunks),
                        chunks_errored=len(errored_chunks),
                        approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
                    )

                    # 3.e. Get the current migration state. Even thought we
                    # technically have it in-memory since we just wrote it, we
                    # want to reference the DB as the source of truth at all
                    # times.
                    continuation_token_map, total_chunks_migrated = (
                        get_vespa_visit_state(db_session)
                    )
                    # 3.e.1. Check if the migration is done.
                    if is_continuation_token_done_for_all_slices(
                        continuation_token_map
                    ):
                        task_logger.info(
                            f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
                        )
                        mark_migration_completed_time_if_not_set_with_commit(db_session)
                        return True
                task_logger.debug(
                    f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
                    f"Continuation token map: {continuation_token_map}"
                )
    except Exception:
        traceback.print_exc()
        task_logger.exception("Error in the OpenSearch migration task.")
        return False
    finally:
        if lock.owned():
            lock.release()
        else:
            task_logger.warning(
                "The OpenSearch migration lock was not owned on completion of the migration task."
            )

    task_logger.info(
        f"OpenSearch chunk migration task pausing (time limit reached). "
        f"Total chunks migrated this task: {total_chunks_migrated_this_task}. "
        f"Total chunks errored this task: {total_chunks_errored_this_task}. "
        f"Elapsed: {time.monotonic() - task_start_time:.3f}s. "
        "Will resume from continuation token on next invocation."
    )

    return True


================================================
FILE: backend/onyx/background/celery/tasks/opensearch_migration/transformer.py
================================================
import traceback
from datetime import datetime
from datetime import timezone
from typing import Any

from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PERSONAS
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger(__name__)


FIELDS_NEEDED_FOR_TRANSFORMATION: list[str] = [
    DOCUMENT_ID,
    CHUNK_ID,
    TITLE,
    TITLE_EMBEDDING,
    CONTENT,
    EMBEDDINGS,
    SOURCE_TYPE,
    METADATA_LIST,
    DOC_UPDATED_AT,
    HIDDEN,
    BOOST,
    SEMANTIC_IDENTIFIER,
    IMAGE_FILE_NAME,
    SOURCE_LINKS,
    BLURB,
    DOC_SUMMARY,
    CHUNK_CONTEXT,
    METADATA_SUFFIX,
    DOCUMENT_SETS,
    USER_PROJECT,
    PERSONAS,
    PRIMARY_OWNERS,
    SECONDARY_OWNERS,
    ACCESS_CONTROL_LIST,
]
if MULTI_TENANT:
    FIELDS_NEEDED_FOR_TRANSFORMATION.append(TENANT_ID)


def _extract_content_vector(embeddings: Any) -> list[float]:
    """Extracts the full chunk embedding vector from Vespa's embeddings tensor.

    Vespa stores embeddings as a tensor<float>(t{},x[dim]) where 't' maps
    embedding names (like "full_chunk") to vectors. The API can return this in
    different formats:
    1. Direct list: {"full_chunk": [...]}
    2. Blocks format: {"blocks": {"full_chunk": [0.1, 0.2, ...]}}
    3. Possibly other formats.

    We only support formats 1 and 2. Any other supplied format will raise an
    error.

    Raises:
        ValueError: If the embeddings format is not supported.

    Returns:
        The full chunk content embedding vector as a list of floats.
    """
    if isinstance(embeddings, dict):
        # Handle format 1.
        full_chunk_embedding = embeddings.get(FULL_CHUNK_EMBEDDING_KEY)
        if isinstance(full_chunk_embedding, list):
            # Double check that within the list we have floats and not another
            # list or dict.
            if not full_chunk_embedding:
                raise ValueError("Full chunk embedding is empty.")
            if isinstance(full_chunk_embedding[0], float):
                return full_chunk_embedding

        # Handle format 2.
        blocks = embeddings.get("blocks")
        if isinstance(blocks, dict):
            full_chunk_embedding = blocks.get(FULL_CHUNK_EMBEDDING_KEY)
            if isinstance(full_chunk_embedding, list):
                # Double check that within the list we have floats and not another
                # list or dict.
                if not full_chunk_embedding:
                    raise ValueError("Full chunk embedding is empty.")
                if isinstance(full_chunk_embedding[0], float):
                    return full_chunk_embedding

    raise ValueError(f"Unknown embedding format: {type(embeddings)}")


def _extract_title_vector(title_embedding: Any | None) -> list[float] | None:
    """Extract the title embedding vector.

    Returns None if no title embedding exists.

    Vespa returns title_embedding as tensor<float>(x[dim]) which can be in
    formats:
    1. Direct list: [0.1, 0.2, ...]
    2. Values format: {"values": [0.1, 0.2, ...]}
    3. Possibly other formats.

    Only formats 1 and 2 are supported. Any other supplied format will raise an
    error.

    Raises:
        ValueError: If the title embedding format is not supported.

    Returns:
        The title embedding vector as a list of floats.
    """
    if title_embedding is None:
        return None

    # Handle format 1.
    if isinstance(title_embedding, list):
        # Double check that within the list we have floats and not another
        # list or dict.
        if not title_embedding:
            return None
        if isinstance(title_embedding[0], float):
            return title_embedding

    # Handle format 2.
    if isinstance(title_embedding, dict):
        # Try values format.
        values = title_embedding.get("values")
        if values is not None and isinstance(values, list):
            # Double check that within the list we have floats and not another
            # list or dict.
            if not values:
                return None
            if isinstance(values[0], float):
                return values

    raise ValueError(f"Unknown title embedding format: {type(title_embedding)}")


def _transform_vespa_document_sets_to_opensearch_document_sets(
    vespa_document_sets: dict[str, int] | None,
) -> list[str] | None:
    if not vespa_document_sets:
        return None
    return list(vespa_document_sets.keys())


def _transform_vespa_acl_to_opensearch_acl(
    vespa_acl: dict[str, int] | None,
) -> tuple[bool, list[str]]:
    if not vespa_acl:
        return False, []
    acl_list = list(vespa_acl.keys())
    is_public = PUBLIC_DOC_PAT in acl_list
    if is_public:
        acl_list.remove(PUBLIC_DOC_PAT)
    return is_public, acl_list


def transform_vespa_chunks_to_opensearch_chunks(
    vespa_chunks: list[dict[str, Any]],
    tenant_state: TenantState,
    sanitized_to_original_doc_id_mapping: dict[str, str],
) -> tuple[list[DocumentChunk], list[dict[str, Any]]]:
    result: list[DocumentChunk] = []
    errored_chunks: list[dict[str, Any]] = []
    for vespa_chunk in vespa_chunks:
        try:
            # This should exist; fail loudly if it does not.
            vespa_document_id: str = vespa_chunk[DOCUMENT_ID]
            if not vespa_document_id:
                raise ValueError("Missing document_id in Vespa chunk.")
            # Vespa doc IDs were sanitized using
            # replace_invalid_doc_id_characters. This was a poor design choice
            # and we don't want this in OpenSearch; whatever restrictions there
            # may be on indexed chunk ID should have no bearing on the chunk's
            # document ID field, even if document ID is an argument to the chunk
            # ID. Deliberately choose to use the real doc ID supplied to this
            # function.
            if vespa_document_id in sanitized_to_original_doc_id_mapping:
                logger.warning(
                    f"Migration warning: Vespa document ID {vespa_document_id} does not match the document ID supplied "
                    f"{sanitized_to_original_doc_id_mapping[vespa_document_id]}. "
                    "The Vespa ID will be discarded."
                )
            document_id = sanitized_to_original_doc_id_mapping.get(
                vespa_document_id, vespa_document_id
            )

            # This should exist; fail loudly if it does not.
            chunk_index: int = vespa_chunk[CHUNK_ID]

            title: str | None = vespa_chunk.get(TITLE)
            # WARNING: Should supply format.tensors=short-value to the Vespa
            # client in order to get a supported format for the tensors.
            title_vector: list[float] | None = _extract_title_vector(
                vespa_chunk.get(TITLE_EMBEDDING)
            )

            # This should exist; fail loudly if it does not.
            content: str = vespa_chunk[CONTENT]
            if not content:
                raise ValueError(
                    f"Missing content in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}."
                )
            # This should exist; fail loudly if it does not.
            # WARNING: Should supply format.tensors=short-value to the Vespa
            # client in order to get a supported format for the tensors.
            content_vector: list[float] = _extract_content_vector(
                vespa_chunk[EMBEDDINGS]
            )
            if not content_vector:
                raise ValueError(
                    f"Missing content_vector in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}."
                )

            # This should exist; fail loudly if it does not.
            source_type: str = vespa_chunk[SOURCE_TYPE]
            if not source_type:
                raise ValueError(
                    f"Missing source_type in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}."
                )

            metadata_list: list[str] | None = vespa_chunk.get(METADATA_LIST)

            _raw_doc_updated_at: int | None = vespa_chunk.get(DOC_UPDATED_AT)
            last_updated: datetime | None = (
                datetime.fromtimestamp(_raw_doc_updated_at, tz=timezone.utc)
                if _raw_doc_updated_at is not None
                else None
            )

            hidden: bool = vespa_chunk.get(HIDDEN, False)

            # This should exist; fail loudly if it does not.
            global_boost: int = vespa_chunk[BOOST]

            # This should exist; fail loudly if it does not.
            semantic_identifier: str = vespa_chunk[SEMANTIC_IDENTIFIER]
            if not semantic_identifier:
                raise ValueError(
                    f"Missing semantic_identifier in Vespa chunk with document ID {vespa_document_id} and chunk "
                    f"index {chunk_index}."
                )

            image_file_id: str | None = vespa_chunk.get(IMAGE_FILE_NAME)
            source_links: str | None = vespa_chunk.get(SOURCE_LINKS)
            blurb: str = vespa_chunk.get(BLURB, "")
            doc_summary: str = vespa_chunk.get(DOC_SUMMARY, "")
            chunk_context: str = vespa_chunk.get(CHUNK_CONTEXT, "")
            metadata_suffix: str | None = vespa_chunk.get(METADATA_SUFFIX)
            document_sets: list[str] | None = (
                _transform_vespa_document_sets_to_opensearch_document_sets(
                    vespa_chunk.get(DOCUMENT_SETS)
                )
            )
            user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)
            personas: list[int] | None = vespa_chunk.get(PERSONAS)
            primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)
            secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)

            is_public, acl_list = _transform_vespa_acl_to_opensearch_acl(
                vespa_chunk.get(ACCESS_CONTROL_LIST)
            )
            if not is_public and not acl_list:
                logger.warning(
                    f"Migration warning: Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index} has no "
                    "public ACL and no access control list. This does not make sense as it implies the document is never "
                    "searchable. Continuing with the migration..."
                )

            chunk_tenant_id: str | None = vespa_chunk.get(TENANT_ID)
            if MULTI_TENANT:
                if not chunk_tenant_id:
                    raise ValueError(
                        "Missing tenant_id in Vespa chunk in a multi-tenant environment."
                    )
                if chunk_tenant_id != tenant_state.tenant_id:
                    raise ValueError(
                        f"Chunk tenant_id {chunk_tenant_id} does not match expected tenant_id {tenant_state.tenant_id}"
                    )

            opensearch_chunk = DocumentChunk(
                # We deliberately choose to use the doc ID supplied to this function
                # over the Vespa doc ID.
                document_id=document_id,
                chunk_index=chunk_index,
                title=title,
                title_vector=title_vector,
                content=content,
                content_vector=content_vector,
                source_type=source_type,
                metadata_list=metadata_list,
                last_updated=last_updated,
                public=is_public,
                access_control_list=acl_list,
                hidden=hidden,
                global_boost=global_boost,
                semantic_identifier=semantic_identifier,
                image_file_id=image_file_id,
                source_links=source_links,
                blurb=blurb,
                doc_summary=doc_summary,
                chunk_context=chunk_context,
                metadata_suffix=metadata_suffix,
                document_sets=document_sets,
                user_projects=user_projects,
                personas=personas,
                primary_owners=primary_owners,
                secondary_owners=secondary_owners,
                tenant_id=tenant_state,
            )

            result.append(opensearch_chunk)
        except Exception:
            traceback.print_exc()
            logger.exception(
                f"Migration error: Error transforming Vespa chunk with document ID {vespa_chunk.get(DOCUMENT_ID)} "
                f"and chunk index {vespa_chunk.get(CHUNK_ID)} into an OpenSearch chunk. Continuing with "
                "the migration..."
            )
            errored_chunks.append(vespa_chunk)

    return result, errored_chunks


================================================
FILE: backend/onyx/background/celery/tasks/periodic/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/periodic/tasks.py
================================================
#####
# Periodic Tasks
#####
import json
from typing import Any

from celery import shared_task
from celery.contrib.abortable import AbortableTask  # type: ignore
from celery.exceptions import TaskRevokedError
from sqlalchemy import inspect
from sqlalchemy import text
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PostgresAdvisoryLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant


@shared_task(
    name=OnyxCeleryTask.KOMBU_MESSAGE_CLEANUP_TASK,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
    base=AbortableTask,
)
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:  # noqa: ARG001
    """Runs periodically to clean up the kombu_message table"""

    # we will select messages older than this amount to clean up
    KOMBU_MESSAGE_CLEANUP_AGE = 7  # days
    KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000

    ctx = {}
    ctx["last_processed_id"] = 0
    ctx["deleted"] = 0
    ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
    ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
    with get_session_with_current_tenant() as db_session:
        # Exit the task if we can't take the advisory lock
        result = db_session.execute(
            text("SELECT pg_try_advisory_lock(:id)"),
            {"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value},
        ).scalar()
        if not result:
            return 0

        while True:
            if self.is_aborted():
                raise TaskRevokedError("kombu_message_cleanup_task was aborted.")

            b = kombu_message_cleanup_task_helper(ctx, db_session)
            if not b:
                break

            db_session.commit()

    if ctx["deleted"] > 0:
        task_logger.info(
            f"Deleted {ctx['deleted']} orphaned messages from kombu_message."
        )

    return ctx["deleted"]


def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:
    """
    Helper function to clean up old messages from the `kombu_message` table that are no longer relevant.

    This function retrieves messages from the `kombu_message` table that are no longer visible and
    older than a specified interval. It checks if the corresponding task_id exists in the
    `celery_taskmeta` table. If the task_id does not exist, the message is deleted.

    Args:
        ctx (dict): A context dictionary containing configuration parameters such as:
            - 'cleanup_age' (int): The age in days after which messages are considered old.
            - 'page_limit' (int): The maximum number of messages to process in one batch.
            - 'last_processed_id' (int): The ID of the last processed message to handle pagination.
            - 'deleted' (int): A counter to track the number of deleted messages.
        db_session (Session): The SQLAlchemy database session for executing queries.

    Returns:
        bool: Returns True if there are more rows to process, False if not.
    """

    inspector = inspect(db_session.bind)
    if not inspector:
        return False

    # With the move to redis as celery's broker and backend, kombu tables may not even exist.
    # We can fail silently.
    if not inspector.has_table("kombu_message"):
        return False

    query = text(
        """
    SELECT id, timestamp, payload
    FROM kombu_message WHERE visible = 'false'
    AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days
    AND id > :last_processed_id
    ORDER BY id
    LIMIT :page_limit
"""
    )
    kombu_messages = db_session.execute(
        query,
        {
            "interval_days": f"{ctx['cleanup_age']} days",
            "page_limit": ctx["page_limit"],
            "last_processed_id": ctx["last_processed_id"],
        },
    ).fetchall()

    if len(kombu_messages) == 0:
        return False

    for msg in kombu_messages:
        payload = json.loads(msg[2])
        task_id = payload["headers"]["id"]

        # Check if task_id exists in celery_taskmeta
        task_exists = db_session.execute(
            text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"),
            {"task_id": task_id},
        ).fetchone()

        # If task_id does not exist, delete the message
        if not task_exists:
            result = db_session.execute(
                text("DELETE FROM kombu_message WHERE id = :message_id"),
                {"message_id": msg[0]},
            )
            if result.rowcount > 0:  # type: ignore
                ctx["deleted"] += 1

        ctx["last_processed_id"] = msg[0]

    return True


================================================
FILE: backend/onyx/background/celery/tasks/pruning/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/pruning/tasks.py
================================================
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast
from uuid import uuid4

from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import ValidationError
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.celery.tasks.docprocessing.utils import IndexingCallbackBase
from onyx.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.models import InputType
from onyx.db.connector import mark_ccpair_as_pruned
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import get_documents_for_connector_credential_pair
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import delete_orphaned_hierarchy_nodes
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import remove_stale_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import reparent_orphaned_hierarchy_nodes
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import HierarchyNode as DBHierarchyNode
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.tag import delete_orphan_tags__no_commit
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import evict_hierarchy_nodes_from_cache
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.server.utils import make_short_id
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import pruning_ctx
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def _get_pruning_block_expiration() -> int:
    """
    Compute the expiration time for the pruning block signal.
    Base expiration is 60 seconds (1 minute), multiplied by the beat multiplier only in MULTI_TENANT mode.
    """
    base_expiration = 60  # seconds

    if not MULTI_TENANT:
        return base_expiration

    try:
        beat_multiplier = OnyxRuntime.get_beat_multiplier()
    except Exception:
        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

    return int(base_expiration * beat_multiplier)


def _get_fence_validation_block_expiration() -> int:
    """
    Compute the expiration time for the fence validation block signal.
    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.
    """
    base_expiration = 300  # seconds

    if not MULTI_TENANT:
        return base_expiration

    try:
        beat_multiplier = OnyxRuntime.get_beat_multiplier()
    except Exception:
        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT

    return int(base_expiration * beat_multiplier)


class PruneCallback(IndexingCallbackBase):
    def progress(self, tag: str, amount: int) -> None:
        self.redis_connector.prune.set_active()
        super().progress(tag, amount)


def _resolve_and_update_document_parents(
    db_session: Session,
    redis_client: Redis,
    source: DocumentSource,
    raw_id_to_parent: dict[str, str | None],
) -> None:
    """Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for
    each document and bulk-update the DB. Mirrors the resolution logic in
    run_docfetching.py."""
    source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)

    resolved: dict[str, int | None] = {}
    for doc_id, raw_parent_id in raw_id_to_parent.items():
        if raw_parent_id is None:
            continue
        node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)
        resolved[doc_id] = node_id if found else source_node_id

    if not resolved:
        return

    update_document_parent_hierarchy_nodes(
        db_session=db_session,
        doc_parent_map=resolved,
        commit=True,
    )
    task_logger.info(
        f"Pruning: resolved and updated parent hierarchy for {len(resolved)} documents (source={source.value})"
    )


"""Jobs / utils for kicking off pruning tasks."""


def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if pruning is due.

    Next pruning time is calculated as a delta from the last successful prune, or the
    last successful indexing if pruning has never succeeded.

    TODO(rkuo): consider whether we should allow pruning to be immediately rescheduled
    if pruning fails (which is what it does now). A backoff could be reasonable.
    """

    # skip pruning if no prune frequency is set
    # pruning can still be forced via the API which will run a pruning task directly
    if not cc_pair.connector.prune_freq:
        return False

    # skip pruning if not active
    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
        return False

    # skip pruning if the next scheduled prune time hasn't been reached yet
    last_pruned = cc_pair.last_pruned
    if not last_pruned:
        if not cc_pair.last_successful_index_time:
            # if we've never indexed, we can't prune
            return False

        # if never pruned, use the connector creation time. We could also
        # compute the completion time of the first successful index attempt, but
        # that is a reasonably heavy operation. This is a reasonable approximation —
        # in the worst case, we'll prune a little bit earlier than we should.
        last_pruned = cc_pair.connector.time_created

    next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)
    return datetime.now(timezone.utc) >= next_prune


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_PRUNING,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
)
def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_PRUNE_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        # the entire task needs to run frequently in order to finalize pruning

        # but pruning only kicks off once per hour
        if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
            task_logger.info("Checking for pruning due")

            cc_pair_ids: list[int] = []
            with get_session_with_current_tenant() as db_session:
                cc_pairs = get_connector_credential_pairs(db_session)
                for cc_pair_entry in cc_pairs:
                    cc_pair_ids.append(cc_pair_entry.id)

            for cc_pair_id in cc_pair_ids:
                lock_beat.reacquire()
                with get_session_with_current_tenant() as db_session:
                    cc_pair = get_connector_credential_pair_from_id(
                        db_session=db_session,
                        cc_pair_id=cc_pair_id,
                    )
                    if not cc_pair:
                        logger.error(f"CC pair not found: {cc_pair_id}")
                        continue

                    if not _is_pruning_due(cc_pair):
                        logger.info(f"CC pair not due for pruning: {cc_pair_id}")
                        continue

                    payload_id = try_creating_prune_generator_task(
                        self.app, cc_pair, db_session, r, tenant_id
                    )
                    if not payload_id:
                        logger.info(f"Pruning not created: {cc_pair_id}")
                        continue

                    task_logger.info(
                        f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}"
                    )
            r.set(OnyxRedisSignals.BLOCK_PRUNING, 1, ex=_get_pruning_block_expiration())

        # we want to run this less frequently than the overall task
        lock_beat.reacquire()
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES):
            # clear any permission fences that don't have associated celery tasks in progress
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
                r_celery = celery_get_broker_client(self.app)
                validate_pruning_fences(tenant_id, r, r_replica, r_celery, lock_beat)
            except Exception:
                task_logger.exception("Exception while validating pruning fences")

            r.set(
                OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES,
                1,
                ex=_get_fence_validation_block_expiration(),
            )

        # use a lookup table to find active fences. We still have to verify the fence
        # exists since it is an optimization and not the source of truth.
        lock_beat.reacquire()
        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
        for key in keys:
            key_bytes = cast(bytes, key)

            if not r.exists(key_bytes):
                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
                continue

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
                with get_session_with_current_tenant() as db_session:
                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(f"Unexpected pruning check exception: {error_msg}")
        task_logger.exception("Unexpected exception during pruning check")
    finally:
        if lock_beat.owned():
            lock_beat.release()
    task_logger.info(f"check_for_pruning finished: tenant={tenant_id}")
    return True


def try_creating_prune_generator_task(
    celery_app: Celery,
    cc_pair: ConnectorCredentialPair,
    db_session: Session,
    r: Redis,
    tenant_id: str,
) -> str | None:
    """Checks for any conditions that should block the pruning generator task from being
    created, then creates the task.

    Does not check for scheduling related conditions as this function
    is used to trigger prunes immediately, e.g. via the web ui.
    """

    logger.info(f"try_creating_prune_generator_task: cc_pair={cc_pair.id}")

    redis_connector = RedisConnector(tenant_id, cc_pair.id)

    if not ALLOW_SIMULTANEOUS_PRUNING:
        count = redis_connector.prune.get_active_task_count()
        if count > 0:
            logger.info(
                f"try_creating_prune_generator_task: cc_pair={cc_pair.id} no simultaneous pruning allowed"
            )
            return None

    LOCK_TIMEOUT = 30

    # we need to serialize starting pruning since it can be triggered either via
    # celery beat or manually (API call)
    lock: RedisLock = r.lock(
        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_prune_generator_task",
        timeout=LOCK_TIMEOUT,
    )

    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
    if not acquired:
        logger.info(
            f"try_creating_prune_generator_task: cc_pair={cc_pair.id} lock not acquired"
        )
        return None

    try:
        # skip pruning if already pruning
        if redis_connector.prune.fenced:
            logger.info(
                f"try_creating_prune_generator_task: cc_pair={cc_pair.id} already pruning"
            )
            return None

        # skip pruning if the cc_pair is deleting
        if redis_connector.delete.fenced:
            logger.info(
                f"try_creating_prune_generator_task: cc_pair={cc_pair.id} deleting"
            )
            return None

        # skip pruning if doc permissions sync is running
        if redis_connector.permissions.fenced:
            logger.info(
                f"try_creating_prune_generator_task: cc_pair={cc_pair.id} permissions sync running"
            )
            return None

        db_session.refresh(cc_pair)
        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
            logger.info(
                f"try_creating_prune_generator_task: cc_pair={cc_pair.id} deleting"
            )
            return None

        # add a long running generator task to the queue
        redis_connector.prune.generator_clear()
        redis_connector.prune.taskset_clear()

        custom_task_id = f"{redis_connector.prune.generator_task_key}_{uuid4()}"

        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
            insert_sync_record(
                db_session=db_session,
                entity_id=cc_pair.id,
                sync_type=SyncType.PRUNING,
            )
        except Exception:
            task_logger.exception("insert_sync_record exceptioned.")

        # signal active before the fence is set
        redis_connector.prune.set_active()

        # set a basic fence to start
        payload = RedisConnectorPrunePayload(
            id=make_short_id(),
            submitted=datetime.now(timezone.utc),
            started=None,
            celery_task_id=None,
        )
        redis_connector.prune.set_fence(payload)

        result = celery_app.send_task(
            OnyxCeleryTask.CONNECTOR_PRUNING_GENERATOR_TASK,
            kwargs=dict(
                cc_pair_id=cc_pair.id,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                tenant_id=tenant_id,
            ),
            queue=OnyxCeleryQueues.CONNECTOR_PRUNING,
            task_id=custom_task_id,
            priority=OnyxCeleryPriority.LOW,
        )

        # fill in the celery task id
        payload.celery_task_id = result.id
        redis_connector.prune.set_fence(payload)

        payload_id = payload.id
    except Exception as e:
        error_msg = format_error_for_logging(e)
        task_logger.warning(
            f"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}"
        )
        task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}")
        return None
    finally:
        if lock.owned():
            lock.release()
    task_logger.info(
        f"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}"
    )
    return payload_id


@shared_task(
    name=OnyxCeleryTask.CONNECTOR_PRUNING_GENERATOR_TASK,
    acks_late=False,
    soft_time_limit=JOB_TIMEOUT,
    track_started=True,
    trail=False,
    bind=True,
)
def connector_pruning_generator_task(
    self: Task,
    cc_pair_id: int,
    connector_id: int,
    credential_id: int,
    tenant_id: str,
) -> None:
    """connector pruning task. For a cc pair, this task pulls all document IDs from the source
    and compares those IDs to locally stored documents and deletes all locally stored IDs missing
    from the most recently pulled document ID list"""

    payload_id: str | None = None

    LoggerContextVars.reset()

    pruning_ctx_dict = pruning_ctx.get()
    pruning_ctx_dict["cc_pair_id"] = cc_pair_id
    pruning_ctx_dict["request_id"] = self.request.id
    pruning_ctx.set(pruning_ctx_dict)

    task_logger.info(f"Pruning generator starting: cc_pair={cc_pair_id}")

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
    # before the primary worker can finalize the fence
    start = time.monotonic()
    while True:
        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
            raise ValueError(
                f"connector_prune_generator_task - timed out waiting for fence to be ready: "
                f"fence={redis_connector.prune.fence_key}"
            )

        if not redis_connector.prune.fenced:  # The fence must exist
            raise ValueError(
                f"connector_prune_generator_task - fence not found: fence={redis_connector.prune.fence_key}"
            )

        payload = redis_connector.prune.payload  # The payload must exist
        if not payload:
            raise ValueError(
                "connector_prune_generator_task: payload invalid or not found"
            )

        if payload.celery_task_id is None:
            logger.info(
                f"connector_prune_generator_task - Waiting for fence: fence={redis_connector.prune.fence_key}"
            )
            time.sleep(1)
            continue

        payload_id = payload.id

        logger.info(
            f"connector_prune_generator_task - Fence found, continuing...: "
            f"fence={redis_connector.prune.fence_key} "
            f"payload_id={payload.id}"
        )
        break

    # set thread_local=False since we don't control what thread the indexing/pruning
    # might run our callback with
    lock: RedisLock = r.lock(
        OnyxRedisLocks.PRUNING_LOCK_PREFIX + f"_{redis_connector.cc_pair_id}",
        timeout=CELERY_PRUNING_LOCK_TIMEOUT,
        thread_local=False,
    )

    acquired = lock.acquire(blocking=False)
    if not acquired:
        task_logger.warning(
            f"Pruning task already running, exiting...: cc_pair={cc_pair_id}"
        )
        return None

    try:
        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair(
                db_session=db_session,
                connector_id=connector_id,
                credential_id=credential_id,
            )

            if not cc_pair:
                task_logger.warning(
                    f"cc_pair not found for {connector_id} {credential_id}"
                )
                return

            payload = redis_connector.prune.payload
            if not payload:
                raise ValueError(f"No fence payload found: cc_pair={cc_pair_id}")

            new_payload = RedisConnectorPrunePayload(
                id=payload.id,
                submitted=payload.submitted,
                started=datetime.now(timezone.utc),
                celery_task_id=payload.celery_task_id,
            )
            redis_connector.prune.set_fence(new_payload)

            task_logger.info(
                f"Pruning generator running connector: cc_pair={cc_pair_id} connector_source={cc_pair.connector.source}"
            )

            runnable_connector = instantiate_connector(
                db_session,
                cc_pair.connector.source,
                InputType.SLIM_RETRIEVAL,
                cc_pair.connector.connector_specific_config,
                cc_pair.credential,
            )

            callback = PruneCallback(
                0,
                redis_connector,
                lock,
                r,
                timeout_seconds=JOB_TIMEOUT,
            )

            # Extract docs and hierarchy nodes from the source
            extraction_result = extract_ids_from_runnable_connector(
                runnable_connector, callback
            )
            all_connector_doc_ids = extraction_result.raw_id_to_parent

            # Process hierarchy nodes (same as docfetching):
            # upsert to Postgres and cache in Redis
            source = cc_pair.connector.source
            redis_client = get_redis_client(tenant_id=tenant_id)

            ensure_source_node_exists(redis_client, db_session, source)

            upserted_nodes: list[DBHierarchyNode] = []
            if extraction_result.hierarchy_nodes:
                is_connector_public = cc_pair.access_type == AccessType.PUBLIC

                upserted_nodes = upsert_hierarchy_nodes_batch(
                    db_session=db_session,
                    nodes=extraction_result.hierarchy_nodes,
                    source=source,
                    commit=True,
                    is_connector_public=is_connector_public,
                )

                upsert_hierarchy_node_cc_pair_entries(
                    db_session=db_session,
                    hierarchy_node_ids=[n.id for n in upserted_nodes],
                    connector_id=connector_id,
                    credential_id=credential_id,
                    commit=True,
                )

                cache_entries = [
                    HierarchyNodeCacheEntry.from_db_model(node)
                    for node in upserted_nodes
                ]
                cache_hierarchy_nodes_batch(
                    redis_client=redis_client,
                    source=source,
                    entries=cache_entries,
                )

                task_logger.info(
                    f"Pruning: persisted and cached {len(extraction_result.hierarchy_nodes)} "
                    f"hierarchy nodes for cc_pair={cc_pair_id}"
                )

            # Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
            # and bulk-update documents, mirroring the docfetching resolution
            _resolve_and_update_document_parents(
                db_session=db_session,
                redis_client=redis_client,
                source=source,
                raw_id_to_parent=all_connector_doc_ids,
            )

            # Link hierarchy nodes to documents for sources where pages can be
            # both hierarchy nodes AND documents (e.g. Notion, Confluence)
            all_doc_id_list = list(all_connector_doc_ids.keys())
            link_hierarchy_nodes_to_documents(
                db_session=db_session,
                document_ids=all_doc_id_list,
                source=source,
                commit=True,
            )

            # a list of docs in our local index
            all_indexed_document_ids = {
                doc.id
                for doc in get_documents_for_connector_credential_pair(
                    db_session=db_session,
                    connector_id=connector_id,
                    credential_id=credential_id,
                )
            }

            # generate list of docs to remove (no longer in the source)
            doc_ids_to_remove = list(
                all_indexed_document_ids - all_connector_doc_ids.keys()
            )

            task_logger.info(
                "Pruning set collected: "
                f"cc_pair={cc_pair_id} "
                f"connector_source={cc_pair.connector.source} "
                f"docs_to_remove={len(doc_ids_to_remove)}"
            )

            task_logger.info(
                f"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}"
            )
            tasks_generated = redis_connector.prune.generate_tasks(
                set(doc_ids_to_remove), self.app, db_session, None
            )
            if tasks_generated is None:
                return None

            task_logger.info(
                f"RedisConnector.prune.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
            )

            redis_connector.prune.generator_complete = tasks_generated

            # --- Hierarchy node pruning ---
            live_node_ids = {n.id for n in upserted_nodes}
            stale_removed = remove_stale_hierarchy_node_cc_pair_entries(
                db_session=db_session,
                connector_id=connector_id,
                credential_id=credential_id,
                live_hierarchy_node_ids=live_node_ids,
                commit=True,
            )
            deleted_raw_ids = delete_orphaned_hierarchy_nodes(
                db_session=db_session,
                source=source,
                commit=True,
            )
            reparented_nodes = reparent_orphaned_hierarchy_nodes(
                db_session=db_session,
                source=source,
                commit=True,
            )
            if deleted_raw_ids:
                evict_hierarchy_nodes_from_cache(redis_client, source, deleted_raw_ids)
            if reparented_nodes:
                reparented_cache_entries = [
                    HierarchyNodeCacheEntry.from_db_model(node)
                    for node in reparented_nodes
                ]
                cache_hierarchy_nodes_batch(
                    redis_client, source, reparented_cache_entries
                )
            if stale_removed or deleted_raw_ids or reparented_nodes:
                task_logger.info(
                    f"Hierarchy node pruning: cc_pair={cc_pair_id} "
                    f"stale_entries_removed={stale_removed} "
                    f"nodes_deleted={len(deleted_raw_ids)} "
                    f"nodes_reparented={len(reparented_nodes)}"
                )
    except Exception as e:
        task_logger.exception(
            f"Pruning exceptioned: cc_pair={cc_pair_id} connector={connector_id} payload_id={payload_id}"
        )

        redis_connector.prune.reset()
        raise e
    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"Pruning generator finished: cc_pair={cc_pair_id} payload_id={payload_id}"
    )


"""Monitoring pruning utils"""


def monitor_ccpair_pruning_taskset(
    tenant_id: str,
    key_bytes: bytes,
    r: Redis,  # noqa: ARG001
    db_session: Session,
) -> None:
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(
            f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
        )
        return

    cc_pair_id = int(cc_pair_id_str)

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if not redis_connector.prune.fenced:
        return

    initial = redis_connector.prune.generator_complete
    if initial is None:
        return

    remaining = redis_connector.prune.get_remaining()
    task_logger.info(
        f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
    )
    if remaining > 0:
        return

    mark_ccpair_as_pruned(int(cc_pair_id), db_session)
    task_logger.info(
        f"Connector pruning finished: cc_pair={cc_pair_id} num_pruned={initial}"
    )

    update_sync_record_status(
        db_session=db_session,
        entity_id=cc_pair_id,
        sync_type=SyncType.PRUNING,
        sync_status=SyncStatus.SUCCESS,
        num_docs_synced=initial,
    )

    delete_orphan_tags__no_commit(db_session)

    redis_connector.prune.taskset_clear()
    redis_connector.prune.generator_clear()
    redis_connector.prune.set_fence(None)


def validate_pruning_fences(
    tenant_id: str,
    r: Redis,
    r_replica: Redis,
    r_celery: Redis,
    lock_beat: RedisLock,
) -> None:
    # building lookup table can be expensive, so we won't bother
    # validating until the queue is small
    PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN = 1024

    queue_len = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
    if queue_len > PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN:
        return

    # the queue for a single pruning generator task
    reserved_generator_tasks = celery_get_unacked_task_ids(
        OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery
    )

    # the queue for a reasonably large set of lightweight deletion tasks
    queued_upsert_tasks = celery_get_queued_task_ids(
        OnyxCeleryQueues.CONNECTOR_DELETION, r_celery
    )

    # Use replica for this because the worst thing that happens
    # is that we don't run the validation on this pass
    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
    for key in keys:
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
        if not key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
            continue

        validate_pruning_fence(
            tenant_id,
            key_bytes,
            reserved_generator_tasks,
            queued_upsert_tasks,
            r,
            r_celery,
        )

        lock_beat.reacquire()

    return


def validate_pruning_fence(
    tenant_id: str,
    key_bytes: bytes,
    reserved_tasks: set[str],
    queued_tasks: set[str],
    r: Redis,
    r_celery: Redis,
) -> None:
    """See validate_indexing_fence for an overall idea of validation flows.

    queued_tasks: the celery queue of lightweight permission sync tasks
    reserved_tasks: prefetched tasks for sync task generator
    """
    # if the fence doesn't exist, there's nothing to do
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
        task_logger.warning(
            f"validate_pruning_fence - could not parse id from {fence_key}"
        )
        return

    cc_pair_id = int(cc_pair_id_str)
    # parse out metadata and initialize the helper class with it
    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))

    # check to see if the fence/payload exists
    if not redis_connector.prune.fenced:
        return

    # in the cloud, the payload format may have changed ...
    # it's a little sloppy, but just reset the fence for now if that happens
    # TODO: add intentional cleanup/abort logic
    try:
        payload = redis_connector.prune.payload
    except ValidationError:
        task_logger.exception(
            "validate_pruning_fence - "
            "Resetting fence because fence schema is out of date: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key}"
        )

        redis_connector.prune.reset()
        return

    if not payload:
        return

    if not payload.celery_task_id:
        return

    # OK, there's actually something for us to validate

    # either the generator task must be in flight or its subtasks must be
    found = celery_find_task(
        payload.celery_task_id,
        OnyxCeleryQueues.CONNECTOR_PRUNING,
        r_celery,
    )
    if found:
        # the celery task exists in the redis queue
        redis_connector.prune.set_active()
        return

    if payload.celery_task_id in reserved_tasks:
        # the celery task was prefetched and is reserved within a worker
        redis_connector.prune.set_active()
        return

    # look up every task in the current taskset in the celery queue
    # every entry in the taskset should have an associated entry in the celery task queue
    # because we get the celery tasks first, the entries in our own pruning taskset
    # should be roughly a subset of the tasks in celery

    # this check isn't very exact, but should be sufficient over a period of time
    # A single successful check over some number of attempts is sufficient.

    # TODO: if the number of tasks in celery is much lower than than the taskset length
    # we might be able to shortcut the lookup since by definition some of the tasks
    # must not exist in celery.

    tasks_scanned = 0
    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad

    for member in r.sscan_iter(redis_connector.prune.taskset_key):
        tasks_scanned += 1

        member_bytes = cast(bytes, member)
        member_str = member_bytes.decode("utf-8")
        if member_str in queued_tasks:
            continue

        if member_str in reserved_tasks:
            continue

        tasks_not_in_celery += 1

    task_logger.info(
        f"validate_pruning_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
    )

    # we're active if there are still tasks to run and those tasks all exist in celery
    if tasks_scanned > 0 and tasks_not_in_celery == 0:
        redis_connector.prune.set_active()
        return

    # we may want to enable this check if using the active task list somehow isn't good enough
    # if redis_connector_index.generator_locked():
    #     logger.info(f"{payload.celery_task_id} is currently executing.")

    # if we get here, we didn't find any direct indication that the associated celery tasks exist,
    # but they still might be there due to gaps in our ability to check states during transitions
    # Checking the active signal safeguards us against these transition periods
    # (which has a duration that allows us to bridge those gaps)
    if redis_connector.prune.active():
        return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
    task_logger.warning(
        "validate_pruning_fence - "
        "Resetting fence because no associated celery tasks were found: "
        f"cc_pair={cc_pair_id} "
        f"fence={fence_key} "
        f"payload_id={payload.id}"
    )

    redis_connector.prune.reset()
    return


================================================
FILE: backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py
================================================
import httpx
from tenacity import retry
from tenacity import retry_if_exception_type
from tenacity import stop_after_delay
from tenacity import wait_random_exponential

from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.interfaces import VespaDocumentUserFields


class RetryDocumentIndex:
    """A wrapper class to help with specific retries against Vespa involving
    read timeouts.

    wait_random_exponential implements full jitter as per this article:
    https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/"""

    MAX_WAIT = 30

    # STOP_AFTER + MAX_WAIT should be slightly less (5?) than the celery soft_time_limit
    STOP_AFTER = 70

    def __init__(self, index: DocumentIndex):
        self.index: DocumentIndex = index

    @retry(
        retry=retry_if_exception_type(httpx.ReadTimeout),
        wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),
        stop=stop_after_delay(STOP_AFTER),
    )
    def delete_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,
        chunk_count: int | None,
    ) -> int:
        return self.index.delete_single(
            doc_id,
            tenant_id=tenant_id,
            chunk_count=chunk_count,
        )

    @retry(
        retry=retry_if_exception_type(httpx.ReadTimeout),
        wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),
        stop=stop_after_delay(STOP_AFTER),
    )
    def update_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,
        chunk_count: int | None,
        fields: VespaDocumentFields | None,
        user_fields: VespaDocumentUserFields | None,
    ) -> None:
        self.index.update_single(
            doc_id,
            tenant_id=tenant_id,
            chunk_count=chunk_count,
            fields=fields,
            user_fields=user_fields,
        )


================================================
FILE: backend/onyx/background/celery/tasks/shared/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/shared/tasks.py
================================================
import time
from enum import Enum
from http import HTTPStatus

import httpx
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from tenacity import RetryError

from onyx.access.access import get_access_for_document
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.configs.constants import ONYX_CELERY_BEAT_HEARTBEAT_KEY
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document import delete_document_by_connector_credential_pair__no_commit
from onyx.db.document import delete_documents_complete__no_commit
from onyx.db.document import fetch_chunk_count_for_document
from onyx.db.document import get_document
from onyx.db.document import get_document_connector_count
from onyx.db.document import mark_document_as_modified
from onyx.db.document import mark_document_as_synced
from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import ConnectorCredentialPairIdentifier

DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES = 3


# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT
LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15


class OnyxCeleryTaskCompletionStatus(str, Enum):
    """The different statuses the watchdog can finish with.

    TODO: create broader success/failure/abort categories
    """

    UNDEFINED = "undefined"

    SUCCEEDED = "succeeded"

    SKIPPED = "skipped"

    SOFT_TIME_LIMIT = "soft_time_limit"

    NON_RETRYABLE_EXCEPTION = "non_retryable_exception"
    RETRYABLE_EXCEPTION = "retryable_exception"


@shared_task(
    name=OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,
    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
    time_limit=LIGHT_TIME_LIMIT,
    max_retries=DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES,
    bind=True,
)
def document_by_cc_pair_cleanup_task(
    self: Task,
    document_id: str,
    connector_id: int,
    credential_id: int,
    tenant_id: str,
) -> bool:
    """A lightweight subtask used to clean up document to cc pair relationships.
    Created by connection deletion and connector pruning parent tasks."""

    """
    To delete a connector / credential pair:
    (1) find all documents associated with connector / credential pair where there
    this the is only connector / credential pair that has indexed it
    (2) delete all documents from document stores
    (3) delete all entries from postgres
    (4) find all documents associated with connector / credential pair where there
    are multiple connector / credential pairs that have indexed it
    (5) update document store entries to remove access associated with the
    connector / credential pair from the access list
    (6) delete all relevant entries from postgres
    """
    task_logger.debug(f"Task start: doc={document_id}")

    start = time.monotonic()

    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED

    try:
        with get_session_with_current_tenant() as db_session:
            action = "skip"

            active_search_settings = get_active_search_settings(db_session)
            # This flow is for updates and deletion so we get all indices.
            document_indices = get_all_document_indices(
                active_search_settings.primary,
                active_search_settings.secondary,
                httpx_client=HttpxPool.get("vespa"),
            )

            retry_document_indices: list[RetryDocumentIndex] = [
                RetryDocumentIndex(document_index)
                for document_index in document_indices
            ]

            count = get_document_connector_count(db_session, document_id)
            if count == 1:
                # count == 1 means this is the only remaining cc_pair reference to the doc
                # delete it from vespa and the db
                action = "delete"

                chunk_count = fetch_chunk_count_for_document(document_id, db_session)

                for retry_document_index in retry_document_indices:
                    _ = retry_document_index.delete_single(
                        document_id,
                        tenant_id=tenant_id,
                        chunk_count=chunk_count,
                    )

                delete_document_references_from_kg(
                    db_session=db_session,
                    document_id=document_id,
                )

                delete_documents_complete__no_commit(
                    db_session=db_session,
                    document_ids=[document_id],
                )
                db_session.commit()

                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
            elif count > 1:
                action = "update"

                # count > 1 means the document still has cc_pair references
                doc = get_document(document_id, db_session)
                if not doc:
                    return False

                # the below functions do not include cc_pairs being deleted.
                # i.e. they will correctly omit access for the current cc_pair
                doc_access = get_access_for_document(
                    document_id=document_id, db_session=db_session
                )

                doc_sets = fetch_document_sets_for_document(document_id, db_session)
                update_doc_sets: set[str] = set(doc_sets)

                fields = VespaDocumentFields(
                    document_sets=update_doc_sets,
                    access=doc_access,
                    boost=doc.boost,
                    hidden=doc.hidden,
                )

                for retry_document_index in retry_document_indices:
                    # TODO(andrei): Previously there was a comment here saying
                    # it was ok if a doc did not exist in the document index. I
                    # don't agree with that claim, so keep an eye on this task
                    # to see if this raises.
                    retry_document_index.update_single(
                        document_id,
                        tenant_id=tenant_id,
                        chunk_count=doc.chunk_count,
                        fields=fields,
                        user_fields=None,
                    )

                # there are still other cc_pair references to the doc, so just resync to Vespa
                delete_document_by_connector_credential_pair__no_commit(
                    db_session=db_session,
                    document_id=document_id,
                    connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
                        connector_id=connector_id,
                        credential_id=credential_id,
                    ),
                )

                mark_document_as_synced(document_id, db_session)
                db_session.commit()

                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
            else:
                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED

            elapsed = time.monotonic() - start
            task_logger.info(
                f"doc={document_id} action={action} refcount={count} elapsed={elapsed:.2f}"
            )
    except SoftTimeLimitExceeded:
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
    except Exception as ex:
        e: Exception | None = None
        while True:
            if isinstance(ex, RetryError):
                task_logger.warning(
                    f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
                )

                # only set the inner exception if it is of type Exception
                e_temp = ex.last_attempt.exception()
                if isinstance(e_temp, Exception):
                    e = e_temp
            else:
                e = ex

            if isinstance(e, httpx.HTTPStatusError):
                if e.response.status_code == HTTPStatus.BAD_REQUEST:
                    task_logger.exception(
                        f"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}"
                    )
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
                break

            task_logger.exception(
                f"document_by_cc_pair_cleanup_task exceptioned: doc={document_id}"
            )

            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
            if (
                self.max_retries is not None
                and self.request.retries >= self.max_retries
            ):
                # This is the last attempt! mark the document as dirty in the db so that it
                # eventually gets fixed out of band via stale document reconciliation
                task_logger.warning(
                    f"Max celery task retries reached. Marking doc as dirty for reconciliation: doc={document_id}"
                )
                with get_session_with_current_tenant() as db_session:
                    # delete the cc pair relationship now and let reconciliation clean it up
                    # in vespa
                    delete_document_by_connector_credential_pair__no_commit(
                        db_session=db_session,
                        document_id=document_id,
                        connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
                            connector_id=connector_id,
                            credential_id=credential_id,
                        ),
                    )
                    mark_document_as_modified(document_id, db_session)
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
                break

            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
            countdown = 2 ** (self.request.retries + 4)
            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
            break  # we won't hit this, but it looks weird not to have it
    finally:
        task_logger.info(
            f"document_by_cc_pair_cleanup_task completed: status={completion_status.value} doc={document_id}"
        )

    if completion_status != OnyxCeleryTaskCompletionStatus.SUCCEEDED:
        return False

    task_logger.info(f"document_by_cc_pair_cleanup_task finished: doc={document_id}")
    return True


@shared_task(name=OnyxCeleryTask.CELERY_BEAT_HEARTBEAT, ignore_result=True, bind=True)
def celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001
    """When this task runs, it writes a key to Redis with a TTL.

    An external observer can check this key to figure out if the celery beat is still running.
    """
    time_start = time.monotonic()
    r: Redis = get_redis_client()
    r.set(ONYX_CELERY_BEAT_HEARTBEAT_KEY, 1, ex=600)
    time_elapsed = time.monotonic() - time_start
    task_logger.info(f"celery_beat_heartbeat finished: elapsed={time_elapsed:.2f}")


================================================
FILE: backend/onyx/background/celery/tasks/user_file_processing/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/user_file_processing/tasks.py
================================================
import datetime
import time
from typing import Any
from uuid import UUID

import httpx
import sqlalchemy as sa
from celery import Celery
from celery import shared_task
from celery import Task
from redis import Redis
from redis.lock import Lock as RedisLock
from retry import retry
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.access.access import build_access_for_user_files
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.db.user_file import fetch_user_files_with_access_relationships
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.utils import store_user_file_plaintext
from onyx.file_store.utils import user_file_id_to_plaintext_file_name
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.variable_functionality import global_version


def _as_uuid(value: str | UUID) -> UUID:
    """Return a UUID, accepting either a UUID or a string-like value."""
    return value if isinstance(value, UUID) else UUID(str(value))


def _user_file_lock_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}"


def _user_file_queued_key(user_file_id: str | UUID) -> str:
    """Key that exists while a process_single_user_file task is sitting in the queue.

    The beat generator sets this with a TTL equal to CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
    before enqueuing and the worker deletes it as its first action.  This prevents
    the beat from adding duplicate tasks for files that already have a live task
    in flight.
    """
    return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"


def user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"


def _user_file_project_sync_queued_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_QUEUED_PREFIX}:{user_file_id}"


def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}"


def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:
    """Key that exists while a delete_single_user_file task is sitting in the queue.

    The beat generator sets this with a TTL equal to CELERY_USER_FILE_DELETE_TASK_EXPIRES
    before enqueuing and the worker deletes it as its first action.  This prevents
    the beat from adding duplicate tasks for files that already have a live task
    in flight.
    """
    return f"{OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX}:{user_file_id}"


def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
    redis_celery = celery_get_broker_client(celery_app)
    return celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, redis_celery
    )


def enqueue_user_file_project_sync_task(
    *,
    celery_app: Celery,
    redis_client: Redis,
    user_file_id: str | UUID,
    tenant_id: str,
    priority: OnyxCeleryPriority = OnyxCeleryPriority.HIGH,
) -> bool:
    """Enqueue a project-sync task if no matching queued task already exists."""
    queued_key = _user_file_project_sync_queued_key(user_file_id)

    # NX+EX gives us atomic dedupe and a self-healing TTL.
    queued_guard_set = redis_client.set(
        queued_key,
        1,
        nx=True,
        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )
    if not queued_guard_set:
        return False

    try:
        celery_app.send_task(
            OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
            kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
            queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
            priority=priority,
            expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
        )
    except Exception:
        # Roll back the queued guard if task publish fails.
        redis_client.delete(queued_key)
        raise

    return True


@retry(tries=3, delay=1, backoff=2, jitter=(0.0, 1.0))
def _visit_chunks(
    *,
    http_client: httpx.Client,
    index_name: str,
    selection: str,
    continuation: str | None = None,
) -> tuple[list[dict[str, Any]], str | None]:
    task_logger.info(
        f"Visiting chunks for index={index_name} with selection={selection}"
    )
    base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
    params: dict[str, str] = {
        "selection": selection,
        "wantedDocumentCount": "100",  # Use smaller batch size to avoid timeouts
    }
    if continuation:
        params["continuation"] = continuation
    resp = http_client.get(base_url, params=params, timeout=None)
    resp.raise_for_status()
    payload = resp.json()
    return payload.get("documents", []), payload.get("continuation")


def _get_document_chunk_count(
    *,
    index_name: str,
    selection: str,
) -> int:
    chunk_count = 0
    continuation = None
    while True:
        docs, continuation = _visit_chunks(
            http_client=HttpxPool.get("vespa"),
            index_name=index_name,
            selection=selection,
            continuation=continuation,
        )
        if not docs:
            break
        chunk_count += len(docs)
        if not continuation:
            break
    return chunk_count


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
    """Scan for user files with PROCESSING status and enqueue per-file tasks.

    Three mechanisms prevent queue runaway:

    1. **Queue depth backpressure** – if the broker queue already has more than
       USER_FILE_PROCESSING_MAX_QUEUE_DEPTH items we skip this beat cycle
       entirely.  Workers are clearly behind; adding more tasks would only make
       the backlog worse.

    2. **Per-file queued guard** – before enqueuing a task we set a short-lived
       Redis key (TTL = CELERY_USER_FILE_PROCESSING_TASK_EXPIRES).  If that key
       already exists the file already has a live task in the queue, so we skip
       it.  The worker deletes the key the moment it picks up the task so the
       next beat cycle can re-enqueue if the file is still PROCESSING.

    3. **Task expiry** – every enqueued task carries an `expires` value equal to
       CELERY_USER_FILE_PROCESSING_TASK_EXPIRES.  If a task is still sitting in
       the queue after that deadline, Celery discards it without touching the DB.
       This is a belt-and-suspenders defence: even if the guard key is lost (e.g.
       Redis restart), stale tasks evict themselves rather than piling up forever.
    """
    task_logger.info("check_user_file_processing - Starting")

    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.USER_FILE_PROCESSING_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # Do not overlap generator runs
    if not lock.acquire(blocking=False):
        return None

    enqueued = 0
    skipped_guard = 0
    try:
        # --- Protection 1: queue depth backpressure ---
        r_celery = celery_get_broker_client(self.app)
        queue_len = celery_get_queue_length(
            OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
        )
        if queue_len > USER_FILE_PROCESSING_MAX_QUEUE_DEPTH:
            task_logger.warning(
                f"check_user_file_processing - Queue depth {queue_len} exceeds "
                f"{USER_FILE_PROCESSING_MAX_QUEUE_DEPTH}, skipping enqueue for "
                f"tenant={tenant_id}"
            )
            return None

        with get_session_with_current_tenant() as db_session:
            user_file_ids = (
                db_session.execute(
                    select(UserFile.id).where(
                        UserFile.status == UserFileStatus.PROCESSING
                    )
                )
                .scalars()
                .all()
            )

            for user_file_id in user_file_ids:
                # --- Protection 2: per-file queued guard ---
                queued_key = _user_file_queued_key(user_file_id)
                guard_set = redis_client.set(
                    queued_key,
                    1,
                    ex=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
                    nx=True,
                )
                if not guard_set:
                    skipped_guard += 1
                    continue

                # --- Protection 3: task expiry ---
                # If task submission fails, clear the guard immediately so the
                # next beat cycle can retry enqueuing this file.
                try:
                    self.app.send_task(
                        OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
                        kwargs={
                            "user_file_id": str(user_file_id),
                            "tenant_id": tenant_id,
                        },
                        queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
                        priority=OnyxCeleryPriority.HIGH,
                        expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
                    )
                except Exception:
                    redis_client.delete(queued_key)
                    raise
                enqueued += 1

    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} tasks for tenant={tenant_id}"
    )
    return None


def _process_user_file_without_vector_db(
    uf: UserFile,
    documents: list[Document],
    db_session: Session,
) -> None:
    """Process a user file when the vector DB is disabled.

    Extracts raw text and computes a token count, stores the plaintext in
    the file store, and marks the file as COMPLETED.  Skips embedding and
    the indexing pipeline entirely.
    """
    from onyx.llm.factory import get_default_llm
    from onyx.llm.factory import get_llm_tokenizer_encode_func

    # Combine section text from all document sections
    combined_text = " ".join(
        section.text for doc in documents for section in doc.sections if section.text
    )

    # Compute token count using the user's default LLM tokenizer
    try:
        llm = get_default_llm()
        encode = get_llm_tokenizer_encode_func(llm)
        token_count: int | None = len(encode(combined_text))
    except Exception:
        task_logger.warning(
            f"_process_user_file_without_vector_db - Failed to compute token count for {uf.id}, falling back to None"
        )
        token_count = None

    # Persist plaintext for fast FileReaderTool loads
    store_user_file_plaintext(
        user_file_id=uf.id,
        plaintext_content=combined_text,
    )

    # Update the DB record
    if uf.status != UserFileStatus.DELETING:
        uf.status = UserFileStatus.COMPLETED
    uf.token_count = token_count
    uf.chunk_count = 0  # no chunks without vector DB
    uf.last_project_sync_at = datetime.datetime.now(datetime.timezone.utc)
    db_session.add(uf)
    db_session.commit()

    task_logger.info(
        f"_process_user_file_without_vector_db - Completed id={uf.id} tokens={token_count}"
    )


def _process_user_file_with_indexing(
    uf: UserFile,
    user_file_id: str,
    documents: list[Document],
    tenant_id: str,
    db_session: Session,
) -> None:
    """Process a user file through the full indexing pipeline (vector DB path)."""
    # 20 is the documented default for httpx max_keepalive_connections
    if MANAGED_VESPA:
        httpx_init_vespa_pool(
            20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
        )
    else:
        httpx_init_vespa_pool(20)

    search_settings_list = get_active_search_settings_list(db_session)
    current_search_settings = next(
        (ss for ss in search_settings_list if ss.status.is_current()),
        None,
    )
    if current_search_settings is None:
        raise RuntimeError(
            f"_process_user_file_with_indexing - No current search settings found for tenant={tenant_id}"
        )

    adapter = UserFileIndexingAdapter(
        tenant_id=tenant_id,
        db_session=db_session,
    )

    embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
        search_settings=current_search_settings,
    )

    document_indices = get_all_document_indices(
        current_search_settings,
        None,
        httpx_client=HttpxPool.get("vespa"),
    )

    index_pipeline_result = run_indexing_pipeline(
        embedder=embedding_model,
        document_indices=document_indices,
        ignore_time_skip=True,
        db_session=db_session,
        tenant_id=tenant_id,
        document_batch=documents,
        request_id=None,
        adapter=adapter,
    )

    task_logger.info(
        f"_process_user_file_with_indexing - Indexing pipeline completed ={index_pipeline_result}"
    )

    if (
        index_pipeline_result.failures
        or index_pipeline_result.total_docs != len(documents)
        or index_pipeline_result.total_chunks == 0
    ):
        task_logger.error(
            f"_process_user_file_with_indexing - Indexing pipeline failed id={user_file_id}"
        )
        if uf.status != UserFileStatus.DELETING:
            uf.status = UserFileStatus.FAILED
            db_session.add(uf)
            db_session.commit()
        raise RuntimeError(f"Indexing pipeline failed for user file {user_file_id}")


def process_user_file_impl(
    *, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
    """Core implementation for processing a single user file.

    When redis_locking=True, acquires a per-file Redis lock and clears the
    queued-key guard (Celery path).  When redis_locking=False, skips all Redis
    operations (BackgroundTask path).
    """
    task_logger.info(f"process_user_file_impl - Starting id={user_file_id}")
    start = time.monotonic()

    file_lock: RedisLock | None = None
    if redis_locking:
        redis_client = get_redis_client(tenant_id=tenant_id)
        redis_client.delete(_user_file_queued_key(user_file_id))
        file_lock = redis_client.lock(
            _user_file_lock_key(user_file_id),
            timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
        )
        if file_lock is not None and not file_lock.acquire(blocking=False):
            task_logger.info(
                f"process_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
            )
            return

    documents: list[Document] = []
    try:
        with get_session_with_current_tenant() as db_session:
            uf = db_session.get(UserFile, _as_uuid(user_file_id))
            if not uf:
                task_logger.warning(
                    f"process_user_file_impl - UserFile not found id={user_file_id}"
                )
                return

            if uf.status not in (
                UserFileStatus.PROCESSING,
                UserFileStatus.INDEXING,
            ):
                task_logger.info(
                    f"process_user_file_impl - Skipping id={user_file_id} status={uf.status}"
                )
                return

            connector = LocalFileConnector(
                file_locations=[uf.file_id],
                file_names=[uf.name] if uf.name else None,
            )
            connector.load_credentials({})

            try:
                for batch in connector.load_from_state():
                    documents.extend(
                        [doc for doc in batch if not isinstance(doc, HierarchyNode)]
                    )

                for document in documents:
                    document.id = str(user_file_id)
                    document.source = DocumentSource.USER_FILE

                if DISABLE_VECTOR_DB:
                    _process_user_file_without_vector_db(
                        uf=uf,
                        documents=documents,
                        db_session=db_session,
                    )
                else:
                    _process_user_file_with_indexing(
                        uf=uf,
                        user_file_id=user_file_id,
                        documents=documents,
                        tenant_id=tenant_id,
                        db_session=db_session,
                    )

            except Exception as e:
                task_logger.exception(
                    f"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
                )
                current_user_file = db_session.get(UserFile, _as_uuid(user_file_id))
                if (
                    current_user_file
                    and current_user_file.status != UserFileStatus.DELETING
                ):
                    uf.status = UserFileStatus.FAILED
                    db_session.add(uf)
                    db_session.commit()
                return

        elapsed = time.monotonic() - start
        task_logger.info(
            f"process_user_file_impl - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
        )
    except Exception as e:
        with get_session_with_current_tenant() as db_session:
            uf = db_session.get(UserFile, _as_uuid(user_file_id))
            if uf:
                if uf.status != UserFileStatus.DELETING:
                    uf.status = UserFileStatus.FAILED
                db_session.add(uf)
                db_session.commit()

        task_logger.exception(
            f"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
        )
        raise
    finally:
        if file_lock is not None and file_lock.owned():
            file_lock.release()


@shared_task(
    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
    bind=True,
    ignore_result=True,
)
def process_single_user_file(
    self: Task,  # noqa: ARG001
    *,
    user_file_id: str,
    tenant_id: str,
) -> None:
    process_user_file_impl(
        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
    )


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
    """Scan for user files with DELETING status and enqueue per-file tasks.

    Three mechanisms prevent queue runaway (mirrors check_user_file_processing):

    1. **Queue depth backpressure** – if the broker queue already has more than
       USER_FILE_DELETE_MAX_QUEUE_DEPTH items we skip this beat cycle entirely.

    2. **Per-file queued guard** – before enqueuing a task we set a short-lived
       Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES).  If that key
       already exists the file already has a live task in the queue, so we skip
       it.  The worker deletes the key the moment it picks up the task so the
       next beat cycle can re-enqueue if the file is still DELETING.

    3. **Task expiry** – every enqueued task carries an `expires` value equal to
       CELERY_USER_FILE_DELETE_TASK_EXPIRES.  If a task is still sitting in
       the queue after that deadline, Celery discards it without touching the DB.
    """
    task_logger.info("check_for_user_file_delete - Starting")
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.USER_FILE_DELETE_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )
    if not lock.acquire(blocking=False):
        return None

    enqueued = 0
    skipped_guard = 0
    try:
        # --- Protection 1: queue depth backpressure ---
        # NOTE: must use the broker's Redis client (not redis_client) because
        # Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
        r_celery = celery_get_broker_client(self.app)
        queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
        if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
            task_logger.warning(
                f"check_for_user_file_delete - Queue depth {queue_len} exceeds "
                f"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for "
                f"tenant={tenant_id}"
            )
            return None

        with get_session_with_current_tenant() as db_session:
            user_file_ids = (
                db_session.execute(
                    select(UserFile.id).where(
                        UserFile.status == UserFileStatus.DELETING
                    )
                )
                .scalars()
                .all()
            )
            for user_file_id in user_file_ids:
                # --- Protection 2: per-file queued guard ---
                queued_key = _user_file_delete_queued_key(user_file_id)
                guard_set = redis_client.set(
                    queued_key,
                    1,
                    ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
                    nx=True,
                )
                if not guard_set:
                    skipped_guard += 1
                    continue

                # --- Protection 3: task expiry ---
                try:
                    self.app.send_task(
                        OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
                        kwargs={
                            "user_file_id": str(user_file_id),
                            "tenant_id": tenant_id,
                        },
                        queue=OnyxCeleryQueues.USER_FILE_DELETE,
                        priority=OnyxCeleryPriority.HIGH,
                        expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
                    )
                except Exception:
                    redis_client.delete(queued_key)
                    raise
                enqueued += 1
    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}"
    )
    return None


def delete_user_file_impl(
    *, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
    """Core implementation for deleting a single user file.

    When redis_locking=True, acquires a per-file Redis lock (Celery path).
    When redis_locking=False, skips Redis operations (BackgroundTask path).
    """
    task_logger.info(f"delete_user_file_impl - Starting id={user_file_id}")

    file_lock: RedisLock | None = None
    if redis_locking:
        redis_client = get_redis_client(tenant_id=tenant_id)
        # Clear the queued guard so the beat can re-enqueue if deletion fails
        # and the file remains in DELETING status.
        redis_client.delete(_user_file_delete_queued_key(user_file_id))
        file_lock = redis_client.lock(
            _user_file_delete_lock_key(user_file_id),
            timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
        )
        if file_lock is not None and not file_lock.acquire(blocking=False):
            task_logger.info(
                f"delete_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
            )
            return

    try:
        with get_session_with_current_tenant() as db_session:
            user_file = db_session.get(UserFile, _as_uuid(user_file_id))
            if not user_file:
                task_logger.info(
                    f"delete_user_file_impl - User file not found id={user_file_id}"
                )
                return

            if not DISABLE_VECTOR_DB:
                if MANAGED_VESPA:
                    httpx_init_vespa_pool(
                        20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
                    )
                else:
                    httpx_init_vespa_pool(20)

                active_search_settings = get_active_search_settings(db_session)
                document_indices = get_all_document_indices(
                    search_settings=active_search_settings.primary,
                    secondary_search_settings=active_search_settings.secondary,
                    httpx_client=HttpxPool.get("vespa"),
                )
                retry_document_indices: list[RetryDocumentIndex] = [
                    RetryDocumentIndex(document_index)
                    for document_index in document_indices
                ]
                index_name = active_search_settings.primary.index_name
                selection = f"{index_name}.document_id=='{user_file_id}'"

                chunk_count = 0
                if user_file.chunk_count is None or user_file.chunk_count == 0:
                    chunk_count = _get_document_chunk_count(
                        index_name=index_name,
                        selection=selection,
                    )
                else:
                    chunk_count = user_file.chunk_count

                for retry_document_index in retry_document_indices:
                    retry_document_index.delete_single(
                        doc_id=user_file_id,
                        tenant_id=tenant_id,
                        chunk_count=chunk_count,
                    )

            file_store = get_default_file_store()
            try:
                file_store.delete_file(user_file.file_id)
                file_store.delete_file(
                    user_file_id_to_plaintext_file_name(user_file.id)
                )
            except Exception as e:
                task_logger.exception(
                    f"delete_user_file_impl - Error deleting file id={user_file.id} - {e.__class__.__name__}"
                )

            db_session.delete(user_file)
            db_session.commit()
            task_logger.info(f"delete_user_file_impl - Completed id={user_file_id}")
    except Exception as e:
        task_logger.exception(
            f"delete_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
        )
        raise
    finally:
        if file_lock is not None and file_lock.owned():
            file_lock.release()


@shared_task(
    name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
    bind=True,
    ignore_result=True,
)
def process_single_user_file_delete(
    self: Task,  # noqa: ARG001
    *,
    user_file_id: str,
    tenant_id: str,
) -> None:
    delete_user_file_impl(
        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
    )


@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
    """Scan for user files needing project sync and enqueue per-file tasks."""
    task_logger.info("Starting")

    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.USER_FILE_PROJECT_SYNC_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    if not lock.acquire(blocking=False):
        return None

    enqueued = 0
    skipped_guard = 0
    try:
        queue_depth = get_user_file_project_sync_queue_depth(self.app)
        if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:
            task_logger.warning(
                f"Queue depth {queue_depth} exceeds "
                f"{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}, skipping enqueue for tenant={tenant_id}"
            )
            return None

        with get_session_with_current_tenant() as db_session:
            user_file_ids = (
                db_session.execute(
                    select(UserFile.id).where(
                        sa.and_(
                            sa.or_(
                                UserFile.needs_project_sync.is_(True),
                                UserFile.needs_persona_sync.is_(True),
                            ),
                            UserFile.status == UserFileStatus.COMPLETED,
                        )
                    )
                )
                .scalars()
                .all()
            )

            for user_file_id in user_file_ids:
                if not enqueue_user_file_project_sync_task(
                    celery_app=self.app,
                    redis_client=redis_client,
                    user_file_id=user_file_id,
                    tenant_id=tenant_id,
                    priority=OnyxCeleryPriority.HIGH,
                ):
                    skipped_guard += 1
                    continue
                enqueued += 1
    finally:
        if lock.owned():
            lock.release()

    task_logger.info(
        f"Enqueued {enqueued} Skipped guard {skipped_guard} tasks for tenant={tenant_id}"
    )
    return None


def project_sync_user_file_impl(
    *, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
    """Core implementation for syncing a user file's project/persona metadata.

    When redis_locking=True, acquires a per-file Redis lock and clears the
    queued-key guard (Celery path).  When redis_locking=False, skips Redis
    operations (BackgroundTask path).
    """
    task_logger.info(f"project_sync_user_file_impl - Starting id={user_file_id}")

    file_lock: RedisLock | None = None
    if redis_locking:
        redis_client = get_redis_client(tenant_id=tenant_id)
        redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
        file_lock = redis_client.lock(
            user_file_project_sync_lock_key(user_file_id),
            timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
        )
        if file_lock is not None and not file_lock.acquire(blocking=False):
            task_logger.info(
                f"project_sync_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
            )
            return

    try:
        with get_session_with_current_tenant() as db_session:
            user_files = fetch_user_files_with_access_relationships(
                [user_file_id],
                db_session,
                eager_load_groups=global_version.is_ee_version(),
            )
            user_file = user_files[0] if user_files else None
            if not user_file:
                task_logger.info(
                    f"project_sync_user_file_impl - User file not found id={user_file_id}"
                )
                return

            if not DISABLE_VECTOR_DB:
                if MANAGED_VESPA:
                    httpx_init_vespa_pool(
                        20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
                    )
                else:
                    httpx_init_vespa_pool(20)

                active_search_settings = get_active_search_settings(db_session)
                document_indices = get_all_document_indices(
                    search_settings=active_search_settings.primary,
                    secondary_search_settings=active_search_settings.secondary,
                    httpx_client=HttpxPool.get("vespa"),
                )
                retry_document_indices: list[RetryDocumentIndex] = [
                    RetryDocumentIndex(document_index)
                    for document_index in document_indices
                ]

                project_ids = [project.id for project in user_file.projects]
                persona_ids = [p.id for p in user_file.assistants if not p.deleted]

                file_id_str = str(user_file.id)
                access_map = build_access_for_user_files([user_file])
                access = access_map.get(file_id_str)

                for retry_document_index in retry_document_indices:
                    retry_document_index.update_single(
                        doc_id=file_id_str,
                        tenant_id=tenant_id,
                        chunk_count=user_file.chunk_count,
                        fields=(
                            VespaDocumentFields(access=access)
                            if access is not None
                            else None
                        ),
                        user_fields=VespaDocumentUserFields(
                            user_projects=project_ids,
                            personas=persona_ids,
                        ),
                    )

            task_logger.info(
                f"project_sync_user_file_impl - User file id={user_file_id}"
            )

            user_file.needs_project_sync = False
            user_file.needs_persona_sync = False
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
            db_session.add(user_file)
            db_session.commit()

    except Exception as e:
        task_logger.exception(
            f"project_sync_user_file_impl - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
        )
        raise
    finally:
        if file_lock is not None and file_lock.owned():
            file_lock.release()


@shared_task(
    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
    bind=True,
    ignore_result=True,
)
def process_single_user_file_project_sync(
    self: Task,  # noqa: ARG001
    *,
    user_file_id: str,
    tenant_id: str,
) -> None:
    project_sync_user_file_impl(
        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
    )


================================================
FILE: backend/onyx/background/celery/tasks/vespa/__init__.py
================================================


================================================
FILE: backend/onyx/background/celery/tasks/vespa/document_sync.py
================================================
import time
from typing import cast
from uuid import uuid4

from celery import Celery
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DB_YIELD_PER_DEFAULT
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.document import construct_document_id_select_by_needs_sync
from onyx.db.document import count_documents_by_needs_sync
from onyx.utils.logger import setup_logger

# Redis keys for document sync tracking
DOCUMENT_SYNC_PREFIX = "documentsync"
DOCUMENT_SYNC_FENCE_KEY = f"{DOCUMENT_SYNC_PREFIX}_fence"
DOCUMENT_SYNC_TASKSET_KEY = f"{DOCUMENT_SYNC_PREFIX}_taskset"
FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks
TASKSET_TTL = FENCE_TTL

logger = setup_logger()


def is_document_sync_fenced(r: Redis) -> bool:
    """Check if document sync tasks are currently in progress."""
    return bool(r.exists(DOCUMENT_SYNC_FENCE_KEY))


def get_document_sync_payload(r: Redis) -> int | None:
    """Get the initial number of tasks that were created."""
    bytes_result = r.get(DOCUMENT_SYNC_FENCE_KEY)
    if bytes_result is None:
        return None
    return int(cast(int, bytes_result))


def get_document_sync_remaining(r: Redis) -> int:
    """Get the number of tasks still pending completion."""
    return cast(int, r.scard(DOCUMENT_SYNC_TASKSET_KEY))


def set_document_sync_fence(r: Redis, payload: int | None) -> None:
    """Set up the fence and register with active fences."""
    if payload is None:
        r.srem(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)
        r.delete(DOCUMENT_SYNC_FENCE_KEY)
        return

    r.set(DOCUMENT_SYNC_FENCE_KEY, payload, ex=FENCE_TTL)
    r.sadd(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)


def delete_document_sync_taskset(r: Redis) -> None:
    """Clear the document sync taskset."""
    r.delete(DOCUMENT_SYNC_TASKSET_KEY)


def reset_document_sync(r: Redis) -> None:
    """Reset all document sync tracking data."""
    r.srem(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)
    r.delete(DOCUMENT_SYNC_TASKSET_KEY)
    r.delete(DOCUMENT_SYNC_FENCE_KEY)


def generate_document_sync_tasks(
    r: Redis,
    max_tasks: int,
    celery_app: Celery,
    db_session: Session,
    lock: RedisLock,
    tenant_id: str,
) -> tuple[int, int]:
    """Generate sync tasks for all documents that need syncing.

    Args:
        r: Redis client
        max_tasks: Maximum number of tasks to generate
        celery_app: Celery application instance
        db_session: Database session
        lock: Redis lock for coordination
        tenant_id: Tenant identifier

    Returns:
        tuple[int, int]: (tasks_generated, total_docs_found)
    """
    last_lock_time = time.monotonic()
    num_tasks_sent = 0
    num_docs = 0

    # Get all documents that need syncing
    stmt = construct_document_id_select_by_needs_sync()

    for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
        doc_id = cast(str, doc_id)
        current_time = time.monotonic()

        # Reacquire lock periodically to prevent timeout
        if current_time - last_lock_time >= (CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4):
            lock.reacquire()
            last_lock_time = current_time

        num_docs += 1

        # Create a unique task ID
        custom_task_id = f"{DOCUMENT_SYNC_PREFIX}_{uuid4()}"

        # Add to the tracking taskset in Redis BEFORE creating the celery task
        r.sadd(DOCUMENT_SYNC_TASKSET_KEY, custom_task_id)
        r.expire(DOCUMENT_SYNC_TASKSET_KEY, TASKSET_TTL)

        # Create the Celery task
        celery_app.send_task(
            OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
            kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
            queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
            task_id=custom_task_id,
            priority=OnyxCeleryPriority.MEDIUM,
            ignore_result=True,
        )

        num_tasks_sent += 1

        if num_tasks_sent >= max_tasks:
            break

    return num_tasks_sent, num_docs


def try_generate_stale_document_sync_tasks(
    celery_app: Celery,
    max_tasks: int,
    db_session: Session,
    r: Redis,
    lock_beat: RedisLock,
    tenant_id: str,
) -> int | None:
    # the fence is up, do nothing
    if is_document_sync_fenced(r):
        return None

    # add tasks to celery and build up the task set to monitor in redis
    stale_doc_count = count_documents_by_needs_sync(db_session)
    if stale_doc_count == 0:
        logger.info("No stale documents found. Skipping sync tasks generation.")
        return None

    logger.info(
        f"Stale documents found (at least {stale_doc_count}). Generating sync tasks in one batch."
    )

    logger.info("generate_document_sync_tasks starting for all documents.")

    # Generate all tasks in one pass
    result = generate_document_sync_tasks(
        r, max_tasks, celery_app, db_session, lock_beat, tenant_id
    )

    if result is None:
        return None

    tasks_generated, total_docs = result

    if tasks_generated >= max_tasks:
        logger.info(
            f"generate_document_sync_tasks reached the task generation limit: "
            f"tasks_generated={tasks_generated} max_tasks={max_tasks}"
        )
    else:
        logger.info(
            f"generate_document_sync_tasks finished for all documents. "
            f"tasks_generated={tasks_generated} total_docs_found={total_docs}"
        )

    set_document_sync_fence(r, tasks_generated)
    return tasks_generated


================================================
FILE: backend/onyx/background/celery/tasks/vespa/tasks.py
================================================
import time
from collections.abc import Callable
from http import HTTPStatus
from typing import Any
from typing import cast

import httpx
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import RetryError

from onyx.access.access import get_access_for_document
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_FENCE_KEY
from onyx.background.celery.tasks.vespa.document_sync import get_document_sync_payload
from onyx.background.celery.tasks.vespa.document_sync import get_document_sync_remaining
from onyx.background.celery.tasks.vespa.document_sync import reset_document_sync
from onyx.background.celery.tasks.vespa.document_sync import (
    try_generate_stale_document_sync_tasks,
)
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import VESPA_SYNC_MAX_TASKS
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.document import get_document
from onyx.db.document import mark_document_as_synced
from onyx.db.document_set import delete_document_set
from onyx.db.document_set import fetch_document_sets
from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.document_set import get_document_set_by_id
from onyx.db.document_set import mark_document_set_as_synced
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import DocumentSet
from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import global_version
from onyx.utils.variable_functionality import noop_fallback

logger = setup_logger()


# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
    bind=True,
)
def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
    """Runs periodically to check if any document needs syncing.
    Generates sets of tasks for Celery if syncing is needed."""

    # Useful for debugging timing issues with reacquisitions.
    # TODO: remove once more generalized logging is in place
    task_logger.info("check_for_vespa_sync_task started")

    time_start = time.monotonic()

    r = get_redis_client()
    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        # 1/3: KICKOFF
        with get_session_with_current_tenant() as db_session:
            try_generate_stale_document_sync_tasks(
                self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id
            )

        # region document set scan
        lock_beat.reacquire()
        document_set_ids: list[int] = []
        with get_session_with_current_tenant() as db_session:
            # check if any document sets are not synced
            document_set_info = fetch_document_sets(
                user_id=None, db_session=db_session, include_outdated=True
            )

            for document_set, _ in document_set_info:
                document_set_ids.append(document_set.id)

        for document_set_id in document_set_ids:
            lock_beat.reacquire()
            with get_session_with_current_tenant() as db_session:
                try_generate_document_set_sync_tasks(
                    self.app, document_set_id, db_session, r, lock_beat, tenant_id
                )
        # endregion

        # check if any user groups are not synced
        lock_beat.reacquire()
        if global_version.is_ee_version():
            try:
                fetch_user_groups = fetch_versioned_implementation(
                    "onyx.db.user_group", "fetch_user_groups"
                )
            except ModuleNotFoundError:
                # Always exceptions on the MIT version, which is expected
                # We shouldn't actually get here if the ee version check works
                pass
            else:
                usergroup_ids: list[int] = []
                with get_session_with_current_tenant() as db_session:
                    user_groups = fetch_user_groups(
                        db_session=db_session, only_up_to_date=False
                    )

                    for usergroup in user_groups:
                        usergroup_ids.append(usergroup.id)

                for usergroup_id in usergroup_ids:
                    lock_beat.reacquire()
                    with get_session_with_current_tenant() as db_session:
                        try_generate_user_group_sync_tasks(
                            self.app, usergroup_id, db_session, r, lock_beat, tenant_id
                        )

        # 2/3: VALIDATE: TODO

        # 3/3: FINALIZE
        lock_beat.reacquire()
        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
        for key in keys:
            key_bytes = cast(bytes, key)

            if not r.exists(key_bytes):
                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
                continue

            key_str = key_bytes.decode("utf-8")
            # NOTE: removing the "Redis*" classes, prefer to just have functions to
            # do these things going forward. In short, things should generally be like the doc
            # sync task rather than the others
            if key_str == DOCUMENT_SYNC_FENCE_KEY:
                monitor_document_sync_taskset(r)
            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
                with get_session_with_current_tenant() as db_session:
                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
                monitor_usergroup_taskset = (
                    fetch_versioned_implementation_with_fallback(
                        "onyx.background.celery.tasks.vespa.tasks",
                        "monitor_usergroup_taskset",
                        noop_fallback,
                    )
                )
                with get_session_with_current_tenant() as db_session:
                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)

    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during vespa metadata sync")
    finally:
        if lock_beat.owned():
            lock_beat.release()
        else:
            task_logger.error(
                f"check_for_vespa_sync_task - Lock not owned on completion: tenant={tenant_id}"
            )
            redis_lock_dump(lock_beat, r)

    time_elapsed = time.monotonic() - time_start
    task_logger.debug(f"check_for_vespa_sync_task finished: elapsed={time_elapsed:.2f}")
    return True


def try_generate_document_set_sync_tasks(
    celery_app: Celery,
    document_set_id: int,
    db_session: Session,
    r: Redis,
    lock_beat: RedisLock,
    tenant_id: str,
) -> int | None:
    lock_beat.reacquire()

    rds = RedisDocumentSet(tenant_id, document_set_id)

    # don't generate document set sync tasks if tasks are still pending
    if rds.fenced:
        return None

    # don't generate sync tasks if we're up to date
    # race condition with the monitor/cleanup function if we use a cached result!
    document_set = get_document_set_by_id(
        db_session=db_session,
        document_set_id=document_set_id,
    )
    if not document_set:
        return None

    if document_set.is_up_to_date:
        # there should be no in-progress sync records if this is up to date
        # clean it up just in case things got into a bad state
        cleanup_sync_records(
            db_session=db_session,
            entity_id=document_set_id,
            sync_type=SyncType.DOCUMENT_SET,
        )
        return None

    # add tasks to celery and build up the task set to monitor in redis
    r.delete(rds.taskset_key)

    task_logger.info(
        f"RedisDocumentSet.generate_tasks starting. document_set_id={document_set.id}"
    )

    # Add all documents that need to be updated into the queue
    result = rds.generate_tasks(
        VESPA_SYNC_MAX_TASKS, celery_app, db_session, r, lock_beat, tenant_id
    )
    if result is None:
        return None

    tasks_generated = result[0]
    # Currently we are allowing the sync to proceed with 0 tasks.
    # It's possible for sets/groups to be generated initially with no entries
    # and they still need to be marked as up to date.
    # if tasks_generated == 0:
    #     return 0

    task_logger.info(
        f"RedisDocumentSet.generate_tasks finished. document_set={document_set.id} tasks_generated={tasks_generated}"
    )

    # create before setting fence to avoid race condition where the monitoring
    # task updates the sync record before it is created
    try:
        insert_sync_record(
            db_session=db_session,
            entity_id=document_set_id,
            sync_type=SyncType.DOCUMENT_SET,
        )
    except Exception:
        task_logger.exception("insert_sync_record exceptioned.")

    # set this only after all tasks have been added
    rds.set_fence(tasks_generated)
    return tasks_generated


def try_generate_user_group_sync_tasks(
    celery_app: Celery,
    usergroup_id: int,
    db_session: Session,
    r: Redis,
    lock_beat: RedisLock,
    tenant_id: str,
) -> int | None:
    lock_beat.reacquire()

    rug = RedisUserGroup(tenant_id, usergroup_id)
    if rug.fenced:
        # don't generate sync tasks if tasks are still pending
        return None

    # race condition with the monitor/cleanup function if we use a cached result!
    fetch_user_group = cast(
        Callable[[Session, int], UserGroup | None],
        fetch_versioned_implementation("onyx.db.user_group", "fetch_user_group"),
    )

    usergroup = fetch_user_group(db_session, usergroup_id)
    if not usergroup:
        return None

    if usergroup.is_up_to_date:
        # there should be no in-progress sync records if this is up to date
        # clean it up just in case things got into a bad state
        cleanup_sync_records(
            db_session=db_session,
            entity_id=usergroup_id,
            sync_type=SyncType.USER_GROUP,
        )
        return None

    # add tasks to celery and build up the task set to monitor in redis
    r.delete(rug.taskset_key)

    # Add all documents that need to be updated into the queue
    task_logger.info(
        f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}"
    )
    result = rug.generate_tasks(
        VESPA_SYNC_MAX_TASKS, celery_app, db_session, r, lock_beat, tenant_id
    )
    if result is None:
        return None

    tasks_generated = result[0]
    # Currently we are allowing the sync to proceed with 0 tasks.
    # It's possible for sets/groups to be generated initially with no entries
    # and they still need to be marked as up to date.
    # if tasks_generated == 0:
    #     return 0

    task_logger.info(
        f"RedisUserGroup.generate_tasks finished. usergroup={usergroup.id} tasks_generated={tasks_generated}"
    )

    # create before setting fence to avoid race condition where the monitoring
    # task updates the sync record before it is created
    try:
        insert_sync_record(
            db_session=db_session,
            entity_id=usergroup_id,
            sync_type=SyncType.USER_GROUP,
        )
    except Exception:
        task_logger.exception("insert_sync_record exceptioned.")

    # set this only after all tasks have been added
    rug.set_fence(tasks_generated)

    return tasks_generated


def monitor_document_sync_taskset(r: Redis) -> None:
    initial_count = get_document_sync_payload(r)
    if initial_count is None:
        return

    remaining = get_document_sync_remaining(r)
    task_logger.info(
        f"Document sync progress: remaining={remaining} initial={initial_count}"
    )
    if remaining == 0:
        reset_document_sync(r)
        task_logger.info(f"Successfully synced all documents. count={initial_count}")


def monitor_document_set_taskset(
    tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
    fence_key = key_bytes.decode("utf-8")
    document_set_id_str = RedisDocumentSet.get_id_from_fence_key(fence_key)
    if document_set_id_str is None:
        task_logger.warning(f"could not parse document set id from {fence_key}")
        return

    document_set_id = int(document_set_id_str)

    rds = RedisDocumentSet(tenant_id, document_set_id)
    if not rds.fenced:
        return

    initial_count = rds.payload
    if initial_count is None:
        return

    count = cast(int, r.scard(rds.taskset_key))
    task_logger.info(
        f"Document set sync progress: document_set={document_set_id} remaining={count} initial={initial_count}"
    )
    if count > 0:
        update_sync_record_status(
            db_session=db_session,
            entity_id=document_set_id,
            sync_type=SyncType.DOCUMENT_SET,
            sync_status=SyncStatus.IN_PROGRESS,
            num_docs_synced=count,
        )
        return

    document_set = cast(
        DocumentSet,
        get_document_set_by_id(db_session=db_session, document_set_id=document_set_id),
    )  # casting since we "know" a document set with this ID exists
    if document_set:
        has_connector_pairs = bool(document_set.connector_credential_pairs)
        # Federated connectors should keep a document set alive even without cc pairs.
        has_federated_connectors = bool(
            getattr(document_set, "federated_connectors", [])
        )

        if not has_connector_pairs and not has_federated_connectors:
            # If there are no connectors of any kind, delete the document set.
            delete_document_set(document_set_row=document_set, db_session=db_session)
            task_logger.info(
                f"Successfully deleted document set: document_set={document_set_id}"
            )
        else:
            mark_document_set_as_synced(document_set_id, db_session)
            task_logger.info(
                f"Successfully synced document set: document_set={document_set_id}"
            )

        try:
            update_sync_record_status(
                db_session=db_session,
                entity_id=document_set_id,
                sync_type=SyncType.DOCUMENT_SET,
                sync_status=SyncStatus.SUCCESS,
                num_docs_synced=initial_count,
            )
        except Exception:
            task_logger.exception(
                f"update_sync_record_status exceptioned. document_set_id={document_set_id} Resetting document set regardless."
            )

    rds.reset()


@shared_task(
    name=OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
    bind=True,
    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
    time_limit=LIGHT_TIME_LIMIT,
    max_retries=3,
)
def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) -> bool:
    start = time.monotonic()

    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED

    try:
        with get_session_with_current_tenant() as db_session:
            active_search_settings = get_active_search_settings(db_session)
            # This flow is for updates so we get all indices.
            document_indices = get_all_document_indices(
                search_settings=active_search_settings.primary,
                secondary_search_settings=active_search_settings.secondary,
                httpx_client=HttpxPool.get("vespa"),
            )

            retry_document_indices: list[RetryDocumentIndex] = [
                RetryDocumentIndex(document_index)
                for document_index in document_indices
            ]

            doc = get_document(document_id, db_session)
            if not doc:
                elapsed = time.monotonic() - start
                task_logger.info(
                    f"doc={document_id} action=no_operation elapsed={elapsed:.2f}"
                )
                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
            else:
                # document set sync
                doc_sets = fetch_document_sets_for_document(document_id, db_session)
                update_doc_sets: set[str] = set(doc_sets)

                # User group sync
                doc_access = get_access_for_document(
                    document_id=document_id, db_session=db_session
                )

                fields = VespaDocumentFields(
                    document_sets=update_doc_sets,
                    access=doc_access,
                    boost=doc.boost,
                    hidden=doc.hidden,
                    # aggregated_boost_factor=doc.aggregated_boost_factor,
                )

                for retry_document_index in retry_document_indices:
                    # TODO(andrei): Previously there was a comment here saying
                    # it was ok if a doc did not exist in the document index. I
                    # don't agree with that claim, so keep an eye on this task
                    # to see if this raises.
                    retry_document_index.update_single(
                        document_id,
                        tenant_id=tenant_id,
                        chunk_count=doc.chunk_count,
                        fields=fields,
                        user_fields=None,
                    )

                # update db last. Worst case = we crash right before this and
                # the sync might repeat again later
                mark_document_as_synced(document_id, db_session)

                elapsed = time.monotonic() - start
                task_logger.info(f"doc={document_id} action=sync elapsed={elapsed:.2f}")
                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
    except SoftTimeLimitExceeded:
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
    except Exception as ex:
        e: Exception | None = None
        while True:
            if isinstance(ex, RetryError):
                task_logger.warning(
                    f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
                )

                # only set the inner exception if it is of type Exception
                e_temp = ex.last_attempt.exception()
                if isinstance(e_temp, Exception):
                    e = e_temp
            else:
                e = ex

            if isinstance(e, httpx.HTTPStatusError):
                if e.response.status_code == HTTPStatus.BAD_REQUEST:
                    task_logger.exception(
                        f"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}"
                    )
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
                break

            task_logger.exception(
                f"vespa_metadata_sync_task exceptioned: doc={document_id}"
            )

            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
            if (
                self.max_retries is not None
                and self.request.retries >= self.max_retries
            ):
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )

            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
            countdown = 2 ** (self.request.retries + 4)
            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
            break  # we won't hit this, but it looks weird not to have it
    finally:
        task_logger.info(
            f"vespa_metadata_sync_task completed: status={completion_status.value} doc={document_id}"
        )

    return completion_status == OnyxCeleryTaskCompletionStatus.SUCCEEDED


================================================
FILE: backend/onyx/background/celery/versioned_apps/beat.py
================================================
"""Factory stub for running celery worker / celery beat."""

from celery import Celery

from onyx.background.celery.apps.beat import celery_app
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()
app: Celery = celery_app


================================================
FILE: backend/onyx/background/celery/versioned_apps/client.py
================================================
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker.

This is an app stub purely for sending tasks as a client.
"""

from celery import Celery

from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()


def get_app() -> Celery:
    from onyx.background.celery.apps.client import celery_app

    return celery_app


app = get_app()


================================================
FILE: backend/onyx/background/celery/versioned_apps/docfetching.py
================================================
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker."""

from celery import Celery

from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()


def get_app() -> Celery:
    from onyx.background.celery.apps.docfetching import celery_app

    return celery_app


app = get_app()


================================================
FILE: backend/onyx/background/celery/versioned_apps/docprocessing.py
================================================
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker."""

from celery import Celery

from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()


def get_app() -> Celery:
    from onyx.background.celery.apps.docprocessing import celery_app

    return celery_app


app = get_app()


================================================
FILE: backend/onyx/background/celery/versioned_apps/heavy.py
================================================
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker."""

from celery import Celery

from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
    "onyx.background.celery.apps.heavy",
    "celery_app",
)


================================================
FILE: backend/onyx/background/celery/versioned_apps/light.py
================================================
"""Factory stub for running celery worker / celery beat.
This code is different from the primary/beat stubs because there is no EE version to
fetch. Port over the code in those files if we add an EE version of this worker."""

from celery import Celery

from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
    "onyx.background.celery.apps.light",
    "celery_app",
)


================================================
FILE: backend/onyx/background/celery/versioned_apps/monitoring.py
================================================
"""Factory stub for running celery worker / celery beat."""

from celery import Celery

from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
    "onyx.background.celery.apps.monitoring",
    "celery_app",
)


================================================
FILE: backend/onyx/background/celery/versioned_apps/primary.py
================================================
"""Factory stub for running celery worker / celery beat."""

from celery import Celery

from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
    "onyx.background.celery.apps.primary",
    "celery_app",
)


================================================
FILE: backend/onyx/background/celery/versioned_apps/user_file_processing.py
================================================
"""Factory stub for running the user file processing Celery worker."""

from celery import Celery

from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()


def get_app() -> Celery:
    from onyx.background.celery.apps.user_file_processing import celery_app

    return celery_app


app = get_app()


================================================
FILE: backend/onyx/background/error_logging.py
================================================
from sqlalchemy.exc import IntegrityError

from onyx.db.background_error import create_background_error
from onyx.db.engine.sql_engine import get_session_with_current_tenant


def emit_background_error(
    message: str,
    cc_pair_id: int | None = None,
) -> None:
    """Currently just saves a row in the background_errors table.

    In the future, could create notifications based on the severity."""
    error_message = ""

    # try to write to the db, but handle IntegrityError specifically
    try:
        with get_session_with_current_tenant() as db_session:
            create_background_error(db_session, message, cc_pair_id)
    except IntegrityError as e:
        # Log an error if the cc_pair_id was deleted or any other exception occurs
        error_message = (
            f"Failed to create background error: {str(e)}. Original message: {message}"
        )
    except Exception:
        pass

    if not error_message:
        return

    # if we get here from an IntegrityError, try to write the error message to the db
    # we need a new session because the first session is now invalid
    try:
        with get_session_with_current_tenant() as db_session:
            create_background_error(db_session, error_message, None)
    except Exception:
        pass


================================================
FILE: backend/onyx/background/indexing/checkpointing_utils.py
================================================
from datetime import datetime
from datetime import timedelta
from io import BytesIO

from sqlalchemy import and_
from sqlalchemy.orm import Session

from onyx.configs.constants import FileOrigin
from onyx.configs.constants import NUM_DAYS_TO_KEEP_CHECKPOINTS
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger
from onyx.utils.object_size_check import deep_getsizeof

logger = setup_logger()

_NUM_RECENT_ATTEMPTS_TO_CONSIDER = 50


def _build_checkpoint_pointer(index_attempt_id: int) -> str:
    return f"checkpoint_{index_attempt_id}.json"


def save_checkpoint(
    db_session: Session, index_attempt_id: int, checkpoint: ConnectorCheckpoint
) -> str:
    """Save a checkpoint for a given index attempt to the file store"""
    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)

    file_store = get_default_file_store()
    file_store.save_file(
        content=BytesIO(checkpoint.model_dump_json().encode()),
        display_name=checkpoint_pointer,
        file_origin=FileOrigin.INDEXING_CHECKPOINT,
        file_type="application/json",
        file_id=checkpoint_pointer,
    )

    index_attempt = get_index_attempt(db_session, index_attempt_id)
    if not index_attempt:
        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
    index_attempt.checkpoint_pointer = checkpoint_pointer
    db_session.add(index_attempt)
    db_session.commit()
    return checkpoint_pointer


def load_checkpoint(
    index_attempt_id: int, connector: BaseConnector
) -> ConnectorCheckpoint:
    """Load a checkpoint for a given index attempt from the file store"""
    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)
    file_store = get_default_file_store()
    checkpoint_io = file_store.read_file(checkpoint_pointer, mode="rb")
    checkpoint_data = checkpoint_io.read().decode("utf-8")
    if isinstance(connector, CheckpointedConnector):
        return connector.validate_checkpoint_json(checkpoint_data)
    return ConnectorCheckpoint.model_validate_json(checkpoint_data)


def get_latest_valid_checkpoint(
    db_session: Session,
    cc_pair_id: int,
    search_settings_id: int,
    window_start: datetime,
    window_end: datetime,
    connector: BaseConnector,
) -> tuple[ConnectorCheckpoint, bool]:
    """Get the latest valid checkpoint for a given connector credential pair"""
    checkpoint_candidates = get_recent_completed_attempts_for_cc_pair(
        cc_pair_id=cc_pair_id,
        search_settings_id=search_settings_id,
        db_session=db_session,
        limit=_NUM_RECENT_ATTEMPTS_TO_CONSIDER,
    )

    # don't keep using checkpoints if we've had a bunch of failed attempts in a row
    # where we make no progress. Only do this if we have had at least
    # _NUM_RECENT_ATTEMPTS_TO_CONSIDER completed attempts.
    if len(checkpoint_candidates) >= _NUM_RECENT_ATTEMPTS_TO_CONSIDER:
        had_any_progress = False
        for candidate in checkpoint_candidates:
            if (
                candidate.total_docs_indexed is not None
                and candidate.total_docs_indexed > 0
            ) or candidate.status.is_successful():
                had_any_progress = True
                break

        if not had_any_progress:
            logger.warning(
                f"{_NUM_RECENT_ATTEMPTS_TO_CONSIDER} consecutive failed attempts without progress "
                f"found for cc_pair={cc_pair_id}. Ignoring checkpoint to let the run start "
                "from scratch."
            )
            return connector.build_dummy_checkpoint(), False

    # filter out any candidates that don't meet the criteria
    checkpoint_candidates = [
        candidate
        for candidate in checkpoint_candidates
        if (
            candidate.poll_range_start == window_start
            and candidate.poll_range_end == window_end
            and (
                candidate.status == IndexingStatus.FAILED
                # if the background job was killed (and thus the attempt was canceled)
                # we still want to use the checkpoint so that we can pick up where we left off
                or candidate.status == IndexingStatus.CANCELED
            )
            and candidate.checkpoint_pointer is not None
            # NOTE: There are a couple connectors that may make progress but not have
            # any "total_docs_indexed". E.g. they are going through
            # Slack channels, and tons of them don't have any updates.
            # Leaving the below in as historical context / in-case we want to use it again.
            # we want to make sure that the checkpoint is actually useful
            # if it's only gone through a few docs, it's probably not worth
            # using. This also avoids weird cases where a connector is basically
            # non-functional but still "makes progress" by slowly moving the
            # checkpoint forward run after run
            # and candidate.total_docs_indexed
            # and candidate.total_docs_indexed > 100
        )
    ]

    # assumes latest checkpoint is the furthest along. This only isn't true
    # if something else has gone wrong.
    latest_valid_checkpoint_candidate = (
        checkpoint_candidates[0] if checkpoint_candidates else None
    )

    checkpoint = connector.build_dummy_checkpoint()
    if latest_valid_checkpoint_candidate is None:
        logger.info(
            f"No valid checkpoint found for cc_pair={cc_pair_id}. Starting from scratch."
        )
        return checkpoint, False

    try:
        previous_checkpoint = load_checkpoint(
            index_attempt_id=latest_valid_checkpoint_candidate.id,
            connector=connector,
        )
    except Exception:
        logger.exception(
            f"Failed to load checkpoint from previous failed attempt with ID "
            f"{latest_valid_checkpoint_candidate.id}. Falling back to default checkpoint."
        )
        return checkpoint, False

    logger.info(
        f"Using checkpoint from previous failed attempt with ID "
        f"{latest_valid_checkpoint_candidate.id}. Previous checkpoint: "
        f"{previous_checkpoint}"
    )
    return previous_checkpoint, True


def get_index_attempts_with_old_checkpoints(
    db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_CHECKPOINTS
) -> list[IndexAttempt]:
    """Get all index attempts with checkpoints older than the specified number of days.

    Args:
        db_session: The database session
        days_to_keep: Number of days to keep checkpoints for (default: NUM_DAYS_TO_KEEP_CHECKPOINTS)

    Returns:
        List of IndexAttempt objects with old checkpoints
    """
    cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)

    # Find all index attempts with checkpoints older than cutoff_date
    old_attempts = (
        db_session.query(IndexAttempt)
        .filter(
            and_(
                IndexAttempt.checkpoint_pointer.isnot(None),
                IndexAttempt.time_created < cutoff_date,
            )
        )
        .all()
    )

    return old_attempts


def cleanup_checkpoint(db_session: Session, index_attempt_id: int) -> None:
    """Clean up a checkpoint for a given index attempt"""
    index_attempt = get_index_attempt(db_session, index_attempt_id)
    if not index_attempt:
        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")

    if not index_attempt.checkpoint_pointer:
        return None

    file_store = get_default_file_store()
    file_store.delete_file(index_attempt.checkpoint_pointer)

    index_attempt.checkpoint_pointer = None
    db_session.add(index_attempt)
    db_session.commit()

    return None


def check_checkpoint_size(checkpoint: ConnectorCheckpoint) -> None:
    """Check if the checkpoint content size exceeds the limit (200MB)"""
    content_size = deep_getsizeof(checkpoint.model_dump())
    if content_size > 200_000_000:  # 200MB in bytes
        raise ValueError(
            f"Checkpoint content size ({content_size} bytes) exceeds 200MB limit"
        )


================================================
FILE: backend/onyx/background/indexing/dask_utils.py
================================================
import asyncio

import psutil
from dask.distributed import WorkerPlugin
from distributed import Worker

from onyx.utils.logger import setup_logger

logger = setup_logger()


class ResourceLogger(WorkerPlugin):
    def __init__(self, log_interval: int = 60 * 5):
        self.log_interval = log_interval

    def setup(self, worker: Worker) -> None:
        """This method will be called when the plugin is attached to a worker."""
        self.worker = worker
        worker.loop.add_callback(self.log_resources)

    async def log_resources(self) -> None:
        """Periodically log CPU and memory usage.

        NOTE: must be async or else will clog up the worker indefinitely due to the fact that
        Dask uses Tornado under the hood (which is async)"""
        while True:
            cpu_percent = psutil.cpu_percent(interval=None)
            memory_available_gb = psutil.virtual_memory().available / (1024.0**3)
            # You can now log these values or send them to a monitoring service
            logger.debug(
                f"Worker {self.worker.address}: CPU usage {cpu_percent}%, Memory available {memory_available_gb}GB"
            )
            await asyncio.sleep(self.log_interval)


================================================
FILE: backend/onyx/background/indexing/index_attempt_utils.py
================================================
from datetime import timedelta

from sqlalchemy import func
from sqlalchemy.orm import Session

from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError


# Always retain at least this many attempts per connector/search settings pair
NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP = 10


def get_old_index_attempts(
    db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
) -> list[IndexAttempt]:
    """
    Get index attempts older than the specified number of days while retaining
    the latest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP per connector/search settings pair.
    """
    cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
    ranked_attempts = (
        db_session.query(
            IndexAttempt.id.label("attempt_id"),
            IndexAttempt.time_created.label("time_created"),
            func.row_number()
            .over(
                partition_by=(
                    IndexAttempt.connector_credential_pair_id,
                    IndexAttempt.search_settings_id,
                ),
                order_by=IndexAttempt.time_created.desc(),
            )
            .label("attempt_rank"),
        )
    ).subquery()

    return (
        db_session.query(IndexAttempt)
        .join(
            ranked_attempts,
            IndexAttempt.id == ranked_attempts.c.attempt_id,
        )
        .filter(
            ranked_attempts.c.time_created < cutoff_date,
            ranked_attempts.c.attempt_rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP,
        )
        .all()
    )


def cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:
    """Clean up multiple index attempts"""
    db_session.query(IndexAttemptError).filter(
        IndexAttemptError.index_attempt_id.in_(index_attempt_ids)
    ).delete(synchronize_session=False)

    db_session.query(IndexAttempt).filter(
        IndexAttempt.id.in_(index_attempt_ids)
    ).delete(synchronize_session=False)
    db_session.commit()


================================================
FILE: backend/onyx/background/indexing/job_client.py
================================================
"""Custom client that works similarly to Dask, but simpler and more lightweight.
Dask jobs behaved very strangely - they would die all the time, retries would
not follow the expected behavior, etc.

NOTE: cannot use Celery directly due to
https://github.com/celery/celery/issues/7007#issuecomment-1740139367"""

import multiprocessing as mp
import sys
import traceback
from collections.abc import Callable
from dataclasses import dataclass
from multiprocessing.context import SpawnProcess
from typing import Any
from typing import Literal
from typing import Optional

from onyx.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import TENANT_ID_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


class SimpleJobException(Exception):
    """lets us raise an exception that will return a specific error code"""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        code: int | None = kwargs.pop("code", None)
        self.code = code
        super().__init__(*args, **kwargs)


JobStatusType = (
    Literal["error"]
    | Literal["finished"]
    | Literal["pending"]
    | Literal["running"]
    | Literal["cancelled"]
)


def _initializer(
    func: Callable,
    queue: mp.Queue,
    args: list | tuple,
    kwargs: dict[str, Any] | None = None,
) -> Any:
    """Initialize the child process with a fresh SQLAlchemy Engine.

    Based on SQLAlchemy's recommendations to handle multiprocessing:
    https://docs.sqlalchemy.org/en/20/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork
    """
    if kwargs is None:
        kwargs = {}

    logger.info("Initializing spawned worker child process.")
    # 1. Get tenant_id from args or fallback to default
    tenant_id = POSTGRES_DEFAULT_SCHEMA
    for arg in reversed(args):
        if isinstance(arg, str) and arg.startswith(TENANT_ID_PREFIX):
            tenant_id = arg
            break

    # 2. Set the tenant context before running anything
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    # Reset the engine in the child process
    SqlEngine.reset_engine()

    # Optionally set a custom app name for database logging purposes
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME)

    # Initialize a new engine with desired parameters
    SqlEngine.init_engine(
        pool_size=4, max_overflow=12, pool_recycle=60, pool_pre_ping=True
    )

    # Proceed with executing the target function
    try:
        return func(*args, **kwargs)
    except SimpleJobException as e:
        logger.exception("SimpleJob raised a SimpleJobException")
        error_msg = traceback.format_exc()
        queue.put(error_msg)  # Send the exception to the parent process

        sys.exit(e.code)  # use the given exit code
    except Exception:
        logger.exception("SimpleJob raised an exception")
        error_msg = traceback.format_exc()
        queue.put(error_msg)  # Send the exception to the parent process

        sys.exit(255)  # use 255 to indicate a generic exception
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def _run_in_process(
    func: Callable,
    queue: mp.Queue,
    args: list | tuple,
    kwargs: dict[str, Any] | None = None,
) -> None:
    _initializer(func, queue, args, kwargs)


@dataclass
class SimpleJob:
    """Drop in replacement for `dask.distributed.Future`"""

    id: int
    process: Optional["SpawnProcess"] = None
    queue: Optional[mp.Queue] = None
    _exception: Optional[str] = None

    def cancel(self) -> bool:
        return self.release()

    def release(self) -> bool:
        if self.process is not None and self.process.is_alive():
            self.process.terminate()
            return True
        return False

    @property
    def status(self) -> JobStatusType:
        if not self.process:
            return "pending"
        elif self.process.is_alive():
            return "running"
        elif self.process.exitcode is None:
            return "cancelled"
        elif self.process.exitcode != 0:
            return "error"
        else:
            return "finished"

    def done(self) -> bool:
        return (
            self.status == "finished"
            or self.status == "cancelled"
            or self.status == "error"
        )

    def exception(self) -> str:
        """Needed to match the Dask API, but not implemented since we don't currently
        have a way to get back the exception information from the child process."""

        """Retrieve exception from the multiprocessing queue if available."""
        if self._exception is None and self.queue and not self.queue.empty():
            self._exception = self.queue.get()  # Get exception from queue

        return (
            self._exception or f"Job with ID '{self.id}' did not report an exception."
        )


class SimpleJobClient:
    """Drop in replacement for `dask.distributed.Client`"""

    def __init__(self, n_workers: int = 1) -> None:
        self.n_workers = n_workers
        self.job_id_counter = 0
        self.jobs: dict[int, SimpleJob] = {}

    def _cleanup_completed_jobs(self) -> None:
        current_job_ids = list(self.jobs.keys())
        for job_id in current_job_ids:
            job = self.jobs.get(job_id)
            if job and job.done():
                logger.debug(f"Cleaning up job with id: '{job.id}'")
                del self.jobs[job.id]

    def submit(
        self,
        func: Callable,
        *args: Any,
        pure: bool = True,  # noqa: ARG002
    ) -> SimpleJob | None:
        """NOTE: `pure` arg is needed so this can be a drop in replacement for Dask"""
        self._cleanup_completed_jobs()
        if len(self.jobs) >= self.n_workers:
            logger.debug(
                f"No available workers to run job. Currently running '{len(self.jobs)}' jobs, with a limit of '{self.n_workers}'."
            )
            return None

        job_id = self.job_id_counter
        self.job_id_counter += 1

        # this approach allows us to always "spawn" a new process regardless of
        # get_start_method's current setting
        ctx = mp.get_context("spawn")
        queue = ctx.Queue()
        process = ctx.Process(
            target=_run_in_process, args=(func, queue, args), daemon=True
        )
        job = SimpleJob(id=job_id, process=process, queue=queue)
        process.start()

        self.jobs[job_id] = job

        return job


================================================
FILE: backend/onyx/background/indexing/memory_tracer.py
================================================
import tracemalloc

from onyx.utils.logger import setup_logger

logger = setup_logger()

DANSWER_TRACEMALLOC_FRAMES = 10


class MemoryTracer:
    def __init__(self, interval: int = 0, num_print_entries: int = 5):
        self.interval = interval
        self.num_print_entries = num_print_entries
        self.snapshot_first: tracemalloc.Snapshot | None = None
        self.snapshot_prev: tracemalloc.Snapshot | None = None
        self.snapshot: tracemalloc.Snapshot | None = None
        self.counter = 0

    def start(self) -> None:
        """Start the memory tracer if interval is greater than 0."""
        if self.interval > 0:
            logger.debug(f"Memory tracer starting: interval={self.interval}")
            tracemalloc.start(DANSWER_TRACEMALLOC_FRAMES)
            self._take_snapshot()

    def stop(self) -> None:
        """Stop the memory tracer if it's running."""
        if self.interval > 0:
            self.log_final_diff()
            tracemalloc.stop()
            logger.debug("Memory tracer stopped.")

    def _take_snapshot(self) -> None:
        """Take a snapshot and update internal snapshot states."""
        snapshot = tracemalloc.take_snapshot()
        # Filter out irrelevant frames
        snapshot = snapshot.filter_traces(
            (
                tracemalloc.Filter(False, tracemalloc.__file__),
                tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
                tracemalloc.Filter(False, "<frozen importlib._bootstrap_external>"),
            )
        )

        if not self.snapshot_first:
            self.snapshot_first = snapshot

        if self.snapshot:
            self.snapshot_prev = self.snapshot

        self.snapshot = snapshot

    def _log_diff(
        self, current: tracemalloc.Snapshot, previous: tracemalloc.Snapshot
    ) -> None:
        """Log the memory difference between two snapshots."""
        stats = current.compare_to(previous, "traceback")
        for s in stats[: self.num_print_entries]:
            logger.debug(f"Tracer diff: {s}")
            for line in s.traceback.format():
                logger.debug(f"* {line}")

    def increment_and_maybe_trace(self) -> None:
        """Increment counter and perform trace if interval is hit."""
        if self.interval <= 0:
            return

        self.counter += 1
        if self.counter % self.interval == 0:
            logger.debug(
                f"Running trace comparison for batch {self.counter}. interval={self.interval}"
            )
            self._take_snapshot()
            if self.snapshot and self.snapshot_prev:
                self._log_diff(self.snapshot, self.snapshot_prev)

    def log_final_diff(self) -> None:
        """Log the final memory diff between start and end of indexing."""
        if self.interval <= 0:
            return

        logger.debug(
            f"Running trace comparison between start and end of indexing. {self.counter} batches processed."
        )
        self._take_snapshot()
        if self.snapshot and self.snapshot_first:
            self._log_diff(self.snapshot, self.snapshot_first)


================================================
FILE: backend/onyx/background/indexing/models.py
================================================
from datetime import datetime

from pydantic import BaseModel

from onyx.db.models import IndexAttemptError


class IndexAttemptErrorPydantic(BaseModel):
    id: int
    connector_credential_pair_id: int

    document_id: str | None
    document_link: str | None

    entity_id: str | None
    failed_time_range_start: datetime | None
    failed_time_range_end: datetime | None

    failure_message: str
    is_resolved: bool = False

    time_created: datetime

    index_attempt_id: int

    @classmethod
    def from_model(cls, model: IndexAttemptError) -> "IndexAttemptErrorPydantic":
        return cls(
            id=model.id,
            connector_credential_pair_id=model.connector_credential_pair_id,
            document_id=model.document_id,
            document_link=model.document_link,
            entity_id=model.entity_id,
            failed_time_range_start=model.failed_time_range_start,
            failed_time_range_end=model.failed_time_range_end,
            failure_message=model.failure_message,
            is_resolved=model.is_resolved,
            time_created=model.time_created,
            index_attempt_id=model.index_attempt_id,
        )


================================================
FILE: backend/onyx/background/indexing/run_docfetching.py
================================================
import sys
import time
import traceback
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from celery import Celery
from sqlalchemy.orm import Session

from onyx.access.access import source_should_fetch_permissions_during_indexing
from onyx.background.indexing.checkpointing_utils import check_checkpoint_size
from onyx.background.indexing.checkpointing_utils import get_latest_valid_checkpoint
from onyx.background.indexing.checkpointing_utils import save_checkpoint
from onyx.background.indexing.memory_tracer import MemoryTracer
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD
from onyx.configs.app_configs import INDEXING_TRACER_INTERVAL
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE
from onyx.configs.app_configs import MAX_FILE_SIZE_BYTES
from onyx.configs.app_configs import POLL_CONNECTOR_OFFSET
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import TextSection
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_last_successful_attempt_poll_range_end
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.constants import CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import ProcessingMode
from onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
from onyx.db.index_attempt import mark_attempt_canceled
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.index_attempt import transition_attempt_to_in_progress
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.indexing.persistent_document_writer import (
    get_persistent_document_writer,
)
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.postgres_sanitization import sanitize_document_for_postgres
from onyx.utils.postgres_sanitization import sanitize_hierarchy_nodes_for_postgres
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR

logger = setup_logger(propagate=False)

INDEXING_TRACER_NUM_PRINT_ENTRIES = 5


def _get_connector_runner(
    db_session: Session,
    attempt: IndexAttempt,
    batch_size: int,
    start_time: datetime,
    end_time: datetime,
    include_permissions: bool,
    leave_connector_active: bool = LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE,
) -> ConnectorRunner:
    """
    NOTE: `start_time` and `end_time` are only used for poll connectors

    Returns an iterator of document batches and whether the returned documents
    are the complete list of existing documents of the connector. If the task
    of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
    """

    task = attempt.connector_credential_pair.connector.input_type

    try:
        runnable_connector = instantiate_connector(
            db_session=db_session,
            source=attempt.connector_credential_pair.connector.source,
            input_type=task,
            connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config,
            credential=attempt.connector_credential_pair.credential,
        )

        # validate the connector settings
        if not INTEGRATION_TESTS_MODE:
            runnable_connector.validate_connector_settings()
            if attempt.connector_credential_pair.access_type == AccessType.SYNC:
                runnable_connector.validate_perm_sync()

    except UnexpectedValidationError as e:
        logger.exception(
            "Unable to instantiate connector due to an unexpected temporary issue."
        )
        raise e
    except Exception as e:
        logger.exception("Unable to instantiate connector. Pausing until fixed.")
        # since we failed to even instantiate the connector, we pause the CCPair since
        # it will never succeed

        # Sometimes there are cases where the connector will
        # intermittently fail to initialize in which case we should pass in
        # leave_connector_active=True to allow it to continue.
        # For example, if there is nightly maintenance on a Confluence Server instance,
        # the connector will fail to initialize every night.
        if not leave_connector_active:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=attempt.connector_credential_pair.id,
            )
            if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:
                update_connector_credential_pair(
                    db_session=db_session,
                    connector_id=attempt.connector_credential_pair.connector.id,
                    credential_id=attempt.connector_credential_pair.credential.id,
                    status=ConnectorCredentialPairStatus.PAUSED,
                )
        raise e

    return ConnectorRunner(
        connector=runnable_connector,
        batch_size=batch_size,
        include_permissions=include_permissions,
        time_range=(start_time, end_time),
    )


def strip_null_characters(doc_batch: list[Document]) -> list[Document]:
    cleaned_batch = []
    for doc in doc_batch:
        if sys.getsizeof(doc) > MAX_FILE_SIZE_BYTES:
            logger.warning(
                f"doc {doc.id} too large, Document size: {sys.getsizeof(doc)}"
            )
        cleaned_batch.append(sanitize_document_for_postgres(doc))

    return cleaned_batch


def _check_connector_and_attempt_status(
    db_session_temp: Session,
    cc_pair_id: int,
    search_settings_status: IndexModelStatus,
    index_attempt_id: int,
) -> None:
    """
    Checks the status of the connector credential pair and index attempt.
    Raises a RuntimeError if any conditions are not met.
    """
    cc_pair_loop = get_connector_credential_pair_from_id(
        db_session_temp,
        cc_pair_id,
    )
    if not cc_pair_loop:
        raise RuntimeError(f"CC pair {cc_pair_id} not found in DB.")

    if (
        cc_pair_loop.status == ConnectorCredentialPairStatus.PAUSED
        and search_settings_status != IndexModelStatus.FUTURE
    ) or cc_pair_loop.status == ConnectorCredentialPairStatus.DELETING:
        raise ConnectorStopSignal(f"Connector {cc_pair_loop.status.value.lower()}")

    index_attempt_loop = get_index_attempt(db_session_temp, index_attempt_id)
    if not index_attempt_loop:
        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")

    if index_attempt_loop.status == IndexingStatus.CANCELED:
        raise ConnectorStopSignal(f"Index attempt {index_attempt_id} was canceled")

    if index_attempt_loop.status != IndexingStatus.IN_PROGRESS:
        error_str = ""
        if index_attempt_loop.error_msg:
            error_str = f" Original error: {index_attempt_loop.error_msg}"

        raise RuntimeError(
            f"Index Attempt is not running, status is {index_attempt_loop.status}.{error_str}"
        )

    if index_attempt_loop.celery_task_id is None:
        raise RuntimeError(f"Index attempt {index_attempt_id} has no celery task id")


# TODO: delete from here if ends up unused
def _check_failure_threshold(
    total_failures: int,
    document_count: int,
    batch_num: int,
    last_failure: ConnectorFailure | None,
) -> None:
    """Check if we've hit the failure threshold and raise an appropriate exception if so.

    We consider the threshold hit if:
    1. We have more than 3 failures AND
    2. Failures account for more than 10% of processed documents
    """
    failure_ratio = total_failures / (document_count or 1)

    FAILURE_THRESHOLD = 3
    FAILURE_RATIO_THRESHOLD = 0.1
    if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:
        logger.error(
            f"Connector run failed with '{total_failures}' errors after '{batch_num}' batches."
        )
        if last_failure and last_failure.exception:
            raise last_failure.exception from last_failure.exception

        raise RuntimeError(
            f"Connector run encountered too many errors, aborting. Last error: {last_failure}"
        )


def run_docfetching_entrypoint(
    app: Celery,
    index_attempt_id: int,
    tenant_id: str,
    connector_credential_pair_id: int,
    is_ee: bool = False,
    callback: IndexingHeartbeatInterface | None = None,
) -> None:
    """Don't swallow exceptions here ... propagate them up."""

    if is_ee:
        global_version.set_ee()

    # set the indexing attempt ID so that all log messages from this process
    # will have it added as a prefix
    token = INDEX_ATTEMPT_INFO_CONTEXTVAR.set(
        (connector_credential_pair_id, index_attempt_id)
    )
    with get_session_with_current_tenant() as db_session:
        attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)

        tenant_str = ""
        if MULTI_TENANT:
            tenant_str = f" for tenant {tenant_id}"

        connector_name = attempt.connector_credential_pair.connector.name
        connector_config = (
            attempt.connector_credential_pair.connector.connector_specific_config
        )
        credential_id = attempt.connector_credential_pair.credential_id

    logger.info(
        f"Docfetching starting{tenant_str}: "
        f"connector='{connector_name}' "
        f"config='{connector_config}' "
        f"credentials='{credential_id}'"
    )

    connector_document_extraction(
        app,
        index_attempt_id,
        attempt.connector_credential_pair_id,
        attempt.search_settings_id,
        tenant_id,
        callback,
    )

    logger.info(
        f"Docfetching finished{tenant_str}: "
        f"connector='{connector_name}' "
        f"config='{connector_config}' "
        f"credentials='{credential_id}'"
    )

    INDEX_ATTEMPT_INFO_CONTEXTVAR.reset(token)


def connector_document_extraction(
    app: Celery,
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
    tenant_id: str,
    callback: IndexingHeartbeatInterface | None = None,
) -> None:
    """Extract documents from connector and queue them for indexing pipeline processing.

    This is the first part of the split indexing process that runs the connector
    and extracts documents, storing them in the filestore for later processing.
    """

    start_time = time.monotonic()

    logger.info(
        f"Document extraction starting: "
        f"attempt={index_attempt_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id} "
        f"tenant={tenant_id}"
    )

    # Get batch storage (transition to IN_PROGRESS is handled by run_indexing_entrypoint)
    batch_storage = get_document_batch_storage(cc_pair_id, index_attempt_id)

    # Initialize memory tracer. NOTE: won't actually do anything if
    # `INDEXING_TRACER_INTERVAL` is 0.
    memory_tracer = MemoryTracer(interval=INDEXING_TRACER_INTERVAL)
    memory_tracer.start()

    index_attempt = None
    last_batch_num = 0  # used to continue from checkpointing
    # comes from _run_indexing
    with get_session_with_current_tenant() as db_session:
        index_attempt = get_index_attempt(
            db_session,
            index_attempt_id,
            eager_load_cc_pair=True,
            eager_load_search_settings=True,
        )
        if not index_attempt:
            raise RuntimeError(f"Index attempt {index_attempt_id} not found")

        if index_attempt.search_settings is None:
            raise ValueError("Search settings must be set for indexing")

        # Clear the indexing trigger if it was set, to prevent duplicate indexing attempts
        if index_attempt.connector_credential_pair.indexing_trigger is not None:
            logger.info(
                "Clearing indexing trigger: "
                f"cc_pair={index_attempt.connector_credential_pair.id} "
                f"trigger={index_attempt.connector_credential_pair.indexing_trigger}"
            )
            mark_ccpair_with_indexing_trigger(
                index_attempt.connector_credential_pair.id, None, db_session
            )

        db_connector = index_attempt.connector_credential_pair.connector
        db_credential = index_attempt.connector_credential_pair.credential
        processing_mode = index_attempt.connector_credential_pair.processing_mode
        is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT
        is_connector_public = (
            index_attempt.connector_credential_pair.access_type == AccessType.PUBLIC
        )

        from_beginning = index_attempt.from_beginning
        has_successful_attempt = (
            index_attempt.connector_credential_pair.last_successful_index_time
            is not None
        )
        # Use higher priority for first-time indexing to ensure new connectors
        # get processed before re-indexing of existing connectors
        docprocessing_priority = (
            OnyxCeleryPriority.MEDIUM
            if has_successful_attempt
            else OnyxCeleryPriority.HIGH
        )

        earliest_index_time = (
            db_connector.indexing_start.timestamp()
            if db_connector.indexing_start
            else 0
        )
        should_fetch_permissions_during_indexing = (
            index_attempt.connector_credential_pair.access_type == AccessType.SYNC
            and source_should_fetch_permissions_during_indexing(db_connector.source)
            and is_primary
            # if we've already successfully indexed, let the doc_sync job
            # take care of doc-level permissions
            and (from_beginning or not has_successful_attempt)
        )

        # Set up time windows for polling
        last_successful_index_poll_range_end = (
            earliest_index_time
            if from_beginning
            else get_last_successful_attempt_poll_range_end(
                cc_pair_id=cc_pair_id,
                earliest_index=earliest_index_time,
                search_settings=index_attempt.search_settings,
                db_session=db_session,
            )
        )

        if last_successful_index_poll_range_end > POLL_CONNECTOR_OFFSET:
            window_start = datetime.fromtimestamp(
                last_successful_index_poll_range_end, tz=timezone.utc
            ) - timedelta(minutes=POLL_CONNECTOR_OFFSET)
        else:
            # don't go into "negative" time if we've never indexed before
            window_start = datetime.fromtimestamp(0, tz=timezone.utc)

        most_recent_attempt = next(
            iter(
                get_recent_completed_attempts_for_cc_pair(
                    cc_pair_id=cc_pair_id,
                    search_settings_id=index_attempt.search_settings_id,
                    db_session=db_session,
                    limit=1,
                )
            ),
            None,
        )

        # if the last attempt failed, try and use the same window. This is necessary
        # to ensure correctness with checkpointing. If we don't do this, things like
        # new slack channels could be missed (since existing slack channels are
        # cached as part of the checkpoint).
        if (
            most_recent_attempt
            and most_recent_attempt.poll_range_end
            and (
                most_recent_attempt.status == IndexingStatus.FAILED
                or most_recent_attempt.status == IndexingStatus.CANCELED
            )
        ):
            window_end = most_recent_attempt.poll_range_end
        else:
            window_end = datetime.now(tz=timezone.utc)

        # set time range in db
        index_attempt.poll_range_start = window_start
        index_attempt.poll_range_end = window_end
        db_session.commit()

        # TODO: maybe memory tracer here

        # Set up connector runner
        connector_runner = _get_connector_runner(
            db_session=db_session,
            attempt=index_attempt,
            batch_size=INDEX_BATCH_SIZE,
            start_time=window_start,
            end_time=window_end,
            include_permissions=should_fetch_permissions_during_indexing,
        )

        # don't use a checkpoint if we're explicitly indexing from
        # the beginning in order to avoid weird interactions between
        # checkpointing / failure handling
        # OR
        # if the last attempt was successful
        if index_attempt.from_beginning or (
            most_recent_attempt and most_recent_attempt.status.is_successful()
        ):
            logger.info(
                f"Cleaning up all old batches for index attempt {index_attempt_id} before starting new run"
            )
            batch_storage.cleanup_all_batches()
            checkpoint = connector_runner.connector.build_dummy_checkpoint()
        else:
            logger.info(
                f"Getting latest valid checkpoint for index attempt {index_attempt_id}"
            )
            checkpoint, resuming_from_checkpoint = get_latest_valid_checkpoint(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
                search_settings_id=index_attempt.search_settings_id,
                window_start=window_start,
                window_end=window_end,
                connector=connector_runner.connector,
            )

            # checkpoint resumption OR the connector already finished.
            if (
                isinstance(connector_runner.connector, CheckpointedConnector)
                and resuming_from_checkpoint
            ) or (
                most_recent_attempt
                and most_recent_attempt.total_batches is not None
                and not checkpoint.has_more
            ):
                reissued_batch_count, completed_batches = reissue_old_batches(
                    batch_storage,
                    index_attempt_id,
                    cc_pair_id,
                    tenant_id,
                    app,
                    most_recent_attempt,
                    docprocessing_priority,
                )
                last_batch_num = reissued_batch_count + completed_batches
                index_attempt.completed_batches = completed_batches
                db_session.commit()
            else:
                logger.info(
                    f"Cleaning up all batches for index attempt {index_attempt_id} before starting new run"
                )
                # for non-checkpointed connectors, throw out batches from previous unsuccessful attempts
                # because we'll be getting those documents again anyways.
                batch_storage.cleanup_all_batches()

        # Save initial checkpoint
        save_checkpoint(
            db_session=db_session,
            index_attempt_id=index_attempt_id,
            checkpoint=checkpoint,
        )

    try:
        batch_num = last_batch_num  # starts at 0 if no last batch
        total_doc_batches_queued = 0
        total_failures = 0
        document_count = 0

        # Ensure the SOURCE-type root hierarchy node exists before processing.
        # This is the root of the hierarchy tree for this source - all other
        # hierarchy nodes should ultimately have this as an ancestor.
        redis_client = get_redis_client(tenant_id=tenant_id)
        with get_session_with_current_tenant() as db_session:
            ensure_source_node_exists(redis_client, db_session, db_connector.source)

        # Main extraction loop
        while checkpoint.has_more:
            logger.info(
                f"Running '{db_connector.source.value}' connector with checkpoint: {checkpoint}"
            )
            for (
                document_batch,
                hierarchy_node_batch,
                failure,
                next_checkpoint,
            ) in connector_runner.run(checkpoint):
                # Check if connector is disabled mid run and stop if so unless it's the secondary
                # index being built. We want to populate it even for paused connectors
                # Often paused connectors are sources that aren't updated frequently but the
                # contents still need to be initially pulled.
                if callback and callback.should_stop():
                    raise ConnectorStopSignal("Connector stop signal detected")

                # will exception if the connector/index attempt is marked as paused/failed
                with get_session_with_current_tenant() as db_session_tmp:
                    _check_connector_and_attempt_status(
                        db_session_tmp,
                        cc_pair_id,
                        index_attempt.search_settings.status,
                        index_attempt_id,
                    )

                # save record of any failures at the connector level
                if failure is not None:
                    total_failures += 1
                    with get_session_with_current_tenant() as db_session:
                        create_index_attempt_error(
                            index_attempt_id,
                            cc_pair_id,
                            failure,
                            db_session,
                        )
                    _check_failure_threshold(
                        total_failures, document_count, batch_num, failure
                    )

                # Save checkpoint if provided
                if next_checkpoint:
                    checkpoint = next_checkpoint

                # Process hierarchy nodes batch - upsert to Postgres and cache in Redis
                if hierarchy_node_batch:
                    hierarchy_node_batch_cleaned = (
                        sanitize_hierarchy_nodes_for_postgres(hierarchy_node_batch)
                    )
                    with get_session_with_current_tenant() as db_session:
                        upserted_nodes = upsert_hierarchy_nodes_batch(
                            db_session=db_session,
                            nodes=hierarchy_node_batch_cleaned,
                            source=db_connector.source,
                            commit=True,
                            is_connector_public=is_connector_public,
                        )

                        upsert_hierarchy_node_cc_pair_entries(
                            db_session=db_session,
                            hierarchy_node_ids=[n.id for n in upserted_nodes],
                            connector_id=db_connector.id,
                            credential_id=db_credential.id,
                            commit=True,
                        )

                        # Cache in Redis for fast ancestor resolution during doc processing
                        redis_client = get_redis_client(tenant_id=tenant_id)
                        cache_entries = [
                            HierarchyNodeCacheEntry.from_db_model(node)
                            for node in upserted_nodes
                        ]
                        cache_hierarchy_nodes_batch(
                            redis_client=redis_client,
                            source=db_connector.source,
                            entries=cache_entries,
                        )

                    logger.debug(
                        f"Persisted and cached {len(hierarchy_node_batch_cleaned)} hierarchy nodes for attempt={index_attempt_id}"
                    )

                # below is all document processing task, so if no batch we can just continue
                if not document_batch:
                    continue

                # Clean documents and create batch
                doc_batch_cleaned = strip_null_characters(document_batch)

                # Resolve parent_hierarchy_raw_node_id to parent_hierarchy_node_id
                # using the Redis cache (just populated from hierarchy nodes batch)
                with get_session_with_current_tenant() as db_session_tmp:
                    source_node_id = get_source_node_id_from_cache(
                        redis_client, db_session_tmp, db_connector.source
                    )
                for doc in doc_batch_cleaned:
                    if doc.parent_hierarchy_raw_node_id is not None:
                        node_id, found = get_node_id_from_raw_id(
                            redis_client,
                            db_connector.source,
                            doc.parent_hierarchy_raw_node_id,
                        )
                        doc.parent_hierarchy_node_id = (
                            node_id if found else source_node_id
                        )
                    else:
                        doc.parent_hierarchy_node_id = source_node_id

                batch_description = []

                for doc in doc_batch_cleaned:
                    batch_description.append(doc.to_short_descriptor())

                    doc_size = 0
                    for section in doc.sections:
                        if (
                            isinstance(section, TextSection)
                            and section.text is not None
                        ):
                            doc_size += len(section.text)

                    if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:
                        logger.warning(
                            f"Document size: doc='{doc.to_short_descriptor()}' "
                            f"size={doc_size} "
                            f"threshold={INDEXING_SIZE_WARNING_THRESHOLD}"
                        )

                logger.debug(f"Indexing batch of documents: {batch_description}")
                memory_tracer.increment_and_maybe_trace()

                if processing_mode == ProcessingMode.FILE_SYSTEM:
                    # File system only - write directly to persistent storage,
                    # skip chunking/embedding/Vespa but still track documents in DB

                    # IMPORTANT: Write to S3 FIRST, before marking as indexed in DB.

                    # Write documents to persistent file system
                    # Use creator_id for user-segregated storage paths (sandbox isolation)
                    creator_id = index_attempt.connector_credential_pair.creator_id
                    if creator_id is None:
                        raise ValueError(
                            f"ConnectorCredentialPair {index_attempt.connector_credential_pair.id} "
                            "must have a creator_id for persistent document storage"
                        )
                    user_id_str: str = str(creator_id)
                    writer = get_persistent_document_writer(
                        user_id=user_id_str,
                        tenant_id=tenant_id,
                    )
                    written_paths = writer.write_documents(doc_batch_cleaned)

                    # Only after successful S3 write, mark documents as indexed in DB
                    with get_session_with_current_tenant() as db_session:
                        # Create metadata for the batch
                        index_attempt_metadata = IndexAttemptMetadata(
                            attempt_id=index_attempt_id,
                            connector_id=db_connector.id,
                            credential_id=db_credential.id,
                            request_id=make_randomized_onyx_request_id("FSI"),
                            structured_id=f"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}",
                            batch_num=batch_num,
                        )

                        # Upsert documents to PostgreSQL (document table + cc_pair relationship)
                        # This is a subset of what docprocessing does - just DB tracking, no chunking/embedding
                        index_doc_batch_prepare(
                            documents=doc_batch_cleaned,
                            index_attempt_metadata=index_attempt_metadata,
                            db_session=db_session,
                            ignore_time_skip=True,  # Documents already filtered during extraction
                        )

                        # Mark documents as indexed for the CC pair
                        mark_document_as_indexed_for_cc_pair__no_commit(
                            connector_id=db_connector.id,
                            credential_id=db_credential.id,
                            document_ids=[doc.id for doc in doc_batch_cleaned],
                            db_session=db_session,
                        )
                        db_session.commit()

                    # Update coordination directly (no docprocessing task)
                    with get_session_with_current_tenant() as db_session:
                        IndexingCoordination.update_batch_completion_and_docs(
                            db_session=db_session,
                            index_attempt_id=index_attempt_id,
                            total_docs_indexed=len(doc_batch_cleaned),
                            new_docs_indexed=len(doc_batch_cleaned),
                            total_chunks=0,  # No chunks for file system mode
                        )

                    batch_num += 1
                    total_doc_batches_queued += 1

                    logger.info(
                        f"Wrote documents to file system: "
                        f"batch_num={batch_num} "
                        f"docs={len(written_paths)} "
                        f"attempt={index_attempt_id}"
                    )
                else:
                    # REGULAR mode (default): Full pipeline - store and queue docprocessing
                    batch_storage.store_batch(batch_num, doc_batch_cleaned)

                    # Create processing task data
                    processing_batch_data = {
                        "index_attempt_id": index_attempt_id,
                        "cc_pair_id": cc_pair_id,
                        "tenant_id": tenant_id,
                        "batch_num": batch_num,  # 0-indexed
                    }

                    # Queue document processing task
                    app.send_task(
                        OnyxCeleryTask.DOCPROCESSING_TASK,
                        kwargs=processing_batch_data,
                        queue=OnyxCeleryQueues.DOCPROCESSING,
                        priority=docprocessing_priority,
                    )

                    batch_num += 1
                    total_doc_batches_queued += 1

                    logger.info(
                        f"Queued document processing batch: "
                        f"batch_num={batch_num} "
                        f"docs={len(doc_batch_cleaned)} "
                        f"attempt={index_attempt_id}"
                    )

            # Check checkpoint size periodically
            CHECKPOINT_SIZE_CHECK_INTERVAL = 100
            if batch_num % CHECKPOINT_SIZE_CHECK_INTERVAL == 0:
                check_checkpoint_size(checkpoint)

            # Save latest checkpoint
            # NOTE: checkpointing is used to track which batches have
            # been sent to the filestore, NOT which batches have been fully indexed
            # as it used to be.
            with get_session_with_current_tenant() as db_session:
                save_checkpoint(
                    db_session=db_session,
                    index_attempt_id=index_attempt_id,
                    checkpoint=checkpoint,
                )

        elapsed_time = time.monotonic() - start_time

        logger.info(
            f"Document extraction completed: "
            f"attempt={index_attempt_id} "
            f"batches_queued={total_doc_batches_queued} "
            f"elapsed={elapsed_time:.2f}s"
        )

        # Set total batches in database to signal extraction completion.
        # Used by check_for_indexing to determine if the index attempt is complete.
        with get_session_with_current_tenant() as db_session:
            IndexingCoordination.set_total_batches(
                db_session=db_session,
                index_attempt_id=index_attempt_id,
                total_batches=batch_num,
            )

        # Trigger file sync to user's sandbox (if running) - only for FILE_SYSTEM mode
        # This syncs the newly written documents from S3 to any running sandbox pod
        if processing_mode == ProcessingMode.FILE_SYSTEM:
            creator_id = index_attempt.connector_credential_pair.creator_id
            if creator_id:
                source_value = db_connector.source.value
                app.send_task(
                    OnyxCeleryTask.SANDBOX_FILE_SYNC,
                    kwargs={
                        "user_id": str(creator_id),
                        "tenant_id": tenant_id,
                        "source": source_value,
                    },
                    queue=OnyxCeleryQueues.SANDBOX,
                )
                logger.info(
                    f"Triggered sandbox file sync for user {creator_id} source={source_value} after indexing complete"
                )

    except Exception as e:
        logger.exception(
            f"Document extraction failed: attempt={index_attempt_id} error={str(e)}"
        )

        # Do NOT clean up batches on failure; future runs will use those batches
        # while docfetching will continue from the saved checkpoint if one exists

        if isinstance(e, ConnectorValidationError):
            # On validation errors during indexing, we want to cancel the indexing attempt
            # and mark the CCPair as invalid. This prevents the connector from being
            # used in the future until the credentials are updated.
            with get_session_with_current_tenant() as db_session_temp:
                logger.exception(
                    f"Marking attempt {index_attempt_id} as canceled due to validation error."
                )
                mark_attempt_canceled(
                    index_attempt_id,
                    db_session_temp,
                    reason=f"{CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX}{str(e)}",
                )

                if is_primary:
                    if not index_attempt:
                        # should always be set by now
                        raise RuntimeError("Should never happen.")

                    VALIDATION_ERROR_THRESHOLD = 5

                    recent_index_attempts = get_recent_completed_attempts_for_cc_pair(
                        cc_pair_id=cc_pair_id,
                        search_settings_id=index_attempt.search_settings_id,
                        limit=VALIDATION_ERROR_THRESHOLD,
                        db_session=db_session_temp,
                    )
                    num_validation_errors = len(
                        [
                            index_attempt
                            for index_attempt in recent_index_attempts
                            if index_attempt.error_msg
                            and index_attempt.error_msg.startswith(
                                CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
                            )
                        ]
                    )

                    if num_validation_errors >= VALIDATION_ERROR_THRESHOLD:
                        logger.warning(
                            f"Connector {db_connector.id} has {num_validation_errors} consecutive validation"
                            f" errors. Marking the CC Pair as invalid."
                        )
                        update_connector_credential_pair(
                            db_session=db_session_temp,
                            connector_id=db_connector.id,
                            credential_id=db_credential.id,
                            status=ConnectorCredentialPairStatus.INVALID,
                        )
            raise e
        elif isinstance(e, ConnectorStopSignal):
            with get_session_with_current_tenant() as db_session_temp:
                logger.exception(
                    f"Marking attempt {index_attempt_id} as canceled due to stop signal."
                )
                mark_attempt_canceled(
                    index_attempt_id,
                    db_session_temp,
                    reason=str(e),
                )

        else:
            with get_session_with_current_tenant() as db_session_temp:
                # don't overwrite attempts that are already failed/canceled for another reason
                index_attempt = get_index_attempt(db_session_temp, index_attempt_id)
                if index_attempt and index_attempt.status in [
                    IndexingStatus.CANCELED,
                    IndexingStatus.FAILED,
                ]:
                    logger.info(
                        f"Attempt {index_attempt_id} is already failed/canceled, skipping marking as failed."
                    )
                    raise e

                mark_attempt_failed(
                    index_attempt_id,
                    db_session_temp,
                    failure_reason=str(e),
                    full_exception_trace=traceback.format_exc(),
                )

            raise e

    finally:
        memory_tracer.stop()


def reissue_old_batches(
    batch_storage: DocumentBatchStorage,
    index_attempt_id: int,
    cc_pair_id: int,
    tenant_id: str,
    app: Celery,
    most_recent_attempt: IndexAttempt | None,
    priority: OnyxCeleryPriority,
) -> tuple[int, int]:
    # When loading from a checkpoint, we need to start new docprocessing tasks
    # tied to the new index attempt for any batches left over in the file store
    old_batches = batch_storage.get_all_batches_for_cc_pair()
    batch_storage.update_old_batches_to_new_index_attempt(old_batches)
    for batch_id in old_batches:
        logger.info(
            f"Re-issuing docprocessing task for batch {batch_id} for index attempt {index_attempt_id}"
        )
        path_info = batch_storage.extract_path_info(batch_id)
        if path_info is None:
            logger.warning(
                f"Could not extract path info from batch {batch_id}, skipping"
            )
            continue
        if path_info.cc_pair_id != cc_pair_id:
            raise RuntimeError(f"Batch {batch_id} is not for cc pair {cc_pair_id}")

        app.send_task(
            OnyxCeleryTask.DOCPROCESSING_TASK,
            kwargs={
                "index_attempt_id": index_attempt_id,
                "cc_pair_id": cc_pair_id,
                "tenant_id": tenant_id,
                "batch_num": path_info.batch_num,  # use same batch num as previously
            },
            queue=OnyxCeleryQueues.DOCPROCESSING,
            priority=priority,
        )
    recent_batches = most_recent_attempt.completed_batches if most_recent_attempt else 0
    # resume from the batch num of the last attempt. This should be one more
    # than the last batch created by docfetching regardless of whether the batch
    # is still in the filestore waiting for processing or not.
    last_batch_num = len(old_batches) + recent_batches
    logger.info(
        f"Starting from batch {last_batch_num} due to re-issued batches: {old_batches}, completed batches: {recent_batches}"
    )
    return len(old_batches), recent_batches


================================================
FILE: backend/onyx/background/periodic_poller.py
================================================
"""Periodic poller for NO_VECTOR_DB deployments.

Replaces Celery Beat and background workers with a lightweight daemon thread
that runs from the API server process.  Two responsibilities:

1. Recovery polling (every 30 s): re-processes user files stuck in
   PROCESSING / DELETING / needs_sync states via the drain loops defined
   in ``task_utils.py``.

2. Periodic task execution (configurable intervals): runs LLM model updates
   and scheduled evals at their configured cadences, with Postgres advisory
   lock deduplication across multiple API server instances.
"""

import threading
import time
from collections.abc import Callable
from dataclasses import dataclass
from dataclasses import field

from onyx.utils.logger import setup_logger

logger = setup_logger()

RECOVERY_INTERVAL_SECONDS = 30
PERIODIC_TASK_LOCK_BASE = 20_000
PERIODIC_TASK_KV_PREFIX = "periodic_poller:last_claimed:"


# ------------------------------------------------------------------
# Periodic task definitions
# ------------------------------------------------------------------


_NEVER_RAN: float = -1e18


@dataclass
class _PeriodicTaskDef:
    name: str
    interval_seconds: float
    lock_id: int
    run_fn: Callable[[], None]
    last_run_at: float = field(default=_NEVER_RAN)


def _run_auto_llm_update() -> None:
    from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL

    if not AUTO_LLM_CONFIG_URL:
        return

    from onyx.db.engine.sql_engine import get_session_with_current_tenant
    from onyx.llm.well_known_providers.auto_update_service import (
        sync_llm_models_from_github,
    )

    with get_session_with_current_tenant() as db_session:
        sync_llm_models_from_github(db_session)


def _run_cache_cleanup() -> None:
    from onyx.cache.postgres_backend import cleanup_expired_cache_entries

    cleanup_expired_cache_entries()


def _run_scheduled_eval() -> None:
    from onyx.configs.app_configs import BRAINTRUST_API_KEY
    from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
    from onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL
    from onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT

    if not all(
        [
            BRAINTRUST_API_KEY,
            SCHEDULED_EVAL_PROJECT,
            SCHEDULED_EVAL_DATASET_NAMES,
            SCHEDULED_EVAL_PERMISSIONS_EMAIL,
        ]
    ):
        return

    from datetime import datetime
    from datetime import timezone

    from onyx.evals.eval import run_eval
    from onyx.evals.models import EvalConfigurationOptions

    run_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    for dataset_name in SCHEDULED_EVAL_DATASET_NAMES:
        try:
            run_eval(
                configuration=EvalConfigurationOptions(
                    search_permissions_email=SCHEDULED_EVAL_PERMISSIONS_EMAIL,
                    dataset_name=dataset_name,
                    no_send_logs=False,
                    braintrust_project=SCHEDULED_EVAL_PROJECT,
                    experiment_name=f"{dataset_name} - {run_timestamp}",
                ),
                remote_dataset_name=dataset_name,
            )
        except Exception:
            logger.exception(
                f"Periodic poller - Failed scheduled eval for dataset {dataset_name}"
            )


_CACHE_CLEANUP_INTERVAL_SECONDS = 300


def _build_periodic_tasks() -> list[_PeriodicTaskDef]:
    from onyx.cache.interface import CacheBackendType
    from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
    from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
    from onyx.configs.app_configs import CACHE_BACKEND
    from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES

    tasks: list[_PeriodicTaskDef] = []
    if CACHE_BACKEND == CacheBackendType.POSTGRES:
        tasks.append(
            _PeriodicTaskDef(
                name="cache-cleanup",
                interval_seconds=_CACHE_CLEANUP_INTERVAL_SECONDS,
                lock_id=PERIODIC_TASK_LOCK_BASE + 2,
                run_fn=_run_cache_cleanup,
            )
        )
    if AUTO_LLM_CONFIG_URL:
        tasks.append(
            _PeriodicTaskDef(
                name="auto-llm-update",
                interval_seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS,
                lock_id=PERIODIC_TASK_LOCK_BASE,
                run_fn=_run_auto_llm_update,
            )
        )
    if SCHEDULED_EVAL_DATASET_NAMES:
        tasks.append(
            _PeriodicTaskDef(
                name="scheduled-eval",
                interval_seconds=7 * 24 * 3600,
                lock_id=PERIODIC_TASK_LOCK_BASE + 1,
                run_fn=_run_scheduled_eval,
            )
        )
    return tasks


# ------------------------------------------------------------------
# Periodic task runner with advisory-lock-guarded claim
# ------------------------------------------------------------------


def _try_claim_task(task_def: _PeriodicTaskDef) -> bool:
    """Atomically check whether *task_def* should run and record a claim.

    Uses a transaction-scoped advisory lock for atomicity combined with a
    ``KVStore`` timestamp for cross-instance dedup.  The DB session is held
    only for this brief claim transaction, not during task execution.
    """
    from datetime import datetime
    from datetime import timezone

    from sqlalchemy import text

    from onyx.db.engine.sql_engine import get_session_with_current_tenant
    from onyx.db.models import KVStore

    kv_key = PERIODIC_TASK_KV_PREFIX + task_def.name

    with get_session_with_current_tenant() as db_session:
        acquired = db_session.execute(
            text("SELECT pg_try_advisory_xact_lock(:id)"),
            {"id": task_def.lock_id},
        ).scalar()
        if not acquired:
            return False

        row = db_session.query(KVStore).filter_by(key=kv_key).first()
        if row and row.value is not None:
            last_claimed = datetime.fromisoformat(str(row.value))
            elapsed = (datetime.now(timezone.utc) - last_claimed).total_seconds()
            if elapsed < task_def.interval_seconds:
                return False

        now_ts = datetime.now(timezone.utc).isoformat()
        if row:
            row.value = now_ts
        else:
            db_session.add(KVStore(key=kv_key, value=now_ts))
        db_session.commit()

    return True


def _try_run_periodic_task(task_def: _PeriodicTaskDef) -> None:
    """Run *task_def* if its interval has elapsed and no peer holds the lock."""
    now = time.monotonic()
    if now - task_def.last_run_at < task_def.interval_seconds:
        return

    if not _try_claim_task(task_def):
        return

    try:
        task_def.run_fn()
        task_def.last_run_at = now
    except Exception:
        logger.exception(
            f"Periodic poller - Error running periodic task {task_def.name}"
        )


# ------------------------------------------------------------------
# Recovery / drain loop runner
# ------------------------------------------------------------------


def _run_drain_loops(tenant_id: str) -> None:
    from onyx.background.task_utils import drain_delete_loop
    from onyx.background.task_utils import drain_processing_loop
    from onyx.background.task_utils import drain_project_sync_loop

    drain_processing_loop(tenant_id)
    drain_delete_loop(tenant_id)
    drain_project_sync_loop(tenant_id)


# ------------------------------------------------------------------
# Startup recovery (10g)
# ------------------------------------------------------------------


def recover_stuck_user_files(tenant_id: str) -> None:
    """Run all drain loops once to re-process files left in intermediate states.

    Called from ``lifespan()`` on startup when ``DISABLE_VECTOR_DB`` is set.
    """
    logger.info("recover_stuck_user_files - Checking for stuck user files")
    try:
        _run_drain_loops(tenant_id)
    except Exception:
        logger.exception("recover_stuck_user_files - Error during recovery")


# ------------------------------------------------------------------
# Daemon thread (10f)
# ------------------------------------------------------------------

_shutdown_event = threading.Event()
_poller_thread: threading.Thread | None = None


def _poller_loop(tenant_id: str) -> None:
    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    periodic_tasks = _build_periodic_tasks()
    logger.info(
        f"Periodic poller started with {len(periodic_tasks)} periodic task(s): {[t.name for t in periodic_tasks]}"
    )

    while not _shutdown_event.is_set():
        try:
            _run_drain_loops(tenant_id)
        except Exception:
            logger.exception("Periodic poller - Error in recovery polling")

        for task_def in periodic_tasks:
            try:
                _try_run_periodic_task(task_def)
            except Exception:
                logger.exception(
                    f"Periodic poller - Unhandled error checking task {task_def.name}"
                )

        _shutdown_event.wait(RECOVERY_INTERVAL_SECONDS)


def start_periodic_poller(tenant_id: str) -> None:
    """Start the periodic poller daemon thread."""
    global _poller_thread  # noqa: PLW0603
    _shutdown_event.clear()
    _poller_thread = threading.Thread(
        target=_poller_loop,
        args=(tenant_id,),
        daemon=True,
        name="no-vectordb-periodic-poller",
    )
    _poller_thread.start()
    logger.info("Periodic poller thread started")


def stop_periodic_poller() -> None:
    """Signal the periodic poller to stop and wait for it to exit."""
    global _poller_thread  # noqa: PLW0603
    if _poller_thread is None:
        return
    _shutdown_event.set()
    _poller_thread.join(timeout=10)
    if _poller_thread.is_alive():
        logger.warning("Periodic poller thread did not stop within timeout")
    _poller_thread = None
    logger.info("Periodic poller thread stopped")


================================================
FILE: backend/onyx/background/task_utils.py
================================================
"""Background task utilities.

Contains query-history report helpers (used by all deployment modes) and
in-process background task execution helpers for NO_VECTOR_DB mode:

- Atomic claim-and-mark helpers that prevent duplicate processing
- Drain loops that process all pending user file work

Each claim function runs a short-lived transaction: SELECT ... FOR UPDATE
SKIP LOCKED, UPDATE the row to remove it from future queries, COMMIT.
After the commit the row lock is released, but the row is no longer
eligible for re-claiming.  No long-lived sessions or advisory locks.
"""

from uuid import UUID

import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.utils.logger import setup_logger

logger = setup_logger()

# ------------------------------------------------------------------
# Query-history report helpers (pre-existing, used by all modes)
# ------------------------------------------------------------------

QUERY_REPORT_NAME_PREFIX = "query-history"


def construct_query_history_report_name(
    task_id: str,
) -> str:
    return f"{QUERY_REPORT_NAME_PREFIX}-{task_id}.csv"


def extract_task_id_from_query_history_report_name(name: str) -> str:
    return name.removeprefix(f"{QUERY_REPORT_NAME_PREFIX}-").removesuffix(".csv")


# ------------------------------------------------------------------
# Atomic claim-and-mark helpers
# ------------------------------------------------------------------
# Each function runs inside a single short-lived session/transaction:
#   1. SELECT ... FOR UPDATE SKIP LOCKED  (locks one eligible row)
#   2. UPDATE the row so it is no longer eligible
#   3. COMMIT  (releases the row lock)
# After the commit, no other drain loop can claim the same row.


def _claim_next_processing_file(db_session: Session) -> UUID | None:
    """Claim the next PROCESSING file by transitioning it to INDEXING.

    Returns the file id, or None when no eligible files remain.
    """
    file_id = db_session.execute(
        select(UserFile.id)
        .where(UserFile.status == UserFileStatus.PROCESSING)
        .order_by(UserFile.created_at)
        .limit(1)
        .with_for_update(skip_locked=True)
    ).scalar_one_or_none()
    if file_id is None:
        return None

    db_session.execute(
        sa.update(UserFile)
        .where(UserFile.id == file_id)
        .values(status=UserFileStatus.INDEXING)
    )
    db_session.commit()
    return file_id


def _claim_next_deleting_file(
    db_session: Session,
    exclude_ids: set[UUID] | None = None,
) -> UUID | None:
    """Claim the next DELETING file.

    No status transition needed — the impl deletes the row on success.
    The short-lived FOR UPDATE lock prevents concurrent claims.
    *exclude_ids* prevents re-processing the same file if the impl fails.
    """
    stmt = (
        select(UserFile.id)
        .where(UserFile.status == UserFileStatus.DELETING)
        .order_by(UserFile.created_at)
        .limit(1)
        .with_for_update(skip_locked=True)
    )
    if exclude_ids:
        stmt = stmt.where(UserFile.id.notin_(exclude_ids))
    file_id = db_session.execute(stmt).scalar_one_or_none()
    db_session.commit()
    return file_id


def _claim_next_sync_file(
    db_session: Session,
    exclude_ids: set[UUID] | None = None,
) -> UUID | None:
    """Claim the next file needing project/persona sync.

    No status transition needed — the impl clears the sync flags on
    success.  The short-lived FOR UPDATE lock prevents concurrent claims.
    *exclude_ids* prevents re-processing the same file if the impl fails.
    """
    stmt = (
        select(UserFile.id)
        .where(
            sa.and_(
                sa.or_(
                    UserFile.needs_project_sync.is_(True),
                    UserFile.needs_persona_sync.is_(True),
                ),
                UserFile.status == UserFileStatus.COMPLETED,
            )
        )
        .order_by(UserFile.created_at)
        .limit(1)
        .with_for_update(skip_locked=True)
    )
    if exclude_ids:
        stmt = stmt.where(UserFile.id.notin_(exclude_ids))
    file_id = db_session.execute(stmt).scalar_one_or_none()
    db_session.commit()
    return file_id


# ------------------------------------------------------------------
# Drain loops — process *all* pending work of each type
# ------------------------------------------------------------------


def drain_processing_loop(tenant_id: str) -> None:
    """Process all pending PROCESSING user files."""
    from onyx.background.celery.tasks.user_file_processing.tasks import (
        process_user_file_impl,
    )
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    while True:
        with get_session_with_current_tenant() as session:
            file_id = _claim_next_processing_file(session)
        if file_id is None:
            break
        try:
            process_user_file_impl(
                user_file_id=str(file_id),
                tenant_id=tenant_id,
                redis_locking=False,
            )
        except Exception:
            logger.exception(f"Failed to process user file {file_id}")


def drain_delete_loop(tenant_id: str) -> None:
    """Delete all pending DELETING user files."""
    from onyx.background.celery.tasks.user_file_processing.tasks import (
        delete_user_file_impl,
    )
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    failed: set[UUID] = set()
    while True:
        with get_session_with_current_tenant() as session:
            file_id = _claim_next_deleting_file(session, exclude_ids=failed)
        if file_id is None:
            break
        try:
            delete_user_file_impl(
                user_file_id=str(file_id),
                tenant_id=tenant_id,
                redis_locking=False,
            )
        except Exception:
            logger.exception(f"Failed to delete user file {file_id}")
            failed.add(file_id)


def drain_project_sync_loop(tenant_id: str) -> None:
    """Sync all pending project/persona metadata for user files."""
    from onyx.background.celery.tasks.user_file_processing.tasks import (
        project_sync_user_file_impl,
    )
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    failed: set[UUID] = set()
    while True:
        with get_session_with_current_tenant() as session:
            file_id = _claim_next_sync_file(session, exclude_ids=failed)
        if file_id is None:
            break
        try:
            project_sync_user_file_impl(
                user_file_id=str(file_id),
                tenant_id=tenant_id,
                redis_locking=False,
            )
        except Exception:
            logger.exception(f"Failed to sync user file {file_id}")
            failed.add(file_id)


================================================
FILE: backend/onyx/cache/factory.py
================================================
from collections.abc import Callable

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import CACHE_BACKEND


def _build_redis_backend(tenant_id: str) -> CacheBackend:
    from onyx.cache.redis_backend import RedisCacheBackend
    from onyx.redis.redis_pool import redis_pool

    return RedisCacheBackend(redis_pool.get_client(tenant_id))


def _build_postgres_backend(tenant_id: str) -> CacheBackend:
    from onyx.cache.postgres_backend import PostgresCacheBackend

    return PostgresCacheBackend(tenant_id)


_BACKEND_BUILDERS: dict[CacheBackendType, Callable[[str], CacheBackend]] = {
    CacheBackendType.REDIS: _build_redis_backend,
    CacheBackendType.POSTGRES: _build_postgres_backend,
}


def get_cache_backend(*, tenant_id: str | None = None) -> CacheBackend:
    """Return a tenant-aware ``CacheBackend``.

    If *tenant_id* is ``None``, the current tenant is read from the
    thread-local context variable (same behaviour as ``get_redis_client``).
    """
    if tenant_id is None:
        from shared_configs.contextvars import get_current_tenant_id

        tenant_id = get_current_tenant_id()

    builder = _BACKEND_BUILDERS.get(CACHE_BACKEND)
    if builder is None:
        raise ValueError(
            f"Unsupported CACHE_BACKEND={CACHE_BACKEND!r}. Supported values: {[t.value for t in CacheBackendType]}"
        )
    return builder(tenant_id)


def get_shared_cache_backend() -> CacheBackend:
    """Return a ``CacheBackend`` in the shared (cross-tenant) namespace."""
    from shared_configs.configs import DEFAULT_REDIS_PREFIX

    return get_cache_backend(tenant_id=DEFAULT_REDIS_PREFIX)


================================================
FILE: backend/onyx/cache/interface.py
================================================
import abc
from enum import Enum

from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError

TTL_KEY_NOT_FOUND = -2
TTL_NO_EXPIRY = -1

CACHE_TRANSIENT_ERRORS: tuple[type[Exception], ...] = (RedisError, SQLAlchemyError)
"""Exception types that represent transient cache connectivity / operational
failures.  Callers that want to fail-open (or fail-closed) on cache errors
should catch this tuple instead of bare ``Exception``.

When adding a new ``CacheBackend`` implementation, add its transient error
base class(es) here so all call-sites pick it up automatically."""


class CacheBackendType(str, Enum):
    REDIS = "redis"
    POSTGRES = "postgres"


class CacheLock(abc.ABC):
    """Abstract distributed lock returned by CacheBackend.lock()."""

    @abc.abstractmethod
    def acquire(
        self,
        blocking: bool = True,
        blocking_timeout: float | None = None,
    ) -> bool:
        raise NotImplementedError

    @abc.abstractmethod
    def release(self) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def owned(self) -> bool:
        raise NotImplementedError

    def __enter__(self) -> "CacheLock":
        if not self.acquire():
            raise RuntimeError("Failed to acquire lock")
        return self

    def __exit__(self, *args: object) -> None:
        self.release()


class CacheBackend(abc.ABC):
    """Thin abstraction over a key-value cache with TTL, locks, and blocking lists.

    Covers the subset of Redis operations used outside of Celery. When
    CACHE_BACKEND=postgres, a PostgreSQL-backed implementation is used instead.
    """

    # -- basic key/value ---------------------------------------------------

    @abc.abstractmethod
    def get(self, key: str) -> bytes | None:
        raise NotImplementedError

    @abc.abstractmethod
    def set(
        self,
        key: str,
        value: str | bytes | int | float,
        ex: int | None = None,
    ) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, key: str) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def exists(self, key: str) -> bool:
        raise NotImplementedError

    # -- TTL ---------------------------------------------------------------

    @abc.abstractmethod
    def expire(self, key: str, seconds: int) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def ttl(self, key: str) -> int:
        """Return remaining TTL in seconds.

        Returns ``TTL_NO_EXPIRY`` (-1) if key exists without expiry,
        ``TTL_KEY_NOT_FOUND`` (-2) if key is missing or expired.
        """
        raise NotImplementedError

    # -- distributed lock --------------------------------------------------

    @abc.abstractmethod
    def lock(self, name: str, timeout: float | None = None) -> CacheLock:
        raise NotImplementedError

    # -- blocking list (used by MCP OAuth BLPOP pattern) -------------------

    @abc.abstractmethod
    def rpush(self, key: str, value: str | bytes) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
        """Block until a value is available on one of *keys*, or *timeout* expires.

        Returns ``(key, value)`` or ``None`` on timeout.
        """
        raise NotImplementedError


================================================
FILE: backend/onyx/cache/postgres_backend.py
================================================
"""PostgreSQL-backed ``CacheBackend`` for NO_VECTOR_DB deployments.

Uses the ``cache_store`` table for key-value storage, PostgreSQL advisory locks
for distributed locking, and a polling loop for the BLPOP pattern.
"""

import hashlib
import struct
import time
import uuid
from contextlib import AbstractContextManager
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
from onyx.cache.interface import TTL_KEY_NOT_FOUND
from onyx.cache.interface import TTL_NO_EXPIRY
from onyx.db.models import CacheStore

_LIST_KEY_PREFIX = "_q:"
# ASCII: ':' (0x3A) < ';' (0x3B). Upper bound for range queries so [prefix+, prefix;)
# captures all list-item keys (e.g. _q:mylist:123:uuid) without including other
# lists whose names share a prefix (e.g. _q:mylist2:...).
_LIST_KEY_RANGE_TERMINATOR = ";"
_LIST_ITEM_TTL_SECONDS = 3600
_LOCK_POLL_INTERVAL = 0.1
_BLPOP_POLL_INTERVAL = 0.25


def _list_item_key(key: str) -> str:
    """Unique key for a list item. Timestamp for FIFO ordering; UUID prevents
    collision when concurrent rpush calls occur within the same nanosecond.
    """
    return f"{_LIST_KEY_PREFIX}{key}:{time.time_ns()}:{uuid.uuid4().hex}"


def _to_bytes(value: str | bytes | int | float) -> bytes:
    if isinstance(value, bytes):
        return value
    return str(value).encode()


# ------------------------------------------------------------------
# Lock
# ------------------------------------------------------------------


class PostgresCacheLock(CacheLock):
    """Advisory-lock-based distributed lock.

    Uses ``get_session_with_tenant`` for connection lifecycle.  The lock is tied
    to the session's connection; releasing or closing the session frees it.

    NOTE: Unlike Redis locks, advisory locks do not auto-expire after
    ``timeout`` seconds.  They are released when ``release()`` is
    called or when the session is closed.
    """

    def __init__(self, lock_id: int, timeout: float | None, tenant_id: str) -> None:
        self._lock_id = lock_id
        self._timeout = timeout
        self._tenant_id = tenant_id
        self._session_cm: AbstractContextManager[Session] | None = None
        self._session: Session | None = None
        self._acquired = False

    def acquire(
        self,
        blocking: bool = True,
        blocking_timeout: float | None = None,
    ) -> bool:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        self._session_cm = get_session_with_tenant(tenant_id=self._tenant_id)
        self._session = self._session_cm.__enter__()
        try:
            if not blocking:
                return self._try_lock()

            effective_timeout = blocking_timeout or self._timeout
            deadline = (
                (time.monotonic() + effective_timeout) if effective_timeout else None
            )
            while True:
                if self._try_lock():
                    return True
                if deadline is not None and time.monotonic() >= deadline:
                    return False
                time.sleep(_LOCK_POLL_INTERVAL)
        finally:
            if not self._acquired:
                self._close_session()

    def release(self) -> None:
        if not self._acquired or self._session is None:
            return
        try:
            self._session.execute(select(func.pg_advisory_unlock(self._lock_id)))
        finally:
            self._acquired = False
            self._close_session()

    def owned(self) -> bool:
        return self._acquired

    def _close_session(self) -> None:
        if self._session_cm is not None:
            try:
                self._session_cm.__exit__(None, None, None)
            finally:
                self._session_cm = None
                self._session = None

    def _try_lock(self) -> bool:
        assert self._session is not None
        result = self._session.execute(
            select(func.pg_try_advisory_lock(self._lock_id))
        ).scalar()
        if result:
            self._acquired = True
            return True
        return False


# ------------------------------------------------------------------
# Backend
# ------------------------------------------------------------------


class PostgresCacheBackend(CacheBackend):
    """``CacheBackend`` backed by the ``cache_store`` table in PostgreSQL.

    Each operation opens and closes its own database session so the backend
    is safe to share across threads.  Tenant isolation is handled by
    SQLAlchemy's ``schema_translate_map`` (set by ``get_session_with_tenant``).
    """

    def __init__(self, tenant_id: str) -> None:
        self._tenant_id = tenant_id

    # -- basic key/value ---------------------------------------------------

    def get(self, key: str) -> bytes | None:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        stmt = select(CacheStore.value).where(
            CacheStore.key == key,
            or_(CacheStore.expires_at.is_(None), CacheStore.expires_at > func.now()),
        )
        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            value = session.execute(stmt).scalar_one_or_none()
        if value is None:
            return None
        return bytes(value)

    def set(
        self,
        key: str,
        value: str | bytes | int | float,
        ex: int | None = None,
    ) -> None:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        value_bytes = _to_bytes(value)
        expires_at = (
            datetime.now(timezone.utc) + timedelta(seconds=ex)
            if ex is not None
            else None
        )
        stmt = (
            pg_insert(CacheStore)
            .values(key=key, value=value_bytes, expires_at=expires_at)
            .on_conflict_do_update(
                index_elements=[CacheStore.key],
                set_={"value": value_bytes, "expires_at": expires_at},
            )
        )
        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            session.execute(stmt)
            session.commit()

    def delete(self, key: str) -> None:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            session.execute(delete(CacheStore).where(CacheStore.key == key))
            session.commit()

    def exists(self, key: str) -> bool:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        stmt = (
            select(CacheStore.key)
            .where(
                CacheStore.key == key,
                or_(
                    CacheStore.expires_at.is_(None),
                    CacheStore.expires_at > func.now(),
                ),
            )
            .limit(1)
        )
        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            return session.execute(stmt).first() is not None

    # -- TTL ---------------------------------------------------------------

    def expire(self, key: str, seconds: int) -> None:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        new_exp = datetime.now(timezone.utc) + timedelta(seconds=seconds)
        stmt = (
            update(CacheStore).where(CacheStore.key == key).values(expires_at=new_exp)
        )
        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            session.execute(stmt)
            session.commit()

    def ttl(self, key: str) -> int:
        from onyx.db.engine.sql_engine import get_session_with_tenant

        stmt = select(CacheStore.expires_at).where(CacheStore.key == key)
        with get_session_with_tenant(tenant_id=self._tenant_id) as session:
            result = session.execute(stmt).first()
        if result is None:
            return TTL_KEY_NOT_FOUND
        expires_at: datetime | None = result[0]
        if expires_at is None:
            return TTL_NO_EXPIRY
        remaining = (expires_at - datetime.now(timezone.utc)).total_seconds()
        if remaining <= 0:
            return TTL_KEY_NOT_FOUND
        return int(remaining)

    # -- distributed lock --------------------------------------------------

    def lock(self, name: str, timeout: float | None = None) -> CacheLock:
        return PostgresCacheLock(
            self._lock_id_for(name), timeout, tenant_id=self._tenant_id
        )

    # -- blocking list (MCP OAuth BLPOP pattern) ---------------------------

    def rpush(self, key: str, value: str | bytes) -> None:
        self.set(_list_item_key(key), value, ex=_LIST_ITEM_TTL_SECONDS)

    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
        if timeout <= 0:
            raise ValueError(
                "PostgresCacheBackend.blpop requires timeout > 0. "
                "timeout=0 would block the calling thread indefinitely "
                "with no way to interrupt short of process termination."
            )
        from onyx.db.engine.sql_engine import get_session_with_tenant

        deadline = time.monotonic() + timeout
        while True:
            for key in keys:
                lower = f"{_LIST_KEY_PREFIX}{key}:"
                upper = f"{_LIST_KEY_PREFIX}{key}{_LIST_KEY_RANGE_TERMINATOR}"
                stmt = (
                    select(CacheStore)
                    .where(
                        CacheStore.key >= lower,
                        CacheStore.key < upper,
                        or_(
                            CacheStore.expires_at.is_(None),
                            CacheStore.expires_at > func.now(),
                        ),
                    )
                    .order_by(CacheStore.key)
                    .limit(1)
                    .with_for_update(skip_locked=True)
                )
                with get_session_with_tenant(tenant_id=self._tenant_id) as session:
                    row = session.execute(stmt).scalars().first()
                    if row is not None:
                        value = bytes(row.value) if row.value else b""
                        session.delete(row)
                        session.commit()
                        return (key.encode(), value)
            if time.monotonic() >= deadline:
                return None
            time.sleep(_BLPOP_POLL_INTERVAL)

    # -- helpers -----------------------------------------------------------

    def _lock_id_for(self, name: str) -> int:
        """Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
        h = hashlib.md5(
            f"{self._tenant_id}:{name}".encode(), usedforsecurity=False
        ).digest()
        return struct.unpack("q", h[:8])[0]


# ------------------------------------------------------------------
# Periodic cleanup
# ------------------------------------------------------------------


def cleanup_expired_cache_entries() -> None:
    """Delete rows whose ``expires_at`` is in the past.

    Called by the periodic poller every 5 minutes.
    """
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    with get_session_with_current_tenant() as session:
        session.execute(
            delete(CacheStore).where(
                CacheStore.expires_at.is_not(None),
                CacheStore.expires_at < func.now(),
            )
        )
        session.commit()


================================================
FILE: backend/onyx/cache/redis_backend.py
================================================
from typing import cast

from redis.client import Redis
from redis.lock import Lock as RedisLock

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock


class RedisCacheLock(CacheLock):
    """Wraps ``redis.lock.Lock`` behind the ``CacheLock`` interface."""

    def __init__(self, lock: RedisLock) -> None:
        self._lock = lock

    def acquire(
        self,
        blocking: bool = True,
        blocking_timeout: float | None = None,
    ) -> bool:
        return bool(
            self._lock.acquire(
                blocking=blocking,
                blocking_timeout=blocking_timeout,
            )
        )

    def release(self) -> None:
        self._lock.release()

    def owned(self) -> bool:
        return bool(self._lock.owned())


class RedisCacheBackend(CacheBackend):
    """``CacheBackend`` implementation that delegates to a ``redis.Redis`` client.

    This is a thin pass-through — every method maps 1-to-1 to the underlying
    Redis command.  ``TenantRedis`` key-prefixing is handled by the client
    itself (provided by ``get_redis_client``).
    """

    def __init__(self, redis_client: Redis) -> None:
        self._r = redis_client

    # -- basic key/value ---------------------------------------------------

    def get(self, key: str) -> bytes | None:
        val = self._r.get(key)
        if val is None:
            return None
        if isinstance(val, bytes):
            return val
        return str(val).encode()

    def set(
        self,
        key: str,
        value: str | bytes | int | float,
        ex: int | None = None,
    ) -> None:
        self._r.set(key, value, ex=ex)

    def delete(self, key: str) -> None:
        self._r.delete(key)

    def exists(self, key: str) -> bool:
        return bool(self._r.exists(key))

    # -- TTL ---------------------------------------------------------------

    def expire(self, key: str, seconds: int) -> None:
        self._r.expire(key, seconds)

    def ttl(self, key: str) -> int:
        return cast(int, self._r.ttl(key))

    # -- distributed lock --------------------------------------------------

    def lock(self, name: str, timeout: float | None = None) -> CacheLock:
        return RedisCacheLock(self._r.lock(name, timeout=timeout))

    # -- blocking list (MCP OAuth BLPOP pattern) ---------------------------

    def rpush(self, key: str, value: str | bytes) -> None:
        self._r.rpush(key, value)

    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
        result = cast(list[bytes] | None, self._r.blpop(keys, timeout=timeout))
        if result is None:
            return None
        return (result[0], result[1])


================================================
FILE: backend/onyx/chat/COMPRESSION.md
================================================
# Chat History Compression

Compresses long chat histories by summarizing older messages while keeping recent ones verbatim.

## Architecture Decisions

### Branch-Aware via Tree Structure
Summaries are stored as `ChatMessage` records with two key fields:
- `parent_message_id` → last message when compression triggered (places summary in the tree)
- `last_summarized_message_id` → pointer to an older message up the chain (the cutoff). Messages after this are kept verbatim.

**Why store summary as a separate message?** If we embedded the summary in the `last_summarized_message_id` message itself, that message would contain context from messages that came after it—context that doesn't exist in other branches. By creating the summary as a new message attached to the branch tip, it only applies to the specific branch where compression occurred. It's only back-pointed to by the
branch which it applies to. All of this is necessary because we keep the last few messages verbatim and also to support branching logic.

### Progressive Summarization
Subsequent compressions incorporate the existing summary text + new messages, preventing information loss in very long conversations.

### Cutoff Marker Prompt Strategy
The LLM receives older messages, a cutoff marker, then recent messages. It summarizes only content before the marker while using recent context to inform what's important.

## Token Budget

Context window breakdown:
- `max_context_tokens` — LLM's total context window
- `reserved_tokens` — space for system prompt, tools, files, etc.
- Available for chat history = `max_context_tokens - reserved_tokens`
Note: If there is a lot of reserved tokens, chat compression may happen fairly frequently which is costly, slow, and leads to a bad user experience. Possible area of future improvement.

Configurable ratios:
- `COMPRESSION_TRIGGER_RATIO` (default 0.75) — compress when chat history exceeds this ratio of available space
- `RECENT_MESSAGES_RATIO` (default 0.2) — portion of chat history to keep verbatim when compressing

## Flow

1. Trigger when `history_tokens > available * 0.75`
2. Find existing summary for branch (if any)
3. Split messages: older (summarize) / recent (keep 25%)
4. Generate summary via LLM
5. Save as `ChatMessage` with `parent_message_id` + `last_summarized_message_id`

## Key Functions

| Function | Purpose |
|----------|---------|
| `get_compression_params` | Check if compression needed based on token counts |
| `find_summary_for_branch` | Find applicable summary by checking `parent_message_id` membership |
| `get_messages_to_summarize` | Split messages at token budget boundary |
| `compress_chat_history` | Orchestrate flow, save summary message |


================================================
FILE: backend/onyx/chat/README.md
================================================
# Overview of Context Management

This document reviews some design decisions around the main agent-loop powering Onyx's chat flow.
It is highly recommended for all engineers contributing to this flow to be familiar with the concepts here.

> Note: it is assumed the reader is familiar with the Onyx product and features such as Projects, User files, Citations, etc. 

## System Prompt

The system prompt is a default prompt that comes packaged with the system. Users can edit the default prompt and it will be persisted in the database.

Some parts of the system prompt are dynamically updated / inserted:

- Datetime of the message sent
- Tools description of when to use certain tools depending on if the tool is available in that cycle
- If the user has just called a search related tool, then a section about citations is included

## Custom Agent Prompt

The custom agent is inserted as a user message above the most recent user message, it is dynamically moved in the history as the user sends more messages.
If the user has opted to completely replace the System Prompt, then this Custom Agent prompt replaces the system prompt and does not move along the history.

## How Files are handled

On upload, Files are processed for tokens, if too many tokens to fit in the context, it’s considered a failed inclusion. This is done using the LLM tokenizer.

- In many cases, there is not a known tokenizer for each LLM so there is a default tokenizer used as a catchall.
- File upload happens in 2 parts - the actual upload + token counting.
- Files are added into chat context as a “point in time” inclusion and move up the context window as the conversation progresses.
  Every file knows how many tokens it is (model agnostic), image files have some assumed number of tokens.

Image files are attached to User Messages also as point in time inclusions.

**Future Extension**:
Files selected from the search results are also counted as “point in time” inclusions. Files that are too large cannot be selected.
For these files, the "entire file" does not exist for most connectors, it's pieced back together from the search engine.

## Projects

If a Project contains few enough files that it all fits in the model context, we keep it close enough in the history to ensure it is easy for the LLM to
access. Note that the project documents are assumed to be quite useful and that they should 1. never be dropped from context, 2. is not just a needle in
a haystack type search with a strong keyword to make the LLM attend to it.

Project files are vectorized and stored in the Search Engine so that if the user chooses a model with less context than the number of tokens in the project,
the system can RAG over the project files.

## How documents are represented

Documents from search or uploaded Project files are represented as a json so that the LLM can easily understand it. It is represented with a prefix string to
make the context clearer to the LLM. Note that for search results (whether web or internal, it will just be the json) and it will be a Tool Call type of
message rather than a user message.

```
Here are some documents provided for context, they may not all be relevant:
{
    "documents": [
        {"document": 1, "title": "Hello", "metadata": "status closed", "contents": "Foo"},
        {"document": 2, "title": "World", "contents": "Bar"}
    ]
}
```

Documents are represented with the `document` key so that the LLM can easily cite them with a single number. The tool returns have to be richer to be able to
translate this into links and other UI elements. What the LLM sees is far simpler to reduce noise/hallucinations.

Note that documents included in a single turn should be collapsed into a single user message.

Search tools also give URLs to the LLM so that open_url (a separate tool) can be called on them.

## Reminders

To ensure the LLM follows certain specific instructions, instructions are added at the very end of the chat context as a user message. If a search related
tool is used, a citation reminder is always added. Otherwise, by default there is no reminder. If the user configures reminders, those are added to the
final message. If a search related tool just ran and the user has reminders, both appear in a single message.

If a search related tool is called at any point during the turn, the reminder will remain at the end until the turn is over and the agent has responded.

## Tool Calls

As tool call responses can get very long (like an internal search can be many thousands of tokens), tool responses are current replaced with a hardcoded
string saying it is no longer available. Tool Call details like the search query and other arguments are kept in the history as this is information
rich and generally very few tokens.

> Note: in the Internal Search flow with query expansion, the Tool Call which was actually run differs from what the LLM provided as arguments.
> What the LLM sees in the history (to be most informative for future calls) is the full set of expanded queries.

**Possible Future Extension**:
Instead of dropping the Tool Call response, we might summarize it using an LLM so that it is just 1-2 sentences and captures the main points. That said,
this is questionable value add because anything relevant and useful should be already captured in the Agent response.

## Examples

```
S -> System Message
CA -> Custom Agent as a User Message
A -> Agent Message response to user
U -> User Message
TC -> Agent Message for a tool call
TR -> Tool response
R -> Reminder
F -> Point in time File
P -> Project Files (not overflowed case)
1,2,3 etc. to represent turn number. A turn consists of a user input and a final response from the Agent

Flow with Custom Agent
S, U1, TC, TR, A1, CA, U2, A2  -- user sends another message, triggers tool call -> S, U1, TC, TR, A1, U2, A2, CA, U3, TC, TR, R, A3
- Custom agent response moves
- Reminder inserted after TR

Flow with Project and File Upload
S, CA, P, F, U1, A1 -- user sends another message -> S, F, U1, A1, CA, P, U2, A2
- File stays in place, above the user message
- Project files move along the chain as new messages are sent
- Custom Agent prompt comes before project files which come before user uploaded files in each turn

Reminders during a single Turn
S, U1, TC, TR, R -- agent calls another tool -> S, U1, TC, TR, TC, TR, R, A1
- Reminder moved to the end
```

## Product considerations

Project files are important to the entire duration of the chat session. If the user has uploaded project files, they are likely very intent on working with
those files. The LLM is much better at referencing documents close to the end of the context window so keeping it there for ease of access.

User uploaded files are considered relevant for that point in time, it is ok if the Agent forgets about it as the chat gets long. If every uploaded file is
constantly moved towards the end of the chat, it would degrade quality as these stack up. Even with a single file, there is some cost of making the previous
User Message further away. This tradeoff is accepted for Projects because of the intent of the feature.

Reminder are absolutely necessary to ensure 1-2 specific instructions get followed with a very high probability. It is less detailed than the system prompt
and should be very targetted for it to work reliably and also not interfere with the last user message.

## Reasons / Experiments

Custom Agent instructions being placed in the system prompt is poorly followed. It also degrades performance of the system especially when the instructions
are orthogonal (or even possibly contradictory) to the system prompt. For weaker models, it causes strange artifacts in tool calls and final responses
that completely ruins the user experience. Empirically, this way works better across a range of models especially when the history gets longer.
Having the Custom Agent instructions not move means it fades more as the chat gets long which is also not ok from a UX perspective.

Different LLMs vary in this but some now have a section that cannot be set via the API layer called the "System Prompt" (OpenAI terminology) which contains
information like the model cutoff date, identity, and some other basic non-changing information. The System prompt described above is in that convention called
the "Developer Prompt". It seems the distribution of the System Prompt, by which I mean the style of wording and terms used can also affect the behavior. This
is different between different models and not necessarily scientific so the system prompt is built from an exploration across different models. It currently
starts with: "You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent..."

LLMs are able to handle changes in topic best at message boundaries. There are special tokens under the hood for this. We also use this property to slice up
the history in the way presented above.

Reminder messages are placed at the end of the prompt because all model fine tuning approaches cause the LLMs to attend very strongly to the tokens at the very
back of the context closest to generation. This is the only way to get the LLMs to not miss critical information and for the product to be reliable. Specifically
the built-in reminders are around citations and what tools it should call in certain situations.

The document json includes a field for the LLM to cite (it's a single number) to make citations reliable and avoid weird artifacts. It's called "document" so
that the LLM does not create weird artifacts in reasoning like "I should reference citation_id: 5 for...". It is also strategically placed so that it is easy to
reference. It is followed by a couple short sections like the metadata and title before the long content section. It seems LLMs are still better at local
attention despite having global access.

In a similar concept, LLM instructions in the system prompt are structured specifically so that there are coherent sections for the LLM to attend to. This is
fairly surprising actually but if there is a line of instructions effectively saying "If you try to use some tools and find that you need more information or
need to call additional tools, you are encouraged to do this", having this in the Tool section of the System prompt makes all the LLMs follow it well but if it's
even just a paragraph away like near the beginning of the prompt, it is often ignored. The difference is as drastic as a 30% follow rate to a 90% follow
rate by even just moving the same statement a few sentences.

## Other related pointers

- How messages, files, images are stored can be found in backend/onyx/db/models.py, there is also a README.md under that directory that may be helpful.

---

# Overview of LLM flow architecture

**Concepts:**
Turn: User sends a message and AI does some set of things and responds
Step/Cycle: 1 single LLM inference given some context and some tools

## 1. Top Level (process_message function):

This function can be thought of as the set-up and validation layer. It ensures that the database is in a valid state, reads the
messages in the session and sets up all the necessary items to run the chat loop and state containers. The major things it does
are:

- Validates the request
- Builds the chat history for the session
- Fetches any additional context such as files and images
- Prepares all of the tools for the LLM
- Creates the state container objects for use in the loop

### Execution (`_run_models` function):

Each model runs in its own worker thread inside a `ThreadPoolExecutor`. Workers write packets to a shared
`merged_queue` via an `Emitter`; the main thread drains the queue and yields packets in arrival order. This
means the top level is isolated from the LLM flow and can yield packets as soon as they are produced. If a
worker fails, the main thread yields a `StreamingError` for that model and keeps the other models running.
All saving and database operations are handled by the main thread after the workers complete (or by the
workers themselves via self-completion if the drain loop exits early).

### Emitter

The emitter is an object that lower levels use to send packets without needing to yield them all the way back
up the call stack. Each `Emitter` tags every packet with a `model_index` and places it on the shared
`merged_queue` as a `(model_idx, packet)` tuple. The drain loop in `_run_models` consumes these tuples and
yields the packets to the caller. Both the emitter and the state container are mutating state objects used
only to accumulate state. There should be no logic dependent on the states of these objects, especially in
the lower levels. The emitter should only take packets and should not be used for other things.

### State Container

The state container is used to accumulate state during the LLM flow. Similar to the emitter, it should not be used for logic,
only for accumulating state. It is used to gather all of the necessary information for saving the chat turn into the database.
So it will accumulate answer tokens, reasoning tokens, tool calls, citation info, etc. This is used at the end of the flow once
the lower level is completed whether on its own or stopped by the user. At that point, all of the state is read and stored into
the database. The state container can be added to by any of the underlying layers, this is fine.

### Stopping Generation

The drain loop in `_run_models` checks `check_is_connected()` every 50 ms (on queue timeout). The signal itself
is stored in Redis and is set by the user calling the stop endpoint. On disconnect, the drain loop saves
partial state for every model, yields an `OverallStop(stop_reason="user_cancelled")` packet, and returns.
A `drain_done` event signals emitters to stop blocking so worker threads can exit quickly. Workers that
already completed successfully will self-complete (persist their response) if the drain loop exited before
reaching the normal completion path.

## 2. LLM Loop (run_llm_loop function)

This function handles the logic of the Turn. It's essentially a while loop where context is added and modified (according what
is outlined in the first half of this doc). Its main functionality is:

- Translate and truncate the context for the LLM inference
- Add context modifiers like reminders, updates to the system prompts, etc.
- Run tool calls and gather results
- Build some of the objects stored in the state container.

## 3. LLM Step (run_llm_step function)

This function is a single inference of the LLM. It's a wrapper around the LLM stream function which handles packet translations
so that the Emitter can emit individual tokens as soon as they arrive. It also keeps track of the different sections since they
do not all come at once (reasoning, answers, tool calls are all built up token by token). This layer also tracks the different
tool calls and returns that to the LLM Loop to execute.

## Things to know

- Packets are labeled with a "turn_index" field as part of the Placement of the packet. This is not the same as the backend
  concept of a turn. The turn_index for the frontend is which block does this packet belong to. So while a reasoning + tool call
  comes from the same LLM inference (same backend LLM step), they are 2 turns to the frontend because that's how it's rendered.

- There are 3 representations of a message, each scoped to a different layer:
  1. **ChatMessage** — The database model. Should be converted into ChatMessageSimple early and never passed deep into the flow.
  2. **ChatMessageSimple** — The canonical data model used throughout the codebase. This is the rich, full-featured representation
     of a message. Any modifications or additions to message structure should be made here.
  3. **LanguageModelInput** — The LLM-facing representation. Intentionally minimal so the LLM interface layer stays clean and
     easy to maintain/extend.


================================================
FILE: backend/onyx/chat/__init__.py
================================================


================================================
FILE: backend/onyx/chat/chat_processing_checker.py
================================================
from uuid import UUID

from onyx.cache.interface import CacheBackend

PREFIX = "chatprocessing"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 30 * 60  # 30 minutes


def _get_fence_key(chat_session_id: UUID) -> str:
    """Generate the cache key for a chat session processing fence.

    Args:
        chat_session_id: The UUID of the chat session

    Returns:
        The fence key string. Tenant isolation is handled automatically
        by the cache backend (Redis key-prefixing or Postgres schema routing).
    """
    return f"{FENCE_PREFIX}_{chat_session_id}"


def set_processing_status(
    chat_session_id: UUID, cache: CacheBackend, value: bool
) -> None:
    """Set or clear the fence for a chat session processing a message.

    If the key exists, a message is being processed.

    Args:
        chat_session_id: The UUID of the chat session
        cache: Tenant-aware cache backend
        value: True to set the fence, False to clear it
    """
    fence_key = _get_fence_key(chat_session_id)
    if value:
        cache.set(fence_key, 0, ex=FENCE_TTL)
    else:
        cache.delete(fence_key)


def is_chat_session_processing(chat_session_id: UUID, cache: CacheBackend) -> bool:
    """Check if the chat session is processing a message.

    Args:
        chat_session_id: The UUID of the chat session
        cache: Tenant-aware cache backend

    Returns:
        True if the chat session is processing a message, False otherwise
    """
    return cache.exists(_get_fence_key(chat_session_id))


================================================
FILE: backend/onyx/chat/chat_state.py
================================================
import threading
from collections.abc import Callable
from dataclasses import dataclass
from uuid import UUID

from pydantic import BaseModel

from onyx.cache.interface import CacheBackend
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import SearchParams
from onyx.context.search.models import SearchDoc
from onyx.db.memory import UserMemoryContext
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.onyxbot.slack.models import SlackContext
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.tools.models import ChatFile
from onyx.tools.models import ToolCallInfo

# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]


class ChatStateContainer:
    """Container for accumulating state during LLM loop execution.

    This container holds the partial state that can be saved to the database
    if the generation is stopped by the user or completes normally.

    Thread-safe: All write operations are protected by a lock to ensure safe
    concurrent access from multiple threads. For thread-safe reads, use the
    getter methods. Direct attribute access is not thread-safe.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # These are collected at the end after the entire tool call is completed
        self.tool_calls: list[ToolCallInfo] = []
        # This is accumulated during the streaming
        self.reasoning_tokens: str | None = None
        # This is accumulated during the streaming of the answer
        self.answer_tokens: str | None = None
        # Store citation mapping for building citation_docs_info during partial saves
        self.citation_to_doc: CitationMapping = {}
        # True if this turn is a clarification question (deep research flow)
        self.is_clarification: bool = False
        # Pre-answer processing time (time before answer starts) in seconds
        self.pre_answer_processing_time: float | None = None
        # Note: LLM cost tracking is now handled in multi_llm.py
        # Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
        self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
        # Track which citation numbers were actually emitted during streaming
        self._emitted_citations: set[int] = set()

    def add_tool_call(self, tool_call: ToolCallInfo) -> None:
        """Add a tool call to the accumulated state."""
        with self._lock:
            self.tool_calls.append(tool_call)

    def set_reasoning_tokens(self, reasoning: str | None) -> None:
        """Set the reasoning tokens from the final answer generation."""
        with self._lock:
            self.reasoning_tokens = reasoning

    def set_answer_tokens(self, answer: str | None) -> None:
        """Set the answer tokens from the final answer generation."""
        with self._lock:
            self.answer_tokens = answer

    def set_citation_mapping(self, citation_to_doc: CitationMapping) -> None:
        """Set the citation mapping from citation processor."""
        with self._lock:
            self.citation_to_doc = citation_to_doc

    def set_is_clarification(self, is_clarification: bool) -> None:
        """Set whether this turn is a clarification question."""
        with self._lock:
            self.is_clarification = is_clarification

    def get_answer_tokens(self) -> str | None:
        """Thread-safe getter for answer_tokens."""
        with self._lock:
            return self.answer_tokens

    def get_reasoning_tokens(self) -> str | None:
        """Thread-safe getter for reasoning_tokens."""
        with self._lock:
            return self.reasoning_tokens

    def get_tool_calls(self) -> list[ToolCallInfo]:
        """Thread-safe getter for tool_calls (returns a copy)."""
        with self._lock:
            return self.tool_calls.copy()

    def get_citation_to_doc(self) -> CitationMapping:
        """Thread-safe getter for citation_to_doc (returns a copy)."""
        with self._lock:
            return self.citation_to_doc.copy()

    def get_is_clarification(self) -> bool:
        """Thread-safe getter for is_clarification."""
        with self._lock:
            return self.is_clarification

    def set_pre_answer_processing_time(self, duration: float | None) -> None:
        """Set the pre-answer processing time (time before answer starts)."""
        with self._lock:
            self.pre_answer_processing_time = duration

    def get_pre_answer_processing_time(self) -> float | None:
        """Thread-safe getter for pre_answer_processing_time."""
        with self._lock:
            return self.pre_answer_processing_time

    @staticmethod
    def create_search_doc_key(
        search_doc: SearchDoc, use_simple_key: bool = True
    ) -> SearchDocKey:
        """Create a unique key for a SearchDoc for deduplication.

        Args:
            search_doc: The SearchDoc to create a key for
            use_simple_key: If True (default), use only document_id for deduplication.
                If False, include chunk_ind and match_highlights so that the same
                document/chunk with different highlights are stored separately.
        """
        if use_simple_key:
            return search_doc.document_id
        match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
        return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)

    def add_search_docs(
        self, search_docs: list[SearchDoc], use_simple_key: bool = True
    ) -> None:
        """Add search docs to the accumulated collection with deduplication.

        Args:
            search_docs: List of SearchDoc objects to add
            use_simple_key: If True (default), deduplicate by document_id only.
                If False, deduplicate by document_id + chunk_ind + match_highlights.
        """
        with self._lock:
            for doc in search_docs:
                key = self.create_search_doc_key(doc, use_simple_key)
                if key not in self._all_search_docs:
                    self._all_search_docs[key] = doc

    def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
        """Thread-safe getter for all accumulated search docs (returns a copy)."""
        with self._lock:
            return self._all_search_docs.copy()

    def add_emitted_citation(self, citation_num: int) -> None:
        """Add a citation number that was actually emitted during streaming."""
        with self._lock:
            self._emitted_citations.add(citation_num)

    def get_emitted_citations(self) -> set[int]:
        """Thread-safe getter for emitted citations (returns a copy)."""
        with self._lock:
            return self._emitted_citations.copy()


class AvailableFiles(BaseModel):
    """Separated file IDs for the FileReaderTool so it knows which loader to use."""

    # IDs from the ``user_file`` table (project / persona-attached files).
    user_file_ids: list[UUID] = []
    # IDs from the ``file_record`` table (chat-attached files).
    chat_file_ids: list[UUID] = []


@dataclass(frozen=True)
class ChatTurnSetup:
    """Immutable context produced by ``build_chat_turn`` and consumed by ``_run_models``."""

    new_msg_req: SendMessageRequest
    chat_session: ChatSession
    persona: Persona
    user_message: ChatMessage
    user_identity: LLMUserIdentity
    llms: list[LLM]  # length 1 for single-model, N for multi-model
    model_display_names: list[str]  # parallel to llms
    simple_chat_history: list[ChatMessageSimple]
    extracted_context_files: ExtractedContextFiles
    reserved_messages: list[ChatMessage]  # length 1 for single, N for multi
    reserved_token_count: int
    search_params: SearchParams
    all_injected_file_metadata: dict[str, FileToolMetadata]
    available_files: AvailableFiles
    tool_id_to_name_map: dict[int, str]
    forced_tool_id: int | None
    files: list[ChatLoadedFile]
    chat_files_for_tools: list[ChatFile]
    custom_agent_prompt: str | None
    user_memory_context: UserMemoryContext
    # For deep research: was the last assistant message a clarification request?
    skip_clarification: bool
    check_is_connected: Callable[[], bool]
    cache: CacheBackend
    # Execution params forwarded to per-model tool construction
    bypass_acl: bool
    slack_context: SlackContext | None
    custom_tool_additional_headers: dict[str, str] | None
    mcp_headers: dict[str, str] | None


================================================
FILE: backend/onyx/chat/chat_utils.py
================================================
import json
import re
from collections.abc import Callable
from typing import cast
from uuid import UUID

from fastapi.datastructures import Headers
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.chat.models import ChatHistoryResult
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import MessageType
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_or_create_root_message
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import is_kg_config_settings_enabled_valid
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.db.models import SearchDoc as DbSearchDoc
from onyx.db.models import UserFile
from onyx.db.projects import check_project_ownership
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.file_store.utils import plaintext_file_name_for_id
from onyx.file_store.utils import store_plaintext
from onyx.kg.models import KGException
from onyx.kg.setup.kg_default_entity_definitions import (
    populate_missing_default_entity_types__commit,
)
from onyx.prompts.chat_prompts import ADDITIONAL_CONTEXT_PROMPT
from onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE
from onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time


logger = setup_logger()
IMAGE_GENERATION_TOOL_NAME = "generate_image"


class FileContextResult(BaseModel):
    """Result of building a file's LLM context representation."""

    message: ChatMessageSimple
    tool_metadata: FileToolMetadata


def build_file_context(
    tool_file_id: str,
    filename: str,
    file_type: ChatFileType,
    content_text: str | None = None,
    token_count: int = 0,
    approx_char_count: int | None = None,
) -> FileContextResult:
    """Build the LLM context representation for a single file.

    Centralises how files should appear in the LLM prompt
    — the ID that FileReaderTool accepts (``UserFile.id`` for user files).
    """
    if file_type.use_metadata_only():
        message_text = (
            f"File: {filename} (id={tool_file_id})\n"
            "Use the file_reader or python tools to access "
            "this file's contents."
        )
        message = ChatMessageSimple(
            message=message_text,
            token_count=max(1, len(message_text) // 4),
            message_type=MessageType.USER,
            file_id=tool_file_id,
        )
    else:
        message_text = f"File: {filename}\n{content_text or ''}\nEnd of File"
        message = ChatMessageSimple(
            message=message_text,
            token_count=token_count,
            message_type=MessageType.USER,
            file_id=tool_file_id,
        )

    metadata = FileToolMetadata(
        file_id=tool_file_id,
        filename=filename,
        approx_char_count=(
            approx_char_count
            if approx_char_count is not None
            else len(content_text or "")
        ),
    )

    return FileContextResult(message=message, tool_metadata=metadata)


def create_chat_session_from_request(
    chat_session_request: ChatSessionCreationRequest,
    user_id: UUID | None,
    db_session: Session,
) -> ChatSession:
    """Create a chat session from a ChatSessionCreationRequest.

    Includes project ownership validation when project_id is provided.

    Args:
        chat_session_request: The request containing persona_id, description, and project_id
        user_id: The ID of the user creating the session (can be None for anonymous)
        db_session: The database session

    Returns:
        The newly created ChatSession

    Raises:
        ValueError: If user lacks access to the specified project
        Exception: If the persona is invalid
    """
    project_id = chat_session_request.project_id
    if project_id:
        if not check_project_ownership(project_id, user_id, db_session):
            raise ValueError("User does not have access to project")

    return create_chat_session(
        db_session=db_session,
        description=chat_session_request.description or "",
        user_id=user_id,
        persona_id=chat_session_request.persona_id,
        project_id=chat_session_request.project_id,
    )


def create_chat_history_chain(
    chat_session_id: UUID,
    db_session: Session,
    prefetch_top_two_level_tool_calls: bool = True,
    # Optional id at which we finish processing
    stop_at_message_id: int | None = None,
) -> list[ChatMessage]:
    """Build the linear chain of messages without including the root message"""
    mainline_messages: list[ChatMessage] = []

    all_chat_messages = get_chat_messages_by_session(
        chat_session_id=chat_session_id,
        user_id=None,
        db_session=db_session,
        skip_permission_check=True,
        prefetch_top_two_level_tool_calls=prefetch_top_two_level_tool_calls,
    )

    if not all_chat_messages:
        root_message = get_or_create_root_message(
            chat_session_id=chat_session_id, db_session=db_session
        )
    else:
        root_message = all_chat_messages[0]
        if root_message.parent_message is not None:
            raise RuntimeError(
                "Invalid root message, unable to fetch valid chat message sequence"
            )

    current_message: ChatMessage | None = root_message
    previous_message: ChatMessage | None = None
    while current_message is not None:
        child_msg = current_message.latest_child_message

        # Break if at the end of the chain
        # or have reached the `final_id` of the submitted message
        if not child_msg or (
            stop_at_message_id and current_message.id == stop_at_message_id
        ):
            break
        current_message = child_msg

        if (
            current_message.message_type == MessageType.ASSISTANT
            and previous_message is not None
            and previous_message.message_type == MessageType.ASSISTANT
            and mainline_messages
        ):
            # Note that 2 user messages in a row is fine since this is often used for
            # adding custom prompts and reminders
            raise RuntimeError(
                "Invalid message chain, cannot have two assistant messages in a row"
            )
        else:
            mainline_messages.append(current_message)

        previous_message = current_message

    return mainline_messages


def reorganize_citations(
    answer: str, citations: list[CitationInfo]
) -> tuple[str, list[CitationInfo]]:
    """For a complete, citation-aware response, we want to reorganize the citations so that
    they are in the order of the documents that were used in the response. This just looks nicer / avoids
    confusion ("Why is there [7] when only 2 documents are cited?")."""

    # Regular expression to find all instances of [[x]](LINK)
    pattern = r"\[\[(.*?)\]\]\((.*?)\)"

    all_citation_matches = re.findall(pattern, answer)

    new_citation_info: dict[int, CitationInfo] = {}
    for citation_match in all_citation_matches:
        try:
            citation_num = int(citation_match[0])
            if citation_num in new_citation_info:
                continue

            matching_citation = next(
                iter([c for c in citations if c.citation_number == int(citation_num)]),
                None,
            )
            if matching_citation is None:
                continue

            new_citation_info[citation_num] = CitationInfo(
                citation_number=len(new_citation_info) + 1,
                document_id=matching_citation.document_id,
            )
        except Exception:
            pass

    # Function to replace citations with their new number
    def slack_link_format(match: re.Match) -> str:
        link_text = match.group(1)
        try:
            citation_num = int(link_text)
            if citation_num in new_citation_info:
                link_text = new_citation_info[citation_num].citation_number
        except Exception:
            pass

        link_url = match.group(2)
        return f"[[{link_text}]]({link_url})"

    # Substitute all matches in the input text
    new_answer = re.sub(pattern, slack_link_format, answer)

    # if any citations weren't parsable, just add them back to be safe
    for citation in citations:
        if citation.citation_number not in new_citation_info:
            new_citation_info[citation.citation_number] = citation

    return new_answer, list(new_citation_info.values())


def build_citation_map_from_infos(
    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
) -> dict[int, int]:
    """Translate a list of streaming CitationInfo objects into a mapping of
    citation number -> saved search doc DB id.

    Always cites the first instance of a document_id and assumes db_docs are
    ordered as shown to the user (display order).
    """
    doc_id_to_saved_doc_id_map: dict[str, int] = {}
    for db_doc in db_docs:
        if db_doc.document_id not in doc_id_to_saved_doc_id_map:
            doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id

    citation_to_saved_doc_id_map: dict[int, int] = {}
    for citation in citations_list:
        if citation.citation_number not in citation_to_saved_doc_id_map:
            saved_id = doc_id_to_saved_doc_id_map.get(citation.document_id)
            if saved_id is not None:
                citation_to_saved_doc_id_map[citation.citation_number] = saved_id

    return citation_to_saved_doc_id_map


def build_citation_map_from_numbers(
    cited_numbers: list[int] | set[int], db_docs: list[DbSearchDoc]
) -> dict[int, int]:
    """Translate parsed citation numbers (e.g., from [[n]]) into a mapping of
    citation number -> saved search doc DB id by positional index.
    """
    citation_to_saved_doc_id_map: dict[int, int] = {}
    for num in sorted(set(cited_numbers)):
        idx = num - 1
        if 0 <= idx < len(db_docs):
            citation_to_saved_doc_id_map[num] = db_docs[idx].id

    return citation_to_saved_doc_id_map


def extract_headers(
    headers: dict[str, str] | Headers, pass_through_headers: list[str] | None
) -> dict[str, str]:
    """
    Extract headers specified in pass_through_headers from input headers.
    Handles both dict and FastAPI Headers objects, accounting for lowercase keys.

    Args:
        headers: Input headers as dict or Headers object.

    Returns:
        dict: Filtered headers based on pass_through_headers.
    """
    if not pass_through_headers:
        return {}

    extracted_headers: dict[str, str] = {}
    for key in pass_through_headers:
        if key in headers:
            extracted_headers[key] = headers[key]
        else:
            # fastapi makes all header keys lowercase, handling that here
            lowercase_key = key.lower()
            if lowercase_key in headers:
                extracted_headers[lowercase_key] = headers[lowercase_key]
    return extracted_headers


def process_kg_commands(
    message: str,
    persona_name: str,
    tenant_id: str,  # noqa: ARG001
    db_session: Session,
) -> None:
    # Temporarily, until we have a draft UI for the KG Operations/Management
    # TODO: move to api endpoint once we get frontend
    if not persona_name.startswith(TMP_DRALPHA_PERSONA_NAME):
        return

    kg_config_settings = get_kg_config_settings()
    if not is_kg_config_settings_enabled_valid(kg_config_settings):
        return

    if message == "kg_setup":
        populate_missing_default_entity_types__commit(db_session=db_session)
        raise KGException("KG setup done")


def _get_or_extract_plaintext(
    file_id: str,
    extract_fn: Callable[[], str],
) -> str:
    """Load cached plaintext for a file, or extract and store it.

    Tries to read pre-stored plaintext from the file store.  On a miss,
    calls extract_fn to produce the text, then stores the result so
    future calls skip the expensive extraction.
    """
    file_store = get_default_file_store()
    plaintext_key = plaintext_file_name_for_id(file_id)

    # Try cached plaintext first.
    try:
        plaintext_io = file_store.read_file(plaintext_key, mode="b")
        return plaintext_io.read().decode("utf-8")
    except Exception:
        logger.exception(f"Error when reading file, id={file_id}")

    # Cache miss — extract and store.
    content_text = extract_fn()
    if content_text:
        store_plaintext(file_id, content_text)
    return content_text


@log_function_time(print_only=True)
def load_chat_file(
    file_descriptor: FileDescriptor, db_session: Session
) -> ChatLoadedFile:
    file_io = get_default_file_store().read_file(file_descriptor["id"], mode="b")
    content = file_io.read()

    # Extract text content if it's a text file type (not an image)
    content_text = None
    # `FileDescriptor` is often JSON-roundtripped (e.g. JSONB / API), so `type`
    # may arrive as a raw string value instead of a `ChatFileType`.
    file_type = ChatFileType(file_descriptor["type"])

    if file_type.is_text_file():
        file_id = file_descriptor["id"]

        def _extract() -> str:
            return extract_file_text(
                file=file_io,
                file_name=file_descriptor.get("name") or "",
                break_on_unprocessable=False,
            )

        # Use the user_file_id as cache key when available (matches what
        # the celery indexing worker stores), otherwise fall back to the
        # file store id (covers code-interpreter-generated files, etc.).
        user_file_id_str = file_descriptor.get("user_file_id")
        cache_key = user_file_id_str or file_id

        try:
            content_text = _get_or_extract_plaintext(cache_key, _extract)
        except Exception as e:
            logger.warning(
                f"Failed to retrieve content for file {file_descriptor['id']}: {str(e)}"
            )

    # Get token count from UserFile if available
    token_count = 0
    user_file_id_str = file_descriptor.get("user_file_id")
    if user_file_id_str:
        try:
            user_file_id = UUID(user_file_id_str)
            user_file = (
                db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
            )
            if user_file and user_file.token_count:
                token_count = user_file.token_count
        except (ValueError, TypeError) as e:
            logger.warning(
                f"Failed to get token count for file {file_descriptor['id']}: {e}"
            )

    return ChatLoadedFile(
        file_id=file_descriptor["id"],
        content=content,
        file_type=file_type,
        filename=file_descriptor.get("name"),
        content_text=content_text,
        token_count=token_count,
    )


def load_all_chat_files(
    chat_messages: list[ChatMessage],
    db_session: Session,
) -> list[ChatLoadedFile]:
    # TODO There is likely a more efficient/standard way to load the files here.
    file_descriptors_for_history: list[FileDescriptor] = []
    for chat_message in chat_messages:
        if chat_message.files:
            file_descriptors_for_history.extend(chat_message.files)

    files = cast(
        list[ChatLoadedFile],
        run_functions_tuples_in_parallel(
            [
                (load_chat_file, (file, db_session))
                for file in file_descriptors_for_history
            ]
        ),
    )
    return files


def convert_chat_history_basic(
    chat_history: list[ChatMessage],
    token_counter: Callable[[str], int],
    max_individual_message_tokens: int | None = None,
    max_total_tokens: int | None = None,
) -> list[ChatMessageSimple]:
    """Convert ChatMessage history to ChatMessageSimple format with no tool calls or files included.

    Args:
        chat_history: List of ChatMessage objects to convert
        token_counter: Function to count tokens in a message string
        max_individual_message_tokens: If set, messages exceeding this number of tokens are dropped.
            If None, no messages are dropped based on individual token count.
        max_total_tokens: If set, maximum number of tokens allowed for the entire history.
            If None, the history is not trimmed based on total token count.

    Returns:
        List of ChatMessageSimple objects
    """
    # Defensive: treat a non-positive total budget as "no history".
    if max_total_tokens is not None and max_total_tokens <= 0:
        return []

    # Convert only the core USER/ASSISTANT messages; omit files and tool calls.
    converted: list[ChatMessageSimple] = []
    for chat_message in chat_history:
        if chat_message.message_type not in (MessageType.USER, MessageType.ASSISTANT):
            continue

        message = chat_message.message or ""
        token_count = getattr(chat_message, "token_count", None)
        if token_count is None:
            token_count = token_counter(message)

        # Drop any single message that would dominate the context window.
        if (
            max_individual_message_tokens is not None
            and token_count > max_individual_message_tokens
        ):
            continue

        converted.append(
            ChatMessageSimple(
                message=message,
                token_count=token_count,
                message_type=chat_message.message_type,
                image_files=None,
            )
        )

    if max_total_tokens is None:
        return converted

    # Enforce a max total budget by keeping a contiguous suffix of the conversation.
    trimmed_reversed: list[ChatMessageSimple] = []
    total_tokens = 0
    for msg in reversed(converted):
        if total_tokens + msg.token_count > max_total_tokens:
            break
        trimmed_reversed.append(msg)
        total_tokens += msg.token_count

    return list(reversed(trimmed_reversed))


def _build_tool_call_response_history_message(
    tool_name: str,
    generated_images: list[dict] | None,
    tool_call_response: str | None,
) -> str:
    if tool_name != IMAGE_GENERATION_TOOL_NAME:
        return TOOL_CALL_RESPONSE_CROSS_MESSAGE

    if generated_images:
        llm_image_context: list[dict[str, str]] = []
        for image in generated_images:
            file_id = image.get("file_id")
            revised_prompt = image.get("revised_prompt")
            if not isinstance(file_id, str):
                continue

            llm_image_context.append(
                {
                    "file_id": file_id,
                    "revised_prompt": (
                        revised_prompt if isinstance(revised_prompt, str) else ""
                    ),
                }
            )

        if llm_image_context:
            return json.dumps(llm_image_context)

    if tool_call_response:
        return tool_call_response

    return TOOL_CALL_RESPONSE_CROSS_MESSAGE


def convert_chat_history(
    chat_history: list[ChatMessage],
    files: list[ChatLoadedFile],
    context_image_files: list[ChatLoadedFile],
    additional_context: str | None,
    token_counter: Callable[[str], int],
    tool_id_to_name_map: dict[int, str],
) -> ChatHistoryResult:
    """Convert ChatMessage history to ChatMessageSimple format.

    For user messages: includes attached files (images attached to message, text files as separate messages)
    For assistant messages with tool calls: creates ONE ASSISTANT message with tool_calls array,
        followed by N TOOL_CALL_RESPONSE messages (OpenAI parallel tool calling format)
    For assistant messages without tool calls: creates a simple ASSISTANT message

    Every injected text-file message is tagged with ``file_id`` and its
    metadata is collected in ``ChatHistoryResult.all_injected_file_metadata``.
    After context-window truncation, callers compare surviving ``file_id`` tags
    against this map to discover "forgotten" files and provide their metadata
    to the FileReaderTool.
    """
    simple_messages: list[ChatMessageSimple] = []
    all_injected_file_metadata: dict[str, FileToolMetadata] = {}

    # Create a mapping of file IDs to loaded files for quick lookup
    file_map = {str(f.file_id): f for f in files}

    # Find the index of the last USER message
    last_user_message_idx = None
    for i in range(len(chat_history) - 1, -1, -1):
        if chat_history[i].message_type == MessageType.USER:
            last_user_message_idx = i
            break

    for idx, chat_message in enumerate(chat_history):
        if chat_message.message_type == MessageType.USER:
            # Process files attached to this message
            text_files: list[tuple[ChatLoadedFile, FileDescriptor]] = []
            image_files: list[ChatLoadedFile] = []

            if chat_message.files:
                for file_descriptor in chat_message.files:
                    file_id = file_descriptor["id"]
                    loaded_file = file_map.get(file_id)
                    if loaded_file:
                        if loaded_file.file_type == ChatFileType.IMAGE:
                            image_files.append(loaded_file)
                        else:
                            # Text files (DOC, PLAIN_TEXT, TABULAR) are added as separate messages
                            text_files.append((loaded_file, file_descriptor))

            # Add text files as separate messages before the user message.
            # Each message is tagged with ``file_id`` so that forgotten files
            # can be detected after context-window truncation.
            for text_file, fd in text_files:
                # Use user_file_id as the FileReaderTool accepts that.
                # Fall back to the file-store path id.
                tool_id = fd.get("user_file_id") or text_file.file_id
                filename = text_file.filename or "unknown"
                ctx = build_file_context(
                    tool_file_id=tool_id,
                    filename=filename,
                    file_type=text_file.file_type,
                    content_text=text_file.content_text,
                    token_count=text_file.token_count,
                )
                simple_messages.append(ctx.message)
                all_injected_file_metadata[tool_id] = ctx.tool_metadata

            # Sum token counts from image files (excluding project image files)
            image_token_count = (
                sum(img.token_count for img in image_files) if image_files else 0
            )

            # Add the user message with image files attached
            # If this is the last USER message, also include context_image_files
            # Note: context image file tokens are NOT counted in the token count
            if idx == last_user_message_idx:
                if context_image_files:
                    image_files.extend(context_image_files)

                if additional_context:
                    simple_messages.append(
                        ChatMessageSimple(
                            message=ADDITIONAL_CONTEXT_PROMPT.format(
                                additional_context=additional_context
                            ),
                            token_count=token_counter(additional_context),
                            message_type=MessageType.USER,
                            image_files=None,
                        )
                    )

            simple_messages.append(
                ChatMessageSimple(
                    message=chat_message.message,
                    token_count=chat_message.token_count + image_token_count,
                    message_type=MessageType.USER,
                    image_files=image_files if image_files else None,
                )
            )

        elif chat_message.message_type == MessageType.ASSISTANT:
            # Handle tool calls if present using OpenAI parallel tool calling format:
            # 1. Group tool calls by turn_number
            # 2. For each turn: ONE ASSISTANT message with tool_calls array
            # 3. Followed by N TOOL_CALL_RESPONSE messages (one per tool call)
            if chat_message.tool_calls:
                # Group tool calls by turn number
                tool_calls_by_turn: dict[int, list] = {}
                for tool_call in chat_message.tool_calls:
                    if tool_call.turn_number not in tool_calls_by_turn:
                        tool_calls_by_turn[tool_call.turn_number] = []
                    tool_calls_by_turn[tool_call.turn_number].append(tool_call)

                # Sort turns and process each turn
                for turn_number in sorted(tool_calls_by_turn.keys()):
                    turn_tool_calls = tool_calls_by_turn[turn_number]
                    # Sort by tool_id within the turn for consistent ordering
                    turn_tool_calls.sort(key=lambda tc: tc.tool_id)

                    # Build ToolCallSimple list for this turn
                    tool_calls_simple: list[ToolCallSimple] = []
                    for tool_call in turn_tool_calls:
                        tool_name = tool_id_to_name_map.get(
                            tool_call.tool_id, "unknown"
                        )
                        tool_calls_simple.append(
                            ToolCallSimple(
                                tool_call_id=tool_call.tool_call_id,
                                tool_name=tool_name,
                                tool_arguments=tool_call.tool_call_arguments or {},
                                token_count=tool_call.tool_call_tokens,
                            )
                        )

                    # Create ONE ASSISTANT message with all tool calls for this turn
                    total_tool_call_tokens = sum(
                        tc.token_count for tc in tool_calls_simple
                    )
                    simple_messages.append(
                        ChatMessageSimple(
                            message="",  # No text content when making tool calls
                            token_count=total_tool_call_tokens,
                            message_type=MessageType.ASSISTANT,
                            tool_calls=tool_calls_simple,
                            image_files=None,
                        )
                    )

                    # Add TOOL_CALL_RESPONSE messages for each tool call in this turn
                    for tool_call in turn_tool_calls:
                        tool_name = tool_id_to_name_map.get(
                            tool_call.tool_id, "unknown"
                        )
                        tool_response_message = (
                            _build_tool_call_response_history_message(
                                tool_name=tool_name,
                                generated_images=tool_call.generated_images,
                                tool_call_response=tool_call.tool_call_response,
                            )
                        )
                        simple_messages.append(
                            ChatMessageSimple(
                                message=tool_response_message,
                                token_count=(
                                    token_counter(tool_response_message)
                                    if tool_name == IMAGE_GENERATION_TOOL_NAME
                                    else 20
                                ),
                                message_type=MessageType.TOOL_CALL_RESPONSE,
                                tool_call_id=tool_call.tool_call_id,
                                image_files=None,
                            )
                        )

            # Add the assistant message itself (the final answer)
            simple_messages.append(
                ChatMessageSimple(
                    message=chat_message.message,
                    token_count=chat_message.token_count,
                    message_type=MessageType.ASSISTANT,
                    image_files=None,
                )
            )
        else:
            raise ValueError(
                f"Invalid message type when constructing simple history: {chat_message.message_type}"
            )

    return ChatHistoryResult(
        simple_messages=simple_messages,
        all_injected_file_metadata=all_injected_file_metadata,
    )


def get_custom_agent_prompt(persona: Persona, chat_session: ChatSession) -> str | None:
    """Get the custom agent prompt from persona or project instructions. If it's replacing the base system prompt,
    it does not count as a custom agent prompt (logic exists later also to drop it in this case).

    Chat Sessions in Projects that are using a custom agent will retain the custom agent prompt.
    Priority: persona.system_prompt (if not default Agent) > chat_session.project.instructions

    # NOTE: Logic elsewhere allows saving empty strings for potentially other purposes but for constructing the prompts
    # we never want to return an empty string for a prompt so it's translated into an explicit None.

    Args:
        persona: The Persona object
        chat_session: The ChatSession object

    Returns:
        The prompt to use for the custom Agent part of the prompt.
    """
    # If using a custom Agent, always respect its prompt, even if in a Project, and even if it's an empty custom prompt.
    if persona.id != DEFAULT_PERSONA_ID:
        # Logic exists later also to drop it in this case but this is strictly correct anyhow.
        if persona.replace_base_system_prompt:
            return None
        return persona.system_prompt or None

    # If in a project and using the default Agent, respect the project instructions.
    if chat_session.project and chat_session.project.instructions:
        return chat_session.project.instructions

    return None


def is_last_assistant_message_clarification(chat_history: list[ChatMessage]) -> bool:
    """Check if the last assistant message in chat history was a clarification question.

    This is used in the deep research flow to determine whether to skip the
    clarification step when the user has already responded to a clarification.

    Args:
        chat_history: List of ChatMessage objects in chronological order

    Returns:
        True if the last assistant message has is_clarification=True, False otherwise
    """
    for message in reversed(chat_history):
        if message.message_type == MessageType.ASSISTANT:
            return message.is_clarification
    return False


def create_tool_call_failure_messages(
    tool_calls: list[ToolCallKickoff], token_counter: Callable[[str], int]
) -> list[ChatMessageSimple]:
    """Create ChatMessageSimple objects for failed tool calls.

    Creates messages using OpenAI parallel tool calling format:
    1. An ASSISTANT message with tool_calls field containing all failed tool calls
    2. A TOOL_CALL_RESPONSE failure message for each tool call

    Args:
        tool_calls: List of ToolCallKickoff objects representing the failed tool calls
        token_counter: Function to count tokens in a message string

    Returns:
        List containing ChatMessageSimple objects: one assistant message with all tool calls
        followed by a failure response for each tool call
    """
    if not tool_calls:
        return []

    # Create ToolCallSimple for each failed tool call
    tool_calls_simple: list[ToolCallSimple] = []
    for tool_call in tool_calls:
        tool_call_token_count = token_counter(tool_call.to_msg_str())
        tool_calls_simple.append(
            ToolCallSimple(
                tool_call_id=tool_call.tool_call_id,
                tool_name=tool_call.tool_name,
                tool_arguments=tool_call.tool_args,
                token_count=tool_call_token_count,
            )
        )

    total_token_count = sum(tc.token_count for tc in tool_calls_simple)

    # Create ONE ASSISTANT message with all tool_calls (OpenAI format)
    assistant_msg = ChatMessageSimple(
        message="",  # No text content when making tool calls
        token_count=total_token_count,
        message_type=MessageType.ASSISTANT,
        tool_calls=tool_calls_simple,
        image_files=None,
    )

    messages: list[ChatMessageSimple] = [assistant_msg]

    # Create a TOOL_CALL_RESPONSE failure message for each tool call
    for tool_call in tool_calls:
        failure_response_msg = ChatMessageSimple(
            message=TOOL_CALL_FAILURE_PROMPT,
            token_count=50,  # Tiny overestimate
            message_type=MessageType.TOOL_CALL_RESPONSE,
            tool_call_id=tool_call.tool_call_id,
            image_files=None,
        )
        messages.append(failure_response_msg)

    return messages


================================================
FILE: backend/onyx/chat/citation_processor.py
================================================
"""
Dynamic Citation Processor for LLM Responses

This module provides a citation processor that can:
- Accept citation number to SearchDoc mappings dynamically
- Process token streams from LLMs to extract citations
- Handle citations in three modes: REMOVE, KEEP_MARKERS, or HYPERLINK
- Emit CitationInfo objects for detected citations (in HYPERLINK mode)
- Track all seen citations regardless of mode
- Maintain a list of cited documents in order of first citation
"""

import re
from collections.abc import Generator
from enum import Enum
from typing import TypeAlias

from onyx.configs.chat_configs import STOP_STREAM_PAT
from onyx.context.search.models import SearchDoc
from onyx.prompts.constants import TRIPLE_BACKTICK
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.utils.logger import setup_logger

logger = setup_logger()


class CitationMode(Enum):
    """Defines how citations should be handled in the output.

    REMOVE: Citations are completely removed from output text.
            No CitationInfo objects are emitted.
            Use case: When you need to remove citations from the output if they are not shared with the user
            (e.g. in discord bot, public slack bot).

    KEEP_MARKERS: Original citation markers like [1], [2] are preserved unchanged.
                  No CitationInfo objects are emitted.
                  Use case: When you need to track citations in research agent and later process
                  them with collapse_citations() to renumber.

    HYPERLINK: Citations are replaced with markdown links like [[1]](url).
               CitationInfo objects are emitted for UI tracking.
               Use case: Final reports shown to users with clickable links.
    """

    REMOVE = "remove"
    KEEP_MARKERS = "keep_markers"
    HYPERLINK = "hyperlink"


CitationMapping: TypeAlias = dict[int, SearchDoc]


# ============================================================================
# Utility functions
# ============================================================================


def in_code_block(llm_text: str) -> bool:
    """Check if we're currently inside a code block by counting triple backticks."""
    count = llm_text.count(TRIPLE_BACKTICK)
    return count % 2 != 0


# ============================================================================
# Main Citation Processor with Dynamic Mapping
# ============================================================================


class DynamicCitationProcessor:
    """
    A citation processor that accepts dynamic citation mappings.

    This processor is designed for multi-turn conversations where the citation
    number to document mapping is provided externally. It processes streaming
    tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and handles
    them according to the configured CitationMode:

    CitationMode.HYPERLINK (default):
        1. Replaces citation markers with formatted markdown links (e.g., [[1]](url))
        2. Emits CitationInfo objects for tracking
        3. Maintains the order in which documents were first cited
        Use case: Final reports shown to users with clickable links.

    CitationMode.KEEP_MARKERS:
        1. Preserves original citation markers like [1], [2] unchanged
        2. Does NOT emit CitationInfo objects
        3. Still tracks all seen citations via get_seen_citations()
        Use case: When citations need later processing (e.g., renumbering).

    CitationMode.REMOVE:
        1. Removes citation markers entirely from the output text
        2. Does NOT emit CitationInfo objects
        3. Still tracks all seen citations via get_seen_citations()
        Use case: Research agent intermediate reports.

    Features:
        - Accepts citation number → SearchDoc mapping via update_citation_mapping()
        - Configurable citation mode at initialization
        - Always tracks seen citations regardless of mode
        - Holds back tokens that might be partial citations
        - Maintains list of cited SearchDocs in order of first citation
        - Handles unicode bracket variants (【】, ［］)
        - Skips citation processing inside code blocks

    Example (HYPERLINK mode - default):
        processor = DynamicCitationProcessor()

        # Set up citation mapping
        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})

        # Process tokens from LLM
        for token in llm_stream:
            for result in processor.process_token(token):
                if isinstance(result, str):
                    print(result)  # Display text with [[1]](url) format
                elif isinstance(result, CitationInfo):
                    handle_citation(result)  # Track citation

        # Get cited documents at the end
        cited_docs = processor.get_cited_documents()

    Example (KEEP_MARKERS mode):
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})

        # Process tokens from LLM
        for token in llm_stream:
            for result in processor.process_token(token):
                # Only strings are yielded, no CitationInfo objects
                print(result)  # Display text with original [1] format preserved

        # Get all seen citations after processing
        seen_citations = processor.get_seen_citations()  # {1: search_doc1, ...}

    Example (REMOVE mode):
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})

        # Process tokens - citations are removed but tracked
        for token in llm_stream:
            for result in processor.process_token(token):
                print(result)  # Text without any citation markers

        # Citations are still tracked
        seen_citations = processor.get_seen_citations()
    """

    def __init__(
        self,
        citation_mode: CitationMode = CitationMode.HYPERLINK,
        stop_stream: str | None = STOP_STREAM_PAT,
    ):
        """
        Initialize the citation processor.

        Args:
            citation_mode: How to handle citations in the output. One of:
                - CitationMode.HYPERLINK (default): Replace [1] with [[1]](url)
                  and emit CitationInfo objects.
                - CitationMode.KEEP_MARKERS: Keep original [1] markers unchanged,
                  no CitationInfo objects emitted.
                - CitationMode.REMOVE: Remove citations entirely from output,
                  no CitationInfo objects emitted.
                All modes track seen citations via get_seen_citations().
            stop_stream: Optional stop token pattern to halt processing early.
                When this pattern is detected in the token stream, processing stops.
                Defaults to STOP_STREAM_PAT from chat configs.
        """

        # Citation mapping from citation number to SearchDoc
        self.citation_to_doc: CitationMapping = {}
        self.seen_citations: CitationMapping = {}  # citation num -> SearchDoc

        # Token processing state
        self.llm_out = ""  # entire output so far
        self.curr_segment = ""  # tokens held for citation processing
        self.hold = ""  # tokens held for stop token processing
        self.stop_stream = stop_stream
        self.citation_mode = citation_mode

        # Citation tracking
        self.cited_documents_in_order: list[SearchDoc] = (
            []
        )  # SearchDocs in citation order
        self.cited_document_ids: set[str] = set()  # all cited document_ids
        self.recent_cited_documents: set[str] = (
            set()
        )  # recently cited (for deduplication)
        self.non_citation_count = 0

        # Citation patterns
        # Matches potential incomplete citations: '[', '[[', '[1', '[[1', '[1,', '[1, ', etc.
        # Also matches unicode bracket variants: 【, ［
        self.possible_citation_pattern = re.compile(r"([\[【［]+(?:\d+,? ?)*$)")

        # Matches complete citations:
        # group 1: '[[1]]', [[2]], etc. (also matches 【【1】】, ［［1］］, 【1】, ［1］)
        # group 2: '[1]', '[1, 2]', '[1,2,16]', etc. (also matches unicode variants)
        self.citation_pattern = re.compile(
            r"([\[【［]{2}\d+[\]】］]{2})|([\[【［]\d+(?:, ?\d+)*[\]】］])"
        )

    def update_citation_mapping(
        self,
        citation_mapping: CitationMapping,
        update_duplicate_keys: bool = False,
    ) -> None:
        """
        Update the citation number to SearchDoc mapping.

        This can be called multiple times to add or update mappings. New mappings
        will be merged with existing ones.

        Args:
            citation_mapping: Dictionary mapping citation numbers (1, 2, 3, ...) to SearchDoc objects
            update_duplicate_keys: If True, update existing mappings with new values when keys overlap.
                If False (default), filter out duplicate keys and only add non-duplicates.
                The default behavior is useful when OpenURL may have the same citation number as a
                Web Search result - in those cases, we keep the web search citation and snippet etc.
        """
        if update_duplicate_keys:
            # Update all mappings, including duplicates
            self.citation_to_doc.update(citation_mapping)
        else:
            # Filter out duplicate keys and only add non-duplicates
            # Reason for this is that OpenURL may have the same citation number as a Web Search result
            # For those, we should just keep the web search citation and snippet etc.
            duplicate_keys = set(citation_mapping.keys()) & set(
                self.citation_to_doc.keys()
            )
            non_duplicate_mapping = {
                k: v for k, v in citation_mapping.items() if k not in duplicate_keys
            }
            self.citation_to_doc.update(non_duplicate_mapping)

    def process_token(
        self, token: str | None
    ) -> Generator[str | CitationInfo, None, None]:
        """
        Process a token from the LLM stream.

        This method:
        1. Accumulates tokens until a complete citation or non-citation is found
        2. Holds back potential partial citations (e.g., "[", "[1")
        3. Yields text chunks when they're safe to display
        4. Handles code blocks (avoids processing citations inside code)
        5. Handles stop tokens
        6. Always tracks seen citations in self.seen_citations

        Behavior depends on the `citation_mode` setting from __init__:
        - HYPERLINK: Citations are replaced with [[n]](url) format and CitationInfo
          objects are yielded before each formatted citation
        - KEEP_MARKERS: Original citation markers like [1] are preserved unchanged,
          no CitationInfo objects are yielded
        - REMOVE: Citations are removed entirely from output,
          no CitationInfo objects are yielded

        Args:
            token: The next token from the LLM stream, or None to signal end of stream.
                Pass None to flush any remaining buffered text at end of stream.

        Yields:
            str: Text chunks to display. Citation format depends on citation_mode.
            CitationInfo: Citation metadata (only when citation_mode=HYPERLINK)
        """
        # None -> end of stream, flush remaining segment
        if token is None:
            if self.curr_segment:
                yield self.curr_segment
            return

        # Handle stop stream token
        if self.stop_stream:
            next_hold = self.hold + token
            if self.stop_stream in next_hold:
                # Extract text before the stop pattern
                stop_pos = next_hold.find(self.stop_stream)
                text_before_stop = next_hold[:stop_pos]
                # Process the text before stop pattern if any exists
                if text_before_stop:
                    # Process text_before_stop through normal flow
                    self.hold = ""
                    token = text_before_stop
                    # Continue to normal processing below
                else:
                    # Stop pattern at the beginning, nothing to yield
                    return
            elif next_hold == self.stop_stream[: len(next_hold)]:
                self.hold = next_hold
                return
            else:
                token = next_hold
                self.hold = ""

        self.curr_segment += token
        self.llm_out += token

        # Handle code blocks without language tags
        # If we see ``` followed by \n, add "plaintext" language specifier
        if "`" in self.curr_segment:
            if self.curr_segment.endswith("`"):
                pass
            elif "```" in self.curr_segment:
                parts = self.curr_segment.split("```")
                if len(parts) > 1 and len(parts[1]) > 0:
                    piece_that_comes_after = parts[1][0]
                    if piece_that_comes_after == "\n" and in_code_block(self.llm_out):
                        self.curr_segment = self.curr_segment.replace(
                            "```", "```plaintext"
                        )

        # Look for citations in current segment
        citation_matches = list(self.citation_pattern.finditer(self.curr_segment))
        possible_citation_found = bool(
            re.search(self.possible_citation_pattern, self.curr_segment)
        )

        result = ""
        if citation_matches and not in_code_block(self.llm_out):
            match_idx = 0
            for match in citation_matches:
                match_span = match.span()

                # Get text before/between citations
                intermatch_str = self.curr_segment[match_idx : match_span[0]]
                self.non_citation_count += len(intermatch_str)
                match_idx = match_span[1]

                # Check if there is already a space before this citation
                if intermatch_str:
                    has_leading_space = intermatch_str[-1].isspace()
                else:
                    # No text between citations (consecutive citations)
                    # If match_idx > 0, we've already processed a citation, so don't add space
                    if match_idx > 0:
                        # Consecutive citations - don't add space between them
                        has_leading_space = True
                    else:
                        # Citation at start of segment - check if previous output has space
                        segment_start_idx = len(self.llm_out) - len(self.curr_segment)
                        if segment_start_idx > 0:
                            has_leading_space = self.llm_out[
                                segment_start_idx - 1
                            ].isspace()
                        else:
                            has_leading_space = False

                # Reset recent citations if no citations found for a while
                if self.non_citation_count > 5:
                    self.recent_cited_documents.clear()

                # Process the citation (returns formatted citation text and CitationInfo objects)
                # Always tracks seen citations regardless of citation_mode
                citation_text, citation_info_list = self._process_citation(
                    match, has_leading_space
                )

                if self.citation_mode == CitationMode.HYPERLINK:
                    # HYPERLINK mode: Replace citations with markdown links [[n]](url)
                    # Yield text before citation FIRST (preserve order)
                    if intermatch_str:
                        yield intermatch_str
                    # Yield CitationInfo objects BEFORE the citation text
                    # This allows the frontend to receive citation metadata before the token
                    # that contains [[n]](link), enabling immediate rendering
                    for citation in citation_info_list:
                        yield citation
                    # Then yield the formatted citation text
                    if citation_text:
                        yield citation_text

                elif self.citation_mode == CitationMode.KEEP_MARKERS:
                    # KEEP_MARKERS mode: Preserve original citation markers unchanged
                    # Yield text before citation
                    if intermatch_str:
                        yield intermatch_str
                    # Yield the original citation marker as-is
                    yield match.group()

                else:  # CitationMode.REMOVE
                    # REMOVE mode: Remove citations entirely from output
                    # This strips citation markers like [1], [2], 【1】 from the output text
                    # When removing citations, we need to handle spacing to avoid issues like:
                    # - "text [1] more" -> "text  more" (double space)
                    # - "text [1]." -> "text ." (space before punctuation)
                    if intermatch_str:
                        remaining_text = self.curr_segment[match_span[1] :]
                        # Strip trailing space from intermatch if:
                        # 1. Remaining text starts with space (avoids double space)
                        # 2. Remaining text starts with punctuation (avoids space before punctuation)
                        if intermatch_str[-1].isspace() and remaining_text:
                            first_char = remaining_text[0]
                            # Check if next char is space or common punctuation
                            if first_char.isspace() or first_char in ".,;:!?)]}":
                                intermatch_str = intermatch_str.rstrip()
                        if intermatch_str:
                            yield intermatch_str

                self.non_citation_count = 0

            # Leftover text could be part of next citation
            self.curr_segment = self.curr_segment[match_idx:]
            self.non_citation_count = len(self.curr_segment)

        # Hold onto the current segment if potential citations found, otherwise stream it
        if not possible_citation_found:
            result += self.curr_segment
            self.non_citation_count += len(self.curr_segment)
            self.curr_segment = ""

        if result:
            yield result

    def _process_citation(
        self, match: re.Match, has_leading_space: bool
    ) -> tuple[str, list[CitationInfo]]:
        """
        Process a single citation match and return formatted citation text and citation info objects.

        This is an internal method called by process_token(). The match string can be
        in various formats: '[1]', '[1, 13, 6]', '[[4]]', '【1】', '［1］', etc.

        This method always:
        1. Extracts citation numbers from the match
        2. Looks up the corresponding SearchDoc from the mapping
        3. Tracks seen citations in self.seen_citations (regardless of citation_mode)

        When citation_mode is HYPERLINK:
        4. Creates formatted citation text as [[n]](url)
        5. Creates CitationInfo objects for new citations
        6. Handles deduplication of recently cited documents

        When citation_mode is REMOVE or KEEP_MARKERS:
        4. Returns empty string and empty list (caller handles output based on mode)

        Args:
            match: Regex match object containing the citation pattern
            has_leading_space: Whether the text immediately before this citation
                ends with whitespace. Used to determine if a leading space should
                be added to the formatted output.

        Returns:
            Tuple of (formatted_citation_text, citation_info_list):
            - formatted_citation_text: Markdown-formatted citation text like
              "[[1]](https://example.com)" or empty string if not in HYPERLINK mode
            - citation_info_list: List of CitationInfo objects for newly cited
              documents, or empty list if not in HYPERLINK mode
        """
        citation_str: str = match.group()  # e.g., '[1]', '[1, 2, 3]', '[[1]]', '【1】'
        formatted = (
            match.lastindex == 1
        )  # True means already in form '[[1]]' or '【【1】】'

        citation_info_list: list[CitationInfo] = []
        formatted_citation_parts: list[str] = []

        # Extract citation numbers - regex ensures matched brackets, so we can simply slice
        citation_content = citation_str[2:-2] if formatted else citation_str[1:-1]

        for num_str in citation_content.split(","):
            num_str = num_str.strip()
            if not num_str:
                continue

            try:
                num = int(num_str)
            except ValueError:
                # Invalid citation, skip it
                logger.warning(f"Invalid citation number format: {num_str}")
                continue

            # Check if we have a mapping for this citation number
            if num not in self.citation_to_doc:
                logger.warning(
                    f"Citation number {num} not found in mapping. Available: {list(self.citation_to_doc.keys())}"
                )
                continue

            # Get the SearchDoc
            search_doc = self.citation_to_doc[num]
            doc_id = search_doc.document_id
            link = search_doc.link or ""

            # Always track seen citations regardless of citation_mode setting
            self.seen_citations[num] = search_doc

            # Only generate formatted citations and CitationInfo in HYPERLINK mode
            if self.citation_mode != CitationMode.HYPERLINK:
                continue

            # Format the citation text as [[n]](link)
            formatted_citation_parts.append(f"[[{num}]]({link})")

            # Skip creating CitationInfo for citations of the same work if cited recently (deduplication)
            if doc_id in self.recent_cited_documents:
                continue
            self.recent_cited_documents.add(doc_id)

            # Track cited documents and create CitationInfo only for new citations
            if doc_id not in self.cited_document_ids:
                self.cited_document_ids.add(doc_id)
                self.cited_documents_in_order.append(search_doc)
                citation_info_list.append(
                    CitationInfo(
                        citation_number=num,
                        document_id=doc_id,
                    )
                )

        # Join all citation parts with spaces
        formatted_citation_text = " ".join(formatted_citation_parts)

        # Apply leading space only if the text didn't already have one
        if formatted_citation_text and not has_leading_space:
            formatted_citation_text = " " + formatted_citation_text

        return formatted_citation_text, citation_info_list

    def get_cited_documents(self) -> list[SearchDoc]:
        """
        Get the list of cited SearchDoc objects in the order they were first cited.

        Note: This list is only populated when `citation_mode=HYPERLINK`.
        When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
        Use get_seen_citations() instead if you need to track citations without
        emitting CitationInfo objects.

        Returns:
            List of SearchDoc objects in the order they were first cited.
            Empty list if citation_mode is not HYPERLINK.
        """
        return self.cited_documents_in_order

    def get_cited_document_ids(self) -> list[str]:
        """
        Get the list of cited document IDs in the order they were first cited.

        Note: This list is only populated when `citation_mode=HYPERLINK`.
        When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
        Use get_seen_citations() instead if you need to track citations without
        emitting CitationInfo objects.

        Returns:
            List of document IDs (strings) in the order they were first cited.
            Empty list if citation_mode is not HYPERLINK.
        """
        return [doc.document_id for doc in self.cited_documents_in_order]

    def get_seen_citations(self) -> CitationMapping:
        """
        Get all seen citations as a mapping from citation number to SearchDoc.

        This returns all citations that have been encountered during processing,
        regardless of the `citation_mode` setting. Citations are tracked
        whenever they are parsed, making this useful for cases where you need to
        know which citations appeared in the text without emitting CitationInfo objects.

        This is particularly useful when using REMOVE or KEEP_MARKERS mode, as
        get_cited_documents() will be empty in those cases, but get_seen_citations()
        will still contain all the citations that were found.

        Returns:
            Dictionary mapping citation numbers (int) to SearchDoc objects.
            The dictionary is keyed by the citation number as it appeared in
            the text (e.g., {1: SearchDoc(...), 3: SearchDoc(...)}).
        """
        return self.seen_citations

    @property
    def num_cited_documents(self) -> int:
        """
        Get the number of unique documents that have been cited.

        Note: This count is only updated when `citation_mode=HYPERLINK`.
        When using REMOVE or KEEP_MARKERS mode, this will always return 0.
        Use len(get_seen_citations()) instead if you need to count citations
        without emitting CitationInfo objects.

        Returns:
            Number of unique documents cited. 0 if citation_mode is not HYPERLINK.
        """
        return len(self.cited_document_ids)

    def reset_recent_citations(self) -> None:
        """
        Reset the recent citations tracker.

        The processor tracks "recently cited" documents to avoid emitting duplicate
        CitationInfo objects for the same document when it's cited multiple times
        in close succession. This method clears that tracker.

        This is primarily useful when `citation_mode=HYPERLINK` to allow
        previously cited documents to emit CitationInfo objects again. Has no
        effect when using REMOVE or KEEP_MARKERS mode.

        The recent citation tracker is also automatically cleared when more than
        5 non-citation characters are processed between citations.
        """
        self.recent_cited_documents.clear()

    def get_next_citation_number(self) -> int:
        """
        Get the next available citation number for adding new documents to the mapping.

        This method returns the next citation number that should be used when adding
        new documents via update_citation_mapping(). Useful when dynamically adding
        citations during processing (e.g., from tool results like web search).

        If no citations exist yet in the mapping, returns 1.
        Otherwise, returns max(existing_citation_numbers) + 1.

        Returns:
            The next available citation number (1-indexed integer).

        Example:
            # After adding citations 1, 2, 3
            processor.get_next_citation_number()  # Returns 4

            # With non-sequential citations 1, 5, 10
            processor.get_next_citation_number()  # Returns 11
        """
        if not self.citation_to_doc:
            return 1
        return max(self.citation_to_doc.keys()) + 1


================================================
FILE: backend/onyx/chat/citation_utils.py
================================================
import re

from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.context.search.models import SearchDocsResponse
from onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES
from onyx.tools.models import ToolResponse


def update_citation_processor_from_tool_response(
    tool_response: ToolResponse,
    citation_processor: DynamicCitationProcessor,
) -> None:
    """Update citation processor if this was a citeable tool with a SearchDocsResponse.

    Checks if the tool call is citeable and if the response contains a SearchDocsResponse,
    then creates a mapping from citation numbers to SearchDoc objects and updates the
    citation processor.

    Args:
        tool_response: The response from the tool execution (must have tool_call set)
        citation_processor: The DynamicCitationProcessor to update
    """
    # Early return if tool_call is not set
    if tool_response.tool_call is None:
        return

    # Update citation processor if this was a search tool
    if tool_response.tool_call.tool_name in CITEABLE_TOOLS_NAMES:
        # Check if the rich_response is a SearchDocsResponse
        if isinstance(tool_response.rich_response, SearchDocsResponse):
            search_response = tool_response.rich_response

            # Create mapping from citation number to SearchDoc
            citation_to_doc: CitationMapping = {}
            for (
                citation_num,
                doc_id,
            ) in search_response.citation_mapping.items():
                # Find the SearchDoc with this doc_id
                matching_doc = next(
                    (
                        doc
                        for doc in search_response.search_docs
                        if doc.document_id == doc_id
                    ),
                    None,
                )
                if matching_doc:
                    citation_to_doc[citation_num] = matching_doc

            # Update the citation processor
            citation_processor.update_citation_mapping(citation_to_doc)


def extract_citation_order_from_text(text: str) -> list[int]:
    """Extract citation numbers from text in order of first appearance.

    Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
    the citation numbers in the order they first appear in the text.

    Args:
        text: The text containing citations

    Returns:
        List of citation numbers in order of first appearance (no duplicates)
    """
    # Same pattern used in collapse_citations and DynamicCitationProcessor
    # Group 2 captures the number in double bracket format: [[1]], 【【1】】
    # Group 4 captures the numbers in single bracket format: [1], [1, 2]
    citation_pattern = re.compile(
        r"([\[【［]{2}(\d+)[\]】］]{2})|([\[【［]([\d]+(?: *, *\d+)*)[\]】］])"
    )
    seen: set[int] = set()
    order: list[int] = []

    for match in citation_pattern.finditer(text):
        # Group 2 is for double bracket single number, group 4 is for single bracket
        if match.group(2):
            nums_str = match.group(2)
        elif match.group(4):
            nums_str = match.group(4)
        else:
            continue

        for num_str in nums_str.split(","):
            num_str = num_str.strip()
            if num_str:
                try:
                    num = int(num_str)
                    if num not in seen:
                        seen.add(num)
                        order.append(num)
                except ValueError:
                    continue

    return order


def collapse_citations(
    answer_text: str,
    existing_citation_mapping: CitationMapping,
    new_citation_mapping: CitationMapping,
) -> tuple[str, CitationMapping]:
    """Collapse the citations in the text to use the smallest possible numbers.

    This function takes citations in the text (like [25], [30], etc.) and replaces them
    with the smallest possible numbers. It starts numbering from the next available
    integer after the existing citation mapping. If a citation refers to a document
    that already exists in the existing citation mapping (matched by document_id),
    it uses the existing citation number instead of assigning a new one.

    Args:
        answer_text: The text containing citations to collapse (e.g., "See [25] and [30]")
        existing_citation_mapping: Citations already processed/displayed. These mappings
            are preserved unchanged in the output.
        new_citation_mapping: Citations from the current text that need to be collapsed.
            The keys are the citation numbers as they appear in answer_text.

    Returns:
        A tuple of (updated_text, combined_mapping) where:
        - updated_text: The text with citations replaced with collapsed numbers
        - combined_mapping: All values from existing_citation_mapping plus the new
          mappings with their (possibly renumbered) keys
    """
    # Build a reverse lookup: document_id -> existing citation number
    doc_id_to_existing_citation: dict[str, int] = {
        doc.document_id: citation_num
        for citation_num, doc in existing_citation_mapping.items()
    }

    # Determine the next available citation number
    if existing_citation_mapping:
        next_citation_num = max(existing_citation_mapping.keys()) + 1
    else:
        next_citation_num = 1

    # Build the mapping from old citation numbers (in new_citation_mapping) to new numbers
    old_to_new: dict[int, int] = {}
    additional_mappings: CitationMapping = {}

    for old_num, search_doc in new_citation_mapping.items():
        doc_id = search_doc.document_id

        # Check if this document already exists in existing citations
        if doc_id in doc_id_to_existing_citation:
            # Use the existing citation number
            old_to_new[old_num] = doc_id_to_existing_citation[doc_id]
        else:
            # Check if we've already assigned a new number to this document
            # (handles case where same doc appears with different old numbers)
            existing_new_num = None
            for mapped_old, mapped_new in old_to_new.items():
                if (
                    mapped_old in new_citation_mapping
                    and new_citation_mapping[mapped_old].document_id == doc_id
                ):
                    existing_new_num = mapped_new
                    break

            if existing_new_num is not None:
                old_to_new[old_num] = existing_new_num
            else:
                # Assign the next available number
                old_to_new[old_num] = next_citation_num
                additional_mappings[next_citation_num] = search_doc
                next_citation_num += 1

    # Pattern to match citations like [25], [1, 2, 3], [[25]], etc.
    # Also matches unicode bracket variants: 【】, ［］
    citation_pattern = re.compile(
        r"([\[【［]{2}\d+[\]】］]{2})|([\[【［]\d+(?:, ?\d+)*[\]】］])"
    )

    def replace_citation(match: re.Match) -> str:
        """Replace citation numbers in a match with their new collapsed values."""
        citation_str = match.group()

        # Determine bracket style
        if (
            citation_str.startswith("[[")
            or citation_str.startswith("【【")
            or citation_str.startswith("［［")
        ):
            open_bracket = citation_str[:2]
            close_bracket = citation_str[-2:]
            content = citation_str[2:-2]
        else:
            open_bracket = citation_str[0]
            close_bracket = citation_str[-1]
            content = citation_str[1:-1]

        # Parse and replace citation numbers
        new_nums = []
        for num_str in content.split(","):
            num_str = num_str.strip()
            if not num_str:
                continue
            try:
                num = int(num_str)
                # Only replace if we have a mapping for this number
                if num in old_to_new:
                    new_nums.append(str(old_to_new[num]))
                else:
                    # Keep original if not in our mapping
                    new_nums.append(num_str)
            except ValueError:
                new_nums.append(num_str)

        # Reconstruct the citation with original bracket style
        new_content = ", ".join(new_nums)
        return f"{open_bracket}{new_content}{close_bracket}"

    # Replace all citations in the text
    updated_text = citation_pattern.sub(replace_citation, answer_text)

    # Build the combined mapping
    combined_mapping: CitationMapping = dict(existing_citation_mapping)
    combined_mapping.update(additional_mappings)

    return updated_text, combined_mapping


================================================
FILE: backend/onyx/chat/compression.py
================================================
"""
Chat history compression via summarization.

This module handles compressing long chat histories by summarizing older messages
while keeping recent messages verbatim.

Summaries are branch-aware: each summary's parent_message_id points to the last
message when compression triggered, making it part of the tree structure.
"""

from typing import NamedTuple

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.configs.chat_configs import COMPRESSION_TRIGGER_RATIO
from onyx.configs.constants import MessageType
from onyx.db.models import ChatMessage
from onyx.llm.interfaces import LLM
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.compression_prompts import PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK
from onyx.prompts.compression_prompts import PROGRESSIVE_USER_REMINDER
from onyx.prompts.compression_prompts import SUMMARIZATION_CUTOFF_MARKER
from onyx.prompts.compression_prompts import SUMMARIZATION_PROMPT
from onyx.prompts.compression_prompts import USER_REMINDER
from onyx.tracing.framework.create import ensure_trace
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

# Ratio of available context to allocate for recent messages after compression
RECENT_MESSAGES_RATIO = 0.2


class CompressionResult(BaseModel):
    """Result of a compression operation."""

    summary_created: bool
    messages_summarized: int
    error: str | None = None


class CompressionParams(BaseModel):
    """Parameters for compression operation."""

    should_compress: bool
    tokens_for_recent: int = 0


class SummaryContent(NamedTuple):
    """Messages split for summarization."""

    older_messages: list[ChatMessage]
    recent_messages: list[ChatMessage]


def calculate_total_history_tokens(chat_history: list[ChatMessage]) -> int:
    """
    Calculate the total token count for the given chat history.

    Args:
        chat_history: Branch-aware list of messages

    Returns:
        Total token count for the history
    """
    return sum(m.token_count or 0 for m in chat_history)


def get_compression_params(
    max_input_tokens: int,
    current_history_tokens: int,
    reserved_tokens: int,
) -> CompressionParams:
    """
    Calculate compression parameters based on model's context window.

    Args:
        max_input_tokens: The maximum input tokens for the LLM
        current_history_tokens: Current total tokens in chat history
        reserved_tokens: Tokens reserved for system prompt, tools, files, etc.

    Returns:
        CompressionParams indicating whether to compress and token budgets
    """
    available = max_input_tokens - reserved_tokens

    # Check trigger threshold
    trigger_threshold = int(available * COMPRESSION_TRIGGER_RATIO)

    if current_history_tokens <= trigger_threshold:
        return CompressionParams(should_compress=False)

    # Calculate token budget for recent messages as a percentage of current history
    # This ensures we always have messages to summarize when compression triggers
    tokens_for_recent = int(current_history_tokens * RECENT_MESSAGES_RATIO)

    return CompressionParams(
        should_compress=True,
        tokens_for_recent=tokens_for_recent,
    )


def find_summary_for_branch(
    db_session: Session,
    chat_history: list[ChatMessage],
) -> ChatMessage | None:
    """
    Find the most recent summary that applies to the current branch.

    A summary applies if its parent_message_id is in the current chat history,
    meaning it was created on this branch.

    Args:
        db_session: Database session
        chat_history: Branch-aware list of messages

    Returns:
        The applicable summary message, or None if no summary exists for this branch
    """
    if not chat_history:
        return None

    history_ids = {m.id for m in chat_history}
    chat_session_id = chat_history[0].chat_session_id

    # Query all summaries for this session (typically few), then filter in Python.
    # Order by time_sent descending to get the most recent summary first.
    summaries = (
        db_session.query(ChatMessage)
        .filter(
            ChatMessage.chat_session_id == chat_session_id,
            ChatMessage.last_summarized_message_id.isnot(None),
        )
        .order_by(ChatMessage.time_sent.desc())
        .all()
    )
    # Optimization to avoid using IN clause for large histories
    for summary in summaries:
        if summary.parent_message_id in history_ids:
            return summary

    return None


def get_messages_to_summarize(
    chat_history: list[ChatMessage],
    existing_summary: ChatMessage | None,
    tokens_for_recent: int,
) -> SummaryContent:
    """
    Split messages into those to summarize and those to keep verbatim.

    Args:
        chat_history: Branch-aware list of messages
        existing_summary: Existing summary for this branch (if any)
        tokens_for_recent: Token budget for recent messages to keep

    Returns:
        SummaryContent with older_messages to summarize and recent_messages to keep
    """
    # Filter to messages after the existing summary's cutoff using timestamp
    if existing_summary and existing_summary.last_summarized_message_id:
        cutoff_id = existing_summary.last_summarized_message_id
        last_summarized_msg = next(m for m in chat_history if m.id == cutoff_id)
        messages = [
            m for m in chat_history if m.time_sent > last_summarized_msg.time_sent
        ]
    else:
        messages = list(chat_history)

    # Filter out empty messages
    messages = [m for m in messages if m.message]

    if not messages:
        return SummaryContent(older_messages=[], recent_messages=[])

    # Work backwards from most recent, keeping messages until we exceed budget
    recent_messages: list[ChatMessage] = []
    tokens_used = 0

    for msg in reversed(messages):
        msg_tokens = msg.token_count or 0
        if tokens_used + msg_tokens > tokens_for_recent and recent_messages:
            break
        recent_messages.insert(0, msg)
        tokens_used += msg_tokens

    # Ensure cutoff is right before a user message by moving any leading
    # non-user messages from recent_messages to older_messages
    while recent_messages and recent_messages[0].message_type != MessageType.USER:
        recent_messages.pop(0)

    # Everything else gets summarized
    recent_ids = {m.id for m in recent_messages}
    older_messages = [m for m in messages if m.id not in recent_ids]

    return SummaryContent(
        older_messages=older_messages, recent_messages=recent_messages
    )


def _build_llm_messages_for_summarization(
    messages: list[ChatMessage],
    tool_id_to_name: dict[int, str],
) -> list[UserMessage | AssistantMessage]:
    """Convert ChatMessage objects to LLM message format for summarization.

    This is intentionally different from translate_history_to_llm_format in llm_step.py:
    - Compacts tool calls to "[Used tools: tool1, tool2]" to save tokens in summaries
    - Skips TOOL_CALL_RESPONSE messages entirely (tool usage captured in assistant message)
    - No image/multimodal handling (summaries are text-only)
    - No caching or LLMConfig-specific behavior needed
    """
    result: list[UserMessage | AssistantMessage] = []

    for msg in messages:
        # Skip empty messages
        if not msg.message:
            continue

        # Handle assistant messages with tool calls compactly
        if msg.message_type == MessageType.ASSISTANT:
            if msg.tool_calls:
                tool_names = [
                    tool_id_to_name.get(tc.tool_id, "unknown") for tc in msg.tool_calls
                ]
                result.append(
                    AssistantMessage(content=f"[Used tools: {', '.join(tool_names)}]")
                )
            else:
                result.append(AssistantMessage(content=msg.message))
            continue

        # Skip tool call response messages - tool calls are captured above via assistant messages
        if msg.message_type == MessageType.TOOL_CALL_RESPONSE:
            continue

        # Handle user messages
        if msg.message_type == MessageType.USER:
            result.append(UserMessage(content=msg.message))

    return result


def generate_summary(
    older_messages: list[ChatMessage],
    recent_messages: list[ChatMessage],
    llm: LLM,
    tool_id_to_name: dict[int, str],
    existing_summary: str | None = None,
) -> str:
    """
    Generate a summary using cutoff marker approach.

    The cutoff marker tells the LLM to summarize only older messages,
    while using recent messages as context to inform what's important.

    Messages are sent as separate UserMessage/AssistantMessage objects rather
    than being concatenated into a single message.

    Args:
        older_messages: Messages to compress into summary (before cutoff)
        recent_messages: Messages kept verbatim (after cutoff, for context only)
        llm: LLM to use for summarization
        tool_id_to_name: Mapping of tool IDs to display names
        existing_summary: Previous summary text to incorporate (progressive)

    Returns:
        Summary text
    """
    # Build system prompt
    system_content = SUMMARIZATION_PROMPT
    if existing_summary:
        # Progressive summarization: append existing summary to system prompt
        system_content += PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK.format(
            previous_summary=existing_summary
        )
        final_reminder = PROGRESSIVE_USER_REMINDER
    else:
        final_reminder = USER_REMINDER

    # Convert messages to LLM format (using compression-specific conversion)
    older_llm_messages = _build_llm_messages_for_summarization(
        older_messages, tool_id_to_name
    )
    recent_llm_messages = _build_llm_messages_for_summarization(
        recent_messages, tool_id_to_name
    )

    # Build message list with separate messages
    input_messages: list[ChatCompletionMessage] = [
        SystemMessage(content=system_content),
    ]

    # Add older messages (to be summarized)
    input_messages.extend(older_llm_messages)

    # Add cutoff marker as a user message
    input_messages.append(UserMessage(content=SUMMARIZATION_CUTOFF_MARKER))

    # Add recent messages (for context only)
    input_messages.extend(recent_llm_messages)

    # Add final reminder
    input_messages.append(UserMessage(content=final_reminder))

    with llm_generation_span(
        llm=llm,
        flow="chat_history_summarization",
        input_messages=input_messages,
    ) as span_generation:
        response = llm.invoke(input_messages)
        record_llm_response(span_generation, response)

    content = response.choice.message.content
    if not (content and content.strip()):
        raise ValueError("LLM returned empty summary")
    return content.strip()


def compress_chat_history(
    db_session: Session,
    chat_history: list[ChatMessage],
    llm: LLM,
    compression_params: CompressionParams,
    tool_id_to_name: dict[int, str],
) -> CompressionResult:
    """
    Main compression function. Creates a summary ChatMessage.

    The summary message's parent_message_id points to the last message in
    chat_history, making it branch-aware via the tree structure.

    Note: This takes the entire chat history as input, splits it into older
    messages (to summarize) and recent messages (kept verbatim within the
    token budget), generates a summary of the older part, and persists the
    new summary message with its parent set to the last message in history.

    Past summary is taken into context (progressive summarization): we find
    at most one existing summary for this branch. If present, only messages
    after that summary's last_summarized_message_id are considered; the
    existing summary text is passed into the LLM so the new summary
    incorporates it instead of summarizing from scratch.

    For more details, see the COMPRESSION.md file.

    Args:
        db_session: Database session
        chat_history: Branch-aware list of messages
        llm: LLM to use for summarization
        compression_params: Parameters from get_compression_params
        tool_id_to_name: Mapping of tool IDs to display names

    Returns:
        CompressionResult indicating success/failure
    """
    if not chat_history:
        return CompressionResult(summary_created=False, messages_summarized=0)

    chat_session_id = chat_history[0].chat_session_id

    logger.info(
        f"Starting compression for session {chat_session_id}, "
        f"history_len={len(chat_history)}, tokens_for_recent={compression_params.tokens_for_recent}"
    )

    with ensure_trace(
        "chat_history_compression",
        group_id=str(chat_session_id),
        metadata={
            "tenant_id": get_current_tenant_id(),
            "chat_session_id": str(chat_session_id),
        },
    ):
        try:
            # Find existing summary for this branch
            existing_summary = find_summary_for_branch(db_session, chat_history)

            # Get messages to summarize
            summary_content = get_messages_to_summarize(
                chat_history,
                existing_summary,
                tokens_for_recent=compression_params.tokens_for_recent,
            )

            if not summary_content.older_messages:
                logger.debug("No messages to summarize, skipping compression")
                return CompressionResult(summary_created=False, messages_summarized=0)

            # Generate summary (incorporate existing summary if present)
            existing_summary_text = (
                existing_summary.message if existing_summary else None
            )
            summary_text = generate_summary(
                older_messages=summary_content.older_messages,
                recent_messages=summary_content.recent_messages,
                llm=llm,
                tool_id_to_name=tool_id_to_name,
                existing_summary=existing_summary_text,
            )

            # Calculate token count for the summary
            tokenizer = get_tokenizer(None, None)
            summary_token_count = len(tokenizer.encode(summary_text))
            logger.debug(
                f"Generated summary ({summary_token_count} tokens): {summary_text[:200]}..."
            )

            # Create new summary as a ChatMessage
            # Parent is the last message in history - this makes the summary branch-aware
            summary_message = ChatMessage(
                chat_session_id=chat_session_id,
                message_type=MessageType.ASSISTANT,
                message=summary_text,
                token_count=summary_token_count,
                parent_message_id=chat_history[-1].id,
                last_summarized_message_id=summary_content.older_messages[-1].id,
            )
            db_session.add(summary_message)
            db_session.commit()

            logger.info(
                f"Compressed {len(summary_content.older_messages)} messages into summary "
                f"(session_id={chat_session_id}, "
                f"summary_tokens={summary_token_count})"
            )

            return CompressionResult(
                summary_created=True,
                messages_summarized=len(summary_content.older_messages),
            )

        except Exception as e:
            logger.exception(f"Compression failed for session {chat_session_id}: {e}")
            db_session.rollback()
            return CompressionResult(
                summary_created=False,
                messages_summarized=0,
                error=str(e),
            )


================================================
FILE: backend/onyx/chat/emitter.py
================================================
import threading
from queue import Queue

from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet


class Emitter:
    """Routes packets from LLM/tool execution to the ``_run_models`` drain loop.

    Tags every packet with ``model_index`` and places it on ``merged_queue``
    as a ``(model_idx, packet)`` tuple for ordered consumption downstream.

    Args:
        merged_queue: Shared queue owned by ``_run_models``.
        model_idx: Index embedded in packet placements (``0`` for N=1 runs).
        drain_done: Optional event set by ``_run_models`` when the drain loop
            exits early (e.g. HTTP disconnect). When set, ``emit`` returns
            immediately so worker threads can exit fast.
    """

    def __init__(
        self,
        merged_queue: Queue[tuple[int, Packet | Exception | object]],
        model_idx: int = 0,
        drain_done: threading.Event | None = None,
    ) -> None:
        self._model_idx = model_idx
        self._merged_queue = merged_queue
        self._drain_done = drain_done

    def emit(self, packet: Packet) -> None:
        if self._drain_done is not None and self._drain_done.is_set():
            return
        base = packet.placement or Placement(turn_index=0)
        tagged = Packet(
            placement=base.model_copy(update={"model_index": self._model_idx}),
            obj=packet.obj,
        )
        self._merged_queue.put((self._model_idx, tagged))


================================================
FILE: backend/onyx/chat/llm_loop.py
================================================
import json
import time
from collections.abc import Callable
from typing import Any
from typing import Literal

from sqlalchemy.orm import Session

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_utils import create_tool_call_failure_messages
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import CitationMode
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.citation_utils import update_citation_processor_from_tool_response
from onyx.chat.emitter import Emitter
from onyx.chat.llm_step import extract_tool_calls_from_response_text
from onyx.chat.llm_step import run_llm_step
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ToolCallSimple
from onyx.chat.prompt_utils import build_reminder_message
from onyx.chat.prompt_utils import build_system_prompt
from onyx.chat.prompt_utils import (
    get_default_base_system_prompt,
)
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import SearchDocsResponse
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.memory import add_memory
from onyx.db.memory import update_memory_at_index
from onyx.db.memory import UserMemoryContext
from onyx.db.models import Persona
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.utils import is_true_openai_model
from onyx.prompts.chat_prompts import IMAGE_GEN_REMINDER
from onyx.prompts.chat_prompts import OPEN_URL_REMINDER
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ToolCallDebug
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES
from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import CustomToolCallSummary
from onyx.tools.models import MemoryToolResponseSnapshot
from onyx.tools.models import PythonToolRichResponse
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
    FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.memory.models import MemoryToolResponse
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


class EmptyLLMResponseError(RuntimeError):
    """Raised when the streamed LLM response completes without a usable answer."""

    def __init__(
        self,
        *,
        provider: str,
        model: str,
        tool_choice: ToolChoiceOptions,
        client_error_msg: str,
        error_code: str = "EMPTY_LLM_RESPONSE",
        is_retryable: bool = True,
    ) -> None:
        super().__init__(client_error_msg)
        self.provider = provider
        self.model = model
        self.tool_choice = tool_choice
        self.client_error_msg = client_error_msg
        self.error_code = error_code
        self.is_retryable = is_retryable


def _build_empty_llm_response_error(
    llm: LLM,
    llm_step_result: LlmStepResult,
    tool_choice: ToolChoiceOptions,
) -> EmptyLLMResponseError:
    provider = llm.config.model_provider
    model = llm.config.model_name

    # OpenAI quota exhaustion has reached us as a streamed "stop" with zero content.
    # When the stream is completely empty and there is no reasoning/tool output, surface
    # the likely account-level cause instead of a generic tool-calling error.
    if (
        not llm_step_result.reasoning
        and provider == LlmProviderNames.OPENAI
        and is_true_openai_model(provider, model)
    ):
        return EmptyLLMResponseError(
            provider=provider,
            model=model,
            tool_choice=tool_choice,
            client_error_msg=(
                "The selected OpenAI model returned an empty streamed response "
                "before producing any tokens. This commonly happens when the API "
                "key or project has no remaining quota or billing is not enabled. "
                "Verify quota and billing for this key and try again."
            ),
            error_code="BUDGET_EXCEEDED",
            is_retryable=False,
        )

    return EmptyLLMResponseError(
        provider=provider,
        model=model,
        tool_choice=tool_choice,
        client_error_msg=(
            "The selected model returned no final answer before the stream "
            "completed. No text or tool calls were received from the upstream "
            "provider."
        ),
    )


def _looks_like_xml_tool_call_payload(text: str | None) -> bool:
    """Detect XML-style marshaled tool calls emitted as plain text."""
    if not text:
        return False
    lowered = text.lower()
    return (
        "<function_calls" in lowered
        and "<invoke" in lowered
        and "<parameter" in lowered
    )


def _try_fallback_tool_extraction(
    llm_step_result: LlmStepResult,
    tool_choice: ToolChoiceOptions,
    fallback_extraction_attempted: bool,
    tool_defs: list[dict],
    turn_index: int,
) -> tuple[LlmStepResult, bool]:
    """Attempt to extract tool calls from response text as a fallback.

    This is a last resort fallback for low quality LLMs or those that don't have
    tool calling from the serving layer. Also triggers if there's reasoning but
    no answer and no tool calls.

    Args:
        llm_step_result: The result from the LLM step
        tool_choice: The tool choice option used for this step
        fallback_extraction_attempted: Whether fallback extraction was already attempted
        tool_defs: List of tool definitions
        turn_index: The current turn index for placement

    Returns:
        Tuple of (possibly updated LlmStepResult, whether fallback was attempted this call)
    """
    if fallback_extraction_attempted:
        return llm_step_result, False

    no_tool_calls = (
        not llm_step_result.tool_calls or len(llm_step_result.tool_calls) == 0
    )
    reasoning_but_no_answer_or_tools = (
        llm_step_result.reasoning and not llm_step_result.answer and no_tool_calls
    )
    xml_tool_call_text_detected = no_tool_calls and (
        _looks_like_xml_tool_call_payload(llm_step_result.answer)
        or _looks_like_xml_tool_call_payload(llm_step_result.raw_answer)
        or _looks_like_xml_tool_call_payload(llm_step_result.reasoning)
    )
    should_try_fallback = (
        (tool_choice == ToolChoiceOptions.REQUIRED and no_tool_calls)
        or reasoning_but_no_answer_or_tools
        or xml_tool_call_text_detected
    )

    if not should_try_fallback:
        return llm_step_result, False

    # Try to extract from answer first, then fall back to reasoning
    extracted_tool_calls: list[ToolCallKickoff] = []

    if llm_step_result.answer:
        extracted_tool_calls = extract_tool_calls_from_response_text(
            response_text=llm_step_result.answer,
            tool_definitions=tool_defs,
            placement=Placement(turn_index=turn_index),
        )
    if (
        not extracted_tool_calls
        and llm_step_result.raw_answer
        and llm_step_result.raw_answer != llm_step_result.answer
    ):
        extracted_tool_calls = extract_tool_calls_from_response_text(
            response_text=llm_step_result.raw_answer,
            tool_definitions=tool_defs,
            placement=Placement(turn_index=turn_index),
        )
    if not extracted_tool_calls and llm_step_result.reasoning:
        extracted_tool_calls = extract_tool_calls_from_response_text(
            response_text=llm_step_result.reasoning,
            tool_definitions=tool_defs,
            placement=Placement(turn_index=turn_index),
        )
    if extracted_tool_calls:
        logger.info(
            f"Extracted {len(extracted_tool_calls)} tool call(s) from response text as fallback"
        )
        return (
            LlmStepResult(
                reasoning=llm_step_result.reasoning,
                answer=llm_step_result.answer,
                tool_calls=extracted_tool_calls,
                raw_answer=llm_step_result.raw_answer,
            ),
            True,
        )

    return llm_step_result, True


# Hardcoded oppinionated value, might breaks down to something like:
# Cycle 1: Calls web_search for something
# Cycle 2: Calls open_url for some results
# Cycle 3: Calls web_search for some other aspect of the question
# Cycle 4: Calls open_url for some results
# Cycle 5: Maybe call open_url for some additional results or because last set failed
# Cycle 6: No more tools available, forced to answer
MAX_LLM_CYCLES = 6


def _build_context_file_citation_mapping(
    file_metadata: list[ContextFileMetadata],
    starting_citation_num: int = 1,
) -> CitationMapping:
    """Build citation mapping for context files.

    Converts context file metadata into SearchDoc objects that can be cited.
    Citation numbers start from the provided starting number.

    Args:
        file_metadata: List of context file metadata
        starting_citation_num: Starting citation number (default: 1)

    Returns:
        Dictionary mapping citation numbers to SearchDoc objects
    """
    citation_mapping: CitationMapping = {}

    for idx, file_meta in enumerate(file_metadata, start=starting_citation_num):
        search_doc = SearchDoc(
            document_id=file_meta.file_id,
            chunk_ind=0,
            semantic_identifier=file_meta.filename,
            link=None,
            blurb=file_meta.file_content,
            source_type=DocumentSource.FILE,
            boost=1,
            hidden=False,
            metadata={},
            score=0.0,
            match_highlights=[file_meta.file_content],
        )
        citation_mapping[idx] = search_doc

    return citation_mapping


def _build_project_message(
    context_files: ExtractedContextFiles | None,
    token_counter: Callable[[str], int] | None,
) -> list[ChatMessageSimple]:
    """Build messages for context-injected / tool-backed files.

    Returns up to two messages:
    1. The full-text files message (if file_texts is populated).
    2. A lightweight metadata message for files the LLM should access via the
       FileReaderTool (e.g. oversized files that don't fit in context).
    """
    if not context_files:
        return []

    messages: list[ChatMessageSimple] = []
    if context_files.file_texts:
        messages.append(
            _create_context_files_message(context_files, token_counter=None)
        )
    if context_files.file_metadata_for_tool and token_counter:
        messages.append(
            _create_file_tool_metadata_message(
                context_files.file_metadata_for_tool, token_counter
            )
        )
    return messages


def construct_message_history(
    system_prompt: ChatMessageSimple | None,
    custom_agent_prompt: ChatMessageSimple | None,
    simple_chat_history: list[ChatMessageSimple],
    reminder_message: ChatMessageSimple | None,
    context_files: ExtractedContextFiles | None,
    available_tokens: int,
    last_n_user_messages: int | None = None,
    token_counter: Callable[[str], int] | None = None,
    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,
) -> list[ChatMessageSimple]:
    if last_n_user_messages is not None:
        if last_n_user_messages <= 0:
            raise ValueError(
                "filtering chat history by last N user messages must be a value greater than 0"
            )

    # Build the project / file-metadata messages up front so we can use their
    # actual token counts for the budget.
    project_messages = _build_project_message(context_files, token_counter)
    project_messages_tokens = sum(m.token_count for m in project_messages)

    history_token_budget = available_tokens
    history_token_budget -= system_prompt.token_count if system_prompt else 0
    history_token_budget -= (
        custom_agent_prompt.token_count if custom_agent_prompt else 0
    )
    history_token_budget -= project_messages_tokens
    history_token_budget -= reminder_message.token_count if reminder_message else 0

    if history_token_budget < 0:
        raise ValueError("Not enough tokens available to construct message history")

    if system_prompt:
        system_prompt.should_cache = True

    # If no history, build minimal context
    if not simple_chat_history:
        result = [system_prompt] if system_prompt else []
        if custom_agent_prompt:
            result.append(custom_agent_prompt)
        result.extend(project_messages)
        if reminder_message:
            result.append(reminder_message)
        return result

    # If last_n_user_messages is set, filter history to only include the last n user messages
    if last_n_user_messages is not None:
        # Find all user message indices
        user_msg_indices = [
            i
            for i, msg in enumerate(simple_chat_history)
            if msg.message_type == MessageType.USER
        ]

        if not user_msg_indices:
            raise ValueError("No user message found in simple_chat_history")

        # If we have more than n user messages, keep only the last n
        if len(user_msg_indices) > last_n_user_messages:
            # Find the index of the n-th user message from the end
            # For example, if last_n_user_messages=2, we want the 2nd-to-last user message
            nth_user_msg_index = user_msg_indices[-(last_n_user_messages)]
            # Keep everything from that user message onwards
            simple_chat_history = simple_chat_history[nth_user_msg_index:]

    # Find the last USER message in the history
    # The history may contain tool calls and responses after the last user message
    last_user_msg_index = None
    for i in range(len(simple_chat_history) - 1, -1, -1):
        if simple_chat_history[i].message_type == MessageType.USER:
            last_user_msg_index = i
            break

    if last_user_msg_index is None:
        raise ValueError("No user message found in simple_chat_history")

    # Split history into three parts:
    # 1. History before the last user message
    # 2. The last user message
    # 3. Messages after the last user message (tool calls, responses, etc.)
    history_before_last_user = simple_chat_history[:last_user_msg_index]
    last_user_message = simple_chat_history[last_user_msg_index]
    messages_after_last_user = simple_chat_history[last_user_msg_index + 1 :]

    # Calculate tokens needed for the last user message and everything after it
    last_user_tokens = last_user_message.token_count
    after_user_tokens = sum(msg.token_count for msg in messages_after_last_user)

    # Check if we can fit at least the last user message and messages after it
    required_tokens = last_user_tokens + after_user_tokens
    if required_tokens > history_token_budget:
        raise ValueError(
            f"Not enough tokens to include the last user message and subsequent messages. "
            f"Required: {required_tokens}, Available: {history_token_budget}"
        )

    # Calculate remaining budget for history before the last user message
    remaining_budget = history_token_budget - required_tokens

    # Truncate history_before_last_user from the top to fit in remaining budget.
    # Track dropped file messages so we can provide their metadata to the
    # FileReaderTool instead.
    truncated_history_before: list[ChatMessageSimple] = []
    dropped_file_ids: list[str] = []
    current_token_count = 0

    for msg in reversed(history_before_last_user):
        if current_token_count + msg.token_count <= remaining_budget:
            msg.should_cache = True
            truncated_history_before.insert(0, msg)
            current_token_count += msg.token_count
        else:
            # Can't fit this message, stop truncating.
            # This message and everything older is dropped.
            break

    # Collect file_ids from ALL dropped messages (those not in
    # truncated_history_before). The truncation loop above keeps the most
    # recent messages, so the dropped ones are at the start of the original
    # list up to (len(history) - len(kept)).
    num_kept = len(truncated_history_before)
    for msg in history_before_last_user[: len(history_before_last_user) - num_kept]:
        if msg.file_id is not None:
            dropped_file_ids.append(msg.file_id)

    # Also treat "orphaned" metadata entries as dropped -- these are files
    # from messages removed by summary truncation (before convert_chat_history
    # ran), so no ChatMessageSimple was ever tagged with their file_id.
    if all_injected_file_metadata:
        surviving_file_ids = {
            msg.file_id for msg in simple_chat_history if msg.file_id is not None
        }
        for fid in all_injected_file_metadata:
            if fid not in surviving_file_ids and fid not in dropped_file_ids:
                dropped_file_ids.append(fid)

    # Build a forgotten-files metadata message if any file messages were
    # dropped AND we have metadata for them (meaning the FileReaderTool is
    # available). Reserve tokens for this message in the budget.
    forgotten_files_message: ChatMessageSimple | None = None
    if dropped_file_ids and all_injected_file_metadata and token_counter:
        forgotten_meta = [
            all_injected_file_metadata[fid]
            for fid in dropped_file_ids
            if fid in all_injected_file_metadata
        ]
        if forgotten_meta:
            logger.debug(
                f"FileReader: building forgotten-files message for {[(m.file_id, m.filename) for m in forgotten_meta]}"
            )
            forgotten_files_message = _create_file_tool_metadata_message(
                forgotten_meta, token_counter
            )
            # Shrink the remaining budget. If the metadata message doesn't
            # fit we may need to drop more history messages.
            remaining_budget -= forgotten_files_message.token_count
            while truncated_history_before and current_token_count > remaining_budget:
                evicted = truncated_history_before.pop(0)
                current_token_count -= evicted.token_count
                # If the evicted message is itself a file, add it to the
                # forgotten metadata (it's now dropped too).
                if (
                    evicted.file_id is not None
                    and evicted.file_id in all_injected_file_metadata
                    and evicted.file_id not in {m.file_id for m in forgotten_meta}
                ):
                    forgotten_meta.append(all_injected_file_metadata[evicted.file_id])
                    # Rebuild the message with the new entry
                    forgotten_files_message = _create_file_tool_metadata_message(
                        forgotten_meta, token_counter
                    )

    # Attach project images to the last user message
    if context_files and context_files.image_files:
        existing_images = last_user_message.image_files or []
        last_user_message = ChatMessageSimple(
            message=last_user_message.message,
            token_count=last_user_message.token_count,
            message_type=last_user_message.message_type,
            image_files=existing_images + context_files.image_files,
        )

    # Build the final message list according to README ordering:
    # [system], [history_before_last_user], [custom_agent], [context_files],
    # [forgotten_files], [last_user_message], [messages_after_last_user], [reminder]
    result = [system_prompt] if system_prompt else []

    # 1. Add truncated history before last user message
    result.extend(truncated_history_before)

    # 2. Add custom agent prompt (inserted before last user message)
    if custom_agent_prompt:
        result.append(custom_agent_prompt)

    # 3. Add context files / file-metadata messages (inserted before last user message)
    result.extend(project_messages)

    # 4. Add forgotten-files metadata (right before the user's question)
    if forgotten_files_message:
        result.append(forgotten_files_message)

    # 5. Add last user message (with context images attached)
    result.append(last_user_message)

    # 6. Add messages after last user message (tool calls, responses, etc.)
    result.extend(messages_after_last_user)

    # 7. Add reminder message at the very end
    if reminder_message:
        result.append(reminder_message)

    return _drop_orphaned_tool_call_responses(result)


def _drop_orphaned_tool_call_responses(
    messages: list[ChatMessageSimple],
) -> list[ChatMessageSimple]:
    """Drop tool response messages whose tool_call_id is not in prior assistant tool calls.

    This can happen when history truncation drops an ASSISTANT tool-call message but
    leaves a later TOOL_CALL_RESPONSE message in context. Some providers (e.g. Ollama)
    reject such history with an "unexpected tool call id" error.
    """
    known_tool_call_ids: set[str] = set()
    sanitized: list[ChatMessageSimple] = []

    for msg in messages:
        if msg.message_type == MessageType.ASSISTANT and msg.tool_calls:
            for tool_call in msg.tool_calls:
                known_tool_call_ids.add(tool_call.tool_call_id)
            sanitized.append(msg)
            continue

        if msg.message_type == MessageType.TOOL_CALL_RESPONSE:
            if msg.tool_call_id and msg.tool_call_id in known_tool_call_ids:
                sanitized.append(msg)
            else:
                logger.debug(
                    "Dropping orphaned tool response with tool_call_id=%s while constructing message history",
                    msg.tool_call_id,
                )
            continue

        sanitized.append(msg)

    return sanitized


def _create_file_tool_metadata_message(
    file_metadata: list[FileToolMetadata],
    token_counter: Callable[[str], int],
) -> ChatMessageSimple:
    """Build a lightweight metadata-only message listing files available via FileReaderTool.

    Used when files are too large to fit in context and the vector DB is
    disabled, so the LLM must use ``read_file`` to inspect them.
    """
    lines = [
        "You have access to the following files. Use the read_file tool to "
        "read sections of any file. You MUST pass the file_id UUID (not the "
        "filename) to read_file:"
    ]
    for meta in file_metadata:
        lines.append(
            f'- file_id="{meta.file_id}" filename="{meta.filename}" (~{meta.approx_char_count:,} chars)'
        )

    message_content = "\n".join(lines)
    return ChatMessageSimple(
        message=message_content,
        token_count=token_counter(message_content),
        message_type=MessageType.USER,
    )


def _create_context_files_message(
    context_files: ExtractedContextFiles,
    token_counter: Callable[[str], int] | None,  # noqa: ARG001
) -> ChatMessageSimple:
    """Convert context files to a ChatMessageSimple message.

    Format follows the README specification for document representation.
    """
    import json

    # Format as documents JSON as described in README
    documents_list = []
    for idx, file_text in enumerate(context_files.file_texts, start=1):
        title = (
            context_files.file_metadata[idx - 1].filename
            if idx - 1 < len(context_files.file_metadata)
            else None
        )
        entry: dict[str, Any] = {"document": idx}
        if title:
            entry["title"] = title
        entry["contents"] = file_text
        documents_list.append(entry)

    documents_json = json.dumps({"documents": documents_list}, indent=2)
    message_content = f"Here are some documents provided for context, they may not all be relevant:\n{documents_json}"

    # Use pre-calculated token count from context_files
    return ChatMessageSimple(
        message=message_content,
        token_count=context_files.total_token_count,
        message_type=MessageType.USER,
    )


def run_llm_loop(
    emitter: Emitter,
    state_container: ChatStateContainer,
    simple_chat_history: list[ChatMessageSimple],
    tools: list[Tool],
    custom_agent_prompt: str | None,
    context_files: ExtractedContextFiles,
    persona: Persona | None,
    user_memory_context: UserMemoryContext | None,
    llm: LLM,
    token_counter: Callable[[str], int],
    db_session: Session,
    forced_tool_id: int | None = None,
    user_identity: LLMUserIdentity | None = None,
    chat_session_id: str | None = None,
    chat_files: list[ChatFile] | None = None,
    include_citations: bool = True,
    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,
    inject_memories_in_prompt: bool = True,
) -> None:
    with trace(
        "run_llm_loop",
        group_id=chat_session_id,
        metadata={
            "tenant_id": get_current_tenant_id(),
            "chat_session_id": chat_session_id,
        },
    ):
        # Fix some LiteLLM issues,
        from onyx.llm.litellm_singleton.config import (
            initialize_litellm,
        )  # Here for lazy load LiteLLM

        initialize_litellm()

        # Track when the loop starts for calculating time-to-answer
        loop_start_time = time.monotonic()

        # Initialize citation processor for handling citations dynamically
        # When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)
        # When include_citations is False, use REMOVE mode to strip citations from output
        citation_processor = DynamicCitationProcessor(
            citation_mode=(
                CitationMode.HYPERLINK if include_citations else CitationMode.REMOVE
            )
        )

        # Add project file citation mappings if project files are present
        project_citation_mapping: CitationMapping = {}
        if context_files.file_metadata:
            project_citation_mapping = _build_context_file_citation_mapping(
                context_files.file_metadata
            )
            citation_processor.update_citation_mapping(project_citation_mapping)

        llm_step_result = LlmStepResult(
            reasoning=None,
            answer=None,
            tool_calls=None,
            raw_answer=None,
        )

        # Pass the total budget to construct_message_history, which will handle token allocation
        available_tokens = llm.config.max_input_tokens
        tool_choice: ToolChoiceOptions = ToolChoiceOptions.AUTO
        # Initialize gathered_documents with project files if present
        gathered_documents: list[SearchDoc] | None = (
            list(project_citation_mapping.values())
            if project_citation_mapping
            else None
        )
        # TODO allow citing of images in Projects. Since attached to the last user message, it has no text associated with it.
        # One future workaround is to include the images as separate user messages with citation information and process those.
        always_cite_documents: bool = bool(
            context_files.use_as_search_filter or context_files.file_texts
        )
        should_cite_documents: bool = False
        ran_image_gen: bool = False
        just_ran_web_search: bool = False
        has_called_search_tool: bool = False
        code_interpreter_file_generated: bool = False
        fallback_extraction_attempted: bool = False
        citation_mapping: dict[int, str] = {}  # Maps citation_num -> document_id/URL

        # Fetch this in a short-lived session so the long-running stream loop does
        # not pin a connection just to keep read state alive.
        with get_session_with_current_tenant() as prompt_db_session:
            default_base_system_prompt: str = get_default_base_system_prompt(
                prompt_db_session
            )
        system_prompt = None
        custom_agent_prompt_msg = None

        reasoning_cycles = 0
        for llm_cycle_count in range(MAX_LLM_CYCLES):
            # Handling tool calls based on cycle count and past cycle conditions
            out_of_cycles = llm_cycle_count == MAX_LLM_CYCLES - 1
            if forced_tool_id:
                # Needs to be just the single one because the "required" currently doesn't have a specified tool, just a binary
                final_tools = [tool for tool in tools if tool.id == forced_tool_id]
                if not final_tools:
                    raise ValueError(f"Tool {forced_tool_id} not found in tools")
                tool_choice = ToolChoiceOptions.REQUIRED
                forced_tool_id = None
            elif out_of_cycles or ran_image_gen:
                # Last cycle, no tools allowed, just answer!
                tool_choice = ToolChoiceOptions.NONE
                final_tools = []
            else:
                tool_choice = ToolChoiceOptions.AUTO
                final_tools = tools

            # Handling the system prompt and custom agent prompt
            # The section below calculates the available tokens for history a bit more accurately
            # now that project files are loaded in.
            if persona and persona.replace_base_system_prompt:
                # Handles the case where user has checked off the "Replace base system prompt" checkbox
                system_prompt = (
                    ChatMessageSimple(
                        message=persona.system_prompt,
                        token_count=token_counter(persona.system_prompt),
                        message_type=MessageType.SYSTEM,
                    )
                    if persona.system_prompt
                    else None
                )
                custom_agent_prompt_msg = None
            else:
                # If it's an empty string, we assume the user does not want to include it as an empty System message
                if default_base_system_prompt:
                    prompt_memory_context = (
                        user_memory_context
                        if inject_memories_in_prompt
                        else (
                            user_memory_context.without_memories()
                            if user_memory_context
                            else None
                        )
                    )
                    system_prompt_str = build_system_prompt(
                        base_system_prompt=default_base_system_prompt,
                        datetime_aware=persona.datetime_aware if persona else True,
                        user_memory_context=prompt_memory_context,
                        tools=tools,
                        should_cite_documents=should_cite_documents
                        or always_cite_documents,
                    )
                    system_prompt = ChatMessageSimple(
                        message=system_prompt_str,
                        token_count=token_counter(system_prompt_str),
                        message_type=MessageType.SYSTEM,
                    )
                    custom_agent_prompt_msg = (
                        ChatMessageSimple(
                            message=custom_agent_prompt,
                            token_count=token_counter(custom_agent_prompt),
                            message_type=MessageType.USER,
                        )
                        if custom_agent_prompt
                        else None
                    )
                else:
                    # If there is a custom agent prompt, it replaces the system prompt when the default system prompt is empty
                    system_prompt = (
                        ChatMessageSimple(
                            message=custom_agent_prompt,
                            token_count=token_counter(custom_agent_prompt),
                            message_type=MessageType.SYSTEM,
                        )
                        if custom_agent_prompt
                        else None
                    )
                    custom_agent_prompt_msg = None

            reminder_message_text: str | None
            if ran_image_gen:
                # Some models are trained to give back images to the user for some similar tool
                # This is to prevent it generating things like:
                # [Cute Cat](attachment://a_cute_cat_sitting_playfully.png)
                reminder_message_text = IMAGE_GEN_REMINDER
            elif just_ran_web_search and not out_of_cycles:
                reminder_message_text = OPEN_URL_REMINDER
            else:
                # This is the default case, the LLM at this point may answer so it is important
                # to include the reminder. Potentially this should also mention citation
                reminder_message_text = build_reminder_message(
                    reminder_text=(
                        persona.task_prompt if persona and persona.task_prompt else None
                    ),
                    include_citation_reminder=should_cite_documents
                    or always_cite_documents,
                    include_file_reminder=code_interpreter_file_generated,
                    is_last_cycle=out_of_cycles,
                )

            reminder_msg = (
                ChatMessageSimple(
                    message=reminder_message_text,
                    token_count=token_counter(reminder_message_text),
                    message_type=MessageType.USER_REMINDER,
                )
                if reminder_message_text
                else None
            )

            truncated_message_history = construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=custom_agent_prompt_msg,
                simple_chat_history=simple_chat_history,
                reminder_message=reminder_msg,
                context_files=context_files,
                available_tokens=available_tokens,
                token_counter=token_counter,
                all_injected_file_metadata=all_injected_file_metadata,
            )

            # This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
            # It also pre-processes the tool calls in preparation for running them
            tool_defs = [tool.tool_definition() for tool in final_tools]

            # Calculate total processing time from loop start until now
            # This measures how long the user waits before the answer starts streaming
            pre_answer_processing_time = time.monotonic() - loop_start_time

            llm_step_result, has_reasoned = run_llm_step(
                emitter=emitter,
                history=truncated_message_history,
                tool_definitions=tool_defs,
                tool_choice=tool_choice,
                llm=llm,
                placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),
                citation_processor=citation_processor,
                state_container=state_container,
                # The rich docs representation is passed in so that when yielding the answer, it can also
                # immediately yield the full set of found documents. This gives us the option to show the
                # final set of documents immediately if desired.
                final_documents=gathered_documents,
                user_identity=user_identity,
                pre_answer_processing_time=pre_answer_processing_time,
            )
            if has_reasoned:
                reasoning_cycles += 1

            # Fallback extraction for LLMs that don't support tool calling natively or are lower quality
            # and might incorrectly output tool calls in other channels
            llm_step_result, attempted = _try_fallback_tool_extraction(
                llm_step_result=llm_step_result,
                tool_choice=tool_choice,
                fallback_extraction_attempted=fallback_extraction_attempted,
                tool_defs=tool_defs,
                turn_index=llm_cycle_count + reasoning_cycles,
            )
            if attempted:
                # To prevent the case of excessive looping with bad models, we only allow one fallback attempt
                fallback_extraction_attempted = True

            # Save citation mapping after each LLM step for incremental state updates
            state_container.set_citation_mapping(citation_processor.citation_to_doc)

            # Run the LLM selected tools, there is some more logic here than a simple execution
            # each tool might have custom logic here
            tool_responses: list[ToolResponse] = []
            tool_calls = llm_step_result.tool_calls or []

            if INTEGRATION_TESTS_MODE and tool_calls:
                for tool_call in tool_calls:
                    emitter.emit(
                        Packet(
                            placement=tool_call.placement,
                            obj=ToolCallDebug(
                                tool_call_id=tool_call.tool_call_id,
                                tool_name=tool_call.tool_name,
                                tool_args=tool_call.tool_args,
                            ),
                        )
                    )

            if len(tool_calls) > 1:
                emitter.emit(
                    Packet(
                        placement=Placement(
                            turn_index=tool_calls[0].placement.turn_index
                        ),
                        obj=TopLevelBranching(num_parallel_branches=len(tool_calls)),
                    )
                )

            # Quick note for why citation_mapping and citation_processors are both needed:
            # 1. Tools return lightweight string mappings, not SearchDoc objects
            # 2. The SearchDoc resolution is deliberately deferred to llm_loop.py
            # 3. The citation_processor operates on SearchDoc objects and can't provide a complete reverse URL lookup for
            # in-flight citations
            # It can be cleaned up but not super trivial or worthwhile right now
            just_ran_web_search = False
            parallel_tool_call_results = run_tool_calls(
                tool_calls=tool_calls,
                tools=final_tools,
                message_history=truncated_message_history,
                user_memory_context=user_memory_context,
                user_info=None,  # TODO, this is part of memories right now, might want to separate it out
                citation_mapping=citation_mapping,
                next_citation_num=citation_processor.get_next_citation_number(),
                max_concurrent_tools=None,
                skip_search_query_expansion=has_called_search_tool,
                chat_files=chat_files,
                url_snippet_map=extract_url_snippet_map(gathered_documents or []),
                inject_memories_in_prompt=inject_memories_in_prompt,
            )
            tool_responses = parallel_tool_call_results.tool_responses
            citation_mapping = parallel_tool_call_results.updated_citation_mapping

            # Failure case, give something reasonable to the LLM to try again
            if tool_calls and not tool_responses:
                failure_messages = create_tool_call_failure_messages(
                    tool_calls, token_counter
                )
                simple_chat_history.extend(failure_messages)
                continue

            for tool_response in tool_responses:
                # Extract tool_call from the response (set by run_tool_calls)
                if tool_response.tool_call is None:
                    raise ValueError("Tool response missing tool_call reference")

                tool_call = tool_response.tool_call
                tab_index = tool_call.placement.tab_index

                # Track if search tool was called (for skipping query expansion on subsequent calls)
                if tool_call.tool_name == SearchTool.NAME:
                    has_called_search_tool = True

                # Track if code interpreter generated files with download links
                if (
                    tool_call.tool_name == PythonTool.NAME
                    and not code_interpreter_file_generated
                ):
                    try:
                        parsed = json.loads(tool_response.llm_facing_response)
                        if parsed.get("generated_files"):
                            code_interpreter_file_generated = True
                    except (json.JSONDecodeError, AttributeError):
                        pass

                # Build a mapping of tool names to tool objects for getting tool_id
                tools_by_name = {tool.name: tool for tool in final_tools}

                # Add the results to the chat history. Even though tools may run in parallel,
                # LLM APIs require linear history, so results are added sequentially.
                # Get the tool object to retrieve tool_id
                tool = tools_by_name.get(tool_call.tool_name)
                if not tool:
                    raise ValueError(
                        f"Tool '{tool_call.tool_name}' not found in tools list"
                    )

                # Extract search_docs if this is a search tool response
                search_docs = None
                displayed_docs = None
                if isinstance(tool_response.rich_response, SearchDocsResponse):
                    search_docs = tool_response.rich_response.search_docs
                    displayed_docs = tool_response.rich_response.displayed_docs

                    # Add ALL search docs to state container for DB persistence
                    if search_docs:
                        state_container.add_search_docs(search_docs)

                    if gathered_documents:
                        gathered_documents.extend(search_docs)
                    else:
                        gathered_documents = search_docs

                    # This is used for the Open URL reminder in the next cycle
                    # only do this if the web search tool yielded results
                    if search_docs and tool_call.tool_name == WebSearchTool.NAME:
                        just_ran_web_search = True

                # Extract generated_images if this is an image generation tool response
                generated_images = None
                if isinstance(
                    tool_response.rich_response, FinalImageGenerationResponse
                ):
                    generated_images = tool_response.rich_response.generated_images

                # Extract generated_files if this is a code interpreter response
                generated_files = None
                if isinstance(tool_response.rich_response, PythonToolRichResponse):
                    generated_files = (
                        tool_response.rich_response.generated_files or None
                    )

                # Persist memory if this is a memory tool response
                memory_snapshot: MemoryToolResponseSnapshot | None = None
                if isinstance(tool_response.rich_response, MemoryToolResponse):
                    persisted_memory_id: int | None = None
                    if user_memory_context and user_memory_context.user_id:
                        if tool_response.rich_response.index_to_replace is not None:
                            memory = update_memory_at_index(
                                user_id=user_memory_context.user_id,
                                index=tool_response.rich_response.index_to_replace,
                                new_text=tool_response.rich_response.memory_text,
                                db_session=db_session,
                            )
                            persisted_memory_id = memory.id if memory else None
                        else:
                            memory = add_memory(
                                user_id=user_memory_context.user_id,
                                memory_text=tool_response.rich_response.memory_text,
                                db_session=db_session,
                            )
                            persisted_memory_id = memory.id
                    operation: Literal["add", "update"] = (
                        "update"
                        if tool_response.rich_response.index_to_replace is not None
                        else "add"
                    )
                    memory_snapshot = MemoryToolResponseSnapshot(
                        memory_text=tool_response.rich_response.memory_text,
                        operation=operation,
                        memory_id=persisted_memory_id,
                        index=tool_response.rich_response.index_to_replace,
                    )

                if memory_snapshot:
                    saved_response = json.dumps(memory_snapshot.model_dump())
                elif isinstance(tool_response.rich_response, CustomToolCallSummary):
                    saved_response = json.dumps(
                        tool_response.rich_response.model_dump()
                    )
                elif isinstance(tool_response.rich_response, str):
                    saved_response = tool_response.rich_response
                else:
                    saved_response = tool_response.llm_facing_response

                tool_call_info = ToolCallInfo(
                    parent_tool_call_id=None,  # Top-level tool calls are attached to the chat message
                    turn_index=llm_cycle_count + reasoning_cycles,
                    tab_index=tab_index,
                    tool_name=tool_call.tool_name,
                    tool_call_id=tool_call.tool_call_id,
                    tool_id=tool.id,
                    reasoning_tokens=llm_step_result.reasoning,  # All tool calls from this loop share the same reasoning
                    tool_call_arguments=tool_call.tool_args,
                    tool_call_response=saved_response,
                    search_docs=displayed_docs or search_docs,
                    generated_images=generated_images,
                    generated_files=generated_files,
                )
                # Add to state container for partial save support
                state_container.add_tool_call(tool_call_info)

                # Update citation processor if this was a search tool
                update_citation_processor_from_tool_response(
                    tool_response, citation_processor
                )

            # After processing all tool responses for this turn, add messages to history
            # using OpenAI parallel tool calling format:
            # 1. ONE ASSISTANT message with tool_calls array
            # 2. N TOOL_CALL_RESPONSE messages (one per tool call)
            if tool_responses:
                # Filter to only responses with valid tool_call references
                valid_tool_responses = [
                    tr for tr in tool_responses if tr.tool_call is not None
                ]

                # Build ToolCallSimple list for all tool calls in this turn
                tool_calls_simple: list[ToolCallSimple] = []
                for tool_response in valid_tool_responses:
                    tc = tool_response.tool_call
                    assert (
                        tc is not None
                    )  # Already filtered above, this is just for typing purposes

                    tool_call_message = tc.to_msg_str()
                    tool_call_token_count = token_counter(tool_call_message)

                    tool_calls_simple.append(
                        ToolCallSimple(
                            tool_call_id=tc.tool_call_id,
                            tool_name=tc.tool_name,
                            tool_arguments=tc.tool_args,
                            token_count=tool_call_token_count,
                        )
                    )

                # Create ONE ASSISTANT message with all tool calls for this turn
                total_tool_call_tokens = sum(tc.token_count for tc in tool_calls_simple)
                assistant_with_tools = ChatMessageSimple(
                    message="",  # No text content when making tool calls
                    token_count=total_tool_call_tokens,
                    message_type=MessageType.ASSISTANT,
                    tool_calls=tool_calls_simple,
                    image_files=None,
                )
                simple_chat_history.append(assistant_with_tools)

                # Add TOOL_CALL_RESPONSE messages for each tool call
                for tool_response in valid_tool_responses:
                    tc = tool_response.tool_call
                    assert tc is not None  # Already filtered above

                    tool_response_message = tool_response.llm_facing_response
                    tool_response_token_count = token_counter(tool_response_message)

                    tool_response_msg = ChatMessageSimple(
                        message=tool_response_message,
                        token_count=tool_response_token_count,
                        message_type=MessageType.TOOL_CALL_RESPONSE,
                        tool_call_id=tc.tool_call_id,
                        image_files=None,
                    )
                    simple_chat_history.append(tool_response_msg)

            # If no tool calls, then it must have answered, wrap up
            if not llm_step_result.tool_calls or len(llm_step_result.tool_calls) == 0:
                break

            # Certain tools do not allow further actions, force the LLM wrap up on the next cycle
            if any(
                tool.tool_name in STOPPING_TOOLS_NAMES
                for tool in llm_step_result.tool_calls
            ):
                ran_image_gen = True

            if llm_step_result.tool_calls and any(
                tool.tool_name in CITEABLE_TOOLS_NAMES
                for tool in llm_step_result.tool_calls
            ):
                # As long as 1 tool with citeable documents is called at any point, we ask the LLM to try to cite
                should_cite_documents = True

        if not llm_step_result.answer and not llm_step_result.tool_calls:
            raise _build_empty_llm_response_error(
                llm=llm,
                llm_step_result=llm_step_result,
                tool_choice=tool_choice,
            )

        if not llm_step_result.answer:
            raise RuntimeError(
                "The LLM did not return a final answer after tool execution. "
                "Typically this indicates invalid tool-call output, a model/provider mismatch, "
                "or serving API misconfiguration."
            )

        emitter.emit(
            Packet(
                placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),
                obj=OverallStop(type="stop"),
            )
        )


================================================
FILE: backend/onyx/chat/llm_step.py
================================================
import json
import re
import time
import uuid
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Mapping
from collections.abc import Sequence
from html import unescape
from typing import Any
from typing import cast

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.model_response import Delta
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import FunctionCall
from onyx.llm.models import ImageContentPart
from onyx.llm.models import ImageUrlDetail
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import SystemMessage
from onyx.llm.models import TextContentPart
from onyx.llm.models import ToolCall
from onyx.llm.models import ToolMessage
from onyx.llm.models import UserMessage
from onyx.llm.prompt_cache.processor import process_with_prompt_cache
from onyx.llm.utils import model_needs_formatting_reenabled
from onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN
from onyx.prompts.constants import SYSTEM_REMINDER_TAG_CLOSE
from onyx.prompts.constants import SYSTEM_REMINDER_TAG_OPEN
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.tools.models import ToolCallKickoff
from onyx.tracing.framework.create import generation_span
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.jsonriver import Parser
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string
from onyx.utils.text_processing import find_all_json_objects

logger = setup_logger()

_XML_INVOKE_BLOCK_RE = re.compile(
    r"<invoke\b(?P<attrs>[^>]*)>(?P<body>.*?)</invoke>",
    re.IGNORECASE | re.DOTALL,
)
_XML_PARAMETER_RE = re.compile(
    r"<parameter\b(?P<attrs>[^>]*)>(?P<value>.*?)</parameter>",
    re.IGNORECASE | re.DOTALL,
)
_FUNCTION_CALLS_OPEN_MARKER = "<function_calls"
_FUNCTION_CALLS_CLOSE_MARKER = "</function_calls>"


class _XmlToolCallContentFilter:
    """Streaming filter that strips XML-style tool call payload blocks from text."""

    def __init__(self) -> None:
        self._pending = ""
        self._inside_function_calls_block = False

    def process(self, content: str) -> str:
        if not content:
            return ""

        self._pending += content
        output_parts: list[str] = []

        while self._pending:
            pending_lower = self._pending.lower()

            if self._inside_function_calls_block:
                end_idx = pending_lower.find(_FUNCTION_CALLS_CLOSE_MARKER)
                if end_idx == -1:
                    # Keep buffering until we see the close marker.
                    return "".join(output_parts)

                # Drop the whole function_calls block.
                self._pending = self._pending[
                    end_idx + len(_FUNCTION_CALLS_CLOSE_MARKER) :
                ]
                self._inside_function_calls_block = False
                continue

            start_idx = _find_function_calls_open_marker(pending_lower)
            if start_idx == -1:
                # Keep only a possible prefix of "<function_calls" in the buffer so
                # marker splits across chunks are handled correctly.
                tail_len = _matching_open_marker_prefix_len(self._pending)
                emit_upto = len(self._pending) - tail_len
                if emit_upto > 0:
                    output_parts.append(self._pending[:emit_upto])
                    self._pending = self._pending[emit_upto:]
                return "".join(output_parts)

            if start_idx > 0:
                output_parts.append(self._pending[:start_idx])

            # Enter block-stripping mode and keep scanning for close marker.
            self._pending = self._pending[start_idx:]
            self._inside_function_calls_block = True

        return "".join(output_parts)

    def flush(self) -> str:
        if self._inside_function_calls_block:
            # Drop any incomplete block at stream end.
            self._pending = ""
            self._inside_function_calls_block = False
            return ""

        remaining = self._pending
        self._pending = ""
        return remaining


def _matching_open_marker_prefix_len(text: str) -> int:
    """Return longest suffix of text that matches prefix of "<function_calls"."""
    max_len = min(len(text), len(_FUNCTION_CALLS_OPEN_MARKER) - 1)
    text_lower = text.lower()
    marker_lower = _FUNCTION_CALLS_OPEN_MARKER

    for candidate_len in range(max_len, 0, -1):
        if text_lower.endswith(marker_lower[:candidate_len]):
            return candidate_len

    return 0


def _is_valid_function_calls_open_follower(char: str | None) -> bool:
    return char is None or char in {">", " ", "\t", "\n", "\r"}


def _find_function_calls_open_marker(text_lower: str) -> int:
    """Find '<function_calls' with a valid tag boundary follower."""
    search_from = 0
    while True:
        idx = text_lower.find(_FUNCTION_CALLS_OPEN_MARKER, search_from)
        if idx == -1:
            return -1

        follower_pos = idx + len(_FUNCTION_CALLS_OPEN_MARKER)
        follower = text_lower[follower_pos] if follower_pos < len(text_lower) else None
        if _is_valid_function_calls_open_follower(follower):
            return idx

        search_from = idx + 1


def _try_parse_json_string(value: Any) -> Any:
    """Attempt to parse a JSON string value into its Python equivalent.

    If value is a string that looks like a JSON array or object, parse it.
    Otherwise return the value unchanged.

    This handles the case where the LLM returns arguments like:
    - queries: '["query1", "query2"]' instead of ["query1", "query2"]
    """
    if not isinstance(value, str):
        return value

    stripped = value.strip()
    # Only attempt to parse if it looks like a JSON array or object
    if not (
        (stripped.startswith("[") and stripped.endswith("]"))
        or (stripped.startswith("{") and stripped.endswith("}"))
    ):
        return value

    try:
        return json.loads(stripped)
    except json.JSONDecodeError:
        return value


def _parse_tool_args_to_dict(raw_args: Any) -> dict[str, Any]:
    """Parse tool arguments into a dict.

    Normal case:
    - raw_args == '{"queries":[...]}' -> dict via json.loads

    Defensive case (JSON string literal of an object):
    - raw_args == '"{\\"queries\\":[...]}"' -> json.loads -> str -> json.loads -> dict

    Also handles the case where argument values are JSON strings that need parsing:
    - {"queries": '["q1", "q2"]'} -> {"queries": ["q1", "q2"]}

    Anything else returns {}.
    """

    if raw_args is None:
        return {}

    if isinstance(raw_args, dict):
        # Parse any string values that look like JSON arrays/objects
        return {
            k: _try_parse_json_string(sanitize_string(v) if isinstance(v, str) else v)
            for k, v in raw_args.items()
        }

    if not isinstance(raw_args, str):
        return {}

    # Sanitize before parsing to remove NULL bytes and surrogates
    raw_args = sanitize_string(raw_args)

    try:
        parsed1: Any = json.loads(raw_args)
    except json.JSONDecodeError:
        return {}

    if isinstance(parsed1, dict):
        # Parse any string values that look like JSON arrays/objects
        return {k: _try_parse_json_string(v) for k, v in parsed1.items()}

    if isinstance(parsed1, str):
        try:
            parsed2: Any = json.loads(parsed1)
        except json.JSONDecodeError:
            return {}
        if isinstance(parsed2, dict):
            # Parse any string values that look like JSON arrays/objects
            return {k: _try_parse_json_string(v) for k, v in parsed2.items()}
        return {}

    return {}


def _format_message_history_for_logging(
    message_history: LanguageModelInput,
) -> str:
    """Format message history for logging, with special handling for tool calls.

    Tool calls are formatted as JSON with 4-space indentation for readability.
    """
    formatted_lines = []

    separator = "================================================"

    # Handle single ChatCompletionMessage - wrap in list for uniform processing
    if isinstance(
        message_history, (SystemMessage, UserMessage, AssistantMessage, ToolMessage)
    ):
        message_history = [message_history]

    # Handle sequence of messages
    for i, msg in enumerate(message_history):
        if isinstance(msg, SystemMessage):
            formatted_lines.append(f"Message {i + 1} [system]:")
            formatted_lines.append(separator)
            formatted_lines.append(f"{msg.content}")

        elif isinstance(msg, UserMessage):
            formatted_lines.append(f"Message {i + 1} [user]:")
            formatted_lines.append(separator)
            if isinstance(msg.content, str):
                formatted_lines.append(f"{msg.content}")
            elif isinstance(msg.content, list):
                # Handle multimodal content (text + images)
                for part in msg.content:
                    if isinstance(part, TextContentPart):
                        formatted_lines.append(f"{part.text}")
                    elif isinstance(part, ImageContentPart):
                        url = part.image_url.url
                        formatted_lines.append(f"[Image: {url[:50]}...]")

        elif isinstance(msg, AssistantMessage):
            formatted_lines.append(f"Message {i + 1} [assistant]:")
            formatted_lines.append(separator)
            if msg.content:
                formatted_lines.append(f"{msg.content}")

            if msg.tool_calls:
                formatted_lines.append("Tool calls:")
                for tool_call in msg.tool_calls:
                    tool_call_dict: dict[str, Any] = {
                        "id": tool_call.id,
                        "type": tool_call.type,
                        "function": {
                            "name": tool_call.function.name,
                            "arguments": tool_call.function.arguments,
                        },
                    }
                    tool_call_json = json.dumps(tool_call_dict, indent=4)
                    formatted_lines.append(tool_call_json)

        elif isinstance(msg, ToolMessage):
            formatted_lines.append(f"Message {i + 1} [tool]:")
            formatted_lines.append(separator)
            formatted_lines.append(f"Tool call ID: {msg.tool_call_id}")
            formatted_lines.append(f"Response: {msg.content}")

        else:
            # Fallback for unknown message types
            formatted_lines.append(f"Message {i + 1} [unknown]:")
            formatted_lines.append(separator)
            formatted_lines.append(f"{msg}")

        # Add separator before next message (or at end)
        if i < len(message_history) - 1:
            formatted_lines.append(separator)

    return "\n".join(formatted_lines)


def _update_tool_call_with_delta(
    tool_calls_in_progress: dict[int, dict[str, Any]],
    tool_call_delta: Any,
) -> None:
    index = tool_call_delta.index

    if index not in tool_calls_in_progress:
        tool_calls_in_progress[index] = {
            # Fallback ID in case the provider never sends one via deltas.
            "id": f"fallback_{uuid.uuid4().hex}",
            "name": None,
            "arguments": "",
        }

    if tool_call_delta.id:
        tool_calls_in_progress[index]["id"] = tool_call_delta.id

    if tool_call_delta.function:
        if tool_call_delta.function.name:
            tool_calls_in_progress[index]["name"] = tool_call_delta.function.name

        if tool_call_delta.function.arguments:
            tool_calls_in_progress[index][
                "arguments"
            ] += tool_call_delta.function.arguments


def _extract_tool_call_kickoffs(
    id_to_tool_call_map: dict[int, dict[str, Any]],
    turn_index: int,
    tab_index: int | None = None,
    sub_turn_index: int | None = None,
) -> list[ToolCallKickoff]:
    """Extract ToolCallKickoff objects from the tool call map.

    Returns a list of ToolCallKickoff objects for valid tool calls (those with both id and name).
    Each tool call is assigned the given turn_index and a tab_index based on its order.

    Args:
        id_to_tool_call_map: Map of tool call index to tool call data
        turn_index: The turn index for this set of tool calls
        tab_index: If provided, use this tab_index for all tool calls (otherwise auto-increment)
        sub_turn_index: The sub-turn index for nested tool calls
    """
    tool_calls: list[ToolCallKickoff] = []
    tab_index_calculated = 0
    for tool_call_data in id_to_tool_call_map.values():
        if tool_call_data.get("id") and tool_call_data.get("name"):
            tool_args = _parse_tool_args_to_dict(tool_call_data.get("arguments"))

            tool_calls.append(
                ToolCallKickoff(
                    tool_call_id=tool_call_data["id"],
                    tool_name=tool_call_data["name"],
                    tool_args=tool_args,
                    placement=Placement(
                        turn_index=turn_index,
                        tab_index=(
                            tab_index_calculated if tab_index is None else tab_index
                        ),
                        sub_turn_index=sub_turn_index,
                    ),
                )
            )
            tab_index_calculated += 1
    return tool_calls


def extract_tool_calls_from_response_text(
    response_text: str | None,
    tool_definitions: list[dict],
    placement: Placement,
) -> list[ToolCallKickoff]:
    """Extract tool calls from LLM response text by matching JSON against tool definitions.

    This is a fallback mechanism for when the LLM was expected to return tool calls
    but didn't use the proper tool call format. It searches for tool calls embedded
    in response text (JSON first, then XML-like invoke blocks) that match available
    tool definitions.

    Args:
        response_text: The LLM's text response to search for tool calls
        tool_definitions: List of tool definitions to match against
        placement: Placement information for the tool calls

    Returns:
        List of ToolCallKickoff objects for any matched tool calls
    """
    if not response_text or not tool_definitions:
        return []

    # Build a map of tool names to their definitions
    tool_name_to_def: dict[str, dict] = {}
    for tool_def in tool_definitions:
        if tool_def.get("type") == "function" and "function" in tool_def:
            func_def = tool_def["function"]
            tool_name = func_def.get("name")
            if tool_name:
                tool_name_to_def[tool_name] = func_def

    if not tool_name_to_def:
        return []

    matched_tool_calls: list[tuple[str, dict[str, Any]]] = []
    # Find all JSON objects in the response text
    json_objects = find_all_json_objects(response_text)
    prev_json_obj: dict[str, Any] | None = None
    prev_tool_call: tuple[str, dict[str, Any]] | None = None

    for json_obj in json_objects:
        matched_tool_call = _try_match_json_to_tool(json_obj, tool_name_to_def)
        if not matched_tool_call:
            continue

        # `find_all_json_objects` can return both an outer tool-call object and
        # its nested arguments object. If both resolve to the same tool call,
        # drop only this nested duplicate artifact.
        if (
            prev_json_obj is not None
            and prev_tool_call is not None
            and matched_tool_call == prev_tool_call
            and _is_nested_arguments_duplicate(
                previous_json_obj=prev_json_obj,
                current_json_obj=json_obj,
                tool_name_to_def=tool_name_to_def,
            )
        ):
            continue

        matched_tool_calls.append(matched_tool_call)
        prev_json_obj = json_obj
        prev_tool_call = matched_tool_call

    # Some providers/models emit XML-style function calls instead of JSON objects.
    # Keep this as a fallback behind JSON extraction to preserve current behavior.
    if not matched_tool_calls:
        matched_tool_calls = _extract_xml_tool_calls_from_response_text(
            response_text=response_text,
            tool_name_to_def=tool_name_to_def,
        )

    tool_calls: list[ToolCallKickoff] = []
    for tab_index, (tool_name, tool_args) in enumerate(matched_tool_calls):
        tool_calls.append(
            ToolCallKickoff(
                tool_call_id=f"extracted_{uuid.uuid4().hex[:8]}",
                tool_name=tool_name,
                tool_args=tool_args,
                placement=Placement(
                    turn_index=placement.turn_index,
                    tab_index=tab_index,
                    sub_turn_index=placement.sub_turn_index,
                ),
            )
        )

    logger.info(
        f"Extracted {len(tool_calls)} tool call(s) from response text as fallback"
    )

    return tool_calls


def _extract_xml_tool_calls_from_response_text(
    response_text: str,
    tool_name_to_def: dict[str, dict],
) -> list[tuple[str, dict[str, Any]]]:
    """Extract XML-style tool calls from response text.

    Supports formats such as:
    <function_calls>
      <invoke name="internal_search">
        <parameter name="queries" string="false">["foo"]</parameter>
      </invoke>
    </function_calls>
    """
    matched_tool_calls: list[tuple[str, dict[str, Any]]] = []

    for invoke_match in _XML_INVOKE_BLOCK_RE.finditer(response_text):
        invoke_attrs = invoke_match.group("attrs")
        tool_name = _extract_xml_attribute(invoke_attrs, "name")
        if not tool_name or tool_name not in tool_name_to_def:
            continue

        tool_args: dict[str, Any] = {}
        invoke_body = invoke_match.group("body")
        for parameter_match in _XML_PARAMETER_RE.finditer(invoke_body):
            parameter_attrs = parameter_match.group("attrs")
            parameter_name = _extract_xml_attribute(parameter_attrs, "name")
            if not parameter_name:
                continue

            string_attr = _extract_xml_attribute(parameter_attrs, "string")
            tool_args[parameter_name] = _parse_xml_parameter_value(
                raw_value=parameter_match.group("value"),
                string_attr=string_attr,
            )

        matched_tool_calls.append((tool_name, tool_args))

    return matched_tool_calls


def _extract_xml_attribute(attrs: str, attr_name: str) -> str | None:
    """Extract a single XML-style attribute value from a tag attribute string."""
    attr_match = re.search(
        rf"""\b{re.escape(attr_name)}\s*=\s*(['"])(.*?)\1""",
        attrs,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if not attr_match:
        return None
    return sanitize_string(unescape(attr_match.group(2).strip()))


def _parse_xml_parameter_value(raw_value: str, string_attr: str | None) -> Any:
    """Parse a parameter value from XML-style tool call payloads."""
    value = sanitize_string(unescape(raw_value).strip())

    if string_attr and string_attr.lower() == "true":
        return value

    try:
        return json.loads(value)
    except json.JSONDecodeError:
        return value


def _resolve_tool_arguments(obj: dict[str, Any]) -> dict[str, Any] | None:
    """Extract and parse an arguments/parameters value from a tool-call-like object.

    Looks for "arguments" or "parameters" keys, handles JSON-string values,
    and returns a dict if successful, or None otherwise.
    """
    arguments = obj.get("arguments", obj.get("parameters", {}))
    if isinstance(arguments, str):
        arguments = sanitize_string(arguments)
        try:
            arguments = json.loads(arguments)
        except json.JSONDecodeError:
            arguments = {}
    if isinstance(arguments, dict):
        return arguments
    return None


def _try_match_json_to_tool(
    json_obj: dict[str, Any],
    tool_name_to_def: dict[str, dict],
) -> tuple[str, dict[str, Any]] | None:
    """Try to match a JSON object to a tool definition.

    Supports several formats:
    1. Direct tool call format: {"name": "tool_name", "arguments": {...}}
    2. Function call format: {"function": {"name": "tool_name", "arguments": {...}}}
    3. Tool name as key: {"tool_name": {...arguments...}}
    4. Arguments matching a tool's parameter schema

    Args:
        json_obj: The JSON object to match
        tool_name_to_def: Map of tool names to their function definitions

    Returns:
        Tuple of (tool_name, tool_args) if matched, None otherwise
    """
    # Format 1: Direct tool call format {"name": "...", "arguments": {...}}
    if "name" in json_obj and json_obj["name"] in tool_name_to_def:
        tool_name = json_obj["name"]
        arguments = _resolve_tool_arguments(json_obj)
        if arguments is not None:
            return (tool_name, arguments)

    # Format 2: Function call format {"function": {"name": "...", "arguments": {...}}}
    if "function" in json_obj and isinstance(json_obj["function"], dict):
        func_obj = json_obj["function"]
        if "name" in func_obj and func_obj["name"] in tool_name_to_def:
            tool_name = func_obj["name"]
            arguments = _resolve_tool_arguments(func_obj)
            if arguments is not None:
                return (tool_name, arguments)

    # Format 3: Tool name as key {"tool_name": {...arguments...}}
    for tool_name in tool_name_to_def:
        if tool_name in json_obj:
            arguments = json_obj[tool_name]
            if isinstance(arguments, dict):
                return (tool_name, arguments)

    # Format 4: Check if the JSON object matches a tool's parameter schema
    for tool_name, func_def in tool_name_to_def.items():
        params = func_def.get("parameters", {})
        properties = params.get("properties", {})
        required = params.get("required", [])

        if not properties:
            continue

        # Check if all required parameters are present (empty required = all optional)
        if all(req in json_obj for req in required):
            # Check if any of the tool's properties are in the JSON object
            matching_props = [prop for prop in properties if prop in json_obj]
            if matching_props:
                # Filter to only include known properties
                filtered_args = {k: v for k, v in json_obj.items() if k in properties}
                return (tool_name, filtered_args)

    return None


def _is_nested_arguments_duplicate(
    previous_json_obj: dict[str, Any],
    current_json_obj: dict[str, Any],
    tool_name_to_def: dict[str, dict],
) -> bool:
    """Detect when current object is the nested args object from previous tool call."""
    extracted_args = _extract_nested_arguments_obj(previous_json_obj, tool_name_to_def)
    return extracted_args is not None and current_json_obj == extracted_args


def _extract_nested_arguments_obj(
    json_obj: dict[str, Any],
    tool_name_to_def: dict[str, dict],
) -> dict[str, Any] | None:
    # Format 1: {"name": "...", "arguments": {...}} or {"name": "...", "parameters": {...}}
    if "name" in json_obj and json_obj["name"] in tool_name_to_def:
        args_obj = json_obj.get("arguments", json_obj.get("parameters"))
        if isinstance(args_obj, dict):
            return args_obj

    # Format 2: {"function": {"name": "...", "arguments": {...}}}
    if "function" in json_obj and isinstance(json_obj["function"], dict):
        function_obj = json_obj["function"]
        if "name" in function_obj and function_obj["name"] in tool_name_to_def:
            args_obj = function_obj.get("arguments", function_obj.get("parameters"))
            if isinstance(args_obj, dict):
                return args_obj

    # Format 3: {"tool_name": {...arguments...}}
    for tool_name in tool_name_to_def:
        if tool_name in json_obj and isinstance(json_obj[tool_name], dict):
            return json_obj[tool_name]

    return None


def _build_structured_assistant_message(msg: ChatMessageSimple) -> AssistantMessage:
    tool_calls_list: list[ToolCall] | None = None
    if msg.tool_calls:
        tool_calls_list = [
            ToolCall(
                id=tc.tool_call_id,
                type="function",
                function=FunctionCall(
                    name=tc.tool_name,
                    arguments=json.dumps(tc.tool_arguments),
                ),
            )
            for tc in msg.tool_calls
        ]

    return AssistantMessage(
        role="assistant",
        content=msg.message or None,
        tool_calls=tool_calls_list,
    )


def _build_structured_tool_response_message(msg: ChatMessageSimple) -> ToolMessage:
    if not msg.tool_call_id:
        raise ValueError(
            f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
        )

    return ToolMessage(
        role="tool",
        content=msg.message,
        tool_call_id=msg.tool_call_id,
    )


class _HistoryMessageFormatter:
    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:
        raise NotImplementedError

    def format_tool_response_message(
        self, msg: ChatMessageSimple
    ) -> ToolMessage | UserMessage:
        raise NotImplementedError


class _DefaultHistoryMessageFormatter(_HistoryMessageFormatter):
    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:
        return _build_structured_assistant_message(msg)

    def format_tool_response_message(self, msg: ChatMessageSimple) -> ToolMessage:
        return _build_structured_tool_response_message(msg)


class _OllamaHistoryMessageFormatter(_HistoryMessageFormatter):
    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:
        if not msg.tool_calls:
            return _build_structured_assistant_message(msg)

        tool_call_lines = [
            (
                f"[Tool Call] name={tc.tool_name} id={tc.tool_call_id} args={json.dumps(tc.tool_arguments)}"
            )
            for tc in msg.tool_calls
        ]
        assistant_content = (
            "\n".join([msg.message, *tool_call_lines])
            if msg.message
            else "\n".join(tool_call_lines)
        )
        return AssistantMessage(
            role="assistant",
            content=assistant_content,
            tool_calls=None,
        )

    def format_tool_response_message(self, msg: ChatMessageSimple) -> UserMessage:
        if not msg.tool_call_id:
            raise ValueError(
                f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
            )

        return UserMessage(
            role="user",
            content=f"[Tool Result] id={msg.tool_call_id}\n{msg.message}",
        )


_DEFAULT_HISTORY_MESSAGE_FORMATTER = _DefaultHistoryMessageFormatter()
_OLLAMA_HISTORY_MESSAGE_FORMATTER = _OllamaHistoryMessageFormatter()


def _get_history_message_formatter(llm_config: LLMConfig) -> _HistoryMessageFormatter:
    if llm_config.model_provider == LlmProviderNames.OLLAMA_CHAT:
        return _OLLAMA_HISTORY_MESSAGE_FORMATTER

    return _DEFAULT_HISTORY_MESSAGE_FORMATTER


def translate_history_to_llm_format(
    history: list[ChatMessageSimple],
    llm_config: LLMConfig,
) -> LanguageModelInput:
    """Convert a list of ChatMessageSimple to LanguageModelInput format.

    Converts ChatMessageSimple messages to ChatCompletionMessage format,
    handling different message types and image files for multimodal support.
    """
    messages: list[ChatCompletionMessage] = []
    history_message_formatter = _get_history_message_formatter(llm_config)
    # Note: cacheability is computed from pre-translation ChatMessageSimple types.
    # Some providers flatten tool history into plain assistant/user text, so this split
    # may be less semantically meaningful, but it remains safe and order-preserving.
    last_cacheable_msg_idx = -1
    all_previous_msgs_cacheable = True

    for idx, msg in enumerate(history):
        # if the message is being added to the history
        if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
            MessageType.SYSTEM,
            MessageType.USER,
            MessageType.USER_REMINDER,
            MessageType.ASSISTANT,
            MessageType.TOOL_CALL_RESPONSE,
        ]:
            all_previous_msgs_cacheable = (
                all_previous_msgs_cacheable and msg.should_cache
            )
            if all_previous_msgs_cacheable:
                last_cacheable_msg_idx = idx

        if msg.message_type == MessageType.SYSTEM:
            system_msg = SystemMessage(
                role="system",
                content=msg.message,
            )
            messages.append(system_msg)

        elif msg.message_type == MessageType.USER:
            # Handle user messages with potential images
            if msg.image_files:
                # Build content parts: text + images
                content_parts: list[TextContentPart | ImageContentPart] = [
                    TextContentPart(
                        type="text",
                        text=msg.message,
                    )
                ]

                # Add image parts
                for img_file in msg.image_files:
                    if img_file.file_type == ChatFileType.IMAGE:
                        try:
                            image_type = get_image_type_from_bytes(img_file.content)
                            base64_data = img_file.to_base64()
                            image_url = f"data:{image_type};base64,{base64_data}"

                            image_part = ImageContentPart(
                                type="image_url",
                                image_url=ImageUrlDetail(
                                    url=image_url,
                                    detail=None,
                                ),
                            )
                            content_parts.append(image_part)
                        except Exception as e:
                            logger.warning(
                                f"Failed to process image file {img_file.file_id}: {e}. Skipping image."
                            )
                user_msg = UserMessage(
                    role="user",
                    content=content_parts,
                )
                messages.append(user_msg)
            else:
                # Simple text-only user message
                user_msg_text = UserMessage(
                    role="user",
                    content=msg.message,
                )
                messages.append(user_msg_text)

        elif msg.message_type == MessageType.USER_REMINDER:
            # User reminder messages are wrapped with system-reminder tags
            # and converted to UserMessage (LLM APIs don't have a native reminder type)
            wrapped_content = f"{SYSTEM_REMINDER_TAG_OPEN}\n{msg.message}\n{SYSTEM_REMINDER_TAG_CLOSE}"
            reminder_msg = UserMessage(
                role="user",
                content=wrapped_content,
            )
            messages.append(reminder_msg)

        elif msg.message_type == MessageType.ASSISTANT:
            messages.append(history_message_formatter.format_assistant_message(msg))

        elif msg.message_type == MessageType.TOOL_CALL_RESPONSE:
            messages.append(history_message_formatter.format_tool_response_message(msg))

        else:
            logger.warning(
                f"Unknown message type {msg.message_type} in history. Skipping message."
            )

    # Apply model-specific formatting when translating to LLM format (e.g. OpenAI
    # reasoning models need CODE_BLOCK_MARKDOWN prefix for correct markdown generation)
    if model_needs_formatting_reenabled(llm_config.model_name):
        for i, m in enumerate(messages):
            if isinstance(m, SystemMessage):
                messages[i] = SystemMessage(
                    role="system",
                    content=CODE_BLOCK_MARKDOWN + m.content,
                )
                break

    # prompt caching: rely on should_cache in ChatMessageSimple to
    # pick the split point for the cacheable prefix and suffix
    if last_cacheable_msg_idx != -1:
        processed_messages, _ = process_with_prompt_cache(
            llm_config=llm_config,
            cacheable_prefix=messages[: last_cacheable_msg_idx + 1],
            suffix=messages[last_cacheable_msg_idx + 1 :],
            continuation=False,
        )
        assert isinstance(processed_messages, list)  # for mypy
        messages = processed_messages

    return messages


def _increment_turns(
    turn_index: int, sub_turn_index: int | None
) -> tuple[int, int | None]:
    if sub_turn_index is None:
        return turn_index + 1, None
    else:
        return turn_index, sub_turn_index + 1


def _delta_has_action(delta: Delta) -> bool:
    return bool(delta.content or delta.reasoning_content or delta.tool_calls)


def run_llm_step_pkt_generator(
    history: list[ChatMessageSimple],
    tool_definitions: list[dict],
    tool_choice: ToolChoiceOptions,
    llm: LLM,
    placement: Placement,
    state_container: ChatStateContainer | None,
    citation_processor: DynamicCitationProcessor | None,
    reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
    final_documents: list[SearchDoc] | None = None,
    user_identity: LLMUserIdentity | None = None,
    custom_token_processor: (
        Callable[[Delta | None, Any], tuple[Delta | None, Any]] | None
    ) = None,
    max_tokens: int | None = None,
    # TODO: Temporary handling of nested tool calls with agents, figure out a better way to handle this
    use_existing_tab_index: bool = False,
    is_deep_research: bool = False,
    pre_answer_processing_time: float | None = None,
    timeout_override: int | None = None,
) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:
    """Run an LLM step and stream the response as packets.
    NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and
    delicate logic that is core to the app's main functionality.

    This generator function streams LLM responses, processing reasoning content,
    answer content, tool calls, and citations. It yields Packet objects for
    real-time streaming to clients and accumulates the final result.

    Args:
        history: List of chat messages in the conversation history.
        tool_definitions: List of tool definitions available to the LLM.
        tool_choice: Tool choice configuration (e.g., "auto", "required", "none").
        llm: Language model interface to use for generation.
        placement: Placement info (turn_index, tab_index, sub_turn_index) for
            positioning packets in the conversation UI.
        state_container: Container for storing chat state (reasoning, answers).
        citation_processor: Optional processor for extracting and formatting citations
            from the response. If provided, processes tokens to identify citations.
        reasoning_effort: Optional reasoning effort configuration for models that
            support reasoning (e.g., o1 models).
        final_documents: Optional list of search documents to include in the response
            start packet.
        user_identity: Optional user identity information for the LLM.
        custom_token_processor: Optional callable that processes each token delta
            before yielding. Receives (delta, processor_state) and returns
            (modified_delta, new_processor_state). Can return None for delta to skip.
        max_tokens: Optional maximum number of tokens for the LLM response.
        use_existing_tab_index: If True, use the tab_index from placement for all
            tool calls instead of auto-incrementing.
        is_deep_research: If True, treat content before tool calls as reasoning
            when tool_choice is REQUIRED.
        pre_answer_processing_time: Optional time spent processing before the
            answer started, recorded in state_container for analytics.
        timeout_override: Optional timeout override for the LLM call.

    Yields:
        Packet: Streaming packets containing:
            - ReasoningStart/ReasoningDelta/ReasoningDone for reasoning content
            - AgentResponseStart/AgentResponseDelta for answer content
            - CitationInfo for extracted citations
            - ToolCallKickoff for tool calls (extracted at the end)

    Returns:
        tuple[LlmStepResult, bool]: A tuple containing:
            - LlmStepResult: The final result with accumulated reasoning, answer,
              and tool calls (if any).
            - bool: Whether reasoning occurred during this step. This should be used to
              increment the turn index or sub_turn index for the rest of the LLM loop.

    Note:
        The function handles incremental state updates, saving reasoning and answer
        tokens to the state container as they are generated. Tool calls are extracted
        and yielded only after the stream completes.
    """

    turn_index = placement.turn_index
    tab_index = placement.tab_index
    sub_turn_index = placement.sub_turn_index

    def _current_placement() -> Placement:
        return Placement(
            turn_index=turn_index,
            tab_index=tab_index,
            sub_turn_index=sub_turn_index,
        )

    llm_msg_history = translate_history_to_llm_format(history, llm.config)
    has_reasoned = False

    if LOG_ONYX_MODEL_INTERACTIONS:
        logger.debug(
            f"Message history:\n{_format_message_history_for_logging(llm_msg_history)}"
        )

    id_to_tool_call_map: dict[int, dict[str, Any]] = {}
    arg_parsers: dict[int, Parser] = {}
    reasoning_start = False
    answer_start = False
    accumulated_reasoning = ""
    accumulated_answer = ""
    accumulated_raw_answer = ""
    stream_chunk_count = 0
    actionable_chunk_count = 0
    empty_chunk_count = 0
    finish_reasons: set[str] = set()
    xml_tool_call_content_filter = _XmlToolCallContentFilter()

    processor_state: Any = None

    with generation_span(
        model=llm.config.model_name,
        model_config={
            "base_url": str(llm.config.api_base or ""),
            "model_impl": "litellm",
        },
    ) as span_generation:
        span_generation.span_data.input = cast(
            Sequence[Mapping[str, Any]], llm_msg_history
        )
        stream_start_time = time.monotonic()
        first_action_recorded = False

        def _emit_citation_results(
            results: Generator[str | CitationInfo, None, None],
        ) -> Generator[Packet, None, None]:
            """Yield packets for citation processor results (str or CitationInfo)."""
            nonlocal accumulated_answer

            for result in results:
                if isinstance(result, str):
                    accumulated_answer += result
                    if state_container:
                        state_container.set_answer_tokens(accumulated_answer)
                    yield Packet(
                        placement=_current_placement(),
                        obj=AgentResponseDelta(content=result),
                    )
                elif isinstance(result, CitationInfo):
                    yield Packet(
                        placement=_current_placement(),
                        obj=result,
                    )
                    if state_container:
                        state_container.add_emitted_citation(result.citation_number)

        def _close_reasoning_if_active() -> Generator[Packet, None, None]:
            """Emit ReasoningDone and increment turns if reasoning is in progress."""
            nonlocal reasoning_start
            nonlocal has_reasoned
            nonlocal turn_index
            nonlocal sub_turn_index

            if reasoning_start:
                yield Packet(
                    placement=Placement(
                        turn_index=turn_index,
                        tab_index=tab_index,
                        sub_turn_index=sub_turn_index,
                    ),
                    obj=ReasoningDone(),
                )
                has_reasoned = True
                turn_index, sub_turn_index = _increment_turns(
                    turn_index, sub_turn_index
                )
                reasoning_start = False

        def _emit_content_chunk(content_chunk: str) -> Generator[Packet, None, None]:
            nonlocal accumulated_answer
            nonlocal accumulated_reasoning
            nonlocal answer_start
            nonlocal reasoning_start
            nonlocal turn_index
            nonlocal sub_turn_index

            # When tool_choice is REQUIRED, content before tool calls is reasoning/thinking
            # about which tool to call, not an actual answer to the user.
            # Treat this content as reasoning instead of answer.
            if is_deep_research and tool_choice == ToolChoiceOptions.REQUIRED:
                accumulated_reasoning += content_chunk
                if state_container:
                    state_container.set_reasoning_tokens(accumulated_reasoning)
                if not reasoning_start:
                    yield Packet(
                        placement=_current_placement(),
                        obj=ReasoningStart(),
                    )
                yield Packet(
                    placement=_current_placement(),
                    obj=ReasoningDelta(reasoning=content_chunk),
                )
                reasoning_start = True
                return

            # Normal flow for AUTO or NONE tool choice
            yield from _close_reasoning_if_active()

            if not answer_start:
                # Store pre-answer processing time in state container for save_chat
                if state_container and pre_answer_processing_time is not None:
                    state_container.set_pre_answer_processing_time(
                        pre_answer_processing_time
                    )

                yield Packet(
                    placement=_current_placement(),
                    obj=AgentResponseStart(
                        final_documents=final_documents,
                        pre_answer_processing_seconds=pre_answer_processing_time,
                    ),
                )
                answer_start = True

            if citation_processor:
                yield from _emit_citation_results(
                    citation_processor.process_token(content_chunk)
                )
            else:
                accumulated_answer += content_chunk
                # Save answer incrementally to state container
                if state_container:
                    state_container.set_answer_tokens(accumulated_answer)
                yield Packet(
                    placement=_current_placement(),
                    obj=AgentResponseDelta(content=content_chunk),
                )

        for packet in llm.stream(
            prompt=llm_msg_history,
            tools=tool_definitions,
            tool_choice=tool_choice,
            structured_response_format=None,  # TODO
            max_tokens=max_tokens,
            reasoning_effort=reasoning_effort,
            user_identity=user_identity,
            timeout_override=timeout_override,
        ):
            stream_chunk_count += 1
            if packet.usage:
                usage = packet.usage
                span_generation.span_data.usage = {
                    "input_tokens": usage.prompt_tokens,
                    "output_tokens": usage.completion_tokens,
                    "cache_read_input_tokens": usage.cache_read_input_tokens,
                    "cache_creation_input_tokens": usage.cache_creation_input_tokens,
                }
                # Note: LLM cost tracking is now handled in multi_llm.py
            finish_reason = packet.choice.finish_reason
            if finish_reason:
                finish_reasons.add(str(finish_reason))
            delta = packet.choice.delta

            # Weird behavior from some model providers, just log and ignore for now
            if (
                not delta.content
                and delta.reasoning_content is None
                and not delta.tool_calls
            ):
                empty_chunk_count += 1
                logger.warning(
                    "LLM packet is empty (no content, reasoning, or tool calls). "
                    f"finish_reason={finish_reason}. Skipping: {packet}"
                )
                continue

            if not first_action_recorded and _delta_has_action(delta):
                span_generation.span_data.time_to_first_action_seconds = (
                    time.monotonic() - stream_start_time
                )
                first_action_recorded = True
            if _delta_has_action(delta):
                actionable_chunk_count += 1

            if custom_token_processor:
                # The custom token processor can modify the deltas for specific custom logic
                # It can also return a state so that it can handle aggregated delta logic etc.
                # Loosely typed so the function can be flexible
                modified_delta, processor_state = custom_token_processor(
                    delta, processor_state
                )
                if modified_delta is None:
                    continue
                delta = modified_delta

            # Should only happen once, frontend does not expect multiple
            # ReasoningStart or ReasoningDone packets.
            if delta.reasoning_content:
                accumulated_reasoning += delta.reasoning_content
                # Save reasoning incrementally to state container
                if state_container:
                    state_container.set_reasoning_tokens(accumulated_reasoning)
                if not reasoning_start:
                    yield Packet(
                        placement=_current_placement(),
                        obj=ReasoningStart(),
                    )
                yield Packet(
                    placement=_current_placement(),
                    obj=ReasoningDelta(reasoning=delta.reasoning_content),
                )
                reasoning_start = True

            if delta.content:
                # Keep raw content for fallback extraction. Display content can be
                # filtered and, in deep-research REQUIRED mode, routed as reasoning.
                accumulated_raw_answer += delta.content
                filtered_content = xml_tool_call_content_filter.process(delta.content)
                if filtered_content:
                    yield from _emit_content_chunk(filtered_content)

            if delta.tool_calls:
                yield from _close_reasoning_if_active()

                for tool_call_delta in delta.tool_calls:
                    # maybe_emit depends and update being called first and attaching the delta
                    _update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)
                    yield from maybe_emit_argument_delta(
                        tool_calls_in_progress=id_to_tool_call_map,
                        tool_call_delta=tool_call_delta,
                        placement=_current_placement(),
                        parsers=arg_parsers,
                    )

        # Flush any tail text buffered while checking for split "<function_calls" markers.
        filtered_content_tail = xml_tool_call_content_filter.flush()
        if filtered_content_tail:
            yield from _emit_content_chunk(filtered_content_tail)

        # Flush custom token processor to get any final tool calls
        if custom_token_processor:
            flush_delta, processor_state = custom_token_processor(None, processor_state)
            if (
                not first_action_recorded
                and flush_delta is not None
                and _delta_has_action(flush_delta)
            ):
                span_generation.span_data.time_to_first_action_seconds = (
                    time.monotonic() - stream_start_time
                )
                first_action_recorded = True
            if flush_delta and flush_delta.tool_calls:
                for tool_call_delta in flush_delta.tool_calls:
                    _update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)

        tool_calls = _extract_tool_call_kickoffs(
            id_to_tool_call_map=id_to_tool_call_map,
            turn_index=turn_index,
            tab_index=tab_index if use_existing_tab_index else None,
            sub_turn_index=sub_turn_index,
        )
        if tool_calls:
            tool_calls_list: list[ToolCall] = [
                ToolCall(
                    id=kickoff.tool_call_id,
                    type="function",
                    function=FunctionCall(
                        name=kickoff.tool_name,
                        arguments=json.dumps(kickoff.tool_args),
                    ),
                )
                for kickoff in tool_calls
            ]

            assistant_msg: AssistantMessage = AssistantMessage(
                role="assistant",
                content=accumulated_answer if accumulated_answer else None,
                tool_calls=tool_calls_list,
            )
            span_generation.span_data.output = [assistant_msg.model_dump()]
        elif accumulated_answer:
            assistant_msg_no_tools = AssistantMessage(
                role="assistant",
                content=accumulated_answer,
                tool_calls=None,
            )
            span_generation.span_data.output = [assistant_msg_no_tools.model_dump()]

        # Record reasoning content for tracing (extended thinking from reasoning models)
        if accumulated_reasoning:
            span_generation.span_data.reasoning = accumulated_reasoning

    # This may happen if the custom token processor is used to modify other packets into reasoning
    # Then there won't necessarily be anything else to come after the reasoning tokens
    yield from _close_reasoning_if_active()

    # Flush any remaining content from citation processor
    # Reasoning is always first so this should use the post-incremented value of turn_index
    # Note that this doesn't need to handle any sub-turns as those docs will not have citations
    # as clickable items and will be stripped out instead.
    if citation_processor:
        yield from _emit_citation_results(citation_processor.process_token(None))

    # Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
    # Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)
    if LOG_ONYX_MODEL_INTERACTIONS:
        logger.debug(f"Accumulated reasoning: {accumulated_reasoning}")
        logger.debug(f"Accumulated answer: {accumulated_answer}")

        if tool_calls:
            tool_calls_str = "\n".join(
                f"  - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
                for tc in tool_calls
            )
            logger.debug(f"Tool calls:\n{tool_calls_str}")
        else:
            logger.debug("Tool calls: []")

    if actionable_chunk_count == 0:
        logger.warning(
            "LLM stream completed with no actionable deltas. "
            f"chunks={stream_chunk_count}, empty_chunks={empty_chunk_count}, "
            f"finish_reasons={sorted(finish_reasons)}, "
            f"provider={llm.config.model_provider}, model={llm.config.model_name}, "
            f"tool_choice={tool_choice}, tools_sent={len(tool_definitions)}"
        )

    return (
        LlmStepResult(
            reasoning=accumulated_reasoning if accumulated_reasoning else None,
            answer=accumulated_answer if accumulated_answer else None,
            tool_calls=tool_calls if tool_calls else None,
            raw_answer=accumulated_raw_answer if accumulated_raw_answer else None,
        ),
        has_reasoned,
    )


def run_llm_step(
    emitter: Emitter,
    history: list[ChatMessageSimple],
    tool_definitions: list[dict],
    tool_choice: ToolChoiceOptions,
    llm: LLM,
    placement: Placement,
    state_container: ChatStateContainer | None,
    citation_processor: DynamicCitationProcessor | None,
    reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
    final_documents: list[SearchDoc] | None = None,
    user_identity: LLMUserIdentity | None = None,
    custom_token_processor: (
        Callable[[Delta | None, Any], tuple[Delta | None, Any]] | None
    ) = None,
    max_tokens: int | None = None,
    use_existing_tab_index: bool = False,
    is_deep_research: bool = False,
    pre_answer_processing_time: float | None = None,
    timeout_override: int | None = None,
) -> tuple[LlmStepResult, bool]:
    """Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.

    Returns:
        tuple[LlmStepResult, bool]: The LLM step result and whether reasoning occurred.
    """
    step_generator = run_llm_step_pkt_generator(
        history=history,
        tool_definitions=tool_definitions,
        tool_choice=tool_choice,
        llm=llm,
        placement=placement,
        state_container=state_container,
        citation_processor=citation_processor,
        reasoning_effort=reasoning_effort,
        final_documents=final_documents,
        user_identity=user_identity,
        custom_token_processor=custom_token_processor,
        max_tokens=max_tokens,
        use_existing_tab_index=use_existing_tab_index,
        is_deep_research=is_deep_research,
        pre_answer_processing_time=pre_answer_processing_time,
        timeout_override=timeout_override,
    )

    while True:
        try:
            packet = next(step_generator)
            emitter.emit(packet)
        except StopIteration as e:
            llm_step_result, has_reasoned = e.value
            return llm_step_result, has_reasoned


================================================
FILE: backend/onyx/chat/models.py
================================================
from collections.abc import Iterator
from typing import Any
from uuid import UUID

from pydantic import BaseModel

from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import InMemoryChatFile
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.models import SearchToolUsage
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType


class StreamingError(BaseModel):
    error: str
    stack_trace: str | None = None
    error_code: str | None = (
        None  # e.g., "RATE_LIMIT", "AUTH_ERROR", "TOOL_CALL_FAILED"
    )
    is_retryable: bool = True  # Hint to frontend if retry might help
    details: dict | None = None  # Additional context (tool name, model name, etc.)


class CustomToolResponse(BaseModel):
    response: ToolResultType
    tool_name: str


class CreateChatSessionID(BaseModel):
    chat_session_id: UUID


AnswerStreamPart = (
    Packet
    | MessageResponseIDInfo
    | MultiModelMessageResponseIDInfo
    | StreamingError
    | CreateChatSessionID
)

AnswerStream = Iterator[AnswerStreamPart]


class ToolCallResponse(BaseModel):
    """Tool call with full details for non-streaming response."""

    tool_name: str
    tool_arguments: dict[str, Any]
    tool_result: str
    search_docs: list[SearchDoc] | None = None
    generated_images: list[GeneratedImage] | None = None
    # Reasoning that led to the tool call
    pre_reasoning: str | None = None


class ChatBasicResponse(BaseModel):
    # This is built piece by piece, any of these can be None as the flow could break
    answer: str
    answer_citationless: str

    top_documents: list[SearchDoc]

    error_msg: str | None
    message_id: int
    citation_info: list[CitationInfo]


class ChatFullResponse(BaseModel):
    """Complete non-streaming response with all available data.
    NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
    experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
    """

    # Core response fields
    answer: str
    answer_citationless: str
    pre_answer_reasoning: str | None = None
    tool_calls: list[ToolCallResponse] = []

    # Documents & citations
    top_documents: list[SearchDoc]
    citation_info: list[CitationInfo]

    # Metadata
    message_id: int
    chat_session_id: UUID | None = None
    error_msg: str | None = None


class ChatLoadedFile(InMemoryChatFile):
    content_text: str | None
    token_count: int


class ToolCallSimple(BaseModel):
    """Tool call for ChatMessageSimple representation (mirrors OpenAI format).

    Used when an ASSISTANT message contains one or more tool calls.
    Each tool call has an ID, name, arguments, and token count for tracking.
    """

    tool_call_id: str
    tool_name: str
    tool_arguments: dict[str, Any]
    token_count: int = 0


class ChatMessageSimple(BaseModel):
    message: str
    token_count: int
    message_type: MessageType
    # Only for USER type messages
    image_files: list[ChatLoadedFile] | None = None
    # Only for TOOL_CALL_RESPONSE type messages
    tool_call_id: str | None = None
    # For ASSISTANT messages with tool calls (OpenAI parallel tool calling format)
    tool_calls: list[ToolCallSimple] | None = None
    # The last message for which this is true
    # AND is true for all previous messages
    # (counting from the start of the history)
    # represents the end of the cacheable prefix
    # used for prompt caching
    should_cache: bool = False
    # When this message represents an injected text file, this is the file's ID.
    # Used to detect which file messages survive context-window truncation.
    file_id: str | None = None


class ContextFileMetadata(BaseModel):
    """Metadata for a context-injected file to enable citation support."""

    file_id: str
    filename: str
    file_content: str


class FileToolMetadata(BaseModel):
    """Lightweight metadata for exposing files to the FileReaderTool.

    Used when files cannot be loaded directly into context (project too large
    or persona-attached user_files without direct-load path). The LLM receives
    a listing of these so it knows which files it can read via ``read_file``.
    """

    file_id: str
    filename: str
    approx_char_count: int


class ChatHistoryResult(BaseModel):
    """Result of converting chat history to simple format.

    Bundles the simple messages with metadata for every text file that was
    injected into the history. After context-window truncation drops older
    messages, callers compare surviving ``file_id`` tags against this map
    to discover "forgotten" files whose metadata should be provided to the
    FileReaderTool.
    """

    simple_messages: list[ChatMessageSimple]
    all_injected_file_metadata: dict[str, FileToolMetadata]


class ExtractedContextFiles(BaseModel):
    """Result of attempting to load user files (from a project or persona) into context."""

    file_texts: list[str]
    image_files: list[ChatLoadedFile]
    use_as_search_filter: bool
    total_token_count: int
    # Lightweight metadata for files exposed via FileReaderTool
    # (populated when files don't fit in context and vector DB is disabled).
    file_metadata: list[ContextFileMetadata]
    uncapped_token_count: int | None
    file_metadata_for_tool: list[FileToolMetadata] = []


class SearchParams(BaseModel):
    """Resolved search filter IDs and search-tool usage for a chat turn."""

    project_id_filter: int | None
    persona_id_filter: int | None
    search_usage: SearchToolUsage


class LlmStepResult(BaseModel):
    reasoning: str | None
    answer: str | None
    tool_calls: list[ToolCallKickoff] | None
    # Raw LLM text before any display-oriented filtering/sanitization.
    # Used for fallback tool-call extraction when providers emit calls as text.
    raw_answer: str | None = None


================================================
FILE: backend/onyx/chat/process_message.py
================================================
"""
IMPORTANT: familiarize yourself with the design concepts prior to contributing to this file.
An overview can be found in the README.md file in this directory.
"""

import contextvars
import io
import queue
import re
import threading
import traceback
from collections.abc import Callable
from collections.abc import Generator
from concurrent.futures import ThreadPoolExecutor
from contextvars import Token
from typing import Final
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.cache.factory import get_cache_backend
from onyx.chat.chat_processing_checker import set_processing_status
from onyx.chat.chat_state import AvailableFiles
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import ChatTurnSetup
from onyx.chat.chat_utils import build_file_context
from onyx.chat.chat_utils import convert_chat_history
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.chat_utils import create_chat_session_from_request
from onyx.chat.chat_utils import get_custom_agent_prompt
from onyx.chat.chat_utils import is_last_assistant_message_clarification
from onyx.chat.chat_utils import load_all_chat_files
from onyx.chat.compression import calculate_total_history_tokens
from onyx.chat.compression import compress_chat_history
from onyx.chat.compression import find_summary_for_branch
from onyx.chat.compression import get_compression_params
from onyx.chat.emitter import Emitter
from onyx.chat.llm_loop import EmptyLLMResponseError
from onyx.chat.llm_loop import run_llm_loop
from onyx.chat.models import AnswerStream
from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import ChatBasicResponse
from onyx.chat.models import ChatFullResponse
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import SearchParams
from onyx.chat.models import StreamingError
from onyx.chat.models import ToolCallResponse
from onyx.chat.prompt_utils import calculate_reserved_tokens
from onyx.chat.save_chat import save_chat_turn
from onyx.chat.stop_signal_checker import is_connected as check_stop_signal
from onyx.chat.stop_signal_checker import reset_cancel_status
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.chat import reserve_multi_model_message_ids
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import HookPoint
from onyx.db.memory import get_memories
from onyx.db.models import ChatMessage
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.projects import get_user_files_from_project
from onyx.db.tools import get_tools
from onyx.deep_research.dr_loop import run_deep_research_llm_loop
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import log_onyx_error
from onyx.error_handling.exceptions import OnyxError
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import verify_user_files
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingPayload
from onyx.hooks.points.query_processing import QueryProcessingResponse
from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llm_token_counter
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.override_models import LLMOverride
from onyx.llm.request_context import reset_llm_mock_response
from onyx.llm.request_context import set_llm_mock_response
from onyx.llm.utils import litellm_exception_to_error_msg
from onyx.onyxbot.slack.models import SlackContext
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import ModelResponseSlot
from onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.tools.constants import FILE_READER_TOOL_ID
from onyx.tools.constants import SEARCH_TOOL_ID
from onyx.tools.models import ChatFile
from onyx.tools.models import SearchToolUsage
from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import CustomToolConfig
from onyx.tools.tool_constructor import FileReaderToolConfig
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.timing import log_function_time
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()
ERROR_TYPE_CANCELLED = "cancelled"
APPROX_CHARS_PER_TOKEN = 4


def _collect_available_file_ids(
    chat_history: list[ChatMessage],
    project_id: int | None,
    user_id: UUID | None,
    db_session: Session,
) -> AvailableFiles:
    """Collect all file IDs the FileReaderTool should be allowed to access.

    Returns *separate* lists for chat-attached files (``file_record`` IDs) and
    project/user files (``user_file`` IDs) so the tool can pick the right
    loader without a try/except fallback."""
    chat_file_ids: set[UUID] = set()
    user_file_ids: set[UUID] = set()

    for msg in chat_history:
        if not msg.files:
            continue
        for fd in msg.files:
            try:
                chat_file_ids.add(UUID(fd["id"]))
            except (ValueError, KeyError):
                pass

    if project_id:
        user_files = get_user_files_from_project(
            project_id=project_id,
            user_id=user_id,
            db_session=db_session,
        )
        for uf in user_files:
            user_file_ids.add(uf.id)

    return AvailableFiles(
        user_file_ids=list(user_file_ids),
        chat_file_ids=list(chat_file_ids),
    )


def _should_enable_slack_search(
    persona: Persona,
    filters: BaseFilters | None,
) -> bool:
    """Determine if Slack search should be enabled.

    Returns True if:
    - Source type filter exists and includes Slack, OR
    - Default persona with no source type filter
    """
    source_types = filters.source_type if filters else None
    return (source_types is not None and DocumentSource.SLACK in source_types) or (
        persona.id == DEFAULT_PERSONA_ID and source_types is None
    )


def _convert_loaded_files_to_chat_files(
    loaded_files: list[ChatLoadedFile],
) -> list[ChatFile]:
    """Convert ChatLoadedFile objects to ChatFile for tool usage (e.g., PythonTool).

    Args:
        loaded_files: List of ChatLoadedFile objects from the chat history

    Returns:
        List of ChatFile objects that can be passed to tools
    """
    chat_files = []
    for loaded_file in loaded_files:
        if len(loaded_file.content) > 0:
            chat_files.append(
                ChatFile(
                    filename=loaded_file.filename or f"file_{loaded_file.file_id}",
                    content=loaded_file.content,
                )
            )
    return chat_files


def resolve_context_user_files(
    persona: Persona,
    project_id: int | None,
    user_id: UUID | None,
    db_session: Session,
) -> list[UserFile]:
    """Apply the precedence rule to decide which user files to load.

    A custom persona fully supersedes the project.  When a chat uses a
    custom persona, the project is purely organisational — its files are
    never loaded and never made searchable.

    Custom persona → persona's own user_files (may be empty).
    Default persona inside a project → project files.
    Otherwise → empty list.
    """
    if persona.id != DEFAULT_PERSONA_ID:
        return list(persona.user_files) if persona.user_files else []
    if project_id:
        return get_user_files_from_project(
            project_id=project_id,
            user_id=user_id,
            db_session=db_session,
        )
    return []


def _empty_extracted_context_files() -> ExtractedContextFiles:
    return ExtractedContextFiles(
        file_texts=[],
        image_files=[],
        use_as_search_filter=False,
        total_token_count=0,
        file_metadata=[],
        uncapped_token_count=None,
    )


def _extract_text_from_in_memory_file(f: InMemoryChatFile) -> str | None:
    """Extract text content from an InMemoryChatFile.

    PLAIN_TEXT: the content is pre-extracted UTF-8 plaintext stored during
    ingestion — decode directly.
    DOC / CSV / other text types: the content is the original file bytes —
    use extract_file_text which handles encoding detection and format parsing.
    """
    try:
        if f.file_type == ChatFileType.PLAIN_TEXT:
            return f.content.decode("utf-8", errors="ignore").replace("\x00", "")
        return extract_file_text(
            file=io.BytesIO(f.content),
            file_name=f.filename or "",
            break_on_unprocessable=False,
        )
    except Exception:
        logger.warning(f"Failed to extract text from file {f.file_id}", exc_info=True)
        return None


def extract_context_files(
    user_files: list[UserFile],
    llm_max_context_window: int,
    reserved_token_count: int,
    db_session: Session,
    # Because the tokenizer is a generic tokenizer, the token count may be incorrect.
    # to account for this, the maximum context that is allowed for this function is
    # 60% of the LLM's max context window. The other benefit is that for projects with
    # more files, this makes it so that we don't throw away the history too quickly every time.
    max_llm_context_percentage: float = 0.6,
) -> ExtractedContextFiles:
    """Load user files into context if they fit; otherwise flag for search.

    The caller is responsible for deciding *which* user files to pass in
    (project files, persona files, etc.).  This function only cares about
    the all-or-nothing fit check and the actual content loading.

    Args:
        project_id: The project ID to load files from
        user_id: The user ID for authorization
        llm_max_context_window: Maximum tokens allowed in the LLM context window
        reserved_token_count: Number of tokens to reserve for other content
        db_session: Database session
        max_llm_context_percentage: Maximum percentage of the LLM context window to use.
    Returns:
        ExtractedContextFiles containing:
        - List of text content strings from context files (text files only)
        - List of image files from context (ChatLoadedFile objects)
        - Total token count of all extracted files
        - File metadata for context files
        - Uncapped token count of all extracted files
        - File metadata for files that don't fit in context and vector DB is disabled
    """
    # TODO(yuhong): I believe this is not handling all file types correctly.

    if not user_files:
        return _empty_extracted_context_files()

    # Aggregate tokens for the file content that will be added
    # Skip tokens for those with metadata only
    aggregate_tokens = sum(
        uf.token_count or 0
        for uf in user_files
        if not mime_type_to_chat_file_type(uf.file_type).use_metadata_only()
    )
    max_actual_tokens = (
        llm_max_context_window - reserved_token_count
    ) * max_llm_context_percentage

    if aggregate_tokens >= max_actual_tokens:
        use_as_search_filter = not DISABLE_VECTOR_DB
        if DISABLE_VECTOR_DB:
            overflow_tool_metadata = [_build_tool_metadata(uf) for uf in user_files]
        else:
            overflow_tool_metadata = [
                _build_tool_metadata(uf)
                for uf in user_files
                if mime_type_to_chat_file_type(uf.file_type).use_metadata_only()
            ]
        return ExtractedContextFiles(
            file_texts=[],
            image_files=[],
            use_as_search_filter=use_as_search_filter,
            total_token_count=0,
            file_metadata=[],
            uncapped_token_count=aggregate_tokens,
            file_metadata_for_tool=overflow_tool_metadata,
        )

    # Files fit — load them into context
    user_file_map = {uf.file_id: uf for uf in user_files}
    in_memory_files = load_in_memory_chat_files(
        user_file_ids=[uf.id for uf in user_files],
        db_session=db_session,
    )

    file_texts: list[str] = []
    image_files: list[ChatLoadedFile] = []
    file_metadata: list[ContextFileMetadata] = []
    tool_metadata: list[FileToolMetadata] = []
    total_token_count = 0

    for f in in_memory_files:
        uf = user_file_map.get(str(f.file_id))
        filename = f.filename or f"file_{f.file_id}"

        if f.file_type.use_metadata_only():
            # Metadata-only files are not injected as full text.
            # Only the metadata is provided, with LLM using tools
            if not uf:
                logger.error(
                    f"File with id={f.file_id} in metadata-only path with no associated user file"
                )
                continue
            tool_metadata.append(_build_tool_metadata(uf))
        elif f.file_type.is_text_file():
            text_content = _extract_text_from_in_memory_file(f)
            if not text_content:
                continue
            if not uf:
                logger.warning(f"No user file for file_id={f.file_id}")
                continue
            file_texts.append(text_content)
            file_metadata.append(
                ContextFileMetadata(
                    file_id=str(uf.id),
                    filename=filename,
                    file_content=text_content,
                )
            )
            if uf.token_count:
                total_token_count += uf.token_count
        elif f.file_type == ChatFileType.IMAGE:
            token_count = uf.token_count if uf and uf.token_count else 0
            total_token_count += token_count
            image_files.append(
                ChatLoadedFile(
                    file_id=f.file_id,
                    content=f.content,
                    file_type=f.file_type,
                    filename=f.filename,
                    content_text=None,
                    token_count=token_count,
                )
            )

    return ExtractedContextFiles(
        file_texts=file_texts,
        image_files=image_files,
        use_as_search_filter=False,
        total_token_count=total_token_count,
        file_metadata=file_metadata,
        uncapped_token_count=aggregate_tokens,
        file_metadata_for_tool=tool_metadata,
    )


def _build_tool_metadata(user_file: UserFile) -> FileToolMetadata:
    """Build lightweight FileToolMetadata from a UserFile record.

    Delegates to ``build_file_context`` so that the file ID exposed to the
    LLM is always consistent with what FileReaderTool expects.
    """
    return build_file_context(
        tool_file_id=str(user_file.id),
        filename=user_file.name,
        file_type=mime_type_to_chat_file_type(user_file.file_type),
        approx_char_count=(user_file.token_count or 0) * APPROX_CHARS_PER_TOKEN,
    ).tool_metadata


def determine_search_params(
    persona_id: int,
    project_id: int | None,
    extracted_context_files: ExtractedContextFiles,
) -> SearchParams:
    """Decide which search filter IDs and search-tool usage apply for a chat turn.

    A custom persona fully supersedes the project — project files are never
    searchable and the search tool config is entirely controlled by the
    persona.  The project_id filter is only set for the default persona.

    For the default persona inside a project:
      - Files overflow  → ENABLED  (vector DB scopes to these files)
      - Files fit       → DISABLED (content already in prompt)
      - No files at all → DISABLED (nothing to search)
    """
    is_custom_persona = persona_id != DEFAULT_PERSONA_ID

    project_id_filter: int | None = None
    persona_id_filter: int | None = None
    if extracted_context_files.use_as_search_filter:
        if is_custom_persona:
            persona_id_filter = persona_id
        else:
            project_id_filter = project_id

    search_usage = SearchToolUsage.AUTO
    if not is_custom_persona and project_id:
        has_context_files = bool(extracted_context_files.uncapped_token_count)
        files_loaded_in_context = bool(extracted_context_files.file_texts)

        if extracted_context_files.use_as_search_filter:
            search_usage = SearchToolUsage.ENABLED
        elif files_loaded_in_context or not has_context_files:
            search_usage = SearchToolUsage.DISABLED

    return SearchParams(
        project_id_filter=project_id_filter,
        persona_id_filter=persona_id_filter,
        search_usage=search_usage,
    )


def _resolve_query_processing_hook_result(
    hook_result: QueryProcessingResponse | HookSkipped | HookSoftFailed,
    message_text: str,
) -> str:
    """Apply the Query Processing hook result to the message text.

    Returns the (possibly rewritten) message text, or raises OnyxError with
    QUERY_REJECTED if the hook signals rejection (query is null or empty).
    HookSkipped and HookSoftFailed are pass-throughs — the original text is
    returned unchanged.
    """
    if isinstance(hook_result, (HookSkipped, HookSoftFailed)):
        return message_text
    if not (hook_result.query and hook_result.query.strip()):
        raise OnyxError(
            OnyxErrorCode.QUERY_REJECTED,
            hook_result.rejection_message
            or "The hook extension for query processing did not return a valid query. No rejection reason was provided.",
        )
    return hook_result.query.strip()


def build_chat_turn(
    new_msg_req: SendMessageRequest,
    user: User,
    db_session: Session,
    # None → single-model (persona default LLM); non-empty list → multi-model (one LLM per override)
    llm_overrides: list[LLMOverride] | None,
    *,
    litellm_additional_headers: dict[str, str] | None = None,
    custom_tool_additional_headers: dict[str, str] | None = None,
    mcp_headers: dict[str, str] | None = None,
    bypass_acl: bool = False,
    # Slack context for federated Slack search
    slack_context: SlackContext | None = None,
    # Additional context to include in the chat history, e.g. Slack threads where the
    # conversation cannot be represented by a chain of User/Assistant messages.
    # NOTE: not stored in the database, only passed in to the LLM as context
    additional_context: str | None = None,
) -> Generator[AnswerStreamPart, None, ChatTurnSetup]:
    """Shared setup generator for both single-model and multi-model chat turns.

    Yields the packet(s) the frontend needs for request tracking, then returns an
    immutable ``ChatTurnSetup`` containing everything the execution strategy needs.

    Callers use::

        setup = yield from build_chat_turn(new_msg_req, ..., llm_overrides=...)

    to forward yielded packets upstream while receiving the return value locally.

    Args:
        llm_overrides: ``None`` → single-model (persona default LLM).
                       Non-empty list → multi-model (one LLM per override).
    """
    tenant_id = get_current_tenant_id()
    is_multi = bool(llm_overrides)

    user_id = user.id
    llm_user_identifier = (
        "anonymous_user" if user.is_anonymous else (user.email or str(user_id))
    )

    # ── Session resolution ───────────────────────────────────────────────────
    if not new_msg_req.chat_session_id:
        if not new_msg_req.chat_session_info:
            raise RuntimeError("Must specify a chat session id or chat session info")
        chat_session = create_chat_session_from_request(
            chat_session_request=new_msg_req.chat_session_info,
            user_id=user_id,
            db_session=db_session,
        )
        yield CreateChatSessionID(chat_session_id=chat_session.id)
        chat_session = get_chat_session_by_id(
            chat_session_id=chat_session.id,
            user_id=user_id,
            db_session=db_session,
            eager_load_persona=True,
        )
    else:
        chat_session = get_chat_session_by_id(
            chat_session_id=new_msg_req.chat_session_id,
            user_id=user_id,
            db_session=db_session,
            eager_load_persona=True,
        )

    persona = chat_session.persona
    message_text = new_msg_req.message

    user_identity = LLMUserIdentity(
        user_id=llm_user_identifier, session_id=str(chat_session.id)
    )

    # Milestone tracking, most devs using the API don't need to understand this
    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
        event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
    )
    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
        event=MilestoneRecordType.USER_MESSAGE_SENT,
        properties={
            "origin": new_msg_req.origin.value,
            "has_files": len(new_msg_req.file_descriptors) > 0,
            "has_project": chat_session.project_id is not None,
            "has_persona": persona is not None and persona.id != DEFAULT_PERSONA_ID,
            "deep_research": new_msg_req.deep_research,
        },
    )

    # Check LLM cost limits before using the LLM (only for Onyx-managed keys),
    # then build the LLM instance(s).
    llms: list[LLM] = []
    model_display_names: list[str] = []
    selected_overrides: list[LLMOverride | None] = (
        list(llm_overrides or [])
        if is_multi
        else [new_msg_req.llm_override or chat_session.llm_override]
    )
    for override in selected_overrides:
        llm = get_llm_for_persona(
            persona=persona,
            user=user,
            llm_override=override,
            additional_headers=litellm_additional_headers,
        )
        check_llm_cost_limit_for_provider(
            db_session=db_session,
            tenant_id=tenant_id,
            llm_provider_api_key=llm.config.api_key,
        )
        llms.append(llm)
        model_display_names.append(_build_model_display_name(override))
    token_counter = get_llm_token_counter(llms[0])

    # not sure why we do this, but to maintain parity with previous code:
    if not is_multi:
        model_display_names = [""]

    # Verify that the user-specified files actually belong to the user
    verify_user_files(
        user_files=new_msg_req.file_descriptors,
        user_id=user_id,
        db_session=db_session,
        project_id=chat_session.project_id,
    )

    # Re-create linear history of messages
    chat_history = create_chat_history_chain(
        chat_session_id=chat_session.id, db_session=db_session
    )

    # Determine the parent message based on the request:
    # - AUTO_PLACE_AFTER_LATEST_MESSAGE (-1): auto-place after latest message in chain
    # - None or root ID: regeneration from root (first message)
    # - positive int: place after that specific parent message
    root_message = get_or_create_root_message(
        chat_session_id=chat_session.id, db_session=db_session
    )

    if new_msg_req.parent_message_id == AUTO_PLACE_AFTER_LATEST_MESSAGE:
        parent_message = chat_history[-1] if chat_history else root_message
    elif (
        new_msg_req.parent_message_id is None
        or new_msg_req.parent_message_id == root_message.id
    ):
        # Regeneration from root — clear history so we start fresh
        parent_message = root_message
        chat_history = []
    else:
        parent_message = None
        for i in range(len(chat_history) - 1, -1, -1):
            if chat_history[i].id == new_msg_req.parent_message_id:
                parent_message = chat_history[i]
                # Truncate to only messages up to and including the parent
                chat_history = chat_history[: i + 1]
                break

    if parent_message is None:
        raise ValueError(
            "The new message sent is not on the latest mainline of messages"
        )

    # ── Query Processing hook + user message ─────────────────────────────────
    # Skipped on regeneration (parent is USER type): message already exists/was accepted.
    if parent_message.message_type == MessageType.USER:
        user_message = parent_message
    else:
        # New message — run the Query Processing hook before saving to DB.
        # Skipped on regeneration: the message already exists and was accepted previously.
        # Skip for empty/whitespace-only messages — no meaningful query to process,
        # and SendMessageRequest.message has no min_length guard.
        if message_text.strip():
            hook_result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=QueryProcessingPayload(
                    query=message_text,
                    # Pass None for anonymous users or authenticated users without an email
                    # (e.g. some SSO flows). QueryProcessingPayload.user_email is str | None,
                    # so None is accepted and serialised as null in both cases.
                    user_email=None if user.is_anonymous else user.email,
                    chat_session_id=str(chat_session.id),
                ).model_dump(),
                response_type=QueryProcessingResponse,
            )
            message_text = _resolve_query_processing_hook_result(
                hook_result, message_text
            )

        user_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=parent_message,
            message=message_text,
            token_count=token_counter(message_text),
            message_type=MessageType.USER,
            files=new_msg_req.file_descriptors,
            db_session=db_session,
            commit=True,
        )
        chat_history.append(user_message)

    # Collect file IDs for the file reader tool *before* summary truncation so
    # that files attached to older (summarized-away) messages are still accessible
    # via the FileReaderTool.
    available_files = _collect_available_file_ids(
        chat_history=chat_history,
        project_id=chat_session.project_id,
        user_id=user_id,
        db_session=db_session,
    )

    # Find applicable summary for the current branch
    summary_message = find_summary_for_branch(db_session, chat_history)
    # Collect file metadata from messages that will be dropped by summary truncation.
    # These become "pre-summarized" file metadata so the forgotten-file mechanism can
    # still tell the LLM about them.
    summarized_file_metadata: dict[str, FileToolMetadata] = {}
    if summary_message and summary_message.last_summarized_message_id:
        cutoff_id = summary_message.last_summarized_message_id
        for msg in chat_history:
            if msg.id > cutoff_id or not msg.files:
                continue
            for fd in msg.files:
                file_id = fd.get("id")
                if not file_id:
                    continue
                summarized_file_metadata[file_id] = FileToolMetadata(
                    file_id=file_id,
                    filename=fd.get("name") or "unknown",
                    # We don't know the exact size without loading the file,
                    # but 0 signals "unknown" to the LLM.
                    approx_char_count=0,
                )
        # Filter chat_history to only messages after the cutoff
        chat_history = [m for m in chat_history if m.id > cutoff_id]

    # Compute skip-clarification flag for deep research path (cheap, always available)
    skip_clarification = is_last_assistant_message_clarification(chat_history)

    user_memory_context = get_memories(user, db_session)

    # This prompt may come from the Agent or Project. Fetched here (before run_llm_loop)
    # because the inner loop shouldn't need to access the DB-form chat history, but we
    # need it early for token reservation.
    custom_agent_prompt = get_custom_agent_prompt(persona, chat_session)

    # When use_memories is disabled, strip memories from the prompt context but keep
    # user info/preferences. The full context is still passed to the LLM loop for
    # memory tool persistence.
    prompt_memory_context = (
        user_memory_context
        if user.use_memories
        else user_memory_context.without_memories()
    )

    # ── Token reservation ────────────────────────────────────────────────────
    max_reserved_system_prompt_tokens_str = (persona.system_prompt or "") + (
        custom_agent_prompt or ""
    )
    reserved_token_count = calculate_reserved_tokens(
        db_session=db_session,
        persona_system_prompt=max_reserved_system_prompt_tokens_str,
        token_counter=token_counter,
        files=new_msg_req.file_descriptors,
        user_memory_context=prompt_memory_context,
    )

    # Determine which user files to use. A custom persona fully supersedes the project —
    # project files are never loaded or searchable when a custom persona is in play.
    # Only the default persona inside a project uses the project's files.
    context_user_files = resolve_context_user_files(
        persona=persona,
        project_id=chat_session.project_id,
        user_id=user_id,
        db_session=db_session,
    )

    # Use the smallest context window across models for safety (harmless for N=1).
    llm_max_context_window = min(llm.config.max_input_tokens for llm in llms)

    extracted_context_files = extract_context_files(
        user_files=context_user_files,
        llm_max_context_window=llm_max_context_window,
        reserved_token_count=reserved_token_count,
        db_session=db_session,
    )

    search_params = determine_search_params(
        persona_id=persona.id,
        project_id=chat_session.project_id,
        extracted_context_files=extracted_context_files,
    )

    # Also grant access to persona-attached user files for FileReaderTool
    if persona.user_files:
        existing = set(available_files.user_file_ids)
        for uf in persona.user_files:
            if uf.id not in existing:
                available_files.user_file_ids.append(uf.id)

    all_tools = get_tools(db_session)
    tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}

    search_tool_id = next(
        (tool.id for tool in all_tools if tool.in_code_tool_id == SEARCH_TOOL_ID), None
    )

    forced_tool_id = new_msg_req.forced_tool_id
    if (
        search_params.search_usage == SearchToolUsage.DISABLED
        and forced_tool_id is not None
        and search_tool_id is not None
        and forced_tool_id == search_tool_id
    ):
        forced_tool_id = None

    # TODO(nmgarza5): Once summarization is done, we don't need to load all files from the beginning.
    # Load all files needed for this chat chain into memory.
    files = load_all_chat_files(chat_history, db_session)
    # Convert loaded files to ChatFile format for tools like PythonTool
    chat_files_for_tools = _convert_loaded_files_to_chat_files(files)

    # ── Reserve assistant message ID(s) → yield to frontend ──────────────────
    if is_multi:
        assert llm_overrides is not None
        reserved_messages = reserve_multi_model_message_ids(
            db_session=db_session,
            chat_session_id=chat_session.id,
            parent_message_id=user_message.id,
            model_display_names=model_display_names,
        )
        yield MultiModelMessageResponseIDInfo(
            user_message_id=user_message.id,
            responses=[
                ModelResponseSlot(message_id=m.id, model_name=name)
                for m, name in zip(reserved_messages, model_display_names)
            ],
        )
    else:
        assistant_response = reserve_message_id(
            db_session=db_session,
            chat_session_id=chat_session.id,
            parent_message=user_message.id,
            message_type=MessageType.ASSISTANT,
        )
        reserved_messages = [assistant_response]
        yield MessageResponseIDInfo(
            user_message_id=user_message.id,
            reserved_assistant_message_id=assistant_response.id,
        )

    # Convert the chat history into a simple format that is free of any DB objects
    # and is easy to parse for the agent loop.
    has_file_reader_tool = any(
        tool.in_code_tool_id == FILE_READER_TOOL_ID for tool in persona.tools
    )

    chat_history_result = convert_chat_history(
        chat_history=chat_history,
        files=files,
        context_image_files=extracted_context_files.image_files,
        additional_context=additional_context or new_msg_req.additional_context,
        token_counter=token_counter,
        tool_id_to_name_map=tool_id_to_name_map,
    )
    simple_chat_history = chat_history_result.simple_messages

    # Metadata for every text file injected into the history. After context-window
    # truncation drops older messages, the LLM loop compares surviving file_id tags
    # against this map to discover "forgotten" files and provide their metadata to
    # FileReaderTool.
    all_injected_file_metadata: dict[str, FileToolMetadata] = (
        chat_history_result.all_injected_file_metadata if has_file_reader_tool else {}
    )

    # Merge in file metadata from messages dropped by summary truncation. These files
    # are no longer in simple_chat_history so they'd be invisible to the forgotten-file
    # mechanism — they'll always appear as "forgotten" since no surviving message carries
    # their file_id tag.
    if summarized_file_metadata:
        for fid, meta in summarized_file_metadata.items():
            all_injected_file_metadata.setdefault(fid, meta)

    if all_injected_file_metadata:
        logger.debug(
            f"FileReader: file metadata for LLM: {[(fid, m.filename) for fid, m in all_injected_file_metadata.items()]}"
        )

    if summary_message is not None:
        summary_simple = ChatMessageSimple(
            message=summary_message.message,
            token_count=summary_message.token_count,
            message_type=MessageType.ASSISTANT,
        )
        simple_chat_history.insert(0, summary_simple)

    # ── Stop signal and processing status ────────────────────────────────────
    cache = get_cache_backend()
    reset_cancel_status(chat_session.id, cache)

    def check_is_connected() -> bool:
        return check_stop_signal(chat_session.id, cache)

    set_processing_status(
        chat_session_id=chat_session.id,
        cache=cache,
        value=True,
    )

    # Release any read transaction before the long-running LLM stream.
    # If commit fails here, reset the processing status before propagating —
    # otherwise the chat session appears stuck at "processing" permanently.
    try:
        db_session.commit()
    except Exception:
        set_processing_status(chat_session_id=chat_session.id, cache=cache, value=False)
        raise

    return ChatTurnSetup(
        new_msg_req=new_msg_req,
        chat_session=chat_session,
        persona=persona,
        user_message=user_message,
        user_identity=user_identity,
        llms=llms,
        model_display_names=model_display_names,
        simple_chat_history=simple_chat_history,
        extracted_context_files=extracted_context_files,
        reserved_messages=reserved_messages,
        reserved_token_count=reserved_token_count,
        search_params=search_params,
        all_injected_file_metadata=all_injected_file_metadata,
        available_files=available_files,
        tool_id_to_name_map=tool_id_to_name_map,
        forced_tool_id=forced_tool_id,
        files=files,
        chat_files_for_tools=chat_files_for_tools,
        custom_agent_prompt=custom_agent_prompt,
        user_memory_context=user_memory_context,
        skip_clarification=skip_clarification,
        check_is_connected=check_is_connected,
        cache=cache,
        bypass_acl=bypass_acl,
        slack_context=slack_context,
        custom_tool_additional_headers=custom_tool_additional_headers,
        mcp_headers=mcp_headers,
    )


# Sentinel placed on the merged queue when a model thread finishes.
_MODEL_DONE = object()

# How often the drain loop polls for user-initiated cancellation (stop button).
_CANCEL_POLL_INTERVAL_S: Final[float] = 0.05


def _run_models(
    setup: ChatTurnSetup,
    user: User,
    db_session: Session,
    external_state_container: ChatStateContainer | None = None,
) -> AnswerStream:
    """Stream packets from one or more LLM loops running in parallel worker threads.

    Each model gets its own worker thread, DB session, and ``Emitter``. Threads write
    packets to a shared unbounded queue as they are produced; the drain loop yields them
    in arrival order so the caller receives a single interleaved stream regardless of
    how many models are running.

    Single-model (N=1) and multi-model (N>1) use the same execution path. Every
    packet is tagged with ``model_index`` by the model's Emitter — ``0`` for N=1,
    ``0``/``1``/``2`` for multi-model.

    Args:
        setup: Fully constructed turn context — LLMs, persona, history, tool config.
        user: Authenticated user making the request.
        db_session: Caller's DB session (used for setup reads; each worker opens its own
            session because SQLAlchemy sessions are not thread-safe).
        external_state_container: Pre-constructed state container for the first model.
            Used by evals and the non-streaming API path so the caller can inspect
            accumulated state (tool calls, answer tokens, citations) after the stream
            is consumed. When ``None`` a fresh container is created automatically.

    Returns:
        Generator yielding ``Packet`` objects as they arrive from worker threads —
        answer tokens, tool output, citations — followed by a terminal ``Packet``
        containing ``OverallStop`` once all models complete (or one containing
        ``OverallStop(stop_reason="user_cancelled")`` if the connection drops).
    """
    n_models = len(setup.llms)

    merged_queue: queue.Queue[tuple[int, Packet | Exception | object]] = queue.Queue()

    state_containers: list[ChatStateContainer] = [
        (
            external_state_container
            if (external_state_container is not None and i == 0)
            else ChatStateContainer()
        )
        for i in range(n_models)
    ]
    model_succeeded: list[bool] = [False] * n_models
    # Set to True when a model raises an exception (distinct from "still running").
    # Used in the stop-button path to avoid calling completion for errored models.
    model_errored: list[bool] = [False] * n_models

    # Set when the drain loop exits early (HTTP disconnect / GeneratorExit).
    # Signals emitters to skip future puts so workers exit promptly.
    drain_done = threading.Event()

    def _run_model(model_idx: int) -> None:
        """Run one LLM loop inside a worker thread, writing packets to ``merged_queue``."""
        model_emitter = Emitter(
            model_idx=model_idx,
            merged_queue=merged_queue,
            drain_done=drain_done,
        )
        sc = state_containers[model_idx]
        model_llm = setup.llms[model_idx]

        try:
            # Each worker opens its own session — SQLAlchemy sessions are not thread-safe.
            # Do NOT write to the outer db_session (or any shared DB state) from here;
            # all DB writes in this thread must go through thread_db_session.
            with get_session_with_current_tenant() as thread_db_session:
                thread_tool_dict = construct_tools(
                    persona=setup.persona,
                    db_session=thread_db_session,
                    emitter=model_emitter,
                    user=user,
                    llm=model_llm,
                    search_tool_config=SearchToolConfig(
                        user_selected_filters=setup.new_msg_req.internal_search_filters,
                        project_id_filter=setup.search_params.project_id_filter,
                        persona_id_filter=setup.search_params.persona_id_filter,
                        bypass_acl=setup.bypass_acl,
                        slack_context=setup.slack_context,
                        enable_slack_search=_should_enable_slack_search(
                            setup.persona, setup.new_msg_req.internal_search_filters
                        ),
                    ),
                    custom_tool_config=CustomToolConfig(
                        chat_session_id=setup.chat_session.id,
                        message_id=setup.user_message.id,
                        additional_headers=setup.custom_tool_additional_headers,
                        mcp_headers=setup.mcp_headers,
                    ),
                    file_reader_tool_config=FileReaderToolConfig(
                        user_file_ids=setup.available_files.user_file_ids,
                        chat_file_ids=setup.available_files.chat_file_ids,
                    ),
                    allowed_tool_ids=setup.new_msg_req.allowed_tool_ids,
                    search_usage_forcing_setting=setup.search_params.search_usage,
                )
                model_tools = [
                    tool
                    for tool_list in thread_tool_dict.values()
                    for tool in tool_list
                ]

                if setup.forced_tool_id and setup.forced_tool_id not in {
                    tool.id for tool in model_tools
                }:
                    raise ValueError(
                        f"Forced tool {setup.forced_tool_id} not found in tools"
                    )

                # Per-thread copy: run_llm_loop mutates simple_chat_history in-place.
                if n_models == 1 and setup.new_msg_req.deep_research:
                    if setup.chat_session.project_id:
                        raise RuntimeError(
                            "Deep research is not supported for projects"
                        )
                    run_deep_research_llm_loop(
                        emitter=model_emitter,
                        state_container=sc,
                        simple_chat_history=list(setup.simple_chat_history),
                        tools=model_tools,
                        custom_agent_prompt=setup.custom_agent_prompt,
                        llm=model_llm,
                        token_counter=get_llm_token_counter(model_llm),
                        db_session=thread_db_session,
                        skip_clarification=setup.skip_clarification,
                        user_identity=setup.user_identity,
                        chat_session_id=str(setup.chat_session.id),
                        all_injected_file_metadata=setup.all_injected_file_metadata,
                    )
                else:
                    run_llm_loop(
                        emitter=model_emitter,
                        state_container=sc,
                        simple_chat_history=list(setup.simple_chat_history),
                        tools=model_tools,
                        custom_agent_prompt=setup.custom_agent_prompt,
                        context_files=setup.extracted_context_files,
                        persona=setup.persona,
                        user_memory_context=setup.user_memory_context,
                        llm=model_llm,
                        token_counter=get_llm_token_counter(model_llm),
                        db_session=thread_db_session,
                        forced_tool_id=setup.forced_tool_id,
                        user_identity=setup.user_identity,
                        chat_session_id=str(setup.chat_session.id),
                        chat_files=setup.chat_files_for_tools,
                        include_citations=setup.new_msg_req.include_citations,
                        all_injected_file_metadata=setup.all_injected_file_metadata,
                        inject_memories_in_prompt=user.use_memories,
                    )

            model_succeeded[model_idx] = True

        except Exception as e:
            model_errored[model_idx] = True
            merged_queue.put((model_idx, e))

        finally:
            merged_queue.put((model_idx, _MODEL_DONE))

    def _delete_orphaned_message(model_idx: int, context: str) -> None:
        """Delete a reserved ChatMessage that was never populated due to a model error."""
        try:
            orphaned = db_session.get(
                ChatMessage, setup.reserved_messages[model_idx].id
            )
            if orphaned is not None:
                db_session.delete(orphaned)
                db_session.commit()
        except Exception:
            logger.exception(
                "%s orphan cleanup failed for model %d (%s)",
                context,
                model_idx,
                setup.model_display_names[model_idx],
            )

    # Copy contextvars before submitting futures — ThreadPoolExecutor does NOT
    # auto-propagate contextvars in Python 3.11; threads would inherit a blank context.
    worker_context = contextvars.copy_context()
    executor = ThreadPoolExecutor(
        max_workers=n_models, thread_name_prefix="multi-model"
    )
    completion_persisted: bool = False
    try:
        for i in range(n_models):
            executor.submit(worker_context.run, _run_model, i)

        # ── Main thread: merge and yield packets ────────────────────────────
        models_remaining = n_models
        while models_remaining > 0:
            try:
                model_idx, item = merged_queue.get(timeout=_CANCEL_POLL_INTERVAL_S)
            except queue.Empty:
                # Check for user-initiated cancellation every 50 ms.
                if not setup.check_is_connected():
                    # Save state for every model before exiting.
                    # - Succeeded models: full answer (is_connected=True).
                    # - Still-in-flight models: partial answer + "stopped by user".
                    # - Errored models: delete the orphaned reserved message; do NOT
                    #   save "stopped by user" for a model that actually threw an exception.
                    for i in range(n_models):
                        if model_errored[i]:
                            _delete_orphaned_message(i, "stop-button")
                            continue
                        try:
                            succeeded = model_succeeded[i]
                            llm_loop_completion_handle(
                                state_container=state_containers[i],
                                is_connected=lambda: succeeded,
                                db_session=db_session,
                                assistant_message=setup.reserved_messages[i],
                                llm=setup.llms[i],
                                reserved_tokens=setup.reserved_token_count,
                            )
                        except Exception:
                            logger.exception(
                                "stop-button completion failed for model %d (%s)",
                                i,
                                setup.model_display_names[i],
                            )
                    yield Packet(
                        placement=Placement(turn_index=0),
                        obj=OverallStop(type="stop", stop_reason="user_cancelled"),
                    )
                    completion_persisted = True
                    return
                continue
            else:
                if item is _MODEL_DONE:
                    models_remaining -= 1
                elif isinstance(item, Exception):
                    # Yield a tagged error for this model but keep the other models running.
                    # Do NOT decrement models_remaining — _run_model's finally always posts
                    # _MODEL_DONE, which is the sole completion signal.
                    error_msg = str(item)
                    stack_trace = "".join(
                        traceback.format_exception(type(item), item, item.__traceback__)
                    )
                    model_llm = setup.llms[model_idx]
                    if model_llm.config.api_key and len(model_llm.config.api_key) > 2:
                        error_msg = error_msg.replace(
                            model_llm.config.api_key, "[REDACTED_API_KEY]"
                        )
                        stack_trace = stack_trace.replace(
                            model_llm.config.api_key, "[REDACTED_API_KEY]"
                        )
                    yield StreamingError(
                        error=error_msg,
                        stack_trace=stack_trace,
                        error_code="MODEL_ERROR",
                        is_retryable=True,
                        details={
                            "model": model_llm.config.model_name,
                            "provider": model_llm.config.model_provider,
                            "model_index": model_idx,
                        },
                    )
                elif isinstance(item, Packet):
                    # model_index already embedded by the model's Emitter in _run_model
                    yield item

        # ── Completion: save each successful model's response ───────────────
        # All model loops have completed (run_llm_loop returned) — no more writes
        # to state_containers. Worker threads may still be closing their own DB
        # sessions, but the main-thread db_session is unshared and safe to use.
        for i in range(n_models):
            if not model_succeeded[i]:
                # Model errored — delete its orphaned reserved message.
                _delete_orphaned_message(i, "normal")
                continue
            try:
                llm_loop_completion_handle(
                    state_container=state_containers[i],
                    is_connected=setup.check_is_connected,
                    db_session=db_session,
                    assistant_message=setup.reserved_messages[i],
                    llm=setup.llms[i],
                    reserved_tokens=setup.reserved_token_count,
                )
            except Exception:
                logger.exception(
                    "normal completion failed for model %d (%s)",
                    i,
                    setup.model_display_names[i],
                )
        completion_persisted = True

    finally:
        if completion_persisted:
            # Normal exit or stop-button exit: completion already persisted.
            # Threads are done (normal path) or can finish in the background (stop-button).
            executor.shutdown(wait=False)
        else:
            # Early exit (GeneratorExit from raw HTTP disconnect, or unhandled
            # exception in the drain loop).
            # 1. Signal emitters to stop — future emit() calls return immediately,
            #    so workers exit their LLM loops promptly.
            drain_done.set()
            # 2. Wait for all workers to finish. Once drain_done is set the Emitter
            #    short-circuits, so workers should exit quickly.
            executor.shutdown(wait=True)
            # 3. All workers are done — complete from the main thread only.
            for i in range(n_models):
                if model_succeeded[i]:
                    try:
                        llm_loop_completion_handle(
                            state_container=state_containers[i],
                            # Model already finished — persist full response.
                            is_connected=lambda: True,
                            db_session=db_session,
                            assistant_message=setup.reserved_messages[i],
                            llm=setup.llms[i],
                            reserved_tokens=setup.reserved_token_count,
                        )
                    except Exception:
                        logger.exception(
                            "disconnect completion failed for model %d (%s)",
                            i,
                            setup.model_display_names[i],
                        )
                elif model_errored[i]:
                    _delete_orphaned_message(i, "disconnect")
            # 4. Drain buffered packets from memory — no consumer is running.
            while not merged_queue.empty():
                try:
                    merged_queue.get_nowait()
                except queue.Empty:
                    break


def _stream_chat_turn(
    new_msg_req: SendMessageRequest,
    user: User,
    db_session: Session,
    llm_overrides: list[LLMOverride] | None = None,
    litellm_additional_headers: dict[str, str] | None = None,
    custom_tool_additional_headers: dict[str, str] | None = None,
    mcp_headers: dict[str, str] | None = None,
    bypass_acl: bool = False,
    additional_context: str | None = None,
    slack_context: SlackContext | None = None,
    external_state_container: ChatStateContainer | None = None,
) -> AnswerStream:
    """Private implementation for single-model and multi-model chat turn streaming.

    Builds the turn context via ``build_chat_turn``, then streams packets from
    ``_run_models`` back to the caller. Handles setup errors, LLM errors, and
    cancellation uniformly, saving whatever partial state has been accumulated
    before re-raising or yielding a terminal error packet.

    Not called directly — use the public wrappers:
    - ``handle_stream_message_objects`` for single-model (N=1) requests.
    - ``handle_multi_model_stream`` for side-by-side multi-model comparison (N>1).

    Args:
        new_msg_req: The incoming chat request from the user.
        user: Authenticated user; may be anonymous for public personas.
        db_session: Database session for this request.
        llm_overrides: ``None`` → single-model (persona default LLM).
            Non-empty list → multi-model (one LLM per override, 2–3 items).
        litellm_additional_headers: Extra headers forwarded to the LLM provider.
        custom_tool_additional_headers: Extra headers for custom tool HTTP calls.
        mcp_headers: Extra headers for MCP tool calls.
        bypass_acl: If ``True``, document ACL checks are skipped (used by Slack bot).
        additional_context: Extra context prepended to the LLM's chat history, not
            stored in the DB (used for Slack thread hydration).
        slack_context: Federated Slack search context passed through to the search tool.
        external_state_container: Optional pre-constructed state container. When
            provided, accumulated state (tool calls, citations, answer tokens) is
            written into it so the caller can inspect the result after streaming.

    Returns:
        Generator yielding ``Packet`` objects — answer tokens, tool output, citations —
        followed by a terminal ``Packet`` containing ``OverallStop``.
    """
    if new_msg_req.mock_llm_response is not None and not INTEGRATION_TESTS_MODE:
        raise ValueError(
            "mock_llm_response can only be used when INTEGRATION_TESTS_MODE=true"
        )

    mock_response_token: Token[str | None] | None = None
    setup: ChatTurnSetup | None = None

    try:
        setup = yield from build_chat_turn(
            new_msg_req=new_msg_req,
            user=user,
            db_session=db_session,
            llm_overrides=llm_overrides,
            litellm_additional_headers=litellm_additional_headers,
            custom_tool_additional_headers=custom_tool_additional_headers,
            mcp_headers=mcp_headers,
            bypass_acl=bypass_acl,
            slack_context=slack_context,
            additional_context=additional_context,
        )

        # Set mock response token right before the LLM stream begins so that
        # run_in_background threads inherit the correct context.
        if new_msg_req.mock_llm_response is not None:
            mock_response_token = set_llm_mock_response(new_msg_req.mock_llm_response)

        yield from _run_models(
            setup=setup,
            user=user,
            db_session=db_session,
            external_state_container=external_state_container,
        )

    except OnyxError as e:
        if e.error_code is not OnyxErrorCode.QUERY_REJECTED:
            log_onyx_error(e)
        yield StreamingError(
            error=e.detail,
            error_code=e.error_code.code,
            is_retryable=e.status_code >= 500,
        )
        db_session.rollback()
        return

    except ValueError as e:
        logger.exception("Failed to process chat message.")
        yield StreamingError(
            error=str(e),
            error_code="VALIDATION_ERROR",
            is_retryable=True,
        )
        db_session.rollback()
        return

    except EmptyLLMResponseError as e:
        stack_trace = traceback.format_exc()
        logger.warning(
            f"LLM returned an empty response (provider={e.provider}, model={e.model}, tool_choice={e.tool_choice})"
        )
        yield StreamingError(
            error=e.client_error_msg,
            stack_trace=stack_trace,
            error_code=e.error_code,
            is_retryable=e.is_retryable,
            details={
                "model": e.model,
                "provider": e.provider,
                "tool_choice": e.tool_choice.value,
            },
        )
        db_session.rollback()

    except Exception as e:
        logger.exception(f"Failed to process chat message due to {e}")
        stack_trace = traceback.format_exc()

        llm = setup.llms[0] if setup else None
        if llm:
            client_error_msg, error_code, is_retryable = litellm_exception_to_error_msg(
                e, llm
            )
            if llm.config.api_key and len(llm.config.api_key) > 2:
                client_error_msg = client_error_msg.replace(
                    llm.config.api_key, "[REDACTED_API_KEY]"
                )
                stack_trace = stack_trace.replace(
                    llm.config.api_key, "[REDACTED_API_KEY]"
                )
            yield StreamingError(
                error=client_error_msg,
                stack_trace=stack_trace,
                error_code=error_code,
                is_retryable=is_retryable,
                details={
                    "model": llm.config.model_name,
                    "provider": llm.config.model_provider,
                },
            )
        else:
            yield StreamingError(
                error="Failed to initialize the chat. Please check your configuration and try again.",
                stack_trace=stack_trace,
                error_code="INIT_FAILED",
                is_retryable=True,
            )
        db_session.rollback()

    finally:
        if mock_response_token is not None:
            reset_llm_mock_response(mock_response_token)
        try:
            if setup is not None:
                set_processing_status(
                    chat_session_id=setup.chat_session.id,
                    cache=setup.cache,
                    value=False,
                )
        except Exception:
            logger.exception("Error in setting processing status")


def handle_stream_message_objects(
    new_msg_req: SendMessageRequest,
    user: User,
    db_session: Session,
    litellm_additional_headers: dict[str, str] | None = None,
    custom_tool_additional_headers: dict[str, str] | None = None,
    mcp_headers: dict[str, str] | None = None,
    bypass_acl: bool = False,
    additional_context: str | None = None,
    slack_context: SlackContext | None = None,
    external_state_container: ChatStateContainer | None = None,
) -> AnswerStream:
    """Single-model streaming entrypoint. For multi-model comparison, use ``handle_multi_model_stream``."""
    yield from _stream_chat_turn(
        new_msg_req=new_msg_req,
        user=user,
        db_session=db_session,
        llm_overrides=None,
        litellm_additional_headers=litellm_additional_headers,
        custom_tool_additional_headers=custom_tool_additional_headers,
        mcp_headers=mcp_headers,
        bypass_acl=bypass_acl,
        additional_context=additional_context,
        slack_context=slack_context,
        external_state_container=external_state_container,
    )


def _build_model_display_name(override: LLMOverride | None) -> str:
    """Build a human-readable display name from an LLM override."""
    if override is None:
        return "unknown"
    return override.display_name or override.model_version or "unknown"


def handle_multi_model_stream(
    new_msg_req: SendMessageRequest,
    user: User,
    db_session: Session,
    llm_overrides: list[LLMOverride],
    litellm_additional_headers: dict[str, str] | None = None,
    custom_tool_additional_headers: dict[str, str] | None = None,
    mcp_headers: dict[str, str] | None = None,
) -> AnswerStream:
    """Thin wrapper for side-by-side multi-model comparison (2–3 models).

    Validates the override list and delegates to ``_stream_chat_turn``,
    which handles both single-model and multi-model execution via the same path.

    Args:
        new_msg_req: The incoming chat request. ``deep_research`` must be ``False``.
        user: Authenticated user making the request.
        db_session: Database session for this request.
        llm_overrides: Exactly 2 or 3 ``LLMOverride`` objects — one per model to run.
        litellm_additional_headers: Extra headers forwarded to each LLM provider.
        custom_tool_additional_headers: Extra headers for custom tool HTTP calls.
        mcp_headers: Extra headers for MCP tool calls.

    Returns:
        Generator yielding interleaved ``Packet`` objects from all models, each tagged
        with ``model_index`` in its placement.
    """
    n_models = len(llm_overrides)
    if n_models < 2 or n_models > 3:
        yield StreamingError(
            error=f"Multi-model requires 2-3 overrides, got {n_models}",
            error_code="VALIDATION_ERROR",
            is_retryable=False,
        )
        return
    if new_msg_req.deep_research:
        yield StreamingError(
            error="Multi-model is not supported with deep research",
            error_code="VALIDATION_ERROR",
            is_retryable=False,
        )
        return
    yield from _stream_chat_turn(
        new_msg_req=new_msg_req,
        user=user,
        db_session=db_session,
        llm_overrides=llm_overrides,
        litellm_additional_headers=litellm_additional_headers,
        custom_tool_additional_headers=custom_tool_additional_headers,
        mcp_headers=mcp_headers,
    )


def llm_loop_completion_handle(
    state_container: ChatStateContainer,
    is_connected: Callable[[], bool],
    db_session: Session,
    assistant_message: ChatMessage,
    llm: LLM,
    reserved_tokens: int,
) -> None:
    chat_session_id = assistant_message.chat_session_id

    # Snapshot all state under the container's lock before any DB write.
    # Worker threads may still be running (e.g. user-cancellation path), so
    # direct attribute access is not thread-safe — use the provided getters.
    answer_tokens = state_container.get_answer_tokens()
    reasoning_tokens = state_container.get_reasoning_tokens()
    citation_to_doc = state_container.get_citation_to_doc()
    tool_calls = state_container.get_tool_calls()
    is_clarification = state_container.get_is_clarification()
    all_search_docs = state_container.get_all_search_docs()
    emitted_citations = state_container.get_emitted_citations()
    pre_answer_processing_time = state_container.get_pre_answer_processing_time()

    completed_normally = is_connected()
    if completed_normally:
        if answer_tokens is None:
            raise RuntimeError(
                "LLM run completed normally but did not return an answer."
            )
        final_answer = answer_tokens
    else:
        # Stopped by user - append stop message
        logger.debug(f"Chat session {chat_session_id} stopped by user")
        if answer_tokens:
            final_answer = (
                answer_tokens + " ... \n\nGeneration was stopped by the user."
            )
        else:
            final_answer = "The generation was stopped by the user."

    save_chat_turn(
        message_text=final_answer,
        reasoning_tokens=reasoning_tokens,
        citation_to_doc=citation_to_doc,
        tool_calls=tool_calls,
        all_search_docs=all_search_docs,
        db_session=db_session,
        assistant_message=assistant_message,
        is_clarification=is_clarification,
        emitted_citations=emitted_citations,
        pre_answer_processing_time=pre_answer_processing_time,
    )

    # Check if compression is needed after saving the message
    updated_chat_history = create_chat_history_chain(
        chat_session_id=chat_session_id,
        db_session=db_session,
    )
    total_tokens = calculate_total_history_tokens(updated_chat_history)

    compression_params = get_compression_params(
        max_input_tokens=llm.config.max_input_tokens,
        current_history_tokens=total_tokens,
        reserved_tokens=reserved_tokens,
    )
    if compression_params.should_compress:
        # Build tool mapping for formatting messages
        all_tools = get_tools(db_session)
        tool_id_to_name = {tool.id: tool.name for tool in all_tools}

        compress_chat_history(
            db_session=db_session,
            chat_history=updated_chat_history,
            llm=llm,
            compression_params=compression_params,
            tool_id_to_name=tool_id_to_name,
        )


_CITATION_LINK_START_PATTERN = re.compile(r"\s*\[\[\d+\]\]\(")


def _find_markdown_link_end(text: str, destination_start: int) -> int | None:
    depth = 0
    i = destination_start

    while i < len(text):
        curr = text[i]
        if curr == "\\":
            i += 2
            continue

        if curr == "(":
            depth += 1
        elif curr == ")":
            if depth == 0:
                return i
            depth -= 1

        i += 1

    return None


def remove_answer_citations(answer: str) -> str:
    stripped_parts: list[str] = []
    cursor = 0

    while match := _CITATION_LINK_START_PATTERN.search(answer, cursor):
        stripped_parts.append(answer[cursor : match.start()])
        link_end = _find_markdown_link_end(answer, match.end())
        if link_end is None:
            stripped_parts.append(answer[match.start() :])
            return "".join(stripped_parts)

        cursor = link_end + 1

    stripped_parts.append(answer[cursor:])
    return "".join(stripped_parts)


@log_function_time()
def gather_stream(
    packets: AnswerStream,
) -> ChatBasicResponse:
    answer: str | None = None
    citations: list[CitationInfo] = []
    error_msg: str | None = None
    message_id: int | None = None
    top_documents: list[SearchDoc] = []

    for packet in packets:
        if isinstance(packet, Packet):
            # Handle the different packet object types
            if isinstance(packet.obj, AgentResponseStart):
                # AgentResponseStart contains the final documents
                if packet.obj.final_documents:
                    top_documents = packet.obj.final_documents
            elif isinstance(packet.obj, AgentResponseDelta):
                # AgentResponseDelta contains incremental content updates
                if answer is None:
                    answer = ""
                if packet.obj.content:
                    answer += packet.obj.content
            elif isinstance(packet.obj, CitationInfo):
                # CitationInfo contains citation information
                citations.append(packet.obj)
        elif isinstance(packet, StreamingError):
            error_msg = packet.error
        elif isinstance(packet, MessageResponseIDInfo):
            message_id = packet.reserved_assistant_message_id

    if message_id is None:
        raise ValueError("Message ID is required")

    if answer is None:
        if error_msg is not None:
            answer = ""
        else:
            # This should never be the case as these non-streamed flows do not have a stop-generation signal
            raise RuntimeError("Answer was not generated")

    return ChatBasicResponse(
        answer=answer,
        answer_citationless=remove_answer_citations(answer),
        citation_info=citations,
        message_id=message_id,
        error_msg=error_msg,
        top_documents=top_documents,
    )


@log_function_time()
def gather_stream_full(
    packets: AnswerStream,
    state_container: ChatStateContainer,
) -> ChatFullResponse:
    """
    Aggregate streaming packets and state container into a complete ChatFullResponse.

    This function consumes all packets from the stream and combines them with
    the accumulated state from the ChatStateContainer to build a complete response
    including answer, reasoning, citations, and tool calls.

    Args:
        packets: The stream of packets from handle_stream_message_objects
        state_container: The state container that accumulates tool calls, reasoning, etc.

    Returns:
        ChatFullResponse with all available data
    """
    answer: str | None = None
    citations: list[CitationInfo] = []
    error_msg: str | None = None
    message_id: int | None = None
    top_documents: list[SearchDoc] = []
    chat_session_id: UUID | None = None

    for packet in packets:
        if isinstance(packet, Packet):
            if isinstance(packet.obj, AgentResponseStart):
                if packet.obj.final_documents:
                    top_documents = packet.obj.final_documents
            elif isinstance(packet.obj, AgentResponseDelta):
                if answer is None:
                    answer = ""
                if packet.obj.content:
                    answer += packet.obj.content
            elif isinstance(packet.obj, CitationInfo):
                citations.append(packet.obj)
        elif isinstance(packet, StreamingError):
            error_msg = packet.error
        elif isinstance(packet, MessageResponseIDInfo):
            message_id = packet.reserved_assistant_message_id
        elif isinstance(packet, CreateChatSessionID):
            chat_session_id = packet.chat_session_id

    if message_id is None:
        raise ValueError("Message ID is required")

    # Use state_container for complete answer (handles edge cases gracefully)
    final_answer = state_container.get_answer_tokens() or answer or ""

    # Get reasoning from state container (None when model doesn't produce reasoning)
    reasoning = state_container.get_reasoning_tokens()

    # Convert ToolCallInfo list to ToolCallResponse list
    tool_call_responses = [
        ToolCallResponse(
            tool_name=tc.tool_name,
            tool_arguments=tc.tool_call_arguments,
            tool_result=tc.tool_call_response,
            search_docs=tc.search_docs,
            generated_images=tc.generated_images,
            pre_reasoning=tc.reasoning_tokens,
        )
        for tc in state_container.get_tool_calls()
    ]

    return ChatFullResponse(
        answer=final_answer,
        answer_citationless=remove_answer_citations(final_answer),
        pre_answer_reasoning=reasoning,
        tool_calls=tool_call_responses,
        top_documents=top_documents,
        citation_info=citations,
        message_id=message_id,
        chat_session_id=chat_session_id,
        error_msg=error_msg,
    )


================================================
FILE: backend/onyx/chat/prompt_utils.py
================================================
from collections.abc import Callable
from collections.abc import Sequence
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.db.memory import UserMemoryContext
from onyx.db.persona import get_default_behavior_persona
from onyx.db.user_file import calculate_user_files_token_count
from onyx.file_store.models import FileDescriptor
from onyx.prompts.chat_prompts import CITATION_REMINDER
from onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT
from onyx.prompts.chat_prompts import FILE_REMINDER
from onyx.prompts.chat_prompts import LAST_CYCLE_CITATION_REMINDER
from onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE
from onyx.prompts.prompt_utils import get_company_context
from onyx.prompts.prompt_utils import handle_onyx_date_awareness
from onyx.prompts.prompt_utils import replace_citation_guidance_tag
from onyx.prompts.prompt_utils import replace_reminder_tag
from onyx.prompts.tool_prompts import GENERATE_IMAGE_GUIDANCE
from onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import MEMORY_GUIDANCE
from onyx.prompts.tool_prompts import OPEN_URLS_GUIDANCE
from onyx.prompts.tool_prompts import PYTHON_TOOL_GUIDANCE
from onyx.prompts.tool_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import TOOL_SECTION_HEADER
from onyx.prompts.tool_prompts import WEB_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import WEB_SEARCH_SITE_DISABLED_GUIDANCE
from onyx.prompts.user_info import BASIC_INFORMATION_PROMPT
from onyx.prompts.user_info import TEAM_INFORMATION_PROMPT
from onyx.prompts.user_info import USER_INFORMATION_HEADER
from onyx.prompts.user_info import USER_MEMORIES_PROMPT
from onyx.prompts.user_info import USER_PREFERENCES_PROMPT
from onyx.prompts.user_info import USER_ROLE_PROMPT
from onyx.tools.interface import Tool
from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.utils.timing import log_function_time


def get_default_base_system_prompt(db_session: Session) -> str:
    default_persona = get_default_behavior_persona(db_session)
    return (
        default_persona.system_prompt
        if default_persona and default_persona.system_prompt is not None
        else DEFAULT_SYSTEM_PROMPT
    )


@log_function_time(print_only=True)
def calculate_reserved_tokens(
    db_session: Session,
    persona_system_prompt: str,
    token_counter: Callable[[str], int],
    files: list[FileDescriptor] | None = None,
    user_memory_context: UserMemoryContext | None = None,
) -> int:
    """
    Calculate reserved token count for system prompt and user files.

    This is used for token estimation purposes to reserve space for:
    - The system prompt (base + custom agent prompt + all guidance)
    - User files attached to the message

    Args:
        db_session: Database session
        persona_system_prompt: Custom agent system prompt (can be empty string)
        token_counter: Function that counts tokens in text
        files: List of file descriptors from the chat message (optional)
        user_memory_context: User memory context (optional)

    Returns:
        Total reserved token count
    """
    base_system_prompt = get_default_base_system_prompt(db_session)

    # This is for token estimation purposes
    fake_system_prompt = build_system_prompt(
        base_system_prompt=base_system_prompt,
        datetime_aware=True,
        user_memory_context=user_memory_context,
        tools=None,
        should_cite_documents=True,
        include_all_guidance=True,
    )

    custom_agent_prompt = persona_system_prompt if persona_system_prompt else ""

    reserved_token_count = token_counter(
        # Annoying that the dict has no attributes now
        custom_agent_prompt
        + " "
        + fake_system_prompt
    )

    # Calculate total token count for files in the last message
    file_token_count = 0
    if files:
        # Extract user_file_id from each file descriptor
        user_file_ids: list[UUID] = []
        for file in files:
            uid = file.get("user_file_id")
            if not uid:
                continue
            try:
                user_file_ids.append(UUID(uid))
            except (TypeError, ValueError, AttributeError):
                # Skip invalid user_file_id values
                continue
        if user_file_ids:
            file_token_count = calculate_user_files_token_count(
                user_file_ids, db_session
            )

    reserved_token_count += file_token_count

    return reserved_token_count


def build_reminder_message(
    reminder_text: str | None,
    include_citation_reminder: bool,
    include_file_reminder: bool,
    is_last_cycle: bool,
) -> str | None:
    reminder = reminder_text.strip() if reminder_text else ""
    if is_last_cycle:
        reminder += "\n\n" + LAST_CYCLE_CITATION_REMINDER
    if include_citation_reminder:
        reminder += "\n\n" + CITATION_REMINDER
    if include_file_reminder:
        reminder += "\n\n" + FILE_REMINDER
    reminder = reminder.strip()
    return reminder if reminder else None


def _build_user_information_section(
    user_memory_context: UserMemoryContext | None,
    company_context: str | None,
) -> str:
    """Build the complete '# User Information' section with all sub-sections
    in the correct order: Basic Info → Team Info → Preferences → Memories."""
    sections: list[str] = []

    if user_memory_context:
        ctx = user_memory_context
        has_basic_info = ctx.user_info.name or ctx.user_info.email or ctx.user_info.role

        if has_basic_info:
            role_line = (
                USER_ROLE_PROMPT.format(user_role=ctx.user_info.role).strip()
                if ctx.user_info.role
                else ""
            )
            if role_line:
                role_line = "\n" + role_line
            sections.append(
                BASIC_INFORMATION_PROMPT.format(
                    user_name=ctx.user_info.name or "",
                    user_email=ctx.user_info.email or "",
                    user_role=role_line,
                )
            )

    if company_context:
        sections.append(
            TEAM_INFORMATION_PROMPT.format(team_information=company_context.strip())
        )

    if user_memory_context:
        ctx = user_memory_context

        if ctx.user_preferences:
            sections.append(
                USER_PREFERENCES_PROMPT.format(user_preferences=ctx.user_preferences)
            )

        if ctx.memories:
            formatted_memories = "\n".join(f"- {memory}" for memory in ctx.memories)
            sections.append(
                USER_MEMORIES_PROMPT.format(user_memories=formatted_memories)
            )

    if not sections:
        return ""

    return USER_INFORMATION_HEADER + "\n".join(sections)


def build_system_prompt(
    base_system_prompt: str,
    datetime_aware: bool = False,
    user_memory_context: UserMemoryContext | None = None,
    tools: Sequence[Tool] | None = None,
    should_cite_documents: bool = False,
    include_all_guidance: bool = False,
) -> str:
    """Should only be called with the default behavior system prompt.
    If the user has replaced the default behavior prompt with their custom agent prompt, do not call this function.
    """
    system_prompt = handle_onyx_date_awareness(base_system_prompt, datetime_aware)

    # Replace citation guidance placeholder if present
    system_prompt, should_append_citation_guidance = replace_citation_guidance_tag(
        system_prompt,
        should_cite_documents=should_cite_documents,
        include_all_guidance=include_all_guidance,
    )

    # Replace reminder tag placeholder if present
    system_prompt = replace_reminder_tag(system_prompt)

    company_context = get_company_context()
    user_info_section = _build_user_information_section(
        user_memory_context, company_context
    )
    system_prompt += user_info_section

    # Append citation guidance after company context if placeholder was not present
    # This maintains backward compatibility and ensures citations are always enforced when needed
    if should_append_citation_guidance:
        system_prompt += REQUIRE_CITATION_GUIDANCE

    if include_all_guidance:
        tool_sections = [
            TOOL_DESCRIPTION_SEARCH_GUIDANCE,
            INTERNAL_SEARCH_GUIDANCE,
            WEB_SEARCH_GUIDANCE.format(
                site_colon_disabled=WEB_SEARCH_SITE_DISABLED_GUIDANCE
            ),
            OPEN_URLS_GUIDANCE,
            PYTHON_TOOL_GUIDANCE,
            GENERATE_IMAGE_GUIDANCE,
            MEMORY_GUIDANCE,
        ]
        system_prompt += TOOL_SECTION_HEADER + "\n".join(tool_sections)
        return system_prompt

    if tools:
        has_web_search = any(isinstance(tool, WebSearchTool) for tool in tools)
        has_internal_search = any(isinstance(tool, SearchTool) for tool in tools)
        has_open_urls = any(isinstance(tool, OpenURLTool) for tool in tools)
        has_python = any(isinstance(tool, PythonTool) for tool in tools)
        has_generate_image = any(
            isinstance(tool, ImageGenerationTool) for tool in tools
        )
        has_memory = any(isinstance(tool, MemoryTool) for tool in tools)

        tool_guidance_sections: list[str] = []

        if has_web_search or has_internal_search or include_all_guidance:
            tool_guidance_sections.append(TOOL_DESCRIPTION_SEARCH_GUIDANCE)

        # These are not included at the Tool level because the ordering may matter.
        if has_internal_search or include_all_guidance:
            tool_guidance_sections.append(INTERNAL_SEARCH_GUIDANCE)

        if has_web_search or include_all_guidance:
            site_disabled_guidance = ""
            if has_web_search:
                web_search_tool = next(
                    (t for t in tools if isinstance(t, WebSearchTool)), None
                )
                if web_search_tool and not web_search_tool.supports_site_filter:
                    site_disabled_guidance = WEB_SEARCH_SITE_DISABLED_GUIDANCE
            tool_guidance_sections.append(
                WEB_SEARCH_GUIDANCE.format(site_colon_disabled=site_disabled_guidance)
            )

        if has_open_urls or include_all_guidance:
            tool_guidance_sections.append(OPEN_URLS_GUIDANCE)

        if has_python or include_all_guidance:
            tool_guidance_sections.append(PYTHON_TOOL_GUIDANCE)

        if has_generate_image or include_all_guidance:
            tool_guidance_sections.append(GENERATE_IMAGE_GUIDANCE)

        if has_memory or include_all_guidance:
            tool_guidance_sections.append(MEMORY_GUIDANCE)

        if tool_guidance_sections:
            system_prompt += TOOL_SECTION_HEADER + "\n".join(tool_guidance_sections)

    return system_prompt


================================================
FILE: backend/onyx/chat/save_chat.py
================================================
import json
import mimetypes

from sqlalchemy.orm import Session

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
from onyx.db.chat import create_db_search_doc
from onyx.db.models import ChatMessage
from onyx.db.models import ToolCall
from onyx.db.tools import create_tool_call_no_commit
from onyx.file_store.models import FileDescriptor
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.tools.models import ToolCallInfo
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string

logger = setup_logger()


def _extract_referenced_file_descriptors(
    tool_calls: list[ToolCallInfo],
    message_text: str,
) -> list[FileDescriptor]:
    """Extract FileDescriptors for code interpreter files referenced in the message text."""
    descriptors: list[FileDescriptor] = []
    for tool_call_info in tool_calls:
        if not tool_call_info.generated_files:
            continue
        for gen_file in tool_call_info.generated_files:
            file_id = (
                gen_file.file_link.rsplit("/", 1)[-1] if gen_file.file_link else ""
            )
            if file_id and file_id in message_text:
                mime_type, _ = mimetypes.guess_type(gen_file.filename)
                descriptors.append(
                    FileDescriptor(
                        id=file_id,
                        type=mime_type_to_chat_file_type(mime_type),
                        name=gen_file.filename,
                    )
                )
    return descriptors


def _create_and_link_tool_calls(
    tool_calls: list[ToolCallInfo],
    assistant_message: ChatMessage,
    db_session: Session,
    default_tokenizer: BaseTokenizer,
    tool_call_to_search_doc_ids: dict[str, list[int]],
) -> None:
    """
    Create ToolCall entries and link parent references and SearchDocs.

    This function handles the logic of:
    1. Creating all ToolCall objects (with temporary parent references)
    2. Flushing to get DB IDs
    3. Building mappings and updating parent references
    4. Linking SearchDocs to ToolCalls


    Args:
        tool_calls: List of tool call information to create
        assistant_message: The ChatMessage these tool calls belong to
        db_session: Database session
        default_tokenizer: Tokenizer for calculating token counts
        tool_call_to_search_doc_ids: Mapping from tool_call_id to list of search_doc IDs
    """
    # Create all ToolCall objects first (without parent_tool_call_id set)
    # We'll update parent references after flushing to get IDs
    tool_call_objects: list[ToolCall] = []
    tool_call_info_map: dict[str, ToolCallInfo] = {}

    for tool_call_info in tool_calls:
        tool_call_info_map[tool_call_info.tool_call_id] = tool_call_info

        # Calculate tool_call_tokens from arguments
        try:
            arguments_json_str = json.dumps(tool_call_info.tool_call_arguments)
            tool_call_tokens = len(default_tokenizer.encode(arguments_json_str))
        except Exception as e:
            logger.warning(
                f"Failed to tokenize tool call arguments for {tool_call_info.tool_call_id}: {e}. Using length as (over) estimate."
            )
            arguments_json_str = json.dumps(tool_call_info.tool_call_arguments)
            tool_call_tokens = len(arguments_json_str)

        parent_message_id = (
            assistant_message.id if tool_call_info.parent_tool_call_id is None else None
        )

        # Create ToolCall DB entry (parent_tool_call_id will be set after flush)
        # This is needed to get the IDs for the parent pointers
        tool_call = create_tool_call_no_commit(
            chat_session_id=assistant_message.chat_session_id,
            parent_chat_message_id=parent_message_id,
            turn_number=tool_call_info.turn_index,
            tool_id=tool_call_info.tool_id,
            tool_call_id=tool_call_info.tool_call_id,
            tool_call_arguments=tool_call_info.tool_call_arguments,
            tool_call_response=tool_call_info.tool_call_response,
            tool_call_tokens=tool_call_tokens,
            db_session=db_session,
            parent_tool_call_id=None,  # Will be updated after flush
            reasoning_tokens=tool_call_info.reasoning_tokens,
            generated_images=(
                [img.model_dump() for img in tool_call_info.generated_images]
                if tool_call_info.generated_images
                else None
            ),
            tab_index=tool_call_info.tab_index,
            add_only=True,
        )

        # Flush to get all of the IDs
        db_session.flush()

        tool_call_objects.append(tool_call)

    # Build mapping of tool calls (tool_call_id string -> DB id int)
    tool_call_map: dict[str, int] = {}
    for tool_call_obj in tool_call_objects:
        tool_call_map[tool_call_obj.tool_call_id] = tool_call_obj.id

    # Update parent_tool_call_id for all tool calls
    # Filter out orphaned children (whose parents don't exist) - this can happen
    # when generation is stopped mid-execution and parent tool calls were cancelled
    valid_tool_calls: list[ToolCall] = []
    for tool_call_obj in tool_call_objects:
        tool_call_info = tool_call_info_map[tool_call_obj.tool_call_id]
        if tool_call_info.parent_tool_call_id is not None:
            parent_id = tool_call_map.get(tool_call_info.parent_tool_call_id)
            if parent_id is not None:
                tool_call_obj.parent_tool_call_id = parent_id
                valid_tool_calls.append(tool_call_obj)
            else:
                # Parent doesn't exist (likely cancelled) - skip this orphaned child
                logger.warning(
                    f"Skipping tool call '{tool_call_obj.tool_call_id}' with missing parent "
                    f"'{tool_call_info.parent_tool_call_id}' (likely cancelled during execution)"
                )
                # Remove from DB session to prevent saving
                db_session.delete(tool_call_obj)
        else:
            # Top-level tool call (no parent)
            valid_tool_calls.append(tool_call_obj)

    # Link SearchDocs only to valid ToolCalls
    for tool_call_obj in valid_tool_calls:
        search_doc_ids = tool_call_to_search_doc_ids.get(tool_call_obj.tool_call_id, [])
        if search_doc_ids:
            add_search_docs_to_tool_call(
                tool_call_id=tool_call_obj.id,
                search_doc_ids=search_doc_ids,
                db_session=db_session,
            )


def save_chat_turn(
    message_text: str,
    reasoning_tokens: str | None,
    tool_calls: list[ToolCallInfo],
    citation_to_doc: dict[int, SearchDoc],
    all_search_docs: dict[SearchDocKey, SearchDoc],
    db_session: Session,
    assistant_message: ChatMessage,
    is_clarification: bool = False,
    emitted_citations: set[int] | None = None,
    pre_answer_processing_time: float | None = None,
) -> None:
    """
    Save a chat turn by populating the assistant_message and creating related entities.

    This function:
    1. Updates the ChatMessage with text, reasoning tokens, and token count
    2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
    3. Builds tool_call -> search_doc mapping for displayed docs
    4. Builds citation mapping from citation_to_doc
    5. Links all unique SearchDocs to the ChatMessage
    6. Creates ToolCall entries and links SearchDocs to them
    7. Builds the citations mapping for the ChatMessage

    Args:
        message_text: The message content to save
        reasoning_tokens: Optional reasoning tokens for the message
        tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
        citation_to_doc: Mapping from citation number to SearchDoc for building citations
        all_search_docs: Pre-deduplicated search docs from ChatStateContainer
        db_session: Database session for persistence
        assistant_message: The ChatMessage object to populate (should already exist in DB)
        is_clarification: Whether this assistant message is a clarification question (deep research flow)
        emitted_citations: Set of citation numbers that were actually emitted during streaming.
            If provided, only citations in this set will be saved; others are filtered out.
        pre_answer_processing_time: Duration of processing before answer starts (in seconds)
    """
    # 1. Update ChatMessage with message content, reasoning tokens, and token count
    sanitized_message_text = (
        sanitize_string(message_text) if message_text else message_text
    )
    assistant_message.message = sanitized_message_text
    assistant_message.reasoning_tokens = (
        sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens
    )
    assistant_message.is_clarification = is_clarification

    # Use pre-answer processing time (captured when MESSAGE_START was emitted)
    if pre_answer_processing_time is not None:
        assistant_message.processing_duration_seconds = pre_answer_processing_time

    # Calculate token count using default tokenizer, when storing, this should not use the LLM
    # specific one so we use a system default tokenizer here.
    default_tokenizer = get_tokenizer(None, None)
    if sanitized_message_text:
        assistant_message.token_count = len(
            default_tokenizer.encode(sanitized_message_text)
        )
    else:
        assistant_message.token_count = 0

    # 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
    search_doc_key_to_id: dict[SearchDocKey, int] = {}
    for key, search_doc_py in all_search_docs.items():
        db_search_doc = create_db_search_doc(
            server_search_doc=search_doc_py,
            db_session=db_session,
            commit=False,
        )
        search_doc_key_to_id[key] = db_search_doc.id

    # 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
    tool_call_to_search_doc_ids: dict[str, list[int]] = {}
    for tool_call_info in tool_calls:
        if tool_call_info.search_docs:
            search_doc_ids_for_tool: list[int] = []
            for search_doc_py in tool_call_info.search_docs:
                key = ChatStateContainer.create_search_doc_key(search_doc_py)
                if key in search_doc_key_to_id:
                    search_doc_ids_for_tool.append(search_doc_key_to_id[key])
                else:
                    # Displayed doc not in all_search_docs - create it
                    # This can happen if displayed_docs contains docs not in search_docs
                    db_search_doc = create_db_search_doc(
                        server_search_doc=search_doc_py,
                        db_session=db_session,
                        commit=False,
                    )
                    search_doc_key_to_id[key] = db_search_doc.id
                    search_doc_ids_for_tool.append(db_search_doc.id)
            tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
                set(search_doc_ids_for_tool)
            )

    # Collect all search doc IDs for ChatMessage linking
    all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())

    # 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
    # Only include citations that were actually emitted during streaming
    citation_number_to_search_doc_id: dict[int, int] = {}

    for citation_num, search_doc_py in citation_to_doc.items():
        # Skip citations that weren't actually emitted (if emitted_citations is provided)
        if emitted_citations is not None and citation_num not in emitted_citations:
            continue

        # Create the unique key for this SearchDoc version
        search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)

        # Get the search doc ID (should already exist from processing tool_calls)
        if search_doc_key in search_doc_key_to_id:
            db_search_doc_id = search_doc_key_to_id[search_doc_key]
        else:
            # Citation doc not found in tool call search_docs
            # Expected case: Project files (source_type=FILE) are cited but don't come from tool calls
            # Unexpected case: Other citation-only docs (indicates a potential issue upstream)
            is_project_file = search_doc_py.source_type == DocumentSource.FILE

            if is_project_file:
                logger.info(
                    f"Project file citation {search_doc_py.document_id} not in tool calls, creating it"
                )
            else:
                logger.warning(
                    f"Citation doc {search_doc_py.document_id} not found in tool call search_docs, creating it"
                )

            # Create the SearchDoc in the database
            # NOTE: It's important that this maps to the saved DB Document ID, because
            # the match-highlights are specific to this saved version, not any document that has
            # the same document_id.
            db_search_doc = create_db_search_doc(
                server_search_doc=search_doc_py,
                db_session=db_session,
                commit=False,
            )
            db_search_doc_id = db_search_doc.id
            search_doc_key_to_id[search_doc_key] = db_search_doc_id

            # Link project files to ChatMessage to enable frontend preview
            if is_project_file:
                all_search_doc_ids_set.add(db_search_doc_id)

        # Build mapping from citation number to search doc ID
        citation_number_to_search_doc_id[citation_num] = db_search_doc_id

    # 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
    final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
    if final_search_doc_ids:
        add_search_docs_to_chat_message(
            chat_message_id=assistant_message.id,
            search_doc_ids=final_search_doc_ids,
            db_session=db_session,
        )

    # 6. Create ToolCall entries and link SearchDocs to them
    _create_and_link_tool_calls(
        tool_calls=tool_calls,
        assistant_message=assistant_message,
        db_session=db_session,
        default_tokenizer=default_tokenizer,
        tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
    )

    # 7. Build citations mapping - use the mapping we already built in step 4
    assistant_message.citations = (
        citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
    )

    # 8. Attach code interpreter generated files that the assistant actually
    # referenced in its response, so they are available via load_all_chat_files
    # on subsequent turns. Files not mentioned are intermediate artifacts.
    if sanitized_message_text:
        referenced = _extract_referenced_file_descriptors(
            tool_calls, sanitized_message_text
        )
        if referenced:
            existing_files = assistant_message.files or []
            assistant_message.files = existing_files + referenced

    # Finally save the messages, tool calls, and docs
    db_session.commit()


================================================
FILE: backend/onyx/chat/stop_signal_checker.py
================================================
from uuid import UUID

from onyx.cache.interface import CacheBackend

PREFIX = "chatsessionstop"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 10 * 60  # 10 minutes


def _get_fence_key(chat_session_id: UUID) -> str:
    """Generate the cache key for a chat session stop signal fence.

    Args:
        chat_session_id: The UUID of the chat session

    Returns:
        The fence key string. Tenant isolation is handled automatically
        by the cache backend (Redis key-prefixing or Postgres schema routing).
    """
    return f"{FENCE_PREFIX}_{chat_session_id}"


def set_fence(chat_session_id: UUID, cache: CacheBackend, value: bool) -> None:
    """Set or clear the stop signal fence for a chat session.

    Args:
        chat_session_id: The UUID of the chat session
        cache: Tenant-aware cache backend
        value: True to set the fence (stop signal), False to clear it
    """
    fence_key = _get_fence_key(chat_session_id)
    if not value:
        cache.delete(fence_key)
        return
    cache.set(fence_key, 0, ex=FENCE_TTL)


def is_connected(chat_session_id: UUID, cache: CacheBackend) -> bool:
    """Check if the chat session should continue (not stopped).

    Args:
        chat_session_id: The UUID of the chat session to check
        cache: Tenant-aware cache backend

    Returns:
        True if the session should continue, False if it should stop
    """
    return not cache.exists(_get_fence_key(chat_session_id))


def reset_cancel_status(chat_session_id: UUID, cache: CacheBackend) -> None:
    """Clear the stop signal for a chat session.

    Args:
        chat_session_id: The UUID of the chat session
        cache: Tenant-aware cache backend
    """
    cache.delete(_get_fence_key(chat_session_id))


================================================
FILE: backend/onyx/chat/tool_call_args_streaming.py
================================================
from collections.abc import Generator
from collections.abc import Mapping
from typing import Any
from typing import Type

from onyx.llm.model_response import ChatCompletionDeltaToolCall
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
from onyx.tools.built_in_tools import TOOL_NAME_TO_CLASS
from onyx.tools.interface import Tool
from onyx.utils.jsonriver import Parser


def _get_tool_class(
    tool_calls_in_progress: Mapping[int, Mapping[str, Any]],
    tool_call_delta: ChatCompletionDeltaToolCall,
) -> Type[Tool] | None:
    """Look up the Tool subclass for a streaming tool call delta."""
    tool_name = tool_calls_in_progress.get(tool_call_delta.index, {}).get("name")
    if not tool_name:
        return None
    return TOOL_NAME_TO_CLASS.get(tool_name)


def maybe_emit_argument_delta(
    tool_calls_in_progress: Mapping[int, Mapping[str, Any]],
    tool_call_delta: ChatCompletionDeltaToolCall,
    placement: Placement,
    parsers: dict[int, Parser],
) -> Generator[Packet, None, None]:
    """Emit decoded tool-call argument deltas to the frontend.

    Uses a ``jsonriver.Parser`` per tool-call index to incrementally parse
    the JSON argument string and extract only the newly-appended content
    for each string-valued argument.

    NOTE: Non-string arguments (numbers, booleans, null, arrays, objects)
    are skipped — they are available in the final tool-call kickoff packet.

    ``parsers`` is a mutable dict keyed by tool-call index. A new
    ``Parser`` is created automatically for each new index.
    """
    tool_cls = _get_tool_class(tool_calls_in_progress, tool_call_delta)
    if not tool_cls or not tool_cls.should_emit_argument_deltas():
        return

    fn = tool_call_delta.function
    delta_fragment = fn.arguments if fn else None
    if not delta_fragment:
        return

    idx = tool_call_delta.index
    if idx not in parsers:
        parsers[idx] = Parser()
    parser = parsers[idx]

    deltas = parser.feed(delta_fragment)

    argument_deltas: dict[str, str] = {}
    for delta in deltas:
        if isinstance(delta, dict):
            for key, value in delta.items():
                if isinstance(value, str):
                    argument_deltas[key] = argument_deltas.get(key, "") + value

    if not argument_deltas:
        return

    tc_data = tool_calls_in_progress[tool_call_delta.index]
    yield Packet(
        placement=placement,
        obj=ToolCallArgumentDelta(
            tool_type=tc_data.get("name", ""),
            argument_deltas=argument_deltas,
        ),
    )


================================================
FILE: backend/onyx/configs/__init__.py
================================================


================================================
FILE: backend/onyx/configs/agent_configs.py
================================================
import os


AGENT_DEFAULT_RETRIEVAL_HITS = 15
AGENT_DEFAULT_RERANKING_HITS = 10
AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5

AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = 25
AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = 35


AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

INITIAL_SEARCH_DECOMPOSITION_ENABLED = True

AGENT_DEFAULT_RETRIEVAL_HITS = 15
AGENT_DEFAULT_RERANKING_HITS = 10
AGENT_DEFAULT_MAX_VERIFIVATION_HITS = 30
AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000


AGENT_ALLOW_REFINEMENT = os.environ.get("AGENT_ALLOW_REFINEMENT", "").lower() == "true"

AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
)

AGENT_RETRIEVAL_STATS = (
    not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
) or True  # default True


AGENT_MAX_VERIFICATION_HITS = int(
    os.environ.get("AGENT_MAX_VERIFICATION_HITS") or AGENT_DEFAULT_MAX_VERIFIVATION_HITS
)  # 30

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

# Reranking agent configs
# Reranking stats - no influence on flow outside of stats collection
AGENT_RERANKING_STATS = (
    not os.environ.get("AGENT_RERANKING_STATS") == "True"
) or False  # default False

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS")
    or AGENT_DEFAULT_RERANKING_HITS
)  # 10

AGENT_NUM_DOCS_FOR_DECOMPOSITION = int(
    os.environ.get("AGENT_NUM_DOCS_FOR_DECOMPOSITION")
    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION
)  # 3

AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
    os.environ.get("AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION")
    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION
)  # 5

AGENT_EXPLORATORY_SEARCH_RESULTS = int(
    os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
)  # 5

AGENT_MIN_ORIG_QUESTION_DOCS = int(
    os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")
    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS
)  # 3

AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
    os.environ.get("AGENT_MAX_ANSWER_CONTEXT_DOCS")
    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS
)  # 8


AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
    os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
)  # 2000

AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = int(
    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER")
    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
)  # 25

AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = int(
    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER")
    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
)  # 35


AGENT_RETRIEVAL_STATS = (
    not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
) or True  # default True


AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

# Reranking agent configs
# Reranking stats - no influence on flow outside of stats collection
AGENT_RERANKING_STATS = (
    not os.environ.get("AGENT_RERANKING_STATS") == "True"
) or False  # default False

AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
)  # 15

AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(
    os.environ.get("AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS")
    or AGENT_DEFAULT_RERANKING_HITS
)  # 10

AGENT_NUM_DOCS_FOR_DECOMPOSITION = int(
    os.environ.get("AGENT_NUM_DOCS_FOR_DECOMPOSITION")
    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION
)  # 3

AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
    os.environ.get("AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION")
    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION
)  # 5

AGENT_EXPLORATORY_SEARCH_RESULTS = int(
    os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
)  # 5

AGENT_MIN_ORIG_QUESTION_DOCS = int(
    os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")
    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS
)  # 3

AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
    os.environ.get("AGENT_MAX_ANSWER_CONTEXT_DOCS")
    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS
)  # 8


AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
    os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
)  # 2000


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 15  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION
)

AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 45  # in seconds
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION")
    or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 5  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
)

AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 8  # in seconds
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 8  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 45  # in seconds
AGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_GENERAL_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 8  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 10  # in seconds
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 9  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 45  # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 15  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 40  # in seconds
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 20  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 60  # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 6  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
)

AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 12  # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(
    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_CHECK")
    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 6  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 12  # in seconds
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 4  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 6  # in seconds
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 6  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION
)

AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 8  # in seconds
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 6  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
)

AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 12  # in seconds
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(
    os.environ.get("AGENT_TIMEOUT_LLM_COMPARE_ANSWERS")
    or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS
)


AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 6  # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION
)

AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 12  # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
)

AGENT_DEFAULT_MAX_TOKENS_VALIDATION = 4
AGENT_MAX_TOKENS_VALIDATION = int(
    os.environ.get("AGENT_MAX_TOKENS_VALIDATION") or AGENT_DEFAULT_MAX_TOKENS_VALIDATION
)

AGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION = 256
AGENT_MAX_TOKENS_SUBANSWER_GENERATION = int(
    os.environ.get("AGENT_MAX_TOKENS_SUBANSWER_GENERATION")
    or AGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION
)

AGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION = 1024
AGENT_MAX_TOKENS_ANSWER_GENERATION = int(
    os.environ.get("AGENT_MAX_TOKENS_ANSWER_GENERATION")
    or AGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION
)

AGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION = 256
AGENT_MAX_TOKENS_SUBQUESTION_GENERATION = int(
    os.environ.get("AGENT_MAX_TOKENS_SUBQUESTION_GENERATION")
    or AGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION
)

AGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = 1024
AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = int(
    os.environ.get("AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION")
    or AGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION
)

AGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION = 64
AGENT_MAX_TOKENS_SUBQUERY_GENERATION = int(
    os.environ.get("AGENT_MAX_TOKENS_SUBQUERY_GENERATION")
    or AGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION
)

AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY = 128
AGENT_MAX_TOKENS_HISTORY_SUMMARY = int(
    os.environ.get("AGENT_MAX_TOKENS_HISTORY_SUMMARY")
    or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY
)

# Parameters for the Thoughtful/Deep Research flows
TF_DR_TIMEOUT_LONG = int(os.environ.get("TF_DR_TIMEOUT_LONG") or 120)
TF_DR_TIMEOUT_SHORT = int(os.environ.get("TF_DR_TIMEOUT_SHORT") or 60)


TF_DR_DEFAULT_FAST = (os.environ.get("TF_DR_DEFAULT_FAST") or "False").lower() == "true"

GRAPH_VERSION_NAME: str = "a"


================================================
FILE: backend/onyx/configs/app_configs.py
================================================
import json
import os
import urllib.parse
from datetime import datetime
from datetime import timezone
from typing import cast

from onyx.auth.schemas import AuthBackend
from onyx.cache.interface import CacheBackendType
from onyx.configs.constants import AuthType
from onyx.configs.constants import QueryHistoryType
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.utils.logger import setup_logger

logger = setup_logger()

#####
# App Configs
#####
APP_HOST = "0.0.0.0"
APP_PORT = 8080
# API_PREFIX is used to prepend a base path for all API routes
# generally used if using a reverse proxy which doesn't support stripping the `/api`
# prefix from requests directed towards the API server. In these cases, set this to `/api`
APP_API_PREFIX = os.environ.get("API_PREFIX", "")

# Certain services need to make HTTP requests to the API server, such as the MCP server and Discord bot
API_SERVER_PROTOCOL = os.environ.get("API_SERVER_PROTOCOL", "http")
API_SERVER_HOST = os.environ.get("API_SERVER_HOST", "127.0.0.1")
# This override allows self-hosting the MCP server with Onyx Cloud backend.
API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS = os.environ.get(
    "API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS"
)

# Whether to send user metadata (user_id/email and session_id) to the LLM provider.
# Disabled by default.
SEND_USER_METADATA_TO_LLM_PROVIDER = (
    os.environ.get("SEND_USER_METADATA_TO_LLM_PROVIDER", "")
).lower() == "true"

#####
# User Facing Features Configs
#####
BLURB_SIZE = 128  # Number Encoder Tokens included in the chunk blurb

# Hard ceiling for the admin-configurable file upload size (in MB).
# Self-hosted customers can raise or lower this via the environment variable.
_raw_max_upload_size_mb = int(os.environ.get("MAX_ALLOWED_UPLOAD_SIZE_MB", "250"))
if _raw_max_upload_size_mb < 0:
    logger.warning(
        "MAX_ALLOWED_UPLOAD_SIZE_MB=%d is negative; falling back to 250",
        _raw_max_upload_size_mb,
    )
    _raw_max_upload_size_mb = 250
MAX_ALLOWED_UPLOAD_SIZE_MB = _raw_max_upload_size_mb

# Default fallback for the per-user file upload size limit (in MB) when no
# admin-configured value exists.  Clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at
# runtime so this never silently exceeds the hard ceiling.
_raw_default_upload_size_mb = int(
    os.environ.get("DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", "100")
)
if _raw_default_upload_size_mb < 0:
    logger.warning(
        "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=%d is negative; falling back to 100",
        _raw_default_upload_size_mb,
    )
    _raw_default_upload_size_mb = 100
DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB = _raw_default_upload_size_mb
GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
    os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400
)  # 1 day

# Controls whether users can use User Knowledge (personal documents) in assistants
DISABLE_USER_KNOWLEDGE = os.environ.get("DISABLE_USER_KNOWLEDGE", "").lower() == "true"

# Disables vector DB (Vespa/OpenSearch) entirely. When True, connectors and RAG search
# are disabled but core chat, tools, user file uploads, and Projects still work.
DISABLE_VECTOR_DB = os.environ.get("DISABLE_VECTOR_DB", "").lower() == "true"

# Which backend to use for caching, locks, and ephemeral state.
# "redis" (default) or "postgres" (only valid when DISABLE_VECTOR_DB=true).
CACHE_BACKEND = CacheBackendType(
    os.environ.get("CACHE_BACKEND", CacheBackendType.REDIS)
)

# If set to true, will show extra/uncommon connectors in the "Other" category
SHOW_EXTRA_CONNECTORS = os.environ.get("SHOW_EXTRA_CONNECTORS", "").lower() == "true"

# Controls whether to allow admin query history reports with:
# 1. associated user emails
# 2. anonymized user emails
# 3. no queries
ONYX_QUERY_HISTORY_TYPE = QueryHistoryType(
    (os.environ.get("ONYX_QUERY_HISTORY_TYPE") or QueryHistoryType.NORMAL.value).lower()
)

#####
# Web Configs
#####
# WEB_DOMAIN is used to set the redirect_uri after login flows
# NOTE: if you are having problems accessing the Onyx web UI locally (especially
# on Windows, try  setting this to `http://127.0.0.1:3000` instead and see if that
# fixes it)
WEB_DOMAIN = os.environ.get("WEB_DOMAIN") or "http://localhost:3000"


#####
# Auth Configs
#####
# Silently default to basic - warnings/errors logged in verify_auth_setting()
# which only runs on app startup, not during migrations/scripts
_auth_type_str = (os.environ.get("AUTH_TYPE") or "").lower()
if _auth_type_str in [auth_type.value for auth_type in AuthType]:
    AUTH_TYPE = AuthType(_auth_type_str)
else:
    AUTH_TYPE = AuthType.BASIC

PASSWORD_MIN_LENGTH = int(os.getenv("PASSWORD_MIN_LENGTH", 8))
PASSWORD_MAX_LENGTH = int(os.getenv("PASSWORD_MAX_LENGTH", 64))
PASSWORD_REQUIRE_UPPERCASE = (
    os.environ.get("PASSWORD_REQUIRE_UPPERCASE", "false").lower() == "true"
)
PASSWORD_REQUIRE_LOWERCASE = (
    os.environ.get("PASSWORD_REQUIRE_LOWERCASE", "false").lower() == "true"
)
PASSWORD_REQUIRE_DIGIT = (
    os.environ.get("PASSWORD_REQUIRE_DIGIT", "false").lower() == "true"
)
PASSWORD_REQUIRE_SPECIAL_CHAR = (
    os.environ.get("PASSWORD_REQUIRE_SPECIAL_CHAR", "false").lower() == "true"
)

# Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive
# information. This provides an extra layer of security on top of Postgres access controls
# and is available in Onyx EE
ENCRYPTION_KEY_SECRET = os.environ.get("ENCRYPTION_KEY_SECRET") or ""

# Turn off mask if admin users should see full credentials for data connectors.
MASK_CREDENTIAL_PREFIX = (
    os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false"
)

AUTH_BACKEND = AuthBackend(os.environ.get("AUTH_BACKEND") or AuthBackend.REDIS.value)

SESSION_EXPIRE_TIME_SECONDS = int(
    os.environ.get("SESSION_EXPIRE_TIME_SECONDS")
    or os.environ.get("REDIS_AUTH_EXPIRE_TIME_SECONDS")
    or 86400 * 7
)  # 7 days

# Default request timeout, mostly used by connectors
REQUEST_TIMEOUT_SECONDS = int(os.environ.get("REQUEST_TIMEOUT_SECONDS") or 60)

# set `VALID_EMAIL_DOMAINS` to a comma seperated list of domains in order to
# restrict access to Onyx to only users with emails from those domains.
# E.g. `VALID_EMAIL_DOMAINS=example.com,example.org` will restrict Onyx
# signups to users with either an @example.com or an @example.org email.
# NOTE: maintaining `VALID_EMAIL_DOMAIN` to keep backwards compatibility
_VALID_EMAIL_DOMAIN = os.environ.get("VALID_EMAIL_DOMAIN", "")
_VALID_EMAIL_DOMAINS_STR = (
    os.environ.get("VALID_EMAIL_DOMAINS", "") or _VALID_EMAIL_DOMAIN
)
VALID_EMAIL_DOMAINS = (
    [
        domain.strip().lower()
        for domain in _VALID_EMAIL_DOMAINS_STR.split(",")
        if domain.strip()
    ]
    if _VALID_EMAIL_DOMAINS_STR
    else []
)

# Disposable email blocking - blocks temporary/throwaway email addresses
# Set to empty string to disable disposable email blocking
DISPOSABLE_EMAIL_DOMAINS_URL = os.environ.get(
    "DISPOSABLE_EMAIL_DOMAINS_URL",
    "https://disposable.github.io/disposable-email-domains/domains.json",
)

# OAuth Login Flow
# Used for both Google OAuth2 and OIDC flows
OAUTH_CLIENT_ID = (
    os.environ.get("OAUTH_CLIENT_ID", os.environ.get("GOOGLE_OAUTH_CLIENT_ID")) or ""
)
OAUTH_CLIENT_SECRET = (
    os.environ.get("OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET"))
    or ""
)

# Whether Google OAuth is enabled (requires both client ID and secret)
OAUTH_ENABLED = bool(OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET)

# OpenID Connect configuration URL for OIDC integrations
OPENID_CONFIG_URL = os.environ.get("OPENID_CONFIG_URL") or ""

# Applicable for OIDC Auth, allows you to override the scopes that
# are requested from the OIDC provider. Currently used when passing
# over access tokens to tool calls and the tool needs more scopes
OIDC_SCOPE_OVERRIDE: list[str] | None = None
_OIDC_SCOPE_OVERRIDE = os.environ.get("OIDC_SCOPE_OVERRIDE")

if _OIDC_SCOPE_OVERRIDE:
    try:
        OIDC_SCOPE_OVERRIDE = [
            scope.strip() for scope in _OIDC_SCOPE_OVERRIDE.split(",")
        ]
    except Exception:
        pass

# Enables PKCE for OIDC login flow. Disabled by default to preserve
# backwards compatibility for existing OIDC deployments.
OIDC_PKCE_ENABLED = os.environ.get("OIDC_PKCE_ENABLED", "").lower() == "true"

# Applicable for SAML Auth
SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/onyx/configs/saml_config"

# JWT Public Key URL for JWT token verification
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)

USER_AUTH_SECRET = os.environ.get("USER_AUTH_SECRET", "")

if AUTH_TYPE == AuthType.BASIC and not USER_AUTH_SECRET:
    logger.warning(
        "USER_AUTH_SECRET is not set. This is required for secure password reset "
        "and email verification tokens. Please set USER_AUTH_SECRET in production."
    )

# Duration (in seconds) for which the FastAPI Users JWT token remains valid in the user's browser.
# By default, this is set to match the Redis expiry time for consistency.
AUTH_COOKIE_EXPIRE_TIME_SECONDS = int(
    os.environ.get("AUTH_COOKIE_EXPIRE_TIME_SECONDS") or 86400 * 7
)  # 7 days

# for basic auth
REQUIRE_EMAIL_VERIFICATION = (
    os.environ.get("REQUIRE_EMAIL_VERIFICATION", "").lower() == "true"
)
SMTP_SERVER = os.environ.get("SMTP_SERVER") or ""
SMTP_PORT = int(os.environ.get("SMTP_PORT") or "587")
SMTP_USER = os.environ.get("SMTP_USER") or ""
SMTP_PASS = os.environ.get("SMTP_PASS") or ""
EMAIL_FROM = os.environ.get("EMAIL_FROM") or SMTP_USER

SENDGRID_API_KEY = os.environ.get("SENDGRID_API_KEY") or ""
EMAIL_CONFIGURED = all([SMTP_SERVER, SMTP_USER, SMTP_PASS]) or SENDGRID_API_KEY

# If set, Onyx will listen to the `expires_at` returned by the identity
# provider (e.g. Okta, Google, etc.) and force the user to re-authenticate
# after this time has elapsed. Disabled since by default many auth providers
# have very short expiry times (e.g. 1 hour) which provide a poor user experience
TRACK_EXTERNAL_IDP_EXPIRY = (
    os.environ.get("TRACK_EXTERNAL_IDP_EXPIRY", "").lower() == "true"
)


#####
# DB Configs
#####
DOCUMENT_INDEX_NAME = "danswer_index"

# OpenSearch Configs
OPENSEARCH_HOST = os.environ.get("OPENSEARCH_HOST") or "localhost"
OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 9200)
# TODO(andrei): 60 seconds is too much, we're just setting a high default
# timeout for now to examine why queries are slow.
# NOTE: This timeout applies to all requests the client makes, including bulk
# indexing.
DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S = int(
    os.environ.get("DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S") or 60
)
# TODO(andrei): 50 seconds is too much, we're just setting a high default
# timeout for now to examine why queries are slow.
# NOTE: To get useful partial results, this value should be less than the client
# timeout above.
DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S = int(
    os.environ.get("DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S") or 50
)
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get(
    "OPENSEARCH_ADMIN_PASSWORD", "StrongPassword123!"
)
USING_AWS_MANAGED_OPENSEARCH = (
    os.environ.get("USING_AWS_MANAGED_OPENSEARCH", "").lower() == "true"
)
# Profiling adds some overhead to OpenSearch operations. This overhead is
# unknown right now. Defaults to True.
OPENSEARCH_PROFILING_DISABLED = (
    os.environ.get("OPENSEARCH_PROFILING_DISABLED", "true").lower() == "true"
)
# Whether to disable match highlights for OpenSearch. Defaults to True for now
# as we investigate query performance.
OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED = (
    os.environ.get("OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED", "true").lower() == "true"
)
# When enabled, OpenSearch returns detailed score breakdowns for each hit.
# Useful for debugging and tuning search relevance. Has ~10-30% performance overhead according to documentation.
# Seems for Hybrid Search in practice, the impact is actually more like 1000x slower.
OPENSEARCH_EXPLAIN_ENABLED = (
    os.environ.get("OPENSEARCH_EXPLAIN_ENABLED", "").lower() == "true"
)
# Analyzer used for full-text fields (title, content). Use OpenSearch built-in analyzer
# names (e.g. "english", "standard", "german"). Affects stemming and tokenization;
# existing indices need reindexing after a change.
OPENSEARCH_TEXT_ANALYZER = os.environ.get("OPENSEARCH_TEXT_ANALYZER") or "english"

# This is the "base" config for now, the idea is that at least for our dev
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
# NOTE: Now enabled on by default, unless the env indicates otherwise.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
    os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "true").lower() == "true"
)
# NOTE: This effectively does nothing anymore, admins can now toggle whether
# retrieval is through OpenSearch. This value is only used as a final fallback
# in case that doesn't work for whatever reason.
# Given that the "base" config above is true, this enables whether we want to
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
    ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
    and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
)
# Whether we should check for and create an index if necessary every time we
# instantiate an OpenSearchDocumentIndex on multitenant cloud. Defaults to True.
VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
    os.environ.get("VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT", "true").lower()
    == "true"
)
OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
    os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
)
# If set, will override the default number of shards and replicas for the index.
OPENSEARCH_INDEX_NUM_SHARDS: int | None = (
    int(os.environ["OPENSEARCH_INDEX_NUM_SHARDS"])
    if os.environ.get("OPENSEARCH_INDEX_NUM_SHARDS", None) is not None
    else None
)
OPENSEARCH_INDEX_NUM_REPLICAS: int | None = (
    int(os.environ["OPENSEARCH_INDEX_NUM_REPLICAS"])
    if os.environ.get("OPENSEARCH_INDEX_NUM_REPLICAS", None) is not None
    else None
)
ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH = (
    os.environ.get("ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH", "").lower()
    == "true"
)

VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
# NOTE: this is used if and only if the vespa config server is accessible via a
# different host than the main vespa application
VESPA_CONFIG_SERVER_HOST = os.environ.get("VESPA_CONFIG_SERVER_HOST") or VESPA_HOST
VESPA_PORT = os.environ.get("VESPA_PORT") or "8081"
VESPA_TENANT_PORT = os.environ.get("VESPA_TENANT_PORT") or "19071"
# the number of times to try and connect to vespa on startup before giving up
VESPA_NUM_ATTEMPTS_ON_STARTUP = int(os.environ.get("NUM_RETRIES_ON_STARTUP") or 10)

VESPA_CLOUD_URL = os.environ.get("VESPA_CLOUD_URL", "")

VESPA_CLOUD_CERT_PATH = os.environ.get("VESPA_CLOUD_CERT_PATH")
VESPA_CLOUD_KEY_PATH = os.environ.get("VESPA_CLOUD_KEY_PATH")

# Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder)
INDEX_BATCH_SIZE = int(os.environ.get("INDEX_BATCH_SIZE") or 16)

MAX_DRIVE_WORKERS = int(os.environ.get("MAX_DRIVE_WORKERS", 4))

# Below are intended to match the env variables names used by the official postgres docker image
# https://hub.docker.com/_/postgres
POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
# URL-encode the password for asyncpg to avoid issues with special characters on some machines.
POSTGRES_PASSWORD = urllib.parse.quote_plus(
    os.environ.get("POSTGRES_PASSWORD") or "password"
)
POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "127.0.0.1"
POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
AWS_REGION_NAME = os.environ.get("AWS_REGION_NAME") or "us-east-2"

POSTGRES_API_SERVER_POOL_SIZE = int(
    os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40
)
POSTGRES_API_SERVER_POOL_OVERFLOW = int(
    os.environ.get("POSTGRES_API_SERVER_POOL_OVERFLOW") or 10
)

POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE = int(
    os.environ.get("POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE") or 10
)
POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW = int(
    os.environ.get("POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW") or 5
)

# defaults to False
# generally should only be used for
POSTGRES_USE_NULL_POOL = os.environ.get("POSTGRES_USE_NULL_POOL", "").lower() == "true"

# defaults to False
POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true"

# recycle timeout in seconds
POSTGRES_POOL_RECYCLE_DEFAULT = 60 * 20  # 20 minutes
try:
    POSTGRES_POOL_RECYCLE = int(
        os.environ.get("POSTGRES_POOL_RECYCLE", POSTGRES_POOL_RECYCLE_DEFAULT)
    )
except ValueError:
    POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT

# RDS IAM authentication - enables IAM-based authentication for PostgreSQL
USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"

# Redis IAM authentication - enables IAM-based authentication for Redis ElastiCache
# Note: This is separate from RDS IAM auth as they use different authentication mechanisms
USE_REDIS_IAM_AUTH = os.getenv("USE_REDIS_IAM_AUTH", "False").lower() == "true"
REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true"
REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""

# this assumes that other redis settings remain the same as the primary
REDIS_REPLICA_HOST = os.environ.get("REDIS_REPLICA_HOST") or REDIS_HOST

REDIS_AUTH_KEY_PREFIX = "fastapi_users_token:"

# Rate limiting for auth endpoints
RATE_LIMIT_WINDOW_SECONDS: int | None = None
_rate_limit_window_seconds_str = os.environ.get("RATE_LIMIT_WINDOW_SECONDS")
if _rate_limit_window_seconds_str is not None:
    try:
        RATE_LIMIT_WINDOW_SECONDS = int(_rate_limit_window_seconds_str)
    except ValueError:
        pass

RATE_LIMIT_MAX_REQUESTS: int | None = None
_rate_limit_max_requests_str = os.environ.get("RATE_LIMIT_MAX_REQUESTS")
if _rate_limit_max_requests_str is not None:
    try:
        RATE_LIMIT_MAX_REQUESTS = int(_rate_limit_max_requests_str)
    except ValueError:
        pass

AUTH_RATE_LIMITING_ENABLED = RATE_LIMIT_MAX_REQUESTS and RATE_LIMIT_WINDOW_SECONDS
# Used for general redis things
REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER", 0))

# Used by celery as broker and backend
REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int(
    os.environ.get("REDIS_DB_NUMBER_CELERY_RESULT_BACKEND", 14)
)
REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15))  # broker

# will propagate to both our redis client as well as celery's redis client
REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60))

# our redis client only, not celery's
REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128))

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
# should be one of "required", "optional", or "none"
REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none")
REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None)

CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400))  # seconds

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit
# Setting to None may help when there is a proxy in the way closing idle connections
_CELERY_BROKER_POOL_LIMIT_DEFAULT = 10
try:
    CELERY_BROKER_POOL_LIMIT = int(
        os.environ.get("CELERY_BROKER_POOL_LIMIT", _CELERY_BROKER_POOL_LIMIT_DEFAULT)
    )
except ValueError:
    CELERY_BROKER_POOL_LIMIT = _CELERY_BROKER_POOL_LIMIT_DEFAULT

_CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT = 24
try:
    CELERY_WORKER_LIGHT_CONCURRENCY = int(
        os.environ.get(
            "CELERY_WORKER_LIGHT_CONCURRENCY",
            _CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT,
        )
    )
except ValueError:
    CELERY_WORKER_LIGHT_CONCURRENCY = _CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT

_CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT = 8
try:
    CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = int(
        os.environ.get(
            "CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER",
            _CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT,
        )
    )
except ValueError:
    CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = (
        _CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT
    )

_CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT = 6
try:
    env_value = os.environ.get("CELERY_WORKER_DOCPROCESSING_CONCURRENCY")
    if not env_value:
        env_value = os.environ.get("NUM_INDEXING_WORKERS")

    if not env_value:
        env_value = str(_CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT)
    CELERY_WORKER_DOCPROCESSING_CONCURRENCY = int(env_value)
except ValueError:
    CELERY_WORKER_DOCPROCESSING_CONCURRENCY = (
        _CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT
    )

_CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT = 1
try:
    env_value = os.environ.get("CELERY_WORKER_DOCFETCHING_CONCURRENCY")
    if not env_value:
        env_value = os.environ.get("NUM_DOCFETCHING_WORKERS")

    if not env_value:
        env_value = str(_CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT)
    CELERY_WORKER_DOCFETCHING_CONCURRENCY = int(env_value)
except ValueError:
    CELERY_WORKER_DOCFETCHING_CONCURRENCY = (
        _CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT
    )

CELERY_WORKER_PRIMARY_CONCURRENCY = int(
    os.environ.get("CELERY_WORKER_PRIMARY_CONCURRENCY") or 4
)

CELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(
    os.environ.get("CELERY_WORKER_PRIMARY_POOL_OVERFLOW") or 4
)

# Individual worker concurrency settings
CELERY_WORKER_HEAVY_CONCURRENCY = int(
    os.environ.get("CELERY_WORKER_HEAVY_CONCURRENCY") or 4
)

CELERY_WORKER_MONITORING_CONCURRENCY = int(
    os.environ.get("CELERY_WORKER_MONITORING_CONCURRENCY") or 1
)

CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = int(
    os.environ.get("CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY") or 2
)

# The maximum number of tasks that can be queued up to sync to Vespa in a single pass
VESPA_SYNC_MAX_TASKS = 8192

DB_YIELD_PER_DEFAULT = 64

#####
# Connector Configs
#####
POLL_CONNECTOR_OFFSET = 30  # Minutes overlap between poll windows

# View the list here:
# https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/factory.py
# If this is empty, all connectors are enabled, this is an option for security heavy orgs where
# only very select connectors are enabled and admins cannot add other connector types
ENABLED_CONNECTOR_TYPES = os.environ.get("ENABLED_CONNECTOR_TYPES") or ""

# If set to true, curators can only access and edit assistants that they created
CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS = (
    os.environ.get("CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS", "").lower()
    == "true"
)

# Some calls to get information on expert users are quite costly especially with rate limiting
# Since experts are not used in the actual user experience, currently it is turned off
# for some connectors
ENABLE_EXPENSIVE_EXPERT_CALLS = False


# TODO these should be available for frontend configuration, via advanced options expandable
WEB_CONNECTOR_IGNORED_CLASSES = os.environ.get(
    "WEB_CONNECTOR_IGNORED_CLASSES", "sidebar,footer"
).split(",")
WEB_CONNECTOR_IGNORED_ELEMENTS = os.environ.get(
    "WEB_CONNECTOR_IGNORED_ELEMENTS", "nav,footer,meta,script,style,symbol,aside"
).split(",")
WEB_CONNECTOR_OAUTH_CLIENT_ID = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_ID")
WEB_CONNECTOR_OAUTH_CLIENT_SECRET = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_SECRET")
WEB_CONNECTOR_OAUTH_TOKEN_URL = os.environ.get("WEB_CONNECTOR_OAUTH_TOKEN_URL")
WEB_CONNECTOR_VALIDATE_URLS = os.environ.get("WEB_CONNECTOR_VALIDATE_URLS")

HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY = os.environ.get(
    "HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY",
    HtmlBasedConnectorTransformLinksStrategy.STRIP,
)

NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP = (
    os.environ.get("NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP", "").lower()
    == "true"
)


#####
# Confluence Connector Configs
#####

CONFLUENCE_CONNECTOR_LABELS_TO_SKIP = [
    ignored_tag
    for ignored_tag in os.environ.get("CONFLUENCE_CONNECTOR_LABELS_TO_SKIP", "").split(
        ","
    )
    if ignored_tag
]

# Attachments exceeding this size will not be retrieved (in bytes)
CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = int(
    os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD", 10 * 1024 * 1024)
)
# Attachments with more chars than this will not be indexed. This is to prevent extremely
# large files from freezing indexing. 200,000 is ~100 google doc pages.
CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD = int(
    os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD", 200_000)
)

# A JSON-formatted array. Each item in the array should have the following structure:
# {
#     "user_id": "1234567890",
#     "username": "bob",
#     "display_name": "Bob Fitzgerald",
#     "email": "bob@example.com",
#     "type": "known"
# }
_RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE = os.environ.get(
    "CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE", ""
)
CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE = cast(
    list[dict[str, str]] | None,
    (
        json.loads(_RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE)
        if _RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
        else None
    ),
)

# Due to breakages in the confluence API, the timezone offset must be specified client side
# to match the user's specified timezone.

# The current state of affairs:
# CQL queries are parsed in the user's timezone and cannot be specified in UTC
# no API retrieves the user's timezone
# All data is returned in UTC, so we can't derive the user's timezone from that

# https://community.developer.atlassian.com/t/confluence-cloud-time-zone-get-via-rest-api/35954/16
# https://jira.atlassian.com/browse/CONFCLOUD-69670


def get_current_tz_offset() -> int:
    # datetime now() gets local time, datetime.now(timezone.utc) gets UTC time.
    # remove tzinfo to compare non-timezone-aware objects.
    time_diff = datetime.now() - datetime.now(timezone.utc).replace(tzinfo=None)
    return round(time_diff.total_seconds() / 3600)


# enter as a floating point offset from UTC in hours (-24 < val < 24)
# this will be applied globally, so it probably makes sense to transition this to per
# connector as some point.
# For the default value, we assume that the user's local timezone is more likely to be
# correct (i.e. the configured user's timezone or the default server one) than UTC.
# https://developer.atlassian.com/cloud/confluence/cql-fields/#created
CONFLUENCE_TIMEZONE_OFFSET = float(
    os.environ.get("CONFLUENCE_TIMEZONE_OFFSET", get_current_tz_offset())
)

CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC = (
    os.environ.get("CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC", "").lower() == "true"
)

GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(
    os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024)
)

# Default size threshold for Drupal Wiki attachments (10MB)
DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD = int(
    os.environ.get("DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD", 10 * 1024 * 1024)
)

# Default size threshold for SharePoint files (20MB)
SHAREPOINT_CONNECTOR_SIZE_THRESHOLD = int(
    os.environ.get("SHAREPOINT_CONNECTOR_SIZE_THRESHOLD", 20 * 1024 * 1024)
)

# When True, group sync enumerates every Azure AD group in the tenant (expensive).
# When False (default), only groups found in site role assignments are synced.
# Can be overridden per-connector via the "exhaustive_ad_enumeration" key in
# connector_specific_config.
SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION = (
    os.environ.get("SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION", "").lower() == "true"
)

BLOB_STORAGE_SIZE_THRESHOLD = int(
    os.environ.get("BLOB_STORAGE_SIZE_THRESHOLD", 20 * 1024 * 1024)
)

JIRA_CONNECTOR_LABELS_TO_SKIP = [
    ignored_tag
    for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
    if ignored_tag
]
# Maximum size for Jira tickets in bytes (default: 100KB)
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
)
JIRA_SLIM_PAGE_SIZE = int(os.environ.get("JIRA_SLIM_PAGE_SIZE", 500))

GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")

GITHUB_CONNECTOR_BASE_URL = os.environ.get("GITHUB_CONNECTOR_BASE_URL") or None

GITLAB_CONNECTOR_INCLUDE_CODE_FILES = (
    os.environ.get("GITLAB_CONNECTOR_INCLUDE_CODE_FILES", "").lower() == "true"
)

# Typically set to http://localhost:3000 for OAuth connector development
CONNECTOR_LOCALHOST_OVERRIDE = os.getenv("CONNECTOR_LOCALHOST_OVERRIDE")

# Egnyte specific configs
EGNYTE_CLIENT_ID = os.getenv("EGNYTE_CLIENT_ID")
EGNYTE_CLIENT_SECRET = os.getenv("EGNYTE_CLIENT_SECRET")

# Linear specific configs
LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")
LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")

# Slack specific configs
SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 8)
MAX_SLACK_QUERY_EXPANSIONS = int(os.environ.get("MAX_SLACK_QUERY_EXPANSIONS", "5"))

# Slack federated search thread context settings
# Batch size for fetching thread context (controls concurrent API calls per batch)
SLACK_THREAD_CONTEXT_BATCH_SIZE = int(
    os.environ.get("SLACK_THREAD_CONTEXT_BATCH_SIZE", "5")
)
# Maximum messages to fetch thread context for (top N by relevance get full context)
MAX_SLACK_THREAD_CONTEXT_MESSAGES = int(
    os.environ.get("MAX_SLACK_THREAD_CONTEXT_MESSAGES", "5")
)

# TestRail specific configs
TESTRAIL_BASE_URL = os.environ.get("TESTRAIL_BASE_URL", "")
TESTRAIL_USERNAME = os.environ.get("TESTRAIL_USERNAME", "")
TESTRAIL_API_KEY = os.environ.get("TESTRAIL_API_KEY", "")

LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE = (
    os.environ.get("LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE", "").lower()
    == "true"
)

DEFAULT_PRUNING_FREQ = 60 * 60 * 24  # Once a day

ALLOW_SIMULTANEOUS_PRUNING = (
    os.environ.get("ALLOW_SIMULTANEOUS_PRUNING", "").lower() == "true"
)

# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation.
MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int(
    os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0)
)

# comma delimited list of zendesk article labels to skip indexing for
ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS = os.environ.get(
    "ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS", ""
).split(",")


#####
# Indexing Configs
#####
# NOTE: Currently only supported in the Confluence and Google Drive connectors +
# only handles some failures (Confluence = handles API call failures, Google
# Drive = handles failures pulling files / parsing them)
CONTINUE_ON_CONNECTOR_FAILURE = os.environ.get(
    "CONTINUE_ON_CONNECTOR_FAILURE", ""
).lower() not in ["false", ""]
# When swapping to a new embedding model, a secondary index is created in the background, to conserve
# resources, we pause updates on the primary index by default while the secondary index is created
DISABLE_INDEX_UPDATE_ON_SWAP = (
    os.environ.get("DISABLE_INDEX_UPDATE_ON_SWAP", "").lower() == "true"
)
# More accurate results at the expense of indexing speed and index size (stores additional 4 MINI_CHUNK vectors)
ENABLE_MULTIPASS_INDEXING = (
    os.environ.get("ENABLE_MULTIPASS_INDEXING", "").lower() == "true"
)
# Enable contextual retrieval
ENABLE_CONTEXTUAL_RAG = os.environ.get("ENABLE_CONTEXTUAL_RAG", "").lower() == "true"

DEFAULT_CONTEXTUAL_RAG_LLM_NAME = "gpt-4o-mini"
DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER = "DevEnvPresetOpenAI"
# Finer grained chunking for more detail retention
# Slightly larger since the sentence aware split is a max cutoff so most minichunks will be under MINI_CHUNK_SIZE
# tokens. But we need it to be at least as big as 1/4th chunk size to avoid having a tiny mini-chunk at the end
MINI_CHUNK_SIZE = 150

# This is the number of regular chunks per large chunk
LARGE_CHUNK_RATIO = 4

# The maximum number of chunks that can be held for 1 document processing batch
# The purpose of this is to set an upper bound on memory usage
MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)

# Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
# We don't want the metadata to overwhelm the actual contents of the chunk
SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"

# The indexer will warn in the logs whenver a document exceeds this threshold (in bytes)
INDEXING_SIZE_WARNING_THRESHOLD = int(
    os.environ.get("INDEXING_SIZE_WARNING_THRESHOLD") or 100 * 1024 * 1024
)

# during indexing, will log verbose memory diff stats every x batches and at the end.
# 0 disables this behavior and is the default.
INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL") or 0)

# Enable multi-threaded embedding model calls for parallel processing
# Note: only applies for API-based embedding models
INDEXING_EMBEDDING_MODEL_NUM_THREADS = int(
    os.environ.get("INDEXING_EMBEDDING_MODEL_NUM_THREADS") or 8
)

# Maximum file size in a document to be indexed
MAX_DOCUMENT_CHARS = int(os.environ.get("MAX_DOCUMENT_CHARS") or 5_000_000)
MAX_FILE_SIZE_BYTES = int(
    os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
)  # 2GB in bytes

# Use document summary for contextual rag
USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
# Use chunk summary for contextual rag
USE_CHUNK_SUMMARY = os.environ.get("USE_CHUNK_SUMMARY", "true").lower() == "true"
# Average summary embeddings for contextual rag (not yet implemented)
AVERAGE_SUMMARY_EMBEDDINGS = (
    os.environ.get("AVERAGE_SUMMARY_EMBEDDINGS", "false").lower() == "true"
)

MAX_TOKENS_FOR_FULL_INCLUSION = 4096

# The intent was to have this be configurable per query, but I don't think any
# codepath was actually configuring this, so for the migrated Vespa interface
# we'll just use the default value, but also have it be configurable by env var.
RECENCY_BIAS_MULTIPLIER = float(os.environ.get("RECENCY_BIAS_MULTIPLIER") or 1.0)

# Should match the rerank-count value set in
# backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja.
RERANK_COUNT = int(os.environ.get("RERANK_COUNT") or 1000)


#####
# Tool Configs
#####
# Code Interpreter Service Configuration
CODE_INTERPRETER_BASE_URL = os.environ.get(
    "CODE_INTERPRETER_BASE_URL", "http://localhost:8000"
)

CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = int(
    os.environ.get("CODE_INTERPRETER_DEFAULT_TIMEOUT_MS") or 60_000
)

CODE_INTERPRETER_MAX_OUTPUT_LENGTH = int(
    os.environ.get("CODE_INTERPRETER_MAX_OUTPUT_LENGTH") or 50_000
)


#####
# Miscellaneous
#####
JOB_TIMEOUT = 60 * 60 * 6  # 6 hours default
# Logs Onyx only model interactions like prompts, responses, messages etc.
LOG_ONYX_MODEL_INTERACTIONS = (
    os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)

PROMPT_CACHE_CHAT_HISTORY = (
    os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
    os.environ.get("LOG_VESPA_TIMING_INFORMATION", "").lower() == "true"
)
LOG_ENDPOINT_LATENCY = os.environ.get("LOG_ENDPOINT_LATENCY", "").lower() == "true"
LOG_POSTGRES_LATENCY = os.environ.get("LOG_POSTGRES_LATENCY", "").lower() == "true"
LOG_POSTGRES_CONN_COUNTS = (
    os.environ.get("LOG_POSTGRES_CONN_COUNTS", "").lower() == "true"
)
# Anonymous usage telemetry
DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"

#####
# Braintrust Configuration
#####
# Braintrust project name
BRAINTRUST_PROJECT = os.environ.get("BRAINTRUST_PROJECT", "Onyx")
# Braintrust API key - if provided, Braintrust tracing will be enabled
BRAINTRUST_API_KEY = os.environ.get("BRAINTRUST_API_KEY") or ""
# Maximum concurrency for Braintrust evaluations
# None means unlimited concurrency, otherwise specify a number
_braintrust_concurrency = os.environ.get("BRAINTRUST_MAX_CONCURRENCY")
BRAINTRUST_MAX_CONCURRENCY = (
    int(_braintrust_concurrency) if _braintrust_concurrency else None
)

#####
# Scheduled Evals Configuration
#####
# Comma-separated list of Braintrust dataset names to run on schedule
SCHEDULED_EVAL_DATASET_NAMES = [
    name.strip()
    for name in os.environ.get("SCHEDULED_EVAL_DATASET_NAMES", "").split(",")
    if name.strip()
]
# Email address to use for search permissions during scheduled evals
SCHEDULED_EVAL_PERMISSIONS_EMAIL = os.environ.get(
    "SCHEDULED_EVAL_PERMISSIONS_EMAIL", "roshan@onyx.app"
)
# Braintrust project name to use for scheduled evals
SCHEDULED_EVAL_PROJECT = os.environ.get("SCHEDULED_EVAL_PROJECT", "st-dev")

#####
# Langfuse Configuration
#####
# Langfuse API credentials - if provided, Langfuse tracing will be enabled
LANGFUSE_SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY") or ""
LANGFUSE_PUBLIC_KEY = os.environ.get("LANGFUSE_PUBLIC_KEY") or ""
LANGFUSE_HOST = os.environ.get("LANGFUSE_HOST") or ""  # For self-hosted Langfuse

# Defined custom query/answer conditions to validate the query and the LLM answer.
# Format: list of strings
CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
    os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
)

VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "15")
# This is the timeout for the client side of the Vespa migration task. When
# exceeded, an exception is raised in our code. This value should be higher than
# VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT.
VESPA_MIGRATION_REQUEST_TIMEOUT_S = int(
    os.environ.get("VESPA_MIGRATION_REQUEST_TIMEOUT_S") or "120"
)
# This is the timeout Vespa uses on the server side to know when to wrap up its
# traversal and try to report partial results. This differs from the client
# timeout above which raises an exception in our code when exceeded. This
# timeout allows Vespa to return gracefully. This value should be lower than
# VESPA_MIGRATION_REQUEST_TIMEOUT_S. Formatted as <number of seconds>s.
VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT = os.environ.get(
    "VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT", "110s"
)

SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")

PARSE_WITH_TRAFILATURA = os.environ.get("PARSE_WITH_TRAFILATURA", "").lower() == "true"

# allow for custom error messages for different errors returned by litellm
# for example, can specify: {"Violated content safety policy": "EVIL REQUEST!!!"}
# to make it so that if an LLM call returns an error containing "Violated content safety policy"
# the end user will see "EVIL REQUEST!!!" instead of the default error message.
_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = os.environ.get(
    "LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS", ""
)
LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS: dict[str, str] | None = None
try:
    LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = cast(
        dict[str, str], json.loads(_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS)
    )
except json.JSONDecodeError:
    pass

# Auto LLM Configuration - fetches model configs from GitHub for providers in Auto mode
AUTO_LLM_CONFIG_URL = os.environ.get(
    "AUTO_LLM_CONFIG_URL",
    "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/onyx/llm/well_known_providers/recommended-models.json",
)

# How often to check for auto LLM model updates (in seconds)
AUTO_LLM_UPDATE_INTERVAL_SECONDS = int(
    os.environ.get("AUTO_LLM_UPDATE_INTERVAL_SECONDS", 1800)  # 30 minutes
)

#####
# Enterprise Edition Configs
#####
# NOTE: this should only be enabled if you have purchased an enterprise license.
# if you're interested in an enterprise license, please reach out to us at
# founders@onyx.app OR message Chris Weaver or Yuhong Sun in the Onyx
# Discord community https://discord.gg/4NA5SbzrWb
ENTERPRISE_EDITION_ENABLED = (
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
)

#####
# Image Generation Configuration (DEPRECATED)
# These environment variables will be deprecated soon.
# To configure image generation, please visit the Image Generation page in the Admin Panel.
#####
# Azure Image Configurations
AZURE_IMAGE_API_VERSION = os.environ.get("AZURE_IMAGE_API_VERSION") or os.environ.get(
    "AZURE_DALLE_API_VERSION"
)
AZURE_IMAGE_API_KEY = os.environ.get("AZURE_IMAGE_API_KEY") or os.environ.get(
    "AZURE_DALLE_API_KEY"
)
AZURE_IMAGE_API_BASE = os.environ.get("AZURE_IMAGE_API_BASE") or os.environ.get(
    "AZURE_DALLE_API_BASE"
)
AZURE_IMAGE_DEPLOYMENT_NAME = os.environ.get(
    "AZURE_IMAGE_DEPLOYMENT_NAME"
) or os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME")

# configurable image model
IMAGE_MODEL_NAME = os.environ.get("IMAGE_MODEL_NAME", "gpt-image-1")
IMAGE_MODEL_PROVIDER = os.environ.get("IMAGE_MODEL_PROVIDER", "openai")

# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH
MANAGED_VESPA = os.environ.get("MANAGED_VESPA", "").lower() == "true"

ENABLE_EMAIL_INVITES = os.environ.get("ENABLE_EMAIL_INVITES", "").lower() == "true"

# Limit on number of users a free trial tenant can invite (cloud only)
NUM_FREE_TRIAL_USER_INVITES = int(os.environ.get("NUM_FREE_TRIAL_USER_INVITES", "10"))

# Security and authentication
DATA_PLANE_SECRET = os.environ.get(
    "DATA_PLANE_SECRET", ""
)  # Used for secure communication between the control and data plane
EXPECTED_API_KEY = os.environ.get(
    "EXPECTED_API_KEY", ""
)  # Additional security check for the control plane API

# API configuration
CONTROL_PLANE_API_BASE_URL = os.environ.get(
    "CONTROL_PLANE_API_BASE_URL", "http://localhost:8082"
)

OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "")
OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "")
OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get(
    "OAUTH_CONFLUENCE_CLOUD_CLIENT_ID", ""
)
OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET = os.environ.get(
    "OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET", ""
)
OAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get("OAUTH_GOOGLE_DRIVE_CLIENT_ID", "")
OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
    "OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
)

# JWT configuration
JWT_ALGORITHM = "HS256"

#####
# API Key Configs
#####
# refers to the rounds described here: https://passlib.readthedocs.io/en/stable/lib/passlib.hash.sha256_crypt.html
_API_KEY_HASH_ROUNDS_RAW = os.environ.get("API_KEY_HASH_ROUNDS")
API_KEY_HASH_ROUNDS = (
    int(_API_KEY_HASH_ROUNDS_RAW) if _API_KEY_HASH_ROUNDS_RAW else None
)

#####
# MCP Server Configs
#####
MCP_SERVER_ENABLED = os.environ.get("MCP_SERVER_ENABLED", "").lower() == "true"
MCP_SERVER_HOST = os.environ.get("MCP_SERVER_HOST", "0.0.0.0")
MCP_SERVER_PORT = int(os.environ.get("MCP_SERVER_PORT") or 8090)

# CORS origins for MCP clients (comma-separated)
# Local dev: "http://localhost:*"
# Production: "https://trusted-client.com,https://another-client.com"
MCP_SERVER_CORS_ORIGINS = [
    origin.strip()
    for origin in os.environ.get("MCP_SERVER_CORS_ORIGINS", "").split(",")
    if origin.strip()
]


POD_NAME = os.environ.get("POD_NAME")
POD_NAMESPACE = os.environ.get("POD_NAMESPACE")


DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"


INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"

#####
# Captcha Configuration (for cloud signup protection)
#####
# Enable captcha verification for new user registration
CAPTCHA_ENABLED = os.environ.get("CAPTCHA_ENABLED", "").lower() == "true"

# Google reCAPTCHA secret key (server-side validation)
RECAPTCHA_SECRET_KEY = os.environ.get("RECAPTCHA_SECRET_KEY", "")

# Minimum score threshold for reCAPTCHA v3 (0.0-1.0, higher = more likely human)
# 0.5 is the recommended default
RECAPTCHA_SCORE_THRESHOLD = float(os.environ.get("RECAPTCHA_SCORE_THRESHOLD", "0.5"))

MOCK_CONNECTOR_FILE_PATH = os.environ.get("MOCK_CONNECTOR_FILE_PATH")

# Set to true to mock LLM responses for testing purposes
MOCK_LLM_RESPONSE = (
    os.environ.get("MOCK_LLM_RESPONSE") if os.environ.get("MOCK_LLM_RESPONSE") else None
)


DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20

# Number of pre-provisioned tenants to maintain
TARGET_AVAILABLE_TENANTS = int(os.environ.get("TARGET_AVAILABLE_TENANTS", "5"))


# Image summarization configuration
IMAGE_SUMMARIZATION_SYSTEM_PROMPT = os.environ.get(
    "IMAGE_SUMMARIZATION_SYSTEM_PROMPT",
    DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT,
)

# The user prompt for image summarization - the image filename will be automatically prepended
IMAGE_SUMMARIZATION_USER_PROMPT = os.environ.get(
    "IMAGE_SUMMARIZATION_USER_PROMPT",
    DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT,
)

# Knowledge Graph Read Only User Configuration
DB_READONLY_USER: str = os.environ.get("DB_READONLY_USER", "db_readonly_user")
DB_READONLY_PASSWORD: str = urllib.parse.quote_plus(
    os.environ.get("DB_READONLY_PASSWORD") or "password"
)

# File Store Configuration
# Which backend to use for file storage: "s3" (S3/MinIO) or "postgres" (PostgreSQL Large Objects)
FILE_STORE_BACKEND = os.environ.get("FILE_STORE_BACKEND", "s3")

S3_FILE_STORE_BUCKET_NAME = (
    os.environ.get("S3_FILE_STORE_BUCKET_NAME") or "onyx-file-store-bucket"
)
S3_FILE_STORE_PREFIX = os.environ.get("S3_FILE_STORE_PREFIX") or "onyx-files"
# S3_ENDPOINT_URL is for MinIO and other S3-compatible storage. Leave blank for AWS S3.
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL")
S3_VERIFY_SSL = os.environ.get("S3_VERIFY_SSL", "").lower() == "true"

# S3/MinIO Access Keys
S3_AWS_ACCESS_KEY_ID = os.environ.get("S3_AWS_ACCESS_KEY_ID")
S3_AWS_SECRET_ACCESS_KEY = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")

# Should we force S3 local checksumming
S3_GENERATE_LOCAL_CHECKSUM = (
    os.environ.get("S3_GENERATE_LOCAL_CHECKSUM", "").lower() == "true"
)

# Forcing Vespa Language
# English: en, German:de, etc. See: https://docs.vespa.ai/en/linguistics.html
VESPA_LANGUAGE_OVERRIDE = os.environ.get("VESPA_LANGUAGE_OVERRIDE")


#####
# Default LLM API Keys (for cloud deployments)
# These are Onyx-managed API keys provided to tenants by default
#####
OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY")
ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY")
COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
VERTEXAI_DEFAULT_CREDENTIALS = os.environ.get("VERTEXAI_DEFAULT_CREDENTIALS")
VERTEXAI_DEFAULT_LOCATION = os.environ.get("VERTEXAI_DEFAULT_LOCATION", "global")
OPENROUTER_DEFAULT_API_KEY = os.environ.get("OPENROUTER_DEFAULT_API_KEY")

INSTANCE_TYPE = (
    "managed"
    if os.environ.get("IS_MANAGED_INSTANCE", "").lower() == "true"
    else "cloud" if AUTH_TYPE == AuthType.CLOUD else "self_hosted"
)


## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")


## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
    "https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")


================================================
FILE: backend/onyx/configs/chat_configs.py
================================================
import os

PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50

# May be less depending on model
MAX_CHUNKS_FED_TO_CHAT = int(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 25)

# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
# Capped in Vespa at 0.5
DOC_TIME_DECAY = float(
    os.environ.get("DOC_TIME_DECAY") or 0.5  # Hits limit at 2 years by default
)
BASE_RECENCY_DECAY = 0.5
FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
# For the highest matching base size chunk, how many chunks above and below do we pull in by default
# Note this is not in any of the deployment configs yet
# Currently only applies to search flow not chat
CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 1)
CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 1)
# Fairly long but this is to account for edge cases where the LLM pauses for much longer than usual
# The alternative is to fail the request completely so this is intended to be fairly lenient.
LLM_SOCKET_READ_TIMEOUT = int(
    os.environ.get("LLM_SOCKET_READ_TIMEOUT") or "60"
)  # 60 seconds
# Weighting factor between vector and keyword Search; 1 for completely vector
# search, 0 for keyword. Enforces a valid range of [0, 1]. A supplied value from
# the env outside of this range will be clipped to the respective end of the
# range. Defaults to 0.5.
HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.5)))
# Weighting factor between Title and Content of documents during search, 1 for completely
# Title based. Default heavily favors Content because Title is also included at the top of
# Content. This is to avoid cases where the Content is very relevant but it may not be clear
# if the title is separated out. Title is most of a "boost" than a separate field.
TITLE_CONTENT_RATIO = max(
    0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.10))
)

# Stops streaming answers back to the UI if this pattern is seen:
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None

# Set this to "true" to hard delete chats
# This will make chats unviewable by admins after a user deletes them
# As opposed to soft deleting them, which just hides them from non-admin users
HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"

# Internet Search
NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)

VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)

# Whether or not to use the semantic & keyword search expansions for Basic Search
USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
    os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
    == "true"
)

# Chat History Compression
# Trigger compression when history exceeds this ratio of available context window
COMPRESSION_TRIGGER_RATIO = float(os.environ.get("COMPRESSION_TRIGGER_RATIO", "0.75"))

SKIP_DEEP_RESEARCH_CLARIFICATION = (
    os.environ.get("SKIP_DEEP_RESEARCH_CLARIFICATION", "false").lower() == "true"
)


================================================
FILE: backend/onyx/configs/constants.py
================================================
import platform
import re
import socket
from enum import auto
from enum import Enum


ONYX_DEFAULT_APPLICATION_NAME = "Onyx"
ONYX_DISCORD_URL = "https://discord.gg/4NA5SbzrWb"
ONYX_UTM_SOURCE = "onyx_app"
SLACK_USER_TOKEN_PREFIX = "xoxp-"
SLACK_BOT_TOKEN_PREFIX = "xoxb-"
ONYX_EMAILABLE_LOGO_MAX_DIM = 512

SOURCE_TYPE = "source_type"
# stored in the `metadata` of a chunk. Used to signify that this chunk should
# not be used for QA. For example, Google Drive file types which can't be parsed
# are still useful as a search result but not for QA.
IGNORE_FOR_QA = "ignore_for_qa"
# NOTE: deprecated, only used for porting key from old system
GEN_AI_API_KEY_STORAGE_KEY = "genai_api_key"
PUBLIC_DOC_PAT = "PUBLIC"
ID_SEPARATOR = ":;:"
DEFAULT_BOOST = 0

# Tag for endpoints that should be included in the public API documentation
PUBLIC_API_TAGS: list[str | Enum] = ["public"]

# Cookies
FASTAPI_USERS_AUTH_COOKIE_NAME = (
    "fastapiusersauth"  # Currently a constant, but logic allows for configuration
)
TENANT_ID_COOKIE_NAME = "onyx_tid"  # tenant id - for workaround cases
ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user"

# ID used in UserInfo API responses for anonymous users (not a UUID, just a string identifier)
ANONYMOUS_USER_INFO_ID = "__anonymous_user__"
# Placeholder user for migrating no-auth data to first registered user
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"
NO_AUTH_PLACEHOLDER_USER_EMAIL = "no-auth-placeholder@onyx.app"
# Real anonymous user in DB for anonymous access feature
ANONYMOUS_USER_UUID = "00000000-0000-0000-0000-000000000002"
ANONYMOUS_USER_EMAIL = "anonymous@onyx.app"

# For chunking/processing chunks
RETURN_SEPARATOR = "\n\r\n"
SECTION_SEPARATOR = "\n\n"
# For combining attributes, doesn't have to be unique/perfect to work
INDEX_SEPARATOR = "==="

# For File Connector Metadata override file
ONYX_METADATA_FILENAME = ".onyx_metadata.json"

# Messages
DISABLED_GEN_AI_MSG = (
    "Your System Admin has disabled the Generative AI functionalities of Onyx.\n"
    "Please contact them if you wish to have this enabled.\n"
    "You can still use Onyx as a search engine."
)

#####
# Version Pattern Configs
#####
# Version patterns for Docker image tags
STABLE_VERSION_PATTERN = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$")
DEV_VERSION_PATTERN = re.compile(r"^v(\d+)\.(\d+)\.(\d+)-beta\.(\d+)$")

DEFAULT_PERSONA_ID = 0

DEFAULT_CC_PAIR_ID = 1


CANCEL_CHECK_INTERVAL = 20
DISPATCH_SEP_CHAR = "\n"
FORMAT_DOCS_SEPARATOR = "\n\n"
NUM_EXPLORATORY_DOCS = 15
# Postgres connection constants for application_name
POSTGRES_WEB_APP_NAME = "web"
POSTGRES_INDEXER_APP_NAME = "indexer"
POSTGRES_CELERY_APP_NAME = "celery"
POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat"
POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = "celery_worker_primary"
POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME = "celery_worker_docprocessing"
POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (
    "celery_worker_user_file_processing"
)
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"

SSL_CERT_FILE = "bundle.pem"
# API Keys
DANSWER_API_KEY_PREFIX = "API_KEY__"
DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "onyxapikey.ai"
UNNAMED_KEY_PLACEHOLDER = "Unnamed"
DISCORD_SERVICE_API_KEY_NAME = "discord-bot-service"

# Key-Value store keys
KV_REINDEX_KEY = "needs_reindexing"
KV_UNSTRUCTURED_API_KEY = "unstructured_api_key"
KV_USER_STORE_KEY = "INVITED_USERS"
KV_PENDING_USERS_KEY = "PENDING_USERS"
KV_ANONYMOUS_USER_PREFERENCES_KEY = "anonymous_user_preferences"
KV_ANONYMOUS_USER_PERSONALIZATION_KEY = "anonymous_user_personalization"
KV_CRED_KEY = "credential_id_{}"
KV_GMAIL_CRED_KEY = "gmail_app_credential"
KV_GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key"
KV_GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential"
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key"
KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time"
KV_SETTINGS_KEY = "onyx_settings"
KV_CUSTOMER_UUID_KEY = "customer_uuid"
KV_INSTANCE_DOMAIN_KEY = "instance_domain"
KV_ENTERPRISE_SETTINGS_KEY = "onyx_enterprise_settings"
KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__"
KV_KG_CONFIG_KEY = "kg_config"

# NOTE: we use this timeout / 4 in various places to refresh a lock
# might be worth separating this timeout into separate timeouts for each situation
CELERY_GENERIC_BEAT_LOCK_TIMEOUT = 120

CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120


CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120


# hard timeout applied by the watchdog to the indexing connector run
# to handle hung connectors
CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT = 3 * 60 * 60  # 3 hours (in seconds)

# soft timeout for the lock taken by the indexing connector run
# allows the lock to eventually expire if the managing code around it dies
# if we can get callbacks as object bytes download, we could lower this a lot.
# CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 15 minutes
# hard termination should always fire first if the connector is hung
CELERY_INDEXING_LOCK_TIMEOUT = CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 900

# Heartbeat interval for indexing worker liveness detection
INDEXING_WORKER_HEARTBEAT_INTERVAL = 30  # seconds

# how long a task should wait for associated fence to be ready
CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT = 5 * 60  # 5 min

# needs to be long enough to cover the maximum time it takes to download an object
# if we can get callbacks as object bytes download, we could lower this a lot.
CELERY_PRUNING_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)

CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)

CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300  # 5 min

CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT = 30 * 60  # 30 minutes (in seconds)

# How long a queued user-file task is valid before workers discard it.
# Should be longer than the beat interval (20 s) but short enough to prevent
# indefinite queue growth.  Workers drop tasks older than this without touching
# the DB, so a shorter value = faster drain of stale duplicates.
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60  # 1 minute (in seconds)

# Maximum number of tasks allowed in the user-file-processing queue before the
# beat generator stops adding more.  Prevents unbounded queue growth when workers
# fall behind.
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500
# How long a queued user-file-project-sync task remains valid.
# Should be short enough to discard stale queue entries under load while still
# allowing workers enough time to pick up new tasks.
CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES = 60  # 1 minute (in seconds)

# Max queue depth before user-file-project-sync producers stop enqueuing.
# This applies backpressure when workers are falling behind.
USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500

CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)

# How long a queued user-file-delete task is valid before workers discard it.
# Mirrors the processing task expiry to prevent indefinite queue growth when
# files are stuck in DELETING status and the beat keeps re-enqueuing them.
CELERY_USER_FILE_DELETE_TASK_EXPIRES = 60  # 1 minute (in seconds)

# Max queue depth before the delete beat stops enqueuing more delete tasks.
USER_FILE_DELETE_MAX_QUEUE_DEPTH = 500

CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)

DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"

TMP_DRALPHA_PERSONA_NAME = "KG Beta"


class DocumentSource(str, Enum):
    # Special case, document passed in via Onyx APIs without specifying a source type
    INGESTION_API = "ingestion_api"
    SLACK = "slack"
    WEB = "web"
    GOOGLE_DRIVE = "google_drive"
    GMAIL = "gmail"
    REQUESTTRACKER = "requesttracker"
    GITHUB = "github"
    GITBOOK = "gitbook"
    GITLAB = "gitlab"
    GURU = "guru"
    BOOKSTACK = "bookstack"
    OUTLINE = "outline"
    CONFLUENCE = "confluence"
    JIRA = "jira"
    SLAB = "slab"
    PRODUCTBOARD = "productboard"
    FILE = "file"
    CODA = "coda"
    CANVAS = "canvas"
    NOTION = "notion"
    ZULIP = "zulip"
    LINEAR = "linear"
    HUBSPOT = "hubspot"
    DOCUMENT360 = "document360"
    GONG = "gong"
    GOOGLE_SITES = "google_sites"
    ZENDESK = "zendesk"
    LOOPIO = "loopio"
    DROPBOX = "dropbox"
    SHAREPOINT = "sharepoint"
    TEAMS = "teams"
    SALESFORCE = "salesforce"
    DISCOURSE = "discourse"
    AXERO = "axero"
    CLICKUP = "clickup"
    MEDIAWIKI = "mediawiki"
    WIKIPEDIA = "wikipedia"
    ASANA = "asana"
    S3 = "s3"
    R2 = "r2"
    GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
    OCI_STORAGE = "oci_storage"
    XENFORO = "xenforo"
    NOT_APPLICABLE = "not_applicable"
    DISCORD = "discord"
    FRESHDESK = "freshdesk"
    FIREFLIES = "fireflies"
    EGNYTE = "egnyte"
    AIRTABLE = "airtable"
    HIGHSPOT = "highspot"
    DRUPAL_WIKI = "drupal_wiki"

    IMAP = "imap"
    BITBUCKET = "bitbucket"
    TESTRAIL = "testrail"

    # Special case just for integration tests
    MOCK_CONNECTOR = "mock_connector"
    # Special case for user files
    USER_FILE = "user_file"
    # Raw files for Craft sandbox access (xlsx, pptx, docx, etc.)
    # Uses RAW_BINARY processing mode - no text extraction
    CRAFT_FILE = "craft_file"


class FederatedConnectorSource(str, Enum):
    FEDERATED_SLACK = "federated_slack"

    def to_non_federated_source(self) -> DocumentSource | None:
        if self == FederatedConnectorSource.FEDERATED_SLACK:
            return DocumentSource.SLACK
        return None


DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]


class NotificationType(str, Enum):
    REINDEX = "reindex"
    PERSONA_SHARED = "persona_shared"
    TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending"  # 2 days left in trial
    RELEASE_NOTES = "release_notes"
    ASSISTANT_FILES_READY = "assistant_files_ready"
    FEATURE_ANNOUNCEMENT = "feature_announcement"


class BlobType(str, Enum):
    R2 = "r2"
    S3 = "s3"
    GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
    OCI_STORAGE = "oci_storage"


class DocumentIndexType(str, Enum):
    COMBINED = "combined"  # Vespa
    SPLIT = "split"  # Typesense + Qdrant


class AuthType(str, Enum):
    BASIC = "basic"
    GOOGLE_OAUTH = "google_oauth"
    OIDC = "oidc"
    SAML = "saml"

    # google auth and basic
    CLOUD = "cloud"


class QueryHistoryType(str, Enum):
    DISABLED = "disabled"
    ANONYMIZED = "anonymized"
    NORMAL = "normal"


# Special characters for password validation
PASSWORD_SPECIAL_CHARS = "!@#$%^&*()_+-=[]{}|;:,.<>?"


class SessionType(str, Enum):
    CHAT = "Chat"
    SEARCH = "Search"
    SLACK = "Slack"


class QAFeedbackType(str, Enum):
    LIKE = "like"  # User likes the answer, used for metrics
    DISLIKE = "dislike"  # User dislikes the answer, used for metrics
    MIXED = "mixed"  # User likes some answers and dislikes other, used for chat session metrics


class SearchFeedbackType(str, Enum):
    ENDORSE = "endorse"  # boost this document for all future queries
    REJECT = "reject"  # down-boost this document for all future queries
    HIDE = "hide"  # mark this document as untrusted, hide from LLM
    UNHIDE = "unhide"


class MessageType(str, Enum):
    # Using OpenAI standards, Langchain equivalent shown in comment
    # System message is always constructed on the fly, not saved
    SYSTEM = "system"  # SystemMessage
    USER = "user"  # HumanMessage
    ASSISTANT = "assistant"  # AIMessage - Can include tool_calls field for parallel tool calling
    TOOL_CALL_RESPONSE = "tool_call_response"
    USER_REMINDER = "user_reminder"  # Custom Onyx message type which is translated into a USER message when passed to the LLM


class ChatMessageSimpleType(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"
    TOOL_CALL = "tool_call"
    FILE_TEXT = "file_text"


class TokenRateLimitScope(str, Enum):
    USER = "user"
    USER_GROUP = "user_group"
    GLOBAL = "global"


class FileStoreType(str, Enum):
    S3 = "s3"
    POSTGRES = "postgres"


class FileOrigin(str, Enum):
    CHAT_UPLOAD = "chat_upload"
    CHAT_IMAGE_GEN = "chat_image_gen"
    CONNECTOR = "connector"
    CONNECTOR_METADATA = "connector_metadata"
    GENERATED_REPORT = "generated_report"
    INDEXING_CHECKPOINT = "indexing_checkpoint"
    PLAINTEXT_CACHE = "plaintext_cache"
    OTHER = "other"
    QUERY_HISTORY_CSV = "query_history_csv"
    SANDBOX_SNAPSHOT = "sandbox_snapshot"
    USER_FILE = "user_file"


class FileType(str, Enum):
    CSV = "text/csv"


class MilestoneRecordType(str, Enum):
    TENANT_CREATED = "tenant_created"
    USER_SIGNED_UP = "user_signed_up"
    VISITED_ADMIN_PAGE = "visited_admin_page"
    CREATED_CONNECTOR = "created_connector"
    CONNECTOR_SUCCEEDED = "connector_succeeded"
    RAN_QUERY = "ran_query"
    USER_MESSAGE_SENT = "user_message_sent"
    MULTIPLE_ASSISTANTS = "multiple_assistants"
    CREATED_ASSISTANT = "created_assistant"
    CREATED_ONYX_BOT = "created_onyx_bot"
    REQUESTED_CONNECTOR = "requested_connector"


class PostgresAdvisoryLocks(Enum):
    KOMBU_MESSAGE_CLEANUP_LOCK_ID = auto()


class OnyxCeleryQueues:
    # "celery" is the default queue defined by celery and also the queue
    # we are running in the primary worker to run system tasks
    # Tasks running in this queue should be designed specifically to run quickly
    PRIMARY = "celery"

    # Light queue
    VESPA_METADATA_SYNC = "vespa_metadata_sync"
    DOC_PERMISSIONS_UPSERT = "doc_permissions_upsert"
    CONNECTOR_DELETION = "connector_deletion"
    LLM_MODEL_UPDATE = "llm_model_update"
    CHECKPOINT_CLEANUP = "checkpoint_cleanup"
    INDEX_ATTEMPT_CLEANUP = "index_attempt_cleanup"
    # Heavy queue
    CONNECTOR_PRUNING = "connector_pruning"
    CONNECTOR_DOC_PERMISSIONS_SYNC = "connector_doc_permissions_sync"
    CONNECTOR_EXTERNAL_GROUP_SYNC = "connector_external_group_sync"
    CONNECTOR_HIERARCHY_FETCHING = "connector_hierarchy_fetching"
    CSV_GENERATION = "csv_generation"

    # User file processing queue
    USER_FILE_PROCESSING = "user_file_processing"
    USER_FILE_PROJECT_SYNC = "user_file_project_sync"
    USER_FILE_DELETE = "user_file_delete"
    # Document processing pipeline queue
    DOCPROCESSING = "docprocessing"
    CONNECTOR_DOC_FETCHING = "connector_doc_fetching"

    # Monitoring queue
    MONITORING = "monitoring"

    # Sandbox processing queue
    SANDBOX = "sandbox"

    OPENSEARCH_MIGRATION = "opensearch_migration"


class OnyxRedisLocks:
    PRIMARY_WORKER = "da_lock:primary_worker"
    CHECK_VESPA_SYNC_BEAT_LOCK = "da_lock:check_vespa_sync_beat"
    CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat"
    CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"
    CHECK_HIERARCHY_FETCHING_BEAT_LOCK = "da_lock:check_hierarchy_fetching_beat"
    CHECK_INDEXING_BEAT_LOCK = "da_lock:check_indexing_beat"
    CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK = "da_lock:check_checkpoint_cleanup_beat"
    CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK = "da_lock:check_index_attempt_cleanup_beat"
    CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK = (
        "da_lock:check_connector_doc_permissions_sync_beat"
    )
    CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (
        "da_lock:check_connector_external_group_sync_beat"
    )
    OPENSEARCH_MIGRATION_BEAT_LOCK = "da_lock:opensearch_migration_beat"

    MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes"
    CHECK_AVAILABLE_TENANTS_LOCK = "da_lock:check_available_tenants"
    CLOUD_PRE_PROVISION_TENANT_LOCK = "da_lock:pre_provision_tenant"

    CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX = (
        "da_lock:connector_doc_permissions_sync"
    )
    CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX = "da_lock:connector_external_group_sync"
    PRUNING_LOCK_PREFIX = "da_lock:pruning"
    INDEXING_METADATA_PREFIX = "da_metadata:indexing"

    SLACK_BOT_LOCK = "da_lock:slack_bot"
    SLACK_BOT_HEARTBEAT_PREFIX = "da_heartbeat:slack_bot"
    ANONYMOUS_USER_ENABLED = "anonymous_user_enabled"

    CLOUD_BEAT_TASK_GENERATOR_LOCK = "da_lock:cloud_beat_task_generator"
    CLOUD_CHECK_ALEMBIC_BEAT_LOCK = "da_lock:cloud_check_alembic"

    # User file processing
    USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
    USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
    # Short-lived key set when a task is enqueued; cleared when the worker picks it up.
    # Prevents the beat from re-enqueuing the same file while a task is already queued.
    USER_FILE_QUEUED_PREFIX = "da_lock:user_file_queued"
    USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
    USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"
    USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
    USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
    USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
    # Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.
    # Prevents the beat from re-enqueuing the same file while a delete task is already queued.
    USER_FILE_DELETE_QUEUED_PREFIX = "da_lock:user_file_delete_queued"

    # Release notes
    RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"

    # Sandbox cleanup
    CLEANUP_IDLE_SANDBOXES_BEAT_LOCK = "da_lock:cleanup_idle_sandboxes_beat"
    CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK = "da_lock:cleanup_old_snapshots_beat"

    # Sandbox file sync
    SANDBOX_FILE_SYNC_LOCK_PREFIX = "da_lock:sandbox_file_sync"


class OnyxRedisSignals:
    BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
    BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES = (
        "signal:block_validate_external_group_sync_fences"
    )
    BLOCK_VALIDATE_PERMISSION_SYNC_FENCES = (
        "signal:block_validate_permission_sync_fences"
    )
    BLOCK_PRUNING = "signal:block_pruning"
    BLOCK_VALIDATE_PRUNING_FENCES = "signal:block_validate_pruning_fences"
    BLOCK_BUILD_FENCE_LOOKUP_TABLE = "signal:block_build_fence_lookup_table"
    BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES = (
        "signal:block_validate_connector_deletion_fences"
    )


class OnyxRedisConstants:
    ACTIVE_FENCES = "active_fences"


class OnyxCeleryPriority(int, Enum):
    HIGHEST = 0
    HIGH = auto()
    MEDIUM = auto()
    LOW = auto()
    LOWEST = auto()


# a prefix used to distinguish system wide tasks in the cloud
ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud"

# the tenant id we use for system level redis operations
ONYX_CLOUD_TENANT_ID = "cloud"

# the redis namespace for runtime variables
ONYX_CLOUD_REDIS_RUNTIME = "runtime"
CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT = 600


class OnyxCeleryTask:
    DEFAULT = "celery"

    CLOUD_BEAT_TASK_GENERATOR = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_generate_beat_tasks"
    CLOUD_MONITOR_ALEMBIC = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_alembic"
    CLOUD_MONITOR_CELERY_QUEUES = (
        f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_queues"
    )
    CLOUD_CHECK_AVAILABLE_TENANTS = (
        f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check_available_tenants"
    )
    CLOUD_MONITOR_CELERY_PIDBOX = (
        f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_pidbox"
    )

    CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"
    CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task"
    CHECK_FOR_INDEXING = "check_for_indexing"
    CHECK_FOR_PRUNING = "check_for_pruning"
    CHECK_FOR_HIERARCHY_FETCHING = "check_for_hierarchy_fetching"
    CHECK_FOR_DOC_PERMISSIONS_SYNC = "check_for_doc_permissions_sync"
    CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync"
    CHECK_FOR_AUTO_LLM_UPDATE = "check_for_auto_llm_update"

    # User file processing
    CHECK_FOR_USER_FILE_PROCESSING = "check_for_user_file_processing"
    PROCESS_SINGLE_USER_FILE = "process_single_user_file"
    CHECK_FOR_USER_FILE_PROJECT_SYNC = "check_for_user_file_project_sync"
    PROCESS_SINGLE_USER_FILE_PROJECT_SYNC = "process_single_user_file_project_sync"
    CHECK_FOR_USER_FILE_DELETE = "check_for_user_file_delete"
    DELETE_SINGLE_USER_FILE = "delete_single_user_file"

    # Connector checkpoint cleanup
    CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup"
    CLEANUP_CHECKPOINT = "cleanup_checkpoint"

    # Connector index attempt cleanup
    CHECK_FOR_INDEX_ATTEMPT_CLEANUP = "check_for_index_attempt_cleanup"
    CLEANUP_INDEX_ATTEMPT = "cleanup_index_attempt"

    MONITOR_BACKGROUND_PROCESSES = "monitor_background_processes"
    MONITOR_CELERY_QUEUES = "monitor_celery_queues"
    MONITOR_PROCESS_MEMORY = "monitor_process_memory"
    CELERY_BEAT_HEARTBEAT = "celery_beat_heartbeat"

    KOMBU_MESSAGE_CLEANUP_TASK = "kombu_message_cleanup_task"
    CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK = (
        "connector_permission_sync_generator_task"
    )
    UPDATE_EXTERNAL_DOCUMENT_PERMISSIONS_TASK = (
        "update_external_document_permissions_task"
    )
    CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK = (
        "connector_external_group_sync_generator_task"
    )

    # New split indexing tasks
    CONNECTOR_DOC_FETCHING_TASK = "connector_doc_fetching_task"
    DOCPROCESSING_TASK = "docprocessing_task"

    CONNECTOR_PRUNING_GENERATOR_TASK = "connector_pruning_generator_task"
    CONNECTOR_HIERARCHY_FETCHING_TASK = "connector_hierarchy_fetching_task"
    DOCUMENT_BY_CC_PAIR_CLEANUP_TASK = "document_by_cc_pair_cleanup_task"
    VESPA_METADATA_SYNC_TASK = "vespa_metadata_sync_task"

    # chat retention
    CHECK_TTL_MANAGEMENT_TASK = "check_ttl_management_task"
    PERFORM_TTL_MANAGEMENT_TASK = "perform_ttl_management_task"

    GENERATE_USAGE_REPORT_TASK = "generate_usage_report_task"

    EVAL_RUN_TASK = "eval_run_task"
    SCHEDULED_EVAL_TASK = "scheduled_eval_task"

    EXPORT_QUERY_HISTORY_TASK = "export_query_history_task"
    EXPORT_QUERY_HISTORY_CLEANUP_TASK = "export_query_history_cleanup_task"

    # Hook execution log retention
    HOOK_EXECUTION_LOG_CLEANUP_TASK = "hook_execution_log_cleanup_task"

    # Sandbox cleanup
    CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
    CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"

    # Sandbox file sync
    SANDBOX_FILE_SYNC = "sandbox_file_sync"

    CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK = (
        "check_for_documents_for_opensearch_migration_task"
    )
    MIGRATE_DOCUMENTS_FROM_VESPA_TO_OPENSEARCH_TASK = (
        "migrate_documents_from_vespa_to_opensearch_task"
    )
    MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK = (
        "migrate_chunks_from_vespa_to_opensearch_task"
    )


# this needs to correspond to the matching entry in supervisord
ONYX_CELERY_BEAT_HEARTBEAT_KEY = "onyx:celery:beat:heartbeat"

REDIS_SOCKET_KEEPALIVE_OPTIONS = {}
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3

if platform.system() == "Darwin":
    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60  # type: ignore[attr-defined,unused-ignore]
else:
    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60  # type: ignore[attr-defined,unused-ignore]


class OnyxCallTypes(str, Enum):
    FIREFLIES = "FIREFLIES"
    GONG = "GONG"


NUM_DAYS_TO_KEEP_CHECKPOINTS = 7
# checkpoints are queried based on index attempts, so we need to keep index attempts for one more day
NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS = NUM_DAYS_TO_KEEP_CHECKPOINTS + 1

# TODO: this should be stored likely in database
DocumentSourceDescription: dict[DocumentSource, str] = {
    # Special case, document passed in via Onyx APIs without specifying a source type
    DocumentSource.INGESTION_API: "ingestion_api",
    DocumentSource.SLACK: "slack channels for discussions and collaboration",
    DocumentSource.WEB: "indexed web pages",
    DocumentSource.GOOGLE_DRIVE: "google drive documents (docs, sheets, etc.)",
    DocumentSource.GMAIL: "email messages",
    DocumentSource.REQUESTTRACKER: "requesttracker",
    DocumentSource.GITHUB: "github data (issues, PRs)",
    DocumentSource.GITBOOK: "gitbook data",
    DocumentSource.GITLAB: "gitlab data",
    DocumentSource.BITBUCKET: "bitbucket data",
    DocumentSource.GURU: "guru data",
    DocumentSource.BOOKSTACK: "bookstack data",
    DocumentSource.OUTLINE: "outline data",
    DocumentSource.CONFLUENCE: "confluence data (pages, spaces, etc.)",
    DocumentSource.JIRA: "jira data (issues, tickets, projects, etc.)",
    DocumentSource.SLAB: "slab data",
    DocumentSource.PRODUCTBOARD: "productboard data (boards, etc.)",
    DocumentSource.FILE: "files",
    DocumentSource.CANVAS: "canvas lms - courses, pages, assignments, and announcements",
    DocumentSource.CODA: "coda - team workspace with docs, tables, and pages",
    DocumentSource.NOTION: "notion data - a workspace that combines note-taking, \
project management, and collaboration tools into a single, customizable platform",
    DocumentSource.ZULIP: "zulip data",
    DocumentSource.LINEAR: "linear data - project management tool, including tickets etc.",
    DocumentSource.HUBSPOT: "hubspot data - CRM and marketing automation data",
    DocumentSource.DOCUMENT360: "document360 data",
    DocumentSource.GONG: "gong - call transcripts",
    DocumentSource.GOOGLE_SITES: "google_sites - websites",
    DocumentSource.ZENDESK: "zendesk - customer support data",
    DocumentSource.LOOPIO: "loopio - rfp data",
    DocumentSource.DROPBOX: "dropbox - files",
    DocumentSource.SHAREPOINT: "sharepoint - files",
    DocumentSource.TEAMS: "teams - chat and collaboration",
    DocumentSource.SALESFORCE: "salesforce - CRM data",
    DocumentSource.DISCOURSE: "discourse - discussion forums",
    DocumentSource.AXERO: "axero - employee engagement data",
    DocumentSource.CLICKUP: "clickup - project management tool",
    DocumentSource.MEDIAWIKI: "mediawiki - wiki data",
    DocumentSource.WIKIPEDIA: "wikipedia - encyclopedia data",
    DocumentSource.ASANA: "asana",
    DocumentSource.S3: "s3",
    DocumentSource.R2: "r2",
    DocumentSource.GOOGLE_CLOUD_STORAGE: "google_cloud_storage - cloud storage",
    DocumentSource.OCI_STORAGE: "oci_storage - cloud storage",
    DocumentSource.XENFORO: "xenforo - forum data",
    DocumentSource.DISCORD: "discord - chat and collaboration",
    DocumentSource.FRESHDESK: "freshdesk - customer support data",
    DocumentSource.FIREFLIES: "fireflies - call transcripts",
    DocumentSource.EGNYTE: "egnyte - files",
    DocumentSource.AIRTABLE: "airtable - database",
    DocumentSource.HIGHSPOT: "highspot - CRM data",
    DocumentSource.DRUPAL_WIKI: "drupal wiki - knowledge base content (pages, spaces, attachments)",
    DocumentSource.IMAP: "imap - email data",
    DocumentSource.TESTRAIL: "testrail - test case management tool for QA processes",
}


================================================
FILE: backend/onyx/configs/embedding_configs.py
================================================
from pydantic import BaseModel

from onyx.db.enums import EmbeddingPrecision


class _BaseEmbeddingModel(BaseModel):
    """Private model for defining base embedding model configurations."""

    name: str
    dim: int
    index_name: str


class SupportedEmbeddingModel(BaseModel):
    name: str
    dim: int
    index_name: str
    embedding_precision: EmbeddingPrecision


# Base embedding model configurations (without precision)
_BASE_EMBEDDING_MODELS = [
    # Cloud-based models
    _BaseEmbeddingModel(
        name="cohere/embed-english-v3.0",
        dim=1024,
        index_name="danswer_chunk_cohere_embed_english_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-v3.0",
        dim=1024,
        index_name="danswer_chunk_embed_english_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-light-v3.0",
        dim=384,
        index_name="danswer_chunk_cohere_embed_english_light_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-light-v3.0",
        dim=384,
        index_name="danswer_chunk_embed_english_light_v3_0",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-large",
        dim=3072,
        index_name="danswer_chunk_openai_text_embedding_3_large",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-large",
        dim=3072,
        index_name="danswer_chunk_text_embedding_3_large",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-small",
        dim=1536,
        index_name="danswer_chunk_openai_text_embedding_3_small",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-small",
        dim=1536,
        index_name="danswer_chunk_text_embedding_3_small",
    ),
    _BaseEmbeddingModel(
        name="google/gemini-embedding-001",
        dim=3072,
        index_name="danswer_chunk_gemini_embedding_001",
    ),
    _BaseEmbeddingModel(
        name="google/text-embedding-005",
        dim=768,
        index_name="danswer_chunk_text_embedding_005",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-large-2-instruct",
        dim=1024,
        index_name="danswer_chunk_voyage_large_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-large-2-instruct",
        dim=1024,
        index_name="danswer_chunk_large_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-light-2-instruct",
        dim=384,
        index_name="danswer_chunk_voyage_light_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-light-2-instruct",
        dim=384,
        index_name="danswer_chunk_light_2_instruct",
    ),
    # Self-hosted models
    _BaseEmbeddingModel(
        name="nomic-ai/nomic-embed-text-v1",
        dim=768,
        index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1",
    ),
    _BaseEmbeddingModel(
        name="nomic-ai/nomic-embed-text-v1",
        dim=768,
        index_name="danswer_chunk_nomic_embed_text_v1",
    ),
    _BaseEmbeddingModel(
        name="intfloat/e5-base-v2",
        dim=768,
        index_name="danswer_chunk_intfloat_e5_base_v2",
    ),
    _BaseEmbeddingModel(
        name="intfloat/e5-small-v2",
        dim=384,
        index_name="danswer_chunk_intfloat_e5_small_v2",
    ),
    _BaseEmbeddingModel(
        name="intfloat/multilingual-e5-base",
        dim=768,
        index_name="danswer_chunk_intfloat_multilingual_e5_base",
    ),
    _BaseEmbeddingModel(
        name="intfloat/multilingual-e5-small",
        dim=384,
        index_name="danswer_chunk_intfloat_multilingual_e5_small",
    ),
]

# Automatically generate both FLOAT and BFLOAT16 versions of all models
SUPPORTED_EMBEDDING_MODELS = [
    # BFLOAT16 precision versions
    *[
        SupportedEmbeddingModel(
            name=model.name,
            dim=model.dim,
            index_name=f"{model.index_name}_bfloat16",
            embedding_precision=EmbeddingPrecision.BFLOAT16,
        )
        for model in _BASE_EMBEDDING_MODELS
    ],
    # FLOAT precision versions
    # NOTE: need to keep this one for backwards compatibility. We now default to
    # BFLOAT16.
    *[
        SupportedEmbeddingModel(
            name=model.name,
            dim=model.dim,
            index_name=model.index_name,
            embedding_precision=EmbeddingPrecision.FLOAT,
        )
        for model in _BASE_EMBEDDING_MODELS
    ],
]


================================================
FILE: backend/onyx/configs/kg_configs.py
================================================
import os

KG_RESEARCH_NUM_RETRIEVED_DOCS: int = int(
    os.environ.get("KG_RESEARCH_NUM_RETRIEVED_DOCS", "25")
)


KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES: int = int(
    os.environ.get("KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES", "10")
)


KG_ENTITY_EXTRACTION_TIMEOUT: int = int(
    os.environ.get("KG_ENTITY_EXTRACTION_TIMEOUT", "15")
)

KG_RELATIONSHIP_EXTRACTION_TIMEOUT: int = int(
    os.environ.get("KG_RELATIONSHIP_EXTRACTION_TIMEOUT", "15")
)

KG_STRATEGY_GENERATION_TIMEOUT: int = int(
    os.environ.get("KG_STRATEGY_GENERATION_TIMEOUT", "20")
)

KG_SQL_GENERATION_TIMEOUT: int = int(os.environ.get("KG_SQL_GENERATION_TIMEOUT", "40"))

KG_SQL_GENERATION_TIMEOUT_OVERRIDE: int = int(
    os.environ.get("KG_SQL_GENERATION_TIMEOUT_OVERRIDE", "40")
)

KG_SQL_GENERATION_MAX_TOKENS: int = int(
    os.environ.get("KG_SQL_GENERATION_MAX_TOKENS", "1500")
)

KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX: str = os.environ.get(
    "KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX", "allowed_docs"
)

KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX: str = os.environ.get(
    "KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX", "kg_relationships_with_access"
)

KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX: str = os.environ.get(
    "KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX", "kg_entities_with_access"
)


KG_FILTER_CONSTRUCTION_TIMEOUT: int = int(
    os.environ.get("KG_FILTER_CONSTRUCTION_TIMEOUT", "15")
)


KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT: int = int(
    os.environ.get("KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT", "100")
)

KG_FILTERED_SEARCH_TIMEOUT: int = int(
    os.environ.get("KG_FILTERED_SEARCH_TIMEOUT", "30")
)


KG_OBJECT_SOURCE_RESEARCH_TIMEOUT: int = int(
    os.environ.get("KG_OBJECT_SOURCE_RESEARCH_TIMEOUT", "30")
)

KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION: int = int(
    os.environ.get("KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION", "45")
)

KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION: int = int(
    os.environ.get("KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION", "15")
)

KG_MAX_TOKENS_ANSWER_GENERATION: int = int(
    os.environ.get("KG_MAX_TOKENS_ANSWER_GENERATION", "1024")
)

KG_MAX_DEEP_SEARCH_RESULTS: int = int(
    os.environ.get("KG_MAX_DEEP_SEARCH_RESULTS", "30")
)


KG_METADATA_TRACKING_THRESHOLD: int = int(
    os.environ.get("KG_METADATA_TRACKING_THRESHOLD", "10")
)


KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH: int = int(
    os.environ.get("KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH", "2")
)


_KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT: float = max(
    1e-3,
    min(1, float(os.environ.get("KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT", "0.25"))),
)
_KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT: float = max(
    1e-3,
    min(1, float(os.environ.get("KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT", "0.25"))),
)
_KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT: float = max(
    1e-3,
    min(1, float(os.environ.get("KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT", "0.5"))),
)
_KG_NORMALIZATION_RERANK_NGRAM_SUMS: float = (
    _KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT
    + _KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT
    + _KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT
)

KG_NORMALIZATION_RERANK_NGRAM_WEIGHTS: tuple[float, float, float] = (
    _KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,
    _KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,
    _KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,
)


KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT: float = max(
    0,
    min(1, float(os.environ.get("KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT", "0.25"))),
)


KG_NORMALIZATION_RERANK_THRESHOLD: float = float(
    os.environ.get("KG_NORMALIZATION_RERANK_THRESHOLD", "0.3")
)


KG_CLUSTERING_RETRIEVE_THRESHOLD: float = float(
    os.environ.get("KG_CLUSTERING_RETRIEVE_THRESHOLD", "0.6")
)


KG_CLUSTERING_THRESHOLD: float = float(
    os.environ.get("KG_CLUSTERING_THRESHOLD", "0.96")
)

KG_MAX_SEARCH_DOCUMENTS: int = int(os.environ.get("KG_MAX_SEARCH_DOCUMENTS", "15"))

KG_MAX_DECOMPOSITION_SEGMENTS: int = int(
    os.environ.get("KG_MAX_DECOMPOSITION_SEGMENTS", "10")
)
KG_BETA_ASSISTANT_DESCRIPTION = "The KG Beta assistant uses the Onyx Knowledge Graph (beta) structure \
to answer questions"


================================================
FILE: backend/onyx/configs/llm_configs.py
================================================
from onyx.configs.app_configs import DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
from onyx.server.settings.store import load_settings


def get_image_extraction_and_analysis_enabled() -> bool:
    """Get image extraction and analysis enabled setting from workspace settings or fallback to False"""
    try:
        settings = load_settings()
        if settings.image_extraction_and_analysis_enabled is not None:
            return settings.image_extraction_and_analysis_enabled
    except Exception:
        pass

    return False


def get_search_time_image_analysis_enabled() -> bool:
    """Get search time image analysis enabled setting from workspace settings or fallback to False"""
    try:
        settings = load_settings()
        if settings.search_time_image_analysis_enabled is not None:
            return settings.search_time_image_analysis_enabled
    except Exception:
        pass

    return False


def get_image_analysis_max_size_mb() -> int:
    """Get image analysis max size MB setting from workspace settings or fallback to environment variable"""
    try:
        settings = load_settings()
        if settings.image_analysis_max_size_mb is not None:
            return settings.image_analysis_max_size_mb
    except Exception:
        pass

    return DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB


================================================
FILE: backend/onyx/configs/model_configs.py
================================================
import json
import os

#####
# Embedding/Reranking Model Configs
#####
# Important considerations when choosing models
# Max tokens count needs to be high considering use case (at least 512)
# Models used must be MIT or Apache license
# Inference/Indexing speed
# https://huggingface.co/DOCUMENT_ENCODER_MODEL
# The useable models configured as below must be SentenceTransformer compatible
# NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING
# IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI
DEFAULT_DOCUMENT_ENCODER_MODEL = "nomic-ai/nomic-embed-text-v1"
DOCUMENT_ENCODER_MODEL = (
    os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL
)
# If the below is changed, Vespa deployment must also be changed
DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768)
NORMALIZE_EMBEDDINGS = (
    os.environ.get("NORMALIZE_EMBEDDINGS") or "true"
).lower() == "true"

# Old default model settings, which are needed for an automatic easy upgrade
OLD_DEFAULT_DOCUMENT_ENCODER_MODEL = "thenlper/gte-small"
OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM = 384
OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS = False

# These are only used if reranking is turned off, to normalize the direct retrieval scores for display
# Currently unused
SIM_SCORE_RANGE_LOW = float(os.environ.get("SIM_SCORE_RANGE_LOW") or 0.0)
SIM_SCORE_RANGE_HIGH = float(os.environ.get("SIM_SCORE_RANGE_HIGH") or 1.0)
# Certain models like e5, BGE, etc use a prefix for asymmetric retrievals (query generally shorter than docs)
ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ")
ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ")
# Purely an optimization, memory limitation consideration

# User's set embedding batch size overrides the default encoding batch sizes
EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None

BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8
# don't send over too many chunks at once, as sending too many could cause timeouts
BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512
# For score display purposes, only way is to know the expected ranges
CROSS_ENCODER_RANGE_MAX = 1
CROSS_ENCODER_RANGE_MIN = 0


#####
# Generative AI Model Configs
#####

# NOTE: the 2 below should only be used for dev.
GEN_AI_API_KEY = os.environ.get("GEN_AI_API_KEY")
GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION")

# Override the auto-detection of LLM max context length
GEN_AI_MAX_TOKENS = int(os.environ.get("GEN_AI_MAX_TOKENS") or 0) or None

# Set this to be enough for an answer + quotes. Also used for Chat
# This is the minimum token context we will leave for the LLM to generate an answer
GEN_AI_NUM_RESERVED_OUTPUT_TOKENS = int(
    os.environ.get("GEN_AI_NUM_RESERVED_OUTPUT_TOKENS") or 1024
)

# Fallback token limit for models where the max context is unknown
# Set conservatively at 32K to handle most modern models
GEN_AI_MODEL_FALLBACK_MAX_TOKENS = int(
    os.environ.get("GEN_AI_MODEL_FALLBACK_MAX_TOKENS") or 32000
)

# This is used when computing how much context space is available for documents
# ahead of time in order to let the user know if they can "select" more documents
# It represents a maximum "expected" number of input tokens from the latest user
# message. At query time, we don't actually enforce this - we will only throw an
# error if the total # of tokens exceeds the max input tokens.
GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS = 512
GEN_AI_TEMPERATURE = float(os.environ.get("GEN_AI_TEMPERATURE") or 0)

# should be used if you are using a custom LLM inference provider that doesn't support
# streaming format AND you are still using the langchain/litellm LLM class
DISABLE_LITELLM_STREAMING = (
    os.environ.get("DISABLE_LITELLM_STREAMING") or "false"
).lower() == "true"

# extra headers to pass to LiteLLM
LITELLM_EXTRA_HEADERS: dict[str, str] | None = None
_LITELLM_EXTRA_HEADERS_RAW = os.environ.get("LITELLM_EXTRA_HEADERS")
if _LITELLM_EXTRA_HEADERS_RAW:
    try:
        LITELLM_EXTRA_HEADERS = json.loads(_LITELLM_EXTRA_HEADERS_RAW)
    except Exception:
        # need to import here to avoid circular imports
        from onyx.utils.logger import setup_logger

        logger = setup_logger()
        logger.error(
            "Failed to parse LITELLM_EXTRA_HEADERS, must be a valid JSON object"
        )

# if specified, will pass through request headers to the call to the LLM
LITELLM_PASS_THROUGH_HEADERS: list[str] | None = None
_LITELLM_PASS_THROUGH_HEADERS_RAW = os.environ.get("LITELLM_PASS_THROUGH_HEADERS")
if _LITELLM_PASS_THROUGH_HEADERS_RAW:
    try:
        LITELLM_PASS_THROUGH_HEADERS = json.loads(_LITELLM_PASS_THROUGH_HEADERS_RAW)
    except Exception:
        # need to import here to avoid circular imports
        from onyx.utils.logger import setup_logger

        logger = setup_logger()
        logger.error(
            "Failed to parse LITELLM_PASS_THROUGH_HEADERS, must be a valid JSON object"
        )


# if specified, will merge the specified JSON with the existing body of the
# request before sending it to the LLM
LITELLM_EXTRA_BODY: dict | None = None
_LITELLM_EXTRA_BODY_RAW = os.environ.get("LITELLM_EXTRA_BODY")
if _LITELLM_EXTRA_BODY_RAW:
    try:
        LITELLM_EXTRA_BODY = json.loads(_LITELLM_EXTRA_BODY_RAW)
    except Exception:
        pass

#####
# Prompt Caching Configs
#####
# Enable prompt caching framework
ENABLE_PROMPT_CACHING = (
    os.environ.get("ENABLE_PROMPT_CACHING", "true").lower() != "false"
)

# Cache TTL multiplier - store caches slightly longer than provider TTL
# This allows for some clock skew and ensures we don't lose cache metadata prematurely
PROMPT_CACHE_REDIS_TTL_MULTIPLIER = float(
    os.environ.get("PROMPT_CACHE_REDIS_TTL_MULTIPLIER") or 1.2
)


================================================
FILE: backend/onyx/configs/onyxbot_configs.py
================================================
import os

#####
# Onyx Slack Bot Configs
#####
ONYX_BOT_NUM_RETRIES = int(os.environ.get("ONYX_BOT_NUM_RETRIES", "5"))
# Number of docs to display in "Reference Documents"
ONYX_BOT_NUM_DOCS_TO_DISPLAY = int(os.environ.get("ONYX_BOT_NUM_DOCS_TO_DISPLAY", "5"))
# If the LLM fails to answer, Onyx can still show the "Reference Documents"
ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(
    "ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER", ""
).lower() not in ["false", ""]
# When Onyx is considering a message, what emoji does it react with
ONYX_BOT_REACT_EMOJI = os.environ.get("ONYX_BOT_REACT_EMOJI") or "eyes"
# When User needs more help, what should the emoji be
ONYX_BOT_FOLLOWUP_EMOJI = os.environ.get("ONYX_BOT_FOLLOWUP_EMOJI") or "sos"
# What kind of message should be shown when someone gives an AI answer feedback to OnyxBot
# Defaults to Private if not provided or invalid
# Private: Only visible to user clicking the feedback
# Anonymous: Public but anonymous
# Public: Visible with the user name who submitted the feedback
ONYX_BOT_FEEDBACK_VISIBILITY = (
    os.environ.get("ONYX_BOT_FEEDBACK_VISIBILITY") or "private"
)
# Should OnyxBot send an apology message if it's not able to find an answer
# That way the user isn't confused as to why OnyxBot reacted but then said nothing
# Off by default to be less intrusive (don't want to give a notif that just says we couldnt help)
NOTIFY_SLACKBOT_NO_ANSWER = (
    os.environ.get("NOTIFY_SLACKBOT_NO_ANSWER", "").lower() == "true"
)
# Mostly for debugging purposes but it's for explaining what went wrong
# if OnyxBot couldn't find an answer
ONYX_BOT_DISPLAY_ERROR_MSGS = os.environ.get(
    "ONYX_BOT_DISPLAY_ERROR_MSGS", ""
).lower() not in [
    "false",
    "",
]

# Maximum Questions Per Minute, Default Uncapped
ONYX_BOT_MAX_QPM = int(os.environ.get("ONYX_BOT_MAX_QPM") or 0) or None
# Maximum time to wait when a question is queued
ONYX_BOT_MAX_WAIT_TIME = int(os.environ.get("ONYX_BOT_MAX_WAIT_TIME") or 180)

# Time (in minutes) after which a Slack message is sent to the user to remind him to give feedback.
# Set to 0 to disable it (default)
ONYX_BOT_FEEDBACK_REMINDER = int(os.environ.get("ONYX_BOT_FEEDBACK_REMINDER") or 0)

# ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of
# responses OnyxBot can send in a given time period.
# Set to 0 to disable the limit.
ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD = int(
    os.environ.get("ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD", "5000")
)
# ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS is the number
# of seconds until the response limit is reset.
ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS = int(
    os.environ.get("ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS", "86400")
)


================================================
FILE: backend/onyx/configs/research_configs.py
================================================


================================================
FILE: backend/onyx/configs/saml_config/template.settings.json
================================================
{
  "strict": true,
  "debug": false,
  "idp": {
    "entityId": "<Provide This from IDP>",
    "singleSignOnService": {
      "url": "<Replace this with your IDP URL> https://trial-1234567.okta.com/home/trial-1234567_onyx/somevalues/somevalues",
      "binding": "urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect"
    },
    "x509cert": "<Provide this>"
  },
  "sp": {
    "entityId": "<Provide This from IDP>",
    "assertionConsumerService": {
      "url": "http://127.0.0.1:3000/auth/saml/callback",
      "binding": "urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST"
    },
    "x509cert": "<Provide this>"
  }
}


================================================
FILE: backend/onyx/configs/tool_configs.py
================================================
import json
import os


IMAGE_GENERATION_OUTPUT_FORMAT = os.environ.get(
    "IMAGE_GENERATION_OUTPUT_FORMAT", "b64_json"
)

# if specified, will pass through request headers to the call to API calls made by custom tools
CUSTOM_TOOL_PASS_THROUGH_HEADERS: list[str] | None = None
_CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW = os.environ.get(
    "CUSTOM_TOOL_PASS_THROUGH_HEADERS"
)
if _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW:
    try:
        CUSTOM_TOOL_PASS_THROUGH_HEADERS = json.loads(
            _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW
        )
    except Exception:
        # need to import here to avoid circular imports
        from onyx.utils.logger import setup_logger

        logger = setup_logger()
        logger.error(
            "Failed to parse CUSTOM_TOOL_PASS_THROUGH_HEADERS, must be a valid JSON object"
        )


================================================
FILE: backend/onyx/connectors/README.md
================================================
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md"} -->

# Writing a new Onyx Connector

This README covers how to contribute a new Connector for Onyx. It includes an overview of the design, interfaces,
and required changes.

Thank you for your contribution!

### Connector Overview

Connectors come in 3 different flows:

- Load Connector:
  - Bulk indexes documents to reflect a point in time. This type of connector generally works by either pulling all
    documents via a connector's API or loads the documents from some sort of a dump file.
- Poll Connector:
  - Incrementally updates documents based on a provided time range. It is used by the background job to pull the latest
    changes and additions since the last round of polling. This connector helps keep the document index up to date
    without needing to fetch/embed/index every document which would be too slow to do frequently on large sets of
    documents.
- Slim Connector:
  - This connector should be a lighter weight method of checking all documents in the source to see if they still exist.
  - This connector should be identical to the Poll or Load Connector except that it only fetches the IDs of the documents, not the documents themselves.
  - This is used by our pruning job which removes old documents from the index.
  - The optional start and end datetimes can be ignored.
- Event Based connectors:
  - Connectors that listen to events and update documents accordingly.
  - Currently not used by the background job, this exists for future design purposes.

### Connector Implementation

Refer to [interfaces.py](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/interfaces.py)
and this first contributor created Pull Request for a new connector (Shoutout to Dan Brown):
[Reference Pull Request](https://github.com/onyx-dot-app/onyx/pull/139)

For implementing a Slim Connector, refer to the comments in this PR:
[Slim Connector PR](https://github.com/onyx-dot-app/onyx/pull/3303/files)

All new connectors should have tests added to the `backend/tests/daily/connectors` directory. Refer to the above PR for an example of adding tests for a new connector.

#### Implementing the new Connector

The connector must subclass one or more of LoadConnector, PollConnector, CheckpointedConnector, or CheckpointedConnectorWithPermSync

The `__init__` should take arguments for configuring what documents the connector will and where it finds those
documents. For example, if you have a wiki site, it may include the configuration for the team, topic, folder, etc. of
the documents to fetch. It may also include the base domain of the wiki. Alternatively, if all the access information
of the connector is stored in the credential/token, then there may be no required arguments.

`load_credentials` should take a dictionary which provides all the access information that the connector might need.
For example this could be the user's username and access token.

Refer to the existing connectors for `load_from_state` and `poll_source` examples. There is not yet a process to listen
for EventConnector events, this will come down the line.

#### Development Tip

It may be handy to test your new connector separate from the rest of the stack while developing.
Follow the below template:

```commandline
if __name__ == "__main__":
    import time
    test_connector = NewConnector(space="engineering")
    test_connector.load_credentials({
        "user_id": "foobar",
        "access_token": "fake_token"
    })
    all_docs = test_connector.load_from_state()

    current = time.time()
    one_day_ago = current - 24 * 60 * 60  # 1 day
    latest_docs = test_connector.poll_source(one_day_ago, current)
```

> Note: Be sure to set PYTHONPATH to onyx/backend before running the above main.

### Additional Required Changes:

#### Backend Changes

- Add a new type to
  [DocumentSource](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/configs/constants.py)
- Add a mapping from DocumentSource (and optionally connector type) to the right connector class
  [here](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/factory.py#L33)

#### Frontend Changes

- Add the new Connector definition to the `SOURCE_METADATA_MAP` [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/lib/sources.ts#L59).
- Add the definition for the new Form to the `connectorConfigs` object [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/lib/connectors/connectors.ts#L79).

#### Docs Changes

Create the new connector page (with guiding images!) with how to get the connector credentials and how to set up the
connector in Onyx. Then create a Pull Request in [https://github.com/onyx-dot-app/documentation](https://github.com/onyx-dot-app/documentation).

### Before opening PR

1. Be sure to fully test changes end to end with setting up the connector and updating the index with new docs from the
   new connector. To make it easier to review, please attach a video showing the successful creation of the connector via the UI (starting from the `Add Connector` page).
2. Add a folder + tests under `backend/tests/daily/connectors` director. For an example, checkout the [test for Confluence](https://github.com/onyx-dot-app/onyx/blob/main/backend/tests/daily/connectors/confluence/test_confluence_basic.py). In the PR description, include a guide on how to setup the new source to pass the test. Before merging, we will re-create the environment and make sure the test(s) pass.
3. Be sure to run the linting/formatting, refer to the formatting and linting section in
   [CONTRIBUTING.md](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md#formatting-and-linting)


================================================
FILE: backend/onyx/connectors/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/airtable/airtable_connector.py
================================================
import contextvars
import re
from concurrent.futures import as_completed
from concurrent.futures import Future
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import Any
from typing import cast

import requests
from pyairtable import Api as AirtableApi
from pyairtable.api.types import RecordDict
from pyairtable.models.schema import TableSchema
from retry import retry

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.utils.logger import setup_logger

logger = setup_logger()

# NOTE: all are made lowercase to avoid case sensitivity issues
# These field types are considered metadata by default when
# treat_all_non_attachment_fields_as_metadata is False
DEFAULT_METADATA_FIELD_TYPES = {
    "singlecollaborator",
    "collaborator",
    "createdby",
    "singleselect",
    "multipleselects",
    "checkbox",
    "date",
    "datetime",
    "email",
    "phone",
    "url",
    "number",
    "currency",
    "duration",
    "percent",
    "rating",
    "createdtime",
    "lastmodifiedtime",
    "autonumber",
    "rollup",
    "lookup",
    "count",
    "formula",
    "date",
}


class AirtableClientNotSetUpError(PermissionError):
    def __init__(self) -> None:
        super().__init__("Airtable Client is not set up, was load_credentials called?")


# Matches URLs like https://airtable.com/appXXX/tblYYY/viwZZZ?blocks=hide
# Captures: base_id (appXXX), table_id (tblYYY), and optionally view_id (viwZZZ)
_AIRTABLE_URL_PATTERN = re.compile(
    r"https?://airtable\.com/(app[A-Za-z0-9]+)/(tbl[A-Za-z0-9]+)(?:/(viw[A-Za-z0-9]+))?",
)


def parse_airtable_url(
    url: str,
) -> tuple[str, str, str | None]:
    """Parse an Airtable URL into (base_id, table_id, view_id).

    Accepts URLs like:
      https://airtable.com/appXXX/tblYYY
      https://airtable.com/appXXX/tblYYY/viwZZZ
      https://airtable.com/appXXX/tblYYY/viwZZZ?blocks=hide

    Returns:
        (base_id, table_id, view_id or None)

    Raises:
        ValueError if the URL doesn't match the expected format.
    """
    match = _AIRTABLE_URL_PATTERN.search(url.strip())
    if not match:
        raise ValueError(
            f"Could not parse Airtable URL: '{url}'. Expected format: https://airtable.com/appXXX/tblYYY[/viwZZZ]"
        )
    return match.group(1), match.group(2), match.group(3)


class AirtableConnector(LoadConnector):
    def __init__(
        self,
        base_id: str = "",
        table_name_or_id: str = "",
        airtable_url: str = "",
        treat_all_non_attachment_fields_as_metadata: bool = False,
        view_id: str | None = None,
        share_id: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Initialize an AirtableConnector.

        Args:
            base_id: The ID of the Airtable base (not required when airtable_url is set)
            table_name_or_id: The name or ID of the table (not required when airtable_url is set)
            airtable_url: An Airtable URL to parse base_id, table_id, and view_id from.
                Overrides base_id, table_name_or_id, and view_id if provided.
            treat_all_non_attachment_fields_as_metadata: If True, all fields except attachments will be treated as metadata.
                If False, only fields with types in DEFAULT_METADATA_FIELD_TYPES will be treated as metadata.
            view_id: Optional ID of a specific view to use
            share_id: Optional ID of a "share" to use for generating record URLs
            batch_size: Number of records to process in each batch

        Mode is auto-detected: if a specific table is identified (via URL or
        base_id + table_name_or_id), the connector indexes that single table.
        Otherwise, it discovers and indexes all accessible bases and tables.
        """
        # If a URL is provided, parse it to extract base_id, table_id, and view_id
        if airtable_url:
            parsed_base_id, parsed_table_id, parsed_view_id = parse_airtable_url(
                airtable_url
            )
            base_id = parsed_base_id
            table_name_or_id = parsed_table_id
            if parsed_view_id:
                view_id = parsed_view_id

        self.base_id = base_id
        self.table_name_or_id = table_name_or_id
        self.index_all = not (base_id and table_name_or_id)
        self.view_id = view_id
        self.share_id = share_id
        self.batch_size = batch_size
        self._airtable_client: AirtableApi | None = None
        self.treat_all_non_attachment_fields_as_metadata = (
            treat_all_non_attachment_fields_as_metadata
        )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self._airtable_client = AirtableApi(credentials["airtable_access_token"])
        return None

    @property
    def airtable_client(self) -> AirtableApi:
        if not self._airtable_client:
            raise AirtableClientNotSetUpError()
        return self._airtable_client

    def validate_connector_settings(self) -> None:
        if self.index_all:
            try:
                bases = self.airtable_client.bases()
                if not bases:
                    raise ConnectorValidationError(
                        "No bases found. Ensure your API token has access to at least one base."
                    )
            except ConnectorValidationError:
                raise
            except Exception as e:
                raise ConnectorValidationError(f"Failed to list Airtable bases: {e}")
        else:
            if not self.base_id or not self.table_name_or_id:
                raise ConnectorValidationError(
                    "A valid Airtable URL or base_id and table_name_or_id are required when not using index_all mode."
                )
            try:
                table = self.airtable_client.table(self.base_id, self.table_name_or_id)
                table.schema()
            except Exception as e:
                raise ConnectorValidationError(
                    f"Failed to access table '{self.table_name_or_id}' in base '{self.base_id}': {e}"
                )

    @classmethod
    def _get_record_url(
        cls,
        base_id: str,
        table_id: str,
        record_id: str,
        share_id: str | None,
        view_id: str | None,
        field_id: str | None = None,
        attachment_id: str | None = None,
    ) -> str:
        """Constructs the URL for a record, optionally including field and attachment IDs

        Full possible structure is:

        https://airtable.com/BASE_ID/SHARE_ID/TABLE_ID/VIEW_ID/RECORD_ID/FIELD_ID/ATTACHMENT_ID
        """
        # If we have a shared link, use that view for better UX
        if share_id:
            base_url = f"https://airtable.com/{base_id}/{share_id}/{table_id}"
        else:
            base_url = f"https://airtable.com/{base_id}/{table_id}"

        if view_id:
            base_url = f"{base_url}/{view_id}"

        base_url = f"{base_url}/{record_id}"

        if field_id and attachment_id:
            return f"{base_url}/{field_id}/{attachment_id}?blocks=hide"

        return base_url

    def _extract_field_values(
        self,
        field_id: str,
        field_name: str,
        field_info: Any,
        field_type: str,
        base_id: str,
        table_id: str,
        view_id: str | None,
        record_id: str,
    ) -> list[tuple[str, str]]:
        """
        Extract value(s) + links from a field regardless of its type.
        Attachments are represented as multiple sections, and therefore
        returned as a list of tuples (value, link).
        """
        if field_info is None:
            return []

        # skip references to other records for now (would need to do another
        # request to get the actual record name/type)
        # TODO: support this
        if field_type == "multipleRecordLinks":
            return []

        # Get the base URL for this record
        default_link = self._get_record_url(
            base_id, table_id, record_id, self.share_id, self.view_id or view_id
        )

        if field_type == "multipleAttachments":
            attachment_texts: list[tuple[str, str]] = []
            for attachment in field_info:
                url = attachment.get("url")
                filename = attachment.get("filename", "")
                if not url:
                    continue

                @retry(
                    tries=5,
                    delay=1,
                    backoff=2,
                    max_delay=10,
                )
                def get_attachment_with_retry(url: str, record_id: str) -> bytes | None:
                    try:
                        attachment_response = requests.get(url)
                        attachment_response.raise_for_status()
                        return attachment_response.content
                    except requests.exceptions.HTTPError as e:
                        if e.response.status_code == 410:
                            logger.info(f"Refreshing attachment for {filename}")
                            # Re-fetch the record to get a fresh URL
                            refreshed_record = self.airtable_client.table(
                                base_id, table_id
                            ).get(record_id)
                            for refreshed_attachment in refreshed_record["fields"][
                                field_name
                            ]:
                                if refreshed_attachment.get("filename") == filename:
                                    new_url = refreshed_attachment.get("url")
                                    if new_url:
                                        attachment_response = requests.get(new_url)
                                        attachment_response.raise_for_status()
                                        return attachment_response.content

                            logger.error(f"Failed to refresh attachment for {filename}")
                        raise

                attachment_content = get_attachment_with_retry(url, record_id)
                if attachment_content:
                    try:
                        file_ext = get_file_ext(filename)
                        attachment_id = attachment["id"]
                        attachment_text = extract_file_text(
                            BytesIO(attachment_content),
                            filename,
                            break_on_unprocessable=False,
                            extension=file_ext,
                        )
                        if attachment_text:
                            # Use the helper method to construct attachment URLs
                            attachment_link = self._get_record_url(
                                base_id,
                                table_id,
                                record_id,
                                self.share_id,
                                self.view_id or view_id,
                                field_id,
                                attachment_id,
                            )
                            attachment_texts.append(
                                (f"{filename}:\n{attachment_text}", attachment_link)
                            )
                    except Exception as e:
                        logger.warning(
                            f"Failed to process attachment {filename}: {str(e)}"
                        )
            return attachment_texts

        if field_type in ["singleCollaborator", "collaborator", "createdBy"]:
            combined = []
            collab_name = field_info.get("name")
            collab_email = field_info.get("email")
            if collab_name:
                combined.append(collab_name)
            if collab_email:
                combined.append(f"({collab_email})")
            return [(" ".join(combined) if combined else str(field_info), default_link)]

        if isinstance(field_info, list):
            return [(str(item), default_link) for item in field_info]

        return [(str(field_info), default_link)]

    def _should_be_metadata(self, field_type: str) -> bool:
        """Determine if a field type should be treated as metadata.

        When treat_all_non_attachment_fields_as_metadata is True, all fields except
        attachments are treated as metadata. Otherwise, only fields with types listed
        in DEFAULT_METADATA_FIELD_TYPES are treated as metadata."""
        if self.treat_all_non_attachment_fields_as_metadata:
            return field_type.lower() != "multipleattachments"
        return field_type.lower() in DEFAULT_METADATA_FIELD_TYPES

    def _process_field(
        self,
        field_id: str,
        field_name: str,
        field_info: Any,
        field_type: str,
        base_id: str,
        table_id: str,
        view_id: str | None,
        record_id: str,
    ) -> tuple[list[TextSection], dict[str, str | list[str]]]:
        """
        Process a single Airtable field and return sections or metadata.

        Args:
            field_name: Name of the field
            field_info: Raw field information from Airtable
            field_type: Airtable field type

        Returns:
            (list of Sections, dict of metadata)
        """
        if field_info is None:
            return [], {}

        # Get the value(s) for the field
        field_value_and_links = self._extract_field_values(
            field_id=field_id,
            field_name=field_name,
            field_info=field_info,
            field_type=field_type,
            base_id=base_id,
            table_id=table_id,
            view_id=view_id,
            record_id=record_id,
        )
        if len(field_value_and_links) == 0:
            return [], {}

        # Determine if it should be metadata or a section
        if self._should_be_metadata(field_type):
            field_values = [value for value, _ in field_value_and_links]
            if len(field_values) > 1:
                return [], {field_name: field_values}
            return [], {field_name: field_values[0]}

        # Otherwise, create relevant sections
        sections = [
            TextSection(
                link=link,
                text=(
                    f"{field_name}:\n------------------------\n{text}\n------------------------"
                ),
            )
            for text, link in field_value_and_links
        ]
        return sections, {}

    def _process_record(
        self,
        record: RecordDict,
        table_schema: TableSchema,
        primary_field_name: str | None,
        base_id: str,
        base_name: str | None = None,
    ) -> Document | None:
        """Process a single Airtable record into a Document.

        Args:
            record: The Airtable record to process
            table_schema: Schema information for the table
            primary_field_name: Name of the primary field, if any
            base_id: The ID of the base this record belongs to
            base_name: The name of the base (used in semantic ID for index_all mode)

        Returns:
            Document object representing the record
        """
        table_id = table_schema.id
        table_name = table_schema.name
        record_id = record["id"]
        fields = record["fields"]
        sections: list[TextSection] = []
        metadata: dict[str, str | list[str]] = {}

        # Get primary field value if it exists
        primary_field_value = (
            fields.get(primary_field_name) if primary_field_name else None
        )
        view_id = table_schema.views[0].id if table_schema.views else None

        for field_schema in table_schema.fields:
            field_name = field_schema.name
            field_val = fields.get(field_name)
            field_type = field_schema.type

            logger.debug(
                f"Processing field '{field_name}' of type '{field_type}' for record '{record_id}'."
            )

            field_sections, field_metadata = self._process_field(
                field_id=field_schema.id,
                field_name=field_name,
                field_info=field_val,
                field_type=field_type,
                base_id=base_id,
                table_id=table_id,
                view_id=view_id,
                record_id=record_id,
            )

            sections.extend(field_sections)
            metadata.update(field_metadata)

        if not sections:
            logger.warning(f"No sections found for record {record_id}")
            return None

        # Include base name in semantic ID only in index_all mode
        if self.index_all and base_name:
            semantic_id = (
                f"{base_name} > {table_name}: {primary_field_value}"
                if primary_field_value
                else f"{base_name} > {table_name}"
            )
        else:
            semantic_id = (
                f"{table_name}: {primary_field_value}"
                if primary_field_value
                else table_name
            )

        # Build hierarchy source_path for Craft file system subdirectory structure.
        # This creates: airtable/{base_name}/{table_name}/record.json
        source_path: list[str] = []
        if base_name:
            source_path.append(base_name)
        source_path.append(table_name)

        return Document(
            id=f"airtable__{record_id}",
            sections=(cast(list[TextSection | ImageSection], sections)),
            source=DocumentSource.AIRTABLE,
            semantic_identifier=semantic_id,
            metadata=metadata,
            doc_metadata={
                "hierarchy": {
                    "source_path": source_path,
                    "base_id": base_id,
                    "table_id": table_id,
                    "table_name": table_name,
                    **({"base_name": base_name} if base_name else {}),
                }
            },
        )

    def _resolve_base_name(self, base_id: str) -> str | None:
        """Try to resolve a human-readable base name from the API."""
        try:
            for base_info in self.airtable_client.bases():
                if base_info.id == base_id:
                    return base_info.name
        except Exception:
            logger.debug(f"Could not resolve base name for {base_id}")
        return None

    def _index_table(
        self,
        base_id: str,
        table_name_or_id: str,
        base_name: str | None = None,
    ) -> GenerateDocumentsOutput:
        """Index all records from a single table. Yields batches of Documents."""
        # Resolve base name for hierarchy if not provided
        if base_name is None:
            base_name = self._resolve_base_name(base_id)

        table = self.airtable_client.table(base_id, table_name_or_id)
        records = table.all()

        table_schema = table.schema()
        primary_field_name = None

        # Find a primary field from the schema
        for field in table_schema.fields:
            if field.id == table_schema.primary_field_id:
                primary_field_name = field.name
                break

        logger.info(
            f"Processing {len(records)} records from table '{table_schema.name}' in base '{base_name or base_id}'."
        )

        if not records:
            return

        # Process records in parallel batches using ThreadPoolExecutor
        PARALLEL_BATCH_SIZE = 8
        max_workers = min(PARALLEL_BATCH_SIZE, len(records))

        for i in range(0, len(records), PARALLEL_BATCH_SIZE):
            batch_records = records[i : i + PARALLEL_BATCH_SIZE]
            record_documents: list[Document | HierarchyNode] = []

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Submit batch tasks
                future_to_record: dict[Future[Document | None], RecordDict] = {}
                for record in batch_records:
                    # Capture the current context so that the thread gets the current tenant ID
                    current_context = contextvars.copy_context()
                    future_to_record[
                        executor.submit(
                            current_context.run,
                            self._process_record,
                            record=record,
                            table_schema=table_schema,
                            primary_field_name=primary_field_name,
                            base_id=base_id,
                            base_name=base_name,
                        )
                    ] = record

                # Wait for all tasks in this batch to complete
                for future in as_completed(future_to_record):
                    record = future_to_record[future]
                    try:
                        document = future.result()
                        if document:
                            record_documents.append(document)
                    except Exception as e:
                        logger.exception(f"Failed to process record {record['id']}")
                        raise e

            if record_documents:
                yield record_documents

    def load_from_state(self) -> GenerateDocumentsOutput:
        """
        Fetch all records from one or all tables.

        NOTE: Airtable does not support filtering by time updated, so
        we have to fetch all records every time.
        """
        if not self.airtable_client:
            raise AirtableClientNotSetUpError()

        if self.index_all:
            yield from self._load_all()
        else:
            yield from self._index_table(
                base_id=self.base_id,
                table_name_or_id=self.table_name_or_id,
            )

    def _load_all(self) -> GenerateDocumentsOutput:
        """Discover all bases and tables, then index everything."""
        bases = self.airtable_client.bases()
        logger.info(f"Discovered {len(bases)} Airtable base(s).")

        for base_info in bases:
            base_id = base_info.id
            base_name = base_info.name
            logger.info(f"Listing tables for base '{base_name}' ({base_id}).")

            try:
                base = self.airtable_client.base(base_id)
                tables = base.tables()
            except Exception:
                logger.exception(
                    f"Failed to list tables for base '{base_name}' ({base_id}), skipping."
                )
                continue

            logger.info(f"Found {len(tables)} table(s) in base '{base_name}'.")

            for table in tables:
                try:
                    yield from self._index_table(
                        base_id=base_id,
                        table_name_or_id=table.id,
                        base_name=base_name,
                    )
                except Exception:
                    logger.exception(
                        f"Failed to index table '{table.name}' ({table.id}) in base '{base_name}' ({base_id}), skipping."
                    )
                    continue


================================================
FILE: backend/onyx/connectors/asana/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/asana/asana_api.py
================================================
import time
from collections.abc import Iterator
from datetime import datetime
from typing import Dict

import asana  # type: ignore

from onyx.utils.logger import setup_logger

logger = setup_logger()


# https://github.com/Asana/python-asana/tree/master?tab=readme-ov-file#documentation-for-api-endpoints
class AsanaTask:
    def __init__(
        self,
        id: str,
        title: str,
        text: str,
        link: str,
        last_modified: datetime,
        project_gid: str,
        project_name: str,
    ) -> None:
        self.id = id
        self.title = title
        self.text = text
        self.link = link
        self.last_modified = last_modified
        self.project_gid = project_gid
        self.project_name = project_name

    def __str__(self) -> str:
        return f"ID: {self.id}\nTitle: {self.title}\nLast modified: {self.last_modified}\nText: {self.text}"


class AsanaAPI:
    def __init__(
        self, api_token: str, workspace_gid: str, team_gid: str | None
    ) -> None:
        self._user = None
        self.workspace_gid = workspace_gid
        self.team_gid = team_gid

        self.configuration = asana.Configuration()
        self.api_client = asana.ApiClient(self.configuration)
        self.tasks_api = asana.TasksApi(self.api_client)
        self.stories_api = asana.StoriesApi(self.api_client)
        self.users_api = asana.UsersApi(self.api_client)
        self.project_api = asana.ProjectsApi(self.api_client)
        self.workspaces_api = asana.WorkspacesApi(self.api_client)

        self.api_error_count = 0
        self.configuration.access_token = api_token
        self.task_count = 0

    def get_tasks(
        self, project_gids: list[str] | None, start_date: str
    ) -> Iterator[AsanaTask]:
        """Get all tasks from the projects with the given gids that were modified since the given date.
        If project_gids is None, get all tasks from all projects in the workspace."""
        logger.info("Starting to fetch Asana projects")
        projects = self.project_api.get_projects(
            opts={
                "workspace": self.workspace_gid,
                "opt_fields": "gid,name,archived,modified_at",
            }
        )
        start_seconds = int(time.mktime(datetime.now().timetuple()))
        projects_list = []
        project_count = 0
        for project_info in projects:
            project_gid = project_info["gid"]
            if project_gids is None or project_gid in project_gids:
                projects_list.append(project_gid)
            else:
                logger.debug(
                    f"Skipping project: {project_gid} - not in accepted project_gids"
                )
            project_count += 1
            if project_count % 100 == 0:
                logger.info(f"Processed {project_count} projects")

        logger.info(f"Found {len(projects_list)} projects to process")
        for project_gid in projects_list:
            for task in self._get_tasks_for_project(
                project_gid, start_date, start_seconds
            ):
                yield task
        logger.info(f"Completed fetching {self.task_count} tasks from Asana")
        if self.api_error_count > 0:
            logger.warning(
                f"Encountered {self.api_error_count} API errors during task fetching"
            )

    def _get_tasks_for_project(
        self, project_gid: str, start_date: str, start_seconds: int
    ) -> Iterator[AsanaTask]:
        project = self.project_api.get_project(project_gid, opts={})
        project_name = project.get("name", project_gid)
        team = project.get("team") or {}
        team_gid = team.get("gid")

        if project.get("archived"):
            logger.info(f"Skipping archived project: {project_name} ({project_gid})")
            return
        if not team_gid:
            logger.info(
                f"Skipping project without a team: {project_name} ({project_gid})"
            )
            return
        if project.get("privacy_setting") == "private":
            if self.team_gid and team_gid != self.team_gid:
                logger.info(
                    f"Skipping private project not in configured team: {project_name} ({project_gid})"
                )
                return
            logger.info(
                f"Processing private project in configured team: {project_name} ({project_gid})"
            )

        simple_start_date = start_date.split(".")[0].split("+")[0]
        logger.info(
            f"Fetching tasks modified since {simple_start_date} for project: {project_name} ({project_gid})"
        )

        opts = {
            "opt_fields": "name,memberships,memberships.project,completed_at,completed_by,created_at,"
            "created_by,custom_fields,dependencies,due_at,due_on,external,html_notes,liked,likes,"
            "modified_at,notes,num_hearts,parent,projects,resource_subtype,resource_type,start_on,"
            "workspace,permalink_url",
            "modified_since": start_date,
        }
        tasks_from_api = self.tasks_api.get_tasks_for_project(project_gid, opts)
        for data in tasks_from_api:
            self.task_count += 1
            if self.task_count % 10 == 0:
                end_seconds = time.mktime(datetime.now().timetuple())
                runtime_seconds = end_seconds - start_seconds
                if runtime_seconds > 0:
                    logger.info(
                        f"Processed {self.task_count} tasks in {runtime_seconds:.0f} seconds "
                        f"({self.task_count / runtime_seconds:.2f} tasks/second)"
                    )

            logger.debug(f"Processing Asana task: {data['name']}")

            text = self._construct_task_text(data)

            try:
                text += self._fetch_and_add_comments(data["gid"])

                last_modified_date = self.format_date(data["modified_at"])
                text += f"Last modified: {last_modified_date}\n"

                task = AsanaTask(
                    id=data["gid"],
                    title=data["name"],
                    text=text,
                    link=data["permalink_url"],
                    last_modified=datetime.fromisoformat(data["modified_at"]),
                    project_gid=project_gid,
                    project_name=project_name,
                )
                yield task
            except Exception:
                logger.error(
                    f"Error processing task {data['gid']} in project {project_gid}",
                    exc_info=True,
                )
                self.api_error_count += 1

    def _construct_task_text(self, data: Dict) -> str:
        text = f"{data['name']}\n\n"

        if data["notes"]:
            text += f"{data['notes']}\n\n"

        if data["created_by"] and data["created_by"]["gid"]:
            creator = self.get_user(data["created_by"]["gid"])["name"]
            created_date = self.format_date(data["created_at"])
            text += f"Created by: {creator} on {created_date}\n"

        if data["due_on"]:
            due_date = self.format_date(data["due_on"])
            text += f"Due date: {due_date}\n"

        if data["completed_at"]:
            completed_date = self.format_date(data["completed_at"])
            text += f"Completed on: {completed_date}\n"

        text += "\n"
        return text

    def _fetch_and_add_comments(self, task_gid: str) -> str:
        text = ""
        stories_opts: Dict[str, str] = {}
        story_start = time.time()
        stories = self.stories_api.get_stories_for_task(task_gid, stories_opts)

        story_count = 0
        comment_count = 0

        for story in stories:
            story_count += 1
            if story["resource_subtype"] == "comment_added":
                comment = self.stories_api.get_story(
                    story["gid"], opts={"opt_fields": "text,created_by,created_at"}
                )
                commenter = self.get_user(comment["created_by"]["gid"])["name"]
                text += f"Comment by {commenter}: {comment['text']}\n\n"
                comment_count += 1

        story_duration = time.time() - story_start
        logger.debug(
            f"Processed {story_count} stories (including {comment_count} comments) in {story_duration:.2f} seconds"
        )

        return text

    def get_user(self, user_gid: str) -> Dict:
        if self._user is not None:
            return self._user
        self._user = self.users_api.get_user(user_gid, {"opt_fields": "name,email"})

        if not self._user:
            logger.warning(f"Unable to fetch user information for user_gid: {user_gid}")
            return {"name": "Unknown"}
        return self._user

    def format_date(self, date_str: str) -> str:
        date = datetime.fromisoformat(date_str)
        return time.strftime("%Y-%m-%d", date.timetuple())

    def get_time(self) -> str:
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


================================================
FILE: backend/onyx/connectors/asana/connector.py
================================================
import datetime
from typing import Any

from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.asana import asana_api
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()


class AsanaConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        asana_workspace_id: str,
        asana_project_ids: str | None = None,
        asana_team_id: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
    ) -> None:
        self.workspace_id = asana_workspace_id.strip()
        if asana_project_ids:
            project_ids = [
                project_id.strip()
                for project_id in asana_project_ids.split(",")
                if project_id.strip()
            ]
            self.project_ids_to_index = project_ids or None
        else:
            self.project_ids_to_index = None
        self.asana_team_id = (asana_team_id.strip() or None) if asana_team_id else None
        self.batch_size = batch_size
        self.continue_on_failure = continue_on_failure
        logger.info(
            f"AsanaConnector initialized with workspace_id: {asana_workspace_id}"
        )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.api_token = credentials["asana_api_token_secret"]
        self.asana_client = asana_api.AsanaAPI(
            api_token=self.api_token,
            workspace_gid=self.workspace_id,
            team_gid=self.asana_team_id,
        )
        logger.info("Asana credentials loaded and API client initialized")
        return None

    def poll_source(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch | None,  # noqa: ARG002
    ) -> GenerateDocumentsOutput:
        start_time = datetime.datetime.fromtimestamp(start).isoformat()
        logger.info(f"Starting Asana poll from {start_time}")
        asana = asana_api.AsanaAPI(
            api_token=self.api_token,
            workspace_gid=self.workspace_id,
            team_gid=self.asana_team_id,
        )
        docs_batch: list[Document | HierarchyNode] = []
        tasks = asana.get_tasks(self.project_ids_to_index, start_time)

        for task in tasks:
            doc = self._message_to_doc(task)
            docs_batch.append(doc)

            if len(docs_batch) >= self.batch_size:
                logger.info(f"Yielding batch of {len(docs_batch)} documents")
                yield docs_batch
                docs_batch = []

        if docs_batch:
            logger.info(f"Yielding final batch of {len(docs_batch)} documents")
            yield docs_batch

        logger.info("Asana poll completed")

    def load_from_state(self) -> GenerateDocumentsOutput:
        logger.notice("Starting full index of all Asana tasks")
        return self.poll_source(start=0, end=None)

    def _message_to_doc(self, task: asana_api.AsanaTask) -> Document:
        logger.debug(f"Converting Asana task {task.id} to Document")
        return Document(
            id=task.id,
            sections=[TextSection(link=task.link, text=task.text)],
            doc_updated_at=task.last_modified,
            source=DocumentSource.ASANA,
            semantic_identifier=task.title,
            metadata={
                "group": task.project_gid,
                "project": task.project_name,
            },
        )


if __name__ == "__main__":
    import time
    import os

    logger.notice("Starting Asana connector test")
    connector = AsanaConnector(
        os.environ["WORKSPACE_ID"],
        os.environ["PROJECT_IDS"],
        os.environ["TEAM_ID"],
    )
    connector.load_credentials(
        {
            "asana_api_token_secret": os.environ["API_TOKEN"],
        }
    )
    logger.info("Loading all documents from Asana")
    all_docs = connector.load_from_state()
    current = time.time()
    one_day_ago = current - 24 * 60 * 60  # 1 day
    logger.info("Polling for documents updated in the last 24 hours")
    latest_docs = connector.poll_source(one_day_ago, current)
    for docs in latest_docs:
        for doc in docs:
            if isinstance(doc, HierarchyNode):
                print("hierarchynode:", doc.display_name)
            else:
                print(doc.id)
    logger.notice("Asana connector test completed")


================================================
FILE: backend/onyx/connectors/axero/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/axero/connector.py
================================================
import time
from datetime import datetime
from datetime import timezone
from typing import Any

import requests
from pydantic import BaseModel

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    process_in_batches,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder


logger = setup_logger()


ENTITY_NAME_MAP = {1: "Forum", 3: "Article", 4: "Blog", 9: "Wiki"}


def _get_auth_header(api_key: str) -> dict[str, str]:
    return {"Rest-Api-Key": api_key}


@retry_builder()
@rate_limit_builder(max_calls=5, period=1)
def _rate_limited_request(
    endpoint: str, headers: dict, params: dict | None = None
) -> Any:
    # https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/370/rest-api
    return requests.get(endpoint, headers=headers, params=params)


# https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/595/rest-api-get-content-list
def _get_entities(
    entity_type: int,
    api_key: str,
    axero_base_url: str,
    start: datetime,
    end: datetime,
    space_id: str | None = None,
) -> list[dict]:
    endpoint = axero_base_url + "api/content/list"
    page_num = 1
    pages_fetched = 0
    pages_to_return = []
    break_out = False
    while True:
        params = {
            "EntityType": str(entity_type),
            "SortColumn": "DateUpdated",
            "SortOrder": "1",  # descending
            "StartPage": str(page_num),
        }

        if space_id is not None:
            params["SpaceID"] = space_id

        res = _rate_limited_request(
            endpoint, headers=_get_auth_header(api_key), params=params
        )
        res.raise_for_status()

        # Axero limitations:
        # No next page token, can paginate but things may have changed
        # for example, a doc that hasn't been read in by Onyx is updated and is now front of the list
        # due to this limitation and the fact that Axero has no rate limiting but API calls can cause
        # increased latency for the team, we have to just fetch all the pages quickly to reduce the
        # chance of missing a document due to an update (it will still get updated next pass)
        # Assumes the volume of data isn't too big to store in memory (probably fine)
        data = res.json()
        total_records = data["TotalRecords"]
        contents = data["ResponseData"]
        pages_fetched += len(contents)
        logger.debug(f"Fetched {pages_fetched} {ENTITY_NAME_MAP[entity_type]}")

        for page in contents:
            update_time = time_str_to_utc(page["DateUpdated"])

            if update_time > end:
                continue

            if update_time < start:
                break_out = True
                break

            pages_to_return.append(page)

        if pages_fetched >= total_records:
            break

        page_num += 1

        if break_out:
            break

    return pages_to_return


def _get_obj_by_id(obj_id: int, api_key: str, axero_base_url: str) -> dict:
    endpoint = axero_base_url + f"api/content/{obj_id}"
    res = _rate_limited_request(endpoint, headers=_get_auth_header(api_key))
    res.raise_for_status()

    return res.json()


class AxeroForum(BaseModel):
    doc_id: str
    title: str
    link: str
    initial_content: str
    responses: list[str]
    last_update: datetime


def _map_post_to_parent(
    posts: dict,
    api_key: str,
    axero_base_url: str,
) -> list[AxeroForum]:
    """Cannot handle in batches since the posts aren't ordered or structured in any way
    may need to map any number of them to the initial post"""
    epoch_str = "1970-01-01T00:00:00.000"
    post_map: dict[int, AxeroForum] = {}

    for ind, post in enumerate(posts):
        if (ind + 1) % 25 == 0:
            logger.debug(f"Processed {ind + 1} posts or responses")

        post_time = time_str_to_utc(
            post.get("DateUpdated") or post.get("DateCreated") or epoch_str
        )
        p_id = post.get("ParentContentID")
        if p_id in post_map:
            axero_forum = post_map[p_id]
            axero_forum.responses.insert(0, post.get("ContentSummary"))
            axero_forum.last_update = max(axero_forum.last_update, post_time)
        else:
            initial_post_d = _get_obj_by_id(p_id, api_key, axero_base_url)[
                "ResponseData"
            ]
            initial_post_time = time_str_to_utc(
                initial_post_d.get("DateUpdated")
                or initial_post_d.get("DateCreated")
                or epoch_str
            )
            post_map[p_id] = AxeroForum(
                doc_id="AXERO_" + str(initial_post_d.get("ContentID")),
                title=initial_post_d.get("ContentTitle"),
                link=initial_post_d.get("ContentURL"),
                initial_content=initial_post_d.get("ContentSummary"),
                responses=[post.get("ContentSummary")],
                last_update=max(post_time, initial_post_time),
            )

    return list(post_map.values())


def _get_forums(
    api_key: str,
    axero_base_url: str,
    space_id: str | None = None,
) -> list[dict]:
    endpoint = axero_base_url + "api/content/list"
    page_num = 1
    pages_fetched = 0
    pages_to_return = []
    break_out = False

    while True:
        params = {
            "EntityType": "54",
            "SortColumn": "DateUpdated",
            "SortOrder": "1",  # descending
            "StartPage": str(page_num),
        }

        if space_id is not None:
            params["SpaceID"] = space_id

        res = _rate_limited_request(
            endpoint, headers=_get_auth_header(api_key), params=params
        )
        res.raise_for_status()

        data = res.json()
        total_records = data["TotalRecords"]
        contents = data["ResponseData"]
        pages_fetched += len(contents)
        logger.debug(f"Fetched {pages_fetched} forums")

        for page in contents:
            pages_to_return.append(page)

        if pages_fetched >= total_records:
            break

        page_num += 1

        if break_out:
            break

    return pages_to_return


def _translate_forum_to_doc(af: AxeroForum) -> Document:
    doc = Document(
        id=af.doc_id,
        sections=[TextSection(link=af.link, text=reply) for reply in af.responses],
        source=DocumentSource.AXERO,
        semantic_identifier=af.title,
        doc_updated_at=af.last_update,
        metadata={},
    )

    return doc


def _translate_content_to_doc(content: dict) -> Document:
    page_text = ""
    summary = content.get("ContentSummary")
    body = content.get("ContentBody")
    if summary:
        page_text += f"{summary}\n"

    if body:
        content_parsed = parse_html_page_basic(body)
        page_text += content_parsed

    doc = Document(
        id="AXERO_" + str(content["ContentID"]),
        sections=[TextSection(link=content["ContentURL"], text=page_text)],
        source=DocumentSource.AXERO,
        semantic_identifier=content["ContentTitle"],
        doc_updated_at=time_str_to_utc(content["DateUpdated"]),
        metadata={"space": content["SpaceName"]},
    )

    return doc


class AxeroConnector(PollConnector):
    def __init__(
        self,
        # Strings of the integer ids of the spaces
        spaces: list[str] | None = None,
        include_article: bool = True,
        include_blog: bool = True,
        include_wiki: bool = True,
        include_forum: bool = True,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.include_article = include_article
        self.include_blog = include_blog
        self.include_wiki = include_wiki
        self.include_forum = include_forum
        self.batch_size = batch_size
        self.space_ids = spaces
        self.axero_key = None
        self.base_url = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.axero_key = credentials["axero_api_token"]
        # As the API key specifically applies to a particular deployment, this is
        # included as part of the credential
        base_url = credentials["base_url"]
        if not base_url.endswith("/"):
            base_url += "/"
        self.base_url = base_url
        return None

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if not self.axero_key or not self.base_url:
            raise ConnectorMissingCredentialError("Axero")

        start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc)
        end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc)

        entity_types = []
        if self.include_article:
            entity_types.append(3)
        if self.include_blog:
            entity_types.append(4)
        if self.include_wiki:
            entity_types.append(9)

        iterable_space_ids = self.space_ids if self.space_ids else [None]

        for space_id in iterable_space_ids:
            for entity in entity_types:
                axero_obj = _get_entities(
                    entity_type=entity,
                    api_key=self.axero_key,
                    axero_base_url=self.base_url,
                    start=start_datetime,
                    end=end_datetime,
                    space_id=space_id,
                )
                yield from process_in_batches(
                    objects=axero_obj,
                    process_function=_translate_content_to_doc,
                    batch_size=self.batch_size,
                )

            if self.include_forum:
                forums_posts = _get_forums(
                    api_key=self.axero_key,
                    axero_base_url=self.base_url,
                    space_id=space_id,
                )

                all_axero_forums = _map_post_to_parent(
                    posts=forums_posts,
                    api_key=self.axero_key,
                    axero_base_url=self.base_url,
                )

                filtered_forums = [
                    f
                    for f in all_axero_forums
                    if f.last_update >= start_datetime and f.last_update <= end_datetime
                ]

                yield from process_in_batches(
                    objects=filtered_forums,
                    process_function=_translate_forum_to_doc,
                    batch_size=self.batch_size,
                )


if __name__ == "__main__":
    import os

    connector = AxeroConnector()
    connector.load_credentials(
        {
            "axero_api_token": os.environ["AXERO_API_TOKEN"],
            "base_url": os.environ["AXERO_BASE_URL"],
        }
    )
    current = time.time()

    one_year_ago = current - 24 * 60 * 60 * 360
    latest_docs = connector.poll_source(one_year_ago, current)

    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/bitbucket/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/bitbucket/connector.py
================================================
from __future__ import annotations

import copy
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TYPE_CHECKING

from typing_extensions import override

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
from onyx.configs.constants import DocumentSource
from onyx.connectors.bitbucket.utils import build_auth_client
from onyx.connectors.bitbucket.utils import list_repositories
from onyx.connectors.bitbucket.utils import map_pr_to_document
from onyx.connectors.bitbucket.utils import paginate
from onyx.connectors.bitbucket.utils import PR_LIST_RESPONSE_FIELDS
from onyx.connectors.bitbucket.utils import SLIM_PR_LIST_RESPONSE_FIELDS
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    import httpx

logger = setup_logger()


class BitbucketConnectorCheckpoint(ConnectorCheckpoint):
    """Checkpoint state for resumable Bitbucket PR indexing.

    Fields:
        repos_queue: Materialized list of repository slugs to process.
        current_repo_index: Index of the repository currently being processed.
        next_url: Bitbucket "next" URL for continuing pagination within the current repo.
    """

    repos_queue: list[str] = []
    current_repo_index: int = 0
    next_url: str | None = None


class BitbucketConnector(
    CheckpointedConnector[BitbucketConnectorCheckpoint],
    SlimConnectorWithPermSync,
):
    """Connector for indexing Bitbucket Cloud pull requests.

    Args:
        workspace: Bitbucket workspace ID.
        repositories: Comma-separated list of repository slugs to index.
        projects: Comma-separated list of project keys to index all repositories within.
        batch_size: Max number of documents to yield per batch.
    """

    def __init__(
        self,
        workspace: str,
        repositories: str | None = None,
        projects: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.workspace = workspace
        self._repositories = (
            [s.strip() for s in repositories.split(",") if s.strip()]
            if repositories
            else None
        )
        self._projects: list[str] | None = (
            [s.strip() for s in projects.split(",") if s.strip()] if projects else None
        )
        self.batch_size = batch_size
        self.email: str | None = None
        self.api_token: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Load API token-based credentials.

        Expects a dict with keys: `bitbucket_email`, `bitbucket_api_token`.
        """
        self.email = credentials.get("bitbucket_email")
        self.api_token = credentials.get("bitbucket_api_token")
        if not self.email or not self.api_token:
            raise ConnectorMissingCredentialError("Bitbucket")
        return None

    def _client(self) -> httpx.Client:
        """Build an authenticated HTTP client or raise if credentials missing."""
        if not self.email or not self.api_token:
            raise ConnectorMissingCredentialError("Bitbucket")
        return build_auth_client(self.email, self.api_token)

    def _iter_pull_requests_for_repo(
        self,
        client: httpx.Client,
        repo_slug: str,
        params: dict[str, Any] | None = None,
        start_url: str | None = None,
        on_page: Callable[[str | None], None] | None = None,
    ) -> Iterator[dict[str, Any]]:
        base = f"https://api.bitbucket.org/2.0/repositories/{self.workspace}/{repo_slug}/pullrequests"
        yield from paginate(
            client,
            base,
            params,
            start_url=start_url,
            on_page=on_page,
        )

    def _build_params(
        self,
        fields: str = PR_LIST_RESPONSE_FIELDS,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> dict[str, Any]:
        """Build Bitbucket fetch params.

        Always include OPEN, MERGED, and DECLINED PRs. If both ``start`` and
        ``end`` are provided, apply a single updated_on time window.
        """

        def _iso(ts: SecondsSinceUnixEpoch) -> str:
            return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()

        def _tc_epoch(
            lower_epoch: SecondsSinceUnixEpoch | None,
            upper_epoch: SecondsSinceUnixEpoch | None,
        ) -> str | None:
            if lower_epoch is not None and upper_epoch is not None:
                lower_iso = _iso(lower_epoch)
                upper_iso = _iso(upper_epoch)
                return f'(updated_on >= "{lower_iso}" AND updated_on <= "{upper_iso}")'
            return None

        params: dict[str, Any] = {"fields": fields, "pagelen": 50}
        time_clause = _tc_epoch(start, end)
        q = '(state = "OPEN" OR state = "MERGED" OR state = "DECLINED")'
        if time_clause:
            q = f"{q} AND {time_clause}"
        params["q"] = q
        return params

    def _iter_target_repositories(self, client: httpx.Client) -> Iterator[str]:
        """Yield repository slugs based on configuration.

        Priority:
        - repositories list
        - projects list (list repos by project key)
        - workspace (all repos)
        """
        if self._repositories:
            for slug in self._repositories:
                yield slug
            return
        if self._projects:
            for project_key in self._projects:
                for repo in list_repositories(client, self.workspace, project_key):
                    slug_val = repo.get("slug")
                    if isinstance(slug_val, str) and slug_val:
                        yield slug_val
            return
        for repo in list_repositories(client, self.workspace, None):
            slug_val = repo.get("slug")
            if isinstance(slug_val, str) and slug_val:
                yield slug_val

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: BitbucketConnectorCheckpoint,
    ) -> CheckpointOutput[BitbucketConnectorCheckpoint]:
        """Resumable PR ingestion across repos and pages within a time window.

        Yields Documents (or ConnectorFailure for per-PR mapping failures) and returns
        an updated checkpoint that records repo position and next page URL.
        """
        new_checkpoint = copy.deepcopy(checkpoint)

        with self._client() as client:
            # Materialize target repositories once
            if not new_checkpoint.repos_queue:
                # Preserve explicit order; otherwise ensure deterministic ordering
                repos_list = list(self._iter_target_repositories(client))
                new_checkpoint.repos_queue = sorted(set(repos_list))
                new_checkpoint.current_repo_index = 0
                new_checkpoint.next_url = None

            repos = new_checkpoint.repos_queue
            if not repos or new_checkpoint.current_repo_index >= len(repos):
                new_checkpoint.has_more = False
                return new_checkpoint

            repo_slug = repos[new_checkpoint.current_repo_index]

            first_page_params = self._build_params(
                fields=PR_LIST_RESPONSE_FIELDS,
                start=start,
                end=end,
            )

            def _on_page(next_url: str | None) -> None:
                new_checkpoint.next_url = next_url

            for pr in self._iter_pull_requests_for_repo(
                client,
                repo_slug,
                params=first_page_params,
                start_url=new_checkpoint.next_url,
                on_page=_on_page,
            ):
                try:
                    document = map_pr_to_document(pr, self.workspace, repo_slug)
                    yield document
                except Exception as e:
                    pr_id = pr.get("id")
                    pr_link = (
                        f"https://bitbucket.org/{self.workspace}/{repo_slug}/pull-requests/{pr_id}"
                        if pr_id is not None
                        else None
                    )
                    yield ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=(
                                f"{DocumentSource.BITBUCKET.value}:{self.workspace}:{repo_slug}:pr:{pr_id}"
                                if pr_id is not None
                                else f"{DocumentSource.BITBUCKET.value}:{self.workspace}:{repo_slug}:pr:unknown"
                            ),
                            document_link=pr_link,
                        ),
                        failure_message=f"Failed to process Bitbucket PR: {e}",
                        exception=e,
                    )

            # Advance to next repository (if any) and set has_more accordingly
            new_checkpoint.current_repo_index += 1
            new_checkpoint.next_url = None
            new_checkpoint.has_more = new_checkpoint.current_repo_index < len(repos)

        return new_checkpoint

    @override
    def build_dummy_checkpoint(self) -> BitbucketConnectorCheckpoint:
        """Create an initial checkpoint with work remaining."""
        return BitbucketConnectorCheckpoint(has_more=True)

    @override
    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> BitbucketConnectorCheckpoint:
        """Validate and deserialize a checkpoint instance from JSON."""
        return BitbucketConnectorCheckpoint.model_validate_json(checkpoint_json)

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> Iterator[list[SlimDocument | HierarchyNode]]:
        """Return only document IDs for all existing pull requests."""
        batch: list[SlimDocument | HierarchyNode] = []
        params = self._build_params(
            fields=SLIM_PR_LIST_RESPONSE_FIELDS,
            start=start,
            end=end,
        )
        with self._client() as client:
            for slug in self._iter_target_repositories(client):
                for pr in self._iter_pull_requests_for_repo(
                    client, slug, params=params
                ):
                    pr_id = pr["id"]
                    doc_id = f"{DocumentSource.BITBUCKET.value}:{self.workspace}:{slug}:pr:{pr_id}"
                    batch.append(SlimDocument(id=doc_id))
                    if len(batch) >= self.batch_size:
                        yield batch
                        batch = []
                        if callback:
                            if callback.should_stop():
                                # Note: this is not actually used for permission sync yet, just pruning
                                raise RuntimeError(
                                    "bitbucket_pr_sync: Stop signal detected"
                                )
                            callback.progress("bitbucket_pr_sync", len(batch))
        if batch:
            yield batch

    def validate_connector_settings(self) -> None:
        """Validate Bitbucket credentials and workspace access by probing a lightweight endpoint.

        Raises:
            CredentialExpiredError: on HTTP 401
            InsufficientPermissionsError: on HTTP 403
            UnexpectedValidationError: on any other failure
        """
        try:
            with self._client() as client:
                url = f"https://api.bitbucket.org/2.0/repositories/{self.workspace}"
                resp = client.get(
                    url,
                    params={"pagelen": 1, "fields": "pagelen"},
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                if resp.status_code == 401:
                    raise CredentialExpiredError(
                        "Invalid or expired Bitbucket credentials (HTTP 401)."
                    )
                if resp.status_code == 403:
                    raise InsufficientPermissionsError(
                        "Insufficient permissions to access Bitbucket workspace (HTTP 403)."
                    )
                if resp.status_code < 200 or resp.status_code >= 300:
                    raise UnexpectedValidationError(
                        f"Unexpected Bitbucket error (status={resp.status_code})."
                    )
        except Exception as e:
            # Network or other unexpected errors
            if isinstance(
                e,
                (
                    CredentialExpiredError,
                    InsufficientPermissionsError,
                    UnexpectedValidationError,
                    ConnectorMissingCredentialError,
                ),
            ):
                raise
            raise UnexpectedValidationError(
                f"Unexpected error while validating Bitbucket settings: {e}"
            )


================================================
FILE: backend/onyx/connectors/bitbucket/utils.py
================================================
from __future__ import annotations

import time
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any

import httpx

from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

# Fields requested from Bitbucket PR list endpoint to ensure rich PR data
PR_LIST_RESPONSE_FIELDS: str = ",".join(
    [
        "next",
        "page",
        "pagelen",
        "values.author",
        "values.close_source_branch",
        "values.closed_by",
        "values.comment_count",
        "values.created_on",
        "values.description",
        "values.destination",
        "values.draft",
        "values.id",
        "values.links",
        "values.merge_commit",
        "values.participants",
        "values.reason",
        "values.rendered",
        "values.reviewers",
        "values.source",
        "values.state",
        "values.summary",
        "values.task_count",
        "values.title",
        "values.type",
        "values.updated_on",
    ]
)

# Minimal fields for slim retrieval (IDs only)
SLIM_PR_LIST_RESPONSE_FIELDS: str = ",".join(
    [
        "next",
        "page",
        "pagelen",
        "values.id",
    ]
)


# Minimal fields for repository list calls
REPO_LIST_RESPONSE_FIELDS: str = ",".join(
    [
        "next",
        "page",
        "pagelen",
        "values.slug",
        "values.full_name",
        "values.project.key",
    ]
)


class BitbucketRetriableError(Exception):
    """Raised for retriable Bitbucket conditions (429, 5xx)."""


class BitbucketNonRetriableError(Exception):
    """Raised for non-retriable Bitbucket client errors (4xx except 429)."""


@retry_builder(
    tries=6,
    delay=1,
    backoff=2,
    max_delay=30,
    exceptions=(BitbucketRetriableError, httpx.RequestError),
)
@rate_limit_builder(max_calls=60, period=60)
def bitbucket_get(
    client: httpx.Client, url: str, params: dict[str, Any] | None = None
) -> httpx.Response:
    """Perform a GET against Bitbucket with retry and rate limiting.

    Retries on 429 and 5xx responses, and on transport errors. Honors
    `Retry-After` header for 429 when present by sleeping before retrying.
    """
    try:
        response = client.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    except httpx.RequestError:
        # Allow retry_builder to handle retries of transport errors
        raise

    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        status = e.response.status_code if e.response is not None else None
        if status == 429:
            retry_after = e.response.headers.get("Retry-After") if e.response else None
            if retry_after is not None:
                try:
                    time.sleep(int(retry_after))
                except (TypeError, ValueError):
                    pass
            raise BitbucketRetriableError("Bitbucket rate limit exceeded (429)") from e
        if status is not None and 500 <= status < 600:
            raise BitbucketRetriableError(f"Bitbucket server error: {status}") from e
        if status is not None and 400 <= status < 500:
            raise BitbucketNonRetriableError(f"Bitbucket client error: {status}") from e
        # Unknown status, propagate
        raise

    return response


def build_auth_client(email: str, api_token: str) -> httpx.Client:
    """Create an authenticated httpx client for Bitbucket Cloud API."""
    return httpx.Client(auth=(email, api_token), http2=True)


def paginate(
    client: httpx.Client,
    url: str,
    params: dict[str, Any] | None = None,
    start_url: str | None = None,
    on_page: Callable[[str | None], None] | None = None,
) -> Iterator[dict[str, Any]]:
    """Iterate over paginated Bitbucket API responses yielding individual values.

    Args:
        client: Authenticated HTTP client.
        url: Base collection URL (first page when start_url is None).
        params: Query params for the first page.
        start_url: If provided, start from this absolute URL (ignores params).
        on_page: Optional callback invoked after each page with the next page URL.
    """
    next_url = start_url or url
    # If resuming from a next URL, do not pass params again
    query = params.copy() if params else None
    query = None if start_url else query
    while next_url:
        resp = bitbucket_get(client, next_url, params=query)
        data = resp.json()
        values = data.get("values", [])
        for item in values:
            yield item
        next_url = data.get("next")
        if on_page is not None:
            on_page(next_url)
        # only include params on first call, next_url will contain all necessary params
        query = None


def list_repositories(
    client: httpx.Client, workspace: str, project_key: str | None = None
) -> Iterator[dict[str, Any]]:
    """List repositories in a workspace, optionally filtered by project key."""
    base_url = f"https://api.bitbucket.org/2.0/repositories/{workspace}"
    params: dict[str, Any] = {
        "fields": REPO_LIST_RESPONSE_FIELDS,
        "pagelen": 100,
        # Ensure deterministic ordering
        "sort": "full_name",
    }
    if project_key:
        params["q"] = f'project.key="{project_key}"'
    yield from paginate(client, base_url, params)


def map_pr_to_document(pr: dict[str, Any], workspace: str, repo_slug: str) -> Document:
    """Map a Bitbucket pull request JSON to Onyx Document."""
    pr_id = pr["id"]
    title = pr.get("title") or f"PR {pr_id}"
    description = pr.get("description") or ""
    state = pr.get("state")
    draft = pr.get("draft", False)
    author = pr.get("author", {})
    reviewers = pr.get("reviewers", [])
    participants = pr.get("participants", [])

    link = pr.get("links", {}).get("html", {}).get("href") or (
        f"https://bitbucket.org/{workspace}/{repo_slug}/pull-requests/{pr_id}"
    )

    created_on = pr.get("created_on")
    updated_on = pr.get("updated_on")
    updated_dt = (
        datetime.fromisoformat(updated_on.replace("Z", "+00:00")).astimezone(
            timezone.utc
        )
        if isinstance(updated_on, str)
        else None
    )

    source_branch = pr.get("source", {}).get("branch", {}).get("name", "")
    destination_branch = pr.get("destination", {}).get("branch", {}).get("name", "")

    approved_by = [
        _get_user_name(p.get("user", {})) for p in participants if p.get("approved")
    ]

    primary_owner = None
    if author:
        primary_owner = BasicExpertInfo(
            display_name=_get_user_name(author),
        )

    secondary_owners = [
        BasicExpertInfo(display_name=_get_user_name(r)) for r in reviewers
    ] or None

    reviewer_names = [_get_user_name(r) for r in reviewers]

    # Create a concise summary of key PR info
    created_date = created_on.split("T")[0] if created_on else "N/A"
    updated_date = updated_on.split("T")[0] if updated_on else "N/A"
    content_text = (
        "Pull Request Information:\n"
        f"- Pull Request ID: {pr_id}\n"
        f"- Title: {title}\n"
        f"- State: {state or 'N/A'} {'(Draft)' if draft else ''}\n"
    )
    if state == "DECLINED":
        content_text += f"- Reason: {pr.get('reason', 'N/A')}\n"
    content_text += (
        f"- Author: {_get_user_name(author) if author else 'N/A'}\n"
        f"- Reviewers: {', '.join(reviewer_names) if reviewer_names else 'N/A'}\n"
        f"- Branch: {source_branch} -> {destination_branch}\n"
        f"- Created: {created_date}\n"
        f"- Updated: {updated_date}"
    )
    if description:
        content_text += f"\n\nDescription:\n{description}"
    sections: list[TextSection | ImageSection] = [
        TextSection(link=link, text=content_text)
    ]

    metadata: dict[str, str | list[str]] = {
        "object_type": "PullRequest",
        "workspace": workspace,
        "repository": repo_slug,
        "pr_key": f"{workspace}/{repo_slug}#{pr_id}",
        "id": str(pr_id),
        "title": title,
        "state": state or "",
        "draft": str(bool(draft)),
        "link": link,
        "author": _get_user_name(author) if author else "",
        "reviewers": reviewer_names,
        "approved_by": approved_by,
        "comment_count": str(pr.get("comment_count", "")),
        "task_count": str(pr.get("task_count", "")),
        "created_on": created_on or "",
        "updated_on": updated_on or "",
        "source_branch": source_branch,
        "destination_branch": destination_branch,
        "closed_by": (
            _get_user_name(pr.get("closed_by", {})) if pr.get("closed_by") else ""
        ),
        "close_source_branch": str(bool(pr.get("close_source_branch", False))),
    }

    return Document(
        id=f"{DocumentSource.BITBUCKET.value}:{workspace}:{repo_slug}:pr:{pr_id}",
        sections=sections,
        source=DocumentSource.BITBUCKET,
        semantic_identifier=f"#{pr_id}: {title}",
        title=title,
        doc_updated_at=updated_dt,
        primary_owners=[primary_owner] if primary_owner else None,
        secondary_owners=secondary_owners,
        metadata=metadata,
    )


def _get_user_name(user: dict[str, Any]) -> str:
    return user.get("display_name") or user.get("nickname") or "unknown"


================================================
FILE: backend/onyx/connectors/blob/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/blob/connector.py
================================================
import os
import time
from collections.abc import Mapping
from datetime import datetime
from datetime import timezone
from io import BytesIO
from numbers import Integral
from typing import Any
from typing import Optional
from urllib.parse import quote

import boto3
from botocore.client import Config
from botocore.credentials import RefreshableCredentials
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
from botocore.exceptions import PartialCredentialsError
from botocore.session import get_session
from mypy_boto3_s3 import S3Client

from onyx.configs.app_configs import BLOB_STORAGE_SIZE_THRESHOLD
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import BlobType
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    process_onyx_metadata,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.logger import setup_logger

logger = setup_logger()


DOWNLOAD_CHUNK_SIZE = 1024 * 1024
SIZE_THRESHOLD_BUFFER = 64


class BlobStorageConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        bucket_type: str,
        bucket_name: str,
        prefix: str = "",
        batch_size: int = INDEX_BATCH_SIZE,
        european_residency: bool = False,
    ) -> None:
        self.bucket_type: BlobType = BlobType(bucket_type)
        self.bucket_name = bucket_name.strip()
        self.prefix = prefix if not prefix or prefix.endswith("/") else prefix + "/"
        self.batch_size = batch_size
        self.s3_client: Optional[S3Client] = None
        self._allow_images: bool | None = None
        self.size_threshold: int | None = BLOB_STORAGE_SIZE_THRESHOLD
        self.bucket_region: Optional[str] = None
        self.european_residency: bool = european_residency

    def set_allow_images(self, allow_images: bool) -> None:
        """Set whether to process images in this connector."""
        logger.info(f"Setting allow_images to {allow_images}.")
        self._allow_images = allow_images

    def _detect_bucket_region(self) -> None:
        """Detect and cache the actual region of the S3 bucket using head_bucket."""
        if self.s3_client is None:
            logger.warning(
                "S3 client not initialized. Skipping bucket region detection."
            )
            return

        try:
            response = self.s3_client.head_bucket(Bucket=self.bucket_name)
            # The region is in the response headers as 'x-amz-bucket-region'
            self.bucket_region = response.get("BucketRegion") or response.get(
                "ResponseMetadata", {}
            ).get("HTTPHeaders", {}).get("x-amz-bucket-region")

            if self.bucket_region:
                logger.debug(f"Detected bucket region: {self.bucket_region}")
            else:
                logger.warning("Bucket region not found in head_bucket response")
        except Exception as e:
            logger.warning(f"Failed to detect bucket region via head_bucket: {e}")

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Checks for boto3 credentials based on the bucket type.
        (1) R2: Access Key ID, Secret Access Key, Account ID
        (2) S3: AWS Access Key ID, AWS Secret Access Key or IAM role or Assume Role
        (3) GOOGLE_CLOUD_STORAGE: Access Key ID, Secret Access Key, Project ID
        (4) OCI_STORAGE: Namespace, Region, Access Key ID, Secret Access Key

        For each bucket type, the method initializes the appropriate S3 client:
        - R2: Uses Cloudflare R2 endpoint with S3v4 signature
        - S3: Creates a standard boto3 S3 client
        - GOOGLE_CLOUD_STORAGE: Uses Google Cloud Storage endpoint
        - OCI_STORAGE: Uses Oracle Cloud Infrastructure Object Storage endpoint

        Raises ConnectorMissingCredentialError if required credentials are missing.
        Raises ValueError for unsupported bucket types.
        """

        logger.debug(
            f"Loading credentials for {self.bucket_name} or type {self.bucket_type}"
        )

        if self.bucket_type == BlobType.R2:
            if not all(
                credentials.get(key)
                for key in ["r2_access_key_id", "r2_secret_access_key", "account_id"]
            ):
                raise ConnectorMissingCredentialError("Cloudflare R2")

            # Use EU endpoint if european_residency is enabled
            subdomain = "eu." if self.european_residency else ""
            endpoint_url = f"https://{credentials['account_id']}.{subdomain}r2.cloudflarestorage.com"

            self.s3_client = boto3.client(
                "s3",
                endpoint_url=endpoint_url,
                aws_access_key_id=credentials["r2_access_key_id"],
                aws_secret_access_key=credentials["r2_secret_access_key"],
                region_name="auto",
                config=Config(signature_version="s3v4"),
            )

        elif self.bucket_type == BlobType.S3:
            # For S3, we can use either access keys or IAM roles.
            authentication_method = credentials.get(
                "authentication_method", "access_key"
            )
            logger.debug(
                f"Using authentication method: {authentication_method} for S3 bucket."
            )
            if authentication_method == "access_key":
                logger.debug("Using access key authentication for S3 bucket.")
                if not all(
                    credentials.get(key)
                    for key in ["aws_access_key_id", "aws_secret_access_key"]
                ):
                    raise ConnectorMissingCredentialError("Amazon S3")

                session = boto3.Session(
                    aws_access_key_id=credentials["aws_access_key_id"],
                    aws_secret_access_key=credentials["aws_secret_access_key"],
                )
                self.s3_client = session.client("s3")
            elif authentication_method == "iam_role":
                # If using IAM roles, we assume the role and let boto3 handle the credentials.
                role_arn = credentials.get("aws_role_arn")
                # create session name using timestamp
                if not role_arn:
                    raise ConnectorMissingCredentialError(
                        "Amazon S3 IAM role ARN is required for assuming role."
                    )

                def _refresh_credentials() -> dict[str, str]:
                    """Refreshes the credentials for the assumed role."""
                    sts_client = boto3.client("sts")
                    assumed_role_object = sts_client.assume_role(
                        RoleArn=role_arn,
                        RoleSessionName=f"onyx_blob_storage_{int(time.time())}",
                    )
                    creds = assumed_role_object["Credentials"]
                    return {
                        "access_key": creds["AccessKeyId"],
                        "secret_key": creds["SecretAccessKey"],
                        "token": creds["SessionToken"],
                        "expiry_time": creds["Expiration"].isoformat(),
                    }

                refreshable = RefreshableCredentials.create_from_metadata(
                    metadata=_refresh_credentials(),
                    refresh_using=_refresh_credentials,
                    method="sts-assume-role",
                )
                botocore_session = get_session()
                botocore_session._credentials = refreshable  # type: ignore[attr-defined]
                session = boto3.Session(botocore_session=botocore_session)
                self.s3_client = session.client("s3")
            elif authentication_method == "assume_role":
                # We will assume the instance role to access S3.
                logger.debug("Using instance role authentication for S3 bucket.")
                self.s3_client = boto3.client("s3")
            else:
                raise ConnectorValidationError("Invalid authentication method for S3. ")

            # This is important for correct citation links
            # NOTE: the client region actually doesn't matter for accessing the bucket
            self._detect_bucket_region()

        elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:
            if not all(
                credentials.get(key) for key in ["access_key_id", "secret_access_key"]
            ):
                raise ConnectorMissingCredentialError("Google Cloud Storage")

            self.s3_client = boto3.client(
                "s3",
                endpoint_url="https://storage.googleapis.com",
                aws_access_key_id=credentials["access_key_id"],
                aws_secret_access_key=credentials["secret_access_key"],
                region_name="auto",
            )

        elif self.bucket_type == BlobType.OCI_STORAGE:
            if not all(
                credentials.get(key)
                for key in ["namespace", "region", "access_key_id", "secret_access_key"]
            ):
                raise ConnectorMissingCredentialError("Oracle Cloud Infrastructure")

            self.s3_client = boto3.client(
                "s3",
                endpoint_url=f"https://{credentials['namespace']}.compat.objectstorage.{credentials['region']}.oraclecloud.com",
                aws_access_key_id=credentials["access_key_id"],
                aws_secret_access_key=credentials["secret_access_key"],
                region_name=credentials["region"],
            )

        else:
            raise ValueError(f"Unsupported bucket type: {self.bucket_type}")

        return None

    def _download_object(self, key: str) -> bytes | None:
        if self.s3_client is None:
            raise ConnectorMissingCredentialError("Blob storage")
        response = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)
        body = response["Body"]

        try:
            if self.size_threshold is None:
                return body.read()

            return self._read_stream_with_limit(body, key)
        finally:
            body.close()

    def _read_stream_with_limit(self, body: Any, key: str) -> bytes | None:
        if self.size_threshold is None:
            return body.read()

        bytes_read = 0
        chunks: list[bytes] = []
        chunk_size = min(
            DOWNLOAD_CHUNK_SIZE, self.size_threshold + SIZE_THRESHOLD_BUFFER
        )

        for chunk in body.iter_chunks(chunk_size=chunk_size):
            if not chunk:
                continue
            chunks.append(chunk)
            bytes_read += len(chunk)

            if bytes_read > self.size_threshold + SIZE_THRESHOLD_BUFFER:
                logger.warning(
                    f"{key} exceeds size threshold of {self.size_threshold}. Skipping."
                )
                return None

        return b"".join(chunks)

    # NOTE: Left in as may be useful for one-off access to documents and sharing across orgs.
    # def _get_presigned_url(self, key: str) -> str:
    #     if self.s3_client is None:
    #         raise ConnectorMissingCredentialError("Blog storage")

    #     url = self.s3_client.generate_presigned_url(
    #         "get_object",
    #         Params={"Bucket": self.bucket_name, "Key": key},
    #         ExpiresIn=self.presign_length,
    #     )
    #     return url

    def _get_blob_link(self, key: str) -> str:
        # NOTE: We store the object dashboard URL instead of the actual object URL
        # This is because the actual object URL requires S3 client authentication
        # Accessing through the browser will always return an unauthorized error

        if self.s3_client is None:
            raise ConnectorMissingCredentialError("Blob storage")

        # URL encode the key to handle special characters, spaces, etc.
        # safe='/' keeps forward slashes unencoded for proper path structure
        encoded_key = quote(key, safe="/")

        if self.bucket_type == BlobType.R2:
            account_id = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0]
            subdomain = "eu/" if self.european_residency else "default/"

            return f"https://dash.cloudflare.com/{account_id}/r2/{subdomain}buckets/{self.bucket_name}/objects/{encoded_key}/details"

        elif self.bucket_type == BlobType.S3:
            region = self.bucket_region or self.s3_client.meta.region_name
            return f"https://s3.console.aws.amazon.com/s3/object/{self.bucket_name}?region={region}&prefix={encoded_key}"

        elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:
            return f"https://console.cloud.google.com/storage/browser/_details/{self.bucket_name}/{encoded_key}"

        elif self.bucket_type == BlobType.OCI_STORAGE:
            namespace = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0]
            region = self.s3_client.meta.region_name
            return f"https://objectstorage.{region}.oraclecloud.com/n/{namespace}/b/{self.bucket_name}/o/{encoded_key}"

        else:
            # This should never happen!
            raise ValueError(f"Unsupported bucket type: {self.bucket_type}")

    @staticmethod
    def _extract_size_bytes(obj: Mapping[str, Any]) -> int | None:
        """Return the first numeric size field found on the object metadata."""

        candidate_keys = (
            "Size",
            "size",
            "ContentLength",
            "content_length",
            "Content-Length",
            "contentLength",
            "bytes",
            "Bytes",
        )

        def _normalize(value: Any) -> int | None:
            if value is None or isinstance(value, bool):
                return None
            if isinstance(value, Integral):
                return int(value)
            try:
                numeric = float(value)
            except (TypeError, ValueError):
                return None
            if numeric >= 0 and numeric.is_integer():
                return int(numeric)
            return None

        for key in candidate_keys:
            if key in obj:
                normalized = _normalize(obj.get(key))
                if normalized is not None:
                    return normalized

        for key, value in obj.items():
            if not isinstance(key, str):
                continue
            lowered_key = key.lower()
            if "size" in lowered_key or "length" in lowered_key:
                normalized = _normalize(value)
                if normalized is not None:
                    return normalized

        return None

    def _yield_blob_objects(
        self,
        start: datetime,
        end: datetime,
    ) -> GenerateDocumentsOutput:
        if self.s3_client is None:
            raise ConnectorMissingCredentialError("Blob storage")

        paginator = self.s3_client.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix)

        batch: list[Document | HierarchyNode] = []
        for page in pages:
            if "Contents" not in page:
                continue

            for obj in page["Contents"]:
                if obj["Key"].endswith("/"):
                    continue

                last_modified = obj["LastModified"].replace(tzinfo=timezone.utc)

                if not start <= last_modified <= end:
                    continue

                file_name = os.path.basename(obj["Key"])
                file_ext = get_file_ext(file_name)
                key = obj["Key"]
                link = self._get_blob_link(key)

                size_bytes = self._extract_size_bytes(obj)
                if (
                    self.size_threshold is not None
                    and isinstance(size_bytes, int)
                    and self.size_threshold is not None
                    and size_bytes > self.size_threshold
                ):
                    logger.warning(
                        f"{file_name} exceeds size threshold of {self.size_threshold}. Skipping."
                    )
                    continue

                # Handle image files
                if file_ext in OnyxFileExtensions.IMAGE_EXTENSIONS:
                    if not self._allow_images:
                        logger.debug(
                            f"Skipping image file: {key} (image processing not enabled)"
                        )
                        continue

                    # Process the image file
                    try:
                        downloaded_file = self._download_object(key)
                        if downloaded_file is None:
                            continue

                        # TODO: Refactor to avoid direct DB access in connector
                        # This will require broader refactoring across the codebase
                        image_section, _ = store_image_and_create_section(
                            image_data=downloaded_file,
                            file_id=f"{self.bucket_type}_{self.bucket_name}_{key.replace('/', '_')}",
                            display_name=file_name,
                            link=link,
                            file_origin=FileOrigin.CONNECTOR,
                        )

                        batch.append(
                            Document(
                                id=f"{self.bucket_type}:{self.bucket_name}:{key}",
                                sections=[image_section],
                                source=DocumentSource(self.bucket_type.value),
                                semantic_identifier=file_name,
                                doc_updated_at=last_modified,
                                metadata={},
                            )
                        )

                        if len(batch) == self.batch_size:
                            yield batch
                            batch = []
                    except Exception:
                        logger.exception(f"Error processing image {key}")
                    continue

                # Handle text and document files
                try:
                    downloaded_file = self._download_object(key)
                    if downloaded_file is None:
                        continue
                    extraction_result = extract_text_and_images(
                        BytesIO(downloaded_file), file_name=file_name
                    )

                    onyx_metadata, custom_tags = process_onyx_metadata(
                        extraction_result.metadata
                    )
                    file_display_name = onyx_metadata.file_display_name or file_name
                    time_updated = onyx_metadata.doc_updated_at or last_modified
                    link = onyx_metadata.link or link
                    primary_owners = onyx_metadata.primary_owners
                    secondary_owners = onyx_metadata.secondary_owners
                    source_type = onyx_metadata.source_type or DocumentSource(
                        self.bucket_type.value
                    )

                    sections: list[TextSection | ImageSection] = []
                    if extraction_result.text_content.strip():
                        logger.debug(
                            f"Creating TextSection for {file_name} with link: {link}"
                        )
                        sections.append(
                            TextSection(
                                link=link,
                                text=extraction_result.text_content.strip(),
                            )
                        )

                    batch.append(
                        Document(
                            id=f"{self.bucket_type}:{self.bucket_name}:{key}",
                            sections=(
                                sections
                                if sections
                                else [TextSection(link=link, text="")]
                            ),
                            source=source_type,
                            semantic_identifier=file_display_name,
                            doc_updated_at=time_updated,
                            metadata=custom_tags,
                            primary_owners=primary_owners,
                            secondary_owners=secondary_owners,
                        )
                    )
                    if len(batch) == self.batch_size:
                        yield batch
                        batch = []

                except Exception:
                    logger.exception(f"Error decoding object {key} as UTF-8")
        if batch:
            yield batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        logger.debug("Loading blob objects")
        return self._yield_blob_objects(
            start=datetime(1970, 1, 1, tzinfo=timezone.utc),
            end=datetime.now(timezone.utc),
        )

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if self.s3_client is None:
            raise ConnectorMissingCredentialError("Blob storage")

        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        for batch in self._yield_blob_objects(start_datetime, end_datetime):
            yield batch

        return None

    def validate_connector_settings(self) -> None:
        if self.s3_client is None:
            raise ConnectorMissingCredentialError(
                "Blob storage credentials not loaded."
            )

        if not self.bucket_name:
            raise ConnectorValidationError(
                "No bucket name was provided in connector settings."
            )

        try:
            # We only fetch one object/page as a light-weight validation step.
            # This ensures we trigger typical S3 permission checks (ListObjectsV2, etc.).
            self.s3_client.list_objects_v2(
                Bucket=self.bucket_name, Prefix=self.prefix, MaxKeys=1
            )

        except NoCredentialsError:
            raise ConnectorMissingCredentialError(
                "No valid blob storage credentials found or provided to boto3."
            )
        except PartialCredentialsError:
            raise ConnectorMissingCredentialError(
                "Partial or incomplete blob storage credentials provided to boto3."
            )
        except ClientError as e:
            error_code = e.response["Error"].get("Code", "")
            status_code = e.response["ResponseMetadata"].get("HTTPStatusCode")

            # Most common S3 error cases
            if error_code in [
                "AccessDenied",
                "InvalidAccessKeyId",
                "SignatureDoesNotMatch",
            ]:
                if status_code == 403 or error_code == "AccessDenied":
                    raise InsufficientPermissionsError(
                        f"Insufficient permissions to list objects in bucket '{self.bucket_name}'. "
                        "Please check your bucket policy and/or IAM policy."
                    )
                if status_code == 401 or error_code == "SignatureDoesNotMatch":
                    raise CredentialExpiredError(
                        "Provided blob storage credentials appear invalid or expired."
                    )

                raise CredentialExpiredError(
                    f"Credential issue encountered ({error_code})."
                )

            if error_code == "NoSuchBucket" or status_code == 404:
                raise ConnectorValidationError(
                    f"Bucket '{self.bucket_name}' does not exist or cannot be found."
                )

            raise ConnectorValidationError(
                f"Unexpected S3 client error (code={error_code}, status={status_code}): {e}"
            )

        except Exception as e:
            # Catch-all for anything not captured by the above
            # Since we are unsure of the error and it may not disable the connector,
            #  raise an unexpected error (does not disable connector)
            raise UnexpectedValidationError(
                f"Unexpected error during blob storage settings validation: {e}"
            )


if __name__ == "__main__":
    credentials_dict = {
        "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"),
    }

    # Initialize the connector
    connector = BlobStorageConnector(
        bucket_type=os.environ.get("BUCKET_TYPE") or "s3",
        bucket_name=os.environ.get("BUCKET_NAME") or "test",
        prefix="",
    )

    try:
        connector.load_credentials(credentials_dict)
        document_batch_generator = connector.load_from_state()
        for document_batch in document_batch_generator:
            print("First batch of documents:")
            for doc in document_batch:
                if isinstance(doc, HierarchyNode):
                    print("hierarchynode:", doc.display_name)
                    continue

                print(f"Document ID: {doc.id}")
                print(f"Semantic Identifier: {doc.semantic_identifier}")
                print(f"Source: {doc.source}")
                print(f"Updated At: {doc.doc_updated_at}")
                print("Sections:")
                for section in doc.sections:
                    print(f"  - Link: {section.link}")
                    if isinstance(section, TextSection) and section.text is not None:
                        print(f"  - Text: {section.text[:100]}...")
                    elif hasattr(section, "image_file_id") and section.image_file_id:
                        print(f"  - Image: {section.image_file_id}")
                    else:
                        print("Error: Unknown section type")
                print("---")
            break

    except ConnectorMissingCredentialError as e:
        print(f"Error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


================================================
FILE: backend/onyx/connectors/bookstack/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/bookstack/client.py
================================================
from typing import Any

import requests


class BookStackClientRequestFailedError(ConnectionError):
    def __init__(self, status: int, error: str) -> None:
        self.status_code = status
        self.error = error
        super().__init__(
            "BookStack Client request failed with status {status}: {error}".format(
                status=status, error=error
            )
        )


class BookStackApiClient:
    def __init__(
        self,
        base_url: str,
        token_id: str,
        token_secret: str,
    ) -> None:
        self.base_url = base_url
        self.token_id = token_id
        self.token_secret = token_secret

    def get(self, endpoint: str, params: dict[str, str]) -> dict[str, Any]:
        url: str = self._build_url(endpoint)
        headers = self._build_headers()
        response = requests.get(url, headers=headers, params=params)

        try:
            json = response.json()
        except Exception:
            json = {}

        if response.status_code >= 300:
            error = response.reason
            response_error = json.get("error", {}).get("message", "")
            if response_error:
                error = response_error
            raise BookStackClientRequestFailedError(response.status_code, error)

        return json

    def _build_headers(self) -> dict[str, str]:
        auth = "Token " + self.token_id + ":" + self.token_secret
        return {
            "Authorization": auth,
            "Accept": "application/json",
        }

    def _build_url(self, endpoint: str) -> str:
        return self.base_url.rstrip("/") + "/api/" + endpoint.lstrip("/")

    def build_app_url(self, endpoint: str) -> str:
        return self.base_url.rstrip("/") + "/" + endpoint.lstrip("/")


================================================
FILE: backend/onyx/connectors/bookstack/connector.py
================================================
import html
import time
from collections.abc import Callable
from datetime import datetime
from typing import Any

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.bookstack.client import BookStackApiClient
from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic


class BookstackConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.batch_size = batch_size
        self.bookstack_client: BookStackApiClient | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.bookstack_client = BookStackApiClient(
            base_url=credentials["bookstack_base_url"],
            token_id=credentials["bookstack_api_token_id"],
            token_secret=credentials["bookstack_api_token_secret"],
        )
        return None

    @staticmethod
    def _get_doc_batch(
        batch_size: int,
        bookstack_client: BookStackApiClient,
        endpoint: str,
        transformer: Callable[[BookStackApiClient, dict], Document],
        start_ind: int,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> tuple[list[Document | HierarchyNode], int]:
        params = {
            "count": str(batch_size),
            "offset": str(start_ind),
            "sort": "+id",
        }

        if start:
            params["filter[updated_at:gte]"] = datetime.utcfromtimestamp(
                start
            ).strftime("%Y-%m-%d")

        if end:
            params["filter[updated_at:lte]"] = datetime.utcfromtimestamp(end).strftime(
                "%Y-%m-%d"
            )

        batch = bookstack_client.get(endpoint, params=params).get("data", [])
        doc_batch: list[Document | HierarchyNode] = [
            transformer(bookstack_client, item) for item in batch
        ]

        return doc_batch, len(batch)

    @staticmethod
    def _book_to_document(
        bookstack_client: BookStackApiClient, book: dict[str, Any]
    ) -> Document:
        url = bookstack_client.build_app_url("/books/" + str(book.get("slug")))
        title = str(book.get("name", ""))
        text = book.get("name", "") + "\n" + book.get("description", "")
        updated_at_str = (
            str(book.get("updated_at")) if book.get("updated_at") is not None else None
        )
        return Document(
            id="book__" + str(book.get("id")),
            sections=[TextSection(link=url, text=text)],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier="Book: " + title,
            title=title,
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "book"},
        )

    @staticmethod
    def _chapter_to_document(
        bookstack_client: BookStackApiClient, chapter: dict[str, Any]
    ) -> Document:
        url = bookstack_client.build_app_url(
            "/books/"
            + str(chapter.get("book_slug"))
            + "/chapter/"
            + str(chapter.get("slug"))
        )
        title = str(chapter.get("name", ""))
        text = chapter.get("name", "") + "\n" + chapter.get("description", "")
        updated_at_str = (
            str(chapter.get("updated_at"))
            if chapter.get("updated_at") is not None
            else None
        )
        return Document(
            id="chapter__" + str(chapter.get("id")),
            sections=[TextSection(link=url, text=text)],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier="Chapter: " + title,
            title=title,
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "chapter"},
        )

    @staticmethod
    def _shelf_to_document(
        bookstack_client: BookStackApiClient, shelf: dict[str, Any]
    ) -> Document:
        url = bookstack_client.build_app_url("/shelves/" + str(shelf.get("slug")))
        title = str(shelf.get("name", ""))
        text = shelf.get("name", "") + "\n" + shelf.get("description", "")
        updated_at_str = (
            str(shelf.get("updated_at"))
            if shelf.get("updated_at") is not None
            else None
        )
        return Document(
            id="shelf:" + str(shelf.get("id")),
            sections=[TextSection(link=url, text=text)],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier="Shelf: " + title,
            title=title,
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "shelf"},
        )

    @staticmethod
    def _page_to_document(
        bookstack_client: BookStackApiClient, page: dict[str, Any]
    ) -> Document:
        page_id = str(page.get("id"))
        title = str(page.get("name", ""))
        page_data = bookstack_client.get("/pages/" + page_id, {})
        url = bookstack_client.build_app_url(
            "/books/"
            + str(page.get("book_slug"))
            + "/page/"
            + str(page_data.get("slug"))
        )
        page_html = "<h1>" + html.escape(title) + "</h1>" + str(page_data.get("html"))
        text = parse_html_page_basic(page_html)
        updated_at_str = (
            str(page_data.get("updated_at"))
            if page_data.get("updated_at") is not None
            else None
        )
        time.sleep(0.1)
        return Document(
            id="page:" + page_id,
            sections=[TextSection(link=url, text=text)],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier="Page: " + str(title),
            title=str(title),
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "page"},
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        if self.bookstack_client is None:
            raise ConnectorMissingCredentialError("Bookstack")

        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        if self.bookstack_client is None:
            raise ConnectorMissingCredentialError("Bookstack")

        transform_by_endpoint: dict[
            str, Callable[[BookStackApiClient, dict], Document]
        ] = {
            "/books": self._book_to_document,
            "/chapters": self._chapter_to_document,
            "/shelves": self._shelf_to_document,
            "/pages": self._page_to_document,
        }

        for endpoint, transform in transform_by_endpoint.items():
            start_ind = 0
            while True:
                doc_batch, num_results = self._get_doc_batch(
                    batch_size=self.batch_size,
                    bookstack_client=self.bookstack_client,
                    endpoint=endpoint,
                    transformer=transform,
                    start_ind=start_ind,
                    start=start,
                    end=end,
                )
                start_ind += num_results
                if doc_batch:
                    yield doc_batch

                if num_results < self.batch_size:
                    break
                else:
                    time.sleep(0.2)

    def validate_connector_settings(self) -> None:
        """
        Validate that the BookStack credentials and connector settings are correct.
        Specifically checks that we can make an authenticated request to BookStack.
        """
        if not self.bookstack_client:
            raise ConnectorMissingCredentialError(
                "BookStack credentials have not been loaded."
            )

        try:
            # Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials
            _ = self.bookstack_client.get(
                "/books", params={"count": "1", "offset": "0"}
            )

        except BookStackClientRequestFailedError as e:
            # Check for HTTP status codes
            if e.status_code == 401:
                raise CredentialExpiredError(
                    "Your BookStack credentials appear to be invalid or expired (HTTP 401)."
                ) from e
            elif e.status_code == 403:
                raise InsufficientPermissionsError(
                    "The configured BookStack token does not have sufficient permissions (HTTP 403)."
                ) from e
            else:
                raise ConnectorValidationError(
                    f"Unexpected BookStack error (status={e.status_code}): {e}"
                ) from e

        except Exception as exc:
            raise ConnectorValidationError(
                f"Unexpected error while validating BookStack connector settings: {exc}"
            ) from exc


================================================
FILE: backend/onyx/connectors/canvas/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/canvas/access.py
================================================
"""
Permissioning / AccessControl logic for Canvas courses.

CE stub — returns None (no permissions). The EE implementation is loaded
at runtime via ``fetch_versioned_implementation``.
"""

from collections.abc import Callable
from typing import cast

from onyx.access.models import ExternalAccess
from onyx.connectors.canvas.client import CanvasApiClient
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


def get_course_permissions(
    canvas_client: CanvasApiClient,
    course_id: int,
) -> ExternalAccess | None:
    if not global_version.is_ee_version():
        return None

    ee_get_course_permissions = cast(
        Callable[[CanvasApiClient, int], ExternalAccess | None],
        fetch_versioned_implementation(
            "onyx.external_permissions.canvas.access",
            "get_course_permissions",
        ),
    )

    return ee_get_course_permissions(canvas_client, course_id)


================================================
FILE: backend/onyx/connectors/canvas/client.py
================================================
from __future__ import annotations

import logging
import re
from collections.abc import Iterator
from typing import Any
from urllib.parse import urlparse

from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rl_requests,
)
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError

logger = logging.getLogger(__name__)

# Requests timeout in seconds.
_CANVAS_CALL_TIMEOUT: int = 30
_CANVAS_API_VERSION: str = "/api/v1"
# Matches the "next" URL in a Canvas Link header, e.g.:
#   <https://canvas.example.com/api/v1/courses?page=2>; rel="next"
# Captures the URL inside the angle brackets.
_NEXT_LINK_PATTERN: re.Pattern[str] = re.compile(r'<([^>]+)>;\s*rel="next"')


_STATUS_TO_ERROR_CODE: dict[int, OnyxErrorCode] = {
    401: OnyxErrorCode.CREDENTIAL_EXPIRED,
    403: OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
    404: OnyxErrorCode.BAD_GATEWAY,
    429: OnyxErrorCode.RATE_LIMITED,
}


def _error_code_for_status(status_code: int) -> OnyxErrorCode:
    """Map an HTTP status code to the appropriate OnyxErrorCode.

    Expects a >= 400 status code. Known codes (401, 403, 404, 429) are
    mapped to specific error codes; all other codes (unrecognised 4xx
    and 5xx) map to BAD_GATEWAY as unexpected upstream errors.
    """
    if status_code in _STATUS_TO_ERROR_CODE:
        return _STATUS_TO_ERROR_CODE[status_code]
    return OnyxErrorCode.BAD_GATEWAY


class CanvasApiClient:
    def __init__(
        self,
        bearer_token: str,
        canvas_base_url: str,
    ) -> None:
        parsed_base = urlparse(canvas_base_url)
        if not parsed_base.hostname:
            raise ValueError("canvas_base_url must include a valid host")
        if parsed_base.scheme != "https":
            raise ValueError("canvas_base_url must use https")

        self._bearer_token = bearer_token
        self.base_url = (
            canvas_base_url.rstrip("/").removesuffix(_CANVAS_API_VERSION)
            + _CANVAS_API_VERSION
        )
        # Hostname is already validated above; reuse parsed_base instead
        # of re-parsing.  Used by _parse_next_link to validate pagination URLs.
        self._expected_host: str = parsed_base.hostname

    def get(
        self,
        endpoint: str = "",
        params: dict[str, Any] | None = None,
        full_url: str | None = None,
    ) -> tuple[Any, str | None]:
        """Make a GET request to the Canvas API.

        Returns a tuple of (json_body, next_url).
        next_url is parsed from the Link header and is None if there are no more pages.
        If full_url is provided, it is used directly (for following pagination links).

        Security note: full_url must only be set to values returned by
        ``_parse_next_link``, which validates the host against the configured
        Canvas base URL.  Passing an arbitrary URL would leak the bearer token.
        """
        # full_url is used when following pagination (Canvas returns the
        # next-page URL in the Link header).  For the first request we build
        # the URL from the endpoint name instead.
        url = full_url if full_url else self._build_url(endpoint)
        headers = self._build_headers()

        response = rl_requests.get(
            url,
            headers=headers,
            params=params if not full_url else None,
            timeout=_CANVAS_CALL_TIMEOUT,
        )

        try:
            response_json = response.json()
        except ValueError as e:
            if response.status_code < 300:
                raise OnyxError(
                    OnyxErrorCode.BAD_GATEWAY,
                    detail=f"Invalid JSON in Canvas response: {e}",
                )
            logger.warning(
                "Failed to parse JSON from Canvas error response (status=%d): %s",
                response.status_code,
                e,
            )
            response_json = {}

        if response.status_code >= 400:
            # Try to extract the most specific error message from the
            # Canvas response body.  Canvas uses three different shapes
            # depending on the endpoint and error type:
            default_error: str = response.reason or f"HTTP {response.status_code}"
            error = default_error
            if isinstance(response_json, dict):
                # Shape 1: {"error": {"message": "Not authorized"}}
                error_field = response_json.get("error")
                if isinstance(error_field, dict):
                    response_error = error_field.get("message", "")
                    if response_error:
                        error = response_error
                # Shape 2: {"error": "Invalid access token"}
                elif isinstance(error_field, str):
                    error = error_field
                # Shape 3: {"errors": [{"message": "..."}]}
                # Used for validation errors.  Only use as fallback if
                # we didn't already find a more specific message above.
                if error == default_error:
                    errors_list = response_json.get("errors")
                    if isinstance(errors_list, list) and errors_list:
                        first_error = errors_list[0]
                        if isinstance(first_error, dict):
                            msg = first_error.get("message", "")
                            if msg:
                                error = msg
            raise OnyxError(
                _error_code_for_status(response.status_code),
                detail=error,
                status_code_override=response.status_code,
            )

        next_url = self._parse_next_link(response.headers.get("Link", ""))
        return response_json, next_url

    def _parse_next_link(self, link_header: str) -> str | None:
        """Extract the 'next' URL from a Canvas Link header.

        Only returns URLs whose host matches the configured Canvas base URL
        to prevent leaking the bearer token to arbitrary hosts.
        """
        expected_host = self._expected_host
        for match in _NEXT_LINK_PATTERN.finditer(link_header):
            url = match.group(1)
            parsed_url = urlparse(url)
            if parsed_url.hostname != expected_host:
                raise OnyxError(
                    OnyxErrorCode.BAD_GATEWAY,
                    detail=(
                        "Canvas pagination returned an unexpected host "
                        f"({parsed_url.hostname}); expected {expected_host}"
                    ),
                )
            if parsed_url.scheme != "https":
                raise OnyxError(
                    OnyxErrorCode.BAD_GATEWAY,
                    detail=(
                        "Canvas pagination link must use https, "
                        f"got {parsed_url.scheme!r}"
                    ),
                )
            return url
        return None

    def _build_headers(self) -> dict[str, str]:
        """Return the Authorization header with the bearer token."""
        return {"Authorization": f"Bearer {self._bearer_token}"}

    def _build_url(self, endpoint: str) -> str:
        """Build a full Canvas API URL from an endpoint path.

        Assumes endpoint is non-empty (e.g. ``"courses"``, ``"announcements"``).
        Only called on a first request, endpoint must be set for first request.
        Verify endpoint exists in case of future changes where endpoint might be optional.
        Leading slashes are stripped to avoid double-slash in the result.
        self.base_url is already normalized with no trailing slash.
        """
        final_url = self.base_url
        clean_endpoint = endpoint.lstrip("/")
        if clean_endpoint:
            final_url += "/" + clean_endpoint
        return final_url

    def paginate(
        self,
        endpoint: str,
        params: dict[str, Any] | None = None,
    ) -> Iterator[list[Any]]:
        """Yield each page of results, following Link-header pagination.

        Makes the first request with endpoint + params, then follows
        next_url from Link headers for subsequent pages.
        """
        response, next_url = self.get(endpoint, params=params)
        while True:
            if not response:
                break
            yield response
            if not next_url:
                break
            response, next_url = self.get(full_url=next_url)


================================================
FILE: backend/onyx/connectors/canvas/connector.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Literal
from typing import NoReturn
from typing import TypeAlias

from pydantic import BaseModel
from retry import retry
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.canvas.access import get_course_permissions
from onyx.connectors.canvas.client import CanvasApiClient
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.error_handling.exceptions import OnyxError
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
    """Map Canvas API errors to connector framework exceptions."""
    if e.status_code == 401:
        raise CredentialExpiredError(
            "Canvas API token is invalid or expired (HTTP 401)."
        )
    elif e.status_code == 403:
        raise InsufficientPermissionsError(
            "Canvas API token does not have sufficient permissions (HTTP 403)."
        )
    elif e.status_code == 429:
        raise ConnectorValidationError(
            "Canvas rate-limit exceeded (HTTP 429). Please try again later."
        )
    elif e.status_code >= 500:
        raise UnexpectedValidationError(
            f"Unexpected Canvas HTTP error (status={e.status_code}): {e}"
        )
    else:
        raise ConnectorValidationError(
            f"Canvas API error (status={e.status_code}): {e}"
        )


class CanvasCourse(BaseModel):
    id: int
    name: str | None = None
    course_code: str | None = None
    created_at: str | None = None
    workflow_state: str | None = None

    @classmethod
    def from_api(cls, payload: dict[str, Any]) -> "CanvasCourse":
        return cls(
            id=payload["id"],
            name=payload.get("name"),
            course_code=payload.get("course_code"),
            created_at=payload.get("created_at"),
            workflow_state=payload.get("workflow_state"),
        )


class CanvasPage(BaseModel):
    page_id: int
    url: str
    title: str
    body: str | None = None
    created_at: str | None = None
    updated_at: str | None = None
    course_id: int

    @classmethod
    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasPage":
        return cls(
            page_id=payload["page_id"],
            url=payload["url"],
            title=payload["title"],
            body=payload.get("body"),
            created_at=payload.get("created_at"),
            updated_at=payload.get("updated_at"),
            course_id=course_id,
        )


class CanvasAssignment(BaseModel):
    id: int
    name: str
    description: str | None = None
    html_url: str
    course_id: int
    created_at: str | None = None
    updated_at: str | None = None
    due_at: str | None = None

    @classmethod
    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAssignment":
        return cls(
            id=payload["id"],
            name=payload["name"],
            description=payload.get("description"),
            html_url=payload["html_url"],
            course_id=course_id,
            created_at=payload.get("created_at"),
            updated_at=payload.get("updated_at"),
            due_at=payload.get("due_at"),
        )


class CanvasAnnouncement(BaseModel):
    id: int
    title: str
    message: str | None = None
    html_url: str
    posted_at: str | None = None
    course_id: int

    @classmethod
    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAnnouncement":
        return cls(
            id=payload["id"],
            title=payload["title"],
            message=payload.get("message"),
            html_url=payload["html_url"],
            posted_at=payload.get("posted_at"),
            course_id=course_id,
        )


CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]


class CanvasConnectorCheckpoint(ConnectorCheckpoint):
    """Checkpoint state for resumable Canvas indexing.

    Fields:
        course_ids: Materialized list of course IDs to process.
        current_course_index: Index into course_ids for current course.
        stage: Which item type we're processing for the current course.
        next_url: Pagination cursor within the current stage. None means
            start from the first page; a URL means resume from that page.

    Invariant:
        If current_course_index is incremented, stage must be reset to
        "pages" and next_url must be reset to None.
    """

    course_ids: list[int] = []
    current_course_index: int = 0
    stage: CanvasStage = "pages"
    next_url: str | None = None

    def advance_course(self) -> None:
        """Move to the next course and reset within-course state."""
        self.current_course_index += 1
        self.stage = "pages"
        self.next_url = None


class CanvasConnector(
    CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
    SlimConnectorWithPermSync,
):
    def __init__(
        self,
        canvas_base_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.canvas_base_url = canvas_base_url.rstrip("/").removesuffix("/api/v1")
        self.batch_size = batch_size
        self._canvas_client: CanvasApiClient | None = None
        self._course_permissions_cache: dict[int, ExternalAccess | None] = {}

    @property
    def canvas_client(self) -> CanvasApiClient:
        if self._canvas_client is None:
            raise ConnectorMissingCredentialError("Canvas")
        return self._canvas_client

    def _get_course_permissions(self, course_id: int) -> ExternalAccess | None:
        """Get course permissions with caching."""
        if course_id not in self._course_permissions_cache:
            self._course_permissions_cache[course_id] = get_course_permissions(
                canvas_client=self.canvas_client,
                course_id=course_id,
            )
        return self._course_permissions_cache[course_id]

    @retry(tries=3, delay=1, backoff=2)
    def _list_courses(self) -> list[CanvasCourse]:
        """Fetch all courses accessible to the authenticated user."""
        logger.debug("Fetching Canvas courses")

        courses: list[CanvasCourse] = []
        for page in self.canvas_client.paginate(
            "courses", params={"per_page": "100", "state[]": "available"}
        ):
            courses.extend(CanvasCourse.from_api(c) for c in page)
        return courses

    @retry(tries=3, delay=1, backoff=2)
    def _list_pages(self, course_id: int) -> list[CanvasPage]:
        """Fetch all pages for a given course."""
        logger.debug(f"Fetching pages for course {course_id}")

        pages: list[CanvasPage] = []
        for page in self.canvas_client.paginate(
            f"courses/{course_id}/pages",
            params={"per_page": "100", "include[]": "body", "published": "true"},
        ):
            pages.extend(CanvasPage.from_api(p, course_id=course_id) for p in page)
        return pages

    @retry(tries=3, delay=1, backoff=2)
    def _list_assignments(self, course_id: int) -> list[CanvasAssignment]:
        """Fetch all assignments for a given course."""
        logger.debug(f"Fetching assignments for course {course_id}")

        assignments: list[CanvasAssignment] = []
        for page in self.canvas_client.paginate(
            f"courses/{course_id}/assignments",
            params={"per_page": "100", "published": "true"},
        ):
            assignments.extend(
                CanvasAssignment.from_api(a, course_id=course_id) for a in page
            )
        return assignments

    @retry(tries=3, delay=1, backoff=2)
    def _list_announcements(self, course_id: int) -> list[CanvasAnnouncement]:
        """Fetch all announcements for a given course."""
        logger.debug(f"Fetching announcements for course {course_id}")

        announcements: list[CanvasAnnouncement] = []
        for page in self.canvas_client.paginate(
            "announcements",
            params={
                "per_page": "100",
                "context_codes[]": f"course_{course_id}",
                "active_only": "true",
            },
        ):
            announcements.extend(
                CanvasAnnouncement.from_api(a, course_id=course_id) for a in page
            )
        return announcements

    def _build_document(
        self,
        doc_id: str,
        link: str,
        text: str,
        semantic_identifier: str,
        doc_updated_at: datetime | None,
        course_id: int,
        doc_type: str,
    ) -> Document:
        """Build a Document with standard Canvas fields."""
        return Document(
            id=doc_id,
            sections=cast(
                list[TextSection | ImageSection],
                [TextSection(link=link, text=text)],
            ),
            source=DocumentSource.CANVAS,
            semantic_identifier=semantic_identifier,
            doc_updated_at=doc_updated_at,
            metadata={"course_id": str(course_id), "type": doc_type},
        )

    def _convert_page_to_document(self, page: CanvasPage) -> Document:
        """Convert a Canvas page to a Document."""
        link = f"{self.canvas_base_url}/courses/{page.course_id}/pages/{page.url}"

        text_parts = [page.title]
        body_text = parse_html_page_basic(page.body) if page.body else ""
        if body_text:
            text_parts.append(body_text)

        doc_updated_at = (
            datetime.fromisoformat(page.updated_at.replace("Z", "+00:00")).astimezone(
                timezone.utc
            )
            if page.updated_at
            else None
        )

        document = self._build_document(
            doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
            link=link,
            text="\n\n".join(text_parts),
            semantic_identifier=page.title or f"Page {page.page_id}",
            doc_updated_at=doc_updated_at,
            course_id=page.course_id,
            doc_type="page",
        )
        return document

    def _convert_assignment_to_document(self, assignment: CanvasAssignment) -> Document:
        """Convert a Canvas assignment to a Document."""
        text_parts = [assignment.name]
        desc_text = (
            parse_html_page_basic(assignment.description)
            if assignment.description
            else ""
        )
        if desc_text:
            text_parts.append(desc_text)
        if assignment.due_at:
            due_dt = datetime.fromisoformat(
                assignment.due_at.replace("Z", "+00:00")
            ).astimezone(timezone.utc)
            text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")

        doc_updated_at = (
            datetime.fromisoformat(
                assignment.updated_at.replace("Z", "+00:00")
            ).astimezone(timezone.utc)
            if assignment.updated_at
            else None
        )

        document = self._build_document(
            doc_id=f"canvas-assignment-{assignment.course_id}-{assignment.id}",
            link=assignment.html_url,
            text="\n\n".join(text_parts),
            semantic_identifier=assignment.name or f"Assignment {assignment.id}",
            doc_updated_at=doc_updated_at,
            course_id=assignment.course_id,
            doc_type="assignment",
        )
        return document

    def _convert_announcement_to_document(
        self, announcement: CanvasAnnouncement
    ) -> Document:
        """Convert a Canvas announcement to a Document."""
        text_parts = [announcement.title]
        msg_text = (
            parse_html_page_basic(announcement.message) if announcement.message else ""
        )
        if msg_text:
            text_parts.append(msg_text)

        doc_updated_at = (
            datetime.fromisoformat(
                announcement.posted_at.replace("Z", "+00:00")
            ).astimezone(timezone.utc)
            if announcement.posted_at
            else None
        )

        document = self._build_document(
            doc_id=f"canvas-announcement-{announcement.course_id}-{announcement.id}",
            link=announcement.html_url,
            text="\n\n".join(text_parts),
            semantic_identifier=announcement.title or f"Announcement {announcement.id}",
            doc_updated_at=doc_updated_at,
            course_id=announcement.course_id,
            doc_type="announcement",
        )
        return document

    @override
    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Load and validate Canvas credentials."""
        access_token = credentials.get("canvas_access_token")
        if not access_token:
            raise ConnectorMissingCredentialError("Canvas")

        try:
            client = CanvasApiClient(
                bearer_token=access_token,
                canvas_base_url=self.canvas_base_url,
            )
            client.get("courses", params={"per_page": "1"})
        except ValueError as e:
            raise ConnectorValidationError(f"Invalid Canvas base URL: {e}")
        except OnyxError as e:
            _handle_canvas_api_error(e)

        self._canvas_client = client
        return None

    @override
    def validate_connector_settings(self) -> None:
        """Validate Canvas connector settings by testing API access."""
        try:
            self.canvas_client.get("courses", params={"per_page": "1"})
            logger.info("Canvas connector settings validated successfully")
        except OnyxError as e:
            _handle_canvas_api_error(e)
        except ConnectorMissingCredentialError:
            raise
        except Exception as exc:
            raise UnexpectedValidationError(
                f"Unexpected error during Canvas settings validation: {exc}"
            )

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: CanvasConnectorCheckpoint,
    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
        # TODO(benwu408): implemented in PR3 (checkpoint)
        raise NotImplementedError

    @override
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: CanvasConnectorCheckpoint,
    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
        # TODO(benwu408): implemented in PR3 (checkpoint)
        raise NotImplementedError

    @override
    def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
        # TODO(benwu408): implemented in PR3 (checkpoint)
        raise NotImplementedError

    @override
    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> CanvasConnectorCheckpoint:
        # TODO(benwu408): implemented in PR3 (checkpoint)
        raise NotImplementedError

    @override
    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        # TODO(benwu408): implemented in PR4 (perm sync)
        raise NotImplementedError


================================================
FILE: backend/onyx/connectors/clickup/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/clickup/connector.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Optional

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.retry_wrapper import retry_builder


CLICKUP_API_BASE_URL = "https://api.clickup.com/api/v2"


class ClickupConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        api_token: str | None = None,
        team_id: str | None = None,
        connector_type: str | None = None,
        connector_ids: list[str] | None = None,
        retrieve_task_comments: bool = True,
    ) -> None:
        self.batch_size = batch_size
        self.api_token = api_token
        self.team_id = team_id
        self.connector_type = connector_type if connector_type else "workspace"
        self.connector_ids = connector_ids
        self.retrieve_task_comments = retrieve_task_comments

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.api_token = credentials["clickup_api_token"]
        self.team_id = credentials["clickup_team_id"]
        return None

    @retry_builder()
    @rate_limit_builder(max_calls=100, period=60)
    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:
        if not self.api_token:
            raise ConnectorMissingCredentialError("Clickup")

        headers = {"Authorization": self.api_token}

        response = requests.get(
            f"{CLICKUP_API_BASE_URL}/{endpoint}", headers=headers, params=params
        )

        response.raise_for_status()

        return response.json()

    def _get_task_comments(self, task_id: str) -> list[TextSection]:
        url_endpoint = f"/task/{task_id}/comment"
        response = self._make_request(url_endpoint)
        comments = [
            TextSection(
                link=f"https://app.clickup.com/t/{task_id}?comment={comment_dict['id']}",
                text=comment_dict["comment_text"],
            )
            for comment_dict in response["comments"]
        ]

        return comments

    def _get_all_tasks_filtered(
        self,
        start: int | None = None,
        end: int | None = None,
    ) -> GenerateDocumentsOutput:
        doc_batch: list[Document | HierarchyNode] = []
        page: int = 0
        params = {
            "include_markdown_description": "true",
            "include_closed": "true",
            "page": page,
        }

        if start is not None:
            params["date_updated_gt"] = start
        if end is not None:
            params["date_updated_lt"] = end

        if self.connector_type == "list":
            params["list_ids[]"] = self.connector_ids
        elif self.connector_type == "folder":
            params["project_ids[]"] = self.connector_ids
        elif self.connector_type == "space":
            params["space_ids[]"] = self.connector_ids

        url_endpoint = f"/team/{self.team_id}/task"

        while True:
            response = self._make_request(url_endpoint, params)

            page += 1
            params["page"] = page

            for task in response["tasks"]:
                document = Document(
                    id=task["id"],
                    source=DocumentSource.CLICKUP,
                    semantic_identifier=task["name"],
                    doc_updated_at=(
                        datetime.fromtimestamp(
                            round(float(task["date_updated"]) / 1000, 3)
                        ).replace(tzinfo=timezone.utc)
                    ),
                    primary_owners=[
                        BasicExpertInfo(
                            display_name=task["creator"]["username"],
                            email=task["creator"]["email"],
                        )
                    ],
                    secondary_owners=[
                        BasicExpertInfo(
                            display_name=assignee["username"],
                            email=assignee["email"],
                        )
                        for assignee in task["assignees"]
                    ],
                    title=task["name"],
                    sections=[
                        TextSection(
                            link=task["url"],
                            text=(
                                task["markdown_description"]
                                if "markdown_description" in task
                                else task["description"]
                            ),
                        )
                    ],
                    metadata={
                        "id": task["id"],
                        "status": task["status"]["status"],
                        "list": task["list"]["name"],
                        "project": task["project"]["name"],
                        "folder": task["folder"]["name"],
                        "space_id": task["space"]["id"],
                        "tags": [tag["name"] for tag in task["tags"]],
                        "priority": (
                            task["priority"]["priority"]
                            if "priority" in task and task["priority"] is not None
                            else ""
                        ),
                    },
                )

                extra_fields = [
                    "date_created",
                    "date_updated",
                    "date_closed",
                    "date_done",
                    "due_date",
                ]
                for extra_field in extra_fields:
                    if extra_field in task and task[extra_field] is not None:
                        document.metadata[extra_field] = task[extra_field]

                if self.retrieve_task_comments:
                    document.sections.extend(self._get_task_comments(task["id"]))

                doc_batch.append(document)

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

            if response.get("last_page") is True or len(response["tasks"]) < 100:
                break

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        if self.api_token is None:
            raise ConnectorMissingCredentialError("Clickup")

        return self._get_all_tasks_filtered(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if self.api_token is None:
            raise ConnectorMissingCredentialError("Clickup")

        return self._get_all_tasks_filtered(int(start * 1000), int(end * 1000))


if __name__ == "__main__":
    import os

    clickup_connector = ClickupConnector()

    clickup_connector.load_credentials(
        {
            "clickup_api_token": os.environ["clickup_api_token"],
            "clickup_team_id": os.environ["clickup_team_id"],
        }
    )

    latest_docs = clickup_connector.load_from_state()

    for doc in latest_docs:
        print(doc)


================================================
FILE: backend/onyx/connectors/coda/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/coda/connector.py
================================================
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Optional

from pydantic import BaseModel
from retry import retry

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rl_requests,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger

_CODA_CALL_TIMEOUT = 30
_CODA_BASE_URL = "https://coda.io/apis/v1"

logger = setup_logger()


class CodaClientRequestFailedError(ConnectionError):
    def __init__(self, message: str, status_code: int):
        super().__init__(
            f"Coda API request failed with status {status_code}: {message}"
        )
        self.status_code = status_code


class CodaDoc(BaseModel):
    id: str
    browser_link: str
    name: str
    created_at: str
    updated_at: str
    workspace_id: str
    workspace_name: str
    folder_id: str | None
    folder_name: str | None


class CodaPage(BaseModel):
    id: str
    browser_link: str
    name: str
    content_type: str
    created_at: str
    updated_at: str
    doc_id: str


class CodaTable(BaseModel):
    id: str
    name: str
    browser_link: str
    created_at: str
    updated_at: str
    doc_id: str


class CodaRow(BaseModel):
    id: str
    name: Optional[str] = None
    index: Optional[int] = None
    browser_link: str
    created_at: str
    updated_at: str
    values: Dict[str, Any]
    table_id: str
    doc_id: str


class CodaApiClient:
    def __init__(
        self,
        bearer_token: str,
    ) -> None:
        self.bearer_token = bearer_token
        self.base_url = os.environ.get("CODA_BASE_URL", _CODA_BASE_URL)

    def get(
        self, endpoint: str, params: Optional[dict[str, str]] = None
    ) -> dict[str, Any]:
        url = self._build_url(endpoint)
        headers = self._build_headers()

        response = rl_requests.get(
            url, headers=headers, params=params, timeout=_CODA_CALL_TIMEOUT
        )

        try:
            json = response.json()
        except Exception:
            json = {}

        if response.status_code >= 300:
            error = response.reason
            response_error = json.get("error", {}).get("message", "")
            if response_error:
                error = response_error
            raise CodaClientRequestFailedError(error, response.status_code)

        return json

    def _build_headers(self) -> Dict[str, str]:
        return {"Authorization": f"Bearer {self.bearer_token}"}

    def _build_url(self, endpoint: str) -> str:
        return self.base_url.rstrip("/") + "/" + endpoint.lstrip("/")


class CodaConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        index_page_content: bool = True,
        workspace_id: str | None = None,
    ) -> None:
        self.batch_size = batch_size
        self.index_page_content = index_page_content
        self.workspace_id = workspace_id
        self._coda_client: CodaApiClient | None = None

    @property
    def coda_client(self) -> CodaApiClient:
        if self._coda_client is None:
            raise ConnectorMissingCredentialError("Coda")
        return self._coda_client

    @retry(tries=3, delay=1, backoff=2)
    def _get_doc(self, doc_id: str) -> CodaDoc:
        """Fetch a specific Coda document by its ID."""
        logger.debug(f"Fetching Coda doc with ID: {doc_id}")
        try:
            response = self.coda_client.get(f"docs/{doc_id}")
        except CodaClientRequestFailedError as e:
            if e.status_code == 404:
                raise ConnectorValidationError(f"Failed to fetch doc: {doc_id}") from e
            else:
                raise

        return CodaDoc(
            id=response["id"],
            browser_link=response["browserLink"],
            name=response["name"],
            created_at=response["createdAt"],
            updated_at=response["updatedAt"],
            workspace_id=response["workspace"]["id"],
            workspace_name=response["workspace"]["name"],
            folder_id=response["folder"]["id"] if response.get("folder") else None,
            folder_name=response["folder"]["name"] if response.get("folder") else None,
        )

    @retry(tries=3, delay=1, backoff=2)
    def _get_page(self, doc_id: str, page_id: str) -> CodaPage:
        """Fetch a specific page from a Coda document."""
        logger.debug(f"Fetching Coda page with ID: {page_id}")
        try:
            response = self.coda_client.get(f"docs/{doc_id}/pages/{page_id}")
        except CodaClientRequestFailedError as e:
            if e.status_code == 404:
                raise ConnectorValidationError(
                    f"Failed to fetch page: {page_id} from doc: {doc_id}"
                ) from e
            else:
                raise

        return CodaPage(
            id=response["id"],
            doc_id=doc_id,
            browser_link=response["browserLink"],
            name=response["name"],
            content_type=response["contentType"],
            created_at=response["createdAt"],
            updated_at=response["updatedAt"],
        )

    @retry(tries=3, delay=1, backoff=2)
    def _get_table(self, doc_id: str, table_id: str) -> CodaTable:
        """Fetch a specific table from a Coda document."""
        logger.debug(f"Fetching Coda table with ID: {table_id}")
        try:
            response = self.coda_client.get(f"docs/{doc_id}/tables/{table_id}")
        except CodaClientRequestFailedError as e:
            if e.status_code == 404:
                raise ConnectorValidationError(
                    f"Failed to fetch table: {table_id} from doc: {doc_id}"
                ) from e
            else:
                raise

        return CodaTable(
            id=response["id"],
            name=response["name"],
            browser_link=response["browserLink"],
            created_at=response["createdAt"],
            updated_at=response["updatedAt"],
            doc_id=doc_id,
        )

    @retry(tries=3, delay=1, backoff=2)
    def _get_row(self, doc_id: str, table_id: str, row_id: str) -> CodaRow:
        """Fetch a specific row from a Coda table."""
        logger.debug(f"Fetching Coda row with ID: {row_id}")
        try:
            response = self.coda_client.get(
                f"docs/{doc_id}/tables/{table_id}/rows/{row_id}"
            )
        except CodaClientRequestFailedError as e:
            if e.status_code == 404:
                raise ConnectorValidationError(
                    f"Failed to fetch row: {row_id} from table: {table_id} in doc: {doc_id}"
                ) from e
            else:
                raise

        values = {}
        for col_name, col_value in response.get("values", {}).items():
            values[col_name] = col_value

        return CodaRow(
            id=response["id"],
            name=response.get("name"),
            index=response.get("index"),
            browser_link=response["browserLink"],
            created_at=response["createdAt"],
            updated_at=response["updatedAt"],
            values=values,
            table_id=table_id,
            doc_id=doc_id,
        )

    @retry(tries=3, delay=1, backoff=2)
    def _list_all_docs(
        self, endpoint: str = "docs", params: Optional[Dict[str, str]] = None
    ) -> List[CodaDoc]:
        """List all Coda documents in the workspace."""
        logger.debug("Listing documents in Coda")

        all_docs: List[CodaDoc] = []
        next_page_token: str | None = None
        params = params or {}

        if self.workspace_id:
            params["workspaceId"] = self.workspace_id

        while True:
            if next_page_token:
                params["pageToken"] = next_page_token

            try:
                response = self.coda_client.get(endpoint, params=params)
            except CodaClientRequestFailedError as e:
                if e.status_code == 404:
                    raise ConnectorValidationError("Failed to list docs") from e
                else:
                    raise

            items = response.get("items", [])

            for item in items:
                doc = CodaDoc(
                    id=item["id"],
                    browser_link=item["browserLink"],
                    name=item["name"],
                    created_at=item["createdAt"],
                    updated_at=item["updatedAt"],
                    workspace_id=item["workspace"]["id"],
                    workspace_name=item["workspace"]["name"],
                    folder_id=item["folder"]["id"] if item.get("folder") else None,
                    folder_name=item["folder"]["name"] if item.get("folder") else None,
                )
                all_docs.append(doc)

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logger.debug(f"Found {len(all_docs)} docs")
        return all_docs

    @retry(tries=3, delay=1, backoff=2)
    def _list_pages_in_doc(self, doc_id: str) -> List[CodaPage]:
        """List all pages in a Coda document."""
        logger.debug(f"Listing pages in Coda doc with ID: {doc_id}")

        pages: List[CodaPage] = []
        endpoint = f"docs/{doc_id}/pages"
        params: Dict[str, str] = {}
        next_page_token: str | None = None

        while True:
            if next_page_token:
                params["pageToken"] = next_page_token

            try:
                response = self.coda_client.get(endpoint, params=params)
            except CodaClientRequestFailedError as e:
                if e.status_code == 404:
                    raise ConnectorValidationError(
                        f"Failed to list pages for doc: {doc_id}"
                    ) from e
                else:
                    raise

            items = response.get("items", [])
            for item in items:
                # can be removed if we don't care to skip hidden pages
                if item.get("isHidden", False):
                    continue

                pages.append(
                    CodaPage(
                        id=item["id"],
                        browser_link=item["browserLink"],
                        name=item["name"],
                        content_type=item["contentType"],
                        created_at=item["createdAt"],
                        updated_at=item["updatedAt"],
                        doc_id=doc_id,
                    )
                )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logger.debug(f"Found {len(pages)} pages in doc {doc_id}")
        return pages

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_page_content(self, doc_id: str, page_id: str) -> str:
        """Fetch the content of a Coda page."""
        logger.debug(f"Fetching content for page {page_id} in doc {doc_id}")

        content_parts = []
        next_page_token: str | None = None
        params: Dict[str, str] = {}

        while True:
            if next_page_token:
                params["pageToken"] = next_page_token

            try:
                response = self.coda_client.get(
                    f"docs/{doc_id}/pages/{page_id}/content", params=params
                )
            except CodaClientRequestFailedError as e:
                if e.status_code == 404:
                    logger.debug(f"No content available for page {page_id}")
                    return ""
                raise

            items = response.get("items", [])

            for item in items:
                item_content = item.get("itemContent", {})

                content_text = item_content.get("content", "")
                if content_text:
                    content_parts.append(content_text)

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        return "\n\n".join(content_parts)

    @retry(tries=3, delay=1, backoff=2)
    def _list_tables(self, doc_id: str) -> List[CodaTable]:
        """List all tables in a Coda document."""
        logger.debug(f"Listing tables in Coda doc with ID: {doc_id}")

        tables: List[CodaTable] = []
        endpoint = f"docs/{doc_id}/tables"
        params: Dict[str, str] = {}
        next_page_token: str | None = None

        while True:
            if next_page_token:
                params["pageToken"] = next_page_token

            try:
                response = self.coda_client.get(endpoint, params=params)
            except CodaClientRequestFailedError as e:
                if e.status_code == 404:
                    raise ConnectorValidationError(
                        f"Failed to list tables for doc: {doc_id}"
                    ) from e
                else:
                    raise

            items = response.get("items", [])
            for item in items:
                tables.append(
                    CodaTable(
                        id=item["id"],
                        browser_link=item["browserLink"],
                        name=item["name"],
                        created_at=item["createdAt"],
                        updated_at=item["updatedAt"],
                        doc_id=doc_id,
                    )
                )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logger.debug(f"Found {len(tables)} tables in doc {doc_id}")
        return tables

    @retry(tries=3, delay=1, backoff=2)
    def _list_rows_and_values(self, doc_id: str, table_id: str) -> List[CodaRow]:
        """List all rows and their values in a table."""
        logger.debug(f"Listing rows in Coda table: {table_id} in Coda doc: {doc_id}")

        rows: List[CodaRow] = []
        endpoint = f"docs/{doc_id}/tables/{table_id}/rows"
        params: Dict[str, str] = {"valueFormat": "rich"}
        next_page_token: str | None = None

        while True:
            if next_page_token:
                params["pageToken"] = next_page_token

            try:
                response = self.coda_client.get(endpoint, params=params)
            except CodaClientRequestFailedError as e:
                if e.status_code == 404:
                    raise ConnectorValidationError(
                        f"Failed to list rows for table: {table_id} in doc: {doc_id}"
                    ) from e
                else:
                    raise

            items = response.get("items", [])
            for item in items:
                values = {}
                for col_name, col_value in item.get("values", {}).items():
                    values[col_name] = col_value

                rows.append(
                    CodaRow(
                        id=item["id"],
                        name=item["name"],
                        index=item["index"],
                        browser_link=item["browserLink"],
                        created_at=item["createdAt"],
                        updated_at=item["updatedAt"],
                        values=values,
                        table_id=table_id,
                        doc_id=doc_id,
                    )
                )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logger.debug(f"Found {len(rows)} rows in table {table_id}")
        return rows

    def _convert_page_to_document(self, page: CodaPage, content: str = "") -> Document:
        """Convert a page into a Document."""
        page_updated = datetime.fromisoformat(page.updated_at).astimezone(timezone.utc)

        text_parts = [page.name, page.browser_link]
        if content:
            text_parts.append(content)

        sections = [TextSection(link=page.browser_link, text="\n\n".join(text_parts))]

        return Document(
            id=f"coda-page-{page.doc_id}-{page.id}",
            sections=cast(list[TextSection | ImageSection], sections),
            source=DocumentSource.CODA,
            semantic_identifier=page.name or f"Page {page.id}",
            doc_updated_at=page_updated,
            metadata={
                "browser_link": page.browser_link,
                "doc_id": page.doc_id,
                "content_type": page.content_type,
            },
        )

    def _convert_table_with_rows_to_document(
        self, table: CodaTable, rows: List[CodaRow]
    ) -> Document:
        """Convert a table and its rows into a single Document with multiple sections (one per row)."""
        table_updated = datetime.fromisoformat(table.updated_at).astimezone(
            timezone.utc
        )

        sections: List[TextSection] = []
        for row in rows:
            content_text = " ".join(
                str(v) if not isinstance(v, list) else " ".join(map(str, v))
                for v in row.values.values()
            )

            row_name = row.name or f"Row {row.index or row.id}"
            text = f"{row_name}: {content_text}" if content_text else row_name

            sections.append(TextSection(link=row.browser_link, text=text))

        # If no rows, create a single section for the table itself
        if not sections:
            sections = [
                TextSection(link=table.browser_link, text=f"Table: {table.name}")
            ]

        return Document(
            id=f"coda-table-{table.doc_id}-{table.id}",
            sections=cast(list[TextSection | ImageSection], sections),
            source=DocumentSource.CODA,
            semantic_identifier=table.name or f"Table {table.id}",
            doc_updated_at=table_updated,
            metadata={
                "browser_link": table.browser_link,
                "doc_id": table.doc_id,
                "row_count": str(len(rows)),
            },
        )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Load and validate Coda credentials."""
        self._coda_client = CodaApiClient(bearer_token=credentials["coda_bearer_token"])

        try:
            self._coda_client.get("docs", params={"limit": "1"})
        except CodaClientRequestFailedError as e:
            if e.status_code == 401:
                raise ConnectorMissingCredentialError("Invalid Coda API token")
            raise

        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Load all documents from Coda workspace."""

        def _iter_documents() -> Generator[Document, None, None]:
            docs = self._list_all_docs()
            logger.info(f"Found {len(docs)} Coda docs to process")

            for doc in docs:
                logger.debug(f"Processing doc: {doc.name} ({doc.id})")

                try:
                    pages = self._list_pages_in_doc(doc.id)
                    for page in pages:
                        content = ""
                        if self.index_page_content:
                            try:
                                content = self._fetch_page_content(doc.id, page.id)
                            except Exception as e:
                                logger.warning(
                                    f"Failed to fetch content for page {page.id}: {e}"
                                )
                        yield self._convert_page_to_document(page, content)
                except ConnectorValidationError as e:
                    logger.warning(f"Failed to list pages for doc {doc.id}: {e}")

                try:
                    tables = self._list_tables(doc.id)
                    for table in tables:
                        try:
                            rows = self._list_rows_and_values(doc.id, table.id)
                            yield self._convert_table_with_rows_to_document(table, rows)
                        except ConnectorValidationError as e:
                            logger.warning(
                                f"Failed to list rows for table {table.id}: {e}"
                            )
                            yield self._convert_table_with_rows_to_document(table, [])
                except ConnectorValidationError as e:
                    logger.warning(f"Failed to list tables for doc {doc.id}: {e}")

        return batch_generator(_iter_documents(), self.batch_size)

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """
        Polls the Coda API for documents updated between start and end timestamps.
        We refer to page and table update times to determine if they need to be re-indexed.
        """

        def _iter_documents() -> Generator[Document, None, None]:
            docs = self._list_all_docs()
            logger.info(
                f"Polling {len(docs)} Coda docs for updates between {start} and {end}"
            )

            for doc in docs:
                try:
                    pages = self._list_pages_in_doc(doc.id)
                    for page in pages:
                        page_timestamp = (
                            datetime.fromisoformat(page.updated_at)
                            .astimezone(timezone.utc)
                            .timestamp()
                        )
                        if start < page_timestamp <= end:
                            content = ""
                            if self.index_page_content:
                                try:
                                    content = self._fetch_page_content(doc.id, page.id)
                                except Exception as e:
                                    logger.warning(
                                        f"Failed to fetch content for page {page.id}: {e}"
                                    )
                            yield self._convert_page_to_document(page, content)
                except ConnectorValidationError as e:
                    logger.warning(f"Failed to list pages for doc {doc.id}: {e}")

                try:
                    tables = self._list_tables(doc.id)
                    for table in tables:
                        table_timestamp = (
                            datetime.fromisoformat(table.updated_at)
                            .astimezone(timezone.utc)
                            .timestamp()
                        )

                        try:
                            rows = self._list_rows_and_values(doc.id, table.id)

                            table_or_rows_updated = start < table_timestamp <= end
                            if not table_or_rows_updated:
                                for row in rows:
                                    row_timestamp = (
                                        datetime.fromisoformat(row.updated_at)
                                        .astimezone(timezone.utc)
                                        .timestamp()
                                    )
                                    if start < row_timestamp <= end:
                                        table_or_rows_updated = True
                                        break

                            if table_or_rows_updated:
                                yield self._convert_table_with_rows_to_document(
                                    table, rows
                                )

                        except ConnectorValidationError as e:
                            logger.warning(
                                f"Failed to list rows for table {table.id}: {e}"
                            )
                            if table_timestamp > start and table_timestamp <= end:
                                yield self._convert_table_with_rows_to_document(
                                    table, []
                                )

                except ConnectorValidationError as e:
                    logger.warning(f"Failed to list tables for doc {doc.id}: {e}")

        return batch_generator(_iter_documents(), self.batch_size)

    def validate_connector_settings(self) -> None:
        """Validates the Coda connector settings calling the 'whoami' endpoint."""
        try:
            response = self.coda_client.get("whoami")
            logger.info(
                f"Coda connector validated for user: {response.get('name', 'Unknown')}"
            )

            if self.workspace_id:
                params = {"workspaceId": self.workspace_id, "limit": "1"}
                self.coda_client.get("docs", params=params)
                logger.info(f"Validated access to workspace: {self.workspace_id}")

        except CodaClientRequestFailedError as e:
            if e.status_code == 401:
                raise CredentialExpiredError(
                    "Coda credential appears to be invalid or expired (HTTP 401)."
                )
            elif e.status_code == 404:
                raise ConnectorValidationError(
                    "Coda workspace not found or not accessible (HTTP 404). "
                    "Please verify the workspace_id is correct and shared with the integration."
                )
            elif e.status_code == 429:
                raise ConnectorValidationError(
                    "Validation failed due to Coda rate-limits being exceeded (HTTP 429). Please try again later."
                )
            else:
                raise UnexpectedValidationError(
                    f"Unexpected Coda HTTP error (status={e.status_code}): {e}"
                )
        except Exception as exc:
            raise UnexpectedValidationError(
                f"Unexpected error during Coda settings validation: {exc}"
            )


================================================
FILE: backend/onyx/connectors/confluence/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/confluence/access.py
================================================
from collections.abc import Callable
from typing import Any
from typing import cast

from onyx.access.models import ExternalAccess
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


def get_page_restrictions(
    confluence_client: OnyxConfluence,
    page_id: str,
    page_restrictions: dict[str, Any],
    ancestors: list[dict[str, Any]],
) -> ExternalAccess | None:
    """
    Get page access restrictions for a Confluence page.
    This functionality requires Enterprise Edition.

    Note: This wrapper is only called from permission sync path. Group IDs are
    left unprefixed here because upsert_document_external_perms handles prefixing.

    Args:
        confluence_client: OnyxConfluence client instance
        page_id: The ID of the page
        page_restrictions: Dictionary containing page restriction data
        ancestors: List of ancestor pages with their restriction data

    Returns:
        ExternalAccess object for the page. None if EE is not enabled or no restrictions found.
    """
    # Check if EE is enabled
    if not global_version.is_ee_version():
        return None

    # Fetch the EE implementation
    ee_get_all_page_restrictions = cast(
        Callable[
            [OnyxConfluence, str, dict[str, Any], list[dict[str, Any]], bool],
            ExternalAccess | None,
        ],
        fetch_versioned_implementation(
            "onyx.external_permissions.confluence.page_access", "get_page_restrictions"
        ),
    )

    # add_prefix=False: permission sync path - upsert_document_external_perms handles prefixing
    return ee_get_all_page_restrictions(
        confluence_client, page_id, page_restrictions, ancestors, False
    )


def get_all_space_permissions(
    confluence_client: OnyxConfluence,
    is_cloud: bool,
) -> dict[str, ExternalAccess]:
    """
    Get access permissions for all spaces in Confluence.
    This functionality requires Enterprise Edition.

    Note: This wrapper is only called from permission sync path. Group IDs are
    left unprefixed here because upsert_document_external_perms handles prefixing.

    Args:
        confluence_client: OnyxConfluence client instance
        is_cloud: Whether this is a Confluence Cloud instance

    Returns:
        Dictionary mapping space keys to ExternalAccess objects. Empty dict if EE is not enabled.
    """
    # Check if EE is enabled
    if not global_version.is_ee_version():
        return {}

    # Fetch the EE implementation
    ee_get_all_space_permissions = cast(
        Callable[
            [OnyxConfluence, bool, bool],
            dict[str, ExternalAccess],
        ],
        fetch_versioned_implementation(
            "onyx.external_permissions.confluence.space_access",
            "get_all_space_permissions",
        ),
    )

    # add_prefix=False: permission sync path - upsert_document_external_perms handles prefixing
    return ee_get_all_space_permissions(confluence_client, is_cloud, False)


================================================
FILE: backend/onyx/connectors/confluence/connector.py
================================================
import copy
from collections.abc import Generator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from urllib.parse import quote

from atlassian.errors import ApiError  # type: ignore
from requests.exceptions import HTTPError
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.access import get_all_space_permissions
from onyx.connectors.confluence.access import get_page_restrictions
from onyx.connectors.confluence.onyx_confluence import extract_text_from_confluence_html
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import build_confluence_document_id
from onyx.connectors.confluence.utils import convert_attachment_to_content
from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    is_atlassian_date_error,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorCheckpoint
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.db.enums import HierarchyNodeType
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()
# Potential Improvements
# 1. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
_COMMENT_EXPANSION_FIELDS = ["body.storage.value"]
_PAGE_EXPANSION_FIELDS = [
    "body.storage.value",
    "version",
    "space",
    "metadata.labels",
    "history.lastUpdated",
    "ancestors",  # For hierarchy node tracking
]
_ATTACHMENT_EXPANSION_FIELDS = [
    "version",
    "space",
    "metadata.labels",
]
_RESTRICTIONS_EXPANSION_FIELDS = [
    "space",
    "restrictions.read.restrictions.user",
    "restrictions.read.restrictions.group",
    "ancestors.restrictions.read.restrictions.user",
    "ancestors.restrictions.read.restrictions.group",
]

_SLIM_DOC_BATCH_SIZE = 5000

ONE_HOUR = 3600
ONE_DAY = ONE_HOUR * 24

MAX_CACHED_IDS = 100


def _get_page_id(page: dict[str, Any], allow_missing: bool = False) -> str:
    if allow_missing and "id" not in page:
        return "unknown"
    return str(page["id"])


class ConfluenceCheckpoint(ConnectorCheckpoint):
    next_page_url: str | None


class ConfluenceConnector(
    CheckpointedConnector[ConfluenceCheckpoint],
    SlimConnector,
    SlimConnectorWithPermSync,
    CredentialsConnector,
):
    def __init__(
        self,
        wiki_base: str,
        is_cloud: bool,
        space: str = "",
        page_id: str = "",
        index_recursively: bool = False,
        cql_query: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
        # if a page has one of the labels specified in this list, we will just
        # skip it. This is generally used to avoid indexing extra sensitive
        # pages.
        labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
        timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
        scoped_token: bool = False,
    ) -> None:
        self.wiki_base = wiki_base
        self.is_cloud = is_cloud
        self.space = space
        self.page_id = page_id
        self.index_recursively = index_recursively
        self.cql_query = cql_query
        self.batch_size = batch_size
        self.labels_to_skip = labels_to_skip
        self.timezone_offset = timezone_offset
        self.scoped_token = scoped_token
        self._confluence_client: OnyxConfluence | None = None
        self._low_timeout_confluence_client: OnyxConfluence | None = None
        self._fetched_titles: set[str] = set()
        self.allow_images = False

        # Track hierarchy nodes we've already yielded to avoid duplicates
        self.seen_hierarchy_node_raw_ids: set[str] = set()

        # Remove trailing slash from wiki_base if present
        self.wiki_base = wiki_base.rstrip("/")
        """
        If nothing is provided, we default to fetching all pages
        Only one or none of the following options should be specified so
            the order shouldn't matter
        However, we use elif to ensure that only of the following is enforced
        """
        base_cql_page_query = "type=page"
        if cql_query:
            base_cql_page_query = cql_query
        elif page_id:
            if index_recursively:
                base_cql_page_query += f" and (ancestor='{page_id}' or id='{page_id}')"
            else:
                base_cql_page_query += f" and id='{page_id}'"
        elif space:
            uri_safe_space = quote(space)
            base_cql_page_query += f" and space='{uri_safe_space}'"

        self.base_cql_page_query = base_cql_page_query

        self.cql_label_filter = ""
        if labels_to_skip:
            labels_to_skip = list(set(labels_to_skip))
            comma_separated_labels = ",".join(
                f"'{quote(label)}'" for label in labels_to_skip
            )
            self.cql_label_filter = f" and label not in ({comma_separated_labels})"

        self.timezone: timezone = timezone(offset=timedelta(hours=timezone_offset))
        self.credentials_provider: CredentialsProviderInterface | None = None

        self.probe_kwargs = {
            "max_backoff_retries": 6,
            "max_backoff_seconds": 10,
        }

        self.final_kwargs = {
            "max_backoff_retries": 10,
            "max_backoff_seconds": 60,
        }

        # deprecated
        self.continue_on_failure = continue_on_failure

    def set_allow_images(self, value: bool) -> None:
        logger.info(f"Setting allow_images to {value}.")
        self.allow_images = value

    def _yield_space_hierarchy_nodes(
        self,
    ) -> Generator[HierarchyNode, None, None]:
        """Yield hierarchy nodes for all spaces we're indexing."""
        space_keys = [self.space] if self.space else None

        for space in self.confluence_client.retrieve_confluence_spaces(
            space_keys=space_keys,
            limit=50,
        ):
            space_key = space.get("key")
            if not space_key or space_key in self.seen_hierarchy_node_raw_ids:
                continue

            self.seen_hierarchy_node_raw_ids.add(space_key)

            # Build space link
            space_link = f"{self.wiki_base}/spaces/{space_key}"

            yield HierarchyNode(
                raw_node_id=space_key,
                raw_parent_id=None,  # Parent is SOURCE
                display_name=space.get("name", space_key),
                link=space_link,
                node_type=HierarchyNodeType.SPACE,
            )

    def _yield_ancestor_hierarchy_nodes(
        self,
        page: dict[str, Any],
    ) -> Generator[HierarchyNode, None, None]:
        """Yield hierarchy nodes for all unseen ancestors of this page.

        Any page that appears as an ancestor of another page IS a hierarchy node
        (it has at least one child - the page we're currently processing).

        This ensures parent nodes are always yielded before child documents.

        Note: raw_node_id for page hierarchy nodes uses the page URL (same as document.id)
        to enable document<->hierarchy node linking in the indexing pipeline.
        Space hierarchy nodes use the space key since they don't have documents.
        """
        ancestors = page.get("ancestors", [])
        space_key = page.get("space", {}).get("key")

        # Ensure space is yielded first (if not already)
        if space_key and space_key not in self.seen_hierarchy_node_raw_ids:
            self.seen_hierarchy_node_raw_ids.add(space_key)
            space = page.get("space", {})
            yield HierarchyNode(
                raw_node_id=space_key,
                raw_parent_id=None,  # Parent is SOURCE
                display_name=space.get("name", space_key),
                link=f"{self.wiki_base}/spaces/{space_key}",
                node_type=HierarchyNodeType.SPACE,
            )

        # Walk through ancestors (root to immediate parent)
        # Build a list of (ancestor_url, ancestor_data) pairs first
        ancestor_urls: list[str | None] = []
        for ancestor in ancestors:
            if "_links" in ancestor and "webui" in ancestor["_links"]:
                ancestor_urls.append(
                    build_confluence_document_id(
                        self.wiki_base, ancestor["_links"]["webui"], self.is_cloud
                    )
                )
            else:
                ancestor_urls.append(None)

        for i, ancestor in enumerate(ancestors):
            ancestor_url = ancestor_urls[i]
            if not ancestor_url:
                # Can't build URL for this ancestor, skip it
                continue

            if ancestor_url in self.seen_hierarchy_node_raw_ids:
                continue

            self.seen_hierarchy_node_raw_ids.add(ancestor_url)

            # Determine parent of this ancestor
            if i == 0:
                # First ancestor - parent is the space
                parent_raw_id = space_key
            else:
                # Parent is the previous ancestor (use URL)
                parent_raw_id = ancestor_urls[i - 1]

            yield HierarchyNode(
                raw_node_id=ancestor_url,  # Use URL to match document.id
                raw_parent_id=parent_raw_id,
                display_name=ancestor.get("title", f"Page {ancestor.get('id')}"),
                link=ancestor_url,
                node_type=HierarchyNodeType.PAGE,
            )

    def _get_parent_hierarchy_raw_id(self, page: dict[str, Any]) -> str | None:
        """Get the raw hierarchy node ID of this page's parent.

        Returns:
            - Parent page URL if page has a parent page (last item in ancestors)
            - Space key if page is at top level of space
            - None if we can't determine

        Note: For pages, we return URLs (to match document.id and hierarchy node raw_node_id).
        For spaces, we return the space key (spaces don't have documents).
        """
        ancestors = page.get("ancestors", [])
        if ancestors:
            # Last ancestor is the immediate parent page - use URL
            parent = ancestors[-1]
            if "_links" in parent and "webui" in parent["_links"]:
                return build_confluence_document_id(
                    self.wiki_base, parent["_links"]["webui"], self.is_cloud
                )
            # Fallback to page ID if URL not available (shouldn't happen normally)
            return str(parent.get("id"))

        # Top-level page - parent is the space (use space key)
        return page.get("space", {}).get("key")

    def _maybe_yield_page_hierarchy_node(
        self, page: dict[str, Any]
    ) -> HierarchyNode | None:
        """Yield a hierarchy node for this page if not already yielded.

        Used when a page has attachments - attachments are children of the page
        in the hierarchy, so the page must be a hierarchy node.

        Note: raw_node_id uses the page URL (same as document.id) to enable
        document<->hierarchy node linking in the indexing pipeline.
        """
        # Build page URL - we use this as raw_node_id to match document.id
        if "_links" not in page or "webui" not in page["_links"]:
            return None  # Can't build URL, skip

        page_url = build_confluence_document_id(
            self.wiki_base, page["_links"]["webui"], self.is_cloud
        )

        if page_url in self.seen_hierarchy_node_raw_ids:
            return None

        self.seen_hierarchy_node_raw_ids.add(page_url)

        # Get parent hierarchy ID
        parent_raw_id = self._get_parent_hierarchy_raw_id(page)

        return HierarchyNode(
            raw_node_id=page_url,  # Use URL to match document.id
            raw_parent_id=parent_raw_id,
            display_name=page.get("title", f"Page {_get_page_id(page)}"),
            link=page_url,
            node_type=HierarchyNodeType.PAGE,
        )

    @property
    def confluence_client(self) -> OnyxConfluence:
        if self._confluence_client is None:
            raise ConnectorMissingCredentialError("Confluence")
        return self._confluence_client

    @property
    def low_timeout_confluence_client(self) -> OnyxConfluence:
        if self._low_timeout_confluence_client is None:
            raise ConnectorMissingCredentialError("Confluence")
        return self._low_timeout_confluence_client

    def set_credentials_provider(
        self, credentials_provider: CredentialsProviderInterface
    ) -> None:
        self.credentials_provider = credentials_provider

        # raises exception if there's a problem
        confluence_client = OnyxConfluence(
            is_cloud=self.is_cloud,
            url=self.wiki_base,
            credentials_provider=credentials_provider,
            scoped_token=self.scoped_token,
        )
        confluence_client._probe_connection(**self.probe_kwargs)
        confluence_client._initialize_connection(**self.final_kwargs)

        self._confluence_client = confluence_client

        # create a low timeout confluence client for sync flows
        low_timeout_confluence_client = OnyxConfluence(
            is_cloud=self.is_cloud,
            url=self.wiki_base,
            credentials_provider=credentials_provider,
            timeout=3,
            scoped_token=self.scoped_token,
        )
        low_timeout_confluence_client._probe_connection(**self.probe_kwargs)
        low_timeout_confluence_client._initialize_connection(**self.final_kwargs)

        self._low_timeout_confluence_client = low_timeout_confluence_client

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        raise NotImplementedError("Use set_credentials_provider with this connector.")

    def _construct_page_cql_query(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> str:
        """
        Constructs a CQL query for use in the confluence API. See
        https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/
        for more information. This is JUST the CQL, not the full URL used to hit the API.
        Use _build_page_retrieval_url to get the full URL.
        """
        page_query = self.base_cql_page_query + self.cql_label_filter
        # Add time filters
        if start:
            formatted_start_time = datetime.fromtimestamp(
                start, tz=self.timezone
            ).strftime("%Y-%m-%d %H:%M")
            page_query += f" and lastmodified >= '{formatted_start_time}'"
        if end:
            formatted_end_time = datetime.fromtimestamp(end, tz=self.timezone).strftime(
                "%Y-%m-%d %H:%M"
            )
            page_query += f" and lastmodified <= '{formatted_end_time}'"

        page_query += " order by lastmodified asc"
        return page_query

    def _construct_attachment_query(
        self,
        confluence_page_id: str,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> str:
        attachment_query = f"type=attachment and container='{confluence_page_id}'"
        attachment_query += self.cql_label_filter
        # Add time filters to avoid reprocessing unchanged attachments during refresh
        if start:
            formatted_start_time = datetime.fromtimestamp(
                start, tz=self.timezone
            ).strftime("%Y-%m-%d %H:%M")
            attachment_query += f" and lastmodified >= '{formatted_start_time}'"
        if end:
            formatted_end_time = datetime.fromtimestamp(end, tz=self.timezone).strftime(
                "%Y-%m-%d %H:%M"
            )
            attachment_query += f" and lastmodified <= '{formatted_end_time}'"
        attachment_query += " order by lastmodified asc"
        return attachment_query

    def _get_comment_string_for_page_id(self, page_id: str) -> str:
        comment_string = ""
        comment_cql = f"type=comment and container='{page_id}'"
        comment_cql += self.cql_label_filter
        expand = ",".join(_COMMENT_EXPANSION_FIELDS)

        for comment in self.confluence_client.paginated_cql_retrieval(
            cql=comment_cql,
            expand=expand,
        ):
            comment_string += "\nComment:\n"
            comment_string += extract_text_from_confluence_html(
                confluence_client=self.confluence_client,
                confluence_object=comment,
                fetched_titles=set(),
            )
        return comment_string

    def _convert_page_to_document(
        self, page: dict[str, Any]
    ) -> Document | ConnectorFailure:
        """
        Converts a Confluence page to a Document object.
        Includes the page content, comments, and attachments.
        """
        page_id = page_url = ""
        try:
            # Extract basic page information
            page_id = _get_page_id(page)
            page_title = page["title"]
            logger.info(f"Converting page {page_title} to document")
            page_url = build_confluence_document_id(
                self.wiki_base, page["_links"]["webui"], self.is_cloud
            )

            # Get the page content
            page_content = extract_text_from_confluence_html(
                self.confluence_client, page, self._fetched_titles
            )

            # Create the main section for the page content
            sections: list[TextSection | ImageSection] = [
                TextSection(text=page_content, link=page_url)
            ]

            # Process comments if available
            comment_text = self._get_comment_string_for_page_id(page_id)
            if comment_text:
                sections.append(
                    TextSection(text=comment_text, link=f"{page_url}#comments")
                )
            # Note: attachments are no longer merged into the page document.
            # They are indexed as separate documents downstream.

            # Extract metadata
            metadata = {}
            if "space" in page:
                metadata["space"] = page["space"].get("name", "")

            # Extract labels
            labels = []
            if "metadata" in page and "labels" in page["metadata"]:
                for label in page["metadata"]["labels"].get("results", []):
                    labels.append(label.get("name", ""))
            if labels:
                metadata["labels"] = labels

            # Extract owners
            primary_owners = []
            if "version" in page and "by" in page["version"]:
                author = page["version"]["by"]
                display_name = author.get("displayName", "Unknown")
                email = author.get("email", "unknown@domain.invalid")
                primary_owners.append(
                    BasicExpertInfo(display_name=display_name, email=email)
                )

            # Determine parent hierarchy node
            parent_hierarchy_raw_node_id = self._get_parent_hierarchy_raw_id(page)

            # Create the document
            return Document(
                id=page_url,
                sections=sections,
                source=DocumentSource.CONFLUENCE,
                semantic_identifier=page_title,
                metadata=metadata,
                doc_updated_at=datetime_from_string(page["version"]["when"]),
                primary_owners=primary_owners if primary_owners else None,
                parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
            )
        except Exception as e:
            logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
            if is_atlassian_date_error(e):  # propagate error to be caught and retried
                raise
            return ConnectorFailure(
                failed_document=DocumentFailure(
                    document_id=page_id,
                    document_link=page_url,
                ),
                failure_message=f"Error converting page {page.get('id', 'unknown')}: {e}",
                exception=e,
            )

    def _fetch_page_attachments(
        self,
        page: dict[str, Any],
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> tuple[list[Document | HierarchyNode], list[ConnectorFailure]]:
        """
        Inline attachments are added directly to the document as text or image sections by
        this function. The returned documents/connectorfailures are for non-inline attachments
        and those at the end of the page.

        If there are valid attachments, the page itself is yielded as a hierarchy node
        (since attachments are children of the page in the hierarchy).
        """
        attachment_query = self._construct_attachment_query(
            _get_page_id(page), start, end
        )
        attachment_failures: list[ConnectorFailure] = []
        attachment_docs: list[Document | HierarchyNode] = []
        page_url = ""
        page_hierarchy_node_yielded = False

        try:
            for attachment in self.confluence_client.paginated_cql_retrieval(
                cql=attachment_query,
                expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
            ):
                media_type: str = attachment.get("metadata", {}).get("mediaType", "")

                # TODO(rkuo): this check is partially redundant with validate_attachment_filetype
                # and checks in convert_attachment_to_content/process_attachment
                # but doing the check here avoids an unnecessary download. Due for refactoring.
                if not self.allow_images:
                    if media_type.startswith("image/"):
                        logger.info(
                            f"Skipping attachment because allow images is False: {attachment['title']}"
                        )
                        continue

                if not validate_attachment_filetype(
                    attachment,
                ):
                    logger.info(
                        f"Skipping attachment because it is not an accepted file type: {attachment['title']}"
                    )
                    continue

                logger.info(
                    f"Processing attachment: {attachment['title']} attached to page {page['title']}"
                )
                # Attachment document id: use the download URL for stable identity
                try:
                    object_url = build_confluence_document_id(
                        self.wiki_base, attachment["_links"]["download"], self.is_cloud
                    )
                except Exception as e:
                    logger.warning(
                        f"Invalid attachment url for id {attachment['id']}, skipping"
                    )
                    logger.debug(f"Error building attachment url: {e}")
                    continue
                try:
                    response = convert_attachment_to_content(
                        confluence_client=self.confluence_client,
                        attachment=attachment,
                        page_id=_get_page_id(page),
                        allow_images=self.allow_images,
                    )
                    if response is None:
                        continue

                    content_text, file_storage_name = response

                    sections: list[TextSection | ImageSection] = []
                    if content_text:
                        sections.append(TextSection(text=content_text, link=object_url))
                    elif file_storage_name:
                        sections.append(
                            ImageSection(
                                link=object_url, image_file_id=file_storage_name
                            )
                        )

                    # Build attachment-specific metadata
                    attachment_metadata: dict[str, str | list[str]] = {}
                    if "space" in attachment:
                        attachment_metadata["space"] = attachment["space"].get(
                            "name", ""
                        )
                    labels: list[str] = []
                    if "metadata" in attachment and "labels" in attachment["metadata"]:
                        for label in attachment["metadata"]["labels"].get(
                            "results", []
                        ):
                            labels.append(label.get("name", ""))
                    if labels:
                        attachment_metadata["labels"] = labels
                    page_url = page_url or build_confluence_document_id(
                        self.wiki_base, page["_links"]["webui"], self.is_cloud
                    )
                    attachment_metadata["parent_page_id"] = page_url
                    attachment_id = build_confluence_document_id(
                        self.wiki_base, attachment["_links"]["webui"], self.is_cloud
                    )

                    primary_owners: list[BasicExpertInfo] | None = None
                    if "version" in attachment and "by" in attachment["version"]:
                        author = attachment["version"]["by"]
                        display_name = author.get("displayName", "Unknown")
                        email = author.get("email", "unknown@domain.invalid")
                        primary_owners = [
                            BasicExpertInfo(display_name=display_name, email=email)
                        ]

                    # Attachments have their parent page as the hierarchy parent
                    # Use page URL to match the hierarchy node's raw_node_id
                    attachment_parent_hierarchy_raw_id = page_url

                    attachment_doc = Document(
                        id=attachment_id,
                        sections=sections,
                        source=DocumentSource.CONFLUENCE,
                        semantic_identifier=attachment.get("title", object_url),
                        metadata=attachment_metadata,
                        doc_updated_at=(
                            datetime_from_string(attachment["version"]["when"])
                            if attachment.get("version")
                            and attachment["version"].get("when")
                            else None
                        ),
                        primary_owners=primary_owners,
                        parent_hierarchy_raw_node_id=attachment_parent_hierarchy_raw_id,
                    )

                    # If this is the first valid attachment, yield the page as a
                    # hierarchy node (attachments are children of the page)
                    if not page_hierarchy_node_yielded:
                        page_hierarchy_node = self._maybe_yield_page_hierarchy_node(
                            page
                        )
                        if page_hierarchy_node:
                            attachment_docs.append(page_hierarchy_node)
                        page_hierarchy_node_yielded = True

                    attachment_docs.append(attachment_doc)
                except Exception as e:
                    logger.error(
                        f"Failed to extract/summarize attachment {attachment['title']}",
                        exc_info=e,
                    )
                    if is_atlassian_date_error(e):
                        # propagate error to be caught and retried
                        raise
                    attachment_failures.append(
                        ConnectorFailure(
                            failed_document=DocumentFailure(
                                document_id=object_url,
                                document_link=object_url,
                            ),
                            failure_message=f"Failed to extract/summarize attachment {attachment['title']} for doc {object_url}",
                            exception=e,
                        )
                    )
        except HTTPError as e:
            # If we get a 403 after all retries, the user likely doesn't have permission
            # to access attachments on this page. Log and skip rather than failing the whole job.
            page_id = _get_page_id(page, allow_missing=True)
            page_title = page.get("title", "unknown")
            if e.response and e.response.status_code in [401, 403]:
                failure_message_prefix = (
                    "Invalid credentials (401)"
                    if e.response.status_code == 401
                    else "Permission denied (403)"
                )
                failure_message = (
                    f"{failure_message_prefix} when fetching attachments for page '{page_title}' "
                    f"(ID: {page_id}). The user may not have permission to query attachments on this page. "
                    "Skipping attachments for this page."
                )
                logger.warning(failure_message)

                # Build the page URL for the failure record
                try:
                    page_url = build_confluence_document_id(
                        self.wiki_base, page["_links"]["webui"], self.is_cloud
                    )
                except Exception:
                    page_url = f"page_id:{page_id}"

                return [], [
                    ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=page_id,
                            document_link=page_url,
                        ),
                        failure_message=failure_message,
                        exception=e,
                    )
                ]
            else:
                raise

        return attachment_docs, attachment_failures

    def _fetch_document_batches(
        self,
        checkpoint: ConfluenceCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> CheckpointOutput[ConfluenceCheckpoint]:
        """
        Yields batches of Documents and HierarchyNodes. For each page:
         - Yield hierarchy nodes for spaces and ancestor pages (parent-before-child ordering)
         - Create a Document with 1 Section for the page text/comments
         - Then fetch attachments. For each attachment:
             - Attempt to convert it with convert_attachment_to_content(...)
             - If successful, create a new Section with the extracted text or summary.
        """
        checkpoint = copy.deepcopy(checkpoint)

        # Yield space hierarchy nodes FIRST (only once per connector run)
        if not checkpoint.next_page_url:
            yield from self._yield_space_hierarchy_nodes()

        # use "start" when last_updated is 0 or for confluence server
        start_ts = start
        page_query_url = checkpoint.next_page_url or self._build_page_retrieval_url(
            start_ts, end, self.batch_size
        )
        logger.debug(f"page_query_url: {page_query_url}")

        # store the next page start for confluence server, cursor for confluence cloud
        def store_next_page_url(next_page_url: str) -> None:
            checkpoint.next_page_url = next_page_url

        for page in self.confluence_client.paginated_page_retrieval(
            cql_url=page_query_url,
            limit=self.batch_size,
            next_page_callback=store_next_page_url,
        ):
            # Yield hierarchy nodes for all ancestors (parent-before-child ordering)
            yield from self._yield_ancestor_hierarchy_nodes(page)

            # Build doc from page
            doc_or_failure = self._convert_page_to_document(page)

            if isinstance(doc_or_failure, ConnectorFailure):
                yield doc_or_failure
                continue

            # yield completed document (or failure)
            yield doc_or_failure

            # Now get attachments for that page:
            attachment_docs, attachment_failures = self._fetch_page_attachments(
                page, start, end
            )
            # yield attached docs and failures
            yield from attachment_docs
            yield from attachment_failures

            # Create checkpoint once a full page of results is returned
            if checkpoint.next_page_url and checkpoint.next_page_url != page_query_url:
                return checkpoint

        checkpoint.has_more = False
        return checkpoint

    def _build_page_retrieval_url(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
        limit: int,
    ) -> str:
        """
        Builds the full URL used to retrieve pages from the confluence API.
        This can be used as input to the confluence client's _paginate_url
        or paginated_page_retrieval methods.
        """
        page_query = self._construct_page_cql_query(start, end)
        cql_url = self.confluence_client.build_cql_url(
            page_query, expand=",".join(_PAGE_EXPANSION_FIELDS)
        )
        return update_param_in_path(cql_url, "limit", str(limit))

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ConfluenceCheckpoint,
    ) -> CheckpointOutput[ConfluenceCheckpoint]:
        end += ONE_DAY  # handle time zone weirdness
        try:
            return self._fetch_document_batches(checkpoint, start, end)
        except Exception as e:
            if is_atlassian_date_error(e) and start is not None:
                logger.warning(
                    "Confluence says we provided an invalid 'updated' field. This may indicate"
                    "a real issue, but can also appear during edge cases like daylight"
                    f"savings time changes. Retrying with a 1 hour offset. Error: {e}"
                )
                return self._fetch_document_batches(checkpoint, start - ONE_HOUR, end)
            raise

    @override
    def build_dummy_checkpoint(self) -> ConfluenceCheckpoint:
        return ConfluenceCheckpoint(has_more=True, next_page_url=None)

    @override
    def validate_checkpoint_json(self, checkpoint_json: str) -> ConfluenceCheckpoint:
        return ConfluenceCheckpoint.model_validate_json(checkpoint_json)

    @override
    def retrieve_all_slim_docs(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        return self._retrieve_all_slim_docs(
            start=start,
            end=end,
            callback=callback,
            include_permissions=False,
        )

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        """
        Return 'slim' docs (IDs + minimal permission data).
        Does not fetch actual text. Used primarily for incremental permission sync.
        """
        return self._retrieve_all_slim_docs(
            start=start,
            end=end,
            callback=callback,
            include_permissions=True,
        )

    def _retrieve_all_slim_docs(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
        include_permissions: bool = True,
    ) -> GenerateSlimDocumentOutput:
        doc_metadata_list: list[SlimDocument | HierarchyNode] = []
        restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)

        space_level_access_info: dict[str, ExternalAccess] = {}
        if include_permissions:
            space_level_access_info = get_all_space_permissions(
                self.confluence_client, self.is_cloud
            )

        # Yield space hierarchy nodes first
        for node in self._yield_space_hierarchy_nodes():
            doc_metadata_list.append(node)

        def get_external_access(
            doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]
        ) -> ExternalAccess | None:
            return get_page_restrictions(
                self.confluence_client, doc_id, restrictions, ancestors
            ) or space_level_access_info.get(page_space_key)

        # Query pages (with optional time filtering for indexing_start)
        page_query = self._construct_page_cql_query(start, end)
        for page in self.confluence_client.cql_paginate_all_expansions(
            cql=page_query,
            expand=restrictions_expand,
            limit=_SLIM_DOC_BATCH_SIZE,
        ):
            # Yield ancestor hierarchy nodes for this page
            for node in self._yield_ancestor_hierarchy_nodes(page):
                doc_metadata_list.append(node)

            page_id = _get_page_id(page)
            page_restrictions = page.get("restrictions") or {}
            page_space_key = page.get("space", {}).get("key")
            page_ancestors = page.get("ancestors", [])

            page_id = build_confluence_document_id(
                self.wiki_base, page["_links"]["webui"], self.is_cloud
            )
            doc_metadata_list.append(
                SlimDocument(
                    id=page_id,
                    external_access=(
                        get_external_access(page_id, page_restrictions, page_ancestors)
                        if include_permissions
                        else None
                    ),
                    parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
                        page
                    ),
                )
            )

            # Query attachments for each page
            page_hierarchy_node_yielded = False
            attachment_query = self._construct_attachment_query(
                _get_page_id(page), start, end
            )
            for attachment in self.confluence_client.cql_paginate_all_expansions(
                cql=attachment_query,
                expand=restrictions_expand,
                limit=_SLIM_DOC_BATCH_SIZE,
            ):
                # If you skip images, you'll skip them in the permission sync
                attachment["metadata"].get("mediaType", "")
                if not validate_attachment_filetype(
                    attachment,
                ):
                    continue

                # If this page has valid attachments and we haven't yielded it as a
                # hierarchy node yet, do so now (attachments are children of the page)
                if not page_hierarchy_node_yielded:
                    page_node = self._maybe_yield_page_hierarchy_node(page)
                    if page_node:
                        doc_metadata_list.append(page_node)
                    page_hierarchy_node_yielded = True

                attachment_restrictions = attachment.get("restrictions", {})
                if not attachment_restrictions:
                    attachment_restrictions = page_restrictions or {}

                attachment_space_key = attachment.get("space", {}).get("key")
                if not attachment_space_key:
                    attachment_space_key = page_space_key

                attachment_id = build_confluence_document_id(
                    self.wiki_base,
                    attachment["_links"]["webui"],
                    self.is_cloud,
                )
                doc_metadata_list.append(
                    SlimDocument(
                        id=attachment_id,
                        external_access=(
                            get_external_access(
                                attachment_id, attachment_restrictions, []
                            )
                            if include_permissions
                            else None
                        ),
                        parent_hierarchy_raw_node_id=page_id,
                    )
                )

            if len(doc_metadata_list) > _SLIM_DOC_BATCH_SIZE:
                yield doc_metadata_list[:_SLIM_DOC_BATCH_SIZE]
                doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]

                if callback and callback.should_stop():
                    raise RuntimeError(
                        "retrieve_all_slim_docs_perm_sync: Stop signal detected"
                    )
                if callback:
                    callback.progress("retrieve_all_slim_docs_perm_sync", 1)

        yield doc_metadata_list

    def validate_connector_settings(self) -> None:
        try:
            spaces_iter = self.low_timeout_confluence_client.retrieve_confluence_spaces(
                limit=1,
            )
            first_space = next(spaces_iter, None)
        except HTTPError as e:
            status_code = e.response.status_code if e.response else None
            if status_code == 401:
                raise CredentialExpiredError(
                    "Invalid or expired Confluence credentials (HTTP 401)."
                )
            elif status_code == 403:
                raise InsufficientPermissionsError(
                    "Insufficient permissions to access Confluence resources (HTTP 403)."
                )
            raise UnexpectedValidationError(
                f"Unexpected Confluence error (status={status_code}): {e}"
            )
        except Exception as e:
            raise UnexpectedValidationError(
                f"Unexpected error while validating Confluence settings: {e}"
            )

        if not first_space:
            raise ConnectorValidationError(
                "No Confluence spaces found. Either your credentials lack permissions, or "
                "there truly are no spaces in this Confluence instance."
            )

        if self.space:
            try:
                self.low_timeout_confluence_client.get_space(self.space)
            except ApiError as e:
                raise ConnectorValidationError(
                    "Invalid Confluence space key provided"
                ) from e


if __name__ == "__main__":
    import os
    from onyx.utils.variable_functionality import global_version
    from tests.daily.connectors.utils import load_all_from_connector

    # For connector permission testing, set EE to true.
    global_version.set_ee()

    # base url
    wiki_base = os.environ["CONFLUENCE_URL"]

    # auth stuff
    username = os.environ["CONFLUENCE_USERNAME"]
    access_token = os.environ["CONFLUENCE_ACCESS_TOKEN"]
    is_cloud = os.environ["CONFLUENCE_IS_CLOUD"].lower() == "true"

    # space + page
    space = os.environ["CONFLUENCE_SPACE_KEY"]
    # page_id = os.environ["CONFLUENCE_PAGE_ID"]

    confluence_connector = ConfluenceConnector(
        wiki_base=wiki_base,
        space=space,
        is_cloud=is_cloud,
        # page_id=page_id,
    )

    credentials_provider = OnyxStaticCredentialsProvider(
        None,
        DocumentSource.CONFLUENCE,
        {
            "confluence_username": username,
            "confluence_access_token": access_token,
        },
    )
    confluence_connector.set_credentials_provider(credentials_provider)

    start = 0.0
    end = datetime.now().timestamp()

    # Fetch all `SlimDocuments`.
    for slim_doc in confluence_connector.retrieve_all_slim_docs_perm_sync():
        print(slim_doc)

    # Fetch all `Documents`.
    for doc in load_all_from_connector(
        connector=confluence_connector,
        start=start,
        end=end,
    ).documents:
        print(doc)


================================================
FILE: backend/onyx/connectors/confluence/models.py
================================================
from pydantic import BaseModel


class ConfluenceUser(BaseModel):
    user_id: str  # accountId in Cloud, userKey in Server
    username: str | None  # Confluence Cloud doesn't give usernames
    display_name: str
    # Confluence Data Center doesn't give email back by default,
    # have to fetch it with a different endpoint
    email: str | None
    type: str


================================================
FILE: backend/onyx/connectors/confluence/onyx_confluence.py
================================================
"""
# README (notes on Confluence pagination):

We've noticed that the `search/users` and `users/memberof` endpoints for Confluence Cloud use offset-based pagination as
opposed to cursor-based. We also know that page-retrieval uses cursor-based pagination.

Our default pagination strategy right now for cloud is to assume cursor-based.
However, if you notice that a cloud API is not being properly paginated (i.e., if the `_links.next` is not appearing in the
returned payload), then you can force offset-based pagination.

# TODO (@raunakab)
We haven't explored all of the cloud APIs' pagination strategies. @raunakab take time to go through this and figure them out.
"""

import json
import time
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast
from typing import TypeVar
from urllib.parse import quote

import bs4
from atlassian import Confluence  # type:ignore
from redis import Redis
from requests import HTTPError

from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
from onyx.connectors.confluence.models import ConfluenceUser
from onyx.connectors.confluence.user_profile_override import (
    process_confluence_user_profiles_override,
)
from onyx.connectors.confluence.utils import _handle_http_error
from onyx.connectors.confluence.utils import confluence_refresh_tokens
from onyx.connectors.confluence.utils import get_start_param_from_url
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.file_processing.html_utils import format_document_soup
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger

logger = setup_logger()


F = TypeVar("F", bound=Callable[..., Any])


# https://jira.atlassian.com/browse/CONFCLOUD-76433
_PROBLEMATIC_EXPANSIONS = "body.storage.value"
_REPLACEMENT_EXPANSIONS = "body.view.value"

_USER_NOT_FOUND = "Unknown Confluence User"
_USER_ID_TO_DISPLAY_NAME_CACHE: dict[str, str | None] = {}
_USER_EMAIL_CACHE: dict[str, str | None] = {}
_DEFAULT_PAGINATION_LIMIT = 1000

_CONFLUENCE_SPACES_API_V1 = "rest/api/space"
_CONFLUENCE_SPACES_API_V2 = "wiki/api/v2/spaces"


class ConfluenceRateLimitError(Exception):
    pass


class OnyxConfluence:
    """
    This is a custom Confluence class that:

    A. overrides the default Confluence class to add a custom CQL method.
    B.
    This is necessary because the default Confluence class does not properly support cql expansions.
    All methods are automatically wrapped with handle_confluence_rate_limit.
    """

    CREDENTIAL_PREFIX = "connector:confluence:credential"
    CREDENTIAL_TTL = 300  # 5 min
    PROBE_TIMEOUT = 5  # 5 seconds

    def __init__(
        self,
        is_cloud: bool,
        url: str,
        credentials_provider: CredentialsProviderInterface,
        timeout: int | None = None,
        scoped_token: bool = False,
        # should generally not be passed in, but making it overridable for
        # easier testing
        confluence_user_profiles_override: list[dict[str, str]] | None = (
            CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
        ),
    ) -> None:
        self.base_url = url  #'/'.join(url.rstrip("/").split("/")[:-1])
        url = scoped_url(url, "confluence") if scoped_token else url

        self._is_cloud = is_cloud
        self._url = url.rstrip("/")
        self._credentials_provider = credentials_provider
        self.scoped_token = scoped_token
        self.redis_client: Redis | None = None
        self.static_credentials: dict[str, Any] | None = None
        if self._credentials_provider.is_dynamic():
            self.redis_client = get_redis_client(
                tenant_id=credentials_provider.get_tenant_id()
            )
        else:
            self.static_credentials = self._credentials_provider.get_credentials()

        self._confluence = Confluence(url)
        self.credential_key: str = (
            self.CREDENTIAL_PREFIX
            + f":credential_{self._credentials_provider.get_provider_key()}"
        )

        self._kwargs: Any = None

        self.shared_base_kwargs: dict[str, str | int | bool] = {
            "api_version": "cloud" if is_cloud else "latest",
            "backoff_and_retry": False,
            "cloud": is_cloud,
        }
        if timeout:
            self.shared_base_kwargs["timeout"] = timeout

        self._confluence_user_profiles_override = (
            process_confluence_user_profiles_override(confluence_user_profiles_override)
            if confluence_user_profiles_override
            else None
        )

    def _renew_credentials(self) -> tuple[dict[str, Any], bool]:
        """credential_json - the current json credentials
        Returns a tuple
        1. The up to date credentials
        2. True if the credentials were updated

        This method is intended to be used within a distributed lock.
        Lock, call this, update credentials if the tokens were refreshed, then release
        """
        # static credentials are preloaded, so no locking/redis required
        if self.static_credentials:
            return self.static_credentials, False

        if not self.redis_client:
            raise RuntimeError("self.redis_client is None")

        # dynamic credentials need locking
        # check redis first, then fallback to the DB
        credential_raw = self.redis_client.get(self.credential_key)
        if credential_raw is not None:
            credential_bytes = cast(bytes, credential_raw)
            credential_str = credential_bytes.decode("utf-8")
            credential_json: dict[str, Any] = json.loads(credential_str)
        else:
            credential_json = self._credentials_provider.get_credentials()

        if "confluence_refresh_token" not in credential_json:
            # static credentials ... cache them permanently and return
            self.static_credentials = credential_json
            return credential_json, False

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")

        # check if we should refresh tokens. we're deciding to refresh halfway
        # to expiration
        now = datetime.now(timezone.utc)
        created_at = datetime.fromisoformat(credential_json["created_at"])
        expires_in: int = credential_json["expires_in"]
        renew_at = created_at + timedelta(seconds=expires_in // 2)
        if now <= renew_at:
            # cached/current credentials are reasonably up to date
            return credential_json, False

        # we need to refresh
        logger.info("Renewing Confluence Cloud credentials...")
        new_credentials = confluence_refresh_tokens(
            OAUTH_CONFLUENCE_CLOUD_CLIENT_ID,
            OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET,
            credential_json["cloud_id"],
            credential_json["confluence_refresh_token"],
        )

        # store the new credentials to redis and to the db thru the provider
        # redis: we use a 5 min TTL because we are given a 10 minute grace period
        # when keys are rotated. it's easier to expire the cached credentials
        # reasonably frequently rather than trying to handle strong synchronization
        # between the db and redis everywhere the credentials might be updated
        new_credential_str = json.dumps(new_credentials)
        self.redis_client.set(
            self.credential_key, new_credential_str, nx=True, ex=self.CREDENTIAL_TTL
        )
        self._credentials_provider.set_credentials(new_credentials)

        return new_credentials, True

    @staticmethod
    def _make_oauth2_dict(credentials: dict[str, Any]) -> dict[str, Any]:
        oauth2_dict: dict[str, Any] = {}
        if "confluence_refresh_token" in credentials:
            oauth2_dict["client_id"] = OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
            oauth2_dict["token"] = {}
            oauth2_dict["token"]["access_token"] = credentials[
                "confluence_access_token"
            ]
        return oauth2_dict

    def _build_spaces_url(
        self,
        is_v2: bool,
        base_url: str,
        limit: int,
        space_keys: list[str] | None,
        start: int | None = None,
    ) -> str:
        """Build URL for Confluence spaces API with query parameters."""
        key_param = "keys" if is_v2 else "spaceKey"

        params = [f"limit={limit}"]
        if space_keys:
            params.append(f"{key_param}={','.join(space_keys)}")
        if start is not None and not is_v2:
            params.append(f"start={start}")

        return f"{base_url}?{'&'.join(params)}"

    def _paginate_spaces_for_endpoint(
        self,
        is_v2: bool,
        base_url: str,
        limit: int,
        space_keys: list[str] | None,
    ) -> Iterator[dict[str, Any]]:
        """Internal helper to paginate through spaces for a specific API endpoint."""
        start = 0
        url = self._build_spaces_url(
            is_v2, base_url, limit, space_keys, start if not is_v2 else None
        )

        while url:
            response = self.get(url, advanced_mode=True)
            response.raise_for_status()
            data = response.json()

            results = data.get("results", [])
            if not results:
                return

            yield from results

            if is_v2:
                url = data.get("_links", {}).get("next", "")
            else:
                if len(results) < limit:
                    return
                start += len(results)
                url = self._build_spaces_url(is_v2, base_url, limit, space_keys, start)

    def retrieve_confluence_spaces(
        self,
        space_keys: list[str] | None = None,
        limit: int = 50,
    ) -> Iterator[dict[str, str]]:
        """
        Retrieve spaces from Confluence using v2 API (Cloud) or v1 API (Server/fallback).

        Args:
            space_keys: Optional list of space keys to filter by
            limit: Results per page (default 50)

        Yields:
            Space dictionaries with keys: id, key, name, type, status, etc.

        Note:
            For Cloud instances, attempts v2 API first. If v2 returns 404,
            automatically falls back to v1 API for compatibility with older instances.
        """
        # Determine API version once
        use_v2 = self._is_cloud and not self.scoped_token
        base_url = _CONFLUENCE_SPACES_API_V2 if use_v2 else _CONFLUENCE_SPACES_API_V1

        try:
            yield from self._paginate_spaces_for_endpoint(
                use_v2, base_url, limit, space_keys
            )
        except HTTPError as e:
            if e.response.status_code == 404 and use_v2:
                logger.warning(
                    "v2 spaces API returned 404, falling back to v1 API. This may indicate an older Confluence Cloud instance."
                )
                # Fallback to v1
                yield from self._paginate_spaces_for_endpoint(
                    False, _CONFLUENCE_SPACES_API_V1, limit, space_keys
                )
            else:
                raise

    def _probe_connection(
        self,
        **kwargs: Any,
    ) -> None:
        merged_kwargs = {**self.shared_base_kwargs, **kwargs}
        # add special timeout to make sure that we don't hang indefinitely
        merged_kwargs["timeout"] = self.PROBE_TIMEOUT

        with self._credentials_provider:
            credentials, _ = self._renew_credentials()
            if self.scoped_token:
                # v2 endpoint doesn't always work with scoped tokens, use v1
                token = credentials["confluence_access_token"]
                probe_url = f"{self.base_url}/{_CONFLUENCE_SPACES_API_V1}?limit=1"
                import requests

                try:
                    r = requests.get(
                        probe_url,
                        headers={"Authorization": f"Bearer {token}"},
                        timeout=10,
                    )
                    r.raise_for_status()
                except HTTPError as e:
                    if e.response.status_code == 403:
                        logger.warning(
                            "scoped token authenticated but not valid for probe endpoint (spaces)"
                        )
                    else:
                        if "WWW-Authenticate" in e.response.headers:
                            logger.warning(
                                f"WWW-Authenticate: {e.response.headers['WWW-Authenticate']}"
                            )
                            logger.warning(f"Full error: {e.response.text}")
                        raise e
                return

        # Initialize connection with probe timeout settings
        self._confluence = self._initialize_connection_helper(
            credentials, **merged_kwargs
        )

        # Retrieve first space to validate connection
        spaces_iter = self.retrieve_confluence_spaces(limit=1)
        first_space = next(spaces_iter, None)

        if not first_space:
            raise RuntimeError(
                f"No spaces found at {self._url}! Check your credentials and wiki_base and make sure is_cloud is set correctly."
            )

        logger.info("Confluence probe succeeded.")

    def _initialize_connection(
        self,
        **kwargs: Any,
    ) -> None:
        """Called externally to init the connection in a thread safe manner."""
        merged_kwargs = {**self.shared_base_kwargs, **kwargs}
        with self._credentials_provider:
            credentials, _ = self._renew_credentials()
            self._confluence = self._initialize_connection_helper(
                credentials, **merged_kwargs
            )
            self._kwargs = merged_kwargs

    def _initialize_connection_helper(
        self,
        credentials: dict[str, Any],
        **kwargs: Any,
    ) -> Confluence:
        """Called internally to init the connection. Distributed locking
        to prevent multiple threads from modifying the credentials
        must be handled around this function."""

        confluence = None

        # probe connection with direct client, no retries
        if "confluence_refresh_token" in credentials:
            logger.info("Connecting to Confluence Cloud with OAuth Access Token.")

            oauth2_dict: dict[str, Any] = OnyxConfluence._make_oauth2_dict(credentials)
            url = f"https://api.atlassian.com/ex/confluence/{credentials['cloud_id']}"
            confluence = Confluence(url=url, oauth2=oauth2_dict, **kwargs)
        else:
            logger.info(
                f"Connecting to Confluence with Personal Access Token as user: {credentials['confluence_username']}"
            )
            if self._is_cloud:
                confluence = Confluence(
                    url=self._url,
                    username=credentials["confluence_username"],
                    password=credentials["confluence_access_token"],
                    **kwargs,
                )
            else:
                confluence = Confluence(
                    url=self._url,
                    token=credentials["confluence_access_token"],
                    **kwargs,
                )

        return confluence

    # https://developer.atlassian.com/cloud/confluence/rate-limiting/
    # This uses the native rate limiting option provided by the
    # confluence client and otherwise applies a simpler set of error handling.
    def _make_rate_limited_confluence_method(
        self, name: str, credential_provider: CredentialsProviderInterface | None
    ) -> Callable[..., Any]:
        def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:
            MAX_RETRIES = 5

            TIMEOUT = 600
            timeout_at = time.monotonic() + TIMEOUT

            for attempt in range(MAX_RETRIES):
                if time.monotonic() > timeout_at:
                    raise TimeoutError(
                        f"Confluence call attempts took longer than {TIMEOUT} seconds."
                    )

                # we're relying more on the client to rate limit itself
                # and applying our own retries in a more specific set of circumstances
                try:
                    if credential_provider:
                        with credential_provider:
                            credentials, renewed = self._renew_credentials()
                            if renewed:
                                self._confluence = self._initialize_connection_helper(
                                    credentials, **self._kwargs
                                )
                            attr = getattr(self._confluence, name, None)
                            if attr is None:
                                # The underlying Confluence client doesn't have this attribute
                                raise AttributeError(
                                    f"'{type(self).__name__}' object has no attribute '{name}'"
                                )

                            return attr(*args, **kwargs)
                    else:
                        attr = getattr(self._confluence, name, None)
                        if attr is None:
                            # The underlying Confluence client doesn't have this attribute
                            raise AttributeError(
                                f"'{type(self).__name__}' object has no attribute '{name}'"
                            )

                        return attr(*args, **kwargs)

                except HTTPError as e:
                    delay_until = _handle_http_error(e, attempt, MAX_RETRIES)
                    logger.warning(
                        f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
                    )
                    while time.monotonic() < delay_until:
                        # in the future, check a signal here to exit
                        time.sleep(1)
                except AttributeError as e:
                    # Some error within the Confluence library, unclear why it fails.
                    # Users reported it to be intermittent, so just retry
                    if attempt == MAX_RETRIES - 1:
                        raise e

                    logger.exception(
                        "Confluence Client raised an AttributeError. Retrying..."
                    )
                    time.sleep(5)

        return wrapped_call

    def __getattr__(self, name: str) -> Any:
        """Dynamically intercept attribute/method access."""
        attr = getattr(self._confluence, name, None)
        if attr is None:
            # The underlying Confluence client doesn't have this attribute
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )

        # If it's not a method, just return it after ensuring token validity
        if not callable(attr):
            return attr

        # skip methods that start with "_"
        if name.startswith("_"):
            return attr

        # wrap the method with our retry handler
        rate_limited_method: Callable[..., Any] = (
            self._make_rate_limited_confluence_method(name, self._credentials_provider)
        )

        return rate_limited_method

    def _try_one_by_one_for_paginated_url(
        self,
        url_suffix: str,
        initial_start: int,
        limit: int,
    ) -> Generator[dict[str, Any], None, str | None]:
        """
        Go through `limit` items, starting at `initial_start` one by one (e.g. using
        `limit=1` for each call).

        If we encounter an error, we skip the item and try the next one. We will return
        the items we were able to retrieve successfully.

        Returns the expected next url_suffix. Returns None if it thinks we've hit the end.

        TODO (chris): make this yield failures as well as successes.
        TODO (chris): make this work for confluence cloud somehow.
        """
        if self._is_cloud:
            raise RuntimeError("This method is not implemented for Confluence Cloud.")

        found_empty_page = False
        temp_url_suffix = url_suffix

        for ind in range(limit):
            try:
                temp_url_suffix = update_param_in_path(
                    url_suffix, "start", str(initial_start + ind)
                )
                temp_url_suffix = update_param_in_path(temp_url_suffix, "limit", "1")
                logger.info(f"Making recovery confluence call to {temp_url_suffix}")
                raw_response = self.get(path=temp_url_suffix, advanced_mode=True)
                raw_response.raise_for_status()

                latest_results = raw_response.json().get("results", [])
                yield from latest_results

                if not latest_results:
                    # no more results, break out of the loop
                    logger.info(
                        f"No results found for call '{temp_url_suffix}'Stopping pagination."
                    )
                    found_empty_page = True
                    break
            except Exception:
                logger.exception(
                    f"Error in confluence call to {temp_url_suffix}. Continuing."
                )

        if found_empty_page:
            return None

        # if we got here, we successfully tried `limit` items
        return update_param_in_path(url_suffix, "start", str(initial_start + limit))

    def _paginate_url(
        self,
        url_suffix: str,
        limit: int | None = None,
        # Called with the next url to use to get the next page
        next_page_callback: Callable[[str], None] | None = None,
        force_offset_pagination: bool = False,
    ) -> Iterator[dict[str, Any]]:
        """
        This will paginate through the top level query.
        """
        if not limit:
            limit = _DEFAULT_PAGINATION_LIMIT

        url_suffix = update_param_in_path(url_suffix, "limit", str(limit))

        while url_suffix:
            logger.debug(f"Making confluence call to {url_suffix}")
            try:
                # Only pass params if they're not already in the URL to avoid duplicate
                # params accumulating. Confluence's _links.next already includes these.
                params = {}
                if "body-format=" not in url_suffix:
                    params["body-format"] = "atlas_doc_format"
                if "expand=" not in url_suffix:
                    params["expand"] = "body.atlas_doc_format"

                raw_response = self.get(
                    path=url_suffix,
                    advanced_mode=True,
                    params=params,
                )
            except Exception as e:
                logger.exception(f"Error in confluence call to {url_suffix}")
                raise e

            try:
                raw_response.raise_for_status()
            except Exception as e:
                logger.warning(f"Error in confluence call to {url_suffix}")

                # If the problematic expansion is in the url, replace it
                # with the replacement expansion and try again
                # If that fails, raise the error
                if _PROBLEMATIC_EXPANSIONS in url_suffix:
                    logger.warning(
                        f"Replacing {_PROBLEMATIC_EXPANSIONS} with {_REPLACEMENT_EXPANSIONS} and trying again."
                    )
                    url_suffix = url_suffix.replace(
                        _PROBLEMATIC_EXPANSIONS,
                        _REPLACEMENT_EXPANSIONS,
                    )
                    continue

                # If we fail due to a 500, try one by one.
                # NOTE: this iterative approach only works for server, since cloud uses cursor-based
                # pagination
                if raw_response.status_code == 500 and not self._is_cloud:
                    initial_start = get_start_param_from_url(url_suffix)
                    if initial_start is None:
                        # can't handle this if we don't have offset-based pagination
                        raise

                    # this will just yield the successful items from the batch
                    new_url_suffix = yield from self._try_one_by_one_for_paginated_url(
                        url_suffix,
                        initial_start=initial_start,
                        limit=limit,
                    )

                    # this means we ran into an empty page
                    if new_url_suffix is None:
                        if next_page_callback:
                            next_page_callback("")
                        break

                    url_suffix = new_url_suffix
                    continue

                else:
                    logger.exception(
                        f"Error in confluence call to {url_suffix} \n"
                        f"Raw Response Text: {raw_response.text} \n"
                        f"Full Response: {raw_response.__dict__} \n"
                        f"Error: {e} \n"
                    )
                    raise

            try:
                next_response = raw_response.json()
            except Exception as e:
                logger.exception(
                    f"Failed to parse response as JSON. Response: {raw_response.__dict__}"
                )
                raise e

            # Yield the results individually.
            results = cast(list[dict[str, Any]], next_response.get("results", []))

            # Note 1:
            # Make sure we don't update the start by more than the amount
            # of results we were able to retrieve. The Confluence API has a
            # weird behavior where if you pass in a limit that is too large for
            # the configured server, it will artificially limit the amount of
            # results returned BUT will not apply this to the start parameter.
            # This will cause us to miss results.
            #
            # Note 2:
            # We specifically perform manual yielding (i.e., `for x in xs: yield x`) as opposed to using a `yield from xs`
            # because we *have to call the `next_page_callback`* prior to yielding the last element!
            #
            # If we did:
            #
            # ```py
            # yield from results
            # if next_page_callback:
            #   next_page_callback(url_suffix)
            # ```
            #
            # then the logic would fail since the iterator would finish (and the calling scope would exit out of its driving
            # loop) prior to the callback being called.

            old_url_suffix = url_suffix
            updated_start = get_start_param_from_url(old_url_suffix)
            url_suffix = cast(str, next_response.get("_links", {}).get("next", ""))
            for i, result in enumerate(results):
                updated_start += 1
                if url_suffix and next_page_callback and i == len(results) - 1:
                    # update the url if we're on the last result in the page
                    if not self._is_cloud:
                        # If confluence claims there are more results, we update the start param
                        # based on how many results were returned and try again.
                        url_suffix = update_param_in_path(
                            url_suffix, "start", str(updated_start)
                        )
                    # notify the caller of the new url
                    next_page_callback(url_suffix)

                elif force_offset_pagination and i == len(results) - 1:
                    url_suffix = update_param_in_path(
                        old_url_suffix, "start", str(updated_start)
                    )

                yield result

            # we've observed that Confluence sometimes returns a next link despite giving
            # 0 results. This is a bug with Confluence, so we need to check for it and
            # stop paginating.
            if url_suffix and not results:
                logger.info(
                    f"No results found for call '{old_url_suffix}' despite next link being present. Stopping pagination."
                )
                break

    def build_cql_url(self, cql: str, expand: str | None = None) -> str:
        expand_string = f"&expand={expand}" if expand else ""
        return f"rest/api/content/search?cql={cql}{expand_string}"

    def paginated_cql_retrieval(
        self,
        cql: str,
        expand: str | None = None,
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        The content/search endpoint can be used to fetch pages, attachments, and comments.
        """
        cql_url = self.build_cql_url(cql, expand)
        yield from self._paginate_url(cql_url, limit)

    def paginated_page_retrieval(
        self,
        cql_url: str,
        limit: int,
        # Called with the next url to use to get the next page
        next_page_callback: Callable[[str], None] | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        Error handling (and testing) wrapper for _paginate_url,
        because the current approach to page retrieval involves handling the
        next page links manually.
        """
        try:
            yield from self._paginate_url(
                cql_url, limit=limit, next_page_callback=next_page_callback
            )
        except Exception as e:
            logger.exception(f"Error in paginated_page_retrieval: {e}")
            raise e

    def cql_paginate_all_expansions(
        self,
        cql: str,
        expand: str | None = None,
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        This function will paginate through the top level query first, then
        paginate through all of the expansions.
        """

        def _traverse_and_update(data: dict | list) -> None:
            if isinstance(data, dict):
                next_url = data.get("_links", {}).get("next")
                if next_url and "results" in data:
                    data["results"].extend(self._paginate_url(next_url, limit=limit))

                for value in data.values():
                    _traverse_and_update(value)
            elif isinstance(data, list):
                for item in data:
                    _traverse_and_update(item)

        for confluence_object in self.paginated_cql_retrieval(cql, expand, limit):
            _traverse_and_update(confluence_object)
            yield confluence_object

    def paginated_cql_user_retrieval(
        self,
        expand: str | None = None,
        limit: int | None = None,
    ) -> Iterator[ConfluenceUser]:
        """
        The search/user endpoint can be used to fetch users.
        It's a separate endpoint from the content/search endpoint used only for users.
        Otherwise it's very similar to the content/search endpoint.
        """

        # this is needed since there is a live bug with Confluence Server/Data Center
        # where not all users are returned by the APIs. This is a workaround needed until
        # that is patched.
        if self._confluence_user_profiles_override:
            yield from self._confluence_user_profiles_override

        elif self._is_cloud:
            cql = "type=user"
            url = "rest/api/search/user"
            expand_string = f"&expand={expand}" if expand else ""
            url += f"?cql={cql}{expand_string}"
            for user_result in self._paginate_url(
                url, limit, force_offset_pagination=True
            ):
                # Example response:
                # {
                #     'user': {
                #         'type': 'known',
                #         'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
                #         'accountType': 'atlassian',
                #         'email': 'chris@danswer.ai',
                #         'publicName': 'Chris Weaver',
                #         'profilePicture': {
                #             'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
                #             'width': 48,
                #             'height': 48,
                #             'isDefault': False
                #         },
                #         'displayName': 'Chris Weaver',
                #         'isExternalCollaborator': False,
                #         '_expandable': {
                #             'operations': '',
                #             'personalSpace': ''
                #         },
                #         '_links': {
                #             'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d'
                #         }
                #     },
                #     'title': 'Chris Weaver',
                #     'excerpt': '',
                #     'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
                #     'breadcrumbs': [],
                #     'entityType': 'user',
                #     'iconCssClass': 'aui-icon content-type-profile',
                #     'lastModified': '2025-02-18T04:08:03.579Z',
                #     'score': 0.0
                # }
                user = user_result["user"]
                yield ConfluenceUser(
                    user_id=user["accountId"],
                    username=None,
                    display_name=user["displayName"],
                    email=user.get("email"),
                    type=user["accountType"],
                )
        else:
            for user in self._paginate_url("rest/api/user/list", limit):
                yield ConfluenceUser(
                    user_id=user["userKey"],
                    username=user["username"],
                    display_name=user["displayName"],
                    email=None,
                    type=user.get("type", "user"),
                )

    def paginated_groups_by_user_retrieval(
        self,
        user_id: str,  # accountId in Cloud, userKey in Server
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        This is not an SQL like query.
        It's a confluence specific endpoint that can be used to fetch groups.
        """
        user_field = "accountId" if self._is_cloud else "key"
        user_value = user_id
        # Server uses userKey (but calls it key during the API call), Cloud uses accountId
        user_query = f"{user_field}={quote(user_value)}"

        url = f"rest/api/user/memberof?{user_query}"
        yield from self._paginate_url(url, limit, force_offset_pagination=True)

    def paginated_groups_retrieval(
        self,
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        This is not an SQL like query.
        It's a confluence specific endpoint that can be used to fetch groups.
        """
        yield from self._paginate_url("rest/api/group", limit)

    def paginated_group_members_retrieval(
        self,
        group_name: str,
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
        This is not an SQL like query.
        It's a confluence specific endpoint that can be used to fetch the members of a group.
        THIS DOESN'T WORK FOR SERVER because it breaks when there is a slash in the group name.
        E.g. neither "test/group" nor "test%2Fgroup" works for confluence.
        """
        group_name = quote(group_name)
        yield from self._paginate_url(f"rest/api/group/{group_name}/member", limit)

    def get_all_space_permissions_server(
        self,
        space_key: str,
    ) -> list[dict[str, Any]]:
        """
        This is a confluence server/data center specific method that can be used to
        fetch the permissions of a space.

        NOTE: This uses the JSON-RPC API which is the ONLY way to get space permissions
        on Confluence Server/Data Center. The REST API equivalent (expand=permissions)
        is Cloud-only and not available on Data Center as of version 8.9.x.

        If this fails with 401 Unauthorized, the customer needs to enable JSON-RPC:
        Confluence Admin -> General Configuration -> Further Configuration
        -> Enable "Remote API (XML-RPC & SOAP)"
        """
        url = "rpc/json-rpc/confluenceservice-v2"
        data = {
            "jsonrpc": "2.0",
            "method": "getSpacePermissionSets",
            "id": 7,
            "params": [space_key],
        }
        try:
            response = self.post(url, data=data)
        except HTTPError as e:
            if e.response is not None and e.response.status_code == 401:
                raise HTTPError(
                    "Unauthorized (401) when calling JSON-RPC API for space permissions. "
                    "This is likely because the Remote API is disabled. "
                    "To fix: Confluence Admin -> General Configuration -> Further Configuration "
                    "-> Enable 'Remote API (XML-RPC & SOAP)'",
                    response=e.response,
                ) from e
            raise
        logger.debug(f"jsonrpc response: {response}")
        if not response.get("result"):
            logger.warning(
                f"No jsonrpc response for space permissions for space {space_key}\nResponse: {response}"
            )

        return response.get("result", [])

    def get_current_user(self, expand: str | None = None) -> Any:
        """
        Implements a method that isn't in the third party client.

        Get information about the current user
        :param expand: OPTIONAL expand for get status of user.
                Possible param is "status". Results are "Active, Deactivated"
        :return: Returns the user details
        """

        from atlassian.errors import ApiPermissionError  # type:ignore

        url = "rest/api/user/current"
        params = {}
        if expand:
            params["expand"] = expand
        try:
            response = self.get(url, params=params)
        except HTTPError as e:
            if e.response.status_code == 403:
                raise ApiPermissionError(
                    "The calling user does not have permission", reason=e
                )
            raise
        return response


def get_user_email_from_username__server(
    confluence_client: OnyxConfluence, user_name: str
) -> str | None:
    global _USER_EMAIL_CACHE
    if _USER_EMAIL_CACHE.get(user_name) is None:
        try:
            response = confluence_client.get_mobile_parameters(user_name)
            email = response.get("email")
        except HTTPError as e:
            status_code = e.response.status_code if e.response is not None else "N/A"
            logger.warning(
                f"Failed to get confluence email for {user_name}: HTTP {status_code} - {e}"
            )
            # For now, we'll just return None and log a warning. This means
            # we will keep retrying to get the email every group sync.
            email = None
        except Exception as e:
            logger.warning(
                f"Failed to get confluence email for {user_name}: {type(e).__name__} - {e}"
            )
            email = None
        _USER_EMAIL_CACHE[user_name] = email
    return _USER_EMAIL_CACHE[user_name]


def _get_user(confluence_client: OnyxConfluence, user_id: str) -> str:
    """Get Confluence Display Name based on the account-id or userkey value

    Args:
        user_id (str): The user id (i.e: the account-id or userkey)
        confluence_client (Confluence): The Confluence Client

    Returns:
        str: The User Display Name. 'Unknown User' if the user is deactivated or not found
    """
    global _USER_ID_TO_DISPLAY_NAME_CACHE
    if _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) is None:
        try:
            result = confluence_client.get_user_details_by_userkey(user_id)
            found_display_name = result.get("displayName")
        except Exception:
            found_display_name = None

        if not found_display_name:
            try:
                result = confluence_client.get_user_details_by_accountid(user_id)
                found_display_name = result.get("displayName")
            except Exception:
                found_display_name = None

        _USER_ID_TO_DISPLAY_NAME_CACHE[user_id] = found_display_name

    return _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) or _USER_NOT_FOUND


def sanitize_attachment_title(title: str) -> str:
    """
    Sanitize the attachment title to be a valid HTML attribute.
    """
    return title.replace("<", "_").replace(">", "_").replace(" ", "_").replace(":", "_")


def extract_text_from_confluence_html(
    confluence_client: OnyxConfluence,
    confluence_object: dict[str, Any],
    fetched_titles: set[str],
) -> str:
    """Parse a Confluence html page and replace the 'user Id' by the real
        User Display Name

    Args:
        confluence_object (dict): The confluence object as a dict
        confluence_client (Confluence): Confluence client
        fetched_titles (set[str]): The titles of the pages that have already been fetched
    Returns:
        str: loaded and formated Confluence page
    """
    body = confluence_object["body"]
    object_html = body.get("storage", body.get("view", {})).get("value")

    soup = bs4.BeautifulSoup(object_html, "html.parser")

    _remove_macro_stylings(soup=soup)

    for user in soup.findAll("ri:user"):
        user_id = (
            user.attrs["ri:account-id"]
            if "ri:account-id" in user.attrs
            else user.get("ri:userkey")
        )
        if not user_id:
            logger.warning(
                f"ri:userkey not found in ri:user element. Found attrs: {user.attrs}"
            )
            continue
        # Include @ sign for tagging, more clear for LLM
        user.replaceWith("@" + _get_user(confluence_client, user_id))

    for html_page_reference in soup.findAll("ac:structured-macro"):
        # Here, we only want to process page within page macros
        if html_page_reference.attrs.get("ac:name") != "include":
            continue

        page_data = html_page_reference.find("ri:page")
        if not page_data:
            logger.warning(
                f"Skipping retrieval of {html_page_reference} because because page data is missing"
            )
            continue

        page_title = page_data.attrs.get("ri:content-title")
        if not page_title:
            # only fetch pages that have a title
            logger.warning(
                f"Skipping retrieval of {html_page_reference} because it has no title"
            )
            continue

        if page_title in fetched_titles:
            # prevent recursive fetching of pages
            logger.debug(f"Skipping {page_title} because it has already been fetched")
            continue

        fetched_titles.add(page_title)

        # Wrap this in a try-except because there are some pages that might not exist
        try:
            page_query = f"type=page and title='{quote(page_title)}'"

            page_contents: dict[str, Any] | None = None
            # Confluence enforces title uniqueness, so we should only get one result here
            for page in confluence_client.paginated_cql_retrieval(
                cql=page_query,
                expand="body.storage.value",
                limit=1,
            ):
                page_contents = page
                break
        except Exception as e:
            logger.warning(
                f"Error getting page contents for object {confluence_object}: {e}"
            )
            continue

        if not page_contents:
            continue

        text_from_page = extract_text_from_confluence_html(
            confluence_client=confluence_client,
            confluence_object=page_contents,
            fetched_titles=fetched_titles,
        )

        html_page_reference.replaceWith(text_from_page)

    for html_link_body in soup.findAll("ac:link-body"):
        # This extracts the text from inline links in the page so they can be
        # represented in the document text as plain text
        try:
            text_from_link = html_link_body.text
            html_link_body.replaceWith(f"(LINK TEXT: {text_from_link})")
        except Exception as e:
            logger.warning(f"Error processing ac:link-body: {e}")

    for html_attachment in soup.findAll("ri:attachment"):
        # This extracts the text from inline attachments in the page so they can be
        # represented in the document text as plain text
        try:
            html_attachment.replaceWith(
                f"<attachment>{sanitize_attachment_title(html_attachment.attrs['ri:filename'])}</attachment>"
            )  # to be replaced later
        except Exception as e:
            logger.warning(f"Error processing ac:attachment: {e}")

    return format_document_soup(soup)


def _remove_macro_stylings(soup: bs4.BeautifulSoup) -> None:
    for macro_root in soup.findAll("ac:structured-macro"):
        if not isinstance(macro_root, bs4.Tag):
            continue

        macro_styling = macro_root.find(name="ac:parameter", attrs={"ac:name": "page"})
        if not macro_styling or not isinstance(macro_styling, bs4.Tag):
            continue

        macro_styling.extract()


================================================
FILE: backend/onyx/connectors/confluence/user_profile_override.py
================================================
from onyx.connectors.confluence.models import ConfluenceUser


def process_confluence_user_profiles_override(
    confluence_user_email_override: list[dict[str, str]],
) -> list[ConfluenceUser]:
    return [
        ConfluenceUser(
            user_id=override["user_id"],
            # username is not returned by the Confluence Server API anyways
            username=override["username"],
            display_name=override["display_name"],
            email=override["email"],
            type=override["type"],
        )
        for override in confluence_user_email_override
        if override is not None
    ]


================================================
FILE: backend/onyx/connectors/confluence/utils.py
================================================
import math
import time
from collections.abc import Callable
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from io import BytesIO
from pathlib import Path
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from typing import TypeVar
from urllib.parse import parse_qs
from urllib.parse import quote
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from pydantic import BaseModel

from onyx.configs.app_configs import (
    CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,
)
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD
from onyx.configs.constants import FileOrigin
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    from onyx.connectors.confluence.onyx_confluence import OnyxConfluence


logger = setup_logger()

CONFLUENCE_OAUTH_TOKEN_URL = "https://auth.atlassian.com/oauth/token"
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()


class TokenResponse(BaseModel):
    access_token: str
    expires_in: int
    token_type: str
    refresh_token: str
    scope: str


def validate_attachment_filetype(
    attachment: dict[str, Any],
) -> bool:
    """
    Validates if the attachment is a supported file type.
    """
    media_type = attachment.get("metadata", {}).get("mediaType", "")
    if media_type.startswith("image/"):
        return media_type in OnyxMimeTypes.IMAGE_MIME_TYPES

    # For non-image files, check if we support the extension
    title = attachment.get("title", "")
    extension = get_file_ext(title)

    return extension in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS


class AttachmentProcessingResult(BaseModel):
    """
    A container for results after processing a Confluence attachment.
    'text' is the textual content of the attachment.
    'file_name' is the final file name used in FileStore to store the content.
    'error' holds an exception or string if something failed.
    """

    text: str | None
    file_name: str | None
    error: str | None = None


def _make_attachment_link(
    confluence_client: "OnyxConfluence",
    attachment: dict[str, Any],
    parent_content_id: str | None = None,
) -> str | None:
    download_link = ""

    if "api.atlassian.com" in confluence_client.url:
        # https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
        if not parent_content_id:
            logger.warning(
                "parent_content_id is required to download attachments from Confluence Cloud!"
            )
            return None

        download_link = (
            confluence_client.url
            + f"/rest/api/content/{parent_content_id}/child/attachment/{attachment['id']}/download"
        )
    else:
        download_link = confluence_client.url + attachment["_links"]["download"]

    return download_link


def process_attachment(
    confluence_client: "OnyxConfluence",
    attachment: dict[str, Any],
    parent_content_id: str | None,
    allow_images: bool,
) -> AttachmentProcessingResult:
    """
    Processes a Confluence attachment. If it's a document, extracts text,
    or if it's an image, stores it for later analysis. Returns a structured result.
    """
    try:
        # Get the media type from the attachment metadata
        media_type: str = attachment.get("metadata", {}).get("mediaType", "")
        # Validate the attachment type
        if not validate_attachment_filetype(attachment):
            return AttachmentProcessingResult(
                text=None,
                file_name=None,
                error=f"Unsupported file type: {media_type}",
            )

        attachment_link = _make_attachment_link(
            confluence_client, attachment, parent_content_id
        )
        if not attachment_link:
            return AttachmentProcessingResult(
                text=None, file_name=None, error="Failed to make attachment link"
            )

        attachment_size = attachment["extensions"]["fileSize"]

        if media_type.startswith("image/"):
            if not allow_images:
                return AttachmentProcessingResult(
                    text=None,
                    file_name=None,
                    error="Image downloading is not enabled",
                )
        else:
            if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:
                logger.warning(
                    f"Skipping {attachment_link} due to size. "
                    f"size={attachment_size} "
                    f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}"
                )
                return AttachmentProcessingResult(
                    text=None,
                    file_name=None,
                    error=f"Attachment text too long: {attachment_size} chars",
                )

        logger.info(
            f"Downloading attachment: title={attachment['title']} length={attachment_size} link={attachment_link}"
        )

        # Download the attachment
        resp: requests.Response = confluence_client._session.get(attachment_link)
        if resp.status_code != 200:
            logger.warning(
                f"Failed to fetch {attachment_link} with status code {resp.status_code}"
            )
            return AttachmentProcessingResult(
                text=None,
                file_name=None,
                error=f"Attachment download status code is {resp.status_code}",
            )

        raw_bytes = resp.content
        if not raw_bytes:
            return AttachmentProcessingResult(
                text=None, file_name=None, error="attachment.content is None"
            )

        # Process image attachments
        if media_type.startswith("image/"):
            return _process_image_attachment(
                confluence_client, attachment, raw_bytes, media_type
            )

        # Process document attachments
        try:
            text = extract_file_text(
                file=BytesIO(raw_bytes),
                file_name=attachment["title"],
            )

            # Skip if the text is too long
            if len(text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
                return AttachmentProcessingResult(
                    text=None,
                    file_name=None,
                    error=f"Attachment text too long: {len(text)} chars",
                )

            return AttachmentProcessingResult(text=text, file_name=None, error=None)
        except Exception as e:
            return AttachmentProcessingResult(
                text=None, file_name=None, error=f"Failed to extract text: {e}"
            )

    except Exception as e:
        return AttachmentProcessingResult(
            text=None, file_name=None, error=f"Failed to process attachment: {e}"
        )


def _process_image_attachment(
    confluence_client: "OnyxConfluence",  # noqa: ARG001
    attachment: dict[str, Any],
    raw_bytes: bytes,
    media_type: str,
) -> AttachmentProcessingResult:
    """Process an image attachment by saving it without generating a summary."""
    try:
        # Use the standardized image storage and section creation
        section, file_name = store_image_and_create_section(
            image_data=raw_bytes,
            file_id=Path(attachment["id"]).name,
            display_name=attachment["title"],
            media_type=media_type,
            file_origin=FileOrigin.CONNECTOR,
        )
        logger.info(f"Stored image attachment with file name: {file_name}")

        # Return empty text but include the file_name for later processing
        return AttachmentProcessingResult(text="", file_name=file_name, error=None)
    except Exception as e:
        msg = f"Image storage failed for {attachment['title']}: {e}"
        logger.error(msg, exc_info=e)
        return AttachmentProcessingResult(text=None, file_name=None, error=msg)


def convert_attachment_to_content(
    confluence_client: "OnyxConfluence",
    attachment: dict[str, Any],
    page_id: str,
    allow_images: bool,
) -> tuple[str | None, str | None] | None:
    """
    Facade function which:
      1. Validates attachment type
      2. Extracts content or stores image for later processing
      3. Returns (content_text, stored_file_name) or None if we should skip it
    """
    media_type = attachment.get("metadata", {}).get("mediaType", "")
    # Quick check for unsupported types:
    if media_type.startswith("video/") or media_type == "application/gliffy+json":
        logger.warning(
            f"Skipping unsupported attachment type: '{media_type}' for {attachment['title']}"
        )
        return None

    result = process_attachment(confluence_client, attachment, page_id, allow_images)
    if result.error is not None:
        logger.warning(
            f"Attachment {attachment['title']} encountered error: {result.error}"
        )
        return None

    # Return the text and the file name
    return result.text, result.file_name


def build_confluence_document_id(
    base_url: str, content_url: str, is_cloud: bool
) -> str:
    """For confluence, the document id is the page url for a page based document
        or the attachment download url for an attachment based document

    Args:
        base_url (str): The base url of the Confluence instance
        content_url (str): The url of the page or attachment download url

    Returns:
        str: The document id
    """

    # NOTE: urljoin is tricky and will drop the last segment of the base if it doesn't
    # end with "/" because it believes that makes it a file.
    final_url = base_url.rstrip("/") + "/"
    if is_cloud and not final_url.endswith("/wiki/"):
        final_url = urljoin(final_url, "wiki") + "/"
    final_url = urljoin(final_url, content_url.lstrip("/"))
    return final_url


def datetime_from_string(datetime_string: str) -> datetime:
    datetime_object = datetime.fromisoformat(datetime_string)

    if datetime_object.tzinfo is None:
        # If no timezone info, assume it is UTC
        datetime_object = datetime_object.replace(tzinfo=timezone.utc)
    else:
        # If not in UTC, translate it
        datetime_object = datetime_object.astimezone(timezone.utc)

    return datetime_object


def confluence_refresh_tokens(
    client_id: str, client_secret: str, cloud_id: str, refresh_token: str
) -> dict[str, Any]:
    # rotate the refresh and access token
    # Note that access tokens are only good for an hour in confluence cloud,
    # so we're going to have problems if the connector runs for longer
    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/#use-a-refresh-token-to-get-another-access-token-and-refresh-token-pair
    response = requests.post(
        CONFLUENCE_OAUTH_TOKEN_URL,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={
            "grant_type": "refresh_token",
            "client_id": client_id,
            "client_secret": client_secret,
            "refresh_token": refresh_token,
        },
    )

    try:
        token_response = TokenResponse.model_validate_json(response.text)
    except Exception:
        raise RuntimeError("Confluence Cloud token refresh failed.")

    now = datetime.now(timezone.utc)
    expires_at = now + timedelta(seconds=token_response.expires_in)

    new_credentials: dict[str, Any] = {}
    new_credentials["confluence_access_token"] = token_response.access_token
    new_credentials["confluence_refresh_token"] = token_response.refresh_token
    new_credentials["created_at"] = now.isoformat()
    new_credentials["expires_at"] = expires_at.isoformat()
    new_credentials["expires_in"] = token_response.expires_in
    new_credentials["scope"] = token_response.scope
    new_credentials["cloud_id"] = cloud_id
    return new_credentials


F = TypeVar("F", bound=Callable[..., Any])


# https://developer.atlassian.com/cloud/confluence/rate-limiting/
# this uses the native rate limiting option provided by the
# confluence client and otherwise applies a simpler set of error handling
def handle_confluence_rate_limit(confluence_call: F) -> F:
    def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:
        MAX_RETRIES = 5

        TIMEOUT = 600
        timeout_at = time.monotonic() + TIMEOUT

        for attempt in range(MAX_RETRIES):
            if time.monotonic() > timeout_at:
                raise TimeoutError(
                    f"Confluence call attempts took longer than {TIMEOUT} seconds."
                )

            try:
                # we're relying more on the client to rate limit itself
                # and applying our own retries in a more specific set of circumstances
                return confluence_call(*args, **kwargs)
            except requests.HTTPError as e:
                delay_until = _handle_http_error(e, attempt, MAX_RETRIES)
                logger.warning(
                    f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
                )
                while time.monotonic() < delay_until:
                    # in the future, check a signal here to exit
                    time.sleep(1)
            except AttributeError as e:
                # Some error within the Confluence library, unclear why it fails.
                # Users reported it to be intermittent, so just retry
                if attempt == MAX_RETRIES - 1:
                    raise e

                logger.exception(
                    "Confluence Client raised an AttributeError. Retrying..."
                )
                time.sleep(5)

    return cast(F, wrapped_call)


def _handle_http_error(e: requests.HTTPError, attempt: int, max_retries: int) -> int:
    MIN_DELAY = 2
    MAX_DELAY = 60
    STARTING_DELAY = 5
    BACKOFF = 2

    # Check if the response or headers are None to avoid potential AttributeError
    if e.response is None or e.response.headers is None:
        logger.warning("HTTPError with `None` as response or as headers")
        raise e

    # Confluence Server returns 403 when rate limited
    if e.response.status_code == 403:
        FORBIDDEN_MAX_RETRY_ATTEMPTS = 7
        FORBIDDEN_RETRY_DELAY = 10
        if attempt < FORBIDDEN_MAX_RETRY_ATTEMPTS:
            logger.warning(
                "403 error. This sometimes happens when we hit "
                f"Confluence rate limits. Retrying in {FORBIDDEN_RETRY_DELAY} seconds..."
            )
            return FORBIDDEN_RETRY_DELAY

        raise e

    if e.response.status_code >= 500:
        if attempt >= max_retries - 1:
            raise e

        delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY)
        logger.warning(
            f"Server error {e.response.status_code}. "
            f"Retrying in {delay} seconds (attempt {attempt + 1})..."
        )
        return math.ceil(time.monotonic() + delay)

    if (
        e.response.status_code != 429
        and RATE_LIMIT_MESSAGE_LOWERCASE not in e.response.text.lower()
    ):
        raise e

    retry_after = None

    retry_after_header = e.response.headers.get("Retry-After")
    if retry_after_header is not None:
        try:
            retry_after = int(retry_after_header)
            if retry_after > MAX_DELAY:
                logger.warning(
                    f"Clamping retry_after from {retry_after} to {MAX_DELAY} seconds..."
                )
                retry_after = MAX_DELAY
            if retry_after < MIN_DELAY:
                retry_after = MIN_DELAY
        except ValueError:
            pass

    if retry_after is not None:
        logger.warning(
            f"Rate limiting with retry header. Retrying after {retry_after} seconds..."
        )
        delay = retry_after
    else:
        logger.warning(
            "Rate limiting without retry header. Retrying with exponential backoff..."
        )
        delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY)

    delay_until = math.ceil(time.monotonic() + delay)
    return delay_until


def get_single_param_from_url(url: str, param: str) -> str | None:
    """Get a parameter from a url"""
    parsed_url = urlparse(url)
    return parse_qs(parsed_url.query).get(param, [None])[0]


def get_start_param_from_url(url: str) -> int:
    """Get the start parameter from a url"""
    start_str = get_single_param_from_url(url, "start")
    return int(start_str) if start_str else 0


def update_param_in_path(path: str, param: str, value: str) -> str:
    """Update a parameter in a path. Path should look something like:

    /api/rest/users?start=0&limit=10
    """
    parsed_url = urlparse(path)
    query_params = parse_qs(parsed_url.query)
    query_params[param] = [value]
    return (
        path.split("?")[0]
        + "?"
        + "&".join(f"{k}={quote(v[0])}" for k, v in query_params.items())
    )


================================================
FILE: backend/onyx/connectors/connector_runner.py
================================================
import sys
import time
from collections.abc import Generator
from datetime import datetime
from typing import Generic
from typing import TypeVar

from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.utils.logger import setup_logger


logger = setup_logger()


TimeRange = tuple[datetime, datetime]

CT = TypeVar("CT", bound=ConnectorCheckpoint)


def batched_doc_ids(
    checkpoint_connector_generator: CheckpointOutput[CT],
    batch_size: int,
) -> Generator[set[str], None, None]:
    batch: set[str] = set()
    for document, hierarchy_node, failure, next_checkpoint in CheckpointOutputWrapper[
        CT
    ]()(checkpoint_connector_generator):
        if document is not None:
            batch.add(document.id)
        elif (
            failure and failure.failed_document and failure.failed_document.document_id
        ):
            batch.add(failure.failed_document.document_id)
        # HierarchyNodes don't have IDs that need to be batched for doc processing

        if len(batch) >= batch_size:
            yield batch
            batch = set()
    if len(batch) > 0:
        yield batch


class CheckpointOutputWrapper(Generic[CT]):
    """
    Wraps a CheckpointOutput generator to give things back in a more digestible format,
    specifically for Document outputs.
    The connector format is easier for the connector implementor (e.g. it enforces exactly
    one new checkpoint is returned AND that the checkpoint is at the end), thus the different
    formats.
    """

    def __init__(self) -> None:
        self.next_checkpoint: CT | None = None

    def __call__(
        self,
        checkpoint_connector_generator: CheckpointOutput[CT],
    ) -> Generator[
        tuple[
            Document | None, HierarchyNode | None, ConnectorFailure | None, CT | None
        ],
        None,
        None,
    ]:
        # grabs the final return value and stores it in the `next_checkpoint` variable
        def _inner_wrapper(
            checkpoint_connector_generator: CheckpointOutput[CT],
        ) -> CheckpointOutput[CT]:
            self.next_checkpoint = yield from checkpoint_connector_generator
            return self.next_checkpoint  # not used

        for item in _inner_wrapper(checkpoint_connector_generator):
            if isinstance(item, Document):
                yield item, None, None, None
            elif isinstance(item, HierarchyNode):
                yield None, item, None, None
            elif isinstance(item, ConnectorFailure):
                yield None, None, item, None
            else:
                raise ValueError(f"Invalid connector output type: {type(item)}")

        if self.next_checkpoint is None:
            raise RuntimeError(
                "Checkpoint is None. This should never happen - the connector should always return a checkpoint."
            )

        yield None, None, None, self.next_checkpoint


class ConnectorRunner(Generic[CT]):
    """
    Handles:
        - Batching
        - Additional exception logging
        - Combining different connector types to a single interface
    """

    def __init__(
        self,
        connector: BaseConnector,
        batch_size: int,
        # cannot be True for non-checkpointed connectors
        include_permissions: bool,
        time_range: TimeRange | None = None,
    ):
        if not isinstance(connector, CheckpointedConnector) and include_permissions:
            raise ValueError(
                "include_permissions cannot be True for non-checkpointed connectors"
            )

        self.connector = connector
        self.time_range = time_range
        self.batch_size = batch_size
        self.include_permissions = include_permissions

        self.doc_batch: list[Document] = []
        self.hierarchy_node_batch: list[HierarchyNode] = []

    def run(self, checkpoint: CT) -> Generator[
        tuple[
            list[Document] | None,
            list[HierarchyNode] | None,
            ConnectorFailure | None,
            CT | None,
        ],
        None,
        None,
    ]:
        """
        Yields batches of Documents, HierarchyNodes, failures, and checkpoints.

        Returns tuples of:
        - (doc_batch, None, None, None) - batch of documents
        - (None, hierarchy_batch, None, None) - batch of hierarchy nodes
        - (None, None, failure, None) - a connector failure
        - (None, None, None, checkpoint) - new checkpoint
        """
        try:
            if isinstance(self.connector, CheckpointedConnector):
                if self.time_range is None:
                    raise ValueError("time_range is required for CheckpointedConnector")

                start = time.monotonic()
                if self.include_permissions:
                    if not isinstance(
                        self.connector, CheckpointedConnectorWithPermSync
                    ):
                        raise ValueError(
                            "Connector does not support permission syncing"
                        )
                    load_from_checkpoint = (
                        self.connector.load_from_checkpoint_with_perm_sync
                    )
                else:
                    load_from_checkpoint = self.connector.load_from_checkpoint
                checkpoint_connector_generator = load_from_checkpoint(
                    start=self.time_range[0].timestamp(),
                    end=self.time_range[1].timestamp(),
                    checkpoint=checkpoint,
                )
                next_checkpoint: CT | None = None
                # this is guaranteed to always run at least once with next_checkpoint being non-None
                for (
                    document,
                    hierarchy_node,
                    failure,
                    next_checkpoint,
                ) in CheckpointOutputWrapper[CT]()(checkpoint_connector_generator):
                    if document is not None:
                        self.doc_batch.append(document)

                    if hierarchy_node is not None:
                        self.hierarchy_node_batch.append(hierarchy_node)

                    if failure is not None:
                        yield None, None, failure, None

                    # Yield hierarchy nodes batch if it reaches batch_size
                    # (yield nodes before docs to maintain parent-before-child invariant)
                    if len(self.hierarchy_node_batch) >= self.batch_size:
                        yield None, self.hierarchy_node_batch, None, None
                        self.hierarchy_node_batch = []

                    # Yield document batch if it reaches batch_size
                    # First flush any pending hierarchy nodes to ensure parents exist
                    if len(self.doc_batch) >= self.batch_size:
                        if len(self.hierarchy_node_batch) > 0:
                            yield None, self.hierarchy_node_batch, None, None
                            self.hierarchy_node_batch = []
                        yield self.doc_batch, None, None, None
                        self.doc_batch = []

                # yield remaining hierarchy nodes first (parents before children)
                if len(self.hierarchy_node_batch) > 0:
                    yield None, self.hierarchy_node_batch, None, None
                    self.hierarchy_node_batch = []

                # yield remaining documents
                if len(self.doc_batch) > 0:
                    yield self.doc_batch, None, None, None
                    self.doc_batch = []

                yield None, None, None, next_checkpoint

                logger.debug(
                    f"Connector took {time.monotonic() - start} seconds to get to the next checkpoint."
                )

            else:
                finished_checkpoint = self.connector.build_dummy_checkpoint()
                finished_checkpoint.has_more = False

                if isinstance(self.connector, PollConnector):
                    if self.time_range is None:
                        raise ValueError("time_range is required for PollConnector")

                    for batch in self.connector.poll_source(
                        start=self.time_range[0].timestamp(),
                        end=self.time_range[1].timestamp(),
                    ):
                        docs, nodes = self._separate_batch(batch)
                        if nodes:
                            yield None, nodes, None, None
                        if docs:
                            yield docs, None, None, None

                    yield None, None, None, finished_checkpoint
                elif isinstance(self.connector, LoadConnector):
                    for batch in self.connector.load_from_state():
                        docs, nodes = self._separate_batch(batch)
                        if nodes:
                            yield None, nodes, None, None
                        if docs:
                            yield docs, None, None, None

                    yield None, None, None, finished_checkpoint
                else:
                    raise ValueError(f"Invalid connector. type: {type(self.connector)}")
        except Exception:
            exc_type, _, exc_traceback = sys.exc_info()

            # Traverse the traceback to find the last frame where the exception was raised
            tb = exc_traceback
            if tb is None:
                logger.error("No traceback found for exception")
                raise

            while tb.tb_next:
                tb = tb.tb_next  # Move to the next frame in the traceback

            # Get the local variables from the frame where the exception occurred
            local_vars = tb.tb_frame.f_locals
            local_vars_str = "\n".join(
                f"{key}: {value}" for key, value in local_vars.items()
            )
            logger.error(
                f"Error in connector. type: {exc_type};\nlocal_vars below -> \n{local_vars_str[:1024]}"
            )
            raise

    def _separate_batch(
        self, batch: list[Document | HierarchyNode]
    ) -> tuple[list[Document], list[HierarchyNode]]:
        """Separate a mixed batch into Documents and HierarchyNodes."""
        docs: list[Document] = []
        nodes: list[HierarchyNode] = []
        for item in batch:
            if isinstance(item, Document):
                docs.append(item)
            elif isinstance(item, HierarchyNode):
                nodes.append(item)
        return docs, nodes


================================================
FILE: backend/onyx/connectors/credentials_provider.py
================================================
import uuid
from types import TracebackType
from typing import Any

from redis.lock import Lock as RedisLock
from sqlalchemy import select

from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import Credential
from onyx.redis.redis_pool import get_redis_client


class OnyxDBCredentialsProvider(
    CredentialsProviderInterface["OnyxDBCredentialsProvider"]
):
    """Implementation to allow the connector to callback and update credentials in the db.
    Required in cases where credentials can rotate while the connector is running.
    """

    LOCK_TTL = 900  # TTL of the lock

    def __init__(self, tenant_id: str, connector_name: str, credential_id: int):
        self._tenant_id = tenant_id
        self._connector_name = connector_name
        self._credential_id = credential_id

        self.redis_client = get_redis_client(tenant_id=tenant_id)

        # lock used to prevent overlapping renewal of credentials
        self.lock_key = f"da_lock:connector:{connector_name}:credential_{credential_id}"
        self._lock: RedisLock = self.redis_client.lock(self.lock_key, self.LOCK_TTL)

    def __enter__(self) -> "OnyxDBCredentialsProvider":
        acquired = self._lock.acquire(blocking_timeout=self.LOCK_TTL)
        if not acquired:
            raise RuntimeError(f"Could not acquire lock for key: {self.lock_key}")

        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        """Release the lock when exiting the context."""
        if self._lock and self._lock.owned():
            self._lock.release()

    def get_tenant_id(self) -> str | None:
        return self._tenant_id

    def get_provider_key(self) -> str:
        return str(self._credential_id)

    def get_credentials(self) -> dict[str, Any]:
        with get_session_with_tenant(tenant_id=self._tenant_id) as db_session:
            credential = db_session.execute(
                select(Credential).where(Credential.id == self._credential_id)
            ).scalar_one()

            if credential is None:
                raise ValueError(
                    f"No credential found: credential={self._credential_id}"
                )

            if credential.credential_json is None:
                return {}
            return credential.credential_json.get_value(apply_mask=False)

    def set_credentials(self, credential_json: dict[str, Any]) -> None:
        with get_session_with_tenant(tenant_id=self._tenant_id) as db_session:
            try:
                credential = db_session.execute(
                    select(Credential)
                    .where(Credential.id == self._credential_id)
                    .with_for_update()
                ).scalar_one()

                if credential is None:
                    raise ValueError(
                        f"No credential found: credential={self._credential_id}"
                    )

                credential.credential_json = credential_json  # type: ignore[assignment]
                db_session.commit()
            except Exception:
                db_session.rollback()
                raise

    def is_dynamic(self) -> bool:
        return True


class OnyxStaticCredentialsProvider(
    CredentialsProviderInterface["OnyxStaticCredentialsProvider"]
):
    """Implementation (a very simple one!) to handle static credentials."""

    def __init__(
        self,
        tenant_id: str | None,
        connector_name: str,
        credential_json: dict[str, Any],
    ):
        self._tenant_id = tenant_id
        self._connector_name = connector_name
        self._credential_json = credential_json

        self._provider_key = str(uuid.uuid4())

    def __enter__(self) -> "OnyxStaticCredentialsProvider":
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        pass

    def get_tenant_id(self) -> str | None:
        return self._tenant_id

    def get_provider_key(self) -> str:
        return self._provider_key

    def get_credentials(self) -> dict[str, Any]:
        return self._credential_json

    def set_credentials(self, credential_json: dict[str, Any]) -> None:
        self._credential_json = credential_json

    def is_dynamic(self) -> bool:
        return False


================================================
FILE: backend/onyx/connectors/cross_connector_utils/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
================================================
import re
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TypeVar
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from dateutil.parser import parse
from dateutil.parser import ParserError

from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import IGNORE_FOR_QA
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import OnyxMetadata
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import is_valid_email


T = TypeVar("T")
U = TypeVar("U")
logger = setup_logger()


def datetime_to_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
        dt = dt.replace(tzinfo=timezone.utc)

    return dt.astimezone(timezone.utc)


def time_str_to_utc(datetime_str: str) -> datetime:
    # Remove all timezone abbreviations in parentheses
    normalized = re.sub(r"\([A-Z]+\)", "", datetime_str).strip()

    # Remove any remaining parentheses and their contents
    normalized = re.sub(r"\(.*?\)", "", normalized).strip()

    candidates: list[str] = [normalized]

    # Some sources (e.g. Gmail) may prefix the value with labels like "Date:"
    label_stripped = re.sub(
        r"^\s*[A-Za-z][A-Za-z\s_-]*:\s*", "", normalized, count=1
    ).strip()
    if label_stripped and label_stripped != normalized:
        candidates.append(label_stripped)

    # Fix common format issues (e.g. "0000" => "+0000")
    for candidate in list(candidates):
        if " 0000" in candidate:
            fixed = candidate.replace(" 0000", " +0000")
            if fixed not in candidates:
                candidates.append(fixed)

    last_exception: Exception | None = None
    for candidate in candidates:
        try:
            dt = parse(candidate)
            return datetime_to_utc(dt)
        except (ValueError, ParserError) as exc:
            last_exception = exc

    if last_exception is not None:
        raise last_exception

    # Fallback in case parsing failed without raising (should not happen)
    raise ValueError(f"Unable to parse datetime string: {datetime_str}")


# TODO: use this function in other connectors
def datetime_from_utc_timestamp(timestamp: int) -> datetime:
    """Convert a Unix timestamp to a datetime object in UTC"""

    return datetime.fromtimestamp(timestamp, tz=timezone.utc)


def basic_expert_info_representation(info: BasicExpertInfo) -> str | None:
    if info.first_name and info.last_name:
        return f"{info.first_name} {info.middle_initial} {info.last_name}"

    if info.display_name:
        return info.display_name

    if info.email and is_valid_email(info.email):
        return info.email

    if info.first_name:
        return info.first_name

    return None


def get_experts_stores_representations(
    experts: list[BasicExpertInfo] | None,
) -> list[str] | None:
    """Gets string representations of experts supplied.

    If an expert cannot be represented as a string, it is omitted from the
    result.
    """
    if not experts:
        return None

    reps: list[str | None] = [
        basic_expert_info_representation(owner) for owner in experts
    ]
    return [owner for owner in reps if owner is not None]


def process_in_batches(
    objects: list[T], process_function: Callable[[T], U], batch_size: int
) -> Iterator[list[U]]:
    for i in range(0, len(objects), batch_size):
        yield [process_function(obj) for obj in objects[i : i + batch_size]]


def get_metadata_keys_to_ignore() -> list[str]:
    return [IGNORE_FOR_QA]


def _parse_document_source(connector_type: Any) -> DocumentSource | None:
    if connector_type is None:
        return None

    if isinstance(connector_type, DocumentSource):
        return connector_type

    if not isinstance(connector_type, str):
        logger.warning(f"Invalid connector_type type: {type(connector_type).__name__}")
        return None

    normalized = re.sub(r"[\s\-]+", "_", connector_type.strip().lower())
    try:
        return DocumentSource(normalized)
    except ValueError:
        logger.warning(
            f"Invalid connector_type value: '{connector_type}' (normalized: '{normalized}')"
        )
        return None


def process_onyx_metadata(
    metadata: dict[str, Any],
) -> tuple[OnyxMetadata, dict[str, Any]]:
    """
    Users may set Onyx metadata and custom tags in text files. https://docs.onyx.app/admins/connectors/official/file
    Any unrecognized fields are treated as custom tags.
    """
    p_owner_names = metadata.get("primary_owners")
    p_owners = (
        [BasicExpertInfo(display_name=name) for name in p_owner_names]
        if p_owner_names
        else None
    )

    s_owner_names = metadata.get("secondary_owners")
    s_owners = (
        [BasicExpertInfo(display_name=name) for name in s_owner_names]
        if s_owner_names
        else None
    )
    source_type = _parse_document_source(metadata.get("connector_type"))

    dt_str = metadata.get("doc_updated_at")
    doc_updated_at = time_str_to_utc(dt_str) if dt_str else None

    return (
        OnyxMetadata(
            document_id=metadata.get("id"),
            source_type=source_type,
            link=metadata.get("link"),
            file_display_name=metadata.get("file_display_name"),
            title=metadata.get("title"),
            primary_owners=p_owners,
            secondary_owners=s_owners,
            doc_updated_at=doc_updated_at,
        ),
        {
            k: v
            for k, v in metadata.items()
            if k
            not in [
                "document_id",
                "time_updated",
                "doc_updated_at",
                "link",
                "primary_owners",
                "secondary_owners",
                "filename",
                "file_display_name",
                "title",
                "connector_type",
                "pdf_password",
                "mime_type",
            ]
        },
    )


def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
    if CONNECTOR_LOCALHOST_OVERRIDE:
        # Used for development
        base_domain = CONNECTOR_LOCALHOST_OVERRIDE
    return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"


def is_atlassian_date_error(e: Exception) -> bool:
    return "field 'updated' is invalid" in str(e)


def get_cloudId(base_url: str) -> str:
    tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
    response = requests.get(tenant_info_url, timeout=10)
    response.raise_for_status()
    return response.json()["cloudId"]


def scoped_url(url: str, product: str) -> str:
    parsed = urlparse(url)
    base_url = parsed.scheme + "://" + parsed.netloc
    cloud_id = get_cloudId(base_url)
    return f"https://api.atlassian.com/ex/{product}/{cloud_id}{parsed.path}"


================================================
FILE: backend/onyx/connectors/cross_connector_utils/rate_limit_wrapper.py
================================================
import time
from collections.abc import Callable
from functools import wraps
from typing import Any
from typing import cast
from typing import TypeVar

import requests

from onyx.utils.logger import setup_logger

logger = setup_logger()


F = TypeVar("F", bound=Callable[..., Any])


class RateLimitTriedTooManyTimesError(Exception):
    pass


class _RateLimitDecorator:
    """Builds a generic wrapper/decorator for calls to external APIs that
    prevents making more than `max_calls` requests per `period`

    Implementation inspired by the `ratelimit` library:
    https://github.com/tomasbasham/ratelimit.

    NOTE: is not thread safe.
    """

    def __init__(
        self,
        max_calls: int,
        period: float,  # in seconds
        sleep_time: float = 2,  # in seconds
        sleep_backoff: float = 2,  # applies exponential backoff
        max_num_sleep: int = 0,
    ):
        self.max_calls = max_calls
        self.period = period
        self.sleep_time = sleep_time
        self.sleep_backoff = sleep_backoff
        self.max_num_sleep = max_num_sleep

        self.call_history: list[float] = []
        self.curr_calls = 0

    def __call__(self, func: F) -> F:
        @wraps(func)
        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
            # cleanup calls which are no longer relevant
            self._cleanup()

            # check if we've exceeded the rate limit
            sleep_cnt = 0
            while len(self.call_history) == self.max_calls:
                sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt)
                logger.notice(
                    f"Rate limit exceeded for function {func.__name__}. Waiting {sleep_time} seconds before retrying."
                )
                time.sleep(sleep_time)
                sleep_cnt += 1
                if self.max_num_sleep != 0 and sleep_cnt >= self.max_num_sleep:
                    raise RateLimitTriedTooManyTimesError(
                        f"Exceeded '{self.max_num_sleep}' retries for function '{func.__name__}'"
                    )

                self._cleanup()

            # add the current call to the call history
            self.call_history.append(time.monotonic())
            return func(*args, **kwargs)

        return cast(F, wrapped_func)

    def _cleanup(self) -> None:
        curr_time = time.monotonic()
        time_to_expire_before = curr_time - self.period
        self.call_history = [
            call_time
            for call_time in self.call_history
            if call_time > time_to_expire_before
        ]


rate_limit_builder = _RateLimitDecorator


"""If you want to allow the external service to tell you when you've hit the rate limit,
use the following instead"""

R = TypeVar("R", bound=Callable[..., requests.Response])


def wrap_request_to_handle_ratelimiting(
    request_fn: R, default_wait_time_sec: int = 30, max_waits: int = 30
) -> R:
    def wrapped_request(*args: list, **kwargs: dict[str, Any]) -> requests.Response:
        for _ in range(max_waits):
            response = request_fn(*args, **kwargs)
            if response.status_code == 429:
                try:
                    wait_time = int(
                        response.headers.get("Retry-After", default_wait_time_sec)
                    )
                except ValueError:
                    wait_time = default_wait_time_sec

                time.sleep(wait_time)
                continue

            return response

        raise RateLimitTriedTooManyTimesError(f"Exceeded '{max_waits}' retries")

    return cast(R, wrapped_request)


_rate_limited_get = wrap_request_to_handle_ratelimiting(requests.get)
_rate_limited_post = wrap_request_to_handle_ratelimiting(requests.post)


class _RateLimitedRequest:
    get = _rate_limited_get
    post = _rate_limited_post


rl_requests = _RateLimitedRequest


================================================
FILE: backend/onyx/connectors/discord/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/discord/connector.py
================================================
import asyncio
from collections.abc import AsyncGenerator
from collections.abc import AsyncIterable
from collections.abc import Iterable
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast

from discord import Client
from discord.channel import TextChannel
from discord.channel import Thread
from discord.enums import MessageType
from discord.errors import LoginFailure
from discord.flags import Intents
from discord.message import Message as DiscordMessage

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import CredentialInvalidError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()


_DISCORD_DOC_ID_PREFIX = "DISCORD_"
_SNIPPET_LENGTH = 30


def _convert_message_to_document(
    message: DiscordMessage,
    sections: list[TextSection],
) -> Document:
    """
    Convert a discord message to a document
    Sections are collected before calling this function because it relies on async
        calls to fetch the thread history if there is one
    """

    metadata: dict[str, str | list[str]] = {}
    semantic_substring = ""

    # Only messages from TextChannels will make it here but we have to check for it anyways
    if isinstance(message.channel, TextChannel) and (
        channel_name := message.channel.name
    ):
        metadata["Channel"] = channel_name
        semantic_substring += f" in Channel: #{channel_name}"

    # Single messages dont have a title
    title = ""

    # If there is a thread, add more detail to the metadata, title, and semantic identifier
    if isinstance(message.channel, Thread):
        # Threads do have a title
        title = message.channel.name

        # If its a thread, update the metadata, title, and semantic_substring
        metadata["Thread"] = title

        # Add more detail to the semantic identifier if available
        semantic_substring += f" in Thread: {title}"

    snippet: str = (
        message.content[:_SNIPPET_LENGTH].rstrip() + "..."
        if len(message.content) > _SNIPPET_LENGTH
        else message.content
    )

    semantic_identifier = f"{message.author.name} said{semantic_substring}: {snippet}"

    return Document(
        id=f"{_DISCORD_DOC_ID_PREFIX}{message.id}",
        source=DocumentSource.DISCORD,
        semantic_identifier=semantic_identifier,
        doc_updated_at=message.edited_at,
        title=title,
        sections=(cast(list[TextSection | ImageSection], sections)),
        metadata=metadata,
    )


async def _fetch_filtered_channels(
    discord_client: Client,
    server_ids: list[int] | None,
    channel_names: list[str] | None,
) -> list[TextChannel]:
    filtered_channels: list[TextChannel] = []

    for channel in discord_client.get_all_channels():
        if not channel.permissions_for(channel.guild.me).read_message_history:
            continue
        if not isinstance(channel, TextChannel):
            continue
        if server_ids and len(server_ids) > 0 and channel.guild.id not in server_ids:
            continue
        if channel_names and channel.name not in channel_names:
            continue
        filtered_channels.append(channel)

    logger.info(f"Found {len(filtered_channels)} channels for the authenticated user")
    return filtered_channels


async def _fetch_documents_from_channel(
    channel: TextChannel,
    start_time: datetime | None,
    end_time: datetime | None,
) -> AsyncIterable[Document]:
    # Discord's epoch starts at 2015-01-01
    discord_epoch = datetime(2015, 1, 1, tzinfo=timezone.utc)
    if start_time and start_time < discord_epoch:
        start_time = discord_epoch

    # NOTE: limit=None is the correct way to fetch all messages and threads with pagination
    # The discord package erroneously uses limit for both pagination AND number of results
    # This causes the history and archived_threads methods to return 100 results even if there are more results within the filters
    # Pagination is handled automatically (100 results at a time) when limit=None

    async for channel_message in channel.history(
        limit=None,
        after=start_time,
        before=end_time,
    ):
        # Skip messages that are not the default type
        if channel_message.type != MessageType.default:
            continue

        sections: list[TextSection] = [
            TextSection(
                text=channel_message.content,
                link=channel_message.jump_url,
            )
        ]

        yield _convert_message_to_document(channel_message, sections)

    for active_thread in channel.threads:
        async for thread_message in active_thread.history(
            limit=None,
            after=start_time,
            before=end_time,
        ):
            # Skip messages that are not the default type
            if thread_message.type != MessageType.default:
                continue

            sections = [
                TextSection(
                    text=thread_message.content,
                    link=thread_message.jump_url,
                )
            ]

            yield _convert_message_to_document(thread_message, sections)

    async for archived_thread in channel.archived_threads(
        limit=None,
    ):
        async for thread_message in archived_thread.history(
            limit=None,
            after=start_time,
            before=end_time,
        ):
            # Skip messages that are not the default type
            if thread_message.type != MessageType.default:
                continue

            sections = [
                TextSection(
                    text=thread_message.content,
                    link=thread_message.jump_url,
                )
            ]

            yield _convert_message_to_document(thread_message, sections)


def _manage_async_retrieval(
    token: str,
    requested_start_date_string: str,
    channel_names: list[str],
    server_ids: list[int],
    start: datetime | None = None,
    end: datetime | None = None,
) -> Iterable[Document]:
    # parse requested_start_date_string to datetime
    pull_date: datetime | None = (
        datetime.strptime(requested_start_date_string, "%Y-%m-%d").replace(
            tzinfo=timezone.utc
        )
        if requested_start_date_string
        else None
    )

    # Set start_time to the later of start and pull_date, or whichever is provided
    start_time = max(filter(None, [start, pull_date])) if start or pull_date else None

    end_time: datetime | None = end

    async def _async_fetch() -> AsyncGenerator[Document, None]:
        intents = Intents.default()
        intents.message_content = True
        async with Client(intents=intents) as discord_client:
            start_task = asyncio.create_task(discord_client.start(token))
            ready_task = asyncio.create_task(discord_client.wait_until_ready())

            done, _ = await asyncio.wait(
                {start_task, ready_task},
                return_when=asyncio.FIRST_COMPLETED,
            )

            # start() runs indefinitely once connected, so it only lands
            # in `done` when login/connection failed — propagate the error.
            if start_task in done:
                ready_task.cancel()
                start_task.result()

            filtered_channels: list[TextChannel] = await _fetch_filtered_channels(
                discord_client=discord_client,
                server_ids=server_ids,
                channel_names=channel_names,
            )

            for channel in filtered_channels:
                async for doc in _fetch_documents_from_channel(
                    channel=channel,
                    start_time=start_time,
                    end_time=end_time,
                ):
                    yield doc

    def run_and_yield() -> Iterable[Document]:
        loop = asyncio.new_event_loop()
        async_gen = _async_fetch()
        try:
            while True:
                try:
                    doc = loop.run_until_complete(anext(async_gen))
                    yield doc
                except StopAsyncIteration:
                    break
        finally:
            # Must close the async generator before the loop so the Discord
            # client's `async with` block can await its shutdown coroutine.
            # The nested try/finally ensures the loop always closes even if
            # aclose() raises (same pattern as cursor.close() before conn.close()).
            try:
                loop.run_until_complete(async_gen.aclose())
            finally:
                loop.close()

    return run_and_yield()


class DiscordConnector(PollConnector, LoadConnector):
    def __init__(
        self,
        server_ids: list[str] = [],
        channel_names: list[str] = [],
        # YYYY-MM-DD
        start_date: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        self.batch_size = batch_size
        self.channel_names: list[str] = channel_names if channel_names else []
        self.server_ids: list[int] = (
            [int(server_id) for server_id in server_ids] if server_ids else []
        )
        self._discord_bot_token: str | None = None
        self.requested_start_date_string: str = start_date or ""

    @property
    def discord_bot_token(self) -> str:
        if self._discord_bot_token is None:
            raise ConnectorMissingCredentialError("Discord")
        return self._discord_bot_token

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self._discord_bot_token = credentials["discord_bot_token"]
        return None

    def validate_connector_settings(self) -> None:
        loop = asyncio.new_event_loop()
        try:
            client = Client(intents=Intents.default())
            try:
                loop.run_until_complete(client.login(self.discord_bot_token))
            except LoginFailure as e:
                raise CredentialInvalidError(f"Invalid Discord bot token: {e}")
            finally:
                loop.run_until_complete(client.close())
        finally:
            loop.close()

    def _manage_doc_batching(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> GenerateDocumentsOutput:
        doc_batch: list[Document | HierarchyNode] = []
        for doc in _manage_async_retrieval(
            token=self.discord_bot_token,
            requested_start_date_string=self.requested_start_date_string,
            channel_names=self.channel_names,
            server_ids=self.server_ids,
            start=start,
            end=end,
        ):
            doc_batch.append(doc)
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        return self._manage_doc_batching(
            datetime.fromtimestamp(start, tz=timezone.utc),
            datetime.fromtimestamp(end, tz=timezone.utc),
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._manage_doc_batching(None, None)


if __name__ == "__main__":
    import os
    import time

    end = time.time()
    # 1 day
    start = end - 24 * 60 * 60 * 1
    # "1,2,3"
    server_ids: str | None = os.environ.get("server_ids", None)
    # "channel1,channel2"
    channel_names: str | None = os.environ.get("channel_names", None)

    connector = DiscordConnector(
        server_ids=server_ids.split(",") if server_ids else [],
        channel_names=channel_names.split(",") if channel_names else [],
        start_date=os.environ.get("start_date", None),
    )
    connector.load_credentials(
        {"discord_bot_token": os.environ.get("discord_bot_token")}
    )

    for doc_batch in connector.poll_source(start, end):
        for doc in doc_batch:
            print(doc)


================================================
FILE: backend/onyx/connectors/discourse/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/discourse/connector.py
================================================
import time
import urllib.parse
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast

import requests
from pydantic import BaseModel
from requests import Response

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()


class DiscoursePerms(BaseModel):
    api_key: str
    api_username: str


@retry_builder()
def discourse_request(
    endpoint: str, perms: DiscoursePerms, params: dict | None = None
) -> Response:
    headers = {"Api-Key": perms.api_key, "Api-Username": perms.api_username}

    response = requests.get(endpoint, headers=headers, params=params)
    response.raise_for_status()

    return response


class DiscourseConnector(PollConnector):
    def __init__(
        self,
        base_url: str,
        categories: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        parsed_url = urllib.parse.urlparse(base_url)
        if not parsed_url.scheme:
            base_url = "https://" + base_url
        self.base_url = base_url

        self.categories = [c.lower() for c in categories] if categories else []
        self.category_id_map: dict[int, dict] = {}

        self.batch_size = batch_size
        self.permissions: DiscoursePerms | None = None
        self.active_categories: set | None = None

    @rate_limit_builder(max_calls=50, period=60)
    def _make_request(self, endpoint: str, params: dict | None = None) -> Response:
        if not self.permissions:
            raise ConnectorMissingCredentialError("Discourse")
        return discourse_request(endpoint, self.permissions, params)

    def _get_categories_map(
        self,
    ) -> None:
        assert self.permissions is not None
        categories_endpoint = urllib.parse.urljoin(self.base_url, "categories.json")
        response = self._make_request(
            endpoint=categories_endpoint,
            params={"include_subcategories": True},
        )
        categories = response.json()["category_list"]["categories"]
        self.category_id_map = {
            cat["id"]: {"name": cat["name"], "slug": cat["slug"]}
            for cat in categories
            if not self.categories or cat["name"].lower() in self.categories
        }
        self.active_categories = set(self.category_id_map)

    def _get_doc_from_topic(self, topic_id: int) -> Document:
        assert self.permissions is not None
        topic_endpoint = urllib.parse.urljoin(self.base_url, f"t/{topic_id}.json")
        response = self._make_request(endpoint=topic_endpoint)
        topic = response.json()

        topic_url = urllib.parse.urljoin(self.base_url, f"t/{topic['slug']}")

        sections = []
        poster = None
        responders = []
        seen_names = set()
        for ind, post in enumerate(topic["post_stream"]["posts"]):
            if ind == 0:
                poster_name = post.get("name")
                if poster_name:
                    seen_names.add(poster_name)
                    poster = BasicExpertInfo(display_name=poster_name)
            else:
                responder_name = post.get("name")
                if responder_name and responder_name not in seen_names:
                    seen_names.add(responder_name)
                    responders.append(BasicExpertInfo(display_name=responder_name))

            sections.append(
                TextSection(link=topic_url, text=parse_html_page_basic(post["cooked"]))
            )
        category_name = self.category_id_map.get(topic["category_id"], {}).get("name")

        metadata: dict[str, str | list[str]] = (
            {
                "category": category_name,
            }
            if category_name
            else {}
        )

        if topic.get("tags"):
            metadata["tags"] = topic["tags"]

        doc = Document(
            id="_".join([DocumentSource.DISCOURSE.value, str(topic["id"])]),
            sections=cast(list[TextSection | ImageSection], sections),
            source=DocumentSource.DISCOURSE,
            semantic_identifier=topic["title"],
            doc_updated_at=time_str_to_utc(topic["last_posted_at"]),
            primary_owners=[poster] if poster else None,
            secondary_owners=responders or None,
            metadata=metadata,
        )
        return doc

    def _get_latest_topics(
        self, start: datetime | None, end: datetime | None, page: int
    ) -> list[int]:
        assert self.permissions is not None
        topic_ids = []

        if not self.categories:
            latest_endpoint = urllib.parse.urljoin(
                self.base_url, f"latest.json?page={page}"
            )
            response = self._make_request(endpoint=latest_endpoint)
            topics = response.json()["topic_list"]["topics"]

        else:
            topics = []
            empty_categories = []

            for category_id, category_dict in self.category_id_map.items():
                category_endpoint = urllib.parse.urljoin(
                    self.base_url,
                    f"c/{category_dict['slug']}/{category_id}.json?page={page}&sys=latest",
                )
                response = self._make_request(endpoint=category_endpoint)
                new_topics = response.json()["topic_list"]["topics"]

                if len(new_topics) == 0:
                    empty_categories.append(category_id)
                topics.extend(new_topics)

            for empty_category in empty_categories:
                self.category_id_map.pop(empty_category)

        for topic in topics:
            last_time = topic.get("last_posted_at")
            if not last_time:
                continue

            last_time_dt = time_str_to_utc(last_time)
            if (start and start > last_time_dt) or (end and end < last_time_dt):
                continue

            topic_ids.append(topic["id"])

        return topic_ids

    def _yield_discourse_documents(
        self,
        start: datetime,
        end: datetime,
    ) -> GenerateDocumentsOutput:
        page = 0
        while topic_ids := self._get_latest_topics(start, end, page):
            doc_batch: list[Document | HierarchyNode] = []
            for topic_id in topic_ids:
                doc_batch.append(self._get_doc_from_topic(topic_id))
                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

            if doc_batch:
                yield doc_batch
            page += 1

    def load_credentials(
        self,
        credentials: dict[str, Any],
    ) -> dict[str, Any] | None:
        self.permissions = DiscoursePerms(
            api_key=credentials["discourse_api_key"],
            api_username=credentials["discourse_api_username"],
        )
        return None

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if self.permissions is None:
            raise ConnectorMissingCredentialError("Discourse")

        start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc)
        end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc)

        self._get_categories_map()

        yield from self._yield_discourse_documents(start_datetime, end_datetime)


if __name__ == "__main__":
    import os

    connector = DiscourseConnector(base_url=os.environ["DISCOURSE_BASE_URL"])
    connector.load_credentials(
        {
            "discourse_api_key": os.environ["DISCOURSE_API_KEY"],
            "discourse_api_username": os.environ["DISCOURSE_API_USERNAME"],
        }
    )

    current = time.time()
    one_year_ago = current - 24 * 60 * 60 * 360
    latest_docs = connector.poll_source(one_year_ago, current)
    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/document360/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/document360/connector.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import List
from typing import Optional

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.document360.utils import flatten_child_categories
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.retry_wrapper import retry_builder

# Limitations and Potential Improvements
# 1. The "Categories themselves contain potentially relevant information" but they're not pulled in
# 2. Only the HTML Articles are supported, Document360 also has a Markdown and "Block" format
# 3. The contents are not as cleaned up as other HTML connectors

DOCUMENT360_BASE_URL = "https://portal.document360.io"
DOCUMENT360_API_BASE_URL = "https://apihub.document360.io/v2"


class Document360Connector(LoadConnector, PollConnector):
    def __init__(
        self,
        workspace: str,
        categories: List[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        portal_id: Optional[str] = None,
        api_token: Optional[str] = None,
    ) -> None:
        self.portal_id = portal_id
        self.workspace = workspace
        self.categories = categories
        self.batch_size = batch_size
        self.api_token = api_token

    def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
        self.api_token = credentials.get("document360_api_token")
        self.portal_id = credentials.get("portal_id")
        return None

    # rate limiting set based on the enterprise plan: https://apidocs.document360.com/apidocs/rate-limiting
    # NOTE: retry will handle cases where user is not on enterprise plan - we will just hit the rate limit
    # and then retry after a period
    @retry_builder()
    @rate_limit_builder(max_calls=100, period=60)
    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:
        if not self.api_token:
            raise ConnectorMissingCredentialError("Document360")

        headers = {"accept": "application/json", "api_token": self.api_token}

        response = requests.get(
            f"{DOCUMENT360_API_BASE_URL}/{endpoint}", headers=headers, params=params
        )
        response.raise_for_status()

        return response.json()["data"]

    def _get_workspace_id_by_name(self) -> str:
        projects = self._make_request("ProjectVersions")
        workspace_id = next(
            (
                project["id"]
                for project in projects
                if project["version_code_name"] == self.workspace
            ),
            None,
        )
        if workspace_id is None:
            raise ValueError("Not able to find Workspace ID by the user provided name")

        return workspace_id

    def _get_articles_with_category(self, workspace_id: str) -> Any:
        all_categories = self._make_request(
            f"ProjectVersions/{workspace_id}/categories"
        )
        articles_with_category = []

        for category in all_categories:
            if not self.categories or category["name"] in self.categories:
                for article in category["articles"]:
                    articles_with_category.append(
                        {"id": article["id"], "category_name": category["name"]}
                    )
                for child_category in category["child_categories"]:
                    all_nested_categories = flatten_child_categories(child_category)
                    for nested_category in all_nested_categories:
                        for article in nested_category["articles"]:
                            articles_with_category.append(
                                {
                                    "id": article["id"],
                                    "category_name": nested_category["name"],
                                }
                            )

        return articles_with_category

    def _process_articles(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        if self.api_token is None:
            raise ConnectorMissingCredentialError("Document360")

        workspace_id = self._get_workspace_id_by_name()
        articles = self._get_articles_with_category(workspace_id)

        doc_batch: List[Document | HierarchyNode] = []

        for article in articles:
            article_details = self._make_request(
                f"Articles/{article['id']}", {"langCode": "en"}
            )

            updated_at = datetime.strptime(
                article_details["modified_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
            ).replace(tzinfo=timezone.utc)
            if start is not None and updated_at < start:
                continue
            if end is not None and updated_at > end:
                continue

            authors = [
                BasicExpertInfo(
                    display_name=author.get("name"), email=author["email_id"]
                )
                for author in article_details.get("authors", [])
                if author["email_id"]
            ]

            doc_link = (
                article_details["url"]
                if article_details.get("url")
                else f"{DOCUMENT360_BASE_URL}/{self.portal_id}/document/v1/view/{article['id']}"
            )

            html_content = article_details["html_content"]
            article_content = (
                parse_html_page_basic(html_content) if html_content is not None else ""
            )
            doc_text = (
                f"{article_details.get('description', '')}\n{article_content}".strip()
            )

            document = Document(
                id=article_details["id"],
                sections=[TextSection(link=doc_link, text=doc_text)],
                source=DocumentSource.DOCUMENT360,
                semantic_identifier=article_details["title"],
                doc_updated_at=updated_at,
                primary_owners=authors,
                metadata={
                    "workspace": self.workspace,
                    "category": article["category_name"],
                },
            )

            doc_batch.append(document)

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._process_articles()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
        return self._process_articles(start_datetime, end_datetime)


if __name__ == "__main__":
    import time
    import os

    document360_connector = Document360Connector(os.environ["DOCUMENT360_WORKSPACE"])
    document360_connector.load_credentials(
        {
            "portal_id": os.environ["DOCUMENT360_PORTAL_ID"],
            "document360_api_token": os.environ["DOCUMENT360_API_TOKEN"],
        }
    )

    current = time.time()
    one_year_ago = current - 24 * 60 * 60 * 360
    latest_docs = document360_connector.poll_source(one_year_ago, current)

    for doc in latest_docs:
        print(doc)


================================================
FILE: backend/onyx/connectors/document360/utils.py
================================================
def flatten_child_categories(category: dict) -> list[dict]:
    if not category["child_categories"]:
        return [category]
    else:
        flattened_categories = [category]
        for child_category in category["child_categories"]:
            flattened_categories.extend(flatten_child_categories(child_category))
        return flattened_categories


================================================
FILE: backend/onyx/connectors/dropbox/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/dropbox/connector.py
================================================
from datetime import timezone
from io import BytesIO
from typing import Any

from dropbox import Dropbox  # type: ignore[import-untyped]
from dropbox.exceptions import ApiError  # type: ignore[import-untyped]
from dropbox.exceptions import AuthError
from dropbox.files import FileMetadata  # type: ignore[import-untyped]
from dropbox.files import FolderMetadata

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialInvalidError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.utils.logger import setup_logger


logger = setup_logger()


class DropboxConnector(LoadConnector, PollConnector):
    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size
        self.dropbox_client: Dropbox | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.dropbox_client = Dropbox(credentials["dropbox_access_token"])
        return None

    def _download_file(self, path: str) -> bytes:
        """Download a single file from Dropbox."""
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")
        _, resp = self.dropbox_client.files_download(path)
        return resp.content

    def _get_shared_link(self, path: str) -> str:
        """Create a shared link for a file in Dropbox."""
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        try:
            # Check if a shared link already exists
            shared_links = self.dropbox_client.sharing_list_shared_links(path=path)
            if shared_links.links:
                return shared_links.links[0].url

            link_metadata = (
                self.dropbox_client.sharing_create_shared_link_with_settings(path)
            )
            return link_metadata.url
        except ApiError as err:
            logger.exception(f"Failed to create a shared link for {path}: {err}")
            return ""

    def _yield_files_recursive(
        self,
        path: str,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> GenerateDocumentsOutput:
        """Yield files in batches from a specified Dropbox folder, including subfolders."""
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        result = self.dropbox_client.files_list_folder(
            path,
            limit=self.batch_size,
            recursive=False,
            include_non_downloadable_files=False,
        )

        while True:
            batch: list[Document | HierarchyNode] = []
            for entry in result.entries:
                if isinstance(entry, FileMetadata):
                    modified_time = entry.client_modified
                    if modified_time.tzinfo is None:
                        # If no timezone info, assume it is UTC
                        modified_time = modified_time.replace(tzinfo=timezone.utc)
                    else:
                        # If not in UTC, translate it
                        modified_time = modified_time.astimezone(timezone.utc)

                    time_as_seconds = int(modified_time.timestamp())
                    if start and time_as_seconds < start:
                        continue
                    if end and time_as_seconds > end:
                        continue

                    downloaded_file = self._download_file(entry.path_display)
                    link = self._get_shared_link(entry.path_display)
                    try:
                        text = extract_file_text(
                            BytesIO(downloaded_file),
                            file_name=entry.name,
                            break_on_unprocessable=False,
                        )
                        batch.append(
                            Document(
                                id=f"doc:{entry.id}",
                                sections=[TextSection(link=link, text=text)],
                                source=DocumentSource.DROPBOX,
                                semantic_identifier=entry.name,
                                doc_updated_at=modified_time,
                                metadata={"type": "article"},
                            )
                        )
                    except Exception as e:
                        logger.exception(
                            f"Error decoding file {entry.path_display} as utf-8 error occurred: {e}"
                        )

                elif isinstance(entry, FolderMetadata):
                    yield from self._yield_files_recursive(entry.path_lower, start, end)

            if batch:
                yield batch

            if not result.has_more:
                break

            result = self.dropbox_client.files_list_folder_continue(result.cursor)

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        for batch in self._yield_files_recursive("", start, end):
            yield batch

        return None

    def validate_connector_settings(self) -> None:
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox credentials not loaded.")

        try:
            self.dropbox_client.files_list_folder(path="", limit=1)
        except AuthError as e:
            logger.exception("Failed to validate Dropbox credentials")
            raise CredentialInvalidError(f"Dropbox credential is invalid: {e.error}")
        except ApiError as e:
            if (
                e.error is not None
                and "insufficient_permissions" in str(e.error).lower()
            ):
                raise InsufficientPermissionsError(
                    "Your Dropbox token does not have sufficient permissions."
                )
            raise ConnectorValidationError(
                f"Unexpected Dropbox error during validation: {e.user_message_text or e}"
            )
        except Exception as e:
            raise Exception(f"Unexpected error during Dropbox settings validation: {e}")


if __name__ == "__main__":
    import os

    connector = DropboxConnector()
    connector.load_credentials(
        {
            "dropbox_access_token": os.environ["DROPBOX_ACCESS_TOKEN"],
        }
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/drupal_wiki/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/drupal_wiki/connector.py
================================================
import mimetypes
from io import BytesIO
from typing import Any

import requests
from typing_extensions import override

from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    datetime_from_utc_timestamp,
)
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import rate_limit_builder
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import rl_requests
from onyx.connectors.drupal_wiki.models import DrupalWikiCheckpoint
from onyx.connectors.drupal_wiki.models import DrupalWikiPage
from onyx.connectors.drupal_wiki.models import DrupalWikiPageResponse
from onyx.connectors.drupal_wiki.models import DrupalWikiSpaceResponse
from onyx.connectors.drupal_wiki.utils import build_drupal_wiki_document_id
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

MAX_API_PAGE_SIZE = 2000  # max allowed by API
DRUPAL_WIKI_SPACE_KEY = "space"


rate_limited_get = retry_builder()(
    rate_limit_builder(max_calls=10, period=1)(rl_requests.get)
)


class DrupalWikiConnector(
    CheckpointedConnector[DrupalWikiCheckpoint],
    SlimConnector,
):
    # Deprecated parameters that may exist in old connector configurations
    _DEPRECATED_PARAMS = {"drupal_wiki_scope", "include_all_spaces"}

    def __init__(
        self,
        base_url: str,
        spaces: list[str] | None = None,
        pages: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
        include_attachments: bool = False,
        allow_images: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the Drupal Wiki connector.

        Args:
            base_url: The base URL of the Drupal Wiki instance (e.g., https://help.drupal-wiki.com)
            spaces: List of space IDs to index. If None and pages is also None, all spaces will be indexed.
            pages: List of page IDs to index. If provided, these specific pages will be indexed.
            batch_size: Number of documents to process in a batch.
            continue_on_failure: If True, continue indexing even if some documents fail.
            include_attachments: If True, enable processing of page attachments including images and documents.
            allow_images: If True, enable processing of image attachments.
        """

        #########################################################
        # TODO: Remove this after 02/01/2026 and remove **kwargs from the function signature
        # Check for deprecated parameters from old connector configurations
        # If attempting to update without deleting the connector:
        # Remove the deprecated parameters from the custom_connector_config in the relevant connector table rows
        deprecated_found = set(kwargs.keys()) & self._DEPRECATED_PARAMS
        if deprecated_found:
            raise ConnectorValidationError(
                f"Outdated Drupal Wiki connector configuration detected "
                f"(found deprecated parameters: {', '.join(deprecated_found)}). "
                f"Please delete and recreate this connector, or contact Onyx support "
                f"for assistance with updating the configuration without deleting the connector."
            )
        # Reject any other unexpected parameters
        if kwargs:
            raise ConnectorValidationError(
                f"Unexpected parameters for Drupal Wiki connector: {', '.join(kwargs.keys())}"
            )
        #########################################################

        self.base_url = base_url.rstrip("/")
        self.spaces = spaces or []
        self.pages = pages or []

        # If no specific spaces or pages are provided, index all spaces
        self.include_all_spaces = not self.spaces and not self.pages

        self.batch_size = batch_size
        self.continue_on_failure = continue_on_failure

        # Attachment processing configuration
        self.include_attachments = include_attachments
        self.allow_images = allow_images

        self.headers: dict[str, str] = {"Accept": "application/json"}
        self._api_token: str | None = None  # set by load_credentials

    def set_allow_images(self, value: bool) -> None:
        logger.info(f"Setting allow_images to {value}.")
        self.allow_images = value

    def _get_page_attachments(self, page_id: int) -> list[dict[str, Any]]:
        """
        Get all attachments for a specific page.

        Args:
            page_id: ID of the page.

        Returns:
            List of attachment dictionaries.
        """
        url = f"{self.base_url}/api/rest/scope/api/attachment"
        params = {"pageId": str(page_id)}
        logger.debug(f"Fetching attachments for page {page_id} from {url}")

        try:
            response = rate_limited_get(url, headers=self.headers, params=params)
            response.raise_for_status()
            attachments = response.json()
            logger.info(f"Found {len(attachments)} attachments for page {page_id}")
            return attachments
        except Exception as e:
            logger.warning(f"Failed to fetch attachments for page {page_id}: {e}")
            return []

    def _download_attachment(self, attachment_id: int) -> bytes:
        """
        Download attachment content.

        Args:
            attachment_id: ID of the attachment to download.

        Returns:
            Raw bytes of the attachment.
        """
        url = f"{self.base_url}/api/rest/scope/api/attachment/{attachment_id}/download"
        logger.info(f"Downloading attachment {attachment_id} from {url}")

        # Use headers without Accept for binary downloads
        download_headers = {"Authorization": f"Bearer {self._api_token}"}

        response = rate_limited_get(url, headers=download_headers)
        response.raise_for_status()

        return response.content

    def _validate_attachment_filetype(self, attachment: dict[str, Any]) -> bool:
        """
        Validate if the attachment file type is supported.

        Args:
            attachment: Attachment dictionary from Drupal Wiki API.

        Returns:
            True if the file type is supported, False otherwise.
        """
        file_name = attachment.get("fileName", "")
        if not file_name:
            return False

        # Get file extension
        file_extension = get_file_ext(file_name)

        if file_extension in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:
            return True

        logger.warning(f"Unsupported file type: {file_extension} for {file_name}")
        return False

    def _get_media_type_from_filename(self, filename: str) -> str:
        """
        Get media type from filename using the standard mimetypes library.

        Args:
            filename: The filename.

        Returns:
            Media type string.
        """
        mime_type, _encoding = mimetypes.guess_type(filename)
        return mime_type or "application/octet-stream"

    def _process_attachment(
        self,
        attachment: dict[str, Any],
        page_id: int,
        download_url: str,
    ) -> tuple[list[TextSection | ImageSection], str | None]:
        """
        Process a single attachment and return generated sections.

        Args:
            attachment: Attachment dictionary from Drupal Wiki API.
            page_id: ID of the parent page.
            download_url: Direct download URL for the attachment.

        Returns:
            Tuple of (sections, error_message). If error_message is not None, the
            sections list should be treated as invalid.
        """
        sections: list[TextSection | ImageSection] = []

        try:
            if not self._validate_attachment_filetype(attachment):
                return (
                    [],
                    f"Unsupported file type: {attachment.get('fileName', 'unknown')}",
                )

            attachment_id = attachment["id"]
            file_name = attachment.get("fileName", f"attachment_{attachment_id}")
            file_size = attachment.get("fileSize", 0)
            media_type = self._get_media_type_from_filename(file_name)

            if file_size > DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD:
                return [], f"Attachment too large: {file_size} bytes"

            try:
                raw_bytes = self._download_attachment(attachment_id)
            except Exception as e:
                return [], f"Failed to download attachment: {e}"

            if media_type.startswith("image/"):
                if not self.allow_images:
                    logger.info(
                        f"Skipping image attachment {file_name} because allow_images is False",
                    )
                    return [], None

                try:
                    image_section, _ = store_image_and_create_section(
                        image_data=raw_bytes,
                        file_id=str(attachment_id),
                        display_name=attachment.get(
                            "name", attachment.get("fileName", "Unknown")
                        ),
                        link=download_url,
                        media_type=media_type,
                        file_origin=FileOrigin.CONNECTOR,
                    )
                    sections.append(image_section)
                    logger.debug(f"Stored image attachment with file name: {file_name}")
                except Exception as e:
                    return [], f"Image storage failed: {e}"

                return sections, None

            image_counter = 0

            def _store_embedded_image(image_data: bytes, image_name: str) -> None:
                nonlocal image_counter

                if not self.allow_images:
                    return

                media_for_image = self._get_media_type_from_filename(image_name)
                if media_for_image == "application/octet-stream":
                    try:
                        media_for_image = get_image_type_from_bytes(image_data)
                    except ValueError:
                        logger.warning(
                            f"Unable to determine media type for embedded image {image_name} on attachment {file_name}"
                        )

                image_counter += 1
                display_name = (
                    image_name
                    or f"{attachment.get('name', file_name)} - embedded image {image_counter}"
                )

                try:
                    image_section, _ = store_image_and_create_section(
                        image_data=image_data,
                        file_id=f"{attachment_id}_embedded_{image_counter}",
                        display_name=display_name,
                        link=download_url,
                        media_type=media_for_image,
                        file_origin=FileOrigin.CONNECTOR,
                    )
                    sections.append(image_section)
                except Exception as err:
                    logger.warning(
                        f"Failed to store embedded image {image_name or image_counter} for attachment {file_name}: {err}"
                    )

            extraction_result = extract_text_and_images(
                file=BytesIO(raw_bytes),
                file_name=file_name,
                content_type=media_type,
                image_callback=_store_embedded_image if self.allow_images else None,
            )

            text_content = extraction_result.text_content.strip()
            if text_content:
                sections.insert(0, TextSection(text=text_content, link=download_url))
                logger.info(
                    f"Extracted {len(text_content)} characters from {file_name}"
                )
            elif not sections:
                return [], f"No text extracted for {file_name}"

            return sections, None

        except Exception as e:
            logger.error(
                f"Failed to process attachment {attachment.get('name', 'unknown')} on page {page_id}: {e}"
            )
            return [], f"Failed to process attachment: {e}"

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """
        Load credentials for the Drupal Wiki connector.

        Args:
            credentials: Dictionary containing the API token.

        Returns:
            None
        """

        api_token = credentials.get("drupal_wiki_api_token", "").strip()

        if not api_token:
            raise ConnectorValidationError(
                "API token is required for Drupal Wiki connector"
            )

        self._api_token = api_token
        self.headers.update(
            {
                "Authorization": f"Bearer {api_token}",
            }
        )

        return None

    def _get_space_ids(self) -> list[int]:
        """
        Get all space IDs from the Drupal Wiki instance.

        Returns:
            List of space IDs (deduplicated). The list is sorted to be deterministic.
        """
        url = f"{self.base_url}/api/rest/scope/api/space"
        size = MAX_API_PAGE_SIZE
        page = 0
        all_space_ids: set[int] = set()
        has_more = True
        last_num_ids = -1

        while has_more and len(all_space_ids) > last_num_ids:
            last_num_ids = len(all_space_ids)
            params = {"size": size, "page": page}
            logger.debug(f"Fetching spaces from {url} (page={page}, size={size})")
            response = rate_limited_get(url, headers=self.headers, params=params)
            response.raise_for_status()
            resp_json = response.json()
            space_response = DrupalWikiSpaceResponse.model_validate(resp_json)

            logger.info(f"Fetched {len(space_response.content)} spaces from {page}")
            # Collect ids into the set to deduplicate
            for space in space_response.content:
                all_space_ids.add(space.id)

            # Continue if we got a full page, indicating there might be more
            has_more = len(space_response.content) >= size

            page += 1

        # Return a deterministic, sorted list of ids
        space_id_list = list(sorted(all_space_ids))
        logger.debug(f"Total spaces fetched: {len(space_id_list)}")
        return space_id_list

    def _get_pages_for_space(
        self, space_id: int, modified_after: SecondsSinceUnixEpoch | None = None
    ) -> list[DrupalWikiPage]:
        """
        Get all pages for a specific space, optionally filtered by modification time.

        Args:
            space_id: ID of the space.
            modified_after: Only return pages modified after this timestamp (seconds since Unix epoch).

        Returns:
            List of DrupalWikiPage objects.
        """
        url = f"{self.base_url}/api/rest/scope/api/page"
        size = MAX_API_PAGE_SIZE
        page = 0
        all_pages = []
        has_more = True

        while has_more:
            params: dict[str, str | int] = {
                DRUPAL_WIKI_SPACE_KEY: str(space_id),
                "size": size,
                "page": page,
            }

            # Add modifiedAfter parameter if provided
            if modified_after is not None:
                params["modifiedAfter"] = int(modified_after)

            logger.debug(
                f"Fetching pages for space {space_id} from {url} ({page=}, {size=}, {modified_after=})"
            )
            response = rate_limited_get(url, headers=self.headers, params=params)
            response.raise_for_status()
            resp_json = response.json()

            try:
                page_response = DrupalWikiPageResponse.model_validate(resp_json)
            except Exception as e:
                logger.error(f"Failed to validate Drupal Wiki page response: {e}")
                raise ConnectorValidationError(f"Invalid API response format: {e}")

            logger.info(
                f"Fetched {len(page_response.content)} pages in space {space_id} (page={page})"
            )

            # Pydantic should automatically parse content items as DrupalWikiPage objects
            # If validation fails, it will raise an exception which we should catch
            all_pages.extend(page_response.content)

            # Continue if we got a full page, indicating there might be more
            has_more = len(page_response.content) >= size

            page += 1

        logger.debug(f"Total pages fetched for space {space_id}: {len(all_pages)}")
        return all_pages

    def _get_page_content(self, page_id: int) -> DrupalWikiPage:
        """
        Get the content of a specific page.

        Args:
            page_id: ID of the page.

        Returns:
            DrupalWikiPage object.
        """
        url = f"{self.base_url}/api/rest/scope/api/page/{page_id}"
        response = rate_limited_get(url, headers=self.headers)
        response.raise_for_status()

        return DrupalWikiPage.model_validate(response.json())

    def _process_page(self, page: DrupalWikiPage) -> Document | ConnectorFailure:
        """
        Process a page and convert it to a Document.

        Args:
            page: DrupalWikiPage object.

        Returns:
            Document object or ConnectorFailure.
        """
        try:
            # Extract text from HTML, handle None body
            text_content = parse_html_page_basic(page.body or "")

            # Ensure text_content is a string, not None
            if text_content is None:
                text_content = ""

            # Create document URL
            page_url = build_drupal_wiki_document_id(self.base_url, page.id)

            # Create sections with just the page content
            sections: list[TextSection | ImageSection] = [
                TextSection(text=text_content, link=page_url)
            ]

            # Only process attachments if self.include_attachments is True
            if self.include_attachments:
                attachments = self._get_page_attachments(page.id)
                for attachment in attachments:
                    logger.info(
                        f"Processing attachment: {attachment.get('name', 'Unknown')} (ID: {attachment['id']})"
                    )
                    # Use downloadUrl from API; fallback to page URL
                    raw_download = attachment.get("downloadUrl")
                    if raw_download:
                        download_url = (
                            raw_download
                            if raw_download.startswith("http")
                            else f"{self.base_url.rstrip('/')}" + raw_download
                        )
                    else:
                        download_url = page_url
                    # Process the attachment
                    attachment_sections, error = self._process_attachment(
                        attachment, page.id, download_url
                    )
                    if error:
                        logger.warning(
                            f"Error processing attachment {attachment.get('name', 'Unknown')}: {error}"
                        )
                        continue

                    if attachment_sections:
                        sections.extend(attachment_sections)
                        logger.debug(
                            f"Added {len(attachment_sections)} section(s) for attachment {attachment.get('name', 'Unknown')}"
                        )

            # Create metadata
            metadata: dict[str, str | list[str]] = {
                "space_id": str(page.homeSpace),
                "page_id": str(page.id),
                "type": page.type,
            }

            # Create document
            return Document(
                id=page_url,
                sections=sections,
                source=DocumentSource.DRUPAL_WIKI,
                semantic_identifier=page.title,
                metadata=metadata,
                doc_updated_at=datetime_from_utc_timestamp(page.lastModified),
            )
        except Exception as e:
            logger.error(f"Error processing page {page.id}: {e}")
            return ConnectorFailure(
                failed_document=DocumentFailure(
                    document_id=str(page.id),
                    document_link=build_drupal_wiki_document_id(self.base_url, page.id),
                ),
                failure_message=f"Error processing page {page.id}: {e}",
                exception=e,
            )

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: DrupalWikiCheckpoint,
    ) -> CheckpointOutput[DrupalWikiCheckpoint]:
        """
        Load documents from a checkpoint.

        Args:
            start: Start time as seconds since Unix epoch.
            end: End time as seconds since Unix epoch.
            checkpoint: Checkpoint to resume from.

        Returns:
            Generator yielding documents and the updated checkpoint.
        """
        # Ensure page_ids is not None
        if checkpoint.page_ids is None:
            checkpoint.page_ids = []

        # Initialize page_ids from self.pages if not already set
        if not checkpoint.page_ids and self.pages:
            logger.info(f"Initializing page_ids from self.pages: {self.pages}")
            checkpoint.page_ids = [int(page_id.strip()) for page_id in self.pages]

        # Ensure spaces is not None
        if checkpoint.spaces is None:
            checkpoint.spaces = []

        while checkpoint.current_page_id_index < len(checkpoint.page_ids):
            page_id = checkpoint.page_ids[checkpoint.current_page_id_index]
            logger.debug(f"Processing page ID: {page_id}")

            try:
                # Get the page content directly
                page = self._get_page_content(page_id)

                # Skip pages outside the time range
                if not self._is_page_in_time_range(page.lastModified, start, end):
                    logger.info(f"Skipping page {page_id} - outside time range")
                    checkpoint.current_page_id_index += 1
                    continue

                # Process the page
                doc_or_failure = self._process_page(page)
                yield doc_or_failure

            except Exception as e:
                logger.error(f"Error processing page ID {page_id}: {e}")
                yield ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=str(page_id),
                        document_link=build_drupal_wiki_document_id(
                            self.base_url, page_id
                        ),
                    ),
                    failure_message=f"Error processing page ID {page_id}: {e}",
                    exception=e,
                )

            # Move to the next page ID
            checkpoint.current_page_id_index += 1

        # TODO: The main benefit of CheckpointedConnectors is that they can "save their work"
        # by storing a checkpoint so transient errors are easy to recover from: simply resume
        # from the last checkpoint. The way to get checkpoints saved is to return them somewhere
        # in the middle of this function. The guarantee our checkpointing system gives to you,
        # the connector implementer, is that when you return a checkpoint, this connector will
        # at a later time (generally within a few seconds) call the load_from_checkpoint function
        # again with the checkpoint you last returned as long as has_more=True.

        # Process spaces if include_all_spaces is True or spaces are provided
        if self.include_all_spaces or self.spaces:
            # If include_all_spaces is True, always fetch all spaces
            if self.include_all_spaces:
                logger.info("Fetching all spaces")
                # Fetch all spaces
                all_space_ids = self._get_space_ids()
                # checkpoint.spaces expects a list of ints; assign returned list
                checkpoint.spaces = all_space_ids
                logger.info(f"Found {len(checkpoint.spaces)} spaces to process")
            # Otherwise, use provided spaces if checkpoint is empty
            elif not checkpoint.spaces:
                logger.info(f"Using provided spaces: {self.spaces}")
                # Use provided spaces
                checkpoint.spaces = [int(space_id.strip()) for space_id in self.spaces]

            # Process spaces from the checkpoint
            while checkpoint.current_space_index < len(checkpoint.spaces):
                space_id = checkpoint.spaces[checkpoint.current_space_index]
                logger.debug(f"Processing space ID: {space_id}")

                # Get pages for the current space, filtered by start time if provided
                pages = self._get_pages_for_space(space_id, modified_after=start)

                # Process pages from the checkpoint
                while checkpoint.current_page_index < len(pages):
                    page = pages[checkpoint.current_page_index]
                    logger.debug(f"Processing page: {page.title} (ID: {page.id})")

                    # For space-based pages, we already filtered by modifiedAfter in the API call
                    # Only need to check the end time boundary
                    if end and page.lastModified >= end:
                        logger.info(
                            f"Skipping page {page.id} - outside time range (after end)"
                        )
                        checkpoint.current_page_index += 1
                        continue

                    # Process the page
                    doc_or_failure = self._process_page(page)
                    yield doc_or_failure

                    # Move to the next page
                    checkpoint.current_page_index += 1

                # Move to the next space
                checkpoint.current_space_index += 1
                checkpoint.current_page_index = 0

        # All spaces and pages processed
        logger.info("Finished processing all spaces and pages")
        checkpoint.has_more = False
        return checkpoint

    @override
    def build_dummy_checkpoint(self) -> DrupalWikiCheckpoint:
        """
        Build a dummy checkpoint.

        Returns:
            DrupalWikiCheckpoint with default values.
        """
        return DrupalWikiCheckpoint(
            has_more=True,
            current_space_index=0,
            current_page_index=0,
            current_page_id_index=0,
            spaces=[],
            page_ids=[],
            is_processing_specific_pages=False,
        )

    @override
    def validate_checkpoint_json(self, checkpoint_json: str) -> DrupalWikiCheckpoint:
        """
        Validate a checkpoint JSON string.

        Args:
            checkpoint_json: JSON string representing a checkpoint.

        Returns:
            Validated DrupalWikiCheckpoint.
        """
        return DrupalWikiCheckpoint.model_validate_json(checkpoint_json)

    # TODO: unify approach with load_from_checkpoint.
    # Ideally slim retrieval shares a lot of the same code with non-slim
    # and we pass in a param is_slim to the main helper function
    # that does the retrieval.
    @override
    def retrieve_all_slim_docs(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        """
        Retrieve all slim documents.

        Args:
            start: Start time as seconds since Unix epoch.
            end: End time as seconds since Unix epoch.
            callback: Callback for indexing heartbeat.

        Returns:
            Generator yielding batches of SlimDocument objects.
        """
        slim_docs: list[SlimDocument | HierarchyNode] = []
        logger.info(
            f"Starting retrieve_all_slim_docs with include_all_spaces={self.include_all_spaces}, spaces={self.spaces}"
        )

        # Process specific page IDs if provided
        if self.pages:
            logger.info(f"Processing specific pages: {self.pages}")
            for page_id in self.pages:
                try:
                    # Get the page content directly
                    page_content = self._get_page_content(int(page_id.strip()))

                    # Skip pages outside the time range
                    if not self._is_page_in_time_range(
                        page_content.lastModified, start, end
                    ):
                        logger.info(f"Skipping page {page_id} - outside time range")
                        continue

                    # Create slim document for the page
                    page_url = build_drupal_wiki_document_id(
                        self.base_url, page_content.id
                    )
                    slim_docs.append(
                        SlimDocument(
                            id=page_url,
                        )
                    )
                    logger.debug(f"Added slim document for page {page_content.id}")

                    # Process attachments for this page
                    attachments = self._get_page_attachments(page_content.id)
                    for attachment in attachments:
                        if self._validate_attachment_filetype(attachment):
                            attachment_url = f"{page_url}#attachment-{attachment['id']}"
                            slim_docs.append(
                                SlimDocument(
                                    id=attachment_url,
                                )
                            )
                            logger.debug(
                                f"Added slim document for attachment {attachment['id']}"
                            )

                    # Yield batch if it reaches the batch size
                    if len(slim_docs) >= self.batch_size:
                        logger.debug(
                            f"Yielding batch of {len(slim_docs)} slim documents"
                        )
                        yield slim_docs
                        slim_docs = []

                        if callback and callback.should_stop():
                            return
                        if callback:
                            callback.progress("retrieve_all_slim_docs", 1)

                except Exception as e:
                    logger.error(
                        f"Error processing page ID {page_id} for slim documents: {e}"
                    )

        # Process spaces if include_all_spaces is True or spaces are provided
        if self.include_all_spaces or self.spaces:
            logger.info("Processing spaces for slim documents")
            # Get spaces to process
            spaces_to_process = []
            if self.include_all_spaces:
                logger.info("Fetching all spaces for slim documents")
                # Fetch all spaces
                all_space_ids = self._get_space_ids()
                spaces_to_process = all_space_ids
                logger.info(f"Found {len(spaces_to_process)} spaces to process")
            else:
                logger.info(f"Using provided spaces: {self.spaces}")
                # Use provided spaces
                spaces_to_process = [int(space_id.strip()) for space_id in self.spaces]

            # Process each space
            for space_id in spaces_to_process:
                logger.info(f"Processing space ID: {space_id}")
                # Get pages for the current space, filtered by start time if provided
                pages = self._get_pages_for_space(space_id, modified_after=start)

                # Process each page
                for page in pages:
                    logger.debug(f"Processing page: {page.title} (ID: {page.id})")
                    # Skip pages outside the time range
                    if end and page.lastModified >= end:
                        logger.info(
                            f"Skipping page {page.id} - outside time range (after end)"
                        )
                        continue

                    # Create slim document for the page
                    page_url = build_drupal_wiki_document_id(self.base_url, page.id)
                    slim_docs.append(
                        SlimDocument(
                            id=page_url,
                        )
                    )
                    logger.info(f"Added slim document for page {page.id}")

                    # Process attachments for this page
                    attachments = self._get_page_attachments(page.id)
                    for attachment in attachments:
                        if self._validate_attachment_filetype(attachment):
                            attachment_url = f"{page_url}#attachment-{attachment['id']}"
                            slim_docs.append(
                                SlimDocument(
                                    id=attachment_url,
                                )
                            )
                            logger.info(
                                f"Added slim document for attachment {attachment['id']}"
                            )

                    # Yield batch if it reaches the batch size
                    if len(slim_docs) >= self.batch_size:
                        logger.info(
                            f"Yielding batch of {len(slim_docs)} slim documents"
                        )
                        yield slim_docs
                        slim_docs = []

                        if callback and callback.should_stop():
                            return
                        if callback:
                            callback.progress("retrieve_all_slim_docs", 1)

        # Yield remaining documents
        if slim_docs:
            logger.debug(f"Yielding final batch of {len(slim_docs)} slim documents")
            yield slim_docs

    def validate_connector_settings(self) -> None:
        """
        Validate the connector settings.

        Raises:
            ConnectorValidationError: If the settings are invalid.
        """
        if not self.headers:
            raise ConnectorMissingCredentialError("Drupal Wiki")

        try:
            # Try to fetch spaces to validate the connection
            # Call the new helper which returns the list of space ids
            self._get_space_ids()
        except requests.exceptions.RequestException as e:
            raise ConnectorValidationError(f"Failed to connect to Drupal Wiki: {e}")

    def _is_page_in_time_range(
        self,
        last_modified: int,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> bool:
        """
        Check if a page's last modified timestamp falls within the specified time range.

        Args:
            last_modified: The page's last modified timestamp.
            start: Start time as seconds since Unix epoch (inclusive).
            end: End time as seconds since Unix epoch (exclusive).

        Returns:
            True if the page is within the time range, False otherwise.
        """
        return (not start or last_modified >= start) and (
            not end or last_modified < end
        )


================================================
FILE: backend/onyx/connectors/drupal_wiki/models.py
================================================
from enum import Enum
from typing import Generic
from typing import List
from typing import Optional
from typing import TypeVar

from pydantic import BaseModel

from onyx.connectors.interfaces import ConnectorCheckpoint


class SpaceAccessStatus(str, Enum):
    """Enum for Drupal Wiki space access status"""

    PRIVATE = "PRIVATE"
    ANONYMOUS = "ANONYMOUS"
    AUTHENTICATED = "AUTHENTICATED"


class DrupalWikiSpace(BaseModel):
    """Model for a Drupal Wiki space"""

    id: int
    name: str
    type: str
    description: Optional[str] = None
    accessStatus: Optional[SpaceAccessStatus] = None
    color: Optional[str] = None


class DrupalWikiPage(BaseModel):
    """Model for a Drupal Wiki page"""

    id: int
    title: str
    homeSpace: int
    lastModified: int
    type: str
    body: Optional[str] = None


T = TypeVar("T")


class DrupalWikiBaseResponse(BaseModel, Generic[T]):
    """Base model for Drupal Wiki API responses"""

    totalPages: int
    totalElements: int
    size: int
    content: List[T]
    number: int
    first: bool
    last: bool
    numberOfElements: int
    empty: bool


class DrupalWikiSpaceResponse(DrupalWikiBaseResponse[DrupalWikiSpace]):
    """Model for the response from the Drupal Wiki spaces API"""


class DrupalWikiPageResponse(DrupalWikiBaseResponse[DrupalWikiPage]):
    """Model for the response from the Drupal Wiki pages API"""


class DrupalWikiCheckpoint(ConnectorCheckpoint):
    """Checkpoint for the Drupal Wiki connector"""

    current_space_index: int = 0
    current_page_index: int = 0
    current_page_id_index: int = 0
    spaces: List[int] = []
    page_ids: List[int] = []
    is_processing_specific_pages: bool = False


================================================
FILE: backend/onyx/connectors/drupal_wiki/utils.py
================================================
from onyx.utils.logger import setup_logger

logger = setup_logger()


def build_drupal_wiki_document_id(base_url: str, page_id: int) -> str:
    """Build a document ID for a Drupal Wiki page using the real URL format"""
    # Ensure base_url ends with a slash
    base_url = base_url.rstrip("/") + "/"
    return f"{base_url}node/{page_id}"


================================================
FILE: backend/onyx/connectors/egnyte/connector.py
================================================
import io
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import IO
from urllib.parse import quote

from pydantic import Field

from onyx.configs.app_configs import EGNYTE_CLIENT_ID
from onyx.configs.app_configs import EGNYTE_CLIENT_SECRET
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_oauth_callback_uri,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import OAuthConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import detect_encoding
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import read_text_file
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import request_with_retries


logger = setup_logger()

_EGNYTE_API_BASE = "https://{domain}.egnyte.com/pubapi/v1"
_EGNYTE_APP_BASE = "https://{domain}.egnyte.com"


def _parse_last_modified(last_modified: str) -> datetime:
    return datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(
        tzinfo=timezone.utc
    )


def _process_egnyte_file(
    file_metadata: dict[str, Any],
    file_content: IO,
    base_url: str,
    folder_path: str | None = None,
) -> Document | None:
    """Process an Egnyte file into a Document object

    Args:
        file_data: The file data from Egnyte API
        file_content: The raw content of the file in bytes
        base_url: The base URL for the Egnyte instance
        folder_path: Optional folder path to filter results
    """
    # Skip if file path doesn't match folder path filter
    if folder_path and not file_metadata["path"].startswith(folder_path):
        raise ValueError(
            f"File path {file_metadata['path']} does not match folder path {folder_path}"
        )

    file_name = file_metadata["name"]
    extension = get_file_ext(file_name)

    # Explicitly excluding image extensions here. TODO: consider allowing images
    if extension not in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:
        logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
        return None

    # Extract text content based on file type
    # TODO @wenxi-onyx: convert to extract_text_and_images
    if extension in OnyxFileExtensions.PLAIN_TEXT_EXTENSIONS:
        encoding = detect_encoding(file_content)
        file_content_raw, file_metadata = read_text_file(
            file_content, encoding=encoding, ignore_onyx_metadata=False
        )
    else:
        file_content_raw = extract_file_text(
            file=file_content,
            file_name=file_name,
            break_on_unprocessable=True,
        )

    # Build the web URL for the file
    web_url = f"{base_url}/navigate/file/{file_metadata['group_id']}"

    # Create document metadata
    metadata: dict[str, str | list[str]] = {
        "file_path": file_metadata["path"],
        "last_modified": file_metadata.get("last_modified", ""),
    }

    # Add lock info if present
    if lock_info := file_metadata.get("lock_info"):
        metadata["lock_owner"] = (
            f"{lock_info.get('first_name', '')} {lock_info.get('last_name', '')}"
        )

    # Create the document owners
    primary_owner = None
    if uploaded_by := file_metadata.get("uploaded_by"):
        primary_owner = BasicExpertInfo(
            email=uploaded_by,  # Using username as email since that's what we have
        )

    # Create the document
    return Document(
        id=f"egnyte-{file_metadata['entry_id']}",
        sections=[TextSection(text=file_content_raw.strip(), link=web_url)],
        source=DocumentSource.EGNYTE,
        semantic_identifier=file_name,
        metadata=metadata,
        doc_updated_at=(
            _parse_last_modified(file_metadata["last_modified"])
            if "last_modified" in file_metadata
            else None
        ),
        primary_owners=[primary_owner] if primary_owner else None,
    )


class EgnyteConnector(LoadConnector, PollConnector, OAuthConnector):
    class AdditionalOauthKwargs(OAuthConnector.AdditionalOauthKwargs):
        egnyte_domain: str = Field(
            title="Egnyte Domain",
            description=(
                "The domain for the Egnyte instance (e.g. 'company' for company.egnyte.com)"
            ),
        )

    def __init__(
        self,
        folder_path: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.domain = ""  # will always be set in `load_credentials`
        self.folder_path = folder_path or ""  # Root folder if not specified
        self.batch_size = batch_size
        self.access_token: str | None = None

    @classmethod
    def oauth_id(cls) -> DocumentSource:
        return DocumentSource.EGNYTE

    @classmethod
    def oauth_authorization_url(
        cls,
        base_domain: str,
        state: str,
        additional_kwargs: dict[str, str],
    ) -> str:
        if not EGNYTE_CLIENT_ID:
            raise ValueError("EGNYTE_CLIENT_ID environment variable must be set")

        oauth_kwargs = cls.AdditionalOauthKwargs(**additional_kwargs)

        callback_uri = get_oauth_callback_uri(base_domain, "egnyte")
        return (
            f"https://{oauth_kwargs.egnyte_domain}.egnyte.com/puboauth/token"
            f"?client_id={EGNYTE_CLIENT_ID}"
            f"&redirect_uri={callback_uri}"
            f"&scope=Egnyte.filesystem"
            f"&state={state}"
            f"&response_type=code"
        )

    @classmethod
    def oauth_code_to_token(
        cls,
        base_domain: str,
        code: str,
        additional_kwargs: dict[str, str],
    ) -> dict[str, Any]:
        if not EGNYTE_CLIENT_ID:
            raise ValueError("EGNYTE_CLIENT_ID environment variable must be set")
        if not EGNYTE_CLIENT_SECRET:
            raise ValueError("EGNYTE_CLIENT_SECRET environment variable must be set")

        oauth_kwargs = cls.AdditionalOauthKwargs(**additional_kwargs)

        # Exchange code for token
        url = f"https://{oauth_kwargs.egnyte_domain}.egnyte.com/puboauth/token"
        redirect_uri = get_oauth_callback_uri(base_domain, "egnyte")

        data = {
            "client_id": EGNYTE_CLIENT_ID,
            "client_secret": EGNYTE_CLIENT_SECRET,
            "code": code,
            "grant_type": "authorization_code",
            "redirect_uri": redirect_uri,
            "scope": "Egnyte.filesystem",
        }
        headers = {"Content-Type": "application/x-www-form-urlencoded"}

        response = request_with_retries(
            method="POST",
            url=url,
            data=data,
            headers=headers,
            # try a lot faster since this is a realtime flow
            backoff=0,
            delay=0.1,
        )
        if not response.ok:
            raise RuntimeError(f"Failed to exchange code for token: {response.text}")

        token_data = response.json()
        return {
            "domain": oauth_kwargs.egnyte_domain,
            "access_token": token_data["access_token"],
        }

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.domain = credentials["domain"]
        self.access_token = credentials["access_token"]
        return None

    def _get_files_list(
        self,
        path: str,
    ) -> Generator[dict[str, Any], None, None]:
        if not self.access_token or not self.domain:
            raise ConnectorMissingCredentialError("Egnyte")

        headers = {
            "Authorization": f"Bearer {self.access_token}",
        }

        params: dict[str, Any] = {
            "list_content": True,
        }

        url_encoded_path = quote(path or "")
        url = f"{_EGNYTE_API_BASE.format(domain=self.domain)}/fs/{url_encoded_path}"
        response = request_with_retries(
            method="GET", url=url, headers=headers, params=params
        )
        if not response.ok:
            raise RuntimeError(f"Failed to fetch files from Egnyte: {response.text}")

        data = response.json()

        # Yield files from current directory
        for file in data.get("files", []):
            yield file

        # Recursively traverse folders
        for folder in data.get("folders", []):
            yield from self._get_files_list(folder["path"])

    def _should_index_file(
        self,
        file: dict[str, Any],
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> bool:
        """Return True if file should be included based on filters."""
        if file["is_folder"]:
            return False

        file_modified = _parse_last_modified(file["last_modified"])
        if start_time and file_modified < start_time:
            return False
        if end_time and file_modified > end_time:
            return False

        return True

    def _process_files(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> Generator[list[Document | HierarchyNode], None, None]:
        current_batch: list[Document | HierarchyNode] = []

        # Iterate through yielded files and filter them
        for file in self._get_files_list(self.folder_path):
            if not self._should_index_file(file, start_time, end_time):
                logger.debug(f"Skipping file '{file['path']}'.")
                continue

            try:
                # Set up request with streaming enabled
                headers = {
                    "Authorization": f"Bearer {self.access_token}",
                }
                url_encoded_path = quote(file["path"])
                url = f"{_EGNYTE_API_BASE.format(domain=self.domain)}/fs-content/{url_encoded_path}"
                response = request_with_retries(
                    method="GET",
                    url=url,
                    headers=headers,
                    stream=True,
                )

                if not response.ok:
                    logger.error(
                        f"Failed to fetch file content: {file['path']} (status code: {response.status_code})"
                    )
                    continue

                # Stream the response content into a BytesIO buffer
                buffer = io.BytesIO()
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        buffer.write(chunk)

                # Reset buffer's position to the start
                buffer.seek(0)

                # Process the streamed file content
                doc = _process_egnyte_file(
                    file_metadata=file,
                    file_content=buffer,
                    base_url=_EGNYTE_APP_BASE.format(domain=self.domain),
                    folder_path=self.folder_path,
                )

                if doc is not None:
                    current_batch.append(doc)

                    if len(current_batch) >= self.batch_size:
                        yield current_batch
                        current_batch = []

            except Exception:
                logger.exception(f"Failed to process file {file['path']}")
                continue

        if current_batch:
            yield current_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        yield from self._process_files()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
        end_time = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._process_files(start_time=start_time, end_time=end_time)


if __name__ == "__main__":
    connector = EgnyteConnector()
    connector.load_credentials(
        {
            "domain": os.environ["EGNYTE_DOMAIN"],
            "access_token": os.environ["EGNYTE_ACCESS_TOKEN"],
        }
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/exceptions.py
================================================
class ValidationError(Exception):
    """General exception for validation errors."""

    def __init__(self, message: str):
        self.message = message
        super().__init__(self.message)


class ConnectorValidationError(ValidationError):
    """General exception for connector validation errors."""

    def __init__(self, message: str):
        self.message = message
        super().__init__(self.message)


class UnexpectedValidationError(ValidationError):
    """Raised when an unexpected error occurs during connector validation.

    Unexpected errors don't necessarily mean the credential is invalid,
    but rather that there was an error during the validation process
    or we encountered a currently unhandled error case.

    Currently, unexpected validation errors are defined as transient and should not be
    used to disable the connector.
    """

    def __init__(self, message: str = "Unexpected error during connector validation"):
        super().__init__(message)


class CredentialInvalidError(ConnectorValidationError):
    """Raised when a connector's credential is invalid."""

    def __init__(self, message: str = "Credential is invalid"):
        super().__init__(message)


class CredentialExpiredError(ConnectorValidationError):
    """Raised when a connector's credential is expired."""

    def __init__(self, message: str = "Credential has expired"):
        super().__init__(message)


class InsufficientPermissionsError(ConnectorValidationError):
    """Raised when the credential does not have sufficient API permissions."""

    def __init__(
        self, message: str = "Insufficient permissions for the requested operation"
    ):
        super().__init__(message)


================================================
FILE: backend/onyx/connectors/factory.py
================================================
import importlib
from typing import Any
from typing import Type

from sqlalchemy.orm import Session

from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DocumentSource
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import EventConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.models import InputType
from onyx.connectors.registry import CONNECTOR_CLASS_MAP
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import backend_update_credential_json
from onyx.db.credentials import fetch_credential_by_id
from onyx.db.enums import AccessType
from onyx.db.models import Credential
from shared_configs.contextvars import get_current_tenant_id


class ConnectorMissingException(Exception):
    pass


# Cache for already imported connector classes
_connector_cache: dict[DocumentSource, Type[BaseConnector]] = {}


def _load_connector_class(source: DocumentSource) -> Type[BaseConnector]:
    """Dynamically load and cache a connector class."""
    if source in _connector_cache:
        return _connector_cache[source]

    if source not in CONNECTOR_CLASS_MAP:
        raise ConnectorMissingException(f"Connector not found for source={source}")

    mapping = CONNECTOR_CLASS_MAP[source]

    try:
        module = importlib.import_module(mapping.module_path)
        connector_class = getattr(module, mapping.class_name)
        _connector_cache[source] = connector_class
        return connector_class
    except (ImportError, AttributeError) as e:
        raise ConnectorMissingException(
            f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
        )


def _validate_connector_supports_input_type(
    connector: Type[BaseConnector],
    input_type: InputType | None,
    source: DocumentSource,
) -> None:
    """Validate that a connector supports the requested input type."""
    if input_type is None:
        return

    # Check each input type requirement separately for clarity
    load_state_unsupported = input_type == InputType.LOAD_STATE and not issubclass(
        connector, LoadConnector
    )

    poll_unsupported = (
        input_type == InputType.POLL
        # Either poll or checkpoint works for this, in the future
        # all connectors should be checkpoint connectors
        and (
            not issubclass(connector, PollConnector)
            and not issubclass(connector, CheckpointedConnector)
        )
    )

    event_unsupported = input_type == InputType.EVENT and not issubclass(
        connector, EventConnector
    )

    if any([load_state_unsupported, poll_unsupported, event_unsupported]):
        raise ConnectorMissingException(
            f"Connector for source={source} does not accept input_type={input_type}"
        )


def identify_connector_class(
    source: DocumentSource,
    input_type: InputType | None = None,
) -> Type[BaseConnector]:
    # Load the connector class using lazy loading
    connector = _load_connector_class(source)

    # Validate connector supports the requested input_type
    _validate_connector_supports_input_type(connector, input_type, source)

    return connector


def instantiate_connector(
    db_session: Session,
    source: DocumentSource,
    input_type: InputType,
    connector_specific_config: dict[str, Any],
    credential: Credential,
) -> BaseConnector:
    connector_class = identify_connector_class(source, input_type)

    connector = connector_class(**connector_specific_config)

    if isinstance(connector, CredentialsConnector):
        provider = OnyxDBCredentialsProvider(
            get_current_tenant_id(), str(source), credential.id
        )
        connector.set_credentials_provider(provider)
    else:
        credential_json = (
            credential.credential_json.get_value(apply_mask=False)
            if credential.credential_json
            else {}
        )
        new_credentials = connector.load_credentials(credential_json)

        if new_credentials is not None:
            backend_update_credential_json(credential, new_credentials, db_session)

    connector.set_allow_images(get_image_extraction_and_analysis_enabled())

    return connector


def validate_ccpair_for_user(
    connector_id: int,
    credential_id: int,
    access_type: AccessType,
    db_session: Session,
    enforce_creation: bool = True,
) -> bool:
    if INTEGRATION_TESTS_MODE:
        return True

    # Validate the connector settings
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id(
        credential_id,
        db_session,
    )

    if not connector:
        raise ValueError("Connector not found")

    if (
        connector.source == DocumentSource.INGESTION_API
        or connector.source == DocumentSource.MOCK_CONNECTOR
    ):
        return True

    if not credential:
        raise ValueError("Credential not found")

    try:
        runnable_connector = instantiate_connector(
            db_session=db_session,
            source=connector.source,
            input_type=connector.input_type,
            connector_specific_config=connector.connector_specific_config,
            credential=credential,
        )
    except ConnectorValidationError as e:
        raise e
    except Exception as e:
        if enforce_creation:
            raise ConnectorValidationError(str(e))
        else:
            return False

    runnable_connector.validate_connector_settings()
    if access_type == AccessType.SYNC:
        runnable_connector.validate_perm_sync()
    return True


================================================
FILE: backend/onyx/connectors/file/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/file/connector.py
================================================
import json
import os
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Any
from typing import IO

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    process_onyx_metadata,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _create_image_section(
    image_data: bytes,
    parent_file_name: str,
    display_name: str,
    media_type: str | None = None,
    link: str | None = None,
    idx: int = 0,
) -> tuple[ImageSection, str | None]:
    """
    Creates an ImageSection for an image file or embedded image.
    Stores the image in FileStore but does not generate a summary.

    Args:
        image_data: Raw image bytes
        db_session: Database session
        parent_file_name: Name of the parent file (for embedded images)
        display_name: Display name for the image
        idx: Index for embedded images

    Returns:
        Tuple of (ImageSection, stored_file_name or None)
    """
    # Create a unique identifier for the image
    file_id = f"{parent_file_name}_embedded_{idx}" if idx > 0 else parent_file_name

    # Store the image and create a section
    try:
        section, stored_file_name = store_image_and_create_section(
            image_data=image_data,
            file_id=file_id,
            display_name=display_name,
            media_type=(
                media_type if media_type is not None else "application/octet-stream"
            ),
            link=link,
            file_origin=FileOrigin.CONNECTOR,
        )
        return section, stored_file_name
    except Exception as e:
        logger.error(f"Failed to store image {display_name}: {e}")
        raise e


def _process_file(
    file_id: str,
    file_name: str,
    file: IO[Any],
    metadata: dict[str, Any] | None,
    pdf_pass: str | None,
    file_type: str | None,
) -> list[Document]:
    """
    Process a file and return a list of Documents.
    For images, creates ImageSection objects without summarization.
    For documents with embedded images, extracts and stores the images.
    """
    if metadata is None:
        metadata = {}

    # Get file extension and determine file type
    extension = get_file_ext(file_name)

    if extension not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:
        logger.warning(
            f"Skipping file '{file_name}' with unrecognized extension '{extension}'"
        )
        return []

    # If a zip is uploaded with a metadata file, we can process it here
    onyx_metadata, custom_tags = process_onyx_metadata(metadata)
    file_display_name = onyx_metadata.file_display_name or os.path.basename(file_name)
    time_updated = onyx_metadata.doc_updated_at or datetime.now(timezone.utc)
    primary_owners = onyx_metadata.primary_owners
    secondary_owners = onyx_metadata.secondary_owners
    link = onyx_metadata.link

    # These metadata items are not settable by the user
    source_type = onyx_metadata.source_type or DocumentSource.FILE

    doc_id = onyx_metadata.document_id or f"FILE_CONNECTOR__{file_id}"
    title = metadata.get("title") or file_display_name

    # 1) If the file itself is an image, handle that scenario quickly
    if extension in OnyxFileExtensions.IMAGE_EXTENSIONS:
        # Read the image data
        image_data = file.read()
        if not image_data:
            logger.warning(f"Empty image file: {file_name}")
            return []

        # Create an ImageSection for the image
        try:
            section, _ = _create_image_section(
                image_data=image_data,
                parent_file_name=file_id,
                display_name=title,
                media_type=file_type,
            )

            return [
                Document(
                    id=doc_id,
                    sections=[section],
                    source=source_type,
                    semantic_identifier=file_display_name,
                    title=title,
                    doc_updated_at=time_updated,
                    primary_owners=primary_owners,
                    secondary_owners=secondary_owners,
                    metadata=custom_tags,
                )
            ]
        except Exception as e:
            logger.error(f"Failed to process image file {file_name}: {e}")
            return []

    # 2) Otherwise: text-based approach. Possibly with embedded images.
    file.seek(0)

    # Extract text and images from the file
    extraction_result = extract_text_and_images(
        file=file,
        file_name=file_name,
        pdf_pass=pdf_pass,
        content_type=file_type,
    )

    # Each file may have file-specific ONYX_METADATA https://docs.onyx.app/admins/connectors/official/file
    # If so, we should add it to any metadata processed so far
    if extraction_result.metadata:
        logger.debug(
            f"Found file-specific metadata for {file_name}: {extraction_result.metadata}"
        )
        onyx_metadata, more_custom_tags = process_onyx_metadata(
            extraction_result.metadata
        )

        # Add file-specific tags
        custom_tags.update(more_custom_tags)

        # File-specific metadata overrides metadata processed so far
        source_type = onyx_metadata.source_type or source_type
        primary_owners = onyx_metadata.primary_owners or primary_owners
        secondary_owners = onyx_metadata.secondary_owners or secondary_owners
        time_updated = onyx_metadata.doc_updated_at or time_updated
        file_display_name = onyx_metadata.file_display_name or file_display_name
        title = onyx_metadata.title or onyx_metadata.file_display_name or title
        link = onyx_metadata.link or link

    # Build sections: first the text as a single Section
    sections: list[TextSection | ImageSection] = []
    if extraction_result.text_content.strip():
        logger.debug(f"Creating TextSection for {file_name} with link: {link}")
        sections.append(
            TextSection(link=link, text=extraction_result.text_content.strip())
        )

    # Then any extracted images from docx, PDFs, etc.
    for idx, (img_data, img_name) in enumerate(
        extraction_result.embedded_images, start=1
    ):
        # Store each embedded image as a separate file in FileStore
        # and create a section with the image reference
        try:
            image_section, stored_file_name = _create_image_section(
                image_data=img_data,
                parent_file_name=file_id,
                display_name=f"{title} - image {idx}",
                media_type="application/octet-stream",  # Default media type for embedded images
                idx=idx,
            )
            sections.append(image_section)
            logger.debug(
                f"Created ImageSection for embedded image {idx} in {file_name}, stored as: {stored_file_name}"
            )
        except Exception as e:
            logger.warning(
                f"Failed to process embedded image {idx} in {file_name}: {e}"
            )

    return [
        Document(
            id=doc_id,
            sections=sections,
            source=source_type,
            semantic_identifier=file_display_name,
            title=title,
            doc_updated_at=time_updated,
            primary_owners=primary_owners,
            secondary_owners=secondary_owners,
            metadata=custom_tags,
        )
    ]


class LocalFileConnector(LoadConnector):
    """
    Connector that reads files from Postgres and yields Documents, including
    embedded image extraction without summarization.

    file_locations are S3/Filestore UUIDs
    file_names are the names of the files
    """

    # Note: file_names is a required parameter, but should not break backwards compatibility.
    # If add_file_names migration is not run, old file connector configs will not have file_names.
    # file_names is only used for display purposes in the UI and file_locations is used as a fallback.
    def __init__(
        self,
        file_locations: list[Path | str],
        file_names: list[str] | None = None,  # noqa: ARG002
        zip_metadata_file_id: str | None = None,
        zip_metadata: dict[str, Any] | None = None,  # Deprecated, for backwards compat
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.file_locations = [str(loc) for loc in file_locations]
        self.batch_size = batch_size
        self.pdf_pass: str | None = None
        self._zip_metadata_file_id = zip_metadata_file_id
        self._zip_metadata_deprecated = zip_metadata

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.pdf_pass = credentials.get("pdf_password")

        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        """
        Iterates over each file path, fetches from Postgres, tries to parse text
        or images, and yields Document batches.
        """
        # Load metadata dict at start (from file store or deprecated inline format)
        zip_metadata: dict[str, Any] = {}
        if self._zip_metadata_file_id:
            try:
                file_store = get_default_file_store()
                metadata_io = file_store.read_file(
                    file_id=self._zip_metadata_file_id, mode="b"
                )
                metadata_bytes = metadata_io.read()
                loaded_metadata = json.loads(metadata_bytes)
                if isinstance(loaded_metadata, list):
                    zip_metadata = {d["filename"]: d for d in loaded_metadata}
                else:
                    zip_metadata = loaded_metadata
            except Exception as e:
                logger.warning(f"Failed to load metadata from file store: {e}")
        elif self._zip_metadata_deprecated:
            logger.warning(
                "Using deprecated inline zip_metadata dict. Re-upload files to use the new file store format."
            )
            zip_metadata = self._zip_metadata_deprecated

        documents: list[Document | HierarchyNode] = []

        for file_id in self.file_locations:
            file_store = get_default_file_store()
            file_record = file_store.read_file_record(file_id=file_id)
            if not file_record:
                # typically an unsupported extension
                logger.warning(f"No file record found for '{file_id}' in PG; skipping.")
                continue

            metadata = zip_metadata.get(
                file_record.display_name, {}
            ) or zip_metadata.get(os.path.basename(file_record.display_name), {})
            file_io = file_store.read_file(file_id=file_id, mode="b")
            new_docs = _process_file(
                file_id=file_id,
                file_name=file_record.display_name,
                file=file_io,
                metadata=metadata,
                pdf_pass=self.pdf_pass,
                file_type=file_record.file_type,
            )
            documents.extend(new_docs)

            if len(documents) >= self.batch_size:
                yield documents

                documents = []

        if documents:
            yield documents


if __name__ == "__main__":
    connector = LocalFileConnector(
        file_locations=[os.environ["TEST_FILE"]],
        file_names=[os.environ["TEST_FILE"]],
    )
    connector.load_credentials({"pdf_password": os.environ.get("PDF_PASSWORD")})
    doc_batches = connector.load_from_state()
    for batch in doc_batches:
        print("BATCH:", batch)


================================================
FILE: backend/onyx/connectors/fireflies/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/fireflies/connector.py
================================================
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import cast
from typing import List

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()

_FIREFLIES_ID_PREFIX = "FIREFLIES_"

_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"

_FIREFLIES_TRANSCRIPT_QUERY_SIZE = 50  # Max page size is 50

_FIREFLIES_API_QUERY = """
    query Transcripts($fromDate: DateTime, $toDate: DateTime, $limit: Int!, $skip: Int!) {
        transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
            id
            title
            organizer_email
            participants
            date
            duration
            transcript_url
            sentences {
                text
                speaker_name
                start_time
            }
        }
    }
"""

ONE_MINUTE = 60


def _create_doc_from_transcript(transcript: dict) -> Document | None:
    sections: List[TextSection] = []
    current_speaker_name = None
    current_link = ""
    current_text = ""

    if transcript["sentences"] is None:
        return None

    for sentence in transcript["sentences"]:
        if sentence["speaker_name"] != current_speaker_name:
            if current_speaker_name is not None:
                sections.append(
                    TextSection(
                        link=current_link,
                        text=current_text.strip(),
                    )
                )
            current_speaker_name = sentence.get("speaker_name") or "Unknown Speaker"
            current_link = f"{transcript['transcript_url']}?t={sentence['start_time']}"
            current_text = f"{current_speaker_name}: "

        cleaned_text = sentence["text"].replace("\xa0", " ")
        current_text += f"{cleaned_text} "

    # Sometimes these links (links with a timestamp) do not work, it is a bug with Fireflies.
    sections.append(
        TextSection(
            link=current_link,
            text=current_text.strip(),
        )
    )

    fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]

    meeting_title = transcript["title"] or "No Title"

    meeting_date_unix = transcript["date"]
    meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)

    # Build hierarchy based on meeting date (year-month)
    year_month = meeting_date.strftime("%Y-%m")

    meeting_organizer_email = transcript["organizer_email"]
    organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]

    meeting_participants_email_list = []
    for participant in transcript.get("participants", []):
        if participant != meeting_organizer_email and participant:
            meeting_participants_email_list.append(BasicExpertInfo(email=participant))

    return Document(
        id=fireflies_id,
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.FIREFLIES,
        semantic_identifier=meeting_title,
        doc_metadata={
            "hierarchy": {
                "source_path": [year_month],
                "year_month": year_month,
                "meeting_title": meeting_title,
                "organizer_email": meeting_organizer_email,
            }
        },
        metadata={
            k: str(v)
            for k, v in {
                "meeting_date": meeting_date,
                "duration_min": transcript.get("duration"),
            }.items()
            if v is not None
        },
        doc_updated_at=meeting_date,
        primary_owners=organizer_email_user_info,
        secondary_owners=meeting_participants_email_list,
    )


# If not all transcripts are being indexed, try using a more-recently-generated
# API key.
class FirefliesConnector(PollConnector, LoadConnector):
    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, str]) -> None:
        api_key = credentials.get("fireflies_api_key")

        if not isinstance(api_key, str):
            raise ConnectorMissingCredentialError(
                "The Fireflies API key must be a string"
            )

        self.api_key = api_key

        return None

    def _fetch_transcripts(
        self, start_datetime: str | None = None, end_datetime: str | None = None
    ) -> Iterator[List[dict]]:
        if self.api_key is None:
            raise ConnectorMissingCredentialError("Missing API key")

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
        }

        skip = 0
        variables: dict[str, int | str] = {
            "limit": _FIREFLIES_TRANSCRIPT_QUERY_SIZE,
        }

        if start_datetime:
            variables["fromDate"] = start_datetime
        if end_datetime:
            variables["toDate"] = end_datetime

        while True:
            variables["skip"] = skip
            response = requests.post(
                _FIREFLIES_API_URL,
                headers=headers,
                json={"query": _FIREFLIES_API_QUERY, "variables": variables},
            )

            response.raise_for_status()

            if response.status_code == 204:
                break

            received_transcripts = response.json()
            parsed_transcripts = received_transcripts.get("data", {}).get(
                "transcripts", []
            )

            yield parsed_transcripts

            if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_QUERY_SIZE:
                break

            skip += _FIREFLIES_TRANSCRIPT_QUERY_SIZE

    def _process_transcripts(
        self, start: str | None = None, end: str | None = None
    ) -> GenerateDocumentsOutput:
        doc_batch: List[Document | HierarchyNode] = []

        for transcript_batch in self._fetch_transcripts(start, end):
            for transcript in transcript_batch:
                if doc := _create_doc_from_transcript(transcript):
                    doc_batch.append(doc)

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._process_transcripts()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        # add some leeway to account for any timezone funkiness and/or bad handling
        # of start time on the Fireflies side
        start = max(0, start - ONE_MINUTE)
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S.000Z"
        )
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S.000Z"
        )

        yield from self._process_transcripts(start_datetime, end_datetime)


================================================
FILE: backend/onyx/connectors/freshdesk/__init__,py
================================================


================================================
FILE: backend/onyx/connectors/freshdesk/connector.py
================================================
import json
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import List

import requests
from retry import retry

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rl_requests,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger

logger = setup_logger()

_FRESHDESK_ID_PREFIX = "FRESHDESK_"


_TICKET_FIELDS_TO_INCLUDE = {
    "fr_escalated",
    "spam",
    "priority",
    "source",
    "status",
    "type",
    "is_escalated",
    "tags",
    "nr_due_by",
    "nr_escalated",
    "cc_emails",
    "fwd_emails",
    "reply_cc_emails",
    "ticket_cc_emails",
    "support_email",
    "to_emails",
}

_SOURCE_NUMBER_TYPE_MAP: dict[int, str] = {
    1: "Email",
    2: "Portal",
    3: "Phone",
    7: "Chat",
    9: "Feedback Widget",
    10: "Outbound Email",
}

_PRIORITY_NUMBER_TYPE_MAP: dict[int, str] = {
    1: "low",
    2: "medium",
    3: "high",
    4: "urgent",
}

_STATUS_NUMBER_TYPE_MAP: dict[int, str] = {
    2: "open",
    3: "pending",
    4: "resolved",
    5: "closed",
}


# TODO: unify this with other generic rate limited requests with retries (e.g. Axero, Notion?)
@retry(tries=3, delay=1, backoff=2)
def _rate_limited_freshdesk_get(
    url: str, auth: tuple, params: dict
) -> requests.Response:
    return rl_requests.get(url, auth=auth, params=params)


def _create_metadata_from_ticket(ticket: dict) -> dict:
    metadata: dict[str, str | list[str]] = {}
    # Combine all emails into a list so there are no repeated emails
    email_data: set[str] = set()

    for key, value in ticket.items():
        # Skip fields that aren't useful for embedding
        if key not in _TICKET_FIELDS_TO_INCLUDE:
            continue

        # Skip empty fields
        if not value or value == "[]":
            continue

        # Convert strings or lists to strings
        stringified_value: str | list[str]
        if isinstance(value, list):
            stringified_value = [str(item) for item in value]
        else:
            stringified_value = str(value)

        if "email" in key:
            if isinstance(stringified_value, list):
                email_data.update(stringified_value)
            else:
                email_data.add(stringified_value)
        else:
            metadata[key] = stringified_value

    if email_data:
        metadata["emails"] = list(email_data)

    # Convert source numbers to human-parsable string
    if source_number := ticket.get("source"):
        metadata["source"] = _SOURCE_NUMBER_TYPE_MAP.get(
            source_number, "Unknown Source Type"
        )

    # Convert priority numbers to human-parsable string
    if priority_number := ticket.get("priority"):
        metadata["priority"] = _PRIORITY_NUMBER_TYPE_MAP.get(
            priority_number, "Unknown Priority"
        )

    # Convert status to human-parsable string
    if status_number := ticket.get("status"):
        metadata["status"] = _STATUS_NUMBER_TYPE_MAP.get(
            status_number, "Unknown Status"
        )

    due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00"))
    metadata["overdue"] = str(datetime.now(timezone.utc) > due_by)

    return metadata


def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
    # Use the ticket description as the text
    text = f"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}"
    metadata = _create_metadata_from_ticket(ticket)

    # This is also used in the ID because it is more unique than the just the ticket ID
    link = f"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}"

    return Document(
        id=_FRESHDESK_ID_PREFIX + link,
        sections=[
            TextSection(
                link=link,
                text=text,
            )
        ],
        source=DocumentSource.FRESHDESK,
        semantic_identifier=ticket["subject"],
        metadata=metadata,
        doc_updated_at=datetime.fromisoformat(
            ticket["updated_at"].replace("Z", "+00:00")
        ),
    )


class FreshdeskConnector(PollConnector, LoadConnector):
    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, str | int]) -> None:
        api_key = credentials.get("freshdesk_api_key")
        domain = credentials.get("freshdesk_domain")
        if not all(isinstance(cred, str) for cred in [domain, api_key]):
            raise ConnectorMissingCredentialError(
                "All Freshdesk credentials must be strings"
            )

        # TODO: Move the domain to the connector-specific configuration instead of part of the credential
        # Then apply normalization and validation against the config
        # Clean and normalize the domain URL
        domain = str(domain).strip().lower()

        # Remove any trailing slashes
        domain = domain.rstrip("/")

        # Remove protocol if present
        if domain.startswith(("http://", "https://")):
            domain = domain.replace("http://", "").replace("https://", "")

        # Remove .freshdesk.com suffix and any API paths if present
        if ".freshdesk.com" in domain:
            domain = domain.split(".freshdesk.com")[0]

        if not domain:
            raise ConnectorMissingCredentialError("Freshdesk domain cannot be empty")

        self.api_key = str(api_key)
        self.domain = domain

    def _fetch_tickets(
        self,
        start: datetime | None = None,
        end: datetime | None = None,  # noqa: ARG002
    ) -> Iterator[List[dict]]:
        """
        'end' is not currently used, so we may double fetch tickets created after the indexing
        starts but before the actual call is made.

        To use 'end' would require us to use the search endpoint but it has limitations,
        namely having to fetch all IDs and then individually fetch each ticket because there is no
        'include' field available for this endpoint:
        https://developers.freshdesk.com/api/#filter_tickets
        """
        if self.api_key is None or self.domain is None:
            raise ConnectorMissingCredentialError("freshdesk")

        base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets"
        params: dict[str, int | str] = {
            "include": "description",
            "per_page": 50,
            "page": 1,
        }

        if start:
            params["updated_since"] = start.isoformat()

        while True:
            # Freshdesk API uses API key as the username and any value as the password.
            response = _rate_limited_freshdesk_get(
                base_url,
                auth=(self.api_key, "CanYouBelieveFreshdeskDoesThis"),
                params=params,
            )
            response.raise_for_status()

            if response.status_code == 204:
                break

            tickets = json.loads(response.content)
            logger.info(
                f"Fetched {len(tickets)} tickets from Freshdesk API (Page {params['page']})"
            )

            yield tickets

            if len(tickets) < int(params["per_page"]):
                break

            params["page"] = int(params["page"]) + 1

    def _process_tickets(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        doc_batch: List[Document | HierarchyNode] = []

        for ticket_batch in self._fetch_tickets(start, end):
            for ticket in ticket_batch:
                doc_batch.append(_create_doc_from_ticket(ticket, self.domain))

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._process_tickets()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._process_tickets(start_datetime, end_datetime)


================================================
FILE: backend/onyx/connectors/gitbook/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/gitbook/connector.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urljoin

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger


logger = setup_logger()

GITBOOK_API_BASE = "https://api.gitbook.com/v1/"


class GitbookApiClient:
    def __init__(self, access_token: str) -> None:
        self.access_token = access_token

    def get(self, endpoint: str, params: dict[str, Any] | None = None) -> Any:
        headers = {
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": "application/json",
        }

        url = urljoin(GITBOOK_API_BASE, endpoint.lstrip("/"))
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()

    def get_page_content(self, space_id: str, page_id: str) -> dict[str, Any]:
        return self.get(f"/spaces/{space_id}/content/page/{page_id}")


def _extract_text_from_document(document: dict[str, Any]) -> str:
    """Extract text content from GitBook document structure by parsing the document nodes
    into markdown format."""

    def parse_leaf(leaf: dict[str, Any]) -> str:
        text = leaf.get("text", "")
        leaf.get("marks", [])
        return text

    def parse_text_node(node: dict[str, Any]) -> str:
        text = ""
        for leaf in node.get("leaves", []):
            text += parse_leaf(leaf)
        return text

    def parse_block_node(node: dict[str, Any]) -> str:
        block_type = node.get("type", "")
        result = ""

        if block_type == "heading-1":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"# {text}\n\n"

        elif block_type == "heading-2":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"## {text}\n\n"

        elif block_type == "heading-3":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"### {text}\n\n"

        elif block_type == "heading-4":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"#### {text}\n\n"

        elif block_type == "heading-5":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"##### {text}\n\n"

        elif block_type == "heading-6":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"###### {text}\n\n"

        elif block_type == "list-unordered":
            for list_item in node.get("nodes", []):
                paragraph = list_item.get("nodes", [])[0]
                text = "".join(parse_text_node(n) for n in paragraph.get("nodes", []))
                result += f"* {text}\n"
            result += "\n"

        elif block_type == "paragraph":
            text = "".join(parse_text_node(n) for n in node.get("nodes", []))
            result = f"{text}\n\n"

        elif block_type == "list-tasks":
            for task_item in node.get("nodes", []):
                checked = task_item.get("data", {}).get("checked", False)
                paragraph = task_item.get("nodes", [])[0]
                text = "".join(parse_text_node(n) for n in paragraph.get("nodes", []))
                checkbox = "[x]" if checked else "[ ]"
                result += f"- {checkbox} {text}\n"
            result += "\n"

        elif block_type == "code":
            for code_line in node.get("nodes", []):
                if code_line.get("type") == "code-line":
                    text = "".join(
                        parse_text_node(n) for n in code_line.get("nodes", [])
                    )
                    result += f"{text}\n"
            result += "\n"

        elif block_type == "blockquote":
            for quote_node in node.get("nodes", []):
                if quote_node.get("type") == "paragraph":
                    text = "".join(
                        parse_text_node(n) for n in quote_node.get("nodes", [])
                    )
                    result += f"> {text}\n"
            result += "\n"

        elif block_type == "table":
            records = node.get("data", {}).get("records", {})
            definition = node.get("data", {}).get("definition", {})
            view = node.get("data", {}).get("view", {})

            columns = view.get("columns", [])

            header_cells = []
            for col_id in columns:
                col_def = definition.get(col_id, {})
                header_cells.append(col_def.get("title", ""))

            result = "| " + " | ".join(header_cells) + " |\n"
            result += "|" + "---|" * len(header_cells) + "\n"

            sorted_records = sorted(
                records.items(), key=lambda x: x[1].get("orderIndex", "")
            )

            for record_id, record_data in sorted_records:
                values = record_data.get("values", {})
                row_cells = []
                for col_id in columns:
                    fragment_id = values.get(col_id, "")
                    fragment_text = ""
                    for fragment in node.get("fragments", []):
                        if fragment.get("fragment") == fragment_id:
                            for frag_node in fragment.get("nodes", []):
                                if frag_node.get("type") == "paragraph":
                                    fragment_text = "".join(
                                        parse_text_node(n)
                                        for n in frag_node.get("nodes", [])
                                    )
                                    break
                    row_cells.append(fragment_text)
                result += "| " + " | ".join(row_cells) + " |\n"

            result += "\n"
        return result

    if not document or "document" not in document:
        return ""

    markdown = ""
    nodes = document["document"].get("nodes", [])

    for node in nodes:
        markdown += parse_block_node(node)

    return markdown


def _convert_page_to_document(
    client: GitbookApiClient, space_id: str, page: dict[str, Any]
) -> Document:
    page_id = page["id"]
    page_content = client.get_page_content(space_id, page_id)

    return Document(
        id=f"gitbook-{space_id}-{page_id}",
        sections=[
            TextSection(
                link=page.get("urls", {}).get("app", ""),
                text=_extract_text_from_document(page_content),
            )
        ],
        source=DocumentSource.GITBOOK,
        semantic_identifier=page.get("title", ""),
        doc_updated_at=datetime.fromisoformat(page["updatedAt"]).replace(
            tzinfo=timezone.utc
        ),
        metadata={
            "path": page.get("path", ""),
            "type": page.get("type", ""),
            "kind": page.get("kind", ""),
        },
    )


class GitbookConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        space_id: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.space_id = space_id
        self.batch_size = batch_size
        self.access_token: str | None = None
        self.client: GitbookApiClient | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> None:
        access_token = credentials.get("gitbook_api_key")
        if not access_token:
            raise ConnectorMissingCredentialError("GitBook access token")
        self.access_token = access_token
        self.client = GitbookApiClient(access_token)

    def _fetch_all_pages(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> GenerateDocumentsOutput:
        if not self.client:
            raise ConnectorMissingCredentialError("GitBook")

        try:
            content = self.client.get(f"/spaces/{self.space_id}/content/pages")
            pages: list[dict[str, Any]] = content.get("pages", [])
            current_batch: list[Document | HierarchyNode] = []

            logger.info(f"Found {len(pages)} root pages.")
            logger.info(
                f"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}"
            )

            while pages:
                page = pages.pop(0)

                updated_at_raw = page.get("updatedAt")
                if updated_at_raw is None:
                    # if updatedAt is not present, that means the page has never been edited
                    continue

                updated_at = datetime.fromisoformat(updated_at_raw)
                if start and updated_at < start:
                    continue
                if end and updated_at > end:
                    continue

                current_batch.append(
                    _convert_page_to_document(self.client, self.space_id, page)
                )

                if len(current_batch) >= self.batch_size:
                    yield current_batch
                    current_batch = []

                pages.extend(page.get("pages", []))

            if current_batch:
                yield current_batch

        except requests.RequestException as e:
            logger.error(f"Error fetching GitBook content: {str(e)}")
            raise

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._fetch_all_pages()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
        return self._fetch_all_pages(start_datetime, end_datetime)


if __name__ == "__main__":
    import os

    connector = GitbookConnector(
        space_id=os.environ["GITBOOK_SPACE_ID"],
    )
    connector.load_credentials({"gitbook_api_key": os.environ["GITBOOK_API_KEY"]})
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/github/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/github/connector.py
================================================
import copy
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from enum import Enum
from typing import Any
from typing import cast

from github import Github
from github import RateLimitExceededException
from github import Repository
from github.GithubException import GithubException
from github.Issue import Issue
from github.NamedUser import NamedUser
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
from pydantic import BaseModel
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL
from onyx.configs.constants import DocumentSource
from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.github.models import SerializedRepository
from onyx.connectors.github.rate_limit_utils import sleep_after_rate_limit_exception
from onyx.connectors.github.utils import deserialize_repository
from onyx.connectors.github.utils import get_external_access_permission
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorCheckpoint
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()

ITEMS_PER_PAGE = 100
CURSOR_LOG_FREQUENCY = 50

_MAX_NUM_RATE_LIMIT_RETRIES = 5

ONE_DAY = timedelta(days=1)
SLIM_BATCH_SIZE = 100
# Cases
# X (from start) standard run, no fallback to cursor-based pagination
# X (from start) standard run errors, fallback to cursor-based pagination
#  X error in the middle of a page
#  X no errors: run to completion
# X (from checkpoint) standard run, no fallback to cursor-based pagination
# X (from checkpoint) continue from cursor-based pagination
#  - retrying
#  - no retrying

# things to check:
# checkpoint state on return
# checkpoint progress (no infinite loop)


class DocMetadata(BaseModel):
    repo: str


def get_nextUrl_key(pag_list: PaginatedList[PullRequest | Issue]) -> str:
    if "_PaginatedList__nextUrl" in pag_list.__dict__:
        return "_PaginatedList__nextUrl"
    for key in pag_list.__dict__:
        if "__nextUrl" in key:
            return key
    for key in pag_list.__dict__:
        if "nextUrl" in key:
            return key
    return ""


def get_nextUrl(
    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str
) -> str | None:
    return getattr(pag_list, nextUrl_key) if nextUrl_key else None


def set_nextUrl(
    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str, nextUrl: str
) -> None:
    if nextUrl_key:
        setattr(pag_list, nextUrl_key, nextUrl)
    elif nextUrl:
        raise ValueError("Next URL key not found: " + str(pag_list.__dict__))


def _paginate_until_error(
    git_objs: Callable[[], PaginatedList[PullRequest | Issue]],
    cursor_url: str | None,
    prev_num_objs: int,
    cursor_url_callback: Callable[[str | None, int], None],
    retrying: bool = False,
) -> Generator[PullRequest | Issue, None, None]:
    num_objs = prev_num_objs
    pag_list = git_objs()
    nextUrl_key = get_nextUrl_key(pag_list)
    if cursor_url:
        set_nextUrl(pag_list, nextUrl_key, cursor_url)
    elif retrying:
        # if we are retrying, we want to skip the objects retrieved
        # over previous calls. Unfortunately, this WILL retrieve all
        # pages before the one we are resuming from, so we really
        # don't want this case to be hit often
        logger.warning(
            "Retrying from a previous cursor-based pagination call. "
            "This will retrieve all pages before the one we are resuming from, "
            "which may take a while and consume many API calls."
        )
        pag_list = cast(PaginatedList[PullRequest | Issue], pag_list[prev_num_objs:])
        num_objs = 0

    try:
        # this for loop handles cursor-based pagination
        for issue_or_pr in pag_list:
            num_objs += 1
            yield issue_or_pr
            # used to store the current cursor url in the checkpoint. This value
            # is updated during iteration over pag_list.
            cursor_url_callback(get_nextUrl(pag_list, nextUrl_key), num_objs)

            if num_objs % CURSOR_LOG_FREQUENCY == 0:
                logger.info(
                    f"Retrieved {num_objs} objects with current cursor url: {get_nextUrl(pag_list, nextUrl_key)}"
                )

    except Exception as e:
        logger.exception(f"Error during cursor-based pagination: {e}")
        if num_objs - prev_num_objs > 0:
            raise

        if get_nextUrl(pag_list, nextUrl_key) is not None and not retrying:
            logger.info(
                "Assuming that this error is due to cursor "
                "expiration because no objects were retrieved. "
                "Retrying from the first page."
            )
            yield from _paginate_until_error(
                git_objs, None, prev_num_objs, cursor_url_callback, retrying=True
            )
            return

        # for no cursor url or if we reach this point after a retry, raise the error
        raise


def _get_batch_rate_limited(
    # We pass in a callable because we want git_objs to produce a fresh
    # PaginatedList each time it's called to avoid using the same object for cursor-based pagination
    # from a partial offset-based pagination call.
    git_objs: Callable[[], PaginatedList],
    page_num: int,
    cursor_url: str | None,
    prev_num_objs: int,
    cursor_url_callback: Callable[[str | None, int], None],
    github_client: Github,
    attempt_num: int = 0,
) -> Generator[PullRequest | Issue, None, None]:
    if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
        raise RuntimeError(
            "Re-tried fetching batch too many times. Something is going wrong with fetching objects from Github"
        )
    try:
        if cursor_url:
            # when this is set, we are resuming from an earlier
            # cursor-based pagination call.
            yield from _paginate_until_error(
                git_objs, cursor_url, prev_num_objs, cursor_url_callback
            )
            return
        objs = list(git_objs().get_page(page_num))
        # fetch all data here to disable lazy loading later
        # this is needed to capture the rate limit exception here (if one occurs)
        for obj in objs:
            if hasattr(obj, "raw_data"):
                getattr(obj, "raw_data")
        yield from objs
    except RateLimitExceededException:
        sleep_after_rate_limit_exception(github_client)
        yield from _get_batch_rate_limited(
            git_objs,
            page_num,
            cursor_url,
            prev_num_objs,
            cursor_url_callback,
            github_client,
            attempt_num + 1,
        )
    except GithubException as e:
        if not (
            e.status == 422
            and (
                "cursor" in (e.message or "")
                or "cursor" in (e.data or {}).get("message", "")
            )
        ):
            raise
        # Fallback to a cursor-based pagination strategy
        # This can happen for "large datasets," but there's no documentation
        # On the error on the web as far as we can tell.
        # Error message:
        # "Pagination with the page parameter is not supported for large datasets,
        # please use cursor based pagination (after/before)"
        yield from _paginate_until_error(
            git_objs, cursor_url, prev_num_objs, cursor_url_callback
        )


def _get_userinfo(user: NamedUser) -> dict[str, str]:
    def _safe_get(attr_name: str) -> str | None:
        try:
            return cast(str | None, getattr(user, attr_name))
        except GithubException:
            logger.debug(f"Error getting {attr_name} for user")
            return None

    return {
        k: v
        for k, v in {
            "login": _safe_get("login"),
            "name": _safe_get("name"),
            "email": _safe_get("email"),
        }.items()
        if v is not None
    }


def _convert_pr_to_document(
    pull_request: PullRequest, repo_external_access: ExternalAccess | None
) -> Document:
    repo_full_name = pull_request.base.repo.full_name if pull_request.base else ""
    # Split full_name (e.g., "owner/repo") into owner and repo
    parts = repo_full_name.split("/", 1)
    owner_name = parts[0] if parts else ""
    repo_name = parts[1] if len(parts) > 1 else repo_full_name

    doc_metadata = {
        "repo": repo_full_name,
        "hierarchy": {
            "source_path": [owner_name, repo_name, "pull_requests"],
            "owner": owner_name,
            "repo": repo_name,
            "object_type": "pull_request",
        },
    }
    return Document(
        id=pull_request.html_url,
        sections=[
            TextSection(link=pull_request.html_url, text=pull_request.body or "")
        ],
        external_access=repo_external_access,
        source=DocumentSource.GITHUB,
        semantic_identifier=f"{pull_request.number}: {pull_request.title}",
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=(
            pull_request.updated_at.replace(tzinfo=timezone.utc)
            if pull_request.updated_at
            else None
        ),
        # this metadata is used in perm sync
        doc_metadata=doc_metadata,
        metadata={
            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
            for k, v in {
                "object_type": "PullRequest",
                "id": pull_request.number,
                "merged": pull_request.merged,
                "state": pull_request.state,
                "user": _get_userinfo(pull_request.user) if pull_request.user else None,
                "assignees": [
                    _get_userinfo(assignee) for assignee in pull_request.assignees
                ],
                "repo": (
                    pull_request.base.repo.full_name if pull_request.base else None
                ),
                "num_commits": str(pull_request.commits),
                "num_files_changed": str(pull_request.changed_files),
                "labels": [label.name for label in pull_request.labels],
                "created_at": (
                    pull_request.created_at.replace(tzinfo=timezone.utc)
                    if pull_request.created_at
                    else None
                ),
                "updated_at": (
                    pull_request.updated_at.replace(tzinfo=timezone.utc)
                    if pull_request.updated_at
                    else None
                ),
                "closed_at": (
                    pull_request.closed_at.replace(tzinfo=timezone.utc)
                    if pull_request.closed_at
                    else None
                ),
                "merged_at": (
                    pull_request.merged_at.replace(tzinfo=timezone.utc)
                    if pull_request.merged_at
                    else None
                ),
                "merged_by": (
                    _get_userinfo(pull_request.merged_by)
                    if pull_request.merged_by
                    else None
                ),
            }.items()
            if v is not None
        },
    )


def _fetch_issue_comments(issue: Issue) -> str:
    comments = issue.get_comments()
    return "\nComment: ".join(comment.body for comment in comments)


def _convert_issue_to_document(
    issue: Issue, repo_external_access: ExternalAccess | None
) -> Document:
    repo_full_name = issue.repository.full_name if issue.repository else ""
    # Split full_name (e.g., "owner/repo") into owner and repo
    parts = repo_full_name.split("/", 1)
    owner_name = parts[0] if parts else ""
    repo_name = parts[1] if len(parts) > 1 else repo_full_name

    doc_metadata = {
        "repo": repo_full_name,
        "hierarchy": {
            "source_path": [owner_name, repo_name, "issues"],
            "owner": owner_name,
            "repo": repo_name,
            "object_type": "issue",
        },
    }
    return Document(
        id=issue.html_url,
        sections=[TextSection(link=issue.html_url, text=issue.body or "")],
        source=DocumentSource.GITHUB,
        external_access=repo_external_access,
        semantic_identifier=f"{issue.number}: {issue.title}",
        # updated_at is UTC time but is timezone unaware
        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
        # this metadata is used in perm sync
        doc_metadata=doc_metadata,
        metadata={
            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
            for k, v in {
                "object_type": "Issue",
                "id": issue.number,
                "state": issue.state,
                "user": _get_userinfo(issue.user) if issue.user else None,
                "assignees": [_get_userinfo(assignee) for assignee in issue.assignees],
                "repo": issue.repository.full_name if issue.repository else None,
                "labels": [label.name for label in issue.labels],
                "created_at": (
                    issue.created_at.replace(tzinfo=timezone.utc)
                    if issue.created_at
                    else None
                ),
                "updated_at": (
                    issue.updated_at.replace(tzinfo=timezone.utc)
                    if issue.updated_at
                    else None
                ),
                "closed_at": (
                    issue.closed_at.replace(tzinfo=timezone.utc)
                    if issue.closed_at
                    else None
                ),
                "closed_by": (
                    _get_userinfo(issue.closed_by) if issue.closed_by else None
                ),
            }.items()
            if v is not None
        },
    )


class GithubConnectorStage(Enum):
    START = "start"
    PRS = "prs"
    ISSUES = "issues"


class GithubConnectorCheckpoint(ConnectorCheckpoint):
    stage: GithubConnectorStage
    curr_page: int

    cached_repo_ids: list[int] | None = None
    cached_repo: SerializedRepository | None = None

    # Used for the fallback cursor-based pagination strategy
    num_retrieved: int
    cursor_url: str | None = None

    def reset(self) -> None:
        """
        Resets curr_page, num_retrieved, and cursor_url to their initial values (0, 0, None)
        """
        self.curr_page = 0
        self.num_retrieved = 0
        self.cursor_url = None


def make_cursor_url_callback(
    checkpoint: GithubConnectorCheckpoint,
) -> Callable[[str | None, int], None]:
    def cursor_url_callback(cursor_url: str | None, num_objs: int) -> None:
        # we want to maintain the old cursor url so code after retrieval
        # can determine that we are using the fallback cursor-based pagination strategy
        if cursor_url:
            checkpoint.cursor_url = cursor_url
        checkpoint.num_retrieved = num_objs

    return cursor_url_callback


class GithubConnector(CheckpointedConnectorWithPermSync[GithubConnectorCheckpoint]):
    def __init__(
        self,
        repo_owner: str,
        repositories: str | None = None,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
        self.repositories = repositories
        self.state_filter = state_filter
        self.include_prs = include_prs
        self.include_issues = include_issues
        self.github_client: Github | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        # defaults to 30 items per page, can be set to as high as 100
        self.github_client = (
            Github(
                credentials["github_access_token"],
                base_url=GITHUB_CONNECTOR_BASE_URL,
                per_page=ITEMS_PER_PAGE,
            )
            if GITHUB_CONNECTOR_BASE_URL
            else Github(credentials["github_access_token"], per_page=ITEMS_PER_PAGE)
        )
        return None

    def get_github_repo(
        self, github_client: Github, attempt_num: int = 0
    ) -> Repository.Repository:
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repo too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_github_repo(github_client, attempt_num + 1)

    def get_github_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        """Get specific repositories based on comma-separated repo_name string."""
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            repos = []
            # Split repo_name by comma and strip whitespace
            repo_names = [
                name.strip() for name in (cast(str, self.repositories)).split(",")
            ]

            for repo_name in repo_names:
                if repo_name:  # Skip empty strings
                    try:
                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
                        repos.append(repo)
                    except GithubException as e:
                        logger.warning(
                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                        )

            return repos
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_github_repos(github_client, attempt_num + 1)

    def get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            # Try to get organization first
            try:
                org = github_client.get_organization(self.repo_owner)
                return list(org.get_repos())

            except GithubException:
                # If not an org, try as a user
                user = github_client.get_user(self.repo_owner)
                return list(user.get_repos())
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_all_repos(github_client, attempt_num + 1)

    def fetch_configured_repos(self) -> list[Repository.Repository]:
        """
        Fetch the configured repositories based on the connector settings.

        Returns:
            list[Repository.Repository]: The configured repositories.
        """
        assert self.github_client is not None  # mypy
        if self.repositories:
            if "," in self.repositories:
                return self.get_github_repos(self.github_client)
            else:
                return [self.get_github_repo(self.github_client)]
        else:
            return self.get_all_repos(self.github_client)

    def _pull_requests_func(
        self, repo: Repository.Repository
    ) -> Callable[[], PaginatedList[PullRequest]]:
        return lambda: repo.get_pulls(
            state=self.state_filter, sort="updated", direction="desc"
        )

    def _issues_func(
        self, repo: Repository.Repository
    ) -> Callable[[], PaginatedList[Issue]]:
        return lambda: repo.get_issues(
            state=self.state_filter, sort="updated", direction="desc"
        )

    def _fetch_from_github(
        self,
        checkpoint: GithubConnectorCheckpoint,
        start: datetime | None = None,
        end: datetime | None = None,
        include_permissions: bool = False,
    ) -> Generator[Document | ConnectorFailure, None, GithubConnectorCheckpoint]:
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

        checkpoint = copy.deepcopy(checkpoint)

        # First run of the connector, fetch all repos and store in checkpoint
        if checkpoint.cached_repo_ids is None:
            repos = self.fetch_configured_repos()
            if not repos:
                checkpoint.has_more = False
                return checkpoint

            curr_repo = repos.pop()
            checkpoint.cached_repo_ids = [repo.id for repo in repos]
            checkpoint.cached_repo = SerializedRepository(
                id=curr_repo.id,
                headers=curr_repo.raw_headers,
                raw_data=curr_repo.raw_data,
            )
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.curr_page = 0
            # save checkpoint with repo ids retrieved
            return checkpoint

        if checkpoint.cached_repo is None:
            raise ValueError("No repo saved in checkpoint")

        # Deserialize the repository from the checkpoint
        repo = deserialize_repository(checkpoint.cached_repo, self.github_client)

        cursor_url_callback = make_cursor_url_callback(checkpoint)
        repo_external_access: ExternalAccess | None = None
        if include_permissions:
            repo_external_access = get_external_access_permission(
                repo, self.github_client
            )
        if self.include_prs and checkpoint.stage == GithubConnectorStage.PRS:
            logger.info(f"Fetching PRs for repo: {repo.name}")

            pr_batch = _get_batch_rate_limited(
                self._pull_requests_func(repo),
                checkpoint.curr_page,
                checkpoint.cursor_url,
                checkpoint.num_retrieved,
                cursor_url_callback,
                self.github_client,
            )
            checkpoint.curr_page += 1  # NOTE: not used for cursor-based fallback
            done_with_prs = False
            num_prs = 0
            pr = None
            for pr in pr_batch:
                num_prs += 1

                # we iterate backwards in time, so at this point we stop processing prs
                if (
                    start is not None
                    and pr.updated_at
                    and pr.updated_at.replace(tzinfo=timezone.utc) < start
                ):
                    done_with_prs = True
                    break
                # Skip PRs updated after the end date
                if (
                    end is not None
                    and pr.updated_at
                    and pr.updated_at.replace(tzinfo=timezone.utc) > end
                ):
                    continue
                try:
                    yield _convert_pr_to_document(
                        cast(PullRequest, pr), repo_external_access
                    )
                except Exception as e:
                    error_msg = f"Error converting PR to document: {e}"
                    logger.exception(error_msg)
                    yield ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=str(pr.id), document_link=pr.html_url
                        ),
                        failure_message=error_msg,
                        exception=e,
                    )
                    continue

            # If we reach this point with a cursor url in the checkpoint, we were using
            # the fallback cursor-based pagination strategy. That strategy tries to get all
            # PRs, so having curosr_url set means we are done with prs. However, we need to
            # return AFTER the checkpoint reset to avoid infinite loops.

            # if we found any PRs on the page and there are more PRs to get, return the checkpoint.
            # In offset mode, while indexing without time constraints, the pr batch
            # will be empty when we're done.
            used_cursor = checkpoint.cursor_url is not None
            logger.info(f"Fetched {num_prs} PRs for repo: {repo.name}")
            if num_prs > 0 and not done_with_prs and not used_cursor:
                return checkpoint

            # if we went past the start date during the loop or there are no more
            # prs to get, we move on to issues
            checkpoint.stage = GithubConnectorStage.ISSUES
            checkpoint.reset()

            if used_cursor:
                # save the checkpoint after changing stage; next run will continue from issues
                return checkpoint

        checkpoint.stage = GithubConnectorStage.ISSUES

        if self.include_issues and checkpoint.stage == GithubConnectorStage.ISSUES:
            logger.info(f"Fetching issues for repo: {repo.name}")

            issue_batch = list(
                _get_batch_rate_limited(
                    self._issues_func(repo),
                    checkpoint.curr_page,
                    checkpoint.cursor_url,
                    checkpoint.num_retrieved,
                    cursor_url_callback,
                    self.github_client,
                )
            )
            logger.info(f"Fetched {len(issue_batch)} issues for repo: {repo.name}")
            checkpoint.curr_page += 1
            done_with_issues = False
            num_issues = 0
            for issue in issue_batch:
                num_issues += 1
                issue = cast(Issue, issue)
                # we iterate backwards in time, so at this point we stop processing prs
                if (
                    start is not None
                    and issue.updated_at.replace(tzinfo=timezone.utc) < start
                ):
                    done_with_issues = True
                    break
                # Skip PRs updated after the end date
                if (
                    end is not None
                    and issue.updated_at.replace(tzinfo=timezone.utc) > end
                ):
                    continue

                if issue.pull_request is not None:
                    # PRs are handled separately
                    continue

                try:
                    yield _convert_issue_to_document(issue, repo_external_access)
                except Exception as e:
                    error_msg = f"Error converting issue to document: {e}"
                    logger.exception(error_msg)
                    yield ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=str(issue.id),
                            document_link=issue.html_url,
                        ),
                        failure_message=error_msg,
                        exception=e,
                    )
                    continue

            logger.info(f"Fetched {num_issues} issues for repo: {repo.name}")
            # if we found any issues on the page, and we're not done, return the checkpoint.
            # don't return if we're using cursor-based pagination to avoid infinite loops
            if num_issues > 0 and not done_with_issues and not checkpoint.cursor_url:
                return checkpoint

            # if we went past the start date during the loop or there are no more
            # issues to get, we move on to the next repo
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.reset()

        checkpoint.has_more = len(checkpoint.cached_repo_ids) > 0
        if checkpoint.cached_repo_ids:
            next_id = checkpoint.cached_repo_ids.pop()
            next_repo = self.github_client.get_repo(next_id)
            checkpoint.cached_repo = SerializedRepository(
                id=next_id,
                headers=next_repo.raw_headers,
                raw_data=next_repo.raw_data,
            )
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.reset()

        if checkpoint.cached_repo_ids:
            logger.info(
                f"{len(checkpoint.cached_repo_ids)} repos remaining (IDs: {checkpoint.cached_repo_ids})"
            )
        else:
            logger.info("No more repos remaining")

        return checkpoint

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
        include_permissions: bool = False,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        # add a day for timezone safety
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) + ONE_DAY

        # Move start time back by 3 hours, since some Issues/PRs are getting dropped
        # Could be due to delayed processing on GitHub side
        # The non-updated issues since last poll will be shortcut-ed and not embedded
        adjusted_start_datetime = start_datetime - timedelta(hours=3)

        epoch = datetime.fromtimestamp(0, tz=timezone.utc)
        if adjusted_start_datetime < epoch:
            adjusted_start_datetime = epoch

        return self._fetch_from_github(
            checkpoint,
            start=adjusted_start_datetime,
            end=end_datetime,
            include_permissions=include_permissions,
        )

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=False
        )

    @override
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=True
        )

    def validate_connector_settings(self) -> None:
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub credentials not loaded.")

        if not self.repo_owner:
            raise ConnectorValidationError(
                "Invalid connector settings: 'repo_owner' must be provided."
            )

        try:
            if self.repositories:
                if "," in self.repositories:
                    # Multiple repositories specified
                    repo_names = [name.strip() for name in self.repositories.split(",")]
                    if not repo_names:
                        raise ConnectorValidationError(
                            "Invalid connector settings: No valid repository names provided."
                        )

                    # Validate at least one repository exists and is accessible
                    valid_repos = False
                    validation_errors = []

                    for repo_name in repo_names:
                        if not repo_name:
                            continue

                        try:
                            test_repo = self.github_client.get_repo(
                                f"{self.repo_owner}/{repo_name}"
                            )
                            logger.info(
                                f"Successfully accessed repository: {self.repo_owner}/{repo_name}"
                            )
                            test_repo.get_contents("")
                            valid_repos = True
                            # If at least one repo is valid, we can proceed
                            break
                        except GithubException as e:
                            validation_errors.append(
                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
                            )

                    if not valid_repos:
                        error_msg = (
                            "None of the specified repositories could be accessed: "
                        )
                        error_msg += ", ".join(validation_errors)
                        raise ConnectorValidationError(error_msg)
                else:
                    # Single repository (backward compatibility)
                    test_repo = self.github_client.get_repo(
                        f"{self.repo_owner}/{self.repositories}"
                    )
                    test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
                    org = self.github_client.get_organization(self.repo_owner)
                    total_count = org.get_repos().totalCount
                    if total_count == 0:
                        raise ConnectorValidationError(
                            f"Found no repos for organization: {self.repo_owner}. Does the credential have the right scopes?"
                        )
                except GithubException as e:
                    # Check for missing SSO
                    MISSING_SSO_ERROR_MESSAGE = "You must grant your Personal Access token access to this organization".lower()
                    if MISSING_SSO_ERROR_MESSAGE in str(e).lower():
                        SSO_GUIDE_LINK = (
                            "https://docs.github.com/en/enterprise-cloud@latest/authentication/"
                            "authenticating-with-saml-single-sign-on/"
                            "authorizing-a-personal-access-token-for-use-with-saml-single-sign-on"
                        )
                        raise ConnectorValidationError(
                            f"Your GitHub token is missing authorization to access the "
                            f"`{self.repo_owner}` organization. Please follow the guide to "
                            f"authorize your token: {SSO_GUIDE_LINK}"
                        )
                    # If not an org, try as a user
                    user = self.github_client.get_user(self.repo_owner)

                    # Check if we can access any repos
                    total_count = user.get_repos().totalCount
                    if total_count == 0:
                        raise ConnectorValidationError(
                            f"Found no repos for user: {self.repo_owner}. Does the credential have the right scopes?"
                        )

        except RateLimitExceededException:
            raise UnexpectedValidationError(
                "Validation failed due to GitHub rate-limits being exceeded. Please try again later."
            )

        except GithubException as e:
            if e.status == 401:
                raise CredentialExpiredError(
                    "GitHub credential appears to be invalid or expired (HTTP 401)."
                )
            elif e.status == 403:
                raise InsufficientPermissionsError(
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
                if self.repositories:
                    if "," in self.repositories:
                        raise ConnectorValidationError(
                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
                        )
                    else:
                        raise ConnectorValidationError(
                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
                        )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
                    )
            else:
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )

        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
            )

    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> GithubConnectorCheckpoint:
        return GithubConnectorCheckpoint.model_validate_json(checkpoint_json)

    def build_dummy_checkpoint(self) -> GithubConnectorCheckpoint:
        return GithubConnectorCheckpoint(
            stage=GithubConnectorStage.PRS, curr_page=0, has_more=True, num_retrieved=0
        )


if __name__ == "__main__":
    import os

    # Initialize the connector
    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
        repositories=os.environ.get("REPOSITORIES"),
    )
    connector.load_credentials(
        {"github_access_token": os.environ["ACCESS_TOKEN_GITHUB"]}
    )

    if connector.github_client:
        get_external_access_permission(
            connector.get_github_repos(connector.github_client).pop(),
            connector.github_client,
        )

    # Create a time range from epoch to now
    end_time = datetime.now(timezone.utc)
    start_time = datetime.fromtimestamp(0, tz=timezone.utc)
    time_range = (start_time, end_time)

    # Initialize the runner with a batch size of 10
    runner: ConnectorRunner[GithubConnectorCheckpoint] = ConnectorRunner(
        connector, batch_size=10, include_permissions=False, time_range=time_range
    )

    # Get initial checkpoint
    checkpoint = connector.build_dummy_checkpoint()

    # Run the connector
    while checkpoint.has_more:
        for doc_batch, hierarchy_node_batch, failure, next_checkpoint in runner.run(
            checkpoint
        ):
            if doc_batch:
                print(f"Retrieved batch of {len(doc_batch)} documents")
                for doc in doc_batch:
                    print(f"Document: {doc.semantic_identifier}")
            if failure:
                print(f"Failure: {failure.failure_message}")
            if next_checkpoint:
                checkpoint = next_checkpoint


================================================
FILE: backend/onyx/connectors/github/models.py
================================================
from typing import Any

from github import Repository
from github.Requester import Requester
from pydantic import BaseModel


class SerializedRepository(BaseModel):
    # id is part of the raw_data as well, just pulled out for convenience
    id: int
    headers: dict[str, str | int]
    raw_data: dict[str, Any]

    def to_Repository(self, requester: Requester) -> Repository.Repository:
        return Repository.Repository(
            requester, self.headers, self.raw_data, completed=True
        )


================================================
FILE: backend/onyx/connectors/github/rate_limit_utils.py
================================================
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from github import Github

from onyx.utils.logger import setup_logger

logger = setup_logger()


def sleep_after_rate_limit_exception(github_client: Github) -> None:
    """
    Sleep until the GitHub rate limit resets.

    Args:
        github_client: The GitHub client that hit the rate limit
    """
    sleep_time = github_client.get_rate_limit().core.reset.replace(
        tzinfo=timezone.utc
    ) - datetime.now(tz=timezone.utc)
    sleep_time += timedelta(minutes=1)  # add an extra minute just to be safe
    logger.notice(f"Ran into Github rate-limit. Sleeping {sleep_time.seconds} seconds.")
    time.sleep(sleep_time.total_seconds())


================================================
FILE: backend/onyx/connectors/github/utils.py
================================================
from collections.abc import Callable
from typing import cast

from github import Github
from github.Repository import Repository

from onyx.access.models import ExternalAccess
from onyx.connectors.github.models import SerializedRepository
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version

logger = setup_logger()


def get_external_access_permission(
    repo: Repository, github_client: Github
) -> ExternalAccess:
    """
    Get the external access permission for a repository.
    This functionality requires Enterprise Edition.
    """
    # Check if EE is enabled
    if not global_version.is_ee_version():
        # For the MIT version, return an empty ExternalAccess (private document)
        return ExternalAccess.empty()

    # Fetch the EE implementation
    ee_get_external_access_permission = cast(
        Callable[[Repository, Github, bool], ExternalAccess],
        fetch_versioned_implementation(
            "onyx.external_permissions.github.utils",
            "get_external_access_permission",
        ),
    )

    return ee_get_external_access_permission(repo, github_client, True)


def deserialize_repository(
    cached_repo: SerializedRepository, github_client: Github
) -> Repository:
    """
    Deserialize a SerializedRepository back into a Repository object.
    """
    # Try to access the requester - different PyGithub versions may use different attribute names
    try:
        # Try to get the requester using getattr to avoid linter errors
        requester = getattr(github_client, "_requester", None)
        if requester is None:
            requester = getattr(github_client, "_Github__requester", None)
        if requester is None:
            # If we can't find the requester attribute, we need to fall back to recreating the repo
            raise AttributeError("Could not find requester attribute")

        return cached_repo.to_Repository(requester)
    except Exception as e:
        # If all else fails, re-fetch the repo directly
        logger.warning(
            f"Failed to deserialize repository: {e}. Attempting to re-fetch."
        )
        repo_id = cached_repo.id
        return github_client.get_repo(repo_id)


================================================
FILE: backend/onyx/connectors/gitlab/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/gitlab/connector.py
================================================
import fnmatch
import itertools
from collections import deque
from collections.abc import Iterable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TypeVar

import gitlab
import pytz
from gitlab.v4.objects import Project

from onyx.configs.app_configs import GITLAB_CONNECTOR_INCLUDE_CODE_FILES
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

T = TypeVar("T")


logger = setup_logger()

# List of directories/Files to exclude
exclude_patterns = [
    "logs",
    ".github/",
    ".gitlab/",
    ".pre-commit-config.yaml",
]


def _batch_gitlab_objects(git_objs: Iterable[T], batch_size: int) -> Iterator[list[T]]:
    it = iter(git_objs)
    while True:
        batch = list(itertools.islice(it, batch_size))
        if not batch:
            break
        yield batch


def get_author(author: Any) -> BasicExpertInfo:
    return BasicExpertInfo(
        display_name=author.get("name"),
    )


def _convert_merge_request_to_document(mr: Any) -> Document:
    doc = Document(
        id=mr.web_url,
        sections=[TextSection(link=mr.web_url, text=mr.description or "")],
        source=DocumentSource.GITLAB,
        semantic_identifier=mr.title,
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc),
        primary_owners=[get_author(mr.author)],
        metadata={"state": mr.state, "type": "MergeRequest"},
    )
    return doc


def _convert_issue_to_document(issue: Any) -> Document:
    doc = Document(
        id=issue.web_url,
        sections=[TextSection(link=issue.web_url, text=issue.description or "")],
        source=DocumentSource.GITLAB,
        semantic_identifier=issue.title,
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
        primary_owners=[get_author(issue.author)],
        metadata={"state": issue.state, "type": issue.type if issue.type else "Issue"},
    )
    return doc


def _convert_code_to_document(
    project: Project, file: Any, url: str, projectName: str, projectOwner: str
) -> Document:
    # Dynamically get the default branch from the project object
    default_branch = project.default_branch

    # Fetch the file content using the correct branch
    file_content_obj = project.files.get(
        file_path=file["path"],
        ref=default_branch,  # Use the default branch
    )
    try:
        file_content = file_content_obj.decode().decode("utf-8")
    except UnicodeDecodeError:
        file_content = file_content_obj.decode().decode("latin-1")

    # Construct the file URL dynamically using the default branch
    file_url = (
        f"{url}/{projectOwner}/{projectName}/-/blob/{default_branch}/{file['path']}"
    )

    # Create and return a Document object
    doc = Document(
        id=file["id"],
        sections=[TextSection(link=file_url, text=file_content)],
        source=DocumentSource.GITLAB,
        semantic_identifier=file["name"],
        doc_updated_at=datetime.now().replace(tzinfo=timezone.utc),
        primary_owners=[],  # Add owners if needed
        metadata={"type": "CodeFile"},
    )
    return doc


def _should_exclude(path: str) -> bool:
    """Check if a path matches any of the exclude patterns."""
    return any(fnmatch.fnmatch(path, pattern) for pattern in exclude_patterns)


class GitlabConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        project_owner: str,
        project_name: str,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_mrs: bool = True,
        include_issues: bool = True,
        include_code_files: bool = GITLAB_CONNECTOR_INCLUDE_CODE_FILES,
    ) -> None:
        self.project_owner = project_owner
        self.project_name = project_name
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_mrs = include_mrs
        self.include_issues = include_issues
        self.include_code_files = include_code_files
        self.gitlab_client: gitlab.Gitlab | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.gitlab_client = gitlab.Gitlab(
            credentials["gitlab_url"], private_token=credentials["gitlab_access_token"]
        )
        return None

    def _fetch_from_gitlab(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        if self.gitlab_client is None:
            raise ConnectorMissingCredentialError("Gitlab")
        project: Project = self.gitlab_client.projects.get(
            f"{self.project_owner}/{self.project_name}"
        )

        # Fetch code files
        if self.include_code_files:
            # Fetching using BFS as project.report_tree with recursion causing slow load
            queue = deque([""])  # Start with the root directory
            while queue:
                current_path = queue.popleft()
                files = project.repository_tree(path=current_path, all=True)
                for file_batch in _batch_gitlab_objects(files, self.batch_size):
                    code_doc_batch: list[Document | HierarchyNode] = []
                    for file in file_batch:
                        if _should_exclude(file["path"]):
                            continue

                        if file["type"] == "blob":
                            code_doc_batch.append(
                                _convert_code_to_document(
                                    project,
                                    file,
                                    self.gitlab_client.url,
                                    self.project_name,
                                    self.project_owner,
                                )
                            )
                        elif file["type"] == "tree":
                            queue.append(file["path"])

                    if code_doc_batch:
                        yield code_doc_batch

        if self.include_mrs:
            merge_requests = project.mergerequests.list(
                state=self.state_filter,
                order_by="updated_at",
                sort="desc",
                iterator=True,
            )

            for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size):
                mr_doc_batch: list[Document | HierarchyNode] = []
                for mr in mr_batch:
                    mr.updated_at = datetime.strptime(
                        mr.updated_at, "%Y-%m-%dT%H:%M:%S.%f%z"
                    )
                    if start is not None and mr.updated_at < start.replace(
                        tzinfo=pytz.UTC
                    ):
                        yield mr_doc_batch
                        return
                    if end is not None and mr.updated_at > end.replace(tzinfo=pytz.UTC):
                        continue
                    mr_doc_batch.append(_convert_merge_request_to_document(mr))
                yield mr_doc_batch

        if self.include_issues:
            issues = project.issues.list(state=self.state_filter, iterator=True)

            for issue_batch in _batch_gitlab_objects(issues, self.batch_size):
                issue_doc_batch: list[Document | HierarchyNode] = []
                for issue in issue_batch:
                    issue.updated_at = datetime.strptime(
                        issue.updated_at, "%Y-%m-%dT%H:%M:%S.%f%z"
                    )
                    if start is not None:
                        start = start.replace(tzinfo=pytz.UTC)
                        if issue.updated_at < start:
                            yield issue_doc_batch
                            return
                    if end is not None:
                        end = end.replace(tzinfo=pytz.UTC)
                        if issue.updated_at > end:
                            continue
                    issue_doc_batch.append(_convert_issue_to_document(issue))
                yield issue_doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._fetch_from_gitlab()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
        return self._fetch_from_gitlab(start_datetime, end_datetime)


if __name__ == "__main__":
    import os

    connector = GitlabConnector(
        # gitlab_url="https://gitlab.com/api/v4",
        project_owner=os.environ["PROJECT_OWNER"],
        project_name=os.environ["PROJECT_NAME"],
        batch_size=10,
        state_filter="all",
        include_mrs=True,
        include_issues=True,
        include_code_files=GITLAB_CONNECTOR_INCLUDE_CODE_FILES,
    )

    connector.load_credentials(
        {
            "gitlab_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
            "gitlab_url": os.environ["GITLAB_URL"],
        }
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/gmail/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/gmail/connector.py
================================================
from base64 import urlsafe_b64decode
from collections.abc import Callable
from collections.abc import Iterator
from typing import Any
from typing import cast
from typing import Dict

from google.oauth2.credentials import Credentials as OAuthCredentials
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
from googleapiclient.errors import HttpError  # type: ignore

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.google_utils.google_auth import get_google_creds
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.google_utils import (
    execute_paginated_retrieval_with_max_pages,
)
from onyx.connectors.google_utils.google_utils import execute_single_retrieval
from onyx.connectors.google_utils.google_utils import PAGE_TOKEN_KEY
from onyx.connectors.google_utils.resources import get_admin_service
from onyx.connectors.google_utils.resources import get_gmail_service
from onyx.connectors.google_utils.resources import GmailService
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR
from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder


logger = setup_logger()

# This is for the initial list call to get the thread ids
THREAD_LIST_FIELDS = "nextPageToken, threads(id)"

# These are the fields to retrieve using the ID from the initial list call
PARTS_FIELDS = "parts(body(data), mimeType)"
PAYLOAD_FIELDS = f"payload(headers, {PARTS_FIELDS})"
MESSAGES_FIELDS = f"messages(id, {PAYLOAD_FIELDS})"
THREADS_FIELDS = f"threads(id, {MESSAGES_FIELDS})"
THREAD_FIELDS = f"id, {MESSAGES_FIELDS}"

EMAIL_FIELDS = [
    "cc",
    "bcc",
    "from",
    "to",
]

MAX_MESSAGE_BODY_BYTES = 10 * 1024 * 1024  # 10MB cap to keep large threads safe

PAGES_PER_CHECKPOINT = 1

add_retries = retry_builder(tries=50, max_delay=30)


def _is_mail_service_disabled_error(error: HttpError) -> bool:
    """Detect if the Gmail API is telling us the mailbox is not provisioned."""

    if error.resp.status != 400:
        return False

    error_message = str(error)
    return (
        "Mail service not enabled" in error_message
        or "failedPrecondition" in error_message
    )


def _build_time_range_query(
    time_range_start: SecondsSinceUnixEpoch | None = None,
    time_range_end: SecondsSinceUnixEpoch | None = None,
) -> str | None:
    query = ""
    if time_range_start is not None and time_range_start != 0:
        query += f"after:{int(time_range_start)}"
    if time_range_end is not None and time_range_end != 0:
        query += f" before:{int(time_range_end)}"
    query = query.strip()

    if len(query) == 0:
        return None

    return query


def _clean_email_and_extract_name(email: str) -> tuple[str, str | None]:
    email = email.strip()
    if "<" in email and ">" in email:
        # Handle format: "Display Name <email@domain.com>"
        display_name = email[: email.find("<")].strip()
        email_address = email[email.find("<") + 1 : email.find(">")].strip()
        return email_address, display_name if display_name else None
    else:
        # Handle plain email address
        return email.strip(), None


def _get_owners_from_emails(emails: dict[str, str | None]) -> list[BasicExpertInfo]:
    owners = []
    for email, names in emails.items():
        if names:
            name_parts = names.split(" ")
            first_name = " ".join(name_parts[:-1])
            last_name = name_parts[-1]
        else:
            first_name = None
            last_name = None
        owners.append(
            BasicExpertInfo(email=email, first_name=first_name, last_name=last_name)
        )
    return owners


def _get_message_body(payload: dict[str, Any]) -> str:
    """
    Gmail threads can contain large inline parts (including attachments
    transmitted as base64). Only decode text/plain parts and skip anything
    that breaches the safety threshold to protect against OOMs.
    """

    message_body_chunks: list[str] = []
    stack = [payload]

    while stack:
        part = stack.pop()
        if not part:
            continue

        children = part.get("parts", [])
        stack.extend(reversed(children))

        mime_type = part.get("mimeType")
        if mime_type != "text/plain":
            continue

        body = part.get("body", {})
        data = body.get("data", "")

        if not data:
            continue

        # base64 inflates storage by ~4/3; work with decoded size estimate
        approx_decoded_size = (len(data) * 3) // 4
        if approx_decoded_size > MAX_MESSAGE_BODY_BYTES:
            logger.warning(
                "Skipping oversized Gmail message part (%s bytes > %s limit)",
                approx_decoded_size,
                MAX_MESSAGE_BODY_BYTES,
            )
            continue

        try:
            text = urlsafe_b64decode(data).decode()
        except (ValueError, UnicodeDecodeError) as error:
            logger.warning("Failed to decode Gmail message part: %s", error)
            continue

        message_body_chunks.append(text)

    return "".join(message_body_chunks)


def _build_document_link(thread_id: str) -> str:
    return f"https://mail.google.com/mail/u/0/#inbox/{thread_id}"


def message_to_section(message: Dict[str, Any]) -> tuple[TextSection, dict[str, str]]:
    link = _build_document_link(message["id"])

    payload = message.get("payload", {})
    headers = payload.get("headers", [])
    metadata: dict[str, Any] = {}
    for header in headers:
        name = header.get("name").lower()
        value = header.get("value")
        if name in EMAIL_FIELDS:
            metadata[name] = value
        if name == "subject":
            metadata["subject"] = value
        if name == "date":
            metadata["updated_at"] = value

    if labels := message.get("labelIds"):
        metadata["labels"] = labels

    message_data = ""
    for name, value in metadata.items():
        # updated at isnt super useful for the llm
        if name != "updated_at":
            message_data += f"{name}: {value}\n"

    message_body_text: str = _get_message_body(payload)

    return TextSection(link=link, text=message_body_text + message_data), metadata


def thread_to_document(
    full_thread: Dict[str, Any], email_used_to_fetch_thread: str
) -> Document | None:
    all_messages = full_thread.get("messages", [])
    if not all_messages:
        return None

    sections = []
    semantic_identifier = ""
    updated_at = None
    from_emails: dict[str, str | None] = {}
    other_emails: dict[str, str | None] = {}
    for message in all_messages:
        section, message_metadata = message_to_section(message)
        sections.append(section)

        for name, value in message_metadata.items():
            if name in EMAIL_FIELDS:
                email, display_name = _clean_email_and_extract_name(value)
                if name == "from":
                    from_emails[email] = (
                        display_name if not from_emails.get(email) else None
                    )
                else:
                    other_emails[email] = (
                        display_name if not other_emails.get(email) else None
                    )

        # If we haven't set the semantic identifier yet, set it to the subject of the first message
        if not semantic_identifier:
            semantic_identifier = message_metadata.get("subject", "")

        if message_metadata.get("updated_at"):
            updated_at = message_metadata.get("updated_at")

    updated_at_datetime = None
    if updated_at:
        updated_at_datetime = time_str_to_utc(updated_at)

    id = full_thread.get("id")
    if not id:
        raise ValueError("Thread ID is required")

    primary_owners = _get_owners_from_emails(from_emails)
    secondary_owners = _get_owners_from_emails(other_emails)

    # If emails have no subject, match Gmail's default "no subject"
    # Search will break without a semantic identifier
    if not semantic_identifier:
        semantic_identifier = "(no subject)"

    # NOTE: we're choosing to unconditionally include perm sync info
    # (external_access) as it doesn't cost much space
    return Document(
        id=id,
        semantic_identifier=semantic_identifier,
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.GMAIL,
        # This is used to perform permission sync
        primary_owners=primary_owners,
        secondary_owners=secondary_owners,
        doc_updated_at=updated_at_datetime,
        # Not adding emails to metadata because it's already in the sections
        metadata={},
        external_access=ExternalAccess(
            external_user_emails={email_used_to_fetch_thread},
            external_user_group_ids=set(),
            is_public=False,
        ),
    )


def _full_thread_from_id(
    thread_id: str,
    user_email: str,
    gmail_service: GmailService,
) -> Document | ConnectorFailure | None:
    try:
        thread = next(
            execute_single_retrieval(
                retrieval_function=gmail_service.users().threads().get,
                list_key=None,
                userId=user_email,
                fields=THREAD_FIELDS,
                id=thread_id,
                continue_on_404_or_403=True,
            ),
            None,
        )
        if thread is None:
            raise ValueError(f"Thread {thread_id} not found")
        return thread_to_document(thread, user_email)
    except Exception as e:
        return ConnectorFailure(
            failed_document=DocumentFailure(
                document_id=thread_id, document_link=_build_document_link(thread_id)
            ),
            failure_message=f"Failed to retrieve thread {thread_id}",
            exception=e,
        )


def _slim_thread_from_id(
    thread_id: str,
    user_email: str,
    gmail_service: GmailService,  # noqa: ARG001
) -> SlimDocument:
    return SlimDocument(
        id=thread_id,
        external_access=ExternalAccess(
            external_user_emails={user_email},
            external_user_group_ids=set(),
            is_public=False,
        ),
    )


class GmailCheckpoint(ConnectorCheckpoint):
    user_emails: list[str] = []  # stack of user emails to process
    page_token: str | None = None


class GmailConnector(
    SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GmailCheckpoint]
):
    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size

        self._creds: OAuthCredentials | ServiceAccountCredentials | None = None
        self._primary_admin_email: str | None = None

    @property
    def primary_admin_email(self) -> str:
        if self._primary_admin_email is None:
            raise RuntimeError(
                "Primary admin email missing, should not call this property before calling load_credentials"
            )
        return self._primary_admin_email

    @property
    def google_domain(self) -> str:
        if self._primary_admin_email is None:
            raise RuntimeError(
                "Primary admin email missing, should not call this property before calling load_credentials"
            )
        return self._primary_admin_email.split("@")[-1]

    @property
    def creds(self) -> OAuthCredentials | ServiceAccountCredentials:
        if self._creds is None:
            raise RuntimeError(
                "Creds missing, should not call this property before calling load_credentials"
            )
        return self._creds

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
        primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
        self._primary_admin_email = primary_admin_email

        self._creds, new_creds_dict = get_google_creds(
            credentials=credentials,
            source=DocumentSource.GMAIL,
        )
        return new_creds_dict

    def _get_all_user_emails(self) -> list[str]:
        """
        List all user emails if we are on a Google Workspace domain.
        If the domain is gmail.com, or if we attempt to call the Admin SDK and
        get a 404 or 403, fall back to using the single user.
        A 404 indicates a personal Gmail account with no Workspace domain.
        A 403 indicates insufficient permissions (e.g., OAuth user without admin privileges).
        """

        try:
            admin_service = get_admin_service(self.creds, self.primary_admin_email)
            emails = []
            for user in execute_paginated_retrieval(
                retrieval_function=admin_service.users().list,
                list_key="users",
                fields=USER_FIELDS,
                domain=self.google_domain,
            ):
                if email := user.get("primaryEmail"):
                    emails.append(email)
            return emails

        except HttpError as e:
            if e.resp.status == 404:
                logger.warning(
                    "Received 404 from Admin SDK; this may indicate a personal Gmail account "
                    "with no Workspace domain. Falling back to single user."
                )
                return [self.primary_admin_email]
            elif e.resp.status == 403:
                logger.warning(
                    "Received 403 from Admin SDK; this may indicate insufficient permissions "
                    "(e.g., OAuth user without admin privileges or service account without "
                    "domain-wide delegation). Falling back to single user."
                )
                return [self.primary_admin_email]
            raise

    def _fetch_threads_impl(
        self,
        user_email: str,
        time_range_start: SecondsSinceUnixEpoch | None = None,
        time_range_end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
        page_token: str | None = None,
        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005
        is_slim: bool = False,
    ) -> Iterator[Document | ConnectorFailure] | GenerateSlimDocumentOutput:
        query = _build_time_range_query(time_range_start, time_range_end)
        slim_doc_batch: list[SlimDocument | HierarchyNode] = []
        logger.info(
            f"Fetching {'slim' if is_slim else 'full'} threads for user: {user_email}"
        )
        gmail_service = get_gmail_service(self.creds, user_email)
        try:
            for thread in execute_paginated_retrieval_with_max_pages(
                max_num_pages=PAGES_PER_CHECKPOINT,
                retrieval_function=gmail_service.users().threads().list,
                list_key="threads",
                userId=user_email,
                fields=THREAD_LIST_FIELDS,
                q=query,
                continue_on_404_or_403=True,
                **({PAGE_TOKEN_KEY: page_token} if page_token else {}),
            ):
                # if a page token is returned, set it and leave the function
                if isinstance(thread, str):
                    set_page_token(thread)
                    return
                if is_slim:
                    slim_doc_batch.append(
                        SlimDocument(
                            id=thread["id"],
                            external_access=ExternalAccess(
                                external_user_emails={user_email},
                                external_user_group_ids=set(),
                                is_public=False,
                            ),
                        )
                    )
                    if len(slim_doc_batch) >= SLIM_BATCH_SIZE:
                        yield slim_doc_batch
                        slim_doc_batch = []
                else:
                    result = _full_thread_from_id(
                        thread["id"], user_email, gmail_service
                    )
                    if result is not None:
                        yield result
                if callback:
                    tag = (
                        "retrieve_all_slim_docs_perm_sync"
                        if is_slim
                        else "gmail_retrieve_all_docs"
                    )
                    if callback.should_stop():
                        raise RuntimeError(f"{tag}: Stop signal detected")

                    callback.progress(tag, 1)
            if slim_doc_batch:
                yield slim_doc_batch

            # done with user
            set_page_token(None)
        except HttpError as e:
            if _is_mail_service_disabled_error(e):
                logger.warning(
                    "Skipping Gmail sync for %s because the mailbox is disabled.",
                    user_email,
                )
                return
            raise

    def _fetch_threads(
        self,
        user_email: str,
        page_token: str | None = None,
        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005
        time_range_start: SecondsSinceUnixEpoch | None = None,
        time_range_end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> Iterator[Document | ConnectorFailure]:
        yield from cast(
            Iterator[Document | ConnectorFailure],
            self._fetch_threads_impl(
                user_email,
                time_range_start,
                time_range_end,
                callback,
                page_token,
                set_page_token,
                False,
            ),
        )

    def _fetch_slim_threads(
        self,
        user_email: str,
        page_token: str | None = None,
        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005
        time_range_start: SecondsSinceUnixEpoch | None = None,
        time_range_end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        yield from cast(
            GenerateSlimDocumentOutput,
            self._fetch_threads_impl(
                user_email,
                time_range_start,
                time_range_end,
                callback,
                page_token,
                set_page_token,
                True,
            ),
        )

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GmailCheckpoint,
    ) -> CheckpointOutput[GmailCheckpoint]:
        if not checkpoint.user_emails:
            checkpoint.user_emails = self._get_all_user_emails()
        try:

            def set_page_token(page_token: str | None) -> None:
                checkpoint.page_token = page_token

            yield from self._fetch_threads(
                checkpoint.user_emails[-1],
                checkpoint.page_token,
                set_page_token,
                start,
                end,
                callback=None,
            )
            if checkpoint.page_token is None:
                # we're done with this user
                checkpoint.user_emails.pop()

            if len(checkpoint.user_emails) == 0:
                checkpoint.has_more = False
            return checkpoint
        except Exception as e:
            if MISSING_SCOPES_ERROR_STR in str(e):
                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
            raise e

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GmailCheckpoint,
    ) -> CheckpointOutput[GmailCheckpoint]:
        return self._load_from_checkpoint(
            start=start,
            end=end,
            checkpoint=checkpoint,
        )

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GmailCheckpoint,
    ) -> CheckpointOutput[GmailCheckpoint]:
        # NOTE: we're choosing to unconditionally include perm sync info
        # (external_access) as it doesn't cost much space
        return self._load_from_checkpoint(
            start=start,
            end=end,
            checkpoint=checkpoint,
        )

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        try:
            pt_dict: dict[str, str | None] = {PAGE_TOKEN_KEY: None}

            def set_page_token(page_token: str | None) -> None:
                pt_dict[PAGE_TOKEN_KEY] = page_token

            for user_email in self._get_all_user_emails():
                yield from self._fetch_slim_threads(
                    user_email,
                    pt_dict[PAGE_TOKEN_KEY],
                    set_page_token,
                    start,
                    end,
                    callback=callback,
                )
        except Exception as e:
            if MISSING_SCOPES_ERROR_STR in str(e):
                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
            raise e

    def build_dummy_checkpoint(self) -> GmailCheckpoint:
        return GmailCheckpoint(has_more=True)

    def validate_checkpoint_json(self, checkpoint_json: str) -> GmailCheckpoint:
        return GmailCheckpoint.model_validate_json(checkpoint_json)


if __name__ == "__main__":
    pass


================================================
FILE: backend/onyx/connectors/gong/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/gong/connector.py
================================================
import base64
import time
from collections.abc import Generator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast

import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import GONG_CONNECTOR_START_TIME
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()


class GongConnector(LoadConnector, PollConnector):
    BASE_URL = "https://api.gong.io"
    MAX_CALL_DETAILS_ATTEMPTS = 6
    CALL_DETAILS_DELAY = 30  # in seconds
    # Gong API limit is 3 calls/sec — stay safely under it
    MIN_REQUEST_INTERVAL = 0.5  # seconds between requests

    def __init__(
        self,
        workspaces: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        continue_on_fail: bool = CONTINUE_ON_CONNECTOR_FAILURE,
        hide_user_info: bool = False,
    ) -> None:
        self.workspaces = workspaces
        self.batch_size: int = batch_size
        self.continue_on_fail = continue_on_fail
        self.auth_token_basic: str | None = None
        self.hide_user_info = hide_user_info
        self._last_request_time: float = 0.0

        # urllib3 Retry already respects the Retry-After header by default
        # (respect_retry_after_header=True), so on 429 it will sleep for the
        # duration Gong specifies before retrying.
        retry_strategy = Retry(
            total=10,
            backoff_factor=2,
            status_forcelist=[429, 500, 502, 503, 504],
        )

        session = requests.Session()
        session.mount(GongConnector.BASE_URL, HTTPAdapter(max_retries=retry_strategy))
        self._session = session

    @staticmethod
    def make_url(endpoint: str) -> str:
        url = f"{GongConnector.BASE_URL}{endpoint}"
        return url

    def _throttled_request(
        self, method: str, url: str, **kwargs: Any
    ) -> requests.Response:
        """Rate-limited request wrapper. Enforces MIN_REQUEST_INTERVAL between
        calls to stay under Gong's 3 calls/sec limit and avoid triggering 429s."""
        now = time.monotonic()
        elapsed = now - self._last_request_time
        if elapsed < self.MIN_REQUEST_INTERVAL:
            time.sleep(self.MIN_REQUEST_INTERVAL - elapsed)

        response = self._session.request(method, url, **kwargs)
        self._last_request_time = time.monotonic()
        return response

    def _get_workspace_id_map(self) -> dict[str, str]:
        response = self._throttled_request(
            "GET", GongConnector.make_url("/v2/workspaces")
        )
        response.raise_for_status()

        workspaces_details = response.json().get("workspaces")
        name_id_map = {
            workspace["name"]: workspace["id"] for workspace in workspaces_details
        }
        id_id_map = {
            workspace["id"]: workspace["id"] for workspace in workspaces_details
        }
        # In very rare case, if a workspace is given a name which is the id of another workspace,
        # Then the user input is treated as the name
        return {**id_id_map, **name_id_map}

    def _get_transcript_batches(
        self, start_datetime: str | None = None, end_datetime: str | None = None
    ) -> Generator[list[dict[str, Any]], None, None]:
        body: dict[str, dict] = {"filter": {}}
        if start_datetime:
            body["filter"]["fromDateTime"] = start_datetime
        if end_datetime:
            body["filter"]["toDateTime"] = end_datetime

        # The batch_ids in the previous method appears to be batches of call_ids to process
        # In this method, we will retrieve transcripts for them in batches.
        transcripts: list[dict[str, Any]] = []
        workspace_list = self.workspaces or [None]  # type: ignore
        workspace_map = self._get_workspace_id_map() if self.workspaces else {}

        for workspace in workspace_list:
            if workspace:
                logger.info(f"Updating Gong workspace: {workspace}")
                workspace_id = workspace_map.get(workspace)
                if not workspace_id:
                    logger.error(f"Invalid Gong workspace: {workspace}")
                    if not self.continue_on_fail:
                        raise ValueError(f"Invalid workspace: {workspace}")
                    continue
                body["filter"]["workspaceId"] = workspace_id
            else:
                if "workspaceId" in body["filter"]:
                    del body["filter"]["workspaceId"]

            while True:
                response = self._throttled_request(
                    "POST", GongConnector.make_url("/v2/calls/transcript"), json=body
                )
                # If no calls in the range, just break out
                if response.status_code == 404:
                    break

                try:
                    response.raise_for_status()
                except Exception:
                    logger.error(f"Error fetching transcripts: {response.text}")
                    raise

                data = response.json()
                call_transcripts = data.get("callTranscripts", [])
                transcripts.extend(call_transcripts)

                while len(transcripts) >= self.batch_size:
                    yield transcripts[: self.batch_size]
                    transcripts = transcripts[self.batch_size :]

                cursor = data.get("records", {}).get("cursor")
                if cursor:
                    body["cursor"] = cursor
                else:
                    break

        if transcripts:
            yield transcripts

    def _get_call_details_by_ids(self, call_ids: list[str]) -> dict:
        body = {
            "filter": {"callIds": call_ids},
            "contentSelector": {"exposedFields": {"parties": True}},
        }

        response = self._throttled_request(
            "POST", GongConnector.make_url("/v2/calls/extensive"), json=body
        )
        response.raise_for_status()

        calls = response.json().get("calls")
        call_to_metadata = {}
        for call in calls:
            call_to_metadata[call["metaData"]["id"]] = call

        return call_to_metadata

    @staticmethod
    def _parse_parties(parties: list[dict]) -> dict[str, str]:
        id_mapping = {}
        for party in parties:
            name = party.get("name")
            email = party.get("emailAddress")

            if name and email:
                full_identifier = f"{name} ({email})"
            elif name:
                full_identifier = name
            elif email:
                full_identifier = email
            else:
                full_identifier = "Unknown"

            id_mapping[party["speakerId"]] = full_identifier

        return id_mapping

    def _fetch_calls(
        self, start_datetime: str | None = None, end_datetime: str | None = None
    ) -> GenerateDocumentsOutput:
        num_calls = 0

        for transcript_batch in self._get_transcript_batches(
            start_datetime, end_datetime
        ):
            doc_batch: list[Document | HierarchyNode] = []

            transcript_call_ids = cast(
                list[str],
                [t.get("callId") for t in transcript_batch if t.get("callId")],
            )

            call_details_map: dict[str, Any] = {}

            # There's a likely race condition in the API where a transcript will have a
            # call id but the call to v2/calls/extensive will not return all of the id's
            # retry with exponential backoff has been observed to mitigate this
            # in ~2 minutes. After max attempts, proceed with whatever we have —
            # the per-call loop below will skip missing IDs gracefully.
            current_attempt = 0
            while True:
                current_attempt += 1
                call_details_map = self._get_call_details_by_ids(transcript_call_ids)
                if set(transcript_call_ids) == set(call_details_map.keys()):
                    # we got all the id's we were expecting ... break and continue
                    break

                # we are missing some id's. Log and retry with exponential backoff
                missing_call_ids = set(transcript_call_ids) - set(
                    call_details_map.keys()
                )
                logger.warning(
                    f"_get_call_details_by_ids is missing call id's: "
                    f"current_attempt={current_attempt} "
                    f"missing_call_ids={missing_call_ids}"
                )
                if current_attempt >= self.MAX_CALL_DETAILS_ATTEMPTS:
                    logger.error(
                        f"Giving up on missing call id's after "
                        f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
                        f"missing_call_ids={missing_call_ids} — "
                        f"proceeding with {len(call_details_map)} of "
                        f"{len(transcript_call_ids)} calls"
                    )
                    break

                wait_seconds = self.CALL_DETAILS_DELAY * pow(2, current_attempt - 1)
                logger.warning(
                    f"_get_call_details_by_ids waiting to retry: "
                    f"wait={wait_seconds}s "
                    f"current_attempt={current_attempt} "
                    f"next_attempt={current_attempt + 1} "
                    f"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}"
                )
                time.sleep(wait_seconds)

            # now we can iterate per call/transcript
            for transcript in transcript_batch:
                call_id = transcript.get("callId")

                if not call_id or call_id not in call_details_map:
                    # NOTE(rkuo): seeing odd behavior where call_ids from the transcript
                    # don't have call details. adding error debugging logs to trace.
                    logger.error(
                        f"Couldn't get call information for Call ID: {call_id}"
                    )
                    if call_id:
                        logger.error(
                            f"Call debug info: call_id={call_id} "
                            f"call_ids={transcript_call_ids} "
                            f"call_details_map={call_details_map.keys()}"
                        )
                    if not self.continue_on_fail:
                        raise RuntimeError(
                            f"Couldn't get call information for Call ID: {call_id}"
                        )
                    continue

                call_details = call_details_map[call_id]
                call_metadata = call_details["metaData"]

                call_time_str = call_metadata["started"]
                call_title = call_metadata["title"]
                logger.info(
                    f"{num_calls + 1}: Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}"
                )

                call_parties = cast(list[dict] | None, call_details.get("parties"))
                if call_parties is None:
                    logger.error(f"Couldn't get parties for Call ID: {call_id}")
                    call_parties = []

                id_to_name_map = self._parse_parties(call_parties)

                # Keeping a separate dict here in case the parties info is incomplete
                speaker_to_name: dict[str, str] = {}

                transcript_text = ""
                call_purpose = call_metadata["purpose"]
                if call_purpose:
                    transcript_text += f"Call Description: {call_purpose}\n\n"

                contents = transcript["transcript"]
                for segment in contents:
                    speaker_id = segment.get("speakerId", "")
                    if speaker_id not in speaker_to_name:
                        if self.hide_user_info:
                            speaker_to_name[speaker_id] = (
                                f"User {len(speaker_to_name) + 1}"
                            )
                        else:
                            speaker_to_name[speaker_id] = id_to_name_map.get(
                                speaker_id, "Unknown"
                            )

                    speaker_name = speaker_to_name[speaker_id]

                    sentences = segment.get("sentences", {})
                    monolog = " ".join(
                        [sentence.get("text", "") for sentence in sentences]
                    )
                    transcript_text += f"{speaker_name}: {monolog}\n\n"

                metadata = {}
                if call_metadata.get("system"):
                    metadata["client"] = call_metadata.get("system")
                # TODO calls have a clientUniqueId field, can pull that in later

                doc_batch.append(
                    Document(
                        id=call_id,
                        sections=[
                            TextSection(link=call_metadata["url"], text=transcript_text)
                        ],
                        source=DocumentSource.GONG,
                        # Should not ever be Untitled as a call cannot be made without a Title
                        semantic_identifier=call_title or "Untitled",
                        doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(
                            timezone.utc
                        ),
                        metadata={"client": call_metadata.get("system")},
                    )
                )

                num_calls += 1

            yield doc_batch

        logger.info(f"_fetch_calls finished: num_calls={num_calls}")

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        combined = (
            f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
        )
        self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
            "utf-8"
        )

        if self.auth_token_basic is None:
            raise ConnectorMissingCredentialError("Gong")

        self._session.headers.update(
            {"Authorization": f"Basic {self.auth_token_basic}"}
        )
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._fetch_calls()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        # if this env variable is set, don't start from a timestamp before the specified
        # start time
        # TODO: remove this once this is globally available
        if GONG_CONNECTOR_START_TIME:
            special_start_datetime = datetime.fromisoformat(GONG_CONNECTOR_START_TIME)
            special_start_datetime = special_start_datetime.replace(tzinfo=timezone.utc)
        else:
            special_start_datetime = datetime.fromtimestamp(0, tz=timezone.utc)

        # don't let the special start dt be past the end time, this causes issues when
        # the Gong API (`filter.fromDateTime: must be before toDateTime`)
        special_start_datetime = min(special_start_datetime, end_datetime)

        start_datetime = max(
            datetime.fromtimestamp(start, tz=timezone.utc), special_start_datetime
        )

        # Because these are meeting start times, the meeting needs to end and be processed
        # so adding a 1 day buffer and fetching by default till current time
        start_one_day_offset = start_datetime - timedelta(days=1)
        start_time = start_one_day_offset.isoformat()

        end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()

        logger.info(f"Fetching Gong calls between {start_time} and {end_time}")
        return self._fetch_calls(start_time, end_time)


if __name__ == "__main__":
    import os

    connector = GongConnector()
    connector.load_credentials(
        {
            "gong_access_key": os.environ["GONG_ACCESS_KEY"],
            "gong_access_key_secret": os.environ["GONG_ACCESS_KEY_SECRET"],
        }
    )

    latest_docs = connector.load_from_state()
    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/google_drive/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/google_drive/connector.py
================================================
import copy
import json
import os
import sys
import threading
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from datetime import datetime
from enum import Enum
from typing import Any
from typing import cast
from typing import Protocol
from urllib.parse import parse_qs
from urllib.parse import urlparse
from urllib.parse import urlunparse

from google.auth.exceptions import RefreshError
from google.oauth2.credentials import Credentials as OAuthCredentials
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
from googleapiclient.errors import HttpError  # type: ignore
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import MAX_DRIVE_WORKERS
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.google_drive.doc_conversion import build_slim_document
from onyx.connectors.google_drive.doc_conversion import (
    convert_drive_item_to_document,
)
from onyx.connectors.google_drive.doc_conversion import onyx_document_id_from_drive_file
from onyx.connectors.google_drive.doc_conversion import PermissionSyncContext
from onyx.connectors.google_drive.file_retrieval import crawl_folders_for_files
from onyx.connectors.google_drive.file_retrieval import DriveFileFieldType
from onyx.connectors.google_drive.file_retrieval import get_all_files_for_oauth
from onyx.connectors.google_drive.file_retrieval import (
    get_all_files_in_my_drive_and_shared,
)
from onyx.connectors.google_drive.file_retrieval import get_external_access_for_folder
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from onyx.connectors.google_drive.file_retrieval import get_folder_metadata
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
from onyx.connectors.google_drive.file_retrieval import get_shared_drive_name
from onyx.connectors.google_drive.file_retrieval import has_link_only_permission
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import GoogleDriveCheckpoint
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_drive.models import RetrievedDriveFile
from onyx.connectors.google_drive.models import StageCompletion
from onyx.connectors.google_utils.google_auth import get_google_creds
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.google_utils import get_file_owners
from onyx.connectors.google_utils.google_utils import GoogleFields
from onyx.connectors.google_utils.resources import get_admin_service
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR
from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.db.enums import HierarchyNodeType
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder
from onyx.utils.threadpool_concurrency import parallel_yield
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.threadpool_concurrency import ThreadSafeDict
from onyx.utils.threadpool_concurrency import ThreadSafeSet

logger = setup_logger()
# TODO: Improve this by using the batch utility: https://googleapis.github.io/google-api-python-client/docs/batch.html
# All file retrievals could be batched and made at once

BATCHES_PER_CHECKPOINT = 1

DRIVE_BATCH_SIZE = 80

SHARED_DRIVE_PAGES_PER_CHECKPOINT = 2
MY_DRIVE_PAGES_PER_CHECKPOINT = 2
OAUTH_PAGES_PER_CHECKPOINT = 2
FOLDERS_PER_CHECKPOINT = 1


def _extract_str_list_from_comma_str(string: str | None) -> list[str]:
    if not string:
        return []
    return [s.strip() for s in string.split(",") if s.strip()]


def _extract_ids_from_urls(urls: list[str]) -> list[str]:
    return [urlparse(url).path.strip("/").split("/")[-1] for url in urls]


def _clean_requested_drive_ids(
    requested_drive_ids: set[str],
    requested_folder_ids: set[str],
    all_drive_ids_available: set[str],
) -> tuple[list[str], list[str]]:
    invalid_requested_drive_ids = requested_drive_ids - all_drive_ids_available
    filtered_folder_ids = requested_folder_ids - all_drive_ids_available
    if invalid_requested_drive_ids:
        logger.warning(
            f"Some shared drive IDs were not found. IDs: {invalid_requested_drive_ids}"
        )
        logger.warning("Checking for folder access instead...")
        filtered_folder_ids.update(invalid_requested_drive_ids)

    valid_requested_drive_ids = requested_drive_ids - invalid_requested_drive_ids
    return sorted(valid_requested_drive_ids), sorted(filtered_folder_ids)


def _get_parent_id_from_file(drive_file: GoogleDriveFileType) -> str | None:
    """Extract the first parent ID from a drive file."""
    parents = drive_file.get("parents")
    if parents and len(parents) > 0:
        return parents[0]  # files have a unique parent
    return None


def _is_shared_drive_root(folder: GoogleDriveFileType) -> bool:
    """
    Check if a folder is a verified shared drive root.

    For shared drives, we can verify using driveId:
    - If driveId is set and folder_id == driveId AND no parents, it's the shared drive root
    - If driveId is set but folder_id != driveId with empty parents, it's a permission issue

    Returns True only for verified shared drive roots.
    """
    folder_id = folder.get("id")
    drive_id = folder.get("driveId")
    parents = folder.get("parents", [])

    # Must have no parents to be a root
    if parents:
        return False

    # For shared drive content, the root has id == driveId
    return bool(drive_id and folder_id == drive_id)


def _public_access() -> ExternalAccess:
    return ExternalAccess(
        external_user_emails=set(),
        external_user_group_ids=set(),
        is_public=True,
    )


class CredentialedRetrievalMethod(Protocol):
    def __call__(
        self,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]: ...


def add_retrieval_info(
    drive_files: Iterator[GoogleDriveFileType | str],
    user_email: str,
    completion_stage: DriveRetrievalStage,
    parent_id: str | None = None,
) -> Iterator[RetrievedDriveFile | str]:
    for file in drive_files:
        if isinstance(file, str):
            yield file
            continue
        yield RetrievedDriveFile(
            drive_file=file,
            user_email=user_email,
            parent_id=parent_id,
            completion_stage=completion_stage,
        )


class DriveIdStatus(Enum):
    AVAILABLE = "available"
    IN_PROGRESS = "in_progress"
    FINISHED = "finished"


class GoogleDriveConnector(
    SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
):
    def __init__(
        self,
        include_shared_drives: bool = False,
        include_my_drives: bool = False,
        include_files_shared_with_me: bool = False,
        shared_drive_urls: str | None = None,
        my_drive_emails: str | None = None,
        shared_folder_urls: str | None = None,
        specific_user_emails: str | None = None,
        exclude_domain_link_only: bool = False,
        batch_size: int = INDEX_BATCH_SIZE,  # noqa: ARG002
        # OLD PARAMETERS
        folder_paths: list[str] | None = None,
        include_shared: bool | None = None,
        follow_shortcuts: bool | None = None,
        only_org_public: bool | None = None,
        continue_on_failure: bool | None = None,
    ) -> None:
        # Check for old input parameters
        if folder_paths is not None:
            logger.warning(
                "The 'folder_paths' parameter is deprecated. Use 'shared_folder_urls' instead."
            )
        if include_shared is not None:
            logger.warning(
                "The 'include_shared' parameter is deprecated. Use 'include_files_shared_with_me' instead."
            )
        if follow_shortcuts is not None:
            logger.warning("The 'follow_shortcuts' parameter is deprecated.")
        if only_org_public is not None:
            logger.warning("The 'only_org_public' parameter is deprecated.")
        if continue_on_failure is not None:
            logger.warning("The 'continue_on_failure' parameter is deprecated.")

        if not any(
            (
                include_shared_drives,
                include_my_drives,
                include_files_shared_with_me,
                shared_folder_urls,
                my_drive_emails,
                shared_drive_urls,
            )
        ):
            raise ConnectorValidationError(
                "Nothing to index. Please specify at least one of the following: "
                "include_shared_drives, include_my_drives, include_files_shared_with_me, "
                "shared_folder_urls, or my_drive_emails"
            )

        specific_requests_made = False
        if bool(shared_drive_urls) or bool(my_drive_emails) or bool(shared_folder_urls):
            specific_requests_made = True
        self.specific_requests_made = specific_requests_made

        # NOTE: potentially modified in load_credentials if using service account
        self.include_files_shared_with_me = (
            False if specific_requests_made else include_files_shared_with_me
        )
        self.include_my_drives = False if specific_requests_made else include_my_drives
        self.include_shared_drives = (
            False if specific_requests_made else include_shared_drives
        )

        shared_drive_url_list = _extract_str_list_from_comma_str(shared_drive_urls)
        self._requested_shared_drive_ids = set(
            _extract_ids_from_urls(shared_drive_url_list)
        )

        self._requested_my_drive_emails = set(
            _extract_str_list_from_comma_str(my_drive_emails)
        )

        shared_folder_url_list = _extract_str_list_from_comma_str(shared_folder_urls)
        self._requested_folder_ids = set(_extract_ids_from_urls(shared_folder_url_list))
        self._specific_user_emails = _extract_str_list_from_comma_str(
            specific_user_emails
        )
        self.exclude_domain_link_only = exclude_domain_link_only

        self._primary_admin_email: str | None = None

        self._creds: OAuthCredentials | ServiceAccountCredentials | None = None
        self._creds_dict: dict[str, Any] | None = None

        # ids of folders and shared drives that have been traversed
        self._retrieved_folder_and_drive_ids: set[str] = set()

        # Cache of known My Drive root IDs (user_email -> root_id)
        # Used to verify if a folder with no parents is actually a My Drive root
        # Thread-safe because multiple impersonation threads access this concurrently
        self._my_drive_root_id_cache: ThreadSafeDict[str, str] = ThreadSafeDict()

        self.allow_images = False

        self.size_threshold = GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD

    def set_allow_images(self, value: bool) -> None:
        self.allow_images = value

    @property
    def primary_admin_email(self) -> str:
        if self._primary_admin_email is None:
            raise RuntimeError(
                "Primary admin email missing, should not call this property before calling load_credentials"
            )
        return self._primary_admin_email

    @property
    def google_domain(self) -> str:
        if self._primary_admin_email is None:
            raise RuntimeError(
                "Primary admin email missing, should not call this property before calling load_credentials"
            )
        return self._primary_admin_email.split("@")[-1]

    @property
    def creds(self) -> OAuthCredentials | ServiceAccountCredentials:
        if self._creds is None:
            raise RuntimeError(
                "Creds missing, should not call this property before calling load_credentials"
            )
        return self._creds

    @classmethod
    @override
    def normalize_url(cls, url: str) -> NormalizationResult:
        """Normalize a Google Drive URL to match the canonical Document.id format.

        Reuses the connector's existing document ID creation logic from
        onyx_document_id_from_drive_file.
        """
        parsed = urlparse(url)
        netloc = parsed.netloc.lower()

        if not (
            netloc.startswith("docs.google.com")
            or netloc.startswith("drive.google.com")
        ):
            return NormalizationResult(normalized_url=None, use_default=False)

        # Handle ?id= query parameter case
        query_params = parse_qs(parsed.query)
        doc_id = query_params.get("id", [None])[0]
        if doc_id:
            scheme = parsed.scheme or "https"
            netloc = "drive.google.com"
            path = f"/file/d/{doc_id}"
            params = ""
            query = ""
            fragment = ""
            normalized = urlunparse(
                (scheme, netloc, path, params, query, fragment)
            ).rstrip("/")
            return NormalizationResult(normalized_url=normalized, use_default=False)

        # Extract file ID and use connector's function
        path_parts = parsed.path.split("/")
        file_id = None
        for i, part in enumerate(path_parts):
            if part == "d" and i + 1 < len(path_parts):
                file_id = path_parts[i + 1]
                break

        if not file_id:
            return NormalizationResult(normalized_url=None, use_default=False)

        # Create minimal file object for connector function
        file_obj = {"webViewLink": url, "id": file_id}
        normalized = onyx_document_id_from_drive_file(file_obj).rstrip("/")
        return NormalizationResult(normalized_url=normalized, use_default=False)

    # TODO: ensure returned new_creds_dict is actually persisted when this is called?
    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
        try:
            self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
        except KeyError:
            raise ValueError("Credentials json missing primary admin key")

        self._creds, new_creds_dict = get_google_creds(
            credentials=credentials,
            source=DocumentSource.GOOGLE_DRIVE,
        )

        # Service account connectors don't have a specific setting determining whether
        # to include "shared with me" for each user, so we default to true unless the connector
        # is in specific folders/drives mode. Note that shared files are only picked up during
        # the My Drive stage, so this does nothing if the connector is set to only index shared drives.
        if (
            isinstance(self._creds, ServiceAccountCredentials)
            and not self.specific_requests_made
        ):
            self.include_files_shared_with_me = True

        self._creds_dict = new_creds_dict

        return new_creds_dict

    def _update_traversed_parent_ids(self, folder_id: str) -> None:
        self._retrieved_folder_and_drive_ids.add(folder_id)

    def _get_all_user_emails(self) -> list[str]:
        if self._specific_user_emails:
            return self._specific_user_emails

        # Start with primary admin email
        user_emails = [self.primary_admin_email]

        # Only fetch additional users if using service account
        if isinstance(self.creds, OAuthCredentials):
            return user_emails

        admin_service = get_admin_service(
            creds=self.creds,
            user_email=self.primary_admin_email,
        )

        # Get admins first since they're more likely to have access to most files
        for is_admin in [True, False]:
            query = "isAdmin=true" if is_admin else "isAdmin=false"
            for user in execute_paginated_retrieval(
                retrieval_function=admin_service.users().list,
                list_key="users",
                fields=USER_FIELDS,
                domain=self.google_domain,
                query=query,
            ):
                if email := user.get("primaryEmail"):
                    if email not in user_emails:
                        user_emails.append(email)
        return user_emails

    def _get_my_drive_root_id(self, user_email: str) -> str | None:
        """
        Get the My Drive root folder ID for a user.

        Uses a cache to avoid repeated API calls. Returns None if the user
        doesn't have access to Drive APIs or the call fails.
        """
        if user_email in self._my_drive_root_id_cache:
            return self._my_drive_root_id_cache[user_email]

        try:
            drive_service = get_drive_service(self.creds, user_email)
            root_id = get_root_folder_id(drive_service)
            self._my_drive_root_id_cache[user_email] = root_id
            return root_id
        except Exception:
            # User might not have access to Drive APIs
            return None

    def _is_my_drive_root(
        self, folder: GoogleDriveFileType, retriever_email: str
    ) -> bool:
        """
        Check if a folder is a My Drive root.

        For My Drive folders (no driveId), we verify by comparing the folder ID
        to the actual My Drive root ID obtained via files().get(fileId='root').
        """
        folder_id = folder.get("id")
        drive_id = folder.get("driveId")
        parents = folder.get("parents", [])

        # If there are parents, this is not a root
        if parents:
            return False

        # If driveId is set, this is shared drive content, not My Drive
        if drive_id:
            return False

        # Get the My Drive root ID for this user and compare
        root_id = self._get_my_drive_root_id(retriever_email)
        if root_id and folder_id == root_id:
            return True

        # Also check with admin in case the retriever doesn't have access
        admin_root_id = self._get_my_drive_root_id(self.primary_admin_email)
        if admin_root_id and folder_id == admin_root_id:
            return True

        return False

    def _get_new_ancestors_for_files(
        self,
        files: list[RetrievedDriveFile],
        seen_hierarchy_node_raw_ids: ThreadSafeSet[str],
        fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str],
        permission_sync_context: PermissionSyncContext | None = None,
        add_prefix: bool = False,
    ) -> list[HierarchyNode]:
        """
        Get all NEW ancestor hierarchy nodes for a batch of files.

        For each file, walks up the parent chain until reaching a root/drive
        (terminal node with no parent). Returns HierarchyNode objects for all
        new ancestors.

        The function tracks two separate sets:
        - seen_hierarchy_node_raw_ids: Nodes we've already yielded (to avoid duplicates)
        - fully_walked_hierarchy_node_raw_ids: Nodes where we've successfully walked
          to a terminal root. Only skip walking from a node if it's in this set.

        This separation ensures that if User A can access folder C but not its parent B,
        a later User B who has access to both can still complete the walk to the root.

        Args:
            files: List of retrieved drive files to get ancestors for
            seen_hierarchy_node_raw_ids: Set of already-yielded node IDs (modified in place)
            fully_walked_hierarchy_node_raw_ids: Set of node IDs where the walk to root
                succeeded (modified in place)
            permission_sync_context: If provided, permissions will be fetched for hierarchy nodes.
                Contains google_domain and primary_admin_email needed for permission syncing.
            add_prefix: When True, prefix group IDs with source type (for indexing path).
                       When False (default), leave unprefixed (for permission sync path).

        Returns:
            List of HierarchyNode objects for new ancestors (ordered parent-first)
        """
        service = get_drive_service(self.creds, self.primary_admin_email)
        field_type = (
            DriveFileFieldType.WITH_PERMISSIONS
            if permission_sync_context
            else DriveFileFieldType.STANDARD
        )
        new_nodes: list[HierarchyNode] = []

        for file in files:
            parent_id = _get_parent_id_from_file(file.drive_file)
            if not parent_id:
                continue

            # Only skip if we've already successfully walked from this node to a root.
            # Don't skip just because it's "seen" - a previous user may have failed
            # to walk to the root, and this user might have better access.
            if parent_id in fully_walked_hierarchy_node_raw_ids:
                continue

            # Walk up the parent chain
            ancestors_to_add: list[HierarchyNode] = []
            node_ids_in_walk: list[str] = []
            current_id: str | None = parent_id
            reached_terminal = False

            while current_id:
                node_ids_in_walk.append(current_id)

                # If we hit a node that's already been fully walked, we know
                # the path from here to root is complete
                if current_id in fully_walked_hierarchy_node_raw_ids:
                    reached_terminal = True
                    break

                # Fetch folder metadata
                folder = self._get_folder_metadata(
                    current_id, file.user_email, field_type
                )
                if not folder:
                    # Can't access this folder - stop climbing
                    # Don't mark as fully walked since we didn't reach root
                    break

                folder_parent_id = _get_parent_id_from_file(folder)

                # Create the node BEFORE marking as seen to avoid a race condition where:
                # 1. Thread A marks node as "seen"
                # 2. Thread A fails to create node (e.g., API error in get_external_access)
                # 3. Thread B sees node as "already seen" and skips it
                # 4. Result: node is never yielded
                #
                # By creating first and then atomically checking/marking, we ensure that
                # if creation fails, another thread can still try. If both succeed,
                # only one will add to ancestors_to_add (the one that wins check_and_add).
                if permission_sync_context:
                    external_access = get_external_access_for_folder(
                        folder,
                        permission_sync_context.google_domain,
                        service,
                        add_prefix,
                    )
                else:
                    external_access = _public_access()

                node = HierarchyNode(
                    raw_node_id=current_id,
                    raw_parent_id=folder_parent_id,
                    display_name=folder.get("name", "Unknown Folder"),
                    link=folder.get("webViewLink"),
                    node_type=HierarchyNodeType.FOLDER,
                    external_access=external_access,
                )

                # Now atomically check and add - only append if we're the first thread
                # to successfully create this node
                already_seen = seen_hierarchy_node_raw_ids.check_and_add(current_id)
                if not already_seen:
                    ancestors_to_add.append(node)

                # Check if this is a verified terminal node (actual root, not just
                # empty parents due to permission limitations)
                # Check shared drive root first (simple ID comparison)
                if _is_shared_drive_root(folder):
                    # files().get() returns 'Drive' for shared drive roots;
                    # fetch the real name via drives().get().
                    # Try both the retriever and admin since the admin may
                    # not have access to private shared drives.
                    drive_name = self._get_shared_drive_name(
                        current_id, file.user_email
                    )
                    if drive_name:
                        node.display_name = drive_name
                    node.node_type = HierarchyNodeType.SHARED_DRIVE
                    reached_terminal = True
                    break

                # Check if this is a My Drive root (requires API call, but cached)
                if self._is_my_drive_root(folder, file.user_email):
                    reached_terminal = True
                    break

                # If parents is empty but we couldn't verify it's a true root,
                # stop walking but don't mark as fully walked (another user
                # with better access might be able to continue)
                if folder_parent_id is None:
                    break

                # Move to parent
                current_id = folder_parent_id

            # If we successfully reached a terminal node (or a fully-walked node),
            # mark all nodes in this walk as fully walked
            if reached_terminal:
                fully_walked_hierarchy_node_raw_ids.update(set(node_ids_in_walk))

            new_nodes += ancestors_to_add

        return new_nodes

    def _get_folder_metadata(
        self, folder_id: str, retriever_email: str, field_type: DriveFileFieldType
    ) -> GoogleDriveFileType | None:
        """
        Fetch metadata for a folder by ID.

        Important: When a user has access to a shared folder but NOT its parent,
        the Google Drive API returns the folder metadata WITHOUT the parent info.
        To handle this, if the retriever gets a folder without parents, we also
        try with admin who may have better access and can see the parent chain.
        """
        best_folder: GoogleDriveFileType | None = None

        # Use a set to deduplicate if retriever_email == primary_admin_email
        for email in {retriever_email, self.primary_admin_email}:
            service = get_drive_service(self.creds, email)
            folder = get_folder_metadata(service, folder_id, field_type)

            if not folder:
                logger.debug(f"Failed to fetch folder {folder_id} using {email}")
                continue

            logger.debug(f"Successfully fetched folder {folder_id} using {email}")

            # If this folder has parents, use it
            if folder.get("parents"):
                return folder

            # Folder has no parents - could be a root OR user lacks access to parent
            # Keep this as a fallback but try admin to see if they can see parents
            if best_folder is None:
                best_folder = folder
                logger.debug(
                    f"Folder {folder_id} has no parents when fetched by {email}, will try admin to check for parent access"
                )

        if best_folder:
            logger.debug(
                f"Successfully fetched folder {folder_id} but no parents found"
            )
            return best_folder

        logger.debug(
            f"All attempts failed to fetch folder {folder_id} (tried {retriever_email} and {self.primary_admin_email})"
        )
        return None

    def _get_shared_drive_name(self, drive_id: str, retriever_email: str) -> str | None:
        """Fetch the name of a shared drive, trying both the retriever and admin."""
        for email in {retriever_email, self.primary_admin_email}:
            svc = get_drive_service(self.creds, email)
            name = get_shared_drive_name(svc, drive_id)
            if name:
                return name
        return None

    def get_all_drive_ids(self) -> set[str]:
        return self._get_all_drives_for_user(self.primary_admin_email)

    def _get_all_drives_for_user(self, user_email: str) -> set[str]:
        drive_service = get_drive_service(self.creds, user_email)
        is_service_account = isinstance(self.creds, ServiceAccountCredentials)
        logger.info(
            f"Getting all drives for user {user_email} with service account: {is_service_account}"
        )
        all_drive_ids: set[str] = set()
        for drive in execute_paginated_retrieval(
            retrieval_function=drive_service.drives().list,
            list_key="drives",
            useDomainAdminAccess=is_service_account,
            fields="drives(id),nextPageToken",
        ):
            all_drive_ids.add(drive["id"])

        if not all_drive_ids:
            logger.warning(
                "No drives found even though indexing shared drives was requested."
            )

        return all_drive_ids

    def make_drive_id_getter(
        self, drive_ids: list[str], checkpoint: GoogleDriveCheckpoint
    ) -> Callable[[str], str | None]:
        status_lock = threading.Lock()

        in_progress_drive_ids = {
            completion.current_folder_or_drive_id: user_email
            for user_email, completion in checkpoint.completion_map.items()
            if completion.stage == DriveRetrievalStage.SHARED_DRIVE_FILES
            and completion.current_folder_or_drive_id is not None
        }
        drive_id_status: dict[str, DriveIdStatus] = {}
        for drive_id in drive_ids:
            if drive_id in self._retrieved_folder_and_drive_ids:
                drive_id_status[drive_id] = DriveIdStatus.FINISHED
            elif drive_id in in_progress_drive_ids:
                drive_id_status[drive_id] = DriveIdStatus.IN_PROGRESS
            else:
                drive_id_status[drive_id] = DriveIdStatus.AVAILABLE

        def get_available_drive_id(thread_id: str) -> str | None:
            completion = checkpoint.completion_map[thread_id]
            with status_lock:
                future_work = None
                for drive_id, status in drive_id_status.items():
                    if drive_id in self._retrieved_folder_and_drive_ids:
                        drive_id_status[drive_id] = DriveIdStatus.FINISHED
                        continue
                    if drive_id in completion.processed_drive_ids:
                        continue

                    if status == DriveIdStatus.AVAILABLE:
                        # add to processed drive ids so if this user fails to retrieve once
                        # they won't try again on the next checkpoint run
                        completion.processed_drive_ids.add(drive_id)
                        return drive_id
                    elif status == DriveIdStatus.IN_PROGRESS:
                        logger.debug(f"Drive id in progress: {drive_id}")
                        future_work = drive_id

                if future_work:
                    # in this case, all drive ids are either finished or in progress.
                    # This thread will pick up one of the in progress ones in case it fails.
                    # This is a much simpler approach than waiting for a failure picking it up,
                    # at the cost of some repeated work until all shared drives are retrieved.
                    # we avoid apocalyptic cases like all threads focusing on one huge drive
                    # because the drive id is added to _retrieved_folder_and_drive_ids after any thread
                    # manages to retrieve any file from it (unfortunately, this is also the reason we currently
                    # sometimes fail to retrieve restricted access folders/files)
                    completion.processed_drive_ids.add(future_work)
                    return future_work
            return None  # no work available, return None

        return get_available_drive_id

    def _impersonate_user_for_retrieval(
        self,
        user_email: str,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        get_new_drive_id: Callable[[str], str | None],
        sorted_filtered_folder_ids: list[str],
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        logger.info(f"Impersonating user {user_email}")
        curr_stage = checkpoint.completion_map[user_email]
        resuming = True
        if curr_stage.stage == DriveRetrievalStage.START:
            logger.info(f"Setting stage to {DriveRetrievalStage.MY_DRIVE_FILES.value}")
            curr_stage.stage = DriveRetrievalStage.MY_DRIVE_FILES
            resuming = False
        drive_service = get_drive_service(self.creds, user_email)

        # validate that the user has access to the drive APIs by performing a simple
        # request and checking for a 401
        try:
            logger.debug(f"Getting root folder id for user {user_email}")
            # default is ~17mins of retries, don't do that here for cases so we don't
            # waste 17mins everytime we run into a user without access to drive APIs
            retry_builder(tries=3, delay=1)(get_root_folder_id)(drive_service)
        except HttpError as e:
            if e.status_code == 401:
                # fail gracefully, let the other impersonations continue
                # one user without access shouldn't block the entire connector
                logger.warning(
                    f"User '{user_email}' does not have access to the drive APIs."
                )
                # mark this user as done so we don't try to retrieve anything for them
                # again
                curr_stage.stage = DriveRetrievalStage.DONE
                return
            raise
        except RefreshError as e:
            logger.warning(
                f"User '{user_email}' could not refresh their token. Error: {e}"
            )
            # mark this user as done so we don't try to retrieve anything for them
            # again
            yield RetrievedDriveFile(
                completion_stage=DriveRetrievalStage.DONE,
                drive_file={},
                user_email=user_email,
                error=e,
            )
            curr_stage.stage = DriveRetrievalStage.DONE
            return
        # if we are including my drives, try to get the current user's my
        # drive if any of the following are true:
        # - include_my_drives is true
        # - the current user's email is in the requested emails
        if curr_stage.stage == DriveRetrievalStage.MY_DRIVE_FILES:
            if self.include_my_drives or user_email in self._requested_my_drive_emails:
                logger.info(
                    f"Getting all files in my drive as '{user_email}. Resuming: {resuming}. "
                    f"Stage completed until: {curr_stage.completed_until}. "
                    f"Next page token: {curr_stage.next_page_token}"
                )

                for file_or_token in add_retrieval_info(
                    get_all_files_in_my_drive_and_shared(
                        service=drive_service,
                        update_traversed_ids_func=self._update_traversed_parent_ids,
                        field_type=field_type,
                        include_shared_with_me=self.include_files_shared_with_me,
                        max_num_pages=MY_DRIVE_PAGES_PER_CHECKPOINT,
                        start=curr_stage.completed_until if resuming else start,
                        end=end,
                        cache_folders=not bool(curr_stage.completed_until),
                        page_token=curr_stage.next_page_token,
                    ),
                    user_email,
                    DriveRetrievalStage.MY_DRIVE_FILES,
                ):
                    if isinstance(file_or_token, str):
                        logger.debug(f"Done with max num pages for user {user_email}")
                        checkpoint.completion_map[user_email].next_page_token = (
                            file_or_token
                        )
                        return  # done with the max num pages, return checkpoint
                    yield file_or_token

            checkpoint.completion_map[user_email].next_page_token = None
            curr_stage.stage = DriveRetrievalStage.SHARED_DRIVE_FILES
            curr_stage.current_folder_or_drive_id = None
            return  # resume from next stage on the next run

        if curr_stage.stage == DriveRetrievalStage.SHARED_DRIVE_FILES:

            def _yield_from_drive(
                drive_id: str, drive_start: SecondsSinceUnixEpoch | None
            ) -> Iterator[RetrievedDriveFile | str]:
                yield from add_retrieval_info(
                    get_files_in_shared_drive(
                        service=drive_service,
                        drive_id=drive_id,
                        field_type=field_type,
                        max_num_pages=SHARED_DRIVE_PAGES_PER_CHECKPOINT,
                        update_traversed_ids_func=self._update_traversed_parent_ids,
                        cache_folders=not bool(
                            drive_start
                        ),  # only cache folders for 0 or None
                        start=drive_start,
                        end=end,
                        page_token=curr_stage.next_page_token,
                    ),
                    user_email,
                    DriveRetrievalStage.SHARED_DRIVE_FILES,
                    parent_id=drive_id,
                )

            # resume from a checkpoint
            if resuming and (drive_id := curr_stage.current_folder_or_drive_id):
                resume_start = curr_stage.completed_until
                for file_or_token in _yield_from_drive(drive_id, resume_start):
                    if isinstance(file_or_token, str):
                        checkpoint.completion_map[user_email].next_page_token = (
                            file_or_token
                        )
                        return  # done with the max num pages, return checkpoint
                    yield file_or_token

            drive_id = get_new_drive_id(user_email)
            if drive_id:
                logger.info(
                    f"Getting files in shared drive '{drive_id}' as '{user_email}. Resuming: {resuming}"
                )
                curr_stage.completed_until = 0
                curr_stage.current_folder_or_drive_id = drive_id
                for file_or_token in _yield_from_drive(drive_id, start):
                    if isinstance(file_or_token, str):
                        checkpoint.completion_map[user_email].next_page_token = (
                            file_or_token
                        )
                        return  # done with the max num pages, return checkpoint
                    yield file_or_token
                curr_stage.current_folder_or_drive_id = None
                return  # get a new drive id on the next run

            checkpoint.completion_map[user_email].next_page_token = None
            curr_stage.stage = DriveRetrievalStage.FOLDER_FILES
            curr_stage.current_folder_or_drive_id = None
            return  # resume from next stage on the next run

        # In the folder files section of service account retrieval we take extra care
        # to not retrieve duplicate docs. In particular, we only add a folder to
        # retrieved_folder_and_drive_ids when all users are finished retrieving files
        # from that folder, and maintain a set of all file ids that have been retrieved
        # for each folder. This might get rather large; in practice we assume that the
        # specific folders users choose to index don't have too many files.
        if curr_stage.stage == DriveRetrievalStage.FOLDER_FILES:

            def _yield_from_folder_crawl(
                folder_id: str, folder_start: SecondsSinceUnixEpoch | None
            ) -> Iterator[RetrievedDriveFile]:
                for retrieved_file in crawl_folders_for_files(
                    service=drive_service,
                    parent_id=folder_id,
                    field_type=field_type,
                    user_email=user_email,
                    traversed_parent_ids=self._retrieved_folder_and_drive_ids,
                    update_traversed_ids_func=self._update_traversed_parent_ids,
                    start=folder_start,
                    end=end,
                ):
                    yield retrieved_file

            # resume from a checkpoint
            last_processed_folder = None
            if resuming:
                folder_id = curr_stage.current_folder_or_drive_id
                if folder_id is None:
                    logger.warning(
                        f"folder id not set in checkpoint for user {user_email}. "
                        "This happens occasionally when the connector is interrupted "
                        "and resumed."
                    )
                else:
                    resume_start = curr_stage.completed_until
                    yield from _yield_from_folder_crawl(folder_id, resume_start)
                last_processed_folder = folder_id

            skipping_seen_folders = last_processed_folder is not None
            # NOTE: this assumes a small number of folders to crawl. If someone
            # really wants to specify a large number of folders, we should use
            # binary search to find the first unseen folder.
            num_completed_folders = 0
            for folder_id in sorted_filtered_folder_ids:
                if skipping_seen_folders:
                    skipping_seen_folders = folder_id != last_processed_folder
                    continue

                if folder_id in self._retrieved_folder_and_drive_ids:
                    continue

                curr_stage.completed_until = 0
                curr_stage.current_folder_or_drive_id = folder_id

                if num_completed_folders >= FOLDERS_PER_CHECKPOINT:
                    return  # resume from this folder on the next run

                logger.info(f"Getting files in folder '{folder_id}' as '{user_email}'")
                yield from _yield_from_folder_crawl(folder_id, start)
                num_completed_folders += 1

        curr_stage.stage = DriveRetrievalStage.DONE

    def _manage_service_account_retrieval(
        self,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        """
        The current implementation of the service account retrieval does some
        initial setup work using the primary admin email, then runs MAX_DRIVE_WORKERS
        concurrent threads, each of which impersonates a different user and retrieves
        files for that user. Technically, the actual work each thread does is "yield the
        next file retrieved by the user", at which point it returns to the thread pool;
        see parallel_yield for more details.
        """
        if checkpoint.completion_stage == DriveRetrievalStage.START:
            checkpoint.completion_stage = DriveRetrievalStage.USER_EMAILS

        if checkpoint.completion_stage == DriveRetrievalStage.USER_EMAILS:
            all_org_emails: list[str] = self._get_all_user_emails()
            checkpoint.user_emails = all_org_emails
            checkpoint.completion_stage = DriveRetrievalStage.DRIVE_IDS
        else:
            if checkpoint.user_emails is None:
                raise ValueError("user emails not set")
            all_org_emails = checkpoint.user_emails

        sorted_drive_ids, sorted_folder_ids = self._determine_retrieval_ids(
            checkpoint, DriveRetrievalStage.MY_DRIVE_FILES
        )

        # Setup initial completion map on first connector run
        for email in all_org_emails:
            # don't overwrite existing completion map on resuming runs
            if email in checkpoint.completion_map:
                continue
            checkpoint.completion_map[email] = StageCompletion(
                stage=DriveRetrievalStage.START,
                completed_until=0,
                processed_drive_ids=set(),
            )

        # we've found all users and drives, now time to actually start
        # fetching stuff
        logger.info(f"Found {len(all_org_emails)} users to impersonate")
        logger.debug(f"Users: {all_org_emails}")
        logger.info(f"Found {len(sorted_drive_ids)} drives to retrieve")
        logger.debug(f"Drives: {sorted_drive_ids}")
        logger.info(f"Found {len(sorted_folder_ids)} folders to retrieve")
        logger.debug(f"Folders: {sorted_folder_ids}")

        drive_id_getter = self.make_drive_id_getter(sorted_drive_ids, checkpoint)

        # only process emails that we haven't already completed retrieval for
        non_completed_org_emails = [
            user_email
            for user_email, stage_completion in checkpoint.completion_map.items()
            if stage_completion.stage != DriveRetrievalStage.DONE
        ]

        logger.debug(f"Non-completed users remaining: {len(non_completed_org_emails)}")

        # don't process too many emails before returning a checkpoint. This is
        # to resolve the case where there are a ton of emails that don't have access
        # to the drive APIs. Without this, we could loop through these emails for
        # more than 3 hours, causing a timeout and stalling progress.
        email_batch_takes_us_to_completion = True
        MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING = MAX_DRIVE_WORKERS
        if len(non_completed_org_emails) > MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING:
            non_completed_org_emails = non_completed_org_emails[
                :MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING
            ]
            email_batch_takes_us_to_completion = False

        user_retrieval_gens = [
            self._impersonate_user_for_retrieval(
                email,
                field_type,
                checkpoint,
                drive_id_getter,
                sorted_folder_ids,
                start,
                end,
            )
            for email in non_completed_org_emails
        ]
        yield from parallel_yield(user_retrieval_gens, max_workers=MAX_DRIVE_WORKERS)

        # if there are more emails to process, don't mark as complete
        if not email_batch_takes_us_to_completion:
            return

        remaining_folders = (
            set(sorted_drive_ids) | set(sorted_folder_ids)
        ) - self._retrieved_folder_and_drive_ids
        if remaining_folders:
            logger.warning(
                f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
            )
        if any(
            checkpoint.completion_map[user_email].stage != DriveRetrievalStage.DONE
            for user_email in all_org_emails
        ):
            logger.info(
                "some users did not complete retrieval, returning checkpoint for another run"
            )
            return
        checkpoint.completion_stage = DriveRetrievalStage.DONE

    def _determine_retrieval_ids(
        self,
        checkpoint: GoogleDriveCheckpoint,
        next_stage: DriveRetrievalStage,
    ) -> tuple[list[str], list[str]]:
        all_drive_ids = self.get_all_drive_ids()
        sorted_drive_ids: list[str] = []
        sorted_folder_ids: list[str] = []
        if checkpoint.completion_stage == DriveRetrievalStage.DRIVE_IDS:
            if self._requested_shared_drive_ids or self._requested_folder_ids:
                (
                    sorted_drive_ids,
                    sorted_folder_ids,
                ) = _clean_requested_drive_ids(
                    requested_drive_ids=self._requested_shared_drive_ids,
                    requested_folder_ids=self._requested_folder_ids,
                    all_drive_ids_available=all_drive_ids,
                )
            elif self.include_shared_drives:
                sorted_drive_ids = sorted(all_drive_ids)

            checkpoint.drive_ids_to_retrieve = sorted_drive_ids
            checkpoint.folder_ids_to_retrieve = sorted_folder_ids
            checkpoint.completion_stage = next_stage
        else:
            if checkpoint.drive_ids_to_retrieve is None:
                raise ValueError("drive ids to retrieve not set in checkpoint")
            if checkpoint.folder_ids_to_retrieve is None:
                raise ValueError("folder ids to retrieve not set in checkpoint")
            # When loading from a checkpoint, load the previously cached drive and folder ids
            sorted_drive_ids = checkpoint.drive_ids_to_retrieve
            sorted_folder_ids = checkpoint.folder_ids_to_retrieve

        return sorted_drive_ids, sorted_folder_ids

    def _oauth_retrieval_all_files(
        self,
        field_type: DriveFileFieldType,
        drive_service: GoogleDriveService,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        page_token: str | None = None,
    ) -> Iterator[RetrievedDriveFile | str]:
        if not self.include_files_shared_with_me and not self.include_my_drives:
            return

        logger.info(
            f"Getting shared files/my drive files for OAuth "
            f"with include_files_shared_with_me={self.include_files_shared_with_me}, "
            f"include_my_drives={self.include_my_drives}, "
            f"include_shared_drives={self.include_shared_drives}."
            f"Using '{self.primary_admin_email}' as the account."
        )
        yield from add_retrieval_info(
            get_all_files_for_oauth(
                service=drive_service,
                include_files_shared_with_me=self.include_files_shared_with_me,
                include_my_drives=self.include_my_drives,
                include_shared_drives=self.include_shared_drives,
                field_type=field_type,
                max_num_pages=OAUTH_PAGES_PER_CHECKPOINT,
                start=start,
                end=end,
                page_token=page_token,
            ),
            self.primary_admin_email,
            DriveRetrievalStage.OAUTH_FILES,
        )

    def _oauth_retrieval_drives(
        self,
        field_type: DriveFileFieldType,
        drive_service: GoogleDriveService,
        drive_ids_to_retrieve: list[str],
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile | str]:
        def _yield_from_drive(
            drive_id: str, drive_start: SecondsSinceUnixEpoch | None
        ) -> Iterator[RetrievedDriveFile | str]:
            yield from add_retrieval_info(
                get_files_in_shared_drive(
                    service=drive_service,
                    drive_id=drive_id,
                    field_type=field_type,
                    max_num_pages=SHARED_DRIVE_PAGES_PER_CHECKPOINT,
                    cache_folders=not bool(
                        drive_start
                    ),  # only cache folders for 0 or None
                    update_traversed_ids_func=self._update_traversed_parent_ids,
                    start=drive_start,
                    end=end,
                    page_token=checkpoint.completion_map[
                        self.primary_admin_email
                    ].next_page_token,
                ),
                self.primary_admin_email,
                DriveRetrievalStage.SHARED_DRIVE_FILES,
                parent_id=drive_id,
            )

        # If we are resuming from a checkpoint, we need to finish retrieving the files from the last drive we retrieved
        if (
            checkpoint.completion_map[self.primary_admin_email].stage
            == DriveRetrievalStage.SHARED_DRIVE_FILES
        ):
            drive_id = checkpoint.completion_map[
                self.primary_admin_email
            ].current_folder_or_drive_id
            if drive_id is None:
                raise ValueError("drive id not set in checkpoint")
            resume_start = checkpoint.completion_map[
                self.primary_admin_email
            ].completed_until
            for file_or_token in _yield_from_drive(drive_id, resume_start):
                if isinstance(file_or_token, str):
                    checkpoint.completion_map[
                        self.primary_admin_email
                    ].next_page_token = file_or_token
                    return  # done with the max num pages, return checkpoint
                yield file_or_token
            checkpoint.completion_map[self.primary_admin_email].next_page_token = None

        for drive_id in drive_ids_to_retrieve:
            if drive_id in self._retrieved_folder_and_drive_ids:
                logger.info(
                    f"Skipping drive '{drive_id}' as it has already been retrieved"
                )
                continue
            logger.info(
                f"Getting files in shared drive '{drive_id}' as '{self.primary_admin_email}'"
            )
            for file_or_token in _yield_from_drive(drive_id, start):
                if isinstance(file_or_token, str):
                    checkpoint.completion_map[
                        self.primary_admin_email
                    ].next_page_token = file_or_token
                    return  # done with the max num pages, return checkpoint
                yield file_or_token
            checkpoint.completion_map[self.primary_admin_email].next_page_token = None

    def _oauth_retrieval_folders(
        self,
        field_type: DriveFileFieldType,
        drive_service: GoogleDriveService,
        drive_ids_to_retrieve: set[str],
        folder_ids_to_retrieve: set[str],
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        """
        If there are any remaining folder ids to retrieve found earlier in the
        retrieval process, we recursively descend the file tree and retrieve all
        files in the folder(s).
        """
        # Even if no folders were requested, we still check if any drives were requested
        # that could be folders.
        remaining_folders = (
            folder_ids_to_retrieve - self._retrieved_folder_and_drive_ids
        )

        def _yield_from_folder_crawl(
            folder_id: str, folder_start: SecondsSinceUnixEpoch | None
        ) -> Iterator[RetrievedDriveFile]:
            yield from crawl_folders_for_files(
                service=drive_service,
                parent_id=folder_id,
                field_type=field_type,
                user_email=self.primary_admin_email,
                traversed_parent_ids=self._retrieved_folder_and_drive_ids,
                update_traversed_ids_func=self._update_traversed_parent_ids,
                start=folder_start,
                end=end,
            )

        # resume from a checkpoint
        # TODO: actually checkpoint folder retrieval. Since we moved towards returning from
        # generator functions to indicate when a checkpoint should be returned, this code
        # shouldn't be used currently. Unfortunately folder crawling is quite difficult to checkpoint
        # effectively (likely need separate folder crawling and file retrieval stages),
        # so we'll revisit this later.
        if checkpoint.completion_map[
            self.primary_admin_email
        ].stage == DriveRetrievalStage.FOLDER_FILES and (
            folder_id := checkpoint.completion_map[
                self.primary_admin_email
            ].current_folder_or_drive_id
        ):
            resume_start = checkpoint.completion_map[
                self.primary_admin_email
            ].completed_until
            yield from _yield_from_folder_crawl(folder_id, resume_start)

        # the times stored in the completion_map aren't used due to the crawling behavior
        # instead, the traversed_parent_ids are used to determine what we have left to retrieve
        for folder_id in remaining_folders:
            logger.info(
                f"Getting files in folder '{folder_id}' as '{self.primary_admin_email}'"
            )
            yield from _yield_from_folder_crawl(folder_id, start)

        remaining_folders = (
            drive_ids_to_retrieve | folder_ids_to_retrieve
        ) - self._retrieved_folder_and_drive_ids
        if remaining_folders:
            logger.warning(
                f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
            )

    def _checkpointed_retrieval(
        self,
        retrieval_method: CredentialedRetrievalMethod,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        drive_files = retrieval_method(
            field_type=field_type,
            checkpoint=checkpoint,
            start=start,
            end=end,
        )

        for file in drive_files:
            drive_file = file.drive_file or {}
            completion = checkpoint.completion_map[file.user_email]

            completed_until = completion.completed_until
            modified_time = drive_file.get(GoogleFields.MODIFIED_TIME.value)
            if isinstance(modified_time, str):
                try:
                    completed_until = datetime.fromisoformat(modified_time).timestamp()
                except ValueError:
                    logger.warning(
                        "Invalid modifiedTime for file '%s' (stage=%s, user=%s).",
                        drive_file.get("id"),
                        file.completion_stage,
                        file.user_email,
                    )

            completion.update(
                stage=file.completion_stage,
                completed_until=completed_until,
                current_folder_or_drive_id=file.parent_id,
            )

            if file.error is not None or not drive_file:
                yield file
                continue

            try:
                document_id = onyx_document_id_from_drive_file(drive_file)
            except KeyError as exc:
                logger.warning(
                    "Drive file missing id/webViewLink (stage=%s user=%s). Skipping.",
                    file.completion_stage,
                    file.user_email,
                )
                if file.error is None:
                    file.error = exc
                yield file
                continue

            logger.debug(
                f"Updating checkpoint for file: {drive_file.get('name')}. "
                f"Seen: {document_id in checkpoint.all_retrieved_file_ids}"
            )
            if document_id in checkpoint.all_retrieved_file_ids:
                continue

            checkpoint.all_retrieved_file_ids.add(document_id)
            yield file

    def _manage_oauth_retrieval(
        self,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        if checkpoint.completion_stage == DriveRetrievalStage.START:
            checkpoint.completion_stage = DriveRetrievalStage.OAUTH_FILES
            checkpoint.completion_map[self.primary_admin_email] = StageCompletion(
                stage=DriveRetrievalStage.START,
                completed_until=0,
                current_folder_or_drive_id=None,
            )

        drive_service = get_drive_service(self.creds, self.primary_admin_email)

        if checkpoint.completion_stage == DriveRetrievalStage.OAUTH_FILES:
            completion = checkpoint.completion_map[self.primary_admin_email]
            all_files_start = start
            # if resuming from a checkpoint
            if completion.stage == DriveRetrievalStage.OAUTH_FILES:
                all_files_start = completion.completed_until

            for file_or_token in self._oauth_retrieval_all_files(
                field_type=field_type,
                drive_service=drive_service,
                start=all_files_start,
                end=end,
                page_token=checkpoint.completion_map[
                    self.primary_admin_email
                ].next_page_token,
            ):
                if isinstance(file_or_token, str):
                    checkpoint.completion_map[
                        self.primary_admin_email
                    ].next_page_token = file_or_token
                    return  # done with the max num pages, return checkpoint
                yield file_or_token
            checkpoint.completion_stage = DriveRetrievalStage.DRIVE_IDS
            checkpoint.completion_map[self.primary_admin_email].next_page_token = None
            return  # create a new checkpoint

        all_requested = (
            self.include_files_shared_with_me
            and self.include_my_drives
            and self.include_shared_drives
        )
        if all_requested:
            # If all 3 are true, we already yielded from get_all_files_for_oauth
            checkpoint.completion_stage = DriveRetrievalStage.DONE
            return

        sorted_drive_ids, sorted_folder_ids = self._determine_retrieval_ids(
            checkpoint, DriveRetrievalStage.SHARED_DRIVE_FILES
        )

        if checkpoint.completion_stage == DriveRetrievalStage.SHARED_DRIVE_FILES:
            for file_or_token in self._oauth_retrieval_drives(
                field_type=field_type,
                drive_service=drive_service,
                drive_ids_to_retrieve=sorted_drive_ids,
                checkpoint=checkpoint,
                start=start,
                end=end,
            ):
                if isinstance(file_or_token, str):
                    checkpoint.completion_map[
                        self.primary_admin_email
                    ].next_page_token = file_or_token
                    return  # done with the max num pages, return checkpoint
                yield file_or_token
            checkpoint.completion_stage = DriveRetrievalStage.FOLDER_FILES
            checkpoint.completion_map[self.primary_admin_email].next_page_token = None
            return  # create a new checkpoint

        if checkpoint.completion_stage == DriveRetrievalStage.FOLDER_FILES:
            yield from self._oauth_retrieval_folders(
                field_type=field_type,
                drive_service=drive_service,
                drive_ids_to_retrieve=set(sorted_drive_ids),
                folder_ids_to_retrieve=set(sorted_folder_ids),
                checkpoint=checkpoint,
                start=start,
                end=end,
            )

        checkpoint.completion_stage = DriveRetrievalStage.DONE

    def _fetch_drive_items(
        self,
        field_type: DriveFileFieldType,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[RetrievedDriveFile]:
        retrieval_method = (
            self._manage_service_account_retrieval
            if isinstance(self.creds, ServiceAccountCredentials)
            else self._manage_oauth_retrieval
        )

        return self._checkpointed_retrieval(
            retrieval_method=retrieval_method,
            field_type=field_type,
            checkpoint=checkpoint,
            start=start,
            end=end,
        )

    def _convert_retrieved_files_to_documents(
        self,
        drive_files_iter: Iterator[RetrievedDriveFile],
        checkpoint: GoogleDriveCheckpoint,
        include_permissions: bool,
    ) -> Iterator[Document | ConnectorFailure | HierarchyNode]:
        """
        Converts retrieved files to documents, yielding HierarchyNode
        objects for ancestor folders before the converted documents.
        """
        permission_sync_context = (
            PermissionSyncContext(
                primary_admin_email=self.primary_admin_email,
                google_domain=self.google_domain,
            )
            if include_permissions
            else None
        )

        files_batch: list[RetrievedDriveFile] = []
        for retrieved_file in drive_files_iter:
            if self.exclude_domain_link_only and has_link_only_permission(
                retrieved_file.drive_file
            ):
                continue
            if retrieved_file.error is None:
                files_batch.append(retrieved_file)
                continue

            failure_stage = retrieved_file.completion_stage.value
            failure_message = f"retrieval failure during stage: {failure_stage},"
            failure_message += f"user: {retrieved_file.user_email},"
            failure_message += f"parent drive/folder: {retrieved_file.parent_id},"
            failure_message += f"error: {retrieved_file.error}"
            logger.error(failure_message)
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=retrieved_file.drive_file.get("id", failure_stage),
                ),
                failure_message=failure_message,
                exception=retrieved_file.error,
            )

        new_ancestors = self._get_new_ancestors_for_files(
            files=files_batch,
            seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,
            fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,
            permission_sync_context=permission_sync_context,
            add_prefix=True,
        )
        if new_ancestors:
            logger.debug(f"Yielding {len(new_ancestors)} new hierarchy nodes")
            yield from new_ancestors

        func_with_args = [
            (
                self._convert_retrieved_file_to_document,
                (retrieved_file, permission_sync_context),
            )
            for retrieved_file in files_batch
        ]
        raw_results = cast(
            list[Document | ConnectorFailure | None],
            run_functions_tuples_in_parallel(func_with_args, max_workers=8),
        )

        results: list[Document | ConnectorFailure] = [
            r for r in raw_results if r is not None
        ]
        logger.debug(f"batch has {len(results)} docs or failures")
        yield from results

        checkpoint.retrieved_folder_and_drive_ids = self._retrieved_folder_and_drive_ids

    def _convert_retrieved_file_to_document(
        self,
        retrieved_file: RetrievedDriveFile,
        permission_sync_context: PermissionSyncContext | None,
    ) -> Document | ConnectorFailure | None:
        """
        Converts a single retrieved file to a document.
        """
        try:
            return convert_drive_item_to_document(
                self.creds,
                self.allow_images,
                self.size_threshold,
                permission_sync_context,
                [retrieved_file.user_email, self.primary_admin_email]
                + get_file_owners(retrieved_file.drive_file, self.primary_admin_email),
                retrieved_file.drive_file,
            )
        except Exception as e:
            logger.exception(
                f"Error extracting document: "
                f"{retrieved_file.drive_file.get('name')} from Google Drive"
            )
            return ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=retrieved_file.drive_file.get("id", "unknown"),
                ),
                failure_message=(
                    f"Error extracting document: "
                    f"{retrieved_file.drive_file.get('name')}"
                ),
                exception=e,
            )

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GoogleDriveCheckpoint,
        include_permissions: bool,
    ) -> CheckpointOutput[GoogleDriveCheckpoint]:
        """
        Entrypoint for the connector; first run is with an empty checkpoint.
        """
        if self._creds is None or self._primary_admin_email is None:
            raise RuntimeError(
                "Credentials missing, should not call this method before calling load_credentials"
            )

        logger.info(
            f"Loading from checkpoint with completion stage: {checkpoint.completion_stage},"
            f"num retrieved ids: {len(checkpoint.all_retrieved_file_ids)}"
        )
        checkpoint = copy.deepcopy(checkpoint)
        self._retrieved_folder_and_drive_ids = checkpoint.retrieved_folder_and_drive_ids
        try:
            field_type = (
                DriveFileFieldType.WITH_PERMISSIONS
                if include_permissions or self.exclude_domain_link_only
                else DriveFileFieldType.STANDARD
            )
            drive_files_iter = self._fetch_drive_items(
                field_type=field_type,
                checkpoint=checkpoint,
                start=start,
                end=end,
            )
            yield from self._convert_retrieved_files_to_documents(
                drive_files_iter, checkpoint, include_permissions
            )
        except Exception as e:
            if MISSING_SCOPES_ERROR_STR in str(e):
                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
            raise e
        checkpoint.retrieved_folder_and_drive_ids = self._retrieved_folder_and_drive_ids

        logger.info(
            f"num drive files retrieved: {len(checkpoint.all_retrieved_file_ids)}"
        )
        if checkpoint.completion_stage == DriveRetrievalStage.DONE:
            checkpoint.has_more = False
        return checkpoint

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GoogleDriveCheckpoint,
    ) -> CheckpointOutput[GoogleDriveCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=False
        )

    @override
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GoogleDriveCheckpoint,
    ) -> CheckpointOutput[GoogleDriveCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=True
        )

    def _extract_slim_docs_from_google_drive(
        self,
        checkpoint: GoogleDriveCheckpoint,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        files_batch: list[RetrievedDriveFile] = []
        slim_batch: list[SlimDocument | HierarchyNode] = []

        def _yield_slim_batch() -> list[SlimDocument | HierarchyNode]:
            """Process files batch and return items to yield (hierarchy nodes + slim docs)."""
            nonlocal files_batch, slim_batch

            # Get new ancestor hierarchy nodes first
            permission_sync_context = PermissionSyncContext(
                primary_admin_email=self.primary_admin_email,
                google_domain=self.google_domain,
            )
            new_ancestors = self._get_new_ancestors_for_files(
                files=files_batch,
                seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,
                fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,
                permission_sync_context=permission_sync_context,
            )

            # Build slim documents
            for file in files_batch:
                if doc := build_slim_document(
                    self.creds,
                    file.drive_file,
                    PermissionSyncContext(
                        primary_admin_email=self.primary_admin_email,
                        google_domain=self.google_domain,
                    ),
                    retriever_email=file.user_email,
                ):
                    slim_batch.append(doc)

            # Combine: hierarchy nodes first, then slim docs
            result: list[SlimDocument | HierarchyNode] = []
            result.extend(new_ancestors)
            result.extend(slim_batch)
            files_batch = []
            slim_batch = []
            return result

        for file in self._fetch_drive_items(
            field_type=DriveFileFieldType.SLIM,
            checkpoint=checkpoint,
            start=start,
            end=end,
        ):
            if file.error is not None:
                raise file.error
            if self.exclude_domain_link_only and has_link_only_permission(
                file.drive_file
            ):
                continue
            files_batch.append(file)

            if len(files_batch) >= SLIM_BATCH_SIZE:
                yield _yield_slim_batch()
                if callback:
                    if callback.should_stop():
                        raise RuntimeError(
                            "_extract_slim_docs_from_google_drive: Stop signal detected"
                        )
                    callback.progress("_extract_slim_docs_from_google_drive", 1)

        # Yield remaining files
        if files_batch:
            yield _yield_slim_batch()

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        try:
            checkpoint = self.build_dummy_checkpoint()
            while checkpoint.completion_stage != DriveRetrievalStage.DONE:
                yield from self._extract_slim_docs_from_google_drive(
                    checkpoint=checkpoint,
                    start=start,
                    end=end,
                    callback=callback,
                )
            logger.info("Drive perm sync: Slim doc retrieval complete")

        except Exception as e:
            if MISSING_SCOPES_ERROR_STR in str(e):
                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
            raise e

    def validate_connector_settings(self) -> None:
        if self._creds is None:
            raise ConnectorMissingCredentialError(
                "Google Drive credentials not loaded."
            )

        if self._primary_admin_email is None:
            raise ConnectorValidationError(
                "Primary admin email not found in credentials. Ensure DB_CREDENTIALS_PRIMARY_ADMIN_KEY is set."
            )

        try:
            drive_service = get_drive_service(self._creds, self._primary_admin_email)
            drive_service.files().list(pageSize=1, fields="files(id)").execute()

            if isinstance(self._creds, ServiceAccountCredentials):
                # default is ~17mins of retries, don't do that here since this is called from
                # the UI
                retry_builder(tries=3, delay=0.1)(get_root_folder_id)(drive_service)

        except HttpError as e:
            status_code = e.resp.status if e.resp else None
            if status_code == 401:
                raise CredentialExpiredError(
                    "Invalid or expired Google Drive credentials (401)."
                )
            elif status_code == 403:
                raise InsufficientPermissionsError(
                    "Google Drive app lacks required permissions (403). "
                    "Please ensure the necessary scopes are granted and Drive "
                    "apps are enabled."
                )
            else:
                raise ConnectorValidationError(
                    f"Unexpected Google Drive error (status={status_code}): {e}"
                )

        except Exception as e:
            # Check for scope-related hints from the error message
            if MISSING_SCOPES_ERROR_STR in str(e):
                raise InsufficientPermissionsError(
                    f"Google Drive credentials are missing required scopes. {ONYX_SCOPE_INSTRUCTIONS}"
                )
            raise ConnectorValidationError(
                f"Unexpected error during Google Drive validation: {e}"
            )

    @override
    def build_dummy_checkpoint(self) -> GoogleDriveCheckpoint:
        return GoogleDriveCheckpoint(
            retrieved_folder_and_drive_ids=set(),
            completion_stage=DriveRetrievalStage.START,
            completion_map=ThreadSafeDict(),
            all_retrieved_file_ids=set(),
            has_more=True,
        )

    @override
    def validate_checkpoint_json(self, checkpoint_json: str) -> GoogleDriveCheckpoint:
        return GoogleDriveCheckpoint.model_validate_json(checkpoint_json)


def get_credentials_from_env(email: str, oauth: bool) -> dict:
    if oauth:
        raw_credential_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"]
    else:
        raw_credential_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"]

    refried_credential_string = json.dumps(json.loads(raw_credential_string))

    # This is the Oauth token
    DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
    # This is the service account key
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
    # The email saved for both auth types
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
    DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"
    cred_key = (
        DB_CREDENTIALS_DICT_TOKEN_KEY
        if oauth
        else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
    )
    return {
        cred_key: refried_credential_string,
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: "uploaded",
    }


class CheckpointOutputWrapper:
    """
    Wraps a CheckpointOutput generator to give things back in a more digestible format.
    The connector format is easier for the connector implementor (e.g. it enforces exactly
    one new checkpoint is returned AND that the checkpoint is at the end), thus the different
    formats.
    """

    def __init__(self) -> None:
        self.next_checkpoint: GoogleDriveCheckpoint | None = None

    def __call__(
        self,
        checkpoint_connector_generator: CheckpointOutput[GoogleDriveCheckpoint],
    ) -> Generator[
        tuple[Document | None, ConnectorFailure | None, GoogleDriveCheckpoint | None],
        None,
        None,
    ]:
        # grabs the final return value and stores it in the `next_checkpoint` variable
        def _inner_wrapper(
            checkpoint_connector_generator: CheckpointOutput[GoogleDriveCheckpoint],
        ) -> CheckpointOutput[GoogleDriveCheckpoint]:
            self.next_checkpoint = yield from checkpoint_connector_generator
            return self.next_checkpoint  # not used

        for document_or_failure in _inner_wrapper(checkpoint_connector_generator):
            if isinstance(document_or_failure, Document):
                yield document_or_failure, None, None
            elif isinstance(document_or_failure, ConnectorFailure):
                yield None, document_or_failure, None
            else:
                raise ValueError(
                    f"Invalid document_or_failure type: {type(document_or_failure)}"
                )

        if self.next_checkpoint is None:
            raise RuntimeError(
                "Checkpoint is None. This should never happen - the connector should always return a checkpoint."
            )

        yield None, None, self.next_checkpoint


def yield_all_docs_from_checkpoint_connector(
    connector: GoogleDriveConnector,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
) -> Iterator[Document | ConnectorFailure]:
    num_iterations = 0

    checkpoint = connector.build_dummy_checkpoint()
    while checkpoint.has_more:
        doc_batch_generator = CheckpointOutputWrapper()(
            connector.load_from_checkpoint(start, end, checkpoint)
        )
        for document, failure, next_checkpoint in doc_batch_generator:
            if failure is not None:
                yield failure
            if document is not None:
                yield document
            if next_checkpoint is not None:
                checkpoint = next_checkpoint

        num_iterations += 1
        if num_iterations > 100_000:
            raise RuntimeError("Too many iterations. Infinite loop?")


if __name__ == "__main__":
    import time

    creds = get_credentials_from_env(
        os.environ["GOOGLE_DRIVE_PRIMARY_ADMIN_EMAIL"], False
    )
    connector = GoogleDriveConnector(
        include_shared_drives=True,
        shared_drive_urls=None,
        include_my_drives=True,
        my_drive_emails=None,
        shared_folder_urls=None,
        include_files_shared_with_me=True,
        specific_user_emails=None,
    )
    connector.load_credentials(creds)
    max_fsize = 0
    biggest_fsize = 0
    num_errors = 0
    start_time = time.time()
    with open("stats.txt", "w") as f:
        for num, doc_or_failure in enumerate(
            yield_all_docs_from_checkpoint_connector(connector, 0, time.time())
        ):
            if num % 200 == 0:
                f.write(f"Processed {num} files\n")
                f.write(f"Max file size: {max_fsize / 1000_000:.2f} MB\n")
                f.write(f"Time so far: {time.time() - start_time:.2f} seconds\n")
                f.write(
                    f"Docs per minute: {num / (time.time() - start_time) * 60:.2f}\n"
                )
                biggest_fsize = max(biggest_fsize, max_fsize)
                max_fsize = 0
            if isinstance(doc_or_failure, Document):
                max_fsize = max(max_fsize, sys.getsizeof(doc_or_failure))
            elif isinstance(doc_or_failure, ConnectorFailure):
                num_errors += 1
        print(f"Num errors: {num_errors}")
        print(f"Biggest file size: {biggest_fsize / 1000_000:.2f} MB")
        print(f"Time taken: {time.time() - start_time:.2f} seconds")


================================================
FILE: backend/onyx/connectors/google_drive/constants.py
================================================
UNSUPPORTED_FILE_TYPE_CONTENT = ""  # keep empty for now
DRIVE_FOLDER_TYPE = "application/vnd.google-apps.folder"
DRIVE_SHORTCUT_TYPE = "application/vnd.google-apps.shortcut"
DRIVE_FILE_TYPE = "application/vnd.google-apps.file"


================================================
FILE: backend/onyx/connectors/google_drive/doc_conversion.py
================================================
import io
from collections.abc import Callable
from datetime import datetime
from typing import Any
from typing import cast
from urllib.parse import urlparse
from urllib.parse import urlunparse

from googleapiclient.errors import HttpError  # type: ignore
from googleapiclient.http import MediaIoBaseDownload  # type: ignore
from pydantic import BaseModel

from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
from onyx.connectors.google_drive.models import GDriveMimeType
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_drive.section_extraction import get_document_sections
from onyx.connectors.google_drive.section_extraction import HEADING_DELIMITER
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.google_utils.resources import get_google_docs_service
from onyx.connectors.google_utils.resources import GoogleDocsService
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import pptx_to_text
from onyx.file_processing.extract_file_text import read_docx_file
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.extract_file_text import xlsx_to_text
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback

logger = setup_logger()

# Cache for folder path lookups to avoid redundant API calls
# Maps folder_id -> (folder_name, parent_id)
_folder_cache: dict[str, tuple[str, str | None]] = {}


def _get_folder_info(
    service: GoogleDriveService, folder_id: str
) -> tuple[str, str | None]:
    """Fetch folder name and parent ID, with caching."""
    if folder_id in _folder_cache:
        return _folder_cache[folder_id]

    try:
        folder = (
            service.files()
            .get(
                fileId=folder_id,
                fields="name, parents",
                supportsAllDrives=True,
            )
            .execute()
        )
        folder_name = folder.get("name", "Unknown")
        parents = folder.get("parents", [])
        parent_id = parents[0] if parents else None
        _folder_cache[folder_id] = (folder_name, parent_id)
        return folder_name, parent_id
    except HttpError as e:
        logger.warning(f"Failed to get folder info for {folder_id}: {e}")
        _folder_cache[folder_id] = ("Unknown", None)
        return "Unknown", None


def _get_drive_name(service: GoogleDriveService, drive_id: str) -> str:
    """Fetch shared drive name."""
    cache_key = f"drive_{drive_id}"
    if cache_key in _folder_cache:
        return _folder_cache[cache_key][0]

    try:
        drive = service.drives().get(driveId=drive_id).execute()
        drive_name = drive.get("name", f"Shared Drive {drive_id}")
        _folder_cache[cache_key] = (drive_name, None)
        return drive_name
    except HttpError as e:
        logger.warning(f"Failed to get drive name for {drive_id}: {e}")
        _folder_cache[cache_key] = (f"Shared Drive {drive_id}", None)
        return f"Shared Drive {drive_id}"


def build_folder_path(
    file: GoogleDriveFileType,
    service: GoogleDriveService,
    drive_id: str | None = None,
    user_email: str | None = None,
) -> list[str]:
    """
    Build the full folder path for a file by walking up the parent chain.
    Returns a list of folder names from root to immediate parent.

    Args:
        file: The Google Drive file object
        service: Google Drive service instance
        drive_id: Optional drive ID (will be extracted from file if not provided)
        user_email: Optional user email to check ownership for "My Drive" vs "Shared with me"
    """
    path_parts: list[str] = []

    # Get drive_id from file if not provided
    if drive_id is None:
        drive_id = file.get("driveId")

    # Check if file is owned by the user (for distinguishing "My Drive" vs "Shared with me")
    is_owned_by_user = False
    if user_email:
        owners = file.get("owners", [])
        is_owned_by_user = any(
            owner.get("emailAddress", "").lower() == user_email.lower()
            for owner in owners
        )

    # Get the file's parent folder ID
    parents = file.get("parents", [])
    if not parents:
        # File is at root level
        if drive_id:
            return [_get_drive_name(service, drive_id)]
        # If not in a shared drive, check if it's owned by the user
        if is_owned_by_user:
            return ["My Drive"]
        else:
            return ["Shared with me"]

    parent_id: str | None = parents[0]

    # Walk up the folder hierarchy (limit to 50 levels to prevent infinite loops)
    visited: set[str] = set()
    for _ in range(50):
        if not parent_id or parent_id in visited:
            break
        visited.add(parent_id)

        folder_name, next_parent = _get_folder_info(service, parent_id)

        # Check if we've reached the root (parent is the drive itself or no parent)
        if next_parent is None:
            # This folder's name is either the drive root, My Drive, or Shared with me
            if drive_id:
                path_parts.insert(0, _get_drive_name(service, drive_id))
            else:
                # Not in a shared drive - determine if it's "My Drive" or "Shared with me"
                if is_owned_by_user:
                    path_parts.insert(0, "My Drive")
                else:
                    path_parts.insert(0, "Shared with me")
            break
        else:
            path_parts.insert(0, folder_name)
            parent_id = next_parent

    # If we didn't find a root, determine the root based on ownership and drive
    if not path_parts:
        if drive_id:
            return [_get_drive_name(service, drive_id)]
        elif is_owned_by_user:
            return ["My Drive"]
        else:
            return ["Shared with me"]

    return path_parts


# This is not a standard valid unicode char, it is used by the docs advanced API to
# represent smart chips (elements like dates and doc links).
SMART_CHIP_CHAR = "\ue907"
WEB_VIEW_LINK_KEY = "webViewLink"
# Fallback templates for generating web links when Drive omits webViewLink.
_FALLBACK_WEB_VIEW_LINK_TEMPLATES = {
    GDriveMimeType.DOC.value: "https://docs.google.com/document/d/{}/view",
    GDriveMimeType.SPREADSHEET.value: "https://docs.google.com/spreadsheets/d/{}/view",
    GDriveMimeType.PPT.value: "https://docs.google.com/presentation/d/{}/view",
}

MAX_RETRIEVER_EMAILS = 20
CHUNK_SIZE_BUFFER = 64  # extra bytes past the limit to read

# Mapping of Google Drive mime types to export formats
GOOGLE_MIME_TYPES_TO_EXPORT = {
    GDriveMimeType.DOC.value: "text/plain",
    GDriveMimeType.SPREADSHEET.value: "text/csv",
    GDriveMimeType.PPT.value: "text/plain",
}

# Define Google MIME types mapping
GOOGLE_MIME_TYPES = {
    GDriveMimeType.DOC.value: "text/plain",
    GDriveMimeType.SPREADSHEET.value: "text/csv",
    GDriveMimeType.PPT.value: "text/plain",
}


class PermissionSyncContext(BaseModel):
    """
    This is the information that is needed to sync permissions for a document.
    """

    primary_admin_email: str
    google_domain: str


def onyx_document_id_from_drive_file(file: GoogleDriveFileType) -> str:
    link = file.get(WEB_VIEW_LINK_KEY)
    if not link:
        file_id = file.get("id")
        if not file_id:
            raise KeyError(
                f"Google Drive file missing both '{WEB_VIEW_LINK_KEY}' and 'id' fields."
            )
        mime_type = file.get("mimeType", "")
        template = _FALLBACK_WEB_VIEW_LINK_TEMPLATES.get(mime_type)
        if template is None:
            link = f"https://drive.google.com/file/d/{file_id}/view"
        else:
            link = template.format(file_id)
        logger.debug(
            "Missing webViewLink for Google Drive file with id %s. Falling back to constructed link %s",
            file_id,
            link,
        )
    parsed_url = urlparse(link)
    parsed_url = parsed_url._replace(query="")  # remove query parameters
    spl_path = parsed_url.path.split("/")
    if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
        spl_path.pop()
        parsed_url = parsed_url._replace(path="/".join(spl_path))
    # Remove query parameters and reconstruct URL
    return urlunparse(parsed_url)


def download_request(
    service: GoogleDriveService, file_id: str, size_threshold: int
) -> bytes:
    """
    Download the file from Google Drive.
    """
    # For other file types, download the file
    # Use the correct API call for downloading files
    request = service.files().get_media(fileId=file_id)
    return _download_request(request, file_id, size_threshold)


_DOWNLOAD_NUM_RETRIES = 3


def _download_request(request: Any, file_id: str, size_threshold: int) -> bytes:
    response_bytes = io.BytesIO()
    downloader = MediaIoBaseDownload(
        response_bytes, request, chunksize=size_threshold + CHUNK_SIZE_BUFFER
    )
    done = False
    while not done:
        # num_retries enables automatic retry with exponential backoff for transient errors
        download_progress, done = downloader.next_chunk(
            num_retries=_DOWNLOAD_NUM_RETRIES
        )
        if download_progress.resumable_progress > size_threshold:
            logger.warning(
                f"File {file_id} exceeds size threshold of {size_threshold}. Skipping2."
            )
            return bytes()

    response = response_bytes.getvalue()
    if not response:
        logger.warning(f"Failed to download {file_id}")
        return bytes()
    return response


def _download_and_extract_sections_basic(
    file: dict[str, str],
    service: GoogleDriveService,
    allow_images: bool,
    size_threshold: int,
) -> list[TextSection | ImageSection]:
    """Extract text and images from a Google Drive file."""
    file_id = file["id"]
    file_name = file["name"]
    mime_type = file["mimeType"]
    link = file.get(WEB_VIEW_LINK_KEY, "")

    # For non-Google files, download the file
    # Use the correct API call for downloading files
    # lazy evaluation to only download the file if necessary
    def response_call() -> bytes:
        return download_request(service, file_id, size_threshold)

    if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:
        # Skip images if not explicitly enabled
        if not allow_images:
            return []

        # Store images for later processing
        sections: list[TextSection | ImageSection] = []
        try:
            section, embedded_id = store_image_and_create_section(
                image_data=response_call(),
                file_id=file_id,
                display_name=file_name,
                media_type=mime_type,
                file_origin=FileOrigin.CONNECTOR,
                link=link,
            )
            sections.append(section)
        except Exception as e:
            logger.error(f"Failed to process image {file_name}: {e}")
        return sections

    # For Google Docs, Sheets, and Slides, export as plain text
    if mime_type in GOOGLE_MIME_TYPES_TO_EXPORT:
        export_mime_type = GOOGLE_MIME_TYPES_TO_EXPORT[mime_type]
        # Use the correct API call for exporting files
        request = service.files().export_media(
            fileId=file_id, mimeType=export_mime_type
        )
        response = _download_request(request, file_id, size_threshold)
        if not response:
            logger.warning(f"Failed to export {file_name} as {export_mime_type}")
            return []

        text = response.decode("utf-8")
        return [TextSection(link=link, text=text)]

    # Process based on mime type
    if mime_type == "text/plain":
        try:
            text = response_call().decode("utf-8")
            return [TextSection(link=link, text=text)]
        except UnicodeDecodeError as e:
            logger.warning(f"Failed to extract text from {file_name}: {e}")
            return []

    elif (
        mime_type
        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ):
        text, _ = read_docx_file(io.BytesIO(response_call()))
        return [TextSection(link=link, text=text)]

    elif (
        mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ):
        text = xlsx_to_text(io.BytesIO(response_call()), file_name=file_name)
        return [TextSection(link=link, text=text)] if text else []

    elif (
        mime_type
        == "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    ):
        text = pptx_to_text(io.BytesIO(response_call()), file_name=file_name)
        return [TextSection(link=link, text=text)] if text else []

    elif mime_type == "application/pdf":
        text, _pdf_meta, images = read_pdf_file(io.BytesIO(response_call()))
        pdf_sections: list[TextSection | ImageSection] = [
            TextSection(link=link, text=text)
        ]

        # Process embedded images in the PDF
        try:
            for idx, (img_data, img_name) in enumerate(images):
                section, embedded_id = store_image_and_create_section(
                    image_data=img_data,
                    file_id=f"{file_id}_img_{idx}",
                    display_name=img_name or f"{file_name} - image {idx}",
                    file_origin=FileOrigin.CONNECTOR,
                )
                pdf_sections.append(section)
        except Exception as e:
            logger.error(f"Failed to process PDF images in {file_name}: {e}")
        return pdf_sections

    # Final attempt at extracting text
    file_ext = get_file_ext(file.get("name", ""))
    if file_ext not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:
        logger.warning(f"Skipping file {file.get('name')} due to extension.")
        return []

    try:
        text = extract_file_text(io.BytesIO(response_call()), file_name)
        return [TextSection(link=link, text=text)]
    except Exception as e:
        logger.warning(f"Failed to extract text from {file_name}: {e}")
        return []


def _find_nth(haystack: str, needle: str, n: int, start: int = 0) -> int:
    start = haystack.find(needle, start)
    while start >= 0 and n > 1:
        start = haystack.find(needle, start + len(needle))
        n -= 1
    return start


def align_basic_advanced(
    basic_sections: list[TextSection | ImageSection], adv_sections: list[TextSection]
) -> list[TextSection | ImageSection]:
    """Align the basic sections with the advanced sections.
    In particular, the basic sections contain all content of the file,
    including smart chips like dates and doc links. The advanced sections
    are separated by section headers and contain header-based links that
    improve user experience when they click on the source in the UI.

    There are edge cases in text matching (i.e. the heading is a smart chip or
    there is a smart chip in the doc with text containing the actual heading text)
    that make the matching imperfect; this is hence done on a best-effort basis.
    """
    if len(adv_sections) <= 1:
        return basic_sections  # no benefit from aligning

    basic_full_text = "".join(
        [section.text for section in basic_sections if isinstance(section, TextSection)]
    )
    new_sections: list[TextSection | ImageSection] = []
    heading_start = 0
    for adv_ind in range(1, len(adv_sections)):
        heading = adv_sections[adv_ind].text.split(HEADING_DELIMITER)[0]
        # retrieve the longest part of the heading that is not a smart chip
        heading_key = max(heading.split(SMART_CHIP_CHAR), key=len).strip()
        if heading_key == "":
            logger.warning(
                f"Cannot match heading: {heading}, its link will come from the following section"
            )
            continue
        heading_offset = heading.find(heading_key)

        # count occurrences of heading str in previous section
        heading_count = adv_sections[adv_ind - 1].text.count(heading_key)

        prev_start = heading_start
        heading_start = (
            _find_nth(basic_full_text, heading_key, heading_count, start=prev_start)
            - heading_offset
        )
        if heading_start < 0:
            logger.warning(
                f"Heading key {heading_key} from heading {heading} not found in basic text"
            )
            heading_start = prev_start
            continue

        new_sections.append(
            TextSection(
                link=adv_sections[adv_ind - 1].link,
                text=basic_full_text[prev_start:heading_start],
            )
        )

    # handle last section
    new_sections.append(
        TextSection(link=adv_sections[-1].link, text=basic_full_text[heading_start:])
    )
    return new_sections


def _get_external_access_for_raw_gdrive_file(
    file: GoogleDriveFileType,
    company_domain: str,
    retriever_drive_service: GoogleDriveService | None,
    admin_drive_service: GoogleDriveService,
    fallback_user_email: str,
    add_prefix: bool = False,
) -> ExternalAccess:
    """
    Get the external access for a raw Google Drive file.

    add_prefix: When True, prefix group IDs with source type (for indexing path).
               When False (default), leave unprefixed (for permission sync path
               where upsert_document_external_perms handles prefixing).
    fallback_user_email: When permission info can't be retrieved (e.g. externally-owned
               files), fall back to granting access to this user.
    """
    external_access_fn = cast(
        Callable[
            [
                GoogleDriveFileType,
                str,
                GoogleDriveService | None,
                GoogleDriveService,
                str,
                bool,
            ],
            ExternalAccess,
        ],
        fetch_versioned_implementation_with_fallback(
            "onyx.external_permissions.google_drive.doc_sync",
            "get_external_access_for_raw_gdrive_file",
            fallback=noop_fallback,
        ),
    )
    return external_access_fn(
        file,
        company_domain,
        retriever_drive_service,
        admin_drive_service,
        fallback_user_email,
        add_prefix,
    )


def convert_drive_item_to_document(
    creds: Any,
    allow_images: bool,
    size_threshold: int,
    # if not specified, we will not sync permissions
    # will also be a no-op if EE is not enabled
    permission_sync_context: PermissionSyncContext | None,
    retriever_emails: list[str],
    file: GoogleDriveFileType,
) -> Document | ConnectorFailure | None:
    """
    Attempt to convert a drive item to a document with each retriever email
    in order. returns upon a successful retrieval or a non-403 error.

    We used to always get the user email from the file owners when available,
    but this was causing issues with shared folders where the owner was not included in the service account
    now we use the email of the account that successfully listed the file. There are cases where a
    user that can list a file cannot download it, so we retry with file owners and admin email.
    """
    first_error = None
    doc_or_failure = None
    retriever_emails = retriever_emails[:MAX_RETRIEVER_EMAILS]
    # use seen instead of list(set()) to avoid re-ordering the retriever emails
    seen = set()
    for retriever_email in retriever_emails:
        if retriever_email in seen:
            continue
        seen.add(retriever_email)
        doc_or_failure = _convert_drive_item_to_document(
            creds,
            allow_images,
            size_threshold,
            retriever_email,
            file,
            permission_sync_context,
        )

        # There are a variety of permissions-based errors that occasionally occur
        # when retrieving files. Often when these occur, there is another user
        # that can successfully retrieve the file, so we try the next user.
        if (
            doc_or_failure is None
            or isinstance(doc_or_failure, Document)
            or not (
                isinstance(doc_or_failure.exception, HttpError)
                and doc_or_failure.exception.status_code in [401, 403, 404]
            )
        ):
            return doc_or_failure

        if first_error is None:
            first_error = doc_or_failure
        else:
            first_error.failure_message += f"\n\n{doc_or_failure.failure_message}"

    if (
        first_error
        and isinstance(first_error.exception, HttpError)
        and first_error.exception.status_code == 403
    ):
        # This SHOULD happen very rarely, and we don't want to break the indexing process when
        # a high volume of 403s occurs early. We leave a verbose log to help investigate.
        logger.error(
            f"Skipping file id: {file.get('id')} name: {file.get('name')} due to 403 error."
            f"Attempted to retrieve with {retriever_emails},"
            f"got the following errors: {first_error.failure_message}"
        )
        return None
    return first_error


def _convert_drive_item_to_document(
    creds: Any,
    allow_images: bool,
    size_threshold: int,
    retriever_email: str,
    file: GoogleDriveFileType,
    # if not specified, we will not sync permissions
    # will also be a no-op if EE is not enabled
    permission_sync_context: PermissionSyncContext | None,
) -> Document | ConnectorFailure | None:
    """
    Main entry point for converting a Google Drive file => Document object.
    """
    sections: list[TextSection | ImageSection] = []

    # Only construct these services when needed
    def _get_drive_service() -> GoogleDriveService:
        return get_drive_service(creds, user_email=retriever_email)

    def _get_docs_service() -> GoogleDocsService:
        return get_google_docs_service(creds, user_email=retriever_email)

    doc_id = "unknown"

    try:
        # skip shortcuts or folders
        if file.get("mimeType") in [DRIVE_SHORTCUT_TYPE, DRIVE_FOLDER_TYPE]:
            logger.info("Skipping shortcut/folder.")
            return None

        size_str = file.get("size")
        if size_str:
            try:
                size_int = int(size_str)
            except ValueError:
                logger.warning(f"Parsing string to int failed: size_str={size_str}")
            else:
                if size_int > size_threshold:
                    logger.warning(
                        f"{file.get('name')} exceeds size threshold of {size_threshold}. Skipping."
                    )
                    return None

        # If it's a Google Doc, we might do advanced parsing
        if file.get("mimeType") == GDriveMimeType.DOC.value:
            try:
                logger.debug(f"starting advanced parsing for {file.get('name')}")
                # get_document_sections is the advanced approach for Google Docs
                doc_sections = get_document_sections(
                    docs_service=_get_docs_service(),
                    doc_id=file.get("id", ""),
                )
                if doc_sections:
                    sections = cast(list[TextSection | ImageSection], doc_sections)
                    if any(SMART_CHIP_CHAR in section.text for section in doc_sections):
                        logger.debug(
                            f"found smart chips in {file.get('name')}, aligning with basic sections"
                        )
                        basic_sections = _download_and_extract_sections_basic(
                            file, _get_drive_service(), allow_images, size_threshold
                        )
                        sections = align_basic_advanced(basic_sections, doc_sections)

            except Exception as e:
                logger.warning(
                    f"Error in advanced parsing: {e}. Falling back to basic extraction."
                )
        # Not Google Doc, attempt basic extraction
        else:
            sections = _download_and_extract_sections_basic(
                file, _get_drive_service(), allow_images, size_threshold
            )

        # If we still don't have any sections, skip this file
        if not sections:
            logger.warning(f"No content extracted from {file.get('name')}. Skipping.")
            return None

        doc_id = onyx_document_id_from_drive_file(file)
        external_access = (
            _get_external_access_for_raw_gdrive_file(
                file=file,
                company_domain=permission_sync_context.google_domain,
                # try both retriever_email and primary_admin_email if necessary
                retriever_drive_service=_get_drive_service(),
                admin_drive_service=get_drive_service(
                    creds, user_email=permission_sync_context.primary_admin_email
                ),
                add_prefix=True,  # Indexing path - prefix here
                fallback_user_email=retriever_email,
            )
            if permission_sync_context
            else None
        )

        # Build doc_metadata with hierarchy information
        file_name = file.get("name", "")
        mime_type = file.get("mimeType", "")
        drive_id = file.get("driveId")

        # Build full folder path by walking up the parent chain
        # Pass retriever_email to determine if file is in "My Drive" vs "Shared with me"
        source_path = build_folder_path(
            file, _get_drive_service(), drive_id, retriever_email
        )

        doc_metadata = {
            "hierarchy": {
                "source_path": source_path,
                "drive_id": drive_id,
                "file_name": file_name,
                "mime_type": mime_type,
            }
        }

        # Create the document
        return Document(
            id=doc_id,
            sections=sections,
            source=DocumentSource.GOOGLE_DRIVE,
            semantic_identifier=file_name,
            doc_metadata=doc_metadata,
            metadata={
                "owner_names": ", ".join(
                    owner.get("displayName", "") for owner in file.get("owners", [])
                ),
            },
            doc_updated_at=datetime.fromisoformat(
                file.get("modifiedTime", "").replace("Z", "+00:00")
            ),
            external_access=external_access,
            parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
        )
    except Exception as e:
        doc_id = "unknown"
        try:
            doc_id = onyx_document_id_from_drive_file(file)
        except Exception as e2:
            logger.warning(f"Error getting document id from file: {e2}")

        file_name = file.get("name")
        error_str = (
            f"Error converting file '{file_name}' to Document as {retriever_email}: {e}"
        )
        if isinstance(e, HttpError) and e.status_code == 403:
            logger.warning(
                f"Uncommon permissions error while downloading file. User "
                f"{retriever_email} was able to see file {file_name} "
                "but cannot download it."
            )
            logger.warning(error_str)

        return ConnectorFailure(
            failed_document=DocumentFailure(
                document_id=doc_id,
                document_link=(
                    sections[0].link if sections else None
                ),  # TODO: see if this is the best way to get a link
            ),
            failed_entity=None,
            failure_message=error_str,
            exception=e,
        )


def build_slim_document(
    creds: Any,
    file: GoogleDriveFileType,
    # if not specified, we will not sync permissions
    # will also be a no-op if EE is not enabled
    permission_sync_context: PermissionSyncContext | None,
    retriever_email: str,
) -> SlimDocument | None:
    if file.get("mimeType") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:
        return None

    owner_email = cast(str | None, file.get("owners", [{}])[0].get("emailAddress"))
    external_access = (
        _get_external_access_for_raw_gdrive_file(
            file=file,
            company_domain=permission_sync_context.google_domain,
            retriever_drive_service=(
                get_drive_service(
                    creds,
                    user_email=owner_email,
                )
                if owner_email
                else None
            ),
            admin_drive_service=get_drive_service(
                creds,
                user_email=permission_sync_context.primary_admin_email,
            ),
            fallback_user_email=retriever_email,
        )
        if permission_sync_context
        else None
    )
    return SlimDocument(
        id=onyx_document_id_from_drive_file(file),
        external_access=external_access,
        parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
    )


================================================
FILE: backend/onyx/connectors/google_drive/file_retrieval.py
================================================
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from enum import Enum
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import urlparse

from googleapiclient.discovery import Resource  # type: ignore
from googleapiclient.errors import HttpError  # type: ignore

from onyx.access.models import ExternalAccess
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_drive.models import RetrievedDriveFile
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.google_utils import (
    execute_paginated_retrieval_with_max_pages,
)
from onyx.connectors.google_utils.google_utils import GoogleFields
from onyx.connectors.google_utils.google_utils import ORDER_BY_KEY
from onyx.connectors.google_utils.google_utils import PAGE_TOKEN_KEY
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback


logger = setup_logger()


class DriveFileFieldType(Enum):
    """Enum to specify which fields to retrieve from Google Drive files"""

    SLIM = "slim"  # Minimal fields for basic file info
    STANDARD = "standard"  # Standard fields including content metadata
    WITH_PERMISSIONS = "with_permissions"  # Full fields including permissions


PERMISSION_FULL_DESCRIPTION = (
    "permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails)"
)
FILE_FIELDS = (
    "nextPageToken, files(mimeType, id, name, driveId, parents, "
    "modifiedTime, webViewLink, shortcutDetails, owners(emailAddress), size)"
)
FILE_FIELDS_WITH_PERMISSIONS = (
    f"nextPageToken, files(mimeType, id, name, driveId, parents, {PERMISSION_FULL_DESCRIPTION}, permissionIds, "
    "modifiedTime, webViewLink, shortcutDetails, owners(emailAddress), size)"
)
SLIM_FILE_FIELDS = (
    f"nextPageToken, files(mimeType, driveId, id, name, parents, {PERMISSION_FULL_DESCRIPTION}, "
    "permissionIds, webViewLink, owners(emailAddress), modifiedTime)"
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"

HIERARCHY_FIELDS = "id, name, parents, webViewLink, mimeType, driveId"

HIERARCHY_FIELDS_WITH_PERMISSIONS = (
    "id, name, parents, webViewLink, mimeType, permissionIds, driveId"
)


def generate_time_range_filter(
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> str:
    time_range_filter = ""
    if start is not None:
        time_start = datetime.fromtimestamp(start, tz=timezone.utc).isoformat()
        time_range_filter += (
            f" and {GoogleFields.MODIFIED_TIME.value} >= '{time_start}'"
        )
    if end is not None:
        time_stop = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()
        time_range_filter += f" and {GoogleFields.MODIFIED_TIME.value} <= '{time_stop}'"
    return time_range_filter


LINK_ONLY_PERMISSION_TYPES = {"domain", "anyone"}


def has_link_only_permission(file: GoogleDriveFileType) -> bool:
    """
    Return True if any permission requires a direct link to access
    (allowFileDiscovery is explicitly false for supported types).
    """
    permissions = file.get("permissions") or []
    for permission in permissions:
        if permission.get("type") not in LINK_ONLY_PERMISSION_TYPES:
            continue
        if permission.get("allowFileDiscovery") is False:
            return True
    return False


def _get_folders_in_parent(
    service: Resource,
    parent_id: str | None = None,
) -> Iterator[GoogleDriveFileType]:
    # Follow shortcuts to folders
    query = f"(mimeType = '{DRIVE_FOLDER_TYPE}' or mimeType = '{DRIVE_SHORTCUT_TYPE}')"
    query += " and trashed = false"

    if parent_id:
        query += f" and '{parent_id}' in parents"

    for file in execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        continue_on_404_or_403=True,
        corpora="allDrives",
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        fields=FOLDER_FIELDS,
        q=query,
    ):
        yield file


def get_folder_metadata(
    service: Resource,
    folder_id: str,
    field_type: DriveFileFieldType,
) -> GoogleDriveFileType | None:
    """Fetch metadata for a folder by ID."""
    fields = _get_hierarchy_fields_for_file_type(field_type)
    try:
        return (
            service.files()
            .get(
                fileId=folder_id,
                fields=fields,
                supportsAllDrives=True,
            )
            .execute()
        )
    except HttpError as e:
        if e.resp.status in (403, 404):
            logger.debug(f"Cannot access folder {folder_id}: {e}")
        else:
            raise e
    return None


def _get_hierarchy_fields_for_file_type(field_type: DriveFileFieldType) -> str:
    if field_type == DriveFileFieldType.WITH_PERMISSIONS:
        return HIERARCHY_FIELDS_WITH_PERMISSIONS
    else:
        return HIERARCHY_FIELDS


def get_shared_drive_name(
    service: Resource,
    drive_id: str,
) -> str | None:
    """Fetch the actual name of a shared drive via the drives().get() API.

    The files().get() API returns 'Drive' as the name for shared drive root
    folders. Only drives().get() returns the real user-assigned name.
    """
    try:
        drive = service.drives().get(driveId=drive_id, fields="name").execute()
        return drive.get("name")
    except HttpError as e:
        if e.resp.status in (403, 404):
            logger.debug(f"Cannot access drive {drive_id}: {e}")
        else:
            raise
    return None


def get_external_access_for_folder(
    folder: GoogleDriveFileType,
    google_domain: str,
    drive_service: GoogleDriveService,
    add_prefix: bool = False,
) -> ExternalAccess:
    """
    Extract ExternalAccess from a folder's permissions.

    This fetches permissions using the Drive API (via permissionIds) and extracts
    user emails, group emails, and public access status.

    Uses the EE implementation if available, otherwise returns public access
    (fallback for non-EE deployments).

    Args:
        folder: The folder metadata from Google Drive API (must include permissionIds field)
        google_domain: The company's Google Workspace domain (e.g., "company.com")
        drive_service: Google Drive service for fetching permission details
        add_prefix: When True, prefix group IDs with source type (for indexing path).
                   When False (default), leave unprefixed (for permission sync path
                   where upsert_document_external_perms handles prefixing).

    Returns:
        ExternalAccess with extracted permission info
    """
    # Try to get the EE implementation
    get_folder_access_fn = cast(
        Callable[[GoogleDriveFileType, str, GoogleDriveService, bool], ExternalAccess],
        fetch_versioned_implementation_with_fallback(
            "onyx.external_permissions.google_drive.doc_sync",
            "get_external_access_for_folder",
            noop_fallback,
        ),
    )

    return get_folder_access_fn(folder, google_domain, drive_service, add_prefix)


def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
    """Get the appropriate fields string based on the field type enum"""
    if field_type == DriveFileFieldType.SLIM:
        return SLIM_FILE_FIELDS
    elif field_type == DriveFileFieldType.WITH_PERMISSIONS:
        return FILE_FIELDS_WITH_PERMISSIONS
    else:  # DriveFileFieldType.STANDARD
        return FILE_FIELDS


def _get_files_in_parent(
    service: Resource,
    parent_id: str,
    field_type: DriveFileFieldType,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    query = f"mimeType != '{DRIVE_FOLDER_TYPE}' and '{parent_id}' in parents"
    query += " and trashed = false"
    query += generate_time_range_filter(start, end)

    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}

    for file in execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        continue_on_404_or_403=True,
        corpora="allDrives",
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        fields=_get_fields_for_file_type(field_type),
        q=query,
        **kwargs,
    ):
        yield file


def crawl_folders_for_files(
    service: Resource,
    parent_id: str,
    field_type: DriveFileFieldType,
    user_email: str,
    traversed_parent_ids: set[str],
    update_traversed_ids_func: Callable[[str], None],
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[RetrievedDriveFile]:
    """
    This function starts crawling from any folder. It is slower though.
    """
    logger.info("Entered crawl_folders_for_files with parent_id: " + parent_id)
    if parent_id not in traversed_parent_ids:
        logger.info("Parent id not in traversed parent ids, getting files")
        found_files = False
        file = {}
        try:
            for file in _get_files_in_parent(
                service=service,
                parent_id=parent_id,
                field_type=field_type,
                start=start,
                end=end,
            ):
                logger.info(f"Found file: {file['name']}, user email: {user_email}")
                found_files = True
                yield RetrievedDriveFile(
                    drive_file=file,
                    user_email=user_email,
                    parent_id=parent_id,
                    completion_stage=DriveRetrievalStage.FOLDER_FILES,
                )
            # Only mark a folder as done if it was fully traversed without errors
            # This usually indicates that the owner of the folder was impersonated.
            # In cases where this never happens, most likely the folder owner is
            # not part of the google workspace in question (or for oauth, the authenticated
            # user doesn't own the folder)
            if found_files:
                update_traversed_ids_func(parent_id)
        except Exception as e:
            if isinstance(e, HttpError) and e.status_code == 403:
                # don't yield an error here because this is expected behavior
                # when a user doesn't have access to a folder
                logger.debug(f"Error getting files in parent {parent_id}: {e}")
            else:
                logger.error(f"Error getting files in parent {parent_id}: {e}")
                yield RetrievedDriveFile(
                    drive_file=file,
                    user_email=user_email,
                    parent_id=parent_id,
                    completion_stage=DriveRetrievalStage.FOLDER_FILES,
                    error=e,
                )
    else:
        logger.info(f"Skipping subfolder files since already traversed: {parent_id}")

    for subfolder in _get_folders_in_parent(
        service=service,
        parent_id=parent_id,
    ):
        logger.info("Fetching all files in subfolder: " + subfolder["name"])
        yield from crawl_folders_for_files(
            service=service,
            parent_id=subfolder["id"],
            field_type=field_type,
            user_email=user_email,
            traversed_parent_ids=traversed_parent_ids,
            update_traversed_ids_func=update_traversed_ids_func,
            start=start,
            end=end,
        )


def get_files_in_shared_drive(
    service: Resource,
    drive_id: str,
    field_type: DriveFileFieldType,
    max_num_pages: int,
    update_traversed_ids_func: Callable[[str], None] = lambda _: None,
    cache_folders: bool = True,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
    page_token: str | None = None,
) -> Iterator[GoogleDriveFileType | str]:
    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}
    if page_token:
        logger.info(f"Using page token: {page_token}")
        kwargs[PAGE_TOKEN_KEY] = page_token

    if cache_folders:
        # If we know we are going to folder crawl later, we can cache the folders here
        # Get all folders being queried and add them to the traversed set
        folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
        folder_query += " and trashed = false"
        for folder in execute_paginated_retrieval(
            retrieval_function=service.files().list,
            list_key="files",
            continue_on_404_or_403=True,
            corpora="drive",
            driveId=drive_id,
            supportsAllDrives=True,
            includeItemsFromAllDrives=True,
            fields="nextPageToken, files(id)",
            q=folder_query,
        ):
            update_traversed_ids_func(folder["id"])

    # Get all files in the shared drive
    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    file_query += generate_time_range_filter(start, end)

    for file in execute_paginated_retrieval_with_max_pages(
        retrieval_function=service.files().list,
        max_num_pages=max_num_pages,
        list_key="files",
        continue_on_404_or_403=True,
        corpora="drive",
        driveId=drive_id,
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        fields=_get_fields_for_file_type(field_type),
        q=file_query,
        **kwargs,
    ):
        # If we found any files, mark this drive as traversed. When a user has access to a drive,
        # they have access to all the files in the drive. Also not a huge deal if we re-traverse
        # empty drives.
        # NOTE: ^^ the above is not actually true due to folder restrictions:
        # https://support.google.com/a/users/answer/12380484?hl=en
        # So we may have to change this logic for people who use folder restrictions.
        update_traversed_ids_func(drive_id)
        yield file


def get_all_files_in_my_drive_and_shared(
    service: GoogleDriveService,
    update_traversed_ids_func: Callable,
    field_type: DriveFileFieldType,
    include_shared_with_me: bool,
    max_num_pages: int,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
    cache_folders: bool = True,
    page_token: str | None = None,
) -> Iterator[GoogleDriveFileType | str]:
    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}
    if page_token:
        logger.info(f"Using page token: {page_token}")
        kwargs[PAGE_TOKEN_KEY] = page_token

    if cache_folders:
        # If we know we are going to folder crawl later, we can cache the folders here
        # Get all folders being queried and add them to the traversed set
        folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
        folder_query += " and trashed = false"
        if not include_shared_with_me:
            folder_query += " and 'me' in owners"
        found_folders = False
        for folder in execute_paginated_retrieval(
            retrieval_function=service.files().list,
            list_key="files",
            corpora="user",
            fields=_get_fields_for_file_type(field_type),
            q=folder_query,
        ):
            update_traversed_ids_func(folder[GoogleFields.ID])
            found_folders = True
        if found_folders:
            update_traversed_ids_func(get_root_folder_id(service))

    # Then get the files
    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    if not include_shared_with_me:
        file_query += " and 'me' in owners"
    file_query += generate_time_range_filter(start, end)
    yield from execute_paginated_retrieval_with_max_pages(
        retrieval_function=service.files().list,
        max_num_pages=max_num_pages,
        list_key="files",
        continue_on_404_or_403=False,
        corpora="user",
        fields=_get_fields_for_file_type(field_type),
        q=file_query,
        **kwargs,
    )


def get_all_files_for_oauth(
    service: GoogleDriveService,
    include_files_shared_with_me: bool,
    include_my_drives: bool,
    # One of the above 2 should be true
    include_shared_drives: bool,
    field_type: DriveFileFieldType,
    max_num_pages: int,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
    page_token: str | None = None,
) -> Iterator[GoogleDriveFileType | str]:
    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}
    if page_token:
        logger.info(f"Using page token: {page_token}")
        kwargs[PAGE_TOKEN_KEY] = page_token

    should_get_all = (
        include_shared_drives and include_my_drives and include_files_shared_with_me
    )
    corpora = "allDrives" if should_get_all else "user"

    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    file_query += generate_time_range_filter(start, end)

    if not should_get_all:
        if include_files_shared_with_me and not include_my_drives:
            file_query += " and not 'me' in owners"
        if not include_files_shared_with_me and include_my_drives:
            file_query += " and 'me' in owners"

    yield from execute_paginated_retrieval_with_max_pages(
        max_num_pages=max_num_pages,
        retrieval_function=service.files().list,
        list_key="files",
        continue_on_404_or_403=False,
        corpora=corpora,
        includeItemsFromAllDrives=should_get_all,
        supportsAllDrives=should_get_all,
        fields=_get_fields_for_file_type(field_type),
        q=file_query,
        **kwargs,
    )


# Just in case we need to get the root folder id
def get_root_folder_id(service: Resource) -> str:
    # we dont paginate here because there is only one root folder per user
    # https://developers.google.com/drive/api/guides/v2-to-v3-reference
    return (
        service.files()
        .get(fileId="root", fields=GoogleFields.ID.value)
        .execute()[GoogleFields.ID.value]
    )


def _extract_file_id_from_web_view_link(web_view_link: str) -> str:
    parsed = urlparse(web_view_link)
    path_parts = [part for part in parsed.path.split("/") if part]

    if "d" in path_parts:
        idx = path_parts.index("d")
        if idx + 1 < len(path_parts):
            return path_parts[idx + 1]

    query_params = parse_qs(parsed.query)
    for key in ("id", "fileId"):
        value = query_params.get(key)
        if value and value[0]:
            return value[0]

    raise ValueError(
        f"Unable to extract Drive file id from webViewLink: {web_view_link}"
    )


def get_file_by_web_view_link(
    service: GoogleDriveService,
    web_view_link: str,
    fields: str,
) -> GoogleDriveFileType:
    """Retrieve a Google Drive file using its webViewLink."""
    file_id = _extract_file_id_from_web_view_link(web_view_link)
    return (
        service.files()
        .get(
            fileId=file_id,
            supportsAllDrives=True,
            fields=fields,
        )
        .execute()
    )


================================================
FILE: backend/onyx/connectors/google_drive/models.py
================================================
from enum import Enum
from typing import Any

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import field_serializer
from pydantic import field_validator

from onyx.connectors.interfaces import ConnectorCheckpoint
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.utils.threadpool_concurrency import ThreadSafeDict
from onyx.utils.threadpool_concurrency import ThreadSafeSet


class GDriveMimeType(str, Enum):
    DOC = "application/vnd.google-apps.document"
    SPREADSHEET = "application/vnd.google-apps.spreadsheet"
    SPREADSHEET_OPEN_FORMAT = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )
    SPREADSHEET_MS_EXCEL = "application/vnd.ms-excel"
    PDF = "application/pdf"
    WORD_DOC = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    PPT = "application/vnd.google-apps.presentation"
    POWERPOINT = (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    )
    PLAIN_TEXT = "text/plain"
    MARKDOWN = "text/markdown"


GoogleDriveFileType = dict[str, Any]


TOKEN_EXPIRATION_TIME = 3600  # 1 hour


# These correspond to The major stages of retrieval for google drive.
# The stages for the oauth flow are:
# get_all_files_for_oauth(),
# get_all_drive_ids(),
# get_files_in_shared_drive(),
# crawl_folders_for_files()
#
# The stages for the service account flow are roughly:
# get_all_user_emails(),
# get_all_drive_ids(),
# get_files_in_shared_drive(),
# Then for each user:
#   get_files_in_my_drive()
#   get_files_in_shared_drive()
#   crawl_folders_for_files()
class DriveRetrievalStage(str, Enum):
    START = "start"
    DONE = "done"
    # OAuth specific stages
    OAUTH_FILES = "oauth_files"

    # Service account specific stages
    USER_EMAILS = "user_emails"
    MY_DRIVE_FILES = "my_drive_files"

    # Used for both oauth and service account flows
    DRIVE_IDS = "drive_ids"
    SHARED_DRIVE_FILES = "shared_drive_files"
    FOLDER_FILES = "folder_files"


class StageCompletion(BaseModel):
    """
    Describes the point in the retrieval+indexing process that the
    connector is at. completed_until is the timestamp of the latest
    file that has been retrieved or error that has been yielded.
    Optional fields are used for retrieval stages that need more information
    for resuming than just the timestamp of the latest file.
    """

    stage: DriveRetrievalStage
    completed_until: SecondsSinceUnixEpoch
    current_folder_or_drive_id: str | None = None
    next_page_token: str | None = None

    # only used for shared drives
    processed_drive_ids: set[str] = set()

    def update(
        self,
        stage: DriveRetrievalStage,
        completed_until: SecondsSinceUnixEpoch,
        current_folder_or_drive_id: str | None = None,
    ) -> None:
        self.stage = stage
        self.completed_until = completed_until
        self.current_folder_or_drive_id = current_folder_or_drive_id


class RetrievedDriveFile(BaseModel):
    """
    Describes a file that has been retrieved from google drive.
    user_email is the email of the user that the file was retrieved
    by impersonating. If an error worthy of being reported is encountered,
    error should be set and later propagated as a ConnectorFailure.
    """

    # The stage at which this file was retrieved
    completion_stage: DriveRetrievalStage

    # The file that was retrieved
    drive_file: GoogleDriveFileType

    # The email of the user that the file was retrieved by impersonating
    user_email: str

    # The id of the parent folder or drive of the file
    parent_id: str | None = None

    # Any unexpected error that occurred while retrieving the file.
    # In particular, this is not used for 403/404 errors, which are expected
    # in the context of impersonating all the users to try to retrieve all
    # files from all their Drives and Folders.
    error: Exception | None = None

    model_config = ConfigDict(arbitrary_types_allowed=True)


class GoogleDriveCheckpoint(ConnectorCheckpoint):
    # Checkpoint version of _retrieved_ids
    retrieved_folder_and_drive_ids: set[str]

    # Describes the point in the retrieval+indexing process that the
    # checkpoint is at. when this is set to a given stage, the connector
    # has finished yielding all values from the previous stage.
    completion_stage: DriveRetrievalStage

    # The latest timestamp of a file that has been retrieved per user email.
    # StageCompletion is used to track the completion of each stage, but the
    # timestamp part is not used for folder crawling.
    completion_map: ThreadSafeDict[str, StageCompletion]

    # all file ids that have been retrieved
    all_retrieved_file_ids: set[str] = set()

    # cached version of the drive and folder ids to retrieve
    drive_ids_to_retrieve: list[str] | None = None
    folder_ids_to_retrieve: list[str] | None = None

    # cached user emails
    user_emails: list[str] | None = None

    # Hierarchy node raw IDs that have already been yielded.
    # Used to avoid yielding duplicate hierarchy nodes across checkpoints.
    # Thread-safe because multiple impersonation threads access this concurrently.
    # Uses default_factory to ensure each checkpoint instance gets a fresh set.
    seen_hierarchy_node_raw_ids: ThreadSafeSet[str] = Field(
        default_factory=ThreadSafeSet
    )

    # Hierarchy node raw IDs where we have successfully walked up to a terminal
    # node (a drive root with no parent). This is separate from seen_hierarchy_node_raw_ids
    # because a node might be yielded before we've walked its full ancestry chain.
    # We only skip walking from a node if it's in this set, ensuring that if one user
    # fails to walk to the root, another user with better access can still complete the walk.
    # Thread-safe because multiple impersonation threads access this concurrently.
    # Uses default_factory to ensure each checkpoint instance gets a fresh set.
    fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str] = Field(
        default_factory=ThreadSafeSet
    )

    @field_serializer("completion_map")
    def serialize_completion_map(
        self, completion_map: ThreadSafeDict[str, StageCompletion], _info: Any
    ) -> dict[str, StageCompletion]:
        return completion_map._dict

    @field_serializer("seen_hierarchy_node_raw_ids")
    def serialize_seen_hierarchy(
        self, seen_hierarchy_node_raw_ids: ThreadSafeSet[str], _info: Any
    ) -> set[str]:
        return seen_hierarchy_node_raw_ids.copy()

    @field_serializer("fully_walked_hierarchy_node_raw_ids")
    def serialize_fully_walked_hierarchy(
        self, fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str], _info: Any
    ) -> set[str]:
        return fully_walked_hierarchy_node_raw_ids.copy()

    @field_validator("completion_map", mode="before")
    def validate_completion_map(cls, v: Any) -> ThreadSafeDict[str, StageCompletion]:
        assert isinstance(v, dict) or isinstance(v, ThreadSafeDict)
        return ThreadSafeDict(
            {k: StageCompletion.model_validate(val) for k, val in v.items()}
        )

    @field_validator("seen_hierarchy_node_raw_ids", mode="before")
    def validate_seen_hierarchy(cls, v: Any) -> ThreadSafeSet[str]:
        if isinstance(v, ThreadSafeSet):
            return v
        if isinstance(v, set):
            return ThreadSafeSet(v)
        if isinstance(v, list):
            return ThreadSafeSet(set(v))
        return ThreadSafeSet()

    @field_validator("fully_walked_hierarchy_node_raw_ids", mode="before")
    def validate_fully_walked_hierarchy(cls, v: Any) -> ThreadSafeSet[str]:
        if isinstance(v, ThreadSafeSet):
            return v
        if isinstance(v, set):
            return ThreadSafeSet(v)
        if isinstance(v, list):
            return ThreadSafeSet(set(v))
        return ThreadSafeSet()


================================================
FILE: backend/onyx/connectors/google_drive/section_extraction.py
================================================
from typing import Any

from pydantic import BaseModel

from onyx.connectors.google_utils.resources import GoogleDocsService
from onyx.connectors.models import TextSection

HEADING_DELIMITER = "\n"


class CurrentHeading(BaseModel):
    id: str | None
    text: str


def _build_gdoc_section_link(doc_id: str, tab_id: str, heading_id: str | None) -> str:
    """Builds a Google Doc link that jumps to a specific heading"""
    # NOTE: doesn't support docs with multiple tabs atm, if we need that ask
    # @Chris
    heading_str = f"#heading={heading_id}" if heading_id else ""
    return f"https://docs.google.com/document/d/{doc_id}/edit?tab={tab_id}{heading_str}"


def _extract_id_from_heading(paragraph: dict[str, Any]) -> str:
    """Extracts the id from a heading paragraph element"""
    return paragraph["paragraphStyle"]["headingId"]


def _extract_text_from_paragraph(paragraph: dict[str, Any]) -> str:
    """Extracts the text content from a paragraph element"""
    text_elements = []
    for element in paragraph.get("elements", []):
        if "textRun" in element:
            text_elements.append(element["textRun"].get("content", ""))

        # Handle links
        if "textStyle" in element and "link" in element["textStyle"]:
            text_elements.append(f"({element['textStyle']['link'].get('url', '')})")

        if "person" in element:
            name = element["person"].get("personProperties", {}).get("name", "")
            email = element["person"].get("personProperties", {}).get("email", "")
            person_str = "<Person|"
            if name:
                person_str += f"name: {name}, "
            if email:
                person_str += f"email: {email}"
            person_str += ">"
            text_elements.append(person_str)

        if "richLink" in element:
            props = element["richLink"].get("richLinkProperties", {})
            title = props.get("title", "")
            uri = props.get("uri", "")
            link_str = f"[{title}]({uri})"
            text_elements.append(link_str)

    return "".join(text_elements)


def _extract_text_from_table(table: dict[str, Any]) -> str:
    """
    Extracts the text content from a table element.
    """
    row_strs = []

    for row in table.get("tableRows", []):
        cells = row.get("tableCells", [])
        cell_strs = []
        for cell in cells:
            child_elements = cell.get("content", {})
            cell_str = []
            for child_elem in child_elements:
                if "paragraph" not in child_elem:
                    continue
                cell_str.append(_extract_text_from_paragraph(child_elem["paragraph"]))
            cell_strs.append("".join(cell_str))
        row_strs.append(", ".join(cell_strs))
    return "\n".join(row_strs)


def get_document_sections(
    docs_service: GoogleDocsService,
    doc_id: str,
) -> list[TextSection]:
    """Extracts sections from a Google Doc, including their headings and content"""
    # Fetch the document structure
    http_request = docs_service.documents().get(documentId=doc_id)

    # Google has poor support for tabs in the docs api, see
    # https://cloud.google.com/python/docs/reference/cloudtasks/
    # latest/google.cloud.tasks_v2.types.HttpRequest
    # https://developers.google.com/workspace/docs/api/how-tos/tabs
    # https://developers.google.com/workspace/docs/api/reference/rest/v1/documents/get
    # this is a hack to use the param mentioned in the rest api docs
    # TODO: check if it can be specified i.e. in documents()
    http_request.uri += "&includeTabsContent=true"
    doc = http_request.execute()

    # Get the content
    tabs = doc.get("tabs", {})
    sections: list[TextSection] = []
    for tab in tabs:
        sections.extend(get_tab_sections(tab, doc_id))
    return sections


def _is_heading(paragraph: dict[str, Any]) -> bool:
    """Checks if a paragraph (a block of text in a drive document) is a heading"""
    if not (
        "paragraphStyle" in paragraph
        and "namedStyleType" in paragraph["paragraphStyle"]
    ):
        return False

    style = paragraph["paragraphStyle"]["namedStyleType"]
    is_heading = style.startswith("HEADING_")
    is_title = style.startswith("TITLE")
    return is_heading or is_title


def _add_finished_section(
    sections: list[TextSection],
    doc_id: str,
    tab_id: str,
    current_heading: CurrentHeading,
    current_section: list[str],
) -> None:
    """Adds a finished section to the list of sections if the section has content.
    Returns the list of sections to use going forward, which may be the old list
    if a new section was not added.
    """
    if not (current_section or current_heading.text):
        return
    # If we were building a previous section, add it to sections list

    # this is unlikely to ever matter, but helps if the doc contains weird headings
    header_text = current_heading.text.replace(HEADING_DELIMITER, "")
    section_text = f"{header_text}{HEADING_DELIMITER}" + "\n".join(current_section)
    sections.append(
        TextSection(
            text=section_text.strip(),
            link=_build_gdoc_section_link(doc_id, tab_id, current_heading.id),
        )
    )


def get_tab_sections(tab: dict[str, Any], doc_id: str) -> list[TextSection]:
    tab_id = tab["tabProperties"]["tabId"]
    content = tab.get("documentTab", {}).get("body", {}).get("content", [])

    sections: list[TextSection] = []
    current_section: list[str] = []
    current_heading = CurrentHeading(id=None, text="")

    for element in content:
        if "paragraph" in element:
            paragraph = element["paragraph"]

            # If this is not a heading, add content to current section
            if not _is_heading(paragraph):
                text = _extract_text_from_paragraph(paragraph)
                if text.strip():
                    current_section.append(text)
                continue

            _add_finished_section(
                sections, doc_id, tab_id, current_heading, current_section
            )

            current_section = []

            # Start new heading
            heading_id = _extract_id_from_heading(paragraph)
            heading_text = _extract_text_from_paragraph(paragraph)
            current_heading = CurrentHeading(
                id=heading_id,
                text=heading_text,
            )
        elif "table" in element:
            text = _extract_text_from_table(element["table"])
            if text.strip():
                current_section.append(text)

    # Don't forget to add the last section
    _add_finished_section(sections, doc_id, tab_id, current_heading, current_section)

    return sections


================================================
FILE: backend/onyx/connectors/google_site/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/google_site/connector.py
================================================
import os
import re
from typing import Any
from typing import cast

from bs4 import BeautifulSoup
from bs4 import Tag

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import load_files_from_zip
from onyx.file_processing.extract_file_text import read_text_file
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger

logger = setup_logger()


def a_tag_text_to_path(atag: Tag) -> str:
    page_path = atag.text.strip().lower()
    page_path = re.sub(r"[^a-zA-Z0-9\s]", "", page_path)
    page_path = "-".join(page_path.split())

    return page_path


def find_google_sites_page_path_from_navbar(
    element: BeautifulSoup | Tag, path: str, depth: int
) -> str | None:
    lis = cast(
        list[Tag],
        element.find_all("li", attrs={"data-nav-level": f"{depth}"}),
    )
    for li in lis:
        a = cast(Tag, li.find("a"))
        if a.get("aria-selected") == "true":
            return f"{path}/{a_tag_text_to_path(a)}"
        elif a.get("aria-expanded") == "true":
            sub_path = find_google_sites_page_path_from_navbar(
                element, f"{path}/{a_tag_text_to_path(a)}", depth + 1
            )
            if sub_path:
                return sub_path

    return None


class GoogleSitesConnector(LoadConnector):
    def __init__(
        self,
        zip_path: str,
        base_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        self.zip_path = zip_path
        self.base_url = base_url
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        pass

    def load_from_state(self) -> GenerateDocumentsOutput:
        documents: list[Document | HierarchyNode] = []

        file_content_io = get_default_file_store().read_file(self.zip_path, mode="b")

        # load the HTML files
        files = load_files_from_zip(file_content_io)
        count = 0
        for file_info, file_io in files:
            # skip non-published files
            if "/PUBLISHED/" not in file_info.filename:
                continue

            file_path, extension = os.path.splitext(file_info.filename)
            if extension != ".html":
                continue

            file_content, _ = read_text_file(file_io)
            soup = BeautifulSoup(file_content, "html.parser")

            # get the link out of the navbar
            header = cast(Tag, soup.find("header"))
            nav = cast(Tag, header.find("nav"))
            path = find_google_sites_page_path_from_navbar(nav, "", 1)
            if not path:
                count += 1
                logger.error(
                    f"Could not find path for '{file_info.filename}'. "
                    + "This page will not have a working link.\n\n"
                    + f"# of broken links so far - {count}"
                )
            logger.info(f"Path to page: {path}")
            # cleanup the hidden `Skip to main content` and `Skip to navigation` that
            # appears at the top of every page
            for div in soup.find_all("div", attrs={"data-is-touch-wrapper": "true"}):
                div.extract()

            # get the body of the page
            parsed_html = web_html_cleanup(
                soup, additional_element_types_to_discard=["header", "nav"]
            )

            title = parsed_html.title or file_path.split("/")[-1]
            documents.append(
                Document(
                    id=f"{DocumentSource.GOOGLE_SITES.value}:{path}",
                    source=DocumentSource.GOOGLE_SITES,
                    semantic_identifier=title,
                    sections=[
                        TextSection(
                            link=(
                                (self.base_url.rstrip("/") + "/" + path.lstrip("/"))
                                if path
                                else ""
                            ),
                            text=parsed_html.cleaned_text,
                        )
                    ],
                    metadata={},
                )
            )

            if len(documents) >= self.batch_size:
                yield documents
                documents = []

        if documents:
            yield documents


if __name__ == "__main__":
    connector = GoogleSitesConnector(
        os.environ["GOOGLE_SITES_ZIP_PATH"],
        os.environ.get("GOOGLE_SITES_BASE_URL", ""),
    )
    for doc_batch in connector.load_from_state():
        for doc in doc_batch:
            print(doc)


================================================
FILE: backend/onyx/connectors/google_utils/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/google_utils/google_auth.py
================================================
import json
from typing import Any

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials as OAuthCredentials
from google.oauth2.service_account import Credentials as ServiceAccountCredentials

from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID
from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    GOOGLE_SCOPES,
)
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()


def sanitize_oauth_credentials(oauth_creds: OAuthCredentials) -> str:
    """we really don't want to be persisting the client id and secret anywhere but the
    environment.

    Returns a string of serialized json.
    """

    # strip the client id and secret
    oauth_creds_json_str = oauth_creds.to_json()
    oauth_creds_sanitized_json: dict[str, Any] = json.loads(oauth_creds_json_str)
    oauth_creds_sanitized_json.pop("client_id", None)
    oauth_creds_sanitized_json.pop("client_secret", None)
    oauth_creds_sanitized_json_str = json.dumps(oauth_creds_sanitized_json)
    return oauth_creds_sanitized_json_str


def get_google_oauth_creds(
    token_json_str: str, source: DocumentSource
) -> OAuthCredentials | None:
    """creds_json only needs to contain client_id, client_secret and refresh_token to
    refresh the creds.

    expiry and token are optional ... however, if passing in expiry, token
    should also be passed in or else we may not return any creds.
    (probably a sign we should refactor the function)
    """
    creds_json = json.loads(token_json_str)
    creds = OAuthCredentials.from_authorized_user_info(
        info=creds_json,
        scopes=GOOGLE_SCOPES[source],
    )
    if creds.valid:
        return creds

    if creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
            if creds.valid:
                logger.notice("Refreshed Google Drive tokens.")
                return creds
        except Exception:
            logger.exception("Failed to refresh google drive access token")
            return None

    return None


def get_google_creds(
    credentials: dict[str, str],
    source: DocumentSource,
) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:
    """Checks for two different types of credentials.
    (1) A credential which holds a token acquired via a user going through
    the Google OAuth flow.
    (2) A credential which holds a service account key JSON file, which
    can then be used to impersonate any user in the workspace.

    Return a tuple where:
        The first element is the requested credentials
        The second element is a new credentials dict that the caller should write back
        to the db. This happens if token rotation occurs while loading credentials.
    """
    oauth_creds = None
    service_creds = None
    new_creds_dict = None
    if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
        # OAUTH
        authentication_method: str = credentials.get(
            DB_CREDENTIALS_AUTHENTICATION_METHOD,
            GoogleOAuthAuthenticationMethod.UPLOADED.value,
        )

        credentials_dict_str = credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]
        credentials_dict = json.loads(credentials_dict_str)

        # only send what get_google_oauth_creds needs
        authorized_user_info = {}

        # oauth_interactive is sanitized and needs credentials from the environment
        if (
            authentication_method
            == GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value
        ):
            authorized_user_info["client_id"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID
            authorized_user_info["client_secret"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
        else:
            authorized_user_info["client_id"] = credentials_dict["client_id"]
            authorized_user_info["client_secret"] = credentials_dict["client_secret"]

        authorized_user_info["refresh_token"] = credentials_dict["refresh_token"]

        authorized_user_info["token"] = credentials_dict["token"]
        authorized_user_info["expiry"] = credentials_dict["expiry"]

        token_json_str = json.dumps(authorized_user_info)
        oauth_creds = get_google_oauth_creds(
            token_json_str=token_json_str, source=source
        )

        # tell caller to update token stored in DB if the refresh token changed
        if oauth_creds:
            if oauth_creds.refresh_token != authorized_user_info["refresh_token"]:
                # if oauth_interactive, sanitize the credentials so they don't get stored in the db
                if (
                    authentication_method
                    == GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value
                ):
                    oauth_creds_json_str = sanitize_oauth_credentials(oauth_creds)
                else:
                    oauth_creds_json_str = oauth_creds.to_json()

                new_creds_dict = {
                    DB_CREDENTIALS_DICT_TOKEN_KEY: oauth_creds_json_str,
                    DB_CREDENTIALS_PRIMARY_ADMIN_KEY: credentials[
                        DB_CREDENTIALS_PRIMARY_ADMIN_KEY
                    ],
                    DB_CREDENTIALS_AUTHENTICATION_METHOD: authentication_method,
                }
    elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
        # SERVICE ACCOUNT
        service_account_key_json_str = credentials[
            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
        ]
        service_account_key = json.loads(service_account_key_json_str)

        service_creds = ServiceAccountCredentials.from_service_account_info(
            service_account_key, scopes=GOOGLE_SCOPES[source]
        )

        if not service_creds.valid or not service_creds.expired:
            service_creds.refresh(Request())

        if not service_creds.valid:
            raise PermissionError(
                f"Unable to access {source} - service account credentials are invalid."
            )

    creds: ServiceAccountCredentials | OAuthCredentials | None = (
        oauth_creds or service_creds
    )
    if creds is None:
        raise PermissionError(
            f"Unable to access {source} - unknown credential structure."
        )

    return creds, new_creds_dict


================================================
FILE: backend/onyx/connectors/google_utils/google_kv.py
================================================
import json
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse

from google.oauth2.credentials import Credentials as OAuthCredentials
from google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore
from sqlalchemy.orm import Session

from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import KV_CRED_KEY
from onyx.configs.constants import KV_GMAIL_CRED_KEY
from onyx.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY
from onyx.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY
from onyx.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.google_utils.resources import get_gmail_service
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    GOOGLE_SCOPES,
)
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from onyx.connectors.google_utils.shared_constants import (
    MISSING_SCOPES_ERROR_STR,
)
from onyx.connectors.google_utils.shared_constants import (
    ONYX_SCOPE_INSTRUCTIONS,
)
from onyx.db.credentials import update_credential_json
from onyx.db.models import User
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import unwrap_str
from onyx.server.documents.models import CredentialBase
from onyx.server.documents.models import GoogleAppCredentials
from onyx.server.documents.models import GoogleServiceAccountKey
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _build_frontend_google_drive_redirect(source: DocumentSource) -> str:
    if source == DocumentSource.GOOGLE_DRIVE:
        return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"
    elif source == DocumentSource.GMAIL:
        return f"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback"
    else:
        raise ValueError(f"Unsupported source: {source}")


def _get_current_oauth_user(creds: OAuthCredentials, source: DocumentSource) -> str:
    if source == DocumentSource.GOOGLE_DRIVE:
        drive_service = get_drive_service(creds)
        user_info = (
            drive_service.about()
            .get(
                fields="user(emailAddress)",
            )
            .execute()
        )
        email = user_info.get("user", {}).get("emailAddress")
    elif source == DocumentSource.GMAIL:
        gmail_service = get_gmail_service(creds)
        user_info = (
            gmail_service.users()
            .getProfile(
                userId="me",
                fields="emailAddress",
            )
            .execute()
        )
        email = user_info.get("emailAddress")
    else:
        raise ValueError(f"Unsupported source: {source}")
    return email


def verify_csrf(credential_id: int, state: str) -> None:
    csrf = unwrap_str(get_kv_store().load(KV_CRED_KEY.format(str(credential_id))))
    if csrf != state:
        raise PermissionError(
            "State from Google Drive Connector callback does not match expected"
        )


def update_credential_access_tokens(
    auth_code: str,
    credential_id: int,
    user: User,
    db_session: Session,
    source: DocumentSource,
    auth_method: GoogleOAuthAuthenticationMethod,
) -> OAuthCredentials | None:
    app_credentials = get_google_app_cred(source)
    flow = InstalledAppFlow.from_client_config(
        app_credentials.model_dump(),
        scopes=GOOGLE_SCOPES[source],
        redirect_uri=_build_frontend_google_drive_redirect(source),
    )
    flow.fetch_token(code=auth_code)
    creds = flow.credentials
    token_json_str = creds.to_json()

    # Get user email from Google API so we know who
    # the primary admin is for this connector
    try:
        email = _get_current_oauth_user(creds, source)
    except Exception as e:
        if MISSING_SCOPES_ERROR_STR in str(e):
            raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
        raise e

    new_creds_dict = {
        DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str,
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: auth_method.value,
    }

    if not update_credential_json(credential_id, new_creds_dict, user, db_session):
        return None
    return creds


def build_service_account_creds(
    source: DocumentSource,
    primary_admin_email: str | None = None,
    name: str | None = None,
) -> CredentialBase:
    service_account_key = get_service_account_key(source=source)

    credential_dict = {
        DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(),
    }
    if primary_admin_email:
        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = primary_admin_email

    credential_dict[DB_CREDENTIALS_AUTHENTICATION_METHOD] = (
        GoogleOAuthAuthenticationMethod.UPLOADED.value
    )

    return CredentialBase(
        credential_json=credential_dict,
        admin_public=True,
        source=source,
        name=name,
    )


def get_auth_url(credential_id: int, source: DocumentSource) -> str:
    if source == DocumentSource.GOOGLE_DRIVE:
        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
    elif source == DocumentSource.GMAIL:
        creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
    else:
        raise ValueError(f"Unsupported source: {source}")
    credential_json = json.loads(creds_str)
    flow = InstalledAppFlow.from_client_config(
        credential_json,
        scopes=GOOGLE_SCOPES[source],
        redirect_uri=_build_frontend_google_drive_redirect(source),
    )
    auth_url, _ = flow.authorization_url(prompt="consent")

    parsed_url = cast(ParseResult, urlparse(auth_url))
    params = parse_qs(parsed_url.query)

    get_kv_store().store(
        KV_CRED_KEY.format(credential_id),
        {"value": params.get("state", [None])[0]},
        encrypt=True,
    )
    return str(auth_url)


def get_google_app_cred(source: DocumentSource) -> GoogleAppCredentials:
    if source == DocumentSource.GOOGLE_DRIVE:
        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
    elif source == DocumentSource.GMAIL:
        creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
    else:
        raise ValueError(f"Unsupported source: {source}")
    return GoogleAppCredentials(**json.loads(creds_str))


def upsert_google_app_cred(
    app_credentials: GoogleAppCredentials, source: DocumentSource
) -> None:
    if source == DocumentSource.GOOGLE_DRIVE:
        get_kv_store().store(
            KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True
        )
    elif source == DocumentSource.GMAIL:
        get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)
    else:
        raise ValueError(f"Unsupported source: {source}")


def delete_google_app_cred(source: DocumentSource) -> None:
    if source == DocumentSource.GOOGLE_DRIVE:
        get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
    elif source == DocumentSource.GMAIL:
        get_kv_store().delete(KV_GMAIL_CRED_KEY)
    else:
        raise ValueError(f"Unsupported source: {source}")


def get_service_account_key(source: DocumentSource) -> GoogleServiceAccountKey:
    if source == DocumentSource.GOOGLE_DRIVE:
        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))
    elif source == DocumentSource.GMAIL:
        creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
    else:
        raise ValueError(f"Unsupported source: {source}")
    return GoogleServiceAccountKey(**json.loads(creds_str))


def upsert_service_account_key(
    service_account_key: GoogleServiceAccountKey, source: DocumentSource
) -> None:
    if source == DocumentSource.GOOGLE_DRIVE:
        get_kv_store().store(
            KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY,
            service_account_key.json(),
            encrypt=True,
        )
    elif source == DocumentSource.GMAIL:
        get_kv_store().store(
            KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
        )
    else:
        raise ValueError(f"Unsupported source: {source}")


def delete_service_account_key(source: DocumentSource) -> None:
    if source == DocumentSource.GOOGLE_DRIVE:
        get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
    elif source == DocumentSource.GMAIL:
        get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
    else:
        raise ValueError(f"Unsupported source: {source}")


================================================
FILE: backend/onyx/connectors/google_utils/google_utils.py
================================================
import re
import socket
import time
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from enum import Enum
from typing import Any

from googleapiclient.errors import HttpError  # type: ignore

from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

_RATE_LIMIT_REASONS = {"userRateLimitExceeded", "rateLimitExceeded"}


def _is_rate_limit_error(error: HttpError) -> bool:
    """Google sometimes returns rate-limit errors as 403 with reason
    'userRateLimitExceeded' instead of 429. This helper detects both."""
    if error.resp.status == 429:
        return True
    if error.resp.status != 403:
        return False
    error_details = getattr(error, "error_details", None) or []
    for detail in error_details:
        if isinstance(detail, dict) and detail.get("reason") in _RATE_LIMIT_REASONS:
            return True
    return "userRateLimitExceeded" in str(error) or "rateLimitExceeded" in str(error)


# Google Drive APIs are quite flakey and may 500 for an
# extended period of time. This is now addressed by checkpointing.
#
# NOTE: We previously tried to combat this here by adding a very
# long retry period (~20 minutes of trying, one request a minute.)
# This is no longer necessary due to checkpointing.
add_retries = retry_builder(tries=5, max_delay=10)

NEXT_PAGE_TOKEN_KEY = "nextPageToken"
PAGE_TOKEN_KEY = "pageToken"
ORDER_BY_KEY = "orderBy"


# See https://developers.google.com/drive/api/reference/rest/v3/files/list for more
class GoogleFields(str, Enum):
    ID = "id"
    CREATED_TIME = "createdTime"
    MODIFIED_TIME = "modifiedTime"
    NAME = "name"
    SIZE = "size"
    PARENTS = "parents"


def _execute_with_retry(request: Any) -> Any:
    max_attempts = 6
    attempt = 1

    while attempt < max_attempts:
        # Note for reasons unknown, the Google API will sometimes return a 429
        # and even after waiting the retry period, it will return another 429.
        # It could be due to a few possibilities:
        # 1. Other things are also requesting from the Drive/Gmail API with the same key
        # 2. It's a rolling rate limit so the moment we get some amount of requests cleared, we hit it again very quickly
        # 3. The retry-after has a maximum and we've already hit the limit for the day
        # or it's something else...
        try:
            return request.execute()
        except HttpError as error:
            attempt += 1

            if _is_rate_limit_error(error):
                # Attempt to get 'Retry-After' from headers
                retry_after = error.resp.get("Retry-After")
                if retry_after:
                    sleep_time = int(retry_after)
                else:
                    # Extract 'Retry after' timestamp from error message
                    match = re.search(
                        r"Retry after (\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)",
                        str(error),
                    )
                    if match:
                        retry_after_timestamp = match.group(1)
                        retry_after_dt = datetime.strptime(
                            retry_after_timestamp, "%Y-%m-%dT%H:%M:%S.%fZ"
                        ).replace(tzinfo=timezone.utc)
                        current_time = datetime.now(timezone.utc)
                        sleep_time = max(
                            int((retry_after_dt - current_time).total_seconds()),
                            0,
                        )
                    else:
                        logger.error(
                            f"No Retry-After header or timestamp found in error message: {error}"
                        )
                        sleep_time = 60

                sleep_time += 3  # Add a buffer to be safe

                logger.info(
                    f"Rate limit exceeded. Attempt {attempt}/{max_attempts}. Sleeping for {sleep_time} seconds."
                )
                time.sleep(sleep_time)

            else:
                raise

    # If we've exhausted all attempts
    raise Exception(f"Failed to execute request after {max_attempts} attempts")


def get_file_owners(file: GoogleDriveFileType, primary_admin_email: str) -> list[str]:
    """
    Get the owners of a file if the attribute is present.
    """
    return [
        email
        for owner in file.get("owners", [])
        if (email := owner.get("emailAddress"))
        and email.split("@")[-1] == primary_admin_email.split("@")[-1]
    ]


def _execute_single_retrieval(
    retrieval_function: Callable,
    continue_on_404_or_403: bool = False,
    **request_kwargs: Any,
) -> GoogleDriveFileType:
    """Execute a single retrieval from Google Drive API"""
    try:
        results = retrieval_function(**request_kwargs).execute()
    except HttpError as e:
        if e.resp.status >= 500:
            results = add_retries(
                lambda: retrieval_function(**request_kwargs).execute()
            )()
        elif e.resp.status == 400:
            if (
                "pageToken" in request_kwargs
                and "Invalid Value" in str(e)
                and "pageToken" in str(e)
            ):
                logger.warning(
                    f"Invalid page token: {request_kwargs['pageToken']}, retrying from start of request"
                )
                request_kwargs.pop("pageToken")
                return _execute_single_retrieval(
                    retrieval_function,
                    continue_on_404_or_403,
                    **request_kwargs,
                )
            logger.error(f"Error executing request: {e}")
            raise e
        elif _is_rate_limit_error(e):
            results = _execute_with_retry(retrieval_function(**request_kwargs))
        elif e.resp.status == 404 or e.resp.status == 403:
            if continue_on_404_or_403:
                logger.debug(f"Error executing request: {e}")
                results = {}
            else:
                raise e
        else:
            logger.exception("Error executing request:")
            raise e
    except (TimeoutError, socket.timeout) as error:
        logger.warning(
            "Timed out executing Google API request; retrying with backoff. Details: %s",
            error,
        )
        results = add_retries(lambda: retrieval_function(**request_kwargs).execute())()

    return results


def execute_single_retrieval(
    retrieval_function: Callable,
    list_key: str | None = None,
    continue_on_404_or_403: bool = False,
    **request_kwargs: Any,
) -> Iterator[GoogleDriveFileType]:
    results = _execute_single_retrieval(
        retrieval_function,
        continue_on_404_or_403,
        **request_kwargs,
    )
    if list_key:
        for item in results.get(list_key, []):
            yield item
    else:
        yield results


# included for type purposes; caller should not need to address
# Nones unless max_num_pages is specified. Use
# execute_paginated_retrieval_with_max_pages instead if you want
# the early stop + yield None after max_num_pages behavior.
def execute_paginated_retrieval(
    retrieval_function: Callable,
    list_key: str | None = None,
    continue_on_404_or_403: bool = False,
    **kwargs: Any,
) -> Iterator[GoogleDriveFileType]:
    for item in _execute_paginated_retrieval(
        retrieval_function,
        list_key,
        continue_on_404_or_403,
        **kwargs,
    ):
        if not isinstance(item, str):
            yield item


def execute_paginated_retrieval_with_max_pages(
    retrieval_function: Callable,
    max_num_pages: int,
    list_key: str | None = None,
    continue_on_404_or_403: bool = False,
    **kwargs: Any,
) -> Iterator[GoogleDriveFileType | str]:
    yield from _execute_paginated_retrieval(
        retrieval_function,
        list_key,
        continue_on_404_or_403,
        max_num_pages=max_num_pages,
        **kwargs,
    )


def _execute_paginated_retrieval(
    retrieval_function: Callable,
    list_key: str | None = None,
    continue_on_404_or_403: bool = False,
    max_num_pages: int | None = None,
    **kwargs: Any,
) -> Iterator[GoogleDriveFileType | str]:
    """Execute a paginated retrieval from Google Drive API
    Args:
        retrieval_function: The specific list function to call (e.g., service.files().list)
        list_key: If specified, each object returned by the retrieval function
                  will be accessed at the specified key and yielded from.
        continue_on_404_or_403: If True, the retrieval will continue even if the request returns a 404 or 403 error.
        max_num_pages: If specified, the retrieval will stop after the specified number of pages and yield None.
        **kwargs: Arguments to pass to the list function
    """
    if "fields" not in kwargs or "nextPageToken" not in kwargs["fields"]:
        raise ValueError(
            "fields must contain nextPageToken for execute_paginated_retrieval"
        )
    next_page_token = kwargs.get(PAGE_TOKEN_KEY, "")
    num_pages = 0
    while next_page_token is not None:
        if max_num_pages is not None and num_pages >= max_num_pages:
            yield next_page_token
            return
        num_pages += 1
        request_kwargs = kwargs.copy()
        if next_page_token:
            request_kwargs[PAGE_TOKEN_KEY] = next_page_token
        results = _execute_single_retrieval(
            retrieval_function,
            continue_on_404_or_403,
            **request_kwargs,
        )

        next_page_token = results.get(NEXT_PAGE_TOKEN_KEY)
        if list_key:
            for item in results.get(list_key, []):
                yield item
        else:
            yield results


================================================
FILE: backend/onyx/connectors/google_utils/resources.py
================================================
from collections.abc import Callable
from typing import Any

from google.auth.exceptions import RefreshError
from google.oauth2.credentials import Credentials as OAuthCredentials
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
from googleapiclient.discovery import build  # type: ignore[import-untyped]
from googleapiclient.discovery import Resource

from onyx.utils.logger import setup_logger

logger = setup_logger()


class GoogleDriveService(Resource):
    pass


class GoogleDocsService(Resource):
    pass


class AdminService(Resource):
    pass


class GmailService(Resource):
    pass


class RefreshableDriveObject:
    """
    Running Google drive service retrieval functions
    involves accessing methods of the service object (ie. files().list())
    which can raise a RefreshError if the access token is expired.
    This class is a wrapper that propagates the ability to refresh the access token
    and retry the final retrieval function until execute() is called.
    """

    def __init__(
        self,
        call_stack: Callable[[ServiceAccountCredentials | OAuthCredentials], Any],
        creds: ServiceAccountCredentials | OAuthCredentials,
        creds_getter: Callable[..., ServiceAccountCredentials | OAuthCredentials],
    ):
        self.call_stack = call_stack
        self.creds = creds
        self.creds_getter = creds_getter

    def __getattr__(self, name: str) -> Any:
        if name == "execute":
            return self.make_refreshable_execute()
        return RefreshableDriveObject(
            lambda creds: getattr(self.call_stack(creds), name),
            self.creds,
            self.creds_getter,
        )

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        return RefreshableDriveObject(
            lambda creds: self.call_stack(creds)(*args, **kwargs),
            self.creds,
            self.creds_getter,
        )

    def make_refreshable_execute(self) -> Callable:
        def execute(*args: Any, **kwargs: Any) -> Any:
            try:
                return self.call_stack(self.creds).execute(*args, **kwargs)
            except RefreshError as e:
                logger.warning(
                    f"RefreshError, going to attempt a creds refresh and retry: {e}"
                )
                # Refresh the access token
                self.creds = self.creds_getter()
                return self.call_stack(self.creds).execute(*args, **kwargs)

        return execute


def _get_google_service(
    service_name: str,
    service_version: str,
    creds: ServiceAccountCredentials | OAuthCredentials,
    user_email: str | None = None,
) -> GoogleDriveService | GoogleDocsService | AdminService | GmailService:
    service: Resource
    if isinstance(creds, ServiceAccountCredentials):
        # NOTE: https://developers.google.com/identity/protocols/oauth2/service-account#error-codes
        creds = creds.with_subject(user_email)
        service = build(service_name, service_version, credentials=creds)
    elif isinstance(creds, OAuthCredentials):
        service = build(service_name, service_version, credentials=creds)

    return service


def get_google_docs_service(
    creds: ServiceAccountCredentials | OAuthCredentials,
    user_email: str | None = None,
) -> GoogleDocsService:
    return _get_google_service("docs", "v1", creds, user_email)


def get_drive_service(
    creds: ServiceAccountCredentials | OAuthCredentials,
    user_email: str | None = None,
) -> GoogleDriveService:
    return _get_google_service("drive", "v3", creds, user_email)


def get_admin_service(
    creds: ServiceAccountCredentials | OAuthCredentials,
    user_email: str | None = None,
) -> AdminService:
    return _get_google_service("admin", "directory_v1", creds, user_email)


def get_gmail_service(
    creds: ServiceAccountCredentials | OAuthCredentials,
    user_email: str | None = None,
) -> GmailService:
    return _get_google_service("gmail", "v1", creds, user_email)


================================================
FILE: backend/onyx/connectors/google_utils/shared_constants.py
================================================
from enum import Enum as PyEnum

from onyx.configs.constants import DocumentSource

# NOTE: do not need https://www.googleapis.com/auth/documents.readonly
# this is counted under `/auth/drive.readonly`
GOOGLE_SCOPES = {
    DocumentSource.GOOGLE_DRIVE: [
        "https://www.googleapis.com/auth/drive.readonly",
        "https://www.googleapis.com/auth/drive.metadata.readonly",
        "https://www.googleapis.com/auth/admin.directory.group.readonly",
        "https://www.googleapis.com/auth/admin.directory.user.readonly",
    ],
    DocumentSource.GMAIL: [
        "https://www.googleapis.com/auth/gmail.readonly",
        "https://www.googleapis.com/auth/admin.directory.user.readonly",
        "https://www.googleapis.com/auth/admin.directory.group.readonly",
    ],
}

# This is the Oauth token
DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
# This is the service account key
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
# The email saved for both auth types
DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"

# https://developers.google.com/workspace/guides/create-credentials
# Internally defined authentication method type.
# The value must be one of "oauth_interactive" or "uploaded"
# Used to disambiguate whether credentials have already been created via
# certain methods and what actions we allow users to take
DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"


class GoogleOAuthAuthenticationMethod(str, PyEnum):
    OAUTH_INTERACTIVE = "oauth_interactive"
    UPLOADED = "uploaded"


USER_FIELDS = "nextPageToken, users(primaryEmail)"

# Error message substrings
MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requested"

# Documentation and error messages
SCOPE_DOC_URL = "https://docs.onyx.app/admins/connectors/official/google_drive/overview"
ONYX_SCOPE_INSTRUCTIONS = (
    "You have upgraded Onyx without updating the Google Auth scopes. "
    f"Please refer to the documentation to learn how to update the scopes: {SCOPE_DOC_URL}"
)


# This is the maximum number of threads that can be retrieved at once
SLIM_BATCH_SIZE = 500


================================================
FILE: backend/onyx/connectors/guru/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/guru/connector.py
================================================
import json
from datetime import datetime
from datetime import timezone
from typing import Any

import requests

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger


logger = setup_logger()

# Potential Improvements
# 1. Support fetching per collection via collection token (configured at connector creation)
GURU_API_BASE = "https://api.getguru.com/api/v1/"
GURU_QUERY_ENDPOINT = GURU_API_BASE + "search/query"
GURU_CARDS_URL = "https://app.getguru.com/card/"


def unixtime_to_guru_time_str(unix_time: SecondsSinceUnixEpoch) -> str:
    date_obj = datetime.fromtimestamp(unix_time, tz=timezone.utc)
    date_str = date_obj.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
    tz_str = date_obj.strftime("%z")
    return date_str + tz_str


class GuruConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        guru_user: str | None = None,
        guru_user_token: str | None = None,
    ) -> None:
        self.batch_size = batch_size
        self.guru_user = guru_user
        self.guru_user_token = guru_user_token

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.guru_user = credentials["guru_user"]
        self.guru_user_token = credentials["guru_user_token"]
        return None

    def _process_cards(
        self, start_str: str | None = None, end_str: str | None = None
    ) -> GenerateDocumentsOutput:
        if self.guru_user is None or self.guru_user_token is None:
            raise ConnectorMissingCredentialError("Guru")

        doc_batch: list[Document | HierarchyNode] = []

        session = requests.Session()
        session.auth = (self.guru_user, self.guru_user_token)

        params: dict[str, str | int] = {"maxResults": self.batch_size}

        if start_str is not None and end_str is not None:
            params["q"] = f"lastModified >= {start_str} AND lastModified < {end_str}"

        current_url = GURU_QUERY_ENDPOINT  # This is how they handle pagination, a different url will be provided
        while True:
            response = session.get(current_url, params=params)
            response.raise_for_status()

            if response.status_code == 204:
                break

            cards = json.loads(response.text)
            for card in cards:
                title = card["preferredPhrase"]
                link = GURU_CARDS_URL + card["slug"]
                content_text = parse_html_page_basic(card["content"])
                last_updated = time_str_to_utc(card["lastModified"])
                last_verified = (
                    time_str_to_utc(card.get("lastVerified"))
                    if card.get("lastVerified")
                    else None
                )

                # For Onyx, we decay document score overtime, either last_updated or
                # last_verified is a good enough signal for the document's recency
                latest_time = (
                    max(last_verified, last_updated) if last_verified else last_updated
                )

                metadata_dict: dict[str, str | list[str]] = {}
                tags = [tag.get("value") for tag in card.get("tags", [])]
                if tags:
                    metadata_dict["tags"] = tags

                boards = [board.get("title") for board in card.get("boards", [])]
                if boards:
                    # In UI it's called Folders
                    metadata_dict["folders"] = boards

                collection = card.get("collection", {})
                if collection:
                    metadata_dict["collection_name"] = collection.get("name", "")

                owner = card.get("owner", {})
                author = None
                if owner:
                    author = BasicExpertInfo(
                        email=owner.get("email"),
                        first_name=owner.get("firstName"),
                        last_name=owner.get("lastName"),
                    )

                doc_batch.append(
                    Document(
                        id=card["id"],
                        sections=[TextSection(link=link, text=content_text)],
                        source=DocumentSource.GURU,
                        semantic_identifier=title,
                        doc_updated_at=latest_time,
                        primary_owners=[author] if author is not None else None,
                        # Can add verifies and commenters later
                        metadata=metadata_dict,
                    )
                )

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

            if not hasattr(response, "links") or not response.links:
                break
            current_url = response.links["next-page"]["url"]

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._process_cards()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_time = unixtime_to_guru_time_str(start)
        end_time = unixtime_to_guru_time_str(end)

        return self._process_cards(start_time, end_time)


if __name__ == "__main__":
    import os

    connector = GuruConnector()
    connector.load_credentials(
        {
            "guru_user": os.environ["GURU_USER"],
            "guru_user_token": os.environ["GURU_USER_TOKEN"],
        }
    )

    latest_docs = connector.load_from_state()
    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/highspot/__init__.py
================================================
"""
Highspot connector package for Onyx.
Enables integration with Highspot's knowledge base.
"""


================================================
FILE: backend/onyx/connectors/highspot/client.py
================================================
import base64
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import urljoin

import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import HTTPError
from requests.exceptions import RequestException
from requests.exceptions import Timeout
from urllib3.util.retry import Retry

from onyx.utils.logger import setup_logger

logger = setup_logger()
PAGE_SIZE = 100


class HighspotClientError(Exception):
    """Base exception for Highspot API client errors."""

    def __init__(self, message: str, status_code: Optional[int] = None):
        self.message = message
        self.status_code = status_code
        super().__init__(self.message)


class HighspotAuthenticationError(HighspotClientError):
    """Exception raised for authentication errors."""


class HighspotRateLimitError(HighspotClientError):
    """Exception raised when rate limit is exceeded."""

    def __init__(self, message: str, retry_after: Optional[str] = None):
        self.retry_after = retry_after
        super().__init__(message)


class HighspotClient:
    """
    Client for interacting with the Highspot API.

    Uses basic authentication with provided key (username) and secret (password).
    Implements retry logic, error handling, and connection pooling.
    """

    BASE_URL = "https://api-su2.highspot.com/v1.0/"

    def __init__(
        self,
        key: str,
        secret: str,
        base_url: str = BASE_URL,
        timeout: int = 30,
        max_retries: int = 3,
        backoff_factor: float = 0.5,
        status_forcelist: Optional[List[int]] = None,
    ):
        """
        Initialize the Highspot API client.

        Args:
            key: API key (used as username)
            secret: API secret (used as password)
            base_url: Base URL for the Highspot API
            timeout: Request timeout in seconds
            max_retries: Maximum number of retries for failed requests
            backoff_factor: Backoff factor for retries
            status_forcelist: HTTP status codes to retry on
        """
        if not key or not secret:
            raise ValueError("API key and secret are required")

        self.key = key
        self.secret = secret
        self.base_url = base_url.rstrip("/") + "/"
        self.timeout = timeout

        # Set up session with retry logic
        self.session = requests.Session()
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist or [429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Set up authentication
        self._setup_auth()

    def _setup_auth(self) -> None:
        """Set up basic authentication for the session."""
        auth = f"{self.key}:{self.secret}"
        encoded_auth = base64.b64encode(auth.encode()).decode()
        self.session.headers.update(
            {
                "Authorization": f"Basic {encoded_auth}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            }
        )

    def _make_request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        json_data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """
        Make a request to the Highspot API.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint
            params: URL parameters
            data: Form data
            json_data: JSON data
            headers: Additional headers

        Returns:
            API response as a dictionary

        Raises:
            HighspotClientError: On API errors
            HighspotAuthenticationError: On authentication errors
            HighspotRateLimitError: On rate limiting
            requests.exceptions.RequestException: On request failures
        """
        url = urljoin(self.base_url, endpoint)
        request_headers = {}
        if headers:
            request_headers.update(headers)

        try:
            logger.debug(f"Making {method} request to {url}")
            response = self.session.request(
                method=method,
                url=url,
                params=params,
                data=data,
                json=json_data,
                headers=request_headers,
                timeout=self.timeout,
            )
            response.raise_for_status()

            if response.content and response.content.strip():
                return response.json()
            return {}

        except HTTPError as e:
            status_code = e.response.status_code
            error_msg = str(e)

            try:
                error_data = e.response.json()
                if isinstance(error_data, dict):
                    error_msg = error_data.get("message", str(e))
            except (ValueError, KeyError):
                pass

            if status_code == 401:
                raise HighspotAuthenticationError(f"Authentication failed: {error_msg}")
            elif status_code == 429:
                retry_after = e.response.headers.get("Retry-After")
                raise HighspotRateLimitError(
                    f"Rate limit exceeded: {error_msg}", retry_after=retry_after
                )
            else:
                raise HighspotClientError(
                    f"API error {status_code}: {error_msg}", status_code=status_code
                )

        except Timeout:
            raise HighspotClientError("Request timed out")
        except RequestException as e:
            raise HighspotClientError(f"Request failed: {str(e)}")

    def get_spots(self) -> List[Dict[str, Any]]:
        """
        Get all available spots, paginated.

        Returns:
            List of spots with their names and IDs
        """
        all_spots = []
        has_more = True
        current_offset = 0

        while has_more:
            params = {"right": "view", "start": current_offset, "limit": PAGE_SIZE}
            response = self._make_request("GET", "spots", params=params)
            found_spots = response.get("collection", [])
            logger.info(f"Received {len(found_spots)} spots at offset {current_offset}")
            all_spots.extend(found_spots)
            if len(found_spots) < PAGE_SIZE:
                has_more = False
            else:
                current_offset += PAGE_SIZE
        logger.info(f"Total spots retrieved: {len(all_spots)}")
        return all_spots

    def get_spot(self, spot_id: str) -> Dict[str, Any]:
        """
        Get details for a specific spot.

        Args:
            spot_id: ID of the spot

        Returns:
            Spot details
        """
        if not spot_id:
            raise ValueError("spot_id is required")
        return self._make_request("GET", f"spots/{spot_id}")

    def get_spot_items(
        self, spot_id: str, offset: int = 0, page_size: int = PAGE_SIZE
    ) -> Dict[str, Any]:
        """
        Get items in a specific spot.

        Args:
            spot_id: ID of the spot
            offset: offset number
            page_size: Number of items per page

        Returns:
            Items in the spot
        """
        if not spot_id:
            raise ValueError("spot_id is required")

        params = {"spot": spot_id, "start": offset, "limit": page_size}
        return self._make_request("GET", "items", params=params)

    def get_item(self, item_id: str) -> Dict[str, Any]:
        """
        Get details for a specific item.

        Args:
            item_id: ID of the item

        Returns:
            Item details
        """
        if not item_id:
            raise ValueError("item_id is required")
        return self._make_request("GET", f"items/{item_id}")

    def get_item_content(self, item_id: str) -> bytes:
        """
        Get the raw content of an item.

        Args:
            item_id: ID of the item

        Returns:
            Raw content bytes
        """
        if not item_id:
            raise ValueError("item_id is required")

        url = urljoin(self.base_url, f"items/{item_id}/content")
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.content

    def health_check(self) -> bool:
        """
        Check if the API is accessible and credentials are valid.

        Returns:
            True if API is accessible, False otherwise
        """
        try:
            self._make_request("GET", "spots", params={"limit": 1})
            return True
        except (HighspotClientError, HighspotAuthenticationError):
            return False


================================================
FILE: backend/onyx/connectors/highspot/connector.py
================================================
import os
from datetime import datetime
from io import BytesIO
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from pydantic import BaseModel

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.highspot.client import HighspotClient
from onyx.connectors.highspot.client import HighspotClientError
from onyx.connectors.highspot.utils import scrape_url_content
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger

logger = setup_logger()
_SLIM_BATCH_SIZE = 1000


class HighspotSpot(BaseModel):
    id: str
    name: str


class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
    """
    Connector for loading data from Highspot.

    Retrieves content from specified spots using the Highspot API.
    If no spots are specified, retrieves content from all available spots.
    """

    def __init__(
        self,
        spot_names: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        """
        Initialize the Highspot connector.

        Args:
            spot_names: List of spot names to retrieve content from (if empty, gets all spots)
            batch_size: Number of items to retrieve in each batch
        """
        self.spot_names = spot_names or []
        self.batch_size = batch_size

        self._client: Optional[HighspotClient] = None
        self.highspot_url: Optional[str] = None
        self.key: Optional[str] = None
        self.secret: Optional[str] = None

    @property
    def client(self) -> HighspotClient:
        if self._client is None:
            if not self.key or not self.secret:
                raise ConnectorMissingCredentialError("Highspot")
            # Ensure highspot_url is a string, use default if None
            base_url = (
                self.highspot_url
                if self.highspot_url is not None
                else HighspotClient.BASE_URL
            )
            self._client = HighspotClient(self.key, self.secret, base_url=base_url)
        return self._client

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        logger.info("Loading Highspot credentials")
        self.highspot_url = credentials.get("highspot_url")
        self.key = credentials.get("highspot_key")
        self.secret = credentials.get("highspot_secret")
        return None

    def _fetch_spots(self) -> list[HighspotSpot]:
        """
        Populate the spot ID map with all available spots.
        Keys are stored as lowercase for case-insensitive lookups.
        """
        return [
            HighspotSpot(id=spot["id"], name=spot["title"])
            for spot in self.client.get_spots()
        ]

    def _fetch_spots_to_process(self) -> list[HighspotSpot]:
        """
        Fetch spots to process based on the configured spot names.
        """
        spots = self._fetch_spots()
        if not spots:
            raise ValueError("No spots found in Highspot.")

        if self.spot_names:
            lower_spot_names = [name.lower() for name in self.spot_names]
            spots_to_process = [
                spot for spot in spots if spot.name.lower() in lower_spot_names
            ]
            if not spots_to_process:
                raise ValueError(
                    f"No valid spots found in Highspot. Found {spots} but {self.spot_names} were requested."
                )
            return spots_to_process

        return spots

    def load_from_state(self) -> GenerateDocumentsOutput:
        """
        Load content from configured spots in Highspot.
        If no spots are configured, loads from all spots.

        Yields:
            Batches of Document objects
        """
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """
        Poll Highspot for content updated since the start time.

        Args:
            start: Start time as seconds since Unix epoch
            end: End time as seconds since Unix epoch

        Yields:
            Batches of Document objects
        """
        spots_to_process = self._fetch_spots_to_process()

        doc_batch: list[Document | HierarchyNode] = []
        try:
            for spot in spots_to_process:
                try:
                    offset = 0
                    has_more = True

                    while has_more:
                        logger.info(
                            f"Retrieving items from spot {spot.name}, offset {offset}"
                        )
                        response = self.client.get_spot_items(
                            spot_id=spot.id, offset=offset, page_size=self.batch_size
                        )
                        items = response.get("collection", [])
                        logger.info(
                            f"Received {len(items)} items from spot {spot.name}"
                        )
                        if not items:
                            has_more = False
                            continue

                        for item in items:
                            try:
                                item_id = item.get("id")
                                if not item_id:
                                    logger.warning("Item without ID found, skipping")
                                    continue

                                item_details = self.client.get_item(item_id)
                                if not item_details:
                                    logger.warning(
                                        f"Item {item_id} details not found, skipping"
                                    )
                                    continue
                                # Apply time filter if specified
                                if start or end:
                                    updated_at = item_details.get("date_updated")
                                    if updated_at:
                                        # Convert to datetime for comparison
                                        try:
                                            updated_time = datetime.fromisoformat(
                                                updated_at.replace("Z", "+00:00")
                                            )
                                            if (
                                                start
                                                and updated_time.timestamp() < start
                                            ) or (
                                                end and updated_time.timestamp() > end
                                            ):
                                                continue
                                        except (ValueError, TypeError):
                                            # Skip if date cannot be parsed
                                            logger.warning(
                                                f"Invalid date format for item {item_id}: {updated_at}"
                                            )
                                            continue

                                content = self._get_item_content(item_details)

                                title = item_details.get("title", "")

                                doc_batch.append(
                                    Document(
                                        id=f"HIGHSPOT_{item_id}",
                                        sections=[
                                            TextSection(
                                                link=item_details.get(
                                                    "url",
                                                    f"https://www.highspot.com/items/{item_id}",
                                                ),
                                                text=content,
                                            )
                                        ],
                                        source=DocumentSource.HIGHSPOT,
                                        semantic_identifier=title,
                                        metadata={
                                            "spot_name": spot.name,
                                            "type": item_details.get(
                                                "content_type", ""
                                            ),
                                            "created_at": item_details.get(
                                                "date_added", ""
                                            ),
                                            "author": item_details.get("author", ""),
                                            "language": item_details.get(
                                                "language", ""
                                            ),
                                            "can_download": str(
                                                item_details.get("can_download", False)
                                            ),
                                        },
                                        doc_updated_at=item_details.get("date_updated"),
                                    )
                                )

                                if len(doc_batch) >= self.batch_size:
                                    yield doc_batch
                                    doc_batch = []

                            except HighspotClientError as e:
                                item_id = "ID" if not item_id else item_id
                                logger.error(
                                    f"Error retrieving item {item_id}: {str(e)}"
                                )
                            except Exception as e:
                                item_id = "ID" if not item_id else item_id
                                logger.error(
                                    f"Unexpected error for item {item_id}: {str(e)}"
                                )

                        has_more = len(items) >= self.batch_size
                        offset += self.batch_size

                except (HighspotClientError, ValueError) as e:
                    logger.error(f"Error processing spot {spot.name}: {str(e)}")
                    raise
                except Exception as e:
                    logger.error(
                        f"Unexpected error processing spot {spot.name}: {str(e)}"
                    )
                    raise

        except Exception as e:
            logger.error(f"Error in Highspot connector: {str(e)}")
            raise

        if doc_batch:
            yield doc_batch

    def _get_item_content(self, item_details: Dict[str, Any]) -> str:
        """
        Get the text content of an item.

        Args:
            item_details: Item details from the API

        Returns:
            Text content of the item
        """
        item_id = item_details.get("id", "")
        content_name = item_details.get("content_name", "")
        is_valid_format = content_name and "." in content_name
        file_extension = content_name.split(".")[-1].lower() if is_valid_format else ""
        file_extension = "." + file_extension if file_extension else ""
        can_download = item_details.get("can_download", False)
        content_type = item_details.get("content_type", "")

        # Extract title and description once at the beginning
        title, description = self._extract_title_and_description(item_details)
        default_content = f"{title}\n{description}"
        logger.info(
            f"Processing item {item_id} with extension {file_extension} and file name {content_name}"
        )

        try:
            if content_type == "WebLink":
                url = item_details.get("url")
                if not url:
                    return default_content
                content = scrape_url_content(url, True)
                return content if content else default_content

            elif (
                is_valid_format
                and file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS
                and can_download
            ):
                content_response = self.client.get_item_content(item_id)
                # Process and extract text from binary content based on type
                if content_response:
                    text_content = extract_file_text(
                        BytesIO(content_response), content_name, False
                    )
                    return text_content if text_content else default_content
                return default_content

            else:
                logger.warning(
                    f"Item {item_id} has unsupported format: {file_extension}"
                )
                return default_content

        except HighspotClientError as e:
            error_context = f"item {item_id}" if item_id else "(item id not found)"
            logger.warning(f"Could not retrieve content for {error_context}: {str(e)}")
            return default_content
        except ValueError as e:
            error_context = f"item {item_id}" if item_id else "(item id not found)"
            logger.error(f"Value error for {error_context}: {str(e)}")
            return default_content

        except Exception as e:
            error_context = f"item {item_id}" if item_id else "(item id not found)"
            logger.error(
                f"Unexpected error retrieving content for {error_context}: {str(e)}"
            )
            return default_content

    def _extract_title_and_description(
        self, item_details: Dict[str, Any]
    ) -> tuple[str, str]:
        """
        Extract the title and description from item details.

        Args:
            item_details: Item details from the API

        Returns:
            Tuple of title and description
        """
        title = item_details.get("title", "")
        description = item_details.get("description", "")
        return title, description

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        """
        Retrieve all document IDs from the configured spots.
        If no spots are configured, retrieves from all spots.

        Args:
            start: Optional start time filter
            end: Optional end time filter
            callback: Optional indexing heartbeat callback

        Yields:
            Batches of SlimDocument objects
        """
        spots_to_process = self._fetch_spots_to_process()

        slim_doc_batch: list[SlimDocument | HierarchyNode] = []
        try:
            for spot in spots_to_process:
                try:
                    offset = 0
                    has_more = True

                    while has_more:
                        logger.info(
                            f"Retrieving slim documents from spot {spot.name}, offset {offset}"
                        )
                        response = self.client.get_spot_items(
                            spot_id=spot.id, offset=offset, page_size=self.batch_size
                        )

                        items = response.get("collection", [])
                        if not items:
                            has_more = False
                            continue

                        for item in items:
                            item_id = item.get("id")
                            if not item_id:
                                logger.warning("Item without ID found, skipping")
                                continue

                            slim_doc_batch.append(
                                SlimDocument(id=f"HIGHSPOT_{item_id}")
                            )

                            if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
                                yield slim_doc_batch
                                slim_doc_batch = []

                        has_more = len(items) >= self.batch_size
                        offset += self.batch_size

                except (HighspotClientError, ValueError):
                    logger.exception(
                        f"Error retrieving slim documents from spot {spot.name}"
                    )
                    raise

            if slim_doc_batch:
                yield slim_doc_batch
        except Exception:
            logger.exception("Error in Highspot Slim Connector")
            raise

    def validate_credentials(self) -> bool:
        """
        Validate that the provided credentials can access the Highspot API.

        Returns:
            True if credentials are valid, False otherwise
        """
        try:
            return self.client.health_check()
        except Exception as e:
            logger.error(f"Failed to validate credentials: {str(e)}")
            return False


if __name__ == "__main__":
    spot_names: List[str] = []
    connector = HighspotConnector(spot_names)
    credentials = {
        "highspot_key": os.environ.get("HIGHSPOT_KEY"),
        "highspot_secret": os.environ.get("HIGHSPOT_SECRET"),
    }
    connector.load_credentials(credentials=credentials)
    for doc in connector.load_from_state():
        print(doc)


================================================
FILE: backend/onyx/connectors/highspot/utils.py
================================================
from typing import Optional
from urllib.parse import urlparse

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright

from onyx.file_processing.html_utils import web_html_cleanup
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Constants
WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser"
DEFAULT_TIMEOUT = 60000  # 60 seconds


def scrape_url_content(
    url: str, scroll_before_scraping: bool = False, timeout_ms: int = DEFAULT_TIMEOUT
) -> Optional[str]:
    """
    Scrapes content from a given URL and returns the cleaned text.

    Args:
        url: The URL to scrape
        scroll_before_scraping: Whether to scroll through the page to load lazy content
        timeout_ms: Timeout in milliseconds for page navigation and loading

    Returns:
        The cleaned text content of the page or None if scraping fails
    """
    playwright = None
    browser = None
    try:
        validate_url(url)
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        page = context.new_page()

        logger.info(f"Navigating to URL: {url}")
        try:
            page.goto(url, timeout=timeout_ms)
        except Exception as e:
            logger.error(f"Failed to navigate to {url}: {str(e)}")
            return None

        if scroll_before_scraping:
            logger.debug("Scrolling page to load lazy content")
            scroll_attempts = 0
            previous_height = page.evaluate("document.body.scrollHeight")
            while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                try:
                    page.wait_for_load_state("networkidle", timeout=timeout_ms)
                except Exception as e:
                    logger.warning(f"Network idle wait timed out: {str(e)}")
                    break

                new_height = page.evaluate("document.body.scrollHeight")
                if new_height == previous_height:
                    break
                previous_height = new_height
                scroll_attempts += 1

        content = page.content()
        soup = BeautifulSoup(content, "html.parser")

        parsed_html = web_html_cleanup(soup)

        if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text:
            logger.debug("JavaScript disabled message detected, checking iframes")
            try:
                iframe_count = page.frame_locator("iframe").locator("html").count()
                if iframe_count > 0:
                    iframe_texts = (
                        page.frame_locator("iframe").locator("html").all_inner_texts()
                    )
                    iframe_content = "\n".join(iframe_texts)

                    if len(parsed_html.cleaned_text) < 700:
                        parsed_html.cleaned_text = iframe_content
                    else:
                        parsed_html.cleaned_text += "\n" + iframe_content
            except Exception as e:
                logger.warning(f"Error processing iframes: {str(e)}")

        return parsed_html.cleaned_text

    except Exception as e:
        logger.error(f"Error scraping URL {url}: {str(e)}")
        return None

    finally:
        if browser:
            try:
                browser.close()
            except Exception as e:
                logger.debug(f"Error closing browser: {str(e)}")
        if playwright:
            try:
                playwright.stop()
            except Exception as e:
                logger.debug(f"Error stopping playwright: {str(e)}")


def validate_url(url: str) -> None:
    """
    Validates that a URL is properly formatted.

    Args:
        url: The URL to validate

    Raises:
        ValueError: If URL is not valid
    """
    parse = urlparse(url)
    if parse.scheme != "http" and parse.scheme != "https":
        raise ValueError("URL must be of scheme https?://")

    if not parse.hostname:
        raise ValueError("URL must include a hostname")


================================================
FILE: backend/onyx/connectors/hubspot/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/hubspot/connector.py
================================================
import re
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import TypeVar

import requests
from hubspot import HubSpot  # type: ignore

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.hubspot.rate_limit import HubSpotRateLimiter
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

HUBSPOT_BASE_URL = "https://app.hubspot.com"
HUBSPOT_API_URL = "https://api.hubapi.com/integrations/v1/me"

AVAILABLE_OBJECT_TYPES = {"tickets", "companies", "deals", "contacts"}

HUBSPOT_PAGE_SIZE = 100

T = TypeVar("T")

logger = setup_logger()


class HubSpotConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        access_token: str | None = None,
        object_types: list[str] | None = None,
    ) -> None:
        self.batch_size = batch_size
        self._access_token = access_token
        self._portal_id: str | None = None
        self._rate_limiter = HubSpotRateLimiter()

        # Set object types to fetch, default to all available types
        if object_types is None:
            self.object_types = AVAILABLE_OBJECT_TYPES.copy()
        else:
            object_types_set = set(object_types)

            # Validate provided object types
            invalid_types = object_types_set - AVAILABLE_OBJECT_TYPES
            if invalid_types:
                raise ValueError(
                    f"Invalid object types: {invalid_types}. Available types: {AVAILABLE_OBJECT_TYPES}"
                )
            self.object_types = object_types_set.copy()

    @property
    def access_token(self) -> str:
        """Get the access token, raising an exception if not set."""
        if self._access_token is None:
            raise ConnectorMissingCredentialError("HubSpot access token not set")
        return self._access_token

    @access_token.setter
    def access_token(self, value: str | None) -> None:
        """Set the access token."""
        self._access_token = value

    @property
    def portal_id(self) -> str:
        """Get the portal ID, raising an exception if not set."""
        if self._portal_id is None:
            raise ConnectorMissingCredentialError("HubSpot portal ID not set")
        return self._portal_id

    @portal_id.setter
    def portal_id(self, value: str | None) -> None:
        """Set the portal ID."""
        self._portal_id = value

    def _call_hubspot(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        return self._rate_limiter.call(func, *args, **kwargs)

    def _paginated_results(
        self,
        fetch_page: Callable[..., Any],
        **kwargs: Any,
    ) -> Generator[Any, None, None]:
        base_kwargs = dict(kwargs)
        base_kwargs.setdefault("limit", HUBSPOT_PAGE_SIZE)

        after: str | None = None
        while True:
            page_kwargs = base_kwargs.copy()
            if after is not None:
                page_kwargs["after"] = after

            page = self._call_hubspot(fetch_page, **page_kwargs)
            results = getattr(page, "results", [])
            for result in results:
                yield result

            paging = getattr(page, "paging", None)
            next_page = getattr(paging, "next", None) if paging else None
            if next_page is None:
                break

            after = getattr(next_page, "after", None)
            if after is None:
                break

    def _clean_html_content(self, html_content: str) -> str:
        """Clean HTML content and extract raw text"""
        if not html_content:
            return ""

        # Remove HTML tags using regex
        clean_text = re.sub(r"<[^>]+>", "", html_content)

        # Decode common HTML entities
        clean_text = clean_text.replace("&nbsp;", " ")
        clean_text = clean_text.replace("&amp;", "&")
        clean_text = clean_text.replace("&lt;", "<")
        clean_text = clean_text.replace("&gt;", ">")
        clean_text = clean_text.replace("&quot;", '"')
        clean_text = clean_text.replace("&#39;", "'")

        # Clean up whitespace
        clean_text = " ".join(clean_text.split())

        return clean_text.strip()

    def get_portal_id(self) -> str:
        headers = {
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": "application/json",
        }

        response = requests.get(HUBSPOT_API_URL, headers=headers)
        if response.status_code != 200:
            raise Exception("Error fetching portal ID")

        data = response.json()
        return str(data["portalId"])

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.access_token = cast(str, credentials["hubspot_access_token"])
        self.portal_id = self.get_portal_id()
        return None

    def _get_object_url(self, object_type: str, object_id: str) -> str:
        """Generate HubSpot URL for different object types"""
        if object_type == "tickets":
            return (
                f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-5/{object_id}"
            )
        elif object_type == "companies":
            return (
                f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-2/{object_id}"
            )
        elif object_type == "deals":
            return (
                f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-3/{object_id}"
            )
        elif object_type == "contacts":
            return (
                f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-1/{object_id}"
            )
        elif object_type == "notes":
            return (
                f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/objects/0-4/{object_id}"
            )
        else:
            return f"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/{object_type}/{object_id}"

    def _get_associated_objects(
        self,
        api_client: HubSpot,
        object_id: str,
        from_object_type: str,
        to_object_type: str,
    ) -> list[dict[str, Any]]:
        """Get associated objects for a given object"""
        try:
            associations_iter = self._paginated_results(
                api_client.crm.associations.v4.basic_api.get_page,
                object_type=from_object_type,
                object_id=object_id,
                to_object_type=to_object_type,
            )

            object_ids = [assoc.to_object_id for assoc in associations_iter]

            associated_objects: list[dict[str, Any]] = []

            if to_object_type == "contacts":
                for obj_id in object_ids:
                    try:
                        obj = self._call_hubspot(
                            api_client.crm.contacts.basic_api.get_by_id,
                            contact_id=obj_id,
                            properties=[
                                "firstname",
                                "lastname",
                                "email",
                                "company",
                                "jobtitle",
                            ],
                        )
                        associated_objects.append(obj.to_dict())
                    except Exception as e:
                        logger.warning(f"Failed to fetch contact {obj_id}: {e}")

            elif to_object_type == "companies":
                for obj_id in object_ids:
                    try:
                        obj = self._call_hubspot(
                            api_client.crm.companies.basic_api.get_by_id,
                            company_id=obj_id,
                            properties=[
                                "name",
                                "domain",
                                "industry",
                                "city",
                                "state",
                            ],
                        )
                        associated_objects.append(obj.to_dict())
                    except Exception as e:
                        logger.warning(f"Failed to fetch company {obj_id}: {e}")

            elif to_object_type == "deals":
                for obj_id in object_ids:
                    try:
                        obj = self._call_hubspot(
                            api_client.crm.deals.basic_api.get_by_id,
                            deal_id=obj_id,
                            properties=[
                                "dealname",
                                "amount",
                                "dealstage",
                                "closedate",
                                "pipeline",
                            ],
                        )
                        associated_objects.append(obj.to_dict())
                    except Exception as e:
                        logger.warning(f"Failed to fetch deal {obj_id}: {e}")

            elif to_object_type == "tickets":
                for obj_id in object_ids:
                    try:
                        obj = self._call_hubspot(
                            api_client.crm.tickets.basic_api.get_by_id,
                            ticket_id=obj_id,
                            properties=["subject", "content", "hs_ticket_priority"],
                        )
                        associated_objects.append(obj.to_dict())
                    except Exception as e:
                        logger.warning(f"Failed to fetch ticket {obj_id}: {e}")

            return associated_objects

        except Exception as e:
            logger.warning(
                f"Failed to get associations from {from_object_type} to {to_object_type}: {e}"
            )
            return []

    def _get_associated_notes(
        self,
        api_client: HubSpot,
        object_id: str,
        object_type: str,
    ) -> list[dict[str, Any]]:
        """Get notes associated with a given object"""
        try:
            associations_iter = self._paginated_results(
                api_client.crm.associations.v4.basic_api.get_page,
                object_type=object_type,
                object_id=object_id,
                to_object_type="notes",
            )

            note_ids = [assoc.to_object_id for assoc in associations_iter]

            associated_notes = []

            for note_id in note_ids:
                try:
                    # Notes are engagements in HubSpot, use the engagements API
                    note = self._call_hubspot(
                        api_client.crm.objects.notes.basic_api.get_by_id,
                        note_id=note_id,
                        properties=[
                            "hs_note_body",
                            "hs_timestamp",
                            "hs_created_by",
                            "hubspot_owner_id",
                        ],
                    )
                    associated_notes.append(note.to_dict())
                except Exception as e:
                    logger.warning(f"Failed to fetch note {note_id}: {e}")

            return associated_notes

        except Exception as e:
            logger.warning(f"Failed to get notes for {object_type} {object_id}: {e}")
            return []

    def _create_object_section(
        self, obj: dict[str, Any], object_type: str
    ) -> TextSection:
        """Create a TextSection for an associated object"""
        obj_id = obj.get("id", "")
        properties = obj.get("properties", {})

        if object_type == "contacts":
            name_parts = []
            if properties.get("firstname"):
                name_parts.append(properties["firstname"])
            if properties.get("lastname"):
                name_parts.append(properties["lastname"])

            if name_parts:
                name = " ".join(name_parts)
            elif properties.get("email"):
                # Use email as fallback if no first/last name
                name = properties["email"]
            else:
                name = "Unknown Contact"

            content_parts = [f"Contact: {name}"]
            if properties.get("email"):
                content_parts.append(f"Email: {properties['email']}")
            if properties.get("company"):
                content_parts.append(f"Company: {properties['company']}")
            if properties.get("jobtitle"):
                content_parts.append(f"Job Title: {properties['jobtitle']}")

        elif object_type == "companies":
            name = properties.get("name", "Unknown Company")
            content_parts = [f"Company: {name}"]
            if properties.get("domain"):
                content_parts.append(f"Domain: {properties['domain']}")
            if properties.get("industry"):
                content_parts.append(f"Industry: {properties['industry']}")
            if properties.get("city") and properties.get("state"):
                content_parts.append(
                    f"Location: {properties['city']}, {properties['state']}"
                )

        elif object_type == "deals":
            name = properties.get("dealname", "Unknown Deal")
            content_parts = [f"Deal: {name}"]
            if properties.get("amount"):
                content_parts.append(f"Amount: ${properties['amount']}")
            if properties.get("dealstage"):
                content_parts.append(f"Stage: {properties['dealstage']}")
            if properties.get("closedate"):
                content_parts.append(f"Close Date: {properties['closedate']}")
            if properties.get("pipeline"):
                content_parts.append(f"Pipeline: {properties['pipeline']}")

        elif object_type == "tickets":
            name = properties.get("subject", "Unknown Ticket")
            content_parts = [f"Ticket: {name}"]
            if properties.get("content"):
                content_parts.append(f"Content: {properties['content']}")
            if properties.get("hs_ticket_priority"):
                content_parts.append(f"Priority: {properties['hs_ticket_priority']}")
        elif object_type == "notes":
            # Notes have a body property that contains the note content
            body = properties.get("hs_note_body", "")
            timestamp = properties.get("hs_timestamp", "")

            # Clean HTML content to get raw text
            clean_body = self._clean_html_content(body)

            # Use full content, not truncated
            content_parts = [f"Note: {clean_body}"]
            if timestamp:
                content_parts.append(f"Created: {timestamp}")
        else:
            content_parts = [f"{object_type.capitalize()}: {obj_id}"]

        content = "\n".join(content_parts)
        link = self._get_object_url(object_type, obj_id)

        return TextSection(link=link, text=content)

    def _process_tickets(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        api_client = HubSpot(access_token=self.access_token)

        tickets_iter = self._paginated_results(
            api_client.crm.tickets.basic_api.get_page,
            properties=[
                "subject",
                "content",
                "hs_ticket_priority",
                "createdate",
                "hs_lastmodifieddate",
            ],
            associations=["contacts", "companies", "deals"],
        )

        doc_batch: list[Document | HierarchyNode] = []

        for ticket in tickets_iter:
            updated_at = ticket.updated_at.replace(tzinfo=None)
            if start is not None and updated_at < start.replace(tzinfo=None):
                continue
            if end is not None and updated_at > end.replace(tzinfo=None):
                continue

            title = ticket.properties.get("subject") or f"Ticket {ticket.id}"
            link = self._get_object_url("tickets", ticket.id)
            content_text = ticket.properties.get("content") or ""

            # Main ticket section
            sections = [TextSection(link=link, text=content_text)]

            # Metadata with parent object IDs
            metadata: dict[str, str | list[str]] = {
                "object_type": "ticket",
            }

            if ticket.properties.get("hs_ticket_priority"):
                metadata["priority"] = ticket.properties["hs_ticket_priority"]

            # Add associated objects as sections
            associated_contact_ids = []
            associated_company_ids = []
            associated_deal_ids = []

            # Get associated contacts
            associated_contacts = self._get_associated_objects(
                api_client, ticket.id, "tickets", "contacts"
            )
            for contact in associated_contacts:
                sections.append(self._create_object_section(contact, "contacts"))
                associated_contact_ids.append(contact["id"])

            # Get associated companies
            associated_companies = self._get_associated_objects(
                api_client, ticket.id, "tickets", "companies"
            )
            for company in associated_companies:
                sections.append(self._create_object_section(company, "companies"))
                associated_company_ids.append(company["id"])

            # Get associated deals
            associated_deals = self._get_associated_objects(
                api_client, ticket.id, "tickets", "deals"
            )
            for deal in associated_deals:
                sections.append(self._create_object_section(deal, "deals"))
                associated_deal_ids.append(deal["id"])

            # Get associated notes
            associated_notes = self._get_associated_notes(
                api_client, ticket.id, "tickets"
            )
            for note in associated_notes:
                sections.append(self._create_object_section(note, "notes"))

            # Add association IDs to metadata
            if associated_contact_ids:
                metadata["associated_contact_ids"] = associated_contact_ids
            if associated_company_ids:
                metadata["associated_company_ids"] = associated_company_ids
            if associated_deal_ids:
                metadata["associated_deal_ids"] = associated_deal_ids

            doc_batch.append(
                Document(
                    id=f"hubspot_ticket_{ticket.id}",
                    sections=cast(list[TextSection | ImageSection], sections),
                    source=DocumentSource.HUBSPOT,
                    semantic_identifier=title,
                    doc_updated_at=ticket.updated_at.replace(tzinfo=timezone.utc),
                    metadata=metadata,
                    doc_metadata={
                        "hierarchy": {
                            "source_path": ["Tickets"],
                            "object_type": "ticket",
                            "object_id": ticket.id,
                        }
                    },
                )
            )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def _process_companies(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        api_client = HubSpot(access_token=self.access_token)

        companies_iter = self._paginated_results(
            api_client.crm.companies.basic_api.get_page,
            properties=[
                "name",
                "domain",
                "industry",
                "city",
                "state",
                "description",
                "createdate",
                "hs_lastmodifieddate",
            ],
            associations=["contacts", "deals", "tickets"],
        )

        doc_batch: list[Document | HierarchyNode] = []

        for company in companies_iter:
            updated_at = company.updated_at.replace(tzinfo=None)
            if start is not None and updated_at < start.replace(tzinfo=None):
                continue
            if end is not None and updated_at > end.replace(tzinfo=None):
                continue

            title = company.properties.get("name") or f"Company {company.id}"
            link = self._get_object_url("companies", company.id)

            # Build main content
            content_parts = [f"Company: {title}"]
            if company.properties.get("domain"):
                content_parts.append(f"Domain: {company.properties['domain']}")
            if company.properties.get("industry"):
                content_parts.append(f"Industry: {company.properties['industry']}")
            if company.properties.get("city") and company.properties.get("state"):
                content_parts.append(
                    f"Location: {company.properties['city']}, {company.properties['state']}"
                )
            if company.properties.get("description"):
                content_parts.append(
                    f"Description: {company.properties['description']}"
                )

            content_text = "\n".join(content_parts)

            # Main company section
            sections = [TextSection(link=link, text=content_text)]

            # Metadata with parent object IDs
            metadata: dict[str, str | list[str]] = {
                "company_id": company.id,
                "object_type": "company",
            }

            if company.properties.get("industry"):
                metadata["industry"] = company.properties["industry"]
            if company.properties.get("domain"):
                metadata["domain"] = company.properties["domain"]

            # Add associated objects as sections
            associated_contact_ids = []
            associated_deal_ids = []
            associated_ticket_ids = []

            # Get associated contacts
            associated_contacts = self._get_associated_objects(
                api_client, company.id, "companies", "contacts"
            )
            for contact in associated_contacts:
                sections.append(self._create_object_section(contact, "contacts"))
                associated_contact_ids.append(contact["id"])

            # Get associated deals
            associated_deals = self._get_associated_objects(
                api_client, company.id, "companies", "deals"
            )
            for deal in associated_deals:
                sections.append(self._create_object_section(deal, "deals"))
                associated_deal_ids.append(deal["id"])

            # Get associated tickets
            associated_tickets = self._get_associated_objects(
                api_client, company.id, "companies", "tickets"
            )
            for ticket in associated_tickets:
                sections.append(self._create_object_section(ticket, "tickets"))
                associated_ticket_ids.append(ticket["id"])

            # Get associated notes
            associated_notes = self._get_associated_notes(
                api_client, company.id, "companies"
            )
            for note in associated_notes:
                sections.append(self._create_object_section(note, "notes"))

            # Add association IDs to metadata
            if associated_contact_ids:
                metadata["associated_contact_ids"] = associated_contact_ids
            if associated_deal_ids:
                metadata["associated_deal_ids"] = associated_deal_ids
            if associated_ticket_ids:
                metadata["associated_ticket_ids"] = associated_ticket_ids

            doc_batch.append(
                Document(
                    id=f"hubspot_company_{company.id}",
                    sections=cast(list[TextSection | ImageSection], sections),
                    source=DocumentSource.HUBSPOT,
                    semantic_identifier=title,
                    doc_updated_at=company.updated_at.replace(tzinfo=timezone.utc),
                    metadata=metadata,
                    doc_metadata={
                        "hierarchy": {
                            "source_path": ["Companies"],
                            "object_type": "company",
                            "object_id": company.id,
                        }
                    },
                )
            )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def _process_deals(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        api_client = HubSpot(access_token=self.access_token)

        deals_iter = self._paginated_results(
            api_client.crm.deals.basic_api.get_page,
            properties=[
                "dealname",
                "amount",
                "dealstage",
                "closedate",
                "pipeline",
                "description",
                "createdate",
                "hs_lastmodifieddate",
            ],
            associations=["contacts", "companies", "tickets"],
        )

        doc_batch: list[Document | HierarchyNode] = []

        for deal in deals_iter:
            updated_at = deal.updated_at.replace(tzinfo=None)
            if start is not None and updated_at < start.replace(tzinfo=None):
                continue
            if end is not None and updated_at > end.replace(tzinfo=None):
                continue

            title = deal.properties.get("dealname") or f"Deal {deal.id}"
            link = self._get_object_url("deals", deal.id)

            # Build main content
            content_parts = [f"Deal: {title}"]
            if deal.properties.get("amount"):
                content_parts.append(f"Amount: ${deal.properties['amount']}")
            if deal.properties.get("dealstage"):
                content_parts.append(f"Stage: {deal.properties['dealstage']}")
            if deal.properties.get("closedate"):
                content_parts.append(f"Close Date: {deal.properties['closedate']}")
            if deal.properties.get("pipeline"):
                content_parts.append(f"Pipeline: {deal.properties['pipeline']}")
            if deal.properties.get("description"):
                content_parts.append(f"Description: {deal.properties['description']}")

            content_text = "\n".join(content_parts)

            # Main deal section
            sections = [TextSection(link=link, text=content_text)]

            # Metadata with parent object IDs
            metadata: dict[str, str | list[str]] = {
                "deal_id": deal.id,
                "object_type": "deal",
            }

            if deal.properties.get("dealstage"):
                metadata["deal_stage"] = deal.properties["dealstage"]
            if deal.properties.get("pipeline"):
                metadata["pipeline"] = deal.properties["pipeline"]
            if deal.properties.get("amount"):
                metadata["amount"] = deal.properties["amount"]

            # Add associated objects as sections
            associated_contact_ids = []
            associated_company_ids = []
            associated_ticket_ids = []

            # Get associated contacts
            associated_contacts = self._get_associated_objects(
                api_client, deal.id, "deals", "contacts"
            )
            for contact in associated_contacts:
                sections.append(self._create_object_section(contact, "contacts"))
                associated_contact_ids.append(contact["id"])

            # Get associated companies
            associated_companies = self._get_associated_objects(
                api_client, deal.id, "deals", "companies"
            )
            for company in associated_companies:
                sections.append(self._create_object_section(company, "companies"))
                associated_company_ids.append(company["id"])

            # Get associated tickets
            associated_tickets = self._get_associated_objects(
                api_client, deal.id, "deals", "tickets"
            )
            for ticket in associated_tickets:
                sections.append(self._create_object_section(ticket, "tickets"))
                associated_ticket_ids.append(ticket["id"])

            # Get associated notes
            associated_notes = self._get_associated_notes(api_client, deal.id, "deals")
            for note in associated_notes:
                sections.append(self._create_object_section(note, "notes"))

            # Add association IDs to metadata
            if associated_contact_ids:
                metadata["associated_contact_ids"] = associated_contact_ids
            if associated_company_ids:
                metadata["associated_company_ids"] = associated_company_ids
            if associated_ticket_ids:
                metadata["associated_ticket_ids"] = associated_ticket_ids

            doc_batch.append(
                Document(
                    id=f"hubspot_deal_{deal.id}",
                    sections=cast(list[TextSection | ImageSection], sections),
                    source=DocumentSource.HUBSPOT,
                    semantic_identifier=title,
                    doc_updated_at=deal.updated_at.replace(tzinfo=timezone.utc),
                    metadata=metadata,
                    doc_metadata={
                        "hierarchy": {
                            "source_path": ["Deals"],
                            "object_type": "deal",
                            "object_id": deal.id,
                        }
                    },
                )
            )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def _process_contacts(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        api_client = HubSpot(access_token=self.access_token)

        contacts_iter = self._paginated_results(
            api_client.crm.contacts.basic_api.get_page,
            properties=[
                "firstname",
                "lastname",
                "email",
                "company",
                "jobtitle",
                "phone",
                "city",
                "state",
                "createdate",
                "lastmodifieddate",
            ],
            associations=["companies", "deals", "tickets"],
        )

        doc_batch: list[Document | HierarchyNode] = []

        for contact in contacts_iter:
            updated_at = contact.updated_at.replace(tzinfo=None)
            if start is not None and updated_at < start.replace(tzinfo=None):
                continue
            if end is not None and updated_at > end.replace(tzinfo=None):
                continue

            # Build contact name
            name_parts = []
            if contact.properties.get("firstname"):
                name_parts.append(contact.properties["firstname"])
            if contact.properties.get("lastname"):
                name_parts.append(contact.properties["lastname"])

            if name_parts:
                title = " ".join(name_parts)
            elif contact.properties.get("email"):
                # Use email as fallback if no first/last name
                title = contact.properties["email"]
            else:
                title = f"Contact {contact.id}"

            link = self._get_object_url("contacts", contact.id)

            # Build main content
            content_parts = [f"Contact: {title}"]
            if contact.properties.get("email"):
                content_parts.append(f"Email: {contact.properties['email']}")
            if contact.properties.get("company"):
                content_parts.append(f"Company: {contact.properties['company']}")
            if contact.properties.get("jobtitle"):
                content_parts.append(f"Job Title: {contact.properties['jobtitle']}")
            if contact.properties.get("phone"):
                content_parts.append(f"Phone: {contact.properties['phone']}")
            if contact.properties.get("city") and contact.properties.get("state"):
                content_parts.append(
                    f"Location: {contact.properties['city']}, {contact.properties['state']}"
                )

            content_text = "\n".join(content_parts)

            # Main contact section
            sections = [TextSection(link=link, text=content_text)]

            # Metadata with parent object IDs
            metadata: dict[str, str | list[str]] = {
                "contact_id": contact.id,
                "object_type": "contact",
            }

            if contact.properties.get("email"):
                metadata["email"] = contact.properties["email"]
            if contact.properties.get("company"):
                metadata["company"] = contact.properties["company"]
            if contact.properties.get("jobtitle"):
                metadata["job_title"] = contact.properties["jobtitle"]

            # Add associated objects as sections
            associated_company_ids = []
            associated_deal_ids = []
            associated_ticket_ids = []

            # Get associated companies
            associated_companies = self._get_associated_objects(
                api_client, contact.id, "contacts", "companies"
            )
            for company in associated_companies:
                sections.append(self._create_object_section(company, "companies"))
                associated_company_ids.append(company["id"])

            # Get associated deals
            associated_deals = self._get_associated_objects(
                api_client, contact.id, "contacts", "deals"
            )
            for deal in associated_deals:
                sections.append(self._create_object_section(deal, "deals"))
                associated_deal_ids.append(deal["id"])

            # Get associated tickets
            associated_tickets = self._get_associated_objects(
                api_client, contact.id, "contacts", "tickets"
            )
            for ticket in associated_tickets:
                sections.append(self._create_object_section(ticket, "tickets"))
                associated_ticket_ids.append(ticket["id"])

            # Get associated notes
            associated_notes = self._get_associated_notes(
                api_client, contact.id, "contacts"
            )
            for note in associated_notes:
                sections.append(self._create_object_section(note, "notes"))

            # Add association IDs to metadata
            if associated_company_ids:
                metadata["associated_company_ids"] = associated_company_ids
            if associated_deal_ids:
                metadata["associated_deal_ids"] = associated_deal_ids
            if associated_ticket_ids:
                metadata["associated_ticket_ids"] = associated_ticket_ids

            doc_batch.append(
                Document(
                    id=f"hubspot_contact_{contact.id}",
                    sections=cast(list[TextSection | ImageSection], sections),
                    source=DocumentSource.HUBSPOT,
                    semantic_identifier=title,
                    doc_updated_at=contact.updated_at.replace(tzinfo=timezone.utc),
                    metadata=metadata,
                    doc_metadata={
                        "hierarchy": {
                            "source_path": ["Contacts"],
                            "object_type": "contact",
                            "object_id": contact.id,
                        }
                    },
                )
            )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Load all HubSpot objects (tickets, companies, deals, contacts)"""
        # Process each object type based on configuration
        if "tickets" in self.object_types:
            yield from self._process_tickets()
        if "companies" in self.object_types:
            yield from self._process_companies()
        if "deals" in self.object_types:
            yield from self._process_deals()
        if "contacts" in self.object_types:
            yield from self._process_contacts()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        # Process each object type with time filtering based on configuration
        if "tickets" in self.object_types:
            yield from self._process_tickets(start_datetime, end_datetime)
        if "companies" in self.object_types:
            yield from self._process_companies(start_datetime, end_datetime)
        if "deals" in self.object_types:
            yield from self._process_deals(start_datetime, end_datetime)
        if "contacts" in self.object_types:
            yield from self._process_contacts(start_datetime, end_datetime)


if __name__ == "__main__":
    import os

    connector = HubSpotConnector()
    connector.load_credentials(
        {"hubspot_access_token": os.environ["HUBSPOT_ACCESS_TOKEN"]}
    )
    # Run the first example
    document_batches = connector.load_from_state()
    first_batch = next(document_batches)
    for doc in first_batch:
        print(doc.model_dump_json(indent=2))


================================================
FILE: backend/onyx/connectors/hubspot/rate_limit.py
================================================
from __future__ import annotations

import time
from collections.abc import Callable
from typing import Any
from typing import TypeVar

from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    RateLimitTriedTooManyTimesError,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()

T = TypeVar("T")

# HubSpot exposes a ten second rolling window (x-hubspot-ratelimit-interval-milliseconds)
# with a maximum of 190 requests, and a per-second limit of 19 requests.
_HUBSPOT_TEN_SECOND_LIMIT = 190
_HUBSPOT_TEN_SECOND_PERIOD = 10  # seconds
_HUBSPOT_SECONDLY_LIMIT = 19
_HUBSPOT_SECONDLY_PERIOD = 1  # second
_DEFAULT_SLEEP_SECONDS = 10
_SLEEP_PADDING_SECONDS = 1.0
_MAX_RATE_LIMIT_RETRIES = 5


def _extract_header(headers: Any, key: str) -> str | None:
    if headers is None:
        return None

    getter = getattr(headers, "get", None)
    if callable(getter):
        value = getter(key)
        if value is not None:
            return value

    if isinstance(headers, dict):
        value = headers.get(key)
        if value is not None:
            return value

    return None


def is_rate_limit_error(exception: Exception) -> bool:
    status = getattr(exception, "status", None)
    if status == 429:
        return True

    headers = getattr(exception, "headers", None)
    if headers is not None:
        remaining = _extract_header(headers, "x-hubspot-ratelimit-remaining")
        if remaining == "0":
            return True
        secondly_remaining = _extract_header(
            headers, "x-hubspot-ratelimit-secondly-remaining"
        )
        if secondly_remaining == "0":
            return True

    message = str(exception)
    return "RATE_LIMIT" in message or "Too Many Requests" in message


def get_rate_limit_retry_delay_seconds(exception: Exception) -> float:
    headers = getattr(exception, "headers", None)

    retry_after = _extract_header(headers, "Retry-After")
    if retry_after:
        try:
            return float(retry_after) + _SLEEP_PADDING_SECONDS
        except ValueError:
            logger.debug(
                "Failed to parse Retry-After header '%s' as float", retry_after
            )

    interval_ms = _extract_header(headers, "x-hubspot-ratelimit-interval-milliseconds")
    if interval_ms:
        try:
            return float(interval_ms) / 1000.0 + _SLEEP_PADDING_SECONDS
        except ValueError:
            logger.debug(
                "Failed to parse x-hubspot-ratelimit-interval-milliseconds '%s' as float",
                interval_ms,
            )

    secondly_limit = _extract_header(headers, "x-hubspot-ratelimit-secondly")
    if secondly_limit:
        try:
            per_second = max(float(secondly_limit), 1.0)
            return (1.0 / per_second) + _SLEEP_PADDING_SECONDS
        except ValueError:
            logger.debug(
                "Failed to parse x-hubspot-ratelimit-secondly '%s' as float",
                secondly_limit,
            )

    return _DEFAULT_SLEEP_SECONDS + _SLEEP_PADDING_SECONDS


class HubSpotRateLimiter:
    def __init__(
        self,
        *,
        ten_second_limit: int = _HUBSPOT_TEN_SECOND_LIMIT,
        ten_second_period: int = _HUBSPOT_TEN_SECOND_PERIOD,
        secondly_limit: int = _HUBSPOT_SECONDLY_LIMIT,
        secondly_period: int = _HUBSPOT_SECONDLY_PERIOD,
        max_retries: int = _MAX_RATE_LIMIT_RETRIES,
    ) -> None:
        self._max_retries = max_retries

        @rate_limit_builder(max_calls=secondly_limit, period=secondly_period)
        @rate_limit_builder(max_calls=ten_second_limit, period=ten_second_period)
        def _execute(callable_: Callable[[], T]) -> T:
            return callable_()

        self._execute = _execute

    def call(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        attempts = 0

        while True:
            try:
                return self._execute(lambda: func(*args, **kwargs))
            except Exception as exc:  # pylint: disable=broad-except
                if not is_rate_limit_error(exc):
                    raise

                attempts += 1
                if attempts > self._max_retries:
                    raise RateLimitTriedTooManyTimesError(
                        "Exceeded configured HubSpot rate limit retries"
                    ) from exc

                wait_time = get_rate_limit_retry_delay_seconds(exc)
                logger.notice(
                    "HubSpot rate limit reached. Sleeping %.2f seconds before retrying.",
                    wait_time,
                )
                time.sleep(wait_time)


================================================
FILE: backend/onyx/connectors/imap/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/imap/connector.py
================================================
import copy
import email
import imaplib
import os
import re
from datetime import datetime
from datetime import timezone
from email.message import Message
from email.utils import parseaddr
from enum import Enum
from typing import Any
from typing import cast

import bs4
from pydantic import BaseModel

from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.imap.models import EmailHeaders
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()


_DEFAULT_IMAP_PORT_NUMBER = int(os.environ.get("IMAP_PORT", 993))
_IMAP_OKAY_STATUS = "OK"
_PAGE_SIZE = 100
_USERNAME_KEY = "imap_username"
_PASSWORD_KEY = "imap_password"


class CurrentMailbox(BaseModel):
    mailbox: str
    todo_email_ids: list[str]


# An email has a list of mailboxes.
# Each mailbox has a list of email-ids inside of it.
#
# Usage:
# To use this checkpointer, first fetch all the mailboxes.
# Then, pop a mailbox and fetch all of its email-ids.
# Then, pop each email-id and fetch its content (and parse it, etc..).
# When you have popped all email-ids for this mailbox, pop the next mailbox and repeat the above process until you're done.
#
# For initial checkpointing, set both fields to `None`.
class ImapCheckpoint(ConnectorCheckpoint):
    todo_mailboxes: list[str] | None = None
    current_mailbox: CurrentMailbox | None = None


class LoginState(str, Enum):
    LoggedIn = "logged_in"
    LoggedOut = "logged_out"


class ImapConnector(
    CredentialsConnector,
    CheckpointedConnectorWithPermSync[ImapCheckpoint],
):
    def __init__(
        self,
        host: str,
        port: int = _DEFAULT_IMAP_PORT_NUMBER,
        mailboxes: list[str] | None = None,
    ) -> None:
        self._host = host
        self._port = port
        self._mailboxes = mailboxes
        self._credentials: dict[str, Any] | None = None

    @property
    def credentials(self) -> dict[str, Any]:
        if not self._credentials:
            raise RuntimeError(
                "Credentials have not been initialized; call `set_credentials_provider` first"
            )
        return self._credentials

    def _get_mail_client(self) -> imaplib.IMAP4_SSL:
        """
        Returns a new `imaplib.IMAP4_SSL` instance.

        The `imaplib.IMAP4_SSL` object is supposed to be an "ephemeral" object; it's not something that you can login,
        logout, then log back into again. I.e., the following will fail:

        ```py
        mail_client.login(..)
        mail_client.logout();
        mail_client.login(..)
        ```

        Therefore, you need a fresh, new instance in order to operate with IMAP. This function gives one to you.

        # Notes
        This function will throw an error if the credentials have not yet been set.
        """

        def get_or_raise(name: str) -> str:
            value = self.credentials.get(name)
            if not value:
                raise RuntimeError(f"Credential item {name=} was not found")
            if not isinstance(value, str):
                raise RuntimeError(
                    f"Credential item {name=} must be of type str, instead received {type(name)=}"
                )
            return value

        username = get_or_raise(_USERNAME_KEY)
        password = get_or_raise(_PASSWORD_KEY)

        mail_client = imaplib.IMAP4_SSL(host=self._host, port=self._port)
        status, _data = mail_client.login(user=username, password=password)

        if status != _IMAP_OKAY_STATUS:
            raise RuntimeError(f"Failed to log into imap server; {status=}")

        return mail_client

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ImapCheckpoint,
        include_perm_sync: bool,
    ) -> CheckpointOutput[ImapCheckpoint]:
        checkpoint = cast(ImapCheckpoint, copy.deepcopy(checkpoint))
        checkpoint.has_more = True

        mail_client = self._get_mail_client()

        if checkpoint.todo_mailboxes is None:
            # This is the dummy checkpoint.
            # Fill it with mailboxes first.
            if self._mailboxes:
                checkpoint.todo_mailboxes = _sanitize_mailbox_names(self._mailboxes)
            else:
                fetched_mailboxes = _fetch_all_mailboxes_for_email_account(
                    mail_client=mail_client
                )
                if not fetched_mailboxes:
                    raise RuntimeError(
                        "Failed to find any mailboxes for this email account"
                    )
                checkpoint.todo_mailboxes = _sanitize_mailbox_names(fetched_mailboxes)

            return checkpoint

        if (
            not checkpoint.current_mailbox
            or not checkpoint.current_mailbox.todo_email_ids
        ):
            if not checkpoint.todo_mailboxes:
                checkpoint.has_more = False
                return checkpoint

            mailbox = checkpoint.todo_mailboxes.pop()
            email_ids = _fetch_email_ids_in_mailbox(
                mail_client=mail_client,
                mailbox=mailbox,
                start=start,
                end=end,
            )
            checkpoint.current_mailbox = CurrentMailbox(
                mailbox=mailbox,
                todo_email_ids=email_ids,
            )

        _select_mailbox(
            mail_client=mail_client, mailbox=checkpoint.current_mailbox.mailbox
        )
        current_todos = cast(
            list, copy.deepcopy(checkpoint.current_mailbox.todo_email_ids[:_PAGE_SIZE])
        )
        checkpoint.current_mailbox.todo_email_ids = (
            checkpoint.current_mailbox.todo_email_ids[_PAGE_SIZE:]
        )

        for email_id in current_todos:
            email_msg = _fetch_email(mail_client=mail_client, email_id=email_id)
            if not email_msg:
                logger.warn(f"Failed to fetch message {email_id=}; skipping")
                continue

            email_headers = EmailHeaders.from_email_msg(email_msg=email_msg)

            yield _convert_email_headers_and_body_into_document(
                email_msg=email_msg,
                email_headers=email_headers,
                include_perm_sync=include_perm_sync,
            )

        return checkpoint

    # impls for BaseConnector

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        raise NotImplementedError("Use `set_credentials_provider` instead")

    def validate_connector_settings(self) -> None:
        self._get_mail_client()

    # impls for CredentialsConnector

    def set_credentials_provider(
        self, credentials_provider: CredentialsProviderInterface
    ) -> None:
        self._credentials = credentials_provider.get_credentials()

    # impls for CheckpointedConnector

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ImapCheckpoint,
    ) -> CheckpointOutput[ImapCheckpoint]:
        return self._load_from_checkpoint(
            start=start, end=end, checkpoint=checkpoint, include_perm_sync=False
        )

    def build_dummy_checkpoint(self) -> ImapCheckpoint:
        return ImapCheckpoint(has_more=True)

    def validate_checkpoint_json(self, checkpoint_json: str) -> ImapCheckpoint:
        return ImapCheckpoint.model_validate_json(json_data=checkpoint_json)

    # impls for CheckpointedConnectorWithPermSync

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ImapCheckpoint,
    ) -> CheckpointOutput[ImapCheckpoint]:
        return self._load_from_checkpoint(
            start=start, end=end, checkpoint=checkpoint, include_perm_sync=True
        )


def _fetch_all_mailboxes_for_email_account(mail_client: imaplib.IMAP4_SSL) -> list[str]:
    status, mailboxes_data = mail_client.list(directory="*", pattern="*")
    if status != _IMAP_OKAY_STATUS:
        raise RuntimeError(f"Failed to fetch mailboxes; {status=}")

    mailboxes = []

    for mailboxes_raw in mailboxes_data:
        if isinstance(mailboxes_raw, bytes):
            mailboxes_str = mailboxes_raw.decode()
        elif isinstance(mailboxes_raw, str):
            mailboxes_str = mailboxes_raw
        else:
            logger.warn(
                f"Expected the mailbox data to be of type str, instead got {type(mailboxes_raw)=} {mailboxes_raw}; skipping"
            )
            continue

        # The mailbox LIST response output can be found here:
        # https://www.rfc-editor.org/rfc/rfc3501.html#section-7.2.2
        #
        # The general format is:
        # `(<name-attributes>) <hierarchy-delimiter> <mailbox-name>`
        #
        # The below regex matches on that pattern; from there, we select the 3rd match (index 2), which is the mailbox-name.
        match = re.match(r'\([^)]*\)\s+"([^"]+)"\s+"?(.+?)"?$', mailboxes_str)
        if not match:
            logger.warn(
                f"Invalid mailbox-data formatting structure: {mailboxes_str=}; skipping"
            )
            continue

        mailbox = match.group(2)
        mailboxes.append(mailbox)

    return mailboxes


def _select_mailbox(mail_client: imaplib.IMAP4_SSL, mailbox: str) -> None:
    status, _ids = mail_client.select(mailbox=mailbox, readonly=True)
    if status != _IMAP_OKAY_STATUS:
        raise RuntimeError(f"Failed to select {mailbox=}")


def _fetch_email_ids_in_mailbox(
    mail_client: imaplib.IMAP4_SSL,
    mailbox: str,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
) -> list[str]:
    _select_mailbox(mail_client=mail_client, mailbox=mailbox)

    start_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime("%d-%b-%Y")
    end_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime("%d-%b-%Y")
    search_criteria = f'(SINCE "{start_str}" BEFORE "{end_str}")'

    status, email_ids_byte_array = mail_client.search(None, search_criteria)

    if status != _IMAP_OKAY_STATUS or not email_ids_byte_array:
        raise RuntimeError(f"Failed to fetch email ids; {status=}")

    email_ids: bytes = email_ids_byte_array[0]

    return [email_id.decode() for email_id in email_ids.split()]


def _fetch_email(mail_client: imaplib.IMAP4_SSL, email_id: str) -> Message | None:
    status, msg_data = mail_client.fetch(message_set=email_id, message_parts="(RFC822)")
    if status != _IMAP_OKAY_STATUS or not msg_data:
        return None

    data = msg_data[0]
    if not isinstance(data, tuple):
        raise RuntimeError(
            f"Message data should be a tuple; instead got a {type(data)=} {data=}"
        )

    _metadata, raw_email = data
    return email.message_from_bytes(raw_email)


def _convert_email_headers_and_body_into_document(
    email_msg: Message,
    email_headers: EmailHeaders,
    include_perm_sync: bool,
) -> Document:
    sender_name, sender_addr = _parse_singular_addr(raw_header=email_headers.sender)
    parsed_recipients = (
        _parse_addrs(raw_header=email_headers.recipients)
        if email_headers.recipients
        else []
    )

    expert_info_map = {
        recipient_addr: BasicExpertInfo(
            display_name=recipient_name, email=recipient_addr
        )
        for recipient_name, recipient_addr in parsed_recipients
    }
    if sender_addr not in expert_info_map:
        expert_info_map[sender_addr] = BasicExpertInfo(
            display_name=sender_name, email=sender_addr
        )

    email_body = _parse_email_body(email_msg=email_msg, email_headers=email_headers)
    primary_owners = list(expert_info_map.values())
    external_access = (
        ExternalAccess(
            external_user_emails=set(expert_info_map.keys()),
            external_user_group_ids=set(),
            is_public=False,
        )
        if include_perm_sync
        else None
    )

    return Document(
        id=email_headers.id,
        title=email_headers.subject,
        semantic_identifier=email_headers.subject,
        metadata={},
        source=DocumentSource.IMAP,
        sections=[TextSection(text=email_body)],
        primary_owners=primary_owners,
        external_access=external_access,
    )


def _parse_email_body(
    email_msg: Message,
    email_headers: EmailHeaders,
) -> str:
    body = None
    for part in email_msg.walk():
        if part.is_multipart():
            # Multipart parts are *containers* for other parts, not the actual content itself.
            # Therefore, we skip until we find the individual parts instead.
            continue

        charset = part.get_content_charset() or "utf-8"

        try:
            raw_payload = part.get_payload(decode=True)
            if not isinstance(raw_payload, bytes):
                logger.warn(
                    "Payload section from email was expected to be an array of bytes, instead got "
                    f"{type(raw_payload)=}, {raw_payload=}"
                )
                continue
            body = raw_payload.decode(charset)
            break
        except (UnicodeDecodeError, LookupError) as e:
            print(f"Warning: Could not decode part with charset {charset}. Error: {e}")
            continue

    if not body:
        logger.warn(
            f"Email with {email_headers.id=} has an empty body; returning an empty string"
        )
        return ""

    soup = bs4.BeautifulSoup(markup=body, features="html.parser")

    return " ".join(str_section for str_section in soup.stripped_strings)


def _sanitize_mailbox_names(mailboxes: list[str]) -> list[str]:
    """
    Mailboxes with special characters in them must be enclosed by double-quotes, as per the IMAP protocol.
    Just to be safe, we wrap *all* mailboxes with double-quotes.
    """
    return [f'"{mailbox}"' for mailbox in mailboxes if mailbox]


def _parse_addrs(raw_header: str) -> list[tuple[str, str]]:
    addrs = raw_header.split(",")
    name_addr_pairs = [parseaddr(addr=addr) for addr in addrs if addr]
    return [(name, addr) for name, addr in name_addr_pairs if addr]


def _parse_singular_addr(raw_header: str) -> tuple[str, str]:
    addrs = _parse_addrs(raw_header=raw_header)
    if not addrs:
        raise RuntimeError(
            f"Parsing email header resulted in no addresses being found; {raw_header=}"
        )
    elif len(addrs) >= 2:
        raise RuntimeError(
            f"Expected a singular address, but instead got multiple; {raw_header=} {addrs=}"
        )

    return addrs[0]


if __name__ == "__main__":
    import time
    from tests.daily.connectors.utils import load_all_from_connector
    from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider

    host = os.environ.get("IMAP_HOST")
    mailboxes_str = os.environ.get("IMAP_MAILBOXES")
    username = os.environ.get("IMAP_USERNAME")
    password = os.environ.get("IMAP_PASSWORD")

    mailboxes = (
        [mailbox.strip() for mailbox in mailboxes_str.split(",")]
        if mailboxes_str
        else []
    )

    if not host:
        raise RuntimeError("`IMAP_HOST` must be set")

    imap_connector = ImapConnector(
        host=host,
        mailboxes=mailboxes,
    )

    imap_connector.set_credentials_provider(
        OnyxStaticCredentialsProvider(
            tenant_id=None,
            connector_name=DocumentSource.IMAP,
            credential_json={
                _USERNAME_KEY: username,
                _PASSWORD_KEY: password,
            },
        )
    )

    for doc in load_all_from_connector(
        connector=imap_connector,
        start=0,
        end=time.time(),
    ).documents:
        print(doc)


================================================
FILE: backend/onyx/connectors/imap/models.py
================================================
import email
from datetime import datetime
from email.message import Message
from enum import Enum

from pydantic import BaseModel


class Header(str, Enum):
    SUBJECT_HEADER = "subject"
    FROM_HEADER = "from"
    TO_HEADER = "to"
    DELIVERED_TO_HEADER = (
        "Delivered-To"  # Used in mailing lists instead of the "to" header.
    )
    DATE_HEADER = "date"
    MESSAGE_ID_HEADER = "Message-ID"


class EmailHeaders(BaseModel):
    """
    Model for email headers extracted from IMAP messages.
    """

    id: str
    subject: str
    sender: str
    recipients: str | None
    date: datetime

    @classmethod
    def from_email_msg(cls, email_msg: Message) -> "EmailHeaders":
        def _decode(header: str, default: str | None = None) -> str | None:
            value = email_msg.get(header, default)
            if not value:
                return None

            decoded_value, encoding = email.header.decode_header(value)[0]
            if isinstance(decoded_value, bytes):
                encoding = encoding or "utf-8"
                return decoded_value.decode(encoding, errors="replace")
            elif isinstance(decoded_value, str):
                return decoded_value
            else:
                return None

        def _parse_date(date_str: str | None) -> datetime | None:
            if not date_str:
                return None
            try:
                return email.utils.parsedate_to_datetime(date_str)
            except (TypeError, ValueError):
                return None

        message_id = _decode(header=Header.MESSAGE_ID_HEADER)
        # It's possible for the subject line to not exist or be an empty string.
        subject = _decode(header=Header.SUBJECT_HEADER) or "Unknown Subject"
        from_ = _decode(header=Header.FROM_HEADER)
        to = _decode(header=Header.TO_HEADER)
        if not to:
            to = _decode(header=Header.DELIVERED_TO_HEADER)
        date_str = _decode(header=Header.DATE_HEADER)
        date = _parse_date(date_str=date_str)

        # If any of the above are `None`, model validation will fail.
        # Therefore, no guards (i.e.: `if <header> is None: raise RuntimeError(..)`) were written.
        return cls.model_validate(
            {
                "id": message_id,
                "subject": subject,
                "sender": from_,
                "recipients": to,
                "date": date,
            }
        )


================================================
FILE: backend/onyx/connectors/interfaces.py
================================================
import abc
from collections.abc import Generator
from collections.abc import Iterator
from types import TracebackType
from typing import Any
from typing import Generic
from typing import TypeAlias
from typing import TypeVar

from pydantic import BaseModel

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

SecondsSinceUnixEpoch = float

# Output types that can include HierarchyNode alongside Documents/SlimDocuments
GenerateDocumentsOutput = Iterator[list[Document | HierarchyNode]]
GenerateSlimDocumentOutput = Iterator[list[SlimDocument | HierarchyNode]]

CT = TypeVar("CT", bound=ConnectorCheckpoint)


class NormalizationResult(BaseModel):
    """Result of URL normalization attempt.

    Attributes:
        normalized_url: The normalized URL string, or None if normalization failed
        use_default: If True, fall back to default normalizer. If False, return None.
    """

    normalized_url: str | None
    use_default: bool = False


class BaseConnector(abc.ABC, Generic[CT]):
    REDIS_KEY_PREFIX = "da_connector_data:"

    @abc.abstractmethod
    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        raise NotImplementedError

    @staticmethod
    def parse_metadata(metadata: dict[str, Any]) -> list[str]:
        """Parse the metadata for a document/chunk into a string to pass to Generative AI as additional context"""
        custom_parser_req_msg = (
            "Specific metadata parsing required, connector has not implemented it."
        )
        metadata_lines = []
        for metadata_key, metadata_value in metadata.items():
            if isinstance(metadata_value, str):
                metadata_lines.append(f"{metadata_key}: {metadata_value}")
            elif isinstance(metadata_value, list):
                if not all([isinstance(val, str) for val in metadata_value]):
                    raise RuntimeError(custom_parser_req_msg)
                metadata_lines.append(f"{metadata_key}: {', '.join(metadata_value)}")
            else:
                raise RuntimeError(custom_parser_req_msg)
        return metadata_lines

    def validate_connector_settings(self) -> None:
        """
        Override this if your connector needs to validate credentials or settings.
        Raise an exception if invalid, otherwise do nothing.

        Default is a no-op (always successful).
        """

    def validate_perm_sync(self) -> None:
        """
        Don't override this; add a function to perm_sync_valid.py in the ee package
        to do permission sync validation
        """
        validate_connector_settings_fn = fetch_ee_implementation_or_noop(
            "onyx.connectors.perm_sync_valid",
            "validate_perm_sync",
            noop_return_value=None,
        )
        validate_connector_settings_fn(self)

    def set_allow_images(self, value: bool) -> None:
        """Implement if the underlying connector wants to skip/allow image downloading
        based on the application level image analysis setting."""

    @classmethod
    def normalize_url(cls, url: str) -> "NormalizationResult":  # noqa: ARG003
        """Normalize a URL to match the canonical Document.id format used during ingestion.

        Connectors that use URLs as document IDs should override this method.
        Returns NormalizationResult with use_default=True if not implemented.
        """
        return NormalizationResult(normalized_url=None, use_default=True)

    def build_dummy_checkpoint(self) -> CT:
        # TODO: find a way to make this work without type: ignore
        return ConnectorCheckpoint(has_more=True)  # type: ignore


# Large set update or reindex, generally pulling a complete state or from a savestate file
class LoadConnector(BaseConnector):
    @abc.abstractmethod
    def load_from_state(self) -> GenerateDocumentsOutput:
        raise NotImplementedError


# Small set updates by time
class PollConnector(BaseConnector):
    @abc.abstractmethod
    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        raise NotImplementedError


# Slim connectors retrieve just the ids of documents
class SlimConnector(BaseConnector):
    @abc.abstractmethod
    def retrieve_all_slim_docs(
        self,
    ) -> GenerateSlimDocumentOutput:
        raise NotImplementedError


# Slim connectors retrieve both the ids AND
# permission syncing information for connected documents
class SlimConnectorWithPermSync(BaseConnector):
    @abc.abstractmethod
    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        raise NotImplementedError


class OAuthConnector(BaseConnector):
    class AdditionalOauthKwargs(BaseModel):
        # if overridden, all fields should be str type
        pass

    @classmethod
    @abc.abstractmethod
    def oauth_id(cls) -> DocumentSource:
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def oauth_authorization_url(
        cls,
        base_domain: str,
        state: str,
        additional_kwargs: dict[str, str],
    ) -> str:
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def oauth_code_to_token(
        cls,
        base_domain: str,
        code: str,
        additional_kwargs: dict[str, str],
    ) -> dict[str, Any]:
        raise NotImplementedError


T = TypeVar("T", bound="CredentialsProviderInterface")


class CredentialsProviderInterface(abc.ABC, Generic[T]):
    @abc.abstractmethod
    def __enter__(self) -> T:
        raise NotImplementedError

    @abc.abstractmethod
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def get_tenant_id(self) -> str | None:
        raise NotImplementedError

    @abc.abstractmethod
    def get_provider_key(self) -> str:
        """a unique key that the connector can use to lock around a credential
        that might be used simultaneously.

        Will typically be the credential id, but can also just be something random
        in cases when there is nothing to lock (aka static credentials)
        """
        raise NotImplementedError

    @abc.abstractmethod
    def get_credentials(self) -> dict[str, Any]:
        raise NotImplementedError

    @abc.abstractmethod
    def set_credentials(self, credential_json: dict[str, Any]) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def is_dynamic(self) -> bool:
        """If dynamic, the credentials may change during usage ... meaning the client
        needs to use the locking features of the credentials provider to operate
        correctly.

        If static, the client can simply reference the credentials once and use them
        through the entire indexing run.
        """
        raise NotImplementedError


class CredentialsConnector(BaseConnector):
    """Implement this if the connector needs to be able to read and write credentials
    on the fly. Typically used with shared credentials/tokens that might be renewed
    at any time."""

    @abc.abstractmethod
    def set_credentials_provider(
        self, credentials_provider: CredentialsProviderInterface
    ) -> None:
        raise NotImplementedError


# Event driven
class EventConnector(BaseConnector):
    @abc.abstractmethod
    def handle_event(self, event: Any) -> GenerateDocumentsOutput:
        raise NotImplementedError


CheckpointOutput: TypeAlias = Generator[
    Document | HierarchyNode | ConnectorFailure, None, CT
]

HierarchyOutput: TypeAlias = Generator[HierarchyNode, None, None]


class CheckpointedConnector(BaseConnector[CT]):
    @abc.abstractmethod
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: CT,
    ) -> CheckpointOutput[CT]:
        """Yields back documents or failures. Final return is the new checkpoint.

        Final return can be access via either:

        ```
        try:
            for document_or_failure in connector.load_from_checkpoint(start, end, checkpoint):
                print(document_or_failure)
        except StopIteration as e:
            checkpoint = e.value  # Extracting the return value
            print(checkpoint)
        ```

        OR

        ```
        checkpoint = yield from connector.load_from_checkpoint(start, end, checkpoint)
        ```
        """
        raise NotImplementedError

    @abc.abstractmethod
    def build_dummy_checkpoint(self) -> CT:
        raise NotImplementedError

    @abc.abstractmethod
    def validate_checkpoint_json(self, checkpoint_json: str) -> CT:
        """Validate the checkpoint json and return the checkpoint object"""
        raise NotImplementedError


class CheckpointedConnectorWithPermSync(CheckpointedConnector[CT]):
    @abc.abstractmethod
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: CT,
    ) -> CheckpointOutput[CT]:
        raise NotImplementedError


class HierarchyConnector(BaseConnector):
    @abc.abstractmethod
    def load_hierarchy(
        self,
        start: SecondsSinceUnixEpoch,  # may be unused if the connector must load the full hierarchy each time
        end: SecondsSinceUnixEpoch,
    ) -> HierarchyOutput:
        raise NotImplementedError


================================================
FILE: backend/onyx/connectors/jira/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/jira/access.py
================================================
"""
Permissioning / AccessControl logic for JIRA Projects + Issues.
"""

from collections.abc import Callable
from typing import cast

from jira import JIRA

from onyx.access.models import ExternalAccess
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


def get_project_permissions(
    jira_client: JIRA,
    jira_project: str,
    add_prefix: bool = False,
) -> ExternalAccess | None:
    """
    Fetch the project + issue level permissions / access-control.
    This functionality requires Enterprise Edition.

    Args:
        jira_client: The JIRA client instance.
        jira_project: The JIRA project string.
        add_prefix: When True, prefix group IDs with source type (for indexing path).
                   When False (default), leave unprefixed (for permission sync path
                   where upsert_document_external_perms handles prefixing).

    Returns:
        ExternalAccess object for the page. None if EE is not enabled or no restrictions found.
    """

    # Check if EE is enabled
    if not global_version.is_ee_version():
        return None

    ee_get_project_permissions = cast(
        Callable[
            [JIRA, str, bool],
            ExternalAccess | None,
        ],
        fetch_versioned_implementation(
            "onyx.external_permissions.jira.page_access", "get_project_permissions"
        ),
    )

    return ee_get_project_permissions(
        jira_client,
        jira_project,
        add_prefix,
    )


================================================
FILE: backend/onyx/connectors/jira/connector.py
================================================
import copy
import json
import os
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterable
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any

import requests
from jira import JIRA
from jira.exceptions import JIRAError
from jira.resources import Issue
from more_itertools import chunked
from typing_extensions import override

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from onyx.configs.app_configs import JIRA_SLIM_PAGE_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    is_atlassian_date_error,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.jira.access import get_project_permissions
from onyx.connectors.jira.utils import best_effort_basic_expert_info
from onyx.connectors.jira.utils import best_effort_get_field_from_issue
from onyx.connectors.jira.utils import build_jira_client
from onyx.connectors.jira.utils import build_jira_url
from onyx.connectors.jira.utils import extract_text_from_adf
from onyx.connectors.jira.utils import get_comment_strs
from onyx.connectors.jira.utils import JIRA_CLOUD_API_VERSION
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.db.enums import HierarchyNodeType
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger


logger = setup_logger()

ONE_HOUR = 3600

_MAX_RESULTS_FETCH_IDS = 5000  # 5000
_JIRA_FULL_PAGE_SIZE = 50

# Constants for Jira field names
_FIELD_REPORTER = "reporter"
_FIELD_ASSIGNEE = "assignee"
_FIELD_PRIORITY = "priority"
_FIELD_STATUS = "status"
_FIELD_RESOLUTION = "resolution"
_FIELD_LABELS = "labels"
_FIELD_KEY = "key"
_FIELD_CREATED = "created"
_FIELD_DUEDATE = "duedate"
_FIELD_ISSUETYPE = "issuetype"
_FIELD_PARENT = "parent"
_FIELD_ASSIGNEE_EMAIL = "assignee_email"
_FIELD_REPORTER_EMAIL = "reporter_email"
_FIELD_PROJECT = "project"
_FIELD_PROJECT_NAME = "project_name"
_FIELD_UPDATED = "updated"
_FIELD_RESOLUTION_DATE = "resolutiondate"
_FIELD_RESOLUTION_DATE_KEY = "resolution_date"


def _is_cloud_client(jira_client: JIRA) -> bool:
    return jira_client._options["rest_api_version"] == JIRA_CLOUD_API_VERSION


def _perform_jql_search(
    jira_client: JIRA,
    jql: str,
    start: int,
    max_results: int,
    fields: str | None = None,
    all_issue_ids: list[list[str]] | None = None,
    checkpoint_callback: (
        Callable[[Iterator[list[str]], str | None], None] | None
    ) = None,
    nextPageToken: str | None = None,
    ids_done: bool = False,
) -> Iterable[Issue]:
    """
    The caller should expect
    a) this function returns an iterable of issues of length 0 < len(issues) <= max_results.
       - caveat; if all_issue_ids is provided, the iterable will be the size of some sub-list.
       - this will only not match the above bound if a recent deployment changed max_results.

    IF the v3 API is used (i.e. the jira instance is a cloud instance), then the caller should expect:

    b) this function will call checkpoint_callback ONCE after at least one of the following has happened:
       - a new batch of ids has been fetched via enhanced search
       - a batch of issues has been bulk-fetched
    c) checkpoint_callback is called with the new all_issue_ids and the pageToken of the enhanced
       search request. We pass in a pageToken of None once we've fetched all the issue ids.

    Note: nextPageToken is valid for 7 days according to a post from a year ago, so for now
    we won't add any handling for restarting (just re-index, since there's no easy
    way to recover from this).
    """
    # it would be preferable to use one approach for both versions, but
    # v2 doesnt have the bulk fetch api and v3 has fully deprecated the search
    # api that v2 uses
    if _is_cloud_client(jira_client):
        if all_issue_ids is None:
            raise ValueError("all_issue_ids is required for v3")
        return _perform_jql_search_v3(
            jira_client,
            jql,
            max_results,
            all_issue_ids,
            fields=fields,
            checkpoint_callback=checkpoint_callback,
            nextPageToken=nextPageToken,
            ids_done=ids_done,
        )
    else:
        return _perform_jql_search_v2(jira_client, jql, start, max_results, fields)


def _handle_jira_search_error(e: Exception, jql: str) -> None:
    """Handle common Jira search errors and raise appropriate exceptions.

    Args:
        e: The exception raised by the Jira API
        jql: The JQL query that caused the error

    Raises:
        ConnectorValidationError: For HTTP 400 errors (invalid JQL or project)
        CredentialExpiredError: For HTTP 401 errors
        InsufficientPermissionsError: For HTTP 403 errors
        Exception: Re-raises the original exception for other error types
    """
    # Extract error information from the exception
    error_text = ""
    status_code = None

    def _format_error_text(error_payload: Any) -> str:
        error_messages = (
            error_payload.get("errorMessages", [])
            if isinstance(error_payload, dict)
            else []
        )
        if error_messages:
            return (
                "; ".join(error_messages)
                if isinstance(error_messages, list)
                else str(error_messages)
            )
        return str(error_payload)

    # Try to get status code and error text from JIRAError or requests response
    if hasattr(e, "status_code"):
        status_code = e.status_code
        raw_text = getattr(e, "text", "")
        if isinstance(raw_text, str):
            try:
                error_text = _format_error_text(json.loads(raw_text))
            except Exception:
                error_text = raw_text
        else:
            error_text = str(raw_text)
    elif hasattr(e, "response") and e.response is not None:
        status_code = e.response.status_code
        # Try JSON first, fall back to text
        try:
            error_json = e.response.json()
            error_text = _format_error_text(error_json)
        except Exception:
            error_text = e.response.text

    # Handle specific status codes
    if status_code == 400:
        if "does not exist for the field 'project'" in error_text:
            raise ConnectorValidationError(
                f"The specified Jira project does not exist or you don't have access to it. JQL query: {jql}. Error: {error_text}"
            )
        raise ConnectorValidationError(
            f"Invalid JQL query. JQL: {jql}. Error: {error_text}"
        )
    elif status_code == 401:
        raise CredentialExpiredError(
            "Jira credentials are expired or invalid (HTTP 401)."
        )
    elif status_code == 403:
        raise InsufficientPermissionsError(
            f"Insufficient permissions to execute JQL query. JQL: {jql}"
        )

    # Re-raise for other error types
    raise e


def enhanced_search_ids(
    jira_client: JIRA, jql: str, nextPageToken: str | None = None
) -> tuple[list[str], str | None]:
    # https://community.atlassian.com/forums/Jira-articles/
    # Avoiding-Pitfalls-A-Guide-to-Smooth-Migration-to-Enhanced-JQL/ba-p/2985433
    # For cloud, it's recommended that we fetch all ids first then use the bulk fetch API.
    # The enhanced search isn't currently supported by our python library, so we have to
    # do this janky thing where we use the session directly.
    enhanced_search_path = jira_client._get_url("search/jql")
    params: dict[str, str | int | None] = {
        "jql": jql,
        "maxResults": _MAX_RESULTS_FETCH_IDS,
        "nextPageToken": nextPageToken,
        "fields": "id",
    }
    try:
        response = jira_client._session.get(enhanced_search_path, params=params)
        response.raise_for_status()
        response_json = response.json()
    except Exception as e:
        _handle_jira_search_error(e, jql)
        raise  # Explicitly re-raise for type checker, should never reach here

    return [str(issue["id"]) for issue in response_json["issues"]], response_json.get(
        "nextPageToken"
    )


def _bulk_fetch_request(
    jira_client: JIRA, issue_ids: list[str], fields: str | None
) -> list[dict[str, Any]]:
    """Raw POST to the bulkfetch endpoint. Returns the list of raw issue dicts."""
    bulk_fetch_path = jira_client._get_url("issue/bulkfetch")
    # Prepare the payload according to Jira API v3 specification
    payload: dict[str, Any] = {"issueIdsOrKeys": issue_ids}
    # Only restrict fields if specified, might want to explicitly do this in the future
    # to avoid reading unnecessary data
    payload["fields"] = fields.split(",") if fields else ["*all"]

    resp = jira_client._session.post(bulk_fetch_path, json=payload)
    return resp.json()["issues"]


def bulk_fetch_issues(
    jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
    # TODO(evan): move away from this jira library if they continue to not support
    # the endpoints we need. Using private fields is not ideal, but
    # is likely fine for now since we pin the library version

    try:
        raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
    except requests.exceptions.JSONDecodeError:
        if len(issue_ids) <= 1:
            logger.exception(
                f"Jira bulk-fetch response for issue(s) {issue_ids} could not "
                f"be decoded as JSON (response too large or truncated)."
            )
            raise

        mid = len(issue_ids) // 2
        logger.warning(
            f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
            f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
        )
        left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
        right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
        return left + right
    except Exception as e:
        logger.error(f"Error fetching issues: {e}")
        raise

    return [
        Issue(jira_client._options, jira_client._session, raw=issue)
        for issue in raw_issues
    ]


def _perform_jql_search_v3(
    jira_client: JIRA,
    jql: str,
    max_results: int,
    all_issue_ids: list[list[str]],
    fields: str | None = None,
    checkpoint_callback: (
        Callable[[Iterator[list[str]], str | None], None] | None
    ) = None,
    nextPageToken: str | None = None,
    ids_done: bool = False,
) -> Iterable[Issue]:
    """
    The way this works is we get all the issue ids and bulk fetch them in batches.
    However, for really large deployments we can't do these operations sequentially,
    as it might take several hours to fetch all the issue ids.

    So, each run of this function does at least one of:
     - fetch a batch of issue ids
     - bulk fetch a batch of issues

    If all_issue_ids is not None, we use it to bulk fetch issues.
    """

    # with some careful synchronization these steps can be done in parallel,
    # leaving that out for now to avoid rate limit issues
    if not ids_done:
        new_ids, pageToken = enhanced_search_ids(jira_client, jql, nextPageToken)
        if checkpoint_callback is not None:
            checkpoint_callback(chunked(new_ids, max_results), pageToken)

    # bulk fetch issues from ids. Note that the above callback MAY mutate all_issue_ids,
    # but this fetch always just takes the last id batch.
    if all_issue_ids:
        yield from bulk_fetch_issues(jira_client, all_issue_ids.pop(), fields)


def _perform_jql_search_v2(
    jira_client: JIRA,
    jql: str,
    start: int,
    max_results: int,
    fields: str | None = None,
) -> Iterable[Issue]:
    """
    Unfortunately, jira server/data center will forever use the v2 APIs that are now deprecated.
    """
    logger.debug(
        f"Fetching Jira issues with JQL: {jql}, starting at {start}, max results: {max_results}"
    )
    try:
        issues = jira_client.search_issues(
            jql_str=jql,
            startAt=start,
            maxResults=max_results,
            fields=fields,
        )
    except JIRAError as e:
        _handle_jira_search_error(e, jql)
        raise  # Explicitly re-raise for type checker, should never reach here

    for issue in issues:
        if isinstance(issue, Issue):
            yield issue
        else:
            raise RuntimeError(f"Found Jira object not of type Issue: {issue}")


def process_jira_issue(
    jira_base_url: str,
    issue: Issue,
    comment_email_blacklist: tuple[str, ...] = (),
    labels_to_skip: set[str] | None = None,
    parent_hierarchy_raw_node_id: str | None = None,
) -> Document | None:
    if labels_to_skip:
        if any(label in issue.fields.labels for label in labels_to_skip):
            logger.info(
                f"Skipping {issue.key} because it has a label to skip. Found "
                f"labels: {issue.fields.labels}. Labels to skip: {labels_to_skip}."
            )
            return None

    if isinstance(issue.fields.description, str):
        description = issue.fields.description
    else:
        description = extract_text_from_adf(issue.raw["fields"]["description"])

    comments = get_comment_strs(
        issue=issue,
        comment_email_blacklist=comment_email_blacklist,
    )
    ticket_content = f"{description}\n" + "\n".join(
        [f"Comment: {comment}" for comment in comments if comment]
    )

    # Check ticket size
    if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
        logger.info(
            f"Skipping {issue.key} because it exceeds the maximum size of {JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
        )
        return None

    page_url = build_jira_url(jira_base_url, issue.key)

    metadata_dict: dict[str, str | list[str]] = {}
    people = set()

    creator = best_effort_get_field_from_issue(issue, _FIELD_REPORTER)
    if creator is not None and (
        basic_expert_info := best_effort_basic_expert_info(creator)
    ):
        people.add(basic_expert_info)
        metadata_dict[_FIELD_REPORTER] = basic_expert_info.get_semantic_name()
        if email := basic_expert_info.get_email():
            metadata_dict[_FIELD_REPORTER_EMAIL] = email

    assignee = best_effort_get_field_from_issue(issue, _FIELD_ASSIGNEE)
    if assignee is not None and (
        basic_expert_info := best_effort_basic_expert_info(assignee)
    ):
        people.add(basic_expert_info)
        metadata_dict[_FIELD_ASSIGNEE] = basic_expert_info.get_semantic_name()
        if email := basic_expert_info.get_email():
            metadata_dict[_FIELD_ASSIGNEE_EMAIL] = email

    metadata_dict[_FIELD_KEY] = issue.key
    if priority := best_effort_get_field_from_issue(issue, _FIELD_PRIORITY):
        metadata_dict[_FIELD_PRIORITY] = priority.name
    if status := best_effort_get_field_from_issue(issue, _FIELD_STATUS):
        metadata_dict[_FIELD_STATUS] = status.name
    if resolution := best_effort_get_field_from_issue(issue, _FIELD_RESOLUTION):
        metadata_dict[_FIELD_RESOLUTION] = resolution.name
    if labels := best_effort_get_field_from_issue(issue, _FIELD_LABELS):
        metadata_dict[_FIELD_LABELS] = labels
    if created := best_effort_get_field_from_issue(issue, _FIELD_CREATED):
        metadata_dict[_FIELD_CREATED] = created
    if updated := best_effort_get_field_from_issue(issue, _FIELD_UPDATED):
        metadata_dict[_FIELD_UPDATED] = updated
    if duedate := best_effort_get_field_from_issue(issue, _FIELD_DUEDATE):
        metadata_dict[_FIELD_DUEDATE] = duedate
    if issuetype := best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE):
        metadata_dict[_FIELD_ISSUETYPE] = issuetype.name
    if resolutiondate := best_effort_get_field_from_issue(
        issue, _FIELD_RESOLUTION_DATE
    ):
        metadata_dict[_FIELD_RESOLUTION_DATE_KEY] = resolutiondate

    parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)
    if parent is not None:
        metadata_dict[_FIELD_PARENT] = parent.key

    project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)
    if project is not None:
        metadata_dict[_FIELD_PROJECT_NAME] = project.name
        metadata_dict[_FIELD_PROJECT] = project.key
    else:
        logger.error(f"Project should exist but does not for {issue.key}")

    return Document(
        id=page_url,
        sections=[TextSection(link=page_url, text=ticket_content)],
        source=DocumentSource.JIRA,
        semantic_identifier=f"{issue.key}: {issue.fields.summary}",
        title=f"{issue.key} {issue.fields.summary}",
        doc_updated_at=time_str_to_utc(issue.fields.updated),
        primary_owners=list(people) or None,
        metadata=metadata_dict,
        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )


class JiraConnectorCheckpoint(ConnectorCheckpoint):
    # used for v3 (cloud) endpoint
    all_issue_ids: list[list[str]] = []
    ids_done: bool = False
    cursor: str | None = None
    # deprecated
    # Used for v2 endpoint (server/data center)
    offset: int | None = None
    # Track hierarchy nodes we've already yielded to avoid duplicates across restarts
    seen_hierarchy_node_ids: list[str] = []


class JiraConnector(
    CheckpointedConnectorWithPermSync[JiraConnectorCheckpoint],
    SlimConnectorWithPermSync,
):
    def __init__(
        self,
        jira_base_url: str,
        project_key: str | None = None,
        comment_email_blacklist: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        # if a ticket has one of the labels specified in this list, we will just
        # skip it. This is generally used to avoid indexing extra sensitive
        # tickets.
        labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
        # Custom JQL query to filter Jira issues
        jql_query: str | None = None,
        scoped_token: bool = False,
    ) -> None:
        self.batch_size = batch_size

        # dealing with scoped tokens is a bit tricky becasue we need to hit api.atlassian.net
        # when making jira requests but still want correct links to issues in the UI.
        # So, the user's base url is stored here, but converted to a scoped url when passed
        # to the jira client.
        self.jira_base = jira_base_url.rstrip("/")  # Remove trailing slash if present
        self.jira_project = project_key
        self._comment_email_blacklist = comment_email_blacklist or []
        self.labels_to_skip = set(labels_to_skip)
        self.jql_query = jql_query
        self.scoped_token = scoped_token
        self._jira_client: JIRA | None = None
        # Cache project permissions to avoid fetching them repeatedly across runs
        self._project_permissions_cache: dict[str, Any] = {}

    @property
    def comment_email_blacklist(self) -> tuple:
        return tuple(email.strip() for email in self._comment_email_blacklist)

    @property
    def jira_client(self) -> JIRA:
        if self._jira_client is None:
            raise ConnectorMissingCredentialError("Jira")
        return self._jira_client

    @property
    def quoted_jira_project(self) -> str:
        # Quote the project name to handle reserved words
        if not self.jira_project:
            return ""
        return f'"{self.jira_project}"'

    def _get_project_permissions(
        self, project_key: str, add_prefix: bool = False
    ) -> Any:
        """Get project permissions with caching.

        Args:
            project_key: The Jira project key
            add_prefix: When True, prefix group IDs with source type (for indexing path).
                       When False (default), leave unprefixed (for permission sync path).

        Returns:
            The external access permissions for the project
        """
        # Use different cache keys for prefixed vs unprefixed to avoid mixing
        cache_key = f"{project_key}:{'prefixed' if add_prefix else 'unprefixed'}"
        if cache_key not in self._project_permissions_cache:
            self._project_permissions_cache[cache_key] = get_project_permissions(
                jira_client=self.jira_client,
                jira_project=project_key,
                add_prefix=add_prefix,
            )
        return self._project_permissions_cache[cache_key]

    def _is_epic(self, issue: Issue) -> bool:
        """Check if issue is an Epic."""
        issuetype = best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE)
        if issuetype is None:
            return False
        return issuetype.name.lower() == "epic"

    def _is_parent_epic(self, parent: Any) -> bool:
        """Check if a parent reference is an Epic.

        The parent object from issue.fields.parent has a different structure
        than a full Issue, so we handle it separately.
        """
        parent_issuetype = (
            getattr(parent.fields, "issuetype", None)
            if hasattr(parent, "fields")
            else None
        )
        if parent_issuetype is None:
            return False
        return parent_issuetype.name.lower() == "epic"

    def _yield_project_hierarchy_node(
        self,
        project_key: str,
        project_name: str | None,
        seen_hierarchy_node_ids: set[str],
    ) -> Generator[HierarchyNode, None, None]:
        """Yield a hierarchy node for a project if not already yielded."""
        if project_key in seen_hierarchy_node_ids:
            return

        seen_hierarchy_node_ids.add(project_key)

        yield HierarchyNode(
            raw_node_id=project_key,
            raw_parent_id=None,  # Parent is SOURCE
            display_name=project_name or project_key,
            link=f"{self.jira_base}/projects/{project_key}",
            node_type=HierarchyNodeType.PROJECT,
        )

    def _yield_epic_hierarchy_node(
        self,
        issue: Issue,
        project_key: str,
        seen_hierarchy_node_ids: set[str],
    ) -> Generator[HierarchyNode, None, None]:
        """Yield a hierarchy node for an Epic issue."""
        issue_key = issue.key
        if issue_key in seen_hierarchy_node_ids:
            return

        seen_hierarchy_node_ids.add(issue_key)

        yield HierarchyNode(
            raw_node_id=issue_key,
            raw_parent_id=project_key,
            display_name=f"{issue_key}: {issue.fields.summary}",
            link=build_jira_url(self.jira_base, issue_key),
            node_type=HierarchyNodeType.FOLDER,  # don't have a separate epic node type
        )

    def _yield_parent_hierarchy_node_if_epic(
        self,
        parent: Any,
        project_key: str,
        seen_hierarchy_node_ids: set[str],
    ) -> Generator[HierarchyNode, None, None]:
        """Yield hierarchy node for parent issue if it's an Epic we haven't seen."""
        parent_key = parent.key
        if parent_key in seen_hierarchy_node_ids:
            return

        if not self._is_parent_epic(parent):
            # Not an epic, don't create hierarchy node for it
            return

        seen_hierarchy_node_ids.add(parent_key)

        # Get summary if available
        parent_summary = (
            getattr(parent.fields, "summary", None)
            if hasattr(parent, "fields")
            else None
        )
        display_name = (
            f"{parent_key}: {parent_summary}" if parent_summary else parent_key
        )

        yield HierarchyNode(
            raw_node_id=parent_key,
            raw_parent_id=project_key,
            display_name=display_name,
            link=build_jira_url(self.jira_base, parent_key),
            node_type=HierarchyNodeType.FOLDER,  # don't have a separate epic node type
        )

    def _get_parent_hierarchy_raw_node_id(self, issue: Issue, project_key: str) -> str:
        """Determine the parent hierarchy node ID for an issue.

        Returns:
            - Epic key if issue's parent is an Epic
            - Project key otherwise (for top-level issues or non-epic parents)
        """
        parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)
        if parent is None:
            # No parent, directly under project
            return project_key

        if self._is_parent_epic(parent):
            return parent.key

        # For non-epic parents (e.g., story with subtasks),
        # the document belongs directly under the project in the hierarchy
        return project_key

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self._jira_client = build_jira_client(
            credentials=credentials,
            jira_base=self.jira_base,
            scoped_token=self.scoped_token,
        )
        return None

    def _get_jql_query(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> str:
        """Get the JQL query based on configuration and time range

        If a custom JQL query is provided, it will be used and combined with time constraints.
        Otherwise, the query will be constructed based on project key (if provided).
        """
        start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )
        end_date_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        time_jql = f"updated >= '{start_date_str}' AND updated <= '{end_date_str}'"

        # If custom JQL query is provided, use it and combine with time constraints
        if self.jql_query:
            return f"({self.jql_query}) AND {time_jql}"

        # Otherwise, use project key if provided
        if self.jira_project:
            base_jql = f"project = {self.quoted_jira_project}"
            return f"{base_jql} AND {time_jql}"

        return time_jql

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: JiraConnectorCheckpoint,
    ) -> CheckpointOutput[JiraConnectorCheckpoint]:
        jql = self._get_jql_query(start, end)
        try:
            return self._load_from_checkpoint(
                jql, checkpoint, include_permissions=False
            )
        except Exception as e:
            if is_atlassian_date_error(e):
                jql = self._get_jql_query(start - ONE_HOUR, end)
                return self._load_from_checkpoint(
                    jql, checkpoint, include_permissions=False
                )
            raise e

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: JiraConnectorCheckpoint,
    ) -> CheckpointOutput[JiraConnectorCheckpoint]:
        """Load documents from checkpoint with permission information included."""
        jql = self._get_jql_query(start, end)
        try:
            return self._load_from_checkpoint(jql, checkpoint, include_permissions=True)
        except Exception as e:
            if is_atlassian_date_error(e):
                jql = self._get_jql_query(start - ONE_HOUR, end)
                return self._load_from_checkpoint(
                    jql, checkpoint, include_permissions=True
                )
            raise e

    def _load_from_checkpoint(
        self, jql: str, checkpoint: JiraConnectorCheckpoint, include_permissions: bool
    ) -> CheckpointOutput[JiraConnectorCheckpoint]:
        # Get the current offset from checkpoint or start at 0
        starting_offset = checkpoint.offset or 0
        current_offset = starting_offset
        new_checkpoint = copy.deepcopy(checkpoint)

        # Convert checkpoint list to set for efficient lookups
        seen_hierarchy_node_ids = set(new_checkpoint.seen_hierarchy_node_ids)

        checkpoint_callback = make_checkpoint_callback(new_checkpoint)

        for issue in _perform_jql_search(
            jira_client=self.jira_client,
            jql=jql,
            start=current_offset,
            max_results=_JIRA_FULL_PAGE_SIZE,
            all_issue_ids=new_checkpoint.all_issue_ids,
            checkpoint_callback=checkpoint_callback,
            nextPageToken=new_checkpoint.cursor,
            ids_done=new_checkpoint.ids_done,
        ):
            issue_key = issue.key
            try:
                # Get project info for hierarchy
                project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)
                project_key = project.key if project else None
                project_name = project.name if project else None

                # Yield hierarchy nodes BEFORE the document (parent-before-child)
                if project_key:
                    # 1. Yield project hierarchy node (if not already yielded)
                    yield from self._yield_project_hierarchy_node(
                        project_key, project_name, seen_hierarchy_node_ids
                    )

                    # 2. If parent is an Epic, yield hierarchy node for it
                    parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)
                    if parent:
                        yield from self._yield_parent_hierarchy_node_if_epic(
                            parent, project_key, seen_hierarchy_node_ids
                        )

                    # 3. If this issue IS an Epic, yield it as hierarchy node
                    if self._is_epic(issue):
                        yield from self._yield_epic_hierarchy_node(
                            issue, project_key, seen_hierarchy_node_ids
                        )

                # Determine parent hierarchy node ID for the document
                parent_hierarchy_raw_node_id = (
                    self._get_parent_hierarchy_raw_node_id(issue, project_key)
                    if project_key
                    else None
                )

                if document := process_jira_issue(
                    jira_base_url=self.jira_base,
                    issue=issue,
                    comment_email_blacklist=self.comment_email_blacklist,
                    labels_to_skip=self.labels_to_skip,
                    parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
                ):
                    # Add permission information to the document if requested
                    if include_permissions:
                        document.external_access = self._get_project_permissions(
                            project_key,
                            add_prefix=True,  # Indexing path - prefix here
                        )
                    yield document

            except Exception as e:
                yield ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=issue_key,
                        document_link=build_jira_url(self.jira_base, issue_key),
                    ),
                    failure_message=f"Failed to process Jira issue: {str(e)}",
                    exception=e,
                )

            current_offset += 1

        # Update checkpoint with seen hierarchy nodes
        new_checkpoint.seen_hierarchy_node_ids = list(seen_hierarchy_node_ids)

        # Update checkpoint
        self.update_checkpoint_for_next_run(
            new_checkpoint, current_offset, starting_offset, _JIRA_FULL_PAGE_SIZE
        )

        return new_checkpoint

    def update_checkpoint_for_next_run(
        self,
        checkpoint: JiraConnectorCheckpoint,
        current_offset: int,
        starting_offset: int,
        page_size: int,
    ) -> None:
        if _is_cloud_client(self.jira_client):
            # other updates done in the checkpoint callback
            checkpoint.has_more = (
                len(checkpoint.all_issue_ids) > 0 or not checkpoint.ids_done
            )
        else:
            checkpoint.offset = current_offset
            # if we didn't retrieve a full batch, we're done
            checkpoint.has_more = current_offset - starting_offset == page_size

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        one_day = timedelta(hours=24).total_seconds()

        start = start or 0
        end = (
            end or datetime.now().timestamp() + one_day
        )  # we add one day to account for any potential timezone issues

        jql = self._get_jql_query(start, end)
        checkpoint = self.build_dummy_checkpoint()
        checkpoint_callback = make_checkpoint_callback(checkpoint)
        prev_offset = 0
        current_offset = 0
        slim_doc_batch: list[SlimDocument | HierarchyNode] = []

        # Track seen hierarchy nodes within this sync run
        seen_hierarchy_node_ids: set[str] = set()

        while checkpoint.has_more:
            for issue in _perform_jql_search(
                jira_client=self.jira_client,
                jql=jql,
                start=current_offset,
                max_results=JIRA_SLIM_PAGE_SIZE,
                all_issue_ids=checkpoint.all_issue_ids,
                checkpoint_callback=checkpoint_callback,
                nextPageToken=checkpoint.cursor,
                ids_done=checkpoint.ids_done,
            ):
                # Get project info
                project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)
                project_key = project.key if project else None
                project_name = project.name if project else None

                if not project_key:
                    continue

                # Yield hierarchy nodes BEFORE the slim document (parent-before-child)
                # 1. Yield project hierarchy node (if not already yielded)
                for node in self._yield_project_hierarchy_node(
                    project_key, project_name, seen_hierarchy_node_ids
                ):
                    slim_doc_batch.append(node)

                # 2. If parent is an Epic, yield hierarchy node for it
                parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)
                if parent:
                    for node in self._yield_parent_hierarchy_node_if_epic(
                        parent, project_key, seen_hierarchy_node_ids
                    ):
                        slim_doc_batch.append(node)

                # 3. If this issue IS an Epic, yield it as hierarchy node
                if self._is_epic(issue):
                    for node in self._yield_epic_hierarchy_node(
                        issue, project_key, seen_hierarchy_node_ids
                    ):
                        slim_doc_batch.append(node)

                # Now add the slim document
                issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY)
                doc_id = build_jira_url(self.jira_base, issue_key)

                slim_doc_batch.append(
                    SlimDocument(
                        id=doc_id,
                        # Permission sync path - don't prefix, upsert_document_external_perms handles it
                        external_access=self._get_project_permissions(
                            project_key, add_prefix=False
                        ),
                        parent_hierarchy_raw_node_id=(
                            self._get_parent_hierarchy_raw_node_id(issue, project_key)
                            if project_key
                            else None
                        ),
                    )
                )
                current_offset += 1
                if len(slim_doc_batch) >= JIRA_SLIM_PAGE_SIZE:
                    yield slim_doc_batch
                    slim_doc_batch = []
            self.update_checkpoint_for_next_run(
                checkpoint, current_offset, prev_offset, JIRA_SLIM_PAGE_SIZE
            )
            prev_offset = current_offset

        if slim_doc_batch:
            yield slim_doc_batch

    def validate_connector_settings(self) -> None:
        if self._jira_client is None:
            raise ConnectorMissingCredentialError("Jira")

        # If a custom JQL query is set, validate it's valid
        if self.jql_query:
            try:
                # Try to execute the JQL query with a small limit to validate its syntax
                # Use next(iter(...), None) to get just the first result without
                # forcing evaluation of all results
                next(
                    iter(
                        _perform_jql_search(
                            jira_client=self.jira_client,
                            jql=self.jql_query,
                            start=0,
                            max_results=1,
                            all_issue_ids=[],
                        )
                    ),
                    None,
                )
            except Exception as e:
                self._handle_jira_connector_settings_error(e)

        # If a specific project is set, validate it exists
        elif self.jira_project:
            try:
                self.jira_client.project(self.jira_project)
            except Exception as e:
                self._handle_jira_connector_settings_error(e)
        else:
            # If neither JQL nor project specified, validate we can access the Jira API
            try:
                # Try to list projects to validate access
                self.jira_client.projects()
            except Exception as e:
                self._handle_jira_connector_settings_error(e)

    def _handle_jira_connector_settings_error(self, e: Exception) -> None:
        """Helper method to handle Jira API errors consistently.

        Extracts error messages from the Jira API response for all status codes when possible,
        providing more user-friendly error messages.

        Args:
            e: The exception raised by the Jira API

        Raises:
            CredentialExpiredError: If the status code is 401
            InsufficientPermissionsError: If the status code is 403
            ConnectorValidationError: For other HTTP errors with extracted error messages
        """
        status_code = getattr(e, "status_code", None)
        logger.error(f"Jira API error during validation: {e}")

        # Handle specific status codes with appropriate exceptions
        if status_code == 401:
            raise CredentialExpiredError(
                "Jira credential appears to be expired or invalid (HTTP 401)."
            )
        elif status_code == 403:
            raise InsufficientPermissionsError(
                "Your Jira token does not have sufficient permissions for this configuration (HTTP 403)."
            )
        elif status_code == 429:
            raise ConnectorValidationError(
                "Validation failed due to Jira rate-limits being exceeded. Please try again later."
            )

        # Try to extract original error message from the response
        error_message = getattr(e, "text", None)
        if error_message is None:
            raise UnexpectedValidationError(
                f"Unexpected Jira error during validation: {e}"
            )

        raise ConnectorValidationError(
            f"Validation failed due to Jira error: {error_message}"
        )

    @override
    def validate_checkpoint_json(self, checkpoint_json: str) -> JiraConnectorCheckpoint:
        return JiraConnectorCheckpoint.model_validate_json(checkpoint_json)

    @override
    def build_dummy_checkpoint(self) -> JiraConnectorCheckpoint:
        return JiraConnectorCheckpoint(
            has_more=True,
        )


def make_checkpoint_callback(
    checkpoint: JiraConnectorCheckpoint,
) -> Callable[[Iterator[list[str]], str | None], None]:
    def checkpoint_callback(
        issue_ids: Iterator[list[str]], pageToken: str | None
    ) -> None:
        for id_batch in issue_ids:
            checkpoint.all_issue_ids.append(id_batch)
        checkpoint.cursor = pageToken
        # pageToken starts out as None and is only None once we've fetched all the issue ids
        checkpoint.ids_done = pageToken is None

    return checkpoint_callback


if __name__ == "__main__":
    import os
    from onyx.utils.variable_functionality import global_version
    from tests.daily.connectors.utils import load_all_from_connector

    # For connector permission testing, set EE to true.
    global_version.set_ee()

    connector = JiraConnector(
        jira_base_url=os.environ["JIRA_BASE_URL"],
        project_key=os.environ.get("JIRA_PROJECT_KEY"),
        comment_email_blacklist=[],
    )

    connector.load_credentials(
        {
            "jira_user_email": os.environ["JIRA_USER_EMAIL"],
            "jira_api_token": os.environ["JIRA_API_TOKEN"],
        }
    )

    start = 0
    end = datetime.now().timestamp()

    for slim_doc in connector.retrieve_all_slim_docs_perm_sync(
        start=start,
        end=end,
    ):
        print(slim_doc)

    for doc in load_all_from_connector(
        connector=connector,
        start=start,
        end=end,
    ).documents:
        print(doc)


================================================
FILE: backend/onyx/connectors/jira/utils.py
================================================
"""Module with custom fields processing functions"""

import os
from typing import Any
from typing import List
from urllib.parse import urlparse

from jira import JIRA
from jira.resources import CustomFieldOption
from jira.resources import Issue
from jira.resources import User

from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.models import BasicExpertInfo
from onyx.utils.logger import setup_logger

logger = setup_logger()


PROJECT_URL_PAT = "projects"
JIRA_SERVER_API_VERSION = os.environ.get("JIRA_SERVER_API_VERSION") or "2"
JIRA_CLOUD_API_VERSION = os.environ.get("JIRA_CLOUD_API_VERSION") or "3"


def best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None:
    display_name = None
    email = None

    try:
        if hasattr(obj, "displayName"):
            display_name = obj.displayName
        else:
            display_name = obj.get("displayName")

        if hasattr(obj, "emailAddress"):
            email = obj.emailAddress
        else:
            email = obj.get("emailAddress")

    except Exception:
        return None

    if not email and not display_name:
        return None

    return BasicExpertInfo(display_name=display_name, email=email)


def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
    if hasattr(jira_issue, field):
        return getattr(jira_issue, field)

    if hasattr(jira_issue, "fields") and hasattr(jira_issue.fields, field):
        return getattr(jira_issue.fields, field)

    try:
        return jira_issue.raw["fields"][field]
    except Exception:
        return None


def extract_text_from_adf(adf: dict | None) -> str:
    """Extracts plain text from Atlassian Document Format:
    https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/

    WARNING: This function is incomplete and will e.g. skip lists!
    """
    # TODO: complete this function
    texts = []
    if adf is not None and "content" in adf:
        for block in adf["content"]:
            if "content" in block:
                for item in block["content"]:
                    if item["type"] == "text":
                        texts.append(item["text"])
    return " ".join(texts)


def build_jira_url(jira_base_url: str, issue_key: str) -> str:
    """
    Get the url used to access an issue in the UI.
    """
    return f"{jira_base_url}/browse/{issue_key}"


def build_jira_client(
    credentials: dict[str, Any], jira_base: str, scoped_token: bool = False
) -> JIRA:

    jira_base = scoped_url(jira_base, "jira") if scoped_token else jira_base
    api_token = credentials["jira_api_token"]
    # if user provide an email we assume it's cloud
    if "jira_user_email" in credentials:
        email = credentials["jira_user_email"]
        return JIRA(
            basic_auth=(email, api_token),
            server=jira_base,
            options={"rest_api_version": JIRA_CLOUD_API_VERSION},
        )
    else:
        return JIRA(
            token_auth=api_token,
            server=jira_base,
            options={"rest_api_version": JIRA_SERVER_API_VERSION},
        )


def extract_jira_project(url: str) -> tuple[str, str]:
    parsed_url = urlparse(url)
    jira_base = parsed_url.scheme + "://" + parsed_url.netloc

    # Split the path by '/' and find the position of 'projects' to get the project name
    split_path = parsed_url.path.split("/")
    if PROJECT_URL_PAT in split_path:
        project_pos = split_path.index(PROJECT_URL_PAT)
        if len(split_path) > project_pos + 1:
            jira_project = split_path[project_pos + 1]
        else:
            raise ValueError("No project name found in the URL")
    else:
        raise ValueError("'projects' not found in the URL")

    return jira_base, jira_project


def get_comment_strs(
    issue: Issue, comment_email_blacklist: tuple[str, ...] = ()
) -> list[str]:
    comment_strs = []
    for comment in issue.fields.comment.comments:
        try:
            if isinstance(comment.body, str):
                body_text = comment.body
            else:
                body_text = extract_text_from_adf(comment.raw["body"])

            if (
                hasattr(comment, "author")
                and hasattr(comment.author, "emailAddress")
                and comment.author.emailAddress in comment_email_blacklist
            ):
                continue  # Skip adding comment if author's email is in blacklist

            comment_strs.append(body_text)
        except Exception as e:
            logger.error(f"Failed to process comment due to an error: {e}")
            continue

    return comment_strs


def get_jira_project_key_from_issue(issue: Issue) -> str | None:
    if not hasattr(issue, "fields"):
        return None
    if not hasattr(issue.fields, "project"):
        return None
    if not hasattr(issue.fields.project, "key"):
        return None

    return issue.fields.project.key


class CustomFieldExtractor:
    @staticmethod
    def _process_custom_field_value(value: Any) -> str:
        """
        Process a custom field value to a string
        """
        try:
            if isinstance(value, str):
                return value
            elif isinstance(value, CustomFieldOption):
                return value.value
            elif isinstance(value, User):
                return value.displayName
            elif isinstance(value, List):
                return " ".join(
                    [CustomFieldExtractor._process_custom_field_value(v) for v in value]
                )
            else:
                return str(value)
        except Exception as e:
            logger.error(f"Error processing custom field value {value}: {e}")
            return ""

    @staticmethod
    def get_issue_custom_fields(
        jira: Issue, custom_fields: dict, max_value_length: int = 250
    ) -> dict:
        """
        Process all custom fields of an issue to a dictionary of strings
        :param jira: jira_issue, bug or similar
        :param custom_fields: custom fields dictionary
        :param max_value_length: maximum length of the value to be processed, if exceeded, it will be truncated
        """

        issue_custom_fields = {
            custom_fields[key]: value
            for key, value in jira.fields.__dict__.items()
            if value and key in custom_fields.keys()
        }

        processed_fields = {}

        if issue_custom_fields:
            for key, value in issue_custom_fields.items():
                processed = CustomFieldExtractor._process_custom_field_value(value)
                # We need max length  parameter, because there are some plugins that often has very long description
                # and there is just a technical information so we just avoid long values
                if len(processed) < max_value_length:
                    processed_fields[key] = processed

        return processed_fields

    @staticmethod
    def get_all_custom_fields(jira_client: JIRA) -> dict:
        """Get all custom fields from Jira"""
        fields = jira_client.fields()
        fields_dct = {
            field["id"]: field["name"] for field in fields if field["custom"] is True
        }
        return fields_dct


class CommonFieldExtractor:
    @staticmethod
    def get_issue_common_fields(jira: Issue) -> dict:
        return {
            "Priority": jira.fields.priority.name if jira.fields.priority else None,
            "Reporter": (
                jira.fields.reporter.displayName if jira.fields.reporter else None
            ),
            "Assignee": (
                jira.fields.assignee.displayName if jira.fields.assignee else None
            ),
            "Status": jira.fields.status.name if jira.fields.status else None,
            "Resolution": (
                jira.fields.resolution.name if jira.fields.resolution else None
            ),
        }


================================================
FILE: backend/onyx/connectors/linear/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/linear/connector.py
================================================
import os
import re
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from urllib.parse import urlparse

import requests
from typing_extensions import override

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import LINEAR_CLIENT_ID
from onyx.configs.app_configs import LINEAR_CLIENT_SECRET
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_oauth_callback_uri,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import OAuthConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import request_with_retries


logger = setup_logger()

_NUM_RETRIES = 5
_TIMEOUT = 60
_LINEAR_GRAPHQL_URL = "https://api.linear.app/graphql"


def _make_query(request_body: dict[str, Any], api_key: str) -> requests.Response:
    headers = {
        "Authorization": api_key,
        "Content-Type": "application/json",
    }

    for i in range(_NUM_RETRIES):
        try:
            response = requests.post(
                _LINEAR_GRAPHQL_URL,
                headers=headers,
                json=request_body,
                timeout=_TIMEOUT,
            )
            if not response.ok:
                raise RuntimeError(
                    f"Error fetching issues from Linear: {response.text}"
                )

            return response
        except Exception as e:
            if i == _NUM_RETRIES - 1:
                raise e

            logger.warning(f"A Linear GraphQL error occurred: {e}. Retrying...")

    raise RuntimeError(
        "Unexpected execution when querying Linear. This should never happen."
    )


class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.batch_size = batch_size
        self.linear_api_key: str | None = None

    @classmethod
    def oauth_id(cls) -> DocumentSource:
        return DocumentSource.LINEAR

    @classmethod
    def oauth_authorization_url(
        cls,
        base_domain: str,
        state: str,
        additional_kwargs: dict[str, str],  # noqa: ARG003
    ) -> str:
        if not LINEAR_CLIENT_ID:
            raise ValueError("LINEAR_CLIENT_ID environment variable must be set")

        callback_uri = get_oauth_callback_uri(base_domain, DocumentSource.LINEAR.value)
        return (
            f"https://linear.app/oauth/authorize"
            f"?client_id={LINEAR_CLIENT_ID}"
            f"&redirect_uri={callback_uri}"
            f"&response_type=code"
            f"&scope=read"
            f"&state={state}"
            f"&prompt=consent"  # prompts user for access; allows choosing workspace
        )

    @classmethod
    def oauth_code_to_token(
        cls,
        base_domain: str,
        code: str,
        additional_kwargs: dict[str, str],  # noqa: ARG003
    ) -> dict[str, Any]:
        data = {
            "code": code,
            "redirect_uri": get_oauth_callback_uri(
                base_domain, DocumentSource.LINEAR.value
            ),
            "client_id": LINEAR_CLIENT_ID,
            "client_secret": LINEAR_CLIENT_SECRET,
            "grant_type": "authorization_code",
        }
        headers = {"Content-Type": "application/x-www-form-urlencoded"}

        response = request_with_retries(
            method="POST",
            url="https://api.linear.app/oauth/token",
            data=data,
            headers=headers,
            backoff=0,
            delay=0.1,
        )
        if not response.ok:
            raise RuntimeError(f"Failed to exchange code for token: {response.text}")

        token_data = response.json()

        return {
            "access_token": token_data["access_token"],
        }

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        if "linear_api_key" in credentials:
            self.linear_api_key = cast(str, credentials["linear_api_key"])
        elif "access_token" in credentials:
            self.linear_api_key = "Bearer " + cast(str, credentials["access_token"])
        else:
            # May need to handle case in the future if the OAuth flow expires
            raise ConnectorMissingCredentialError("Linear")

        return None

    def _process_issues(
        self, start_str: datetime | None = None, end_str: datetime | None = None
    ) -> GenerateDocumentsOutput:
        if self.linear_api_key is None:
            raise ConnectorMissingCredentialError("Linear")

        lte_filter = f'lte: "{end_str}"' if end_str else ""
        gte_filter = f'gte: "{start_str}"' if start_str else ""
        updatedAtFilter = f"""
            {lte_filter}
            {gte_filter}
        """

        query = (
            """
            query IterateIssueBatches($first: Int, $after: String) {
                issues(
                    orderBy: updatedAt,
                    first: $first,
                    after: $after,
                    filter: {
                        updatedAt: {
        """
            + updatedAtFilter
            + """
                        },

                    }
                ) {
                    edges {
                        node {
                            id
                            createdAt
                            updatedAt
                            archivedAt
                            number
                            title
                            priority
                            estimate
                            sortOrder
                            startedAt
                            completedAt
                            startedTriageAt
                            triagedAt
                            canceledAt
                            autoClosedAt
                            autoArchivedAt
                            dueDate
                            slaStartedAt
                            slaBreachesAt
                            trashed
                            snoozedUntilAt
                            team {
                                name
                            }
                            creator {
                                name
                                email
                            }
                            assignee {
                                name
                                email
                            }
                            previousIdentifiers
                            subIssueSortOrder
                            priorityLabel
                            identifier
                            url
                            branchName
                            state {
                                id
                                name
                            }
                            customerTicketCount
                            description
                            comments {
                                nodes {
                                    url
                                    body
                                }
                            }
                        }
                    }
                    pageInfo {
                        hasNextPage
                        endCursor
                    }
                }
            }
        """
        )

        has_more = True
        endCursor = None
        while has_more:
            graphql_query = {
                "query": query,
                "variables": {
                    "first": self.batch_size,
                    "after": endCursor,
                },
            }
            logger.debug(f"Requesting issues from Linear with query: {graphql_query}")

            response = _make_query(graphql_query, self.linear_api_key)
            response_json = response.json()
            logger.debug(f"Raw response from Linear: {response_json}")
            edges = response_json["data"]["issues"]["edges"]

            documents: list[Document | HierarchyNode] = []
            for edge in edges:
                node = edge["node"]
                # Create sections for description and comments
                sections = [
                    TextSection(
                        link=node["url"],
                        text=node["description"] or "",
                    )
                ]

                # Add comment sections
                for comment in node["comments"]["nodes"]:
                    sections.append(
                        TextSection(
                            link=node["url"],
                            text=comment["body"] or "",
                        )
                    )

                # Cast the sections list to the expected type
                typed_sections = cast(list[TextSection | ImageSection], sections)

                # Extract team name for hierarchy
                team_name = (node.get("team") or {}).get("name") or "Unknown Team"
                identifier = node.get("identifier", node["id"])

                documents.append(
                    Document(
                        id=node["id"],
                        sections=typed_sections,
                        source=DocumentSource.LINEAR,
                        semantic_identifier=f"[{node['identifier']}] {node['title']}",
                        title=node["title"],
                        doc_updated_at=time_str_to_utc(node["updatedAt"]),
                        doc_metadata={
                            "hierarchy": {
                                "source_path": [team_name],
                                "team_name": team_name,
                                "identifier": identifier,
                            }
                        },
                        metadata={
                            k: str(v)
                            for k, v in {
                                "team": (node.get("team") or {}).get("name"),
                                "creator": node.get("creator"),
                                "assignee": node.get("assignee"),
                                "state": (node.get("state") or {}).get("name"),
                                "priority": node.get("priority"),
                                "estimate": node.get("estimate"),
                                "started_at": node.get("startedAt"),
                                "completed_at": node.get("completedAt"),
                                "created_at": node.get("createdAt"),
                                "due_date": node.get("dueDate"),
                            }.items()
                            if v is not None
                        },
                    )
                )
            yield documents

            endCursor = response_json["data"]["issues"]["pageInfo"]["endCursor"]
            has_more = response_json["data"]["issues"]["pageInfo"]["hasNextPage"]

    def load_from_state(self) -> GenerateDocumentsOutput:
        yield from self._process_issues()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
        end_time = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._process_issues(start_str=start_time, end_str=end_time)

    @classmethod
    @override
    def normalize_url(cls, url: str) -> NormalizationResult:
        """Extract Linear issue identifier from URL.

        Linear URLs are like: https://linear.app/team/issue/IDENTIFIER/...
        Returns the identifier (e.g., "DAN-2327") which can be used to match Document.link.
        """
        parsed = urlparse(url)
        netloc = parsed.netloc.lower()

        if "linear.app" not in netloc:
            return NormalizationResult(normalized_url=None, use_default=False)

        # Extract identifier from path: /team/issue/IDENTIFIER/...
        # Pattern: /{team}/issue/{identifier}/...
        path_parts = [p for p in parsed.path.split("/") if p]
        if len(path_parts) >= 3 and path_parts[1] == "issue":
            identifier = path_parts[2]
            # Validate identifier format (e.g., "DAN-2327")
            if re.match(r"^[A-Z]+-\d+$", identifier):
                return NormalizationResult(normalized_url=identifier, use_default=False)

        return NormalizationResult(normalized_url=None, use_default=False)


if __name__ == "__main__":
    connector = LinearConnector()
    connector.load_credentials({"linear_api_key": os.environ["LINEAR_API_KEY"]})

    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/loopio/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/loopio/connector.py
================================================
import json
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any

from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session  # type: ignore

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.file_processing.html_utils import strip_excessive_newlines_and_spaces
from onyx.utils.logger import setup_logger

LOOPIO_API_BASE = "https://api.loopio.com/"
LOOPIO_AUTH_URL = LOOPIO_API_BASE + "oauth2/access_token"
LOOPIO_DATA_URL = LOOPIO_API_BASE + "data/"

logger = setup_logger()


class LoopioConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        loopio_stack_name: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.batch_size = batch_size
        self.loopio_client_id: str | None = None
        self.loopio_client_token: str | None = None
        self.loopio_stack_name = loopio_stack_name

    def _fetch_data(
        self, resource: str, params: dict[str, str | int]
    ) -> Generator[dict[str, Any], None, None]:
        client = BackendApplicationClient(
            client_id=self.loopio_client_id, scope=["library:read"]
        )
        session = OAuth2Session(client=client)
        session.fetch_token(
            token_url=LOOPIO_AUTH_URL,
            client_id=self.loopio_client_id,
            client_secret=self.loopio_client_token,
        )
        page = 0
        stop_at_page = 1
        while (page := page + 1) <= stop_at_page:
            params["page"] = page
            response = session.request(
                "GET",
                LOOPIO_DATA_URL + resource,
                headers={"Accept": "application/json"},
                params=params,
            )
            if response.status_code == 400:
                logger.error(
                    f"Loopio API returned 400 for {resource} with params {params}",
                )
                logger.error(response.text)
            response.raise_for_status()
            response_data = json.loads(response.text)
            stop_at_page = response_data.get("totalPages", 1)
            yield response_data

    def _build_search_filter(
        self, stack_name: str | None, start: str | None, end: str | None
    ) -> dict[str, Any]:
        filter: dict[str, Any] = {}
        if start is not None and end is not None:
            filter["lastUpdatedDate"] = {"gte": start, "lt": end}

        if stack_name is not None:
            # Right now this is fetching the stacks every time, which is not ideal.
            # We should update this later to store the ID when we create the Connector
            for stack in self._fetch_data(resource="v2/stacks", params={}):
                for item in stack["items"]:
                    if item["name"] == stack_name:
                        filter["locations"] = [{"stackID": item["id"]}]
                        break
            if "locations" not in filter:
                raise ValueError(f"Stack {stack_name} not found in Loopio")
        return filter

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.loopio_subdomain = credentials["loopio_subdomain"]
        self.loopio_client_id = credentials["loopio_client_id"]
        self.loopio_client_token = credentials["loopio_client_token"]
        return None

    def _process_entries(
        self, start: str | None = None, end: str | None = None
    ) -> GenerateDocumentsOutput:
        if self.loopio_client_id is None or self.loopio_client_token is None:
            raise ConnectorMissingCredentialError("Loopio")

        filter = self._build_search_filter(
            stack_name=self.loopio_stack_name, start=start, end=end
        )
        params: dict[str, str | int] = {"pageSize": self.batch_size}
        params["filter"] = json.dumps(filter)

        doc_batch: list[Document | HierarchyNode] = []
        for library_entries in self._fetch_data(
            resource="v2/libraryEntries", params=params
        ):
            for entry in library_entries.get("items", []):
                link = f"https://{self.loopio_subdomain}.loopio.com/library?entry={entry['id']}"
                topic = "/".join(
                    part["name"] for part in entry["location"].values() if part
                )

                answer_text = entry.get("answer", {}).get("text", "")
                if not answer_text:
                    logger.warning(
                        f"The Library entry {entry['id']} has no answer text. Skipping."
                    )
                    continue

                try:
                    answer = parse_html_page_basic(answer_text)
                except Exception as e:
                    logger.error(f"Error parsing HTML for entry {entry['id']}: {e}")
                    continue

                questions = [
                    question.get("text").replace("\xa0", " ")
                    for question in entry["questions"]
                    if question.get("text")
                ]
                questions_string = strip_excessive_newlines_and_spaces(
                    "\n".join(questions)
                )
                content_text = f"{answer}\n\nRelated Questions: {questions_string}"
                content_text = strip_excessive_newlines_and_spaces(
                    content_text.replace("\xa0", " ")
                )

                last_updated = time_str_to_utc(entry["lastUpdatedDate"])
                last_reviewed = (
                    time_str_to_utc(entry["lastReviewedDate"])
                    if entry.get("lastReviewedDate")
                    else None
                )

                # For Onyx, we decay document score overtime, either last_updated or
                # last_reviewed is a good enough signal for the document's recency
                latest_time = (
                    max(last_reviewed, last_updated) if last_reviewed else last_updated
                )
                creator = entry.get("creator")
                last_updated_by = entry.get("lastUpdatedBy")
                last_reviewed_by = entry.get("lastReviewedBy")

                primary_owners: list[BasicExpertInfo] = [
                    BasicExpertInfo(display_name=owner.get("name"))
                    for owner in [creator, last_updated_by]
                    if owner is not None
                ]
                secondary_owners: list[BasicExpertInfo] = [
                    BasicExpertInfo(display_name=owner.get("name"))
                    for owner in [last_reviewed_by]
                    if owner is not None
                ]
                doc_batch.append(
                    Document(
                        id=str(entry["id"]),
                        sections=[TextSection(link=link, text=content_text)],
                        source=DocumentSource.LOOPIO,
                        semantic_identifier=questions[0],
                        doc_updated_at=latest_time,
                        primary_owners=primary_owners,
                        secondary_owners=secondary_owners,
                        metadata={
                            "topic": topic,
                            "questions": "\n".join(questions),
                            "creator": creator.get("name") if creator else "",
                        },
                    )
                )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []
        if len(doc_batch) > 0:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._process_entries()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_time = datetime.fromtimestamp(start, tz=timezone.utc).isoformat(
            timespec="seconds"
        )
        end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat(
            timespec="seconds"
        )

        return self._process_entries(start_time, end_time)


if __name__ == "__main__":
    import os

    connector = LoopioConnector(
        loopio_stack_name=os.environ.get("LOOPIO_STACK_NAME", None)
    )
    connector.load_credentials(
        {
            "loopio_client_id": os.environ["LOOPIO_CLIENT_ID"],
            "loopio_client_token": os.environ["LOOPIO_CLIENT_TOKEN"],
            "loopio_subdomain": os.environ["LOOPIO_SUBDOMAIN"],
        }
    )

    latest_docs = connector.load_from_state()
    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/mediawiki/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/mediawiki/family.py
================================================
from __future__ import annotations

import builtins
import functools
import itertools
import tempfile
from typing import Any
from unittest import mock
from urllib.parse import urlparse
from urllib.parse import urlunparse

from pywikibot import family  # type: ignore[import-untyped]
from pywikibot import pagegenerators
from pywikibot.scripts import generate_family_file  # type: ignore[import-untyped]
from pywikibot.scripts.generate_user_files import pywikibot  # type: ignore[import-untyped]

from onyx.utils.logger import setup_logger


logger = setup_logger()

pywikibot.config.base_dir = tempfile.TemporaryDirectory().name


@mock.patch.object(
    builtins, "print", lambda *args: logger.info("\t".join(map(str, args)))
)
class FamilyFileGeneratorInMemory(generate_family_file.FamilyFileGenerator):
    """A subclass of FamilyFileGenerator that writes the family file to memory instead of to disk."""

    def __init__(
        self,
        url: str,
        name: str,
        dointerwiki: str | bool = True,
        verify: str | bool = True,
    ):
        """Initialize the FamilyFileGeneratorInMemory."""

        url_parse = urlparse(url, "https")
        if not url_parse.netloc and url_parse.path:
            url = urlunparse(
                (url_parse.scheme, url_parse.path, url_parse.netloc, *url_parse[3:])
            )
        else:
            url = urlunparse(url_parse)
        assert isinstance(url, str)

        if any(x not in generate_family_file.NAME_CHARACTERS for x in name):
            raise ValueError(
                f'ERROR: Name of family "{name}" must be ASCII letters and digits [a-zA-Z0-9]',
            )

        if isinstance(dointerwiki, bool):
            dointerwiki = "Y" if dointerwiki else "N"
        assert isinstance(dointerwiki, str)

        if isinstance(verify, bool):
            verify = "Y" if verify else "N"
        assert isinstance(verify, str)

        super().__init__(url, name, dointerwiki, verify)
        self.family_definition: type[family.Family] | None = None

    def get_params(self) -> bool:
        """Get the parameters for the family class definition.

        This override prevents the method from prompting the user for input (which would be impossible in this context).
        We do all the input validation in the constructor.
        """
        return True

    def writefile(self, verify: Any) -> None:  # noqa: ARG002
        """Write the family file.

        This overrides the method in the parent class to write the family definition to memory instead of to disk.

        Args:
            verify: unused argument necessary to match the signature of the method in the parent class.
        """
        code_hostname_pairs = {
            f"{k}": f"{urlparse(w.server).netloc}" for k, w in self.wikis.items()
        }

        code_path_pairs = {f"{k}": f"{w.scriptpath}" for k, w in self.wikis.items()}

        code_protocol_pairs = {
            f"{k}": f"{urlparse(w.server).scheme}" for k, w in self.wikis.items()
        }

        class Family(family.Family):  # noqa: D101
            """The family definition for the wiki."""

            name = "%(name)s"
            langs = code_hostname_pairs

            def scriptpath(self, code: str) -> str:
                return code_path_pairs[code]

            def protocol(self, code: str) -> str:
                return code_protocol_pairs[code]

        self.family_definition = Family


@functools.lru_cache(maxsize=None)
def generate_family_class(url: str, name: str) -> type[family.Family]:
    """Generate a family file for a given URL and name.

    Args:
        url: The URL of the wiki.
        name: The short name of the wiki (customizable by the user).

    Returns:
        The family definition.

    Raises:
        ValueError: If the family definition was not generated.
    """

    generator = FamilyFileGeneratorInMemory(url, name, "Y", "Y")
    generator.run()
    if generator.family_definition is None:
        raise ValueError("Family definition was not generated.")
    return generator.family_definition


def family_class_dispatch(url: str, name: str) -> type[family.Family]:
    """Find or generate a family class for a given URL and name.

    Args:
        url: The URL of the wiki.
        name: The short name of the wiki (customizable by the user).

    """
    if "wikipedia" in url:
        import pywikibot.families.wikipedia_family  # type: ignore[import-untyped]

        return pywikibot.families.wikipedia_family.Family
    # TODO: Support additional families pre-defined in `pywikibot.families.*_family.py` files
    return generate_family_class(url, name)


if __name__ == "__main__":
    url = "fallout.fandom.com/wiki/Fallout_Wiki"
    name = "falloutfandom"

    categories: list[str] = []
    pages = ["Fallout: New Vegas"]
    recursion_depth = 1
    family_type = generate_family_class(url, name)

    site = pywikibot.Site(fam=family_type(), code="en")
    categories = [
        pywikibot.Category(site, f"Category:{category.replace(' ', '_')}")
        for category in categories
    ]
    pages = [pywikibot.Page(site, page) for page in pages]
    all_pages = itertools.chain(
        pages,
        *[
            pagegenerators.CategorizedPageGenerator(category, recurse=recursion_depth)
            for category in categories
        ],
    )
    for page in all_pages:
        print(page.title())
        print(page.text[:1000])


================================================
FILE: backend/onyx/connectors/mediawiki/wiki.py
================================================
from __future__ import annotations

import datetime
import itertools
import tempfile
from collections.abc import Iterator
from typing import Any
from typing import cast
from typing import ClassVar

import pywikibot.time  # type: ignore[import-untyped]
from pywikibot import pagegenerators
from pywikibot import textlib

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.mediawiki.family import family_class_dispatch
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger


logger = setup_logger()

pywikibot.config.base_dir = tempfile.TemporaryDirectory().name


def pywikibot_timestamp_to_utc_datetime(
    timestamp: pywikibot.time.Timestamp,
) -> datetime.datetime:
    """Convert a pywikibot timestamp to a datetime object in UTC.

    Args:
        timestamp: The pywikibot timestamp to convert.

    Returns:
        A datetime object in UTC.
    """
    return datetime.datetime.astimezone(timestamp, tz=datetime.timezone.utc)


def get_doc_from_page(
    page: pywikibot.Page, site: pywikibot.Site | None, source_type: DocumentSource
) -> Document:
    """Generate Onyx Document from a MediaWiki page object.

    Args:
        page: Page from a MediaWiki site.
        site: MediaWiki site (used to parse the sections of the page using the site template, if available).
        source_type: Source of the document.

    Returns:
        Generated document.
    """
    page_text = page.text
    sections_extracted: textlib.Content = textlib.extract_sections(page_text, site)

    sections = [
        TextSection(
            link=f"{page.full_url()}#" + section.heading.replace(" ", "_"),
            text=section.title + section.content,
        )
        for section in sections_extracted.sections
    ]
    sections.append(
        TextSection(
            link=page.full_url(),
            text=sections_extracted.header,
        )
    )

    return Document(
        source=source_type,
        title=page.title(),
        doc_updated_at=pywikibot_timestamp_to_utc_datetime(
            page.latest_revision.timestamp
        ),
        sections=cast(list[TextSection | ImageSection], sections),
        semantic_identifier=page.title(),
        metadata={"categories": [category.title() for category in page.categories()]},
        id=f"MEDIAWIKI_{page.pageid}_{page.full_url()}",
    )


class MediaWikiConnector(LoadConnector, PollConnector):
    """A connector for MediaWiki wikis.

    Args:
        hostname: The hostname of the wiki.
        categories: The categories to include in the index.
        pages: The pages to include in the index.
        recurse_depth: The depth to recurse into categories. -1 means unbounded recursion.
        language_code: The language code of the wiki.
        batch_size: The batch size for loading documents.

    Raises:
        ValueError: If `recurse_depth` is not an integer greater than or equal to -1.
    """

    document_source_type: ClassVar[DocumentSource] = DocumentSource.MEDIAWIKI
    """DocumentSource type for all documents generated by instances of this class. Can be overridden for connectors
    tailored for specific sites."""

    def __init__(
        self,
        hostname: str,
        categories: list[str],
        pages: list[str],
        recurse_depth: int,
        language_code: str = "en",
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        if recurse_depth < -1:
            raise ValueError(
                f"recurse_depth must be an integer greater than or equal to -1. Got {recurse_depth} instead."
            )
        # -1 means infinite recursion, which `pywikibot` will only do with `True`
        self.recurse_depth: bool | int = True if recurse_depth == -1 else recurse_depth

        self.batch_size = batch_size

        # short names can only have ascii letters and digits
        self.family = family_class_dispatch(hostname, "WikipediaConnector")()
        self.site = pywikibot.Site(fam=self.family, code=language_code)
        self.categories = [
            pywikibot.Category(
                self.site,
                (
                    f"{category.replace(' ', '_')}"
                    if category.startswith("Category:")
                    else f"Category:{category.replace(' ', '_')}"
                ),
            )
            for category in categories
        ]

        self.pages = []
        for page in pages:
            if not page:
                continue
            self.pages.append(pywikibot.Page(self.site, page))

    def load_credentials(
        self,
        credentials: dict[str, Any],  # noqa: ARG002
    ) -> dict[str, Any] | None:
        """Load credentials for a MediaWiki site.

        Note:
            For most read-only operations, MediaWiki API credentials are not necessary.
            This method can be overridden in the event that a particular MediaWiki site
            requires credentials.
        """
        return None

    def _get_doc_batch(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> GenerateDocumentsOutput:
        """Request batches of pages from a MediaWiki site.

        Args:
            start: The beginning of the time period of pages to request.
            end: The end of the time period of pages to request.

        Yields:
            Lists of Documents containing each parsed page in a batch.
        """
        doc_batch: list[Document | HierarchyNode] = []

        # Pywikibot can handle batching for us, including only loading page contents when we finally request them.
        category_pages = [
            pagegenerators.PreloadingGenerator(
                pagegenerators.EdittimeFilterPageGenerator(
                    pagegenerators.CategorizedPageGenerator(
                        category, recurse=self.recurse_depth
                    ),
                    last_edit_start=(
                        datetime.datetime.fromtimestamp(start) if start else None
                    ),
                    last_edit_end=datetime.datetime.fromtimestamp(end) if end else None,
                ),
                groupsize=self.batch_size,
            )
            for category in self.categories
        ]

        # Since we can specify both individual pages and categories, we need to iterate over all of them.
        all_pages: Iterator[pywikibot.Page] = itertools.chain(
            self.pages, *category_pages
        )
        for page in all_pages:
            logger.info(
                f"MediaWikiConnector: title='{page.title()}' url={page.full_url()}"
            )
            doc_batch.append(
                get_doc_from_page(page, self.site, self.document_source_type)
            )
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []
        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Load all documents from the source.

        Returns:
            A generator of documents.
        """
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Poll the source for new documents.

        Args:
            start: The start of the time range to poll.
            end: The end of the time range to poll.

        Returns:
            A generator of documents.
        """
        return self._get_doc_batch(start, end)


if __name__ == "__main__":
    HOSTNAME = "fallout.fandom.com"
    test_connector = MediaWikiConnector(
        hostname=HOSTNAME,
        categories=["Fallout:_New_Vegas_factions"],
        pages=["Fallout: New Vegas"],
        recurse_depth=1,
    )

    all_docs = list(test_connector.load_from_state())
    print("All docs", all_docs)
    current = datetime.datetime.now().timestamp()
    one_day_ago = current - 30 * 24 * 60 * 60  # 30 days

    latest_docs = list(test_connector.poll_source(one_day_ago, current))

    print("Latest docs", latest_docs)


================================================
FILE: backend/onyx/connectors/microsoft_graph_env.py
================================================
"""Inverse mapping from user-facing Microsoft host URLs to the SDK's AzureEnvironment.

The office365 library's GraphClient requires an ``AzureEnvironment`` string
(e.g. ``"Global"``, ``"GCC High"``) to route requests to the correct national
cloud.  Our connectors instead expose free-text ``authority_host`` and
``graph_api_host`` fields so the frontend doesn't need to know about SDK
internals.

This module bridges the gap: given the two host URLs the user configured, it
resolves the matching ``AzureEnvironment`` value (and the implied SharePoint
domain suffix) so callers can pass ``environment=…`` to ``GraphClient``.
"""

from office365.graph_client import AzureEnvironment  # type: ignore[import-untyped]
from pydantic import BaseModel

from onyx.connectors.exceptions import ConnectorValidationError


class MicrosoftGraphEnvironment(BaseModel):
    """One row of the inverse mapping."""

    environment: str
    graph_host: str
    authority_host: str
    sharepoint_domain_suffix: str


_ENVIRONMENTS: list[MicrosoftGraphEnvironment] = [
    MicrosoftGraphEnvironment(
        environment=AzureEnvironment.Global,
        graph_host="https://graph.microsoft.com",
        authority_host="https://login.microsoftonline.com",
        sharepoint_domain_suffix="sharepoint.com",
    ),
    MicrosoftGraphEnvironment(
        environment=AzureEnvironment.USGovernmentHigh,
        graph_host="https://graph.microsoft.us",
        authority_host="https://login.microsoftonline.us",
        sharepoint_domain_suffix="sharepoint.us",
    ),
    MicrosoftGraphEnvironment(
        environment=AzureEnvironment.USGovernmentDoD,
        graph_host="https://dod-graph.microsoft.us",
        authority_host="https://login.microsoftonline.us",
        sharepoint_domain_suffix="sharepoint.us",
    ),
    MicrosoftGraphEnvironment(
        environment=AzureEnvironment.China,
        graph_host="https://microsoftgraph.chinacloudapi.cn",
        authority_host="https://login.chinacloudapi.cn",
        sharepoint_domain_suffix="sharepoint.cn",
    ),
    MicrosoftGraphEnvironment(
        environment=AzureEnvironment.Germany,
        graph_host="https://graph.microsoft.de",
        authority_host="https://login.microsoftonline.de",
        sharepoint_domain_suffix="sharepoint.de",
    ),
]

_GRAPH_HOST_INDEX: dict[str, MicrosoftGraphEnvironment] = {
    env.graph_host: env for env in _ENVIRONMENTS
}


def resolve_microsoft_environment(
    graph_api_host: str,
    authority_host: str,
) -> MicrosoftGraphEnvironment:
    """Return the ``MicrosoftGraphEnvironment`` that matches the supplied hosts.

    Raises ``ConnectorValidationError`` when the combination is unknown or
    internally inconsistent (e.g. a GCC-High graph host paired with a
    commercial authority host).
    """
    graph_api_host = graph_api_host.rstrip("/")
    authority_host = authority_host.rstrip("/")

    env = _GRAPH_HOST_INDEX.get(graph_api_host)
    if env is None:
        known = ", ".join(sorted(_GRAPH_HOST_INDEX))
        raise ConnectorValidationError(
            f"Unsupported Microsoft Graph API host '{graph_api_host}'. Recognised hosts: {known}"
        )

    if env.authority_host != authority_host:
        raise ConnectorValidationError(
            f"Authority host '{authority_host}' is inconsistent with "
            f"graph API host '{graph_api_host}'. "
            f"Expected authority host '{env.authority_host}' "
            f"for the {env.environment} environment."
        )

    return env


================================================
FILE: backend/onyx/connectors/mock_connector/connector.py
================================================
from typing import Any

import httpx
from pydantic import BaseModel
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.utils.logger import setup_logger


logger = setup_logger()


EXTERNAL_USER_EMAILS = {"test@example.com", "admin@example.com"}
EXTERNAL_USER_GROUP_IDS = {"mock-group-1", "mock-group-2"}


class MockConnectorCheckpoint(ConnectorCheckpoint):
    last_document_id: str | None = None


class SingleConnectorYield(BaseModel):
    documents: list[Document]
    checkpoint: MockConnectorCheckpoint
    failures: list[ConnectorFailure]
    unhandled_exception: str | None = None


class MockConnector(CheckpointedConnectorWithPermSync[MockConnectorCheckpoint]):
    def __init__(
        self,
        mock_server_host: str,
        mock_server_port: int,
    ) -> None:
        self.mock_server_host = mock_server_host
        self.mock_server_port = mock_server_port
        self.client = httpx.Client(timeout=30.0)

        self.connector_yields: list[SingleConnectorYield] | None = None
        self.current_yield_index: int = 0

    def load_credentials(
        self,
        credentials: dict[str, Any],  # noqa: ARG002
    ) -> dict[str, Any] | None:
        response = self.client.get(self._get_mock_server_url("get-documents"))
        response.raise_for_status()
        data = response.json()

        self.connector_yields = [
            SingleConnectorYield(**yield_data) for yield_data in data
        ]
        return None

    def _get_mock_server_url(self, endpoint: str) -> str:
        return f"http://{self.mock_server_host}:{self.mock_server_port}/{endpoint}"

    def _save_checkpoint(self, checkpoint: MockConnectorCheckpoint) -> None:
        response = self.client.post(
            self._get_mock_server_url("add-checkpoint"),
            json=checkpoint.model_dump(mode="json"),
        )
        response.raise_for_status()

    def _load_from_checkpoint_common(
        self,
        start: SecondsSinceUnixEpoch,  # noqa: ARG002
        end: SecondsSinceUnixEpoch,  # noqa: ARG002
        checkpoint: MockConnectorCheckpoint,
        include_permissions: bool = False,
    ) -> CheckpointOutput[MockConnectorCheckpoint]:
        if self.connector_yields is None:
            raise ValueError("No connector yields configured")

        # Save the checkpoint to the mock server
        self._save_checkpoint(checkpoint)

        yield_index = self.current_yield_index
        self.current_yield_index += 1
        current_yield = self.connector_yields[yield_index]

        # If the current yield has an unhandled exception, raise it
        # This is used to simulate an unhandled failure in the connector.
        if current_yield.unhandled_exception:
            raise RuntimeError(current_yield.unhandled_exception)

        # yield all documents
        for document in current_yield.documents:
            # If permissions are requested and not already set, add mock permissions
            if include_permissions and document.external_access is None:
                # Add mock permissions - make documents accessible to specific users/groups
                document.external_access = ExternalAccess(
                    external_user_emails=EXTERNAL_USER_EMAILS,
                    external_user_group_ids=EXTERNAL_USER_GROUP_IDS,
                    is_public=False,
                )
            yield document

        for failure in current_yield.failures:
            yield failure

        return current_yield.checkpoint

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: MockConnectorCheckpoint,
    ) -> CheckpointOutput[MockConnectorCheckpoint]:
        return self._load_from_checkpoint_common(
            start, end, checkpoint, include_permissions=False
        )

    @override
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: MockConnectorCheckpoint,
    ) -> CheckpointOutput[MockConnectorCheckpoint]:
        return self._load_from_checkpoint_common(
            start, end, checkpoint, include_permissions=True
        )

    @override
    def build_dummy_checkpoint(self) -> MockConnectorCheckpoint:
        return MockConnectorCheckpoint(
            has_more=True,
            last_document_id=None,
        )

    def validate_checkpoint_json(self, checkpoint_json: str) -> MockConnectorCheckpoint:
        return MockConnectorCheckpoint.model_validate_json(checkpoint_json)


================================================
FILE: backend/onyx/connectors/models.py
================================================
import sys
from datetime import datetime
from enum import Enum
from typing import Any
from typing import cast

from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator

from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.db.enums import HierarchyNodeType
from onyx.db.enums import IndexModelStatus
from onyx.utils.text_processing import make_url_compatible


class InputType(str, Enum):
    LOAD_STATE = "load_state"  # e.g. loading a current full state or a save state, such as from a file
    POLL = "poll"  # e.g. calling an API to get all documents in the last hour
    EVENT = "event"  # e.g. registered an endpoint as a listener, and processing connector events
    SLIM_RETRIEVAL = "slim_retrieval"


class ConnectorMissingCredentialError(PermissionError):
    def __init__(self, connector_name: str) -> None:
        connector_name = connector_name or "Unknown"
        super().__init__(
            f"{connector_name} connector missing credentials, was load_credentials called?"
        )


class Section(BaseModel):
    """Base section class with common attributes"""

    link: str | None = None
    text: str | None = None
    image_file_id: str | None = None


class TextSection(Section):
    """Section containing text content"""

    text: str

    def __sizeof__(self) -> int:
        return sys.getsizeof(self.text) + sys.getsizeof(self.link)


class ImageSection(Section):
    """Section containing an image reference"""

    image_file_id: str

    def __sizeof__(self) -> int:
        return sys.getsizeof(self.image_file_id) + sys.getsizeof(self.link)


class BasicExpertInfo(BaseModel):
    """Basic Information for the owner of a document, any of the fields can be left as None
    Display fallback goes as follows:
    - first_name + (optional middle_initial) + last_name
    - display_name
    - email
    - first_name
    """

    display_name: str | None = None
    first_name: str | None = None
    middle_initial: str | None = None
    last_name: str | None = None
    email: str | None = None

    def get_semantic_name(self) -> str:
        if self.first_name and self.last_name:
            name_parts = [self.first_name]
            if self.middle_initial:
                name_parts.append(self.middle_initial + ".")
            name_parts.append(self.last_name)
            return " ".join([name_part.capitalize() for name_part in name_parts])

        if self.display_name:
            return self.display_name

        if self.email:
            return self.email

        if self.first_name:
            return self.first_name.capitalize()

        return "Unknown"

    def get_email(self) -> str | None:
        return self.email or None

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, BasicExpertInfo):
            return False
        return (
            self.display_name,
            self.first_name,
            self.middle_initial,
            self.last_name,
            self.email,
        ) == (
            other.display_name,
            other.first_name,
            other.middle_initial,
            other.last_name,
            other.email,
        )

    def __hash__(self) -> int:
        return hash(
            (
                self.display_name,
                self.first_name,
                self.middle_initial,
                self.last_name,
                self.email,
            )
        )

    def __sizeof__(self) -> int:
        size = sys.getsizeof(self.display_name)
        size += sys.getsizeof(self.first_name)
        size += sys.getsizeof(self.middle_initial)
        size += sys.getsizeof(self.last_name)
        size += sys.getsizeof(self.email)
        return size

    @classmethod
    def from_dict(cls, model_dict: dict[str, Any]) -> "BasicExpertInfo":

        first_name = cast(str, model_dict.get("FirstName"))
        last_name = cast(str, model_dict.get("LastName"))
        email = cast(str, model_dict.get("Email"))
        display_name = cast(str, model_dict.get("Name"))

        # Check if all fields are None
        if (
            first_name is None
            and last_name is None
            and email is None
            and display_name is None
        ):
            raise ValueError("No identifying information found for user")

        return cls(
            first_name=first_name,
            last_name=last_name,
            email=email,
            display_name=display_name,
        )


class DocumentBase(BaseModel):
    """Used for Onyx ingestion api, the ID is inferred before use if not provided"""

    id: str | None = None
    sections: list[TextSection | ImageSection]
    source: DocumentSource | None = None
    semantic_identifier: str  # displayed in the UI as the main identifier for the doc
    # TODO(andrei): Ideally we could improve this to where each value is just a
    # list of strings.
    metadata: dict[str, str | list[str]]

    @field_validator("metadata", mode="before")
    @classmethod
    def _coerce_metadata_values(cls, v: dict[str, Any]) -> dict[str, str | list[str]]:
        return {
            key: [str(item) for item in val] if isinstance(val, list) else str(val)
            for key, val in v.items()
        }

    # UTC time
    doc_updated_at: datetime | None = None
    chunk_count: int | None = None

    # Owner, creator, etc.
    primary_owners: list[BasicExpertInfo] | None = None
    # Assignee, space owner, etc.
    secondary_owners: list[BasicExpertInfo] | None = None
    # title is used for search whereas semantic_identifier is used for displaying in the UI
    # different because Slack message may display as #general but general should not be part
    # of the search, at least not in the same way as a document title should be for like Confluence
    # The default title is semantic_identifier though unless otherwise specified
    title: str | None = None
    from_ingestion_api: bool = False
    # Anything else that may be useful that is specific to this particular connector type that other
    # parts of the code may need. If you're unsure, this can be left as None
    additional_info: Any = None

    # only filled in EE for connectors w/ permission sync enabled
    external_access: ExternalAccess | None = None
    doc_metadata: dict[str, Any] | None = None

    # Parent hierarchy node raw ID - the folder/space/page containing this document
    # If None, document's hierarchy position is unknown or connector doesn't support hierarchy
    parent_hierarchy_raw_node_id: str | None = None

    # Resolved database ID of the parent hierarchy node
    # Set during docfetching after hierarchy nodes are cached
    parent_hierarchy_node_id: int | None = None

    def get_title_for_document_index(
        self,
    ) -> str | None:
        # If title is explicitly empty, return a None here for embedding purposes
        if self.title == "":
            return None
        replace_chars = set(RETURN_SEPARATOR)
        title = self.semantic_identifier if self.title is None else self.title
        for char in replace_chars:
            title = title.replace(char, " ")
        title = title.strip()
        return title

    def get_metadata_str_attributes(self) -> list[str] | None:
        if not self.metadata:
            return None
        # Combined string for the key/value for easy filtering
        return convert_metadata_dict_to_list_of_strings(self.metadata)

    def __sizeof__(self) -> int:
        size = sys.getsizeof(self.id)
        for section in self.sections:
            size += sys.getsizeof(section)
        size += sys.getsizeof(self.source)
        size += sys.getsizeof(self.semantic_identifier)
        size += sys.getsizeof(self.doc_updated_at)
        size += sys.getsizeof(self.chunk_count)

        if self.primary_owners is not None:
            for primary_owner in self.primary_owners:
                size += sys.getsizeof(primary_owner)
        else:
            size += sys.getsizeof(self.primary_owners)

        if self.secondary_owners is not None:
            for secondary_owner in self.secondary_owners:
                size += sys.getsizeof(secondary_owner)
        else:
            size += sys.getsizeof(self.secondary_owners)

        size += sys.getsizeof(self.title)
        size += sys.getsizeof(self.from_ingestion_api)
        size += sys.getsizeof(self.additional_info)
        return size

    def get_text_content(self) -> str:
        return " ".join([section.text for section in self.sections if section.text])


def convert_metadata_dict_to_list_of_strings(
    metadata: dict[str, str | list[str]],
) -> list[str]:
    """Converts a metadata dict to a list of strings.

    Each string is a key-value pair separated by the INDEX_SEPARATOR. If a key
    points to a list of values, each value generates a unique pair.

    NOTE: Whatever formatting strategy is used here to generate a key-value
    string must be replicated when constructing query filters.

    Args:
        metadata: The metadata dict to convert where values can be either a
            string or a list of strings.

    Returns:
        A list of strings where each string is a key-value pair separated by the
            INDEX_SEPARATOR.
    """
    attributes: list[str] = []
    for k, v in metadata.items():
        if isinstance(v, list):
            attributes.extend([k + INDEX_SEPARATOR + vi for vi in v])
        else:
            attributes.append(k + INDEX_SEPARATOR + v)
    return attributes


def convert_metadata_list_of_strings_to_dict(
    metadata_list: list[str],
) -> dict[str, str | list[str]]:
    """
    Converts a list of strings to a metadata dict. The inverse of
    convert_metadata_dict_to_list_of_strings.

    Assumes the input strings are formatted as in the output of
    convert_metadata_dict_to_list_of_strings.

    The schema of the output metadata dict is suboptimal yet bound to legacy
    code. Ideally each key would just point to a list of strings, where each
    list might contain just one element.

    Args:
        metadata_list: The list of strings to convert to a metadata dict.

    Returns:
        A metadata dict where values can be either a string or a list of
            strings.
    """
    metadata: dict[str, str | list[str]] = {}
    for item in metadata_list:
        key, value = item.split(INDEX_SEPARATOR, 1)
        if key in metadata:
            # We have already seen this key therefore it must point to a list.
            if isinstance(metadata[key], list):
                cast(list[str], metadata[key]).append(value)
            else:
                metadata[key] = [cast(str, metadata[key]), value]
        else:
            metadata[key] = value
    return metadata


class Document(DocumentBase):
    """Used for Onyx ingestion api, the ID is required"""

    id: str
    source: DocumentSource

    def to_short_descriptor(self) -> str:
        """Used when logging the identity of a document"""
        return f"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'"

    @classmethod
    def from_base(cls, base: DocumentBase) -> "Document":
        return cls(
            id=(
                make_url_compatible(base.id)
                if base.id
                else "ingestion_api_" + make_url_compatible(base.semantic_identifier)
            ),
            sections=base.sections,
            source=base.source or DocumentSource.INGESTION_API,
            semantic_identifier=base.semantic_identifier,
            metadata=base.metadata,
            doc_updated_at=base.doc_updated_at,
            primary_owners=base.primary_owners,
            secondary_owners=base.secondary_owners,
            title=base.title,
            from_ingestion_api=base.from_ingestion_api,
        )

    def __sizeof__(self) -> int:
        size = super().__sizeof__()
        size += sys.getsizeof(self.id)
        size += sys.getsizeof(self.source)
        return size


class IndexingDocument(Document):
    """Document with processed sections for indexing"""

    processed_sections: list[Section] = []

    def get_total_char_length(self) -> int:
        """Get the total character length of the document including processed sections"""
        title_len = len(self.title or self.semantic_identifier)

        # Use processed_sections if available, otherwise fall back to original sections
        if self.processed_sections:
            section_len = sum(
                len(section.text) if section.text is not None else 0
                for section in self.processed_sections
            )
        else:
            section_len = sum(
                (
                    len(section.text)
                    if isinstance(section, TextSection) and section.text is not None
                    else 0
                )
                for section in self.sections
            )

        return title_len + section_len


class SlimDocument(BaseModel):
    id: str
    external_access: ExternalAccess | None = None
    parent_hierarchy_raw_node_id: str | None = None


class HierarchyNode(BaseModel):
    """
    Hierarchy node yielded by connectors.

    This is the Pydantic model used by connectors, distinct from the
    SQLAlchemy HierarchyNode model in db/models.py. The connector runner
    layer converts this to the DB model when persisting to Postgres.
    """

    # Raw identifier from the source system
    # e.g., "1h7uWUR2BYZjtMfEXFt43tauj-Gp36DTPtwnsNuA665I" for Google Drive
    raw_node_id: str

    # Raw ID of parent node, or None for SOURCE-level children (direct children of the source root)
    raw_parent_id: str | None = None

    # Human-readable name for display
    display_name: str

    # Link to view this node in the source system
    link: str | None = None

    # What kind of structural node this is (folder, space, page, etc.)
    node_type: HierarchyNodeType

    # If this hierarchy node represents a document (e.g., Confluence page),
    # The db model stores that doc's document_id. This gets set during docprocessing
    # after the document row is created. Matching is done by raw_node_id matching document.id.
    # so, we don't allow connectors to specify this as it would be unused
    # document_id: str | None = None

    # External access information for the node
    external_access: ExternalAccess | None = None


class IndexAttemptMetadata(BaseModel):
    connector_id: int
    credential_id: int
    batch_num: int | None = None
    attempt_id: int | None = None
    request_id: str | None = None

    # Work in progress: will likely contain metadata about cc pair / index attempt
    structured_id: str | None = None


class ConnectorCheckpoint(BaseModel):
    # TODO: maybe move this to something disk-based to handle extremely large checkpoints?
    has_more: bool

    def __str__(self) -> str:
        """String representation of the checkpoint, with truncation for large checkpoint content."""
        MAX_CHECKPOINT_CONTENT_CHARS = 1000

        content_str = self.model_dump_json()
        if len(content_str) > MAX_CHECKPOINT_CONTENT_CHARS:
            content_str = content_str[: MAX_CHECKPOINT_CONTENT_CHARS - 3] + "..."
        return content_str


class DocumentFailure(BaseModel):
    document_id: str
    document_link: str | None = None


class EntityFailure(BaseModel):
    entity_id: str
    missed_time_range: tuple[datetime, datetime] | None = None


class ConnectorFailure(BaseModel):
    failed_document: DocumentFailure | None = None
    failed_entity: EntityFailure | None = None
    failure_message: str
    exception: Exception | None = Field(default=None, exclude=True)

    model_config = {"arbitrary_types_allowed": True}

    @model_validator(mode="before")
    def check_failed_fields(cls, values: dict) -> dict:
        failed_document = values.get("failed_document")
        failed_entity = values.get("failed_entity")
        if (failed_document is None and failed_entity is None) or (
            failed_document is not None and failed_entity is not None
        ):
            raise ValueError(
                "Exactly one of 'failed_document' or 'failed_entity' must be specified."
            )
        return values


class ConnectorStopSignal(Exception):
    """A custom exception used to signal a stop in processing."""


class OnyxMetadata(BaseModel):
    # Careful overriding the document_id, may cause visual issues in the UI.
    # Kept here for API based use cases mostly
    document_id: str | None = None
    source_type: DocumentSource | None = None
    link: str | None = None
    file_display_name: str | None = None
    primary_owners: list[BasicExpertInfo] | None = None
    secondary_owners: list[BasicExpertInfo] | None = None
    doc_updated_at: datetime | None = None
    title: str | None = None


class DocExtractionContext(BaseModel):
    index_name: str
    cc_pair_id: int
    connector_id: int
    credential_id: int
    source: DocumentSource
    earliest_index_time: float
    from_beginning: bool
    is_primary: bool
    should_fetch_permissions_during_indexing: bool
    search_settings_status: IndexModelStatus
    doc_extraction_complete_batch_num: int | None


class DocIndexingContext(BaseModel):
    batches_done: int
    total_failures: int
    net_doc_change: int
    total_chunks: int


================================================
FILE: backend/onyx/connectors/notion/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/notion/connector.py
================================================
import re
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Optional
from urllib.parse import parse_qs
from urllib.parse import urlparse

import requests
from pydantic import BaseModel
from retry import retry
from typing_extensions import override

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rl_requests,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.db.enums import HierarchyNodeType
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger

logger = setup_logger()

_NOTION_PAGE_SIZE = 100
_NOTION_CALL_TIMEOUT = 30  # 30 seconds
_MAX_PAGES = 1000


# TODO: Tables need to be ingested, Pages need to have their metadata ingested


class NotionPage(BaseModel):
    """Represents a Notion Page object"""

    id: str
    created_time: str
    last_edited_time: str
    in_trash: bool
    properties: dict[str, Any]
    url: str

    database_name: str | None = None  # Only applicable to the database type page (wiki)
    parent: dict[str, Any] | None = (
        None  # Raw parent object from API for hierarchy tracking
    )


class NotionDataSource(BaseModel):
    """Represents a Notion Data Source within a database."""

    id: str
    name: str = ""


class NotionBlock(BaseModel):
    """Represents a Notion Block object"""

    id: str  # Used for the URL
    text: str
    # In a plaintext representation of the page, how this block should be joined
    # with the existing text up to this point, separated out from text for clarity
    prefix: str


class NotionSearchResponse(BaseModel):
    """Represents the response from the Notion Search API"""

    results: list[dict[str, Any]]
    next_cursor: Optional[str]
    has_more: bool = False


class BlockReadOutput(BaseModel):
    """Output from reading blocks of a page."""

    blocks: list[NotionBlock]
    child_page_ids: list[str]
    hierarchy_nodes: list[HierarchyNode]


class NotionConnector(LoadConnector, PollConnector):
    """Notion Page connector that reads all Notion pages
    this integration has been granted access to.

    Arguments:
        batch_size (int): Number of objects to index in a batch
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        recursive_index_enabled: bool = not NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP,
        root_page_id: str | None = None,
    ) -> None:
        """Initialize with parameters."""
        self.batch_size = batch_size
        self.headers = {
            "Content-Type": "application/json",
            "Notion-Version": "2026-03-11",
        }
        self.indexed_pages: set[str] = set()
        self.root_page_id = root_page_id
        # if enabled, will recursively index child pages as they are found rather
        # relying entirely on the `search` API. We have received reports that the
        # `search` API misses many pages - in those cases, this might need to be
        # turned on. It's not currently known why/when this is required.
        # NOTE: this also removes all benefits polling, since we need to traverse
        # all pages regardless of if they are updated. If the notion workspace is
        # very large, this may not be practical.
        self.recursive_index_enabled = recursive_index_enabled or self.root_page_id

        # Hierarchy tracking state
        self.seen_hierarchy_node_raw_ids: set[str] = set()
        self.workspace_id: str | None = None
        self.workspace_name: str | None = None
        # Maps child page IDs to their containing page ID (discovered in _read_blocks).
        # Used to resolve block_id parent types to the actual containing page.
        self._child_page_parent_map: dict[str, str] = {}
        # Maps data_source_id -> database_id (populated in _read_pages_from_database).
        # Used to resolve data_source_id parent types back to the database.
        self._data_source_to_database_map: dict[str, str] = {}

    @classmethod
    @override
    def normalize_url(cls, url: str) -> NormalizationResult:
        """Normalize a Notion URL to extract the page ID (UUID format)."""
        parsed = urlparse(url)
        netloc = parsed.netloc.lower()

        if not ("notion.so" in netloc or "notion.site" in netloc):
            return NormalizationResult(normalized_url=None, use_default=False)

        # Extract page ID from path (format: "Title-PageID")
        path_last = parsed.path.split("/")[-1]
        candidate = path_last.split("-")[-1] if "-" in path_last else path_last

        # Clean and format as UUID
        candidate = re.sub(r"[^0-9a-fA-F-]", "", candidate)
        cleaned = candidate.replace("-", "")

        if len(cleaned) == 32 and re.fullmatch(r"[0-9a-fA-F]{32}", cleaned):
            normalized_uuid = (
                f"{cleaned[0:8]}-{cleaned[8:12]}-{cleaned[12:16]}-{cleaned[16:20]}-{cleaned[20:]}"
            ).lower()
            return NormalizationResult(
                normalized_url=normalized_uuid, use_default=False
            )

        # Try query params
        params = parse_qs(parsed.query)
        for key in ("p", "page_id"):
            if key in params and params[key]:
                candidate = params[key][0].replace("-", "")
                if len(candidate) == 32 and re.fullmatch(r"[0-9a-fA-F]{32}", candidate):
                    normalized_uuid = (
                        f"{candidate[0:8]}-{candidate[8:12]}-{candidate[12:16]}-{candidate[16:20]}-{candidate[20:]}"
                    ).lower()
                    return NormalizationResult(
                        normalized_url=normalized_uuid, use_default=False
                    )

        return NormalizationResult(normalized_url=None, use_default=False)

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_child_blocks(
        self, block_id: str, cursor: str | None = None
    ) -> dict[str, Any] | None:
        """Fetch all child blocks via the Notion API."""
        logger.debug(f"Fetching children of block with ID '{block_id}'")
        block_url = f"https://api.notion.com/v1/blocks/{block_id}/children"
        query_params = None if not cursor else {"start_cursor": cursor}
        res = rl_requests.get(
            block_url,
            headers=self.headers,
            params=query_params,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        try:
            res.raise_for_status()
        except Exception as e:
            if res.status_code == 404:
                # this happens when a page is not shared with the integration
                # in this case, we should just ignore the page
                logger.error(
                    f"Unable to access block with ID '{block_id}'. "
                    f"This is likely due to the block not being shared "
                    f"with the Onyx integration. Exact exception:\n\n{e}"
                )
            else:
                logger.exception(
                    f"Error fetching blocks with status code {res.status_code}: {res.json()}"
                )

            # This can occasionally happen, the reason is unknown and cannot be reproduced on our internal Notion
            # Assuming this will not be a critical loss of data
            return None
        return res.json()

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_page(self, page_id: str) -> NotionPage:
        """Fetch a page from its ID via the Notion API, retry with database if page fetch fails."""
        logger.debug(f"Fetching page for ID '{page_id}'")
        page_url = f"https://api.notion.com/v1/pages/{page_id}"
        res = rl_requests.get(
            page_url,
            headers=self.headers,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        try:
            res.raise_for_status()
        except Exception as e:
            logger.warning(
                f"Failed to fetch page, trying database for ID '{page_id}'. Exception: {e}"
            )
            # Try fetching as a database if page fetch fails, this happens if the page is set to a wiki
            # it becomes a database from the notion perspective
            return self._fetch_database_as_page(page_id)
        return NotionPage(**res.json())

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_database_as_page(self, database_id: str) -> NotionPage:
        """Attempt to fetch a database as a page.

        Note: As of API 2025-09-03, database objects no longer include
        `properties` (schema moved to individual data sources).
        """
        logger.debug(f"Fetching database for ID '{database_id}' as a page")
        database_url = f"https://api.notion.com/v1/databases/{database_id}"
        res = rl_requests.get(
            database_url,
            headers=self.headers,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        try:
            res.raise_for_status()
        except Exception as e:
            logger.exception(f"Error fetching database as page - {res.json()}")
            raise e
        db_data = res.json()
        database_name = db_data.get("title")
        database_name = (
            database_name[0].get("text", {}).get("content") if database_name else None
        )

        db_data.setdefault("properties", {})

        return NotionPage(**db_data, database_name=database_name)

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_data_sources_for_database(
        self, database_id: str
    ) -> list[NotionDataSource]:
        """Fetch the list of data sources for a database."""
        logger.debug(f"Fetching data sources for database '{database_id}'")
        res = rl_requests.get(
            f"https://api.notion.com/v1/databases/{database_id}",
            headers=self.headers,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        try:
            res.raise_for_status()
        except Exception as e:
            if res.status_code in (403, 404):
                logger.error(
                    f"Unable to access database with ID '{database_id}'. "
                    f"This is likely due to the database not being shared "
                    f"with the Onyx integration. Exact exception:\n{e}"
                )
                return []
            logger.exception(f"Error fetching database - {res.json()}")
            raise e

        db_data = res.json()
        data_sources = db_data.get("data_sources", [])
        return [
            NotionDataSource(id=ds["id"], name=ds.get("name", ""))
            for ds in data_sources
            if ds.get("id")
        ]

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_data_source(
        self, data_source_id: str, cursor: str | None = None
    ) -> dict[str, Any]:
        """Query a data source via POST /v1/data_sources/{id}/query."""
        logger.debug(f"Querying data source '{data_source_id}'")
        url = f"https://api.notion.com/v1/data_sources/{data_source_id}/query"
        body = None if not cursor else {"start_cursor": cursor}
        res = rl_requests.post(
            url,
            headers=self.headers,
            json=body,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        try:
            res.raise_for_status()
        except Exception as e:
            if res.status_code in (403, 404):
                logger.error(
                    f"Unable to access data source with ID '{data_source_id}'. "
                    f"This is likely due to it not being shared "
                    f"with the Onyx integration. Exact exception:\n{e}"
                )
                return {"results": [], "next_cursor": None}
            logger.exception(f"Error querying data source - {res.json()}")
            raise e
        return res.json()

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_workspace_info(self) -> tuple[str, str]:
        """Fetch workspace ID and name from the bot user endpoint."""
        res = rl_requests.get(
            "https://api.notion.com/v1/users/me",
            headers=self.headers,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        res.raise_for_status()
        data = res.json()
        bot = data.get("bot", {})
        # workspace_id may be in bot object, fallback to user id
        workspace_id = bot.get("workspace_id", data.get("id"))
        workspace_name = bot.get("workspace_name", "Notion Workspace")
        return workspace_id, workspace_name

    def _get_workspace_hierarchy_node(self) -> HierarchyNode | None:
        """Get the workspace hierarchy node, fetching workspace info if needed.

        Returns None if the workspace node has already been yielded.
        """
        if self.workspace_id is None:
            self.workspace_id, self.workspace_name = self._fetch_workspace_info()

        if self.workspace_id in self.seen_hierarchy_node_raw_ids:
            return None

        self.seen_hierarchy_node_raw_ids.add(self.workspace_id)
        return HierarchyNode(
            raw_node_id=self.workspace_id,
            raw_parent_id=None,  # Parent is SOURCE (auto-created by system)
            display_name=self.workspace_name or "Notion Workspace",
            link=f"https://notion.so/{self.workspace_id.replace('-', '')}",
            node_type=HierarchyNodeType.WORKSPACE,
        )

    def _get_parent_raw_id(
        self, parent: dict[str, Any] | None, page_id: str | None = None
    ) -> str | None:
        """Get the parent raw ID for hierarchy tracking.

        Returns workspace_id for top-level pages, or the direct parent ID for nested pages.

        Args:
            parent: The parent object from the Notion API
            page_id: The page's own ID, used to look up block_id parents in our cache
        """
        if not parent:
            return self.workspace_id  # Default to workspace if no parent info

        parent_type = parent.get("type")

        if parent_type == "workspace":
            return self.workspace_id
        elif parent_type == "block_id":
            # Inline page in a block - resolve to the containing page if we discovered it
            if page_id and page_id in self._child_page_parent_map:
                return self._child_page_parent_map[page_id]
            # Fallback to workspace if we don't know the parent
            return self.workspace_id
        elif parent_type == "data_source_id":
            ds_id = parent.get("data_source_id")
            if ds_id:
                return self._data_source_to_database_map.get(ds_id, self.workspace_id)
        elif parent_type in ["page_id", "database_id"]:
            return parent.get(parent_type)

        return self.workspace_id

    def _maybe_yield_hierarchy_node(
        self,
        raw_node_id: str,
        raw_parent_id: str | None,
        display_name: str,
        link: str | None,
        node_type: HierarchyNodeType,
    ) -> HierarchyNode | None:
        """Create and return a hierarchy node if not already yielded.

        Args:
            raw_node_id: The raw ID of the node
            raw_parent_id: The raw ID of the parent node
            display_name: Human-readable name
            link: URL to the node in Notion
            node_type: Type of hierarchy node

        Returns:
            HierarchyNode if new, None if already yielded
        """
        if raw_node_id in self.seen_hierarchy_node_raw_ids:
            return None
        self.seen_hierarchy_node_raw_ids.add(raw_node_id)
        return HierarchyNode(
            raw_node_id=raw_node_id,
            raw_parent_id=raw_parent_id,
            display_name=display_name,
            link=link,
            node_type=node_type,
        )

    @staticmethod
    def _properties_to_str(properties: dict[str, Any]) -> str:
        """Converts Notion properties to a string"""

        def _recurse_list_properties(inner_list: list[Any]) -> str | None:
            list_properties: list[str | None] = []
            for item in inner_list:
                if item and isinstance(item, dict):
                    list_properties.append(_recurse_properties(item))
                elif item and isinstance(item, list):
                    list_properties.append(_recurse_list_properties(item))
                else:
                    list_properties.append(str(item))
            return (
                ", ".join(
                    [
                        list_property
                        for list_property in list_properties
                        if list_property
                    ]
                )
                or None
            )

        def _recurse_properties(inner_dict: dict[str, Any]) -> str | None:
            sub_inner_dict: dict[str, Any] | list[Any] | str = inner_dict
            while isinstance(sub_inner_dict, dict) and "type" in sub_inner_dict:
                type_name = sub_inner_dict["type"]
                sub_inner_dict = sub_inner_dict[type_name]

                # If the innermost layer is None, the value is not set
                if not sub_inner_dict:
                    return None

            # TODO there may be more types to handle here
            if isinstance(sub_inner_dict, list):
                return _recurse_list_properties(sub_inner_dict)
            elif isinstance(sub_inner_dict, str):
                # For some objects the innermost value could just be a string, not sure what causes this
                return sub_inner_dict
            elif isinstance(sub_inner_dict, dict):
                if "name" in sub_inner_dict:
                    return sub_inner_dict["name"]
                if "content" in sub_inner_dict:
                    return sub_inner_dict["content"]
                start = sub_inner_dict.get("start")
                end = sub_inner_dict.get("end")
                if start is not None:
                    if end is not None:
                        return f"{start} - {end}"
                    return start
                elif end is not None:
                    return f"Until {end}"

                if "id" in sub_inner_dict:
                    # This is not useful to index, it's a reference to another Notion object
                    # and this ID value in plaintext is useless outside of the Notion context
                    logger.debug("Skipping Notion object id field property")
                    return None

            logger.debug(f"Unreadable property from innermost prop: {sub_inner_dict}")
            return None

        result = ""
        for prop_name, prop in properties.items():
            if not prop or not isinstance(prop, dict):
                continue

            try:
                inner_value = _recurse_properties(prop)
            except Exception as e:
                # This is not a critical failure, these properties are not the actual contents of the page
                # more similar to metadata
                logger.warning(f"Error recursing properties for {prop_name}: {e}")
                continue
            # Not a perfect way to format Notion database tables but there's no perfect representation
            # since this must be represented as plaintext
            if inner_value:
                result += f"{prop_name}: {inner_value}\t"

        return result

    def _read_pages_from_database(
        self,
        database_id: str,
        database_parent_raw_id: str | None = None,
        database_name: str | None = None,
    ) -> BlockReadOutput:
        """Returns blocks, page IDs, and hierarchy nodes from a database.

        Args:
            database_id: The ID of the database
            database_parent_raw_id: The raw ID of the database's parent (containing page or workspace)
            database_name: The name of the database (from child_database block title)
        """
        result_blocks: list[NotionBlock] = []
        result_pages: list[str] = []
        hierarchy_nodes: list[HierarchyNode] = []

        # Create hierarchy node for this database if not already yielded.
        # Notion URLs omit dashes from UUIDs: https://notion.so/17ab3186873d418fb899c3f6a43f68de
        db_node = self._maybe_yield_hierarchy_node(
            raw_node_id=database_id,
            raw_parent_id=database_parent_raw_id or self.workspace_id,
            display_name=database_name or f"Database {database_id}",
            link=f"https://notion.so/{database_id.replace('-', '')}",
            node_type=HierarchyNodeType.DATABASE,
        )
        if db_node:
            hierarchy_nodes.append(db_node)

        # Discover all data sources under this database, then query each one.
        # Even legacy single-source databases have one entry in the array.
        data_sources = self._fetch_data_sources_for_database(database_id)
        if not data_sources:
            logger.warning(
                f"Database '{database_id}' returned zero data sources — "
                f"no pages will be indexed from this database."
            )
        for ds in data_sources:
            self._data_source_to_database_map[ds.id] = database_id
            cursor = None
            while True:
                data = self._fetch_data_source(ds.id, cursor)

                for result in data["results"]:
                    obj_id = result["id"]
                    obj_type = result["object"]
                    text = self._properties_to_str(result.get("properties", {}))
                    if text:
                        result_blocks.append(
                            NotionBlock(id=obj_id, text=text, prefix="\n")
                        )

                    if not self.recursive_index_enabled:
                        continue

                    if obj_type == "page":
                        logger.debug(
                            f"Found page with ID '{obj_id}' in database '{database_id}'"
                        )
                        result_pages.append(result["id"])
                    elif obj_type == "database":
                        logger.debug(
                            f"Found database with ID '{obj_id}' in database '{database_id}'"
                        )
                        nested_db_title = result.get("title", [])
                        nested_db_name = None
                        if nested_db_title and len(nested_db_title) > 0:
                            nested_db_name = (
                                nested_db_title[0].get("text", {}).get("content")
                            )
                        nested_output = self._read_pages_from_database(
                            obj_id,
                            database_parent_raw_id=database_id,
                            database_name=nested_db_name,
                        )
                        result_pages.extend(nested_output.child_page_ids)
                        hierarchy_nodes.extend(nested_output.hierarchy_nodes)

                if data["next_cursor"] is None:
                    break

                cursor = data["next_cursor"]

        return BlockReadOutput(
            blocks=result_blocks,
            child_page_ids=result_pages,
            hierarchy_nodes=hierarchy_nodes,
        )

    def _read_blocks(
        self, base_block_id: str, containing_page_id: str | None = None
    ) -> BlockReadOutput:
        """Reads all child blocks for the specified block.

        Args:
            base_block_id: The block ID to read children from
            containing_page_id: The ID of the page that contains this block tree.
                Used to correctly map child pages/databases to their parent page
                rather than intermediate block IDs.
        """
        # If no containing_page_id provided, assume base_block_id is the page itself
        page_id = containing_page_id or base_block_id
        result_blocks: list[NotionBlock] = []
        child_pages: list[str] = []
        hierarchy_nodes: list[HierarchyNode] = []
        cursor = None
        while True:
            data = self._fetch_child_blocks(base_block_id, cursor)

            # this happens when a block is not shared with the integration
            if data is None:
                return BlockReadOutput(
                    blocks=result_blocks,
                    child_page_ids=child_pages,
                    hierarchy_nodes=hierarchy_nodes,
                )

            for result in data["results"]:
                logger.debug(
                    f"Found child block for block with ID '{base_block_id}': {result}"
                )
                result_block_id = result["id"]
                result_type = result["type"]
                result_obj = result[result_type]

                if result_type == "ai_block":
                    logger.warning(
                        f"Skipping 'ai_block' ('{result_block_id}') for base block '{base_block_id}': "
                        f"Notion API does not currently support reading AI blocks (as of 24/02/09) "
                        f"(discussion: https://github.com/onyx-dot-app/onyx/issues/1053)"
                    )
                    continue

                if result_type == "unsupported":
                    logger.warning(
                        f"Skipping unsupported block type '{result_type}' "
                        f"('{result_block_id}') for base block '{base_block_id}': "
                        f"(discussion: https://github.com/onyx-dot-app/onyx/issues/1230)"
                    )
                    continue

                if result_type == "external_object_instance_page":
                    logger.warning(
                        f"Skipping 'external_object_instance_page' ('{result_block_id}') for base block '{base_block_id}': "
                        f"Notion API does not currently support reading external blocks (as of 24/07/03) "
                        f"(discussion: https://github.com/onyx-dot-app/onyx/issues/1761)"
                    )
                    continue

                cur_result_text_arr = []
                if "rich_text" in result_obj:
                    for rich_text in result_obj["rich_text"]:
                        # skip if doesn't have text object
                        if "text" in rich_text:
                            text = rich_text["text"]["content"]
                            cur_result_text_arr.append(text)

                if result["has_children"]:
                    if result_type == "child_page":
                        # Child pages will not be included at this top level, it will be a separate document.
                        # Track parent page so we can resolve block_id parents later.
                        # Use page_id (not base_block_id) to ensure we map to the containing page,
                        # not an intermediate block like a toggle or callout.
                        child_pages.append(result_block_id)
                        self._child_page_parent_map[result_block_id] = page_id
                    else:
                        logger.debug(f"Entering sub-block: {result_block_id}")
                        sub_output = self._read_blocks(result_block_id, page_id)
                        logger.debug(f"Finished sub-block: {result_block_id}")
                        result_blocks.extend(sub_output.blocks)
                        child_pages.extend(sub_output.child_page_ids)
                        hierarchy_nodes.extend(sub_output.hierarchy_nodes)

                if result_type == "child_database":
                    # Extract database name from the child_database block
                    db_title = result_obj.get("title", "")
                    db_output = self._read_pages_from_database(
                        result_block_id,
                        database_parent_raw_id=page_id,  # Parent is the containing page
                        database_name=db_title or None,
                    )
                    # A database on a page often looks like a table, we need to include it for the contents
                    # of the page but the children (cells) should be processed as other Documents
                    result_blocks.extend(db_output.blocks)
                    hierarchy_nodes.extend(db_output.hierarchy_nodes)

                    if self.recursive_index_enabled:
                        child_pages.extend(db_output.child_page_ids)

                if cur_result_text_arr:
                    new_block = NotionBlock(
                        id=result_block_id,
                        text="\n".join(cur_result_text_arr),
                        prefix="\n",
                    )
                    result_blocks.append(new_block)

            if data["next_cursor"] is None:
                break

            cursor = data["next_cursor"]

        return BlockReadOutput(
            blocks=result_blocks,
            child_page_ids=child_pages,
            hierarchy_nodes=hierarchy_nodes,
        )

    def _read_page_title(self, page: NotionPage) -> str | None:
        """Extracts the title from a Notion page"""
        page_title = None
        if hasattr(page, "database_name") and page.database_name:
            return page.database_name
        for _, prop in page.properties.items():
            if prop["type"] == "title" and len(prop["title"]) > 0:
                page_title = " ".join([t["plain_text"] for t in prop["title"]]).strip()
                break

        return page_title

    def _read_pages(
        self,
        pages: list[NotionPage],
    ) -> Generator[Document | HierarchyNode, None, None]:
        """Reads pages for rich text content and generates Documents and HierarchyNodes

        Note that a page which is turned into a "wiki" becomes a database but both top level pages and top level databases
        do not seem to have any properties associated with them.

        Pages that are part of a database can have properties which are like the values of the row in the "database" table
        in which they exist

        This is not clearly outlined in the Notion API docs but it is observable empirically.
        https://developers.notion.com/docs/working-with-page-content
        """
        all_child_page_ids: list[str] = []
        for page in pages:
            if page.id in self.indexed_pages:
                logger.debug(f"Already indexed page with ID '{page.id}'. Skipping.")
                continue

            logger.info(f"Reading page with ID '{page.id}', with url {page.url}")
            block_output = self._read_blocks(page.id)
            all_child_page_ids.extend(block_output.child_page_ids)

            # okay to mark here since there's no way for this to not succeed
            # without a critical failure
            self.indexed_pages.add(page.id)

            raw_page_title = self._read_page_title(page)
            page_title = raw_page_title or f"Untitled Page with ID {page.id}"
            parent_raw_id = self._get_parent_raw_id(page.parent, page_id=page.id)

            # If this page has children (pages or databases), yield it as a hierarchy node FIRST
            # This ensures parent nodes are created before child documents reference them
            if block_output.child_page_ids or block_output.hierarchy_nodes:
                hierarchy_node = self._maybe_yield_hierarchy_node(
                    raw_node_id=page.id,
                    raw_parent_id=parent_raw_id,
                    display_name=page_title,
                    link=page.url,
                    node_type=HierarchyNodeType.PAGE,
                )
                if hierarchy_node:
                    yield hierarchy_node

            # Yield database hierarchy nodes discovered in this page's blocks
            for db_node in block_output.hierarchy_nodes:
                yield db_node

            if not block_output.blocks:
                if not raw_page_title:
                    logger.warning(
                        f"No blocks OR title found for page with ID '{page.id}'. Skipping."
                    )
                    continue

                logger.debug(f"No blocks found for page with ID '{page.id}'")
                """
                Something like:

                TITLE

                PROP1: PROP1_VALUE
                PROP2: PROP2_VALUE
                """
                text = page_title
                if page.properties:
                    text += "\n\n" + "\n".join(
                        [f"{key}: {value}" for key, value in page.properties.items()]
                    )
                sections = [
                    TextSection(
                        link=f"{page.url}",
                        text=text,
                    )
                ]
            else:
                sections = [
                    TextSection(
                        link=f"{page.url}#{block.id.replace('-', '')}",
                        text=block.prefix + block.text,
                    )
                    for block in block_output.blocks
                ]

            yield (
                Document(
                    id=page.id,
                    sections=cast(list[TextSection | ImageSection], sections),
                    source=DocumentSource.NOTION,
                    semantic_identifier=page_title,
                    doc_updated_at=datetime.fromisoformat(
                        page.last_edited_time
                    ).astimezone(timezone.utc),
                    metadata={},
                    parent_hierarchy_raw_node_id=parent_raw_id,
                )
            )
            self.indexed_pages.add(page.id)

        if self.recursive_index_enabled and all_child_page_ids:
            # NOTE: checking if page_id is in self.indexed_pages to prevent extra
            # calls to `_fetch_page` for pages we've already indexed
            for child_page_batch_ids in batch_generator(
                all_child_page_ids, batch_size=INDEX_BATCH_SIZE
            ):
                child_page_batch = [
                    self._fetch_page(page_id)
                    for page_id in child_page_batch_ids
                    if page_id not in self.indexed_pages
                ]
                yield from self._read_pages(child_page_batch)

    @retry(tries=3, delay=1, backoff=2)
    def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:
        """Search for pages from a Notion database. Includes some small number of
        retries to handle misc, flakey failures."""
        logger.debug(f"Searching for pages in Notion with query_dict: {query_dict}")
        res = rl_requests.post(
            "https://api.notion.com/v1/search",
            headers=self.headers,
            json=query_dict,
            timeout=_NOTION_CALL_TIMEOUT,
        )
        res.raise_for_status()
        return NotionSearchResponse(**res.json())

    # The | Document is needed for mypy type checking
    def _yield_database_hierarchy_nodes(
        self,
    ) -> Generator[HierarchyNode | Document, None, None]:
        """Search for all data sources and yield hierarchy nodes for their parent databases.

        This must be called BEFORE page indexing so that database hierarchy nodes
        exist when pages inside databases reference them as parents.

        With the new API, search returns data source objects instead of databases.
        Multiple data sources can share the same parent database, so we use
        database_id as the hierarchy node key and deduplicate via
        _maybe_yield_hierarchy_node.
        """
        query_dict: dict[str, Any] = {
            "filter": {"property": "object", "value": "data_source"},
            "page_size": _NOTION_PAGE_SIZE,
        }
        pages_seen = 0
        while pages_seen < _MAX_PAGES:
            db_res = self._search_notion(query_dict)
            for ds in db_res.results:
                # Extract the parent database_id from the data source's parent
                ds_parent = ds.get("parent", {})
                db_id = ds_parent.get("database_id")
                if not db_id:
                    continue

                # Populate the mapping so _get_parent_raw_id can resolve later
                ds_id = ds.get("id")
                if not ds_id:
                    continue
                self._data_source_to_database_map[ds_id] = db_id

                # Fetch the database to get its actual name and parent
                try:
                    db_page = self._fetch_database_as_page(db_id)
                    db_name = db_page.database_name or f"Database {db_id}"
                    parent_raw_id = self._get_parent_raw_id(db_page.parent)
                    db_url = (
                        db_page.url or f"https://notion.so/{db_id.replace('-', '')}"
                    )
                except requests.exceptions.RequestException as e:
                    logger.warning(
                        f"Could not fetch database '{db_id}', "
                        f"defaulting to workspace root. Error: {e}"
                    )
                    db_name = f"Database {db_id}"
                    parent_raw_id = self.workspace_id
                    db_url = f"https://notion.so/{db_id.replace('-', '')}"

                # _maybe_yield_hierarchy_node deduplicates by raw_node_id,
                # so multiple data sources under one database produce one node.
                node = self._maybe_yield_hierarchy_node(
                    raw_node_id=db_id,
                    raw_parent_id=parent_raw_id or self.workspace_id,
                    display_name=db_name,
                    link=db_url,
                    node_type=HierarchyNodeType.DATABASE,
                )
                if node:
                    yield node

            if not db_res.has_more:
                break
            query_dict["start_cursor"] = db_res.next_cursor
            pages_seen += 1

    def _filter_pages_by_time(
        self,
        pages: list[dict[str, Any]],
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        filter_field: str = "last_edited_time",
    ) -> list[NotionPage]:
        """A helper function to filter out pages outside of a time
        range. This functionality doesn't yet exist in the Notion Search API,
        but when it does, this approach can be deprecated.

        Arguments:
            pages (list[dict]) - Pages to filter
            start (float) - start epoch time to filter from
            end (float) - end epoch time to filter to
            filter_field (str) - the attribute on the page to apply the filter
        """
        filtered_pages: list[NotionPage] = []
        for page in pages:
            # Parse ISO 8601 timestamp and convert to UTC epoch time
            timestamp = page[filter_field].replace(".000Z", "+00:00")
            compare_time = datetime.fromisoformat(timestamp).timestamp()
            if compare_time > start and compare_time <= end:
                filtered_pages += [NotionPage(**page)]
        return filtered_pages

    def _recursive_load(self) -> GenerateDocumentsOutput:
        if self.root_page_id is None or not self.recursive_index_enabled:
            raise RuntimeError(
                "Recursive page lookup is not enabled, but we are trying to recursively load pages. This should never happen."
            )

        # Yield workspace hierarchy node FIRST before any pages
        workspace_node = self._get_workspace_hierarchy_node()
        if workspace_node:
            yield [workspace_node]

        logger.info(
            f"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}"
        )
        pages = [self._fetch_page(page_id=self.root_page_id)]
        yield from batch_generator(self._read_pages(pages), self.batch_size)

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Applies integration token to headers"""
        self.headers["Authorization"] = (
            f"Bearer {credentials['notion_integration_token']}"
        )
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Loads all page data from a Notion workspace.

        Returns:
            list[Document]: list of documents.
        """
        # TODO: remove once Notion search issue is discovered
        if self.recursive_index_enabled and self.root_page_id:
            yield from self._recursive_load()
            return

        # Yield workspace hierarchy node FIRST before any pages
        workspace_node = self._get_workspace_hierarchy_node()
        if workspace_node:
            yield [workspace_node]

        # Yield database hierarchy nodes BEFORE pages so parent references resolve
        yield from batch_generator(
            self._yield_database_hierarchy_nodes(), self.batch_size
        )

        query_dict: dict[str, Any] = {
            "filter": {"property": "object", "value": "page"},
            "page_size": _NOTION_PAGE_SIZE,
        }
        while True:
            db_res = self._search_notion(query_dict)
            pages = [NotionPage(**page) for page in db_res.results]
            yield from batch_generator(self._read_pages(pages), self.batch_size)
            if db_res.has_more:
                query_dict["start_cursor"] = db_res.next_cursor
            else:
                break

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Uses the Notion search API to fetch updated pages
        within a time period.
        Unfortunately the search API doesn't yet support filtering by times,
        so until they add that, we're just going to page through results until,
        we reach ones that are older than our search criteria.
        """
        # TODO: remove once Notion search issue is discovered
        if self.recursive_index_enabled and self.root_page_id:
            yield from self._recursive_load()
            return

        # Yield workspace hierarchy node FIRST before any pages
        workspace_node = self._get_workspace_hierarchy_node()
        if workspace_node:
            yield [workspace_node]

        # Yield database hierarchy nodes BEFORE pages so parent references resolve.
        # We yield all databases without time filtering because a page's parent
        # database might not have been edited even if the page was.
        yield from batch_generator(
            self._yield_database_hierarchy_nodes(), self.batch_size
        )

        query_dict: dict[str, Any] = {
            "page_size": _NOTION_PAGE_SIZE,
            "sort": {"timestamp": "last_edited_time", "direction": "descending"},
            "filter": {"property": "object", "value": "page"},
        }
        while True:
            db_res = self._search_notion(query_dict)
            pages = self._filter_pages_by_time(
                db_res.results, start, end, filter_field="last_edited_time"
            )
            if len(pages) > 0:
                yield from batch_generator(self._read_pages(pages), self.batch_size)
                if db_res.has_more:
                    query_dict["start_cursor"] = db_res.next_cursor
                else:
                    break
            else:
                break

    def validate_connector_settings(self) -> None:
        if not self.headers.get("Authorization"):
            raise ConnectorMissingCredentialError("Notion credentials not loaded.")

        try:
            # We'll do a minimal search call (page_size=1) to confirm accessibility
            if self.root_page_id:
                # If root_page_id is set, fetch the specific page
                res = rl_requests.get(
                    f"https://api.notion.com/v1/pages/{self.root_page_id}",
                    headers=self.headers,
                    timeout=_NOTION_CALL_TIMEOUT,
                )
            else:
                # If root_page_id is not set, perform a minimal search
                test_query = {
                    "filter": {"property": "object", "value": "page"},
                    "page_size": 1,
                }
                res = rl_requests.post(
                    "https://api.notion.com/v1/search",
                    headers=self.headers,
                    json=test_query,
                    timeout=_NOTION_CALL_TIMEOUT,
                )
            res.raise_for_status()

        except requests.exceptions.HTTPError as http_err:
            status_code = http_err.response.status_code if http_err.response else None

            if status_code == 401:
                raise CredentialExpiredError(
                    "Notion credential appears to be invalid or expired (HTTP 401)."
                )
            elif status_code == 403:
                raise InsufficientPermissionsError(
                    "Your Notion token does not have sufficient permissions (HTTP 403)."
                )
            elif status_code == 404:
                # Typically means resource not found or not shared. Could be root_page_id is invalid.
                raise ConnectorValidationError(
                    "Notion resource not found or not shared with the integration (HTTP 404)."
                )
            elif status_code == 429:
                raise ConnectorValidationError(
                    "Validation failed due to Notion rate-limits being exceeded (HTTP 429). Please try again later."
                )
            else:
                raise UnexpectedValidationError(
                    f"Unexpected Notion HTTP error (status={status_code}): {http_err}"
                ) from http_err

        except Exception as exc:
            raise UnexpectedValidationError(
                f"Unexpected error during Notion settings validation: {exc}"
            )


if __name__ == "__main__":
    import os

    root_page_id = os.environ.get("NOTION_ROOT_PAGE_ID")
    connector = NotionConnector(root_page_id=root_page_id)
    connector.load_credentials(
        {"notion_integration_token": os.environ.get("NOTION_INTEGRATION_TOKEN")}
    )
    document_batches = connector.load_from_state()
    for doc_batch in document_batches:
        for doc in doc_batch:
            print(doc)


================================================
FILE: backend/onyx/connectors/outline/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/outline/client.py
================================================
from typing import Any

import requests
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import RequestException
from requests.exceptions import Timeout

from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS


class OutlineClientRequestFailedError(ConnectionError):
    """Custom error class for handling failed requests to the Outline API with status code and error message"""

    def __init__(self, status: int, error: str) -> None:
        self.status_code = status
        self.error = error
        super().__init__(f"Outline Client request failed with status {status}: {error}")


class OutlineApiClient:
    """Client for interacting with the Outline API. Handles authentication and making HTTP requests."""

    def __init__(
        self,
        api_token: str,
        base_url: str,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.api_token = api_token

    def post(self, endpoint: str, data: dict[str, Any] | None = None) -> dict[str, Any]:
        if data is None:
            data = {}
        url: str = self._build_url(endpoint)
        headers = self._build_headers()

        try:
            response = requests.post(
                url, headers=headers, json=data, timeout=REQUEST_TIMEOUT_SECONDS
            )
        except Timeout:
            raise OutlineClientRequestFailedError(
                408,
                f"Request timed out - server did not respond within {REQUEST_TIMEOUT_SECONDS} seconds",
            )
        except RequestsConnectionError as e:
            raise OutlineClientRequestFailedError(
                -1, f"Connection error - unable to reach Outline server: {e}"
            )
        except RequestException as e:
            raise OutlineClientRequestFailedError(-1, f"Network error occurred: {e}")

        if response.status_code >= 300:
            error = response.reason
            try:
                response_json = response.json()
                if isinstance(response_json, dict):
                    response_error = response_json.get("error", {}).get("message", "")
                    if response_error:
                        error = response_error
            except Exception:
                # If JSON parsing fails, fall back to response.text for better debugging
                if response.text.strip():
                    error = f"{response.reason}: {response.text.strip()}"
            raise OutlineClientRequestFailedError(response.status_code, error)

        try:
            return response.json()
        except Exception:
            raise OutlineClientRequestFailedError(
                response.status_code,
                f"Response was successful but contained invalid JSON: {response.text}",
            )

    def _build_headers(self) -> dict[str, str]:
        return {
            "Authorization": f"Bearer {self.api_token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

    def _build_url(self, endpoint: str) -> str:
        return self.base_url.rstrip("/") + "/api/" + endpoint.lstrip("/")

    def build_app_url(self, endpoint: str) -> str:
        return self.base_url.rstrip("/") + "/" + endpoint.lstrip("/")


================================================
FILE: backend/onyx/connectors/outline/connector.py
================================================
import html
import time
from collections.abc import Callable
from typing import Any

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.connectors.outline.client import OutlineApiClient
from onyx.connectors.outline.client import OutlineClientRequestFailedError


class OutlineConnector(LoadConnector, PollConnector):
    """Connector for Outline knowledge base. Handles authentication, document loading and polling.
    Implements both LoadConnector for initial state loading and PollConnector for incremental updates.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.batch_size = batch_size
        self.outline_client: OutlineApiClient | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        required_keys = ["outline_api_token", "outline_base_url"]
        for key in required_keys:
            if key not in credentials:
                raise ConnectorMissingCredentialError("Outline")

        self.outline_client = OutlineApiClient(
            api_token=credentials["outline_api_token"],
            base_url=credentials["outline_base_url"],
        )
        return None

    @staticmethod
    def _get_doc_batch(
        batch_size: int,
        outline_client: OutlineApiClient,
        endpoint: str,
        transformer: Callable[[OutlineApiClient, dict], Document],
        start_ind: int,
    ) -> tuple[list[Document], int]:
        data = {
            "limit": batch_size,
            "offset": start_ind,
        }

        batch = outline_client.post(endpoint, data=data).get("data", [])
        doc_batch = [transformer(outline_client, item) for item in batch]

        return doc_batch, len(batch)

    @staticmethod
    def _collection_to_document(
        outline_client: OutlineApiClient, collection: dict[str, Any]
    ) -> Document:
        url = outline_client.build_app_url(f"/collection/{collection.get('id')}")
        title = str(collection.get("name", ""))
        name = collection.get("name") or ""
        description = collection.get("description") or ""
        text = name + "\n" + description
        updated_at_str = (
            str(collection.get("updatedAt"))
            if collection.get("updatedAt") is not None
            else None
        )
        return Document(
            id="outline_collection__" + str(collection.get("id")),
            sections=[TextSection(link=url, text=html.unescape(text))],
            source=DocumentSource.OUTLINE,
            semantic_identifier="Collection: " + title,
            title=title,
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "collection"},
        )

    @staticmethod
    def _document_to_document(
        outline_client: OutlineApiClient, document: dict[str, Any]
    ) -> Document:
        url = outline_client.build_app_url(f"/doc/{document.get('id')}")
        title = str(document.get("title", ""))
        doc_title = document.get("title") or ""
        doc_text = document.get("text") or ""
        text = doc_title + "\n" + doc_text
        updated_at_str = (
            str(document.get("updatedAt"))
            if document.get("updatedAt") is not None
            else None
        )
        return Document(
            id="outline_document__" + str(document.get("id")),
            sections=[TextSection(link=url, text=html.unescape(text))],
            source=DocumentSource.OUTLINE,
            semantic_identifier="Document: " + title,
            title=title,
            doc_updated_at=(
                time_str_to_utc(updated_at_str) if updated_at_str is not None else None
            ),
            metadata={"type": "document"},
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        if self.outline_client is None:
            raise ConnectorMissingCredentialError("Outline")

        return self._fetch_documents()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if self.outline_client is None:
            raise ConnectorMissingCredentialError("Outline")

        # Outline API does not support date-based filtering natively,
        # so we implement client-side filtering after fetching documents
        def time_filter(doc: Document) -> bool:
            if doc.doc_updated_at is None:
                return False
            doc_timestamp = doc.doc_updated_at.timestamp()
            if doc_timestamp < start:
                return False
            if doc_timestamp > end:
                return False
            return True

        return self._fetch_documents(time_filter)

    def _fetch_documents(
        self, time_filter: Callable[[Document], bool] | None = None
    ) -> GenerateDocumentsOutput:
        if self.outline_client is None:
            raise ConnectorMissingCredentialError("Outline")

        transform_by_endpoint: dict[
            str, Callable[[OutlineApiClient, dict], Document]
        ] = {
            "documents.list": self._document_to_document,
            "collections.list": self._collection_to_document,
        }

        for endpoint, transform in transform_by_endpoint.items():
            start_ind = 0
            while True:
                doc_batch, num_results = self._get_doc_batch(
                    batch_size=self.batch_size,
                    outline_client=self.outline_client,
                    endpoint=endpoint,
                    transformer=transform,
                    start_ind=start_ind,
                )

                # Apply time filtering if specified
                filtered_batch: list[Document | HierarchyNode] = []
                for doc in doc_batch:
                    if time_filter is None or time_filter(doc):
                        filtered_batch.append(doc)

                start_ind += num_results
                if filtered_batch:
                    yield filtered_batch

                if num_results < self.batch_size:
                    break
                else:
                    time.sleep(0.2)

    def validate_connector_settings(self) -> None:
        """
        Validate that the Outline credentials and connector settings are correct.
        Specifically checks that we can make an authenticated request to Outline.
        """
        if not self.outline_client:
            raise ConnectorMissingCredentialError("Outline")

        try:
            # Use auth.info endpoint for validation
            _ = self.outline_client.post("auth.info", data={})

        except OutlineClientRequestFailedError as e:
            # Check for HTTP status codes
            if e.status_code == 401:
                raise CredentialExpiredError(
                    "Your Outline credentials appear to be invalid or expired (HTTP 401)."
                ) from e
            elif e.status_code == 403:
                raise InsufficientPermissionsError(
                    "The configured Outline token does not have sufficient permissions (HTTP 403)."
                ) from e
            else:
                raise ConnectorValidationError(
                    f"Unexpected Outline error (status={e.status_code}): {e}"
                ) from e

        except Exception as exc:
            raise ConnectorValidationError(
                f"Unexpected error while validating Outline connector settings: {exc}"
            ) from exc


================================================
FILE: backend/onyx/connectors/productboard/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/productboard/connector.py
================================================
from collections.abc import Generator
from itertools import chain
from typing import Any
from typing import cast

import requests
from bs4 import BeautifulSoup
from dateutil import parser
from retry import retry

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger


logger = setup_logger()


_PRODUCT_BOARD_BASE_URL = "https://api.productboard.com"


class ProductboardApiError(Exception):
    pass


class ProductboardConnector(PollConnector):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.batch_size = batch_size
        self.access_token: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.access_token = credentials["productboard_access_token"]
        return None

    def _build_headers(self) -> dict[str, str]:
        return {
            "Authorization": f"Bearer {self.access_token}",
            "X-Version": "1",
        }

    @staticmethod
    def _parse_description_html(description_html: str) -> str:
        soup = BeautifulSoup(description_html, "html.parser")
        return soup.get_text()

    @staticmethod
    def _get_owner_email(productboard_obj: dict[str, Any]) -> str | None:
        owner_dict = cast(dict[str, str] | None, productboard_obj.get("owner"))
        if not owner_dict:
            return None
        return owner_dict.get("email")

    def _fetch_documents(
        self,
        initial_link: str,
    ) -> Generator[dict[str, Any], None, None]:
        headers = self._build_headers()

        @retry(tries=3, delay=1, backoff=2)
        def fetch(link: str) -> dict[str, Any]:
            response = requests.get(link, headers=headers)
            if not response.ok:
                # rate-limiting is at 50 requests per second.
                # The delay in this retry should handle this while this is
                # not parallelized.
                raise ProductboardApiError(
                    f"Failed to fetch from productboard - status code: {response.status_code} - response: {response.text}"
                )

            return response.json()

        curr_link = initial_link
        while True:
            response_json = fetch(curr_link)
            for entity in response_json["data"]:
                yield entity

            curr_link = response_json.get("links", {}).get("next")
            if not curr_link:
                break

    def _get_features(self) -> Generator[Document, None, None]:
        """A Feature is like a ticket in Jira"""
        for feature in self._fetch_documents(
            initial_link=f"{_PRODUCT_BOARD_BASE_URL}/features"
        ):
            owner = self._get_owner_email(feature)
            experts = [BasicExpertInfo(email=owner)] if owner else None

            metadata: dict[str, str | list[str]] = {}
            entity_type = feature.get("type", "feature")
            if entity_type:
                metadata["entity_type"] = str(entity_type)

            status = feature.get("status", {}).get("name")
            if status:
                metadata["status"] = str(status)

            yield Document(
                id=feature["id"],
                sections=[
                    TextSection(
                        link=feature["links"]["html"],
                        text=self._parse_description_html(feature["description"]),
                    )
                ],
                semantic_identifier=feature["name"],
                source=DocumentSource.PRODUCTBOARD,
                doc_updated_at=time_str_to_utc(feature["updatedAt"]),
                primary_owners=experts,
                metadata=metadata,
            )

    def _get_components(self) -> Generator[Document, None, None]:
        """A Component is like an epic in Jira. It contains Features"""
        for component in self._fetch_documents(
            initial_link=f"{_PRODUCT_BOARD_BASE_URL}/components"
        ):
            owner = self._get_owner_email(component)
            experts = [BasicExpertInfo(email=owner)] if owner else None

            yield Document(
                id=component["id"],
                sections=[
                    TextSection(
                        link=component["links"]["html"],
                        text=self._parse_description_html(component["description"]),
                    )
                ],
                semantic_identifier=component["name"],
                source=DocumentSource.PRODUCTBOARD,
                doc_updated_at=time_str_to_utc(component["updatedAt"]),
                primary_owners=experts,
                metadata={
                    "entity_type": "component",
                },
            )

    def _get_products(self) -> Generator[Document, None, None]:
        """A Product is the highest level of organization.
        A Product contains components, which contains features."""
        for product in self._fetch_documents(
            initial_link=f"{_PRODUCT_BOARD_BASE_URL}/products"
        ):
            owner = self._get_owner_email(product)
            experts = [BasicExpertInfo(email=owner)] if owner else None

            yield Document(
                id=product["id"],
                sections=[
                    TextSection(
                        link=product["links"]["html"],
                        text=self._parse_description_html(product["description"]),
                    )
                ],
                semantic_identifier=product["name"],
                source=DocumentSource.PRODUCTBOARD,
                doc_updated_at=time_str_to_utc(product["updatedAt"]),
                primary_owners=experts,
                metadata={
                    "entity_type": "product",
                },
            )

    def _get_objectives(self) -> Generator[Document, None, None]:
        for objective in self._fetch_documents(
            initial_link=f"{_PRODUCT_BOARD_BASE_URL}/objectives"
        ):
            owner = self._get_owner_email(objective)
            experts = [BasicExpertInfo(email=owner)] if owner else None

            metadata: dict[str, str | list[str]] = {
                "entity_type": "objective",
            }
            if objective.get("state"):
                metadata["state"] = str(objective["state"])

            yield Document(
                id=objective["id"],
                sections=[
                    TextSection(
                        link=objective["links"]["html"],
                        text=self._parse_description_html(objective["description"]),
                    )
                ],
                semantic_identifier=objective["name"],
                source=DocumentSource.PRODUCTBOARD,
                doc_updated_at=time_str_to_utc(objective["updatedAt"]),
                primary_owners=experts,
                metadata=metadata,
            )

    def _is_updated_at_out_of_time_range(
        self,
        document: Document,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
    ) -> bool:
        updated_at = cast(str, document.metadata.get("updated_at", ""))
        if updated_at:
            updated_at_datetime = parser.parse(updated_at)
            if (
                updated_at_datetime.timestamp() < start
                or updated_at_datetime.timestamp() > end
            ):
                return True
        else:
            logger.debug(f"Unable to find updated_at for document '{document.id}'")

        return False

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        if self.access_token is None:
            raise PermissionError(
                "Access token is not set up, was load_credentials called?"
            )

        document_batch: list[Document | HierarchyNode] = []

        # NOTE: there is a concept of a "Note" in productboard, however
        # there is no read API for it atm. Additionally, comments are not
        # included with features. Finally, "Releases" are not fetched atm,
        # since they do not provide an updatedAt.
        feature_documents = self._get_features()
        component_documents = self._get_components()
        product_documents = self._get_products()
        objective_documents = self._get_objectives()
        for document in chain(
            feature_documents,
            component_documents,
            product_documents,
            objective_documents,
        ):
            # skip documents that are not in the time range
            if self._is_updated_at_out_of_time_range(document, start, end):
                continue

            document_batch.append(document)
            if len(document_batch) >= self.batch_size:
                yield document_batch
                document_batch = []

        if document_batch:
            yield document_batch


if __name__ == "__main__":
    import os
    import time

    connector = ProductboardConnector()
    connector.load_credentials(
        {
            "productboard_access_token": os.environ["PRODUCTBOARD_ACCESS_TOKEN"],
        }
    )

    current = time.time()
    one_year_ago = current - 24 * 60 * 60 * 360
    latest_docs = connector.poll_source(one_year_ago, current)
    print(next(latest_docs))


================================================
FILE: backend/onyx/connectors/registry.py
================================================
"""Registry mapping for connector classes."""

from pydantic import BaseModel

from onyx.configs.constants import DocumentSource


class ConnectorMapping(BaseModel):
    module_path: str
    class_name: str


# Mapping of DocumentSource to connector details for lazy loading
CONNECTOR_CLASS_MAP = {
    DocumentSource.WEB: ConnectorMapping(
        module_path="onyx.connectors.web.connector",
        class_name="WebConnector",
    ),
    DocumentSource.FILE: ConnectorMapping(
        module_path="onyx.connectors.file.connector",
        class_name="LocalFileConnector",
    ),
    DocumentSource.SLACK: ConnectorMapping(
        module_path="onyx.connectors.slack.connector",
        class_name="SlackConnector",
    ),
    DocumentSource.GITHUB: ConnectorMapping(
        module_path="onyx.connectors.github.connector",
        class_name="GithubConnector",
    ),
    DocumentSource.GMAIL: ConnectorMapping(
        module_path="onyx.connectors.gmail.connector",
        class_name="GmailConnector",
    ),
    DocumentSource.GITLAB: ConnectorMapping(
        module_path="onyx.connectors.gitlab.connector",
        class_name="GitlabConnector",
    ),
    DocumentSource.GITBOOK: ConnectorMapping(
        module_path="onyx.connectors.gitbook.connector",
        class_name="GitbookConnector",
    ),
    DocumentSource.GOOGLE_DRIVE: ConnectorMapping(
        module_path="onyx.connectors.google_drive.connector",
        class_name="GoogleDriveConnector",
    ),
    DocumentSource.BOOKSTACK: ConnectorMapping(
        module_path="onyx.connectors.bookstack.connector",
        class_name="BookstackConnector",
    ),
    DocumentSource.OUTLINE: ConnectorMapping(
        module_path="onyx.connectors.outline.connector",
        class_name="OutlineConnector",
    ),
    DocumentSource.CONFLUENCE: ConnectorMapping(
        module_path="onyx.connectors.confluence.connector",
        class_name="ConfluenceConnector",
    ),
    DocumentSource.JIRA: ConnectorMapping(
        module_path="onyx.connectors.jira.connector",
        class_name="JiraConnector",
    ),
    DocumentSource.PRODUCTBOARD: ConnectorMapping(
        module_path="onyx.connectors.productboard.connector",
        class_name="ProductboardConnector",
    ),
    DocumentSource.SLAB: ConnectorMapping(
        module_path="onyx.connectors.slab.connector",
        class_name="SlabConnector",
    ),
    DocumentSource.CODA: ConnectorMapping(
        module_path="onyx.connectors.coda.connector",
        class_name="CodaConnector",
    ),
    DocumentSource.CANVAS: ConnectorMapping(
        module_path="onyx.connectors.canvas.connector",
        class_name="CanvasConnector",
    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
    ),
    DocumentSource.ZULIP: ConnectorMapping(
        module_path="onyx.connectors.zulip.connector",
        class_name="ZulipConnector",
    ),
    DocumentSource.GURU: ConnectorMapping(
        module_path="onyx.connectors.guru.connector",
        class_name="GuruConnector",
    ),
    DocumentSource.LINEAR: ConnectorMapping(
        module_path="onyx.connectors.linear.connector",
        class_name="LinearConnector",
    ),
    DocumentSource.HUBSPOT: ConnectorMapping(
        module_path="onyx.connectors.hubspot.connector",
        class_name="HubSpotConnector",
    ),
    DocumentSource.DOCUMENT360: ConnectorMapping(
        module_path="onyx.connectors.document360.connector",
        class_name="Document360Connector",
    ),
    DocumentSource.GONG: ConnectorMapping(
        module_path="onyx.connectors.gong.connector",
        class_name="GongConnector",
    ),
    DocumentSource.GOOGLE_SITES: ConnectorMapping(
        module_path="onyx.connectors.google_site.connector",
        class_name="GoogleSitesConnector",
    ),
    DocumentSource.ZENDESK: ConnectorMapping(
        module_path="onyx.connectors.zendesk.connector",
        class_name="ZendeskConnector",
    ),
    DocumentSource.LOOPIO: ConnectorMapping(
        module_path="onyx.connectors.loopio.connector",
        class_name="LoopioConnector",
    ),
    DocumentSource.DROPBOX: ConnectorMapping(
        module_path="onyx.connectors.dropbox.connector",
        class_name="DropboxConnector",
    ),
    DocumentSource.SHAREPOINT: ConnectorMapping(
        module_path="onyx.connectors.sharepoint.connector",
        class_name="SharepointConnector",
    ),
    DocumentSource.TEAMS: ConnectorMapping(
        module_path="onyx.connectors.teams.connector",
        class_name="TeamsConnector",
    ),
    DocumentSource.SALESFORCE: ConnectorMapping(
        module_path="onyx.connectors.salesforce.connector",
        class_name="SalesforceConnector",
    ),
    DocumentSource.DISCOURSE: ConnectorMapping(
        module_path="onyx.connectors.discourse.connector",
        class_name="DiscourseConnector",
    ),
    DocumentSource.AXERO: ConnectorMapping(
        module_path="onyx.connectors.axero.connector",
        class_name="AxeroConnector",
    ),
    DocumentSource.CLICKUP: ConnectorMapping(
        module_path="onyx.connectors.clickup.connector",
        class_name="ClickupConnector",
    ),
    DocumentSource.MEDIAWIKI: ConnectorMapping(
        module_path="onyx.connectors.mediawiki.wiki",
        class_name="MediaWikiConnector",
    ),
    DocumentSource.WIKIPEDIA: ConnectorMapping(
        module_path="onyx.connectors.wikipedia.connector",
        class_name="WikipediaConnector",
    ),
    DocumentSource.ASANA: ConnectorMapping(
        module_path="onyx.connectors.asana.connector",
        class_name="AsanaConnector",
    ),
    DocumentSource.S3: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.R2: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.GOOGLE_CLOUD_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.OCI_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.XENFORO: ConnectorMapping(
        module_path="onyx.connectors.xenforo.connector",
        class_name="XenforoConnector",
    ),
    DocumentSource.DISCORD: ConnectorMapping(
        module_path="onyx.connectors.discord.connector",
        class_name="DiscordConnector",
    ),
    DocumentSource.FRESHDESK: ConnectorMapping(
        module_path="onyx.connectors.freshdesk.connector",
        class_name="FreshdeskConnector",
    ),
    DocumentSource.FIREFLIES: ConnectorMapping(
        module_path="onyx.connectors.fireflies.connector",
        class_name="FirefliesConnector",
    ),
    DocumentSource.EGNYTE: ConnectorMapping(
        module_path="onyx.connectors.egnyte.connector",
        class_name="EgnyteConnector",
    ),
    DocumentSource.AIRTABLE: ConnectorMapping(
        module_path="onyx.connectors.airtable.airtable_connector",
        class_name="AirtableConnector",
    ),
    DocumentSource.HIGHSPOT: ConnectorMapping(
        module_path="onyx.connectors.highspot.connector",
        class_name="HighspotConnector",
    ),
    DocumentSource.DRUPAL_WIKI: ConnectorMapping(
        module_path="onyx.connectors.drupal_wiki.connector",
        class_name="DrupalWikiConnector",
    ),
    DocumentSource.IMAP: ConnectorMapping(
        module_path="onyx.connectors.imap.connector",
        class_name="ImapConnector",
    ),
    DocumentSource.BITBUCKET: ConnectorMapping(
        module_path="onyx.connectors.bitbucket.connector",
        class_name="BitbucketConnector",
    ),
    DocumentSource.TESTRAIL: ConnectorMapping(
        module_path="onyx.connectors.testrail.connector",
        class_name="TestRailConnector",
    ),
    # just for integration tests
    DocumentSource.MOCK_CONNECTOR: ConnectorMapping(
        module_path="onyx.connectors.mock_connector.connector",
        class_name="MockConnector",
    ),
}


================================================
FILE: backend/onyx/connectors/requesttracker/.gitignore
================================================
.env


================================================
FILE: backend/onyx/connectors/requesttracker/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/requesttracker/connector.py
================================================
# from datetime import datetime
# from datetime import timezone
# from logging import DEBUG as LOG_LVL_DEBUG
# from typing import Any
# from typing import List
# from typing import Optional
# from rt.rest1 import ALL_QUEUES
# from rt.rest1 import Rt
# from onyx.configs.app_configs import INDEX_BATCH_SIZE
# from onyx.configs.constants import DocumentSource
# from onyx.connectors.interfaces import GenerateDocumentsOutput
# from onyx.connectors.interfaces import PollConnector
# from onyx.connectors.interfaces import SecondsSinceUnixEpoch
# from onyx.connectors.models import ConnectorMissingCredentialError
# from onyx.connectors.models import Document
# from onyx.connectors.models import Section
# from onyx.utils.logger import setup_logger
# logger = setup_logger()
# class RequestTrackerError(Exception):
#     pass
# class RequestTrackerConnector(PollConnector):
#     def __init__(
#         self,
#         batch_size: int = INDEX_BATCH_SIZE,
#     ) -> None:
#         self.batch_size = batch_size
#     def txn_link(self, tid: int, txn: int) -> str:
#         return f"{self.rt_base_url}/Ticket/Display.html?id={tid}&txn={txn}"
#     def build_doc_sections_from_txn(
#         self, connection: Rt, ticket_id: int
#     ) -> List[Section]:
#         Sections: List[Section] = []
#         get_history_resp = connection.get_history(ticket_id)
#         if get_history_resp is None:
#             raise RequestTrackerError(f"Ticket {ticket_id} cannot be found")
#         for tx in get_history_resp:
#             Sections.append(
#                 Section(
#                     link=self.txn_link(ticket_id, int(tx["id"])),
#                     text="\n".join(
#                         [
#                             f"{k}:\n{v}\n" if k != "Attachments" else ""
#                             for (k, v) in tx.items()
#                         ]
#                     ),
#                 )
#             )
#         return Sections
#     def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
#         self.rt_username = credentials.get("requesttracker_username")
#         self.rt_password = credentials.get("requesttracker_password")
#         self.rt_base_url = credentials.get("requesttracker_base_url")
#         return None
#     # This does not include RT file attachments yet.
#     def _process_tickets(
#         self, start: datetime, end: datetime
#     ) -> GenerateDocumentsOutput:
#         if any([self.rt_username, self.rt_password, self.rt_base_url]) is None:
#             raise ConnectorMissingCredentialError("requesttracker")
#         Rt0 = Rt(
#             f"{self.rt_base_url}/REST/1.0/",
#             self.rt_username,
#             self.rt_password,
#         )
#         Rt0.login()
#         d0 = start.strftime("%Y-%m-%d %H:%M:%S")
#         d1 = end.strftime("%Y-%m-%d %H:%M:%S")
#         tickets = Rt0.search(
#             Queue=ALL_QUEUES,
#             raw_query=f"Updated > '{d0}' AND Updated < '{d1}'",
#         )
#         doc_batch: List[Document] = []
#         for ticket in tickets:
#             ticket_keys_to_omit = ["id", "Subject"]
#             tid: int = int(ticket["numerical_id"])
#             ticketLink: str = f"{self.rt_base_url}/Ticket/Display.html?id={tid}"
#             logger.info(f"Processing ticket {tid}")
#             doc = Document(
#                 id=ticket["id"],
#                 # Will add title to the first section later in processing
#                 sections=[Section(link=ticketLink, text="")]
#                 + self.build_doc_sections_from_txn(Rt0, tid),
#                 source=DocumentSource.REQUESTTRACKER,
#                 semantic_identifier=ticket["Subject"],
#                 metadata={
#                     key: value
#                     for key, value in ticket.items()
#                     if key not in ticket_keys_to_omit
#                 },
#             )
#             doc_batch.append(doc)
#             if len(doc_batch) >= self.batch_size:
#                 yield doc_batch
#                 doc_batch = []
#         if doc_batch:
#             yield doc_batch
#     def poll_source(
#         self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
#     ) -> GenerateDocumentsOutput:
#         # Keep query short, only look behind 1 day at maximum
#         one_day_ago: float = end - (24 * 60 * 60)
#         _start: float = start if start > one_day_ago else one_day_ago
#         start_datetime = datetime.fromtimestamp(_start, tz=timezone.utc)
#         end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
#         yield from self._process_tickets(start_datetime, end_datetime)
# if __name__ == "__main__":
#     import time
#     import os
#     from dotenv import load_dotenv
#     load_dotenv()
#     logger.setLevel(LOG_LVL_DEBUG)
#     rt_connector = RequestTrackerConnector()
#     rt_connector.load_credentials(
#         {
#             "requesttracker_username": os.getenv("RT_USERNAME"),
#             "requesttracker_password": os.getenv("RT_PASSWORD"),
#             "requesttracker_base_url": os.getenv("RT_BASE_URL"),
#         }
#     )
#     current = time.time()
#     one_day_ago = current - (24 * 60 * 60)  # 1 days
#     latest_docs = rt_connector.poll_source(one_day_ago, current)
#     for doc in latest_docs:
#         print(doc)


================================================
FILE: backend/onyx/connectors/salesforce/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/salesforce/blacklist.py
================================================
# NOTE(rkuo): I can't find an actual API that allows us to distinguish
# broken/incompatible objects from regular ones.
# taking hints from
# https://docs.resco.net/wiki/Salesforce_object_blacklist

SALESFORCE_BLACKLISTED_PREFIXES: set[str] = set(
    [
        "process",
        "aura",
        "app",
        "auth",
        "duplicate",
        "secure",
        "data",
        "listemail",
        "fsl__optimization",
        "fsl_scheduling",
        "feed",
        "chatter",
    ]
)

SALESFORCE_BLACKLISTED_SUFFIXES: set[str] = set(
    [
        "history",
        "share",
        "__tag",
        "__hd",
        "feed",
        "changeevent",
        "__ka",
        "__votestat",
        "__viewstat",
        "__kav",
        "__datacategoryselection",
        "subscription",
        "definition",
        "eventstream",
        "__mdt",
    ]
)

SALESFORCE_BLACKLISTED_OBJECTS: set[str] = set(
    [
        "acceptedeventrelation",
        "accountchangeevent",
        "accountcontactrole",
        "accountcontactrolechangeevent",
        "accounthistory",
        "accountshare",
        "actionlinkgrouptemplate",
        "actionlinktemplate",
        "activityhistory",
        "adminsetupevent",
        "aggregateresult",
        "announcement",
        "apexclass",
        "apexcomponent",
        "apexemailnotification",
        "apexlog",
        "apexpage",
        "apexpageinfo",
        "apextestqueueitem",
        "apextestresult",
        "apextestresultlimits",
        "apextestrunresult",
        "apextestsuite",
        "apextrigger",
        "apievent",
        "apptabmember",
        "assetchangeevent",
        "assethistory",
        "assetrelationshiphistory",
        "assettokenevent",
        "assignmentrule",
        "asyncapexjob",
        "backgroundoperation",
        "backgroundoperationresult",
        "batchapexerrorevent",
        "brandingset",
        "brandingsetproperty",
        "brandtemplate",
        "businessprocess",
        "campaignchangeevent",
        "campaignhistory",
        "campaignshare",
        "casechangeevent",
        "caseexternaldocument",
        "casehistory",
        "caseshare",
        "clientbrowser",
        "collaborationgroup",
        "collaborationgroupmember",
        "collaborationgroupmemberrequest",
        "collaborationinvitation",
        "connectedapplication",
        "contactchangeevent",
        "contacthistory",
        "contactrequest",
        "contactrequestshare",
        "contactshare",
        "contentasset",
        "contentbody",
        "contentdocumenthistory",
        "contenthubrepository",
        "contenttagsubscription",
        "contentusersubscription",
        "contentversionhistory",
        "contracthistory",
        "corswhitelistentry",
        "cronjobdetail",
        "crontrigger",
        "csptrustedsite",
        "custombrand",
        "custombrandasset",
        "customhelpmenuitem",
        "customhelpmenusection",
        "customhttpheader",
        "customobjectuserlicensemetrics",
        "custompermission",
        "custompermissiondependency",
        "dandbcompany",
        "dashboard",
        "dashboardcomponent",
        "digitalsignature",
        "documentattachmentmap",
        "domain",
        "domainsite",
        "emailcapture",
        "emaildomainfilter",
        "emaildomainkey",
        "emailrelay",
        "emailservicesaddress",
        "emailservicesfunction",
        "emailstatus",
        "emailtemplate",
        "embeddedservicedetail",
        "embeddedservicelabel",
        "entityparticle",
        "eventbussubscriber",
        "eventchangeevent",
        "eventlogfile",
        "eventrelationchangeevent",
        "expressionfilter",
        "expressionfiltercriteria",
        "externaldatasource",
        "externaldatauserauth",
        "fieldhistoryarchive",
        "fieldpermissions",
        "fieldservicemobilesettings",
        "filesearchactivity",
        "fiscalyearsettings",
        "flexqueueitem",
        "flowinterview",
        "flowinterviewshare",
        "flowrecordrelation",
        "flowstagerelation",
        "forecastingshare",
        "forecastshare",
        "fsl__criteria__c",
        "fsl__gantt_filter__c",
        "fsl__ganttpalette__c",
        "fsl__service_goal__c",
        "fsl__slr_cache__c",
        "fsl__territory_optimization_request__c",
        "goalhistory",
        "goalshare",
        "grantedbylicense",
        "idpeventlog",
        "iframewhitelisturl",
        "image",
        "imageshare",
        "installedmobileapp",
        "leadchangeevent",
        "leadhistory",
        "leadshare",
        "lightningexitbypagemetrics",
        "lightningexperiencetheme",
        "lightningtogglemetrics",
        "lightningusagebyapptypemetrics",
        "lightningusagebybrowsermetrics",
        "lightningusagebyflexipagemetrics",
        "lightningusagebypagemetrics",
        "linkedarticle",
        "listemailchangeevent",
        "listemailshare",
        "listview",
        "listviewchart",
        "listviewchartinstance",
        "listviewevent",
        "loginasevent",
        "loginevent",
        "logingeo",
        "loginhistory",
        "loginip",
        "logoutevent",
        "lookedupfromactivity",
        "macro",
        "macrohistory",
        "macroinstruction",
        "macroshare",
        "mailmergetemplate",
        "matchingrule",
        "matchingruleitem",
        "metricdatalinkhistory",
        "metrichistory",
        "metricshare",
        "mobilesettingsassignment",
        "mydomaindiscoverablelogin",
        "name",
        "namedcredential",
        "noteandattachment",
        "notificationmember",
        "oauthtoken",
        "objectpermissions",
        "onboardingmetrics",
        "openactivity",
        "opportunitychangeevent",
        "opportunitycontactrolechangeevent",
        "opportunityfieldhistory",
        "opportunityhistory",
        "opportunityshare",
        "orderchangeevent",
        "orderhistory",
        "orderitemchangeevent",
        "orderitemhistory",
        "ordershare",
        "orgdeleterequest",
        "orgdeleterequestshare",
        "orglifecyclenotification",
        "orgwideemailaddress",
        "outgoingemail",
        "outgoingemailrelation",
        "ownerchangeoptioninfo",
        "packagelicense",
        "period",
        "permissionsetlicense",
        "permissionsetlicenseassign",
        "permissionsettabsetting",
        "person",
        "picklistvalueinfo",
        "platformaction",
        "platformcachepartition",
        "platformcachepartitiontype",
        "platformstatusalertevent",
        "pricebook2history",
        "processinstancehistory",
        "product2changeevent",
        "product2history",
        "publisher",
        "pushtopic",
        "pushupgradeexcludedorg",
        "quicktexthistory",
        "quicktextshare",
        "quotetemplaterichtextdata",
        "recordaction",
        "recordactionhistory",
        "recordvisibility",
        "relationshipdomain",
        "relationshipinfo",
        "reportevent",
        "samlssoconfig",
        "scontrol",
        "searchactivity",
        "searchlayout",
        "searchpromotionrule",
        "securitycustombaseline",
        "servicereportlayout",
        "sessionpermsetactivation",
        "setupaudittrail",
        "setupentityaccess",
        "site",
        "sitedetail",
        "sitehistory",
        "siteiframewhitelisturl",
        "solutionhistory",
        "sosdeployment",
        "sossession",
        "sossessionactivity",
        "sossessionhistory",
        "sossessionshare",
        "staticresource",
        "streamingchannel",
        "streamingchannelshare",
        "subscriberpackage",
        "subscriberpackageversion",
        "taskchangeevent",
        "tenantusageentitlement",
        "testsuitemembership",
        "thirdpartyaccountlink",
        "todaygoal",
        "todaygoalshare",
        "transactionsecuritypolicy",
        "twofactorinfo",
        "twofactormethodsinfo",
        "twofactortempcode",
        "urievent",
        "userappinfo",
        "userappmenucustomization",
        "userappmenucustomizationshare",
        "userappmenuitem",
        "userchangeevent",
        "useremailpreferredperson",
        "useremailpreferredpersonshare",
        "userentityaccess",
        "userfieldaccess",
        "userlicense",
        "userlistview",
        "userlistviewcriterion",
        "userlogin",
        "userpackagelicense",
        "userpermissionaccess",
        "userpreference",
        "userprovaccount",
        "userprovaccountstaging",
        "userprovisioningconfig",
        "userprovisioninglog",
        "userprovisioningrequest",
        "userprovisioningrequestshare",
        "userprovmocktarget",
        "userrecordaccess",
        "usershare",
        "verificationhistory",
        "visibilitychangenotification",
        "visualforceaccessmetrics",
        "waveautoinstallrequest",
        "wavecompatibilitycheckitem",
        "weblink",
        "workcoachinghistory",
        "workcoachingshare",
        "workfeedbackhistory",
        "workfeedbackquestion",
        "workfeedbackquestionhistory",
        "workfeedbackquestionsethistory",
        "workfeedbackquestionsetshare",
        "workfeedbackquestionshare",
        "workfeedbackrequesthistory",
        "workfeedbackrequestshare",
        "workfeedbackshare",
        "workfeedbacktemplateshare",
        "workperformancecyclehistory",
        "workperformancecycleshare",
    ]
)


================================================
FILE: backend/onyx/connectors/salesforce/connector.py
================================================
import csv
import gc
import json
import os
import sys
import tempfile
import time
from collections import defaultdict
from collections.abc import Callable
from pathlib import Path
from typing import Any
from typing import cast

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.salesforce.doc_conversion import convert_sf_object_to_doc
from onyx.connectors.salesforce.doc_conversion import convert_sf_query_result_to_doc
from onyx.connectors.salesforce.doc_conversion import ID_PREFIX
from onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce
from onyx.connectors.salesforce.salesforce_calls import fetch_all_csvs_in_parallel
from onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite
from onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE
from onyx.connectors.salesforce.utils import ID_FIELD
from onyx.connectors.salesforce.utils import MODIFIED_FIELD
from onyx.connectors.salesforce.utils import NAME_FIELD
from onyx.connectors.salesforce.utils import USER_OBJECT_TYPE
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _convert_to_metadata_value(value: Any) -> str | list[str]:
    """Convert a Salesforce field value to a valid metadata value.

    Document metadata expects str | list[str], but Salesforce returns
    various types (bool, float, int, etc.). This function ensures all
    values are properly converted to strings.
    """
    if isinstance(value, list):
        return [str(item) for item in value]
    return str(value)


_DEFAULT_PARENT_OBJECT_TYPES = [ACCOUNT_OBJECT_TYPE]

_DEFAULT_ATTRIBUTES_TO_KEEP: dict[str, dict[str, str]] = {
    "Opportunity": {
        ACCOUNT_OBJECT_TYPE: "account",
        "FiscalQuarter": "fiscal_quarter",
        "FiscalYear": "fiscal_year",
        "IsClosed": "is_closed",
        NAME_FIELD: "name",
        "StageName": "stage_name",
        "Type": "type",
        "Amount": "amount",
        "CloseDate": "close_date",
        "Probability": "probability",
        "CreatedDate": "created_date",
        MODIFIED_FIELD: "last_modified_date",
    },
    "Contact": {
        ACCOUNT_OBJECT_TYPE: "account",
        "CreatedDate": "created_date",
        MODIFIED_FIELD: "last_modified_date",
    },
}


class SalesforceCheckpoint(ConnectorCheckpoint):
    initial_sync_complete: bool
    current_timestamp: SecondsSinceUnixEpoch


class SalesforceConnectorContext:
    parent_types: set[str] = set()
    child_types: set[str] = set()
    parent_to_child_types: dict[str, set[str]] = {}  # map from parent to child types
    child_to_parent_types: dict[str, set[str]] = {}  # map from child to parent types
    parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}
    type_to_queryable_fields: dict[str, set[str]] = {}
    prefix_to_type: dict[str, str] = {}  # infer the object type of an id immediately

    parent_to_child_relationships: dict[str, set[str]] = (
        {}
    )  # map from parent to child relationships
    parent_to_relationship_queryable_fields: dict[str, dict[str, set[str]]] = (
        {}
    )  # map from relationship to queryable fields

    parent_child_names_to_relationships: dict[str, str] = {}


def _extract_fields_and_associations_from_config(
    config: dict[str, Any], object_type: str
) -> tuple[list[str] | None, dict[str, list[str]]]:
    """
    Extract fields and associations for a specific object type from custom config.

    Returns:
        tuple of (fields_list, associations_dict)
        - fields_list: List of fields to query, or None if not specified (use all)
        - associations_dict: Dict mapping association names to their config
    """
    if object_type not in config:
        return None, {}

    obj_config = config[object_type]
    fields = obj_config.get("fields")
    associations = obj_config.get("associations", {})

    return fields, associations


def _validate_custom_query_config(config: dict[str, Any]) -> None:
    """
    Validate the structure of the custom query configuration.
    """

    for object_type, obj_config in config.items():
        if not isinstance(obj_config, dict):
            raise ValueError(
                f"top level object {object_type} must be mapped to a dictionary"
            )

        # Check if fields is a list when present
        if "fields" in obj_config:
            if not isinstance(obj_config["fields"], list):
                raise ValueError("if fields key exists, value must be a list")
            for v in obj_config["fields"]:
                if not isinstance(v, str):
                    raise ValueError(f"if fields list value {v} is not a string")

        # Check if associations is a dict when present
        if "associations" in obj_config:
            if not isinstance(obj_config["associations"], dict):
                raise ValueError(
                    "if associations key exists, value must be a dictionary"
                )
            for assoc_name, assoc_fields in obj_config["associations"].items():
                if not isinstance(assoc_fields, list):
                    raise ValueError(
                        f"associations list value {assoc_fields} for key {assoc_name} is not a list"
                    )
                for v in assoc_fields:
                    if not isinstance(v, str):
                        raise ValueError(
                            f"if associations list value {v} is not a string"
                        )


class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
    """Approach outline

    Goal
    - get data for every record of every parent object type
    - The data should consist of the parent object record and all direct child relationship objects


    Initial sync
    - Does a full sync, then indexes each parent object + children as a document via
    the local sqlite db

    - get the first level children object types of parent object types
    - bulk export all object types to CSV
    -- NOTE: bulk exports of an object type contain parent id's, but not child id's
    - Load all CSV's to the DB
    - generate all parent object types as documents and yield them

    - Initial sync's must always be for the entire dataset.
      Otherwise, you can have cases where some records relate to other records that were
      updated recently. The more recently updated records will not be pulled down in the query.

    Delta sync's
    - delta sync's detect changes in parent objects, then perform a full sync of
    each parent object and its children

    If loading the entire db, this approach is much slower. For deltas, it works well.

    - query all changed records (includes children and parents)
    - extrapolate all changed parent objects
    - for each parent object, construct a query and yield the result back

    - Delta sync's can be done object by object by identifying the parent id of any changed
      record, and querying a single record at a time to get all the updated data.  In this way,
      we avoid having to keep a locally synchronized copy of the entire salesforce db.

    TODO: verify record to doc conversion
    figure out why sometimes the field names are missing.
    """

    MAX_BATCH_BYTES = 1024 * 1024
    LOG_INTERVAL = 10.0  # how often to log stats in loop heavy parts of the connector

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        requested_objects: list[str] = [],
        custom_query_config: str | None = None,
    ) -> None:
        self.batch_size = batch_size
        self._sf_client: OnyxSalesforce | None = None

        # Validate and store custom query config
        if custom_query_config:
            config_json = json.loads(custom_query_config)
            self.custom_query_config: dict[str, Any] | None = config_json
            # If custom query config is provided, use the object types from it
            self.parent_object_list = list(config_json.keys())
        else:
            self.custom_query_config = None
            # Use the traditional requested_objects approach
            self.parent_object_list = (
                [obj.strip().capitalize() for obj in requested_objects]
                if requested_objects
                else _DEFAULT_PARENT_OBJECT_TYPES
            )

    def load_credentials(
        self,
        credentials: dict[str, Any],
    ) -> dict[str, Any] | None:
        domain = "test" if credentials.get("is_sandbox") else None
        self._sf_client = OnyxSalesforce(
            username=credentials["sf_username"],
            password=credentials["sf_password"],
            security_token=credentials["sf_security_token"],
            domain=domain,
        )
        return None

    @property
    def sf_client(self) -> OnyxSalesforce:
        if self._sf_client is None:
            raise ConnectorMissingCredentialError("Salesforce")
        return self._sf_client

    @staticmethod
    def reconstruct_object_types(directory: str) -> dict[str, list[str] | None]:
        """
        Scans the given directory for all CSV files and reconstructs the available object types.
        Assumes filenames are formatted as "ObjectType.filename.csv" or "ObjectType.csv".

        Args:
            directory (str): The path to the directory containing CSV files.

        Returns:
            dict[str, list[str]]: A dictionary mapping object types to lists of file paths.
        """
        object_types = defaultdict(list)

        for filename in os.listdir(directory):
            if filename.endswith(".csv"):
                parts = filename.split(".", 1)  # Split on the first period
                object_type = parts[0]  # Take the first part as the object type
                object_types[object_type].append(os.path.join(directory, filename))

        return dict(object_types)

    @staticmethod
    def _download_object_csvs(
        all_types_to_filter: dict[str, bool],
        queryable_fields_by_type: dict[str, set[str]],
        directory: str,
        sf_client: OnyxSalesforce,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> None:
        # checkpoint - we've found all object types, now time to fetch the data
        logger.info("Fetching CSVs for all object types")

        # This takes like 30 minutes first time and <2 minutes for updates
        object_type_to_csv_path = fetch_all_csvs_in_parallel(
            sf_client=sf_client,
            all_types_to_filter=all_types_to_filter,
            queryable_fields_by_type=queryable_fields_by_type,
            start=start,
            end=end,
            target_dir=directory,
        )

        # print useful information
        num_csvs = 0
        num_bytes = 0
        for object_type, csv_paths in object_type_to_csv_path.items():
            if not csv_paths:
                continue

            for csv_path in csv_paths:
                if not csv_path:
                    continue

                file_path = Path(csv_path)
                file_size = file_path.stat().st_size
                num_csvs += 1
                num_bytes += file_size
                logger.info(
                    f"CSV download: object_type={object_type} path={csv_path} bytes={file_size}"
                )

        logger.info(
            f"CSV download total: total_csvs={num_csvs} total_bytes={num_bytes}"
        )

    @staticmethod
    def _load_csvs_to_db(
        csv_directory: str, remove_ids: bool, sf_db: OnyxSalesforceSQLite
    ) -> dict[str, str]:
        """
        Returns a dict of id to object type. Each id is a newly seen row in salesforce.
        """

        updated_ids: dict[str, str] = {}

        object_type_to_csv_path = SalesforceConnector.reconstruct_object_types(
            csv_directory
        )

        # NOTE(rkuo): this timing note is meaningless without a reference point in terms
        # of number of records, etc
        # This takes like 10 seconds

        # This is for testing the rest of the functionality if data has
        # already been fetched and put in sqlite
        # from import onyx.connectors.salesforce.sf_db.sqlite_functions find_ids_by_type
        # for object_type in self.parent_object_list:
        #     updated_ids.update(list(find_ids_by_type(object_type)))

        # This takes 10-70 minutes first time (idk why the range is so big)
        total_types = len(object_type_to_csv_path)
        logger.info(f"Starting to process {total_types} object types")

        for i, (object_type, csv_paths) in enumerate(
            object_type_to_csv_path.items(), 1
        ):
            logger.info(f"Processing object type {object_type} ({i}/{total_types})")
            # If path is None, it means it failed to fetch the csv
            if csv_paths is None:
                continue

            # Go through each csv path and use it to update the db
            for csv_path in csv_paths:
                num_records = 0

                logger.debug(
                    f"Processing CSV: object_type={object_type} "
                    f"csv={csv_path} "
                    f"len={Path(csv_path).stat().st_size} "
                    f"records={num_records}"
                )

                with open(csv_path, "r", newline="", encoding="utf-8") as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        num_records += 1

                new_ids = sf_db.update_from_csv(
                    object_type=object_type,
                    csv_download_path=csv_path,
                    remove_ids=remove_ids,
                )
                for new_id in new_ids:
                    updated_ids[new_id] = object_type

                sf_db.flush()

                logger.debug(
                    f"Added {len(new_ids)} new/updated records for {object_type}"
                )

                logger.info(
                    f"Processed CSV: object_type={object_type} "
                    f"csv={csv_path} "
                    f"len={Path(csv_path).stat().st_size} "
                    f"records={num_records} "
                    f"db_len={sf_db.file_size}"
                )
                os.remove(csv_path)

        return updated_ids

    # @staticmethod
    # def _get_child_types(
    #     parent_types: list[str], sf_client: OnyxSalesforce
    # ) -> set[str]:
    #     all_types: set[str] = set(parent_types)

    #     # Step 1 - get all object types
    #     logger.info(f"Parent object types: num={len(parent_types)} list={parent_types}")

    #     # This takes like 20 seconds
    #     for parent_object_type in parent_types:
    #         child_types = sf_client.get_children_of_sf_type(parent_object_type)
    #         logger.debug(
    #             f"Found {len(child_types)} child types for {parent_object_type}"
    #         )

    #         all_types.update(child_types.keys())

    #     # Always want to make sure user is grabbed for permissioning purposes
    #     all_types.add(USER_OBJECT_TYPE)
    #     # Always want to make sure account is grabbed for reference purposes
    #     all_types.add(ACCOUNT_OBJECT_TYPE)

    #     logger.info(f"All object types: num={len(all_types)} list={all_types}")

    #     # gc.collect()
    #     return all_types

    # @staticmethod
    # def _get_all_types(parent_types: list[str], sf_client: Salesforce) -> set[str]:
    #     all_types: set[str] = set(parent_types)

    #     # Step 1 - get all object types
    #     logger.info(f"Parent object types: num={len(parent_types)} list={parent_types}")

    #     # This takes like 20 seconds
    #     for parent_object_type in parent_types:
    #         child_types = get_children_of_sf_type(sf_client, parent_object_type)
    #         logger.debug(
    #             f"Found {len(child_types)} child types for {parent_object_type}"
    #         )

    #         all_types.update(child_types)

    #     # Always want to make sure user is grabbed for permissioning purposes
    #     all_types.add(USER_OBJECT_TYPE)

    #     logger.info(f"All object types: num={len(all_types)} list={all_types}")

    #     # gc.collect()
    #     return all_types

    def _yield_doc_batches(
        self,
        sf_db: OnyxSalesforceSQLite,
        type_to_processed: dict[str, int],
        changed_ids_to_type: dict[str, str],
        parent_types: set[str],
        increment_parents_changed: Callable[[], None],
    ) -> GenerateDocumentsOutput:
        """ """
        docs_to_yield: list[Document | HierarchyNode] = []
        docs_to_yield_bytes = 0

        last_log_time = 0.0

        for (
            parent_type,
            parent_id,
            examined_ids,
        ) in sf_db.get_changed_parent_ids_by_type(
            changed_ids=list(changed_ids_to_type.keys()),
            parent_types=parent_types,
        ):
            now = time.monotonic()

            processed = examined_ids - 1
            if now - last_log_time > SalesforceConnector.LOG_INTERVAL:
                logger.info(
                    f"Processing stats: {type_to_processed} "
                    f"file_size={sf_db.file_size} "
                    f"processed={processed} "
                    f"remaining={len(changed_ids_to_type) - processed}"
                )
                last_log_time = now

            type_to_processed[parent_type] = type_to_processed.get(parent_type, 0) + 1

            parent_object = sf_db.get_record(parent_id, parent_type)
            if not parent_object:
                logger.warning(
                    f"Failed to get parent object {parent_id} for {parent_type}"
                )
                continue

            # use the db to create a document we can yield
            doc = convert_sf_object_to_doc(
                sf_db,
                sf_object=parent_object,
                sf_instance=self.sf_client.sf_instance,
            )

            doc.metadata["object_type"] = parent_type

            # Add default attributes to the metadata
            for (
                sf_attribute,
                canonical_attribute,
            ) in _DEFAULT_ATTRIBUTES_TO_KEEP.get(parent_type, {}).items():
                if sf_attribute in parent_object.data:
                    doc.metadata[canonical_attribute] = _convert_to_metadata_value(
                        parent_object.data[sf_attribute]
                    )

            doc_sizeof = sys.getsizeof(doc)
            docs_to_yield_bytes += doc_sizeof
            docs_to_yield.append(doc)
            increment_parents_changed()

            # memory usage is sensitive to the input length, so we're yielding immediately
            # if the batch exceeds a certain byte length
            if (
                len(docs_to_yield) >= self.batch_size
                or docs_to_yield_bytes > SalesforceConnector.MAX_BATCH_BYTES
            ):
                yield docs_to_yield
                docs_to_yield = []
                docs_to_yield_bytes = 0

                # observed a memory leak / size issue with the account table if we don't gc.collect here.
                gc.collect()

        yield docs_to_yield

    def _full_sync(
        self,
        temp_dir: str,
    ) -> GenerateDocumentsOutput:
        type_to_processed: dict[str, int] = {}

        logger.info("_fetch_from_salesforce starting (full sync).")
        if not self._sf_client:
            raise RuntimeError("self._sf_client is None!")

        changed_ids_to_type: dict[str, str] = {}
        parents_changed = 0
        examined_ids = 0

        sf_db = OnyxSalesforceSQLite(os.path.join(temp_dir, "salesforce_db.sqlite"))
        sf_db.connect()

        try:
            sf_db.apply_schema()
            sf_db.log_stats()

            ctx = self._make_context(
                None, None, temp_dir, self.parent_object_list, self._sf_client
            )
            gc.collect()

            # Step 2 - load CSV's to sqlite
            object_type_to_csv_paths = SalesforceConnector.reconstruct_object_types(
                temp_dir
            )

            total_types = len(object_type_to_csv_paths)
            logger.info(f"Starting to process {total_types} object types")

            for i, (object_type, csv_paths) in enumerate(
                object_type_to_csv_paths.items(), 1
            ):
                logger.info(f"Processing object type {object_type} ({i}/{total_types})")
                # If path is None, it means it failed to fetch the csv
                if csv_paths is None:
                    continue

                # Go through each csv path and use it to update the db
                for csv_path in csv_paths:
                    num_records = 0
                    with open(csv_path, "r", newline="", encoding="utf-8") as f:
                        reader = csv.DictReader(f)
                        for row in reader:
                            num_records += 1

                    logger.debug(
                        f"Processing CSV: object_type={object_type} "
                        f"csv={csv_path} "
                        f"len={Path(csv_path).stat().st_size} "
                        f"records={num_records}"
                    )

                    new_ids = sf_db.update_from_csv(
                        object_type=object_type,
                        csv_download_path=csv_path,
                    )
                    for new_id in new_ids:
                        changed_ids_to_type[new_id] = object_type

                    sf_db.flush()

                    logger.debug(
                        f"Added {len(new_ids)} new/updated records for {object_type}"
                    )

                    logger.info(
                        f"Processed CSV: object_type={object_type} "
                        f"csv={csv_path} "
                        f"len={Path(csv_path).stat().st_size} "
                        f"records={num_records} "
                        f"db_len={sf_db.file_size}"
                    )

                    os.remove(csv_path)
                    gc.collect()

            gc.collect()

            logger.info(f"Found {len(changed_ids_to_type)} total updated records")
            logger.info(
                f"Starting to process parent objects of types: {ctx.parent_types}"
            )

            # Step 3 - extract and index docs
            def increment_parents_changed() -> None:
                nonlocal parents_changed
                parents_changed += 1

            yield from self._yield_doc_batches(
                sf_db,
                type_to_processed,
                changed_ids_to_type,
                ctx.parent_types,
                increment_parents_changed,
            )
        except Exception:
            logger.exception("Unexpected exception")
            raise
        finally:
            logger.info(
                f"Final processing stats: "
                f"examined={examined_ids} "
                f"parents_changed={parents_changed} "
                f"remaining={len(changed_ids_to_type) - examined_ids}"
            )

            logger.info(f"Top level object types processed: {type_to_processed}")

            sf_db.close()

    def _delta_sync(
        self,
        temp_dir: str,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> GenerateDocumentsOutput:
        type_to_processed: dict[str, int] = {}

        logger.info("_fetch_from_salesforce starting (delta sync).")
        if not self._sf_client:
            raise RuntimeError("self._sf_client is None!")

        changed_ids_to_type: dict[str, str] = {}
        parents_changed = 0
        processed = 0

        sf_db = OnyxSalesforceSQLite(os.path.join(temp_dir, "salesforce_db.sqlite"))
        sf_db.connect()

        try:
            sf_db.apply_schema()
            sf_db.log_stats()

            ctx = self._make_context(
                start, end, temp_dir, self.parent_object_list, self._sf_client
            )
            gc.collect()

            # Step 2 - load CSV's to sqlite
            changed_ids_to_type = SalesforceConnector._load_csvs_to_db(
                temp_dir, False, sf_db
            )
            gc.collect()

            logger.info(f"Found {len(changed_ids_to_type)} total updated records")
            logger.info(
                f"Starting to process parent objects of types: {ctx.parent_types}"
            )

            # Step 3 - extract and index docs
            docs_to_yield: list[Document | HierarchyNode] = []
            docs_to_yield_bytes = 0

            last_log_time = 0.0

            # this is a partial sync, so all changed parent id's must be retrieved from salesforce
            # NOTE: it may be an option to identify the object type of an id with its prefix
            # but unfortunately it's possible for an object type to not have a prefix.
            # so that would work in many important cases, but not all.
            for (
                parent_id,
                actual_parent_type,
                num_examined,
            ) in sf_db.get_changed_parent_ids_by_type_2(
                changed_ids=changed_ids_to_type,
                parent_types=ctx.parent_types,
                parent_relationship_fields_by_type=ctx.parent_reference_fields_by_type,
                prefix_to_type=ctx.prefix_to_type,
            ):
                # this yields back each changed parent record, where changed means
                # the parent record itself or a child record was updated.
                now = time.monotonic()

                # query salesforce for the changed parent id record
                # NOTE(rkuo): we only know the record id and its possible types,
                # so we actually need to check each type until we succeed
                # to be entirely correct
                # this may be a source of inefficiency and thinking about
                # caching the most likely parent record type might be helpful

                # actual_parent_type: str | None = None
                # for possible_parent_type in possible_parent_types:
                #     queryable_fields = ctx.queryable_fields_by_type[
                #         possible_parent_type
                #     ]
                #     query = _get_object_by_id_query(
                #         parent_id, possible_parent_type, queryable_fields
                #     )
                #     result = self._sf_client.query(query)
                #     if result:
                #         actual_parent_type = possible_parent_type
                #         print(result)
                #         break

                # get the parent record fields
                record = self._sf_client.query_object(
                    actual_parent_type, parent_id, ctx.type_to_queryable_fields
                )
                if not record:
                    continue

                # queryable_fields = ctx.type_to_queryable_fields[
                #     actual_parent_type
                # ]
                # query = get_object_by_id_query(
                #     parent_id, actual_parent_type, queryable_fields
                # )
                # result = self._sf_client.query(query)
                # if not result:
                #     continue

                # # print(result)
                # record: dict[str, Any] = {}

                # record_0 = result["records"][0]
                # for record_key, record_value in record_0.items():
                #     if record_key == "attributes":
                #         continue

                #     record[record_key] = record_value

                # for this parent type, increment the counter on the stats object
                type_to_processed[actual_parent_type] = (
                    type_to_processed.get(actual_parent_type, 0) + 1
                )

                # get the child records
                child_relationships = ctx.parent_to_child_relationships[
                    actual_parent_type
                ]
                relationship_to_queryable_fields = (
                    ctx.parent_to_relationship_queryable_fields[actual_parent_type]
                )
                child_records = self.sf_client.get_child_objects_by_id(
                    parent_id,
                    actual_parent_type,
                    list(child_relationships),
                    relationship_to_queryable_fields,
                )

                # NOTE(rkuo): does using the parent last modified make sense if the update
                # is being triggered because a child object changed?
                primary_owner_list: list[BasicExpertInfo] | None = None
                if "LastModifiedById" in record:
                    try:
                        last_modified_by_id = record["LastModifiedById"]
                        user_record = self.sf_client.query_object(
                            USER_OBJECT_TYPE,
                            last_modified_by_id,
                            ctx.type_to_queryable_fields,
                        )
                        if user_record:
                            primary_owner = BasicExpertInfo.from_dict(user_record)
                            primary_owner_list = [primary_owner]
                    except Exception:
                        pass

                # for child_record_key, child_record in child_records.items():
                #     if not child_record:
                #         continue

                #     child_text_section = _extract_section(
                #         child_record,
                #         f"https://{self._sf_client.sf_instance}/{child_record_key}",
                #     )
                #     sections.append(child_text_section)

                # for parent_relationship_field in parent_relationship_fields:
                #     parent_relationship_id
                # json.loads(parent_object.data)

                # create and yield a document from the salesforce query
                doc = convert_sf_query_result_to_doc(
                    parent_id,
                    record,
                    child_records,
                    primary_owner_list,
                    self._sf_client,
                )

                # doc = Document(
                #     id=ID_PREFIX + parent_id,
                #     sections=cast(list[TextSection | ImageSection], sections),
                #     source=DocumentSource.SALESFORCE,
                #     semantic_identifier=parent_semantic_identifier,
                #     doc_updated_at=time_str_to_utc(parent_last_modified_date),
                #     primary_owners=primary_owner_list,
                #     metadata={},
                # )

                # Add default attributes to the metadata
                for (
                    sf_attribute,
                    canonical_attribute,
                ) in _DEFAULT_ATTRIBUTES_TO_KEEP.get(actual_parent_type, {}).items():
                    if sf_attribute in record:
                        doc.metadata[canonical_attribute] = _convert_to_metadata_value(
                            record[sf_attribute]
                        )

                doc_sizeof = sys.getsizeof(doc)
                docs_to_yield_bytes += doc_sizeof
                docs_to_yield.append(doc)
                parents_changed += 1

                # memory usage is sensitive to the input length, so we're yielding immediately
                # if the batch exceeds a certain byte length
                if (
                    len(docs_to_yield) >= self.batch_size
                    or docs_to_yield_bytes > SalesforceConnector.MAX_BATCH_BYTES
                ):
                    yield docs_to_yield
                    docs_to_yield = []
                    docs_to_yield_bytes = 0

                    # observed a memory leak / size issue with the account table if we don't gc.collect here.
                    gc.collect()

                processed = num_examined
                if now - last_log_time > SalesforceConnector.LOG_INTERVAL:
                    logger.info(
                        f"Processing stats: {type_to_processed} "
                        f"processed={processed} "
                        f"remaining={len(changed_ids_to_type) - processed}"
                    )
                    last_log_time = now

            yield docs_to_yield
        except Exception:
            logger.exception("Unexpected exception")
            raise
        finally:
            logger.info(
                f"Final processing stats: "
                f"processed={processed} "
                f"remaining={len(changed_ids_to_type) - processed} "
                f"parents_changed={parents_changed}"
            )

            logger.info(f"Top level object types processed: {type_to_processed}")

            sf_db.close()

    def _make_context(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
        temp_dir: str,
        parent_object_list: list[str],
        sf_client: OnyxSalesforce,
    ) -> SalesforceConnectorContext:
        """NOTE: I suspect we're doing way too many queries here. Likely fewer queries
        and just parsing all the info we need in less passes will work."""

        parent_types = set(parent_object_list)
        child_types: set[str] = set()
        parent_to_child_types: dict[str, set[str]] = (
            {}
        )  # map from parent to child types
        child_to_parent_types: dict[str, set[str]] = (
            {}
        )  # map from child to parent types

        parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = (
            {}
        )  # for a given object, the fields reference parent objects
        type_to_queryable_fields: dict[str, set[str]] = {}
        prefix_to_type: dict[str, str] = {}

        parent_to_child_relationships: dict[str, set[str]] = (
            {}
        )  # map from parent to child relationships

        # relationship keys are formatted as "parent__relationship"
        # we have to do this because relationship names are not unique!
        # values are a dict of relationship names to a list of queryable fields
        parent_to_relationship_queryable_fields: dict[str, dict[str, set[str]]] = {}

        parent_child_names_to_relationships: dict[str, str] = {}

        full_sync = start is None and end is None

        # Step 1 - make a list of all the types to download (parent + direct child + USER_OBJECT_TYPE)
        # prefixes = {}

        global_description = sf_client.describe()
        if not global_description:
            raise RuntimeError("sf_client.describe failed")

        for sobject in global_description["sobjects"]:
            if sobject["keyPrefix"]:
                prefix_to_type[sobject["keyPrefix"]] = sobject["name"]
                # prefixes[sobject['keyPrefix']] = {
                #     'object_name': sobject['name'],
                #     'label': sobject['label'],
                #     'is_custom': sobject['custom']
                # }

        logger.info(f"Describe: num_prefixes={len(prefix_to_type)}")

        logger.info(f"Parent object types: num={len(parent_types)} list={parent_types}")
        for parent_type in parent_types:
            # parent_onyx_sf_type = OnyxSalesforceType(parent_type, sf_client)

            custom_fields: list[str] | None = []
            associations_config: dict[str, list[str]] | None = None

            # Set queryable fields for parent type
            if self.custom_query_config:
                custom_fields, associations_config = (
                    _extract_fields_and_associations_from_config(
                        self.custom_query_config, parent_type
                    )
                )
                custom_fields = custom_fields or []

                # Get custom fields for parent type
                field_set = set(custom_fields)
                # used during doc conversion
                # field_set.add(NAME_FIELD) # does not always exist
                field_set.add(ID_FIELD)
                field_set.add(MODIFIED_FIELD)

                # Use only the specified fields
                type_to_queryable_fields[parent_type] = field_set
                logger.info(f"Using custom fields for {parent_type}: {field_set}")
            else:
                # Use all queryable fields
                type_to_queryable_fields[parent_type] = (
                    sf_client.get_queryable_fields_by_type(parent_type)
                )
                logger.info(f"Using all fields for {parent_type}")

            child_types_all = sf_client.get_children_of_sf_type(parent_type)
            logger.debug(f"Found {len(child_types_all)} child types for {parent_type}")
            logger.debug(f"child types: {child_types_all}")

            child_types_working = child_types_all.copy()
            if associations_config is not None:
                child_types_working = {
                    k: v for k, v in child_types_all.items() if k in associations_config
                }
                any_not_found = False
                for k in associations_config:
                    if k not in child_types_working:
                        any_not_found = True
                        logger.warning(f"Association {k} not found in {parent_type}")
                if any_not_found:
                    queryable_fields = sf_client.get_queryable_fields_by_type(
                        parent_type
                    )
                    raise RuntimeError(
                        f"Associations {associations_config} not found in {parent_type} "
                        "make sure your parent-child associations are in the right order"
                        # f"with child objects {child_types_all}"
                        # f" and fields {queryable_fields}"
                    )

            parent_to_child_relationships[parent_type] = set()
            parent_to_child_types[parent_type] = set()
            parent_to_relationship_queryable_fields[parent_type] = {}

            for child_type, child_relationship in child_types_working.items():
                child_type = cast(str, child_type)

                # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)

                # map parent name to child name
                parent_to_child_types[parent_type].add(child_type)

                # reverse map child name to parent name
                if child_type not in child_to_parent_types:
                    child_to_parent_types[child_type] = set()
                child_to_parent_types[child_type].add(parent_type)

                # map parent name to child relationship
                parent_to_child_relationships[parent_type].add(child_relationship)

                # map relationship to queryable fields of the target table
                if config_fields := (
                    associations_config and associations_config.get(child_type)
                ):
                    field_set = set(config_fields)
                    # these are expected and used during doc conversion
                    # field_set.add(NAME_FIELD) # does not always exist
                    field_set.add(ID_FIELD)
                    field_set.add(MODIFIED_FIELD)
                    queryable_fields = field_set
                else:
                    queryable_fields = sf_client.get_queryable_fields_by_type(
                        child_type
                    )

                if child_relationship in parent_to_relationship_queryable_fields:
                    raise RuntimeError(f"{child_relationship=} already exists")

                parent_to_relationship_queryable_fields[parent_type][
                    child_relationship
                ] = queryable_fields

                type_to_queryable_fields[child_type] = queryable_fields

                parent_child_names_to_relationships[f"{parent_type}__{child_type}"] = (
                    child_relationship
                )

            child_types.update(child_types_working.keys())
            logger.info(
                f"Child object types: parent={parent_type} num={len(child_types_working)} list={child_types_working.keys()}"
            )

        logger.info(
            f"Final child object types: num={len(child_types)} list={child_types}"
        )

        all_types: set[str] = set(parent_types)
        all_types.update(child_types)

        # NOTE(rkuo): should this be an implicit parent type?
        all_types.add(USER_OBJECT_TYPE)  # Always add User for permissioning purposes
        all_types.add(ACCOUNT_OBJECT_TYPE)  # Always add Account for reference purposes

        logger.info(f"All object types: num={len(all_types)} list={all_types}")

        # Ensure User and Account have queryable fields if they weren't already processed
        essential_types = [USER_OBJECT_TYPE, ACCOUNT_OBJECT_TYPE]
        for essential_type in essential_types:
            if essential_type not in type_to_queryable_fields:
                type_to_queryable_fields[essential_type] = (
                    sf_client.get_queryable_fields_by_type(essential_type)
                )

        # 1.1 - Detect all fields in child types which reference a parent type.
        # build dicts to detect relationships between parent and child
        for child_type in child_types.union(essential_types):
            # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)
            parent_reference_fields = sf_client.get_parent_reference_fields(
                child_type, parent_types
            )

            parent_reference_fields_by_type[child_type] = parent_reference_fields

        # Only add time filter if there is at least one object of the type
        # in the database. We aren't worried about partially completed object update runs
        # because this occurs after we check for existing csvs which covers this case
        # NOTE(rkuo):
        all_types_to_filter: dict[str, bool] = {}
        for sf_type in all_types:
            # onyx_sf_type = OnyxSalesforceType(sf_type, sf_client)

            # NOTE(rkuo): I'm not convinced it makes sense to restrict filtering at all
            # all_types_to_filter[sf_type] = sf_db.object_type_count(sf_type) > 0
            all_types_to_filter[sf_type] = not full_sync

        # Step 1.2 - bulk download the CSV's for each object type
        SalesforceConnector._download_object_csvs(
            all_types_to_filter,
            type_to_queryable_fields,
            temp_dir,
            sf_client,
            start,
            end,
        )

        return_context = SalesforceConnectorContext()
        return_context.parent_types = parent_types
        return_context.child_types = child_types
        return_context.parent_to_child_types = parent_to_child_types
        return_context.child_to_parent_types = child_to_parent_types
        return_context.parent_reference_fields_by_type = parent_reference_fields_by_type
        return_context.type_to_queryable_fields = type_to_queryable_fields
        return_context.prefix_to_type = prefix_to_type

        return_context.parent_to_child_relationships = parent_to_child_relationships
        return_context.parent_to_relationship_queryable_fields = (
            parent_to_relationship_queryable_fields
        )

        return_context.parent_child_names_to_relationships = (
            parent_child_names_to_relationships
        )

        return return_context

    def load_from_state(self) -> GenerateDocumentsOutput:
        # Always use a temp directory for SQLite - the database is rebuilt
        # from scratch each time via CSV downloads, so there's no caching benefit
        # from persisting it. Using temp dirs also avoids collisions between
        # multiple CC pairs and eliminates stale WAL/SHM file issues.
        # TODO(evan): make this thing checkpointed and persist/load db from filestore
        with tempfile.TemporaryDirectory() as temp_dir:
            yield from self._full_sync(temp_dir)

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Poll source will synchronize updated parent objects one by one."""
        # Always use a temp directory - see comment in load_from_state()
        with tempfile.TemporaryDirectory() as temp_dir:
            yield from self._delta_sync(temp_dir, start, end)

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        doc_metadata_list: list[SlimDocument | HierarchyNode] = []
        for parent_object_type in self.parent_object_list:
            query = f"SELECT Id FROM {parent_object_type}"
            query_result = self.sf_client.safe_query_all(query)
            doc_metadata_list.extend(
                SlimDocument(
                    id=f"{ID_PREFIX}{instance_dict.get('Id', '')}",
                    external_access=None,
                )
                for instance_dict in query_result["records"]
            )

        yield doc_metadata_list

    def validate_connector_settings(self) -> None:
        """
        Validate that the Salesforce credentials and connector settings are correct.
        Specifically checks that we can make an authenticated request to Salesforce.
        """

        try:
            # Attempt to fetch a small batch of objects (arbitrary endpoint) to verify credentials
            self.sf_client.describe()
        except Exception as e:
            raise ConnectorMissingCredentialError(
                f"Failed to validate Salesforce credentials. Please check yourcredentials and try again. Error: {e}"
            )

        if self.custom_query_config:
            try:
                _validate_custom_query_config(self.custom_query_config)
            except Exception as e:
                raise ConnectorMissingCredentialError(
                    f"Failed to validate Salesforce custom query config. Please check yourconfig and try again. Error: {e}"
                )

        logger.info("Salesforce credentials validated successfully.")

    # @override
    # def load_from_checkpoint(
    #     self,
    #     start: SecondsSinceUnixEpoch,
    #     end: SecondsSinceUnixEpoch,
    #     checkpoint: SalesforceCheckpoint,
    # ) -> CheckpointOutput[SalesforceCheckpoint]:
    #     try:
    #         return self._fetch_document_batches(checkpoint, start, end)
    #     except Exception as e:
    #         if _should_propagate_error(e) and start is not None:
    #             logger.warning(
    #                 "Confluence says we provided an invalid 'updated' field. This may indicate"
    #                 "a real issue, but can also appear during edge cases like daylight"
    #                 f"savings time changes. Retrying with a 1 hour offset. Error: {e}"
    #             )
    #             return self._fetch_document_batches(checkpoint, start - ONE_HOUR, end)
    #         raise

    # @override
    # def build_dummy_checkpoint(self) -> SalesforceCheckpoint:
    #     return SalesforceCheckpoint(last_updated=0, has_more=True, last_seen_doc_ids=[])

    # @override
    # def validate_checkpoint_json(self, checkpoint_json: str) -> SalesforceCheckpoint:
    #     return SalesforceCheckpoint.model_validate_json(checkpoint_json)


if __name__ == "__main__":
    connector = SalesforceConnector(requested_objects=[ACCOUNT_OBJECT_TYPE])

    connector.load_credentials(
        {
            "sf_username": os.environ["SF_USERNAME"],
            "sf_password": os.environ["SF_PASSWORD"],
            "sf_security_token": os.environ["SF_SECURITY_TOKEN"],
        }
    )
    start_time = time.monotonic()
    doc_count = 0
    section_count = 0
    text_count = 0
    for doc_batch in connector.load_from_state():
        doc_count += len(doc_batch)
        print(f"doc_count: {doc_count}")
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            section_count += len(doc.sections)
            for section in doc.sections:
                if isinstance(section, TextSection) and section.text is not None:
                    text_count += len(section.text)
    end_time = time.monotonic()

    print(f"Doc count: {doc_count}")
    print(f"Section count: {section_count}")
    print(f"Text count: {text_count}")
    print(f"Time taken: {end_time - start_time}")


================================================
FILE: backend/onyx/connectors/salesforce/doc_conversion.py
================================================
import re
from typing import Any
from typing import cast

from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce
from onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite
from onyx.connectors.salesforce.utils import ID_FIELD
from onyx.connectors.salesforce.utils import MODIFIED_FIELD
from onyx.connectors.salesforce.utils import NAME_FIELD
from onyx.connectors.salesforce.utils import SalesforceObject
from onyx.utils.logger import setup_logger

logger = setup_logger()

ID_PREFIX = "SALESFORCE_"

# All of these types of keys are handled by specific fields in the doc
# conversion process (E.g. URLs) or are not useful for the user (E.g. UUIDs)
_SF_JSON_FILTER = r"Id$|Date$|stamp$|url$"


def _clean_salesforce_dict(data: dict | list) -> dict | list:
    """Clean and transform Salesforce API response data by recursively:
    1. Extracting records from the response if present
    2. Merging attributes into the main dictionary
    3. Filtering out keys matching certain patterns (Id, Date, stamp, url)
    4. Removing '__c' suffix from custom field names
    5. Removing None values and empty containers

    Args:
        data: A dictionary or list from Salesforce API response

    Returns:
        Cleaned dictionary or list with transformed keys and filtered values
    """
    if isinstance(data, dict):
        if "records" in data.keys():
            data = data["records"]
    if isinstance(data, dict):
        if "attributes" in data.keys():
            if isinstance(data["attributes"], dict):
                data.update(data.pop("attributes"))

    if isinstance(data, dict):
        filtered_dict = {}
        for key, value in data.items():
            if not re.search(_SF_JSON_FILTER, key, re.IGNORECASE):
                # remove the custom object indicator for display
                if "__c" in key:
                    key = key[:-3]
                if isinstance(value, (dict, list)):
                    filtered_value = _clean_salesforce_dict(value)
                    # Only add non-empty dictionaries or lists
                    if filtered_value:
                        filtered_dict[key] = filtered_value
                elif value is not None:
                    filtered_dict[key] = value
        return filtered_dict

    if isinstance(data, list):
        filtered_list = []
        for item in data:
            filtered_item: dict | list
            if isinstance(item, (dict, list)):
                filtered_item = _clean_salesforce_dict(item)
                # Only add non-empty dictionaries or lists
                if filtered_item:
                    filtered_list.append(filtered_item)
            elif item is not None:
                filtered_list.append(item)
        return filtered_list

    return data


def _json_to_natural_language(data: dict | list, indent: int = 0) -> str:
    """Convert a nested dictionary or list into a human-readable string format.

    Recursively traverses the data structure and formats it with:
    - Key-value pairs on separate lines
    - Nested structures indented for readability
    - Lists and dictionaries handled with appropriate formatting

    Args:
        data: The dictionary or list to convert
        indent: Number of spaces to indent (default: 0)

    Returns:
        A formatted string representation of the data structure
    """
    result = []
    indent_str = " " * indent

    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                result.append(f"{indent_str}{key}:")
                result.append(_json_to_natural_language(value, indent + 2))
            else:
                result.append(f"{indent_str}{key}: {value}")
    elif isinstance(data, list):
        for item in data:
            result.append(_json_to_natural_language(item, indent + 2))

    return "\n".join(result)


def _extract_section(salesforce_object_data: dict[str, Any], link: str) -> TextSection:
    """Converts a dict to a TextSection"""

    # Extract text from a Salesforce API response dictionary by:
    # 1. Cleaning the dictionary
    # 2. Converting the cleaned dictionary to natural language
    processed_dict = _clean_salesforce_dict(salesforce_object_data)
    natural_language_for_dict = _json_to_natural_language(processed_dict)

    return TextSection(
        text=natural_language_for_dict,
        link=link,
    )


def _extract_primary_owner(
    sf_db: OnyxSalesforceSQLite,
    sf_object: SalesforceObject,
) -> BasicExpertInfo | None:
    object_dict = sf_object.data
    if not (last_modified_by_id := object_dict.get("LastModifiedById")):
        logger.warning(f"No LastModifiedById found for {sf_object.id}")
        return None
    if not (last_modified_by := sf_db.get_record(last_modified_by_id)):
        logger.warning(f"No LastModifiedBy found for {last_modified_by_id}")
        return None

    user_data = last_modified_by.data
    expert_info = BasicExpertInfo(
        first_name=user_data.get("FirstName"),
        last_name=user_data.get("LastName"),
        email=user_data.get("Email"),
        display_name=user_data.get(NAME_FIELD),
    )

    # Check if all fields are None
    if (
        expert_info.first_name is None
        and expert_info.last_name is None
        and expert_info.email is None
        and expert_info.display_name is None
    ):
        logger.warning(f"No identifying information found for user {user_data}")
        return None

    return expert_info


def convert_sf_query_result_to_doc(
    record_id: str,
    record: dict[str, Any],
    child_records: dict[str, dict[str, Any]],
    primary_owner_list: list[BasicExpertInfo] | None,
    sf_client: OnyxSalesforce,
) -> Document:
    """Generates a yieldable Document from query results"""

    base_url = f"https://{sf_client.sf_instance}"
    extracted_doc_updated_at = time_str_to_utc(record[MODIFIED_FIELD])
    extracted_semantic_identifier = record.get(NAME_FIELD) or record.get(
        ID_FIELD, "Unknown Object"
    )

    sections = [_extract_section(record, f"{base_url}/{record_id}")]
    for child_record_key, child_record in child_records.items():
        if not child_record:
            continue

        key_fields = child_record_key.split(":")
        child_record_id = key_fields[1]

        child_text_section = _extract_section(
            child_record,
            f"{base_url}/{child_record_id}",
        )
        sections.append(child_text_section)

    doc = Document(
        id=f"{ID_PREFIX}{record_id}",
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.SALESFORCE,
        semantic_identifier=extracted_semantic_identifier,
        doc_updated_at=extracted_doc_updated_at,
        primary_owners=primary_owner_list,
        metadata={},
    )
    return doc


def convert_sf_object_to_doc(
    sf_db: OnyxSalesforceSQLite,
    sf_object: SalesforceObject,
    sf_instance: str,
) -> Document:
    """Would be nice if this function was documented"""
    object_dict = sf_object.data
    salesforce_id = object_dict[ID_FIELD]
    onyx_salesforce_id = f"{ID_PREFIX}{salesforce_id}"
    base_url = f"https://{sf_instance}"
    extracted_doc_updated_at = time_str_to_utc(object_dict[MODIFIED_FIELD])
    extracted_semantic_identifier = object_dict.get(NAME_FIELD) or object_dict.get(
        ID_FIELD, "Unknown Object"
    )

    sections = [_extract_section(sf_object.data, f"{base_url}/{sf_object.id}")]
    for id in sf_db.get_child_ids(sf_object.id):
        if not (child_object := sf_db.get_record(id, isChild=True)):
            continue
        sections.append(
            _extract_section(child_object.data, f"{base_url}/{child_object.id}")
        )

    # NOTE(rkuo): does using the parent last modified make sense if the update
    # is being triggered because a child object changed?
    primary_owner_list: list[BasicExpertInfo] | None = None

    primary_owner = sf_db.make_basic_expert_info_from_record(sf_object)
    if primary_owner:
        primary_owner_list = [primary_owner]

    doc = Document(
        id=onyx_salesforce_id,
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.SALESFORCE,
        semantic_identifier=extracted_semantic_identifier,
        doc_updated_at=extracted_doc_updated_at,
        primary_owners=primary_owner_list,
        metadata={},
    )
    return doc


================================================
FILE: backend/onyx/connectors/salesforce/onyx_salesforce.py
================================================
import time
from typing import Any

from simple_salesforce import Salesforce
from simple_salesforce import SFType
from simple_salesforce.exceptions import SalesforceRefusedRequest

from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_OBJECTS
from onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_PREFIXES
from onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_SUFFIXES
from onyx.connectors.salesforce.salesforce_calls import get_object_by_id_query
from onyx.connectors.salesforce.utils import ID_FIELD
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder


logger = setup_logger()


def is_salesforce_rate_limit_error(exception: Exception) -> bool:
    """Check if an exception is a Salesforce rate limit error."""
    return isinstance(
        exception, SalesforceRefusedRequest
    ) and "REQUEST_LIMIT_EXCEEDED" in str(exception)


class OnyxSalesforce(Salesforce):
    SOQL_MAX_SUBQUERIES = 20

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)

        self.parent_types: set[str] = set()
        self.child_types: set[str] = set()
        self.parent_to_child_types: dict[str, set[str]] = (
            {}
        )  # map from parent to child types
        self.child_to_parent_types: dict[str, set[str]] = (
            {}
        )  # map from child to parent types
        self.parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}
        self.queryable_fields_by_type: dict[str, list[str]] = {}
        self.prefix_to_type: dict[str, str] = (
            {}
        )  # infer the object type of an id immediately

    def initialize(self) -> bool:
        """Eventually cache all first run client state with this method"""
        return True

    def is_blacklisted(self, object_type: str) -> bool:
        """Returns True if the object type is blacklisted."""
        object_type_lower = object_type.lower()
        if object_type_lower in SALESFORCE_BLACKLISTED_OBJECTS:
            return True
        for prefix in SALESFORCE_BLACKLISTED_PREFIXES:
            if object_type_lower.startswith(prefix):
                return True

        for suffix in SALESFORCE_BLACKLISTED_SUFFIXES:
            if object_type_lower.endswith(suffix):
                return True

        return False

    @retry_builder(
        tries=6,
        delay=20,
        backoff=1.5,
        max_delay=60,
        exceptions=(SalesforceRefusedRequest,),
    )
    @rate_limit_builder(max_calls=50, period=60)
    def safe_query(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Wrapper around the original query method with retry logic and rate limiting."""
        try:
            return super().query(query, **kwargs)
        except SalesforceRefusedRequest as e:
            if is_salesforce_rate_limit_error(e):
                logger.warning(
                    f"Salesforce rate limit exceeded for query: {query[:100]}..."
                )
                # Add additional delay for rate limit errors
                time.sleep(5)
            raise

    @retry_builder(
        tries=5,
        delay=20,
        backoff=1.5,
        max_delay=60,
        exceptions=(SalesforceRefusedRequest,),
    )
    @rate_limit_builder(max_calls=50, period=60)
    def safe_query_all(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Wrapper around the original query_all method with retry logic and rate limiting."""
        try:
            return super().query_all(query, **kwargs)
        except SalesforceRefusedRequest as e:
            if is_salesforce_rate_limit_error(e):
                logger.warning(
                    f"Salesforce rate limit exceeded for query_all: {query[:100]}..."
                )
                # Add additional delay for rate limit errors
                time.sleep(5)
            raise

    @staticmethod
    def _make_child_objects_by_id_query(
        object_id: str,
        sf_type: str,
        child_relationships: list[str],
        relationships_to_fields: dict[str, set[str]],
    ) -> str:
        """Returns a SOQL query given the object id, type and child relationships.

        object_id: the id of the parent object
        sf_type: the object name/type of the parent object
        child_relationships: a list of the child object names/types to retrieve
        relationships_to_fields: a mapping of objects to their queryable fields

        When the query is executed, it comes back as result.records[0][child_relationship]
        """

        # supposedly the real limit is 200? But we limit to 10 for practical reasons
        SUBQUERY_LIMIT = 10

        query = "SELECT "
        for child_relationship in child_relationships:
            # TODO(rkuo): what happens if there is a very large list of child records?
            # is that possible problem?

            # NOTE: we actually have to list out the subqueries we want.
            # We can't use the following shortcuts:
            #   FIELDS(ALL) can include binary fields, so don't use that
            #   FIELDS(CUSTOM) can include aggregate queries, so don't use that
            fields = relationships_to_fields[child_relationship]
            fields_fragment = ",".join(fields)
            query += f"(SELECT {fields_fragment} FROM {child_relationship} LIMIT {SUBQUERY_LIMIT}), "

        query = query.rstrip(", ")
        query += f" FROM {sf_type} WHERE Id = '{object_id}'"
        return query

    def query_object(
        self,
        object_type: str,
        object_id: str,
        type_to_queryable_fields: dict[str, set[str]],
    ) -> dict[str, Any] | None:
        record: dict[str, Any] = {}

        queryable_fields = type_to_queryable_fields[object_type]
        query = get_object_by_id_query(object_id, object_type, queryable_fields)
        result = self.safe_query(query)
        if not result:
            return None

        record_0 = result["records"][0]
        for record_key, record_value in record_0.items():
            if record_key == "attributes":
                continue

            record[record_key] = record_value

        return record

    def get_child_objects_by_id(
        self,
        object_id: str,
        sf_type: str,
        child_relationships: list[str],
        relationships_to_fields: dict[str, set[str]],
    ) -> dict[str, dict[str, Any]]:
        """There's a limit on the number of subqueries we can put in a single query."""
        child_records: dict[str, dict[str, Any]] = {}
        child_relationships_batch: list[str] = []
        remaining_child_relationships = list(child_relationships)

        while True:
            process_batch = False

            if (
                len(remaining_child_relationships) == 0
                and len(child_relationships_batch) == 0
            ):
                break

            if len(child_relationships_batch) >= OnyxSalesforce.SOQL_MAX_SUBQUERIES:
                process_batch = True

            if len(remaining_child_relationships) == 0:
                process_batch = True

            if process_batch:
                if len(child_relationships_batch) == 0:
                    break

                query = OnyxSalesforce._make_child_objects_by_id_query(
                    object_id,
                    sf_type,
                    child_relationships_batch,
                    relationships_to_fields,
                )

                try:
                    result = self.safe_query(query)
                except Exception:
                    logger.exception(f"Query failed: {query=}")
                else:
                    for child_record_key, child_result in result["records"][0].items():
                        if child_record_key == "attributes":
                            continue

                        if not child_result:
                            continue

                        for child_record in child_result["records"]:
                            child_record_id = child_record[ID_FIELD]
                            if not child_record_id:
                                logger.warning("Child record has no id")
                                continue

                            child_records[f"{child_record_key}:{child_record_id}"] = (
                                child_record
                            )
                finally:
                    child_relationships_batch.clear()

                continue

            if len(remaining_child_relationships) == 0:
                break

            child_relationship = remaining_child_relationships.pop(0)

            # this is binary content, skip it
            if child_relationship == "Attachments":
                continue

            child_relationships_batch.append(child_relationship)

        return child_records

    @retry_builder(
        tries=3,
        delay=1,
        backoff=2,
        exceptions=(SalesforceRefusedRequest,),
    )
    def describe_type(self, name: str) -> Any:
        sf_object = SFType(name, self.session_id, self.sf_instance)
        try:
            result = sf_object.describe()
            return result
        except SalesforceRefusedRequest as e:
            if is_salesforce_rate_limit_error(e):
                logger.warning(
                    f"Salesforce rate limit exceeded for describe_type: {name}"
                )
                # Add additional delay for rate limit errors
                time.sleep(3)
            raise

    def get_queryable_fields_by_type(self, name: str) -> set[str]:
        object_description = self.describe_type(name)
        if object_description is None:
            return set()

        fields: list[dict[str, Any]] = object_description["fields"]
        valid_fields: set[str] = set()
        field_names_to_remove: set[str] = set()
        for field in fields:
            if compound_field_name := field.get("compoundFieldName"):
                # We do want to get name fields even if they are compound
                if not field.get("nameField"):
                    field_names_to_remove.add(compound_field_name)

            field_name = field.get("name")
            field_type = field.get("type")
            if field_type in ["base64", "blob", "encryptedstring"]:
                continue

            if field_name:
                valid_fields.add(field_name)

        return valid_fields - field_names_to_remove

    def get_children_of_sf_type(self, sf_type: str) -> dict[str, str]:
        """Returns a dict of child object names to relationship names.
        Relationship names (not object names) are used in subqueries!
        """
        names_to_relationships: dict[str, str] = {}

        object_description = self.describe_type(sf_type)

        index = 0
        len_relationships = len(object_description["childRelationships"])
        for child_relationship in object_description["childRelationships"]:
            child_name = child_relationship["childSObject"]

            index += 1
            valid, reason = self._is_valid_child_object(child_relationship)
            if not valid:
                logger.debug(
                    f"{index}/{len_relationships} - Invalid child object: "
                    f"parent={sf_type} child={child_name} child_field_backreference={child_relationship['field']} {reason=}"
                )
                continue

            logger.debug(
                f"{index}/{len_relationships} - Found valid child object: "
                f"parent={sf_type} child={child_name} child_field_backreference={child_relationship['field']}"
            )

            name = child_name
            relationship = child_relationship["relationshipName"]

            names_to_relationships[name] = relationship

        return names_to_relationships

    def _is_valid_child_object(
        self, child_relationship: dict[str, Any]
    ) -> tuple[bool, str]:

        if not child_relationship["childSObject"]:
            return False, "childSObject is None"

        child_name = child_relationship["childSObject"]

        if self.is_blacklisted(child_name):
            return False, f"{child_name=} is blacklisted."

        if not child_relationship["relationshipName"]:
            return False, f"{child_name=} has no relationshipName."

        object_description = self.describe_type(child_relationship["childSObject"])
        if not object_description["queryable"]:
            return False, f"{child_name=} is not queryable."

        if not child_relationship["field"]:
            return False, f"{child_name=} has no relationship field."

        if child_relationship["field"] == "RelatedToId":
            return False, f"{child_name=} field is RelatedToId and blacklisted."

        return True, ""

    def get_parent_reference_fields(
        self, sf_type: str, parent_types: set[str]
    ) -> dict[str, list[str]]:
        """
        sf_type: the type in which to find parent reference fields
        parent_types: a list of parent reference field types we are actually interested in
        Other parent types will not be returned.

        Given an object type, returns a dict of field names to a list of referenced parent
        object types.
        (Yes, it is possible for a field to reference one of multiple object types,
        although this seems very unlikely.)

        Returns an empty dict if there are no parent reference fields.
        """

        parent_reference_fields: dict[str, list[str]] = {}

        object_description = self.describe_type(sf_type)
        for field in object_description["fields"]:
            if field["type"] == "reference":
                for reference_to in field["referenceTo"]:
                    if reference_to in parent_types:
                        if field["name"] not in parent_reference_fields:
                            parent_reference_fields[field["name"]] = []
                        parent_reference_fields[field["name"]].append(
                            field["referenceTo"]
                        )

        return parent_reference_fields


================================================
FILE: backend/onyx/connectors/salesforce/salesforce_calls.py
================================================
import gc
import os
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime

from pytz import UTC
from simple_salesforce import Salesforce
from simple_salesforce.bulk2 import SFBulk2Handler
from simple_salesforce.bulk2 import SFBulk2Type
from simple_salesforce.exceptions import SalesforceRefusedRequest

from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.salesforce.utils import MODIFIED_FIELD
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()


def is_salesforce_rate_limit_error(exception: Exception) -> bool:
    """Check if an exception is a Salesforce rate limit error."""
    return isinstance(
        exception, SalesforceRefusedRequest
    ) and "REQUEST_LIMIT_EXCEEDED" in str(exception)


def _build_last_modified_time_filter_for_salesforce(
    start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
) -> str:
    if start is None or end is None:
        return ""
    start_datetime = datetime.fromtimestamp(start, UTC)
    end_datetime = datetime.fromtimestamp(end, UTC)
    return f" WHERE LastModifiedDate > {start_datetime.isoformat()} AND LastModifiedDate < {end_datetime.isoformat()}"


def _build_created_date_time_filter_for_salesforce(
    start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
) -> str:
    if start is None or end is None:
        return ""
    start_datetime = datetime.fromtimestamp(start, UTC)
    end_datetime = datetime.fromtimestamp(end, UTC)
    return f" WHERE CreatedDate > {start_datetime.isoformat()} AND CreatedDate < {end_datetime.isoformat()}"


def _make_time_filter_for_sf_type(
    queryable_fields: set[str],
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
) -> str | None:

    if MODIFIED_FIELD in queryable_fields:
        return _build_last_modified_time_filter_for_salesforce(start, end)

    if "CreatedDate" in queryable_fields:
        return _build_created_date_time_filter_for_salesforce(start, end)

    return None


def _make_time_filtered_query(
    queryable_fields: set[str], sf_type: str, time_filter: str
) -> str:
    query = f"SELECT {', '.join(queryable_fields)} FROM {sf_type}{time_filter}"
    return query


def get_object_by_id_query(
    object_id: str, sf_type: str, queryable_fields: set[str]
) -> str:
    query = (
        f"SELECT {', '.join(queryable_fields)} FROM {sf_type} WHERE Id = '{object_id}'"
    )
    return query


@retry_builder(
    tries=5,
    delay=2,
    backoff=2,
    max_delay=60,
    exceptions=(SalesforceRefusedRequest,),
)
@rate_limit_builder(max_calls=50, period=60)
def _object_type_has_api_data(
    sf_client: Salesforce, sf_type: str, time_filter: str
) -> bool:
    """
    Use the rest api to check to make sure the query will result in a non-empty response.
    """
    try:
        query = f"SELECT Count() FROM {sf_type}{time_filter} LIMIT 1"
        result = sf_client.query(query)
        if result["totalSize"] == 0:
            return False
    except SalesforceRefusedRequest as e:
        if is_salesforce_rate_limit_error(e):
            logger.warning(
                f"Salesforce rate limit exceeded for object type check: {sf_type}"
            )
            # Add additional delay for rate limit errors
            time.sleep(3)
        raise

    except Exception as e:
        if "OPERATION_TOO_LARGE" not in str(e):
            logger.warning(f"Object type {sf_type} doesn't support query: {e}")
            return False
    return True


def _bulk_retrieve_from_salesforce(
    sf_type: str,
    query: str,
    target_dir: str,
    sf_client: Salesforce,
) -> tuple[str, list[str] | None]:
    """Returns a tuple of
    1. the salesforce object type (NOTE: seems redundant)
    2. the list of CSV's written into the target directory
    """

    bulk_2_handler: SFBulk2Handler | None = SFBulk2Handler(
        session_id=sf_client.session_id,
        bulk2_url=sf_client.bulk2_url,
        proxies=sf_client.proxies,
        session=sf_client.session,
    )
    if not bulk_2_handler:
        return sf_type, None

    # NOTE(rkuo): there are signs this download is allocating large
    # amounts of memory instead of streaming the results to disk.
    # we're doing a gc.collect to try and mitigate this.

    # see https://github.com/simple-salesforce/simple-salesforce/issues/428 for a
    # possible solution
    bulk_2_type: SFBulk2Type | None = SFBulk2Type(
        object_name=sf_type,
        bulk2_url=bulk_2_handler.bulk2_url,
        headers=bulk_2_handler.headers,
        session=bulk_2_handler.session,
    )
    if not bulk_2_type:
        return sf_type, None

    logger.info(f"Downloading {sf_type}")

    logger.debug(f"Query: {query}")

    try:
        # This downloads the file to a file in the target path with a random name
        results = bulk_2_type.download(
            query=query,
            path=target_dir,
            max_records=500000,
        )

        # prepend each downloaded csv with the object type (delimiter = '.')
        all_download_paths: list[str] = []
        for result in results:
            original_file_path = result["file"]
            directory, filename = os.path.split(original_file_path)
            new_filename = f"{sf_type}.{filename}"
            new_file_path = os.path.join(directory, new_filename)
            os.rename(original_file_path, new_file_path)
            all_download_paths.append(new_file_path)
    except Exception as e:
        logger.error(
            f"Failed to download salesforce csv for object type {sf_type}: {e}"
        )
        logger.warning(f"Exceptioning query for object type {sf_type}: {query}")
        return sf_type, None
    finally:
        bulk_2_handler = None
        bulk_2_type = None
        gc.collect()

    logger.info(f"Downloaded {sf_type} to {all_download_paths}")
    return sf_type, all_download_paths


def fetch_all_csvs_in_parallel(
    sf_client: Salesforce,
    all_types_to_filter: dict[str, bool],
    queryable_fields_by_type: dict[str, set[str]],
    start: SecondsSinceUnixEpoch | None,
    end: SecondsSinceUnixEpoch | None,
    target_dir: str,
) -> dict[str, list[str] | None]:
    """
    Fetches all the csvs in parallel for the given object types
    Returns a dict of (sf_type, full_download_path)

    NOTE: We can probably lift object type has api data out of here
    """

    type_to_query = {}

    # query the available fields for each object type and determine how to filter
    for sf_type, apply_filter in all_types_to_filter.items():
        queryable_fields = queryable_fields_by_type[sf_type]

        time_filter = ""
        while True:
            if not apply_filter:
                break

            if start is not None and end is not None:
                time_filter_temp = _make_time_filter_for_sf_type(
                    queryable_fields, start, end
                )
                if time_filter_temp is None:
                    logger.warning(
                        f"Object type not filterable: type={sf_type} fields={queryable_fields}"
                    )
                    time_filter = ""
                else:
                    logger.info(
                        f"Object type filterable: type={sf_type} filter={time_filter_temp}"
                    )
                    time_filter = time_filter_temp

            break

        if not _object_type_has_api_data(sf_client, sf_type, time_filter):
            logger.warning(f"Object type skipped (no data available): type={sf_type}")
            continue

        query = _make_time_filtered_query(queryable_fields, sf_type, time_filter)
        type_to_query[sf_type] = query

    logger.info(
        f"Object types to query: initial={len(all_types_to_filter)} queryable={len(type_to_query)}"
    )

    # Run the bulk retrieve in parallel
    # limit to 4 to help with memory usage
    with ThreadPoolExecutor(max_workers=4) as executor:
        results = executor.map(
            lambda object_type: _bulk_retrieve_from_salesforce(
                sf_type=object_type,
                query=type_to_query[object_type],
                target_dir=target_dir,
                sf_client=sf_client,
            ),
            type_to_query.keys(),
        )
        return dict(results)


================================================
FILE: backend/onyx/connectors/salesforce/shelve_stuff/old_test_salesforce_shelves.py
================================================
import csv
import os
import shutil

from onyx.connectors.salesforce.shelve_stuff.shelve_functions import find_ids_by_type
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import (
    get_affected_parent_ids_by_type,
)
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_child_ids
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_record
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import (
    update_sf_db_with_csv,
)
from onyx.connectors.salesforce.utils import BASE_DATA_PATH
from onyx.connectors.salesforce.utils import get_object_type_path

_VALID_SALESFORCE_IDS = [
    "001bm00000fd9Z3AAI",
    "001bm00000fdYTdAAM",
    "001bm00000fdYTeAAM",
    "001bm00000fdYTfAAM",
    "001bm00000fdYTgAAM",
    "001bm00000fdYThAAM",
    "001bm00000fdYTiAAM",
    "001bm00000fdYTjAAM",
    "001bm00000fdYTkAAM",
    "001bm00000fdYTlAAM",
    "001bm00000fdYTmAAM",
    "001bm00000fdYTnAAM",
    "001bm00000fdYToAAM",
    "500bm00000XoOxtAAF",
    "500bm00000XoOxuAAF",
    "500bm00000XoOxvAAF",
    "500bm00000XoOxwAAF",
    "500bm00000XoOxxAAF",
    "500bm00000XoOxyAAF",
    "500bm00000XoOxzAAF",
    "500bm00000XoOy0AAF",
    "500bm00000XoOy1AAF",
    "500bm00000XoOy2AAF",
    "500bm00000XoOy3AAF",
    "500bm00000XoOy4AAF",
    "500bm00000XoOy5AAF",
    "500bm00000XoOy6AAF",
    "500bm00000XoOy7AAF",
    "500bm00000XoOy8AAF",
    "500bm00000XoOy9AAF",
    "500bm00000XoOyAAAV",
    "500bm00000XoOyBAAV",
    "500bm00000XoOyCAAV",
    "500bm00000XoOyDAAV",
    "500bm00000XoOyEAAV",
    "500bm00000XoOyFAAV",
    "500bm00000XoOyGAAV",
    "500bm00000XoOyHAAV",
    "500bm00000XoOyIAAV",
    "003bm00000EjHCjAAN",
    "003bm00000EjHCkAAN",
    "003bm00000EjHClAAN",
    "003bm00000EjHCmAAN",
    "003bm00000EjHCnAAN",
    "003bm00000EjHCoAAN",
    "003bm00000EjHCpAAN",
    "003bm00000EjHCqAAN",
    "003bm00000EjHCrAAN",
    "003bm00000EjHCsAAN",
    "003bm00000EjHCtAAN",
    "003bm00000EjHCuAAN",
    "003bm00000EjHCvAAN",
    "003bm00000EjHCwAAN",
    "003bm00000EjHCxAAN",
    "003bm00000EjHCyAAN",
    "003bm00000EjHCzAAN",
    "003bm00000EjHD0AAN",
    "003bm00000EjHD1AAN",
    "003bm00000EjHD2AAN",
    "550bm00000EXc2tAAD",
    "006bm000006kyDpAAI",
    "006bm000006kyDqAAI",
    "006bm000006kyDrAAI",
    "006bm000006kyDsAAI",
    "006bm000006kyDtAAI",
    "006bm000006kyDuAAI",
    "006bm000006kyDvAAI",
    "006bm000006kyDwAAI",
    "006bm000006kyDxAAI",
    "006bm000006kyDyAAI",
    "006bm000006kyDzAAI",
    "006bm000006kyE0AAI",
    "006bm000006kyE1AAI",
    "006bm000006kyE2AAI",
    "006bm000006kyE3AAI",
    "006bm000006kyE4AAI",
    "006bm000006kyE5AAI",
    "006bm000006kyE6AAI",
    "006bm000006kyE7AAI",
    "006bm000006kyE8AAI",
    "006bm000006kyE9AAI",
    "006bm000006kyEAAAY",
    "006bm000006kyEBAAY",
    "006bm000006kyECAAY",
    "006bm000006kyEDAAY",
    "006bm000006kyEEAAY",
    "006bm000006kyEFAAY",
    "006bm000006kyEGAAY",
    "006bm000006kyEHAAY",
    "006bm000006kyEIAAY",
    "006bm000006kyEJAAY",
    "005bm000009zy0TAAQ",
    "005bm000009zy25AAA",
    "005bm000009zy26AAA",
    "005bm000009zy28AAA",
    "005bm000009zy29AAA",
    "005bm000009zy2AAAQ",
    "005bm000009zy2BAAQ",
]


def clear_sf_db() -> None:
    """
    Clears the SF DB by deleting all files in the data directory.
    """
    shutil.rmtree(BASE_DATA_PATH)


def create_csv_file(
    object_type: str, records: list[dict], filename: str = "test_data.csv"
) -> None:
    """
    Creates a CSV file for the given object type and records.

    Args:
        object_type: The Salesforce object type (e.g. "Account", "Contact")
        records: List of dictionaries containing the record data
        filename: Name of the CSV file to create (default: test_data.csv)
    """
    if not records:
        return

    # Get all unique fields from records
    fields: set[str] = set()
    for record in records:
        fields.update(record.keys())
    fields = set(sorted(list(fields)))  # Sort for consistent order

    # Create CSV file
    csv_path = os.path.join(get_object_type_path(object_type), filename)
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for record in records:
            writer.writerow(record)

    # Update the database with the CSV
    update_sf_db_with_csv(object_type, csv_path)


def create_csv_with_example_data() -> None:
    """
    Creates CSV files with example data, organized by object type.
    """
    example_data: dict[str, list[dict]] = {
        "Account": [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Acme Inc.",
                "BillingCity": "New York",
                "Industry": "Technology",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Globex Corp",
                "BillingCity": "Los Angeles",
                "Industry": "Manufacturing",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Initech",
                "BillingCity": "Austin",
                "Industry": "Software",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[3],
                "Name": "TechCorp Solutions",
                "BillingCity": "San Francisco",
                "Industry": "Software",
                "AnnualRevenue": 5000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[4],
                "Name": "BioMed Research",
                "BillingCity": "Boston",
                "Industry": "Healthcare",
                "AnnualRevenue": 12000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[5],
                "Name": "Green Energy Co",
                "BillingCity": "Portland",
                "Industry": "Energy",
                "AnnualRevenue": 8000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[6],
                "Name": "DataFlow Analytics",
                "BillingCity": "Seattle",
                "Industry": "Technology",
                "AnnualRevenue": 3000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[7],
                "Name": "Cloud Nine Services",
                "BillingCity": "Denver",
                "Industry": "Cloud Computing",
                "AnnualRevenue": 7000000,
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "FirstName": "John",
                "LastName": "Doe",
                "Email": "john.doe@acme.com",
                "Title": "CEO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[41],
                "FirstName": "Jane",
                "LastName": "Smith",
                "Email": "jane.smith@acme.com",
                "Title": "CTO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[42],
                "FirstName": "Bob",
                "LastName": "Johnson",
                "Email": "bob.j@globex.com",
                "Title": "Sales Director",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[43],
                "FirstName": "Sarah",
                "LastName": "Chen",
                "Email": "sarah.chen@techcorp.com",
                "Title": "Product Manager",
                "Phone": "415-555-0101",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[44],
                "FirstName": "Michael",
                "LastName": "Rodriguez",
                "Email": "m.rodriguez@biomed.com",
                "Title": "Research Director",
                "Phone": "617-555-0202",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[45],
                "FirstName": "Emily",
                "LastName": "Green",
                "Email": "emily.g@greenenergy.com",
                "Title": "Sustainability Lead",
                "Phone": "503-555-0303",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[46],
                "FirstName": "David",
                "LastName": "Kim",
                "Email": "david.kim@dataflow.com",
                "Title": "Data Scientist",
                "Phone": "206-555-0404",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[47],
                "FirstName": "Rachel",
                "LastName": "Taylor",
                "Email": "r.taylor@cloudnine.com",
                "Title": "Cloud Architect",
                "Phone": "303-555-0505",
            },
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "Name": "Acme Server Upgrade",
                "Amount": 50000,
                "Stage": "Prospecting",
                "CloseDate": "2024-06-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[63],
                "Name": "Globex Manufacturing Line",
                "Amount": 150000,
                "Stage": "Negotiation",
                "CloseDate": "2024-03-15",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[64],
                "Name": "Initech Software License",
                "Amount": 75000,
                "Stage": "Closed Won",
                "CloseDate": "2024-01-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[65],
                "Name": "TechCorp AI Implementation",
                "Amount": 250000,
                "Stage": "Needs Analysis",
                "CloseDate": "2024-08-15",
                "Probability": 60,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[66],
                "Name": "BioMed Lab Equipment",
                "Amount": 500000,
                "Stage": "Value Proposition",
                "CloseDate": "2024-09-30",
                "Probability": 75,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[67],
                "Name": "Green Energy Solar Project",
                "Amount": 750000,
                "Stage": "Proposal",
                "CloseDate": "2024-07-15",
                "Probability": 80,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[68],
                "Name": "DataFlow Analytics Platform",
                "Amount": 180000,
                "Stage": "Negotiation",
                "CloseDate": "2024-05-30",
                "Probability": 90,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[69],
                "Name": "Cloud Nine Infrastructure",
                "Amount": 300000,
                "Stage": "Qualification",
                "CloseDate": "2024-10-15",
                "Probability": 40,
            },
        ],
    }

    # Create CSV files for each object type
    for object_type, records in example_data.items():
        create_csv_file(object_type, records)


def test_query() -> None:
    """
    Tests querying functionality by verifying:
    1. All expected Account IDs are found
    2. Each Account's data matches what was inserted
    """
    # Expected test data for verification
    expected_accounts: dict[str, dict[str, str | int]] = {
        _VALID_SALESFORCE_IDS[0]: {
            "Name": "Acme Inc.",
            "BillingCity": "New York",
            "Industry": "Technology",
        },
        _VALID_SALESFORCE_IDS[1]: {
            "Name": "Globex Corp",
            "BillingCity": "Los Angeles",
            "Industry": "Manufacturing",
        },
        _VALID_SALESFORCE_IDS[2]: {
            "Name": "Initech",
            "BillingCity": "Austin",
            "Industry": "Software",
        },
        _VALID_SALESFORCE_IDS[3]: {
            "Name": "TechCorp Solutions",
            "BillingCity": "San Francisco",
            "Industry": "Software",
            "AnnualRevenue": 5000000,
        },
        _VALID_SALESFORCE_IDS[4]: {
            "Name": "BioMed Research",
            "BillingCity": "Boston",
            "Industry": "Healthcare",
            "AnnualRevenue": 12000000,
        },
        _VALID_SALESFORCE_IDS[5]: {
            "Name": "Green Energy Co",
            "BillingCity": "Portland",
            "Industry": "Energy",
            "AnnualRevenue": 8000000,
        },
        _VALID_SALESFORCE_IDS[6]: {
            "Name": "DataFlow Analytics",
            "BillingCity": "Seattle",
            "Industry": "Technology",
            "AnnualRevenue": 3000000,
        },
        _VALID_SALESFORCE_IDS[7]: {
            "Name": "Cloud Nine Services",
            "BillingCity": "Denver",
            "Industry": "Cloud Computing",
            "AnnualRevenue": 7000000,
        },
    }

    # Get all Account IDs
    account_ids = find_ids_by_type("Account")

    # Verify we found all expected accounts
    assert len(account_ids) == len(
        expected_accounts
    ), f"Expected {len(expected_accounts)} accounts, found {len(account_ids)}"
    assert set(account_ids) == set(
        expected_accounts.keys()
    ), "Found account IDs don't match expected IDs"

    # Verify each account's data
    for acc_id in account_ids:
        combined = get_record(acc_id)
        assert combined is not None, f"Could not find account {acc_id}"

        expected = expected_accounts[acc_id]

        # Verify account data matches
        for key, value in expected.items():
            value = str(value)
            assert (
                combined.data[key] == value
            ), f"Account {acc_id} field {key} expected {value}, got {combined.data[key]}"

    print("All query tests passed successfully!")


def test_upsert() -> None:
    """
    Tests upsert functionality by:
    1. Updating an existing account
    2. Creating a new account
    3. Verifying both operations were successful
    """
    # Create CSV for updating an existing account and adding a new one
    update_data: list[dict[str, str | int]] = [
        {
            "Id": _VALID_SALESFORCE_IDS[0],
            "Name": "Acme Inc. Updated",
            "BillingCity": "New York",
            "Industry": "Technology",
            "Description": "Updated company info",
        },
        {
            "Id": _VALID_SALESFORCE_IDS[2],
            "Name": "New Company Inc.",
            "BillingCity": "Miami",
            "Industry": "Finance",
            "AnnualRevenue": 1000000,
        },
    ]

    create_csv_file("Account", update_data, "update_data.csv")

    # Verify the update worked
    updated_record = get_record(_VALID_SALESFORCE_IDS[0])
    assert updated_record is not None, "Updated record not found"
    assert updated_record.data["Name"] == "Acme Inc. Updated", "Name not updated"
    assert (
        updated_record.data["Description"] == "Updated company info"
    ), "Description not added"

    # Verify the new record was created
    new_record = get_record(_VALID_SALESFORCE_IDS[2])
    assert new_record is not None, "New record not found"
    assert new_record.data["Name"] == "New Company Inc.", "New record name incorrect"
    assert new_record.data["AnnualRevenue"] == "1000000", "New record revenue incorrect"

    print("All upsert tests passed successfully!")


def test_relationships() -> None:
    """
    Tests relationship shelf updates and queries by:
    1. Creating test data with relationships
    2. Verifying the relationships are correctly stored
    3. Testing relationship queries
    """
    # Create test data for each object type
    test_data: dict[str, list[dict[str, str | int]]] = {
        "Case": [
            {
                "Id": _VALID_SALESFORCE_IDS[13],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[14],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 2",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[48],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Test",
                "LastName": "Contact",
            }
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Name": "Test Opportunity",
                "Amount": 100000,
            }
        ],
    }

    # Create and update CSV files for each object type
    for object_type, records in test_data.items():
        create_csv_file(object_type, records, "relationship_test.csv")

    # Test relationship queries
    # All these objects should be children of Acme Inc.
    child_ids = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert len(child_ids) == 4, f"Expected 4 child objects, found {len(child_ids)}"
    assert _VALID_SALESFORCE_IDS[13] in child_ids, "Case 1 not found in relationship"
    assert _VALID_SALESFORCE_IDS[14] in child_ids, "Case 2 not found in relationship"
    assert _VALID_SALESFORCE_IDS[48] in child_ids, "Contact not found in relationship"
    assert (
        _VALID_SALESFORCE_IDS[62] in child_ids
    ), "Opportunity not found in relationship"

    # Test querying relationships for a different account (should be empty)
    other_account_children = get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert (
        len(other_account_children) == 0
    ), "Expected no children for different account"

    print("All relationship tests passed successfully!")


def test_account_with_children() -> None:
    """
    Tests querying all accounts and retrieving their child objects.
    This test verifies that:
    1. All accounts can be retrieved
    2. Child objects are correctly linked
    3. Child object data is complete and accurate
    """
    # First get all account IDs
    account_ids = find_ids_by_type("Account")
    assert len(account_ids) > 0, "No accounts found"

    # For each account, get its children and verify the data
    for account_id in account_ids:
        account = get_record(account_id)
        assert account is not None, f"Could not find account {account_id}"

        # Get all child objects
        child_ids = get_child_ids(account_id)

        # For Acme Inc., verify specific relationships
        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.
            assert (
                len(child_ids) == 4
            ), f"Expected 4 children for Acme Inc., found {len(child_ids)}"

            # Get all child records
            child_records = []
            for child_id in child_ids:
                child_record = get_record(child_id)
                if child_record is not None:
                    child_records.append(child_record)
            # Verify Cases
            cases = [r for r in child_records if r.type == "Case"]
            assert (
                len(cases) == 2
            ), f"Expected 2 cases for Acme Inc., found {len(cases)}"
            case_subjects = {case.data["Subject"] for case in cases}
            assert "Test Case 1" in case_subjects, "Test Case 1 not found"
            assert "Test Case 2" in case_subjects, "Test Case 2 not found"

            # Verify Contacts
            contacts = [r for r in child_records if r.type == "Contact"]
            assert (
                len(contacts) == 1
            ), f"Expected 1 contact for Acme Inc., found {len(contacts)}"
            contact = contacts[0]
            assert contact.data["FirstName"] == "Test", "Contact first name mismatch"
            assert contact.data["LastName"] == "Contact", "Contact last name mismatch"

            # Verify Opportunities
            opportunities = [r for r in child_records if r.type == "Opportunity"]
            assert (
                len(opportunities) == 1
            ), f"Expected 1 opportunity for Acme Inc., found {len(opportunities)}"
            opportunity = opportunities[0]
            assert (
                opportunity.data["Name"] == "Test Opportunity"
            ), "Opportunity name mismatch"
            assert opportunity.data["Amount"] == "100000", "Opportunity amount mismatch"

    print("All account with children tests passed successfully!")


def test_relationship_updates() -> None:
    """
    Tests that relationships are properly updated when a child object's parent reference changes.
    This test verifies:
    1. Initial relationship is created correctly
    2. When parent reference is updated, old relationship is removed
    3. New relationship is created correctly
    """
    # Create initial test data - Contact linked to Acme Inc.
    initial_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[0],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    create_csv_file("Contact", initial_contact, "initial_contact.csv")

    # Verify initial relationship
    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] in acme_children
    ), "Initial relationship not created"

    # Update contact to be linked to Globex Corp instead
    updated_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[1],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    create_csv_file("Contact", updated_contact, "updated_contact.csv")

    # Verify old relationship is removed
    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] not in acme_children
    ), "Old relationship not removed"

    # Verify new relationship is created
    globex_children = get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert _VALID_SALESFORCE_IDS[40] in globex_children, "New relationship not created"

    print("All relationship update tests passed successfully!")


def test_get_affected_parent_ids() -> None:
    """
    Tests get_affected_parent_ids functionality by verifying:
    1. IDs that are directly in the parent_types list are included
    2. IDs that have children in the updated_ids list are included
    3. IDs that are neither of the above are not included
    """
    # Create test data with relationships
    test_data = {
        "Account": [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Parent Account 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Parent Account 2",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Not Affected Account",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Child",
                "LastName": "Contact",
            }
        ],
    }

    # Create and update CSV files for test data
    for object_type, records in test_data.items():
        create_csv_file(object_type, records)

    # Test Case 1: Account directly in updated_ids and parent_types
    updated_ids = {_VALID_SALESFORCE_IDS[1]}  # Parent Account 2
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert _VALID_SALESFORCE_IDS[1] in affected_ids, "Direct parent ID not included"

    # Test Case 2: Account with child in updated_ids
    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert (
        _VALID_SALESFORCE_IDS[0] in affected_ids
    ), "Parent of updated child not included"

    # Test Case 3: Both direct and indirect affects
    updated_ids = {_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]}  # Both cases
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert len(affected_ids) == 2, "Expected exactly two affected parent IDs"
    assert _VALID_SALESFORCE_IDS[0] in affected_ids, "Parent of child not included"
    assert _VALID_SALESFORCE_IDS[1] in affected_ids, "Direct parent ID not included"
    assert (
        _VALID_SALESFORCE_IDS[2] not in affected_ids
    ), "Unaffected ID incorrectly included"

    # Test Case 4: No matches
    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact
    parent_types = ["Opportunity"]  # Wrong type
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert len(affected_ids) == 0, "Should return empty list when no matches"

    print("All get_affected_parent_ids tests passed successfully!")


def main_build() -> None:
    clear_sf_db()
    create_csv_with_example_data()
    test_query()
    test_upsert()
    test_relationships()
    test_account_with_children()
    test_relationship_updates()
    test_get_affected_parent_ids()


if __name__ == "__main__":
    main_build()


================================================
FILE: backend/onyx/connectors/salesforce/shelve_stuff/shelve_functions.py
================================================
import csv
import shelve

from onyx.connectors.salesforce.shelve_stuff.shelve_utils import (
    get_child_to_parent_shelf_path,
)
from onyx.connectors.salesforce.shelve_stuff.shelve_utils import get_id_type_shelf_path
from onyx.connectors.salesforce.shelve_stuff.shelve_utils import get_object_shelf_path
from onyx.connectors.salesforce.shelve_stuff.shelve_utils import (
    get_parent_to_child_shelf_path,
)
from onyx.connectors.salesforce.utils import SalesforceObject
from onyx.connectors.salesforce.utils import validate_salesforce_id
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _update_relationship_shelves(
    child_id: str,
    parent_ids: set[str],
) -> None:
    """Update the relationship shelf when a record is updated."""
    try:
        # Convert child_id to string once
        str_child_id = str(child_id)

        # First update child to parent mapping
        with shelve.open(
            get_child_to_parent_shelf_path(),
            flag="c",
            protocol=None,
            writeback=True,
        ) as child_to_parent_db:
            old_parent_ids = set(child_to_parent_db.get(str_child_id, []))
            child_to_parent_db[str_child_id] = list(parent_ids)

            # Calculate differences outside the next context manager
            parent_ids_to_remove = old_parent_ids - parent_ids
            parent_ids_to_add = parent_ids - old_parent_ids

            # Only sync once at the end
            child_to_parent_db.sync()

        # Then update parent to child mapping in a single transaction
        if not parent_ids_to_remove and not parent_ids_to_add:
            return
        with shelve.open(
            get_parent_to_child_shelf_path(),
            flag="c",
            protocol=None,
            writeback=True,
        ) as parent_to_child_db:
            # Process all removals first
            for parent_id in parent_ids_to_remove:
                str_parent_id = str(parent_id)
                existing_children = set(parent_to_child_db.get(str_parent_id, []))
                if str_child_id in existing_children:
                    existing_children.remove(str_child_id)
                    parent_to_child_db[str_parent_id] = list(existing_children)

            # Then process all additions
            for parent_id in parent_ids_to_add:
                str_parent_id = str(parent_id)
                existing_children = set(parent_to_child_db.get(str_parent_id, []))
                existing_children.add(str_child_id)
                parent_to_child_db[str_parent_id] = list(existing_children)

            # Single sync at the end
            parent_to_child_db.sync()

    except Exception as e:
        logger.error(f"Error updating relationship shelves: {e}")
        logger.error(f"Child ID: {child_id}, Parent IDs: {parent_ids}")
        raise


def get_child_ids(parent_id: str) -> set[str]:
    """Get all child IDs for a given parent ID.

    Args:
        parent_id: The ID of the parent object

    Returns:
        A set of child object IDs
    """
    with shelve.open(get_parent_to_child_shelf_path()) as parent_to_child_db:
        return set(parent_to_child_db.get(parent_id, []))


def update_sf_db_with_csv(
    object_type: str,
    csv_download_path: str,
) -> list[str]:
    """Update the SF DB with a CSV file using shelve storage."""
    updated_ids = []
    shelf_path = get_object_shelf_path(object_type)

    # First read the CSV to get all the data
    with open(csv_download_path, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            id = row["Id"]
            parent_ids = set()
            field_to_remove: set[str] = set()
            # Update relationship shelves for any parent references
            for field, value in row.items():
                if validate_salesforce_id(value) and field != "Id":
                    parent_ids.add(value)
                    field_to_remove.add(field)
                if not value:
                    field_to_remove.add(field)
            _update_relationship_shelves(id, parent_ids)
            for field in field_to_remove:
                # We use this to extract the Primary Owner later
                if field != "LastModifiedById":
                    del row[field]

            # Update the main object shelf
            with shelve.open(shelf_path) as object_type_db:
                object_type_db[id] = row
            # Update the ID-to-type mapping shelf
            with shelve.open(get_id_type_shelf_path()) as id_type_db:
                id_type_db[id] = object_type

            updated_ids.append(id)

    # os.remove(csv_download_path)
    return updated_ids


def get_type_from_id(object_id: str) -> str | None:
    """Get the type of an object from its ID."""
    # Look up the object type from the ID-to-type mapping
    with shelve.open(get_id_type_shelf_path()) as id_type_db:
        if object_id not in id_type_db:
            logger.warning(f"Object ID {object_id} not found in ID-to-type mapping")
            return None
        return id_type_db[object_id]


def get_record(
    object_id: str, object_type: str | None = None
) -> SalesforceObject | None:
    """
    Retrieve the record and return it as a SalesforceObject.
    The object type will be looked up from the ID-to-type mapping shelf.
    """
    if object_type is None:
        if not (object_type := get_type_from_id(object_id)):
            return None

    shelf_path = get_object_shelf_path(object_type)
    with shelve.open(shelf_path) as db:
        if object_id not in db:
            logger.warning(f"Object ID {object_id} not found in {shelf_path}")
            return None
        data = db[object_id]
        return SalesforceObject(
            id=object_id,
            type=object_type,
            data=data,
        )


def find_ids_by_type(object_type: str) -> list[str]:
    """
    Find all object IDs for rows of the specified type.
    """
    shelf_path = get_object_shelf_path(object_type)
    try:
        with shelve.open(shelf_path) as db:
            return list(db.keys())
    except FileNotFoundError:
        return []


def get_affected_parent_ids_by_type(
    updated_ids: set[str], parent_types: list[str]
) -> dict[str, set[str]]:
    """Get IDs of objects that are of the specified parent types and are either in the updated_ids
    or have children in the updated_ids.

    Args:
        updated_ids: List of IDs that were updated
        parent_types: List of object types to filter by

    Returns:
        A dictionary of IDs that match the criteria
    """
    affected_ids_by_type: dict[str, set[str]] = {}

    # Check each updated ID
    for updated_id in updated_ids:
        # Add the ID itself if it's of a parent type
        updated_type = get_type_from_id(updated_id)
        if updated_type in parent_types:
            affected_ids_by_type.setdefault(updated_type, set()).add(updated_id)
            continue

        # Get parents of this ID and add them if they're of a parent type
        with shelve.open(get_child_to_parent_shelf_path()) as child_to_parent_db:
            parent_ids = child_to_parent_db.get(updated_id, [])
            for parent_id in parent_ids:
                parent_type = get_type_from_id(parent_id)
                if parent_type in parent_types:
                    affected_ids_by_type.setdefault(parent_type, set()).add(parent_id)

    return affected_ids_by_type


================================================
FILE: backend/onyx/connectors/salesforce/shelve_stuff/shelve_utils.py
================================================
import os

from onyx.connectors.salesforce.utils import BASE_DATA_PATH
from onyx.connectors.salesforce.utils import get_object_type_path


def get_object_shelf_path(object_type: str) -> str:
    """Get the path to the shelf file for a specific object type."""
    base_path = get_object_type_path(object_type)
    os.makedirs(base_path, exist_ok=True)
    return os.path.join(base_path, "data.shelf")


def get_id_type_shelf_path() -> str:
    """Get the path to the ID-to-type mapping shelf."""
    os.makedirs(BASE_DATA_PATH, exist_ok=True)
    return os.path.join(BASE_DATA_PATH, "id_type_mapping.shelf.4g")


def get_parent_to_child_shelf_path() -> str:
    """Get the path to the parent-to-child mapping shelf."""
    os.makedirs(BASE_DATA_PATH, exist_ok=True)
    return os.path.join(BASE_DATA_PATH, "parent_to_child_mapping.shelf.4g")


def get_child_to_parent_shelf_path() -> str:
    """Get the path to the child-to-parent mapping shelf."""
    os.makedirs(BASE_DATA_PATH, exist_ok=True)
    return os.path.join(BASE_DATA_PATH, "child_to_parent_mapping.shelf.4g")


================================================
FILE: backend/onyx/connectors/salesforce/shelve_stuff/test_salesforce_shelves.py
================================================
import csv
import os
import shutil

from onyx.connectors.salesforce.shelve_stuff.shelve_functions import find_ids_by_type
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import (
    get_affected_parent_ids_by_type,
)
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_child_ids
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_record
from onyx.connectors.salesforce.shelve_stuff.shelve_functions import (
    update_sf_db_with_csv,
)
from onyx.connectors.salesforce.utils import BASE_DATA_PATH
from onyx.connectors.salesforce.utils import get_object_type_path

_VALID_SALESFORCE_IDS = [
    "001bm00000fd9Z3AAI",
    "001bm00000fdYTdAAM",
    "001bm00000fdYTeAAM",
    "001bm00000fdYTfAAM",
    "001bm00000fdYTgAAM",
    "001bm00000fdYThAAM",
    "001bm00000fdYTiAAM",
    "001bm00000fdYTjAAM",
    "001bm00000fdYTkAAM",
    "001bm00000fdYTlAAM",
    "001bm00000fdYTmAAM",
    "001bm00000fdYTnAAM",
    "001bm00000fdYToAAM",
    "500bm00000XoOxtAAF",
    "500bm00000XoOxuAAF",
    "500bm00000XoOxvAAF",
    "500bm00000XoOxwAAF",
    "500bm00000XoOxxAAF",
    "500bm00000XoOxyAAF",
    "500bm00000XoOxzAAF",
    "500bm00000XoOy0AAF",
    "500bm00000XoOy1AAF",
    "500bm00000XoOy2AAF",
    "500bm00000XoOy3AAF",
    "500bm00000XoOy4AAF",
    "500bm00000XoOy5AAF",
    "500bm00000XoOy6AAF",
    "500bm00000XoOy7AAF",
    "500bm00000XoOy8AAF",
    "500bm00000XoOy9AAF",
    "500bm00000XoOyAAAV",
    "500bm00000XoOyBAAV",
    "500bm00000XoOyCAAV",
    "500bm00000XoOyDAAV",
    "500bm00000XoOyEAAV",
    "500bm00000XoOyFAAV",
    "500bm00000XoOyGAAV",
    "500bm00000XoOyHAAV",
    "500bm00000XoOyIAAV",
    "003bm00000EjHCjAAN",
    "003bm00000EjHCkAAN",
    "003bm00000EjHClAAN",
    "003bm00000EjHCmAAN",
    "003bm00000EjHCnAAN",
    "003bm00000EjHCoAAN",
    "003bm00000EjHCpAAN",
    "003bm00000EjHCqAAN",
    "003bm00000EjHCrAAN",
    "003bm00000EjHCsAAN",
    "003bm00000EjHCtAAN",
    "003bm00000EjHCuAAN",
    "003bm00000EjHCvAAN",
    "003bm00000EjHCwAAN",
    "003bm00000EjHCxAAN",
    "003bm00000EjHCyAAN",
    "003bm00000EjHCzAAN",
    "003bm00000EjHD0AAN",
    "003bm00000EjHD1AAN",
    "003bm00000EjHD2AAN",
    "550bm00000EXc2tAAD",
    "006bm000006kyDpAAI",
    "006bm000006kyDqAAI",
    "006bm000006kyDrAAI",
    "006bm000006kyDsAAI",
    "006bm000006kyDtAAI",
    "006bm000006kyDuAAI",
    "006bm000006kyDvAAI",
    "006bm000006kyDwAAI",
    "006bm000006kyDxAAI",
    "006bm000006kyDyAAI",
    "006bm000006kyDzAAI",
    "006bm000006kyE0AAI",
    "006bm000006kyE1AAI",
    "006bm000006kyE2AAI",
    "006bm000006kyE3AAI",
    "006bm000006kyE4AAI",
    "006bm000006kyE5AAI",
    "006bm000006kyE6AAI",
    "006bm000006kyE7AAI",
    "006bm000006kyE8AAI",
    "006bm000006kyE9AAI",
    "006bm000006kyEAAAY",
    "006bm000006kyEBAAY",
    "006bm000006kyECAAY",
    "006bm000006kyEDAAY",
    "006bm000006kyEEAAY",
    "006bm000006kyEFAAY",
    "006bm000006kyEGAAY",
    "006bm000006kyEHAAY",
    "006bm000006kyEIAAY",
    "006bm000006kyEJAAY",
    "005bm000009zy0TAAQ",
    "005bm000009zy25AAA",
    "005bm000009zy26AAA",
    "005bm000009zy28AAA",
    "005bm000009zy29AAA",
    "005bm000009zy2AAAQ",
    "005bm000009zy2BAAQ",
]


def clear_sf_db() -> None:
    """
    Clears the SF DB by deleting all files in the data directory.
    """
    shutil.rmtree(BASE_DATA_PATH)


def create_csv_file(
    object_type: str, records: list[dict], filename: str = "test_data.csv"
) -> None:
    """
    Creates a CSV file for the given object type and records.

    Args:
        object_type: The Salesforce object type (e.g. "Account", "Contact")
        records: List of dictionaries containing the record data
        filename: Name of the CSV file to create (default: test_data.csv)
    """
    if not records:
        return

    # Get all unique fields from records
    fields: set[str] = set()
    for record in records:
        fields.update(record.keys())
    fields = set(sorted(list(fields)))  # Sort for consistent order

    # Create CSV file
    csv_path = os.path.join(get_object_type_path(object_type), filename)
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for record in records:
            writer.writerow(record)

    # Update the database with the CSV
    update_sf_db_with_csv(object_type, csv_path)


def create_csv_with_example_data() -> None:
    """
    Creates CSV files with example data, organized by object type.
    """
    example_data: dict[str, list[dict]] = {
        "Account": [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Acme Inc.",
                "BillingCity": "New York",
                "Industry": "Technology",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Globex Corp",
                "BillingCity": "Los Angeles",
                "Industry": "Manufacturing",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Initech",
                "BillingCity": "Austin",
                "Industry": "Software",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[3],
                "Name": "TechCorp Solutions",
                "BillingCity": "San Francisco",
                "Industry": "Software",
                "AnnualRevenue": 5000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[4],
                "Name": "BioMed Research",
                "BillingCity": "Boston",
                "Industry": "Healthcare",
                "AnnualRevenue": 12000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[5],
                "Name": "Green Energy Co",
                "BillingCity": "Portland",
                "Industry": "Energy",
                "AnnualRevenue": 8000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[6],
                "Name": "DataFlow Analytics",
                "BillingCity": "Seattle",
                "Industry": "Technology",
                "AnnualRevenue": 3000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[7],
                "Name": "Cloud Nine Services",
                "BillingCity": "Denver",
                "Industry": "Cloud Computing",
                "AnnualRevenue": 7000000,
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "FirstName": "John",
                "LastName": "Doe",
                "Email": "john.doe@acme.com",
                "Title": "CEO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[41],
                "FirstName": "Jane",
                "LastName": "Smith",
                "Email": "jane.smith@acme.com",
                "Title": "CTO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[42],
                "FirstName": "Bob",
                "LastName": "Johnson",
                "Email": "bob.j@globex.com",
                "Title": "Sales Director",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[43],
                "FirstName": "Sarah",
                "LastName": "Chen",
                "Email": "sarah.chen@techcorp.com",
                "Title": "Product Manager",
                "Phone": "415-555-0101",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[44],
                "FirstName": "Michael",
                "LastName": "Rodriguez",
                "Email": "m.rodriguez@biomed.com",
                "Title": "Research Director",
                "Phone": "617-555-0202",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[45],
                "FirstName": "Emily",
                "LastName": "Green",
                "Email": "emily.g@greenenergy.com",
                "Title": "Sustainability Lead",
                "Phone": "503-555-0303",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[46],
                "FirstName": "David",
                "LastName": "Kim",
                "Email": "david.kim@dataflow.com",
                "Title": "Data Scientist",
                "Phone": "206-555-0404",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[47],
                "FirstName": "Rachel",
                "LastName": "Taylor",
                "Email": "r.taylor@cloudnine.com",
                "Title": "Cloud Architect",
                "Phone": "303-555-0505",
            },
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "Name": "Acme Server Upgrade",
                "Amount": 50000,
                "Stage": "Prospecting",
                "CloseDate": "2024-06-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[63],
                "Name": "Globex Manufacturing Line",
                "Amount": 150000,
                "Stage": "Negotiation",
                "CloseDate": "2024-03-15",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[64],
                "Name": "Initech Software License",
                "Amount": 75000,
                "Stage": "Closed Won",
                "CloseDate": "2024-01-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[65],
                "Name": "TechCorp AI Implementation",
                "Amount": 250000,
                "Stage": "Needs Analysis",
                "CloseDate": "2024-08-15",
                "Probability": 60,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[66],
                "Name": "BioMed Lab Equipment",
                "Amount": 500000,
                "Stage": "Value Proposition",
                "CloseDate": "2024-09-30",
                "Probability": 75,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[67],
                "Name": "Green Energy Solar Project",
                "Amount": 750000,
                "Stage": "Proposal",
                "CloseDate": "2024-07-15",
                "Probability": 80,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[68],
                "Name": "DataFlow Analytics Platform",
                "Amount": 180000,
                "Stage": "Negotiation",
                "CloseDate": "2024-05-30",
                "Probability": 90,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[69],
                "Name": "Cloud Nine Infrastructure",
                "Amount": 300000,
                "Stage": "Qualification",
                "CloseDate": "2024-10-15",
                "Probability": 40,
            },
        ],
    }

    # Create CSV files for each object type
    for object_type, records in example_data.items():
        create_csv_file(object_type, records)


def test_query() -> None:
    """
    Tests querying functionality by verifying:
    1. All expected Account IDs are found
    2. Each Account's data matches what was inserted
    """
    # Expected test data for verification
    expected_accounts: dict[str, dict[str, str | int]] = {
        _VALID_SALESFORCE_IDS[0]: {
            "Name": "Acme Inc.",
            "BillingCity": "New York",
            "Industry": "Technology",
        },
        _VALID_SALESFORCE_IDS[1]: {
            "Name": "Globex Corp",
            "BillingCity": "Los Angeles",
            "Industry": "Manufacturing",
        },
        _VALID_SALESFORCE_IDS[2]: {
            "Name": "Initech",
            "BillingCity": "Austin",
            "Industry": "Software",
        },
        _VALID_SALESFORCE_IDS[3]: {
            "Name": "TechCorp Solutions",
            "BillingCity": "San Francisco",
            "Industry": "Software",
            "AnnualRevenue": 5000000,
        },
        _VALID_SALESFORCE_IDS[4]: {
            "Name": "BioMed Research",
            "BillingCity": "Boston",
            "Industry": "Healthcare",
            "AnnualRevenue": 12000000,
        },
        _VALID_SALESFORCE_IDS[5]: {
            "Name": "Green Energy Co",
            "BillingCity": "Portland",
            "Industry": "Energy",
            "AnnualRevenue": 8000000,
        },
        _VALID_SALESFORCE_IDS[6]: {
            "Name": "DataFlow Analytics",
            "BillingCity": "Seattle",
            "Industry": "Technology",
            "AnnualRevenue": 3000000,
        },
        _VALID_SALESFORCE_IDS[7]: {
            "Name": "Cloud Nine Services",
            "BillingCity": "Denver",
            "Industry": "Cloud Computing",
            "AnnualRevenue": 7000000,
        },
    }

    # Get all Account IDs
    account_ids = find_ids_by_type("Account")

    # Verify we found all expected accounts
    assert len(account_ids) == len(
        expected_accounts
    ), f"Expected {len(expected_accounts)} accounts, found {len(account_ids)}"
    assert set(account_ids) == set(
        expected_accounts.keys()
    ), "Found account IDs don't match expected IDs"

    # Verify each account's data
    for acc_id in account_ids:
        combined = get_record(acc_id)
        assert combined is not None, f"Could not find account {acc_id}"

        expected = expected_accounts[acc_id]

        # Verify account data matches
        for key, value in expected.items():
            value = str(value)
            assert (
                combined.data[key] == value
            ), f"Account {acc_id} field {key} expected {value}, got {combined.data[key]}"

    print("All query tests passed successfully!")


def test_upsert() -> None:
    """
    Tests upsert functionality by:
    1. Updating an existing account
    2. Creating a new account
    3. Verifying both operations were successful
    """
    # Create CSV for updating an existing account and adding a new one
    update_data: list[dict[str, str | int]] = [
        {
            "Id": _VALID_SALESFORCE_IDS[0],
            "Name": "Acme Inc. Updated",
            "BillingCity": "New York",
            "Industry": "Technology",
            "Description": "Updated company info",
        },
        {
            "Id": _VALID_SALESFORCE_IDS[2],
            "Name": "New Company Inc.",
            "BillingCity": "Miami",
            "Industry": "Finance",
            "AnnualRevenue": 1000000,
        },
    ]

    create_csv_file("Account", update_data, "update_data.csv")

    # Verify the update worked
    updated_record = get_record(_VALID_SALESFORCE_IDS[0])
    assert updated_record is not None, "Updated record not found"
    assert updated_record.data["Name"] == "Acme Inc. Updated", "Name not updated"
    assert (
        updated_record.data["Description"] == "Updated company info"
    ), "Description not added"

    # Verify the new record was created
    new_record = get_record(_VALID_SALESFORCE_IDS[2])
    assert new_record is not None, "New record not found"
    assert new_record.data["Name"] == "New Company Inc.", "New record name incorrect"
    assert new_record.data["AnnualRevenue"] == "1000000", "New record revenue incorrect"

    print("All upsert tests passed successfully!")


def test_relationships() -> None:
    """
    Tests relationship shelf updates and queries by:
    1. Creating test data with relationships
    2. Verifying the relationships are correctly stored
    3. Testing relationship queries
    """
    # Create test data for each object type
    test_data: dict[str, list[dict[str, str | int]]] = {
        "Case": [
            {
                "Id": _VALID_SALESFORCE_IDS[13],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[14],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 2",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[48],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Test",
                "LastName": "Contact",
            }
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Name": "Test Opportunity",
                "Amount": 100000,
            }
        ],
    }

    # Create and update CSV files for each object type
    for object_type, records in test_data.items():
        create_csv_file(object_type, records, "relationship_test.csv")

    # Test relationship queries
    # All these objects should be children of Acme Inc.
    child_ids = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert len(child_ids) == 4, f"Expected 4 child objects, found {len(child_ids)}"
    assert _VALID_SALESFORCE_IDS[13] in child_ids, "Case 1 not found in relationship"
    assert _VALID_SALESFORCE_IDS[14] in child_ids, "Case 2 not found in relationship"
    assert _VALID_SALESFORCE_IDS[48] in child_ids, "Contact not found in relationship"
    assert (
        _VALID_SALESFORCE_IDS[62] in child_ids
    ), "Opportunity not found in relationship"

    # Test querying relationships for a different account (should be empty)
    other_account_children = get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert (
        len(other_account_children) == 0
    ), "Expected no children for different account"

    print("All relationship tests passed successfully!")


def test_account_with_children() -> None:
    """
    Tests querying all accounts and retrieving their child objects.
    This test verifies that:
    1. All accounts can be retrieved
    2. Child objects are correctly linked
    3. Child object data is complete and accurate
    """
    # First get all account IDs
    account_ids = find_ids_by_type("Account")
    assert len(account_ids) > 0, "No accounts found"

    # For each account, get its children and verify the data
    for account_id in account_ids:
        account = get_record(account_id)
        assert account is not None, f"Could not find account {account_id}"

        # Get all child objects
        child_ids = get_child_ids(account_id)

        # For Acme Inc., verify specific relationships
        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.
            assert (
                len(child_ids) == 4
            ), f"Expected 4 children for Acme Inc., found {len(child_ids)}"

            # Get all child records
            child_records = []
            for child_id in child_ids:
                child_record = get_record(child_id)
                if child_record is not None:
                    child_records.append(child_record)
            # Verify Cases
            cases = [r for r in child_records if r.type == "Case"]
            assert (
                len(cases) == 2
            ), f"Expected 2 cases for Acme Inc., found {len(cases)}"
            case_subjects = {case.data["Subject"] for case in cases}
            assert "Test Case 1" in case_subjects, "Test Case 1 not found"
            assert "Test Case 2" in case_subjects, "Test Case 2 not found"

            # Verify Contacts
            contacts = [r for r in child_records if r.type == "Contact"]
            assert (
                len(contacts) == 1
            ), f"Expected 1 contact for Acme Inc., found {len(contacts)}"
            contact = contacts[0]
            assert contact.data["FirstName"] == "Test", "Contact first name mismatch"
            assert contact.data["LastName"] == "Contact", "Contact last name mismatch"

            # Verify Opportunities
            opportunities = [r for r in child_records if r.type == "Opportunity"]
            assert (
                len(opportunities) == 1
            ), f"Expected 1 opportunity for Acme Inc., found {len(opportunities)}"
            opportunity = opportunities[0]
            assert (
                opportunity.data["Name"] == "Test Opportunity"
            ), "Opportunity name mismatch"
            assert opportunity.data["Amount"] == "100000", "Opportunity amount mismatch"

    print("All account with children tests passed successfully!")


def test_relationship_updates() -> None:
    """
    Tests that relationships are properly updated when a child object's parent reference changes.
    This test verifies:
    1. Initial relationship is created correctly
    2. When parent reference is updated, old relationship is removed
    3. New relationship is created correctly
    """
    # Create initial test data - Contact linked to Acme Inc.
    initial_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[0],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    create_csv_file("Contact", initial_contact, "initial_contact.csv")

    # Verify initial relationship
    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] in acme_children
    ), "Initial relationship not created"

    # Update contact to be linked to Globex Corp instead
    updated_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[1],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    create_csv_file("Contact", updated_contact, "updated_contact.csv")

    # Verify old relationship is removed
    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] not in acme_children
    ), "Old relationship not removed"

    # Verify new relationship is created
    globex_children = get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert _VALID_SALESFORCE_IDS[40] in globex_children, "New relationship not created"

    print("All relationship update tests passed successfully!")


def test_get_affected_parent_ids() -> None:
    """
    Tests get_affected_parent_ids functionality by verifying:
    1. IDs that are directly in the parent_types list are included
    2. IDs that have children in the updated_ids list are included
    3. IDs that are neither of the above are not included
    """
    # Create test data with relationships
    test_data = {
        "Account": [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Parent Account 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Parent Account 2",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Not Affected Account",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Child",
                "LastName": "Contact",
            }
        ],
    }

    # Create and update CSV files for test data
    for object_type, records in test_data.items():
        create_csv_file(object_type, records)

    # Test Case 1: Account directly in updated_ids and parent_types
    updated_ids = {_VALID_SALESFORCE_IDS[1]}  # Parent Account 2
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert _VALID_SALESFORCE_IDS[1] in affected_ids, "Direct parent ID not included"

    # Test Case 2: Account with child in updated_ids
    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert (
        _VALID_SALESFORCE_IDS[0] in affected_ids
    ), "Parent of updated child not included"

    # Test Case 3: Both direct and indirect affects
    updated_ids = {_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]}  # Both cases
    parent_types = ["Account"]
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert len(affected_ids) == 2, "Expected exactly two affected parent IDs"
    assert _VALID_SALESFORCE_IDS[0] in affected_ids, "Parent of child not included"
    assert _VALID_SALESFORCE_IDS[1] in affected_ids, "Direct parent ID not included"
    assert (
        _VALID_SALESFORCE_IDS[2] not in affected_ids
    ), "Unaffected ID incorrectly included"

    # Test Case 4: No matches
    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact
    parent_types = ["Opportunity"]  # Wrong type
    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)
    assert len(affected_ids) == 0, "Should return empty list when no matches"

    print("All get_affected_parent_ids tests passed successfully!")


def main_build() -> None:
    clear_sf_db()
    create_csv_with_example_data()
    test_query()
    test_upsert()
    test_relationships()
    test_account_with_children()
    test_relationship_updates()
    test_get_affected_parent_ids()


if __name__ == "__main__":
    main_build()


================================================
FILE: backend/onyx/connectors/salesforce/sqlite_functions.py
================================================
import csv
import json
import os
import sqlite3
import time
from collections.abc import Iterator
from pathlib import Path
from typing import Any
from typing import cast

from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE
from onyx.connectors.salesforce.utils import ID_FIELD
from onyx.connectors.salesforce.utils import NAME_FIELD
from onyx.connectors.salesforce.utils import remove_sqlite_db_files
from onyx.connectors.salesforce.utils import SalesforceObject
from onyx.connectors.salesforce.utils import USER_OBJECT_TYPE
from onyx.connectors.salesforce.utils import validate_salesforce_id
from onyx.utils.logger import setup_logger
from shared_configs.utils import batch_list


logger = setup_logger()


SQLITE_DISK_IO_ERROR = "disk I/O error"


class OnyxSalesforceSQLite:
    """Notes on context management using 'with self.conn':

    Does autocommit / rollback on exit.
    Does NOT close on exit! .close must be called explicitly.
    """

    # NOTE(rkuo): this string could probably occur naturally. A more unique value
    # might be appropriate here.
    NULL_ID_STRING = "N/A"

    def __init__(self, filename: str, isolation_level: str | None = None):
        self.filename = filename
        self.isolation_level = isolation_level
        self._conn: sqlite3.Connection | None = None

        # this is only set on connection. This variable does not change
        # when a new db is initialized with this class.
        self._existing_db = True

    def __del__(self) -> None:
        self.close()

    @property
    def file_size(self) -> int:
        """Returns -1 if the file does not exist."""
        if not self.filename:
            return -1

        if not os.path.exists(self.filename):
            return -1

        file_path = Path(self.filename)
        return file_path.stat().st_size

    def connect(self) -> None:
        if self._conn is not None:
            self._conn.close()
            self._conn = None

        self._existing_db = os.path.exists(self.filename)

        # make the path if it doesn't already exist
        os.makedirs(os.path.dirname(self.filename), exist_ok=True)

        conn = sqlite3.connect(self.filename, timeout=60.0)
        if self.isolation_level is not None:
            conn.isolation_level = self.isolation_level

        self._conn = conn

    def close(self) -> None:
        if self._conn is None:
            return

        self._conn.close()
        self._conn = None

    def cursor(self) -> sqlite3.Cursor:
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        return self._conn.cursor()

    def flush(self) -> None:
        """We're using SQLite in WAL mode sometimes. To flush to the DB we have to
        call this."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute("PRAGMA wal_checkpoint(FULL)")

    def apply_schema(self) -> None:
        """Initialize the SQLite database with required tables if they don't exist.

        Non-destructive operation. If a disk I/O error is encountered (often due
        to stale WAL/SHM files from a previous crash), this method will attempt
        to recover by removing the corrupted files and recreating the database.
        """
        try:
            self._apply_schema_impl()
        except sqlite3.OperationalError as e:
            if SQLITE_DISK_IO_ERROR not in str(e):
                raise

            logger.warning(f"SQLite disk I/O error detected, attempting recovery: {e}")
            self._recover_from_corruption()
            self._apply_schema_impl()

    def _recover_from_corruption(self) -> None:
        """Recover from SQLite corruption by removing all database files and reconnecting."""
        logger.info(f"Removing corrupted SQLite files: {self.filename}")

        # Close existing connection
        self.close()

        # Remove all SQLite files (main db, WAL, SHM)
        remove_sqlite_db_files(self.filename)

        # Reconnect - this will create a fresh database
        self.connect()

        logger.info("SQLite recovery complete, fresh database created")

    def _apply_schema_impl(self) -> None:
        """Internal implementation of apply_schema."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        start = time.monotonic()

        with self._conn:
            cursor = self._conn.cursor()

            if self._existing_db:
                file_path = Path(self.filename)
                file_size = file_path.stat().st_size
                logger.info(f"init_db - found existing sqlite db: len={file_size}")
            else:
                # NOTE(rkuo): why is this only if the db doesn't exist?

                # Enable WAL mode for better concurrent access and write performance
                cursor.execute("PRAGMA journal_mode=WAL")
                cursor.execute("PRAGMA synchronous=NORMAL")
                cursor.execute("PRAGMA temp_store=MEMORY")
                cursor.execute("PRAGMA cache_size=-2000000")  # Use 2GB memory for cache

            # Main table for storing Salesforce objects
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS salesforce_objects (
                    id TEXT PRIMARY KEY,
                    object_type TEXT NOT NULL,
                    data TEXT NOT NULL,  -- JSON serialized data
                    last_modified INTEGER DEFAULT (strftime('%s', 'now'))  -- Add timestamp for better cache management
                ) WITHOUT ROWID  -- Optimize for primary key lookups
            """
            )

            # NOTE(rkuo): this seems completely redundant with relationship_types
            # Table for parent-child relationships with covering index
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS relationships (
                    child_id TEXT NOT NULL,
                    parent_id TEXT NOT NULL,
                    PRIMARY KEY (child_id, parent_id)
                ) WITHOUT ROWID  -- Optimize for primary key lookups
            """
            )

            # New table for caching parent-child relationships with object types
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS relationship_types (
                    child_id TEXT NOT NULL,
                    parent_id TEXT NOT NULL,
                    parent_type TEXT NOT NULL,
                    PRIMARY KEY (child_id, parent_id, parent_type)
                ) WITHOUT ROWID
            """
            )

            # Create a table for User email to ID mapping if it doesn't exist
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS user_email_map (
                    email TEXT PRIMARY KEY,
                    user_id TEXT,  -- Nullable to allow for users without IDs
                    FOREIGN KEY (user_id) REFERENCES salesforce_objects(id)
                ) WITHOUT ROWID
            """
            )

            # Create indexes if they don't exist (SQLite ignores IF NOT EXISTS for indexes)
            def create_index_if_not_exists(
                index_name: str, create_statement: str
            ) -> None:
                cursor.execute(
                    f"SELECT name FROM sqlite_master WHERE type='index' AND name='{index_name}'"
                )
                if not cursor.fetchone():
                    cursor.execute(create_statement)

            create_index_if_not_exists(
                "idx_object_type",
                """
                CREATE INDEX idx_object_type
                ON salesforce_objects(object_type, id)
                WHERE object_type IS NOT NULL
                """,
            )

            create_index_if_not_exists(
                "idx_parent_id",
                """
                CREATE INDEX idx_parent_id
                ON relationships(parent_id, child_id)
                """,
            )

            create_index_if_not_exists(
                "idx_child_parent",
                """
                CREATE INDEX idx_child_parent
                ON relationships(child_id)
                WHERE child_id IS NOT NULL
                """,
            )

            create_index_if_not_exists(
                "idx_relationship_types_lookup",
                """
                CREATE INDEX idx_relationship_types_lookup
                ON relationship_types(parent_type, child_id, parent_id)
                """,
            )

            elapsed = time.monotonic() - start
            logger.info(f"init_db - create tables and indices: elapsed={elapsed:.2f}")

            # Analyze tables to help query planner
            # NOTE(rkuo): skip ANALYZE - it takes too long and we likely don't have
            # complicated queries that need this
            # start = time.monotonic()
            # cursor.execute("ANALYZE relationships")
            # cursor.execute("ANALYZE salesforce_objects")
            # cursor.execute("ANALYZE relationship_types")
            # cursor.execute("ANALYZE user_email_map")
            # elapsed = time.monotonic() - start
            # logger.info(f"init_db - analyze: elapsed={elapsed:.2f}")

            # If database already existed but user_email_map needs to be populated
            start = time.monotonic()
            cursor.execute("SELECT COUNT(*) FROM user_email_map")
            elapsed = time.monotonic() - start
            logger.info(f"init_db - count user_email_map: elapsed={elapsed:.2f}")

            start = time.monotonic()
            if cursor.fetchone()[0] == 0:
                OnyxSalesforceSQLite._update_user_email_map(cursor)
            elapsed = time.monotonic() - start
            logger.info(f"init_db - update_user_email_map: elapsed={elapsed:.2f}")

    def get_user_id_by_email(self, email: str) -> str | None:
        """Get the Salesforce User ID for a given email address.

        Args:
            email: The email address to look up

        Returns:
            A tuple of (was_found, user_id):
                - was_found: True if the email exists in the table, False if not found
                - user_id: The Salesforce User ID if exists, None otherwise
        """
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute(
                "SELECT user_id FROM user_email_map WHERE email = ?", (email,)
            )
            result = cursor.fetchone()
            if result is None:
                return None
            return result[0]

    def update_email_to_id_table(self, email: str, id: str | None) -> None:
        """Update the email to ID map table with a new email and ID."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        id_to_use = id or self.NULL_ID_STRING
        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute(
                "INSERT OR REPLACE INTO user_email_map (email, user_id) VALUES (?, ?)",
                (email, id_to_use),
            )

    def log_stats(self) -> None:
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cache_pages = self._conn.execute("PRAGMA cache_size").fetchone()[0]
            page_size = self._conn.execute("PRAGMA page_size").fetchone()[0]
            if cache_pages >= 0:
                cache_bytes = cache_pages * page_size
            else:
                cache_bytes = abs(cache_pages * 1024)
            logger.info(
                f"SQLite stats: sqlite_version={sqlite3.sqlite_version} "
                f"cache_pages={cache_pages} "
                f"page_size={page_size} "
                f"cache_bytes={cache_bytes}"
            )

    # get_changed_parent_ids_by_type_2 replaces this
    def get_changed_parent_ids_by_type(
        self,
        changed_ids: list[str],
        parent_types: set[str],
        batch_size: int = 500,
    ) -> Iterator[tuple[str, str, int]]:
        """Get IDs of objects that are of the specified parent types and are either in the
        updated_ids or have children in the updated_ids. Yields tuples of (parent_type, affected_ids, num_examined).

        NOTE(rkuo): This function used to have some interesting behavior ... it created batches of id's
        and yielded back a list once for each parent type within that batch.

        There's no need to expose the details of the internal batching to the caller, so
        we're now yielding once per changed parent.
        """
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        updated_parent_ids: set[str] = (
            set()
        )  # dedupes parent id's that have already been yielded

        # SQLite typically has a limit of 999 variables
        num_examined = 0
        updated_ids_batches = batch_list(changed_ids, batch_size)

        with self._conn:
            cursor = self._conn.cursor()

            for batch_ids in updated_ids_batches:
                num_examined += len(batch_ids)

                batch_ids = list(set(batch_ids) - updated_parent_ids)
                if not batch_ids:
                    continue
                id_placeholders = ",".join(["?" for _ in batch_ids])

                for parent_type in parent_types:
                    affected_ids: set[str] = set()

                    # Get directly updated objects of parent types - using index on object_type
                    cursor.execute(
                        f"""
                        SELECT id FROM salesforce_objects
                        WHERE id IN ({id_placeholders})
                        AND object_type = ?
                        """,
                        batch_ids + [parent_type],
                    )
                    affected_ids.update(row[0] for row in cursor.fetchall())

                    # Get parent objects of updated objects - using optimized relationship_types table
                    cursor.execute(
                        f"""
                        SELECT DISTINCT parent_id
                        FROM relationship_types
                        INDEXED BY idx_relationship_types_lookup
                        WHERE parent_type = ?
                        AND child_id IN ({id_placeholders})
                        """,
                        [parent_type] + batch_ids,
                    )
                    affected_ids.update(row[0] for row in cursor.fetchall())

                    # Remove any parent IDs that have already been processed
                    newly_affected_ids = affected_ids - updated_parent_ids
                    # Add the new affected IDs to the set of updated parent IDs
                    if newly_affected_ids:
                        # Yield each newly affected ID individually
                        for parent_id in newly_affected_ids:
                            yield parent_type, parent_id, num_examined

                        updated_parent_ids.update(newly_affected_ids)

    def get_changed_parent_ids_by_type_2(
        self,
        changed_ids: dict[str, str],
        parent_types: set[str],
        parent_relationship_fields_by_type: dict[str, dict[str, list[str]]],
        prefix_to_type: dict[str, str],
    ) -> Iterator[tuple[str, str, int]]:
        """
        This function yields back any changed parent id's based on
        a relationship lookup.

        Yields tuples of (changed_id, parent_type, num_examined)
        changed_id is the id of the changed parent record
        parent_type is the object table/type of the id (based on a prefix lookup)
        num_examined is an integer which signifies our progress through the changed_id's dict

        changed_ids is a list of all id's that changed, both parent and children.
        parent

        This is much simpler than get_changed_parent_ids_by_type.

        TODO(rkuo): for common entities, the first 3 chars identify the object type
        see https://help.salesforce.com/s/articleView?id=000385203&type=1
        """
        changed_parent_ids: set[str] = (
            set()
        )  # dedupes parent id's that have already been yielded

        # SQLite typically has a limit of 999 variables
        num_examined = 0

        for changed_id, changed_type in changed_ids.items():
            num_examined += 1

            # if we yielded this id already, continue
            if changed_id in changed_parent_ids:
                continue

            # if this id is a parent type, yield it directly
            if changed_type in parent_types:
                yield changed_id, changed_type, num_examined
                changed_parent_ids.add(changed_id)
                continue

            # if this id is a child type, then check the columns
            # that relate it to the parent id and yield those ids
            # NOTE: Although unlikely, id's yielded in this way may not be of the
            # type we're interested in, so the caller must be prepared
            # for the id to not be present

            # get the child id record
            sf_object = self.get_record(changed_id, changed_type)
            if not sf_object:
                continue

            # get the fields that contain parent id's
            parent_relationship_fields = parent_relationship_fields_by_type[
                changed_type
            ]
            for field_name, _ in parent_relationship_fields.items():
                if field_name not in sf_object.data:
                    logger.warning(f"{field_name=} not in data for {changed_type=}!")
                    continue

                parent_id = cast(str, sf_object.data[field_name])
                parent_id_prefix = parent_id[:3]

                if parent_id_prefix not in prefix_to_type:
                    logger.warning(
                        f"Could not lookup type for prefix: {parent_id_prefix=}"
                    )
                    continue

                parent_type = prefix_to_type[parent_id_prefix]
                if parent_type not in parent_types:
                    continue

                yield parent_id, parent_type, num_examined
                changed_parent_ids.add(parent_id)
                break

    def object_type_count(self, object_type: str) -> int:
        """Check if there is at least one object of the specified type in the database.

        Args:
            object_type: The Salesforce object type to check

        Returns:
            bool: True if at least one object exists, False otherwise
        """
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute(
                "SELECT COUNT(*) FROM salesforce_objects WHERE object_type = ?",
                (object_type,),
            )
            count = cursor.fetchone()[0]
            return count

    @staticmethod
    def normalize_record(
        original_record: dict[str, Any],
        remove_ids: bool = True,
    ) -> tuple[dict[str, Any], set[str]]:
        """Takes a dict of field names to values and removes fields
        we don't want.

        This means most parent id field's and any fields with null values.

        Return a json string and a list of parent_id's in the record.
        """
        parent_ids: set[str] = set()
        fields_to_remove: set[str] = set()

        record = original_record.copy()

        for field, value in record.items():
            # remove empty fields
            if not value:
                fields_to_remove.add(field)
                continue

            if field == "attributes":
                fields_to_remove.add(field)
                continue

            # remove salesforce id's (and add to parent id set)
            if (
                field != ID_FIELD
                and isinstance(value, str)
                and validate_salesforce_id(value)
            ):
                parent_ids.add(value)
                if remove_ids:
                    fields_to_remove.add(field)
                continue

            # this field is real data, leave it alone

        # Remove unwanted fields
        for field in fields_to_remove:
            if field != "LastModifiedById":
                del record[field]

        return record, parent_ids

    def update_from_csv(
        self, object_type: str, csv_download_path: str, remove_ids: bool = True
    ) -> list[str]:
        """Update the SF DB with a CSV file using SQLite storage."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        # some customers need this to be larger than the default 128KB, go with 16MB
        csv.field_size_limit(16 * 1024 * 1024)

        updated_ids = []

        with self._conn:
            cursor = self._conn.cursor()

            with open(csv_download_path, "r", newline="", encoding="utf-8") as f:
                reader = csv.DictReader(f)
                uncommitted_rows = 0
                for row in reader:
                    if ID_FIELD not in row:
                        logger.warning(
                            f"Row {row} does not have an {ID_FIELD} field in {csv_download_path}"
                        )
                        continue

                    row_id = row[ID_FIELD]

                    normalized_record, parent_ids = (
                        OnyxSalesforceSQLite.normalize_record(row, remove_ids)
                    )
                    normalized_record_json_str = json.dumps(normalized_record)

                    # Update main object data
                    # NOTE(rkuo): looks like we take a list and dump it as json into the db
                    cursor.execute(
                        """
                        INSERT OR REPLACE INTO salesforce_objects (id, object_type, data)
                        VALUES (?, ?, ?)
                        """,
                        (row_id, object_type, normalized_record_json_str),
                    )

                    # Update relationships using the same connection
                    OnyxSalesforceSQLite._update_relationship_tables(
                        cursor, row_id, parent_ids
                    )
                    updated_ids.append(row_id)

                    # periodically commit or else memory will balloon
                    uncommitted_rows += 1
                    if uncommitted_rows >= 1024:
                        self._conn.commit()
                        uncommitted_rows = 0

            # If we're updating User objects, update the email map
            if object_type == USER_OBJECT_TYPE:
                OnyxSalesforceSQLite._update_user_email_map(cursor)

        return updated_ids

    def get_child_ids(self, parent_id: str) -> set[str]:
        """Get all child IDs for a given parent ID."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()

            # Force index usage with INDEXED BY
            cursor.execute(
                "SELECT child_id FROM relationships INDEXED BY idx_parent_id WHERE parent_id = ?",
                (parent_id,),
            )
            child_ids = {row[0] for row in cursor.fetchall()}
        return child_ids

    def get_type_from_id(self, object_id: str) -> str | None:
        """Get the type of an object from its ID."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute(
                "SELECT object_type FROM salesforce_objects WHERE id = ?", (object_id,)
            )
            result = cursor.fetchone()
            if not result:
                logger.warning(f"Object ID {object_id} not found")
                return None
            return result[0]

    def get_record(
        self, object_id: str, object_type: str | None = None, isChild: bool = False
    ) -> SalesforceObject | None:
        """Retrieve the record and return it as a SalesforceObject."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        if object_type is None:
            object_type = self.get_type_from_id(object_id)
            if not object_type:
                return None

        with self._conn:
            cursor = self._conn.cursor()
            # Get the object data and account data
            if object_type == ACCOUNT_OBJECT_TYPE or isChild:
                cursor.execute(
                    "SELECT data FROM salesforce_objects WHERE id = ?", (object_id,)
                )
            else:
                cursor.execute(
                    "SELECT pso.data, r.parent_id as parent_id, sso.object_type FROM salesforce_objects pso \
                        LEFT JOIN relationships r on r.child_id = pso.id \
                        LEFT JOIN salesforce_objects sso on r.parent_id = sso.id \
                        WHERE pso.id = ? ",
                    (object_id,),
                )
            result = cursor.fetchall()
            if not result:
                logger.warning(f"Object ID {object_id} not found")
                return None

            data = json.loads(result[0][0])

            if object_type != ACCOUNT_OBJECT_TYPE:
                # convert any account ids of the relationships back into data fields, with name
                for row in result:
                    # the following skips Account objects.
                    if len(row) < 3:
                        continue

                    if row[1] and row[2] and row[2] == ACCOUNT_OBJECT_TYPE:
                        data["AccountId"] = row[1]
                        cursor.execute(
                            "SELECT data FROM salesforce_objects WHERE id = ?",
                            (row[1],),
                        )
                        account_data = json.loads(cursor.fetchone()[0])
                        data[ACCOUNT_OBJECT_TYPE] = account_data.get(NAME_FIELD, "")

            return SalesforceObject(id=object_id, type=object_type, data=data)

    def find_ids_by_type(self, object_type: str) -> list[str]:
        """Find all object IDs for rows of the specified type."""
        if self._conn is None:
            raise RuntimeError("Database connection is closed")

        with self._conn:
            cursor = self._conn.cursor()
            cursor.execute(
                "SELECT id FROM salesforce_objects WHERE object_type = ?",
                (object_type,),
            )
            return [row[0] for row in cursor.fetchall()]

    @staticmethod
    def _update_relationship_tables(
        cursor: sqlite3.Cursor, child_id: str, parent_ids: set[str]
    ) -> None:
        """Given a child id and a set of parent id's, updates the
        relationships of the child to the parents in the db and removes old relationships.

        Args:
            conn: The database connection to use (must be in a transaction)
            child_id: The ID of the child record
            parent_ids: Set of parent IDs to link to
        """

        try:
            # Get existing parent IDs
            cursor.execute(
                "SELECT parent_id FROM relationships WHERE child_id = ?", (child_id,)
            )
            old_parent_ids = {row[0] for row in cursor.fetchall()}

            # Calculate differences
            parent_ids_to_remove = old_parent_ids - parent_ids
            parent_ids_to_add = parent_ids - old_parent_ids

            # Remove old relationships
            if parent_ids_to_remove:
                cursor.executemany(
                    "DELETE FROM relationships WHERE child_id = ? AND parent_id = ?",
                    [(child_id, parent_id) for parent_id in parent_ids_to_remove],
                )
                # Also remove from relationship_types
                cursor.executemany(
                    "DELETE FROM relationship_types WHERE child_id = ? AND parent_id = ?",
                    [(child_id, parent_id) for parent_id in parent_ids_to_remove],
                )

            # Add new relationships
            if parent_ids_to_add:
                # First add to relationships table
                cursor.executemany(
                    "INSERT INTO relationships (child_id, parent_id) VALUES (?, ?)",
                    [(child_id, parent_id) for parent_id in parent_ids_to_add],
                )

                # Then get the types of the parent objects and add to relationship_types
                for parent_id in parent_ids_to_add:
                    cursor.execute(
                        "SELECT object_type FROM salesforce_objects WHERE id = ?",
                        (parent_id,),
                    )
                    result = cursor.fetchone()
                    if result:
                        parent_type = result[0]
                        cursor.execute(
                            """
                            INSERT INTO relationship_types (child_id, parent_id, parent_type)
                            VALUES (?, ?, ?)
                            """,
                            (child_id, parent_id, parent_type),
                        )

        except Exception:
            logger.exception(
                f"Error updating relationship tables: child_id={child_id} parent_ids={parent_ids}"
            )
            raise

    @staticmethod
    def _update_user_email_map(cursor: sqlite3.Cursor) -> None:
        """Update the user_email_map table with current User objects.
        Called internally by update_sf_db_with_csv when User objects are updated.
        """

        cursor.execute(
            """
            INSERT OR REPLACE INTO user_email_map (email, user_id)
            SELECT json_extract(data, '$.Email'), id
            FROM salesforce_objects
            WHERE object_type = 'User'
            AND json_extract(data, '$.Email') IS NOT NULL
            """
        )

    def make_basic_expert_info_from_record(
        self,
        sf_object: SalesforceObject,
    ) -> BasicExpertInfo | None:
        """Parses record for LastModifiedById and returns BasicExpertInfo
        of the user if possible."""
        object_dict: dict[str, Any] = sf_object.data
        if not (last_modified_by_id := object_dict.get("LastModifiedById")):
            logger.warning(f"No LastModifiedById found for {sf_object.id}")
            return None
        if not (last_modified_by := self.get_record(last_modified_by_id)):
            logger.warning(f"No LastModifiedBy found for {last_modified_by_id}")
            return None

        try:
            expert_info = BasicExpertInfo.from_dict(last_modified_by.data)
        except Exception:
            return None

        return expert_info


================================================
FILE: backend/onyx/connectors/salesforce/utils.py
================================================
import os
from dataclasses import dataclass
from typing import Any

NAME_FIELD = "Name"
MODIFIED_FIELD = "LastModifiedDate"
ID_FIELD = "Id"
ACCOUNT_OBJECT_TYPE = "Account"
USER_OBJECT_TYPE = "User"


@dataclass
class SalesforceObject:
    id: str
    type: str
    data: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        return {
            "ID": self.id,
            "Type": self.type,
            "Data": self.data,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SalesforceObject":
        return cls(
            id=data[ID_FIELD],
            type=data["Type"],
            data=data,
        )


# This defines the base path for all data files relative to this file
# AKA BE CAREFUL WHEN MOVING THIS FILE
BASE_DATA_PATH = os.path.join(os.path.dirname(__file__), "data")


def get_sqlite_db_path(directory: str) -> str:
    """Get the path to the sqlite db file."""
    return os.path.join(directory, "salesforce_db.sqlite")


def remove_sqlite_db_files(db_path: str) -> None:
    """Remove SQLite database and all associated files (WAL, SHM).

    SQLite in WAL mode creates additional files:
    - .sqlite-wal: Write-ahead log
    - .sqlite-shm: Shared memory file

    If these files become stale (e.g., after a crash), they can cause
    'disk I/O error' when trying to open the database. This function
    ensures all related files are removed.
    """
    files_to_remove = [
        db_path,
        f"{db_path}-wal",
        f"{db_path}-shm",
    ]
    for file_path in files_to_remove:
        if os.path.exists(file_path):
            os.remove(file_path)


# NOTE: only used with shelves, deprecated at this point
def get_object_type_path(object_type: str) -> str:
    """Get the directory path for a specific object type."""
    type_dir = os.path.join(BASE_DATA_PATH, object_type)
    os.makedirs(type_dir, exist_ok=True)
    return type_dir


_CHECKSUM_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
_LOOKUP = {format(i, "05b"): _CHECKSUM_CHARS[i] for i in range(32)}


def validate_salesforce_id(salesforce_id: str) -> bool:
    """Validate the checksum portion of an 18-character Salesforce ID.

    Args:
        salesforce_id: An 18-character Salesforce ID

    Returns:
        bool: True if the checksum is valid, False otherwise
    """
    if len(salesforce_id) != 18:
        return False

    chunks = [salesforce_id[0:5], salesforce_id[5:10], salesforce_id[10:15]]

    checksum = salesforce_id[15:18]
    calculated_checksum = ""

    for chunk in chunks:
        result_string = "".join(
            "1" if char.isupper() else "0" for char in reversed(chunk)
        )
        calculated_checksum += _LOOKUP[result_string]

    return checksum == calculated_checksum


================================================
FILE: backend/onyx/connectors/sharepoint/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/sharepoint/connector.py
================================================
import base64
import copy
import fnmatch
import html
import io
import os
import re
import time
from collections import deque
from collections.abc import Generator
from collections.abc import Iterable
from datetime import datetime
from datetime import timezone
from enum import Enum
from typing import Any
from typing import cast
from urllib.parse import quote
from urllib.parse import unquote
from urllib.parse import urlsplit

import msal  # type: ignore[import-untyped]
import requests
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.serialization import pkcs12
from office365.graph_client import GraphClient  # type: ignore[import-untyped]
from office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]
from office365.onedrive.sites.site import Site  # type: ignore[import-untyped]
from office365.onedrive.sites.sites_with_root import SitesWithRoot  # type: ignore[import-untyped]
from office365.runtime.auth.token_response import TokenResponse  # type: ignore[import-untyped]
from office365.runtime.client_request import ClientRequestException  # type: ignore
from office365.runtime.paths.resource_path import ResourcePath  # type: ignore[import-untyped]
from office365.runtime.queries.client_query import ClientQuery  # type: ignore[import-untyped]
from office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]
from pydantic import BaseModel
from pydantic import Field
from requests.exceptions import HTTPError

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
from onyx.configs.app_configs import SHAREPOINT_CONNECTOR_SIZE_THRESHOLD
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import IndexingHeartbeatInterface
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import ExternalAccess
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.sharepoint.connector_utils import get_sharepoint_external_access
from onyx.db.enums import HierarchyNodeType
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger

logger = setup_logger()
SLIM_BATCH_SIZE = 1000
_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)


SHARED_DOCUMENTS_MAP = {
    "Documents": "Shared Documents",
    "Dokumente": "Freigegebene Dokumente",
    "Documentos": "Documentos compartidos",
}
SHARED_DOCUMENTS_MAP_REVERSE = {v: k for k, v in SHARED_DOCUMENTS_MAP.items()}

ASPX_EXTENSION = ".aspx"


def _is_site_excluded(site_url: str, excluded_site_patterns: list[str]) -> bool:
    """Check if a site URL matches any of the exclusion glob patterns."""
    for pattern in excluded_site_patterns:
        if fnmatch.fnmatch(site_url, pattern) or fnmatch.fnmatch(
            site_url.rstrip("/"), pattern.rstrip("/")
        ):
            return True
    return False


def _is_path_excluded(item_path: str, excluded_path_patterns: list[str]) -> bool:
    """Check if a drive item path matches any of the exclusion glob patterns.

    item_path is the relative path within a drive, e.g. "Engineering/API/report.docx".
    Matches are attempted against the full path and the filename alone so that
    patterns like "*.tmp" match files at any depth.
    """
    filename = item_path.rsplit("/", 1)[-1] if "/" in item_path else item_path
    for pattern in excluded_path_patterns:
        if fnmatch.fnmatch(item_path, pattern) or fnmatch.fnmatch(filename, pattern):
            return True
    return False


def _build_item_relative_path(parent_reference_path: str | None, item_name: str) -> str:
    """Build the relative path of a drive item from its parentReference.path and name.

    Example: parentReference.path="/drives/abc/root:/Eng/API", name="report.docx"
    => "Eng/API/report.docx"
    """
    if parent_reference_path and "root:/" in parent_reference_path:
        folder = unquote(parent_reference_path.split("root:/", 1)[1])
        if folder:
            return f"{folder}/{item_name}"
    return item_name


DEFAULT_AUTHORITY_HOST = "https://login.microsoftonline.com"
DEFAULT_GRAPH_API_HOST = "https://graph.microsoft.com"
DEFAULT_SHAREPOINT_DOMAIN_SUFFIX = "sharepoint.com"

GRAPH_API_BASE = f"{DEFAULT_GRAPH_API_HOST}/v1.0"
GRAPH_API_MAX_RETRIES = 5
GRAPH_API_RETRYABLE_STATUSES = frozenset({429, 500, 502, 503, 504})


class DriveItemData(BaseModel):
    """Lightweight representation of a Graph API drive item, parsed from JSON.

    Replaces the SDK DriveItem for fetching/listing so that we can paginate
    lazily through the Graph API without materialising every item in memory.
    """

    id: str
    name: str
    web_url: str
    size: int | None = None
    mime_type: str | None = None
    download_url: str | None = None
    last_modified_datetime: datetime | None = None
    last_modified_by_display_name: str | None = None
    last_modified_by_email: str | None = None
    parent_reference_path: str | None = None
    drive_id: str | None = None

    @classmethod
    def from_graph_json(cls, item: dict[str, Any]) -> "DriveItemData":
        last_mod_raw = item.get("lastModifiedDateTime")
        last_mod: datetime | None = None
        if isinstance(last_mod_raw, str):
            last_mod = datetime.fromisoformat(last_mod_raw.replace("Z", "+00:00"))

        last_modified_by = item.get("lastModifiedBy", {}).get("user", {})
        parent_ref = item.get("parentReference", {})

        return cls(
            id=item["id"],
            name=item.get("name", ""),
            web_url=item.get("webUrl", ""),
            size=item.get("size"),
            mime_type=item.get("file", {}).get("mimeType"),
            download_url=item.get("@microsoft.graph.downloadUrl"),
            last_modified_datetime=last_mod,
            last_modified_by_display_name=last_modified_by.get("displayName"),
            last_modified_by_email=(
                last_modified_by.get("email")
                or last_modified_by.get("userPrincipalName")
            ),
            parent_reference_path=parent_ref.get("path"),
            drive_id=parent_ref.get("driveId"),
        )

    def to_sdk_driveitem(self, graph_client: GraphClient) -> DriveItem:
        """Construct a lazy SDK DriveItem for permission lookups."""
        if not self.drive_id:
            raise ValueError("drive_id is required to construct SDK DriveItem")
        path = ResourcePath(
            self.id,
            ResourcePath("items", ResourcePath(self.drive_id, ResourcePath("drives"))),
        )
        item = DriveItem(graph_client, path)
        item.set_property("id", self.id)
        return item


# The office365 library's ClientContext caches the access token from its
# first request and never re-invokes the token callback.  Microsoft access
# tokens live ~60-75 minutes, so we recreate the cached ClientContext every
# 30 minutes to let MSAL transparently handle token refresh.
_REST_CTX_MAX_AGE_S = 30 * 60


class SiteDescriptor(BaseModel):
    """Data class for storing SharePoint site information.

    Args:
        url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests
             or https://danswerai.sharepoint.com/teams/team-name)
        drive_name: The name of the drive to access (e.g. "Shared Documents", "Other Library")
                   If None, all drives will be accessed.
        folder_path: The folder path within the drive to access (e.g. "test/nested with spaces")
                    If None, all folders will be accessed.
    """

    url: str
    drive_name: str | None
    folder_path: str | None


class CertificateData(BaseModel):
    """Data class for storing certificate information loaded from PFX file."""

    private_key: bytes
    thumbprint: str


def _site_page_in_time_window(
    page: dict[str, Any],
    start: datetime | None,
    end: datetime | None,
) -> bool:
    """Return True if the page's lastModifiedDateTime falls within [start, end]."""
    if start is None and end is None:
        return True
    raw = page.get("lastModifiedDateTime")
    if not raw:
        return True
    if not isinstance(raw, str):
        raise ValueError(f"lastModifiedDateTime is not a string: {raw}")
    last_modified = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    return (start is None or last_modified >= start) and (
        end is None or last_modified <= end
    )


def sleep_and_retry(
    query_obj: ClientQuery, method_name: str, max_retries: int = 3
) -> Any:
    """
    Execute a SharePoint query with retry logic for rate limiting.
    """
    for attempt in range(max_retries + 1):
        try:
            return query_obj.execute_query()
        except ClientRequestException as e:
            status = e.response.status_code if e.response is not None else None

            # 429 / 503 — rate limit or transient error.  Back off and retry.
            if status in (429, 503) and attempt < max_retries:
                logger.warning(
                    f"Rate limit exceeded on {method_name}, attempt {attempt + 1}/{max_retries + 1}, sleeping and retrying"
                )
                retry_after = e.response.headers.get("Retry-After")
                if retry_after:
                    sleep_time = int(retry_after)
                else:
                    # Exponential backoff: 2^attempt * 5 seconds
                    sleep_time = min(30, (2**attempt) * 5)

                logger.info(f"Sleeping for {sleep_time} seconds before retry")
                time.sleep(sleep_time)
                continue

            # Non-retryable error or retries exhausted — log details and raise.
            if e.response is not None:
                logger.error(
                    f"SharePoint request failed for {method_name}: status={status}, "
                )
            raise e


class SharepointConnectorCheckpoint(ConnectorCheckpoint):
    cached_site_descriptors: deque[SiteDescriptor] | None = None
    current_site_descriptor: SiteDescriptor | None = None

    cached_drive_names: deque[str] | None = None
    current_drive_name: str | None = None
    # Drive's web_url from the API - used as raw_node_id for DRIVE hierarchy nodes
    current_drive_web_url: str | None = None
    # Resolved drive ID — avoids re-resolving on checkpoint resume
    current_drive_id: str | None = None
    # Next delta API page URL for per-page checkpointing within a drive.
    # When set, Phase 3b fetches one page at a time so progress is persisted
    # between pages.  None means BFS path or no active delta traversal.
    current_drive_delta_next_link: str | None = None

    process_site_pages: bool = False

    # Track yielded hierarchy nodes by their raw_node_id (URLs) to avoid duplicates
    seen_hierarchy_node_raw_ids: set[str] = Field(default_factory=set)

    # Track yielded document IDs to avoid processing the same document twice.
    # The Microsoft Graph delta API can return the same item on multiple pages.
    seen_document_ids: set[str] = Field(default_factory=set)


class SharepointAuthMethod(Enum):
    CLIENT_SECRET = "client_secret"
    CERTIFICATE = "certificate"


class SizeCapExceeded(Exception):
    """Exception raised when the size cap is exceeded."""


def _log_and_raise_for_status(response: requests.Response) -> None:
    """Log the response text and raise for status."""
    try:
        response.raise_for_status()
    except Exception:
        logger.error(f"HTTP request failed: {response.text}")
        raise


GRAPH_INVALID_REQUEST_CODE = "invalidRequest"


def _is_graph_invalid_request(response: requests.Response) -> bool:
    """Return True if the response body is the generic Graph API
    ``{"error": {"code": "invalidRequest", "message": "Invalid request"}}``
    shape. This particular error has no actionable inner error code and is
    returned by the site-pages endpoint when a page has a corrupt canvas layout
    (e.g. duplicate web-part IDs — see SharePoint/sp-dev-docs#8822)."""
    try:
        body = response.json()
    except Exception:
        return False
    error = body.get("error", {})
    return error.get("code") == GRAPH_INVALID_REQUEST_CODE


def load_certificate_from_pfx(pfx_data: bytes, password: str) -> CertificateData | None:
    """Load certificate from .pfx file for MSAL authentication"""
    try:
        # Load the certificate and private key
        private_key, certificate, additional_certificates = (
            pkcs12.load_key_and_certificates(pfx_data, password.encode("utf-8"))
        )

        # Validate that certificate and private key are not None
        if certificate is None or private_key is None:
            raise ValueError("Certificate or private key is None")

        # Convert to PEM format that MSAL expects
        key_pem = private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        )

        return CertificateData(
            private_key=key_pem,
            thumbprint=certificate.fingerprint(hashes.SHA1()).hex(),
        )
    except Exception as e:
        logger.error(f"Error loading certificate: {e}")
        return None


def acquire_token_for_rest(
    msal_app: msal.ConfidentialClientApplication,
    sp_tenant_domain: str,
    sharepoint_domain_suffix: str,
) -> TokenResponse:
    token = msal_app.acquire_token_for_client(
        scopes=[f"https://{sp_tenant_domain}.{sharepoint_domain_suffix}/.default"]
    )
    return TokenResponse.from_json(token)


def _create_document_failure(
    driveitem: DriveItemData,
    error_message: str,
    exception: Exception | None = None,
) -> ConnectorFailure:
    """Helper method to create a ConnectorFailure for document processing errors."""
    return ConnectorFailure(
        failed_document=DocumentFailure(
            document_id=driveitem.id or "unknown",
            document_link=driveitem.web_url,
        ),
        failure_message=f"SharePoint document '{driveitem.name or 'unknown'}': {error_message}",
        exception=exception,
    )


def _create_entity_failure(
    entity_id: str,
    error_message: str,
    time_range: tuple[datetime, datetime] | None = None,
    exception: Exception | None = None,
) -> ConnectorFailure:
    """Helper method to create a ConnectorFailure for entity-level errors."""
    return ConnectorFailure(
        failed_entity=EntityFailure(
            entity_id=entity_id,
            missed_time_range=time_range,
        ),
        failure_message=f"SharePoint entity '{entity_id}': {error_message}",
        exception=exception,
    )


def _probe_remote_size(url: str, timeout: int) -> int | None:
    """Determine remote size using HEAD or a range GET probe. Returns None if unknown."""
    try:
        head_resp = requests.head(url, timeout=timeout, allow_redirects=True)
        _log_and_raise_for_status(head_resp)
        cl = head_resp.headers.get("Content-Length")
        if cl and cl.isdigit():
            return int(cl)
    except requests.RequestException:
        pass

    # Fallback: Range request for first byte to read total from Content-Range
    try:
        with requests.get(
            url,
            headers={"Range": "bytes=0-0"},
            timeout=timeout,
            stream=True,
        ) as range_resp:
            _log_and_raise_for_status(range_resp)
            cr = range_resp.headers.get("Content-Range")  # e.g., "bytes 0-0/12345"
            if cr and "/" in cr:
                total = cr.split("/")[-1]
                if total.isdigit():
                    return int(total)
    except requests.RequestException:
        pass

    # If both HEAD and a range GET failed to reveal a size, signal unknown size.
    # Callers should treat None as "size unavailable" and proceed with a safe
    # streaming path that enforces a hard cap to avoid excessive memory usage.
    return None


def _download_with_cap(url: str, timeout: int, cap: int) -> bytes:
    """Stream download content with an upper bound on bytes read.

    Behavior:
    - Checks `Content-Length` first and aborts early if it exceeds `cap`.
    - Otherwise streams the body in chunks and stops once `cap` is surpassed.
    - Raises `SizeCapExceeded` when the cap would be exceeded.
    - Returns the full bytes if the content fits within `cap`.
    """
    with requests.get(url, stream=True, timeout=timeout) as resp:
        _log_and_raise_for_status(resp)

        # If the server provides Content-Length, prefer an early decision.
        cl_header = resp.headers.get("Content-Length")
        if cl_header and cl_header.isdigit():
            content_len = int(cl_header)
            if content_len > cap:
                logger.warning(
                    f"Content-Length {content_len} exceeds cap {cap}; skipping download."
                )
                raise SizeCapExceeded("pre_download")

        buf = io.BytesIO()
        # Stream in 64KB chunks; adjust if needed for slower networks.
        for chunk in resp.iter_content(64 * 1024):
            if not chunk:
                continue
            buf.write(chunk)
            if buf.tell() > cap:
                # Avoid keeping a large partial buffer; close and signal caller to skip.
                logger.warning(
                    f"Streaming download exceeded cap {cap} bytes; aborting early."
                )
                raise SizeCapExceeded("during_download")

        return buf.getvalue()


def _download_via_graph_api(
    access_token: str,
    drive_id: str,
    item_id: str,
    bytes_allowed: int,
    graph_api_base: str,
) -> bytes:
    """Download a drive item via the Graph API /content endpoint with a byte cap.

    Raises SizeCapExceeded if the cap is exceeded.
    """
    url = f"{graph_api_base}/drives/{drive_id}/items/{item_id}/content"
    headers = {"Authorization": f"Bearer {access_token}"}
    with requests.get(
        url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT_SECONDS
    ) as resp:
        _log_and_raise_for_status(resp)
        buf = io.BytesIO()
        for chunk in resp.iter_content(64 * 1024):
            if not chunk:
                continue
            buf.write(chunk)
            if buf.tell() > bytes_allowed:
                raise SizeCapExceeded("during_graph_api_download")
        return buf.getvalue()


def _convert_driveitem_to_document_with_permissions(
    driveitem: DriveItemData,
    drive_name: str,
    ctx: ClientContext | None,
    graph_client: GraphClient,
    graph_api_base: str,
    include_permissions: bool = False,
    parent_hierarchy_raw_node_id: str | None = None,
    access_token: str | None = None,
    treat_sharing_link_as_public: bool = False,
) -> Document | ConnectorFailure | None:

    if not driveitem.name or not driveitem.id:
        raise ValueError("DriveItem name/id is required")

    if include_permissions and ctx is None:
        raise ValueError("ClientContext is required for permissions")

    mime_type = driveitem.mime_type
    if not mime_type or mime_type in OnyxMimeTypes.EXCLUDED_IMAGE_TYPES:
        logger.debug(
            f"Skipping malformed or excluded mime type {mime_type} for {driveitem.name}"
        )
        return None

    file_size = driveitem.size
    download_url = driveitem.download_url

    if file_size is None and download_url:
        file_size = _probe_remote_size(download_url, REQUEST_TIMEOUT_SECONDS)

    if file_size is not None and file_size > SHAREPOINT_CONNECTOR_SIZE_THRESHOLD:
        logger.warning(
            f"Skipping '{driveitem.name}' over size threshold ({file_size} > {SHAREPOINT_CONNECTOR_SIZE_THRESHOLD} bytes)."
        )
        return None

    # Prefer downloadUrl streaming with size cap
    content_bytes: bytes | None = None
    if download_url:
        try:
            content_bytes = _download_with_cap(
                download_url,
                REQUEST_TIMEOUT_SECONDS,
                SHAREPOINT_CONNECTOR_SIZE_THRESHOLD,
            )
        except SizeCapExceeded as e:
            logger.warning(f"Skipping '{driveitem.name}' exceeded size cap: {str(e)}")
            return None
        except requests.RequestException as e:
            status = e.response.status_code if e.response is not None else -1
            logger.warning(
                f"Failed to download via downloadUrl for '{driveitem.name}' (status={status}); falling back to Graph API."
            )

    # Fallback: download via Graph API /content endpoint
    if content_bytes is None and access_token and driveitem.drive_id:
        try:
            content_bytes = _download_via_graph_api(
                access_token,
                driveitem.drive_id,
                driveitem.id,
                SHAREPOINT_CONNECTOR_SIZE_THRESHOLD,
                graph_api_base=graph_api_base,
            )
        except SizeCapExceeded:
            logger.warning(
                f"Skipping '{driveitem.name}' exceeded size cap during Graph API download."
            )
            return None
        except Exception as e:
            logger.warning(
                f"Failed to download via Graph API for '{driveitem.name}': {e}"
            )
            return _create_document_failure(
                driveitem, f"Failed to download via graph api: {e}", e
            )

    sections: list[TextSection | ImageSection] = []
    file_ext = get_file_ext(driveitem.name)

    if not content_bytes:
        logger.warning(
            f"Zero-length content for '{driveitem.name}'. Skipping text/image extraction."
        )
    elif file_ext in OnyxFileExtensions.IMAGE_EXTENSIONS:
        image_section, _ = store_image_and_create_section(
            image_data=content_bytes,
            file_id=driveitem.id,
            display_name=driveitem.name,
            file_origin=FileOrigin.CONNECTOR,
        )
        image_section.link = driveitem.web_url
        sections.append(image_section)
    else:

        def _store_embedded_image(img_data: bytes, img_name: str) -> None:
            try:
                img_mime = get_image_type_from_bytes(img_data)
            except ValueError:
                logger.debug(
                    "Skipping embedded image with unknown format for %s",
                    driveitem.name,
                )
                return

            if img_mime in OnyxMimeTypes.EXCLUDED_IMAGE_TYPES:
                logger.debug(
                    "Skipping embedded image of excluded type %s for %s",
                    img_mime,
                    driveitem.name,
                )
                return

            image_section, _ = store_image_and_create_section(
                image_data=img_data,
                file_id=f"{driveitem.id}_img_{len(sections)}",
                display_name=img_name or f"{driveitem.name} - image {len(sections)}",
                file_origin=FileOrigin.CONNECTOR,
            )
            image_section.link = driveitem.web_url
            sections.append(image_section)

        extraction_result = extract_text_and_images(
            file=io.BytesIO(content_bytes),
            file_name=driveitem.name,
            image_callback=_store_embedded_image,
        )
        if extraction_result.text_content:
            sections.append(
                TextSection(link=driveitem.web_url, text=extraction_result.text_content)
            )

    if include_permissions and ctx is not None:
        logger.info(f"Getting external access for {driveitem.name}")
        sdk_item = driveitem.to_sdk_driveitem(graph_client)
        external_access = get_sharepoint_external_access(
            ctx=ctx,
            graph_client=graph_client,
            drive_item=sdk_item,
            drive_name=drive_name,
            add_prefix=True,
            treat_sharing_link_as_public=treat_sharing_link_as_public,
        )
    else:
        external_access = ExternalAccess.empty()

    doc = Document(
        id=driveitem.id,
        sections=sections,
        source=DocumentSource.SHAREPOINT,
        semantic_identifier=driveitem.name,
        external_access=external_access,
        doc_updated_at=(
            driveitem.last_modified_datetime.replace(tzinfo=timezone.utc)
            if driveitem.last_modified_datetime
            else None
        ),
        primary_owners=[
            BasicExpertInfo(
                display_name=driveitem.last_modified_by_display_name or "",
                email=driveitem.last_modified_by_email or "",
            )
        ],
        metadata={"drive": drive_name},
        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )
    return doc


def _convert_sitepage_to_document(
    site_page: dict[str, Any],
    site_name: str | None,
    ctx: ClientContext | None,
    graph_client: GraphClient,
    include_permissions: bool = False,
    parent_hierarchy_raw_node_id: str | None = None,
    treat_sharing_link_as_public: bool = False,
) -> Document:
    """Convert a SharePoint site page to a Document object."""
    # Extract text content from the site page
    page_text = ""
    # Get title and description
    title = cast(str, site_page.get("title", ""))
    description = cast(str, site_page.get("description", ""))

    # Build the text content
    if title:
        page_text += f"# {title}\n\n"
    if description:
        page_text += f"{description}\n\n"

    # Extract content from canvas layout if available
    canvas_layout = site_page.get("canvasLayout", {})
    if canvas_layout:
        horizontal_sections = canvas_layout.get("horizontalSections", [])
        for section in horizontal_sections:
            columns = section.get("columns", [])
            for column in columns:
                webparts = column.get("webparts", [])
                for webpart in webparts:
                    # Extract text from different types of webparts
                    webpart_type = webpart.get("@odata.type", "")

                    # Extract text from text webparts
                    if webpart_type == "#microsoft.graph.textWebPart":
                        inner_html = webpart.get("innerHtml", "")
                        if inner_html:
                            # Basic HTML to text conversion
                            # Remove HTML tags but preserve some structure
                            text_content = re.sub(r"<br\s*/?>", "\n", inner_html)
                            text_content = re.sub(r"<li>", "• ", text_content)
                            text_content = re.sub(r"</li>", "\n", text_content)
                            text_content = re.sub(
                                r"<h[1-6][^>]*>", "\n## ", text_content
                            )
                            text_content = re.sub(r"</h[1-6]>", "\n", text_content)
                            text_content = re.sub(r"<p[^>]*>", "\n", text_content)
                            text_content = re.sub(r"</p>", "\n", text_content)
                            text_content = re.sub(r"<[^>]+>", "", text_content)
                            # Decode HTML entities
                            text_content = html.unescape(text_content)
                            # Clean up extra whitespace
                            text_content = re.sub(
                                r"\n\s*\n", "\n\n", text_content
                            ).strip()
                            if text_content:
                                page_text += f"{text_content}\n\n"

                    # Extract text from standard webparts
                    elif webpart_type == "#microsoft.graph.standardWebPart":
                        data = webpart.get("data", {})

                        # Extract from serverProcessedContent
                        server_content = data.get("serverProcessedContent", {})
                        searchable_texts = server_content.get(
                            "searchablePlainTexts", []
                        )

                        for text_item in searchable_texts:
                            if isinstance(text_item, dict):
                                key = text_item.get("key", "")
                                value = text_item.get("value", "")
                                if value:
                                    # Add context based on key
                                    if key == "title":
                                        page_text += f"## {value}\n\n"
                                    else:
                                        page_text += f"{value}\n\n"

                        # Extract description if available
                        description = data.get("description", "")
                        if description:
                            page_text += f"{description}\n\n"

                        # Extract title if available
                        webpart_title = data.get("title", "")
                        if webpart_title and webpart_title != description:
                            page_text += f"## {webpart_title}\n\n"

    page_text = page_text.strip()

    # If no content extracted, use the title as fallback
    if not page_text and title:
        page_text = title

    # Parse creation and modification info
    created_datetime = site_page.get("createdDateTime")
    if created_datetime:
        if isinstance(created_datetime, str):
            created_datetime = datetime.fromisoformat(
                created_datetime.replace("Z", "+00:00")
            )
        elif not created_datetime.tzinfo:
            created_datetime = created_datetime.replace(tzinfo=timezone.utc)

    last_modified_datetime = site_page.get("lastModifiedDateTime")
    if last_modified_datetime:
        if isinstance(last_modified_datetime, str):
            last_modified_datetime = datetime.fromisoformat(
                last_modified_datetime.replace("Z", "+00:00")
            )
        elif not last_modified_datetime.tzinfo:
            last_modified_datetime = last_modified_datetime.replace(tzinfo=timezone.utc)

    # Extract owner information
    primary_owners = []
    created_by = site_page.get("createdBy", {}).get("user", {})
    if created_by.get("displayName"):
        primary_owners.append(
            BasicExpertInfo(
                display_name=created_by.get("displayName"),
                email=created_by.get("email", ""),
            )
        )

    web_url = site_page["webUrl"]
    semantic_identifier = cast(str, site_page.get("name", title))
    if semantic_identifier.endswith(ASPX_EXTENSION):
        semantic_identifier = semantic_identifier[: -len(ASPX_EXTENSION)]

    if include_permissions:
        external_access = get_sharepoint_external_access(
            ctx=ctx,
            graph_client=graph_client,
            site_page=site_page,
            add_prefix=True,
            treat_sharing_link_as_public=treat_sharing_link_as_public,
        )
    else:
        external_access = ExternalAccess.empty()

    doc = Document(
        id=site_page["id"],
        sections=[TextSection(link=web_url, text=page_text)],
        source=DocumentSource.SHAREPOINT,
        external_access=external_access,
        semantic_identifier=semantic_identifier,
        doc_updated_at=last_modified_datetime or created_datetime,
        primary_owners=primary_owners,
        metadata=(
            {
                "site": site_name,
            }
            if site_name
            else {}
        ),
        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )
    return doc


def _convert_driveitem_to_slim_document(
    driveitem: DriveItemData,
    drive_name: str,
    ctx: ClientContext,
    graph_client: GraphClient,
    parent_hierarchy_raw_node_id: str | None = None,
    treat_sharing_link_as_public: bool = False,
) -> SlimDocument:
    if driveitem.id is None:
        raise ValueError("DriveItem ID is required")

    sdk_item = driveitem.to_sdk_driveitem(graph_client)
    external_access = get_sharepoint_external_access(
        ctx=ctx,
        graph_client=graph_client,
        drive_item=sdk_item,
        drive_name=drive_name,
        treat_sharing_link_as_public=treat_sharing_link_as_public,
    )

    return SlimDocument(
        id=driveitem.id,
        external_access=external_access,
        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )


def _convert_sitepage_to_slim_document(
    site_page: dict[str, Any],
    ctx: ClientContext | None,
    graph_client: GraphClient,
    parent_hierarchy_raw_node_id: str | None = None,
    treat_sharing_link_as_public: bool = False,
) -> SlimDocument:
    """Convert a SharePoint site page to a SlimDocument object."""
    if site_page.get("id") is None:
        raise ValueError("Site page ID is required")

    external_access = get_sharepoint_external_access(
        ctx=ctx,
        graph_client=graph_client,
        site_page=site_page,
        treat_sharing_link_as_public=treat_sharing_link_as_public,
    )
    id = site_page.get("id")
    if id is None:
        raise ValueError("Site page ID is required")
    return SlimDocument(
        id=id,
        external_access=external_access,
        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )


class SharepointConnector(
    SlimConnectorWithPermSync,
    CheckpointedConnectorWithPermSync[SharepointConnectorCheckpoint],
):
    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        sites: list[str] = [],
        excluded_sites: list[str] = [],
        excluded_paths: list[str] = [],
        include_site_pages: bool = True,
        include_site_documents: bool = True,
        treat_sharing_link_as_public: bool = False,
        authority_host: str = DEFAULT_AUTHORITY_HOST,
        graph_api_host: str = DEFAULT_GRAPH_API_HOST,
        sharepoint_domain_suffix: str = DEFAULT_SHAREPOINT_DOMAIN_SUFFIX,
    ) -> None:
        self.batch_size = batch_size
        self.sites = list(sites)
        self.excluded_sites = [s for p in excluded_sites if (s := p.strip())]
        self.excluded_paths = [s for p in excluded_paths if (s := p.strip())]
        self.treat_sharing_link_as_public = treat_sharing_link_as_public
        self.site_descriptors: list[SiteDescriptor] = self._extract_site_and_drive_info(
            sites
        )
        self._graph_client: GraphClient | None = None
        self.msal_app: msal.ConfidentialClientApplication | None = None
        self.include_site_pages = include_site_pages
        self.include_site_documents = include_site_documents
        self.sp_tenant_domain: str | None = None
        self._credential_json: dict[str, Any] | None = None
        self._cached_rest_ctx: ClientContext | None = None
        self._cached_rest_ctx_url: str | None = None
        self._cached_rest_ctx_created_at: float = 0.0

        resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)
        self._azure_environment = resolved_env.environment
        self.authority_host = resolved_env.authority_host
        self.graph_api_host = resolved_env.graph_host
        self.graph_api_base = f"{self.graph_api_host}/v1.0"
        self.sharepoint_domain_suffix = resolved_env.sharepoint_domain_suffix
        if sharepoint_domain_suffix != resolved_env.sharepoint_domain_suffix:
            logger.warning(
                f"Configured sharepoint_domain_suffix '{sharepoint_domain_suffix}' "
                f"differs from the expected suffix '{resolved_env.sharepoint_domain_suffix}' "
                f"for the {resolved_env.environment} environment. "
                f"Using '{resolved_env.sharepoint_domain_suffix}'."
            )

    def validate_connector_settings(self) -> None:
        # Validate that at least one content type is enabled
        if not self.include_site_documents and not self.include_site_pages:
            raise ConnectorValidationError(
                "At least one content type must be enabled. "
                "Please check either 'Include Site Documents' or 'Include Site Pages' (or both)."
            )

        # Ensure sites are sharepoint urls
        for site_url in self.sites:
            if not site_url.startswith("https://") or not (
                "/sites/" in site_url or "/teams/" in site_url
            ):
                raise ConnectorValidationError(
                    "Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)"
                )

    def _extract_tenant_domain_from_sites(self) -> str | None:
        """Extract the tenant domain from configured site URLs.

        Site URLs look like https://{tenant}.sharepoint.com/sites/... so the
        tenant domain is the first label of the hostname.
        """
        for site_url in self.sites:
            try:
                hostname = urlsplit(site_url.strip()).hostname
            except ValueError:
                continue
            if not hostname:
                continue
            tenant = hostname.split(".")[0]
            if tenant:
                return tenant
        logger.warning(f"No tenant domain found from {len(self.sites)} sites")
        return None

    def _resolve_tenant_domain_from_root_site(self) -> str:
        """Resolve tenant domain via GET /v1.0/sites/root which only requires
        Sites.Read.All (a permission the connector already needs)."""
        root_site = self.graph_client.sites.root.get().execute_query()
        hostname = root_site.site_collection.hostname
        if not hostname:
            raise ConnectorValidationError(
                "Could not determine tenant domain from root site"
            )
        tenant_domain = hostname.split(".")[0]
        logger.info(
            "Resolved tenant domain '%s' from root site hostname '%s'",
            tenant_domain,
            hostname,
        )
        return tenant_domain

    def _resolve_tenant_domain(self) -> str:
        """Determine the tenant domain, preferring site URLs over a Graph API
        call to avoid needing extra permissions."""
        from_sites = self._extract_tenant_domain_from_sites()
        if from_sites:
            logger.info(
                "Resolved tenant domain '%s' from site URLs",
                from_sites,
            )
            return from_sites

        logger.info("No site URLs available; resolving tenant domain from root site")
        return self._resolve_tenant_domain_from_root_site()

    @property
    def graph_client(self) -> GraphClient:
        if self._graph_client is None:
            raise ConnectorMissingCredentialError("Sharepoint")

        return self._graph_client

    def _create_rest_client_context(self, site_url: str) -> ClientContext:
        """Return a ClientContext for SharePoint REST API calls, with caching.

        The office365 library's ClientContext caches the access token from its
        first request and never re-invokes the token callback.  We cache the
        context and recreate it when the site URL changes or after
        ``_REST_CTX_MAX_AGE_S``.  On recreation we also call
        ``load_credentials`` to build a fresh MSAL app with an empty token
        cache, guaranteeing a brand-new token from Azure AD."""
        elapsed = time.monotonic() - self._cached_rest_ctx_created_at
        if (
            self._cached_rest_ctx is not None
            and self._cached_rest_ctx_url == site_url
            and elapsed <= _REST_CTX_MAX_AGE_S
        ):
            return self._cached_rest_ctx

        if self._credential_json:
            logger.info(
                "Rebuilding SharePoint REST client context (elapsed=%.0fs, site_changed=%s)",
                elapsed,
                self._cached_rest_ctx_url != site_url,
            )
            self.load_credentials(self._credential_json)

        if not self.msal_app or not self.sp_tenant_domain:
            raise RuntimeError("MSAL app or tenant domain is not set")

        msal_app = self.msal_app
        sp_tenant_domain = self.sp_tenant_domain
        sp_domain_suffix = self.sharepoint_domain_suffix
        self._cached_rest_ctx = ClientContext(site_url).with_access_token(
            lambda: acquire_token_for_rest(msal_app, sp_tenant_domain, sp_domain_suffix)
        )
        self._cached_rest_ctx_url = site_url
        self._cached_rest_ctx_created_at = time.monotonic()
        return self._cached_rest_ctx

    @staticmethod
    def _strip_share_link_tokens(path: str) -> list[str]:
        # Share links often include a token prefix like /:f:/r/ or /:x:/r/.
        segments = [segment for segment in path.split("/") if segment]
        if segments and segments[0].startswith(":"):
            segments = segments[1:]
            if segments and segments[0] in {"r", "s", "g"}:
                segments = segments[1:]
        return segments

    @staticmethod
    def _normalize_sharepoint_url(url: str) -> tuple[str | None, list[str]]:
        try:
            parsed = urlsplit(url)
        except ValueError:
            logger.warning(f"Sharepoint URL '{url}' could not be parsed")
            return None, []

        if not parsed.scheme or not parsed.netloc:
            logger.warning(
                f"Sharepoint URL '{url}' is not a valid absolute URL (missing scheme or host)"
            )
            return None, []

        path_segments = SharepointConnector._strip_share_link_tokens(parsed.path)
        return f"{parsed.scheme}://{parsed.netloc}", path_segments

    @staticmethod
    def _extract_site_and_drive_info(site_urls: list[str]) -> list[SiteDescriptor]:
        site_data_list = []
        for url in site_urls:
            base_url, parts = SharepointConnector._normalize_sharepoint_url(url.strip())
            if base_url is None:
                continue

            lower_parts = [part.lower() for part in parts]
            site_type_index = None
            for site_token in ("sites", "teams"):
                if site_token in lower_parts:
                    site_type_index = lower_parts.index(site_token)
                    break

            if site_type_index is None or len(parts) <= site_type_index + 1:
                logger.warning(
                    f"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/<name> or /teams/<name>)"
                )
                continue

            site_path = parts[: site_type_index + 2]
            remaining_parts = parts[site_type_index + 2 :]
            site_url = f"{base_url}/" + "/".join(site_path)

            # Extract drive name and folder path
            if remaining_parts:
                drive_name = unquote(remaining_parts[0])
                folder_path = (
                    "/".join(unquote(part) for part in remaining_parts[1:])
                    if len(remaining_parts) > 1
                    else None
                )
            else:
                drive_name = None
                folder_path = None

            site_data_list.append(
                SiteDescriptor(
                    url=site_url,
                    drive_name=drive_name,
                    folder_path=folder_path,
                )
            )
        return site_data_list

    def _resolve_drive(
        self,
        site_descriptor: SiteDescriptor,
        drive_name: str,
    ) -> tuple[str, str | None] | None:
        """Find the drive ID and web_url for a given drive name on a site.

        Returns (drive_id, drive_web_url) or None if the drive was not found.
        Raises on auth/permission errors so callers can propagate them.
        """
        site = self.graph_client.sites.get_by_url(site_descriptor.url)
        drives = site.drives.get().execute_query()
        logger.info(f"Found drives: {[d.name for d in drives]}")

        matched = [
            d
            for d in drives
            if (d.name and d.name.lower() == drive_name.lower())
            or (
                d.name in SHARED_DOCUMENTS_MAP
                and SHARED_DOCUMENTS_MAP[d.name] == drive_name
            )
        ]
        if not matched:
            logger.warning(f"Drive '{drive_name}' not found")
            return None

        drive = matched[0]
        drive_web_url: str | None = drive.web_url
        logger.info(f"Found drive: {drive.name} (web_url: {drive_web_url})")
        return cast(str, drive.id), drive_web_url

    def _get_drive_items_for_drive_id(
        self,
        site_descriptor: SiteDescriptor,
        drive_id: str,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> Generator[DriveItemData, None, None]:
        """Yield drive items lazily for a given drive name.

        Uses the delta API for whole-drive enumeration (flat, incremental via
        timestamp token) and falls back to BFS /children traversal when a
        folder_path is configured, since delta cannot scope to a subtree
        efficiently.

        Returns:
            A generator of DriveItemData.
            The generator paginates through the Graph API so items are never
            all held in memory at once.
        """
        try:
            if site_descriptor.folder_path:
                yield from self._iter_drive_items_paged(
                    drive_id=drive_id,
                    folder_path=site_descriptor.folder_path,
                    start=start,
                    end=end,
                )
            else:
                yield from self._iter_drive_items_delta(
                    drive_id=drive_id,
                    start=start,
                    end=end,
                )

        except Exception as e:
            err_str = str(e)
            if (
                "403 Client Error" in err_str
                or "404 Client Error" in err_str
                or "invalid_client" in err_str
            ):
                raise e

            logger.warning(f"Failed to process site: {site_descriptor.url} - {err_str}")

    def _fetch_driveitems(
        self,
        site_descriptor: SiteDescriptor,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> Generator[tuple[DriveItemData, str, str | None], None, None]:
        """Yield drive items lazily for all drives in a site.

        Yields (DriveItemData, drive_name, drive_web_url) tuples one item at
        a time, paginating through the Graph API internally.
        """
        try:
            site = self.graph_client.sites.get_by_url(site_descriptor.url)
            drives = site.drives.get().execute_query()
            logger.debug(f"Found drives: {[d.name for d in drives]}")

            if site_descriptor.drive_name:
                drives = [
                    drive
                    for drive in drives
                    if drive.name == site_descriptor.drive_name
                    or (
                        drive.name in SHARED_DOCUMENTS_MAP
                        and SHARED_DOCUMENTS_MAP[drive.name]
                        == site_descriptor.drive_name
                    )
                ]
                if not drives:
                    logger.warning(f"Drive '{site_descriptor.drive_name}' not found")
                    return

            for drive in drives:
                try:
                    drive_name = (
                        SHARED_DOCUMENTS_MAP[drive.name]
                        if drive.name in SHARED_DOCUMENTS_MAP
                        else cast(str, drive.name)
                    )
                    drive_web_url: str | None = drive.web_url

                    if site_descriptor.folder_path:
                        item_iter = self._iter_drive_items_paged(
                            drive_id=cast(str, drive.id),
                            folder_path=site_descriptor.folder_path,
                            start=start,
                            end=end,
                        )
                    else:
                        item_iter = self._iter_drive_items_delta(
                            drive_id=cast(str, drive.id),
                            start=start,
                            end=end,
                        )

                    for item in item_iter:
                        yield item, drive_name or "", drive_web_url

                except Exception as e:
                    logger.warning(f"Failed to process drive '{drive.name}': {str(e)}")

        except Exception as e:
            err_str = str(e)
            if (
                "403 Client Error" in err_str
                or "404 Client Error" in err_str
                or "invalid_client" in err_str
            ):
                raise e

            logger.warning(f"Failed to process site: {err_str}")

    def _handle_paginated_sites(
        self, sites: SitesWithRoot
    ) -> Generator[Site, None, None]:
        while sites:
            if sites.current_page:
                yield from sites.current_page
            if not sites.has_next:
                break
            sites = sites._get_next().execute_query()

    def _is_driveitem_excluded(self, driveitem: DriveItemData) -> bool:
        """Check if a drive item should be excluded based on excluded_paths patterns."""
        if not self.excluded_paths:
            return False
        relative_path = _build_item_relative_path(
            driveitem.parent_reference_path, driveitem.name
        )
        return _is_path_excluded(relative_path, self.excluded_paths)

    def _filter_excluded_sites(
        self, site_descriptors: list[SiteDescriptor]
    ) -> list[SiteDescriptor]:
        """Remove sites matching any excluded_sites glob pattern."""
        if not self.excluded_sites:
            return site_descriptors
        result = []
        for sd in site_descriptors:
            if _is_site_excluded(sd.url, self.excluded_sites):
                logger.info(f"Excluding site by denylist: {sd.url}")
                continue
            result.append(sd)
        return result

    def fetch_sites(self) -> list[SiteDescriptor]:
        sites = self.graph_client.sites.get_all_sites().execute_query()

        if not sites:
            raise RuntimeError("No sites found in the tenant")

        # OneDrive personal sites should not be indexed with SharepointConnector
        site_descriptors = [
            SiteDescriptor(
                url=site.web_url or "",
                drive_name=None,
                folder_path=None,
            )
            for site in self._handle_paginated_sites(sites)
            if "-my.sharepoint" not in site.web_url
        ]
        return self._filter_excluded_sites(site_descriptors)

    def _fetch_site_pages(
        self,
        site_descriptor: SiteDescriptor,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> Generator[dict[str, Any], None, None]:
        """Yield SharePoint site pages (.aspx files) one at a time.

        Pages are fetched via the Graph Pages API and yielded lazily as each
        API page arrives, so memory stays bounded regardless of total page count.
        Time-window filtering is applied per-item before yielding.
        """
        site = self.graph_client.sites.get_by_url(site_descriptor.url)
        site.execute_query()
        site_id = site.id

        site_pages_base = (
            f"{self.graph_api_base}/sites/{site_id}/pages/microsoft.graph.sitePage"
        )
        page_url: str | None = site_pages_base
        params: dict[str, str] | None = {"$expand": "canvasLayout"}
        total_yielded = 0
        yielded_ids: set[str] = set()

        while page_url:
            try:
                data = self._graph_api_get_json(page_url, params)
            except HTTPError as e:
                if e.response is not None and e.response.status_code == 404:
                    logger.warning(f"Site page not found: {page_url}")
                    break
                if (
                    e.response is not None
                    and e.response.status_code == 400
                    and _is_graph_invalid_request(e.response)
                ):
                    logger.warning(
                        f"$expand=canvasLayout on the LIST endpoint returned 400 "
                        f"for site {site_descriptor.url}. Falling back to "
                        f"per-page expansion."
                    )
                    yield from self._fetch_site_pages_individually(
                        site_pages_base, start, end, skip_ids=yielded_ids
                    )
                    return
                raise

            params = None  # nextLink already embeds query params

            for page in data.get("value", []):
                if not _site_page_in_time_window(page, start, end):
                    continue
                total_yielded += 1
                page_id = page.get("id")
                if page_id:
                    yielded_ids.add(page_id)
                yield page

            page_url = data.get("@odata.nextLink")

        logger.debug(f"Yielded {total_yielded} site pages for {site_descriptor.url}")

    def _fetch_site_pages_individually(
        self,
        site_pages_base: str,
        start: datetime | None = None,
        end: datetime | None = None,
        skip_ids: set[str] | None = None,
    ) -> Generator[dict[str, Any], None, None]:
        """Fallback for _fetch_site_pages: list pages without $expand, then
        expand canvasLayout on each page individually.

        The Graph API's LIST endpoint can return 400 when $expand=canvasLayout
        is used and *any* page in the site has a corrupt canvas layout (e.g.
        duplicate web part IDs — see SharePoint/sp-dev-docs#8822). Since the
        LIST expansion is all-or-nothing, a single bad page poisons the entire
        response. This method works around it by fetching metadata first, then
        expanding each page individually so only the broken page loses its
        canvas content.

        ``skip_ids`` contains page IDs already yielded by the caller before the
        fallback was triggered, preventing duplicates.
        """
        page_url: str | None = site_pages_base
        total_yielded = 0
        _skip_ids = skip_ids or set()

        while page_url:
            try:
                data = self._graph_api_get_json(page_url)
            except HTTPError as e:
                if e.response is not None and e.response.status_code == 404:
                    break
                raise

            for page in data.get("value", []):
                if not _site_page_in_time_window(page, start, end):
                    continue

                page_id = page.get("id")
                if page_id and page_id in _skip_ids:
                    continue

                if not page_id:
                    total_yielded += 1
                    yield page
                    continue

                expanded = self._try_expand_single_page(site_pages_base, page_id, page)
                total_yielded += 1
                yield expanded

            page_url = data.get("@odata.nextLink")

        logger.debug(
            f"Yielded {total_yielded} site pages (per-page expansion fallback)"
        )

    def _try_expand_single_page(
        self,
        site_pages_base: str,
        page_id: str,
        fallback_page: dict[str, Any],
    ) -> dict[str, Any]:
        """Try to GET a single page with $expand=canvasLayout. On 400, return
        the metadata-only fallback so the page is still indexed (without canvas
        content)."""
        pages_collection = site_pages_base.removesuffix("/microsoft.graph.sitePage")
        single_url = f"{pages_collection}/{page_id}/microsoft.graph.sitePage"
        try:
            return self._graph_api_get_json(single_url, {"$expand": "canvasLayout"})
        except HTTPError as e:
            if (
                e.response is not None
                and e.response.status_code == 400
                and _is_graph_invalid_request(e.response)
            ):
                page_name = fallback_page.get("name", page_id)
                logger.warning(
                    f"$expand=canvasLayout failed for page '{page_name}' ({page_id}). Indexing metadata only."
                )
                return fallback_page
            raise

    def _acquire_token(self) -> dict[str, Any]:
        """
        Acquire token via MSAL
        """
        if self.msal_app is None:
            raise RuntimeError("MSAL app is not initialized")

        token = self.msal_app.acquire_token_for_client(
            scopes=[f"{self.graph_api_host}/.default"]
        )
        return token

    def _get_graph_access_token(self) -> str:
        token_data = self._acquire_token()
        access_token = token_data.get("access_token")
        if not access_token:
            raise RuntimeError("Failed to acquire Graph API access token")
        return access_token

    def _graph_api_get_json(
        self,
        url: str,
        params: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Make an authenticated GET request to the Graph API with retry."""
        access_token = self._get_graph_access_token()
        headers = {"Authorization": f"Bearer {access_token}"}

        for attempt in range(GRAPH_API_MAX_RETRIES + 1):
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    params=params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                if response.status_code in GRAPH_API_RETRYABLE_STATUSES:
                    if attempt < GRAPH_API_MAX_RETRIES:
                        retry_after = int(
                            response.headers.get("Retry-After", str(2**attempt))
                        )
                        wait = min(retry_after, 60)
                        logger.warning(
                            f"Graph API {response.status_code} on attempt {attempt + 1}, retrying in {wait}s: {url}"
                        )
                        time.sleep(wait)
                        # Re-acquire token in case it expired during a long traversal
                        access_token = self._get_graph_access_token()
                        headers = {"Authorization": f"Bearer {access_token}"}
                        continue
                _log_and_raise_for_status(response)
                return response.json()
            except (requests.ConnectionError, requests.Timeout):
                if attempt < GRAPH_API_MAX_RETRIES:
                    wait = min(2**attempt, 60)
                    logger.warning(
                        f"Graph API connection error on attempt {attempt + 1}, retrying in {wait}s: {url}"
                    )
                    time.sleep(wait)
                    continue
                raise

        raise RuntimeError(
            f"Graph API request failed after {GRAPH_API_MAX_RETRIES + 1} attempts: {url}"
        )

    def _iter_drive_items_paged(
        self,
        drive_id: str,
        folder_path: str | None = None,
        start: datetime | None = None,
        end: datetime | None = None,
        page_size: int = 200,
    ) -> Generator[DriveItemData, None, None]:
        """Yield DriveItemData for every file in a drive via the Graph API.

        Performs BFS folder traversal manually, fetching one page of children
        at a time so that memory usage stays bounded regardless of drive size.
        """
        base = f"{self.graph_api_base}/drives/{drive_id}"
        if folder_path:
            encoded_path = quote(folder_path, safe="/")
            start_url = f"{base}/root:/{encoded_path}:/children"
        else:
            start_url = f"{base}/root/children"

        folder_queue: deque[str] = deque([start_url])

        while folder_queue:
            page_url: str | None = folder_queue.popleft()
            params: dict[str, str] | None = {"$top": str(page_size)}

            while page_url:
                data = self._graph_api_get_json(page_url, params)
                params = None  # nextLink already embeds query params

                for item in data.get("value", []):
                    if "folder" in item:
                        child_url = f"{base}/items/{item['id']}/children"
                        folder_queue.append(child_url)
                        continue

                    # Skip non-file items (e.g. OneNote notebooks without a "file" facet)
                    # but still yield them — the downstream conversion handles filtering
                    # by extension / mime type.

                    # NOTE: We are now including items without a lastModifiedDateTime,
                    # and respecting when only one of start or end is set.
                    if start is not None or end is not None:
                        raw_ts = item.get("lastModifiedDateTime")
                        if raw_ts:
                            mod_dt = datetime.fromisoformat(
                                raw_ts.replace("Z", "+00:00")
                            )
                            if start is not None and mod_dt < start:
                                continue
                            if end is not None and mod_dt > end:
                                continue

                    yield DriveItemData.from_graph_json(item)

                page_url = data.get("@odata.nextLink")

    def _iter_drive_items_delta(
        self,
        drive_id: str,
        start: datetime | None = None,
        end: datetime | None = None,
        page_size: int = 200,
    ) -> Generator[DriveItemData, None, None]:
        """Yield DriveItemData for every file in a drive via the Graph delta API.

        Uses the flat delta endpoint instead of recursive folder traversal.
        On subsequent runs (start > epoch), passes the start timestamp as a
        delta token so that only changed items are returned.

        Falls back to full enumeration if the API returns 410 Gone (expired token).
        """
        use_timestamp_token = start is not None and start > _EPOCH

        initial_url = f"{self.graph_api_base}/drives/{drive_id}/root/delta"
        if use_timestamp_token:
            assert start is not None  # mypy
            token = quote(start.isoformat(timespec="seconds"))
            initial_url += f"?token={token}"

        yield from self._iter_delta_pages(
            initial_url=initial_url,
            drive_id=drive_id,
            start=start,
            end=end,
            page_size=page_size,
            allow_full_resync=use_timestamp_token,
        )

    def _iter_delta_pages(
        self,
        initial_url: str,
        drive_id: str,
        start: datetime | None,
        end: datetime | None,
        page_size: int,
        allow_full_resync: bool,
    ) -> Generator[DriveItemData, None, None]:
        """Paginate through delta API responses, yielding file DriveItemData.

        If the API responds with 410 Gone and allow_full_resync is True,
        restarts with a full delta enumeration.
        """
        page_url: str | None = initial_url
        params: dict[str, str] | None = {"$top": str(page_size)}

        while page_url:
            try:
                data = self._graph_api_get_json(page_url, params)
            except requests.HTTPError as e:
                # 410 means the delta token expired, so we need to fall back to full enumeration
                if e.response is not None and e.response.status_code == 410:
                    if not allow_full_resync:
                        raise
                    logger.warning(
                        "Delta token expired (410 Gone) for drive '%s'. Falling back to full delta enumeration.",
                        drive_id,
                    )
                    yield from self._iter_delta_pages(
                        initial_url=f"{self.graph_api_base}/drives/{drive_id}/root/delta",
                        drive_id=drive_id,
                        start=start,
                        end=end,
                        page_size=page_size,
                        allow_full_resync=False,
                    )
                    return
                raise

            params = None  # nextLink/deltaLink already embed query params

            for item in data.get("value", []):
                if "folder" in item or "deleted" in item:
                    continue

                if start is not None or end is not None:
                    raw_ts = item.get("lastModifiedDateTime")
                    if raw_ts:
                        mod_dt = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
                        if start is not None and mod_dt < start:
                            continue
                        if end is not None and mod_dt > end:
                            continue

                yield DriveItemData.from_graph_json(item)

            page_url = data.get("@odata.nextLink")
            if not page_url:
                break

    def _build_delta_start_url(
        self,
        drive_id: str,
        start: datetime | None = None,
        page_size: int = 200,
    ) -> str:
        """Build the initial delta API URL with query parameters embedded.

        Embeds ``$top`` (and optionally a timestamp ``token``) directly in the
        URL so that the returned string is fully self-contained and can be
        stored in a checkpoint without needing a separate params dict.
        """
        base_url = f"{self.graph_api_base}/drives/{drive_id}/root/delta"
        params = [f"$top={page_size}"]
        if start is not None and start > _EPOCH:
            token = quote(start.isoformat(timespec="seconds"))
            params.append(f"token={token}")
        return f"{base_url}?{'&'.join(params)}"

    def _fetch_one_delta_page(
        self,
        page_url: str,
        drive_id: str,
        start: datetime | None = None,
        end: datetime | None = None,
        page_size: int = 200,
    ) -> tuple[list[DriveItemData], str | None]:
        """Fetch a single page of delta API results.

        Returns ``(items, next_page_url)``.  *next_page_url* is ``None`` when
        the delta enumeration is complete (deltaLink with no nextLink).

        On 410 Gone (expired token) returns ``([], full_resync_url)`` so
        the caller can store the resync URL in the checkpoint and retry on
        the next cycle.
        """
        try:
            data = self._graph_api_get_json(page_url)
        except requests.HTTPError as e:
            if e.response is not None and e.response.status_code == 410:
                logger.warning(
                    "Delta token expired (410 Gone) for drive '%s'. Will restart with full delta enumeration.",
                    drive_id,
                )
                full_url = f"{self.graph_api_base}/drives/{drive_id}/root/delta?$top={page_size}"
                return [], full_url
            raise

        items: list[DriveItemData] = []
        for item in data.get("value", []):
            if "folder" in item or "deleted" in item:
                continue
            if start is not None or end is not None:
                raw_ts = item.get("lastModifiedDateTime")
                if raw_ts:
                    mod_dt = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
                    if start is not None and mod_dt < start:
                        continue
                    if end is not None and mod_dt > end:
                        continue
            items.append(DriveItemData.from_graph_json(item))

        next_url = data.get("@odata.nextLink")
        if next_url:
            return items, next_url
        return items, None

    @staticmethod
    def _clear_drive_checkpoint_state(
        checkpoint: "SharepointConnectorCheckpoint",
    ) -> None:
        """Reset all drive-level fields in the checkpoint."""
        checkpoint.current_drive_name = None
        checkpoint.current_drive_id = None
        checkpoint.current_drive_web_url = None
        checkpoint.current_drive_delta_next_link = None
        checkpoint.seen_document_ids.clear()

    def _fetch_slim_documents_from_sharepoint(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> GenerateSlimDocumentOutput:
        site_descriptors = self._filter_excluded_sites(
            self.site_descriptors or self.fetch_sites()
        )

        # Create a temporary checkpoint for hierarchy node tracking
        temp_checkpoint = SharepointConnectorCheckpoint(has_more=True)

        # goes over all urls, converts them into SlimDocument objects and then yields them in batches
        doc_batch: list[SlimDocument | HierarchyNode] = []
        for site_descriptor in site_descriptors:
            site_url = site_descriptor.url

            # Yield site hierarchy node using helper
            doc_batch.extend(
                self._yield_site_hierarchy_node(site_descriptor, temp_checkpoint)
            )

            # Process site documents if flag is True
            if self.include_site_documents:
                for driveitem, drive_name, drive_web_url in self._fetch_driveitems(
                    site_descriptor=site_descriptor,
                    start=start,
                    end=end,
                ):
                    if self._is_driveitem_excluded(driveitem):
                        logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
                        continue

                    if drive_web_url:
                        doc_batch.extend(
                            self._yield_drive_hierarchy_node(
                                site_url, drive_web_url, drive_name, temp_checkpoint
                            )
                        )

                    folder_path = self._extract_folder_path_from_parent_reference(
                        driveitem.parent_reference_path
                    )
                    if folder_path and drive_web_url:
                        doc_batch.extend(
                            self._yield_folder_hierarchy_nodes(
                                site_url,
                                drive_web_url,
                                drive_name,
                                folder_path,
                                temp_checkpoint,
                            )
                        )

                    parent_hierarchy_url: str | None = None
                    if drive_web_url:
                        parent_hierarchy_url = self._get_parent_hierarchy_url(
                            site_url, drive_web_url, drive_name, driveitem
                        )

                    try:
                        logger.debug(f"Processing: {driveitem.web_url}")
                        ctx = self._create_rest_client_context(site_descriptor.url)
                        doc_batch.append(
                            _convert_driveitem_to_slim_document(
                                driveitem,
                                drive_name,
                                ctx,
                                self.graph_client,
                                parent_hierarchy_raw_node_id=parent_hierarchy_url,
                                treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                            )
                        )
                    except Exception as e:
                        logger.warning(f"Failed to process driveitem: {str(e)}")

                    if len(doc_batch) >= SLIM_BATCH_SIZE:
                        yield doc_batch
                        doc_batch = []

            # Process site pages if flag is True
            if self.include_site_pages:
                site_pages = self._fetch_site_pages(
                    site_descriptor, start=start, end=end
                )
                for site_page in site_pages:
                    logger.debug(
                        f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
                    )
                    ctx = self._create_rest_client_context(site_descriptor.url)
                    doc_batch.append(
                        _convert_sitepage_to_slim_document(
                            site_page,
                            ctx,
                            self.graph_client,
                            parent_hierarchy_raw_node_id=site_descriptor.url,
                            treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                        )
                    )
                    if len(doc_batch) >= SLIM_BATCH_SIZE:
                        yield doc_batch
                        doc_batch = []
        yield doc_batch

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self._credential_json = credentials
        auth_method = credentials.get(
            "authentication_method", SharepointAuthMethod.CLIENT_SECRET.value
        )
        sp_client_id = credentials.get("sp_client_id")
        sp_client_secret = credentials.get("sp_client_secret")
        sp_directory_id = credentials.get("sp_directory_id")
        sp_private_key = credentials.get("sp_private_key")
        sp_certificate_password = credentials.get("sp_certificate_password")

        if not sp_client_id:
            raise ConnectorValidationError("Client ID is required")
        if not sp_directory_id:
            raise ConnectorValidationError("Directory (tenant) ID is required")

        authority_url = f"{self.authority_host}/{sp_directory_id}"

        if auth_method == SharepointAuthMethod.CERTIFICATE.value:
            logger.info("Using certificate authentication")
            if not sp_private_key or not sp_certificate_password:
                raise ConnectorValidationError(
                    "Private key and certificate password are required for certificate authentication"
                )

            pfx_data = base64.b64decode(sp_private_key)
            certificate_data = load_certificate_from_pfx(
                pfx_data, sp_certificate_password
            )
            if certificate_data is None:
                raise RuntimeError("Failed to load certificate")

            logger.info(f"Creating MSAL app with authority url {authority_url}")
            self.msal_app = msal.ConfidentialClientApplication(
                authority=authority_url,
                client_id=sp_client_id,
                client_credential=certificate_data.model_dump(),
            )
        elif auth_method == SharepointAuthMethod.CLIENT_SECRET.value:
            logger.info("Using client secret authentication")
            self.msal_app = msal.ConfidentialClientApplication(
                authority=authority_url,
                client_id=sp_client_id,
                client_credential=sp_client_secret,
            )
        else:
            raise ConnectorValidationError(
                "Invalid authentication method or missing required credentials"
            )

        def _acquire_token_for_graph() -> dict[str, Any]:
            """
            Acquire token via MSAL
            """
            if self.msal_app is None:
                raise ConnectorValidationError("MSAL app is not initialized")

            token = self.msal_app.acquire_token_for_client(
                scopes=[f"{self.graph_api_host}/.default"]
            )
            if token is None:
                raise ConnectorValidationError("Failed to acquire token for graph")
            return token

        self._graph_client = GraphClient(
            _acquire_token_for_graph, environment=self._azure_environment
        )
        if auth_method == SharepointAuthMethod.CERTIFICATE.value:
            self.sp_tenant_domain = self._resolve_tenant_domain()
        return None

    def _get_drive_names_for_site(self, site_url: str) -> list[str]:
        """Return all library/drive names for a given SharePoint site."""
        try:
            site = self.graph_client.sites.get_by_url(site_url)
            drives = site.drives.get_all(page_loaded=lambda _: None).execute_query()
            drive_names: list[str] = []
            for drive in drives:
                if drive.name is None:
                    continue
                drive_names.append(drive.name)

            return drive_names
        except Exception as e:
            logger.warning(f"Failed to fetch drives for site '{site_url}': {e}")
            return []

    def _build_folder_url(
        self, site_url: str, drive_name: str, folder_path: str
    ) -> str:
        """Build a URL for a folder to use as raw_node_id.

        NOTE: This constructs an approximate folder URL from components rather than
        fetching the actual webUrl from the API. The constructed URL may differ
        slightly from SharePoint's canonical webUrl (e.g., URL encoding differences),
        but it functions correctly as a unique identifier for hierarchy tracking.
        We avoid fetching folder metadata to minimize API calls.
        """
        return f"{site_url}/{drive_name}/{folder_path}"

    def _extract_folder_path_from_parent_reference(
        self, parent_reference_path: str | None
    ) -> str | None:
        """Extract folder path from DriveItem's parentReference.path.

        Example input: "/drives/b!abc123/root:/Engineering/API"
        Example output: "Engineering/API"

        Returns None if the item is at the root of the drive.
        """
        if not parent_reference_path:
            return None

        # Path format: /drives/{drive_id}/root:/folder/path
        if "root:/" in parent_reference_path:
            folder_path = parent_reference_path.split("root:/")[1]
            return folder_path if folder_path else None

        # Item is at drive root
        return None

    def _yield_site_hierarchy_node(
        self,
        site_descriptor: SiteDescriptor,
        checkpoint: SharepointConnectorCheckpoint,
    ) -> Generator[HierarchyNode, None, None]:
        """Yield a hierarchy node for a site if not already yielded.

        Uses site.web_url as the raw_node_id (exact URL from API).
        """
        site_url = site_descriptor.url

        if site_url in checkpoint.seen_hierarchy_node_raw_ids:
            return

        checkpoint.seen_hierarchy_node_raw_ids.add(site_url)

        # Extract display name from URL (last path segment)
        display_name = site_url.rstrip("/").split("/")[-1]

        yield HierarchyNode(
            raw_node_id=site_url,
            raw_parent_id=None,  # Parent is SOURCE
            display_name=display_name,
            link=site_url,
            node_type=HierarchyNodeType.SITE,
        )

    def _yield_drive_hierarchy_node(
        self,
        site_url: str,
        drive_web_url: str,
        drive_name: str,
        checkpoint: SharepointConnectorCheckpoint,
    ) -> Generator[HierarchyNode, None, None]:
        """Yield a hierarchy node for a drive if not already yielded.

        Uses drive.web_url as the raw_node_id (exact URL from API).
        """
        if drive_web_url in checkpoint.seen_hierarchy_node_raw_ids:
            return

        checkpoint.seen_hierarchy_node_raw_ids.add(drive_web_url)

        yield HierarchyNode(
            raw_node_id=drive_web_url,
            raw_parent_id=site_url,  # Site URL is parent
            display_name=drive_name,
            link=drive_web_url,
            node_type=HierarchyNodeType.DRIVE,
        )

    def _yield_folder_hierarchy_nodes(
        self,
        site_url: str,
        drive_web_url: str,
        drive_name: str,
        folder_path: str,
        checkpoint: SharepointConnectorCheckpoint,
    ) -> Generator[HierarchyNode, None, None]:
        """Yield hierarchy nodes for all folders in a path.

        For path "Engineering/API/v2", yields nodes for:
        1. "Engineering" (parent = drive)
        2. "Engineering/API" (parent = "Engineering")
        3. "Engineering/API/v2" (parent = "Engineering/API")

        Nodes are yielded in parent-to-child order.

        Uses constructed URLs as raw_node_id. See _build_folder_url for details
        on why we construct URLs rather than fetching them from the API.
        """
        if not folder_path:
            return

        path_parts = folder_path.split("/")

        for i, part in enumerate(path_parts):
            current_path = "/".join(path_parts[: i + 1])
            folder_url = self._build_folder_url(site_url, drive_name, current_path)

            if folder_url in checkpoint.seen_hierarchy_node_raw_ids:
                continue

            checkpoint.seen_hierarchy_node_raw_ids.add(folder_url)

            # Determine parent URL
            if i == 0:
                # First folder, parent is the drive
                parent_url = drive_web_url
            else:
                # Parent is the previous folder
                parent_path = "/".join(path_parts[:i])
                parent_url = self._build_folder_url(site_url, drive_name, parent_path)

            yield HierarchyNode(
                raw_node_id=folder_url,
                raw_parent_id=parent_url,
                display_name=part,  # Just the folder name
                link=folder_url,
                node_type=HierarchyNodeType.FOLDER,
            )

    def _get_parent_hierarchy_url(
        self,
        site_url: str,
        drive_web_url: str,
        drive_name: str,
        driveitem: DriveItemData,
    ) -> str:
        """Determine the parent hierarchy node URL for a document.

        Returns:
            - Folder URL if document is in a folder
            - Drive URL if document is at drive root
        """
        folder_path = self._extract_folder_path_from_parent_reference(
            driveitem.parent_reference_path
        )

        if folder_path:
            return self._build_folder_url(site_url, drive_name, folder_path)

        # Document is at drive root
        return drive_web_url

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SharepointConnectorCheckpoint,
        include_permissions: bool = False,
    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:

        if self._graph_client is None:
            raise ConnectorMissingCredentialError("Sharepoint")

        checkpoint = copy.deepcopy(checkpoint)

        # Phase 1: Initialize cached_site_descriptors if needed
        if (
            checkpoint.has_more
            and checkpoint.cached_site_descriptors is None
            and not checkpoint.process_site_pages
        ):
            logger.info("Initializing SharePoint sites for processing")
            site_descs = self._filter_excluded_sites(
                self.site_descriptors or self.fetch_sites()
            )
            checkpoint.cached_site_descriptors = deque(site_descs)

            if not checkpoint.cached_site_descriptors:
                logger.warning(
                    "No SharePoint sites found or accessible - nothing to process"
                )
                checkpoint.has_more = False
                return checkpoint

            logger.info(
                f"Found {len(checkpoint.cached_site_descriptors)} sites to process"
            )
            # Set first site and return to allow checkpoint persistence
            if checkpoint.cached_site_descriptors:
                checkpoint.current_site_descriptor = (
                    checkpoint.cached_site_descriptors.popleft()
                )
                logger.info(
                    f"Starting with site: {checkpoint.current_site_descriptor.url}"
                )
                # Yield site hierarchy node for the first site
                yield from self._yield_site_hierarchy_node(
                    checkpoint.current_site_descriptor, checkpoint
                )
                return checkpoint

        # Phase 2: Initialize cached_drive_names for current site if needed
        if checkpoint.current_site_descriptor and checkpoint.cached_drive_names is None:
            # If site documents flag is False, set empty drive list to skip document processing
            if not self.include_site_documents:
                logger.debug("Documents disabled, skipping drive initialization")
                checkpoint.cached_drive_names = deque()
                return checkpoint

            logger.info(
                f"Initializing drives for site: {checkpoint.current_site_descriptor.url}"
            )

            try:
                # If the user explicitly specified drive(s) for this site, honour that
                if checkpoint.current_site_descriptor.drive_name:
                    logger.info(
                        f"Using explicitly specified drive: {checkpoint.current_site_descriptor.drive_name}"
                    )
                    checkpoint.cached_drive_names = deque(
                        [checkpoint.current_site_descriptor.drive_name]
                    )
                else:
                    drive_names = self._get_drive_names_for_site(
                        checkpoint.current_site_descriptor.url
                    )
                    checkpoint.cached_drive_names = deque(drive_names)

                if not checkpoint.cached_drive_names:
                    logger.warning(
                        f"No accessible drives found for site: {checkpoint.current_site_descriptor.url}"
                    )
                else:
                    logger.info(
                        f"Found {len(checkpoint.cached_drive_names)} drives: {list(checkpoint.cached_drive_names)}"
                    )

            except Exception as e:
                logger.error(
                    f"Failed to initialize drives for site: {checkpoint.current_site_descriptor.url}: {e}"
                )
                # Yield a ConnectorFailure for site-level access failures
                start_dt = datetime.fromtimestamp(start, tz=timezone.utc)
                end_dt = datetime.fromtimestamp(end, tz=timezone.utc)
                yield _create_entity_failure(
                    checkpoint.current_site_descriptor.url,
                    f"Failed to access site: {str(e)}",
                    (start_dt, end_dt),
                    e,
                )
                # Move to next site if available
                if (
                    checkpoint.cached_site_descriptors
                    and len(checkpoint.cached_site_descriptors) > 0
                ):
                    checkpoint.current_site_descriptor = (
                        checkpoint.cached_site_descriptors.popleft()
                    )
                    checkpoint.cached_drive_names = None  # Reset for new site
                    return checkpoint
                else:
                    # No more sites - we're done
                    checkpoint.has_more = False
                    return checkpoint

            # Return checkpoint to allow persistence after drive initialization
            return checkpoint

        # Phase 3a: Initialize the next drive for processing
        if (
            checkpoint.current_site_descriptor
            and checkpoint.cached_drive_names
            and len(checkpoint.cached_drive_names) > 0
            and checkpoint.current_drive_name is None
        ):
            checkpoint.current_drive_name = checkpoint.cached_drive_names.popleft()

            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)
            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)
            site_descriptor = checkpoint.current_site_descriptor

            logger.info(
                f"Processing drive '{checkpoint.current_drive_name}' in site: {site_descriptor.url}"
            )
            logger.debug(f"Time range: {start_dt} to {end_dt}")

            current_drive_name = checkpoint.current_drive_name
            if current_drive_name is None:
                logger.warning("Current drive name is None, skipping")
                return checkpoint

            try:
                logger.info(
                    f"Fetching drive items for drive name: {current_drive_name}"
                )
                result = self._resolve_drive(site_descriptor, current_drive_name)
                if result is None:
                    logger.warning(f"Drive '{current_drive_name}' not found, skipping")
                    self._clear_drive_checkpoint_state(checkpoint)
                    return checkpoint

                drive_id, drive_web_url = result
                checkpoint.current_drive_id = drive_id
                checkpoint.current_drive_web_url = drive_web_url
            except Exception as e:
                logger.error(
                    f"Failed to retrieve items from drive '{current_drive_name}' in site: {site_descriptor.url}: {e}"
                )
                yield _create_entity_failure(
                    f"{site_descriptor.url}|{current_drive_name}",
                    f"Failed to access drive '{current_drive_name}' in site '{site_descriptor.url}': {str(e)}",
                    (start_dt, end_dt),
                    e,
                )
                self._clear_drive_checkpoint_state(checkpoint)
                return checkpoint

            display_drive_name = SHARED_DOCUMENTS_MAP.get(
                current_drive_name, current_drive_name
            )

            if drive_web_url:
                yield from self._yield_drive_hierarchy_node(
                    site_descriptor.url,
                    drive_web_url,
                    display_drive_name,
                    checkpoint,
                )

            # For non-folder-scoped drives, use delta API with per-page
            # checkpointing.  Build the initial URL and fall through to 3b.
            if not site_descriptor.folder_path:
                checkpoint.current_drive_delta_next_link = self._build_delta_start_url(
                    drive_id, start_dt
                )
            # else: BFS path — delta_next_link stays None;
            # Phase 3b will use _iter_drive_items_paged.

        # Phase 3b: Process items from the current drive
        if (
            checkpoint.current_site_descriptor
            and checkpoint.current_drive_name is not None
            and checkpoint.current_drive_id is not None
        ):
            site_descriptor = checkpoint.current_site_descriptor
            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)
            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)
            current_drive_name = SHARED_DOCUMENTS_MAP.get(
                checkpoint.current_drive_name, checkpoint.current_drive_name
            )
            drive_web_url = checkpoint.current_drive_web_url

            # --- determine item source ---
            driveitems: Iterable[DriveItemData]
            has_more_delta_pages = False

            if checkpoint.current_drive_delta_next_link:
                # Delta path: fetch one page at a time for checkpointing
                try:
                    page_items, next_url = self._fetch_one_delta_page(
                        page_url=checkpoint.current_drive_delta_next_link,
                        drive_id=checkpoint.current_drive_id,
                        start=start_dt,
                        end=end_dt,
                    )
                except Exception as e:
                    logger.error(
                        f"Failed to fetch delta page for drive '{current_drive_name}': {e}"
                    )
                    yield _create_entity_failure(
                        f"{site_descriptor.url}|{current_drive_name}",
                        f"Failed to fetch delta page for drive '{current_drive_name}': {str(e)}",
                        (start_dt, end_dt),
                        e,
                    )
                    self._clear_drive_checkpoint_state(checkpoint)
                    return checkpoint

                driveitems = page_items
                has_more_delta_pages = next_url is not None
                if next_url:
                    checkpoint.current_drive_delta_next_link = next_url
            else:
                # BFS path (folder-scoped): process all items at once
                driveitems = self._iter_drive_items_paged(
                    drive_id=checkpoint.current_drive_id,
                    folder_path=site_descriptor.folder_path,
                    start=start_dt,
                    end=end_dt,
                )

            item_count = 0
            for driveitem in driveitems:
                item_count += 1

                if self._is_driveitem_excluded(driveitem):
                    logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
                    continue

                if driveitem.id and driveitem.id in checkpoint.seen_document_ids:
                    logger.debug(
                        f"Skipping duplicate document {driveitem.id} ({driveitem.name})"
                    )
                    continue

                driveitem_extension = get_file_ext(driveitem.name)
                if driveitem_extension not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:
                    logger.warning(
                        f"Skipping {driveitem.web_url} as it is not a supported file type"
                    )
                    continue

                should_yield_if_empty = (
                    driveitem_extension in OnyxFileExtensions.IMAGE_EXTENSIONS
                    or driveitem_extension == ".pdf"
                )

                folder_path = self._extract_folder_path_from_parent_reference(
                    driveitem.parent_reference_path
                )
                if folder_path and drive_web_url:
                    yield from self._yield_folder_hierarchy_nodes(
                        site_descriptor.url,
                        drive_web_url,
                        current_drive_name,
                        folder_path,
                        checkpoint,
                    )

                parent_hierarchy_url: str | None = None
                if drive_web_url:
                    parent_hierarchy_url = self._get_parent_hierarchy_url(
                        site_descriptor.url,
                        drive_web_url,
                        current_drive_name,
                        driveitem,
                    )

                try:
                    ctx: ClientContext | None = None
                    if include_permissions:
                        ctx = self._create_rest_client_context(site_descriptor.url)

                    access_token = self._get_graph_access_token()
                    doc_or_failure = _convert_driveitem_to_document_with_permissions(
                        driveitem,
                        current_drive_name,
                        ctx,
                        self.graph_client,
                        include_permissions=include_permissions,
                        parent_hierarchy_raw_node_id=parent_hierarchy_url,
                        graph_api_base=self.graph_api_base,
                        access_token=access_token,
                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                    )

                    if isinstance(doc_or_failure, Document):
                        if doc_or_failure.sections:
                            checkpoint.seen_document_ids.add(doc_or_failure.id)
                            yield doc_or_failure
                        elif should_yield_if_empty:
                            doc_or_failure.sections = [
                                TextSection(link=driveitem.web_url, text="")
                            ]
                            checkpoint.seen_document_ids.add(doc_or_failure.id)
                            yield doc_or_failure
                        else:
                            logger.warning(
                                f"Skipping {driveitem.web_url} as it is empty and not a PDF or image"
                            )
                    elif isinstance(doc_or_failure, ConnectorFailure):
                        yield doc_or_failure
                except Exception as e:
                    logger.warning(
                        f"Failed to process driveitem {driveitem.web_url}: {e}"
                    )
                    yield _create_document_failure(
                        driveitem, f"Failed to process: {str(e)}", e
                    )

            logger.info(f"Processed {item_count} items in drive '{current_drive_name}'")

            if has_more_delta_pages:
                return checkpoint

            self._clear_drive_checkpoint_state(checkpoint)

        # Phase 4: Progression logic - determine next step
        # If we have more drives in current site, continue with current site
        if checkpoint.cached_drive_names and len(checkpoint.cached_drive_names) > 0:
            logger.debug(
                f"Continuing with {len(checkpoint.cached_drive_names)} remaining drives in current site"
            )
            return checkpoint

        if (
            self.include_site_pages
            and not checkpoint.process_site_pages
            and checkpoint.current_site_descriptor is not None
        ):
            logger.info(
                f"Processing site pages for site: {checkpoint.current_site_descriptor.url}"
            )
            checkpoint.process_site_pages = True
            return checkpoint

        # Phase 5: Process site pages
        if (
            checkpoint.process_site_pages
            and checkpoint.current_site_descriptor is not None
        ):
            # Fetch SharePoint site pages (.aspx files)
            site_descriptor = checkpoint.current_site_descriptor
            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)
            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)
            site_pages = self._fetch_site_pages(
                site_descriptor, start=start_dt, end=end_dt
            )
            for site_page in site_pages:
                logger.debug(
                    f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
                )
                client_ctx: ClientContext | None = None
                if include_permissions:
                    client_ctx = self._create_rest_client_context(site_descriptor.url)
                yield (
                    _convert_sitepage_to_document(
                        site_page,
                        site_descriptor.drive_name,
                        client_ctx,
                        self.graph_client,
                        include_permissions=include_permissions,
                        # Site pages have the site as their parent
                        parent_hierarchy_raw_node_id=site_descriptor.url,
                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                    )
                )
            logger.info(
                f"Finished processing site pages for site: {site_descriptor.url}"
            )

        # If no more drives, move to next site if available
        if (
            checkpoint.cached_site_descriptors
            and len(checkpoint.cached_site_descriptors) > 0
        ):
            current_site = (
                checkpoint.current_site_descriptor.url
                if checkpoint.current_site_descriptor
                else "unknown"
            )
            checkpoint.current_site_descriptor = (
                checkpoint.cached_site_descriptors.popleft()
            )
            checkpoint.cached_drive_names = None  # Reset for new site
            checkpoint.process_site_pages = False
            logger.info(
                f"Finished site '{current_site}', moving to next site: {checkpoint.current_site_descriptor.url}"
            )
            logger.info(
                f"Remaining sites to process: {len(checkpoint.cached_site_descriptors) + 1}"
            )
            # Yield site hierarchy node for the new site
            yield from self._yield_site_hierarchy_node(
                checkpoint.current_site_descriptor, checkpoint
            )
            return checkpoint

        # No more sites or drives - we're done
        current_site = (
            checkpoint.current_site_descriptor.url
            if checkpoint.current_site_descriptor
            else "unknown"
        )
        logger.info(
            f"SharePoint processing complete. Finished last site: {current_site}"
        )
        checkpoint.has_more = False
        return checkpoint

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SharepointConnectorCheckpoint,
    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=False
        )

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SharepointConnectorCheckpoint,
    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=True
        )

    def build_dummy_checkpoint(self) -> SharepointConnectorCheckpoint:
        return SharepointConnectorCheckpoint(has_more=True)

    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> SharepointConnectorCheckpoint:
        return SharepointConnectorCheckpoint.model_validate_json(checkpoint_json)

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        start_dt = (
            datetime.fromtimestamp(start, tz=timezone.utc)
            if start is not None
            else None
        )
        end_dt = (
            datetime.fromtimestamp(end, tz=timezone.utc) if end is not None else None
        )
        yield from self._fetch_slim_documents_from_sharepoint(
            start=start_dt,
            end=end_dt,
        )


if __name__ == "__main__":
    from onyx.connectors.connector_runner import ConnectorRunner

    connector = SharepointConnector(sites=os.environ["SHAREPOINT_SITES"].split(","))

    connector.load_credentials(
        {
            "sp_client_id": os.environ["SHAREPOINT_CLIENT_ID"],
            "sp_client_secret": os.environ["SHAREPOINT_CLIENT_SECRET"],
            "sp_directory_id": os.environ["SHAREPOINT_CLIENT_DIRECTORY_ID"],
        }
    )

    # Create a time range from epoch to now
    end_time = datetime.now(timezone.utc)
    start_time = datetime.fromtimestamp(0, tz=timezone.utc)
    time_range = (start_time, end_time)

    # Initialize the runner with a batch size of 10
    runner: ConnectorRunner[SharepointConnectorCheckpoint] = ConnectorRunner(
        connector, batch_size=10, include_permissions=False, time_range=time_range
    )

    # Get initial checkpoint
    checkpoint = connector.build_dummy_checkpoint()

    # Run the connector
    while checkpoint.has_more:
        for doc_batch, hierarchy_node_batch, failure, next_checkpoint in runner.run(
            checkpoint
        ):
            if doc_batch:
                print(f"Retrieved batch of {len(doc_batch)} documents")
                for test_doc in doc_batch:
                    print(f"Document: {test_doc.semantic_identifier}")
            if failure:
                print(f"Failure: {failure.failure_message}")
            if next_checkpoint:
                checkpoint = next_checkpoint


================================================
FILE: backend/onyx/connectors/sharepoint/connector_utils.py
================================================
from typing import Any

from office365.graph_client import GraphClient  # type: ignore[import-untyped]
from office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]
from office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]

from onyx.connectors.models import ExternalAccess
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)


def get_sharepoint_external_access(
    ctx: ClientContext,
    graph_client: GraphClient,
    drive_item: DriveItem | None = None,
    drive_name: str | None = None,
    site_page: dict[str, Any] | None = None,
    add_prefix: bool = False,
    treat_sharing_link_as_public: bool = False,
) -> ExternalAccess:
    if drive_item and drive_item.id is None:
        raise ValueError("DriveItem ID is required")

    # Get external access using the EE implementation
    def noop_fallback(
        *args: Any, **kwargs: Any  # noqa: ARG001
    ) -> ExternalAccess:  # noqa: ARG001
        return ExternalAccess.empty()

    get_external_access_func = fetch_versioned_implementation_with_fallback(
        "onyx.external_permissions.sharepoint.permission_utils",
        "get_external_access_from_sharepoint",
        fallback=noop_fallback,
    )

    external_access = get_external_access_func(
        ctx,
        graph_client,
        drive_name,
        drive_item,
        site_page,
        add_prefix,
        treat_sharing_link_as_public,
    )

    return external_access


================================================
FILE: backend/onyx/connectors/slab/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/slab/connector.py
================================================
import json
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urljoin

import requests
from dateutil import parser

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger


logger = setup_logger()


# Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min
SLAB_GRAPHQL_MAX_TRIES = 10
SLAB_API_URL = "https://api.slab.com/v1/graphql"

_SLIM_BATCH_SIZE = 1000


def run_graphql_request(
    graphql_query: dict, bot_token: str, max_tries: int = SLAB_GRAPHQL_MAX_TRIES
) -> str:
    headers = {"Authorization": bot_token, "Content-Type": "application/json"}

    for try_count in range(max_tries):
        try:
            response = requests.post(
                SLAB_API_URL, headers=headers, json=graphql_query, timeout=60
            )
            response.raise_for_status()

            if response.status_code != 200:
                raise ValueError(f"GraphQL query failed: {graphql_query}")

            return response.text

        except (requests.exceptions.Timeout, ValueError) as e:
            if try_count < max_tries - 1:
                logger.warning("A Slab GraphQL error occurred. Retrying...")
                continue

            if isinstance(e, requests.exceptions.Timeout):
                raise TimeoutError("Slab API timed out after 3 attempts")
            else:
                raise ValueError("Slab GraphQL query failed after 3 attempts")

    raise RuntimeError(
        "Unexpected execution from Slab Connector. This should not happen."
    )  # for static checker


def get_all_post_ids(bot_token: str) -> list[str]:
    query = """
        query GetAllPostIds {
            organization {
                posts {
                    id
                }
            }
        }
        """

    graphql_query = {"query": query}

    results = json.loads(run_graphql_request(graphql_query, bot_token))
    posts = results["data"]["organization"]["posts"]
    return [post["id"] for post in posts]


def get_post_by_id(post_id: str, bot_token: str) -> dict[str, str]:
    query = """
        query GetPostById($postId: ID!) {
            post(id: $postId) {
                title
                content
                linkAccess
                updatedAt
            }
        }
        """
    graphql_query = {"query": query, "variables": {"postId": post_id}}
    results = json.loads(run_graphql_request(graphql_query, bot_token))
    return results["data"]["post"]


def iterate_post_batches(
    batch_size: int, bot_token: str
) -> Generator[list[dict[str, str]], None, None]:
    """This may not be safe to use, not sure if page edits will change the order of results"""
    query = """
        query IteratePostBatches($query: String!, $first: Int, $types: [SearchType], $after: String) {
            search(query: $query, first: $first, types: $types, after: $after) {
                edges {
                    node {
                        ... on PostSearchResult {
                            post {
                                id
                                title
                                content
                                updatedAt
                            }
                        }
                    }
                }
                pageInfo {
                    endCursor
                    hasNextPage
                }
            }
        }
    """
    pagination_start = None
    exists_more_pages = True
    while exists_more_pages:
        graphql_query = {
            "query": query,
            "variables": {
                "query": "",
                "first": batch_size,
                "types": ["POST"],
                "after": pagination_start,
            },
        }
        results = json.loads(run_graphql_request(graphql_query, bot_token))
        pagination_start = results["data"]["search"]["pageInfo"]["endCursor"]
        hits = results["data"]["search"]["edges"]

        posts = [hit["node"] for hit in hits]
        if posts:
            yield posts

        exists_more_pages = results["data"]["search"]["pageInfo"]["hasNextPage"]


def get_slab_url_from_title_id(base_url: str, title: str, page_id: str) -> str:
    """This is not a documented approach but seems to be the way it works currently
    May be subject to change without notification"""
    title = (
        title.replace("[", "")
        .replace("]", "")
        .replace(":", "")
        .replace(" ", "-")
        .lower()
    )
    url_id = title + "-" + page_id
    return urljoin(urljoin(base_url, "posts/"), url_id)


class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
    def __init__(
        self,
        base_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.base_url = base_url
        self.batch_size = batch_size
        self._slab_bot_token: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self._slab_bot_token = credentials["slab_bot_token"]
        return None

    @property
    def slab_bot_token(self) -> str:
        if self._slab_bot_token is None:
            raise ConnectorMissingCredentialError("Slab")
        return self._slab_bot_token

    def _iterate_posts(
        self, time_filter: Callable[[datetime], bool] | None = None
    ) -> GenerateDocumentsOutput:
        doc_batch: list[Document | HierarchyNode] = []

        if self.slab_bot_token is None:
            raise ConnectorMissingCredentialError("Slab")

        all_post_ids: list[str] = get_all_post_ids(self.slab_bot_token)

        for post_id in all_post_ids:
            post = get_post_by_id(post_id, self.slab_bot_token)
            last_modified = parser.parse(post["updatedAt"])
            if time_filter is not None and not time_filter(last_modified):
                continue

            page_url = get_slab_url_from_title_id(self.base_url, post["title"], post_id)

            content_text = ""
            contents = json.loads(post["content"])
            for content_segment in contents:
                insert = content_segment.get("insert")
                if insert and isinstance(insert, str):
                    content_text += insert

            doc_batch.append(
                Document(
                    id=post_id,  # can't be url as this changes with the post title
                    sections=[TextSection(link=page_url, text=content_text)],
                    source=DocumentSource.SLAB,
                    semantic_identifier=post["title"],
                    metadata={},
                )
            )

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        yield from self._iterate_posts()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
        end_time = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._iterate_posts(
            time_filter=lambda t: start_time <= t <= end_time
        )

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        slim_doc_batch: list[SlimDocument | HierarchyNode] = []
        for post_id in get_all_post_ids(self.slab_bot_token):
            slim_doc_batch.append(
                SlimDocument(
                    id=post_id,
                )
            )
            if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
                yield slim_doc_batch
                slim_doc_batch = []
        if slim_doc_batch:
            yield slim_doc_batch

    def validate_connector_settings(self) -> None:
        """
        Very basic validation, we could do more here
        """
        if not self.base_url.startswith("https://") and not self.base_url.startswith(
            "http://"
        ):
            raise ConnectorValidationError(
                "Base URL must start with https:// or http://"
            )

        try:
            get_all_post_ids(self.slab_bot_token)
        except ConnectorMissingCredentialError:
            raise
        except Exception as e:
            raise ConnectorValidationError(f"Failed to fetch posts from Slab: {e}")


================================================
FILE: backend/onyx/connectors/slack/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/slack/access.py
================================================
from collections.abc import Callable
from typing import cast

from slack_sdk import WebClient

from onyx.access.models import ExternalAccess
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.slack.models import ChannelType
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


def get_channel_access(
    client: WebClient,
    channel: ChannelType,
    user_cache: dict[str, BasicExpertInfo | None],
) -> ExternalAccess | None:
    """
    Get channel access permissions for a Slack channel.
    This functionality requires Enterprise Edition.

    Args:
        client: Slack WebClient instance
        channel: Slack channel object containing channel info
        user_cache: Cache of user IDs to BasicExpertInfo objects. May be updated in place.

    Returns:
        ExternalAccess object for the channel. None if EE is not enabled.
    """
    # Check if EE is enabled
    if not global_version.is_ee_version():
        return None

    # Fetch the EE implementation
    ee_get_channel_access = cast(
        Callable[
            [WebClient, ChannelType, dict[str, BasicExpertInfo | None]],
            ExternalAccess,
        ],
        fetch_versioned_implementation(
            "onyx.external_permissions.slack.channel_access", "get_channel_access"
        ),
    )

    return ee_get_channel_access(client, channel, user_cache)


================================================
FILE: backend/onyx/connectors/slack/connector.py
================================================
import contextvars
import copy
import itertools
import re
from collections.abc import Callable
from collections.abc import Generator
from concurrent.futures import as_completed
from concurrent.futures import Future
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from datetime import timezone
from enum import Enum
from http.client import IncompleteRead
from http.client import RemoteDisconnected
from typing import Any
from typing import cast
from urllib.error import URLError
from urllib.parse import urlparse

from pydantic import BaseModel
from redis import Redis
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.http_retry import ConnectionErrorRetryHandler
from slack_sdk.http_retry import RetryHandler
from slack_sdk.http_retry.builtin_interval_calculators import (
    FixedValueRetryIntervalCalculator,
)
from typing_extensions import override

from onyx.access.models import ExternalAccess
from onyx.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import SLACK_NUM_THREADS
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.slack.access import get_channel_access
from onyx.connectors.slack.models import ChannelType
from onyx.connectors.slack.models import MessageType
from onyx.connectors.slack.models import ThreadType
from onyx.connectors.slack.onyx_retry_handler import OnyxRedisSlackRetryHandler
from onyx.connectors.slack.onyx_slack_web_client import OnyxSlackWebClient
from onyx.connectors.slack.utils import (
    expert_info_from_slack_id,
)
from onyx.connectors.slack.utils import get_message_link
from onyx.connectors.slack.utils import make_paginated_slack_api_call
from onyx.connectors.slack.utils import SlackTextCleaner
from onyx.db.enums import HierarchyNodeType
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger

logger = setup_logger()

_SLACK_LIMIT = 900


class SlackCheckpoint(ConnectorCheckpoint):
    channel_ids: list[str] | None  # e.g. C8E6WHE2X

    # channel id mapped to the timestamp we want to retrieve messages up to
    # NOTE: this is usually the earliest timestamp of all the messages we have
    # since we walk backwards
    channel_completion_map: dict[str, str]
    current_channel: ChannelType | None
    current_channel_access: ExternalAccess | None

    seen_thread_ts: list[
        str
    ]  # apparently we identify threads/messages uniquely by timestamp?


def _collect_paginated_channels(
    client: WebClient,
    exclude_archived: bool,
    channel_types: list[str],
) -> list[ChannelType]:
    channels: list[ChannelType] = []
    for result in make_paginated_slack_api_call(
        client.conversations_list,
        exclude_archived=exclude_archived,
        # also get private channels the bot is added to
        types=channel_types,
    ):
        channels.extend(result["channels"])

    return channels


def get_channels(
    client: WebClient,
    exclude_archived: bool = True,
    get_public: bool = True,
    get_private: bool = True,
) -> list[ChannelType]:
    """Get all channels in the workspace."""
    channels: list[ChannelType] = []
    channel_types = []
    if get_public:
        channel_types.append("public_channel")
    if get_private:
        channel_types.append("private_channel")
    # Try fetching both public and private channels first:
    try:
        channels = _collect_paginated_channels(
            client=client,
            exclude_archived=exclude_archived,
            channel_types=channel_types,
        )
    except SlackApiError as e:
        msg = f"Unable to fetch private channels due to: {e}."
        if not get_public:
            logger.warning(msg + " Public channels are not enabled.")
            return []

        logger.warning(msg + " Trying again with public channels only.")
        channel_types = ["public_channel"]
        channels = _collect_paginated_channels(
            client=client,
            exclude_archived=exclude_archived,
            channel_types=channel_types,
        )
    return channels


def get_channel_messages(
    client: WebClient,
    channel: ChannelType,
    oldest: str | None = None,
    latest: str | None = None,
    callback: IndexingHeartbeatInterface | None = None,
) -> Generator[list[MessageType], None, None]:
    """Get all messages in a channel"""
    # join so that the bot can access messages
    if not channel["is_member"]:
        client.conversations_join(
            channel=channel["id"],
            is_private=channel["is_private"],
        )
        logger.info(f"Successfully joined '{channel['name']}'")

    for result in make_paginated_slack_api_call(
        client.conversations_history,
        channel=channel["id"],
        oldest=oldest,
        latest=latest,
    ):
        if callback:
            if callback.should_stop():
                raise RuntimeError("get_channel_messages: Stop signal detected")

            callback.progress("get_channel_messages", 0)
        yield cast(list[MessageType], result["messages"])


def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType:
    """Get all messages in a thread"""
    threads: list[MessageType] = []
    for result in make_paginated_slack_api_call(
        client.conversations_replies, channel=channel_id, ts=thread_id
    ):
        threads.extend(result["messages"])
    return threads


def get_latest_message_time(thread: ThreadType) -> datetime:
    max_ts = max([float(msg.get("ts", 0)) for msg in thread])
    return datetime.fromtimestamp(max_ts, tz=timezone.utc)


def _build_doc_id(channel_id: str, thread_ts: str) -> str:
    return f"{channel_id}__{thread_ts}"


def thread_to_doc(
    channel: ChannelType,
    thread: ThreadType,
    slack_cleaner: SlackTextCleaner,
    client: WebClient,
    user_cache: dict[str, BasicExpertInfo | None],
    channel_access: ExternalAccess | None,
) -> Document:
    channel_id = channel["id"]

    initial_sender_expert_info = expert_info_from_slack_id(
        user_id=thread[0].get("user"), client=client, user_cache=user_cache
    )
    initial_sender_name = (
        initial_sender_expert_info.get_semantic_name()
        if initial_sender_expert_info
        else "Unknown"
    )

    valid_experts = None
    if ENABLE_EXPENSIVE_EXPERT_CALLS:
        all_sender_ids = [m.get("user") for m in thread]
        experts = [
            expert_info_from_slack_id(
                user_id=sender_id, client=client, user_cache=user_cache
            )
            for sender_id in all_sender_ids
            if sender_id
        ]
        valid_experts = [expert for expert in experts if expert]

    first_message = slack_cleaner.index_clean(cast(str, thread[0]["text"]))
    snippet = (
        first_message[:50].rstrip() + "..."
        if len(first_message) > 50
        else first_message
    )

    doc_sem_id = f"{initial_sender_name} in #{channel['name']}: {snippet}".replace(
        "\n", " "
    )

    channel_name = channel["name"]

    return Document(
        id=_build_doc_id(channel_id=channel_id, thread_ts=thread[0]["ts"]),
        sections=[
            TextSection(
                link=get_message_link(event=m, client=client, channel_id=channel_id),
                text=slack_cleaner.index_clean(cast(str, m["text"])),
            )
            for m in thread
        ],
        source=DocumentSource.SLACK,
        semantic_identifier=doc_sem_id,
        doc_updated_at=get_latest_message_time(thread),
        primary_owners=valid_experts,
        doc_metadata={
            "hierarchy": {
                "source_path": [channel_name],
                "channel_name": channel_name,
                "channel_id": channel_id,
            }
        },
        metadata={"Channel": channel_name},
        external_access=channel_access,
        parent_hierarchy_raw_node_id=channel_id,
    )


# list of subtypes can be found here: https://api.slack.com/events/message
_DISALLOWED_MSG_SUBTYPES = {
    "channel_join",
    "channel_leave",
    "channel_archive",
    "channel_unarchive",
    "pinned_item",
    "unpinned_item",
    "ekm_access_denied",
    "channel_posting_permissions",
    "group_join",
    "group_leave",
    "group_archive",
    "group_unarchive",
    "channel_leave",
    "channel_name",
    "channel_join",
}


class SlackMessageFilterReason(str, Enum):
    BOT = "bot"
    DISALLOWED = "disallowed"


def default_msg_filter(message: MessageType) -> SlackMessageFilterReason | None:
    """Returns a filter reason if the message should be filtered out.
    Returns None if the message can be kept.
    """

    # Don't keep messages from bots
    if message.get("bot_id") or message.get("app_id"):
        bot_profile_name = message.get("bot_profile", {}).get("name")
        if bot_profile_name == "DanswerBot Testing":
            return None
        return SlackMessageFilterReason.BOT

    # Uninformative
    if message.get("subtype", "") in _DISALLOWED_MSG_SUBTYPES:
        return SlackMessageFilterReason.DISALLOWED

    return None


def _bot_inclusive_msg_filter(
    message: MessageType,
) -> SlackMessageFilterReason | None:
    """Like default_msg_filter but allows bot/app messages through.
    Only filters out disallowed subtypes (channel_join, channel_leave, etc.).
    """
    if message.get("subtype", "") in _DISALLOWED_MSG_SUBTYPES:
        return SlackMessageFilterReason.DISALLOWED

    return None


def filter_channels(
    all_channels: list[ChannelType],
    channels_to_connect: list[str] | None,
    regex_enabled: bool,
) -> list[ChannelType]:
    if not channels_to_connect:
        return all_channels

    if regex_enabled:
        return [
            channel
            for channel in all_channels
            if any(
                re.fullmatch(channel_to_connect, channel["name"])
                for channel_to_connect in channels_to_connect
            )
        ]

    # validate that all channels in `channels_to_connect` are valid
    # fail loudly in the case of an invalid channel so that the user
    # knows that one of the channels they've specified is typo'd or private
    all_channel_names = {channel["name"] for channel in all_channels}
    for channel in channels_to_connect:
        if channel not in all_channel_names:
            raise ValueError(
                f"Channel '{channel}' not found in workspace. "
                f"Available channels (Showing {len(all_channel_names)} of "
                f"{min(len(all_channel_names), SlackConnector.MAX_CHANNELS_TO_LOG)}): "
                f"{list(itertools.islice(all_channel_names, SlackConnector.MAX_CHANNELS_TO_LOG))}"
            )

    return [
        channel for channel in all_channels if channel["name"] in channels_to_connect
    ]


def _channel_to_hierarchy_node(
    channel: ChannelType,
    channel_access: ExternalAccess | None,
    workspace_url: str | None = None,
) -> HierarchyNode:
    """Convert a Slack channel to a HierarchyNode.

    Args:
        channel: The Slack channel object
        channel_access: External access permissions for the channel
        workspace_url: The workspace URL (e.g., https://myworkspace.slack.com)

    Returns:
        A HierarchyNode representing the channel
    """
    # Link format: https://{workspace}.slack.com/archives/{channel_id}
    link = f"{workspace_url}/archives/{channel['id']}" if workspace_url else None

    return HierarchyNode(
        raw_node_id=channel["id"],
        raw_parent_id=None,  # Direct child of SOURCE
        display_name=f"#{channel['name']}",
        link=link,
        node_type=HierarchyNodeType.CHANNEL,
        external_access=channel_access,
    )


def _get_channel_by_id(client: WebClient, channel_id: str) -> ChannelType:
    """Get a channel by its ID.

    Args:
        client: The Slack WebClient instance
        channel_id: The ID of the channel to fetch

    Returns:
        The channel information

    Raises:
        SlackApiError: If the channel cannot be fetched
    """
    response = client.conversations_info(
        channel=channel_id,
    )
    return cast(ChannelType, response["channel"])


def _get_messages(
    channel: ChannelType,
    client: WebClient,
    oldest: str | None = None,
    latest: str | None = None,
    limit: int = _SLACK_LIMIT,
) -> tuple[list[MessageType], bool]:
    """Slack goes from newest to oldest."""

    # have to be in the channel in order to read messages
    if not channel["is_member"]:
        try:
            client.conversations_join(
                channel=channel["id"],
                is_private=channel["is_private"],
            )
        except SlackApiError as e:
            if e.response["error"] == "is_archived":
                logger.warning(f"Channel {channel['name']} is archived. Skipping.")
                return [], False

            logger.exception(f"Error joining channel {channel['name']}")
            raise
        logger.info(f"Successfully joined '{channel['name']}'")

    response = client.conversations_history(
        channel=channel["id"],
        oldest=oldest,
        latest=latest,
        limit=limit,
    )
    response.validate()

    messages = cast(list[MessageType], response.get("messages", []))

    cursor = cast(dict[str, Any], response.get("response_metadata", {})).get(
        "next_cursor", ""
    )
    has_more = bool(cursor)
    return messages, has_more


def _message_to_doc(
    message: MessageType,
    client: WebClient,
    channel: ChannelType,
    slack_cleaner: SlackTextCleaner,
    user_cache: dict[str, BasicExpertInfo | None],
    seen_thread_ts: set[str],
    channel_access: ExternalAccess | None,
    msg_filter_func: Callable[
        [MessageType], SlackMessageFilterReason | None
    ] = default_msg_filter,
) -> tuple[Document | None, SlackMessageFilterReason | None]:
    """Returns a doc or None.
    If None is returned, the second element of the tuple may be a filter reason
    """
    filtered_thread: ThreadType | None = None
    filter_reason: SlackMessageFilterReason | None = None
    thread_ts = message.get("thread_ts")
    if thread_ts:
        # NOTE: if thread_ts is present, there's a thread we need to process
        # ... otherwise, we can skip it

        # skip threads we've already seen, since we've already processed all
        # messages in that thread
        if thread_ts in seen_thread_ts:
            return None, None

        thread = get_thread(
            client=client, channel_id=channel["id"], thread_id=thread_ts
        )

        # we'll just set and use the last filter reason if
        # we bomb out later
        filtered_thread = []
        for message in thread:
            filter_reason = msg_filter_func(message)
            if filter_reason:
                continue

            filtered_thread.append(message)
    else:
        filter_reason = msg_filter_func(message)
        if filter_reason:
            return None, filter_reason

        filtered_thread = [message]

    # we'll just set and use the last filter reason if we get an empty list
    if not filtered_thread:
        return None, filter_reason

    doc = thread_to_doc(
        channel=channel,
        thread=filtered_thread,
        slack_cleaner=slack_cleaner,
        client=client,
        user_cache=user_cache,
        channel_access=channel_access,
    )
    return doc, None


def _get_all_doc_ids(
    client: WebClient,
    channels: list[str] | None = None,
    channel_name_regex_enabled: bool = False,
    msg_filter_func: Callable[
        [MessageType], SlackMessageFilterReason | None
    ] = default_msg_filter,
    callback: IndexingHeartbeatInterface | None = None,
    workspace_url: str | None = None,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
    """
    Get all document ids in the workspace, channel by channel
    This is pretty identical to get_all_docs, but it returns a set of ids instead of documents
    This makes it an order of magnitude faster than get_all_docs
    """

    all_channels = get_channels(client)
    filtered_channels = filter_channels(
        all_channels, channels, channel_name_regex_enabled
    )
    user_cache: dict[str, BasicExpertInfo | None] = {}

    for channel in filtered_channels:
        channel_id = channel["id"]
        # NOTE: external_access is a frozen object, so it's okay to safe to use a single
        # instance for all documents in the channel
        external_access = get_channel_access(
            client=client,
            channel=channel,
            user_cache=user_cache,
        )

        # Yield the channel as a HierarchyNode first (before any documents)
        yield [_channel_to_hierarchy_node(channel, external_access, workspace_url)]

        channel_message_batches = get_channel_messages(
            client=client,
            channel=channel,
            callback=callback,
            oldest=str(start) if start else None,  # 0.0 -> None intentionally
            latest=str(end) if end is not None else None,
        )

        for message_batch in channel_message_batches:
            slim_doc_batch: list[SlimDocument | HierarchyNode] = []
            for message in message_batch:
                filter_reason = msg_filter_func(message)
                if filter_reason:
                    continue

                # The document id is the channel id and the ts of the first message in the thread
                # Since we already have the first message of the thread, we dont have to
                # fetch the thread for id retrieval, saving time and API calls

                slim_doc_batch.append(
                    SlimDocument(
                        id=_build_doc_id(
                            channel_id=channel_id, thread_ts=message["ts"]
                        ),
                        external_access=external_access,
                        parent_hierarchy_raw_node_id=channel_id,
                    )
                )

            yield slim_doc_batch


class ProcessedSlackMessage(BaseModel):
    doc: Document | None
    # if the message is part of a thread, this is the thread_ts
    # otherwise, this is the message_ts. Either way, will be a unique identifier.
    # In the future, if the message becomes a thread, then the thread_ts
    # will be set to the message_ts.
    thread_or_message_ts: str

    # if doc is None, filter_reason may be populated
    filter_reason: SlackMessageFilterReason | None
    failure: ConnectorFailure | None


def _process_message(
    message: MessageType,
    client: WebClient,
    channel: ChannelType,
    slack_cleaner: SlackTextCleaner,
    user_cache: dict[str, BasicExpertInfo | None],
    seen_thread_ts: set[str],
    channel_access: ExternalAccess | None,
    msg_filter_func: Callable[
        [MessageType], SlackMessageFilterReason | None
    ] = default_msg_filter,
) -> ProcessedSlackMessage:
    thread_ts = message.get("thread_ts")
    thread_or_message_ts = thread_ts or message["ts"]
    try:
        # causes random failures for testing checkpointing / continue on failure
        # import random
        # if random.random() > 0.95:
        #     raise RuntimeError("Random failure :P")

        doc, filter_reason = _message_to_doc(
            message=message,
            client=client,
            channel=channel,
            slack_cleaner=slack_cleaner,
            user_cache=user_cache,
            seen_thread_ts=seen_thread_ts,
            channel_access=channel_access,
            msg_filter_func=msg_filter_func,
        )
        return ProcessedSlackMessage(
            doc=doc,
            thread_or_message_ts=thread_or_message_ts,
            filter_reason=filter_reason,
            failure=None,
        )
    except Exception as e:
        logger.exception(f"Error processing message {message['ts']}")
        return ProcessedSlackMessage(
            doc=None,
            thread_or_message_ts=thread_or_message_ts,
            filter_reason=None,
            failure=ConnectorFailure(
                failed_document=DocumentFailure(
                    document_id=_build_doc_id(
                        channel_id=channel["id"], thread_ts=thread_or_message_ts
                    ),
                    document_link=get_message_link(message, client, channel["id"]),
                ),
                failure_message=str(e),
                exception=e,
            ),
        )


class SlackConnector(
    SlimConnectorWithPermSync,
    CredentialsConnector,
    CheckpointedConnectorWithPermSync[SlackCheckpoint],
):
    FAST_TIMEOUT = 1

    MAX_RETRIES = 7  # arbitrarily selected

    MAX_CHANNELS_TO_LOG = 50

    # *** values to use when filtering bot channels ***

    # the number of messages in the batch must be greater than or equal to this number
    # to consider filtering the channel
    BOT_CHANNEL_MIN_BATCH_SIZE = 256

    # the percentage of messages in the batch above which the channel will be considered
    # a bot channel
    BOT_CHANNEL_PERCENTAGE_THRESHOLD = 0.95

    def __init__(
        self,
        channels: list[str] | None = None,
        # if specified, will treat the specified channel strings as
        # regexes, and will only index channels that fully match the regexes
        channel_regex_enabled: bool = False,
        # if True, messages from bots/apps will be indexed instead of filtered out
        include_bot_messages: bool = False,
        batch_size: int = INDEX_BATCH_SIZE,
        num_threads: int = SLACK_NUM_THREADS,
        use_redis: bool = True,
    ) -> None:
        self.channels = channels
        self.channel_regex_enabled = channel_regex_enabled
        self.include_bot_messages = include_bot_messages
        self.msg_filter_func = (
            _bot_inclusive_msg_filter if include_bot_messages else default_msg_filter
        )
        self.batch_size = batch_size
        self.num_threads = num_threads
        self.client: WebClient | None = None
        self.fast_client: WebClient | None = None
        # just used for efficiency
        self.text_cleaner: SlackTextCleaner | None = None
        self.user_cache: dict[str, BasicExpertInfo | None] = {}
        self.credentials_provider: CredentialsProviderInterface | None = None
        self.credential_prefix: str | None = None
        self.use_redis: bool = use_redis
        # Workspace URL for building channel links (e.g., https://myworkspace.slack.com)
        self._workspace_url: str | None = None
        # self.delay_lock: str | None = None  # the redis key for the shared lock
        # self.delay_key: str | None = None  # the redis key for the shared delay

    @classmethod
    @override
    def normalize_url(cls, url: str) -> NormalizationResult:
        """Normalize a Slack URL to extract channel_id__thread_ts format."""
        parsed = urlparse(url)
        if "slack.com" not in parsed.netloc.lower():
            return NormalizationResult(normalized_url=None, use_default=False)

        # Slack document IDs are format: channel_id__thread_ts
        # Extract from URL pattern: .../archives/{channel_id}/p{timestamp}
        path_parts = parsed.path.split("/")
        if "archives" not in path_parts:
            return NormalizationResult(normalized_url=None, use_default=False)

        archives_idx = path_parts.index("archives")
        if archives_idx + 1 >= len(path_parts):
            return NormalizationResult(normalized_url=None, use_default=False)

        channel_id = path_parts[archives_idx + 1]
        if archives_idx + 2 >= len(path_parts):
            return NormalizationResult(normalized_url=None, use_default=False)

        thread_part = path_parts[archives_idx + 2]
        if not thread_part.startswith("p"):
            return NormalizationResult(normalized_url=None, use_default=False)

        # Convert p1234567890123456 to 1234567890.123456 format
        timestamp_str = thread_part[1:]  # Remove 'p' prefix
        if len(timestamp_str) == 16:
            # Insert dot at position 10 to match canonical format
            thread_ts = f"{timestamp_str[:10]}.{timestamp_str[10:]}"
        else:
            thread_ts = timestamp_str

        normalized = f"{channel_id}__{thread_ts}"
        return NormalizationResult(normalized_url=normalized, use_default=False)

    @staticmethod
    def make_credential_prefix(key: str) -> str:
        return f"connector:slack:credential_{key}"

    @staticmethod
    def make_delay_lock(prefix: str) -> str:
        return f"{prefix}:delay_lock"

    @staticmethod
    def make_delay_key(prefix: str) -> str:
        return f"{prefix}:delay"

    @staticmethod
    def make_slack_web_client(
        prefix: str, token: str, max_retry_count: int, r: Redis
    ) -> WebClient:
        delay_lock = SlackConnector.make_delay_lock(prefix)
        delay_key = SlackConnector.make_delay_key(prefix)

        # NOTE: slack has a built in RateLimitErrorRetryHandler, but it isn't designed
        # for concurrent workers. We've extended it with OnyxRedisSlackRetryHandler.
        connection_error_retry_handler = ConnectionErrorRetryHandler(
            max_retry_count=max_retry_count,
            interval_calculator=FixedValueRetryIntervalCalculator(),
            error_types=[
                URLError,
                ConnectionResetError,
                RemoteDisconnected,
                IncompleteRead,
            ],
        )

        onyx_rate_limit_error_retry_handler = OnyxRedisSlackRetryHandler(
            max_retry_count=max_retry_count,
            delay_key=delay_key,
            r=r,
        )
        custom_retry_handlers: list[RetryHandler] = [
            connection_error_retry_handler,
            onyx_rate_limit_error_retry_handler,
        ]

        client = OnyxSlackWebClient(
            delay_lock=delay_lock,
            delay_key=delay_key,
            r=r,
            token=token,
            retry_handlers=custom_retry_handlers,
        )
        return client

    @property
    def channels(self) -> list[str] | None:
        return self._channels

    @channels.setter
    def channels(self, channels: list[str] | None) -> None:
        self._channels = (
            [channel.removeprefix("#") for channel in channels] if channels else None
        )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        raise NotImplementedError("Use set_credentials_provider with this connector.")

    def set_credentials_provider(
        self, credentials_provider: CredentialsProviderInterface
    ) -> None:
        credentials = credentials_provider.get_credentials()
        tenant_id = credentials_provider.get_tenant_id()
        if not tenant_id:
            raise ValueError("tenant_id cannot be None!")

        bot_token = credentials["slack_bot_token"]

        if self.use_redis:
            self.redis = get_redis_client(tenant_id=tenant_id)
            self.credential_prefix = SlackConnector.make_credential_prefix(
                credentials_provider.get_provider_key()
            )

            self.client = SlackConnector.make_slack_web_client(
                self.credential_prefix, bot_token, self.MAX_RETRIES, self.redis
            )
        else:
            connection_error_retry_handler = ConnectionErrorRetryHandler(
                max_retry_count=self.MAX_RETRIES,
                interval_calculator=FixedValueRetryIntervalCalculator(),
                error_types=[
                    URLError,
                    ConnectionResetError,
                    RemoteDisconnected,
                    IncompleteRead,
                ],
            )

            self.client = WebClient(
                token=bot_token, retry_handlers=[connection_error_retry_handler]
            )

        # use for requests that must return quickly (e.g. realtime flows where user is waiting)
        self.fast_client = WebClient(
            token=bot_token, timeout=SlackConnector.FAST_TIMEOUT
        )
        self.text_cleaner = SlackTextCleaner(client=self.client)
        self.credentials_provider = credentials_provider

        # Extract workspace URL from auth_test response for building channel links
        try:
            auth_response = self.client.auth_test()
            self._workspace_url = auth_response.get("url")
        except Exception as e:
            logger.warning(f"Failed to get workspace URL from auth_test: {e}")
            self._workspace_url = None

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        if self.client is None:
            raise ConnectorMissingCredentialError("Slack")

        return _get_all_doc_ids(
            client=self.client,
            channels=self.channels,
            channel_name_regex_enabled=self.channel_regex_enabled,
            msg_filter_func=self.msg_filter_func,
            callback=callback,
            workspace_url=self._workspace_url,
            start=start,
            end=end,
        )

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SlackCheckpoint,
        include_permissions: bool = False,
    ) -> CheckpointOutput[SlackCheckpoint]:
        """Rough outline:

        Step 1: Get all channels, yield back Checkpoint.
        Step 2: Loop through each channel. For each channel:
            Step 2.1: Get messages within the time range.
            Step 2.2: Process messages in parallel, yield back docs.
            Step 2.3: Update checkpoint with new_oldest, seen_thread_ts, and current_channel.
                      Slack returns messages from newest to oldest, so we need to keep track of
                      the latest message we've seen in each channel.
            Step 2.4: If there are no more messages in the channel, switch the current
                      channel to the next channel.
        """
        num_channels_remaining = 0

        if self.client is None or self.text_cleaner is None:
            raise ConnectorMissingCredentialError("Slack")

        checkpoint = cast(SlackCheckpoint, copy.deepcopy(checkpoint))

        # if this is the very first time we've called this, need to
        # get all relevant channels and save them into the checkpoint
        if checkpoint.channel_ids is None:
            raw_channels = get_channels(self.client)
            filtered_channels = filter_channels(
                raw_channels, self.channels, self.channel_regex_enabled
            )
            logger.info(
                f"Channels - initial checkpoint: all={len(raw_channels)} post_filtering={len(filtered_channels)}"
            )

            checkpoint.channel_ids = [c["id"] for c in filtered_channels]
            if len(filtered_channels) == 0:
                checkpoint.has_more = False
                return checkpoint

            checkpoint.current_channel = filtered_channels[0]
            if include_permissions:
                # checkpoint.current_channel is guaranteed to be non-None here since we just assigned it
                assert checkpoint.current_channel is not None
                channel_access = get_channel_access(
                    client=self.client,
                    channel=checkpoint.current_channel,
                    user_cache=self.user_cache,
                )
                checkpoint.current_channel_access = channel_access
            checkpoint.has_more = True
            return checkpoint

        final_channel_ids = checkpoint.channel_ids
        for channel_id in final_channel_ids:
            if channel_id not in checkpoint.channel_completion_map:
                num_channels_remaining += 1

        logger.info(
            f"Channels - current status: "
            f"processed={len(final_channel_ids) - num_channels_remaining} "
            f"remaining={num_channels_remaining} "
            f"total={len(final_channel_ids)}"
        )

        channel = checkpoint.current_channel
        if channel is None:
            raise ValueError("current_channel key not set in checkpoint")

        channel_id = channel["id"]
        if channel_id not in final_channel_ids:
            raise ValueError(f"Channel {channel_id} not found in checkpoint")

        channel_created = channel["created"]

        seen_thread_ts = set(checkpoint.seen_thread_ts)

        try:
            num_bot_filtered_messages = 0
            num_other_filtered_messages = 0

            oldest = str(start) if start else None
            latest = str(end)

            channel_message_ts = checkpoint.channel_completion_map.get(channel_id)
            if channel_message_ts:
                # Set oldest to the checkpoint timestamp to resume from where we left off
                oldest = channel_message_ts
            else:
                # First time processing this channel - yield its hierarchy node
                yield _channel_to_hierarchy_node(
                    channel,
                    checkpoint.current_channel_access,
                    self._workspace_url,
                )

            logger.debug(
                f"Getting messages for channel {channel} within range {oldest} - {latest}"
            )

            message_batch, has_more_in_channel = _get_messages(
                channel, self.client, oldest, latest
            )

            logger.info(
                f"Retrieved messages: {len(message_batch)=} {channel=} {oldest=} {latest=}"
            )

            # message_batch[0] is the newest message (Slack returns newest to oldest)
            new_oldest = message_batch[0]["ts"] if message_batch else latest

            num_threads_start = len(seen_thread_ts)

            # Process messages in parallel using ThreadPoolExecutor
            with ThreadPoolExecutor(max_workers=self.num_threads) as executor:
                # NOTE(rkuo): this seems to be assuming the slack sdk is thread safe.
                # That's a very bold assumption! Haven't seen a direct issue with this
                # yet, but likely not correct to rely on.

                futures: list[Future[ProcessedSlackMessage]] = []
                for message in message_batch:
                    # Capture the current context so that the thread gets the current tenant ID
                    current_context = contextvars.copy_context()
                    futures.append(
                        executor.submit(
                            current_context.run,
                            _process_message,
                            message=message,
                            client=self.client,
                            channel=channel,
                            slack_cleaner=self.text_cleaner,
                            user_cache=self.user_cache,
                            seen_thread_ts=seen_thread_ts,
                            channel_access=checkpoint.current_channel_access,
                            msg_filter_func=self.msg_filter_func,
                        )
                    )

                for future in as_completed(futures):
                    processed_slack_message = future.result()
                    doc = processed_slack_message.doc
                    thread_or_message_ts = processed_slack_message.thread_or_message_ts
                    failure = processed_slack_message.failure
                    if doc:
                        # handle race conditions here since this is single
                        # threaded. Multi-threaded _process_message reads from this
                        # but since this is single threaded, we won't run into simul
                        # writes. At worst, we can duplicate a thread, which will be
                        # deduped later on.
                        if thread_or_message_ts not in seen_thread_ts:
                            yield doc

                        seen_thread_ts.add(thread_or_message_ts)
                    elif processed_slack_message.filter_reason:
                        if (
                            processed_slack_message.filter_reason
                            == SlackMessageFilterReason.BOT
                        ):
                            num_bot_filtered_messages += 1
                        else:
                            num_other_filtered_messages += 1
                    elif failure:
                        yield failure

            num_threads_processed = len(seen_thread_ts) - num_threads_start

            # calculate a percentage progress for the current channel by determining
            # how much of the time range we've processed so far
            new_oldest_seconds_epoch = SecondsSinceUnixEpoch(new_oldest)
            range_start = start if start else max(0, channel_created)
            if new_oldest_seconds_epoch < range_start:
                range_complete = 0.0
            else:
                range_complete = new_oldest_seconds_epoch - range_start

            range_total = end - range_start
            if range_total <= 0:
                range_total = 1
            range_percent_complete = range_complete / range_total * 100.0

            num_filtered = num_bot_filtered_messages + num_other_filtered_messages
            log_func = logger.warning if num_bot_filtered_messages > 0 else logger.info
            log_func(
                f"Message processing stats: "
                f"batch_len={len(message_batch)} "
                f"batch_yielded={num_threads_processed} "
                f"filtered={num_filtered} "
                f"(bot={num_bot_filtered_messages} other={num_other_filtered_messages}) "
                f"total_threads_seen={len(seen_thread_ts)}"
            )

            logger.info(
                f"Current channel processing stats: {range_start=} range_end={end} percent_complete={range_percent_complete=:.2f}"
            )

            checkpoint.seen_thread_ts = list(seen_thread_ts)
            checkpoint.channel_completion_map[channel["id"]] = new_oldest

            # bypass channels where the first set of messages seen are all
            # filtered (bots + disallowed subtypes like channel_join)
            # check at least MIN_BOT_MESSAGE_THRESHOLD messages are in the batch
            # we shouldn't skip based on a small sampling of messages
            if (
                channel_message_ts is None
                and len(message_batch) > SlackConnector.BOT_CHANNEL_MIN_BATCH_SIZE
            ):
                if (
                    num_filtered
                    > SlackConnector.BOT_CHANNEL_PERCENTAGE_THRESHOLD
                    * len(message_batch)
                ):
                    logger.warning(
                        "Bypassing this channel since it appears to be mostly bot messages"
                    )
                    has_more_in_channel = False

            if not has_more_in_channel:
                num_channels_remaining -= 1

                new_channel_id = next(
                    (
                        channel_id
                        for channel_id in final_channel_ids
                        if channel_id not in checkpoint.channel_completion_map
                    ),
                    None,
                )

                if new_channel_id:
                    new_channel = _get_channel_by_id(self.client, new_channel_id)
                    checkpoint.current_channel = new_channel
                    if include_permissions:
                        channel_access = get_channel_access(
                            client=self.client,
                            channel=new_channel,
                            user_cache=self.user_cache,
                        )
                        checkpoint.current_channel_access = channel_access
                else:
                    checkpoint.current_channel = None

            checkpoint.has_more = checkpoint.current_channel is not None

            channels_processed = len(final_channel_ids) - num_channels_remaining
            channels_percent_complete = (
                channels_processed / len(final_channel_ids) * 100.0
            )
            logger.info(
                f"All channels processing stats: "
                f"processed={len(final_channel_ids) - num_channels_remaining} "
                f"remaining={num_channels_remaining} "
                f"total={len(final_channel_ids)} "
                f"percent_complete={channels_percent_complete:.2f}"
            )
        except Exception as e:
            logger.exception(f"Error processing channel {channel['name']}")
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=channel["id"],
                    missed_time_range=(
                        datetime.fromtimestamp(start, tz=timezone.utc),
                        datetime.fromtimestamp(end, tz=timezone.utc),
                    ),
                ),
                failure_message=str(e),
                exception=e,
            )

        return checkpoint

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SlackCheckpoint,
    ) -> CheckpointOutput[SlackCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=False
        )

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: SlackCheckpoint,
    ) -> CheckpointOutput[SlackCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=True
        )

    def validate_connector_settings(self) -> None:
        """
        1. Verify the bot token is valid for the workspace (via auth_test).
        2. Ensure the bot has enough scope to list channels.
        3. Check that every channel specified in self.channels exists (only when regex is not enabled).
        """
        if self.fast_client is None:
            raise ConnectorMissingCredentialError("Slack credentials not loaded.")

        try:
            # 1) Validate connection to workspace
            auth_response = self.fast_client.auth_test()
            if not auth_response.get("ok", False):
                error_msg = auth_response.get(
                    "error", "Unknown error from Slack auth_test"
                )
                raise ConnectorValidationError(f"Failed Slack auth_test: {error_msg}")

            # 2) Minimal test to confirm listing channels works
            test_resp = self.fast_client.conversations_list(
                limit=1, types=["public_channel"]
            )
            if not test_resp.get("ok", False):
                error_msg = test_resp.get("error", "Unknown error from Slack")
                if error_msg == "invalid_auth":
                    raise ConnectorValidationError(
                        f"Invalid Slack bot token ({error_msg})."
                    )
                elif error_msg == "not_authed":
                    raise CredentialExpiredError(
                        f"Invalid or expired Slack bot token ({error_msg})."
                    )
                raise UnexpectedValidationError(
                    f"Slack API returned a failure: {error_msg}"
                )

            # 3) If channels are specified and regex is not enabled, verify each is accessible
            # NOTE: removed this for now since it may be too slow for large workspaces which may
            # have some automations which create a lot of channels (100k+)

            # if self.channels and not self.channel_regex_enabled:
            #     accessible_channels = get_channels(
            #         client=self.fast_client,
            #         exclude_archived=True,
            #         get_public=True,
            #         get_private=True,
            #     )
            #     # For quick lookups by name or ID, build a map:
            #     accessible_channel_names = {ch["name"] for ch in accessible_channels}
            #     accessible_channel_ids = {ch["id"] for ch in accessible_channels}

            #     for user_channel in self.channels:
            #         if (
            #             user_channel not in accessible_channel_names
            #             and user_channel not in accessible_channel_ids
            #         ):
            #             raise ConnectorValidationError(
            #                 f"Channel '{user_channel}' not found or inaccessible in this workspace."
            #             )

        except SlackApiError as e:
            slack_error = e.response.get("error", "")
            if slack_error == "ratelimited":
                # Handle rate limiting specifically
                retry_after = int(e.response.headers.get("Retry-After", 1))
                logger.warning(
                    f"Slack API rate limited during validation. Retry suggested after {retry_after} seconds. "
                    "Proceeding with validation, but be aware that connector operations might be throttled."
                )
                # Continue validation without failing - the connector is likely valid but just rate limited
                return
            elif slack_error == "missing_scope":
                raise InsufficientPermissionsError(
                    "Slack bot token lacks the necessary scope to list/access channels. "
                    "Please ensure your Slack app has 'channels:read' (and/or 'groups:read' for private channels)."
                )
            elif slack_error == "invalid_auth":
                raise CredentialExpiredError(
                    f"Invalid Slack bot token ({slack_error})."
                )
            elif slack_error == "not_authed":
                raise CredentialExpiredError(
                    f"Invalid or expired Slack bot token ({slack_error})."
                )
            raise UnexpectedValidationError(
                f"Unexpected Slack error '{slack_error}' during settings validation."
            )
        except ConnectorValidationError as e:
            raise e
        except Exception as e:
            raise UnexpectedValidationError(
                f"Unexpected error during Slack settings validation: {e}"
            )

    @override
    def build_dummy_checkpoint(self) -> SlackCheckpoint:
        return SlackCheckpoint(
            channel_ids=None,
            channel_completion_map={},
            current_channel=None,
            current_channel_access=None,
            seen_thread_ts=[],
            has_more=True,
        )

    @override
    def validate_checkpoint_json(self, checkpoint_json: str) -> SlackCheckpoint:
        return SlackCheckpoint.model_validate_json(checkpoint_json)


if __name__ == "__main__":
    import os
    import time
    from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
    from shared_configs.contextvars import get_current_tenant_id

    slack_channel = os.environ.get("SLACK_CHANNEL")
    connector = SlackConnector(
        channels=[slack_channel] if slack_channel else None,
    )

    provider = OnyxStaticCredentialsProvider(
        tenant_id=get_current_tenant_id(),
        connector_name="slack",
        credential_json={
            "slack_bot_token": os.environ["SLACK_BOT_TOKEN"],
        },
    )
    connector.set_credentials_provider(provider)

    current = time.time()
    one_day_ago = current - 24 * 60 * 60  # 1 day

    checkpoint = connector.build_dummy_checkpoint()

    gen = connector.load_from_checkpoint(
        one_day_ago,
        current,
        cast(SlackCheckpoint, checkpoint),
    )
    try:
        for document_or_failure in gen:
            if isinstance(document_or_failure, Document):
                print(document_or_failure)
            elif isinstance(document_or_failure, ConnectorFailure):
                print(document_or_failure)
    except StopIteration as e:
        checkpoint = e.value
        print("Next checkpoint:", checkpoint)

    print("Next checkpoint:", checkpoint)


================================================
FILE: backend/onyx/connectors/slack/models.py
================================================
from typing import NotRequired

from typing_extensions import TypedDict


class ChannelTopicPurposeType(TypedDict):
    """
    Represents the topic or purpose of a Slack channel.
    """

    value: str
    creator: str
    last_set: int


class ChannelType(TypedDict):
    """
    Represents a Slack channel.
    """

    id: str
    name: str
    is_channel: bool
    is_group: bool
    is_im: bool
    created: int
    creator: str
    is_archived: bool
    is_general: bool
    unlinked: int
    name_normalized: str
    is_shared: bool
    is_ext_shared: bool
    is_org_shared: bool
    pending_shared: list[str]
    is_pending_ext_shared: bool
    is_member: bool
    is_private: bool
    is_mpim: bool
    updated: int
    topic: ChannelTopicPurposeType
    purpose: ChannelTopicPurposeType
    previous_names: list[str]
    num_members: int


class AttachmentType(TypedDict):
    """
    Represents a Slack message attachment.
    """

    service_name: NotRequired[str]
    text: NotRequired[str]
    fallback: NotRequired[str]
    thumb_url: NotRequired[str]
    thumb_width: NotRequired[int]
    thumb_height: NotRequired[int]
    id: NotRequired[int]


class BotProfileType(TypedDict):
    """
    Represents a Slack bot profile.
    """

    id: NotRequired[str]
    deleted: NotRequired[bool]
    name: NotRequired[str]
    updated: NotRequired[int]
    app_id: NotRequired[str]
    team_id: NotRequired[str]


class MessageType(TypedDict):
    """
    Represents a Slack message.
    """

    type: str
    user: str
    text: str
    ts: str
    attachments: NotRequired[list[AttachmentType]]
    # Bot-related fields
    bot_id: NotRequired[str]
    app_id: NotRequired[str]
    bot_profile: NotRequired[BotProfileType]
    # Message threading
    thread_ts: NotRequired[str]
    # Message subtype (for filtering certain message types)
    subtype: NotRequired[str]


# list of messages in a thread
ThreadType = list[MessageType]


================================================
FILE: backend/onyx/connectors/slack/onyx_retry_handler.py
================================================
import random
from typing import cast
from typing import Optional

from redis import Redis
from slack_sdk.http_retry.handler import RetryHandler
from slack_sdk.http_retry.request import HttpRequest
from slack_sdk.http_retry.response import HttpResponse
from slack_sdk.http_retry.state import RetryState

from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxRedisSlackRetryHandler(RetryHandler):
    """
    This class uses Redis to share a rate limit among multiple threads.

    As currently implemented, this code is already surrounded by a lock in Redis
    via an override of _perform_urllib_http_request in OnyxSlackWebClient.

    This just sets the desired retry delay with TTL in redis. In conjunction with
    a custom subclass of the client, the value is read and obeyed prior to an API call
    and also serialized.

    Another way to do this is just to do exponential backoff. Might be easier?

    Adapted from slack's RateLimitErrorRetryHandler.
    """

    """RetryHandler that does retries for rate limited errors."""

    def __init__(
        self,
        max_retry_count: int,
        delay_key: str,
        r: Redis,
    ):
        """
        delay_lock: the redis key to use with RedisLock (to synchronize access to delay_key)
        delay_key: the redis key containing a shared TTL
        """
        super().__init__(max_retry_count=max_retry_count)
        self._redis: Redis = r
        self._delay_key = delay_key

    def _can_retry(
        self,
        *,
        state: RetryState,  # noqa: ARG002
        request: HttpRequest,  # noqa: ARG002
        response: Optional[HttpResponse] = None,
        error: Optional[Exception] = None,  # noqa: ARG002
    ) -> bool:
        return response is not None and response.status_code == 429

    def prepare_for_next_attempt(
        self,
        *,
        state: RetryState,
        request: HttpRequest,  # noqa: ARG002
        response: Optional[HttpResponse] = None,
        error: Optional[Exception] = None,
    ) -> None:
        """As initially designed by the SDK authors, this function is responsible for
        the wait to retry ... aka we actually sleep in this function.

        This doesn't work well with multiple clients because every thread is unaware
        of the current retry value until it actually calls the endpoint.

        We're combining this with an actual subclass of the slack web client so
        that the delay is used BEFORE calling an API endpoint. The subclassed client
        has already taken the lock in redis when this method is called.
        """
        ttl_ms: int | None = None

        retry_after_value: str | None = None
        retry_after_header_name: Optional[str] = None
        duration_s: float = 1.0  # seconds

        if response is None:
            # NOTE(rkuo): this logic comes from RateLimitErrorRetryHandler.
            # This reads oddly, as if the caller itself could raise the exception.
            # We don't have the luxury of changing this.
            if error:
                raise error

            return

        state.next_attempt_requested = True  # this signals the caller to retry

        # calculate wait duration based on retry-after + some jitter
        for k in response.headers.keys():
            if k.lower() == "retry-after":
                retry_after_header_name = k
                break

        try:
            if retry_after_header_name is None:
                # This situation usually does not arise. Just in case.
                raise ValueError(
                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header name is None"
                )

            retry_after_header_value = response.headers.get(retry_after_header_name)
            if not retry_after_header_value:
                raise ValueError(
                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header value is None"
                )

            # Handle case where header value might be a list
            retry_after_value = (
                retry_after_header_value[0]
                if isinstance(retry_after_header_value, list)
                else retry_after_header_value
            )

            retry_after_value_int = int(
                retry_after_value
            )  # will raise ValueError if somehow we can't convert to int
            jitter = retry_after_value_int * 0.25 * random.random()
            duration_s = retry_after_value_int + jitter
        except ValueError:
            duration_s += random.random()

        # Read and extend the ttl
        ttl_ms = cast(int, self._redis.pttl(self._delay_key))
        if ttl_ms < 0:  # negative values are error status codes ... see docs
            ttl_ms = 0
        ttl_ms_new = ttl_ms + int(duration_s * 1000.0)
        self._redis.set(self._delay_key, "1", px=ttl_ms_new)

        logger.warning(
            f"OnyxRedisSlackRetryHandler.prepare_for_next_attempt setting delay: "
            f"current_attempt={state.current_attempt} "
            f"retry-after={retry_after_value} "
            f"{ttl_ms_new=}"
        )

        state.increment_current_attempt()


================================================
FILE: backend/onyx/connectors/slack/onyx_slack_web_client.py
================================================
import threading
import time
from typing import Any
from typing import cast
from typing import Dict
from urllib.request import Request

from redis import Redis
from redis.lock import Lock as RedisLock
from slack_sdk import WebClient

from onyx.connectors.slack.utils import ONYX_SLACK_LOCK_BLOCKING_TIMEOUT
from onyx.connectors.slack.utils import ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT
from onyx.connectors.slack.utils import ONYX_SLACK_LOCK_TTL
from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxSlackWebClient(WebClient):
    """Use in combination with the Onyx Retry Handler.

    This client wrapper enforces a proper retry delay through redis BEFORE the api call
    so that multiple clients can synchronize and rate limit properly.

    The retry handler writes the correct delay value to redis so that it is can be used
    by this wrapper.

    """

    def __init__(
        self, delay_lock: str, delay_key: str, r: Redis, *args: Any, **kwargs: Any
    ) -> None:
        super().__init__(*args, **kwargs)
        self._delay_key = delay_key
        self._delay_lock = delay_lock
        self._redis: Redis = r
        self.num_requests: int = 0
        self._lock = threading.Lock()

    def _perform_urllib_http_request(
        self, *, url: str, args: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Any]:
        """By locking around the base class method, we ensure that both the delay from
        Redis and parsing/writing of retry values to Redis are handled properly in
        one place"""
        # lock and extend the ttl
        lock: RedisLock = self._redis.lock(
            self._delay_lock,
            timeout=ONYX_SLACK_LOCK_TTL,
        )

        # try to acquire the lock
        start = time.monotonic()
        while True:
            acquired = lock.acquire(blocking_timeout=ONYX_SLACK_LOCK_BLOCKING_TIMEOUT)
            if acquired:
                break

            # if we couldn't acquire the lock but it exists, there's at least some activity
            # so keep trying...
            if self._redis.exists(self._delay_lock):
                continue

            if time.monotonic() - start > ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT:
                raise RuntimeError(
                    f"OnyxSlackWebClient._perform_urllib_http_request - "
                    f"timed out waiting for lock: {ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT=}"
                )

        try:
            result = super()._perform_urllib_http_request(url=url, args=args)
        finally:
            if lock.owned():
                lock.release()
            else:
                logger.warning(
                    "OnyxSlackWebClient._perform_urllib_http_request lock not owned on release"
                )

        time.monotonic() - start
        # logger.info(
        #     f"OnyxSlackWebClient._perform_urllib_http_request: Releasing lock: {elapsed=}"
        # )

        return result

    def _perform_urllib_http_request_internal(
        self,
        url: str,
        req: Request,
    ) -> Dict[str, Any]:
        """Overrides the internal method which is mostly the direct call to
        urllib/urlopen ... so this is a good place to perform our delay."""

        # read and execute the delay
        delay_ms = cast(int, self._redis.pttl(self._delay_key))
        if delay_ms < 0:  # negative values are error status codes ... see docs
            delay_ms = 0

        if delay_ms > 0:
            logger.warning(
                f"OnyxSlackWebClient._perform_urllib_http_request_internal delay: {delay_ms=} {self.num_requests=}"
            )

            time.sleep(delay_ms / 1000.0)

        result = super()._perform_urllib_http_request_internal(url, req)

        with self._lock:
            self.num_requests += 1

        # the delay key should have naturally expired by this point
        return result


================================================
FILE: backend/onyx/connectors/slack/utils.py
================================================
import re
from collections.abc import Callable
from collections.abc import Generator
from functools import lru_cache
from functools import wraps
from typing import Any
from typing import cast

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.web import SlackResponse

from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.slack.models import MessageType
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

# retry after 0.1, 1.2, 3.4, 7.8, 16.6, 34.2 seconds
basic_retry_wrapper = retry_builder(tries=7)
# number of messages we request per page when fetching paginated slack messages
_SLACK_LIMIT = 900

# used to serialize access to the retry TTL
ONYX_SLACK_LOCK_TTL = 1800  # how long the lock is allowed to idle before it expires
ONYX_SLACK_LOCK_BLOCKING_TIMEOUT = 60  # how long to wait for the lock per wait attempt
ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT = 3600  # how long to wait for the lock in total


@lru_cache()
def get_base_url(token: str) -> str:
    """Retrieve and cache the base URL of the Slack workspace based on the client token."""
    client = WebClient(token=token)
    return client.auth_test()["url"]


def get_message_link(event: MessageType, client: WebClient, channel_id: str) -> str:
    message_ts = event["ts"]
    message_ts_without_dot = message_ts.replace(".", "")
    thread_ts = event.get("thread_ts")
    base_url = get_base_url(client.token)

    link = f"{base_url.rstrip('/')}/archives/{channel_id}/p{message_ts_without_dot}" + (
        f"?thread_ts={thread_ts}" if thread_ts else ""
    )
    return link


def make_slack_api_call(
    call: Callable[..., SlackResponse], **kwargs: Any
) -> SlackResponse:
    return call(**kwargs)


def make_paginated_slack_api_call(
    call: Callable[..., SlackResponse], **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
    return _make_slack_api_call_paginated(call)(**kwargs)


def _make_slack_api_call_paginated(
    call: Callable[..., SlackResponse],
) -> Callable[..., Generator[dict[str, Any], None, None]]:
    """Wraps calls to slack API so that they automatically handle pagination"""

    @wraps(call)
    def paginated_call(**kwargs: Any) -> Generator[dict[str, Any], None, None]:
        cursor: str | None = None
        has_more = True
        while has_more:
            response = call(cursor=cursor, limit=_SLACK_LIMIT, **kwargs)
            yield cast(dict[str, Any], response.validate())
            cursor = cast(dict[str, Any], response.get("response_metadata", {})).get(
                "next_cursor", ""
            )
            has_more = bool(cursor)

    return paginated_call


# NOTE(rkuo): we may not need this any more if the integrated retry handlers work as
# expected.  Do we want to keep this around?

# def make_slack_api_rate_limited(
#     call: Callable[..., SlackResponse], max_retries: int = 7
# ) -> Callable[..., SlackResponse]:
#     """Wraps calls to slack API so that they automatically handle rate limiting"""

#     @wraps(call)
#     def rate_limited_call(**kwargs: Any) -> SlackResponse:
#         last_exception = None

#         for _ in range(max_retries):
#             try:
#                 # Make the API call
#                 response = call(**kwargs)

#                 # Check for errors in the response, will raise `SlackApiError`
#                 # if anything went wrong
#                 response.validate()
#                 return response

#             except SlackApiError as e:
#                 last_exception = e
#                 try:
#                     error = e.response["error"]
#                 except KeyError:
#                     error = "unknown error"

#                 if error == "ratelimited":
#                     # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration
#                     retry_after = int(e.response.headers.get("Retry-After", 1))
#                     logger.info(
#                         f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
#                     )
#                     time.sleep(retry_after)
#                 elif error in ["already_reacted", "no_reaction", "internal_error"]:
#                     # Log internal_error and return the response instead of failing
#                     logger.warning(
#                         f"Slack call encountered '{error}', skipping and continuing..."
#                     )
#                     return e.response
#                 else:
#                     # Raise the error for non-transient errors
#                     raise

#         # If the code reaches this point, all retries have been exhausted
#         msg = f"Max retries ({max_retries}) exceeded"
#         if last_exception:
#             raise Exception(msg) from last_exception
#         else:
#             raise Exception(msg)

#     return rate_limited_call

# temporarily disabling due to using a different retry approach
# might be permanent if everything works out
# def make_slack_api_call_w_retries(
#     call: Callable[..., SlackResponse], **kwargs: Any
# ) -> SlackResponse:
#     return basic_retry_wrapper(call)(**kwargs)


# def make_paginated_slack_api_call_w_retries(
#     call: Callable[..., SlackResponse], **kwargs: Any
# ) -> Generator[dict[str, Any], None, None]:
#     return _make_slack_api_call_paginated(basic_retry_wrapper(call))(**kwargs)


def expert_info_from_slack_id(
    user_id: str | None,
    client: WebClient,
    user_cache: dict[str, BasicExpertInfo | None],
) -> BasicExpertInfo | None:
    if not user_id:
        return None

    if user_id in user_cache:
        return user_cache[user_id]

    response = client.users_info(user=user_id)

    if not response["ok"]:
        user_cache[user_id] = None
        return None

    user: dict = cast(dict[Any, dict], response.data).get("user", {})
    profile = user.get("profile", {})

    expert = BasicExpertInfo(
        display_name=user.get("real_name") or profile.get("display_name"),
        first_name=profile.get("first_name"),
        last_name=profile.get("last_name"),
        email=profile.get("email"),
    )

    user_cache[user_id] = expert

    return expert


class SlackTextCleaner:
    """Utility class to replace user IDs with usernames in a message.
    Handles caching, so the same request is not made multiple times
    for the same user ID"""

    def __init__(self, client: WebClient) -> None:
        self._client = client
        self._id_to_name_map: dict[str, str] = {}

    def _get_slack_name(self, user_id: str) -> str:
        if user_id not in self._id_to_name_map:
            try:
                response = self._client.users_info(user=user_id)
                # prefer display name if set, since that is what is shown in Slack
                self._id_to_name_map[user_id] = (
                    response["user"]["profile"]["display_name"]
                    or response["user"]["profile"]["real_name"]
                )
            except SlackApiError as e:
                logger.exception(
                    f"Error fetching data for user {user_id}: {e.response['error']}"
                )
                raise

        return self._id_to_name_map[user_id]

    def _replace_user_ids_with_names(self, message: str) -> str:
        # Find user IDs in the message
        user_ids = re.findall("<@(.*?)>", message)

        # Iterate over each user ID found
        for user_id in user_ids:
            try:
                if user_id in self._id_to_name_map:
                    user_name = self._id_to_name_map[user_id]
                else:
                    user_name = self._get_slack_name(user_id)

                # Replace the user ID with the username in the message
                message = message.replace(f"<@{user_id}>", f"@{user_name}")
            except Exception:
                logger.exception(
                    f"Unable to replace user ID with username for user_id '{user_id}'"
                )

        return message

    def index_clean(self, message: str) -> str:
        """During indexing, replace pattern sets that may cause confusion to the model
        Some special patterns are left in as they can provide information
        ie. links that contain format text|link, both the text and the link may be informative
        """
        message = self._replace_user_ids_with_names(message)
        message = self.replace_tags_basic(message)
        message = self.replace_channels_basic(message)
        message = self.replace_special_mentions(message)
        message = self.replace_special_catchall(message)
        return message

    @staticmethod
    def replace_tags_basic(message: str) -> str:
        """Simply replaces all tags with `@<USER_ID>` in order to prevent us from
        tagging users in Slack when we don't want to"""
        # Find user IDs in the message
        user_ids = re.findall("<@(.*?)>", message)
        for user_id in user_ids:
            message = message.replace(f"<@{user_id}>", f"@{user_id}")
        return message

    @staticmethod
    def replace_channels_basic(message: str) -> str:
        """Simply replaces all channel mentions with `#<CHANNEL_ID>` in order
        to make a message work as part of a link"""
        # Find user IDs in the message
        channel_matches = re.findall(r"<#(.*?)\|(.*?)>", message)
        for channel_id, channel_name in channel_matches:
            message = message.replace(
                f"<#{channel_id}|{channel_name}>", f"#{channel_name}"
            )
        return message

    @staticmethod
    def replace_special_mentions(message: str) -> str:
        """Simply replaces @channel, @here, and @everyone so we don't tag
        a bunch of people in Slack when we don't want to"""
        # Find user IDs in the message
        message = message.replace("<!channel>", "@channel")
        message = message.replace("<!here>", "@here")
        message = message.replace("<!everyone>", "@everyone")
        return message

    @staticmethod
    def replace_special_catchall(message: str) -> str:
        """Replaces pattern of <!something|another-thing> with another-thing
        This is added for <!subteam^TEAM-ID|@team-name> but may match other cases as well
        """

        pattern = r"<!([^|]+)\|([^>]+)>"
        return re.sub(pattern, r"\2", message)

    @staticmethod
    def add_zero_width_whitespace_after_tag(message: str) -> str:
        """Add a 0 width whitespace after every @"""
        return message.replace("@", "@\u200b")


================================================
FILE: backend/onyx/connectors/teams/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/teams/connector.py
================================================
import copy
import os
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast

import msal  # type: ignore
from office365.graph_client import GraphClient  # type: ignore
from office365.runtime.client_request_exception import ClientRequestException  # type: ignore
from office365.runtime.http.request_options import RequestOptions  # type: ignore[import-untyped]
from office365.teams.channels.channel import Channel  # type: ignore
from office365.teams.team import Team  # type: ignore

from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.teams.models import Message
from onyx.connectors.teams.utils import fetch_expert_infos
from onyx.connectors.teams.utils import fetch_external_access
from onyx.connectors.teams.utils import fetch_messages
from onyx.connectors.teams.utils import fetch_replies
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout

logger = setup_logger()

_SLIM_DOC_BATCH_SIZE = 5000


class TeamsCheckpoint(ConnectorCheckpoint):
    todo_team_ids: list[str] | None = None


DEFAULT_AUTHORITY_HOST = "https://login.microsoftonline.com"
DEFAULT_GRAPH_API_HOST = "https://graph.microsoft.com"


class TeamsConnector(
    CheckpointedConnectorWithPermSync[TeamsCheckpoint],
    SlimConnectorWithPermSync,
):
    MAX_WORKERS = 10

    def __init__(
        self,
        # TODO: (chris) move from "Display Names" to IDs, since display names
        # are not necessarily guaranteed to be unique
        teams: list[str] = [],
        max_workers: int = MAX_WORKERS,
        authority_host: str = DEFAULT_AUTHORITY_HOST,
        graph_api_host: str = DEFAULT_GRAPH_API_HOST,
    ) -> None:
        self.graph_client: GraphClient | None = None
        self.msal_app: msal.ConfidentialClientApplication | None = None
        self.max_workers = max_workers
        self.requested_team_list: list[str] = teams

        resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)
        self._azure_environment = resolved_env.environment
        self.authority_host = resolved_env.authority_host
        self.graph_api_host = resolved_env.graph_host

    # impls for BaseConnector

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        teams_client_id = credentials["teams_client_id"]
        teams_client_secret = credentials["teams_client_secret"]
        teams_directory_id = credentials["teams_directory_id"]

        authority_url = f"{self.authority_host}/{teams_directory_id}"
        self.msal_app = msal.ConfidentialClientApplication(
            authority=authority_url,
            client_id=teams_client_id,
            client_credential=teams_client_secret,
        )

        def _acquire_token_func() -> dict[str, Any]:
            """
            Acquire token via MSAL
            """
            if self.msal_app is None:
                raise RuntimeError("MSAL app is not initialized")

            token = self.msal_app.acquire_token_for_client(
                scopes=[f"{self.graph_api_host}/.default"]
            )

            if not isinstance(token, dict):
                raise RuntimeError("`token` instance must be of type dict")

            return token

        self.graph_client = GraphClient(
            _acquire_token_func, environment=self._azure_environment
        )
        return None

    def validate_connector_settings(self) -> None:
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams credentials not loaded.")

        # Check if any requested teams have special characters that need client-side filtering
        has_special_chars = _has_odata_incompatible_chars(self.requested_team_list)
        if has_special_chars:
            logger.info(
                "Some requested team names contain special characters (&, (, )) that require "
                "client-side filtering during data retrieval."
            )

        # Minimal validation: just check if we can access the teams endpoint
        timeout = 10  # Short timeout for basic validation

        try:
            # For validation, do a lightweight check instead of full team search
            logger.info(
                f"Requested team count: {len(self.requested_team_list) if self.requested_team_list else 0}, "
                f"Has special chars: {has_special_chars}"
            )

            validation_query = self.graph_client.teams.get().top(1)
            run_with_timeout(
                timeout=timeout,
                func=lambda: validation_query.execute_query(),
            )

            logger.info(
                "Teams validation successful - Access to teams endpoint confirmed"
            )

        except TimeoutError as e:
            raise ConnectorValidationError(
                f"Timeout while validating Teams access (waited {timeout}s). "
                f"This may indicate network issues or authentication problems. "
                f"Error: {e}"
            )

        except ClientRequestException as e:
            if not e.response:
                raise RuntimeError(f"No response provided in error; {e=}")
            status_code = e.response.status_code
            if status_code == 401:
                raise CredentialExpiredError(
                    "Invalid or expired Microsoft Teams credentials (401 Unauthorized)."
                )
            elif status_code == 403:
                raise InsufficientPermissionsError(
                    "Your app lacks sufficient permissions to read Teams (403 Forbidden)."
                )
            raise UnexpectedValidationError(f"Unexpected error retrieving teams: {e}")

        except Exception as e:
            error_str = str(e).lower()
            if (
                "unauthorized" in error_str
                or "401" in error_str
                or "invalid_grant" in error_str
            ):
                raise CredentialExpiredError(
                    "Invalid or expired Microsoft Teams credentials."
                )
            elif "forbidden" in error_str or "403" in error_str:
                raise InsufficientPermissionsError(
                    "App lacks required permissions to read from Microsoft Teams."
                )
            raise ConnectorValidationError(
                f"Unexpected error during Teams validation: {e}"
            )

    # impls for CheckpointedConnector

    def build_dummy_checkpoint(self) -> TeamsCheckpoint:
        return TeamsCheckpoint(
            has_more=True,
        )

    def validate_checkpoint_json(self, checkpoint_json: str) -> TeamsCheckpoint:
        return TeamsCheckpoint.model_validate_json(checkpoint_json)

    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,  # noqa: ARG002
        checkpoint: TeamsCheckpoint,
    ) -> CheckpointOutput[TeamsCheckpoint]:
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams")

        checkpoint = cast(TeamsCheckpoint, copy.deepcopy(checkpoint))

        todos = checkpoint.todo_team_ids

        if todos is None:
            teams = _collect_all_teams(
                graph_client=self.graph_client,
                requested=self.requested_team_list,
            )
            todo_team_ids = [team.id for team in teams if team.id]
            return TeamsCheckpoint(
                todo_team_ids=todo_team_ids,
                has_more=bool(todo_team_ids),
            )

        # `todos.pop()` should always return an element. This is because if
        # `todos` was the empty list, then we would have set `has_more=False`
        # during the previous invocation of `TeamsConnector.load_from_checkpoint`,
        # meaning that this function wouldn't have been called in the first place.
        todo_team_id = todos.pop()
        team = _get_team_by_id(
            graph_client=self.graph_client,
            team_id=todo_team_id,
        )
        channels = _collect_all_channels_from_team(
            team=team,
        )

        # An iterator of channels, in which each channel is an iterator of docs.
        channels_docs = [
            _collect_documents_for_channel(
                graph_client=self.graph_client,
                team=team,
                channel=channel,
                start=start,
            )
            for channel in channels
        ]

        # Was previously `for doc in parallel_yield(gens=docs, max_workers=self.max_workers): ...`.
        # However, that lead to some weird exceptions (potentially due to non-thread-safe behaviour in the Teams library).
        # Reverting back to the non-threaded case for now.
        for channel_docs in channels_docs:
            for channel_doc in channel_docs:
                if channel_doc:
                    yield channel_doc

        logger.info(
            f"Processed team with id {todo_team_id}; {len(todos)} team(s) left to process"
        )

        return TeamsCheckpoint(
            todo_team_ids=todos,
            has_more=bool(todos),
        )

    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: TeamsCheckpoint,
    ) -> CheckpointOutput[TeamsCheckpoint]:
        # Teams already fetches external_access (permissions) for each document
        # in _convert_thread_to_document, so we can just delegate to load_from_checkpoint
        return self.load_from_checkpoint(start, end, checkpoint)

    # impls for SlimConnectorWithPermSync

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        start = start or 0

        teams = _collect_all_teams(
            graph_client=self.graph_client,
            requested=self.requested_team_list,
        )

        for team in teams:
            if not team.id:
                logger.warning(
                    f"Expected a team with an id, instead got no id: {team=}"
                )
                continue

            channels = _collect_all_channels_from_team(
                team=team,
            )

            for channel in channels:
                if not channel.id:
                    logger.warning(
                        f"Expected a channel with an id, instead got no id: {channel=}"
                    )
                    continue

                external_access = fetch_external_access(
                    graph_client=self.graph_client, channel=channel
                )

                messages = fetch_messages(
                    graph_client=self.graph_client,
                    team_id=team.id,
                    channel_id=channel.id,
                    start=start,
                )

                slim_doc_buffer: list[SlimDocument | HierarchyNode] = []

                for message in messages:
                    slim_doc_buffer.append(
                        SlimDocument(
                            id=message.id,
                            external_access=external_access,
                        )
                    )

                    if len(slim_doc_buffer) >= _SLIM_DOC_BATCH_SIZE:
                        if callback:
                            if callback.should_stop():
                                raise RuntimeError(
                                    "retrieve_all_slim_docs_perm_sync: Stop signal detected"
                                )
                            callback.progress("retrieve_all_slim_docs_perm_sync", 1)
                        yield slim_doc_buffer
                        slim_doc_buffer = []

                # Flush any remaining slim documents collected for this channel
                if slim_doc_buffer:
                    yield slim_doc_buffer
                    slim_doc_buffer = []


def _escape_odata_string(name: str) -> str:
    """Escape special characters for OData string literals.

    Uses proper OData v4 string literal escaping:
    - Single quotes: ' becomes ''
    - Other characters are handled by using contains() instead of eq for problematic cases
    """
    # Escape single quotes for OData syntax (replace ' with '')
    escaped = name.replace("'", "''")
    return escaped


def _has_odata_incompatible_chars(team_names: list[str] | None) -> bool:
    """Check if any team name contains characters that break Microsoft Graph OData filters.

    The Microsoft Graph Teams API has limited OData support. Characters like
    &, (, and ) cause parsing errors and require client-side filtering instead.
    """
    if not team_names:
        return False
    return any(char in name for name in team_names for char in ["&", "(", ")"])


def _can_use_odata_filter(
    team_names: list[str] | None,
) -> tuple[bool, list[str], list[str]]:
    """Determine which teams can use OData filtering vs client-side filtering.

    Microsoft Graph /teams endpoint OData limitations:
    - Only supports basic 'eq' operators in filters
    - No 'contains', 'startswith', or other advanced operators
    - Special characters (&, (, )) break OData parsing

    Returns:
        tuple: (can_use_odata, safe_names, problematic_names)
    """
    if not team_names:
        return False, [], []

    safe_names = []
    problematic_names = []

    for name in team_names:
        if any(char in name for char in ["&", "(", ")"]):
            problematic_names.append(name)
        else:
            safe_names.append(name)

    return bool(safe_names), safe_names, problematic_names


def _build_simple_odata_filter(safe_names: list[str]) -> str | None:
    """Build simple OData filter using only 'eq' operators for safe names."""
    if not safe_names:
        return None

    filter_parts = []
    for name in safe_names:
        escaped_name = _escape_odata_string(name)
        filter_parts.append(f"displayName eq '{escaped_name}'")

    return " or ".join(filter_parts)


def _construct_semantic_identifier(channel: Channel, top_message: Message) -> str:
    top_message_user_name: str

    if top_message.from_ and top_message.from_.user:
        user_display_name = top_message.from_.user.display_name
        top_message_user_name = (
            user_display_name if user_display_name else "Unknown User"
        )
    else:
        logger.warning(f"Message {top_message=} has no `from.user` field")
        top_message_user_name = "Unknown User"

    top_message_content = top_message.body.content or ""
    top_message_subject = top_message.subject or "Unknown Subject"
    channel_name = channel.properties.get("displayName", "Unknown")

    try:
        snippet = parse_html_page_basic(top_message_content.rstrip())
        snippet = snippet[:50] + "..." if len(snippet) > 50 else snippet

    except Exception:
        logger.exception(
            f"Error parsing snippet for message {top_message.id} with url {top_message.web_url}"
        )
        snippet = ""

    semantic_identifier = (
        f"{top_message_user_name} in {channel_name} about {top_message_subject}"
    )
    if snippet:
        semantic_identifier += f": {snippet}"

    return semantic_identifier


def _convert_thread_to_document(
    graph_client: GraphClient,
    channel: Channel,
    thread: list[Message],
) -> Document | None:
    if len(thread) == 0:
        return None

    most_recent_message_datetime: datetime | None = None
    top_message = thread[0]
    thread_text = ""

    sorted_thread = sorted(thread, key=lambda m: m.created_date_time, reverse=True)

    if sorted_thread:
        most_recent_message_datetime = sorted_thread[0].created_date_time

    for message in thread:
        # Add text and a newline
        if message.body.content:
            thread_text += parse_html_page_basic(message.body.content)

        # If it has a subject, that means its the top level post message, so grab its id, url, and subject
        if message.subject:
            top_message = message

    if not thread_text:
        return None

    semantic_string = _construct_semantic_identifier(channel, top_message)
    expert_infos = fetch_expert_infos(graph_client=graph_client, channel=channel)
    external_access = fetch_external_access(
        graph_client=graph_client, channel=channel, expert_infos=expert_infos
    )

    return Document(
        id=top_message.id,
        sections=[TextSection(link=top_message.web_url, text=thread_text)],
        source=DocumentSource.TEAMS,
        semantic_identifier=semantic_string,
        title="",  # teams threads don't really have a "title"
        doc_updated_at=most_recent_message_datetime,
        primary_owners=expert_infos,
        metadata={},
        external_access=external_access,
    )


def _update_request_url(request: RequestOptions, next_url: str) -> None:
    request.url = next_url


def _add_prefer_header(request: RequestOptions) -> None:
    """Add Prefer header to work around Microsoft Graph API ampersand bug.
    See: https://developer.microsoft.com/en-us/graph/known-issues/?search=18185
    """
    if not hasattr(request, "headers") or request.headers is None:
        request.headers = {}
    # Add header to handle properly encoded ampersands in filters
    request.headers["Prefer"] = "legacySearch=false"


def _collect_all_teams(
    graph_client: GraphClient,
    requested: list[str] | None = None,
) -> list[Team]:
    """Collect teams from Microsoft Graph using appropriate filtering strategy.

    For teams with special characters (&, (, )), uses client-side filtering
    with paginated search. For teams without special characters, uses efficient
    OData server-side filtering.

    Args:
        graph_client: Authenticated Microsoft Graph client
        requested: List of team names to find, or None for all teams

    Returns:
        List of Team objects matching the requested names
    """
    teams: list[Team] = []
    next_url: str | None = None

    # Determine filtering strategy based on Microsoft Graph limitations
    if not requested:
        # No specific teams requested - return empty list (avoid fetching all teams)
        logger.info("No specific teams requested - returning empty list")
        return []

    _, safe_names, problematic_names = _can_use_odata_filter(requested)

    if problematic_names and not safe_names:
        # ALL requested teams have special characters - cannot use OData filtering
        logger.info(
            f"All requested team names contain special characters (&, (, )) which require "
            f"client-side filtering. Using basic /teams endpoint with pagination. "
            f"Teams: {problematic_names}"
        )
        # Use unfiltered query with pagination limit to avoid fetching too many teams
        use_client_side_filtering = True
        odata_filter = None
    elif problematic_names and safe_names:
        # Mixed scenario - need to fetch more teams to find the problematic ones
        logger.info(
            f"Mixed team types: will use client-side filtering for all. "
            f"Safe names: {safe_names}, Special char names: {problematic_names}"
        )
        use_client_side_filtering = True
        odata_filter = None
    elif safe_names:
        # All names are safe - use OData filtering
        logger.info(f"Using OData filtering for all requested teams: {safe_names}")
        use_client_side_filtering = False
        odata_filter = _build_simple_odata_filter(safe_names)
    else:
        # No valid names
        return []

    # Track pagination to avoid fetching too many teams for client-side filtering
    max_pages = 200
    page_count = 0

    while True:
        try:
            if use_client_side_filtering:
                # Use basic /teams endpoint with top parameter to limit results per page
                query = graph_client.teams.get().top(50)  # Limit to 50 teams per page
            else:
                # Use OData filter with only 'eq' operators
                query = graph_client.teams.get().filter(odata_filter)

            # Add header to work around Microsoft Graph API issues
            query.before_execute(lambda req: _add_prefer_header(request=req))

            if next_url:
                url = next_url
                query.before_execute(
                    lambda req: _update_request_url(request=req, next_url=url)
                )

            team_collection = query.execute_query()
        except (ClientRequestException, ValueError) as e:
            # If OData filter fails, fall back to client-side filtering
            if not use_client_side_filtering and odata_filter:
                logger.warning(
                    f"OData filter failed: {e}. Falling back to client-side filtering."
                )
                use_client_side_filtering = True
                odata_filter = None
                teams = []
                next_url = None
                page_count = 0
                continue
            # If client-side approach also fails, re-raise
            logger.error(f"Teams query failed: {e}")
            raise

        filtered_teams = (
            team
            for team in team_collection
            if _filter_team(team=team, requested=requested)
        )
        teams.extend(filtered_teams)

        # For client-side filtering, check if we found all requested teams or hit page limit
        if use_client_side_filtering:
            page_count += 1
            found_team_names = {
                team.display_name for team in teams if team.display_name
            }
            requested_set = set(requested)

            # Log progress every 10 pages to avoid excessive logging
            if page_count % 10 == 0:
                logger.info(
                    f"Searched {page_count} pages, found {len(found_team_names)} matching teams so far"
                )

            # Stop if we found all requested teams or hit the page limit
            if requested_set.issubset(found_team_names):
                logger.info(f"Found all requested teams after {page_count} pages")
                break
            elif page_count >= max_pages:
                logger.warning(
                    f"Reached maximum page limit ({max_pages}) while searching for teams. "
                    f"Found: {found_team_names & requested_set}, "
                    f"Missing: {requested_set - found_team_names}"
                )
                break

        if not team_collection.has_next:
            break

        if not isinstance(team_collection._next_request_url, str):
            raise ValueError(
                f"The next request url field should be a string, instead got {type(team_collection._next_request_url)}"
            )

        next_url = team_collection._next_request_url

    return teams


def _normalize_team_name(name: str) -> str:
    """Normalize team name for flexible matching."""
    if not name:
        return ""
    # Convert to lowercase and strip whitespace for case-insensitive matching
    return name.lower().strip()


def _matches_requested_team(
    team_display_name: str, requested: list[str] | None
) -> bool:
    """Check if team display name matches any of the requested team names.

    Uses flexible matching to handle slight variations in team names.
    """
    if not requested or not team_display_name:
        return (
            not requested
        )  # If no teams requested, match all; if no name, don't match

    normalized_team_name = _normalize_team_name(team_display_name)

    for requested_name in requested:
        normalized_requested = _normalize_team_name(requested_name)

        # Exact match after normalization
        if normalized_team_name == normalized_requested:
            return True

        # Flexible matching - check if team name contains all significant words
        # This helps with slight variations in formatting
        team_words = set(normalized_team_name.split())
        requested_words = set(normalized_requested.split())

        # If the requested name has special characters, split on those too
        for char in ["&", "(", ")"]:
            if char in normalized_requested:
                # Split on special characters and add words
                parts = normalized_requested.replace(char, " ").split()
                requested_words.update(parts)

        # Remove very short words that aren't meaningful
        meaningful_requested_words = {
            word for word in requested_words if len(word) >= 3
        }

        # Check if team name contains most of the meaningful words
        if (
            meaningful_requested_words
            and len(meaningful_requested_words & team_words)
            >= len(meaningful_requested_words) * 0.7
        ):
            return True

    return False


def _filter_team(
    team: Team,
    requested: list[str] | None = None,
) -> bool:
    """
    Returns the true if:
        - Team is not expired / deleted
        - Team has a display-name and ID
        - Team display-name matches any of the requested teams (with flexible matching)

    Otherwise, returns false.
    """

    if not team.id or not team.display_name:
        return False

    if not _matches_requested_team(team.display_name, requested):
        return False

    props = team.properties

    expiration = props.get("expirationDateTime")
    deleted = props.get("deletedDateTime")

    # We just check for the existence of those two fields, not their actual dates.
    # This is because if these fields do exist, they have to have occurred in the past, thus making them already
    # expired / deleted.
    return not expiration and not deleted


def _get_team_by_id(
    graph_client: GraphClient,
    team_id: str,
) -> Team:
    team_collection = (
        graph_client.teams.get().filter(f"id eq '{team_id}'").top(1).execute_query()
    )

    if not team_collection:
        raise ValueError(f"No team with {team_id=} was found")
    elif team_collection.has_next:
        # shouldn't happen, but catching it regardless
        raise RuntimeError(f"Multiple teams with {team_id=} were found")

    return team_collection[0]


def _collect_all_channels_from_team(
    team: Team,
) -> list[Channel]:
    if not team.id:
        raise RuntimeError(f"The {team=} has an empty `id` field")

    channels: list[Channel] = []
    next_url = None

    while True:
        query = team.channels.get_all(
            # explicitly needed because of incorrect type definitions provided by the `office365` library
            page_loaded=lambda _: None
        )
        if next_url:
            url = next_url
            query = query.before_execute(
                lambda req: _update_request_url(request=req, next_url=url)
            )

        channel_collection = query.execute_query()
        channels.extend(channel for channel in channel_collection if channel.id)

        if not channel_collection.has_next:
            break

    return channels


def _collect_documents_for_channel(
    graph_client: GraphClient,
    team: Team,
    channel: Channel,
    start: SecondsSinceUnixEpoch,
) -> Iterator[Document | None | ConnectorFailure]:
    """
    This function yields an iterator of `Document`s, where each `Document` corresponds to a "thread".

    A "thread" is the conjunction of the "root" message and all of its replies.
    """

    for message in fetch_messages(
        graph_client=graph_client,
        team_id=team.id,
        channel_id=channel.id,
        start=start,
    ):
        try:
            replies = list(
                fetch_replies(
                    graph_client=graph_client,
                    team_id=team.id,
                    channel_id=channel.id,
                    root_message_id=message.id,
                )
            )

            thread = [message]
            thread.extend(replies[::-1])

            # Note:
            # We convert an entire *thread* (including the root message and its replies) into one, singular `Document`.
            # I.e., we don't convert each individual message and each individual reply into their own individual `Document`s.
            if doc := _convert_thread_to_document(
                graph_client=graph_client,
                channel=channel,
                thread=thread,
            ):
                yield doc

        except Exception as e:
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=message.id,
                ),
                failure_message=f"Retrieval of message and its replies failed; {channel.id=} {message.id}",
                exception=e,
            )


if __name__ == "__main__":
    from tests.daily.connectors.utils import load_all_from_connector

    app_id = os.environ["TEAMS_APPLICATION_ID"]
    dir_id = os.environ["TEAMS_DIRECTORY_ID"]
    secret = os.environ["TEAMS_SECRET"]

    teams_env_var = os.environ.get("TEAMS", None)
    teams = teams_env_var.split(",") if teams_env_var else []

    teams_connector = TeamsConnector(teams=teams)
    teams_connector.load_credentials(
        {
            "teams_client_id": app_id,
            "teams_directory_id": dir_id,
            "teams_client_secret": secret,
        }
    )
    teams_connector.validate_connector_settings()

    for slim_doc in teams_connector.retrieve_all_slim_docs_perm_sync():
        ...

    for doc in load_all_from_connector(
        connector=teams_connector,
        start=0.0,
        end=datetime.now(tz=timezone.utc).timestamp(),
    ).documents:
        print(doc)


================================================
FILE: backend/onyx/connectors/teams/models.py
================================================
from datetime import datetime

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic.alias_generators import to_camel


class Body(BaseModel):
    content_type: str
    content: str | None

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
    )


class User(BaseModel):
    id: str
    display_name: str

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
    )


class From(BaseModel):
    user: User | None

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
    )


class Message(BaseModel):
    id: str
    replyToId: str | None
    subject: str | None
    from_: From | None = Field(alias="from")
    body: Body
    created_date_time: datetime
    last_modified_date_time: datetime | None
    last_edited_date_time: datetime | None
    deleted_date_time: datetime | None
    web_url: str

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
    )


================================================
FILE: backend/onyx/connectors/teams/utils.py
================================================
import time
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from http import HTTPStatus

from office365.graph_client import GraphClient  # type: ignore[import-untyped]
from office365.teams.channels.channel import Channel  # type: ignore[import-untyped]
from office365.teams.channels.channel import ConversationMember

from onyx.access.models import ExternalAccess
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.teams.models import Message
from onyx.utils.logger import setup_logger

logger = setup_logger()


_PUBLIC_MEMBERSHIP_TYPE = "standard"  # public teams channel


def _sanitize_message_user_display_name(value: dict) -> dict:
    try:
        from_obj = value.get("from")
        if isinstance(from_obj, dict):
            user_obj = from_obj.get("user")
            if isinstance(user_obj, dict) and user_obj.get("displayName") is None:
                value = dict(value)
                from_obj = dict(from_obj)
                user_obj = dict(user_obj)
                user_obj["displayName"] = "Unknown User"
                from_obj["user"] = user_obj
                value["from"] = from_obj
    except (AttributeError, TypeError, KeyError):
        pass
    return value


def _retry(
    graph_client: GraphClient,
    request_url: str,
) -> dict:
    MAX_RETRIES = 10
    retry_number = 0

    while retry_number < MAX_RETRIES:
        response = graph_client.execute_request_direct(request_url)
        if response.ok:
            json = response.json()
            if not isinstance(json, dict):
                raise RuntimeError(f"Expected a JSON object, instead got {json=}")

            return json

        if response.status_code == int(HTTPStatus.TOO_MANY_REQUESTS):
            retry_number += 1

            cooldown = int(response.headers.get("Retry-After", 10))
            time.sleep(cooldown)

            continue

        response.raise_for_status()

    raise RuntimeError(
        f"Max number of retries for hitting {request_url=} exceeded; unable to fetch data"
    )


def _get_next_url(
    graph_client: GraphClient,
    json_response: dict,
) -> str | None:
    next_url = json_response.get("@odata.nextLink")

    if not next_url:
        return None

    if not isinstance(next_url, str):
        raise RuntimeError(
            f"Expected a string for the `@odata.nextUrl`, instead got {next_url=}"
        )

    return next_url.removeprefix(graph_client.service_root_url()).removeprefix("/")


def _get_or_fetch_email(
    graph_client: GraphClient,
    member: ConversationMember,
) -> str | None:
    if email := member.properties.get("email"):
        return email

    user_id = member.properties.get("userId")
    if not user_id:
        logger.warn(f"No user-id found for this member; {member=}")
        return None

    json_data = _retry(graph_client=graph_client, request_url=f"users/{user_id}")
    email = json_data.get("userPrincipalName")

    if not isinstance(email, str):
        logger.warn(f"Expected email to be of type str, instead got {email=}")
        return None

    return email


def _is_channel_public(channel: Channel) -> bool:
    return (
        channel.membership_type and channel.membership_type == _PUBLIC_MEMBERSHIP_TYPE
    )


def fetch_messages(
    graph_client: GraphClient,
    team_id: str,
    channel_id: str,
    start: SecondsSinceUnixEpoch,
) -> Generator[Message]:
    startfmt = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
        "%Y-%m-%dT%H:%M:%SZ"
    )

    initial_request_url = f"teams/{team_id}/channels/{channel_id}/messages/delta?$filter=lastModifiedDateTime gt {startfmt}"

    request_url: str | None = initial_request_url

    while request_url:
        json_response = _retry(graph_client=graph_client, request_url=request_url)

        for value in json_response.get("value", []):
            yield Message(**_sanitize_message_user_display_name(value))

        request_url = _get_next_url(
            graph_client=graph_client, json_response=json_response
        )


def fetch_replies(
    graph_client: GraphClient,
    team_id: str,
    channel_id: str,
    root_message_id: str,
) -> Generator[Message]:
    initial_request_url = (
        f"teams/{team_id}/channels/{channel_id}/messages/{root_message_id}/replies"
    )

    request_url: str | None = initial_request_url

    while request_url:
        json_response = _retry(graph_client=graph_client, request_url=request_url)

        for value in json_response.get("value", []):
            yield Message(**_sanitize_message_user_display_name(value))

        request_url = _get_next_url(
            graph_client=graph_client, json_response=json_response
        )


def fetch_expert_infos(
    graph_client: GraphClient, channel: Channel
) -> list[BasicExpertInfo]:
    members = channel.members.get_all(
        # explicitly needed because of incorrect type definitions provided by the `office365` library
        page_loaded=lambda _: None
    ).execute_query_retry()

    expert_infos = []
    for member in members:
        if not member.display_name:
            logger.warn(f"Failed to grab the display-name of {member=}; skipping")
            continue

        email = _get_or_fetch_email(graph_client=graph_client, member=member)
        if not email:
            logger.warn(f"Failed to grab the email of {member=}; skipping")
            continue

        expert_infos.append(
            BasicExpertInfo(
                display_name=member.display_name,
                email=email,
            )
        )

    return expert_infos


def fetch_external_access(
    graph_client: GraphClient,
    channel: Channel,
    expert_infos: list[BasicExpertInfo] | None = None,
) -> ExternalAccess:
    is_public = _is_channel_public(channel=channel)

    if is_public:
        return ExternalAccess.public()

    expert_infos = (
        expert_infos
        if expert_infos is not None
        else fetch_expert_infos(graph_client=graph_client, channel=channel)
    )
    emails = {expert_info.email for expert_info in expert_infos if expert_info.email}

    return ExternalAccess(
        external_user_emails=emails,
        external_user_group_ids=set(),
        is_public=is_public,
    )


================================================
FILE: backend/onyx/connectors/testrail/__init__.py
================================================
# Package marker for TestRail connector


================================================
FILE: backend/onyx/connectors/testrail/connector.py
================================================
from __future__ import annotations

from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import ClassVar
from typing import Optional

import requests
from bs4 import BeautifulSoup

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import format_document_soup
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import remove_markdown_image_references


logger = setup_logger()


class TestRailConnector(LoadConnector, PollConnector):
    """Connector for TestRail.

    Minimal implementation that indexes Test Cases per project.
    """

    document_source_type: ClassVar[DocumentSource] = DocumentSource.TESTRAIL

    # Fields that need ID-to-label value mapping
    FIELDS_NEEDING_VALUE_MAPPING: ClassVar[set[str]] = {
        "priority_id",
        "custom_automation_type",
        "custom_scenario_db_automation",
        "custom_case_golden_canvas_automation",
        "custom_customers",
        "custom_case_environments",
        "custom_case_overall_automation",
        "custom_case_team_ownership",
        "custom_case_unit_or_integration_automation",
        "custom_effort",
    }

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        project_ids: str | list[int] | None = None,
        cases_page_size: int | None = None,
        max_pages: int | None = None,
        skip_doc_absolute_chars: int | None = None,
    ) -> None:
        self.base_url: str | None = None
        self.username: str | None = None
        self.api_key: str | None = None
        self.batch_size = batch_size
        parsed_project_ids: list[int] | None

        # Parse project_ids from string if needed
        # None = all projects (no filtering), [] = no projects, [1,2,3] = specific projects
        if isinstance(project_ids, str):
            if project_ids.strip():
                parsed_project_ids = [
                    int(x.strip()) for x in project_ids.split(",") if x.strip()
                ]
            else:
                # Empty string from UI means "all projects"
                parsed_project_ids = None
        elif project_ids is None:
            parsed_project_ids = None
        else:
            parsed_project_ids = [int(pid) for pid in project_ids]

        self.project_ids: list[int] | None = parsed_project_ids

        # Handle empty strings from UI and convert to int with defaults
        self.cases_page_size = (
            int(cases_page_size)
            if cases_page_size and str(cases_page_size).strip()
            else 250
        )
        self.max_pages = (
            int(max_pages) if max_pages and str(max_pages).strip() else 10000
        )
        self.skip_doc_absolute_chars = (
            int(skip_doc_absolute_chars)
            if skip_doc_absolute_chars and str(skip_doc_absolute_chars).strip()
            else 200000
        )

        # Cache for field labels and value mappings - will be populated on first use
        self._field_labels: dict[str, str] | None = None
        self._value_maps: dict[str, dict[str, str]] | None = None

    # --- Rich text sanitization helpers ---
    # Note: TestRail stores some fields as HTML (e.g. shared test steps).
    # This function handles both HTML and plain text.
    @staticmethod
    def _sanitize_rich_text(value: Any) -> str:
        if value is None:
            return ""
        text = str(value)

        # Parse HTML and remove image tags
        soup = BeautifulSoup(text, "html.parser")

        # Remove all img tags and their containers
        for img_tag in soup.find_all("img"):
            img_tag.decompose()
        for span in soup.find_all("span", class_="markdown-img-container"):
            span.decompose()

        # Use format_document_soup for better HTML-to-text conversion
        # This preserves document structure (paragraphs, lists, line breaks, etc.)
        text = format_document_soup(soup)

        # Also remove markdown-style image references (in case any remain)
        text = remove_markdown_image_references(text)

        return text.strip()

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        # Expected keys from UI credential JSON
        self.base_url = str(credentials["testrail_base_url"]).rstrip("/")
        self.username = str(credentials["testrail_username"])  # email or username
        self.api_key = str(credentials["testrail_api_key"])  # API key (password)
        return None

    def validate_connector_settings(self) -> None:
        """Lightweight validation to surface common misconfigurations early."""
        projects = self._list_projects()
        if not projects:
            logger.warning("TestRail: no projects visible to this credential.")

    # ---- API helpers ----
    def _api_get(self, endpoint: str, params: Optional[dict[str, Any]] = None) -> Any:
        if not self.base_url or not self.username or not self.api_key:
            raise ConnectorMissingCredentialError("testrail")

        # TestRail API base is typically /index.php?/api/v2/<endpoint>
        url = f"{self.base_url}/index.php?/api/v2/{endpoint}"
        try:
            response = requests.get(
                url,
                auth=(self.username, self.api_key),
                params=params,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if getattr(e, "response", None) else None
            if status == 401:
                raise CredentialExpiredError(
                    "Invalid or expired TestRail credentials (HTTP 401)."
                ) from e
            if status == 403:
                raise InsufficientPermissionsError(
                    "Insufficient permissions to access TestRail resources (HTTP 403)."
                ) from e
            raise UnexpectedValidationError(
                f"Unexpected TestRail HTTP error (status={status})."
            ) from e
        except requests.exceptions.RequestException as e:
            raise UnexpectedValidationError(f"TestRail request failed: {e}") from e

        try:
            return response.json()
        except ValueError as e:
            raise UnexpectedValidationError(
                "Invalid JSON returned by TestRail API"
            ) from e

    def _list_projects(self) -> list[dict[str, Any]]:
        projects = self._api_get("get_projects")
        if isinstance(projects, dict):
            projects_list = projects.get("projects")
            return projects_list if isinstance(projects_list, list) else []
        return []

    def _list_suites(self, project_id: int) -> list[dict[str, Any]]:
        """Return suites for a project. If the project is in single-suite mode,
        some TestRail instances may return an empty list; callers should
        gracefully fallback to calling get_cases without suite_id.
        """
        suites = self._api_get(f"get_suites/{project_id}")
        if isinstance(suites, dict):
            suites_list = suites.get("suites")
            return suites_list if isinstance(suites_list, list) else []
        return []

    def _get_case_fields(self) -> list[dict[str, Any]]:
        """Get case field definitions from TestRail API."""
        try:
            fields = self._api_get("get_case_fields")
            return fields if isinstance(fields, list) else []
        except Exception as e:
            logger.warning(f"Failed to fetch case fields from TestRail: {e}")
            return []

    def _parse_items_string(self, items_str: str) -> dict[str, str]:
        """Parse items string from field config into ID -> label mapping.

        Format: "1, Option A\\n2, Option B\\n3, Option C"
        Returns: {"1": "Option A", "2": "Option B", "3": "Option C"}
        """
        id_to_label: dict[str, str] = {}
        if not items_str:
            return id_to_label

        for line in items_str.split("\n"):
            line = line.strip()
            if not line:
                continue
            parts = line.split(",", 1)
            if len(parts) == 2:
                item_id = parts[0].strip()
                item_label = parts[1].strip()
                id_to_label[item_id] = item_label

        return id_to_label

    def _build_field_maps(self) -> tuple[dict[str, str], dict[str, dict[str, str]]]:
        """Build both field labels and value mappings in one pass.

        Returns:
            (field_labels, value_maps) where:
            - field_labels: system_name -> label
            - value_maps: system_name -> {id -> label}
        """
        field_labels = {}
        value_maps = {}

        try:
            fields = self._get_case_fields()
            for field in fields:
                system_name = field.get("system_name")

                # Build field label map
                label = field.get("label")
                if system_name and label:
                    field_labels[system_name] = label

                # Build value map if needed
                if system_name in self.FIELDS_NEEDING_VALUE_MAPPING:
                    configs = field.get("configs", [])
                    if configs and len(configs) > 0:
                        options = configs[0].get("options", {})
                        items_str = options.get("items")
                        if items_str:
                            value_maps[system_name] = self._parse_items_string(
                                items_str
                            )

        except Exception as e:
            logger.warning(f"Failed to build field maps from TestRail: {e}")

        return field_labels, value_maps

    def _get_field_labels(self) -> dict[str, str]:
        """Get field labels, fetching from API if not cached."""
        if self._field_labels is None:
            self._field_labels, self._value_maps = self._build_field_maps()
        return self._field_labels

    def _get_value_maps(self) -> dict[str, dict[str, str]]:
        """Get value maps, fetching from API if not cached."""
        if self._value_maps is None:
            self._field_labels, self._value_maps = self._build_field_maps()
        return self._value_maps

    def _map_field_value(self, field_name: str, field_value: Any) -> str:
        """Map a field value using the value map if available.

        Examples:
        - priority_id: 2 -> "Medium"
        - custom_case_team_ownership: [10] -> "Sim Platform"
        - custom_case_environments: [1, 2] -> "Local, Cloud"
        """
        if field_value is None or field_value == "":
            return ""

        # Get value map for this field
        value_maps = self._get_value_maps()
        value_map = value_maps.get(field_name, {})

        # Handle list values
        if isinstance(field_value, list):
            if not field_value:
                return ""
            mapped = [value_map.get(str(v), str(v)) for v in field_value]
            return ", ".join(mapped)

        # Handle single values
        val_str = str(field_value)
        return value_map.get(val_str, val_str)

    def _get_cases(
        self, project_id: int, suite_id: Optional[int], limit: int, offset: int
    ) -> list[dict[str, Any]]:
        """Get cases for a project from the API."""
        params: dict[str, Any] = {"limit": limit, "offset": offset}
        if suite_id is not None:
            params["suite_id"] = suite_id
        cases_response = self._api_get(f"get_cases/{project_id}", params=params)
        cases_list: list[dict[str, Any]] = []
        if isinstance(cases_response, dict):
            cases_items = cases_response.get("cases")
            if isinstance(cases_items, list):
                cases_list = cases_items
        return cases_list

    def _iter_cases(
        self,
        project_id: int,
        suite_id: Optional[int] = None,
        start: Optional[SecondsSinceUnixEpoch] = None,
        end: Optional[SecondsSinceUnixEpoch] = None,
    ) -> Iterator[dict[str, Any]]:
        # Pagination: TestRail supports 'limit' and 'offset' for many list endpoints
        limit = self.cases_page_size
        # Use a bounded page loop to avoid infinite loops on API anomalies
        for page_index in range(self.max_pages):
            offset = page_index * limit
            cases = self._get_cases(project_id, suite_id, limit, offset)

            if not cases:
                break

            # Filter by updated window if provided
            for case in cases:
                # 'updated_on' is unix timestamp (seconds)
                updated_on = case.get("updated_on") or case.get("created_on")
                if start is not None and updated_on is not None and updated_on < start:
                    continue
                if end is not None and updated_on is not None and updated_on > end:
                    continue
                yield case

            if len(cases) < limit:
                break

    def _build_case_link(self, project_id: int, case_id: int) -> str:  # noqa: ARG002
        # Standard UI link to a case
        return f"{self.base_url}/index.php?/cases/view/{case_id}"

    def _doc_from_case(
        self,
        project: dict[str, Any],
        case: dict[str, Any],
        suite: dict[str, Any] | None = None,  # noqa: ARG002
    ) -> Document | None:
        project_id = project.get("id")
        if not isinstance(project_id, int):
            logger.warning(
                "Skipping TestRail case because project id is missing or invalid: %s",
                project_id,
            )
            return None

        case_id = case.get("id")
        if not isinstance(case_id, int):
            logger.warning(
                "Skipping TestRail case because case id is missing or invalid: %s",
                case_id,
            )
            return None

        title = case.get("title", f"Case {case_id}")
        case_key = f"C{case_id}"

        # Convert epoch seconds to aware datetime if available
        updated = case.get("updated_on") or case.get("created_on")
        updated_dt = (
            datetime.fromtimestamp(updated, tz=timezone.utc)
            if isinstance(updated, (int, float))
            else None
        )

        text_lines: list[str] = []
        if case.get("title"):
            text_lines.append(f"Title: {case['title']}")
        if case_key:
            text_lines.append(f"Case ID: {case_key}")
        if case_id is not None:
            text_lines.append(f"ID: {case_id}")
        doc_link = case.get("custom_documentation_link")
        if doc_link:
            text_lines.append(f"Documentation: {doc_link}")

        # Add fields that need value mapping
        field_labels = self._get_field_labels()
        for field_name in self.FIELDS_NEEDING_VALUE_MAPPING:
            field_value = case.get(field_name)
            if field_value is not None and field_value != "" and field_value != []:
                mapped_value = self._map_field_value(field_name, field_value)
                if mapped_value:
                    # Get label from TestRail field definition
                    label = field_labels.get(
                        field_name, field_name.replace("_", " ").title()
                    )
                    text_lines.append(f"{label}: {mapped_value}")

        pre = self._sanitize_rich_text(case.get("custom_preconds"))
        if pre:
            text_lines.append(f"Preconditions: {pre}")

        # Steps: use separated steps format if available
        steps_added = False
        steps_separated = case.get("custom_steps_separated")
        if isinstance(steps_separated, list) and steps_separated:
            rendered_steps: list[str] = []
            for idx, step_item in enumerate(steps_separated, start=1):
                step_content = self._sanitize_rich_text(step_item.get("content"))
                step_expected = self._sanitize_rich_text(step_item.get("expected"))
                parts: list[str] = []
                if step_content:
                    parts.append(f"Step {idx}: {step_content}")
                else:
                    parts.append(f"Step {idx}:")
                if step_expected:
                    parts.append(f"Expected: {step_expected}")
                rendered_steps.append("\n".join(parts))
            if rendered_steps:
                text_lines.append("Steps:\n" + "\n".join(rendered_steps))
                steps_added = True

        # Fallback to custom_steps and custom_expected if no separated steps
        if not steps_added:
            custom_steps = self._sanitize_rich_text(case.get("custom_steps"))
            custom_expected = self._sanitize_rich_text(case.get("custom_expected"))
            if custom_steps:
                text_lines.append(f"Steps: {custom_steps}")
            if custom_expected:
                text_lines.append(f"Expected: {custom_expected}")

        link = self._build_case_link(project_id, case_id)

        # Build full text and apply size policies
        full_text = "\n".join(text_lines)
        if len(full_text) > self.skip_doc_absolute_chars:
            logger.warning(
                f"Skipping TestRail case {case_id} due to excessive size: {len(full_text)} chars"
            )
            return None

        # Metadata for document identification
        metadata: dict[str, Any] = {}
        if case_key:
            metadata["case_key"] = case_key

        # Include the human-friendly case key in identifiers for easier search
        display_title = f"{case_key}: {title}" if case_key else title

        return Document(
            id=f"TESTRAIL_CASE_{case_id}",
            source=DocumentSource.TESTRAIL,
            semantic_identifier=display_title,
            title=display_title,
            sections=[TextSection(link=link, text=full_text)],
            metadata=metadata,
            doc_updated_at=updated_dt,
        )

    def _generate_documents(
        self,
        start: Optional[SecondsSinceUnixEpoch],
        end: Optional[SecondsSinceUnixEpoch],
    ) -> GenerateDocumentsOutput:
        if not self.base_url or not self.username or not self.api_key:
            raise ConnectorMissingCredentialError("testrail")

        doc_batch: list[Document | HierarchyNode] = []

        projects = self._list_projects()
        project_filter: list[int] | None = self.project_ids

        for project in projects:
            project_id_raw = project.get("id")
            if not isinstance(project_id_raw, int):
                logger.warning(
                    "Skipping TestRail project with invalid id: %s", project_id_raw
                )
                continue
            project_id = project_id_raw
            # None = index all, [] = index none, [1,2,3] = index only those
            if project_filter is not None and project_id not in project_filter:
                continue

            suites = self._list_suites(project_id)
            if suites:
                for s in suites:
                    suite_id = s.get("id")
                    for case in self._iter_cases(project_id, suite_id, start, end):
                        doc = self._doc_from_case(project, case, s)
                        if doc is None:
                            continue
                        doc_batch.append(doc)
                        if len(doc_batch) >= self.batch_size:
                            yield doc_batch
                            doc_batch = []
            else:
                # single-suite mode fallback
                for case in self._iter_cases(project_id, None, start, end):
                    doc = self._doc_from_case(project, case, None)
                    if doc is None:
                        continue
                    doc_batch.append(doc)
                    if len(doc_batch) >= self.batch_size:
                        yield doc_batch
                        doc_batch = []

        if doc_batch:
            yield doc_batch

    # ---- Onyx interfaces ----
    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._generate_documents(start=None, end=None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        return self._generate_documents(start=start, end=end)


if __name__ == "__main__":
    from onyx.configs.app_configs import (
        TESTRAIL_API_KEY,
        TESTRAIL_BASE_URL,
        TESTRAIL_USERNAME,
    )

    connector = TestRailConnector()

    connector.load_credentials(
        {
            "testrail_base_url": TESTRAIL_BASE_URL,
            "testrail_username": TESTRAIL_USERNAME,
            "testrail_api_key": TESTRAIL_API_KEY,
        }
    )

    connector.validate_connector_settings()

    # Probe a tiny batch from load
    total = 0
    for batch in connector.load_from_state():
        print(f"Fetched batch: {len(batch)} docs")
        total += len(batch)
        if total >= 10:
            break
    print(f"Total fetched in test: {total}")


================================================
FILE: backend/onyx/connectors/web/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/web/connector.py
================================================
import ipaddress
import random
import socket
import time
from datetime import datetime
from datetime import timezone
from enum import Enum
from typing import Any
from typing import cast
from typing import Tuple
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from oauthlib.oauth2 import BackendApplicationClient
from playwright.sync_api import BrowserContext
from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright
from playwright.sync_api import TimeoutError
from requests_oauthlib import OAuth2Session  # type:ignore
from urllib3.exceptions import MaxRetryError

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.utils.logger import setup_logger
from onyx.utils.sitemap import list_pages_for_site
from onyx.utils.web_content import extract_pdf_text
from onyx.utils.web_content import is_pdf_resource
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


class ScrapeSessionContext:
    """Session level context for scraping"""

    def __init__(self, base_url: str, to_visit: list[str]):
        self.base_url = base_url
        self.to_visit = to_visit
        self.visited_links: set[str] = set()
        self.content_hashes: set[int] = set()

        self.doc_batch: list[Document | HierarchyNode] = []

        self.at_least_one_doc: bool = False
        self.last_error: str | None = None
        self.needs_retry: bool = False

        self.playwright: Playwright | None = None
        self.playwright_context: BrowserContext | None = None

    def initialize(self) -> None:
        self.stop()
        self.playwright, self.playwright_context = start_playwright()

    def stop(self) -> None:
        if self.playwright_context:
            self.playwright_context.close()
            self.playwright_context = None

        if self.playwright:
            self.playwright.stop()
            self.playwright = None


class ScrapeResult:
    doc: Document | None = None
    retry: bool = False


WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
# Threshold for determining when to replace vs append iframe content
IFRAME_TEXT_LENGTH_THRESHOLD = 700
# Message indicating JavaScript is disabled, which often appears when scraping fails
JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser"
# Grace period after page navigation to allow bot-detection challenges
# and SPA content rendering to complete
PAGE_RENDER_TIMEOUT_MS = 5000

# Define common headers that mimic a real browser
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
DEFAULT_HEADERS = {
    "User-Agent": DEFAULT_USER_AGENT,
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
        "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    # Brotli decoding has been flaky in brotlicffi/httpx for certain chunked responses;
    # stick to gzip/deflate to keep connectivity checks stable.
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Sec-CH-UA": '"Google Chrome";v="123", "Not:A-Brand";v="8"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"macOS"',
}


class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
    # Given a base site, index everything under that path
    RECURSIVE = "recursive"
    # Given a URL, index only the given page
    SINGLE = "single"
    # Given a sitemap.xml URL, parse all the pages in it
    SITEMAP = "sitemap"
    # Given a file upload where every line is a URL, parse all the URLs provided
    UPLOAD = "upload"


def protected_url_check(url: str) -> None:
    """Couple considerations:
    - DNS mapping changes over time so we don't want to cache the results
    - Fetching this is assumed to be relatively fast compared to other bottlenecks like reading
      the page or embedding the contents
    - To be extra safe, all IPs associated with the URL must be global
    - This is to prevent misuse and not explicit attacks
    """
    if not WEB_CONNECTOR_VALIDATE_URLS:
        return

    parse = urlparse(url)
    if parse.scheme != "http" and parse.scheme != "https":
        raise ValueError("URL must be of scheme https?://")

    if not parse.hostname:
        raise ValueError("URL must include a hostname")

    try:
        # This may give a large list of IP addresses for domains with extensive DNS configurations
        # such as large distributed systems of CDNs
        info = socket.getaddrinfo(parse.hostname, None)
    except socket.gaierror as e:
        raise ConnectionError(f"DNS resolution failed for {parse.hostname}: {e}")

    for address in info:
        ip = address[4][0]
        if not ipaddress.ip_address(ip).is_global:
            raise ValueError(
                f"Non-global IP address detected: {ip}, skipping page {url}. "
                f"The Web Connector is not allowed to read loopback, link-local, or private ranges"
            )


def check_internet_connection(url: str) -> None:
    try:
        # Use a more realistic browser-like request
        session = requests.Session()
        session.headers.update(DEFAULT_HEADERS)

        response = session.get(url, timeout=5, allow_redirects=True)

        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Extract status code from the response, defaulting to -1 if response is None
        status_code = e.response.status_code if e.response is not None else -1

        # For 403 errors, we do have internet connection, but the request is blocked by the server
        # this is usually due to bot detection. Future calls (via Playwright) will usually get
        # around this.
        if status_code == 403:
            logger.warning(
                f"Received 403 Forbidden for {url}, will retry with browser automation"
            )
            return

        error_msg = {
            400: "Bad Request",
            401: "Unauthorized",
            403: "Forbidden",
            404: "Not Found",
            500: "Internal Server Error",
            502: "Bad Gateway",
            503: "Service Unavailable",
            504: "Gateway Timeout",
        }.get(status_code, "HTTP Error")
        raise Exception(f"{error_msg} ({status_code}) for {url} - {e}")
    except requests.exceptions.SSLError as e:
        cause = (
            e.args[0].reason
            if isinstance(e.args, tuple) and isinstance(e.args[0], MaxRetryError)
            else e.args
        )
        raise Exception(f"SSL error {str(cause)}")
    except (requests.RequestException, ValueError) as e:
        raise Exception(f"Unable to reach {url} - check your internet connection: {e}")


def is_valid_url(url: str) -> bool:
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except ValueError:
        return False


def _same_site(base_url: str, candidate_url: str) -> bool:
    base, candidate = urlparse(base_url), urlparse(candidate_url)
    base_netloc = base.netloc.lower().removeprefix("www.")
    candidate_netloc = candidate.netloc.lower().removeprefix("www.")
    if base_netloc != candidate_netloc:
        return False

    base_path = (base.path or "/").rstrip("/")
    if base_path in ("", "/"):
        return True

    candidate_path = candidate.path or "/"
    if candidate_path == base_path:
        return True

    boundary = f"{base_path}/"
    return candidate_path.startswith(boundary)


def get_internal_links(
    base_url: str, url: str, soup: BeautifulSoup, should_ignore_pound: bool = True
) -> set[str]:
    internal_links = set()
    for link in cast(list[dict[str, Any]], soup.find_all("a")):
        href = cast(str | None, link.get("href"))
        if not href:
            continue

        # Account for malformed backslashes in URLs
        href = href.replace("\\", "/")

        # "#!" indicates the page is using a hashbang URL, which is a client-side routing technique
        if should_ignore_pound and "#" in href and "#!" not in href:
            href = href.split("#")[0]

        if not is_valid_url(href):
            # Relative path handling
            href = urljoin(url, href)

        if _same_site(base_url, href):
            internal_links.add(href)
    return internal_links


def start_playwright() -> Tuple[Playwright, BrowserContext]:
    playwright = sync_playwright().start()

    # Launch browser with more realistic settings
    browser = playwright.chromium.launch(
        headless=True,
        args=[
            "--disable-blink-features=AutomationControlled",
            "--disable-features=IsolateOrigins,site-per-process",
            "--disable-site-isolation-trials",
        ],
    )

    # Create a context with realistic browser properties
    context = browser.new_context(
        user_agent=DEFAULT_USER_AGENT,
        viewport={"width": 1440, "height": 900},
        device_scale_factor=2.0,
        locale="en-US",
        timezone_id="America/Los_Angeles",
        has_touch=False,
        java_script_enabled=True,
        color_scheme="light",
        # Add more realistic browser properties
        bypass_csp=True,
        ignore_https_errors=True,
    )

    # Set additional headers to mimic a real browser
    context.set_extra_http_headers(
        {
            "Accept": DEFAULT_HEADERS["Accept"],
            "Accept-Language": DEFAULT_HEADERS["Accept-Language"],
            "Sec-Fetch-Dest": DEFAULT_HEADERS["Sec-Fetch-Dest"],
            "Sec-Fetch-Mode": DEFAULT_HEADERS["Sec-Fetch-Mode"],
            "Sec-Fetch-Site": DEFAULT_HEADERS["Sec-Fetch-Site"],
            "Sec-Fetch-User": DEFAULT_HEADERS["Sec-Fetch-User"],
            "Sec-CH-UA": DEFAULT_HEADERS["Sec-CH-UA"],
            "Sec-CH-UA-Mobile": DEFAULT_HEADERS["Sec-CH-UA-Mobile"],
            "Sec-CH-UA-Platform": DEFAULT_HEADERS["Sec-CH-UA-Platform"],
            "Cache-Control": "max-age=0",
            "DNT": "1",
        }
    )

    # Add a script to modify navigator properties to avoid detection
    context.add_init_script(
        """
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
        });
        Object.defineProperty(navigator, 'plugins', {
            get: () => [1, 2, 3, 4, 5]
        });
        Object.defineProperty(navigator, 'languages', {
            get: () => ['en-US', 'en']
        });
    """
    )

    if (
        WEB_CONNECTOR_OAUTH_CLIENT_ID
        and WEB_CONNECTOR_OAUTH_CLIENT_SECRET
        and WEB_CONNECTOR_OAUTH_TOKEN_URL
    ):
        client = BackendApplicationClient(client_id=WEB_CONNECTOR_OAUTH_CLIENT_ID)
        oauth = OAuth2Session(client=client)
        token = oauth.fetch_token(
            token_url=WEB_CONNECTOR_OAUTH_TOKEN_URL,
            client_id=WEB_CONNECTOR_OAUTH_CLIENT_ID,
            client_secret=WEB_CONNECTOR_OAUTH_CLIENT_SECRET,
        )
        context.set_extra_http_headers(
            {"Authorization": "Bearer {}".format(token["access_token"])}
        )

    return playwright, context


def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
    # requests should handle brotli compression automatically
    # as long as the brotli package is available in the venv. Leaving this line here to avoid
    # a regression as someone says "Ah, looks like this brotli package isn't used anywhere, let's remove it"
    # import brotli
    try:
        response = requests.get(sitemap_url, headers=DEFAULT_HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        urls = [
            _ensure_absolute_url(sitemap_url, loc_tag.text)
            for loc_tag in soup.find_all("loc")
        ]

        if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
            # the given url doesn't look like a sitemap, let's try to find one
            urls = list_pages_for_site(sitemap_url)

        if len(urls) == 0:
            raise ValueError(
                f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
            )

        return urls
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}: {e}")
    except ValueError as e:
        raise RuntimeError(f"Error processing sitemap {sitemap_url}: {e}")
    except Exception as e:
        raise RuntimeError(
            f"Unexpected error while processing sitemap {sitemap_url}: {e}"
        )


def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
    if not urlparse(maybe_relative_url).netloc:
        return urljoin(source_url, maybe_relative_url)
    return maybe_relative_url


def _ensure_valid_url(url: str) -> str:
    if "://" not in url:
        return "https://" + url
    return url


def _read_urls_file(location: str) -> list[str]:
    with open(location, "r") as f:
        urls = [_ensure_valid_url(line.strip()) for line in f if line.strip()]
    return urls


def _get_datetime_from_last_modified_header(last_modified: str) -> datetime | None:
    try:
        return datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(
            tzinfo=timezone.utc
        )
    except (ValueError, TypeError):
        return None


def _handle_cookies(context: BrowserContext, url: str) -> None:
    """Handle cookies for the given URL to help with bot detection"""
    try:
        # Parse the URL to get the domain
        parsed_url = urlparse(url)
        domain = parsed_url.netloc

        # Add some common cookies that might help with bot detection
        cookies: list[dict[str, str]] = [
            {
                "name": "cookieconsent",
                "value": "accepted",
                "domain": domain,
                "path": "/",
            },
            {
                "name": "consent",
                "value": "true",
                "domain": domain,
                "path": "/",
            },
            {
                "name": "session",
                "value": "random_session_id",
                "domain": domain,
                "path": "/",
            },
        ]

        # Add cookies to the context
        for cookie in cookies:
            try:
                context.add_cookies([cookie])  # type: ignore
            except Exception as e:
                logger.debug(f"Failed to add cookie {cookie['name']} for {domain}: {e}")
    except Exception:
        logger.exception(
            f"Unexpected error while handling cookies for Web Connector with URL {url}"
        )


class WebConnector(LoadConnector):
    MAX_RETRIES = 3

    def __init__(
        self,
        base_url: str,  # Can't change this without disrupting existing users
        web_connector_type: str = WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
        mintlify_cleanup: bool = True,  # Mostly ok to apply to other websites as well
        batch_size: int = INDEX_BATCH_SIZE,
        scroll_before_scraping: bool = False,
        **kwargs: Any,  # noqa: ARG002
    ) -> None:
        self.mintlify_cleanup = mintlify_cleanup
        self.batch_size = batch_size
        self.recursive = False
        self.scroll_before_scraping = scroll_before_scraping
        self.web_connector_type = web_connector_type
        if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:
            self.recursive = True
            self.to_visit_list = [_ensure_valid_url(base_url)]
            return

        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value:
            self.to_visit_list = [_ensure_valid_url(base_url)]

        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP:
            self.to_visit_list = extract_urls_from_sitemap(_ensure_valid_url(base_url))

        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.UPLOAD:
            # Explicitly check if running in multi-tenant mode to prevent potential security risks
            if MULTI_TENANT:
                raise ValueError(
                    "Upload input for web connector is not supported in cloud environments"
                )

            logger.warning(
                "This is not a UI supported Web Connector flow, are you sure you want to do this?"
            )
            self.to_visit_list = _read_urls_file(base_url)

        else:
            raise ValueError(
                "Invalid Web Connector Config, must choose a valid type between: "
            )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        if credentials:
            logger.warning("Unexpected credentials provided for Web Connector")
        return None

    def _do_scrape(
        self,
        index: int,
        initial_url: str,
        session_ctx: ScrapeSessionContext,
    ) -> ScrapeResult:
        """Returns a ScrapeResult object with a doc and retry flag."""

        if session_ctx.playwright is None:
            raise RuntimeError("scrape_context.playwright is None")

        if session_ctx.playwright_context is None:
            raise RuntimeError("scrape_context.playwright_context is None")

        result = ScrapeResult()

        # Handle cookies for the URL
        _handle_cookies(session_ctx.playwright_context, initial_url)

        # First do a HEAD request to check content type without downloading the entire content
        head_response = requests.head(
            initial_url, headers=DEFAULT_HEADERS, allow_redirects=True
        )
        content_type = head_response.headers.get("content-type")
        is_pdf = is_pdf_resource(initial_url, content_type)

        if is_pdf:
            # PDF files are not checked for links
            response = requests.get(initial_url, headers=DEFAULT_HEADERS)
            page_text, metadata = extract_pdf_text(response.content)
            last_modified = response.headers.get("Last-Modified")

            result.doc = Document(
                id=initial_url,
                sections=[TextSection(link=initial_url, text=page_text)],
                source=DocumentSource.WEB,
                semantic_identifier=initial_url.rstrip("/").split("/")[-1]
                or initial_url,
                metadata=metadata,
                doc_updated_at=(
                    _get_datetime_from_last_modified_header(last_modified)
                    if last_modified
                    else None
                ),
            )

            return result

        page = session_ctx.playwright_context.new_page()
        try:
            # Use "commit" instead of "domcontentloaded" to avoid hanging on bot-detection pages
            # that may never fire domcontentloaded. "commit" waits only for navigation to be
            # committed (response received), then we add a short wait for initial rendering.
            page_response = page.goto(
                initial_url,
                timeout=30000,  # 30 seconds
                wait_until="commit",  # Wait for navigation to commit
            )
            # Give the page a moment to start rendering after navigation commits.
            # Allows CloudFlare and other bot-detection challenges to complete.
            page.wait_for_timeout(PAGE_RENDER_TIMEOUT_MS)

            # Wait for network activity to settle so SPAs that fetch content
            # asynchronously after the initial JS bundle have time to render.
            try:
                # A bit of extra time to account for long-polling, websockets, etc.
                page.wait_for_load_state("networkidle", timeout=PAGE_RENDER_TIMEOUT_MS)
            except TimeoutError:
                pass

            last_modified = (
                page_response.header_value("Last-Modified") if page_response else None
            )
            final_url = page.url
            if final_url != initial_url:
                protected_url_check(final_url)
                initial_url = final_url
                if initial_url in session_ctx.visited_links:
                    logger.info(
                        f"{index}: {initial_url} redirected to {final_url} - already indexed"
                    )
                    page.close()
                    return result

                logger.info(f"{index}: {initial_url} redirected to {final_url}")
                session_ctx.visited_links.add(initial_url)

            # If we got here, the request was successful
            if self.scroll_before_scraping:
                scroll_attempts = 0
                previous_height = page.evaluate("document.body.scrollHeight")
                while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
                    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                    # Wait for content to load, but catch timeout if page never reaches networkidle
                    # (e.g., CloudFlare protection keeps making requests)
                    try:
                        page.wait_for_load_state(
                            "networkidle", timeout=PAGE_RENDER_TIMEOUT_MS
                        )
                    except TimeoutError:
                        # If networkidle times out, just give it a moment for content to render
                        time.sleep(1)
                    time.sleep(0.5)  # let javascript run

                    new_height = page.evaluate("document.body.scrollHeight")
                    if new_height == previous_height:
                        break  # Stop scrolling when no more content is loaded
                    previous_height = new_height
                    scroll_attempts += 1

            content = page.content()
            soup = BeautifulSoup(content, "html.parser")

            if self.recursive:
                internal_links = get_internal_links(
                    session_ctx.base_url, initial_url, soup
                )
                for link in internal_links:
                    if link not in session_ctx.visited_links:
                        session_ctx.to_visit.append(link)

            if page_response and str(page_response.status)[0] in ("4", "5"):
                session_ctx.last_error = f"Skipped indexing {initial_url} due to HTTP {page_response.status} response"
                logger.info(session_ctx.last_error)
                result.retry = True
                return result

            # after this point, we don't need the caller to retry
            parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)

            """For websites containing iframes that need to be scraped,
            the code below can extract text from within these iframes.
            """
            logger.debug(
                f"{index}: Length of cleaned text {len(parsed_html.cleaned_text)}"
            )
            if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text:
                iframe_count = page.frame_locator("iframe").locator("html").count()
                if iframe_count > 0:
                    iframe_texts = (
                        page.frame_locator("iframe").locator("html").all_inner_texts()
                    )
                    document_text = "\n".join(iframe_texts)
                    """ 700 is the threshold value for the length of the text extracted
                    from the iframe based on the issue faced """
                    if len(parsed_html.cleaned_text) < IFRAME_TEXT_LENGTH_THRESHOLD:
                        parsed_html.cleaned_text = document_text
                    else:
                        parsed_html.cleaned_text += "\n" + document_text

            # Sometimes pages with #! will serve duplicate content
            # There are also just other ways this can happen
            hashed_text = hash((parsed_html.title, parsed_html.cleaned_text))
            if hashed_text in session_ctx.content_hashes:
                logger.info(
                    f"{index}: Skipping duplicate title + content for {initial_url}"
                )
                return result

            session_ctx.content_hashes.add(hashed_text)

            result.doc = Document(
                id=initial_url,
                sections=[TextSection(link=initial_url, text=parsed_html.cleaned_text)],
                source=DocumentSource.WEB,
                semantic_identifier=parsed_html.title or initial_url,
                metadata={},
                doc_updated_at=(
                    _get_datetime_from_last_modified_header(last_modified)
                    if last_modified
                    else None
                ),
            )
        finally:
            page.close()

        return result

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Traverses through all pages found on the website
        and converts them into documents"""

        if not self.to_visit_list:
            raise ValueError("No URLs to visit")

        base_url = self.to_visit_list[0]  # For the recursive case
        check_internet_connection(base_url)  # make sure we can connect to the base url

        session_ctx = ScrapeSessionContext(base_url, self.to_visit_list)
        session_ctx.initialize()

        while session_ctx.to_visit:
            initial_url = session_ctx.to_visit.pop()
            if initial_url in session_ctx.visited_links:
                continue
            session_ctx.visited_links.add(initial_url)

            try:
                protected_url_check(initial_url)
            except Exception as e:
                session_ctx.last_error = f"Invalid URL {initial_url} due to {e}"
                logger.warning(session_ctx.last_error)
                continue

            index = len(session_ctx.visited_links)
            logger.info(f"{index}: Visiting {initial_url}")

            # Add retry mechanism with exponential backoff
            retry_count = 0

            while retry_count < self.MAX_RETRIES:
                if retry_count > 0:
                    # Add a random delay between retries (exponential backoff)
                    delay = min(2**retry_count + random.uniform(0, 1), 10)
                    logger.info(
                        f"Retry {retry_count}/{self.MAX_RETRIES} for {initial_url} after {delay:.2f}s delay"
                    )
                    time.sleep(delay)

                try:
                    result = self._do_scrape(index, initial_url, session_ctx)
                    if result.retry:
                        continue

                    if result.doc:
                        session_ctx.doc_batch.append(result.doc)
                except Exception as e:
                    session_ctx.last_error = f"Failed to fetch '{initial_url}': {e}"
                    logger.exception(session_ctx.last_error)
                    session_ctx.initialize()
                    continue
                finally:
                    retry_count += 1

                break  # success / don't retry

            if len(session_ctx.doc_batch) >= self.batch_size:
                session_ctx.initialize()
                session_ctx.at_least_one_doc = True
                yield session_ctx.doc_batch
                session_ctx.doc_batch = []

        if session_ctx.doc_batch:
            session_ctx.stop()
            session_ctx.at_least_one_doc = True
            yield session_ctx.doc_batch

        if not session_ctx.at_least_one_doc:
            if session_ctx.last_error:
                raise RuntimeError(session_ctx.last_error)
            raise RuntimeError("No valid pages found.")

        session_ctx.stop()

    def validate_connector_settings(self) -> None:
        # Make sure we have at least one valid URL to check
        if not self.to_visit_list:
            raise ConnectorValidationError(
                "No URL configured. Please provide at least one valid URL."
            )

        if (
            self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value
            or self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value
        ):
            return None

        # We'll just test the first URL for connectivity and correctness
        test_url = self.to_visit_list[0]

        # Check that the URL is allowed and well-formed
        try:
            protected_url_check(test_url)
        except ValueError as e:
            raise ConnectorValidationError(
                f"Protected URL check failed for '{test_url}': {e}"
            )
        except ConnectionError as e:
            # Typically DNS or other network issues
            raise ConnectorValidationError(str(e))

        # Make a quick request to see if we get a valid response
        try:
            check_internet_connection(test_url)
        except Exception as e:
            err_str = str(e)
            if "401" in err_str:
                raise CredentialExpiredError(
                    f"Unauthorized access to '{test_url}': {e}"
                )
            elif "403" in err_str:
                raise InsufficientPermissionsError(
                    f"Forbidden access to '{test_url}': {e}"
                )
            elif "404" in err_str:
                raise ConnectorValidationError(f"Page not found for '{test_url}': {e}")
            elif "Max retries exceeded" in err_str and "NameResolutionError" in err_str:
                raise ConnectorValidationError(
                    f"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection."
                )
            else:
                # Could be a 5xx or another error, treat as unexpected
                raise UnexpectedValidationError(
                    f"Unexpected error validating '{test_url}': {e}"
                )


if __name__ == "__main__":
    connector = WebConnector("https://docs.onyx.app/")
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/wikipedia/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/wikipedia/connector.py
================================================
from typing import ClassVar

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.mediawiki import wiki


class WikipediaConnector(wiki.MediaWikiConnector):
    """Connector for Wikipedia."""

    document_source_type: ClassVar[DocumentSource] = DocumentSource.WIKIPEDIA

    def __init__(
        self,
        categories: list[str],
        pages: list[str],
        recurse_depth: int,
        language_code: str = "en",
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        super().__init__(
            hostname="wikipedia.org",
            categories=categories,
            pages=pages,
            recurse_depth=recurse_depth,
            language_code=language_code,
            batch_size=batch_size,
        )


================================================
FILE: backend/onyx/connectors/xenforo/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/xenforo/connector.py
================================================
"""
This is the XenforoConnector class. It is used to connect to a Xenforo forum and load or update documents from the forum.

To use this class, you need to provide the URL of the Xenforo forum board you want to connect to when creating an instance
of the class. The URL should be a string that starts with 'http://' or 'https://', followed by the domain name of the
forum, followed by the board name. For example:

    base_url = 'https://www.example.com/forum/boards/some-topic/'

The `load_from_state` method is used to load documents from the forum. It takes an optional `state` parameter, which
can be used to specify a state from which to start loading documents.
"""

import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from urllib.parse import urlparse

import pytz
import requests
from bs4 import BeautifulSoup
from bs4 import Tag

from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_title(soup: BeautifulSoup) -> str:
    el = soup.find("h1", "p-title-value")
    if not el:
        return ""
    title = el.text
    for char in (";", ":", "!", "*", "/", "\\", "?", '"', "<", ">", "|"):
        title = title.replace(char, "_")
    return title


def get_pages(soup: BeautifulSoup, url: str) -> list[str]:
    page_tags = soup.select("li.pageNav-page")
    page_numbers = []
    for button in page_tags:
        if re.match(r"^\d+$", button.text):
            page_numbers.append(button.text)

    max_pages = int(max(page_numbers, key=int)) if page_numbers else 1

    all_pages = []
    for x in range(1, int(max_pages) + 1):
        all_pages.append(f"{url}page-{x}")
    return all_pages


def parse_post_date(post_element: BeautifulSoup) -> datetime:
    el = post_element.find("time")
    if not isinstance(el, Tag) or "datetime" not in el.attrs:
        return datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc)

    date_value = el["datetime"]

    # Ensure date_value is a string (if it's a list, take the first element)
    if isinstance(date_value, list):
        date_value = date_value[0]

    post_date = datetime.strptime(date_value, "%Y-%m-%dT%H:%M:%S%z")
    return datetime_to_utc(post_date)


def scrape_page_posts(
    soup: BeautifulSoup,
    page_index: int,
    url: str,
    initial_run: bool,
    start_time: datetime,
) -> list:
    title = get_title(soup)

    documents = []
    for post in soup.find_all("div", class_="message-inner"):
        post_date = parse_post_date(post)
        if initial_run or post_date > start_time:
            el = post.find("div", class_="bbWrapper")
            if not el:
                continue
            post_text = el.get_text(strip=True) + "\n"
            author_tag = post.find("a", class_="username")
            if author_tag is None:
                author_tag = post.find("span", class_="username")
            author = author_tag.get_text(strip=True) if author_tag else "Deleted author"
            formatted_time = post_date.strftime("%Y-%m-%d %H:%M:%S")

            # TODO: if a caller calls this for each page of a thread, it may see the
            # same post multiple times if there is a sticky post
            # that appears on each page of a thread.
            # it's important to generate unique doc id's, so page index is part of the
            # id. We may want to de-dupe this stuff inside the indexing service.
            document = Document(
                id=f"{DocumentSource.XENFORO.value}_{title}_{page_index}_{formatted_time}",
                sections=[TextSection(link=url, text=post_text)],
                title=title,
                source=DocumentSource.XENFORO,
                semantic_identifier=title,
                primary_owners=[BasicExpertInfo(display_name=author)],
                metadata={
                    "type": "post",
                    "author": author,
                    "time": formatted_time,
                },
                doc_updated_at=post_date,
            )

            documents.append(document)
    return documents


class XenforoConnector(LoadConnector):
    # Class variable to track if the connector has been run before
    has_been_run_before = False

    def __init__(self, base_url: str) -> None:
        self.base_url = base_url
        self.initial_run = not XenforoConnector.has_been_run_before
        self.start = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=1)
        self.cookies: dict[str, str] = {}
        # mimic user browser to avoid being blocked by the website (see: https://www.useragents.me/)
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/121.0.0.0 Safari/537.36"
        }

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        if credentials:
            logger.warning("Unexpected credentials provided for Xenforo Connector")
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        # Standardize URL to always end in /.
        if self.base_url[-1] != "/":
            self.base_url += "/"

        # Remove all extra parameters from the end such as page, post.
        matches = ("threads/", "boards/", "forums/")
        for each in matches:
            if each in self.base_url:
                try:
                    self.base_url = self.base_url[
                        0 : self.base_url.index(
                            "/", self.base_url.index(each) + len(each)
                        )
                        + 1
                    ]
                except ValueError:
                    pass

        doc_batch: list[Document | HierarchyNode] = []
        all_threads = []

        # If the URL contains "boards/" or "forums/", find all threads.
        if "boards/" in self.base_url or "forums/" in self.base_url:
            pages = get_pages(self.requestsite(self.base_url), self.base_url)

            # Get all pages on thread_list_page
            for pre_count, thread_list_page in enumerate(pages, start=1):
                logger.info(
                    f"Getting pages from thread_list_page.. Current: {pre_count}/{len(pages)}\r"
                )
                all_threads += self.get_threads(thread_list_page)
        # If the URL contains "threads/", add the thread to the list.
        elif "threads/" in self.base_url:
            all_threads.append(self.base_url)

        # Process all threads
        for thread_count, thread_url in enumerate(all_threads, start=1):
            soup = self.requestsite(thread_url)
            if soup is None:
                logger.error(f"Failed to load page: {self.base_url}")
                continue
            pages = get_pages(soup, thread_url)
            # Getting all pages for all threads
            for page_index, page in enumerate(pages, start=1):
                logger.info(
                    f"Progress: Page {page_index}/{len(pages)} - Thread {thread_count}/{len(all_threads)}\r"
                )
                soup_page = self.requestsite(page)
                doc_batch.extend(
                    scrape_page_posts(
                        soup_page, page_index, thread_url, self.initial_run, self.start
                    )
                )
            if doc_batch:
                yield doc_batch

        # Mark the initial run finished after all threads and pages have been processed
        XenforoConnector.has_been_run_before = True

    def get_threads(self, url: str) -> list[str]:
        soup = self.requestsite(url)
        thread_tags = soup.find_all(class_="structItem-title")
        base_url = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(url))
        threads = []
        for x in thread_tags:
            y = x.find_all(href=True)
            for element in y:
                link = element["href"]
                if "threads/" in link:
                    stripped = link[0 : link.rfind("/") + 1]
                    if base_url + stripped not in threads:
                        threads.append(base_url + stripped)
        return threads

    def requestsite(self, url: str) -> BeautifulSoup:
        try:
            response = requests.get(
                url, cookies=self.cookies, headers=self.headers, timeout=10
            )
            if response.status_code != 200:
                logger.error(
                    f"<{url}> Request Error: {response.status_code} - {response.reason}"
                )
            return BeautifulSoup(response.text, "html.parser")
        except TimeoutError:
            logger.error("Timed out Error.")
        except Exception as e:
            logger.error(f"Error on {url}")
            logger.exception(e)
        return BeautifulSoup("", "html.parser")


if __name__ == "__main__":
    connector = XenforoConnector(
        # base_url="https://cassiopaea.org/forum/threads/how-to-change-your-emotional-state.41381/"
        base_url="https://xenforo.com/community/threads/whats-new-with-enhanced-search-resource-manager-and-media-gallery-in-xenforo-2-3.220935/"
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/zendesk/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/zendesk/connector.py
================================================
import copy
import time
from collections.abc import Callable
from collections.abc import Iterator
from typing import Any
from typing import cast

import requests
from pydantic import BaseModel
from requests.exceptions import HTTPError
from typing_extensions import override

from onyx.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    time_str_to_utc,
)
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.retry_wrapper import retry_builder


MAX_PAGE_SIZE = 30  # Zendesk API maximum
MAX_AUTHOR_MAP_SIZE = 50_000  # Reset author map cache if it gets too large
_SLIM_BATCH_SIZE = 1000


class ZendeskCredentialsNotSetUpError(PermissionError):
    def __init__(self) -> None:
        super().__init__(
            "Zendesk Credentials are not set up, was load_credentials called?"
        )


class ZendeskClient:
    def __init__(
        self,
        subdomain: str,
        email: str,
        token: str,
        calls_per_minute: int | None = None,
    ):
        self.base_url = f"https://{subdomain}.zendesk.com/api/v2"
        self.auth = (f"{email}/token", token)
        self.make_request = request_with_rate_limit(self, calls_per_minute)


def request_with_rate_limit(
    client: ZendeskClient, max_calls_per_minute: int | None = None
) -> Callable[[str, dict[str, Any]], dict[str, Any]]:
    @retry_builder()
    @(
        rate_limit_builder(max_calls=max_calls_per_minute, period=60)
        if max_calls_per_minute
        else lambda x: x
    )
    def make_request(endpoint: str, params: dict[str, Any]) -> dict[str, Any]:
        response = requests.get(
            f"{client.base_url}/{endpoint}", auth=client.auth, params=params
        )

        if response.status_code == 429:
            retry_after = response.headers.get("Retry-After")
            if retry_after is not None:
                # Sleep for the duration indicated by the Retry-After header
                time.sleep(int(retry_after))

        elif (
            response.status_code == 403
            and response.json().get("error") == "SupportProductInactive"
        ):
            return response.json()

        response.raise_for_status()
        return response.json()

    return make_request


class ZendeskPageResponse(BaseModel):
    data: list[dict[str, Any]]
    meta: dict[str, Any]
    has_more: bool


def _get_content_tag_mapping(client: ZendeskClient) -> dict[str, str]:
    content_tags: dict[str, str] = {}
    params = {"page[size]": MAX_PAGE_SIZE}

    try:
        while True:
            data = client.make_request("guide/content_tags", params)

            for tag in data.get("records", []):
                content_tags[tag["id"]] = tag["name"]

            # Check if there are more pages
            if data.get("meta", {}).get("has_more", False):
                params["page[after]"] = data["meta"]["after_cursor"]
            else:
                break

        return content_tags
    except Exception as e:
        raise Exception(f"Error fetching content tags: {str(e)}")


def _get_articles(
    client: ZendeskClient, start_time: int | None = None, page_size: int = MAX_PAGE_SIZE
) -> Iterator[dict[str, Any]]:
    params = {"page[size]": page_size, "sort_by": "updated_at", "sort_order": "asc"}
    if start_time is not None:
        params["start_time"] = start_time

    while True:
        data = client.make_request("help_center/articles", params)
        for article in data["articles"]:
            yield article

        if not data.get("meta", {}).get("has_more"):
            break
        params["page[after]"] = data["meta"]["after_cursor"]


def _get_article_page(
    client: ZendeskClient,
    start_time: int | None = None,
    after_cursor: str | None = None,
    page_size: int = MAX_PAGE_SIZE,
) -> ZendeskPageResponse:
    params = {"page[size]": page_size, "sort_by": "updated_at", "sort_order": "asc"}
    if start_time is not None:
        params["start_time"] = start_time
    if after_cursor is not None:
        params["page[after]"] = after_cursor

    data = client.make_request("help_center/articles", params)
    return ZendeskPageResponse(
        data=data["articles"],
        meta=data["meta"],
        has_more=bool(data["meta"].get("has_more", False)),
    )


def _get_tickets(
    client: ZendeskClient, start_time: int | None = None
) -> Iterator[dict[str, Any]]:
    params = {"start_time": start_time or 0}

    while True:
        data = client.make_request("incremental/tickets.json", params)
        for ticket in data["tickets"]:
            yield ticket

        if not data.get("end_of_stream", False):
            params["start_time"] = data["end_time"]
        else:
            break


# TODO: maybe these don't need to be their own functions?
def _get_tickets_page(
    client: ZendeskClient, start_time: int | None = None
) -> ZendeskPageResponse:
    params = {"start_time": start_time or 0}

    # NOTE: for some reason zendesk doesn't seem to be respecting the start_time param
    # in my local testing with very few tickets. We'll look into it if this becomes an
    # issue in larger deployments
    data = client.make_request("incremental/tickets.json", params)
    if data.get("error") == "SupportProductInactive":
        raise ValueError(
            "Zendesk Support Product is not active for this account, No tickets to index"
        )
    return ZendeskPageResponse(
        data=data["tickets"],
        meta={"end_time": data["end_time"]},
        has_more=not bool(data.get("end_of_stream", False)),
    )


def _fetch_author(
    client: ZendeskClient, author_id: str | int
) -> BasicExpertInfo | None:
    # Skip fetching if author_id is invalid
    # cast to str to avoid issues with zendesk changing their types
    if not author_id or str(author_id) == "-1":
        return None

    try:
        author_data = client.make_request(f"users/{author_id}", {})
        user = author_data.get("user")
        return (
            BasicExpertInfo(display_name=user.get("name"), email=user.get("email"))
            if user and user.get("name") and user.get("email")
            else None
        )
    except requests.exceptions.HTTPError:
        # Handle any API errors gracefully
        return None


def _article_to_document(
    article: dict[str, Any],
    content_tags: dict[str, str],
    author_map: dict[str, BasicExpertInfo],
    client: ZendeskClient,
) -> tuple[dict[str, BasicExpertInfo] | None, Document]:
    author_id = article.get("author_id")
    if not author_id:
        author = None
    else:
        author = (
            author_map.get(author_id)
            if author_id in author_map
            else _fetch_author(client, author_id)
        )

    new_author_mapping = {author_id: author} if author_id and author else None

    updated_at = article.get("updated_at")
    update_time = time_str_to_utc(updated_at) if updated_at else None

    # Build metadata
    metadata: dict[str, str | list[str]] = {
        "labels": [str(label) for label in article.get("label_names", []) if label],
        "content_tags": [
            content_tags[tag_id]
            for tag_id in article.get("content_tag_ids", [])
            if tag_id in content_tags
        ],
    }

    # Remove empty values
    metadata = {k: v for k, v in metadata.items() if v}

    return new_author_mapping, Document(
        id=f"article:{article['id']}",
        sections=[
            TextSection(
                link=cast(str, article.get("html_url")),
                text=parse_html_page_basic(article["body"]),
            )
        ],
        source=DocumentSource.ZENDESK,
        semantic_identifier=article["title"],
        doc_updated_at=update_time,
        primary_owners=[author] if author else None,
        metadata=metadata,
    )


def _get_comment_text(
    comment: dict[str, Any],
    author_map: dict[str, BasicExpertInfo],
    client: ZendeskClient,
) -> tuple[dict[str, BasicExpertInfo] | None, str]:
    author_id = comment.get("author_id")
    if not author_id:
        author = None
    else:
        author = (
            author_map.get(author_id)
            if author_id in author_map
            else _fetch_author(client, author_id)
        )

    new_author_mapping = {author_id: author} if author_id and author else None

    comment_text = f"Comment{' by ' + author.display_name if author and author.display_name else ''}"
    comment_text += f"{' at ' + comment['created_at'] if comment.get('created_at') else ''}:\n{comment['body']}"

    return new_author_mapping, comment_text


def _ticket_to_document(
    ticket: dict[str, Any],
    author_map: dict[str, BasicExpertInfo],
    client: ZendeskClient,
    default_subdomain: str,
) -> tuple[dict[str, BasicExpertInfo] | None, Document]:
    submitter_id = ticket.get("submitter")
    if not submitter_id:
        submitter = None
    else:
        submitter = (
            author_map.get(submitter_id)
            if submitter_id in author_map
            else _fetch_author(client, submitter_id)
        )

    new_author_mapping = (
        {submitter_id: submitter} if submitter_id and submitter else None
    )

    updated_at = ticket.get("updated_at")
    update_time = time_str_to_utc(updated_at) if updated_at else None

    metadata: dict[str, str | list[str]] = {}
    if status := ticket.get("status"):
        metadata["status"] = status
    if priority := ticket.get("priority"):
        metadata["priority"] = priority
    if tags := ticket.get("tags"):
        metadata["tags"] = tags
    if ticket_type := ticket.get("type"):
        metadata["ticket_type"] = ticket_type

    # Fetch comments for the ticket
    comments_data = client.make_request(f"tickets/{ticket.get('id')}/comments", {})
    comments = comments_data.get("comments", [])

    comment_texts = []
    for comment in comments:
        new_author_mapping, comment_text = _get_comment_text(
            comment, author_map, client
        )
        if new_author_mapping:
            author_map.update(new_author_mapping)
        comment_texts.append(comment_text)

    comments_text = "\n\n".join(comment_texts)

    subject = ticket.get("subject")
    full_text = f"Ticket Subject:\n{subject}\n\nComments:\n{comments_text}"

    ticket_url = ticket.get("url")
    subdomain = (
        ticket_url.split("//")[1].split(".zendesk.com")[0]
        if ticket_url
        else default_subdomain
    )

    ticket_display_url = (
        f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.get('id')}"
    )

    return new_author_mapping, Document(
        id=f"zendesk_ticket_{ticket['id']}",
        sections=[TextSection(link=ticket_display_url, text=full_text)],
        source=DocumentSource.ZENDESK,
        semantic_identifier=f"Ticket #{ticket['id']}: {subject or 'No Subject'}",
        doc_updated_at=update_time,
        primary_owners=[submitter] if submitter else None,
        metadata=metadata,
    )


class ZendeskConnectorCheckpoint(ConnectorCheckpoint):
    # We use cursor-based paginated retrieval for articles
    after_cursor_articles: str | None

    # We use timestamp-based paginated retrieval for tickets
    next_start_time_tickets: int | None

    cached_author_map: dict[str, BasicExpertInfo] | None
    cached_content_tags: dict[str, str] | None


class ZendeskConnector(
    SlimConnectorWithPermSync, CheckpointedConnector[ZendeskConnectorCheckpoint]
):
    def __init__(
        self,
        content_type: str = "articles",
        calls_per_minute: int | None = None,
    ) -> None:
        self.content_type = content_type
        self.subdomain = ""
        # Fetch all tags ahead of time
        self.content_tags: dict[str, str] = {}
        self.calls_per_minute = calls_per_minute

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        # Subdomain is actually the whole URL
        subdomain = (
            credentials["zendesk_subdomain"]
            .replace("https://", "")
            .split(".zendesk.com")[0]
        )
        self.subdomain = subdomain

        self.client = ZendeskClient(
            subdomain,
            credentials["zendesk_email"],
            credentials["zendesk_token"],
            calls_per_minute=self.calls_per_minute,
        )
        return None

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: ZendeskConnectorCheckpoint,
    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:
        if self.client is None:
            raise ZendeskCredentialsNotSetUpError()

        if checkpoint.cached_content_tags is None:
            checkpoint.cached_content_tags = _get_content_tag_mapping(self.client)
            return checkpoint  # save the content tags to the checkpoint
        self.content_tags = checkpoint.cached_content_tags

        if self.content_type == "articles":
            checkpoint = yield from self._retrieve_articles(start, end, checkpoint)
            return checkpoint
        elif self.content_type == "tickets":
            checkpoint = yield from self._retrieve_tickets(start, end, checkpoint)
            return checkpoint
        else:
            raise ValueError(f"Unsupported content_type: {self.content_type}")

    def _retrieve_articles(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
        checkpoint: ZendeskConnectorCheckpoint,
    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:
        checkpoint = copy.deepcopy(checkpoint)
        # This one is built on the fly as there may be more many more authors than tags
        author_map: dict[str, BasicExpertInfo] = checkpoint.cached_author_map or {}
        after_cursor = checkpoint.after_cursor_articles
        doc_batch: list[Document] = []

        response = _get_article_page(
            self.client,
            start_time=int(start) if start else None,
            after_cursor=after_cursor,
        )
        articles = response.data
        has_more = response.has_more
        after_cursor = response.meta.get("after_cursor")
        for article in articles:
            if (
                article.get("body") is None
                or article.get("draft")
                or any(
                    label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
                    for label in article.get("label_names", [])
                )
            ):
                continue

            try:
                new_author_map, document = _article_to_document(
                    article, self.content_tags, author_map, self.client
                )
            except Exception as e:
                yield ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=f"{article.get('id')}",
                        document_link=article.get("html_url", ""),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
                continue

            if new_author_map:
                author_map.update(new_author_map)

            doc_batch.append(document)

        if not has_more:
            yield from doc_batch
            checkpoint.has_more = False
            return checkpoint

        # Sometimes no documents are retrieved, but the cursor
        # is still updated so the connector makes progress.
        yield from doc_batch
        checkpoint.after_cursor_articles = after_cursor

        last_doc_updated_at = doc_batch[-1].doc_updated_at if doc_batch else None
        checkpoint.has_more = bool(
            end is None
            or last_doc_updated_at is None
            or last_doc_updated_at.timestamp() <= end
        )
        checkpoint.cached_author_map = (
            author_map if len(author_map) <= MAX_AUTHOR_MAP_SIZE else None
        )
        return checkpoint

    def _retrieve_tickets(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
        checkpoint: ZendeskConnectorCheckpoint,
    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:
        checkpoint = copy.deepcopy(checkpoint)
        if self.client is None:
            raise ZendeskCredentialsNotSetUpError()

        author_map: dict[str, BasicExpertInfo] = checkpoint.cached_author_map or {}

        doc_batch: list[Document] = []
        next_start_time = int(checkpoint.next_start_time_tickets or start or 0)
        ticket_response = _get_tickets_page(self.client, start_time=next_start_time)
        tickets = ticket_response.data
        has_more = ticket_response.has_more
        next_start_time = ticket_response.meta["end_time"]
        for ticket in tickets:
            if ticket.get("status") == "deleted":
                continue

            try:
                new_author_map, document = _ticket_to_document(
                    ticket=ticket,
                    author_map=author_map,
                    client=self.client,
                    default_subdomain=self.subdomain,
                )
            except Exception as e:
                yield ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=f"{ticket.get('id')}",
                        document_link=ticket.get("url", ""),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
                continue

            if new_author_map:
                author_map.update(new_author_map)

            doc_batch.append(document)

        if not has_more:
            yield from doc_batch
            checkpoint.has_more = False
            return checkpoint

        yield from doc_batch
        checkpoint.next_start_time_tickets = next_start_time
        last_doc_updated_at = doc_batch[-1].doc_updated_at if doc_batch else None
        checkpoint.has_more = bool(
            end is None
            or last_doc_updated_at is None
            or last_doc_updated_at.timestamp() <= end
        )
        checkpoint.cached_author_map = (
            author_map if len(author_map) <= MAX_AUTHOR_MAP_SIZE else None
        )
        return checkpoint

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        slim_doc_batch: list[SlimDocument | HierarchyNode] = []
        if self.content_type == "articles":
            articles = _get_articles(
                self.client, start_time=int(start) if start else None
            )
            for article in articles:
                slim_doc_batch.append(
                    SlimDocument(
                        id=f"article:{article['id']}",
                    )
                )
                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
                    yield slim_doc_batch
                    slim_doc_batch = []
        elif self.content_type == "tickets":
            tickets = _get_tickets(
                self.client, start_time=int(start) if start else None
            )
            for ticket in tickets:
                slim_doc_batch.append(
                    SlimDocument(
                        id=f"zendesk_ticket_{ticket['id']}",
                    )
                )
                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
                    yield slim_doc_batch
                    slim_doc_batch = []
        else:
            raise ValueError(f"Unsupported content_type: {self.content_type}")
        if slim_doc_batch:
            yield slim_doc_batch

    @override
    def validate_connector_settings(self) -> None:
        if self.client is None:
            raise ZendeskCredentialsNotSetUpError()

        try:
            _get_article_page(self.client, start_time=0)
        except HTTPError as e:
            # Check for HTTP status codes
            if e.response.status_code == 401:
                raise CredentialExpiredError(
                    "Your Zendesk credentials appear to be invalid or expired (HTTP 401)."
                ) from e
            elif e.response.status_code == 403:
                raise InsufficientPermissionsError(
                    "Your Zendesk token does not have sufficient permissions (HTTP 403)."
                ) from e
            elif e.response.status_code == 404:
                raise ConnectorValidationError(
                    "Zendesk resource not found (HTTP 404)."
                ) from e
            else:
                raise ConnectorValidationError(
                    f"Unexpected Zendesk error (status={e.response.status_code}): {e}"
                ) from e

    @override
    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> ZendeskConnectorCheckpoint:
        return ZendeskConnectorCheckpoint.model_validate_json(checkpoint_json)

    @override
    def build_dummy_checkpoint(self) -> ZendeskConnectorCheckpoint:
        return ZendeskConnectorCheckpoint(
            after_cursor_articles=None,
            next_start_time_tickets=None,
            cached_author_map=None,
            cached_content_tags=None,
            has_more=True,
        )


if __name__ == "__main__":
    import os

    connector = ZendeskConnector()
    connector.load_credentials(
        {
            "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"],
            "zendesk_email": os.environ["ZENDESK_EMAIL"],
            "zendesk_token": os.environ["ZENDESK_TOKEN"],
        }
    )

    current = time.time()
    one_day_ago = current - 24 * 60 * 60  # 1 day
    document_batches = connector.load_from_checkpoint(
        one_day_ago,
        current,
        connector.build_dummy_checkpoint(),
    )

    print(next(document_batches))


================================================
FILE: backend/onyx/connectors/zulip/__init__.py
================================================


================================================
FILE: backend/onyx/connectors/zulip/connector.py
================================================
import os
import tempfile
import urllib.parse
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple
from typing import Union

from zulip import Client

from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.connectors.zulip.schemas import GetMessagesResponse
from onyx.connectors.zulip.schemas import Message
from onyx.connectors.zulip.utils import build_search_narrow
from onyx.connectors.zulip.utils import call_api
from onyx.connectors.zulip.utils import encode_zulip_narrow_operand
from onyx.utils.logger import setup_logger

# Potential improvements
# 1. Group documents messages into topics, make 1 document per topic per week
# 2. Add end date support once https://github.com/zulip/zulip/issues/25436 is solved

logger = setup_logger()


class ZulipConnector(LoadConnector, PollConnector):
    def __init__(
        self, realm_name: str, realm_url: str, batch_size: int = INDEX_BATCH_SIZE
    ) -> None:
        self.batch_size = batch_size
        self.realm_name = realm_name

        # Clean and normalize the URL
        realm_url = realm_url.strip().lower()

        # Remove any trailing slashes
        realm_url = realm_url.rstrip("/")

        # Ensure the URL has a scheme
        if not realm_url.startswith(("http://", "https://")):
            realm_url = f"https://{realm_url}"

        try:
            parsed = urllib.parse.urlparse(realm_url)

            # Extract the base domain without any paths or ports
            netloc = parsed.netloc.split(":")[0]  # Remove port if present

            if not netloc:
                raise ValueError(
                    f"Invalid realm URL format: {realm_url}. URL must include a valid domain name."
                )

            # Always use HTTPS for security
            self.base_url = f"https://{netloc}"
            self.client: Client | None = None

        except Exception as e:
            raise ValueError(
                f"Failed to parse Zulip realm URL: {realm_url}. "
                f"Please provide a URL in the format: domain.com or https://domain.com. "
                f"Error: {str(e)}"
            )

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        contents = credentials["zuliprc_content"]
        # The input field converts newlines to spaces in the provided
        # zuliprc file. This reverts them back to newlines.
        contents_spaces_to_newlines = contents.replace(" ", "\n")
        # create a temporary zuliprc file
        tempdir = tempfile.tempdir
        if tempdir is None:
            raise Exception("Could not determine tempfile directory")
        config_file = os.path.join(tempdir, f"zuliprc-{self.realm_name}")
        with open(config_file, "w") as f:
            f.write(contents_spaces_to_newlines)
        self.client = Client(config_file=config_file)
        return None

    def _message_to_narrow_link(self, m: Message) -> str:
        try:
            stream_name = m.display_recipient  # assume str
            stream_operand = encode_zulip_narrow_operand(f"{m.stream_id}-{stream_name}")
            topic_operand = encode_zulip_narrow_operand(m.subject)

            narrow_link = f"{self.base_url}#narrow/stream/{stream_operand}/topic/{topic_operand}/near/{m.id}"
            return narrow_link
        except Exception as e:
            logger.error(f"Error generating Zulip message link: {e}")
            # Fallback to a basic link that at least includes the base URL
            return f"{self.base_url}#narrow/id/{m.id}"

    def _get_message_batch(self, anchor: str) -> Tuple[bool, List[Message]]:
        if self.client is None:
            raise ConnectorMissingCredentialError("Zulip")

        logger.info(f"Fetching messages starting with anchor={anchor}")
        request = build_search_narrow(
            limit=INDEX_BATCH_SIZE, anchor=anchor, apply_md=False
        )
        response = GetMessagesResponse(**call_api(self.client.get_messages, request))

        end = False
        if len(response.messages) == 0 or response.found_oldest:
            end = True

        # reverse, so that the last message is the new anchor
        # and the order is from newest to oldest
        return end, response.messages[::-1]

    def _message_to_doc(self, message: Message) -> Document:
        text = f"{message.sender_full_name}: {message.content}"

        try:
            # Convert timestamps to UTC datetime objects
            post_time = datetime.fromtimestamp(message.timestamp, tz=timezone.utc)
            edit_time = (
                datetime.fromtimestamp(message.last_edit_timestamp, tz=timezone.utc)
                if message.last_edit_timestamp is not None
                else None
            )

            # Use the most recent edit time if available, otherwise use post time
            doc_time = edit_time if edit_time is not None else post_time

        except (ValueError, TypeError) as e:
            logger.warning(f"Failed to parse timestamp for message {message.id}: {e}")
            post_time = None
            edit_time = None
            doc_time = None

        metadata: Dict[str, Union[str, List[str]]] = {
            "stream_name": str(message.display_recipient),
            "topic": str(message.subject),
            "sender_name": str(message.sender_full_name),
            "sender_email": str(message.sender_email),
            "message_timestamp": str(message.timestamp),
            "message_id": str(message.id),
            "stream_id": str(message.stream_id),
            "has_reactions": str(len(message.reactions) > 0),
            "content_type": str(message.content_type or "text"),
        }

        # Always include edit timestamp in metadata when available
        if edit_time is not None:
            metadata["edit_timestamp"] = str(message.last_edit_timestamp)

        return Document(
            id=f"{message.stream_id}__{message.id}",
            sections=[
                TextSection(
                    link=self._message_to_narrow_link(message),
                    text=text,
                )
            ],
            source=DocumentSource.ZULIP,
            semantic_identifier=f"{message.display_recipient} > {message.subject}",
            metadata=metadata,
            doc_updated_at=doc_time,  # Use most recent edit time or post time
        )

    def _get_docs(
        self, anchor: str, start: SecondsSinceUnixEpoch | None = None
    ) -> Generator[Document, None, None]:
        message: Message | None = None
        while True:
            end, message_batch = self._get_message_batch(anchor)

            for message in message_batch:
                if start is not None and float(message.timestamp) < start:
                    return
                yield self._message_to_doc(message)

            if end or message is None:
                return

            # Last message is oldest, use as next anchor
            anchor = str(message.id)

    def _poll_source(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,  # noqa: ARG002
    ) -> GenerateDocumentsOutput:
        # Since Zulip doesn't support searching by timestamp,
        # we have to always start from the newest message
        # and go backwards.
        anchor = "newest"

        docs: list[Document | HierarchyNode] = []
        for doc in self._get_docs(anchor=anchor, start=start):
            docs.append(doc)
            if len(docs) == self.batch_size:
                yield docs
                docs = []
        if docs:
            yield docs

    def load_from_state(self) -> GenerateDocumentsOutput:
        return self._poll_source(start=None, end=None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        return self._poll_source(start, end)


================================================
FILE: backend/onyx/connectors/zulip/schemas.py
================================================
from typing import Any
from typing import List
from typing import Optional
from typing import Union

from pydantic import BaseModel
from pydantic import Field


class Message(BaseModel):
    id: int
    sender_id: int
    content: str
    recipient_id: int
    timestamp: int
    client: str
    is_me_message: bool
    sender_full_name: str
    sender_email: str
    sender_realm_str: str
    subject: str
    topic_links: Optional[List[Any]] = None
    last_edit_timestamp: Optional[int] = None
    edit_history: Any = None
    reactions: List[Any]
    submessages: List[Any]
    flags: List[str] = Field(default_factory=list)
    display_recipient: Optional[str] = None
    type: Optional[str] = None
    stream_id: int
    avatar_url: Optional[str]
    content_type: Optional[str]
    rendered_content: Optional[str] = None


class GetMessagesResponse(BaseModel):
    result: str
    msg: str
    found_anchor: Optional[bool] = None
    found_oldest: Optional[bool] = None
    found_newest: Optional[bool] = None
    history_limited: Optional[bool] = None
    anchor: Optional[Union[str, int]] = None
    messages: List[Message] = Field(default_factory=list)


================================================
FILE: backend/onyx/connectors/zulip/utils.py
================================================
import time
from collections.abc import Callable
from typing import Any
from typing import Dict
from typing import Optional
from urllib.parse import quote

from onyx.utils.logger import setup_logger

logger = setup_logger()


class ZulipAPIError(Exception):
    def __init__(self, code: Any = None, msg: str | None = None) -> None:
        self.code = code
        self.msg = msg

    def __str__(self) -> str:
        return (
            f"Error occurred during Zulip API call: {self.msg}" + ""
            if self.code is None
            else f" ({self.code})"
        )


class ZulipHTTPError(ZulipAPIError):
    def __init__(self, msg: str | None = None, status_code: Any = None) -> None:
        super().__init__(code=None, msg=msg)
        self.status_code = status_code

    def __str__(self) -> str:
        return f"HTTP error {self.status_code} occurred during Zulip API call"


def __call_with_retry(fun: Callable, *args: Any, **kwargs: Any) -> Dict[str, Any]:
    result = fun(*args, **kwargs)
    if result.get("result") == "error":
        if result.get("code") == "RATE_LIMIT_HIT":
            retry_after = float(result["retry-after"]) + 1
            logger.warn(f"Rate limit hit, retrying after {retry_after} seconds")
            time.sleep(retry_after)
            return __call_with_retry(fun, *args)
    return result


def __raise_if_error(response: dict[str, Any]) -> None:
    if response.get("result") == "error":
        raise ZulipAPIError(
            code=response.get("code"),
            msg=response.get("msg"),
        )
    elif response.get("result") == "http-error":
        raise ZulipHTTPError(
            msg=response.get("msg"), status_code=response.get("status_code")
        )


def call_api(fun: Callable, *args: Any, **kwargs: Any) -> Dict[str, Any]:
    response = __call_with_retry(fun, *args, **kwargs)
    __raise_if_error(response)
    return response


def build_search_narrow(
    *,
    stream: Optional[str] = None,
    topic: Optional[str] = None,
    limit: int = 100,
    content: Optional[str] = None,
    apply_md: bool = False,
    anchor: str = "newest",
) -> Dict[str, Any]:
    narrow_filters = []

    if stream:
        narrow_filters.append({"operator": "stream", "operand": stream})

    if topic:
        narrow_filters.append({"operator": "topic", "operand": topic})

    if content:
        narrow_filters.append({"operator": "has", "operand": content})

    if not stream and not topic and not content:
        narrow_filters.append({"operator": "streams", "operand": "public"})

    narrow = {
        "anchor": anchor,
        "num_before": limit,
        "num_after": 0,
        "narrow": narrow_filters,
    }
    narrow["apply_markdown"] = apply_md

    return narrow


def encode_zulip_narrow_operand(value: str) -> str:
    # like https://github.com/zulip/zulip/blob/1577662a6/static/js/hash_util.js#L18-L25
    # safe characters necessary to make Python match Javascript's escaping behaviour,
    # see: https://stackoverflow.com/a/74439601
    return quote(value, safe="!~*'()").replace(".", "%2E").replace("%", ".")


================================================
FILE: backend/onyx/context/search/__init__.py
================================================


================================================
FILE: backend/onyx/context/search/enums.py
================================================
"""NOTE: this needs to be separate from models.py because of circular imports.
Both search/models.py and db/models.py import enums from this file AND
search/models.py imports from db/models.py."""

from enum import Enum


class RecencyBiasSetting(str, Enum):
    FAVOR_RECENT = "favor_recent"  # 2x decay rate
    BASE_DECAY = "base_decay"
    NO_DECAY = "no_decay"
    # Determine based on query if to use base_decay or favor_recent
    AUTO = "auto"


class QueryType(str, Enum):
    """
    The type of first-pass query to use for hybrid search.

    The values of this enum are injected into the ranking profile name which
    should match the name in the schema.
    """

    KEYWORD = "keyword"
    SEMANTIC = "semantic"


class SearchType(str, Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"
    INTERNET = "internet"


================================================
FILE: backend/onyx/context/search/federated/models.py
================================================
from datetime import datetime
from typing import TypedDict

from pydantic import BaseModel

from onyx.onyxbot.slack.models import ChannelType


class ChannelMetadata(TypedDict):
    """Type definition for cached channel metadata."""

    name: str
    type: ChannelType
    is_private: bool
    is_member: bool


class SlackMessage(BaseModel):
    document_id: str
    channel_id: str
    message_id: str
    thread_id: str | None
    link: str
    metadata: dict[str, str | list[str]]
    timestamp: datetime
    recency_bias: float
    semantic_identifier: str
    text: str
    highlighted_texts: set[str]
    slack_score: float


================================================
FILE: backend/onyx/context/search/federated/slack_search.py
================================================
import json
import re
import time
from datetime import datetime
from typing import Any
from typing import cast

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import ValidationError
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from sqlalchemy.orm import Session

from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.app_configs import MAX_SLACK_THREAD_CONTEXT_MESSAGES
from onyx.configs.app_configs import SLACK_THREAD_CONTEXT_BATCH_SIZE
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
from onyx.context.search.federated.slack_search_utils import build_slack_queries
from onyx.context.search.federated.slack_search_utils import get_channel_type
from onyx.context.search.federated.slack_search_utils import (
    get_channel_type_for_missing_scope,
)
from onyx.context.search.federated.slack_search_utils import is_recency_query
from onyx.context.search.federated.slack_search_utils import should_include_message
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import InferenceChunk
from onyx.db.document import DocumentSource
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import (
    get_multipass_config,
)
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.llm.factory import get_default_llm
from onyx.onyxbot.slack.models import ChannelType
from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE

logger = setup_logger()

HIGHLIGHT_START_CHAR = "\ue000"
HIGHLIGHT_END_CHAR = "\ue001"

CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24  # 24 hours
USER_PROFILE_CACHE_TTL = 60 * 60 * 24  # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3  # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3  # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1  # Initial retry delay in seconds (exponential backoff)


def fetch_and_cache_channel_metadata(
    access_token: str, team_id: str, include_private: bool = True
) -> dict[str, ChannelMetadata]:
    """
    Fetch ALL channel metadata in one API call and cache it.

    Returns a dict mapping channel_id -> metadata including name, type, etc.
    This replaces multiple conversations.info calls with a single conversations.list.

    Note: We ALWAYS fetch all channel types (including private) and cache them together.
    This ensures a single cache entry per team, avoiding duplicate API calls.
    """
    # Use tenant-specific Redis client
    redis_client = get_redis_client()
    # (tenant_id prefix is added automatically by TenantRedis)
    cache_key = f"slack_federated_search:{team_id}:channels:metadata"

    try:
        cached = redis_client.get(cache_key)
        if cached:
            logger.debug(f"Channel metadata cache HIT for team {team_id}")
            cached_str: str = (
                cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
            )
            cached_data = cast(dict[str, ChannelMetadata], json.loads(cached_str))
            logger.debug(f"Loaded {len(cached_data)} channels from cache")
            if not include_private:
                filtered: dict[str, ChannelMetadata] = {
                    k: v
                    for k, v in cached_data.items()
                    if v.get("type") != ChannelType.PRIVATE_CHANNEL.value
                }
                logger.debug(f"Filtered to {len(filtered)} channels (exclude private)")
                return filtered
            return cached_data
    except Exception as e:
        logger.warning(f"Error reading from channel metadata cache: {e}")

    # Cache miss - fetch from Slack API with retry logic
    logger.debug(f"Channel metadata cache MISS for team {team_id} - fetching from API")
    slack_client = WebClient(token=access_token)
    channel_metadata: dict[str, ChannelMetadata] = {}

    # Retry logic with exponential backoff
    last_exception = None
    available_channel_types = ALL_CHANNEL_TYPES.copy()

    for attempt in range(CHANNEL_METADATA_MAX_RETRIES):
        try:
            # Use available channel types (may be reduced if scopes are missing)
            channel_types = ",".join(available_channel_types)

            # Fetch all channels in one call
            cursor = None
            channel_count = 0
            while True:
                response = slack_client.conversations_list(
                    types=channel_types,
                    exclude_archived=True,
                    limit=1000,
                    cursor=cursor,
                )
                response.validate()

                # Cast response.data to dict for type checking
                response_data: dict[str, Any] = response.data  # type: ignore
                for ch in response_data.get("channels", []):
                    channel_id = ch.get("id")
                    if not channel_id:
                        continue

                    # Determine channel type
                    channel_type_enum = get_channel_type(channel_info=ch)
                    channel_type = ChannelType(channel_type_enum.value)

                    channel_metadata[channel_id] = {
                        "name": ch.get("name", ""),
                        "type": channel_type,
                        "is_private": ch.get("is_private", False),
                        "is_member": ch.get("is_member", False),
                    }
                    channel_count += 1

                cursor = response_data.get("response_metadata", {}).get("next_cursor")
                if not cursor:
                    break

            logger.info(f"Fetched {channel_count} channels for team {team_id}")

            # Cache the results
            try:
                redis_client.set(
                    cache_key,
                    json.dumps(channel_metadata),
                    ex=CHANNEL_METADATA_CACHE_TTL,
                )
                logger.info(
                    f"Cached {channel_count} channels for team {team_id} (TTL: {CHANNEL_METADATA_CACHE_TTL}s, key: {cache_key})"
                )
            except Exception as e:
                logger.warning(f"Error caching channel metadata: {e}")

            return channel_metadata

        except SlackApiError as e:
            last_exception = e

            # Extract all needed fields from response upfront
            if e.response:
                error_response = e.response.get("error", "")
                needed_scope = e.response.get("needed", "")
            else:
                error_response = ""
                needed_scope = ""

            # Check if this is a missing_scope error
            if error_response == "missing_scope":
                # Get the channel type that requires this scope
                missing_channel_type = get_channel_type_for_missing_scope(needed_scope)

                if (
                    missing_channel_type
                    and missing_channel_type in available_channel_types
                ):
                    # Remove the problematic channel type and retry
                    available_channel_types.remove(missing_channel_type)
                    logger.warning(
                        f"Missing scope '{needed_scope}' for channel type '{missing_channel_type}'. "
                        f"Continuing with reduced channel types: {available_channel_types}"
                    )
                    # Don't count this as a retry attempt, just try again with fewer types
                    if available_channel_types:  # Only continue if we have types left
                        continue
                    # Otherwise fall through to retry logic
                else:
                    logger.error(
                        f"Missing scope '{needed_scope}' but could not map to channel type or already removed. "
                        f"Response: {e.response}"
                    )

            # For other errors, use retry logic
            if attempt < CHANNEL_METADATA_MAX_RETRIES - 1:
                retry_delay = CHANNEL_METADATA_RETRY_DELAY * (2**attempt)
                logger.warning(
                    f"Failed to fetch channel metadata (attempt {attempt + 1}/{CHANNEL_METADATA_MAX_RETRIES}): {e}. "
                    f"Retrying in {retry_delay}s..."
                )
                time.sleep(retry_delay)
            else:
                logger.error(
                    f"Failed to fetch channel metadata after {CHANNEL_METADATA_MAX_RETRIES} attempts: {e}"
                )

    # If we have some channel metadata despite errors, return it with a warning
    if channel_metadata:
        logger.warning(
            f"Returning partial channel metadata ({len(channel_metadata)} channels) despite errors. Last error: {last_exception}"
        )
        return channel_metadata

    # If we exhausted all retries and have no data, raise the last exception
    if last_exception:
        raise SlackApiError(
            f"Channel metadata fetching failed after {CHANNEL_METADATA_MAX_RETRIES} attempts",
            last_exception.response,
        )

    return {}


def get_available_channels(
    access_token: str, team_id: str, include_private: bool = False
) -> list[str]:
    """Fetch list of available channel names using cached metadata."""
    metadata = fetch_and_cache_channel_metadata(access_token, team_id, include_private)
    return [meta["name"] for meta in metadata.values() if meta["name"]]


def get_cached_user_profile(
    access_token: str, team_id: str, user_id: str
) -> str | None:
    """
    Get a user's display name from cache or fetch from Slack API.

    Uses Redis caching to avoid repeated API calls and rate limiting.
    Returns the user's real_name or email, or None if not found.
    """
    redis_client = get_redis_client()
    cache_key = f"slack_federated_search:{team_id}:user:{user_id}"

    # Check cache first
    try:
        cached = redis_client.get(cache_key)
        if cached is not None:
            cached_str = (
                cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
            )
            # Empty string means user was not found previously
            return cached_str if cached_str else None
    except Exception as e:
        logger.debug(f"Error reading user profile cache: {e}")

    # Cache miss - fetch from Slack API
    slack_client = WebClient(token=access_token)
    try:
        response = slack_client.users_profile_get(user=user_id)
        response.validate()
        profile: dict[str, Any] = response.get("profile", {})
        name: str | None = profile.get("real_name") or profile.get("email")

        # Cache the result (empty string for not found)
        try:
            redis_client.set(
                cache_key,
                name or "",
                ex=USER_PROFILE_CACHE_TTL,
            )
        except Exception as e:
            logger.debug(f"Error caching user profile: {e}")

        return name

    except SlackApiError as e:
        error_str = str(e)
        if "user_not_found" in error_str:
            logger.debug(
                f"User {user_id} not found in Slack workspace (likely deleted/deactivated)"
            )
        elif "ratelimited" in error_str:
            # Don't cache rate limit errors - we'll retry later
            logger.debug(f"Rate limited fetching user {user_id}, will retry later")
            return None
        else:
            logger.warning(f"Could not fetch profile for user {user_id}: {e}")

        # Cache negative result to avoid repeated lookups for missing users
        try:
            redis_client.set(cache_key, "", ex=USER_PROFILE_CACHE_TTL)
        except Exception:
            pass

        return None


def batch_get_user_profiles(
    access_token: str, team_id: str, user_ids: set[str]
) -> dict[str, str]:
    """
    Batch fetch user profiles with caching.

    Returns a dict mapping user_id -> display_name for users that were found.
    """
    result: dict[str, str] = {}

    for user_id in user_ids:
        name = get_cached_user_profile(access_token, team_id, user_id)
        if name:
            result[user_id] = name

    return result


def _extract_channel_data_from_entities(
    entities: dict[str, Any] | None,
    channel_metadata_dict: dict[str, ChannelMetadata] | None,
) -> list[str] | None:
    """Extract available channels list from metadata based on entity configuration.

    Args:
        entities: Entity filter configuration dict
        channel_metadata_dict: Pre-fetched channel metadata dictionary

    Returns:
        List of available channel names, or None if not needed
    """
    if not entities or not channel_metadata_dict:
        return None

    try:
        parsed_entities = SlackEntities(**entities)
        # Only extract if we have exclusions or channel filters
        if parsed_entities.exclude_channels or parsed_entities.channels:
            # Extract channel names from metadata dict
            return [
                meta["name"]
                for meta in channel_metadata_dict.values()
                if meta["name"]
                and (
                    parsed_entities.include_private_channels
                    or meta.get("type") != ChannelType.PRIVATE_CHANNEL.value
                )
            ]
    except ValidationError:
        logger.debug("Failed to parse entities for channel data extraction")

    return None


def _should_skip_channel(
    channel_id: str,
    allowed_private_channel: str | None,
    bot_token: str | None,
    access_token: str,
    include_dm: bool,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> bool:
    """Bot context filtering: skip private channels unless explicitly allowed.

    Uses pre-fetched channel metadata when available to avoid API calls.
    """
    if bot_token and not include_dm:
        try:
            # First try to use pre-fetched metadata from cache
            if channel_metadata_dict and channel_id in channel_metadata_dict:
                channel_meta = channel_metadata_dict[channel_id]
                channel_type_str = channel_meta.get("type", "")
                is_private_or_dm = channel_type_str in [
                    ChannelType.PRIVATE_CHANNEL.value,
                    ChannelType.IM.value,
                    ChannelType.MPIM.value,
                ]
                if is_private_or_dm and channel_id != allowed_private_channel:
                    return True
                return False

            # Fallback: API call only if not in cache (should be rare)
            token_to_use = bot_token or access_token
            channel_client = WebClient(token=token_to_use)
            channel_info = channel_client.conversations_info(channel=channel_id)

            if isinstance(channel_info.data, dict):
                channel_data = channel_info.data.get("channel", {})
                channel_type = get_channel_type(channel_info=channel_data)
                is_private_or_dm = channel_type in [
                    ChannelType.PRIVATE_CHANNEL,
                    ChannelType.IM,
                    ChannelType.MPIM,
                ]

                if is_private_or_dm and channel_id != allowed_private_channel:
                    return True
        except Exception as e:
            logger.warning(
                f"Could not determine channel type for {channel_id}, filtering out: {e}"
            )
            return True
    return False


class SlackQueryResult(BaseModel):
    """Result from a single Slack query including stats."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    messages: list[SlackMessage]
    filtered_channels: list[str]  # Channels filtered out during this query


def query_slack(
    query_string: str,
    access_token: str,
    limit: int | None = None,
    allowed_private_channel: str | None = None,
    bot_token: str | None = None,
    include_dm: bool = False,
    entities: dict[str, Any] | None = None,
    available_channels: list[str] | None = None,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:

    # Check if query has channel override (user specified channels in query)
    has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")

    if has_channel_override:
        # Remove the marker and use the query as-is (already has channel filters)
        final_query = query_string.replace("__CHANNEL_OVERRIDE__", "").strip()
    else:
        # Normal flow: build channel filters from entity config
        channel_filter = ""
        if entities:
            channel_filter = build_channel_query_filter(entities, available_channels)

        final_query = query_string
        if channel_filter:
            # Add channel filter to query
            final_query = f"{query_string} {channel_filter}"

    logger.info(f"Final query to slack: {final_query}")

    # Detect if query asks for most recent results
    sort_by_time = is_recency_query(query_string)

    slack_client = WebClient(token=access_token)
    try:
        search_params: dict[str, Any] = {
            "query": final_query,
            "count": limit,
            "highlight": True,
        }

        # Sort by timestamp for recency-focused queries, otherwise by relevance
        if sort_by_time:
            search_params["sort"] = "timestamp"
            search_params["sort_dir"] = "desc"

        response = slack_client.search_messages(**search_params)
        response.validate()

        messages: dict[str, Any] = response.get("messages", {})
        matches: list[dict[str, Any]] = messages.get("matches", [])

        logger.info(f"Slack search found {len(matches)} messages")
    except SlackApiError as slack_error:
        logger.error(f"Slack API error in search_messages: {slack_error}")
        logger.error(
            f"Slack API error details: status={slack_error.response.status_code}, error={slack_error.response.get('error')}"
        )
        if "not_allowed_token_type" in str(slack_error):
            # Log token type prefix
            token_prefix = access_token[:4] if len(access_token) >= 4 else "unknown"
            logger.error(f"TOKEN TYPE ERROR: access_token type: {token_prefix}...")
        return SlackQueryResult(messages=[], filtered_channels=[])

    # convert matches to slack messages
    slack_messages: list[SlackMessage] = []
    filtered_channels: list[str] = []
    for match in matches:
        text: str | None = match.get("text")
        permalink: str | None = match.get("permalink")
        message_id: str | None = match.get("ts")
        channel_id: str | None = match.get("channel", {}).get("id")
        channel_name: str | None = match.get("channel", {}).get("name")
        username: str | None = match.get("username")
        if not username:
            # Fallback: try to get from user field if username is missing
            user_info = match.get("user", "")
            if isinstance(user_info, str) and user_info:
                username = user_info  # Use user ID as fallback
            else:
                username = "unknown_user"
        score: float = match.get("score", 0.0)
        if (  # can't use any() because of type checking :(
            not text
            or not permalink
            or not message_id
            or not channel_id
            or not channel_name
            or not username
        ):
            continue

        # Apply channel filtering if needed
        if _should_skip_channel(
            channel_id,
            allowed_private_channel,
            bot_token,
            access_token,
            include_dm,
            channel_metadata_dict,
        ):
            filtered_channels.append(f"{channel_name}({channel_id})")
            continue

        # generate thread id and document id
        thread_id = (
            permalink.split("?thread_ts=", 1)[1] if "?thread_ts=" in permalink else None
        )
        document_id = f"{channel_id}_{message_id}"

        decay_factor = DOC_TIME_DECAY
        doc_time = datetime.fromtimestamp(float(message_id))
        doc_age_years = (datetime.now() - doc_time).total_seconds() / (
            365 * 24 * 60 * 60
        )
        recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
        metadata: dict[str, str | list[str]] = {
            "channel": channel_name,
            "time": doc_time.isoformat(),
        }

        # extract out the highlighted texts
        highlighted_texts = set(
            re.findall(
                rf"{re.escape(HIGHLIGHT_START_CHAR)}(.*?){re.escape(HIGHLIGHT_END_CHAR)}",
                text,
            )
        )
        cleaned_text = text.replace(HIGHLIGHT_START_CHAR, "").replace(
            HIGHLIGHT_END_CHAR, ""
        )

        # get the semantic identifier
        snippet = (
            cleaned_text[:50].rstrip() + "..." if len(cleaned_text) > 50 else text
        ).replace("\n", " ")
        doc_sem_id = f"{username} in #{channel_name}: {snippet}"

        slack_messages.append(
            SlackMessage(
                document_id=document_id,
                channel_id=channel_id,
                message_id=message_id,
                thread_id=thread_id,
                link=permalink,
                metadata=metadata,
                timestamp=doc_time,
                recency_bias=recency_bias,
                semantic_identifier=doc_sem_id,
                text=f"{username}: {cleaned_text}",
                highlighted_texts=highlighted_texts,
                slack_score=score,
            )
        )

    return SlackQueryResult(
        messages=slack_messages, filtered_channels=filtered_channels
    )


def merge_slack_messages(
    query_results: list[SlackQueryResult],
) -> tuple[list[SlackMessage], dict[str, SlackMessage], set[str]]:
    """Merge messages from multiple query results, deduplicating by document_id.

    Returns:
        Tuple of (merged_messages, docid_to_message, all_filtered_channels)
    """
    merged_messages: list[SlackMessage] = []
    docid_to_message: dict[str, SlackMessage] = {}
    all_filtered_channels: set[str] = set()

    for result in query_results:
        # Collect filtered channels from all queries
        all_filtered_channels.update(result.filtered_channels)

        for message in result.messages:
            if message.document_id in docid_to_message:
                # update the score and highlighted texts, rest should be identical
                docid_to_message[message.document_id].slack_score = max(
                    docid_to_message[message.document_id].slack_score,
                    message.slack_score,
                )
                docid_to_message[message.document_id].highlighted_texts.update(
                    message.highlighted_texts
                )
                continue

            # add the message to the list
            docid_to_message[message.document_id] = message
            merged_messages.append(message)

    # re-sort by score
    merged_messages.sort(key=lambda x: x.slack_score, reverse=True)

    return merged_messages, docid_to_message, all_filtered_channels


class SlackRateLimitError(Exception):
    """Raised when Slack API returns a rate limit error (429)."""


class ThreadContextResult:
    """Result wrapper for thread context fetch that captures error type."""

    __slots__ = ("text", "is_rate_limited", "is_error")

    def __init__(
        self, text: str, is_rate_limited: bool = False, is_error: bool = False
    ):
        self.text = text
        self.is_rate_limited = is_rate_limited
        self.is_error = is_error

    @classmethod
    def success(cls, text: str) -> "ThreadContextResult":
        return cls(text)

    @classmethod
    def rate_limited(cls, original_text: str) -> "ThreadContextResult":
        return cls(original_text, is_rate_limited=True)

    @classmethod
    def error(cls, original_text: str) -> "ThreadContextResult":
        return cls(original_text, is_error=True)


def _fetch_thread_context(
    message: SlackMessage, access_token: str, team_id: str | None = None
) -> ThreadContextResult:
    """
    Fetch thread context for a message, returning a result object.

    Returns ThreadContextResult with:
    - success: enriched thread text
    - rate_limited: original text + flag indicating we should stop
    - error: original text for other failures (graceful degradation)
    """
    channel_id = message.channel_id
    thread_id = message.thread_id
    message_id = message.message_id

    # If not a thread, return original text as success
    if thread_id is None:
        return ThreadContextResult.success(message.text)

    slack_client = WebClient(token=access_token, timeout=30)
    try:
        response = slack_client.conversations_replies(
            channel=channel_id,
            ts=thread_id,
        )
        response.validate()
        messages: list[dict[str, Any]] = response.get("messages", [])
    except SlackApiError as e:
        # Check for rate limit error specifically
        if e.response and e.response.status_code == 429:
            logger.warning(
                f"Slack rate limit hit while fetching thread context for {channel_id}/{thread_id}"
            )
            return ThreadContextResult.rate_limited(message.text)
        # For other Slack errors, log and return original text
        logger.error(f"Slack API error in thread context fetch: {e}")
        return ThreadContextResult.error(message.text)
    except Exception as e:
        # Network errors, timeouts, etc - treat as recoverable error
        logger.error(f"Unexpected error in thread context fetch: {e}")
        return ThreadContextResult.error(message.text)

    # If empty response or single message (not a thread), return original text
    if len(messages) <= 1:
        return ThreadContextResult.success(message.text)

    # Build thread text from thread starter + context window around matched message
    thread_text = _build_thread_text(
        messages, message_id, thread_id, access_token, team_id, slack_client
    )
    return ThreadContextResult.success(thread_text)


def _build_thread_text(
    messages: list[dict[str, Any]],
    message_id: str,
    thread_id: str,
    access_token: str,
    team_id: str | None,
    slack_client: WebClient,
) -> str:
    """Build the thread text from messages."""
    msg_text = messages[0].get("text", "")
    msg_sender = messages[0].get("user", "")
    thread_text = f"<@{msg_sender}>: {msg_text}"

    thread_text += "\n\nReplies:"
    if thread_id == message_id:
        message_id_idx = 0
    else:
        message_id_idx = next(
            (i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
        )
        if not message_id_idx:
            return thread_text

        start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)

        if start_idx > 1:
            thread_text += "\n..."

        for i in range(start_idx, message_id_idx):
            msg_text = messages[i].get("text", "")
            msg_sender = messages[i].get("user", "")
            thread_text += f"\n\n<@{msg_sender}>: {msg_text}"

        msg_text = messages[message_id_idx].get("text", "")
        msg_sender = messages[message_id_idx].get("user", "")
        thread_text += f"\n\n<@{msg_sender}>: {msg_text}"

    # Add following replies
    len_replies = 0
    for msg in messages[message_id_idx + 1 :]:
        msg_text = msg.get("text", "")
        msg_sender = msg.get("user", "")
        reply = f"\n\n<@{msg_sender}>: {msg_text}"
        thread_text += reply

        len_replies += len(reply)
        if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
            thread_text += "\n..."
            break

    # Replace user IDs with names using cached lookups
    userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))

    if team_id:
        user_profiles = batch_get_user_profiles(access_token, team_id, userids)
        for userid, name in user_profiles.items():
            thread_text = thread_text.replace(f"<@{userid}>", name)
    else:
        for userid in userids:
            try:
                response = slack_client.users_profile_get(user=userid)
                response.validate()
                profile: dict[str, Any] = response.get("profile", {})
                user_name: str | None = profile.get("real_name") or profile.get("email")
            except SlackApiError as e:
                if "user_not_found" in str(e):
                    logger.debug(
                        f"User {userid} not found (likely deleted/deactivated)"
                    )
                else:
                    logger.warning(f"Could not fetch profile for user {userid}: {e}")
                continue
            if not user_name:
                continue
            thread_text = thread_text.replace(f"<@{userid}>", user_name)

    return thread_text


def fetch_thread_contexts_with_rate_limit_handling(
    slack_messages: list[SlackMessage],
    access_token: str,
    team_id: str | None,
    batch_size: int = SLACK_THREAD_CONTEXT_BATCH_SIZE,
    max_messages: int | None = MAX_SLACK_THREAD_CONTEXT_MESSAGES,
) -> list[str]:
    """
    Fetch thread contexts in controlled batches, stopping on rate limit.

    Distinguishes between error types:
    - Rate limit (429): Stop processing further batches
    - Other errors: Continue processing (graceful degradation)

    Args:
        slack_messages: Messages to fetch thread context for (should be sorted by relevance)
        access_token: Slack OAuth token
        team_id: Slack team ID for user profile caching
        batch_size: Number of concurrent API calls per batch
        max_messages: Maximum messages to fetch thread context for (None = no limit)

    Returns:
        List of thread texts, one per input message.
        Messages beyond max_messages or after rate limit get their original text.
    """
    if not slack_messages:
        return []

    # Limit how many messages we fetch thread context for (if max_messages is set)
    if max_messages and max_messages < len(slack_messages):
        messages_for_context = slack_messages[:max_messages]
        messages_without_context = slack_messages[max_messages:]
    else:
        messages_for_context = slack_messages
        messages_without_context = []

    logger.info(
        f"Fetching thread context for {len(messages_for_context)} of {len(slack_messages)} messages "
        f"(batch_size={batch_size}, max={max_messages or 'unlimited'})"
    )

    results: list[str] = []
    rate_limited = False
    total_batches = (len(messages_for_context) + batch_size - 1) // batch_size
    rate_limit_batch = 0

    # Process in batches
    for i in range(0, len(messages_for_context), batch_size):
        current_batch = i // batch_size + 1

        if rate_limited:
            # Skip remaining batches, use original message text
            remaining = messages_for_context[i:]
            skipped_batches = total_batches - rate_limit_batch
            logger.warning(
                f"Slack rate limit: skipping {len(remaining)} remaining messages "
                f"({skipped_batches} of {total_batches} batches). "
                f"Successfully enriched {len(results)} messages before rate limit."
            )
            results.extend([msg.text for msg in remaining])
            break

        batch = messages_for_context[i : i + batch_size]

        # _fetch_thread_context returns ThreadContextResult (never raises)
        # allow_failures=True is a safety net for any unexpected exceptions
        batch_results: list[ThreadContextResult | None] = (
            run_functions_tuples_in_parallel(
                [
                    (
                        _fetch_thread_context,
                        (msg, access_token, team_id),
                    )
                    for msg in batch
                ],
                allow_failures=True,
                max_workers=batch_size,
            )
        )

        # Process results - ThreadContextResult tells us exactly what happened
        for j, result in enumerate(batch_results):
            if result is None:
                # Unexpected exception (shouldn't happen) - use original text, stop
                logger.error(f"Unexpected None result for message {j} in batch")
                results.append(batch[j].text)
                rate_limited = True
                rate_limit_batch = current_batch
            elif result.is_rate_limited:
                # Rate limit hit - use original text, stop further batches
                results.append(result.text)
                rate_limited = True
                rate_limit_batch = current_batch
            else:
                # Success or recoverable error - use the text (enriched or original)
                results.append(result.text)

        if rate_limited:
            logger.warning(
                f"Slack rate limit (429) hit at batch {current_batch}/{total_batches} "
                f"while fetching thread context. Stopping further API calls."
            )

    # Add original text for messages we didn't fetch context for
    results.extend([msg.text for msg in messages_without_context])

    return results


def convert_slack_score(slack_score: float) -> float:
    """
    Convert slack score to a score between 0 and 1.
    Will affect UI ordering and LLM ordering, but not the pruning.
    I.e., should have very little effect on the search/answer quality.
    """
    return max(0.0, min(1.0, slack_score / 90_000))


@log_function_time(print_only=True)
def slack_retrieval(
    query: ChunkIndexRequest,
    access_token: str,
    db_session: Session | None = None,
    connector: FederatedConnectorDetail | None = None,  # noqa: ARG001
    entities: dict[str, Any] | None = None,
    limit: int | None = None,
    slack_event_context: SlackContext | None = None,
    bot_token: str | None = None,  # Add bot token parameter
    team_id: str | None = None,
    # Pre-fetched data — when provided, avoids DB query (no session needed)
    search_settings: SearchSettings | None = None,
) -> list[InferenceChunk]:
    """
    Main entry point for Slack federated search with entity filtering.

    Applies entity filtering including:
    - Channel selection and exclusion
    - Date range extraction and enforcement
    - DM/private channel filtering
    - Multi-layer caching

    Args:
        query: Search query object
        access_token: User OAuth access token
        db_session: Database session (optional if search_settings provided)
        connector: Federated connector detail (unused, kept for backwards compat)
        entities: Connector-level config (entity filtering configuration)
        limit: Maximum number of results
        slack_event_context: Context when called from Slack bot
        bot_token: Bot token for enhanced permissions
        team_id: Slack team/workspace ID

    Returns:
        List of InferenceChunk objects
    """
    # Use connector-level config
    entities = entities or {}

    if not entities:
        logger.debug("No entity configuration found, using defaults")
    else:
        logger.debug(f"Using entity configuration: {entities}")

    # Extract limit from entity config if not explicitly provided
    query_limit = limit
    if entities:
        try:
            parsed_entities = SlackEntities(**entities)
            if limit is None:
                query_limit = parsed_entities.max_messages_per_query
                logger.debug(f"Using max_messages_per_query from config: {query_limit}")
        except Exception as e:
            logger.warning(f"Error parsing entities for limit: {e}")
            if limit is None:
                query_limit = 100  # Fallback default
    elif limit is None:
        query_limit = 100  # Default when no entities and no limit provided

    # Pre-fetch channel metadata from Redis cache and extract available channels
    # This avoids repeated Redis lookups during parallel search execution
    available_channels = None
    channel_metadata_dict = None
    if team_id:
        # Always fetch all channel types (include_private=True) to ensure single cache entry
        channel_metadata_dict = fetch_and_cache_channel_metadata(
            access_token, team_id, include_private=True
        )

        # Extract available channels list if needed for pattern matching
        available_channels = _extract_channel_data_from_entities(
            entities, channel_metadata_dict
        )

    # Query slack with entity filtering
    llm = get_default_llm()
    query_strings = build_slack_queries(query, llm, entities, available_channels)

    # Determine filtering based on entities OR context (bot)
    include_dm = False
    allowed_private_channel = None

    # Bot context overrides (if entities not specified)
    if slack_event_context and not entities:
        channel_type = slack_event_context.channel_type
        if channel_type == ChannelType.IM:  # DM with user
            include_dm = True
        if channel_type == ChannelType.PRIVATE_CHANNEL:
            allowed_private_channel = slack_event_context.channel_id
            logger.debug(
                f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
            )

    # Build search tasks
    search_tasks = [
        (
            query_slack,
            (
                query_string,
                access_token,
                query_limit,
                allowed_private_channel,
                bot_token,
                include_dm,
                entities,
                available_channels,
                channel_metadata_dict,
            ),
        )
        for query_string in query_strings
    ]

    # If include_dm is True AND we're not already searching all channels,
    # add additional searches without channel filters.
    # This allows searching DMs/group DMs while still searching the specified channels.
    # Skip this if search_all_channels is already True (would be duplicate queries).
    if (
        entities
        and entities.get("include_dm")
        and not entities.get("search_all_channels")
    ):
        # Create a minimal entities dict that won't add channel filters
        # This ensures we search ALL conversations (DMs, group DMs, private channels)
        # BUT we still want to exclude channels specified in exclude_channels
        dm_entities = {
            "include_dm": True,
            "include_private_channels": entities.get("include_private_channels", False),
            "default_search_days": entities.get("default_search_days", 30),
            "search_all_channels": True,
            "channels": None,
            "exclude_channels": entities.get(
                "exclude_channels"
            ),  # ALWAYS apply exclude_channels
        }

        for query_string in query_strings:
            search_tasks.append(
                (
                    query_slack,
                    (
                        query_string,
                        access_token,
                        query_limit,
                        allowed_private_channel,
                        bot_token,
                        include_dm,
                        dm_entities,
                        available_channels,
                        channel_metadata_dict,
                    ),
                )
            )

    # Execute searches in parallel
    results = run_functions_tuples_in_parallel(search_tasks)

    # Calculate stats for consolidated logging
    total_raw_messages = sum(len(r.messages) for r in results)

    # Merge and post-filter results
    slack_messages, docid_to_message, query_filtered_channels = merge_slack_messages(
        results
    )
    messages_after_dedup = len(slack_messages)

    # Post-filter by channel type (DM, private channel, etc.)
    # NOTE: We must post-filter because Slack's search.messages API only supports
    # filtering by channel NAME (via in:#channel syntax), not by channel TYPE.
    # There's no way to specify "only public channels" or "exclude DMs" in the query.
    # Start with channels filtered during query execution, then add post-filter channels
    filtered_out_channels: set[str] = set(query_filtered_channels)
    if entities and team_id:
        # Use pre-fetched channel metadata to avoid cache misses
        # Pass it directly instead of relying on Redis cache

        filtered_messages = []
        for msg in slack_messages:
            # Pass pre-fetched metadata to avoid cache lookups
            channel_type = get_channel_type(
                channel_id=msg.channel_id,
                channel_metadata=channel_metadata_dict,
            )
            if should_include_message(channel_type, entities):
                filtered_messages.append(msg)
            else:
                # Track unique channel name for summary
                channel_name = msg.metadata.get("channel", msg.channel_id)
                filtered_out_channels.add(f"{channel_name}({msg.channel_id})")

        slack_messages = filtered_messages

    slack_messages = slack_messages[: limit or len(slack_messages)]

    # Log consolidated summary with request ID for correlation
    request_id = (
        slack_event_context.message_ts[:10]
        if slack_event_context and slack_event_context.message_ts
        else "no-ctx"
    )
    logger.info(
        f"[req:{request_id}] Slack federated search: {len(search_tasks)} queries, "
        f"{total_raw_messages} raw msgs -> {messages_after_dedup} after dedup -> "
        f"{len(slack_messages)} final"
        + (
            f", filtered channels: {sorted(filtered_out_channels)}"
            if filtered_out_channels
            else ""
        )
    )

    if not slack_messages:
        return []

    # Fetch thread context with rate limit handling and message limiting
    # Messages are already sorted by relevance (slack_score), so top N get full context
    thread_texts = fetch_thread_contexts_with_rate_limit_handling(
        slack_messages=slack_messages,
        access_token=access_token,
        team_id=team_id,
    )
    for slack_message, thread_text in zip(slack_messages, thread_texts):
        slack_message.text = thread_text

    # get the highlighted texts from shortest to longest
    highlighted_texts: set[str] = set()
    for slack_message in slack_messages:
        highlighted_texts.update(slack_message.highlighted_texts)
    sorted_highlighted_texts = sorted(highlighted_texts, key=len)

    # For queries without highlights (e.g., empty recency queries), we should keep all chunks
    has_highlights = len(sorted_highlighted_texts) > 0

    # convert slack messages to index documents
    index_docs: list[IndexingDocument] = []
    for slack_message in slack_messages:
        section: TextSection = TextSection(
            text=slack_message.text, link=slack_message.link
        )
        index_docs.append(
            IndexingDocument(
                id=slack_message.document_id,
                sections=[section],
                processed_sections=[section],
                source=DocumentSource.SLACK,
                title=slack_message.semantic_identifier,
                semantic_identifier=slack_message.semantic_identifier,
                metadata=slack_message.metadata,
                doc_updated_at=slack_message.timestamp,
            )
        )

    # chunk index docs into doc aware chunks
    # a single index doc can get split into multiple chunks
    if search_settings is None:
        if db_session is None:
            raise ValueError("Either db_session or search_settings must be provided")
        search_settings = get_current_search_settings(db_session)
    embedder = DefaultIndexingEmbedder.from_db_search_settings(
        search_settings=search_settings
    )
    multipass_config = get_multipass_config(search_settings)
    enable_contextual_rag = (
        search_settings.enable_contextual_rag or ENABLE_CONTEXTUAL_RAG
    )
    chunker = Chunker(
        tokenizer=embedder.embedding_model.tokenizer,
        enable_multipass=multipass_config.multipass_indexing,
        enable_large_chunks=multipass_config.enable_large_chunks,
        enable_contextual_rag=enable_contextual_rag,
    )
    chunks = chunker.chunk(index_docs)

    # prune chunks without any highlighted texts
    # BUT: for recency queries without keywords, keep all chunks
    relevant_chunks: list[DocAwareChunk] = []
    chunkid_to_match_highlight: dict[str, str] = {}

    if not has_highlights:
        # No highlighted terms - keep all chunks (recency query)
        for chunk in chunks:
            chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
            relevant_chunks.append(chunk)
            chunkid_to_match_highlight[chunk_id] = chunk.content  # No highlighting
            if limit and len(relevant_chunks) >= limit:
                break
    else:
        # Prune chunks that don't contain highlighted terms
        for chunk in chunks:
            match_highlight = chunk.content
            for highlight in sorted_highlighted_texts:  # faster than re sub
                match_highlight = match_highlight.replace(
                    highlight, f"<hi>{highlight}</hi>"
                )

            # if nothing got replaced, the chunk is irrelevant
            if len(match_highlight) == len(chunk.content):
                continue

            chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
            relevant_chunks.append(chunk)
            chunkid_to_match_highlight[chunk_id] = match_highlight
            if limit and len(relevant_chunks) >= limit:
                break

    # convert to inference chunks
    top_chunks: list[InferenceChunk] = []
    for chunk in relevant_chunks:
        document_id = chunk.source_document.id
        chunk_id = f"{document_id}__{chunk.chunk_id}"

        top_chunks.append(
            InferenceChunk(
                chunk_id=chunk.chunk_id,
                blurb=chunk.blurb,
                content=chunk.content,
                source_links=chunk.source_links,
                image_file_id=chunk.image_file_id,
                section_continuation=chunk.section_continuation,
                semantic_identifier=docid_to_message[document_id].semantic_identifier,
                document_id=document_id,
                source_type=DocumentSource.SLACK,
                title=chunk.title_prefix,
                boost=0,
                score=convert_slack_score(docid_to_message[document_id].slack_score),
                hidden=False,
                is_relevant=None,
                relevance_explanation="",
                metadata=docid_to_message[document_id].metadata,
                match_highlights=[chunkid_to_match_highlight[chunk_id]],
                doc_summary="",
                chunk_context="",
                updated_at=docid_to_message[document_id].timestamp,
                is_federated=True,
            )
        )

    return top_chunks


================================================
FILE: backend/onyx/context/search/federated/slack_search_utils.py
================================================
import fnmatch
import json
import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any

from pydantic import ValidationError

from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.models import ChunkIndexRequest
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Constants for date extraction heuristics
DEFAULT_RECENCY_DAYS = 7
DEFAULT_LATELY_DAYS = 14
DAYS_PER_WEEK = 7
DAYS_PER_MONTH = 30
MAX_CONTENT_WORDS = 3

# Punctuation to strip from words during analysis
WORD_PUNCTUATION = ".,!?;:\"'#"

RECENCY_KEYWORDS = ["recent", "latest", "newest", "last"]

# All Slack channel types for fetching metadata
ALL_CHANNEL_TYPES = [
    ChannelType.PUBLIC_CHANNEL.value,
    ChannelType.IM.value,
    ChannelType.MPIM.value,
    ChannelType.PRIVATE_CHANNEL.value,
]

# Map Slack API scopes to their corresponding channel types
# This is used for graceful degradation when scopes are missing
SCOPE_TO_CHANNEL_TYPE_MAP = {
    "mpim:read": ChannelType.MPIM.value,
    "mpim:history": ChannelType.MPIM.value,
    "im:read": ChannelType.IM.value,
    "im:history": ChannelType.IM.value,
    "groups:read": ChannelType.PRIVATE_CHANNEL.value,
    "groups:history": ChannelType.PRIVATE_CHANNEL.value,
    "channels:read": ChannelType.PUBLIC_CHANNEL.value,
    "channels:history": ChannelType.PUBLIC_CHANNEL.value,
}


def get_channel_type_for_missing_scope(scope: str) -> str | None:
    """Get the channel type that requires a specific Slack scope.

    Args:
        scope: The Slack API scope (e.g., 'mpim:read', 'im:history')

    Returns:
        The channel type string if scope is recognized, None otherwise

    Examples:
        >>> get_channel_type_for_missing_scope('mpim:read')
        'mpim'
        >>> get_channel_type_for_missing_scope('im:read')
        'im'
        >>> get_channel_type_for_missing_scope('unknown:scope')
        None
    """
    return SCOPE_TO_CHANNEL_TYPE_MAP.get(scope)


def _parse_llm_code_block_response(response: str) -> str:
    """Remove code block markers from LLM response if present.

    Handles responses wrapped in triple backticks (```) by removing
    the opening and closing markers.

    Args:
        response: Raw LLM response string

    Returns:
        Cleaned response with code block markers removed
    """
    response_clean = response.strip()
    if response_clean.startswith("```"):
        lines = response_clean.split("\n")
        lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        response_clean = "\n".join(lines)
    return response_clean


def is_recency_query(query: str) -> bool:
    """Check if a query is primarily about recency (not content + recency).

    Returns True only for pure recency queries like "recent messages" or "latest updates",
    but False for queries with content + recency like "golf scores last saturday".
    """
    # Check if query contains recency keywords
    has_recency_keyword = any(
        re.search(rf"\b{re.escape(keyword)}\b", query, flags=re.IGNORECASE)
        for keyword in RECENCY_KEYWORDS
    )

    if not has_recency_keyword:
        return False

    # Get combined stop words (English + Slack-specific)
    all_stop_words = _get_combined_stop_words()

    # Extract content words (excluding stop words)
    query_lower = query.lower()
    words = query_lower.split()

    # Count content words (not stop words, length > 2)
    content_word_count = 0
    for word in words:
        clean_word = word.strip(WORD_PUNCTUATION)
        if clean_word and len(clean_word) > 2 and clean_word not in all_stop_words:
            content_word_count += 1

    # If query has significant content words (>= 2), it's not a pure recency query
    # Examples:
    # - "recent messages" -> content_word_count = 0 -> pure recency
    # - "golf scores last saturday" -> content_word_count = 3 (golf, scores, saturday) -> not pure recency
    return content_word_count < 2


def extract_date_range_from_query(
    query: str,
    llm: LLM,
    default_search_days: int,
) -> int:
    query_lower = query.lower()

    if re.search(r"\btoday(?:\'?s)?\b", query_lower):
        return 0

    if re.search(r"\byesterday\b", query_lower):
        return min(1, default_search_days)

    # Handle "last [day of week]" - e.g., "last monday", "last saturday"
    days_of_week = [
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]
    for day in days_of_week:
        if re.search(rf"\b(?:last|this)\s+{day}\b", query_lower):
            # Assume last occurrence of that day was within the past week
            return min(DAYS_PER_WEEK, default_search_days)

    match = re.search(r"\b(?:last|past)\s+(\d+)\s+days?\b", query_lower)
    if match:
        days = int(match.group(1))
        return min(days, default_search_days)

    if re.search(r"\b(?:last|past|this)\s+week\b", query_lower):
        return min(DAYS_PER_WEEK, default_search_days)

    match = re.search(r"\b(?:last|past)\s+(\d+)\s+weeks?\b", query_lower)
    if match:
        weeks = int(match.group(1))
        return min(weeks * DAYS_PER_WEEK, default_search_days)

    if re.search(r"\b(?:last|past|this)\s+month\b", query_lower):
        return min(DAYS_PER_MONTH, default_search_days)

    match = re.search(r"\b(?:last|past)\s+(\d+)\s+months?\b", query_lower)
    if match:
        months = int(match.group(1))
        return min(months * DAYS_PER_MONTH, default_search_days)

    if re.search(r"\brecent(?:ly)?\b", query_lower):
        return min(DEFAULT_RECENCY_DAYS, default_search_days)

    if re.search(r"\blately\b", query_lower):
        return min(DEFAULT_LATELY_DAYS, default_search_days)

    try:
        prompt = SLACK_DATE_EXTRACTION_PROMPT.format(query=query)
        prompt_msg = UserMessage(content=prompt)

        # Call LLM with Braintrust tracing
        with llm_generation_span(
            llm=llm, flow="slack_date_extraction", input_messages=[prompt_msg]
        ) as span_generation:
            llm_response = llm.invoke(prompt_msg)
            record_llm_response(span_generation, llm_response)
            response = llm_response_to_string(llm_response)

        response_clean = _parse_llm_code_block_response(response)

        try:
            data = json.loads(response_clean)
            if not isinstance(data, dict):
                logger.debug(
                    f"LLM date extraction returned non-dict response for query: "
                    f"'{query}', using default: {default_search_days} days"
                )
                return default_search_days

            days_back = data.get("days_back")
            if days_back is None:
                logger.debug(
                    f"LLM date extraction returned null for query: '{query}', using default: {default_search_days} days"
                )
                return default_search_days

            if not isinstance(days_back, (int, float)):
                logger.debug(
                    f"LLM date extraction returned non-numeric days_back for "
                    f"query: '{query}', using default: {default_search_days} days"
                )
                return default_search_days

        except json.JSONDecodeError:
            logger.debug(
                f"Failed to parse LLM date extraction response for query: '{query}' "
                f"(response: '{response_clean}'), "
                f"using default: {default_search_days} days"
            )
            return default_search_days

        return min(int(days_back), default_search_days)

    except Exception as e:
        logger.warning(f"Error extracting date range with LLM for query '{query}': {e}")
        return default_search_days


def matches_exclude_pattern(channel_name: str, patterns: list[str]) -> bool:
    if not patterns:
        return False

    channel_norm = channel_name.lower().strip().lstrip("#")

    for pattern in patterns:
        pattern_norm = pattern.lower().strip().lstrip("#")
        if fnmatch.fnmatch(channel_norm, pattern_norm):
            return True

    return False


def build_channel_query_filter(
    parsed_entities: SlackEntities | dict[str, Any],
    available_channels: list[str] | None = None,
) -> str:
    # Parse entities if dict
    try:
        if isinstance(parsed_entities, dict):
            entities = SlackEntities(**parsed_entities)
        else:
            entities = parsed_entities
    except ValidationError:
        return ""

    search_all_channels = entities.search_all_channels

    if search_all_channels:
        if not entities.exclude_channels:
            return ""

        # Can't apply exclusions without available_channels
        if not available_channels:
            return ""

        excluded_channels = [
            ch
            for ch in available_channels
            if matches_exclude_pattern(ch, entities.exclude_channels)
        ]
        normalized_excluded = [ch.lstrip("#") for ch in excluded_channels]

        exclusion_filters = [f"-in:#{channel}" for channel in normalized_excluded]
        return " ".join(exclusion_filters)

    if not entities.channels:
        return ""

    included_channels: list[str] = []
    for pattern in entities.channels:
        pattern_norm = pattern.lstrip("#")
        if "*" in pattern_norm or "?" in pattern_norm:
            # Glob patterns require available_channels
            if available_channels:
                matching = [
                    ch
                    for ch in available_channels
                    if fnmatch.fnmatch(ch.lstrip("#").lower(), pattern_norm.lower())
                ]
                included_channels.extend(matching)
        else:
            # Exact match: use directly or verify against available_channels
            if not available_channels or pattern_norm in [
                ch.lstrip("#") for ch in available_channels
            ]:
                included_channels.append(pattern_norm)

    # Apply exclusions to included channels
    if entities.exclude_channels:
        included_channels = [
            ch
            for ch in included_channels
            if not matches_exclude_pattern(ch, entities.exclude_channels)
        ]

    if not included_channels:
        return ""

    normalized_channels = [ch.lstrip("#") for ch in included_channels]
    filters = [f"in:#{channel}" for channel in normalized_channels]
    return " ".join(filters)


def get_channel_type(
    channel_info: dict[str, Any] | None = None,
    channel_id: str | None = None,
    channel_metadata: dict[str, ChannelMetadata] | None = None,
) -> ChannelType:
    """
    Determine channel type from channel info dict or by looking up channel_id.

    Args:
        channel_info: Channel info dict from Slack API (direct mode)
        channel_id: Channel ID to look up (lookup mode)
        channel_metadata: Pre-fetched metadata dict (for lookup mode)

    Returns:
        ChannelType enum
    """
    if channel_info is not None:
        if channel_info.get("is_im"):
            return ChannelType.IM
        if channel_info.get("is_mpim"):
            return ChannelType.MPIM
        if channel_info.get("is_private"):
            return ChannelType.PRIVATE_CHANNEL
        return ChannelType.PUBLIC_CHANNEL

    # Lookup mode: get type from pre-fetched metadata
    if channel_id and channel_metadata:
        ch_meta = channel_metadata.get(channel_id)
        if ch_meta:
            type_str = ch_meta.get("type")
            if type_str == ChannelType.IM.value:
                return ChannelType.IM
            elif type_str == ChannelType.MPIM.value:
                return ChannelType.MPIM
            elif type_str == ChannelType.PRIVATE_CHANNEL.value:
                return ChannelType.PRIVATE_CHANNEL
            return ChannelType.PUBLIC_CHANNEL

    return ChannelType.PUBLIC_CHANNEL


def should_include_message(channel_type: ChannelType, entities: dict[str, Any]) -> bool:
    include_dm = entities.get("include_dm", False)
    include_group_dm = entities.get("include_group_dm", False)
    include_private = entities.get("include_private_channels", False)

    if channel_type == ChannelType.IM:
        return include_dm
    if channel_type == ChannelType.MPIM:
        return include_group_dm
    if channel_type == ChannelType.PRIVATE_CHANNEL:
        return include_private
    return True


def extract_channel_references_from_query(query_text: str) -> set[str]:
    """Extract channel names referenced in the query text.

    Only matches explicit channel references with prepositions or # symbols:
    - "in the office channel"
    - "from the office channel"
    - "in #office"
    - "from #office"

    Does NOT match generic phrases like "slack discussions" or "team channel".

    Args:
        query_text: The user's query text

    Returns:
        Set of channel names (without # prefix)
    """
    channel_references = set()
    query_lower = query_text.lower()

    # Only match channels with explicit prepositions (in/from) or # prefix
    # This prevents false positives like "slack discussions" being interpreted as channel "slack"
    channel_patterns = [
        r"\bin\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b",  # "in the office channel"
        r"\bfrom\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b",  # "from the office channel"
        r"\bin[:\s]*#([a-z0-9_-]+)\b",  # "in #office" or "in:#office"
        r"\bfrom[:\s]*#([a-z0-9_-]+)\b",  # "from #office" or "from:#office"
    ]

    for pattern in channel_patterns:
        matches = re.finditer(pattern, query_lower)
        for match in matches:
            channel_references.add(match.group(1))

    return channel_references


def validate_channel_references(
    channel_references: set[str],
    entities: dict[str, Any],
    available_channels: list[str] | None,
) -> None:
    """Validate that referenced channels exist and are allowed by entity config.

    Args:
        channel_references: Set of channel names extracted from query
        entities: Entity configuration dict
        available_channels: List of available channel names in workspace

    Raises:
        ValueError: If channel doesn't exist, is excluded, or not in inclusion list
    """
    if not channel_references or not entities:
        return

    try:
        parsed_entities = SlackEntities(**entities)

        for channel_name in channel_references:
            # Check if channel exists
            if available_channels is not None:
                # Normalize for comparison (available_channels may or may not have #)
                normalized_available = [
                    ch.lstrip("#").lower() for ch in available_channels
                ]
                if channel_name.lower() not in normalized_available:
                    raise ValueError(
                        f"Channel '{channel_name}' does not exist in your Slack workspace. "
                        f"Please check the channel name and try again."
                    )

            # Check if channel is in exclusion list
            if parsed_entities.exclude_channels:
                if matches_exclude_pattern(
                    channel_name, parsed_entities.exclude_channels
                ):
                    raise ValueError(
                        f"Channel '{channel_name}' is excluded from search by your configuration. "
                        f"Please update your connector settings to search this channel."
                    )

            # Check if channel is in inclusion list (when search_all_channels is False)
            if not parsed_entities.search_all_channels:
                if parsed_entities.channels:
                    # Normalize channel lists for comparison
                    normalized_channels = [
                        ch.lstrip("#").lower() for ch in parsed_entities.channels
                    ]
                    if channel_name.lower() not in normalized_channels:
                        raise ValueError(
                            f"Channel '{channel_name}' is not in your configured channel list. "
                            f"Please update your connector settings to include this channel."
                        )

    except ValidationError:
        # If entities are malformed, skip validation
        pass


def build_channel_override_query(channel_references: set[str], time_filter: str) -> str:
    """Build a Slack query with ONLY channel filters and time filter (no keywords).

    Args:
        channel_references: Set of channel names to search
        time_filter: Time filter string (e.g., " after:2025-11-07")

    Returns:
        Query string with __CHANNEL_OVERRIDE__ marker
    """
    normalized_channels = [ch.lstrip("#") for ch in channel_references]
    channel_filter = " ".join([f"in:#{channel}" for channel in normalized_channels])
    return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"


# Slack-specific stop words (in addition to standard English stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
    RECENCY_KEYWORDS
    + [
        "dm",
        "dms",
        "message",
        "messages",
        "channel",
        "channels",
        "slack",
        "post",
        "posted",
        "posting",
        "sent",
    ]
)


def _get_combined_stop_words() -> frozenset[str]:
    """Get combined English + Slack-specific stop words.

    Returns a frozenset of stop words for filtering content words.

    Note: Currently only supports English stop words. Non-English queries
    may have suboptimal content word extraction. Future enhancement could
    detect query language and load appropriate stop words.
    """
    return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS


def extract_content_words_from_recency_query(
    query_text: str, channel_references: set[str]
) -> list[str]:
    """Extract meaningful content words from a recency query.

    Filters out English stop words, Slack-specific terms, channel references, and proper nouns.

    Args:
        query_text: The user's query text
        channel_references: Channel names to exclude from content words

    Returns:
        List of content words (up to MAX_CONTENT_WORDS)
    """
    # Get combined stop words (English + Slack-specific)
    all_stop_words = _get_combined_stop_words()

    words = query_text.split()
    content_words = []

    for word in words:
        clean_word = word.lower().strip(WORD_PUNCTUATION)
        # Skip if it's a channel reference or a stop word
        if clean_word in channel_references:
            continue
        if clean_word and clean_word not in all_stop_words and len(clean_word) > 2:
            clean_word_orig = word.strip(WORD_PUNCTUATION)
            if clean_word_orig.lower() not in all_stop_words:
                content_words.append(clean_word_orig)

    # Filter out proper nouns (capitalized words)
    content_words_filtered = [word for word in content_words if not word[0].isupper()]

    return content_words_filtered[:MAX_CONTENT_WORDS]


def _is_valid_keyword_query(line: str) -> bool:
    """Check if a line looks like a valid keyword query vs explanatory text.

    Returns False for lines that appear to be LLM explanations rather than keywords.
    """
    # Reject lines that start with parentheses (explanatory notes)
    if line.startswith("("):
        return False

    # Reject lines that are too long (likely sentences, not keywords)
    # Keywords should be short - reject if > 50 chars or > 6 words
    if len(line) > 50 or len(line.split()) > 6:
        return False

    return True


def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
    """Use LLM to expand query into multiple search variations.

    Args:
        query_text: The user's original query
        llm: LLM instance to use for expansion

    Returns:
        List of rephrased query strings (up to MAX_SLACK_QUERY_EXPANSIONS)
    """
    prompt = UserMessage(
        content=SLACK_QUERY_EXPANSION_PROMPT.format(
            query=query_text, max_queries=MAX_SLACK_QUERY_EXPANSIONS
        )
    )

    try:
        # Call LLM with Braintrust tracing
        with llm_generation_span(
            llm=llm, flow="slack_query_expansion", input_messages=[prompt]
        ) as span_generation:
            llm_response = llm.invoke(prompt)
            record_llm_response(span_generation, llm_response)
            response = llm_response_to_string(llm_response)

        response_clean = _parse_llm_code_block_response(response)

        # Split into lines and filter out empty lines
        raw_queries = [
            line.strip() for line in response_clean.split("\n") if line.strip()
        ]

        # Filter out lines that look like explanatory text rather than keywords
        rephrased_queries = [q for q in raw_queries if _is_valid_keyword_query(q)]

        # Log if we filtered out garbage
        if len(raw_queries) != len(rephrased_queries):
            filtered_out = set(raw_queries) - set(rephrased_queries)
            logger.warning(f"Filtered out non-keyword LLM responses: {filtered_out}")

        # If no queries generated, use empty query
        if not rephrased_queries:
            logger.debug("No content keywords extracted from query expansion")
            return [""]

        logger.debug(
            f"Expanded query into {len(rephrased_queries)} queries: {rephrased_queries}"
        )
        return rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]

    except Exception as e:
        logger.error(f"Error expanding query: {e}")
        return [query_text]


def build_slack_queries(
    query: ChunkIndexRequest,
    llm: LLM,
    entities: dict[str, Any] | None = None,
    available_channels: list[str] | None = None,
) -> list[str]:
    """Build Slack query strings with date filtering and query expansion."""
    default_search_days = 30
    if entities:
        try:
            parsed_entities = SlackEntities(**entities)
            default_search_days = parsed_entities.default_search_days
        except ValidationError as e:
            logger.warning(f"Invalid entities in build_slack_queries: {e}")

    days_back = extract_date_range_from_query(
        query=query.query,
        llm=llm,
        default_search_days=default_search_days,
    )

    # get time filter
    time_filter = ""
    if days_back is not None and days_back >= 0:
        if days_back == 0:
            time_filter = " on:today"
        else:
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
            time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"

    # ALWAYS extract channel references from the query (not just for recency queries)
    channel_references = extract_channel_references_from_query(query.query)

    # Validate channel references against available channels and entity config
    # This will raise ValueError if channels are invalid
    if channel_references and entities:
        try:
            validate_channel_references(
                channel_references, entities, available_channels
            )
            logger.info(
                f"Detected and validated channel references: {channel_references}"
            )

            # If valid channels detected, use ONLY those channels with NO keywords
            # Return query with ONLY time filter + channel filter (no keywords)
            return [build_channel_override_query(channel_references, time_filter)]
        except ValueError as e:
            # If validation fails, log the error and continue with normal flow
            logger.warning(f"Channel reference validation failed: {e}")
            channel_references = set()

    # use llm to generate slack queries (use original query to use same keywords as the user)
    if is_recency_query(query.query):
        # For recency queries, extract content words (excluding channel names and stop words)
        content_words = extract_content_words_from_recency_query(
            query.query, channel_references
        )
        rephrased_queries = [" ".join(content_words)] if content_words else [""]
    else:
        # For other queries, use LLM to expand into multiple variations
        rephrased_queries = expand_query_with_llm(query.query, llm)

    # Build final query strings with time filters
    return [
        rephrased_query.strip() + time_filter
        for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
    ]


================================================
FILE: backend/onyx/context/search/models.py
================================================
from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from typing import Any

from pydantic import BaseModel
from pydantic import Field

from onyx.configs.constants import DocumentSource
from onyx.db.models import SearchSettings
from onyx.indexing.models import BaseChunk
from onyx.indexing.models import IndexingSetting
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX


class QueryExpansions(BaseModel):
    keywords_expansions: list[str] | None = None
    semantic_expansions: list[str] | None = None


class QueryExpansionType(Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"


class SearchSettingsCreationRequest(IndexingSetting):
    @classmethod
    def from_db_model(
        cls, search_settings: SearchSettings
    ) -> "SearchSettingsCreationRequest":
        indexing_setting = IndexingSetting.from_db_model(search_settings)
        return cls(**indexing_setting.model_dump())


class SavedSearchSettings(IndexingSetting):
    # Previously this contained also Inference time settings. Keeping this wrapper class around
    # as there may again be inference time settings that may get added.
    @classmethod
    def from_db_model(cls, search_settings: SearchSettings) -> "SavedSearchSettings":
        return cls(
            # Indexing Setting
            model_name=search_settings.model_name,
            model_dim=search_settings.model_dim,
            normalize=search_settings.normalize,
            query_prefix=search_settings.query_prefix,
            passage_prefix=search_settings.passage_prefix,
            provider_type=search_settings.provider_type,
            index_name=search_settings.index_name,
            multipass_indexing=search_settings.multipass_indexing,
            embedding_precision=search_settings.embedding_precision,
            reduced_dimension=search_settings.reduced_dimension,
            switchover_type=search_settings.switchover_type,
            enable_contextual_rag=search_settings.enable_contextual_rag,
            contextual_rag_llm_name=search_settings.contextual_rag_llm_name,
            contextual_rag_llm_provider=search_settings.contextual_rag_llm_provider,
        )


class Tag(BaseModel):
    tag_key: str
    tag_value: str


class BaseFilters(BaseModel):
    source_type: list[DocumentSource] | None = None
    document_set: list[str] | None = None
    time_cutoff: datetime | None = None
    tags: list[Tag] | None = None


class UserFileFilters(BaseModel):
    # Scopes search to user files tagged with a given project/persona in Vespa.
    # These are NOT simply the IDs of the current project or persona — they are
    # only set when the persona's/project's user files overflowed the LLM
    # context window and must be searched via vector DB instead of being loaded
    # directly into the prompt.
    project_id_filter: int | None = None
    persona_id_filter: int | None = None


class AssistantKnowledgeFilters(BaseModel):
    """Filters for knowledge attached to an assistant (persona).

    These filters scope search to documents/folders explicitly attached
    to the assistant. When present, only documents matching these criteria
    are searched (in addition to ACL filtering).
    """

    # Document IDs explicitly attached to the assistant
    attached_document_ids: list[str] | None = None
    # Hierarchy node IDs (folders/spaces) attached to the assistant.
    # Matches chunks where ancestor_hierarchy_node_ids contains any of these.
    hierarchy_node_ids: list[int] | None = None


class IndexFilters(BaseFilters, UserFileFilters, AssistantKnowledgeFilters):
    # NOTE: These strings must be formatted in the same way as the output of
    # DocumentAccess::to_acl.
    access_control_list: list[str] | None
    tenant_id: str | None = None


class BasicChunkRequest(BaseModel):
    query: str

    # In case the caller wants to override the weighting between semantic and keyword search.
    hybrid_alpha: float | None = None

    # In case some queries favor recency more than other queries.
    recency_bias_multiplier: float = 1.0

    limit: int | None = None


class ChunkSearchRequest(BasicChunkRequest):
    # Final filters are calculated from these
    user_selected_filters: BaseFilters | None = None

    # Use with caution!
    bypass_acl: bool = False


# From the Chat Session we know what project (if any) this search should include
# From the user uploads and persona uploaded files, we know which of those to include
class ChunkIndexRequest(BasicChunkRequest):
    # Calculated final filters
    filters: IndexFilters

    query_keywords: list[str] | None = None


class ContextExpansionType(str, Enum):
    NOT_RELEVANT = "not_relevant"
    MAIN_SECTION_ONLY = "main_section_only"
    INCLUDE_ADJACENT_SECTIONS = "include_adjacent_sections"
    FULL_DOCUMENT = "full_document"


class InferenceChunk(BaseChunk):
    document_id: str
    source_type: DocumentSource
    semantic_identifier: str
    title: str | None  # Separate from Semantic Identifier though often same
    boost: int
    score: float | None
    hidden: bool
    is_relevant: bool | None = None
    relevance_explanation: str | None = None
    # TODO(andrei): Ideally we could improve this to where each value is just a
    # list of strings.
    metadata: dict[str, str | list[str]]
    # Matched sections in the chunk. Uses Vespa syntax e.g. <hi>TEXT</hi>
    # to specify that a set of words should be highlighted. For example:
    # ["<hi>the</hi> <hi>answer</hi> is 42", "he couldn't find an <hi>answer</hi>"]
    match_highlights: list[str]
    doc_summary: str
    chunk_context: str

    # when the doc was last updated
    updated_at: datetime | None
    primary_owners: list[str] | None = None
    secondary_owners: list[str] | None = None
    large_chunk_reference_ids: list[int] = Field(default_factory=list)

    is_federated: bool = False

    @property
    def unique_id(self) -> str:
        return f"{self.document_id}__{self.chunk_id}"

    def __repr__(self) -> str:
        blurb_words = self.blurb.split()
        short_blurb = ""
        for word in blurb_words:
            if not short_blurb:
                short_blurb = word
                continue
            if len(short_blurb) > 25:
                break
            short_blurb += " " + word
        return f"Inference Chunk: {self.document_id} - {short_blurb}..."

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, InferenceChunk):
            return False
        return (self.document_id, self.chunk_id) == (other.document_id, other.chunk_id)

    def __hash__(self) -> int:
        return hash((self.document_id, self.chunk_id))

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, InferenceChunk):
            return NotImplemented
        if self.score is None:
            if other.score is None:
                return self.chunk_id > other.chunk_id
            return True
        if other.score is None:
            return False
        if self.score == other.score:
            return self.chunk_id > other.chunk_id
        return self.score < other.score

    def __gt__(self, other: Any) -> bool:
        if not isinstance(other, InferenceChunk):
            return NotImplemented
        if self.score is None:
            return False
        if other.score is None:
            return True
        if self.score == other.score:
            return self.chunk_id < other.chunk_id
        return self.score > other.score


class InferenceChunkUncleaned(InferenceChunk):
    metadata_suffix: str | None

    def to_inference_chunk(self) -> InferenceChunk:
        # Create a dict of all fields except 'metadata_suffix'
        # Assumes the cleaning has already been applied and just needs to translate to the right type
        inference_chunk_data = {
            k: v
            for k, v in self.model_dump().items()
            if k
            not in ["metadata_suffix"]  # May be other fields to throw out in the future
        }
        return InferenceChunk(**inference_chunk_data)


class InferenceSection(BaseModel):
    """Section list of chunks with a combined content. A section could be a single chunk, several
    chunks from the same document or the entire document."""

    center_chunk: InferenceChunk
    chunks: list[InferenceChunk]
    combined_content: str


class SearchDoc(BaseModel):
    document_id: str
    chunk_ind: int
    semantic_identifier: str
    link: str | None = None
    blurb: str
    source_type: DocumentSource
    boost: int
    # Whether the document is hidden when doing a standard search
    # since a standard search will never find a hidden doc, this can only ever
    # be `True` when doing an admin search
    hidden: bool
    metadata: dict[str, str | list[str]]
    score: float | None = None
    is_relevant: bool | None = None
    relevance_explanation: str | None = None
    # Matched sections in the doc. Uses Vespa syntax e.g. <hi>TEXT</hi>
    # to specify that a set of words should be highlighted. For example:
    # ["<hi>the</hi> <hi>answer</hi> is 42", "the answer is <hi>42</hi>""]
    match_highlights: list[str]
    # when the doc was last updated
    updated_at: datetime | None = None
    primary_owners: list[str] | None = None
    secondary_owners: list[str] | None = None
    is_internet: bool = False

    @classmethod
    def from_chunks_or_sections(
        cls,
        items: "Sequence[InferenceChunk | InferenceSection] | None",
    ) -> list["SearchDoc"]:
        """Convert a sequence of InferenceChunk or InferenceSection objects to SearchDoc objects."""
        if not items:
            return []

        search_docs = [
            cls(
                document_id=(
                    chunk := (
                        item.center_chunk
                        if isinstance(item, InferenceSection)
                        else item
                    )
                ).document_id,
                chunk_ind=chunk.chunk_id,
                semantic_identifier=chunk.semantic_identifier or "Unknown",
                link=chunk.source_links[0] if chunk.source_links else None,
                blurb=chunk.blurb,
                source_type=chunk.source_type,
                boost=chunk.boost,
                hidden=chunk.hidden,
                metadata=chunk.metadata,
                score=chunk.score,
                match_highlights=chunk.match_highlights,
                updated_at=chunk.updated_at,
                primary_owners=chunk.primary_owners,
                secondary_owners=chunk.secondary_owners,
                is_internet=False,
            )
            for item in items
        ]

        return search_docs

    # TODO - there is likely a way to clean this all up and not have the switch between these
    @classmethod
    def from_saved_search_doc(cls, saved_search_doc: "SavedSearchDoc") -> "SearchDoc":
        """Convert a SavedSearchDoc to SearchDoc by dropping the db_doc_id field."""
        saved_search_doc_data = saved_search_doc.model_dump()
        # Remove db_doc_id as it's not part of SearchDoc
        saved_search_doc_data.pop("db_doc_id", None)
        return cls(**saved_search_doc_data)

    @classmethod
    def from_saved_search_docs(
        cls, saved_search_docs: list["SavedSearchDoc"]
    ) -> list["SearchDoc"]:
        return [
            cls.from_saved_search_doc(saved_search_doc)
            for saved_search_doc in saved_search_docs
        ]

    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
        initial_dict = super().model_dump(*args, **kwargs)  # type: ignore
        initial_dict["updated_at"] = (
            self.updated_at.isoformat() if self.updated_at else None
        )
        return initial_dict


class SearchDocsResponse(BaseModel):
    search_docs: list[SearchDoc]
    # Maps the citation number to the document id
    # Since these are no longer just links on the frontend but instead document cards, mapping it to the
    # document id is  the most staightforward way.
    citation_mapping: dict[int, str]

    # For cases where the frontend only needs to display a subset of the search docs
    # The whole list is typically still needed for later steps but this set should be saved separately
    displayed_docs: list[SearchDoc] | None = None


class SavedSearchDoc(SearchDoc):
    db_doc_id: int
    score: float | None = 0.0

    @classmethod
    def from_search_doc(
        cls, search_doc: SearchDoc, db_doc_id: int = 0
    ) -> "SavedSearchDoc":
        """IMPORTANT: careful using this and not providing a db_doc_id If db_doc_id is not
        provided, it won't be able to actually fetch the saved doc and info later on. So only skip
        providing this if the SavedSearchDoc will not be used in the future"""
        search_doc_data = search_doc.model_dump()
        search_doc_data["score"] = search_doc_data.get("score") or 0.0
        return cls(**search_doc_data, db_doc_id=db_doc_id)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SavedSearchDoc":
        """Create SavedSearchDoc from serialized dictionary data (e.g., from database JSON)"""
        return cls(**data)

    @classmethod
    def from_url(cls, url: str) -> "SavedSearchDoc":
        """Create a SavedSearchDoc from a URL for internet search documents.

        Uses the INTERNET_SEARCH_DOC_ prefix for document_id to match the format
        used by inference sections created from internet content.
        """
        return cls(
            # db_doc_id can be a filler value since these docs are not saved to the database.
            db_doc_id=0,
            document_id=WEB_SEARCH_PREFIX + url,
            chunk_ind=0,
            semantic_identifier=url,
            link=url,
            blurb="",
            source_type=DocumentSource.WEB,
            boost=1,
            hidden=False,
            metadata={},
            score=0.0,
            is_relevant=None,
            relevance_explanation=None,
            match_highlights=[],
            updated_at=None,
            primary_owners=None,
            secondary_owners=None,
            is_internet=True,
        )

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, SavedSearchDoc):
            return NotImplemented
        self_score = self.score if self.score is not None else 0.0
        other_score = other.score if other.score is not None else 0.0
        return self_score < other_score


class SavedSearchDocWithContent(SavedSearchDoc):
    """Used for endpoints that need to return the actual contents of the retrieved
    section in addition to the match_highlights."""

    content: str


class PersonaSearchInfo(BaseModel):
    """Snapshot of persona data needed by the search pipeline.

    Extracted from the ORM Persona before the DB session is released so that
    SearchTool and search_pipeline never lazy-load relationships post-commit.
    """

    document_set_names: list[str]
    search_start_date: datetime | None
    attached_document_ids: list[str]
    hierarchy_node_ids: list[int]


================================================
FILE: backend/onyx/context/search/pipeline.py
================================================
from collections import defaultdict
from datetime import datetime

from sqlalchemy.orm import Session

from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import PersonaSearchInfo
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.context.search.retrieval.search_runner import search_chunks
from onyx.context.search.utils import inference_section_from_chunks
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.english_stopwords import strip_stopwords
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.secondary_llm_flows.source_filter import extract_source_filter
from onyx.secondary_llm_flows.time_filter import extract_time_filter
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import FunctionCall
from onyx.utils.threadpool_concurrency import run_functions_in_parallel
from onyx.utils.timing import log_function_time
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


@log_function_time(print_only=True)
def _build_index_filters(
    user_provided_filters: BaseFilters | None,
    user: User,  # Used for ACLs, anonymous users only see public docs
    project_id_filter: int | None,
    persona_id_filter: int | None,
    persona_document_sets: list[str] | None,
    persona_time_cutoff: datetime | None,
    db_session: Session | None = None,
    auto_detect_filters: bool = False,
    query: str | None = None,
    llm: LLM | None = None,
    bypass_acl: bool = False,
    # Assistant knowledge filters
    attached_document_ids: list[str] | None = None,
    hierarchy_node_ids: list[int] | None = None,
    # Pre-fetched ACL filters (skips DB query when provided)
    acl_filters: list[str] | None = None,
) -> IndexFilters:
    if auto_detect_filters and (llm is None or query is None):
        raise RuntimeError("LLM and query are required for auto detect filters")

    base_filters = user_provided_filters or BaseFilters()

    document_set_filter = (
        base_filters.document_set
        if base_filters.document_set is not None
        else persona_document_sets
    )

    time_filter = base_filters.time_cutoff or persona_time_cutoff
    source_filter = base_filters.source_type

    detected_time_filter = None
    detected_source_filter = None
    if auto_detect_filters:
        time_filter_fnc = FunctionCall(extract_time_filter, (query, llm), {})
        if not source_filter:
            source_filter_fnc = FunctionCall(
                extract_source_filter, (query, llm, db_session), {}
            )
        else:
            source_filter_fnc = None

        functions_to_run = [fn for fn in [time_filter_fnc, source_filter_fnc] if fn]
        parallel_results = run_functions_in_parallel(functions_to_run)
        # Detected favor recent is not used for now
        detected_time_filter, _detected_favor_recent = parallel_results[
            time_filter_fnc.result_id
        ]
        if source_filter_fnc:
            detected_source_filter = parallel_results[source_filter_fnc.result_id]

    # If the detected time filter is more recent, use that one
    if time_filter and detected_time_filter and detected_time_filter > time_filter:
        time_filter = detected_time_filter

    # If the user has explicitly set a source filter, use that one
    if not source_filter and detected_source_filter:
        source_filter = detected_source_filter

    if bypass_acl:
        user_acl_filters = None
    elif acl_filters is not None:
        user_acl_filters = acl_filters
    else:
        if db_session is None:
            raise ValueError("Either db_session or acl_filters must be provided")
        user_acl_filters = build_access_filters_for_user(user, db_session)

    final_filters = IndexFilters(
        project_id_filter=project_id_filter,
        persona_id_filter=persona_id_filter,
        source_type=source_filter,
        document_set=document_set_filter,
        time_cutoff=time_filter,
        tags=base_filters.tags,
        access_control_list=user_acl_filters,
        tenant_id=get_current_tenant_id() if MULTI_TENANT else None,
        # Assistant knowledge filters
        attached_document_ids=attached_document_ids,
        hierarchy_node_ids=hierarchy_node_ids,
    )

    return final_filters


def merge_individual_chunks(
    chunks: list[InferenceChunk],
) -> list[InferenceSection]:
    """Merge adjacent chunks from the same document into sections.

    Chunks are considered adjacent if their chunk_ids differ by 1 and they
    are from the same document. The section maintains the position of the
    first chunk in the original list.
    """
    if not chunks:
        return []

    # Create a mapping from (document_id, chunk_id) to original index
    # This helps us find the chunk that appears first in the original list
    chunk_to_original_index: dict[tuple[str, int], int] = {}
    for idx, chunk in enumerate(chunks):
        chunk_to_original_index[(chunk.document_id, chunk.chunk_id)] = idx

    # Group chunks by document_id
    doc_chunks: dict[str, list[InferenceChunk]] = defaultdict(list)
    for chunk in chunks:
        doc_chunks[chunk.document_id].append(chunk)

    # For each document, sort chunks by chunk_id to identify adjacent chunks
    for doc_id in doc_chunks:
        doc_chunks[doc_id].sort(key=lambda c: c.chunk_id)

    # Create a mapping from (document_id, chunk_id) to the section it belongs to
    # This helps us maintain the original order
    chunk_to_section: dict[tuple[str, int], InferenceSection] = {}

    # Process each document's chunks
    for doc_id, doc_chunk_list in doc_chunks.items():
        if not doc_chunk_list:
            continue

        # Group adjacent chunks into sections
        current_section_chunks = [doc_chunk_list[0]]

        for i in range(1, len(doc_chunk_list)):
            prev_chunk = doc_chunk_list[i - 1]
            curr_chunk = doc_chunk_list[i]

            # Check if chunks are adjacent (chunk_id difference is 1)
            if curr_chunk.chunk_id == prev_chunk.chunk_id + 1:
                # Add to current section
                current_section_chunks.append(curr_chunk)
            else:
                # Create section from previous chunks
                # Find the chunk that appears first in the original list
                center_chunk = min(
                    current_section_chunks,
                    key=lambda c: chunk_to_original_index.get(
                        (c.document_id, c.chunk_id), float("inf")
                    ),
                )
                section = inference_section_from_chunks(
                    center_chunk=center_chunk,
                    chunks=current_section_chunks.copy(),
                )
                if section:
                    for chunk in current_section_chunks:
                        chunk_to_section[(chunk.document_id, chunk.chunk_id)] = section

                # Start new section
                current_section_chunks = [curr_chunk]

        # Create section for the last group
        if current_section_chunks:
            # Find the chunk that appears first in the original list
            center_chunk = min(
                current_section_chunks,
                key=lambda c: chunk_to_original_index.get(
                    (c.document_id, c.chunk_id), float("inf")
                ),
            )
            section = inference_section_from_chunks(
                center_chunk=center_chunk,
                chunks=current_section_chunks.copy(),
            )
            if section:
                for chunk in current_section_chunks:
                    chunk_to_section[(chunk.document_id, chunk.chunk_id)] = section

    # Build result list maintaining original order
    # Use (document_id, chunk_id) of center_chunk as unique identifier for sections
    seen_section_ids: set[tuple[str, int]] = set()
    result: list[InferenceSection] = []

    for chunk in chunks:
        section = chunk_to_section.get((chunk.document_id, chunk.chunk_id))
        if section:
            section_id = (
                section.center_chunk.document_id,
                section.center_chunk.chunk_id,
            )
            if section_id not in seen_section_ids:
                seen_section_ids.add(section_id)
                result.append(section)
        else:
            # Chunk wasn't part of any merged section, create a single-chunk section
            single_section = inference_section_from_chunks(
                center_chunk=chunk,
                chunks=[chunk],
            )
            if single_section:
                single_section_id = (
                    single_section.center_chunk.document_id,
                    single_section.center_chunk.chunk_id,
                )
                if single_section_id not in seen_section_ids:
                    seen_section_ids.add(single_section_id)
                    result.append(single_section)

    return result


@log_function_time(print_only=True, debug_only=True)
def search_pipeline(
    # Query and settings
    chunk_search_request: ChunkSearchRequest,
    # Document index to search over
    # Note that federated sources will also be used (not related to this arg)
    document_index: DocumentIndex,
    # Used for ACLs and federated search, anonymous users only see public docs
    user: User,
    # Pre-extracted persona search configuration (None when no persona)
    persona_search_info: PersonaSearchInfo | None,
    db_session: Session | None = None,
    auto_detect_filters: bool = False,
    llm: LLM | None = None,
    # Vespa metadata filters for overflowing user files.  NOT the raw IDs
    # of the current project/persona — only set when user files couldn't fit
    # in the LLM context and need to be searched via vector DB.
    project_id_filter: int | None = None,
    persona_id_filter: int | None = None,
    # Pre-fetched data — when provided, avoids DB queries (no session needed)
    acl_filters: list[str] | None = None,
    embedding_model: EmbeddingModel | None = None,
    prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,
) -> list[InferenceChunk]:
    persona_document_sets: list[str] | None = (
        persona_search_info.document_set_names if persona_search_info else None
    )
    persona_time_cutoff: datetime | None = (
        persona_search_info.search_start_date if persona_search_info else None
    )
    attached_document_ids: list[str] | None = (
        persona_search_info.attached_document_ids or None
        if persona_search_info
        else None
    )
    hierarchy_node_ids: list[int] | None = (
        persona_search_info.hierarchy_node_ids or None if persona_search_info else None
    )

    filters = _build_index_filters(
        user_provided_filters=chunk_search_request.user_selected_filters,
        user=user,
        project_id_filter=project_id_filter,
        persona_id_filter=persona_id_filter,
        persona_document_sets=persona_document_sets,
        persona_time_cutoff=persona_time_cutoff,
        db_session=db_session,
        auto_detect_filters=auto_detect_filters,
        query=chunk_search_request.query,
        llm=llm,
        bypass_acl=chunk_search_request.bypass_acl,
        attached_document_ids=attached_document_ids,
        hierarchy_node_ids=hierarchy_node_ids,
        acl_filters=acl_filters,
    )

    query_keywords = strip_stopwords(chunk_search_request.query)

    query_request = ChunkIndexRequest(
        query=chunk_search_request.query,
        hybrid_alpha=chunk_search_request.hybrid_alpha,
        recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
        query_keywords=query_keywords,
        filters=filters,
        limit=chunk_search_request.limit,
    )

    retrieved_chunks = search_chunks(
        query_request=query_request,
        user_id=user.id if user else None,
        document_index=document_index,
        db_session=db_session,
        embedding_model=embedding_model,
        prefetched_federated_retrieval_infos=prefetched_federated_retrieval_infos,
    )

    # For some specific connectors like Salesforce, a user that has access to an object doesn't mean
    # that they have access to all of the fields of the object.
    censored_chunks: list[InferenceChunk] = fetch_ee_implementation_or_noop(
        "onyx.external_permissions.post_query_censoring",
        "_post_query_chunk_censoring",
        retrieved_chunks,
    )(
        chunks=retrieved_chunks,
        user=user,
    )

    return censored_chunks


================================================
FILE: backend/onyx/context/search/preprocessing/access_filters.py
================================================
from sqlalchemy.orm import Session

from onyx.access.access import get_acl_for_user
from onyx.context.search.models import IndexFilters
from onyx.db.models import User


def build_access_filters_for_user(user: User, session: Session) -> list[str]:
    user_acl = get_acl_for_user(user, session)
    return list(user_acl)


def build_user_only_filters(user: User, db_session: Session) -> IndexFilters:
    user_acl_filters = build_access_filters_for_user(user, db_session)
    return IndexFilters(
        source_type=None,
        document_set=None,
        time_cutoff=None,
        tags=None,
        access_control_list=user_acl_filters,
    )


================================================
FILE: backend/onyx/context/search/retrieval/search_runner.py
================================================
from collections.abc import Callable
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.configs.chat_configs import HYBRID_ALPHA
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import QueryExpansionType
from onyx.context.search.utils import get_query_embedding
from onyx.context.search.utils import inference_section_from_chunks
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces_new import DocumentIndex as NewDocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
)
from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
from onyx.federated_connectors.federated_retrieval import (
    get_federated_retrieval_functions,
)
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()


def combine_retrieval_results(
    chunk_sets: list[list[InferenceChunk]],
) -> list[InferenceChunk]:
    all_chunks = [chunk for chunk_set in chunk_sets for chunk in chunk_set]

    unique_chunks: dict[tuple[str, int], InferenceChunk] = {}
    for chunk in all_chunks:
        key = (chunk.document_id, chunk.chunk_id)
        if key not in unique_chunks:
            unique_chunks[key] = chunk
            continue

        stored_chunk_score = unique_chunks[key].score or 0
        this_chunk_score = chunk.score or 0
        if stored_chunk_score < this_chunk_score:
            unique_chunks[key] = chunk

    sorted_chunks = sorted(
        unique_chunks.values(), key=lambda x: x.score or 0, reverse=True
    )

    return sorted_chunks


def _embed_and_hybrid_search(
    query_request: ChunkIndexRequest,
    document_index: DocumentIndex,
    db_session: Session | None = None,
    embedding_model: EmbeddingModel | None = None,
) -> list[InferenceChunk]:
    query_embedding = get_query_embedding(
        query_request.query,
        db_session=db_session,
        embedding_model=embedding_model,
    )

    hybrid_alpha = query_request.hybrid_alpha or HYBRID_ALPHA

    top_chunks = document_index.hybrid_retrieval(
        query=query_request.query,
        query_embedding=query_embedding,
        final_keywords=query_request.query_keywords,
        filters=query_request.filters,
        hybrid_alpha=hybrid_alpha,
        time_decay_multiplier=query_request.recency_bias_multiplier,
        num_to_retrieve=query_request.limit or NUM_RETURNED_HITS,
        ranking_profile_type=(
            QueryExpansionType.KEYWORD
            if hybrid_alpha <= 0.3
            else QueryExpansionType.SEMANTIC
        ),
    )

    return top_chunks


def _keyword_search(
    query_request: ChunkIndexRequest,
    document_index: NewDocumentIndex,
) -> list[InferenceChunk]:
    return document_index.keyword_retrieval(
        query=query_request.query,
        filters=query_request.filters,
        num_to_retrieve=query_request.limit or NUM_RETURNED_HITS,
    )


def search_chunks(
    query_request: ChunkIndexRequest,
    user_id: UUID | None,
    document_index: DocumentIndex,
    db_session: Session | None = None,
    embedding_model: EmbeddingModel | None = None,
    prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,
) -> list[InferenceChunk]:
    run_queries: list[tuple[Callable, tuple]] = []

    source_filters = (
        set(query_request.filters.source_type)
        if query_request.filters.source_type
        else None
    )

    # Federated retrieval — use pre-fetched if available, otherwise query DB
    if prefetched_federated_retrieval_infos is not None:
        federated_retrieval_infos = prefetched_federated_retrieval_infos
    else:
        if db_session is None:
            raise ValueError(
                "Either db_session or prefetched_federated_retrieval_infos must be provided"
            )
        federated_retrieval_infos = get_federated_retrieval_functions(
            db_session=db_session,
            user_id=user_id,
            source_types=list(source_filters) if source_filters else None,
            document_set_names=query_request.filters.document_set,
        )

    federated_sources = set(
        federated_retrieval_info.source.to_non_federated_source()
        for federated_retrieval_info in federated_retrieval_infos
    )
    for federated_retrieval_info in federated_retrieval_infos:
        run_queries.append(
            (federated_retrieval_info.retrieval_function, (query_request,))
        )

    # Don't run normal hybrid search if there are no indexed sources to
    # search over
    normal_search_enabled = (source_filters is None) or (
        len(set(source_filters) - federated_sources) > 0
    )

    if normal_search_enabled:
        if (
            query_request.hybrid_alpha is not None
            and query_request.hybrid_alpha == 0.0
            and isinstance(document_index, OpenSearchOldDocumentIndex)
        ):
            # If hybrid alpha is explicitly set to keyword only, do pure keyword
            # search without generating an embedding. This is currently only
            # supported with OpenSearchDocumentIndex.
            opensearch_new_document_index: NewDocumentIndex = document_index._real_index
            run_queries.append(
                (
                    lambda: _keyword_search(
                        query_request, opensearch_new_document_index
                    ),
                    (),
                )
            )
        else:
            run_queries.append(
                (
                    _embed_and_hybrid_search,
                    (query_request, document_index, db_session, embedding_model),
                )
            )

    parallel_search_results = run_functions_tuples_in_parallel(run_queries)
    top_chunks = combine_retrieval_results(parallel_search_results)

    if not top_chunks:
        logger.debug(
            f"Search returned no results for query: {query_request.query} with filters: {query_request.filters}."
        )

    return top_chunks


# TODO: This is unused code.
def inference_sections_from_ids(
    doc_identifiers: list[tuple[str, int]],
    document_index: DocumentIndex,
) -> list[InferenceSection]:
    # Currently only fetches whole docs
    doc_ids_set = set(doc_id for doc_id, _ in doc_identifiers)

    chunk_requests: list[VespaChunkRequest] = [
        VespaChunkRequest(document_id=doc_id) for doc_id in doc_ids_set
    ]

    # No need for ACL here because the doc ids were validated beforehand
    filters = IndexFilters(access_control_list=None)

    retrieved_chunks = document_index.id_based_retrieval(
        chunk_requests=chunk_requests,
        filters=filters,
    )

    if not retrieved_chunks:
        return []

    # Group chunks by document ID
    chunks_by_doc_id: dict[str, list[InferenceChunk]] = {}
    for chunk in retrieved_chunks:
        chunks_by_doc_id.setdefault(chunk.document_id, []).append(chunk)

    inference_sections = [
        section
        for chunks in chunks_by_doc_id.values()
        if chunks
        and (
            section := inference_section_from_chunks(
                # The scores will always be 0 because the fetching by id gives back
                # no search scores. This is not needed though if the user is explicitly
                # selecting a document.
                center_chunk=chunks[0],
                chunks=chunks,
            )
        )
    ]

    return inference_sections


================================================
FILE: backend/onyx/context/search/utils.py
================================================
from typing import TypeVar

from sqlalchemy.orm import Session

from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchDoc
from onyx.db.search_settings import get_current_search_settings
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import Embedding

logger = setup_logger()


T = TypeVar(
    "T",
    InferenceSection,
    InferenceChunk,
    SearchDoc,
    SavedSearchDoc,
    SavedSearchDocWithContent,
)

TSection = TypeVar(
    "TSection",
    InferenceSection,
    SearchDoc,
    SavedSearchDoc,
    SavedSearchDocWithContent,
)


def inference_section_from_chunks(
    center_chunk: InferenceChunk,
    chunks: list[InferenceChunk],
) -> InferenceSection | None:
    if not chunks:
        return None

    combined_content = "\n".join([chunk.content for chunk in chunks])

    return InferenceSection(
        center_chunk=center_chunk,
        chunks=chunks,
        combined_content=combined_content,
    )


# If it should be a real section, don't use this one
def inference_section_from_single_chunk(
    chunk: InferenceChunk,
) -> InferenceSection:
    return InferenceSection(
        center_chunk=chunk,
        chunks=[chunk],
        combined_content=chunk.content,
    )


def get_query_embeddings(
    queries: list[str],
    db_session: Session | None = None,
    embedding_model: EmbeddingModel | None = None,
) -> list[Embedding]:
    if embedding_model is None:
        if db_session is None:
            raise ValueError("Either db_session or embedding_model must be provided")
        search_settings = get_current_search_settings(db_session)
        embedding_model = EmbeddingModel.from_db_model(
            search_settings=search_settings,
            server_host=MODEL_SERVER_HOST,
            server_port=MODEL_SERVER_PORT,
        )

    query_embedding = embedding_model.encode(queries, text_type=EmbedTextType.QUERY)
    return query_embedding


@log_function_time(print_only=True, debug_only=True)
def get_query_embedding(
    query: str,
    db_session: Session | None = None,
    embedding_model: EmbeddingModel | None = None,
) -> Embedding:
    return get_query_embeddings(
        [query], db_session=db_session, embedding_model=embedding_model
    )[0]


def convert_inference_sections_to_search_docs(
    inference_sections: list[InferenceSection],
    is_internet: bool = False,
) -> list[SearchDoc]:
    search_docs = SearchDoc.from_chunks_or_sections(inference_sections)
    for search_doc in search_docs:
        search_doc.is_internet = is_internet
    return search_docs


================================================
FILE: backend/onyx/db/README.md
================================================
An explanation of how the history of messages, tool calls, and docs are stored in the database:

Messages are grouped by a chat session, a tree structured is used to allow edits and for the
user to switch between branches. Each ChatMessage is either a user message or an assistant message.
It should always alternate between the two, System messages, custom agent prompt injections, and
reminder messages are injected dynamically after the chat session is loaded into memory. The user
and assistant messages are stored in pairs, though it is ok if the user message is stored and the
assistant message fails.

The user chat message is relatively simple and includes the user prompt and any attached documents.
The assistant message includes the response, tool calls, feedback, citations, etc.
Things provided as input are part of the user message, things that happen during the inference and
LLM loop are part of the assistant message.

Reasoning is part of the message or tool call that occured after the reasoning. Really the reasoning
should be part of the previous message / tool call because if it branches afterwards as a result of
the reasoning, this is somewhat unintuitive. But to not include reasoning as part of the user message,
it is instead included with the following message or tool call. With parallel tool calls, the reasoning
will be included with each of the tool calls.

Tool calls are stored in the ToolCall table and can represent all of the following:
- Parallel tool calls, these will have the same turn number and parent tool call id
- Sequential tool calls, these will have a different turn number and parent tool call id
- Tool calls attached to the ChatMessage are top level tool calls directly triggered by the LLM
- Tool calls that are instead attached to other ToolCalls are tool calls that happen as part of an
  agent that has been called. The top level tool call is the agent call and the tool calls that have
  the agent call as a parent are the tool calls that happen as part of the agent.

The different branches are generated by sending a new search query to an existing parent.
```
                 [Empty Root Message]  (This allows the first message to be branched/edited as well)
              /           |           \
[First Message] [First Message Edit 1] [First Message Edit 2]
       |                  |
[Second Message]  [Second Message of Edit 1 Branch]
```


================================================
FILE: backend/onyx/db/__init__.py
================================================


================================================
FILE: backend/onyx/db/_deprecated/pg_file_store.py
================================================
"""Kept around since it's used in the migration to move to S3/MinIO"""

import tempfile
from io import BytesIO
from typing import IO

from psycopg2.extensions import connection
from sqlalchemy import text  # NEW: for SQL large-object helpers
from sqlalchemy.orm import Session

from onyx.file_store.constants import MAX_IN_MEMORY_SIZE
from onyx.file_store.constants import STANDARD_CHUNK_SIZE
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_pg_conn_from_session(db_session: Session) -> connection:
    return db_session.connection().connection.connection  # type: ignore


def create_populate_lobj(
    content: IO,
    db_session: Session,
) -> int:
    """Create a PostgreSQL large object from *content* and return its OID.

    Preferred approach is to use the psycopg2 ``lobject`` API, but if that is
    unavailable (e.g. when the underlying connection is an asyncpg adapter)
    we fall back to PostgreSQL helper functions such as ``lo_from_bytea``.

    NOTE: this function intentionally *does not* commit the surrounding
    transaction – that is handled by the caller so all work stays atomic.
    """

    pg_conn = None
    try:
        pg_conn = get_pg_conn_from_session(db_session)
        # ``AsyncAdapt_asyncpg_connection`` (asyncpg) has no ``lobject``
        if not hasattr(pg_conn, "lobject"):
            raise AttributeError  # will be handled by fallback below

        large_object = pg_conn.lobject()

        # write in multiple chunks to avoid loading the whole file into memory
        while True:
            chunk = content.read(STANDARD_CHUNK_SIZE)
            if not chunk:
                break
            large_object.write(chunk)

        large_object.close()

        return large_object.oid

    except AttributeError:
        # Fall back to SQL helper functions – read the full content into memory
        # (acceptable for the limited number and size of files handled during
        # migrations).  ``lo_from_bytea`` returns the new OID.
        byte_data = content.read()
        result = db_session.execute(
            text("SELECT lo_from_bytea(0, :data) AS oid"),
            {"data": byte_data},
        )
        # ``scalar_one`` is 2.0-style; ``scalar`` works on both 1.4/2.0.
        lobj_oid = result.scalar()
        if lobj_oid is None:
            raise RuntimeError("Failed to create large object")
        return int(lobj_oid)


def read_lobj(
    lobj_oid: int,
    db_session: Session,
    mode: str | None = None,
    use_tempfile: bool = False,
) -> IO:
    """Read a PostgreSQL large object identified by *lobj_oid*.

    Attempts to use the native ``lobject`` API first; if unavailable falls back
    to ``lo_get`` which returns the large object's contents as *bytea*.
    """

    pg_conn = None
    try:
        pg_conn = get_pg_conn_from_session(db_session)
        if not hasattr(pg_conn, "lobject"):
            raise AttributeError

        # Ensure binary mode by default
        if mode is None:
            mode = "rb"
        large_object = (
            pg_conn.lobject(lobj_oid, mode=mode) if mode else pg_conn.lobject(lobj_oid)
        )

        if use_tempfile:
            temp_file = tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)
            while True:
                chunk = large_object.read(STANDARD_CHUNK_SIZE)
                if not chunk:
                    break
                temp_file.write(chunk)
            temp_file.seek(0)
            return temp_file
        else:
            return BytesIO(large_object.read())

    except AttributeError:
        # Fallback path using ``lo_get``
        result = db_session.execute(
            text("SELECT lo_get(:oid) AS data"),
            {"oid": lobj_oid},
        )
        byte_data = result.scalar()
        if byte_data is None:
            raise RuntimeError("Failed to read large object")

        if use_tempfile:
            temp_file = tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)
            temp_file.write(byte_data)
            temp_file.seek(0)
            return temp_file
        return BytesIO(byte_data)


def delete_lobj_by_id(
    lobj_oid: int,
    db_session: Session,
) -> None:
    """Remove a large object by OID, regardless of driver implementation."""

    try:
        pg_conn = get_pg_conn_from_session(db_session)
        if hasattr(pg_conn, "lobject"):
            pg_conn.lobject(lobj_oid).unlink()
            return
        raise AttributeError
    except AttributeError:
        # Fallback for drivers without ``lobject`` support
        db_session.execute(text("SELECT lo_unlink(:oid)"), {"oid": lobj_oid})
        # No explicit result expected


================================================
FILE: backend/onyx/db/api_key.py
================================================
import uuid

from fastapi_users.password import PasswordHelper
from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.auth.api_key import ApiKeyDescriptor
from onyx.auth.api_key import build_displayable_api_key
from onyx.auth.api_key import generate_api_key
from onyx.auth.api_key import hash_api_key
from onyx.auth.schemas import UserRole
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from onyx.configs.constants import DANSWER_API_KEY_PREFIX
from onyx.configs.constants import UNNAMED_KEY_PLACEHOLDER
from onyx.db.enums import AccountType
from onyx.db.models import ApiKey
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.server.api_key.models import APIKeyArgs
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


def get_api_key_email_pattern() -> str:
    return DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN


def is_api_key_email_address(email: str) -> bool:
    return email.endswith(get_api_key_email_pattern())


def fetch_api_keys(db_session: Session) -> list[ApiKeyDescriptor]:
    api_keys = (
        db_session.scalars(select(ApiKey).options(joinedload(ApiKey.user)))
        .unique()
        .all()
    )
    return [
        ApiKeyDescriptor(
            api_key_id=api_key.id,
            api_key_role=api_key.user.role,
            api_key_display=api_key.api_key_display,
            api_key_name=api_key.name,
            user_id=api_key.user_id,
        )
        for api_key in api_keys
    ]


async def fetch_user_for_api_key(
    hashed_api_key: str, async_db_session: AsyncSession
) -> User | None:
    """NOTE: this is async, since it's used during auth
    (which is necessarily async due to FastAPI Users)"""
    return await async_db_session.scalar(
        select(User)
        .join(ApiKey, ApiKey.user_id == User.id)
        .where(ApiKey.hashed_api_key == hashed_api_key)
    )


def get_api_key_fake_email(
    name: str,
    unique_id: str,
) -> str:
    return f"{DANSWER_API_KEY_PREFIX}{name}@{unique_id}{DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN}"


def insert_api_key(
    db_session: Session, api_key_args: APIKeyArgs, user_id: uuid.UUID | None
) -> ApiKeyDescriptor:
    std_password_helper = PasswordHelper()

    # Get tenant_id from context var (will be default schema for single tenant)
    tenant_id = get_current_tenant_id()

    api_key = generate_api_key(tenant_id)
    api_key_user_id = uuid.uuid4()

    display_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER
    api_key_user_row = User(
        id=api_key_user_id,
        email=get_api_key_fake_email(display_name, str(api_key_user_id)),
        # a random password for the "user"
        hashed_password=std_password_helper.hash(std_password_helper.generate()),
        is_active=True,
        is_superuser=False,
        is_verified=True,
        role=api_key_args.role,
        account_type=AccountType.SERVICE_ACCOUNT,
    )
    db_session.add(api_key_user_row)

    api_key_row = ApiKey(
        name=api_key_args.name,
        hashed_api_key=hash_api_key(api_key),
        api_key_display=build_displayable_api_key(api_key),
        user_id=api_key_user_id,
        owner_id=user_id,
    )
    db_session.add(api_key_row)

    # Assign the API key virtual user to the appropriate default group
    # before commit so everything is atomic.
    # LIMITED role service accounts should have no group membership.
    if api_key_args.role != UserRole.LIMITED:
        assign_user_to_default_groups__no_commit(
            db_session,
            api_key_user_row,
            is_admin=(api_key_args.role == UserRole.ADMIN),
        )

    db_session.commit()

    return ApiKeyDescriptor(
        api_key_id=api_key_row.id,
        api_key_role=api_key_user_row.role,
        api_key_display=api_key_row.api_key_display,
        api_key=api_key,
        api_key_name=api_key_args.name,
        user_id=api_key_user_id,
    )


def update_api_key(
    db_session: Session, api_key_id: int, api_key_args: APIKeyArgs
) -> ApiKeyDescriptor:
    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))
    if existing_api_key is None:
        raise ValueError(f"API key with id {api_key_id} does not exist")

    existing_api_key.name = api_key_args.name
    api_key_user = db_session.scalar(
        select(User).where(User.id == existing_api_key.user_id)  # type: ignore
    )
    if api_key_user is None:
        raise RuntimeError("API Key does not have associated user.")

    email_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER
    api_key_user.email = get_api_key_fake_email(email_name, str(api_key_user.id))

    old_role = api_key_user.role
    api_key_user.role = api_key_args.role

    # Reconcile default-group membership when the role changes.
    if old_role != api_key_args.role:
        # Remove from all default groups first.
        delete_stmt = delete(User__UserGroup).where(
            User__UserGroup.user_id == api_key_user.id,
            User__UserGroup.user_group_id.in_(
                select(UserGroup.id).where(UserGroup.is_default.is_(True))
            ),
        )
        db_session.execute(delete_stmt)

        # Re-assign to the correct default group (skip for LIMITED).
        if api_key_args.role != UserRole.LIMITED:
            assign_user_to_default_groups__no_commit(
                db_session,
                api_key_user,
                is_admin=(api_key_args.role == UserRole.ADMIN),
            )
        else:
            # No group assigned for LIMITED, but we still need to recompute
            # since we just removed the old default-group membership above.
            recompute_user_permissions__no_commit(api_key_user.id, db_session)

    db_session.commit()

    return ApiKeyDescriptor(
        api_key_id=existing_api_key.id,
        api_key_display=existing_api_key.api_key_display,
        api_key_name=api_key_args.name,
        api_key_role=api_key_user.role,
        user_id=existing_api_key.user_id,
    )


def regenerate_api_key(db_session: Session, api_key_id: int) -> ApiKeyDescriptor:
    """NOTE: currently, any admin can regenerate any API key."""
    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))
    if existing_api_key is None:
        raise ValueError(f"API key with id {api_key_id} does not exist")

    api_key_user = db_session.scalar(
        select(User).where(User.id == existing_api_key.user_id)  # type: ignore
    )
    if api_key_user is None:
        raise RuntimeError("API Key does not have associated user.")

    # Get tenant_id from context var (will be default schema for single tenant)
    tenant_id = get_current_tenant_id()

    new_api_key = generate_api_key(tenant_id)
    existing_api_key.hashed_api_key = hash_api_key(new_api_key)
    existing_api_key.api_key_display = build_displayable_api_key(new_api_key)
    db_session.commit()

    return ApiKeyDescriptor(
        api_key_id=existing_api_key.id,
        api_key_display=existing_api_key.api_key_display,
        api_key=new_api_key,
        api_key_name=existing_api_key.name,
        api_key_role=api_key_user.role,
        user_id=existing_api_key.user_id,
    )


def remove_api_key(db_session: Session, api_key_id: int) -> None:
    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))
    if existing_api_key is None:
        raise ValueError(f"API key with id {api_key_id} does not exist")

    user_associated_with_key = db_session.scalar(
        select(User).where(User.id == existing_api_key.user_id)  # type: ignore
    )
    if user_associated_with_key is None:
        raise ValueError(
            f"User associated with API key with id {api_key_id} does not exist. This should not happen."
        )

    db_session.delete(existing_api_key)
    db_session.delete(user_associated_with_key)
    db_session.commit()


================================================
FILE: backend/onyx/db/auth.py
================================================
from collections.abc import AsyncGenerator
from collections.abc import Callable
from typing import Any
from typing import Dict
from typing import TypeVar

from fastapi import Depends
from fastapi_users.models import ID
from fastapi_users.models import UP
from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase
from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyAccessTokenDatabase
from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import Session

from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import NO_AUTH_PLACEHOLDER_USER_EMAIL
from onyx.db.api_key import get_api_key_email_pattern
from onyx.db.engine.async_sql_engine import get_async_session
from onyx.db.engine.async_sql_engine import get_async_session_context_manager
from onyx.db.models import AccessToken
from onyx.db.models import OAuthAccount
from onyx.db.models import User
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)

T = TypeVar("T", bound=tuple[Any, ...])


def get_default_admin_user_emails() -> list[str]:
    """Returns a list of emails who should default to Admin role.
    Only used in the EE version. For MIT, just return empty list."""
    get_default_admin_user_emails_fn: Callable[[], list[str]] = (
        fetch_versioned_implementation_with_fallback(
            "onyx.auth.users", "get_default_admin_user_emails_", lambda: list[str]()
        )
    )
    return get_default_admin_user_emails_fn()


def _add_live_user_count_where_clause(
    select_stmt: Select[T],
    only_admin_users: bool,
) -> Select[T]:
    """
    Builds a SQL column expression that can be used to filter out
    users who should not be included in the live user count.

    Excludes:
    - API key users (by email pattern)
    - System users (anonymous user, no-auth placeholder)
    - External permission users (unless only_admin_users is True)
    """
    select_stmt = select_stmt.where(~User.email.endswith(get_api_key_email_pattern()))  # type: ignore

    # Exclude system users (anonymous user, no-auth placeholder)
    select_stmt = select_stmt.where(User.email != ANONYMOUS_USER_EMAIL)  # type: ignore
    select_stmt = select_stmt.where(User.email != NO_AUTH_PLACEHOLDER_USER_EMAIL)  # type: ignore

    if only_admin_users:
        return select_stmt.where(User.role == UserRole.ADMIN)

    return select_stmt.where(
        User.role != UserRole.EXT_PERM_USER,
    )


def get_live_users_count(db_session: Session) -> int:
    """
    Returns the number of users in the system.
    This does NOT include invited users, "users" pulled in
    from external connectors, or API keys.
    """
    count_stmt = func.count(User.id)
    select_stmt = select(count_stmt)
    select_stmt_w_filters = _add_live_user_count_where_clause(select_stmt, False)
    user_count = db_session.scalar(select_stmt_w_filters)
    if user_count is None:
        raise RuntimeError("Was not able to fetch the user count.")
    return user_count


async def get_user_count(only_admin_users: bool = False) -> int:
    async with get_async_session_context_manager() as session:
        count_stmt = func.count(User.id)
        stmt = select(count_stmt)
        stmt_w_filters = _add_live_user_count_where_clause(stmt, only_admin_users)
        user_count = await session.scalar(stmt_w_filters)
        if user_count is None:
            raise RuntimeError("Was not able to fetch the user count.")
        return user_count


# Need to override this because FastAPI Users doesn't give flexibility for backend field creation logic in OAuth flow
class SQLAlchemyUserAdminDB(SQLAlchemyUserDatabase[UP, ID]):
    async def create(
        self,
        create_dict: Dict[str, Any],
    ) -> UP:
        user_count = await get_user_count()
        if user_count == 0 or create_dict["email"] in get_default_admin_user_emails():
            create_dict["role"] = UserRole.ADMIN
        else:
            create_dict["role"] = UserRole.BASIC
        return await super().create(create_dict)


async def get_user_db(
    session: AsyncSession = Depends(get_async_session),
) -> AsyncGenerator[SQLAlchemyUserAdminDB, None]:
    yield SQLAlchemyUserAdminDB(session, User, OAuthAccount)


async def get_access_token_db(
    session: AsyncSession = Depends(get_async_session),
) -> AsyncGenerator[SQLAlchemyAccessTokenDatabase, None]:
    yield SQLAlchemyAccessTokenDatabase(session, AccessToken)


================================================
FILE: backend/onyx/db/background_error.py
================================================
from sqlalchemy.orm import Session

from onyx.db.models import BackgroundError


def create_background_error(
    db_session: Session, message: str, cc_pair_id: int | None
) -> None:
    db_session.add(BackgroundError(message=message, cc_pair_id=cc_pair_id))
    db_session.commit()


================================================
FILE: backend/onyx/db/chat.py
================================================
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Tuple
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import nullsfirst
from sqlalchemy import or_
from sqlalchemy import Row
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.chat_configs import HARD_DELETE_CHATS
from onyx.configs.constants import MessageType
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc as ServerSearchDoc
from onyx.db.models import ChatMessage
from onyx.db.models import ChatMessage__SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
from onyx.db.persona import get_best_persona_id_for_user
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
from onyx.server.query_and_chat.models import ChatMessageDetail
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string


logger = setup_logger()


# Note: search/streaming packet helpers moved to streaming_utils.py


def get_chat_session_by_id(
    chat_session_id: UUID,
    user_id: UUID | None,
    db_session: Session,
    include_deleted: bool = False,
    is_shared: bool = False,
    eager_load_persona: bool = False,
) -> ChatSession:
    stmt = select(ChatSession).where(ChatSession.id == chat_session_id)

    if eager_load_persona:
        stmt = stmt.options(
            joinedload(ChatSession.persona).options(
                selectinload(Persona.tools),
                selectinload(Persona.user_files),
                selectinload(Persona.document_sets),
                selectinload(Persona.attached_documents),
                selectinload(Persona.hierarchy_nodes),
            ),
            joinedload(ChatSession.project),
        )

    if is_shared:
        stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC)
    else:
        # if user_id is None, assume this is an admin who should be able
        # to view all chat sessions
        if user_id is not None:
            stmt = stmt.where(
                or_(ChatSession.user_id == user_id, ChatSession.user_id.is_(None))
            )

    result = db_session.execute(stmt)
    chat_session = result.scalar_one_or_none()

    if not chat_session:
        raise ValueError("Invalid Chat Session ID provided")

    if not include_deleted and chat_session.deleted:
        raise ValueError("Chat session has been deleted")

    return chat_session


def get_chat_sessions_by_slack_thread_id(
    slack_thread_id: str,
    user_id: UUID | None,
    db_session: Session,
) -> Sequence[ChatSession]:
    stmt = select(ChatSession).where(ChatSession.slack_thread_id == slack_thread_id)
    if user_id is not None:
        stmt = stmt.where(
            or_(ChatSession.user_id == user_id, ChatSession.user_id.is_(None))
        )
    return db_session.scalars(stmt).all()


# Retrieves chat sessions by user
# Chat sessions do not include onyxbot flows
def get_chat_sessions_by_user(
    user_id: UUID | None,
    deleted: bool | None,
    db_session: Session,
    include_onyxbot_flows: bool = False,
    limit: int = 50,
    before: datetime | None = None,
    project_id: int | None = None,
    only_non_project_chats: bool = False,
    include_failed_chats: bool = False,
) -> list[ChatSession]:
    stmt = select(ChatSession).where(ChatSession.user_id == user_id)

    if not include_onyxbot_flows:
        stmt = stmt.where(ChatSession.onyxbot_flow.is_(False))

    stmt = stmt.order_by(desc(ChatSession.time_updated))

    if deleted is not None:
        stmt = stmt.where(ChatSession.deleted == deleted)

    if before is not None:
        stmt = stmt.where(ChatSession.time_updated < before)

    if project_id is not None:
        stmt = stmt.where(ChatSession.project_id == project_id)
    elif only_non_project_chats:
        stmt = stmt.where(ChatSession.project_id.is_(None))

    # When filtering out failed chats, we apply the limit in Python after
    # filtering rather than in SQL, since the post-filter may remove rows.
    if limit and include_failed_chats:
        stmt = stmt.limit(limit)

    result = db_session.execute(stmt)
    chat_sessions = list(result.scalars().all())

    if not include_failed_chats and chat_sessions:
        # Filter out "failed" sessions (those with only SYSTEM messages)
        # using a separate efficient query instead of a correlated EXISTS
        # subquery, which causes full sequential scans of chat_message.
        leeway = datetime.now(timezone.utc) - timedelta(minutes=5)
        session_ids = [cs.id for cs in chat_sessions if cs.time_created < leeway]

        if session_ids:
            valid_session_ids_stmt = (
                select(ChatMessage.chat_session_id)
                .where(ChatMessage.chat_session_id.in_(session_ids))
                .where(ChatMessage.message_type != MessageType.SYSTEM)
                .distinct()
            )
            valid_session_ids = set(
                db_session.execute(valid_session_ids_stmt).scalars().all()
            )

            chat_sessions = [
                cs
                for cs in chat_sessions
                if cs.time_created >= leeway or cs.id in valid_session_ids
            ]

        if limit:
            chat_sessions = chat_sessions[:limit]

    return chat_sessions


def delete_orphaned_search_docs(db_session: Session) -> None:
    orphaned_docs = (
        db_session.query(DBSearchDoc)
        .outerjoin(ChatMessage__SearchDoc)
        .filter(ChatMessage__SearchDoc.chat_message_id.is_(None))
        .all()
    )
    for doc in orphaned_docs:
        db_session.delete(doc)
    db_session.commit()


def delete_messages_and_files_from_chat_session(
    chat_session_id: UUID, db_session: Session
) -> None:
    # Select messages older than cutoff_time with files
    messages_with_files = db_session.execute(
        select(ChatMessage.id, ChatMessage.files).where(
            ChatMessage.chat_session_id == chat_session_id,
        )
    ).fetchall()

    for _, files in messages_with_files:
        file_store = get_default_file_store()
        for file_info in files or []:
            file_store.delete_file(file_id=file_info.get("id"))

    # Delete ChatMessage records - CASCADE constraints will automatically handle:
    # - ChatMessage__StandardAnswer relationship records
    db_session.execute(
        delete(ChatMessage).where(ChatMessage.chat_session_id == chat_session_id)
    )
    db_session.commit()

    delete_orphaned_search_docs(db_session)


def create_chat_session(
    db_session: Session,
    description: str | None,
    user_id: UUID | None,
    persona_id: int | None,  # Can be none if temporary persona is used
    llm_override: LLMOverride | None = None,
    prompt_override: PromptOverride | None = None,
    onyxbot_flow: bool = False,
    slack_thread_id: str | None = None,
    project_id: int | None = None,
) -> ChatSession:
    chat_session = ChatSession(
        user_id=user_id,
        persona_id=persona_id,
        description=description,
        llm_override=llm_override,
        prompt_override=prompt_override,
        onyxbot_flow=onyxbot_flow,
        slack_thread_id=slack_thread_id,
        project_id=project_id,
    )

    db_session.add(chat_session)
    db_session.commit()

    return chat_session


def duplicate_chat_session_for_user_from_slack(
    db_session: Session,
    user: User,
    chat_session_id: UUID,
) -> ChatSession:
    """
    This takes a chat session id for a session in Slack and:
    - Creates a new chat session in the DB
    - Tries to copy the persona from the original chat session
        (if it is available to the user clicking the button)
    - Sets the user to the given user (if provided)
    """
    chat_session = get_chat_session_by_id(
        chat_session_id=chat_session_id,
        user_id=None,  # Ignore user permissions for this
        db_session=db_session,
    )
    if not chat_session:
        raise HTTPException(status_code=400, detail="Invalid Chat Session ID provided")

    # This enforces permissions and sets a default
    new_persona_id = get_best_persona_id_for_user(
        db_session=db_session,
        user=user,
        persona_id=chat_session.persona_id,
    )

    return create_chat_session(
        db_session=db_session,
        user_id=user.id,
        persona_id=new_persona_id,
        # Set this to empty string so the frontend will force a rename
        description="",
        llm_override=chat_session.llm_override,
        prompt_override=chat_session.prompt_override,
        # Chat is in UI now so this is false
        onyxbot_flow=False,
        # Maybe we want this in the future to track if it was created from Slack
        slack_thread_id=None,
    )


def update_chat_session(
    db_session: Session,
    user_id: UUID | None,
    chat_session_id: UUID,
    description: str | None = None,
    sharing_status: ChatSessionSharedStatus | None = None,
) -> ChatSession:
    chat_session = get_chat_session_by_id(
        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
    )

    if chat_session.deleted:
        raise ValueError("Trying to rename a deleted chat session")

    if description is not None:
        chat_session.description = description
    if sharing_status is not None:
        chat_session.shared_status = sharing_status

    db_session.commit()

    return chat_session


def delete_all_chat_sessions_for_user(
    user: User, db_session: Session, hard_delete: bool = HARD_DELETE_CHATS
) -> None:
    user_id = user.id

    chat_sessions = (
        db_session.query(ChatSession)
        .filter(ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False))
        .all()
    )

    if hard_delete:
        for chat_session in chat_sessions:
            delete_messages_and_files_from_chat_session(chat_session.id, db_session)
        db_session.execute(
            delete(ChatSession).where(
                ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False)
            )
        )
    else:
        db_session.execute(
            update(ChatSession)
            .where(ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False))
            .values(deleted=True)
        )

    db_session.commit()


def delete_chat_session(
    user_id: UUID | None,
    chat_session_id: UUID,
    db_session: Session,
    include_deleted: bool = False,
    hard_delete: bool = HARD_DELETE_CHATS,
) -> None:
    chat_session = get_chat_session_by_id(
        chat_session_id=chat_session_id,
        user_id=user_id,
        db_session=db_session,
        include_deleted=include_deleted,
    )

    if chat_session.deleted and not include_deleted:
        raise ValueError("Cannot delete an already deleted chat session")

    if hard_delete:
        delete_messages_and_files_from_chat_session(chat_session_id, db_session)
        db_session.execute(delete(ChatSession).where(ChatSession.id == chat_session_id))
    else:
        chat_session = get_chat_session_by_id(
            chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
        )
        chat_session.deleted = True

    db_session.commit()


def get_chat_sessions_older_than(
    days_old: int, db_session: Session
) -> list[tuple[UUID | None, UUID]]:
    """
    Retrieves chat sessions older than a specified number of days.

    Args:
        days_old: The number of days to consider as "old".
        db_session: The database session.

    Returns:
        A list of tuples, where each tuple contains the user_id (can be None) and the chat_session_id of an old chat session.
    """

    cutoff_time = datetime.utcnow() - timedelta(days=days_old)
    old_sessions: Sequence[Row[Tuple[UUID | None, UUID]]] = db_session.execute(
        select(ChatSession.user_id, ChatSession.id).where(
            ChatSession.time_created < cutoff_time
        )
    ).fetchall()

    # convert old_sessions to a conventional list of tuples
    returned_sessions: list[tuple[UUID | None, UUID]] = [
        (user_id, session_id) for user_id, session_id in old_sessions
    ]

    return returned_sessions


def get_chat_message(
    chat_message_id: int,
    user_id: UUID | None,
    db_session: Session,
) -> ChatMessage:
    stmt = select(ChatMessage).where(ChatMessage.id == chat_message_id)

    result = db_session.execute(stmt)
    chat_message = result.scalar_one_or_none()

    if not chat_message:
        raise ValueError("Invalid Chat Message specified")

    chat_user = chat_message.chat_session.user
    expected_user_id = chat_user.id if chat_user is not None else None

    if expected_user_id != user_id:
        logger.error(
            f"User {user_id} tried to fetch a chat message that does not belong to them"
        )
        raise ValueError("Chat message does not belong to user")

    return chat_message


def get_chat_session_by_message_id(
    db_session: Session,
    message_id: int,
) -> ChatSession:
    """
    Should only be used for Slack
    Get the chat session associated with a specific message ID
    Note: this ignores permission checks.
    """
    stmt = select(ChatMessage).where(ChatMessage.id == message_id)

    result = db_session.execute(stmt)
    chat_message = result.scalar_one_or_none()

    if chat_message is None:
        raise ValueError(
            f"Unable to find chat session associated with message ID: {message_id}"
        )

    return chat_message.chat_session


def get_chat_messages_by_sessions(
    chat_session_ids: list[UUID],
    user_id: UUID | None,
    db_session: Session,
    skip_permission_check: bool = False,
) -> Sequence[ChatMessage]:
    if not skip_permission_check:
        for chat_session_id in chat_session_ids:
            get_chat_session_by_id(
                chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
            )
    stmt = (
        select(ChatMessage)
        .where(ChatMessage.chat_session_id.in_(chat_session_ids))
        .order_by(nullsfirst(ChatMessage.parent_message_id))
    )
    return db_session.execute(stmt).scalars().all()


def add_chats_to_session_from_slack_thread(
    db_session: Session,
    slack_chat_session_id: UUID,
    new_chat_session_id: UUID,
) -> None:
    new_root_message = get_or_create_root_message(
        chat_session_id=new_chat_session_id,
        db_session=db_session,
    )

    for chat_message in get_chat_messages_by_sessions(
        chat_session_ids=[slack_chat_session_id],
        user_id=None,  # Ignore user permissions for this
        db_session=db_session,
        skip_permission_check=True,
    ):
        if chat_message.message_type == MessageType.SYSTEM:
            continue
        # Duplicate the message
        new_root_message = create_new_chat_message(
            db_session=db_session,
            chat_session_id=new_chat_session_id,
            parent_message=new_root_message,
            message=chat_message.message,
            files=chat_message.files,
            error=chat_message.error,
            token_count=chat_message.token_count,
            message_type=chat_message.message_type,
            reasoning_tokens=chat_message.reasoning_tokens,
        )


def add_search_docs_to_chat_message(
    chat_message_id: int, search_doc_ids: list[int], db_session: Session
) -> None:
    """
    Link SearchDocs to a ChatMessage by creating entries in the chat_message__search_doc junction table.

    Args:
        chat_message_id: The ID of the chat message
        search_doc_ids: List of search document IDs to link
        db_session: The database session
    """
    for search_doc_id in search_doc_ids:
        chat_message_search_doc = ChatMessage__SearchDoc(
            chat_message_id=chat_message_id, search_doc_id=search_doc_id
        )
        db_session.add(chat_message_search_doc)


def add_search_docs_to_tool_call(
    tool_call_id: int, search_doc_ids: list[int], db_session: Session
) -> None:
    """
    Link SearchDocs to a ToolCall by creating entries in the tool_call__search_doc junction table.

    Args:
        tool_call_id: The ID of the tool call
        search_doc_ids: List of search document IDs to link
        db_session: The database session
    """
    from onyx.db.models import ToolCall__SearchDoc

    for search_doc_id in search_doc_ids:
        tool_call_search_doc = ToolCall__SearchDoc(
            tool_call_id=tool_call_id, search_doc_id=search_doc_id
        )
        db_session.add(tool_call_search_doc)


def get_chat_messages_by_session(
    chat_session_id: UUID,
    user_id: UUID | None,
    db_session: Session,
    skip_permission_check: bool = False,
    prefetch_top_two_level_tool_calls: bool = True,
) -> list[ChatMessage]:
    if not skip_permission_check:
        # bug if we ever call this expecting the permission check to not be skipped
        get_chat_session_by_id(
            chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
        )

    stmt = (
        select(ChatMessage)
        .where(ChatMessage.chat_session_id == chat_session_id)
        .order_by(nullsfirst(ChatMessage.parent_message_id))
    )

    # This should handle both the top level tool calls and deep research
    # If there are future nested agents, this can be extended.
    if prefetch_top_two_level_tool_calls:
        # Load tool_calls and their direct children (one level deep)
        stmt = stmt.options(
            selectinload(ChatMessage.tool_calls).selectinload(
                ToolCall.tool_call_children
            )
        )
        result = db_session.scalars(stmt).unique().all()
    else:
        result = db_session.scalars(stmt).all()

    return list(result)


def get_or_create_root_message(
    chat_session_id: UUID,
    db_session: Session,
) -> ChatMessage:
    try:
        root_message: ChatMessage | None = (
            db_session.query(ChatMessage)
            .filter(
                ChatMessage.chat_session_id == chat_session_id,
                ChatMessage.parent_message_id.is_(None),
            )
            .one_or_none()
        )
    except MultipleResultsFound:
        raise Exception(
            "Multiple root messages found for chat session. Data inconsistency detected."
        )

    if root_message is not None:
        return root_message
    else:
        new_root_message = ChatMessage(
            chat_session_id=chat_session_id,
            parent_message_id=None,
            latest_child_message_id=None,
            message="",
            token_count=0,
            message_type=MessageType.SYSTEM,
        )
        db_session.add(new_root_message)
        db_session.commit()
        return new_root_message


def reserve_message_id(
    db_session: Session,
    chat_session_id: UUID,
    parent_message: int,
    message_type: MessageType = MessageType.ASSISTANT,
) -> ChatMessage:
    # Create an temporary holding chat message to the updated and saved at the end
    empty_message = ChatMessage(
        chat_session_id=chat_session_id,
        parent_message_id=parent_message,
        latest_child_message_id=None,
        message="Response was terminated prior to completion, try regenerating.",
        token_count=15,
        message_type=message_type,
    )

    # Add the empty message to the session
    db_session.add(empty_message)
    db_session.flush()

    # Get the parent message and set its child pointer to the current message
    parent_chat_message = (
        db_session.query(ChatMessage).filter(ChatMessage.id == parent_message).first()
    )
    if parent_chat_message:
        parent_chat_message.latest_child_message_id = empty_message.id

    # Committing because it's ok to recover this state. More clear to the user than it is now.
    # Ideally there's a special UI for a case like this with a regenerate button but not needed for now.
    db_session.commit()

    return empty_message


def reserve_multi_model_message_ids(
    db_session: Session,
    chat_session_id: UUID,
    parent_message_id: int,
    model_display_names: list[str],
) -> list[ChatMessage]:
    """Reserve N assistant message placeholders for multi-model parallel streaming.

    All messages share the same parent (the user message). The parent's
    latest_child_message_id points to the LAST reserved message so that the
    default history-chain walker picks it up.
    """
    reserved: list[ChatMessage] = []
    for display_name in model_display_names:
        msg = ChatMessage(
            chat_session_id=chat_session_id,
            parent_message_id=parent_message_id,
            latest_child_message_id=None,
            message="Response was terminated prior to completion, try regenerating.",
            token_count=15,  # placeholder; updated on completion by llm_loop_completion_handle
            message_type=MessageType.ASSISTANT,
            model_display_name=display_name,
        )
        db_session.add(msg)
        reserved.append(msg)

    # Flush to assign IDs without committing yet
    db_session.flush()

    # Point parent's latest_child to the last reserved message
    parent = (
        db_session.query(ChatMessage)
        .filter(ChatMessage.id == parent_message_id)
        .first()
    )
    if parent:
        parent.latest_child_message_id = reserved[-1].id

    db_session.commit()
    return reserved


def set_preferred_response(
    db_session: Session,
    user_message_id: int,
    preferred_assistant_message_id: int,
) -> None:
    """Mark one assistant response as the user's preferred choice in a multi-model turn.

    Also advances ``latest_child_message_id`` so the preferred response becomes
    the active branch for any subsequent messages in the conversation.

    Args:
        db_session: Active database session.
        user_message_id: Primary key of the ``USER``-type ``ChatMessage`` whose
            preferred response is being set.
        preferred_assistant_message_id: Primary key of the ``ASSISTANT``-type
            ``ChatMessage`` to prefer. Must be a direct child of ``user_message_id``.

    Raises:
        ValueError: If either message is not found, if ``user_message_id`` does not
            refer to a USER message, or if the assistant message is not a direct child
            of the user message.
    """
    user_msg = db_session.get(ChatMessage, user_message_id)
    if user_msg is None:
        raise ValueError(f"User message {user_message_id} not found")
    if user_msg.message_type != MessageType.USER:
        raise ValueError(f"Message {user_message_id} is not a user message")

    assistant_msg = db_session.get(ChatMessage, preferred_assistant_message_id)
    if assistant_msg is None:
        raise ValueError(
            f"Assistant message {preferred_assistant_message_id} not found"
        )
    if assistant_msg.parent_message_id != user_message_id:
        raise ValueError(
            f"Assistant message {preferred_assistant_message_id} is not a child of user message {user_message_id}"
        )

    user_msg.preferred_response_id = preferred_assistant_message_id
    user_msg.latest_child_message_id = preferred_assistant_message_id
    db_session.commit()


def create_new_chat_message(
    chat_session_id: UUID,
    parent_message: ChatMessage,
    message: str,
    token_count: int,
    message_type: MessageType,
    db_session: Session,
    files: list[FileDescriptor] | None = None,
    error: str | None = None,
    commit: bool = True,
    reserved_message_id: int | None = None,
    reasoning_tokens: str | None = None,
) -> ChatMessage:
    if reserved_message_id is not None:
        # Edit existing message
        existing_message = db_session.query(ChatMessage).get(reserved_message_id)
        if existing_message is None:
            raise ValueError(f"No message found with id {reserved_message_id}")

        existing_message.chat_session_id = chat_session_id
        existing_message.parent_message_id = parent_message.id
        existing_message.message = message
        existing_message.token_count = token_count
        existing_message.message_type = message_type
        existing_message.files = files
        existing_message.error = error
        existing_message.reasoning_tokens = reasoning_tokens
        new_chat_message = existing_message
    else:
        # Create new message
        new_chat_message = ChatMessage(
            chat_session_id=chat_session_id,
            parent_message_id=parent_message.id,
            latest_child_message_id=None,
            message=message,
            token_count=token_count,
            message_type=message_type,
            files=files,
            error=error,
            reasoning_tokens=reasoning_tokens,
        )
        db_session.add(new_chat_message)

    # Flush the session to get an ID for the new chat message
    db_session.flush()

    parent_message.latest_child_message_id = new_chat_message.id
    if commit:
        db_session.commit()

    return new_chat_message


def set_as_latest_chat_message(
    chat_message: ChatMessage,
    user_id: UUID | None,
    db_session: Session,
) -> None:
    parent_message_id = chat_message.parent_message_id

    if parent_message_id is None:
        raise RuntimeError(
            f"Trying to set a latest message without parent, message id: {chat_message.id}"
        )

    parent_message = get_chat_message(
        chat_message_id=parent_message_id, user_id=user_id, db_session=db_session
    )

    parent_message.latest_child_message_id = chat_message.id

    db_session.commit()


def create_db_search_doc(
    server_search_doc: ServerSearchDoc,
    db_session: Session,
    commit: bool = True,
) -> DBSearchDoc:
    db_search_doc = DBSearchDoc(
        document_id=sanitize_string(server_search_doc.document_id),
        chunk_ind=server_search_doc.chunk_ind,
        semantic_id=sanitize_string(server_search_doc.semantic_identifier),
        link=(
            sanitize_string(server_search_doc.link)
            if server_search_doc.link is not None
            else None
        ),
        blurb=sanitize_string(server_search_doc.blurb),
        source_type=server_search_doc.source_type,
        boost=server_search_doc.boost,
        hidden=server_search_doc.hidden,
        doc_metadata=server_search_doc.metadata,
        is_relevant=server_search_doc.is_relevant,
        relevance_explanation=(
            sanitize_string(server_search_doc.relevance_explanation)
            if server_search_doc.relevance_explanation is not None
            else None
        ),
        score=server_search_doc.score or 0.0,
        match_highlights=[
            sanitize_string(h) for h in server_search_doc.match_highlights
        ],
        updated_at=server_search_doc.updated_at,
        primary_owners=(
            [sanitize_string(o) for o in server_search_doc.primary_owners]
            if server_search_doc.primary_owners is not None
            else None
        ),
        secondary_owners=(
            [sanitize_string(o) for o in server_search_doc.secondary_owners]
            if server_search_doc.secondary_owners is not None
            else None
        ),
        is_internet=server_search_doc.is_internet,
    )

    db_session.add(db_search_doc)
    if commit:
        db_session.commit()
    else:
        db_session.flush()
    return db_search_doc


def get_db_search_doc_by_id(doc_id: int, db_session: Session) -> DBSearchDoc | None:
    """There are no safety checks here like user permission etc., use with caution"""
    search_doc = db_session.query(DBSearchDoc).filter(DBSearchDoc.id == doc_id).first()
    return search_doc


def get_db_search_doc_by_document_id(
    document_id: str, db_session: Session
) -> DBSearchDoc | None:
    """Get SearchDoc by document_id field. There are no safety checks here like user permission etc., use with caution"""
    search_doc = (
        db_session.query(DBSearchDoc)
        .filter(DBSearchDoc.document_id == document_id)
        .first()
    )
    return search_doc


def translate_db_search_doc_to_saved_search_doc(
    db_search_doc: DBSearchDoc,
    remove_doc_content: bool = False,
) -> SavedSearchDoc:
    return SavedSearchDoc(
        db_doc_id=db_search_doc.id,
        score=db_search_doc.score,
        document_id=db_search_doc.document_id,
        chunk_ind=db_search_doc.chunk_ind,
        semantic_identifier=db_search_doc.semantic_id,
        link=db_search_doc.link,
        blurb=db_search_doc.blurb if not remove_doc_content else "",
        source_type=db_search_doc.source_type,
        boost=db_search_doc.boost,
        hidden=db_search_doc.hidden,
        metadata=db_search_doc.doc_metadata if not remove_doc_content else {},
        match_highlights=(
            db_search_doc.match_highlights if not remove_doc_content else []
        ),
        relevance_explanation=db_search_doc.relevance_explanation,
        is_relevant=db_search_doc.is_relevant,
        updated_at=db_search_doc.updated_at if not remove_doc_content else None,
        primary_owners=db_search_doc.primary_owners if not remove_doc_content else [],
        secondary_owners=(
            db_search_doc.secondary_owners if not remove_doc_content else []
        ),
        is_internet=db_search_doc.is_internet,
    )


def translate_db_message_to_chat_message_detail(
    chat_message: ChatMessage,
    remove_doc_content: bool = False,
) -> ChatMessageDetail:
    # Get current feedback if any
    current_feedback = None
    if chat_message.chat_message_feedbacks:
        latest_feedback = chat_message.chat_message_feedbacks[-1]
        if latest_feedback.is_positive is not None:
            current_feedback = "like" if latest_feedback.is_positive else "dislike"

    # Convert citations from {citation_num: db_doc_id} to {citation_num: document_id}
    converted_citations = None
    if chat_message.citations and chat_message.search_docs:
        # Build lookup map: db_doc_id -> document_id
        db_doc_id_to_document_id = {
            doc.id: doc.document_id for doc in chat_message.search_docs
        }

        converted_citations = {}
        for citation_num, db_doc_id in chat_message.citations.items():
            document_id = db_doc_id_to_document_id.get(db_doc_id)
            if document_id:
                converted_citations[citation_num] = document_id

    top_documents = [
        translate_db_search_doc_to_saved_search_doc(
            db_doc, remove_doc_content=remove_doc_content
        )
        for db_doc in chat_message.search_docs
    ]
    top_documents = sorted(
        top_documents, key=lambda doc: doc.score or 0.0, reverse=True
    )
    chat_msg_detail = ChatMessageDetail(
        chat_session_id=chat_message.chat_session_id,
        message_id=chat_message.id,
        parent_message=chat_message.parent_message_id,
        latest_child_message=chat_message.latest_child_message_id,
        message=chat_message.message,
        reasoning_tokens=chat_message.reasoning_tokens,
        message_type=chat_message.message_type,
        context_docs=top_documents,
        citations=converted_citations,
        time_sent=chat_message.time_sent,
        files=chat_message.files or [],
        error=chat_message.error,
        current_feedback=current_feedback,
        processing_duration_seconds=chat_message.processing_duration_seconds,
        preferred_response_id=chat_message.preferred_response_id,
        model_display_name=chat_message.model_display_name,
    )

    return chat_msg_detail


def update_chat_session_updated_at_timestamp(
    chat_session_id: UUID, db_session: Session
) -> None:
    """
    Explicitly update the timestamp on a chat session without modifying other fields.
    This is useful when adding messages to a chat session to reflect recent activity.
    """

    # Direct SQL update to avoid loading the entire object if it's not already loaded
    db_session.execute(
        update(ChatSession)
        .where(ChatSession.id == chat_session_id)
        .values(time_updated=func.now())
    )
    # No commit - the caller is responsible for committing the transaction


def create_search_doc_from_inference_section(
    inference_section: InferenceSection,
    is_internet: bool,
    db_session: Session,
    score: float = 0.0,
    is_relevant: bool | None = None,
    relevance_explanation: str | None = None,
    commit: bool = False,
) -> DBSearchDoc:
    """Create a SearchDoc in the database from an InferenceSection."""

    db_search_doc = DBSearchDoc(
        document_id=inference_section.center_chunk.document_id,
        chunk_ind=inference_section.center_chunk.chunk_id,
        semantic_id=inference_section.center_chunk.semantic_identifier,
        link=(
            inference_section.center_chunk.source_links.get(0)
            if inference_section.center_chunk.source_links
            else None
        ),
        blurb=inference_section.center_chunk.blurb,
        source_type=inference_section.center_chunk.source_type,
        boost=inference_section.center_chunk.boost,
        hidden=inference_section.center_chunk.hidden,
        doc_metadata=inference_section.center_chunk.metadata,
        score=score,
        is_relevant=is_relevant,
        relevance_explanation=relevance_explanation,
        match_highlights=inference_section.center_chunk.match_highlights,
        updated_at=inference_section.center_chunk.updated_at,
        primary_owners=inference_section.center_chunk.primary_owners or [],
        secondary_owners=inference_section.center_chunk.secondary_owners or [],
        is_internet=is_internet,
    )

    db_session.add(db_search_doc)
    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return db_search_doc


def create_search_doc_from_saved_search_doc(
    saved_search_doc: SavedSearchDoc,
) -> DBSearchDoc:
    """Convert SavedSearchDoc (server model) into DB SearchDoc with correct field mapping."""
    return DBSearchDoc(
        document_id=saved_search_doc.document_id,
        chunk_ind=saved_search_doc.chunk_ind,
        # Map Pydantic semantic_identifier -> DB semantic_id; ensure non-null
        semantic_id=saved_search_doc.semantic_identifier or "Unknown",
        link=saved_search_doc.link,
        blurb=saved_search_doc.blurb,
        source_type=saved_search_doc.source_type,
        boost=saved_search_doc.boost,
        hidden=saved_search_doc.hidden,
        # Map metadata -> doc_metadata (DB column name)
        doc_metadata=saved_search_doc.metadata,
        # SavedSearchDoc.score exists and defaults to 0.0
        score=saved_search_doc.score or 0.0,
        match_highlights=saved_search_doc.match_highlights,
        updated_at=saved_search_doc.updated_at,
        primary_owners=saved_search_doc.primary_owners,
        secondary_owners=saved_search_doc.secondary_owners,
        is_internet=saved_search_doc.is_internet,
        is_relevant=saved_search_doc.is_relevant,
        relevance_explanation=saved_search_doc.relevance_explanation,
    )


def update_db_session_with_messages(
    db_session: Session,
    chat_message_id: int,
    chat_session_id: UUID,
    message: str | None = None,
    message_type: str | None = None,
    token_count: int | None = None,
    error: str | None = None,
    update_parent_message: bool = True,
    files: list[FileDescriptor] | None = None,
    reasoning_tokens: str | None = None,
    commit: bool = False,
) -> ChatMessage:
    chat_message = (
        db_session.query(ChatMessage)
        .filter(
            ChatMessage.id == chat_message_id,
            ChatMessage.chat_session_id == chat_session_id,
        )
        .first()
    )
    if not chat_message:
        raise ValueError("Chat message with id not found")  # should never happen

    if message:
        chat_message.message = message
    if message_type:
        chat_message.message_type = MessageType(message_type)
    if token_count:
        chat_message.token_count = token_count
    if error:
        chat_message.error = error
    if files is not None:
        chat_message.files = files
    if reasoning_tokens is not None:
        chat_message.reasoning_tokens = reasoning_tokens

    if update_parent_message:
        parent_chat_message = (
            db_session.query(ChatMessage)
            .filter(ChatMessage.id == chat_message.parent_message_id)
            .first()
        )
        if parent_chat_message:
            parent_chat_message.latest_child_message_id = chat_message.id

    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return chat_message


================================================
FILE: backend/onyx/db/chat_search.py
================================================
from typing import List
from typing import Optional
from typing import Tuple
from uuid import UUID

from sqlalchemy import column
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import ColumnClause

from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession


def search_chat_sessions(
    user_id: UUID | None,
    db_session: Session,
    query: Optional[str] = None,
    page: int = 1,
    page_size: int = 10,
    include_deleted: bool = False,
    include_onyxbot_flows: bool = False,
) -> Tuple[List[ChatSession], bool]:
    """
    Fast full-text search on ChatSession + ChatMessage using tsvectors.

    If no query is provided, returns the most recent chat sessions.
    Otherwise, searches both chat messages and session descriptions.

    Returns a tuple of (sessions, has_more) where has_more indicates if
    there are additional results beyond the requested page.
    """
    offset_val = (page - 1) * page_size

    # If no query, just return the most recent sessions
    if not query or not query.strip():
        stmt = (
            select(ChatSession)
            .order_by(desc(ChatSession.time_created))
            .offset(offset_val)
            .limit(page_size + 1)
        )
        if user_id is not None:
            stmt = stmt.where(ChatSession.user_id == user_id)
        if not include_onyxbot_flows:
            stmt = stmt.where(ChatSession.onyxbot_flow.is_(False))
        if not include_deleted:
            stmt = stmt.where(ChatSession.deleted.is_(False))

        result = db_session.execute(stmt.options(joinedload(ChatSession.persona)))
        sessions = result.scalars().all()

        has_more = len(sessions) > page_size
        if has_more:
            sessions = sessions[:page_size]

        return list(sessions), has_more

    # Otherwise, proceed with full-text search
    query = query.strip()

    base_conditions = []
    if user_id is not None:
        base_conditions.append(ChatSession.user_id == user_id)
    if not include_onyxbot_flows:
        base_conditions.append(ChatSession.onyxbot_flow.is_(False))
    if not include_deleted:
        base_conditions.append(ChatSession.deleted.is_(False))

    message_tsv: ColumnClause = column("message_tsv")
    description_tsv: ColumnClause = column("description_tsv")

    ts_query = func.plainto_tsquery("english", query)

    description_session_ids = (
        select(ChatSession.id)
        .where(*base_conditions)
        .where(description_tsv.op("@@")(ts_query))
    )

    message_session_ids = (
        select(ChatMessage.chat_session_id)
        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)
        .where(*base_conditions)
        .where(message_tsv.op("@@")(ts_query))
    )

    combined_ids = description_session_ids.union(message_session_ids).alias(
        "combined_ids"
    )

    final_stmt = (
        select(ChatSession)
        .join(combined_ids, ChatSession.id == combined_ids.c.id)
        .order_by(desc(ChatSession.time_created))
        .distinct()
        .offset(offset_val)
        .limit(page_size + 1)
        .options(joinedload(ChatSession.persona))
    )

    session_objs = db_session.execute(final_stmt).scalars().all()

    has_more = len(session_objs) > page_size
    if has_more:
        session_objs = session_objs[:page_size]

    return list(session_objs), has_more


================================================
FILE: backend/onyx/db/chunk.py
================================================
from datetime import datetime
from datetime import timezone

from sqlalchemy import delete
from sqlalchemy.orm import Session

from onyx.db.models import ChunkStats
from onyx.indexing.models import UpdatableChunkData


def update_chunk_boost_components__no_commit(
    chunk_data: list[UpdatableChunkData],
    db_session: Session,
) -> None:
    """Updates the chunk_boost_components for chunks in the database.

    Args:
        chunk_data: List of dicts containing chunk_id, document_id, and boost_score
        db_session: SQLAlchemy database session
    """
    if not chunk_data:
        return

    for data in chunk_data:
        chunk_in_doc_id = int(data.chunk_id)
        if chunk_in_doc_id < 0:
            raise ValueError(f"Chunk ID is empty for chunk {data}")

        chunk_document_id = f"{data.document_id}__{chunk_in_doc_id}"
        chunk_stats = (
            db_session.query(ChunkStats)
            .filter(
                ChunkStats.id == chunk_document_id,
            )
            .first()
        )

        score = data.boost_score

        if chunk_stats:
            chunk_stats.information_content_boost = score
            chunk_stats.last_modified = datetime.now(timezone.utc)
            db_session.add(chunk_stats)
        else:
            # do not save new chunks with a neutral boost score
            if score == 1.0:
                continue
            # Create new record
            chunk_stats = ChunkStats(
                document_id=data.document_id,
                chunk_in_doc_id=chunk_in_doc_id,
                information_content_boost=score,
            )
            db_session.add(chunk_stats)


def delete_chunk_stats_by_connector_credential_pair__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """This deletes just chunk stats in postgres."""
    stmt = delete(ChunkStats).where(ChunkStats.document_id.in_(document_ids))

    db_session.execute(stmt)


================================================
FILE: backend/onyx/db/code_interpreter.py
================================================
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import CodeInterpreterServer


def fetch_code_interpreter_server(
    db_session: Session,
) -> CodeInterpreterServer:
    server = db_session.scalars(select(CodeInterpreterServer)).one()
    return server


def update_code_interpreter_server_enabled(
    db_session: Session,
    enabled: bool,
) -> CodeInterpreterServer:
    server = db_session.scalars(select(CodeInterpreterServer)).one()
    server.server_enabled = enabled
    db_session.commit()
    return server


================================================
FILE: backend/onyx/db/connector.py
================================================
from datetime import datetime
from datetime import timezone
from typing import cast

from sqlalchemy import and_
from sqlalchemy import exists
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DEFAULT_PRUNING_FREQ
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import IndexingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import FederatedConnector
from onyx.db.models import IndexAttempt
from onyx.kg.models import KGConnectorData
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import ObjectCreationIdResponse
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger

logger = setup_logger()


def check_federated_connectors_exist(db_session: Session) -> bool:
    stmt = select(exists(FederatedConnector))
    result = db_session.execute(stmt)
    return result.scalar() or False


def check_connectors_exist(db_session: Session) -> bool:
    # Connector 0 is created on server startup as a default for ingestion
    # it will always exist and we don't need to count it for this
    stmt = select(exists(Connector).where(Connector.id > 0))
    result = db_session.execute(stmt)
    return result.scalar() or False


def check_user_files_exist(db_session: Session) -> bool:
    """Check if any user files exist in the system.

    This is used to determine if the search tool should be available
    when there are no regular connectors but there are user files
    (User Knowledge mode).
    """
    from onyx.db.models import UserFile
    from onyx.db.enums import UserFileStatus

    stmt = select(exists(UserFile).where(UserFile.status == UserFileStatus.COMPLETED))
    result = db_session.execute(stmt)
    return result.scalar() or False


def fetch_connectors(
    db_session: Session,
    sources: list[DocumentSource] | None = None,
    input_types: list[InputType] | None = None,
) -> list[Connector]:
    stmt = select(Connector)
    if sources is not None:
        stmt = stmt.where(Connector.source.in_(sources))
    if input_types is not None:
        stmt = stmt.where(Connector.input_type.in_(input_types))
    results = db_session.scalars(stmt)
    return list(results.all())


def connector_by_name_source_exists(
    connector_name: str, source: DocumentSource, db_session: Session
) -> bool:
    stmt = select(Connector).where(
        Connector.name == connector_name, Connector.source == source
    )
    result = db_session.execute(stmt)
    connector = result.scalar_one_or_none()
    return connector is not None


def fetch_connector_by_id(connector_id: int, db_session: Session) -> Connector | None:
    stmt = select(Connector).where(Connector.id == connector_id)
    result = db_session.execute(stmt)
    connector = result.scalar_one_or_none()
    return connector


def fetch_ingestion_connector_by_name(
    connector_name: str, db_session: Session
) -> Connector | None:
    stmt = (
        select(Connector)
        .where(Connector.name == connector_name)
        .where(Connector.source == DocumentSource.INGESTION_API)
    )
    result = db_session.execute(stmt)
    connector = result.scalar_one_or_none()
    return connector


def create_connector(
    db_session: Session,
    connector_data: ConnectorBase,
) -> ObjectCreationIdResponse:
    if connector_by_name_source_exists(
        connector_data.name, connector_data.source, db_session
    ):
        raise ValueError(
            "Connector by this name already exists, duplicate naming not allowed."
        )

    connector = Connector(
        name=connector_data.name,
        source=connector_data.source,
        input_type=connector_data.input_type,
        connector_specific_config=connector_data.connector_specific_config,
        refresh_freq=connector_data.refresh_freq,
        indexing_start=connector_data.indexing_start,
        prune_freq=connector_data.prune_freq,
    )
    db_session.add(connector)
    db_session.commit()

    return ObjectCreationIdResponse(id=connector.id)


def update_connector(
    connector_id: int,
    connector_data: ConnectorBase,
    db_session: Session,
) -> Connector | None:
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        return None

    if connector_data.name != connector.name and connector_by_name_source_exists(
        connector_data.name, connector_data.source, db_session
    ):
        raise ValueError(
            "Connector by this name already exists, duplicate naming not allowed."
        )

    connector.name = connector_data.name
    connector.source = connector_data.source
    connector.input_type = connector_data.input_type
    connector.connector_specific_config = connector_data.connector_specific_config
    connector.refresh_freq = connector_data.refresh_freq
    connector.prune_freq = (
        connector_data.prune_freq
        if connector_data.prune_freq is not None
        else DEFAULT_PRUNING_FREQ
    )

    db_session.commit()
    return connector


def delete_connector(
    db_session: Session,
    connector_id: int,
) -> StatusResponse[int]:
    """Only used in special cases (e.g. a connector is in a bad state and we need to delete it).
    Be VERY careful using this, as it could lead to a bad state if not used correctly.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        return StatusResponse(
            success=True, message="Connector was already deleted", data=connector_id
        )

    db_session.delete(connector)
    return StatusResponse(
        success=True, message="Connector deleted successfully", data=connector_id
    )


def get_connector_credential_ids(
    connector_id: int,
    db_session: Session,
) -> list[int]:
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise ValueError(f"Connector by id {connector_id} does not exist")

    return [association.credential.id for association in connector.credentials]


def fetch_latest_index_attempt_by_connector(
    db_session: Session,
    source: DocumentSource | None = None,
) -> list[IndexAttempt]:
    latest_index_attempts: list[IndexAttempt] = []

    if source:
        connectors = fetch_connectors(db_session, sources=[source])
    else:
        connectors = fetch_connectors(db_session)

    if not connectors:
        return []

    for connector in connectors:
        latest_index_attempt = (
            db_session.query(IndexAttempt)
            .join(ConnectorCredentialPair)
            .filter(ConnectorCredentialPair.connector_id == connector.id)
            .order_by(IndexAttempt.time_updated.desc())
            .first()
        )

        if latest_index_attempt is not None:
            latest_index_attempts.append(latest_index_attempt)

    return latest_index_attempts


def fetch_latest_index_attempts_by_status(
    db_session: Session,
) -> list[IndexAttempt]:
    subquery = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            IndexAttempt.status,
            func.max(IndexAttempt.time_updated).label("time_updated"),
        )
        .group_by(IndexAttempt.connector_credential_pair_id)
        .group_by(IndexAttempt.status)
        .subquery()
    )

    alias = aliased(IndexAttempt, subquery)

    query = db_session.query(IndexAttempt).join(
        alias,
        and_(
            IndexAttempt.connector_credential_pair_id
            == alias.connector_credential_pair_id,
            IndexAttempt.status == alias.status,
            IndexAttempt.time_updated == alias.time_updated,
        ),
    )

    return cast(list[IndexAttempt], query.all())


def fetch_unique_document_sources(db_session: Session) -> list[DocumentSource]:
    distinct_sources = db_session.query(Connector.source).distinct().all()

    sources = [
        source[0]
        for source in distinct_sources
        if source[0] != DocumentSource.INGESTION_API
    ]

    return sources


def create_initial_default_connector(db_session: Session) -> None:
    default_connector_id = 0
    default_connector = fetch_connector_by_id(default_connector_id, db_session)
    if default_connector is not None:
        if (
            default_connector.source != DocumentSource.INGESTION_API
            or default_connector.input_type != InputType.LOAD_STATE
            or default_connector.refresh_freq is not None
            or default_connector.name != "Ingestion API"
            or default_connector.connector_specific_config != {}
            or default_connector.prune_freq is not None
        ):
            logger.warning(
                "Default connector does not have expected values. Updating to proper state."
            )
            # Ensure default connector has correct values
            default_connector.source = DocumentSource.INGESTION_API
            default_connector.input_type = InputType.LOAD_STATE
            default_connector.refresh_freq = None
            default_connector.name = "Ingestion API"
            default_connector.connector_specific_config = {}
            default_connector.prune_freq = None
            db_session.commit()
        return

    # Create a new default connector if it doesn't exist
    connector = Connector(
        id=default_connector_id,
        name="Ingestion API",
        source=DocumentSource.INGESTION_API,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
        refresh_freq=None,
        prune_freq=None,
    )
    db_session.add(connector)
    db_session.commit()


def mark_ccpair_as_pruned(cc_pair_id: int, db_session: Session) -> None:
    stmt = select(ConnectorCredentialPair).where(
        ConnectorCredentialPair.id == cc_pair_id
    )
    cc_pair = db_session.scalar(stmt)
    if cc_pair is None:
        raise ValueError(f"No cc_pair with ID: {cc_pair_id}")

    cc_pair.last_pruned = datetime.now(timezone.utc)
    db_session.commit()


def mark_cc_pair_as_hierarchy_fetched(db_session: Session, cc_pair_id: int) -> None:
    stmt = select(ConnectorCredentialPair).where(
        ConnectorCredentialPair.id == cc_pair_id
    )
    cc_pair = db_session.scalar(stmt)
    if cc_pair is None:
        raise ValueError(f"No cc_pair with ID: {cc_pair_id}")

    cc_pair.last_time_hierarchy_fetch = datetime.now(timezone.utc)
    db_session.commit()


def mark_cc_pair_as_permissions_synced(
    db_session: Session, cc_pair_id: int, start_time: datetime | None
) -> None:
    stmt = select(ConnectorCredentialPair).where(
        ConnectorCredentialPair.id == cc_pair_id
    )
    cc_pair = db_session.scalar(stmt)
    if cc_pair is None:
        raise ValueError(f"No cc_pair with ID: {cc_pair_id}")

    cc_pair.last_time_perm_sync = start_time
    db_session.commit()


def mark_cc_pair_as_external_group_synced(db_session: Session, cc_pair_id: int) -> None:
    stmt = select(ConnectorCredentialPair).where(
        ConnectorCredentialPair.id == cc_pair_id
    )
    cc_pair = db_session.scalar(stmt)
    if cc_pair is None:
        raise ValueError(f"No cc_pair with ID: {cc_pair_id}")

    # The sync time can be marked after it ran because all group syncs
    # are run in full, not polling for changes.
    # If this changes, we need to update this function.
    cc_pair.last_time_external_group_sync = datetime.now(timezone.utc)
    db_session.commit()


def mark_ccpair_with_indexing_trigger(
    cc_pair_id: int, indexing_mode: IndexingMode | None, db_session: Session
) -> None:
    """indexing_mode sets a field which will be picked up by a background task
    to trigger indexing. Set to None to disable the trigger."""
    try:
        cc_pair = db_session.execute(
            select(ConnectorCredentialPair)
            .where(ConnectorCredentialPair.id == cc_pair_id)
            .with_for_update()
        ).scalar_one()

        if cc_pair is None:
            raise ValueError(f"No cc_pair with ID: {cc_pair_id}")

        cc_pair.indexing_trigger = indexing_mode
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise


def get_kg_enabled_connectors(db_session: Session) -> list[KGConnectorData]:
    """
    Retrieves a list of connector IDs that have not been KG processed for a given tenant.
    Args:
        db_session (Session): The database session to use
    Returns:
        list[KGConnectorData]: List of connector IDs with KG extraction enabled but have unprocessed documents
    """
    try:
        stmt = select(Connector.id, Connector.source, Connector.kg_coverage_days).where(
            Connector.kg_processing_enabled
        )
        result = db_session.execute(stmt)

        connector_results = [
            KGConnectorData(id=row[0], source=row[1].lower(), kg_coverage_days=row[2])
            for row in result.fetchall()
        ]

        return connector_results

    except Exception as e:
        logger.error(f"Error fetching unprocessed connector IDs: {str(e)}")
        raise e


================================================
FILE: backend/onyx/db/connector_credential_pair.py
================================================
from datetime import datetime
from enum import Enum
from typing import TypeVarTuple

from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import exists
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import aliased
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import fetch_credential_by_id
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import ProcessingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

logger = setup_logger()

R = TypeVarTuple("R")


class ConnectorType(str, Enum):
    STANDARD = "standard"
    USER_FILE = "user_file"


def _add_user_filters(
    stmt: Select[tuple[*R]], user: User, get_editable: bool = True
) -> Select[tuple[*R]]:
    if user.role == UserRole.ADMIN:
        return stmt

    # If anonymous user, only show public cc_pairs
    if user.is_anonymous:
        where_clause = ConnectorCredentialPair.access_type == AccessType.PUBLIC
        return stmt.where(where_clause)

    stmt = stmt.distinct()
    UG__CCpair = aliased(UserGroup__ConnectorCredentialPair)
    User__UG = aliased(User__UserGroup)

    """
    Here we select cc_pairs by relation:
    User -> User__UserGroup -> UserGroup__ConnectorCredentialPair ->
    ConnectorCredentialPair
    """
    stmt = stmt.outerjoin(UG__CCpair).outerjoin(
        User__UG,
        User__UG.user_group_id == UG__CCpair.user_group_id,
    )

    """
    Filter cc_pairs by:
    - if the user is in the user_group that owns the cc_pair
    - if the user is not a global_curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out cc_pairs that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all cc_pairs in the groups the user is a curator
    for (as well as public cc_pairs)
    """

    where_clause = User__UG.user_id == user.id
    if user.role == UserRole.CURATOR and get_editable:
        where_clause &= User__UG.is_curator == True  # noqa: E712
    if get_editable:
        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)
        if user.role == UserRole.CURATOR:
            user_groups = user_groups.where(
                User__UserGroup.is_curator == True  # noqa: E712
            )
        where_clause &= (
            ~exists()
            .where(UG__CCpair.cc_pair_id == ConnectorCredentialPair.id)
            .where(~UG__CCpair.user_group_id.in_(user_groups))
            .correlate(ConnectorCredentialPair)
        )
        where_clause |= ConnectorCredentialPair.creator_id == user.id
    else:
        where_clause |= ConnectorCredentialPair.access_type == AccessType.PUBLIC
        where_clause |= ConnectorCredentialPair.access_type == AccessType.SYNC

    return stmt.where(where_clause)


def get_connector_credential_pairs_for_user(
    db_session: Session,
    user: User,
    get_editable: bool = True,
    ids: list[int] | None = None,
    eager_load_connector: bool = False,
    eager_load_credential: bool = False,
    eager_load_user: bool = False,
    order_by_desc: bool = False,
    source: DocumentSource | None = None,
    processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,
    defer_connector_config: bool = False,
) -> list[ConnectorCredentialPair]:
    """Get connector credential pairs for a user.

    Args:
        processing_mode: Filter by processing mode. Defaults to REGULAR to hide
            FILE_SYSTEM connectors from standard admin UI. Pass None to get all.
        defer_connector_config: If True, skips loading Connector.connector_specific_config
            to avoid fetching large JSONB blobs when they aren't needed.
    """
    if eager_load_user:
        assert (
            eager_load_credential
        ), "eager_load_credential must be True if eager_load_user is True"
    stmt = select(ConnectorCredentialPair).distinct()

    if eager_load_connector:
        connector_load = selectinload(ConnectorCredentialPair.connector)
        if defer_connector_config:
            connector_load = connector_load.defer(Connector.connector_specific_config)
        stmt = stmt.options(connector_load)

    if eager_load_credential:
        load_opts = selectinload(ConnectorCredentialPair.credential)
        if eager_load_user:
            load_opts = load_opts.joinedload(Credential.user)
        stmt = stmt.options(load_opts)

    stmt = _add_user_filters(stmt, user, get_editable)

    if source:
        stmt = stmt.join(ConnectorCredentialPair.connector).where(
            Connector.source == source.value
        )

    if ids:
        stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))

    if processing_mode is not None:
        stmt = stmt.where(ConnectorCredentialPair.processing_mode == processing_mode)

    if order_by_desc:
        stmt = stmt.order_by(desc(ConnectorCredentialPair.id))

    return list(db_session.scalars(stmt).unique().all())


# For use with our thread-level parallelism utils. Note that any relationships
# you wish to use MUST be eagerly loaded, as the session will not be available
# after this function to allow lazy loading.
def get_connector_credential_pairs_for_user_parallel(
    user: User,
    get_editable: bool = True,
    ids: list[int] | None = None,
    eager_load_connector: bool = False,
    eager_load_credential: bool = False,
    eager_load_user: bool = False,
    order_by_desc: bool = False,
    source: DocumentSource | None = None,
    processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,
    defer_connector_config: bool = False,
) -> list[ConnectorCredentialPair]:
    with get_session_with_current_tenant() as db_session:
        return get_connector_credential_pairs_for_user(
            db_session=db_session,
            user=user,
            get_editable=get_editable,
            ids=ids,
            eager_load_connector=eager_load_connector,
            eager_load_credential=eager_load_credential,
            eager_load_user=eager_load_user,
            order_by_desc=order_by_desc,
            source=source,
            processing_mode=processing_mode,
            defer_connector_config=defer_connector_config,
        )


def get_connector_credential_pairs(
    db_session: Session, ids: list[int] | None = None
) -> list[ConnectorCredentialPair]:
    stmt = select(ConnectorCredentialPair).distinct()

    if ids:
        stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))

    return list(db_session.scalars(stmt).all())


def add_deletion_failure_message(
    db_session: Session,
    cc_pair_id: int,
    failure_message: str,
) -> None:
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    if not cc_pair:
        return
    cc_pair.deletion_failure_message = failure_message
    db_session.commit()


def get_cc_pair_groups_for_ids(
    db_session: Session,
    cc_pair_ids: list[int],
) -> list[UserGroup__ConnectorCredentialPair]:
    stmt = select(UserGroup__ConnectorCredentialPair).distinct()
    stmt = stmt.outerjoin(
        ConnectorCredentialPair,
        UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,
    )
    stmt = stmt.where(UserGroup__ConnectorCredentialPair.cc_pair_id.in_(cc_pair_ids))
    return list(db_session.scalars(stmt).all())


# For use with our thread-level parallelism utils. Note that any relationships
# you wish to use MUST be eagerly loaded, as the session will not be available
# after this function to allow lazy loading.
def get_cc_pair_groups_for_ids_parallel(
    cc_pair_ids: list[int],
) -> list[UserGroup__ConnectorCredentialPair]:
    with get_session_with_current_tenant() as db_session:
        return get_cc_pair_groups_for_ids(db_session, cc_pair_ids)


def get_connector_credential_pair_for_user(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    user: User,
    get_editable: bool = True,
) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair)
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id)
    stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id)
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def get_connector_credential_pair(
    db_session: Session,
    connector_id: int,
    credential_id: int,
) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair)
    stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id)
    stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id)
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def get_connector_credential_pair_from_id_for_user(
    cc_pair_id: int,
    db_session: Session,
    user: User,
    get_editable: bool = True,
) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair).distinct()
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def verify_user_has_access_to_cc_pair(
    cc_pair_id: int,
    db_session: Session,
    user: User,
    get_editable: bool = True,
) -> bool:
    stmt = select(ConnectorCredentialPair.id)
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)
    result = db_session.execute(stmt)
    return result.scalars().first() is not None


def get_connector_credential_pair_from_id(
    db_session: Session,
    cc_pair_id: int,
    eager_load_connector: bool = False,
    eager_load_credential: bool = False,
) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair).distinct()
    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)

    if eager_load_credential:
        stmt = stmt.options(joinedload(ConnectorCredentialPair.credential))
    if eager_load_connector:
        stmt = stmt.options(joinedload(ConnectorCredentialPair.connector))

    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def get_connector_credential_pairs_for_source(
    db_session: Session,
    source: DocumentSource,
) -> list[ConnectorCredentialPair]:
    stmt = (
        select(ConnectorCredentialPair)
        .join(ConnectorCredentialPair.connector)
        .where(Connector.source == source)
    )
    return list(db_session.scalars(stmt).unique().all())


def get_last_successful_attempt_poll_range_end(
    cc_pair_id: int,
    earliest_index: float,
    search_settings: SearchSettings,
    db_session: Session,
) -> float:
    """Used to get the latest `poll_range_end` for a given connector and credential.

    This can be used to determine the next "start" time for a new index attempt.

    Note that the attempts time_started is not necessarily correct - that gets set
    separately and is similar but not exactly the same as the `poll_range_end`.
    """
    latest_successful_index_attempt = (
        db_session.query(IndexAttempt)
        .join(
            ConnectorCredentialPair,
            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
        )
        .filter(
            ConnectorCredentialPair.id == cc_pair_id,
            IndexAttempt.search_settings_id == search_settings.id,
            IndexAttempt.status == IndexingStatus.SUCCESS,
        )
        .order_by(IndexAttempt.poll_range_end.desc())
        .first()
    )
    if (
        not latest_successful_index_attempt
        or not latest_successful_index_attempt.poll_range_end
    ):
        return earliest_index

    return latest_successful_index_attempt.poll_range_end.timestamp()


"""Updates"""


def _update_connector_credential_pair(
    db_session: Session,
    cc_pair: ConnectorCredentialPair,
    status: ConnectorCredentialPairStatus | None = None,
    net_docs: int | None = None,
    run_dt: datetime | None = None,
) -> None:
    # simply don't update last_successful_index_time if run_dt is not specified
    # at worst, this would result in re-indexing documents that were already indexed
    if run_dt is not None:
        cc_pair.last_successful_index_time = run_dt
    if net_docs is not None:
        cc_pair.total_docs_indexed += net_docs
    if status is not None:
        cc_pair.status = status

    db_session.commit()


def update_connector_credential_pair_from_id(
    db_session: Session,
    cc_pair_id: int,
    status: ConnectorCredentialPairStatus | None = None,
    net_docs: int | None = None,
    run_dt: datetime | None = None,
) -> None:
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    if not cc_pair:
        logger.warning(
            f"Attempted to update pair for Connector Credential Pair '{cc_pair_id}' but it does not exist"
        )
        return

    _update_connector_credential_pair(
        db_session=db_session,
        cc_pair=cc_pair,
        status=status,
        net_docs=net_docs,
        run_dt=run_dt,
    )


def update_connector_credential_pair(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    status: ConnectorCredentialPairStatus | None = None,
    net_docs: int | None = None,
    run_dt: datetime | None = None,
) -> None:
    cc_pair = get_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
    )
    if not cc_pair:
        logger.warning(
            f"Attempted to update pair for connector id {connector_id} and credential id {credential_id}"
        )
        return

    _update_connector_credential_pair(
        db_session=db_session,
        cc_pair=cc_pair,
        status=status,
        net_docs=net_docs,
        run_dt=run_dt,
    )


def set_cc_pair_repeated_error_state(
    db_session: Session,
    cc_pair_id: int,
    in_repeated_error_state: bool,
) -> None:
    stmt = (
        update(ConnectorCredentialPair)
        .where(ConnectorCredentialPair.id == cc_pair_id)
        .values(in_repeated_error_state=in_repeated_error_state)
    )
    db_session.execute(stmt)
    db_session.commit()


def delete_connector_credential_pair__no_commit(
    db_session: Session,
    connector_id: int,
    credential_id: int,
) -> None:
    stmt = delete(ConnectorCredentialPair).where(
        ConnectorCredentialPair.connector_id == connector_id,
        ConnectorCredentialPair.credential_id == credential_id,
    )
    db_session.execute(stmt)


def associate_default_cc_pair(db_session: Session) -> None:
    existing_association = (
        db_session.query(ConnectorCredentialPair)
        .filter(
            ConnectorCredentialPair.connector_id == 0,
            ConnectorCredentialPair.credential_id == 0,
        )
        .one_or_none()
    )
    if existing_association is not None:
        return

    # DefaultCCPair has id 1 since it is the first CC pair created
    # It is DEFAULT_CC_PAIR_ID, but can't set it explicitly because it messed with the
    # auto-incrementing id
    association = ConnectorCredentialPair(
        connector_id=0,
        credential_id=0,
        access_type=AccessType.PUBLIC,
        name="DefaultCCPair",
        status=ConnectorCredentialPairStatus.ACTIVE,
    )
    db_session.add(association)
    db_session.commit()


def _relate_groups_to_cc_pair__no_commit(
    db_session: Session,
    cc_pair_id: int,
    user_group_ids: list[int] | None = None,
) -> None:
    if not user_group_ids:
        return

    for group_id in user_group_ids:
        db_session.add(
            UserGroup__ConnectorCredentialPair(
                user_group_id=group_id, cc_pair_id=cc_pair_id
            )
        )


def add_credential_to_connector(
    db_session: Session,
    user: User,
    connector_id: int,
    credential_id: int,
    cc_pair_name: str,
    access_type: AccessType,
    groups: list[int] | None,
    auto_sync_options: dict | None = None,
    initial_status: ConnectorCredentialPairStatus = ConnectorCredentialPairStatus.SCHEDULED,
    last_successful_index_time: datetime | None = None,
    seeding_flow: bool = False,
    processing_mode: ProcessingMode = ProcessingMode.REGULAR,
) -> StatusResponse:
    connector = fetch_connector_by_id(connector_id, db_session)

    # If we are in the seeding flow, we shouldn't need to check if the credential belongs to the user
    if seeding_flow:
        credential = fetch_credential_by_id(
            credential_id=credential_id,
            db_session=db_session,
        )
    else:
        credential = fetch_credential_by_id_for_user(
            credential_id,
            user,
            db_session,
            get_editable=False,
        )

    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")

    if access_type == AccessType.SYNC:
        if not fetch_ee_implementation_or_noop(
            "onyx.external_permissions.sync_params",
            "check_if_valid_sync_source",
            noop_return_value=True,
        )(connector.source):
            raise HTTPException(
                status_code=400,
                detail=f"Connector of type {connector.source} does not support SYNC access type",
            )

    if credential is None:
        error_msg = (
            f"Credential {credential_id} does not exist or does not belong to user"
        )
        logger.error(error_msg)
        raise HTTPException(
            status_code=401,
            detail=error_msg,
        )

    existing_association = (
        db_session.query(ConnectorCredentialPair)
        .filter(
            ConnectorCredentialPair.connector_id == connector_id,
            ConnectorCredentialPair.credential_id == credential_id,
        )
        .one_or_none()
    )
    if existing_association is not None:
        return StatusResponse(
            success=False,
            message=f"Connector {connector_id} already has Credential {credential_id}",
            data=connector_id,
        )

    association = ConnectorCredentialPair(
        creator_id=user.id,
        connector_id=connector_id,
        credential_id=credential_id,
        name=cc_pair_name,
        status=initial_status,
        access_type=access_type,
        auto_sync_options=auto_sync_options,
        last_successful_index_time=last_successful_index_time,
        processing_mode=processing_mode,
    )
    db_session.add(association)
    db_session.flush()  # make sure the association has an id
    db_session.refresh(association)

    _relate_groups_to_cc_pair__no_commit(
        db_session=db_session,
        cc_pair_id=association.id,
        user_group_ids=groups,
    )

    db_session.commit()

    return StatusResponse(
        success=True,
        message=f"Creating new association between Connector {connector_id} and Credential {credential_id}",
        data=association.id,
    )


def remove_credential_from_connector(
    connector_id: int,
    credential_id: int,
    user: User,
    db_session: Session,
) -> StatusResponse[int]:
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id_for_user(
        credential_id,
        user,
        db_session,
        get_editable=False,
    )

    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")

    if credential is None:
        raise HTTPException(
            status_code=404,
            detail="Credential does not exist or does not belong to user",
        )

    association = get_connector_credential_pair_for_user(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        user=user,
        get_editable=True,
    )

    if association is not None:
        fetch_ee_implementation_or_noop(
            "onyx.db.external_perm",
            "delete_user__ext_group_for_cc_pair__no_commit",
        )(
            db_session=db_session,
            cc_pair_id=association.id,
        )
        db_session.delete(association)
        db_session.commit()
        return StatusResponse(
            success=True,
            message=f"Credential {credential_id} removed from Connector",
            data=connector_id,
        )

    return StatusResponse(
        success=False,
        message=f"Connector already does not have Credential {credential_id}",
        data=connector_id,
    )


def fetch_indexable_standard_connector_credential_pair_ids(
    db_session: Session,
    active_cc_pairs_only: bool = True,
    limit: int | None = None,
) -> list[int]:
    stmt = select(ConnectorCredentialPair.id)

    # For regular indexing checks
    if active_cc_pairs_only:
        stmt = stmt.where(
            ConnectorCredentialPair.status.in_(
                ConnectorCredentialPairStatus.active_statuses()
            )
        )
    else:
        # For embedding swap checks, include PAUSED and exclude DELETING or INVALID
        stmt = stmt.where(
            ConnectorCredentialPair.status.in_(
                ConnectorCredentialPairStatus.indexable_statuses()
            )
        )

    if limit:
        stmt = stmt.limit(limit)

    return list(db_session.scalars(stmt))


def fetch_connector_credential_pair_for_connector(
    db_session: Session,
    connector_id: int,
) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair).where(
        ConnectorCredentialPair.connector_id == connector_id,
    )
    return db_session.scalar(stmt)


def resync_cc_pair(
    cc_pair: ConnectorCredentialPair,
    search_settings_id: int,
    db_session: Session,
) -> None:
    """
    Updates state stored in the connector_credential_pair table based on the
    latest index attempt for the given search settings.

    Args:
        cc_pair: ConnectorCredentialPair to resync
        search_settings_id: SearchSettings to use for resync
        db_session: Database session
    """

    def find_latest_index_attempt(
        connector_id: int,
        credential_id: int,
        only_include_success: bool,
        db_session: Session,
    ) -> IndexAttempt | None:
        query = (
            db_session.query(IndexAttempt)
            .join(
                ConnectorCredentialPair,
                IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
            )
            .filter(
                ConnectorCredentialPair.connector_id == connector_id,
                ConnectorCredentialPair.credential_id == credential_id,
                IndexAttempt.search_settings_id == search_settings_id,
            )
        )

        if only_include_success:
            query = query.filter(IndexAttempt.status == IndexingStatus.SUCCESS)

        latest_index_attempt = query.order_by(desc(IndexAttempt.time_started)).first()

        return latest_index_attempt

    last_success = find_latest_index_attempt(
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        only_include_success=True,
        db_session=db_session,
    )

    cc_pair.last_successful_index_time = (
        last_success.time_started if last_success else None
    )

    db_session.commit()


# ── Metrics query helpers ──────────────────────────────────────────────


def get_connector_health_for_metrics(
    db_session: Session,
) -> list:  # Returns list of Row tuples
    """Return connector health data for Prometheus metrics.

    Each row is (cc_pair_id, status, in_repeated_error_state,
    last_successful_index_time, name, source).
    """
    return (
        db_session.query(
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.status,
            ConnectorCredentialPair.in_repeated_error_state,
            ConnectorCredentialPair.last_successful_index_time,
            ConnectorCredentialPair.name,
            Connector.source,
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .all()
    )


================================================
FILE: backend/onyx/db/constants.py
================================================
SLACK_BOT_PERSONA_PREFIX = "__slack_bot_persona__"
DEFAULT_PERSONA_SLACK_CHANNEL_NAME = "DEFAULT_SLACK_CHANNEL"

CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX = "ConnectorValidationError:"


# Sentinel value to distinguish between "not provided" and "explicitly set to None"
class UnsetType:
    def __repr__(self) -> str:
        return "<UNSET>"


UNSET = UnsetType()


================================================
FILE: backend/onyx/db/credentials.py
================================================
from typing import Any

from sqlalchemy import exists
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import and_
from sqlalchemy.sql.expression import or_

from onyx.auth.schemas import UserRole
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import Credential__UserGroup
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger


logger = setup_logger()

# The credentials for these sources are not real so
# permissions are not enforced for them
CREDENTIAL_PERMISSIONS_TO_IGNORE = {
    DocumentSource.FILE,
    DocumentSource.WEB,
    DocumentSource.NOT_APPLICABLE,
    DocumentSource.GOOGLE_SITES,
    DocumentSource.WIKIPEDIA,
    DocumentSource.MEDIAWIKI,
}

PUBLIC_CREDENTIAL_ID = 0


def _add_user_filters(
    stmt: Select,
    user: User,
    get_editable: bool = True,
) -> Select:
    """Attaches filters to the statement to ensure that the user can only
    access the appropriate credentials"""
    if user.is_anonymous:
        raise ValueError("Anonymous users are not allowed to access credentials")

    if user.role == UserRole.ADMIN:
        # Admins can access all credentials that are public or owned by them
        # or are not associated with any user
        return stmt.where(
            or_(
                Credential.user_id == user.id,
                Credential.user_id.is_(None),
                Credential.admin_public == True,  # noqa: E712
                Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE),
            )
        )
    if user.role == UserRole.BASIC:
        # Basic users can only access credentials that are owned by them
        return stmt.where(Credential.user_id == user.id)

    stmt = stmt.distinct()
    """
    THIS PART IS FOR CURATORS AND GLOBAL CURATORS
    Here we select cc_pairs by relation:
    User -> User__UserGroup -> Credential__UserGroup -> Credential
    """
    stmt = stmt.outerjoin(Credential__UserGroup).outerjoin(
        User__UserGroup,
        User__UserGroup.user_group_id == Credential__UserGroup.user_group_id,
    )
    """
    Filter Credentials by:
    - if the user is in the user_group that owns the Credential
    - if the user is a curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out Credentials that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all Credentials in the groups the user is a curator
    for (as well as public Credentials)
    - if we are not editing, we return all Credentials directly connected to the user
    """
    where_clause = User__UserGroup.user_id == user.id
    if user.role == UserRole.CURATOR:
        where_clause &= User__UserGroup.is_curator == True  # noqa: E712

    if get_editable:
        user_groups = select(User__UserGroup.user_group_id).where(
            User__UserGroup.user_id == user.id
        )
        if user.role == UserRole.CURATOR:
            user_groups = user_groups.where(
                User__UserGroup.is_curator == True  # noqa: E712
            )
        where_clause &= (
            ~exists()
            .where(Credential__UserGroup.credential_id == Credential.id)
            .where(~Credential__UserGroup.user_group_id.in_(user_groups))
            .correlate(Credential)
        )
    else:
        where_clause |= Credential.curator_public == True  # noqa: E712
        where_clause |= Credential.user_id == user.id  # noqa: E712

    where_clause |= Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE)

    return stmt.where(where_clause)


def _relate_credential_to_user_groups__no_commit(
    db_session: Session,
    credential_id: int,
    user_group_ids: list[int],
) -> None:
    credential_user_groups = []
    for group_id in user_group_ids:
        credential_user_groups.append(
            Credential__UserGroup(
                credential_id=credential_id,
                user_group_id=group_id,
            )
        )
    db_session.add_all(credential_user_groups)


def fetch_credentials_for_user(
    db_session: Session,
    user: User,
    get_editable: bool = True,
) -> list[Credential]:
    stmt = select(Credential)
    stmt = _add_user_filters(stmt, user, get_editable=get_editable)
    results = db_session.scalars(stmt)
    return list(results.all())


def fetch_credential_by_id_for_user(
    credential_id: int,
    user: User,
    db_session: Session,
    get_editable: bool = True,
) -> Credential | None:
    stmt = select(Credential).distinct()
    stmt = stmt.where(Credential.id == credential_id)
    stmt = _add_user_filters(
        stmt=stmt,
        user=user,
        get_editable=get_editable,
    )
    result = db_session.execute(stmt)
    credential = result.scalar_one_or_none()
    return credential


def fetch_credential_by_id(
    credential_id: int,
    db_session: Session,
) -> Credential | None:
    stmt = select(Credential).distinct()
    stmt = stmt.where(Credential.id == credential_id)
    result = db_session.execute(stmt)
    credential = result.scalar_one_or_none()
    return credential


def fetch_credentials_by_source_for_user(
    db_session: Session,
    user: User,
    document_source: DocumentSource | None = None,
    get_editable: bool = True,
) -> list[Credential]:
    base_query = select(Credential).where(Credential.source == document_source)
    base_query = _add_user_filters(base_query, user, get_editable=get_editable)
    credentials = db_session.execute(base_query).scalars().all()
    return list(credentials)


def fetch_credentials_by_source(
    db_session: Session,
    document_source: DocumentSource | None = None,
) -> list[Credential]:
    base_query = select(Credential).where(Credential.source == document_source)
    credentials = db_session.execute(base_query).scalars().all()
    return list(credentials)


def swap_credentials_connector(
    new_credential_id: int, connector_id: int, user: User, db_session: Session
) -> ConnectorCredentialPair:
    # Check if the user has permission to use the new credential
    new_credential = fetch_credential_by_id_for_user(
        new_credential_id, user, db_session
    )
    if not new_credential:
        raise ValueError(
            f"No Credential found with id {new_credential_id} or user doesn't have permission to use it"
        )

    # Existing pair
    existing_pair = db_session.execute(
        select(ConnectorCredentialPair).where(
            ConnectorCredentialPair.connector_id == connector_id
        )
    ).scalar_one_or_none()

    if not existing_pair:
        raise ValueError(
            f"No ConnectorCredentialPair found for connector_id {connector_id}"
        )

    # Check if the new credential is compatible with the connector
    if new_credential.source != existing_pair.connector.source:
        raise ValueError(
            f"New credential source {new_credential.source} does not match connector source {existing_pair.connector.source}"
        )

    db_session.execute(
        update(DocumentByConnectorCredentialPair)
        .where(
            and_(
                DocumentByConnectorCredentialPair.connector_id == connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == existing_pair.credential_id,
            )
        )
        .values(credential_id=new_credential_id)
    )

    # Update the existing pair with the new credential
    existing_pair.credential_id = new_credential_id
    existing_pair.credential = new_credential

    # Update ccpair status if it's in INVALID state
    if existing_pair.status == ConnectorCredentialPairStatus.INVALID:
        existing_pair.status = ConnectorCredentialPairStatus.ACTIVE

    # Commit the changes
    db_session.commit()

    # Refresh the object to ensure all relationships are up-to-date
    db_session.refresh(existing_pair)
    return existing_pair


def create_credential(
    credential_data: CredentialBase,
    user: User,
    db_session: Session,
) -> Credential:
    credential = Credential(
        credential_json=credential_data.credential_json,
        user_id=user.id,
        admin_public=credential_data.admin_public,
        source=credential_data.source,
        name=credential_data.name,
        curator_public=credential_data.curator_public,
    )
    db_session.add(credential)
    db_session.flush()  # This ensures the credential gets an ID
    _relate_credential_to_user_groups__no_commit(
        db_session=db_session,
        credential_id=credential.id,
        user_group_ids=credential_data.groups,
    )

    db_session.commit()
    # Expire to ensure credential_json is reloaded as SensitiveValue from DB
    db_session.expire(credential)
    return credential


def _cleanup_credential__user_group_relationships__no_commit(
    db_session: Session, credential_id: int
) -> None:
    """NOTE: does not commit the transaction."""
    db_session.query(Credential__UserGroup).filter(
        Credential__UserGroup.credential_id == credential_id
    ).delete(synchronize_session=False)


def alter_credential(
    credential_id: int,
    name: str,
    credential_json: dict[str, Any],
    user: User,
    db_session: Session,
) -> Credential | None:
    # TODO: add user group relationship update
    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)

    if credential is None:
        return None

    credential.name = name

    # Get existing credential_json and merge with new values
    existing_json = (
        credential.credential_json.get_value(apply_mask=False)
        if credential.credential_json
        else {}
    )
    credential.credential_json = {  # type: ignore[assignment]
        **existing_json,
        **credential_json,
    }

    credential.user_id = user.id
    db_session.commit()
    # Expire to ensure credential_json is reloaded as SensitiveValue from DB
    db_session.expire(credential)
    return credential


def update_credential(
    credential_id: int,
    credential_data: CredentialBase,
    user: User,
    db_session: Session,
) -> Credential | None:
    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if credential is None:
        return None

    credential.credential_json = credential_data.credential_json  # type: ignore[assignment]
    credential.user_id = user.id if user is not None else None

    db_session.commit()
    # Expire to ensure credential_json is reloaded as SensitiveValue from DB
    db_session.expire(credential)
    return credential


def update_credential_json(
    credential_id: int,
    credential_json: dict[str, Any],
    user: User,
    db_session: Session,
) -> Credential | None:
    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if credential is None:
        return None

    credential.credential_json = credential_json  # type: ignore[assignment]
    db_session.commit()
    # Expire to ensure credential_json is reloaded as SensitiveValue from DB
    db_session.expire(credential)
    return credential


def backend_update_credential_json(
    credential: Credential,
    credential_json: dict[str, Any],
    db_session: Session,
) -> None:
    """This should not be used in any flows involving the frontend or users"""
    credential.credential_json = credential_json  # type: ignore[assignment]
    db_session.commit()


def _delete_credential_internal(
    credential: Credential,
    credential_id: int,
    db_session: Session,
    force: bool = False,
) -> None:
    """Internal utility function to handle the actual deletion of a credential"""
    associated_connectors = (
        db_session.query(ConnectorCredentialPair)
        .filter(ConnectorCredentialPair.credential_id == credential_id)
        .all()
    )

    associated_doc_cc_pairs = (
        db_session.query(DocumentByConnectorCredentialPair)
        .filter(DocumentByConnectorCredentialPair.credential_id == credential_id)
        .all()
    )

    if associated_connectors or associated_doc_cc_pairs:
        if force:
            logger.warning(
                f"Force deleting credential {credential_id} and its associated records"
            )

            # Delete DocumentByConnectorCredentialPair records first
            for doc_cc_pair in associated_doc_cc_pairs:
                db_session.delete(doc_cc_pair)

            # Then delete ConnectorCredentialPair records
            for connector in associated_connectors:
                db_session.delete(connector)

            # Commit these deletions before deleting the credential
            db_session.flush()
        else:
            raise ValueError(
                f"Cannot delete credential as it is still associated with "
                f"{len(associated_connectors)} connector(s) and {len(associated_doc_cc_pairs)} document(s). "
            )

    if force:
        logger.warning(f"Force deleting credential {credential_id}")
    else:
        logger.notice(f"Deleting credential {credential_id}")

    _cleanup_credential__user_group_relationships__no_commit(db_session, credential_id)
    db_session.delete(credential)
    db_session.commit()


def delete_credential_for_user(
    credential_id: int,
    user: User,
    db_session: Session,
    force: bool = False,
) -> None:
    """Delete a credential that belongs to a specific user"""
    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if credential is None:
        raise ValueError(
            f"Credential by provided id {credential_id} does not exist or does not belong to user"
        )

    _delete_credential_internal(credential, credential_id, db_session, force)


def delete_credential(
    credential_id: int,
    db_session: Session,
    force: bool = False,
) -> None:
    """Delete a credential regardless of ownership (admin function)"""
    credential = fetch_credential_by_id(credential_id, db_session)
    if credential is None:
        raise ValueError(f"Credential by provided id {credential_id} does not exist")

    _delete_credential_internal(credential, credential_id, db_session, force)


def create_initial_public_credential(db_session: Session) -> None:
    error_msg = (
        "DB is not in a valid initial state."
        "There must exist an empty public credential for data connectors that do not require additional Auth."
    )
    first_credential = fetch_credential_by_id(
        credential_id=PUBLIC_CREDENTIAL_ID,
        db_session=db_session,
    )

    if first_credential is not None:
        credential_json_value = (
            first_credential.credential_json.get_value(apply_mask=False)
            if first_credential.credential_json
            else {}
        )
        if credential_json_value != {} or first_credential.user is not None:
            raise ValueError(error_msg)
        return

    credential = Credential(
        id=PUBLIC_CREDENTIAL_ID,
        credential_json={},
        user_id=None,
    )
    db_session.add(credential)
    db_session.commit()


def cleanup_gmail_credentials(db_session: Session) -> None:
    gmail_credentials = fetch_credentials_by_source(
        db_session=db_session, document_source=DocumentSource.GMAIL
    )
    for credential in gmail_credentials:
        db_session.delete(credential)
    db_session.commit()


def cleanup_google_drive_credentials(db_session: Session) -> None:
    google_drive_credentials = fetch_credentials_by_source(
        db_session=db_session, document_source=DocumentSource.GOOGLE_DRIVE
    )
    for credential in google_drive_credentials:
        db_session.delete(credential)
    db_session.commit()


def delete_service_account_credentials(
    user: User, db_session: Session, source: DocumentSource
) -> None:
    credentials = fetch_credentials_for_user(db_session=db_session, user=user)
    for credential in credentials:
        credential_json = (
            credential.credential_json.get_value(apply_mask=False)
            if credential.credential_json
            else {}
        )
        if (
            credential_json.get(DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY)
            and credential.source == source
        ):
            db_session.delete(credential)

    db_session.commit()


================================================
FILE: backend/onyx/db/dal.py
================================================
"""Base Data Access Layer (DAL) for database operations.

The DAL pattern groups related database operations into cohesive classes
with explicit session management. It supports two usage modes:

  1. **External session** (FastAPI endpoints) — the caller provides a session
     whose lifecycle is managed by FastAPI's dependency injection.

  2. **Self-managed session** (Celery tasks, scripts) — the DAL creates its
     own session via the tenant-aware session factory.

Subclasses add domain-specific query methods while inheriting session
management. See ``ee.onyx.db.scim.ScimDAL`` for a concrete example.

Example (FastAPI)::

    def get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:
        return ScimDAL(db_session)

    @router.get("/users")
    def list_users(dal: ScimDAL = Depends(get_scim_dal)) -> ...:
        return dal.list_user_mappings(...)

Example (Celery)::

    with ScimDAL.from_tenant("tenant_abc") as dal:
        dal.create_user_mapping(...)
        dal.commit()
"""

from __future__ import annotations

from collections.abc import Generator
from contextlib import contextmanager

from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_tenant


class DAL:
    """Base Data Access Layer.

    Holds a SQLAlchemy session and provides transaction control helpers.
    Subclasses add domain-specific query methods.
    """

    def __init__(self, db_session: Session) -> None:
        self._session = db_session

    @property
    def session(self) -> Session:
        """Direct access to the underlying session for advanced use cases."""
        return self._session

    def commit(self) -> None:
        self._session.commit()

    def flush(self) -> None:
        self._session.flush()

    def rollback(self) -> None:
        self._session.rollback()

    @classmethod
    @contextmanager
    def from_tenant(cls, tenant_id: str) -> Generator["DAL", None, None]:
        """Create a DAL with a self-managed session for the given tenant.

        The session is automatically closed when the context manager exits.
        The caller must explicitly call ``commit()`` to persist changes.
        """
        with get_session_with_tenant(tenant_id=tenant_id) as session:
            yield cls(session)


================================================
FILE: backend/onyx/db/deletion_attempt.py
================================================
from sqlalchemy.orm import Session

from onyx.db.index_attempt import get_last_attempt
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import IndexingStatus
from onyx.db.search_settings import get_current_search_settings


def check_deletion_attempt_is_allowed(
    connector_credential_pair: ConnectorCredentialPair,
    db_session: Session,
    allow_scheduled: bool = False,
) -> str | None:
    """
    To be deletable:
        (1) connector should be paused
        (2) there should be no in-progress/planned index attempts

    Returns an error message if the deletion attempt is not allowed, otherwise None.
    """
    base_error_msg = (
        f"Connector with ID '{connector_credential_pair.connector_id}' and credential ID "
        f"'{connector_credential_pair.credential_id}' is not deletable."
    )

    if connector_credential_pair.status.is_active():
        return base_error_msg + " Connector must be paused."

    connector_id = connector_credential_pair.connector_id
    credential_id = connector_credential_pair.credential_id
    search_settings = get_current_search_settings(db_session)

    last_indexing = get_last_attempt(
        connector_id=connector_id,
        credential_id=credential_id,
        search_settings_id=search_settings.id,
        db_session=db_session,
    )

    if not last_indexing:
        return None

    if last_indexing.status == IndexingStatus.IN_PROGRESS or (
        last_indexing.status == IndexingStatus.NOT_STARTED and not allow_scheduled
    ):
        return (
            base_error_msg
            + " There is an ongoing / planned indexing attempt. "
            + "The indexing attempt must be completed or cancelled before deletion."
        )

    return None


================================================
FILE: backend/onyx/db/discord_bot.py
================================================
"""CRUD operations for Discord bot models."""

from datetime import datetime
from datetime import timezone

from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.auth.api_key import build_displayable_api_key
from onyx.auth.api_key import generate_api_key
from onyx.auth.api_key import hash_api_key
from onyx.auth.schemas import UserRole
from onyx.configs.constants import DISCORD_SERVICE_API_KEY_NAME
from onyx.db.api_key import insert_api_key
from onyx.db.models import ApiKey
from onyx.db.models import DiscordBotConfig
from onyx.db.models import DiscordChannelConfig
from onyx.db.models import DiscordGuildConfig
from onyx.db.models import User
from onyx.db.utils import DiscordChannelView
from onyx.server.api_key.models import APIKeyArgs
from onyx.utils.logger import setup_logger

logger = setup_logger()


# === DiscordBotConfig ===


def get_discord_bot_config(db_session: Session) -> DiscordBotConfig | None:
    """Get the Discord bot config for this tenant (at most one)."""
    return db_session.scalar(select(DiscordBotConfig).limit(1))


def create_discord_bot_config(
    db_session: Session,
    bot_token: str,
) -> DiscordBotConfig:
    """Create the Discord bot config. Raises ValueError if already exists.

    The check constraint on id='SINGLETON' ensures only one config per tenant.
    """
    existing = get_discord_bot_config(db_session)
    if existing:
        raise ValueError("Discord bot config already exists")

    config = DiscordBotConfig(bot_token=bot_token)
    db_session.add(config)
    try:
        db_session.flush()
    except IntegrityError:
        # Race condition: another request created the config concurrently
        db_session.rollback()
        raise ValueError("Discord bot config already exists")
    return config


def delete_discord_bot_config(db_session: Session) -> bool:
    """Delete the Discord bot config. Returns True if deleted."""
    result = db_session.execute(delete(DiscordBotConfig))
    db_session.flush()
    return result.rowcount > 0  # type: ignore[attr-defined]


# === Discord Service API Key ===


def get_discord_service_api_key(db_session: Session) -> ApiKey | None:
    """Get the Discord service API key if it exists."""
    return db_session.scalar(
        select(ApiKey).where(ApiKey.name == DISCORD_SERVICE_API_KEY_NAME)
    )


def get_or_create_discord_service_api_key(
    db_session: Session,
    tenant_id: str,
) -> str:
    """Get existing Discord service API key or create one.

    The API key is used by the Discord bot to authenticate with the
    Onyx API pods when sending chat requests.

    Args:
        db_session: Database session for the tenant.
        tenant_id: The tenant ID (used for logging/context).

    Returns:
        The raw API key string (not hashed).

    Raises:
        RuntimeError: If API key creation fails.
    """
    # Check for existing key
    existing = get_discord_service_api_key(db_session)
    if existing:
        # Database only stores the hash, so we must regenerate to get the raw key.
        # This is safe since the Discord bot is the only consumer of this key.
        logger.debug(
            f"Found existing Discord service API key for tenant {tenant_id} that isn't in cache, regenerating to update cache"
        )
        new_api_key = generate_api_key(tenant_id)
        existing.hashed_api_key = hash_api_key(new_api_key)
        existing.api_key_display = build_displayable_api_key(new_api_key)
        db_session.flush()
        return new_api_key

    # Create new API key
    logger.info(f"Creating Discord service API key for tenant {tenant_id}")
    api_key_args = APIKeyArgs(
        name=DISCORD_SERVICE_API_KEY_NAME,
        role=UserRole.LIMITED,  # Limited role is sufficient for chat requests
    )
    api_key_descriptor = insert_api_key(
        db_session=db_session,
        api_key_args=api_key_args,
        user_id=None,  # Service account, no owner
    )

    if not api_key_descriptor.api_key:
        raise RuntimeError(
            f"Failed to create Discord service API key for tenant {tenant_id}"
        )

    return api_key_descriptor.api_key


def delete_discord_service_api_key(db_session: Session) -> bool:
    """Delete the Discord service API key for a tenant.

    Called when:
    - Bot config is deleted (self-hosted)
    - All guild configs are deleted (Cloud)

    Args:
        db_session: Database session for the tenant.

    Returns:
        True if the key was deleted, False if it didn't exist.
    """
    existing_key = get_discord_service_api_key(db_session)
    if not existing_key:
        return False

    # Also delete the associated user
    api_key_user = db_session.scalar(
        select(User).where(User.id == existing_key.user_id)  # type: ignore[arg-type]
    )

    db_session.delete(existing_key)
    if api_key_user:
        db_session.delete(api_key_user)

    db_session.flush()
    logger.info("Deleted Discord service API key")
    return True


# === DiscordGuildConfig ===


def get_guild_configs(
    db_session: Session,
    include_channels: bool = False,
) -> list[DiscordGuildConfig]:
    """Get all guild configs for this tenant."""
    stmt = select(DiscordGuildConfig)
    if include_channels:
        stmt = stmt.options(joinedload(DiscordGuildConfig.channels))
    return list(db_session.scalars(stmt).unique().all())


def get_guild_config_by_internal_id(
    db_session: Session,
    internal_id: int,
) -> DiscordGuildConfig | None:
    """Get a specific guild config by its ID."""
    return db_session.scalar(
        select(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)
    )


def get_guild_config_by_discord_id(
    db_session: Session,
    guild_id: int,
) -> DiscordGuildConfig | None:
    """Get a guild config by Discord guild ID."""
    return db_session.scalar(
        select(DiscordGuildConfig).where(DiscordGuildConfig.guild_id == guild_id)
    )


def get_guild_config_by_registration_key(
    db_session: Session,
    registration_key: str,
) -> DiscordGuildConfig | None:
    """Get a guild config by its registration key."""
    return db_session.scalar(
        select(DiscordGuildConfig).where(
            DiscordGuildConfig.registration_key == registration_key
        )
    )


def create_guild_config(
    db_session: Session,
    registration_key: str,
) -> DiscordGuildConfig:
    """Create a new guild config with a registration key (guild_id=NULL)."""
    config = DiscordGuildConfig(registration_key=registration_key)
    db_session.add(config)
    db_session.flush()
    return config


def register_guild(
    db_session: Session,
    config: DiscordGuildConfig,
    guild_id: int,
    guild_name: str,
) -> DiscordGuildConfig:
    """Complete registration by setting guild_id and guild_name."""
    config.guild_id = guild_id
    config.guild_name = guild_name
    config.registered_at = datetime.now(timezone.utc)
    db_session.flush()
    return config


def update_guild_config(
    db_session: Session,
    config: DiscordGuildConfig,
    enabled: bool,
    default_persona_id: int | None = None,
) -> DiscordGuildConfig:
    """Update guild config fields."""
    config.enabled = enabled
    config.default_persona_id = default_persona_id
    db_session.flush()
    return config


def delete_guild_config(
    db_session: Session,
    internal_id: int,
) -> bool:
    """Delete guild config (cascades to channel configs). Returns True if deleted."""
    result = db_session.execute(
        delete(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)
    )
    db_session.flush()
    return result.rowcount > 0  # type: ignore[attr-defined]


# === DiscordChannelConfig ===


def get_channel_configs(
    db_session: Session,
    guild_config_id: int,
) -> list[DiscordChannelConfig]:
    """Get all channel configs for a guild."""
    return list(
        db_session.scalars(
            select(DiscordChannelConfig).where(
                DiscordChannelConfig.guild_config_id == guild_config_id
            )
        ).all()
    )


def get_channel_config_by_discord_ids(
    db_session: Session,
    guild_id: int,
    channel_id: int,
) -> DiscordChannelConfig | None:
    """Get a specific channel config by guild_id and channel_id."""
    return db_session.scalar(
        select(DiscordChannelConfig)
        .join(DiscordGuildConfig)
        .where(
            DiscordGuildConfig.guild_id == guild_id,
            DiscordChannelConfig.channel_id == channel_id,
        )
    )


def get_channel_config_by_internal_ids(
    db_session: Session,
    guild_config_id: int,
    channel_config_id: int,
) -> DiscordChannelConfig | None:
    """Get a specific channel config by guild_config_id and channel_config_id"""
    return db_session.scalar(
        select(DiscordChannelConfig).where(
            DiscordChannelConfig.guild_config_id == guild_config_id,
            DiscordChannelConfig.id == channel_config_id,
        )
    )


def update_discord_channel_config(
    db_session: Session,
    config: DiscordChannelConfig,
    channel_name: str,
    thread_only_mode: bool,
    require_bot_invocation: bool,
    enabled: bool,
    persona_override_id: int | None = None,
) -> DiscordChannelConfig:
    """Update channel config fields."""
    config.channel_name = channel_name
    config.require_bot_invocation = require_bot_invocation
    config.persona_override_id = persona_override_id
    config.enabled = enabled
    config.thread_only_mode = thread_only_mode
    db_session.flush()
    return config


def delete_discord_channel_config(
    db_session: Session,
    guild_config_id: int,
    channel_config_id: int,
) -> bool:
    """Delete a channel config. Returns True if deleted."""
    result = db_session.execute(
        delete(DiscordChannelConfig).where(
            DiscordChannelConfig.guild_config_id == guild_config_id,
            DiscordChannelConfig.id == channel_config_id,
        )
    )
    db_session.flush()
    return result.rowcount > 0  # type: ignore[attr-defined]


def create_channel_config(
    db_session: Session,
    guild_config_id: int,
    channel_view: DiscordChannelView,
) -> DiscordChannelConfig:
    """Create a new channel config with default settings (disabled by default, admin enables via UI)."""
    config = DiscordChannelConfig(
        guild_config_id=guild_config_id,
        channel_id=channel_view.channel_id,
        channel_name=channel_view.channel_name,
        channel_type=channel_view.channel_type,
        is_private=channel_view.is_private,
    )
    db_session.add(config)
    db_session.flush()
    return config


def bulk_create_channel_configs(
    db_session: Session,
    guild_config_id: int,
    channels: list[DiscordChannelView],
) -> list[DiscordChannelConfig]:
    """Create multiple channel configs at once. Skips existing channels."""
    # Get existing channel IDs for this guild
    existing_channel_ids = set(
        db_session.scalars(
            select(DiscordChannelConfig.channel_id).where(
                DiscordChannelConfig.guild_config_id == guild_config_id
            )
        ).all()
    )

    # Create configs for new channels only
    new_configs = []
    for channel_view in channels:
        if channel_view.channel_id not in existing_channel_ids:
            config = DiscordChannelConfig(
                guild_config_id=guild_config_id,
                channel_id=channel_view.channel_id,
                channel_name=channel_view.channel_name,
                channel_type=channel_view.channel_type,
                is_private=channel_view.is_private,
            )
            db_session.add(config)
            new_configs.append(config)

    db_session.flush()
    return new_configs


def sync_channel_configs(
    db_session: Session,
    guild_config_id: int,
    current_channels: list[DiscordChannelView],
) -> tuple[int, int, int]:
    """Sync channel configs with current Discord channels.

    - Creates configs for new channels (disabled by default)
    - Removes configs for deleted channels
    - Updates names and types for existing channels if changed

    Returns: (added_count, removed_count, updated_count)
    """
    current_channel_map = {
        channel_view.channel_id: channel_view for channel_view in current_channels
    }
    current_channel_ids = set(current_channel_map.keys())

    # Get existing configs
    existing_configs = get_channel_configs(db_session, guild_config_id)
    existing_channel_ids = {c.channel_id for c in existing_configs}

    # Find channels to add, remove, and potentially update
    to_add = current_channel_ids - existing_channel_ids
    to_remove = existing_channel_ids - current_channel_ids

    # Add new channels
    added_count = 0
    for channel_id in to_add:
        channel_view = current_channel_map[channel_id]
        create_channel_config(db_session, guild_config_id, channel_view)
        added_count += 1

    # Remove deleted channels
    removed_count = 0
    for config in existing_configs:
        if config.channel_id in to_remove:
            db_session.delete(config)
            removed_count += 1

    # Update names, types, and privacy for existing channels if changed
    updated_count = 0
    for config in existing_configs:
        if config.channel_id in current_channel_ids:
            channel_view = current_channel_map[config.channel_id]
            changed = False
            if config.channel_name != channel_view.channel_name:
                config.channel_name = channel_view.channel_name
                changed = True
            if config.channel_type != channel_view.channel_type:
                config.channel_type = channel_view.channel_type
                changed = True
            if config.is_private != channel_view.is_private:
                config.is_private = channel_view.is_private
                changed = True
            if changed:
                updated_count += 1

    db_session.flush()
    return added_count, removed_count, updated_count


================================================
FILE: backend/onyx/db/document.py
================================================
import contextlib
import time
from collections.abc import Generator
from collections.abc import Iterable
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import exists
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import tuple_
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.engine.util import TransactionalContext
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import null

from onyx.configs.constants import DEFAULT_BOOST
from onyx.configs.constants import DocumentSource
from onyx.configs.kg_configs import KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES
from onyx.db.chunk import delete_chunk_stats_by_connector_credential_pair__no_commit
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document_access import apply_document_access_filter
from onyx.db.entities import delete_from_kg_entities__no_commit
from onyx.db.entities import delete_from_kg_entities_extraction_staging__no_commit
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.feedback import delete_document_feedback_for_documents__no_commit
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import Document as DbDocument
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import KGEntity
from onyx.db.models import KGRelationship
from onyx.db.models import User
from onyx.db.relationships import delete_from_kg_relationships__no_commit
from onyx.db.relationships import (
    delete_from_kg_relationships_extraction_staging__no_commit,
)
from onyx.db.tag import delete_document_tags_for_documents__no_commit
from onyx.db.utils import DocumentRow
from onyx.db.utils import model_to_dict
from onyx.db.utils import SortOrder
from onyx.document_index.interfaces import DocumentMetadata
from onyx.kg.models import KGStage
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.utils.logger import setup_logger

logger = setup_logger()

ONE_HOUR_IN_SECONDS = 60 * 60


def check_docs_exist(db_session: Session) -> bool:
    stmt = select(exists(DbDocument))
    result = db_session.execute(stmt)
    return result.scalar() or False


def count_documents_by_needs_sync(session: Session) -> int:
    """Get the count of all documents where:
    1. last_modified is newer than last_synced
    2. last_synced is null (meaning we've never synced)
    AND the document has a relationship with a connector/credential pair

    TODO: The documents without a relationship with a connector/credential pair
    should be cleaned up somehow eventually.

    This function executes the query and returns the count of
    documents matching the criteria."""

    return (
        session.query(DbDocument.id)
        .filter(
            or_(
                DbDocument.last_modified > DbDocument.last_synced,
                DbDocument.last_synced.is_(None),
            )
        )
        .count()
    )


def construct_document_id_select_by_needs_sync() -> Select:
    """Get all document IDs that need syncing across all connector credential pairs.

    Returns a Select statement for documents where:
    1. last_modified is newer than last_synced
    2. last_synced is null (meaning we've never synced)
    AND the document has a relationship with a connector/credential pair
    """
    return select(DbDocument.id).where(
        or_(
            DbDocument.last_modified > DbDocument.last_synced,
            DbDocument.last_synced.is_(None),
        )
    )


def construct_document_id_select_for_connector_credential_pair(
    connector_id: int, credential_id: int | None = None
) -> Select:
    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    stmt = (
        select(DbDocument.id).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()
    )
    return stmt


def construct_document_select_for_connector_credential_pair(
    connector_id: int, credential_id: int | None = None
) -> Select:
    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    stmt = select(DbDocument).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()
    return stmt


def get_documents_for_cc_pair(
    db_session: Session,
    cc_pair_id: int,
) -> list[DbDocument]:
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    if not cc_pair:
        raise ValueError(f"No CC pair found with ID: {cc_pair_id}")
    stmt = construct_document_select_for_connector_credential_pair(
        connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id
    )
    return list(db_session.scalars(stmt).all())


def get_document_ids_for_connector_credential_pair(
    db_session: Session, connector_id: int, credential_id: int
) -> list[str]:
    doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    return list(db_session.execute(doc_ids_stmt).scalars().all())


def get_documents_for_connector_credential_pair_limited_columns(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    sort_order: SortOrder | None = None,
) -> Sequence[DocumentRow]:

    doc_ids_subquery = select(DocumentByConnectorCredentialPair.id).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    doc_ids_subquery = doc_ids_subquery.join(
        DbDocument, DocumentByConnectorCredentialPair.id == DbDocument.id
    )

    stmt = select(
        DbDocument.id, DbDocument.doc_metadata, DbDocument.external_user_group_ids
    )

    stmt = stmt.where(DbDocument.id.in_(doc_ids_subquery))

    if sort_order == SortOrder.ASC:
        stmt = stmt.order_by(DbDocument.last_modified.asc())
    elif sort_order == SortOrder.DESC:
        stmt = stmt.order_by(DbDocument.last_modified.desc())

    rows = db_session.execute(stmt).mappings().all()

    doc_rows: list[DocumentRow] = []
    for row in rows:
        doc_row = DocumentRow(
            id=row.id,
            doc_metadata=row.doc_metadata,
            external_user_group_ids=row.external_user_group_ids or [],
        )
        doc_rows.append(doc_row)
    return doc_rows


def get_documents_for_connector_credential_pair(
    db_session: Session, connector_id: int, credential_id: int, limit: int | None = None
) -> Sequence[DbDocument]:
    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    stmt = select(DbDocument).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()
    if limit:
        stmt = stmt.limit(limit)
    return db_session.scalars(stmt).all()


def get_documents_by_ids(
    db_session: Session,
    document_ids: list[str],
) -> list[DbDocument]:
    stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))
    documents = db_session.execute(stmt).scalars().all()
    return list(documents)


def get_documents_by_source(
    db_session: Session,
    source: DocumentSource,
    creator_id: UUID | None = None,
) -> list[DbDocument]:
    """Get all documents associated with a specific source type.

    This queries through the connector relationship to find all documents
    that were indexed by connectors of the given source type.

    Args:
        db_session: Database session
        source: The document source type to filter by
        creator_id: If provided, only return documents from connectors
                    created by this user. Filters via ConnectorCredentialPair.
    """
    stmt = (
        select(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(Connector.source == source)
    )
    if creator_id is not None:
        stmt = stmt.where(ConnectorCredentialPair.creator_id == creator_id)
    stmt = stmt.distinct()
    documents = db_session.execute(stmt).scalars().all()
    return list(documents)


def _apply_last_updated_cursor_filter(
    stmt: Select,
    cursor_last_modified: datetime | None,
    cursor_last_synced: datetime | None,
    cursor_document_id: str | None,
    is_ascending: bool,
) -> Select:
    """Apply cursor filter for last_updated sorting.

    ASC uses nulls_first (NULLs at start), DESC uses nulls_last (NULLs at end).
    This affects which extra clauses are needed when the cursor has NULL last_synced
    vs non-NULL last_synced.
    """
    if not cursor_last_modified or not cursor_document_id:
        return stmt

    # Pick comparison operators based on sort direction
    if is_ascending:
        modified_cmp = DbDocument.last_modified > cursor_last_modified
        synced_cmp = DbDocument.last_synced > cursor_last_synced
        id_cmp = DbDocument.id > cursor_document_id
    else:
        modified_cmp = DbDocument.last_modified < cursor_last_modified
        synced_cmp = DbDocument.last_synced < cursor_last_synced
        id_cmp = DbDocument.id < cursor_document_id

    if cursor_last_synced is None:
        # Cursor has NULL last_synced
        # ASC (nulls_first): NULL is at start, so non-NULL values come after
        # DESC (nulls_last): NULL is at end, so nothing with non-NULL comes after
        base_clauses = [
            modified_cmp,
            and_(
                DbDocument.last_modified == cursor_last_modified,
                DbDocument.last_synced.is_(None),
                id_cmp,
            ),
        ]
        if is_ascending:
            # Any non-NULL last_synced comes after NULL when nulls_first
            base_clauses.append(
                and_(
                    DbDocument.last_modified == cursor_last_modified,
                    DbDocument.last_synced.is_not(None),
                )
            )
        return stmt.where(or_(*base_clauses))

    # Cursor has non-NULL last_synced
    # ASC (nulls_first): NULLs came before, so no NULL clause needed
    # DESC (nulls_last): NULLs come after non-NULL values
    synced_clauses = [
        synced_cmp,
        and_(DbDocument.last_synced == cursor_last_synced, id_cmp),
    ]
    if not is_ascending:
        # NULLs come after all non-NULL values when nulls_last
        synced_clauses.append(DbDocument.last_synced.is_(None))

    return stmt.where(
        or_(
            modified_cmp,
            and_(
                DbDocument.last_modified == cursor_last_modified,
                or_(*synced_clauses),
            ),
        )
    )


def _apply_name_cursor_filter_asc(
    stmt: Select,
    cursor_name: str | None,
    cursor_document_id: str | None,
) -> Select:
    """Apply cursor filter for name ASC sorting."""
    if not cursor_name or not cursor_document_id:
        return stmt
    return stmt.where(
        or_(
            DbDocument.semantic_id > cursor_name,
            and_(
                DbDocument.semantic_id == cursor_name,
                DbDocument.id > cursor_document_id,
            ),
        )
    )


def _apply_name_cursor_filter_desc(
    stmt: Select,
    cursor_name: str | None,
    cursor_document_id: str | None,
) -> Select:
    """Apply cursor filter for name DESC sorting."""
    if not cursor_name or not cursor_document_id:
        return stmt
    return stmt.where(
        or_(
            DbDocument.semantic_id < cursor_name,
            and_(
                DbDocument.semantic_id == cursor_name,
                DbDocument.id < cursor_document_id,
            ),
        )
    )


def get_accessible_documents_for_hierarchy_node_paginated(
    db_session: Session,
    parent_hierarchy_node_id: int,
    user_email: str | None,
    external_group_ids: list[str],
    limit: int,
    # Sort options
    sort_by_name: bool = False,
    sort_ascending: bool = False,
    # Cursor fields for last_updated sorting
    cursor_last_modified: datetime | None = None,
    cursor_last_synced: datetime | None = None,
    # Cursor field for name sorting
    cursor_name: str | None = None,
    # Document ID for tie-breaking (used by both sort types)
    cursor_document_id: str | None = None,
) -> list[DbDocument]:
    stmt = select(DbDocument).where(
        DbDocument.parent_hierarchy_node_id == parent_hierarchy_node_id
    )
    stmt = apply_document_access_filter(stmt, user_email, external_group_ids)

    # Apply cursor filter based on sort type and direction
    if sort_by_name:
        if sort_ascending:
            stmt = _apply_name_cursor_filter_asc(stmt, cursor_name, cursor_document_id)
            stmt = stmt.order_by(DbDocument.semantic_id.asc(), DbDocument.id.asc())
        else:
            stmt = _apply_name_cursor_filter_desc(stmt, cursor_name, cursor_document_id)
            stmt = stmt.order_by(DbDocument.semantic_id.desc(), DbDocument.id.desc())
    else:
        # Sort by last_updated
        if sort_ascending:
            stmt = _apply_last_updated_cursor_filter(
                stmt,
                cursor_last_modified,
                cursor_last_synced,
                cursor_document_id,
                is_ascending=True,
            )
            stmt = stmt.order_by(
                DbDocument.last_modified.asc(),
                DbDocument.last_synced.asc().nulls_first(),
                DbDocument.id.asc(),
            )
        else:
            stmt = _apply_last_updated_cursor_filter(
                stmt,
                cursor_last_modified,
                cursor_last_synced,
                cursor_document_id,
                is_ascending=False,
            )
            stmt = stmt.order_by(
                DbDocument.last_modified.desc(),
                DbDocument.last_synced.desc().nulls_last(),
                DbDocument.id.desc(),
            )

    # Use distinct to avoid duplicates when a document belongs to multiple cc_pairs
    stmt = stmt.distinct()
    stmt = stmt.limit(limit)
    return list(db_session.execute(stmt).scalars().all())


def filter_existing_document_ids(
    db_session: Session,
    document_ids: list[str],
) -> set[str]:
    """Filter a list of document IDs to only those that exist in the database.

    Args:
        db_session: Database session
        document_ids: List of document IDs to check for existence

    Returns:
        Set of document IDs from the input list that exist in the database
    """
    if not document_ids:
        return set()
    stmt = select(DbDocument.id).where(DbDocument.id.in_(document_ids))
    return set(db_session.execute(stmt).scalars().all())


def fetch_document_ids_by_links(
    db_session: Session,
    links: list[str],
) -> dict[str, str]:
    """Fetch document IDs for documents whose link matches any of the provided values."""
    if not links:
        return {}

    stmt = select(DbDocument.link, DbDocument.id).where(DbDocument.link.in_(links))
    rows = db_session.execute(stmt).all()
    return {link: doc_id for link, doc_id in rows if link}


def get_document_connector_count(
    db_session: Session,
    document_id: str,
) -> int:
    results = get_document_connector_counts(db_session, [document_id])
    if not results or len(results) == 0:
        return 0

    return results[0][1]


def get_document_connector_counts(
    db_session: Session,
    document_ids: list[str],
) -> Sequence[tuple[str, int]]:
    stmt = (
        select(
            DocumentByConnectorCredentialPair.id,
            func.count(),
        )
        .where(DocumentByConnectorCredentialPair.id.in_(document_ids))
        .group_by(DocumentByConnectorCredentialPair.id)
    )
    return db_session.execute(stmt).all()  # type: ignore


def get_document_counts_for_cc_pairs(
    db_session: Session, cc_pairs: list[ConnectorCredentialPairIdentifier]
) -> Sequence[tuple[int, int, int]]:
    """Returns a sequence of tuples of (connector_id, credential_id, document count)"""

    if not cc_pairs:
        return []

    # Prepare a list of (connector_id, credential_id) tuples
    cc_ids = [(x.connector_id, x.credential_id) for x in cc_pairs]

    # Batch to avoid generating extremely large IN clauses that can blow Postgres stack depth
    batch_size = 1000
    aggregated_counts: dict[tuple[int, int], int] = {}

    for start_idx in range(0, len(cc_ids), batch_size):
        batch = cc_ids[start_idx : start_idx + batch_size]

        stmt = (
            select(
                DocumentByConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id,
                func.count(),
            )
            .where(
                and_(
                    tuple_(
                        DocumentByConnectorCredentialPair.connector_id,
                        DocumentByConnectorCredentialPair.credential_id,
                    ).in_(batch),
                    DocumentByConnectorCredentialPair.has_been_indexed.is_(True),
                )
            )
            .group_by(
                DocumentByConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id,
            )
        )

        for connector_id, credential_id, cnt in db_session.execute(stmt).all():
            aggregated_counts[(connector_id, credential_id)] = cnt

    # Convert aggregated results back to the expected sequence of tuples
    return [
        (connector_id, credential_id, cnt)
        for (connector_id, credential_id), cnt in aggregated_counts.items()
    ]


def get_document_counts_for_all_cc_pairs(
    db_session: Session,
) -> Sequence[tuple[int, int, int]]:
    """Return (connector_id, credential_id, count) for ALL CC pairs with indexed docs.

    Executes a single grouped query so Postgres can fully leverage indexes,
    avoiding large batched IN-lists.
    """
    stmt = (
        select(
            DocumentByConnectorCredentialPair.connector_id,
            DocumentByConnectorCredentialPair.credential_id,
            func.count(),
        )
        .where(DocumentByConnectorCredentialPair.has_been_indexed.is_(True))
        .group_by(
            DocumentByConnectorCredentialPair.connector_id,
            DocumentByConnectorCredentialPair.credential_id,
        )
    )
    return db_session.execute(stmt).all()  # type: ignore


def get_access_info_for_document(
    db_session: Session,
    document_id: str,
) -> tuple[str, list[str | None], bool] | None:
    """Gets access info for a single document by calling the get_access_info_for_documents function
    and passing a list with a single document ID.
    Args:
        db_session (Session): The database session to use.
        document_id (str): The document ID to fetch access info for.
    Returns:
        Optional[Tuple[str, List[str | None], bool]]: A tuple containing the document ID, a list of user emails,
        and a boolean indicating if the document is globally public, or None if no results are found.
    """
    results = get_access_info_for_documents(db_session, [document_id])
    if not results:
        return None

    return results[0]


def get_access_info_for_documents(
    db_session: Session,
    document_ids: list[str],
) -> Sequence[tuple[str, list[str | None], bool]]:
    """Gets back all relevant access info for the given documents. This includes
    the user_ids for cc pairs that the document is associated with + whether any
    of the associated cc pairs are intending to make the document globally public.
    Returns the list where each element contains:
    - Document ID (which is also the ID of the DocumentByConnectorCredentialPair)
    - List of emails of Onyx users with direct access to the doc (includes a "None" element if
      the connector was set up by an admin when auth was off
    - bool for whether the document is public (the document later can also be marked public by
      automatic permission sync step)
    """
    stmt = select(
        DocumentByConnectorCredentialPair.id,
        func.array_agg(func.coalesce(User.email, null())).label("user_emails"),
        func.bool_or(ConnectorCredentialPair.access_type == AccessType.PUBLIC).label(
            "public_doc"
        ),
    ).where(DocumentByConnectorCredentialPair.id.in_(document_ids))

    stmt = (
        stmt.join(
            Credential,
            DocumentByConnectorCredentialPair.credential_id == Credential.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .outerjoin(
            User,
            and_(
                Credential.user_id == User.id,
                ConnectorCredentialPair.access_type != AccessType.SYNC,
            ),
        )
        # don't include CC pairs that are being deleted
        # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them
        .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING)
        .group_by(DocumentByConnectorCredentialPair.id)
    )
    return db_session.execute(stmt).all()  # type: ignore


def upsert_documents(
    db_session: Session,
    document_metadata_batch: list[DocumentMetadata],
    initial_boost: int = DEFAULT_BOOST,
) -> None:
    """NOTE: this function is Postgres specific. Not all DBs support the ON CONFLICT clause.
    Also note, this function should not be used for updating documents, only creating and
    ensuring that it exists. It IGNORES the doc_updated_at field"""
    seen_documents: dict[str, DocumentMetadata] = {}
    for document_metadata in document_metadata_batch:
        doc_id = document_metadata.document_id
        if doc_id not in seen_documents:
            seen_documents[doc_id] = document_metadata

    if not seen_documents:
        logger.info("No documents to upsert. Skipping.")
        return

    includes_permissions = any(doc.external_access for doc in seen_documents.values())

    insert_stmt = insert(DbDocument).values(
        [
            model_to_dict(
                DbDocument(
                    id=doc.document_id,
                    from_ingestion_api=doc.from_ingestion_api,
                    boost=initial_boost,
                    hidden=False,
                    semantic_id=doc.semantic_identifier,
                    link=doc.first_link,
                    doc_updated_at=None,  # this is intentional
                    last_modified=datetime.now(timezone.utc),
                    primary_owners=doc.primary_owners,
                    secondary_owners=doc.secondary_owners,
                    kg_stage=KGStage.NOT_STARTED,
                    parent_hierarchy_node_id=doc.parent_hierarchy_node_id,
                    **(
                        {
                            "external_user_emails": list(
                                doc.external_access.external_user_emails
                            ),
                            "external_user_group_ids": list(
                                doc.external_access.external_user_group_ids
                            ),
                            "is_public": doc.external_access.is_public,
                        }
                        if doc.external_access
                        else {}
                    ),
                    doc_metadata=doc.doc_metadata,
                )
            )
            for doc in seen_documents.values()
        ]
    )

    update_set = {
        "from_ingestion_api": insert_stmt.excluded.from_ingestion_api,
        "boost": insert_stmt.excluded.boost,
        "hidden": insert_stmt.excluded.hidden,
        "semantic_id": insert_stmt.excluded.semantic_id,
        "link": insert_stmt.excluded.link,
        "primary_owners": insert_stmt.excluded.primary_owners,
        "secondary_owners": insert_stmt.excluded.secondary_owners,
        "doc_metadata": insert_stmt.excluded.doc_metadata,
        "parent_hierarchy_node_id": insert_stmt.excluded.parent_hierarchy_node_id,
    }
    if includes_permissions:
        # Use COALESCE to preserve existing permissions when new values are NULL.
        # This prevents subsequent indexing runs (which don't fetch permissions)
        # from overwriting permissions set by permission sync jobs.
        update_set.update(
            {
                "external_user_emails": func.coalesce(
                    insert_stmt.excluded.external_user_emails,
                    DbDocument.external_user_emails,
                ),
                "external_user_group_ids": func.coalesce(
                    insert_stmt.excluded.external_user_group_ids,
                    DbDocument.external_user_group_ids,
                ),
                "is_public": func.coalesce(
                    insert_stmt.excluded.is_public,
                    DbDocument.is_public,
                ),
            }
        )
    on_conflict_stmt = insert_stmt.on_conflict_do_update(
        index_elements=["id"],
        set_=update_set,  # Conflict target
    )
    db_session.execute(on_conflict_stmt)
    db_session.commit()


def upsert_document_by_connector_credential_pair(
    db_session: Session, connector_id: int, credential_id: int, document_ids: list[str]
) -> None:
    """NOTE: this function is Postgres specific. Not all DBs support the ON CONFLICT clause."""
    if not document_ids:
        logger.info("`document_ids` is empty. Skipping.")
        return

    insert_stmt = insert(DocumentByConnectorCredentialPair).values(
        [
            model_to_dict(
                DocumentByConnectorCredentialPair(
                    id=doc_id,
                    connector_id=connector_id,
                    credential_id=credential_id,
                    has_been_indexed=False,
                )
            )
            for doc_id in document_ids
        ]
    )
    # this must be `on_conflict_do_nothing` rather than `on_conflict_do_update`
    # since we don't want to update the `has_been_indexed` field for documents
    # that already exist
    on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
    db_session.execute(on_conflict_stmt)
    db_session.commit()


def mark_document_as_indexed_for_cc_pair__no_commit(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    document_ids: Iterable[str],
) -> None:
    """Should be called only after a successful index operation for a batch."""
    db_session.execute(
        update(DocumentByConnectorCredentialPair)
        .where(
            and_(
                DocumentByConnectorCredentialPair.connector_id == connector_id,
                DocumentByConnectorCredentialPair.credential_id == credential_id,
                DocumentByConnectorCredentialPair.id.in_(document_ids),
            )
        )
        .values(has_been_indexed=True)
    )


def update_docs_updated_at__no_commit(
    ids_to_new_updated_at: dict[str, datetime],
    db_session: Session,
) -> None:
    doc_ids = list(ids_to_new_updated_at.keys())
    documents_to_update = (
        db_session.query(DbDocument).filter(DbDocument.id.in_(doc_ids)).all()
    )

    for document in documents_to_update:
        document.doc_updated_at = ids_to_new_updated_at[document.id]


def update_docs_last_modified__no_commit(
    document_ids: list[str],
    db_session: Session,
) -> None:
    documents_to_update = (
        db_session.query(DbDocument).filter(DbDocument.id.in_(document_ids)).all()
    )

    now = datetime.now(timezone.utc)
    for doc in documents_to_update:
        doc.last_modified = now


def update_docs_chunk_count__no_commit(
    document_ids: list[str],
    doc_id_to_chunk_count: dict[str, int],
    db_session: Session,
) -> None:
    documents_to_update = (
        db_session.query(DbDocument).filter(DbDocument.id.in_(document_ids)).all()
    )
    for doc in documents_to_update:
        doc.chunk_count = doc_id_to_chunk_count[doc.id]


def mark_document_as_modified(
    document_id: str,
    db_session: Session,
) -> None:
    stmt = select(DbDocument).where(DbDocument.id == document_id)
    doc = db_session.scalar(stmt)
    if doc is None:
        raise ValueError(f"No document with ID: {document_id}")

    # update last_synced
    doc.last_modified = datetime.now(timezone.utc)
    db_session.commit()


def mark_document_as_synced(document_id: str, db_session: Session) -> None:
    stmt = select(DbDocument).where(DbDocument.id == document_id)
    doc = db_session.scalar(stmt)
    if doc is None:
        raise ValueError(f"No document with ID: {document_id}")

    # update last_synced
    doc.last_synced = datetime.now(timezone.utc)
    db_session.commit()


def delete_document_by_connector_credential_pair__no_commit(
    db_session: Session,
    document_id: str,
    connector_credential_pair_identifier: (
        ConnectorCredentialPairIdentifier | None
    ) = None,
) -> None:
    """Deletes a single document by cc pair relationship entry.
    Foreign key rows are left in place.
    The implicit assumption is that the document itself still has other cc_pair
    references and needs to continue existing.
    """
    delete_documents_by_connector_credential_pair__no_commit(
        db_session=db_session,
        document_ids=[document_id],
        connector_credential_pair_identifier=connector_credential_pair_identifier,
    )


def delete_documents_by_connector_credential_pair__no_commit(
    db_session: Session,
    document_ids: list[str],
    connector_credential_pair_identifier: (
        ConnectorCredentialPairIdentifier | None
    ) = None,
) -> None:
    """This deletes just the document by cc pair entries for a particular cc pair.
    Foreign key rows are left in place.
    The implicit assumption is that the document itself still has other cc_pair
    references and needs to continue existing.
    """
    stmt = delete(DocumentByConnectorCredentialPair).where(
        DocumentByConnectorCredentialPair.id.in_(document_ids)
    )
    if connector_credential_pair_identifier:
        stmt = stmt.where(
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == connector_credential_pair_identifier.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == connector_credential_pair_identifier.credential_id,
            )
        )
    db_session.execute(stmt)


def delete_all_documents_by_connector_credential_pair__no_commit(
    db_session: Session,
    connector_id: int,
    credential_id: int,
) -> None:
    """Deletes all document by connector credential pair entries for a specific connector and credential.
    This is primarily used during connector deletion to ensure all references are removed
    before deleting the connector itself. This is crucial because connector_id is part of the
    primary key in DocumentByConnectorCredentialPair, and attempting to delete the Connector
    would otherwise try to set the foreign key to NULL, which fails for primary keys.

    NOTE: Does not commit the transaction, this must be done by the caller.
    """
    stmt = delete(DocumentByConnectorCredentialPair).where(
        and_(
            DocumentByConnectorCredentialPair.connector_id == connector_id,
            DocumentByConnectorCredentialPair.credential_id == credential_id,
        )
    )
    db_session.execute(stmt)


def delete_documents__no_commit(db_session: Session, document_ids: list[str]) -> None:
    db_session.execute(delete(DbDocument).where(DbDocument.id.in_(document_ids)))


def delete_documents_complete__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """This completely deletes the documents from the db, including all foreign key relationships"""

    # Start with the kg references

    delete_from_kg_relationships__no_commit(
        db_session=db_session,
        document_ids=document_ids,
    )

    delete_from_kg_entities__no_commit(
        db_session=db_session,
        document_ids=document_ids,
    )

    delete_from_kg_relationships_extraction_staging__no_commit(
        db_session=db_session,
        document_ids=document_ids,
    )

    delete_from_kg_entities_extraction_staging__no_commit(
        db_session=db_session,
        document_ids=document_ids,
    )

    # Continue with deleting the chunk stats for the documents
    delete_chunk_stats_by_connector_credential_pair__no_commit(
        db_session=db_session,
        document_ids=document_ids,
    )

    delete_documents_by_connector_credential_pair__no_commit(db_session, document_ids)
    delete_document_feedback_for_documents__no_commit(
        document_ids=document_ids, db_session=db_session
    )
    delete_document_tags_for_documents__no_commit(
        document_ids=document_ids, db_session=db_session
    )
    delete_documents__no_commit(db_session, document_ids)


def delete_all_documents_for_connector_credential_pair(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    timeout: int = ONE_HOUR_IN_SECONDS,
) -> None:
    """Delete all documents for a given connector credential pair.
    This will delete all documents and their associated data (chunks, feedback, tags, etc.)

    NOTE: a bit inefficient, but it's not a big deal since this is done rarely - only during
    an index swap. If we wanted to make this more efficient, we could use a single delete
    statement + cascade.
    """
    batch_size = 1000
    start_time = time.monotonic()

    while True:
        # Get document IDs in batches
        stmt = (
            select(DocumentByConnectorCredentialPair.id)
            .where(
                DocumentByConnectorCredentialPair.connector_id == connector_id,
                DocumentByConnectorCredentialPair.credential_id == credential_id,
            )
            .limit(batch_size)
        )
        document_ids = db_session.scalars(stmt).all()

        if not document_ids:
            break

        delete_documents_complete__no_commit(
            db_session=db_session, document_ids=list(document_ids)
        )
        db_session.commit()

        if time.monotonic() - start_time > timeout:
            raise RuntimeError("Timeout reached while deleting documents")


def acquire_document_locks(db_session: Session, document_ids: list[str]) -> bool:
    """Acquire locks for the specified documents. Ideally this shouldn't be
    called with large list of document_ids (an exception could be made if the
    length of holding the lock is very short).

    Will simply raise an exception if any of the documents are already locked.
    This prevents deadlocks (assuming that the caller passes in all required
    document IDs in a single call).
    """
    stmt = (
        select(DbDocument.id)
        .where(DbDocument.id.in_(document_ids))
        .with_for_update(nowait=True)
    )
    # will raise exception if any of the documents are already locked
    documents = db_session.scalars(stmt).all()

    # make sure we found every document
    if len(documents) != len(set(document_ids)):
        logger.warning("Didn't find row for all specified document IDs. Aborting.")
        return False

    return True


_NUM_LOCK_ATTEMPTS = 10
_LOCK_RETRY_DELAY = 10


@contextlib.contextmanager
def prepare_to_modify_documents(
    db_session: Session, document_ids: list[str], retry_delay: int = _LOCK_RETRY_DELAY
) -> Generator[TransactionalContext, None, None]:
    """Try and acquire locks for the documents to prevent other jobs from
    modifying them at the same time (e.g. avoid race conditions). This should be
    called ahead of any modification to Vespa. Locks should be released by the
    caller as soon as updates are complete by finishing the transaction.

    NOTE: only one commit is allowed within the context manager returned by this function.
    Multiple commits will result in a sqlalchemy.exc.InvalidRequestError.
    NOTE: this function will commit any existing transaction.
    """

    db_session.commit()  # ensure that we're not in a transaction

    lock_acquired = False
    for i in range(_NUM_LOCK_ATTEMPTS):
        try:
            with db_session.begin() as transaction:
                lock_acquired = acquire_document_locks(
                    db_session=db_session, document_ids=document_ids
                )
                if lock_acquired:
                    yield transaction
                    break
        except OperationalError as e:
            logger.warning(
                f"Failed to acquire locks for documents on attempt {i}, retrying. Error: {e}"
            )

        time.sleep(retry_delay)

    if not lock_acquired:
        raise RuntimeError(
            f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for documents: {document_ids}"
        )


def get_ingestion_documents(
    db_session: Session,
) -> list[DbDocument]:
    # TODO add the option to filter by DocumentSource
    stmt = select(DbDocument).where(DbDocument.from_ingestion_api.is_(True))
    documents = db_session.execute(stmt).scalars().all()
    return list(documents)


def get_documents_by_cc_pair(
    cc_pair_id: int,
    db_session: Session,
) -> list[DbDocument]:
    return (
        db_session.query(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(ConnectorCredentialPair.id == cc_pair_id)
        .all()
    )


def get_document(
    document_id: str,
    db_session: Session,
) -> DbDocument | None:
    stmt = select(DbDocument).where(DbDocument.id == document_id)
    doc: DbDocument | None = db_session.execute(stmt).scalar_one_or_none()
    return doc


def get_cc_pairs_for_document(
    db_session: Session,
    document_id: str,
) -> list[ConnectorCredentialPair]:
    stmt = (
        select(ConnectorCredentialPair)
        .join(
            DocumentByConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .where(DocumentByConnectorCredentialPair.id == document_id)
    )
    return list(db_session.execute(stmt).scalars().all())


def get_document_sources(
    db_session: Session,
    document_ids: list[str],
) -> dict[str, DocumentSource]:
    """Gets the sources for a list of document IDs.
    Returns a dictionary mapping document ID to its source.
    If a document has multiple sources (multiple CC pairs), returns the first one found.
    """
    stmt = (
        select(
            DocumentByConnectorCredentialPair.id,
            Connector.source,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(DocumentByConnectorCredentialPair.id.in_(document_ids))
        .distinct()
    )

    results = db_session.execute(stmt).all()
    return {doc_id: source for doc_id, source in results}


def fetch_chunk_counts_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> list[tuple[str, int]]:
    """
    Return a list of (document_id, chunk_count) tuples.
    If a document_id is not found in the database, it will be returned with a chunk_count of 0.
    """
    stmt = select(DbDocument.id, DbDocument.chunk_count).where(
        DbDocument.id.in_(document_ids)
    )

    results = db_session.execute(stmt).all()

    # Create a dictionary of document_id to chunk_count
    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}

    # Return a list of tuples, preserving `None` for documents not found or with
    # an unknown chunk count. Callers should handle the `None` case and fall
    # back to an existence check against the vector DB if necessary.
    return [(doc_id, chunk_counts.get(doc_id, 0)) for doc_id in document_ids]


def fetch_chunk_count_for_document(
    document_id: str,
    db_session: Session,
) -> int | None:
    stmt = select(DbDocument.chunk_count).where(DbDocument.id == document_id)
    return db_session.execute(stmt).scalar_one_or_none()


def get_unprocessed_kg_document_batch_for_connector(
    db_session: Session,
    connector_id: int,
    kg_coverage_start: datetime,
    kg_max_coverage_days: int,
    batch_size: int = 100,
) -> list[DbDocument]:
    """
    Retrieves a batch of documents that have not been processed for knowledge graph extraction.
    Args:
        db_session (Session): The database session to use
        connector_id (int): The ID of the connector to get documents for
        batch_size (int): The maximum number of documents to retrieve
    Returns:
        list[DbDocument]: List of documents that need KG processing
    """

    stmt = (
        select(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .where(
            and_(
                DocumentByConnectorCredentialPair.connector_id == connector_id,
                DbDocument.doc_updated_at
                >= max(
                    kg_coverage_start,
                    datetime.now() - timedelta(days=kg_max_coverage_days),
                ),
                or_(
                    DbDocument.kg_stage.is_(None),
                    DbDocument.kg_stage == KGStage.NOT_STARTED,
                    DbDocument.doc_updated_at > DbDocument.kg_processing_time,
                ),
            )
        )
        .distinct()
        .limit(batch_size)
    )

    documents = db_session.scalars(stmt).all()
    db_session.flush()

    return list(documents)


def get_kg_extracted_document_ids(db_session: Session) -> list[str]:
    """
    Retrieves all document IDs where kg_stage is EXTRACTED.
    Args:
        db_session (Session): The database session to use
    Returns:
        list[str]: List of document IDs that have been KG processed
    """
    stmt = select(DbDocument.id).where(DbDocument.kg_stage == KGStage.EXTRACTED)

    return list(db_session.scalars(stmt).all())


def update_document_kg_info(
    db_session: Session, document_id: str, kg_stage: KGStage
) -> None:
    """Updates the knowledge graph related information for a document.
    Args:
        db_session (Session): The database session to use
        document_id (str): The ID of the document to update
        kg_stage (KGStage): The stage of the knowledge graph processing for the document
    Raises:
        ValueError: If the document with the given ID is not found
    """
    stmt = (
        update(DbDocument)
        .where(DbDocument.id == document_id)
        .values(
            kg_stage=kg_stage,
            kg_processing_time=datetime.now(timezone.utc),
        )
    )
    db_session.execute(stmt)


def update_document_kg_stage(
    db_session: Session,
    document_id: str,
    kg_stage: KGStage,
) -> None:
    stmt = (
        update(DbDocument).where(DbDocument.id == document_id).values(kg_stage=kg_stage)
    )
    db_session.execute(stmt)
    db_session.flush()


def get_all_kg_extracted_documents_info(
    db_session: Session,
) -> list[str]:
    """Retrieves the knowledge graph data for all documents that have been processed.
    Args:
        db_session (Session): The database session to use
    Returns:
        List[Tuple[str, dict]]: A list of tuples containing:
            - str: The document ID
            - dict: The KG data containing 'entities', 'relationships', and 'terms'
        Only returns documents where kg_stage is EXTRACTED
    """
    stmt = (
        select(DbDocument.id)
        .where(DbDocument.kg_stage == KGStage.EXTRACTED)
        .order_by(DbDocument.id)
    )

    results = db_session.execute(stmt).all()
    return [str(doc_id) for doc_id in results]


def get_base_llm_doc_information(
    db_session: Session, document_ids: list[str]
) -> list[str]:
    stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))
    results = db_session.execute(stmt).all()

    documents = []

    for doc_nr, doc in enumerate(results):
        bare_doc = doc[0]
        documents.append(
            f"""* [{bare_doc.semantic_id}]({bare_doc.link}) ({bare_doc.doc_updated_at})"""
        )

    return documents[:KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES]


def get_document_updated_at(
    document_id: str,
    db_session: Session,
) -> datetime | None:
    """Retrieves the doc_updated_at timestamp for a given document ID.
    Args:
        document_id (str): The ID of the document to query
        db_session (Session): The database session to use
    Returns:
        Optional[datetime]: The doc_updated_at timestamp if found, None if document doesn't exist
    """

    stmt = select(DbDocument.doc_updated_at).where(DbDocument.id == document_id)
    return db_session.execute(stmt).scalar_one_or_none()


def reset_all_document_kg_stages(db_session: Session) -> int:
    """Reset the KG stage of all documents that are not in NOT_STARTED state to NOT_STARTED.

    Args:
        db_session (Session): The database session to use

    Returns:
        int: Number of documents that were reset
    """
    stmt = (
        update(DbDocument)
        .where(DbDocument.kg_stage != KGStage.NOT_STARTED)
        .values(kg_stage=KGStage.NOT_STARTED)
    )
    result = db_session.execute(stmt)

    # The hasattr check is needed for type checking, even though rowcount
    # is guaranteed to exist at runtime for UPDATE operations
    return result.rowcount if hasattr(result, "rowcount") else 0


def update_document_kg_stages(
    db_session: Session, source_stage: KGStage, target_stage: KGStage
) -> int:
    """Reset the KG stage only of documents back to NOT_STARTED.
    Part of reset flow for documents that have been extracted but not clustered.

    Args:
        db_session (Session): The database session to use

    Returns:
        int: Number of documents that were reset
    """
    stmt = (
        update(DbDocument)
        .where(DbDocument.kg_stage == source_stage)
        .values(kg_stage=target_stage)
    )
    result = db_session.execute(stmt)
    # The hasattr check is needed for type checking, even though rowcount
    # is guaranteed to exist at runtime for UPDATE operations
    return result.rowcount if hasattr(result, "rowcount") else 0


def get_skipped_kg_documents(db_session: Session) -> list[str]:
    """
    Retrieves all document IDs where kg_stage is SKIPPED.
    Args:
        db_session (Session): The database session to use
    Returns:
        list[str]: List of document IDs that have been skipped in KG processing
    """
    stmt = select(DbDocument.id).where(DbDocument.kg_stage == KGStage.SKIPPED)

    return list(db_session.scalars(stmt).all())


# def get_kg_doc_info_for_entity_name(
#     db_session: Session, document_id: str, entity_type: str
# ) -> KGEntityDocInfo:
#     """
#     Get the semantic ID and the link for an entity name.
#     """

#     result = (
#         db_session.query(Document.semantic_id, Document.link)
#         .filter(Document.id == document_id)
#         .first()
#     )

#     if result is None:
#         return KGEntityDocInfo(
#             doc_id=None,
#             doc_semantic_id=None,
#             doc_link=None,
#             semantic_entity_name=f"{entity_type}:{document_id}",
#             semantic_linked_entity_name=f"{entity_type}:{document_id}",
#         )

#     return KGEntityDocInfo(
#         doc_id=document_id,
#         doc_semantic_id=result[0],
#         doc_link=result[1],
#         semantic_entity_name=f"{entity_type.upper()}:{result[0]}",
#         semantic_linked_entity_name=f"[{entity_type.upper()}:{result[0]}]({result[1]})",
#     )


def check_for_documents_needing_kg_processing(
    db_session: Session, kg_coverage_start: datetime, kg_max_coverage_days: int
) -> bool:
    """Check if there are any documents that need KG processing.

    A document needs KG processing if:
    1. It is associated with a connector that has kg_processing_enabled = true
    2. AND either:
       - Its kg_stage is NOT_STARTED or NULL
       - OR its last_updated timestamp is greater than its kg_processing_time

    Args:
        db_session (Session): The database session to use

    Returns:
        bool: True if there are any documents needing KG processing, False otherwise
    """

    stmt = (
        select(1)
        .select_from(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            Connector,
            DocumentByConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(
            and_(
                Connector.kg_processing_enabled.is_(True),
                DbDocument.doc_updated_at
                >= max(
                    kg_coverage_start,
                    datetime.now() - timedelta(days=kg_max_coverage_days),
                ),
                or_(
                    DbDocument.kg_stage.is_(None),
                    DbDocument.kg_stage == KGStage.NOT_STARTED,
                    DbDocument.doc_updated_at > DbDocument.kg_processing_time,
                ),
            )
        )
        .exists()
    )

    return db_session.execute(select(stmt)).scalar() or False


def check_for_documents_needing_kg_clustering(db_session: Session) -> bool:
    """Check if there are any documents that need KG clustering.

    A document needs KG clustering if:
    1. It is associated with a connector that has kg_processing_enabled = true
    2. AND either:
       - Its kg_stage is EXTRACTED
       - OR its last_updated timestamp is greater than its kg_processing_time

    Args:
        db_session (Session): The database session to use

    Returns:
        bool: True if there are any documents needing KG clustering, False otherwise
    """
    stmt = (
        select(1)
        .select_from(DbDocument)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(
            and_(
                Connector.kg_processing_enabled.is_(True),
                ConnectorCredentialPair.status
                != ConnectorCredentialPairStatus.DELETING,
                or_(
                    DbDocument.kg_stage == KGStage.EXTRACTED,
                    DbDocument.last_modified > DbDocument.kg_processing_time,
                ),
            )
        )
        .exists()
    )

    return db_session.execute(select(stmt)).scalar() or False


def get_document_kg_entities_and_relationships(
    db_session: Session, document_id: str
) -> tuple[list[KGEntity], list[KGRelationship]]:
    """
    Get the KG entities and relationships that references the document.
    """
    entities = (
        db_session.query(KGEntity).filter(KGEntity.document_id == document_id).all()
    )
    if not entities:
        return [], []
    entity_id_names = [entity.id_name for entity in entities]

    relationships = (
        db_session.query(KGRelationship)
        .filter(
            or_(
                KGRelationship.source_node.in_(entity_id_names),
                KGRelationship.target_node.in_(entity_id_names),
                KGRelationship.source_document == document_id,
            )
        )
        .all()
    )
    return entities, relationships


def get_num_chunks_for_document(db_session: Session, document_id: str) -> int:
    stmt = select(DbDocument.chunk_count).where(DbDocument.id == document_id)
    return db_session.execute(stmt).scalar_one_or_none() or 0


def update_document_metadata__no_commit(
    db_session: Session,
    document_id: str,
    doc_metadata: dict[str, Any],
) -> None:
    """Update the doc_metadata field for a document.

    Note: Does not commit. Caller is responsible for committing.

    Args:
        db_session: Database session
        document_id: The ID of the document to update
        doc_metadata: The new metadata dictionary to set
    """
    stmt = (
        update(DbDocument)
        .where(DbDocument.id == document_id)
        .values(doc_metadata=doc_metadata)
    )
    db_session.execute(stmt)


def delete_document_by_id__no_commit(
    db_session: Session,
    document_id: str,
) -> None:
    """Delete a single document and its connector credential pair relationships.

    Note: Does not commit. Caller is responsible for committing.

    This uses delete_documents_complete__no_commit which handles
    all foreign key relationships (KG entities, relationships, chunk stats,
    cc pair associations, feedback, tags).
    """
    delete_documents_complete__no_commit(db_session, [document_id])


================================================
FILE: backend/onyx/db/document_access.py
================================================
"""
Document access filtering utilities.

This module provides reusable access filtering logic for documents based on:
- Connector access type (PUBLIC vs SYNC)
- Document-level public flag
- User email matching external_user_emails
- User group overlap with external_user_group_ids

This is a standalone module to avoid circular imports between document.py and persona.py.
"""

from sqlalchemy import and_
from sqlalchemy import any_
from sqlalchemy import cast
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement

from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair


def apply_document_access_filter(
    stmt: Select,
    user_email: str | None,
    external_group_ids: list[str],
) -> Select:
    """
    Apply document access filtering to a query.

    This joins with DocumentByConnectorCredentialPair and ConnectorCredentialPair to:
    1. Check if the document is from a PUBLIC connector (access_type = PUBLIC)
    2. Check document-level permissions (is_public, external_user_emails, external_user_group_ids)
    3. Exclude documents from cc_pairs that are being deleted

    Args:
        stmt: The SELECT statement to modify (must be selecting from Document)
        user_email: The user's email for permission checking
        external_group_ids: List of external group IDs the user belongs to

    Returns:
        Modified SELECT statement with access filtering applied
    """
    # Join to get cc_pair info for each document
    stmt = stmt.join(
        DocumentByConnectorCredentialPair,
        Document.id == DocumentByConnectorCredentialPair.id,
    ).join(
        ConnectorCredentialPair,
        and_(
            DocumentByConnectorCredentialPair.connector_id
            == ConnectorCredentialPair.connector_id,
            DocumentByConnectorCredentialPair.credential_id
            == ConnectorCredentialPair.credential_id,
        ),
    )

    # Exclude documents from cc_pairs that are being deleted
    stmt = stmt.where(
        ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING
    )

    # Build access filters
    access_filters: list[ColumnElement[bool]] = [
        # Document is from a PUBLIC connector
        ConnectorCredentialPair.access_type == AccessType.PUBLIC,
        # Document is marked as public (e.g., "Anyone with link" in source)
        Document.is_public.is_(True),
    ]
    if user_email:
        access_filters.append(any_(Document.external_user_emails) == user_email)
    if external_group_ids:
        access_filters.append(
            Document.external_user_group_ids.overlap(
                cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))
            )
        )

    stmt = stmt.where(or_(*access_filters))
    return stmt


def get_accessible_documents_by_ids(
    db_session: Session,
    document_ids: list[str],
    user_email: str | None,
    external_group_ids: list[str],
) -> list[Document]:
    """
    Fetch documents by IDs, filtering to only those the user has access to.

    Uses the same access filtering logic as other document queries:
    - Documents from PUBLIC connectors
    - Documents marked as public (e.g., "Anyone with link")
    - Documents where user email matches external_user_emails
    - Documents where user's groups overlap with external_user_group_ids

    Args:
        db_session: Database session
        document_ids: List of document IDs to fetch
        user_email: User's email for permission checking
        external_group_ids: List of external group IDs the user belongs to

    Returns:
        List of Document objects from the input that the user has access to
    """
    if not document_ids:
        return []

    stmt = select(Document).where(Document.id.in_(document_ids))
    stmt = apply_document_access_filter(stmt, user_email, external_group_ids)
    # Use distinct to avoid duplicates when a document belongs to multiple cc_pairs
    stmt = stmt.distinct()
    return list(db_session.execute(stmt).scalars().all())


================================================
FILE: backend/onyx/db/document_set.py
================================================
from collections.abc import Sequence
from typing import cast
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import exists
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.federated import create_federated_connector_document_set_mapping
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import DocumentSet as DocumentSetDBModel
from onyx.db.models import DocumentSet__ConnectorCredentialPair
from onyx.db.models import DocumentSet__UserGroup
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserRole
from onyx.server.features.document_set.models import DocumentSetCreationRequest
from onyx.server.features.document_set.models import DocumentSetUpdateRequest
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()


def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
    if user.role == UserRole.ADMIN:
        return stmt

    stmt = stmt.distinct()
    DocumentSet__UG = aliased(DocumentSet__UserGroup)
    User__UG = aliased(User__UserGroup)
    """
    Here we select cc_pairs by relation:
    User -> User__UserGroup -> DocumentSet__UserGroup -> DocumentSet
    """
    stmt = stmt.outerjoin(DocumentSet__UG).outerjoin(
        User__UserGroup,
        User__UserGroup.user_group_id == DocumentSet__UG.user_group_id,
    )
    """
    Filter DocumentSets by:
    - if the user is in the user_group that owns the DocumentSet
    - if the user is not a global_curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out DocumentSets that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all DocumentSets in the groups the user is a curator
    for (as well as public DocumentSets)
    """

    # Anonymous users only see public DocumentSets
    if user.is_anonymous:
        where_clause = DocumentSetDBModel.is_public == True  # noqa: E712
        return stmt.where(where_clause)

    where_clause = User__UserGroup.user_id == user.id
    if user.role == UserRole.CURATOR and get_editable:
        where_clause &= User__UserGroup.is_curator == True  # noqa: E712
    if get_editable:
        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)
        if user.role == UserRole.CURATOR:
            user_groups = user_groups.where(User__UG.is_curator == True)  # noqa: E712
        where_clause &= (
            ~exists()
            .where(DocumentSet__UG.document_set_id == DocumentSetDBModel.id)
            .where(~DocumentSet__UG.user_group_id.in_(user_groups))
            .correlate(DocumentSetDBModel)
        )
        where_clause |= DocumentSetDBModel.user_id == user.id
    else:
        where_clause |= DocumentSetDBModel.is_public == True  # noqa: E712

    return stmt.where(where_clause)


def _delete_document_set_cc_pairs__no_commit(
    db_session: Session, document_set_id: int, is_current: bool | None = None
) -> None:
    """NOTE: does not commit transaction, this must be done by the caller"""
    stmt = delete(DocumentSet__ConnectorCredentialPair).where(
        DocumentSet__ConnectorCredentialPair.document_set_id == document_set_id
    )
    if is_current is not None:
        stmt = stmt.where(DocumentSet__ConnectorCredentialPair.is_current == is_current)
    db_session.execute(stmt)


def _mark_document_set_cc_pairs_as_outdated__no_commit(
    db_session: Session, document_set_id: int
) -> None:
    """NOTE: does not commit transaction, this must be done by the caller"""
    stmt = select(DocumentSet__ConnectorCredentialPair).where(
        DocumentSet__ConnectorCredentialPair.document_set_id == document_set_id
    )
    for row in db_session.scalars(stmt):
        row.is_current = False


def delete_document_set_privacy__no_commit(
    document_set_id: int, db_session: Session
) -> None:
    """No private document sets in Onyx MIT"""


def get_document_set_by_id_for_user(
    db_session: Session,
    document_set_id: int,
    user: User,
    get_editable: bool = True,
) -> DocumentSetDBModel | None:
    stmt = (
        select(DocumentSetDBModel)
        .distinct()
        .options(selectinload(DocumentSetDBModel.federated_connectors))
    )
    stmt = stmt.where(DocumentSetDBModel.id == document_set_id)
    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)
    return db_session.scalar(stmt)


def get_document_set_by_id(
    db_session: Session,
    document_set_id: int,
) -> DocumentSetDBModel | None:
    stmt = select(DocumentSetDBModel).distinct()
    stmt = stmt.where(DocumentSetDBModel.id == document_set_id)
    return db_session.scalar(stmt)


def get_document_set_by_name(
    db_session: Session, document_set_name: str
) -> DocumentSetDBModel | None:
    return db_session.scalar(
        select(DocumentSetDBModel).where(DocumentSetDBModel.name == document_set_name)
    )


def get_document_sets_by_name(
    db_session: Session, document_set_names: list[str]
) -> Sequence[DocumentSetDBModel]:
    return db_session.scalars(
        select(DocumentSetDBModel).where(
            DocumentSetDBModel.name.in_(document_set_names)
        )
    ).all()


def get_document_sets_by_ids(
    db_session: Session, document_set_ids: list[int]
) -> Sequence[DocumentSetDBModel]:
    if not document_set_ids:
        return []
    return db_session.scalars(
        select(DocumentSetDBModel).where(DocumentSetDBModel.id.in_(document_set_ids))
    ).all()


def make_doc_set_private(
    document_set_id: int,  # noqa: ARG001
    user_ids: list[UUID] | None,
    group_ids: list[int] | None,
    db_session: Session,  # noqa: ARG001
) -> None:
    # May cause error if someone switches down to MIT from EE
    if user_ids or group_ids:
        raise NotImplementedError("Onyx MIT does not support private Document Sets")


def _check_if_cc_pairs_are_owned_by_groups(
    db_session: Session,
    cc_pair_ids: list[int],
    group_ids: list[int],
) -> None:
    """
    This function checks if the CC pairs are owned by the specified groups or public.
    If not, it raises a ValueError.
    """
    group_cc_pair_relationships = get_cc_pair_groups_for_ids(
        db_session=db_session,
        cc_pair_ids=cc_pair_ids,
    )

    group_cc_pair_relationships_set = {
        (relationship.cc_pair_id, relationship.user_group_id)
        for relationship in group_cc_pair_relationships
    }

    missing_cc_pair_ids = []
    for cc_pair_id in cc_pair_ids:
        for group_id in group_ids:
            if (cc_pair_id, group_id) not in group_cc_pair_relationships_set:
                missing_cc_pair_ids.append(cc_pair_id)
                break

    if missing_cc_pair_ids:
        cc_pairs = get_connector_credential_pairs(
            db_session=db_session,
            ids=missing_cc_pair_ids,
        )
        for cc_pair in cc_pairs:
            if cc_pair.access_type == AccessType.PRIVATE:
                raise ValueError(
                    f"Connector Credential Pair with ID: '{cc_pair.id}' is not owned by the specified groups"
                )


def insert_document_set(
    document_set_creation_request: DocumentSetCreationRequest,
    user_id: UUID | None,
    db_session: Session,
) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]:
    # Check if we have either CC pairs or federated connectors (or both)
    if (
        not document_set_creation_request.cc_pair_ids
        and not document_set_creation_request.federated_connectors
    ):
        raise ValueError("Cannot create a document set with no connectors")

    if not document_set_creation_request.is_public:
        _check_if_cc_pairs_are_owned_by_groups(
            db_session=db_session,
            cc_pair_ids=document_set_creation_request.cc_pair_ids,
            group_ids=document_set_creation_request.groups or [],
        )

    new_document_set_row: DocumentSetDBModel
    ds_cc_pairs: list[DocumentSet__ConnectorCredentialPair]
    try:
        new_document_set_row = DocumentSetDBModel(
            name=document_set_creation_request.name,
            description=document_set_creation_request.description,
            user_id=user_id,
            is_public=document_set_creation_request.is_public,
            is_up_to_date=DISABLE_VECTOR_DB,
            time_last_modified_by_user=func.now(),
        )
        db_session.add(new_document_set_row)
        db_session.flush()  # ensure the new document set gets assigned an ID

        # Create CC pair mappings
        ds_cc_pairs = [
            DocumentSet__ConnectorCredentialPair(
                document_set_id=new_document_set_row.id,
                connector_credential_pair_id=cc_pair_id,
                is_current=True,
            )
            for cc_pair_id in document_set_creation_request.cc_pair_ids
        ]
        db_session.add_all(ds_cc_pairs)

        # Create federated connector mappings
        from onyx.db.federated import create_federated_connector_document_set_mapping

        for fc_config in document_set_creation_request.federated_connectors:
            create_federated_connector_document_set_mapping(
                db_session=db_session,
                federated_connector_id=fc_config.federated_connector_id,
                document_set_id=new_document_set_row.id,
                entities=fc_config.entities,
            )

        versioned_private_doc_set_fn = fetch_versioned_implementation(
            "onyx.db.document_set", "make_doc_set_private"
        )

        # Private Document Sets
        versioned_private_doc_set_fn(
            document_set_id=new_document_set_row.id,
            user_ids=document_set_creation_request.users,
            group_ids=document_set_creation_request.groups,
            db_session=db_session,
        )

        db_session.commit()
    except Exception as e:
        db_session.rollback()
        logger.error(f"Error creating document set: {e}")
        raise

    return new_document_set_row, ds_cc_pairs


def update_document_set(
    db_session: Session,
    document_set_update_request: DocumentSetUpdateRequest,
    user: User,
) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]:
    """If successful, this sets document_set_row.is_up_to_date = False.
    That will be processed via Celery in check_for_vespa_sync_task
    and trigger a long running background sync to Vespa.
    """
    # Check if we have either CC pairs or federated connectors (or both)
    if (
        not document_set_update_request.cc_pair_ids
        and not document_set_update_request.federated_connectors
    ):
        raise ValueError("Cannot update a document set with no connectors")

    if not document_set_update_request.is_public:
        _check_if_cc_pairs_are_owned_by_groups(
            db_session=db_session,
            cc_pair_ids=document_set_update_request.cc_pair_ids,
            group_ids=document_set_update_request.groups,
        )

    try:
        # update the description
        document_set_row = get_document_set_by_id_for_user(
            db_session=db_session,
            document_set_id=document_set_update_request.id,
            user=user,
            get_editable=True,
        )
        if document_set_row is None:
            raise ValueError(
                f"No document set with ID '{document_set_update_request.id}'"
            )
        if not document_set_row.is_up_to_date:
            raise ValueError(
                "Cannot update document set while it is syncing. Please wait for it to finish syncing, and then try again."
            )

        document_set_row.description = document_set_update_request.description
        if not DISABLE_VECTOR_DB:
            document_set_row.is_up_to_date = False
        document_set_row.is_public = document_set_update_request.is_public
        document_set_row.time_last_modified_by_user = func.now()
        versioned_private_doc_set_fn = fetch_versioned_implementation(
            "onyx.db.document_set", "make_doc_set_private"
        )

        # Private Document Sets
        versioned_private_doc_set_fn(
            document_set_id=document_set_row.id,
            user_ids=document_set_update_request.users,
            group_ids=document_set_update_request.groups,
            db_session=db_session,
        )

        # update the attached CC pairs
        # first, mark all existing CC pairs as not current
        _mark_document_set_cc_pairs_as_outdated__no_commit(
            db_session=db_session, document_set_id=document_set_row.id
        )
        # add in rows for the new CC pairs
        ds_cc_pairs = [
            DocumentSet__ConnectorCredentialPair(
                document_set_id=document_set_update_request.id,
                connector_credential_pair_id=cc_pair_id,
                is_current=True,
            )
            for cc_pair_id in document_set_update_request.cc_pair_ids
        ]
        db_session.add_all(ds_cc_pairs)

        # Update federated connector mappings
        # Delete existing federated connector mappings for this document set
        delete_stmt = delete(FederatedConnector__DocumentSet).where(
            FederatedConnector__DocumentSet.document_set_id == document_set_row.id
        )
        db_session.execute(delete_stmt)

        # Create new federated connector mappings
        for fc_config in document_set_update_request.federated_connectors:
            create_federated_connector_document_set_mapping(
                db_session=db_session,
                federated_connector_id=fc_config.federated_connector_id,
                document_set_id=document_set_row.id,
                entities=fc_config.entities,
            )

        db_session.commit()
    except:
        db_session.rollback()
        raise

    return document_set_row, ds_cc_pairs


def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> None:
    stmt = select(DocumentSetDBModel).where(DocumentSetDBModel.id == document_set_id)
    document_set = db_session.scalar(stmt)
    if document_set is None:
        raise ValueError(f"No document set with ID: {document_set_id}")

    # mark as up to date
    document_set.is_up_to_date = True
    # delete outdated relationship table rows
    _delete_document_set_cc_pairs__no_commit(
        db_session=db_session, document_set_id=document_set_id, is_current=False
    )
    db_session.commit()


def delete_document_set(
    document_set_row: DocumentSetDBModel, db_session: Session
) -> None:
    # delete all relationships to CC pairs
    _delete_document_set_cc_pairs__no_commit(
        db_session=db_session, document_set_id=document_set_row.id
    )
    db_session.delete(document_set_row)
    db_session.commit()


def mark_document_set_as_to_be_deleted(
    db_session: Session,
    document_set_id: int,
    user: User,
) -> None:
    """Cleans up all document_set -> cc_pair relationships and marks the document set
    as needing an update. The actual document set row will be deleted by the background
    job which syncs these changes to Vespa."""

    try:
        document_set_row = get_document_set_by_id_for_user(
            db_session=db_session,
            document_set_id=document_set_id,
            user=user,
            get_editable=True,
        )
        if document_set_row is None:
            error_msg = f"Document set with ID: '{document_set_id}' does not exist "
            if user is not None:
                error_msg += f"or is not editable by user with email: '{user.email}'"
            raise ValueError(error_msg)
        if not document_set_row.is_up_to_date:
            raise ValueError(
                "Cannot delete document set while it is syncing. Please wait for it to finish syncing, and then try again."
            )

        # delete all relationships to CC pairs
        _delete_document_set_cc_pairs__no_commit(
            db_session=db_session, document_set_id=document_set_id
        )

        # delete all federated connector mappings so the cleanup task can fully
        # remove the document set once the Vespa sync completes
        delete_stmt = delete(FederatedConnector__DocumentSet).where(
            FederatedConnector__DocumentSet.document_set_id == document_set_id
        )
        db_session.execute(delete_stmt)

        # delete all private document set information
        versioned_delete_private_fn = fetch_versioned_implementation(
            "onyx.db.document_set", "delete_document_set_privacy__no_commit"
        )
        versioned_delete_private_fn(
            document_set_id=document_set_id, db_session=db_session
        )

        # mark the row as needing a sync, it will be deleted there since there
        # are no more relationships to cc pairs
        document_set_row.is_up_to_date = False
        db_session.commit()
    except:
        db_session.rollback()
        raise


def delete_document_set_cc_pair_relationship__no_commit(
    connector_id: int, credential_id: int, db_session: Session
) -> int:
    """Deletes all rows from DocumentSet__ConnectorCredentialPair where the
    connector_credential_pair_id matches the given cc_pair_id."""
    delete_stmt = delete(DocumentSet__ConnectorCredentialPair).where(
        and_(
            ConnectorCredentialPair.connector_id == connector_id,
            ConnectorCredentialPair.credential_id == credential_id,
            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id
            == ConnectorCredentialPair.id,
        )
    )
    result = db_session.execute(delete_stmt)
    return result.rowcount  # type: ignore


def fetch_document_sets(
    user_id: UUID | None,  # noqa: ARG001
    db_session: Session,
    include_outdated: bool = False,
) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]:
    """Return is a list where each element contains a tuple of:
    1. The document set itself
    2. All CC pairs associated with the document set"""
    stmt = (
        select(DocumentSetDBModel, ConnectorCredentialPair)
        .join(
            DocumentSet__ConnectorCredentialPair,
            DocumentSetDBModel.id
            == DocumentSet__ConnectorCredentialPair.document_set_id,
            isouter=True,  # outer join is needed to also fetch document sets with no cc pairs
        )
        .join(
            ConnectorCredentialPair,
            ConnectorCredentialPair.id
            == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,
            isouter=True,  # outer join is needed to also fetch document sets with no cc pairs
        )
    )
    if not include_outdated:
        stmt = stmt.where(
            or_(
                DocumentSet__ConnectorCredentialPair.is_current == True,  # noqa: E712
                # `None` handles case where no CC Pairs exist for a Document Set
                DocumentSet__ConnectorCredentialPair.is_current.is_(None),
            )
        )

    results = cast(
        list[tuple[DocumentSetDBModel, ConnectorCredentialPair | None]],
        db_session.execute(stmt).all(),
    )

    aggregated_results: dict[
        int, tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]
    ] = {}
    for document_set, cc_pair in results:
        if document_set.id not in aggregated_results:
            aggregated_results[document_set.id] = (
                document_set,
                [cc_pair] if cc_pair else [],
            )
        else:
            if cc_pair:
                aggregated_results[document_set.id][1].append(cc_pair)

    return [
        (document_set, cc_pairs)
        for document_set, cc_pairs in aggregated_results.values()
    ]


def fetch_all_document_sets_for_user(
    db_session: Session,
    user: User,
    get_editable: bool = True,
) -> Sequence[DocumentSetDBModel]:
    stmt = (
        select(DocumentSetDBModel)
        .distinct()
        .options(
            selectinload(DocumentSetDBModel.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSetDBModel.users),
            selectinload(DocumentSetDBModel.groups),
            selectinload(DocumentSetDBModel.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        )
    )
    stmt = _add_user_filters(stmt, user, get_editable=get_editable)
    return db_session.scalars(stmt).unique().all()


def fetch_documents_for_document_set_paginated(
    document_set_id: int,
    db_session: Session,
    current_only: bool = True,
    last_document_id: str | None = None,
    limit: int = 100,
) -> tuple[Sequence[Document], str | None]:
    stmt = (
        select(Document)
        .join(
            DocumentByConnectorCredentialPair,
            DocumentByConnectorCredentialPair.id == Document.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                ConnectorCredentialPair.connector_id
                == DocumentByConnectorCredentialPair.connector_id,
                ConnectorCredentialPair.credential_id
                == DocumentByConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            DocumentSet__ConnectorCredentialPair,
            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id
            == ConnectorCredentialPair.id,
        )
        .join(
            DocumentSetDBModel,
            DocumentSetDBModel.id
            == DocumentSet__ConnectorCredentialPair.document_set_id,
        )
        .where(DocumentSetDBModel.id == document_set_id)
        .order_by(Document.id)
        .limit(limit)
    )
    if last_document_id is not None:
        stmt = stmt.where(Document.id > last_document_id)
    if current_only:
        stmt = stmt.where(
            DocumentSet__ConnectorCredentialPair.is_current == True  # noqa: E712
        )
    stmt = stmt.distinct()

    documents = db_session.scalars(stmt).all()
    return documents, documents[-1].id if documents else None


def construct_document_id_select_by_docset(
    document_set_id: int,
    current_only: bool = True,
) -> Select:
    """This returns a statement that should be executed using
    .yield_per() to minimize overhead. The primary consumers of this function
    are background processing task generators."""

    stmt = (
        select(Document.id)
        .join(
            DocumentByConnectorCredentialPair,
            DocumentByConnectorCredentialPair.id == Document.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                ConnectorCredentialPair.connector_id
                == DocumentByConnectorCredentialPair.connector_id,
                ConnectorCredentialPair.credential_id
                == DocumentByConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            DocumentSet__ConnectorCredentialPair,
            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id
            == ConnectorCredentialPair.id,
        )
        .join(
            DocumentSetDBModel,
            DocumentSetDBModel.id
            == DocumentSet__ConnectorCredentialPair.document_set_id,
        )
        .where(DocumentSetDBModel.id == document_set_id)
        .order_by(Document.id)
    )

    if current_only:
        stmt = stmt.where(
            DocumentSet__ConnectorCredentialPair.is_current == True  # noqa: E712
        )

    stmt = stmt.distinct()
    return stmt


def fetch_document_sets_for_document(
    document_id: str,
    db_session: Session,
) -> list[str]:
    """
    Fetches the document set names for a single document ID.

    :param document_id: The ID of the document to fetch sets for.
    :param db_session: The SQLAlchemy session to use for the query.
    :return: A list of document set names, or None if no result is found.
    """
    result = fetch_document_sets_for_documents([document_id], db_session)
    if not result:
        return []
    return result[0][1]


def fetch_document_sets_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> Sequence[tuple[str, list[str]]]:
    """Gives back a list of (document_id, list[document_set_names]) tuples"""

    """Building subqueries"""
    # NOTE: have to build these subqueries first in order to guarantee that we get one
    # returned row for each specified document_id. Basically, we want to do the filters first,
    # then the outer joins.

    # don't include CC pairs that are being deleted
    # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them
    # as we can assume their document sets are no longer relevant
    valid_cc_pairs_subquery = aliased(
        ConnectorCredentialPair,
        select(ConnectorCredentialPair)
        .where(
            ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING
        )  # noqa: E712
        .subquery(),
    )

    valid_document_set__cc_pairs_subquery = aliased(
        DocumentSet__ConnectorCredentialPair,
        select(DocumentSet__ConnectorCredentialPair)
        .where(DocumentSet__ConnectorCredentialPair.is_current == True)  # noqa: E712
        .subquery(),
    )
    """End building subqueries"""

    stmt = (
        select(
            Document.id,
            func.coalesce(
                func.array_remove(func.array_agg(DocumentSetDBModel.name), None), []
            ).label("document_set_names"),
        )
        # Here we select document sets by relation:
        # Document -> DocumentByConnectorCredentialPair -> ConnectorCredentialPair ->
        # DocumentSet__ConnectorCredentialPair -> DocumentSet
        .outerjoin(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .outerjoin(
            valid_cc_pairs_subquery,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == valid_cc_pairs_subquery.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == valid_cc_pairs_subquery.credential_id,
            ),
        )
        .outerjoin(
            valid_document_set__cc_pairs_subquery,
            valid_cc_pairs_subquery.id
            == valid_document_set__cc_pairs_subquery.connector_credential_pair_id,
        )
        .outerjoin(
            DocumentSetDBModel,
            DocumentSetDBModel.id
            == valid_document_set__cc_pairs_subquery.document_set_id,
        )
        .where(Document.id.in_(document_ids))
        .group_by(Document.id)
    )
    return db_session.execute(stmt).all()  # type: ignore


def get_or_create_document_set_by_name(
    db_session: Session,
    document_set_name: str,
    document_set_description: str = "Default Persona created Document-Set, please update description",
) -> DocumentSetDBModel:
    """This is used by the default personas which need to attach to document sets
    on server startup"""
    doc_set = get_document_set_by_name(db_session, document_set_name)
    if doc_set is not None:
        return doc_set

    new_doc_set = DocumentSetDBModel(
        name=document_set_name,
        description=document_set_description,
        user_id=None,
        is_up_to_date=True,
    )

    db_session.add(new_doc_set)
    db_session.commit()

    return new_doc_set


def check_document_sets_are_public(
    db_session: Session,
    document_set_ids: list[int],
) -> bool:
    """Checks if any of the CC-Pairs are Non Public (meaning that some documents in this document
    set is not Public"""
    connector_credential_pair_ids = (
        db_session.query(
            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id
        )
        .filter(
            DocumentSet__ConnectorCredentialPair.document_set_id.in_(document_set_ids)
        )
        .subquery()
    )

    not_public_exists = (
        db_session.query(ConnectorCredentialPair.id)
        .filter(
            ConnectorCredentialPair.id.in_(
                connector_credential_pair_ids  # type:ignore
            ),
            ConnectorCredentialPair.access_type != AccessType.PUBLIC,
        )
        .limit(1)
        .first()
        is not None
    )

    return not not_public_exists


================================================
FILE: backend/onyx/db/engine/__init__.py
================================================


================================================
FILE: backend/onyx/db/engine/async_sql_engine.py
================================================
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from typing import Any
from typing import AsyncContextManager

import asyncpg  # type: ignore
from fastapi import HTTPException
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import AsyncEngine
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine

from onyx.configs.app_configs import AWS_REGION_NAME
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from onyx.configs.app_configs import POSTGRES_DB
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_POOL_PRE_PING
from onyx.configs.app_configs import POSTGRES_POOL_RECYCLE
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
from onyx.configs.app_configs import POSTGRES_USER
from onyx.db.engine.iam_auth import create_ssl_context_if_iam
from onyx.db.engine.iam_auth import get_iam_auth_token
from onyx.db.engine.sql_engine import ASYNC_DB_API
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.sql_engine import USE_IAM_AUTH
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
from shared_configs.contextvars import get_current_tenant_id


# Global so we don't create more than one engine per process
_ASYNC_ENGINE: AsyncEngine | None = None


async def get_async_connection() -> Any:
    """
    Custom connection function for async engine when using IAM auth.
    """
    host = POSTGRES_HOST
    port = POSTGRES_PORT
    user = POSTGRES_USER
    db = POSTGRES_DB
    token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)

    # asyncpg requires 'ssl="require"' if SSL needed
    return await asyncpg.connect(
        user=user, password=token, host=host, port=int(port), database=db, ssl="require"
    )


def get_sqlalchemy_async_engine() -> AsyncEngine:
    global _ASYNC_ENGINE
    if _ASYNC_ENGINE is None:
        app_name = SqlEngine.get_app_name() + "_async"
        connection_string = build_connection_string(
            db_api=ASYNC_DB_API,
            use_iam_auth=USE_IAM_AUTH,
        )

        connect_args: dict[str, Any] = {}
        if app_name:
            connect_args["server_settings"] = {"application_name": app_name}

        connect_args["ssl"] = create_ssl_context_if_iam()

        engine_kwargs = {
            "connect_args": connect_args,
            "pool_pre_ping": POSTGRES_POOL_PRE_PING,
            "pool_recycle": POSTGRES_POOL_RECYCLE,
        }

        if POSTGRES_USE_NULL_POOL:
            engine_kwargs["poolclass"] = pool.NullPool
        else:
            engine_kwargs["pool_size"] = POSTGRES_API_SERVER_POOL_SIZE
            engine_kwargs["max_overflow"] = POSTGRES_API_SERVER_POOL_OVERFLOW

        _ASYNC_ENGINE = create_async_engine(
            connection_string,
            **engine_kwargs,
        )

        if USE_IAM_AUTH:

            @event.listens_for(_ASYNC_ENGINE.sync_engine, "do_connect")
            def provide_iam_token_async(
                dialect: Any,  # noqa: ARG001
                conn_rec: Any,  # noqa: ARG001
                cargs: Any,  # noqa: ARG001
                cparams: Any,
            ) -> None:
                # For async engine using asyncpg, we still need to set the IAM token here.
                host = POSTGRES_HOST
                port = POSTGRES_PORT
                user = POSTGRES_USER
                token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)
                cparams["password"] = token
                cparams["ssl"] = create_ssl_context_if_iam()

    return _ASYNC_ENGINE


async def get_async_session(
    tenant_id: str | None = None,
) -> AsyncGenerator[AsyncSession, None]:
    """For use w/ Depends for *async* FastAPI endpoints.

    For standard `async with ... as ...` use, use get_async_session_context_manager.
    """

    if tenant_id is None:
        tenant_id = get_current_tenant_id()

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

    engine = get_sqlalchemy_async_engine()

    # no need to use the schema translation map for self-hosted + default schema
    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:
        async with AsyncSession(bind=engine, expire_on_commit=False) as session:
            yield session
        return

    # Create connection with schema translation to handle querying the right schema
    schema_translate_map = {None: tenant_id}
    async with engine.connect() as connection:
        connection = await connection.execution_options(
            schema_translate_map=schema_translate_map
        )
        async with AsyncSession(
            bind=connection, expire_on_commit=False
        ) as async_session:
            yield async_session


def get_async_session_context_manager(
    tenant_id: str | None = None,
) -> AsyncContextManager[AsyncSession]:
    return asynccontextmanager(get_async_session)(tenant_id)


================================================
FILE: backend/onyx/db/engine/connection_warmup.py
================================================
from sqlalchemy import text

from onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine
from onyx.db.engine.sql_engine import get_sqlalchemy_engine


async def warm_up_connections(
    sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20
) -> None:
    sync_postgres_engine = get_sqlalchemy_engine()
    connections = [
        sync_postgres_engine.connect() for _ in range(sync_connections_to_warm_up)
    ]
    for conn in connections:
        conn.execute(text("SELECT 1"))
    for conn in connections:
        conn.close()

    async_postgres_engine = get_sqlalchemy_async_engine()
    async_connections = [
        await async_postgres_engine.connect()
        for _ in range(async_connections_to_warm_up)
    ]
    for async_conn in async_connections:
        await async_conn.execute(text("SELECT 1"))
    for async_conn in async_connections:
        await async_conn.close()


================================================
FILE: backend/onyx/db/engine/iam_auth.py
================================================
import functools
import os
import ssl
from typing import Any

import boto3

from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import USE_IAM_AUTH
from onyx.configs.constants import SSL_CERT_FILE


def get_iam_auth_token(
    host: str, port: str, user: str, region: str = "us-east-2"
) -> str:
    """
    Generate an IAM authentication token using boto3.
    """
    client = boto3.client("rds", region_name=region)
    token = client.generate_db_auth_token(
        DBHostname=host, Port=int(port), DBUsername=user
    )
    return token


def configure_psycopg2_iam_auth(
    cparams: dict[str, Any], host: str, port: str, user: str, region: str
) -> None:
    """
    Configure cparams for psycopg2 with IAM token and SSL.
    """
    token = get_iam_auth_token(host, port, user, region)
    cparams["password"] = token
    cparams["sslmode"] = "require"
    cparams["sslrootcert"] = SSL_CERT_FILE


def provide_iam_token(
    dialect: Any,  # noqa: ARG001
    conn_rec: Any,  # noqa: ARG001
    cargs: Any,  # noqa: ARG001
    cparams: Any,
) -> None:
    if USE_IAM_AUTH:
        host = POSTGRES_HOST
        port = POSTGRES_PORT
        user = POSTGRES_USER
        region = os.getenv("AWS_REGION_NAME", "us-east-2")
        # Configure for psycopg2 with IAM token
        configure_psycopg2_iam_auth(cparams, host, port, user, region)


@functools.cache
def create_ssl_context_if_iam() -> ssl.SSLContext | None:
    """Create an SSL context if IAM authentication is enabled, else return None."""
    if USE_IAM_AUTH:
        return ssl.create_default_context(cafile=SSL_CERT_FILE)
    return None


================================================
FILE: backend/onyx/db/engine/sql_engine.py
================================================
import os
import re
import threading
import time
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any

from fastapi import HTTPException
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DB_READONLY_PASSWORD
from onyx.configs.app_configs import DB_READONLY_USER
from onyx.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from onyx.configs.app_configs import LOG_POSTGRES_LATENCY
from onyx.configs.app_configs import POSTGRES_DB
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PASSWORD
from onyx.configs.app_configs import POSTGRES_POOL_PRE_PING
from onyx.configs.app_configs import POSTGRES_POOL_RECYCLE
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME
from onyx.db.engine.iam_auth import provide_iam_token
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

# Moved is_valid_schema_name here to avoid circular import


logger = setup_logger()


# Schema name validation (moved here to avoid circular import)
SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$")


def is_valid_schema_name(name: str) -> bool:
    return SCHEMA_NAME_REGEX.match(name) is not None


SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"

# why isn't this in configs?
USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"


def build_connection_string(
    *,
    db_api: str = ASYNC_DB_API,
    user: str = POSTGRES_USER,
    password: str = POSTGRES_PASSWORD,
    host: str = POSTGRES_HOST,
    port: str = POSTGRES_PORT,
    db: str = POSTGRES_DB,
    app_name: str | None = None,
    use_iam_auth: bool = USE_IAM_AUTH,
    region: str = "us-west-2",  # noqa: ARG001
) -> str:
    if use_iam_auth:
        base_conn_str = f"postgresql+{db_api}://{user}@{host}:{port}/{db}"
    else:
        base_conn_str = f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"

    # For asyncpg, do not include application_name in the connection string
    if app_name and db_api != "asyncpg":
        if "?" in base_conn_str:
            return f"{base_conn_str}&application_name={app_name}"
        else:
            return f"{base_conn_str}?application_name={app_name}"
    return base_conn_str


if LOG_POSTGRES_LATENCY:

    @event.listens_for(Engine, "before_cursor_execute")
    def before_cursor_execute(  # type: ignore
        conn,
        cursor,  # noqa: ARG001
        statement,  # noqa: ARG001
        parameters,  # noqa: ARG001
        context,  # noqa: ARG001
        executemany,  # noqa: ARG001
    ):
        conn.info["query_start_time"] = time.time()

    @event.listens_for(Engine, "after_cursor_execute")
    def after_cursor_execute(  # type: ignore
        conn,
        cursor,  # noqa: ARG001
        statement,
        parameters,  # noqa: ARG001
        context,  # noqa: ARG001
        executemany,  # noqa: ARG001
    ):
        total_time = time.time() - conn.info["query_start_time"]
        if total_time > 0.1:
            logger.debug(
                f"Query Complete: {statement}\n\nTotal Time: {total_time:.4f} seconds"
            )


if LOG_POSTGRES_CONN_COUNTS:
    checkout_count = 0
    checkin_count = 0

    @event.listens_for(Engine, "checkout")
    def log_checkout(dbapi_connection, connection_record, connection_proxy):  # type: ignore  # noqa: ARG001
        global checkout_count
        checkout_count += 1

        active_connections = connection_proxy._pool.checkedout()
        idle_connections = connection_proxy._pool.checkedin()
        pool_size = connection_proxy._pool.size()
        logger.debug(
            "Connection Checkout\n"
            f"Active Connections: {active_connections};\n"
            f"Idle: {idle_connections};\n"
            f"Pool Size: {pool_size};\n"
            f"Total connection checkouts: {checkout_count}"
        )

    @event.listens_for(Engine, "checkin")
    def log_checkin(dbapi_connection, connection_record):  # type: ignore  # noqa: ARG001
        global checkin_count
        checkin_count += 1
        logger.debug(f"Total connection checkins: {checkin_count}")


class SqlEngine:
    _engine: Engine | None = None
    _readonly_engine: Engine | None = None
    _lock: threading.Lock = threading.Lock()
    _readonly_lock: threading.Lock = threading.Lock()
    _app_name: str = POSTGRES_UNKNOWN_APP_NAME

    @classmethod
    def init_engine(
        cls,
        pool_size: int,
        # is really `pool_max_overflow`, but calling it `max_overflow` to stay consistent with SQLAlchemy
        max_overflow: int,
        app_name: str | None = None,  # noqa: ARG003
        db_api: str = SYNC_DB_API,
        use_iam: bool = USE_IAM_AUTH,
        connection_string: str | None = None,
        **extra_engine_kwargs: Any,
    ) -> None:
        """NOTE: enforce that pool_size and pool_max_overflow are passed in. These are
        important args, and if incorrectly specified, we have run into hitting the pool
        limit / using too many connections and overwhelming the database.

        Specifying connection_string directly will cause some of the other parameters
        to be ignored.
        """
        with cls._lock:
            if cls._engine:
                return

            if not connection_string:
                connection_string = build_connection_string(
                    db_api=db_api,
                    app_name=cls._app_name + "_sync",
                    use_iam_auth=use_iam,
                )

            # Start with base kwargs that are valid for all pool types
            final_engine_kwargs: dict[str, Any] = {}

            if POSTGRES_USE_NULL_POOL:
                # if null pool is specified, then we need to make sure that
                # we remove any passed in kwargs related to pool size that would
                # cause the initialization to fail
                final_engine_kwargs.update(extra_engine_kwargs)

                final_engine_kwargs["poolclass"] = pool.NullPool
                if "pool_size" in final_engine_kwargs:
                    del final_engine_kwargs["pool_size"]
                if "max_overflow" in final_engine_kwargs:
                    del final_engine_kwargs["max_overflow"]
            else:
                final_engine_kwargs["pool_size"] = pool_size
                final_engine_kwargs["max_overflow"] = max_overflow
                final_engine_kwargs["pool_pre_ping"] = POSTGRES_POOL_PRE_PING
                final_engine_kwargs["pool_recycle"] = POSTGRES_POOL_RECYCLE

                # any passed in kwargs override the defaults
                final_engine_kwargs.update(extra_engine_kwargs)

            logger.info(f"Creating engine with kwargs: {final_engine_kwargs}")
            # echo=True here for inspecting all emitted db queries
            engine = create_engine(connection_string, **final_engine_kwargs)

            if use_iam:
                event.listen(engine, "do_connect", provide_iam_token)

            cls._engine = engine

    @classmethod
    def init_readonly_engine(
        cls,
        pool_size: int,
        # is really `pool_max_overflow`, but calling it `max_overflow` to stay consistent with SQLAlchemy
        max_overflow: int,
        **extra_engine_kwargs: Any,
    ) -> None:
        """NOTE: enforce that pool_size and pool_max_overflow are passed in. These are
        important args, and if incorrectly specified, we have run into hitting the pool
        limit / using too many connections and overwhelming the database."""
        with cls._readonly_lock:
            if cls._readonly_engine:
                return

            if not DB_READONLY_USER or not DB_READONLY_PASSWORD:
                raise ValueError(
                    "Custom database user credentials not configured in environment variables"
                )

            # Build connection string with custom user
            connection_string = build_connection_string(
                user=DB_READONLY_USER,
                password=DB_READONLY_PASSWORD,
                use_iam_auth=False,  # Custom users typically don't use IAM auth
                db_api=SYNC_DB_API,  # Explicitly use sync DB API
            )

            # Start with base kwargs that are valid for all pool types
            final_engine_kwargs: dict[str, Any] = {}

            if POSTGRES_USE_NULL_POOL:
                # if null pool is specified, then we need to make sure that
                # we remove any passed in kwargs related to pool size that would
                # cause the initialization to fail
                final_engine_kwargs.update(extra_engine_kwargs)

                final_engine_kwargs["poolclass"] = pool.NullPool
                if "pool_size" in final_engine_kwargs:
                    del final_engine_kwargs["pool_size"]
                if "max_overflow" in final_engine_kwargs:
                    del final_engine_kwargs["max_overflow"]
            else:
                final_engine_kwargs["pool_size"] = pool_size
                final_engine_kwargs["max_overflow"] = max_overflow
                final_engine_kwargs["pool_pre_ping"] = POSTGRES_POOL_PRE_PING
                final_engine_kwargs["pool_recycle"] = POSTGRES_POOL_RECYCLE

                # any passed in kwargs override the defaults
                final_engine_kwargs.update(extra_engine_kwargs)

            logger.info(f"Creating engine with kwargs: {final_engine_kwargs}")
            # echo=True here for inspecting all emitted db queries
            engine = create_engine(connection_string, **final_engine_kwargs)

            if USE_IAM_AUTH:
                event.listen(engine, "do_connect", provide_iam_token)

            cls._readonly_engine = engine

    @classmethod
    def get_engine(cls) -> Engine:
        if not cls._engine:
            raise RuntimeError("Engine not initialized. Must call init_engine first.")
        return cls._engine

    @classmethod
    def get_readonly_engine(cls) -> Engine:
        if not cls._readonly_engine:
            raise RuntimeError(
                "Readonly engine not initialized. Must call init_readonly_engine first."
            )
        return cls._readonly_engine

    @classmethod
    def set_app_name(cls, app_name: str) -> None:
        cls._app_name = app_name

    @classmethod
    def get_app_name(cls) -> str:
        if not cls._app_name:
            return ""
        return cls._app_name

    @classmethod
    def reset_engine(cls) -> None:
        with cls._lock:
            if cls._engine:
                cls._engine.dispose()
                cls._engine = None

    @classmethod
    @contextmanager
    def scoped_engine(cls, **init_kwargs: Any) -> Generator[None, None, None]:
        """Context manager that initializes the engine and guarantees cleanup."""
        cls.init_engine(**init_kwargs)
        try:
            yield
        finally:
            cls.reset_engine()


def get_sqlalchemy_engine() -> Engine:
    return SqlEngine.get_engine()


def get_readonly_sqlalchemy_engine() -> Engine:
    return SqlEngine.get_readonly_engine()


@contextmanager
def get_session_with_current_tenant() -> Generator[Session, None, None]:
    """Standard way to get a DB session."""
    tenant_id = get_current_tenant_id()
    with get_session_with_tenant(tenant_id=tenant_id) as session:
        yield session


@contextmanager
def get_session_with_current_tenant_if_none(
    session: Session | None,
) -> Generator[Session, None, None]:
    if session is None:
        tenant_id = get_current_tenant_id()
        with get_session_with_tenant(tenant_id=tenant_id) as session:
            yield session
    else:
        yield session


# Used in multi tenant mode when need to refer to the shared `public` schema
@contextmanager
def get_session_with_shared_schema() -> Generator[Session, None, None]:
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as session:
        yield session
    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@contextmanager
def get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]:
    """
    Generate a database session for a specific tenant.
    """
    engine = get_sqlalchemy_engine()

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

    # no need to use the schema translation map for self-hosted + default schema
    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:
        with Session(bind=engine, expire_on_commit=False) as session:
            yield session
        return

    # Create connection with schema translation to handle querying the right schema
    schema_translate_map = {None: tenant_id}
    with engine.connect().execution_options(
        schema_translate_map=schema_translate_map
    ) as connection:
        with Session(bind=connection, expire_on_commit=False) as session:
            yield session


def get_session() -> Generator[Session, None, None]:
    """For use w/ Depends for FastAPI endpoints.

    Has some additional validation, and likely should be merged
    with get_session_with_current_tenant in the future."""
    tenant_id = get_current_tenant_id()
    if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:
        raise BasicAuthenticationError(detail="User must authenticate")

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

    with get_session_with_current_tenant() as db_session:
        yield db_session


@contextmanager
def get_db_readonly_user_session_with_current_tenant() -> (
    Generator[Session, None, None]
):
    """
    Generate a database session using a custom database user for the current tenant.
    The custom user credentials are obtained from environment variables.
    """
    tenant_id = get_current_tenant_id()

    readonly_engine = get_readonly_sqlalchemy_engine()

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

    # no need to use the schema translation map for self-hosted + default schema
    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:
        with Session(readonly_engine, expire_on_commit=False) as session:
            yield session
        return

    schema_translate_map = {None: tenant_id}
    with readonly_engine.connect().execution_options(
        schema_translate_map=schema_translate_map
    ) as connection:
        with Session(bind=connection, expire_on_commit=False) as session:
            yield session


================================================
FILE: backend/onyx/db/engine/tenant_utils.py
================================================
from sqlalchemy import text

from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import SqlEngine
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import TENANT_ID_PREFIX


def get_schemas_needing_migration(
    tenant_schemas: list[str], head_rev: str
) -> list[str]:
    """Return only schemas whose current alembic version is not at head.

    Uses a server-side PL/pgSQL loop to collect each schema's alembic version
    into a temp table one at a time. This avoids building a massive UNION ALL
    query (which locks the DB and times out at 17k+ schemas) and instead
    acquires locks sequentially, one schema per iteration.
    """
    if not tenant_schemas:
        return []

    engine = SqlEngine.get_engine()

    with engine.connect() as conn:
        # Populate a temp input table with exactly the schemas we care about.
        # The DO block reads from this table so it only iterates the requested
        # schemas instead of every tenant_% schema in the database.
        conn.execute(text("DROP TABLE IF EXISTS _alembic_version_snapshot"))
        conn.execute(text("DROP TABLE IF EXISTS _tenant_schemas_input"))
        conn.execute(text("CREATE TEMP TABLE _tenant_schemas_input (schema_name text)"))
        conn.execute(
            text(
                "INSERT INTO _tenant_schemas_input (schema_name) SELECT unnest(CAST(:schemas AS text[]))"
            ),
            {"schemas": tenant_schemas},
        )
        conn.execute(
            text(
                "CREATE TEMP TABLE _alembic_version_snapshot (schema_name text, version_num text)"
            )
        )

        conn.execute(
            text(
                """
                DO $$
                DECLARE
                    s        text;
                    schemas  text[];
                BEGIN
                    SELECT array_agg(schema_name) INTO schemas
                    FROM _tenant_schemas_input;

                    IF schemas IS NULL THEN
                        RAISE NOTICE 'No tenant schemas found.';
                        RETURN;
                    END IF;

                    FOREACH s IN ARRAY schemas LOOP
                        BEGIN
                            EXECUTE format(
                                'INSERT INTO _alembic_version_snapshot
                                 SELECT %L, version_num FROM %I.alembic_version',
                                s, s
                            );
                        EXCEPTION
                            -- undefined_table: schema exists but has no alembic_version
                            --   table yet (new tenant, not yet migrated).
                            -- invalid_schema_name: tenant is registered but its
                            --   PostgreSQL schema does not exist yet (e.g. provisioning
                            --   incomplete). Both cases mean no version is available and
                            --   the schema will be included in the migration list.
                            WHEN undefined_table THEN NULL;
                            WHEN invalid_schema_name THEN NULL;
                        END;
                    END LOOP;
                END;
                $$
                """
            )
        )

        rows = conn.execute(
            text("SELECT schema_name, version_num FROM _alembic_version_snapshot")
        )
        version_by_schema = {row[0]: row[1] for row in rows}

        conn.execute(text("DROP TABLE IF EXISTS _alembic_version_snapshot"))
        conn.execute(text("DROP TABLE IF EXISTS _tenant_schemas_input"))

    # Schemas missing from the snapshot have no alembic_version table yet and
    # also need migration. version_by_schema.get(s) returns None for those,
    # and None != head_rev, so they are included automatically.
    return [s for s in tenant_schemas if version_by_schema.get(s) != head_rev]


def get_all_tenant_ids() -> list[str]:
    """Returning [None] means the only tenant is the 'public' or self hosted tenant."""

    tenant_ids: list[str]

    if not MULTI_TENANT:
        return [POSTGRES_DEFAULT_SCHEMA]

    with get_session_with_shared_schema() as session:
        result = session.execute(
            text(
                f"""
                SELECT schema_name
                FROM information_schema.schemata
                WHERE schema_name NOT IN ('pg_catalog', 'information_schema', '{POSTGRES_DEFAULT_SCHEMA}')"""
            )
        )
        tenant_ids = [row[0] for row in result]

    valid_tenants = [
        tenant
        for tenant in tenant_ids
        if tenant is None or tenant.startswith(TENANT_ID_PREFIX)
    ]
    return valid_tenants


================================================
FILE: backend/onyx/db/engine/time_utils.py
================================================
from datetime import datetime

from sqlalchemy import text
from sqlalchemy.orm import Session


def get_db_current_time(db_session: Session) -> datetime:
    result = db_session.execute(text("SELECT NOW()")).scalar()
    if result is None:
        raise ValueError("Database did not return a time")
    return result


================================================
FILE: backend/onyx/db/entities.py
================================================
import uuid
from datetime import datetime
from datetime import timezone
from typing import List

from sqlalchemy import func
from sqlalchemy import literal
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session

import onyx.db.document as dbdocument
from onyx.db.entity_type import UNGROUNDED_SOURCE_NAME
from onyx.db.models import Document
from onyx.db.models import KGEntity
from onyx.db.models import KGEntityExtractionStaging
from onyx.db.models import KGEntityType
from onyx.kg.models import KGGroundingType
from onyx.kg.models import KGStage
from onyx.kg.utils.formatting_utils import make_entity_id


def upsert_staging_entity(
    db_session: Session,
    name: str,
    entity_type: str,
    document_id: str | None = None,
    occurrences: int = 1,
    attributes: dict[str, str] | None = None,
    event_time: datetime | None = None,
) -> KGEntityExtractionStaging:
    """Add or update a new staging entity to the database.

    Args:
        db_session: SQLAlchemy session
        name: Name of the entity
        entity_type: Type of the entity (must match an existing KGEntityType)
        document_id: ID of the document the entity belongs to
        occurrences: Number of times this entity has been found
        attributes: Attributes of the entity
        event_time: Time the entity was added to the database

    Returns:
        KGEntityExtractionStaging: The created entity
    """
    entity_type = entity_type.upper()
    name = name.title()
    id_name = make_entity_id(entity_type, name)
    attributes = attributes or {}

    entity_key = attributes.get("key")
    entity_parent = attributes.get("parent")

    keep_attributes = {
        attr_key: attr_val
        for attr_key, attr_val in attributes.items()
        if attr_key not in ("key", "parent")
    }

    # Create new entity
    stmt = (
        pg_insert(KGEntityExtractionStaging)
        .values(
            id_name=id_name,
            name=name,
            entity_type_id_name=entity_type,
            entity_key=entity_key,
            parent_key=entity_parent,
            document_id=document_id,
            occurrences=occurrences,
            attributes=keep_attributes,
            event_time=event_time,
        )
        .on_conflict_do_update(
            index_elements=["id_name"],
            set_=dict(
                occurrences=KGEntityExtractionStaging.occurrences + occurrences,
            ),
        )
        .returning(KGEntityExtractionStaging)
    )

    result = db_session.execute(stmt).scalar()
    if result is None:
        raise RuntimeError(
            f"Failed to create or increment staging entity with id_name: {id_name}"
        )

    # Update the document's kg_stage if document_id is provided
    if document_id is not None:
        db_session.query(Document).filter(Document.id == document_id).update(
            {
                "kg_stage": KGStage.EXTRACTED,
                "kg_processing_time": datetime.now(timezone.utc),
            }
        )
    db_session.flush()

    return result


def transfer_entity(
    db_session: Session,
    entity: KGEntityExtractionStaging,
) -> KGEntity:
    """Transfer an entity from the extraction staging table to the normalized table.

    Args:
        db_session: SQLAlchemy session
        entity: Entity to transfer

    Returns:
        KGEntity: The transferred entity
    """
    # Create the transferred entity
    stmt = (
        pg_insert(KGEntity)
        .values(
            id_name=make_entity_id(entity.entity_type_id_name, uuid.uuid4().hex[:20]),
            name=entity.name.casefold(),
            entity_key=entity.entity_key,
            parent_key=entity.parent_key,
            alternative_names=entity.alternative_names or [],
            entity_type_id_name=entity.entity_type_id_name,
            document_id=entity.document_id,
            occurrences=entity.occurrences,
            attributes=entity.attributes,
            event_time=entity.event_time,
        )
        .on_conflict_do_update(
            index_elements=["name", "entity_type_id_name", "document_id"],
            set_=dict(
                occurrences=KGEntity.occurrences + entity.occurrences,
                attributes=KGEntity.attributes.op("||")(
                    literal(entity.attributes, JSONB)
                ),
                entity_key=func.coalesce(KGEntity.entity_key, entity.entity_key),
                parent_key=func.coalesce(KGEntity.parent_key, entity.parent_key),
                event_time=entity.event_time,
                time_updated=datetime.now(),
            ),
        )
        .returning(KGEntity)
    )
    new_entity = db_session.execute(stmt).scalar()
    if new_entity is None:
        raise RuntimeError(f"Failed to transfer entity with id_name: {entity.id_name}")

    # Update the document's kg_stage if document_id is provided
    if entity.document_id is not None:
        dbdocument.update_document_kg_info(
            db_session,
            document_id=entity.document_id,
            kg_stage=KGStage.NORMALIZED,
        )

    # Update transferred
    db_session.query(KGEntityExtractionStaging).filter(
        KGEntityExtractionStaging.id_name == entity.id_name
    ).update({"transferred_id_name": new_entity.id_name})
    db_session.flush()

    return new_entity


def merge_entities(
    db_session: Session, parent: KGEntity, child: KGEntityExtractionStaging
) -> KGEntity:
    """Merge an entity from the extraction staging table into
    an existing entity in the normalized table.

    Args:
        db_session: SQLAlchemy session
        parent: Parent entity to merge into
        child: Child staging entity to merge

    Returns:
        KGEntity: The merged entity
    """
    # check we're not merging two entities with different document_ids
    if (
        parent.document_id is not None
        and child.document_id is not None
        and parent.document_id != child.document_id
    ):
        raise ValueError(
            "Overwriting the document_id of an entity with a document_id already is not allowed"
        )

    # update the parent entity (only document_id, alternative_names, occurrences)
    setting_doc = parent.document_id is None and child.document_id is not None
    document_id = child.document_id if setting_doc else parent.document_id
    alternative_names = set(parent.alternative_names or [])
    alternative_names.update(child.alternative_names or [])
    alternative_names.add(child.name.lower())
    alternative_names.discard(parent.name)

    stmt = (
        update(KGEntity)
        .where(KGEntity.id_name == parent.id_name)
        .values(
            document_id=document_id,
            alternative_names=list(alternative_names),
            occurrences=parent.occurrences + child.occurrences,
            attributes=parent.attributes | child.attributes,
            entity_key=parent.entity_key or child.entity_key,
            parent_key=parent.parent_key or child.parent_key,
        )
        .returning(KGEntity)
    )

    result = db_session.execute(stmt).scalar()
    if result is None:
        raise RuntimeError(f"Failed to merge entities with id_name: {parent.id_name}")

    # Update the document's kg_stage if document_id is set
    if setting_doc and child.document_id is not None:
        dbdocument.update_document_kg_info(
            db_session,
            document_id=child.document_id,
            kg_stage=KGStage.NORMALIZED,
        )

    # Update transferred
    db_session.query(KGEntityExtractionStaging).filter(
        KGEntityExtractionStaging.id_name == child.id_name
    ).update({"transferred_id_name": parent.id_name})
    db_session.flush()

    return result


def get_kg_entity_by_document(db: Session, document_id: str) -> KGEntity | None:
    """
    Check if a document_id exists in the kg_entities table and return its id_name if found.

    Args:
        db: SQLAlchemy database session
        document_id: The document ID to search for

    Returns:
        The id_name of the matching KGEntity if found, None otherwise
    """
    query = select(KGEntity).where(KGEntity.document_id == document_id)
    result = db.execute(query).scalar()
    return result


def get_grounded_entities_by_types(
    db_session: Session, entity_types: List[str], grounding: KGGroundingType
) -> List[KGEntity]:
    """Get all entities matching an entity_type.

    Args:
        db_session: SQLAlchemy session
        entity_types: List of entity types to filter by

    Returns:
        List of KGEntity objects belonging to the specified entity types
    """
    return (
        db_session.query(KGEntity)
        .join(KGEntityType, KGEntity.entity_type_id_name == KGEntityType.id_name)
        .filter(KGEntity.entity_type_id_name.in_(entity_types))
        .filter(KGEntityType.grounding == grounding)
        .all()
    )


def get_document_id_for_entity(db_session: Session, entity_id_name: str) -> str | None:
    """Get the document ID associated with an entity.

    Args:
        db_session: SQLAlchemy database session
        entity_id_name: The entity id_name to look up

    Returns:
        The document ID if found, None otherwise
    """
    entity = (
        db_session.query(KGEntity).filter(KGEntity.id_name == entity_id_name).first()
    )
    return entity.document_id if entity else None


def delete_from_kg_entities_extraction_staging__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """Delete entities from the extraction staging table."""
    db_session.query(KGEntityExtractionStaging).filter(
        KGEntityExtractionStaging.document_id.in_(document_ids)
    ).delete(synchronize_session=False)


def delete_from_kg_entities__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """Delete entities from the normalized table."""
    db_session.query(KGEntity).filter(KGEntity.document_id.in_(document_ids)).delete(
        synchronize_session=False
    )


def get_entity_name(db_session: Session, entity_id_name: str) -> str | None:
    """Get the name of an entity."""
    entity = (
        db_session.query(KGEntity).filter(KGEntity.id_name == entity_id_name).first()
    )
    return entity.name if entity else None


def get_entity_stats_by_grounded_source_name(
    db_session: Session,
) -> dict[str, tuple[datetime, int]]:
    """
    Returns a dict mapping each grounded_source_name to a tuple in which:
        - the first element is the latest update time across all entities with the same entity-type
        - the second element is the count of `KGEntity`s
    """
    results = (
        db_session.query(
            KGEntityType.grounded_source_name,
            func.count(KGEntity.id_name).label("entities_count"),
            func.max(KGEntity.time_updated).label("last_updated"),
        )
        .join(KGEntityType, KGEntity.entity_type_id_name == KGEntityType.id_name)
        .group_by(KGEntityType.grounded_source_name)
        .all()
    )

    # `row.grounded_source_name` is NULLABLE in the database schema.
    # Thus, for all "ungrounded" entity-types, we use a default name.
    return {
        (row.grounded_source_name or UNGROUNDED_SOURCE_NAME): (
            row.last_updated,
            row.entities_count,
        )
        for row in results
    }


================================================
FILE: backend/onyx/db/entity_type.py
================================================
from collections import defaultdict

from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.db.connector import fetch_unique_document_sources
from onyx.db.document import DocumentSource
from onyx.db.models import Connector
from onyx.db.models import KGEntityType
from onyx.kg.models import KGAttributeEntityOption
from onyx.server.kg.models import EntityType


UNGROUNDED_SOURCE_NAME = "Ungrounded"


def get_entity_types_with_grounded_source_name(
    db_session: Session,
) -> list[KGEntityType]:
    """Get all entity types that have non-null grounded_source_name.

    Args:
        db_session: SQLAlchemy session

    Returns:
        List of KGEntityType objects that have grounded_source_name defined
    """
    return (
        db_session.query(KGEntityType)
        .filter(KGEntityType.grounded_source_name.isnot(None))
        .all()
    )


def get_entity_types(
    db_session: Session,
    active: bool | None = True,
) -> list[KGEntityType]:
    # Query the database for all distinct entity types

    if active is None:
        return db_session.query(KGEntityType).order_by(KGEntityType.id_name).all()

    else:
        return (
            db_session.query(KGEntityType)
            .filter(KGEntityType.active == active)
            .order_by(KGEntityType.id_name)
            .all()
        )


def get_configured_entity_types(db_session: Session) -> dict[str, list[KGEntityType]]:
    # get entity types from configured sources
    configured_connector_sources = {
        source.value.lower()
        for source in fetch_unique_document_sources(db_session=db_session)
    }
    entity_types = (
        db_session.query(KGEntityType)
        .filter(KGEntityType.grounded_source_name.in_(configured_connector_sources))
        .all()
    )
    entity_type_set = {et.id_name for et in entity_types}

    # get implied entity types from those entity types
    for et in entity_types:
        for prop in et.parsed_attributes.metadata_attribute_conversion.values():
            if prop.implication_property is None:
                continue

            implied_et = prop.implication_property.implied_entity_type
            if implied_et == KGAttributeEntityOption.FROM_EMAIL:
                if "ACCOUNT" not in entity_type_set:
                    entity_type_set.add("ACCOUNT")
                if "EMPLOYEE" not in entity_type_set:
                    entity_type_set.add("EMPLOYEE")
            elif isinstance(implied_et, str):
                if implied_et not in entity_type_set:
                    entity_type_set.add(implied_et)

    ets = (
        db_session.query(KGEntityType)
        .filter(KGEntityType.id_name.in_(entity_type_set))
        .all()
    )

    et_map = defaultdict(list)
    for et in ets:
        key = et.grounded_source_name or UNGROUNDED_SOURCE_NAME
        et_map[key].append(et)

    return et_map


def update_entity_types_and_related_connectors__commit(
    db_session: Session, updates: list[EntityType]
) -> None:
    for upd in updates:
        db_session.execute(
            update(KGEntityType)
            .where(KGEntityType.id_name == upd.name)
            .values(
                description=upd.description,
                active=upd.active,
            )
        )
    db_session.flush()

    # Update connector sources

    configured_entity_types = get_configured_entity_types(db_session=db_session)

    active_entity_type_sources = {
        et.grounded_source_name
        for ets in configured_entity_types.values()
        for et in ets
        if et.active
    }

    # Update connectors that should be enabled
    db_session.execute(
        update(Connector)
        .where(
            Connector.source.in_(
                [
                    source
                    for source in DocumentSource
                    if source.value.lower() in active_entity_type_sources
                ]
            )
        )
        .where(~Connector.kg_processing_enabled)
        .values(kg_processing_enabled=True)
    )

    # Update connectors that should be disabled
    db_session.execute(
        update(Connector)
        .where(
            Connector.source.in_(
                [
                    source
                    for source in DocumentSource
                    if source.value.lower() not in active_entity_type_sources
                ]
            )
        )
        .where(Connector.kg_processing_enabled)
        .values(kg_processing_enabled=False)
    )

    db_session.commit()


================================================
FILE: backend/onyx/db/enums.py
================================================
from __future__ import annotations

from enum import Enum as PyEnum
from typing import ClassVar


class AccountType(str, PyEnum):
    """
    What kind of account this is — determines whether the user
    enters the group-based permission system.

    STANDARD + SERVICE_ACCOUNT → participate in group system
    BOT, EXT_PERM_USER, ANONYMOUS → fixed behavior
    """

    STANDARD = "STANDARD"
    BOT = "BOT"
    EXT_PERM_USER = "EXT_PERM_USER"
    SERVICE_ACCOUNT = "SERVICE_ACCOUNT"
    ANONYMOUS = "ANONYMOUS"

    def is_web_login(self) -> bool:
        """Whether this account type supports interactive web login."""
        return self not in (
            AccountType.BOT,
            AccountType.EXT_PERM_USER,
        )


class GrantSource(str, PyEnum):
    """How a permission grant was created."""

    USER = "USER"
    SCIM = "SCIM"
    SYSTEM = "SYSTEM"


class IndexingStatus(str, PyEnum):
    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    SUCCESS = "success"
    CANCELED = "canceled"
    FAILED = "failed"
    COMPLETED_WITH_ERRORS = "completed_with_errors"

    def is_terminal(self) -> bool:
        terminal_states = {
            IndexingStatus.SUCCESS,
            IndexingStatus.COMPLETED_WITH_ERRORS,
            IndexingStatus.CANCELED,
            IndexingStatus.FAILED,
        }
        return self in terminal_states

    def is_successful(self) -> bool:
        return (
            self == IndexingStatus.SUCCESS
            or self == IndexingStatus.COMPLETED_WITH_ERRORS
        )


class PermissionSyncStatus(str, PyEnum):
    """Status enum for permission sync attempts"""

    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    SUCCESS = "success"
    CANCELED = "canceled"
    FAILED = "failed"
    COMPLETED_WITH_ERRORS = "completed_with_errors"

    def is_terminal(self) -> bool:
        terminal_states = {
            PermissionSyncStatus.SUCCESS,
            PermissionSyncStatus.COMPLETED_WITH_ERRORS,
            PermissionSyncStatus.CANCELED,
            PermissionSyncStatus.FAILED,
        }
        return self in terminal_states

    def is_successful(self) -> bool:
        return (
            self == PermissionSyncStatus.SUCCESS
            or self == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        )


class IndexingMode(str, PyEnum):
    UPDATE = "update"
    REINDEX = "reindex"


class ProcessingMode(str, PyEnum):
    """Determines how documents are processed after fetching."""

    REGULAR = "REGULAR"  # Full pipeline: chunk → embed → Vespa
    FILE_SYSTEM = "FILE_SYSTEM"  # Write to file system only (JSON documents)
    RAW_BINARY = "RAW_BINARY"  # Write raw binary to S3 (no text extraction)


class SyncType(str, PyEnum):
    DOCUMENT_SET = "document_set"
    USER_GROUP = "user_group"
    CONNECTOR_DELETION = "connector_deletion"
    PRUNING = "pruning"  # not really a sync, but close enough
    EXTERNAL_PERMISSIONS = "external_permissions"
    EXTERNAL_GROUP = "external_group"

    def __str__(self) -> str:
        return self.value


class SyncStatus(str, PyEnum):
    IN_PROGRESS = "in_progress"
    SUCCESS = "success"
    FAILED = "failed"
    CANCELED = "canceled"

    def is_terminal(self) -> bool:
        terminal_states = {
            SyncStatus.SUCCESS,
            SyncStatus.FAILED,
        }
        return self in terminal_states


class MCPAuthenticationType(str, PyEnum):
    NONE = "NONE"
    API_TOKEN = "API_TOKEN"
    OAUTH = "OAUTH"
    PT_OAUTH = "PT_OAUTH"  # Pass-Through OAuth


class MCPTransport(str, PyEnum):
    """MCP transport types"""

    STDIO = "STDIO"  # TODO: currently unsupported, need to add a user guide for setup
    SSE = "SSE"  # Server-Sent Events (deprecated but still used)
    STREAMABLE_HTTP = "STREAMABLE_HTTP"  # Modern HTTP streaming


class MCPAuthenticationPerformer(str, PyEnum):
    ADMIN = "ADMIN"
    PER_USER = "PER_USER"


class MCPServerStatus(str, PyEnum):
    CREATED = "CREATED"  # Server created, needs auth configuration
    AWAITING_AUTH = "AWAITING_AUTH"  # Auth configured, pending user authentication
    FETCHING_TOOLS = "FETCHING_TOOLS"  # Auth complete, fetching tools
    CONNECTED = "CONNECTED"  # Fully configured and connected
    DISCONNECTED = "DISCONNECTED"  # Server disconnected, but not deleted


# Consistent with Celery task statuses
class TaskStatus(str, PyEnum):
    PENDING = "PENDING"
    STARTED = "STARTED"
    SUCCESS = "SUCCESS"
    FAILURE = "FAILURE"


class IndexModelStatus(str, PyEnum):
    PAST = "PAST"
    PRESENT = "PRESENT"
    FUTURE = "FUTURE"

    def is_current(self) -> bool:
        return self == IndexModelStatus.PRESENT

    def is_future(self) -> bool:
        return self == IndexModelStatus.FUTURE


class ChatSessionSharedStatus(str, PyEnum):
    PUBLIC = "public"
    PRIVATE = "private"


class ConnectorCredentialPairStatus(str, PyEnum):
    SCHEDULED = "SCHEDULED"
    INITIAL_INDEXING = "INITIAL_INDEXING"
    ACTIVE = "ACTIVE"
    PAUSED = "PAUSED"
    DELETING = "DELETING"
    INVALID = "INVALID"

    @classmethod
    def active_statuses(cls) -> list["ConnectorCredentialPairStatus"]:
        return [
            ConnectorCredentialPairStatus.ACTIVE,
            ConnectorCredentialPairStatus.SCHEDULED,
            ConnectorCredentialPairStatus.INITIAL_INDEXING,
        ]

    @classmethod
    def indexable_statuses(self) -> list["ConnectorCredentialPairStatus"]:
        # Superset of active statuses for indexing model swaps
        return self.active_statuses() + [
            ConnectorCredentialPairStatus.PAUSED,
        ]

    def is_active(self) -> bool:
        return self in self.active_statuses()


class AccessType(str, PyEnum):
    PUBLIC = "public"
    PRIVATE = "private"
    SYNC = "sync"


class EmbeddingPrecision(str, PyEnum):
    # matches vespa tensor type
    # only support float / bfloat16 for now, since there's not a
    # good reason to specify anything else
    BFLOAT16 = "bfloat16"
    FLOAT = "float"


class UserFileStatus(str, PyEnum):
    PROCESSING = "PROCESSING"
    INDEXING = "INDEXING"
    COMPLETED = "COMPLETED"
    SKIPPED = "SKIPPED"
    FAILED = "FAILED"
    CANCELED = "CANCELED"
    DELETING = "DELETING"


class ThemePreference(str, PyEnum):
    LIGHT = "light"
    DARK = "dark"
    SYSTEM = "system"


class DefaultAppMode(str, PyEnum):
    AUTO = "AUTO"
    CHAT = "CHAT"
    SEARCH = "SEARCH"


class SwitchoverType(str, PyEnum):
    REINDEX = "reindex"
    ACTIVE_ONLY = "active_only"
    INSTANT = "instant"


class OpenSearchDocumentMigrationStatus(str, PyEnum):
    """Status for Vespa to OpenSearch migration per document."""

    PENDING = "pending"
    COMPLETED = "completed"
    FAILED = "failed"
    PERMANENTLY_FAILED = "permanently_failed"


class OpenSearchTenantMigrationStatus(str, PyEnum):
    """Status for tenant-level OpenSearch migration."""

    PENDING = "pending"
    COMPLETED = "completed"


# Onyx Build Mode Enums
class BuildSessionStatus(str, PyEnum):
    ACTIVE = "active"
    IDLE = "idle"


class SharingScope(str, PyEnum):
    PRIVATE = "private"
    PUBLIC_ORG = "public_org"
    PUBLIC_GLOBAL = "public_global"


class SandboxStatus(str, PyEnum):
    PROVISIONING = "provisioning"
    RUNNING = "running"
    SLEEPING = "sleeping"  # Pod terminated, snapshots saved to S3
    TERMINATED = "terminated"
    FAILED = "failed"

    def is_active(self) -> bool:
        """Check if sandbox is in an active state (running)."""
        return self == SandboxStatus.RUNNING

    def is_terminal(self) -> bool:
        """Check if sandbox is in a terminal state."""
        return self in (SandboxStatus.TERMINATED, SandboxStatus.FAILED)

    def is_sleeping(self) -> bool:
        """Check if sandbox is sleeping (pod terminated but can be restored)."""
        return self == SandboxStatus.SLEEPING


class ArtifactType(str, PyEnum):
    WEB_APP = "web_app"
    PPTX = "pptx"
    DOCX = "docx"
    IMAGE = "image"
    MARKDOWN = "markdown"
    EXCEL = "excel"


class HierarchyNodeType(str, PyEnum):
    """Types of hierarchy nodes across different sources"""

    # Generic
    FOLDER = "folder"

    # Root-level type
    SOURCE = "source"  # Root node for a source (e.g., "Google Drive")

    # Google Drive
    SHARED_DRIVE = "shared_drive"
    MY_DRIVE = "my_drive"

    # Confluence
    SPACE = "space"
    PAGE = "page"  # Confluence pages can be both hierarchy nodes AND documents

    # Jira
    PROJECT = "project"

    # Notion
    DATABASE = "database"
    WORKSPACE = "workspace"

    # Sharepoint
    SITE = "site"
    DRIVE = "drive"  # Document library within a site

    # Slack
    CHANNEL = "channel"


class LLMModelFlowType(str, PyEnum):
    CHAT = "chat"
    VISION = "vision"
    CONTEXTUAL_RAG = "contextual_rag"


class HookPoint(str, PyEnum):
    DOCUMENT_INGESTION = "document_ingestion"
    QUERY_PROCESSING = "query_processing"


class HookFailStrategy(str, PyEnum):
    HARD = "hard"  # exception propagates, pipeline aborts
    SOFT = "soft"  # log error, return original input, pipeline continues


class Permission(str, PyEnum):
    """
    Permission tokens for group-based authorization.
    19 tokens total. full_admin_panel_access is an override —
    if present, any permission check passes.
    """

    # Basic (auto-granted to every new group)
    BASIC_ACCESS = "basic"

    # Read tokens — implied only, never granted directly
    READ_CONNECTORS = "read:connectors"
    READ_DOCUMENT_SETS = "read:document_sets"
    READ_AGENTS = "read:agents"
    READ_USERS = "read:users"

    # Add / Manage pairs
    ADD_AGENTS = "add:agents"
    MANAGE_AGENTS = "manage:agents"
    MANAGE_DOCUMENT_SETS = "manage:document_sets"
    ADD_CONNECTORS = "add:connectors"
    MANAGE_CONNECTORS = "manage:connectors"
    MANAGE_LLMS = "manage:llms"

    # Toggle tokens
    READ_AGENT_ANALYTICS = "read:agent_analytics"
    MANAGE_ACTIONS = "manage:actions"
    READ_QUERY_HISTORY = "read:query_history"
    MANAGE_USER_GROUPS = "manage:user_groups"
    CREATE_USER_API_KEYS = "create:user_api_keys"
    CREATE_SERVICE_ACCOUNT_API_KEYS = "create:service_account_api_keys"
    CREATE_SLACK_DISCORD_BOTS = "create:slack_discord_bots"

    # Override — any permission check passes
    FULL_ADMIN_PANEL_ACCESS = "admin"

    # Permissions that are implied by other grants and must never be stored
    # directly in the permission_grant table.
    IMPLIED: ClassVar[frozenset[Permission]]


Permission.IMPLIED = frozenset(
    {
        Permission.READ_CONNECTORS,
        Permission.READ_DOCUMENT_SETS,
        Permission.READ_AGENTS,
        Permission.READ_USERS,
    }
)


================================================
FILE: backend/onyx/db/federated.py
================================================
from datetime import datetime
from typing import Any
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.constants import FederatedConnectorSource
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import DocumentSet
from onyx.db.models import FederatedConnector
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.models import FederatedConnectorOAuthToken
from onyx.federated_connectors.factory import get_federated_connector
from onyx.utils.logger import setup_logger

logger = setup_logger()


def fetch_federated_connector_by_id(
    federated_connector_id: int, db_session: Session
) -> FederatedConnector | None:
    """Fetch a federated connector by its ID."""
    stmt = select(FederatedConnector).where(
        FederatedConnector.id == federated_connector_id
    )
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def fetch_all_federated_connectors(db_session: Session) -> list[FederatedConnector]:
    """Fetch all federated connectors with their OAuth tokens and document sets."""
    stmt = select(FederatedConnector).options(
        selectinload(FederatedConnector.oauth_tokens),
        selectinload(FederatedConnector.document_sets),
    )
    result = db_session.execute(stmt)
    return list(result.scalars().all())


def fetch_all_federated_connectors_parallel() -> list[FederatedConnector]:
    with get_session_with_current_tenant() as db_session:
        return fetch_all_federated_connectors(db_session)


def validate_federated_connector_credentials(
    source: FederatedConnectorSource,
    credentials: dict[str, Any],
) -> bool:
    """Validate credentials for a federated connector using the connector's validation logic."""
    try:
        # the initialization will fail if the credentials are invalid
        get_federated_connector(source, credentials)
        return True
    except Exception as e:
        logger.error(f"Error validating credentials for source {source}: {e}")
        return False


def create_federated_connector(
    db_session: Session,
    source: FederatedConnectorSource,
    credentials: dict[str, Any],
    config: dict[str, Any] | None = None,
) -> FederatedConnector:
    """Create a new federated connector with credential and config validation."""
    # Validate credentials before creating
    if not validate_federated_connector_credentials(source, credentials):
        raise ValueError(
            f"Invalid credentials for federated connector source: {source}"
        )

    # Validate config using connector-specific validation
    if config:
        try:
            # Get connector instance to access validate_config method
            connector = get_federated_connector(source, credentials)
            if not connector.validate_config(config):
                raise ValueError(
                    f"Invalid config for federated connector source: {source}"
                )
        except Exception as e:
            raise ValueError(f"Config validation failed for {source}: {str(e)}")

    federated_connector = FederatedConnector(
        source=source,
        credentials=credentials,
        config=config or {},
    )
    db_session.add(federated_connector)
    db_session.commit()
    return federated_connector


def update_federated_connector_oauth_token(
    db_session: Session,
    federated_connector_id: int,
    user_id: UUID,
    token: str,
    expires_at: datetime | None = None,
) -> FederatedConnectorOAuthToken:
    """Update or create OAuth token for a federated connector and user."""
    # First, try to find existing token for this user and connector
    stmt = select(FederatedConnectorOAuthToken).where(
        FederatedConnectorOAuthToken.federated_connector_id == federated_connector_id,
        FederatedConnectorOAuthToken.user_id == user_id,
    )
    existing_token = db_session.execute(stmt).scalar_one_or_none()

    if existing_token:
        # Update existing token
        existing_token.token = token  # type: ignore[assignment]
        existing_token.expires_at = expires_at
        db_session.commit()
        return existing_token
    else:
        # Create new token
        oauth_token = FederatedConnectorOAuthToken(
            federated_connector_id=federated_connector_id,
            user_id=user_id,
            token=token,
            expires_at=expires_at,
        )
        db_session.add(oauth_token)
        db_session.commit()
        return oauth_token


def get_federated_connector_oauth_token(
    db_session: Session,
    federated_connector_id: int,
    user_id: UUID,
) -> FederatedConnectorOAuthToken | None:
    """Get OAuth token for a federated connector and user."""
    stmt = select(FederatedConnectorOAuthToken).where(
        FederatedConnectorOAuthToken.federated_connector_id == federated_connector_id,
        FederatedConnectorOAuthToken.user_id == user_id,
    )
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def list_federated_connector_oauth_tokens(
    db_session: Session,
    user_id: UUID,
) -> list[FederatedConnectorOAuthToken]:
    """List all OAuth tokens for all federated connectors."""
    stmt = (
        select(FederatedConnectorOAuthToken)
        .where(
            FederatedConnectorOAuthToken.user_id == user_id,
        )
        .options(
            joinedload(FederatedConnectorOAuthToken.federated_connector),
        )
    )
    result = db_session.scalars(stmt)
    return list(result)


def create_federated_connector_document_set_mapping(
    db_session: Session,
    federated_connector_id: int,
    document_set_id: int,
    entities: dict[str, Any],
) -> FederatedConnector__DocumentSet:
    """Create a mapping between federated connector and document set with entities."""
    mapping = FederatedConnector__DocumentSet(
        federated_connector_id=federated_connector_id,
        document_set_id=document_set_id,
        entities=entities,
    )
    db_session.add(mapping)
    db_session.commit()
    return mapping


def update_federated_connector_document_set_entities(
    db_session: Session,
    federated_connector_id: int,
    document_set_id: int,
    entities: dict[str, Any],
) -> FederatedConnector__DocumentSet | None:
    """Update entities for a federated connector document set mapping."""
    stmt = select(FederatedConnector__DocumentSet).where(
        FederatedConnector__DocumentSet.federated_connector_id
        == federated_connector_id,
        FederatedConnector__DocumentSet.document_set_id == document_set_id,
    )
    mapping = db_session.execute(stmt).scalar_one_or_none()

    if mapping:
        mapping.entities = entities
        db_session.commit()
        return mapping

    return None


def get_federated_connector_document_set_mappings(
    db_session: Session,
    federated_connector_id: int,
) -> list[FederatedConnector__DocumentSet]:
    """Get all document set mappings for a federated connector."""
    stmt = select(FederatedConnector__DocumentSet).where(
        FederatedConnector__DocumentSet.federated_connector_id == federated_connector_id
    )
    result = db_session.execute(stmt)
    return list(result.scalars().all())


def delete_federated_connector_document_set_mapping(
    db_session: Session,
    federated_connector_id: int,
    document_set_id: int,
) -> bool:
    """Delete a federated connector document set mapping."""
    stmt = select(FederatedConnector__DocumentSet).where(
        FederatedConnector__DocumentSet.federated_connector_id
        == federated_connector_id,
        FederatedConnector__DocumentSet.document_set_id == document_set_id,
    )
    mapping = db_session.execute(stmt).scalar_one_or_none()

    if mapping:
        db_session.delete(mapping)
        db_session.commit()
        return True

    return False


def get_federated_connector_document_set_mappings_by_document_set_names(
    db_session: Session,
    document_set_names: list[str],
) -> list[FederatedConnector__DocumentSet]:
    """Get all document set mappings for a federated connector by document set names."""
    stmt = (
        select(FederatedConnector__DocumentSet)
        .join(
            DocumentSet,
            FederatedConnector__DocumentSet.document_set_id == DocumentSet.id,
        )
        .options(joinedload(FederatedConnector__DocumentSet.federated_connector))
        .where(DocumentSet.name.in_(document_set_names))
    )
    result = db_session.scalars(stmt)
    # Use unique() because joinedload can cause duplicate rows
    return list(result.unique())


def update_federated_connector(
    db_session: Session,
    federated_connector_id: int,
    credentials: dict[str, Any] | None = None,
    config: dict[str, Any] | None = None,
) -> FederatedConnector | None:
    """Update a federated connector with credential and config validation."""
    federated_connector = fetch_federated_connector_by_id(
        federated_connector_id, db_session
    )
    if not federated_connector:
        return None

    # Use provided credentials if updating them, otherwise use existing credentials
    # This is needed to instantiate the connector for config validation when only config is being updated
    creds_to_use = (
        credentials
        if credentials is not None
        else (
            federated_connector.credentials.get_value(apply_mask=False)
            if federated_connector.credentials
            else {}
        )
    )

    if credentials is not None:
        # Validate credentials before updating
        if not validate_federated_connector_credentials(
            federated_connector.source, credentials
        ):
            raise ValueError(
                f"Invalid credentials for federated connector source: {federated_connector.source}"
            )
        federated_connector.credentials = credentials  # type: ignore[assignment]

    if config is not None:
        # Validate config using connector-specific validation
        try:
            # Get connector instance to access validate_config method
            connector = get_federated_connector(
                federated_connector.source, creds_to_use
            )
            if not connector.validate_config(config):
                raise ValueError(
                    f"Invalid config for federated connector source: {federated_connector.source}"
                )
        except Exception as e:
            raise ValueError(
                f"Config validation failed for {federated_connector.source}: {str(e)}"
            )
        federated_connector.config = config

    db_session.commit()
    return federated_connector


def delete_federated_connector(
    db_session: Session,
    federated_connector_id: int,
) -> bool:
    """Delete a federated connector and all its related data."""
    federated_connector = fetch_federated_connector_by_id(
        federated_connector_id, db_session
    )
    if not federated_connector:
        return False

    # Delete related OAuth tokens (cascade should handle this)
    # Delete related document set mappings (cascade should handle this)
    db_session.delete(federated_connector)
    db_session.commit()
    return True


================================================
FILE: backend/onyx/db/feedback.py
================================================
from datetime import datetime
from datetime import timezone
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import and_
from sqlalchemy import asc
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import exists
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session

from onyx.configs.constants import MessageType
from onyx.configs.constants import SearchFeedbackType
from onyx.db.chat import get_chat_message
from onyx.db.enums import AccessType
from onyx.db.models import ChatMessageFeedback
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document as DbDocument
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import DocumentRetrievalFeedback
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _fetch_db_doc_by_id(doc_id: str, db_session: Session) -> DbDocument:
    stmt = select(DbDocument).where(DbDocument.id == doc_id)
    result = db_session.execute(stmt)
    doc = result.scalar_one_or_none()

    if not doc:
        raise ValueError("Invalid Document ID Provided")

    return doc


def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
    if user.role == UserRole.ADMIN:
        return stmt

    stmt = stmt.distinct()
    DocByCC = aliased(DocumentByConnectorCredentialPair)
    CCPair = aliased(ConnectorCredentialPair)
    UG__CCpair = aliased(UserGroup__ConnectorCredentialPair)
    User__UG = aliased(User__UserGroup)

    """
    Here we select documents by relation:
    User -> User__UserGroup -> UserGroup__ConnectorCredentialPair ->
    ConnectorCredentialPair -> DocumentByConnectorCredentialPair -> Document
    """
    stmt = (
        stmt.outerjoin(DocByCC, DocByCC.id == DbDocument.id)
        .outerjoin(
            CCPair,
            and_(
                CCPair.connector_id == DocByCC.connector_id,
                CCPair.credential_id == DocByCC.credential_id,
            ),
        )
        .outerjoin(UG__CCpair, UG__CCpair.cc_pair_id == CCPair.id)
        .outerjoin(User__UG, User__UG.user_group_id == UG__CCpair.user_group_id)
    )

    """
    Filter Documents by:
    - if the user is in the user_group that owns the object
    - if the user is not a global_curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out objects that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all objects in the groups the user is a curator
    for (as well as public objects as well)
    """

    # Anonymous users only see public documents
    if user.is_anonymous:
        where_clause = CCPair.access_type == AccessType.PUBLIC
        return stmt.where(where_clause)

    where_clause = User__UG.user_id == user.id
    if user.role == UserRole.CURATOR and get_editable:
        where_clause &= User__UG.is_curator == True  # noqa: E712
    if get_editable:
        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)
        where_clause &= (
            ~exists()
            .where(UG__CCpair.cc_pair_id == CCPair.id)
            .where(~UG__CCpair.user_group_id.in_(user_groups))
            .correlate(CCPair)
        )
    else:
        where_clause |= CCPair.access_type == AccessType.PUBLIC

    return stmt.where(where_clause)


def fetch_docs_ranked_by_boost_for_user(
    db_session: Session,
    user: User,
    ascending: bool = False,
    limit: int = 100,
) -> list[DbDocument]:
    order_func = asc if ascending else desc
    stmt = select(DbDocument)

    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=False)

    stmt = stmt.order_by(
        order_func(DbDocument.boost), order_func(DbDocument.semantic_id)
    )
    stmt = stmt.limit(limit)
    result = db_session.execute(stmt)
    doc_list = result.scalars().all()

    return list(doc_list)


def update_document_boost_for_user(
    db_session: Session,
    document_id: str,
    boost: int,
    user: User,
) -> None:
    stmt = select(DbDocument).where(DbDocument.id == document_id)
    stmt = _add_user_filters(stmt, user, get_editable=True)
    result: DbDocument | None = db_session.execute(stmt).scalar_one_or_none()
    if result is None:
        raise HTTPException(
            status_code=400, detail="Document is not editable by this user"
        )

    result.boost = boost

    # updating last_modified triggers sync
    # TODO: Should this submit to the queue directly so that the UI can update?
    result.last_modified = datetime.now(timezone.utc)
    db_session.commit()


def update_document_hidden_for_user(
    db_session: Session,
    document_id: str,
    hidden: bool,
    user: User,
) -> None:
    stmt = select(DbDocument).where(DbDocument.id == document_id)
    stmt = _add_user_filters(stmt, user, get_editable=True)
    result = db_session.execute(stmt).scalar_one_or_none()
    if result is None:
        raise HTTPException(
            status_code=400, detail="Document is not editable by this user"
        )

    result.hidden = hidden

    # updating last_modified triggers sync
    # TODO: Should this submit to the queue directly so that the UI can update?
    result.last_modified = datetime.now(timezone.utc)
    db_session.commit()


def create_doc_retrieval_feedback(
    message_id: int,
    document_id: str,
    document_rank: int,
    db_session: Session,
    clicked: bool = False,
    feedback: SearchFeedbackType | None = None,
) -> None:
    """Creates a new Document feedback row and updates the boost value in Postgres and Vespa"""
    db_doc = _fetch_db_doc_by_id(document_id, db_session)

    retrieval_feedback = DocumentRetrievalFeedback(
        chat_message_id=message_id,
        document_id=document_id,
        document_rank=document_rank,
        clicked=clicked,
        feedback=feedback,
    )

    if feedback is not None:
        if feedback == SearchFeedbackType.ENDORSE:
            db_doc.boost += 1
        elif feedback == SearchFeedbackType.REJECT:
            db_doc.boost -= 1
        elif feedback == SearchFeedbackType.HIDE:
            db_doc.hidden = True
        elif feedback == SearchFeedbackType.UNHIDE:
            db_doc.hidden = False
        else:
            raise ValueError("Unhandled document feedback type")

    if feedback in [
        SearchFeedbackType.ENDORSE,
        SearchFeedbackType.REJECT,
        SearchFeedbackType.HIDE,
    ]:
        # updating last_modified triggers sync
        # TODO: Should this submit to the queue directly so that the UI can update?
        db_doc.last_modified = datetime.now(timezone.utc)

    db_session.add(retrieval_feedback)
    db_session.commit()


def delete_document_feedback_for_documents__no_commit(
    document_ids: list[str], db_session: Session
) -> None:
    """NOTE: does not commit transaction so that this can be used as part of a
    larger transaction block."""
    stmt = delete(DocumentRetrievalFeedback).where(
        DocumentRetrievalFeedback.document_id.in_(document_ids)
    )
    db_session.execute(stmt)


def create_chat_message_feedback(
    is_positive: bool | None,
    feedback_text: str | None,
    chat_message_id: int,
    user_id: UUID | None,
    db_session: Session,
    # Slack user requested help from human
    required_followup: bool | None = None,
    predefined_feedback: str | None = None,  # Added predefined_feedback parameter
) -> None:
    if (
        is_positive is None
        and feedback_text is None
        and required_followup is None
        and predefined_feedback is None
    ):
        raise ValueError("No feedback provided")

    chat_message = get_chat_message(
        chat_message_id=chat_message_id, user_id=user_id, db_session=db_session
    )

    if chat_message.message_type != MessageType.ASSISTANT:
        raise ValueError("Can only provide feedback on LLM Outputs")

    message_feedback = ChatMessageFeedback(
        chat_message_id=chat_message_id,
        is_positive=is_positive,
        feedback_text=feedback_text,
        required_followup=required_followup,
        predefined_feedback=predefined_feedback,
    )

    db_session.add(message_feedback)
    db_session.commit()


def remove_chat_message_feedback(
    chat_message_id: int,
    user_id: UUID | None,
    db_session: Session,
) -> None:
    """Remove all feedback for a chat message."""
    chat_message = get_chat_message(
        chat_message_id=chat_message_id, user_id=user_id, db_session=db_session
    )

    if chat_message.message_type != MessageType.ASSISTANT:
        raise ValueError("Can only remove feedback from LLM Outputs")

    # Delete all feedback for this message
    db_session.query(ChatMessageFeedback).filter(
        ChatMessageFeedback.chat_message_id == chat_message_id
    ).delete()

    db_session.commit()


================================================
FILE: backend/onyx/db/file_content.py
================================================
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from onyx.db.models import FileContent


def get_file_content_by_file_id(
    file_id: str,
    db_session: Session,
) -> FileContent:
    record = db_session.query(FileContent).filter_by(file_id=file_id).first()
    if not record:
        raise RuntimeError(
            f"File content for file_id {file_id} does not exist or was deleted"
        )
    return record


def get_file_content_by_file_id_optional(
    file_id: str,
    db_session: Session,
) -> FileContent | None:
    return db_session.query(FileContent).filter_by(file_id=file_id).first()


def upsert_file_content(
    file_id: str,
    lobj_oid: int,
    file_size: int,
    db_session: Session,
) -> FileContent:
    """Atomic upsert using INSERT ... ON CONFLICT DO UPDATE to avoid
    race conditions when concurrent calls target the same file_id."""
    stmt = insert(FileContent).values(
        file_id=file_id,
        lobj_oid=lobj_oid,
        file_size=file_size,
    )
    stmt = stmt.on_conflict_do_update(
        index_elements=[FileContent.file_id],
        set_={
            "lobj_oid": stmt.excluded.lobj_oid,
            "file_size": stmt.excluded.file_size,
        },
    )
    db_session.execute(stmt)

    # Return the merged ORM instance so callers can inspect the result
    return db_session.get(FileContent, file_id)  # type: ignore[return-value]


def transfer_file_content_file_id(
    old_file_id: str,
    new_file_id: str,
    db_session: Session,
) -> None:
    """Move a file_content row from old_file_id to new_file_id in-place.

    This avoids creating a duplicate row that shares the same Large Object OID,
    keeping OID ownership unique at all times.  The caller must ensure that
    new_file_id already exists in file_record (FK target)."""
    rows = (
        db_session.query(FileContent)
        .filter_by(file_id=old_file_id)
        .update({"file_id": new_file_id})
    )
    if not rows:
        raise RuntimeError(
            f"File content for file_id {old_file_id} does not exist or was deleted"
        )


def delete_file_content_by_file_id(
    file_id: str,
    db_session: Session,
) -> None:
    db_session.query(FileContent).filter_by(file_id=file_id).delete()


================================================
FILE: backend/onyx/db/file_record.py
================================================
from sqlalchemy import and_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from onyx.background.task_utils import QUERY_REPORT_NAME_PREFIX
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import FileType
from onyx.db.models import FileRecord


def get_query_history_export_files(
    db_session: Session,
) -> list[FileRecord]:
    return list(
        db_session.scalars(
            select(FileRecord).where(
                and_(
                    FileRecord.file_id.like(f"{QUERY_REPORT_NAME_PREFIX}-%"),
                    FileRecord.file_type == FileType.CSV,
                    FileRecord.file_origin == FileOrigin.QUERY_HISTORY_CSV,
                )
            )
        )
    )


def get_filerecord_by_file_id_optional(
    file_id: str,
    db_session: Session,
) -> FileRecord | None:
    return db_session.query(FileRecord).filter_by(file_id=file_id).first()


def get_filerecord_by_file_id(
    file_id: str,
    db_session: Session,
) -> FileRecord:
    filestore = db_session.query(FileRecord).filter_by(file_id=file_id).first()

    if not filestore:
        raise RuntimeError(f"File by id {file_id} does not exist or was deleted")

    return filestore


def get_filerecord_by_prefix(
    prefix: str,
    db_session: Session,
) -> list[FileRecord]:
    if not prefix:
        return db_session.query(FileRecord).all()
    return (
        db_session.query(FileRecord).filter(FileRecord.file_id.like(f"{prefix}%")).all()
    )


def delete_filerecord_by_file_id(
    file_id: str,
    db_session: Session,
) -> None:
    db_session.query(FileRecord).filter_by(file_id=file_id).delete()


def upsert_filerecord(
    file_id: str,
    display_name: str,
    file_origin: FileOrigin,
    file_type: str,
    bucket_name: str,
    object_key: str,
    db_session: Session,
    file_metadata: dict | None = None,
) -> FileRecord:
    """Atomic upsert using INSERT ... ON CONFLICT DO UPDATE to avoid
    race conditions when concurrent calls target the same file_id."""
    stmt = insert(FileRecord).values(
        file_id=file_id,
        display_name=display_name,
        file_origin=file_origin,
        file_type=file_type,
        file_metadata=file_metadata,
        bucket_name=bucket_name,
        object_key=object_key,
    )
    stmt = stmt.on_conflict_do_update(
        index_elements=[FileRecord.file_id],
        set_={
            "display_name": stmt.excluded.display_name,
            "file_origin": stmt.excluded.file_origin,
            "file_type": stmt.excluded.file_type,
            "file_metadata": stmt.excluded.file_metadata,
            "bucket_name": stmt.excluded.bucket_name,
            "object_key": stmt.excluded.object_key,
        },
    )
    db_session.execute(stmt)

    return db_session.get(FileRecord, file_id)  # type: ignore[return-value]


================================================
FILE: backend/onyx/db/hierarchy.py
================================================
"""CRUD operations for HierarchyNode."""

from collections import defaultdict

from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
from onyx.db.enums import HierarchyNodeType
from onyx.db.models import Document
from onyx.db.models import HierarchyNode
from onyx.db.models import HierarchyNodeByConnectorCredentialPair
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()

# Sources where hierarchy nodes can also be documents.
# For these sources, pages/items can be both a hierarchy node (with children)
# AND a document with indexed content. For example:
# - Notion: Pages with child pages are hierarchy nodes, but also documents
# - Confluence: Pages can have child pages and also contain content
# Other sources like Google Drive have folders as hierarchy nodes, but folders
# are not documents themselves.
SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS: set[DocumentSource] = {
    DocumentSource.NOTION,
    DocumentSource.CONFLUENCE,
}


def _get_source_display_name(source: DocumentSource) -> str:
    """Get a human-readable display name for a source type."""
    return source.value.replace("_", " ").title()


def get_hierarchy_node_by_raw_id(
    db_session: Session,
    raw_node_id: str,
    source: DocumentSource,
) -> HierarchyNode | None:
    """Get a hierarchy node by its raw ID and source."""
    stmt = select(HierarchyNode).where(
        HierarchyNode.raw_node_id == raw_node_id,
        HierarchyNode.source == source,
    )
    return db_session.execute(stmt).scalar_one_or_none()


def get_source_hierarchy_node(
    db_session: Session,
    source: DocumentSource,
) -> HierarchyNode | None:
    """Get the SOURCE-type root node for a given source."""
    stmt = select(HierarchyNode).where(
        HierarchyNode.source == source,
        HierarchyNode.node_type == HierarchyNodeType.SOURCE,
    )
    return db_session.execute(stmt).scalar_one_or_none()


def ensure_source_node_exists(
    db_session: Session,
    source: DocumentSource,
    commit: bool = True,
) -> HierarchyNode:
    """
    Ensure that a SOURCE-type root node exists for the given source.

    This function is idempotent - it will return the existing SOURCE node if one
    exists, or create a new one if not.

    The SOURCE node is the root of the hierarchy tree for a given source type
    (e.g., "Google Drive", "Confluence"). All other hierarchy nodes for that
    source should ultimately have this node as an ancestor.

    For the SOURCE node:
    - raw_node_id is set to the source name (e.g., "google_drive")
    - parent_id is None (it's the root)
    - display_name is a human-readable version (e.g., "Google Drive")

    Args:
        db_session: SQLAlchemy session
        source: The document source type
        commit: Whether to commit the transaction

    Returns:
        The existing or newly created SOURCE-type HierarchyNode
    """
    # Try to get existing SOURCE node first
    existing_node = get_source_hierarchy_node(db_session, source)
    if existing_node:
        return existing_node

    # Create the SOURCE node
    display_name = _get_source_display_name(source)

    source_node = HierarchyNode(
        raw_node_id=source.value,  # Use source name as raw_node_id
        display_name=display_name,
        link=None,
        source=source,
        node_type=HierarchyNodeType.SOURCE,
        document_id=None,
        parent_id=None,  # SOURCE nodes have no parent
    )

    db_session.add(source_node)

    # Flush to get the ID and detect any race conditions
    try:
        db_session.flush()
    except Exception:
        # Race condition - another worker created it. Roll back and fetch.
        db_session.rollback()
        existing_node = get_source_hierarchy_node(db_session, source)
        if existing_node:
            return existing_node
        # If still not found, re-raise the original exception
        raise

    if commit:
        db_session.commit()

    logger.info(
        f"Created SOURCE hierarchy node for {source.value}: id={source_node.id}, display_name={display_name}"
    )

    return source_node


def resolve_parent_hierarchy_node_id(
    db_session: Session,
    raw_parent_id: str | None,
    source: DocumentSource,
) -> int | None:
    """
    Resolve a raw_parent_id to a database HierarchyNode ID.

    If raw_parent_id is None, returns the SOURCE node ID for backward compatibility.
    If the parent node doesn't exist, returns the SOURCE node ID as fallback.
    """
    if raw_parent_id is None:
        # No parent specified - use the SOURCE node
        source_node = get_source_hierarchy_node(db_session, source)
        return source_node.id if source_node else None

    parent_node = get_hierarchy_node_by_raw_id(db_session, raw_parent_id, source)
    if parent_node:
        return parent_node.id

    # Parent not found - fall back to SOURCE node
    logger.warning(
        f"Parent hierarchy node not found: raw_id={raw_parent_id}, source={source}. Falling back to SOURCE node."
    )
    source_node = get_source_hierarchy_node(db_session, source)
    return source_node.id if source_node else None


def upsert_parents(
    db_session: Session,
    node: PydanticHierarchyNode,
    source: DocumentSource,
    node_by_id: dict[str, PydanticHierarchyNode],
    done_ids: set[str],
    is_connector_public: bool = False,
) -> None:
    """
    Upsert the parents of a hierarchy node.
    """
    if (
        node.node_type == HierarchyNodeType.SOURCE
        or (node.raw_parent_id not in node_by_id)
        or (node.raw_parent_id in done_ids)
    ):
        return
    parent_node = node_by_id[node.raw_parent_id]
    upsert_parents(
        db_session,
        parent_node,
        source,
        node_by_id,
        done_ids,
        is_connector_public=is_connector_public,
    )
    upsert_hierarchy_node(
        db_session,
        parent_node,
        source,
        commit=False,
        is_connector_public=is_connector_public,
    )
    done_ids.add(parent_node.raw_node_id)


def upsert_hierarchy_node(
    db_session: Session,
    node: PydanticHierarchyNode,
    source: DocumentSource,
    commit: bool = True,
    is_connector_public: bool = False,
) -> HierarchyNode:
    """
    Upsert a hierarchy node from a Pydantic model.

    If a node with the same raw_node_id and source exists, updates it.
    Otherwise, creates a new node.

    Args:
        db_session: SQLAlchemy session
        node: The Pydantic hierarchy node to upsert
        source: Document source type
        commit: Whether to commit the transaction
        is_connector_public: If True, the connector is public (organization-wide access)
            and all hierarchy nodes should be marked as public regardless of their
            external_access settings. This ensures nodes from public connectors are
            accessible to all users.
    """
    # Resolve parent_id from raw_parent_id
    parent_id = (
        None
        if node.node_type == HierarchyNodeType.SOURCE
        else resolve_parent_hierarchy_node_id(db_session, node.raw_parent_id, source)
    )

    # For public connectors, all nodes are public
    # Otherwise, extract permission fields from external_access if present
    if is_connector_public:
        is_public = True
        external_user_emails: list[str] | None = None
        external_user_group_ids: list[str] | None = None
    elif node.external_access:
        is_public = node.external_access.is_public
        external_user_emails = (
            list(node.external_access.external_user_emails)
            if node.external_access.external_user_emails
            else None
        )
        external_user_group_ids = (
            list(node.external_access.external_user_group_ids)
            if node.external_access.external_user_group_ids
            else None
        )
    else:
        is_public = False
        external_user_emails = None
        external_user_group_ids = None

    # Check if node already exists
    existing_node = get_hierarchy_node_by_raw_id(db_session, node.raw_node_id, source)

    if existing_node:
        # Update existing node
        existing_node.display_name = node.display_name
        existing_node.link = node.link
        existing_node.node_type = node.node_type
        existing_node.parent_id = parent_id
        # Update permission fields
        existing_node.is_public = is_public
        existing_node.external_user_emails = external_user_emails
        existing_node.external_user_group_ids = external_user_group_ids
        hierarchy_node = existing_node
    else:
        # Create new node
        hierarchy_node = HierarchyNode(
            raw_node_id=node.raw_node_id,
            display_name=node.display_name,
            link=node.link,
            source=source,
            node_type=node.node_type,
            parent_id=parent_id,
            is_public=is_public,
            external_user_emails=external_user_emails,
            external_user_group_ids=external_user_group_ids,
        )
        db_session.add(hierarchy_node)

    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return hierarchy_node


def upsert_hierarchy_nodes_batch(
    db_session: Session,
    nodes: list[PydanticHierarchyNode],
    source: DocumentSource,
    commit: bool = True,
    is_connector_public: bool = False,
) -> list[HierarchyNode]:
    """
    Batch upsert hierarchy nodes.

    Note: This function requires that for each node passed in, all
    its ancestors exist in either the database or elsewhere in the nodes list.
    This function handles parent dependencies for you as long as that condition is met
    (so you don't need to worry about parent nodes appearing before their children in the list).

    Args:
        db_session: SQLAlchemy session
        nodes: List of Pydantic hierarchy nodes to upsert
        source: Document source type
        commit: Whether to commit the transaction
        is_connector_public: If True, the connector is public (organization-wide access)
            and all hierarchy nodes should be marked as public regardless of their
            external_access settings.
    """
    node_by_id = {}
    for node in nodes:
        if node.node_type != HierarchyNodeType.SOURCE:
            node_by_id[node.raw_node_id] = node
    done_ids = set[str]()

    results = []
    for node in nodes:
        if node.raw_node_id in done_ids:
            continue
        upsert_parents(
            db_session,
            node,
            source,
            node_by_id,
            done_ids,
            is_connector_public=is_connector_public,
        )
        hierarchy_node = upsert_hierarchy_node(
            db_session,
            node,
            source,
            commit=False,
            is_connector_public=is_connector_public,
        )
        done_ids.add(node.raw_node_id)
        results.append(hierarchy_node)

    if commit:
        db_session.commit()

    return results


def link_hierarchy_nodes_to_documents(
    db_session: Session,
    document_ids: list[str],
    source: DocumentSource,
    commit: bool = True,
) -> int:
    """
    Link hierarchy nodes to their corresponding documents.

    For connectors like Notion and Confluence where pages can be both hierarchy nodes
    AND documents, we need to set the document_id field on hierarchy nodes after the
    documents are created. This is because hierarchy nodes are processed before documents,
    and the FK constraint on document_id requires the document to exist first.

    Args:
        db_session: SQLAlchemy session
        document_ids: List of document IDs that were just created/updated
        source: The document source (e.g., NOTION, CONFLUENCE)
        commit: Whether to commit the transaction

    Returns:
        Number of hierarchy nodes that were linked to documents
    """
    # Skip for sources where hierarchy nodes cannot also be documents
    if source not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS:
        return 0

    if not document_ids:
        return 0

    # Find hierarchy nodes where raw_node_id matches a document_id
    # These are pages that are both hierarchy nodes and documents
    stmt = select(HierarchyNode).where(
        HierarchyNode.source == source,
        HierarchyNode.raw_node_id.in_(document_ids),
        HierarchyNode.document_id.is_(None),  # Only update if not already linked
    )
    nodes_to_update = list(db_session.execute(stmt).scalars().all())

    # Update document_id for each matching node
    for node in nodes_to_update:
        node.document_id = node.raw_node_id

    if commit:
        db_session.commit()

    if nodes_to_update:
        logger.debug(
            f"Linked {len(nodes_to_update)} hierarchy nodes to documents for source {source.value}"
        )

    return len(nodes_to_update)


def get_hierarchy_node_children(
    db_session: Session,
    parent_id: int,
    limit: int = 100,
    offset: int = 0,
) -> list[HierarchyNode]:
    """Get children of a hierarchy node, paginated."""
    stmt = (
        select(HierarchyNode)
        .where(HierarchyNode.parent_id == parent_id)
        .order_by(HierarchyNode.display_name)
        .limit(limit)
        .offset(offset)
    )
    return list(db_session.execute(stmt).scalars().all())


def get_hierarchy_node_by_id(
    db_session: Session,
    node_id: int,
) -> HierarchyNode | None:
    """Get a hierarchy node by its database ID."""
    return db_session.get(HierarchyNode, node_id)


def get_root_hierarchy_nodes_for_source(
    db_session: Session,
    source: DocumentSource,
) -> list[HierarchyNode]:
    """Get all root-level hierarchy nodes for a source (children of SOURCE node)."""
    source_node = get_source_hierarchy_node(db_session, source)
    if not source_node:
        return []

    return get_hierarchy_node_children(db_session, source_node.id)


def get_all_hierarchy_nodes_for_source(
    db_session: Session,
    source: DocumentSource,
) -> list[HierarchyNode]:
    """
    Get ALL hierarchy nodes for a given source.

    This is used to populate the Redis cache. Returns all nodes including
    the SOURCE-type root node.

    Args:
        db_session: SQLAlchemy session
        source: The document source to get nodes for

    Returns:
        List of all HierarchyNode objects for the source
    """
    stmt = select(HierarchyNode).where(HierarchyNode.source == source)
    return list(db_session.execute(stmt).scalars().all())


def _get_accessible_hierarchy_nodes_for_source(
    db_session: Session,
    source: DocumentSource,
    user_email: str,  # noqa: ARG001
    external_group_ids: list[str],  # noqa: ARG001
) -> list[HierarchyNode]:
    """
    MIT version: Returns all hierarchy nodes for the source without permission filtering.

    In the MIT version, permission checks are not performed on hierarchy nodes.
    The EE version overrides this to apply permission filtering based on user
    email and external group IDs.

    Args:
        db_session: SQLAlchemy session
        source: Document source type
        user_email: User's email (unused in MIT version)
        external_group_ids: User's external group IDs (unused in MIT version)

    Returns:
        List of all HierarchyNode objects for the source
    """
    stmt = select(HierarchyNode).where(HierarchyNode.source == source)
    stmt = stmt.order_by(HierarchyNode.display_name)
    return list(db_session.execute(stmt).scalars().all())


def get_accessible_hierarchy_nodes_for_source(
    db_session: Session,
    source: DocumentSource,
    user_email: str,
    external_group_ids: list[str],
) -> list[HierarchyNode]:
    """
    Get hierarchy nodes for a source that are accessible to the user.

    Uses fetch_versioned_implementation to get the appropriate version:
    - MIT version: Returns all nodes (no permission filtering)
    - EE version: Filters based on user email and external group IDs
    """
    versioned_fn = fetch_versioned_implementation(
        "onyx.db.hierarchy", "_get_accessible_hierarchy_nodes_for_source"
    )
    return versioned_fn(db_session, source, user_email, external_group_ids)


def get_document_parent_hierarchy_node_ids(
    db_session: Session,
    document_ids: list[str],
) -> dict[str, int | None]:
    """
    Get the parent_hierarchy_node_id for multiple documents in a single query.

    Args:
        db_session: SQLAlchemy session
        document_ids: List of document IDs to look up

    Returns:
        Dict mapping document_id -> parent_hierarchy_node_id (or None if not set)
    """

    if not document_ids:
        return {}

    stmt = select(Document.id, Document.parent_hierarchy_node_id).where(
        Document.id.in_(document_ids)
    )
    results = db_session.execute(stmt).all()

    return {doc_id: parent_id for doc_id, parent_id in results}


def update_document_parent_hierarchy_nodes(
    db_session: Session,
    doc_parent_map: dict[str, int | None],
    commit: bool = True,
) -> int:
    """Bulk-update Document.parent_hierarchy_node_id for multiple documents.

    Only updates rows whose current value differs from the desired value to
    avoid unnecessary writes.

    Args:
        db_session: SQLAlchemy session
        doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id
        commit: Whether to commit the transaction

    Returns:
        Number of documents actually updated
    """
    if not doc_parent_map:
        return 0

    doc_ids = list(doc_parent_map.keys())
    existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)

    by_parent: dict[int | None, list[str]] = defaultdict(list)
    for doc_id, desired_parent_id in doc_parent_map.items():
        current = existing.get(doc_id)
        if current == desired_parent_id or doc_id not in existing:
            continue
        by_parent[desired_parent_id].append(doc_id)

    updated = 0
    for desired_parent_id, ids in by_parent.items():
        db_session.query(Document).filter(Document.id.in_(ids)).update(
            {Document.parent_hierarchy_node_id: desired_parent_id},
            synchronize_session=False,
        )
        updated += len(ids)

    if commit:
        db_session.commit()
    elif updated:
        db_session.flush()

    return updated


def update_hierarchy_node_permissions(
    db_session: Session,
    raw_node_id: str,
    source: DocumentSource,
    is_public: bool,
    external_user_emails: list[str] | None,
    external_user_group_ids: list[str] | None,
    commit: bool = True,
) -> bool:
    """
    Update permissions for an existing hierarchy node.

    This is used during permission sync to update folder permissions
    without needing the full Pydantic HierarchyNode model.

    Args:
        db_session: SQLAlchemy session
        raw_node_id: Raw node ID from the source system
        source: Document source type
        is_public: Whether the node is public
        external_user_emails: List of user emails with access
        external_user_group_ids: List of group IDs with access
        commit: Whether to commit the transaction

    Returns:
        True if the node was found and updated, False if not found
    """
    existing_node = get_hierarchy_node_by_raw_id(db_session, raw_node_id, source)

    if not existing_node:
        logger.warning(
            f"Hierarchy node not found for permission update: raw_node_id={raw_node_id}, source={source}"
        )
        return False

    existing_node.is_public = is_public
    existing_node.external_user_emails = external_user_emails
    existing_node.external_user_group_ids = external_user_group_ids

    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return True


def upsert_hierarchy_node_cc_pair_entries(
    db_session: Session,
    hierarchy_node_ids: list[int],
    connector_id: int,
    credential_id: int,
    commit: bool = True,
) -> None:
    """Insert rows into HierarchyNodeByConnectorCredentialPair, ignoring conflicts.

    This records that the given cc_pair "owns" these hierarchy nodes. Used by
    indexing, pruning, and hierarchy-fetching paths.
    """
    if not hierarchy_node_ids:
        return

    _M = HierarchyNodeByConnectorCredentialPair
    stmt = pg_insert(_M).values(
        [
            {
                _M.hierarchy_node_id: node_id,
                _M.connector_id: connector_id,
                _M.credential_id: credential_id,
            }
            for node_id in hierarchy_node_ids
        ]
    )
    stmt = stmt.on_conflict_do_nothing()
    db_session.execute(stmt)

    if commit:
        db_session.commit()
    else:
        db_session.flush()


def remove_stale_hierarchy_node_cc_pair_entries(
    db_session: Session,
    connector_id: int,
    credential_id: int,
    live_hierarchy_node_ids: set[int],
    commit: bool = True,
) -> int:
    """Delete join-table rows for this cc_pair that are NOT in the live set.

    If ``live_hierarchy_node_ids`` is empty ALL rows for the cc_pair are deleted
    (i.e. the connector no longer has any hierarchy nodes). Callers that want a
    no-op when there are no live nodes must guard before calling.

    Returns the number of deleted rows.
    """
    stmt = delete(HierarchyNodeByConnectorCredentialPair).where(
        HierarchyNodeByConnectorCredentialPair.connector_id == connector_id,
        HierarchyNodeByConnectorCredentialPair.credential_id == credential_id,
    )
    if live_hierarchy_node_ids:
        stmt = stmt.where(
            HierarchyNodeByConnectorCredentialPair.hierarchy_node_id.notin_(
                live_hierarchy_node_ids
            )
        )

    result: CursorResult = db_session.execute(stmt)  # type: ignore[assignment]
    deleted = result.rowcount

    if commit:
        db_session.commit()
    elif deleted:
        db_session.flush()

    return deleted


def delete_orphaned_hierarchy_nodes(
    db_session: Session,
    source: DocumentSource,
    commit: bool = True,
) -> list[str]:
    """Delete hierarchy nodes for a source that have zero cc_pair associations.

    SOURCE-type nodes are excluded (they are synthetic roots).

    Returns the list of raw_node_ids that were deleted (for cache eviction).
    """
    # Find orphaned nodes: no rows in the join table
    orphan_stmt = (
        select(HierarchyNode.id, HierarchyNode.raw_node_id)
        .outerjoin(
            HierarchyNodeByConnectorCredentialPair,
            HierarchyNode.id
            == HierarchyNodeByConnectorCredentialPair.hierarchy_node_id,
        )
        .where(
            HierarchyNode.source == source,
            HierarchyNode.node_type != HierarchyNodeType.SOURCE,
            HierarchyNodeByConnectorCredentialPair.hierarchy_node_id.is_(None),
        )
    )
    orphans = db_session.execute(orphan_stmt).all()
    if not orphans:
        return []

    orphan_ids = [row[0] for row in orphans]
    deleted_raw_ids = [row[1] for row in orphans]

    db_session.execute(delete(HierarchyNode).where(HierarchyNode.id.in_(orphan_ids)))

    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return deleted_raw_ids


def reparent_orphaned_hierarchy_nodes(
    db_session: Session,
    source: DocumentSource,
    commit: bool = True,
) -> list[HierarchyNode]:
    """Re-parent hierarchy nodes whose parent_id is NULL to the SOURCE node.

    After pruning deletes stale nodes, their former children get parent_id=NULL
    via the SET NULL cascade. This function points them back to the SOURCE root.

    Returns the reparented HierarchyNode objects (with updated parent_id)
    so callers can refresh downstream caches.
    """
    source_node = get_source_hierarchy_node(db_session, source)
    if not source_node:
        return []

    stmt = select(HierarchyNode).where(
        HierarchyNode.source == source,
        HierarchyNode.parent_id.is_(None),
        HierarchyNode.node_type != HierarchyNodeType.SOURCE,
    )
    orphans = list(db_session.execute(stmt).scalars().all())
    if not orphans:
        return []

    for node in orphans:
        node.parent_id = source_node.id

    if commit:
        db_session.commit()
    else:
        db_session.flush()

    return orphans


================================================
FILE: backend/onyx/db/hook.py
================================================
import datetime
from uuid import UUID

from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.engine import CursorResult
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.db.constants import UNSET
from onyx.db.constants import UnsetType
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.db.models import Hook
from onyx.db.models import HookExecutionLog
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


# ── Hook CRUD ────────────────────────────────────────────────────────────


def get_hook_by_id(
    *,
    db_session: Session,
    hook_id: int,
    include_deleted: bool = False,
    include_creator: bool = False,
) -> Hook | None:
    stmt = select(Hook).where(Hook.id == hook_id)
    if not include_deleted:
        stmt = stmt.where(Hook.deleted.is_(False))
    if include_creator:
        stmt = stmt.options(selectinload(Hook.creator))
    return db_session.scalar(stmt)


def get_non_deleted_hook_by_hook_point(
    *,
    db_session: Session,
    hook_point: HookPoint,
    include_creator: bool = False,
) -> Hook | None:
    stmt = (
        select(Hook).where(Hook.hook_point == hook_point).where(Hook.deleted.is_(False))
    )
    if include_creator:
        stmt = stmt.options(selectinload(Hook.creator))
    return db_session.scalar(stmt)


def get_hooks(
    *,
    db_session: Session,
    include_deleted: bool = False,
    include_creator: bool = False,
) -> list[Hook]:
    stmt = select(Hook)
    if not include_deleted:
        stmt = stmt.where(Hook.deleted.is_(False))
    if include_creator:
        stmt = stmt.options(selectinload(Hook.creator))
    stmt = stmt.order_by(Hook.hook_point, Hook.created_at.desc())
    return list(db_session.scalars(stmt).all())


def create_hook__no_commit(
    *,
    db_session: Session,
    name: str,
    hook_point: HookPoint,
    endpoint_url: str | None = None,
    api_key: str | None = None,
    fail_strategy: HookFailStrategy,
    timeout_seconds: float,
    is_active: bool = False,
    is_reachable: bool | None = None,
    creator_id: UUID | None = None,
) -> Hook:
    """Create a new hook for the given hook point.

    At most one non-deleted hook per hook point is allowed. Raises
    OnyxError(CONFLICT) if a hook already exists, including under concurrent
    duplicate creates where the partial unique index fires an IntegrityError.
    """
    existing = get_non_deleted_hook_by_hook_point(
        db_session=db_session, hook_point=hook_point
    )
    if existing:
        raise OnyxError(
            OnyxErrorCode.CONFLICT,
            f"A hook for '{hook_point.value}' already exists (id={existing.id}).",
        )

    hook = Hook(
        name=name,
        hook_point=hook_point,
        endpoint_url=endpoint_url,
        api_key=api_key,
        fail_strategy=fail_strategy,
        timeout_seconds=timeout_seconds,
        is_active=is_active,
        is_reachable=is_reachable,
        creator_id=creator_id,
    )
    # Use a savepoint so that a failed insert only rolls back this operation,
    # not the entire outer transaction.
    savepoint = db_session.begin_nested()
    try:
        db_session.add(hook)
        savepoint.commit()
    except IntegrityError as exc:
        savepoint.rollback()
        if "ix_hook_one_non_deleted_per_point" in str(exc.orig):
            raise OnyxError(
                OnyxErrorCode.CONFLICT,
                f"A hook for '{hook_point.value}' already exists.",
            )
        raise  # re-raise unrelated integrity errors (FK violations, etc.)
    return hook


def update_hook__no_commit(
    *,
    db_session: Session,
    hook_id: int,
    name: str | None = None,
    endpoint_url: str | None | UnsetType = UNSET,
    api_key: str | None | UnsetType = UNSET,
    fail_strategy: HookFailStrategy | None = None,
    timeout_seconds: float | None = None,
    is_active: bool | None = None,
    is_reachable: bool | None = None,
    include_creator: bool = False,
) -> Hook:
    """Update hook fields.

    Sentinel conventions:
    - endpoint_url, api_key: pass UNSET to leave unchanged; pass None to clear.
    - name, fail_strategy, timeout_seconds, is_active, is_reachable: pass None to leave unchanged.
    """
    hook = get_hook_by_id(
        db_session=db_session, hook_id=hook_id, include_creator=include_creator
    )
    if hook is None:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")

    if name is not None:
        hook.name = name
    if not isinstance(endpoint_url, UnsetType):
        hook.endpoint_url = endpoint_url
    if not isinstance(api_key, UnsetType):
        hook.api_key = api_key  # type: ignore[assignment]  # EncryptedString coerces str → SensitiveValue at the ORM level
    if fail_strategy is not None:
        hook.fail_strategy = fail_strategy
    if timeout_seconds is not None:
        hook.timeout_seconds = timeout_seconds
    if is_active is not None:
        hook.is_active = is_active
    if is_reachable is not None:
        hook.is_reachable = is_reachable

    db_session.flush()
    return hook


def delete_hook__no_commit(
    *,
    db_session: Session,
    hook_id: int,
) -> None:
    hook = get_hook_by_id(db_session=db_session, hook_id=hook_id)
    if hook is None:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")

    hook.deleted = True
    hook.is_active = False
    db_session.flush()


# ── HookExecutionLog CRUD ────────────────────────────────────────────────


def create_hook_execution_log__no_commit(
    *,
    db_session: Session,
    hook_id: int,
    is_success: bool,
    error_message: str | None = None,
    status_code: int | None = None,
    duration_ms: int | None = None,
) -> HookExecutionLog:
    log = HookExecutionLog(
        hook_id=hook_id,
        is_success=is_success,
        error_message=error_message,
        status_code=status_code,
        duration_ms=duration_ms,
    )
    db_session.add(log)
    db_session.flush()
    return log


def get_hook_execution_logs(
    *,
    db_session: Session,
    hook_id: int,
    limit: int,
) -> list[HookExecutionLog]:
    stmt = (
        select(HookExecutionLog)
        .where(HookExecutionLog.hook_id == hook_id)
        .order_by(HookExecutionLog.created_at.desc())
        .limit(limit)
    )
    return list(db_session.scalars(stmt).all())


def cleanup_old_execution_logs__no_commit(
    *,
    db_session: Session,
    max_age_days: int,
) -> int:
    """Delete execution logs older than max_age_days. Returns the number of rows deleted."""
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        days=max_age_days
    )
    result: CursorResult = db_session.execute(  # type: ignore[assignment]
        delete(HookExecutionLog)
        .where(HookExecutionLog.created_at < cutoff)
        .execution_options(synchronize_session=False)
    )
    return result.rowcount


================================================
FILE: backend/onyx/db/image_generation.py
================================================
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.db.models import ImageGenerationConfig
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
from onyx.llm.utils import get_max_input_tokens
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Default image generation config constants
DEFAULT_IMAGE_PROVIDER_ID = "openai_gpt_image_1"
DEFAULT_IMAGE_MODEL_NAME = "gpt-image-1"
DEFAULT_IMAGE_PROVIDER = "openai"


def create_image_generation_config__no_commit(
    db_session: Session,
    image_provider_id: str,
    model_configuration_id: int,
    is_default: bool = False,
) -> ImageGenerationConfig:
    """Create a new image generation config."""
    # If setting as default, clear ALL existing defaults in a single atomic update
    # This is more atomic than select-then-update pattern
    if is_default:
        db_session.execute(
            update(ImageGenerationConfig)
            .where(ImageGenerationConfig.is_default.is_(True))
            .values(is_default=False)
        )

    new_config = ImageGenerationConfig(
        image_provider_id=image_provider_id,
        model_configuration_id=model_configuration_id,
        is_default=is_default,
    )
    db_session.add(new_config)
    db_session.flush()
    return new_config


def get_all_image_generation_configs(
    db_session: Session,
) -> list[ImageGenerationConfig]:
    """Get all image generation configs.

    Returns:
        List of all ImageGenerationConfig objects
    """
    stmt = select(ImageGenerationConfig)
    return list(db_session.scalars(stmt).all())


def get_image_generation_config(
    db_session: Session,
    image_provider_id: str,
) -> ImageGenerationConfig | None:
    """Get a single image generation config by image_provider_id with relationships loaded.

    Args:
        db_session: Database session
        image_provider_id: The image provider ID (primary key)

    Returns:
        The ImageGenerationConfig or None if not found
    """
    stmt = (
        select(ImageGenerationConfig)
        .where(ImageGenerationConfig.image_provider_id == image_provider_id)
        .options(
            selectinload(ImageGenerationConfig.model_configuration).selectinload(
                ModelConfiguration.llm_provider
            )
        )
    )
    return db_session.scalar(stmt)


def get_default_image_generation_config(
    db_session: Session,
) -> ImageGenerationConfig | None:
    """Get the default image generation config.

    Returns:
        The default ImageGenerationConfig or None if not set
    """
    stmt = (
        select(ImageGenerationConfig)
        .where(ImageGenerationConfig.is_default.is_(True))
        .options(
            selectinload(ImageGenerationConfig.model_configuration).selectinload(
                ModelConfiguration.llm_provider
            )
        )
    )
    return db_session.scalar(stmt)


def set_default_image_generation_config(
    db_session: Session,
    image_provider_id: str,
) -> None:
    """Set a config as the default (clears previous default).

    Args:
        db_session: Database session
        image_provider_id: The image provider ID to set as default

    Raises:
        ValueError: If config not found
    """
    # Get the config to set as default
    new_default = db_session.get(ImageGenerationConfig, image_provider_id)
    if not new_default:
        raise ValueError(
            f"ImageGenerationConfig with image_provider_id {image_provider_id} not found"
        )

    # Clear ALL existing defaults in a single atomic update
    # This is more atomic than select-then-update pattern
    db_session.execute(
        update(ImageGenerationConfig)
        .where(
            ImageGenerationConfig.is_default.is_(True),
            ImageGenerationConfig.image_provider_id != image_provider_id,
        )
        .values(is_default=False)
    )

    # Set new default
    new_default.is_default = True
    db_session.commit()


def unset_default_image_generation_config(
    db_session: Session,
    image_provider_id: str,
) -> None:
    """Unset a config as the default."""
    config = db_session.get(ImageGenerationConfig, image_provider_id)
    if not config:
        raise ValueError(
            f"ImageGenerationConfig with image_provider_id {image_provider_id} not found"
        )
    config.is_default = False
    db_session.commit()


def delete_image_generation_config__no_commit(
    db_session: Session,
    image_provider_id: str,
) -> None:
    """Delete an image generation config by image_provider_id."""
    config = db_session.get(ImageGenerationConfig, image_provider_id)
    if not config:
        raise ValueError(
            f"ImageGenerationConfig with image_provider_id {image_provider_id} not found"
        )

    db_session.delete(config)
    db_session.flush()


def create_default_image_gen_config_from_api_key(
    db_session: Session,
    api_key: str,
    provider: str = DEFAULT_IMAGE_PROVIDER,
    image_provider_id: str = DEFAULT_IMAGE_PROVIDER_ID,
    model_name: str = DEFAULT_IMAGE_MODEL_NAME,
) -> ImageGenerationConfig | None:
    """Create default image gen config using an API key directly.

    This function is used during tenant provisioning to automatically create
    a default image generation config when an OpenAI provider is configured.

    Args:
        db_session: Database session
        api_key: API key for the LLM provider
        provider: Provider name (default: openai)
        image_provider_id: Static unique key for the config (default: openai_gpt_image_1)
        model_name: Model name for image generation (default: gpt-image-1)

    Returns:
        The created ImageGenerationConfig, or None if:
        - image_generation_config table already has records
    """
    # Check if any image generation configs already exist (optimization to avoid work)
    existing_configs = get_all_image_generation_configs(db_session)
    if existing_configs:
        logger.info("Image generation config already exists, skipping default creation")
        return None

    try:
        # Create new LLM provider for image generation
        new_provider = LLMProvider(
            name=f"Image Gen - {image_provider_id}",
            provider=provider,
            api_key=api_key,
            api_base=None,
            api_version=None,
            deployment_name=None,
            is_public=True,
        )
        db_session.add(new_provider)
        db_session.flush()

        # Create model configuration
        max_input_tokens = get_max_input_tokens(
            model_name=model_name,
            model_provider=provider,
        )

        model_config = ModelConfiguration(
            llm_provider_id=new_provider.id,
            name=model_name,
            is_visible=True,
            max_input_tokens=max_input_tokens,
        )
        db_session.add(model_config)
        db_session.flush()

        # Create image generation config
        config = create_image_generation_config__no_commit(
            db_session=db_session,
            image_provider_id=image_provider_id,
            model_configuration_id=model_config.id,
            is_default=True,
        )

        db_session.commit()

        logger.info(f"Created default image generation config: {image_provider_id}")

        return config

    except Exception:
        db_session.rollback()
        logger.exception(
            f"Failed to create default image generation config {image_provider_id}"
        )
        return None


================================================
FILE: backend/onyx/db/index_attempt.py
================================================
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import NamedTuple
from typing import TYPE_CHECKING
from typing import TypeVarTuple

from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.connectors.models import ConnectorFailure
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
from onyx.db.models import SearchSettings
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType

if TYPE_CHECKING:
    from onyx.configs.constants import DocumentSource

# from sqlalchemy.sql.selectable import Select

# Comment out unused imports that cause mypy errors
# from onyx.auth.models import UserRole
# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
# from onyx.db.engine import async_query_for_dms

logger = setup_logger()


def get_last_attempt_for_cc_pair(
    cc_pair_id: int,
    search_settings_id: int,
    db_session: Session,
) -> IndexAttempt | None:
    return (
        db_session.query(IndexAttempt)
        .filter(
            IndexAttempt.connector_credential_pair_id == cc_pair_id,
            IndexAttempt.search_settings_id == search_settings_id,
        )
        .order_by(IndexAttempt.time_updated.desc())
        .first()
    )


def get_recent_completed_attempts_for_cc_pair(
    cc_pair_id: int,
    search_settings_id: int,
    limit: int,
    db_session: Session,
) -> list[IndexAttempt]:
    """Most recent to least recent."""
    return (
        db_session.query(IndexAttempt)
        .filter(
            IndexAttempt.connector_credential_pair_id == cc_pair_id,
            IndexAttempt.search_settings_id == search_settings_id,
            IndexAttempt.status.notin_(
                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
            ),
        )
        .order_by(IndexAttempt.time_updated.desc())
        .limit(limit)
        .all()
    )


def get_recent_attempts_for_cc_pair(
    cc_pair_id: int,
    search_settings_id: int,
    limit: int,
    db_session: Session,
) -> list[IndexAttempt]:
    """Most recent to least recent."""
    return (
        db_session.query(IndexAttempt)
        .filter(
            IndexAttempt.connector_credential_pair_id == cc_pair_id,
            IndexAttempt.search_settings_id == search_settings_id,
        )
        .order_by(IndexAttempt.time_updated.desc())
        .limit(limit)
        .all()
    )


def get_index_attempt(
    db_session: Session,
    index_attempt_id: int,
    eager_load_cc_pair: bool = False,
    eager_load_search_settings: bool = False,
) -> IndexAttempt | None:
    stmt = select(IndexAttempt).where(IndexAttempt.id == index_attempt_id)
    if eager_load_cc_pair:
        stmt = stmt.options(
            joinedload(IndexAttempt.connector_credential_pair).joinedload(
                ConnectorCredentialPair.connector
            )
        )
        stmt = stmt.options(
            joinedload(IndexAttempt.connector_credential_pair).joinedload(
                ConnectorCredentialPair.credential
            )
        )
    if eager_load_search_settings:
        stmt = stmt.options(joinedload(IndexAttempt.search_settings))
    return db_session.scalars(stmt).first()


def count_error_rows_for_index_attempt(
    index_attempt_id: int,
    db_session: Session,
) -> int:
    return (
        db_session.query(IndexAttemptError)
        .filter(IndexAttemptError.index_attempt_id == index_attempt_id)
        .count()
    )


def create_index_attempt(
    connector_credential_pair_id: int,
    search_settings_id: int,
    db_session: Session,
    from_beginning: bool = False,
    celery_task_id: str | None = None,
) -> int:
    new_attempt = IndexAttempt(
        connector_credential_pair_id=connector_credential_pair_id,
        search_settings_id=search_settings_id,
        from_beginning=from_beginning,
        status=IndexingStatus.NOT_STARTED,
        celery_task_id=celery_task_id,
    )
    db_session.add(new_attempt)
    db_session.commit()

    return new_attempt.id


def delete_index_attempt(db_session: Session, index_attempt_id: int) -> None:
    index_attempt = get_index_attempt(db_session, index_attempt_id)
    if index_attempt:
        db_session.delete(index_attempt)
        db_session.commit()


def mock_successful_index_attempt(
    connector_credential_pair_id: int,
    search_settings_id: int,
    docs_indexed: int,
    db_session: Session,
) -> int:
    """Should not be used in any user triggered flows"""
    db_time = func.now()
    new_attempt = IndexAttempt(
        connector_credential_pair_id=connector_credential_pair_id,
        search_settings_id=search_settings_id,
        from_beginning=True,
        status=IndexingStatus.SUCCESS,
        total_docs_indexed=docs_indexed,
        new_docs_indexed=docs_indexed,
        # Need this to be some convincing random looking value and it can't be 0
        # or the indexing rate would calculate out to infinity
        time_started=db_time - timedelta(seconds=1.92),
        time_updated=db_time,
    )
    db_session.add(new_attempt)
    db_session.commit()

    return new_attempt.id


def get_in_progress_index_attempts(
    connector_id: int | None,
    db_session: Session,
) -> list[IndexAttempt]:
    stmt = select(IndexAttempt)
    if connector_id is not None:
        stmt = stmt.where(
            IndexAttempt.connector_credential_pair.has(connector_id=connector_id)
        )
    stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS)

    incomplete_attempts = db_session.scalars(stmt)
    return list(incomplete_attempts.all())


def get_all_index_attempts_by_status(
    status: IndexingStatus, db_session: Session
) -> list[IndexAttempt]:
    """Returns index attempts with the given status.
    Only recommend calling this with non-terminal states as the full list of
    terminal statuses may be quite large.

    Results are ordered by time_created (oldest to newest)."""
    stmt = select(IndexAttempt)
    stmt = stmt.where(IndexAttempt.status == status)
    stmt = stmt.order_by(IndexAttempt.time_created)
    new_attempts = db_session.scalars(stmt)
    return list(new_attempts.all())


def transition_attempt_to_in_progress(
    index_attempt_id: int,
    db_session: Session,
) -> IndexAttempt:
    """Locks the row when we try to update"""
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()
        ).scalar_one()

        if attempt is None:
            raise RuntimeError(
                f"Unable to find IndexAttempt for ID '{index_attempt_id}'"
            )

        if attempt.status != IndexingStatus.NOT_STARTED:
            raise RuntimeError(
                f"Indexing attempt with ID '{index_attempt_id}' is not in NOT_STARTED status. "
                f"Current status is '{attempt.status}'."
            )

        attempt.status = IndexingStatus.IN_PROGRESS
        attempt.time_started = attempt.time_started or func.now()  # type: ignore
        db_session.commit()
        return attempt
    except Exception:
        db_session.rollback()
        logger.exception("transition_attempt_to_in_progress exceptioned.")
        raise


def mark_attempt_in_progress(
    index_attempt: IndexAttempt,
    db_session: Session,
) -> None:
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt.id)
            .with_for_update()
        ).scalar_one()

        attempt.status = IndexingStatus.IN_PROGRESS
        attempt.time_started = index_attempt.time_started or func.now()  # type: ignore
        db_session.commit()

        # Add telemetry for index attempt status change
        optional_telemetry(
            record_type=RecordType.INDEX_ATTEMPT_STATUS,
            data={
                "index_attempt_id": index_attempt.id,
                "status": IndexingStatus.IN_PROGRESS.value,
                "cc_pair_id": index_attempt.connector_credential_pair_id,
            },
        )
    except Exception:
        db_session.rollback()
        raise


def mark_attempt_succeeded(
    index_attempt_id: int,
    db_session: Session,
) -> IndexAttempt:
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()
        ).scalar_one()

        attempt.status = IndexingStatus.SUCCESS
        attempt.celery_task_id = None
        db_session.commit()

        # Add telemetry for index attempt status change
        optional_telemetry(
            record_type=RecordType.INDEX_ATTEMPT_STATUS,
            data={
                "index_attempt_id": index_attempt_id,
                "status": IndexingStatus.SUCCESS.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
        return attempt
    except Exception:
        db_session.rollback()
        raise


def mark_attempt_partially_succeeded(
    index_attempt_id: int,
    db_session: Session,
) -> IndexAttempt:
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()
        ).scalar_one()

        attempt.status = IndexingStatus.COMPLETED_WITH_ERRORS
        attempt.celery_task_id = None
        db_session.commit()

        # Add telemetry for index attempt status change
        optional_telemetry(
            record_type=RecordType.INDEX_ATTEMPT_STATUS,
            data={
                "index_attempt_id": index_attempt_id,
                "status": IndexingStatus.COMPLETED_WITH_ERRORS.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
        return attempt
    except Exception:
        db_session.rollback()
        raise


def mark_attempt_canceled(
    index_attempt_id: int,
    db_session: Session,
    reason: str = "Unknown",
) -> None:
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()
        ).scalar_one()

        if not attempt.time_started:
            attempt.time_started = datetime.now(timezone.utc)
        attempt.status = IndexingStatus.CANCELED
        attempt.error_msg = reason
        db_session.commit()

        # Add telemetry for index attempt status change
        optional_telemetry(
            record_type=RecordType.INDEX_ATTEMPT_STATUS,
            data={
                "index_attempt_id": index_attempt_id,
                "status": IndexingStatus.CANCELED.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
    except Exception:
        db_session.rollback()
        raise


def mark_attempt_failed(
    index_attempt_id: int,
    db_session: Session,
    failure_reason: str = "Unknown",
    full_exception_trace: str | None = None,
) -> None:
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()
        ).scalar_one()

        if not attempt.time_started:
            attempt.time_started = datetime.now(timezone.utc)
        attempt.status = IndexingStatus.FAILED
        attempt.error_msg = failure_reason
        attempt.full_exception_trace = full_exception_trace
        attempt.celery_task_id = None
        db_session.commit()

        # Add telemetry for index attempt status change
        optional_telemetry(
            record_type=RecordType.INDEX_ATTEMPT_STATUS,
            data={
                "index_attempt_id": index_attempt_id,
                "status": IndexingStatus.FAILED.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
    except Exception:
        db_session.rollback()
        raise


def update_docs_indexed(
    db_session: Session,
    index_attempt_id: int,
    total_docs_indexed: int,
    new_docs_indexed: int,
    docs_removed_from_index: int,
) -> None:
    """Updates the docs_indexed and new_docs_indexed fields of an index attempt.
    Adds the given values to the current values in the db"""
    try:
        attempt = db_session.execute(
            select(IndexAttempt)
            .where(IndexAttempt.id == index_attempt_id)
            .with_for_update()  # Locks the row when we try to update
        ).scalar_one()

        attempt.total_docs_indexed = (
            attempt.total_docs_indexed or 0
        ) + total_docs_indexed
        attempt.new_docs_indexed = (attempt.new_docs_indexed or 0) + new_docs_indexed
        attempt.docs_removed_from_index = (
            attempt.docs_removed_from_index or 0
        ) + docs_removed_from_index
        db_session.commit()
    except Exception:
        db_session.rollback()
        logger.exception("update_docs_indexed exceptioned.")
        raise


def get_last_attempt(
    connector_id: int,
    credential_id: int,
    search_settings_id: int | None,
    db_session: Session,
) -> IndexAttempt | None:
    stmt = (
        select(IndexAttempt)
        .join(ConnectorCredentialPair)
        .where(
            ConnectorCredentialPair.connector_id == connector_id,
            ConnectorCredentialPair.credential_id == credential_id,
            IndexAttempt.search_settings_id == search_settings_id,
        )
    )

    # Note, the below is using time_created instead of time_updated
    stmt = stmt.order_by(desc(IndexAttempt.time_created))

    return db_session.execute(stmt).scalars().first()


def get_latest_index_attempts_by_status(
    secondary_index: bool,
    db_session: Session,
    status: IndexingStatus,
) -> Sequence[IndexAttempt]:
    """
    Retrieves the most recent index attempt with the specified status for each connector_credential_pair.
    Filters attempts based on the secondary_index flag to get either future or present index attempts.
    Returns a sequence of IndexAttempt objects, one for each unique connector_credential_pair.
    """
    latest_failed_attempts = (
        select(
            IndexAttempt.connector_credential_pair_id,
            func.max(IndexAttempt.id).label("max_failed_id"),
        )
        .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
        .where(
            SearchSettings.status
            == (
                IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
            ),
            IndexAttempt.status == status,
        )
        .group_by(IndexAttempt.connector_credential_pair_id)
        .subquery()
    )

    stmt = select(IndexAttempt).join(
        latest_failed_attempts,
        (
            IndexAttempt.connector_credential_pair_id
            == latest_failed_attempts.c.connector_credential_pair_id
        )
        & (IndexAttempt.id == latest_failed_attempts.c.max_failed_id),
    )

    return db_session.execute(stmt).scalars().all()


T = TypeVarTuple("T")


def _add_only_finished_clause(stmt: Select[tuple[*T]]) -> Select[tuple[*T]]:
    return stmt.where(
        IndexAttempt.status.not_in(
            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
        ),
    )


def get_latest_index_attempts(
    secondary_index: bool,
    db_session: Session,
    eager_load_cc_pair: bool = False,
    only_finished: bool = False,
) -> Sequence[IndexAttempt]:
    ids_stmt = select(
        IndexAttempt.connector_credential_pair_id,
        func.max(IndexAttempt.id).label("max_id"),
    ).join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)

    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    ids_stmt = ids_stmt.where(SearchSettings.status == status)

    if only_finished:
        ids_stmt = _add_only_finished_clause(ids_stmt)

    ids_stmt = ids_stmt.group_by(IndexAttempt.connector_credential_pair_id)
    ids_subquery = ids_stmt.subquery()

    stmt = (
        select(IndexAttempt)
        .join(
            ids_subquery,
            IndexAttempt.connector_credential_pair_id
            == ids_subquery.c.connector_credential_pair_id,
        )
        .where(IndexAttempt.id == ids_subquery.c.max_id)
    )

    if only_finished:
        stmt = _add_only_finished_clause(stmt)

    if eager_load_cc_pair:
        stmt = stmt.options(
            joinedload(IndexAttempt.connector_credential_pair),
            joinedload(IndexAttempt.error_rows),
        )

    return db_session.execute(stmt).scalars().unique().all()


# For use with our thread-level parallelism utils. Note that any relationships
# you wish to use MUST be eagerly loaded, as the session will not be available
# after this function to allow lazy loading.
def get_latest_index_attempts_parallel(
    secondary_index: bool,
    eager_load_cc_pair: bool = False,
    only_finished: bool = False,
) -> Sequence[IndexAttempt]:
    with get_session_with_current_tenant() as db_session:
        return get_latest_index_attempts(
            secondary_index,
            db_session,
            eager_load_cc_pair,
            only_finished,
        )


def get_latest_index_attempt_for_cc_pair_id(
    db_session: Session,
    connector_credential_pair_id: int,
    secondary_index: bool,
    only_finished: bool = True,
) -> IndexAttempt | None:
    stmt = select(IndexAttempt)
    stmt = stmt.where(
        IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
    )
    if only_finished:
        stmt = _add_only_finished_clause(stmt)

    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    stmt = stmt.join(SearchSettings).where(SearchSettings.status == status)
    stmt = stmt.order_by(desc(IndexAttempt.time_created))
    stmt = stmt.limit(1)
    return db_session.execute(stmt).scalar_one_or_none()


def get_latest_successful_index_attempt_for_cc_pair_id(
    db_session: Session,
    connector_credential_pair_id: int,
    secondary_index: bool = False,
) -> IndexAttempt | None:
    """Returns the most recent successful index attempt for the given cc pair,
    filtered to the current (or future) search settings.
    Uses MAX(id) semantics to match get_latest_index_attempts_by_status."""
    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    stmt = (
        select(IndexAttempt)
        .where(
            IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
            IndexAttempt.status.in_(
                [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
            ),
        )
        .join(SearchSettings)
        .where(SearchSettings.status == status)
        .order_by(desc(IndexAttempt.id))
        .limit(1)
    )
    return db_session.execute(stmt).scalar_one_or_none()


def get_latest_successful_index_attempts_parallel(
    secondary_index: bool = False,
) -> Sequence[IndexAttempt]:
    """Batch version: returns the latest successful index attempt per cc pair.
    Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful())."""
    model_status = (
        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    )
    with get_session_with_current_tenant() as db_session:
        latest_ids = (
            select(
                IndexAttempt.connector_credential_pair_id,
                func.max(IndexAttempt.id).label("max_id"),
            )
            .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
            .where(
                SearchSettings.status == model_status,
                IndexAttempt.status.in_(
                    [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
                ),
            )
            .group_by(IndexAttempt.connector_credential_pair_id)
            .subquery()
        )

        stmt = select(IndexAttempt).join(
            latest_ids,
            (
                IndexAttempt.connector_credential_pair_id
                == latest_ids.c.connector_credential_pair_id
            )
            & (IndexAttempt.id == latest_ids.c.max_id),
        )
        return db_session.execute(stmt).scalars().all()


def count_index_attempts_for_cc_pair(
    db_session: Session,
    cc_pair_id: int,
    only_current: bool = True,
    disinclude_finished: bool = False,
) -> int:
    stmt = select(IndexAttempt).where(
        IndexAttempt.connector_credential_pair_id == cc_pair_id
    )
    if disinclude_finished:
        stmt = stmt.where(
            IndexAttempt.status.in_(
                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
            )
        )
    if only_current:
        stmt = stmt.join(SearchSettings).where(
            SearchSettings.status == IndexModelStatus.PRESENT
        )
    # Count total items for pagination
    count_stmt = stmt.with_only_columns(func.count()).order_by(None)
    total_count = db_session.execute(count_stmt).scalar_one()
    return total_count


def get_paginated_index_attempts_for_cc_pair_id(
    db_session: Session,
    cc_pair_id: int,
    page: int,
    page_size: int,
    only_current: bool = True,
    disinclude_finished: bool = False,
) -> list[IndexAttempt]:
    stmt = select(IndexAttempt).where(
        IndexAttempt.connector_credential_pair_id == cc_pair_id
    )
    if disinclude_finished:
        stmt = stmt.where(
            IndexAttempt.status.in_(
                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
            )
        )
    if only_current:
        stmt = stmt.join(SearchSettings).where(
            SearchSettings.status == IndexModelStatus.PRESENT
        )

    stmt = stmt.order_by(IndexAttempt.time_started.desc())

    # Apply pagination
    stmt = stmt.offset(page * page_size).limit(page_size)

    return list(db_session.execute(stmt).scalars().unique().all())


def get_index_attempts_for_cc_pair(
    db_session: Session,
    cc_pair_identifier: ConnectorCredentialPairIdentifier,
    only_current: bool = True,
    disinclude_finished: bool = False,
) -> Sequence[IndexAttempt]:
    stmt = (
        select(IndexAttempt)
        .join(ConnectorCredentialPair)
        .where(
            and_(
                ConnectorCredentialPair.connector_id == cc_pair_identifier.connector_id,
                ConnectorCredentialPair.credential_id
                == cc_pair_identifier.credential_id,
            )
        )
    )
    if disinclude_finished:
        stmt = stmt.where(
            IndexAttempt.status.in_(
                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
            )
        )
    if only_current:
        stmt = stmt.join(SearchSettings).where(
            SearchSettings.status == IndexModelStatus.PRESENT
        )

    stmt = stmt.order_by(IndexAttempt.time_created.desc())
    return db_session.execute(stmt).scalars().all()


def delete_index_attempts(
    cc_pair_id: int,
    db_session: Session,
) -> None:
    # First, delete related entries in IndexAttemptErrors
    stmt_errors = delete(IndexAttemptError).where(
        IndexAttemptError.index_attempt_id.in_(
            select(IndexAttempt.id).where(
                IndexAttempt.connector_credential_pair_id == cc_pair_id
            )
        )
    )
    db_session.execute(stmt_errors)

    stmt = delete(IndexAttempt).where(
        IndexAttempt.connector_credential_pair_id == cc_pair_id,
    )

    db_session.execute(stmt)


def expire_index_attempts(
    search_settings_id: int,
    db_session: Session,
) -> None:
    not_started_query = (
        update(IndexAttempt)
        .where(IndexAttempt.search_settings_id == search_settings_id)
        .where(IndexAttempt.status == IndexingStatus.NOT_STARTED)
        .values(
            status=IndexingStatus.CANCELED,
            error_msg="Canceled, likely due to model swap",
        )
    )
    db_session.execute(not_started_query)

    update_query = (
        update(IndexAttempt)
        .where(IndexAttempt.search_settings_id == search_settings_id)
        .where(IndexAttempt.status != IndexingStatus.SUCCESS)
        .values(
            status=IndexingStatus.FAILED,
            error_msg="Canceled due to embedding model swap",
        )
    )
    db_session.execute(update_query)

    db_session.commit()


def cancel_indexing_attempts_for_ccpair(
    cc_pair_id: int,
    db_session: Session,
    include_secondary_index: bool = False,
) -> None:
    stmt = (
        update(IndexAttempt)
        .where(IndexAttempt.connector_credential_pair_id == cc_pair_id)
        .where(IndexAttempt.status == IndexingStatus.NOT_STARTED)
        .values(
            status=IndexingStatus.CANCELED,
            error_msg="Canceled by user",
            time_started=datetime.now(timezone.utc),
        )
    )

    if not include_secondary_index:
        subquery = select(SearchSettings.id).where(
            SearchSettings.status != IndexModelStatus.FUTURE
        )
        stmt = stmt.where(IndexAttempt.search_settings_id.in_(subquery))

    db_session.execute(stmt)


def cancel_indexing_attempts_past_model(
    db_session: Session,
) -> None:
    """Stops all indexing attempts that are in progress or not started for
    any embedding model that not present/future"""

    db_session.execute(
        update(IndexAttempt)
        .where(
            IndexAttempt.status.in_(
                [IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED]
            ),
            IndexAttempt.search_settings_id == SearchSettings.id,
            SearchSettings.status == IndexModelStatus.PAST,
        )
        .values(status=IndexingStatus.FAILED)
    )


def cancel_indexing_attempts_for_search_settings(
    search_settings_id: int,
    db_session: Session,
) -> None:
    """Stops all indexing attempts that are in progress or not started for
    the specified search settings."""

    db_session.execute(
        update(IndexAttempt)
        .where(
            IndexAttempt.status.in_(
                [IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED]
            ),
            IndexAttempt.search_settings_id == search_settings_id,
        )
        .values(status=IndexingStatus.FAILED)
    )


def count_unique_cc_pairs_with_successful_index_attempts(
    search_settings_id: int | None,
    db_session: Session,
) -> int:
    """Collect all of the Index Attempts that are successful and for the specified embedding model
    Then do distinct by connector_id and credential_id which is equivalent to the cc-pair. Finally,
    do a count to get the total number of unique cc-pairs with successful attempts"""
    unique_pairs_count = (
        db_session.query(IndexAttempt.connector_credential_pair_id)
        .join(ConnectorCredentialPair)
        .filter(
            IndexAttempt.search_settings_id == search_settings_id,
            IndexAttempt.status == IndexingStatus.SUCCESS,
        )
        .distinct()
        .count()
    )

    return unique_pairs_count


def count_unique_active_cc_pairs_with_successful_index_attempts(
    search_settings_id: int | None,
    db_session: Session,
) -> int:
    """Collect all of the Index Attempts that are successful and for the specified embedding model,
    but only for non-paused connector-credential pairs. Then do distinct by connector_id and credential_id
    which is equivalent to the cc-pair. Finally, do a count to get the total number of unique non-paused
    cc-pairs with successful attempts."""
    unique_pairs_count = (
        db_session.query(IndexAttempt.connector_credential_pair_id)
        .join(ConnectorCredentialPair)
        .filter(
            IndexAttempt.search_settings_id == search_settings_id,
            IndexAttempt.status == IndexingStatus.SUCCESS,
            ConnectorCredentialPair.status != ConnectorCredentialPairStatus.PAUSED,
        )
        .distinct()
        .count()
    )

    return unique_pairs_count


def create_index_attempt_error(
    index_attempt_id: int | None,
    connector_credential_pair_id: int,
    failure: ConnectorFailure,
    db_session: Session,
) -> int:
    new_error = IndexAttemptError(
        index_attempt_id=index_attempt_id,
        connector_credential_pair_id=connector_credential_pair_id,
        document_id=(
            failure.failed_document.document_id if failure.failed_document else None
        ),
        document_link=(
            failure.failed_document.document_link if failure.failed_document else None
        ),
        entity_id=(failure.failed_entity.entity_id if failure.failed_entity else None),
        failed_time_range_start=(
            failure.failed_entity.missed_time_range[0]
            if failure.failed_entity and failure.failed_entity.missed_time_range
            else None
        ),
        failed_time_range_end=(
            failure.failed_entity.missed_time_range[1]
            if failure.failed_entity and failure.failed_entity.missed_time_range
            else None
        ),
        failure_message=failure.failure_message,
        is_resolved=False,
    )
    db_session.add(new_error)
    db_session.commit()

    return new_error.id


def get_index_attempt_errors(
    index_attempt_id: int,
    db_session: Session,
) -> list[IndexAttemptError]:
    stmt = select(IndexAttemptError).where(
        IndexAttemptError.index_attempt_id == index_attempt_id
    )

    errors = db_session.scalars(stmt)
    return list(errors.all())


def count_index_attempt_errors_for_cc_pair(
    cc_pair_id: int,
    unresolved_only: bool,
    db_session: Session,
) -> int:
    stmt = (
        select(func.count())
        .select_from(IndexAttemptError)
        .where(IndexAttemptError.connector_credential_pair_id == cc_pair_id)
    )
    if unresolved_only:
        stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))

    result = db_session.scalar(stmt)
    return 0 if result is None else result


def get_index_attempt_errors_for_cc_pair(
    cc_pair_id: int,
    unresolved_only: bool,
    db_session: Session,
    page: int | None = None,
    page_size: int | None = None,
) -> list[IndexAttemptError]:
    stmt = select(IndexAttemptError).where(
        IndexAttemptError.connector_credential_pair_id == cc_pair_id
    )
    if unresolved_only:
        stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))

    # Order by most recent first
    stmt = stmt.order_by(desc(IndexAttemptError.time_created))

    if page is not None and page_size is not None:
        stmt = stmt.offset(page * page_size).limit(page_size)

    return list(db_session.scalars(stmt).all())


# ── Metrics query helpers ──────────────────────────────────────────────


class ActiveIndexAttemptMetric(NamedTuple):
    """Row returned by get_active_index_attempts_for_metrics."""

    status: IndexingStatus
    source: "DocumentSource"
    cc_pair_id: int
    cc_pair_name: str | None
    attempt_count: int


def get_active_index_attempts_for_metrics(
    db_session: Session,
) -> list[ActiveIndexAttemptMetric]:
    """Return non-terminal index attempts grouped by status, source, and connector.

    Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
    """
    from onyx.db.models import Connector

    terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
    rows = (
        db_session.query(
            IndexAttempt.status,
            Connector.source,
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.name,
            func.count(),
        )
        .join(
            ConnectorCredentialPair,
            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .filter(IndexAttempt.status.notin_(terminal_statuses))
        .group_by(
            IndexAttempt.status,
            Connector.source,
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.name,
        )
        .all()
    )
    return [ActiveIndexAttemptMetric(*row) for row in rows]


def get_failed_attempt_counts_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Return {cc_pair_id: failed_attempt_count} for all connectors.

    When ``since`` is provided, only attempts created after that timestamp
    are counted. Defaults to the last 90 days to avoid unbounded historical
    aggregation.
    """
    if since is None:
        since = datetime.now(timezone.utc) - timedelta(days=90)

    rows = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            func.count(),
        )
        .filter(IndexAttempt.status == IndexingStatus.FAILED)
        .filter(IndexAttempt.time_created >= since)
        .group_by(IndexAttempt.connector_credential_pair_id)
        .all()
    )
    return {cc_id: count for cc_id, count in rows}


def get_docs_indexed_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Return {cc_pair_id: total_new_docs_indexed} across successful attempts.

    Only counts attempts with status SUCCESS to avoid inflating counts with
    partial results from failed attempts. When ``since`` is provided, only
    attempts created after that timestamp are included.
    """
    if since is None:
        since = datetime.now(timezone.utc) - timedelta(days=90)

    query = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
        )
        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)
        .filter(IndexAttempt.time_created >= since)
        .group_by(IndexAttempt.connector_credential_pair_id)
    )
    rows = query.all()
    return {cc_id: int(total or 0) for cc_id, total in rows}


================================================
FILE: backend/onyx/db/indexing_coordination.py
================================================
"""Database-based indexing coordination to replace Redis fencing."""

from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import IndexingStatus
from onyx.db.index_attempt import count_error_rows_for_index_attempt
from onyx.db.index_attempt import create_index_attempt
from onyx.db.index_attempt import get_index_attempt
from onyx.db.models import IndexAttempt
from onyx.utils.logger import setup_logger

logger = setup_logger()

INDEXING_PROGRESS_TIMEOUT_HOURS = 6


class CoordinationStatus(BaseModel):
    """Status of an indexing attempt's coordination."""

    found: bool
    total_batches: int | None
    completed_batches: int
    total_failures: int
    total_docs: int
    total_chunks: int
    status: IndexingStatus | None = None
    cancellation_requested: bool = False


class IndexingCoordination:
    """Database-based coordination for indexing tasks, replacing Redis fencing."""

    @staticmethod
    def try_create_index_attempt(
        db_session: Session,
        cc_pair_id: int,
        search_settings_id: int,
        celery_task_id: str,
        from_beginning: bool = False,
    ) -> int | None:
        """
        Try to create a new index attempt for the given CC pair and search settings.
        Returns the index_attempt_id if successful, None if another attempt is already running.

        This replaces the Redis fencing mechanism by using database constraints
        and transactions to prevent duplicate attempts.
        """
        try:
            # Check for existing active attempts (this is the "fence" check)
            existing_attempt = db_session.execute(
                select(IndexAttempt)
                .where(
                    IndexAttempt.connector_credential_pair_id == cc_pair_id,
                    IndexAttempt.search_settings_id == search_settings_id,
                    IndexAttempt.status.in_(
                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
                    ),
                )
                .with_for_update(nowait=True)
            ).first()

            if existing_attempt:
                logger.info(
                    f"Indexing already in progress: "
                    f"cc_pair={cc_pair_id} "
                    f"search_settings={search_settings_id} "
                    f"existing_attempt={existing_attempt[0].id}"
                )
                return None

            # Create new index attempt (this is setting the "fence")
            attempt_id = create_index_attempt(
                connector_credential_pair_id=cc_pair_id,
                search_settings_id=search_settings_id,
                from_beginning=from_beginning,
                db_session=db_session,
                celery_task_id=celery_task_id,
            )

            logger.info(
                f"Created Index Attempt: "
                f"cc_pair={cc_pair_id} "
                f"search_settings={search_settings_id} "
                f"attempt_id={attempt_id} "
                f"celery_task_id={celery_task_id}"
            )

            return attempt_id

        except SQLAlchemyError as e:
            logger.info(
                f"Failed to create index attempt (likely race condition): "
                f"cc_pair={cc_pair_id} "
                f"search_settings={search_settings_id} "
                f"error={str(e)}"
            )
            db_session.rollback()
            return None

    @staticmethod
    def check_cancellation_requested(
        db_session: Session,
        index_attempt_id: int,
    ) -> bool:
        """
        Check if cancellation has been requested for this indexing attempt.
        This replaces Redis termination signals.
        """
        attempt = get_index_attempt(db_session, index_attempt_id)
        return attempt.cancellation_requested if attempt else False

    @staticmethod
    def request_cancellation(
        db_session: Session,
        index_attempt_id: int,
    ) -> None:
        """
        Request cancellation of an indexing attempt.
        This replaces Redis termination signals.
        """
        attempt = get_index_attempt(db_session, index_attempt_id)
        if attempt:
            attempt.cancellation_requested = True
            db_session.commit()

            logger.info(f"Requested cancellation for attempt {index_attempt_id}")

    @staticmethod
    def set_total_batches(
        db_session: Session,
        index_attempt_id: int,
        total_batches: int,
    ) -> None:
        """
        Set the total number of batches for this indexing attempt.
        Called by docfetching when extraction is complete.
        """
        attempt = get_index_attempt(db_session, index_attempt_id)
        if attempt:
            attempt.total_batches = total_batches
            db_session.commit()

            logger.info(
                f"Set total batches: attempt={index_attempt_id} total={total_batches}"
            )

    @staticmethod
    def update_batch_completion_and_docs(
        db_session: Session,
        index_attempt_id: int,
        total_docs_indexed: int,
        new_docs_indexed: int,
        total_chunks: int,
    ) -> tuple[int, int | None]:
        """
        Update batch completion and document counts atomically.
        Returns (completed_batches, total_batches).
        This extends the existing update_docs_indexed pattern.
        """
        try:
            attempt = db_session.execute(
                select(IndexAttempt)
                .where(IndexAttempt.id == index_attempt_id)
                .with_for_update()  # Same pattern as existing update_docs_indexed
            ).scalar_one()

            # Existing document count updates
            attempt.total_docs_indexed = (
                attempt.total_docs_indexed or 0
            ) + total_docs_indexed
            attempt.new_docs_indexed = (
                attempt.new_docs_indexed or 0
            ) + new_docs_indexed

            # New coordination updates
            attempt.completed_batches = (attempt.completed_batches or 0) + 1
            attempt.total_chunks = (attempt.total_chunks or 0) + total_chunks

            db_session.commit()

            logger.info(
                f"Updated batch completion: "
                f"attempt={index_attempt_id} "
                f"completed={attempt.completed_batches} "
                f"total={attempt.total_batches} "
                f"docs={total_docs_indexed} "
            )

            return attempt.completed_batches, attempt.total_batches

        except Exception:
            db_session.rollback()
            logger.exception(
                f"Failed to update batch completion for attempt {index_attempt_id}"
            )
            raise

    @staticmethod
    def get_coordination_status(
        db_session: Session,
        index_attempt_id: int,
    ) -> CoordinationStatus:
        """
        Get the current coordination status for an indexing attempt.
        This replaces reading FileStore state files.
        """
        attempt = get_index_attempt(db_session, index_attempt_id)
        if not attempt:
            return CoordinationStatus(
                found=False,
                total_batches=None,
                completed_batches=0,
                total_failures=0,
                total_docs=0,
                total_chunks=0,
                status=None,
                cancellation_requested=False,
            )

        return CoordinationStatus(
            found=True,
            total_batches=attempt.total_batches,
            completed_batches=attempt.completed_batches,
            total_failures=count_error_rows_for_index_attempt(
                index_attempt_id, db_session
            ),
            total_docs=attempt.total_docs_indexed or 0,
            total_chunks=attempt.total_chunks,
            status=attempt.status,
            cancellation_requested=attempt.cancellation_requested,
        )

    @staticmethod
    def get_orphaned_index_attempt_ids(db_session: Session) -> list[int]:
        """
        Gets a list of potentially orphaned index attempts.
        These are attempts in non-terminal state that have task IDs but may have died.

        This replaces the old get_unfenced_index_attempt_ids function.
        The actual orphan detection requires checking with Celery, which should be
        done by the caller.
        """
        # Find attempts that are active and have task IDs
        # The caller needs to check each one with Celery to confirm orphaned status
        active_attempts = (
            db_session.execute(
                select(IndexAttempt).where(
                    IndexAttempt.status.in_(
                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
                    ),
                    IndexAttempt.celery_task_id.isnot(None),
                )
            )
            .scalars()
            .all()
        )

        return [attempt.id for attempt in active_attempts]

    @staticmethod
    def update_progress_tracking(
        db_session: Session,
        index_attempt_id: int,
        current_batches_completed: int,
        timeout_hours: int = INDEXING_PROGRESS_TIMEOUT_HOURS,
        force_update_progress: bool = False,
    ) -> bool:
        """
        Update progress tracking for stall detection.
        Returns True if sufficient progress was made, False if stalled.
        """

        attempt = get_index_attempt(db_session, index_attempt_id)
        if not attempt:
            logger.error(f"Index attempt {index_attempt_id} not found in database")
            return False

        current_time = get_db_current_time(db_session)

        # No progress - check if this is the first time tracking
        # or if the caller wants to simulate guaranteed progress
        if attempt.last_progress_time is None or force_update_progress:
            # First time tracking - initialize
            attempt.last_progress_time = current_time
            attempt.last_batches_completed_count = current_batches_completed
            db_session.commit()
            return True

        time_elapsed = (current_time - attempt.last_progress_time).total_seconds()
        # only actually write to db every timeout_hours/2
        # this ensure thats at most timeout_hours will pass with no activity
        if time_elapsed < timeout_hours * 1800:
            return True

        # Check if progress has been made
        if current_batches_completed <= attempt.last_batches_completed_count:
            # if between timeout_hours/2 and timeout_hours has passed
            # without an update, we consider the attempt stalled
            return False

        # Progress made - update tracking
        attempt.last_progress_time = current_time
        attempt.last_batches_completed_count = current_batches_completed
        db_session.commit()
        return True


================================================
FILE: backend/onyx/db/input_prompt.py
================================================
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session

from onyx.db.models import InputPrompt
from onyx.db.models import InputPrompt__User
from onyx.db.models import User
from onyx.server.features.input_prompt.models import InputPromptSnapshot
from onyx.server.manage.models import UserInfo
from onyx.utils.logger import setup_logger

logger = setup_logger()


def insert_input_prompt(
    prompt: str,
    content: str,
    is_public: bool,
    user: User | None,
    db_session: Session,
) -> InputPrompt:
    user_id = user.id if user else None

    # Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
    # to avoid race conditions with the uniqueness check
    stmt = pg_insert(InputPrompt).values(
        prompt=prompt,
        content=content,
        active=True,
        is_public=is_public,
        user_id=user_id,
    )

    # Use the appropriate constraint based on whether this is a user-owned or public prompt
    if user_id is not None:
        stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
    else:
        # Partial unique indexes cannot be targeted by constraint name;
        # must use index_elements + index_where
        stmt = stmt.on_conflict_do_nothing(
            index_elements=[InputPrompt.prompt],
            index_where=InputPrompt.user_id.is_(None),
        )

    stmt = stmt.returning(InputPrompt)

    result = db_session.execute(stmt)
    input_prompt = result.scalar_one_or_none()

    if input_prompt is None:
        raise HTTPException(
            status_code=409,
            detail=f"A prompt shortcut with the name '{prompt}' already exists",
        )

    db_session.commit()
    return input_prompt


def update_input_prompt(
    user: User,
    input_prompt_id: int,
    prompt: str,
    content: str,
    active: bool,
    db_session: Session,
) -> InputPrompt:
    input_prompt = db_session.scalar(
        select(InputPrompt).where(InputPrompt.id == input_prompt_id)
    )
    if input_prompt is None:
        raise ValueError(f"No input prompt with id {input_prompt_id}")

    if not validate_user_prompt_authorization(user, input_prompt):
        raise HTTPException(status_code=401, detail="You don't own this prompt")

    input_prompt.prompt = prompt
    input_prompt.content = content
    input_prompt.active = active

    try:
        db_session.commit()
    except IntegrityError:
        db_session.rollback()
        raise HTTPException(
            status_code=409,
            detail=f"A prompt shortcut with the name '{prompt}' already exists",
        )

    return input_prompt


def validate_user_prompt_authorization(user: User, input_prompt: InputPrompt) -> bool:
    prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)

    # Public prompts cannot be modified via the user API (only admins via admin endpoints)
    if prompt.is_public or prompt.user_id is None:
        return False

    # Anonymous users cannot modify user-owned prompts
    if user.is_anonymous:
        return False

    # User must own the prompt
    user_details = UserInfo.from_model(user)
    return str(user_details.id) == str(prompt.user_id)


def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:
    input_prompt = db_session.scalar(
        select(InputPrompt).where(InputPrompt.id == input_prompt_id)
    )

    if input_prompt is None:
        raise ValueError(f"No input prompt with id {input_prompt_id}")

    if not input_prompt.is_public:
        raise HTTPException(status_code=400, detail="This prompt is not public")

    db_session.delete(input_prompt)
    db_session.commit()


def remove_input_prompt(
    user: User,
    input_prompt_id: int,
    db_session: Session,
    delete_public: bool = False,
) -> None:
    input_prompt = db_session.scalar(
        select(InputPrompt).where(InputPrompt.id == input_prompt_id)
    )
    if input_prompt is None:
        raise ValueError(f"No input prompt with id {input_prompt_id}")

    if input_prompt.is_public and not delete_public:
        raise HTTPException(
            status_code=400, detail="Cannot delete public prompts with this method"
        )

    if not validate_user_prompt_authorization(user, input_prompt):
        raise HTTPException(status_code=401, detail="You do not own this prompt")

    db_session.delete(input_prompt)
    db_session.commit()


def fetch_input_prompt_by_id(
    id: int, user_id: UUID | None, db_session: Session
) -> InputPrompt:
    query = select(InputPrompt).where(InputPrompt.id == id)

    if user_id:
        query = query.where(
            (InputPrompt.user_id == user_id) | (InputPrompt.user_id is None)
        )
    else:
        # If no user_id is provided, only fetch prompts without a user_id (aka public)
        query = query.where(InputPrompt.user_id == None)  # noqa

    result = db_session.scalar(query)

    if result is None:
        raise HTTPException(422, "No input prompt found")

    return result


def fetch_public_input_prompts(
    db_session: Session,
) -> list[InputPrompt]:
    query = select(InputPrompt).where(InputPrompt.is_public)
    return list(db_session.scalars(query).all())


def fetch_input_prompts_by_user(
    db_session: Session,
    user_id: UUID | None,
    active: bool | None = None,
    include_public: bool = False,
) -> list[InputPrompt]:
    """
    Returns all prompts belonging to the user or public prompts,
    excluding those the user has specifically disabled.
    """

    query = select(InputPrompt)

    if user_id is not None:
        # If we have a user, left join to InputPrompt__User to check "disabled"
        IPU = aliased(InputPrompt__User)
        query = query.join(
            IPU,
            (IPU.input_prompt_id == InputPrompt.id) & (IPU.user_id == user_id),
            isouter=True,
        )

        # Exclude disabled prompts
        query = query.where(or_(IPU.disabled.is_(None), IPU.disabled.is_(False)))

        if include_public:
            # Return both user-owned and public prompts
            query = query.where(
                or_(
                    InputPrompt.user_id == user_id,
                    InputPrompt.is_public,
                )
            )
        else:
            # Return only user-owned prompts
            query = query.where(InputPrompt.user_id == user_id)

    else:
        # user_id is None - anonymous usage
        if include_public:
            query = query.where(InputPrompt.is_public)
        else:
            # No user and not requesting public prompts - return nothing
            return []

    if active is not None:
        query = query.where(InputPrompt.active == active)

    return list(db_session.scalars(query).all())


def disable_input_prompt_for_user(
    input_prompt_id: int,
    user_id: UUID,
    db_session: Session,
) -> None:
    """
    Sets (or creates) a record in InputPrompt__User with disabled=True
    so that this prompt is hidden for the user.
    """
    ipu = (
        db_session.query(InputPrompt__User)
        .filter_by(input_prompt_id=input_prompt_id, user_id=user_id)
        .first()
    )

    if ipu is None:
        # Create a new association row
        ipu = InputPrompt__User(
            input_prompt_id=input_prompt_id, user_id=user_id, disabled=True
        )
        db_session.add(ipu)
    else:
        # Just update the existing record
        ipu.disabled = True

    db_session.commit()


================================================
FILE: backend/onyx/db/kg_config.py
================================================
from onyx.configs.constants import KV_KG_CONFIG_KEY
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.kg.models import KGConfigSettings
from onyx.server.kg.models import EnableKGConfigRequest
from onyx.utils.logger import setup_logger

logger = setup_logger()


def set_kg_config_settings(kg_config_settings: KGConfigSettings) -> None:
    kv_store = get_kv_store()
    kv_store.store(KV_KG_CONFIG_KEY, kg_config_settings.model_dump())


def get_kg_config_settings() -> KGConfigSettings:
    kv_store = get_kv_store()
    try:
        # refresh cache True until beta is over as we may manually update the config in the db
        stored_config = kv_store.load(KV_KG_CONFIG_KEY, refresh_cache=True)
        return KGConfigSettings.model_validate(stored_config or {})
    except KvKeyNotFoundError:
        # Default to empty kg config if no config have been set yet
        logger.debug(f"No kg config found in KV store for key: {KV_KG_CONFIG_KEY}")
        return KGConfigSettings()
    except Exception as e:
        logger.error(f"Error loading kg config from KV store: {str(e)}")
        return KGConfigSettings()


def validate_kg_settings(kg_config_settings: KGConfigSettings) -> None:
    if not kg_config_settings.KG_ENABLED:
        raise ValueError("KG is not enabled")
    if not kg_config_settings.KG_VENDOR:
        raise ValueError("KG_VENDOR is not set")
    if not kg_config_settings.KG_VENDOR_DOMAINS:
        raise ValueError("KG_VENDOR_DOMAINS is not set")


def is_kg_config_settings_enabled_valid(kg_config_settings: KGConfigSettings) -> bool:
    try:
        validate_kg_settings(kg_config_settings)
        return True
    except Exception:
        return False


def enable_kg(enable_req: EnableKGConfigRequest) -> None:
    kg_config_settings = get_kg_config_settings()
    kg_config_settings.KG_ENABLED = True
    kg_config_settings.KG_VENDOR = enable_req.vendor
    kg_config_settings.KG_VENDOR_DOMAINS = enable_req.vendor_domains
    kg_config_settings.KG_IGNORE_EMAIL_DOMAINS = enable_req.ignore_domains
    kg_config_settings.KG_COVERAGE_START = enable_req.coverage_start.strftime(
        "%Y-%m-%d"
    )
    kg_config_settings.KG_MAX_COVERAGE_DAYS = 10000  # TODO: revisit after public beta

    validate_kg_settings(kg_config_settings)
    set_kg_config_settings(kg_config_settings)


def disable_kg() -> None:
    kg_config_settings = get_kg_config_settings()
    kg_config_settings.KG_ENABLED = False
    set_kg_config_settings(kg_config_settings)


================================================
FILE: backend/onyx/db/kg_temp_view.py
================================================
# import random

# from sqlalchemy import text
# from sqlalchemy.ext.declarative import declarative_base
# from sqlalchemy.orm import Session

# from onyx.agents.agent_search.kb_search.models import KGViewNames
# from onyx.configs.app_configs import DB_READONLY_USER
# from onyx.configs.kg_configs import KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX
# from onyx.configs.kg_configs import KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX
# from onyx.configs.kg_configs import KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX
# from onyx.db.engine.sql_engine import get_session_with_current_tenant


# Base = declarative_base()


# def get_user_view_names(
#     user_email: str, tenant_id: str
# ) -> KGViewNames:
#     user_email_cleaned = (
#         user_email.replace("@", "__")
#         .replace(".", "_")
#         .replace("+", "_")
#     )
#     random_suffix_str = str(
#         random.randint(1000000, 9999999)
#     )
#     return KGViewNames(
#         allowed_docs_view_name=(
#             f'"{tenant_id}".'
#             f"{KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX}_"
#             f"{user_email_cleaned}_{random_suffix_str}"
#         ),
#         kg_relationships_view_name=(
#             f'"{tenant_id}".'
#             f"{KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX}_"
#             f"{user_email_cleaned}_{random_suffix_str}"
#         ),
#         kg_entity_view_name=(
#             f'"{tenant_id}".'
#             f"{KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX}_"
#             f"{user_email_cleaned}_{random_suffix_str}"
#         ),
#     )


# # First, create the view definition
# def create_views(
#     db_session: Session,
#     tenant_id: str,
#     user_email: str,
#     allowed_docs_view_name: str,
#     kg_relationships_view_name: str,
#     kg_entity_view_name: str,
# ) -> None:

#     # Create ALLOWED_DOCS view
#     allowed_docs_view = text(
#         f"""
#     CREATE OR REPLACE VIEW {allowed_docs_view_name} AS
#     WITH kg_used_docs AS (
#         SELECT document_id as kg_used_doc_id
#         FROM "{tenant_id}".kg_entity d
#         WHERE document_id IS NOT NULL
#     ),

#     base_public_docs AS (
#         SELECT d.id as allowed_doc_id
#         FROM "{tenant_id}".document d
#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id
#         WHERE d.is_public
#     ),
#     user_owned_and_public_docs AS (
#         SELECT d.id as allowed_doc_id
#         FROM "{tenant_id}".document_by_connector_credential_pair d
#         JOIN "{tenant_id}".credential c ON d.credential_id = c.id
#         JOIN "{tenant_id}".connector_credential_pair ccp ON
#             d.connector_id = ccp.connector_id AND
#             d.credential_id = ccp.credential_id
#         JOIN "{tenant_id}".user u ON c.user_id = u.id
#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id
#         WHERE ccp.status != 'DELETING'
#         AND ccp.access_type != 'SYNC'
#         AND (u.email = :user_email or ccp.access_type::text = 'PUBLIC')
#     ),
#     user_group_accessible_docs AS (
#         SELECT d.id as allowed_doc_id
#         FROM "{tenant_id}".document_by_connector_credential_pair d
#         JOIN "{tenant_id}".connector_credential_pair ccp ON
#             d.connector_id = ccp.connector_id AND
#             d.credential_id = ccp.credential_id
#         JOIN "{tenant_id}".user_group__connector_credential_pair ugccp ON
#             ccp.id = ugccp.cc_pair_id
#         JOIN "{tenant_id}".user__user_group uug ON
#             uug.user_group_id = ugccp.user_group_id
#         JOIN "{tenant_id}".user u ON uug.user_id = u.id
#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id
#         WHERE kud.kg_used_doc_id IS NOT NULL
#         AND ccp.status != 'DELETING'
#         AND ccp.access_type != 'SYNC'
#         AND u.email = :user_email
#     ),
#     external_user_docs AS (
#         SELECT d.id as allowed_doc_id
#         FROM "{tenant_id}".document d
#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id
#         WHERE kud.kg_used_doc_id IS NOT NULL
#         AND :user_email = ANY(external_user_emails)
#     ),
#     external_group_docs AS (
#         SELECT d.id as allowed_doc_id
#         FROM "{tenant_id}".document d
#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id
#         JOIN "{tenant_id}".user__external_user_group_id ueg ON ueg.external_user_group_id = ANY(d.external_user_group_ids)
#         JOIN "{tenant_id}".user u ON ueg.user_id = u.id
#         WHERE kud.kg_used_doc_id IS NOT NULL
#         AND u.email = :user_email
#     )
#     SELECT DISTINCT allowed_doc_id FROM (
#         SELECT allowed_doc_id FROM base_public_docs
#         UNION
#         SELECT allowed_doc_id FROM user_owned_and_public_docs
#         UNION
#         SELECT allowed_doc_id FROM user_group_accessible_docs
#         UNION
#         SELECT allowed_doc_id FROM external_user_docs
#         UNION
#         SELECT allowed_doc_id FROM external_group_docs
#     ) combined_docs
#     """
#     ).bindparams(user_email=user_email)

#     # Create the main view that uses ALLOWED_DOCS for Relationships
#     kg_relationships_view = text(
#         f"""
#     CREATE OR REPLACE VIEW {kg_relationships_view_name} AS
#     SELECT kgr.id_name as relationship,
#            kgr.source_node as source_entity,
#            kgr.target_node as target_entity,
#            kgr.source_node_type as source_entity_type,
#            kgr.target_node_type as target_entity_type,
#            kgr.type as relationship_description,
#            kgr.relationship_type_id_name as relationship_type,
#            kgr.source_document as source_document,
#            d.doc_updated_at as source_date,
#            se.attributes as source_entity_attributes,
#            te.attributes as target_entity_attributes
#     FROM "{tenant_id}".kg_relationship kgr
#     INNER JOIN {allowed_docs_view_name} AD on AD.allowed_doc_id = kgr.source_document
#     JOIN "{tenant_id}".document d on d.id = kgr.source_document
#     JOIN "{tenant_id}".kg_entity se on se.id_name = kgr.source_node
#     JOIN "{tenant_id}".kg_entity te on te.id_name = kgr.target_node
#     """
#     )

#     # Create the main view that uses ALLOWED_DOCS for Entities
#     kg_entity_view = text(
#         f"""
#     CREATE OR REPLACE VIEW {kg_entity_view_name} AS
#     SELECT kge.id_name as entity,
#            kge.entity_type_id_name as entity_type,
#            kge.attributes as entity_attributes,
#            kge.document_id as source_document,
#            d.doc_updated_at as source_date
#     FROM "{tenant_id}".kg_entity kge
#     INNER JOIN {allowed_docs_view_name} AD on AD.allowed_doc_id = kge.document_id
#     JOIN "{tenant_id}".document d on d.id = kge.document_id
#     """
#     )

#     # Execute the views using the session
#     db_session.execute(allowed_docs_view)
#     db_session.execute(kg_relationships_view)
#     db_session.execute(kg_entity_view)

#     # Grant permissions on view to readonly user

#     db_session.execute(
#         text(f"GRANT SELECT ON {kg_relationships_view_name} TO {DB_READONLY_USER}")
#     )
#     db_session.execute(
#         text(f"GRANT SELECT ON {kg_entity_view_name} TO {DB_READONLY_USER}")
#     )

#     db_session.commit()

#     return None


# def drop_views(
#     allowed_docs_view_name: str | None = None,
#     kg_relationships_view_name: str | None = None,
#     kg_entity_view_name: str | None = None,
# ) -> None:
#     """
#     Drops the temporary views created by create_views.

#     Args:
#         db_session: SQLAlchemy session
#         allowed_docs_view_name: Name of the allowed_docs view
#         kg_relationships_view_name: Name of the allowed kg_relationships view
#         kg_entity_view_name: Name of the allowed kg_entity view
#     """

#     with get_session_with_current_tenant() as db_drop_session:
#         if kg_relationships_view_name:
#             revoke_kg_relationships = text(
#                 f"REVOKE SELECT ON {kg_relationships_view_name} FROM {DB_READONLY_USER}"
#             )
#             db_drop_session.execute(revoke_kg_relationships)
#             drop_kg_relationships = text(
#                 f"DROP VIEW IF EXISTS {kg_relationships_view_name}"
#             )
#             db_drop_session.execute(drop_kg_relationships)

#         if kg_entity_view_name:
#             revoke_kg_entities = text(
#                 f"REVOKE SELECT ON {kg_entity_view_name} FROM {DB_READONLY_USER}"
#             )
#             db_drop_session.execute(revoke_kg_entities)
#             drop_kg_entities = text(f"DROP VIEW IF EXISTS {kg_entity_view_name}")
#             db_drop_session.execute(drop_kg_entities)

#         if allowed_docs_view_name:
#             drop_allowed_docs = text(f"DROP VIEW IF EXISTS {allowed_docs_view_name}")
#             db_drop_session.execute(drop_allowed_docs)

#         db_drop_session.commit()
#     return None


================================================
FILE: backend/onyx/db/llm.py
================================================
from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.db.enums import LLMModelFlowType
from onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel
from onyx.db.models import DocumentSet
from onyx.db.models import ImageGenerationConfig
from onyx.db.models import LLMModelFlow
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import LLMProvider__Persona
from onyx.db.models import LLMProvider__UserGroup
from onyx.db.models import ModelConfiguration
from onyx.db.models import Persona
from onyx.db.models import SearchSettings
from onyx.db.models import Tool as ToolModel
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.llm.utils import model_supports_image_input
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.server.manage.embedding.models import CloudEmbeddingProvider
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import SyncModelEntry
from onyx.utils.logger import setup_logger
from shared_configs.enums import EmbeddingProvider

logger = setup_logger()


def update_group_llm_provider_relationships__no_commit(
    llm_provider_id: int,
    group_ids: list[int] | None,
    db_session: Session,
) -> None:
    # Delete existing relationships
    db_session.query(LLMProvider__UserGroup).filter(
        LLMProvider__UserGroup.llm_provider_id == llm_provider_id
    ).delete(synchronize_session="fetch")

    # Add new relationships from given group_ids
    if group_ids:
        new_relationships = [
            LLMProvider__UserGroup(
                llm_provider_id=llm_provider_id,
                user_group_id=group_id,
            )
            for group_id in group_ids
        ]
        db_session.add_all(new_relationships)


def update_llm_provider_persona_relationships__no_commit(
    db_session: Session,
    llm_provider_id: int,
    persona_ids: list[int] | None,
) -> None:
    """Replace the persona restrictions for a provider within an open transaction."""
    db_session.execute(
        delete(LLMProvider__Persona).where(
            LLMProvider__Persona.llm_provider_id == llm_provider_id
        )
    )

    if persona_ids:
        db_session.add_all(
            LLMProvider__Persona(
                llm_provider_id=llm_provider_id,
                persona_id=persona_id,
            )
            for persona_id in persona_ids
        )


def fetch_user_group_ids(db_session: Session, user: User) -> set[int]:
    """Fetch the set of user group IDs for a given user.

    Args:
        db_session: Database session
        user: User to fetch groups for

    Returns:
        Set of user group IDs. Empty set for anonymous users.
    """
    if user.is_anonymous:
        return set()

    return set(
        db_session.scalars(
            select(User__UserGroup.user_group_id).where(
                User__UserGroup.user_id == user.id
            )
        ).all()
    )


def can_user_access_llm_provider(
    provider: LLMProviderModel,
    user_group_ids: set[int],
    persona: Persona | None,
    is_admin: bool = False,
) -> bool:
    """Check if a user may use an LLM provider.

    Args:
        provider: The LLM provider to check access for
        user_group_ids: Set of user group IDs the user belongs to
        persona: The persona being used (if any)
        is_admin: If True, bypass user group restrictions but still respect persona restrictions

    Access logic:
    - is_public controls USER access (group bypass): when True, all users can access
      regardless of group membership. When False, user must be in a whitelisted group
      (or be admin).
    - Persona restrictions are ALWAYS enforced when set, regardless of is_public.
      This allows admins to make a provider available to all users while still
      restricting which personas (assistants) can use it.

    Decision matrix:
    1. is_public=True, no personas set → everyone has access
    2. is_public=True, personas set → all users, but only whitelisted personas
    3. is_public=False, groups+personas set → must satisfy BOTH (admins bypass groups)
    4. is_public=False, only groups set → must be in group (admins bypass)
    5. is_public=False, only personas set → must use whitelisted persona
    6. is_public=False, neither set → admin-only (locked)
    """
    provider_group_ids = {g.id for g in (provider.groups or [])}
    provider_persona_ids = {p.id for p in (provider.personas or [])}
    has_groups = bool(provider_group_ids)
    has_personas = bool(provider_persona_ids)

    # Persona restrictions are always enforced when set, regardless of is_public
    if has_personas and not (persona and persona.id in provider_persona_ids):
        return False

    if provider.is_public:
        return True

    if has_groups:
        return is_admin or bool(user_group_ids & provider_group_ids)

    # No groups: either persona-whitelisted (already passed) or admin-only if locked
    return has_personas or is_admin


def validate_persona_ids_exist(
    db_session: Session, persona_ids: list[int]
) -> tuple[set[int], list[int]]:
    """Validate that persona IDs exist in the database.

    Returns:
        Tuple of (fetched_persona_ids, missing_personas)
    """
    fetched_persona_ids = set(
        db_session.scalars(select(Persona.id).where(Persona.id.in_(persona_ids))).all()
    )
    missing_personas = sorted(set(persona_ids) - fetched_persona_ids)
    return fetched_persona_ids, missing_personas


def get_personas_using_provider(
    db_session: Session, provider_name: str
) -> list[Persona]:
    """Get all non-deleted personas that use a specific LLM provider."""
    return list(
        db_session.scalars(
            select(Persona).where(
                Persona.llm_model_provider_override == provider_name,
                Persona.deleted == False,  # noqa: E712
            )
        ).all()
    )


def fetch_persona_with_groups(db_session: Session, persona_id: int) -> Persona | None:
    """Fetch a persona with its groups eagerly loaded."""
    return db_session.scalar(
        select(Persona)
        .options(selectinload(Persona.groups))
        .where(Persona.id == persona_id, Persona.deleted == False)  # noqa: E712
    )


def upsert_cloud_embedding_provider(
    db_session: Session, provider: CloudEmbeddingProviderCreationRequest
) -> CloudEmbeddingProvider:
    existing_provider = (
        db_session.query(CloudEmbeddingProviderModel)
        .filter_by(provider_type=provider.provider_type)
        .first()
    )
    if existing_provider:
        for key, value in provider.model_dump().items():
            setattr(existing_provider, key, value)
    else:
        new_provider = CloudEmbeddingProviderModel(**provider.model_dump())

        db_session.add(new_provider)
        existing_provider = new_provider
    db_session.commit()
    db_session.refresh(existing_provider)
    return CloudEmbeddingProvider.from_request(existing_provider)


def upsert_llm_provider(
    llm_provider_upsert_request: LLMProviderUpsertRequest,
    db_session: Session,
) -> LLMProviderView:
    existing_llm_provider: LLMProviderModel | None = None
    if llm_provider_upsert_request.id:
        existing_llm_provider = fetch_existing_llm_provider_by_id(
            id=llm_provider_upsert_request.id, db_session=db_session
        )
        if not existing_llm_provider:
            raise ValueError(
                f"LLM provider with id {llm_provider_upsert_request.id} not found"
            )

        if existing_llm_provider.name != llm_provider_upsert_request.name:
            raise ValueError(
                f"LLM provider with id {llm_provider_upsert_request.id} name change not allowed"
            )
    else:
        existing_llm_provider = fetch_existing_llm_provider(
            name=llm_provider_upsert_request.name, db_session=db_session
        )
        if existing_llm_provider:
            raise ValueError(
                f"LLM provider with name '{llm_provider_upsert_request.name}' already exists"
            )
        existing_llm_provider = LLMProviderModel(name=llm_provider_upsert_request.name)
        db_session.add(existing_llm_provider)

    # Filter out empty strings and None values from custom_config to allow
    # providers like Bedrock to fall back to IAM roles when credentials are not provided
    custom_config = llm_provider_upsert_request.custom_config
    if custom_config:
        custom_config = {
            k: v for k, v in custom_config.items() if v is not None and v.strip() != ""
        }
        # Set to None if the dict is empty after filtering
        custom_config = custom_config or None

    api_base = llm_provider_upsert_request.api_base or None
    existing_llm_provider.provider = llm_provider_upsert_request.provider
    # EncryptedString accepts str for writes, returns SensitiveValue for reads
    existing_llm_provider.api_key = llm_provider_upsert_request.api_key  # type: ignore[assignment]
    existing_llm_provider.api_base = api_base
    existing_llm_provider.api_version = llm_provider_upsert_request.api_version
    existing_llm_provider.custom_config = custom_config

    existing_llm_provider.is_public = llm_provider_upsert_request.is_public
    existing_llm_provider.is_auto_mode = llm_provider_upsert_request.is_auto_mode
    existing_llm_provider.deployment_name = llm_provider_upsert_request.deployment_name

    if not existing_llm_provider.id:
        # If its not already in the db, we need to generate an ID by flushing
        db_session.flush()

    # Build a lookup of existing model configurations by name (single iteration)
    existing_by_name = {
        mc.name: mc for mc in existing_llm_provider.model_configurations
    }

    models_to_exist = {
        mc.name for mc in llm_provider_upsert_request.model_configurations
    }

    # Build a lookup of requested visibility by model name
    requested_visibility = {
        mc.name: mc.is_visible
        for mc in llm_provider_upsert_request.model_configurations
    }

    # Delete removed models
    removed_ids = [
        mc.id for name, mc in existing_by_name.items() if name not in models_to_exist
    ]

    default_model = fetch_default_llm_model(db_session)

    # Prevent removing and hiding the default model
    if default_model:
        for name, mc in existing_by_name.items():
            if mc.id == default_model.id:
                if default_model.id in removed_ids:
                    raise ValueError(
                        f"Cannot remove the default model '{name}'. Please change the default model before removing."
                    )
                if not requested_visibility.get(name, True):
                    raise ValueError(
                        f"Cannot hide the default model '{name}'. Please change the default model before hiding."
                    )
                break

    if removed_ids:
        db_session.query(ModelConfiguration).filter(
            ModelConfiguration.id.in_(removed_ids)
        ).delete(synchronize_session="fetch")
        db_session.flush()

    # Import here to avoid circular imports
    from onyx.llm.utils import get_max_input_tokens

    for model_config in llm_provider_upsert_request.model_configurations:
        max_input_tokens = model_config.max_input_tokens
        if max_input_tokens is None:
            max_input_tokens = get_max_input_tokens(
                model_name=model_config.name,
                model_provider=llm_provider_upsert_request.provider,
            )

        supported_flows = [LLMModelFlowType.CHAT]
        if model_config.supports_image_input:
            supported_flows.append(LLMModelFlowType.VISION)

        existing = existing_by_name.get(model_config.name)
        if existing:
            update_model_configuration__no_commit(
                db_session=db_session,
                model_configuration_id=existing.id,
                supported_flows=supported_flows,
                is_visible=model_config.is_visible,
                max_input_tokens=max_input_tokens,
                display_name=model_config.display_name,
            )
        else:
            insert_new_model_configuration__no_commit(
                db_session=db_session,
                llm_provider_id=existing_llm_provider.id,
                model_name=model_config.name,
                supported_flows=supported_flows,
                is_visible=model_config.is_visible,
                max_input_tokens=max_input_tokens,
                display_name=model_config.display_name,
            )

    # Make sure the relationship table stays up to date
    update_group_llm_provider_relationships__no_commit(
        llm_provider_id=existing_llm_provider.id,
        group_ids=llm_provider_upsert_request.groups,
        db_session=db_session,
    )
    update_llm_provider_persona_relationships__no_commit(
        db_session=db_session,
        llm_provider_id=existing_llm_provider.id,
        persona_ids=llm_provider_upsert_request.personas,
    )

    db_session.flush()
    db_session.refresh(existing_llm_provider)

    try:
        db_session.commit()
    except Exception as e:
        db_session.rollback()
        raise ValueError(f"Failed to save LLM provider: {str(e)}") from e

    full_llm_provider = LLMProviderView.from_model(existing_llm_provider)
    return full_llm_provider


def sync_model_configurations(
    db_session: Session,
    provider_name: str,
    models: list[SyncModelEntry],
) -> int:
    """Sync model configurations for a dynamic provider (OpenRouter, Bedrock, Ollama, etc.).

    This inserts NEW models from the source API without overwriting existing ones.
    User preferences (is_visible, max_input_tokens) are preserved for existing models.

    Args:
        db_session: Database session
        provider_name: Name of the LLM provider
        models: List of SyncModelEntry objects describing the fetched models

    Returns:
        Number of new models added
    """
    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)
    if not provider:
        raise ValueError(f"LLM Provider '{provider_name}' not found")

    # Get existing model names to count new additions
    existing_names = {mc.name for mc in provider.model_configurations}

    new_count = 0
    for model in models:
        if model.name not in existing_names:
            # Insert new model with is_visible=False (user must explicitly enable)
            supported_flows = [LLMModelFlowType.CHAT]
            if model.supports_image_input:
                supported_flows.append(LLMModelFlowType.VISION)

            insert_new_model_configuration__no_commit(
                db_session=db_session,
                llm_provider_id=provider.id,
                model_name=model.name,
                supported_flows=supported_flows,
                is_visible=False,
                max_input_tokens=model.max_input_tokens,
                display_name=model.display_name,
            )
            new_count += 1

    if new_count > 0:
        db_session.commit()

    return new_count


def fetch_existing_embedding_providers(
    db_session: Session,
) -> list[CloudEmbeddingProviderModel]:
    return list(db_session.scalars(select(CloudEmbeddingProviderModel)).all())


def fetch_existing_doc_sets(
    db_session: Session, doc_ids: list[int]
) -> list[DocumentSet]:
    return list(
        db_session.scalars(select(DocumentSet).where(DocumentSet.id.in_(doc_ids))).all()
    )


def fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolModel]:
    return list(
        db_session.scalars(select(ToolModel).where(ToolModel.id.in_(tool_ids))).all()
    )


def fetch_existing_models(
    db_session: Session,
    flow_types: list[LLMModelFlowType],
) -> list[ModelConfiguration]:
    models = (
        select(ModelConfiguration)
        .join(LLMModelFlow)
        .where(LLMModelFlow.llm_model_flow_type.in_(flow_types))
        .options(
            selectinload(ModelConfiguration.llm_provider),
            selectinload(ModelConfiguration.llm_model_flows),
        )
    )

    return list(db_session.scalars(models).all())


def fetch_existing_llm_providers(
    db_session: Session,
    flow_type_filter: list[LLMModelFlowType],
    only_public: bool = False,
    exclude_image_generation_providers: bool = True,
) -> list[LLMProviderModel]:
    """Fetch all LLM providers with optional filtering.

    Args:
        db_session: Database session
        flow_type_filter: List of flow types to filter by, empty list for no filter
        only_public: If True, only return public providers
        exclude_image_generation_providers: If True, exclude providers that are
            used for image generation configs
    """
    stmt = select(LLMProviderModel)

    if flow_type_filter:
        providers_with_flows = (
            select(ModelConfiguration.llm_provider_id)
            .join(LLMModelFlow)
            .where(LLMModelFlow.llm_model_flow_type.in_(flow_type_filter))
            .distinct()
        )
        stmt = stmt.where(LLMProviderModel.id.in_(providers_with_flows))

    if exclude_image_generation_providers:
        image_gen_provider_ids = select(ModelConfiguration.llm_provider_id).join(
            ImageGenerationConfig
        )
        stmt = stmt.where(~LLMProviderModel.id.in_(image_gen_provider_ids))

    stmt = stmt.options(
        selectinload(LLMProviderModel.model_configurations),
        selectinload(LLMProviderModel.groups),
        selectinload(LLMProviderModel.personas),
    )

    providers = list(db_session.scalars(stmt).all())
    if only_public:
        return [provider for provider in providers if provider.is_public]
    return providers


def fetch_existing_llm_provider(
    name: str, db_session: Session
) -> LLMProviderModel | None:
    provider_model = db_session.scalar(
        select(LLMProviderModel)
        .where(LLMProviderModel.name == name)
        .options(
            selectinload(LLMProviderModel.model_configurations),
            selectinload(LLMProviderModel.groups),
            selectinload(LLMProviderModel.personas),
        )
    )

    return provider_model


def fetch_existing_llm_provider_by_id(
    id: int, db_session: Session
) -> LLMProviderModel | None:
    provider_model = db_session.scalar(
        select(LLMProviderModel)
        .where(LLMProviderModel.id == id)
        .options(
            selectinload(LLMProviderModel.model_configurations),
            selectinload(LLMProviderModel.groups),
            selectinload(LLMProviderModel.personas),
        )
    )

    return provider_model


def fetch_embedding_provider(
    db_session: Session, provider_type: EmbeddingProvider
) -> CloudEmbeddingProviderModel | None:
    return db_session.scalar(
        select(CloudEmbeddingProviderModel).where(
            CloudEmbeddingProviderModel.provider_type == provider_type
        )
    )


def fetch_default_llm_model(db_session: Session) -> ModelConfiguration | None:
    return fetch_default_model(db_session, LLMModelFlowType.CHAT)


def fetch_default_vision_model(db_session: Session) -> ModelConfiguration | None:
    return fetch_default_model(db_session, LLMModelFlowType.VISION)


def fetch_default_contextual_rag_model(
    db_session: Session,
) -> ModelConfiguration | None:
    return fetch_default_model(db_session, LLMModelFlowType.CONTEXTUAL_RAG)


def fetch_default_model(
    db_session: Session,
    flow_type: LLMModelFlowType,
) -> ModelConfiguration | None:
    model_config = db_session.scalar(
        select(ModelConfiguration)
        .options(selectinload(ModelConfiguration.llm_provider))
        .join(LLMModelFlow)
        .where(
            LLMModelFlow.llm_model_flow_type == flow_type,
            LLMModelFlow.is_default == True,  # noqa: E712
        )
    )

    return model_config


def fetch_llm_provider_view(
    db_session: Session, provider_name: str
) -> LLMProviderView | None:
    provider_model = fetch_existing_llm_provider(
        name=provider_name, db_session=db_session
    )
    if not provider_model:
        return None
    return LLMProviderView.from_model(provider_model)


def remove_embedding_provider(
    db_session: Session, provider_type: EmbeddingProvider
) -> None:
    db_session.execute(
        delete(SearchSettings).where(SearchSettings.provider_type == provider_type)
    )

    # Delete the embedding provider
    db_session.execute(
        delete(CloudEmbeddingProviderModel).where(
            CloudEmbeddingProviderModel.provider_type == provider_type
        )
    )

    db_session.commit()


def remove_llm_provider(db_session: Session, provider_id: int) -> None:
    provider = db_session.get(LLMProviderModel, provider_id)
    if not provider:
        raise ValueError("LLM Provider not found")

    # Clear the provider override from any personas using it
    # This causes them to fall back to the default provider
    personas_using_provider = get_personas_using_provider(db_session, provider.name)
    for persona in personas_using_provider:
        persona.llm_model_provider_override = None

    db_session.execute(
        delete(LLMProvider__UserGroup).where(
            LLMProvider__UserGroup.llm_provider_id == provider_id
        )
    )
    # Remove LLMProvider
    db_session.execute(
        delete(LLMProviderModel).where(LLMProviderModel.id == provider_id)
    )
    db_session.commit()


def remove_llm_provider__no_commit(db_session: Session, provider_id: int) -> None:
    """Remove LLM provider."""
    provider = db_session.get(LLMProviderModel, provider_id)
    if not provider:
        raise ValueError("LLM Provider not found")

    # Clear the provider override from any personas using it
    # This causes them to fall back to the default provider
    personas_using_provider = get_personas_using_provider(db_session, provider.name)
    for persona in personas_using_provider:
        persona.llm_model_provider_override = None

    db_session.execute(
        delete(LLMProvider__UserGroup).where(
            LLMProvider__UserGroup.llm_provider_id == provider_id
        )
    )
    # Remove LLMProvider
    db_session.execute(
        delete(LLMProviderModel).where(LLMProviderModel.id == provider_id)
    )
    db_session.flush()


def update_default_provider(
    provider_id: int, model_name: str, db_session: Session
) -> None:
    _update_default_model(
        db_session,
        provider_id,
        model_name,
        LLMModelFlowType.CHAT,
    )


def update_default_vision_provider(
    provider_id: int, vision_model: str, db_session: Session
) -> None:
    provider = db_session.scalar(
        select(LLMProviderModel).where(
            LLMProviderModel.id == provider_id,
        )
    )

    if provider is None:
        raise ValueError(f"LLM Provider with id={provider_id} does not exist")

    if not model_supports_image_input(vision_model, provider.provider):
        raise ValueError(
            f"Model '{vision_model}' for provider '{provider.provider} does not support image input"
        )

    _update_default_model(
        db_session=db_session,
        provider_id=provider_id,
        model=vision_model,
        flow_type=LLMModelFlowType.VISION,
    )


def update_no_default_contextual_rag_provider(
    db_session: Session,
) -> None:
    db_session.execute(
        update(LLMModelFlow)
        .where(
            LLMModelFlow.llm_model_flow_type == LLMModelFlowType.CONTEXTUAL_RAG,
            LLMModelFlow.is_default == True,  # noqa: E712
        )
        .values(is_default=False)
    )
    db_session.commit()


def update_default_contextual_model(
    db_session: Session,
    enable_contextual_rag: bool,
    contextual_rag_llm_provider: str | None,
    contextual_rag_llm_name: str | None,
) -> None:
    """Sets or clears the default contextual RAG model.

    Should be called whenever the PRESENT search settings change
    (e.g. inline update or FUTURE → PRESENT swap).
    """
    if (
        not enable_contextual_rag
        or not contextual_rag_llm_name
        or not contextual_rag_llm_provider
    ):
        update_no_default_contextual_rag_provider(db_session=db_session)
        return

    provider = fetch_existing_llm_provider(
        name=contextual_rag_llm_provider, db_session=db_session
    )
    if not provider:
        raise ValueError(f"Provider '{contextual_rag_llm_provider}' not found")

    model_config = next(
        (
            mc
            for mc in provider.model_configurations
            if mc.name == contextual_rag_llm_name
        ),
        None,
    )
    if not model_config:
        raise ValueError(
            f"Model '{contextual_rag_llm_name}' not found for provider '{contextual_rag_llm_provider}'"
        )

    add_model_to_flow(
        db_session=db_session,
        model_configuration_id=model_config.id,
        flow_type=LLMModelFlowType.CONTEXTUAL_RAG,
    )
    _update_default_model(
        db_session=db_session,
        provider_id=provider.id,
        model=contextual_rag_llm_name,
        flow_type=LLMModelFlowType.CONTEXTUAL_RAG,
    )

    return


def fetch_auto_mode_providers(db_session: Session) -> list[LLMProviderModel]:
    """Fetch all LLM providers that are in Auto mode."""
    query = (
        select(LLMProviderModel)
        .where(LLMProviderModel.is_auto_mode.is_(True))
        .options(selectinload(LLMProviderModel.model_configurations))
    )
    return list(db_session.scalars(query).all())


def sync_auto_mode_models(
    db_session: Session,
    provider: LLMProviderModel,
    llm_recommendations: LLMRecommendations,
) -> int:
    """Sync models from GitHub config to a provider in Auto mode.

    In Auto mode, the model list and default are controlled by GitHub config.
    The schema has:
    - default_model: The default model config (always visible)
    - additional_visible_models: List of additional visible models

    Admin only provides API credentials.

    Args:
        db_session: Database session
        provider: LLM provider in Auto mode
        github_config: Configuration from GitHub

    Returns:
        The number of changes made.
    """
    changes = 0

    # Build the list of all visible models from the config
    # All models in the config are visible (default + additional_visible_models)
    recommended_visible_models = llm_recommendations.get_visible_models(
        provider.provider
    )
    recommended_visible_model_names = [
        model.name for model in recommended_visible_models
    ]

    # Get existing models
    existing_models: dict[str, ModelConfiguration] = {
        mc.name: mc
        for mc in db_session.scalars(
            select(ModelConfiguration).where(
                ModelConfiguration.llm_provider_id == provider.id
            )
        ).all()
    }

    # Mark models that are no longer in GitHub config as not visible
    for model_name, model in existing_models.items():
        if model_name not in recommended_visible_model_names:
            if model.is_visible:
                model.is_visible = False
                changes += 1

    # Add or update models from GitHub config
    for model_config in recommended_visible_models:
        if model_config.name in existing_models:
            # Update existing model
            existing = existing_models[model_config.name]
            # Check each field for changes
            updated = False
            if existing.display_name != model_config.display_name:
                existing.display_name = model_config.display_name
                updated = True
            # All models in the config are visible
            if not existing.is_visible:
                existing.is_visible = True
                updated = True
            if updated:
                changes += 1
        else:
            # Add new model - all models from GitHub config are visible
            insert_new_model_configuration__no_commit(
                db_session=db_session,
                llm_provider_id=provider.id,
                model_name=model_config.name,
                supported_flows=[LLMModelFlowType.CHAT],
                is_visible=True,
                max_input_tokens=None,
                display_name=model_config.display_name,
            )
            changes += 1

    # Update the default if this provider currently holds the global CHAT default.
    # We flush (but don't commit) so that _update_default_model can see the new
    # model rows, then commit everything atomically to avoid a window where the
    # old default is invisible but still pointed-to.
    db_session.flush()

    recommended_default = llm_recommendations.get_default_model(provider.provider)
    if recommended_default:
        current_default = fetch_default_llm_model(db_session)

        if (
            current_default
            and current_default.llm_provider_id == provider.id
            and current_default.name != recommended_default.name
        ):
            _update_default_model__no_commit(
                db_session=db_session,
                provider_id=provider.id,
                model=recommended_default.name,
                flow_type=LLMModelFlowType.CHAT,
            )
            changes += 1

    db_session.commit()
    return changes


def create_new_flow_mapping__no_commit(
    db_session: Session,
    model_configuration_id: int,
    flow_type: LLMModelFlowType,
) -> LLMModelFlow:
    result = db_session.execute(
        insert(LLMModelFlow)
        .values(
            model_configuration_id=model_configuration_id,
            llm_model_flow_type=flow_type,
            is_default=False,
        )
        .on_conflict_do_nothing()
        .returning(LLMModelFlow)
    )

    flow = result.scalar()
    if not flow:
        # Row already exists — fetch it
        flow = db_session.scalar(
            select(LLMModelFlow).where(
                LLMModelFlow.model_configuration_id == model_configuration_id,
                LLMModelFlow.llm_model_flow_type == flow_type,
            )
        )
    if not flow:
        raise ValueError(
            f"Failed to create or find flow mapping for model_configuration_id={model_configuration_id} and flow_type={flow_type}"
        )

    return flow


def insert_new_model_configuration__no_commit(
    db_session: Session,
    llm_provider_id: int,
    model_name: str,
    supported_flows: list[LLMModelFlowType],
    is_visible: bool,
    max_input_tokens: int | None,
    display_name: str | None,
) -> int | None:
    result = db_session.execute(
        insert(ModelConfiguration)
        .values(
            llm_provider_id=llm_provider_id,
            name=model_name,
            is_visible=is_visible,
            max_input_tokens=max_input_tokens,
            display_name=display_name,
            supports_image_input=LLMModelFlowType.VISION in supported_flows,
        )
        .on_conflict_do_nothing()
        .returning(ModelConfiguration.id)
    )

    model_config_id = result.scalar()

    if not model_config_id:
        return None

    for flow_type in supported_flows:
        create_new_flow_mapping__no_commit(
            db_session=db_session,
            model_configuration_id=model_config_id,
            flow_type=flow_type,
        )

    return model_config_id


def update_model_configuration__no_commit(
    db_session: Session,
    model_configuration_id: int,
    supported_flows: list[LLMModelFlowType],
    is_visible: bool,
    max_input_tokens: int | None,
    display_name: str | None,
) -> None:
    result = db_session.execute(
        update(ModelConfiguration)
        .values(
            is_visible=is_visible,
            max_input_tokens=max_input_tokens,
            display_name=display_name,
            supports_image_input=LLMModelFlowType.VISION in supported_flows,
        )
        .where(ModelConfiguration.id == model_configuration_id)
        .returning(ModelConfiguration)
    )

    model_configuration = result.scalar()
    if not model_configuration:
        raise ValueError(
            f"Failed to update model configuration with id={model_configuration_id}"
        )

    new_flows = {
        flow_type
        for flow_type in supported_flows
        if flow_type not in model_configuration.llm_model_flow_types
    }
    removed_flows = {
        flow_type
        for flow_type in model_configuration.llm_model_flow_types
        if flow_type not in supported_flows
    }

    for flow_type in new_flows:
        create_new_flow_mapping__no_commit(
            db_session=db_session,
            model_configuration_id=model_configuration_id,
            flow_type=flow_type,
        )

    for flow_type in removed_flows:
        db_session.execute(
            delete(LLMModelFlow).where(
                LLMModelFlow.model_configuration_id == model_configuration_id,
                LLMModelFlow.llm_model_flow_type == flow_type,
            )
        )

    db_session.flush()


def _update_default_model__no_commit(
    db_session: Session,
    provider_id: int,
    model: str,
    flow_type: LLMModelFlowType,
) -> None:
    result = db_session.execute(
        select(ModelConfiguration, LLMModelFlow)
        .join(
            LLMModelFlow, LLMModelFlow.model_configuration_id == ModelConfiguration.id
        )
        .where(
            ModelConfiguration.llm_provider_id == provider_id,
            ModelConfiguration.name == model,
            LLMModelFlow.llm_model_flow_type == flow_type,
        )
    ).first()

    if not result:
        raise ValueError(
            f"Model '{model}' is not a valid model for provider_id={provider_id}"
        )

    model_config, new_default = result

    # Clear existing default and set in an atomic operation
    db_session.execute(
        update(LLMModelFlow)
        .where(
            LLMModelFlow.llm_model_flow_type == flow_type,
            LLMModelFlow.is_default == True,  # noqa: E712
        )
        .values(is_default=False)
    )

    new_default.is_default = True
    model_config.is_visible = True


def _update_default_model(
    db_session: Session,
    provider_id: int,
    model: str,
    flow_type: LLMModelFlowType,
) -> None:
    _update_default_model__no_commit(db_session, provider_id, model, flow_type)
    db_session.commit()


def add_model_to_flow(
    db_session: Session,
    model_configuration_id: int,
    flow_type: LLMModelFlowType,
) -> None:
    # Function does nothing on conflict
    create_new_flow_mapping__no_commit(
        db_session=db_session,
        model_configuration_id=model_configuration_id,
        flow_type=flow_type,
    )

    db_session.commit()


================================================
FILE: backend/onyx/db/mcp.py
================================================
import datetime
from typing import cast
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import flag_modified

from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPServerStatus
from onyx.db.enums import MCPTransport
from onyx.db.models import MCPAuthenticationType
from onyx.db.models import MCPConnectionConfig
from onyx.db.models import MCPServer
from onyx.db.models import Persona
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.server.features.mcp.models import MCPConnectionData
from onyx.utils.logger import setup_logger
from onyx.utils.sensitive import SensitiveValue

logger = setup_logger()


# MCPServer operations
def get_all_mcp_servers(db_session: Session) -> list[MCPServer]:
    """Get all MCP servers"""
    return list(
        db_session.scalars(select(MCPServer).order_by(MCPServer.created_at)).all()
    )


def get_mcp_server_by_id(server_id: int, db_session: Session) -> MCPServer:
    """Get MCP server by ID"""
    server = db_session.scalar(select(MCPServer).where(MCPServer.id == server_id))
    if not server:
        raise ValueError("MCP server by specified id does not exist")
    return server


def get_mcp_servers_by_owner(owner_email: str, db_session: Session) -> list[MCPServer]:
    """Get all MCP servers owned by a specific user"""
    return list(
        db_session.scalars(
            select(MCPServer).where(MCPServer.owner == owner_email)
        ).all()
    )


def get_mcp_servers_for_persona(
    persona_id: int,
    db_session: Session,
    user: User,  # noqa: ARG001
) -> list[MCPServer]:
    """Get all MCP servers associated with a persona via its tools"""
    # Get the persona and its tools
    persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
    if not persona:
        return []

    # Collect unique MCP server IDs from the persona's tools
    mcp_server_ids = set()
    for tool in persona.tools:
        if tool.mcp_server_id:
            mcp_server_ids.add(tool.mcp_server_id)

    if not mcp_server_ids:
        return []

    # Fetch the MCP servers
    mcp_servers = (
        db_session.query(MCPServer).filter(MCPServer.id.in_(mcp_server_ids)).all()
    )

    return list(mcp_servers)


def get_mcp_servers_accessible_to_user(
    user_id: UUID, db_session: Session
) -> list[MCPServer]:
    """Get all MCP servers accessible to a user (directly or through groups)"""
    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore
    if not user:
        return []
    user = cast(User, user)
    # Get servers accessible directly to user
    user_servers = list(user.accessible_mcp_servers)

    # TODO: Add group-based access once relationships are fully implemented
    # For now, just return direct user access
    return user_servers


def create_mcp_server__no_commit(
    owner_email: str,
    name: str,
    description: str | None,
    server_url: str,
    auth_type: MCPAuthenticationType | None,
    transport: MCPTransport | None,
    auth_performer: MCPAuthenticationPerformer | None,
    db_session: Session,
    admin_connection_config_id: int | None = None,
) -> MCPServer:
    """Create a new MCP server"""
    new_server = MCPServer(
        owner=owner_email,
        name=name,
        description=description,
        server_url=server_url,
        transport=transport,
        auth_type=auth_type,
        auth_performer=auth_performer,
        admin_connection_config_id=admin_connection_config_id,
    )
    db_session.add(new_server)
    db_session.flush()  # Get the ID without committing
    return new_server


def update_mcp_server__no_commit(
    server_id: int,
    db_session: Session,
    name: str | None = None,
    description: str | None = None,
    server_url: str | None = None,
    auth_type: MCPAuthenticationType | None = None,
    admin_connection_config_id: int | None = None,
    auth_performer: MCPAuthenticationPerformer | None = None,
    transport: MCPTransport | None = None,
    status: MCPServerStatus | None = None,
    last_refreshed_at: datetime.datetime | None = None,
) -> MCPServer:
    """Update an existing MCP server"""
    server = get_mcp_server_by_id(server_id, db_session)

    if name is not None:
        server.name = name
    if description is not None:
        server.description = description
    if server_url is not None:
        server.server_url = server_url
    if auth_type is not None:
        server.auth_type = auth_type
    if admin_connection_config_id is not None:
        server.admin_connection_config_id = admin_connection_config_id
    if auth_performer is not None:
        server.auth_performer = auth_performer
    if transport is not None:
        server.transport = transport
    if status is not None:
        server.status = status
    if last_refreshed_at is not None:
        server.last_refreshed_at = last_refreshed_at

    db_session.flush()  # Don't commit yet, let caller decide when to commit
    return server


def delete_mcp_server(server_id: int, db_session: Session) -> None:
    """Delete an MCP server and all associated tools (via CASCADE)"""
    server = get_mcp_server_by_id(server_id, db_session)

    # Count tools that will be deleted
    tools_count = db_session.query(Tool).filter(Tool.mcp_server_id == server_id).count()
    logger.info(f"Deleting MCP server {server_id} with {tools_count} associated tools")

    db_session.delete(server)
    db_session.commit()

    logger.info(f"Successfully deleted MCP server {server_id} and its tools")


def get_all_mcp_tools_for_server(server_id: int, db_session: Session) -> list[Tool]:
    """Get all MCP tools for a server"""
    return list(
        db_session.scalars(select(Tool).where(Tool.mcp_server_id == server_id)).all()
    )


def add_user_to_mcp_server(server_id: int, user_id: UUID, db_session: Session) -> None:
    """Grant a user access to an MCP server"""
    server = get_mcp_server_by_id(server_id, db_session)
    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore
    if not user:
        raise ValueError("User not found")

    if user not in server.users:
        server.users.append(user)
        db_session.commit()


def remove_user_from_mcp_server(
    server_id: int, user_id: UUID, db_session: Session
) -> None:
    """Remove a user's access to an MCP server"""
    server = get_mcp_server_by_id(server_id, db_session)
    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore
    if not user:
        raise ValueError("User not found")

    if user in server.users:
        server.users.remove(user)
        db_session.commit()


# MCPConnectionConfig operations
def extract_connection_data(
    config: MCPConnectionConfig | None, apply_mask: bool = False
) -> MCPConnectionData:
    """Extract MCPConnectionData from a connection config, with proper typing.

    This helper encapsulates the cast from the JSON column's dict[str, Any]
    to the typed MCPConnectionData structure.
    """
    if config is None or config.config is None:
        return MCPConnectionData(headers={})
    if isinstance(config.config, SensitiveValue):
        return cast(MCPConnectionData, config.config.get_value(apply_mask=apply_mask))
    return cast(MCPConnectionData, config.config)


def get_connection_config_by_id(
    config_id: int, db_session: Session
) -> MCPConnectionConfig:
    """Get connection config by ID"""
    config = db_session.scalar(
        select(MCPConnectionConfig).where(MCPConnectionConfig.id == config_id)
    )
    if not config:
        raise ValueError("Connection config by specified id does not exist")
    return config


def get_user_connection_config(
    server_id: int, user_email: str, db_session: Session
) -> MCPConnectionConfig | None:
    """Get a user's connection config for a specific MCP server"""
    return db_session.scalar(
        select(MCPConnectionConfig).where(
            and_(
                MCPConnectionConfig.mcp_server_id == server_id,
                MCPConnectionConfig.user_email == user_email,
            )
        )
    )


def get_user_connection_configs_for_server(
    server_id: int, db_session: Session
) -> list[MCPConnectionConfig]:
    """Get all user connection configs for a specific MCP server"""
    return list(
        db_session.scalars(
            select(MCPConnectionConfig).where(
                MCPConnectionConfig.mcp_server_id == server_id
            )
        ).all()
    )


def create_connection_config(
    config_data: MCPConnectionData,
    db_session: Session,
    mcp_server_id: int | None = None,
    user_email: str = "",
) -> MCPConnectionConfig:
    """Create a new connection config"""
    new_config = MCPConnectionConfig(
        mcp_server_id=mcp_server_id,
        user_email=user_email,
        config=config_data,
    )
    db_session.add(new_config)
    db_session.flush()  # Don't commit yet, let caller decide when to commit
    return new_config


def update_connection_config(
    config_id: int,
    db_session: Session,
    config_data: MCPConnectionData | None = None,
) -> MCPConnectionConfig:
    """Update an existing connection config"""
    config = get_connection_config_by_id(config_id, db_session)

    if config_data is not None:
        config.config = config_data  # type: ignore[assignment]
        # Force SQLAlchemy to detect the change by marking the field as modified
        flag_modified(config, "config")

    db_session.commit()
    return config


def upsert_user_connection_config(
    server_id: int,
    user_email: str,
    config_data: MCPConnectionData,
    db_session: Session,
) -> MCPConnectionConfig:
    """Create or update a user's connection config for an MCP server"""
    existing_config = get_user_connection_config(server_id, user_email, db_session)

    if existing_config:
        existing_config.config = config_data  # type: ignore[assignment]
        db_session.flush()  # Don't commit yet, let caller decide when to commit
        return existing_config
    else:
        return create_connection_config(
            config_data=config_data,
            mcp_server_id=server_id,
            user_email=user_email,
            db_session=db_session,
        )


# TODO: do this in one db call
def get_server_auth_template(
    server_id: int, db_session: Session
) -> MCPConnectionConfig | None:
    """Get the authentication template for a server (from the admin connection config)"""
    server = get_mcp_server_by_id(server_id, db_session)
    if not server.admin_connection_config_id:
        return None

    if server.auth_performer == MCPAuthenticationPerformer.ADMIN:
        return None  # admin server implies no template
    return server.admin_connection_config


def delete_connection_config(config_id: int, db_session: Session) -> None:
    """Delete a connection config"""
    config = get_connection_config_by_id(config_id, db_session)
    db_session.delete(config)
    db_session.flush()  # Don't commit yet, let caller decide when to commit


def delete_user_connection_configs_for_server(
    server_id: int, user_email: str, db_session: Session
) -> None:
    """Delete all connection configs for a user on a specific server"""
    configs = db_session.scalars(
        select(MCPConnectionConfig).where(
            and_(
                MCPConnectionConfig.mcp_server_id == server_id,
                MCPConnectionConfig.user_email == user_email,
            )
        )
    ).all()

    for config in configs:
        db_session.delete(config)

    db_session.commit()


def delete_all_user_connection_configs_for_server_no_commit(
    server_id: int, db_session: Session
) -> None:
    """Delete all user connection configs for a specific MCP server"""
    db_session.execute(
        delete(MCPConnectionConfig).where(
            MCPConnectionConfig.mcp_server_id == server_id
        )
    )
    db_session.flush()  # Don't commit yet, let caller decide when to commit


================================================
FILE: backend/onyx/db/memory.py
================================================
from uuid import UUID

from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import Memory
from onyx.db.models import User

MAX_MEMORIES_PER_USER = 10


class UserInfo(BaseModel):
    name: str | None = None
    role: str | None = None
    email: str | None = None

    def to_dict(self) -> dict:
        return {
            "name": self.name,
            "role": self.role,
            "email": self.email,
        }


class UserMemoryContext(BaseModel):
    model_config = ConfigDict(frozen=True)

    user_id: UUID | None = None
    user_info: UserInfo
    user_preferences: str | None = None
    memories: tuple[str, ...] = ()

    def without_memories(self) -> "UserMemoryContext":
        """Return a copy with memories cleared but user info/preferences intact."""
        return UserMemoryContext(
            user_id=self.user_id,
            user_info=self.user_info,
            user_preferences=self.user_preferences,
            memories=(),
        )

    def as_formatted_list(self) -> list[str]:
        """Returns combined list of user info, preferences, and memories."""
        result = []
        if self.user_info.name:
            result.append(f"User's name: {self.user_info.name}")
        if self.user_info.role:
            result.append(f"User's role: {self.user_info.role}")
        if self.user_info.email:
            result.append(f"User's email: {self.user_info.email}")
        if self.user_preferences:
            result.append(f"User preferences: {self.user_preferences}")
        result.extend(self.memories)
        return result


def get_memories(user: User, db_session: Session) -> UserMemoryContext:
    user_info = UserInfo(
        name=user.personal_name,
        role=user.personal_role,
        email=user.email,
    )

    user_preferences = None
    if user.user_preferences:
        user_preferences = user.user_preferences

    memory_rows = db_session.scalars(
        select(Memory).where(Memory.user_id == user.id).order_by(Memory.id.asc())
    ).all()
    memories = tuple(memory.memory_text for memory in memory_rows if memory.memory_text)

    return UserMemoryContext(
        user_id=user.id,
        user_info=user_info,
        user_preferences=user_preferences,
        memories=memories,
    )


def add_memory(
    user_id: UUID,
    memory_text: str,
    db_session: Session,
) -> Memory:
    """Insert a new Memory row for the given user.

    If the user already has MAX_MEMORIES_PER_USER memories, the oldest
    one (lowest id) is deleted before inserting the new one.
    """
    existing = db_session.scalars(
        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())
    ).all()

    if len(existing) >= MAX_MEMORIES_PER_USER:
        db_session.delete(existing[0])

    memory = Memory(
        user_id=user_id,
        memory_text=memory_text,
    )
    db_session.add(memory)
    db_session.commit()
    return memory


def update_memory_at_index(
    user_id: UUID,
    index: int,
    new_text: str,
    db_session: Session,
) -> Memory | None:
    """Update the memory at the given 0-based index (ordered by id ASC, matching get_memories()).

    Returns the updated Memory row, or None if the index is out of range.
    """
    memory_rows = db_session.scalars(
        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())
    ).all()

    if index < 0 or index >= len(memory_rows):
        return None

    memory = memory_rows[index]
    memory.memory_text = new_text
    db_session.commit()
    return memory


================================================
FILE: backend/onyx/db/models.py
================================================
import datetime
import json
from typing import Any
from typing import Literal
from typing import NotRequired
from uuid import uuid4

from pydantic import BaseModel
from sqlalchemy.orm import validates

from typing_extensions import TypedDict  # noreorder
from uuid import UUID
from pydantic import ValidationError

from sqlalchemy.dialects.postgresql import JSONB as PGJSONB
from sqlalchemy.dialects.postgresql import UUID as PGUUID

from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID
from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID
from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID
from fastapi_users_db_sqlalchemy.generics import TIMESTAMPAware
from sqlalchemy import Boolean
from sqlalchemy import DateTime
from sqlalchemy import desc
from sqlalchemy import Enum
from sqlalchemy import Float
from sqlalchemy import ForeignKey
from sqlalchemy import ForeignKeyConstraint
from sqlalchemy import func
from sqlalchemy import Index
from sqlalchemy import Integer
from sqlalchemy import BigInteger

from sqlalchemy import Sequence
from sqlalchemy import String
from sqlalchemy import Text
from sqlalchemy import text
from sqlalchemy import UniqueConstraint
from sqlalchemy.dialects import postgresql
from sqlalchemy import event
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import Mapper
from sqlalchemy.orm import mapped_column
from sqlalchemy.orm import relationship
from sqlalchemy.types import LargeBinary
from sqlalchemy.types import TypeDecorator
from sqlalchemy import PrimaryKeyConstraint

from onyx.db.enums import AccountType
from onyx.auth.schemas import UserRole
from onyx.configs.constants import (
    ANONYMOUS_USER_UUID,
    DEFAULT_BOOST,
    FederatedConnectorSource,
    MilestoneRecordType,
)
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MessageType
from onyx.db.enums import (
    AccessType,
    ArtifactType,
    BuildSessionStatus,
    EmbeddingPrecision,
    HierarchyNodeType,
    HookFailStrategy,
    HookPoint,
    IndexingMode,
    OpenSearchDocumentMigrationStatus,
    OpenSearchTenantMigrationStatus,
    ProcessingMode,
    SandboxStatus,
    SyncType,
    SyncStatus,
    MCPAuthenticationType,
    UserFileStatus,
    MCPAuthenticationPerformer,
    MCPTransport,
    MCPServerStatus,
    Permission,
    GrantSource,
    LLMModelFlowType,
    ThemePreference,
    DefaultAppMode,
    SwitchoverType,
    SharingScope,
)
from onyx.configs.constants import NotificationType
from onyx.configs.constants import SearchFeedbackType
from onyx.configs.constants import TokenRateLimitScope
from onyx.connectors.models import InputType
from onyx.db.enums import ChatSessionSharedStatus
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.enums import TaskStatus
from onyx.db.pydantic_type import PydanticListType, PydanticType
from onyx.kg.models import KGEntityTypeAttributes
from onyx.utils.logger import setup_logger
from onyx.utils.special_types import JSON_ro
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
from onyx.kg.models import KGStage
from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from onyx.utils.encryption import decrypt_bytes_to_string
from onyx.utils.encryption import encrypt_string_to_bytes
from onyx.utils.sensitive import SensitiveValue
from onyx.utils.headers import HeaderItemDict
from shared_configs.enums import EmbeddingProvider

# TODO: After anonymous user migration has been deployed, make user_id columns NOT NULL
# and update Mapped[User | None] relationships to Mapped[User] where needed.


logger = setup_logger()

PROMPT_LENGTH = 5_000_000


class Base(DeclarativeBase):
    __abstract__ = True


class _EncryptedBase(TypeDecorator):
    """Base for encrypted column types that wrap values in SensitiveValue."""

    impl = LargeBinary
    cache_ok = True
    _is_json: bool = False

    def wrap_raw(self, value: Any) -> SensitiveValue:
        """Encrypt a raw value and wrap it in SensitiveValue.

        Called by the attribute set event so the Python-side type is always
        SensitiveValue, regardless of whether the value was loaded from the DB
        or assigned in application code.
        """
        if self._is_json:
            if not isinstance(value, dict):
                raise TypeError(
                    f"EncryptedJson column expected dict, got {type(value).__name__}"
                )
            raw_str = json.dumps(value)
        else:
            if not isinstance(value, str):
                raise TypeError(
                    f"EncryptedString column expected str, got {type(value).__name__}"
                )
            raw_str = value
        return SensitiveValue(
            encrypted_bytes=encrypt_string_to_bytes(raw_str),
            decrypt_fn=decrypt_bytes_to_string,
            is_json=self._is_json,
        )

    def compare_values(self, x: Any, y: Any) -> bool:
        if x is None or y is None:
            return x == y
        if isinstance(x, SensitiveValue):
            x = x.get_value(apply_mask=False)
        if isinstance(y, SensitiveValue):
            y = y.get_value(apply_mask=False)
        return x == y


class EncryptedString(_EncryptedBase):
    # Must redeclare cache_ok in this child class since we explicitly redeclare _is_json
    cache_ok = True
    _is_json: bool = False

    def process_bind_param(
        self,
        value: str | SensitiveValue[str] | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> bytes | None:
        if value is not None:
            # Handle both raw strings and SensitiveValue wrappers
            if isinstance(value, SensitiveValue):
                # Get raw value for storage
                value = value.get_value(apply_mask=False)
            return encrypt_string_to_bytes(value)
        return value

    def process_result_value(
        self,
        value: bytes | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> SensitiveValue[str] | None:
        if value is not None:
            return SensitiveValue(
                encrypted_bytes=value,
                decrypt_fn=decrypt_bytes_to_string,
                is_json=False,
            )
        return None


class EncryptedJson(_EncryptedBase):
    cache_ok = True
    _is_json: bool = True

    def process_bind_param(
        self,
        value: dict[str, Any] | SensitiveValue[dict[str, Any]] | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> bytes | None:
        if value is not None:
            if isinstance(value, SensitiveValue):
                value = value.get_value(apply_mask=False)
            json_str = json.dumps(value)
            return encrypt_string_to_bytes(json_str)
        return value

    def process_result_value(
        self,
        value: bytes | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> SensitiveValue[dict[str, Any]] | None:
        if value is not None:
            return SensitiveValue(
                encrypted_bytes=value,
                decrypt_fn=decrypt_bytes_to_string,
                is_json=True,
            )
        return None


_REGISTERED_ATTRS: set[str] = set()


@event.listens_for(Mapper, "mapper_configured")
def _register_sensitive_value_set_events(
    mapper: Mapper,
    class_: type,
) -> None:
    """Auto-wrap raw values in SensitiveValue when assigned to encrypted columns."""
    for prop in mapper.column_attrs:
        for col in prop.columns:
            if isinstance(col.type, _EncryptedBase):
                col_type = col.type
                attr = getattr(class_, prop.key)

                # Guard against double-registration (e.g. if mapper is
                # re-configured in test setups)
                attr_key = f"{class_.__qualname__}.{prop.key}"
                if attr_key in _REGISTERED_ATTRS:
                    continue
                _REGISTERED_ATTRS.add(attr_key)

                @event.listens_for(attr, "set", retval=True)
                def _wrap_value(
                    target: Any,  # noqa: ARG001
                    value: Any,
                    oldvalue: Any,  # noqa: ARG001
                    initiator: Any,  # noqa: ARG001
                    _col_type: _EncryptedBase = col_type,
                ) -> Any:
                    if value is not None and not isinstance(value, SensitiveValue):
                        return _col_type.wrap_raw(value)
                    return value


class NullFilteredString(TypeDecorator):
    impl = String
    # This type's behavior is fully deterministic and doesn't depend on any external factors.
    cache_ok = True

    def process_bind_param(
        self,
        value: str | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> str | None:
        if value is not None and "\x00" in value:
            logger.warning(f"NUL characters found in value: {value}")
            return value.replace("\x00", "")
        return value

    def process_result_value(
        self,
        value: str | None,
        dialect: Dialect,  # noqa: ARG002
    ) -> str | None:
        return value


"""
Auth/Authz (users, permissions, access) Tables
"""


class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
    # even an almost empty token from keycloak will not fit the default 1024 bytes
    access_token: Mapped[str] = mapped_column(Text, nullable=False)  # type: ignore
    refresh_token: Mapped[str] = mapped_column(Text, nullable=False)  # type: ignore


class User(SQLAlchemyBaseUserTableUUID, Base):
    oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
        "OAuthAccount", lazy="joined", cascade="all, delete-orphan"
    )
    role: Mapped[UserRole] = mapped_column(
        Enum(UserRole, native_enum=False, default=UserRole.BASIC)
    )
    account_type: Mapped[AccountType] = mapped_column(
        Enum(AccountType, native_enum=False),
        nullable=False,
        default=AccountType.STANDARD,
        server_default="STANDARD",
    )

    """
    Preferences probably should be in a separate table at some point, but for now
    putting here for simpicity
    """

    temperature_override_enabled: Mapped[bool | None] = mapped_column(
        Boolean, default=None
    )
    auto_scroll: Mapped[bool | None] = mapped_column(Boolean, default=None)
    shortcut_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
    theme_preference: Mapped[ThemePreference | None] = mapped_column(
        Enum(ThemePreference, native_enum=False),
        nullable=True,
        default=None,
    )
    chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
    default_app_mode: Mapped[DefaultAppMode] = mapped_column(
        Enum(DefaultAppMode, native_enum=False),
        nullable=False,
        default=DefaultAppMode.CHAT,
    )
    # personalization fields are exposed via the chat user settings "Personalization" tab
    personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
    personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
    use_memories: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    enable_memory_tool: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=True
    )
    user_preferences: Mapped[str | None] = mapped_column(Text, nullable=True)

    chosen_assistants: Mapped[list[int] | None] = mapped_column(
        postgresql.JSONB(), nullable=True, default=None
    )
    visible_assistants: Mapped[list[int]] = mapped_column(
        postgresql.JSONB(), nullable=False, default=[]
    )
    hidden_assistants: Mapped[list[int]] = mapped_column(
        postgresql.JSONB(), nullable=False, default=[]
    )

    pinned_assistants: Mapped[list[int] | None] = mapped_column(
        postgresql.JSONB(), nullable=True, default=None
    )

    effective_permissions: Mapped[list[str]] = mapped_column(
        postgresql.JSONB(),
        nullable=False,
        default=list,
        server_default=text("'[]'::jsonb"),
    )

    oidc_expiry: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMPAware(timezone=True), nullable=True
    )

    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    default_model: Mapped[str] = mapped_column(Text, nullable=True)
    # organized in typical structured fashion
    # formatted as `displayName__provider__modelName`

    # Voice preferences
    voice_auto_send: Mapped[bool] = mapped_column(Boolean, default=False)
    voice_auto_playback: Mapped[bool] = mapped_column(Boolean, default=False)
    voice_playback_speed: Mapped[float] = mapped_column(Float, default=1.0)

    # relationships
    credentials: Mapped[list["Credential"]] = relationship(
        "Credential", back_populates="user"
    )
    chat_sessions: Mapped[list["ChatSession"]] = relationship(
        "ChatSession", back_populates="user"
    )

    input_prompts: Mapped[list["InputPrompt"]] = relationship(
        "InputPrompt", back_populates="user"
    )
    # Personas owned by this user
    personas: Mapped[list["Persona"]] = relationship("Persona", back_populates="user")
    # Custom tools created by this user
    custom_tools: Mapped[list["Tool"]] = relationship("Tool", back_populates="user")
    # Notifications for the UI
    notifications: Mapped[list["Notification"]] = relationship(
        "Notification", back_populates="user"
    )
    cc_pairs: Mapped[list["ConnectorCredentialPair"]] = relationship(
        "ConnectorCredentialPair",
        back_populates="creator",
        primaryjoin="User.id == foreign(ConnectorCredentialPair.creator_id)",
    )
    projects: Mapped[list["UserProject"]] = relationship(
        "UserProject", back_populates="user"
    )
    files: Mapped[list["UserFile"]] = relationship("UserFile", back_populates="user")
    # MCP servers accessible to this user
    accessible_mcp_servers: Mapped[list["MCPServer"]] = relationship(
        "MCPServer", secondary="mcp_server__user", back_populates="users"
    )
    memories: Mapped[list["Memory"]] = relationship(
        "Memory",
        back_populates="user",
        cascade="all, delete-orphan",
        order_by="desc(Memory.id)",
    )
    oauth_user_tokens: Mapped[list["OAuthUserToken"]] = relationship(
        "OAuthUserToken",
        back_populates="user",
        cascade="all, delete-orphan",
    )

    @validates("email")
    def validate_email(self, key: str, value: str) -> str:  # noqa: ARG002
        return value.lower() if value else value

    @property
    def password_configured(self) -> bool:
        """
        Returns True if the user has at least one OAuth (or OIDC) account.
        """
        return not bool(self.oauth_accounts)

    @property
    def is_anonymous(self) -> bool:
        """Returns True if this is the anonymous user."""
        return str(self.id) == ANONYMOUS_USER_UUID


class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):
    pass


class Memory(Base):
    __tablename__ = "memory"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    user_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    memory_text: Mapped[str] = mapped_column(Text, nullable=False)
    conversation_id: Mapped[UUID | None] = mapped_column(
        PGUUID(as_uuid=True), nullable=True
    )
    message_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    user: Mapped["User"] = relationship("User", back_populates="memories")


class ApiKey(Base):
    __tablename__ = "api_key"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str | None] = mapped_column(String, nullable=True)
    hashed_api_key: Mapped[str] = mapped_column(String, unique=True)
    api_key_display: Mapped[str] = mapped_column(String, unique=True)
    # the ID of the "user" who represents the access credentials for the API key
    user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), nullable=False)
    # the ID of the user who owns the key
    owner_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Add this relationship to access the User object via user_id
    user: Mapped["User"] = relationship("User", foreign_keys=[user_id])


class PersonalAccessToken(Base):
    __tablename__ = "personal_access_token"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, nullable=False)  # User-provided label
    hashed_token: Mapped[str] = mapped_column(
        String(64), unique=True, nullable=False
    )  # SHA256 = 64 hex chars
    token_display: Mapped[str] = mapped_column(String, nullable=False)

    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )

    expires_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, index=True
    )  # NULL = no expiration. Revocation sets this to NOW() for immediate expiry.

    # Audit fields
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    last_used_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    is_revoked: Mapped[bool] = mapped_column(
        Boolean, server_default=text("false"), nullable=False
    )  # True if user explicitly revoked (vs naturally expired)

    user: Mapped["User"] = relationship("User", foreign_keys=[user_id])

    # Indexes for performance
    __table_args__ = (
        Index(
            "ix_pat_user_created", user_id, created_at.desc()
        ),  # Fast user token listing
    )


class Notification(Base):
    __tablename__ = "notification"

    id: Mapped[int] = mapped_column(primary_key=True)
    notif_type: Mapped[NotificationType] = mapped_column(
        Enum(NotificationType, native_enum=False)
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    dismissed: Mapped[bool] = mapped_column(Boolean, default=False)
    last_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    title: Mapped[str] = mapped_column(String)
    description: Mapped[str | None] = mapped_column(String, nullable=True)

    user: Mapped[User] = relationship("User", back_populates="notifications")
    additional_data: Mapped[dict | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Unique constraint ix_notification_user_type_data on (user_id, notif_type, additional_data)
    # ensures notification deduplication for batch inserts. Defined in migration 8405ca81cc83.
    __table_args__ = (
        Index(
            "ix_notification_user_sort",
            "user_id",
            "dismissed",
            desc("first_shown"),
        ),
    )


"""
Association Tables
NOTE: must be at the top since they are referenced by other tables
"""


class Persona__DocumentSet(Base):
    __tablename__ = "persona__document_set"

    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
    document_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_set.id"), primary_key=True
    )


class Persona__User(Base):
    __tablename__ = "persona__user"

    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
    )


class DocumentSet__User(Base):
    __tablename__ = "document_set__user"

    document_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_set.id"), primary_key=True
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
    )


class DocumentSet__ConnectorCredentialPair(Base):
    __tablename__ = "document_set__connector_credential_pair"

    document_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_set.id"), primary_key=True
    )
    connector_credential_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"), primary_key=True
    )
    # if `True`, then is part of the current state of the document set
    # if `False`, then is a part of the prior state of the document set
    # rows with `is_current=False` should be deleted when the document
    # set is updated and should not exist for a given document set if
    # `DocumentSet.is_up_to_date == True`
    is_current: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        primary_key=True,
    )

    document_set: Mapped["DocumentSet"] = relationship("DocumentSet")


class ChatMessage__SearchDoc(Base):
    __tablename__ = "chat_message__search_doc"

    chat_message_id: Mapped[int] = mapped_column(
        ForeignKey("chat_message.id", ondelete="CASCADE"), primary_key=True
    )
    search_doc_id: Mapped[int] = mapped_column(
        ForeignKey("search_doc.id", ondelete="CASCADE"), primary_key=True
    )


class ToolCall__SearchDoc(Base):
    __tablename__ = "tool_call__search_doc"

    tool_call_id: Mapped[int] = mapped_column(
        ForeignKey("tool_call.id", ondelete="CASCADE"), primary_key=True
    )
    search_doc_id: Mapped[int] = mapped_column(
        ForeignKey("search_doc.id", ondelete="CASCADE"), primary_key=True
    )


class Document__Tag(Base):
    __tablename__ = "document__tag"

    document_id: Mapped[str] = mapped_column(
        ForeignKey("document.id"), primary_key=True
    )
    tag_id: Mapped[int] = mapped_column(
        ForeignKey("tag.id"), primary_key=True, index=True
    )


class Persona__Tool(Base):
    """An entry in this table represents a tool that is **available** to a persona.
    It does NOT necessarily mean that the tool is actually usable to the persona.

    For example, a persona may have the image generation tool attached to it, even though
    the image generation tool is not set up / enabled. In this case, the tool should not
    show up in the UI for the persona + it should not be usable by the persona in chat.
    """

    __tablename__ = "persona__tool"

    persona_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )
    tool_id: Mapped[int] = mapped_column(
        ForeignKey("tool.id", ondelete="CASCADE"), primary_key=True
    )


class StandardAnswer__StandardAnswerCategory(Base):
    __tablename__ = "standard_answer__standard_answer_category"

    standard_answer_id: Mapped[int] = mapped_column(
        ForeignKey("standard_answer.id"), primary_key=True
    )
    standard_answer_category_id: Mapped[int] = mapped_column(
        ForeignKey("standard_answer_category.id"), primary_key=True
    )


class SlackChannelConfig__StandardAnswerCategory(Base):
    __tablename__ = "slack_channel_config__standard_answer_category"

    slack_channel_config_id: Mapped[int] = mapped_column(
        ForeignKey("slack_channel_config.id"), primary_key=True
    )
    standard_answer_category_id: Mapped[int] = mapped_column(
        ForeignKey("standard_answer_category.id"), primary_key=True
    )


class ChatMessage__StandardAnswer(Base):
    __tablename__ = "chat_message__standard_answer"

    chat_message_id: Mapped[int] = mapped_column(
        ForeignKey("chat_message.id", ondelete="CASCADE"), primary_key=True
    )
    standard_answer_id: Mapped[int] = mapped_column(
        ForeignKey("standard_answer.id"), primary_key=True
    )


"""
Documents/Indexing Tables
"""


class ConnectorCredentialPair(Base):
    """Connectors and Credentials can have a many-to-many relationship
    I.e. A Confluence Connector may have multiple admin users who can run it with their own credentials
    I.e. An admin user may use the same credential to index multiple Confluence Spaces
    """

    __tablename__ = "connector_credential_pair"
    # NOTE: this `id` column has to use `Sequence` instead of `autoincrement=True`
    # due to some SQLAlchemy quirks + this not being a primary key column
    id: Mapped[int] = mapped_column(
        Integer,
        Sequence("connector_credential_pair_id_seq"),
        unique=True,
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String, nullable=False)
    status: Mapped[ConnectorCredentialPairStatus] = mapped_column(
        Enum(ConnectorCredentialPairStatus, native_enum=False), nullable=False
    )
    # this is separate from the `status` above, since a connector can be `INITIAL_INDEXING`, `ACTIVE`,
    # or `PAUSED` and still be in a repeated error state.
    in_repeated_error_state: Mapped[bool] = mapped_column(Boolean, default=False)
    connector_id: Mapped[int] = mapped_column(
        ForeignKey("connector.id"), primary_key=True
    )

    deletion_failure_message: Mapped[str | None] = mapped_column(String, nullable=True)

    credential_id: Mapped[int] = mapped_column(
        ForeignKey("credential.id"), primary_key=True
    )
    # controls whether the documents indexed by this CC pair are visible to all
    # or if they are only visible to those with that are given explicit access
    # (e.g. via owning the credential or being a part of a group that is given access)
    access_type: Mapped[AccessType] = mapped_column(
        Enum(AccessType, native_enum=False), nullable=False
    )

    # special info needed for the auto-sync feature. The exact structure depends on the

    # source type (defined in the connector's `source` field)
    # E.g. for google_drive perm sync:
    # {"customer_id": "123567", "company_domain": "@onyx.app"}
    auto_sync_options: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    last_time_perm_sync: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    last_time_external_group_sync: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    # Time finished, not used for calculating backend jobs which uses time started (created)
    last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )

    # last successful prune
    last_pruned: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, index=True
    )

    # last successful hierarchy fetch
    last_time_hierarchy_fetch: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0)

    indexing_trigger: Mapped[IndexingMode | None] = mapped_column(
        Enum(IndexingMode, native_enum=False), nullable=True
    )

    # Determines how documents are processed after fetching:
    # REGULAR: Full pipeline (chunk → embed → Vespa)
    # FILE_SYSTEM: Write to file system only (for CLI agent sandbox)
    processing_mode: Mapped[ProcessingMode] = mapped_column(
        Enum(ProcessingMode, native_enum=False),
        nullable=False,
        default=ProcessingMode.REGULAR,
        server_default="REGULAR",
    )

    connector: Mapped["Connector"] = relationship(
        "Connector", back_populates="credentials"
    )
    credential: Mapped["Credential"] = relationship(
        "Credential", back_populates="connectors"
    )
    document_sets: Mapped[list["DocumentSet"]] = relationship(
        "DocumentSet",
        secondary=DocumentSet__ConnectorCredentialPair.__table__,
        primaryjoin=(
            (DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == id)
            & (DocumentSet__ConnectorCredentialPair.is_current.is_(True))
        ),
        back_populates="connector_credential_pairs",
        overlaps="document_set",
    )
    index_attempts: Mapped[list["IndexAttempt"]] = relationship(
        "IndexAttempt", back_populates="connector_credential_pair"
    )

    # the user id of the user that created this cc pair
    creator_id: Mapped[UUID | None] = mapped_column(nullable=True)
    creator: Mapped["User"] = relationship(
        "User",
        back_populates="cc_pairs",
        primaryjoin="foreign(ConnectorCredentialPair.creator_id) == remote(User.id)",
    )

    background_errors: Mapped[list["BackgroundError"]] = relationship(
        "BackgroundError", back_populates="cc_pair", cascade="all, delete-orphan"
    )


class HierarchyNode(Base):
    """
    Represents a structural node in a connected source's hierarchy.
    Examples: folders, drives, spaces, projects, channels.

    Stores hierarchy structure WITH permission information, using the same
    permission model as Documents (external_user_emails, external_user_group_ids,
    is_public). This enables user-scoped hierarchy browsing in the UI.

    Some hierarchy nodes (e.g., Confluence pages) can also be documents.
    In these cases, `document_id` will be set.
    """

    __tablename__ = "hierarchy_node"

    # Primary key - Integer for simplicity
    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Raw identifier from the source system
    # e.g., "1h7uWUR2BYZjtMfEXFt43tauj-Gp36DTPtwnsNuA665I" for Google Drive
    # For SOURCE nodes, this is the source name (e.g., "google_drive")
    raw_node_id: Mapped[str] = mapped_column(String, nullable=False)

    # Human-readable name for display
    # e.g., "Engineering", "Q4 Planning", "Google Drive"
    display_name: Mapped[str] = mapped_column(String, nullable=False)

    # Link to view this node in the source system
    link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)

    # Source type (google_drive, confluence, etc.)
    source: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False), nullable=False
    )

    # What kind of structural node this is
    node_type: Mapped[HierarchyNodeType] = mapped_column(
        Enum(HierarchyNodeType, native_enum=False), nullable=False
    )

    # ============= PERMISSION FIELDS (same pattern as Document) =============
    # Email addresses of external users with access to this node in the source system
    external_user_emails: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    # External group IDs with access (prefixed by source type)
    external_user_group_ids: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    # Whether this node is publicly accessible (org-wide or world-public)
    # SOURCE nodes are always public. Other nodes get this from source permissions.
    is_public: Mapped[bool] = mapped_column(Boolean, default=False)
    # ==========================================================================

    # Foreign keys
    # For hierarchy nodes that are also documents (e.g., Confluence pages)
    # SET NULL when document is deleted - node can exist without its document
    document_id: Mapped[str | None] = mapped_column(
        ForeignKey("document.id", ondelete="SET NULL"), nullable=True
    )

    # Self-referential FK for tree structure
    # SET NULL when parent is deleted - orphan children for cleanup via pruning
    parent_id: Mapped[int | None] = mapped_column(
        ForeignKey("hierarchy_node.id", ondelete="SET NULL"), nullable=True, index=True
    )

    # Relationships
    document: Mapped["Document | None"] = relationship(
        "Document", back_populates="hierarchy_node", foreign_keys=[document_id]
    )
    parent: Mapped["HierarchyNode | None"] = relationship(
        "HierarchyNode", remote_side=[id], back_populates="children"
    )
    children: Mapped[list["HierarchyNode"]] = relationship(
        "HierarchyNode", back_populates="parent", passive_deletes=True
    )
    child_documents: Mapped[list["Document"]] = relationship(
        "Document",
        back_populates="parent_hierarchy_node",
        foreign_keys="Document.parent_hierarchy_node_id",
        passive_deletes=True,
    )
    # Personas that have this hierarchy node attached for scoped search
    personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary="persona__hierarchy_node",
        back_populates="hierarchy_nodes",
        viewonly=True,
    )

    __table_args__ = (
        # Unique constraint: same raw_node_id + source should not exist twice
        UniqueConstraint(
            "raw_node_id", "source", name="uq_hierarchy_node_raw_id_source"
        ),
        Index("ix_hierarchy_node_source_type", source, node_type),
    )


class Document(Base):
    __tablename__ = "document"
    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission

    # this should correspond to the ID of the document
    # (as is passed around in Onyx)
    id: Mapped[str] = mapped_column(NullFilteredString, primary_key=True)
    from_ingestion_api: Mapped[bool] = mapped_column(
        Boolean, default=False, nullable=True
    )
    # 0 for neutral, positive for mostly endorse, negative for mostly reject
    boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST)
    hidden: Mapped[bool] = mapped_column(Boolean, default=False)
    semantic_id: Mapped[str] = mapped_column(NullFilteredString)
    # First Section's link
    link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)

    # The updated time is also used as a measure of the last successful state of the doc
    # pulled from the source (to help skip reindexing already updated docs in case of
    # connector retries)
    # TODO: rename this column because it conflates the time of the source doc
    # with the local last modified time of the doc and any associated metadata
    # it should just be the server timestamp of the source doc
    doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Number of chunks in the document (in Vespa)
    # Only null for documents indexed prior to this change
    chunk_count: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # last time any vespa relevant row metadata or the doc changed.
    # does not include last_synced
    last_modified: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=False, index=True, default=func.now()
    )

    # last successful sync to vespa
    last_synced: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, index=True
    )
    # The following are not attached to User because the account/email may not be known
    # within Onyx
    # Something like the document creator
    primary_owners: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    secondary_owners: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    # Permission sync columns
    # Email addresses are saved at the document level for externally synced permissions
    # This is becuase the normal flow of assigning permissions is through the cc_pair
    # doesn't apply here
    external_user_emails: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    # These group ids have been prefixed by the source type
    external_user_group_ids: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    is_public: Mapped[bool] = mapped_column(Boolean, default=False)

    # Reference to parent hierarchy node (the folder/space containing this doc)
    # If None, document's hierarchy position is unknown or connector doesn't support hierarchy
    # SET NULL when hierarchy node is deleted - document should not be blocked by node deletion
    parent_hierarchy_node_id: Mapped[int | None] = mapped_column(
        ForeignKey("hierarchy_node.id", ondelete="SET NULL"), nullable=True, index=True
    )

    # tables for the knowledge graph data
    kg_stage: Mapped[KGStage] = mapped_column(
        Enum(KGStage, native_enum=False),
        comment="Status of knowledge graph extraction for this document",
        index=True,
    )

    kg_processing_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    retrieval_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship(
        "DocumentRetrievalFeedback", back_populates="document"
    )

    doc_metadata: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True, default=None
    )
    tags = relationship(
        "Tag",
        secondary=Document__Tag.__table__,
        back_populates="documents",
    )

    # Relationship to parent hierarchy node (the folder/space containing this doc)
    parent_hierarchy_node: Mapped["HierarchyNode | None"] = relationship(
        "HierarchyNode",
        back_populates="child_documents",
        foreign_keys=[parent_hierarchy_node_id],
    )

    # For documents that ARE hierarchy nodes (e.g., Confluence pages with children)
    hierarchy_node: Mapped["HierarchyNode | None"] = relationship(
        "HierarchyNode",
        back_populates="document",
        foreign_keys="HierarchyNode.document_id",
        passive_deletes=True,
    )
    # Personas that have this document directly attached for scoped search
    attached_personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary="persona__document",
        back_populates="attached_documents",
        viewonly=True,
    )

    __table_args__ = (
        Index(
            "ix_document_sync_status",
            last_modified,
            last_synced,
        ),
    )


class OpenSearchDocumentMigrationRecord(Base):
    """Tracks the migration status of documents from Vespa to OpenSearch.

    This table can be dropped when the migration is complete for all Onyx
    instances.
    """

    __tablename__ = "opensearch_document_migration_record"

    document_id: Mapped[str] = mapped_column(
        String,
        ForeignKey("document.id", ondelete="CASCADE"),
        primary_key=True,
        nullable=False,
        index=True,
    )
    status: Mapped[OpenSearchDocumentMigrationStatus] = mapped_column(
        Enum(OpenSearchDocumentMigrationStatus, native_enum=False),
        default=OpenSearchDocumentMigrationStatus.PENDING,
        nullable=False,
        index=True,
    )
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    attempts_count: Mapped[int] = mapped_column(
        Integer, default=0, nullable=False, index=True
    )
    last_attempt_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
        index=True,
    )

    document: Mapped["Document"] = relationship("Document")


class OpenSearchTenantMigrationRecord(Base):
    """Tracks the state of the OpenSearch migration for a tenant.

    Should only contain one row.

    This table can be dropped when the migration is complete for all Onyx
    instances.
    """

    __tablename__ = "opensearch_tenant_migration_record"
    __table_args__ = (
        # Singleton pattern - unique index on constant ensures only one row.
        Index("idx_opensearch_tenant_migration_singleton", text("(true)"), unique=True),
    )

    id: Mapped[int] = mapped_column(primary_key=True, nullable=False)
    document_migration_record_table_population_status: Mapped[
        OpenSearchTenantMigrationStatus
    ] = mapped_column(
        Enum(OpenSearchTenantMigrationStatus, native_enum=False),
        default=OpenSearchTenantMigrationStatus.PENDING,
        nullable=False,
    )
    num_times_observed_no_additional_docs_to_populate_migration_table: Mapped[int] = (
        mapped_column(Integer, default=0, nullable=False)
    )
    overall_document_migration_status: Mapped[OpenSearchTenantMigrationStatus] = (
        mapped_column(
            Enum(OpenSearchTenantMigrationStatus, native_enum=False),
            default=OpenSearchTenantMigrationStatus.PENDING,
            nullable=False,
        )
    )
    num_times_observed_no_additional_docs_to_migrate: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
    )
    last_updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # Opaque continuation token from Vespa's Visit API.
    # NULL means "not started".
    # Otherwise contains a serialized mapping between slice ID and continuation
    # token for that slice.
    vespa_visit_continuation_token: Mapped[str | None] = mapped_column(
        Text, nullable=True
    )
    total_chunks_migrated: Mapped[int] = mapped_column(
        Integer, default=0, nullable=False
    )
    total_chunks_errored: Mapped[int] = mapped_column(
        Integer, default=0, nullable=False
    )
    total_chunks_in_vespa: Mapped[int] = mapped_column(
        Integer, default=0, nullable=False
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    migration_completed_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    enable_opensearch_retrieval: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
    approx_chunk_count_in_vespa: Mapped[int | None] = mapped_column(
        Integer, nullable=True
    )


class KGEntityType(Base):
    __tablename__ = "kg_entity_type"

    # Primary identifier
    id_name: Mapped[str] = mapped_column(
        String, primary_key=True, nullable=False, index=True
    )

    description: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)

    grounding: Mapped[str] = mapped_column(
        NullFilteredString, nullable=False, index=False
    )

    attributes: Mapped[dict | None] = mapped_column(
        postgresql.JSONB,
        nullable=True,
        default=dict,
        server_default="{}",
        comment="Filtering based on document attribute",
    )

    @property
    def parsed_attributes(self) -> KGEntityTypeAttributes:
        if self.attributes is None:
            return KGEntityTypeAttributes()

        try:
            return KGEntityTypeAttributes(**self.attributes)
        except ValidationError:
            return KGEntityTypeAttributes()

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    deep_extraction: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )

    # Tracking fields
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    grounded_source_name: Mapped[str | None] = mapped_column(
        NullFilteredString, nullable=True, index=False
    )

    entity_values: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=True, default=None
    )

    clustering: Mapped[dict] = mapped_column(
        postgresql.JSONB,
        nullable=False,
        default=dict,
        server_default="{}",
        comment="Clustering information for this entity type",
    )


class KGRelationshipType(Base):
    __tablename__ = "kg_relationship_type"

    # Primary identifier
    id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        primary_key=True,
        nullable=False,
        index=True,
    )

    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    source_entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    target_entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    definition: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="Whether this relationship type represents a definition",
    )

    clustering: Mapped[dict] = mapped_column(
        postgresql.JSONB,
        nullable=False,
        default=dict,
        server_default="{}",
        comment="Clustering information for this relationship type",
    )

    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    # Tracking fields
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Relationships to EntityType
    source_type: Mapped["KGEntityType"] = relationship(
        "KGEntityType",
        foreign_keys=[source_entity_type_id_name],
        backref="source_relationship_type",
    )
    target_type: Mapped["KGEntityType"] = relationship(
        "KGEntityType",
        foreign_keys=[target_entity_type_id_name],
        backref="target_relationship_type",
    )


class KGRelationshipTypeExtractionStaging(Base):
    __tablename__ = "kg_relationship_type_extraction_staging"

    # Primary identifier
    id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        primary_key=True,
        nullable=False,
        index=True,
    )

    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    source_entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    target_entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    definition: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="Whether this relationship type represents a definition",
    )

    clustering: Mapped[dict] = mapped_column(
        postgresql.JSONB,
        nullable=False,
        default=dict,
        server_default="{}",
        comment="Clustering information for this relationship type",
    )

    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    transferred: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    # Tracking fields
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Relationships to EntityType
    source_type: Mapped["KGEntityType"] = relationship(
        "KGEntityType",
        foreign_keys=[source_entity_type_id_name],
        backref="source_relationship_type_staging",
    )
    target_type: Mapped["KGEntityType"] = relationship(
        "KGEntityType",
        foreign_keys=[target_entity_type_id_name],
        backref="target_relationship_type_staging",
    )


class KGEntity(Base):
    __tablename__ = "kg_entity"

    # Primary identifier
    id_name: Mapped[str] = mapped_column(
        NullFilteredString, primary_key=True, index=True
    )

    # Basic entity information
    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)
    entity_key: Mapped[str] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )
    parent_key: Mapped[str | None] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )

    name_trigrams: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String(3)),
        nullable=True,
    )

    attributes: Mapped[dict] = mapped_column(
        postgresql.JSONB,
        nullable=False,
        default=dict,
        server_default="{}",
        comment="Attributes for this entity",
    )

    document_id: Mapped[str | None] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )

    alternative_names: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    # Reference to KGEntityType
    entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    # Relationship to KGEntityType
    entity_type: Mapped["KGEntityType"] = relationship("KGEntityType", backref="entity")

    description: Mapped[str | None] = mapped_column(String, nullable=True)

    keywords: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    # Access control
    acl: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    # Boosts - using JSON for flexibility
    boosts: Mapped[dict] = mapped_column(postgresql.JSONB, nullable=False, default=dict)

    event_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Time of the event being processed",
    )

    # Tracking fields
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    __table_args__ = (
        # Fixed column names in indexes
        Index("ix_entity_type_acl", entity_type_id_name, acl),
        Index("ix_entity_name_search", name, entity_type_id_name),
    )


class KGEntityExtractionStaging(Base):
    __tablename__ = "kg_entity_extraction_staging"

    # Primary identifier
    id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        primary_key=True,
        nullable=False,
        index=True,
    )

    # Basic entity information
    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    attributes: Mapped[dict] = mapped_column(
        postgresql.JSONB,
        nullable=False,
        default=dict,
        server_default="{}",
        comment="Attributes for this entity",
    )

    document_id: Mapped[str | None] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )

    alternative_names: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    # Reference to KGEntityType
    entity_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    # Relationship to KGEntityType
    entity_type: Mapped["KGEntityType"] = relationship(
        "KGEntityType", backref="entity_staging"
    )

    description: Mapped[str | None] = mapped_column(String, nullable=True)

    keywords: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    # Access control
    acl: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    # Boosts - using JSON for flexibility
    boosts: Mapped[dict] = mapped_column(postgresql.JSONB, nullable=False, default=dict)

    transferred_id_name: Mapped[str | None] = mapped_column(
        NullFilteredString,
        nullable=True,
    )

    # Parent Child Information
    entity_key: Mapped[str] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )
    parent_key: Mapped[str | None] = mapped_column(
        NullFilteredString, nullable=True, index=True
    )

    event_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Time of the event being processed",
    )

    # Tracking fields
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    __table_args__ = (
        # Fixed column names in indexes
        Index("ix_entity_type_acl", entity_type_id_name, acl),
        Index("ix_entity_name_search", name, entity_type_id_name),
    )


class KGRelationship(Base):
    __tablename__ = "kg_relationship"

    # Primary identifier - now part of composite key
    id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        nullable=False,
        index=True,
    )

    source_document: Mapped[str | None] = mapped_column(
        NullFilteredString, ForeignKey("document.id"), nullable=True, index=True
    )

    # Source and target nodes (foreign keys to Entity table)
    source_node: Mapped[str] = mapped_column(
        NullFilteredString, ForeignKey("kg_entity.id_name"), nullable=False, index=True
    )

    target_node: Mapped[str] = mapped_column(
        NullFilteredString, ForeignKey("kg_entity.id_name"), nullable=False, index=True
    )

    source_node_type: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    target_node_type: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    # Relationship type
    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    # Add new relationship type reference
    relationship_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_relationship_type.id_name"),
        nullable=False,
        index=True,
    )

    # Add the SQLAlchemy relationship property
    relationship_type: Mapped["KGRelationshipType"] = relationship(
        "KGRelationshipType", backref="relationship"
    )

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    # Tracking fields
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Relationships to Entity table
    source: Mapped["KGEntity"] = relationship("KGEntity", foreign_keys=[source_node])
    target: Mapped["KGEntity"] = relationship("KGEntity", foreign_keys=[target_node])
    document: Mapped["Document"] = relationship(
        "Document", foreign_keys=[source_document]
    )

    __table_args__ = (
        # Composite primary key
        PrimaryKeyConstraint("id_name", "source_document"),
        # Index for querying relationships by type
        Index("ix_kg_relationship_type", type),
        # Composite index for source/target queries
        Index("ix_kg_relationship_nodes", source_node, target_node),
        # Ensure unique relationships between nodes of a specific type
        UniqueConstraint(
            "source_node",
            "target_node",
            "type",
            name="uq_kg_relationship_source_target_type",
        ),
    )


class KGRelationshipExtractionStaging(Base):
    __tablename__ = "kg_relationship_extraction_staging"

    # Primary identifier - now part of composite key
    id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        nullable=False,
        index=True,
    )

    source_document: Mapped[str | None] = mapped_column(
        NullFilteredString, ForeignKey("document.id"), nullable=True, index=True
    )

    # Source and target nodes (foreign keys to Entity table)
    source_node: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_extraction_staging.id_name"),
        nullable=False,
        index=True,
    )

    target_node: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_extraction_staging.id_name"),
        nullable=False,
        index=True,
    )

    source_node_type: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    target_node_type: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_entity_type.id_name"),
        nullable=False,
        index=True,
    )

    # Relationship type
    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)

    # Add new relationship type reference
    relationship_type_id_name: Mapped[str] = mapped_column(
        NullFilteredString,
        ForeignKey("kg_relationship_type_extraction_staging.id_name"),
        nullable=False,
        index=True,
    )

    # Add the SQLAlchemy relationship property
    relationship_type: Mapped["KGRelationshipTypeExtractionStaging"] = relationship(
        "KGRelationshipTypeExtractionStaging", backref="relationship_staging"
    )

    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    transferred: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    # Tracking fields
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Relationships to Entity table
    source: Mapped["KGEntityExtractionStaging"] = relationship(
        "KGEntityExtractionStaging", foreign_keys=[source_node]
    )
    target: Mapped["KGEntityExtractionStaging"] = relationship(
        "KGEntityExtractionStaging", foreign_keys=[target_node]
    )
    document: Mapped["Document"] = relationship(
        "Document", foreign_keys=[source_document]
    )

    __table_args__ = (
        # Composite primary key
        PrimaryKeyConstraint("id_name", "source_document"),
        # Index for querying relationships by type
        Index("ix_kg_relationship_type", type),
        # Composite index for source/target queries
        Index("ix_kg_relationship_nodes", source_node, target_node),
        # Ensure unique relationships between nodes of a specific type
        UniqueConstraint(
            "source_node",
            "target_node",
            "type",
            name="uq_kg_relationship_source_target_type",
        ),
    )


class KGTerm(Base):
    __tablename__ = "kg_term"

    # Make id_term the primary key
    id_term: Mapped[str] = mapped_column(
        NullFilteredString, primary_key=True, nullable=False, index=True
    )

    # List of entity types this term applies to
    entity_types: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), nullable=False, default=list
    )

    # Tracking fields
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    __table_args__ = (
        # Index for searching terms with specific entity types
        Index("ix_search_term_entities", entity_types),
        # Index for term lookups
        Index("ix_search_term_term", id_term),
    )


class ChunkStats(Base):
    __tablename__ = "chunk_stats"
    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission

    # this should correspond to the ID of the document
    # (as is passed around in Onyx)x
    id: Mapped[str] = mapped_column(
        NullFilteredString,
        primary_key=True,
        default=lambda context: (
            f"{context.get_current_parameters()['document_id']}__{context.get_current_parameters()['chunk_in_doc_id']}"
        ),
        index=True,
    )

    # Reference to parent document
    document_id: Mapped[str] = mapped_column(
        NullFilteredString, ForeignKey("document.id"), nullable=False, index=True
    )

    chunk_in_doc_id: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
    )

    information_content_boost: Mapped[float | None] = mapped_column(
        Float, nullable=True
    )

    last_modified: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=False, index=True, default=func.now()
    )
    last_synced: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, index=True
    )

    __table_args__ = (
        Index(
            "ix_chunk_sync_status",
            last_modified,
            last_synced,
        ),
        UniqueConstraint(
            "document_id", "chunk_in_doc_id", name="uq_chunk_stats_doc_chunk"
        ),
    )


class Tag(Base):
    __tablename__ = "tag"

    id: Mapped[int] = mapped_column(primary_key=True)
    tag_key: Mapped[str] = mapped_column(String)
    tag_value: Mapped[str] = mapped_column(String)
    source: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False)
    )
    is_list: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    documents = relationship(
        "Document",
        secondary=Document__Tag.__table__,
        back_populates="tags",
    )

    __table_args__ = (
        UniqueConstraint(
            "tag_key",
            "tag_value",
            "source",
            "is_list",
            name="_tag_key_value_source_list_uc",
        ),
    )


class Connector(Base):
    __tablename__ = "connector"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String)
    source: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False)
    )
    input_type = mapped_column(Enum(InputType, native_enum=False))
    connector_specific_config: Mapped[dict[str, Any]] = mapped_column(
        postgresql.JSONB()
    )
    indexing_start: Mapped[datetime.datetime | None] = mapped_column(
        DateTime, nullable=True
    )

    kg_processing_enabled: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="Whether this connector should extract knowledge graph entities",
    )

    kg_coverage_days: Mapped[int | None] = mapped_column(Integer, nullable=True)

    refresh_freq: Mapped[int | None] = mapped_column(Integer, nullable=True)
    prune_freq: Mapped[int | None] = mapped_column(Integer, nullable=True)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    credentials: Mapped[list["ConnectorCredentialPair"]] = relationship(
        "ConnectorCredentialPair",
        back_populates="connector",
        cascade="all, delete-orphan",
    )
    documents_by_connector: Mapped[list["DocumentByConnectorCredentialPair"]] = (
        relationship(
            "DocumentByConnectorCredentialPair",
            back_populates="connector",
            passive_deletes=True,
        )
    )

    # synchronize this validation logic with RefreshFrequencySchema etc on front end
    # until we have a centralized validation schema

    # TODO(rkuo): experiment with SQLAlchemy validators rather than manual checks
    # https://docs.sqlalchemy.org/en/20/orm/mapped_attributes.html
    def validate_refresh_freq(self) -> None:
        if self.refresh_freq is not None:
            if self.refresh_freq < 60:
                raise ValueError(
                    "refresh_freq must be greater than or equal to 1 minute."
                )

    def validate_prune_freq(self) -> None:
        if self.prune_freq is not None:
            if self.prune_freq < 300:
                raise ValueError(
                    "prune_freq must be greater than or equal to 5 minutes."
                )


class Credential(Base):
    __tablename__ = "credential"

    name: Mapped[str] = mapped_column(String, nullable=True)

    source: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False)
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    credential_json: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(
        EncryptedJson()
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    # if `true`, then all Admins will have access to the credential
    admin_public: Mapped[bool] = mapped_column(Boolean, default=True)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    curator_public: Mapped[bool] = mapped_column(Boolean, default=False)

    connectors: Mapped[list["ConnectorCredentialPair"]] = relationship(
        "ConnectorCredentialPair",
        back_populates="credential",
        cascade="all, delete-orphan",
    )
    documents_by_credential: Mapped[list["DocumentByConnectorCredentialPair"]] = (
        relationship(
            "DocumentByConnectorCredentialPair",
            back_populates="credential",
            passive_deletes=True,
        )
    )

    user: Mapped[User | None] = relationship("User", back_populates="credentials")


class FederatedConnector(Base):
    __tablename__ = "federated_connector"

    id: Mapped[int] = mapped_column(primary_key=True)
    source: Mapped[FederatedConnectorSource] = mapped_column(
        Enum(FederatedConnectorSource, native_enum=False)
    )
    credentials: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(
        EncryptedJson(), nullable=False
    )
    config: Mapped[dict[str, Any]] = mapped_column(
        postgresql.JSONB(), default=dict, nullable=False, server_default="{}"
    )

    oauth_tokens: Mapped[list["FederatedConnectorOAuthToken"]] = relationship(
        "FederatedConnectorOAuthToken",
        back_populates="federated_connector",
        cascade="all, delete-orphan",
    )
    document_sets: Mapped[list["FederatedConnector__DocumentSet"]] = relationship(
        "FederatedConnector__DocumentSet",
        back_populates="federated_connector",
        cascade="all, delete-orphan",
    )


class FederatedConnectorOAuthToken(Base):
    __tablename__ = "federated_connector_oauth_token"

    id: Mapped[int] = mapped_column(primary_key=True)
    federated_connector_id: Mapped[int] = mapped_column(
        ForeignKey("federated_connector.id", ondelete="CASCADE"), nullable=False
    )
    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )
    token: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=False
    )
    expires_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime, nullable=True
    )

    federated_connector: Mapped["FederatedConnector"] = relationship(
        "FederatedConnector", back_populates="oauth_tokens"
    )
    user: Mapped["User"] = relationship("User")


class FederatedConnector__DocumentSet(Base):
    __tablename__ = "federated_connector__document_set"

    id: Mapped[int] = mapped_column(primary_key=True)
    federated_connector_id: Mapped[int] = mapped_column(
        ForeignKey("federated_connector.id", ondelete="CASCADE"), nullable=False
    )
    document_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_set.id", ondelete="CASCADE"), nullable=False
    )
    # unique per source type. Validated before insertion.
    entities: Mapped[dict[str, Any]] = mapped_column(postgresql.JSONB(), nullable=False)

    federated_connector: Mapped["FederatedConnector"] = relationship(
        "FederatedConnector", back_populates="document_sets"
    )
    document_set: Mapped["DocumentSet"] = relationship(
        "DocumentSet", back_populates="federated_connectors"
    )

    __table_args__ = (
        UniqueConstraint(
            "federated_connector_id",
            "document_set_id",
            name="uq_federated_connector_document_set",
        ),
    )


class SearchSettings(Base):
    __tablename__ = "search_settings"

    id: Mapped[int] = mapped_column(primary_key=True)
    model_name: Mapped[str] = mapped_column(String)
    model_dim: Mapped[int] = mapped_column(Integer)
    normalize: Mapped[bool] = mapped_column(Boolean)
    query_prefix: Mapped[str | None] = mapped_column(String, nullable=True)
    passage_prefix: Mapped[str | None] = mapped_column(String, nullable=True)

    status: Mapped[IndexModelStatus] = mapped_column(
        Enum(IndexModelStatus, native_enum=False)
    )
    index_name: Mapped[str] = mapped_column(String)
    provider_type: Mapped[EmbeddingProvider | None] = mapped_column(
        ForeignKey("embedding_provider.provider_type"), nullable=True
    )

    # Type of switchover to perform when switching embedding models
    # REINDEX: waits for all connectors to complete
    # ACTIVE_ONLY: waits for only non-paused connectors to complete
    # INSTANT: swaps immediately without waiting
    switchover_type: Mapped[SwitchoverType] = mapped_column(
        Enum(SwitchoverType, native_enum=False), default=SwitchoverType.REINDEX
    )

    # allows for quantization -> less memory usage for a small performance hit
    embedding_precision: Mapped[EmbeddingPrecision] = mapped_column(
        Enum(EmbeddingPrecision, native_enum=False)
    )

    # can be used to reduce dimensionality of vectors and save memory with
    # a small performance hit. More details in the `Reducing embedding dimensions`
    # section here:
    # https://platform.openai.com/docs/guides/embeddings#embedding-models
    # If not specified, will just use the model_dim without any reduction.
    # NOTE: this is only currently available for OpenAI models
    reduced_dimension: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Mini and Large Chunks (large chunk also checks for model max context)
    multipass_indexing: Mapped[bool] = mapped_column(Boolean, default=True)

    # Contextual RAG
    enable_contextual_rag: Mapped[bool] = mapped_column(Boolean, default=False)

    # Contextual RAG LLM
    contextual_rag_llm_name: Mapped[str | None] = mapped_column(String, nullable=True)
    contextual_rag_llm_provider: Mapped[str | None] = mapped_column(
        String, nullable=True
    )

    multilingual_expansion: Mapped[list[str]] = mapped_column(
        postgresql.ARRAY(String), default=[]
    )

    cloud_provider: Mapped["CloudEmbeddingProvider"] = relationship(
        "CloudEmbeddingProvider",
        back_populates="search_settings",
        foreign_keys=[provider_type],
    )

    index_attempts: Mapped[list["IndexAttempt"]] = relationship(
        "IndexAttempt", back_populates="search_settings"
    )

    __table_args__ = (
        Index(
            "ix_embedding_model_present_unique",
            "status",
            unique=True,
            postgresql_where=(status == IndexModelStatus.PRESENT),
        ),
        Index(
            "ix_embedding_model_future_unique",
            "status",
            unique=True,
            postgresql_where=(status == IndexModelStatus.FUTURE),
        ),
    )

    def __repr__(self) -> str:
        return f"<EmbeddingModel(model_name='{self.model_name}', status='{self.status}',\
          cloud_provider='{self.cloud_provider.provider_type if self.cloud_provider else 'None'}')>"

    @property
    def api_version(self) -> str | None:
        return (
            self.cloud_provider.api_version if self.cloud_provider is not None else None
        )

    @property
    def deployment_name(self) -> str | None:
        return (
            self.cloud_provider.deployment_name
            if self.cloud_provider is not None
            else None
        )

    @property
    def api_url(self) -> str | None:
        return self.cloud_provider.api_url if self.cloud_provider is not None else None

    @property
    def api_key(self) -> str | None:
        if self.cloud_provider is None or self.cloud_provider.api_key is None:
            return None
        return self.cloud_provider.api_key.get_value(apply_mask=False)

    @property
    def large_chunks_enabled(self) -> bool:
        """
        Given multipass usage and an embedder, decides whether large chunks are allowed
        based on model/provider constraints.
        """
        # Only local models that support a larger context are from Nomic
        # Cohere does not support larger contexts (they recommend not going above ~512 tokens)
        return SearchSettings.can_use_large_chunks(
            self.multipass_indexing, self.model_name, self.provider_type
        )

    @property
    def final_embedding_dim(self) -> int:
        return self.reduced_dimension or self.model_dim

    @staticmethod
    def can_use_large_chunks(
        multipass: bool, model_name: str, provider_type: EmbeddingProvider | None
    ) -> bool:
        """
        Given multipass usage and an embedder, decides whether large chunks are allowed
        based on model/provider constraints.
        """
        # Only local models that support a larger context are from Nomic
        # Cohere does not support larger contexts (they recommend not going above ~512 tokens)
        return (
            multipass
            and model_name.startswith("nomic-ai")
            and provider_type != EmbeddingProvider.COHERE
        )


class IndexAttempt(Base):
    """
    Represents an attempt to index a group of 0 or more documents from a
    source. For example, a single pull from Google Drive, a single event from
    slack event API, or a single website crawl.
    """

    __tablename__ = "index_attempt"

    id: Mapped[int] = mapped_column(primary_key=True)

    connector_credential_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"),
        nullable=False,
    )

    # Some index attempts that run from beginning will still have this as False
    # This is only for attempts that are explicitly marked as from the start via
    # the run once API
    from_beginning: Mapped[bool] = mapped_column(Boolean)
    status: Mapped[IndexingStatus] = mapped_column(
        Enum(IndexingStatus, native_enum=False, index=True)
    )
    # The two below may be slightly out of sync if user switches Embedding Model
    new_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0)
    total_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0)
    docs_removed_from_index: Mapped[int | None] = mapped_column(Integer, default=0)
    # only filled if status = "failed"
    error_msg: Mapped[str | None] = mapped_column(Text, default=None)
    # only filled if status = "failed" AND an unhandled exception caused the failure
    full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None)
    # Nullable because in the past, we didn't allow swapping out embedding models live
    search_settings_id: Mapped[int] = mapped_column(
        ForeignKey("search_settings.id", ondelete="SET NULL"),
        nullable=True,
    )

    # for polling connectors, the start and end time of the poll window
    # will be set when the index attempt starts
    poll_range_start: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, default=None
    )
    poll_range_end: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, default=None
    )

    # Points to the last checkpoint that was saved for this run. The pointer here
    # can be taken to the FileStore to grab the actual checkpoint value
    checkpoint_pointer: Mapped[str | None] = mapped_column(String, nullable=True)

    # Database-based coordination fields (replacing Redis fencing)
    celery_task_id: Mapped[str | None] = mapped_column(String, nullable=True)
    cancellation_requested: Mapped[bool] = mapped_column(Boolean, default=False)

    # Batch coordination fields
    # Once this is set, docfetching has completed
    total_batches: Mapped[int | None] = mapped_column(Integer, nullable=True)
    # batches that are fully indexed (i.e. have completed docfetching and docprocessing)
    completed_batches: Mapped[int] = mapped_column(Integer, default=0)
    # TODO: unused, remove this column
    total_failures_batch_level: Mapped[int] = mapped_column(Integer, default=0)
    total_chunks: Mapped[int] = mapped_column(Integer, default=0)

    # Progress tracking for stall detection
    last_progress_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    last_batches_completed_count: Mapped[int] = mapped_column(Integer, default=0)

    # Heartbeat tracking for worker liveness detection
    heartbeat_counter: Mapped[int] = mapped_column(Integer, default=0)
    last_heartbeat_value: Mapped[int] = mapped_column(Integer, default=0)
    last_heartbeat_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )
    # when the actual indexing run began
    # NOTE: will use the api_server clock rather than DB server clock
    time_started: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    connector_credential_pair: Mapped[ConnectorCredentialPair] = relationship(
        "ConnectorCredentialPair", back_populates="index_attempts"
    )

    search_settings: Mapped[SearchSettings | None] = relationship(
        "SearchSettings", back_populates="index_attempts"
    )

    error_rows = relationship(
        "IndexAttemptError",
        back_populates="index_attempt",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        Index(
            "ix_index_attempt_latest_for_connector_credential_pair",
            "connector_credential_pair_id",
            "time_created",
        ),
        Index(
            "ix_index_attempt_ccpair_search_settings_time_updated",
            "connector_credential_pair_id",
            "search_settings_id",
            desc("time_updated"),
            unique=False,
        ),
        Index(
            "ix_index_attempt_cc_pair_settings_poll",
            "connector_credential_pair_id",
            "search_settings_id",
            "status",
            desc("time_updated"),
        ),
        # NEW: Index for coordination queries
        Index(
            "ix_index_attempt_active_coordination",
            "connector_credential_pair_id",
            "search_settings_id",
            "status",
        ),
    )

    def __repr__(self) -> str:
        return (
            f"<IndexAttempt(id={self.id!r}, "
            f"status={self.status!r}, "
            f"error_msg={self.error_msg!r})>"
            f"time_created={self.time_created!r}, "
            f"time_updated={self.time_updated!r}, "
        )

    def is_finished(self) -> bool:
        return self.status.is_terminal()

    def is_coordination_complete(self) -> bool:
        """Check if all batches have been processed"""
        return (
            self.total_batches is not None
            and self.completed_batches >= self.total_batches
        )


class HierarchyFetchAttempt(Base):
    """Tracks attempts to fetch hierarchy nodes from a source"""

    __tablename__ = "hierarchy_fetch_attempt"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )

    connector_credential_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id", ondelete="CASCADE"),
        nullable=False,
    )

    status: Mapped[IndexingStatus] = mapped_column(
        Enum(IndexingStatus, native_enum=False), nullable=False, index=True
    )

    # Statistics
    nodes_fetched: Mapped[int | None] = mapped_column(Integer, default=0)
    nodes_updated: Mapped[int | None] = mapped_column(Integer, default=0)

    # Error information (only filled if status = "failed")
    error_msg: Mapped[str | None] = mapped_column(Text, default=None)
    full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None)

    # Timestamps
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )
    time_started: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    connector_credential_pair: Mapped["ConnectorCredentialPair"] = relationship(
        "ConnectorCredentialPair"
    )

    __table_args__ = (
        Index(
            "ix_hierarchy_fetch_attempt_cc_pair",
            connector_credential_pair_id,
        ),
    )


class IndexAttemptError(Base):
    __tablename__ = "index_attempt_errors"

    id: Mapped[int] = mapped_column(primary_key=True)

    index_attempt_id: Mapped[int] = mapped_column(
        ForeignKey("index_attempt.id"),
        nullable=False,
    )
    connector_credential_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"),
        nullable=False,
    )

    document_id: Mapped[str | None] = mapped_column(String, nullable=True)
    document_link: Mapped[str | None] = mapped_column(String, nullable=True)

    entity_id: Mapped[str | None] = mapped_column(String, nullable=True)
    failed_time_range_start: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    failed_time_range_end: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    failure_message: Mapped[str] = mapped_column(Text)
    is_resolved: Mapped[bool] = mapped_column(Boolean, default=False)

    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # This is the reverse side of the relationship
    index_attempt = relationship("IndexAttempt", back_populates="error_rows")


class SyncRecord(Base):
    """
    Represents the status of a "sync" operation (e.g. document set, user group, deletion).

    A "sync" operation is an operation which needs to update a set of documents within
    Vespa, usually to match the state of Postgres.
    """

    __tablename__ = "sync_record"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # document set id, user group id, or deletion id
    entity_id: Mapped[int] = mapped_column(Integer)

    sync_type: Mapped[SyncType] = mapped_column(Enum(SyncType, native_enum=False))
    sync_status: Mapped[SyncStatus] = mapped_column(Enum(SyncStatus, native_enum=False))

    num_docs_synced: Mapped[int] = mapped_column(Integer, default=0)

    sync_start_time: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    sync_end_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    __table_args__ = (
        Index(
            "ix_sync_record_entity_id_sync_type_sync_start_time",
            "entity_id",
            "sync_type",
            "sync_start_time",
        ),
        Index(
            "ix_sync_record_entity_id_sync_type_sync_status",
            "entity_id",
            "sync_type",
            "sync_status",
        ),
    )


class HierarchyNodeByConnectorCredentialPair(Base):
    """Tracks which cc_pairs reference each hierarchy node.

    During pruning, stale entries are removed for the current cc_pair.
    Hierarchy nodes with zero remaining entries are then deleted.
    """

    __tablename__ = "hierarchy_node_by_connector_credential_pair"

    hierarchy_node_id: Mapped[int] = mapped_column(
        ForeignKey("hierarchy_node.id", ondelete="CASCADE"), primary_key=True
    )
    connector_id: Mapped[int] = mapped_column(primary_key=True)
    credential_id: Mapped[int] = mapped_column(primary_key=True)

    __table_args__ = (
        ForeignKeyConstraint(
            ["connector_id", "credential_id"],
            [
                "connector_credential_pair.connector_id",
                "connector_credential_pair.credential_id",
            ],
            ondelete="CASCADE",
        ),
        Index(
            "ix_hierarchy_node_cc_pair_connector_credential",
            "connector_id",
            "credential_id",
        ),
    )


class DocumentByConnectorCredentialPair(Base):
    """Represents an indexing of a document by a specific connector / credential pair"""

    __tablename__ = "document_by_connector_credential_pair"

    id: Mapped[str] = mapped_column(ForeignKey("document.id"), primary_key=True)
    # TODO: transition this to use the ConnectorCredentialPair id directly
    connector_id: Mapped[int] = mapped_column(
        ForeignKey("connector.id", ondelete="CASCADE"), primary_key=True
    )
    credential_id: Mapped[int] = mapped_column(
        ForeignKey("credential.id", ondelete="CASCADE"), primary_key=True
    )

    # used to better keep track of document counts at a connector level
    # e.g. if a document is added as part of permission syncing, it should
    # not be counted as part of the connector's document count until
    # the actual indexing is complete
    has_been_indexed: Mapped[bool] = mapped_column(Boolean)

    connector: Mapped[Connector] = relationship(
        "Connector", back_populates="documents_by_connector", passive_deletes=True
    )
    credential: Mapped[Credential] = relationship(
        "Credential", back_populates="documents_by_credential", passive_deletes=True
    )

    __table_args__ = (
        Index(
            "idx_document_cc_pair_connector_credential",
            "connector_id",
            "credential_id",
            unique=False,
        ),
        # Index to optimize get_document_counts_for_cc_pairs query pattern
        Index(
            "idx_document_cc_pair_counts",
            "connector_id",
            "credential_id",
            "has_been_indexed",
            unique=False,
        ),
    )


"""
Messages Tables
"""


class ChatSession(Base):
    __tablename__ = "chat_session"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    persona_id: Mapped[int | None] = mapped_column(
        ForeignKey("persona.id"), nullable=True
    )
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # This chat created by OnyxBot
    onyxbot_flow: Mapped[bool] = mapped_column(Boolean, default=False)
    # Only ever set to True if system is set to not hard-delete chats
    deleted: Mapped[bool] = mapped_column(Boolean, default=False)
    # controls whether or not this conversation is viewable by others
    shared_status: Mapped[ChatSessionSharedStatus] = mapped_column(
        Enum(ChatSessionSharedStatus, native_enum=False),
        default=ChatSessionSharedStatus.PRIVATE,
    )

    current_alternate_model: Mapped[str | None] = mapped_column(String, default=None)

    slack_thread_id: Mapped[str | None] = mapped_column(
        String, nullable=True, default=None
    )

    project_id: Mapped[int | None] = mapped_column(
        ForeignKey("user_project.id"), nullable=True
    )

    project: Mapped["UserProject"] = relationship(
        "UserProject", back_populates="chat_sessions", foreign_keys=[project_id]
    )

    # the latest "overrides" specified by the user. These take precedence over
    # the attached persona. However, overrides specified directly in the
    # `send-message` call will take precedence over these.
    # NOTE: currently only used by the chat seeding flow, will be used in the
    # future once we allow users to override default values via the Chat UI
    # itself
    llm_override: Mapped[LLMOverride | None] = mapped_column(
        PydanticType(LLMOverride), nullable=True
    )

    # The latest temperature override specified by the user
    temperature_override: Mapped[float | None] = mapped_column(Float, nullable=True)

    prompt_override: Mapped[PromptOverride | None] = mapped_column(
        PydanticType(PromptOverride), nullable=True
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    user: Mapped[User] = relationship("User", back_populates="chat_sessions")
    messages: Mapped[list["ChatMessage"]] = relationship(
        "ChatMessage",
        back_populates="chat_session",
        cascade="all, delete-orphan",
        foreign_keys="ChatMessage.chat_session_id",
    )
    persona: Mapped["Persona"] = relationship("Persona")


class ChatMessage(Base):
    """Note, the first message in a chain has no contents, it's a workaround to allow edits
    on the first message of a session, an empty root node basically

    Since every user message is followed by a LLM response, chat messages generally come in pairs.
    Keeping them as separate messages however for future Agentification extensions
    Fields will be largely duplicated in the pair.
    """

    __tablename__ = "chat_message"

    id: Mapped[int] = mapped_column(primary_key=True)

    # Where is this message located
    chat_session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("chat_session.id")
    )

    # Parent message pointer for the tree structure, nullable because the first message is
    # an empty root node to allow edits on the first message of a session.
    parent_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id"), nullable=True
    )
    # This only maps to the latest because only that message chain is needed.
    # It can be updated as needed to trace other branches.
    latest_child_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id"), nullable=True
    )

    # Only set on summary messages - the ID of the last message included in this summary
    # Used for chat history compression
    last_summarized_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id", ondelete="SET NULL"),
        nullable=True,
    )

    # For multi-model turns: the user message points to which assistant response
    # was selected as the preferred one to continue the conversation with.
    preferred_response_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id", ondelete="SET NULL"), nullable=True
    )

    # The display name of the model that generated this assistant message
    model_display_name: Mapped[str | None] = mapped_column(String, nullable=True)

    # What does this message contain
    reasoning_tokens: Mapped[str | None] = mapped_column(Text, nullable=True)
    message: Mapped[str] = mapped_column(Text)
    token_count: Mapped[int] = mapped_column(Integer)
    message_type: Mapped[MessageType] = mapped_column(
        Enum(MessageType, native_enum=False)
    )
    # Files attached to the message, when parsed into history, it becomes a separate message
    files: Mapped[list[FileDescriptor] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Maps the citation numbers to a SearchDoc id
    citations: Mapped[dict[int, int] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Metadata
    error: Mapped[str | None] = mapped_column(Text, nullable=True)
    time_sent: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    # True if this assistant message is a clarification question (deep research flow)
    is_clarification: Mapped[bool] = mapped_column(Boolean, default=False)
    # Duration in seconds for processing this message (assistant messages only)
    processing_duration_seconds: Mapped[float | None] = mapped_column(
        Float, nullable=True
    )

    # Relationships
    chat_session: Mapped[ChatSession] = relationship(
        "ChatSession",
        back_populates="messages",
        foreign_keys=[chat_session_id],
    )

    chat_message_feedbacks: Mapped[list["ChatMessageFeedback"]] = relationship(
        "ChatMessageFeedback",
        back_populates="chat_message",
    )

    document_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship(
        "DocumentRetrievalFeedback",
        back_populates="chat_message",
    )

    # Even though search docs come from tool calls, the answer has a final set of saved search docs that we will show
    search_docs: Mapped[list["SearchDoc"]] = relationship(
        "SearchDoc",
        secondary=ChatMessage__SearchDoc.__table__,
        back_populates="chat_messages",
        cascade="all, delete-orphan",
        single_parent=True,
    )

    parent_message: Mapped["ChatMessage | None"] = relationship(
        "ChatMessage",
        foreign_keys=[parent_message_id],
        remote_side="ChatMessage.id",
    )

    latest_child_message: Mapped["ChatMessage | None"] = relationship(
        "ChatMessage",
        foreign_keys=[latest_child_message_id],
        remote_side="ChatMessage.id",
    )

    preferred_response: Mapped["ChatMessage | None"] = relationship(
        "ChatMessage",
        foreign_keys=[preferred_response_id],
        remote_side="ChatMessage.id",
    )

    # Chat messages only need to know their immediate tool call children
    # If there are nested tool calls, they are stored in the tool_call_children relationship.
    tool_calls: Mapped[list["ToolCall"] | None] = relationship(
        "ToolCall",
        back_populates="chat_message",
    )

    standard_answers: Mapped[list["StandardAnswer"]] = relationship(
        "StandardAnswer",
        secondary=ChatMessage__StandardAnswer.__table__,
        back_populates="chat_messages",
    )


class ToolCall(Base):
    """Represents a Tool Call and Tool Response"""

    __tablename__ = "tool_call"

    id: Mapped[int] = mapped_column(primary_key=True)

    chat_session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("chat_session.id", ondelete="CASCADE")
    )

    # If this is not None, it's a top level tool call from the user message
    # If this is None, it's a lower level call from another tool/agent
    parent_chat_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id", ondelete="CASCADE"), nullable=True
    )
    # If this is not None, this tool call is a child of another tool call
    parent_tool_call_id: Mapped[int | None] = mapped_column(
        ForeignKey("tool_call.id", ondelete="CASCADE"), nullable=True
    )
    # The tools with the same turn number (and parent) were called in parallel
    # Ones with different turn numbers (and same parent) were called sequentially
    turn_number: Mapped[int] = mapped_column(Integer)
    # Index order of tool calls from the LLM for parallel tool calls
    tab_index: Mapped[int] = mapped_column(Integer, default=0)

    # Not a FK because we want to be able to delete the tool without deleting
    # this entry
    tool_id: Mapped[int] = mapped_column(Integer())
    # This is needed because LLMs expect the tool call and the response to have matching IDs
    # This is better than just regenerating one randomly
    tool_call_id: Mapped[str] = mapped_column(String())
    # Preceeding reasoning tokens for this tool call, not included in the history
    reasoning_tokens: Mapped[str | None] = mapped_column(Text, nullable=True)
    # For "Agents" like the Research Agent for Deep Research -
    # the argument and final report are stored as the argument and response.
    tool_call_arguments: Mapped[dict[str, JSON_ro]] = mapped_column(postgresql.JSONB())
    tool_call_response: Mapped[str] = mapped_column(Text)
    # This just counts the number of tokens in the arg because it's all that's kept for the history
    # Only the top level tools (the ones with a parent_chat_message_id) have token counts that are counted
    # towards the session total.
    tool_call_tokens: Mapped[int] = mapped_column(Integer())
    # For image generation tool - stores GeneratedImage objects for replay
    generated_images: Mapped[list[dict] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Relationships
    chat_session: Mapped[ChatSession] = relationship("ChatSession")

    chat_message: Mapped["ChatMessage | None"] = relationship(
        "ChatMessage",
        foreign_keys=[parent_chat_message_id],
        back_populates="tool_calls",
    )
    parent_tool_call: Mapped["ToolCall | None"] = relationship(
        "ToolCall",
        foreign_keys=[parent_tool_call_id],
        remote_side="ToolCall.id",
    )
    tool_call_children: Mapped[list["ToolCall"]] = relationship(
        "ToolCall",
        foreign_keys=[parent_tool_call_id],
        back_populates="parent_tool_call",
    )
    # Other tools may need to save other things, might need to figure out a more generic way to store
    # rich tool returns
    search_docs: Mapped[list["SearchDoc"]] = relationship(
        "SearchDoc",
        secondary=ToolCall__SearchDoc.__table__,
        back_populates="tool_calls",
        cascade="all, delete-orphan",
        single_parent=True,
    )


class SearchDoc(Base):
    """Different from Document table. This one stores the state of a document from a retrieval.
    This allows chat sessions to be replayed with the searched docs

    Notably, this does not include the contents of the Document/Chunk, during inference if a stored
    SearchDoc is selected, an inference must be remade to retrieve the contents
    """

    __tablename__ = "search_doc"

    id: Mapped[int] = mapped_column(primary_key=True)
    document_id: Mapped[str] = mapped_column(String)
    chunk_ind: Mapped[int] = mapped_column(Integer)
    semantic_id: Mapped[str] = mapped_column(String)
    link: Mapped[str | None] = mapped_column(String, nullable=True)
    blurb: Mapped[str] = mapped_column(String)
    boost: Mapped[int] = mapped_column(Integer)
    source_type: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False)
    )
    hidden: Mapped[bool] = mapped_column(Boolean)
    doc_metadata: Mapped[dict[str, str | list[str]]] = mapped_column(postgresql.JSONB())
    score: Mapped[float] = mapped_column(Float)
    match_highlights: Mapped[list[str]] = mapped_column(postgresql.ARRAY(String))
    # This is for the document, not this row in the table
    updated_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    primary_owners: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    secondary_owners: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    is_internet: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)

    is_relevant: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    relevance_explanation: Mapped[str | None] = mapped_column(String, nullable=True)

    chat_messages: Mapped[list["ChatMessage"]] = relationship(
        "ChatMessage",
        secondary=ChatMessage__SearchDoc.__table__,
        back_populates="search_docs",
    )

    tool_calls: Mapped[list["ToolCall"]] = relationship(
        "ToolCall",
        secondary=ToolCall__SearchDoc.__table__,
        back_populates="search_docs",
    )


class SearchQuery(Base):
    # This table contains search queries for the Search UI. There are no followups and less is stored because the reply
    # functionality is simply to rerun the search query again as things may have changed and this is more common for search.
    __tablename__ = "search_query"
    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    user_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE")
    )
    query: Mapped[str] = mapped_column(String)
    query_expansions: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )


"""
Feedback, Logging, Metrics Tables
"""


class DocumentRetrievalFeedback(Base):
    __tablename__ = "document_retrieval_feedback"

    id: Mapped[int] = mapped_column(primary_key=True)
    chat_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id", ondelete="SET NULL"), nullable=True
    )
    document_id: Mapped[str] = mapped_column(ForeignKey("document.id"))
    # How high up this document is in the results, 1 for first
    document_rank: Mapped[int] = mapped_column(Integer)
    clicked: Mapped[bool] = mapped_column(Boolean, default=False)
    feedback: Mapped[SearchFeedbackType | None] = mapped_column(
        Enum(SearchFeedbackType, native_enum=False), nullable=True
    )

    chat_message: Mapped[ChatMessage] = relationship(
        "ChatMessage",
        back_populates="document_feedbacks",
        foreign_keys=[chat_message_id],
    )
    document: Mapped[Document] = relationship(
        "Document", back_populates="retrieval_feedbacks"
    )


class ChatMessageFeedback(Base):
    __tablename__ = "chat_feedback"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    chat_message_id: Mapped[int | None] = mapped_column(
        ForeignKey("chat_message.id", ondelete="SET NULL"), nullable=True
    )
    is_positive: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    required_followup: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    feedback_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    predefined_feedback: Mapped[str | None] = mapped_column(String, nullable=True)

    chat_message: Mapped[ChatMessage] = relationship(
        "ChatMessage",
        back_populates="chat_message_feedbacks",
        foreign_keys=[chat_message_id],
    )


class LLMProvider(Base):
    __tablename__ = "llm_provider"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    provider: Mapped[str] = mapped_column(String)
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )
    api_base: Mapped[str | None] = mapped_column(String, nullable=True)
    api_version: Mapped[str | None] = mapped_column(String, nullable=True)
    # custom configs that should be passed to the LLM provider at inference time
    # (e.g. `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, etc. for bedrock)
    custom_config: Mapped[dict[str, str] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Deprecated: use LLMModelFlow with CHAT flow type instead
    default_model_name: Mapped[str | None] = mapped_column(String, nullable=True)

    deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)

    # Deprecated: use LLMModelFlow.is_default with CHAT flow type instead
    is_default_provider: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    # Deprecated: use LLMModelFlow.is_default with VISION flow type instead
    is_default_vision_provider: Mapped[bool | None] = mapped_column(Boolean)
    # Deprecated: use LLMModelFlow with VISION flow type instead
    default_vision_model: Mapped[str | None] = mapped_column(String, nullable=True)
    # EE only
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    # Auto mode: models, visibility, and defaults are managed by GitHub config
    is_auto_mode: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    groups: Mapped[list["UserGroup"]] = relationship(
        "UserGroup",
        secondary="llm_provider__user_group",
        viewonly=True,
    )
    personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary="llm_provider__persona",
        back_populates="allowed_by_llm_providers",
        viewonly=True,
    )
    model_configurations: Mapped[list["ModelConfiguration"]] = relationship(
        "ModelConfiguration",
        back_populates="llm_provider",
        foreign_keys="ModelConfiguration.llm_provider_id",
    )


class ModelConfiguration(Base):
    __tablename__ = "model_configuration"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    llm_provider_id: Mapped[int] = mapped_column(
        ForeignKey("llm_provider.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String, nullable=False)

    # Represents whether or not a given model will be usable by the end user or not.
    # This field is primarily used for "Well Known LLM Providers", since for them,
    # we have a pre-defined list of LLM models that we allow them to choose from.
    # For example, for OpenAI, we allow the end-user to choose multiple models from
    # `["gpt-4", "gpt-4o", etc.]`. Once they make their selections, we set each
    # selected model to `is_visible = True`.
    #
    # For "Custom LLM Providers", we don't provide a comprehensive list of models
    # for the end-user to choose from; *they provide it themselves*. Therefore,
    # for Custom LLM Providers, `is_visible` will always be True.
    is_visible: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # Max input tokens can be null when:
    # - The end-user configures models through a "Well Known LLM Provider".
    # - The end-user is configuring a model and chooses not to set a max-input-tokens limit.
    max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Deprecated: use LLMModelFlow with VISION flow type instead
    supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)

    # Human-readable display name for the model.
    # For dynamic providers (OpenRouter, Bedrock, Ollama), this comes from the source API.
    # For static providers (OpenAI, Anthropic), this may be null and will fall back to LiteLLM.
    display_name: Mapped[str | None] = mapped_column(String, nullable=True)

    llm_provider: Mapped["LLMProvider"] = relationship(
        "LLMProvider",
        back_populates="model_configurations",
    )

    llm_model_flows: Mapped[list["LLMModelFlow"]] = relationship(
        "LLMModelFlow",
        back_populates="model_configuration",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    @property
    def llm_model_flow_types(self) -> list[LLMModelFlowType]:
        return [flow.llm_model_flow_type for flow in self.llm_model_flows]


class LLMModelFlow(Base):
    __tablename__ = "llm_model_flow"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    llm_model_flow_type: Mapped[LLMModelFlowType] = mapped_column(
        Enum(LLMModelFlowType, native_enum=False), nullable=False
    )
    model_configuration_id: Mapped[int] = mapped_column(
        ForeignKey("model_configuration.id", ondelete="CASCADE"),
        nullable=False,
    )
    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    model_configuration: Mapped["ModelConfiguration"] = relationship(
        "ModelConfiguration",
        back_populates="llm_model_flows",
    )

    __table_args__ = (
        UniqueConstraint(
            "llm_model_flow_type",
            "model_configuration_id",
            name="uq_model_config_per_llm_model_flow_type",
        ),
        Index(
            "ix_one_default_per_llm_model_flow",
            "llm_model_flow_type",
            unique=True,
            postgresql_where=(is_default == True),  # noqa: E712
        ),
    )


class ImageGenerationConfig(Base):
    __tablename__ = "image_generation_config"

    image_provider_id: Mapped[str] = mapped_column(String, primary_key=True)
    model_configuration_id: Mapped[int] = mapped_column(
        ForeignKey("model_configuration.id", ondelete="CASCADE"),
        nullable=False,
    )
    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    model_configuration: Mapped["ModelConfiguration"] = relationship(
        "ModelConfiguration"
    )

    __table_args__ = (
        Index("ix_image_generation_config_is_default", "is_default"),
        Index(
            "ix_image_generation_config_model_configuration_id",
            "model_configuration_id",
        ),
    )


class VoiceProvider(Base):
    """Configuration for voice services (STT and TTS)."""

    __tablename__ = "voice_provider"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    provider_type: Mapped[str] = mapped_column(
        String
    )  # "openai", "azure", "elevenlabs"
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )
    api_base: Mapped[str | None] = mapped_column(String, nullable=True)
    custom_config: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Model/voice configuration
    stt_model: Mapped[str | None] = mapped_column(
        String, nullable=True
    )  # e.g., "whisper-1"
    tts_model: Mapped[str | None] = mapped_column(
        String, nullable=True
    )  # e.g., "tts-1", "tts-1-hd"
    default_voice: Mapped[str | None] = mapped_column(
        String, nullable=True
    )  # e.g., "alloy", "echo"

    # STT and TTS can use different providers - only one provider per type
    is_default_stt: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    is_default_tts: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    # Enforce only one default STT provider and one default TTS provider at DB level
    __table_args__ = (
        Index(
            "ix_voice_provider_one_default_stt",
            "is_default_stt",
            unique=True,
            postgresql_where=(is_default_stt == True),  # noqa: E712
        ),
        Index(
            "ix_voice_provider_one_default_tts",
            "is_default_tts",
            unique=True,
            postgresql_where=(is_default_tts == True),  # noqa: E712
        ),
    )


class CloudEmbeddingProvider(Base):
    __tablename__ = "embedding_provider"

    provider_type: Mapped[EmbeddingProvider] = mapped_column(
        Enum(EmbeddingProvider), primary_key=True
    )
    api_url: Mapped[str | None] = mapped_column(String, nullable=True)
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(EncryptedString())
    api_version: Mapped[str | None] = mapped_column(String, nullable=True)
    deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)

    search_settings: Mapped[list["SearchSettings"]] = relationship(
        "SearchSettings",
        back_populates="cloud_provider",
    )

    def __repr__(self) -> str:
        return f"<EmbeddingProvider(type='{self.provider_type}')>"


class InternetSearchProvider(Base):
    __tablename__ = "internet_search_provider"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    provider_type: Mapped[str] = mapped_column(String, nullable=False)
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )
    config: Mapped[dict[str, str] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    def __repr__(self) -> str:
        return f"<InternetSearchProvider(name='{self.name}', provider_type='{self.provider_type}')>"


class InternetContentProvider(Base):
    __tablename__ = "internet_content_provider"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    provider_type: Mapped[str] = mapped_column(String, nullable=False)
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )
    config: Mapped[WebContentProviderConfig | None] = mapped_column(
        PydanticType(WebContentProviderConfig), nullable=True
    )
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    def __repr__(self) -> str:
        return f"<InternetContentProvider(name='{self.name}', provider_type='{self.provider_type}')>"


class DocumentSet(Base):
    __tablename__ = "document_set"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    description: Mapped[str | None] = mapped_column(String)
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    # Whether changes to the document set have been propagated
    is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # If `False`, then the document set is not visible to users who are not explicitly
    # given access to it either via the `users` or `groups` relationships
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)

    # Last time a user updated this document set
    time_last_modified_by_user: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    connector_credential_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(
        "ConnectorCredentialPair",
        secondary=DocumentSet__ConnectorCredentialPair.__table__,
        primaryjoin=(
            (DocumentSet__ConnectorCredentialPair.document_set_id == id)
            & (DocumentSet__ConnectorCredentialPair.is_current.is_(True))
        ),
        secondaryjoin=(
            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id
            == ConnectorCredentialPair.id
        ),
        back_populates="document_sets",
        overlaps="document_set",
    )
    personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary=Persona__DocumentSet.__table__,
        back_populates="document_sets",
    )
    # Other users with access
    users: Mapped[list[User]] = relationship(
        "User",
        secondary=DocumentSet__User.__table__,
        viewonly=True,
    )
    # EE only
    groups: Mapped[list["UserGroup"]] = relationship(
        "UserGroup",
        secondary="document_set__user_group",
        viewonly=True,
    )
    federated_connectors: Mapped[list["FederatedConnector__DocumentSet"]] = (
        relationship(
            "FederatedConnector__DocumentSet",
            back_populates="document_set",
            cascade="all, delete-orphan",
        )
    )


class Tool(Base):
    __tablename__ = "tool"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # The name of the tool that the LLM will see
    name: Mapped[str] = mapped_column(String, nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=True)
    # ID of the tool in the codebase, only applies for in-code tools.
    # tools defined via the UI will have this as None
    in_code_tool_id: Mapped[str | None] = mapped_column(String, nullable=True)
    display_name: Mapped[str] = mapped_column(String, nullable=True)

    # OpenAPI scheme for the tool. Only applies to tools defined via the UI.
    openapi_schema: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    # MCP tool input schema. Only applies to MCP tools.
    mcp_input_schema: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    custom_headers: Mapped[list[HeaderItemDict] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    # user who created / owns the tool. Will be None for built-in tools.
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    # whether to pass through the user's OAuth token as Authorization header
    passthrough_auth: Mapped[bool] = mapped_column(Boolean, default=False)
    # MCP server this tool is associated with (null for non-MCP tools)
    mcp_server_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("mcp_server.id", ondelete="CASCADE"), nullable=True
    )
    # OAuth configuration for this tool (null for tools without OAuth)
    oauth_config_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("oauth_config.id", ondelete="SET NULL"), nullable=True
    )
    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)

    user: Mapped[User | None] = relationship("User", back_populates="custom_tools")
    oauth_config: Mapped["OAuthConfig | None"] = relationship(
        "OAuthConfig", back_populates="tools"
    )
    # Relationship to Persona through the association table
    personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary=Persona__Tool.__table__,
        back_populates="tools",
    )
    # MCP server relationship
    mcp_server: Mapped["MCPServer | None"] = relationship(
        "MCPServer", back_populates="current_actions"
    )


class OAuthConfig(Base):
    """OAuth provider configuration that can be shared across multiple tools"""

    __tablename__ = "oauth_config"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)

    # OAuth provider endpoints
    authorization_url: Mapped[str] = mapped_column(Text, nullable=False)
    token_url: Mapped[str] = mapped_column(Text, nullable=False)

    # Client credentials (encrypted)
    client_id: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=False
    )
    client_secret: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=False
    )

    # Optional configurations
    scopes: Mapped[list[str] | None] = mapped_column(postgresql.JSONB(), nullable=True)
    additional_params: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )

    # Metadata
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    tools: Mapped[list["Tool"]] = relationship("Tool", back_populates="oauth_config")
    user_tokens: Mapped[list["OAuthUserToken"]] = relationship(
        "OAuthUserToken", back_populates="oauth_config", cascade="all, delete-orphan"
    )


class OAuthUserToken(Base):
    """Per-user OAuth tokens for a specific OAuth configuration"""

    __tablename__ = "oauth_user_token"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    oauth_config_id: Mapped[int] = mapped_column(
        ForeignKey("oauth_config.id", ondelete="CASCADE"), nullable=False
    )
    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )

    # Token data (encrypted)
    # Structure: {
    #   "access_token": "...",
    #   "refresh_token": "...",  # Optional
    #   "token_type": "Bearer",
    #   "expires_at": 1234567890,  # Unix timestamp, optional
    #   "scope": "repo user"  # Optional
    # }
    token_data: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(
        EncryptedJson(), nullable=False
    )

    # Metadata
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    oauth_config: Mapped["OAuthConfig"] = relationship(
        "OAuthConfig", back_populates="user_tokens"
    )
    user: Mapped["User"] = relationship("User")

    # Unique constraint: One token per user per OAuth config
    __table_args__ = (
        UniqueConstraint("oauth_config_id", "user_id", name="uq_oauth_user_token"),
    )


class StarterMessage(BaseModel):
    """Starter message for a persona."""

    name: str
    message: str


class Persona__PersonaLabel(Base):
    __tablename__ = "persona__persona_label"

    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
    persona_label_id: Mapped[int] = mapped_column(
        ForeignKey("persona_label.id", ondelete="CASCADE"), primary_key=True
    )


class Persona(Base):
    __tablename__ = "persona"

    id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    name: Mapped[str] = mapped_column(String)
    description: Mapped[str] = mapped_column(String)

    # Allows the persona to specify a specific default LLM model
    # NOTE: only is applied on the actual response generation - is not used for things like
    # auto-detected time filters, relevance filters, etc.
    llm_model_provider_override: Mapped[str | None] = mapped_column(
        String, nullable=True
    )
    llm_model_version_override: Mapped[str | None] = mapped_column(
        String, nullable=True
    )
    default_model_configuration_id: Mapped[int | None] = mapped_column(
        Integer,
        ForeignKey("model_configuration.id", ondelete="SET NULL"),
        nullable=True,
    )

    starter_messages: Mapped[list[StarterMessage] | None] = mapped_column(
        PydanticListType(StarterMessage), nullable=True
    )
    search_start_date: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
    # Built-in personas are configured via backend during deployment
    # Treated specially (cannot be user edited etc.)
    builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False)

    # Featured personas are highlighted in the UI
    is_featured: Mapped[bool] = mapped_column(Boolean, default=False)
    # controls whether the persona is listed in user-facing agent lists
    is_listed: Mapped[bool] = mapped_column(Boolean, default=True)
    # controls the ordering of personas in the UI
    # higher priority personas are displayed first, ties are resolved by the ID,
    # where lower value IDs (e.g. created earlier) are displayed first
    display_priority: Mapped[int | None] = mapped_column(
        Integer, nullable=True, default=None
    )
    deleted: Mapped[bool] = mapped_column(Boolean, default=False)

    # Custom Agent Prompt
    system_prompt: Mapped[str | None] = mapped_column(
        String(length=PROMPT_LENGTH), nullable=True
    )
    replace_base_system_prompt: Mapped[bool] = mapped_column(Boolean, default=False)
    task_prompt: Mapped[str | None] = mapped_column(
        String(length=PROMPT_LENGTH), nullable=True
    )
    datetime_aware: Mapped[bool] = mapped_column(Boolean, default=True)

    uploaded_image_id: Mapped[str | None] = mapped_column(String, nullable=True)
    icon_name: Mapped[str | None] = mapped_column(String, nullable=True)

    # These are only defaults, users can select from all if desired
    document_sets: Mapped[list[DocumentSet]] = relationship(
        "DocumentSet",
        secondary=Persona__DocumentSet.__table__,
        back_populates="personas",
    )
    tools: Mapped[list[Tool]] = relationship(
        "Tool",
        secondary=Persona__Tool.__table__,
        back_populates="personas",
    )
    # Owner
    user: Mapped[User | None] = relationship("User", back_populates="personas")
    # Other users with access
    users: Mapped[list[User]] = relationship(
        "User",
        secondary=Persona__User.__table__,
        viewonly=True,
    )
    # EE only
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    groups: Mapped[list["UserGroup"]] = relationship(
        "UserGroup",
        secondary="persona__user_group",
        viewonly=True,
    )
    allowed_by_llm_providers: Mapped[list["LLMProvider"]] = relationship(
        "LLMProvider",
        secondary="llm_provider__persona",
        back_populates="personas",
        viewonly=True,
    )
    # Relationship to UserFile
    user_files: Mapped[list["UserFile"]] = relationship(
        "UserFile",
        secondary="persona__user_file",
        back_populates="assistants",
    )
    labels: Mapped[list["PersonaLabel"]] = relationship(
        "PersonaLabel",
        secondary=Persona__PersonaLabel.__table__,
        back_populates="personas",
    )
    # Hierarchy nodes attached to this persona for scoped search
    hierarchy_nodes: Mapped[list["HierarchyNode"]] = relationship(
        "HierarchyNode",
        secondary="persona__hierarchy_node",
        back_populates="personas",
    )
    # Individual documents attached to this persona for scoped search
    attached_documents: Mapped[list["Document"]] = relationship(
        "Document",
        secondary="persona__document",
        back_populates="attached_personas",
    )

    # Default personas loaded via yaml cannot have the same name
    __table_args__ = (
        Index(
            "_builtin_persona_name_idx",
            "name",
            unique=True,
            postgresql_where=(builtin_persona == True),  # noqa: E712
        ),
    )


class Persona__UserFile(Base):
    __tablename__ = "persona__user_file"

    persona_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )
    user_file_id: Mapped[UUID] = mapped_column(
        ForeignKey("user_file.id", ondelete="CASCADE"), primary_key=True
    )


class Persona__HierarchyNode(Base):
    """Association table linking personas to hierarchy nodes.

    This allows assistants to be configured with specific hierarchy nodes
    (folders, spaces, channels, etc.) for scoped search/retrieval.
    """

    __tablename__ = "persona__hierarchy_node"

    persona_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )
    hierarchy_node_id: Mapped[int] = mapped_column(
        ForeignKey("hierarchy_node.id", ondelete="CASCADE"), primary_key=True
    )


class Persona__Document(Base):
    """Association table linking personas to individual documents.

    This allows assistants to be configured with specific documents
    for scoped search/retrieval. Complements hierarchy_nodes which
    allow attaching folders/spaces.
    """

    __tablename__ = "persona__document"

    persona_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )
    document_id: Mapped[str] = mapped_column(
        ForeignKey("document.id", ondelete="CASCADE"), primary_key=True
    )


class PersonaLabel(Base):
    __tablename__ = "persona_label"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    personas: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary=Persona__PersonaLabel.__table__,
        back_populates="labels",
    )


class Assistant__UserSpecificConfig(Base):
    __tablename__ = "assistant__user_specific_config"

    assistant_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )
    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True
    )
    disabled_tool_ids: Mapped[list[int]] = mapped_column(
        postgresql.ARRAY(Integer), nullable=False
    )


AllowedAnswerFilters = (
    Literal["well_answered_postfilter"] | Literal["questionmark_prefilter"]
)


class ChannelConfig(TypedDict):
    """NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column
    in Postgres"""

    channel_name: str | None  # None for default channel config
    respond_tag_only: NotRequired[bool]  # defaults to False
    respond_to_bots: NotRequired[bool]  # defaults to False
    is_ephemeral: NotRequired[bool]  # defaults to False
    respond_member_group_list: NotRequired[list[str]]
    answer_filters: NotRequired[list[AllowedAnswerFilters]]
    # If None then no follow up
    # If empty list, follow up with no tags
    follow_up_tags: NotRequired[list[str]]
    show_continue_in_web_ui: NotRequired[bool]  # defaults to False
    disabled: NotRequired[bool]  # defaults to False


class SlackChannelConfig(Base):
    __tablename__ = "slack_channel_config"

    id: Mapped[int] = mapped_column(primary_key=True)
    slack_bot_id: Mapped[int] = mapped_column(
        ForeignKey("slack_bot.id"), nullable=False
    )
    persona_id: Mapped[int | None] = mapped_column(
        ForeignKey("persona.id"), nullable=True
    )
    channel_config: Mapped[ChannelConfig] = mapped_column(
        postgresql.JSONB(), nullable=False
    )

    enable_auto_filters: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )

    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    persona: Mapped[Persona | None] = relationship("Persona")

    slack_bot: Mapped["SlackBot"] = relationship(
        "SlackBot",
        back_populates="slack_channel_configs",
    )
    standard_answer_categories: Mapped[list["StandardAnswerCategory"]] = relationship(
        "StandardAnswerCategory",
        secondary=SlackChannelConfig__StandardAnswerCategory.__table__,
        back_populates="slack_channel_configs",
    )

    __table_args__ = (
        UniqueConstraint(
            "slack_bot_id",
            "is_default",
            name="uq_slack_channel_config_slack_bot_id_default",
        ),
        Index(
            "ix_slack_channel_config_slack_bot_id_default",
            "slack_bot_id",
            "is_default",
            unique=True,
            postgresql_where=(is_default is True),
        ),
    )


class SlackBot(Base):
    __tablename__ = "slack_bot"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True)

    bot_token: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), unique=True
    )
    app_token: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), unique=True
    )
    user_token: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )

    slack_channel_configs: Mapped[list[SlackChannelConfig]] = relationship(
        "SlackChannelConfig",
        back_populates="slack_bot",
        cascade="all, delete-orphan",
    )


class DiscordBotConfig(Base):
    """Global Discord bot configuration (one per tenant).

    Stores the bot token when not provided via DISCORD_BOT_TOKEN env var.
    Uses a fixed ID with check constraint to enforce only one row per tenant.
    """

    __tablename__ = "discord_bot_config"

    id: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'SINGLETON'")
    )
    bot_token: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=False
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )


class DiscordGuildConfig(Base):
    """Configuration for a Discord guild (server) connected to this tenant.

    registration_key is a one-time key used to link a Discord server to this tenant.
    Format: discord_<tenant_id>.<random_token>
    guild_id is NULL until the Discord admin runs !register with the key.
    """

    __tablename__ = "discord_guild_config"

    id: Mapped[int] = mapped_column(primary_key=True)

    # Discord snowflake - NULL until registered via command in Discord
    guild_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True, unique=True)
    guild_name: Mapped[str | None] = mapped_column(String(256), nullable=True)

    # One-time registration key: discord_<tenant_id>.<random_token>
    registration_key: Mapped[str] = mapped_column(String, unique=True, nullable=False)

    registered_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Configuration
    default_persona_id: Mapped[int | None] = mapped_column(
        ForeignKey("persona.id", ondelete="SET NULL"), nullable=True
    )
    enabled: Mapped[bool] = mapped_column(
        Boolean, server_default=text("true"), nullable=False
    )

    # Relationships
    default_persona: Mapped["Persona | None"] = relationship(
        "Persona", foreign_keys=[default_persona_id]
    )
    channels: Mapped[list["DiscordChannelConfig"]] = relationship(
        back_populates="guild_config", cascade="all, delete-orphan"
    )


class DiscordChannelConfig(Base):
    """Per-channel configuration for Discord bot behavior.

    Used to whitelist specific channels and configure per-channel behavior.
    """

    __tablename__ = "discord_channel_config"

    id: Mapped[int] = mapped_column(primary_key=True)
    guild_config_id: Mapped[int] = mapped_column(
        ForeignKey("discord_guild_config.id", ondelete="CASCADE"), nullable=False
    )

    # Discord snowflake
    channel_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
    channel_name: Mapped[str] = mapped_column(String(), nullable=False)

    # Channel type from Discord (text, forum)
    channel_type: Mapped[str] = mapped_column(
        String(20), server_default=text("'text'"), nullable=False
    )

    # True if @everyone cannot view the channel
    is_private: Mapped[bool] = mapped_column(
        Boolean, server_default=text("false"), nullable=False
    )

    # If true, bot only responds to messages in threads
    # Otherwise, will reply in channel
    thread_only_mode: Mapped[bool] = mapped_column(
        Boolean, server_default=text("false"), nullable=False
    )

    # If true (default), bot only responds when @mentioned
    # If false, bot responds to ALL messages in this channel
    require_bot_invocation: Mapped[bool] = mapped_column(
        Boolean, server_default=text("true"), nullable=False
    )

    # Override the guild's default persona for this channel
    persona_override_id: Mapped[int | None] = mapped_column(
        ForeignKey("persona.id", ondelete="SET NULL"), nullable=True
    )

    enabled: Mapped[bool] = mapped_column(
        Boolean, server_default=text("false"), nullable=False
    )

    # Relationships
    guild_config: Mapped["DiscordGuildConfig"] = relationship(back_populates="channels")
    persona_override: Mapped["Persona | None"] = relationship()

    # Constraints
    __table_args__ = (
        UniqueConstraint(
            "guild_config_id", "channel_id", name="uq_discord_channel_guild_channel"
        ),
    )


class Milestone(Base):
    # This table is used to track significant events for a deployment towards finding value
    # The table is currently not used for features but it may be used in the future to inform
    # users about the product features and encourage usage/exploration.
    __tablename__ = "milestone"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    event_type: Mapped[MilestoneRecordType] = mapped_column(String)
    # Need to track counts and specific ids of certain events to know if the Milestone has been reached
    event_tracker: Mapped[dict | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    user: Mapped[User | None] = relationship("User")

    __table_args__ = (UniqueConstraint("event_type", name="uq_milestone_event_type"),)


class TaskQueueState(Base):
    # Currently refers to Celery Tasks
    __tablename__ = "task_queue_jobs"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Celery task id. currently only for readability/diagnostics
    task_id: Mapped[str] = mapped_column(String)
    # For any job type, this would be the same
    task_name: Mapped[str] = mapped_column(String)
    # Note that if the task dies, this won't necessarily be marked FAILED correctly
    status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus, native_enum=False))
    start_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    register_time: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )


class KVStore(Base):
    __tablename__ = "key_value_store"

    key: Mapped[str] = mapped_column(String, primary_key=True)
    value: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)
    encrypted_value: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(
        EncryptedJson(), nullable=True
    )


class FileRecord(Base):
    __tablename__ = "file_record"

    # Internal file ID, must be unique across all files.
    file_id: Mapped[str] = mapped_column(String, primary_key=True)

    display_name: Mapped[str] = mapped_column(String, nullable=True)
    file_origin: Mapped[FileOrigin] = mapped_column(Enum(FileOrigin, native_enum=False))
    file_type: Mapped[str] = mapped_column(String, default="text/plain")
    file_metadata: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)

    # External storage support (S3, MinIO, Azure Blob, etc.)
    bucket_name: Mapped[str] = mapped_column(String)
    object_key: Mapped[str] = mapped_column(String)

    # Timestamps for external storage
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )


class FileContent(Base):
    """Stores file content in PostgreSQL using Large Objects.
    Used when FILE_STORE_BACKEND=postgres to avoid needing S3/MinIO."""

    __tablename__ = "file_content"

    file_id: Mapped[str] = mapped_column(
        String,
        ForeignKey("file_record.file_id", ondelete="CASCADE"),
        primary_key=True,
    )
    # PostgreSQL Large Object OID referencing pg_largeobject
    lobj_oid: Mapped[int] = mapped_column(BigInteger, nullable=False)
    file_size: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)


"""
************************************************************************
Enterprise Edition Models
************************************************************************

These models are only used in Enterprise Edition only features in Onyx.
They are kept here to simplify the codebase and avoid having different assumptions
on the shape of data being passed around between the MIT and EE versions of Onyx.

In the MIT version of Onyx, assume these tables are always empty.
"""


class SamlAccount(Base):
    __tablename__ = "saml"

    id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), unique=True
    )
    encrypted_cookie: Mapped[str] = mapped_column(Text, unique=True)
    expires_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    user: Mapped[User] = relationship("User")


class User__UserGroup(Base):
    __tablename__ = "user__user_group"

    __table_args__ = (Index("ix_user__user_group_user_id", "user_id"),)

    is_curator: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
    )


class PermissionGrant(Base):
    __tablename__ = "permission_grant"

    __table_args__ = (
        UniqueConstraint(
            "group_id", "permission", name="uq_permission_grant_group_permission"
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id", ondelete="CASCADE"), nullable=False
    )
    permission: Mapped[Permission] = mapped_column(
        Enum(
            Permission,
            native_enum=False,
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
    )
    grant_source: Mapped[GrantSource] = mapped_column(
        Enum(GrantSource, native_enum=False), nullable=False
    )
    granted_by: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="SET NULL"), nullable=True
    )
    granted_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    is_deleted: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False, server_default=text("false")
    )

    group: Mapped["UserGroup"] = relationship(
        "UserGroup", back_populates="permission_grants"
    )

    @validates("permission")
    def _validate_permission(self, _key: str, value: Permission) -> Permission:
        if value in Permission.IMPLIED:
            raise ValueError(
                f"{value!r} is an implied permission and cannot be granted directly"
            )
        return value


class UserGroup__ConnectorCredentialPair(Base):
    __tablename__ = "user_group__connector_credential_pair"

    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )
    cc_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"), primary_key=True
    )
    # if `True`, then is part of the current state of the UserGroup
    # if `False`, then is a part of the prior state of the UserGroup
    # rows with `is_current=False` should be deleted when the UserGroup
    # is updated and should not exist for a given UserGroup if
    # `UserGroup.is_up_to_date == True`
    is_current: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
        primary_key=True,
    )

    cc_pair: Mapped[ConnectorCredentialPair] = relationship(
        "ConnectorCredentialPair",
    )


class Persona__UserGroup(Base):
    __tablename__ = "persona__user_group"

    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class LLMProvider__Persona(Base):
    """Association table restricting LLM providers to specific personas.

    If no such rows exist for a given LLM provider, then it is accessible by all personas.
    """

    __tablename__ = "llm_provider__persona"

    llm_provider_id: Mapped[int] = mapped_column(
        ForeignKey("llm_provider.id", ondelete="CASCADE"), primary_key=True
    )
    persona_id: Mapped[int] = mapped_column(
        ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
    )


class LLMProvider__UserGroup(Base):
    __tablename__ = "llm_provider__user_group"

    llm_provider_id: Mapped[int] = mapped_column(
        ForeignKey("llm_provider.id"), primary_key=True
    )
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class DocumentSet__UserGroup(Base):
    __tablename__ = "document_set__user_group"

    document_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_set.id"), primary_key=True
    )
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class Credential__UserGroup(Base):
    __tablename__ = "credential__user_group"

    credential_id: Mapped[int] = mapped_column(
        ForeignKey("credential.id"), primary_key=True
    )
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class UserGroup(Base):
    __tablename__ = "user_group"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    # whether or not changes to the UserGroup have been propagated to Vespa
    is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # tell the sync job to clean up the group
    is_up_for_deletion: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
    # whether this is a default group (e.g. "Basic", "Admins") that cannot be deleted
    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # Last time a user updated this user group
    time_last_modified_by_user: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    users: Mapped[list[User]] = relationship(
        "User",
        secondary=User__UserGroup.__table__,
    )
    user_group_relationships: Mapped[list[User__UserGroup]] = relationship(
        "User__UserGroup",
        viewonly=True,
    )
    cc_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(
        "ConnectorCredentialPair",
        secondary=UserGroup__ConnectorCredentialPair.__table__,
        viewonly=True,
    )
    cc_pair_relationships: Mapped[list[UserGroup__ConnectorCredentialPair]] = (
        relationship(
            "UserGroup__ConnectorCredentialPair",
            viewonly=True,
        )
    )
    personas: Mapped[list[Persona]] = relationship(
        "Persona",
        secondary=Persona__UserGroup.__table__,
        viewonly=True,
    )
    document_sets: Mapped[list[DocumentSet]] = relationship(
        "DocumentSet",
        secondary=DocumentSet__UserGroup.__table__,
        viewonly=True,
    )
    credentials: Mapped[list[Credential]] = relationship(
        "Credential",
        secondary=Credential__UserGroup.__table__,
    )
    # MCP servers accessible to this user group
    accessible_mcp_servers: Mapped[list["MCPServer"]] = relationship(
        "MCPServer", secondary="mcp_server__user_group", back_populates="user_groups"
    )
    permission_grants: Mapped[list["PermissionGrant"]] = relationship(
        "PermissionGrant", back_populates="group", cascade="all, delete-orphan"
    )


"""Tables related to Token Rate Limiting
NOTE: `TokenRateLimit` is partially an MIT feature (global rate limit)
"""


class TokenRateLimit(Base):
    __tablename__ = "token_rate_limit"

    id: Mapped[int] = mapped_column(primary_key=True)
    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    token_budget: Mapped[int] = mapped_column(Integer, nullable=False)
    period_hours: Mapped[int] = mapped_column(Integer, nullable=False)
    scope: Mapped[TokenRateLimitScope] = mapped_column(
        Enum(TokenRateLimitScope, native_enum=False)
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )


class TokenRateLimit__UserGroup(Base):
    __tablename__ = "token_rate_limit__user_group"

    rate_limit_id: Mapped[int] = mapped_column(
        ForeignKey("token_rate_limit.id"), primary_key=True
    )
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class StandardAnswerCategory(Base):
    __tablename__ = "standard_answer_category"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    standard_answers: Mapped[list["StandardAnswer"]] = relationship(
        "StandardAnswer",
        secondary=StandardAnswer__StandardAnswerCategory.__table__,
        back_populates="categories",
    )
    slack_channel_configs: Mapped[list["SlackChannelConfig"]] = relationship(
        "SlackChannelConfig",
        secondary=SlackChannelConfig__StandardAnswerCategory.__table__,
        back_populates="standard_answer_categories",
    )


class StandardAnswer(Base):
    __tablename__ = "standard_answer"

    id: Mapped[int] = mapped_column(primary_key=True)
    keyword: Mapped[str] = mapped_column(String)
    answer: Mapped[str] = mapped_column(String)
    active: Mapped[bool] = mapped_column(Boolean)
    match_regex: Mapped[bool] = mapped_column(Boolean)
    match_any_keywords: Mapped[bool] = mapped_column(Boolean)

    __table_args__ = (
        Index(
            "unique_keyword_active",
            keyword,
            active,
            unique=True,
            postgresql_where=(active == True),  # noqa: E712
        ),
    )

    categories: Mapped[list[StandardAnswerCategory]] = relationship(
        "StandardAnswerCategory",
        secondary=StandardAnswer__StandardAnswerCategory.__table__,
        back_populates="standard_answers",
    )
    chat_messages: Mapped[list[ChatMessage]] = relationship(
        "ChatMessage",
        secondary=ChatMessage__StandardAnswer.__table__,
        back_populates="standard_answers",
    )


class BackgroundError(Base):
    """Important background errors. Serves to:
    1. Ensure that important logs are kept around and not lost on rotation/container restarts
    2. A trail for high-signal events so that the debugger doesn't need to remember/know every
       possible relevant log line.
    """

    __tablename__ = "background_error"

    id: Mapped[int] = mapped_column(primary_key=True)
    message: Mapped[str] = mapped_column(String)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # option to link the error to a specific CC Pair
    cc_pair_id: Mapped[int | None] = mapped_column(
        ForeignKey("connector_credential_pair.id", ondelete="CASCADE"), nullable=True
    )

    cc_pair: Mapped["ConnectorCredentialPair | None"] = relationship(
        "ConnectorCredentialPair", back_populates="background_errors"
    )


"""Tables related to Permission Sync"""


class User__ExternalUserGroupId(Base):
    """Maps user info both internal and external to the name of the external group
    This maps the user to all of their external groups so that the external group name can be
    attached to the ACL list matching during query time. User level permissions can be handled by
    directly adding the Onyx user to the doc ACL list"""

    __tablename__ = "user__external_user_group_id"

    user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True)
    # These group ids have been prefixed by the source type
    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)
    cc_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"), primary_key=True
    )

    # Signifies whether or not the group should be cleaned up at the end of a
    # group sync run.
    stale: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    __table_args__ = (
        Index(
            "ix_user_external_group_cc_pair_stale",
            "cc_pair_id",
            "stale",
        ),
        Index(
            "ix_user_external_group_stale",
            "stale",
        ),
    )


class PublicExternalUserGroup(Base):
    """Stores all public external user "groups".

    For example, things like Google Drive folders that are marked
    as `Anyone with the link` or `Anyone in the domain`
    """

    __tablename__ = "public_external_user_group"

    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)
    cc_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id", ondelete="CASCADE"), primary_key=True
    )

    # Signifies whether or not the group should be cleaned up at the end of a
    # group sync run.
    stale: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    __table_args__ = (
        Index(
            "ix_public_external_group_cc_pair_stale",
            "cc_pair_id",
            "stale",
        ),
        Index(
            "ix_public_external_group_stale",
            "stale",
        ),
    )


class UsageReport(Base):
    """This stores metadata about usage reports generated by admin including user who generated
    them as well as the period they cover. The actual zip file of the report is stored as a lo
    using the FileRecord
    """

    __tablename__ = "usage_reports"

    id: Mapped[int] = mapped_column(primary_key=True)
    report_name: Mapped[str] = mapped_column(ForeignKey("file_record.file_id"))

    # if None, report was auto-generated
    requestor_user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    period_from: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    period_to: Mapped[datetime.datetime | None] = mapped_column(DateTime(timezone=True))

    requestor = relationship("User")
    file = relationship("FileRecord")


class InputPrompt(Base):
    __tablename__ = "inputprompt"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    prompt: Mapped[str] = mapped_column(String)
    content: Mapped[str] = mapped_column(String)
    active: Mapped[bool] = mapped_column(Boolean)
    user: Mapped[User | None] = relationship("User", back_populates="input_prompts")
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )

    __table_args__ = (
        # Unique constraint on (prompt, user_id) for user-owned prompts
        UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
        # Partial unique index for public prompts (user_id IS NULL)
        Index(
            "uq_inputprompt_prompt_public",
            "prompt",
            unique=True,
            postgresql_where=text("user_id IS NULL"),
        ),
    )


class InputPrompt__User(Base):
    __tablename__ = "inputprompt__user"

    input_prompt_id: Mapped[int] = mapped_column(
        ForeignKey("inputprompt.id"), primary_key=True
    )
    user_id: Mapped[UUID | None] = mapped_column(
        ForeignKey("user.id"), primary_key=True
    )
    disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)


class Project__UserFile(Base):
    __tablename__ = "project__user_file"

    project_id: Mapped[int] = mapped_column(
        ForeignKey("user_project.id"), primary_key=True
    )
    user_file_id: Mapped[UUID] = mapped_column(
        ForeignKey("user_file.id"), primary_key=True
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    __table_args__ = (
        Index(
            "ix_project__user_file_project_id_created_at",
            project_id,
            created_at.desc(),
        ),
    )


class UserProject(Base):
    __tablename__ = "user_project"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False)
    name: Mapped[str] = mapped_column(nullable=False)
    description: Mapped[str] = mapped_column(nullable=True)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    user: Mapped["User"] = relationship(back_populates="projects")
    user_files: Mapped[list["UserFile"]] = relationship(
        "UserFile",
        secondary=Project__UserFile.__table__,
        back_populates="projects",
    )
    chat_sessions: Mapped[list["ChatSession"]] = relationship(
        "ChatSession", back_populates="project", lazy="selectin"
    )
    instructions: Mapped[str] = mapped_column(String)


class UserDocument(str, Enum):
    CHAT = "chat"
    RECENT = "recent"
    FILE = "file"


class UserFile(Base):
    __tablename__ = "user_file"

    id: Mapped[UUID] = mapped_column(PGUUID(as_uuid=True), primary_key=True)
    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False)
    assistants: Mapped[list["Persona"]] = relationship(
        "Persona",
        secondary=Persona__UserFile.__table__,
        back_populates="user_files",
    )
    file_id: Mapped[str] = mapped_column(nullable=False)
    name: Mapped[str] = mapped_column(nullable=False)
    created_at: Mapped[datetime.datetime] = mapped_column(
        default=datetime.datetime.utcnow
    )
    user: Mapped["User"] = relationship(back_populates="files")
    token_count: Mapped[int | None] = mapped_column(Integer, nullable=True)

    file_type: Mapped[str] = mapped_column(String, nullable=False)

    status: Mapped[UserFileStatus] = mapped_column(
        Enum(UserFileStatus, native_enum=False, name="userfilestatus"),
        nullable=False,
        default=UserFileStatus.PROCESSING,
    )
    needs_project_sync: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
    needs_persona_sync: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
    last_project_sync_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    chunk_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
    last_accessed_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    link_url: Mapped[str | None] = mapped_column(String, nullable=True)
    content_type: Mapped[str | None] = mapped_column(String, nullable=True)

    projects: Mapped[list["UserProject"]] = relationship(
        "UserProject",
        secondary=Project__UserFile.__table__,
        back_populates="user_files",
        lazy="selectin",
    )


"""
Multi-tenancy related tables
"""


class PublicBase(DeclarativeBase):
    __abstract__ = True


# Strictly keeps track of the tenant that a given user will authenticate to.
class UserTenantMapping(Base):
    __tablename__ = "user_tenant_mapping"
    __table_args__ = ({"schema": "public"},)

    email: Mapped[str] = mapped_column(String, nullable=False, primary_key=True)
    tenant_id: Mapped[str] = mapped_column(String, nullable=False, primary_key=True)
    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)

    @validates("email")
    def validate_email(self, key: str, value: str) -> str:  # noqa: ARG002
        return value.lower() if value else value


class AvailableTenant(Base):
    __tablename__ = "available_tenant"
    """
    These entries will only exist ephemerally and are meant to be picked up by new users on registration.
    """

    tenant_id: Mapped[str] = mapped_column(String, primary_key=True, nullable=False)
    alembic_version: Mapped[str] = mapped_column(String, nullable=False)
    date_created: Mapped[datetime.datetime] = mapped_column(DateTime, nullable=False)


# This is a mapping from tenant IDs to anonymous user paths
class TenantAnonymousUserPath(Base):
    __tablename__ = "tenant_anonymous_user_path"

    tenant_id: Mapped[str] = mapped_column(String, primary_key=True, nullable=False)
    anonymous_user_path: Mapped[str] = mapped_column(
        String, nullable=False, unique=True
    )


class MCPServer(Base):
    """Model for storing MCP server configurations"""

    __tablename__ = "mcp_server"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Owner email of user who configured this server
    owner: Mapped[str] = mapped_column(String, nullable=False)
    name: Mapped[str] = mapped_column(String, nullable=False)
    description: Mapped[str | None] = mapped_column(String, nullable=True)
    server_url: Mapped[str] = mapped_column(String, nullable=False)
    # Transport type for connecting to the MCP server
    transport: Mapped[MCPTransport | None] = mapped_column(
        Enum(MCPTransport, native_enum=False), nullable=True
    )
    # Auth type: "none", "api_token", or "oauth"
    auth_type: Mapped[MCPAuthenticationType | None] = mapped_column(
        Enum(MCPAuthenticationType, native_enum=False), nullable=True
    )
    # Who performs authentication for this server (ADMIN or PER_USER)
    auth_performer: Mapped[MCPAuthenticationPerformer | None] = mapped_column(
        Enum(MCPAuthenticationPerformer, native_enum=False), nullable=True
    )
    # Status tracking for configuration flow
    status: Mapped[MCPServerStatus] = mapped_column(
        Enum(MCPServerStatus, native_enum=False),
        nullable=False,
        server_default="CREATED",
    )
    # Admin connection config - used for the config page
    # and (when applicable) admin-managed auth
    # and (when applicable) per-user auth
    admin_connection_config_id: Mapped[int | None] = mapped_column(
        Integer,
        ForeignKey("mcp_connection_config.id", ondelete="SET NULL"),
        nullable=True,
    )

    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    last_refreshed_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Relationships
    admin_connection_config: Mapped["MCPConnectionConfig | None"] = relationship(
        "MCPConnectionConfig",
        foreign_keys=[admin_connection_config_id],
        back_populates="admin_servers",
    )

    user_connection_configs: Mapped[list["MCPConnectionConfig"]] = relationship(
        "MCPConnectionConfig",
        foreign_keys="MCPConnectionConfig.mcp_server_id",
        back_populates="mcp_server",
        passive_deletes=True,
    )
    current_actions: Mapped[list["Tool"]] = relationship(
        "Tool", back_populates="mcp_server", cascade="all, delete-orphan"
    )
    # Many-to-many relationships for access control
    users: Mapped[list["User"]] = relationship(
        "User", secondary="mcp_server__user", back_populates="accessible_mcp_servers"
    )
    user_groups: Mapped[list["UserGroup"]] = relationship(
        "UserGroup",
        secondary="mcp_server__user_group",
        back_populates="accessible_mcp_servers",
    )


class MCPServer__User(Base):
    __tablename__ = "mcp_server__user"
    mcp_server_id: Mapped[int] = mapped_column(
        ForeignKey("mcp_server.id", ondelete="CASCADE"), primary_key=True
    )
    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True
    )


class MCPServer__UserGroup(Base):
    __tablename__ = "mcp_server__user_group"
    mcp_server_id: Mapped[int] = mapped_column(
        ForeignKey("mcp_server.id"), primary_key=True
    )
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id"), primary_key=True
    )


class MCPConnectionConfig(Base):
    """Model for storing MCP connection configurations (credentials, auth data)"""

    __tablename__ = "mcp_connection_config"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Server this config is for (nullable for template configs)
    mcp_server_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("mcp_server.id", ondelete="CASCADE"), nullable=True
    )
    # User email this config is for (empty for admin configs and templates)
    user_email: Mapped[str] = mapped_column(String, nullable=False, default="")
    # Config data stored as JSON
    # Format: {
    #   "refresh_token": "<token>",  # OAuth only
    #   "access_token": "<token>",   # OAuth only
    #   "headers": {"key": "value", "key2": "value2"},
    #   "header_substitutions": {"<key>": "<value>"}, # stored header template substitutions
    #   "request_body": ["path/in/body:value", "path2/in2/body2:value2"] # TBD
    #   "client_id": "<id>",  # For dynamically registered OAuth clients
    #   "client_secret": "<secret>",  # For confidential clients
    #   "registration_access_token": "<token>",  # For managing registration
    #   "registration_client_uri": "<uri>",  # For managing registration
    # }
    config: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(
        EncryptedJson(), nullable=False, default=dict
    )

    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    # Relationships
    mcp_server: Mapped["MCPServer | None"] = relationship(
        "MCPServer",
        foreign_keys=[mcp_server_id],
        back_populates="user_connection_configs",
    )
    admin_servers: Mapped[list["MCPServer"]] = relationship(
        "MCPServer",
        foreign_keys="MCPServer.admin_connection_config_id",
        back_populates="admin_connection_config",
    )

    __table_args__ = (
        Index("ix_mcp_connection_config_user_email", "user_email"),
        Index("ix_mcp_connection_config_server_user", "mcp_server_id", "user_email"),
    )


"""
Permission Sync Tables
"""


class DocPermissionSyncAttempt(Base):
    """
    Represents an attempt to sync document permissions for a connector credential pair.
    Similar to IndexAttempt but specifically for document permission syncing operations.
    """

    __tablename__ = "doc_permission_sync_attempt"

    id: Mapped[int] = mapped_column(primary_key=True)

    connector_credential_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id"),
        nullable=False,
    )

    # Status of the sync attempt
    status: Mapped[PermissionSyncStatus] = mapped_column(
        Enum(PermissionSyncStatus, native_enum=False, index=True)
    )

    # Counts for tracking progress
    total_docs_synced: Mapped[int | None] = mapped_column(Integer, default=0)
    docs_with_permission_errors: Mapped[int | None] = mapped_column(Integer, default=0)

    # Error message if sync fails
    error_message: Mapped[str | None] = mapped_column(Text, default=None)

    # Timestamps
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )
    time_started: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
    time_finished: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )

    # Relationships
    connector_credential_pair: Mapped[ConnectorCredentialPair] = relationship(
        "ConnectorCredentialPair"
    )

    __table_args__ = (
        Index(
            "ix_permission_sync_attempt_latest_for_cc_pair",
            "connector_credential_pair_id",
            "time_created",
        ),
        Index(
            "ix_permission_sync_attempt_status_time",
            "status",
            desc("time_finished"),
        ),
    )

    def __repr__(self) -> str:
        return f"<DocPermissionSyncAttempt(id={self.id!r}, status={self.status!r})>"

    def is_finished(self) -> bool:
        return self.status.is_terminal()


class ExternalGroupPermissionSyncAttempt(Base):
    """
    Represents an attempt to sync external group memberships for users.
    This tracks the syncing of user-to-external-group mappings across connectors.
    """

    __tablename__ = "external_group_permission_sync_attempt"

    id: Mapped[int] = mapped_column(primary_key=True)

    # Can be tied to a specific connector or be a global group sync
    connector_credential_pair_id: Mapped[int | None] = mapped_column(
        ForeignKey("connector_credential_pair.id"),
        nullable=True,  # Nullable for global group syncs across all connectors
    )

    # Status of the group sync attempt
    status: Mapped[PermissionSyncStatus] = mapped_column(
        Enum(PermissionSyncStatus, native_enum=False, index=True)
    )

    # Counts for tracking progress
    total_users_processed: Mapped[int | None] = mapped_column(Integer, default=0)
    total_groups_processed: Mapped[int | None] = mapped_column(Integer, default=0)
    total_group_memberships_synced: Mapped[int | None] = mapped_column(
        Integer, default=0
    )

    # Error message if sync fails
    error_message: Mapped[str | None] = mapped_column(Text, default=None)

    # Timestamps
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )
    time_started: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
    time_finished: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )

    # Relationships
    connector_credential_pair: Mapped[ConnectorCredentialPair | None] = relationship(
        "ConnectorCredentialPair"
    )

    __table_args__ = (
        Index(
            "ix_group_sync_attempt_cc_pair_time",
            "connector_credential_pair_id",
            "time_created",
        ),
        Index(
            "ix_group_sync_attempt_status_time",
            "status",
            desc("time_finished"),
        ),
    )

    def __repr__(self) -> str:
        return f"<ExternalGroupPermissionSyncAttempt(id={self.id!r}, status={self.status!r})>"

    def is_finished(self) -> bool:
        return self.status.is_terminal()


class License(Base):
    """Stores the signed license blob (singleton pattern - only one row)."""

    __tablename__ = "license"
    __table_args__ = (
        # Singleton pattern - unique index on constant ensures only one row
        Index("idx_license_singleton", text("(true)"), unique=True),
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    license_data: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )


class TenantUsage(Base):
    """
    Tracks per-tenant usage statistics within a time window for cloud usage limits.

    Each row represents usage for a specific tenant during a specific time window.
    A new row is created when the window rolls over (typically weekly).
    """

    __tablename__ = "tenant_usage"

    id: Mapped[int] = mapped_column(primary_key=True)

    # The start of the usage tracking window (e.g., start of the week in UTC)
    window_start: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, index=True
    )

    # Cumulative LLM usage cost in cents for the window
    llm_cost_cents: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)

    # Number of chunks indexed during the window
    chunks_indexed: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Number of API calls using API keys or Personal Access Tokens
    api_calls: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Number of non-streaming API calls (more expensive operations)
    non_streaming_api_calls: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )

    # Last updated timestamp for tracking freshness
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    __table_args__ = (
        # Ensure only one row per window start (tenant_id is in the schema name)
        UniqueConstraint("window_start", name="uq_tenant_usage_window"),
    )


"""Tables related to Build Mode (CLI Agent Platform)"""


class BuildSession(Base):
    """Stores metadata about CLI agent build sessions."""

    __tablename__ = "build_session"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    user_id: Mapped[UUID | None] = mapped_column(
        PGUUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=True
    )
    name: Mapped[str | None] = mapped_column(String, nullable=True)
    status: Mapped[BuildSessionStatus] = mapped_column(
        Enum(BuildSessionStatus, native_enum=False, name="buildsessionstatus"),
        nullable=False,
        default=BuildSessionStatus.ACTIVE,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    last_activity_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    nextjs_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
    demo_data_enabled: Mapped[bool] = mapped_column(
        Boolean, nullable=False, server_default=text("true")
    )
    sharing_scope: Mapped[SharingScope] = mapped_column(
        String,
        nullable=False,
        default=SharingScope.PRIVATE,
        server_default="private",
    )

    # Relationships
    user: Mapped[User | None] = relationship("User", foreign_keys=[user_id])
    artifacts: Mapped[list["Artifact"]] = relationship(
        "Artifact", back_populates="session", cascade="all, delete-orphan"
    )
    messages: Mapped[list["BuildMessage"]] = relationship(
        "BuildMessage", back_populates="session", cascade="all, delete-orphan"
    )
    snapshots: Mapped[list["Snapshot"]] = relationship(
        "Snapshot", back_populates="session", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("ix_build_session_user_created", "user_id", desc("created_at")),
        Index("ix_build_session_status", "status"),
    )


class Sandbox(Base):
    """Stores sandbox container metadata for users (one sandbox per user)."""

    __tablename__ = "sandbox"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    user_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("user.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    container_id: Mapped[str | None] = mapped_column(String, nullable=True)
    status: Mapped[SandboxStatus] = mapped_column(
        Enum(SandboxStatus, native_enum=False, name="sandboxstatus"),
        nullable=False,
        default=SandboxStatus.PROVISIONING,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    last_heartbeat: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Relationships
    user: Mapped[User] = relationship("User")

    __table_args__ = (
        Index("ix_sandbox_status", "status"),
        Index("ix_sandbox_container_id", "container_id"),
    )


class Artifact(Base):
    """Stores metadata about artifacts generated by CLI agents."""

    __tablename__ = "artifact"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    type: Mapped[ArtifactType] = mapped_column(
        Enum(ArtifactType, native_enum=False, name="artifacttype"), nullable=False
    )
    # path of artifact in sandbox relative to outputs/
    path: Mapped[str] = mapped_column(String, nullable=False)
    name: Mapped[str] = mapped_column(String, nullable=False)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="artifacts"
    )

    __table_args__ = (
        Index("ix_artifact_session_created", "session_id", desc("created_at")),
        Index("ix_artifact_type", "type"),
    )


class Snapshot(Base):
    """Stores metadata about session output snapshots."""

    __tablename__ = "snapshot"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    storage_path: Mapped[str] = mapped_column(String, nullable=False)
    size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="snapshots"
    )

    __table_args__ = (
        Index("ix_snapshot_session_created", "session_id", desc("created_at")),
    )


class BuildMessage(Base):
    """Stores messages exchanged in build sessions.

    All message data is stored in message_metadata as JSON (the raw ACP packet).
    The turn_index groups all assistant responses under the user prompt they respond to.

    Packet types stored in message_metadata:
    - user_message: {type: "user_message", content: {...}}
    - agent_message: {type: "agent_message", content: {...}} (accumulated from chunks)
    - agent_thought: {type: "agent_thought", content: {...}} (accumulated from chunks)
    - tool_call_progress: {type: "tool_call_progress", status: "completed", ...} (only completed)
    - agent_plan_update: {type: "agent_plan_update", entries: [...]} (upserted, latest only)
    """

    __tablename__ = "build_message"

    id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True), primary_key=True, default=uuid4
    )
    session_id: Mapped[UUID] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("build_session.id", ondelete="CASCADE"),
        nullable=False,
    )
    turn_index: Mapped[int] = mapped_column(Integer, nullable=False)
    type: Mapped[MessageType] = mapped_column(
        Enum(MessageType, native_enum=False, name="messagetype"), nullable=False
    )
    message_metadata: Mapped[dict[str, Any]] = mapped_column(PGJSONB, nullable=False)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Relationships
    session: Mapped[BuildSession] = relationship(
        "BuildSession", back_populates="messages"
    )

    __table_args__ = (
        Index(
            "ix_build_message_session_turn", "session_id", "turn_index", "created_at"
        ),
    )


"""
SCIM 2.0 Provisioning Models (Enterprise Edition only)
Used for automated user/group provisioning from identity providers (Okta, Azure AD).
"""


class ScimToken(Base):
    """Bearer tokens for IdP SCIM authentication."""

    __tablename__ = "scim_token"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    hashed_token: Mapped[str] = mapped_column(
        String(64), unique=True, nullable=False
    )  # SHA256 = 64 hex chars
    token_display: Mapped[str] = mapped_column(
        String, nullable=False
    )  # Last 4 chars for UI identification

    created_by_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), nullable=False
    )

    is_active: Mapped[bool] = mapped_column(
        Boolean, server_default=text("true"), nullable=False
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    last_used_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    created_by: Mapped[User] = relationship("User", foreign_keys=[created_by_id])


class ScimUserMapping(Base):
    """Maps SCIM externalId from the IdP to an Onyx User."""

    __tablename__ = "scim_user_mapping"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    external_id: Mapped[str | None] = mapped_column(
        String, unique=True, index=True, nullable=True
    )
    user_id: Mapped[UUID] = mapped_column(
        ForeignKey("user.id", ondelete="CASCADE"), unique=True, nullable=False
    )
    scim_username: Mapped[str | None] = mapped_column(String, nullable=True)
    department: Mapped[str | None] = mapped_column(String, nullable=True)
    manager: Mapped[str | None] = mapped_column(String, nullable=True)
    given_name: Mapped[str | None] = mapped_column(String, nullable=True)
    family_name: Mapped[str | None] = mapped_column(String, nullable=True)
    scim_emails_json: Mapped[str | None] = mapped_column(Text, nullable=True)

    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    user: Mapped[User] = relationship("User", foreign_keys=[user_id])


class ScimGroupMapping(Base):
    """Maps SCIM externalId from the IdP to an Onyx UserGroup."""

    __tablename__ = "scim_group_mapping"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    external_id: Mapped[str] = mapped_column(String, unique=True, index=True)
    user_group_id: Mapped[int] = mapped_column(
        ForeignKey("user_group.id", ondelete="CASCADE"), unique=True, nullable=False
    )

    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    user_group: Mapped[UserGroup] = relationship(
        "UserGroup", foreign_keys=[user_group_id]
    )


class CodeInterpreterServer(Base):
    """Details about the code interpreter server"""

    __tablename__ = "code_interpreter_server"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    server_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)


class CacheStore(Base):
    """Key-value cache table used by ``PostgresCacheBackend``.

    Replaces Redis for simple KV caching, locks, and list operations
    when ``CACHE_BACKEND=postgres`` (NO_VECTOR_DB deployments).

    Intentionally separate from ``KVStore``:
    - Stores raw bytes (LargeBinary) vs JSONB, matching Redis semantics.
    - Has ``expires_at`` for TTL; rows are periodically garbage-collected.
    - Holds ephemeral data (tokens, stop signals, lock state) not
      persistent application config, so cleanup can be aggressive.
    """

    __tablename__ = "cache_store"

    key: Mapped[str] = mapped_column(String, primary_key=True)
    value: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    expires_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )


class Hook(Base):
    """Pairs a HookPoint with a customer-provided API endpoint.

    At most one non-deleted Hook per HookPoint is allowed, enforced by a
    partial unique index on (hook_point) where deleted=false.
    """

    __tablename__ = "hook"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    hook_point: Mapped[HookPoint] = mapped_column(
        Enum(HookPoint, native_enum=False), nullable=False
    )
    endpoint_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
        EncryptedString(), nullable=True
    )
    is_reachable: Mapped[bool | None] = mapped_column(
        Boolean, nullable=True, default=None
    )  # null = never validated, true = last check passed, false = last check failed
    fail_strategy: Mapped[HookFailStrategy] = mapped_column(
        Enum(HookFailStrategy, native_enum=False),
        nullable=False,
        default=HookFailStrategy.HARD,
    )
    timeout_seconds: Mapped[float] = mapped_column(Float, nullable=False, default=30.0)
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    creator_id: Mapped[UUID | None] = mapped_column(
        PGUUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    creator: Mapped["User | None"] = relationship("User", foreign_keys=[creator_id])
    execution_logs: Mapped[list["HookExecutionLog"]] = relationship(
        "HookExecutionLog", back_populates="hook", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index(
            "ix_hook_one_non_deleted_per_point",
            "hook_point",
            unique=True,
            postgresql_where=(deleted == False),  # noqa: E712
        ),
    )


class HookExecutionLog(Base):
    """Records hook executions for health monitoring and debugging.

    Currently only failures are logged; the is_success column exists so
    success logging can be added later without a schema change.
    Retention: rows older than 30 days are deleted by a nightly Celery task.
    """

    __tablename__ = "hook_execution_log"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    hook_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("hook.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    is_success: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
    duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
    )

    hook: Mapped["Hook"] = relationship("Hook", back_populates="execution_logs")


================================================
FILE: backend/onyx/db/notification.py
================================================
from datetime import datetime
from datetime import timezone
from uuid import UUID

from sqlalchemy import cast
from sqlalchemy import select
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from sqlalchemy.sql import func

from onyx.auth.schemas import UserRole
from onyx.configs.constants import NotificationType
from onyx.db.models import Notification
from onyx.db.models import User


def create_notification(
    user_id: UUID | None,
    notif_type: NotificationType,
    db_session: Session,
    title: str,
    description: str | None = None,
    additional_data: dict | None = None,
    autocommit: bool = True,
) -> Notification:
    # Previously, we only matched the first identical, undismissed notification
    # Now, we assume some uniqueness to notifications
    # If we previously issued a notification that was dismissed, we no longer issue a new one

    # Normalize additional_data to match the unique index behavior
    # The index uses COALESCE(additional_data, '{}'::jsonb)
    # We need to match this logic in our query
    additional_data_normalized = additional_data if additional_data is not None else {}

    existing_notification = (
        db_session.query(Notification)
        .filter_by(user_id=user_id, notif_type=notif_type)
        .filter(
            func.coalesce(Notification.additional_data, cast({}, postgresql.JSONB))
            == additional_data_normalized
        )
        .first()
    )

    if existing_notification:
        # Update the last_shown timestamp if the notification is not dismissed
        if not existing_notification.dismissed:
            existing_notification.last_shown = func.now()
            if autocommit:
                db_session.commit()
        return existing_notification

    # Create a new notification if none exists
    notification = Notification(
        user_id=user_id,
        notif_type=notif_type,
        title=title,
        description=description,
        dismissed=False,
        last_shown=func.now(),
        first_shown=func.now(),
        additional_data=additional_data,
    )
    db_session.add(notification)
    if autocommit:
        db_session.commit()
    return notification


def get_notification_by_id(
    notification_id: int, user: User, db_session: Session
) -> Notification:
    user_id = user.id
    notif = db_session.get(Notification, notification_id)
    if not notif:
        raise ValueError(f"No notification found with id {notification_id}")
    if notif.user_id != user_id and not (
        notif.user_id is None and user is not None and user.role == UserRole.ADMIN
    ):
        raise PermissionError(
            f"User {user_id} is not authorized to access notification {notification_id}"
        )
    return notif


def get_notifications(
    user: User | None,
    db_session: Session,
    notif_type: NotificationType | None = None,
    include_dismissed: bool = True,
) -> list[Notification]:
    query = select(Notification).where(
        Notification.user_id == user.id if user else Notification.user_id.is_(None)
    )
    if not include_dismissed:
        query = query.where(Notification.dismissed.is_(False))
    if notif_type:
        query = query.where(Notification.notif_type == notif_type)
    # Sort: undismissed first, then by date (newest first)
    query = query.order_by(
        Notification.dismissed.asc(),
        Notification.first_shown.desc(),
    )
    return list(db_session.execute(query).scalars().all())


def dismiss_all_notifications(
    notif_type: NotificationType,
    db_session: Session,
) -> None:
    db_session.query(Notification).filter(Notification.notif_type == notif_type).update(
        {"dismissed": True}
    )
    db_session.commit()


def dismiss_notification(notification: Notification, db_session: Session) -> None:
    notification.dismissed = True
    db_session.commit()


def batch_dismiss_notifications(
    notifications: list[Notification],
    db_session: Session,
) -> None:
    for notification in notifications:
        notification.dismissed = True
    db_session.commit()


def batch_create_notifications(
    user_ids: list[UUID],
    notif_type: NotificationType,
    db_session: Session,
    title: str,
    description: str | None = None,
    additional_data: dict | None = None,
) -> int:
    """
    Create notifications for multiple users in a single batch operation.
    Uses ON CONFLICT DO NOTHING for atomic idempotent inserts - if a user already
    has a notification with the same (user_id, notif_type, additional_data), the
    insert is silently skipped.

    Returns the number of notifications created.

    Relies on unique index on (user_id, notif_type, COALESCE(additional_data, '{}'))
    """
    if not user_ids:
        return 0

    now = datetime.now(timezone.utc)
    # Use empty dict instead of None to match COALESCE behavior in the unique index
    additional_data_normalized = additional_data if additional_data is not None else {}

    values = [
        {
            "user_id": uid,
            "notif_type": notif_type.value,
            "title": title,
            "description": description,
            "dismissed": False,
            "last_shown": now,
            "first_shown": now,
            "additional_data": additional_data_normalized,
        }
        for uid in user_ids
    ]

    stmt = insert(Notification).values(values).on_conflict_do_nothing()
    result = db_session.execute(stmt)
    db_session.commit()

    # rowcount returns number of rows inserted (excludes conflicts)
    # CursorResult has rowcount but session.execute type hints are too broad
    return result.rowcount if result.rowcount >= 0 else 0  # type: ignore[attr-defined]


def update_notification_last_shown(
    notification: Notification, db_session: Session
) -> None:
    notification.last_shown = func.now()
    db_session.commit()


================================================
FILE: backend/onyx/db/oauth_config.py
================================================
from typing import Any
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import OAuthConfig
from onyx.db.models import OAuthUserToken
from onyx.db.models import Tool
from onyx.utils.logger import setup_logger


logger = setup_logger()


# OAuth Config CRUD operations


def create_oauth_config(
    name: str,
    authorization_url: str,
    token_url: str,
    client_id: str,
    client_secret: str,
    scopes: list[str] | None,
    additional_params: dict[str, str] | None,
    db_session: Session,
) -> OAuthConfig:
    """Create a new OAuth configuration"""
    oauth_config = OAuthConfig(
        name=name,
        authorization_url=authorization_url,
        token_url=token_url,
        client_id=client_id,
        client_secret=client_secret,
        scopes=scopes,
        additional_params=additional_params,
    )
    db_session.add(oauth_config)
    db_session.commit()
    return oauth_config


def get_oauth_config(oauth_config_id: int, db_session: Session) -> OAuthConfig | None:
    """Get OAuth configuration by ID"""
    return db_session.scalar(
        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)
    )


def get_oauth_configs(db_session: Session) -> list[OAuthConfig]:
    """Get all OAuth configurations"""
    return list(db_session.scalars(select(OAuthConfig)).all())


def update_oauth_config(
    oauth_config_id: int,
    db_session: Session,
    name: str | None = None,
    authorization_url: str | None = None,
    token_url: str | None = None,
    client_id: str | None = None,
    client_secret: str | None = None,
    scopes: list[str] | None = None,
    additional_params: dict[str, Any] | None = None,
    clear_client_id: bool = False,
    clear_client_secret: bool = False,
) -> OAuthConfig:
    """
    Update OAuth configuration.

    NOTE: If client_id or client_secret are None, existing values are preserved.
    To clear these values, set clear_client_id or clear_client_secret to True.
    This allows partial updates without re-entering secrets.
    """
    oauth_config = db_session.scalar(
        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)
    )
    if oauth_config is None:
        raise ValueError(f"OAuth config with id {oauth_config_id} does not exist")

    # Update only provided fields
    if name is not None:
        oauth_config.name = name
    if authorization_url is not None:
        oauth_config.authorization_url = authorization_url
    if token_url is not None:
        oauth_config.token_url = token_url
    if clear_client_id:
        oauth_config.client_id = ""  # type: ignore[assignment]
    elif client_id is not None:
        oauth_config.client_id = client_id  # type: ignore[assignment]
    if clear_client_secret:
        oauth_config.client_secret = ""  # type: ignore[assignment]
    elif client_secret is not None:
        oauth_config.client_secret = client_secret  # type: ignore[assignment]
    if scopes is not None:
        oauth_config.scopes = scopes
    if additional_params is not None:
        oauth_config.additional_params = additional_params

    db_session.commit()
    return oauth_config


def delete_oauth_config(oauth_config_id: int, db_session: Session) -> None:
    """
    Delete OAuth configuration.

    Sets oauth_config_id to NULL for associated tools due to SET NULL foreign key.
    Cascades delete to user tokens.
    """
    oauth_config = db_session.scalar(
        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)
    )
    if oauth_config is None:
        raise ValueError(f"OAuth config with id {oauth_config_id} does not exist")

    db_session.delete(oauth_config)
    db_session.commit()


# User Token operations


def get_user_oauth_token(
    oauth_config_id: int, user_id: UUID, db_session: Session
) -> OAuthUserToken | None:
    """Get user's OAuth token for a specific configuration"""
    return db_session.scalar(
        select(OAuthUserToken).where(
            OAuthUserToken.oauth_config_id == oauth_config_id,
            OAuthUserToken.user_id == user_id,
        )
    )


def get_all_user_oauth_tokens(
    user_id: UUID, db_session: Session
) -> list[OAuthUserToken]:
    """
    Get all user OAuth tokens.
    """
    stmt = select(OAuthUserToken).where(OAuthUserToken.user_id == user_id)

    return list(db_session.scalars(stmt).all())


def upsert_user_oauth_token(
    oauth_config_id: int, user_id: UUID, token_data: dict, db_session: Session
) -> OAuthUserToken:
    """Insert or update user's OAuth token for a specific configuration"""
    existing_token = get_user_oauth_token(oauth_config_id, user_id, db_session)

    if existing_token:
        # Update existing token
        existing_token.token_data = token_data  # type: ignore[assignment]
        db_session.commit()
        return existing_token
    else:
        # Create new token
        new_token = OAuthUserToken(
            oauth_config_id=oauth_config_id,
            user_id=user_id,
            token_data=token_data,
        )
        db_session.add(new_token)
        db_session.commit()
        return new_token


def delete_user_oauth_token(
    oauth_config_id: int, user_id: UUID, db_session: Session
) -> None:
    """Delete user's OAuth token for a specific configuration"""
    user_token = get_user_oauth_token(oauth_config_id, user_id, db_session)
    if user_token is None:
        raise ValueError(
            f"OAuth token for user {user_id} and config {oauth_config_id} does not exist"
        )

    db_session.delete(user_token)
    db_session.commit()


# Helper operations


def get_tools_by_oauth_config(oauth_config_id: int, db_session: Session) -> list[Tool]:
    """Get all tools that use a specific OAuth configuration"""
    return list(
        db_session.scalars(
            select(Tool).where(Tool.oauth_config_id == oauth_config_id)
        ).all()
    )


================================================
FILE: backend/onyx/db/opensearch_migration.py
================================================
"""Database operations for OpenSearch migration tracking.

This module provides functions to track the progress of migrating documents
from Vespa to OpenSearch.
"""

import json
from datetime import datetime
from datetime import timezone

from sqlalchemy import select
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from onyx.background.celery.tasks.opensearch_migration.constants import (
    GET_VESPA_CHUNKS_SLICE_COUNT,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
    TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE,
)
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
from onyx.db.enums import OpenSearchDocumentMigrationStatus
from onyx.db.models import Document
from onyx.db.models import OpenSearchDocumentMigrationRecord
from onyx.db.models import OpenSearchTenantMigrationRecord
from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_paginated_document_batch(
    db_session: Session,
    limit: int,
    prev_ending_document_id: str | None = None,
) -> list[str]:
    """Gets a paginated batch of document IDs from the Document table.

    We need some deterministic ordering to ensure that we don't miss any
    documents when paginating. This function uses the document ID. It is
    possible a document is inserted above a spot this function has already
    passed. In that event we assume that the document will be indexed into
    OpenSearch anyway and we don't need to migrate.
    TODO(andrei): Consider ordering on last_modified in addition to ID to better
    match get_opensearch_migration_records_needing_migration.

    Args:
        db_session: SQLAlchemy session.
        limit: Number of document IDs to fetch.
        prev_ending_document_id: Document ID to start after (for pagination). If
            None, returns the first batch of documents. If not None, this should
            be the last ordered ID which was fetched in a previous batch.
            Defaults to None.

    Returns:
        List of document IDs.
    """
    stmt = select(Document.id).order_by(Document.id.asc()).limit(limit)
    if prev_ending_document_id is not None:
        stmt = stmt.where(Document.id > prev_ending_document_id)
    return list(db_session.scalars(stmt).all())


def get_last_opensearch_migration_document_id(
    db_session: Session,
) -> str | None:
    """
    Gets the last document ID in the OpenSearchDocumentMigrationRecord table.

    Returns None if no records are found.
    """
    stmt = (
        select(OpenSearchDocumentMigrationRecord.document_id)
        .order_by(OpenSearchDocumentMigrationRecord.document_id.desc())
        .limit(1)
    )
    return db_session.scalars(stmt).first()


def create_opensearch_migration_records_with_commit(
    db_session: Session,
    document_ids: list[str],
) -> None:
    """Creates new OpenSearchDocumentMigrationRecord records.

    Silently skips any document IDs that already have records.
    """
    if not document_ids:
        return

    values = [
        {
            "document_id": document_id,
            "status": OpenSearchDocumentMigrationStatus.PENDING,
        }
        for document_id in document_ids
    ]

    stmt = insert(OpenSearchDocumentMigrationRecord).values(values)
    stmt = stmt.on_conflict_do_nothing(index_elements=["document_id"])

    db_session.execute(stmt)
    db_session.commit()


def get_opensearch_migration_records_needing_migration(
    db_session: Session,
    limit: int,
) -> list[OpenSearchDocumentMigrationRecord]:
    """Gets records of documents that need to be migrated.

    Properties:
    - First tries documents with status PENDING.
    - Of these, orders documents with the oldest last_modified to prioritize
      documents that were modified a long time ago, as they are presumed to be
      stable. This column is modified in many flows so is not a guarantee of the
      document having been indexed.
    - Then if there's room in the result, tries documents with status FAILED.
    - Of these, first orders documents on the least attempts_count so as to have
      a backoff for recently-failed docs. Then orders on last_modified as
      before.
    """
    result: list[OpenSearchDocumentMigrationRecord] = []

    # Step 1: Fetch as many PENDING status records as possible ordered by
    # last_modified (oldest first). last_modified lives on Document, so we join.
    stmt_pending = (
        select(OpenSearchDocumentMigrationRecord)
        .join(Document, OpenSearchDocumentMigrationRecord.document_id == Document.id)
        .where(
            OpenSearchDocumentMigrationRecord.status
            == OpenSearchDocumentMigrationStatus.PENDING
        )
        .order_by(Document.last_modified.asc())
        .limit(limit)
    )
    result.extend(list(db_session.scalars(stmt_pending).all()))
    remaining = limit - len(result)

    # Step 2: If more are needed, fetch records with status FAILED, ordered by
    # attempts_count (lowest first), then last_modified (oldest first).
    if remaining > 0:
        stmt_failed = (
            select(OpenSearchDocumentMigrationRecord)
            .join(
                Document,
                OpenSearchDocumentMigrationRecord.document_id == Document.id,
            )
            .where(
                OpenSearchDocumentMigrationRecord.status
                == OpenSearchDocumentMigrationStatus.FAILED
            )
            .order_by(
                OpenSearchDocumentMigrationRecord.attempts_count.asc(),
                Document.last_modified.asc(),
            )
            .limit(remaining)
        )
        result.extend(list(db_session.scalars(stmt_failed).all()))

    return result


def get_total_opensearch_migration_record_count(
    db_session: Session,
) -> int:
    """Gets the total number of OpenSearch migration records.

    Used to check whether every document has been tracked for migration.
    """
    return db_session.query(OpenSearchDocumentMigrationRecord).count()


def get_total_document_count(db_session: Session) -> int:
    """Gets the total number of documents.

    Used to check whether every document has been tracked for migration.
    """
    return db_session.query(Document).count()


def try_insert_opensearch_tenant_migration_record_with_commit(
    db_session: Session,
) -> None:
    """Tries to insert the singleton row on OpenSearchTenantMigrationRecord.

    Does nothing if the row already exists.
    """
    stmt = insert(OpenSearchTenantMigrationRecord).on_conflict_do_nothing(
        index_elements=[text("(true)")]
    )
    db_session.execute(stmt)
    db_session.commit()


def increment_num_times_observed_no_additional_docs_to_migrate_with_commit(
    db_session: Session,
) -> None:
    """Increments the number of times observed no additional docs to migrate.

    Requires the OpenSearchTenantMigrationRecord to exist.

    Used to track when to stop the migration task.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    record.num_times_observed_no_additional_docs_to_migrate += 1
    db_session.commit()


def increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(
    db_session: Session,
) -> None:
    """
    Increments the number of times observed no additional docs to populate the
    migration table.

    Requires the OpenSearchTenantMigrationRecord to exist.

    Used to track when to stop the migration check task.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    record.num_times_observed_no_additional_docs_to_populate_migration_table += 1
    db_session.commit()


def should_document_migration_be_permanently_failed(
    opensearch_document_migration_record: OpenSearchDocumentMigrationRecord,
) -> bool:
    return (
        opensearch_document_migration_record.status
        == OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED
        or (
            opensearch_document_migration_record.status
            == OpenSearchDocumentMigrationStatus.FAILED
            and opensearch_document_migration_record.attempts_count
            >= TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE
        )
    )


def get_vespa_visit_state(
    db_session: Session,
) -> tuple[dict[int, str | None], int]:
    """Gets the current Vespa migration state from the tenant migration record.

    Requires the OpenSearchTenantMigrationRecord to exist.

    Returns:
        Tuple of (continuation_token_map, total_chunks_migrated).
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    if record.vespa_visit_continuation_token is None:
        continuation_token_map: dict[int, str | None] = {
            slice_id: None for slice_id in range(GET_VESPA_CHUNKS_SLICE_COUNT)
        }
    else:
        json_loaded_continuation_token_map = json.loads(
            record.vespa_visit_continuation_token
        )
        continuation_token_map = {
            int(key): value for key, value in json_loaded_continuation_token_map.items()
        }
    return continuation_token_map, record.total_chunks_migrated


def update_vespa_visit_progress_with_commit(
    db_session: Session,
    continuation_token_map: dict[int, str | None],
    chunks_processed: int,
    chunks_errored: int,
    approx_chunk_count_in_vespa: int | None,
) -> None:
    """Updates the Vespa migration progress and commits.

    Requires the OpenSearchTenantMigrationRecord to exist.

    Args:
        db_session: SQLAlchemy session.
        continuation_token_map: The new continuation token map. None entry means
            the visit is complete for that slice.
        chunks_processed: Number of chunks processed in this batch (added to
            the running total).
        chunks_errored: Number of chunks errored in this batch (added to the
            running errored total).
        approx_chunk_count_in_vespa: Approximate number of chunks in Vespa. If
            None, the existing value is used.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    record.vespa_visit_continuation_token = json.dumps(continuation_token_map)
    record.total_chunks_migrated += chunks_processed
    record.total_chunks_errored += chunks_errored
    record.approx_chunk_count_in_vespa = (
        approx_chunk_count_in_vespa
        if approx_chunk_count_in_vespa is not None
        else record.approx_chunk_count_in_vespa
    )
    db_session.commit()


def mark_migration_completed_time_if_not_set_with_commit(
    db_session: Session,
) -> None:
    """Marks the migration completed time if not set.

    Requires the OpenSearchTenantMigrationRecord to exist.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    if record.migration_completed_at is not None:
        return
    record.migration_completed_at = datetime.now(timezone.utc)
    db_session.commit()


def is_migration_completed(db_session: Session) -> bool:
    """Returns True if the migration is completed.

    Can be run even if the migration record does not exist.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    return record is not None and record.migration_completed_at is not None


def build_sanitized_to_original_doc_id_mapping(
    db_session: Session,
) -> dict[str, str]:
    """Pre-computes a mapping of sanitized -> original document IDs.

    Only includes documents whose ID contains single quotes (the only character
    that gets sanitized by replace_invalid_doc_id_characters). For all other
    documents, sanitized == original and no mapping entry is needed.

    Scans over all documents.

    Checks if the sanitized ID already exists as a genuine separate document in
    the Document table. If so, raises as there is no way of resolving the
    conflict in the migration. The user will need to reindex.

    Args:
        db_session: SQLAlchemy session.

    Returns:
        Dict mapping sanitized_id -> original_id, only for documents where
        the IDs differ. Empty dict means no documents have single quotes
        in their IDs.
    """
    # Find all documents with single quotes in their ID.
    stmt = select(Document.id).where(Document.id.contains("'"))
    ids_with_quotes = list(db_session.scalars(stmt).all())

    result: dict[str, str] = {}
    for original_id in ids_with_quotes:
        sanitized_id = replace_invalid_doc_id_characters(original_id)
        if sanitized_id != original_id:
            result[sanitized_id] = original_id

    # See if there are any documents whose ID is a sanitized ID of another
    # document. If there is even one match, we cannot proceed.
    stmt = select(Document.id).where(Document.id.in_(result.keys()))
    ids_with_matches = list(db_session.scalars(stmt).all())
    if ids_with_matches:
        raise RuntimeError(
            f"Documents with IDs {ids_with_matches} have sanitized IDs that match other documents. "
            "This is not supported and the user will need to reindex."
        )

    return result


def get_opensearch_migration_state(
    db_session: Session,
) -> tuple[int, datetime | None, datetime | None, int | None]:
    """Returns the state of the Vespa to OpenSearch migration.

    If the tenant migration record is not found, returns defaults of 0, None,
    None, None.

    Args:
        db_session: SQLAlchemy session.

    Returns:
        Tuple of (total_chunks_migrated, created_at, migration_completed_at,
            approx_chunk_count_in_vespa).
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        return 0, None, None, None
    return (
        record.total_chunks_migrated,
        record.created_at,
        record.migration_completed_at,
        record.approx_chunk_count_in_vespa,
    )


def get_opensearch_retrieval_state(
    db_session: Session,
) -> bool:
    """Returns the state of the OpenSearch retrieval.

    If the tenant migration record is not found, defaults to
    ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX.
    """
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        return ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
    return record.enable_opensearch_retrieval


def set_enable_opensearch_retrieval_with_commit(
    db_session: Session,
    enable: bool,
) -> None:
    """Sets the enable_opensearch_retrieval flag on the singleton record.

    Creates the record if it doesn't exist yet.
    """
    try_insert_opensearch_tenant_migration_record_with_commit(db_session)
    record = db_session.query(OpenSearchTenantMigrationRecord).first()
    if record is None:
        raise RuntimeError("OpenSearchTenantMigrationRecord not found.")
    record.enable_opensearch_retrieval = enable
    db_session.commit()


================================================
FILE: backend/onyx/db/pat.py
================================================
"""Database operations for Personal Access Tokens."""

import asyncio
from datetime import datetime
from datetime import timezone
from uuid import UUID

from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session

from onyx.auth.pat import build_displayable_pat
from onyx.auth.pat import calculate_expiration
from onyx.auth.pat import generate_pat
from onyx.auth.pat import hash_pat
from onyx.db.engine.async_sql_engine import get_async_session_context_manager
from onyx.db.models import PersonalAccessToken
from onyx.db.models import User
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id


logger = setup_logger()


async def fetch_user_for_pat(
    hashed_token: str, async_db_session: AsyncSession
) -> User | None:
    """Fetch user associated with PAT. Returns None if invalid, expired, or inactive user.

    NOTE: This is async since it's used during auth (which is necessarily async due to FastAPI Users).
    NOTE: Expired includes both naturally expired and user-revoked tokens (revocation sets expires_at=NOW()).

    Uses select(User) as primary entity so that joined-eager relationships (e.g. oauth_accounts)
    are loaded correctly — matching the pattern in fetch_user_for_api_key.
    """
    now = datetime.now(timezone.utc)

    user = await async_db_session.scalar(
        select(User)
        .join(PersonalAccessToken, PersonalAccessToken.user_id == User.id)
        .where(PersonalAccessToken.hashed_token == hashed_token)
        .where(User.is_active)  # type: ignore
        .where(
            (PersonalAccessToken.expires_at.is_(None))
            | (PersonalAccessToken.expires_at > now)
        )
    )
    if not user:
        return None

    _schedule_pat_last_used_update(hashed_token, now)
    return user


def _schedule_pat_last_used_update(hashed_token: str, now: datetime) -> None:
    """Fire-and-forget update of last_used_at, throttled to 5-minute granularity."""

    async def _update() -> None:
        try:
            tenant_id = get_current_tenant_id()
            async with get_async_session_context_manager(tenant_id) as session:
                pat = await session.scalar(
                    select(PersonalAccessToken).where(
                        PersonalAccessToken.hashed_token == hashed_token
                    )
                )
                if not pat:
                    return
                if (
                    pat.last_used_at is not None
                    and (now - pat.last_used_at).total_seconds() <= 300
                ):
                    return
                await session.execute(
                    update(PersonalAccessToken)
                    .where(PersonalAccessToken.hashed_token == hashed_token)
                    .values(last_used_at=now)
                )
                await session.commit()
        except Exception as e:
            logger.warning(f"Failed to update last_used_at for PAT: {e}")

    asyncio.create_task(_update())


def create_pat(
    db_session: Session,
    user_id: UUID,
    name: str,
    expiration_days: int | None,
) -> tuple[PersonalAccessToken, str]:
    """Create new PAT. Returns (db_record, raw_token).

    Raises ValueError if user is inactive or not found.
    """
    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore
    if not user or not user.is_active:
        raise ValueError("Cannot create PAT for inactive or non-existent user")

    tenant_id = get_current_tenant_id()
    raw_token = generate_pat(tenant_id)

    pat = PersonalAccessToken(
        name=name,
        hashed_token=hash_pat(raw_token),
        token_display=build_displayable_pat(raw_token),
        user_id=user_id,
        expires_at=calculate_expiration(expiration_days),
    )
    db_session.add(pat)
    db_session.commit()

    return pat, raw_token


def list_user_pats(db_session: Session, user_id: UUID) -> list[PersonalAccessToken]:
    """List all active (non-expired) PATs for a user."""
    return list(
        db_session.scalars(
            select(PersonalAccessToken)
            .where(PersonalAccessToken.user_id == user_id)
            .where(
                (PersonalAccessToken.expires_at.is_(None))
                | (PersonalAccessToken.expires_at > datetime.now(timezone.utc))
            )
            .order_by(PersonalAccessToken.created_at.desc())
        ).all()
    )


def revoke_pat(db_session: Session, pat_id: int, user_id: UUID) -> bool:
    """Revoke PAT by setting expires_at=NOW() for immediate expiry.

    Returns True if revoked, False if not found, not owned by user, or already expired.
    """
    now = datetime.now(timezone.utc)
    pat = db_session.scalar(
        select(PersonalAccessToken)
        .where(PersonalAccessToken.id == pat_id)
        .where(PersonalAccessToken.user_id == user_id)
        .where(
            (PersonalAccessToken.expires_at.is_(None))
            | (PersonalAccessToken.expires_at > now)
        )  # Only revoke active (non-expired) tokens
    )
    if not pat:
        return False

    # Revoke by setting expires_at to NOW() and marking as revoked for audit trail
    pat.expires_at = now
    pat.is_revoked = True
    db_session.commit()
    return True


================================================
FILE: backend/onyx/db/permission_sync_attempt.py
================================================
"""Permission sync attempt CRUD operations and utilities.

This module contains all CRUD operations for both DocPermissionSyncAttempt
and ExternalGroupPermissionSyncAttempt models, along with shared utilities.
"""

from typing import Any
from typing import cast

from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.engine.cursor import CursorResult
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.db.enums import PermissionSyncStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import DocPermissionSyncAttempt
from onyx.db.models import ExternalGroupPermissionSyncAttempt
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType

logger = setup_logger()


# =============================================================================
# DOC PERMISSION SYNC ATTEMPT CRUD
# =============================================================================


def create_doc_permission_sync_attempt(
    connector_credential_pair_id: int,
    db_session: Session,
) -> int:
    """Create a new doc permission sync attempt.

    Args:
        connector_credential_pair_id: The ID of the connector credential pair
        db_session: The database session

    Returns:
        The ID of the created attempt
    """
    attempt = DocPermissionSyncAttempt(
        connector_credential_pair_id=connector_credential_pair_id,
        status=PermissionSyncStatus.NOT_STARTED,
    )
    db_session.add(attempt)
    db_session.commit()

    return attempt.id


def get_doc_permission_sync_attempt(
    db_session: Session,
    attempt_id: int,
    eager_load_connector: bool = False,
) -> DocPermissionSyncAttempt | None:
    """Get a doc permission sync attempt by ID.

    Args:
        db_session: The database session
        attempt_id: The ID of the attempt
        eager_load_connector: If True, eagerly loads the connector and cc_pair relationships

    Returns:
        The attempt if found, None otherwise
    """
    stmt = select(DocPermissionSyncAttempt).where(
        DocPermissionSyncAttempt.id == attempt_id
    )

    if eager_load_connector:
        stmt = stmt.options(
            joinedload(DocPermissionSyncAttempt.connector_credential_pair).joinedload(
                ConnectorCredentialPair.connector
            )
        )

    return db_session.scalars(stmt).first()


def get_latest_doc_permission_sync_attempt_for_cc_pair(
    db_session: Session,
    connector_credential_pair_id: int,
) -> DocPermissionSyncAttempt | None:
    """Get the latest doc permission sync attempt for a connector credential pair."""
    return db_session.execute(
        select(DocPermissionSyncAttempt)
        .where(
            DocPermissionSyncAttempt.connector_credential_pair_id
            == connector_credential_pair_id
        )
        .order_by(DocPermissionSyncAttempt.time_created.desc())
        .limit(1)
    ).scalar_one_or_none()


def get_recent_doc_permission_sync_attempts_for_cc_pair(
    cc_pair_id: int,
    limit: int,
    db_session: Session,
) -> list[DocPermissionSyncAttempt]:
    """Get recent doc permission sync attempts for a cc pair, most recent first."""
    return list(
        db_session.execute(
            select(DocPermissionSyncAttempt)
            .where(DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id)
            .order_by(DocPermissionSyncAttempt.time_created.desc())
            .limit(limit)
        ).scalars()
    )


def mark_doc_permission_sync_attempt_in_progress(
    attempt_id: int,
    db_session: Session,
) -> DocPermissionSyncAttempt:
    """Mark a doc permission sync attempt as IN_PROGRESS.
    Locks the row during update."""
    try:
        attempt = db_session.execute(
            select(DocPermissionSyncAttempt)
            .where(DocPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        if attempt.status != PermissionSyncStatus.NOT_STARTED:
            raise RuntimeError(
                f"Doc permission sync attempt with ID '{attempt_id}' is not in NOT_STARTED status. "
                f"Current status is '{attempt.status}'."
            )

        attempt.status = PermissionSyncStatus.IN_PROGRESS
        attempt.time_started = func.now()  # type: ignore
        db_session.commit()
        return attempt
    except Exception:
        db_session.rollback()
        logger.exception("mark_doc_permission_sync_attempt_in_progress exceptioned.")
        raise


def mark_doc_permission_sync_attempt_failed(
    attempt_id: int,
    db_session: Session,
    error_message: str,
) -> None:
    """Mark a doc permission sync attempt as failed."""
    try:
        attempt = db_session.execute(
            select(DocPermissionSyncAttempt)
            .where(DocPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        if not attempt.time_started:
            attempt.time_started = func.now()  # type: ignore
        attempt.status = PermissionSyncStatus.FAILED
        attempt.time_finished = func.now()  # type: ignore
        attempt.error_message = error_message
        db_session.commit()

        # Add telemetry for permission sync attempt status change
        optional_telemetry(
            record_type=RecordType.PERMISSION_SYNC_COMPLETE,
            data={
                "doc_permission_sync_attempt_id": attempt_id,
                "status": PermissionSyncStatus.FAILED.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
    except Exception:
        db_session.rollback()
        raise


def complete_doc_permission_sync_attempt(
    db_session: Session,
    attempt_id: int,
    total_docs_synced: int,
    docs_with_permission_errors: int,
) -> DocPermissionSyncAttempt:
    """Complete a doc permission sync attempt by updating progress and setting final status.

    This combines the progress update and final status marking into a single operation.
    If there were permission errors, the attempt is marked as COMPLETED_WITH_ERRORS,
    otherwise it's marked as SUCCESS.

    Args:
        db_session: The database session
        attempt_id: The ID of the attempt
        total_docs_synced: Total number of documents synced
        docs_with_permission_errors: Number of documents that had permission errors

    Returns:
        The completed attempt
    """
    try:
        attempt = db_session.execute(
            select(DocPermissionSyncAttempt)
            .where(DocPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        # Update progress counters
        attempt.total_docs_synced = (attempt.total_docs_synced or 0) + total_docs_synced
        attempt.docs_with_permission_errors = (
            attempt.docs_with_permission_errors or 0
        ) + docs_with_permission_errors

        # Set final status based on whether there were errors
        if docs_with_permission_errors > 0:
            attempt.status = PermissionSyncStatus.COMPLETED_WITH_ERRORS
        else:
            attempt.status = PermissionSyncStatus.SUCCESS

        attempt.time_finished = func.now()  # type: ignore
        db_session.commit()

        # Add telemetry
        optional_telemetry(
            record_type=RecordType.PERMISSION_SYNC_COMPLETE,
            data={
                "doc_permission_sync_attempt_id": attempt_id,
                "status": attempt.status.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
        return attempt
    except Exception:
        db_session.rollback()
        logger.exception("complete_doc_permission_sync_attempt exceptioned.")
        raise


# =============================================================================
# EXTERNAL GROUP PERMISSION SYNC ATTEMPT CRUD
# =============================================================================


def create_external_group_sync_attempt(
    connector_credential_pair_id: int | None,
    db_session: Session,
) -> int:
    """Create a new external group sync attempt.

    Args:
        connector_credential_pair_id: The ID of the connector credential pair, or None for global syncs
        db_session: The database session

    Returns:
        The ID of the created attempt
    """
    attempt = ExternalGroupPermissionSyncAttempt(
        connector_credential_pair_id=connector_credential_pair_id,
        status=PermissionSyncStatus.NOT_STARTED,
    )
    db_session.add(attempt)
    db_session.commit()

    return attempt.id


def get_external_group_sync_attempt(
    db_session: Session,
    attempt_id: int,
    eager_load_connector: bool = False,
) -> ExternalGroupPermissionSyncAttempt | None:
    """Get an external group sync attempt by ID.

    Args:
        db_session: The database session
        attempt_id: The ID of the attempt
        eager_load_connector: If True, eagerly loads the connector and cc_pair relationships

    Returns:
        The attempt if found, None otherwise
    """
    stmt = select(ExternalGroupPermissionSyncAttempt).where(
        ExternalGroupPermissionSyncAttempt.id == attempt_id
    )

    if eager_load_connector:
        stmt = stmt.options(
            joinedload(
                ExternalGroupPermissionSyncAttempt.connector_credential_pair
            ).joinedload(ConnectorCredentialPair.connector)
        )

    return db_session.scalars(stmt).first()


def get_recent_external_group_sync_attempts_for_cc_pair(
    cc_pair_id: int | None,
    limit: int,
    db_session: Session,
) -> list[ExternalGroupPermissionSyncAttempt]:
    """Get recent external group sync attempts for a cc pair, most recent first.
    If cc_pair_id is None, gets global group sync attempts."""
    stmt = select(ExternalGroupPermissionSyncAttempt)

    if cc_pair_id is not None:
        stmt = stmt.where(
            ExternalGroupPermissionSyncAttempt.connector_credential_pair_id
            == cc_pair_id
        )
    else:
        stmt = stmt.where(
            ExternalGroupPermissionSyncAttempt.connector_credential_pair_id.is_(None)
        )

    return list(
        db_session.execute(
            stmt.order_by(ExternalGroupPermissionSyncAttempt.time_created.desc()).limit(
                limit
            )
        ).scalars()
    )


def mark_external_group_sync_attempt_in_progress(
    attempt_id: int,
    db_session: Session,
) -> ExternalGroupPermissionSyncAttempt:
    """Mark an external group sync attempt as IN_PROGRESS.
    Locks the row during update."""
    try:
        attempt = db_session.execute(
            select(ExternalGroupPermissionSyncAttempt)
            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        if attempt.status != PermissionSyncStatus.NOT_STARTED:
            raise RuntimeError(
                f"External group sync attempt with ID '{attempt_id}' is not in NOT_STARTED status. "
                f"Current status is '{attempt.status}'."
            )

        attempt.status = PermissionSyncStatus.IN_PROGRESS
        attempt.time_started = func.now()  # type: ignore
        db_session.commit()
        return attempt
    except Exception:
        db_session.rollback()
        logger.exception("mark_external_group_sync_attempt_in_progress exceptioned.")
        raise


def mark_external_group_sync_attempt_failed(
    attempt_id: int,
    db_session: Session,
    error_message: str,
) -> None:
    """Mark an external group sync attempt as failed."""
    try:
        attempt = db_session.execute(
            select(ExternalGroupPermissionSyncAttempt)
            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        if not attempt.time_started:
            attempt.time_started = func.now()  # type: ignore
        attempt.status = PermissionSyncStatus.FAILED
        attempt.time_finished = func.now()  # type: ignore
        attempt.error_message = error_message
        db_session.commit()

        # Add telemetry for permission sync attempt status change
        optional_telemetry(
            record_type=RecordType.PERMISSION_SYNC_COMPLETE,
            data={
                "external_group_sync_attempt_id": attempt_id,
                "status": PermissionSyncStatus.FAILED.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
    except Exception:
        db_session.rollback()
        raise


def complete_external_group_sync_attempt(
    db_session: Session,
    attempt_id: int,
    total_users_processed: int,
    total_groups_processed: int,
    total_group_memberships_synced: int,
    errors_encountered: int = 0,
) -> ExternalGroupPermissionSyncAttempt:
    """Complete an external group sync attempt by updating progress and setting final status.

    This combines the progress update and final status marking into a single operation.
    If there were errors, the attempt is marked as COMPLETED_WITH_ERRORS,
    otherwise it's marked as SUCCESS.

    Args:
        db_session: The database session
        attempt_id: The ID of the attempt
        total_users_processed: Total users processed
        total_groups_processed: Total groups processed
        total_group_memberships_synced: Total group memberships synced
        errors_encountered: Number of errors encountered (determines if COMPLETED_WITH_ERRORS)

    Returns:
        The completed attempt
    """
    try:
        attempt = db_session.execute(
            select(ExternalGroupPermissionSyncAttempt)
            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)
            .with_for_update()
        ).scalar_one()

        # Update progress counters
        attempt.total_users_processed = (
            attempt.total_users_processed or 0
        ) + total_users_processed
        attempt.total_groups_processed = (
            attempt.total_groups_processed or 0
        ) + total_groups_processed
        attempt.total_group_memberships_synced = (
            attempt.total_group_memberships_synced or 0
        ) + total_group_memberships_synced

        # Set final status based on whether there were errors
        if errors_encountered > 0:
            attempt.status = PermissionSyncStatus.COMPLETED_WITH_ERRORS
        else:
            attempt.status = PermissionSyncStatus.SUCCESS

        attempt.time_finished = func.now()  # type: ignore
        db_session.commit()

        # Add telemetry
        optional_telemetry(
            record_type=RecordType.PERMISSION_SYNC_COMPLETE,
            data={
                "external_group_sync_attempt_id": attempt_id,
                "status": attempt.status.value,
                "cc_pair_id": attempt.connector_credential_pair_id,
            },
        )
        return attempt
    except Exception:
        db_session.rollback()
        logger.exception("complete_external_group_sync_attempt exceptioned.")
        raise


# =============================================================================
# DELETION FUNCTIONS
# =============================================================================


def delete_doc_permission_sync_attempts__no_commit(
    db_session: Session,
    cc_pair_id: int,
) -> int:
    """Delete all doc permission sync attempts for a connector credential pair.

    This does not commit the transaction. It should be used within an existing transaction.

    Args:
        db_session: The database session
        cc_pair_id: The connector credential pair ID

    Returns:
        The number of attempts deleted
    """
    stmt = delete(DocPermissionSyncAttempt).where(
        DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id
    )
    result = cast(CursorResult[Any], db_session.execute(stmt))
    return result.rowcount or 0


def delete_external_group_permission_sync_attempts__no_commit(
    db_session: Session,
    cc_pair_id: int,
) -> int:
    """Delete all external group permission sync attempts for a connector credential pair.

    This does not commit the transaction. It should be used within an existing transaction.

    Args:
        db_session: The database session
        cc_pair_id: The connector credential pair ID

    Returns:
        The number of attempts deleted
    """
    stmt = delete(ExternalGroupPermissionSyncAttempt).where(
        ExternalGroupPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id
    )
    result = cast(CursorResult[Any], db_session.execute(stmt))
    return result.rowcount or 0


================================================
FILE: backend/onyx/db/permissions.py
================================================
"""
DB operations for recomputing user effective_permissions.

These live in onyx/db/ (not onyx/auth/) because they are pure DB operations
that query PermissionGrant rows and update the User.effective_permissions
JSONB column.  Keeping them here avoids circular imports when called from
other onyx/db/ modules such as users.py.
"""

from collections import defaultdict
from uuid import UUID

from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.db.models import PermissionGrant
from onyx.db.models import User
from onyx.db.models import User__UserGroup


def recompute_user_permissions__no_commit(
    user_ids: UUID | str | list[UUID] | list[str], db_session: Session
) -> None:
    """Recompute granted permissions for one or more users.

    Accepts a single UUID or a list.  Uses a single query regardless of
    how many users are passed, avoiding N+1 issues.

    Stores only directly granted permissions — implication expansion
    happens at read time via get_effective_permissions().

    Does NOT commit — caller must commit the session.
    """
    if isinstance(user_ids, (UUID, str)):
        uid_list = [user_ids]
    else:
        uid_list = list(user_ids)

    if not uid_list:
        return

    # Single query to fetch ALL permissions for these users across ALL their
    # groups (a user may belong to multiple groups with different grants).
    rows = db_session.execute(
        select(User__UserGroup.user_id, PermissionGrant.permission)
        .join(
            PermissionGrant,
            PermissionGrant.group_id == User__UserGroup.user_group_id,
        )
        .where(
            User__UserGroup.user_id.in_(uid_list),
            PermissionGrant.is_deleted.is_(False),
        )
    ).all()

    # Group permissions by user; users with no grants get an empty set.
    perms_by_user: dict[UUID | str, set[str]] = defaultdict(set)
    for uid in uid_list:
        perms_by_user[uid]  # ensure every user has an entry
    for uid, perm in rows:
        perms_by_user[uid].add(perm.value)

    for uid, perms in perms_by_user.items():
        db_session.execute(
            update(User)
            .where(User.id == uid)  # type: ignore[arg-type]
            .values(effective_permissions=sorted(perms))
        )


def recompute_permissions_for_group__no_commit(
    group_id: int, db_session: Session
) -> None:
    """Recompute granted permissions for all users in a group.

    Does NOT commit — caller must commit the session.
    """
    user_ids: list[UUID] = [
        uid
        for uid in db_session.execute(
            select(User__UserGroup.user_id).where(
                User__UserGroup.user_group_id == group_id,
                User__UserGroup.user_id.isnot(None),
            )
        )
        .scalars()
        .all()
        if uid is not None
    ]

    if not user_ids:
        return

    recompute_user_permissions__no_commit(user_ids, db_session)


================================================
FILE: backend/onyx/db/persona.py
================================================
from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from uuid import UUID

from fastapi import HTTPException
from sqlalchemy import exists
from sqlalchemy import func
from sqlalchemy import not_
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import aliased
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.access.hierarchy_access import get_user_external_group_ids
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import NotificationType
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.document_access import get_accessible_documents_by_ids
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentSet
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.models import HierarchyNode
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.models import PersonaLabel
from onyx.db.models import StarterMessage
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserFile
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import FullPersonaSnapshot
from onyx.server.features.persona.models import MinimalPersonaSnapshot
from onyx.server.features.persona.models import PersonaSharedNotificationData
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()


def get_default_behavior_persona(
    db_session: Session,
    eager_load_for_tools: bool = False,
) -> Persona | None:
    stmt = select(Persona).where(Persona.id == DEFAULT_PERSONA_ID)
    if eager_load_for_tools:
        stmt = stmt.options(
            selectinload(Persona.tools),
            selectinload(Persona.document_sets),
            selectinload(Persona.attached_documents),
            selectinload(Persona.hierarchy_nodes),
        )
    return db_session.scalars(stmt).first()


class PersonaLoadType(Enum):
    NONE = "none"
    MINIMAL = "minimal"
    FULL = "full"


def _add_user_filters(
    stmt: Select[tuple[Persona]], user: User, get_editable: bool = True
) -> Select[tuple[Persona]]:
    if user.role == UserRole.ADMIN:
        return stmt

    stmt = stmt.distinct()
    Persona__UG = aliased(Persona__UserGroup)
    User__UG = aliased(User__UserGroup)
    """
    Here we select cc_pairs by relation:
    User -> User__UserGroup -> Persona__UserGroup -> Persona
    """
    stmt = (
        stmt.outerjoin(Persona__UG)
        .outerjoin(
            User__UserGroup,
            User__UserGroup.user_group_id == Persona__UG.user_group_id,
        )
        .outerjoin(
            Persona__User,
            Persona__User.persona_id == Persona.id,
        )
    )
    """
    Filter Personas by:
    - if the user is in the user_group that owns the Persona
    - if the user is not a global_curator, they must also have a curator relationship
    to the user_group
    - if editing is being done, we also filter out Personas that are owned by groups
    that the user isn't a curator for
    - if we are not editing, we show all Personas in the groups the user is a curator
    for (as well as public Personas)
    - if we are not editing, we return all Personas directly connected to the user
    """

    # Anonymous users only see public Personas
    if user.is_anonymous:
        where_clause = Persona.is_public == True  # noqa: E712
        return stmt.where(where_clause)

    # If curator ownership restriction is enabled, curators can only access their own assistants
    if CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS and user.role in [
        UserRole.CURATOR,
        UserRole.GLOBAL_CURATOR,
    ]:
        where_clause = (Persona.user_id == user.id) | (Persona.user_id.is_(None))
        return stmt.where(where_clause)

    where_clause = User__UserGroup.user_id == user.id
    if user.role == UserRole.CURATOR and get_editable:
        where_clause &= User__UserGroup.is_curator == True  # noqa: E712
    if get_editable:
        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)
        if user.role == UserRole.CURATOR:
            user_groups = user_groups.where(User__UG.is_curator == True)  # noqa: E712
        where_clause &= (
            ~exists()
            .where(Persona__UG.persona_id == Persona.id)
            .where(~Persona__UG.user_group_id.in_(user_groups))
            .correlate(Persona)
        )
    else:
        # Group the public persona conditions
        public_condition = (Persona.is_public == True) & (  # noqa: E712
            Persona.is_listed == True  # noqa: E712
        )

        where_clause |= public_condition
        where_clause |= Persona__User.user_id == user.id

    where_clause |= Persona.user_id == user.id

    return stmt.where(where_clause)


def fetch_persona_by_id_for_user(
    db_session: Session, persona_id: int, user: User, get_editable: bool = True
) -> Persona:
    stmt = select(Persona).where(Persona.id == persona_id).distinct()
    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)
    persona = db_session.scalars(stmt).one_or_none()
    if not persona:
        raise HTTPException(
            status_code=403,
            detail=f"Persona with ID {persona_id} does not exist or user is not authorized to access it",
        )
    return persona


def get_best_persona_id_for_user(
    db_session: Session, user: User, persona_id: int | None = None
) -> int | None:
    if persona_id is not None:
        stmt = select(Persona).where(Persona.id == persona_id).distinct()
        stmt = _add_user_filters(
            stmt=stmt,
            user=user,
            # We don't want to filter by editable here, we just want to see if the
            # persona is usable by the user
            get_editable=False,
        )
        persona = db_session.scalars(stmt).one_or_none()
        if persona:
            return persona.id

    # If the persona is not found, or the slack bot is using doc sets instead of personas,
    # we need to find the best persona for the user
    # This is the persona with the highest display priority that the user has access to
    stmt = select(Persona).order_by(Persona.display_priority.desc()).distinct()
    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=True)
    persona = db_session.scalars(stmt).one_or_none()
    return persona.id if persona else None


def _get_persona_by_name(
    persona_name: str, user: User | None, db_session: Session
) -> Persona | None:
    """Fetch a persona by name with access control.

    Access rules:
    - user=None (system operations): can see all personas
    - Admin users: can see all personas
    - Non-admin users: can only see their own personas
    """
    stmt = select(Persona).where(Persona.name == persona_name)
    if user and user.role != UserRole.ADMIN:
        stmt = stmt.where(Persona.user_id == user.id)
    result = db_session.execute(stmt).scalar_one_or_none()
    return result


def update_persona_access(
    persona_id: int,
    creator_user_id: UUID | None,
    db_session: Session,
    is_public: bool | None = None,
    user_ids: list[UUID] | None = None,
    group_ids: list[int] | None = None,
) -> None:
    """Updates the access settings for a persona including public status and user shares.

    NOTE: Callers are responsible for committing."""

    needs_sync = False
    if is_public is not None:
        needs_sync = True
        persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
        if persona:
            persona.is_public = is_public

    # NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
    # and a non-empty list means "replace with these shares".
    if user_ids is not None:
        needs_sync = True
        db_session.query(Persona__User).filter(
            Persona__User.persona_id == persona_id
        ).delete(synchronize_session="fetch")

        for user_uuid in user_ids:
            db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
            if user_uuid != creator_user_id:
                create_notification(
                    user_id=user_uuid,
                    notif_type=NotificationType.PERSONA_SHARED,
                    title="A new agent was shared with you!",
                    db_session=db_session,
                    additional_data=PersonaSharedNotificationData(
                        persona_id=persona_id,
                    ).model_dump(),
                )

    # MIT doesn't support group-based sharing, so we allow clearing (no-op since
    # there shouldn't be any) but raise an error if trying to add actual groups.
    if group_ids is not None:
        needs_sync = True
        db_session.query(Persona__UserGroup).filter(
            Persona__UserGroup.persona_id == persona_id
        ).delete(synchronize_session="fetch")

        if group_ids:
            raise NotImplementedError("Onyx MIT does not support group-based sharing")

    # When sharing changes, user file ACLs need to be updated in the vector DB
    if needs_sync:
        mark_persona_user_files_for_sync(persona_id, db_session)


def create_update_persona(
    persona_id: int | None,
    create_persona_request: PersonaUpsertRequest,
    user: User,
    db_session: Session,
) -> FullPersonaSnapshot:
    """Higher level function than upsert_persona, although either is valid to use."""
    # Permission to actually use these is checked later

    try:
        # Featured persona validation
        if create_persona_request.is_featured:
            # Curators can edit featured personas, but not make them
            # TODO this will be reworked soon with RBAC permissions feature
            if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:
                pass
            elif user.role != UserRole.ADMIN:
                raise ValueError("Only admins can make a featured persona")

        # Convert incoming string UUIDs to UUID objects for DB operations
        converted_user_file_ids = None
        if create_persona_request.user_file_ids is not None:
            try:
                converted_user_file_ids = [
                    UUID(str_id) for str_id in create_persona_request.user_file_ids
                ]
            except Exception:
                raise ValueError("Invalid user_file_ids; must be UUID strings")

        persona = upsert_persona(
            persona_id=persona_id,
            user=user,
            db_session=db_session,
            description=create_persona_request.description,
            name=create_persona_request.name,
            document_set_ids=create_persona_request.document_set_ids,
            tool_ids=create_persona_request.tool_ids,
            is_public=create_persona_request.is_public,
            llm_model_provider_override=create_persona_request.llm_model_provider_override,
            llm_model_version_override=create_persona_request.llm_model_version_override,
            starter_messages=create_persona_request.starter_messages,
            system_prompt=create_persona_request.system_prompt,
            task_prompt=create_persona_request.task_prompt,
            datetime_aware=create_persona_request.datetime_aware,
            replace_base_system_prompt=create_persona_request.replace_base_system_prompt,
            uploaded_image_id=create_persona_request.uploaded_image_id,
            icon_name=create_persona_request.icon_name,
            display_priority=create_persona_request.display_priority,
            remove_image=create_persona_request.remove_image,
            search_start_date=create_persona_request.search_start_date,
            label_ids=create_persona_request.label_ids,
            is_featured=create_persona_request.is_featured,
            user_file_ids=converted_user_file_ids,
            commit=False,
            hierarchy_node_ids=create_persona_request.hierarchy_node_ids,
            document_ids=create_persona_request.document_ids,
        )

        versioned_update_persona_access = fetch_versioned_implementation(
            "onyx.db.persona", "update_persona_access"
        )

        versioned_update_persona_access(
            persona_id=persona.id,
            creator_user_id=user.id,
            db_session=db_session,
            user_ids=create_persona_request.users,
            group_ids=create_persona_request.groups,
        )
        db_session.commit()

    except ValueError as e:
        logger.exception("Failed to create persona")
        raise HTTPException(status_code=400, detail=str(e))

    return FullPersonaSnapshot.from_model(persona)


def update_persona_shared(
    persona_id: int,
    user_ids: list[UUID] | None,
    user: User,
    db_session: Session,
    group_ids: list[int] | None = None,
    is_public: bool | None = None,
    label_ids: list[int] | None = None,
) -> None:
    """Simplified version of `create_update_persona` which only touches the
    accessibility rather than any of the logic (e.g. prompt, connected data sources,
    etc.)."""
    persona = fetch_persona_by_id_for_user(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

    if user and user.role != UserRole.ADMIN and persona.user_id != user.id:
        raise PermissionError("You don't have permission to modify this persona")

    versioned_update_persona_access = fetch_versioned_implementation(
        "onyx.db.persona", "update_persona_access"
    )
    versioned_update_persona_access(
        persona_id=persona_id,
        creator_user_id=user.id,
        db_session=db_session,
        is_public=is_public,
        user_ids=user_ids,
        group_ids=group_ids,
    )

    if label_ids is not None:
        labels = (
            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
        )
        if len(labels) != len(label_ids):
            raise ValueError("Some label IDs were not found in the database")
        persona.labels.clear()
        persona.labels = labels

    db_session.commit()


def update_persona_public_status(
    persona_id: int,
    is_public: bool,
    db_session: Session,
    user: User,
) -> None:
    persona = fetch_persona_by_id_for_user(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )
    if user.role != UserRole.ADMIN and persona.user_id != user.id:
        raise ValueError("You don't have permission to modify this persona")

    persona.is_public = is_public
    db_session.commit()


def _build_persona_filters(
    stmt: Select[tuple[Persona]],
    include_default: bool,
    include_slack_bot_personas: bool,
    include_deleted: bool,
) -> Select[tuple[Persona]]:
    """Filters which Personas are included in the query.

    Args:
        stmt: The base query to filter.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        The modified query with the filters applied.
    """
    if not include_default:
        stmt = stmt.where(Persona.builtin_persona.is_(False))
    if not include_slack_bot_personas:
        stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX)))
    if not include_deleted:
        stmt = stmt.where(Persona.deleted.is_(False))
    return stmt


def get_minimal_persona_snapshots_for_user(
    user: User,
    db_session: Session,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> list[MinimalPersonaSnapshot]:
    stmt = select(Persona)
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = _build_persona_filters(
        stmt, include_default, include_slack_bot_personas, include_deleted
    )
    stmt = stmt.options(
        selectinload(Persona.tools),
        selectinload(Persona.labels),
        selectinload(Persona.document_sets).options(
            selectinload(DocumentSet.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSet.users),
            selectinload(DocumentSet.groups),
            selectinload(DocumentSet.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        ),
        selectinload(Persona.hierarchy_nodes),
        selectinload(Persona.attached_documents).selectinload(
            Document.parent_hierarchy_node
        ),
        selectinload(Persona.user),
    )
    results = db_session.scalars(stmt).all()
    return [MinimalPersonaSnapshot.from_model(persona) for persona in results]


def get_persona_snapshots_for_user(
    user: User,
    db_session: Session,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> list[PersonaSnapshot]:
    stmt = select(Persona)
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = _build_persona_filters(
        stmt, include_default, include_slack_bot_personas, include_deleted
    )
    stmt = stmt.options(
        selectinload(Persona.tools),
        selectinload(Persona.hierarchy_nodes),
        selectinload(Persona.attached_documents).selectinload(
            Document.parent_hierarchy_node
        ),
        selectinload(Persona.labels),
        selectinload(Persona.document_sets).options(
            selectinload(DocumentSet.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSet.users),
            selectinload(DocumentSet.groups),
            selectinload(DocumentSet.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        ),
        selectinload(Persona.user),
        selectinload(Persona.user_files),
        selectinload(Persona.users),
        selectinload(Persona.groups),
    )

    results = db_session.scalars(stmt).all()
    return [PersonaSnapshot.from_model(persona) for persona in results]


def get_persona_count_for_user(
    user: User,
    db_session: Session,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> int:
    """Counts the total number of personas accessible to the user.

    Args:
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        db_session: Database session for executing queries.
        get_editable: If True, only returns personas the user can edit.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        Total count of personas matching the filters and user permissions.
    """
    stmt = _build_persona_base_query(
        user=user,
        get_editable=get_editable,
        include_default=include_default,
        include_slack_bot_personas=include_slack_bot_personas,
        include_deleted=include_deleted,
    )
    # Convert to count query.
    count_stmt = stmt.with_only_columns(func.count(func.distinct(Persona.id))).order_by(
        None
    )
    return db_session.scalar(count_stmt) or 0


def get_minimal_persona_snapshots_paginated(
    user: User,
    db_session: Session,
    page_num: int,
    page_size: int,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> list[MinimalPersonaSnapshot]:
    """Gets a single page of minimal persona snapshots with ordering.

    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC
    distance from 0).

    Args:
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        db_session: Database session for executing queries.
        page_num: Zero-indexed page number (e.g., 0 for the first page).
        page_size: Number of items per page.
        get_editable: If True, only returns personas the user can edit.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        List of MinimalPersonaSnapshot objects for the requested page, ordered
        by display_priority (nulls last) then ID.
    """
    stmt = _get_paginated_persona_query(
        user,
        page_num,
        page_size,
        get_editable,
        include_default,
        include_slack_bot_personas,
        include_deleted,
    )
    # Do eager loading of columns we know MinimalPersonaSnapshot.from_model will
    # need.
    stmt = stmt.options(
        selectinload(Persona.tools),
        selectinload(Persona.hierarchy_nodes),
        selectinload(Persona.attached_documents).selectinload(
            Document.parent_hierarchy_node
        ),
        selectinload(Persona.labels),
        selectinload(Persona.document_sets).options(
            selectinload(DocumentSet.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSet.users),
            selectinload(DocumentSet.groups),
            selectinload(DocumentSet.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        ),
        selectinload(Persona.user),
    )

    results = db_session.scalars(stmt).all()
    return [MinimalPersonaSnapshot.from_model(persona) for persona in results]


def get_persona_snapshots_paginated(
    user: User,
    db_session: Session,
    page_num: int,
    page_size: int,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> list[PersonaSnapshot]:
    """Gets a single page of persona snapshots (admin view) with ordering.

    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC
    distance from 0).

    This function returns PersonaSnapshot objects which contain more detailed
    information than MinimalPersonaSnapshot, used for admin views.

    Args:
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        db_session: Database session for executing queries.
        page_num: Zero-indexed page number (e.g., 0 for the first page).
        page_size: Number of items per page.
        get_editable: If True, only returns personas the user can edit.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        List of PersonaSnapshot objects for the requested page, ordered by
        display_priority (nulls last) then ID.
    """
    stmt = _get_paginated_persona_query(
        user,
        page_num,
        page_size,
        get_editable,
        include_default,
        include_slack_bot_personas,
        include_deleted,
    )
    # Do eager loading of columns we know PersonaSnapshot.from_model will need.
    stmt = stmt.options(
        selectinload(Persona.tools),
        selectinload(Persona.hierarchy_nodes),
        selectinload(Persona.attached_documents).selectinload(
            Document.parent_hierarchy_node
        ),
        selectinload(Persona.labels),
        selectinload(Persona.document_sets).options(
            selectinload(DocumentSet.connector_credential_pairs).selectinload(
                ConnectorCredentialPair.connector
            ),
            selectinload(DocumentSet.users),
            selectinload(DocumentSet.groups),
            selectinload(DocumentSet.federated_connectors).selectinload(
                FederatedConnector__DocumentSet.federated_connector
            ),
        ),
        selectinload(Persona.user),
        selectinload(Persona.user_files),
        selectinload(Persona.users),
        selectinload(Persona.groups),
    )

    results = db_session.scalars(stmt).all()
    return [PersonaSnapshot.from_model(persona) for persona in results]


def _get_paginated_persona_query(
    user: User,
    page_num: int,
    page_size: int,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> Select[tuple[Persona]]:
    """Builds a paginated query on personas ordered on display_priority and id.

    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC
    distance from 0) to match the frontend personaComparator() logic.

    Args:
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        page_num: Zero-indexed page number (e.g., 0 for the first page).
        page_size: Number of items per page.
        get_editable: If True, only returns personas the user can edit.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        SQLAlchemy Select statement with all filters, ordering, and pagination
        applied.
    """
    stmt = _build_persona_base_query(
        user=user,
        get_editable=get_editable,
        include_default=include_default,
        include_slack_bot_personas=include_slack_bot_personas,
        include_deleted=include_deleted,
    )
    # Add the abs(id) expression to the SELECT list (required for DISTINCT +
    # ORDER BY).
    stmt = stmt.add_columns(func.abs(Persona.id).label("abs_id"))
    # Apply ordering.
    stmt = stmt.order_by(
        Persona.display_priority.asc().nullslast(),
        func.abs(Persona.id).asc(),
    )
    # Apply pagination.
    stmt = stmt.offset(page_num * page_size).limit(page_size)
    return stmt


def _build_persona_base_query(
    user: User,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> Select[tuple[Persona]]:
    """Builds a base persona query with all user and persona filters applied.

    This helper constructs a filtered query that can then be customized for
    counting, pagination, or full retrieval.

    Args:
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        get_editable: If True, only returns personas the user can edit.
        include_default: If True, includes builtin/default personas.
        include_slack_bot_personas: If True, includes Slack bot personas.
        include_deleted: If True, includes deleted personas.

    Returns:
        SQLAlchemy Select statement with all filters applied.
    """
    stmt = select(Persona)
    stmt = _add_user_filters(stmt, user, get_editable)
    stmt = _build_persona_filters(
        stmt, include_default, include_slack_bot_personas, include_deleted
    )
    return stmt


def get_raw_personas_for_user(
    user: User,
    db_session: Session,
    get_editable: bool = True,
    include_default: bool = True,
    include_slack_bot_personas: bool = False,
    include_deleted: bool = False,
) -> Sequence[Persona]:
    stmt = _build_persona_base_query(
        user, get_editable, include_default, include_slack_bot_personas, include_deleted
    )
    return db_session.scalars(stmt).all()


def get_personas(db_session: Session) -> Sequence[Persona]:
    """WARNING: Unsafe, can fetch personas from all users."""
    stmt = select(Persona).distinct()
    stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX)))
    stmt = stmt.where(Persona.deleted.is_(False))
    return db_session.execute(stmt).unique().scalars().all()


def mark_persona_as_deleted(
    persona_id: int,
    user: User,
    db_session: Session,
) -> None:
    persona = get_persona_by_id(persona_id=persona_id, user=user, db_session=db_session)
    persona.deleted = True
    affected_file_ids = [uf.id for uf in persona.user_files]
    if affected_file_ids:
        _mark_files_need_persona_sync(db_session, affected_file_ids)
    db_session.commit()


def mark_persona_as_not_deleted(
    persona_id: int,
    user: User,
    db_session: Session,
) -> None:
    persona = get_persona_by_id(
        persona_id=persona_id, user=user, db_session=db_session, include_deleted=True
    )
    if not persona.deleted:
        raise ValueError(f"Persona with ID {persona_id} is not deleted.")
    persona.deleted = False
    affected_file_ids = [uf.id for uf in persona.user_files]
    if affected_file_ids:
        _mark_files_need_persona_sync(db_session, affected_file_ids)
    db_session.commit()


def mark_delete_persona_by_name(
    persona_name: str, db_session: Session, is_default: bool = True
) -> None:
    stmt = (
        update(Persona)
        .where(Persona.name == persona_name, Persona.builtin_persona == is_default)
        .values(deleted=True)
    )

    db_session.execute(stmt)
    db_session.commit()


def update_personas_display_priority(
    display_priority_map: dict[int, int],
    db_session: Session,
    user: User,
    commit_db_txn: bool = False,
) -> None:
    """Updates the display priorities of the specified Personas.

    Args:
        display_priority_map: A map of persona IDs to intended display
            priorities.
        db_session: Database session for executing queries.
        user: The user to filter personas for. If None and auth is disabled,
            assumes the user is an admin. Otherwise, if None shows only public
            personas.
        commit_db_txn: If True, commits the database transaction after
            updating the display priorities. Defaults to False.

    Raises:
        ValueError: The caller tried to update a persona for which the user does
            not have access.
    """
    # No-op to save a query if it is not necessary.
    if len(display_priority_map) == 0:
        return

    personas = get_raw_personas_for_user(
        user,
        db_session,
        get_editable=False,
        include_default=True,
        include_slack_bot_personas=True,
        include_deleted=True,
    )
    available_personas_map: dict[int, Persona] = {
        persona.id: persona for persona in personas
    }

    for persona_id, priority in display_priority_map.items():
        if persona_id not in available_personas_map:
            raise ValueError(
                f"Invalid persona ID provided: Persona with ID {persona_id} was not found for this user."
            )

        available_personas_map[persona_id].display_priority = priority

    if commit_db_txn:
        db_session.commit()


def mark_persona_user_files_for_sync(
    persona_id: int,
    db_session: Session,
) -> None:
    """When persona sharing changes, mark all of its user files for sync
    so that their ACLs get updated in the vector DB."""
    persona = (
        db_session.query(Persona)
        .options(selectinload(Persona.user_files))
        .filter(Persona.id == persona_id)
        .first()
    )
    if not persona:
        return
    file_ids = [uf.id for uf in persona.user_files]
    _mark_files_need_persona_sync(db_session, file_ids)


def _mark_files_need_persona_sync(
    db_session: Session,
    user_file_ids: list[UUID],
) -> None:
    """Flag the given UserFile rows so the background sync task picks them up
    and updates their persona metadata in the vector DB."""
    if not user_file_ids:
        return
    db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).update(
        {UserFile.needs_persona_sync: True},
        synchronize_session=False,
    )


def upsert_persona(
    user: User | None,
    name: str,
    description: str,
    llm_model_provider_override: str | None,
    llm_model_version_override: str | None,
    starter_messages: list[StarterMessage] | None,
    # Embedded prompt fields
    system_prompt: str | None,
    task_prompt: str | None,
    datetime_aware: bool | None,
    is_public: bool,
    db_session: Session,
    document_set_ids: list[int] | None = None,
    tool_ids: list[int] | None = None,
    persona_id: int | None = None,
    commit: bool = True,
    uploaded_image_id: str | None = None,
    icon_name: str | None = None,
    display_priority: int | None = None,
    is_listed: bool = True,
    remove_image: bool | None = None,
    search_start_date: datetime | None = None,
    builtin_persona: bool = False,
    is_featured: bool | None = None,
    label_ids: list[int] | None = None,
    user_file_ids: list[UUID] | None = None,
    hierarchy_node_ids: list[int] | None = None,
    document_ids: list[str] | None = None,
    replace_base_system_prompt: bool = False,
) -> Persona:
    """
    NOTE: This operation cannot update persona configuration options that
    are core to the persona, such as its display priority and
    whether or not the assistant is a built-in / default assistant
    """

    if persona_id is not None:
        existing_persona = db_session.query(Persona).filter_by(id=persona_id).first()
    else:
        existing_persona = _get_persona_by_name(
            persona_name=name, user=user, db_session=db_session
        )

        # Check for duplicate names when creating new personas
        # Deleted personas are allowed to be overwritten
        if existing_persona and not existing_persona.deleted:
            raise ValueError(
                f"Assistant with name '{name}' already exists. Please rename your assistant."
            )

    if existing_persona and user:
        # this checks if the user has permission to edit the persona
        # will raise an Exception if the user does not have permission
        # Skip check if user is None (system/admin operation)
        existing_persona = fetch_persona_by_id_for_user(
            db_session=db_session,
            persona_id=existing_persona.id,
            user=user,
            get_editable=True,
        )

    # Fetch and attach tools by IDs
    tools = None
    if tool_ids is not None:
        tools = db_session.query(Tool).filter(Tool.id.in_(tool_ids)).all()
        if not tools and tool_ids:
            raise ValueError("Tools not found")

    # Fetch and attach document_sets by IDs
    document_sets = None
    if document_set_ids is not None:
        document_sets = (
            db_session.query(DocumentSet)
            .filter(DocumentSet.id.in_(document_set_ids))
            .all()
        )
        if not document_sets and document_set_ids:
            raise ValueError("document_sets not found")

    # Fetch and attach user_files by IDs
    user_files = None
    if user_file_ids is not None:
        user_files = (
            db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
        )
        if not user_files and user_file_ids:
            raise ValueError("user_files not found")

    labels = None
    if label_ids is not None:
        labels = (
            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
        )
        if len(labels) != len(label_ids):
            raise ValueError("Some label IDs were not found in the database")

    # Fetch and attach hierarchy_nodes by IDs
    hierarchy_nodes = None
    if hierarchy_node_ids:
        hierarchy_nodes = (
            db_session.query(HierarchyNode)
            .filter(HierarchyNode.id.in_(hierarchy_node_ids))
            .all()
        )
        if not hierarchy_nodes and hierarchy_node_ids:
            raise ValueError("hierarchy_nodes not found")

    # Fetch and attach documents by IDs, filtering for access permissions
    attached_documents = None
    if document_ids is not None:
        user_email = user.email if user else None
        external_group_ids = (
            get_user_external_group_ids(db_session, user) if user else []
        )
        attached_documents = get_accessible_documents_by_ids(
            db_session=db_session,
            document_ids=document_ids,
            user_email=user_email,
            external_group_ids=external_group_ids,
        )
        if not attached_documents and document_ids:
            raise ValueError("documents not found or not accessible")

    # ensure all specified tools are valid
    if tools:
        validate_persona_tools(tools, db_session)

    if existing_persona:
        # Built-in personas can only be updated through YAML configuration.
        # This ensures that core system personas are not modified unintentionally.
        if existing_persona.builtin_persona and not builtin_persona:
            raise ValueError("Cannot update builtin persona with non-builtin.")

        # The following update excludes `default`, `built-in`, and display priority.
        # Display priority is handled separately in the `display-priority` endpoint.
        # `default` and `built-in` properties can only be set when creating a persona.
        existing_persona.name = name
        existing_persona.description = description
        existing_persona.llm_model_provider_override = llm_model_provider_override
        existing_persona.llm_model_version_override = llm_model_version_override
        existing_persona.starter_messages = starter_messages
        existing_persona.deleted = False  # Un-delete if previously deleted
        existing_persona.is_public = is_public
        if remove_image or uploaded_image_id:
            existing_persona.uploaded_image_id = uploaded_image_id
        existing_persona.icon_name = icon_name
        existing_persona.is_listed = is_listed
        existing_persona.search_start_date = search_start_date
        if label_ids is not None:
            existing_persona.labels.clear()
            existing_persona.labels = labels or []
        existing_persona.is_featured = (
            is_featured if is_featured is not None else existing_persona.is_featured
        )
        # Update embedded prompt fields if provided
        if system_prompt is not None:
            existing_persona.system_prompt = system_prompt
        if task_prompt is not None:
            existing_persona.task_prompt = task_prompt
        if datetime_aware is not None:
            existing_persona.datetime_aware = datetime_aware
        existing_persona.replace_base_system_prompt = replace_base_system_prompt

        # Do not delete any associations manually added unless
        # a new updated list is provided
        if document_sets is not None:
            existing_persona.document_sets.clear()
            existing_persona.document_sets = document_sets or []

        # Note: prompts are now embedded in personas - no separate prompts relationship

        if tools is not None:
            existing_persona.tools = tools or []

        if user_file_ids is not None:
            old_file_ids = {uf.id for uf in existing_persona.user_files}
            new_file_ids = {uf.id for uf in (user_files or [])}
            affected_file_ids = old_file_ids | new_file_ids
            existing_persona.user_files.clear()
            existing_persona.user_files = user_files or []
            if affected_file_ids:
                _mark_files_need_persona_sync(db_session, list(affected_file_ids))

        if hierarchy_node_ids is not None:
            existing_persona.hierarchy_nodes.clear()
            existing_persona.hierarchy_nodes = hierarchy_nodes or []

        if document_ids is not None:
            existing_persona.attached_documents.clear()
            existing_persona.attached_documents = attached_documents or []

        # We should only update display priority if it is not already set
        if existing_persona.display_priority is None:
            existing_persona.display_priority = display_priority

        persona = existing_persona

    else:
        # Create new persona - prompt configuration will be set separately if needed
        new_persona = Persona(
            id=persona_id,
            user_id=user.id if user else None,
            is_public=is_public,
            name=name,
            description=description,
            builtin_persona=builtin_persona,
            system_prompt=system_prompt or "",
            task_prompt=task_prompt or "",
            datetime_aware=(datetime_aware if datetime_aware is not None else True),
            replace_base_system_prompt=replace_base_system_prompt,
            document_sets=document_sets or [],
            llm_model_provider_override=llm_model_provider_override,
            llm_model_version_override=llm_model_version_override,
            starter_messages=starter_messages,
            tools=tools or [],
            uploaded_image_id=uploaded_image_id,
            icon_name=icon_name,
            display_priority=display_priority,
            is_listed=is_listed,
            search_start_date=search_start_date,
            is_featured=(is_featured if is_featured is not None else False),
            user_files=user_files or [],
            labels=labels or [],
            hierarchy_nodes=hierarchy_nodes or [],
            attached_documents=attached_documents or [],
        )
        db_session.add(new_persona)
        if user_files:
            _mark_files_need_persona_sync(db_session, [uf.id for uf in user_files])
        persona = new_persona
    if commit:
        db_session.commit()
    else:
        # flush the session so that the persona has an ID
        db_session.flush()

    return persona


def delete_old_default_personas(
    db_session: Session,
) -> None:
    """Note, this locks out the Summarize and Paraphrase personas for now
    Need a more graceful fix later or those need to never have IDs.

    This function is idempotent, so it can be run multiple times without issue.
    """
    OLD_SUFFIX = "_old"
    stmt = (
        update(Persona)
        .where(
            Persona.builtin_persona,
            Persona.id > 0,
            or_(
                Persona.deleted.is_(False),
                not_(Persona.name.endswith(OLD_SUFFIX)),
            ),
        )
        .values(deleted=True, name=func.concat(Persona.name, OLD_SUFFIX))
    )

    db_session.execute(stmt)
    db_session.commit()


def update_persona_featured(
    persona_id: int,
    is_featured: bool,
    db_session: Session,
    user: User,
) -> None:
    persona = fetch_persona_by_id_for_user(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

    persona.is_featured = is_featured
    db_session.commit()


def update_persona_visibility(
    persona_id: int,
    is_listed: bool,
    db_session: Session,
    user: User,
) -> None:
    persona = fetch_persona_by_id_for_user(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

    persona.is_listed = is_listed
    db_session.commit()


def validate_persona_tools(tools: list[Tool], db_session: Session) -> None:
    # local import to avoid circular import. DB layer should not depend on tools layer.
    from onyx.tools.built_in_tools import get_built_in_tool_by_id

    for tool in tools:
        if tool.in_code_tool_id is not None:
            tool_cls = get_built_in_tool_by_id(tool.in_code_tool_id)
            if not tool_cls.is_available(db_session):
                raise ValueError(f"Tool {tool.in_code_tool_id} is not available")


# TODO: since this gets called with every chat message, could it be more efficient to pregenerate
# a direct mapping indicating whether a user has access to a specific persona?
def get_persona_by_id(
    persona_id: int,
    user: User | None,
    db_session: Session,
    include_deleted: bool = False,
    is_for_edit: bool = True,  # NOTE: assume true for safety
) -> Persona:
    persona_stmt = (
        select(Persona)
        .distinct()
        .outerjoin(Persona.groups)
        .outerjoin(Persona.users)
        .outerjoin(UserGroup.user_group_relationships)
        .where(Persona.id == persona_id)
    )

    if not include_deleted:
        persona_stmt = persona_stmt.where(Persona.deleted.is_(False))

    if not user or user.role == UserRole.ADMIN:
        result = db_session.execute(persona_stmt)
        persona = result.scalar_one_or_none()
        if persona is None:
            raise ValueError(f"Persona with ID {persona_id} does not exist")
        return persona

    # or check if user owns persona
    or_conditions = Persona.user_id == user.id
    # allow access if persona user id is None
    or_conditions |= Persona.user_id == None  # noqa: E711
    if not is_for_edit:
        # if the user is in a group related to the persona
        or_conditions |= User__UserGroup.user_id == user.id
        # if the user is in the .users of the persona
        or_conditions |= User.id == user.id
        or_conditions |= Persona.is_public == True  # noqa: E712
    elif user.role == UserRole.GLOBAL_CURATOR:
        # global curators can edit personas for the groups they are in
        or_conditions |= User__UserGroup.user_id == user.id
    elif user.role == UserRole.CURATOR:
        # curators can edit personas for the groups they are curators of
        or_conditions |= (User__UserGroup.user_id == user.id) & (
            User__UserGroup.is_curator == True  # noqa: E712
        )

    persona_stmt = persona_stmt.where(or_conditions)
    result = db_session.execute(persona_stmt)
    persona = result.scalar_one_or_none()
    if persona is None:
        raise ValueError(
            f"Persona with ID {persona_id} does not exist or does not belong to user"
        )
    return persona


def get_personas_by_ids(
    persona_ids: list[int], db_session: Session
) -> Sequence[Persona]:
    """WARNING: Unsafe, can fetch personas from all users."""
    if not persona_ids:
        return []
    personas = db_session.scalars(
        select(Persona).where(Persona.id.in_(persona_ids))
    ).all()

    return personas


def delete_persona_by_name(
    persona_name: str, db_session: Session, is_default: bool = True
) -> None:
    stmt = (
        update(Persona)
        .where(Persona.name == persona_name, Persona.builtin_persona == is_default)
        .values(deleted=True)
    )

    db_session.execute(stmt)
    db_session.commit()


def get_assistant_labels(db_session: Session) -> list[PersonaLabel]:
    return db_session.query(PersonaLabel).all()


def create_assistant_label(db_session: Session, name: str) -> PersonaLabel:
    label = PersonaLabel(name=name)
    db_session.add(label)
    db_session.commit()
    return label


def update_persona_label(
    label_id: int,
    label_name: str,
    db_session: Session,
) -> None:
    persona_label = (
        db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).one_or_none()
    )
    if persona_label is None:
        raise ValueError(f"Persona label with ID {label_id} does not exist")
    persona_label.name = label_name
    db_session.commit()


def delete_persona_label(label_id: int, db_session: Session) -> None:
    db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).delete()
    db_session.commit()


def persona_has_search_tool(persona_id: int, db_session: Session) -> bool:
    persona = (
        db_session.query(Persona)
        .options(selectinload(Persona.tools))
        .filter(Persona.id == persona_id)
        .one_or_none()
    )
    if persona is None:
        raise ValueError(f"Persona with ID {persona_id} does not exist")
    return any(tool.in_code_tool_id == "run_search" for tool in persona.tools)


def get_default_assistant(db_session: Session) -> Persona | None:
    """Fetch the default assistant (persona with builtin_persona=True)."""
    return (
        db_session.query(Persona)
        .options(selectinload(Persona.tools))
        .filter(Persona.builtin_persona.is_(True))
        # NOTE: need to add this since we had prior builtin personas
        # that have since been deleted
        .filter(Persona.deleted.is_(False))
        .one_or_none()
    )


def update_default_assistant_configuration(
    db_session: Session,
    tool_ids: list[int] | None = None,
    system_prompt: str | None = None,
    update_system_prompt: bool = False,
) -> Persona:
    """Update only tools and system_prompt for the default assistant.

    Args:
        db_session: Database session
        tool_ids: List of tool IDs to enable (if None, tools are not updated)
        system_prompt: New system prompt value (None means use default)
        update_system_prompt: If True, update the system_prompt field (allows setting to None)

    Returns:
        Updated Persona object

    Raises:
        ValueError: If default assistant not found or invalid tool IDs provided
    """
    # Get the default assistant
    persona = get_default_assistant(db_session)
    if not persona:
        raise ValueError("Default assistant not found")

    # Update system prompt if explicitly requested
    if update_system_prompt:
        persona.system_prompt = system_prompt

    # Update tools if provided
    if tool_ids is not None:
        # Clear existing tool associations
        persona.tools = []

        # Add new tool associations
        for tool_id in tool_ids:
            tool = db_session.query(Tool).filter(Tool.id == tool_id).one_or_none()
            if not tool:
                raise ValueError(f"Tool with ID {tool_id} not found")

            if not should_expose_tool_to_fe(tool):
                raise ValueError(f"Tool with ID {tool_id} cannot be assigned")

            if not tool.enabled:
                raise ValueError(
                    f"Enable tool {tool.display_name or tool.name} before assigning it"
                )

            persona.tools.append(tool)

    db_session.commit()
    return persona


def user_can_access_persona(
    db_session: Session, persona_id: int, user: User, get_editable: bool = False
) -> bool:
    """Check if a user has access to a specific persona.

    Args:
        db_session: Database session
        persona_id: ID of the persona to check
        user: User to check access for
        get_editable: If True, check for edit access; if False, check for view access

    Returns:
        True if user can access the persona, False otherwise
    """
    stmt = select(Persona).where(Persona.id == persona_id, Persona.deleted.is_(False))
    stmt = _add_user_filters(stmt, user, get_editable=get_editable)
    return db_session.scalar(stmt) is not None


================================================
FILE: backend/onyx/db/projects.py
================================================
import datetime
import uuid
from typing import List
from uuid import UUID

from fastapi import HTTPException
from fastapi import UploadFile
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy import func
from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks

from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.enums import UserFileStatus
from onyx.db.models import Project__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.server.documents.connector import upload_files
from onyx.server.features.projects.projects_file_utils import categorize_uploaded_files
from onyx.server.features.projects.projects_file_utils import RejectedFile
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


class CategorizedFilesResult(BaseModel):
    user_files: list[UserFile]
    rejected_files: list[RejectedFile]
    id_to_temp_id: dict[str, str]
    # Filenames that should be stored but not indexed.
    skip_indexing_filenames: set[str] = Field(default_factory=set)
    # Allow SQLAlchemy ORM models inside this result container
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @property
    def indexable_files(self) -> list[UserFile]:
        return [
            uf
            for uf in self.user_files
            if (uf.name or "") not in self.skip_indexing_filenames
        ]


def build_hashed_file_key(file: UploadFile) -> str:
    name_prefix = (file.filename or "")[:50]
    return f"{file.size}|{name_prefix}"


def create_user_files(
    files: List[UploadFile],
    project_id: int | None,
    user: User,
    db_session: Session,
    link_url: str | None = None,
    temp_id_map: dict[str, str] | None = None,
) -> CategorizedFilesResult:

    # Categorize the files
    categorized_files = categorize_uploaded_files(files, db_session)
    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
    user_files = []
    rejected_files = categorized_files.rejected
    id_to_temp_id: dict[str, str] = {}
    # Pair returned storage paths with the same set of acceptable files we uploaded
    for file_path, file in zip(
        upload_response.file_paths, categorized_files.acceptable
    ):
        new_id = uuid.uuid4()
        new_temp_id = (
            temp_id_map.get(build_hashed_file_key(file)) if temp_id_map else None
        )
        if new_temp_id is not None:
            id_to_temp_id[str(new_id)] = new_temp_id
        should_skip = (file.filename or "") in categorized_files.skip_indexing
        new_file = UserFile(
            id=new_id,
            user_id=user.id,
            file_id=file_path,
            name=file.filename,
            token_count=categorized_files.acceptable_file_to_token_count[
                file.filename or ""
            ],
            link_url=link_url,
            content_type=file.content_type,
            file_type=file.content_type,
            status=UserFileStatus.SKIPPED if should_skip else UserFileStatus.PROCESSING,
            last_accessed_at=datetime.datetime.now(datetime.timezone.utc),
        )
        # Persist the UserFile first to satisfy FK constraints for association table
        db_session.add(new_file)
        db_session.flush()
        if project_id:
            project_to_user_file = Project__UserFile(
                project_id=project_id,
                user_file_id=new_file.id,
            )
            db_session.add(project_to_user_file)
        user_files.append(new_file)
    db_session.commit()
    return CategorizedFilesResult(
        user_files=user_files,
        rejected_files=rejected_files,
        id_to_temp_id=id_to_temp_id,
        skip_indexing_filenames=categorized_files.skip_indexing,
    )


def upload_files_to_user_files_with_indexing(
    files: List[UploadFile],
    project_id: int | None,
    user: User,
    temp_id_map: dict[str, str] | None,
    db_session: Session,
    background_tasks: BackgroundTasks | None = None,
) -> CategorizedFilesResult:
    if project_id is not None and user is not None:
        if not check_project_ownership(project_id, user.id, db_session):
            raise HTTPException(status_code=404, detail="Project not found")

    categorized_files_result = create_user_files(
        files,
        project_id,
        user,
        db_session,
        temp_id_map=temp_id_map,
    )
    user_files = categorized_files_result.user_files
    rejected_files = categorized_files_result.rejected_files
    id_to_temp_id = categorized_files_result.id_to_temp_id
    indexable_files = categorized_files_result.indexable_files
    # Trigger per-file processing immediately for the current tenant
    tenant_id = get_current_tenant_id()
    for rejected_file in rejected_files:
        logger.warning(
            f"File {rejected_file.filename} rejected for {rejected_file.reason}"
        )

    if DISABLE_VECTOR_DB and background_tasks is not None:
        from onyx.background.task_utils import drain_processing_loop

        background_tasks.add_task(drain_processing_loop, tenant_id)
        for user_file in indexable_files:
            logger.info(f"Queued in-process processing for user_file_id={user_file.id}")
    else:
        from onyx.background.celery.versioned_apps.client import app as client_app

        for user_file in indexable_files:
            task = client_app.send_task(
                OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
                kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
                queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
                priority=OnyxCeleryPriority.HIGH,
                expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
            )
            logger.info(
                f"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}"
            )

    return CategorizedFilesResult(
        user_files=user_files,
        rejected_files=rejected_files,
        id_to_temp_id=id_to_temp_id,
        skip_indexing_filenames=categorized_files_result.skip_indexing_filenames,
    )


def check_project_ownership(
    project_id: int, user_id: UUID | None, db_session: Session
) -> bool:
    # In no-auth mode, all projects are accessible
    if user_id is None:
        # Verify project exists
        return (
            db_session.query(UserProject).filter(UserProject.id == project_id).first()
            is not None
        )

    return (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .first()
        is not None
    )


def get_user_files_from_project(
    project_id: int, user_id: UUID | None, db_session: Session
) -> list[UserFile]:
    # First check if the user owns the project
    if not check_project_ownership(project_id, user_id, db_session):
        return []

    return (
        db_session.query(UserFile)
        .join(Project__UserFile)
        .filter(Project__UserFile.project_id == project_id)
        .all()
    )


def get_project_instructions(db_session: Session, project_id: int | None) -> str | None:
    """Return the project's instruction text from the project, else None.

    Safe helper that swallows DB errors and returns None on any failure.
    """
    if not project_id:
        return None
    try:
        project = (
            db_session.query(UserProject)
            .filter(UserProject.id == project_id)
            .one_or_none()
        )
        if not project or not project.instructions:
            return None
        instructions = project.instructions.strip()
        return instructions or None
    except Exception:
        return None


def get_project_token_count(
    project_id: int | None,
    user_id: UUID | None,
    db_session: Session,
) -> int:
    """Return sum of token_count for all user files in the given project.

    If project_id is None, returns 0.
    """
    if project_id is None:
        return 0

    total_tokens = (
        db_session.query(func.coalesce(func.sum(UserFile.token_count), 0))
        .filter(
            UserFile.user_id == user_id,
            UserFile.projects.any(id=project_id),
        )
        .scalar()
        or 0
    )

    return int(total_tokens)


================================================
FILE: backend/onyx/db/pydantic_type.py
================================================
import json
from typing import Any
from typing import Optional
from typing import Type

from pydantic import BaseModel
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.types import TypeDecorator


class PydanticType(TypeDecorator):
    impl = JSONB

    def __init__(
        self, pydantic_model: Type[BaseModel], *args: Any, **kwargs: Any
    ) -> None:
        super().__init__(*args, **kwargs)
        self.pydantic_model = pydantic_model

    def process_bind_param(
        self,
        value: Optional[BaseModel],
        dialect: Any,  # noqa: ARG002
    ) -> Optional[dict]:
        if value is not None:
            return json.loads(value.json())
        return None

    def process_result_value(
        self,
        value: Optional[dict],
        dialect: Any,  # noqa: ARG002
    ) -> Optional[BaseModel]:
        if value is not None:
            return self.pydantic_model.parse_obj(value)
        return None


class PydanticListType(TypeDecorator):
    impl = JSONB

    def __init__(
        self, pydantic_model: Type[BaseModel], *args: Any, **kwargs: Any
    ) -> None:
        super().__init__(*args, **kwargs)
        self.pydantic_model = pydantic_model

    def process_bind_param(
        self,
        value: Optional[list[BaseModel]],
        dialect: Any,  # noqa: ARG002
    ) -> Optional[list[dict]]:
        if value is not None:
            return [json.loads(item.model_dump_json()) for item in value]
        return None

    def process_result_value(
        self,
        value: Optional[list[dict]],
        dialect: Any,  # noqa: ARG002
    ) -> Optional[list[BaseModel]]:
        if value is not None:
            return [self.pydantic_model.model_validate(item) for item in value]
        return None


================================================
FILE: backend/onyx/db/relationships.py
================================================
from typing import List

from sqlalchemy import or_
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session

import onyx.db.document as dbdocument
from onyx.db.models import KGEntity
from onyx.db.models import KGEntityExtractionStaging
from onyx.db.models import KGRelationship
from onyx.db.models import KGRelationshipExtractionStaging
from onyx.db.models import KGRelationshipType
from onyx.db.models import KGRelationshipTypeExtractionStaging
from onyx.db.models import KGStage
from onyx.kg.utils.formatting_utils import extract_relationship_type_id
from onyx.kg.utils.formatting_utils import format_relationship_id
from onyx.kg.utils.formatting_utils import get_entity_type
from onyx.kg.utils.formatting_utils import make_relationship_id
from onyx.kg.utils.formatting_utils import make_relationship_type_id
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.logger import setup_logger

logger = setup_logger()


def upsert_staging_relationship(
    db_session: Session,
    relationship_id_name: str,
    source_document_id: str | None,
    occurrences: int = 1,
) -> KGRelationshipExtractionStaging:
    """
    Add or update a new staging relationship to the database.

    Args:
        db_session: SQLAlchemy database session
        relationship_id_name: The ID name of the relationship in format "source__relationship__target"
        source_document_id: ID of the source document
        occurrences: Number of times this relationship has been found
    Returns:
        The created or updated KGRelationshipExtractionStaging object

    Raises:
        sqlalchemy.exc.IntegrityError: If there's an error with the database operation
    """
    # Generate a unique ID for the relationship
    relationship_id_name = format_relationship_id(relationship_id_name)
    (
        source_entity_id_name,
        relationship_string,
        target_entity_id_name,
    ) = split_relationship_id(relationship_id_name)

    source_entity_type = get_entity_type(source_entity_id_name)
    target_entity_type = get_entity_type(target_entity_id_name)
    relationship_type = extract_relationship_type_id(relationship_id_name)

    # Insert the new relationship
    stmt = (
        postgresql.insert(KGRelationshipExtractionStaging)
        .values(
            {
                "id_name": relationship_id_name,
                "source_node": source_entity_id_name,
                "target_node": target_entity_id_name,
                "source_node_type": source_entity_type,
                "target_node_type": target_entity_type,
                "type": relationship_string.lower(),
                "relationship_type_id_name": relationship_type,
                "source_document": source_document_id,
                "occurrences": occurrences,
            }
        )
        .on_conflict_do_update(
            index_elements=["id_name", "source_document"],
            set_=dict(
                occurrences=KGRelationshipExtractionStaging.occurrences + occurrences,
            ),
        )
        .returning(KGRelationshipExtractionStaging)
    )

    result = db_session.execute(stmt).scalar()
    if result is None:
        raise RuntimeError(
            f"Failed to create or increment staging relationship with id_name: {relationship_id_name}"
        )

    # Update the document's kg_stage if source_document is provided
    if source_document_id is not None:
        dbdocument.update_document_kg_info(
            db_session,
            document_id=source_document_id,
            kg_stage=KGStage.EXTRACTED,
        )
    db_session.flush()  # Flush to get any DB errors early

    return result


def upsert_relationship(
    db_session: Session,
    relationship_id_name: str,
    source_document_id: str | None,
    occurrences: int = 1,
) -> KGRelationship:
    """
    Upsert a new relationship directly to the database.

    Args:
        db_session: SQLAlchemy database session
        relationship_id_name: The ID name of the relationship in format "source__relationship__target"
        source_document_id: ID of the source document
        occurrences: Number of times this relationship has been found
    Returns:
        The created or updated KGRelationship object

    Raises:
        sqlalchemy.exc.IntegrityError: If there's an error with the database operation
    """
    # Generate a unique ID for the relationship
    relationship_id_name = format_relationship_id(relationship_id_name)
    (
        source_entity_id_name,
        relationship_string,
        target_entity_id_name,
    ) = split_relationship_id(relationship_id_name)

    source_entity_type = get_entity_type(source_entity_id_name)
    target_entity_type = get_entity_type(target_entity_id_name)
    relationship_type = extract_relationship_type_id(relationship_id_name)

    # Insert the new relationship
    stmt = (
        postgresql.insert(KGRelationship)
        .values(
            {
                "id_name": relationship_id_name,
                "source_node": source_entity_id_name,
                "target_node": target_entity_id_name,
                "source_node_type": source_entity_type,
                "target_node_type": target_entity_type,
                "type": relationship_string.lower(),
                "relationship_type_id_name": relationship_type,
                "source_document": source_document_id,
                "occurrences": occurrences,
            }
        )
        .on_conflict_do_update(
            index_elements=["id_name", "source_document"],
            set_=dict(
                occurrences=KGRelationship.occurrences + occurrences,
            ),
        )
        .returning(KGRelationship)
    )

    new_relationship = db_session.execute(stmt).scalar()
    if new_relationship is None:
        raise RuntimeError(
            f"Failed to upsert relationship with id_name: {relationship_id_name}"
        )
    db_session.flush()
    return new_relationship


def transfer_relationship(
    db_session: Session,
    relationship: KGRelationshipExtractionStaging,
    entity_translations: dict[str, str],
) -> KGRelationship:
    """
    Transfer a relationship from the staging table to the normalized table.
    """
    # Translate the source and target nodes
    source_node = entity_translations[relationship.source_node]
    target_node = entity_translations[relationship.target_node]
    relationship_id_name = make_relationship_id(
        source_node, relationship.type, target_node
    )

    # Create the transferred relationship
    stmt = (
        pg_insert(KGRelationship)
        .values(
            id_name=relationship_id_name,
            source_node=source_node,
            target_node=target_node,
            source_node_type=relationship.source_node_type,
            target_node_type=relationship.target_node_type,
            type=relationship.type,
            relationship_type_id_name=relationship.relationship_type_id_name,
            source_document=relationship.source_document,
            occurrences=relationship.occurrences,
        )
        .on_conflict_do_update(
            index_elements=["id_name", "source_document"],
            set_=dict(
                occurrences=KGRelationship.occurrences + relationship.occurrences,
            ),
        )
        .returning(KGRelationship)
    )

    new_relationship = db_session.execute(stmt).scalar()
    if new_relationship is None:
        raise RuntimeError(
            f"Failed to transfer relationship with id_name: {relationship.id_name}"
        )

    # Update transferred
    db_session.query(KGRelationshipExtractionStaging).filter(
        KGRelationshipExtractionStaging.id_name == relationship.id_name,
        KGRelationshipExtractionStaging.source_document == relationship.source_document,
    ).update({"transferred": True})
    db_session.flush()

    return new_relationship


def upsert_staging_relationship_type(
    db_session: Session,
    source_entity_type: str,
    relationship_type: str,
    target_entity_type: str,
    definition: bool = False,
    extraction_count: int = 1,
) -> KGRelationshipTypeExtractionStaging:
    """
    Add a new relationship type to the database.

    Args:
        db_session: SQLAlchemy session
        source_entity_type: Type of the source entity
        relationship_type: Type of relationship
        target_entity_type: Type of the target entity
        definition: Whether this relationship type represents a definition (default False)

    Returns:
        The created KGRelationshipTypeExtractionStaging object
    """

    id_name = make_relationship_type_id(
        source_entity_type, relationship_type, target_entity_type
    )

    # Create new relationship type
    stmt = (
        postgresql.insert(KGRelationshipTypeExtractionStaging)
        .values(
            {
                "id_name": id_name,
                "name": relationship_type,
                "source_entity_type_id_name": source_entity_type.upper(),
                "target_entity_type_id_name": target_entity_type.upper(),
                "definition": definition,
                "occurrences": extraction_count,
                "type": relationship_type,  # Using the relationship_type as the type
                "active": True,  # Setting as active by default
            }
        )
        .on_conflict_do_update(
            index_elements=["id_name"],
            set_=dict(
                occurrences=KGRelationshipTypeExtractionStaging.occurrences
                + extraction_count,
            ),
        )
        .returning(KGRelationshipTypeExtractionStaging)
    )

    result = db_session.execute(stmt).scalar()
    if result is None:
        raise RuntimeError(
            f"Failed to create or increment staging relationship type with id_name: {id_name}"
        )
    db_session.flush()  # Flush to get any DB errors early

    return result


def upsert_relationship_type(
    db_session: Session,
    source_entity_type: str,
    relationship_type: str,
    target_entity_type: str,
    definition: bool = False,
    extraction_count: int = 1,
) -> KGRelationshipType:
    """
    Upsert a new relationship type directly to the database.

    Args:
        db_session: SQLAlchemy session
        source_entity_type: Type of the source entity
        relationship_type: Type of relationship
        target_entity_type: Type of the target entity
        definition: Whether this relationship type represents a definition (default False)

    Returns:
        The created KGRelationshipType object
    """

    id_name = make_relationship_type_id(
        source_entity_type, relationship_type, target_entity_type
    )

    # Create new relationship type
    stmt = (
        postgresql.insert(KGRelationshipType)
        .values(
            {
                "id_name": id_name,
                "name": relationship_type,
                "source_entity_type_id_name": source_entity_type.upper(),
                "target_entity_type_id_name": target_entity_type.upper(),
                "definition": definition,
                "occurrences": extraction_count,
                "type": relationship_type,  # Using the relationship_type as the type
                "active": True,  # Setting as active by default
            }
        )
        .on_conflict_do_update(
            index_elements=["id_name"],
            set_=dict(
                occurrences=KGRelationshipType.occurrences + extraction_count,
            ),
        )
        .returning(KGRelationshipType)
    )

    new_relationship_type = db_session.execute(stmt).scalar()
    if new_relationship_type is None:
        raise RuntimeError(
            f"Failed to upsert relationship type with id_name: {id_name}"
        )
    db_session.flush()
    return new_relationship_type


def transfer_relationship_type(
    db_session: Session,
    relationship_type: KGRelationshipTypeExtractionStaging,
) -> KGRelationshipType:
    """
    Transfer a relationship type from the staging table to the normalized table.
    """
    stmt = (
        pg_insert(KGRelationshipType)
        .values(
            id_name=relationship_type.id_name,
            name=relationship_type.name,
            source_entity_type_id_name=relationship_type.source_entity_type_id_name,
            target_entity_type_id_name=relationship_type.target_entity_type_id_name,
            definition=relationship_type.definition,
            occurrences=relationship_type.occurrences,
            type=relationship_type.type,
            active=relationship_type.active,
        )
        .on_conflict_do_update(
            index_elements=["id_name"],
            set_=dict(
                occurrences=KGRelationshipType.occurrences
                + relationship_type.occurrences,
            ),
        )
        .returning(KGRelationshipType)
    )

    new_relationship_type = db_session.execute(stmt).scalar()
    if new_relationship_type is None:
        raise RuntimeError(
            f"Failed to transfer relationship type with id_name: {relationship_type.id_name}"
        )

    # Update transferred
    db_session.query(KGRelationshipTypeExtractionStaging).filter(
        KGRelationshipTypeExtractionStaging.id_name == relationship_type.id_name
    ).update({"transferred": True})
    db_session.flush()

    return new_relationship_type


def delete_relationships_by_id_names(
    db_session: Session, id_names: list[str], kg_stage: KGStage
) -> int:
    """
    Delete relationships from the database based on a list of id_names.

    Args:
        db_session: SQLAlchemy database session
        id_names: List of relationship id_names to delete

    Returns:
        Number of relationships deleted

    Raises:
        sqlalchemy.exc.SQLAlchemyError: If there's an error during deletion
    """

    deleted_count = 0

    if kg_stage == KGStage.EXTRACTED:
        deleted_count = (
            db_session.query(KGRelationshipExtractionStaging)
            .filter(KGRelationshipExtractionStaging.id_name.in_(id_names))
            .delete(synchronize_session=False)
        )
    elif kg_stage == KGStage.NORMALIZED:
        deleted_count = (
            db_session.query(KGRelationship)
            .filter(KGRelationship.id_name.in_(id_names))
            .delete(synchronize_session=False)
        )

    db_session.flush()  # Flush to ensure deletion is processed
    return deleted_count


def delete_relationship_types_by_id_names(
    db_session: Session, id_names: list[str], kg_stage: KGStage
) -> int:
    """
    Delete relationship types from the database based on a list of id_names.

    Args:
        db_session: SQLAlchemy database session
        id_names: List of relationship type id_names to delete

    Returns:
        Number of relationship types deleted

    Raises:
        sqlalchemy.exc.SQLAlchemyError: If there's an error during deletion
    """
    deleted_count = 0

    if kg_stage == KGStage.EXTRACTED:
        deleted_count = (
            db_session.query(KGRelationshipTypeExtractionStaging)
            .filter(KGRelationshipTypeExtractionStaging.id_name.in_(id_names))
            .delete(synchronize_session=False)
        )
    elif kg_stage == KGStage.NORMALIZED:
        deleted_count = (
            db_session.query(KGRelationshipType)
            .filter(KGRelationshipType.id_name.in_(id_names))
            .delete(synchronize_session=False)
        )

    db_session.flush()  # Flush to ensure deletion is processed
    return deleted_count


def get_relationships_for_entity_type_pairs(
    db_session: Session, entity_type_pairs: list[tuple[str, str]]
) -> list["KGRelationshipType"]:
    """
    Get relationship types from the database based on a list of entity type pairs.

    Args:
        db_session: SQLAlchemy database session
        entity_type_pairs: List of tuples where each tuple contains (source_entity_type, target_entity_type)

    Returns:
        List of KGRelationshipType objects where source and target types match the provided pairs
    """

    conditions = [
        (
            (KGRelationshipType.source_entity_type_id_name == source_type)
            & (KGRelationshipType.target_entity_type_id_name == target_type)
        )
        for source_type, target_type in entity_type_pairs
    ]

    return db_session.query(KGRelationshipType).filter(or_(*conditions)).all()


def get_allowed_relationship_type_pairs(
    db_session: Session, entities: list[str]
) -> list[str]:
    """
    Get the allowed relationship pairs for the given entities.

    Args:
        db_session: SQLAlchemy database session
        entities: List of entity type ID names to filter by

    Returns:
        List of id_names from KGRelationshipType where source or target entity types
        are in the provided entities list. We also filter out for now the catch-all
        relationship types 'VENDOR__<relationship>__<target entity type>'
    """

    entity_types = list({get_entity_type(entity) for entity in entities})

    return [
        row[0]
        for row in (
            db_session.query(KGRelationshipType.id_name)
            .filter(
                or_(
                    KGRelationshipType.source_entity_type_id_name.in_(entity_types),
                    KGRelationshipType.target_entity_type_id_name.in_(entity_types),
                )
            )
            .filter(~KGRelationshipType.source_entity_type_id_name.like("VENDOR::%"))
            .distinct()
            .all()
        )
    ]


def get_relationships_of_entity(db_session: Session, entity_id: str) -> List[str]:
    """Get all relationship ID names where the given entity is either the source or target node.

    Args:
        db_session: SQLAlchemy session
        entity_id: ID of the entity to find relationships for

    Returns:
        List of relationship ID names where the entity is either source or target
    """
    return [
        row[0]
        for row in (
            db_session.query(KGRelationship.id_name)
            .filter(
                or_(
                    KGRelationship.source_node == entity_id,
                    KGRelationship.target_node == entity_id,
                )
            )
            .all()
        )
    ]


def get_relationship_types_of_entity_types(
    db_session: Session, entity_types_id: str
) -> List[str]:
    """Get all relationship ID names where the given entity is either the source or target node.

    Args:
        db_session: SQLAlchemy session
        entity_types_id: ID of the entity to find relationships for

    Returns:
        List of relationship ID names where the entity is either source or target
    """

    if entity_types_id.endswith(":*"):
        entity_types_id = entity_types_id[:-2]

    return [
        row[0]
        for row in (
            db_session.query(KGRelationshipType.id_name)
            .filter(
                or_(
                    KGRelationshipType.source_entity_type_id_name == entity_types_id,
                    KGRelationshipType.target_entity_type_id_name == entity_types_id,
                )
            )
            .all()
        )
    ]


def delete_document_references_from_kg(db_session: Session, document_id: str) -> None:
    # Delete relationships from normalized stage
    db_session.query(KGRelationship).filter(
        KGRelationship.source_document == document_id
    ).delete(synchronize_session=False)

    # Delete relationships from extraction staging
    db_session.query(KGRelationshipExtractionStaging).filter(
        KGRelationshipExtractionStaging.source_document == document_id
    ).delete(synchronize_session=False)

    # Delete entities from normalized stage
    db_session.query(KGEntity).filter(KGEntity.document_id == document_id).delete(
        synchronize_session=False
    )

    # Delete entities from extraction staging
    db_session.query(KGEntityExtractionStaging).filter(
        KGEntityExtractionStaging.document_id == document_id
    ).delete(synchronize_session=False)

    db_session.flush()


def delete_from_kg_relationships_extraction_staging__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """Delete relationships from the extraction staging table."""
    db_session.query(KGRelationshipExtractionStaging).filter(
        KGRelationshipExtractionStaging.source_document.in_(document_ids)
    ).delete(synchronize_session=False)


def delete_from_kg_relationships__no_commit(
    db_session: Session, document_ids: list[str]
) -> None:
    """Delete relationships from the normalized table."""
    db_session.query(KGRelationship).filter(
        KGRelationship.source_document.in_(document_ids)
    ).delete(synchronize_session=False)


================================================
FILE: backend/onyx/db/release_notes.py
================================================
"""Database functions for release notes functionality."""

from urllib.parse import urlencode

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.app_configs import INSTANCE_TYPE
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from onyx.configs.constants import NotificationType
from onyx.configs.constants import ONYX_UTM_SOURCE
from onyx.db.enums import AccountType
from onyx.db.models import User
from onyx.db.notification import batch_create_notifications
from onyx.server.features.release_notes.constants import DOCS_CHANGELOG_BASE_URL
from onyx.server.features.release_notes.models import ReleaseNoteEntry
from onyx.utils.logger import setup_logger

logger = setup_logger()


def create_release_notifications_for_versions(
    db_session: Session,
    release_note_entries: list[ReleaseNoteEntry],
) -> int:
    """
    Create release notes notifications for each release note entry.
    Uses batch_create_notifications for efficient bulk insertion.

    If a user already has a notification for a specific version (dismissed or not),
    no new one is created (handled by unique constraint on additional_data).

    Note: Entries should already be filtered by app_version before calling this
    function. The filtering happens in _parse_mdx_to_release_note_entries().

    Args:
        db_session: Database session
        release_note_entries: List of release note entries to notify about (pre-filtered)

    Returns:
        Total number of notifications created across all versions.
    """
    if not release_note_entries:
        logger.debug("No release note entries to notify about")
        return 0

    # Get active users and exclude API key users
    user_ids = list(
        db_session.scalars(
            select(User.id).where(  # type: ignore
                User.is_active == True,  # noqa: E712
                User.account_type.notin_([AccountType.BOT, AccountType.EXT_PERM_USER]),
                User.email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN).is_(False),  # type: ignore[attr-defined]
            )
        ).all()
    )

    total_created = 0
    for entry in release_note_entries:
        # Convert version to anchor format for external docs links
        # v2.7.0 -> v2-7-0
        version_anchor = entry.version.replace(".", "-")

        # Build UTM parameters for tracking
        utm_params = {
            "utm_source": ONYX_UTM_SOURCE,
            "utm_medium": "notification",
            "utm_campaign": INSTANCE_TYPE,
            "utm_content": f"release_notes-{entry.version}",
        }

        link = f"{DOCS_CHANGELOG_BASE_URL}#{version_anchor}?{urlencode(utm_params)}"

        additional_data: dict[str, str] = {
            "version": entry.version,
            "link": link,
        }

        created_count = batch_create_notifications(
            user_ids,
            NotificationType.RELEASE_NOTES,
            db_session,
            title=entry.title,
            description=f"Check out what's new in {entry.version}",
            additional_data=additional_data,
        )
        total_created += created_count

        logger.debug(
            f"Created {created_count} release notes notifications (version {entry.version}, {len(user_ids)} eligible users)"
        )

    return total_created


================================================
FILE: backend/onyx/db/rotate_encryption_key.py
================================================
"""Rotate encryption key for all encrypted columns.

Dynamically discovers all columns using EncryptedString / EncryptedJson,
decrypts each value with the old key, and re-encrypts with the current
ENCRYPTION_KEY_SECRET.

The operation is idempotent: rows already encrypted with the current key
are skipped. Commits are made in batches so a crash mid-rotation can be
safely resumed by re-running.
"""

import json
from typing import Any

from sqlalchemy import LargeBinary
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.configs.app_configs import ENCRYPTION_KEY_SECRET
from onyx.db.models import Base
from onyx.db.models import EncryptedJson
from onyx.db.models import EncryptedString
from onyx.utils.encryption import decrypt_bytes_to_string
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version

logger = setup_logger()

_BATCH_SIZE = 500


def _can_decrypt_with_current_key(data: bytes) -> bool:
    """Check if data is already encrypted with the current key.

    Passes the key explicitly so the fallback-to-raw-decode path in
    _decrypt_bytes is NOT triggered — a clean success/failure signal.
    """
    try:
        decrypt_bytes_to_string(data, key=ENCRYPTION_KEY_SECRET)
        return True
    except Exception:
        return False


def _discover_encrypted_columns() -> list[tuple[type, str, list[str], bool]]:
    """Walk all ORM models and find columns using EncryptedString/EncryptedJson.

    Returns list of (ModelClass, column_attr_name, [pk_attr_names], is_json).
    """
    results: list[tuple[type, str, list[str], bool]] = []

    for mapper in Base.registry.mappers:
        model_cls = mapper.class_
        pk_names = [col.key for col in mapper.primary_key]

        for prop in mapper.column_attrs:
            for col in prop.columns:
                if isinstance(col.type, EncryptedJson):
                    results.append((model_cls, prop.key, pk_names, True))
                elif isinstance(col.type, EncryptedString):
                    results.append((model_cls, prop.key, pk_names, False))

    return results


def rotate_encryption_key(
    db_session: Session,
    old_key: str | None,
    dry_run: bool = False,
) -> dict[str, int]:
    """Decrypt all encrypted columns with old_key and re-encrypt with the current key.

    Args:
        db_session: Active database session.
        old_key: The previous encryption key. Pass None or "" if values were
                 not previously encrypted with a key.
        dry_run: If True, count rows that need rotation without modifying data.

    Returns:
        Dict of "table.column" -> number of rows re-encrypted (or would be).

    Commits every _BATCH_SIZE rows so that locks are held briefly and progress
    is preserved on crash. Already-rotated rows are detected and skipped,
    making the operation safe to re-run.
    """
    if not global_version.is_ee_version():
        raise RuntimeError("EE mode is not enabled — rotation requires EE encryption.")

    if not ENCRYPTION_KEY_SECRET:
        raise RuntimeError(
            "ENCRYPTION_KEY_SECRET is not set — cannot rotate. Set the target encryption key in the environment before running."
        )

    encrypted_columns = _discover_encrypted_columns()
    totals: dict[str, int] = {}

    for model_cls, col_name, pk_names, is_json in encrypted_columns:
        table_name: str = model_cls.__tablename__  # type: ignore[attr-defined]
        col_attr = getattr(model_cls, col_name)
        pk_attrs = [getattr(model_cls, pk) for pk in pk_names]

        # Read raw bytes directly, bypassing the TypeDecorator
        raw_col = col_attr.property.columns[0]

        stmt = select(*pk_attrs, raw_col.cast(LargeBinary)).where(col_attr.is_not(None))
        rows = db_session.execute(stmt).all()

        reencrypted = 0
        batch_pending = 0
        for row in rows:
            raw_bytes: bytes | None = row[-1]
            if raw_bytes is None:
                continue

            if _can_decrypt_with_current_key(raw_bytes):
                continue

            try:
                if not old_key:
                    decrypted_str = raw_bytes.decode("utf-8")
                else:
                    decrypted_str = decrypt_bytes_to_string(raw_bytes, key=old_key)

                # For EncryptedJson, parse back to dict so the TypeDecorator
                # can json.dumps() it cleanly (avoids double-encoding).
                value: Any = json.loads(decrypted_str) if is_json else decrypted_str
            except (ValueError, UnicodeDecodeError) as e:
                pk_vals = [row[i] for i in range(len(pk_names))]
                logger.warning(
                    f"Could not decrypt/parse {table_name}.{col_name} row {pk_vals} — skipping: {e}"
                )
                continue

            if not dry_run:
                pk_filters = [pk_attr == row[i] for i, pk_attr in enumerate(pk_attrs)]
                update_stmt = (
                    update(model_cls).where(*pk_filters).values({col_name: value})
                )
                db_session.execute(update_stmt)
                batch_pending += 1

                if batch_pending >= _BATCH_SIZE:
                    db_session.commit()
                    batch_pending = 0
            reencrypted += 1

        # Flush remaining rows in this column
        if batch_pending > 0:
            db_session.commit()

        if reencrypted > 0:
            totals[f"{table_name}.{col_name}"] = reencrypted
            logger.info(
                f"{'[DRY RUN] Would re-encrypt' if dry_run else 'Re-encrypted'} {reencrypted} value(s) in {table_name}.{col_name}"
            )

    return totals


================================================
FILE: backend/onyx/db/saml.py
================================================
import datetime
from typing import cast
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.db.models import SamlAccount


def upsert_saml_account(
    user_id: UUID,
    cookie: str,
    db_session: Session,
    expiration_offset: int = SESSION_EXPIRE_TIME_SECONDS,
) -> datetime.datetime:
    expires_at = func.now() + datetime.timedelta(seconds=expiration_offset)

    existing_saml_acc = (
        db_session.query(SamlAccount)
        .filter(SamlAccount.user_id == user_id)
        .one_or_none()
    )

    if existing_saml_acc:
        existing_saml_acc.encrypted_cookie = cookie
        existing_saml_acc.expires_at = cast(datetime.datetime, expires_at)
        existing_saml_acc.updated_at = func.now()
        saml_acc = existing_saml_acc
    else:
        saml_acc = SamlAccount(
            user_id=user_id,
            encrypted_cookie=cookie,
            expires_at=expires_at,
        )
        db_session.add(saml_acc)

    db_session.commit()

    return saml_acc.expires_at


async def get_saml_account(
    cookie: str, async_db_session: AsyncSession
) -> SamlAccount | None:
    """NOTE: this is async, since it's used during auth
    (which is necessarily async due to FastAPI Users)"""
    stmt = (
        select(SamlAccount)
        .options(selectinload(SamlAccount.user))  # Use selectinload for collections
        .where(
            and_(
                SamlAccount.encrypted_cookie == cookie,
                SamlAccount.expires_at > func.now(),
            )
        )
    )

    result = await async_db_session.execute(stmt)
    return result.scalars().unique().one_or_none()


async def expire_saml_account(
    saml_account: SamlAccount, async_db_session: AsyncSession
) -> None:
    saml_account.expires_at = func.now()
    await async_db_session.commit()


================================================
FILE: backend/onyx/db/search_settings.py
================================================
from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
from onyx.context.search.models import SavedSearchSettings
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import fetch_embedding_provider
from onyx.db.models import CloudEmbeddingProvider
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.server.manage.embedding.models import (
    CloudEmbeddingProvider as ServerCloudEmbeddingProvider,
)
from onyx.utils.logger import setup_logger
from shared_configs.configs import PRESERVED_SEARCH_FIELDS
from shared_configs.enums import EmbeddingProvider


logger = setup_logger()


class ActiveSearchSettings:
    primary: SearchSettings
    secondary: SearchSettings | None

    def __init__(
        self, primary: SearchSettings, secondary: SearchSettings | None
    ) -> None:
        self.primary = primary
        self.secondary = secondary


def create_search_settings(
    search_settings: SavedSearchSettings,
    db_session: Session,
    status: IndexModelStatus = IndexModelStatus.FUTURE,
) -> SearchSettings:
    embedding_model = SearchSettings(
        model_name=search_settings.model_name,
        model_dim=search_settings.model_dim,
        normalize=search_settings.normalize,
        query_prefix=search_settings.query_prefix,
        passage_prefix=search_settings.passage_prefix,
        status=status,
        index_name=search_settings.index_name,
        provider_type=search_settings.provider_type,
        multipass_indexing=search_settings.multipass_indexing,
        embedding_precision=search_settings.embedding_precision,
        reduced_dimension=search_settings.reduced_dimension,
        enable_contextual_rag=search_settings.enable_contextual_rag,
        contextual_rag_llm_name=search_settings.contextual_rag_llm_name,
        contextual_rag_llm_provider=search_settings.contextual_rag_llm_provider,
        switchover_type=search_settings.switchover_type,
    )

    db_session.add(embedding_model)
    db_session.commit()

    return embedding_model


def get_embedding_provider_from_provider_type(
    db_session: Session, provider_type: EmbeddingProvider
) -> CloudEmbeddingProvider | None:
    query = select(CloudEmbeddingProvider).where(
        CloudEmbeddingProvider.provider_type == provider_type
    )
    provider = db_session.execute(query).scalars().first()
    return provider if provider else None


def get_current_db_embedding_provider(
    db_session: Session,
) -> ServerCloudEmbeddingProvider | None:
    search_settings = get_current_search_settings(db_session=db_session)

    if search_settings.provider_type is None:
        return None

    embedding_provider = fetch_embedding_provider(
        db_session=db_session,
        provider_type=search_settings.provider_type,
    )
    if embedding_provider is None:
        raise RuntimeError("No embedding provider exists for this model.")

    current_embedding_provider = ServerCloudEmbeddingProvider.from_request(
        cloud_provider_model=embedding_provider
    )

    return current_embedding_provider


def delete_search_settings(db_session: Session, search_settings_id: int) -> None:
    current_settings = get_current_search_settings(db_session)

    if current_settings.id == search_settings_id:
        raise ValueError("Cannot delete currently active search settings")

    # First, delete associated index attempts
    index_attempts_query = delete(IndexAttempt).where(
        IndexAttempt.search_settings_id == search_settings_id
    )
    db_session.execute(index_attempts_query)

    # Then, delete the search settings
    search_settings_query = delete(SearchSettings).where(
        and_(
            SearchSettings.id == search_settings_id,
            SearchSettings.status != IndexModelStatus.PRESENT,
        )
    )

    db_session.execute(search_settings_query)
    db_session.commit()


def get_current_search_settings(db_session: Session) -> SearchSettings:
    query = (
        select(SearchSettings)
        .where(SearchSettings.status == IndexModelStatus.PRESENT)
        .order_by(SearchSettings.id.desc())
    )
    result = db_session.execute(query)
    latest_settings = result.scalars().first()

    if not latest_settings:
        raise RuntimeError("No search settings specified; DB is not in a valid state.")
    return latest_settings


def get_secondary_search_settings(db_session: Session) -> SearchSettings | None:
    query = (
        select(SearchSettings)
        .where(SearchSettings.status == IndexModelStatus.FUTURE)
        .order_by(SearchSettings.id.desc())
    )
    result = db_session.execute(query)
    latest_settings = result.scalars().first()

    return latest_settings


def get_active_search_settings(db_session: Session) -> ActiveSearchSettings:
    """Returns active search settings. Secondary search settings may be None."""

    # Get the primary and secondary search settings
    primary_search_settings = get_current_search_settings(db_session)
    secondary_search_settings = get_secondary_search_settings(db_session)
    return ActiveSearchSettings(
        primary=primary_search_settings, secondary=secondary_search_settings
    )


def get_active_search_settings_list(db_session: Session) -> list[SearchSettings]:
    """Returns active search settings as a list. Primary settings are the first element,
    and if secondary search settings exist, they will be the second element."""

    search_settings_list: list[SearchSettings] = []

    active_search_settings = get_active_search_settings(db_session)
    search_settings_list.append(active_search_settings.primary)
    if active_search_settings.secondary:
        search_settings_list.append(active_search_settings.secondary)

    return search_settings_list


def get_all_search_settings(db_session: Session) -> list[SearchSettings]:
    query = select(SearchSettings).order_by(SearchSettings.id.desc())
    result = db_session.execute(query)
    all_settings = result.scalars().all()
    return list(all_settings)


def get_multilingual_expansion(db_session: Session | None = None) -> list[str]:
    if db_session is None:
        with get_session_with_current_tenant() as db_session:
            search_settings = get_current_search_settings(db_session)
    else:
        search_settings = get_current_search_settings(db_session)
    if not search_settings:
        return []
    return search_settings.multilingual_expansion


def update_search_settings(
    current_settings: SearchSettings,
    updated_settings: SavedSearchSettings,
    preserved_fields: list[str],
) -> None:
    for field, value in updated_settings.dict().items():
        if field not in preserved_fields:
            setattr(current_settings, field, value)


def update_current_search_settings(
    db_session: Session,
    search_settings: SavedSearchSettings,
    preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS,
) -> None:
    current_settings = get_current_search_settings(db_session)
    if not current_settings:
        logger.warning("No current search settings found to update")
        return

    update_search_settings(current_settings, search_settings, preserved_fields)
    db_session.commit()
    logger.info("Current search settings updated successfully")


def update_secondary_search_settings(
    db_session: Session,
    search_settings: SavedSearchSettings,
    preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS,
) -> None:
    secondary_settings = get_secondary_search_settings(db_session)
    if not secondary_settings:
        logger.warning("No secondary search settings found to update")
        return

    preserved_fields = PRESERVED_SEARCH_FIELDS
    update_search_settings(secondary_settings, search_settings, preserved_fields)

    db_session.commit()
    logger.info("Secondary search settings updated successfully")


def update_search_settings_status(
    search_settings: SearchSettings, new_status: IndexModelStatus, db_session: Session
) -> None:
    search_settings.status = new_status
    db_session.commit()


def user_has_overridden_embedding_model() -> bool:
    return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL


================================================
FILE: backend/onyx/db/seeding/chat_history_seeding.py
================================================
import random
from datetime import datetime
from datetime import timedelta
from logging import getLogger
from uuid import UUID

from onyx.configs.constants import MessageType
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ChatSession

logger = getLogger(__name__)


def seed_chat_history(
    num_sessions: int,
    num_messages: int,
    days: int,
    user_id: UUID | None = None,
    persona_id: int | None = None,
) -> None:
    """Utility function to seed chat history for testing.

    num_sessions: the number of sessions to seed
    num_messages: the number of messages to seed per sessions
    days: the number of days looking backwards from the current time over which to randomize
    the times.
    user_id: optional user to associate with sessions
    persona_id: optional persona/assistant to associate with sessions
    """
    with get_session_with_current_tenant() as db_session:
        logger.info(f"Seeding {num_sessions} sessions.")
        for y in range(0, num_sessions):
            create_chat_session(db_session, f"pytest_session_{y}", user_id, persona_id)

        # randomize all session times
        logger.info(f"Seeding {num_messages} messages per session.")
        rows = db_session.query(ChatSession).all()
        for x in range(0, len(rows)):
            if x % 1024 == 0:
                logger.info(f"Seeded messages for {x} sessions so far.")

            row = rows[x]
            row.time_created = datetime.utcnow() - timedelta(
                days=random.randint(0, days)
            )
            row.time_updated = row.time_created + timedelta(
                minutes=random.randint(0, 10)
            )

            root_message = get_or_create_root_message(row.id, db_session)

            current_message_type = MessageType.USER
            parent_message = root_message
            for x in range(0, num_messages):
                if current_message_type == MessageType.USER:
                    msg = f"pytest_message_user_{x}"
                else:
                    msg = f"pytest_message_assistant_{x}"

                chat_message = create_new_chat_message(
                    chat_session_id=row.id,
                    parent_message=parent_message,
                    message=msg,
                    token_count=0,
                    message_type=current_message_type,
                    commit=False,
                    db_session=db_session,
                )

                chat_message.time_sent = row.time_created + timedelta(
                    minutes=random.randint(0, 10)
                )

                db_session.commit()

                current_message_type = (
                    MessageType.ASSISTANT
                    if current_message_type == MessageType.USER
                    else MessageType.USER
                )
                parent_message = chat_message

        db_session.commit()

        logger.info(f"Seeded messages for {len(rows)} sessions. Finished.")


================================================
FILE: backend/onyx/db/slack_bot.py
================================================
from collections.abc import Sequence

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.models import SlackBot


def insert_slack_bot(
    db_session: Session,
    name: str,
    enabled: bool,
    bot_token: str,
    app_token: str,
    user_token: str | None = None,
) -> SlackBot:
    slack_bot = SlackBot(
        name=name,
        enabled=enabled,
        bot_token=bot_token,
        app_token=app_token,
        user_token=user_token,
    )
    db_session.add(slack_bot)
    db_session.commit()

    return slack_bot


def update_slack_bot(
    db_session: Session,
    slack_bot_id: int,
    name: str,
    enabled: bool,
    bot_token: str,
    app_token: str,
    user_token: str | None = None,
) -> SlackBot:
    slack_bot = db_session.scalar(select(SlackBot).where(SlackBot.id == slack_bot_id))
    if slack_bot is None:
        raise ValueError(f"Unable to find Slack Bot with ID {slack_bot_id}")

    # update the app
    slack_bot.name = name
    slack_bot.enabled = enabled
    slack_bot.bot_token = bot_token  # type: ignore[assignment]
    slack_bot.app_token = app_token  # type: ignore[assignment]
    slack_bot.user_token = user_token  # type: ignore[assignment]

    db_session.commit()

    return slack_bot


def fetch_slack_bot(
    db_session: Session,
    slack_bot_id: int,
) -> SlackBot:
    slack_bot = db_session.scalar(select(SlackBot).where(SlackBot.id == slack_bot_id))
    if slack_bot is None:
        raise ValueError(f"Unable to find Slack Bot with ID {slack_bot_id}")

    return slack_bot


def remove_slack_bot(
    db_session: Session,
    slack_bot_id: int,
) -> None:
    slack_bot = fetch_slack_bot(
        db_session=db_session,
        slack_bot_id=slack_bot_id,
    )

    db_session.delete(slack_bot)
    db_session.commit()


def fetch_slack_bots(db_session: Session) -> Sequence[SlackBot]:
    return db_session.scalars(select(SlackBot)).all()


================================================
FILE: backend/onyx/db/slack_channel_config.py
================================================
from collections.abc import Sequence
from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.db.constants import DEFAULT_PERSONA_SLACK_CHANNEL_NAME
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.models import ChannelConfig
from onyx.db.models import Persona
from onyx.db.models import Persona__DocumentSet
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.db.persona import mark_persona_as_deleted
from onyx.db.persona import upsert_persona
from onyx.db.tools import get_builtin_tool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.utils.errors import EERequiredError
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)


def _build_persona_name(channel_name: str | None) -> str:
    return f"{SLACK_BOT_PERSONA_PREFIX}{channel_name if channel_name else DEFAULT_PERSONA_SLACK_CHANNEL_NAME}"


def _cleanup_relationships(db_session: Session, persona_id: int) -> None:
    """NOTE: does not commit changes"""
    # delete existing persona-document_set relationships
    existing_relationships = db_session.scalars(
        select(Persona__DocumentSet).where(
            Persona__DocumentSet.persona_id == persona_id
        )
    )
    for rel in existing_relationships:
        db_session.delete(rel)


def create_slack_channel_persona(
    db_session: Session,
    channel_name: str | None,
    document_set_ids: list[int],
    existing_persona_id: int | None = None,
) -> Persona:
    """NOTE: does not commit changes"""

    search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)

    # create/update persona associated with the Slack channel
    persona_name = _build_persona_name(channel_name)
    persona_id_to_update = existing_persona_id
    if persona_id_to_update is None:
        # Reuse any previous Slack persona for this channel (even if the config was
        # temporarily switched to a different persona) so we don't trip duplicate name
        # validation inside `upsert_persona`.
        existing_persona = db_session.scalar(
            select(Persona).where(Persona.name == persona_name)
        )
        if existing_persona:
            persona_id_to_update = existing_persona.id

    persona = upsert_persona(
        user=None,  # Slack channel Personas are not attached to users
        persona_id=persona_id_to_update,
        name=persona_name,
        description="",
        system_prompt="",
        task_prompt="",
        datetime_aware=True,
        tool_ids=[search_tool.id],
        document_set_ids=document_set_ids,
        llm_model_provider_override=None,
        llm_model_version_override=None,
        starter_messages=None,
        is_public=True,
        is_featured=False,
        db_session=db_session,
        commit=False,
    )

    return persona


def _no_ee_standard_answer_categories(
    *args: Any,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> list:
    return []


def insert_slack_channel_config(
    db_session: Session,
    slack_bot_id: int,
    persona_id: int | None,
    channel_config: ChannelConfig,
    standard_answer_category_ids: list[int],
    enable_auto_filters: bool,
    is_default: bool = False,
) -> SlackChannelConfig:
    versioned_fetch_standard_answer_categories_by_ids = (
        fetch_versioned_implementation_with_fallback(
            "onyx.db.standard_answer",
            "fetch_standard_answer_categories_by_ids",
            _no_ee_standard_answer_categories,
        )
    )
    existing_standard_answer_categories = (
        versioned_fetch_standard_answer_categories_by_ids(
            standard_answer_category_ids=standard_answer_category_ids,
            db_session=db_session,
        )
    )

    if len(existing_standard_answer_categories) != len(standard_answer_category_ids):
        if len(existing_standard_answer_categories) == 0:
            raise EERequiredError(
                "Standard answers are a paid Enterprise Edition feature - enable EE or remove standard answer categories"
            )
        else:
            raise ValueError(
                f"Some or all categories with ids {standard_answer_category_ids} do not exist"
            )

    if is_default:
        existing_default = db_session.scalar(
            select(SlackChannelConfig).where(
                SlackChannelConfig.slack_bot_id == slack_bot_id,
                SlackChannelConfig.is_default is True,  # type: ignore
            )
        )
        if existing_default:
            raise ValueError("A default config already exists for this Slack bot.")
    else:
        if "channel_name" not in channel_config:
            raise ValueError("Channel name is required for non-default configs.")

    slack_channel_config = SlackChannelConfig(
        slack_bot_id=slack_bot_id,
        persona_id=persona_id,
        channel_config=channel_config,
        standard_answer_categories=existing_standard_answer_categories,
        enable_auto_filters=enable_auto_filters,
        is_default=is_default,
    )
    db_session.add(slack_channel_config)
    db_session.commit()

    return slack_channel_config


def update_slack_channel_config(
    db_session: Session,
    slack_channel_config_id: int,
    persona_id: int | None,
    channel_config: ChannelConfig,
    standard_answer_category_ids: list[int],
    enable_auto_filters: bool,
    disabled: bool,  # noqa: ARG001
) -> SlackChannelConfig:
    slack_channel_config = db_session.scalar(
        select(SlackChannelConfig).where(
            SlackChannelConfig.id == slack_channel_config_id
        )
    )
    if slack_channel_config is None:
        raise ValueError(
            f"Unable to find Slack channel config with ID {slack_channel_config_id}"
        )

    versioned_fetch_standard_answer_categories_by_ids = (
        fetch_versioned_implementation_with_fallback(
            "onyx.db.standard_answer",
            "fetch_standard_answer_categories_by_ids",
            _no_ee_standard_answer_categories,
        )
    )
    existing_standard_answer_categories = (
        versioned_fetch_standard_answer_categories_by_ids(
            standard_answer_category_ids=standard_answer_category_ids,
            db_session=db_session,
        )
    )
    if len(existing_standard_answer_categories) != len(standard_answer_category_ids):
        raise ValueError(
            f"Some or all categories with ids {standard_answer_category_ids} do not exist"
        )

    # update the config
    slack_channel_config.persona_id = persona_id
    slack_channel_config.channel_config = channel_config
    slack_channel_config.standard_answer_categories = list(
        existing_standard_answer_categories
    )
    slack_channel_config.enable_auto_filters = enable_auto_filters

    db_session.commit()

    return slack_channel_config


def remove_slack_channel_config(
    db_session: Session,
    slack_channel_config_id: int,
    user: User,
) -> None:
    slack_channel_config = db_session.scalar(
        select(SlackChannelConfig).where(
            SlackChannelConfig.id == slack_channel_config_id
        )
    )
    if slack_channel_config is None:
        raise ValueError(
            f"Unable to find Slack channel config with ID {slack_channel_config_id}"
        )

    existing_persona_id = slack_channel_config.persona_id
    if existing_persona_id:
        existing_persona = db_session.scalar(
            select(Persona).where(Persona.id == existing_persona_id)
        )
        # if the existing persona was one created just for use with this Slack channel,
        # then clean it up
        if existing_persona and existing_persona.name.startswith(
            SLACK_BOT_PERSONA_PREFIX
        ):
            _cleanup_relationships(
                db_session=db_session, persona_id=existing_persona_id
            )
            mark_persona_as_deleted(
                persona_id=existing_persona_id, user=user, db_session=db_session
            )

    db_session.delete(slack_channel_config)
    db_session.commit()


def fetch_slack_channel_configs(
    db_session: Session, slack_bot_id: int | None = None
) -> Sequence[SlackChannelConfig]:
    if not slack_bot_id:
        return db_session.scalars(select(SlackChannelConfig)).all()

    return db_session.scalars(
        select(SlackChannelConfig).where(
            SlackChannelConfig.slack_bot_id == slack_bot_id
        )
    ).all()


def fetch_slack_channel_config(
    db_session: Session, slack_channel_config_id: int
) -> SlackChannelConfig | None:
    return db_session.scalar(
        select(SlackChannelConfig).where(
            SlackChannelConfig.id == slack_channel_config_id
        )
    )


def fetch_slack_channel_config_for_channel_or_default(
    db_session: Session, slack_bot_id: int, channel_name: str | None
) -> SlackChannelConfig | None:
    # attempt to find channel-specific config first
    if channel_name is not None:
        sc_config = db_session.scalar(
            select(SlackChannelConfig)
            .options(joinedload(SlackChannelConfig.persona))
            .where(
                SlackChannelConfig.slack_bot_id == slack_bot_id,
                SlackChannelConfig.channel_config["channel_name"].astext
                == channel_name,
            )
        )
    else:
        sc_config = None

    if sc_config:
        return sc_config

    # if none found, see if there is a default
    default_sc = db_session.scalar(
        select(SlackChannelConfig)
        .options(joinedload(SlackChannelConfig.persona))
        .where(
            SlackChannelConfig.slack_bot_id == slack_bot_id,
            SlackChannelConfig.is_default == True,  # noqa: E712
        )
    )

    return default_sc


================================================
FILE: backend/onyx/db/swap_index.py
================================================
import time

from sqlalchemy.orm import Session

from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.document import delete_all_documents_for_connector_credential_pair
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import SwitchoverType
from onyx.db.index_attempt import cancel_indexing_attempts_for_search_settings
from onyx.db.index_attempt import (
    count_unique_active_cc_pairs_with_successful_index_attempts,
)
from onyx.db.index_attempt import count_unique_cc_pairs_with_successful_index_attempts
from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import update_no_default_contextual_rag_provider
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _perform_index_swap(
    db_session: Session,
    new_search_settings: SearchSettings,
    all_cc_pairs: list[ConnectorCredentialPair],
    cleanup_documents: bool = False,
) -> SearchSettings | None:
    """Swap the indices and expire the old one.

    Returns the old search settings if the swap was successful, otherwise None.
    """
    current_search_settings = get_current_search_settings(db_session)
    if len(all_cc_pairs) > 0:
        kv_store = get_kv_store()
        kv_store.store(KV_REINDEX_KEY, False)

        # Expire jobs for the now past index/embedding model
        cancel_indexing_attempts_for_search_settings(
            search_settings_id=current_search_settings.id,
            db_session=db_session,
        )

        # Recount aggregates
        for cc_pair in all_cc_pairs:
            resync_cc_pair(
                cc_pair=cc_pair,
                # sync based on the new search settings
                search_settings_id=new_search_settings.id,
                db_session=db_session,
            )

        if cleanup_documents:
            # clean up all DocumentByConnectorCredentialPair / Document rows, since we're
            # doing an instant swap and no documents will exist in the new index.
            for cc_pair in all_cc_pairs:
                delete_all_documents_for_connector_credential_pair(
                    db_session=db_session,
                    connector_id=cc_pair.connector_id,
                    credential_id=cc_pair.credential_id,
                )

    # swap over search settings
    update_search_settings_status(
        search_settings=current_search_settings,
        new_status=IndexModelStatus.PAST,
        db_session=db_session,
    )
    update_search_settings_status(
        search_settings=new_search_settings,
        new_status=IndexModelStatus.PRESENT,
        db_session=db_session,
    )

    # Update the default contextual model to match the newly promoted settings
    try:
        update_default_contextual_model(
            db_session=db_session,
            enable_contextual_rag=new_search_settings.enable_contextual_rag,
            contextual_rag_llm_provider=new_search_settings.contextual_rag_llm_provider,
            contextual_rag_llm_name=new_search_settings.contextual_rag_llm_name,
        )
    except ValueError as e:
        logger.error(f"Model not found, defaulting to no contextual model: {e}")
        update_no_default_contextual_rag_provider(
            db_session=db_session,
        )
        new_search_settings.enable_contextual_rag = False
        new_search_settings.contextual_rag_llm_provider = None
        new_search_settings.contextual_rag_llm_name = None
        db_session.commit()

    # This flow is for checking and possibly creating an index so we get all
    # indices.
    document_indices = get_all_document_indices(new_search_settings, None, None)

    WAIT_SECONDS = 5

    for document_index in document_indices:
        success = False
        for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
            try:
                logger.notice(
                    f"Document index {document_index.__class__.__name__} swap (attempt {x + 1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
                )
                document_index.ensure_indices_exist(
                    primary_embedding_dim=new_search_settings.final_embedding_dim,
                    primary_embedding_precision=new_search_settings.embedding_precision,
                    # just finished swap, no more secondary index
                    secondary_index_embedding_dim=None,
                    secondary_index_embedding_precision=None,
                )

                logger.notice("Document index swap complete.")
                success = True
                break
            except Exception:
                logger.exception(
                    f"Document index swap for {document_index.__class__.__name__} did not succeed. "
                    f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
                )
                time.sleep(WAIT_SECONDS)

        if not success:
            logger.error(
                f"Document index swap for {document_index.__class__.__name__} did not succeed. "
                f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
            )
            return None

    return current_search_settings


def check_and_perform_index_swap(db_session: Session) -> SearchSettings | None:
    """Get count of cc-pairs and count of successful index_attempts for the
    new model grouped by connector + credential, if it's the same, then assume
    new index is done building. If so, swap the indices and expire the old one.

    Returns None if search settings did not change, or the old search settings if they
    did change.
    """
    if DISABLE_VECTOR_DB:
        return None

    # Default CC-pair created for Ingestion API unused here
    all_cc_pairs = get_connector_credential_pairs(db_session)
    cc_pair_count = max(len(all_cc_pairs) - 1, 0)
    new_search_settings = get_secondary_search_settings(db_session)

    if not new_search_settings:
        return None

    # Handle switchover based on switchover_type
    switchover_type = new_search_settings.switchover_type

    # INSTANT: Swap immediately without waiting
    if switchover_type == SwitchoverType.INSTANT:
        return _perform_index_swap(
            db_session=db_session,
            new_search_settings=new_search_settings,
            all_cc_pairs=all_cc_pairs,
            # clean up all DocumentByConnectorCredentialPair / Document rows, since we're
            # doing an instant swap.
            cleanup_documents=True,
        )

    # REINDEX: Wait for all connectors to complete
    elif switchover_type == SwitchoverType.REINDEX:
        unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts(
            search_settings_id=new_search_settings.id, db_session=db_session
        )

        # Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this
        # function is correct. The unique_cc_indexings are specifically for the existing cc-pairs
        if unique_cc_indexings > cc_pair_count:
            logger.error("More unique indexings than cc pairs, should not occur")

        if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings:
            # Swap indices
            return _perform_index_swap(
                db_session=db_session,
                new_search_settings=new_search_settings,
                all_cc_pairs=all_cc_pairs,
            )

        return None

    # ACTIVE_ONLY: Wait for only non-paused connectors to complete
    elif switchover_type == SwitchoverType.ACTIVE_ONLY:
        # Count non-paused cc_pairs (excluding the default Ingestion API cc_pair)
        active_cc_pairs = [
            cc_pair
            for cc_pair in all_cc_pairs
            if cc_pair.status != ConnectorCredentialPairStatus.PAUSED
        ]
        active_cc_pair_count = max(len(active_cc_pairs) - 1, 0)

        unique_active_cc_indexings = (
            count_unique_active_cc_pairs_with_successful_index_attempts(
                search_settings_id=new_search_settings.id, db_session=db_session
            )
        )

        if unique_active_cc_indexings > active_cc_pair_count:
            logger.error(
                "More unique active indexings than active cc pairs, should not occur"
            )

        if (
            active_cc_pair_count == 0
            or active_cc_pair_count == unique_active_cc_indexings
        ):
            # Swap indices
            return _perform_index_swap(
                db_session=db_session,
                new_search_settings=new_search_settings,
                all_cc_pairs=all_cc_pairs,
            )

        return None

    # Should not reach here, but handle gracefully
    logger.error(f"Unknown switchover_type: {switchover_type}")
    return None


================================================
FILE: backend/onyx/db/sync_record.py
================================================
from sqlalchemy import and_
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.models import SyncRecord
from onyx.utils.logger import setup_logger

logger = setup_logger()


def insert_sync_record(
    db_session: Session,
    entity_id: int,
    sync_type: SyncType,
) -> SyncRecord:
    """Insert a new sync record into the database, cancelling any existing in-progress records.

    Args:
        db_session: The database session to use
        entity_id: The ID of the entity being synced (document set ID, user group ID, etc.)
        sync_type: The type of sync operation
    """
    # If an existing in-progress sync record exists, mark as cancelled
    existing_in_progress_sync_record = fetch_latest_sync_record(
        db_session, entity_id, sync_type, sync_status=SyncStatus.IN_PROGRESS
    )

    if existing_in_progress_sync_record is not None:
        logger.info(
            f"Cancelling existing in-progress sync record {existing_in_progress_sync_record.id} "
            f"for entity_id={entity_id} sync_type={sync_type}"
        )
        mark_sync_records_as_cancelled(db_session, entity_id, sync_type)

    return _create_sync_record(db_session, entity_id, sync_type)


def mark_sync_records_as_cancelled(
    db_session: Session,
    entity_id: int | None,
    sync_type: SyncType,
) -> None:
    stmt = (
        update(SyncRecord)
        .where(
            and_(
                SyncRecord.entity_id == entity_id,
                SyncRecord.sync_type == sync_type,
                SyncRecord.sync_status == SyncStatus.IN_PROGRESS,
            )
        )
        .values(sync_status=SyncStatus.CANCELED)
    )
    db_session.execute(stmt)
    db_session.commit()


def _create_sync_record(
    db_session: Session,
    entity_id: int | None,
    sync_type: SyncType,
) -> SyncRecord:
    """Create and insert a new sync record into the database."""
    sync_record = SyncRecord(
        entity_id=entity_id,
        sync_type=sync_type,
        sync_status=SyncStatus.IN_PROGRESS,
        num_docs_synced=0,
        sync_start_time=func.now(),
    )
    db_session.add(sync_record)
    db_session.commit()

    return sync_record


def fetch_latest_sync_record(
    db_session: Session,
    entity_id: int,
    sync_type: SyncType,
    sync_status: SyncStatus | None = None,
) -> SyncRecord | None:
    """Fetch the most recent sync record for a given entity ID and status.

    Args:
        db_session: The database session to use
        entity_id: The ID of the entity to fetch sync record for
        sync_type: The type of sync operation
    """
    stmt = (
        select(SyncRecord)
        .where(
            and_(
                SyncRecord.entity_id == entity_id,
                SyncRecord.sync_type == sync_type,
            )
        )
        .order_by(desc(SyncRecord.sync_start_time))
        .limit(1)
    )

    if sync_status is not None:
        stmt = stmt.where(SyncRecord.sync_status == sync_status)

    result = db_session.execute(stmt)
    return result.scalar_one_or_none()


def update_sync_record_status(
    db_session: Session,
    entity_id: int,
    sync_type: SyncType,
    sync_status: SyncStatus,
    num_docs_synced: int | None = None,
) -> None:
    """Update the status of a sync record.

    Args:
        db_session: The database session to use
        entity_id: The ID of the entity being synced
        sync_type: The type of sync operation
        sync_status: The new status to set
        num_docs_synced: Optional number of documents synced to update
    """
    sync_record = fetch_latest_sync_record(db_session, entity_id, sync_type)
    if sync_record is None:
        raise ValueError(
            f"No sync record found for entity_id={entity_id} sync_type={sync_type}"
        )

    sync_record.sync_status = sync_status
    if num_docs_synced is not None:
        sync_record.num_docs_synced = num_docs_synced

    if sync_status.is_terminal():
        sync_record.sync_end_time = func.now()  # type: ignore

    db_session.commit()


def cleanup_sync_records(
    db_session: Session, entity_id: int, sync_type: SyncType
) -> None:
    """Cleanup sync records for a given entity ID and sync type by marking them as failed."""
    stmt = (
        update(SyncRecord)
        .where(SyncRecord.entity_id == entity_id)
        .where(SyncRecord.sync_type == sync_type)
        .where(SyncRecord.sync_status == SyncStatus.IN_PROGRESS)
        .values(sync_status=SyncStatus.CANCELED, sync_end_time=func.now())
    )
    db_session.execute(stmt)
    db_session.commit()


================================================
FILE: backend/onyx/db/tag.py
================================================
from typing import Any

from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.models import Document
from onyx.db.models import Document__Tag
from onyx.db.models import Tag
from onyx.utils.logger import setup_logger

logger = setup_logger()


def check_tag_validity(tag_key: str, tag_value: str) -> bool:
    """If a tag is too long, it should not be used (it will cause an error in Postgres
    as the unique constraint can only apply to entries that are less than 2704 bytes).

    Additionally, extremely long tags are not really usable / useful."""
    if len(tag_key) + len(tag_value) > 255:
        logger.error(
            f"Tag with key '{tag_key}' and value '{tag_value}' is too long, cannot be used"
        )
        return False

    return True


def create_or_add_document_tag(
    tag_key: str,
    tag_value: str,
    source: DocumentSource,
    document_id: str,
    db_session: Session,
) -> Tag | None:
    if not check_tag_validity(tag_key, tag_value):
        return None

    document = db_session.get(Document, document_id)
    if not document:
        raise ValueError("Invalid Document, cannot attach Tags")

    # Use upsert to avoid race condition when multiple workers try to create the same tag
    insert_stmt = pg_insert(Tag).values(
        tag_key=tag_key,
        tag_value=tag_value,
        source=source,
        is_list=False,
    )
    insert_stmt = insert_stmt.on_conflict_do_nothing(
        constraint="_tag_key_value_source_list_uc"
    )
    db_session.execute(insert_stmt)

    # Now fetch the tag (either just inserted or already existed)
    tag_stmt = select(Tag).where(
        Tag.tag_key == tag_key,
        Tag.tag_value == tag_value,
        Tag.source == source,
        Tag.is_list.is_(False),
    )
    tag = db_session.execute(tag_stmt).scalar_one()

    if tag not in document.tags:
        document.tags.append(tag)

    db_session.commit()
    return tag


def create_or_add_document_tag_list(
    tag_key: str,
    tag_values: list[str],
    source: DocumentSource,
    document_id: str,
    db_session: Session,
) -> list[Tag]:
    valid_tag_values = [
        tag_value for tag_value in tag_values if check_tag_validity(tag_key, tag_value)
    ]
    if not valid_tag_values:
        return []

    document = db_session.get(Document, document_id)
    if not document:
        raise ValueError("Invalid Document, cannot attach Tags")

    # Use upsert to avoid race condition when multiple workers try to create the same tags
    for tag_value in valid_tag_values:
        insert_stmt = pg_insert(Tag).values(
            tag_key=tag_key,
            tag_value=tag_value,
            source=source,
            is_list=True,
        )
        insert_stmt = insert_stmt.on_conflict_do_nothing(
            constraint="_tag_key_value_source_list_uc"
        )
        db_session.execute(insert_stmt)

    # Now fetch all tags (either just inserted or already existed)
    all_tags_stmt = select(Tag).where(
        Tag.tag_key == tag_key,
        Tag.tag_value.in_(valid_tag_values),
        Tag.source == source,
        Tag.is_list.is_(True),
    )
    all_tags = list(db_session.execute(all_tags_stmt).scalars().all())

    for tag in all_tags:
        if tag not in document.tags:
            document.tags.append(tag)

    db_session.commit()
    return all_tags


def upsert_document_tags(
    document_id: str,
    source: DocumentSource,
    metadata: dict[str, str | list[str]],
    db_session: Session,
) -> list[Tag]:
    document = db_session.get(Document, document_id)
    if not document:
        raise ValueError("Invalid Document, cannot attach Tags")

    old_tag_ids: set[int] = {tag.id for tag in document.tags}

    new_tags: list[Tag] = []
    new_tag_ids: set[int] = set()
    for k, v in metadata.items():
        if isinstance(v, list):
            new_tags.extend(
                create_or_add_document_tag_list(k, v, source, document_id, db_session)
            )
            new_tag_ids.update({tag.id for tag in new_tags})
            continue

        new_tag = create_or_add_document_tag(k, v, source, document_id, db_session)
        if new_tag:
            new_tag_ids.add(new_tag.id)
            new_tags.append(new_tag)

    delete_tags = old_tag_ids - new_tag_ids
    if delete_tags:
        delete_stmt = delete(Document__Tag).where(
            Document__Tag.document_id == document_id,
            Document__Tag.tag_id.in_(delete_tags),
        )
        db_session.execute(delete_stmt)
        db_session.commit()

    return new_tags


def find_tags(
    tag_key_prefix: str | None,
    tag_value_prefix: str | None,
    sources: list[DocumentSource] | None,
    limit: int | None,
    db_session: Session,
    # if set, both tag_key_prefix and tag_value_prefix must be a match
    require_both_to_match: bool = False,
) -> list[Tag]:
    query = select(Tag)

    if tag_key_prefix or tag_value_prefix:
        conditions = []
        if tag_key_prefix:
            conditions.append(Tag.tag_key.ilike(f"{tag_key_prefix}%"))
        if tag_value_prefix:
            conditions.append(Tag.tag_value.ilike(f"{tag_value_prefix}%"))

        final_prefix_condition = (
            and_(*conditions) if require_both_to_match else or_(*conditions)
        )
        query = query.where(final_prefix_condition)

    if sources:
        query = query.where(Tag.source.in_(sources))

    if limit:
        query = query.limit(limit)

    result = db_session.execute(query)

    tags = result.scalars().all()
    return list(tags)


def get_structured_tags_for_document(
    document_id: str, db_session: Session
) -> dict[str, str | list[str]]:
    """Essentially returns the document metadata from postgres."""
    document = db_session.get(Document, document_id)
    if not document:
        raise ValueError("Invalid Document, cannot find tags")

    document_metadata: dict[str, Any] = {}
    for tag in document.tags:
        if tag.is_list:
            document_metadata.setdefault(tag.tag_key, [])
            # should always be a list (if tag.is_list is always True for this key), but just in case
            if not isinstance(document_metadata[tag.tag_key], list):
                logger.warning(
                    "Inconsistent is_list for document %s, tag_key %s",
                    document_id,
                    tag.tag_key,
                )
                document_metadata[tag.tag_key] = [document_metadata[tag.tag_key]]
            document_metadata[tag.tag_key].append(tag.tag_value)
            continue

        # set value (ignore duplicate keys, though there should be none)
        document_metadata.setdefault(tag.tag_key, tag.tag_value)

        # should always be a value, but just in case (treat it as a list in this case)
        if isinstance(document_metadata[tag.tag_key], list):
            logger.warning(
                "Inconsistent is_list for document %s, tag_key %s",
                document_id,
                tag.tag_key,
            )
            document_metadata[tag.tag_key] = [document_metadata[tag.tag_key]]
    return document_metadata


def delete_document_tags_for_documents__no_commit(
    document_ids: list[str], db_session: Session
) -> None:
    stmt = delete(Document__Tag).where(Document__Tag.document_id.in_(document_ids))
    db_session.execute(stmt)


def delete_orphan_tags__no_commit(db_session: Session) -> None:
    orphan_tags_query = select(Tag.id).where(
        ~db_session.query(Document__Tag.tag_id)
        .filter(Document__Tag.tag_id == Tag.id)
        .exists()
    )

    orphan_tags = db_session.execute(orphan_tags_query).scalars().all()

    if orphan_tags:
        delete_orphan_tags_stmt = delete(Tag).where(Tag.id.in_(orphan_tags))
        db_session.execute(delete_orphan_tags_stmt)


================================================
FILE: backend/onyx/db/tasks.py
================================================
from datetime import datetime

from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.sql import delete

from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import TaskQueueState
from onyx.db.models import TaskStatus


def get_latest_task(
    task_name: str,
    db_session: Session,
) -> TaskQueueState | None:
    stmt = (
        select(TaskQueueState)
        .where(TaskQueueState.task_name == task_name)
        .order_by(desc(TaskQueueState.id))
        .limit(1)
    )

    result = db_session.execute(stmt)
    latest_task = result.scalars().first()

    return latest_task


def get_latest_task_by_type(
    task_name: str,
    db_session: Session,
) -> TaskQueueState | None:
    stmt = (
        select(TaskQueueState)
        .where(TaskQueueState.task_name.like(f"%{task_name}%"))
        .order_by(desc(TaskQueueState.id))
        .limit(1)
    )

    result = db_session.execute(stmt)
    latest_task = result.scalars().first()

    return latest_task


def register_task(
    task_name: str,
    db_session: Session,
    task_id: str = "",
    status: TaskStatus = TaskStatus.PENDING,
    start_time: datetime | None = None,
) -> TaskQueueState:
    new_task = TaskQueueState(
        task_id=task_id,
        task_name=task_name,
        status=status,
        start_time=start_time,
    )

    db_session.add(new_task)
    db_session.commit()

    return new_task


def get_task_with_id(
    db_session: Session,
    task_id: str,
) -> TaskQueueState | None:
    return db_session.scalar(
        select(TaskQueueState).where(TaskQueueState.task_id == task_id)
    )


def delete_task_with_id(
    db_session: Session,
    task_id: str,
) -> None:
    db_session.execute(delete(TaskQueueState).where(TaskQueueState.task_id == task_id))
    db_session.commit()


def get_all_tasks_with_prefix(
    db_session: Session, task_name_prefix: str
) -> list[TaskQueueState]:
    return list(
        db_session.scalars(
            select(TaskQueueState).where(
                TaskQueueState.task_name.like(f"{task_name_prefix}_%")
            )
        )
    )


def mark_task_as_started_with_id(
    db_session: Session,
    task_id: str,
) -> None:
    task = get_task_with_id(db_session=db_session, task_id=task_id)
    if not task:
        raise RuntimeError(f"A task with the task-id {task_id=} does not exist")

    task.status = TaskStatus.STARTED
    db_session.commit()


def mark_task_as_finished_with_id(
    db_session: Session,
    task_id: str,
    success: bool = True,
) -> None:
    task = get_task_with_id(db_session=db_session, task_id=task_id)
    if not task:
        raise RuntimeError(f"A task with the task-id {task_id=} does not exist")

    task.status = TaskStatus.SUCCESS if success else TaskStatus.FAILURE
    db_session.commit()


def mark_task_start(
    task_name: str,
    db_session: Session,
) -> None:
    task = get_latest_task(task_name, db_session)
    if not task:
        raise ValueError(f"No task found with name {task_name}")

    task.start_time = func.now()  # type: ignore
    db_session.commit()


def mark_task_finished(
    task_name: str,
    db_session: Session,
    success: bool = True,
) -> None:
    latest_task = get_latest_task(task_name, db_session)
    if latest_task is None:
        raise ValueError(f"tasks for {task_name} do not exist")

    latest_task.status = TaskStatus.SUCCESS if success else TaskStatus.FAILURE
    db_session.commit()


def check_task_is_live_and_not_timed_out(
    task: TaskQueueState,
    db_session: Session,
    timeout: int = JOB_TIMEOUT,
) -> bool:
    # We only care for live tasks to not create new periodic tasks
    if task.status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
        return False

    current_db_time = get_db_current_time(db_session=db_session)

    last_update_time = task.register_time
    if task.start_time:
        last_update_time = max(task.register_time, task.start_time)

    time_elapsed = current_db_time - last_update_time
    return time_elapsed.total_seconds() < timeout


================================================
FILE: backend/onyx/db/token_limit.py
================================================
from collections.abc import Sequence

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.constants import TokenRateLimitScope
from onyx.db.models import TokenRateLimit
from onyx.db.models import TokenRateLimit__UserGroup
from onyx.server.token_rate_limits.models import TokenRateLimitArgs


def fetch_all_user_token_rate_limits(
    db_session: Session,
    enabled_only: bool = False,
    ordered: bool = True,
) -> Sequence[TokenRateLimit]:
    query = select(TokenRateLimit).where(
        TokenRateLimit.scope == TokenRateLimitScope.USER
    )

    if enabled_only:
        query = query.where(TokenRateLimit.enabled.is_(True))

    if ordered:
        query = query.order_by(TokenRateLimit.created_at.desc())

    return db_session.scalars(query).all()


def fetch_all_global_token_rate_limits(
    db_session: Session,
    enabled_only: bool = False,
    ordered: bool = True,
) -> Sequence[TokenRateLimit]:
    query = select(TokenRateLimit).where(
        TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
    )

    if enabled_only:
        query = query.where(TokenRateLimit.enabled.is_(True))

    if ordered:
        query = query.order_by(TokenRateLimit.created_at.desc())

    token_rate_limits = db_session.scalars(query).all()
    return token_rate_limits


def insert_user_token_rate_limit(
    db_session: Session,
    token_rate_limit_settings: TokenRateLimitArgs,
) -> TokenRateLimit:
    token_limit = TokenRateLimit(
        enabled=token_rate_limit_settings.enabled,
        token_budget=token_rate_limit_settings.token_budget,
        period_hours=token_rate_limit_settings.period_hours,
        scope=TokenRateLimitScope.USER,
    )
    db_session.add(token_limit)
    db_session.commit()

    return token_limit


def insert_global_token_rate_limit(
    db_session: Session,
    token_rate_limit_settings: TokenRateLimitArgs,
) -> TokenRateLimit:
    token_limit = TokenRateLimit(
        enabled=token_rate_limit_settings.enabled,
        token_budget=token_rate_limit_settings.token_budget,
        period_hours=token_rate_limit_settings.period_hours,
        scope=TokenRateLimitScope.GLOBAL,
    )
    db_session.add(token_limit)
    db_session.commit()

    return token_limit


def update_token_rate_limit(
    db_session: Session,
    token_rate_limit_id: int,
    token_rate_limit_settings: TokenRateLimitArgs,
) -> TokenRateLimit:
    token_limit = db_session.get(TokenRateLimit, token_rate_limit_id)
    if token_limit is None:
        raise ValueError(f"TokenRateLimit with id '{token_rate_limit_id}' not found")

    token_limit.enabled = token_rate_limit_settings.enabled
    token_limit.token_budget = token_rate_limit_settings.token_budget
    token_limit.period_hours = token_rate_limit_settings.period_hours
    db_session.commit()

    return token_limit


def delete_token_rate_limit(
    db_session: Session,
    token_rate_limit_id: int,
) -> None:
    token_limit = db_session.get(TokenRateLimit, token_rate_limit_id)
    if token_limit is None:
        raise ValueError(f"TokenRateLimit with id '{token_rate_limit_id}' not found")

    db_session.query(TokenRateLimit__UserGroup).filter(
        TokenRateLimit__UserGroup.rate_limit_id == token_rate_limit_id
    ).delete()

    db_session.delete(token_limit)
    db_session.commit()


================================================
FILE: backend/onyx/db/tools.py
================================================
from typing import Any
from typing import cast
from typing import Type
from typing import TYPE_CHECKING
from uuid import UUID

from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.constants import UNSET
from onyx.db.constants import UnsetType
from onyx.db.enums import MCPServerStatus
from onyx.db.models import MCPServer
from onyx.db.models import OAuthConfig
from onyx.db.models import Tool
from onyx.db.models import ToolCall
from onyx.server.features.tool.models import Header
from onyx.tools.built_in_tools import BUILT_IN_TOOL_TYPES
from onyx.utils.headers import HeaderItemDict
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_json_like
from onyx.utils.postgres_sanitization import sanitize_string

if TYPE_CHECKING:
    pass

logger = setup_logger()


def get_tools(
    db_session: Session,
    *,
    only_enabled: bool = False,
    only_connected_mcp: bool = False,
    only_openapi: bool = False,
) -> list[Tool]:
    query = select(Tool)

    if only_connected_mcp:
        # Keep tools that either:
        # 1. Don't have an MCP server (mcp_server_id IS NULL) - Non-MCP tools
        # 2. Have an MCP server that is connected - Connected MCP tools
        query = query.outerjoin(MCPServer, Tool.mcp_server_id == MCPServer.id).where(
            or_(
                Tool.mcp_server_id.is_(None),  # Non-MCP tools (built-in, custom)
                MCPServer.status == MCPServerStatus.CONNECTED,  # MCP tools connected
            )
        )

    if only_enabled:
        query = query.where(Tool.enabled.is_(True))

    if only_openapi:
        query = query.where(
            Tool.openapi_schema.is_not(None),
            # To avoid showing rows that have JSON literal `null` stored in the column to the user.
            # tools from mcp servers will not have an openapi schema but it has `null`, so we need to exclude them.
            func.jsonb_typeof(Tool.openapi_schema) == "object",
            # Exclude built-in tools that happen to have an openapi_schema
            Tool.in_code_tool_id.is_(None),
        )

    return list(db_session.scalars(query).all())


def get_tools_by_mcp_server_id(
    mcp_server_id: int,
    db_session: Session,
    *,
    only_enabled: bool = False,
    order_by_id: bool = False,
) -> list[Tool]:
    query = select(Tool).where(Tool.mcp_server_id == mcp_server_id)
    if only_enabled:
        query = query.where(Tool.enabled.is_(True))
    if order_by_id:
        query = query.order_by(Tool.id)
    return list(db_session.scalars(query).all())


def get_tools_by_ids(tool_ids: list[int], db_session: Session) -> list[Tool]:
    if not tool_ids:
        return []
    stmt = select(Tool).where(Tool.id.in_(tool_ids))
    return list(db_session.scalars(stmt).all())


def get_tool_by_id(tool_id: int, db_session: Session) -> Tool:
    tool = db_session.scalar(select(Tool).where(Tool.id == tool_id))
    if not tool:
        raise ValueError("Tool by specified id does not exist")
    return tool


def get_tool_by_name(tool_name: str, db_session: Session) -> Tool:
    tool = db_session.scalar(select(Tool).where(Tool.name == tool_name))
    if not tool:
        raise ValueError("Tool by specified name does not exist")
    return tool


def create_tool__no_commit(
    name: str,
    description: str | None,
    openapi_schema: dict[str, Any] | None,
    custom_headers: list[Header] | None,
    user_id: UUID | None,
    db_session: Session,
    passthrough_auth: bool,
    *,
    mcp_server_id: int | None = None,
    oauth_config_id: int | None = None,
    enabled: bool = True,
) -> Tool:
    new_tool = Tool(
        name=name,
        description=description,
        in_code_tool_id=None,
        openapi_schema=openapi_schema,
        custom_headers=(
            [header.model_dump() for header in custom_headers] if custom_headers else []
        ),
        user_id=user_id,
        passthrough_auth=passthrough_auth,
        mcp_server_id=mcp_server_id,
        oauth_config_id=oauth_config_id,
        enabled=enabled,
    )
    db_session.add(new_tool)
    db_session.flush()  # Don't commit yet, let caller decide when to commit
    return new_tool


def update_tool(
    tool_id: int,
    name: str | None,
    description: str | None,
    openapi_schema: dict[str, Any] | None,
    custom_headers: list[Header] | None,
    user_id: UUID | None,
    db_session: Session,
    passthrough_auth: bool | None,
    oauth_config_id: int | None | UnsetType = UNSET,
) -> Tool:
    tool = get_tool_by_id(tool_id, db_session)
    if tool is None:
        raise ValueError(f"Tool with ID {tool_id} does not exist")

    if name is not None:
        tool.name = name
    if description is not None:
        tool.description = description
    if openapi_schema is not None:
        tool.openapi_schema = openapi_schema
    if user_id is not None:
        tool.user_id = user_id
    if custom_headers is not None:
        tool.custom_headers = [
            cast(HeaderItemDict, header.model_dump()) for header in custom_headers
        ]
    if passthrough_auth is not None:
        tool.passthrough_auth = passthrough_auth
    old_oauth_config_id = tool.oauth_config_id
    if not isinstance(oauth_config_id, UnsetType):
        tool.oauth_config_id = oauth_config_id
        db_session.flush()

    # Clean up orphaned OAuthConfig if the oauth_config_id was changed
    if (
        old_oauth_config_id is not None
        and not isinstance(oauth_config_id, UnsetType)
        and old_oauth_config_id != oauth_config_id
    ):
        other_tools = db_session.scalars(
            select(Tool).where(Tool.oauth_config_id == old_oauth_config_id)
        ).all()
        if not other_tools:
            oauth_config = db_session.get(OAuthConfig, old_oauth_config_id)
            if oauth_config:
                db_session.delete(oauth_config)

    db_session.commit()
    return tool


def delete_tool__no_commit(tool_id: int, db_session: Session) -> None:
    tool = get_tool_by_id(tool_id, db_session)
    if tool is None:
        raise ValueError(f"Tool with ID {tool_id} does not exist")

    oauth_config_id = tool.oauth_config_id

    db_session.delete(tool)
    db_session.flush()

    # Clean up orphaned OAuthConfig if no other tools reference it
    if oauth_config_id is not None:
        other_tools = db_session.scalars(
            select(Tool).where(Tool.oauth_config_id == oauth_config_id)
        ).all()
        if not other_tools:
            oauth_config = db_session.get(OAuthConfig, oauth_config_id)
            if oauth_config:
                db_session.delete(oauth_config)
                db_session.flush()


def get_builtin_tool(
    db_session: Session,
    tool_type: Type[BUILT_IN_TOOL_TYPES],
) -> Tool:
    """
    Retrieves a built-in tool from the database based on the tool type.
    """
    # local import to avoid circular import. DB layer should not depend on tools layer.
    from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP

    tool_id = next(
        (
            in_code_tool_id
            for in_code_tool_id, tool_cls in BUILT_IN_TOOL_MAP.items()
            if tool_cls.__name__ == tool_type.__name__
        ),
        None,
    )

    if not tool_id:
        raise RuntimeError(
            f"Tool type {tool_type.__name__} not found in the BUILT_IN_TOOLS list."
        )

    db_tool = db_session.execute(
        select(Tool).where(Tool.in_code_tool_id == tool_id)
    ).scalar_one_or_none()

    if not db_tool:
        raise RuntimeError(f"Tool type {tool_type.__name__} not found in the database.")

    return db_tool


def create_tool_call_no_commit(
    chat_session_id: UUID,
    parent_chat_message_id: int | None,
    turn_number: int,
    tool_id: int,
    tool_call_id: str,
    tool_call_arguments: dict[str, Any],
    tool_call_response: Any,
    tool_call_tokens: int,
    db_session: Session,
    *,
    parent_tool_call_id: int | None = None,
    reasoning_tokens: str | None = None,
    generated_images: list[dict] | None = None,
    tab_index: int = 0,
    add_only: bool = True,
) -> ToolCall:
    """
    Create a ToolCall entry in the database.

    Args:
        chat_session_id: The chat session ID
        parent_chat_message_id: The parent chat message ID
        turn_number: The turn number for this tool call
        tool_id: The tool ID
        tool_call_id: The tool call ID (string identifier from LLM)
        tool_call_arguments: The tool call arguments
        tool_call_response: The tool call response
        tool_call_tokens: The number of tokens in the tool call arguments
        db_session: The database session
        parent_tool_call_id: Optional parent tool call ID (for nested tool calls)
        reasoning_tokens: Optional reasoning tokens
        generated_images: Optional list of generated image metadata for replay
        tab_index: Index order of tool calls from the LLM for parallel tool calls
        commit: If True, commit the transaction; if False, flush only

    Returns:
        The created ToolCall object
    """
    tool_call = ToolCall(
        chat_session_id=chat_session_id,
        parent_chat_message_id=parent_chat_message_id,
        parent_tool_call_id=parent_tool_call_id,
        turn_number=turn_number,
        tab_index=tab_index,
        tool_id=tool_id,
        tool_call_id=tool_call_id,
        reasoning_tokens=(
            sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens
        ),
        tool_call_arguments=sanitize_json_like(tool_call_arguments),
        tool_call_response=sanitize_json_like(tool_call_response),
        tool_call_tokens=tool_call_tokens,
        generated_images=sanitize_json_like(generated_images),
    )

    db_session.add(tool_call)
    if not add_only:
        db_session.add(tool_call)
    else:
        db_session.flush()
    return tool_call


================================================
FILE: backend/onyx/db/usage.py
================================================
"""Database interactions for tenant usage tracking (cloud usage limits)."""

from datetime import datetime
from datetime import timezone
from enum import Enum

from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session

from onyx.db.models import TenantUsage
from onyx.utils.logger import setup_logger
from shared_configs.configs import USAGE_LIMIT_WINDOW_SECONDS

logger = setup_logger()


class UsageType(str, Enum):
    """Types of usage that can be tracked and limited."""

    LLM_COST = "llm_cost_cents"
    CHUNKS_INDEXED = "chunks_indexed"
    API_CALLS = "api_calls"
    NON_STREAMING_API_CALLS = "non_streaming_api_calls"


class TenantUsageStats(BaseModel):
    """Current usage statistics for a tenant."""

    window_start: datetime
    llm_cost_cents: float
    chunks_indexed: int
    api_calls: int
    non_streaming_api_calls: int


class UsageLimitExceededError(Exception):
    """Raised when a tenant exceeds their usage limit."""

    def __init__(self, usage_type: UsageType, current: float, limit: float):
        self.usage_type = usage_type
        self.current = current
        self.limit = limit
        super().__init__(
            f"Usage limit exceeded for {usage_type.value}: current usage {current}, limit {limit}"
        )


def get_current_window_start() -> datetime:
    """
    Calculate the start of the current usage window.

    Uses fixed windows aligned to Monday 00:00 UTC for predictability.
    The window duration is configured via USAGE_LIMIT_WINDOW_SECONDS.
    """
    now = datetime.now(timezone.utc)
    # For weekly windows (default), align to Monday 00:00 UTC
    if USAGE_LIMIT_WINDOW_SECONDS == 604800:  # 1 week
        # Get the start of the current week (Monday)
        days_since_monday = now.weekday()
        window_start = now.replace(
            hour=0, minute=0, second=0, microsecond=0
        ) - __import__("datetime").timedelta(days=days_since_monday)
        return window_start

    # For other window sizes, use epoch-aligned windows
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    seconds_since_epoch = int((now - epoch).total_seconds())
    window_number = seconds_since_epoch // USAGE_LIMIT_WINDOW_SECONDS
    window_start_seconds = window_number * USAGE_LIMIT_WINDOW_SECONDS
    return epoch + __import__("datetime").timedelta(seconds=window_start_seconds)


def get_or_create_tenant_usage(
    db_session: Session,
    window_start: datetime | None = None,
) -> TenantUsage:
    """
    Get or create the usage record for the current window.

    Uses INSERT ... ON CONFLICT DO UPDATE to atomically create or get the record,
    avoiding TOCTOU race conditions where two concurrent requests could both
    attempt to insert a new record.
    """
    if window_start is None:
        window_start = get_current_window_start()

    # Atomic upsert: insert if not exists, or update a field to itself if exists
    # This ensures we always get back a valid row without race conditions
    stmt = (
        pg_insert(TenantUsage)
        .values(
            window_start=window_start,
            llm_cost_cents=0.0,
            chunks_indexed=0,
            api_calls=0,
            non_streaming_api_calls=0,
        )
        .on_conflict_do_update(
            index_elements=["window_start"],
            # No-op update: just set a field to its current value
            # This ensures the row is returned even on conflict
            set_={"llm_cost_cents": TenantUsage.llm_cost_cents},
        )
        .returning(TenantUsage)
    )

    result = db_session.execute(stmt).scalar_one()
    db_session.flush()

    return result


def get_tenant_usage_stats(
    db_session: Session,
    window_start: datetime | None = None,
) -> TenantUsageStats:
    """Get the current usage statistics for the tenant (read-only, no lock)."""
    if window_start is None:
        window_start = get_current_window_start()

    usage = db_session.execute(
        select(TenantUsage).where(TenantUsage.window_start == window_start)
    ).scalar_one_or_none()

    if usage is None:
        # No usage recorded yet for this window
        return TenantUsageStats(
            window_start=window_start,
            llm_cost_cents=0.0,
            chunks_indexed=0,
            api_calls=0,
            non_streaming_api_calls=0,
        )

    return TenantUsageStats(
        window_start=usage.window_start,
        llm_cost_cents=usage.llm_cost_cents,
        chunks_indexed=usage.chunks_indexed,
        api_calls=usage.api_calls,
        non_streaming_api_calls=usage.non_streaming_api_calls,
    )


def increment_usage(
    db_session: Session,
    usage_type: UsageType,
    amount: float | int,
) -> None:
    """
    Atomically increment a usage counter.

    Uses row-level locking to prevent race conditions.
    The caller should handle the transaction commit.
    """
    usage = get_or_create_tenant_usage(db_session)

    if usage_type == UsageType.LLM_COST:
        usage.llm_cost_cents += float(amount)
    elif usage_type == UsageType.CHUNKS_INDEXED:
        usage.chunks_indexed += int(amount)
    elif usage_type == UsageType.API_CALLS:
        usage.api_calls += int(amount)
    elif usage_type == UsageType.NON_STREAMING_API_CALLS:
        usage.non_streaming_api_calls += int(amount)

    db_session.flush()


def check_usage_limit(
    db_session: Session,
    usage_type: UsageType,
    limit: float | int,
    pending_amount: float | int = 0,
) -> None:
    """
    Check if the current usage plus pending amount would exceed the limit.

    Args:
        db_session: Database session
        usage_type: Type of usage to check
        limit: The maximum allowed usage
        pending_amount: Amount about to be used (to check before committing)

    Raises:
        UsageLimitExceededError: If usage would exceed the limit
    """
    stats = get_tenant_usage_stats(db_session)

    current_value: float
    if usage_type == UsageType.LLM_COST:
        current_value = stats.llm_cost_cents
    elif usage_type == UsageType.CHUNKS_INDEXED:
        current_value = float(stats.chunks_indexed)
    elif usage_type == UsageType.API_CALLS:
        current_value = float(stats.api_calls)
    elif usage_type == UsageType.NON_STREAMING_API_CALLS:
        current_value = float(stats.non_streaming_api_calls)
    else:
        current_value = 0.0

    if current_value + pending_amount > limit:
        raise UsageLimitExceededError(
            usage_type=usage_type,
            current=current_value + pending_amount,
            limit=float(limit),
        )


================================================
FILE: backend/onyx/db/user_file.py
================================================
import datetime
from uuid import UUID

from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.db.models import Persona
from onyx.db.models import Project__UserFile
from onyx.db.models import UserFile


def fetch_chunk_counts_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> list[tuple[str, int]]:
    """
    Return a list of (user_file_id, chunk_count) tuples.
    If a user_file_id is not found in the database, it will be returned with a chunk_count of 0.
    """
    stmt = select(UserFile.id, UserFile.chunk_count).where(
        UserFile.id.in_(user_file_ids)
    )

    results = db_session.execute(stmt).all()

    # Create a dictionary of user_file_id to chunk_count
    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}

    # Return a list of tuples, preserving `None` for documents not found or with
    # an unknown chunk count. Callers should handle the `None` case and fall
    # back to an existence check against the vector DB if necessary.
    return [
        (user_file_id, chunk_counts.get(user_file_id, 0))
        for user_file_id in user_file_ids
    ]


def calculate_user_files_token_count(file_ids: list[UUID], db_session: Session) -> int:
    """Calculate total token count for specified files"""
    total_tokens = 0

    # Get tokens from individual files
    if file_ids:
        file_tokens = (
            db_session.query(func.sum(UserFile.token_count))
            .filter(UserFile.id.in_(file_ids))
            .scalar()
            or 0
        )
        total_tokens += file_tokens

    return total_tokens


def fetch_user_project_ids_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, list[int]]:
    """Fetch user project ids for specified user files"""
    user_file_uuid_ids = [UUID(user_file_id) for user_file_id in user_file_ids]
    stmt = select(Project__UserFile.user_file_id, Project__UserFile.project_id).where(
        Project__UserFile.user_file_id.in_(user_file_uuid_ids)
    )
    rows = db_session.execute(stmt).all()

    user_file_id_to_project_ids: dict[str, list[int]] = {
        user_file_id: [] for user_file_id in user_file_ids
    }
    for user_file_id, project_id in rows:
        user_file_id_to_project_ids[str(user_file_id)].append(project_id)

    return user_file_id_to_project_ids


def fetch_persona_ids_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, list[int]]:
    """Fetch persona (assistant) ids for specified user files."""
    stmt = (
        select(UserFile)
        .where(UserFile.id.in_(user_file_ids))
        .options(selectinload(UserFile.assistants))
    )
    results = db_session.execute(stmt).scalars().all()
    return {
        str(user_file.id): [persona.id for persona in user_file.assistants]
        for user_file in results
    }


def update_last_accessed_at_for_user_files(
    user_file_ids: list[UUID],
    db_session: Session,
) -> None:
    """Update `last_accessed_at` to now (UTC) for the given user files."""
    if not user_file_ids:
        return
    now = datetime.datetime.now(datetime.timezone.utc)
    (
        db_session.query(UserFile)
        .filter(UserFile.id.in_(user_file_ids))
        .update({UserFile.last_accessed_at: now}, synchronize_session=False)
    )
    db_session.commit()


def get_file_id_by_user_file_id(user_file_id: str, db_session: Session) -> str | None:
    user_file = db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
    if user_file:
        return user_file.file_id
    return None


def get_file_ids_by_user_file_ids(
    user_file_ids: list[UUID], db_session: Session
) -> list[str]:
    user_files = db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
    return [user_file.file_id for user_file in user_files]


def fetch_user_files_with_access_relationships(
    user_file_ids: list[str],
    db_session: Session,
    eager_load_groups: bool = False,
) -> list[UserFile]:
    """Fetch user files with the owner and assistant relationships
    eagerly loaded (needed for computing access control).

    When eager_load_groups is True, Persona.groups is also loaded so that
    callers can extract user-group names without a second DB round-trip."""
    persona_sub_options = [
        selectinload(Persona.users),
        selectinload(Persona.user),
    ]
    if eager_load_groups:
        persona_sub_options.append(selectinload(Persona.groups))

    return (
        db_session.query(UserFile)
        .options(
            joinedload(UserFile.user),
            selectinload(UserFile.assistants).options(*persona_sub_options),
        )
        .filter(UserFile.id.in_(user_file_ids))
        .all()
    )


================================================
FILE: backend/onyx/db/user_preferences.py
================================================
from collections.abc import Sequence
from uuid import UUID

from sqlalchemy import Column
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from onyx.db.enums import DefaultAppMode
from onyx.db.enums import ThemePreference
from onyx.db.models import AccessToken
from onyx.db.models import Assistant__UserSpecificConfig
from onyx.db.models import Memory
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.permissions import recompute_user_permissions__no_commit
from onyx.db.users import assign_user_to_default_groups__no_commit
from onyx.server.manage.models import MemoryItem
from onyx.server.manage.models import UserSpecificAssistantPreference
from onyx.utils.logger import setup_logger


logger = setup_logger()


_ROLE_TO_ACCOUNT_TYPE: dict[UserRole, AccountType] = {
    UserRole.SLACK_USER: AccountType.BOT,
    UserRole.EXT_PERM_USER: AccountType.EXT_PERM_USER,
}


def update_user_role(
    user: User,
    new_role: UserRole,
    db_session: Session,
) -> None:
    """Update a user's role in the database.
    Dual-writes account_type to keep it in sync with role and
    reconciles default-group membership (Admin / Basic)."""
    old_role = user.role
    user.role = new_role
    # Note: setting account_type to BOT or EXT_PERM_USER causes
    # assign_user_to_default_groups__no_commit to early-return, which is
    # intentional — these account types should not be in default groups.
    if new_role in _ROLE_TO_ACCOUNT_TYPE:
        user.account_type = _ROLE_TO_ACCOUNT_TYPE[new_role]
    elif user.account_type in (AccountType.BOT, AccountType.EXT_PERM_USER):
        # Upgrading from a non-web-login account type to a web role
        user.account_type = AccountType.STANDARD

    # Reconcile default-group membership when the role changes.
    if old_role != new_role:
        # Remove from all default groups first.
        db_session.execute(
            delete(User__UserGroup).where(
                User__UserGroup.user_id == user.id,
                User__UserGroup.user_group_id.in_(
                    select(UserGroup.id).where(UserGroup.is_default.is_(True))
                ),
            )
        )

        # Re-assign to the correct default group (skip for LIMITED).
        if new_role != UserRole.LIMITED:
            assign_user_to_default_groups__no_commit(
                db_session,
                user,
                is_admin=(new_role == UserRole.ADMIN),
            )

        recompute_user_permissions__no_commit(user.id, db_session)

    db_session.commit()


def deactivate_user(
    user: User,
    db_session: Session,
) -> None:
    """Deactivate a user by setting is_active to False."""
    user.is_active = False
    db_session.add(user)
    db_session.commit()


def activate_user(
    user: User,
    db_session: Session,
) -> None:
    """Activate a user by setting is_active to True.

    Also reconciles default-group membership — the user may have been
    created while inactive or deactivated before the backfill migration.
    """
    user.is_active = True
    if user.role != UserRole.LIMITED:
        assign_user_to_default_groups__no_commit(
            db_session, user, is_admin=(user.role == UserRole.ADMIN)
        )
    db_session.add(user)
    db_session.commit()


def get_latest_access_token_for_user(
    user_id: UUID,
    db_session: Session,
) -> AccessToken | None:
    """Get the most recent access token for a user."""
    try:
        result = db_session.execute(
            select(AccessToken)
            .where(AccessToken.user_id == user_id)  # type: ignore
            .order_by(desc(Column("created_at")))
            .limit(1)
        )
        return result.scalar_one_or_none()
    except Exception as e:
        logger.error(f"Error fetching AccessToken: {e}")
        return None


def update_user_temperature_override_enabled(
    user_id: UUID,
    temperature_override_enabled: bool,
    db_session: Session,
) -> None:
    """Update user's temperature override enabled setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(temperature_override_enabled=temperature_override_enabled)
    )
    db_session.commit()


def update_user_shortcut_enabled(
    user_id: UUID,
    shortcut_enabled: bool,
    db_session: Session,
) -> None:
    """Update user's shortcut enabled setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(shortcut_enabled=shortcut_enabled)
    )
    db_session.commit()


def update_user_auto_scroll(
    user_id: UUID,
    auto_scroll: bool | None,
    db_session: Session,
) -> None:
    """Update user's auto scroll setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(auto_scroll=auto_scroll)
    )
    db_session.commit()


def update_user_default_model(
    user_id: UUID,
    default_model: str | None,
    db_session: Session,
) -> None:
    """Update user's default model setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(default_model=default_model)
    )
    db_session.commit()


def update_user_theme_preference(
    user_id: UUID,
    theme_preference: ThemePreference,
    db_session: Session,
) -> None:
    """Update user's theme preference setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(theme_preference=theme_preference)
    )
    db_session.commit()


def update_user_chat_background(
    user_id: UUID,
    chat_background: str | None,
    db_session: Session,
) -> None:
    """Update user's chat background setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(chat_background=chat_background)
    )
    db_session.commit()


def update_user_default_app_mode(
    user_id: UUID,
    default_app_mode: DefaultAppMode,
    db_session: Session,
) -> None:
    """Update user's default app mode setting."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(default_app_mode=default_app_mode)
    )
    db_session.commit()


def update_user_personalization(
    user_id: UUID,
    *,
    personal_name: str | None,
    personal_role: str | None,
    use_memories: bool,
    enable_memory_tool: bool,
    memories: list[MemoryItem],
    user_preferences: str | None,
    db_session: Session,
) -> None:
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(
            personal_name=personal_name,
            personal_role=personal_role,
            use_memories=use_memories,
            enable_memory_tool=enable_memory_tool,
            user_preferences=user_preferences,
        )
    )

    # ID-based upsert: use real DB IDs from the frontend to match memories.
    incoming_ids = {m.id for m in memories if m.id is not None}

    # Delete existing rows not in the incoming set (scoped to user_id)
    existing_memories = list(
        db_session.scalars(select(Memory).where(Memory.user_id == user_id)).all()
    )
    existing_ids = {mem.id for mem in existing_memories}
    ids_to_delete = existing_ids - incoming_ids
    if ids_to_delete:
        db_session.execute(
            delete(Memory).where(
                Memory.id.in_(ids_to_delete),
                Memory.user_id == user_id,
            )
        )

    # Update existing rows whose IDs match
    existing_by_id = {mem.id: mem for mem in existing_memories}
    for item in memories:
        if item.id is not None and item.id in existing_by_id:
            existing_by_id[item.id].memory_text = item.content

    # Create new rows for items without an ID
    new_items = [m for m in memories if m.id is None]
    if new_items:
        db_session.add_all(
            [Memory(user_id=user_id, memory_text=item.content) for item in new_items]
        )

    db_session.commit()


def get_memories_for_user(
    user_id: UUID,
    db_session: Session,
) -> Sequence[Memory]:
    return db_session.scalars(
        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.desc())
    ).all()


def update_user_pinned_assistants(
    user_id: UUID,
    pinned_assistants: list[int],
    db_session: Session,
) -> None:
    """Update user's pinned assistants list."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(pinned_assistants=pinned_assistants)
    )
    db_session.commit()


def update_user_assistant_visibility(
    user_id: UUID,
    hidden_assistants: list[int] | None,
    visible_assistants: list[int] | None,
    chosen_assistants: list[int] | None,
    db_session: Session,
) -> None:
    """Update user's assistant visibility settings."""
    db_session.execute(
        update(User)
        .where(User.id == user_id)  # type: ignore
        .values(
            hidden_assistants=hidden_assistants,
            visible_assistants=visible_assistants,
            chosen_assistants=chosen_assistants,
        )
    )
    db_session.commit()


def get_all_user_assistant_specific_configs(
    user_id: UUID,
    db_session: Session,
) -> Sequence[Assistant__UserSpecificConfig]:
    """Get the full user assistant specific config for a specific assistant and user."""
    return db_session.scalars(
        select(Assistant__UserSpecificConfig).where(
            Assistant__UserSpecificConfig.user_id == user_id
        )
    ).all()


def update_assistant_preferences(
    assistant_id: int,
    user_id: UUID,
    new_assistant_preference: UserSpecificAssistantPreference,
    db_session: Session,
) -> None:
    """Update the disabled tools for a specific assistant for a specific user."""
    # First check if a config already exists
    result = db_session.execute(
        select(Assistant__UserSpecificConfig)
        .where(Assistant__UserSpecificConfig.assistant_id == assistant_id)
        .where(Assistant__UserSpecificConfig.user_id == user_id)
    )
    config = result.scalar_one_or_none()

    if config:
        # Update existing config
        config.disabled_tool_ids = new_assistant_preference.disabled_tool_ids
    else:
        # Create new config
        config = Assistant__UserSpecificConfig(
            assistant_id=assistant_id,
            user_id=user_id,
            disabled_tool_ids=new_assistant_preference.disabled_tool_ids,
        )
        db_session.add(config)

    db_session.commit()


================================================
FILE: backend/onyx/db/users.py
================================================
from collections.abc import Sequence
from typing import Any
from uuid import UUID

from fastapi import HTTPException
from fastapi_users.password import PasswordHelper
from sqlalchemy import case
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from sqlalchemy.sql import expression
from sqlalchemy.sql.elements import ColumnElement
from sqlalchemy.sql.elements import KeyedColumnElement
from sqlalchemy.sql.expression import or_

from onyx.auth.invited_users import remove_user_from_invited_users
from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from onyx.configs.constants import NO_AUTH_PLACEHOLDER_USER_EMAIL
from onyx.db.enums import AccountType
from onyx.db.models import DocumentSet
from onyx.db.models import DocumentSet__User
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import SamlAccount
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

logger = setup_logger()


def validate_user_role_update(
    requested_role: UserRole,
    current_role: UserRole,
    current_account_type: AccountType,
    explicit_override: bool = False,
) -> None:
    """
    Validate that a user role update is valid.
    Assumed only admins can hit this endpoint.
    raise if:
    - requested role is a curator
    - requested role is a slack user
    - requested role is an external permissioned user
    - requested role is a limited user
    - current account type is BOT (slack user)
    - current account type is EXT_PERM_USER
    - current role is a limited user
    """

    if current_account_type == AccountType.BOT:
        raise HTTPException(
            status_code=400,
            detail="To change a Slack User's role, they must first login to Onyx via the web app.",
        )

    if current_account_type == AccountType.EXT_PERM_USER:
        raise HTTPException(
            status_code=400,
            detail="To change an External Permissioned User's role, they must first login to Onyx via the web app.",
        )

    if current_role == UserRole.LIMITED:
        raise HTTPException(
            status_code=400,
            detail="To change a Limited User's role, they must first login to Onyx via the web app.",
        )

    if explicit_override:
        return

    if requested_role == UserRole.CURATOR:
        # This shouldn't happen, but just in case
        raise HTTPException(
            status_code=400,
            detail="Curator role must be set via the User Group Menu",
        )

    if requested_role == UserRole.LIMITED:
        # This shouldn't happen, but just in case
        raise HTTPException(
            status_code=400,
            detail=(
                "A user cannot be set to a Limited User role. "
                "This role is automatically assigned to users through certain endpoints in the API."
            ),
        )

    if requested_role == UserRole.SLACK_USER:
        # This shouldn't happen, but just in case
        raise HTTPException(
            status_code=400,
            detail=(
                "A user cannot be set to a Slack User role. "
                "This role is automatically assigned to users who only use Onyx via Slack."
            ),
        )

    if requested_role == UserRole.EXT_PERM_USER:
        # This shouldn't happen, but just in case
        raise HTTPException(
            status_code=400,
            detail=(
                "A user cannot be set to an External Permissioned User role. "
                "This role is automatically assigned to users who have been "
                "pulled in to the system via an external permissions system."
            ),
        )


def get_all_users(
    db_session: Session,
    email_filter_string: str | None = None,
    include_external: bool = False,
) -> Sequence[User]:
    """List all users. No pagination as of now, as the # of users
    is assumed to be relatively small (<< 1 million)"""
    stmt = select(User)

    # Exclude system users (anonymous user, no-auth placeholder)
    stmt = stmt.where(User.email != ANONYMOUS_USER_EMAIL)  # type: ignore
    stmt = stmt.where(User.email != NO_AUTH_PLACEHOLDER_USER_EMAIL)  # type: ignore

    if not include_external:
        stmt = stmt.where(User.role != UserRole.EXT_PERM_USER)

    if email_filter_string is not None:
        stmt = stmt.where(User.email.ilike(f"%{email_filter_string}%"))  # type: ignore

    return db_session.scalars(stmt).unique().all()


def _get_accepted_user_where_clause(
    email_filter_string: str | None = None,
    roles_filter: list[UserRole] = [],
    include_external: bool = False,
    is_active_filter: bool | None = None,
) -> list[ColumnElement[bool]]:
    """
    Generates a SQLAlchemy where clause for filtering users based on the provided parameters.
    This is used to build the filters for the function that retrieves the users for the users table in the admin panel.

    Parameters:
    - email_filter_string: A substring to filter user emails. Only users whose emails contain this substring will be included.
    - is_active_filter: When True, only active users will be included. When False, only inactive users will be included.
    - roles_filter: A list of user roles to filter by. Only users with roles in this list will be included.
    - include_external: If False, external permissioned users will be excluded.

    Returns:
    - list: A list of conditions to be used in a SQLAlchemy query to filter users.
    """

    # Access table columns directly via __table__.c to get proper SQLAlchemy column types
    # This ensures type checking works correctly for SQL operations like ilike, endswith, and is_
    email_col: KeyedColumnElement[Any] = User.__table__.c.email
    is_active_col: KeyedColumnElement[Any] = User.__table__.c.is_active

    where_clause: list[ColumnElement[bool]] = [
        expression.not_(email_col.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN)),
        # Exclude system users (anonymous user, no-auth placeholder)
        email_col != ANONYMOUS_USER_EMAIL,
        email_col != NO_AUTH_PLACEHOLDER_USER_EMAIL,
    ]

    if not include_external:
        where_clause.append(User.role != UserRole.EXT_PERM_USER)

    if email_filter_string is not None:
        personal_name_col: KeyedColumnElement[Any] = User.__table__.c.personal_name
        where_clause.append(
            or_(
                email_col.ilike(f"%{email_filter_string}%"),
                personal_name_col.ilike(f"%{email_filter_string}%"),
            )
        )

    if roles_filter:
        where_clause.append(User.role.in_(roles_filter))

    if is_active_filter is not None:
        where_clause.append(is_active_col.is_(is_active_filter))

    return where_clause


def get_all_accepted_users(
    db_session: Session,
    include_external: bool = False,
) -> Sequence[User]:
    """Returns all accepted users without pagination.
    Uses the same filtering as the paginated endpoint but without
    search, role, or active filters."""
    stmt = select(User)
    where_clause = _get_accepted_user_where_clause(
        include_external=include_external,
    )
    stmt = stmt.where(*where_clause).order_by(User.email)
    return db_session.scalars(stmt).unique().all()


def get_page_of_filtered_users(
    db_session: Session,
    page_size: int,
    page_num: int,
    email_filter_string: str | None = None,
    is_active_filter: bool | None = None,
    roles_filter: list[UserRole] = [],
    include_external: bool = False,
) -> Sequence[User]:
    users_stmt = select(User)

    where_clause = _get_accepted_user_where_clause(
        email_filter_string=email_filter_string,
        roles_filter=roles_filter,
        include_external=include_external,
        is_active_filter=is_active_filter,
    )
    # Apply pagination
    users_stmt = users_stmt.offset((page_num) * page_size).limit(page_size)
    # Apply filtering
    users_stmt = users_stmt.where(*where_clause)

    return db_session.scalars(users_stmt).unique().all()


def get_total_filtered_users_count(
    db_session: Session,
    email_filter_string: str | None = None,
    is_active_filter: bool | None = None,
    roles_filter: list[UserRole] = [],
    include_external: bool = False,
) -> int:
    where_clause = _get_accepted_user_where_clause(
        email_filter_string=email_filter_string,
        roles_filter=roles_filter,
        include_external=include_external,
        is_active_filter=is_active_filter,
    )
    total_count_stmt = select(func.count()).select_from(User)
    # Apply filtering
    total_count_stmt = total_count_stmt.where(*where_clause)

    return db_session.scalar(total_count_stmt) or 0


def get_user_counts_by_role_and_status(
    db_session: Session,
) -> dict[str, dict[str, int]]:
    """Returns user counts grouped by role and by active/inactive status.

    Excludes API key users, anonymous users, and no-auth placeholder users.
    Uses a single query with conditional aggregation.
    """
    base_where = _get_accepted_user_where_clause()
    role_col = User.__table__.c.role
    is_active_col = User.__table__.c.is_active

    stmt = (
        select(
            role_col,
            func.count().label("total"),
            func.sum(case((is_active_col.is_(True), 1), else_=0)).label("active"),
            func.sum(case((is_active_col.is_(False), 1), else_=0)).label("inactive"),
        )
        .where(*base_where)
        .group_by(role_col)
    )

    role_counts: dict[str, int] = {}
    status_counts: dict[str, int] = {"active": 0, "inactive": 0}

    for role_val, total, active, inactive in db_session.execute(stmt).all():
        key = role_val.value if hasattr(role_val, "value") else str(role_val)
        role_counts[key] = total
        status_counts["active"] += active or 0
        status_counts["inactive"] += inactive or 0

    return {"role_counts": role_counts, "status_counts": status_counts}


def get_user_by_email(email: str, db_session: Session) -> User | None:
    user = (
        db_session.query(User)
        .filter(func.lower(User.email) == func.lower(email))
        .first()
    )
    return user


def fetch_user_by_id(db_session: Session, user_id: UUID) -> User | None:
    return db_session.query(User).filter(User.id == user_id).first()  # type: ignore


def _generate_slack_user(email: str) -> User:
    fastapi_users_pw_helper = PasswordHelper()
    password = fastapi_users_pw_helper.generate()
    hashed_pass = fastapi_users_pw_helper.hash(password)
    return User(
        email=email,
        hashed_password=hashed_pass,
        role=UserRole.SLACK_USER,
        account_type=AccountType.BOT,
    )


def add_slack_user_if_not_exists(db_session: Session, email: str) -> User:
    email = email.lower()
    user = get_user_by_email(email, db_session)
    if user is not None:
        # If the user is an external permissioned user, we update it to a slack user
        if user.account_type == AccountType.EXT_PERM_USER:
            user.role = UserRole.SLACK_USER
            user.account_type = AccountType.BOT
            db_session.commit()
        return user

    user = _generate_slack_user(email=email)
    db_session.add(user)
    db_session.commit()
    return user


def _get_users_by_emails(
    db_session: Session, lower_emails: list[str]
) -> tuple[list[User], list[str]]:
    """given a list of lowercase emails,
    returns a list[User] of Users whose emails match and a list[str]
    the missing emails that had no User"""
    stmt = select(User).filter(func.lower(User.email).in_(lower_emails))
    found_users = list(db_session.scalars(stmt).unique().all())  # Convert to list

    # Extract found emails and convert to lowercase to avoid case sensitivity issues
    found_users_emails = [user.email.lower() for user in found_users]

    # Separate emails for users that were not found
    missing_user_emails = [
        email for email in lower_emails if email not in found_users_emails
    ]
    return found_users, missing_user_emails


def _generate_ext_permissioned_user(email: str) -> User:
    fastapi_users_pw_helper = PasswordHelper()
    password = fastapi_users_pw_helper.generate()
    hashed_pass = fastapi_users_pw_helper.hash(password)
    return User(
        email=email,
        hashed_password=hashed_pass,
        role=UserRole.EXT_PERM_USER,
        account_type=AccountType.EXT_PERM_USER,
    )


def batch_add_ext_perm_user_if_not_exists(
    db_session: Session, emails: list[str], continue_on_error: bool = False
) -> list[User]:
    lower_emails = [email.lower() for email in emails]
    found_users, missing_lower_emails = _get_users_by_emails(db_session, lower_emails)

    # Use savepoints (begin_nested) so that a failed insert only rolls back
    # that single user, not the entire transaction. A plain rollback() would
    # discard all previously flushed users in the same transaction.
    # We also avoid add_all() because SQLAlchemy 2.0's insertmanyvalues
    # batch path hits a UUID sentinel mismatch with server_default columns.
    for email in missing_lower_emails:
        user = _generate_ext_permissioned_user(email=email)
        savepoint = db_session.begin_nested()
        try:
            db_session.add(user)
            savepoint.commit()
        except IntegrityError:
            savepoint.rollback()
            if not continue_on_error:
                raise

    db_session.commit()
    # Fetch all users again to ensure we have the most up-to-date list
    all_users, _ = _get_users_by_emails(db_session, lower_emails)
    return all_users


def assign_user_to_default_groups__no_commit(
    db_session: Session,
    user: User,
    is_admin: bool = False,
) -> None:
    """Assign a newly created user to the appropriate default group.

    Does NOT commit — callers must commit the session themselves so that
    group assignment can be part of the same transaction as user creation.

    Args:
        is_admin: If True, assign to Admin default group; otherwise Basic.
            Callers determine this from their own context (e.g. user_count,
            admin email list, explicit choice). Defaults to False (Basic).
    """
    if user.account_type in (
        AccountType.BOT,
        AccountType.EXT_PERM_USER,
        AccountType.ANONYMOUS,
    ):
        return

    target_group_name = "Admin" if is_admin else "Basic"

    default_group = (
        db_session.query(UserGroup)
        .filter(
            UserGroup.name == target_group_name,
            UserGroup.is_default.is_(True),
        )
        .first()
    )

    if default_group is None:
        raise RuntimeError(
            f"Default group '{target_group_name}' not found. "
            f"Cannot assign user {user.email} to a group. "
            f"Ensure the seed_default_groups migration has run."
        )

    # Check if the user is already in the group
    existing = (
        db_session.query(User__UserGroup)
        .filter(
            User__UserGroup.user_id == user.id,
            User__UserGroup.user_group_id == default_group.id,
        )
        .first()
    )
    if existing is not None:
        return

    savepoint = db_session.begin_nested()
    try:
        db_session.add(
            User__UserGroup(
                user_id=user.id,
                user_group_id=default_group.id,
            )
        )
        db_session.flush()
    except IntegrityError:
        # Race condition: another transaction inserted this membership
        # between our SELECT and INSERT. The savepoint isolates the failure
        # so the outer transaction (user creation) stays intact.
        savepoint.rollback()
        return

    from onyx.db.permissions import recompute_user_permissions__no_commit

    recompute_user_permissions__no_commit(user.id, db_session)

    logger.info(f"Assigned user {user.email} to default group '{default_group.name}'")


def delete_user_from_db(
    user_to_delete: User,
    db_session: Session,
) -> None:
    for oauth_account in user_to_delete.oauth_accounts:
        db_session.delete(oauth_account)

    fetch_ee_implementation_or_noop(
        "onyx.db.external_perm",
        "delete_user__ext_group_for_user__no_commit",
    )(
        db_session=db_session,
        user_id=user_to_delete.id,
    )
    db_session.query(SamlAccount).filter(
        SamlAccount.user_id == user_to_delete.id
    ).delete()
    # Null out ownership on document sets and personas so they're
    # preserved for other users instead of being cascade-deleted
    db_session.query(DocumentSet).filter(
        DocumentSet.user_id == user_to_delete.id
    ).update({DocumentSet.user_id: None})
    db_session.query(Persona).filter(Persona.user_id == user_to_delete.id).update(
        {Persona.user_id: None}
    )

    db_session.query(DocumentSet__User).filter(
        DocumentSet__User.user_id == user_to_delete.id
    ).delete()
    db_session.query(Persona__User).filter(
        Persona__User.user_id == user_to_delete.id
    ).delete()
    db_session.query(User__UserGroup).filter(
        User__UserGroup.user_id == user_to_delete.id
    ).delete()
    db_session.delete(user_to_delete)
    db_session.commit()

    # NOTE: edge case may exist with race conditions
    # with this `invited user` scheme generally.
    remove_user_from_invited_users(user_to_delete.email)


def batch_get_user_groups(
    db_session: Session,
    user_ids: list[UUID],
    include_default: bool = False,
) -> dict[UUID, list[tuple[int, str]]]:
    """Fetch group memberships for a batch of users in a single query.
    Returns a mapping of user_id -> list of (group_id, group_name) tuples."""
    if not user_ids:
        return {}

    stmt = (
        select(
            User__UserGroup.user_id,
            UserGroup.id,
            UserGroup.name,
        )
        .join(UserGroup, UserGroup.id == User__UserGroup.user_group_id)
        .where(User__UserGroup.user_id.in_(user_ids))
    )
    if not include_default:
        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712

    rows = db_session.execute(stmt).all()

    result: dict[UUID, list[tuple[int, str]]] = {uid: [] for uid in user_ids}
    for user_id, group_id, group_name in rows:
        result[user_id].append((group_id, group_name))
    return result


================================================
FILE: backend/onyx/db/utils.py
================================================
from enum import Enum
from typing import Any

from psycopg2 import errorcodes
from psycopg2 import OperationalError
from pydantic import BaseModel
from sqlalchemy import inspect

from onyx.db.models import Base


def model_to_dict(model: Base) -> dict[str, Any]:
    return {c.key: getattr(model, c.key) for c in inspect(model).mapper.column_attrs}  # type: ignore


RETRYABLE_PG_CODES = {
    errorcodes.SERIALIZATION_FAILURE,  # '40001'
    errorcodes.DEADLOCK_DETECTED,  # '40P01'
    errorcodes.CONNECTION_EXCEPTION,  # '08000'
    errorcodes.CONNECTION_DOES_NOT_EXIST,  # '08003'
    errorcodes.CONNECTION_FAILURE,  # '08006'
    errorcodes.TRANSACTION_ROLLBACK,  # '40000'
}


def is_retryable_sqlalchemy_error(exc: BaseException) -> bool:
    """Helper function for use with tenacity's retry_if_exception as the callback"""
    if isinstance(exc, OperationalError):
        pgcode = getattr(getattr(exc, "orig", None), "pgcode", None)
        return pgcode in RETRYABLE_PG_CODES
    return False


class DocumentRow(BaseModel):
    id: str
    doc_metadata: dict[str, Any]
    external_user_group_ids: list[str]


class SortOrder(str, Enum):
    ASC = "asc"
    DESC = "desc"


class DiscordChannelView(BaseModel):
    channel_id: int
    channel_name: str
    channel_type: str = "text"  # text, forum
    is_private: bool = False  # True if @everyone cannot view the channel


================================================
FILE: backend/onyx/db/voice.py
================================================
from typing import Any
from uuid import UUID

from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.db.models import User
from onyx.db.models import VoiceProvider
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError

MIN_VOICE_PLAYBACK_SPEED = 0.5
MAX_VOICE_PLAYBACK_SPEED = 2.0


def fetch_voice_providers(db_session: Session) -> list[VoiceProvider]:
    """Fetch all voice providers."""
    return list(
        db_session.scalars(select(VoiceProvider).order_by(VoiceProvider.name)).all()
    )


def fetch_voice_provider_by_id(
    db_session: Session, provider_id: int
) -> VoiceProvider | None:
    """Fetch a voice provider by ID."""
    return db_session.scalar(
        select(VoiceProvider).where(VoiceProvider.id == provider_id)
    )


def fetch_default_stt_provider(db_session: Session) -> VoiceProvider | None:
    """Fetch the default STT provider."""
    return db_session.scalar(
        select(VoiceProvider).where(VoiceProvider.is_default_stt.is_(True))
    )


def fetch_default_tts_provider(db_session: Session) -> VoiceProvider | None:
    """Fetch the default TTS provider."""
    return db_session.scalar(
        select(VoiceProvider).where(VoiceProvider.is_default_tts.is_(True))
    )


def fetch_voice_provider_by_type(
    db_session: Session, provider_type: str
) -> VoiceProvider | None:
    """Fetch a voice provider by type."""
    return db_session.scalar(
        select(VoiceProvider).where(VoiceProvider.provider_type == provider_type)
    )


def upsert_voice_provider(
    *,
    db_session: Session,
    provider_id: int | None,
    name: str,
    provider_type: str,
    api_key: str | None,
    api_key_changed: bool,
    api_base: str | None = None,
    custom_config: dict[str, Any] | None = None,
    stt_model: str | None = None,
    tts_model: str | None = None,
    default_voice: str | None = None,
    activate_stt: bool = False,
    activate_tts: bool = False,
) -> VoiceProvider:
    """Create or update a voice provider."""
    provider: VoiceProvider | None = None

    if provider_id is not None:
        provider = fetch_voice_provider_by_id(db_session, provider_id)
        if provider is None:
            raise OnyxError(
                OnyxErrorCode.NOT_FOUND,
                f"No voice provider with id {provider_id} exists.",
            )
    else:
        provider = VoiceProvider()
        db_session.add(provider)

    # Apply updates
    provider.name = name
    provider.provider_type = provider_type
    provider.api_base = api_base
    provider.custom_config = custom_config
    provider.stt_model = stt_model
    provider.tts_model = tts_model
    provider.default_voice = default_voice

    # Only update API key if explicitly changed or if provider has no key
    if api_key_changed or provider.api_key is None:
        provider.api_key = api_key  # type: ignore[assignment]

    db_session.flush()

    if activate_stt:
        set_default_stt_provider(db_session=db_session, provider_id=provider.id)
    if activate_tts:
        set_default_tts_provider(db_session=db_session, provider_id=provider.id)

    db_session.refresh(provider)
    return provider


def delete_voice_provider(db_session: Session, provider_id: int) -> None:
    """Delete a voice provider by ID."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider:
        db_session.delete(provider)
        db_session.flush()


def set_default_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
    """Set a voice provider as the default STT provider."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider is None:
        raise OnyxError(
            OnyxErrorCode.NOT_FOUND,
            f"No voice provider with id {provider_id} exists.",
        )

    # Deactivate all other STT providers
    db_session.execute(
        update(VoiceProvider)
        .where(
            VoiceProvider.is_default_stt.is_(True),
            VoiceProvider.id != provider_id,
        )
        .values(is_default_stt=False)
    )

    # Activate this provider
    provider.is_default_stt = True

    db_session.flush()
    db_session.refresh(provider)
    return provider


def set_default_tts_provider(
    *, db_session: Session, provider_id: int, tts_model: str | None = None
) -> VoiceProvider:
    """Set a voice provider as the default TTS provider."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider is None:
        raise OnyxError(
            OnyxErrorCode.NOT_FOUND,
            f"No voice provider with id {provider_id} exists.",
        )

    # Deactivate all other TTS providers
    db_session.execute(
        update(VoiceProvider)
        .where(
            VoiceProvider.is_default_tts.is_(True),
            VoiceProvider.id != provider_id,
        )
        .values(is_default_tts=False)
    )

    # Activate this provider
    provider.is_default_tts = True

    # Update the TTS model if specified
    if tts_model is not None:
        provider.tts_model = tts_model

    db_session.flush()
    db_session.refresh(provider)
    return provider


def deactivate_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
    """Remove the default STT status from a voice provider."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider is None:
        raise OnyxError(
            OnyxErrorCode.NOT_FOUND,
            f"No voice provider with id {provider_id} exists.",
        )

    provider.is_default_stt = False

    db_session.flush()
    db_session.refresh(provider)
    return provider


def deactivate_tts_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
    """Remove the default TTS status from a voice provider."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider is None:
        raise OnyxError(
            OnyxErrorCode.NOT_FOUND,
            f"No voice provider with id {provider_id} exists.",
        )

    provider.is_default_tts = False

    db_session.flush()
    db_session.refresh(provider)
    return provider


# User voice preferences


def update_user_voice_settings(
    db_session: Session,
    user_id: UUID,
    auto_send: bool | None = None,
    auto_playback: bool | None = None,
    playback_speed: float | None = None,
) -> None:
    """Update user's voice settings.

    For all fields, None means "don't update this field".
    """
    values: dict[str, bool | float] = {}

    if auto_send is not None:
        values["voice_auto_send"] = auto_send
    if auto_playback is not None:
        values["voice_auto_playback"] = auto_playback
    if playback_speed is not None:
        values["voice_playback_speed"] = max(
            MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, playback_speed)
        )

    if values:
        db_session.execute(update(User).where(User.id == user_id).values(**values))  # type: ignore[arg-type]
        db_session.flush()


================================================
FILE: backend/onyx/db/web_search.py
================================================
from __future__ import annotations

from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.db.models import InternetContentProvider
from onyx.db.models import InternetSearchProvider
from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType


def fetch_web_search_providers(db_session: Session) -> list[InternetSearchProvider]:
    stmt = select(InternetSearchProvider).order_by(InternetSearchProvider.id.asc())
    return list(db_session.scalars(stmt).all())


def fetch_web_content_providers(db_session: Session) -> list[InternetContentProvider]:
    stmt = select(InternetContentProvider).order_by(InternetContentProvider.id.asc())
    return list(db_session.scalars(stmt).all())


def fetch_active_web_search_provider(
    db_session: Session,
) -> InternetSearchProvider | None:
    stmt = select(InternetSearchProvider).where(
        InternetSearchProvider.is_active.is_(True)
    )
    return db_session.scalars(stmt).first()


def fetch_web_search_provider_by_id(
    provider_id: int, db_session: Session
) -> InternetSearchProvider | None:
    return db_session.get(InternetSearchProvider, provider_id)


def fetch_web_search_provider_by_name(
    name: str, db_session: Session
) -> InternetSearchProvider | None:
    stmt = select(InternetSearchProvider).where(InternetSearchProvider.name.ilike(name))
    return db_session.scalars(stmt).first()


def fetch_web_search_provider_by_type(
    provider_type: WebSearchProviderType, db_session: Session
) -> InternetSearchProvider | None:
    stmt = select(InternetSearchProvider).where(
        InternetSearchProvider.provider_type == provider_type.value
    )
    return db_session.scalars(stmt).first()


def _ensure_unique_search_name(
    name: str, provider_id: int | None, db_session: Session
) -> None:
    existing = fetch_web_search_provider_by_name(name=name, db_session=db_session)
    if existing and existing.id != provider_id:
        raise ValueError(f"A web search provider named '{name}' already exists.")


def _apply_search_provider_updates(
    provider: InternetSearchProvider,
    *,
    name: str,
    provider_type: WebSearchProviderType,
    api_key: str | None,
    api_key_changed: bool,
    config: dict[str, str] | None,
) -> None:
    provider.name = name
    provider.provider_type = provider_type.value
    provider.config = config
    if api_key_changed or provider.api_key is None:
        # EncryptedString accepts str for writes, returns SensitiveValue for reads
        provider.api_key = api_key  # type: ignore[assignment]


def upsert_web_search_provider(
    *,
    provider_id: int | None,
    name: str,
    provider_type: WebSearchProviderType,
    api_key: str | None,
    api_key_changed: bool,
    config: dict[str, str] | None,
    activate: bool,
    db_session: Session,
) -> InternetSearchProvider:
    _ensure_unique_search_name(
        name=name, provider_id=provider_id, db_session=db_session
    )

    provider: InternetSearchProvider | None = None
    if provider_id is not None:
        provider = fetch_web_search_provider_by_id(provider_id, db_session)
        if provider is None:
            raise ValueError(f"No web search provider with id {provider_id} exists.")
    else:
        provider = InternetSearchProvider()
        db_session.add(provider)

    _apply_search_provider_updates(
        provider,
        name=name,
        provider_type=provider_type,
        api_key=api_key,
        api_key_changed=api_key_changed,
        config=config,
    )

    db_session.flush()

    if activate:
        set_active_web_search_provider(provider_id=provider.id, db_session=db_session)

    db_session.refresh(provider)
    return provider


def set_active_web_search_provider(
    *, provider_id: int | None, db_session: Session
) -> InternetSearchProvider:
    if provider_id is None:
        raise ValueError("Cannot activate a provider without an id.")

    provider = fetch_web_search_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web search provider with id {provider_id} exists.")

    db_session.execute(
        update(InternetSearchProvider)
        .where(
            InternetSearchProvider.is_active.is_(True),
            InternetSearchProvider.id != provider_id,
        )
        .values(is_active=False)
    )
    provider.is_active = True

    db_session.flush()
    db_session.refresh(provider)
    return provider


def deactivate_web_search_provider(
    *, provider_id: int | None, db_session: Session
) -> InternetSearchProvider:
    if provider_id is None:
        raise ValueError("Cannot deactivate a provider without an id.")

    provider = fetch_web_search_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web search provider with id {provider_id} exists.")

    provider.is_active = False

    db_session.flush()
    db_session.refresh(provider)
    return provider


def delete_web_search_provider(provider_id: int, db_session: Session) -> None:
    provider = fetch_web_search_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web search provider with id {provider_id} exists.")

    db_session.delete(provider)
    db_session.flush()

    db_session.commit()


# Content provider helpers


def fetch_active_web_content_provider(
    db_session: Session,
) -> InternetContentProvider | None:
    stmt = select(InternetContentProvider).where(
        InternetContentProvider.is_active.is_(True)
    )
    return db_session.scalars(stmt).first()


def fetch_web_content_provider_by_id(
    provider_id: int, db_session: Session
) -> InternetContentProvider | None:
    return db_session.get(InternetContentProvider, provider_id)


def fetch_web_content_provider_by_name(
    name: str, db_session: Session
) -> InternetContentProvider | None:
    stmt = select(InternetContentProvider).where(
        InternetContentProvider.name.ilike(name)
    )
    return db_session.scalars(stmt).first()


def fetch_web_content_provider_by_type(
    provider_type: WebContentProviderType, db_session: Session
) -> InternetContentProvider | None:
    stmt = select(InternetContentProvider).where(
        InternetContentProvider.provider_type == provider_type.value
    )
    return db_session.scalars(stmt).first()


def _ensure_unique_content_name(
    name: str, provider_id: int | None, db_session: Session
) -> None:
    existing = fetch_web_content_provider_by_name(name=name, db_session=db_session)
    if existing and existing.id != provider_id:
        raise ValueError(f"A web content provider named '{name}' already exists.")


def _apply_content_provider_updates(
    provider: InternetContentProvider,
    *,
    name: str,
    provider_type: WebContentProviderType,
    api_key: str | None,
    api_key_changed: bool,
    config: WebContentProviderConfig | None,
) -> None:
    provider.name = name
    provider.provider_type = provider_type.value
    provider.config = config
    if api_key_changed or provider.api_key is None:
        # EncryptedString accepts str for writes, returns SensitiveValue for reads
        provider.api_key = api_key  # type: ignore[assignment]


def upsert_web_content_provider(
    *,
    provider_id: int | None,
    name: str,
    provider_type: WebContentProviderType,
    api_key: str | None,
    api_key_changed: bool,
    config: WebContentProviderConfig | None,
    activate: bool,
    db_session: Session,
) -> InternetContentProvider:
    _ensure_unique_content_name(
        name=name, provider_id=provider_id, db_session=db_session
    )

    provider: InternetContentProvider | None = None
    if provider_id is not None:
        provider = fetch_web_content_provider_by_id(provider_id, db_session)
        if provider is None:
            raise ValueError(f"No web content provider with id {provider_id} exists.")
    else:
        provider = InternetContentProvider()
        db_session.add(provider)

    _apply_content_provider_updates(
        provider,
        name=name,
        provider_type=provider_type,
        api_key=api_key,
        api_key_changed=api_key_changed,
        config=config,
    )

    db_session.flush()

    if activate:
        set_active_web_content_provider(provider_id=provider.id, db_session=db_session)

    db_session.refresh(provider)
    return provider


def set_active_web_content_provider(
    *, provider_id: int | None, db_session: Session
) -> InternetContentProvider:
    if provider_id is None:
        raise ValueError("Cannot activate a provider without an id.")

    provider = fetch_web_content_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web content provider with id {provider_id} exists.")

    db_session.execute(
        update(InternetContentProvider)
        .where(
            InternetContentProvider.is_active.is_(True),
            InternetContentProvider.id != provider_id,
        )
        .values(is_active=False)
    )
    provider.is_active = True

    db_session.flush()
    db_session.refresh(provider)
    return provider


def deactivate_web_content_provider(
    *, provider_id: int | None, db_session: Session
) -> InternetContentProvider:
    if provider_id is None:
        raise ValueError("Cannot deactivate a provider without an id.")

    provider = fetch_web_content_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web content provider with id {provider_id} exists.")

    provider.is_active = False

    db_session.flush()
    db_session.refresh(provider)
    return provider


def delete_web_content_provider(provider_id: int, db_session: Session) -> None:
    provider = fetch_web_content_provider_by_id(provider_id, db_session)
    if provider is None:
        raise ValueError(f"No web content provider with id {provider_id} exists.")

    db_session.delete(provider)
    db_session.flush()

    db_session.commit()


================================================
FILE: backend/onyx/deep_research/__init__.py
================================================


================================================
FILE: backend/onyx/deep_research/dr_loop.py
================================================
# TODO: Notes for potential extensions and future improvements:
# 1. Allow tools that aren't search specific tools
# 2. Use user provided custom prompts
# 3. Save the plan for replay

import time
from collections.abc import Callable
from typing import cast

from sqlalchemy.orm import Session

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.emitter import Emitter
from onyx.chat.llm_loop import construct_message_history
from onyx.chat.llm_step import run_llm_step
from onyx.chat.llm_step import run_llm_step_pkt_generator
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ToolCallSimple
from onyx.configs.chat_configs import SKIP_DEEP_RESEARCH_CLARIFICATION
from onyx.configs.constants import MessageType
from onyx.db.tools import get_tool_by_name
from onyx.deep_research.dr_mock_tools import get_clarification_tool_definitions
from onyx.deep_research.dr_mock_tools import get_orchestrator_tools
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TOOL_NAME
from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_MESSAGE
from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_TOKEN_COUNT
from onyx.deep_research.utils import check_special_tool_calls
from onyx.deep_research.utils import create_think_tool_token_processor
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.models import ToolChoiceOptions
from onyx.llm.utils import model_is_reasoning_model
from onyx.prompts.deep_research.orchestration_layer import CLARIFICATION_PROMPT
from onyx.prompts.deep_research.orchestration_layer import FINAL_REPORT_PROMPT
from onyx.prompts.deep_research.orchestration_layer import FIRST_CYCLE_REMINDER
from onyx.prompts.deep_research.orchestration_layer import FIRST_CYCLE_REMINDER_TOKENS
from onyx.prompts.deep_research.orchestration_layer import (
    INTERNAL_SEARCH_CLARIFICATION_GUIDANCE,
)
from onyx.prompts.deep_research.orchestration_layer import (
    INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE,
)
from onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT
from onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT_REASONING
from onyx.prompts.deep_research.orchestration_layer import RESEARCH_PLAN_PROMPT
from onyx.prompts.deep_research.orchestration_layer import RESEARCH_PLAN_REMINDER
from onyx.prompts.deep_research.orchestration_layer import USER_FINAL_REPORT_QUERY
from onyx.prompts.prompt_utils import get_current_llm_day_time
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import DeepResearchPlanDelta
from onyx.server.query_and_chat.streaming_models import DeepResearchPlanStart
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from onyx.tools.fake_tools.research_agent import run_research_agent_calls
from onyx.tools.interface import Tool
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tracing.framework.create import function_span
from onyx.tracing.framework.create import trace
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

MAX_USER_MESSAGES_FOR_CONTEXT = 5
MAX_FINAL_REPORT_TOKENS = 20000

# 30 minute timeout before forcing final report generation
# NOTE: The overall execution may be much longer still because it could run a research cycle at minute 29
# and that runs for another nearly 30 minutes.
DEEP_RESEARCH_FORCE_REPORT_SECONDS = 30 * 60

# Might be something like (this gives a lot of leeway for change but typically the models don't do this):
# 0. Research topics 1-3
# 1. Think
# 2. Research topics 4-5
# 3. Think
# 4. Research topics 6 + something new or different from the plan
# 5. Think
# 6. Research, possibly something new or different from the plan
# 7. Think
# 8. Generate report
MAX_ORCHESTRATOR_CYCLES = 8

# Similar but without the 4 thinking tool calls
MAX_ORCHESTRATOR_CYCLES_REASONING = 4


def generate_final_report(
    history: list[ChatMessageSimple],
    research_plan: str,
    llm: LLM,
    token_counter: Callable[[str], int],
    state_container: ChatStateContainer,
    emitter: Emitter,
    turn_index: int,
    citation_mapping: CitationMapping,
    user_identity: LLMUserIdentity | None,
    saved_reasoning: str | None = None,
    pre_answer_processing_time: float | None = None,
    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,
) -> bool:
    """Generate the final research report.

    Returns:
        bool: True if reasoning occurred during report generation (turn_index was incremented),
              False otherwise.
    """
    with function_span("generate_report") as span:
        span.span_data.input = f"history_length={len(history)}, turn_index={turn_index}"
        final_report_prompt = FINAL_REPORT_PROMPT.format(
            current_datetime=get_current_llm_day_time(full_sentence=False),
        )
        system_prompt = ChatMessageSimple(
            message=final_report_prompt,
            token_count=token_counter(final_report_prompt),
            message_type=MessageType.SYSTEM,
        )
        final_reminder = USER_FINAL_REPORT_QUERY.format(research_plan=research_plan)
        reminder_message = ChatMessageSimple(
            message=final_reminder,
            token_count=token_counter(final_reminder),
            message_type=MessageType.USER_REMINDER,
        )
        final_report_history = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=history,
            reminder_message=reminder_message,
            context_files=None,
            available_tokens=llm.config.max_input_tokens,
            all_injected_file_metadata=all_injected_file_metadata,
        )

        citation_processor = DynamicCitationProcessor()
        citation_processor.update_citation_mapping(citation_mapping)

        # Only passing in the cited documents as the whole list would be too long
        final_documents = list(citation_processor.citation_to_doc.values())

        llm_step_result, has_reasoned = run_llm_step(
            emitter=emitter,
            history=final_report_history,
            tool_definitions=[],
            tool_choice=ToolChoiceOptions.NONE,
            llm=llm,
            placement=Placement(turn_index=turn_index),
            citation_processor=citation_processor,
            state_container=state_container,
            final_documents=final_documents,
            user_identity=user_identity,
            max_tokens=MAX_FINAL_REPORT_TOKENS,
            is_deep_research=True,
            pre_answer_processing_time=pre_answer_processing_time,
            timeout_override=300,  # 5 minute read timeout for long report generation
        )

        # Save citation mapping to state_container so citations are persisted
        state_container.set_citation_mapping(citation_processor.citation_to_doc)

        final_report = llm_step_result.answer
        if final_report is None:
            raise ValueError("LLM failed to generate the final deep research report")

        if saved_reasoning:
            # The reasoning we want to save with the message is more about calling this
            # generate report and why it's done. Also some models don't have reasoning
            # but we'd still want to capture the reasoning from the think_tool of theprevious turn.
            state_container.set_reasoning_tokens(saved_reasoning)

        span.span_data.output = final_report if final_report else None
        return has_reasoned


@log_function_time(print_only=True)
def run_deep_research_llm_loop(
    emitter: Emitter,
    state_container: ChatStateContainer,
    simple_chat_history: list[ChatMessageSimple],
    tools: list[Tool],
    custom_agent_prompt: str | None,  # noqa: ARG001
    llm: LLM,
    token_counter: Callable[[str], int],
    db_session: Session,
    skip_clarification: bool = False,
    user_identity: LLMUserIdentity | None = None,
    chat_session_id: str | None = None,
    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,
) -> None:
    with trace(
        "run_deep_research_llm_loop",
        group_id=chat_session_id,
        metadata={
            "tenant_id": get_current_tenant_id(),
            "chat_session_id": chat_session_id,
        },
    ):
        # Here for lazy load LiteLLM
        from onyx.llm.litellm_singleton.config import initialize_litellm

        # An approximate limit. In extreme cases it may still fail but this should allow deep research
        # to work in most cases.
        if llm.config.max_input_tokens < 50000:
            raise RuntimeError(
                "Cannot run Deep Research with an LLM that has less than 50,000 max input tokens"
            )

        initialize_litellm()

        # Track processing start time for tool duration calculation
        processing_start_time = time.monotonic()

        available_tokens = llm.config.max_input_tokens

        llm_step_result: LlmStepResult | None = None

        # Filter tools to only allow web search, internal search, and open URL
        allowed_tool_names = {SearchTool.NAME, WebSearchTool.NAME, OpenURLTool.NAME}
        allowed_tools = [tool for tool in tools if tool.name in allowed_tool_names]
        include_internal_search_tunings = SearchTool.NAME in allowed_tool_names
        orchestrator_start_turn_index = 1

        #########################################################
        # CLARIFICATION STEP (optional)
        #########################################################
        internal_search_clarification_guidance = (
            INTERNAL_SEARCH_CLARIFICATION_GUIDANCE
            if include_internal_search_tunings
            else ""
        )
        if not SKIP_DEEP_RESEARCH_CLARIFICATION and not skip_clarification:
            with function_span("clarification_step") as span:
                clarification_prompt = CLARIFICATION_PROMPT.format(
                    current_datetime=get_current_llm_day_time(full_sentence=False),
                    internal_search_clarification_guidance=internal_search_clarification_guidance,
                )
                system_prompt = ChatMessageSimple(
                    message=clarification_prompt,
                    token_count=300,  # Skips the exact token count but has enough leeway
                    message_type=MessageType.SYSTEM,
                )

                truncated_message_history = construct_message_history(
                    system_prompt=system_prompt,
                    custom_agent_prompt=None,
                    simple_chat_history=simple_chat_history,
                    reminder_message=None,
                    context_files=None,
                    available_tokens=available_tokens,
                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
                    all_injected_file_metadata=all_injected_file_metadata,
                )

                # Calculate tool processing duration for clarification step
                # (used if the LLM emits a clarification question instead of calling tools)
                clarification_tool_duration = time.monotonic() - processing_start_time
                llm_step_result, _ = run_llm_step(
                    emitter=emitter,
                    history=truncated_message_history,
                    tool_definitions=get_clarification_tool_definitions(),
                    tool_choice=ToolChoiceOptions.AUTO,
                    llm=llm,
                    placement=Placement(turn_index=0),
                    # No citations in this step, it should just pass through all
                    # tokens directly so initialized as an empty citation processor
                    citation_processor=None,
                    state_container=state_container,
                    final_documents=None,
                    user_identity=user_identity,
                    is_deep_research=True,
                    pre_answer_processing_time=clarification_tool_duration,
                )

                if not llm_step_result.tool_calls:
                    # Mark this turn as a clarification question
                    state_container.set_is_clarification(True)
                    span.span_data.output = "clarification_required"

                    emitter.emit(
                        Packet(
                            placement=Placement(turn_index=0),
                            obj=OverallStop(type="stop"),
                        )
                    )

                    # If a clarification is asked, we need to end this turn and wait on user input
                    return

        #########################################################
        # RESEARCH PLAN STEP
        #########################################################
        with function_span("research_plan_step") as span:
            system_prompt = ChatMessageSimple(
                message=RESEARCH_PLAN_PROMPT.format(
                    current_datetime=get_current_llm_day_time(full_sentence=False)
                ),
                token_count=300,
                message_type=MessageType.SYSTEM,
            )
            # Note this is fine to use a USER message type here as it can just be interpretered as a
            # user's message directly to the LLM.
            reminder_message = ChatMessageSimple(
                message=RESEARCH_PLAN_REMINDER,
                token_count=100,
                message_type=MessageType.USER,
            )
            truncated_message_history = construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=None,
                simple_chat_history=simple_chat_history + [reminder_message],
                reminder_message=None,
                context_files=None,
                available_tokens=available_tokens,
                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,
                all_injected_file_metadata=all_injected_file_metadata,
            )

            research_plan_generator = run_llm_step_pkt_generator(
                history=truncated_message_history,
                tool_definitions=[],
                tool_choice=ToolChoiceOptions.NONE,
                llm=llm,
                placement=Placement(turn_index=0),
                citation_processor=None,
                state_container=state_container,
                final_documents=None,
                user_identity=user_identity,
                is_deep_research=True,
            )

            while True:
                try:
                    packet = next(research_plan_generator)
                    # Translate AgentResponseStart/Delta packets to DeepResearchPlanStart/Delta
                    # The LLM response from this prompt is the research plan
                    if isinstance(packet.obj, AgentResponseStart):
                        emitter.emit(
                            Packet(
                                placement=packet.placement,
                                obj=DeepResearchPlanStart(),
                            )
                        )
                    elif isinstance(packet.obj, AgentResponseDelta):
                        emitter.emit(
                            Packet(
                                placement=packet.placement,
                                obj=DeepResearchPlanDelta(content=packet.obj.content),
                            )
                        )
                    else:
                        # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)
                        emitter.emit(packet)
                except StopIteration as e:
                    llm_step_result, reasoned = e.value
                    emitter.emit(
                        Packet(
                            # Marks the last turn end which should be the plan generation
                            placement=Placement(
                                turn_index=1 if reasoned else 0,
                            ),
                            obj=SectionEnd(),
                        )
                    )
                    if reasoned:
                        orchestrator_start_turn_index += 1
                    break
            llm_step_result = cast(LlmStepResult, llm_step_result)

            research_plan = llm_step_result.answer
            if research_plan is None:
                raise RuntimeError("Deep Research failed to generate a research plan")
            span.span_data.output = research_plan if research_plan else None

        #########################################################
        # RESEARCH EXECUTION STEP
        #########################################################
        with function_span("research_execution_step") as span:
            is_reasoning_model = model_is_reasoning_model(
                llm.config.model_name, llm.config.model_provider
            )

            max_orchestrator_cycles = (
                MAX_ORCHESTRATOR_CYCLES
                if not is_reasoning_model
                else MAX_ORCHESTRATOR_CYCLES_REASONING
            )

            orchestrator_prompt_template = (
                ORCHESTRATOR_PROMPT
                if not is_reasoning_model
                else ORCHESTRATOR_PROMPT_REASONING
            )

            internal_search_research_task_guidance = (
                INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE
                if include_internal_search_tunings
                else ""
            )
            token_count_prompt = orchestrator_prompt_template.format(
                current_datetime=get_current_llm_day_time(full_sentence=False),
                current_cycle_count=1,
                max_cycles=max_orchestrator_cycles,
                research_plan=research_plan,
                internal_search_research_task_guidance=internal_search_research_task_guidance,
            )
            orchestration_tokens = token_counter(token_count_prompt)

            reasoning_cycles = 0
            most_recent_reasoning: str | None = None
            citation_mapping: CitationMapping = {}
            final_turn_index: int = (
                orchestrator_start_turn_index  # Track the final turn_index for stop packet
            )
            for cycle in range(max_orchestrator_cycles):
                # Check if we've exceeded the time limit or reached the last cycle
                # - if so, skip LLM and generate final report
                elapsed_seconds = time.monotonic() - processing_start_time
                timed_out = elapsed_seconds > DEEP_RESEARCH_FORCE_REPORT_SECONDS
                is_last_cycle = cycle == max_orchestrator_cycles - 1

                if timed_out or is_last_cycle:
                    if timed_out:
                        logger.info(
                            f"Deep research exceeded {DEEP_RESEARCH_FORCE_REPORT_SECONDS}s "
                            f"(elapsed: {elapsed_seconds:.1f}s), forcing final report generation"
                        )
                    report_turn_index = (
                        orchestrator_start_turn_index + cycle + reasoning_cycles
                    )
                    report_reasoned = generate_final_report(
                        history=simple_chat_history,
                        research_plan=research_plan,
                        llm=llm,
                        token_counter=token_counter,
                        state_container=state_container,
                        emitter=emitter,
                        turn_index=report_turn_index,
                        citation_mapping=citation_mapping,
                        user_identity=user_identity,
                        pre_answer_processing_time=elapsed_seconds,
                        all_injected_file_metadata=all_injected_file_metadata,
                    )
                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
                    break

                if cycle == 1:
                    first_cycle_reminder_message = ChatMessageSimple(
                        message=FIRST_CYCLE_REMINDER,
                        token_count=FIRST_CYCLE_REMINDER_TOKENS,
                        message_type=MessageType.USER_REMINDER,
                    )
                else:
                    first_cycle_reminder_message = None

                research_agent_calls: list[ToolCallKickoff] = []

                orchestrator_prompt = orchestrator_prompt_template.format(
                    current_datetime=get_current_llm_day_time(full_sentence=False),
                    current_cycle_count=cycle,
                    max_cycles=max_orchestrator_cycles,
                    research_plan=research_plan,
                    internal_search_research_task_guidance=internal_search_research_task_guidance,
                )

                system_prompt = ChatMessageSimple(
                    message=orchestrator_prompt,
                    token_count=orchestration_tokens,
                    message_type=MessageType.SYSTEM,
                )

                truncated_message_history = construct_message_history(
                    system_prompt=system_prompt,
                    custom_agent_prompt=None,
                    simple_chat_history=simple_chat_history,
                    reminder_message=first_cycle_reminder_message,
                    context_files=None,
                    available_tokens=available_tokens,
                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
                    all_injected_file_metadata=all_injected_file_metadata,
                )

                # Use think tool processor for non-reasoning models to convert
                # think_tool calls to reasoning content
                custom_processor = (
                    create_think_tool_token_processor()
                    if not is_reasoning_model
                    else None
                )

                llm_step_result, has_reasoned = run_llm_step(
                    emitter=emitter,
                    history=truncated_message_history,
                    tool_definitions=get_orchestrator_tools(
                        include_think_tool=not is_reasoning_model
                    ),
                    tool_choice=ToolChoiceOptions.REQUIRED,
                    llm=llm,
                    placement=Placement(
                        turn_index=orchestrator_start_turn_index
                        + cycle
                        + reasoning_cycles
                    ),
                    # No citations in this step, it should just pass through all
                    # tokens directly so initialized as an empty citation processor
                    citation_processor=DynamicCitationProcessor(),
                    state_container=state_container,
                    final_documents=None,
                    user_identity=user_identity,
                    custom_token_processor=custom_processor,
                    is_deep_research=True,
                    # Even for the reasoning tool, this should be plenty
                    # The generation here should never be very long as it's just the tool calls.
                    # This prevents timeouts where the model gets into an endless loop of null or bad tokens.
                    max_tokens=1024,
                )
                if has_reasoned:
                    reasoning_cycles += 1

                tool_calls = llm_step_result.tool_calls or []

                if not tool_calls and cycle == 0:
                    raise RuntimeError(
                        "Deep Research failed to generate any research tasks for the agents."
                    )

                if not tool_calls:
                    # Basically hope that this is an infrequent occurence and hopefully multiple research
                    # cycles have already ran
                    logger.warning("No tool calls found, this should not happen.")
                    report_turn_index = (
                        orchestrator_start_turn_index + cycle + reasoning_cycles
                    )
                    report_reasoned = generate_final_report(
                        history=simple_chat_history,
                        research_plan=research_plan,
                        llm=llm,
                        token_counter=token_counter,
                        state_container=state_container,
                        emitter=emitter,
                        turn_index=report_turn_index,
                        citation_mapping=citation_mapping,
                        user_identity=user_identity,
                        pre_answer_processing_time=time.monotonic()
                        - processing_start_time,
                        all_injected_file_metadata=all_injected_file_metadata,
                    )
                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
                    break

                special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)

                if special_tool_calls.generate_report_tool_call:
                    report_turn_index = (
                        special_tool_calls.generate_report_tool_call.placement.turn_index
                    )
                    report_reasoned = generate_final_report(
                        history=simple_chat_history,
                        research_plan=research_plan,
                        llm=llm,
                        token_counter=token_counter,
                        state_container=state_container,
                        emitter=emitter,
                        turn_index=report_turn_index,
                        citation_mapping=citation_mapping,
                        user_identity=user_identity,
                        saved_reasoning=most_recent_reasoning,
                        pre_answer_processing_time=time.monotonic()
                        - processing_start_time,
                        all_injected_file_metadata=all_injected_file_metadata,
                    )
                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
                    break
                elif special_tool_calls.think_tool_call:
                    think_tool_call = special_tool_calls.think_tool_call
                    # Only process the THINK_TOOL and skip all other tool calls
                    # This will not actually get saved to the db as a tool call but we'll attach it to the tool(s) called after
                    # it as if it were just a reasoning model doing it. In the chat history, because it happens in 2 steps,
                    # we will show it as a separate message.
                    # NOTE: This does not need to increment the reasoning cycles because the custom token processor causes
                    # the LLM step to handle this
                    with function_span("think_tool") as span:
                        span.span_data.input = str(think_tool_call.tool_args)
                        most_recent_reasoning = state_container.reasoning_tokens
                        tool_call_message = think_tool_call.to_msg_str()
                        tool_call_token_count = token_counter(tool_call_message)

                        # Create ASSISTANT message with tool_calls (OpenAI parallel format)
                        think_tool_simple = ToolCallSimple(
                            tool_call_id=think_tool_call.tool_call_id,
                            tool_name=think_tool_call.tool_name,
                            tool_arguments=think_tool_call.tool_args,
                            token_count=tool_call_token_count,
                        )
                        think_assistant_msg = ChatMessageSimple(
                            message="",
                            token_count=tool_call_token_count,
                            message_type=MessageType.ASSISTANT,
                            tool_calls=[think_tool_simple],
                            image_files=None,
                        )
                        simple_chat_history.append(think_assistant_msg)

                        think_tool_response_msg = ChatMessageSimple(
                            message=THINK_TOOL_RESPONSE_MESSAGE,
                            token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,
                            message_type=MessageType.TOOL_CALL_RESPONSE,
                            tool_call_id=think_tool_call.tool_call_id,
                            image_files=None,
                        )
                        simple_chat_history.append(think_tool_response_msg)
                        span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE
                    continue
                else:
                    for tool_call in tool_calls:
                        if tool_call.tool_name != RESEARCH_AGENT_TOOL_NAME:
                            logger.warning(
                                f"Unexpected tool call: {tool_call.tool_name}"
                            )
                            continue

                        research_agent_calls.append(tool_call)

                    if not research_agent_calls:
                        logger.warning(
                            "No research agent tool calls found, this should not happen."
                        )
                        report_turn_index = (
                            orchestrator_start_turn_index + cycle + reasoning_cycles
                        )
                        report_reasoned = generate_final_report(
                            history=simple_chat_history,
                            research_plan=research_plan,
                            llm=llm,
                            token_counter=token_counter,
                            state_container=state_container,
                            emitter=emitter,
                            turn_index=report_turn_index,
                            citation_mapping=citation_mapping,
                            user_identity=user_identity,
                            pre_answer_processing_time=time.monotonic()
                            - processing_start_time,
                            all_injected_file_metadata=all_injected_file_metadata,
                        )
                        final_turn_index = report_turn_index + (
                            1 if report_reasoned else 0
                        )
                        break

                    if len(research_agent_calls) > 1:
                        emitter.emit(
                            Packet(
                                placement=Placement(
                                    turn_index=research_agent_calls[
                                        0
                                    ].placement.turn_index
                                ),
                                obj=TopLevelBranching(
                                    num_parallel_branches=len(research_agent_calls)
                                ),
                            )
                        )

                    research_results = run_research_agent_calls(
                        # The tool calls here contain the placement information
                        research_agent_calls=research_agent_calls,
                        parent_tool_call_ids=[
                            tool_call.tool_call_id for tool_call in tool_calls
                        ],
                        tools=allowed_tools,
                        emitter=emitter,
                        state_container=state_container,
                        llm=llm,
                        is_reasoning_model=is_reasoning_model,
                        token_counter=token_counter,
                        citation_mapping=citation_mapping,
                        user_identity=user_identity,
                    )

                    citation_mapping = research_results.citation_mapping

                    # Build ONE ASSISTANT message with all tool calls (OpenAI parallel format)
                    tool_calls_simple: list[ToolCallSimple] = []
                    for current_tool_call in research_agent_calls:
                        tool_call_message = current_tool_call.to_msg_str()
                        tool_call_token_count = token_counter(tool_call_message)
                        tool_calls_simple.append(
                            ToolCallSimple(
                                tool_call_id=current_tool_call.tool_call_id,
                                tool_name=current_tool_call.tool_name,
                                tool_arguments=current_tool_call.tool_args,
                                token_count=tool_call_token_count,
                            )
                        )

                    total_tool_call_tokens = sum(
                        tc.token_count for tc in tool_calls_simple
                    )
                    assistant_with_tools = ChatMessageSimple(
                        message="",
                        token_count=total_tool_call_tokens,
                        message_type=MessageType.ASSISTANT,
                        tool_calls=tool_calls_simple,
                        image_files=None,
                    )
                    simple_chat_history.append(assistant_with_tools)

                    # Now add TOOL_CALL_RESPONSE messages and tool call info for each result
                    for tab_index, report in enumerate(
                        research_results.intermediate_reports
                    ):
                        if report is None:
                            # The LLM will not see that this research was even attempted, it may try
                            # something similar again but this is not bad.
                            logger.error(
                                f"Research agent call at tab_index {tab_index} failed, skipping"
                            )
                            continue

                        current_tool_call = research_agent_calls[tab_index]
                        tool_call_info = ToolCallInfo(
                            parent_tool_call_id=None,
                            turn_index=orchestrator_start_turn_index
                            + cycle
                            + reasoning_cycles,
                            tab_index=tab_index,
                            tool_name=current_tool_call.tool_name,
                            tool_call_id=current_tool_call.tool_call_id,
                            tool_id=get_tool_by_name(
                                tool_name=RESEARCH_AGENT_TOOL_NAME,
                                db_session=db_session,
                            ).id,
                            reasoning_tokens=llm_step_result.reasoning
                            or most_recent_reasoning,
                            tool_call_arguments=current_tool_call.tool_args,
                            tool_call_response=report,
                            search_docs=None,  # Intermediate docs are not saved/shown
                            generated_images=None,
                        )
                        state_container.add_tool_call(tool_call_info)

                        tool_call_response_msg = ChatMessageSimple(
                            message=report,
                            token_count=token_counter(report),
                            message_type=MessageType.TOOL_CALL_RESPONSE,
                            tool_call_id=current_tool_call.tool_call_id,
                            image_files=None,
                        )
                        simple_chat_history.append(tool_call_response_msg)

                # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns
                most_recent_reasoning = None

        emitter.emit(
            Packet(
                placement=Placement(turn_index=final_turn_index),
                obj=OverallStop(type="stop"),
            )
        )


================================================
FILE: backend/onyx/deep_research/dr_mock_tools.py
================================================
GENERATE_PLAN_TOOL_NAME = "generate_plan"

RESEARCH_AGENT_IN_CODE_ID = "ResearchAgent"
RESEARCH_AGENT_TOOL_NAME = "research_agent"
RESEARCH_AGENT_TASK_KEY = "task"

GENERATE_REPORT_TOOL_NAME = "generate_report"

THINK_TOOL_NAME = "think_tool"


# ruff: noqa: E501, W605 start
GENERATE_PLAN_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": GENERATE_PLAN_TOOL_NAME,
        "description": "No clarification needed, generate a research plan for the user's query.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
        },
    },
}


RESEARCH_AGENT_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": RESEARCH_AGENT_TOOL_NAME,
        "description": "Conduct research on a specific topic.",
        "parameters": {
            "type": "object",
            "properties": {
                RESEARCH_AGENT_TASK_KEY: {
                    "type": "string",
                    "description": "The research task to investigate, should be 1-2 descriptive sentences outlining the direction of investigation.",
                }
            },
            "required": [RESEARCH_AGENT_TASK_KEY],
        },
    },
}


GENERATE_REPORT_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": GENERATE_REPORT_TOOL_NAME,
        "description": "Generate the final research report from all of the findings. Should be called when all aspects of the user's query have been researched, or maximum cycles are reached.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
        },
    },
}


THINK_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": THINK_TOOL_NAME,
        "description": "Use this for reasoning between research_agent calls and before calling generate_report. Think deeply about key results, identify knowledge gaps, and plan next steps.",
        "parameters": {
            "type": "object",
            "properties": {
                "reasoning": {
                    "type": "string",
                    "description": "Your chain of thought reasoning, use paragraph format, no lists.",
                }
            },
            "required": ["reasoning"],
        },
    },
}


RESEARCH_AGENT_THINK_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": "think_tool",
        "description": "Use this for reasoning between research steps. Think deeply about key results, identify knowledge gaps, and plan next steps.",
        "parameters": {
            "type": "object",
            "properties": {
                "reasoning": {
                    "type": "string",
                    "description": "Your chain of thought reasoning, can be as long as a lengthy paragraph.",
                }
            },
            "required": ["reasoning"],
        },
    },
}


RESEARCH_AGENT_GENERATE_REPORT_TOOL_DESCRIPTION = {
    "type": "function",
    "function": {
        "name": "generate_report",
        "description": "Generate the final research report from all findings. Should be called when research is complete.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
        },
    },
}


THINK_TOOL_RESPONSE_MESSAGE = "Acknowledged, please continue."
THINK_TOOL_RESPONSE_TOKEN_COUNT = 10


def get_clarification_tool_definitions() -> list[dict]:
    return [GENERATE_PLAN_TOOL_DESCRIPTION]


def get_orchestrator_tools(include_think_tool: bool) -> list[dict]:
    tools = [
        RESEARCH_AGENT_TOOL_DESCRIPTION,
        GENERATE_REPORT_TOOL_DESCRIPTION,
    ]
    if include_think_tool:
        tools.append(THINK_TOOL_DESCRIPTION)
    return tools


def get_research_agent_additional_tool_definitions(
    include_think_tool: bool,
) -> list[dict]:
    tools = [GENERATE_REPORT_TOOL_DESCRIPTION]
    if include_think_tool:
        tools.append(RESEARCH_AGENT_THINK_TOOL_DESCRIPTION)
    return tools


# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/deep_research/models.py
================================================
from pydantic import BaseModel

from onyx.chat.citation_processor import CitationMapping
from onyx.tools.models import ToolCallKickoff


class SpecialToolCalls(BaseModel):
    think_tool_call: ToolCallKickoff | None = None
    generate_report_tool_call: ToolCallKickoff | None = None


class ResearchAgentCallResult(BaseModel):
    intermediate_report: str
    citation_mapping: CitationMapping


class CombinedResearchAgentCallResult(BaseModel):
    # The None is needed here to keep the mappings consistent
    # we later skip the failed research results but we need to know
    # which ones failed
    intermediate_reports: list[str | None]
    citation_mapping: CitationMapping


================================================
FILE: backend/onyx/deep_research/utils.py
================================================
from collections.abc import Callable
from typing import Any

from pydantic import BaseModel

from onyx.deep_research.dr_mock_tools import GENERATE_REPORT_TOOL_NAME
from onyx.deep_research.dr_mock_tools import THINK_TOOL_NAME
from onyx.deep_research.models import SpecialToolCalls
from onyx.llm.model_response import ChatCompletionDeltaToolCall
from onyx.llm.model_response import Delta
from onyx.llm.model_response import FunctionCall
from onyx.tools.models import ToolCallKickoff


# JSON prefixes to detect in think_tool arguments
# The schema is: {"reasoning": "...content..."}
JSON_PREFIX_WITH_SPACE = '{"reasoning": "'
JSON_PREFIX_NO_SPACE = '{"reasoning":"'


class ThinkToolProcessorState(BaseModel):
    """State for tracking think tool processing across streaming deltas."""

    think_tool_found: bool = False
    think_tool_index: int | None = None
    think_tool_id: str | None = None
    full_arguments: str = ""  # Full accumulated arguments for final tool call
    accumulated_args: str = ""  # Working buffer for JSON parsing
    json_prefix_stripped: bool = False
    # Buffer holds content that might be the JSON suffix "}
    # We hold back 2 chars to avoid emitting the closing "}
    buffer: str = ""


def _unescape_json_string(s: str) -> str:
    """
    Unescape JSON string escape sequences.

    JSON strings use backslash escapes like \\n for newlines, \\t for tabs, etc.
    When we extract content from JSON by string manipulation (without json.loads),
    we need to manually decode these escape sequences.

    Note: We use a placeholder approach to handle escaped backslashes correctly.
    For example, "\\\\n" (escaped backslash + n) should become "\\n" (literal backslash + n),
    not a newline character.
    """
    # First, protect escaped backslashes with a placeholder
    placeholder = "\x00ESCAPED_BACKSLASH\x00"
    result = s.replace("\\\\", placeholder)

    # Now unescape common JSON escape sequences
    result = result.replace("\\n", "\n")
    result = result.replace("\\r", "\r")
    result = result.replace("\\t", "\t")
    result = result.replace('\\"', '"')

    # Finally, restore escaped backslashes as single backslashes
    result = result.replace(placeholder, "\\")

    return result


def _extract_reasoning_chunk(state: ThinkToolProcessorState) -> str | None:
    """
    Extract reasoning content from accumulated arguments, stripping JSON wrapper.

    Returns the next chunk of reasoning to emit, or None if nothing to emit yet.
    """
    # If we haven't found the JSON prefix yet, look for it
    if not state.json_prefix_stripped:
        # Try both prefix variants
        for prefix in [JSON_PREFIX_WITH_SPACE, JSON_PREFIX_NO_SPACE]:
            prefix_pos = state.accumulated_args.find(prefix)
            if prefix_pos != -1:
                # Found prefix - extract content after it
                content_start = prefix_pos + len(prefix)
                state.buffer = state.accumulated_args[content_start:]
                state.accumulated_args = ""
                state.json_prefix_stripped = True
                break

        if not state.json_prefix_stripped:
            # Haven't seen full prefix yet, keep accumulating
            return None
    else:
        # Already stripped prefix, add new content to buffer
        state.buffer += state.accumulated_args
        state.accumulated_args = ""

    # Hold back enough chars to avoid splitting escape sequences AND the JSON suffix "}
    # We need at least 2 for the suffix, but we also need to ensure escape sequences
    # like \n, \t, \\, \" don't get split. The longest escape is \\ (2 chars).
    # So we hold back 3 chars to be safe: if the last char is \, we don't want to
    # emit it without knowing what follows.
    holdback = 3
    if len(state.buffer) <= holdback:
        return None

    # Check if there's a trailing backslash that could be part of an escape sequence
    # If so, hold back one more character to avoid splitting the escape
    to_emit = state.buffer[:-holdback]
    remaining = state.buffer[-holdback:]

    # If to_emit ends with a backslash, it might be the start of an escape sequence
    # Move it to the remaining buffer to process with the next chunk
    # If to_emit ends with a backslash, it might be the start of an escape sequence
    # Move it to the remaining buffer to process with the next chunk
    if to_emit and to_emit[-1] == "\\":
        remaining = to_emit[-1] + remaining
        to_emit = to_emit[:-1]

    state.buffer = remaining

    # Unescape JSON escape sequences (e.g., \\n -> \n)
    if to_emit:
        to_emit = _unescape_json_string(to_emit)

    return to_emit if to_emit else None


def create_think_tool_token_processor() -> (
    Callable[[Delta | None, Any], tuple[Delta | None, Any]]
):
    """
    Create a custom token processor that converts think_tool calls to reasoning content.

    When the think_tool is detected:
    - Tool call arguments are converted to reasoning_content (JSON wrapper stripped)
    - All other deltas (content, other tool calls) are dropped

    This allows non-reasoning models to emit chain-of-thought via the think_tool,
    which gets displayed as reasoning tokens in the UI.

    Returns:
        A function compatible with run_llm_step_pkt_generator's custom_token_processor parameter.
        The function takes (Delta, state) and returns (modified Delta | None, new state).
    """

    def process_token(delta: Delta | None, state: Any) -> tuple[Delta | None, Any]:
        if state is None:
            state = ThinkToolProcessorState()

        # Handle flush signal (delta=None) - emit the complete tool call
        if delta is None:
            if state.think_tool_found and state.think_tool_id:
                # Return the complete think tool call
                complete_tool_call = ChatCompletionDeltaToolCall(
                    id=state.think_tool_id,
                    index=state.think_tool_index or 0,
                    type="function",
                    function=FunctionCall(
                        name=THINK_TOOL_NAME,
                        arguments=state.full_arguments,
                    ),
                )
                return Delta(tool_calls=[complete_tool_call]), state
            return None, state

        # Check for think tool in tool_calls
        if delta.tool_calls:
            for tool_call in delta.tool_calls:
                # Detect think tool by name
                if tool_call.function and tool_call.function.name == THINK_TOOL_NAME:
                    state.think_tool_found = True
                    state.think_tool_index = tool_call.index

                # Capture tool call id when available
                if (
                    state.think_tool_found
                    and tool_call.index == state.think_tool_index
                    and tool_call.id
                ):
                    state.think_tool_id = tool_call.id

                # Accumulate arguments for the think tool
                if (
                    state.think_tool_found
                    and tool_call.index == state.think_tool_index
                    and tool_call.function
                    and tool_call.function.arguments
                ):
                    # Track full arguments for final tool call
                    state.full_arguments += tool_call.function.arguments
                    # Also accumulate for JSON parsing
                    state.accumulated_args += tool_call.function.arguments

                    # Try to extract reasoning content
                    reasoning_chunk = _extract_reasoning_chunk(state)
                    if reasoning_chunk:
                        # Return delta with reasoning_content to trigger reasoning streaming
                        return Delta(reasoning_content=reasoning_chunk), state

        # If think tool found, drop all other content
        if state.think_tool_found:
            return None, state

        # No think tool detected, pass through original delta
        return delta, state

    return process_token


def check_special_tool_calls(tool_calls: list[ToolCallKickoff]) -> SpecialToolCalls:
    think_tool_call: ToolCallKickoff | None = None
    generate_report_tool_call: ToolCallKickoff | None = None

    for tool_call in tool_calls:
        if tool_call.tool_name == THINK_TOOL_NAME:
            think_tool_call = tool_call
        elif tool_call.tool_name == GENERATE_REPORT_TOOL_NAME:
            generate_report_tool_call = tool_call

    return SpecialToolCalls(
        think_tool_call=think_tool_call,
        generate_report_tool_call=generate_report_tool_call,
    )


================================================
FILE: backend/onyx/document_index/FILTER_SEMANTICS.md
================================================
# Vector DB Filter Semantics

How `IndexFilters` fields combine into the final query filter. Applies to both Vespa and OpenSearch.

## Filter categories

| Category | Fields | Join logic |
|---|---|---|
| **Visibility** | `hidden` | Always applied (unless `include_hidden`) |
| **Tenant** | `tenant_id` | AND (multi-tenant only) |
| **ACL** | `access_control_list` | OR within, AND with rest |
| **Narrowing** | `source_type`, `tags`, `time_cutoff` | Each OR within, AND with rest |
| **Knowledge scope** | `document_set`, `attached_document_ids`, `hierarchy_node_ids`, `persona_id_filter` | OR within group, AND with rest |
| **Additive scope** | `project_id_filter` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |

## How filters combine

All categories are AND'd together. Within the knowledge scope category, individual filters are OR'd.

```
NOT hidden
AND tenant = T                          -- if multi-tenant
AND (acl contains A1 OR acl contains A2)
AND (source_type = S1 OR ...)           -- if set
AND (tag = T1 OR ...)                   -- if set
AND <knowledge scope>                   -- see below
AND time >= cutoff                      -- if set
```

## Knowledge scope rules

The knowledge scope filter controls **what knowledge an assistant can access**.

### Primary vs additive triggers

- **`persona_id_filter`** is a **primary** trigger. A persona with user files IS explicit
  knowledge, so `persona_id_filter` alone can start a knowledge scope. Note: this is
  NOT the raw ID of the persona being used — it is only set when the persona's
  user files overflowed the LLM context window.
- **`project_id_filter`** is **additive**. It widens an existing scope to include project
  files but never restricts on its own — a chat inside a project should still search
  team knowledge when no other knowledge is attached.

### No explicit knowledge attached

When `document_set`, `attached_document_ids`, `hierarchy_node_ids`, and `persona_id_filter` are all empty/None:

- **No knowledge scope filter is applied.** The assistant can see everything (subject to ACL).
- `project_id_filter` is ignored — it never restricts on its own.

### One explicit knowledge type

```
-- Only document sets
AND (document_sets contains "Engineering" OR document_sets contains "Legal")

-- Only persona user files (overflowed context)
AND (personas contains 42)
```

### Multiple explicit knowledge types (OR'd)

```
-- Document sets + persona user files
AND (
    document_sets contains "Engineering"
    OR personas contains 42
)
```

### Explicit knowledge + overflowing project files

When an explicit knowledge restriction is in effect **and** `project_id_filter` is set (project files overflowed the LLM context window), `project_id_filter` widens the filter:

```
-- Document sets + project files overflowed
AND (
    document_sets contains "Engineering"
    OR user_project contains 7
)

-- Persona user files + project files (won't happen in practice;
-- custom personas ignore project files per the precedence rule)
AND (
    personas contains 42
    OR user_project contains 7
)
```

### Only project_id_filter (no explicit knowledge)

No knowledge scope filter. The assistant searches everything.

```
-- Just ACL, no restriction
NOT hidden
AND (acl contains ...)
```

## Field reference

| Filter field | Vespa field | Vespa type | Purpose |
|---|---|---|---|
| `document_set` | `document_sets` | `weightedset<string>` | Connector doc sets attached to assistant |
| `attached_document_ids` | `document_id` | `string` | Documents explicitly attached (OpenSearch only) |
| `hierarchy_node_ids` | `ancestor_hierarchy_node_ids` | `array<int>` | Folder/space nodes (OpenSearch only) |
| `persona_id_filter` | `personas` | `array<int>` | Persona tag for overflowing user files (**primary** trigger) |
| `project_id_filter` | `user_project` | `array<int>` | Project tag for overflowing project files (**additive** only) |
| `access_control_list` | `access_control_list` | `weightedset<string>` | ACL entries for the requesting user |
| `source_type` | `source_type` | `string` | Connector source type (e.g. `web`, `jira`) |
| `tags` | `metadata_list` | `array<string>` | Document metadata tags |
| `time_cutoff` | `doc_updated_at` | `long` | Minimum document update timestamp |
| `tenant_id` | `tenant_id` | `string` | Tenant isolation (multi-tenant) |


================================================
FILE: backend/onyx/document_index/__init__.py
================================================


================================================
FILE: backend/onyx/document_index/chunk_content_enrichment.py
================================================
from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk


def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"


def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"


def cleanup_content_for_chunks(
    chunks: list[InferenceChunkUncleaned],
) -> list[InferenceChunk]:
    """
    Removes indexing-time content additions from chunks. Inverse of
    generate_enriched_content_for_chunk.

    During indexing, chunks are augmented with additional text to improve search
    quality:
    - Title prepended to content (for better keyword/semantic matching)
    - Metadata suffix appended to content
    - Contextual RAG: doc_summary (beginning) and chunk_context (end)

    This function strips these additions before returning chunks to users,
    restoring the original document content. Cleaning is applied in sequence:
    1. Title removal:
        - Full match: Strips exact title from beginning
        - Partial match: If content starts with title[:BLURB_SIZE], splits on
          RETURN_SEPARATOR to remove title section
    2. Metadata suffix removal:
        - Strips metadata_suffix from end, plus trailing RETURN_SEPARATOR
    3. Contextual RAG removal:
        - Strips doc_summary from beginning (if present)
        - Strips chunk_context from end (if present)

    TODO(andrei): This entire function is not that fantastic, clean it up during
    QA before rolling out OpenSearch.

    Args:
        chunks: Chunks as retrieved from the document index with indexing
            augmentations intact.

    Returns:
        Clean InferenceChunk objects with augmentations removed, containing only
            the original document content that should be shown to users.
    """

    def _remove_title(chunk: InferenceChunkUncleaned) -> str:
        # TODO(andrei): This was ported over from
        # backend/onyx/document_index/vespa/vespa_document_index.py but I don't
        # think this logic is correct. In Vespa at least we set the title field
        # from the output of get_title_for_document_index, which is not
        # necessarily the same data that is prepended to the content; that comes
        # from title_prefix.
        # This was added in
        # https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-2a2a29d5929de75cdaea77867a397934d9f8b785ce40a861c0d704033e3663ab,
        # see postprocessing.py. At that time the content enrichment logic was
        # also added in that commit, see
        # https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-d807718aa263a15c1d991a4ab063c360c8419eaad210b4ba70e1e9f47d2aa6d2R77
        # chunker.py.
        if not chunk.title or not chunk.content:
            return chunk.content

        if chunk.content.startswith(chunk.title):
            return chunk.content[len(chunk.title) :].lstrip()

        # BLURB SIZE is by token instead of char but each token is at least 1 char
        # If this prefix matches the content, it's assumed the title was prepended
        if chunk.content.startswith(chunk.title[:BLURB_SIZE]):
            return (
                chunk.content.split(RETURN_SEPARATOR, 1)[-1]
                if RETURN_SEPARATOR in chunk.content
                else chunk.content
            )
        return chunk.content

    def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:
        if not chunk.metadata_suffix:
            return chunk.content
        return chunk.content.removesuffix(chunk.metadata_suffix).rstrip(
            RETURN_SEPARATOR
        )

    def _remove_contextual_rag(chunk: InferenceChunkUncleaned) -> str:
        # remove document summary
        if chunk.doc_summary and chunk.content.startswith(chunk.doc_summary):
            chunk.content = chunk.content[len(chunk.doc_summary) :].lstrip()
        # remove chunk context
        if chunk.chunk_context and chunk.content.endswith(chunk.chunk_context):
            chunk.content = chunk.content[
                : len(chunk.content) - len(chunk.chunk_context)
            ].rstrip()
        return chunk.content

    for chunk in chunks:
        chunk.content = _remove_title(chunk)
        chunk.content = _remove_metadata_suffix(chunk)
        chunk.content = _remove_contextual_rag(chunk)

    return [chunk.to_inference_chunk() for chunk in chunks]


================================================
FILE: backend/onyx/document_index/disabled.py
================================================
"""A DocumentIndex implementation that raises on every operation.

Used as a safety net when DISABLE_VECTOR_DB is True. Any code path that
accidentally reaches the vector DB layer will fail loudly instead of timing
out against a nonexistent Vespa/OpenSearch instance.
"""

from collections.abc import Iterable
from typing import Any

from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import QueryExpansionType
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import IndexBatchParams
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.model_server_models import Embedding

VECTOR_DB_DISABLED_ERROR = "Vector DB is disabled (DISABLE_VECTOR_DB=true). This operation requires a vector database."


class DisabledDocumentIndex(DocumentIndex):
    """A DocumentIndex where every method raises RuntimeError.

    Returned by the factory when DISABLE_VECTOR_DB is True so that any
    accidental vector-DB call surfaces immediately.
    """

    def __init__(
        self,
        index_name: str = "disabled",
        secondary_index_name: str | None = None,
        *args: Any,  # noqa: ARG002
        **kwargs: Any,  # noqa: ARG002
    ) -> None:
        self.index_name = index_name
        self.secondary_index_name = secondary_index_name

    # ------------------------------------------------------------------
    # Verifiable
    # ------------------------------------------------------------------
    def ensure_indices_exist(
        self,
        primary_embedding_dim: int,  # noqa: ARG002
        primary_embedding_precision: EmbeddingPrecision,  # noqa: ARG002
        secondary_index_embedding_dim: int | None,  # noqa: ARG002
        secondary_index_embedding_precision: EmbeddingPrecision | None,  # noqa: ARG002
    ) -> None:
        # No-op: there are no indices to create when the vector DB is disabled.
        pass

    @staticmethod
    def register_multitenant_indices(
        indices: list[str],  # noqa: ARG002, ARG004
        embedding_dims: list[int],  # noqa: ARG002, ARG004
        embedding_precisions: list[EmbeddingPrecision],  # noqa: ARG002, ARG004
    ) -> None:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # Indexable
    # ------------------------------------------------------------------
    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],  # noqa: ARG002
        index_batch_params: IndexBatchParams,  # noqa: ARG002
    ) -> set[DocumentInsertionRecord]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # Deletable
    # ------------------------------------------------------------------
    def delete_single(
        self,
        doc_id: str,  # noqa: ARG002
        *,
        tenant_id: str,  # noqa: ARG002
        chunk_count: int | None,  # noqa: ARG002
    ) -> int:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # Updatable
    # ------------------------------------------------------------------
    def update_single(
        self,
        doc_id: str,  # noqa: ARG002
        *,
        tenant_id: str,  # noqa: ARG002
        chunk_count: int | None,  # noqa: ARG002
        fields: VespaDocumentFields | None,  # noqa: ARG002
        user_fields: VespaDocumentUserFields | None,  # noqa: ARG002
    ) -> None:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # IdRetrievalCapable
    # ------------------------------------------------------------------
    def id_based_retrieval(
        self,
        chunk_requests: list[VespaChunkRequest],  # noqa: ARG002
        filters: IndexFilters,  # noqa: ARG002
        batch_retrieval: bool = False,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # HybridCapable
    # ------------------------------------------------------------------
    def hybrid_retrieval(
        self,
        query: str,  # noqa: ARG002
        query_embedding: Embedding,  # noqa: ARG002
        final_keywords: list[str] | None,  # noqa: ARG002
        filters: IndexFilters,  # noqa: ARG002
        hybrid_alpha: float,  # noqa: ARG002
        time_decay_multiplier: float,  # noqa: ARG002
        num_to_retrieve: int,  # noqa: ARG002
        ranking_profile_type: QueryExpansionType,  # noqa: ARG002
        title_content_ratio: float | None = None,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # AdminCapable
    # ------------------------------------------------------------------
    def admin_retrieval(
        self,
        query: str,  # noqa: ARG002
        query_embedding: Embedding,  # noqa: ARG002
        filters: IndexFilters,  # noqa: ARG002
        num_to_retrieve: int = 10,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

    # ------------------------------------------------------------------
    # RandomCapable
    # ------------------------------------------------------------------
    def random_retrieval(
        self,
        filters: IndexFilters,  # noqa: ARG002
        num_to_retrieve: int = 10,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)


================================================
FILE: backend/onyx/document_index/document_index_utils.py
================================================
import math
import uuid
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import MultipassConfig
from shared_configs.configs import MULTI_TENANT

DEFAULT_BATCH_SIZE = 30
DEFAULT_INDEX_NAME = "danswer_chunk"


def should_use_multipass(search_settings: SearchSettings | None) -> bool:
    """
    Determines whether multipass should be used based on the search settings
    or the default config if settings are unavailable.
    """
    if search_settings is not None:
        return search_settings.multipass_indexing
    return ENABLE_MULTIPASS_INDEXING


def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
    """
    Determines whether to enable multipass and large chunks by examining
    the current search settings and the embedder configuration.
    """
    multipass = should_use_multipass(search_settings)
    enable_large_chunks = SearchSettings.can_use_large_chunks(
        multipass, search_settings.model_name, search_settings.provider_type
    )
    return MultipassConfig(
        multipass_indexing=multipass, enable_large_chunks=enable_large_chunks
    )


def get_both_index_properties(
    db_session: Session,
) -> tuple[str, str | None, bool, bool | None]:
    search_settings = get_current_search_settings(db_session)
    config_1 = get_multipass_config(search_settings)

    search_settings_new = get_secondary_search_settings(db_session)
    if not search_settings_new:
        return search_settings.index_name, None, config_1.enable_large_chunks, None

    config_2 = get_multipass_config(search_settings)
    return (
        search_settings.index_name,
        search_settings_new.index_name,
        config_1.enable_large_chunks,
        config_2.enable_large_chunks,
    )


def translate_boost_count_to_multiplier(boost: int) -> float:
    """Mapping boost integer values to a multiplier according to a sigmoid curve
    Piecewise such that at many downvotes, its 0.5x the score and with many upvotes
    it is 2x the score. This should be in line with the Vespa calculation."""
    # 3 in the equation below stretches it out to hit asymptotes slower
    if boost < 0:
        # 0.5 + sigmoid -> range of 0.5 to 1
        return 0.5 + (1 / (1 + math.exp(-1 * boost / 3)))

    # 2 x sigmoid -> range of 1 to 2
    return 2 / (1 + math.exp(-1 * boost / 3))


# Assembles a list of Vespa chunk IDs for a document
# given the required context. This can be used to directly query
# Vespa's Document API.
def get_document_chunk_ids(
    enriched_document_info_list: list[EnrichedDocumentIndexingInfo],
    tenant_id: str,
    large_chunks_enabled: bool,
) -> list[UUID]:
    doc_chunk_ids = []

    for enriched_document_info in enriched_document_info_list:
        for chunk_index in range(
            enriched_document_info.chunk_start_index,
            enriched_document_info.chunk_end_index,
        ):
            if not enriched_document_info.old_version:
                doc_chunk_ids.append(
                    get_uuid_from_chunk_info(
                        document_id=enriched_document_info.doc_id,
                        chunk_id=chunk_index,
                        tenant_id=tenant_id,
                    )
                )
            else:
                doc_chunk_ids.append(
                    get_uuid_from_chunk_info_old(
                        document_id=enriched_document_info.doc_id,
                        chunk_id=chunk_index,
                    )
                )

            if large_chunks_enabled and chunk_index % 4 == 0:
                large_chunk_id = int(chunk_index / 4)
                large_chunk_reference_ids = [
                    large_chunk_id + i
                    for i in range(4)
                    if large_chunk_id + i < enriched_document_info.chunk_end_index
                ]
                if enriched_document_info.old_version:
                    doc_chunk_ids.append(
                        get_uuid_from_chunk_info_old(
                            document_id=enriched_document_info.doc_id,
                            chunk_id=large_chunk_id,
                            large_chunk_reference_ids=large_chunk_reference_ids,
                        )
                    )
                else:
                    doc_chunk_ids.append(
                        get_uuid_from_chunk_info(
                            document_id=enriched_document_info.doc_id,
                            chunk_id=large_chunk_id,
                            tenant_id=tenant_id,
                            large_chunk_id=large_chunk_id,
                        )
                    )

    return doc_chunk_ids


def get_uuid_from_chunk_info(
    *,
    document_id: str,
    chunk_id: int,
    tenant_id: str,
    large_chunk_id: int | None = None,
) -> UUID:
    """NOTE: be VERY carefuly about changing this function. If changed without a migration,
    this can cause deletion/update/insertion to function incorrectly."""
    doc_str = document_id

    # Web parsing URL duplicate catching
    if doc_str and doc_str[-1] == "/":
        doc_str = doc_str[:-1]

    chunk_index = (
        "large_" + str(large_chunk_id) if large_chunk_id is not None else str(chunk_id)
    )
    unique_identifier_string = "_".join([doc_str, chunk_index])
    if MULTI_TENANT:
        unique_identifier_string += "_" + tenant_id

    uuid_value = uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
    return uuid_value


def get_uuid_from_chunk_info_old(
    *, document_id: str, chunk_id: int, large_chunk_reference_ids: list[int] = []
) -> UUID:
    doc_str = document_id

    # Web parsing URL duplicate catching
    if doc_str and doc_str[-1] == "/":
        doc_str = doc_str[:-1]
    unique_identifier_string = "_".join([doc_str, str(chunk_id), "0"])
    if large_chunk_reference_ids:
        unique_identifier_string += "_large" + "_".join(
            [
                str(referenced_chunk_id)
                for referenced_chunk_id in large_chunk_reference_ids
            ]
        )
    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)


def get_uuid_from_chunk(chunk: DocMetadataAwareIndexChunk) -> uuid.UUID:
    return get_uuid_from_chunk_info(
        document_id=chunk.source_document.id,
        chunk_id=chunk.chunk_id,
        tenant_id=chunk.tenant_id,
        large_chunk_id=chunk.large_chunk_id,
    )


def get_uuid_from_chunk_old(
    chunk: DocMetadataAwareIndexChunk, large_chunk_reference_ids: list[int] = []
) -> UUID:
    return get_uuid_from_chunk_info_old(
        document_id=chunk.source_document.id,
        chunk_id=chunk.chunk_id,
        large_chunk_reference_ids=large_chunk_reference_ids,
    )


================================================
FILE: backend/onyx/document_index/factory.py
================================================
import httpx
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.db.models import SearchSettings
from onyx.db.opensearch_migration import get_opensearch_retrieval_state
from onyx.document_index.disabled import DisabledDocumentIndex
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
from shared_configs.configs import MULTI_TENANT


def get_default_document_index(
    search_settings: SearchSettings,
    secondary_search_settings: SearchSettings | None,
    db_session: Session,
    httpx_client: httpx.Client | None = None,
) -> DocumentIndex:
    """Gets the default document index from env vars.

    To be used for retrieval only. Indexing should be done through both indices
    until Vespa is deprecated.

    Primary index is the index that is used for querying/updating etc. Secondary
    index is for when both the currently used index and the upcoming index both
    need to be updated. Updates are applied to both indices.
    WARNING: In that case, get_all_document_indices should be used.
    """
    if DISABLE_VECTOR_DB:
        return DisabledDocumentIndex(
            index_name=search_settings.index_name,
            secondary_index_name=(
                secondary_search_settings.index_name
                if secondary_search_settings
                else None
            ),
        )

    secondary_index_name: str | None = None
    secondary_large_chunks_enabled: bool | None = None
    if secondary_search_settings:
        secondary_index_name = secondary_search_settings.index_name
        secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled

    opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
    if opensearch_retrieval_enabled:
        indexing_setting = IndexingSetting.from_db_model(search_settings)
        secondary_indexing_setting = (
            IndexingSetting.from_db_model(secondary_search_settings)
            if secondary_search_settings
            else None
        )
        return OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
            embedding_dim=indexing_setting.final_embedding_dim,
            embedding_precision=indexing_setting.embedding_precision,
            secondary_index_name=secondary_index_name,
            secondary_embedding_dim=(
                secondary_indexing_setting.final_embedding_dim
                if secondary_indexing_setting
                else None
            ),
            secondary_embedding_precision=(
                secondary_indexing_setting.embedding_precision
                if secondary_indexing_setting
                else None
            ),
            large_chunks_enabled=search_settings.large_chunks_enabled,
            secondary_large_chunks_enabled=secondary_large_chunks_enabled,
            multitenant=MULTI_TENANT,
            httpx_client=httpx_client,
        )
    else:
        return VespaIndex(
            index_name=search_settings.index_name,
            secondary_index_name=secondary_index_name,
            large_chunks_enabled=search_settings.large_chunks_enabled,
            secondary_large_chunks_enabled=secondary_large_chunks_enabled,
            multitenant=MULTI_TENANT,
            httpx_client=httpx_client,
        )


def get_all_document_indices(
    search_settings: SearchSettings,
    secondary_search_settings: SearchSettings | None,
    httpx_client: httpx.Client | None = None,
) -> list[DocumentIndex]:
    """Gets all document indices.

    NOTE: Will only return an OpenSearch index interface if
    ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
    where we know it won't be enabled.

    Used for indexing only. Until Vespa is deprecated we will index into both
    document indices. Retrieval is done through only one index however.

    Large chunks are not currently supported so we hardcode appropriate values.

    NOTE: Make sure the Vespa index object is returned first. In the rare event
    that there is some conflict between indexing and the migration task, it is
    assumed that the state of Vespa is more up-to-date than the state of
    OpenSearch.
    """
    if DISABLE_VECTOR_DB:
        return [
            DisabledDocumentIndex(
                index_name=search_settings.index_name,
                secondary_index_name=(
                    secondary_search_settings.index_name
                    if secondary_search_settings
                    else None
                ),
            )
        ]

    vespa_document_index = VespaIndex(
        index_name=search_settings.index_name,
        secondary_index_name=(
            secondary_search_settings.index_name if secondary_search_settings else None
        ),
        large_chunks_enabled=search_settings.large_chunks_enabled,
        secondary_large_chunks_enabled=(
            secondary_search_settings.large_chunks_enabled
            if secondary_search_settings
            else None
        ),
        multitenant=MULTI_TENANT,
        httpx_client=httpx_client,
    )
    opensearch_document_index: OpenSearchOldDocumentIndex | None = None
    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        indexing_setting = IndexingSetting.from_db_model(search_settings)
        secondary_indexing_setting = (
            IndexingSetting.from_db_model(secondary_search_settings)
            if secondary_search_settings
            else None
        )
        opensearch_document_index = OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
            embedding_dim=indexing_setting.final_embedding_dim,
            embedding_precision=indexing_setting.embedding_precision,
            secondary_index_name=(
                secondary_search_settings.index_name
                if secondary_search_settings
                else None
            ),
            secondary_embedding_dim=(
                secondary_indexing_setting.final_embedding_dim
                if secondary_indexing_setting
                else None
            ),
            secondary_embedding_precision=(
                secondary_indexing_setting.embedding_precision
                if secondary_indexing_setting
                else None
            ),
            large_chunks_enabled=search_settings.large_chunks_enabled,
            secondary_large_chunks_enabled=(
                secondary_search_settings.large_chunks_enabled
                if secondary_search_settings
                else None
            ),
            multitenant=MULTI_TENANT,
            httpx_client=httpx_client,
        )
    result: list[DocumentIndex] = [vespa_document_index]
    if opensearch_document_index:
        result.append(opensearch_document_index)
    return result


================================================
FILE: backend/onyx/document_index/interfaces.py
================================================
import abc
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime
from typing import Any

from onyx.access.models import DocumentAccess
from onyx.access.models import ExternalAccess
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import QueryExpansionType
from onyx.db.enums import EmbeddingPrecision
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.model_server_models import Embedding


@dataclass(frozen=True)
class DocumentInsertionRecord:
    document_id: str
    already_existed: bool


@dataclass(frozen=True)
class VespaChunkRequest:
    document_id: str
    min_chunk_ind: int | None = None
    max_chunk_ind: int | None = None

    @property
    def is_capped(self) -> bool:
        # If the max chunk index is not None, then the chunk request is capped
        # If the min chunk index is None, we can assume the min is 0
        return self.max_chunk_ind is not None

    @property
    def range(self) -> int | None:
        if self.max_chunk_ind is not None:
            return (self.max_chunk_ind - (self.min_chunk_ind or 0)) + 1
        return None


@dataclass
class IndexBatchParams:
    """
    Information necessary for efficiently indexing a batch of documents
    """

    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]
    tenant_id: str
    large_chunks_enabled: bool


@dataclass
class MinimalDocumentIndexingInfo:
    """
    Minimal information necessary for indexing a document
    """

    doc_id: str
    chunk_start_index: int


@dataclass
class EnrichedDocumentIndexingInfo(MinimalDocumentIndexingInfo):
    """
    Enriched information necessary for indexing a document, including version and chunk range.
    """

    old_version: bool
    chunk_end_index: int


@dataclass
class DocumentMetadata:
    """
    Document information that needs to be inserted into Postgres on first time encountering this
    document during indexing across any of the connectors.
    """

    connector_id: int
    credential_id: int
    document_id: str
    semantic_identifier: str
    first_link: str
    doc_updated_at: datetime | None = None
    # Emails, not necessarily attached to users
    # Users may not be in Onyx
    primary_owners: list[str] | None = None
    secondary_owners: list[str] | None = None
    from_ingestion_api: bool = False

    external_access: ExternalAccess | None = None
    doc_metadata: dict[str, Any] | None = None

    # The resolved database ID of the parent hierarchy node (folder/container)
    parent_hierarchy_node_id: int | None = None


@dataclass
class VespaDocumentFields:
    """
    Specifies fields in Vespa for a document.  Fields set to None will be ignored.
    Perhaps we should name this in an implementation agnostic fashion, but it's more
    understandable like this for now.
    """

    # all other fields except these 4 will always be left alone by the update request
    access: DocumentAccess | None = None
    document_sets: set[str] | None = None
    boost: float | None = None
    hidden: bool | None = None
    aggregated_chunk_boost_factor: float | None = None


@dataclass
class VespaDocumentUserFields:
    """
    Fields that are specific to the user who is indexing the document.
    """

    user_projects: list[int] | None = None
    personas: list[int] | None = None


@dataclass
class UpdateRequest:
    """
    For all document_ids, update the allowed_users and the boost to the new values
    Does not update any of the None fields
    """

    minimal_document_indexing_info: list[MinimalDocumentIndexingInfo]
    # all other fields except these 4 will always be left alone by the update request
    access: DocumentAccess | None = None
    document_sets: set[str] | None = None
    boost: float | None = None
    hidden: bool | None = None


class Verifiable(abc.ABC):
    """
    Class must implement document index schema verification. For example, verify that all of the
    necessary attributes for indexing, querying, filtering, and fields to return from search are
    all valid in the schema.

    Parameters:
    - index_name: The name of the primary index currently used for querying
    - secondary_index_name: The name of the secondary index being built in the background, if it
            currently exists. Some functions on the document index act on both the primary and
            secondary index, some act on just one.
    """

    @abc.abstractmethod
    def __init__(
        self,
        index_name: str,
        secondary_index_name: str | None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.index_name = index_name
        self.secondary_index_name = secondary_index_name

    @abc.abstractmethod
    def ensure_indices_exist(
        self,
        primary_embedding_dim: int,
        primary_embedding_precision: EmbeddingPrecision,
        secondary_index_embedding_dim: int | None,
        secondary_index_embedding_precision: EmbeddingPrecision | None,
    ) -> None:
        """
        Verify that the document index exists and is consistent with the expectations in the code.

        Parameters:
        - primary_embedding_dim: Vector dimensionality for the vector similarity part of the search
        - primary_embedding_precision: Precision of the vector similarity part of the search
        - secondary_index_embedding_dim: Vector dimensionality of the secondary index being built
                behind the scenes. The secondary index should only be built when switching
                embedding models therefore this dim should be different from the primary index.
        - secondary_index_embedding_precision: Precision of the vector similarity part of the secondary index
        """
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def register_multitenant_indices(
        indices: list[str],
        embedding_dims: list[int],
        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
        """
        Register multitenant indices with the document index.
        """
        raise NotImplementedError


class Indexable(abc.ABC):
    """
    Class must implement the ability to index document chunks
    """

    @abc.abstractmethod
    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[DocumentInsertionRecord]:
        """
        Takes a list of document chunks and indexes them in the document index

        NOTE: When a document is reindexed/updated here, it must clear all of the existing document
        chunks before reindexing. This is because the document may have gotten shorter since the
        last run. Therefore, upserting the first 0 through n chunks may leave some old chunks that
        have not been written over.

        NOTE: The chunks of a document are never separated into separate index() calls. So there is
        no worry of receiving the first 0 through n chunks in one index call and the next n through
        m chunks of a docu in the next index call.

        NOTE: Due to some asymmetry between the primary and secondary indexing logic, this function
        only needs to index chunks into the PRIMARY index. Do not update the secondary index here,
        it is done automatically outside of this code.

        Parameters:
        - chunks: Document chunks with all of the information needed for
                indexing to the document index.
        - tenant_id: The tenant id of the user whose chunks are being indexed
        - large_chunks_enabled: Whether large chunks are enabled

        Returns:
            List of document ids which map to unique documents and are used for deduping chunks
            when updating, as well as if the document is newly indexed or already existed and
            just updated
        """
        raise NotImplementedError


class Deletable(abc.ABC):
    """
    Class must implement the ability to delete document by a given unique document id.
    """

    @abc.abstractmethod
    def delete_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,
        chunk_count: int | None,
    ) -> int:
        """
        Given a single document id, hard delete it from the document index

        Parameters:
        - doc_id: document id as specified by the connector
        """
        raise NotImplementedError


class Updatable(abc.ABC):
    """
    Class must implement the ability to update certain attributes of a document without needing to
    update all of the fields. Specifically, needs to be able to update:
    - Access Control List
    - Document-set membership
    - Boost value (learning from feedback mechanism)
    - Whether the document is hidden or not, hidden documents are not returned from search
    """

    @abc.abstractmethod
    def update_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,
        chunk_count: int | None,
        fields: VespaDocumentFields | None,
        user_fields: VespaDocumentUserFields | None,
    ) -> None:
        """
        Updates all chunks for a document with the specified fields.
        None values mean that the field does not need an update.

        The rationale for a single update function is that it allows retries and parallelism
        to happen at a higher / more strategic level, is simpler to read, and allows
        us to individually handle error conditions per document.

        Parameters:
        - fields: the fields to update in the document. Any field set to None will not be changed.

        Return:
            None
        """
        raise NotImplementedError


class IdRetrievalCapable(abc.ABC):
    """
    Class must implement the ability to retrieve either:
    - all of the chunks of a document IN ORDER given a document id.
    - a specific chunk given a document id and a chunk index (0 based)
    """

    @abc.abstractmethod
    def id_based_retrieval(
        self,
        chunk_requests: list[VespaChunkRequest],
        filters: IndexFilters,
        batch_retrieval: bool = False,
    ) -> list[InferenceChunk]:
        """
        Fetch chunk(s) based on document id

        NOTE: This is used to reconstruct a full document or an extended (multi-chunk) section
        of a document. Downstream currently assumes that the chunking does not introduce overlaps
        between the chunks. If there are overlaps for the chunks, then the reconstructed document
        or extended section will have duplicate segments.

        Parameters:
        - chunk_requests: requests containing the document id and the chunk range to retrieve
        - filters: Filters to apply to retrieval
        - batch_retrieval: If True, perform a batch retrieval

        Returns:
            list of chunks for the document id or the specific chunk by the specified chunk index
            and document id
        """
        raise NotImplementedError


class HybridCapable(abc.ABC):
    """
    Class must implement hybrid (keyword + vector) search functionality
    """

    @abc.abstractmethod
    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        final_keywords: list[str] | None,
        filters: IndexFilters,
        hybrid_alpha: float,
        time_decay_multiplier: float,
        num_to_retrieve: int,
        ranking_profile_type: QueryExpansionType,
        title_content_ratio: float | None = TITLE_CONTENT_RATIO,
    ) -> list[InferenceChunk]:
        """
        Run hybrid search and return a list of inference chunks.

        NOTE: the query passed in here is the unprocessed plain text query. Preprocessing is
        expected to be handled by this function as it may depend on the index implementation.
        Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are
        done here.

        Parameters:
        - query: unmodified user query. This is needed for getting the matching highlighted
                keywords
        - query_embedding: vector representation of the query, must be of the correct
                dimensionality for the primary index
        - final_keywords: Final keywords to be used from the query, defaults to query if not set
        - filters: standard filter object
        - hybrid_alpha: weighting between the keyword and vector search results. It is important
                that the two scores are normalized to the same range so that a meaningful
                comparison can be made. 1 for 100% weighting on vector score, 0 for 100% weighting
                on keyword score.
        - time_decay_multiplier: how much to decay the document scores as they age. Some queries
                based on the persona settings, will have this be a 2x or 3x of the default
        - num_to_retrieve: number of highest matching chunks to return

        Returns:
            best matching chunks based on weighted sum of keyword and vector/semantic search scores
        """
        raise NotImplementedError


class AdminCapable(abc.ABC):
    """
    Class must implement a search for the admin "Explorer" page. The assumption here is that the
    admin is not "searching" for knowledge but has some document already in mind. They are either
    looking to positively boost it because they know it's a good reference document, looking to
    negatively boost it as a way of "deprecating", or hiding the document.

    Assuming the admin knows the document name, this search has high emphasis on the title match.

    Suggested implementation:
    Keyword only, BM25 search with 5x weighting on the title field compared to the contents
    """

    @abc.abstractmethod
    def admin_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        filters: IndexFilters,
        num_to_retrieve: int = NUM_RETURNED_HITS,
    ) -> list[InferenceChunk]:
        """
        Run the special search for the admin document explorer page

        Parameters:
        - query: unmodified user query. Though in this flow probably unmodified is best
        - filters: standard filter object
        - num_to_retrieve: number of highest matching chunks to return

        Returns:
            list of best matching chunks for the explorer page query
        """
        raise NotImplementedError


class RandomCapable(abc.ABC):
    """Class must implement random document retrieval capability"""

    @abc.abstractmethod
    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
    ) -> list[InferenceChunk]:
        """Retrieve random chunks matching the filters"""
        raise NotImplementedError


class BaseIndex(
    Verifiable,
    Indexable,
    Updatable,
    Deletable,
    AdminCapable,
    IdRetrievalCapable,
    RandomCapable,
    abc.ABC,
):
    """
    All basic document index functionalities excluding the actual querying approach.

    As a summary, document indices need to be able to
    - Verify the schema definition is valid
    - Index new documents
    - Update specific attributes of existing documents
    - Delete documents
    - Provide a search for the admin document explorer page
    - Retrieve documents based on document id
    """


class DocumentIndex(HybridCapable, BaseIndex, abc.ABC):
    """
    A valid document index that can plug into all Onyx flows must implement all of these
    functionalities, though "technically" it does not need to be keyword or vector capable as
    currently all default search flows use Hybrid Search.
    """


================================================
FILE: backend/onyx/document_index/interfaces_new.py
================================================
import abc
from collections.abc import Iterable
from typing import Self

from pydantic import BaseModel
from pydantic import model_validator

from onyx.access.models import DocumentAccess
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.model_server_models import Embedding

# NOTE: "Document" in the naming convention is used to refer to the entire
# document as represented in Onyx. What is actually stored in the index is the
# document chunks. By the terminology of most search engines / vector databases,
# the individual objects stored are called documents, but in this case it refers
# to a chunk.


__all__ = [
    # Main interfaces - these are what you should inherit from
    "DocumentIndex",
    # Data models - used in method signatures
    "DocumentInsertionRecord",
    "DocumentSectionRequest",
    "IndexingMetadata",
    "MetadataUpdateRequest",
    # Capability mixins - for custom compositions or type checking
    "SchemaVerifiable",
    "Indexable",
    "Deletable",
    "Updatable",
    "IdRetrievalCapable",
    "HybridCapable",
    "RandomCapable",
]


class TenantState(BaseModel):
    """
    Captures the tenant-related state for an instance of DocumentIndex.

    NOTE: Tenant ID must be set in multitenant mode.
    """

    model_config = {"frozen": True}

    tenant_id: str
    multitenant: bool

    def __str__(self) -> str:
        return (
            f"TenantState(tenant_id={self.tenant_id}, multitenant={self.multitenant})"
        )

    @model_validator(mode="after")
    def check_tenant_id_is_set_in_multitenant_mode(self) -> Self:
        if self.multitenant and not self.tenant_id:
            raise ValueError("Bug: Tenant ID must be set in multitenant mode.")
        return self


class DocumentInsertionRecord(BaseModel):
    """
    Result of indexing a document.
    """

    model_config = {"frozen": True}

    document_id: str
    already_existed: bool


class DocumentSectionRequest(BaseModel):
    """Request for a document section or whole document.

    If no min_chunk_ind is provided it should start at the beginning of the
    document.
    If no max_chunk_ind is provided it should go to the end of the document.
    """

    model_config = {"frozen": True}

    document_id: str
    min_chunk_ind: int | None = None
    max_chunk_ind: int | None = None
    # A given document can have multiple chunking strategies.
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    @model_validator(mode="after")
    def check_chunk_index_range_is_valid(self) -> Self:
        if (
            self.min_chunk_ind is not None
            and self.max_chunk_ind is not None
            and self.min_chunk_ind > self.max_chunk_ind
        ):
            raise ValueError(
                "Bug: Min chunk index must be less than or equal to max chunk index."
            )
        return self


class IndexingMetadata(BaseModel):
    """
    Information about chunk counts for efficient cleaning / updating of document
    chunks.

    A common pattern to ensure that no chunks are left over is to delete all of
    the chunks for a document and then re-index the document. This information
    allows us to only delete the extra "tail" chunks when the document has
    gotten shorter.
    """

    class ChunkCounts(BaseModel):
        model_config = {"frozen": True}

        old_chunk_cnt: int
        new_chunk_cnt: int

    model_config = {"frozen": True}

    doc_id_to_chunk_cnt_diff: dict[str, ChunkCounts]


class MetadataUpdateRequest(BaseModel):
    """
    Updates to the documents that can happen without there being an update to
    the contents of the document.
    """

    model_config = {"frozen": True}

    document_ids: list[str]
    # Passed in to help with potential optimizations of the implementation. The
    # keys should be redundant with document_ids.
    # NOTE: Generally the chunk count should always be known, however for
    # documents still using the legacy chunk ID system it may not be. Any chunk
    # count value < 0 should represent an unknown chunk count.
    doc_id_to_chunk_cnt: dict[str, int]
    # For the ones that are None, there is no update required to that field.
    access: DocumentAccess | None = None
    document_sets: set[str] | None = None
    boost: float | None = None
    hidden: bool | None = None
    secondary_index_updated: bool | None = None
    project_ids: set[int] | None = None
    persona_ids: set[int] | None = None


class IndexRetrievalFilters(BaseModel):
    """
    Filters for retrieving chunks from the index.

    Used to filter on permissions and other Onyx-specific metadata rather than
    chunk content. Should be passed in for every retrieval method.

    TODO(andrei): Currently unused, use this when making retrieval methods more
    strict.
    """

    model_config = {"frozen": True}

    # frozenset gets around the issue of python's mutable defaults.
    # WARNING: Falls back to only public docs as default for security. If
    # callers want no access filtering they must explicitly supply an empty set.
    # Doing so should be done sparingly.
    access_control_list: frozenset[str] = frozenset({PUBLIC_DOC_PAT})


class SchemaVerifiable(abc.ABC):
    """
    Class must implement document index schema verification. For example, verify
    that all of the necessary attributes for indexing, querying, filtering, and
    fields to return from search are all valid in the schema.
    """

    @abc.abstractmethod
    def verify_and_create_index_if_necessary(
        self,
        embedding_dim: int,
        embedding_precision: EmbeddingPrecision,
    ) -> None:
        """
        Verifies that the document index exists and is consistent with the
        expectations in the code.

        For certain search engines, the schema needs to be created before
        indexing can happen. This call should create the schema if it does not
        exist.

        Args:
            embedding_dim: Vector dimensionality for the vector similarity part
                of the search.
            embedding_precision: Precision of the values of the vectors for the
                similarity part of the search.
        """
        raise NotImplementedError


class Indexable(abc.ABC):
    """
    Class must implement the ability to index document chunks.
    """

    @abc.abstractmethod
    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        """Indexes an iterable of document chunks into the document index.

        This is often a batch operation including chunks from multiple
        documents.

        NOTE: When a document is reindexed/updated here and has gotten shorter,
        it is important to delete the extra chunks at the end to ensure there
        are no stale chunks in the index. The implementation should do this.

        NOTE: The chunks of a document are never separated into separate index()
        calls. So there is no worry of receiving the first 0 through n chunks in
        one index call and the next n through m chunks of a document in the next
        index call.

        Args:
            chunks: Document chunks with all of the information needed for
                indexing to the document index.
            indexing_metadata: Information about chunk counts for efficient
                cleaning / updating.

        Returns:
            List of document IDs which map to unique documents as well as if the
                document is newly indexed or had already existed and was just
                updated.
        """
        raise NotImplementedError


class Deletable(abc.ABC):
    """
    Class must implement the ability to delete a document by a given unique
    document ID.
    """

    @abc.abstractmethod
    def delete(
        self,
        # TODO(andrei): Fine for now but this can probably be a batch operation
        # that takes in a list of IDs.
        document_id: str,
        chunk_count: int | None = None,
        # TODO(andrei): Shouldn't this also have some acl filtering at minimum?
    ) -> int:
        """
        Hard deletes all of the chunks for the corresponding document in the
        document index.

        TODO(andrei): Not a pressing issue now but think about what we want the
        contract of this method to be in the event the specified document ID
        does not exist.

        Args:
            document_id: The unique identifier for the document as represented
                in Onyx, not necessarily in the document index.
            chunk_count: The number of chunks in the document. May be useful for
                improving the efficiency of the delete operation. Defaults to
                None.

        Returns:
            The number of chunks deleted.
        """
        raise NotImplementedError


class Updatable(abc.ABC):
    """
    Class must implement the ability to update certain attributes of a document
    without needing to update all of the fields. Specifically, needs to be able
    to update:
    - Access Control List
    - Document-set membership
    - Boost value (learning from feedback mechanism)
    - Whether the document is hidden or not; hidden documents are not returned
      from search
    - Which Projects the document is a part of
    """

    @abc.abstractmethod
    def update(
        self,
        update_requests: list[MetadataUpdateRequest],
    ) -> None:
        """Updates some set of chunks.

        The document and fields to update are specified in the update requests.
        Each update request in the list applies its changes to a list of
        document IDs. None values mean that the field does not need an update.

        Args:
            update_requests: A list of update requests, each containing a list
                of document IDs and the fields to update. The field updates
                apply to all of the specified documents in each update request.
        """
        raise NotImplementedError


class IdRetrievalCapable(abc.ABC):
    """
    Class must implement the ability to retrieve either:
    - All of the chunks of a document IN ORDER given a document ID.
    - A specific section (continuous set of chunks) for some document.
    """

    @abc.abstractmethod
    def id_based_retrieval(
        self,
        chunk_requests: list[DocumentSectionRequest],
        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.
        filters: IndexFilters,
        # TODO(andrei): This is temporary, we will not expose this in the long
        # run.
        batch_retrieval: bool = False,
        # TODO(andrei): Add a param for whether to retrieve hidden docs.
    ) -> list[InferenceChunk]:
        """Fetches chunk(s) based on document ID.

        NOTE: This is used to reconstruct a full document or an extended
        (multi-chunk) section of a document. Downstream currently assumes that
        the chunking does not introduce overlaps between the chunks. If there
        are overlaps for the chunks, then the reconstructed document or extended
        section will have duplicate segments.

        Args:
            chunk_requests: Requests containing the document ID and the chunk
                range to retrieve.

        Returns:
            List of sections from the documents specified.
        """
        raise NotImplementedError


class HybridCapable(abc.ABC):
    """
    Class must implement hybrid (keyword + vector) search functionality.
    """

    @abc.abstractmethod
    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        # TODO(andrei): This param is not great design, get rid of it.
        final_keywords: list[str] | None,
        query_type: QueryType,
        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        """Runs hybrid search and returns a list of inference chunks.

        Args:
            query: Unmodified user query. This may be needed for getting the
                matching highlighted keywords or for logging purposes.
            query_embedding: Vector representation of the query. Must be of the
                correct dimensionality for the primary index.
            final_keywords: Final keywords to be used from the query; defaults
                to query if not set.
            query_type: Semantic or keyword type query; may use different
                scoring logic for each.
            filters: Filters for things like permissions, source type, time,
                etc.
            num_to_retrieve: Number of highest matching chunks to return.

        Returns:
            Score-ranked (highest first) list of highest matching chunks.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def keyword_retrieval(
        self,
        query: str,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        """Runs keyword-only search and returns a list of inference chunks.

        Args:
            query: User query.
            filters: Filters for things like permissions, source type, time,
                etc.
            num_to_retrieve: Number of highest matching chunks to return.

        Returns:
            Score-ranked (highest first) list of highest matching chunks.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def semantic_retrieval(
        self,
        query_embedding: Embedding,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        """Runs semantic-only search and returns a list of inference chunks.

        Args:
            query_embedding: Vector representation of the query. Must be of the
                correct dimensionality for the primary index.
            filters: Filters for things like permissions, source type, time,
                etc.
            num_to_retrieve: Number of highest matching chunks to return.

        Returns:
            Score-ranked (highest first) list of highest matching chunks.
        """
        raise NotImplementedError


class RandomCapable(abc.ABC):
    """
    Class must implement random document retrieval.
    """

    @abc.abstractmethod
    def random_retrieval(
        self,
        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.
        filters: IndexFilters,
        num_to_retrieve: int = 10,
        dirty: bool | None = None,
    ) -> list[InferenceChunk]:
        """Retrieves random chunks matching the filters.

        Args:
            filters: Filters for things like permissions, source type, time,
                etc.
            num_to_retrieve: Number of chunks to retrieve. Defaults to 10.
            dirty: If set, retrieve chunks whose "dirty" flag matches this
                argument. If None, there is no restriction on retrieved chunks
                with respect to that flag. A chunk is considered dirty if there
                is a secondary index but the chunk's state has not been ported
                over to it yet. Defaults to None.

        Returns:
            List of chunks matching the filters.
        """
        raise NotImplementedError


class DocumentIndex(
    SchemaVerifiable,
    Indexable,
    Updatable,
    Deletable,
    HybridCapable,
    IdRetrievalCapable,
    RandomCapable,
    abc.ABC,
):
    """
    A valid document index that can plug into all Onyx flows must implement all
    of these functionalities.

    As a high-level summary, document indices need to be able to:
    - Verify the schema definition is valid
    - Index new documents
    - Update specific attributes of existing documents
    - Delete documents
    - Run hybrid search
    - Retrieve document or sections of documents based on document id
    - Retrieve sets of random documents
    """


================================================
FILE: backend/onyx/document_index/opensearch/README.md
================================================
# Opensearch Idiosyncrasies

## How it works at a high level
Opensearch has 2 phases, a `Search` phase and a `Fetch` phase. The `Search` phase works by getting the document scores on each
shard separately, then typically a fetch phase grabs all of the relevant fields/data for returning to the user. There is also
an intermediate phase (seemingly built specifically to handle hybrid search queries) which can run in between as a processor.
References:
https://docs.opensearch.org/latest/search-plugins/search-pipelines/search-processors/
https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
https://docs.opensearch.org/latest/query-dsl/compound/hybrid/

## How Hybrid queries work
Hybrid queries are basically parallel queries that each run through their own `Search` phase and do not interact in any way.
They also run across all the shards. It is not entirely clear what happens if a combination pipeline is not specified for them,
perhaps the scores are just summed.

When the normalization processor is applied to keyword/vector hybrid searches, documents that show up due to keyword match may
not also have showed up in the vector search and vice versa. In these situations, it just receives a 0 score for the missing
query component. Opensearch does not run another phase to recapture those missing values. The impact of this is that after
normalizing, the missing scores are 0 but this is a higher score than if it actually received a non-zero score.

This may not be immediately obvious so an explanation is included here. If it got a non-zero score instead, it must be lower
than all of the other scores of the list (otherwise it would have shown up). Therefore it would impact the normalization and
push the other scores higher so that it's not only the lowest score still, but now it's a differentiated lowest score. This is
not strictly the case in a multi-node setup but the high level concept approximately holds. So basically the 0 score is a form
of "minimum value clipping".

## On time decay and boosting
Embedding models do not have a uniform distribution from 0 to 1. The values typically cluster strongly around 0.6 to 0.8 but also
varies between models and even the query. It is not a safe assumption to pre-normalize the scores so we also cannot apply any
additive or multiplicative boost to it. i.e. if results of a doc cluster around 0.6 to 0.8 and I give a 50% penalty to the score,
it doesn't bring a result from the top of the range to 50th percentile, it brings it under the 0.6 and is now the worst match.
Same logic applies to additive boosting.

So these boosts can only be applied after normalization. Unfortunately with Opensearch, the normalization processor runs last
and only applies to the results of the completely independent `Search` phase queries. So if a time based boost (a separate
query which filters on recently updated documents) is added, it would not be able to introduce any new documents
to the set (since the new documents would have no keyword/vector score or already be present) since the 0 scores on keyword
and vector would make the docs which only came because of time filter very low scoring. This can however make some of the lower
scored documents from the union of all the `Search` phase documents to show up higher and potentially not get dropped before
being fetched and returned to the user. But there are other issues of including these:
- There is no way to sort by this field, only a filter, so there's no way to guarantee the best docs even irrespective of the
contents. If there are lots of updates, this may miss.
- There is not a good way to normalize this field, the best is to clip it on the bottom.
- This would require using min-max norm but z-score norm is better for the other functions due to things like it being less
sensitive to outliers, better handles distribution drifts (min-max assumes stable meaningful ranges), better for comparing
"unusual-ness" across distributions.

So while it is possible to apply time based boosting at the normalization stage (or specifically to the keyword score), we have
decided it is better to not apply it during the OpenSearch query.

Because of these limitations, Onyx in code applies further refinements, boostings, etc. based on OpenSearch providing an initial
filtering. The impact of time decay and boost should not be so big that we would need orders of magnitude more results back
from OpenSearch.

## Other concepts to be aware of
Within the `Search` phase, there are optional steps like Rescore but these are not useful for the combination/normalization
work that is relevant for the hybrid search. Since the Rescore happens prior to normalization, it's not able to provide any
meaningful operations to the query for our usage.

Because the Title is included in the Contents for both embedding and keyword searches, the Title scores are very low relative to
the actual full contents scoring. It is seen as a boost rather than a core scoring component. Time decay works similarly.


================================================
FILE: backend/onyx/document_index/opensearch/client.py
================================================
import json
import logging
import time
from contextlib import AbstractContextManager
from contextlib import nullcontext
from typing import Any
from typing import Generic
from typing import TypeVar

from opensearchpy import OpenSearch
from opensearchpy import TransportError
from opensearchpy.helpers import bulk
from pydantic import BaseModel

from onyx.configs.app_configs import DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S
from onyx.configs.app_configs import OPENSEARCH_ADMIN_PASSWORD
from onyx.configs.app_configs import OPENSEARCH_ADMIN_USERNAME
from onyx.configs.app_configs import OPENSEARCH_HOST
from onyx.configs.app_configs import OPENSEARCH_REST_API_PORT
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import OpenSearchSearchType
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW
from onyx.server.metrics.opensearch_search import observe_opensearch_search
from onyx.server.metrics.opensearch_search import track_opensearch_search_in_progress
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time


CLIENT_THRESHOLD_TO_LOG_SLOW_SEARCH_MS = 2000


logger = setup_logger(__name__)
# Set the logging level to WARNING to ignore INFO and DEBUG logs from
# opensearch. By default it emits INFO-level logs for every request.
# The opensearch-py library uses "opensearch" as the logger name for HTTP
# requests (see opensearchpy/connection/base.py)
opensearch_logger = logging.getLogger("opensearch")
opensearch_logger.setLevel(logging.WARNING)


SchemaDocumentModel = TypeVar("SchemaDocumentModel")


class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
    """Represents a hit from OpenSearch in response to a query.

    Templated on the specific document model as defined by a schema.
    """

    model_config = {"frozen": True}

    # The document chunk source retrieved from OpenSearch.
    document_chunk: SchemaDocumentModel
    # The match score for the document chunk as calculated by OpenSearch. Only
    # relevant for "fuzzy searches"; this will be None for direct queries where
    # score is not relevant like direct retrieval on ID.
    score: float | None = None
    # Maps schema property name to a list of highlighted snippets with match
    # terms wrapped in tags (e.g. "something <hi>keyword</hi> other thing").
    match_highlights: dict[str, list[str]] = {}
    # Score explanation from OpenSearch when "explain": true is set in the
    # query. Contains detailed breakdown of how the score was calculated.
    explanation: dict[str, Any] | None = None


class IndexInfo(BaseModel):
    """
    Represents information about an OpenSearch index.
    """

    model_config = {"frozen": True}

    name: str
    health: str
    status: str
    num_primary_shards: str
    num_replica_shards: str
    docs_count: str
    docs_deleted: str
    created_at: str
    total_size: str
    primary_shards_size: str


def get_new_body_without_vectors(body: dict[str, Any]) -> dict[str, Any]:
    """Recursively replaces vectors in the body with their length.

    TODO(andrei): Do better.

    Args:
        body: The body to replace the vectors.

    Returns:
        A copy of body with vectors replaced with their length.
    """
    new_body: dict[str, Any] = {}
    for k, v in body.items():
        if k == "vector":
            new_body[k] = len(v)
        elif isinstance(v, dict):
            new_body[k] = get_new_body_without_vectors(v)
        elif isinstance(v, list) and len(v) > 0 and isinstance(v[0], dict):
            new_body[k] = [get_new_body_without_vectors(item) for item in v]
        else:
            new_body[k] = v
    return new_body


class OpenSearchClient(AbstractContextManager):
    """Client for interacting with OpenSearch for cluster-level operations.

    Args:
        host: The host of the OpenSearch cluster.
        port: The port of the OpenSearch cluster.
        auth: The authentication credentials for the OpenSearch cluster. A tuple
            of (username, password).
        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
            True.
        verify_certs: Whether to verify the SSL certificates for the OpenSearch
            cluster. Defaults to False.
        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
            to False.
        timeout: The timeout for the OpenSearch cluster. Defaults to
            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
    """

    def __init__(
        self,
        host: str = OPENSEARCH_HOST,
        port: int = OPENSEARCH_REST_API_PORT,
        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
        use_ssl: bool = True,
        verify_certs: bool = False,
        ssl_show_warn: bool = False,
        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
    ):
        logger.debug(
            f"Creating OpenSearch client with host {host}, port {port} and timeout {timeout} seconds."
        )
        self._client = OpenSearch(
            hosts=[{"host": host, "port": port}],
            http_auth=auth,
            use_ssl=use_ssl,
            verify_certs=verify_certs,
            ssl_show_warn=ssl_show_warn,
            # NOTE: This timeout applies to all requests the client makes,
            # including bulk indexing. When exceeded, the client will raise a
            # ConnectionTimeout and return no useful results. The OpenSearch
            # server will log that the client cancelled the request. To get
            # partial results from OpenSearch, pass in a timeout parameter to
            # your request body that is less than this value.
            timeout=timeout,
        )

    def __exit__(self, *_: Any) -> None:
        self.close()

    def __del__(self) -> None:
        try:
            self.close()
        except Exception:
            pass

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def create_search_pipeline(
        self,
        pipeline_id: str,
        pipeline_body: dict[str, Any],
    ) -> None:
        """Creates a search pipeline.

        See the OpenSearch documentation for more information on the search
        pipeline body.
        https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/

        Args:
            pipeline_id: The ID of the search pipeline to create.
            pipeline_body: The body of the search pipeline to create.

        Raises:
            Exception: There was an error creating the search pipeline.
        """
        response = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
        if not response.get("acknowledged", False):
            raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def delete_search_pipeline(self, pipeline_id: str) -> None:
        """Deletes a search pipeline.

        Args:
            pipeline_id: The ID of the search pipeline to delete.

        Raises:
            Exception: There was an error deleting the search pipeline.
        """
        response = self._client.search_pipeline.delete(id=pipeline_id)
        if not response.get("acknowledged", False):
            raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def put_cluster_settings(self, settings: dict[str, Any]) -> bool:
        """Puts cluster settings.

        Args:
            settings: The settings to put.

        Raises:
            Exception: There was an error putting the cluster settings.

        Returns:
            True if the settings were put successfully, False otherwise.
        """
        response = self._client.cluster.put_settings(body=settings)
        if response.get("acknowledged", False):
            logger.info("Successfully put cluster settings.")
            return True
        else:
            logger.error(f"Failed to put cluster settings: {response}.")
            return False

    @log_function_time(print_only=True, debug_only=True)
    def list_indices_with_info(self) -> list[IndexInfo]:
        """
        Lists the indices in the OpenSearch cluster with information about each
        index.

        Returns:
            A list of IndexInfo objects for each index.
        """
        response = self._client.cat.indices(format="json")
        indices: list[IndexInfo] = []
        for raw_index_info in response:
            indices.append(
                IndexInfo(
                    name=raw_index_info.get("index", ""),
                    health=raw_index_info.get("health", ""),
                    status=raw_index_info.get("status", ""),
                    num_primary_shards=raw_index_info.get("pri", ""),
                    num_replica_shards=raw_index_info.get("rep", ""),
                    docs_count=raw_index_info.get("docs.count", ""),
                    docs_deleted=raw_index_info.get("docs.deleted", ""),
                    created_at=raw_index_info.get("creation.date.string", ""),
                    total_size=raw_index_info.get("store.size", ""),
                    primary_shards_size=raw_index_info.get("pri.store.size", ""),
                )
            )
        return indices

    @log_function_time(print_only=True, debug_only=True)
    def ping(self) -> bool:
        """Pings the OpenSearch cluster.

        Returns:
            True if OpenSearch could be reached, False if it could not.
        """
        return self._client.ping()

    def close(self) -> None:
        """Closes the client.

        Raises:
            Exception: There was an error closing the client.
        """
        self._client.close()


class OpenSearchIndexClient(OpenSearchClient):
    """Client for interacting with OpenSearch for index-level operations.

    OpenSearch's Python module has pretty bad typing support so this client
    attempts to protect the rest of the codebase from this. As a consequence,
    most methods here return the minimum data needed for the rest of Onyx, and
    tend to rely on Exceptions to handle errors.

    TODO(andrei): This class currently assumes the structure of the database
    schema when it returns a DocumentChunk. Make the class, or at least the
    search method, templated on the structure the caller can expect.

    Args:
        index_name: The name of the index to interact with.
        host: The host of the OpenSearch cluster.
        port: The port of the OpenSearch cluster.
        auth: The authentication credentials for the OpenSearch cluster. A tuple
            of (username, password).
        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
            True.
        verify_certs: Whether to verify the SSL certificates for the OpenSearch
            cluster. Defaults to False.
        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
            to False.
        timeout: The timeout for the OpenSearch cluster. Defaults to
            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
    """

    def __init__(
        self,
        index_name: str,
        host: str = OPENSEARCH_HOST,
        port: int = OPENSEARCH_REST_API_PORT,
        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
        use_ssl: bool = True,
        verify_certs: bool = False,
        ssl_show_warn: bool = False,
        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
        emit_metrics: bool = True,
    ):
        super().__init__(
            host=host,
            port=port,
            auth=auth,
            use_ssl=use_ssl,
            verify_certs=verify_certs,
            ssl_show_warn=ssl_show_warn,
            timeout=timeout,
        )
        self._index_name = index_name
        self._emit_metrics = emit_metrics
        logger.debug(
            f"OpenSearch client created successfully for index {self._index_name}."
        )

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def create_index(self, mappings: dict[str, Any], settings: dict[str, Any]) -> None:
        """Creates the index.

        See the OpenSearch documentation for more information on mappings and
        settings.

        Args:
            mappings: The mappings for the index to create.
            settings: The settings for the index to create.

        Raises:
            Exception: There was an error creating the index.
        """
        body: dict[str, Any] = {
            "mappings": mappings,
            "settings": settings,
        }
        logger.debug(f"Creating index {self._index_name} with body {body}.")
        response = self._client.indices.create(index=self._index_name, body=body)
        if not response.get("acknowledged", False):
            raise RuntimeError(f"Failed to create index {self._index_name}.")
        response_index = response.get("index", "")
        if response_index != self._index_name:
            raise RuntimeError(
                f"OpenSearch responded with index name {response_index} when creating index {self._index_name}."
            )
        logger.debug(f"Index {self._index_name} created successfully.")

    @log_function_time(print_only=True, debug_only=True)
    def delete_index(self) -> bool:
        """Deletes the index.

        Raises:
            Exception: There was an error deleting the index.

        Returns:
            True if the index was deleted, False if it did not exist.
        """
        if not self._client.indices.exists(index=self._index_name):
            logger.warning(
                f"Tried to delete index {self._index_name} but it does not exist."
            )
            return False

        logger.debug(f"Deleting index {self._index_name}.")
        response = self._client.indices.delete(index=self._index_name)
        if not response.get("acknowledged", False):
            raise RuntimeError(f"Failed to delete index {self._index_name}.")
        return True

    @log_function_time(print_only=True, debug_only=True)
    def index_exists(self) -> bool:
        """Checks if the index exists.

        Raises:
            Exception: There was an error checking if the index exists.

        Returns:
            True if the index exists, False if it does not.
        """
        return self._client.indices.exists(index=self._index_name)

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def put_mapping(self, mappings: dict[str, Any]) -> None:
        """Updates the index mapping in an idempotent manner.

        - Existing fields with the same definition: No-op (succeeds silently).
        - New fields: Added to the index.
        - Existing fields with different types: Raises exception (requires
          reindex).

        See the OpenSearch documentation for more information:
        https://docs.opensearch.org/latest/api-reference/index-apis/put-mapping/

        Args:
            mappings: The complete mapping definition to apply. This will be
                merged with existing mappings in the index.

        Raises:
            Exception: There was an error updating the mappings, such as
                attempting to change the type of an existing field.
        """
        logger.debug(
            f"Putting mappings for index {self._index_name} with mappings {mappings}."
        )
        response = self._client.indices.put_mapping(
            index=self._index_name, body=mappings
        )
        if not response.get("acknowledged", False):
            raise RuntimeError(
                f"Failed to put the mapping update for index {self._index_name}."
            )
        logger.debug(f"Successfully put mappings for index {self._index_name}.")

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def validate_index(self, expected_mappings: dict[str, Any]) -> bool:
        """Validates the index.

        Short-circuit returns False on the first mismatch. Logs the mismatch.

        See the OpenSearch documentation for more information on the index
        mappings.
        https://docs.opensearch.org/latest/mappings/

        Args:
            mappings: The expected mappings of the index to validate.

        Raises:
            Exception: There was an error validating the index.

        Returns:
            True if the index is valid, False if it is not based on the mappings
                supplied.
        """
        # OpenSearch's documentation makes no mention of what happens when you
        # invoke client.indices.get on an index that does not exist, so we check
        # for existence explicitly just to be sure.
        exists_response = self.index_exists()
        if not exists_response:
            logger.warning(
                f"Tried to validate index {self._index_name} but it does not exist."
            )
            return False
        logger.debug(
            f"Validating index {self._index_name} with expected mappings {expected_mappings}."
        )

        get_result = self._client.indices.get(index=self._index_name)
        index_info: dict[str, Any] = get_result.get(self._index_name, {})
        if not index_info:
            raise ValueError(
                f"Bug: OpenSearch did not return any index info for index {self._index_name}, "
                "even though it confirmed that the index exists."
            )
        index_mapping_properties: dict[str, Any] = index_info.get("mappings", {}).get(
            "properties", {}
        )
        expected_mapping_properties: dict[str, Any] = expected_mappings.get(
            "properties", {}
        )
        assert (
            expected_mapping_properties
        ), "Bug: No properties were found in the provided expected mappings."

        for property in expected_mapping_properties:
            if property not in index_mapping_properties:
                logger.warning(
                    f'The field "{property}" was not found in the index {self._index_name}.'
                )
                return False

            expected_property_type = expected_mapping_properties[property].get(
                "type", ""
            )
            assert (
                expected_property_type
            ), f'Bug: The field "{property}" in the supplied expected schema mappings has no type.'

            index_property_type = index_mapping_properties[property].get("type", "")
            if expected_property_type != index_property_type:
                logger.warning(
                    f'The field "{property}" in the index {self._index_name} has type {index_property_type} '
                    f"but the expected type is {expected_property_type}."
                )
                return False

        logger.debug(f"Index {self._index_name} validated successfully.")
        return True

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def update_settings(self, settings: dict[str, Any]) -> None:
        """Updates the settings of the index.

        See the OpenSearch documentation for more information on the index
        settings.
        https://docs.opensearch.org/latest/install-and-configure/configuring-opensearch/index-settings/

        Args:
            settings: The settings to update the index with.

        Raises:
            Exception: There was an error updating the settings of the index.
        """
        # TODO(andrei): Implement this.
        raise NotImplementedError

    @log_function_time(
        print_only=True,
        debug_only=True,
        include_args_subset={
            "document": str,
            "tenant_state": str,
            "update_if_exists": str,
        },
    )
    def index_document(
        self,
        document: DocumentChunk,
        tenant_state: TenantState,
        update_if_exists: bool = False,
    ) -> None:
        """Indexes a document.

        Args:
            document: The document to index. In Onyx this is a chunk of a
                document, OpenSearch simply refers to this as a document as
                well.
            tenant_state: The tenant state of the caller.
            update_if_exists: Whether to update the document if it already
                exists. If False, will raise an exception if the document
                already exists. Defaults to False.

        Raises:
            Exception: There was an error indexing the document. This includes
                the case where a document with the same ID already exists if
                update_if_exists is False.
        """
        logger.debug(
            f"Trying to index document ID {document.document_id} for tenant {tenant_state.tenant_id}. "
            f"update_if_exists={update_if_exists}."
        )
        document_chunk_id: str = get_opensearch_doc_chunk_id(
            tenant_state=tenant_state,
            document_id=document.document_id,
            chunk_index=document.chunk_index,
            max_chunk_size=document.max_chunk_size,
        )
        body: dict[str, Any] = document.model_dump(exclude_none=True)
        # client.create will raise if a doc with the same ID exists.
        # client.index does not do this.
        if update_if_exists:
            result = self._client.index(
                index=self._index_name, id=document_chunk_id, body=body
            )
        else:
            result = self._client.create(
                index=self._index_name, id=document_chunk_id, body=body
            )
        result_id = result.get("_id", "")
        # Sanity check.
        if result_id != document_chunk_id:
            raise RuntimeError(
                f'Upon trying to index a document, OpenSearch responded with ID "{result_id}" '
                f'instead of "{document_chunk_id}" which is the ID it was given.'
            )
        result_string: str = result.get("result", "")
        match result_string:
            # Sanity check.
            case "created":
                pass
            case "updated":
                if not update_if_exists:
                    raise RuntimeError(
                        f'The OpenSearch client returned result "updated" for indexing document chunk "{document_chunk_id}". '
                        "This indicates that a document chunk with that ID already exists, which is not expected."
                    )
            case _:
                raise RuntimeError(
                    f'Unknown OpenSearch indexing result: "{result_string}".'
                )
        logger.debug(f"Successfully indexed {document_chunk_id}.")

    @log_function_time(
        print_only=True,
        debug_only=True,
        include_args_subset={
            "documents": len,
            "tenant_state": str,
            "update_if_exists": str,
        },
    )
    def bulk_index_documents(
        self,
        documents: list[DocumentChunk],
        tenant_state: TenantState,
        update_if_exists: bool = False,
    ) -> None:
        """Bulk indexes documents.

        Raises if there are any errors during the bulk index. It should be
        assumed that no documents in the batch were indexed successfully if
        there is an error.

        Retries on 429 too many requests.

        Args:
            documents: The documents to index. In Onyx this is a chunk of a
                document, OpenSearch simply refers to this as a document as
                well.
            tenant_state: The tenant state of the caller.
            update_if_exists: Whether to update the document if it already
                exists. If False, will raise an exception if the document
                already exists. Defaults to False.

        Raises:
            Exception: There was an error during the bulk index. This
                includes the case where a document with the same ID already
                exists if update_if_exists is False.
        """
        if not documents:
            return
        logger.debug(
            f"Bulk indexing {len(documents)} documents for tenant {tenant_state.tenant_id}. update_if_exists={update_if_exists}."
        )
        data = []
        for document in documents:
            document_chunk_id: str = get_opensearch_doc_chunk_id(
                tenant_state=tenant_state,
                document_id=document.document_id,
                chunk_index=document.chunk_index,
                max_chunk_size=document.max_chunk_size,
            )
            body: dict[str, Any] = document.model_dump(exclude_none=True)
            data_for_document: dict[str, Any] = {
                "_index": self._index_name,
                "_id": document_chunk_id,
                "_op_type": "index" if update_if_exists else "create",
                "_source": body,
            }
            data.append(data_for_document)
        # max_retries is the number of times to retry a request if we get a 429.
        success, errors = bulk(self._client, data, max_retries=3)
        if errors:
            raise RuntimeError(
                f"Failed to bulk index documents for index {self._index_name}. Errors: {errors}"
            )
        if success != len(documents):
            raise RuntimeError(
                f"OpenSearch reported no errors during bulk index but the number of successful operations "
                f"({success}) does not match the number of documents ({len(documents)})."
            )
        logger.debug(f"Successfully bulk indexed {len(documents)} documents.")

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def delete_document(self, document_chunk_id: str) -> bool:
        """Deletes a document.

        Args:
            document_chunk_id: The OpenSearch ID of the document chunk to
                delete.

        Raises:
            Exception: There was an error deleting the document.

        Returns:
            True if the document was deleted, False if it was not found.
        """
        try:
            logger.debug(
                f"Trying to delete document chunk {document_chunk_id} from index {self._index_name}."
            )
            result = self._client.delete(index=self._index_name, id=document_chunk_id)
        except TransportError as e:
            if e.status_code == 404:
                logger.debug(
                    f"Document chunk {document_chunk_id} not found in index {self._index_name}."
                )
                return False
            else:
                raise e

        result_string: str = result.get("result", "")
        match result_string:
            case "deleted":
                logger.debug(
                    f"Successfully deleted document chunk {document_chunk_id} from index {self._index_name}."
                )
                return True
            case "not_found":
                logger.debug(
                    f"Document chunk {document_chunk_id} not found in index {self._index_name}."
                )
                return False
            case _:
                raise RuntimeError(
                    f'Unknown OpenSearch deletion result: "{result_string}".'
                )

    @log_function_time(print_only=True, debug_only=True)
    def delete_by_query(self, query_body: dict[str, Any]) -> int:
        """Deletes documents by a query.

        Args:
            query_body: The body of the query to delete documents by.

        Raises:
            Exception: There was an error deleting the documents.

        Returns:
            The number of documents deleted.
        """
        logger.debug(
            f"Trying to delete documents by query for index {self._index_name}."
        )
        result = self._client.delete_by_query(index=self._index_name, body=query_body)
        if result.get("timed_out", False):
            raise RuntimeError(
                f"Delete by query timed out for index {self._index_name}."
            )
        if len(result.get("failures", [])) > 0:
            raise RuntimeError(
                f"Failed to delete some or all of the documents for index {self._index_name}."
            )

        num_deleted = result.get("deleted", 0)
        num_processed = result.get("total", 0)
        if num_deleted != num_processed:
            raise RuntimeError(
                f"Failed to delete some or all of the documents for index {self._index_name}. "
                f"{num_deleted} documents were deleted out of {num_processed} documents that were processed."
            )

        logger.debug(
            f"Successfully deleted {num_deleted} documents by query for index {self._index_name}."
        )
        return num_deleted

    @log_function_time(
        print_only=True,
        debug_only=True,
        include_args_subset={
            "document_chunk_id": str,
            "properties_to_update": lambda x: x.keys(),
        },
    )
    def update_document(
        self, document_chunk_id: str, properties_to_update: dict[str, Any]
    ) -> None:
        """Updates an OpenSearch document chunk's properties.

        Args:
            document_chunk_id: The OpenSearch ID of the document chunk to
                update.
            properties_to_update: The properties of the document to update. Each
                property should exist in the schema.

        Raises:
            Exception: There was an error updating the document.
        """
        logger.debug(
            f"Trying to update document chunk {document_chunk_id} for index {self._index_name}."
        )
        update_body: dict[str, Any] = {"doc": properties_to_update}
        result = self._client.update(
            index=self._index_name,
            id=document_chunk_id,
            body=update_body,
            _source=False,
        )
        result_id = result.get("_id", "")
        # Sanity check.
        if result_id != document_chunk_id:
            raise RuntimeError(
                f'Upon trying to update a document, OpenSearch responded with ID "{result_id}" '
                f'instead of "{document_chunk_id}" which is the ID it was given.'
            )
        result_string: str = result.get("result", "")
        match result_string:
            # Sanity check.
            case "updated":
                logger.debug(
                    f"Successfully updated document chunk {document_chunk_id} for index {self._index_name}."
                )
                return
            case "noop":
                logger.warning(
                    f'OpenSearch reported a no-op when trying to update document with ID "{document_chunk_id}".'
                )
                return
            case _:
                raise RuntimeError(
                    f'The OpenSearch client returned result "{result_string}" for updating document chunk "{document_chunk_id}". '
                    "This is unexpected."
                )

    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def get_document(self, document_chunk_id: str) -> DocumentChunk:
        """Gets an OpenSearch document chunk.

        Will raise an exception if the document chunk is not found.

        Args:
            document_chunk_id: The OpenSearch ID of the document chunk to get.

        Raises:
            Exception: There was an error getting the document. This includes
                the case where the document is not found.

        Returns:
            The document chunk.
        """
        logger.debug(
            f"Trying to get document chunk {document_chunk_id} from index {self._index_name}."
        )
        result = self._client.get(index=self._index_name, id=document_chunk_id)
        found_result: bool = result.get("found", False)
        if not found_result:
            raise RuntimeError(
                f'Document chunk with ID "{document_chunk_id}" was not found.'
            )

        document_chunk_source: dict[str, Any] | None = result.get("_source")
        if not document_chunk_source:
            raise RuntimeError(
                f'Document chunk with ID "{document_chunk_id}" has no data.'
            )

        logger.debug(
            f"Successfully got document chunk {document_chunk_id} from index {self._index_name}."
        )
        return DocumentChunk.model_validate(document_chunk_source)

    @log_function_time(print_only=True, debug_only=True)
    def search(
        self,
        body: dict[str, Any],
        search_pipeline_id: str | None,
        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
    ) -> list[SearchHit[DocumentChunkWithoutVectors]]:
        """Searches the index.

        NOTE: Does not return vector fields. In order to take advantage of
        performance benefits, the search body should exclude the schema's vector
        fields.

        TODO(andrei): Ideally we could check that every field in the body is
        present in the index, to avoid a class of runtime bugs that could easily
        be caught during development. Or change the function signature to accept
        a predefined pydantic model of allowed fields.

        Args:
            body: The body of the search request. See the OpenSearch
                documentation for more information on search request bodies.
            search_pipeline_id: The ID of the search pipeline to use. If None,
                the default search pipeline will be used.
            search_type: Label for Prometheus metrics. Does not affect search
                behavior.

        Raises:
            Exception: There was an error searching the index.

        Returns:
            List of search hits that match the search request.
        """
        logger.debug(
            f"Trying to search index {self._index_name} with search pipeline {search_pipeline_id}."
        )
        result: dict[str, Any]
        params = {"phase_took": "true"}
        ctx = self._get_emit_metrics_context_manager(search_type)
        t0 = time.perf_counter()
        with ctx:
            if search_pipeline_id:
                result = self._client.search(
                    index=self._index_name,
                    search_pipeline=search_pipeline_id,
                    body=body,
                    params=params,
                )
            else:
                result = self._client.search(
                    index=self._index_name, body=body, params=params
                )
        client_duration_s = time.perf_counter() - t0

        hits, time_took, timed_out, phase_took, profile = (
            self._get_hits_and_profile_from_search_result(result)
        )
        if self._emit_metrics:
            observe_opensearch_search(search_type, client_duration_s, time_took)
        self._log_search_result_perf(
            time_took=time_took,
            timed_out=timed_out,
            phase_took=phase_took,
            profile=profile,
            body=body,
            search_pipeline_id=search_pipeline_id,
            raise_on_timeout=True,
        )

        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
        for hit in hits:
            document_chunk_source: dict[str, Any] | None = hit.get("_source")
            if not document_chunk_source:
                raise RuntimeError(
                    f'Document chunk with ID "{hit.get("_id", "")}" has no data.'
                )
            document_chunk_score = hit.get("_score", None)
            match_highlights: dict[str, list[str]] = hit.get("highlight", {})
            explanation: dict[str, Any] | None = hit.get("_explanation", None)
            search_hit = SearchHit[DocumentChunkWithoutVectors](
                document_chunk=DocumentChunkWithoutVectors.model_validate(
                    document_chunk_source
                ),
                score=document_chunk_score,
                match_highlights=match_highlights,
                explanation=explanation,
            )
            search_hits.append(search_hit)
        logger.debug(
            f"Successfully searched index {self._index_name} and got {len(search_hits)} hits."
        )
        return search_hits

    @log_function_time(print_only=True, debug_only=True)
    def search_for_document_ids(
        self,
        body: dict[str, Any],
        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
    ) -> list[str]:
        """Searches the index and returns only document chunk IDs.

        In order to take advantage of the performance benefits of only returning
        IDs, the body should have a key, value pair of "_source": False.
        Otherwise, OpenSearch will return the entire document body and this
        method's performance will be the same as the search method's.

        TODO(andrei): Ideally we could check that every field in the body is
        present in the index, to avoid a class of runtime bugs that could easily
        be caught during development.

        Args:
            body: The body of the search request. See the OpenSearch
                documentation for more information on search request bodies.
                TODO(andrei): Make this a more deep interface; callers shouldn't
                need to know to set _source: False for example.
            search_type: Label for Prometheus metrics. Does not affect search
                behavior.

        Raises:
            Exception: There was an error searching the index.

        Returns:
            List of document chunk IDs that match the search request.
        """
        logger.debug(
            f"Trying to search for document chunk IDs in index {self._index_name}."
        )
        if "_source" not in body or body["_source"] is not False:
            logger.warning(
                "The body of the search request for document chunk IDs is missing the key, value pair of "
                '"_source": False. This query will therefore be inefficient.'
            )

        params = {"phase_took": "true"}
        ctx = self._get_emit_metrics_context_manager(search_type)
        t0 = time.perf_counter()
        with ctx:
            result: dict[str, Any] = self._client.search(
                index=self._index_name, body=body, params=params
            )
        client_duration_s = time.perf_counter() - t0

        hits, time_took, timed_out, phase_took, profile = (
            self._get_hits_and_profile_from_search_result(result)
        )
        if self._emit_metrics:
            observe_opensearch_search(search_type, client_duration_s, time_took)
        self._log_search_result_perf(
            time_took=time_took,
            timed_out=timed_out,
            phase_took=phase_took,
            profile=profile,
            body=body,
            raise_on_timeout=True,
        )

        # TODO(andrei): Implement scroll/point in time for results so that we
        # can return arbitrarily-many IDs.
        if len(hits) == DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:
            logger.warning(
                "The search request for document chunk IDs returned the maximum number of results. "
                "It is extremely likely that there are more hits in OpenSearch than the returned results."
            )

        # Extract only the _id field from each hit.
        document_chunk_ids: list[str] = []
        for hit in hits:
            document_chunk_id = hit.get("_id")
            if not document_chunk_id:
                raise RuntimeError(
                    "Received a hit from OpenSearch but the _id field is missing."
                )
            document_chunk_ids.append(document_chunk_id)
        logger.debug(
            f"Successfully searched for document chunk IDs in index {self._index_name} and got {len(document_chunk_ids)} hits."
        )
        return document_chunk_ids

    @log_function_time(print_only=True, debug_only=True)
    def refresh_index(self) -> None:
        """Refreshes the index to make recent changes searchable.

        In OpenSearch, documents are not immediately searchable after indexing.
        This method forces a refresh to make them available for search.

        Raises:
            Exception: There was an error refreshing the index.
        """
        self._client.indices.refresh(index=self._index_name)

    def _get_hits_and_profile_from_search_result(
        self, result: dict[str, Any]
    ) -> tuple[list[Any], int | None, bool | None, dict[str, Any], dict[str, Any]]:
        """Extracts the hits and profiling information from a search result.

        Args:
            result: The search result to extract the hits from.

        Raises:
            Exception: There was an error extracting the hits from the search
                result.

        Returns:
            A tuple containing the hits from the search result, the time taken
                to execute the search in milliseconds, whether the search timed
                out, the time taken to execute each phase of the search, and the
                profile.
        """
        time_took: int | None = result.get("took")
        timed_out: bool | None = result.get("timed_out")
        phase_took: dict[str, Any] = result.get("phase_took", {})
        profile: dict[str, Any] = result.get("profile", {})

        hits_first_layer: dict[str, Any] = result.get("hits", {})
        if not hits_first_layer:
            raise RuntimeError(
                f"Hits field missing from response when trying to search index {self._index_name}."
            )
        hits_second_layer: list[Any] = hits_first_layer.get("hits", [])

        return hits_second_layer, time_took, timed_out, phase_took, profile

    def _log_search_result_perf(
        self,
        time_took: int | None,
        timed_out: bool | None,
        phase_took: dict[str, Any],
        profile: dict[str, Any],
        body: dict[str, Any],
        search_pipeline_id: str | None = None,
        raise_on_timeout: bool = False,
    ) -> None:
        """Logs the performance of a search result.

        Args:
            time_took: The time taken to execute the search in milliseconds.
            timed_out: Whether the search timed out.
            phase_took: The time taken to execute each phase of the search.
            profile: The profile for the search.
            body: The body of the search request for logging.
            search_pipeline_id: The ID of the search pipeline used for the
                search, if any, for logging. Defaults to None.
            raise_on_timeout: Whether to raise an exception if the search timed
                out. Note that the result may still contain useful partial
                results. Defaults to False.

        Raises:
            Exception: If raise_on_timeout is True and the search timed out.
        """
        if time_took and time_took > CLIENT_THRESHOLD_TO_LOG_SLOW_SEARCH_MS:
            logger.warning(
                f"OpenSearch client warning: Search for index {self._index_name} took {time_took} milliseconds.\n"
                f"Body: {get_new_body_without_vectors(body)}\n"
                f"Search pipeline ID: {search_pipeline_id}\n"
                f"Phase took: {phase_took}\n"
                f"Profile: {json.dumps(profile, indent=2)}\n"
            )
        if timed_out:
            error_str = f"OpenSearch client error: Search timed out for index {self._index_name}."
            logger.error(error_str)
            if raise_on_timeout:
                raise RuntimeError(error_str)

    def _get_emit_metrics_context_manager(
        self, search_type: OpenSearchSearchType
    ) -> AbstractContextManager[None]:
        """
        Returns a context manager that tracks in-flight OpenSearch searches via
        a Gauge if emit_metrics is True, otherwise returns a null context
        manager.
        """
        return (
            track_opensearch_search_in_progress(search_type)
            if self._emit_metrics
            else nullcontext()
        )


def wait_for_opensearch_with_timeout(
    wait_interval_s: int = 5,
    wait_limit_s: int = 60,
    client: OpenSearchClient | None = None,
) -> bool:
    """Waits for OpenSearch to become ready subject to a timeout.

    Will create a new dummy client if no client is provided. Will close this
    client at the end of the function. Will not close the client if it was
    supplied.

    Args:
        wait_interval_s: The interval in seconds to wait between checks.
            Defaults to 5.
        wait_limit_s: The total timeout in seconds to wait for OpenSearch to
            become ready. Defaults to 60.
        client: The OpenSearch client to use for pinging. If None, a new dummy
            client will be created. Defaults to None.

    Returns:
        True if OpenSearch is ready, False otherwise.
    """
    with nullcontext(client) if client else OpenSearchClient() as client:
        time_start = time.monotonic()
        while True:
            if client.ping():
                logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
                return True
            time_elapsed = time.monotonic() - time_start
            if time_elapsed > wait_limit_s:
                logger.info(
                    f"[OpenSearch] Readiness probe did not succeed within the timeout ({wait_limit_s} seconds)."
                )
                return False
            logger.info(
                f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
            )
            time.sleep(wait_interval_s)


================================================
FILE: backend/onyx/document_index/opensearch/cluster_settings.py
================================================
from typing import Any

OPENSEARCH_CLUSTER_SETTINGS: dict[str, Any] = {
    "persistent": {
        # By default, when you index a document to a non-existent index,
        # OpenSearch will automatically create the index. This behavior is
        # undesirable so this function exposes the ability to disable it.
        # See
        # https://docs.opensearch.org/latest/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api
        "action.auto_create_index": False,
        # Thresholds for OpenSearch to log slow queries at the server level.
        "cluster.search.request.slowlog.level": "INFO",
        "cluster.search.request.slowlog.threshold.warn": "5s",
        "cluster.search.request.slowlog.threshold.info": "2s",
        "cluster.search.request.slowlog.threshold.debug": "1s",
        "cluster.search.request.slowlog.threshold.trace": "500ms",
    }
}


================================================
FILE: backend/onyx/document_index/opensearch/constants.py
================================================
# Default value for the maximum number of tokens a chunk can hold, if none is
# specified when creating an index.
import os
from enum import Enum


DEFAULT_MAX_CHUNK_SIZE = 512


# By default OpenSearch will only return a maximum of this many results in a
# given search. This value is configurable in the index settings.
DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW = 10_000


# For documents which do not have a value for LAST_UPDATED_FIELD_NAME, we assume
# that the document was last updated this many days ago for the purpose of time
# cutoff filtering during retrieval.
ASSUMED_DOCUMENT_AGE_DAYS = 90


# Size of the dynamic list used to consider elements during kNN graph creation.
# Higher values improve search quality but increase indexing time. Values
# typically range between 100 - 512.
EF_CONSTRUCTION = 256
# Number of bi-directional links per element. Higher values improve search
# quality but increase memory footprint. Values typically range between 12 - 48.
M = 32  # Set relatively high for better accuracy.

# When performing hybrid search, we need to consider more candidates than the
# number of results to be returned. This is because the scoring is hybrid and
# the results are reordered due to the hybrid scoring. Higher = more candidates
# for hybrid fusion = better retrieval accuracy, but results in more computation
# per query. Imagine a simple case with a single keyword query and a single
# vector query and we want 10 final docs. If we only fetch 10 candidates from
# each of keyword and vector, they would have to have perfect overlap to get a
# good hybrid ranking for the 10 results. If we fetch 1000 candidates from each,
# we have a much higher chance of all 10 of the final desired docs showing up
# and getting scored. In worse situations, the final 10 docs don't even show up
# as the final 10 (worse than just a miss at the reranking step).
# Defaults to 500 for now. Initially this defaulted to 750 but we were seeing
# poor search performance; bumped from 100 to 500 to improve recall.
DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES = int(
    os.environ.get("DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES", 500)
)

# Number of vectors to examine to decide the top k neighbors for the HNSW
# method.
# NOTE: "When creating a search query, you must specify k. If you provide both k
# and ef_search, then the larger value is passed to the engine. If ef_search is
# larger than k, you can provide the size parameter to limit the final number of
# results to k." from
# https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search
EF_SEARCH = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES


class OpenSearchSearchType(str, Enum):
    """Search type label used for Prometheus metrics."""

    HYBRID = "hybrid"
    KEYWORD = "keyword"
    SEMANTIC = "semantic"
    RANDOM = "random"
    DOC_ID_RETRIEVAL = "doc_id_retrieval"
    UNKNOWN = "unknown"


class HybridSearchSubqueryConfiguration(Enum):
    TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 1
    # Current default.
    CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 2


# Will raise and block application start if HYBRID_SEARCH_SUBQUERY_CONFIGURATION
# is set but not a valid value. If not set, defaults to
# CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD.
HYBRID_SEARCH_SUBQUERY_CONFIGURATION: HybridSearchSubqueryConfiguration = (
    HybridSearchSubqueryConfiguration(
        int(os.environ["HYBRID_SEARCH_SUBQUERY_CONFIGURATION"])
    )
    if os.environ.get("HYBRID_SEARCH_SUBQUERY_CONFIGURATION", None) is not None
    else HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
)


class HybridSearchNormalizationPipeline(Enum):
    # Current default.
    MIN_MAX = 1
    # NOTE: Using z-score normalization is better for hybrid search from a
    # theoretical standpoint. Empirically on a small dataset of up to 10K docs,
    # it's not very different. Likely more impactful at scale.
    # https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
    ZSCORE = 2


# Will raise and block application start if HYBRID_SEARCH_NORMALIZATION_PIPELINE
# is set but not a valid value. If not set, defaults to MIN_MAX.
HYBRID_SEARCH_NORMALIZATION_PIPELINE: HybridSearchNormalizationPipeline = (
    HybridSearchNormalizationPipeline(
        int(os.environ["HYBRID_SEARCH_NORMALIZATION_PIPELINE"])
    )
    if os.environ.get("HYBRID_SEARCH_NORMALIZATION_PIPELINE", None) is not None
    else HybridSearchNormalizationPipeline.MIN_MAX
)


================================================
FILE: backend/onyx/document_index/opensearch/opensearch_document_index.py
================================================
import json
from collections.abc import Iterable
from typing import Any

import httpx
from opensearchpy import NotFoundError

from onyx.access.models import DocumentAccess
from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_experts_stores_representations,
)
from onyx.connectors.models import convert_metadata_list_of_strings_to_dict
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.context.search.models import QueryExpansionType
from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import DocumentSource
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.chunk_content_enrichment import (
    generate_enriched_content_for_chunk_text,
)
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
from onyx.document_index.interfaces import (
    DocumentInsertionRecord as OldDocumentInsertionRecord,
)
from onyx.document_index.interfaces import IndexBatchParams
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.interfaces_new import DocumentIndex
from onyx.document_index.interfaces_new import DocumentInsertionRecord
from onyx.document_index.interfaces_new import DocumentSectionRequest
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import MetadataUpdateRequest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import SearchHit
from onyx.document_index.opensearch.cluster_settings import OPENSEARCH_CLUSTER_SETTINGS
from onyx.document_index.opensearch.constants import OpenSearchSearchType
from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
from onyx.document_index.opensearch.search import DocumentQuery
from onyx.document_index.opensearch.search import (
    get_min_max_normalization_pipeline_name_and_config,
)
from onyx.document_index.opensearch.search import (
    get_normalization_pipeline_name_and_config,
)
from onyx.document_index.opensearch.search import (
    get_zscore_normalization_pipeline_name_and_config,
)
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import Document
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import remove_invalid_unicode_chars
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
from shared_configs.model_server_models import Embedding


logger = setup_logger(__name__)


class ChunkCountNotFoundError(ValueError):
    """Raised when a document has no chunk count."""


def generate_opensearch_filtered_access_control_list(
    access: DocumentAccess,
) -> list[str]:
    """Generates an access control list with PUBLIC_DOC_PAT removed.

    In the OpenSearch schema this is represented by PUBLIC_FIELD_NAME.
    """
    access_control_list = access.to_acl()
    access_control_list.discard(PUBLIC_DOC_PAT)
    return list(access_control_list)


def set_cluster_state(client: OpenSearchClient) -> None:
    if not client.put_cluster_settings(settings=OPENSEARCH_CLUSTER_SETTINGS):
        logger.error(
            "Failed to put cluster settings. If the settings have never been set before, "
            "this may cause unexpected index creation when indexing documents into an "
            "index that does not exist, or may cause expected logs to not appear. If this "
            "is not the first time running Onyx against this instance of OpenSearch, these "
            "settings have likely already been set. Not taking any further action..."
        )
    min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (
        get_min_max_normalization_pipeline_name_and_config()
    )
    zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (
        get_zscore_normalization_pipeline_name_and_config()
    )
    client.create_search_pipeline(
        pipeline_id=min_max_normalization_pipeline_name,
        pipeline_body=min_max_normalization_pipeline_config,
    )
    client.create_search_pipeline(
        pipeline_id=zscore_normalization_pipeline_name,
        pipeline_body=zscore_normalization_pipeline_config,
    )


def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
    chunk: DocumentChunkWithoutVectors,
    score: float | None,
    highlights: dict[str, list[str]],
) -> InferenceChunkUncleaned:
    """
    Generates an inference chunk from an OpenSearch document chunk, its score,
    and its match highlights.

    Args:
        chunk: The document chunk returned by OpenSearch.
        score: The document chunk match score as calculated by OpenSearch. Only
            relevant for searches like hybrid search. It is acceptable for this
            value to be None for results from other queries like ID-based
            retrieval as a match score makes no sense in those contexts.
        highlights: Maps schema property name to a list of highlighted snippets
            with match terms wrapped in tags (e.g. "something <hi>keyword</hi>
            other thing").

    Returns:
        An Onyx inference chunk representation.
    """
    return InferenceChunkUncleaned(
        chunk_id=chunk.chunk_index,
        blurb=chunk.blurb,
        # Includes extra content prepended/appended during indexing.
        content=chunk.content,
        # When we read a string and turn it into a dict the keys will be
        # strings, but in this case they need to be ints.
        source_links=(
            {int(k): v for k, v in json.loads(chunk.source_links).items()}
            if chunk.source_links
            else None
        ),
        image_file_id=chunk.image_file_id,
        # Deprecated. Fill in some reasonable default.
        section_continuation=False,
        document_id=chunk.document_id,
        source_type=DocumentSource(chunk.source_type),
        semantic_identifier=chunk.semantic_identifier,
        title=chunk.title,
        boost=chunk.global_boost,
        score=score,
        hidden=chunk.hidden,
        metadata=(
            convert_metadata_list_of_strings_to_dict(chunk.metadata_list)
            if chunk.metadata_list
            else {}
        ),
        # Extract highlighted snippets from the content field, if available. In
        # the future we may want to match on other fields too, currently we only
        # use the content field.
        match_highlights=highlights.get(CONTENT_FIELD_NAME, []),
        # TODO(andrei) Consider storing a chunk content index instead of a full
        # string when working on chunk content augmentation.
        doc_summary=chunk.doc_summary,
        # TODO(andrei) Same thing as above.
        chunk_context=chunk.chunk_context,
        updated_at=chunk.last_updated,
        primary_owners=chunk.primary_owners,
        secondary_owners=chunk.secondary_owners,
        # TODO(andrei) Same thing as chunk_context above.
        metadata_suffix=chunk.metadata_suffix,
    )


def _convert_onyx_chunk_to_opensearch_document(
    chunk: DocMetadataAwareIndexChunk,
) -> DocumentChunk:
    filtered_blurb = remove_invalid_unicode_chars(chunk.blurb)
    _title = chunk.source_document.get_title_for_document_index()
    filtered_title = remove_invalid_unicode_chars(_title) if _title else None
    filtered_content = remove_invalid_unicode_chars(
        generate_enriched_content_for_chunk_text(chunk)
    )
    filtered_semantic_identifier = remove_invalid_unicode_chars(
        chunk.source_document.semantic_identifier
    )
    filtered_metadata_suffix = remove_invalid_unicode_chars(
        chunk.metadata_suffix_keyword
    )
    _metadata_list = chunk.source_document.get_metadata_str_attributes()
    filtered_metadata_list = (
        [remove_invalid_unicode_chars(metadata) for metadata in _metadata_list]
        if _metadata_list
        else None
    )
    return DocumentChunk(
        document_id=chunk.source_document.id,
        chunk_index=chunk.chunk_id,
        # Use get_title_for_document_index to match the logic used when creating
        # the title_embedding in the embedder. This method falls back to
        # semantic_identifier when title is None (but not empty string).
        title=filtered_title,
        title_vector=chunk.title_embedding,
        content=filtered_content,
        content_vector=chunk.embeddings.full_embedding,
        source_type=chunk.source_document.source.value,
        metadata_list=filtered_metadata_list,
        metadata_suffix=filtered_metadata_suffix,
        last_updated=chunk.source_document.doc_updated_at,
        public=chunk.access.is_public,
        access_control_list=generate_opensearch_filtered_access_control_list(
            chunk.access
        ),
        global_boost=chunk.boost,
        semantic_identifier=filtered_semantic_identifier,
        image_file_id=chunk.image_file_id,
        # Small optimization, if this list is empty we can supply None to
        # OpenSearch and it will not store any data at all for this field, which
        # is different from supplying an empty list.
        source_links=json.dumps(chunk.source_links) if chunk.source_links else None,
        blurb=filtered_blurb,
        doc_summary=chunk.doc_summary,
        chunk_context=chunk.chunk_context,
        # Small optimization, if this list is empty we can supply None to
        # OpenSearch and it will not store any data at all for this field, which
        # is different from supplying an empty list.
        document_sets=list(chunk.document_sets) if chunk.document_sets else None,
        # Small optimization, if this list is empty we can supply None to
        # OpenSearch and it will not store any data at all for this field, which
        # is different from supplying an empty list.
        user_projects=chunk.user_project or None,
        personas=chunk.personas or None,
        primary_owners=get_experts_stores_representations(
            chunk.source_document.primary_owners
        ),
        secondary_owners=get_experts_stores_representations(
            chunk.source_document.secondary_owners
        ),
        # TODO(andrei): Consider not even getting this from
        # DocMetadataAwareIndexChunk and instead using OpenSearchDocumentIndex's
        # instance variable. One source of truth -> less chance of a very bad
        # bug in prod.
        tenant_id=TenantState(tenant_id=chunk.tenant_id, multitenant=MULTI_TENANT),
        # Store ancestor hierarchy node IDs for hierarchy-based filtering.
        ancestor_hierarchy_node_ids=chunk.ancestor_hierarchy_node_ids or None,
    )


class OpenSearchOldDocumentIndex(OldDocumentIndex):
    """
    Wrapper for OpenSearch to adapt the new DocumentIndex interface with
    invocations to the old DocumentIndex interface in the hotpath.

    The analogous class for Vespa is VespaIndex which calls to
    VespaDocumentIndex.

    TODO(andrei): This is very dumb and purely temporary until there are no more
    references to the old interface in the hotpath.
    """

    def __init__(
        self,
        index_name: str,
        embedding_dim: int,
        embedding_precision: EmbeddingPrecision,
        secondary_index_name: str | None,
        secondary_embedding_dim: int | None,
        secondary_embedding_precision: EmbeddingPrecision | None,
        # NOTE: We do not support large chunks right now.
        large_chunks_enabled: bool,  # noqa: ARG002
        secondary_large_chunks_enabled: bool | None,  # noqa: ARG002
        multitenant: bool = False,
        httpx_client: httpx.Client | None = None,  # noqa: ARG002
    ) -> None:
        super().__init__(
            index_name=index_name,
            secondary_index_name=secondary_index_name,
        )
        if multitenant != MULTI_TENANT:
            raise ValueError(
                "Bug: Multitenant mismatch when initializing an OpenSearchDocumentIndex. "
                f"Expected {MULTI_TENANT}, got {multitenant}."
            )
        tenant_id = get_current_tenant_id()
        tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
        self._real_index = OpenSearchDocumentIndex(
            tenant_state=tenant_state,
            index_name=index_name,
            embedding_dim=embedding_dim,
            embedding_precision=embedding_precision,
        )
        self._secondary_real_index: OpenSearchDocumentIndex | None = None
        if self.secondary_index_name:
            if secondary_embedding_dim is None or secondary_embedding_precision is None:
                raise ValueError(
                    "Bug: Secondary index embedding dimension and precision are not set."
                )
            self._secondary_real_index = OpenSearchDocumentIndex(
                tenant_state=tenant_state,
                index_name=self.secondary_index_name,
                embedding_dim=secondary_embedding_dim,
                embedding_precision=secondary_embedding_precision,
            )

    @staticmethod
    def register_multitenant_indices(
        indices: list[str],
        embedding_dims: list[int],
        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
        raise NotImplementedError(
            "Bug: Multitenant index registration is not supported for OpenSearch."
        )

    def ensure_indices_exist(
        self,
        primary_embedding_dim: int,
        primary_embedding_precision: EmbeddingPrecision,
        secondary_index_embedding_dim: int | None,
        secondary_index_embedding_precision: EmbeddingPrecision | None,
    ) -> None:
        self._real_index.verify_and_create_index_if_necessary(
            primary_embedding_dim, primary_embedding_precision
        )
        if self.secondary_index_name:
            if (
                secondary_index_embedding_dim is None
                or secondary_index_embedding_precision is None
            ):
                raise ValueError(
                    "Bug: Secondary index embedding dimension and precision are not set."
                )
            assert (
                self._secondary_real_index is not None
            ), "Bug: Secondary index is not initialized."
            self._secondary_real_index.verify_and_create_index_if_necessary(
                secondary_index_embedding_dim, secondary_index_embedding_precision
            )

    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
        NOTE: Do NOT consider the secondary index here. A separate indexing
        pipeline will be responsible for indexing to the secondary index. This
        design is not ideal and we should reconsider this when revamping index
        swapping.
        """
        # Convert IndexBatchParams to IndexingMetadata.
        chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
        for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
            old_count = index_batch_params.doc_id_to_previous_chunk_cnt[doc_id]
            new_count = index_batch_params.doc_id_to_new_chunk_cnt[doc_id]
            chunk_counts[doc_id] = IndexingMetadata.ChunkCounts(
                old_chunk_cnt=old_count,
                new_chunk_cnt=new_count,
            )

        indexing_metadata = IndexingMetadata(doc_id_to_chunk_cnt_diff=chunk_counts)

        results = self._real_index.index(chunks, indexing_metadata)

        # Convert list[DocumentInsertionRecord] to
        # set[OldDocumentInsertionRecord].
        return {
            OldDocumentInsertionRecord(
                document_id=record.document_id,
                already_existed=record.already_existed,
            )
            for record in results
        }

    def delete_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,  # noqa: ARG002
        chunk_count: int | None,
    ) -> int:
        """
        NOTE: Remember to handle the secondary index here. There is no separate
        pipeline for deleting chunks in the secondary index. This design is not
        ideal and we should reconsider this when revamping index swapping.
        """
        total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
        if self.secondary_index_name:
            assert (
                self._secondary_real_index is not None
            ), "Bug: Secondary index is not initialized."
            total_chunks_deleted += self._secondary_real_index.delete(
                doc_id, chunk_count
            )
        return total_chunks_deleted

    def update_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,  # noqa: ARG002
        chunk_count: int | None,
        fields: VespaDocumentFields | None,
        user_fields: VespaDocumentUserFields | None,
    ) -> None:
        """
        NOTE: Remember to handle the secondary index here. There is no separate
        pipeline for updating chunks in the secondary index. This design is not
        ideal and we should reconsider this when revamping index swapping.
        """
        if fields is None and user_fields is None:
            logger.warning(
                f"Tried to update document {doc_id} with no updated fields or user fields."
            )
            return

        # Convert VespaDocumentFields to MetadataUpdateRequest.
        update_request = MetadataUpdateRequest(
            document_ids=[doc_id],
            doc_id_to_chunk_cnt={
                doc_id: chunk_count if chunk_count is not None else -1
            },
            access=fields.access if fields else None,
            document_sets=fields.document_sets if fields else None,
            boost=fields.boost if fields else None,
            hidden=fields.hidden if fields else None,
            project_ids=(
                set(user_fields.user_projects)
                # NOTE: Empty user_projects is semantically different from None
                # user_projects.
                if user_fields and user_fields.user_projects is not None
                else None
            ),
            persona_ids=(
                set(user_fields.personas)
                # NOTE: Empty personas is semantically different from None
                # personas.
                if user_fields and user_fields.personas is not None
                else None
            ),
        )

        try:
            self._real_index.update([update_request])
            if self.secondary_index_name:
                assert (
                    self._secondary_real_index is not None
                ), "Bug: Secondary index is not initialized."
                self._secondary_real_index.update([update_request])
        except NotFoundError:
            logger.exception(
                f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "
                "This is likely due to it not having been indexed yet. Skipping update for now..."
            )
            return
        except ChunkCountNotFoundError:
            logger.exception(
                f"Tried to update document {doc_id} but its chunk count is not known. We tolerate this for now "
                "but this will not be an acceptable state once OpenSearch is the primary document index and the "
                "indexing/updating race condition is fixed."
            )
            return

    def id_based_retrieval(
        self,
        chunk_requests: list[VespaChunkRequest],
        filters: IndexFilters,
        batch_retrieval: bool = False,
        get_large_chunks: bool = False,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        section_requests = [
            DocumentSectionRequest(
                document_id=req.document_id,
                min_chunk_ind=req.min_chunk_ind,
                max_chunk_ind=req.max_chunk_ind,
            )
            for req in chunk_requests
        ]

        return self._real_index.id_based_retrieval(
            section_requests, filters, batch_retrieval
        )

    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        final_keywords: list[str] | None,
        filters: IndexFilters,
        hybrid_alpha: float,
        time_decay_multiplier: float,  # noqa: ARG002
        num_to_retrieve: int,
        ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,  # noqa: ARG002
        title_content_ratio: float | None = TITLE_CONTENT_RATIO,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        # Determine query type based on hybrid_alpha.
        if hybrid_alpha >= 0.8:
            query_type = QueryType.SEMANTIC
        elif hybrid_alpha <= 0.2:
            query_type = QueryType.KEYWORD
        else:
            query_type = QueryType.SEMANTIC  # Default to semantic for hybrid.

        return self._real_index.hybrid_retrieval(
            query=query,
            query_embedding=query_embedding,
            final_keywords=final_keywords,
            query_type=query_type,
            filters=filters,
            num_to_retrieve=num_to_retrieve,
        )

    def admin_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        filters: IndexFilters,
        num_to_retrieve: int = NUM_RETURNED_HITS,
    ) -> list[InferenceChunk]:
        return self._real_index.hybrid_retrieval(
            query=query,
            query_embedding=query_embedding,
            final_keywords=None,
            query_type=QueryType.KEYWORD,
            filters=filters,
            num_to_retrieve=num_to_retrieve,
        )

    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
    ) -> list[InferenceChunk]:
        return self._real_index.random_retrieval(
            filters=filters,
            num_to_retrieve=num_to_retrieve,
            dirty=None,
        )


class OpenSearchDocumentIndex(DocumentIndex):
    """OpenSearch-specific implementation of the DocumentIndex interface.

    This class provides document indexing, retrieval, and management operations
    for an OpenSearch search engine instance. It handles the complete lifecycle
    of document chunks within a specific OpenSearch index/schema.

    Each kind of embedding used should correspond to a different instance of
    this class, and therefore a different index in OpenSearch.

    If in a multitenant environment and
    VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT, will verify and create the index
    if necessary on initialization. This is because there is no logic which runs
    on cluster restart which scans through all search settings over all tenants
    and creates the relevant indices.

    Args:
        tenant_state: The tenant state of the caller.
        index_name: The name of the index to interact with.
        embedding_dim: The dimensionality of the embeddings used for the index.
        embedding_precision: The precision of the embeddings used for the index.
    """

    def __init__(
        self,
        tenant_state: TenantState,
        index_name: str,
        embedding_dim: int,
        embedding_precision: EmbeddingPrecision,
    ) -> None:
        self._index_name: str = index_name
        self._tenant_state: TenantState = tenant_state
        self._client = OpenSearchIndexClient(index_name=self._index_name)

        if self._tenant_state.multitenant and VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT:
            self.verify_and_create_index_if_necessary(
                embedding_dim=embedding_dim, embedding_precision=embedding_precision
            )

    def verify_and_create_index_if_necessary(
        self,
        embedding_dim: int,
        embedding_precision: EmbeddingPrecision,  # noqa: ARG002
    ) -> None:
        """Verifies and creates the index if necessary.

        Also puts the desired cluster settings if not in a multitenant
        environment.

        Also puts the desired search pipeline state if not in a multitenant
        environment, creating the pipelines if they do not exist and updating
        them otherwise.

        In a multitenant environment, the above steps happen explicitly on
        setup.

        Args:
            embedding_dim: Vector dimensionality for the vector similarity part
                of the search.
            embedding_precision: Precision of the values of the vectors for the
                similarity part of the search.

        Raises:
            Exception: There was an error verifying or creating the index or
                search pipelines.
        """
        logger.debug(
            f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if "
            f"necessary, with embedding dimension {embedding_dim}."
        )

        if not self._tenant_state.multitenant:
            set_cluster_state(self._client)

        expected_mappings = DocumentSchema.get_document_schema(
            embedding_dim, self._tenant_state.multitenant
        )

        if not self._client.index_exists():
            index_settings = DocumentSchema.get_index_settings_based_on_environment()
            self._client.create_index(
                mappings=expected_mappings,
                settings=index_settings,
            )
        else:
            # Ensure schema is up to date by applying the current mappings.
            try:
                self._client.put_mapping(expected_mappings)
            except Exception as e:
                logger.error(
                    f"Failed to update mappings for index {self._index_name}. This likely means a "
                    f"field type was changed which requires reindexing. Error: {e}"
                )
                raise

    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        """Indexes an iterable of document chunks into the document index.

        Groups chunks by document ID and for each document, deletes existing
        chunks and indexes the new chunks in bulk.

        NOTE: It is assumed that chunks for a given document are not spread out
        over multiple index() calls.

        Args:
            chunks: Document chunks with all of the information needed for
                indexing to the document index.
            indexing_metadata: Information about chunk counts for efficient
                cleaning / updating.

        Raises:
            Exception: Failed to index some or all of the chunks for the
                specified documents.

        Returns:
            List of document IDs which map to unique documents as well as if the
                document is newly indexed or had already existed and was just
                updated.
        """
        total_chunks = sum(
            cc.new_chunk_cnt
            for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()
        )
        logger.debug(
            f"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} "
            f"documents for index {self._index_name}."
        )

        document_indexing_results: list[DocumentInsertionRecord] = []
        deleted_doc_ids: set[str] = set()
        # Buffer chunks per document as they arrive from the iterable.
        # When the document ID changes flush the buffered chunks.
        current_doc_id: str | None = None
        current_chunks: list[DocMetadataAwareIndexChunk] = []

        def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:
            assert len(doc_chunks) > 0, "doc_chunks is empty"

            # Create a batch of OpenSearch-formatted chunks for bulk insertion.
            # Since we are doing this in batches, an error occurring midway
            # can result in a state where chunks are deleted and not all the
            # new chunks have been indexed.
            chunk_batch: list[DocumentChunk] = [
                _convert_onyx_chunk_to_opensearch_document(chunk)
                for chunk in doc_chunks
            ]
            onyx_document: Document = doc_chunks[0].source_document
            # First delete the doc's chunks from the index. This is so that
            # there are no dangling chunks in the index, in the event that the
            # new document's content contains fewer chunks than the previous
            # content.
            # TODO(andrei): This can possibly be made more efficient by checking
            # if the chunk count has actually decreased. This assumes that
            # overlapping chunks are perfectly overwritten. If we can't
            # guarantee that then we need the code as-is.
            if onyx_document.id not in deleted_doc_ids:
                num_chunks_deleted = self.delete(
                    onyx_document.id, onyx_document.chunk_count
                )
                deleted_doc_ids.add(onyx_document.id)
                # If we see that chunks were deleted we assume the doc already
                # existed. We record the result before bulk_index_documents
                # runs. If indexing raises, this entire result list is discarded
                # by the caller's retry logic, so early recording is safe.
                document_indexing_results.append(
                    DocumentInsertionRecord(
                        document_id=onyx_document.id,
                        already_existed=num_chunks_deleted > 0,
                    )
                )
            # Now index. This will raise if a chunk of the same ID exists, which
            # we do not expect because we should have deleted all chunks.
            self._client.bulk_index_documents(
                documents=chunk_batch,
                tenant_state=self._tenant_state,
            )

        for chunk in chunks:
            doc_id = chunk.source_document.id
            if doc_id != current_doc_id:
                if current_chunks:
                    _flush_chunks(current_chunks)
                current_doc_id = doc_id
                current_chunks = [chunk]
            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
                _flush_chunks(current_chunks)
                current_chunks = [chunk]
            else:
                current_chunks.append(chunk)

        if current_chunks:
            _flush_chunks(current_chunks)

        return document_indexing_results

    def delete(
        self,
        document_id: str,
        chunk_count: int | None = None,  # noqa: ARG002
    ) -> int:
        """Deletes all chunks for a given document.

        Does nothing if the specified document ID does not exist.

        TODO(andrei): Consider implementing this method to delete on document
        chunk IDs vs querying for matching document chunks. Unclear if this is
        any better though.

        Args:
            document_id: The unique identifier for the document as represented
                in Onyx, not necessarily in the document index.
            chunk_count: The number of chunks in OpenSearch for the document.
                Defaults to None.

        Raises:
            Exception: Failed to delete some or all of the chunks for the
                document.

        Returns:
            The number of chunks successfully deleted.
        """
        logger.debug(
            f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
        )
        query_body = DocumentQuery.delete_from_document_id_query(
            document_id=document_id,
            tenant_state=self._tenant_state,
        )

        return self._client.delete_by_query(query_body)

    def update(
        self,
        update_requests: list[MetadataUpdateRequest],
    ) -> None:
        """Updates some set of chunks.

        NOTE: Will raise if one of the specified document chunks do not exist.
        This may be due to a concurrent ongoing indexing operation. In that
        event callers are expected to retry after a bit once the state of the
        document index is updated.
        NOTE: Requires document chunk count be known; will raise if it is not.
        This may be caused by the same situation outlined above.
        NOTE: Will no-op if an update request has no fields to update.

        TODO(andrei): Consider exploring a batch API for OpenSearch for this
        operation.

        Args:
            update_requests: A list of update requests, each containing a list
                of document IDs and the fields to update. The field updates
                apply to all of the specified documents in each update request.

        Raises:
            Exception: Failed to update some or all of the chunks for the
                specified documents.
        """
        logger.debug(
            f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
        )
        for update_request in update_requests:
            properties_to_update: dict[str, Any] = dict()
            # TODO(andrei): Nit but consider if we can use DocumentChunk
            # here so we don't have to think about passing in the
            # appropriate types into this dict.
            if update_request.access is not None:
                properties_to_update[ACCESS_CONTROL_LIST_FIELD_NAME] = (
                    generate_opensearch_filtered_access_control_list(
                        update_request.access
                    )
                )
            if update_request.document_sets is not None:
                properties_to_update[DOCUMENT_SETS_FIELD_NAME] = list(
                    update_request.document_sets
                )
            if update_request.boost is not None:
                properties_to_update[GLOBAL_BOOST_FIELD_NAME] = int(
                    update_request.boost
                )
            if update_request.hidden is not None:
                properties_to_update[HIDDEN_FIELD_NAME] = update_request.hidden
            if update_request.project_ids is not None:
                properties_to_update[USER_PROJECTS_FIELD_NAME] = list(
                    update_request.project_ids
                )
            if update_request.persona_ids is not None:
                properties_to_update[PERSONAS_FIELD_NAME] = list(
                    update_request.persona_ids
                )

            if not properties_to_update:
                if len(update_request.document_ids) > 1:
                    update_string = f"{len(update_request.document_ids)} documents"
                else:
                    update_string = f"document {update_request.document_ids[0]}"
                logger.warning(
                    f"[OpenSearchDocumentIndex] Tried to update {update_string} "
                    "with no specified update fields. This will be a no-op."
                )
                continue

            for doc_id in update_request.document_ids:
                doc_chunk_count = update_request.doc_id_to_chunk_cnt.get(doc_id, -1)
                if doc_chunk_count < 0:
                    # This means the chunk count is not known. This is due to a
                    # race condition between doc indexing and updating steps
                    # which run concurrently when a doc is indexed. The indexing
                    # step should update chunk count shortly. This could also
                    # have been due to an older version of the indexing pipeline
                    # which did not compute chunk count, but that codepath has
                    # since been deprecated and should no longer be the case
                    # here.
                    # TODO(andrei): Fix the aforementioned race condition.
                    raise ChunkCountNotFoundError(
                        f"Tried to update document {doc_id} but its chunk count is not known. "
                        "Older versions of the application used to permit this but is not a "
                        "supported state for a document when using OpenSearch. The document was "
                        "likely just added to the indexing pipeline and the chunk count will be "
                        "updated shortly."
                    )
                if doc_chunk_count == 0:
                    raise ValueError(
                        f"Bug: Tried to update document {doc_id} but its chunk count was 0."
                    )

                for chunk_index in range(doc_chunk_count):
                    document_chunk_id = get_opensearch_doc_chunk_id(
                        tenant_state=self._tenant_state,
                        document_id=doc_id,
                        chunk_index=chunk_index,
                    )
                    self._client.update_document(
                        document_chunk_id=document_chunk_id,
                        properties_to_update=properties_to_update,
                    )

    def id_based_retrieval(
        self,
        chunk_requests: list[DocumentSectionRequest],
        filters: IndexFilters,
        # TODO(andrei): Remove this from the new interface at some point; we
        # should not be exposing this.
        batch_retrieval: bool = False,  # noqa: ARG002
        # TODO(andrei): Add a param for whether to retrieve hidden docs.
    ) -> list[InferenceChunk]:
        """
        TODO(andrei): Consider implementing this method to retrieve on document
        chunk IDs vs querying for matching document chunks.
        """
        logger.debug(
            f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
        )
        results: list[InferenceChunk] = []
        for chunk_request in chunk_requests:
            search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
            query_body = DocumentQuery.get_from_document_id_query(
                document_id=chunk_request.document_id,
                tenant_state=self._tenant_state,
                # NOTE: Index filters includes metadata tags which were filtered
                # for invalid unicode at indexing time. In theory it would be
                # ideal to do filtering here as well, in practice we never did
                # that in the Vespa codepath and have not seen issues in
                # production, so we deliberately conform to the existing logic
                # in order to not unknowningly introduce a possible bug.
                index_filters=filters,
                include_hidden=False,
                max_chunk_size=chunk_request.max_chunk_size,
                min_chunk_index=chunk_request.min_chunk_ind,
                max_chunk_index=chunk_request.max_chunk_ind,
            )
            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
                search_type=OpenSearchSearchType.DOC_ID_RETRIEVAL,
            )
            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                    search_hit.document_chunk, None, {}
                )
                for search_hit in search_hits
            ]
            inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
                inference_chunks_uncleaned
            )
            results.extend(inference_chunks)
        return results

    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        # TODO(andrei): This param is not great design, get rid of it.
        final_keywords: list[str] | None,
        query_type: QueryType,  # noqa: ARG002
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        # TODO(andrei): There is some duplicated logic in this function with
        # others in this file.
        logger.debug(
            f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
        )
        # TODO(andrei): This could be better, the caller should just make this
        # decision when passing in the query param. See the above comment in the
        # function signature.
        final_query = " ".join(final_keywords) if final_keywords else query
        query_body = DocumentQuery.get_hybrid_search_query(
            query_text=final_query,
            query_vector=query_embedding,
            num_hits=num_to_retrieve,
            tenant_state=self._tenant_state,
            # NOTE: Index filters includes metadata tags which were filtered
            # for invalid unicode at indexing time. In theory it would be
            # ideal to do filtering here as well, in practice we never did
            # that in the Vespa codepath and have not seen issues in
            # production, so we deliberately conform to the existing logic
            # in order to not unknowningly introduce a possible bug.
            index_filters=filters,
            include_hidden=False,
        )
        normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()
        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
            search_pipeline_id=normalization_pipeline_name,
            search_type=OpenSearchSearchType.HYBRID,
        )

        # Good place for a breakpoint to inspect the search hits if you have
        # "explain" enabled.
        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
            )
            for search_hit in search_hits
        ]
        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
            inference_chunks_uncleaned
        )

        return inference_chunks

    def keyword_retrieval(
        self,
        query: str,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        # TODO(andrei): There is some duplicated logic in this function with
        # others in this file.
        logger.debug(
            f"[OpenSearchDocumentIndex] Keyword retrieving {num_to_retrieve} chunks for index {self._index_name}."
        )
        query_body = DocumentQuery.get_keyword_search_query(
            query_text=query,
            num_hits=num_to_retrieve,
            tenant_state=self._tenant_state,
            # NOTE: Index filters includes metadata tags which were filtered
            # for invalid unicode at indexing time. In theory it would be
            # ideal to do filtering here as well, in practice we never did
            # that in the Vespa codepath and have not seen issues in
            # production, so we deliberately conform to the existing logic
            # in order to not unknowningly introduce a possible bug.
            index_filters=filters,
            include_hidden=False,
        )
        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
            search_pipeline_id=None,
            search_type=OpenSearchSearchType.KEYWORD,
        )

        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
            )
            for search_hit in search_hits
        ]
        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
            inference_chunks_uncleaned
        )

        return inference_chunks

    def semantic_retrieval(
        self,
        query_embedding: Embedding,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        # TODO(andrei): There is some duplicated logic in this function with
        # others in this file.
        logger.debug(
            f"[OpenSearchDocumentIndex] Semantic retrieving {num_to_retrieve} chunks for index {self._index_name}."
        )
        query_body = DocumentQuery.get_semantic_search_query(
            query_embedding=query_embedding,
            num_hits=num_to_retrieve,
            tenant_state=self._tenant_state,
            # NOTE: Index filters includes metadata tags which were filtered
            # for invalid unicode at indexing time. In theory it would be
            # ideal to do filtering here as well, in practice we never did
            # that in the Vespa codepath and have not seen issues in
            # production, so we deliberately conform to the existing logic
            # in order to not unknowningly introduce a possible bug.
            index_filters=filters,
            include_hidden=False,
        )
        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
            search_pipeline_id=None,
            search_type=OpenSearchSearchType.SEMANTIC,
        )

        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
            )
            for search_hit in search_hits
        ]
        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
            inference_chunks_uncleaned
        )

        return inference_chunks

    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
        dirty: bool | None = None,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        logger.debug(
            f"[OpenSearchDocumentIndex] Randomly retrieving {num_to_retrieve} chunks for index {self._index_name}."
        )
        query_body = DocumentQuery.get_random_search_query(
            tenant_state=self._tenant_state,
            index_filters=filters,
            num_to_retrieve=num_to_retrieve,
        )
        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
            search_pipeline_id=None,
            search_type=OpenSearchSearchType.RANDOM,
        )
        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
            )
            for search_hit in search_hits
        ]
        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
            inference_chunks_uncleaned
        )

        return inference_chunks

    def index_raw_chunks(self, chunks: list[DocumentChunk]) -> None:
        """Indexes raw document chunks into OpenSearch.

        Used in the Vespa migration task. Can be deleted after migrations are
        complete.
        """
        logger.debug(
            f"[OpenSearchDocumentIndex] Indexing {len(chunks)} raw chunks for index {self._index_name}."
        )
        # Do not raise if the document already exists, just update. This is
        # because the document may already have been indexed during the
        # OpenSearch transition period.
        self._client.bulk_index_documents(
            documents=chunks, tenant_state=self._tenant_state, update_if_exists=True
        )


================================================
FILE: backend/onyx/document_index/opensearch/schema.py
================================================
import hashlib
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Self

from pydantic import BaseModel
from pydantic import Field
from pydantic import field_serializer
from pydantic import field_validator
from pydantic import model_serializer
from pydantic import model_validator
from pydantic import SerializerFunctionWrapHandler

from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_REPLICAS
from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_SHARDS
from onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
from onyx.document_index.opensearch.constants import EF_SEARCH
from onyx.document_index.opensearch.constants import M
from onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError
from onyx.document_index.opensearch.string_filtering import (
    filter_and_validate_document_id,
)
from onyx.document_index.opensearch.string_filtering import (
    MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from onyx.utils.tenant import get_tenant_id_short_string
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id


TITLE_FIELD_NAME = "title"
TITLE_VECTOR_FIELD_NAME = "title_vector"
CONTENT_FIELD_NAME = "content"
CONTENT_VECTOR_FIELD_NAME = "content_vector"
SOURCE_TYPE_FIELD_NAME = "source_type"
METADATA_LIST_FIELD_NAME = "metadata_list"
LAST_UPDATED_FIELD_NAME = "last_updated"
PUBLIC_FIELD_NAME = "public"
ACCESS_CONTROL_LIST_FIELD_NAME = "access_control_list"
HIDDEN_FIELD_NAME = "hidden"
GLOBAL_BOOST_FIELD_NAME = "global_boost"
SEMANTIC_IDENTIFIER_FIELD_NAME = "semantic_identifier"
IMAGE_FILE_ID_FIELD_NAME = "image_file_id"
SOURCE_LINKS_FIELD_NAME = "source_links"
DOCUMENT_SETS_FIELD_NAME = "document_sets"
USER_PROJECTS_FIELD_NAME = "user_projects"
PERSONAS_FIELD_NAME = "personas"
DOCUMENT_ID_FIELD_NAME = "document_id"
CHUNK_INDEX_FIELD_NAME = "chunk_index"
MAX_CHUNK_SIZE_FIELD_NAME = "max_chunk_size"
TENANT_ID_FIELD_NAME = "tenant_id"
BLURB_FIELD_NAME = "blurb"
DOC_SUMMARY_FIELD_NAME = "doc_summary"
CHUNK_CONTEXT_FIELD_NAME = "chunk_context"
METADATA_SUFFIX_FIELD_NAME = "metadata_suffix"
PRIMARY_OWNERS_FIELD_NAME = "primary_owners"
SECONDARY_OWNERS_FIELD_NAME = "secondary_owners"
# Hierarchy filtering - list of ancestor hierarchy node IDs
ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME = "ancestor_hierarchy_node_ids"


# Faiss was also tried but it didn't have any benefits
# NMSLIB is deprecated, not recommended
OPENSEARCH_KNN_ENGINE = "lucene"


def get_opensearch_doc_chunk_id(
    tenant_state: TenantState,
    document_id: str,
    chunk_index: int,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
) -> str:
    """
    Returns a unique identifier for the chunk.

    This will be the string used to identify the chunk in OpenSearch. Any direct
    chunk queries should use this function.

    If the document ID is too long, a hash of the ID is used instead.
    """
    opensearch_doc_chunk_id_suffix: str = f"__{max_chunk_size}__{chunk_index}"
    encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode("utf-8"))
    max_encoded_permissible_doc_id_length: int = (
        MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length
    )
    opensearch_doc_chunk_id_tenant_prefix: str = ""
    if tenant_state.multitenant:
        short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)
        # Use tenant ID because in multitenant mode each tenant has its own
        # Documents table, so there is a very small chance that doc IDs are not
        # actually unique across all tenants.
        opensearch_doc_chunk_id_tenant_prefix = f"{short_tenant_id}__"
        encoded_prefix_length: int = len(
            opensearch_doc_chunk_id_tenant_prefix.encode("utf-8")
        )
        max_encoded_permissible_doc_id_length -= encoded_prefix_length

    try:
        sanitized_document_id: str = filter_and_validate_document_id(
            document_id, max_encoded_length=max_encoded_permissible_doc_id_length
        )
    except DocumentIDTooLongError:
        # If the document ID is too long, use a hash instead.
        # We use blake2b because it is faster and equally secure as SHA256, and
        # accepts digest_size which controls the number of bytes returned in the
        # hash.
        # digest_size is the size of the returned hash in bytes. Since we're
        # decoding the hash bytes as a hex string, the digest_size should be
        # half the max target size of the hash string.
        # Subtract 1 because filter_and_validate_document_id compares on >= on
        # max_encoded_length.
        # 64 is the max digest_size blake2b returns.
        digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)
        sanitized_document_id = hashlib.blake2b(
            document_id.encode("utf-8"), digest_size=digest_size
        ).hexdigest()

    opensearch_doc_chunk_id: str = (
        f"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}"
    )

    # Do one more validation to ensure we haven't exceeded the max length.
    opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)
    return opensearch_doc_chunk_id


def set_or_convert_timezone_to_utc(value: datetime) -> datetime:
    if value.tzinfo is None:
        # astimezone will raise if value does not have a timezone set.
        value = value.replace(tzinfo=timezone.utc)
    else:
        # Does appropriate time conversion if value was set in a different
        # timezone.
        value = value.astimezone(timezone.utc)
    return value


class DocumentChunkWithoutVectors(BaseModel):
    """
    Represents a chunk of a document in the OpenSearch index without vectors.

    The names of these fields are based on the OpenSearch schema. Changes to the
    schema require changes here. See get_document_schema.

    WARNING: Relies on MULTI_TENANT which is global state. Also uses
    get_current_tenant_id. Generally relying on global state is bad, in this
    case we accept it because of the importance of validating tenant logic.
    """

    model_config = {"frozen": True}

    document_id: str
    chunk_index: int
    # The maximum number of tokens this chunk's content can hold. Previously
    # there was a concept of large chunks, this is a generic concept of that. We
    # can choose to have any size of chunks in the index and they should be
    # distinct from one another.
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    # Either both should be None or both should be non-None.
    title: str | None = None
    content: str

    source_type: str
    # A list of key-value pairs separated by INDEX_SEPARATOR. See
    # convert_metadata_dict_to_list_of_strings.
    metadata_list: list[str] | None = None
    # If it exists, time zone should always be UTC.
    last_updated: datetime | None = None

    public: bool
    access_control_list: list[str]
    # Defaults to False, currently gets written during update not index.
    hidden: bool = False

    global_boost: int

    semantic_identifier: str
    image_file_id: str | None = None
    # Contains a string representation of a dict which maps offset into the raw
    # chunk text to the link corresponding to that point.
    source_links: str | None = None
    blurb: str
    # doc_summary, chunk_context, and metadata_suffix are all stored simply to
    # reverse the augmentations to content. Ideally these would just be start
    # and stop indices into the content string. For legacy reasons they are not
    # right now.
    doc_summary: str
    chunk_context: str
    metadata_suffix: str | None = None

    document_sets: list[str] | None = None
    user_projects: list[int] | None = None
    personas: list[int] | None = None
    primary_owners: list[str] | None = None
    secondary_owners: list[str] | None = None

    # List of ancestor hierarchy node IDs for hierarchy-based filtering.
    # None means no hierarchy info (document will be excluded from
    # hierarchy-filtered searches).
    ancestor_hierarchy_node_ids: list[int] | None = None

    tenant_id: TenantState = Field(
        default_factory=lambda: TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )
    )

    def __str__(self) -> str:
        return (
            f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
            f"content length={len(self.content)}, tenant_id={self.tenant_id.tenant_id})."
        )

    @model_serializer(mode="wrap")
    def serialize_model(
        self, handler: SerializerFunctionWrapHandler
    ) -> dict[str, object]:
        """Invokes pydantic's serialization logic, then excludes Nones.

        We do this because .model_dump(exclude_none=True) does not work after
        @field_serializer logic, so for some field serializers which return None
        and which we would like to exclude from the final dump, they would be
        included without this.

        Args:
            handler: Callable from pydantic which takes the instance of the
                model as an argument and performs standard serialization.

        Returns:
            The return of handler but with None items excluded.
        """
        serialized: dict[str, object] = handler(self)
        serialized_exclude_none = {k: v for k, v in serialized.items() if v is not None}
        return serialized_exclude_none

    @field_serializer("last_updated", mode="wrap")
    def serialize_datetime_fields_to_epoch_seconds(
        self,
        value: datetime | None,
        handler: SerializerFunctionWrapHandler,  # noqa: ARG002
    ) -> int | None:
        """
        Serializes datetime fields to seconds since the Unix epoch.

        If there is no datetime, returns None.
        """
        if value is None:
            return None
        value = set_or_convert_timezone_to_utc(value)
        return int(value.timestamp())

    @field_validator("last_updated", mode="before")
    @classmethod
    def parse_epoch_seconds_to_datetime(cls, value: Any) -> datetime | None:
        """Parses seconds since the Unix epoch to a datetime object.

        If the input is None, returns None.

        The datetime returned will be in UTC.
        """
        if value is None:
            return None
        if isinstance(value, datetime):
            value = set_or_convert_timezone_to_utc(value)
            return value
        if not isinstance(value, int):
            raise ValueError(
                f"Bug: Expected an int for the last_updated property from OpenSearch, got {type(value)} instead."
            )
        return datetime.fromtimestamp(value, tz=timezone.utc)

    @field_serializer("tenant_id", mode="wrap")
    def serialize_tenant_state(
        self,
        value: TenantState,
        handler: SerializerFunctionWrapHandler,  # noqa: ARG002
    ) -> str | None:
        """
        Serializes tenant_state to the tenant str if multitenant, or None if
        not.

        The idea is that in single tenant mode, the schema does not have a
        tenant_id field, so we don't want to supply it in our serialized
        DocumentChunk. This assumes the final serialized model excludes None
        fields, which serialize_model should enforce.
        """
        if not value.multitenant:
            return None
        else:
            return value.tenant_id

    @field_validator("tenant_id", mode="before")
    @classmethod
    def parse_tenant_id(cls, value: Any) -> TenantState:
        """
        Generates a TenantState from OpenSearch's tenant_id if it exists, or
        generates a default state if it does not (implies we are in single
        tenant mode).
        """
        if value is None:
            if MULTI_TENANT:
                raise ValueError(
                    "Bug: No tenant_id was supplied but multi-tenant mode is enabled."
                )
            return TenantState(
                tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
            )
        elif isinstance(value, TenantState):
            if MULTI_TENANT != value.multitenant:
                raise ValueError(
                    f"Bug: An existing TenantState object was supplied to the DocumentChunk model "
                    f"but its multi-tenant mode ({value.multitenant}) does not match the program's "
                    "current global tenancy state."
                )
            return value
        elif not isinstance(value, str):
            raise ValueError(
                f"Bug: Expected a str for the tenant_id property from OpenSearch, got {type(value)} instead."
            )
        else:
            if not MULTI_TENANT:
                raise ValueError(
                    "Bug: Got a non-null str for the tenant_id property from OpenSearch but "
                    "multi-tenant mode is not enabled. This is unexpected because in single-tenant "
                    "mode we don't expect to see a tenant_id."
                )
            return TenantState(tenant_id=value, multitenant=MULTI_TENANT)


class DocumentChunk(DocumentChunkWithoutVectors):
    """Represents a chunk of a document in the OpenSearch index.

    The names of these fields are based on the OpenSearch schema. Changes to the
    schema require changes here. See get_document_schema.
    """

    model_config = {"frozen": True}

    title_vector: list[float] | None = None
    content_vector: list[float]

    def __str__(self) -> str:
        return (
            f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
            f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
            f"tenant_id={self.tenant_id.tenant_id})"
        )

    @model_validator(mode="after")
    def check_title_and_title_vector_are_consistent(self) -> Self:
        # title and title_vector should both either be None or not.
        if self.title is not None and self.title_vector is None:
            raise ValueError("Bug: Title vector must not be None if title is not None.")
        if self.title_vector is not None and self.title is None:
            raise ValueError("Bug: Title must not be None if title vector is not None.")
        return self


class DocumentSchema:
    """
    Represents the schema and indexing strategies of the OpenSearch index.

    TODO(andrei): Implement multi-phase indexing strategies.
    """

    @staticmethod
    def get_document_schema(vector_dimension: int, multitenant: bool) -> dict[str, Any]:
        """Returns the document schema for the OpenSearch index.

        WARNING: Changes / additions to field names here require changes to the
        DocumentChunk class above.

        Notes:
          - By default all fields have indexing enabled.
          - By default almost all fields except text fields have doc_values
            enabled, enabling operations like sorting and aggregations.
          - By default all fields are nullable.
          - "type": "keyword" fields are stored as-is, used for exact matches,
            filtering, etc.
          - "type": "text" fields are OpenSearch-processed strings, used for
            full-text searches.
          - "store": True fields are stored and can be returned on their own,
            independent of the parent document.
          - "index": True fields can be queried on.
          - "doc_values": True fields can be sorted and aggregated efficiently.
            Not supported for "text" type fields.
          - "store": True fields are stored separately from the source document
            and can thus be returned from a query separately from _source.
            Generally this is not necessary.

        Args:
            vector_dimension: The dimension of vector embeddings. Must be a
                positive integer.
            multitenant: Whether the index is multitenant.

        Returns:
            A dictionary representing the document schema, to be supplied to the
                OpenSearch client. The structure of this dictionary is
                determined by OpenSearch documentation.
        """
        schema: dict[str, Any] = {
            # By default OpenSearch allows dynamically adding new properties
            # based on indexed documents. This is awful and we disable it here.
            # An exception will be raised if you try to index a new doc which
            # contains unexpected fields.
            "dynamic": "strict",
            "properties": {
                TITLE_FIELD_NAME: {
                    "type": "text",
                    # Language analyzer (e.g. english) stems at index and search
                    # time for variant matching. Configure via
                    # OPENSEARCH_TEXT_ANALYZER. Existing indices need reindexing
                    # after a change.
                    "analyzer": OPENSEARCH_TEXT_ANALYZER,
                    "fields": {
                        # Subfield accessed as title.keyword. Not indexed for
                        # values longer than 256 chars.
                        # TODO(andrei): Ask Yuhong do we want this?
                        "keyword": {"type": "keyword", "ignore_above": 256}
                    },
                    # This makes highlighting text during queries more efficient
                    # at the cost of disk space. See
                    # https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#methods-of-obtaining-offsets
                    "index_options": "offsets",
                },
                CONTENT_FIELD_NAME: {
                    "type": "text",
                    "store": True,
                    "analyzer": OPENSEARCH_TEXT_ANALYZER,
                    "index_options": "offsets",
                },
                TITLE_VECTOR_FIELD_NAME: {
                    "type": "knn_vector",
                    "dimension": vector_dimension,
                    "method": {
                        "name": "hnsw",
                        "space_type": "cosinesimil",
                        "engine": OPENSEARCH_KNN_ENGINE,
                        "parameters": {"ef_construction": EF_CONSTRUCTION, "m": M},
                    },
                },
                # TODO(andrei): This is a tensor in Vespa. Also look at feature
                # parity for these other method fields.
                CONTENT_VECTOR_FIELD_NAME: {
                    "type": "knn_vector",
                    "dimension": vector_dimension,
                    "method": {
                        "name": "hnsw",
                        "space_type": "cosinesimil",
                        "engine": OPENSEARCH_KNN_ENGINE,
                        "parameters": {"ef_construction": EF_CONSTRUCTION, "m": M},
                    },
                },
                SOURCE_TYPE_FIELD_NAME: {"type": "keyword"},
                METADATA_LIST_FIELD_NAME: {"type": "keyword"},
                LAST_UPDATED_FIELD_NAME: {
                    "type": "date",
                    "format": "epoch_second",
                    # For some reason date defaults to False, even though it
                    # would make sense to sort by date.
                    "doc_values": True,
                },
                # Access control fields.
                # Whether the doc is public. Could have fallen under access
                # control list but is such a broad and critical filter that it
                # is its own field. If true, ACCESS_CONTROL_LIST_FIELD_NAME
                # should have no effect on queries.
                PUBLIC_FIELD_NAME: {"type": "boolean"},
                # Access control list for the doc, excluding public access,
                # which is covered above.
                # If a user's access set contains at least one entry from this
                # set, the user should be able to retrieve this document. This
                # only applies if public is set to false; public non-hidden
                # documents are always visible to anyone in a given tenancy
                # regardless of this field.
                ACCESS_CONTROL_LIST_FIELD_NAME: {"type": "keyword"},
                # Whether the doc is hidden from search results.
                # Should clobber all other access search filters, namely
                # PUBLIC_FIELD_NAME and ACCESS_CONTROL_LIST_FIELD_NAME; up to
                # search implementations to guarantee this.
                HIDDEN_FIELD_NAME: {"type": "boolean"},
                GLOBAL_BOOST_FIELD_NAME: {"type": "integer"},
                # This field is only used for displaying a useful name for the
                # doc in the UI and is not used for searching. Disabling these
                # features to increase perf. This field is therefore essentially
                # just metadata.
                SEMANTIC_IDENTIFIER_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above; used to display an image along with the doc.
                IMAGE_FILE_ID_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above; used to link to the source doc.
                SOURCE_LINKS_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above; used to quickly summarize the doc in the UI.
                BLURB_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above.
                # TODO(andrei): If we want to search on this this needs to be
                # changed.
                DOC_SUMMARY_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above.
                # TODO(andrei): If we want to search on this this needs to be
                # changed.
                CHUNK_CONTEXT_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    # Generally False by default; just making sure.
                    "store": False,
                },
                # Same as above.
                METADATA_SUFFIX_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
                    "store": False,
                },
                # Product-specific fields.
                DOCUMENT_SETS_FIELD_NAME: {"type": "keyword"},
                USER_PROJECTS_FIELD_NAME: {"type": "integer"},
                PERSONAS_FIELD_NAME: {"type": "integer"},
                PRIMARY_OWNERS_FIELD_NAME: {"type": "keyword"},
                SECONDARY_OWNERS_FIELD_NAME: {"type": "keyword"},
                # OpenSearch metadata fields.
                DOCUMENT_ID_FIELD_NAME: {"type": "keyword"},
                CHUNK_INDEX_FIELD_NAME: {"type": "integer"},
                # The maximum number of tokens this chunk's content can hold.
                MAX_CHUNK_SIZE_FIELD_NAME: {"type": "integer"},
                # Hierarchy filtering - list of ancestor hierarchy node IDs.
                # Used for scoped search within folder/space hierarchies.
                # OpenSearch's terms query with value_type: "bitmap" can
                # efficiently check if any value in this array matches a
                # query bitmap.
                ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME: {"type": "integer"},
            },
        }

        if multitenant:
            schema["properties"][TENANT_ID_FIELD_NAME] = {"type": "keyword"}

        return schema

    @staticmethod
    def get_index_settings_based_on_environment() -> dict[str, Any]:
        """
        Returns the index settings based on the environment.
        """
        if USING_AWS_MANAGED_OPENSEARCH:
            # NOTE: The number of data copies, including the primary (not a
            # replica) copy, must be divisible by the number of AZs.
            if MULTI_TENANT:
                number_of_shards = 324
                number_of_replicas = 2
            else:
                number_of_shards = 3
                number_of_replicas = 2
        else:
            number_of_shards = 1
            number_of_replicas = 1

        if OPENSEARCH_INDEX_NUM_SHARDS is not None:
            number_of_shards = OPENSEARCH_INDEX_NUM_SHARDS
        if OPENSEARCH_INDEX_NUM_REPLICAS is not None:
            number_of_replicas = OPENSEARCH_INDEX_NUM_REPLICAS

        return {
            "index": {
                "number_of_shards": number_of_shards,
                "number_of_replicas": number_of_replicas,
                # Required for vector search.
                "knn": True,
                "knn.algo_param.ef_search": EF_SEARCH,
            }
        }


================================================
FILE: backend/onyx/document_index/opensearch/search.py
================================================
import random
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import TypeAlias
from typing import TypeVar

from onyx.configs.app_configs import DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S
from onyx.configs.app_configs import OPENSEARCH_EXPLAIN_ENABLED
from onyx.configs.app_configs import OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED
from onyx.configs.app_configs import OPENSEARCH_PROFILING_DISABLED
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import Tag
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import ASSUMED_DOCUMENT_AGE_DAYS
from onyx.document_index.opensearch.constants import (
    DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
)
from onyx.document_index.opensearch.constants import (
    DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,
)
from onyx.document_index.opensearch.constants import (
    HYBRID_SEARCH_NORMALIZATION_PIPELINE,
)
from onyx.document_index.opensearch.constants import (
    HYBRID_SEARCH_SUBQUERY_CONFIGURATION,
)
from onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline
from onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration
from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
from onyx.document_index.opensearch.schema import ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME
from onyx.document_index.opensearch.schema import CHUNK_INDEX_FIELD_NAME
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import CONTENT_VECTOR_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_ID_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
from onyx.document_index.opensearch.schema import LAST_UPDATED_FIELD_NAME
from onyx.document_index.opensearch.schema import MAX_CHUNK_SIZE_FIELD_NAME
from onyx.document_index.opensearch.schema import METADATA_LIST_FIELD_NAME
from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
from onyx.document_index.opensearch.schema import PUBLIC_FIELD_NAME
from onyx.document_index.opensearch.schema import set_or_convert_timezone_to_utc
from onyx.document_index.opensearch.schema import SOURCE_TYPE_FIELD_NAME
from onyx.document_index.opensearch.schema import TENANT_ID_FIELD_NAME
from onyx.document_index.opensearch.schema import TITLE_FIELD_NAME
from onyx.document_index.opensearch.schema import TITLE_VECTOR_FIELD_NAME
from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME

# See https://docs.opensearch.org/latest/query-dsl/term/terms/.
MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY = 65_536


_T = TypeVar("_T")
TermsQuery: TypeAlias = dict[str, dict[str, list[_T]]]
TermQuery: TypeAlias = dict[str, dict[str, dict[str, _T]]]


# TODO(andrei): Turn all magic dictionaries to pydantic models.


# Normalization pipelines combine document scores from multiple query clauses.
# The number and ordering of weights should match the query clauses. The values
# of the weights should sum to 1.
def _get_hybrid_search_normalization_weights() -> list[float]:
    if (
        HYBRID_SEARCH_SUBQUERY_CONFIGURATION
        is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
    ):
        # Since the titles are included in the contents, the embedding matches
        # are heavily downweighted as they act as a boost rather than an
        # independent scoring component.
        search_title_vector_weight = 0.1
        search_content_vector_weight = 0.45
        # Single keyword weight for both title and content (merged from former
        # title keyword + content keyword).
        search_keyword_weight = 0.45

        # NOTE: It is critical that the order of these weights matches the order
        # of the sub-queries in the hybrid search.
        hybrid_search_normalization_weights = [
            search_title_vector_weight,
            search_content_vector_weight,
            search_keyword_weight,
        ]
    elif (
        HYBRID_SEARCH_SUBQUERY_CONFIGURATION
        is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
    ):
        search_content_vector_weight = 0.5
        # Single keyword weight for both title and content (merged from former
        # title keyword + content keyword).
        search_keyword_weight = 0.5

        # NOTE: It is critical that the order of these weights matches the order
        # of the sub-queries in the hybrid search.
        hybrid_search_normalization_weights = [
            search_content_vector_weight,
            search_keyword_weight,
        ]
    else:
        raise ValueError(
            f"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}."
        )

    assert (
        sum(hybrid_search_normalization_weights) == 1.0
    ), "Bug: Hybrid search normalization weights do not sum to 1.0."

    return hybrid_search_normalization_weights


def get_min_max_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
    min_max_normalization_pipeline_name = "normalization_pipeline_min_max"
    min_max_normalization_pipeline_config: dict[str, Any] = {
        "description": "Normalization for keyword and vector scores using min-max",
        "phase_results_processors": [
            {
                # https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
                "normalization-processor": {
                    "normalization": {"technique": "min_max"},
                    "combination": {
                        "technique": "arithmetic_mean",
                        "parameters": {
                            "weights": _get_hybrid_search_normalization_weights()
                        },
                    },
                }
            }
        ],
    }
    return min_max_normalization_pipeline_name, min_max_normalization_pipeline_config


def get_zscore_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
    zscore_normalization_pipeline_name = "normalization_pipeline_zscore"
    zscore_normalization_pipeline_config: dict[str, Any] = {
        "description": "Normalization for keyword and vector scores using z-score",
        "phase_results_processors": [
            {
                # https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
                "normalization-processor": {
                    "normalization": {"technique": "z_score"},
                    "combination": {
                        "technique": "arithmetic_mean",
                        "parameters": {
                            "weights": _get_hybrid_search_normalization_weights()
                        },
                    },
                }
            }
        ],
    }
    return zscore_normalization_pipeline_name, zscore_normalization_pipeline_config


def get_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
    if (
        HYBRID_SEARCH_NORMALIZATION_PIPELINE
        is HybridSearchNormalizationPipeline.MIN_MAX
    ):
        return get_min_max_normalization_pipeline_name_and_config()
    elif (
        HYBRID_SEARCH_NORMALIZATION_PIPELINE is HybridSearchNormalizationPipeline.ZSCORE
    ):
        return get_zscore_normalization_pipeline_name_and_config()
    else:
        raise ValueError(
            f"Bug: Unhandled hybrid search normalization pipeline: {HYBRID_SEARCH_NORMALIZATION_PIPELINE}."
        )


class DocumentQuery:
    """
    TODO(andrei): Implement multi-phase search strategies.
    TODO(andrei): Implement document boost.
    TODO(andrei): Implement document age.
    """

    @staticmethod
    def get_from_document_id_query(
        document_id: str,
        tenant_state: TenantState,
        index_filters: IndexFilters,
        include_hidden: bool,
        max_chunk_size: int,
        min_chunk_index: int | None,
        max_chunk_index: int | None,
        get_full_document: bool = True,
    ) -> dict[str, Any]:
        """
        Returns a final search query which gets chunks from a given document ID.

        This query can be directly supplied to the OpenSearch client.

        TODO(andrei): Currently capped at 10k results. Implement scroll/point in
        time for results so that we can return arbitrarily-many IDs.

        Args:
            document_id: Onyx document ID. Notably not an OpenSearch document
                ID, which points to what Onyx would refer to as a chunk.
            tenant_state: Tenant state containing the tenant ID.
            index_filters: Filters for the document retrieval query.
            include_hidden: Whether to include hidden documents.
            max_chunk_size: Document chunks are categorized by the maximum
                number of tokens they can hold. This parameter specifies the
                maximum size category of document chunks to retrieve.
            min_chunk_index: The minimum chunk index to retrieve, inclusive. If
                None, no minimum chunk index will be applied.
            max_chunk_index: The maximum chunk index to retrieve, inclusive. If
                None, no maximum chunk index will be applied.
            get_full_document: Whether to get the full document body. If False,
                OpenSearch will only return the matching document chunk IDs plus
                metadata; the source data will be omitted from the response. Use
                this for performance optimization if OpenSearch IDs are
                sufficient. Defaults to True.

        Returns:
            A dictionary representing the final ID search query.
        """
        filter_clauses = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            include_hidden=include_hidden,
            access_control_list=index_filters.access_control_list,
            source_types=index_filters.source_type or [],
            tags=index_filters.tags or [],
            document_sets=index_filters.document_set or [],
            project_id_filter=index_filters.project_id_filter,
            persona_id_filter=index_filters.persona_id_filter,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=min_chunk_index,
            max_chunk_index=max_chunk_index,
            max_chunk_size=max_chunk_size,
            document_id=document_id,
            attached_document_ids=index_filters.attached_document_ids,
            hierarchy_node_ids=index_filters.hierarchy_node_ids,
        )
        final_get_ids_query: dict[str, Any] = {
            "query": {"bool": {"filter": filter_clauses}},
            # We include this to make sure OpenSearch does not revert to
            # returning some number of results less than the index max allowed
            # return size.
            "size": DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,
            # By default exclude retrieving the vector fields in order to save
            # on retrieval cost as we don't need them upstream.
            "_source": {
                "excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
            },
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
        }
        if not get_full_document:
            # If we explicitly do not want the underlying document, we will only
            # retrieve IDs.
            final_get_ids_query["_source"] = False
        if not OPENSEARCH_PROFILING_DISABLED:
            final_get_ids_query["profile"] = True

        return final_get_ids_query

    @staticmethod
    def delete_from_document_id_query(
        document_id: str,
        tenant_state: TenantState,
    ) -> dict[str, Any]:
        """
        Returns a final search query which deletes chunks from a given document
        ID.

        This query can be directly supplied to the OpenSearch client.

        Intended to be supplied to the OpenSearch client's delete_by_query
        method.

        TODO(andrei): There is no limit to the number of document chunks that
        can be deleted by this query. This could get expensive. Consider
        implementing batching.

        Args:
            document_id: Onyx document ID. Notably not an OpenSearch document
                ID, which points to what Onyx would refer to as a chunk.
            tenant_state: Tenant state containing the tenant ID.

        Returns:
            A dictionary representing the final delete query.
        """
        filter_clauses = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            # Delete hidden docs too.
            include_hidden=True,
            access_control_list=None,
            source_types=[],
            tags=[],
            document_sets=[],
            project_id_filter=None,
            persona_id_filter=None,
            time_cutoff=None,
            min_chunk_index=None,
            max_chunk_index=None,
            max_chunk_size=None,
            document_id=document_id,
        )
        final_delete_query: dict[str, Any] = {
            "query": {"bool": {"filter": filter_clauses}},
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
        }
        if not OPENSEARCH_PROFILING_DISABLED:
            final_delete_query["profile"] = True

        return final_delete_query

    @staticmethod
    def get_hybrid_search_query(
        query_text: str,
        query_vector: list[float],
        num_hits: int,
        tenant_state: TenantState,
        index_filters: IndexFilters,
        include_hidden: bool,
    ) -> dict[str, Any]:
        """Returns a final hybrid search query.

        NOTE: This query can be directly supplied to the OpenSearch client, but
        it MUST be supplied in addition to a search pipeline. The results from
        hybrid search are not meaningful without that step.

        TODO(andrei): There is some duplicated logic in this function with
        others in this file.

        Args:
            query_text: The text to query for.
            query_vector: The vector embedding of the text to query for.
            num_hits: The final number of hits to return.
            tenant_state: Tenant state containing the tenant ID.
            index_filters: Filters for the hybrid search query.
            include_hidden: Whether to include hidden documents.

        Returns:
            A dictionary representing the final hybrid search query.
        """
        # WARNING: Profiling does not work with hybrid search; do not add it at
        # this level. See https://github.com/opensearch-project/neural-search/issues/1255

        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:
            raise ValueError(
                f"Bug: num_hits ({num_hits}) is greater than the current maximum allowed "
                f"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW})."
            )

        # TODO(andrei, yuhong): We can tune this more dynamically based on
        # num_hits.
        max_results_per_subquery = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES

        hybrid_search_subqueries = DocumentQuery._get_hybrid_search_subqueries(
            query_text, query_vector, vector_candidates=max_results_per_subquery
        )
        hybrid_search_filters = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            include_hidden=include_hidden,
            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to
            # now. This should not cause any issues but it can introduce
            # redundant filters in queries that may affect performance.
            access_control_list=index_filters.access_control_list,
            source_types=index_filters.source_type or [],
            tags=index_filters.tags or [],
            document_sets=index_filters.document_set or [],
            project_id_filter=index_filters.project_id_filter,
            persona_id_filter=index_filters.persona_id_filter,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
            attached_document_ids=index_filters.attached_document_ids,
            hierarchy_node_ids=index_filters.hierarchy_node_ids,
        )

        # See https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
        hybrid_search_query: dict[str, Any] = {
            "hybrid": {
                "queries": hybrid_search_subqueries,
                # Max results per subquery per shard before aggregation. Ensures
                # keyword and vector subqueries contribute equally to the
                # candidate pool for hybrid fusion.
                # Sources:
                # https://docs.opensearch.org/latest/vector-search/ai-search/hybrid-search/pagination/
                # https://opensearch.org/blog/navigating-pagination-in-hybrid-queries-with-the-pagination_depth-parameter/
                "pagination_depth": max_results_per_subquery,
                # Applied to all the sub-queries independently (this avoids
                # subqueries having a lot of results thrown out during
                # aggregation).
                # Sources:
                # https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
                # https://opensearch.org/blog/introducing-common-filter-support-for-hybrid-search-queries
                # Does AND for each filter in the list.
                "filter": {"bool": {"filter": hybrid_search_filters}},
            }
        }

        final_hybrid_search_body: dict[str, Any] = {
            "query": hybrid_search_query,
            "size": num_hits,
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
            # Exclude retrieving the vector fields in order to save on
            # retrieval cost as we don't need them upstream.
            "_source": {
                "excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
            },
        }

        if not OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED:
            final_hybrid_search_body["highlight"] = (
                DocumentQuery._get_match_highlights_configuration()
            )

        # Explain is for scoring breakdowns. Setting this significantly
        # increases query latency.
        if OPENSEARCH_EXPLAIN_ENABLED:
            final_hybrid_search_body["explain"] = True

        return final_hybrid_search_body

    @staticmethod
    def get_keyword_search_query(
        query_text: str,
        num_hits: int,
        tenant_state: TenantState,
        index_filters: IndexFilters,
        include_hidden: bool,
    ) -> dict[str, Any]:
        """Returns a final keyword search query.

        This query can be directly supplied to the OpenSearch client.

        TODO(andrei): There is some duplicated logic in this function with
        others in this file.

        Args:
            query_text: The text to query for.
            num_hits: The final number of hits to return.
            tenant_state: Tenant state containing the tenant ID.
            index_filters: Filters for the keyword search query.
            include_hidden: Whether to include hidden documents.

        Returns:
            A dictionary representing the final keyword search query.
        """
        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:
            raise ValueError(
                f"Bug: num_hits ({num_hits}) is greater than the current maximum allowed "
                f"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW})."
            )

        keyword_search_filters = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            include_hidden=include_hidden,
            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to
            # now. This should not cause any issues but it can introduce
            # redundant filters in queries that may affect performance.
            access_control_list=index_filters.access_control_list,
            source_types=index_filters.source_type or [],
            tags=index_filters.tags or [],
            document_sets=index_filters.document_set or [],
            project_id_filter=index_filters.project_id_filter,
            persona_id_filter=index_filters.persona_id_filter,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
            attached_document_ids=index_filters.attached_document_ids,
            hierarchy_node_ids=index_filters.hierarchy_node_ids,
        )

        keyword_search_query = (
            DocumentQuery._get_title_content_combined_keyword_search_query(
                query_text, search_filters=keyword_search_filters
            )
        )

        final_keyword_search_query: dict[str, Any] = {
            "query": keyword_search_query,
            "size": num_hits,
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
            # Exclude retrieving the vector fields in order to save on
            # retrieval cost as we don't need them upstream.
            "_source": {
                "excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
            },
        }

        if not OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED:
            final_keyword_search_query["highlight"] = (
                DocumentQuery._get_match_highlights_configuration()
            )

        if not OPENSEARCH_PROFILING_DISABLED:
            final_keyword_search_query["profile"] = True

        # Explain is for scoring breakdowns. Setting this significantly
        # increases query latency.
        if OPENSEARCH_EXPLAIN_ENABLED:
            final_keyword_search_query["explain"] = True

        return final_keyword_search_query

    @staticmethod
    def get_semantic_search_query(
        query_embedding: list[float],
        num_hits: int,
        tenant_state: TenantState,
        index_filters: IndexFilters,
        include_hidden: bool,
    ) -> dict[str, Any]:
        """Returns a final semantic search query.

        This query can be directly supplied to the OpenSearch client.

        TODO(andrei): There is some duplicated logic in this function with
        others in this file.

        Args:
            query_embedding: The vector embedding of the text to query for.
            num_hits: The final number of hits to return.
            tenant_state: Tenant state containing the tenant ID.
            index_filters: Filters for the semantic search query.
            include_hidden: Whether to include hidden documents.

        Returns:
            A dictionary representing the final semantic search query.
        """
        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:
            raise ValueError(
                f"Bug: num_hits ({num_hits}) is greater than the current maximum allowed "
                f"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW})."
            )

        semantic_search_filters = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            include_hidden=include_hidden,
            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to
            # now. This should not cause any issues but it can introduce
            # redundant filters in queries that may affect performance.
            access_control_list=index_filters.access_control_list,
            source_types=index_filters.source_type or [],
            tags=index_filters.tags or [],
            document_sets=index_filters.document_set or [],
            project_id_filter=index_filters.project_id_filter,
            persona_id_filter=index_filters.persona_id_filter,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
            attached_document_ids=index_filters.attached_document_ids,
            hierarchy_node_ids=index_filters.hierarchy_node_ids,
        )

        semantic_search_query = (
            DocumentQuery._get_content_vector_similarity_search_query(
                query_embedding,
                vector_candidates=num_hits,
                search_filters=semantic_search_filters,
            )
        )

        final_semantic_search_query: dict[str, Any] = {
            "query": semantic_search_query,
            "size": num_hits,
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
            # Exclude retrieving the vector fields in order to save on
            # retrieval cost as we don't need them upstream.
            "_source": {
                "excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
            },
        }

        if not OPENSEARCH_PROFILING_DISABLED:
            final_semantic_search_query["profile"] = True

        # Explain is for scoring breakdowns. Setting this significantly
        # increases query latency.
        if OPENSEARCH_EXPLAIN_ENABLED:
            final_semantic_search_query["explain"] = True

        return final_semantic_search_query

    @staticmethod
    def get_random_search_query(
        tenant_state: TenantState,
        index_filters: IndexFilters,
        num_to_retrieve: int,
    ) -> dict[str, Any]:
        """Returns a final search query that gets document chunks randomly.

        Args:
            tenant_state: Tenant state containing the tenant ID.
            index_filters: Filters for the random search query.
            num_to_retrieve: Number of document chunks to retrieve.

        Returns:
            A dictionary representing the final random search query.
        """
        search_filters = DocumentQuery._get_search_filters(
            tenant_state=tenant_state,
            include_hidden=False,
            access_control_list=index_filters.access_control_list,
            source_types=index_filters.source_type or [],
            tags=index_filters.tags or [],
            document_sets=index_filters.document_set or [],
            project_id_filter=index_filters.project_id_filter,
            persona_id_filter=index_filters.persona_id_filter,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
            attached_document_ids=index_filters.attached_document_ids,
            hierarchy_node_ids=index_filters.hierarchy_node_ids,
        )
        final_random_search_query = {
            "query": {
                "function_score": {
                    "query": {"bool": {"filter": search_filters}},
                    # See
                    # https://docs.opensearch.org/latest/query-dsl/compound/function-score/#the-random-score-function
                    "random_score": {
                        # We'll use a different seed per invocation.
                        "seed": random.randint(0, 1_000_000),
                        # Some field which has a unique value per document
                        # chunk.
                        "field": "_seq_no",
                    },
                    # Replaces whatever score was computed in the query.
                    "boost_mode": "replace",
                }
            },
            "size": num_to_retrieve,
            "timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
            # Exclude retrieving the vector fields in order to save on
            # retrieval cost as we don't need them upstream.
            "_source": {
                "excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
            },
        }
        if not OPENSEARCH_PROFILING_DISABLED:
            final_random_search_query["profile"] = True

        return final_random_search_query

    @staticmethod
    def _get_hybrid_search_subqueries(
        query_text: str,
        query_vector: list[float],
        # The default number of neighbors to consider for knn vector similarity
        # search. This is higher than the number of results because the scoring
        # is hybrid. For a detailed breakdown, see where the default value is
        # set.
        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
    ) -> list[dict[str, Any]]:
        """Returns subqueries for hybrid search.

        Each of these subqueries are the "hybrid" component of this search. We
        search on various things and combine results.

        The return of this function is not sufficient to be directly supplied to
        the OpenSearch client. See get_hybrid_search_query.

        Normalization is not performed here.
        The weights of each of these subqueries should be configured in a search
        pipeline.

        The exact subqueries executed depend on the
        HYBRID_SEARCH_SUBQUERY_CONFIGURATION setting.

        NOTE: For OpenSearch, 5 is the maximum number of query clauses allowed
        in a single hybrid query. Source:
        https://docs.opensearch.org/latest/query-dsl/compound/hybrid/

        NOTE: Each query is independent during the search phase; there is no
        backfilling of scores for missing query components. What this means is
        that if a document was a good vector match but did not show up for
        keyword, it gets a score of 0 for the keyword component of the hybrid
        scoring. This is not as bad as just disregarding a score though as there
        is normalization applied after. So really it is "increasing" the missing
        score compared to if it was included and the range was renormalized.
        This does however mean that between docs that have high scores for say
        the vector field, the keyword scores between them are completely ignored
        unless they also showed up in the keyword query as a reasonably high
        match. TLDR, this is a bit of unique funky behavior but it seems ok.

        NOTE: Options considered and rejected:
        - minimum_should_match: Since it's hybrid search and users often provide
          semantic queries, there is often a lot of terms, and very low number
          of meaningful keywords (and a low ratio of keywords).
        - fuzziness AUTO: Typo tolerance (0/1/2 edit distance by term length).
          It's mostly for typos as the analyzer ("english" by default) already
          does some stemming and tokenization. In testing datasets, this makes
          recall slightly worse. It also is less performant so not really any
          reason to do it.

        Args:
            query_text: The text of the query to search for.
            query_vector: The vector embedding of the query to search for.
            num_candidates: The number of candidates to consider for vector
                similarity search.
        """
        # Build sub-queries for hybrid search. Order must match normalization
        # pipeline weights.
        if (
            HYBRID_SEARCH_SUBQUERY_CONFIGURATION
            is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
        ):
            return [
                DocumentQuery._get_title_vector_similarity_search_query(
                    query_vector, vector_candidates
                ),
                DocumentQuery._get_content_vector_similarity_search_query(
                    query_vector, vector_candidates
                ),
                DocumentQuery._get_title_content_combined_keyword_search_query(
                    query_text
                ),
            ]
        elif (
            HYBRID_SEARCH_SUBQUERY_CONFIGURATION
            is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
        ):
            return [
                DocumentQuery._get_content_vector_similarity_search_query(
                    query_vector, vector_candidates
                ),
                DocumentQuery._get_title_content_combined_keyword_search_query(
                    query_text
                ),
            ]
        else:
            raise ValueError(
                f"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}"
            )

    @staticmethod
    def _get_title_vector_similarity_search_query(
        query_vector: list[float],
        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
    ) -> dict[str, Any]:
        return {
            "knn": {
                TITLE_VECTOR_FIELD_NAME: {
                    "vector": query_vector,
                    "k": vector_candidates,
                }
            }
        }

    @staticmethod
    def _get_content_vector_similarity_search_query(
        query_vector: list[float],
        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
        search_filters: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        query = {
            "knn": {
                CONTENT_VECTOR_FIELD_NAME: {
                    "vector": query_vector,
                    "k": vector_candidates,
                }
            }
        }

        if search_filters is not None:
            query["knn"][CONTENT_VECTOR_FIELD_NAME]["filter"] = {
                "bool": {"filter": search_filters}
            }

        return query

    @staticmethod
    def _get_title_content_combined_keyword_search_query(
        query_text: str,
        search_filters: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        query = {
            "bool": {
                "should": [
                    {
                        "match": {
                            TITLE_FIELD_NAME: {
                                "query": query_text,
                                "operator": "or",
                                # The title fields are strongly discounted as
                                # they are included in the content. This just
                                # acts as a minor boost.
                                "boost": 0.1,
                            }
                        }
                    },
                    {
                        "match_phrase": {
                            TITLE_FIELD_NAME: {
                                "query": query_text,
                                "slop": 1,
                                "boost": 0.2,
                            }
                        }
                    },
                    {
                        # Analyzes the query and returns results which match any
                        # of the query's terms. More matches result in higher
                        # scores.
                        "match": {
                            CONTENT_FIELD_NAME: {
                                "query": query_text,
                                "operator": "or",
                                "boost": 1.0,
                            }
                        }
                    },
                    {
                        # Matches an exact phrase in a specified order.
                        "match_phrase": {
                            CONTENT_FIELD_NAME: {
                                "query": query_text,
                                # The number of words permitted between words of
                                # a query phrase and still result in a match.
                                "slop": 1,
                                "boost": 1.5,
                            }
                        }
                    },
                ],
                # Ensures at least one match subquery from the query is present
                # in the document. This defaults to 1, unless a filter or must
                # clause is supplied, in which case it defaults to 0.
                "minimum_should_match": 1,
            }
        }

        if search_filters is not None:
            query["bool"]["filter"] = search_filters

        return query

    @staticmethod
    def _get_search_filters(
        tenant_state: TenantState,
        include_hidden: bool,
        access_control_list: list[str] | None,
        source_types: list[DocumentSource],
        tags: list[Tag],
        document_sets: list[str],
        project_id_filter: int | None,
        persona_id_filter: int | None,
        time_cutoff: datetime | None,
        min_chunk_index: int | None,
        max_chunk_index: int | None,
        max_chunk_size: int | None = None,
        document_id: str | None = None,
        # Assistant knowledge filters
        attached_document_ids: list[str] | None = None,
        hierarchy_node_ids: list[int] | None = None,
    ) -> list[dict[str, Any]]:
        """Returns filters to be passed into the "filter" key of a search query.

        The "filter" key applies a logical AND operator to its elements, so
        every subfilter must evaluate to true in order for the document to be
        retrieved. This function returns a list of such subfilters.
        See https://docs.opensearch.org/latest/query-dsl/compound/bool/.

        TODO(ENG-3874): The terms queries returned by this function can be made
        more performant for large cardinality sets by sorting the values by
        their UTF-8 byte order.

        TODO(ENG-3875): This function can take even better advantage of filter
        caching by grouping "static" filters together into one sub-clause.

        Args:
            tenant_state: Tenant state containing the tenant ID.
            include_hidden: Whether to include hidden documents.
            access_control_list: Access control list for the documents to
                retrieve. If None, there is no restriction on the documents that
                can be retrieved. If not None, only public documents can be
                retrieved, or non-public documents where at least one acl
                provided here is present in the document's acl list.
            source_types: If supplied, only documents of one of these source
                types will be retrieved.
            tags: If supplied, only documents with an entry in their metadata
                list corresponding to a tag will be retrieved.
            document_sets: If supplied, only documents with at least one
                document set ID from this list will be retrieved.
            project_id_filter: If not None, only documents with this project ID
                in user projects will be retrieved. Additive — only applied
                when a knowledge scope already exists.
            persona_id_filter: If not None, only documents whose personas array
                contains this persona ID will be retrieved. Primary — creates
                a knowledge scope on its own.
            time_cutoff: Time cutoff for the documents to retrieve. If not None,
                Documents which were last updated before this date will not be
                returned. For documents which do not have a value for their last
                updated time, we assume some default age of
                ASSUMED_DOCUMENT_AGE_DAYS for when the document was last
                updated.
            min_chunk_index: The minimum chunk index to retrieve, inclusive. If
                None, no minimum chunk index will be applied.
            max_chunk_index: The maximum chunk index to retrieve, inclusive. If
                None, no maximum chunk index will be applied.
            max_chunk_size: The type of chunk to retrieve, specified by the
                maximum number of tokens it can hold. If None, no filter will be
                applied for this. Defaults to None.
                NOTE: See DocumentChunk.max_chunk_size.
            document_id: The document ID to retrieve. If None, no filter will be
                applied for this. Defaults to None.
            attached_document_ids: Document IDs explicitly attached to the
                assistant. If provided along with hierarchy_node_ids, documents
                matching EITHER criteria will be retrieved (OR logic).
            hierarchy_node_ids: Hierarchy node IDs (folders/spaces) attached to
                the assistant. Matches chunks where ancestor_hierarchy_node_ids
                contains any of these values.

        Raises:
            ValueError: document_id and attached_document_ids were supplied
                together. This is not allowed because they operate on the same
                schema field, and it does not semantically make sense to use
                them together.
            ValueError: Too many of one of the collection arguments was
                supplied.

        Returns:
            A list of filters to be passed into the "filter" key of a search
                query.
        """

        def _get_acl_visibility_filter(
            access_control_list: list[str],
        ) -> dict[str, dict[str, list[TermQuery[bool] | TermsQuery[str]] | int]]:
            """Returns a filter for the access control list.

            Since this returns an isolated bool should clause, it can be cached
            in OpenSearch independently of other clauses in _get_search_filters.

            Args:
                access_control_list: The access control list to restrict
                    documents to.

            Raises:
                ValueError: The number of access control list entries is greater
                    than MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.

            Returns:
                A filter for the access control list.
            """
            # Logical OR operator on its elements.
            acl_visibility_filter: dict[str, dict[str, Any]] = {
                "bool": {
                    "should": [{"term": {PUBLIC_FIELD_NAME: {"value": True}}}],
                    "minimum_should_match": 1,
                }
            }
            if access_control_list:
                if len(access_control_list) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                    raise ValueError(
                        f"Too many access control list entries: {len(access_control_list)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                    )
                # Use terms instead of a list of term within a should clause
                # because Lucene will optimize the filtering for large sets of
                # terms. Small sets of terms are not expected to perform any
                # differently than individual term clauses.
                acl_subclause: TermsQuery[str] = {
                    "terms": {ACCESS_CONTROL_LIST_FIELD_NAME: list(access_control_list)}
                }
                acl_visibility_filter["bool"]["should"].append(acl_subclause)
            return acl_visibility_filter

        def _get_source_type_filter(
            source_types: list[DocumentSource],
        ) -> TermsQuery[str]:
            """Returns a filter for the source types.

            Since this returns an isolated terms clause, it can be cached in
            OpenSearch independently of other clauses in _get_search_filters.

            Args:
                source_types: The source types to restrict documents to.

            Raises:
                ValueError: The number of source types is greater than
                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.
                ValueError: An empty list was supplied.

            Returns:
                A filter for the source types.
            """
            if not source_types:
                raise ValueError(
                    "source_types cannot be empty if trying to create a source type filter."
                )
            if len(source_types) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                raise ValueError(
                    f"Too many source types: {len(source_types)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                )
            # Use terms instead of a list of term within a should clause because
            # Lucene will optimize the filtering for large sets of terms. Small
            # sets of terms are not expected to perform any differently than
            # individual term clauses.
            return {
                "terms": {
                    SOURCE_TYPE_FIELD_NAME: [
                        source_type.value for source_type in source_types
                    ]
                }
            }

        def _get_tag_filter(tags: list[Tag]) -> TermsQuery[str]:
            """Returns a filter for the tags.

            Since this returns an isolated terms clause, it can be cached in
            OpenSearch independently of other clauses in _get_search_filters.

            Args:
                tags: The tags to restrict documents to.

            Raises:
                ValueError: The number of tags is greater than
                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.
                ValueError: An empty list was supplied.

            Returns:
                A filter for the tags.
            """
            if not tags:
                raise ValueError(
                    "tags cannot be empty if trying to create a tag filter."
                )
            if len(tags) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                raise ValueError(
                    f"Too many tags: {len(tags)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                )
            # Kind of an abstraction leak, see
            # convert_metadata_dict_to_list_of_strings for why metadata list
            # entries are expected to look this way.
            tag_str_list = [
                f"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}" for tag in tags
            ]
            # Use terms instead of a list of term within a should clause because
            # Lucene will optimize the filtering for large sets of terms. Small
            # sets of terms are not expected to perform any differently than
            # individual term clauses.
            return {"terms": {METADATA_LIST_FIELD_NAME: tag_str_list}}

        def _get_document_set_filter(document_sets: list[str]) -> TermsQuery[str]:
            """Returns a filter for the document sets.

            Since this returns an isolated terms clause, it can be cached in
            OpenSearch independently of other clauses in _get_search_filters.

            Args:
                document_sets: The document sets to restrict documents to.

            Raises:
                ValueError: The number of document sets is greater than
                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.
                ValueError: An empty list was supplied.

            Returns:
                A filter for the document sets.
            """
            if not document_sets:
                raise ValueError(
                    "document_sets cannot be empty if trying to create a document set filter."
                )
            if len(document_sets) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                raise ValueError(
                    f"Too many document sets: {len(document_sets)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                )
            # Use terms instead of a list of term within a should clause because
            # Lucene will optimize the filtering for large sets of terms. Small
            # sets of terms are not expected to perform any differently than
            # individual term clauses.
            return {"terms": {DOCUMENT_SETS_FIELD_NAME: list(document_sets)}}

        def _get_user_project_filter(project_id: int) -> TermQuery[int]:
            return {"term": {USER_PROJECTS_FIELD_NAME: {"value": project_id}}}

        def _get_persona_filter(persona_id: int) -> TermQuery[int]:
            return {"term": {PERSONAS_FIELD_NAME: {"value": persona_id}}}

        def _get_time_cutoff_filter(time_cutoff: datetime) -> dict[str, Any]:
            # Convert to UTC if not already so the cutoff is comparable to the
            # document data.
            time_cutoff = set_or_convert_timezone_to_utc(time_cutoff)
            # Logical OR operator on its elements.
            time_cutoff_filter: dict[str, Any] = {
                "bool": {"should": [], "minimum_should_match": 1}
            }
            time_cutoff_filter["bool"]["should"].append(
                {
                    "range": {
                        LAST_UPDATED_FIELD_NAME: {"gte": int(time_cutoff.timestamp())}
                    }
                }
            )
            if time_cutoff < datetime.now(timezone.utc) - timedelta(
                days=ASSUMED_DOCUMENT_AGE_DAYS
            ):
                # Since the time cutoff is older than ASSUMED_DOCUMENT_AGE_DAYS
                # ago, we include documents which have no
                # LAST_UPDATED_FIELD_NAME value.
                time_cutoff_filter["bool"]["should"].append(
                    {
                        "bool": {
                            "must_not": {"exists": {"field": LAST_UPDATED_FIELD_NAME}}
                        }
                    }
                )
            return time_cutoff_filter

        def _get_chunk_index_filter(
            min_chunk_index: int | None, max_chunk_index: int | None
        ) -> dict[str, Any]:
            range_clause: dict[str, Any] = {"range": {CHUNK_INDEX_FIELD_NAME: {}}}
            if min_chunk_index is not None:
                range_clause["range"][CHUNK_INDEX_FIELD_NAME]["gte"] = min_chunk_index
            if max_chunk_index is not None:
                range_clause["range"][CHUNK_INDEX_FIELD_NAME]["lte"] = max_chunk_index
            return range_clause

        def _get_attached_document_id_filter(
            doc_ids: list[str],
        ) -> TermsQuery[str]:
            """
            Returns a filter for documents explicitly attached to an assistant.

            Since this returns an isolated terms clause, it can be cached in
            OpenSearch independently of other clauses in _get_search_filters.

            Args:
                doc_ids: The document IDs to restrict documents to.

            Raises:
                ValueError: The number of document IDs is greater than
                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.
                ValueError: An empty list was supplied.

            Returns:
                A filter for the document IDs.
            """
            if not doc_ids:
                raise ValueError(
                    "doc_ids cannot be empty if trying to create a document ID filter."
                )
            if len(doc_ids) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                raise ValueError(
                    f"Too many document IDs: {len(doc_ids)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                )
            # Use terms instead of a list of term within a should clause because
            # Lucene will optimize the filtering for large sets of terms. Small
            # sets of terms are not expected to perform any differently than
            # individual term clauses.
            return {"terms": {DOCUMENT_ID_FIELD_NAME: list(doc_ids)}}

        def _get_hierarchy_node_filter(
            node_ids: list[int],
        ) -> TermsQuery[int]:
            """
            Returns a filter for chunks whose ancestors include any of the given
            hierarchy nodes.

            Since this returns an isolated terms clause, it can be cached in
            OpenSearch independently of other clauses in _get_search_filters.

            Args:
                node_ids: The hierarchy node IDs to restrict documents to.

            Raises:
                ValueError: The number of hierarchy node IDs is greater than
                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.
                ValueError: An empty list was supplied.

            Returns:
                A filter for the hierarchy node IDs.
            """
            if not node_ids:
                raise ValueError(
                    "node_ids cannot be empty if trying to create a hierarchy node ID filter."
                )
            if len(node_ids) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:
                raise ValueError(
                    f"Too many hierarchy node IDs: {len(node_ids)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}."
                )
            # Use terms instead of a list of term within a should clause because
            # Lucene will optimize the filtering for large sets of terms. Small
            # sets of terms are not expected to perform any differently than
            # individual term clauses.
            return {"terms": {ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME: list(node_ids)}}

        if document_id is not None and attached_document_ids is not None:
            raise ValueError(
                "document_id and attached_document_ids cannot be used together."
            )

        filter_clauses: list[dict[str, Any]] = []

        if not include_hidden:
            filter_clauses.append({"term": {HIDDEN_FIELD_NAME: {"value": False}}})

        if access_control_list is not None:
            # If an access control list is provided, the caller can only
            # retrieve public documents, and non-public documents where at least
            # one acl provided here is present in the document's acl list. If
            # there is explicitly no list provided, we make no restrictions on
            # the documents that can be retrieved.
            filter_clauses.append(_get_acl_visibility_filter(access_control_list))

        if source_types:
            # If at least one source type is provided, the caller will only
            # retrieve documents whose source type is present in this input
            # list.
            filter_clauses.append(_get_source_type_filter(source_types))

        if tags:
            # If at least one tag is provided, the caller will only retrieve
            # documents where at least one tag provided here is present in the
            # document's metadata list.
            filter_clauses.append(_get_tag_filter(tags))

        # Knowledge scope: explicit knowledge attachments restrict what an
        # assistant can see. When none are set the assistant searches
        # everything.
        #
        # persona_id_filter is a primary trigger — a persona with user files IS
        # explicit knowledge, so it can start a knowledge scope on its own.
        #
        # project_id_filter is additive — it widens the scope to also cover
        # overflowing project files but never restricts on its own (a chat
        # inside a project should still search team knowledge).
        has_knowledge_scope = (
            attached_document_ids
            or hierarchy_node_ids
            or document_sets
            or persona_id_filter is not None
        )

        if has_knowledge_scope:
            # Since this returns an isolated bool should clause, it can be
            # cached in OpenSearch independently of other clauses in
            # _get_search_filters.
            knowledge_filter: dict[str, Any] = {
                "bool": {"should": [], "minimum_should_match": 1}
            }
            if attached_document_ids:
                knowledge_filter["bool"]["should"].append(
                    _get_attached_document_id_filter(attached_document_ids)
                )
            if hierarchy_node_ids:
                knowledge_filter["bool"]["should"].append(
                    _get_hierarchy_node_filter(hierarchy_node_ids)
                )
            if document_sets:
                knowledge_filter["bool"]["should"].append(
                    _get_document_set_filter(document_sets)
                )
            if persona_id_filter is not None:
                knowledge_filter["bool"]["should"].append(
                    _get_persona_filter(persona_id_filter)
                )
            if project_id_filter is not None:
                knowledge_filter["bool"]["should"].append(
                    _get_user_project_filter(project_id_filter)
                )
            filter_clauses.append(knowledge_filter)

        if time_cutoff is not None:
            # If a time cutoff is provided, the caller will only retrieve
            # documents where the document was last updated at or after the time
            # cutoff. For documents which do not have a value for
            # LAST_UPDATED_FIELD_NAME, we assume some default age for the
            # purposes of time cutoff.
            filter_clauses.append(_get_time_cutoff_filter(time_cutoff))

        if min_chunk_index is not None or max_chunk_index is not None:
            filter_clauses.append(
                _get_chunk_index_filter(min_chunk_index, max_chunk_index)
            )

        if document_id is not None:
            filter_clauses.append(
                {"term": {DOCUMENT_ID_FIELD_NAME: {"value": document_id}}}
            )

        if max_chunk_size is not None:
            filter_clauses.append(
                {"term": {MAX_CHUNK_SIZE_FIELD_NAME: {"value": max_chunk_size}}}
            )

        if tenant_state.multitenant:
            filter_clauses.append(
                {"term": {TENANT_ID_FIELD_NAME: {"value": tenant_state.tenant_id}}}
            )

        return filter_clauses

    @staticmethod
    def _get_match_highlights_configuration() -> dict[str, Any]:
        """
        Gets configuration for returning match highlights for a hit.
        """
        match_highlights_configuration: dict[str, Any] = {
            "fields": {
                CONTENT_FIELD_NAME: {
                    # See https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#highlighter-types
                    "type": "unified",
                    # The length in chars of a match snippet. Somewhat
                    # arbitrarily-chosen. The Vespa codepath limited total
                    # highlights length to 400 chars. fragment_size *
                    # number_of_fragments = 400 should be good enough.
                    "fragment_size": 100,
                    # The number of snippets to return per field per document
                    # hit.
                    "number_of_fragments": 4,
                    # These tags wrap matched keywords and they match what Vespa
                    # used to return. Use them to minimize changes to our code.
                    "pre_tags": ["<hi>"],
                    "post_tags": ["</hi>"],
                }
            }
        }

        return match_highlights_configuration


================================================
FILE: backend/onyx/document_index/opensearch/string_filtering.py
================================================
import re

MAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512


class DocumentIDTooLongError(ValueError):
    """Raised when a document ID is too long for OpenSearch after filtering."""


def filter_and_validate_document_id(
    document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH
) -> str:
    """
    Filters and validates a document ID such that it can be used as an ID in
    OpenSearch.

    OpenSearch imposes the following restrictions on IDs:
    - Must not be an empty string.
    - Must not exceed 512 bytes.
    - Must not contain any control characters (newline, etc.).
    - Must not contain URL-unsafe characters (#, ?, /, %, &, etc.).

    For extra resilience, this function simply removes all characters that are
    not alphanumeric or one of _.-~.

    Any query on document ID should use this function.

    Args:
        document_id: The document ID to filter and validate.
        max_encoded_length: The maximum length of the document ID after
            filtering in bytes. Compared with >= for extra resilience, so
            encoded values of this length will fail.

    Raises:
        DocumentIDTooLongError: If the document ID is too long after filtering.
        ValueError: If the document ID is empty after filtering.

    Returns:
        str: The filtered document ID.
    """
    filtered_document_id = re.sub(r"[^A-Za-z0-9_.\-~]", "", document_id)
    if not filtered_document_id:
        raise ValueError(f"Document ID {document_id} is empty after filtering.")
    if len(filtered_document_id.encode("utf-8")) >= max_encoded_length:
        raise DocumentIDTooLongError(
            f"Document ID {document_id} is too long after filtering."
        )
    return filtered_document_id


================================================
FILE: backend/onyx/document_index/vespa/__init__.py
================================================


================================================
FILE: backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja
================================================
schema {{ schema_name }} {
    # source, type, target triplets for kg_relationships
    struct kg_relationship {
        field source type string {}
        field rel_type type string {}
        field target type string {}
    }

    document {{ schema_name }} {
        {% if multi_tenant %}
        field tenant_id type string {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        {% endif %}
        # Not to be confused with the UUID generated for this chunk which is called documentid by default
        field document_id type string {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field chunk_id type int {
            indexing: summary | attribute
        }
        # Displayed in the UI as the main identifier for the doc
        field semantic_identifier type string {
            indexing: summary | attribute
        }
        # Must have an additional field for whether to skip title embeddings
        # This information cannot be extracted from either the title field nor title embedding
        field skip_title type bool {
            indexing: attribute
        }
        # May not always match the `semantic_identifier` e.g. for Slack docs the
        # `semantic_identifier` will be the channel name, but the `title` will be empty
        field title type string {
            indexing: summary | index | attribute
            index: enable-bm25
        }
        field content type string {
            indexing: summary | index
            index: enable-bm25
        }
        # duplication of `content` is far from ideal, but is needed for
        # non-gram based highlighting for now. If the capability to re-use a
        # single field to do both is added, `content_summary` should be removed
        field content_summary type string {
            indexing: summary | index
            summary: dynamic
        }
        # Title embedding (x1)
        field title_embedding type tensor<{{ embedding_precision }}>(x[{{ dim }}]) {
            indexing: attribute | index
            attribute {
                distance-metric: angular
            }
        }
        # Content embeddings (chunk + optional mini chunks embeddings)
        # "t" and "x" are arbitrary names, not special keywords
        field embeddings type tensor<{{ embedding_precision }}>(t{},x[{{ dim }}]) {
            indexing: attribute | index
            attribute {
                distance-metric: angular
            }
        }
        # Starting section of the doc, currently unused as it has been replaced by match highlighting
        field blurb type string {
            indexing: summary | attribute
        }
        field image_file_name type string {
            indexing: summary | attribute
        }
        # https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it
        field source_type type string {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        # Can also index links https://docs.vespa.ai/en/reference/schema-reference.html#attribute
        # URL type matching
        field source_links type string {
            indexing: summary | attribute
        }
        field section_continuation type bool {
            indexing: summary | attribute
        }
        # Technically this one should be int, but can't change without causing breaks to existing index
        field boost type float {
            indexing: summary | attribute
        }
        field hidden type bool {
            indexing: summary | attribute
            rank: filter
        }
        # Field to indicate whether a short chunk is a low content chunk
        field aggregated_chunk_boost_factor type float {
            indexing: attribute
        }

        # Separate array fields for knowledge graph data
        field kg_entities type array<string> {
            indexing: summary | attribute
            attribute: fast-search
        }

        field kg_relationships type array<kg_relationship> {
            indexing: summary
            struct-field source {
                indexing: attribute
                attribute: fast-search
            }
            struct-field rel_type {
                indexing: attribute
                attribute: fast-search
            }
            struct-field target {
                indexing: attribute
                attribute: fast-search
            }
        }

        field kg_terms type array<string> {
            indexing: summary | attribute
            attribute: fast-search
        }

        # Needs to have a separate Attribute list for efficient filtering
        field metadata_list type array<string> {
            indexing: summary | attribute
            rank:filter
            attribute: fast-search
        }
        # If chunk is a large chunk, this will contain the ids of the smaller chunks
        field large_chunk_reference_ids type array<int> {
            indexing: summary | attribute
        }
        field metadata type string {
            indexing: summary | attribute
        }
        field chunk_context type string {
            indexing: summary | attribute
        }
        field doc_summary type string {
            indexing: summary | attribute
        }
        field metadata_suffix type string {
            indexing: summary | attribute
        }
        field doc_updated_at type int {
            indexing: summary | attribute
        }
        field primary_owners type array<string> {
            indexing: summary | attribute
        }
        field secondary_owners type array<string> {
            indexing: summary | attribute
        }
        field access_control_list type weightedset<string> {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field document_sets type weightedset<string> {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field user_file type int {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field user_folder type int {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field user_project type array<int> {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
        field personas type array<int> {
            indexing: summary | attribute
            rank: filter
            attribute: fast-search
        }
    }

    # If using different tokenization settings, the fieldset has to be removed, and the field must
    # be specified in the yql like:
    # + 'or ({grammar: "weakAnd", defaultIndex:"title"}userInput(@query)) '
    # + 'or ({grammar: "weakAnd", defaultIndex:"content"}userInput(@query)) '
    # Note: for BM-25, the ngram size (and whether ngrams are used) changes the range of the scores
    fieldset default {
        fields: content, title
    }

    rank-profile default_rank {
        inputs {
            query(decay_factor) double
        }

        function inline document_boost() {
            # 0.5 to 2x score: piecewise sigmoid function stretched out by factor of 3
            # meaning requires 3x the number of feedback votes to have default sigmoid effect
            expression: if(attribute(boost) < 0, 0.5 + (1 / (1 + exp(-attribute(boost) / 3))), 2 / (1 + exp(-attribute(boost) / 3)))
        }

        function inline document_age() {
            # Time in years (91.3 days ~= 3 Months ~= 1 fiscal quarter if no age found)
            expression: max(if(isNan(attribute(doc_updated_at)) == 1, 7890000, now() - attribute(doc_updated_at)) / 31536000, 0)
        }

        function inline aggregated_chunk_boost() {
            # Aggregated boost factor, currently only used for information content classification
            expression: if(isNan(attribute(aggregated_chunk_boost_factor)) == 1, 1.0, attribute(aggregated_chunk_boost_factor))
        }

        # Document score decays from 1 to 0.75 as age of last updated time increases
        function inline recency_bias() {
            expression: max(1 / (1 + query(decay_factor) * document_age), 0.75)
        }

        match-features: recency_bias
    }

    rank-profile hybrid_search_semantic_base_{{ dim }} inherits default, default_rank {
        inputs {
            query(query_embedding) tensor<float>(x[{{ dim }}])
        }

        function title_vector_score() {
            expression {
                # If no good matching titles, then it should use the context embeddings rather than having some
                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
                # matching content score getting the full score
                max(closeness(field, embeddings), closeness(field, title_embedding))
            }
        }

        # First phase must be vector to allow hits that have no keyword matches
        first-phase {
            expression: query(title_content_ratio) * closeness(field, title_embedding) + (1 - query(title_content_ratio)) * closeness(field, embeddings)
        }

        # Weighted average between Vector Search and BM-25
        global-phase {
            expression {
                (
                    # Weighted Vector Similarity Score
                    (
                        query(alpha) * (
                            (query(title_content_ratio) * normalize_linear(title_vector_score))
                            +
                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
                        )
                    )

                    +

                    # Weighted Keyword Similarity Score
                    # Note: for the BM25 Title score, it requires decent stopword removal in the query
                    # This needs to be the case so there aren't irrelevant titles being normalized to a score of 1
                    (
                        (1 - query(alpha)) * (
                            (query(title_content_ratio) * normalize_linear(bm25(title)))
                            +
                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
                        )
                    )
                )
                # Boost based on user feedback
                * document_boost
                # Decay factor based on time document was last updated
                * recency_bias
                # Boost based on aggregated boost calculation
                * aggregated_chunk_boost
            }
            # Target hits for hybrid retrieval should be at least this value.
            rerank-count: 1000
        }

        match-features {
            bm25(title)
            bm25(content)
            closeness(field, title_embedding)
            closeness(field, embeddings)
            document_boost
            recency_bias
            aggregated_chunk_boost
            closest(embeddings)
        }
    }


    rank-profile hybrid_search_keyword_base_{{ dim }} inherits default, default_rank {
        inputs {
            query(query_embedding) tensor<float>(x[{{ dim }}])
        }

        function title_vector_score() {
            expression {
                # If no good matching titles, then it should use the context embeddings rather than having some
                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
                # matching content score getting the full score
                max(closeness(field, embeddings), closeness(field, title_embedding))
            }
        }

        # First phase must be vector to allow hits that have no keyword matches
        first-phase {
            expression: query(title_content_ratio) * bm25(title) + (1 - query(title_content_ratio)) * bm25(content)
        }

        # Weighted average between Vector Search and BM-25
        global-phase {
            expression {
                (
                    # Weighted Vector Similarity Score
                    (
                        query(alpha) * (
                            (query(title_content_ratio) * normalize_linear(title_vector_score))
                            +
                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
                        )
                    )

                    +

                    # Weighted Keyword Similarity Score
                    # Note: for the BM25 Title score, it requires decent stopword removal in the query
                    # This needs to be the case so there aren't irrelevant titles being normalized to a score of 1
                    (
                        (1 - query(alpha)) * (
                            (query(title_content_ratio) * normalize_linear(bm25(title)))
                            +
                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
                        )
                    )
                )
                # Boost based on user feedback
                * document_boost
                # Decay factor based on time document was last updated
                * recency_bias
                # Boost based on aggregated boost calculation
                * aggregated_chunk_boost
            }
            # Target hits for hybrid retrieval should be at least this value.
            rerank-count: 1000
        }

        match-features {
            bm25(title)
            bm25(content)
            closeness(field, title_embedding)
            closeness(field, embeddings)
            document_boost
            recency_bias
            aggregated_chunk_boost
            closest(embeddings)
        }
    }

    # Used when searching from the admin UI for a specific doc to hide / boost
    # Very heavily prioritize title
    rank-profile admin_search inherits default, default_rank {
        first-phase {
            expression: bm25(content) + (5 * bm25(title))
        }
    }

    rank-profile random_ inherits default {
        first-phase {
            expression: random
        }
    }
}


================================================
FILE: backend/onyx/document_index/vespa/app_config/services.xml.jinja
================================================
<?xml version="1.0" encoding="utf-8" ?>
<services version="1.0">
    <container id="default" version="1.0">
        <document-api/>
        <search/>
        <http>
            <server id="default" port="8081"/>
        </http>
        <nodes>
            <node hostalias="danswer-node" />
        </nodes>
    </container>
    <content id="danswer_index" version="1.0">
        <redundancy>1</redundancy>
        <documents>
            <!-- <document type="danswer_chunk" mode="index" /> -->
            {{ document_elements }}
        </documents>
        <nodes>
            <node hostalias="danswer-node" distribution-key="0" />
        </nodes>
        <tuning>
            <resource-limits>
                <!-- Default is 75% but this can be increased for Dockerized deployments -->
                <!-- https://docs.vespa.ai/en/operations/feed-block.html -->
                <disk>0.85</disk>
            </resource-limits>
        </tuning>
        <engine>    
            <proton>
                <tuning>
                    <searchnode>
                        <requestthreads>
                            <persearch>{{ num_search_threads }}</persearch>
                        </requestthreads>
                    </searchnode>
                </tuning>
            </proton>
        </engine>
        <config name="vespa.config.search.summary.juniperrc">
            <max_matches>3</max_matches>
            <length>750</length>
            <surround_max>350</surround_max>
            <min_length>300</min_length>
        </config>
    </content>
</services>

================================================
FILE: backend/onyx/document_index/vespa/app_config/validation-overrides.xml.jinja
================================================
<validation-overrides>
    <allow
        until="{{ until_date }}"
        comment="We need to be able to create/delete indices for swapping models">schema-removal</allow>
    <allow
        until="{{ until_date }}"
        comment="We need to be able to update the schema for updates to the Onyx schema">indexing-change</allow>
    <allow 
        until="{{ until_date }}"
        comment="Prevents old alt indices from interfering with changes">field-type-change</allow>
</validation-overrides>


================================================
FILE: backend/onyx/document_index/vespa/chunk_retrieval.py
================================================
import json
import string
import time
from collections.abc import Callable
from collections.abc import Mapping
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast

import httpx
from retry import retry

from onyx.background.celery.tasks.opensearch_migration.constants import (
    FINISHED_VISITING_SLICE_CONTINUATION_TOKEN,
)
from onyx.background.celery.tasks.opensearch_migration.transformer import (
    FIELDS_NEEDED_FOR_TRANSFORMATION,
)
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.configs.app_configs import VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
    build_vespa_filters,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
    build_vespa_id_based_retrieval_yql,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
from onyx.document_index.vespa_constants import MAX_OR_CONDITIONS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def _process_dynamic_summary(
    dynamic_summary: str, max_summary_length: int = 400
) -> list[str]:
    if not dynamic_summary:
        return []

    current_length = 0
    processed_summary: list[str] = []
    for summary_section in dynamic_summary.split("<sep />"):
        # if we're past the desired max length, break at the last word
        if current_length + len(summary_section) >= max_summary_length:
            summary_section = summary_section[: max_summary_length - current_length]
            summary_section = summary_section.lstrip()  # remove any leading whitespace

            # handle the case where the truncated section is either just a
            # single (partial) word or if it's empty
            first_space = summary_section.find(" ")
            if first_space == -1:
                # add ``...`` to previous section
                if processed_summary:
                    processed_summary[-1] += "..."
                break

            # handle the valid truncated section case
            summary_section = summary_section.rsplit(" ", 1)[0]
            if summary_section[-1] in string.punctuation:
                summary_section = summary_section[:-1]
            summary_section += "..."
            processed_summary.append(summary_section)
            break

        processed_summary.append(summary_section)
        current_length += len(summary_section)

    return processed_summary


def _vespa_hit_to_inference_chunk(
    hit: dict[str, Any], null_score: bool = False
) -> InferenceChunkUncleaned:
    fields = cast(dict[str, Any], hit["fields"])

    # parse fields that are stored as strings, but are really json / datetime
    metadata = json.loads(fields[METADATA]) if METADATA in fields else {}
    updated_at = (
        datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc)
        if DOC_UPDATED_AT in fields
        else None
    )

    match_highlights = _process_dynamic_summary(
        # fallback to regular `content` if the `content_summary` field
        # isn't present
        dynamic_summary=hit["fields"].get(CONTENT_SUMMARY, hit["fields"][CONTENT]),
    )
    semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "")
    if not semantic_identifier:
        logger.error(
            f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier"
        )

    source_links = fields.get(SOURCE_LINKS, {})
    source_links_dict_unprocessed = (
        json.loads(source_links) if isinstance(source_links, str) else source_links
    )
    source_links_dict = {
        int(k): v
        for k, v in cast(dict[str, str], source_links_dict_unprocessed).items()
    }

    return InferenceChunkUncleaned(
        chunk_id=fields[CHUNK_ID],
        blurb=fields.get(BLURB, ""),  # Unused
        content=fields[CONTENT],  # Includes extra title prefix and metadata suffix;
        # also sometimes context for contextual rag
        source_links=source_links_dict or {0: ""},
        section_continuation=fields[SECTION_CONTINUATION],
        document_id=fields[DOCUMENT_ID],
        source_type=fields[SOURCE_TYPE],
        # still called `image_file_name` in Vespa for backwards compatibility
        image_file_id=fields.get(IMAGE_FILE_NAME),
        title=fields.get(TITLE),
        semantic_identifier=fields[SEMANTIC_IDENTIFIER],
        boost=fields.get(BOOST, 1),
        score=None if null_score else hit.get("relevance", 0),
        hidden=fields.get(HIDDEN, False),
        primary_owners=fields.get(PRIMARY_OWNERS),
        secondary_owners=fields.get(SECONDARY_OWNERS),
        large_chunk_reference_ids=fields.get(LARGE_CHUNK_REFERENCE_IDS, []),
        metadata=metadata,
        metadata_suffix=fields.get(METADATA_SUFFIX),
        doc_summary=fields.get(DOC_SUMMARY, ""),
        chunk_context=fields.get(CHUNK_CONTEXT, ""),
        match_highlights=match_highlights,
        updated_at=updated_at,
    )


def get_chunks_via_visit_api(
    chunk_request: VespaChunkRequest,
    index_name: str,
    filters: IndexFilters,
    field_names: list[str] | None = None,
    get_large_chunks: bool = False,
    short_tensor_format: bool = False,
) -> list[dict]:
    # Constructing the URL for the Visit API
    # NOTE: visit API uses the same URL as the document API, but with different params
    url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)

    # build the list of fields to retrieve
    field_set_list = (
        [f"{field_name}" for field_name in field_names] if field_names else []
    )
    acl_fieldset_entry = f"{ACCESS_CONTROL_LIST}"
    if (
        field_set_list
        and filters.access_control_list
        and acl_fieldset_entry not in field_set_list
    ):
        field_set_list.append(acl_fieldset_entry)

    if MULTI_TENANT:
        tenant_id_fieldset_entry = f"{TENANT_ID}"
        if field_set_list and tenant_id_fieldset_entry not in field_set_list:
            field_set_list.append(tenant_id_fieldset_entry)

    if field_set_list:
        field_set = f"{index_name}:" + ",".join(field_set_list)
    else:
        field_set = None

    # build filters
    selection = f"{index_name}.document_id=='{chunk_request.document_id}'"

    if chunk_request.is_capped:
        selection += f" and {index_name}.chunk_id>={chunk_request.min_chunk_ind or 0}"
        selection += f" and {index_name}.chunk_id<={chunk_request.max_chunk_ind}"
    if not get_large_chunks:
        selection += f" and {index_name}.large_chunk_reference_ids == null"

    # enforcing tenant_id through a == condition
    if MULTI_TENANT:
        if filters.tenant_id:
            selection += f" and {index_name}.tenant_id=='{filters.tenant_id}'"
        else:
            raise ValueError("Tenant ID is required for multi-tenant")

    # Setting up the selection criteria in the query parameters
    params = {
        # NOTE: Document Selector Language doesn't allow `contains`, so we can't check
        # for the ACL in the selection. Instead, we have to check as a postfilter
        "selection": selection,
        "continuation": None,
        "wantedDocumentCount": 1_000,
        "fieldSet": field_set,
    }
    # Vespa can supply tensors in various different formats. This explicitly
    # asks to retrieve tensor data in "short-value" format.
    if short_tensor_format:
        params["format.tensors"] = "short-value"

    document_chunks: list[dict] = []
    while True:
        try:
            filtered_params = {k: v for k, v in params.items() if v is not None}
            with get_vespa_http_client() as http_client:
                response = http_client.get(url, params=filtered_params)
                response.raise_for_status()
        except httpx.HTTPError as e:
            error_base = "Failed to query Vespa"
            logger.error(
                f"{error_base}:\n"
                f"Request URL: {e.request.url}\n"
                f"Request Headers: {e.request.headers}\n"
                f"Request Payload: {params}\n"
                f"Exception: {str(e)}"
            )
            raise httpx.HTTPError(error_base) from e

        # Check if the response contains any documents
        response_data = response.json()

        if "documents" in response_data:
            for document in response_data["documents"]:
                if filters.access_control_list:
                    document_acl = document["fields"].get(ACCESS_CONTROL_LIST)
                    if not document_acl or not any(
                        user_acl_entry in document_acl
                        for user_acl_entry in filters.access_control_list
                    ):
                        continue

                if MULTI_TENANT:
                    if not filters.tenant_id:
                        raise ValueError("Tenant ID is required for multi-tenant")
                    document_tenant_id = document["fields"].get(TENANT_ID)
                    if document_tenant_id != filters.tenant_id:
                        logger.error(
                            f"Skipping document {document['document_id']} because "
                            f"it does not belong to tenant {filters.tenant_id}. "
                            "This should never happen."
                        )
                        continue

                document_chunks.append(document)

        # Check for continuation token to handle pagination
        if "continuation" in response_data and response_data["continuation"]:
            params["continuation"] = response_data["continuation"]
        else:
            break  # Exit loop if no continuation token

    return document_chunks


def get_all_chunks_paginated(
    index_name: str,
    tenant_state: TenantState,
    continuation_token_map: dict[int, str | None],
    page_size: int,
) -> tuple[list[dict], dict[int, str | None]]:
    """Gets all chunks in Vespa matching the filters, paginated.

    Uses the Visit API with slicing. Each continuation token map entry is for a
    different slice. The number of entries determines the number of slices.

    Args:
        index_name: The name of the Vespa index to visit.
        tenant_state: The tenant state to filter by.
        continuation_token_map: Map of slice ID to a token returned by Vespa
            representing a page offset. None to start from the beginning of the
            slice.
        page_size: Best-effort batch size for the visit. Defaults to 1,000.

    Returns:
        Tuple of (list of chunk dicts, next continuation token or None). The
            continuation token is None when the visit is complete.
    """

    def _get_all_chunks_paginated_for_slice(
        index_name: str,
        tenant_state: TenantState,
        slice_id: int,
        total_slices: int,
        continuation_token: str | None,
        page_size: int,
    ) -> tuple[list[dict], str | None]:
        if continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN:
            logger.debug(
                f"Slice {slice_id} has finished visiting. Returning empty list and {FINISHED_VISITING_SLICE_CONTINUATION_TOKEN}."
            )
            return [], FINISHED_VISITING_SLICE_CONTINUATION_TOKEN

        url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)

        selection: str = f"{index_name}.large_chunk_reference_ids == null"
        if MULTI_TENANT:
            selection += f" and {index_name}.tenant_id=='{tenant_state.tenant_id}'"

        field_set = f"{index_name}:" + ",".join(FIELDS_NEEDED_FOR_TRANSFORMATION)

        params: dict[str, str | int | None] = {
            "selection": selection,
            "fieldSet": field_set,
            "wantedDocumentCount": page_size,
            "format.tensors": "short-value",
            "slices": total_slices,
            "sliceId": slice_id,
            # When exceeded, Vespa should return gracefully with partial
            # results. Even if no hits are returned, Vespa should still return a
            # new continuation token representing a new spot in the linear
            # traversal.
            "timeout": VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT,
        }
        if continuation_token is not None:
            params["continuation"] = continuation_token

        response: httpx.Response | None = None
        start_time = time.monotonic()
        try:
            with get_vespa_http_client(
                # When exceeded, an exception is raised in our code. No progress
                # is saved, and the task will retry this spot in the traversal
                # later.
                timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
            ) as http_client:
                response = http_client.get(url, params=params)
                response.raise_for_status()
        except httpx.HTTPError as e:
            error_base = (
                f"Failed to get chunks from Vespa slice {slice_id} with continuation token "
                f"{continuation_token} in {time.monotonic() - start_time:.3f} seconds."
            )
            logger.exception(
                f"Request URL: {e.request.url}\nRequest Headers: {e.request.headers}\nRequest Payload: {params}\n"
            )
            error_message = (
                response.json().get("message") if response else "No response"
            )
            logger.error("Error message from response: %s", error_message)
            raise httpx.HTTPError(error_base) from e

        response_data = response.json()

        # NOTE: If we see a falsey value for "continuation" in the response we
        # assume we are done and return
        # FINISHED_VISITING_SLICE_CONTINUATION_TOKEN instead.
        next_continuation_token = (
            response_data.get("continuation")
            or FINISHED_VISITING_SLICE_CONTINUATION_TOKEN
        )
        chunks = [chunk["fields"] for chunk in response_data.get("documents", [])]
        if next_continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN:
            logger.debug(
                f"Slice {slice_id} has finished visiting. Returning {len(chunks)} chunks and {next_continuation_token}."
            )
        return chunks, next_continuation_token

    total_slices = len(continuation_token_map)
    if total_slices < 1:
        raise ValueError("continuation_token_map must have at least one entry.")
    # We want to guarantee that these invocations are ordered by slice_id,
    # because we read in the same order below when parsing parallel_results.
    functions_with_args: list[tuple[Callable, tuple]] = [
        (
            _get_all_chunks_paginated_for_slice,
            (
                index_name,
                tenant_state,
                slice_id,
                total_slices,
                continuation_token,
                page_size,
            ),
        )
        for slice_id, continuation_token in sorted(continuation_token_map.items())
    ]

    parallel_results = run_functions_tuples_in_parallel(
        functions_with_args, allow_failures=True
    )
    if len(parallel_results) != total_slices:
        raise RuntimeError(
            f"Expected {total_slices} parallel results, but got {len(parallel_results)}."
        )

    chunks: list[dict] = []
    next_continuation_token_map: dict[int, str | None] = {
        key: value for key, value in continuation_token_map.items()
    }
    for i, parallel_result in enumerate(parallel_results):
        if i not in next_continuation_token_map:
            raise RuntimeError(f"Slice {i} is not in the continuation token map.")
        if parallel_result is None:
            logger.error(
                f"Failed to get chunks for slice {i} of {total_slices}. "
                "The continuation token for this slice will not be updated."
            )
            continue
        chunks.extend(parallel_result[0])
        next_continuation_token_map[i] = parallel_result[1]

    return chunks, next_continuation_token_map


# TODO(rkuo): candidate for removal if not being used
# @retry(tries=10, delay=1, backoff=2)
# def get_all_vespa_ids_for_document_id(
#     document_id: str,
#     index_name: str,
#     filters: IndexFilters | None = None,
#     get_large_chunks: bool = False,
# ) -> list[str]:
#     document_chunks = get_chunks_via_visit_api(
#         chunk_request=VespaChunkRequest(document_id=document_id),
#         index_name=index_name,
#         filters=filters or IndexFilters(access_control_list=None),
#         field_names=[DOCUMENT_ID],
#         get_large_chunks=get_large_chunks,
#     )
#     return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks]


def parallel_visit_api_retrieval(
    index_name: str,
    chunk_requests: list[VespaChunkRequest],
    filters: IndexFilters,
    get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
    functions_with_args: list[tuple[Callable, tuple]] = [
        (
            get_chunks_via_visit_api,
            (chunk_request, index_name, filters, get_large_chunks),
        )
        for chunk_request in chunk_requests
    ]

    parallel_results = run_functions_tuples_in_parallel(
        functions_with_args, allow_failures=True
    )

    # Any failures to retrieve would give a None, drop the Nones and empty lists
    vespa_chunk_sets = [res for res in parallel_results if res]

    flattened_vespa_chunks = []
    for chunk_set in vespa_chunk_sets:
        flattened_vespa_chunks.extend(chunk_set)

    inference_chunks = [
        _vespa_hit_to_inference_chunk(chunk, null_score=True)
        for chunk in flattened_vespa_chunks
    ]

    return inference_chunks


@retry(tries=3, delay=1, backoff=2)
def query_vespa(
    query_params: Mapping[str, str | int | float],
) -> list[InferenceChunkUncleaned]:
    if "query" in query_params and not cast(str, query_params["query"]).strip():
        raise ValueError("No/empty query received")

    params = dict(
        **query_params,
        **(
            {
                "presentation.timing": True,
            }
            if LOG_VESPA_TIMING_INFORMATION
            else {}
        ),
    )

    if VESPA_LANGUAGE_OVERRIDE:
        params["language"] = VESPA_LANGUAGE_OVERRIDE

    try:
        with get_vespa_http_client() as http_client:
            response = http_client.post(SEARCH_ENDPOINT, json=params)
            response.raise_for_status()
    except httpx.HTTPError as e:
        response_text = (
            e.response.text if isinstance(e, httpx.HTTPStatusError) else None
        )
        status_code = (
            e.response.status_code if isinstance(e, httpx.HTTPStatusError) else None
        )
        yql_value = params.get("yql", "")
        yql_length = len(str(yql_value))

        # Log each detail on its own line so log collectors capture them
        # as separate entries rather than truncating a single multiline msg
        logger.error(
            f"Failed to query Vespa | "
            f"status={status_code} | "
            f"yql_length={yql_length} | "
            f"exception={str(e)}"
        )
        if response_text:
            logger.error(f"Vespa error response: {response_text[:1000]}")
        logger.error(f"Vespa request URL: {e.request.url}")

        # Re-raise with diagnostics so callers see what actually went wrong
        raise httpx.HTTPError(
            f"Failed to query Vespa (status={status_code}, " f"yql_length={yql_length})"
        ) from e

    response_json: dict[str, Any] = response.json()

    if LOG_VESPA_TIMING_INFORMATION:
        logger.debug("Vespa timing info: %s", response_json.get("timing"))
    hits = response_json["root"].get("children", [])

    if not hits:
        logger.warning(
            f"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}"
        )
        logger.debug(f"Vespa Response: {response.text}")

    for hit in hits:
        if hit["fields"].get(CONTENT) is None:
            identifier = hit["fields"].get("documentid") or hit["id"]
            logger.error(
                f"Vespa Index with Vespa ID {identifier} has no contents. "
                f"This is invalid because the vector is not meaningful and keywordsearch cannot "
                f"fetch this document"
            )

    filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]

    inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]

    try:
        num_retrieved_inference_chunks = len(inference_chunks)
        num_retrieved_document_ids = len(
            set([chunk.document_id for chunk in inference_chunks])
        )
        logger.info(
            f"Retrieved {num_retrieved_inference_chunks} inference chunks for {num_retrieved_document_ids} documents"
        )
    except Exception as e:
        # Debug logging only, should not fail the retrieval
        logger.error(f"Error logging retrieval statistics: {e}")

    # Good Debugging Spot
    return inference_chunks


def _get_chunks_via_batch_search(
    index_name: str,
    chunk_requests: list[VespaChunkRequest],
    filters: IndexFilters,
    get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
    if not chunk_requests:
        return []

    filters_str = build_vespa_filters(filters=filters, include_hidden=True)

    yql = (
        YQL_BASE.format(index_name=index_name)
        + filters_str
        + build_vespa_id_based_retrieval_yql(chunk_requests[0])
    )
    chunk_requests.pop(0)

    for request in chunk_requests:
        yql += " or " + build_vespa_id_based_retrieval_yql(request)
    params: dict[str, str | int | float] = {
        "yql": yql,
        "hits": MAX_ID_SEARCH_QUERY_SIZE,
    }

    inference_chunks = query_vespa(params)
    if not get_large_chunks:
        inference_chunks = [
            chunk for chunk in inference_chunks if not chunk.large_chunk_reference_ids
        ]
    inference_chunks.sort(key=lambda chunk: chunk.chunk_id)
    return inference_chunks


def batch_search_api_retrieval(
    index_name: str,
    chunk_requests: list[VespaChunkRequest],
    filters: IndexFilters,
    get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
    retrieved_chunks: list[InferenceChunkUncleaned] = []
    capped_requests: list[VespaChunkRequest] = []
    uncapped_requests: list[VespaChunkRequest] = []
    chunk_count = 0
    for req_ind, request in enumerate(chunk_requests, start=1):
        # All requests without a chunk range are uncapped
        # Uncapped requests are retrieved using the Visit API
        range = request.range
        if range is None:
            uncapped_requests.append(request)
            continue

        if (
            chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE
            or req_ind % MAX_OR_CONDITIONS == 0
        ):
            retrieved_chunks.extend(
                _get_chunks_via_batch_search(
                    index_name=index_name,
                    chunk_requests=capped_requests,
                    filters=filters,
                    get_large_chunks=get_large_chunks,
                )
            )
            capped_requests = []
            chunk_count = 0
        capped_requests.append(request)
        chunk_count += range

    if capped_requests:
        retrieved_chunks.extend(
            _get_chunks_via_batch_search(
                index_name=index_name,
                chunk_requests=capped_requests,
                filters=filters,
                get_large_chunks=get_large_chunks,
            )
        )

    if uncapped_requests:
        logger.debug(f"Retrieving {len(uncapped_requests)} uncapped requests")
        retrieved_chunks.extend(
            parallel_visit_api_retrieval(
                index_name, uncapped_requests, filters, get_large_chunks
            )
        )

    return retrieved_chunks


================================================
FILE: backend/onyx/document_index/vespa/deletion.py
================================================
import concurrent.futures
from uuid import UUID

import httpx
from retry import retry

from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.utils.logger import setup_logger

logger = setup_logger()


CONTENT_SUMMARY = "content_summary"


@retry(tries=10, delay=1, backoff=2)
def _retryable_http_delete(http_client: httpx.Client, url: str) -> None:
    res = http_client.delete(url)
    res.raise_for_status()


def _delete_vespa_chunk(
    doc_chunk_id: UUID, index_name: str, http_client: httpx.Client
) -> None:
    try:
        _retryable_http_delete(
            http_client,
            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}",
        )
    except httpx.HTTPStatusError as e:
        logger.error(f"Failed to delete chunk, details: {e.response.text}")
        raise


def delete_vespa_chunks(
    doc_chunk_ids: list[UUID],
    index_name: str,
    http_client: httpx.Client,
    executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> None:
    """Deletes a list of chunks from a Vespa index in parallel.

    Args:
        doc_chunk_ids: List of chunk IDs to delete.
        index_name: Name of the index to delete from.
        http_client: HTTP client to use for the request.
        executor: Executor to use for the request.
    """
    external_executor = True

    if not executor:
        external_executor = False
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)

    try:
        chunk_deletion_future = {
            executor.submit(
                _delete_vespa_chunk, doc_chunk_id, index_name, http_client
            ): doc_chunk_id
            for doc_chunk_id in doc_chunk_ids
        }
        for future in concurrent.futures.as_completed(chunk_deletion_future):
            # Will raise exception if the deletion raised an exception
            future.result()

    finally:
        if not external_executor:
            executor.shutdown(wait=True)


================================================
FILE: backend/onyx/document_index/vespa/index.py
================================================
import concurrent.futures
import io
import logging
import os
import re
import time
import urllib
import zipfile
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime
from datetime import timedelta
from typing import BinaryIO
from typing import cast
from typing import List

import httpx
import jinja2
import requests
from pydantic import BaseModel
from retry import retry

from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.context.search.models import QueryExpansionType
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import (
    DocumentInsertionRecord as OldDocumentInsertionRecord,
)
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.document_index.interfaces import IndexBatchParams
from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.interfaces_new import DocumentSectionRequest
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import MetadataUpdateRequest
from onyx.document_index.vespa.chunk_retrieval import query_vespa
from onyx.document_index.vespa.indexing_utils import BaseHTTPXClientContext
from onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence
from onyx.document_index.vespa.indexing_utils import GlobalHTTPXClientContext
from onyx.document_index.vespa.indexing_utils import TemporaryHTTPXClientContext
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
    build_vespa_filters,
)
from onyx.document_index.vespa.vespa_document_index import TenantState
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.document_index.vespa_constants import BATCH_SIZE
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from onyx.document_index.vespa_constants import VESPA_TIMEOUT
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.key_value_store.factory import get_shared_kv_store
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
from shared_configs.model_server_models import Embedding

logger = setup_logger()

# Set the logging level to WARNING to ignore INFO and DEBUG logs
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)


@dataclass
class _VespaUpdateRequest:
    document_id: str
    url: str
    update_request: dict[str, dict]


class KGVespaChunkUpdateRequest(BaseModel):
    document_id: str
    chunk_id: int
    url: str
    update_request: dict[str, dict]


class KGUChunkUpdateRequest(BaseModel):
    """
    Update KG fields for a document
    """

    document_id: str
    chunk_id: int
    core_entity: str
    entities: set[str] | None = None
    relationships: set[str] | None = None
    terms: set[str] | None = None


class KGUDocumentUpdateRequest(BaseModel):
    """
    Update KG fields for a document
    """

    document_id: str
    entities: set[str]
    relationships: set[str]
    terms: set[str]


def generate_kg_update_request(
    kg_update_request: KGUChunkUpdateRequest,
) -> dict[str, dict]:
    kg_update_dict: dict[str, dict] = {}

    if kg_update_request.entities is not None:
        kg_update_dict["kg_entities"] = {"assign": list(kg_update_request.entities)}

    if kg_update_request.relationships is not None:
        kg_update_dict["kg_relationships"] = {"assign": []}
        for relationship in kg_update_request.relationships:
            source, rel_type, target = split_relationship_id(relationship)
            kg_update_dict["kg_relationships"]["assign"].append(
                {
                    "source": source,
                    "rel_type": rel_type,
                    "target": target,
                }
            )

    return kg_update_dict


def in_memory_zip_from_file_bytes(file_contents: dict[str, bytes]) -> BinaryIO:
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
        for filename, content in file_contents.items():
            zipf.writestr(filename, content)
    zip_buffer.seek(0)
    return zip_buffer


def _create_document_xml_lines(doc_names: list[str | None] | list[str]) -> str:
    doc_lines = [
        f'<document type="{doc_name}" mode="index" />'
        for doc_name in doc_names
        if doc_name
    ]
    return "\n".join(doc_lines)


def add_ngrams_to_schema(schema_content: str) -> str:
    # Add the match blocks containing gram and gram-size to title and content fields
    schema_content = re.sub(
        r"(field title type string \{[^}]*indexing: summary \| index \| attribute)",
        r"\1\n            match {\n                gram\n                gram-size: 3\n            }",
        schema_content,
    )
    schema_content = re.sub(
        r"(field content type string \{[^}]*indexing: summary \| index)",
        r"\1\n            match {\n                gram\n                gram-size: 3\n            }",
        schema_content,
    )
    return schema_content


def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]:
    def _remove_title(chunk: InferenceChunkUncleaned) -> str:
        if not chunk.title or not chunk.content:
            return chunk.content

        if chunk.content.startswith(chunk.title):
            return chunk.content[len(chunk.title) :].lstrip()

        # BLURB SIZE is by token instead of char but each token is at least 1 char
        # If this prefix matches the content, it's assumed the title was prepended
        if chunk.content.startswith(chunk.title[:BLURB_SIZE]):
            return (
                chunk.content.split(RETURN_SEPARATOR, 1)[-1]
                if RETURN_SEPARATOR in chunk.content
                else chunk.content
            )

        return chunk.content

    def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:
        if not chunk.metadata_suffix:
            return chunk.content
        return chunk.content.removesuffix(chunk.metadata_suffix).rstrip(
            RETURN_SEPARATOR
        )

    def _remove_contextual_rag(chunk: InferenceChunkUncleaned) -> str:
        # remove document summary
        if chunk.content.startswith(chunk.doc_summary):
            chunk.content = chunk.content[len(chunk.doc_summary) :].lstrip()
        # remove chunk context
        if chunk.content.endswith(chunk.chunk_context):
            chunk.content = chunk.content[
                : len(chunk.content) - len(chunk.chunk_context)
            ].rstrip()
        return chunk.content

    for chunk in chunks:
        chunk.content = _remove_title(chunk)
        chunk.content = _remove_metadata_suffix(chunk)
        chunk.content = _remove_contextual_rag(chunk)

    return [chunk.to_inference_chunk() for chunk in chunks]


class VespaIndex(DocumentIndex):
    VESPA_SCHEMA_JINJA_FILENAME = "danswer_chunk.sd.jinja"

    def __init__(
        self,
        index_name: str,
        secondary_index_name: str | None,
        large_chunks_enabled: bool,
        secondary_large_chunks_enabled: bool | None,
        multitenant: bool = False,
        httpx_client: httpx.Client | None = None,
    ) -> None:
        self.index_name = index_name
        self.secondary_index_name = secondary_index_name

        self.large_chunks_enabled = large_chunks_enabled
        self.secondary_large_chunks_enabled = secondary_large_chunks_enabled

        self.multitenant = multitenant

        # Temporary until we refactor the entirety of this class.
        self.httpx_client = httpx_client

        self.httpx_client_context: BaseHTTPXClientContext
        if httpx_client:
            self.httpx_client_context = GlobalHTTPXClientContext(httpx_client)
        else:
            self.httpx_client_context = TemporaryHTTPXClientContext(
                get_vespa_http_client
            )

        self.index_to_large_chunks_enabled: dict[str, bool] = {}
        self.index_to_large_chunks_enabled[index_name] = large_chunks_enabled
        if secondary_index_name and secondary_large_chunks_enabled:
            self.index_to_large_chunks_enabled[secondary_index_name] = (
                secondary_large_chunks_enabled
            )

    def ensure_indices_exist(
        self,
        primary_embedding_dim: int,
        primary_embedding_precision: EmbeddingPrecision,
        secondary_index_embedding_dim: int | None,
        secondary_index_embedding_precision: EmbeddingPrecision | None,
    ) -> None:
        if MULTI_TENANT:
            logger.info(
                "Skipping Vespa index setup for multitenant (would wipe all indices)"
            )
            return None

        jinja_env = jinja2.Environment()

        deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate"
        logger.notice(f"Deploying Vespa application package to {deploy_url}")

        vespa_schema_path = os.path.join(
            os.getcwd(), "onyx", "document_index", "vespa", "app_config"
        )
        schema_jinja_file = os.path.join(
            vespa_schema_path, "schemas", VespaIndex.VESPA_SCHEMA_JINJA_FILENAME
        )
        services_jinja_file = os.path.join(vespa_schema_path, "services.xml.jinja")
        overrides_jinja_file = os.path.join(
            vespa_schema_path, "validation-overrides.xml.jinja"
        )

        with open(services_jinja_file, "r") as services_f:
            schema_names = [self.index_name, self.secondary_index_name]
            doc_lines = _create_document_xml_lines(schema_names)

            services_template_str = services_f.read()
            services_template = jinja_env.from_string(services_template_str)
            services = services_template.render(
                document_elements=doc_lines,
                num_search_threads=str(VESPA_SEARCHER_THREADS),
            )

        kv_store = get_shared_kv_store()

        needs_reindexing = False
        try:
            needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
        except Exception:
            logger.debug("Could not load the reindexing flag. Using ngrams")

        # Vespa requires an override to erase data including the indices we're no longer using
        # It also has a 30 day cap from current so we set it to 7 dynamically
        with open(overrides_jinja_file, "r") as overrides_f:
            overrides_template_str = overrides_f.read()
            overrides_template = jinja_env.from_string(overrides_template_str)

            now = datetime.now()
            date_in_7_days = now + timedelta(days=7)
            formatted_date = date_in_7_days.strftime("%Y-%m-%d")
            overrides = overrides_template.render(
                until_date=formatted_date,
            )

        zip_dict = {
            "services.xml": services.encode("utf-8"),
            "validation-overrides.xml": overrides.encode("utf-8"),
        }

        with open(schema_jinja_file, "r") as schema_f:
            template_str = schema_f.read()

        template = jinja_env.from_string(template_str)
        schema = template.render(
            multi_tenant=MULTI_TENANT,
            schema_name=self.index_name,
            dim=primary_embedding_dim,
            embedding_precision=primary_embedding_precision.value,
        )

        schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
        zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8")

        if self.secondary_index_name:
            if secondary_index_embedding_dim is None:
                raise ValueError("Secondary index embedding dimension is required")
            if secondary_index_embedding_precision is None:
                raise ValueError("Secondary index embedding precision is required")

            upcoming_schema = template.render(
                multi_tenant=MULTI_TENANT,
                schema_name=self.secondary_index_name,
                dim=secondary_index_embedding_dim,
                embedding_precision=secondary_index_embedding_precision.value,
            )

            zip_dict[f"schemas/{schema_names[1]}.sd"] = upcoming_schema.encode("utf-8")

        zip_file = in_memory_zip_from_file_bytes(zip_dict)

        headers = {"Content-Type": "application/zip"}
        response = requests.post(deploy_url, headers=headers, data=zip_file)
        if response.status_code != 200:
            logger.error(
                f"Failed to prepare Vespa Onyx Index. Response: {response.text}"
            )
            raise RuntimeError(
                f"Failed to prepare Vespa Onyx Index. Response: {response.text}"
            )

    @staticmethod
    def register_multitenant_indices(
        indices: list[str],
        embedding_dims: list[int],
        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
        if not MULTI_TENANT:
            raise ValueError("Multi-tenant is not enabled")

        deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate"
        logger.info(f"Deploying Vespa application package to {deploy_url}")

        vespa_schema_path = os.path.join(
            os.getcwd(), "onyx", "document_index", "vespa", "app_config"
        )
        schema_jinja_file = os.path.join(
            vespa_schema_path, "schemas", VespaIndex.VESPA_SCHEMA_JINJA_FILENAME
        )
        services_jinja_file = os.path.join(vespa_schema_path, "services.xml.jinja")
        overrides_jinja_file = os.path.join(
            vespa_schema_path, "validation-overrides.xml.jinja"
        )

        jinja_env = jinja2.Environment()

        # Generate schema names from index settings
        with open(services_jinja_file, "r") as services_f:
            schema_names = [index_name for index_name in indices]
            doc_lines = _create_document_xml_lines(schema_names)

            services_template_str = services_f.read()
            services_template = jinja_env.from_string(services_template_str)
            services = services_template.render(
                document_elements=doc_lines,
                num_search_threads=str(VESPA_SEARCHER_THREADS),
            )

        kv_store = get_shared_kv_store()

        needs_reindexing = False
        try:
            needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
        except Exception:
            logger.debug("Could not load the reindexing flag. Using ngrams")

        # Vespa requires an override to erase data including the indices we're no longer using
        # It also has a 30 day cap from current so we set it to 7 dynamically
        with open(overrides_jinja_file, "r") as overrides_f:
            overrides_template_str = overrides_f.read()
            overrides_template = jinja_env.from_string(overrides_template_str)

            now = datetime.now()
            date_in_7_days = now + timedelta(days=7)
            formatted_date = date_in_7_days.strftime("%Y-%m-%d")
            overrides = overrides_template.render(
                until_date=formatted_date,
            )

        zip_dict = {
            "services.xml": services.encode("utf-8"),
            "validation-overrides.xml": overrides.encode("utf-8"),
        }

        with open(schema_jinja_file, "r") as schema_f:
            schema_template_str = schema_f.read()

        schema_template = jinja_env.from_string(schema_template_str)

        for i, index_name in enumerate(indices):
            embedding_dim = embedding_dims[i]
            embedding_precision = embedding_precisions[i]
            logger.info(
                f"Creating index: {index_name} with embedding dimension: {embedding_dim}"
            )

            schema = schema_template.render(
                multi_tenant=MULTI_TENANT,
                schema_name=index_name,
                dim=embedding_dim,
                embedding_precision=embedding_precision.value,
            )

            schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
            zip_dict[f"schemas/{index_name}.sd"] = schema.encode("utf-8")

        zip_file = in_memory_zip_from_file_bytes(zip_dict)

        headers = {"Content-Type": "application/zip"}
        response = requests.post(deploy_url, headers=headers, data=zip_file)

        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to prepare Vespa Onyx Indexes. Response: {response.text}"
            )

    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
        NOTE: Do NOT consider the secondary index here. A separate indexing
        pipeline will be responsible for indexing to the secondary index. This
        design is not ideal and we should reconsider this when revamping index
        swapping.
        """
        if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
            index_batch_params.doc_id_to_new_chunk_cnt
        ):
            raise ValueError("Bug: Length of doc ID to chunk maps does not match.")
        doc_id_to_chunk_cnt_diff = {
            doc_id: IndexingMetadata.ChunkCounts(
                old_chunk_cnt=index_batch_params.doc_id_to_previous_chunk_cnt[doc_id],
                new_chunk_cnt=index_batch_params.doc_id_to_new_chunk_cnt[doc_id],
            )
            for doc_id in index_batch_params.doc_id_to_previous_chunk_cnt.keys()
        }
        indexing_metadata = IndexingMetadata(
            doc_id_to_chunk_cnt_diff=doc_id_to_chunk_cnt_diff,
        )
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        if tenant_state.multitenant != self.multitenant:
            raise ValueError(
                f"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}."
            )
        if (
            tenant_state.multitenant
            and tenant_state.tenant_id != index_batch_params.tenant_id
        ):
            raise ValueError(
                f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {index_batch_params.tenant_id}."
            )
        vespa_document_index = VespaDocumentIndex(
            index_name=self.index_name,
            tenant_state=tenant_state,
            large_chunks_enabled=self.large_chunks_enabled,
            httpx_client=self.httpx_client,
        )
        # This conversion from list to set only to be converted again to a list
        # upstream is suboptimal and only temporary until we refactor the
        # entirety of this class.
        document_insertion_records = vespa_document_index.index(
            chunks, indexing_metadata
        )
        return set(
            [
                OldDocumentInsertionRecord(
                    document_id=doc_insertion_record.document_id,
                    already_existed=doc_insertion_record.already_existed,
                )
                for doc_insertion_record in document_insertion_records
            ]
        )

    @classmethod
    def _apply_updates_batched(
        cls,
        updates: list[_VespaUpdateRequest],
        httpx_client: httpx.Client,
        batch_size: int = BATCH_SIZE,
    ) -> None:
        """Runs a batch of updates in parallel via the ThreadPoolExecutor."""

        def _update_chunk(
            update: _VespaUpdateRequest, http_client: httpx.Client
        ) -> httpx.Response:
            logger.debug(
                f"Updating with request to {update.url} with body {update.update_request}"
            )
            return http_client.put(
                update.url,
                headers={"Content-Type": "application/json"},
                json=update.update_request,
            )

        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for
        # indexing / updates / deletes since we have to make a large volume of requests.

        with (
            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
            httpx_client as http_client,
        ):
            for update_batch in batch_generator(updates, batch_size):
                future_to_document_id = {
                    executor.submit(
                        _update_chunk,
                        update,
                        http_client,
                    ): update.document_id
                    for update in update_batch
                }
                for future in concurrent.futures.as_completed(future_to_document_id):
                    res = future.result()
                    try:
                        res.raise_for_status()
                    except requests.HTTPError as e:
                        failure_msg = f"Failed to update document: {future_to_document_id[future]}"
                        raise requests.HTTPError(failure_msg) from e

    @classmethod
    def _apply_kg_chunk_updates_batched(
        cls,
        updates: list[KGVespaChunkUpdateRequest],
        httpx_client: httpx.Client,
        batch_size: int = BATCH_SIZE,
    ) -> None:
        """Runs a batch of updates in parallel via the ThreadPoolExecutor."""

        @retry(tries=3, delay=1, backoff=2, jitter=(0.0, 1.0))
        def _kg_update_chunk(
            update: KGVespaChunkUpdateRequest, http_client: httpx.Client
        ) -> httpx.Response:
            return http_client.put(
                update.url,
                headers={"Content-Type": "application/json"},
                json=update.update_request,
            )

        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for
        # indexing / updates / deletes since we have to make a large volume of requests.

        with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
            for update_batch in batch_generator(updates, batch_size):
                future_to_document_id = {
                    executor.submit(
                        _kg_update_chunk,
                        update,
                        httpx_client,
                    ): update.document_id
                    for update in update_batch
                }
                for future in concurrent.futures.as_completed(future_to_document_id):
                    res = future.result()
                    try:
                        res.raise_for_status()
                    except requests.HTTPError as e:
                        failure_msg = f"Failed to update document {future_to_document_id[future]}\nResponse: {res.text}"
                        raise requests.HTTPError(failure_msg) from e

    def kg_chunk_updates(
        self, kg_update_requests: list[KGUChunkUpdateRequest], tenant_id: str
    ) -> None:

        processed_updates_requests: list[KGVespaChunkUpdateRequest] = []
        update_start = time.monotonic()

        # Build the _VespaUpdateRequest objects

        for kg_update_request in kg_update_requests:
            kg_update_dict: dict[str, dict] = {
                "fields": generate_kg_update_request(kg_update_request)
            }
            if not kg_update_dict["fields"]:
                logger.error("Update request received but nothing to update")
                continue

            doc_chunk_id = get_uuid_from_chunk_info(
                document_id=kg_update_request.document_id,
                chunk_id=kg_update_request.chunk_id,
                tenant_id=tenant_id,
                large_chunk_id=None,
            )

            processed_updates_requests.append(
                KGVespaChunkUpdateRequest(
                    document_id=kg_update_request.document_id,
                    chunk_id=kg_update_request.chunk_id,
                    url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}",
                    update_request=kg_update_dict,
                )
            )

        with self.httpx_client_context as httpx_client:
            self._apply_kg_chunk_updates_batched(
                processed_updates_requests, httpx_client
            )
        logger.debug(
            "Updated %d vespa documents in %.2f seconds",
            len(processed_updates_requests),
            time.monotonic() - update_start,
        )

    def update_single(
        self,
        doc_id: str,
        *,
        chunk_count: int | None,
        tenant_id: str,
        fields: VespaDocumentFields | None,
        user_fields: VespaDocumentUserFields | None,
    ) -> None:
        """Note: if the document id does not exist, the update will be a no-op and the
        function will complete with no errors or exceptions.
        Handle other exceptions if you wish to implement retry behavior

        NOTE: Remember to handle the secondary index here. There is no separate
        pipeline for updating chunks in the secondary index. This design is not
        ideal and we should reconsider this when revamping index swapping.
        """
        if fields is None and user_fields is None:
            logger.warning(
                f"Tried to update document {doc_id} with no updated fields or user fields."
            )
            return

        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        if tenant_state.multitenant != self.multitenant:
            raise ValueError(
                f"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}."
            )
        if tenant_state.multitenant and tenant_state.tenant_id != tenant_id:
            raise ValueError(
                f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
            )

        project_ids: set[int] | None = None
        # NOTE: Empty user_projects is semantically different from None
        # user_projects.
        if user_fields is not None and user_fields.user_projects is not None:
            project_ids = set(user_fields.user_projects)
        persona_ids: set[int] | None = None
        # NOTE: Empty personas is semantically different from None personas.
        if user_fields is not None and user_fields.personas is not None:
            persona_ids = set(user_fields.personas)
        update_request = MetadataUpdateRequest(
            document_ids=[doc_id],
            doc_id_to_chunk_cnt={
                doc_id: chunk_count if chunk_count is not None else -1
            },  # NOTE: -1 represents an unknown chunk count.
            access=fields.access if fields is not None else None,
            document_sets=fields.document_sets if fields is not None else None,
            boost=fields.boost if fields is not None else None,
            hidden=fields.hidden if fields is not None else None,
            project_ids=project_ids,
            persona_ids=persona_ids,
        )

        indices = [self.index_name]
        if self.secondary_index_name:
            indices.append(self.secondary_index_name)

        for index_name in indices:
            vespa_document_index = VespaDocumentIndex(
                index_name=index_name,
                tenant_state=tenant_state,
                large_chunks_enabled=self.index_to_large_chunks_enabled.get(
                    index_name, False
                ),
                httpx_client=self.httpx_client,
            )
            vespa_document_index.update([update_request])

    def delete_single(
        self,
        doc_id: str,
        *,
        tenant_id: str,
        chunk_count: int | None,
    ) -> int:
        """
        NOTE: Remember to handle the secondary index here. There is no separate
        pipeline for deleting chunks in the secondary index. This design is not
        ideal and we should reconsider this when revamping index swapping.
        """
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        if tenant_state.multitenant != self.multitenant:
            raise ValueError(
                f"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}."
            )
        if tenant_state.multitenant and tenant_state.tenant_id != tenant_id:
            raise ValueError(
                f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
            )
        indices = [self.index_name]
        if self.secondary_index_name:
            indices.append(self.secondary_index_name)

        total_chunks_deleted = 0
        for index_name in indices:
            vespa_document_index = VespaDocumentIndex(
                index_name=index_name,
                tenant_state=tenant_state,
                large_chunks_enabled=self.index_to_large_chunks_enabled.get(
                    index_name, False
                ),
                httpx_client=self.httpx_client,
            )
            total_chunks_deleted += vespa_document_index.delete(
                document_id=doc_id, chunk_count=chunk_count
            )

        return total_chunks_deleted

    def id_based_retrieval(
        self,
        chunk_requests: list[VespaChunkRequest],
        filters: IndexFilters,
        batch_retrieval: bool = False,
        get_large_chunks: bool = False,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        vespa_document_index = VespaDocumentIndex(
            index_name=self.index_name,
            tenant_state=tenant_state,
            large_chunks_enabled=self.large_chunks_enabled,
            httpx_client=self.httpx_client,
        )
        generic_chunk_requests: list[DocumentSectionRequest] = []
        for chunk_request in chunk_requests:
            generic_chunk_requests.append(
                DocumentSectionRequest(
                    document_id=chunk_request.document_id,
                    min_chunk_ind=chunk_request.min_chunk_ind,
                    max_chunk_ind=chunk_request.max_chunk_ind,
                )
            )
        return vespa_document_index.id_based_retrieval(
            chunk_requests=generic_chunk_requests,
            filters=filters,
            batch_retrieval=batch_retrieval,
        )

    @log_function_time(print_only=True, debug_only=True)
    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        final_keywords: list[str] | None,
        filters: IndexFilters,
        hybrid_alpha: float,  # noqa: ARG002
        time_decay_multiplier: float,  # noqa: ARG002
        num_to_retrieve: int,
        ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,
        title_content_ratio: float | None = TITLE_CONTENT_RATIO,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        vespa_document_index = VespaDocumentIndex(
            index_name=self.index_name,
            tenant_state=tenant_state,
            large_chunks_enabled=self.large_chunks_enabled,
            httpx_client=self.httpx_client,
        )
        if not (
            ranking_profile_type == QueryExpansionType.KEYWORD
            or ranking_profile_type == QueryExpansionType.SEMANTIC
        ):
            raise ValueError(
                f"Bug: Received invalid ranking profile type: {ranking_profile_type}"
            )
        query_type = (
            QueryType.KEYWORD
            if ranking_profile_type == QueryExpansionType.KEYWORD
            else QueryType.SEMANTIC
        )
        return vespa_document_index.hybrid_retrieval(
            query,
            query_embedding,
            final_keywords,
            query_type,
            filters,
            num_to_retrieve,
        )

    def admin_retrieval(
        self,
        query: str,
        query_embedding: Embedding,  # noqa: ARG002
        filters: IndexFilters,
        num_to_retrieve: int = NUM_RETURNED_HITS,
    ) -> list[InferenceChunk]:
        vespa_where_clauses = build_vespa_filters(filters, include_hidden=True)
        yql = (
            YQL_BASE.format(index_name=self.index_name)
            + vespa_where_clauses
            + '({grammar: "weakAnd"}userInput(@query) '
            # `({defaultIndex: "content_summary"}userInput(@query))` section is
            # needed for highlighting while the N-gram highlighting is broken /
            # not working as desired
            + f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
        )

        params: dict[str, str | int] = {
            "yql": yql,
            "query": query,
            "hits": num_to_retrieve,
            "ranking.profile": "admin_search",
            "timeout": VESPA_TIMEOUT,
        }

        return cleanup_chunks(query_vespa(params))

    # Retrieves chunk information for a document:
    # - Determines the last indexed chunk
    # - Identifies if the document uses the old or new chunk ID system
    # This data is crucial for Vespa document updates without relying on the visit API.
    @classmethod
    def enrich_basic_chunk_info(
        cls,
        index_name: str,
        http_client: httpx.Client,
        document_id: str,
        previous_chunk_count: int | None = None,
        new_chunk_count: int = 0,
    ) -> EnrichedDocumentIndexingInfo:
        last_indexed_chunk = previous_chunk_count

        # If the document has no `chunk_count` in the database, we know that it
        # has the old chunk ID system and we must check for the final chunk index
        is_old_version = False
        if last_indexed_chunk is None:
            is_old_version = True
            minimal_doc_info = MinimalDocumentIndexingInfo(
                doc_id=document_id, chunk_start_index=new_chunk_count
            )
            last_indexed_chunk = check_for_final_chunk_existence(
                minimal_doc_info=minimal_doc_info,
                start_index=new_chunk_count,
                index_name=index_name,
                http_client=http_client,
            )

        enriched_doc_info = EnrichedDocumentIndexingInfo(
            doc_id=document_id,
            chunk_start_index=new_chunk_count,
            chunk_end_index=last_indexed_chunk,
            old_version=is_old_version,
        )
        return enriched_doc_info

    @classmethod
    def delete_entries_by_tenant_id(
        cls,
        *,
        tenant_id: str,
        index_name: str,
    ) -> int:
        """
        Deletes all entries in the specified index with the given tenant_id.

        Currently unused, but we anticipate this being useful. The entire flow does not
        use the httpx connection pool of an instance.

        Parameters:
            tenant_id (str): The tenant ID whose documents are to be deleted.
            index_name (str): The name of the index from which to delete documents.

        Returns:
            int: The number of documents deleted.
        """
        logger.info(
            f"Deleting entries with tenant_id: {tenant_id} from index: {index_name}"
        )

        # Step 1: Retrieve all document IDs with the given tenant_id
        document_ids = cls._get_all_document_ids_by_tenant_id(tenant_id, index_name)

        if not document_ids:
            logger.info(
                f"No documents found with tenant_id: {tenant_id} in index: {index_name}"
            )
            return 0

        # Step 2: Delete documents in batches
        delete_requests = [
            _VespaDeleteRequest(document_id=doc_id, index_name=index_name)
            for doc_id in document_ids
        ]

        cls._apply_deletes_batched(delete_requests)
        return len(document_ids)

    @classmethod
    def _get_all_document_ids_by_tenant_id(
        cls, tenant_id: str, index_name: str
    ) -> List[str]:
        """
        Retrieves all document IDs with the specified tenant_id, handling pagination.

        Internal helper function for delete_entries_by_tenant_id.

        Parameters:
            tenant_id (str): The tenant ID to search for.
            index_name (str): The name of the index to search in.

        Returns:
            List[str]: A list of document IDs matching the tenant_id.
        """
        offset = 0
        limit = 1000  # Vespa's maximum hits per query
        document_ids = []

        logger.debug(
            f"Starting document ID retrieval for tenant_id: {tenant_id} in index: {index_name}"
        )

        while True:
            # Construct the query to fetch document IDs
            query_params = {
                "yql": f'select id from sources * where tenant_id contains "{tenant_id}";',
                "offset": str(offset),
                "hits": str(limit),
                "timeout": "10s",
                "format": "json",
                "summary": "id",
            }

            url = f"{VESPA_APPLICATION_ENDPOINT}/search/"

            logger.debug(
                f"Querying for document IDs with tenant_id: {tenant_id}, offset: {offset}"
            )

            with get_vespa_http_client() as http_client:
                response = http_client.get(url, params=query_params, timeout=None)
                response.raise_for_status()

                search_result = response.json()
                hits = search_result.get("root", {}).get("children", [])

                if not hits:
                    break

                for hit in hits:
                    doc_id = hit.get("id")
                    if doc_id:
                        document_ids.append(doc_id)

                offset += limit  # Move to the next page

        logger.debug(
            f"Retrieved {len(document_ids)} document IDs for tenant_id: {tenant_id}"
        )
        return document_ids

    @classmethod
    def _apply_deletes_batched(
        cls,
        delete_requests: List["_VespaDeleteRequest"],
        batch_size: int = BATCH_SIZE,
    ) -> None:
        """
        Deletes documents in batches using multiple threads.

        Internal helper function for delete_entries_by_tenant_id.

        This is a class method and does not use the httpx pool of the instance.
        This is OK because we don't use this method often.

        Parameters:
            delete_requests (List[_VespaDeleteRequest]): The list of delete requests.
            batch_size (int): The number of documents to delete in each batch.
        """

        def _delete_document(
            delete_request: "_VespaDeleteRequest", http_client: httpx.Client
        ) -> None:
            logger.debug(f"Deleting document with ID {delete_request.document_id}")
            response = http_client.delete(
                delete_request.url,
                headers={"Content-Type": "application/json"},
                timeout=None,
            )
            response.raise_for_status()

        logger.debug(f"Starting batch deletion for {len(delete_requests)} documents")

        with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
            with get_vespa_http_client() as http_client:
                for batch_start in range(0, len(delete_requests), batch_size):
                    batch = delete_requests[batch_start : batch_start + batch_size]

                    future_to_document_id = {
                        executor.submit(
                            _delete_document,
                            delete_request,
                            http_client,
                        ): delete_request.document_id
                        for delete_request in batch
                    }

                    for future in concurrent.futures.as_completed(
                        future_to_document_id
                    ):
                        doc_id = future_to_document_id[future]
                        try:
                            future.result()
                            logger.debug(f"Successfully deleted document: {doc_id}")
                        except httpx.HTTPError as e:
                            logger.error(f"Failed to delete document {doc_id}: {e}")
                            # Optionally, implement retry logic or error handling here

        logger.info("Batch deletion completed")

    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
    ) -> list[InferenceChunk]:
        """Retrieve random chunks matching the filters using Vespa's random ranking

        This method is currently used for random chunk retrieval in the context of
        assistant starter message creation (passed as sample context for usage by the assistant).
        """
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
        )
        vespa_document_index = VespaDocumentIndex(
            index_name=self.index_name,
            tenant_state=tenant_state,
            large_chunks_enabled=self.large_chunks_enabled,
            httpx_client=self.httpx_client,
        )
        return vespa_document_index.random_retrieval(
            filters=filters,
            num_to_retrieve=num_to_retrieve,
        )


class _VespaDeleteRequest:
    def __init__(self, document_id: str, index_name: str) -> None:
        self.document_id = document_id
        # Encode the document ID to ensure it's safe for use in the URL
        encoded_doc_id = urllib.parse.quote_plus(self.document_id)
        self.url = f"{VESPA_APPLICATION_ENDPOINT}/document/v1/{index_name}/{index_name}/docid/{encoded_doc_id}"


================================================
FILE: backend/onyx/document_index/vespa/indexing_utils.py
================================================
import concurrent.futures
import json
import random
import time
import uuid
from abc import ABC
from abc import abstractmethod
from collections.abc import Callable
from datetime import datetime
from datetime import timezone
from http import HTTPStatus

import httpx
from retry import retry

from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_experts_stores_representations,
)
from onyx.document_index.chunk_content_enrichment import (
    generate_enriched_content_for_chunk_text,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import AGGREGATED_CHUNK_BOOST_FACTOR
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import PERSONAS
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SKIP_TITLE_EMBEDDING
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import remove_invalid_unicode_chars


logger = setup_logger()

# Retry configuration constants
INDEXING_MAX_RETRIES = 5
INDEXING_BASE_DELAY = 1.0
INDEXING_MAX_DELAY = 60.0


@retry(tries=3, delay=1, backoff=2)
def _does_doc_chunk_exist(
    doc_chunk_id: uuid.UUID, index_name: str, http_client: httpx.Client
) -> bool:
    doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
    doc_fetch_response = http_client.get(doc_url)
    if doc_fetch_response.status_code == 404:
        return False

    if doc_fetch_response.status_code != 200:
        logger.debug(f"Failed to check for document with URL {doc_url}")
        raise RuntimeError(
            f"Unexpected fetch document by ID value from Vespa: "
            f"error={doc_fetch_response.status_code} "
            f"index={index_name} "
            f"doc_chunk_id={doc_chunk_id}"
        )
    return True


def _vespa_get_updated_at_attribute(t: datetime | None) -> int | None:
    if not t:
        return None

    if t.tzinfo != timezone.utc:
        raise ValueError("Connectors must provide document update time in UTC")

    return int(t.timestamp())


def get_existing_documents_from_chunks(
    chunks: list[DocMetadataAwareIndexChunk],
    index_name: str,
    http_client: httpx.Client,
    executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> set[str]:
    external_executor = True

    if not executor:
        external_executor = False
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)

    document_ids: set[str] = set()
    try:
        chunk_existence_future = {
            executor.submit(
                _does_doc_chunk_exist,
                get_uuid_from_chunk(chunk),
                index_name,
                http_client,
            ): chunk
            for chunk in chunks
        }
        for future in concurrent.futures.as_completed(chunk_existence_future):
            chunk = chunk_existence_future[future]
            chunk_already_existed = future.result()
            if chunk_already_existed:
                document_ids.add(chunk.source_document.id)

    finally:
        if not external_executor:
            executor.shutdown(wait=True)

    return document_ids


def _index_vespa_chunk(
    chunk: DocMetadataAwareIndexChunk,
    index_name: str,
    http_client: httpx.Client,
    multitenant: bool,
) -> None:
    json_header = {
        "Content-Type": "application/json",
    }
    document = chunk.source_document

    # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself

    vespa_chunk_id = str(get_uuid_from_chunk(chunk))

    embeddings = chunk.embeddings

    embeddings_name_vector_map = {FULL_CHUNK_EMBEDDING_KEY: embeddings.full_embedding}

    if embeddings.mini_chunk_embeddings:
        for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings):
            embeddings_name_vector_map[f"mini_chunk_{ind}"] = m_c_embed

    title = document.get_title_for_document_index()

    metadata_json = document.metadata
    cleaned_metadata_json: dict[str, str | list[str]] = {}
    for key, value in metadata_json.items():
        cleaned_key = remove_invalid_unicode_chars(key)
        if isinstance(value, list):
            cleaned_metadata_json[cleaned_key] = [
                remove_invalid_unicode_chars(item) for item in value
            ]
        else:
            cleaned_metadata_json[cleaned_key] = remove_invalid_unicode_chars(value)

    metadata_list = document.get_metadata_str_attributes()
    if metadata_list:
        metadata_list = [
            remove_invalid_unicode_chars(metadata) for metadata in metadata_list
        ]

    vespa_document_fields = {
        DOCUMENT_ID: document.id,
        CHUNK_ID: chunk.chunk_id,
        BLURB: remove_invalid_unicode_chars(chunk.blurb),
        TITLE: remove_invalid_unicode_chars(title) if title else None,
        SKIP_TITLE_EMBEDDING: not title,
        # For the BM25 index, the keyword suffix is used, the vector is already generated with the more
        # natural language representation of the metadata section
        CONTENT: remove_invalid_unicode_chars(
            generate_enriched_content_for_chunk_text(chunk)
        ),
        # This duplication of `content` is needed for keyword highlighting
        # Note that it's not exactly the same as the actual content
        # which contains the title prefix and metadata suffix
        CONTENT_SUMMARY: remove_invalid_unicode_chars(chunk.content),
        SOURCE_TYPE: str(document.source.value),
        SOURCE_LINKS: json.dumps(chunk.source_links),
        SEMANTIC_IDENTIFIER: remove_invalid_unicode_chars(document.semantic_identifier),
        SECTION_CONTINUATION: chunk.section_continuation,
        LARGE_CHUNK_REFERENCE_IDS: chunk.large_chunk_reference_ids,
        METADATA: json.dumps(cleaned_metadata_json),
        # Save as a list for efficient extraction as an Attribute
        METADATA_LIST: metadata_list,
        METADATA_SUFFIX: remove_invalid_unicode_chars(chunk.metadata_suffix_keyword),
        CHUNK_CONTEXT: chunk.chunk_context,
        DOC_SUMMARY: chunk.doc_summary,
        EMBEDDINGS: embeddings_name_vector_map,
        TITLE_EMBEDDING: chunk.title_embedding,
        DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at),
        PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners),
        SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners),
        # the only `set` vespa has is `weightedset`, so we have to give each
        # element an arbitrary weight
        # rkuo: acl, docset and boost metadata are also updated through the metadata sync queue
        # which only calls VespaIndex.update
        ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()},
        DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
        # still called `image_file_name` in Vespa for backwards compatibility
        IMAGE_FILE_NAME: chunk.image_file_id,
        USER_PROJECT: chunk.user_project if chunk.user_project is not None else [],
        PERSONAS: chunk.personas if chunk.personas is not None else [],
        BOOST: chunk.boost,
        AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor,
    }

    if multitenant:
        if chunk.tenant_id:
            vespa_document_fields[TENANT_ID] = chunk.tenant_id
    vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}"
    logger.debug(f'Indexing to URL "{vespa_url}"')

    # Retry logic with exponential backoff for rate limiting
    for attempt in range(INDEXING_MAX_RETRIES):
        try:
            res = http_client.post(
                vespa_url, headers=json_header, json={"fields": vespa_document_fields}
            )
            res.raise_for_status()
            return  # Success, exit the function
        except httpx.HTTPStatusError as e:
            # Handle 429 rate limiting specifically
            if e.response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
                if attempt < INDEXING_MAX_RETRIES - 1:
                    # Calculate exponential backoff with jitter
                    delay = min(
                        INDEXING_BASE_DELAY * (2**attempt), INDEXING_MAX_DELAY
                    ) * random.uniform(0.5, 1.0)
                    logger.warning(
                        f"Rate limited while indexing document '{document.id}' "
                        f"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}). "
                        f"Vespa response: '{e.response.text}'. "
                        f"Backing off for {delay:.2f} seconds."
                    )
                    time.sleep(delay)
                    continue
                else:
                    raise RuntimeError(
                        f"Failed to index document '{document.id}' after {INDEXING_MAX_RETRIES} attempts due to rate limiting"
                    ) from e
            elif e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:
                logger.error(
                    f"Failed to index document: '{document.id}'. Got response: '{e.response.text}'"
                )
                logger.error(
                    "NOTE: HTTP Status 507 Insufficient Storage usually means "
                    "you need to allocate more memory or disk space to the "
                    "Vespa/index container."
                )
                raise
            else:
                # For other HTTP errors, check if retryable
                if e.response.status_code in (
                    HTTPStatus.BAD_REQUEST,
                    HTTPStatus.UNAUTHORIZED,
                    HTTPStatus.FORBIDDEN,
                    HTTPStatus.NOT_FOUND,
                ):
                    # Non-retryable errors - fail immediately
                    logger.error(
                        f"Non-retryable HTTP {e.response.status_code} error for document '{document.id}'"
                    )
                    raise
                # Retry other errors with shorter backoff
                if attempt < INDEXING_MAX_RETRIES - 1:
                    delay = INDEXING_BASE_DELAY * (1.5**attempt)
                    logger.warning(
                        f"HTTP error {e.response.status_code} while indexing document '{document.id}' "
                        f"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}). Retrying in {delay:.2f} seconds."
                    )
                    time.sleep(delay)
                    continue
                else:
                    logger.exception(
                        f"Failed to index document: '{document.id}'. Got response: '{e.response.text}'"
                    )
                    raise
        except Exception as e:
            # For non-HTTP errors, use simple retry logic
            if attempt < INDEXING_MAX_RETRIES - 1:
                delay = INDEXING_BASE_DELAY * (1.5**attempt)
                logger.warning(
                    f"Error while indexing document '{document.id}' "
                    f"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}): {str(e)}. "
                    f"Retrying in {delay:.2f} seconds."
                )
                time.sleep(delay)
                continue
            else:
                logger.exception(f"Failed to index document: '{document.id}'")
                raise


def batch_index_vespa_chunks(
    chunks: list[DocMetadataAwareIndexChunk],
    index_name: str,
    http_client: httpx.Client,
    multitenant: bool,
    executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> None:
    """Indexes a list of chunks in a Vespa index in parallel.

    Args:
        chunks: List of chunks to index.
        index_name: Name of the index to index into.
        http_client: HTTP client to use for the request.
        multitenant: Whether the index is multitenant.
        executor: Executor to use for the request.
    """
    external_executor = True

    if not executor:
        external_executor = False
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)

    try:
        chunk_index_future = {
            executor.submit(
                _index_vespa_chunk, chunk, index_name, http_client, multitenant
            ): chunk
            for chunk in chunks
        }
        for future in concurrent.futures.as_completed(chunk_index_future):
            # Will raise exception if any indexing raised an exception
            future.result()

    finally:
        if not external_executor:
            executor.shutdown(wait=True)


def clean_chunk_id_copy(
    chunk: DocMetadataAwareIndexChunk,
) -> DocMetadataAwareIndexChunk:
    clean_chunk = chunk.model_copy(
        update={
            "source_document": chunk.source_document.model_copy(
                update={
                    "id": replace_invalid_doc_id_characters(chunk.source_document.id)
                }
            )
        }
    )
    return clean_chunk


def check_for_final_chunk_existence(
    minimal_doc_info: MinimalDocumentIndexingInfo,
    start_index: int,
    index_name: str,
    http_client: httpx.Client,
) -> int:
    index = start_index
    while True:
        doc_chunk_id = get_uuid_from_chunk_info_old(
            document_id=minimal_doc_info.doc_id,
            chunk_id=index,
            large_chunk_reference_ids=[],
        )
        if not _does_doc_chunk_exist(doc_chunk_id, index_name, http_client):
            return index
        index += 1


class BaseHTTPXClientContext(ABC):
    """Abstract base class for an HTTPX client context manager."""

    @abstractmethod
    def __enter__(self) -> httpx.Client:
        pass

    @abstractmethod
    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore
        pass


class GlobalHTTPXClientContext(BaseHTTPXClientContext):
    """Context manager for a global HTTPX client that does not close it."""

    def __init__(self, client: httpx.Client):
        self._client = client

    def __enter__(self) -> httpx.Client:
        return self._client  # Reuse the global client

    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore
        pass  # Do nothing; don't close the global client


class TemporaryHTTPXClientContext(BaseHTTPXClientContext):
    """Context manager for a temporary HTTPX client that closes it after use."""

    def __init__(self, client_factory: Callable[[], httpx.Client]):
        self._client_factory = client_factory
        self._client: httpx.Client | None = None  # Client will be created in __enter__

    def __enter__(self) -> httpx.Client:
        self._client = self._client_factory()  # Create a new client
        return self._client

    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore
        if self._client:
            self._client.close()


================================================
FILE: backend/onyx/document_index/vespa/kg_interactions.py
================================================
from onyx.db.document import get_document_kg_entities_and_relationships
from onyx.db.document import get_num_chunks_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.vespa.index import KGUChunkUpdateRequest
from onyx.document_index.vespa.index import VespaIndex
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def update_kg_chunks_vespa_info(
    kg_update_requests: list[KGUChunkUpdateRequest],
    index_name: str,
    tenant_id: str,
) -> None:
    """ """
    # Use the existing visit API infrastructure
    vespa_index = VespaIndex(
        index_name=index_name,
        secondary_index_name=None,
        large_chunks_enabled=False,
        secondary_large_chunks_enabled=False,
        multitenant=MULTI_TENANT,
        httpx_client=None,
    )

    vespa_index.kg_chunk_updates(
        kg_update_requests=kg_update_requests, tenant_id=tenant_id
    )


def get_kg_vespa_info_update_requests_for_document(
    document_id: str,
) -> list[KGUChunkUpdateRequest]:
    """Get the kg_info update requests for a document."""
    # get all entities and relationships tied to the document
    with get_session_with_current_tenant() as db_session:
        entities, relationships = get_document_kg_entities_and_relationships(
            db_session, document_id
        )

    # create the kg vespa info
    kg_entities = {entity.id_name for entity in entities}
    kg_relationships = {relationship.id_name for relationship in relationships}

    # get chunks in the document
    with get_session_with_current_tenant() as db_session:
        num_chunks = get_num_chunks_for_document(db_session, document_id)

    # get vespa update requests
    return [
        KGUChunkUpdateRequest(
            document_id=document_id,
            chunk_id=chunk_id,
            core_entity="unused",
            entities=kg_entities,
            relationships=kg_relationships or None,
        )
        for chunk_id in range(num_chunks)
    ]


================================================
FILE: backend/onyx/document_index/vespa/shared_utils/utils.py
================================================
import time
from typing import cast

import httpx

from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT
from onyx.document_index.vespa_constants import VESPA_APP_CONTAINER_URL
from onyx.utils.logger import setup_logger

logger = setup_logger()

# NOTE: This does not seem to be used in reality despite the Vespa Docs pointing to this code
# See here for reference: https://docs.vespa.ai/en/documents.html
# https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java

# Define allowed ASCII characters
ALLOWED_ASCII_CHARS: list[bool] = [False] * 0x80
ALLOWED_ASCII_CHARS[0x9] = True  # tab
ALLOWED_ASCII_CHARS[0xA] = True  # newline
ALLOWED_ASCII_CHARS[0xD] = True  # carriage return
for i in range(0x20, 0x7F):
    ALLOWED_ASCII_CHARS[i] = True  # printable ASCII chars
ALLOWED_ASCII_CHARS[0x7F] = True  # del - discouraged, but allowed


def is_text_character(codepoint: int) -> bool:
    """Returns whether the given codepoint is a valid text character."""
    if codepoint < 0x80:
        return ALLOWED_ASCII_CHARS[codepoint]
    if codepoint < 0xD800:
        return True
    if codepoint <= 0xDFFF:
        return False
    if codepoint < 0xFDD0:
        return True
    if codepoint <= 0xFDEF:
        return False
    if codepoint >= 0x10FFFE:
        return False
    return (codepoint & 0xFFFF) < 0xFFFE


def replace_invalid_doc_id_characters(text: str) -> str:
    """Replaces invalid document ID characters in text.
    NOTE: this must be called at the start of every vespa-related operation or else we
    risk discrepancies -> silent failures on deletion/update/insertion."""
    # There may be a more complete set of replacements that need to be made but Vespa docs are unclear
    # and users only seem to be running into this error with single quotes
    return text.replace("'", "_")


def get_vespa_http_client(
    no_timeout: bool = False, http2: bool = True, timeout: int | None = None
) -> httpx.Client:
    """
    Configures and returns an HTTP client for communicating with Vespa,
    including authentication if needed.
    """
    return httpx.Client(
        cert=(
            cast(tuple[str, str], (VESPA_CLOUD_CERT_PATH, VESPA_CLOUD_KEY_PATH))
            if MANAGED_VESPA
            else None
        ),
        verify=False if not MANAGED_VESPA else True,
        timeout=None if no_timeout else (timeout or VESPA_REQUEST_TIMEOUT),
        http2=http2,
    )


def wait_for_vespa_with_timeout(wait_interval: int = 5, wait_limit: int = 60) -> bool:
    """Waits for Vespa to become ready subject to a timeout.
    Returns True if Vespa is ready, False otherwise."""

    time_start = time.monotonic()
    logger.info("Vespa: Readiness probe starting.")
    while True:
        url = f"{VESPA_APP_CONTAINER_URL}/state/v1/health"
        try:
            client = get_vespa_http_client()
            response = client.get(url)
            response.raise_for_status()

            response_dict = response.json()
            if response_dict["status"]["code"] == "up":
                logger.info("Vespa: Readiness probe succeeded. Continuing...")
                return True
        except Exception as e:
            logger.warning(
                f"Vespa: Readiness probe failed trying to connect to {url}. Exception: {e}"
            )

        time_elapsed = time.monotonic() - time_start
        if time_elapsed > wait_limit:
            logger.info(
                f"Vespa: Readiness probe did not succeed within the timeout ({wait_limit} seconds)."
            )
            return False

        logger.info(
            f"Vespa: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit:.1f}"
        )

        time.sleep(wait_interval)


================================================
FILE: backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import PERSONAS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def build_tenant_id_filter(tenant_id: str) -> str:
    return f'({TENANT_ID} contains "{tenant_id}")'


def build_vespa_filters(
    filters: IndexFilters,
    *,
    include_hidden: bool = False,
    remove_trailing_and: bool = False,  # Set to True when using as a complete Vespa query
) -> str:
    def _build_or_filters(key: str, vals: list[str] | None) -> str:
        """For string-based 'contains' filters, e.g. WSET fields or array<string> fields.
        Returns a bare clause like '(key contains "v1" or key contains "v2")' or ""."""
        if not key or not vals:
            return ""
        eq_elems = [f'{key} contains "{val}"' for val in vals if val]
        if not eq_elems:
            return ""
        return f"({' or '.join(eq_elems)})"

    def _build_weighted_set_filter(key: str, vals: list[str] | None) -> str:
        """Build a Vespa weightedSet filter for large value lists.

        Uses Vespa's native weightedSet() operator instead of OR-chained
        'contains' clauses.  This is critical for fields like
        access_control_list where a single user may have tens of thousands
        of ACL entries — OR clauses at that scale cause Vespa to reject
        the query with HTTP 400."""
        if not key or not vals:
            return ""
        filtered = [val for val in vals if val]
        if not filtered:
            return ""
        items = ", ".join(f'"{val}":1' for val in filtered)
        return f"weightedSet({key}, {{{items}}})"

    def _build_int_or_filters(key: str, vals: list[int] | None) -> str:
        """For an integer field filter.
        Returns a bare clause or ""."""
        if vals is None or not vals:
            return ""
        eq_elems = [f"{key} = {val}" for val in vals]
        return f"({' or '.join(eq_elems)})"

    def _build_kg_filter(
        kg_entities: list[str] | None,
        kg_relationships: list[str] | None,
        kg_terms: list[str] | None,
    ) -> str:
        if not kg_entities and not kg_relationships and not kg_terms:
            return ""

        combined_filter_parts = []

        def _build_kge(entity: str) -> str:
            GENERAL = "::*"
            if entity.endswith(GENERAL):
                return f'({{prefix: true}}"{entity.split(GENERAL, 1)[0]}")'
            else:
                return f'"{entity}"'

        if kg_entities:
            filter_parts = []
            for kg_entity in kg_entities:
                filter_parts.append(f"(kg_entities contains {_build_kge(kg_entity)})")
            combined_filter_parts.append(f"({' or '.join(filter_parts)})")

        # TODO: handle complex nested relationship logic (e.g., A participated, and B or C participated)
        if kg_relationships:
            filter_parts = []
            for kg_relationship in kg_relationships:
                source, rel_type, target = split_relationship_id(kg_relationship)
                filter_parts.append(
                    "(kg_relationships contains sameElement("
                    f"source contains {_build_kge(source)},"
                    f'rel_type contains "{rel_type}",'
                    f"target contains {_build_kge(target)}))"
                )
            combined_filter_parts.append(f"{' and '.join(filter_parts)}")

        # TODO: remove kg terms entirely from prompts and codebase

        return f"({' and '.join(combined_filter_parts)})"

    def _build_kg_source_filters(
        kg_sources: list[str] | None,
    ) -> str:
        if not kg_sources:
            return ""

        source_phrases = [f'{DOCUMENT_ID} contains "{source}"' for source in kg_sources]
        return f"({' or '.join(source_phrases)})"

    def _build_kg_chunk_id_zero_only_filter(
        kg_chunk_id_zero_only: bool,
    ) -> str:
        if not kg_chunk_id_zero_only:
            return ""
        return "(chunk_id = 0)"

    def _build_time_filter(
        cutoff: datetime | None,
        untimed_doc_cutoff: timedelta = timedelta(days=92),
    ) -> str:
        if not cutoff:
            return ""
        include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff
        cutoff_secs = int(cutoff.timestamp())

        if include_untimed:
            return f"!({DOC_UPDATED_AT} < {cutoff_secs})"
        return f"({DOC_UPDATED_AT} >= {cutoff_secs})"

    def _build_user_project_filter(
        project_id: int | None,
    ) -> str:
        if project_id is None:
            return ""
        try:
            pid = int(project_id)
        except Exception:
            return ""
        return f'({USER_PROJECT} contains "{pid}")'

    def _build_persona_filter(
        persona_id: int | None,
    ) -> str:
        if persona_id is None:
            return ""
        try:
            pid = int(persona_id)
        except Exception:
            logger.warning(f"Invalid persona ID: {persona_id}")
            return ""
        return f'({PERSONAS} contains "{pid}")'

    def _append(parts: list[str], clause: str) -> None:
        if clause:
            parts.append(clause)

    # Collect all top-level filter clauses, then join with " and " at the end.
    filter_parts: list[str] = []

    if not include_hidden:
        filter_parts.append(f"!({HIDDEN}=true)")

    # TODO: add error condition if MULTI_TENANT and no tenant_id filter is set
    if filters.tenant_id and MULTI_TENANT:
        filter_parts.append(build_tenant_id_filter(filters.tenant_id))

    # ACL filters — use weightedSet for efficient matching against the
    # access_control_list weightedset<string> field.  OR-chaining thousands
    # of 'contains' clauses causes Vespa to reject the query (HTTP 400)
    # for users with large numbers of external permission groups.
    if filters.access_control_list is not None:
        _append(
            filter_parts,
            _build_weighted_set_filter(
                ACCESS_CONTROL_LIST, filters.access_control_list
            ),
        )

    # Source type filters
    source_strs = (
        [s.value for s in filters.source_type] if filters.source_type else None
    )
    _append(filter_parts, _build_or_filters(SOURCE_TYPE, source_strs))

    # Tag filters
    tag_attributes = None
    if filters.tags:
        tag_attributes = [
            f"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}" for tag in filters.tags
        ]
    _append(filter_parts, _build_or_filters(METADATA_LIST, tag_attributes))

    # Knowledge scope: explicit knowledge attachments restrict what an
    # assistant can see.  When none are set, the assistant can see
    # everything.
    #
    # persona_id_filter is a primary trigger — a persona with user files IS
    # explicit knowledge, so it can start a knowledge scope on its own.
    #
    # project_id_filter is additive — it widens the scope to also cover
    # overflowing project files but never restricts on its own (a chat
    # inside a project should still search team knowledge).
    knowledge_scope_parts: list[str] = []

    _append(
        knowledge_scope_parts, _build_or_filters(DOCUMENT_SETS, filters.document_set)
    )
    _append(knowledge_scope_parts, _build_persona_filter(filters.persona_id_filter))

    # project_id_filter only widens an existing scope.
    if knowledge_scope_parts:
        _append(
            knowledge_scope_parts,
            _build_user_project_filter(filters.project_id_filter),
        )

    if len(knowledge_scope_parts) > 1:
        filter_parts.append("(" + " or ".join(knowledge_scope_parts) + ")")
    elif len(knowledge_scope_parts) == 1:
        filter_parts.append(knowledge_scope_parts[0])

    # Time filter
    _append(filter_parts, _build_time_filter(filters.time_cutoff))

    # # Knowledge Graph Filters
    # _append(filter_parts, _build_kg_filter(
    #     kg_entities=filters.kg_entities,
    #     kg_relationships=filters.kg_relationships,
    #     kg_terms=filters.kg_terms,
    # ))

    # _append(filter_parts, _build_kg_source_filters(filters.kg_sources))

    # _append(filter_parts, _build_kg_chunk_id_zero_only_filter(
    #     filters.kg_chunk_id_zero_only or False
    # ))

    filter_str = " and ".join(filter_parts)

    if filter_str and not remove_trailing_and:
        filter_str += " and "

    return filter_str


def build_vespa_id_based_retrieval_yql(
    chunk_request: VespaChunkRequest,
) -> str:
    id_based_retrieval_yql_section = (
        f'({DOCUMENT_ID} contains "{chunk_request.document_id}"'
    )

    if chunk_request.is_capped:
        id_based_retrieval_yql_section += (
            f" and {CHUNK_ID} >= {chunk_request.min_chunk_ind or 0}"
        )
        id_based_retrieval_yql_section += (
            f" and {CHUNK_ID} <= {chunk_request.max_chunk_ind}"
        )

    id_based_retrieval_yql_section += ")"
    return id_based_retrieval_yql_section


================================================
FILE: backend/onyx/document_index/vespa/vespa_document_index.py
================================================
import concurrent.futures
import logging
import random
from collections.abc import Generator
from collections.abc import Iterable
from typing import Any
from uuid import UUID

import httpx
from pydantic import BaseModel
from retry import retry

from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
from onyx.configs.app_configs import RERANK_COUNT
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.configs.chat_configs import HYBRID_ALPHA
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.document_index_utils import get_document_chunk_ids
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces_new import DocumentIndex
from onyx.document_index.interfaces_new import DocumentInsertionRecord
from onyx.document_index.interfaces_new import DocumentSectionRequest
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import MetadataUpdateRequest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.vespa.chunk_retrieval import batch_search_api_retrieval
from onyx.document_index.vespa.chunk_retrieval import get_all_chunks_paginated
from onyx.document_index.vespa.chunk_retrieval import get_chunks_via_visit_api
from onyx.document_index.vespa.chunk_retrieval import (
    parallel_visit_api_retrieval,
)
from onyx.document_index.vespa.chunk_retrieval import query_vespa
from onyx.document_index.vespa.deletion import delete_vespa_chunks
from onyx.document_index.vespa.indexing_utils import BaseHTTPXClientContext
from onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks
from onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence
from onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy
from onyx.document_index.vespa.indexing_utils import GlobalHTTPXClientContext
from onyx.document_index.vespa.indexing_utils import TemporaryHTTPXClientContext
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
    build_vespa_filters,
)
from onyx.document_index.vespa_constants import BATCH_SIZE
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
from onyx.document_index.vespa_constants import VESPA_TIMEOUT
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.tools.tool_implementations.search.constants import KEYWORD_QUERY_HYBRID_ALPHA
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.model_server_models import Embedding


logger = setup_logger(__name__)
# Set the logging level to WARNING to ignore INFO and DEBUG logs from httpx. By
# default it emits INFO-level logs for every request.
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)


def _enrich_basic_chunk_info(
    index_name: str,
    http_client: httpx.Client,
    document_id: str,
    previous_chunk_count: int | None,
    new_chunk_count: int,
) -> EnrichedDocumentIndexingInfo:
    """Determines which chunks need to be deleted during document reindexing.

    When a document is reindexed, it may have fewer chunks than before. This
    function identifies the range of old chunks that need to be deleted by
    comparing the new chunk count with the previous chunk count.

    Example:
        If a document previously had 10 chunks (0-9) and now has 7 chunks (0-6),
        this function identifies that chunks 7-9 need to be deleted.

    Args:
        index_name: The Vespa index/schema name.
        http_client: HTTP client for making requests to Vespa.
        document_id: The Vespa-sanitized ID of the document being reindexed.
        previous_chunk_count: The total number of chunks the document had before
            reindexing. None for documents using the legacy chunk ID system.
        new_chunk_count: The total number of chunks the document has after
            reindexing. This becomes the starting index for deletion since
            chunks are 0-indexed.

    Returns:
        EnrichedDocumentIndexingInfo with chunk_start_index set to
        new_chunk_count (where deletion begins) and chunk_end_index set to
        previous_chunk_count (where deletion ends).
    """
    # Technically last indexed chunk index +1.
    last_indexed_chunk = previous_chunk_count
    # If the document has no `chunk_count` in the database, we know that it
    # has the old chunk ID system and we must check for the final chunk index.
    is_old_version = False
    if last_indexed_chunk is None:
        is_old_version = True
        minimal_doc_info = MinimalDocumentIndexingInfo(
            doc_id=document_id, chunk_start_index=new_chunk_count
        )
        last_indexed_chunk = check_for_final_chunk_existence(
            minimal_doc_info=minimal_doc_info,
            start_index=new_chunk_count,
            index_name=index_name,
            http_client=http_client,
        )

    assert (
        last_indexed_chunk is not None and last_indexed_chunk >= 0
    ), f"Bug: Last indexed chunk index is None or less than 0 for document: {document_id}."

    enriched_doc_info = EnrichedDocumentIndexingInfo(
        doc_id=document_id,
        chunk_start_index=new_chunk_count,
        chunk_end_index=last_indexed_chunk,
        old_version=is_old_version,
    )
    return enriched_doc_info


@retry(
    tries=3,
    delay=1,
    backoff=2,
    exceptions=httpx.HTTPError,
)
def _update_single_chunk(
    doc_chunk_id: UUID,
    index_name: str,
    doc_id: str,
    http_client: httpx.Client,
    update_request: MetadataUpdateRequest,
) -> None:
    """Updates a single document chunk in Vespa.

    TODO(andrei): Couldn't this be batched?

    Args:
        doc_chunk_id: The ID of the chunk to update.
        index_name: The index the chunk belongs to.
        doc_id: The ID of the document the chunk belongs to. Used only for
            logging.
        http_client: The HTTP client to use to make the request.
        update_request: Metadata update request object received in the bulk
            update method containing fields to update.
    """

    class _Boost(BaseModel):
        model_config = {"frozen": True}
        assign: float

    class _DocumentSets(BaseModel):
        model_config = {"frozen": True}
        assign: dict[str, int]

    class _AccessControl(BaseModel):
        model_config = {"frozen": True}
        assign: dict[str, int]

    class _Hidden(BaseModel):
        model_config = {"frozen": True}
        assign: bool

    class _UserProjects(BaseModel):
        model_config = {"frozen": True}
        assign: list[int]

    class _Personas(BaseModel):
        model_config = {"frozen": True}
        assign: list[int]

    class _VespaPutFields(BaseModel):
        model_config = {"frozen": True}
        # The names of these fields are based the Vespa schema. Changes to the
        # schema require changes here. These names were originally found in
        # backend/onyx/document_index/vespa_constants.py.
        boost: _Boost | None = None
        document_sets: _DocumentSets | None = None
        access_control_list: _AccessControl | None = None
        hidden: _Hidden | None = None
        user_project: _UserProjects | None = None
        personas: _Personas | None = None

    class _VespaPutRequest(BaseModel):
        model_config = {"frozen": True}
        fields: _VespaPutFields

    boost_update: _Boost | None = (
        _Boost(assign=update_request.boost)
        if update_request.boost is not None
        else None
    )
    document_sets_update: _DocumentSets | None = (
        _DocumentSets(
            assign={document_set: 1 for document_set in update_request.document_sets}
        )
        if update_request.document_sets is not None
        else None
    )
    access_update: _AccessControl | None = (
        _AccessControl(
            assign={acl_entry: 1 for acl_entry in update_request.access.to_acl()}
        )
        if update_request.access is not None
        else None
    )
    hidden_update: _Hidden | None = (
        _Hidden(assign=update_request.hidden)
        if update_request.hidden is not None
        else None
    )
    user_projects_update: _UserProjects | None = (
        _UserProjects(assign=list(update_request.project_ids))
        if update_request.project_ids is not None
        else None
    )
    personas_update: _Personas | None = (
        _Personas(assign=list(update_request.persona_ids))
        if update_request.persona_ids is not None
        else None
    )

    vespa_put_fields = _VespaPutFields(
        boost=boost_update,
        document_sets=document_sets_update,
        access_control_list=access_update,
        hidden=hidden_update,
        user_project=user_projects_update,
        personas=personas_update,
    )

    vespa_put_request = _VespaPutRequest(
        fields=vespa_put_fields,
    )

    vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true"

    try:
        resp = http_client.put(
            vespa_url,
            headers={"Content-Type": "application/json"},
            json=vespa_put_request.model_dump(
                exclude_none=True
            ),  # NOTE: Important to not produce null fields in the json.
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        logger.error(
            f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). "
            f"Code: {e.response.status_code}. Details: {e.response.text}"
        )
        # Re-raise so the @retry decorator will catch and retry, unless the
        # status code is < 5xx, in which case wrap the exception in something
        # other than an HTTPError to skip retries.
        if e.response.status_code >= 500:
            raise
        raise RuntimeError(
            f"Non-retryable error updating chunk {doc_chunk_id}: {e}"
        ) from e


class VespaDocumentIndex(DocumentIndex):
    """Vespa-specific implementation of the DocumentIndex interface.

    This class provides document indexing, retrieval, and management operations
    for a Vespa search engine instance. It handles the complete lifecycle of
    document chunks within a specific Vespa index/schema.
    """

    def __init__(
        self,
        index_name: str,
        tenant_state: TenantState,
        large_chunks_enabled: bool,
        httpx_client: httpx.Client | None = None,
    ) -> None:
        self._index_name = index_name
        self._tenant_id = tenant_state.tenant_id
        self._large_chunks_enabled = large_chunks_enabled
        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This
        # is beneficial for indexing / updates / deletes since we have to make a
        # large volume of requests.
        self._httpx_client_context: BaseHTTPXClientContext
        if httpx_client:
            # Use the provided client. Because this client is presumed global,
            # it does not close after exiting a context manager.
            self._httpx_client_context = GlobalHTTPXClientContext(httpx_client)
        else:
            # We did not receive a client, so create one what will close after
            # exiting a context manager.
            self._httpx_client_context = TemporaryHTTPXClientContext(
                get_vespa_http_client
            )
        self._multitenant = tenant_state.multitenant

    def verify_and_create_index_if_necessary(
        self, embedding_dim: int, embedding_precision: EmbeddingPrecision
    ) -> None:
        raise NotImplementedError

    def index(
        self,
        chunks: Iterable[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff
        doc_id_to_previous_chunk_cnt = {
            doc_id: chunk_cnt_diff.old_chunk_cnt
            for doc_id, chunk_cnt_diff in doc_id_to_chunk_cnt_diff.items()
        }
        doc_id_to_new_chunk_cnt = {
            doc_id: chunk_cnt_diff.new_chunk_cnt
            for doc_id, chunk_cnt_diff in doc_id_to_chunk_cnt_diff.items()
        }
        assert (
            len(doc_id_to_chunk_cnt_diff)
            == len(doc_id_to_previous_chunk_cnt)
            == len(doc_id_to_new_chunk_cnt)
        ), "Bug: Doc ID to chunk maps have different lengths."

        # Vespa has restrictions on valid characters, yet document IDs come from
        # external w.r.t. this class. We need to sanitize them.
        #
        # Instead of materializing all cleaned chunks upfront, we stream them
        # through a generator that cleans IDs and builds the original-ID mapping
        # incrementally as chunks flow into Vespa.
        def _clean_and_track(
            chunks_iter: Iterable[DocMetadataAwareIndexChunk],
            id_map: dict[str, str],
            seen_ids: set[str],
        ) -> Generator[DocMetadataAwareIndexChunk, None, None]:
            """Cleans chunk IDs and builds the original-ID mapping
            incrementally as chunks flow through, avoiding a separate
            materialization pass."""
            for chunk in chunks_iter:
                original_id = chunk.source_document.id
                cleaned = clean_chunk_id_copy(chunk)
                cleaned_id = cleaned.source_document.id
                # Needed so the final DocumentInsertionRecord returned can have
                # the original document ID. cleaned_chunks might not contain IDs
                # exactly as callers supplied them.
                id_map[cleaned_id] = original_id
                seen_ids.add(cleaned_id)
                yield cleaned

        new_document_id_to_original_document_id: dict[str, str] = {}
        all_cleaned_doc_ids: set[str] = set()

        existing_docs: set[str] = set()

        with (
            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
            self._httpx_client_context as http_client,
        ):
            # We require the start and end index for each document in order to
            # know precisely which chunks to delete. This information exists for
            # documents that have `chunk_count` in the database, but not for
            # `old_version` documents.
            enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = [
                _enrich_basic_chunk_info(
                    index_name=self._index_name,
                    http_client=http_client,
                    document_id=doc_id,
                    previous_chunk_count=doc_id_to_previous_chunk_cnt[doc_id],
                    new_chunk_count=doc_id_to_new_chunk_cnt[doc_id],
                )
                for doc_id in doc_id_to_chunk_cnt_diff.keys()
                # TODO(andrei), WARNING: Don't we need to sanitize these doc IDs?
            ]

            for enriched_doc_info in enriched_doc_infos:
                # If the document has previously indexed chunks, we know it
                # previously existed and this is a reindex.
                if enriched_doc_info.chunk_end_index:
                    existing_docs.add(enriched_doc_info.doc_id)

            # Now, for each doc, we know exactly where to start and end our
            # deletion. So let's generate the chunk IDs for each chunk to
            # delete.
            # WARNING: This code seems to use
            # indexing_metadata.doc_id_to_chunk_cnt_diff as the source of truth
            # for which chunks to delete. This implies that the onus is on the
            # caller to ensure doc_id_to_chunk_cnt_diff only contains docs
            # relevant to the chunks argument to this method. This should not be
            # the contract of DocumentIndex; and this code is only a refactor
            # from old code. It would seem we should use all_cleaned_doc_ids as
            # the source of truth.
            chunks_to_delete = get_document_chunk_ids(
                enriched_document_info_list=enriched_doc_infos,
                tenant_id=self._tenant_id,
                large_chunks_enabled=self._large_chunks_enabled,
            )

            # Delete old Vespa documents.
            for doc_chunk_ids_batch in batch_generator(chunks_to_delete, BATCH_SIZE):
                delete_vespa_chunks(
                    doc_chunk_ids=doc_chunk_ids_batch,
                    index_name=self._index_name,
                    http_client=http_client,
                    executor=executor,
                )

            # Insert new Vespa documents, streaming through the cleaning
            # pipeline so chunks are never fully materialized.
            cleaned_chunks = _clean_and_track(
                chunks,
                new_document_id_to_original_document_id,
                all_cleaned_doc_ids,
            )
            for chunk_batch in batch_generator(
                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
            ):
                batch_index_vespa_chunks(
                    chunks=chunk_batch,
                    index_name=self._index_name,
                    http_client=http_client,
                    multitenant=self._multitenant,
                    executor=executor,
                )

        return [
            DocumentInsertionRecord(
                document_id=new_document_id_to_original_document_id[cleaned_doc_id],
                already_existed=cleaned_doc_id in existing_docs,
            )
            for cleaned_doc_id in all_cleaned_doc_ids
        ]

    def delete(self, document_id: str, chunk_count: int | None = None) -> int:
        total_chunks_deleted = 0

        sanitized_doc_id = replace_invalid_doc_id_characters(document_id)

        with (
            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
            self._httpx_client_context as http_client,
        ):
            enriched_doc_info = _enrich_basic_chunk_info(
                index_name=self._index_name,
                http_client=http_client,
                document_id=sanitized_doc_id,
                previous_chunk_count=chunk_count,
                new_chunk_count=0,
            )
            chunks_to_delete = get_document_chunk_ids(
                enriched_document_info_list=[enriched_doc_info],
                tenant_id=self._tenant_id,
                large_chunks_enabled=self._large_chunks_enabled,
            )

            for doc_chunk_ids_batch in batch_generator(chunks_to_delete, BATCH_SIZE):
                total_chunks_deleted += len(doc_chunk_ids_batch)
                delete_vespa_chunks(
                    doc_chunk_ids=doc_chunk_ids_batch,
                    index_name=self._index_name,
                    http_client=http_client,
                    executor=executor,
                )

        return total_chunks_deleted

    def update(
        self,
        update_requests: list[MetadataUpdateRequest],
    ) -> None:
        # WARNING: This method can be called by vespa_metadata_sync_task, which
        # is kicked off by check_for_vespa_sync_task, notably before a document
        # has finished indexing. In this way, chunk_count below could be unknown
        # even for chunks not on the "old" chunk ID system; i.e. there could be
        # a race condition. Passing in None to _enrich_basic_chunk_info should
        # handle this, but a higher level TODO might be to not run update at all
        # on connectors that are still indexing, and therefore do not yet have a
        # chunk count because update_docs_chunk_count__no_commit has not been
        # run yet.
        with self._httpx_client_context as httpx_client:
            # Each invocation of this method can contain multiple update requests.
            for update_request in update_requests:
                # Each update request can correspond to multiple documents.
                for doc_id in update_request.document_ids:
                    # NOTE: -1 represents an unknown chunk count.
                    chunk_count = update_request.doc_id_to_chunk_cnt[doc_id]
                    sanitized_doc_id = replace_invalid_doc_id_characters(doc_id)
                    enriched_doc_info = _enrich_basic_chunk_info(
                        index_name=self._index_name,
                        http_client=httpx_client,
                        document_id=sanitized_doc_id,
                        previous_chunk_count=chunk_count if chunk_count >= 0 else None,
                        new_chunk_count=0,  # WARNING: This semantically makes no sense and is misusing this function.
                    )

                    doc_chunk_ids = get_document_chunk_ids(
                        enriched_document_info_list=[enriched_doc_info],
                        tenant_id=self._tenant_id,
                        large_chunks_enabled=self._large_chunks_enabled,
                    )

                    for doc_chunk_id in doc_chunk_ids:
                        _update_single_chunk(
                            doc_chunk_id,
                            self._index_name,
                            # NOTE: Used only for logging, raw ID is ok here.
                            doc_id,
                            httpx_client,
                            update_request,
                        )

                    logger.info(
                        f"Updated {len(doc_chunk_ids)} chunks for document {doc_id}."
                    )

    def id_based_retrieval(
        self,
        chunk_requests: list[DocumentSectionRequest],
        filters: IndexFilters,
        batch_retrieval: bool = False,
    ) -> list[InferenceChunk]:
        sanitized_chunk_requests = [
            VespaChunkRequest(
                document_id=replace_invalid_doc_id_characters(
                    chunk_request.document_id
                ),
                min_chunk_ind=chunk_request.min_chunk_ind,
                max_chunk_ind=chunk_request.max_chunk_ind,
            )
            for chunk_request in chunk_requests
        ]

        if batch_retrieval:
            return cleanup_content_for_chunks(
                batch_search_api_retrieval(
                    index_name=self._index_name,
                    chunk_requests=sanitized_chunk_requests,
                    filters=filters,
                    # No one was passing in this parameter in the legacy
                    # interface, it always defaulted to False.
                    get_large_chunks=False,
                )
            )
        return cleanup_content_for_chunks(
            parallel_visit_api_retrieval(
                index_name=self._index_name,
                chunk_requests=sanitized_chunk_requests,
                filters=filters,
                # No one was passing in this parameter in the legacy interface,
                # it always defaulted to False.
                get_large_chunks=False,
            )
        )

    def hybrid_retrieval(
        self,
        query: str,
        query_embedding: Embedding,
        final_keywords: list[str] | None,
        query_type: QueryType,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        vespa_where_clauses = build_vespa_filters(filters)
        # Avoid over-fetching a very large candidate set for global-phase reranking.
        # Keep enough headroom for quality while capping cost on larger indices.
        target_hits = min(max(4 * num_to_retrieve, 100), RERANK_COUNT)

        yql = (
            YQL_BASE.format(index_name=self._index_name)
            + vespa_where_clauses
            + f"(({{targetHits: {target_hits}}}nearestNeighbor(embeddings, query_embedding)) "
            + f"or ({{targetHits: {target_hits}}}nearestNeighbor(title_embedding, query_embedding)) "
            + 'or ({grammar: "weakAnd"}userInput(@query)) '
            + f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
        )

        final_query = " ".join(final_keywords) if final_keywords else query

        ranking_profile = (
            f"hybrid_search_{query_type.value}_base_{len(query_embedding)}"
        )

        logger.info(f"Selected ranking profile: {ranking_profile}")

        logger.debug(f"Query YQL: {yql}")

        # In this interface we do not pass in hybrid alpha. Tracing the codepath
        # of the legacy Vespa interface, it so happens that KEYWORD always
        # corresponds to an alpha of 0.2 (from KEYWORD_QUERY_HYBRID_ALPHA), and
        # SEMANTIC to 0.5 (from HYBRID_ALPHA). HYBRID_ALPHA_KEYWORD was only
        # used in dead code so we do not use it here.
        hybrid_alpha = (
            KEYWORD_QUERY_HYBRID_ALPHA
            if query_type == QueryType.KEYWORD
            else HYBRID_ALPHA
        )

        params: dict[str, str | int | float] = {
            "yql": yql,
            "query": final_query,
            "input.query(query_embedding)": str(query_embedding),
            "input.query(decay_factor)": str(DOC_TIME_DECAY * RECENCY_BIAS_MULTIPLIER),
            "input.query(alpha)": hybrid_alpha,
            "input.query(title_content_ratio)": TITLE_CONTENT_RATIO,
            "hits": num_to_retrieve,
            "ranking.profile": ranking_profile,
            "timeout": VESPA_TIMEOUT,
        }

        return cleanup_content_for_chunks(query_vespa(params))

    def keyword_retrieval(
        self,
        query: str,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        raise NotImplementedError

    def semantic_retrieval(
        self,
        query_embedding: Embedding,
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
        raise NotImplementedError

    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 100,
        dirty: bool | None = None,  # noqa: ARG002
    ) -> list[InferenceChunk]:
        vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)

        yql = YQL_BASE.format(index_name=self._index_name) + vespa_where_clauses

        random_seed = random.randint(0, 1_000_000)

        params: dict[str, str | int | float] = {
            "yql": yql,
            "hits": num_to_retrieve,
            "timeout": VESPA_TIMEOUT,
            "ranking.profile": "random_",
            "ranking.properties.random.seed": random_seed,
        }

        return cleanup_content_for_chunks(query_vespa(params))

    def get_raw_document_chunks(self, document_id: str) -> list[dict[str, Any]]:
        """Gets all raw document chunks for a document as returned by Vespa.

        Used in the Vespa migration task.

        Args:
            document_id: The ID of the document to get chunks for.

        Returns:
            List of raw document chunks.
        """
        # Vespa doc IDs are sanitized using replace_invalid_doc_id_characters.
        sanitized_document_id = replace_invalid_doc_id_characters(document_id)
        chunk_request = VespaChunkRequest(document_id=sanitized_document_id)
        raw_chunks = get_chunks_via_visit_api(
            chunk_request=chunk_request,
            index_name=self._index_name,
            filters=IndexFilters(access_control_list=None, tenant_id=self._tenant_id),
            get_large_chunks=False,
            short_tensor_format=True,
        )
        # Vespa returns other metadata around the actual document chunk. The raw
        # chunk we're interested in is in the "fields" field.
        raw_document_chunks = [chunk["fields"] for chunk in raw_chunks]
        return raw_document_chunks

    def get_all_raw_document_chunks_paginated(
        self,
        continuation_token_map: dict[int, str | None],
        page_size: int,
    ) -> tuple[list[dict[str, Any]], dict[int, str | None]]:
        """Gets all the chunks in Vespa, paginated.

        Used in the chunk-level Vespa-to-OpenSearch migration task.

        Args:
            continuation_token: Token returned by Vespa representing a page
                offset. None to start from the beginning. Defaults to None.
            page_size: Best-effort batch size for the visit.

        Returns:
            Tuple of (list of chunk dicts, next continuation token or None). The
                continuation token is None when the visit is complete.
        """
        raw_chunks, next_continuation_token_map = get_all_chunks_paginated(
            index_name=self._index_name,
            tenant_state=TenantState(
                tenant_id=self._tenant_id, multitenant=MULTI_TENANT
            ),
            continuation_token_map=continuation_token_map,
            page_size=page_size,
        )
        return raw_chunks, next_continuation_token_map

    def index_raw_chunks(self, chunks: list[dict[str, Any]]) -> None:
        """Indexes raw document chunks into Vespa.

        To only be used in tests. Not for production.
        """
        json_header = {
            "Content-Type": "application/json",
        }
        with self._httpx_client_context as http_client:
            for chunk in chunks:
                chunk_id = str(
                    get_uuid_from_chunk_info(
                        document_id=chunk[DOCUMENT_ID],
                        chunk_id=chunk[CHUNK_ID],
                        tenant_id=self._tenant_id,
                    )
                )
                vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=self._index_name)}/{chunk_id}"
                response = http_client.post(
                    vespa_url,
                    headers=json_header,
                    json={"fields": chunk},
                )
                response.raise_for_status()

    def get_chunk_count(self) -> int:
        """Returns the exact number of document chunks in Vespa for this tenant.

        Uses the Vespa Search API with `limit 0` and `ranking.profile=unranked`
        to get an exact count without fetching any document data.

        Includes large chunks. There is no way to filter these out using the
        Search API.
        """
        where_clause = (
            f'tenant_id contains "{self._tenant_id}"' if self._multitenant else "true"
        )
        yql = f"select documentid from {self._index_name} where {where_clause} limit 0"
        params: dict[str, str | int] = {
            "yql": yql,
            "ranking.profile": "unranked",
            "timeout": VESPA_TIMEOUT,
        }

        with get_vespa_http_client() as http_client:
            response = http_client.post(SEARCH_ENDPOINT, json=params)
            response.raise_for_status()
            response_data = response.json()
        return response_data["root"]["fields"]["totalCount"]


================================================
FILE: backend/onyx/document_index/vespa_constants.py
================================================
from onyx.configs.app_configs import VESPA_CLOUD_URL
from onyx.configs.app_configs import VESPA_CONFIG_SERVER_HOST
from onyx.configs.app_configs import VESPA_HOST
from onyx.configs.app_configs import VESPA_PORT
from onyx.configs.app_configs import VESPA_TENANT_PORT
from onyx.configs.constants import SOURCE_TYPE

# config server


VESPA_CONFIG_SERVER_URL = (
    VESPA_CLOUD_URL or f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}"
)
VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2"

# main search application
VESPA_APP_CONTAINER_URL = VESPA_CLOUD_URL or f"http://{VESPA_HOST}:{VESPA_PORT}"


# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd.jinja
DOCUMENT_ID_ENDPOINT = (
    f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid"
)

# the default document id endpoint is http://localhost:8080/document/v1/default/danswer_chunk/docid

SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/"

# Since Vespa doesn't allow batching of inserts / updates, we use threads to
# parallelize the operations.
NUM_THREADS = 32
MAX_ID_SEARCH_QUERY_SIZE = 400
# Suspect that adding too many "or" conditions will cause Vespa to timeout and return
# an empty list of hits (with no error status and coverage: 0 and degraded)
MAX_OR_CONDITIONS = 10
# up from 500ms for now, since we've seen quite a few timeouts
# in the long term, we are looking to improve the performance of Vespa
# so that we can bring this back to default
VESPA_TIMEOUT = "10s"
# The size of the batch to use for batched operations like inserts / updates.
# The batch will likely be sent to a threadpool of size NUM_THREADS.
BATCH_SIZE = 128

TENANT_ID = "tenant_id"
DOCUMENT_ID = "document_id"
CHUNK_ID = "chunk_id"
BLURB = "blurb"
CONTENT = "content"
SOURCE_LINKS = "source_links"
SEMANTIC_IDENTIFIER = "semantic_identifier"
TITLE = "title"
SKIP_TITLE_EMBEDDING = "skip_title"
SECTION_CONTINUATION = "section_continuation"
EMBEDDINGS = "embeddings"
TITLE_EMBEDDING = "title_embedding"
ACCESS_CONTROL_LIST = "access_control_list"
DOCUMENT_SETS = "document_sets"
USER_FILE = "user_file"
USER_FOLDER = "user_folder"
USER_PROJECT = "user_project"
PERSONAS = "personas"
LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids"
METADATA = "metadata"
METADATA_LIST = "metadata_list"
METADATA_SUFFIX = "metadata_suffix"
DOC_SUMMARY = "doc_summary"
CHUNK_CONTEXT = "chunk_context"
BOOST = "boost"
AGGREGATED_CHUNK_BOOST_FACTOR = "aggregated_chunk_boost_factor"
DOC_UPDATED_AT = "doc_updated_at"  # Indexed as seconds since epoch
PRIMARY_OWNERS = "primary_owners"
SECONDARY_OWNERS = "secondary_owners"
RECENCY_BIAS = "recency_bias"
HIDDEN = "hidden"
# for legacy reasons, called `name` in Vespa despite it really being an ID
IMAGE_FILE_NAME = "image_file_name"

# Specific to Vespa, needed for highlighting matching keywords / section
CONTENT_SUMMARY = "content_summary"

FULL_CHUNK_EMBEDDING_KEY = "full_chunk"


YQL_BASE = (
    f"select "
    f"documentid, "
    f"{DOCUMENT_ID}, "
    f"{CHUNK_ID}, "
    f"{BLURB}, "
    f"{CONTENT}, "
    f"{SOURCE_TYPE}, "
    f"{SOURCE_LINKS}, "
    f"{SEMANTIC_IDENTIFIER}, "
    f"{TITLE}, "
    f"{SECTION_CONTINUATION}, "
    f"{IMAGE_FILE_NAME}, "
    f"{BOOST}, "
    f"{AGGREGATED_CHUNK_BOOST_FACTOR}, "
    f"{HIDDEN}, "
    f"{DOC_UPDATED_AT}, "
    f"{PRIMARY_OWNERS}, "
    f"{SECONDARY_OWNERS}, "
    f"{LARGE_CHUNK_REFERENCE_IDS}, "
    f"{METADATA}, "
    f"{METADATA_SUFFIX}, "
    f"{DOC_SUMMARY}, "
    f"{CHUNK_CONTEXT}, "
    f"{CONTENT_SUMMARY} "
    f"from {{index_name}} where "
)


================================================
FILE: backend/onyx/error_handling/__init__.py
================================================


================================================
FILE: backend/onyx/error_handling/error_codes.py
================================================
"""
Standardized error codes for the Onyx backend.

Usage:
    from onyx.error_handling.error_codes import OnyxErrorCode
    from onyx.error_handling.exceptions import OnyxError

    raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, "Token expired")
"""

from enum import Enum


class OnyxErrorCode(Enum):
    """
    Each member is a tuple of (error_code_string, http_status_code).

    The error_code_string is a stable, machine-readable identifier that
    API consumers can match on. The http_status_code is the default HTTP
    status to return.
    """

    # ------------------------------------------------------------------
    # Authentication (401)
    # ------------------------------------------------------------------
    UNAUTHENTICATED = ("UNAUTHENTICATED", 401)
    INVALID_TOKEN = ("INVALID_TOKEN", 401)
    TOKEN_EXPIRED = ("TOKEN_EXPIRED", 401)
    CSRF_FAILURE = ("CSRF_FAILURE", 403)

    # ------------------------------------------------------------------
    # Authorization (403)
    # ------------------------------------------------------------------
    UNAUTHORIZED = ("UNAUTHORIZED", 403)
    INSUFFICIENT_PERMISSIONS = ("INSUFFICIENT_PERMISSIONS", 403)
    ADMIN_ONLY = ("ADMIN_ONLY", 403)
    EE_REQUIRED = ("EE_REQUIRED", 403)
    SINGLE_TENANT_ONLY = ("SINGLE_TENANT_ONLY", 403)
    ENV_VAR_GATED = ("ENV_VAR_GATED", 403)

    # ------------------------------------------------------------------
    # Validation / Bad Request (400)
    # ------------------------------------------------------------------
    VALIDATION_ERROR = ("VALIDATION_ERROR", 400)
    INVALID_INPUT = ("INVALID_INPUT", 400)
    MISSING_REQUIRED_FIELD = ("MISSING_REQUIRED_FIELD", 400)
    QUERY_REJECTED = ("QUERY_REJECTED", 400)

    # ------------------------------------------------------------------
    # Not Found (404)
    # ------------------------------------------------------------------
    NOT_FOUND = ("NOT_FOUND", 404)
    CONNECTOR_NOT_FOUND = ("CONNECTOR_NOT_FOUND", 404)
    CREDENTIAL_NOT_FOUND = ("CREDENTIAL_NOT_FOUND", 404)
    PERSONA_NOT_FOUND = ("PERSONA_NOT_FOUND", 404)
    DOCUMENT_NOT_FOUND = ("DOCUMENT_NOT_FOUND", 404)
    SESSION_NOT_FOUND = ("SESSION_NOT_FOUND", 404)
    USER_NOT_FOUND = ("USER_NOT_FOUND", 404)

    # ------------------------------------------------------------------
    # Conflict (409)
    # ------------------------------------------------------------------
    CONFLICT = ("CONFLICT", 409)
    DUPLICATE_RESOURCE = ("DUPLICATE_RESOURCE", 409)

    # ------------------------------------------------------------------
    # Rate Limiting / Quotas (429 / 402)
    # ------------------------------------------------------------------
    RATE_LIMITED = ("RATE_LIMITED", 429)
    SEAT_LIMIT_EXCEEDED = ("SEAT_LIMIT_EXCEEDED", 402)

    # ------------------------------------------------------------------
    # Payload (413)
    # ------------------------------------------------------------------
    PAYLOAD_TOO_LARGE = ("PAYLOAD_TOO_LARGE", 413)

    # ------------------------------------------------------------------
    # Connector / Credential Errors (400-range)
    # ------------------------------------------------------------------
    CONNECTOR_VALIDATION_FAILED = ("CONNECTOR_VALIDATION_FAILED", 400)
    CREDENTIAL_INVALID = ("CREDENTIAL_INVALID", 400)
    CREDENTIAL_EXPIRED = ("CREDENTIAL_EXPIRED", 401)

    # ------------------------------------------------------------------
    # Server Errors (5xx)
    # ------------------------------------------------------------------
    INTERNAL_ERROR = ("INTERNAL_ERROR", 500)
    NOT_IMPLEMENTED = ("NOT_IMPLEMENTED", 501)
    SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
    BAD_GATEWAY = ("BAD_GATEWAY", 502)
    LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
    HOOK_EXECUTION_FAILED = ("HOOK_EXECUTION_FAILED", 502)
    GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)

    def __init__(self, code: str, status_code: int) -> None:
        self.code = code
        self.status_code = status_code

    def detail(self, message: str | None = None) -> dict[str, str]:
        """Build a structured error detail dict.

        Returns a dict like:
            {"error_code": "UNAUTHENTICATED", "detail": "Token expired"}

        If no message is supplied, the error code itself is used as the detail.
        """
        return {
            "error_code": self.code,
            "detail": message or self.code,
        }


================================================
FILE: backend/onyx/error_handling/exceptions.py
================================================
"""OnyxError — the single exception type for all Onyx business errors.

Raise ``OnyxError`` instead of ``HTTPException`` in business code.  A global
FastAPI exception handler (registered via ``register_onyx_exception_handlers``)
converts it into a JSON response with the standard
``{"error_code": "...", "detail": "..."}`` shape.

Usage::

    from onyx.error_handling.error_codes import OnyxErrorCode
    from onyx.error_handling.exceptions import OnyxError

    raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")

For upstream errors with a dynamic HTTP status (e.g. billing service),
use ``status_code_override``::

    raise OnyxError(
        OnyxErrorCode.BAD_GATEWAY,
        detail,
        status_code_override=upstream_status,
    )
"""

from fastapi import FastAPI
from fastapi import Request
from fastapi.responses import JSONResponse

from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxError(Exception):
    """Structured error that maps to a specific ``OnyxErrorCode``.

    Attributes:
        error_code: The ``OnyxErrorCode`` enum member.
        detail: Human-readable detail (defaults to the error code string).
        status_code: HTTP status — either overridden or from the error code.
    """

    def __init__(
        self,
        error_code: OnyxErrorCode,
        detail: str | None = None,
        *,
        status_code_override: int | None = None,
    ) -> None:
        resolved_detail = detail or error_code.code
        super().__init__(resolved_detail)
        self.error_code = error_code
        self.detail = resolved_detail
        self._status_code_override = status_code_override

    @property
    def status_code(self) -> int:
        return self._status_code_override or self.error_code.status_code


def log_onyx_error(exc: OnyxError) -> None:
    detail = exc.detail
    status_code = exc.status_code
    if status_code >= 500:
        logger.error(f"OnyxError {exc.error_code.code}: {detail}")
    elif status_code >= 400:
        logger.warning(f"OnyxError {exc.error_code.code}: {detail}")


def onyx_error_to_json_response(exc: OnyxError) -> JSONResponse:
    return JSONResponse(
        status_code=exc.status_code,
        content=exc.error_code.detail(exc.detail),
    )


def register_onyx_exception_handlers(app: FastAPI) -> None:
    """Register a global handler that converts ``OnyxError`` to JSON responses.

    Must be called *after* the app is created but *before* it starts serving.
    The handler logs at WARNING for 4xx and ERROR for 5xx.
    """

    @app.exception_handler(OnyxError)
    async def _handle_onyx_error(
        request: Request,  # noqa: ARG001
        exc: OnyxError,
    ) -> JSONResponse:
        log_onyx_error(exc)
        return onyx_error_to_json_response(exc)


================================================
FILE: backend/onyx/evals/README.md
================================================
# Onyx Evaluations

This directory contains the evaluation framework for testing and measuring the performance of Onyx's chat and retrieval systems.

## Overview

The evaluation system uses [Braintrust](https://www.braintrust.dev/) to run automated evaluations against test datasets. It measures the quality of responses generated by Onyx's chat system and can be used to track performance improvements over time.

## Prerequisites

**Important**: The model server must be running in order for evals to work properly. Make sure your model server is up and running before executing any evaluations.

## Running Evaluations

Kick off a remote job
```bash
onyx/backend$ python -m dotenv -f .vscode/.env run -- python onyx/evals/eval_cli.py --remote --api-key <SUPER_CLOUD_USER_API_KEY> --search-permissions-email <email account to reference> --remote --remote-dataset-name Simple
```

You can also run the CLI directly from the command line:

```bash
onyx$ python -m dotenv -f .vscode/.env run -- python backend/onyx/evals/eval_cli.py --local-dataset-path backend/onyx/evals/data/eval.json --search-permissions-email richard@onyx.app
```
Save the env var ONYX_EVAL_API_KEY in your .env file so you don't have to specify it every time for triggering remote runs.
You'll need to create an API key in the admin panel to run evals.


### Production Environment

### Local Development

For local development, use the `eval_cli.py` script. We recommend starting it from the VS Code launch configuration for the best debugging experience.

#### Using VS Code Launch Configuration

1. Open VS Code in the project root
2. Go to the "Run and Debug" panel (Ctrl/Cmd + Shift + D)
3. Select "Eval CLI" from the dropdown
4. Click the play button or press F5

This will run the evaluation with the following default settings:
- Uses the local data file at `evals/data/data.json`
- Enables verbose output
- Sets up proper environment variables and Python path

#### CLI Options

- `--local-data-path`: Path to local JSON file containing test data (defaults to `evals/data/data.json`)
- `--remote-dataset-name`: Name of remote Braintrust dataset
- `--braintrust-project`: Braintrust project name (overrides `BRAINTRUST_PROJECT` env var)
- `--verbose`: Enable verbose output
- `--no-send-logs`: Skip sending logs to Braintrust (useful for local testing)
- `--local-only`: Run evals locally without Braintrust, output results to CLI only

## Test Data

The evaluation system uses test data stored in `evals/data/data.json`. This file contains a list of test cases, each with:
- `input`: The question or prompt to test

Example test case:
```json
{
    "input": {
      "message": "What is the capital of France?"
    }
}
```

### Per-Test Configuration

Configure tool forcing, assertions, and model settings per-test by adding optional fields to each test case.

#### Tool Configuration

- `force_tools`: List of tool type names to force for this specific test
- `expected_tools`: List of tool type names expected to be called
- `require_all_tools`: If true, all expected tools must be called (default: false)

#### Model Configuration

- `model`: Model version to use (e.g., "gpt-4o", "claude-3-5-sonnet")
- `model_provider`: Model provider (e.g., "openai", "anthropic")
- `temperature`: Temperature for the model (default: 0.0)

Example with tool and model configuration:
```json
[
  {
    "input": {
      "message": "Find information about Python programming"
    },
    "expected_tools": ["SearchTool"],
    "force_tools": ["SearchTool"],
    "model": "gpt-4o"
  },
  {
    "input": {
      "message": "Search the web for recent news about AI"
    },
    "expected_tools": ["WebSearchTool"],
    "model": "claude-3-5-sonnet",
    "model_provider": "anthropic"
  },
  {
    "input": {
      "message": "Calculate 2 + 2"
    },
    "expected_tools": ["PythonTool"],
    "temperature": 0.5
  }
]
```

### Multi-Turn Evaluations

For testing realistic multi-turn conversations where each turn may require different tools, use the `messages` array format instead of a single `message`:

```json
{
  "input": {
    "messages": [
      {
        "message": "What's the latest news about OpenAI today?",
        "expected_tools": ["WebSearchTool", "OpenURLTool"]
      },
      {
        "message": "Now search our internal docs for our OpenAI integration guide",
        "expected_tools": ["SearchTool"]
      },
      {
        "message": "Thanks, that's helpful!",
        "expected_tools": []
      }
    ]
  }
}
```

Each message in the `messages` array can have its own configuration:
- `message`: The user message text (required)
- `expected_tools`: List of tool types expected to be called for this turn
- `require_all_tools`: If true, all expected tools must be called (default: false)
- `force_tools`: List of tool types to force for this turn
- `model`: Model version override for this turn
- `model_provider`: Model provider override for this turn
- `temperature`: Temperature override for this turn

Multi-turn evals run within a single chat session, so the model has full context of previous turns when responding.

### Available Tool Types

The following built-in tool types can be used:
- `SearchTool`: Internal document search
- `WebSearchTool`: Internet/web search
- `ImageGenerationTool`: Image generation
- `PythonTool`: Python code execution
- `OpenURLTool`: Open and read URLs

### Braintrust Dashboard

After running evaluations, you can view results in the Braintrust dashboard. The evaluation will report:
- `tool_assertion`: Score of 1.0 if tool assertions passed (or no assertions configured), 0.0 if failed
- Metadata including `tools_called`, `tools_called_count`, and assertion details


================================================
FILE: backend/onyx/evals/eval.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any

from sqlalchemy import Engine
from sqlalchemy import event
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.session import SessionTransaction

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.models import ChatFullResponse
from onyx.chat.process_message import gather_stream_full
from onyx.chat.process_message import handle_stream_message_objects
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.chat import create_chat_session
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.db.users import get_user_by_email
from onyx.evals.models import ChatFullEvalResult
from onyx.evals.models import EvalationAck
from onyx.evals.models import EvalConfigurationOptions
from onyx.evals.models import EvalMessage
from onyx.evals.models import EvalProvider
from onyx.evals.models import EvalTimings
from onyx.evals.models import EvalToolResult
from onyx.evals.models import MultiTurnEvalResult
from onyx.evals.models import ToolAssertion
from onyx.evals.provider import get_provider
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


@contextmanager
def isolated_ephemeral_session_factory(
    engine: Engine,
) -> Generator[Callable[[], Session], None, None]:
    """
    Create a session factory that creates sessions that run in a transaction that gets rolled back.
    This is useful for running evals without any lasting db side effects.
    """
    tenant_id = get_current_tenant_id()
    schema_translate_map = {None: tenant_id}
    conn = engine.connect().execution_options(schema_translate_map=schema_translate_map)
    outer_tx = conn.begin()
    Maker = sessionmaker(bind=conn, expire_on_commit=False, future=True)

    def make_session() -> Session:
        s = Maker()
        s.begin_nested()

        @event.listens_for(s, "after_transaction_end")
        def _restart_savepoint(
            session: Session, transaction: SessionTransaction
        ) -> None:
            if transaction.nested and not (
                transaction._parent is not None and transaction._parent.nested
            ):
                session.begin_nested()

        return s

    try:
        yield make_session
    finally:
        outer_tx.rollback()
        conn.close()


def _chat_full_response_to_eval_result(
    full: ChatFullResponse,
    stream_start_time: float,
) -> ChatFullEvalResult:
    """Map ChatFullResponse from gather_stream_full to eval result components."""
    tools_called = [tc.tool_name for tc in full.tool_calls]
    tool_call_details: list[dict[str, Any]] = [
        {"tool_name": tc.tool_name, "tool_arguments": tc.tool_arguments}
        for tc in full.tool_calls
    ]
    stream_end_time = time.time()
    total_ms = (stream_end_time - stream_start_time) * 1000
    timings = EvalTimings(
        total_ms=total_ms,
        llm_first_token_ms=None,
        tool_execution_ms={},
        stream_processing_ms=total_ms,
    )
    return ChatFullEvalResult(
        answer=full.answer,
        tools_called=tools_called,
        tool_call_details=tool_call_details,
        citations=full.citation_info,
        timings=timings,
    )


def evaluate_tool_assertions(
    tools_called: list[str],
    assertions: ToolAssertion | None,
) -> tuple[bool | None, str | None]:
    """
    Evaluate tool assertions against the tools that were called.

    Args:
        tools_called: List of tool names that were called during evaluation
        assertions: Tool assertions to check, or None if no assertions

    Returns:
        Tuple of (passed, details) where:
        - passed: True if assertions passed, False if failed, None if no assertions
        - details: Human-readable explanation of the result
    """
    if assertions is None:
        return None, None

    expected_tools = set(assertions.expected_tools)
    called_tools = set(tools_called)

    if assertions.require_all:
        # All expected tools must be called
        missing_tools = expected_tools - called_tools
        if missing_tools:
            return False, (
                f"Missing expected tools: {sorted(missing_tools)}. Called tools: {sorted(called_tools)}"
            )
        return True, (
            f"All expected tools called: {sorted(expected_tools)}. Called tools: {sorted(called_tools)}"
        )
    else:
        # At least one expected tool must be called
        matched_tools = expected_tools & called_tools
        if not matched_tools:
            return False, (
                f"None of expected tools called. Expected one of: {sorted(expected_tools)}. Called tools: {sorted(called_tools)}"
            )
        return True, (
            f"Expected tool(s) called: {sorted(matched_tools)}. Called tools: {sorted(called_tools)}"
        )


def _get_answer_with_tools(
    eval_input: dict[str, Any],
    configuration: EvalConfigurationOptions,
) -> EvalToolResult:
    """
    Get answer from the chat system with full tool call tracking.

    Args:
        eval_input: Dictionary containing:
            - 'message': The user message to send
            - 'force_tools' (optional): List of tool types to force for this input
            - 'expected_tools' (optional): List of tool types expected to be called
            - 'require_all_tools' (optional): If true, all expected tools must be called
            - 'model' (optional): Model version to use (e.g., "gpt-4o", "claude-3-5-sonnet")
            - 'model_provider' (optional): Model provider (e.g., "openai", "anthropic")
            - 'temperature' (optional): Temperature for the model
        configuration: Evaluation configuration options

    Returns:
        EvalToolResult containing the answer and tool call information
    """
    engine = get_sqlalchemy_engine()
    with isolated_ephemeral_session_factory(engine) as SessionLocal:
        with SessionLocal() as db_session:
            full_configuration = configuration.get_configuration(db_session)

            # Handle per-input tool forcing (from data file)
            forced_tool_ids: list[int] = []
            input_force_tools = eval_input.get("force_tools", [])
            if input_force_tools:
                from onyx.db.tools import get_builtin_tool
                from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP

                for tool_type in input_force_tools:
                    if tool_type in BUILT_IN_TOOL_MAP:
                        tool_id = get_builtin_tool(
                            db_session, BUILT_IN_TOOL_MAP[tool_type]
                        ).id
                        if tool_id not in forced_tool_ids:
                            forced_tool_ids.append(tool_id)

            # Build tool assertions from per-input config
            tool_assertions: ToolAssertion | None = None
            input_expected_tools = eval_input.get("expected_tools", [])
            if input_expected_tools:
                tool_assertions = ToolAssertion(
                    expected_tools=input_expected_tools,
                    require_all=eval_input.get("require_all_tools", False),
                )

            # Handle per-input model configuration
            llm_override = full_configuration.llm
            input_model = eval_input.get("model")
            input_model_provider = eval_input.get("model_provider")
            input_temperature = eval_input.get("temperature")

            if input_model or input_model_provider or input_temperature is not None:
                # Create a new LLMOverride with per-input values, falling back to config
                llm_override = LLMOverride(
                    model_provider=input_model_provider or llm_override.model_provider,
                    model_version=input_model or llm_override.model_version,
                    temperature=(
                        input_temperature
                        if input_temperature is not None
                        else llm_override.temperature
                    ),
                )

            user = get_user_by_email(configuration.search_permissions_email, db_session)
            if not user:
                raise ValueError(
                    f"User not found for email: {configuration.search_permissions_email}"
                )

            forced_tool_id = forced_tool_ids[0] if forced_tool_ids else None
            request = SendMessageRequest(
                message=eval_input["message"],
                llm_override=llm_override,
                allowed_tool_ids=full_configuration.allowed_tool_ids,
                forced_tool_id=forced_tool_id,
                chat_session_info=ChatSessionCreationRequest(
                    persona_id=DEFAULT_PERSONA_ID,
                    description="Eval session",
                ),
            )

            stream_start_time = time.time()
            state_container = ChatStateContainer()
            packets = handle_stream_message_objects(
                new_msg_req=request,
                user=user,
                db_session=db_session,
                external_state_container=state_container,
            )
            full = gather_stream_full(packets, state_container)

            result = _chat_full_response_to_eval_result(full, stream_start_time)

            # Evaluate tool assertions
            assertion_passed, assertion_details = evaluate_tool_assertions(
                result.tools_called, tool_assertions
            )

            logger.info(
                f"Eval completed. Tools called: {result.tools_called}.\n"
                f"Assertion passed: {assertion_passed}. Details: {assertion_details}\n"
            )

            return EvalToolResult(
                answer=result.answer,
                tools_called=result.tools_called,
                tool_call_details=result.tool_call_details,
                citations=result.citations,
                assertion_passed=assertion_passed,
                assertion_details=assertion_details,
                timings=result.timings,
            )


def _get_multi_turn_answer_with_tools(
    eval_input: dict[str, Any],
    configuration: EvalConfigurationOptions,
) -> MultiTurnEvalResult:
    """
    Get answers from a multi-turn conversation with tool call tracking for each turn.

    Args:
        eval_input: Dictionary containing:
            - 'messages': List of message dicts, each with:
                - 'message': The user message text
                - 'expected_tools' (optional): List of expected tool types
                - 'require_all_tools' (optional): If true, all expected tools must be called
                - 'model' (optional): Model version override for this turn
                - 'model_provider' (optional): Provider override for this turn
                - 'temperature' (optional): Temperature override for this turn
                - 'force_tools' (optional): List of tool types to force
        configuration: Evaluation configuration options

    Returns:
        MultiTurnEvalResult containing per-turn results and aggregate metrics
    """
    messages_data = eval_input.get("messages", [])
    if not messages_data:
        raise ValueError("Multi-turn eval requires 'messages' array in input")

    # Parse messages into EvalMessage objects
    messages: list[EvalMessage] = []
    for msg_data in messages_data:
        messages.append(
            EvalMessage(
                message=msg_data["message"],
                expected_tools=msg_data.get("expected_tools", []),
                require_all_tools=msg_data.get("require_all_tools", False),
                model=msg_data.get("model"),
                model_provider=msg_data.get("model_provider"),
                temperature=msg_data.get("temperature"),
                force_tools=msg_data.get("force_tools", []),
            )
        )

    turn_results: list[EvalToolResult] = []

    engine = get_sqlalchemy_engine()
    with isolated_ephemeral_session_factory(engine) as SessionLocal:
        with SessionLocal() as db_session:
            full_configuration = configuration.get_configuration(db_session)

            user = get_user_by_email(configuration.search_permissions_email, db_session)
            if not user:
                raise ValueError(
                    f"User not found for email: {configuration.search_permissions_email}"
                )
            # Cache user_id to avoid SQLAlchemy expiration issues
            user_id = user.id

            # Create a single chat session for all turns
            chat_session = create_chat_session(
                db_session=db_session,
                description="Multi-turn eval session",
                user_id=user_id,
                persona_id=DEFAULT_PERSONA_ID,
                onyxbot_flow=True,
            )
            chat_session_id = chat_session.id

            # Process each turn sequentially
            for turn_idx, msg in enumerate(messages):
                logger.info(
                    f"Processing turn {turn_idx + 1}/{len(messages)}: {msg.message[:50]}..."
                )

                # Handle per-turn tool forcing
                forced_tool_ids: list[int] = []
                if msg.force_tools:
                    from onyx.db.tools import get_builtin_tool
                    from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP

                    for tool_type in msg.force_tools:
                        if tool_type in BUILT_IN_TOOL_MAP:
                            tool_id = get_builtin_tool(
                                db_session, BUILT_IN_TOOL_MAP[tool_type]
                            ).id
                            if tool_id not in forced_tool_ids:
                                forced_tool_ids.append(tool_id)

                # Build tool assertions for this turn
                tool_assertions: ToolAssertion | None = None
                if msg.expected_tools:
                    tool_assertions = ToolAssertion(
                        expected_tools=msg.expected_tools,
                        require_all=msg.require_all_tools,
                    )

                # Handle per-turn model configuration
                llm_override = full_configuration.llm
                if msg.model or msg.model_provider or msg.temperature is not None:
                    llm_override = LLMOverride(
                        model_provider=msg.model_provider
                        or llm_override.model_provider,
                        model_version=msg.model or llm_override.model_version,
                        temperature=(
                            msg.temperature
                            if msg.temperature is not None
                            else llm_override.temperature
                        ),
                    )

                # Create request for this turn using SendMessageRequest (same API as handle_stream_message_objects)
                # Use AUTO_PLACE_AFTER_LATEST_MESSAGE to chain messages
                forced_tool_id = forced_tool_ids[0] if forced_tool_ids else None
                request = SendMessageRequest(
                    chat_session_id=chat_session_id,
                    parent_message_id=AUTO_PLACE_AFTER_LATEST_MESSAGE,
                    message=msg.message,
                    llm_override=llm_override,
                    allowed_tool_ids=full_configuration.allowed_tool_ids,
                    forced_tool_id=forced_tool_id,
                )

                # Stream and gather results for this turn via handle_stream_message_objects + gather_stream_full
                stream_start_time = time.time()
                state_container = ChatStateContainer()
                packets = handle_stream_message_objects(
                    new_msg_req=request,
                    user=user,
                    db_session=db_session,
                    external_state_container=state_container,
                )
                full = gather_stream_full(packets, state_container)

                result = _chat_full_response_to_eval_result(full, stream_start_time)

                # Evaluate tool assertions for this turn
                assertion_passed, assertion_details = evaluate_tool_assertions(
                    result.tools_called, tool_assertions
                )

                logger.info(
                    f"Turn {turn_idx + 1} completed. Tools called: {result.tools_called}.\n"
                    f"Assertion passed: {assertion_passed}. Details: {assertion_details}\n"
                )

                turn_results.append(
                    EvalToolResult(
                        answer=result.answer,
                        tools_called=result.tools_called,
                        tool_call_details=result.tool_call_details,
                        citations=result.citations,
                        assertion_passed=assertion_passed,
                        assertion_details=assertion_details,
                        timings=result.timings,
                    )
                )

    # Calculate aggregate metrics
    pass_count = sum(1 for r in turn_results if r.assertion_passed is True)
    fail_count = sum(1 for r in turn_results if r.assertion_passed is False)
    # Consider "all passed" only if there are no failures
    # (turns with no assertions don't count as failures)
    all_passed = fail_count == 0

    return MultiTurnEvalResult(
        turn_results=turn_results,
        all_passed=all_passed,
        pass_count=pass_count,
        fail_count=fail_count,
        total_turns=len(turn_results),
    )


def run_eval(
    configuration: EvalConfigurationOptions,
    data: list[dict[str, Any]] | None = None,
    remote_dataset_name: str | None = None,
    provider: EvalProvider = get_provider(),
) -> EvalationAck:
    if data is not None and remote_dataset_name is not None:
        raise ValueError("Cannot specify both data and remote_dataset_name")

    if data is None and remote_dataset_name is None:
        raise ValueError("Must specify either data or remote_dataset_name")

    return provider.eval(
        task=lambda eval_input: _get_answer_with_tools(eval_input, configuration),
        configuration=configuration,
        data=data,
        remote_dataset_name=remote_dataset_name,
        multi_turn_task=lambda eval_input: _get_multi_turn_answer_with_tools(
            eval_input, configuration
        ),
    )


================================================
FILE: backend/onyx/evals/eval_cli.py
================================================
#!/usr/bin/env python3
"""
CLI for running evaluations with local configurations.
"""

import argparse
import json
import logging
import os
from typing import Any

import braintrust
import requests

from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from onyx.configs.constants import POSTGRES_WEB_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.evals.eval import run_eval
from onyx.evals.models import EvalationAck
from onyx.evals.models import EvalConfigurationOptions
from onyx.evals.provider import get_provider
from onyx.tracing.setup import setup_tracing


def setup_session_factory() -> None:
    SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)
    SqlEngine.init_engine(
        pool_size=POSTGRES_API_SERVER_POOL_SIZE,
        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
    )


def load_data_local(
    local_data_path: str,
) -> list[dict[str, Any]]:
    if not os.path.isfile(local_data_path):
        raise ValueError(f"Local data file does not exist: {local_data_path}")
    with open(local_data_path, "r") as f:
        return json.load(f)


def configure_logging_for_evals(verbose: bool) -> None:
    """Set logging level to WARNING to reduce noise during evals."""
    if verbose:
        return

    # Set environment variable for any future logger creation
    os.environ["LOG_LEVEL"] = "WARNING"

    # Force WARNING level for root logger and its handlers
    root = logging.getLogger()
    root.setLevel(logging.WARNING)
    for handler in root.handlers:
        handler.setLevel(logging.WARNING)

    # Force WARNING level for all existing loggers and their handlers
    for name in list(logging.Logger.manager.loggerDict.keys()):
        logger = logging.getLogger(name)
        logger.setLevel(logging.WARNING)
        for handler in logger.handlers:
            handler.setLevel(logging.WARNING)

    # Set a basic config to ensure new loggers also use WARNING
    logging.basicConfig(level=logging.WARNING, force=True)


def run_local(
    local_data_path: str | None,
    remote_dataset_name: str | None,
    search_permissions_email: str | None = None,
    no_send_logs: bool = False,
    local_only: bool = False,
    verbose: bool = False,
) -> EvalationAck:
    """
    Run evaluation with local configurations.

    Tool forcing and assertions are configured per-test in the data file using:
    - force_tools: List of tool type names to force
    - expected_tools: List of tool type names expected to be called
    - require_all_tools: If true, all expected tools must be called

    Args:
        local_data_path: Path to local JSON file
        remote_dataset_name: Name of remote Braintrust dataset
        search_permissions_email: Optional email address to impersonate for the evaluation
        no_send_logs: Whether to skip sending logs to Braintrust
        local_only: If True, use LocalEvalProvider (CLI output only, no Braintrust)

    Returns:
        EvalationAck: The evaluation result
    """
    setup_session_factory()
    configure_logging_for_evals(
        verbose=verbose,
    )
    # Only setup tracing if not running in local-only mode
    if not local_only:
        setup_tracing()

    if search_permissions_email is None:
        raise ValueError("search_permissions_email is required for local evaluation")

    configuration = EvalConfigurationOptions(
        search_permissions_email=search_permissions_email,
        dataset_name=remote_dataset_name or "local",
        no_send_logs=no_send_logs,
    )

    # Get the appropriate provider
    provider = get_provider(local_only=local_only)

    if remote_dataset_name:
        score = run_eval(
            configuration=configuration,
            remote_dataset_name=remote_dataset_name,
            provider=provider,
        )
    else:
        if local_data_path is None:
            raise ValueError(
                "local_data_path or remote_dataset_name is required for local evaluation"
            )
        data = load_data_local(local_data_path)
        score = run_eval(configuration=configuration, data=data, provider=provider)

    return score


def run_remote(
    base_url: str,
    api_key: str,
    remote_dataset_name: str,
    search_permissions_email: str,
    payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Trigger an eval pipeline execution on a remote server.

    Tool forcing and assertions are configured per-test in the dataset.

    Args:
        base_url: Base URL of the remote server (e.g., "https://test.onyx.app")
        api_key: API key for authentication
        remote_dataset_name: Name of remote Braintrust dataset
        search_permissions_email: Email address to use for the evaluation.
        payload: Optional payload to send with the request

    Returns:
        Response from the remote server

    Raises:
        requests.RequestException: If the request fails
    """
    if payload is None:
        payload = {}

    payload["search_permissions_email"] = search_permissions_email
    payload["dataset_name"] = remote_dataset_name

    url = f"{base_url}/api/evals/eval_run"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    response = requests.post(url, headers=headers, json=payload)

    response.raise_for_status()
    return response.json()


def main() -> None:
    """Main CLI entry point."""
    parser = argparse.ArgumentParser(
        description="Run evaluations with local configurations"
    )

    parser.add_argument(
        "--local-data-path",
        type=str,
        help="Path to local JSON file containing test data",
    )

    parser.add_argument(
        "--remote-dataset-name",
        type=str,
        help="Name of remote Braintrust dataset",
    )

    parser.add_argument(
        "--braintrust-project",
        type=str,
        help="Braintrust project name",
        default="Onyx",
    )

    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")

    # Remote eval arguments
    parser.add_argument(
        "--base-url",
        type=str,
        default="https://test.onyx.app",
        help="Base URL of the remote server (default: https://test.onyx.app)",
    )

    parser.add_argument(
        "--api-key",
        type=str,
        help="API key for authentication with the remote server",
    )

    parser.add_argument(
        "--remote",
        action="store_true",
        help="Run evaluation on remote server instead of locally",
    )

    parser.add_argument(
        "--search-permissions-email",
        type=str,
        help="Email address to impersonate for the evaluation",
    )

    parser.add_argument(
        "--no-send-logs",
        action="store_true",
        help="Do not send logs to the remote server",
        default=False,
    )

    parser.add_argument(
        "--local-only",
        action="store_true",
        help="Run evals locally without Braintrust, output results to CLI only",
        default=False,
    )

    args = parser.parse_args()

    if args.local_data_path:
        print(f"Loading data from local file: {args.local_data_path}")
    elif args.remote_dataset_name:
        if args.local_only:
            raise ValueError(
                "--local-only cannot be used with --remote-dataset-name. Use --local-data-path with a local JSON file instead."
            )
        print(f"Loading data from remote dataset: {args.remote_dataset_name}")
        dataset = braintrust.init_dataset(
            project=args.braintrust_project, name=args.remote_dataset_name
        )
        dataset_size = len(list(dataset.fetch()))
        print(f"Dataset size: {dataset_size}")
    if args.remote:
        if not args.api_key:
            print("Using API Key from ONYX_EVAL_API_KEY")
        api_key: str = (
            args.api_key if args.api_key else os.environ.get("ONYX_EVAL_API_KEY", "")
        )
        print(f"Running evaluation on remote server: {args.base_url}")

        if args.search_permissions_email:
            print(f"Using search permissions email: {args.search_permissions_email}")

        try:
            result = run_remote(
                args.base_url,
                api_key,
                args.remote_dataset_name,
                search_permissions_email=args.search_permissions_email,
            )
            print(f"Remote evaluation triggered successfully: {result}")
        except requests.RequestException as e:
            print(f"Error triggering remote evaluation: {e}")
            return
    else:
        if args.local_only:
            print("Running in local-only mode (no Braintrust)")
        else:
            print(f"Using Braintrust project: {args.braintrust_project}")

        if args.search_permissions_email:
            print(f"Using search permissions email: {args.search_permissions_email}")

        run_local(
            local_data_path=args.local_data_path,
            remote_dataset_name=args.remote_dataset_name,
            search_permissions_email=args.search_permissions_email,
            no_send_logs=args.no_send_logs,
            local_only=args.local_only,
            verbose=args.verbose,
        )


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/evals/models.py
================================================
from abc import ABC
from abc import abstractmethod
from collections.abc import Callable
from typing import Any

from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session

from onyx.db.tools import get_builtin_tool
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP


class ToolAssertion(BaseModel):
    """Assertion about expected tool usage during evaluation."""

    expected_tools: list[str]  # Tool type names that should be called
    require_all: bool = False  # If True, ALL expected tools must be called


class EvalTimings(BaseModel):
    """Timing information for eval execution."""

    total_ms: float  # Total time for the eval
    llm_first_token_ms: float | None = None  # Time to first token from LLM
    tool_execution_ms: dict[str, float] = Field(
        default_factory=dict
    )  # Per-tool timings
    stream_processing_ms: float | None = None  # Time to process the stream


class ChatFullEvalResult(BaseModel):
    """Raw eval components from ChatFullResponse (before tool assertions)."""

    answer: str
    tools_called: list[str]
    tool_call_details: list[dict[str, Any]]
    citations: list[CitationInfo]
    timings: EvalTimings


class EvalToolResult(BaseModel):
    """Result of a single eval with tool call information."""

    answer: str
    tools_called: list[str]  # Names of tools that were called
    tool_call_details: list[dict[str, Any]]  # Full tool call info
    citations: list[CitationInfo]  # Citations used in the answer
    assertion_passed: bool | None = None  # None if no assertion configured
    assertion_details: str | None = None  # Explanation of pass/fail
    timings: EvalTimings | None = None  # Timing information for the eval


class EvalMessage(BaseModel):
    """Single message in a multi-turn evaluation conversation."""

    message: str  # The message text to send
    expected_tools: list[str] = Field(
        default_factory=list
    )  # Expected tools for this turn
    require_all_tools: bool = False  # If True, ALL expected tools must be called
    # Per-message model configuration overrides
    model: str | None = None
    model_provider: str | None = None
    temperature: float | None = None
    force_tools: list[str] = Field(default_factory=list)  # Tools to force for this turn


class MultiTurnEvalResult(BaseModel):
    """Result of a multi-turn evaluation containing per-message results."""

    turn_results: list[EvalToolResult]  # Results for each turn/message
    all_passed: bool  # True if all turn assertions passed
    pass_count: int  # Number of turns that passed
    fail_count: int  # Number of turns that failed
    total_turns: int  # Total number of turns


class EvalConfiguration(BaseModel):
    llm: LLMOverride = Field(default_factory=LLMOverride)
    search_permissions_email: str
    allowed_tool_ids: list[int]


class EvalConfigurationOptions(BaseModel):
    builtin_tool_types: list[str] = list(BUILT_IN_TOOL_MAP.keys())
    llm: LLMOverride = LLMOverride(
        model_provider=None,
        model_version="gpt-4o",
        temperature=0.0,
    )
    search_permissions_email: str
    dataset_name: str
    no_send_logs: bool = False
    # Optional override for Braintrust project (defaults to BRAINTRUST_PROJECT env var)
    braintrust_project: str | None = None
    # Optional experiment name for the eval run (shows in Braintrust UI)
    experiment_name: str | None = None

    def get_configuration(self, db_session: Session) -> EvalConfiguration:
        return EvalConfiguration(
            llm=self.llm,
            search_permissions_email=self.search_permissions_email,
            allowed_tool_ids=[
                get_builtin_tool(db_session, BUILT_IN_TOOL_MAP[tool]).id
                for tool in self.builtin_tool_types
            ],
        )


class EvalationAck(BaseModel):
    success: bool


class EvalProvider(ABC):
    @abstractmethod
    def eval(
        self,
        task: Callable[[dict[str, Any]], EvalToolResult],
        configuration: EvalConfigurationOptions,
        data: list[dict[str, Any]] | None = None,
        remote_dataset_name: str | None = None,
        multi_turn_task: "Callable[[dict[str, Any]], MultiTurnEvalResult] | None" = None,
    ) -> EvalationAck:
        pass


================================================
FILE: backend/onyx/evals/one_off/create_braintrust_dataset.py
================================================
#!/usr/bin/env python3
"""
Script to create a Braintrust dataset from the DR Master Question & Metric Sheet CSV.

This script:
1. Parses the CSV file
2. Filters records where "Should we use it" is TRUE and "web-only" is in categories
3. Creates a Braintrust dataset with Question as input and research_type metadata

Usage:
    python create_braintrust_dataset.py --dataset-name "MyDataset"
    python create_braintrust_dataset.py --dataset-name "MyDataset" --csv-path "/path/to/csv"
"""

import argparse
import csv
import os
import sys
from typing import Any
from typing import Dict
from typing import List

from onyx.configs.app_configs import BRAINTRUST_API_KEY

try:
    from braintrust import init_dataset
except ImportError:
    print(
        "Error: braintrust package not found. Please install it with: pip install braintrust"
    )
    sys.exit(1)


def column_letter_to_index(column_letter: str) -> int:
    """Convert Google Sheets column letter (A, B, C, etc.) to 0-based index."""
    result = 0
    for char in column_letter.upper():
        result = result * 26 + (ord(char) - ord("A") + 1)
    return result - 1


def parse_csv_file(csv_path: str) -> List[Dict[str, Any]]:
    """Parse the CSV file and extract relevant records."""
    records = []

    with open(csv_path, "r", encoding="utf-8") as file:
        # Skip the first few header rows and read the actual data
        lines = file.readlines()

        # Find the actual data start (skip header rows)
        data_start = 0
        for i, line in enumerate(lines):
            if "Should we use it?" in line:
                data_start = i + 1
                break

        # Parse the CSV data starting from the data_start line
        csv_reader = csv.reader(lines[data_start:])

        # Define Google Sheets column references for easy modification
        SHOULD_USE_COL = "C"  # "Should we use it?"
        QUESTION_COL = "H"  # "Question"
        EXPECTED_DEPTH_COL = "J"  # "Expected Depth"
        CATEGORIES_COL = "M"  # "Categories"
        OPENAI_DEEP_COL = "AA"  # "OpenAI Deep Answer"
        OPENAI_THINKING_COL = "O"  # "OpenAI Thinking Answer"

        for row_num, row in enumerate(csv_reader, start=data_start + 1):
            if len(row) < 15:  # Ensure we have enough columns
                continue

            # Extract relevant fields using Google Sheets column references
            should_use = (
                row[column_letter_to_index(SHOULD_USE_COL)].strip().upper()
                if len(row) > column_letter_to_index(SHOULD_USE_COL)
                else ""
            )
            question = (
                row[column_letter_to_index(QUESTION_COL)].strip()
                if len(row) > column_letter_to_index(QUESTION_COL)
                else ""
            )
            expected_depth = (
                row[column_letter_to_index(EXPECTED_DEPTH_COL)].strip()
                if len(row) > column_letter_to_index(EXPECTED_DEPTH_COL)
                else ""
            )
            categories = (
                row[column_letter_to_index(CATEGORIES_COL)].strip()
                if len(row) > column_letter_to_index(CATEGORIES_COL)
                else ""
            )
            openai_deep_answer = (
                row[column_letter_to_index(OPENAI_DEEP_COL)].strip()
                if len(row) > column_letter_to_index(OPENAI_DEEP_COL)
                else ""
            )
            openai_thinking_answer = (
                row[column_letter_to_index(OPENAI_THINKING_COL)].strip()
                if len(row) > column_letter_to_index(OPENAI_THINKING_COL)
                else ""
            )

            # Filter records: should_use = TRUE and categories contains "web-only"
            if (
                should_use == "TRUE" and "web-only" in categories and question
            ):  # Ensure question is not empty
                if expected_depth == "Deep":
                    records.extend(
                        [
                            {
                                "question": question
                                + ". All info is contained in the quesiton. DO NOT ask any clarifying questions.",
                                "research_type": "DEEP",
                                "categories": categories,
                                "expected_depth": expected_depth,
                                "expected_answer": openai_deep_answer,
                                "row_number": row_num,
                            }
                        ]
                    )
                else:
                    records.extend(
                        [
                            {
                                "question": question,
                                "research_type": "THOUGHTFUL",
                                "categories": categories,
                                "expected_depth": expected_depth,
                                "expected_answer": openai_thinking_answer,
                                "row_number": row_num,
                            }
                        ]
                    )

    return records


def create_braintrust_dataset(records: List[Dict[str, Any]], dataset_name: str) -> None:
    """Create a Braintrust dataset with the filtered records."""

    # Check if BRAINTRUST_API_KEY is set
    if BRAINTRUST_API_KEY == "":
        print("WARNING: BRAINTRUST_API_KEY environment variable is not set.")
        print(
            "The script will show what would be inserted but won't actually create the dataset."
        )
        print(
            "To actually create the dataset, set your BRAINTRUST_API_KEY environment variable."
        )
        print()

        # Show what would be inserted
        print(
            f"Would create Braintrust dataset '{dataset_name}' with {len(records)} records:"
        )
        for i, record in enumerate(records, 1):
            print(f"Record {i}/{len(records)}:")
            print(f"  Question: {record['question'][:100]}...")
            print(f"  Research Type: {record['research_type']}")
            print(f"  Expected Answer: {record['expected_answer'][:100]}...")
            print()
        return

    # Initialize the dataset
    dataset = init_dataset("Onyx", dataset_name, api_key=BRAINTRUST_API_KEY)

    print(f"Creating Braintrust dataset with {len(records)} records...")

    # Insert records into the dataset
    for i, record in enumerate(records, 1):
        record_id = dataset.insert(
            {"message": record["question"], "research_type": record["research_type"]},
            expected=record["expected_answer"],
        )
        print(f"Inserted record {i}/{len(records)}: ID {record_id}")
        print(f"  Question: {record['question'][:100]}...")
        print(f"  Research Type: {record['research_type']}")
        print(f"  Expected Answer: {record['expected_answer'][:100]}...")
        print()

    # Flush to ensure all records are sent
    dataset.flush()
    print(f"Successfully created dataset with {len(records)} records!")


def main() -> None:
    """Main function to run the script."""
    parser = argparse.ArgumentParser(
        description="Create a Braintrust dataset from the DR Master Question & Metric Sheet CSV"
    )
    parser.add_argument(
        "--dataset-name", required=True, help="Name of the Braintrust dataset to create"
    )
    parser.add_argument(
        "--csv-path",
        default="/Users/richardguan/onyx/backend/onyx/evals/data/DR Master Question & Metric Sheet - Sheet1.csv",
        help="Path to the CSV file (default: %(default)s)",
    )

    args = parser.parse_args()

    csv_path = args.csv_path
    dataset_name = args.dataset_name

    if not os.path.exists(csv_path):
        print(f"Error: CSV file not found at {csv_path}")
        sys.exit(1)

    print("Parsing CSV file...")
    records = parse_csv_file(csv_path)

    print(f"Found {len(records)} records matching criteria:")
    print("- Should we use it = TRUE")
    print("- Categories contains 'web-only'")
    print("- Question is not empty")
    print()

    if not records:
        print("No records found matching the criteria!")
        sys.exit(1)

    # Show summary of research types
    deep_count = sum(1 for r in records if r["research_type"] == "DEEP")
    thoughtful_count = sum(1 for r in records if r["research_type"] == "THOUGHTFUL")

    print("Research type breakdown:")
    print(f"  DEEP: {deep_count}")
    print(f"  THOUGHTFUL: {thoughtful_count}")
    print()

    # Create the Braintrust dataset
    create_braintrust_dataset(records, dataset_name)


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/evals/provider.py
================================================
from onyx.evals.models import EvalProvider
from onyx.evals.providers.braintrust import BraintrustEvalProvider
from onyx.evals.providers.local import LocalEvalProvider


def get_provider(local_only: bool = False) -> EvalProvider:
    """
    Get the appropriate eval provider.

    Args:
        local_only: If True, use LocalEvalProvider (CLI output only, no Braintrust).
                   If False, use BraintrustEvalProvider.

    Returns:
        The appropriate EvalProvider instance.
    """
    if local_only:
        return LocalEvalProvider()
    return BraintrustEvalProvider()


================================================
FILE: backend/onyx/evals/providers/braintrust.py
================================================
from collections.abc import Callable
from typing import Any
from typing import Union

from braintrust import Eval
from braintrust import EvalCase
from braintrust import init_dataset
from braintrust import Score

from onyx.configs.app_configs import BRAINTRUST_MAX_CONCURRENCY
from onyx.configs.app_configs import BRAINTRUST_PROJECT
from onyx.evals.models import EvalationAck
from onyx.evals.models import EvalConfigurationOptions
from onyx.evals.models import EvalProvider
from onyx.evals.models import EvalToolResult
from onyx.evals.models import MultiTurnEvalResult
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Union type for both single and multi-turn results
EvalResult = Union[EvalToolResult, MultiTurnEvalResult]


def tool_assertion_scorer(
    input: dict[str, Any], output: EvalResult, expected: EvalResult | None
) -> Score:
    """
    Scorer that checks if tool assertions passed.

    Handles both single-turn (EvalToolResult) and multi-turn (MultiTurnEvalResult) outputs.

    Args:
        input: The input data for the evaluation case.
        output: The actual output from the task.
        expected: The expected output (unused for this scorer).

    Returns:
        Score with value 1.0 if passed or no assertions, 0.0 if failed.
    """
    # input and expected are unused but required by Braintrust scorer signature
    _ = input, expected

    # Handle multi-turn results
    if isinstance(output, MultiTurnEvalResult):
        # Calculate score based on pass rate
        if output.total_turns == 0:
            score = 1.0
        else:
            # Score is the ratio of passed assertions
            assertions_evaluated = output.pass_count + output.fail_count
            if assertions_evaluated == 0:
                score = 1.0  # No assertions configured
            else:
                score = output.pass_count / assertions_evaluated

        return Score(
            name="tool_assertion",
            score=score,
            metadata={
                "is_multi_turn": True,
                "total_turns": output.total_turns,
                "pass_count": output.pass_count,
                "fail_count": output.fail_count,
                "all_passed": output.all_passed,
                "turn_details": [
                    {
                        "tools_called": r.tools_called,
                        "assertion_passed": r.assertion_passed,
                        "assertion_details": r.assertion_details,
                    }
                    for r in output.turn_results
                ],
            },
        )

    # Handle single-turn results (EvalToolResult)
    if output.assertion_passed is None:
        # No assertions configured - return passing score
        return Score(
            name="tool_assertion",
            score=1.0,
            metadata={
                "is_multi_turn": False,
                "tools_called": output.tools_called,
                "tools_called_count": len(output.tools_called),
                "assertion_configured": False,
            },
        )

    return Score(
        name="tool_assertion",
        score=1.0 if output.assertion_passed else 0.0,
        metadata={
            "is_multi_turn": False,
            "tools_called": output.tools_called,
            "tools_called_count": len(output.tools_called),
            "assertion_passed": output.assertion_passed,
            "assertion_details": output.assertion_details,
            "tool_call_details": output.tool_call_details,
        },
    )


class BraintrustEvalProvider(EvalProvider):
    def eval(
        self,
        task: Callable[[dict[str, Any]], EvalToolResult],
        configuration: EvalConfigurationOptions,
        data: list[dict[str, Any]] | None = None,
        remote_dataset_name: str | None = None,
        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None = None,
    ) -> EvalationAck:
        if data is not None and remote_dataset_name is not None:
            raise ValueError("Cannot specify both data and remote_dataset_name")
        if data is None and remote_dataset_name is None:
            raise ValueError("Must specify either data or remote_dataset_name")

        # Create a wrapper task that dispatches to the appropriate handler
        def dispatch_task(eval_input: dict[str, Any]) -> EvalResult:
            if "messages" in eval_input and multi_turn_task is not None:
                return multi_turn_task(eval_input)
            return task(eval_input)

        project_name = configuration.braintrust_project or BRAINTRUST_PROJECT
        experiment_name = configuration.experiment_name

        eval_data: Any = None
        if remote_dataset_name is not None:
            eval_data = init_dataset(project=project_name, name=remote_dataset_name)
        else:
            if data:
                eval_data = [
                    EvalCase(
                        input={
                            **item.get("input", {}),
                            # Pass through per-test tool configuration (for single-turn)
                            "force_tools": item.get("force_tools", []),
                            "expected_tools": item.get("expected_tools", []),
                            "require_all_tools": item.get("require_all_tools", False),
                            # Pass through per-test model configuration
                            "model": item.get("model"),
                            "model_provider": item.get("model_provider"),
                            "temperature": item.get("temperature"),
                        },
                        expected=item.get("expected"),
                    )
                    for item in data
                ]

        metadata = configuration.model_dump()

        Eval(  # type: ignore[misc]
            name=project_name,
            experiment_name=experiment_name,
            data=eval_data,
            task=dispatch_task,
            scores=[tool_assertion_scorer],
            metadata=metadata,
            max_concurrency=BRAINTRUST_MAX_CONCURRENCY,
            no_send_logs=configuration.no_send_logs,
        )
        return EvalationAck(success=True)


================================================
FILE: backend/onyx/evals/providers/local.py
================================================
"""
Local eval provider that runs evaluations and outputs results to the CLI.
No external dependencies like Braintrust required.
"""

from collections.abc import Callable
from typing import Any

from onyx.evals.models import EvalationAck
from onyx.evals.models import EvalConfigurationOptions
from onyx.evals.models import EvalProvider
from onyx.evals.models import EvalToolResult
from onyx.evals.models import MultiTurnEvalResult
from onyx.utils.logger import setup_logger

logger = setup_logger()

# ANSI color codes
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
BLUE = "\033[94m"
BOLD = "\033[1m"
RESET = "\033[0m"
DIM = "\033[2m"


def _display_single_turn_result(
    result: EvalToolResult,
    passed_count: list[int],
    failed_count: list[int],
    no_assertion_count: list[int],
) -> None:
    """Display results for a single turn and update counters."""
    # Display timing trace
    if result.timings:
        print(f"  {BOLD}Trace:{RESET}")
        print(f"    Total: {result.timings.total_ms:.0f}ms")
        if result.timings.llm_first_token_ms is not None:
            print(f"    First token: {result.timings.llm_first_token_ms:.0f}ms")
        if result.timings.tool_execution_ms:
            for tool_name, duration_ms in result.timings.tool_execution_ms.items():
                print(f"    {tool_name}: {duration_ms:.0f}ms")

    # Display tools called
    tools_str = ", ".join(result.tools_called) if result.tools_called else "(none)"
    print(f"  Tools called: {BLUE}{tools_str}{RESET}")

    # Display assertion result
    if result.assertion_passed is None:
        print(f"  Assertion: {YELLOW}N/A{RESET} - No assertion configured")
        no_assertion_count[0] += 1
    elif result.assertion_passed:
        print(f"  Assertion: {GREEN}PASS{RESET} - {result.assertion_details}")
        passed_count[0] += 1
    else:
        print(f"  Assertion: {RED}FAIL{RESET} - {result.assertion_details}")
        failed_count[0] += 1

    # Display truncated answer
    answer = result.answer
    truncated_answer = answer[:200] + "..." if len(answer) > 200 else answer
    truncated_answer = truncated_answer.replace("\n", " ")
    print(f"  Answer: {truncated_answer}")


class LocalEvalProvider(EvalProvider):
    """
    Eval provider that runs evaluations locally and prints results to the CLI.
    Does not require Braintrust or any external service.
    """

    def eval(
        self,
        task: Callable[[dict[str, Any]], EvalToolResult],
        configuration: EvalConfigurationOptions,  # noqa: ARG002
        data: list[dict[str, Any]] | None = None,
        remote_dataset_name: str | None = None,
        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None = None,
    ) -> EvalationAck:
        if remote_dataset_name is not None:
            raise ValueError(
                "LocalEvalProvider does not support remote datasets. Use --local-data-path with a local JSON file."
            )

        if data is None:
            raise ValueError("data is required for LocalEvalProvider")

        total = len(data)
        # Use lists to allow mutation in helper function
        passed = [0]
        failed = [0]
        no_assertion = [0]

        print(f"\n{BOLD}Running {total} evaluation(s)...{RESET}\n")
        print("=" * 60)

        for i, item in enumerate(data, 1):
            input_data = item.get("input", {})

            # Check if this is a multi-turn eval (has 'messages' array)
            if "messages" in input_data:
                self._run_multi_turn_eval(
                    i, total, item, multi_turn_task, passed, failed, no_assertion
                )
            else:
                self._run_single_turn_eval(
                    i, total, item, task, passed, failed, no_assertion
                )

        # Summary
        print("\n" + "=" * 60)
        total_with_assertions = passed[0] + failed[0]
        if total_with_assertions > 0:
            pass_rate = (passed[0] / total_with_assertions) * 100
            print(
                f"{BOLD}Summary:{RESET} {passed[0]}/{total_with_assertions} passed ({pass_rate:.1f}%)"
            )
        else:
            print(f"{BOLD}Summary:{RESET} No assertions configured")

        print(f"  {GREEN}Passed:{RESET} {passed[0]}")
        print(f"  {RED}Failed:{RESET} {failed[0]}")
        if no_assertion[0] > 0:
            print(f"  {YELLOW}No assertion:{RESET} {no_assertion[0]}")
        print("=" * 60 + "\n")

        # Return success if no failures
        return EvalationAck(success=(failed[0] == 0))

    def _run_single_turn_eval(
        self,
        i: int,
        total: int,
        item: dict[str, Any],
        task: Callable[[dict[str, Any]], EvalToolResult],
        passed: list[int],
        failed: list[int],
        no_assertion: list[int],
    ) -> None:
        """Run a single-turn evaluation."""
        # Build input with tool and model config
        eval_input = {
            **item.get("input", {}),
            # Tool configuration
            "force_tools": item.get("force_tools", []),
            "expected_tools": item.get("expected_tools", []),
            "require_all_tools": item.get("require_all_tools", False),
            # Model configuration
            "model": item.get("model"),
            "model_provider": item.get("model_provider"),
            "temperature": item.get("temperature"),
        }

        message = eval_input.get("message", "(no message)")
        truncated_message = message[:50] + "..." if len(message) > 50 else message

        # Show model if specified
        model_info = ""
        if item.get("model"):
            model_info = f" [{item.get('model')}]"

        print(f'\n{BOLD}[{i}/{total}]{RESET} "{truncated_message}"{model_info}')

        try:
            result = task(eval_input)
            _display_single_turn_result(result, passed, failed, no_assertion)
        except Exception as e:
            print(f"  {RED}ERROR:{RESET} {e}")
            failed[0] += 1
            logger.exception(f"Error running eval for input: {message}")

    def _run_multi_turn_eval(
        self,
        i: int,
        total: int,
        item: dict[str, Any],
        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None,
        passed: list[int],
        failed: list[int],
        no_assertion: list[int],
    ) -> None:
        """Run a multi-turn evaluation."""
        if multi_turn_task is None:
            print(
                f"\n{BOLD}[{i}/{total}]{RESET} {RED}ERROR:{RESET} Multi-turn task not configured"
            )
            failed[0] += 1
            return

        input_data = item.get("input", {})
        messages = input_data.get("messages", [])
        num_turns = len(messages)

        # Show first message as preview
        first_msg = (
            messages[0].get("message", "(no message)") if messages else "(no messages)"
        )
        truncated_first = first_msg[:40] + "..." if len(first_msg) > 40 else first_msg

        print(f"\n{BOLD}[{i}/{total}] Multi-turn ({num_turns} turns){RESET}")
        print(f'  First: "{truncated_first}"')

        try:
            # Pass the full input with messages
            eval_input = {**input_data}
            result = multi_turn_task(eval_input)

            # Display each turn's result
            for turn_idx, turn_result in enumerate(result.turn_results):
                turn_msg = messages[turn_idx].get("message", "")
                truncated_turn = (
                    turn_msg[:40] + "..." if len(turn_msg) > 40 else turn_msg
                )
                print(f'\n  {DIM}Turn {turn_idx + 1}:{RESET} "{truncated_turn}"')
                _display_single_turn_result(turn_result, passed, failed, no_assertion)

            # Show multi-turn summary
            status = (
                f"{GREEN}ALL PASSED{RESET}"
                if result.all_passed
                else f"{RED}SOME FAILED{RESET}"
            )
            print(
                f"\n  {BOLD}Multi-turn result:{RESET} {status} ({result.pass_count}/{result.total_turns} turns passed)"
            )

        except Exception as e:
            print(f"  {RED}ERROR:{RESET} {e}")
            failed[0] += 1
            logger.exception(f"Error running multi-turn eval: {first_msg}")


================================================
FILE: backend/onyx/feature_flags/__init__.py
================================================


================================================
FILE: backend/onyx/feature_flags/factory.py
================================================
from onyx.configs.app_configs import DEV_MODE
from onyx.feature_flags.interface import FeatureFlagProvider
from onyx.feature_flags.interface import NoOpFeatureFlagProvider
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from shared_configs.configs import MULTI_TENANT


def get_default_feature_flag_provider() -> FeatureFlagProvider:
    """
    Get the default feature flag provider implementation.

    Returns the PostHog-based provider in Enterprise Edition when available,
    otherwise returns a no-op provider that always returns False.

    This function is designed for dependency injection - callers should
    use this factory rather than directly instantiating providers.

    Returns:
        FeatureFlagProvider: The configured feature flag provider instance
    """
    if MULTI_TENANT or DEV_MODE:
        return fetch_versioned_implementation_with_fallback(
            module="onyx.feature_flags.factory",
            attribute="get_posthog_feature_flag_provider",
            fallback=lambda: NoOpFeatureFlagProvider(),
        )()
    return NoOpFeatureFlagProvider()


================================================
FILE: backend/onyx/feature_flags/feature_flags_keys.py
================================================
"""
Feature flag keys used throughout the application.
Centralizes feature flag key definitions to avoid magic strings.
"""


================================================
FILE: backend/onyx/feature_flags/flags.py
================================================


================================================
FILE: backend/onyx/feature_flags/interface.py
================================================
import abc
from typing import Any
from uuid import UUID

from onyx.db.models import User
from shared_configs.configs import ENVIRONMENT


class FeatureFlagProvider(abc.ABC):
    """
    Abstract base class for feature flag providers.

    Implementations should provide vendor-specific logic for checking
    whether a feature flag is enabled for a given user.
    """

    @abc.abstractmethod
    def feature_enabled(
        self,
        flag_key: str,
        user_id: UUID,
        user_properties: dict[str, Any] | None = None,
    ) -> bool:
        """
        Check if a feature flag is enabled for a user.

        Args:
            flag_key: The identifier for the feature flag to check
            user_id: The unique identifier for the user
            user_properties: Optional dictionary of user properties/attributes
                           that may influence flag evaluation

        Returns:
            True if the feature is enabled for the user, False otherwise
        """
        raise NotImplementedError

    def feature_enabled_for_user_tenant(
        self, flag_key: str, user: User, tenant_id: str
    ) -> bool:
        """
        Check if a feature flag is enabled for a user.
        """
        return self.feature_enabled(
            flag_key,
            # For anonymous/unauthenticated users, use a fixed UUID as fallback
            user.id if user else UUID("caa1e0cd-6ee6-4550-b1ec-8affaef4bf83"),
            user_properties={
                "tenant_id": tenant_id,
                "email": user.email if user else "anonymous@onyx.app",
            },
        )


class NoOpFeatureFlagProvider(FeatureFlagProvider):
    """
    No-operation feature flag provider that always returns False.

    Used as a fallback when no real feature flag provider is available
    (e.g., in MIT version without PostHog).
    """

    def feature_enabled(
        self,
        flag_key: str,  # noqa: ARG002
        user_id: UUID,  # noqa: ARG002
        user_properties: dict[str, Any] | None = None,  # noqa: ARG002
    ) -> bool:
        environment = ENVIRONMENT
        if environment == "local":
            return True
        return False


================================================
FILE: backend/onyx/federated_connectors/__init__.py
================================================


================================================
FILE: backend/onyx/federated_connectors/factory.py
================================================
"""Factory for creating federated connector instances."""

import importlib
from typing import Any
from typing import Type

from onyx.configs.constants import FederatedConnectorSource
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP
from onyx.utils.logger import setup_logger

logger = setup_logger()


class FederatedConnectorMissingException(Exception):
    pass


# Cache for already imported federated connector classes
_federated_connector_cache: dict[FederatedConnectorSource, Type[FederatedConnector]] = (
    {}
)


def _load_federated_connector_class(
    source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
    """Dynamically load and cache a federated connector class."""
    if source in _federated_connector_cache:
        return _federated_connector_cache[source]

    if source not in FEDERATED_CONNECTOR_CLASS_MAP:
        raise FederatedConnectorMissingException(
            f"Federated connector not found for source={source}"
        )

    mapping = FEDERATED_CONNECTOR_CLASS_MAP[source]

    try:
        module = importlib.import_module(mapping.module_path)
        connector_class = getattr(module, mapping.class_name)
        _federated_connector_cache[source] = connector_class
        return connector_class
    except (ImportError, AttributeError) as e:
        raise FederatedConnectorMissingException(
            f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
        )


def get_federated_connector(
    source: FederatedConnectorSource,
    credentials: dict[str, Any],
) -> FederatedConnector:
    """Get an instance of the appropriate federated connector."""
    connector_cls = get_federated_connector_cls(source)
    return connector_cls(credentials)


def get_federated_connector_cls(
    source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
    """Get the class of the appropriate federated connector."""
    return _load_federated_connector_class(source)


================================================
FILE: backend/onyx/federated_connectors/federated_retrieval.py
================================================
from collections import defaultdict
from collections.abc import Callable
from typing import Any
from uuid import UUID

from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FederatedConnectorSource
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import InferenceChunk
from onyx.db.federated import (
    get_federated_connector_document_set_mappings_by_document_set_names,
)
from onyx.db.federated import list_federated_connector_oauth_tokens
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.slack_bot import fetch_slack_bots
from onyx.federated_connectors.factory import get_federated_connector
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.onyxbot.slack.models import SlackContext
from onyx.utils.logger import setup_logger

logger = setup_logger()


class FederatedRetrievalInfo(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    retrieval_function: Callable[[ChunkIndexRequest], list[InferenceChunk]]
    source: FederatedConnectorSource


def get_federated_retrieval_functions(
    db_session: Session,
    user_id: UUID | None,
    source_types: list[DocumentSource] | None,
    document_set_names: list[str] | None,
    slack_context: SlackContext | None = None,
) -> list[FederatedRetrievalInfo]:

    # Check for Slack bot context first (regardless of user_id)
    if slack_context:
        logger.debug("Slack context detected, checking for Slack bot setup...")

        # Slack federated search requires a Slack federated connector to be linked
        # via document sets. If no document sets are provided, skip Slack federated search.
        if not document_set_names:
            logger.debug(
                "Skipping Slack federated search: no document sets provided, "
                "Slack federated connector must be linked via document sets"
            )
            return []

        # Check if any Slack federated connector is associated with the document sets
        # and extract its config (entities) for channel filtering
        slack_federated_connector_config: dict[str, Any] | None = None
        slack_federated_mappings = (
            get_federated_connector_document_set_mappings_by_document_set_names(
                db_session, document_set_names
            )
        )
        for mapping in slack_federated_mappings:
            if (
                mapping.federated_connector is not None
                and mapping.federated_connector.source
                == FederatedConnectorSource.FEDERATED_SLACK
            ):
                slack_federated_connector_config = (
                    mapping.federated_connector.config or {}
                )
                logger.debug(
                    f"Found Slack federated connector config: {slack_federated_connector_config}"
                )
                break

        if slack_federated_connector_config is None:
            logger.debug(
                f"Skipping Slack federated search: document sets {document_set_names} "
                "are not associated with any Slack federated connector"
            )
            # Return empty list - no Slack federated search for this context
            return []

        try:
            slack_bots = fetch_slack_bots(db_session)
            logger.debug(f"Found {len(slack_bots)} Slack bots")

            # First try to find a bot with user token
            tenant_slack_bot = next(
                (bot for bot in slack_bots if bot.enabled and bot.user_token), None
            )
            if tenant_slack_bot:
                logger.debug(f"Selected bot with user_token: {tenant_slack_bot.name}")
            else:
                # Fall back to any enabled bot without user token
                tenant_slack_bot = next(
                    (bot for bot in slack_bots if bot.enabled), None
                )
                if tenant_slack_bot:
                    logger.debug(
                        f"Selected bot without user_token: {tenant_slack_bot.name} (limited functionality)"
                    )
                else:
                    logger.warning("No enabled Slack bots found")

            if tenant_slack_bot:
                federated_retrieval_infos_slack = []

                # Use user_token if available, otherwise fall back to bot_token
                # Unwrap SensitiveValue for backend API calls
                access_token = (
                    tenant_slack_bot.user_token.get_value(apply_mask=False)
                    if tenant_slack_bot.user_token
                    else (
                        tenant_slack_bot.bot_token.get_value(apply_mask=False)
                        if tenant_slack_bot.bot_token
                        else ""
                    )
                )
                if not tenant_slack_bot.user_token:
                    logger.warning(
                        f"Using bot_token for Slack search (limited functionality): {tenant_slack_bot.name}"
                    )

                # For bot context, we don't need real OAuth credentials
                credentials = {
                    "client_id": "bot-context",  # Placeholder for bot context
                    "client_secret": "bot-context",  # Placeholder for bot context
                }

                # Create Slack federated connector
                connector = get_federated_connector(
                    FederatedConnectorSource.FEDERATED_SLACK,
                    credentials,
                )

                # Capture variables by value to avoid lambda closure issues
                # Unwrap SensitiveValue for backend API calls
                bot_token = (
                    tenant_slack_bot.bot_token.get_value(apply_mask=False)
                    if tenant_slack_bot.bot_token
                    else ""
                )

                # Use connector config for channel filtering (guaranteed to exist at this point)
                connector_entities = slack_federated_connector_config
                logger.debug(
                    f"Using Slack federated connector entities for bot context: {connector_entities}"
                )

                def create_slack_retrieval_function(
                    conn: FederatedConnector,
                    token: str,
                    ctx: SlackContext,
                    bot_tok: str,
                    entities: dict[str, Any],
                ) -> Callable[[ChunkIndexRequest], list[InferenceChunk]]:
                    def retrieval_fn(query: ChunkIndexRequest) -> list[InferenceChunk]:
                        return conn.search(
                            query,
                            entities,  # Use connector-level entities for channel filtering
                            access_token=token,
                            limit=None,  # Let connector use its own max_messages_per_query config
                            slack_event_context=ctx,
                            bot_token=bot_tok,
                        )

                    return retrieval_fn

                federated_retrieval_infos_slack.append(
                    FederatedRetrievalInfo(
                        retrieval_function=create_slack_retrieval_function(
                            connector,
                            access_token,
                            slack_context,
                            bot_token,
                            connector_entities,
                        ),
                        source=FederatedConnectorSource.FEDERATED_SLACK,
                    )
                )
                logger.debug(
                    f"Added Slack federated search for bot, returning {len(federated_retrieval_infos_slack)} retrieval functions"
                )
                return federated_retrieval_infos_slack

        except Exception as e:
            logger.warning(f"Could not setup Slack bot federated search: {e}")
            # Fall through to regular federated connector logic

    if user_id is None:
        # No user ID provided and no Slack context, return empty
        logger.warning(
            "No user ID provided and no Slack context, returning empty retrieval functions"
        )
        return []

    federated_connector__document_set_pairs = (
        (
            get_federated_connector_document_set_mappings_by_document_set_names(
                db_session, document_set_names
            )
        )
        if document_set_names
        else []
    )
    federated_connector_id_to_document_sets: dict[
        int, list[FederatedConnector__DocumentSet]
    ] = defaultdict(list)
    for pair in federated_connector__document_set_pairs:
        federated_connector_id_to_document_sets[pair.federated_connector_id].append(
            pair
        )

    # At this point, user_id is guaranteed to be not None since we're in the else branch
    assert user_id is not None

    # If no source types are specified, don't use any federated connectors
    if source_types is None:
        logger.debug("No source types specified, skipping all federated connectors")
        return []

    federated_retrieval_infos: list[FederatedRetrievalInfo] = []
    federated_oauth_tokens = list_federated_connector_oauth_tokens(db_session, user_id)
    for oauth_token in federated_oauth_tokens:
        # Slack is handled separately inside SearchTool
        if (
            oauth_token.federated_connector.source
            == FederatedConnectorSource.FEDERATED_SLACK
        ):
            logger.debug(
                "Skipping Slack federated connector in user OAuth path - handled by SearchTool"
            )
            continue

        if (
            oauth_token.federated_connector.source.to_non_federated_source()
            not in source_types
        ):
            continue

        document_set_associations = federated_connector_id_to_document_sets[
            oauth_token.federated_connector_id
        ]

        # if document set names are specified by the user, skip federated connectors that are
        # not associated with any of the document sets
        if document_set_names and not document_set_associations:
            continue

        # Only use connector-level config (no junction table entities)
        entities = oauth_token.federated_connector.config or {}

        connector = get_federated_connector(
            oauth_token.federated_connector.source,
            oauth_token.federated_connector.credentials.get_value(apply_mask=False),
        )

        # Capture variables by value to avoid lambda closure issues
        access_token = oauth_token.token.get_value(apply_mask=False)

        def create_retrieval_function(
            conn: FederatedConnector,
            ent: dict[str, Any],
            token: str,
        ) -> Callable[[ChunkIndexRequest], list[InferenceChunk]]:
            return lambda query: conn.search(
                query,
                ent,
                access_token=token,
                limit=None,  # Let connector use its own max_messages_per_query config
            )

        federated_retrieval_infos.append(
            FederatedRetrievalInfo(
                retrieval_function=create_retrieval_function(
                    connector, entities, access_token
                ),
                source=oauth_token.federated_connector.source,
            )
        )
    return federated_retrieval_infos


================================================
FILE: backend/onyx/federated_connectors/interfaces.py
================================================
from abc import ABC
from abc import abstractmethod
from typing import Any
from typing import Dict

from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import InferenceChunk
from onyx.federated_connectors.models import CredentialField
from onyx.federated_connectors.models import EntityField
from onyx.federated_connectors.models import OAuthResult
from onyx.onyxbot.slack.models import SlackContext


class FederatedConnector(ABC):
    """Base interface that all federated connectors must implement."""

    @abstractmethod
    def __init__(self, credentials: dict[str, Any]):
        """
        Initialize the connector with credentials + validate their structure.

        Args:
            credentials: Dictionary of credentials to initialize the connector with
        """
        self.credentials = credentials

    @abstractmethod
    def validate_entities(self, entities: Dict[str, Any]) -> bool:
        """
        Validate that the provided entities match the expected structure.

        Args:
            entities: Dictionary of entities to validate

        Returns:
            True if entities are valid, False otherwise

        Note: This method is used for backward compatibility with document-set level entities.
        For connector-level config validation, use validate_config() instead.
        """

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """
        Validate that the provided config matches the expected structure.

        This is an alias for validate_entities() to provide clearer semantics
        when validating connector-level configuration.

        Args:
            config: Dictionary of configuration to validate

        Returns:
            True if config is valid, False otherwise
        """
        return self.validate_entities(config)

    @classmethod
    @abstractmethod
    def configuration_schema(cls) -> Dict[str, EntityField]:
        """
        Return the specification of what configuration fields are available for this connector.

        Returns:
            Dictionary where keys are configuration field names and values are EntityField objects
            describing the expected structure and constraints.
        """

    @classmethod
    @abstractmethod
    def credentials_schema(cls) -> Dict[str, CredentialField]:
        """
        Return the specification of what credentials are required for this connector.

        Returns:
            Dictionary where keys are credential field names and values are CredentialField objects
            describing the expected structure, validation rules, and security properties.
        """

    @abstractmethod
    def authorize(self, redirect_uri: str) -> str:
        """
        Generate the OAuth authorization URL.

        Returns:
            The URL where users should be redirected to authorize the application
        """

    @abstractmethod
    def callback(self, callback_data: Dict[str, Any], redirect_uri: str) -> OAuthResult:
        """
        Handle the OAuth callback and exchange the authorization code for tokens.

        Args:
            callback_data: The data received from the OAuth callback (query params, etc.)
            redirect_uri: The OAuth redirect URI used in the authorization request

        Returns:
            Standardized OAuthResult containing tokens and metadata
        """

    @abstractmethod
    def search(
        self,
        query: ChunkIndexRequest,
        entities: dict[str, Any],
        access_token: str,
        limit: int | None = None,
        # Slack-specific parameters
        slack_event_context: SlackContext | None = None,
        bot_token: str | None = None,
    ) -> list[InferenceChunk]:
        """
        Perform a federated search using the provided query and entities.

        Args:
            query: The search query
            entities: Connector-level config (entity filtering configuration)
            access_token: The OAuth access token
            limit: Maximum number of results to return
            slack_event_context: Slack-specific context (only used by Slack bot)
            bot_token: Slack bot token (only used by Slack bot)

        Returns:
            Search results in a standardized format
        """


================================================
FILE: backend/onyx/federated_connectors/models.py
================================================
from datetime import datetime
from typing import Any
from typing import Dict
from typing import Optional

from pydantic import BaseModel
from pydantic import Field


class FieldSpec(BaseModel):
    """Model for describing a field specification."""

    type: str = Field(
        ..., description="The type of the field (e.g., 'str', 'bool', 'list[str]')"
    )
    description: str = Field(
        ..., description="Description of what this field represents"
    )
    required: bool = Field(default=False, description="Whether this field is required")
    default: Optional[Any] = Field(
        default=None, description="Default value if not provided"
    )
    example: Optional[Any] = Field(
        default=None, description="Example value for documentation"
    )
    secret: bool = Field(
        default=False, description="Whether this field contains sensitive data"
    )


class EntityField(FieldSpec):
    """Model for describing an entity field in the entities specification."""


class CredentialField(FieldSpec):
    """Model for describing a credential field in the credentials specification."""


class OAuthResult(BaseModel):
    """Standardized OAuth result that all federated connectors should return from callback."""

    access_token: Optional[str] = Field(
        default=None, description="The bot access token for bot operations"
    )
    user_token: Optional[str] = Field(
        default=None,
        description="The user access token for user-scoped operations like federated search",
    )
    token_type: Optional[str] = Field(
        default=None, description="Token type (usually 'bearer')"
    )
    scope: Optional[str] = Field(default=None, description="Granted scopes")
    expires_at: Optional[datetime] = Field(
        default=None, description="When the token expires"
    )
    refresh_token: Optional[str] = Field(
        default=None, description="Refresh token if applicable"
    )

    # Additional fields that might be useful
    team: Optional[Dict[str, Any]] = Field(
        default=None, description="Team/workspace information"
    )
    user: Optional[Dict[str, Any]] = Field(default=None, description="User information")
    raw_response: Optional[Dict[str, Any]] = Field(
        default=None, description="Raw response for debugging"
    )

    # Pydantic V2 automatically serializes datetime to ISO format, so no custom encoder needed


================================================
FILE: backend/onyx/federated_connectors/oauth_utils.py
================================================
"""Generic OAuth utilities for federated connectors API layer."""

import base64
import json
import uuid
from typing import Any

from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger

logger = setup_logger()

OAUTH_STATE_PREFIX = "federated_oauth"
OAUTH_STATE_TTL = 300  # 5 minutes


class OAuthSession:
    """Represents an OAuth session stored in the cache backend."""

    def __init__(
        self,
        federated_connector_id: int,
        user_id: str,
        redirect_uri: str | None = None,
        additional_data: dict[str, Any] | None = None,
    ):
        self.federated_connector_id = federated_connector_id
        self.user_id = user_id
        self.redirect_uri = redirect_uri
        self.additional_data = additional_data or {}

    def to_dict(self) -> dict[str, Any]:
        return {
            "federated_connector_id": self.federated_connector_id,
            "user_id": self.user_id,
            "redirect_uri": self.redirect_uri,
            "additional_data": self.additional_data,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OAuthSession":
        return cls(
            federated_connector_id=data["federated_connector_id"],
            user_id=data["user_id"],
            redirect_uri=data.get("redirect_uri"),
            additional_data=data.get("additional_data", {}),
        )


def generate_oauth_state(
    federated_connector_id: int,
    user_id: str,
    redirect_uri: str | None = None,
    additional_data: dict[str, Any] | None = None,
    ttl: int = OAUTH_STATE_TTL,
) -> str:
    """
    Generate a secure state parameter and store session data in the cache backend.

    Args:
        federated_connector_id: ID of the federated connector
        user_id: ID of the user initiating OAuth
        redirect_uri: Optional redirect URI after OAuth completion
        additional_data: Any additional data to store with the session
        ttl: Time-to-live in seconds for the cache key

    Returns:
        Base64-encoded state parameter
    """
    # Generate a random UUID for the state
    state_uuid = uuid.uuid4()
    state_b64 = base64.urlsafe_b64encode(state_uuid.bytes).decode("utf-8").rstrip("=")

    session = OAuthSession(
        federated_connector_id=federated_connector_id,
        user_id=user_id,
        redirect_uri=redirect_uri,
        additional_data=additional_data,
    )

    cache = get_cache_backend()
    cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
    cache.set(cache_key, json.dumps(session.to_dict()), ex=ttl)

    logger.info(
        f"Generated OAuth state for federated_connector_id={federated_connector_id}, user_id={user_id}, state={state_b64}"
    )

    return state_b64


def verify_oauth_state(state: str) -> OAuthSession:
    """
    Verify OAuth state parameter and retrieve session data.

    Args:
        state: Base64-encoded state parameter from OAuth callback

    Returns:
        OAuthSession if state is valid, None otherwise
    """
    # Add padding if needed for base64 decoding
    padded_state = state + "=" * (-len(state) % 4)

    # Decode base64 to get UUID bytes
    state_bytes = base64.urlsafe_b64decode(padded_state)
    state_uuid = uuid.UUID(bytes=state_bytes)

    cache = get_cache_backend()
    cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"

    session_data = cache.get(cache_key)
    if not session_data:
        raise ValueError(f"OAuth state not found: {state}")

    cache.delete(cache_key)

    session_dict = json.loads(session_data)
    return OAuthSession.from_dict(session_dict)


def get_oauth_callback_uri() -> str:
    """
    Generate the OAuth callback URI for a federated connector.

    Returns:
        The callback URI
    """
    # Use the frontend callback page as the OAuth redirect URI
    # The frontend will then make an API call to process the callback
    return f"{WEB_DOMAIN}/federated/oauth/callback"


def add_state_to_oauth_url(base_oauth_url: str, state: str) -> str:
    """
    Add state parameter to an OAuth URL.

    Args:
        base_oauth_url: The base OAuth URL from the connector
        state: The state parameter to add

    Returns:
        The OAuth URL with state parameter added
    """
    # Check if URL already has query parameters
    separator = "&" if "?" in base_oauth_url else "?"
    return f"{base_oauth_url}{separator}state={state}"


================================================
FILE: backend/onyx/federated_connectors/registry.py
================================================
"""Registry mapping for federated connector classes."""

from pydantic import BaseModel

from onyx.configs.constants import FederatedConnectorSource


class FederatedConnectorMapping(BaseModel):
    module_path: str
    class_name: str


# Mapping of FederatedConnectorSource to connector details for lazy loading
FEDERATED_CONNECTOR_CLASS_MAP = {
    FederatedConnectorSource.FEDERATED_SLACK: FederatedConnectorMapping(
        module_path="onyx.federated_connectors.slack.federated_connector",
        class_name="SlackFederatedConnector",
    ),
}


================================================
FILE: backend/onyx/federated_connectors/slack/__init__.py
================================================
# Slack federated connector module


================================================
FILE: backend/onyx/federated_connectors/slack/federated_connector.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from urllib.parse import urlencode

import requests
from pydantic import ValidationError
from slack_sdk import WebClient
from typing_extensions import override

from onyx.context.search.federated.slack_search import slack_retrieval
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import InferenceChunk
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.models import CredentialField
from onyx.federated_connectors.models import EntityField
from onyx.federated_connectors.models import OAuthResult
from onyx.federated_connectors.slack.models import SlackCredentials
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.onyxbot.slack.models import SlackContext
from onyx.utils.logger import setup_logger

logger = setup_logger()


SCOPES = [
    "channels:read",
    "groups:read",
    "im:read",
    "mpim:read",
    "search:read",
    "channels:history",
    "groups:history",
    "im:history",
    "mpim:history",
    "users:read",
    "users.profile:read",
]


class SlackFederatedConnector(FederatedConnector):
    def __init__(self, credentials: dict[str, Any]):
        self.slack_credentials = SlackCredentials(**credentials)

    @override
    def validate_entities(self, entities: dict[str, Any]) -> bool:
        """Check the entities and verify that they match the expected structure/all values are valid.

        For Slack federated search, we expect:
        - channels: list[str] (list of channel names or IDs)
        - include_dm: bool (whether to include direct messages)
        """
        try:
            # Use Pydantic model for validation
            SlackEntities(**entities)
            return True
        except ValidationError as e:
            logger.warning(f"Validation error for Slack entities: {e}")
            return False
        except Exception as e:
            logger.error(f"Error validating Slack entities: {e}")
            return False

    @classmethod
    def entities_schema(cls) -> dict[str, EntityField]:
        """Return the specifications of what entity configuration fields are available for Slack.

        This is the canonical schema definition for Slack entities.
        """
        return {
            "exclude_channels": EntityField(
                type="list[str]",
                description="Exclude the following channels from search. Glob patterns are supported.",
                required=False,
                example=["secure-channel", "private-*", "customer*"],
            ),
            "search_all_channels": EntityField(
                type="bool",
                description="Search all accessible channels. If not set, must specify channels below.",
                required=False,
                default=False,
                example=False,
            ),
            "channels": EntityField(
                type="list[str]",
                description="Search the following channels",
                required=False,
                example=["general", "eng*", "product-*"],
            ),
            "include_dm": EntityField(
                type="bool",
                description="Include user direct messages in search results",
                required=False,
                default=False,
                example=False,
            ),
            "include_group_dm": EntityField(
                type="bool",
                description="Include group direct messages (multi-person DMs) in search results",
                required=False,
                default=False,
                example=False,
            ),
            "include_private_channels": EntityField(
                type="bool",
                description="Include private channels in search results (user must have access)",
                required=False,
                default=False,
                example=False,
            ),
            "default_search_days": EntityField(
                type="int",
                description="Maximum number of days to search back. Increasing this value degrades answer quality.",
                required=False,
                default=30,
                example=30,
            ),
            "max_messages_per_query": EntityField(
                type="int",
                description=(
                    "Maximum number of messages to retrieve per search query. "
                    "Higher values provide more context but may be slower."
                ),
                required=False,
                default=25,
                example=25,
            ),
        }

    @classmethod
    def configuration_schema(cls) -> dict[str, EntityField]:
        """Wrapper for backwards compatibility - delegates to entities_schema()."""
        return cls.entities_schema()

    @classmethod
    @override
    def credentials_schema(cls) -> dict[str, CredentialField]:
        """Return the specification of what credentials are required for Slack connector."""
        return {
            "client_id": CredentialField(
                type="str",
                description="Slack app client ID from your Slack app configuration",
                required=True,
                example="1234567890.1234567890123",
                secret=False,
            ),
            "client_secret": CredentialField(
                type="str",
                description="Slack app client secret from your Slack app configuration",
                required=True,
                example="1a2b3c4d5e6f7g8h9i0j1k2l3m4n5o6p",
                secret=True,
            ),
        }

    @override
    def authorize(self, redirect_uri: str) -> str:
        """Get back the OAuth URL for Slack authorization.

        Returns the URL where users should be redirected to authorize the application.
        Note: State parameter will be added by the API layer.
        """
        # Build OAuth URL with proper parameters (no state - handled by API layer)
        params = {
            "client_id": self.slack_credentials.client_id,
            "user_scope": " ".join(SCOPES),
            "redirect_uri": redirect_uri,
        }

        # Build query string
        oauth_url = f"https://slack.com/oauth/v2/authorize?{urlencode(params)}"

        logger.info("Generated Slack OAuth authorization URL")
        return oauth_url

    @override
    def callback(self, callback_data: dict[str, Any], redirect_uri: str) -> OAuthResult:
        """Handle the response from the OAuth flow and return it in a standard format.

        Args:
            callback_data: The data received from the OAuth callback (state already validated by API layer)

        Returns:
            Standardized OAuthResult
        """
        # Extract authorization code from callback
        auth_code = callback_data.get("code")
        error = callback_data.get("error")

        if error:
            raise RuntimeError(f"OAuth error received: {error}")

        if not auth_code:
            raise ValueError("No authorization code received")

        # Exchange authorization code for access token
        token_response = self._exchange_code_for_token(auth_code, redirect_uri)

        if not token_response.get("ok"):
            raise RuntimeError(
                f"Failed to exchange authorization code for token: {token_response.get('error')}"
            )

        # Build team info
        team_info = None
        if "team" in token_response:
            team_info = {
                "id": token_response["team"]["id"],
                "name": token_response["team"]["name"],
            }

        # Build user info and extract OAuth tokens
        if "authed_user" not in token_response:
            raise RuntimeError("Missing authed_user in OAuth response from Slack")

        authed_user = token_response["authed_user"]
        user_info = {
            "id": authed_user["id"],
            "scope": authed_user.get("scope"),
            "token_type": authed_user.get("token_type"),
        }

        # Extract OAuth tokens - bot token from root, user token from authed_user
        user_token = authed_user.get("access_token")  # User token
        refresh_token = authed_user.get("refresh_token")
        token_type = authed_user.get("token_type", "bearer")
        scope = authed_user.get("scope")

        # Calculate expires_at from expires_in if present
        expires_at = None
        if "expires_in" in authed_user:
            expires_at = datetime.now(timezone.utc) + timedelta(
                seconds=authed_user["expires_in"]
            )

        return OAuthResult(
            access_token=user_token,  # Bot token for bot operations
            token_type=token_type,
            scope=scope,
            expires_at=expires_at,
            refresh_token=refresh_token,
            team=team_info,
            user=user_info,
            raw_response=token_response,
        )

    def _exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:
        """Exchange authorization code for access token.

        Args:
            code: Authorization code from OAuth callback

        Returns:
            Token response from Slack API
        """
        response = requests.post(
            "https://slack.com/api/oauth.v2.access",
            data={
                "client_id": self.slack_credentials.client_id,
                "client_secret": self.slack_credentials.client_secret,
                "code": code,
                "redirect_uri": redirect_uri,
            },
        )
        response.raise_for_status()
        return response.json()

    @override
    def search(
        self,
        query: ChunkIndexRequest,
        entities: dict[str, Any],
        access_token: str,
        limit: int | None = None,
        slack_event_context: SlackContext | None = None,
        bot_token: str | None = None,
    ) -> list[InferenceChunk]:
        """Perform a federated search on Slack.

        Args:
            query: The search query
            entities: Connector-level config (entity filtering configuration)
            access_token: The OAuth access token
            limit: Maximum number of results to return
            slack_event_context: Optional Slack context for slack bot
            bot_token: Optional bot token for slack bot

        Returns:
            Search results in SlackSearchResponse format
        """
        logger.debug(f"Slack federated search called with entities: {entities}")

        # Get team_id from Slack API for caching and filtering
        team_id = None
        try:
            slack_client = WebClient(token=access_token)
            auth_response = slack_client.auth_test()
            auth_response.validate()

            # Cast response.data to dict for type checking
            auth_data: dict[str, Any] = auth_response.data  # type: ignore
            team_id = auth_data.get("team_id")
            logger.debug(f"Slack team_id: {team_id}")
        except Exception as e:
            logger.warning(f"Could not fetch team_id from Slack API: {e}")

        with get_session_with_current_tenant() as db_session:
            return slack_retrieval(
                query,
                access_token,
                db_session,
                entities=entities,
                limit=limit,
                slack_event_context=slack_event_context,
                bot_token=bot_token,
                team_id=team_id,
            )


================================================
FILE: backend/onyx/federated_connectors/slack/models.py
================================================
from typing import Optional

from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator


class SlackEntities(BaseModel):
    """Pydantic model for Slack federated search entities."""

    # Channel filtering
    search_all_channels: bool = Field(
        default=True,
        description="Search all accessible channels. If not set, must specify channels below.",
    )
    channels: Optional[list[str]] = Field(
        default=None,
        description="List of Slack channel names to search across.",
    )
    exclude_channels: Optional[list[str]] = Field(
        default=None,
        description="List of channel names or patterns to exclude e.g. 'private-*, customer-*, secure-channel'.",
    )

    # Direct message filtering
    include_dm: bool = Field(
        default=True,
        description="Include user direct messages in search results",
    )
    include_group_dm: bool = Field(
        default=True,
        description="Include group direct messages (multi-person DMs) in search results",
    )

    # Private channel filtering
    include_private_channels: bool = Field(
        default=True,
        description="Include private channels in search results (user must have access)",
    )

    # Date range filtering
    default_search_days: int = Field(
        default=30,
        description="Maximum number of days to search back. Increasing this value degrades answer quality.",
    )

    # Message count per slack request
    max_messages_per_query: int = Field(
        default=10,
        description=(
            "Maximum number of messages to retrieve per search query. "
            "Higher values increase API calls and may trigger rate limits."
        ),
    )

    @field_validator("default_search_days")
    @classmethod
    def validate_default_search_days(cls, v: int) -> int:
        """Validate default_search_days is positive and reasonable"""
        if v < 1:
            raise ValueError("default_search_days must be at least 1")
        if v > 365:
            raise ValueError("default_search_days cannot exceed 365 days")
        return v

    @field_validator("max_messages_per_query")
    @classmethod
    def validate_max_messages_per_query(cls, v: int) -> int:
        """Validate max_messages_per_query is positive and reasonable"""
        if v < 1:
            raise ValueError("max_messages_per_query must be at least 1")
        if v > 100:
            raise ValueError("max_messages_per_query cannot exceed 100")
        return v

    @field_validator("channels")
    @classmethod
    def validate_channels(cls, v: Optional[list[str]]) -> Optional[list[str]]:
        """Validate each channel is a non-empty string"""
        if v is not None:
            if not isinstance(v, list):
                raise ValueError("channels must be a list")
            for channel in v:
                if not isinstance(channel, str) or not channel.strip():
                    raise ValueError("Each channel must be a non-empty string")
        return v

    @field_validator("exclude_channels")
    @classmethod
    def validate_exclude_patterns(cls, v: Optional[list[str]]) -> Optional[list[str]]:
        """Validate each exclude pattern is a non-empty string"""
        if v is None:
            return v

        for pattern in v:
            if not isinstance(pattern, str) or not pattern.strip():
                raise ValueError("Each exclude pattern must be a non-empty string")

        return v

    @model_validator(mode="after")
    def validate_channel_config(self) -> "SlackEntities":
        """Validate search_all_channels configuration"""
        # If search_all_channels is False, channels list must be provided
        if not self.search_all_channels:
            if self.channels is None or len(self.channels) == 0:
                raise ValueError(
                    "Must specify at least one channel when search_all_channels is False"
                )

        return self


class SlackCredentials(BaseModel):
    """Slack federated connector credentials."""

    client_id: str = Field(..., description="Slack app client ID")
    client_secret: str = Field(..., description="Slack app client secret")

    @field_validator("client_id")
    @classmethod
    def validate_client_id(cls, v: str) -> str:
        if not v or not v.strip():
            raise ValueError("Client ID cannot be empty")
        return v.strip()

    @field_validator("client_secret")
    @classmethod
    def validate_client_secret(cls, v: str) -> str:
        if not v or not v.strip():
            raise ValueError("Client secret cannot be empty")
        return v.strip()


class SlackTeamInfo(BaseModel):
    """Information about a Slack team/workspace."""

    id: str = Field(..., description="Team ID")
    name: str = Field(..., description="Team name")
    domain: Optional[str] = Field(default=None, description="Team domain")


class SlackUserInfo(BaseModel):
    """Information about a Slack user."""

    id: str = Field(..., description="User ID")
    team_id: Optional[str] = Field(default=None, description="Team ID")
    name: Optional[str] = Field(default=None, description="User name")
    email: Optional[str] = Field(default=None, description="User email")


class SlackSearchResult(BaseModel):
    """Individual search result from Slack."""

    channel: str = Field(..., description="Channel where the message was found")
    timestamp: str = Field(..., description="Message timestamp")
    user: Optional[str] = Field(default=None, description="User who sent the message")
    text: str = Field(..., description="Message text")
    permalink: Optional[str] = Field(
        default=None, description="Permalink to the message"
    )
    score: Optional[float] = Field(default=None, description="Search relevance score")

    # Additional context
    thread_ts: Optional[str] = Field(
        default=None, description="Thread timestamp if in a thread"
    )
    reply_count: Optional[int] = Field(
        default=None, description="Number of replies if it's a thread"
    )


class SlackSearchResponse(BaseModel):
    """Response from Slack federated search."""

    query: str = Field(..., description="The search query")
    total_count: int = Field(..., description="Total number of results")
    results: list[SlackSearchResult] = Field(..., description="Search results")
    next_cursor: Optional[str] = Field(
        default=None, description="Cursor for pagination"
    )

    # Metadata
    channels_searched: Optional[list[str]] = Field(
        default=None, description="Channels that were searched"
    )
    search_time_ms: Optional[int] = Field(
        default=None, description="Time taken to search in milliseconds"
    )


================================================
FILE: backend/onyx/file_processing/__init__.py
================================================


================================================
FILE: backend/onyx/file_processing/enums.py
================================================
from enum import Enum


class HtmlBasedConnectorTransformLinksStrategy(str, Enum):
    # remove links entirely
    STRIP = "strip"
    # turn HTML links into markdown links
    MARKDOWN = "markdown"


================================================
FILE: backend/onyx/file_processing/extract_file_text.py
================================================
import csv
import gc
import io
import json
import os
import re
import zipfile
from collections.abc import Callable
from collections.abc import Iterator
from collections.abc import Sequence
from email.parser import Parser as EmailParser
from io import BytesIO
from pathlib import Path
from typing import Any
from typing import IO
from typing import NamedTuple
from typing import Optional
from typing import TYPE_CHECKING
from zipfile import BadZipFile

import chardet
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from PIL import Image

from onyx.configs.constants import ONYX_METADATA_FILENAME
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.file_processing.file_types import PRESENTATION_MIME_TYPE
from onyx.file_processing.file_types import WORD_PROCESSING_MIME_TYPE
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import unstructured_to_text
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    from markitdown import MarkItDown
logger = setup_logger()

TEXT_SECTION_SEPARATOR = "\n\n"

_MARKITDOWN_CONVERTER: Optional["MarkItDown"] = None

KNOWN_OPENPYXL_BUGS = [
    "Value must be either numerical or a string containing a wildcard",
    "File contains no valid workbook part",
    "Unable to read workbook: could not read stylesheet from None",
    "Colors must be aRGB hex values",
]


def get_markitdown_converter() -> "MarkItDown":
    global _MARKITDOWN_CONVERTER
    from markitdown import MarkItDown

    if _MARKITDOWN_CONVERTER is None:
        _MARKITDOWN_CONVERTER = MarkItDown(enable_plugins=False)
    return _MARKITDOWN_CONVERTER


def get_file_ext(file_path_or_name: str | Path) -> str:
    _, extension = os.path.splitext(file_path_or_name)
    return extension.lower()


def is_text_file(file: IO[bytes]) -> bool:
    """
    checks if the first 1024 bytes only contain printable or whitespace characters
    if it does, then we say it's a plaintext file
    """
    raw_data = file.read(1024)
    file.seek(0)
    text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
    return all(c in text_chars for c in raw_data)


def detect_encoding(file: IO[bytes]) -> str:
    raw_data = file.read(50000)
    file.seek(0)
    encoding = chardet.detect(raw_data)["encoding"] or "utf-8"
    return encoding


def is_macos_resource_fork_file(file_name: str) -> bool:
    return os.path.basename(file_name).startswith("._") and file_name.startswith(
        "__MACOSX"
    )


def to_bytesio(stream: IO[bytes]) -> BytesIO:
    if isinstance(stream, BytesIO):
        return stream
    data = stream.read()  # consumes the stream!
    return BytesIO(data)


def load_files_from_zip(
    zip_file_io: IO,
    ignore_macos_resource_fork_files: bool = True,
    ignore_dirs: bool = True,
) -> Iterator[tuple[zipfile.ZipInfo, IO[Any]]]:
    """
    Iterates through files in a zip archive, yielding (ZipInfo, file handle) pairs.
    """
    with zipfile.ZipFile(zip_file_io, "r") as zip_file:
        for file_info in zip_file.infolist():
            if ignore_dirs and file_info.is_dir():
                continue

            if (
                ignore_macos_resource_fork_files
                and is_macos_resource_fork_file(file_info.filename)
            ) or file_info.filename == ONYX_METADATA_FILENAME:
                continue

            with zip_file.open(file_info.filename, "r") as subfile:
                # Try to match by exact filename first
                yield file_info, subfile


def _extract_onyx_metadata(line: str) -> dict | None:
    """
    Example: first line has:
        <!-- ONYX_METADATA={"title": "..."} -->
      or
        #ONYX_METADATA={"title":"..."}
    """
    html_comment_pattern = r"<!--\s*ONYX_METADATA=\{(.*?)\}\s*-->"
    hashtag_pattern = r"#ONYX_METADATA=\{(.*?)\}"

    html_comment_match = re.search(html_comment_pattern, line)
    hashtag_match = re.search(hashtag_pattern, line)

    if html_comment_match:
        json_str = html_comment_match.group(1)
    elif hashtag_match:
        json_str = hashtag_match.group(1)
    else:
        return None

    try:
        return json.loads("{" + json_str + "}")
    except json.JSONDecodeError:
        return None


def read_text_file(
    file: IO,
    encoding: str = "utf-8",
    errors: str = "replace",
    ignore_onyx_metadata: bool = True,
) -> tuple[str, dict]:
    """
    For plain text files. Optionally extracts Onyx metadata from the first line.
    """
    metadata = {}
    file_content_raw = ""
    for ind, line in enumerate(file):
        # decode
        try:
            line = line.decode(encoding) if isinstance(line, bytes) else line
        except UnicodeDecodeError:
            line = (
                line.decode(encoding, errors=errors)
                if isinstance(line, bytes)
                else line
            )

        # optionally parse metadata in the first line
        if ind == 0 and not ignore_onyx_metadata:
            potential_meta = _extract_onyx_metadata(line)
            if potential_meta is not None:
                metadata = potential_meta
                continue

        file_content_raw += line

    return file_content_raw, metadata


def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """
    Extract text from a PDF. For embedded images, a more complex approach is needed.
    This is a minimal approach returning text only.
    """
    text, _, _ = read_pdf_file(file, pdf_pass)
    return text


def read_pdf_file(
    file: IO[Any],
    pdf_pass: str | None = None,
    extract_images: bool = False,
    image_callback: Callable[[bytes, str], None] | None = None,
) -> tuple[str, dict[str, Any], Sequence[tuple[bytes, str]]]:
    """
    Returns the text, basic PDF metadata, and optionally extracted images.
    """
    from pypdf import PdfReader
    from pypdf.errors import PdfStreamError

    metadata: dict[str, Any] = {}
    extracted_images: list[tuple[bytes, str]] = []
    try:
        pdf_reader = PdfReader(file)

        if pdf_reader.is_encrypted and pdf_pass is not None:
            decrypt_success = False
            try:
                decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
            except Exception:
                logger.error("Unable to decrypt pdf")

            if not decrypt_success:
                return "", metadata, []
        elif pdf_reader.is_encrypted:
            logger.warning("No Password for an encrypted PDF, returning empty text.")
            return "", metadata, []

        # Basic PDF metadata
        if pdf_reader.metadata is not None:
            for key, value in pdf_reader.metadata.items():
                clean_key = key.lstrip("/")
                if isinstance(value, str) and value.strip():
                    metadata[clean_key] = value
                elif isinstance(value, list) and all(
                    isinstance(item, str) for item in value
                ):
                    metadata[clean_key] = ", ".join(value)

        text = TEXT_SECTION_SEPARATOR.join(
            page.extract_text() for page in pdf_reader.pages
        )

        if extract_images:
            for page_num, page in enumerate(pdf_reader.pages):
                for image_file_object in page.images:
                    image = Image.open(io.BytesIO(image_file_object.data))
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format=image.format)
                    img_bytes = img_byte_arr.getvalue()

                    image_format = image.format.lower() if image.format else "png"
                    image_name = f"page_{page_num + 1}_image_{image_file_object.name}.{image_format}"
                    if image_callback is not None:
                        # Stream image out immediately
                        image_callback(img_bytes, image_name)
                    else:
                        extracted_images.append((img_bytes, image_name))

        return text, metadata, extracted_images

    except PdfStreamError:
        logger.exception("Invalid PDF file")
    except Exception:
        logger.exception("Failed to read PDF")

    return "", metadata, []


def extract_docx_images(docx_bytes: IO[Any]) -> Iterator[tuple[bytes, str]]:
    """
    Given the bytes of a docx file, extract all the images.
    Returns a list of tuples (image_bytes, image_name).
    """
    try:
        with zipfile.ZipFile(docx_bytes) as z:
            for name in z.namelist():
                if name.startswith("word/media/"):
                    yield (z.read(name), name.split("/")[-1])
    except Exception:
        logger.exception("Failed to extract all docx images")


def read_docx_file(
    file: IO[Any],
    file_name: str = "",
    extract_images: bool = False,
    image_callback: Callable[[bytes, str], None] | None = None,
) -> tuple[str, Sequence[tuple[bytes, str]]]:
    """
    Extract text from a docx.
    Return (text_content, list_of_images).

    The caller can choose to provide a callback to handle images with the intent
    of avoiding materializing the list of images in memory.
    The images list returned is empty in this case.
    """
    md = get_markitdown_converter()
    from markitdown import (
        StreamInfo,
        FileConversionException,
        UnsupportedFormatException,
    )

    try:
        doc = md.convert(
            to_bytesio(file), stream_info=StreamInfo(mimetype=WORD_PROCESSING_MIME_TYPE)
        )
    except (
        BadZipFile,
        ValueError,
        FileConversionException,
        UnsupportedFormatException,
    ) as e:
        logger.warning(
            f"Failed to extract docx {file_name or 'docx file'}: {e}. Attempting to read as text file."
        )

        # May be an invalid docx, but still a valid text file
        file.seek(0)
        encoding = detect_encoding(file)
        text_content_raw, _ = read_text_file(
            file, encoding=encoding, ignore_onyx_metadata=False
        )
        return text_content_raw or "", []

    file.seek(0)

    if extract_images:
        if image_callback is None:
            return doc.markdown, list(extract_docx_images(to_bytesio(file)))
        # If a callback is provided, iterate and stream images without accumulating
        try:
            for img_file_bytes, img_file_name in extract_docx_images(to_bytesio(file)):
                image_callback(img_file_bytes, img_file_name)
        except Exception:
            logger.exception("Failed to stream docx images")
    return doc.markdown, []


def pptx_to_text(file: IO[Any], file_name: str = "") -> str:
    md = get_markitdown_converter()
    from markitdown import (
        StreamInfo,
        FileConversionException,
        UnsupportedFormatException,
    )

    stream_info = StreamInfo(
        mimetype=PRESENTATION_MIME_TYPE, filename=file_name or None, extension=".pptx"
    )
    try:
        presentation = md.convert(to_bytesio(file), stream_info=stream_info)
    except (
        BadZipFile,
        ValueError,
        FileConversionException,
        UnsupportedFormatException,
    ) as e:
        error_str = f"Failed to extract text from {file_name or 'pptx file'}: {e}"
        logger.warning(error_str)
        return ""
    return presentation.markdown


def _worksheet_to_matrix(
    worksheet: Worksheet,
) -> list[list[str]]:
    """
    Converts a singular worksheet to a matrix of values
    """
    rows: list[list[str]] = []
    for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True):
        row = ["" if cell is None else str(cell) for cell in worksheet_row]
        rows.append(row)

    return rows


def _clean_worksheet_matrix(matrix: list[list[str]]) -> list[list[str]]:
    """
    Cleans a worksheet matrix by removing rows if there are N consecutive empty
    rows and removing cols if there are M consecutive empty columns
    """
    MAX_EMPTY_ROWS = 2  # Runs longer than this are capped to max_empty; shorter runs are preserved as-is
    MAX_EMPTY_COLS = 2

    # Row cleanup
    matrix = _remove_empty_runs(matrix, max_empty=MAX_EMPTY_ROWS)

    if not matrix:
        return matrix

    # Column cleanup — determine which columns to keep without transposing.
    num_cols = len(matrix[0])
    keep_cols = _columns_to_keep(matrix, num_cols, max_empty=MAX_EMPTY_COLS)
    if len(keep_cols) < num_cols:
        matrix = [[row[c] for c in keep_cols] for row in matrix]

    return matrix


def _columns_to_keep(
    matrix: list[list[str]], num_cols: int, max_empty: int
) -> list[int]:
    """Return the indices of columns to keep after removing empty-column runs.

    Uses the same logic as ``_remove_empty_runs`` but operates on column
    indices so no transpose is needed.
    """
    kept: list[int] = []
    empty_buffer: list[int] = []

    for col_idx in range(num_cols):
        col_is_empty = all(not row[col_idx] for row in matrix)
        if col_is_empty:
            empty_buffer.append(col_idx)
        else:
            kept.extend(empty_buffer[:max_empty])
            kept.append(col_idx)
            empty_buffer = []

    return kept


def _remove_empty_runs(
    rows: list[list[str]],
    max_empty: int,
) -> list[list[str]]:
    """Removes entire runs of empty rows when the run length exceeds max_empty.

    Leading empty runs are capped to max_empty, just like interior runs.
    Trailing empty rows are always dropped since there is no subsequent
    non-empty row to flush them.
    """
    result: list[list[str]] = []
    empty_buffer: list[list[str]] = []

    for row in rows:
        # Check if empty
        if not any(row):
            if len(empty_buffer) < max_empty:
                empty_buffer.append(row)
        else:
            # Add upto max empty rows onto the result - that's what we allow
            result.extend(empty_buffer[:max_empty])
            # Add the new non-empty row
            result.append(row)
            empty_buffer = []

    return result


def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
    # TODO: switch back to this approach in a few months when markitdown
    # fixes their handling of excel files

    # md = get_markitdown_converter()
    # stream_info = StreamInfo(
    #     mimetype=SPREADSHEET_MIME_TYPE, filename=file_name or None, extension=".xlsx"
    # )
    # try:
    #     workbook = md.convert(to_bytesio(file), stream_info=stream_info)
    # except (
    #     BadZipFile,
    #     ValueError,
    #     FileConversionException,
    #     UnsupportedFormatException,
    # ) as e:
    #     error_str = f"Failed to extract text from {file_name or 'xlsx file'}: {e}"
    #     if file_name.startswith("~"):
    #         logger.debug(error_str + " (this is expected for files with ~)")
    #     else:
    #         logger.warning(error_str)
    #     return ""
    # return workbook.markdown
    try:
        workbook = openpyxl.load_workbook(file, read_only=True)
    except BadZipFile as e:
        error_str = f"Failed to extract text from {file_name or 'xlsx file'}: {e}"
        if file_name.startswith("~"):
            logger.debug(error_str + " (this is expected for files with ~)")
        else:
            logger.warning(error_str)
        return ""
    except Exception as e:
        if any(s in str(e) for s in KNOWN_OPENPYXL_BUGS):
            logger.error(
                f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
            )
            return ""
        raise

    text_content = []
    for sheet in workbook.worksheets:
        sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
        buf = io.StringIO()
        writer = csv.writer(buf, lineterminator="\n")
        writer.writerows(sheet_matrix)
        text_content.append(buf.getvalue().rstrip("\n"))
    return TEXT_SECTION_SEPARATOR.join(text_content)


def eml_to_text(file: IO[Any]) -> str:
    encoding = detect_encoding(file)
    text_file = io.TextIOWrapper(file, encoding=encoding)
    parser = EmailParser()
    try:
        message = parser.parse(text_file)
    finally:
        try:
            # Keep underlying upload handle open for downstream consumers.
            raw_file = text_file.detach()
        except Exception as detach_error:
            logger.warning(
                f"Failed to detach TextIOWrapper for EML upload, using original file: {detach_error}"
            )
            raw_file = file
        try:
            raw_file.seek(0)
        except Exception:
            pass

    text_content = []
    for part in message.walk():
        if part.get_content_type().startswith("text/plain"):
            payload = part.get_payload()
            if isinstance(payload, str):
                text_content.append(payload)
            elif isinstance(payload, list):
                text_content.extend(item for item in payload if isinstance(item, str))
            else:
                logger.warning(f"Unexpected payload type: {type(payload)}")
    return TEXT_SECTION_SEPARATOR.join(text_content)


def epub_to_text(file: IO[Any]) -> str:
    with zipfile.ZipFile(file) as epub:
        text_content = []
        for item in epub.infolist():
            if item.filename.endswith(".xhtml") or item.filename.endswith(".html"):
                with epub.open(item) as html_file:
                    text_content.append(parse_html_page_basic(html_file))
        return TEXT_SECTION_SEPARATOR.join(text_content)


def file_io_to_text(file: IO[Any]) -> str:
    encoding = detect_encoding(file)
    file_content, _ = read_text_file(file, encoding=encoding)
    return file_content


def extract_file_text(
    file: IO[Any],
    file_name: str,
    break_on_unprocessable: bool = True,
    extension: str | None = None,
) -> str:
    """
    Legacy function that returns *only text*, ignoring embedded images.
    For backward-compatibility in code that only wants text.

    NOTE: Ignoring seems to be defined as returning an empty string for files it can't
    handle (such as images).
    """
    extension_to_function: dict[str, Callable[[IO[Any]], str]] = {
        ".pdf": pdf_to_text,
        ".docx": lambda f: read_docx_file(f, file_name)[0],  # no images
        ".pptx": lambda f: pptx_to_text(f, file_name),
        ".xlsx": lambda f: xlsx_to_text(f, file_name),
        ".eml": eml_to_text,
        ".epub": epub_to_text,
        ".html": parse_html_page_basic,
    }

    try:
        if get_unstructured_api_key():
            try:
                return unstructured_to_text(file, file_name)
            except Exception as unstructured_error:
                logger.error(
                    f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing."
                )
        if extension is None:
            extension = get_file_ext(file_name)

        if extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:
            func = extension_to_function.get(extension, file_io_to_text)
            file.seek(0)
            return func(file)

        # If unknown extension, maybe it's a text file
        file.seek(0)
        if is_text_file(file):
            return file_io_to_text(file)

        raise ValueError("Unknown file extension or not recognized as text data")

    except Exception as e:
        if break_on_unprocessable:
            raise RuntimeError(
                f"Failed to process file {file_name or 'Unknown'}: {str(e)}"
            ) from e
        logger.warning(f"Failed to process file {file_name or 'Unknown'}: {str(e)}")
        return ""


class ExtractionResult(NamedTuple):
    """Structured result from text and image extraction from various file types."""

    text_content: str
    embedded_images: Sequence[tuple[bytes, str]]
    metadata: dict[str, Any]


def extract_result_from_text_file(file: IO[Any]) -> ExtractionResult:
    encoding = detect_encoding(file)
    text_content_raw, file_metadata = read_text_file(
        file, encoding=encoding, ignore_onyx_metadata=False
    )
    return ExtractionResult(
        text_content=text_content_raw,
        embedded_images=[],
        metadata=file_metadata,
    )


def extract_text_and_images(
    file: IO[Any],
    file_name: str,
    pdf_pass: str | None = None,
    content_type: str | None = None,
    image_callback: Callable[[bytes, str], None] | None = None,
) -> ExtractionResult:
    """
    Primary new function for the updated connector.
    Returns structured extraction result with text content, embedded images, and metadata.

    Args:
        file: File-like object to extract content from.
        file_name: Name of the file (used to determine extension/type).
        pdf_pass: Optional password for encrypted PDFs.
        content_type: Optional MIME type override for the file.
        image_callback: Optional callback for streaming image extraction. When provided,
            embedded images are passed to this callback one at a time as (bytes, filename)
            instead of being accumulated in the returned ExtractionResult.embedded_images
            list. This is a memory optimization for large documents with many images -
            the caller can process/store each image immediately rather than holding all
            images in memory. When using a callback, ExtractionResult.embedded_images
            will be an empty list.

    Returns:
        ExtractionResult containing text_content, embedded_images (empty if callback used),
        and metadata extracted from the file.
    """
    res = _extract_text_and_images(
        file, file_name, pdf_pass, content_type, image_callback
    )
    # Clean up any temporary objects and force garbage collection
    unreachable = gc.collect()
    logger.info(f"Unreachable objects: {unreachable}")

    return res


def _extract_text_and_images(
    file: IO[Any],
    file_name: str,
    pdf_pass: str | None = None,
    content_type: str | None = None,
    image_callback: Callable[[bytes, str], None] | None = None,
) -> ExtractionResult:
    file.seek(0)

    if get_unstructured_api_key():
        try:
            text_content = unstructured_to_text(file, file_name)
            return ExtractionResult(
                text_content=text_content, embedded_images=[], metadata={}
            )
        except Exception as e:
            logger.error(
                f"Failed to process with Unstructured: {str(e)}. Falling back to normal processing."
            )
            file.seek(0)  # Reset file pointer just in case

    # When we upload a document via a connector or MyDocuments, we extract and store the content of files
    # with content types in UploadMimeTypes.DOCUMENT_MIME_TYPES as plain text files.
    # As a result, the file name extension may differ from the original content type.
    # We process files with a plain text content type first to handle this scenario.
    if content_type in OnyxMimeTypes.TEXT_MIME_TYPES:
        return extract_result_from_text_file(file)

    # Default processing
    try:
        extension = get_file_ext(file_name)
        # docx example for embedded images
        if extension == ".docx":
            text_content, images = read_docx_file(
                file, file_name, extract_images=True, image_callback=image_callback
            )
            return ExtractionResult(
                text_content=text_content, embedded_images=images, metadata={}
            )

        # PDF example: we do not show complicated PDF image extraction here
        # so we simply extract text for now and skip images.
        if extension == ".pdf":
            text_content, pdf_metadata, images = read_pdf_file(
                file,
                pdf_pass,
                extract_images=get_image_extraction_and_analysis_enabled(),
                image_callback=image_callback,
            )
            return ExtractionResult(
                text_content=text_content, embedded_images=images, metadata=pdf_metadata
            )

        # For PPTX, XLSX, EML, etc., we do not show embedded image logic here.
        # You can do something similar to docx if needed.
        if extension == ".pptx":
            return ExtractionResult(
                text_content=pptx_to_text(file, file_name=file_name),
                embedded_images=[],
                metadata={},
            )

        if extension == ".xlsx":
            return ExtractionResult(
                text_content=xlsx_to_text(file, file_name=file_name),
                embedded_images=[],
                metadata={},
            )

        if extension == ".eml":
            return ExtractionResult(
                text_content=eml_to_text(file), embedded_images=[], metadata={}
            )

        if extension == ".epub":
            return ExtractionResult(
                text_content=epub_to_text(file), embedded_images=[], metadata={}
            )

        if extension == ".html":
            return ExtractionResult(
                text_content=parse_html_page_basic(file),
                embedded_images=[],
                metadata={},
            )

        # If we reach here and it's a recognized text extension
        if extension in OnyxFileExtensions.PLAIN_TEXT_EXTENSIONS:
            return extract_result_from_text_file(file)

        # If it's an image file or something else, we do not parse embedded images from them
        # just return empty text
        return ExtractionResult(text_content="", embedded_images=[], metadata={})

    except Exception as e:
        logger.exception(f"Failed to extract text/images from {file_name}: {e}")
        return ExtractionResult(text_content="", embedded_images=[], metadata={})


def docx_to_txt_filename(file_path: str) -> str:
    return file_path.rsplit(".", 1)[0] + ".txt"


================================================
FILE: backend/onyx/file_processing/file_types.py
================================================
PRESENTATION_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
)

SPREADSHEET_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)
WORD_PROCESSING_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
PDF_MIME_TYPE = "application/pdf"
PLAIN_TEXT_MIME_TYPE = "text/plain"


class OnyxMimeTypes:
    IMAGE_MIME_TYPES = {"image/jpg", "image/jpeg", "image/png", "image/webp"}
    CSV_MIME_TYPES = {"text/csv"}
    TABULAR_MIME_TYPES = CSV_MIME_TYPES | {SPREADSHEET_MIME_TYPE}
    TEXT_MIME_TYPES = {
        PLAIN_TEXT_MIME_TYPE,
        "text/markdown",
        "text/x-markdown",
        "text/x-log",
        "text/x-config",
        "text/tab-separated-values",
        "application/json",
        "application/xml",
        "text/xml",
        "application/x-yaml",
        "application/yaml",
        "text/yaml",
        "text/x-yaml",
    }
    DOCUMENT_MIME_TYPES = {
        PDF_MIME_TYPE,
        WORD_PROCESSING_MIME_TYPE,
        PRESENTATION_MIME_TYPE,
        "message/rfc822",
        "application/epub+zip",
    }

    ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES.union(
        TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, TABULAR_MIME_TYPES
    )

    EXCLUDED_IMAGE_TYPES = {
        "image/bmp",
        "image/tiff",
        "image/gif",
        "image/svg+xml",
        "image/avif",
    }


class OnyxFileExtensions:
    TABULAR_EXTENSIONS = {
        ".csv",
        ".tsv",
        ".xlsx",
    }
    PLAIN_TEXT_EXTENSIONS = {
        ".txt",
        ".md",
        ".mdx",
        ".conf",
        ".log",
        ".json",
        ".csv",
        ".tsv",
        ".xml",
        ".yml",
        ".yaml",
        ".sql",
    }
    DOCUMENT_EXTENSIONS = {
        ".pdf",
        ".docx",
        ".pptx",
        ".xlsx",
        ".eml",
        ".epub",
        ".html",
    }
    IMAGE_EXTENSIONS = {
        ".png",
        ".jpg",
        ".jpeg",
        ".webp",
    }

    TEXT_AND_DOCUMENT_EXTENSIONS = PLAIN_TEXT_EXTENSIONS.union(DOCUMENT_EXTENSIONS)

    ALL_ALLOWED_EXTENSIONS = TEXT_AND_DOCUMENT_EXTENSIONS.union(IMAGE_EXTENSIONS)


================================================
FILE: backend/onyx/file_processing/html_utils.py
================================================
import re
from copy import copy
from dataclasses import dataclass
from io import BytesIO
from typing import IO

import bs4

from onyx.configs.app_configs import HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY
from onyx.configs.app_configs import PARSE_WITH_TRAFILATURA
from onyx.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from onyx.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
from onyx.utils.logger import setup_logger

logger = setup_logger()

MINTLIFY_UNWANTED = ["sticky", "hidden"]


@dataclass
class ParsedHTML:
    title: str | None
    cleaned_text: str


def strip_excessive_newlines_and_spaces(document: str) -> str:
    # collapse repeated spaces into one
    document = re.sub(r" +", " ", document)
    # remove trailing spaces
    document = re.sub(r" +[\n\r]", "\n", document)
    # remove repeated newlines
    document = re.sub(r"[\n\r]+", "\n", document)
    return document.strip()


def strip_newlines(document: str) -> str:
    # HTML might contain newlines which are just whitespaces to a browser
    return re.sub(r"[\n\r]+", " ", document)


def format_element_text(element_text: str, link_href: str | None) -> str:
    element_text_no_newlines = strip_newlines(element_text)

    if (
        not link_href
        or HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY
        == HtmlBasedConnectorTransformLinksStrategy.STRIP
    ):
        return element_text_no_newlines

    return f"[{element_text_no_newlines}]({link_href})"


def parse_html_with_trafilatura(html_content: str) -> str:
    """Parse HTML content using trafilatura."""
    import trafilatura  # type: ignore
    from trafilatura.settings import use_config  # type: ignore

    config = use_config()
    config.set("DEFAULT", "include_links", "True")
    config.set("DEFAULT", "include_tables", "True")
    config.set("DEFAULT", "include_images", "True")
    config.set("DEFAULT", "include_formatting", "True")

    extracted_text = trafilatura.extract(html_content, config=config)
    return strip_excessive_newlines_and_spaces(extracted_text) if extracted_text else ""


def format_document_soup(
    document: bs4.BeautifulSoup, table_cell_separator: str = "\t"
) -> str:
    """Format html to a flat text document.

    The following goals:
    - Newlines from within the HTML are removed (as browser would ignore them as well).
    - Repeated newlines/spaces are removed (as browsers would ignore them).
    - Newlines only before and after headlines and paragraphs or when explicit (br or pre tag)
    - Table columns/rows are separated by newline
    - List elements are separated by newline and start with a hyphen
    """
    text = ""
    list_element_start = False
    verbatim_output = 0
    in_table = False
    last_added_newline = False
    link_href: str | None = None

    for e in document.descendants:
        verbatim_output -= 1
        if isinstance(e, bs4.element.NavigableString):
            if isinstance(e, (bs4.element.Comment, bs4.element.Doctype)):
                continue
            element_text = e.text
            if in_table:
                # Tables are represented in natural language with rows separated by newlines
                # Can't have newlines then in the table elements
                element_text = element_text.replace("\n", " ").strip()

            # Some tags are translated to spaces but in the logic underneath this section, we
            # translate them to newlines as a browser should render them such as with br
            # This logic here avoids a space after newline when it shouldn't be there.
            if last_added_newline and element_text.startswith(" "):
                element_text = element_text[1:]
                last_added_newline = False

            if element_text:
                content_to_add = (
                    element_text
                    if verbatim_output > 0
                    else format_element_text(element_text, link_href)
                )

                # Don't join separate elements without any spacing
                if (text and not text[-1].isspace()) and (
                    content_to_add and not content_to_add[0].isspace()
                ):
                    text += " "

                text += content_to_add

                list_element_start = False
        elif isinstance(e, bs4.element.Tag):
            # table is standard HTML element
            if e.name == "table":
                in_table = True
            # tr is for rows
            elif e.name == "tr" and in_table:
                text += "\n"
            # td for data cell, th for header
            elif e.name in ["td", "th"] and in_table:
                text += table_cell_separator
            elif e.name == "/table":
                in_table = False
            elif in_table:
                # don't handle other cases while in table
                pass
            elif e.name == "a":
                href_value = e.get("href", None)
                # mostly for typing, having multiple hrefs is not valid HTML
                link_href = (
                    href_value[0] if isinstance(href_value, list) else href_value
                )
            elif e.name == "/a":
                link_href = None
            elif e.name in ["p", "div"]:
                if not list_element_start:
                    text += "\n"
            elif e.name in ["h1", "h2", "h3", "h4"]:
                text += "\n"
                list_element_start = False
                last_added_newline = True
            elif e.name == "br":
                text += "\n"
                list_element_start = False
                last_added_newline = True
            elif e.name == "li":
                text += "\n- "
                list_element_start = True
            elif e.name == "pre":
                if verbatim_output <= 0:
                    verbatim_output = len(list(e.childGenerator()))
    return strip_excessive_newlines_and_spaces(text)


def parse_html_page_basic(text: str | BytesIO | IO[bytes]) -> str:
    soup = bs4.BeautifulSoup(text, "lxml")
    return format_document_soup(soup)


def web_html_cleanup(
    page_content: str | bs4.BeautifulSoup,
    mintlify_cleanup_enabled: bool = True,
    additional_element_types_to_discard: list[str] | None = None,
) -> ParsedHTML:
    if isinstance(page_content, str):
        soup = bs4.BeautifulSoup(page_content, "lxml")
    else:
        soup = page_content

    title_tag = soup.find("title")
    title = None
    if title_tag and title_tag.text:
        title = title_tag.text
        title_tag.extract()

    # Heuristics based cleaning of elements based on css classes
    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
    if mintlify_cleanup_enabled:
        unwanted_classes.extend(MINTLIFY_UNWANTED)
    for undesired_element in unwanted_classes:
        [
            tag.extract()
            for tag in soup.find_all(
                class_=lambda x: x and undesired_element in x.split()
            )
        ]

    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
        [tag.extract() for tag in soup.find_all(undesired_tag)]

    if additional_element_types_to_discard:
        for undesired_tag in additional_element_types_to_discard:
            [tag.extract() for tag in soup.find_all(undesired_tag)]

    soup_string = str(soup)
    page_text = ""

    if PARSE_WITH_TRAFILATURA:
        try:
            page_text = parse_html_with_trafilatura(soup_string)
            if not page_text:
                raise ValueError("Empty content returned by trafilatura.")
        except Exception as e:
            logger.info(f"Trafilatura parsing failed: {e}. Falling back on bs4.")
            page_text = format_document_soup(soup)
    else:
        page_text = format_document_soup(soup)

    # 200B is ZeroWidthSpace which we don't care for
    cleaned_text = page_text.replace("\u200b", "")

    return ParsedHTML(title=title, cleaned_text=cleaned_text)


================================================
FILE: backend/onyx/file_processing/image_summarization.py
================================================
import base64
from io import BytesIO

from PIL import Image

from onyx.configs.app_configs import IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.configs.app_configs import IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import ContentPart
from onyx.llm.models import ImageContentPart
from onyx.llm.models import ImageUrlDetail
from onyx.llm.models import SystemMessage
from onyx.llm.models import TextContentPart
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger

logger = setup_logger()


class UnsupportedImageFormatError(ValueError):
    """Raised when an image uses a MIME type unsupported by the summarization flow."""


def prepare_image_bytes(image_data: bytes) -> str:
    """Prepare image bytes for summarization.
    Resizes image if it's larger than 20MB. Encodes image as a base64 string."""
    image_data = _resize_image_if_needed(image_data)

    # encode image (base64)
    encoded_image = _encode_image_for_llm_prompt(image_data)

    return encoded_image


def summarize_image_pipeline(
    llm: LLM,
    image_data: bytes,
    query: str | None = None,
    system_prompt: str | None = None,
) -> str:
    """Pipeline to generate a summary of an image.
    Resizes images if it is bigger than 20MB. Encodes image as a base64 string.
    And finally uses the Default LLM to generate a textual summary of the image."""
    # resize image if it's bigger than 20MB
    encoded_image = prepare_image_bytes(image_data)

    summary = _summarize_image(
        encoded_image,
        llm,
        query,
        system_prompt,
    )

    return summary


def summarize_image_with_error_handling(
    llm: LLM | None,
    image_data: bytes,
    context_name: str,
    system_prompt: str = IMAGE_SUMMARIZATION_SYSTEM_PROMPT,
    user_prompt_template: str = IMAGE_SUMMARIZATION_USER_PROMPT,
) -> str | None:
    """Wrapper function that handles error cases and configuration consistently.

    Args:
        llm: The LLM with vision capabilities to use for summarization
        image_data: The raw image bytes
        context_name: Name or title of the image for context
        system_prompt: System prompt to use for the LLM
        user_prompt_template: User prompt to use (without title)

    Returns:
        The image summary text, or None if summarization failed or is disabled
    """
    if llm is None:
        return None

    # Prepend the image filename to the user prompt
    user_prompt = (
        f"The image has the file name '{context_name}'.\n{user_prompt_template}"
    )
    try:
        return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
    except UnsupportedImageFormatError:
        magic_hex = image_data[:8].hex() if image_data else "empty"
        logger.info(
            "Skipping image summarization due to unsupported MIME type "
            "for %s (magic_bytes=%s, size=%d bytes)",
            context_name,
            magic_hex,
            len(image_data),
        )
        return None


def _summarize_image(
    encoded_image: str,
    llm: LLM,
    query: str | None = None,
    system_prompt: str | None = None,
) -> str:
    """Use default LLM (if it is multimodal) to generate a summary of an image."""

    messages: list[ChatCompletionMessage] = []

    if system_prompt:
        messages.append(SystemMessage(content=system_prompt))

    content: list[ContentPart] = []
    if query:
        content.append(TextContentPart(text=query))
    content.append(ImageContentPart(image_url=ImageUrlDetail(url=encoded_image)))

    messages.append(
        UserMessage(
            content=content,
        ),
    )

    try:
        # Call LLM with Braintrust tracing
        with llm_generation_span(
            llm=llm,
            flow="image_summarization",
            input_messages=[{"type": "image_summarization_request"}],
        ) as span_generation:
            # Note: We don't include the actual image in the span input to avoid bloating traces
            response = llm.invoke(messages)
            record_llm_response(span_generation, response)
            summary = llm_response_to_string(response)

        return summary

    except Exception as e:
        # Extract structured details from LiteLLM exceptions when available,
        # rather than dumping the full messages payload (which contains base64
        # image data and produces enormous, unreadable error logs).
        str_e = str(e)
        if len(str_e) > 512:
            str_e = str_e[:512] + "... (truncated)"
        parts = [f"Summarization failed: {type(e).__name__}: {str_e}"]
        status_code = getattr(e, "status_code", None)
        llm_provider = getattr(e, "llm_provider", None)
        model = getattr(e, "model", None)
        if status_code is not None:
            parts.append(f"status_code={status_code}")
        if llm_provider is not None:
            parts.append(f"llm_provider={llm_provider}")
        if model is not None:
            parts.append(f"model={model}")
        raise ValueError(" | ".join(parts)) from e


def _encode_image_for_llm_prompt(image_data: bytes) -> str:
    """Prepare a data URL with the correct MIME type for the LLM message."""
    try:
        mime_type = get_image_type_from_bytes(image_data)
    except ValueError as exc:
        raise UnsupportedImageFormatError(
            "Unsupported image format for summarization"
        ) from exc

    base64_encoded_data = base64.b64encode(image_data).decode("utf-8")

    return f"data:{mime_type};base64,{base64_encoded_data}"


def _resize_image_if_needed(image_data: bytes, max_size_mb: int = 20) -> bytes:
    """Resize image if it's larger than the specified max size in MB."""
    max_size_bytes = max_size_mb * 1024 * 1024

    if len(image_data) > max_size_bytes:
        with Image.open(BytesIO(image_data)) as img:
            # Reduce dimensions for better size reduction
            img.thumbnail((1024, 1024), Image.Resampling.LANCZOS)
            output = BytesIO()

            # Save with lower quality for compression
            img.save(output, format="JPEG", quality=85)
            resized_data = output.getvalue()

            return resized_data

    return image_data


================================================
FILE: backend/onyx/file_processing/image_utils.py
================================================
from io import BytesIO
from typing import Tuple

from onyx.configs.constants import FileOrigin
from onyx.connectors.models import ImageSection
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger

logger = setup_logger()


def store_image_and_create_section(
    image_data: bytes,
    file_id: str,
    display_name: str,
    link: str | None = None,
    media_type: str = "application/octet-stream",
    file_origin: FileOrigin = FileOrigin.OTHER,
) -> Tuple[ImageSection, str | None]:
    """
    Stores an image in FileStore and creates an ImageSection object without summarization.

    Args:
        image_data: Raw image bytes
        file_id: Base identifier for the file
        display_name: Human-readable name for the image
        media_type: MIME type of the image
        file_origin: Origin of the file (e.g., CONFLUENCE, GOOGLE_DRIVE, etc.)

    Returns:
        Tuple containing:
        - ImageSection object with image reference
        - The file_id in FileStore or None if storage failed
    """
    # Storage logic
    try:
        file_store = get_default_file_store()
        file_id = file_store.save_file(
            content=BytesIO(image_data),
            display_name=display_name,
            file_origin=file_origin,
            file_type=media_type,
            file_id=file_id,
        )
    except Exception as e:
        logger.error(f"Failed to store image: {e}")
        raise e

    # Create an ImageSection with empty text (will be filled by LLM later in the pipeline)
    return (
        ImageSection(image_file_id=file_id, link=link),
        file_id,
    )


================================================
FILE: backend/onyx/file_processing/password_validation.py
================================================
from collections.abc import Callable
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from typing import IO

from onyx.file_processing.extract_file_text import get_file_ext
from onyx.utils.logger import setup_logger

logger = setup_logger()

PASSWORD_PROTECTED_FILES = [
    ".pdf",
    ".docx",
    ".pptx",
    ".xlsx",
]


@contextmanager
def preserve_position(file: IO[Any]) -> Generator[IO[Any], None, None]:
    """Preserves the file's cursor position"""
    pos = file.tell()
    try:
        file.seek(0)
        yield file
    finally:
        file.seek(pos)


def is_pdf_protected(file: IO[Any]) -> bool:
    from pypdf import PdfReader

    with preserve_position(file):
        reader = PdfReader(file)

    return bool(reader.is_encrypted)


def is_docx_protected(file: IO[Any]) -> bool:
    return is_office_file_protected(file)


def is_pptx_protected(file: IO[Any]) -> bool:
    return is_office_file_protected(file)


def is_xlsx_protected(file: IO[Any]) -> bool:
    return is_office_file_protected(file)


def is_office_file_protected(file: IO[Any]) -> bool:
    import msoffcrypto  # type: ignore[import-untyped]

    with preserve_position(file):
        office = msoffcrypto.OfficeFile(file)

    return office.is_encrypted()


def is_file_password_protected(
    file: IO[Any],
    file_name: str,
    extension: str | None = None,
) -> bool:
    extension_to_function: dict[str, Callable[[IO[Any]], bool]] = {
        ".pdf": is_pdf_protected,
        ".docx": is_docx_protected,
        ".pptx": is_pptx_protected,
        ".xlsx": is_xlsx_protected,
    }

    if not extension:
        extension = get_file_ext(file_name)

    if extension not in PASSWORD_PROTECTED_FILES:
        return False

    if extension not in extension_to_function:
        logger.warning(
            f"Extension={extension} can be password protected, but no function found"
        )
        return False

    func = extension_to_function[extension]

    return func(file)


================================================
FILE: backend/onyx/file_processing/unstructured.py
================================================
from typing import Any
from typing import cast
from typing import IO
from typing import TYPE_CHECKING

from onyx.configs.constants import KV_UNSTRUCTURED_API_KEY
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    from unstructured_client.models import operations


logger = setup_logger()


def get_unstructured_api_key() -> str | None:
    kv_store = get_kv_store()
    try:
        return cast(str, kv_store.load(KV_UNSTRUCTURED_API_KEY))
    except KvKeyNotFoundError:
        return None


def update_unstructured_api_key(api_key: str) -> None:
    kv_store = get_kv_store()
    kv_store.store(KV_UNSTRUCTURED_API_KEY, api_key)


def delete_unstructured_api_key() -> None:
    kv_store = get_kv_store()
    kv_store.delete(KV_UNSTRUCTURED_API_KEY)


def _sdk_partition_request(
    file: IO[Any], file_name: str, **kwargs: Any
) -> "operations.PartitionRequest":
    from unstructured_client.models import operations
    from unstructured_client.models import shared

    file.seek(0, 0)
    try:
        request = operations.PartitionRequest(
            partition_parameters=shared.PartitionParameters(
                files=shared.Files(content=file.read(), file_name=file_name),
                **kwargs,
            ),
        )
        return request
    except Exception as e:
        logger.error(f"Error creating partition request for file {file_name}: {str(e)}")
        raise


def unstructured_to_text(file: IO[Any], file_name: str) -> str:
    from unstructured.staging.base import dict_to_elements
    from unstructured_client import UnstructuredClient

    logger.debug(f"Starting to read file: {file_name}")
    req = _sdk_partition_request(file, file_name, strategy="fast")

    unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())

    response = unstructured_client.general.partition(request=req)

    if response.status_code != 200:
        err = f"Received unexpected status code {response.status_code} from Unstructured API."
        logger.error(err)
        raise ValueError(err)

    elements = dict_to_elements(response.elements or [])
    return "\n\n".join(str(el) for el in elements)


================================================
FILE: backend/onyx/file_store/README.md
================================================
# Onyx File Store

The Onyx file store provides a unified interface for storing files and large binary objects in S3-compatible storage systems. It supports AWS S3, MinIO, Azure Blob Storage, Digital Ocean Spaces, and other S3-compatible services.

## Architecture

The file store uses a single database table (`file_record`) to store file metadata while the actual file content is stored in external S3-compatible storage. This approach provides scalability, cost-effectiveness, and decouples file storage from the database.

### Database Schema

The `file_record` table contains the following columns:

- `file_id` (primary key): Unique identifier for the file
- `display_name`: Human-readable name for the file
- `file_origin`: Origin/source of the file (enum)
- `file_type`: MIME type of the file
- `file_metadata`: Additional metadata as JSON
- `bucket_name`: External storage bucket/container name
- `object_key`: External storage object key/path
- `created_at`: Timestamp when the file was created
- `updated_at`: Timestamp when the file was last updated

## Storage Backend

### S3-Compatible Storage

Stores files in external S3-compatible storage systems while keeping metadata in the database.

**Pros:**
- Scalable storage
- Cost-effective for large files
- CDN integration possible
- Decoupled from database
- Wide ecosystem support

**Cons:**
- Additional infrastructure required
- Network dependency
- Eventual consistency considerations

## Configuration

All configuration is handled via environment variables. The system requires S3-compatible storage to be configured.

### AWS S3

```bash
S3_FILE_STORE_BUCKET_NAME=your-bucket-name  # Defaults to 'onyx-file-store-bucket'
S3_FILE_STORE_PREFIX=onyx-files  # Optional, defaults to 'onyx-files'

# AWS credentials (use one of these methods):
# 1. Environment variables
S3_AWS_ACCESS_KEY_ID=your-access-key
S3_AWS_SECRET_ACCESS_KEY=your-secret-key
AWS_REGION_NAME=us-east-2  # Optional, defaults to 'us-east-2'

# 2. IAM roles (recommended for EC2/ECS deployments)
# No additional configuration needed if using IAM roles
```

### MinIO

```bash
S3_FILE_STORE_BUCKET_NAME=your-bucket-name
S3_ENDPOINT_URL=http://localhost:9000  # MinIO endpoint
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
AWS_REGION_NAME=us-east-1  # Any region name
S3_VERIFY_SSL=false  # Optional, defaults to false
```

### Digital Ocean Spaces

```bash
S3_FILE_STORE_BUCKET_NAME=your-space-name
S3_ENDPOINT_URL=https://nyc3.digitaloceanspaces.com
S3_AWS_ACCESS_KEY_ID=your-spaces-key
S3_AWS_SECRET_ACCESS_KEY=your-spaces-secret
AWS_REGION_NAME=nyc3
```

### Other S3-Compatible Services

The file store works with any S3-compatible service. Simply configure:
- `S3_FILE_STORE_BUCKET_NAME`: Your bucket/container name
- `S3_ENDPOINT_URL`: The service endpoint URL
- `S3_AWS_ACCESS_KEY_ID` and `S3_AWS_SECRET_ACCESS_KEY`: Your credentials
- `AWS_REGION_NAME`: The region (any valid region name)

## Implementation

The system uses the `S3BackedFileStore` class that implements the abstract `FileStore` interface. The database uses generic column names (`bucket_name`, `object_key`) to maintain compatibility with different S3-compatible services.

### File Store Interface

The `FileStore` abstract base class defines the following methods:

- `initialize()`: Initialize the storage backend (create bucket if needed)
- `has_file(file_id, file_origin, file_type)`: Check if a file exists
- `save_file(content, display_name, file_origin, file_type, file_metadata, file_id)`: Save a file
- `read_file(file_id, mode, use_tempfile)`: Read file content
- `read_file_record(file_id)`: Get file metadata from database
- `delete_file(file_id)`: Delete a file and its metadata
- `get_file_with_mime_type(file_id)`: Get file with parsed MIME type

## Usage Example

```python
from onyx.file_store.file_store import get_default_file_store
from onyx.configs.constants import FileOrigin

# Get the configured file store
file_store = get_default_file_store(db_session)

# Initialize the storage backend (creates bucket if needed)
file_store.initialize()

# Save a file
with open("example.pdf", "rb") as f:
    file_id = file_store.save_file(
        content=f,
        display_name="Important Document.pdf",
        file_origin=FileOrigin.OTHER,
        file_type="application/pdf",
        file_metadata={"department": "engineering", "version": "1.0"}
    )

# Check if a file exists
exists = file_store.has_file(
    file_id=file_id,
    file_origin=FileOrigin.OTHER,
    file_type="application/pdf"
)

# Read a file
file_content = file_store.read_file(file_id)

# Read file with temporary file (for large files)
file_content = file_store.read_file(file_id, use_tempfile=True)

# Get file metadata
file_record = file_store.read_file_record(file_id)

# Get file with MIME type detection
file_with_mime = file_store.get_file_with_mime_type(file_id)

# Delete a file
file_store.delete_file(file_id)
```

## Initialization

When deploying the application, ensure that:

1. The S3-compatible storage service is accessible
2. Credentials are properly configured
3. The bucket specified in `S3_FILE_STORE_BUCKET_NAME` exists or the service account has permissions to create it
4. Call `file_store.initialize()` during application startup to ensure the bucket exists

The file store will automatically create the bucket if it doesn't exist and the credentials have sufficient permissions.
 

================================================
FILE: backend/onyx/file_store/constants.py
================================================
MAX_IN_MEMORY_SIZE = 30 * 1024 * 1024  # 30MB
STANDARD_CHUNK_SIZE = 10 * 1024 * 1024  # 10MB chunks


================================================
FILE: backend/onyx/file_store/document_batch_storage.py
================================================
import json
from abc import ABC
from abc import abstractmethod
from enum import Enum
from io import StringIO
from typing import List
from typing import Optional
from typing import TypeAlias

from pydantic import BaseModel

from onyx.configs.constants import FileOrigin
from onyx.connectors.models import DocExtractionContext
from onyx.connectors.models import DocIndexingContext
from onyx.connectors.models import Document
from onyx.file_store.file_store import FileStore
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger

logger = setup_logger()


class DocumentBatchStorageStateType(str, Enum):
    EXTRACTION = "extraction"
    INDEXING = "indexing"


DocumentStorageState: TypeAlias = DocExtractionContext | DocIndexingContext

STATE_TYPE_TO_MODEL: dict[str, type[DocumentStorageState]] = {
    DocumentBatchStorageStateType.EXTRACTION.value: DocExtractionContext,
    DocumentBatchStorageStateType.INDEXING.value: DocIndexingContext,
}


class BatchStoragePathInfo(BaseModel):
    cc_pair_id: int
    index_attempt_id: int
    batch_num: int


class DocumentBatchStorage(ABC):
    """Abstract base class for document batch storage implementations."""

    def __init__(self, cc_pair_id: int, index_attempt_id: int):
        self.cc_pair_id = cc_pair_id
        self.index_attempt_id = index_attempt_id
        self.base_path = f"{self._per_cc_pair_base_path()}/{index_attempt_id}"

    @abstractmethod
    def store_batch(self, batch_num: int, documents: List[Document]) -> None:
        """Store a batch of documents."""

    @abstractmethod
    def get_batch(self, batch_num: int) -> Optional[List[Document]]:
        """Retrieve a batch of documents."""

    @abstractmethod
    def delete_batch_by_name(self, batch_file_name: str) -> None:
        """Delete a specific batch."""

    @abstractmethod
    def delete_batch_by_num(self, batch_num: int) -> None:
        """Delete a specific batch."""

    @abstractmethod
    def cleanup_all_batches(self) -> None:
        """Clean up all batches and state for this index attempt."""

    @abstractmethod
    def get_all_batches_for_cc_pair(self) -> list[str]:
        """Get all IDs of batches stored in the file store."""

    @abstractmethod
    def update_old_batches_to_new_index_attempt(self, batch_names: list[str]) -> None:
        """Update all batches to the new index attempt."""
        """
        This is used when we need to re-issue docprocessing tasks for a new index attempt.
        We need to update the batch file names to the new index attempt ID.
        """

    @abstractmethod
    def extract_path_info(self, path: str) -> BatchStoragePathInfo | None:
        """Extract path info from a path."""

    def _serialize_documents(self, documents: list[Document]) -> str:
        """Serialize documents to JSON string."""
        # Use mode='json' to properly serialize datetime and other complex types
        return json.dumps([doc.model_dump(mode="json") for doc in documents], indent=2)

    def _deserialize_documents(self, data: str) -> list[Document]:
        """Deserialize documents from JSON string."""
        doc_dicts = json.loads(data)
        return [
            Document.model_validate(self._normalize_doc_dict(doc_dict))
            for doc_dict in doc_dicts
        ]

    def _normalize_doc_dict(self, doc_dict: dict) -> dict:
        """Normalize document dict to handle legacy data with non-string metadata values.

        Before the _convert_to_metadata_value fix, Salesforce connector stored raw
        types (bool, float, None) in metadata. This converts them to strings for
        backward compatibility.
        """
        if "metadata" not in doc_dict:
            return doc_dict

        metadata = doc_dict["metadata"]
        if not isinstance(metadata, dict):
            return doc_dict

        normalized_metadata: dict[str, str | list[str]] = {}
        converted_keys: list[str] = []
        for key, value in metadata.items():
            if isinstance(value, list):
                normalized_metadata[key] = [str(item) for item in value]
            elif isinstance(value, str):
                normalized_metadata[key] = value
            else:
                # Convert bool, int, float, None to string
                converted_keys.append(f"{key}={type(value).__name__}")
                normalized_metadata[key] = str(value)

        if converted_keys:
            doc_id = doc_dict.get("id", "unknown")
            logger.warning(
                f"Normalized legacy metadata for document {doc_id}: {converted_keys}"
            )

        doc_dict["metadata"] = normalized_metadata
        return doc_dict

    def _per_cc_pair_base_path(self) -> str:
        """Get the base path for the cc pair."""
        return f"iab/{self.cc_pair_id}"


class FileStoreDocumentBatchStorage(DocumentBatchStorage):
    """FileStore-based implementation of document batch storage."""

    def __init__(self, cc_pair_id: int, index_attempt_id: int, file_store: FileStore):
        super().__init__(cc_pair_id, index_attempt_id)
        self.file_store = file_store

    def _get_batch_file_name(self, batch_num: int) -> str:
        """Generate file name for a document batch."""
        return f"{self.base_path}/{batch_num}.json"

    def store_batch(self, batch_num: int, documents: list[Document]) -> None:
        """Store a batch of documents using FileStore."""
        file_name = self._get_batch_file_name(batch_num)
        try:
            data = self._serialize_documents(documents)
            content = StringIO(data)

            self.file_store.save_file(
                file_id=file_name,
                content=content,
                display_name=f"Document Batch {batch_num}",
                file_origin=FileOrigin.OTHER,
                file_type="application/json",
                file_metadata={
                    "batch_num": batch_num,
                    "document_count": str(len(documents)),
                },
            )

            logger.debug(
                f"Stored batch {batch_num} with {len(documents)} documents to FileStore as {file_name}"
            )
        except Exception as e:
            logger.error(f"Failed to store batch {batch_num}: {e}")
            raise

    def get_batch(self, batch_num: int) -> list[Document] | None:
        """Retrieve a batch of documents from FileStore."""
        file_name = self._get_batch_file_name(batch_num)
        try:
            # Check if file exists
            if not self.file_store.has_file(
                file_id=file_name,
                file_origin=FileOrigin.OTHER,
                file_type="application/json",
            ):
                logger.warning(
                    f"Batch {batch_num} not found in FileStore with name {file_name}"
                )
                return None

            content_io = self.file_store.read_file(file_name)
            data = content_io.read().decode("utf-8")

            documents = self._deserialize_documents(data)
            logger.debug(
                f"Retrieved batch {batch_num} with {len(documents)} documents from FileStore"
            )
            return documents
        except Exception as e:
            logger.error(f"Failed to retrieve batch {batch_num}: {e}")
            raise

    def delete_batch_by_name(self, batch_file_name: str) -> None:
        """Delete a specific batch from FileStore."""
        self.file_store.delete_file(batch_file_name)
        logger.debug(f"Deleted batch {batch_file_name} from FileStore")

    def delete_batch_by_num(self, batch_num: int) -> None:
        """Delete a specific batch from FileStore."""
        batch_file_name = self._get_batch_file_name(batch_num)
        self.delete_batch_by_name(batch_file_name)
        logger.debug(f"Deleted batch num {batch_num} {batch_file_name} from FileStore")

    def cleanup_all_batches(self) -> None:
        """Clean up all batches for this index attempt."""
        for batch_file_name in self.get_all_batches_for_cc_pair():
            self.delete_batch_by_name(batch_file_name)

    def get_all_batches_for_cc_pair(self) -> list[str]:
        """Get all IDs of batches stored in the file store for the cc pair
        this batch store was initialized with.
        This includes any batches left over from a previous
        indexing attempt that need to be processed.
        """
        return [
            file.file_id
            for file in self.file_store.list_files_by_prefix(
                self._per_cc_pair_base_path()
            )
        ]

    def update_old_batches_to_new_index_attempt(self, batch_names: list[str]) -> None:
        """Update all batches to the new index attempt."""
        for batch_file_name in batch_names:
            path_info = self.extract_path_info(batch_file_name)
            if path_info is None:
                logger.warning(
                    f"Could not extract path info from batch file: {batch_file_name}"
                )
                continue
            new_batch_file_name = self._get_batch_file_name(path_info.batch_num)
            self.file_store.change_file_id(batch_file_name, new_batch_file_name)

    def extract_path_info(self, path: str) -> BatchStoragePathInfo | None:
        """Extract path info from a path."""
        path_spl = path.split("/")
        # TODO: remove this in a few months, just for backwards compatibility
        if len(path_spl) == 3:
            path_spl = ["iab"] + path_spl
        try:
            _, cc_pair_id, index_attempt_id, batch_num = path_spl
            return BatchStoragePathInfo(
                cc_pair_id=int(cc_pair_id),
                index_attempt_id=int(index_attempt_id),
                batch_num=int(batch_num.split(".")[0]),  # remove .json
            )
        except Exception as e:
            logger.error(f"Failed to extract path info from {path}: {e}")
            return None


def get_document_batch_storage(
    cc_pair_id: int, index_attempt_id: int
) -> DocumentBatchStorage:
    """Factory function to get the configured document batch storage implementation."""
    # The get_default_file_store will now correctly use S3BackedFileStore
    # or other configured stores based on environment variables
    file_store = get_default_file_store()
    return FileStoreDocumentBatchStorage(cc_pair_id, index_attempt_id, file_store)


================================================
FILE: backend/onyx/file_store/file_store.py
================================================
import hashlib
import tempfile
import uuid
from abc import ABC
from abc import abstractmethod
from io import BytesIO
from typing import Any
from typing import cast
from typing import IO
from typing import NotRequired
from typing import TypedDict

import boto3
import puremagic
from botocore.config import Config
from botocore.exceptions import ClientError
from mypy_boto3_s3 import S3Client
from sqlalchemy.orm import Session

from onyx.configs.app_configs import AWS_REGION_NAME
from onyx.configs.app_configs import S3_AWS_ACCESS_KEY_ID
from onyx.configs.app_configs import S3_AWS_SECRET_ACCESS_KEY
from onyx.configs.app_configs import S3_ENDPOINT_URL
from onyx.configs.app_configs import S3_FILE_STORE_BUCKET_NAME
from onyx.configs.app_configs import S3_FILE_STORE_PREFIX
from onyx.configs.app_configs import S3_GENERATE_LOCAL_CHECKSUM
from onyx.configs.app_configs import S3_VERIFY_SSL
from onyx.configs.constants import FileOrigin
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none
from onyx.db.file_record import delete_filerecord_by_file_id
from onyx.db.file_record import get_filerecord_by_file_id
from onyx.db.file_record import get_filerecord_by_file_id_optional
from onyx.db.file_record import get_filerecord_by_prefix
from onyx.db.file_record import upsert_filerecord
from onyx.db.models import FileRecord
from onyx.db.models import FileRecord as FileStoreModel
from onyx.file_store.s3_key_utils import generate_s3_key
from onyx.utils.file import FileWithMimeType
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


class S3PutKwargs(TypedDict):
    ChecksumSHA256: NotRequired[str]


class FileStore(ABC):
    """
    An abstraction for storing files and large binary objects.
    """

    @abstractmethod
    def initialize(self) -> None:
        """
        Should generally be called once before any other methods are called.
        """
        raise NotImplementedError

    @abstractmethod
    def has_file(
        self,
        file_id: str,
        file_origin: FileOrigin,
        file_type: str,
    ) -> bool:
        """
        Check if a file exists in the blob store

        Parameters:
        - file_id: Unique ID of the file to check for
        - file_origin: Origin of the file
        - file_type: Type of the file
        """
        raise NotImplementedError

    @abstractmethod
    def save_file(
        self,
        content: IO,
        display_name: str | None,
        file_origin: FileOrigin,
        file_type: str,
        file_metadata: dict[str, Any] | None = None,
        file_id: str | None = None,
    ) -> str:
        """
        Save a file to the blob store

        Parameters:
        - content: Contents of the file
        - display_name: Display name of the file to save
        - file_origin: Origin of the file
        - file_type: Type of the file
        - file_metadata: Additional metadata for the file
        - file_id: Unique ID of the file to save. If not provided, a random UUID will be generated.
                   It is generally NOT recommended to provide this.

        Returns:
            The unique ID of the file that was saved.
        """
        raise NotImplementedError

    @abstractmethod
    def read_file(
        self, file_id: str, mode: str | None = None, use_tempfile: bool = False
    ) -> IO[bytes]:
        """
        Read the content of a given file by the ID

        Parameters:
        - file_id: Unique ID of file to read
        - mode: Mode to open the file (e.g. 'b' for binary)
        - use_tempfile: Whether to use a temporary file to store the contents
                        in order to avoid loading the entire file into memory

        Returns:
            Contents of the file and metadata dict
        """

    @abstractmethod
    def read_file_record(self, file_id: str) -> FileStoreModel:
        """
        Read the file record by the ID
        """

    @abstractmethod
    def get_file_size(
        self, file_id: str, db_session: Session | None = None
    ) -> int | None:
        """
        Get the size of a file in bytes.
        Optionally provide a db_session for database access.
        """

    @abstractmethod
    def delete_file(self, file_id: str) -> None:
        """
        Delete a file by its ID.

        Parameters:
        - file_name: Name of file to delete
        """

    @abstractmethod
    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:
        """
        Get the file + parse out the mime type.
        """

    @abstractmethod
    def change_file_id(self, old_file_id: str, new_file_id: str) -> None:
        """
        Change the file ID of an existing file.

        Parameters:
        - old_file_id: Current file ID
        - new_file_id: New file ID to assign
        """
        raise NotImplementedError

    @abstractmethod
    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:
        """
        List all file IDs that start with the given prefix.
        """


class S3BackedFileStore(FileStore):
    """Isn't necessarily S3, but is any S3-compatible storage (e.g. MinIO)"""

    def __init__(
        self,
        bucket_name: str,
        aws_access_key_id: str | None = None,
        aws_secret_access_key: str | None = None,
        aws_region_name: str | None = None,
        s3_endpoint_url: str | None = None,
        s3_prefix: str | None = None,
        s3_verify_ssl: bool = True,
    ) -> None:
        self._s3_client: S3Client | None = None
        self._bucket_name = bucket_name
        self._aws_access_key_id = aws_access_key_id
        self._aws_secret_access_key = aws_secret_access_key
        self._aws_region_name = aws_region_name or "us-east-2"
        self._s3_endpoint_url = s3_endpoint_url
        self._s3_prefix = s3_prefix or "onyx-files"
        self._s3_verify_ssl = s3_verify_ssl

    def _get_s3_client(self) -> S3Client:
        """Initialize S3 client if not already done"""
        if self._s3_client is None:
            try:
                client_kwargs: dict[str, Any] = {
                    "service_name": "s3",
                    "region_name": self._aws_region_name,
                }

                # Add endpoint URL if specified (for MinIO, etc.)
                if self._s3_endpoint_url:
                    client_kwargs["endpoint_url"] = self._s3_endpoint_url
                    client_kwargs["config"] = Config(
                        signature_version="s3v4",
                        s3={"addressing_style": "path"},  # Required for MinIO
                    )
                    # Disable SSL verification if requested (for local development)
                    if not self._s3_verify_ssl:
                        import urllib3

                        urllib3.disable_warnings(
                            urllib3.exceptions.InsecureRequestWarning
                        )
                        client_kwargs["verify"] = False

                if self._aws_access_key_id and self._aws_secret_access_key:
                    # Use explicit credentials
                    client_kwargs.update(
                        {
                            "aws_access_key_id": self._aws_access_key_id,
                            "aws_secret_access_key": self._aws_secret_access_key,
                        }
                    )
                    self._s3_client = boto3.client(**client_kwargs)
                else:
                    # Use IAM role or default credentials (not typically used with MinIO)
                    self._s3_client = boto3.client(**client_kwargs)

            except Exception as e:
                logger.error(f"Failed to initialize S3 client: {e}")
                raise RuntimeError(f"Failed to initialize S3 client: {e}")

        return self._s3_client

    def _get_bucket_name(self) -> str:
        """Get S3 bucket name from configuration"""
        if not self._bucket_name:
            raise RuntimeError("S3 bucket name is required for S3 file store")
        return self._bucket_name

    def _get_s3_key(self, file_name: str) -> str:
        """Generate S3 key from file name with tenant ID prefix"""
        tenant_id = get_current_tenant_id()

        s3_key = generate_s3_key(
            file_name=file_name,
            prefix=self._s3_prefix,
            tenant_id=tenant_id,
            max_key_length=1024,
        )

        # Log if truncation occurred (when the key is exactly at the limit)
        if len(s3_key) == 1024:
            logger.info(f"File name was too long and was truncated: {file_name}")

        return s3_key

    def initialize(self) -> None:
        """Initialize the S3 file store by ensuring the bucket exists"""
        s3_client = self._get_s3_client()
        bucket_name = self._get_bucket_name()

        # Check if bucket exists
        try:
            s3_client.head_bucket(Bucket=bucket_name)
            logger.info(f"S3 bucket '{bucket_name}' already exists")
        except ClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                # Bucket doesn't exist, create it
                logger.info(f"Creating S3 bucket '{bucket_name}'")

                # For AWS S3, we need to handle region-specific bucket creation
                region = (
                    s3_client._client_config.region_name
                    if hasattr(s3_client, "_client_config")
                    else None
                )

                if region and region != "us-east-1":
                    # For regions other than us-east-1, we need to specify LocationConstraint
                    s3_client.create_bucket(
                        Bucket=bucket_name,
                        CreateBucketConfiguration={"LocationConstraint": region},
                    )
                else:
                    # For us-east-1 or MinIO/other S3-compatible services
                    s3_client.create_bucket(Bucket=bucket_name)

                logger.info(f"Successfully created S3 bucket '{bucket_name}'")
            elif error_code == "403":
                # Bucket exists but we don't have permission to access it
                logger.warning(
                    f"S3 bucket '{bucket_name}' exists but access is forbidden"
                )
                raise RuntimeError(
                    f"Access denied to S3 bucket '{bucket_name}'. Check credentials and permissions."
                )
            else:
                # Some other error occurred
                logger.error(f"Failed to check S3 bucket '{bucket_name}': {e}")
                raise RuntimeError(f"Failed to check S3 bucket '{bucket_name}': {e}")

    def has_file(
        self,
        file_id: str,
        file_origin: FileOrigin,
        file_type: str,
        db_session: Session | None = None,
    ) -> bool:
        with get_session_with_current_tenant_if_none(db_session) as db_session:
            file_record = get_filerecord_by_file_id_optional(
                file_id=file_id, db_session=db_session
            )
        return (
            file_record is not None
            and file_record.file_origin == file_origin
            and file_record.file_type == file_type
        )

    def save_file(
        self,
        content: IO,
        display_name: str | None,
        file_origin: FileOrigin,
        file_type: str,
        file_metadata: dict[str, Any] | None = None,
        file_id: str | None = None,
        db_session: Session | None = None,
    ) -> str:
        if file_id is None:
            file_id = str(uuid.uuid4())

        s3_client = self._get_s3_client()
        bucket_name = self._get_bucket_name()
        s3_key = self._get_s3_key(file_id)

        hash256 = ""
        sha256_hash = hashlib.sha256()
        kwargs: S3PutKwargs = {}

        # FIX: Optimize checksum generation to avoid creating extra copies in memory
        # Read content from IO object
        if hasattr(content, "read"):
            file_content = content.read()
            if S3_GENERATE_LOCAL_CHECKSUM:
                # FIX: Don't convert to string first (creates unnecessary copy)
                # Work directly with bytes
                if isinstance(file_content, bytes):
                    sha256_hash.update(file_content)
                else:
                    sha256_hash.update(str(file_content).encode())
                hash256 = sha256_hash.hexdigest()
                kwargs["ChecksumSHA256"] = hash256
            if hasattr(content, "seek"):
                content.seek(0)  # Reset position for potential re-reads
        else:
            file_content = content

        # Upload to S3

        s3_client.put_object(
            Bucket=bucket_name,
            Key=s3_key,
            Body=file_content,
            ContentType=file_type,
            **kwargs,
        )

        with get_session_with_current_tenant_if_none(db_session) as db_session:
            # Save metadata to database
            upsert_filerecord(
                file_id=file_id,
                display_name=display_name or file_id,
                file_origin=file_origin,
                file_type=file_type,
                bucket_name=bucket_name,
                object_key=s3_key,
                db_session=db_session,
                file_metadata=file_metadata,
            )
            db_session.commit()

        return file_id

    def read_file(
        self,
        file_id: str,
        mode: str | None = None,  # noqa: ARG002
        use_tempfile: bool = False,
        db_session: Session | None = None,
    ) -> IO[bytes]:
        with get_session_with_current_tenant_if_none(db_session) as db_session:
            file_record = get_filerecord_by_file_id(
                file_id=file_id, db_session=db_session
            )

        s3_client = self._get_s3_client()
        try:
            response = s3_client.get_object(
                Bucket=file_record.bucket_name, Key=file_record.object_key
            )
        except ClientError:
            logger.error(f"Failed to read file {file_id} from S3")
            raise

        # FIX: Stream file content instead of loading entire file into memory
        # This prevents OOM issues with large files (500MB+ PDFs, etc.)
        if use_tempfile:
            # Stream directly to temp file to avoid holding entire file in memory
            temp_file = tempfile.NamedTemporaryFile(mode="w+b", delete=True)
            # Stream in 8MB chunks to reduce memory footprint
            for chunk in response["Body"].iter_chunks(chunk_size=8 * 1024 * 1024):
                temp_file.write(chunk)
            temp_file.seek(0)
            return temp_file
        else:
            # For BytesIO, we still need to read into memory (legacy behavior)
            # but at least we're not creating duplicate copies
            file_content = response["Body"].read()
            return BytesIO(file_content)

    def read_file_record(
        self, file_id: str, db_session: Session | None = None
    ) -> FileStoreModel:
        with get_session_with_current_tenant_if_none(db_session) as db_session:
            file_record = get_filerecord_by_file_id(
                file_id=file_id, db_session=db_session
            )
        return file_record

    def get_file_size(
        self, file_id: str, db_session: Session | None = None
    ) -> int | None:
        """
        Get the size of a file in bytes by querying S3 metadata.
        """
        try:
            with get_session_with_current_tenant_if_none(db_session) as db_session:
                file_record = get_filerecord_by_file_id(
                    file_id=file_id, db_session=db_session
                )

            s3_client = self._get_s3_client()
            response = s3_client.head_object(
                Bucket=file_record.bucket_name, Key=file_record.object_key
            )
            return response.get("ContentLength")
        except Exception as e:
            logger.warning(f"Error getting file size for {file_id}: {e}")
            return None

    def delete_file(self, file_id: str, db_session: Session | None = None) -> None:
        with get_session_with_current_tenant_if_none(db_session) as db_session:
            try:
                file_record = get_filerecord_by_file_id(
                    file_id=file_id, db_session=db_session
                )
                if not file_record.bucket_name:
                    logger.error(
                        f"File record {file_id} with key {file_record.object_key} "
                        "has no bucket name, cannot delete from filestore"
                    )
                    delete_filerecord_by_file_id(file_id=file_id, db_session=db_session)
                    db_session.commit()
                    return

                # Delete from external storage
                s3_client = self._get_s3_client()
                try:
                    s3_client.delete_object(
                        Bucket=file_record.bucket_name, Key=file_record.object_key
                    )
                except ClientError as e:
                    # If the object doesn't exist in file store, treat it as success
                    # since the end goal (object not existing) is achieved
                    if e.response.get("Error", {}).get("Code") == "NoSuchKey":
                        logger.warning(
                            f"delete_file: File {file_id} not found in file store (key: {file_record.object_key}), "
                            "cleaning up database record."
                        )
                    else:
                        raise

                # Delete metadata from database
                delete_filerecord_by_file_id(file_id=file_id, db_session=db_session)

                db_session.commit()

            except Exception:
                db_session.rollback()
                raise

    def change_file_id(
        self, old_file_id: str, new_file_id: str, db_session: Session | None = None
    ) -> None:
        with get_session_with_current_tenant_if_none(db_session) as db_session:
            try:
                # Get the existing file record
                old_file_record = get_filerecord_by_file_id(
                    file_id=old_file_id, db_session=db_session
                )

                # Generate new S3 key for the new file ID
                new_s3_key = self._get_s3_key(new_file_id)

                # Copy S3 object to new key
                s3_client = self._get_s3_client()
                bucket_name = self._get_bucket_name()

                copy_source = (
                    f"{old_file_record.bucket_name}/{old_file_record.object_key}"
                )

                s3_client.copy_object(
                    CopySource=copy_source,
                    Bucket=bucket_name,
                    Key=new_s3_key,
                    MetadataDirective="COPY",
                )

                # Create new file record with new file_id
                # Cast file_metadata to the expected type
                file_metadata = cast(
                    dict[Any, Any] | None, old_file_record.file_metadata
                )

                upsert_filerecord(
                    file_id=new_file_id,
                    display_name=old_file_record.display_name,
                    file_origin=old_file_record.file_origin,
                    file_type=old_file_record.file_type,
                    bucket_name=bucket_name,
                    object_key=new_s3_key,
                    db_session=db_session,
                    file_metadata=file_metadata,
                )

                # Delete old S3 object
                s3_client.delete_object(
                    Bucket=old_file_record.bucket_name, Key=old_file_record.object_key
                )

                # Delete old file record
                delete_filerecord_by_file_id(file_id=old_file_id, db_session=db_session)

                db_session.commit()

            except Exception as e:
                db_session.rollback()
                logger.exception(
                    f"Failed to change file ID from {old_file_id} to {new_file_id}: {e}"
                )
                raise

    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:
        mime_type: str = "application/octet-stream"
        try:
            file_io = self.read_file(file_id, mode="b")
            file_content = file_io.read()
            matches = puremagic.magic_string(file_content)
            if matches:
                mime_type = cast(str, matches[0].mime_type)
            return FileWithMimeType(data=file_content, mime_type=mime_type)
        except Exception:
            return None

    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:
        """
        List all file IDs that start with the given prefix.
        """
        with get_session_with_current_tenant() as db_session:
            file_records = get_filerecord_by_prefix(
                prefix=prefix, db_session=db_session
            )
        return file_records


def get_s3_file_store() -> S3BackedFileStore:
    """
    Returns the S3 file store implementation.
    """

    # Get bucket name - this is required
    bucket_name = S3_FILE_STORE_BUCKET_NAME
    if not bucket_name:
        raise RuntimeError(
            "S3_FILE_STORE_BUCKET_NAME configuration is required for S3 file store"
        )

    return S3BackedFileStore(
        bucket_name=bucket_name,
        aws_access_key_id=S3_AWS_ACCESS_KEY_ID,
        aws_secret_access_key=S3_AWS_SECRET_ACCESS_KEY,
        aws_region_name=AWS_REGION_NAME,
        s3_endpoint_url=S3_ENDPOINT_URL,
        s3_prefix=S3_FILE_STORE_PREFIX,
        s3_verify_ssl=S3_VERIFY_SSL,
    )


def get_default_file_store() -> FileStore:
    """
    Returns the configured file store implementation based on FILE_STORE_BACKEND.

    When FILE_STORE_BACKEND=postgres (default):
    - Files are stored in PostgreSQL using Large Objects.
    - No external storage service (S3/MinIO) is required.

    When FILE_STORE_BACKEND=s3:
    - Supports AWS S3, MinIO, and other S3-compatible storage.
    - Configuration via environment variables:
      - S3_FILE_STORE_BUCKET_NAME, S3_ENDPOINT_URL, S3_AWS_ACCESS_KEY_ID, etc.
    """
    from onyx.configs.app_configs import FILE_STORE_BACKEND
    from onyx.configs.constants import FileStoreType

    if FileStoreType(FILE_STORE_BACKEND) == FileStoreType.POSTGRES:
        from onyx.file_store.postgres_file_store import PostgresBackedFileStore

        return PostgresBackedFileStore()

    return get_s3_file_store()


================================================
FILE: backend/onyx/file_store/models.py
================================================
import base64
from enum import Enum
from typing import NotRequired
from typing_extensions import TypedDict  # noreorder

from pydantic import BaseModel


class ChatFileType(str, Enum):
    # Image types only contain the binary data
    IMAGE = "image"
    # Doc types are saved as both the binary, and the parsed text
    DOC = "document"
    # Plain text only contain the text
    PLAIN_TEXT = "plain_text"
    # Tabular data files (CSV, XLSX)
    TABULAR = "tabular"

    def is_text_file(self) -> bool:
        return self in (
            ChatFileType.PLAIN_TEXT,
            ChatFileType.DOC,
            ChatFileType.TABULAR,
        )

    def use_metadata_only(self) -> bool:
        """File types where we can ignore the file content
        and only use the metadata."""
        return self in (ChatFileType.TABULAR,)


class FileDescriptor(TypedDict):
    """NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column
    in Postgres"""

    id: str
    type: ChatFileType
    name: NotRequired[str | None]
    user_file_id: NotRequired[str | None]


class InMemoryChatFile(BaseModel):
    file_id: str
    content: bytes
    file_type: ChatFileType
    filename: str | None = None

    def to_base64(self) -> str:
        if self.file_type == ChatFileType.IMAGE:
            return base64.b64encode(self.content).decode()
        else:
            raise RuntimeError(
                "Should not be trying to convert a non-image file to base64"
            )

    def to_file_descriptor(self) -> FileDescriptor:
        return {
            "id": str(self.file_id),
            "type": self.file_type,
            "name": self.filename,
            "user_file_id": str(self.file_id) if self.file_id else None,
        }


================================================
FILE: backend/onyx/file_store/postgres_file_store.py
================================================
"""PostgreSQL-backed file store using Large Objects.

Stores file content directly in PostgreSQL via the Large Object facility,
eliminating the need for an external S3/MinIO service.
"""

import tempfile
import uuid
from io import BytesIO
from typing import Any
from typing import cast
from typing import IO

import puremagic
from psycopg2.extensions import connection as Psycopg2Connection
from sqlalchemy.orm import Session

from onyx.configs.constants import FileOrigin
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none
from onyx.db.file_content import delete_file_content_by_file_id
from onyx.db.file_content import get_file_content_by_file_id
from onyx.db.file_content import get_file_content_by_file_id_optional
from onyx.db.file_content import transfer_file_content_file_id
from onyx.db.file_content import upsert_file_content
from onyx.db.file_record import delete_filerecord_by_file_id
from onyx.db.file_record import get_filerecord_by_file_id
from onyx.db.file_record import get_filerecord_by_file_id_optional
from onyx.db.file_record import get_filerecord_by_prefix
from onyx.db.file_record import upsert_filerecord
from onyx.db.models import FileRecord
from onyx.db.models import FileRecord as FileStoreModel
from onyx.file_store.file_store import FileStore
from onyx.utils.file import FileWithMimeType
from onyx.utils.logger import setup_logger

logger = setup_logger()

POSTGRES_BUCKET_SENTINEL = "postgres"
STREAM_CHUNK_SIZE = 8 * 1024 * 1024  # 8 MB


def _get_raw_connection(db_session: Session) -> Psycopg2Connection:
    """Extract the raw psycopg2 connection from a SQLAlchemy session."""
    raw_conn = db_session.connection().connection.dbapi_connection
    if raw_conn is None:
        raise ValueError("Failed to get raw connection from session")
    return cast(Psycopg2Connection, raw_conn)


def _create_large_object(raw_conn: Psycopg2Connection, data: bytes) -> int:
    """Create a new Large Object, write data, and return the OID."""
    lobj = raw_conn.lobject(0, "wb")
    lobj.write(data)
    oid: int = lobj.oid
    lobj.close()
    return oid


def _read_large_object(raw_conn: Psycopg2Connection, oid: int) -> bytes:
    """Read all bytes from a Large Object."""
    lobj = raw_conn.lobject(oid, "rb")
    data: bytes = lobj.read()
    lobj.close()
    return data


def _read_large_object_to_tempfile(raw_conn: Psycopg2Connection, oid: int) -> IO[bytes]:
    """Stream a Large Object into a temporary file to avoid OOM on large files."""
    lobj = raw_conn.lobject(oid, "rb")
    temp = tempfile.NamedTemporaryFile(mode="w+b", delete=True)
    while True:
        chunk = lobj.read(STREAM_CHUNK_SIZE)
        if not chunk:
            break
        temp.write(chunk)
    lobj.close()
    temp.seek(0)
    return temp


def _delete_large_object(raw_conn: Any, oid: int) -> None:
    """Unlink (delete) a Large Object by OID."""
    lobj = raw_conn.lobject(oid, "n")
    lobj.unlink()


class PostgresBackedFileStore(FileStore):
    """File store backed entirely by PostgreSQL.

    Metadata lives in `file_record`, content lives in PostgreSQL Large Objects
    with OID references tracked in `file_content`.
    """

    def initialize(self) -> None:
        # Nothing to do — tables are created by Alembic migrations.
        pass

    def has_file(
        self,
        file_id: str,
        file_origin: FileOrigin,
        file_type: str,
        db_session: Session | None = None,
    ) -> bool:
        with get_session_with_current_tenant_if_none(db_session) as session:
            record = get_filerecord_by_file_id_optional(
                file_id=file_id, db_session=session
            )
        return (
            record is not None
            and record.file_origin == file_origin
            and record.file_type == file_type
        )

    def save_file(
        self,
        content: IO,
        display_name: str | None,
        file_origin: FileOrigin,
        file_type: str,
        file_metadata: dict[str, Any] | None = None,
        file_id: str | None = None,
        db_session: Session | None = None,
    ) -> str:
        if file_id is None:
            file_id = str(uuid.uuid4())

        file_bytes = self._read_content_bytes(content)
        created_lo = False

        with get_session_with_current_tenant_if_none(db_session) as session:
            raw_conn, oid = None, None
            try:
                raw_conn = _get_raw_connection(session)

                # Look up existing content so we can unlink the old
                # Large Object after a successful overwrite.
                existing = get_file_content_by_file_id_optional(
                    file_id=file_id, db_session=session
                )
                old_oid = existing.lobj_oid if existing else None

                oid = _create_large_object(raw_conn, file_bytes)
                created_lo = True

                upsert_filerecord(
                    file_id=file_id,
                    display_name=display_name or file_id,
                    file_origin=file_origin,
                    file_type=file_type,
                    bucket_name=POSTGRES_BUCKET_SENTINEL,
                    object_key=str(oid),
                    db_session=session,
                    file_metadata=file_metadata,
                )
                upsert_file_content(
                    file_id=file_id,
                    lobj_oid=oid,
                    file_size=len(file_bytes),
                    db_session=session,
                )

                # Unlink the previous Large Object to avoid orphans
                if old_oid is not None and old_oid != oid:
                    try:
                        _delete_large_object(raw_conn, old_oid)
                    except Exception:
                        logger.warning(
                            f"Failed to unlink old large object {old_oid} for file {file_id}"
                        )

                session.commit()
            except Exception as e:
                session.rollback()
                try:
                    if created_lo and raw_conn is not None and oid is not None:
                        _delete_large_object(raw_conn, oid)
                except Exception:
                    logger.exception(
                        f"Failed to delete large object {oid} for file {file_id}"
                    )
                raise e

        return file_id

    def read_file(
        self,
        file_id: str,
        mode: str | None = None,  # noqa: ARG002
        use_tempfile: bool = False,
        db_session: Session | None = None,
    ) -> IO[bytes]:
        with get_session_with_current_tenant_if_none(db_session) as session:
            file_content = get_file_content_by_file_id(
                file_id=file_id, db_session=session
            )
            raw_conn = _get_raw_connection(session)

            if use_tempfile:
                return _read_large_object_to_tempfile(raw_conn, file_content.lobj_oid)

            data = _read_large_object(raw_conn, file_content.lobj_oid)
            return BytesIO(data)

    def read_file_record(
        self, file_id: str, db_session: Session | None = None
    ) -> FileStoreModel:
        with get_session_with_current_tenant_if_none(db_session) as session:
            return get_filerecord_by_file_id(file_id=file_id, db_session=session)

    def get_file_size(
        self, file_id: str, db_session: Session | None = None
    ) -> int | None:
        try:
            with get_session_with_current_tenant_if_none(db_session) as session:
                record = get_file_content_by_file_id(
                    file_id=file_id, db_session=session
                )
                return record.file_size
        except Exception as e:
            logger.warning(f"Error getting file size for {file_id}: {e}")
            return None

    def delete_file(self, file_id: str, db_session: Session | None = None) -> None:
        with get_session_with_current_tenant_if_none(db_session) as session:
            try:
                file_content = get_file_content_by_file_id(
                    file_id=file_id, db_session=session
                )
                raw_conn = _get_raw_connection(session)

                try:
                    _delete_large_object(raw_conn, file_content.lobj_oid)
                except Exception:
                    logger.warning(
                        f"Large object {file_content.lobj_oid} for file {file_id} not found, cleaning up records only."
                    )

                delete_file_content_by_file_id(file_id=file_id, db_session=session)
                delete_filerecord_by_file_id(file_id=file_id, db_session=session)
                session.commit()
            except Exception:
                session.rollback()
                raise

    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:
        mime_type = "application/octet-stream"
        try:
            file_io = self.read_file(file_id, mode="b")
        except Exception:
            return None

        file_content = file_io.read()
        try:
            matches = puremagic.magic_string(file_content)
            if matches:
                mime_type = cast(str, matches[0].mime_type)
        except puremagic.PureError:
            pass

        return FileWithMimeType(data=file_content, mime_type=mime_type)

    def change_file_id(
        self, old_file_id: str, new_file_id: str, db_session: Session | None = None
    ) -> None:
        with get_session_with_current_tenant_if_none(db_session) as session:
            try:
                old_record = get_filerecord_by_file_id(
                    file_id=old_file_id, db_session=session
                )
                file_metadata = cast(dict[Any, Any] | None, old_record.file_metadata)

                # 1. Create the new file_record so the FK target exists
                upsert_filerecord(
                    file_id=new_file_id,
                    display_name=old_record.display_name,
                    file_origin=old_record.file_origin,
                    file_type=old_record.file_type,
                    bucket_name=POSTGRES_BUCKET_SENTINEL,
                    object_key=old_record.object_key,
                    db_session=session,
                    file_metadata=file_metadata,
                )

                # 2. Move file_content in-place — the LO OID is never
                #    shared between two rows.
                transfer_file_content_file_id(
                    old_file_id=old_file_id,
                    new_file_id=new_file_id,
                    db_session=session,
                )

                # 3. Remove the now-orphaned old file_record
                delete_filerecord_by_file_id(file_id=old_file_id, db_session=session)

                session.commit()
            except Exception as e:
                session.rollback()
                logger.exception(
                    f"Failed to change file ID from {old_file_id} to {new_file_id}: {e}"
                )
                raise

    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:
        with get_session_with_current_tenant() as session:
            return get_filerecord_by_prefix(prefix=prefix, db_session=session)

    @staticmethod
    def _read_content_bytes(content: IO) -> bytes:
        """Normalize an IO object into raw bytes."""
        if hasattr(content, "read"):
            raw = content.read()
        else:
            raw = content

        if isinstance(raw, str):
            return raw.encode("utf-8")
        return raw


================================================
FILE: backend/onyx/file_store/s3_key_utils.py
================================================
"""
S3 key sanitization utilities for ensuring AWS S3 compatibility.

This module provides utilities for sanitizing file names to be compatible with
AWS S3 object key naming guidelines while ensuring uniqueness when significant
sanitization occurs.

Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
"""

import hashlib
import re
import urllib.parse
from re import Match

# Constants for S3 key generation
HASH_LENGTH = 64  # SHA256 hex digest length
HASH_SEPARATOR_LENGTH = 1  # Length of underscore separator
HASH_WITH_SEPARATOR_LENGTH = HASH_LENGTH + HASH_SEPARATOR_LENGTH


def _encode_special_char(match: Match[str]) -> str:
    """Helper function to URL encode special characters."""
    return urllib.parse.quote(match.group(0), safe="")


def sanitize_s3_key_name(file_name: str) -> str:
    """
    Sanitize file name to be S3-compatible according to AWS guidelines.

    This method:
    1. Replaces problematic characters with safe alternatives
    2. URL-encodes characters that might require special handling
    3. Ensures the result is safe for S3 object keys
    4. Adds uniqueness when significant sanitization occurs

    Args:
        file_name: The original file name to sanitize

    Returns:
        A sanitized file name that is S3-compatible

    Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
    """
    if not file_name:
        return "unnamed_file"

    original_name = file_name

    # Characters to avoid completely (replace with underscore)
    # These are characters that AWS recommends avoiding
    avoid_chars = r'[\\{}^%`\[\]"<>#|~/]'

    # Replace avoided characters with underscore
    sanitized = re.sub(avoid_chars, "_", file_name)
    # Characters that might require special handling but are allowed
    # We'll URL encode these to be safe
    special_chars = r"[&$@=;:+,?\s]"

    sanitized = re.sub(special_chars, _encode_special_char, sanitized)

    # Handle non-ASCII characters by URL encoding them
    # This ensures Unicode characters are properly handled
    needs_unicode_encoding = False
    try:
        # Try to encode as ASCII to check if it contains non-ASCII chars
        sanitized.encode("ascii")
    except UnicodeEncodeError:
        needs_unicode_encoding = True
        # Contains non-ASCII characters, URL encode the entire string
        # but preserve safe ASCII characters
        sanitized = urllib.parse.quote(
            sanitized,
            safe="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.()!*",
        )

    # Ensure we don't have consecutive periods at the start (relative path issue)
    sanitized = re.sub(r"^\.+", "", sanitized)

    # Remove any trailing periods to avoid download issues
    sanitized = sanitized.rstrip(".")

    # Remove multiple separators
    sanitized = re.sub(r"[-_]{2,}", "-", sanitized)

    # If sanitization resulted in empty string, use a default
    if not sanitized:
        sanitized = "sanitized_file"

    # Check if significant sanitization occurred and add uniqueness if needed
    significant_changes = (
        # Check if we replaced many characters
        len(re.findall(avoid_chars, original_name)) > 3
        or
        # Check if we had to URL encode Unicode characters
        needs_unicode_encoding
        or
        # Check if the sanitized name is very different in length (expansion due to encoding)
        len(sanitized) > len(original_name) * 2
        or
        # Check if the original had many special characters
        len(re.findall(special_chars, original_name)) > 5
    )

    if significant_changes:
        # Add a short hash to ensure uniqueness while keeping some readability
        name_hash = hashlib.sha256(original_name.encode("utf-8")).hexdigest()[:8]

        # Try to preserve file extension if it exists and is reasonable
        if "." in sanitized and len(sanitized.split(".")[-1]) <= 10:
            name_parts = sanitized.rsplit(".", 1)
            sanitized = f"{name_parts[0]}_{name_hash}.{name_parts[1]}"
        else:
            sanitized = f"{sanitized}_{name_hash}"

    return sanitized


def generate_s3_key(
    file_name: str, prefix: str, tenant_id: str, max_key_length: int = 1024
) -> str:
    """
    Generate a complete S3 key from file name with prefix and tenant ID.

    Args:
        file_name: The original file name
        prefix: S3 key prefix (e.g., 'onyx-files')
        tenant_id: Tenant identifier
        max_key_length: Maximum allowed S3 key length (default: 1024)

    Returns:
        A complete S3 key that fits within the length limit
    """
    # Strip slashes from prefix and tenant_id to avoid double slashes
    prefix_clean = prefix.strip("/")
    tenant_clean = tenant_id.strip("/")

    # Sanitize the file name first
    sanitized_file_name = sanitize_s3_key_name(file_name)

    # Handle long file names that could exceed S3's key limit
    # S3 key format: {prefix}/{tenant_id}/{file_name}
    prefix_and_tenant_parts = [prefix_clean, tenant_clean]
    prefix_and_tenant = "/".join(prefix_and_tenant_parts) + "/"
    max_file_name_length = max_key_length - len(prefix_and_tenant)

    if len(sanitized_file_name) < max_file_name_length:
        return "/".join(prefix_and_tenant_parts + [sanitized_file_name])

    # For very long file names, use hash-based approach to ensure uniqueness
    # Use the original file name for the hash to maintain consistency
    file_hash = hashlib.sha256(file_name.encode("utf-8")).hexdigest()

    # Calculate how much space we have for the readable part
    # Reserve space for hash (64 chars) + underscore separator (1 char)
    readable_part_max_length = max(0, max_file_name_length - HASH_WITH_SEPARATOR_LENGTH)

    if readable_part_max_length > 0:
        # Use first part of sanitized name + hash to maintain some readability
        readable_part = sanitized_file_name[:readable_part_max_length]
        truncated_name = f"{readable_part}_{file_hash}"
    else:
        # If no space for readable part, just use hash
        truncated_name = file_hash

    return "/".join(prefix_and_tenant_parts + [truncated_name])


================================================
FILE: backend/onyx/file_store/utils.py
================================================
import base64
from collections.abc import Callable
from io import BytesIO
from typing import cast
from uuid import UUID

import requests
from sqlalchemy.orm import Session

from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import FileOrigin
from onyx.db.models import UserFile
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.file_store.models import InMemoryChatFile
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.utils.b64 import get_image_type
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time

logger = setup_logger()


def plaintext_file_name_for_id(file_id: str) -> str:
    """Generate a consistent file name for storing plaintext content of a file."""
    return f"plaintext_{file_id}"


def store_plaintext(file_id: str, plaintext_content: str) -> bool:
    """
    Store plaintext content for a file in the file store.

    Args:
        file_id: The ID of the file (user_file or artifact_file)
        plaintext_content: The plaintext content to store

    Returns:
        bool: True if storage was successful, False otherwise
    """
    if not plaintext_content:
        return False

    plaintext_file_name = plaintext_file_name_for_id(file_id)
    try:
        file_store = get_default_file_store()
        file_content = BytesIO(plaintext_content.encode("utf-8"))
        file_store.save_file(
            content=file_content,
            display_name=f"Plaintext for {file_id}",
            file_origin=FileOrigin.PLAINTEXT_CACHE,
            file_type="text/plain",
            file_id=plaintext_file_name,
        )
        return True
    except Exception as e:
        logger.warning(f"Failed to store plaintext for {file_id}: {e}")
        return False


# --- Convenience wrappers for callers that use user-file UUIDs ---


def user_file_id_to_plaintext_file_name(user_file_id: UUID) -> str:
    """Generate a consistent file name for storing plaintext content of a user file."""
    return plaintext_file_name_for_id(str(user_file_id))


def store_user_file_plaintext(user_file_id: UUID, plaintext_content: str) -> bool:
    """Store plaintext content for a user file (delegates to :func:`store_plaintext`)."""
    return store_plaintext(str(user_file_id), plaintext_content)


def load_chat_file_by_id(file_id: str) -> InMemoryChatFile:
    """Load a file directly from the file store using its file_record ID.

    This is the fallback path for chat-attached files that don't have a
    corresponding row in the ``user_file`` table."""
    file_store = get_default_file_store()
    file_record = file_store.read_file_record(file_id)
    chat_file_type = mime_type_to_chat_file_type(file_record.file_type)

    file_io = file_store.read_file(file_id, mode="b")
    return InMemoryChatFile(
        file_id=file_id,
        content=file_io.read(),
        file_type=chat_file_type,
        filename=file_record.display_name,
    )


def load_user_file(file_id: UUID, db_session: Session) -> InMemoryChatFile:
    status = "not_loaded"

    user_file = db_session.query(UserFile).filter(UserFile.id == file_id).first()
    if not user_file:
        raise ValueError(f"User file with id {file_id} not found")

    # Get the file record to determine the appropriate chat file type
    file_store = get_default_file_store()
    file_record = file_store.read_file_record(user_file.file_id)

    # Determine appropriate chat file type based on the original file's MIME type
    chat_file_type = mime_type_to_chat_file_type(file_record.file_type)

    # Try to load plaintext version first
    plaintext_file_name = user_file_id_to_plaintext_file_name(file_id)

    # check for plain text normalized version first, then use original file otherwise
    try:
        file_io = file_store.read_file(plaintext_file_name, mode="b")
        # Metadata-only file types preserve their original type so
        # downstream injection paths can route them correctly.
        if chat_file_type.use_metadata_only():
            plaintext_chat_file_type = chat_file_type
        elif file_io is not None:
            # if we have plaintext for image (which happens when image
            # extraction is enabled), we use PLAIN_TEXT type
            plaintext_chat_file_type = ChatFileType.PLAIN_TEXT
        else:
            plaintext_chat_file_type = (
                ChatFileType.PLAIN_TEXT
                if chat_file_type != ChatFileType.IMAGE
                else chat_file_type
            )

        chat_file = InMemoryChatFile(
            file_id=str(user_file.file_id),
            content=file_io.read(),
            file_type=plaintext_chat_file_type,
            filename=user_file.name,
        )
        status = "plaintext"
        return chat_file
    except Exception as e:
        logger.warning(f"Failed to load plaintext for user file {user_file.id}: {e}")
        # Fall back to original file if plaintext not available
        file_io = file_store.read_file(user_file.file_id, mode="b")

        chat_file = InMemoryChatFile(
            file_id=str(user_file.file_id),
            content=file_io.read(),
            file_type=chat_file_type,
            filename=user_file.name,
        )
        status = "original"
        return chat_file
    finally:
        logger.debug(
            f"load_user_file finished: file_id={user_file.file_id} chat_file_type={chat_file_type} status={status}"
        )


def load_in_memory_chat_files(
    user_file_ids: list[UUID],
    db_session: Session,
) -> list[InMemoryChatFile]:
    """
    Loads the actual content of user files specified by individual IDs and those
    within specified project IDs into memory.

    Args:
        user_file_ids: A list of specific UserFile IDs to load.
        db_session: The SQLAlchemy database session.

    Returns:
        A list of InMemoryChatFile objects, each containing the file content (as bytes),
        file ID, file type, and filename. Prioritizes loading plaintext versions if available.
    """
    # Use parallel execution to load files concurrently
    return cast(
        list[InMemoryChatFile],
        run_functions_tuples_in_parallel(
            # 1. Load files specified by individual IDs
            [(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
        ),
    )


def get_user_files(
    user_file_ids: list[UUID],
    db_session: Session,
) -> list[UserFile]:
    """
    Fetches UserFile database records based on provided file and project IDs.

    Args:
        user_file_ids: A list of specific UserFile IDs to fetch.
        db_session: The SQLAlchemy database session.

    Returns:
        A list containing UserFile SQLAlchemy model objects corresponding to the
        specified file IDs and all files within the specified project IDs.
        It does NOT return the actual file content.
    """
    user_files: list[UserFile] = []

    # 1. Fetch UserFile records for specific file IDs
    for user_file_id in user_file_ids:
        # Query the database for a UserFile with the matching ID
        user_file = (
            db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
        )
        # If found, add it to the list
        if user_file is not None:
            user_files.append(user_file)

    # 3. Return the combined list of UserFile database objects
    return user_files


def validate_user_files_ownership(
    user_file_ids: list[UUID],
    user_id: UUID | None,
    db_session: Session,
) -> list[UserFile]:
    """
    Fetches all UserFile database records for a given user.
    """
    user_files = get_user_files(user_file_ids, db_session)
    current_user_files = []
    for user_file in user_files:
        # Note: if user_id is None, then all files should be None as well
        # (since auth must be disabled in this case)
        if user_file.user_id != user_id:
            raise ValueError(
                f"User {user_id} does not have access to file {user_file.id}"
            )
        current_user_files.append(user_file)

    return current_user_files


def save_file_from_url(url: str) -> str:
    response = requests.get(url)
    response.raise_for_status()

    file_io = BytesIO(response.content)
    file_store = get_default_file_store()
    file_id = file_store.save_file(
        content=file_io,
        display_name="GeneratedImage",
        file_origin=FileOrigin.CHAT_IMAGE_GEN,
        file_type="image/png;base64",
    )
    return file_id


def save_file_from_base64(base64_string: str) -> str:
    file_store = get_default_file_store()
    file_id = file_store.save_file(
        content=BytesIO(base64.b64decode(base64_string)),
        display_name="GeneratedImage",
        file_origin=FileOrigin.CHAT_IMAGE_GEN,
        file_type=get_image_type(base64_string),
    )
    return file_id


def save_file(
    url: str | None = None,
    base64_data: str | None = None,
) -> str:
    """Save a file from either a URL or base64 encoded string.

    Args:
        url: URL to download file from
        base64_data: Base64 encoded file data

    Returns:
        The unique ID of the saved file

    Raises:
        ValueError: If neither url nor base64_data is provided, or if both are provided
    """
    if url is not None and base64_data is not None:
        raise ValueError("Cannot specify both url and base64_data")

    if url is not None:
        return save_file_from_url(url)
    elif base64_data is not None:
        return save_file_from_base64(base64_data)
    else:
        raise ValueError("Must specify either url or base64_data")


def save_files(urls: list[str], base64_files: list[str]) -> list[str]:
    # NOTE: be explicit about typing so that if we change things, we get notified
    funcs: list[
        tuple[
            Callable[[str | None, str | None], str],
            tuple[str | None, str | None],
        ]
    ] = [(save_file, (url, None)) for url in urls] + [
        (save_file, (None, base64_file)) for base64_file in base64_files
    ]

    return run_functions_tuples_in_parallel(funcs)


@log_function_time(print_only=True)
def verify_user_files(
    user_files: list[FileDescriptor],
    user_id: UUID | None,
    db_session: Session,
    project_id: int | None = None,
) -> None:
    """
    Verify that all provided file descriptors belong to the specified user.
    For project files (those without user_file_id), verifies access through project ownership.

    Args:
        user_files: List of file descriptors to verify
        user_id: The user ID to check ownership against
        db_session: The SQLAlchemy database session
        project_id: Optional project ID to verify project file access against

    Raises:
        ValueError: If any file does not belong to the user or is not found
    """
    from onyx.db.models import Project__UserFile
    from onyx.db.projects import check_project_ownership

    # Extract user_file_ids and project file_ids from the file descriptors
    user_file_ids = []
    project_file_ids = []

    for file_descriptor in user_files:
        # Check if this file descriptor has a user_file_id
        if file_descriptor.get("user_file_id"):
            try:
                user_file_ids.append(UUID(file_descriptor["user_file_id"]))
            except (ValueError, TypeError):
                logger.warning(
                    f"Invalid user_file_id in file descriptor: {file_descriptor['user_file_id']}"
                )
                continue
        else:
            # This is a project file - use the 'id' field which is the file_id
            if file_descriptor.get("id"):
                project_file_ids.append(file_descriptor["id"])

    # Verify user files (existing logic)
    if user_file_ids:
        validate_user_files_ownership(user_file_ids, user_id, db_session)

    # Verify project files
    if project_file_ids:
        if project_id is None:
            raise ValueError(
                "Project files provided but no project_id specified for verification"
            )

        # Verify user owns the project
        if not check_project_ownership(project_id, user_id, db_session):
            raise ValueError(
                f"User {user_id} does not have access to project {project_id}"
            )

        # Verify all project files belong to the specified project
        user_files_in_project = (
            db_session.query(UserFile)
            .join(Project__UserFile)
            .filter(
                Project__UserFile.project_id == project_id,
                UserFile.file_id.in_(project_file_ids),
            )
            .all()
        )

        # Check if all files were found in the project
        found_file_ids = {uf.file_id for uf in user_files_in_project}
        missing_files = set(project_file_ids) - found_file_ids

        if missing_files:
            raise ValueError(
                f"Files {missing_files} are not associated with project {project_id}"
            )


def build_frontend_file_url(file_id: str) -> str:
    return f"/api/chat/file/{file_id}"


def build_full_frontend_file_url(file_id: str) -> str:
    return f"{WEB_DOMAIN}/api/chat/file/{file_id}"


================================================
FILE: backend/onyx/hooks/__init__.py
================================================


================================================
FILE: backend/onyx/hooks/api_dependencies.py
================================================
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from shared_configs.configs import MULTI_TENANT


def require_hook_enabled() -> None:
    """FastAPI dependency that gates all hook management endpoints.

    Hooks are only available in single-tenant / self-hosted EE deployments.

    Use as: Depends(require_hook_enabled)
    """
    if MULTI_TENANT:
        raise OnyxError(
            OnyxErrorCode.SINGLE_TENANT_ONLY,
            "Hooks are not available in multi-tenant deployments",
        )


================================================
FILE: backend/onyx/hooks/executor.py
================================================
"""CE hook executor.

HookSkipped and HookSoftFailed are real classes kept here because
process_message.py (CE code) uses isinstance checks against them.

execute_hook is the public entry point. It dispatches to _execute_hook_impl
via fetch_versioned_implementation so that:
  - CE: onyx.hooks.executor._execute_hook_impl → no-op, returns HookSkipped()
  - EE: ee.onyx.hooks.executor._execute_hook_impl → real HTTP call
"""

from typing import Any
from typing import TypeVar

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.db.enums import HookPoint
from onyx.utils.variable_functionality import fetch_versioned_implementation


class HookSkipped:
    """No active hook configured for this hook point."""


class HookSoftFailed:
    """Hook was called but failed with SOFT fail strategy — continuing."""


T = TypeVar("T", bound=BaseModel)


def _execute_hook_impl(
    *,
    db_session: Session,  # noqa: ARG001
    hook_point: HookPoint,  # noqa: ARG001
    payload: dict[str, Any],  # noqa: ARG001
    response_type: type[T],  # noqa: ARG001
) -> T | HookSkipped | HookSoftFailed:
    """CE no-op — hooks are not available without EE."""
    return HookSkipped()


def execute_hook(
    *,
    db_session: Session,
    hook_point: HookPoint,
    payload: dict[str, Any],
    response_type: type[T],
) -> T | HookSkipped | HookSoftFailed:
    """Execute the hook for the given hook point.

    Dispatches to the versioned implementation so EE gets the real executor
    and CE gets the no-op stub, without any changes at the call site.
    """
    impl = fetch_versioned_implementation("onyx.hooks.executor", "_execute_hook_impl")
    return impl(
        db_session=db_session,
        hook_point=hook_point,
        payload=payload,
        response_type=response_type,
    )


================================================
FILE: backend/onyx/hooks/models.py
================================================
from datetime import datetime
from enum import Enum
from typing import Annotated
from typing import Any

from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator
from pydantic import SecretStr

from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint

NonEmptySecretStr = Annotated[SecretStr, Field(min_length=1)]


# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------


class HookCreateRequest(BaseModel):
    name: str = Field(min_length=1)
    hook_point: HookPoint
    endpoint_url: str = Field(min_length=1)
    api_key: NonEmptySecretStr | None = None
    fail_strategy: HookFailStrategy | None = None  # if None, uses HookPointSpec default
    timeout_seconds: float | None = Field(
        default=None, gt=0
    )  # if None, uses HookPointSpec default

    @field_validator("name", "endpoint_url")
    @classmethod
    def no_whitespace_only(cls, v: str) -> str:
        if not v.strip():
            raise ValueError("cannot be whitespace-only.")
        return v


class HookUpdateRequest(BaseModel):
    name: str | None = None
    endpoint_url: str | None = None
    api_key: NonEmptySecretStr | None = None
    fail_strategy: HookFailStrategy | None = None
    timeout_seconds: float | None = Field(default=None, gt=0)

    @model_validator(mode="after")
    def require_at_least_one_field(self) -> "HookUpdateRequest":
        if not self.model_fields_set:
            raise ValueError("At least one field must be provided for an update.")
        if "name" in self.model_fields_set and not (self.name or "").strip():
            raise ValueError("name cannot be cleared.")
        if (
            "endpoint_url" in self.model_fields_set
            and not (self.endpoint_url or "").strip()
        ):
            raise ValueError("endpoint_url cannot be cleared.")
        if "fail_strategy" in self.model_fields_set and self.fail_strategy is None:
            raise ValueError(
                "fail_strategy cannot be null; omit the field to leave it unchanged."
            )
        if "timeout_seconds" in self.model_fields_set and self.timeout_seconds is None:
            raise ValueError(
                "timeout_seconds cannot be null; omit the field to leave it unchanged."
            )
        return self


# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------


class HookPointMetaResponse(BaseModel):
    hook_point: HookPoint
    display_name: str
    description: str
    docs_url: str | None
    input_schema: dict[str, Any]
    output_schema: dict[str, Any]
    default_timeout_seconds: float
    default_fail_strategy: HookFailStrategy
    fail_hard_description: str


class HookResponse(BaseModel):
    id: int
    name: str
    hook_point: HookPoint
    # Nullable to match the DB column — endpoint_url is required on creation but
    # future hook point types may not use an external endpoint (e.g. built-in handlers).
    endpoint_url: str | None
    # Partially-masked API key (e.g. "abcd••••••••wxyz"), or None if no key is set.
    api_key_masked: str | None
    fail_strategy: HookFailStrategy
    timeout_seconds: float  # always resolved — None from request is replaced with spec default before DB write
    is_active: bool
    is_reachable: bool | None
    creator_email: str | None
    created_at: datetime
    updated_at: datetime


class HookValidateStatus(str, Enum):
    passed = "passed"  # server responded (any status except 401/403)
    auth_failed = "auth_failed"  # server responded with 401 or 403
    timeout = (
        "timeout"  # TCP connected, but read/write timed out (server exists but slow)
    )
    cannot_connect = "cannot_connect"  # could not connect to the server


class HookValidateResponse(BaseModel):
    status: HookValidateStatus
    error_message: str | None = None


class HookExecutionRecord(BaseModel):
    error_message: str | None = None
    status_code: int | None = None
    duration_ms: int | None = None
    created_at: datetime


================================================
FILE: backend/onyx/hooks/points/__init__.py
================================================


================================================
FILE: backend/onyx/hooks/points/base.py
================================================
from typing import Any
from typing import ClassVar

from pydantic import BaseModel

from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint


_REQUIRED_ATTRS = (
    "hook_point",
    "display_name",
    "description",
    "default_timeout_seconds",
    "fail_hard_description",
    "default_fail_strategy",
    "payload_model",
    "response_model",
)


class HookPointSpec:
    """Static metadata and contract for a pipeline hook point.

    Each concrete subclass represents exactly one hook point and is instantiated
    once at startup, registered in onyx.hooks.registry._REGISTRY. Prefer
    get_hook_point_spec() or get_all_specs() from the registry over direct
    instantiation.

    Each hook point is a concrete subclass of this class. Onyx engineers
    own these definitions — customers never touch this code.

    Subclasses must define all attributes as class-level constants.
    payload_model and response_model must be Pydantic BaseModel subclasses;
    input_schema and output_schema are derived from them automatically.
    """

    hook_point: HookPoint
    display_name: str
    description: str
    default_timeout_seconds: float
    fail_hard_description: str
    default_fail_strategy: HookFailStrategy
    docs_url: str | None = None

    payload_model: ClassVar[type[BaseModel]]
    response_model: ClassVar[type[BaseModel]]

    # Computed once at class definition time from payload_model / response_model.
    input_schema: ClassVar[dict[str, Any]]
    output_schema: ClassVar[dict[str, Any]]

    def __init_subclass__(cls, **kwargs: object) -> None:
        """Enforce that every subclass declares all required class attributes.

        Called automatically by Python whenever a class inherits from HookPointSpec.
        Raises TypeError at import time if any required attribute is missing or if
        payload_model / response_model are not Pydantic BaseModel subclasses.
        input_schema and output_schema are derived automatically from the models.
        """
        super().__init_subclass__(**kwargs)
        missing = [attr for attr in _REQUIRED_ATTRS if not hasattr(cls, attr)]
        if missing:
            raise TypeError(f"{cls.__name__} must define class attributes: {missing}")
        for attr in ("payload_model", "response_model"):
            val = getattr(cls, attr, None)
            if val is None or not (
                isinstance(val, type) and issubclass(val, BaseModel)
            ):
                raise TypeError(
                    f"{cls.__name__}.{attr} must be a Pydantic BaseModel subclass, got {val!r}"
                )
        cls.input_schema = cls.payload_model.model_json_schema()
        cls.output_schema = cls.response_model.model_json_schema()


================================================
FILE: backend/onyx/hooks/points/document_ingestion.py
================================================
from pydantic import BaseModel
from pydantic import Field

from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.hooks.points.base import HookPointSpec


class DocumentIngestionSection(BaseModel):
    """Represents a single section of a document — either text or image, not both.

    Text section: set `text`, leave `image_file_id` null.
    Image section: set `image_file_id`, leave `text` null.
    """

    text: str | None = Field(
        default=None,
        description="Text content of this section. Set for text sections, null for image sections.",
    )
    link: str | None = Field(
        default=None,
        description="Optional URL associated with this section. Preserve the original link from the payload if you want it retained.",
    )
    image_file_id: str | None = Field(
        default=None,
        description=(
            "Opaque identifier for an image stored in the file store. "
            "The image content is not included — this field signals that the section is an image. "
            "Hooks can use its presence to reorder or drop image sections, but cannot read or modify the image itself."
        ),
    )


class DocumentIngestionOwner(BaseModel):
    display_name: str | None = Field(
        default=None,
        description="Human-readable name of the owner.",
    )
    email: str | None = Field(
        default=None,
        description="Email address of the owner.",
    )


class DocumentIngestionPayload(BaseModel):
    document_id: str = Field(
        description="Unique identifier for the document. Read-only — changes are ignored."
    )
    title: str | None = Field(description="Title of the document.")
    semantic_identifier: str = Field(
        description="Human-readable identifier used for display (e.g. file name, page title)."
    )
    source: str = Field(
        description=(
            "Connector source type (e.g. confluence, slack, google_drive). "
            "Read-only — changes are ignored. "
            "Full list of values: https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/configs/constants.py#L195"
        )
    )
    sections: list[DocumentIngestionSection] = Field(
        description="Sections of the document. Includes both text sections (text set, image_file_id null) and image sections (image_file_id set, text null)."
    )
    metadata: dict[str, list[str]] = Field(
        description="Key-value metadata attached to the document. Values are always a list of strings."
    )
    doc_updated_at: str | None = Field(
        description="ISO 8601 UTC timestamp of the last update at the source, or null if unknown. Example: '2024-03-15T10:30:00+00:00'."
    )
    primary_owners: list[DocumentIngestionOwner] | None = Field(
        description="Primary owners of the document, or null if not available."
    )
    secondary_owners: list[DocumentIngestionOwner] | None = Field(
        description="Secondary owners of the document, or null if not available."
    )


class DocumentIngestionResponse(BaseModel):
    # Intentionally permissive — customer endpoints may return extra fields.
    sections: list[DocumentIngestionSection] | None = Field(
        description="The sections to index, in the desired order. Reorder, drop, or modify sections freely. Null or empty list drops the document."
    )
    rejection_reason: str | None = Field(
        default=None,
        description="Logged when sections is null or empty. Falls back to a generic message if omitted.",
    )


class DocumentIngestionSpec(HookPointSpec):
    """Hook point that runs on every document before it enters the indexing pipeline.

    Call site: immediately after Onyx's internal validation and before the
    indexing pipeline begins — no partial writes have occurred yet.

    If a Document Ingestion hook is configured, it takes precedence —
    Document Ingestion Light will not run. Configure only one per deployment.

    Supported use cases:
    - Document filtering: drop documents based on content or metadata
    - Content rewriting: redact PII or normalize text before indexing
    """

    hook_point = HookPoint.DOCUMENT_INGESTION
    display_name = "Document Ingestion"
    description = (
        "Runs on every document before it enters the indexing pipeline. "
        "Allows filtering, rewriting, or dropping documents."
    )
    default_timeout_seconds = 30.0
    fail_hard_description = "The document will not be indexed."
    default_fail_strategy = HookFailStrategy.HARD
    docs_url = "https://docs.onyx.app/admins/advanced_configs/hook_extensions#document-ingestion"

    payload_model = DocumentIngestionPayload
    response_model = DocumentIngestionResponse


================================================
FILE: backend/onyx/hooks/points/query_processing.py
================================================
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field

from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.hooks.points.base import HookPointSpec


class QueryProcessingPayload(BaseModel):
    model_config = ConfigDict(extra="forbid")

    query: str = Field(description="The raw query string exactly as the user typed it.")
    user_email: str | None = Field(
        description="Email of the user submitting the query, or null if unauthenticated."
    )
    chat_session_id: str = Field(
        description="UUID of the chat session, formatted as a hyphenated lowercase string (e.g. '550e8400-e29b-41d4-a716-446655440000'). Always present — the session is guaranteed to exist by the time this hook fires."
    )


class QueryProcessingResponse(BaseModel):
    # Intentionally permissive — customer endpoints may return extra fields.
    query: str | None = Field(
        default=None,
        description=(
            "The query to use in the pipeline. "
            "Null, empty string, whitespace-only, or absent = reject the query."
        ),
    )
    rejection_message: str | None = Field(
        default=None,
        description="Message shown to the user when the query is rejected. Falls back to a generic message if not provided.",
    )


class QueryProcessingSpec(HookPointSpec):
    """Hook point that runs on every user query before it enters the pipeline.

    Call site: inside handle_stream_message_objects() in
    backend/onyx/chat/process_message.py, immediately after message_text is
    assigned from the request and before create_new_chat_message() saves it.

    This is the earliest possible point in the query pipeline:
    - Raw query — unmodified, exactly as the user typed it
    - No side effects yet — message has not been saved to DB
    - User identity is available for user-specific logic

    Supported use cases:
    - Query rejection: block queries based on content or user context
    - Query rewriting: normalize, expand, or modify the query
    - PII removal: scrub sensitive data before the LLM sees it
    - Access control: reject queries from certain users or groups
    - Query auditing: log or track queries based on business rules
    """

    hook_point = HookPoint.QUERY_PROCESSING
    display_name = "Query Processing"
    description = (
        "Runs on every user query before it enters the pipeline. "
        "Allows rewriting, filtering, or rejecting queries."
    )
    default_timeout_seconds = 5.0  # user is actively waiting — keep tight
    fail_hard_description = (
        "The query will be blocked and the user will see an error message."
    )
    default_fail_strategy = HookFailStrategy.HARD
    docs_url = (
        "https://docs.onyx.app/admins/advanced_configs/hook_extensions#query-processing"
    )

    payload_model = QueryProcessingPayload
    response_model = QueryProcessingResponse


================================================
FILE: backend/onyx/hooks/registry.py
================================================
from onyx.db.enums import HookPoint
from onyx.hooks.points.base import HookPointSpec
from onyx.hooks.points.document_ingestion import DocumentIngestionSpec
from onyx.hooks.points.query_processing import QueryProcessingSpec

# Internal: use `monkeypatch.setattr(registry_module, "_REGISTRY", {...})` to override in tests.
_REGISTRY: dict[HookPoint, HookPointSpec] = {
    HookPoint.DOCUMENT_INGESTION: DocumentIngestionSpec(),
    HookPoint.QUERY_PROCESSING: QueryProcessingSpec(),
}


def validate_registry() -> None:
    """Assert that every HookPoint enum value has a registered spec.

    Call once at application startup (e.g. from the FastAPI lifespan hook).
    Raises RuntimeError if any hook point is missing a spec.
    """
    missing = set(HookPoint) - set(_REGISTRY)
    if missing:
        raise RuntimeError(
            f"Hook point(s) have no registered spec: {missing}. "
            "Add an entry to onyx.hooks.registry._REGISTRY."
        )


def get_hook_point_spec(hook_point: HookPoint) -> HookPointSpec:
    """Returns the spec for a given hook point.

    Raises ValueError if the hook point has no registered spec — this is a
    programmer error; every HookPoint enum value must have a corresponding spec
    in _REGISTRY.
    """
    try:
        return _REGISTRY[hook_point]
    except KeyError:
        raise ValueError(
            f"No spec registered for hook point {hook_point!r}. "
            "Add an entry to onyx.hooks.registry._REGISTRY."
        )


def get_all_specs() -> list[HookPointSpec]:
    """Returns the specs for all registered hook points."""
    return list(_REGISTRY.values())


================================================
FILE: backend/onyx/httpx/httpx_pool.py
================================================
import threading
from typing import Any

import httpx


def make_default_kwargs() -> dict[str, Any]:
    return {
        "http2": True,
        "limits": httpx.Limits(),
    }


class HttpxPool:
    """Class to manage a global httpx Client instance"""

    _clients: dict[str, httpx.Client] = {}
    _lock: threading.Lock = threading.Lock()

    # Default parameters for creation

    def __init__(self) -> None:
        pass

    @classmethod
    def _init_client(cls, **kwargs: Any) -> httpx.Client:
        """Private helper method to create and return an httpx.Client."""
        merged_kwargs = {**(make_default_kwargs()), **kwargs}
        return httpx.Client(**merged_kwargs)

    @classmethod
    def init_client(cls, name: str, **kwargs: Any) -> None:
        """Allow the caller to init the client with extra params."""
        with cls._lock:
            if name not in cls._clients:
                cls._clients[name] = cls._init_client(**kwargs)

    @classmethod
    def close_client(cls, name: str) -> None:
        """Allow the caller to close the client."""
        with cls._lock:
            client = cls._clients.pop(name, None)
            if client:
                client.close()

    @classmethod
    def close_all(cls) -> None:
        """Close all registered clients."""
        with cls._lock:
            for client in cls._clients.values():
                client.close()
            cls._clients.clear()

    @classmethod
    def get(cls, name: str) -> httpx.Client:
        """Gets the httpx.Client. Will init to default settings if not init'd."""
        with cls._lock:
            if name not in cls._clients:
                cls._clients[name] = cls._init_client()
            return cls._clients[name]


================================================
FILE: backend/onyx/image_gen/__init__.py
================================================


================================================
FILE: backend/onyx/image_gen/exceptions.py
================================================
class ImageProviderError(Exception):
    pass


class ImageProviderCredentialsError(ImageProviderError):
    pass


================================================
FILE: backend/onyx/image_gen/factory.py
================================================
from enum import Enum

from onyx.image_gen.interfaces import ImageGenerationProvider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.providers.azure_img_gen import AzureImageGenerationProvider
from onyx.image_gen.providers.openai_img_gen import OpenAIImageGenerationProvider
from onyx.image_gen.providers.vertex_img_gen import VertexImageGenerationProvider


class ImageGenerationProviderName(str, Enum):
    AZURE = "azure"
    OPENAI = "openai"
    VERTEX_AI = "vertex_ai"


PROVIDERS: dict[ImageGenerationProviderName, type[ImageGenerationProvider]] = {
    ImageGenerationProviderName.AZURE: AzureImageGenerationProvider,
    ImageGenerationProviderName.OPENAI: OpenAIImageGenerationProvider,
    ImageGenerationProviderName.VERTEX_AI: VertexImageGenerationProvider,
}


def get_image_generation_provider(
    provider: str,
    credentials: ImageGenerationProviderCredentials,
) -> ImageGenerationProvider:
    provider_cls = _get_provider_cls(provider)
    return provider_cls.build_from_credentials(credentials)


def validate_credentials(
    provider: str,
    credentials: ImageGenerationProviderCredentials,
) -> bool:
    provider_cls = _get_provider_cls(provider)
    return provider_cls.validate_credentials(credentials)


def _get_provider_cls(provider: str) -> type[ImageGenerationProvider]:
    try:
        provider_enum = ImageGenerationProviderName(provider)
    except ValueError:
        raise ValueError(f"Invalid image generation provider: {provider}")
    return PROVIDERS[provider_enum]


================================================
FILE: backend/onyx/image_gen/interfaces.py
================================================
from __future__ import annotations

import abc
from typing import Any
from typing import TYPE_CHECKING

from pydantic import BaseModel

from onyx.image_gen.exceptions import ImageProviderCredentialsError

if TYPE_CHECKING:
    from litellm.types.utils import ImageResponse as ImageGenerationResponse


class ImageGenerationProviderCredentials(BaseModel):
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None
    custom_config: dict[str, str] | None = None


class ReferenceImage(BaseModel):
    data: bytes
    mime_type: str


class ImageGenerationProvider(abc.ABC):
    @property
    def supports_reference_images(self) -> bool:
        return False

    @property
    def max_reference_images(self) -> int:
        return 0

    @classmethod
    @abc.abstractmethod
    def validate_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> bool:
        """Returns true if sufficient credentials are given to build this provider."""
        raise NotImplementedError("validate_credentials not implemented")

    @classmethod
    def build_from_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> ImageGenerationProvider:
        if not cls.validate_credentials(credentials):
            raise ImageProviderCredentialsError(
                f"Invalid image generation credentials: {credentials}"
            )
        return cls._build_from_credentials(credentials)

    @classmethod
    @abc.abstractmethod
    def _build_from_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> ImageGenerationProvider:
        """
        Given credentials, builds an instance of the provider.
        Should NOT be called directly - use build_from_credentials instead.

        AssertionError if credentials are invalid.
        """
        raise NotImplementedError("build_from_credentials not implemented")

    @abc.abstractmethod
    def generate_image(
        self,
        prompt: str,
        model: str,
        size: str,
        n: int,
        quality: str | None = None,
        reference_images: list[ReferenceImage] | None = None,
        **kwargs: Any,
    ) -> ImageGenerationResponse:
        """Generates an image based on a prompt."""
        raise NotImplementedError("generate_image not implemented")


================================================
FILE: backend/onyx/image_gen/providers/azure_img_gen.py
================================================
from __future__ import annotations

from typing import Any
from typing import TYPE_CHECKING

from onyx.image_gen.interfaces import ImageGenerationProvider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage

if TYPE_CHECKING:
    from onyx.image_gen.interfaces import ImageGenerationResponse


class AzureImageGenerationProvider(ImageGenerationProvider):
    _GPT_IMAGE_MODEL_PREFIX = "gpt-image-"
    _DALL_E_2_MODEL_NAME = "dall-e-2"

    def __init__(
        self,
        api_key: str,
        api_base: str,
        api_version: str,
        deployment_name: str | None = None,
    ):
        self._api_key = api_key
        self._api_base = api_base
        self._api_version = api_version
        self._deployment_name = deployment_name

    @classmethod
    def validate_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> bool:
        return all(
            [
                credentials.api_key,
                credentials.api_base,
                credentials.api_version,
            ]
        )

    @classmethod
    def _build_from_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> AzureImageGenerationProvider:
        assert credentials.api_key
        assert credentials.api_base
        assert credentials.api_version

        return cls(
            api_key=credentials.api_key,
            api_base=credentials.api_base,
            api_version=credentials.api_version,
            deployment_name=credentials.deployment_name,
        )

    @property
    def supports_reference_images(self) -> bool:
        return True

    @property
    def max_reference_images(self) -> int:
        # Azure GPT image models support up to 16 input images for edits.
        return 16

    def _normalize_model_name(self, model: str) -> str:
        return model.rsplit("/", 1)[-1]

    def _model_supports_image_edits(self, model: str) -> bool:
        normalized_model = self._normalize_model_name(model)
        return (
            normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)
            or normalized_model == self._DALL_E_2_MODEL_NAME
        )

    def generate_image(
        self,
        prompt: str,
        model: str,
        size: str,
        n: int,
        quality: str | None = None,
        reference_images: list[ReferenceImage] | None = None,
        **kwargs: Any,
    ) -> ImageGenerationResponse:
        deployment = self._deployment_name or model
        model_name = f"azure/{deployment}"

        if reference_images:
            if not self._model_supports_image_edits(model):
                raise ValueError(
                    f"Model '{model}' does not support image edits with reference images."
                )

            normalized_model = self._normalize_model_name(model)
            if (
                normalized_model == self._DALL_E_2_MODEL_NAME
                and len(reference_images) > 1
            ):
                raise ValueError(
                    "Model 'dall-e-2' only supports a single reference image for edits."
                )

            from litellm import image_edit

            return image_edit(
                image=[image.data for image in reference_images],
                prompt=prompt,
                model=model_name,
                api_key=self._api_key,
                api_base=self._api_base,
                api_version=self._api_version,
                size=size,
                n=n,
                quality=quality,
                **kwargs,
            )

        from litellm import image_generation

        return image_generation(
            prompt=prompt,
            model=model_name,
            api_key=self._api_key,
            api_base=self._api_base,
            api_version=self._api_version,
            size=size,
            n=n,
            quality=quality,
            **kwargs,
        )


================================================
FILE: backend/onyx/image_gen/providers/openai_img_gen.py
================================================
from __future__ import annotations

from typing import Any
from typing import TYPE_CHECKING

from onyx.image_gen.interfaces import ImageGenerationProvider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage

if TYPE_CHECKING:
    from onyx.image_gen.interfaces import ImageGenerationResponse


class OpenAIImageGenerationProvider(ImageGenerationProvider):
    _GPT_IMAGE_MODEL_PREFIX = "gpt-image-"
    _DALL_E_2_MODEL_NAME = "dall-e-2"

    def __init__(
        self,
        api_key: str,
        api_base: str | None = None,
    ):
        self._api_key = api_key
        self._api_base = api_base

    @classmethod
    def validate_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> bool:
        return bool(credentials.api_key)

    @classmethod
    def _build_from_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> OpenAIImageGenerationProvider:
        assert credentials.api_key

        return cls(
            api_key=credentials.api_key,
            api_base=credentials.api_base,
        )

    @property
    def supports_reference_images(self) -> bool:
        return True

    @property
    def max_reference_images(self) -> int:
        # GPT image models support up to 16 input images for edits.
        return 16

    def _normalize_model_name(self, model: str) -> str:
        return model.rsplit("/", 1)[-1]

    def _model_supports_image_edits(self, model: str) -> bool:
        normalized_model = self._normalize_model_name(model)
        return (
            normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)
            or normalized_model == self._DALL_E_2_MODEL_NAME
        )

    def generate_image(
        self,
        prompt: str,
        model: str,
        size: str,
        n: int,
        quality: str | None = None,
        reference_images: list[ReferenceImage] | None = None,
        **kwargs: Any,
    ) -> ImageGenerationResponse:
        if reference_images:
            if not self._model_supports_image_edits(model):
                raise ValueError(
                    f"Model '{model}' does not support image edits with reference images."
                )

            normalized_model = self._normalize_model_name(model)
            if (
                normalized_model == self._DALL_E_2_MODEL_NAME
                and len(reference_images) > 1
            ):
                raise ValueError(
                    "Model 'dall-e-2' only supports a single reference image for edits."
                )

            from litellm import image_edit

            return image_edit(
                image=[image.data for image in reference_images],
                prompt=prompt,
                model=model,
                api_key=self._api_key,
                api_base=self._api_base,
                size=size,
                n=n,
                quality=quality,
                **kwargs,
            )

        from litellm import image_generation

        return image_generation(
            prompt=prompt,
            model=model,
            api_key=self._api_key,
            api_base=self._api_base,
            size=size,
            n=n,
            quality=quality,
            **kwargs,
        )


================================================
FILE: backend/onyx/image_gen/providers/vertex_img_gen.py
================================================
from __future__ import annotations

import base64
import json
from datetime import datetime
from typing import Any
from typing import TYPE_CHECKING

from pydantic import BaseModel

from onyx.image_gen.exceptions import ImageProviderCredentialsError
from onyx.image_gen.interfaces import ImageGenerationProvider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage

if TYPE_CHECKING:
    from onyx.image_gen.interfaces import ImageGenerationResponse


class VertexCredentials(BaseModel):
    vertex_credentials: str
    vertex_location: str
    project_id: str


class VertexImageGenerationProvider(ImageGenerationProvider):
    def __init__(
        self,
        vertex_credentials: VertexCredentials,
    ):
        self._vertex_credentials = vertex_credentials.vertex_credentials
        self._vertex_location = vertex_credentials.vertex_location
        self._vertex_project = vertex_credentials.project_id

    @classmethod
    def validate_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> bool:
        try:
            _parse_to_vertex_credentials(credentials)
            return True
        except ImageProviderCredentialsError:
            return False

    @classmethod
    def _build_from_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,
    ) -> VertexImageGenerationProvider:
        vertex_credentials = _parse_to_vertex_credentials(credentials)

        return cls(
            vertex_credentials=vertex_credentials,
        )

    @property
    def supports_reference_images(self) -> bool:
        return True

    @property
    def max_reference_images(self) -> int:
        # Gemini image editing supports up to 14 input images.
        return 14

    def generate_image(
        self,
        prompt: str,
        model: str,
        size: str,
        n: int,
        quality: str | None = None,
        reference_images: list[ReferenceImage] | None = None,
        **kwargs: Any,
    ) -> ImageGenerationResponse:
        if reference_images:
            return self._generate_image_with_reference_images(
                prompt=prompt,
                model=model,
                size=size,
                n=n,
                reference_images=reference_images,
            )

        from litellm import image_generation

        return image_generation(
            prompt=prompt,
            model=model,
            size=size,
            n=n,
            quality=quality,
            vertex_location=self._vertex_location,
            vertex_credentials=self._vertex_credentials,
            vertex_project=self._vertex_project,
            **kwargs,
        )

    def _generate_image_with_reference_images(
        self,
        prompt: str,
        model: str,
        size: str,
        n: int,
        reference_images: list[ReferenceImage],
    ) -> ImageGenerationResponse:
        from google import genai
        from google.genai import types as genai_types
        from google.oauth2 import service_account
        from litellm.types.utils import ImageObject
        from litellm.types.utils import ImageResponse

        service_account_info = json.loads(self._vertex_credentials)
        credentials = service_account.Credentials.from_service_account_info(
            service_account_info,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )

        client = genai.Client(
            vertexai=True,
            project=self._vertex_project,
            location=self._vertex_location,
            credentials=credentials,
        )

        parts: list[genai_types.Part] = [
            genai_types.Part.from_bytes(data=image.data, mime_type=image.mime_type)
            for image in reference_images
        ]
        parts.append(genai_types.Part.from_text(text=prompt))

        config = genai_types.GenerateContentConfig(
            response_modalities=["TEXT", "IMAGE"],
            candidate_count=max(1, n),
            image_config=genai_types.ImageConfig(
                aspect_ratio=_map_size_to_aspect_ratio(size)
            ),
        )
        model_name = model.replace("vertex_ai/", "")
        response = client.models.generate_content(
            model=model_name,
            contents=genai_types.Content(
                role="user",
                parts=parts,
            ),
            config=config,
        )

        generated_data: list[ImageObject] = []
        for candidate in response.candidates or []:
            candidate_content = candidate.content
            if not candidate_content:
                continue

            for part in candidate_content.parts or []:
                inline_data = part.inline_data
                if not inline_data or inline_data.data is None:
                    continue

                if isinstance(inline_data.data, bytes):
                    b64_json = base64.b64encode(inline_data.data).decode("utf-8")
                elif isinstance(inline_data.data, str):
                    b64_json = inline_data.data
                else:
                    continue

                generated_data.append(
                    ImageObject(
                        b64_json=b64_json,
                        revised_prompt=prompt,
                    )
                )

        if not generated_data:
            raise RuntimeError("No image data returned from Vertex AI.")

        return ImageResponse(
            created=int(datetime.now().timestamp()),
            data=generated_data,
        )


def _map_size_to_aspect_ratio(size: str) -> str:
    return {
        "1024x1024": "1:1",
        "1792x1024": "16:9",
        "1024x1792": "9:16",
        "1536x1024": "3:2",
        "1024x1536": "2:3",
    }.get(size, "1:1")


def _parse_to_vertex_credentials(
    credentials: ImageGenerationProviderCredentials,
) -> VertexCredentials:
    custom_config = credentials.custom_config

    if not custom_config:
        raise ImageProviderCredentialsError("Custom config is required")

    vertex_credentials = custom_config.get("vertex_credentials")
    vertex_location = custom_config.get("vertex_location")

    if not vertex_credentials:
        raise ImageProviderCredentialsError("Vertex credentials are required")

    if not vertex_location:
        raise ImageProviderCredentialsError("Vertex location is required")

    vertex_json = json.loads(vertex_credentials)
    vertex_project = vertex_json.get("project_id")

    if not vertex_project:
        raise ImageProviderCredentialsError("Project ID is required")

    return VertexCredentials(
        vertex_credentials=vertex_credentials,
        vertex_location=vertex_location,
        project_id=vertex_project,
    )


================================================
FILE: backend/onyx/indexing/__init__.py
================================================


================================================
FILE: backend/onyx/indexing/adapters/document_indexing_adapter.py
================================================
import contextlib
from collections.abc import Generator

from sqlalchemy.engine.util import TransactionalContext
from sqlalchemy.orm import Session

from onyx.access.access import get_access_for_documents
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DEFAULT_BOOST
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.db.chunk import update_chunk_boost_components__no_commit
from onyx.db.document import fetch_chunk_counts_for_documents
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.document import prepare_to_modify_documents
from onyx.db.document import update_docs_chunk_count__no_commit
from onyx.db.document import update_docs_last_modified__no_commit
from onyx.db.document import update_docs_updated_at__no_commit
from onyx.db.document_set import fetch_document_sets_for_documents
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.indexing.models import ChunkEnrichmentContext
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
from onyx.redis.redis_hierarchy import get_ancestors_from_raw_id
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger

logger = setup_logger()


class DocumentIndexingBatchAdapter:
    """Default adapter: handles DB prep, locking, metadata enrichment, and finalize.

    Keeps orchestration logic in the pipeline and side-effects in the adapter.
    """

    def __init__(
        self,
        db_session: Session,
        connector_id: int,
        credential_id: int,
        tenant_id: str,
        index_attempt_metadata: IndexAttemptMetadata,
    ):
        self.db_session = db_session
        self.connector_id = connector_id
        self.credential_id = credential_id
        self.tenant_id = tenant_id
        self.index_attempt_metadata = index_attempt_metadata

    def prepare(
        self, documents: list[Document], ignore_time_skip: bool
    ) -> DocumentBatchPrepareContext | None:
        """Upsert docs, map CC pairs, return context or mark as indexed if no-op."""
        context = index_doc_batch_prepare(
            documents=documents,
            index_attempt_metadata=self.index_attempt_metadata,
            db_session=self.db_session,
            ignore_time_skip=ignore_time_skip,
        )

        if not context:
            # even though we didn't actually index anything, we should still
            # mark them as "completed" for the CC Pair in order to make the
            # counts match
            mark_document_as_indexed_for_cc_pair__no_commit(
                connector_id=self.index_attempt_metadata.connector_id,
                credential_id=self.index_attempt_metadata.credential_id,
                document_ids=[doc.id for doc in documents],
                db_session=self.db_session,
            )
            self.db_session.commit()

        return context

    @contextlib.contextmanager
    def lock_context(
        self, documents: list[Document]
    ) -> Generator[TransactionalContext, None, None]:
        """Acquire transaction/row locks on docs for the critical section."""
        with prepare_to_modify_documents(
            db_session=self.db_session, document_ids=[doc.id for doc in documents]
        ) as transaction:
            yield transaction

    def prepare_enrichment(
        self,
        context: DocumentBatchPrepareContext,
        tenant_id: str,
        chunks: list[DocAwareChunk],
    ) -> "DocumentChunkEnricher":
        """Do all DB lookups once and return a per-chunk enricher."""
        updatable_ids = [doc.id for doc in context.updatable_docs]

        doc_id_to_new_chunk_cnt: dict[str, int] = {
            doc_id: 0 for doc_id in updatable_ids
        }
        for chunk in chunks:
            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1

        no_access = DocumentAccess.build(
            user_emails=[],
            user_groups=[],
            external_user_emails=[],
            external_user_group_ids=[],
            is_public=False,
        )

        return DocumentChunkEnricher(
            doc_id_to_access_info=get_access_for_documents(
                document_ids=updatable_ids, db_session=self.db_session
            ),
            doc_id_to_document_set={
                document_id: document_sets
                for document_id, document_sets in fetch_document_sets_for_documents(
                    document_ids=updatable_ids, db_session=self.db_session
                )
            },
            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
                context.updatable_docs, tenant_id
            ),
            id_to_boost_map=context.id_to_boost_map,
            doc_id_to_previous_chunk_cnt={
                document_id: chunk_count
                for document_id, chunk_count in fetch_chunk_counts_for_documents(
                    document_ids=updatable_ids,
                    db_session=self.db_session,
                )
            },
            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
            no_access=no_access,
            tenant_id=tenant_id,
        )

    def _get_ancestor_ids_for_documents(
        self,
        documents: list[Document],
        tenant_id: str,
    ) -> dict[str, list[int]]:
        """
        Get ancestor hierarchy node IDs for a batch of documents.

        Uses Redis cache for fast lookups - no DB calls are made unless
        there's a cache miss. Documents provide parent_hierarchy_raw_node_id
        directly from the connector.

        Returns a mapping from document_id to list of ancestor node IDs.
        """
        if not documents:
            return {}

        redis_client = get_redis_client(tenant_id=tenant_id)
        result: dict[str, list[int]] = {}

        for doc in documents:
            # Use parent_hierarchy_raw_node_id directly from the document
            # If None, get_ancestors_from_raw_id will return just the SOURCE node
            ancestors = get_ancestors_from_raw_id(
                redis_client=redis_client,
                source=doc.source,
                parent_hierarchy_raw_node_id=doc.parent_hierarchy_raw_node_id,
                db_session=self.db_session,
            )
            result[doc.id] = ancestors

        return result

    def post_index(
        self,
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
        enrichment: ChunkEnrichmentContext,
    ) -> None:
        """Finalize DB updates, store plaintext, and mark docs as indexed."""
        updatable_ids = [doc.id for doc in context.updatable_docs]
        last_modified_ids = []
        ids_to_new_updated_at = {}
        for doc in context.updatable_docs:
            last_modified_ids.append(doc.id)
            # doc_updated_at is the source's idea (on the other end of the connector)
            # of when the doc was last modified
            if doc.doc_updated_at is None:
                continue
            ids_to_new_updated_at[doc.id] = doc.doc_updated_at

        update_docs_updated_at__no_commit(
            ids_to_new_updated_at=ids_to_new_updated_at, db_session=self.db_session
        )

        update_docs_last_modified__no_commit(
            document_ids=last_modified_ids, db_session=self.db_session
        )

        update_docs_chunk_count__no_commit(
            document_ids=updatable_ids,
            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
            db_session=self.db_session,
        )

        # these documents can now be counted as part of the CC Pairs
        # document count, so we need to mark them as indexed
        # NOTE: even documents we skipped since they were already up
        # to date should be counted here in order to maintain parity
        # between CC Pair and index attempt counts
        mark_document_as_indexed_for_cc_pair__no_commit(
            connector_id=self.index_attempt_metadata.connector_id,
            credential_id=self.index_attempt_metadata.credential_id,
            document_ids=[doc.id for doc in filtered_documents],
            db_session=self.db_session,
        )

        # save the chunk boost components to postgres
        update_chunk_boost_components__no_commit(
            chunk_data=updatable_chunk_data, db_session=self.db_session
        )

        self.db_session.commit()


class DocumentChunkEnricher:
    """Pre-computed metadata for per-chunk enrichment of connector documents."""

    def __init__(
        self,
        doc_id_to_access_info: dict[str, DocumentAccess],
        doc_id_to_document_set: dict[str, list[str]],
        doc_id_to_ancestor_ids: dict[str, list[int]],
        id_to_boost_map: dict[str, int],
        doc_id_to_previous_chunk_cnt: dict[str, int],
        doc_id_to_new_chunk_cnt: dict[str, int],
        no_access: DocumentAccess,
        tenant_id: str,
    ) -> None:
        self._doc_id_to_access_info = doc_id_to_access_info
        self._doc_id_to_document_set = doc_id_to_document_set
        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
        self._id_to_boost_map = id_to_boost_map
        self._no_access = no_access
        self._tenant_id = tenant_id
        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt

    def enrich_chunk(
        self, chunk: IndexChunk, score: float
    ) -> DocMetadataAwareIndexChunk:
        return DocMetadataAwareIndexChunk.from_index_chunk(
            index_chunk=chunk,
            access=self._doc_id_to_access_info.get(
                chunk.source_document.id, self._no_access
            ),
            document_sets=set(
                self._doc_id_to_document_set.get(chunk.source_document.id, [])
            ),
            user_project=[],
            personas=[],
            boost=(
                self._id_to_boost_map[chunk.source_document.id]
                if chunk.source_document.id in self._id_to_boost_map
                else DEFAULT_BOOST
            ),
            tenant_id=self._tenant_id,
            aggregated_chunk_boost_factor=score,
            ancestor_hierarchy_node_ids=self._doc_id_to_ancestor_ids[
                chunk.source_document.id
            ],
        )


================================================
FILE: backend/onyx/indexing/adapters/user_file_indexing_adapter.py
================================================
from __future__ import annotations

import contextlib
import datetime
import time
from collections import defaultdict
from collections.abc import Generator
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from sqlalchemy.orm.session import TransactionalContext

from onyx.access.access import get_access_for_user_files
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DEFAULT_BOOST
from onyx.configs.constants import NotificationType
from onyx.connectors.models import Document
from onyx.db.enums import UserFileStatus
from onyx.db.models import Persona
from onyx.db.models import UserFile
from onyx.db.notification import create_notification
from onyx.db.user_file import fetch_chunk_counts_for_user_files
from onyx.db.user_file import fetch_persona_ids_for_user_files
from onyx.db.user_file import fetch_user_project_ids_for_user_files
from onyx.file_store.utils import store_user_file_plaintext
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.models import ChunkEnrichmentContext
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
from onyx.llm.factory import get_default_llm
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.utils.logger import setup_logger

logger = setup_logger()

_NUM_LOCK_ATTEMPTS = 3
retry_delay = 0.5


def _acquire_user_file_locks(db_session: Session, user_file_ids: list[str]) -> bool:
    """Acquire locks for the specified user files."""
    # Convert to UUIDs for the DB comparison
    user_file_uuid_list = [UUID(user_file_id) for user_file_id in user_file_ids]
    stmt = (
        select(UserFile.id)
        .where(UserFile.id.in_(user_file_uuid_list))
        .with_for_update(nowait=True)
    )
    # will raise exception if any of the documents are already locked
    documents = db_session.scalars(stmt).all()

    # make sure we found every document
    if len(documents) != len(set(user_file_ids)):
        logger.warning("Didn't find row for all specified user file IDs. Aborting.")
        return False

    return True


class UserFileIndexingAdapter:
    def __init__(self, tenant_id: str, db_session: Session):
        self.tenant_id = tenant_id
        self.db_session = db_session

    def prepare(
        self,
        documents: list[Document],
        ignore_time_skip: bool,  # noqa: ARG002
    ) -> DocumentBatchPrepareContext:
        return DocumentBatchPrepareContext(
            updatable_docs=documents,
            id_to_boost_map={},  # TODO(subash): add boost map
        )

    @contextlib.contextmanager
    def lock_context(
        self, documents: list[Document]
    ) -> Generator[TransactionalContext, None, None]:
        self.db_session.commit()  # ensure that we're not in a transaction
        lock_acquired = False
        for i in range(_NUM_LOCK_ATTEMPTS):
            try:
                with self.db_session.begin() as transaction:
                    lock_acquired = _acquire_user_file_locks(
                        db_session=self.db_session,
                        user_file_ids=[doc.id for doc in documents],
                    )
                    if lock_acquired:
                        yield transaction
                        break
            except OperationalError as e:
                logger.warning(
                    f"Failed to acquire locks for user files on attempt {i}, retrying. Error: {e}"
                )

            time.sleep(retry_delay)

        if not lock_acquired:
            raise RuntimeError(
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
            )

    def prepare_enrichment(
        self,
        context: DocumentBatchPrepareContext,
        tenant_id: str,
        chunks: list[DocAwareChunk],
    ) -> UserFileChunkEnricher:
        """Do all DB lookups and pre-compute file metadata from chunks."""
        updatable_ids = [doc.id for doc in context.updatable_docs]

        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
        content_by_file: dict[str, list[str]] = defaultdict(list)
        for chunk in chunks:
            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
            content_by_file[chunk.source_document.id].append(chunk.content)

        no_access = DocumentAccess.build(
            user_emails=[],
            user_groups=[],
            external_user_emails=[],
            external_user_group_ids=[],
            is_public=False,
        )

        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
        user_file_id_to_persona_ids = fetch_persona_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
        user_file_id_to_access: dict[str, DocumentAccess] = get_access_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
        user_file_id_to_previous_chunk_cnt: dict[str, int] = {
            user_file_id: chunk_count
            for user_file_id, chunk_count in fetch_chunk_counts_for_user_files(
                user_file_ids=updatable_ids,
                db_session=self.db_session,
            )
        }

        # Initialize tokenizer used for token count calculation
        try:
            llm = get_default_llm()
            llm_tokenizer = get_tokenizer(
                model_name=llm.config.model_name,
                provider_type=llm.config.model_provider,
            )
        except Exception as e:
            logger.error(f"Error getting tokenizer: {e}")
            llm_tokenizer = None

        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id in updatable_ids:
            contents = content_by_file.get(user_file_id)
            if contents:
                combined_content = " ".join(contents)
                user_file_id_to_raw_text[str(user_file_id)] = combined_content
                token_count: int = (
                    count_tokens(combined_content, llm_tokenizer)
                    if llm_tokenizer
                    else 0
                )
                user_file_id_to_token_count[str(user_file_id)] = token_count
            else:
                user_file_id_to_raw_text[str(user_file_id)] = ""
                user_file_id_to_token_count[str(user_file_id)] = None

        return UserFileChunkEnricher(
            user_file_id_to_access=user_file_id_to_access,
            user_file_id_to_project_ids=user_file_id_to_project_ids,
            user_file_id_to_persona_ids=user_file_id_to_persona_ids,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
            no_access=no_access,
            tenant_id=tenant_id,
        )

    def _notify_assistant_owners_if_files_ready(
        self, user_files: list[UserFile]
    ) -> None:
        """
        Check if all files for associated assistants are processed and notify owners.
        Only sends notification when all files for an assistant are COMPLETED.
        """
        for user_file in user_files:
            if user_file.status == UserFileStatus.COMPLETED:
                for assistant in user_file.assistants:
                    # Skip assistants without owners
                    if assistant.user_id is None:
                        continue

                    # Check if all OTHER files for this assistant are completed
                    # (we already know current file is completed from the outer check)
                    all_files_completed = all(
                        f.status == UserFileStatus.COMPLETED
                        for f in assistant.user_files
                        if f.id != user_file.id
                    )

                    if all_files_completed:
                        create_notification(
                            user_id=assistant.user_id,
                            notif_type=NotificationType.ASSISTANT_FILES_READY,
                            db_session=self.db_session,
                            title="Your files are ready!",
                            description=f"All files for agent {assistant.name} have been processed and are now available.",
                            additional_data={
                                "persona_id": assistant.id,
                                "link": f"/assistants/{assistant.id}",
                            },
                            autocommit=False,
                        )

    def post_index(
        self,
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002
        filtered_documents: list[Document],  # noqa: ARG002
        enrichment: ChunkEnrichmentContext,
    ) -> None:
        assert isinstance(enrichment, UserFileChunkEnricher)
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
            self.db_session.query(UserFile)
            .options(selectinload(UserFile.assistants).selectinload(Persona.user_files))
            .filter(UserFile.id.in_(user_file_ids))
            .all()
        )
        for user_file in user_files:
            # don't update the status if the user file is being deleted
            if user_file.status != UserFileStatus.DELETING:
                user_file.status = UserFileStatus.COMPLETED
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
                str(user_file.id), 0
            )
            user_file.token_count = enrichment.user_file_id_to_token_count[
                str(user_file.id)
            ]

        # Notify assistant owners if all their files are now processed
        self._notify_assistant_owners_if_files_ready(user_files)

        self.db_session.commit()

        # Store the plaintext in the file store for faster retrieval
        # NOTE: this creates its own session to avoid committing the overall
        # transaction.
        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
            store_user_file_plaintext(
                user_file_id=UUID(user_file_id),
                plaintext_content=raw_text,
            )


class UserFileChunkEnricher:
    """Pre-computed metadata for per-chunk enrichment of user-uploaded files."""

    def __init__(
        self,
        user_file_id_to_access: dict[str, DocumentAccess],
        user_file_id_to_project_ids: dict[str, list[int]],
        user_file_id_to_persona_ids: dict[str, list[int]],
        doc_id_to_previous_chunk_cnt: dict[str, int],
        doc_id_to_new_chunk_cnt: dict[str, int],
        user_file_id_to_raw_text: dict[str, str],
        user_file_id_to_token_count: dict[str, int | None],
        no_access: DocumentAccess,
        tenant_id: str,
    ) -> None:
        self._user_file_id_to_access = user_file_id_to_access
        self._user_file_id_to_project_ids = user_file_id_to_project_ids
        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
        self._no_access = no_access
        self._tenant_id = tenant_id
        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
        self.user_file_id_to_raw_text = user_file_id_to_raw_text
        self.user_file_id_to_token_count = user_file_id_to_token_count

    def enrich_chunk(
        self, chunk: IndexChunk, score: float
    ) -> DocMetadataAwareIndexChunk:
        return DocMetadataAwareIndexChunk.from_index_chunk(
            index_chunk=chunk,
            access=self._user_file_id_to_access.get(
                chunk.source_document.id, self._no_access
            ),
            document_sets=set(),
            user_project=self._user_file_id_to_project_ids.get(
                chunk.source_document.id, []
            ),
            personas=self._user_file_id_to_persona_ids.get(
                chunk.source_document.id, []
            ),
            boost=DEFAULT_BOOST,
            tenant_id=self._tenant_id,
            aggregated_chunk_boost_factor=score,
        )


================================================
FILE: backend/onyx/indexing/chunk_batch_store.py
================================================
import pickle
import shutil
import tempfile
from collections.abc import Iterator
from pathlib import Path

from onyx.indexing.models import IndexChunk


class ChunkBatchStore:
    """Manages serialization of embedded chunks to a temporary directory.

    Owns the temp directory lifetime and provides save/load/stream/scrub
    operations.

    Use as a context manager to ensure cleanup::

        with ChunkBatchStore() as store:
            store.save(chunks, batch_idx=0)
            for chunk in store.stream():
                ...
    """

    _EXT = ".pkl"

    def __init__(self) -> None:
        self._tmpdir: Path | None = None

    # -- context manager -----------------------------------------------------

    def __enter__(self) -> "ChunkBatchStore":
        self._tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
        return self

    def __exit__(self, *_exc: object) -> None:
        if self._tmpdir is not None:
            shutil.rmtree(self._tmpdir, ignore_errors=True)
            self._tmpdir = None

    @property
    def _dir(self) -> Path:
        assert self._tmpdir is not None, "ChunkBatchStore used outside context manager"
        return self._tmpdir

    # -- storage primitives --------------------------------------------------

    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:
        """Serialize a batch of embedded chunks to disk."""
        with open(self._dir / f"batch_{batch_idx}{self._EXT}", "wb") as f:
            pickle.dump(chunks, f)

    def _load(self, batch_file: Path) -> list[IndexChunk]:
        """Deserialize a batch of embedded chunks from a file."""
        with open(batch_file, "rb") as f:
            return pickle.load(f)

    def _batch_files(self) -> list[Path]:
        """Return batch files sorted by numeric index."""
        return sorted(
            self._dir.glob(f"batch_*{self._EXT}"),
            key=lambda p: int(p.stem.removeprefix("batch_")),
        )

    # -- higher-level operations ---------------------------------------------

    def stream(self) -> Iterator[IndexChunk]:
        """Yield all chunks across all batch files.

        Each call returns a fresh generator, so the data can be iterated
        multiple times (e.g. once per document index).
        """
        for batch_file in self._batch_files():
            yield from self._load(batch_file)

    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:
        """Remove chunks belonging to *failed_doc_ids* from all batch files.

        When a document fails embedding in batch N, earlier batches may
        already contain successfully embedded chunks for that document.
        This ensures the output is all-or-nothing per document.
        """
        for batch_file in self._batch_files():
            batch_chunks = self._load(batch_file)
            cleaned = [
                c for c in batch_chunks if c.source_document.id not in failed_doc_ids
            ]
            if len(cleaned) != len(batch_chunks):
                with open(batch_file, "wb") as f:
                    pickle.dump(cleaned, f)


================================================
FILE: backend/onyx/indexing/chunker.py
================================================
from typing import cast

from chonkie import SentenceChunker

from onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS
from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.app_configs import LARGE_CHUNK_RATIO
from onyx.configs.app_configs import MINI_CHUNK_SIZE
from onyx.configs.app_configs import SKIP_METADATA_IN_CHUNK
from onyx.configs.app_configs import USE_CHUNK_SUMMARY
from onyx.configs.app_configs import USE_DOCUMENT_SUMMARY
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.configs.constants import SECTION_SEPARATOR
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_metadata_keys_to_ignore,
)
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import DocAwareChunk
from onyx.llm.utils import MAX_CONTEXT_TOKENS
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import clean_text
from onyx.utils.text_processing import shared_precompare_cleanup
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT

# Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps
# actually help quality at all
CHUNK_OVERLAP = 0
# Fairly arbitrary numbers but the general concept is we don't want the title/metadata to
# overwhelm the actual contents of the chunk
MAX_METADATA_PERCENTAGE = 0.25
CHUNK_MIN_CONTENT = 256

logger = setup_logger()


def _get_metadata_suffix_for_document_index(
    metadata: dict[str, str | list[str]], include_separator: bool = False
) -> tuple[str, str]:
    """
    Returns the metadata as a natural language string representation with all of the keys and values
    for the vector embedding and a string of all of the values for the keyword search.
    """
    if not metadata:
        return "", ""

    metadata_str = "Metadata:\n"
    metadata_values = []
    for key, value in metadata.items():
        if key in get_metadata_keys_to_ignore():
            continue

        value_str = ", ".join(value) if isinstance(value, list) else value

        if isinstance(value, list):
            metadata_values.extend(value)
        else:
            metadata_values.append(value)

        metadata_str += f"\t{key} - {value_str}\n"

    metadata_semantic = metadata_str.strip()
    metadata_keyword = " ".join(metadata_values)

    if include_separator:
        return RETURN_SEPARATOR + metadata_semantic, RETURN_SEPARATOR + metadata_keyword
    return metadata_semantic, metadata_keyword


def _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwareChunk:
    """
    Combines multiple DocAwareChunks into one large chunk (for "multipass" mode),
    appending the content and adjusting source_links accordingly.
    """
    merged_chunk = DocAwareChunk(
        source_document=chunks[0].source_document,
        chunk_id=chunks[0].chunk_id,
        blurb=chunks[0].blurb,
        content=chunks[0].content,
        source_links=chunks[0].source_links or {},
        image_file_id=None,
        section_continuation=(chunks[0].chunk_id > 0),
        title_prefix=chunks[0].title_prefix,
        metadata_suffix_semantic=chunks[0].metadata_suffix_semantic,
        metadata_suffix_keyword=chunks[0].metadata_suffix_keyword,
        large_chunk_reference_ids=[chunk.chunk_id for chunk in chunks],
        mini_chunk_texts=None,
        large_chunk_id=large_chunk_id,
        chunk_context="",
        doc_summary="",
        contextual_rag_reserved_tokens=0,
    )

    offset = 0
    for i in range(1, len(chunks)):
        merged_chunk.content += SECTION_SEPARATOR + chunks[i].content

        offset += len(SECTION_SEPARATOR) + len(chunks[i - 1].content)
        for link_offset, link_text in (chunks[i].source_links or {}).items():
            if merged_chunk.source_links is None:
                merged_chunk.source_links = {}
            merged_chunk.source_links[link_offset + offset] = link_text

    return merged_chunk


def generate_large_chunks(chunks: list[DocAwareChunk]) -> list[DocAwareChunk]:
    """
    Generates larger "grouped" chunks by combining sets of smaller chunks.
    """
    large_chunks = []
    for idx, i in enumerate(range(0, len(chunks), LARGE_CHUNK_RATIO)):
        chunk_group = chunks[i : i + LARGE_CHUNK_RATIO]
        if len(chunk_group) > 1:
            large_chunk = _combine_chunks(chunk_group, idx)
            large_chunks.append(large_chunk)
    return large_chunks


class Chunker:
    """
    Chunks documents into smaller chunks for indexing.
    """

    def __init__(
        self,
        tokenizer: BaseTokenizer,
        enable_multipass: bool = False,
        enable_large_chunks: bool = False,
        enable_contextual_rag: bool = False,
        blurb_size: int = BLURB_SIZE,
        include_metadata: bool = not SKIP_METADATA_IN_CHUNK,
        chunk_token_limit: int = DOC_EMBEDDING_CONTEXT_SIZE,
        chunk_overlap: int = CHUNK_OVERLAP,
        mini_chunk_size: int = MINI_CHUNK_SIZE,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> None:
        self.include_metadata = include_metadata
        self.chunk_token_limit = chunk_token_limit
        self.enable_multipass = enable_multipass
        self.enable_large_chunks = enable_large_chunks
        self.enable_contextual_rag = enable_contextual_rag
        if enable_contextual_rag:
            assert (
                USE_CHUNK_SUMMARY or USE_DOCUMENT_SUMMARY
            ), "Contextual RAG requires at least one of chunk summary and document summary enabled"
        self.default_contextual_rag_reserved_tokens = MAX_CONTEXT_TOKENS * (
            int(USE_CHUNK_SUMMARY) + int(USE_DOCUMENT_SUMMARY)
        )
        self.tokenizer = tokenizer
        self.callback = callback

        self.max_context = 0
        self.prompt_tokens = 0

        # Create a token counter function that returns the count instead of the tokens
        def token_counter(text: str) -> int:
            return len(tokenizer.encode(text))

        self.blurb_splitter = SentenceChunker(
            tokenizer_or_token_counter=token_counter,
            chunk_size=blurb_size,
            chunk_overlap=0,
            return_type="texts",
        )

        self.chunk_splitter = SentenceChunker(
            tokenizer_or_token_counter=token_counter,
            chunk_size=chunk_token_limit,
            chunk_overlap=chunk_overlap,
            return_type="texts",
        )

        self.mini_chunk_splitter = (
            SentenceChunker(
                tokenizer_or_token_counter=token_counter,
                chunk_size=mini_chunk_size,
                chunk_overlap=0,
                return_type="texts",
            )
            if enable_multipass
            else None
        )

    def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
        """
        Splits the text into smaller chunks based on token count to ensure
        no chunk exceeds the content_token_limit.
        """
        tokens = self.tokenizer.tokenize(text)
        chunks = []
        start = 0
        total_tokens = len(tokens)
        while start < total_tokens:
            end = min(start + content_token_limit, total_tokens)
            token_chunk = tokens[start:end]
            chunk_text = " ".join(token_chunk)
            chunks.append(chunk_text)
            start = end
        return chunks

    def _extract_blurb(self, text: str) -> str:
        """
        Extract a short blurb from the text (first chunk of size `blurb_size`).
        """
        # chunker is in `text` mode
        texts = cast(list[str], self.blurb_splitter.chunk(text))
        if not texts:
            return ""
        return texts[0]

    def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
        """
        For "multipass" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
        """
        if self.mini_chunk_splitter and chunk_text.strip():
            # chunker is in `text` mode
            return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))
        return None

    # ADDED: extra param image_url to store in the chunk
    def _create_chunk(
        self,
        document: IndexingDocument,
        chunks_list: list[DocAwareChunk],
        text: str,
        links: dict[int, str],
        is_continuation: bool = False,
        title_prefix: str = "",
        metadata_suffix_semantic: str = "",
        metadata_suffix_keyword: str = "",
        image_file_id: str | None = None,
    ) -> None:
        """
        Helper to create a new DocAwareChunk, append it to chunks_list.
        """
        new_chunk = DocAwareChunk(
            source_document=document,
            chunk_id=len(chunks_list),
            blurb=self._extract_blurb(text),
            content=text,
            source_links=links or {0: ""},
            image_file_id=image_file_id,
            section_continuation=is_continuation,
            title_prefix=title_prefix,
            metadata_suffix_semantic=metadata_suffix_semantic,
            metadata_suffix_keyword=metadata_suffix_keyword,
            mini_chunk_texts=self._get_mini_chunk_texts(text),
            large_chunk_id=None,
            doc_summary="",
            chunk_context="",
            contextual_rag_reserved_tokens=0,  # set per-document in _handle_single_document
        )
        chunks_list.append(new_chunk)

    def _chunk_document_with_sections(
        self,
        document: IndexingDocument,
        sections: list[Section],
        title_prefix: str,
        metadata_suffix_semantic: str,
        metadata_suffix_keyword: str,
        content_token_limit: int,
    ) -> list[DocAwareChunk]:
        """
        Loops through sections of the document, converting them into one or more chunks.
        Works with processed sections that are base Section objects.
        """
        chunks: list[DocAwareChunk] = []
        link_offsets: dict[int, str] = {}
        chunk_text = ""

        for section_idx, section in enumerate(sections):
            # Get section text and other attributes
            section_text = clean_text(str(section.text or ""))
            section_link_text = section.link or ""
            image_url = section.image_file_id

            # If there is no useful content, skip
            if not section_text and (not document.title or section_idx > 0):
                logger.warning(
                    f"Skipping empty or irrelevant section in doc {document.semantic_identifier}, link={section_link_text}"
                )
                continue

            # CASE 1: If this section has an image, force a separate chunk
            if image_url:
                # First, if we have any partially built text chunk, finalize it
                if chunk_text.strip():
                    self._create_chunk(
                        document,
                        chunks,
                        chunk_text,
                        link_offsets,
                        is_continuation=False,
                        title_prefix=title_prefix,
                        metadata_suffix_semantic=metadata_suffix_semantic,
                        metadata_suffix_keyword=metadata_suffix_keyword,
                    )
                    chunk_text = ""
                    link_offsets = {}

                # Create a chunk specifically for this image section
                # (Using the text summary that was generated during processing)
                self._create_chunk(
                    document,
                    chunks,
                    section_text,
                    links={0: section_link_text} if section_link_text else {},
                    image_file_id=image_url,
                    title_prefix=title_prefix,
                    metadata_suffix_semantic=metadata_suffix_semantic,
                    metadata_suffix_keyword=metadata_suffix_keyword,
                )
                # Continue to next section
                continue

            # CASE 2: Normal text section
            section_token_count = len(self.tokenizer.encode(section_text))

            # If the section is large on its own, split it separately
            if section_token_count > content_token_limit:
                if chunk_text.strip():
                    self._create_chunk(
                        document,
                        chunks,
                        chunk_text,
                        link_offsets,
                        False,
                        title_prefix,
                        metadata_suffix_semantic,
                        metadata_suffix_keyword,
                    )
                    chunk_text = ""
                    link_offsets = {}

                # chunker is in `text` mode
                split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
                for i, split_text in enumerate(split_texts):
                    # If even the split_text is bigger than strict limit, further split
                    if (
                        STRICT_CHUNK_TOKEN_LIMIT
                        and len(self.tokenizer.encode(split_text)) > content_token_limit
                    ):
                        smaller_chunks = self._split_oversized_chunk(
                            split_text, content_token_limit
                        )
                        for j, small_chunk in enumerate(smaller_chunks):
                            self._create_chunk(
                                document,
                                chunks,
                                small_chunk,
                                {0: section_link_text},
                                is_continuation=(j != 0),
                                title_prefix=title_prefix,
                                metadata_suffix_semantic=metadata_suffix_semantic,
                                metadata_suffix_keyword=metadata_suffix_keyword,
                            )
                    else:
                        self._create_chunk(
                            document,
                            chunks,
                            split_text,
                            {0: section_link_text},
                            is_continuation=(i != 0),
                            title_prefix=title_prefix,
                            metadata_suffix_semantic=metadata_suffix_semantic,
                            metadata_suffix_keyword=metadata_suffix_keyword,
                        )
                continue

            # If we can still fit this section into the current chunk, do so
            current_token_count = len(self.tokenizer.encode(chunk_text))
            current_offset = len(shared_precompare_cleanup(chunk_text))
            next_section_tokens = (
                len(self.tokenizer.encode(SECTION_SEPARATOR)) + section_token_count
            )

            if next_section_tokens + current_token_count <= content_token_limit:
                if chunk_text:
                    chunk_text += SECTION_SEPARATOR
                chunk_text += section_text
                link_offsets[current_offset] = section_link_text
            else:
                # finalize the existing chunk
                self._create_chunk(
                    document,
                    chunks,
                    chunk_text,
                    link_offsets,
                    False,
                    title_prefix,
                    metadata_suffix_semantic,
                    metadata_suffix_keyword,
                )
                # start a new chunk
                link_offsets = {0: section_link_text}
                chunk_text = section_text

        # finalize any leftover text chunk
        if chunk_text.strip() or not chunks:
            self._create_chunk(
                document,
                chunks,
                chunk_text,
                link_offsets or {0: ""},  # safe default
                False,
                title_prefix,
                metadata_suffix_semantic,
                metadata_suffix_keyword,
            )
        return chunks

    def _handle_single_document(
        self, document: IndexingDocument
    ) -> list[DocAwareChunk]:
        # Specifically for reproducing an issue with gmail
        if document.source == DocumentSource.GMAIL:
            logger.debug(f"Chunking {document.semantic_identifier}")

        # Title prep
        title = self._extract_blurb(document.get_title_for_document_index() or "")
        title_prefix = title + RETURN_SEPARATOR if title else ""
        title_tokens = len(self.tokenizer.encode(title_prefix))

        # Metadata prep
        metadata_suffix_semantic = ""
        metadata_suffix_keyword = ""
        metadata_tokens = 0
        if self.include_metadata:
            (
                metadata_suffix_semantic,
                metadata_suffix_keyword,
            ) = _get_metadata_suffix_for_document_index(
                document.metadata, include_separator=True
            )
            metadata_tokens = len(self.tokenizer.encode(metadata_suffix_semantic))

        # If metadata is too large, skip it in the semantic content
        if metadata_tokens >= self.chunk_token_limit * MAX_METADATA_PERCENTAGE:
            metadata_suffix_semantic = ""
            metadata_tokens = 0

        single_chunk_fits = True
        doc_token_count = 0
        if self.enable_contextual_rag:
            doc_content = document.get_text_content()
            tokenized_doc = self.tokenizer.tokenize(doc_content)
            doc_token_count = len(tokenized_doc)

            # check if doc + title + metadata fits in a single chunk. If so, no need for contextual RAG
            single_chunk_fits = (
                doc_token_count + title_tokens + metadata_tokens
                <= self.chunk_token_limit
            )

        # expand the size of the context used for contextual rag based on whether chunk context and doc summary are used
        context_size = 0
        if (
            self.enable_contextual_rag
            and not single_chunk_fits
            and not AVERAGE_SUMMARY_EMBEDDINGS
        ):
            context_size += self.default_contextual_rag_reserved_tokens

        # Adjust content token limit to accommodate title + metadata
        content_token_limit = (
            self.chunk_token_limit - title_tokens - metadata_tokens - context_size
        )

        # first check: if there is not enough actual chunk content when including contextual rag,
        # then don't do contextual rag
        if content_token_limit <= CHUNK_MIN_CONTENT:
            context_size = 0  # Don't do contextual RAG
            # revert to previous content token limit
            content_token_limit = (
                self.chunk_token_limit - title_tokens - metadata_tokens
            )

        # If there is not enough context remaining then just index the chunk with no prefix/suffix
        if content_token_limit <= CHUNK_MIN_CONTENT:
            # Not enough space left, so revert to full chunk without the prefix
            content_token_limit = self.chunk_token_limit
            title_prefix = ""
            metadata_suffix_semantic = ""

        # Use processed_sections if available (IndexingDocument), otherwise use original sections
        sections_to_chunk = document.processed_sections

        normal_chunks = self._chunk_document_with_sections(
            document,
            sections_to_chunk,
            title_prefix,
            metadata_suffix_semantic,
            metadata_suffix_keyword,
            content_token_limit,
        )

        # Optional "multipass" large chunk creation
        if self.enable_multipass and self.enable_large_chunks:
            large_chunks = generate_large_chunks(normal_chunks)
            normal_chunks.extend(large_chunks)

        for chunk in normal_chunks:
            chunk.contextual_rag_reserved_tokens = context_size

        return normal_chunks

    def chunk(self, documents: list[IndexingDocument]) -> list[DocAwareChunk]:
        """
        Takes in a list of documents and chunks them into smaller chunks for indexing
        while persisting the document metadata.

        Works with both standard Document objects and IndexingDocument objects with processed_sections.
        """
        final_chunks: list[DocAwareChunk] = []
        for document in documents:
            if self.callback and self.callback.should_stop():
                raise RuntimeError("Chunker.chunk: Stop signal detected")

            chunks = self._handle_single_document(document)
            final_chunks.extend(chunks)

            if self.callback:
                self.callback.progress("Chunker.chunk", len(chunks))

        return final_chunks


================================================
FILE: backend/onyx/indexing/content_classification.py
================================================


================================================
FILE: backend/onyx/indexing/embedder.py
================================================
import time
from abc import ABC
from abc import abstractmethod
from collections import defaultdict

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
from onyx.db.models import SearchSettings
from onyx.document_index.chunk_content_enrichment import (
    generate_enriched_content_for_chunk_embedding,
)
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import IndexChunk
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.utils.logger import setup_logger
from onyx.utils.pydantic_util import shallow_model_dump
from onyx.utils.timing import log_function_time
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import Embedding


logger = setup_logger()


class IndexingEmbedder(ABC):
    """Converts chunks into chunks with embeddings. Note that one chunk may have
    multiple embeddings associated with it."""

    def __init__(
        self,
        model_name: str,
        normalize: bool,
        query_prefix: str | None,
        passage_prefix: str | None,
        provider_type: EmbeddingProvider | None,
        api_key: str | None,
        api_url: str | None,
        api_version: str | None,
        deployment_name: str | None,
        reduced_dimension: int | None,
        callback: IndexingHeartbeatInterface | None,
    ):
        self.model_name = model_name
        self.normalize = normalize
        self.query_prefix = query_prefix
        self.passage_prefix = passage_prefix
        self.provider_type = provider_type
        self.api_key = api_key
        self.api_url = api_url
        self.api_version = api_version
        self.deployment_name = deployment_name

        self.embedding_model = EmbeddingModel(
            model_name=model_name,
            query_prefix=query_prefix,
            passage_prefix=passage_prefix,
            normalize=normalize,
            api_key=api_key,
            provider_type=provider_type,
            api_url=api_url,
            api_version=api_version,
            deployment_name=deployment_name,
            reduced_dimension=reduced_dimension,
            # The below are globally set, this flow always uses the indexing one
            server_host=INDEXING_MODEL_SERVER_HOST,
            server_port=INDEXING_MODEL_SERVER_PORT,
            retrim_content=True,
            callback=callback,
        )

    @abstractmethod
    def embed_chunks(
        self,
        chunks: list[DocAwareChunk],
        tenant_id: str | None = None,
        request_id: str | None = None,
    ) -> list[IndexChunk]:
        raise NotImplementedError


class DefaultIndexingEmbedder(IndexingEmbedder):
    def __init__(
        self,
        model_name: str,
        normalize: bool,
        query_prefix: str | None,
        passage_prefix: str | None,
        provider_type: EmbeddingProvider | None = None,
        api_key: str | None = None,
        api_url: str | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
        reduced_dimension: int | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ):
        super().__init__(
            model_name,
            normalize,
            query_prefix,
            passage_prefix,
            provider_type,
            api_key,
            api_url,
            api_version,
            deployment_name,
            reduced_dimension,
            callback,
        )

    @log_function_time()
    def embed_chunks(
        self,
        chunks: list[DocAwareChunk],
        tenant_id: str | None = None,
        request_id: str | None = None,
    ) -> list[IndexChunk]:
        """Adds embeddings to the chunks, the title and metadata suffixes are added to the chunk as well
        if they exist. If there is no space for it, it would have been thrown out at the chunking step.
        """
        # All chunks at this point must have some non-empty content
        flat_chunk_texts: list[str] = []
        large_chunks_present = False
        for chunk in chunks:
            if chunk.large_chunk_reference_ids:
                large_chunks_present = True
            chunk_text = (
                generate_enriched_content_for_chunk_embedding(chunk)
            ) or chunk.source_document.get_title_for_document_index()

            if not chunk_text:
                # This should never happen, the document would have been dropped
                # before getting to this point
                raise ValueError(f"Chunk has no content: {chunk.to_short_descriptor()}")

            flat_chunk_texts.append(chunk_text)

            if chunk.mini_chunk_texts:
                if chunk.large_chunk_reference_ids:
                    # A large chunk does not contain mini chunks, if it matches the large chunk
                    # with a high score, then mini chunks would not be used anyway
                    # otherwise it should match the normal chunk
                    raise RuntimeError("Large chunk contains mini chunks")
                flat_chunk_texts.extend(chunk.mini_chunk_texts)

        embeddings = self.embedding_model.encode(
            texts=flat_chunk_texts,
            text_type=EmbedTextType.PASSAGE,
            large_chunks_present=large_chunks_present,
            tenant_id=tenant_id,
            request_id=request_id,
        )

        chunk_titles = {
            chunk.source_document.get_title_for_document_index() for chunk in chunks
        }

        # Drop any None or empty strings
        # If there is no title or the title is empty, the title embedding field will be null
        # which is ok, it just won't contribute at all to the scoring.
        chunk_titles_list = [title for title in chunk_titles if title]

        # Cache the Title embeddings to only have to do it once
        title_embed_dict: dict[str, Embedding] = {}
        if chunk_titles_list:
            title_embeddings = self.embedding_model.encode(
                chunk_titles_list,
                text_type=EmbedTextType.PASSAGE,
                tenant_id=tenant_id,
                request_id=request_id,
            )
            title_embed_dict.update(
                {
                    title: vector
                    for title, vector in zip(chunk_titles_list, title_embeddings)
                }
            )

        # Mapping embeddings to chunks
        embedded_chunks: list[IndexChunk] = []
        embedding_ind_start = 0
        for chunk in chunks:
            num_embeddings = 1 + (
                len(chunk.mini_chunk_texts) if chunk.mini_chunk_texts else 0
            )
            chunk_embeddings = embeddings[
                embedding_ind_start : embedding_ind_start + num_embeddings
            ]

            title = chunk.source_document.get_title_for_document_index()

            title_embedding = None
            if title:
                if title in title_embed_dict:
                    # Using cached value to avoid recalculating for every chunk
                    title_embedding = title_embed_dict[title]
                else:
                    logger.error(
                        "Title had to be embedded separately, this should not happen!"
                    )
                    title_embedding = self.embedding_model.encode(
                        [title],
                        text_type=EmbedTextType.PASSAGE,
                        tenant_id=tenant_id,
                        request_id=request_id,
                    )[0]
                    title_embed_dict[title] = title_embedding

            new_embedded_chunk = IndexChunk.model_construct(
                **shallow_model_dump(chunk),
                embeddings=ChunkEmbedding(
                    full_embedding=chunk_embeddings[0],
                    mini_chunk_embeddings=chunk_embeddings[1:],
                ),
                title_embedding=title_embedding,
            )
            embedded_chunks.append(new_embedded_chunk)
            embedding_ind_start += num_embeddings

        return embedded_chunks

    @classmethod
    def from_db_search_settings(
        cls,
        search_settings: SearchSettings,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> "DefaultIndexingEmbedder":
        return cls(
            model_name=search_settings.model_name,
            normalize=search_settings.normalize,
            query_prefix=search_settings.query_prefix,
            passage_prefix=search_settings.passage_prefix,
            provider_type=search_settings.provider_type,
            api_key=search_settings.api_key,
            api_url=search_settings.api_url,
            api_version=search_settings.api_version,
            deployment_name=search_settings.deployment_name,
            reduced_dimension=search_settings.reduced_dimension,
            callback=callback,
        )


def embed_chunks_with_failure_handling(
    chunks: list[DocAwareChunk],
    embedder: IndexingEmbedder,
    tenant_id: str | None = None,
    request_id: str | None = None,
) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
    """Tries to embed all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).
    """

    # TODO(rkuo): this doesn't disambiguate calls to the model server on retries.
    # Improve this if needed.

    # First try to embed all chunks in one batch
    try:
        return (
            embedder.embed_chunks(
                chunks=chunks, tenant_id=tenant_id, request_id=request_id
            ),
            [],
        )
    except ConnectorStopSignal as e:
        logger.warning(
            "Connector stop signal detected in embed_chunks_with_failure_handling"
        )
        raise e
    except Exception:
        logger.exception("Failed to embed chunk batch. Trying individual docs.")
        # wait a couple seconds to let any rate limits or temporary issues resolve
        time.sleep(2)

    # Try embedding each document's chunks individually
    chunks_by_doc: dict[str, list[DocAwareChunk]] = defaultdict(list)
    for chunk in chunks:
        chunks_by_doc[chunk.source_document.id].append(chunk)

    embedded_chunks: list[IndexChunk] = []
    failures: list[ConnectorFailure] = []

    for doc_id, chunks_for_doc in chunks_by_doc.items():
        try:
            doc_embedded_chunks = embedder.embed_chunks(
                chunks=chunks_for_doc, tenant_id=tenant_id, request_id=request_id
            )
            embedded_chunks.extend(doc_embedded_chunks)
        except Exception as e:
            logger.exception(f"Failed to embed chunks for document '{doc_id}'")
            failures.append(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
                        document_link=(
                            chunks_for_doc[0].get_link() if chunks_for_doc else None
                        ),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
            )

    return embedded_chunks, failures


================================================
FILE: backend/onyx/indexing/indexing_heartbeat.py
================================================
from abc import ABC
from abc import abstractmethod


class IndexingHeartbeatInterface(ABC):
    """Defines a callback interface to be passed to
    to run_indexing_entrypoint."""

    @abstractmethod
    def should_stop(self) -> bool:
        """Signal to stop the looping function in flight."""

    @abstractmethod
    def progress(self, tag: str, amount: int) -> None:
        """Send progress updates to the caller.
        Amount can be a positive number to indicate progress or <= 0
        just to act as a keep-alive.
        """


================================================
FILE: backend/onyx/indexing/indexing_pipeline.py
================================================
from collections import defaultdict
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Protocol

from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
from onyx.configs.app_configs import USE_CHUNK_SUMMARY
from onyx.configs.app_configs import USE_DOCUMENT_SUMMARY
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    get_experts_stores_representations,
)
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import ImageSection
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.connectors.models import TextSection
from onyx.db.document import get_documents_by_ids
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.document import upsert_documents
from onyx.db.enums import HookPoint
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.models import Document as DBDocument
from onyx.db.models import IndexModelStatus
from onyx.db.search_settings import get_active_search_settings
from onyx.db.tag import upsert_document_tags
from onyx.document_index.document_index_utils import (
    get_multipass_config,
)
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
from onyx.file_store.file_store import get_default_file_store
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.document_ingestion import DocumentIngestionOwner
from onyx.hooks.points.document_ingestion import DocumentIngestionPayload
from onyx.hooks.points.document_ingestion import DocumentIngestionResponse
from onyx.hooks.points.document_ingestion import DocumentIngestionSection
from onyx.indexing.chunk_batch_store import ChunkBatchStore
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import embed_chunks_with_failure_handling
from onyx.indexing.embedder import IndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexingBatchAdapter
from onyx.indexing.models import UpdatableChunkData
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
from onyx.llm.factory import get_default_llm_with_vision
from onyx.llm.factory import get_llm_for_contextual_rag
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.multi_llm import LLMRateLimitError
from onyx.llm.utils import llm_response_to_string
from onyx.llm.utils import MAX_CONTEXT_TOKENS
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.natural_language_processing.utils import tokenizer_trim_middle
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time


logger = setup_logger()


class DocumentBatchPrepareContext(BaseModel):
    updatable_docs: list[Document]
    id_to_boost_map: dict[str, int]
    indexable_docs: list[IndexingDocument] = []
    model_config = ConfigDict(arbitrary_types_allowed=True)


class IndexingPipelineResult(BaseModel):
    # number of documents that are completely new (e.g. did
    # not exist as a part of this OR any other connector)
    new_docs: int
    # NOTE: need total_docs, since the pipeline can skip some docs
    # (e.g. not even insert them into Postgres)
    total_docs: int
    # number of chunks that were inserted into Vespa
    total_chunks: int

    failures: list[ConnectorFailure]

    @classmethod
    def empty(cls, total_docs: int) -> "IndexingPipelineResult":
        return cls(
            new_docs=0,
            total_docs=total_docs,
            total_chunks=0,
            failures=[],
        )


class ChunkEmbeddingResult(BaseModel):
    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id)
    connector_failures: list[ConnectorFailure]


class IndexingPipelineProtocol(Protocol):
    def __call__(
        self,
        document_batch: list[Document],
        index_attempt_metadata: IndexAttemptMetadata,
    ) -> IndexingPipelineResult: ...


def _upsert_documents_in_db(
    documents: list[Document],
    index_attempt_metadata: IndexAttemptMetadata,
    db_session: Session,
) -> None:
    # Metadata here refers to basic document info, not metadata about the actual content
    document_metadata_list: list[DocumentMetadata] = []
    for doc in documents:
        first_link = next(
            (section.link for section in doc.sections if section.link), ""
        )
        db_doc_metadata = DocumentMetadata(
            connector_id=index_attempt_metadata.connector_id,
            credential_id=index_attempt_metadata.credential_id,
            document_id=doc.id,
            semantic_identifier=doc.semantic_identifier,
            first_link=first_link,
            primary_owners=get_experts_stores_representations(doc.primary_owners),
            secondary_owners=get_experts_stores_representations(doc.secondary_owners),
            from_ingestion_api=doc.from_ingestion_api,
            external_access=doc.external_access,
            doc_metadata=doc.doc_metadata,
            # parent_hierarchy_node_id is resolved in docfetching using Redis cache
            parent_hierarchy_node_id=doc.parent_hierarchy_node_id,
        )
        document_metadata_list.append(db_doc_metadata)

    upsert_documents(db_session, document_metadata_list)

    # Insert document content metadata
    for doc in documents:
        upsert_document_tags(
            document_id=doc.id,
            source=doc.source,
            metadata=doc.metadata,
            db_session=db_session,
        )


def _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:
    """Extract document IDs from a list of connector failures."""
    return {f.failed_document.document_id for f in failures if f.failed_document}


def _embed_chunks_to_store(
    chunks: list[DocAwareChunk],
    embedder: IndexingEmbedder,
    tenant_id: str,
    request_id: str | None,
    store: ChunkBatchStore,
) -> ChunkEmbeddingResult:
    """Embed chunks in batches, spilling each batch to *store*.

    If a document fails embedding in any batch, its chunks are excluded from
    all batches (including earlier ones already written) so that the output
    is all-or-nothing per document.
    """
    successful_chunk_ids: list[tuple[int, str]] = []
    all_embedding_failures: list[ConnectorFailure] = []
    # Track failed doc IDs across all batches so that a failure in batch N
    # causes chunks for that doc to be skipped in batch N+1 and stripped
    # from earlier batches.
    all_failed_doc_ids: set[str] = set()

    for batch_idx, chunk_batch in enumerate(
        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
    ):
        # Skip chunks belonging to documents that failed in earlier batches.
        chunk_batch = [
            c for c in chunk_batch if c.source_document.id not in all_failed_doc_ids
        ]
        if not chunk_batch:
            continue

        logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")

        chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(
            chunks=chunk_batch,
            embedder=embedder,
            tenant_id=tenant_id,
            request_id=request_id,
        )
        all_embedding_failures.extend(embedding_failures)
        all_failed_doc_ids.update(_get_failed_doc_ids(embedding_failures))

        # Only keep successfully embedded chunks for non-failed docs.
        chunks_with_embeddings = [
            c
            for c in chunks_with_embeddings
            if c.source_document.id not in all_failed_doc_ids
        ]

        successful_chunk_ids.extend(
            (c.chunk_id, c.source_document.id) for c in chunks_with_embeddings
        )

        store.save(chunks_with_embeddings, batch_idx)
        del chunks_with_embeddings

    # Scrub earlier batches for docs that failed in later batches.
    if all_failed_doc_ids:
        store.scrub_failed_docs(all_failed_doc_ids)
        successful_chunk_ids = [
            (chunk_id, doc_id)
            for chunk_id, doc_id in successful_chunk_ids
            if doc_id not in all_failed_doc_ids
        ]

    return ChunkEmbeddingResult(
        successful_chunk_ids=successful_chunk_ids,
        connector_failures=all_embedding_failures,
    )


@contextmanager
def embed_and_stream(
    chunks: list[DocAwareChunk],
    embedder: IndexingEmbedder,
    tenant_id: str,
    request_id: str | None,
) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:
    """Embed chunks to disk and yield a ``(result, store)`` pair.

    The store owns the temp directory — files are cleaned up when the context
    manager exits.

    Usage::

        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):
            for chunk in store.stream():
                ...
    """
    with ChunkBatchStore() as store:
        result = _embed_chunks_to_store(
            chunks=chunks,
            embedder=embedder,
            tenant_id=tenant_id,
            request_id=request_id,
            store=store,
        )
        yield result, store


def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
) -> list[Document]:
    """Figures out which documents actually need to be updated. If a document is already present
    and the `updated_at` hasn't changed, we shouldn't need to do anything with it.

    NB: Still need to associate the document in the DB if multiple connectors are
    indexing the same doc."""
    id_update_time_map = {
        doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at
    }

    updatable_docs: list[Document] = []
    for doc in documents:
        if (
            doc.id in id_update_time_map
            and doc.doc_updated_at
            and doc.doc_updated_at <= id_update_time_map[doc.id]
        ):
            continue
        updatable_docs.append(doc)

    return updatable_docs


def index_doc_batch_with_handler(
    *,
    chunker: Chunker,
    embedder: IndexingEmbedder,
    document_indices: list[DocumentIndex],
    document_batch: list[Document],
    request_id: str | None,
    tenant_id: str,
    db_session: Session,
    adapter: IndexingBatchAdapter,
    ignore_time_skip: bool = False,
    enable_contextual_rag: bool = False,
    llm: LLM | None = None,
) -> IndexingPipelineResult:
    try:
        index_pipeline_result = index_doc_batch(
            chunker=chunker,
            embedder=embedder,
            document_indices=document_indices,
            document_batch=document_batch,
            request_id=request_id,
            tenant_id=tenant_id,
            db_session=db_session,
            adapter=adapter,
            ignore_time_skip=ignore_time_skip,
            enable_contextual_rag=enable_contextual_rag,
            llm=llm,
        )

    except ConnectorStopSignal as e:
        logger.warning("Connector stop signal detected in index_doc_batch_with_handler")
        raise e
    except Exception as e:
        # don't log the batch directly, it's too much text
        document_ids = [doc.id for doc in document_batch]
        logger.exception(f"Failed to index document batch: {document_ids}")

        index_pipeline_result = IndexingPipelineResult(
            new_docs=0,
            total_docs=len(document_batch),
            total_chunks=0,
            failures=[
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=document.id,
                        document_link=(
                            document.sections[0].link if document.sections else None
                        ),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
                for document in document_batch
            ],
        )

    return index_pipeline_result


def index_doc_batch_prepare(
    documents: list[Document],
    index_attempt_metadata: IndexAttemptMetadata,
    db_session: Session,
    ignore_time_skip: bool = False,
) -> DocumentBatchPrepareContext | None:
    """Sets up the documents in the relational DB (source of truth) for permissions, metadata, etc.
    This preceeds indexing it into the actual document index."""
    documents = sanitize_documents_for_postgres(documents)

    # Create a trimmed list of docs that don't have a newer updated at
    # Shortcuts the time-consuming flow on connector index retries
    document_ids: list[str] = [document.id for document in documents]
    db_docs: list[DBDocument] = get_documents_by_ids(
        db_session=db_session,
        document_ids=document_ids,
    )

    updatable_docs = (
        get_doc_ids_to_update(documents=documents, db_docs=db_docs)
        if not ignore_time_skip
        else documents
    )
    if len(updatable_docs) != len(documents):
        updatable_doc_ids = [doc.id for doc in updatable_docs]
        skipped_doc_ids = [
            doc.id for doc in documents if doc.id not in updatable_doc_ids
        ]
        logger.info(
            f"Skipping {len(skipped_doc_ids)} documents because they are up to date. Skipped doc IDs: {skipped_doc_ids}"
        )

    # for all updatable docs, upsert into the DB
    # Does not include doc_updated_at which is also used to indicate a successful update
    if updatable_docs:
        _upsert_documents_in_db(
            documents=updatable_docs,
            index_attempt_metadata=index_attempt_metadata,
            db_session=db_session,
        )

    logger.info(
        f"Upserted {len(updatable_docs)} changed docs out of {len(documents)} total docs into the DB"
    )

    # for all docs, upsert the document to cc pair relationship
    upsert_document_by_connector_credential_pair(
        db_session,
        index_attempt_metadata.connector_id,
        index_attempt_metadata.credential_id,
        document_ids,
    )

    # Link hierarchy nodes to documents for sources where pages can be both
    # hierarchy nodes AND documents (e.g., Notion, Confluence).
    # This must happen after documents are upserted due to FK constraint.
    if documents:
        link_hierarchy_nodes_to_documents(
            db_session=db_session,
            document_ids=document_ids,
            source=documents[0].source,
            commit=False,  # We'll commit with the rest of the transaction
        )

    # No docs to process because the batch is empty or every doc was already indexed
    if not updatable_docs:
        return None

    id_to_boost_map = {doc.id: doc.boost for doc in db_docs}
    return DocumentBatchPrepareContext(
        updatable_docs=updatable_docs, id_to_boost_map=id_to_boost_map
    )


def filter_documents(document_batch: list[Document]) -> list[Document]:
    documents: list[Document] = []
    total_chars_in_batch = 0
    skipped_too_long = []

    for document in document_batch:
        empty_contents = not any(
            isinstance(section, TextSection)
            and section.text is not None
            and section.text.strip()
            for section in document.sections
        )
        if (
            (not document.title or not document.title.strip())
            and not document.semantic_identifier.strip()
            and empty_contents
        ):
            # Skip documents that have neither title nor content
            # If the document doesn't have either, then there is no useful information in it
            # This is again verified later in the pipeline after chunking but at that point there should
            # already be no documents that are empty.
            logger.warning(
                f"Skipping document with ID {document.id} as it has neither title nor content."
            )
            continue

        if document.title is not None and not document.title.strip() and empty_contents:
            # The title is explicitly empty ("" and not None) and the document is empty
            # so when building the chunk text representation, it will be empty and unuseable
            logger.warning(
                f"Skipping document with ID {document.id} as the chunks will be empty."
            )
            continue

        section_chars = sum(
            (
                len(section.text)
                if isinstance(section, TextSection) and section.text is not None
                else 0
            )
            for section in document.sections
        )
        doc_total_chars = (
            len(document.title or document.semantic_identifier) + section_chars
        )

        if MAX_DOCUMENT_CHARS and doc_total_chars > MAX_DOCUMENT_CHARS:
            # Skip documents that are too long, later on there are more memory intensive steps done on the text
            # and the container will run out of memory and crash. Several other checks are included upstream but
            # those are at the connector level so a catchall is still needed.
            # Assumption here is that files that are that long, are generated files and not the type users
            # generally care for.
            logger.warning(
                f"Skipping document with ID {document.id} as it is too long "
                f"({doc_total_chars:,} chars, max={MAX_DOCUMENT_CHARS:,})"
            )
            skipped_too_long.append((document.id, doc_total_chars))
            continue

        total_chars_in_batch += doc_total_chars
        documents.append(document)

    # Log batch statistics for OOM debugging
    if documents:
        avg_chars = total_chars_in_batch / len(documents)
        # Get the source from the first document (all in batch should be same source)
        source = documents[0].source.value if documents[0].source else "unknown"
        logger.debug(
            f"Document batch filter [{source}]: {len(documents)} docs kept, {len(skipped_too_long)} skipped (too long). "
            f"Total chars: {total_chars_in_batch:,}, Avg: {avg_chars:,.0f} chars/doc"
        )
        if skipped_too_long:
            logger.warning(
                f"Skipped oversized documents [{source}]: {skipped_too_long[:5]}"
            )  # Log first 5

    return documents


def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
    """
    Process all sections in documents by:
    1. Converting both TextSection and ImageSection objects to base Section objects
    2. Processing ImageSections to generate text summaries using a vision-capable LLM
    3. Returning IndexingDocument objects with both original and processed sections

    Args:
        documents: List of documents with TextSection | ImageSection objects

    Returns:
        List of IndexingDocument objects with processed_sections as list[Section]
    """
    # Check if image extraction and analysis is enabled before trying to get a vision LLM
    if not get_image_extraction_and_analysis_enabled():
        llm = None
    else:
        # Only get the vision LLM if image processing is enabled
        llm = get_default_llm_with_vision()

    if not llm:
        if get_image_extraction_and_analysis_enabled():
            logger.warning(
                "Image analysis is enabled but no vision-capable LLM is "
                "available — images will not be summarized. Configure a "
                "vision model in the admin LLM settings."
            )
        # Even without LLM, we still convert to IndexingDocument with base Sections
        return [
            IndexingDocument(
                **document.model_dump(),
                processed_sections=[
                    Section(
                        text=section.text if isinstance(section, TextSection) else "",
                        link=section.link,
                        image_file_id=(
                            section.image_file_id
                            if isinstance(section, ImageSection)
                            else None
                        ),
                    )
                    for section in document.sections
                ],
            )
            for document in documents
        ]

    indexed_documents: list[IndexingDocument] = []

    for document in documents:
        processed_sections: list[Section] = []

        for section in document.sections:
            # For ImageSection, process and create base Section with both text and image_file_id
            if isinstance(section, ImageSection):
                # Default section with image path preserved - ensure text is always a string
                processed_section = Section(
                    link=section.link,
                    image_file_id=section.image_file_id,
                    text="",  # Initialize with empty string
                )

                # Try to get image summary
                try:
                    file_store = get_default_file_store()

                    file_record = file_store.read_file_record(
                        file_id=section.image_file_id
                    )
                    if not file_record:
                        logger.warning(
                            f"Image file {section.image_file_id} not found in FileStore"
                        )

                        processed_section.text = "[Image could not be processed]"
                    else:
                        # Get the image data
                        image_data_io = file_store.read_file(
                            file_id=section.image_file_id
                        )
                        image_data = image_data_io.read()
                        summary = summarize_image_with_error_handling(
                            llm=llm,
                            image_data=image_data,
                            context_name=file_record.display_name or "Image",
                        )

                        if summary:
                            processed_section.text = summary
                        else:
                            processed_section.text = "[Image could not be summarized]"
                except Exception as e:
                    logger.error(f"Error processing image section: {e}")
                    processed_section.text = "[Error processing image]"

                processed_sections.append(processed_section)

            # For TextSection, create a base Section with text and link
            elif isinstance(section, TextSection):
                processed_section = Section(
                    text=section.text or "",  # Ensure text is always a string, not None
                    link=section.link,
                    image_file_id=None,
                )
                processed_sections.append(processed_section)

        # Create IndexingDocument with original sections and processed_sections
        indexed_document = IndexingDocument(
            **document.model_dump(), processed_sections=processed_sections
        )
        indexed_documents.append(indexed_document)

    return indexed_documents


def add_document_summaries(
    chunks_by_doc: list[DocAwareChunk],
    llm: LLM,
    tokenizer: BaseTokenizer,
    trunc_doc_tokens: int,
) -> list[int] | None:
    """
    Adds a document summary to a list of chunks from the same document.
    Returns the number of tokens in the document.
    """

    doc_tokens = []
    # this is value is the same for each chunk in the document; 0 indicates
    # There is not enough space for contextual RAG (the chunk content
    # and possibly metadata took up too much space)
    if chunks_by_doc[0].contextual_rag_reserved_tokens == 0:
        return None

    doc_tokens = tokenizer.encode(chunks_by_doc[0].source_document.get_text_content())
    doc_content = tokenizer_trim_middle(doc_tokens, trunc_doc_tokens, tokenizer)

    # Apply prompt caching: cache the static prompt, document content is the suffix
    # Note: For document summarization, there's no cacheable prefix since the document changes
    # So we just pass the full prompt without caching
    summary_prompt = DOCUMENT_SUMMARY_PROMPT.format(document=doc_content)
    prompt_msg = UserMessage(content=summary_prompt)

    response = llm.invoke(prompt_msg, max_tokens=MAX_CONTEXT_TOKENS)
    doc_summary = llm_response_to_string(response)

    for chunk in chunks_by_doc:
        chunk.doc_summary = doc_summary

    return doc_tokens


def add_chunk_summaries(
    chunks_by_doc: list[DocAwareChunk],
    llm: LLM,
    tokenizer: BaseTokenizer,
    trunc_doc_chunk_tokens: int,
    doc_tokens: list[int] | None,
) -> None:
    """
    Adds chunk summaries to the chunks grouped by document id.
    Chunk summaries look at the chunk as well as the entire document (or a summary,
    if the document is too long) and describe how the chunk relates to the document.
    """
    # all chunks within a document have the same contextual_rag_reserved_tokens
    if chunks_by_doc[0].contextual_rag_reserved_tokens == 0:
        return

    # use values computed in above doc summary section if available
    doc_tokens = doc_tokens or tokenizer.encode(
        chunks_by_doc[0].source_document.get_text_content()
    )
    doc_content = tokenizer_trim_middle(doc_tokens, trunc_doc_chunk_tokens, tokenizer)

    # only compute doc summary if needed
    doc_info = (
        doc_content
        if len(doc_tokens) <= MAX_TOKENS_FOR_FULL_INCLUSION
        else chunks_by_doc[0].doc_summary
    )
    if not doc_info:
        # This happens if the document is too long AND document summaries are turned off
        # In this case we compute a doc summary using the LLM
        fallback_prompt = UserMessage(
            content=DOCUMENT_SUMMARY_PROMPT.format(document=doc_content)
        )
        response = llm.invoke(fallback_prompt, max_tokens=MAX_CONTEXT_TOKENS)
        doc_info = llm_response_to_string(response)

    from onyx.llm.prompt_cache.processor import process_with_prompt_cache

    context_prompt1 = CONTEXTUAL_RAG_PROMPT1.format(document=doc_info)

    def assign_context(chunk: DocAwareChunk) -> None:
        context_prompt2 = CONTEXTUAL_RAG_PROMPT2.format(chunk=chunk.content)
        try:
            # Apply prompt caching: cache the document context (prompt1), chunk content is the suffix
            # For string inputs with continuation=True, the result will be a concatenated string
            processed_prompt, _ = process_with_prompt_cache(
                llm_config=llm.config,
                cacheable_prefix=UserMessage(content=context_prompt1),
                suffix=UserMessage(content=context_prompt2),
                continuation=True,  # Append chunk to the document context
            )

            response = llm.invoke(processed_prompt, max_tokens=MAX_CONTEXT_TOKENS)
            chunk.chunk_context = llm_response_to_string(response)

        except LLMRateLimitError as e:
            # Erroring during chunker is undesirable, so we log the error and continue
            # TODO: for v2, add robust retry logic
            logger.exception(f"Rate limit adding chunk summary: {e}", exc_info=e)
            chunk.chunk_context = ""
        except Exception as e:
            logger.exception(f"Error adding chunk summary: {e}", exc_info=e)
            chunk.chunk_context = ""

    run_functions_tuples_in_parallel(
        [(assign_context, (chunk,)) for chunk in chunks_by_doc]
    )


def add_contextual_summaries(
    chunks: list[DocAwareChunk],
    llm: LLM,
    tokenizer: BaseTokenizer,
    chunk_token_limit: int,
) -> list[DocAwareChunk]:
    """
    Adds Document summary and chunk-within-document context to the chunks
    based on which environment variables are set.
    """
    doc2chunks = defaultdict(list)
    for chunk in chunks:
        doc2chunks[chunk.source_document.id].append(chunk)

    # The number of tokens allowed for the document when computing a document summary
    trunc_doc_summary_tokens = llm.config.max_input_tokens - len(
        tokenizer.encode(DOCUMENT_SUMMARY_PROMPT)
    )

    prompt_tokens = len(
        tokenizer.encode(CONTEXTUAL_RAG_PROMPT1 + CONTEXTUAL_RAG_PROMPT2)
    )
    # The number of tokens allowed for the document when computing a
    # "chunk in context of document" summary
    trunc_doc_chunk_tokens = (
        llm.config.max_input_tokens - prompt_tokens - chunk_token_limit
    )
    for chunks_by_doc in doc2chunks.values():
        doc_tokens = None
        if USE_DOCUMENT_SUMMARY:
            doc_tokens = add_document_summaries(
                chunks_by_doc, llm, tokenizer, trunc_doc_summary_tokens
            )

        if USE_CHUNK_SUMMARY:
            add_chunk_summaries(
                chunks_by_doc, llm, tokenizer, trunc_doc_chunk_tokens, doc_tokens
            )

    return chunks


def _verify_indexing_completeness(
    insertion_records: list[DocumentInsertionRecord],
    write_failures: list[ConnectorFailure],
    embedding_failed_doc_ids: set[str],
    updatable_ids: list[str],
    document_index_name: str,
) -> None:
    """Verify that every updatable document was either indexed or reported as failed."""
    all_returned_doc_ids = (
        {r.document_id for r in insertion_records}
        | {f.failed_document.document_id for f in write_failures if f.failed_document}
        | embedding_failed_doc_ids
    )
    if all_returned_doc_ids != set(updatable_ids):
        raise RuntimeError(
            f"Some documents were not successfully indexed. "
            f"Updatable IDs: {updatable_ids}, "
            f"Returned IDs: {all_returned_doc_ids}. "
            f"This should never happen. "
            f"This occured for document index {document_index_name}"
        )


def _apply_document_ingestion_hook(
    documents: list[Document],
    db_session: Session,
) -> list[Document]:
    """Apply the Document Ingestion hook to each document in the batch.

    - HookSkipped / HookSoftFailed → document passes through unchanged.
    - Response with sections=None → document is dropped (logged).
    - Response with sections → document sections are replaced with the hook's output.
    """

    def _build_payload(doc: Document) -> DocumentIngestionPayload:
        return DocumentIngestionPayload(
            document_id=doc.id or "",
            title=doc.title,
            semantic_identifier=doc.semantic_identifier,
            source=doc.source.value if doc.source is not None else "",
            sections=[
                DocumentIngestionSection(
                    text=s.text if isinstance(s, TextSection) else None,
                    link=s.link,
                    image_file_id=(
                        s.image_file_id if isinstance(s, ImageSection) else None
                    ),
                )
                for s in doc.sections
            ],
            metadata={
                k: v if isinstance(v, list) else [v] for k, v in doc.metadata.items()
            },
            doc_updated_at=(
                doc.doc_updated_at.isoformat() if doc.doc_updated_at else None
            ),
            primary_owners=(
                [
                    DocumentIngestionOwner(
                        display_name=o.get_semantic_name() or None,
                        email=o.email,
                    )
                    for o in doc.primary_owners
                ]
                if doc.primary_owners
                else None
            ),
            secondary_owners=(
                [
                    DocumentIngestionOwner(
                        display_name=o.get_semantic_name() or None,
                        email=o.email,
                    )
                    for o in doc.secondary_owners
                ]
                if doc.secondary_owners
                else None
            ),
        )

    def _apply_result(
        doc: Document,
        hook_result: DocumentIngestionResponse | HookSkipped | HookSoftFailed,
    ) -> Document | None:
        """Return the modified doc, original doc (skip/soft-fail), or None (drop)."""
        if isinstance(hook_result, (HookSkipped, HookSoftFailed)):
            return doc
        if not hook_result.sections:
            reason = hook_result.rejection_reason or "Document rejected by hook"
            logger.info(
                f"Document ingestion hook dropped document doc_id={doc.id!r}: {reason}"
            )
            return None
        new_sections: list[TextSection | ImageSection] = []
        for s in hook_result.sections:
            if s.image_file_id is not None:
                new_sections.append(
                    ImageSection(image_file_id=s.image_file_id, link=s.link)
                )
            elif s.text is not None:
                new_sections.append(TextSection(text=s.text, link=s.link))
            else:
                logger.warning(
                    f"Document ingestion hook returned a section with neither text nor "
                    f"image_file_id for doc_id={doc.id!r} — skipping section."
                )
        if not new_sections:
            logger.info(
                f"Document ingestion hook produced no valid sections for doc_id={doc.id!r} — dropping document."
            )
            return None
        return doc.model_copy(update={"sections": new_sections})

    if not documents:
        return documents

    # Run the hook for the first document. If it returns HookSkipped the hook
    # is not configured — skip the remaining N-1 DB lookups.
    first_doc = documents[0]
    first_payload = _build_payload(first_doc).model_dump()
    first_hook_result = execute_hook(
        db_session=db_session,
        hook_point=HookPoint.DOCUMENT_INGESTION,
        payload=first_payload,
        response_type=DocumentIngestionResponse,
    )
    if isinstance(first_hook_result, HookSkipped):
        return documents

    result: list[Document] = []
    first_applied = _apply_result(first_doc, first_hook_result)
    if first_applied is not None:
        result.append(first_applied)

    for doc in documents[1:]:
        payload = _build_payload(doc).model_dump()
        hook_result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.DOCUMENT_INGESTION,
            payload=payload,
            response_type=DocumentIngestionResponse,
        )
        applied = _apply_result(doc, hook_result)
        if applied is not None:
            result.append(applied)

    return result


@log_function_time(debug_only=True)
def index_doc_batch(
    *,
    document_batch: list[Document],
    chunker: Chunker,
    embedder: IndexingEmbedder,
    document_indices: list[DocumentIndex],
    request_id: str | None,
    tenant_id: str,
    db_session: Session,
    adapter: IndexingBatchAdapter,
    enable_contextual_rag: bool = False,
    llm: LLM | None = None,
    ignore_time_skip: bool = False,
    filter_fnc: Callable[[list[Document]], list[Document]] = filter_documents,
) -> IndexingPipelineResult:
    """End-to-end indexing for a pre-batched set of documents."""
    """Takes different pieces of the indexing pipeline and applies it to a batch of documents
    Note that the documents should already be batched at this point so that it does not inflate the
    memory requirements

    Returns a tuple where the first element is the number of new docs and the
    second element is the number of chunks."""

    # Log connector info for debugging OOM issues
    connector_id = getattr(adapter, "connector_id", None)
    credential_id = getattr(adapter, "credential_id", None)
    logger.debug(
        f"Starting index_doc_batch: connector_id={connector_id}, "
        f"credential_id={credential_id}, tenant_id={tenant_id}, "
        f"num_docs={len(document_batch)}"
    )

    filtered_documents = filter_fnc(document_batch)
    filtered_documents = _apply_document_ingestion_hook(filtered_documents, db_session)
    context = adapter.prepare(filtered_documents, ignore_time_skip)
    if not context:
        return IndexingPipelineResult.empty(len(filtered_documents))

    # Convert documents to IndexingDocument objects with processed section
    # logger.debug("Processing image sections")
    context.indexable_docs = process_image_sections(context.updatable_docs)

    doc_descriptors = [
        {
            "doc_id": doc.id,
            "doc_length": doc.get_total_char_length(),
        }
        for doc in context.indexable_docs
    ]
    logger.debug(f"Starting indexing process for documents: {doc_descriptors}")

    logger.debug("Starting chunking")
    # NOTE: no special handling for failures here, since the chunker is not
    # a common source of failure for the indexing pipeline
    chunks: list[DocAwareChunk] = chunker.chunk(context.indexable_docs)
    llm_tokenizer: BaseTokenizer | None = None

    # contextual RAG
    if enable_contextual_rag:
        assert llm is not None, "must provide an LLM for contextual RAG"
        llm_tokenizer = get_tokenizer(
            model_name=llm.config.model_name,
            provider_type=llm.config.model_provider,
        )

        # Because the chunker's tokens are different from the LLM's tokens,
        # We add a fudge factor to ensure we truncate prompts to the LLM's token limit
        chunks = add_contextual_summaries(
            chunks=chunks,
            llm=llm,
            tokenizer=llm_tokenizer,
            chunk_token_limit=chunker.chunk_token_limit * 2,
        )

    logger.debug("Starting embedding")
    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (
        embedding_result,
        chunk_store,
    ):
        updatable_ids = [doc.id for doc in context.updatable_docs]
        updatable_chunk_data = [
            UpdatableChunkData(
                chunk_id=chunk_id,
                document_id=document_id,
                boost_score=1.0,
            )
            for chunk_id, document_id in embedding_result.successful_chunk_ids
        ]

        embedding_failed_doc_ids = _get_failed_doc_ids(
            embedding_result.connector_failures
        )

        # Filter to only successfully embedded chunks so
        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.
        embedded_chunks = [
            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids
        ]

        # Acquires a lock on the documents so that no other process can modify
        # them.  Not needed until here, since this is when the actual race
        # condition with vector db can occur.
        with adapter.lock_context(context.updatable_docs):
            enricher = adapter.prepare_enrichment(
                context=context,
                tenant_id=tenant_id,
                chunks=embedded_chunks,
            )

            index_batch_params = IndexBatchParams(
                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
                tenant_id=tenant_id,
                large_chunks_enabled=chunker.enable_large_chunks,
            )

            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (
                None
            )
            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (
                None
            )

            for document_index in document_indices:

                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
                    for chunk in chunk_store.stream():
                        yield enricher.enrich_chunk(chunk, 1.0)

                insertion_records, write_failures = (
                    write_chunks_to_vector_db_with_backoff(
                        document_index=document_index,
                        make_chunks=_enriched_stream,
                        index_batch_params=index_batch_params,
                    )
                )

                _verify_indexing_completeness(
                    insertion_records=insertion_records,
                    write_failures=write_failures,
                    embedding_failed_doc_ids=embedding_failed_doc_ids,
                    updatable_ids=updatable_ids,
                    document_index_name=document_index.__class__.__name__,
                )
                # We treat the first document index we got as the primary one used
                # for reporting the state of indexing.
                if primary_doc_idx_insertion_records is None:
                    primary_doc_idx_insertion_records = insertion_records
                if primary_doc_idx_vector_db_write_failures is None:
                    primary_doc_idx_vector_db_write_failures = write_failures

            adapter.post_index(
                context=context,
                updatable_chunk_data=updatable_chunk_data,
                filtered_documents=filtered_documents,
                enrichment=enricher,
            )

    assert primary_doc_idx_insertion_records is not None
    assert primary_doc_idx_vector_db_write_failures is not None
    return IndexingPipelineResult(
        new_docs=sum(
            1 for r in primary_doc_idx_insertion_records if not r.already_existed
        ),
        total_docs=len(filtered_documents),
        total_chunks=len(embedding_result.successful_chunk_ids),
        failures=primary_doc_idx_vector_db_write_failures
        + embedding_result.connector_failures,
    )


def run_indexing_pipeline(
    *,
    document_batch: list[Document],
    request_id: str | None,
    embedder: IndexingEmbedder,
    document_indices: list[DocumentIndex],
    db_session: Session,
    tenant_id: str,
    adapter: IndexingBatchAdapter,
    chunker: Chunker | None = None,
    ignore_time_skip: bool = False,
) -> IndexingPipelineResult:
    """Builds a pipeline which takes in a list (batch) of docs and indexes them."""
    all_search_settings = get_active_search_settings(db_session)
    if (
        all_search_settings.secondary
        and all_search_settings.secondary.status == IndexModelStatus.FUTURE
    ):
        search_settings = all_search_settings.secondary
    else:
        search_settings = all_search_settings.primary

    multipass_config = get_multipass_config(search_settings)

    enable_contextual_rag = (
        search_settings.enable_contextual_rag or ENABLE_CONTEXTUAL_RAG
    )
    llm = None
    if enable_contextual_rag:
        llm = get_llm_for_contextual_rag(
            search_settings.contextual_rag_llm_name or DEFAULT_CONTEXTUAL_RAG_LLM_NAME,
            search_settings.contextual_rag_llm_provider
            or DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER,
        )

    chunker = chunker or Chunker(
        tokenizer=embedder.embedding_model.tokenizer,
        enable_multipass=multipass_config.multipass_indexing,
        enable_large_chunks=multipass_config.enable_large_chunks,
        enable_contextual_rag=enable_contextual_rag,
        # after every doc, update status in case there are a bunch of really long docs
    )

    return index_doc_batch_with_handler(
        chunker=chunker,
        embedder=embedder,
        document_indices=document_indices,
        document_batch=document_batch,
        request_id=request_id,
        tenant_id=tenant_id,
        db_session=db_session,
        adapter=adapter,
        enable_contextual_rag=enable_contextual_rag,
        llm=llm,
        ignore_time_skip=ignore_time_skip,
    )


================================================
FILE: backend/onyx/indexing/models.py
================================================
import contextlib
from collections.abc import Generator
from typing import Optional
from typing import Protocol
from typing import TYPE_CHECKING

from pydantic import BaseModel
from pydantic import Field

from onyx.access.models import DocumentAccess
from onyx.connectors.models import Document
from onyx.db.enums import EmbeddingPrecision
from onyx.db.enums import SwitchoverType
from onyx.utils.logger import setup_logger
from onyx.utils.pydantic_util import shallow_model_dump
from shared_configs.enums import EmbeddingProvider
from shared_configs.model_server_models import Embedding

if TYPE_CHECKING:
    from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from sqlalchemy.engine.util import TransactionalContext

if TYPE_CHECKING:
    from onyx.db.models import SearchSettings


logger = setup_logger()


class ChunkEmbedding(BaseModel):
    full_embedding: Embedding
    mini_chunk_embeddings: list[Embedding]


class BaseChunk(BaseModel):
    chunk_id: int
    # The first sentence(s) of the first Section of the chunk
    blurb: str
    content: str
    # Holds the link and the offsets into the raw Chunk text
    source_links: dict[int, str] | None
    image_file_id: str | None
    # True if this Chunk's start is not at the start of a Section
    # TODO(andrei): This is deprecated as of the OpenSearch migration. Remove.
    # Do not use.
    section_continuation: bool


class DocAwareChunk(BaseChunk):
    # During indexing flow, we have access to a complete "Document"
    # During inference we only have access to the document id and do not reconstruct the Document
    source_document: Document

    # This could be an empty string if the title is too long and taking up too much of the chunk
    # This does not mean necessarily that the document does not have a title
    title_prefix: str

    # During indexing we also (optionally) build a metadata string from the metadata dict
    # This is also indexed so that we can strip it out after indexing, this way it supports
    # multiple iterations of metadata representation for backwards compatibility
    metadata_suffix_semantic: str
    metadata_suffix_keyword: str

    # This is the number of tokens reserved for contextual RAG
    # in the chunk. doc_summary and chunk_context conbined should
    # contain at most this many tokens.
    contextual_rag_reserved_tokens: int
    # This is the summary for the document generated for contextual RAG
    doc_summary: str
    # This is the context for this chunk generated for contextual RAG
    chunk_context: str

    mini_chunk_texts: list[str] | None

    large_chunk_id: int | None

    large_chunk_reference_ids: list[int] = Field(default_factory=list)

    def to_short_descriptor(self) -> str:
        """Used when logging the identity of a chunk"""
        return f"{self.source_document.to_short_descriptor()} Chunk ID: {self.chunk_id}"

    def get_link(self) -> str | None:
        return (
            self.source_document.sections[0].link
            if self.source_document.sections
            else None
        )


class IndexChunk(DocAwareChunk):
    embeddings: ChunkEmbedding
    title_embedding: Embedding | None


# TODO(rkuo): currently, this extra metadata sent during indexing is just for speed,
# but full consistency happens on background sync
class DocMetadataAwareIndexChunk(IndexChunk):
    """An `IndexChunk` that contains all necessary metadata to be indexed. This includes
    the following:

    access: holds all information about which users should have access to the
            source document for this chunk.
    document_sets: all document sets the source document for this chunk is a part
                   of. This is used for filtering / personas.
    boost: influences the ranking of this chunk at query time. Positive -> ranked higher,
           negative -> ranked lower. Not included in aggregated boost calculation
           for legacy reasons.
    aggregated_chunk_boost_factor: represents the aggregated chunk-level boost (currently: information content)
    """

    tenant_id: str
    access: "DocumentAccess"
    document_sets: set[str]
    user_project: list[int]
    personas: list[int]
    boost: int
    aggregated_chunk_boost_factor: float
    # Full ancestor path from root hierarchy node to document's parent.
    # Stored as an integer array in OpenSearch for hierarchy-based filtering.
    # Empty list means no hierarchy info (document excluded from hierarchy searches).
    ancestor_hierarchy_node_ids: list[int]

    @classmethod
    def from_index_chunk(
        cls,
        index_chunk: IndexChunk,
        access: "DocumentAccess",
        document_sets: set[str],
        user_project: list[int],
        personas: list[int],
        boost: int,
        aggregated_chunk_boost_factor: float,
        tenant_id: str,
        ancestor_hierarchy_node_ids: list[int] | None = None,
    ) -> "DocMetadataAwareIndexChunk":
        return cls.model_construct(
            **shallow_model_dump(index_chunk),
            access=access,
            document_sets=document_sets,
            user_project=user_project,
            personas=personas,
            boost=boost,
            aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,
            tenant_id=tenant_id,
            ancestor_hierarchy_node_ids=ancestor_hierarchy_node_ids or [],
        )


class EmbeddingModelDetail(BaseModel):
    id: int | None = None
    model_name: str
    normalize: bool
    query_prefix: str | None
    passage_prefix: str | None
    api_url: str | None = None
    provider_type: EmbeddingProvider | None = None
    api_key: str | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}

    @classmethod
    def from_db_model(
        cls,
        search_settings: "SearchSettings",
    ) -> "EmbeddingModelDetail":
        api_key = None
        if (
            search_settings.cloud_provider is not None
            and search_settings.cloud_provider.api_key is not None
        ):
            api_key = search_settings.cloud_provider.api_key.get_value(apply_mask=True)

        return cls(
            id=search_settings.id,
            model_name=search_settings.model_name,
            normalize=search_settings.normalize,
            query_prefix=search_settings.query_prefix,
            passage_prefix=search_settings.passage_prefix,
            provider_type=search_settings.provider_type,
            api_key=api_key,
            api_url=search_settings.api_url,
        )


# Additional info needed for indexing time
class IndexingSetting(EmbeddingModelDetail):
    model_dim: int
    index_name: str | None
    multipass_indexing: bool
    embedding_precision: EmbeddingPrecision
    reduced_dimension: int | None = None

    switchover_type: SwitchoverType = SwitchoverType.REINDEX
    enable_contextual_rag: bool
    contextual_rag_llm_name: str | None = None
    contextual_rag_llm_provider: str | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}

    @property
    def final_embedding_dim(self) -> int:
        if self.reduced_dimension:
            return self.reduced_dimension
        return self.model_dim

    @classmethod
    def from_db_model(cls, search_settings: "SearchSettings") -> "IndexingSetting":
        return cls(
            model_name=search_settings.model_name,
            model_dim=search_settings.model_dim,
            normalize=search_settings.normalize,
            query_prefix=search_settings.query_prefix,
            passage_prefix=search_settings.passage_prefix,
            provider_type=search_settings.provider_type,
            index_name=search_settings.index_name,
            multipass_indexing=search_settings.multipass_indexing,
            embedding_precision=search_settings.embedding_precision,
            reduced_dimension=search_settings.reduced_dimension,
            switchover_type=search_settings.switchover_type,
            enable_contextual_rag=search_settings.enable_contextual_rag,
        )


class MultipassConfig(BaseModel):
    multipass_indexing: bool
    enable_large_chunks: bool


class UpdatableChunkData(BaseModel):
    chunk_id: int
    document_id: str
    boost_score: float


class ChunkEnrichmentContext(Protocol):
    """Returned by prepare_enrichment. Holds pre-computed metadata lookups
    and provides per-chunk enrichment."""

    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]

    def enrich_chunk(
        self, chunk: IndexChunk, score: float
    ) -> DocMetadataAwareIndexChunk: ...


class IndexingBatchAdapter(Protocol):
    def prepare(
        self, documents: list[Document], ignore_time_skip: bool
    ) -> Optional["DocumentBatchPrepareContext"]: ...

    @contextlib.contextmanager
    def lock_context(
        self, documents: list[Document]
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

    def prepare_enrichment(
        self,
        context: "DocumentBatchPrepareContext",
        tenant_id: str,
        chunks: list[DocAwareChunk],
    ) -> ChunkEnrichmentContext:
        """Prepare per-chunk enrichment data (access, document sets, boost, etc.).

        Precondition: ``chunks`` have already been through the embedding step
        (i.e. they are ``IndexChunk`` instances with populated embeddings,
        passed here as the base ``DocAwareChunk`` type).
        """
        ...

    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
        enrichment: ChunkEnrichmentContext,
    ) -> None: ...


================================================
FILE: backend/onyx/indexing/vector_db_insertion.py
================================================
import time
from collections.abc import Callable
from collections.abc import Iterable
from http import HTTPStatus
from itertools import chain
from itertools import groupby

import httpx

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import DocumentFailure
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import IndexBatchParams
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _log_insufficient_storage_error(e: Exception) -> None:
    if isinstance(e, httpx.HTTPStatusError):
        if e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:
            logger.error(
                "NOTE: HTTP Status 507 Insufficient Storage indicates "
                "you need to allocate more memory or disk space to the "
                "Vespa/index container."
            )


def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],
    index_batch_params: IndexBatchParams,
) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time not individual chunks, since the
    vector DB interface assumes that all chunks for a single document are present. The
    chunks must also be in contiguous batches
    """
    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
                    chunks=make_chunks(),
                    index_batch_params=index_batch_params,
                )
            ),
            [],
        )
    except Exception as e:
        logger.exception(
            "Failed to write chunk batch to vector db. Trying individual docs."
        )

        # give some specific logging on this common failure case.
        _log_insufficient_storage_error(e)

        # wait a couple seconds just to give the vector db a chance to recover
        time.sleep(2)

    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []

    def key(chunk: DocMetadataAwareIndexChunk) -> str:
        return chunk.source_document.id

    seen_doc_ids: set[str] = set()
    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):
        if doc_id in seen_doc_ids:
            raise RuntimeError(
                f"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}"
            )
        seen_doc_ids.add(doc_id)

        first_chunk = next(chunks_for_doc)
        chunks_for_doc = chain([first_chunk], chunks_for_doc)

        try:
            insertion_records.extend(
                document_index.index(
                    chunks=chunks_for_doc,
                    index_batch_params=index_batch_params,
                )
            )
        except Exception as e:
            logger.exception(
                f"Failed to write document chunks for '{doc_id}' to vector db"
            )

            # give some specific logging on this common failure case.
            _log_insufficient_storage_error(e)

            failures.append(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
                        document_link=first_chunk.get_link(),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
            )

    return insertion_records, failures


================================================
FILE: backend/onyx/key_value_store/__init__.py
================================================


================================================
FILE: backend/onyx/key_value_store/factory.py
================================================
from onyx.key_value_store.interface import KeyValueStore
from onyx.key_value_store.store import PgRedisKVStore
from shared_configs.configs import DEFAULT_REDIS_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


def get_kv_store() -> KeyValueStore:
    # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in
    # It's read from the global thread level variable
    return PgRedisKVStore()


def get_shared_kv_store() -> KeyValueStore:
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(DEFAULT_REDIS_PREFIX)
    try:
        return get_kv_store()
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


================================================
FILE: backend/onyx/key_value_store/interface.py
================================================
import abc
from typing import cast

from onyx.utils.special_types import JSON_ro


class KvKeyNotFoundError(Exception):
    pass


def unwrap_str(val: JSON_ro) -> str:
    """Unwrap a string stored as {"value": str} in the encrypted KV store.
    Also handles legacy plain-string values cached in Redis."""
    if isinstance(val, dict):
        try:
            return cast(str, val["value"])
        except KeyError:
            raise ValueError(
                f"Expected dict with 'value' key, got keys: {list(val.keys())}"
            )
    return cast(str, val)


class KeyValueStore:
    # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in
    # It's read from the global thread level variable
    @abc.abstractmethod
    def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, key: str) -> None:
        raise NotImplementedError


================================================
FILE: backend/onyx/key_value_store/store.py
================================================
import json
from typing import cast

from onyx.cache.interface import CacheBackend
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import KVStore
from onyx.key_value_store.interface import KeyValueStore
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.utils.logger import setup_logger
from onyx.utils.special_types import JSON_ro


logger = setup_logger()


REDIS_KEY_PREFIX = "onyx_kv_store:"
KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24  # 1 Day


class PgRedisKVStore(KeyValueStore):
    def __init__(self, cache: CacheBackend | None = None) -> None:
        self._cache = cache

    def _get_cache(self) -> CacheBackend:
        if self._cache is None:
            from onyx.cache.factory import get_cache_backend

            self._cache = get_cache_backend()
        return self._cache

    def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
        # Not encrypted in Cache backend (typically Redis), but encrypted in Postgres
        try:
            self._get_cache().set(
                REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION
            )
        except Exception as e:
            # Fallback gracefully to Postgres if Cache backend fails
            logger.error(
                f"Failed to set value in Cache backend for key '{key}': {str(e)}"
            )

        encrypted_val = val if encrypt else None
        plain_val = val if not encrypt else None
        with get_session_with_current_tenant() as db_session:
            obj = db_session.query(KVStore).filter_by(key=key).first()
            if obj:
                obj.value = plain_val
                obj.encrypted_value = encrypted_val  # type: ignore[assignment]
            else:
                obj = KVStore(key=key, value=plain_val, encrypted_value=encrypted_val)
                db_session.query(KVStore).filter_by(key=key).delete()  # just in case
                db_session.add(obj)
            db_session.commit()

    def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:
        if not refresh_cache:
            try:
                cached = self._get_cache().get(REDIS_KEY_PREFIX + key)
                if cached is not None:
                    return json.loads(cached.decode("utf-8"))
            except Exception as e:
                logger.error(
                    f"Failed to get value from cache for key '{key}': {str(e)}"
                )

        with get_session_with_current_tenant() as db_session:
            obj = db_session.query(KVStore).filter_by(key=key).first()
            if not obj:
                raise KvKeyNotFoundError

            if obj.value is not None:
                value = obj.value
            elif obj.encrypted_value is not None:
                # Unwrap SensitiveValue - this is internal backend use
                value = obj.encrypted_value.get_value(apply_mask=False)
            else:
                value = None

            try:
                self._get_cache().set(
                    REDIS_KEY_PREFIX + key,
                    json.dumps(value),
                    ex=KV_REDIS_KEY_EXPIRATION,
                )
            except Exception as e:
                logger.error(f"Failed to set value in cache for key '{key}': {str(e)}")

            return cast(JSON_ro, value)

    def delete(self, key: str) -> None:
        try:
            self._get_cache().delete(REDIS_KEY_PREFIX + key)
        except Exception as e:
            logger.error(f"Failed to delete value from cache for key '{key}': {str(e)}")

        with get_session_with_current_tenant() as db_session:
            result = db_session.query(KVStore).filter_by(key=key).delete()
            if result == 0:
                raise KvKeyNotFoundError
            db_session.commit()


================================================
FILE: backend/onyx/kg/clustering/clustering.py
================================================
import time
from collections.abc import Generator
from typing import cast

from rapidfuzz.fuzz import ratio
from redis.lock import Lock as RedisLock
from sqlalchemy import func
from sqlalchemy import text

from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.kg_configs import KG_CLUSTERING_RETRIEVE_THRESHOLD
from onyx.configs.kg_configs import KG_CLUSTERING_THRESHOLD
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.entities import KGEntity
from onyx.db.entities import KGEntityExtractionStaging
from onyx.db.entities import merge_entities
from onyx.db.entities import transfer_entity
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import validate_kg_settings
from onyx.db.models import Document
from onyx.db.models import KGEntityType
from onyx.db.models import KGRelationshipExtractionStaging
from onyx.db.models import KGRelationshipTypeExtractionStaging
from onyx.db.relationships import transfer_relationship
from onyx.db.relationships import transfer_relationship_type
from onyx.db.relationships import upsert_relationship
from onyx.db.relationships import upsert_relationship_type
from onyx.document_index.vespa.kg_interactions import (
    get_kg_vespa_info_update_requests_for_document,
)
from onyx.document_index.vespa.kg_interactions import update_kg_chunks_vespa_info
from onyx.kg.models import KGGroundingType
from onyx.kg.utils.formatting_utils import make_relationship_id
from onyx.kg.utils.lock_utils import extend_lock
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

logger = setup_logger()


def _get_batch_untransferred_grounded_entities(
    batch_size: int,
) -> Generator[list[KGEntityExtractionStaging], None, None]:
    while True:
        with get_session_with_current_tenant() as db_session:
            batch = (
                db_session.query(KGEntityExtractionStaging)
                .join(
                    KGEntityType,
                    KGEntityExtractionStaging.entity_type_id_name
                    == KGEntityType.id_name,
                )
                .filter(
                    KGEntityType.grounding == KGGroundingType.GROUNDED,
                    KGEntityExtractionStaging.transferred_id_name.is_(None),
                )
                .limit(batch_size)
                .all()
            )
            if not batch:
                break
            yield batch


def _get_batch_untransferred_relationship_types(
    batch_size: int,
) -> Generator[list[KGRelationshipTypeExtractionStaging], None, None]:
    while True:
        with get_session_with_current_tenant() as db_session:
            batch = (
                db_session.query(KGRelationshipTypeExtractionStaging)
                .filter(KGRelationshipTypeExtractionStaging.transferred.is_(False))
                .limit(batch_size)
                .all()
            )
            if not batch:
                break
            yield batch


def _get_batch_untransferred_relationships(
    batch_size: int,
) -> Generator[list[KGRelationshipExtractionStaging], None, None]:
    while True:
        with get_session_with_current_tenant() as db_session:
            batch = (
                db_session.query(KGRelationshipExtractionStaging)
                .filter(KGRelationshipExtractionStaging.transferred.is_(False))
                .limit(batch_size)
                .all()
            )
            if not batch:
                break
            yield batch


def _get_batch_entities_with_parent(
    batch_size: int,
) -> Generator[list[KGEntityExtractionStaging], None, None]:
    offset = 0

    while True:
        with get_session_with_current_tenant() as db_session:
            batch = (
                db_session.query(KGEntityExtractionStaging)
                .filter(KGEntityExtractionStaging.parent_key.isnot(None))
                .order_by(KGEntityExtractionStaging.id_name)
                .offset(offset)
                .limit(batch_size)
                .all()
            )
            if not batch:
                break
            # we can't filter out ""s earlier as it will mess up the pagination
            yield [entity for entity in batch if entity.parent_key != ""]
            offset += batch_size


def _get_batch_kg_processed_documents(
    batch_size: int,
) -> Generator[list[Document], None, None]:
    offset = 0

    while True:
        with get_session_with_current_tenant() as db_session:
            batch = (
                db_session.query(Document)
                .join(
                    KGEntityExtractionStaging,
                    Document.id == KGEntityExtractionStaging.document_id,
                )
                .filter(
                    KGEntityExtractionStaging.transferred_id_name.is_not(None),
                )
                .order_by(Document.id)
                .offset(offset)
                .limit(batch_size)
                .all()
            )
            if not batch:
                break
            yield batch
            offset += batch_size


def _cluster_one_grounded_entity(
    entity: KGEntityExtractionStaging,
) -> tuple[KGEntity, bool]:
    """
    Cluster a single grounded entity.
    """
    with get_session_with_current_tenant() as db_session:
        # get entity name and filtering conditions
        if entity.document_id is not None:
            entity_name = cast(
                str,
                db_session.query(Document.semantic_id)
                .filter(Document.id == entity.document_id)
                .scalar(),
            ).lower()
            filtering = [KGEntity.document_id.is_(None)]
        else:
            entity_name = entity.name.lower()
            filtering = []

        # skip those with numbers so we don't cluster version1 and version2, etc.
        similar_entities: list[KGEntity] = []
        if not any(char.isdigit() for char in entity_name):
            # find similar entities, uses GIN index, very efficient
            db_session.execute(
                text(
                    "SET pg_trgm.similarity_threshold = "
                    + str(KG_CLUSTERING_RETRIEVE_THRESHOLD)
                )
            )
            similar_entities = (
                db_session.query(KGEntity)
                .filter(
                    # find entities of the same type with a similar name
                    *filtering,
                    KGEntity.entity_type_id_name == entity.entity_type_id_name,
                    getattr(func, POSTGRES_DEFAULT_SCHEMA).similarity_op(
                        KGEntity.name, entity_name
                    ),
                )
                .all()
            )

    # find best match
    best_score = -1.0
    best_entity = None
    for similar in similar_entities:
        # skip those with numbers so we don't cluster version1 and version2, etc.
        if any(char.isdigit() for char in similar.name):
            continue
        score = ratio(similar.name, entity_name)
        if score >= KG_CLUSTERING_THRESHOLD * 100 and score > best_score:
            best_score = score
            best_entity = similar

    # if there is a match, update the entity, otherwise create a new one
    with get_session_with_current_tenant() as db_session:
        if best_entity:
            logger.debug(f"Merged {entity.name} with {best_entity.name}")
            update_vespa = (
                best_entity.document_id is None and entity.document_id is not None
            )
            transferred_entity = merge_entities(
                db_session=db_session, parent=best_entity, child=entity
            )
        else:
            update_vespa = entity.document_id is not None
            transferred_entity = transfer_entity(db_session=db_session, entity=entity)

        db_session.commit()

    return transferred_entity, update_vespa


def _create_one_parent_child_relationship(entity: KGEntityExtractionStaging) -> None:
    """
    Creates a relationship between the entity and its parent, if it exists.
    Then, updates the entity's parent to the next ancestor.
    """
    with get_session_with_current_tenant() as db_session:
        # find the next ancestor
        parent = (
            db_session.query(KGEntity)
            .filter(KGEntity.entity_key == entity.parent_key)
            .first()
        )

        if parent is not None:
            # create parent child relationship and relationship type
            upsert_relationship_type(
                db_session=db_session,
                source_entity_type=parent.entity_type_id_name,
                relationship_type="has_subcomponent",
                target_entity_type=entity.entity_type_id_name,
            )
            relationship_id_name = make_relationship_id(
                parent.id_name,
                "has_subcomponent",
                cast(str, entity.transferred_id_name),
            )
            upsert_relationship(
                db_session=db_session,
                relationship_id_name=relationship_id_name,
                source_document_id=entity.document_id,
            )

            next_ancestor = parent.parent_key or ""
        else:
            next_ancestor = ""

        # set the staging entity's parent to the next ancestor
        # if there is no parent or next ancestor, set to "" to differentiate from None
        # None will mess up the pagination in _get_batch_entities_with_parent
        db_session.query(KGEntityExtractionStaging).filter(
            KGEntityExtractionStaging.id_name == entity.id_name
        ).update({"parent_key": next_ancestor})
        db_session.commit()


def _transfer_one_relationship(
    relationship: KGRelationshipExtractionStaging,
) -> None:
    with get_session_with_current_tenant() as db_session:
        # get the translations
        staging_entity_id_names = {
            relationship.source_node,
            relationship.target_node,
        }
        entity_translations: dict[str, str] = {
            entity.id_name: entity.transferred_id_name
            for entity in db_session.query(KGEntityExtractionStaging)
            .filter(KGEntityExtractionStaging.id_name.in_(staging_entity_id_names))
            .all()
            if entity.transferred_id_name is not None
        }
        if len(entity_translations) != len(staging_entity_id_names):
            logger.error(
                f"Missing entity translations for {staging_entity_id_names - entity_translations.keys()}"
            )
            return

        # transfer the relationship
        transfer_relationship(
            db_session=db_session,
            relationship=relationship,
            entity_translations=entity_translations,
        )
        db_session.commit()


def kg_clustering(
    tenant_id: str,
    index_name: str,
    lock: RedisLock,
    processing_chunk_batch_size: int = 16,
) -> None:
    """
    Here we will cluster the extractions based on their cluster frameworks.
    Initially, this will only focus on grounded entities with pre-determined
    relationships, so 'clustering' is actually not yet required.
    However, we may need to reconcile entities coming from different sources.

    The primary purpose of this function is to populate the actual KG tables
    from the temp_extraction tables.

    This will change with deep extraction, where grounded-sourceless entities
    can be extracted and then need to be clustered.
    """
    logger.info(f"Starting kg clustering for tenant {tenant_id}")

    kg_config_settings = get_kg_config_settings()
    validate_kg_settings(kg_config_settings)

    last_lock_time = time.monotonic()

    # Cluster and transfer grounded entities sequentially
    start_time = time.monotonic()
    i_batch = 0
    for i_batch, untransferred_grounded_entities in enumerate(
        _get_batch_untransferred_grounded_entities(
            batch_size=processing_chunk_batch_size
        )
    ):
        for entity in untransferred_grounded_entities:
            _cluster_one_grounded_entity(entity)
        last_lock_time = extend_lock(
            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
        )
        # logger.debug(f"Transferred entities batch {i}")
    # NOTE: we assume every entity is transferred, as we currently only have grounded entities
    time_delta = time.monotonic() - start_time
    logger.info(
        f"Finished transferring {i_batch + 1} entity batches in {time_delta:.2f}s"
    )

    # Create parent-child relationships in parallel
    for _ in range(kg_config_settings.KG_MAX_PARENT_RECURSION_DEPTH):
        for root_entities in _get_batch_entities_with_parent(
            batch_size=processing_chunk_batch_size
        ):
            run_functions_tuples_in_parallel(
                [
                    (_create_one_parent_child_relationship, (root_entity,))
                    for root_entity in root_entities
                ]
            )
            last_lock_time = extend_lock(
                lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
            )
    logger.info("Finished creating all parent-child relationships")

    # Transfer the relationship types (no need to do in parallel as there's only a few)
    start_time = time.monotonic()
    i_batch = 0
    for i_batch, relationship_types in enumerate(
        _get_batch_untransferred_relationship_types(
            batch_size=processing_chunk_batch_size
        )
    ):
        with get_session_with_current_tenant() as db_session:
            for relationship_type in relationship_types:
                transfer_relationship_type(db_session, relationship_type)
            db_session.commit()
        last_lock_time = extend_lock(
            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
        )
        # logger.debug(f"Transferred relationship types batch {i}")
    time_delta = time.monotonic() - start_time
    logger.info(
        f"Finished transferring {i_batch + 1} relationship type batches in {time_delta:.2f}s"
    )

    # Transfer the relationships in parallel
    start_time = time.monotonic()
    i_batch = 0
    for i_batch, relationships in enumerate(
        _get_batch_untransferred_relationships(batch_size=processing_chunk_batch_size)
    ):
        run_functions_tuples_in_parallel(
            [
                (_transfer_one_relationship, (relationship,))
                for relationship in relationships
            ]
        )
        last_lock_time = extend_lock(
            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
        )
        # logger.debug(f"Transferred relationships batch {i}")
    time_delta = time.monotonic() - start_time
    logger.info(
        f"Finished transferring {i_batch + 1} relationship batches in {time_delta:.2f}s"
    )

    # Update vespa for each document
    start_time = time.monotonic()
    i_batch = 0
    for i_batch, documents in enumerate(
        _get_batch_kg_processed_documents(batch_size=processing_chunk_batch_size)
    ):
        batch_update_requests = run_functions_tuples_in_parallel(
            [
                (get_kg_vespa_info_update_requests_for_document, (document.id,))
                for document in documents
            ]
        )
        for update_requests, document in zip(batch_update_requests, documents):
            try:
                update_kg_chunks_vespa_info(update_requests, index_name, tenant_id)
            except Exception as e:
                logger.error(f"Error updating vespa for document {document.id}: {e}")
        last_lock_time = extend_lock(
            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
        )
        # logger.debug(f"Updated vespa for documents batch {i}")
    time_delta = time.monotonic() - start_time
    logger.info(
        f"Finished updating {i_batch + 1} document batches in {time_delta:.2f}s"
    )

    # Delete the transferred objects from the staging tables
    try:
        with get_session_with_current_tenant() as db_session:
            db_session.query(KGRelationshipExtractionStaging).filter(
                KGRelationshipExtractionStaging.transferred.is_(True)
            ).delete(synchronize_session=False)
            db_session.commit()
    except Exception as e:
        logger.error(f"Error deleting relationships: {e}")

    try:
        with get_session_with_current_tenant() as db_session:
            db_session.query(KGRelationshipTypeExtractionStaging).filter(
                KGRelationshipTypeExtractionStaging.transferred.is_(True)
            ).delete(synchronize_session=False)
            db_session.commit()
    except Exception as e:
        logger.error(f"Error deleting relationship types: {e}")

    try:
        with get_session_with_current_tenant() as db_session:
            db_session.query(KGEntityExtractionStaging).filter(
                KGEntityExtractionStaging.transferred_id_name.is_not(None)
            ).delete(synchronize_session=False)
            db_session.commit()
    except Exception as e:
        logger.error(f"Error deleting entities: {e}")
    logger.info("Finished deleting all transferred staging entries")


================================================
FILE: backend/onyx/kg/clustering/normalizations.py
================================================
import re
from collections import defaultdict
from typing import cast

import numpy as np
from rapidfuzz.distance.DamerauLevenshtein import normalized_similarity
from sqlalchemy import desc
from sqlalchemy import Float
from sqlalchemy import func
from sqlalchemy import MetaData
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import ARRAY

from onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT
from onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_NGRAM_WEIGHTS
from onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_THRESHOLD
from onyx.configs.kg_configs import KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import KGEntity
from onyx.db.relationships import get_relationships_for_entity_type_pairs
from onyx.kg.models import NormalizedEntities
from onyx.kg.models import NormalizedRelationships
from onyx.kg.utils.embeddings import encode_string_batch
from onyx.kg.utils.formatting_utils import format_entity_id_for_models
from onyx.kg.utils.formatting_utils import get_attributes
from onyx.kg.utils.formatting_utils import get_entity_type
from onyx.kg.utils.formatting_utils import make_entity_w_attributes
from onyx.kg.utils.formatting_utils import make_relationship_id
from onyx.kg.utils.formatting_utils import split_entity_id
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

logger = setup_logger()


alphanum_regex = re.compile(r"[^a-z0-9]+")
rem_email_regex = re.compile(r"(?<=\S)@([a-z0-9-]+)\.([a-z]{2,6})$")


def _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:
    """Generate n-grams from a sequence."""
    return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]


def _clean_name(entity_name: str) -> str:
    """
    Clean an entity string by removing non-alphanumeric characters and email addresses.
    If the name after cleaning is empty, return the original name in lowercase.
    """
    cleaned_entity = entity_name.casefold()
    return (
        alphanum_regex.sub("", rem_email_regex.sub("", cleaned_entity))
        or cleaned_entity
    )


def _normalize_one_entity(
    entity: str,
    attributes: dict[str, str],
    allowed_docs_temp_view_name: str | None = None,
) -> str | None:
    """
    Matches a single entity to the best matching entity of the same type.
    """
    entity_type, entity_name = split_entity_id(entity)
    if entity_name == "*":
        return entity

    cleaned_entity = _clean_name(entity_name)

    # narrow filter to subtype if requested
    type_filters = [KGEntity.entity_type_id_name == entity_type]
    if "subtype" in attributes:
        type_filters.append(
            KGEntity.attributes.op("@>")({"subtype": attributes["subtype"]})
        )

    # step 1: find entities containing the entity_name or something similar
    with get_session_with_current_tenant() as db_session:
        # get allowed documents
        metadata = MetaData()
        if allowed_docs_temp_view_name is None:
            raise ValueError("allowed_docs_temp_view_name is not available")

        effective_schema_allowed_docs_temp_view_name = (
            allowed_docs_temp_view_name.split(".")[-1]
        )

        allowed_docs_temp_view = Table(
            effective_schema_allowed_docs_temp_view_name,
            metadata,
            autoload_with=db_session.get_bind(),
        )

        # generate trigrams of the queried entity Q
        query_trigrams = db_session.query(
            getattr(func, POSTGRES_DEFAULT_SCHEMA)
            .show_trgm(cleaned_entity)
            .cast(ARRAY(String(3)))
            .label("trigrams")
        ).cte("query")

        candidates = cast(
            list[tuple[str, str, float]],
            db_session.query(
                KGEntity.id_name,
                KGEntity.name,
                (
                    # for each entity E, compute score = | Q ∩ E | / min(|Q|, |E|)
                    func.cardinality(
                        func.array(
                            select(func.unnest(KGEntity.name_trigrams))
                            .correlate(KGEntity)
                            .intersect(
                                select(
                                    func.unnest(query_trigrams.c.trigrams)
                                ).correlate(query_trigrams)
                            )
                            .scalar_subquery()
                        )
                    ).cast(Float)
                    / func.least(
                        func.cardinality(query_trigrams.c.trigrams),
                        func.cardinality(KGEntity.name_trigrams),
                    )
                ).label("score"),
            )
            .select_from(KGEntity, query_trigrams)
            .outerjoin(
                allowed_docs_temp_view,
                KGEntity.document_id == allowed_docs_temp_view.c.allowed_doc_id,
            )
            .filter(
                *type_filters,
                KGEntity.name_trigrams.overlap(query_trigrams.c.trigrams),
                # Add filter for allowed docs - either document_id is NULL or it's in allowed_docs
                (
                    KGEntity.document_id.is_(None)
                    | allowed_docs_temp_view.c.allowed_doc_id.isnot(None)
                ),
            )
            .order_by(desc("score"))
            .limit(KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT)
            .all(),
        )
    if not candidates:
        return None

    # step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank
    n1, n2, n3 = (
        set(_ngrams(cleaned_entity, 1)),
        set(_ngrams(cleaned_entity, 2)),
        set(_ngrams(cleaned_entity, 3)),
    )
    for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):
        cleaned_candidate = _clean_name(candidate_name)
        h_n1, h_n2, h_n3 = (
            set(_ngrams(cleaned_candidate, 1)),
            set(_ngrams(cleaned_candidate, 2)),
            set(_ngrams(cleaned_candidate, 3)),
        )

        # compute ngram overlap, renormalize scores if the names are too short for larger ngrams
        grams_used = min(2, len(cleaned_entity) - 1, len(cleaned_candidate) - 1)
        W_n1, W_n2, W_n3 = KG_NORMALIZATION_RERANK_NGRAM_WEIGHTS
        ngram_score = (
            # compute | Q ∩ E | / min(|Q|, |E|) for unigrams and bigrams (trigrams already computed)
            W_n1 * len(n1 & h_n1) / max(1, min(len(n1), len(h_n1)))
            + W_n2 * len(n2 & h_n2) / max(1, min(len(n2), len(h_n2)))
            + W_n3 * len(n3 & h_n3) / max(1, min(len(n3), len(h_n3)))
        ) / (W_n1, W_n1 + W_n2, 1.0)[grams_used]

        # compute damerau levenshtein distance to fuzzy match against typos
        W_leven = KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT
        leven_score = normalized_similarity(cleaned_entity, cleaned_candidate)

        # combine scores
        score = (1.0 - W_leven) * ngram_score + W_leven * leven_score
        candidates[i] = (candidate_id_name, candidate_name, score)
    candidates = list(
        sorted(
            filter(lambda x: x[2] > KG_NORMALIZATION_RERANK_THRESHOLD, candidates),
            key=lambda x: x[2],
            reverse=True,
        )
    )
    if not candidates:
        return None

    return candidates[0][0]


def _get_existing_normalized_relationships(
    raw_relationships: list[str],
) -> dict[str, dict[str, list[str]]]:
    """
    Get existing normalized relationships from the database.
    """

    relationship_type_map: dict[str, dict[str, list[str]]] = defaultdict(
        lambda: defaultdict(list)
    )
    relationship_pairs = list(
        {
            (
                get_entity_type(split_relationship_id(relationship)[0]),
                get_entity_type(split_relationship_id(relationship)[2]),
            )
            for relationship in raw_relationships
        }
    )

    with get_session_with_current_tenant() as db_session:
        relationships = get_relationships_for_entity_type_pairs(
            db_session, relationship_pairs
        )

    for relationship in relationships:
        relationship_type_map[relationship.source_entity_type_id_name][
            relationship.target_entity_type_id_name
        ].append(relationship.id_name)

    return relationship_type_map


def normalize_entities(
    raw_entities: list[str],
    raw_entities_w_attributes: list[str],
    allowed_docs_temp_view_name: str | None = None,
) -> NormalizedEntities:
    """
    Match each entity against a list of normalized entities using fuzzy matching.
    Returns the best matching normalized entity for each input entity.

    Args:
        raw_entities: list of entity strings to normalize, w/o attributes
        raw_entities_w_attributes: list of entity strings to normalize, w/ attributes

    Returns:
        list of normalized entity strings
    """
    normalized_entities: list[str] = []
    normalized_entities_w_attributes: list[str] = []
    normalized_map: dict[str, str] = {}

    entity_attributes = [
        get_attributes(attr_entity) for attr_entity in raw_entities_w_attributes
    ]

    mapping: list[str | None] = run_functions_tuples_in_parallel(
        [
            (_normalize_one_entity, (entity, attributes, allowed_docs_temp_view_name))
            for entity, attributes in zip(raw_entities, entity_attributes)
        ]
    )
    for entity, attributes, normalized_entity in zip(
        raw_entities, entity_attributes, mapping
    ):
        if normalized_entity is not None:
            normalized_entities.append(normalized_entity)
            normalized_entities_w_attributes.append(
                make_entity_w_attributes(normalized_entity, attributes)
            )
            normalized_map[entity] = format_entity_id_for_models(normalized_entity)
        else:
            logger.warning(f"No normalized entity found for {entity}")
            normalized_map[entity] = format_entity_id_for_models(entity)

    return NormalizedEntities(
        entities=normalized_entities,
        entities_w_attributes=normalized_entities_w_attributes,
        entity_normalization_map=normalized_map,
    )


def normalize_relationships(
    raw_relationships: list[str], entity_normalization_map: dict[str, str]
) -> NormalizedRelationships:
    """
    Normalize relationships using entity mappings and relationship string matching.

    Args:
        relationships: list of relationships in format "source__relation__target"
        entity_normalization_map: Mapping of raw entities to normalized ones (or None)

    Returns:
        NormalizedRelationships containing normalized relationships and mapping
    """
    # Placeholder for normalized relationship structure
    nor_relationships = _get_existing_normalized_relationships(raw_relationships)

    normalized_rels: list[str] = []
    normalization_map: dict[str, str] = {}

    for raw_rel in raw_relationships:
        # 1. Split and normalize entities
        try:
            source, rel_string, target = split_relationship_id(raw_rel)
        except ValueError:
            raise ValueError(f"Invalid relationship format: {raw_rel}")

        # Check if entities are in normalization map and not None
        norm_source = entity_normalization_map.get(source)
        norm_target = entity_normalization_map.get(target)

        if norm_source is None or norm_target is None:
            logger.warning(f"No normalized entities found for {raw_rel}")
            continue

        # 2. Find candidate normalized relationships
        candidate_rels = []
        norm_source_type = get_entity_type(format_entity_id_for_models(norm_source))
        norm_target_type = get_entity_type(format_entity_id_for_models(norm_target))
        if (
            norm_source_type in nor_relationships
            and norm_target_type in nor_relationships[norm_source_type]
        ):
            candidate_rels = [
                split_relationship_id(rel)[1]
                for rel in nor_relationships[norm_source_type][norm_target_type]
            ]

        if not candidate_rels:
            logger.warning(f"No candidate relationships found for {raw_rel}")
            continue

        # 3. Encode and find best match
        strings_to_encode = [rel_string] + candidate_rels
        vectors = encode_string_batch(strings_to_encode)

        # Get raw relation vector and candidate vectors
        raw_vector = vectors[0]
        candidate_vectors = vectors[1:]

        # Calculate dot products
        dot_products = np.dot(candidate_vectors, raw_vector)
        best_match_idx = np.argmax(dot_products)

        # Create normalized relationship
        norm_rel = make_relationship_id(
            norm_source, candidate_rels[best_match_idx], norm_target
        )
        normalized_rels.append(norm_rel)
        normalization_map[raw_rel] = norm_rel

    return NormalizedRelationships(
        relationships=normalized_rels, relationship_normalization_map=normalization_map
    )


================================================
FILE: backend/onyx/kg/extractions/extraction_processing.py
================================================
import time
from typing import Any

from redis.lock import Lock as RedisLock

from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.db.connector import get_kg_enabled_connectors
from onyx.db.document import get_document_updated_at
from onyx.db.document import get_skipped_kg_documents
from onyx.db.document import get_unprocessed_kg_document_batch_for_connector
from onyx.db.document import update_document_kg_info
from onyx.db.document import update_document_kg_stage
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.entities import delete_from_kg_entities__no_commit
from onyx.db.entities import upsert_staging_entity
from onyx.db.entity_type import get_entity_types
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import validate_kg_settings
from onyx.db.models import Document
from onyx.db.models import KGStage
from onyx.db.relationships import delete_from_kg_relationships__no_commit
from onyx.db.relationships import upsert_staging_relationship
from onyx.db.relationships import upsert_staging_relationship_type
from onyx.kg.models import KGClassificationInstructions
from onyx.kg.models import KGDocumentDeepExtractionResults
from onyx.kg.models import KGEnhancedDocumentMetadata
from onyx.kg.models import KGEntityTypeInstructions
from onyx.kg.models import KGExtractionInstructions
from onyx.kg.models import KGImpliedExtractionResults
from onyx.kg.utils.extraction_utils import EntityTypeMetadataTracker
from onyx.kg.utils.extraction_utils import (
    get_batch_documents_metadata,
)
from onyx.kg.utils.extraction_utils import kg_deep_extraction
from onyx.kg.utils.extraction_utils import (
    kg_implied_extraction,
)
from onyx.kg.utils.formatting_utils import extract_relationship_type_id
from onyx.kg.utils.formatting_utils import get_entity_type
from onyx.kg.utils.formatting_utils import split_entity_id
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.kg.utils.lock_utils import extend_lock
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()


def _get_classification_extraction_instructions() -> (
    dict[str | None, dict[str, KGEntityTypeInstructions]]
):
    """
    Prepare the classification instructions for the given source.
    """

    classification_instructions_dict: dict[
        str | None, dict[str, KGEntityTypeInstructions]
    ] = {}

    with get_session_with_current_tenant() as db_session:
        entity_types = get_entity_types(db_session, active=True)

    for entity_type in entity_types:
        grounded_source_name = entity_type.grounded_source_name

        if grounded_source_name not in classification_instructions_dict:
            classification_instructions_dict[grounded_source_name] = {}

        if grounded_source_name is None:
            continue

        attributes = entity_type.parsed_attributes
        classification_attributes = {
            option: info
            for option, info in attributes.classification_attributes.items()
            if info.extraction
        }
        classification_options = ", ".join(classification_attributes.keys())
        classification_enabled = (
            len(classification_options) > 0 and len(classification_attributes) > 0
        )

        classification_instructions_dict[grounded_source_name][entity_type.id_name] = (
            KGEntityTypeInstructions(
                metadata_attribute_conversion=attributes.metadata_attribute_conversion,
                classification_instructions=KGClassificationInstructions(
                    classification_enabled=classification_enabled,
                    classification_options=classification_options,
                    classification_class_definitions=classification_attributes,
                ),
                extraction_instructions=KGExtractionInstructions(
                    deep_extraction=entity_type.deep_extraction,
                    active=entity_type.active,
                ),
                entity_filter_attributes=attributes.entity_filter_attributes,
            )
        )

    return classification_instructions_dict


def _get_batch_documents_enhanced_metadata(
    unprocessed_document_batch: list[Document],
    source_type_classification_extraction_instructions: dict[
        str, KGEntityTypeInstructions
    ],
    connector_source: str,
) -> dict[str, KGEnhancedDocumentMetadata]:
    """
    Get the entity types for the given unprocessed documents.
    """

    kg_document_meta_data_dict: dict[str, KGEnhancedDocumentMetadata] = {
        document.id: KGEnhancedDocumentMetadata(
            entity_type=None,
            metadata_attribute_conversion=None,
            document_metadata=None,
            deep_extraction=False,
            classification_enabled=False,
            classification_instructions=None,
            skip=True,
        )
        for document in unprocessed_document_batch
    }

    batch_entity = None
    if len(source_type_classification_extraction_instructions) == 1:
        # if source only has one entity type, the document must be of that type
        batch_entity = list(source_type_classification_extraction_instructions.keys())[
            0
        ]

    # the documents can be of multiple entity types. We need to identify the entity type for each document
    batch_metadata = get_batch_documents_metadata(
        [
            unprocessed_document.id
            for unprocessed_document in unprocessed_document_batch
        ],
        connector_source,
    )

    for metadata in batch_metadata:
        document_id = metadata.document_id
        doc_entity = None

        if not isinstance(document_id, str):
            continue

        chunk_metadata = metadata.source_metadata

        if batch_entity:
            doc_entity = batch_entity
        else:
            # TODO: make this a helper function
            if not chunk_metadata:
                continue

            for (
                potential_entity_type
            ) in source_type_classification_extraction_instructions.keys():
                potential_entity_type_attribute_filters = (
                    source_type_classification_extraction_instructions[
                        potential_entity_type
                    ].entity_filter_attributes
                    or {}
                )

                if not potential_entity_type_attribute_filters:
                    continue

                if all(
                    chunk_metadata.get(attribute)
                    == potential_entity_type_attribute_filters.get(attribute)
                    for attribute in potential_entity_type_attribute_filters
                ):
                    doc_entity = potential_entity_type
                    break

        if doc_entity is None:
            continue

        entity_instructions = source_type_classification_extraction_instructions[
            doc_entity
        ]

        kg_document_meta_data_dict[document_id] = KGEnhancedDocumentMetadata(
            entity_type=doc_entity,
            metadata_attribute_conversion=(
                source_type_classification_extraction_instructions[
                    doc_entity
                ].metadata_attribute_conversion
            ),
            document_metadata=chunk_metadata,
            deep_extraction=entity_instructions.extraction_instructions.deep_extraction,
            classification_enabled=entity_instructions.classification_instructions.classification_enabled,
            classification_instructions=entity_instructions.classification_instructions,
            skip=False,
        )

    return kg_document_meta_data_dict


def kg_extraction(
    tenant_id: str,
    index_name: str,
    lock: RedisLock,
    processing_chunk_batch_size: int = 8,
) -> None:
    """
    This extraction will try to extract from all chunks that have not been kg-processed yet.

    Approach:
    - Get all connectors that are enabled for KG extraction
    - For each enabled connector:
        - Get unprocessed documents (using a generator)
        - For each batch of unprocessed documents:
            - Classify each document to select proper ones
            - Get and extract from chunks
            - Update chunks in Vespa
            - Update temporary KG extraction tables
            - Update document table to set kg_extracted = True
    """

    logger.info(f"Starting kg extraction for tenant {tenant_id}")

    kg_config_settings = get_kg_config_settings()
    validate_kg_settings(kg_config_settings)

    # get connector ids that are enabled for KG extraction
    with get_session_with_current_tenant() as db_session:
        kg_enabled_connectors = get_kg_enabled_connectors(db_session)

    document_classification_extraction_instructions = (
        _get_classification_extraction_instructions()
    )

    # get entity type info
    with get_session_with_current_tenant() as db_session:
        all_entity_types = get_entity_types(db_session)
        active_entity_types = {
            entity_type.id_name
            for entity_type in get_entity_types(db_session, active=True)
        }

        # entity_type: (metadata: conversion property)
        entity_metadata_conversion_instructions = {
            entity_type.id_name: entity_type.parsed_attributes.metadata_attribute_conversion
            for entity_type in all_entity_types
        }

    # Track which metadata attributes are possible for each entity type
    metadata_tracker = EntityTypeMetadataTracker()
    metadata_tracker.import_typeinfo()

    last_lock_time = time.monotonic()

    # Iterate over connectors that are enabled for KG extraction
    for kg_enabled_connector in kg_enabled_connectors:
        connector_id = kg_enabled_connector.id
        connector_coverage_days = kg_enabled_connector.kg_coverage_days
        connector_source = kg_enabled_connector.source

        document_batch_counter = 0

        # iterate over un-kg-processed documents in connector
        while True:
            # get a batch of unprocessed documents
            with get_session_with_current_tenant() as db_session:
                unprocessed_document_batch = (
                    get_unprocessed_kg_document_batch_for_connector(
                        db_session,
                        connector_id,
                        kg_coverage_start=kg_config_settings.KG_COVERAGE_START_DATE,
                        kg_max_coverage_days=connector_coverage_days
                        or kg_config_settings.KG_MAX_COVERAGE_DAYS,
                        batch_size=processing_chunk_batch_size,
                    )
                )

            if len(unprocessed_document_batch) == 0:
                logger.info(
                    f"No unprocessed documents found for connector {connector_id}. Processed {document_batch_counter} batches."
                )
                break

            document_batch_counter += 1
            last_lock_time = extend_lock(
                lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
            )
            logger.info(f"Processing document batch {document_batch_counter}")

            # Get the document attributes and entity types
            batch_metadata = _get_batch_documents_enhanced_metadata(
                unprocessed_document_batch,
                document_classification_extraction_instructions.get(
                    connector_source, {}
                ),
                connector_source,
            )

            # mark docs in unprocessed_document_batch as EXTRACTING
            for unprocessed_document in unprocessed_document_batch:
                if batch_metadata[unprocessed_document.id].entity_type is None:
                    # info for after the connector has been processed
                    kg_stage = KGStage.SKIPPED
                    logger.debug(
                        f"Document {unprocessed_document.id} is not of any entity type"
                    )
                elif batch_metadata[unprocessed_document.id].skip:
                    # info for after the connector has been processed. But no message as there may be many
                    # purposefully skipped documents
                    kg_stage = KGStage.SKIPPED
                else:
                    kg_stage = KGStage.EXTRACTING

                with get_session_with_current_tenant() as db_session:
                    update_document_kg_stage(
                        db_session,
                        unprocessed_document.id,
                        kg_stage,
                    )

                    if kg_stage == KGStage.EXTRACTING:
                        delete_from_kg_relationships__no_commit(
                            db_session, [unprocessed_document.id]
                        )
                        delete_from_kg_entities__no_commit(
                            db_session, [unprocessed_document.id]
                        )
                    db_session.commit()

            # Iterate over batches of unprocessed documents
            # For each document:
            #   - extract implied entities and relationships
            #   - if deep extraction is enabled, extract entities and relationships with LLM
            #   - if deep extraction and classification are enabled, classify document
            #   - update postgres with
            #     - extracted entities (with classification) and relationships
            #     - kg_stage of the processed document

            documents_to_process = [x.id for x in unprocessed_document_batch]
            batch_implied_extraction: dict[str, KGImpliedExtractionResults] = {}
            batch_deep_extraction_args: list[
                tuple[str, KGEnhancedDocumentMetadata, KGImpliedExtractionResults]
            ] = []

            for unprocessed_document in unprocessed_document_batch:
                if (
                    unprocessed_document.id not in documents_to_process
                    or batch_metadata[unprocessed_document.id].entity_type is None
                    or batch_metadata[unprocessed_document.id].skip
                ):
                    with get_session_with_current_tenant() as db_session:
                        update_document_kg_stage(
                            db_session,
                            unprocessed_document.id,
                            KGStage.SKIPPED,
                        )
                        db_session.commit()
                    continue

                # 1. perform (implicit) KG 'extractions' on the documents that should be processed
                # This is really about assigning document meta-data to KG entities/relationships or KG entity attributes
                # General approach:
                #    - vendor emails to Employee-type entities + relationship to current primary grounded entity
                #    - external account emails to Account-type entities + relationship to current primary grounded entity
                #    - non-email owners to KG current entity's attributes, no relationships
                # We also collect email addresses of vendors and external accounts to inform chunk processing
                batch_implied_extraction[unprocessed_document.id] = (
                    kg_implied_extraction(
                        unprocessed_document,
                        batch_metadata[unprocessed_document.id],
                        active_entity_types,
                        kg_config_settings,
                    )
                )

                # 2. prepare inputs for deep extraction and classification
                if batch_metadata[unprocessed_document.id].deep_extraction:
                    batch_deep_extraction_args.append(
                        (
                            unprocessed_document.id,
                            batch_metadata[unprocessed_document.id],
                            batch_implied_extraction[unprocessed_document.id],
                        )
                    )

            # 2. perform deep extraction and classification in parallel
            batch_deep_extraction_func_calls = [
                (
                    kg_deep_extraction,
                    (
                        *arg,
                        tenant_id,
                        index_name,
                        kg_config_settings,
                    ),
                )
                for arg in batch_deep_extraction_args
            ]
            batch_deep_extractions: dict[str, KGDocumentDeepExtractionResults] = {
                document_id: result
                for document_id, result in zip(
                    documents_to_process,
                    run_functions_tuples_in_parallel(batch_deep_extraction_func_calls),
                )
            }

            # Collect entities and relationships to upsert
            batch_entities: list[tuple[str | None, str]] = []
            batch_relationships: list[tuple[str, str]] = []
            entity_classification: dict[str, str] = {}

            for document_id, implied_metadata in batch_implied_extraction.items():
                batch_entities += [
                    (None, entity) for entity in implied_metadata.implied_entities
                ]
                batch_entities.append((document_id, implied_metadata.document_entity))
                batch_relationships += [
                    (document_id, relationship)
                    for relationship in implied_metadata.implied_relationships
                ]

            for document_id, deep_extraction_result in batch_deep_extractions.items():
                batch_entities += [
                    (None, entity)
                    for entity in deep_extraction_result.deep_extracted_entities
                ]
                for relationship in deep_extraction_result.deep_extracted_relationships:
                    source_entity, _, target_entity = split_relationship_id(
                        relationship
                    )
                    if (
                        source_entity in active_entity_types
                        and target_entity in active_entity_types
                    ):
                        batch_relationships += [(document_id, relationship)]

                classification_result = deep_extraction_result.classification_result
                if not classification_result:
                    continue
                entity_classification[classification_result.document_entity] = (
                    classification_result.classification_class
                )

            # Populate the KG database with the extracted entities, relationships, and terms
            for potential_document_id, entity in batch_entities:
                # verify the entity is valid
                parts = split_entity_id(entity)
                if len(parts) != 2:
                    logger.error(
                        f"Invalid entity {entity} in aggregated_kg_extractions.entities"
                    )
                    continue

                entity_type, entity_name = parts
                entity_type = entity_type.upper()
                entity_name = entity_name.capitalize()

                if entity_type not in active_entity_types:
                    continue

                try:
                    with get_session_with_current_tenant() as db_session:
                        entity_attributes: dict[str, Any] = {}

                        if potential_document_id:
                            entity_attributes = (
                                batch_metadata[potential_document_id].document_metadata
                                or {}
                            )

                        # only keep selected attributes (and translate the attribute names)
                        metadata_attributes = entity_metadata_conversion_instructions[
                            entity_type
                        ]
                        keep_attributes = {
                            metadata_attributes[attr_name].name: attr_val
                            for attr_name, attr_val in entity_attributes.items()
                            if (
                                attr_name in metadata_attributes
                                and metadata_attributes[attr_name].keep
                            )
                        }

                        # add the classification result to the attributes
                        if entity in entity_classification:
                            keep_attributes["classification"] = entity_classification[
                                entity
                            ]

                        event_time = None
                        if potential_document_id:
                            event_time = get_document_updated_at(
                                potential_document_id, db_session
                            )

                        upserted_entity = upsert_staging_entity(
                            db_session=db_session,
                            name=entity_name,
                            entity_type=entity_type,
                            document_id=potential_document_id,
                            occurrences=1,
                            attributes=keep_attributes,
                            event_time=event_time,
                        )
                        metadata_tracker.track_metadata(
                            entity_type, upserted_entity.attributes
                        )

                        db_session.commit()
                except Exception as e:
                    logger.error(f"Error adding entity {entity}. Error message: {e}")

            for document_id, relationship in batch_relationships:
                relationship_split = split_relationship_id(relationship)

                if len(relationship_split) != 3:
                    logger.error(
                        f"Invalid relationship {relationship} in aggregated_kg_extractions.relationships"
                    )
                    continue

                source_entity, relationship_type, target_entity = relationship_split

                source_entity_type = get_entity_type(source_entity)
                target_entity_type = get_entity_type(target_entity)

                if (
                    source_entity_type not in active_entity_types
                    or target_entity_type not in active_entity_types
                ):
                    continue

                relationship_type_id_name = extract_relationship_type_id(relationship)

                with get_session_with_current_tenant() as db_session:
                    try:
                        upsert_staging_relationship_type(
                            db_session=db_session,
                            source_entity_type=source_entity_type.upper(),
                            relationship_type=relationship_type,
                            target_entity_type=target_entity_type.upper(),
                            definition=False,
                            extraction_count=1,
                        )
                        db_session.commit()
                    except Exception as e:
                        logger.error(
                            f"Error adding relationship type {relationship_type_id_name} to the database: {e}"
                        )

                    with get_session_with_current_tenant() as db_session:
                        try:
                            upsert_staging_relationship(
                                db_session=db_session,
                                relationship_id_name=relationship,
                                source_document_id=document_id,
                                occurrences=1,
                            )
                            db_session.commit()
                        except Exception as e:
                            logger.error(
                                f"Error adding relationship {relationship} to the database: {e}"
                            )

            # Populate the Documents table with the kg information for the documents

            for processed_document in documents_to_process:
                with get_session_with_current_tenant() as db_session:
                    update_document_kg_info(
                        db_session,
                        processed_document,
                        KGStage.EXTRACTED,
                    )
                    db_session.commit()

        # Update the the Skipped Docs back to Not Started
        with get_session_with_current_tenant() as db_session:
            skipped_documents = get_skipped_kg_documents(db_session)
            for document_id in skipped_documents:
                update_document_kg_stage(
                    db_session,
                    document_id,
                    KGStage.NOT_STARTED,
                )
                db_session.commit()

    metadata_tracker.export_typeinfo()


================================================
FILE: backend/onyx/kg/models.py
================================================
from datetime import datetime
from enum import Enum
from typing import Any

from pydantic import BaseModel

from onyx.configs.constants import DocumentSource
from onyx.configs.kg_configs import KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH


# Note: make sure to write a migration if adding a non-nullable field or removing a field
class KGConfigSettings(BaseModel):
    KG_EXPOSED: bool = False
    KG_ENABLED: bool = False
    KG_VENDOR: str | None = None
    KG_VENDOR_DOMAINS: list[str] = []
    KG_IGNORE_EMAIL_DOMAINS: list[str] = []
    KG_COVERAGE_START: str = datetime(1970, 1, 1).strftime("%Y-%m-%d")
    KG_MAX_COVERAGE_DAYS: int = 10000
    KG_MAX_PARENT_RECURSION_DEPTH: int = KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH
    KG_BETA_PERSONA_ID: int | None = None

    @property
    def KG_COVERAGE_START_DATE(self) -> datetime:
        return datetime.strptime(self.KG_COVERAGE_START, "%Y-%m-%d")


class KGGroundingType(str, Enum):
    UNGROUNDED = "ungrounded"
    GROUNDED = "grounded"


class KGAttributeTrackType(str, Enum):
    VALUE = "value"
    LIST = "list"


class KGAttributeTrackInfo(BaseModel):
    type: KGAttributeTrackType
    values: set[str] | None


class KGAttributeEntityOption(str, Enum):
    FROM_EMAIL = "from_email"  # use email to determine type (ACCOUNT or EMPLOYEE)


class KGAttributeImplicationProperty(BaseModel):
    # type of implied entity to create
    # if str, will create an implied entity of that type
    # if KGAttributeEntityOption, will determine the type based on the option
    implied_entity_type: str | KGAttributeEntityOption
    # name of the implied relationship to create (from implied entity to this entity)
    implied_relationship_name: str


class KGAttributeProperty(BaseModel):
    # name of attribute to map metadata to
    name: str
    # whether to keep this attribute in the entity
    keep: bool
    # properties for creating implied entities and relations from this metadata
    implication_property: KGAttributeImplicationProperty | None = None


class KGEntityTypeClassificationInfo(BaseModel):
    extraction: bool
    description: str


class KGEntityTypeAttributes(BaseModel):
    # information on how to use the metadata to extract attributes, implied entities, and relations
    metadata_attribute_conversion: dict[str, KGAttributeProperty] = {}
    # a metadata key: value pair to match for to differentiate entities from the same source
    entity_filter_attributes: dict[str, Any] = {}
    # mapping of classification names to their corresponding classification info
    classification_attributes: dict[str, KGEntityTypeClassificationInfo] = {}

    # mapping of attribute names to their allowed values, populated during extraction
    attribute_values: dict[str, KGAttributeTrackInfo | None] = {}


class KGEntityTypeDefinition(BaseModel):
    description: str
    grounding: KGGroundingType
    grounded_source_name: DocumentSource | None
    active: bool = False
    attributes: KGEntityTypeAttributes = KGEntityTypeAttributes()
    entity_values: list[str] = []


class KGChunkFormat(BaseModel):
    connector_id: int | None = None
    document_id: str
    chunk_id: int
    title: str
    content: str
    primary_owners: list[str]
    secondary_owners: list[str]
    source_type: str
    metadata: dict[str, str | list[str]] | None = None


class KGPerson(BaseModel):
    name: str
    company: str
    employee: bool


class NormalizedEntities(BaseModel):
    entities: list[str]
    entities_w_attributes: list[str]
    entity_normalization_map: dict[str, str]


class NormalizedRelationships(BaseModel):
    relationships: list[str]
    relationship_normalization_map: dict[str, str]


class KGMetadataContent(BaseModel):
    document_id: str
    source_type: str
    source_metadata: dict[str, Any] | None = None


class KGClassificationInstructions(BaseModel):
    classification_enabled: bool
    classification_options: str
    classification_class_definitions: dict[str, KGEntityTypeClassificationInfo]


class KGExtractionInstructions(BaseModel):
    deep_extraction: bool
    active: bool


class KGEntityTypeInstructions(BaseModel):
    metadata_attribute_conversion: dict[str, KGAttributeProperty]
    classification_instructions: KGClassificationInstructions
    extraction_instructions: KGExtractionInstructions
    entity_filter_attributes: dict[str, Any] | None = None


class KGEnhancedDocumentMetadata(BaseModel):
    entity_type: str | None
    metadata_attribute_conversion: dict[str, KGAttributeProperty] | None
    document_metadata: dict[str, Any] | None
    deep_extraction: bool
    classification_enabled: bool
    classification_instructions: KGClassificationInstructions | None
    skip: bool


class KGConnectorData(BaseModel):
    id: int
    source: str
    kg_coverage_days: int | None


class KGStage(str, Enum):
    EXTRACTED = "extracted"
    NORMALIZED = "normalized"
    FAILED = "failed"
    SKIPPED = "skipped"
    NOT_STARTED = "not_started"
    EXTRACTING = "extracting"
    DO_NOT_EXTRACT = "do_not_extract"


class KGClassificationResult(BaseModel):
    document_entity: str
    classification_class: str


class KGImpliedExtractionResults(BaseModel):
    document_entity: str
    implied_entities: set[str]
    implied_relationships: set[str]
    company_participant_emails: set[str]
    account_participant_emails: set[str]


class KGDocumentDeepExtractionResults(BaseModel):
    classification_result: KGClassificationResult | None
    deep_extracted_entities: set[str]
    deep_extracted_relationships: set[str]


class KGException(Exception):
    pass


================================================
FILE: backend/onyx/kg/resets/reset_index.py
================================================
from sqlalchemy.orm import Session

from onyx.db.document import reset_all_document_kg_stages
from onyx.db.models import Connector
from onyx.db.models import KGEntity
from onyx.db.models import KGEntityExtractionStaging
from onyx.db.models import KGEntityType
from onyx.db.models import KGRelationship
from onyx.db.models import KGRelationshipExtractionStaging
from onyx.db.models import KGRelationshipType
from onyx.db.models import KGRelationshipTypeExtractionStaging


def reset_full_kg_index__commit(db_session: Session) -> None:
    """
    Resets the knowledge graph index.
    """

    db_session.query(KGRelationship).delete()
    db_session.query(KGRelationshipType).delete()
    db_session.query(KGEntity).delete()
    db_session.query(KGRelationshipExtractionStaging).delete()
    db_session.query(KGEntityExtractionStaging).delete()
    db_session.query(KGRelationshipTypeExtractionStaging).delete()
    # Update all connectors to disable KG processing
    db_session.query(Connector).update({"kg_processing_enabled": False})

    # Only reset grounded entity types
    db_session.query(KGEntityType).filter(
        KGEntityType.grounded_source_name.isnot(None)
    ).update({"active": False})

    reset_all_document_kg_stages(db_session)

    db_session.commit()


================================================
FILE: backend/onyx/kg/resets/reset_source.py
================================================
from redis.lock import Lock as RedisLock
from sqlalchemy import or_

from onyx.configs.constants import DocumentSource
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Connector
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import KGEntity
from onyx.db.models import KGEntityExtractionStaging
from onyx.db.models import KGEntityType
from onyx.db.models import KGRelationship
from onyx.db.models import KGRelationshipExtractionStaging
from onyx.db.models import KGRelationshipType
from onyx.db.models import KGRelationshipTypeExtractionStaging
from onyx.db.models import KGStage
from onyx.kg.resets.reset_index import reset_full_kg_index__commit
from onyx.kg.resets.reset_vespa import reset_vespa_kg_index


def reset_source_kg_index(
    source_name: str | None, tenant_id: str, index_name: str, lock: RedisLock
) -> None:
    """
    Resets the knowledge graph index and vespa for a source.
    """
    # reset vespa for the source
    reset_vespa_kg_index(tenant_id, index_name, lock, source_name)

    with get_session_with_current_tenant() as db_session:
        if source_name is None:
            reset_full_kg_index__commit(db_session)
            return

        # get all the entity types for the given source
        entity_types = [
            et.id_name
            for et in db_session.query(KGEntityType)
            .filter(KGEntityType.grounded_source_name == source_name)
            .all()
        ]
        if not entity_types:
            raise ValueError(f"There are no entity types for the source {source_name}")

        # delete the entity type from the knowledge graph
        for entity_type in entity_types:
            db_session.query(KGRelationship).filter(
                or_(
                    KGRelationship.source_node_type == entity_type,
                    KGRelationship.target_node_type == entity_type,
                )
            ).delete()
            db_session.query(KGRelationshipType).filter(
                or_(
                    KGRelationshipType.source_entity_type_id_name == entity_type,
                    KGRelationshipType.target_entity_type_id_name == entity_type,
                )
            ).delete()
            db_session.query(KGEntity).filter(
                KGEntity.entity_type_id_name == entity_type
            ).delete()
            db_session.query(KGRelationshipExtractionStaging).filter(
                or_(
                    KGRelationshipExtractionStaging.source_node_type == entity_type,
                    KGRelationshipExtractionStaging.target_node_type == entity_type,
                )
            ).delete()
            db_session.query(KGEntityExtractionStaging).filter(
                KGEntityExtractionStaging.entity_type_id_name == entity_type
            ).delete()
            db_session.query(KGRelationshipTypeExtractionStaging).filter(
                or_(
                    KGRelationshipTypeExtractionStaging.source_entity_type_id_name
                    == entity_type,
                    KGRelationshipTypeExtractionStaging.target_entity_type_id_name
                    == entity_type,
                )
            ).delete()
        db_session.commit()

    with get_session_with_current_tenant() as db_session:
        # get all the documents for the given source
        kg_connectors = [
            connector.id
            for connector in db_session.query(Connector)
            .filter(Connector.source == DocumentSource(source_name))
            .all()
        ]
        document_ids = [
            cc_pair.id
            for cc_pair in db_session.query(DocumentByConnectorCredentialPair)
            .filter(DocumentByConnectorCredentialPair.connector_id.in_(kg_connectors))
            .all()
        ]

        # reset the kg stage for the documents
        db_session.query(Document).filter(Document.id.in_(document_ids)).update(
            {"kg_stage": KGStage.NOT_STARTED}
        )
        db_session.commit()


================================================
FILE: backend/onyx/kg/resets/reset_vespa.py
================================================
import time
from typing import Any

from redis.lock import Lock as RedisLock

from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import DocumentSource
from onyx.db.document import get_num_chunks_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Connector
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import KGEntityType
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info
from onyx.document_index.vespa.index import KGVespaChunkUpdateRequest
from onyx.document_index.vespa.index import VespaIndex
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.kg.utils.lock_utils import extend_lock
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()


def _reset_vespa_for_doc(document_id: str, tenant_id: str, index_name: str) -> None:
    vespa_index = VespaIndex(
        index_name=index_name,
        secondary_index_name=None,
        large_chunks_enabled=False,
        secondary_large_chunks_enabled=False,
        multitenant=MULTI_TENANT,
        httpx_client=None,
    )

    reset_update_dict: dict[str, Any] = {
        "fields": {
            "kg_entities": {"assign": []},
            "kg_relationships": {"assign": []},
            "kg_terms": {"assign": []},
        }
    }

    with get_session_with_current_tenant() as db_session:
        num_chunks = get_num_chunks_for_document(db_session, document_id)

    vespa_requests: list[KGVespaChunkUpdateRequest] = []
    for chunk_num in range(num_chunks):
        doc_chunk_id = get_uuid_from_chunk_info(
            document_id=document_id,
            chunk_id=chunk_num,
            tenant_id=tenant_id,
            large_chunk_id=None,
        )
        vespa_requests.append(
            KGVespaChunkUpdateRequest(
                document_id=document_id,
                chunk_id=chunk_num,
                url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=vespa_index.index_name)}/{doc_chunk_id}",
                update_request=reset_update_dict,
            )
        )

    with vespa_index.httpx_client_context as httpx_client:
        vespa_index._apply_kg_chunk_updates_batched(vespa_requests, httpx_client)


def reset_vespa_kg_index(
    tenant_id: str, index_name: str, lock: RedisLock, source_name: str | None = None
) -> None:
    """
    Reset the kg info in vespa for all documents of a given source name,
    or all documents from kg grounded sources if source_name is None.
    """
    logger.info(
        f"Resetting kg vespa index {index_name} for tenant {tenant_id}, source: {source_name if source_name else 'all'}"
    )

    last_lock_time = time.monotonic()

    # Get all documents that need a vespa reset
    with get_session_with_current_tenant() as db_session:
        if source_name:
            # get all connectors of the given source name
            kg_connectors = [
                connector.id
                for connector in db_session.query(Connector)
                .filter(Connector.source == DocumentSource(source_name))
                .all()
            ]
        else:
            # get all connectors that have kg enabled
            kg_sources = [
                DocumentSource(et.grounded_source_name)
                for et in db_session.query(KGEntityType)
                .filter(
                    KGEntityType.grounded_source_name.is_not(None),
                    KGEntityType.active.is_(True),
                )
                .distinct()
                .all()
            ]
            kg_connectors = [
                connector.id
                for connector in db_session.query(Connector)
                .filter(Connector.source.in_(kg_sources))
                .all()
            ]

        # Get all the documents for the given connectors
        document_ids = [
            cc_pair.id
            for cc_pair in db_session.query(DocumentByConnectorCredentialPair)
            .filter(DocumentByConnectorCredentialPair.connector_id.in_(kg_connectors))
            .all()
        ]

    # Reset the kg fields
    for document_id in document_ids:
        _reset_vespa_for_doc(document_id, tenant_id, index_name)
        last_lock_time = extend_lock(
            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time
        )

    logger.info(
        f"Finished resetting kg vespa index {index_name} for tenant {tenant_id}, source: {source_name if source_name else 'all'}"
    )


================================================
FILE: backend/onyx/kg/setup/kg_default_entity_definitions.py
================================================
from typing import cast

from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.entity_type import KGEntityType
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import validate_kg_settings
from onyx.kg.models import KGAttributeEntityOption
from onyx.kg.models import KGAttributeImplicationProperty
from onyx.kg.models import KGAttributeProperty
from onyx.kg.models import KGEntityTypeAttributes
from onyx.kg.models import KGEntityTypeClassificationInfo
from onyx.kg.models import KGEntityTypeDefinition
from onyx.kg.models import KGGroundingType


def get_default_entity_types(vendor_name: str) -> dict[str, KGEntityTypeDefinition]:
    return {
        "LINEAR": KGEntityTypeDefinition(
            description="A formal Linear ticket about a product issue or improvement request.",
            attributes=KGEntityTypeAttributes(
                metadata_attribute_conversion={
                    "team": KGAttributeProperty(name="team", keep=True),
                    "state": KGAttributeProperty(name="state", keep=True),
                    "priority": KGAttributeProperty(name="priority", keep=True),
                    "estimate": KGAttributeProperty(name="estimate", keep=True),
                    "created_at": KGAttributeProperty(name="created_at", keep=True),
                    "started_at": KGAttributeProperty(name="started_at", keep=True),
                    "completed_at": KGAttributeProperty(name="completed_at", keep=True),
                    "due_date": KGAttributeProperty(name="due_date", keep=True),
                    "creator": KGAttributeProperty(
                        name="creator",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_creator_of",
                        ),
                    ),
                    "assignee": KGAttributeProperty(
                        name="assignee",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_assignee_of",
                        ),
                    ),
                },
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.LINEAR,
        ),
        "JIRA": KGEntityTypeDefinition(
            description=(
                "A formal Jira ticket about a product issue or improvement request."
            ),
            attributes=KGEntityTypeAttributes(
                metadata_attribute_conversion={
                    "issuetype": KGAttributeProperty(name="subtype", keep=True),
                    "status": KGAttributeProperty(name="status", keep=True),
                    "priority": KGAttributeProperty(name="priority", keep=True),
                    "project_name": KGAttributeProperty(name="project", keep=True),
                    "created": KGAttributeProperty(name="created_at", keep=True),
                    "updated": KGAttributeProperty(name="updated_at", keep=True),
                    "resolution_date": KGAttributeProperty(
                        name="completed_at", keep=True
                    ),
                    "duedate": KGAttributeProperty(name="due_date", keep=True),
                    "reporter_email": KGAttributeProperty(
                        name="creator",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_creator_of",
                        ),
                    ),
                    "assignee_email": KGAttributeProperty(
                        name="assignee",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_assignee_of",
                        ),
                    ),
                    # not using implication property as that only captures 1 depth
                    "key": KGAttributeProperty(name="key", keep=True),
                    "parent": KGAttributeProperty(name="parent", keep=True),
                },
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.JIRA,
        ),
        "GITHUB_PR": KGEntityTypeDefinition(
            description="A formal engineering request to merge proposed changes into the codebase.",
            attributes=KGEntityTypeAttributes(
                metadata_attribute_conversion={
                    "repo": KGAttributeProperty(name="repository", keep=True),
                    "state": KGAttributeProperty(name="state", keep=True),
                    "num_commits": KGAttributeProperty(name="num_commits", keep=True),
                    "num_files_changed": KGAttributeProperty(
                        name="num_files_changed", keep=True
                    ),
                    "labels": KGAttributeProperty(name="labels", keep=True),
                    "merged": KGAttributeProperty(name="merged", keep=True),
                    "merged_at": KGAttributeProperty(name="merged_at", keep=True),
                    "closed_at": KGAttributeProperty(name="closed_at", keep=True),
                    "created_at": KGAttributeProperty(name="created_at", keep=True),
                    "updated_at": KGAttributeProperty(name="updated_at", keep=True),
                    "user": KGAttributeProperty(
                        name="creator",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_creator_of",
                        ),
                    ),
                    "assignees": KGAttributeProperty(
                        name="assignees",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_assignee_of",
                        ),
                    ),
                },
                entity_filter_attributes={"object_type": "PullRequest"},
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.GITHUB,
        ),
        "GITHUB_ISSUE": KGEntityTypeDefinition(
            description="A formal engineering ticket about an issue, idea, inquiry, or task.",
            attributes=KGEntityTypeAttributes(
                metadata_attribute_conversion={
                    "repo": KGAttributeProperty(name="repository", keep=True),
                    "state": KGAttributeProperty(name="state", keep=True),
                    "labels": KGAttributeProperty(name="labels", keep=True),
                    "closed_at": KGAttributeProperty(name="closed_at", keep=True),
                    "created_at": KGAttributeProperty(name="created_at", keep=True),
                    "updated_at": KGAttributeProperty(name="updated_at", keep=True),
                    "user": KGAttributeProperty(
                        name="creator",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_creator_of",
                        ),
                    ),
                    "assignees": KGAttributeProperty(
                        name="assignees",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,
                            implied_relationship_name="is_assignee_of",
                        ),
                    ),
                },
                entity_filter_attributes={"object_type": "Issue"},
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.GITHUB,
        ),
        "FIREFLIES": KGEntityTypeDefinition(
            description=(
                f"A phone call transcript between us ({vendor_name}) and another account or individuals, or an internal meeting."
            ),
            attributes=KGEntityTypeAttributes(
                classification_attributes={
                    "customer": KGEntityTypeClassificationInfo(
                        extraction=True,
                        description="a call with representatives of one or more customers prospects",
                    ),
                    "internal": KGEntityTypeClassificationInfo(
                        extraction=True,
                        description="a call between employees of the vendor's company (a vendor-internal call)",
                    ),
                    "interview": KGEntityTypeClassificationInfo(
                        extraction=True,
                        description=(
                            "a call with an individual who is interviewed or is discussing potential employment with the vendor"
                        ),
                    ),
                    "other": KGEntityTypeClassificationInfo(
                        extraction=True,
                        description=(
                            "a call with representatives of companies having a different reason for the call "
                            "(investment, partnering, etc.)"
                        ),
                    ),
                },
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.FIREFLIES,
        ),
        "ACCOUNT": KGEntityTypeDefinition(
            description=(
                "A company that was, is, or potentially could be a customer of the vendor "
                f"('us, {vendor_name}'). Note that {vendor_name} can never be an ACCOUNT."
            ),
            attributes=KGEntityTypeAttributes(
                entity_filter_attributes={"object_type": "Account"},
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.SALESFORCE,
        ),
        "OPPORTUNITY": KGEntityTypeDefinition(
            description="A sales opportunity.",
            attributes=KGEntityTypeAttributes(
                metadata_attribute_conversion={
                    "name": KGAttributeProperty(name="name", keep=True),
                    "stage_name": KGAttributeProperty(name="stage", keep=True),
                    "type": KGAttributeProperty(name="type", keep=True),
                    "amount": KGAttributeProperty(name="amount", keep=True),
                    "fiscal_year": KGAttributeProperty(name="fiscal_year", keep=True),
                    "fiscal_quarter": KGAttributeProperty(
                        name="fiscal_quarter", keep=True
                    ),
                    "is_closed": KGAttributeProperty(name="is_closed", keep=True),
                    "close_date": KGAttributeProperty(name="close_date", keep=True),
                    "probability": KGAttributeProperty(
                        name="close_probability", keep=True
                    ),
                    "created_date": KGAttributeProperty(name="created_at", keep=True),
                    "last_modified_date": KGAttributeProperty(
                        name="updated_at", keep=True
                    ),
                    "account": KGAttributeProperty(
                        name="account",
                        keep=False,
                        implication_property=KGAttributeImplicationProperty(
                            implied_entity_type="ACCOUNT",
                            implied_relationship_name="is_account_of",
                        ),
                    ),
                },
                entity_filter_attributes={"object_type": "Opportunity"},
            ),
            grounding=KGGroundingType.GROUNDED,
            grounded_source_name=DocumentSource.SALESFORCE,
        ),
        "VENDOR": KGEntityTypeDefinition(
            description=f"The Vendor {vendor_name}, 'us'",
            grounding=KGGroundingType.GROUNDED,
            active=True,
            grounded_source_name=None,
        ),
        "EMPLOYEE": KGEntityTypeDefinition(
            description=(
                f"A person who speaks on behalf of 'our' company (the VENDOR {vendor_name}), "
                "NOT of another account. Therefore, employees of other companies "
                "are NOT included here. If in doubt, do NOT extract."
            ),
            grounding=KGGroundingType.GROUNDED,
            active=False,
            grounded_source_name=None,
        ),
    }


def populate_missing_default_entity_types__commit(db_session: Session) -> None:
    """
    Populates the database with the missing default entity types.
    """
    kg_config_settings = get_kg_config_settings()
    validate_kg_settings(kg_config_settings)

    vendor_name = cast(str, kg_config_settings.KG_VENDOR)

    existing_entity_types = {et.id_name for et in db_session.query(KGEntityType).all()}

    default_entity_types = get_default_entity_types(vendor_name=vendor_name)
    for entity_type_id_name, entity_type_definition in default_entity_types.items():
        if entity_type_id_name in existing_entity_types:
            continue

        grounded_source_name = (
            entity_type_definition.grounded_source_name.value
            if entity_type_definition.grounded_source_name
            else None
        )
        kg_entity_type = KGEntityType(
            id_name=entity_type_id_name,
            description=entity_type_definition.description,
            attributes=entity_type_definition.attributes.model_dump(),
            grounding=entity_type_definition.grounding,
            grounded_source_name=grounded_source_name,
            active=entity_type_definition.active,
        )
        db_session.add(kg_entity_type)
    db_session.commit()


================================================
FILE: backend/onyx/kg/utils/embeddings.py
================================================
from typing import List

import numpy as np

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.search_settings import get_current_search_settings
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import EmbedTextType
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT


def encode_string_batch(strings: List[str]) -> np.ndarray:
    with get_session_with_current_tenant() as db_session:
        current_search_settings = get_current_search_settings(db_session)
        model = EmbeddingModel.from_db_model(
            search_settings=current_search_settings,
            server_host=MODEL_SERVER_HOST,
            server_port=MODEL_SERVER_PORT,
        )
        # Get embeddings while session is still open
        embedding = model.encode(strings, text_type=EmbedTextType.QUERY)
    return np.array(embedding)


================================================
FILE: backend/onyx/kg/utils/extraction_utils.py
================================================
import json

from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCallTypes
from onyx.configs.kg_configs import KG_METADATA_TRACKING_THRESHOLD
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.entities import get_kg_entity_by_document
from onyx.db.entity_type import get_entity_types
from onyx.db.kg_config import KGConfigSettings
from onyx.db.models import Document
from onyx.db.models import KGEntityType
from onyx.db.models import KGRelationshipType
from onyx.db.tag import get_structured_tags_for_document
from onyx.kg.models import KGAttributeEntityOption
from onyx.kg.models import KGAttributeTrackInfo
from onyx.kg.models import KGAttributeTrackType
from onyx.kg.models import KGChunkFormat
from onyx.kg.models import KGClassificationInstructions
from onyx.kg.models import KGClassificationResult
from onyx.kg.models import KGDocumentDeepExtractionResults
from onyx.kg.models import KGEnhancedDocumentMetadata
from onyx.kg.models import KGImpliedExtractionResults
from onyx.kg.models import KGMetadataContent
from onyx.kg.utils.formatting_utils import extract_email
from onyx.kg.utils.formatting_utils import get_entity_type
from onyx.kg.utils.formatting_utils import kg_email_processing
from onyx.kg.utils.formatting_utils import make_entity_id
from onyx.kg.utils.formatting_utils import make_relationship_id
from onyx.kg.utils.formatting_utils import make_relationship_type_id
from onyx.kg.vespa.vespa_interactions import get_document_vespa_contents
from onyx.llm.factory import get_default_llm
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.prompts.kg_prompts import CALL_CHUNK_PREPROCESSING_PROMPT
from onyx.prompts.kg_prompts import CALL_DOCUMENT_CLASSIFICATION_PROMPT
from onyx.prompts.kg_prompts import GENERAL_CHUNK_PREPROCESSING_PROMPT
from onyx.prompts.kg_prompts import MASTER_EXTRACTION_PROMPT
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_entity_types_str(active: bool | None = None) -> str:
    """
    Format the entity types into a string for the LLM.
    """
    with get_session_with_current_tenant() as db_session:
        entity_types = get_entity_types(db_session, active)

        entity_types_list: list[str] = []
        for entity_type in entity_types:
            if entity_type.description:
                entity_description = "\n  - Description: " + entity_type.description
            else:
                entity_description = ""

            if entity_type.entity_values:
                allowed_values = "\n  - Allowed Values: " + ", ".join(
                    entity_type.entity_values
                )
            else:
                allowed_values = ""

            attributes = entity_type.parsed_attributes

            entity_type_attribute_list: list[str] = []
            for attribute, values in attributes.attribute_values.items():
                entity_type_attribute_list.append(
                    f"{attribute}: {trackinfo_to_str(values)}"
                )

            if attributes.classification_attributes:
                entity_type_attribute_list.append(
                    # TODO: restructure classification attribute to be a dict of attribute name to classification info
                    # e.g., {scope: {internal: prompt, external: prompt}, sentiment: {positive: prompt, negative: prompt}}
                    "classification: one of: "
                    + ", ".join(attributes.classification_attributes.keys())
                )
            if entity_type_attribute_list:
                entity_attributes = "\n  - Attributes:\n    - " + "\n    - ".join(
                    entity_type_attribute_list
                )
            else:
                entity_attributes = ""

            entity_types_list.append(
                entity_type.id_name
                + entity_description
                + allowed_values
                + entity_attributes
            )

    return "\n".join(entity_types_list)


def get_relationship_types_str(active: bool | None = None) -> str:
    """
    Format the relationship types into a string for the LLM.
    """
    with get_session_with_current_tenant() as db_session:
        active_filters = []
        if active is not None:
            active_filters.append(KGRelationshipType.active == active)

        relationship_types = (
            db_session.query(KGRelationshipType).filter(*active_filters).all()
        )

        relationship_types_list = []
        for rel_type in relationship_types:
            # Format as "source_type__relationship_type__target_type"
            formatted_type = make_relationship_type_id(
                rel_type.source_entity_type_id_name,
                rel_type.type,
                rel_type.target_entity_type_id_name,
            )
            relationship_types_list.append(formatted_type)

    return "\n".join(relationship_types_list)


def kg_process_owners(
    owner_emails: list[str],
    document_entity_id: str,
    relationship_type: str,
    kg_config_settings: KGConfigSettings,
    active_entity_types: set[str],
) -> tuple[set[str], set[str], set[str], set[str]]:
    owner_entities: set[str] = set()
    owner_relationships: set[str] = set()
    company_participant_emails: set[str] = set()
    account_participant_emails: set[str] = set()

    for owner_email in owner_emails:
        if extract_email(owner_email) is None:
            continue

        process_results = kg_process_person(
            owner_email,
            document_entity_id,
            relationship_type,
            kg_config_settings,
            active_entity_types,
        )
        if process_results is None:
            continue

        (
            owner_entity,
            owner_relationship,
            company_participant_email,
            account_participant_email,
        ) = process_results

        owner_entities.add(owner_entity)
        owner_relationships.add(owner_relationship)
        if company_participant_email:
            company_participant_emails.add(company_participant_email)
        if account_participant_email:
            account_participant_emails.add(account_participant_email)

    return (
        owner_entities,
        owner_relationships,
        company_participant_emails,
        account_participant_emails,
    )


def kg_implied_extraction(
    document: Document,
    doc_metadata: KGEnhancedDocumentMetadata,
    active_entity_types: set[str],
    kg_config_settings: KGConfigSettings,
) -> KGImpliedExtractionResults:
    """
    Generate entities, relationships, and attributes for a document.
    """

    # Get document entity and metadata stuff from the KGEnhancedDocumentMetadata
    document_entity_type = doc_metadata.entity_type
    document_metadata = doc_metadata.document_metadata or {}
    metadata_attribute_conversion = doc_metadata.metadata_attribute_conversion
    if document_entity_type is None or metadata_attribute_conversion is None:
        raise ValueError("Entity type and metadata attributes are required")

    implied_entities: set[str] = set()
    implied_relationships: set[str] = set()

    # Quantity needed for call processing - participants from vendor
    company_participant_emails: set[str] = set()
    # Quantity needed for call processing - external participants
    account_participant_emails: set[str] = set()

    # Chunk treatment variables

    document_is_from_call = document_entity_type.lower() in (
        call_type.value.lower() for call_type in OnyxCallTypes
    )

    # Get core entity

    document_id = document.id
    primary_owners = document.primary_owners
    secondary_owners = document.secondary_owners

    with get_session_with_current_tenant() as db_session:
        document_entity = get_kg_entity_by_document(db_session, document_id)

    if document_entity:
        document_entity_id = document_entity.id_name
    else:
        document_entity_id = make_entity_id(document_entity_type, document_id)

    # Get implied entities and relationships from primary/secondary owners

    if document_is_from_call:
        (
            implied_entities,
            implied_relationships,
            company_participant_emails,
            account_participant_emails,
        ) = kg_process_owners(
            owner_emails=(primary_owners or []) + (secondary_owners or []),
            document_entity_id=document_entity_id,
            relationship_type="participates_in",
            kg_config_settings=kg_config_settings,
            active_entity_types=active_entity_types,
        )
    else:
        (
            implied_entities,
            implied_relationships,
            company_participant_emails,
            account_participant_emails,
        ) = kg_process_owners(
            owner_emails=primary_owners or [],
            document_entity_id=document_entity_id,
            relationship_type="leads",
            kg_config_settings=kg_config_settings,
            active_entity_types=active_entity_types,
        )

        (
            participant_entities,
            participant_relationships,
            company_emails,
            account_emails,
        ) = kg_process_owners(
            owner_emails=secondary_owners or [],
            document_entity_id=document_entity_id,
            relationship_type="participates_in",
            kg_config_settings=kg_config_settings,
            active_entity_types=active_entity_types,
        )
        implied_entities.update(participant_entities)
        implied_relationships.update(participant_relationships)
        company_participant_emails.update(company_emails)
        account_participant_emails.update(account_emails)

    # Get implied entities and relationships from document metadata
    for metadata, value in document_metadata.items():
        # get implication property for this metadata
        if metadata not in metadata_attribute_conversion:
            continue
        if (
            implication_property := metadata_attribute_conversion[
                metadata
            ].implication_property
        ) is None:
            continue

        if not isinstance(value, str) and not isinstance(value, list):
            continue
        values: list[str] = [value] if isinstance(value, str) else value

        # create implied entities and relationships
        for item in values:
            if (
                implication_property.implied_entity_type
                == KGAttributeEntityOption.FROM_EMAIL
            ):
                # determine entity type from email
                email = extract_email(item)
                if email is None:
                    continue
                process_results = kg_process_person(
                    email=email,
                    document_entity_id=document_entity_id,
                    relationship_type=implication_property.implied_relationship_name,
                    kg_config_settings=kg_config_settings,
                    active_entity_types=active_entity_types,
                )
                if process_results is None:
                    continue

                (implied_entity, implied_relationship, _, _) = process_results
                implied_entities.add(implied_entity)
                implied_relationships.add(implied_relationship)
            else:
                # use the given entity type
                entity_type = implication_property.implied_entity_type
                if entity_type not in active_entity_types:
                    continue

                implied_entity = make_entity_id(entity_type, item)
                implied_entities.add(implied_entity)
                implied_relationships.add(
                    make_relationship_id(
                        implied_entity,
                        implication_property.implied_relationship_name,
                        document_entity_id,
                    )
                )

    return KGImpliedExtractionResults(
        document_entity=document_entity_id,
        implied_entities=implied_entities,
        implied_relationships=implied_relationships,
        company_participant_emails=company_participant_emails,
        account_participant_emails=account_participant_emails,
    )


def kg_deep_extraction(
    document_id: str,
    metadata: KGEnhancedDocumentMetadata,
    implied_extraction: KGImpliedExtractionResults,
    tenant_id: str,
    index_name: str,
    kg_config_settings: KGConfigSettings,
) -> KGDocumentDeepExtractionResults:
    """
    Perform deep extraction and classification on the document.
    """
    result = KGDocumentDeepExtractionResults(
        classification_result=None,
        deep_extracted_entities=set(),
        deep_extracted_relationships=set(),
    )

    entity_types_str = get_entity_types_str(active=True)
    relationship_types_str = get_relationship_types_str(active=True)

    for i, chunk_batch in enumerate(
        get_document_vespa_contents(document_id, index_name, tenant_id)
    ):
        # use first batch for classification
        if i == 0 and metadata.classification_enabled:
            if not metadata.classification_instructions:
                raise ValueError(
                    "Classification is enabled but no instructions are provided"
                )
            result.classification_result = kg_classify_document(
                document_entity=implied_extraction.document_entity,
                chunk_batch=chunk_batch,
                implied_extraction=implied_extraction,
                classification_instructions=metadata.classification_instructions,
                kg_config_settings=kg_config_settings,
            )

        # deep extract from this chunk batch
        chunk_batch_results = kg_deep_extract_chunks(
            document_entity=implied_extraction.document_entity,
            chunk_batch=chunk_batch,
            implied_extraction=implied_extraction,
            kg_config_settings=kg_config_settings,
            entity_types_str=entity_types_str,
            relationship_types_str=relationship_types_str,
        )
        if chunk_batch_results is not None:
            result.deep_extracted_entities.update(
                chunk_batch_results.deep_extracted_entities
            )
            result.deep_extracted_relationships.update(
                chunk_batch_results.deep_extracted_relationships
            )

    return result


def kg_classify_document(
    document_entity: str,
    chunk_batch: list[KGChunkFormat],
    implied_extraction: KGImpliedExtractionResults,
    classification_instructions: KGClassificationInstructions,
    kg_config_settings: KGConfigSettings,
) -> KGClassificationResult | None:
    # currently, classification is only done for calls
    # TODO: add support (or use same prompt and format) for non-call documents
    entity_type = get_entity_type(document_entity)
    if entity_type not in (call_type.value for call_type in OnyxCallTypes):
        return None

    # prepare prompt
    implied_extraction.document_entity
    company_participants = implied_extraction.company_participant_emails
    account_participants = implied_extraction.account_participant_emails
    content = (
        f"Title: {chunk_batch[0].title}:\nVendor Participants:\n"
        + "".join(f" - {participant}\n" for participant in company_participants)
        + "Other Participants:\n"
        + "".join(f" - {participant}\n" for participant in account_participants)
        + "Call Content:\n"
        + "\n".join(chunk.content for chunk in chunk_batch)
    )
    category_list = {
        cls: definition.description
        for cls, definition in classification_instructions.classification_class_definitions.items()
    }
    prompt = CALL_DOCUMENT_CLASSIFICATION_PROMPT.format(
        beginning_of_call_content=content,
        category_list=category_list,
        category_options=classification_instructions.classification_options,
        vendor=kg_config_settings.KG_VENDOR,
    )

    # classify with LLM with Braintrust tracing
    llm = get_default_llm()
    try:
        prompt_msg = UserMessage(content=prompt)
        with llm_generation_span(
            llm=llm, flow="kg_document_classification", input_messages=[prompt_msg]
        ) as span_generation:
            response = llm.invoke(prompt_msg)
            record_llm_response(span_generation, response)
            raw_classification_result = llm_response_to_string(response)

        classification_result = (
            raw_classification_result.replace("```json", "").replace("```", "").strip()
        )
        # no json parsing here because of reasoning output
        classification_class = classification_result.split("CATEGORY:")[1].strip()

        if (
            classification_class
            in classification_instructions.classification_class_definitions
        ):
            return KGClassificationResult(
                document_entity=document_entity,
                classification_class=classification_class,
            )
    except Exception as e:
        logger.error(f"Failed to classify document {document_entity}. Error: {str(e)}")
    return None


def kg_deep_extract_chunks(
    document_entity: str,
    chunk_batch: list[KGChunkFormat],
    implied_extraction: KGImpliedExtractionResults,
    kg_config_settings: KGConfigSettings,
    entity_types_str: str,
    relationship_types_str: str,
) -> KGDocumentDeepExtractionResults | None:
    # currently, calls are treated differently
    # TODO: either treat some other documents differently too, or ideally all the same way
    entity_type = get_entity_type(document_entity)
    is_call = entity_type in (call_type.value for call_type in OnyxCallTypes)

    content = "\n".join(chunk.content for chunk in chunk_batch)

    # prepare prompt
    if is_call:
        company_participants_str = "".join(
            f" - {participant}\n"
            for participant in implied_extraction.company_participant_emails
        )
        account_participants_str = "".join(
            f" - {participant}\n"
            for participant in implied_extraction.account_participant_emails
        )
        llm_context = CALL_CHUNK_PREPROCESSING_PROMPT.format(
            participant_string=company_participants_str,
            account_participant_string=account_participants_str,
            vendor=kg_config_settings.KG_VENDOR,
            content=content,
        )
    else:
        llm_context = GENERAL_CHUNK_PREPROCESSING_PROMPT.format(
            vendor=kg_config_settings.KG_VENDOR,
            content=content,
        )
    prompt = MASTER_EXTRACTION_PROMPT.format(
        entity_types=entity_types_str,
        relationship_types=relationship_types_str,
    ).replace("---content---", llm_context)

    # extract with LLM with Braintrust tracing
    llm = get_default_llm()
    try:
        prompt_msg = UserMessage(content=prompt)
        with llm_generation_span(
            llm=llm, flow="kg_deep_extraction", input_messages=[prompt_msg]
        ) as span_generation:
            response = llm.invoke(prompt_msg)
            record_llm_response(span_generation, response)
            raw_extraction_result = llm_response_to_string(response)

        cleaned_response = (
            raw_extraction_result.replace("{{", "{")
            .replace("}}", "}")
            .replace("```json\n", "")
            .replace("\n```", "")
            .replace("\n", "")
        )
        first_bracket = cleaned_response.find("{")
        last_bracket = cleaned_response.rfind("}")
        cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
        parsed_result = json.loads(cleaned_response)
        return KGDocumentDeepExtractionResults(
            classification_result=None,
            deep_extracted_entities=set(parsed_result.get("entities", [])),
            deep_extracted_relationships={
                rel.replace(" ", "_") for rel in parsed_result.get("relationships", [])
            },
        )
    except Exception as e:
        failed_chunks = [chunk.chunk_id for chunk in chunk_batch]
        logger.error(
            f"Failed to process chunks {failed_chunks} from document {document_entity}. Error: {str(e)}"
        )
    return None


def kg_process_person(
    email: str,
    document_entity_id: str,
    relationship_type: str,
    kg_config_settings: KGConfigSettings,
    active_entity_types: set[str],
) -> tuple[str, str, str, str] | None:
    """
    Create an employee or account entity from an email address, and a relationship to
    the entity from the document that the email is from.

    Returns:
        tuple containing (person_entity, person_relationship, company_participant_email,
        and account_participant_email), or None if the created entity is not of an
        active entity type or is from an ignored email domain.
    """
    kg_person = kg_email_processing(email, kg_config_settings)
    if any(
        domain.lower() in kg_person.company.lower()
        for domain in kg_config_settings.KG_IGNORE_EMAIL_DOMAINS
    ):
        return None

    person_entity = None
    if kg_person.employee and "EMPLOYEE" in active_entity_types:
        person_entity = make_entity_id("EMPLOYEE", kg_person.name)
    elif not kg_person.employee and "ACCOUNT" in active_entity_types:
        person_entity = make_entity_id("ACCOUNT", kg_person.company)

    if person_entity:
        is_account = person_entity.startswith("ACCOUNT")
        participant_email = f"{kg_person.name} -- ({kg_person.company})"
        return (
            person_entity,
            make_relationship_id(person_entity, relationship_type, document_entity_id),
            participant_email if not is_account else "",
            participant_email if is_account else "",
        )

    return None


def get_batch_documents_metadata(
    document_ids: list[str], connector_source: str
) -> list[KGMetadataContent]:
    """
    Gets the metadata for a batch of documents.
    """
    batch_metadata: list[KGMetadataContent] = []
    source_type = DocumentSource(connector_source).value

    with get_session_with_current_tenant() as db_session:
        for document_id in document_ids:
            # get document metadata
            metadata = get_structured_tags_for_document(document_id, db_session)

            batch_metadata.append(
                KGMetadataContent(
                    document_id=document_id,
                    source_type=source_type,
                    source_metadata=metadata,
                )
            )
    return batch_metadata


def trackinfo_to_str(trackinfo: KGAttributeTrackInfo | None) -> str:
    """Convert trackinfo to an LLM friendly string"""
    if trackinfo is None:
        return ""

    if trackinfo.type == KGAttributeTrackType.LIST:
        if trackinfo.values is None:
            return "a list of any suitable values"
        return "a list with possible values: " + ", ".join(trackinfo.values)
    elif trackinfo.type == KGAttributeTrackType.VALUE:
        if trackinfo.values is None:
            return "any suitable value"
        return "one of: " + ", ".join(trackinfo.values)


def trackinfo_to_dict(trackinfo: KGAttributeTrackInfo | None) -> dict | None:
    if trackinfo is None:
        return None
    return {
        "type": trackinfo.type,
        "values": (list(trackinfo.values) if trackinfo.values else None),
    }


class EntityTypeMetadataTracker:
    def __init__(self) -> None:
        """
        Tracks the possible values the metadata attributes can take for each entity type.
        """
        # entity type -> attribute -> trackinfo
        self.entity_attr_info: dict[str, dict[str, KGAttributeTrackInfo | None]] = {}
        self.entity_allowed_attrs: dict[str, set[str]] = {}

    def import_typeinfo(self) -> None:
        """
        Loads the metadata tracking information from the database.
        """
        with get_session_with_current_tenant() as db_session:
            entity_types = db_session.query(KGEntityType).all()

        for entity_type in entity_types:
            self.entity_attr_info[entity_type.id_name] = (
                entity_type.parsed_attributes.attribute_values
            )
            self.entity_allowed_attrs[entity_type.id_name] = {
                attr.name
                for attr in entity_type.parsed_attributes.metadata_attribute_conversion.values()
            }

    def export_typeinfo(self) -> None:
        """
        Exports the metadata tracking information to the database.
        """
        with get_session_with_current_tenant() as db_session:
            for entity_type_id_name, attribute_values in self.entity_attr_info.items():
                db_session.query(KGEntityType).filter(
                    KGEntityType.id_name == entity_type_id_name
                ).update(
                    {
                        KGEntityType.attributes: KGEntityType.attributes.op("||")(
                            {
                                "attribute_values": {
                                    attr: trackinfo_to_dict(info)
                                    for attr, info in attribute_values.items()
                                }
                            }
                        )
                    },
                    synchronize_session=False,
                )
            db_session.commit()

    def track_metadata(
        self, entity_type: str, attributes: dict[str, str | list[str]]
    ) -> None:
        """
        Tracks which values are possible for the given attributes.
        If the attribute value is a list, we track the values in the list rather than the list itself.
        If we see to many different values, we stop tracking the attribute.
        """
        for attribute, value in attributes.items():
            # ignore types/metadata we are not tracking
            if entity_type not in self.entity_attr_info:
                continue
            if attribute not in self.entity_allowed_attrs[entity_type]:
                continue

            # determine if the attribute is a list or a value
            trackinfo = self.entity_attr_info[entity_type].get(attribute, None)
            if trackinfo is None:
                trackinfo = KGAttributeTrackInfo(
                    type=(
                        KGAttributeTrackType.VALUE
                        if isinstance(value, str)
                        else KGAttributeTrackType.LIST
                    ),
                    values=set(),
                )
                self.entity_attr_info[entity_type][attribute] = trackinfo

            # None means marked as don't track
            if trackinfo.values is None:
                continue

            # track the value
            if isinstance(value, str):
                trackinfo.values.add(value)
            else:
                trackinfo.type = KGAttributeTrackType.LIST
                trackinfo.values.update(value)

            # if we see to many different values, we stop tracking
            if len(trackinfo.values) > KG_METADATA_TRACKING_THRESHOLD:
                trackinfo.values = None


================================================
FILE: backend/onyx/kg/utils/formatting_utils.py
================================================
import re

from onyx.db.kg_config import KGConfigSettings
from onyx.kg.models import KGPerson


def format_entity_id(entity_id_name: str) -> str:
    return make_entity_id(*split_entity_id(entity_id_name))


def make_entity_id(entity_type: str, entity_name: str) -> str:
    return f"{entity_type.upper()}::{entity_name.lower()}"


def split_entity_id(entity_id_name: str) -> list[str]:
    return entity_id_name.split("::")


def get_entity_type(entity_id_name: str) -> str:
    return entity_id_name.split("::", 1)[0].upper()


def format_entity_id_for_models(entity_id_name: str) -> str:
    entity_split = entity_id_name.split("::")
    if len(entity_split) == 2:
        entity_type, entity_name = entity_split
        separator = "::"
    elif len(entity_split) > 2:
        raise ValueError(f"Entity {entity_id_name} is not in the correct format")
    else:
        entity_name = entity_id_name
        separator = entity_type = ""

    formatted_entity_type = entity_type.strip().upper()
    formatted_entity_name = entity_name.strip().replace('"', "").replace("'", "")

    return f"{formatted_entity_type}{separator}{formatted_entity_name}"


def get_attributes(entity_w_attributes: str) -> dict[str, str]:
    """
    Extract attributes from an entity string.
    E.g., "TYPE::Entity--[attr1: value1, attr2: value2]" -> {"attr1": "value1", "attr2": "value2"}
    """
    attr_split = entity_w_attributes.split("--")
    if len(attr_split) != 2:
        raise ValueError(f"Invalid entity with attributes: {entity_w_attributes}")

    match = re.search(r"\[(.*)\]", attr_split[1])
    if not match:
        return {}

    attr_list_str = match.group(1)
    return {
        attr_split[0].strip(): attr_split[1].strip()
        for attr in attr_list_str.split(",")
        if len(attr_split := attr.split(":", 1)) == 2
    }


def make_entity_w_attributes(entity: str, attributes: dict[str, str]) -> str:
    return f"{entity}--[{', '.join(f'{k}: {v}' for k, v in attributes.items())}]"


def format_relationship_id(relationship_id_name: str) -> str:
    return make_relationship_id(*split_relationship_id(relationship_id_name))


def make_relationship_id(
    source_node: str, relationship_type: str, target_node: str
) -> str:
    return f"{format_entity_id(source_node)}__{relationship_type.lower()}__{format_entity_id(target_node)}"


def split_relationship_id(relationship_id_name: str) -> list[str]:
    return relationship_id_name.split("__")


def format_relationship_type_id(relationship_type_id_name: str) -> str:
    return make_relationship_type_id(
        *split_relationship_type_id(relationship_type_id_name)
    )


def make_relationship_type_id(
    source_node_type: str, relationship_type: str, target_node_type: str
) -> str:
    return f"{source_node_type.upper()}__{relationship_type.lower()}__{target_node_type.upper()}"


def split_relationship_type_id(relationship_type_id_name: str) -> list[str]:
    return relationship_type_id_name.split("__")


def extract_relationship_type_id(relationship_id_name: str) -> str:
    source_node, relationship_type, target_node = split_relationship_id(
        relationship_id_name
    )
    return make_relationship_type_id(
        get_entity_type(source_node), relationship_type, get_entity_type(target_node)
    )


def extract_email(email: str) -> str | None:
    """
    Extract an email from an arbitrary string (if any).
    Only the first email is returned.
    """
    match = re.search(r"([A-Za-z0-9._+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+)", email)
    return match.group(0) if match else None


def kg_email_processing(email: str, kg_config_settings: KGConfigSettings) -> KGPerson:
    """
    Process the email.
    """
    name, company_domain = email.split("@")
    assert isinstance(company_domain, str)
    assert isinstance(kg_config_settings.KG_VENDOR_DOMAINS, list)
    assert isinstance(kg_config_settings.KG_VENDOR, str)

    employee = any(
        domain in company_domain for domain in kg_config_settings.KG_VENDOR_DOMAINS
    )
    if employee:
        company = kg_config_settings.KG_VENDOR
    else:
        # TODO: maybe store a list of domains for each account and use that to match
        # right now, gmail and other random domains are being converted into accounts
        company = company_domain.title()

    return KGPerson(name=name, company=company, employee=employee)


================================================
FILE: backend/onyx/kg/utils/lock_utils.py
================================================
import time

from redis.lock import Lock as RedisLock


def extend_lock(lock: RedisLock, timeout: int, last_lock_time: float) -> float:
    current_time = time.monotonic()
    if current_time - last_lock_time >= (timeout / 4):
        lock.reacquire()
        last_lock_time = current_time

    return last_lock_time


================================================
FILE: backend/onyx/kg/vespa/vespa_interactions.py
================================================
import json
from collections.abc import Generator

from onyx.document_index.vespa.chunk_retrieval import get_chunks_via_visit_api
from onyx.document_index.vespa.chunk_retrieval import VespaChunkRequest
from onyx.document_index.vespa.index import IndexFilters
from onyx.kg.models import KGChunkFormat
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_document_vespa_contents(
    document_id: str,
    index_name: str,
    tenant_id: str,
    batch_size: int = 8,
) -> Generator[list[KGChunkFormat], None, None]:
    """
    Retrieves chunks from Vespa for the given document IDs and converts them to KGChunks.

    Args:
        document_id (str): ID of the document to fetch chunks for
        index_name (str): Name of the Vespa index
        tenant_id (str): ID of the tenant
        batch_size (int): Number of chunks to fetch per batch

    Yields:
        list[KGChunk]: Batches of chunks ready for KG processing
    """

    current_batch: list[KGChunkFormat] = []

    # get all chunks for the document
    # TODO: revisit the visit function
    chunks = get_chunks_via_visit_api(
        chunk_request=VespaChunkRequest(document_id=document_id),
        index_name=index_name,
        filters=IndexFilters(access_control_list=None, tenant_id=tenant_id),
        field_names=[
            "document_id",
            "chunk_id",
            "title",
            "content",
            "metadata",
            "primary_owners",
            "secondary_owners",
            "source_type",
        ],
        get_large_chunks=False,
    )

    # Convert Vespa chunks to KGChunks
    # kg_chunks: list[KGChunkFormat] = []

    for i, chunk in enumerate(chunks):
        fields = chunk["fields"]
        if isinstance(fields.get("metadata", {}), str):
            fields["metadata"] = json.loads(fields["metadata"])
        current_batch.append(
            KGChunkFormat(
                connector_id=None,  # We may need to adjust this
                document_id=fields.get("document_id"),
                chunk_id=fields.get("chunk_id"),
                primary_owners=fields.get("primary_owners", []),
                secondary_owners=fields.get("secondary_owners", []),
                source_type=fields.get("source_type", ""),
                title=fields.get("title", ""),
                content=fields.get("content", ""),
                metadata=fields.get("metadata", {}),
            )
        )

        if len(current_batch) >= batch_size:
            yield current_batch
            current_batch = []

    # Yield any remaining chunks
    if current_batch:
        yield current_batch


================================================
FILE: backend/onyx/llm/__init__.py
================================================


================================================
FILE: backend/onyx/llm/constants.py
================================================
"""
LLM Constants

Centralized constants for LLM providers, vendors, and display names.
"""

from enum import Enum


# Provider names
class LlmProviderNames(str, Enum):
    """
    Canonical string identifiers for LLM providers.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    BEDROCK = "bedrock"
    BEDROCK_CONVERSE = "bedrock_converse"
    VERTEX_AI = "vertex_ai"
    OPENROUTER = "openrouter"
    AZURE = "azure"
    OLLAMA_CHAT = "ollama_chat"
    LM_STUDIO = "lm_studio"
    MISTRAL = "mistral"
    LITELLM_PROXY = "litellm_proxy"
    BIFROST = "bifrost"

    def __str__(self) -> str:
        """Needed so things like:

        f"{LlmProviderNames.OPENAI}/" gives back "openai/" instead of "LlmProviderNames.OPENAI/"
        """
        return self.value


WELL_KNOWN_PROVIDER_NAMES = [
    LlmProviderNames.OPENAI,
    LlmProviderNames.ANTHROPIC,
    LlmProviderNames.VERTEX_AI,
    LlmProviderNames.BEDROCK,
    LlmProviderNames.OPENROUTER,
    LlmProviderNames.AZURE,
    LlmProviderNames.OLLAMA_CHAT,
    LlmProviderNames.LM_STUDIO,
    LlmProviderNames.LITELLM_PROXY,
    LlmProviderNames.BIFROST,
]


# Proper capitalization for known providers and vendors
PROVIDER_DISPLAY_NAMES: dict[str, str] = {
    LlmProviderNames.OPENAI: "OpenAI",
    LlmProviderNames.ANTHROPIC: "Anthropic",
    LlmProviderNames.GOOGLE: "Google",
    LlmProviderNames.BEDROCK: "Bedrock",
    LlmProviderNames.BEDROCK_CONVERSE: "Bedrock",
    LlmProviderNames.VERTEX_AI: "Vertex AI",
    LlmProviderNames.OPENROUTER: "OpenRouter",
    LlmProviderNames.AZURE: "Azure",
    "ollama": "Ollama",
    LlmProviderNames.OLLAMA_CHAT: "Ollama",
    LlmProviderNames.LM_STUDIO: "LM Studio",
    LlmProviderNames.LITELLM_PROXY: "LiteLLM Proxy",
    LlmProviderNames.BIFROST: "Bifrost",
    "groq": "Groq",
    "anyscale": "Anyscale",
    "deepseek": "DeepSeek",
    "xai": "xAI",
    LlmProviderNames.MISTRAL: "Mistral",
    "mistralai": "Mistral",  # Alias used by some providers
    "cohere": "Cohere",
    "perplexity": "Perplexity",
    "amazon": "Amazon",
    "meta": "Meta",
    "meta-llama": "Meta",  # Alias used by some providers
    "ai21": "AI21",
    "nvidia": "NVIDIA",
    "databricks": "Databricks",
    "alibaba": "Alibaba",
    "qwen": "Qwen",
    "microsoft": "Microsoft",
    "gemini": "Gemini",
    "stability": "Stability",
    "writer": "Writer",
}

# Map vendors to their brand names (used for provider_display_name generation)
VENDOR_BRAND_NAMES: dict[str, str] = {
    "anthropic": "Claude",
    "openai": "GPT",
    "google": "Gemini",
    "amazon": "Nova",
    "meta": "Llama",
    "mistral": "Mistral",
    "cohere": "Command",
    "deepseek": "DeepSeek",
    "xai": "Grok",
    "perplexity": "Sonar",
    "ai21": "Jamba",
    "nvidia": "Nemotron",
    "qwen": "Qwen",
    "alibaba": "Qwen",
    "writer": "Palmyra",
}

# Aggregator providers that host models from multiple vendors
AGGREGATOR_PROVIDERS: set[str] = {
    LlmProviderNames.BEDROCK,
    LlmProviderNames.BEDROCK_CONVERSE,
    LlmProviderNames.OPENROUTER,
    LlmProviderNames.OLLAMA_CHAT,
    LlmProviderNames.LM_STUDIO,
    LlmProviderNames.VERTEX_AI,
    LlmProviderNames.AZURE,
    LlmProviderNames.LITELLM_PROXY,
    LlmProviderNames.BIFROST,
}

# Model family name mappings for display name generation
# Used by Bedrock display name generator
BEDROCK_MODEL_NAME_MAPPINGS: dict[str, str] = {
    "claude": "Claude",
    "llama": "Llama",
    "mistral": "Mistral",
    "mixtral": "Mixtral",
    "titan": "Titan",
    "nova": "Nova",
    "jamba": "Jamba",
    "command": "Command",
    "deepseek": "DeepSeek",
}

# Used by Ollama display name generator
OLLAMA_MODEL_NAME_MAPPINGS: dict[str, str] = {
    "llama": "Llama",
    "qwen": "Qwen",
    "mistral": "Mistral",
    "deepseek": "DeepSeek",
    "gemma": "Gemma",
    "phi": "Phi",
    "codellama": "Code Llama",
    "starcoder": "StarCoder",
    "wizardcoder": "WizardCoder",
    "vicuna": "Vicuna",
    "orca": "Orca",
    "dolphin": "Dolphin",
    "nous": "Nous",
    "neural": "Neural",
    "mixtral": "Mixtral",
    "falcon": "Falcon",
    "yi": "Yi",
    "command": "Command",
    "zephyr": "Zephyr",
    "openchat": "OpenChat",
    "solar": "Solar",
}

# Bedrock model token limits (AWS doesn't expose this via API)
# Note: Many Bedrock model IDs include context length suffix (e.g., ":200k")
# which is parsed first. This mapping is for models without suffixes.
# Sources:
# - LiteLLM model_prices_and_context_window.json
# - AWS Bedrock documentation and announcement blogs
BEDROCK_MODEL_TOKEN_LIMITS: dict[str, int] = {
    # Anthropic Claude models (new naming: claude-{tier}-{version})
    "claude-opus-4": 200000,
    "claude-sonnet-4": 200000,
    "claude-haiku-4": 200000,
    # Anthropic Claude models (old naming: claude-{version})
    "claude-4": 200000,
    "claude-3-7": 200000,
    "claude-3-5": 200000,
    "claude-3": 200000,
    "claude-v2": 100000,
    "claude-instant": 100000,
    # Amazon Nova models (from LiteLLM)
    "nova-premier": 1000000,
    "nova-pro": 300000,
    "nova-lite": 300000,
    "nova-2-lite": 1000000,  # Nova 2 Lite has 1M context
    "nova-2-sonic": 128000,
    "nova-micro": 128000,
    # Amazon Titan models (from LiteLLM: all text models are 42K)
    "titan-text-premier": 42000,
    "titan-text-express": 42000,
    "titan-text-lite": 42000,
    "titan-tg1": 8000,
    # Meta Llama models (Llama 3 base = 8K, Llama 3.1+ = 128K)
    "llama4": 128000,
    "llama3-3": 128000,
    "llama3-2": 128000,
    "llama3-1": 128000,
    "llama3-8b": 8000,
    "llama3-70b": 8000,
    # Mistral models (Large 2+ = 128K, original Large/Small = 32K)
    "mistral-large-3": 128000,
    "mistral-large-2407": 128000,  # Mistral Large 2
    "mistral-large-2402": 32000,  # Original Mistral Large
    "mistral-large": 128000,  # Default to newer version
    "mistral-small": 32000,
    "mistral-7b": 32000,
    "mixtral-8x7b": 32000,
    "pixtral": 128000,
    "ministral": 128000,
    "magistral": 128000,
    "voxtral": 32000,
    # Cohere models
    "command-r-plus": 128000,
    "command-r": 128000,
    # DeepSeek models
    "deepseek": 64000,
    # Google Gemma models
    "gemma-3": 128000,
    "gemma-2": 8000,
    "gemma": 8000,
    # Qwen models
    "qwen3": 128000,
    "qwen2": 128000,
    # NVIDIA models
    "nemotron": 128000,
    # Writer Palmyra models
    "palmyra": 128000,
    # Moonshot Kimi
    "kimi": 128000,
    # Minimax
    "minimax": 128000,
    # OpenAI (via Bedrock)
    "gpt-oss": 128000,
    # AI21 models (from LiteLLM: Jamba 1.5 = 256K, Jamba Instruct = 70K)
    "jamba-1-5": 256000,
    "jamba-instruct": 70000,
    "jamba": 256000,  # Default to newer version
}


# Models that should keep their hyphenated format in display names
# These are model families where the hyphen is part of the brand name
HYPHENATED_MODEL_NAMES: set[str] = {
    "gpt-oss",
}


# General model prefix to vendor mapping (used as fallback when enrichment data is missing)
# This covers common model families across all providers
MODEL_PREFIX_TO_VENDOR: dict[str, str] = {
    # Google
    "gemini": "google",
    "gemma": "google",
    "palm": "google",
    # Anthropic
    "claude": "anthropic",
    # OpenAI
    "gpt": "openai",
    "o1": "openai",
    "o3": "openai",
    "o4": "openai",
    "chatgpt": "openai",
    # Meta
    "llama": "meta",
    "codellama": "meta",
    # Mistral
    "mistral": "mistral",
    "mixtral": "mistral",
    "codestral": "mistral",
    "ministral": "mistral",
    "pixtral": "mistral",
    "magistral": "mistral",
    # Cohere
    "command": "cohere",
    "aya": "cohere",
    # Amazon
    "nova": "amazon",
    "titan": "amazon",
    # AI21
    "jamba": "ai21",
    # DeepSeek
    "deepseek": "deepseek",
    # Alibaba/Qwen
    "qwen": "alibaba",
    "qwq": "alibaba",
    # Microsoft
    "phi": "microsoft",
    # NVIDIA
    "nemotron": "nvidia",
    # xAI
    "grok": "xai",
}


# Ollama model prefix to vendor mapping (for grouping models by vendor)
OLLAMA_MODEL_TO_VENDOR: dict[str, str] = {
    "llama": "Meta",
    "codellama": "Meta",
    "qwen": "Alibaba",
    "qwq": "Alibaba",
    "mistral": "Mistral",
    "ministral": "Mistral",
    "mixtral": "Mistral",
    "deepseek": "DeepSeek",
    "gemma": "Google",
    "phi": "Microsoft",
    "command": "Cohere",
    "aya": "Cohere",
    "falcon": "TII",
    "yi": "01.AI",
    "starcoder": "BigCode",
    "wizardcoder": "WizardLM",
    "vicuna": "LMSYS",
    "openchat": "OpenChat",
    "solar": "Upstage",
    "orca": "Microsoft",
    "dolphin": "Cognitive Computations",
    "nous": "Nous Research",
    "neural": "Intel",
    "zephyr": "HuggingFace",
    "granite": "IBM",
    "nemotron": "NVIDIA",
    "smollm": "HuggingFace",
}


================================================
FILE: backend/onyx/llm/cost.py
================================================
"""LLM cost calculation utilities."""

from onyx.utils.logger import setup_logger

logger = setup_logger()


def calculate_llm_cost_cents(
    model_name: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> float:
    """
    Calculate the cost in cents for an LLM API call.

    Uses litellm's cost_per_token function to get current pricing.
    Returns 0 if the model is not found or on any error.
    """
    try:
        import litellm

        # cost_per_token returns (prompt_cost, completion_cost) in USD
        prompt_cost_usd, completion_cost_usd = litellm.cost_per_token(
            model=model_name,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )

        # Convert to cents (multiply by 100)
        total_cost_cents = (prompt_cost_usd + completion_cost_usd) * 100
        return total_cost_cents

    except Exception as e:
        # Log but don't fail - unknown models or errors shouldn't block usage
        logger.debug(
            f"Could not calculate cost for model {model_name}: {e}. Assuming cost is 0."
        )
        return 0.0


================================================
FILE: backend/onyx/llm/factory.py
================================================
from collections.abc import Callable
from typing import Any

from onyx.auth.schemas import UserRole
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import can_user_access_llm_provider
from onyx.db.llm import fetch_default_llm_model
from onyx.db.llm import fetch_default_vision_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import fetch_existing_models
from onyx.db.llm import fetch_llm_provider_view
from onyx.db.llm import fetch_user_group_ids
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.multi_llm import LitellmLLM
from onyx.llm.override_models import LLMOverride
from onyx.llm.utils import get_max_input_tokens_from_llm_provider
from onyx.llm.utils import model_supports_image_input
from onyx.llm.well_known_providers.constants import (
    PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING,
)
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.manage.llm.models import LLMProviderView
from onyx.utils.headers import build_llm_extra_headers
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _build_provider_extra_headers(
    provider: str, custom_config: dict[str, str] | None
) -> dict[str, str]:
    if provider in PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING and custom_config:
        raw = custom_config.get(PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING[provider])
        api_key = raw.strip() if raw else None
        if not api_key:
            return {}
        return {
            "Authorization": (
                api_key
                if api_key.lower().startswith("bearer ")
                else f"Bearer {api_key}"
            )
        }

    # Passing these will put Onyx on the OpenRouter leaderboard
    elif provider == LlmProviderNames.OPENROUTER:
        return {
            "HTTP-Referer": "https://onyx.app",
            "X-Title": "Onyx",
        }

    return {}


def _get_model_configured_max_input_tokens(
    llm_provider: LLMProviderView,
    model_name: str,
) -> int | None:
    for model_configuration in llm_provider.model_configurations:
        if model_configuration.name == model_name:
            return model_configuration.max_input_tokens
    return None


def _build_model_kwargs(
    provider: str,
    configured_max_input_tokens: int | None,
) -> dict[str, Any]:
    model_kwargs: dict[str, Any] = {}
    if (
        provider == LlmProviderNames.OLLAMA_CHAT
        and configured_max_input_tokens
        and configured_max_input_tokens > 0
    ):
        model_kwargs["num_ctx"] = configured_max_input_tokens
    return model_kwargs


def get_llm_for_persona(
    persona: Persona | None,
    user: User,
    llm_override: LLMOverride | None = None,
    additional_headers: dict[str, str] | None = None,
) -> LLM:
    if persona is None:
        logger.warning("No persona provided, using default LLM")
        return get_default_llm()

    provider_name_override = llm_override.model_provider if llm_override else None
    model_version_override = llm_override.model_version if llm_override else None
    temperature_override = llm_override.temperature if llm_override else None

    provider_name = provider_name_override or persona.llm_model_provider_override
    if not provider_name:
        return get_default_llm(
            temperature=temperature_override or GEN_AI_TEMPERATURE,
            additional_headers=additional_headers,
        )

    with get_session_with_current_tenant() as db_session:
        provider_model = fetch_existing_llm_provider(provider_name, db_session)
        if not provider_model:
            raise ValueError("No LLM provider found")

        # Fetch user group IDs for access control check
        user_group_ids = fetch_user_group_ids(db_session, user)

        if not can_user_access_llm_provider(
            provider_model, user_group_ids, persona, user.role == UserRole.ADMIN
        ):
            logger.warning(
                "User %s with persona %s cannot access provider %s. Falling back to default provider.",
                user.id,
                persona.id,
                provider_model.name,
            )
            return get_default_llm(
                temperature=temperature_override or GEN_AI_TEMPERATURE,
                additional_headers=additional_headers,
            )

        llm_provider = LLMProviderView.from_model(provider_model)

    model = model_version_override or persona.llm_model_version_override
    if not model:
        raise ValueError("No model name found")

    return llm_from_provider(
        model_name=model,
        llm_provider=llm_provider,
        temperature=temperature_override,
        additional_headers=additional_headers,
    )


def get_default_llm_with_vision(
    timeout: int | None = None,
    temperature: float | None = None,
    additional_headers: dict[str, str] | None = None,
) -> LLM | None:
    """Get an LLM that supports image input, with the following priority:
    1. Use the designated default vision provider if it exists and supports image input
    2. Fall back to the first LLM provider that supports image input

    Returns None if no providers exist or if no provider supports images.
    """

    def create_vision_llm(provider: LLMProviderView, model: str) -> LLM:
        """Helper to create an LLM if the provider supports image input."""
        return llm_from_provider(
            model_name=model,
            llm_provider=provider,
            timeout=timeout,
            temperature=temperature,
            additional_headers=additional_headers,
        )

    provider_map = {}
    with get_session_with_current_tenant() as db_session:
        # Try the default vision provider first
        default_model = fetch_default_vision_model(db_session)
        if default_model:
            if model_supports_image_input(
                default_model.name, default_model.llm_provider.provider
            ):
                logger.info(
                    "Using default vision model: %s (provider=%s)",
                    default_model.name,
                    default_model.llm_provider.provider,
                )
                return create_vision_llm(
                    LLMProviderView.from_model(default_model.llm_provider),
                    default_model.name,
                )
            else:
                logger.warning(
                    "Default vision model %s (provider=%s) does not support "
                    "image input — falling back to searching all providers",
                    default_model.name,
                    default_model.llm_provider.provider,
                )

        # Fall back to searching all providers
        models = fetch_existing_models(
            db_session=db_session,
            flow_types=[LLMModelFlowType.VISION, LLMModelFlowType.CHAT],
        )

        if not models:
            logger.warning(
                "No LLM models with VISION or CHAT flow type found — "
                "image summarization will be disabled"
            )
            return None

        for model in models:
            if model.llm_provider_id not in provider_map:
                provider_map[model.llm_provider_id] = LLMProviderView.from_model(
                    model.llm_provider
                )

    # Search for viable vision model followed by chat models
    # Sort models from VISION to CHAT priority
    sorted_models = sorted(
        models,
        key=lambda x: (
            LLMModelFlowType.VISION in x.llm_model_flow_types,
            LLMModelFlowType.CHAT in x.llm_model_flow_types,
        ),
        reverse=True,
    )

    for model in sorted_models:
        if model_supports_image_input(model.name, model.llm_provider.provider):
            logger.info(
                "Using fallback vision model: %s (provider=%s)",
                model.name,
                model.llm_provider.provider,
            )
            return create_vision_llm(
                provider_map[model.llm_provider_id],
                model.name,
            )

    checked_models = [
        f"{m.name} (provider={m.llm_provider.provider})" for m in sorted_models
    ]
    logger.warning(
        "No vision-capable model found among %d candidates: %s — "
        "image summarization will be disabled",
        len(sorted_models),
        ", ".join(checked_models),
    )
    return None


def llm_from_provider(
    model_name: str,
    llm_provider: LLMProviderView,
    timeout: int | None = None,
    temperature: float | None = None,
    additional_headers: dict[str, str] | None = None,
) -> LLM:
    configured_max_input_tokens = _get_model_configured_max_input_tokens(
        llm_provider=llm_provider, model_name=model_name
    )
    model_kwargs = _build_model_kwargs(
        provider=llm_provider.provider,
        configured_max_input_tokens=configured_max_input_tokens,
    )
    max_input_tokens = (
        configured_max_input_tokens
        if configured_max_input_tokens
        else get_max_input_tokens_from_llm_provider(
            llm_provider=llm_provider, model_name=model_name
        )
    )
    return get_llm(
        provider=llm_provider.provider,
        model=model_name,
        deployment_name=llm_provider.deployment_name,
        api_key=llm_provider.api_key,
        api_base=llm_provider.api_base,
        api_version=llm_provider.api_version,
        custom_config=llm_provider.custom_config,
        timeout=timeout,
        temperature=temperature,
        additional_headers=additional_headers,
        max_input_tokens=max_input_tokens,
        model_kwargs=model_kwargs,
    )


def get_llm_for_contextual_rag(model_name: str, model_provider: str) -> LLM:
    with get_session_with_current_tenant() as db_session:
        llm_provider = fetch_llm_provider_view(db_session, model_provider)
    if not llm_provider:
        raise ValueError("No LLM provider with name {} found".format(model_provider))
    return llm_from_provider(
        model_name=model_name,
        llm_provider=llm_provider,
    )


def get_default_llm(
    timeout: int | None = None,
    temperature: float | None = None,
    additional_headers: dict[str, str] | None = None,
) -> LLM:
    with get_session_with_current_tenant() as db_session:
        model = fetch_default_llm_model(db_session)

        if not model:
            raise ValueError("No default LLM model found")

        return llm_from_provider(
            model_name=model.name,
            llm_provider=LLMProviderView.from_model(model.llm_provider),
            timeout=timeout,
            temperature=temperature,
            additional_headers=additional_headers,
        )


def get_llm(
    provider: str,
    model: str,
    max_input_tokens: int,
    deployment_name: str | None,
    api_key: str | None = None,
    api_base: str | None = None,
    api_version: str | None = None,
    custom_config: dict[str, str] | None = None,
    temperature: float | None = None,
    timeout: int | None = None,
    additional_headers: dict[str, str] | None = None,
    model_kwargs: dict[str, Any] | None = None,
) -> LLM:
    if temperature is None:
        temperature = GEN_AI_TEMPERATURE

    extra_headers = build_llm_extra_headers(additional_headers)

    # NOTE: this is needed since Ollama API key is optional
    # User may access Ollama cloud via locally hosted instance (logged in)
    # or just via the cloud API (not logged in, using API key)
    provider_extra_headers = _build_provider_extra_headers(provider, custom_config)
    if provider_extra_headers:
        extra_headers.update(provider_extra_headers)

    return LitellmLLM(
        model_provider=provider,
        model_name=model,
        deployment_name=deployment_name,
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        timeout=timeout,
        temperature=temperature,
        custom_config=custom_config,
        extra_headers=extra_headers,
        model_kwargs=model_kwargs or {},
        max_input_tokens=max_input_tokens,
    )


def get_llm_tokenizer_encode_func(llm: LLM) -> Callable[[str], list[int]]:
    """Get the tokenizer encode function for an LLM.

    Args:
        llm: The LLM instance to get the tokenizer for

    Returns:
        A callable that encodes a string into a list of token IDs
    """
    llm_provider = llm.config.model_provider
    llm_model_name = llm.config.model_name

    llm_tokenizer = get_tokenizer(
        model_name=llm_model_name,
        provider_type=llm_provider,
    )
    return llm_tokenizer.encode


def get_llm_token_counter(llm: LLM) -> Callable[[str], int]:
    tokenizer_encode_func = get_llm_tokenizer_encode_func(llm)
    return lambda text: len(tokenizer_encode_func(text))


================================================
FILE: backend/onyx/llm/interfaces.py
================================================
import abc
from collections.abc import Iterator

from braintrust import traced
from pydantic import BaseModel

from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import ToolChoiceOptions
from onyx.utils.logger import setup_logger

logger = setup_logger()


class LLMUserIdentity(BaseModel):
    user_id: str | None = None
    session_id: str | None = None


class LLMConfig(BaseModel):
    model_provider: str
    model_name: str
    temperature: float
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None
    custom_config: dict[str, str] | None = None
    max_input_tokens: int
    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}


class LLM(abc.ABC):
    @property
    @abc.abstractmethod
    def config(self) -> LLMConfig:
        raise NotImplementedError

    @traced(name="invoke llm", type="llm")
    def invoke(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        user_identity: LLMUserIdentity | None = None,
    ) -> "ModelResponse":
        raise NotImplementedError

    def stream(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        user_identity: LLMUserIdentity | None = None,
    ) -> Iterator[ModelResponseStream]:
        raise NotImplementedError


================================================
FILE: backend/onyx/llm/litellm_singleton/__init__.py
================================================
"""
Singleton module for litellm configuration.
This ensures litellm is configured exactly once when first imported.
All other modules should import litellm from here instead of directly.
"""

import litellm

from .config import initialize_litellm
from .monkey_patches import apply_monkey_patches

initialize_litellm()
apply_monkey_patches()

# Export the configured litellm module and model
__all__ = ["litellm"]


================================================
FILE: backend/onyx/llm/litellm_singleton/config.py
================================================
import json
from pathlib import Path

import litellm

from onyx.utils.logger import setup_logger

logger = setup_logger()


def configure_litellm_settings() -> None:
    # If a user configures a different model and it doesn't support all the same
    # parameters like frequency and presence, just ignore them
    litellm.drop_params = True
    litellm.telemetry = False
    litellm.modify_params = True
    litellm.add_function_to_prompt = False
    litellm.suppress_debug_info = True


# TODO: We might not need to register ollama_chat in addition to ollama but let's just do it for good measure for now.
def register_ollama_models() -> None:
    litellm.register_model(
        model_cost={
            # GPT-OSS models
            "ollama_chat/gpt-oss:120b-cloud": {"supports_function_calling": True},
            "ollama_chat/gpt-oss:120b": {"supports_function_calling": True},
            "ollama_chat/gpt-oss:20b-cloud": {"supports_function_calling": True},
            "ollama_chat/gpt-oss:20b": {"supports_function_calling": True},
            "ollama/gpt-oss:120b-cloud": {"supports_function_calling": True},
            "ollama/gpt-oss:120b": {"supports_function_calling": True},
            "ollama/gpt-oss:20b-cloud": {"supports_function_calling": True},
            "ollama/gpt-oss:20b": {"supports_function_calling": True},
            # DeepSeek models
            "ollama_chat/deepseek-r1:latest": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:1.5b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:7b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:8b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:14b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:32b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:70b": {"supports_function_calling": True},
            "ollama_chat/deepseek-r1:671b": {"supports_function_calling": True},
            "ollama_chat/deepseek-v3.1:latest": {"supports_function_calling": True},
            "ollama_chat/deepseek-v3.1:671b": {"supports_function_calling": True},
            "ollama_chat/deepseek-v3.1:671b-cloud": {"supports_function_calling": True},
            "ollama/deepseek-r1:latest": {"supports_function_calling": True},
            "ollama/deepseek-r1:1.5b": {"supports_function_calling": True},
            "ollama/deepseek-r1:7b": {"supports_function_calling": True},
            "ollama/deepseek-r1:8b": {"supports_function_calling": True},
            "ollama/deepseek-r1:14b": {"supports_function_calling": True},
            "ollama/deepseek-r1:32b": {"supports_function_calling": True},
            "ollama/deepseek-r1:70b": {"supports_function_calling": True},
            "ollama/deepseek-r1:671b": {"supports_function_calling": True},
            "ollama/deepseek-v3.1:latest": {"supports_function_calling": True},
            "ollama/deepseek-v3.1:671b": {"supports_function_calling": True},
            "ollama/deepseek-v3.1:671b-cloud": {"supports_function_calling": True},
            # Gemma3 models
            "ollama_chat/gemma3:latest": {"supports_function_calling": True},
            "ollama_chat/gemma3:270m": {"supports_function_calling": True},
            "ollama_chat/gemma3:1b": {"supports_function_calling": True},
            "ollama_chat/gemma3:4b": {"supports_function_calling": True},
            "ollama_chat/gemma3:12b": {"supports_function_calling": True},
            "ollama_chat/gemma3:27b": {"supports_function_calling": True},
            "ollama/gemma3:latest": {"supports_function_calling": True},
            "ollama/gemma3:270m": {"supports_function_calling": True},
            "ollama/gemma3:1b": {"supports_function_calling": True},
            "ollama/gemma3:4b": {"supports_function_calling": True},
            "ollama/gemma3:12b": {"supports_function_calling": True},
            "ollama/gemma3:27b": {"supports_function_calling": True},
            # Qwen models
            "ollama_chat/qwen3-coder:latest": {"supports_function_calling": True},
            "ollama_chat/qwen3-coder:30b": {"supports_function_calling": True},
            "ollama_chat/qwen3-coder:480b": {"supports_function_calling": True},
            "ollama_chat/qwen3-coder:480b-cloud": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:latest": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:2b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:4b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:8b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:30b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:32b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:235b": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:235b-cloud": {"supports_function_calling": True},
            "ollama_chat/qwen3-vl:235b-instruct-cloud": {
                "supports_function_calling": True
            },
            "ollama/qwen3-coder:latest": {"supports_function_calling": True},
            "ollama/qwen3-coder:30b": {"supports_function_calling": True},
            "ollama/qwen3-coder:480b": {"supports_function_calling": True},
            "ollama/qwen3-coder:480b-cloud": {"supports_function_calling": True},
            "ollama/qwen3-vl:latest": {"supports_function_calling": True},
            "ollama/qwen3-vl:2b": {"supports_function_calling": True},
            "ollama/qwen3-vl:4b": {"supports_function_calling": True},
            "ollama/qwen3-vl:8b": {"supports_function_calling": True},
            "ollama/qwen3-vl:30b": {"supports_function_calling": True},
            "ollama/qwen3-vl:32b": {"supports_function_calling": True},
            "ollama/qwen3-vl:235b": {"supports_function_calling": True},
            "ollama/qwen3-vl:235b-cloud": {"supports_function_calling": True},
            "ollama/qwen3-vl:235b-instruct-cloud": {"supports_function_calling": True},
            # Kimi
            "ollama_chat/kimi-k2:1t": {"supports_function_calling": True},
            "ollama_chat/kimi-k2:1t-cloud": {"supports_function_calling": True},
            "ollama/kimi-k2:1t": {"supports_function_calling": True},
            "ollama/kimi-k2:1t-cloud": {"supports_function_calling": True},
            # GLM
            "ollama_chat/glm-4.6:cloud": {"supports_function_calling": True},
            "ollama_chat/glm-4.6": {"supports_function_calling": True},
            "ollama/glm-4.6": {"supports_function_calling": True},
            "ollama/glm-4.6-cloud": {"supports_function_calling": True},
        }
    )


def load_model_metadata_enrichments() -> None:
    """
    Load model metadata enrichments from JSON file and merge into litellm.model_cost.

    This adds model_vendor, display_name, and model_version fields
    to litellm's model_cost dict. These fields are used by the UI to display
    models grouped by vendor with human-friendly names.

    Once LiteLLM accepts our upstream PR to add these fields natively,
    this function and the JSON file can be removed.
    """
    enrichments_path = Path(__file__).parent.parent / "model_metadata_enrichments.json"

    if not enrichments_path.exists():
        logger.warning(f"Model metadata enrichments file not found: {enrichments_path}")
        return

    try:
        with open(enrichments_path) as f:
            enrichments = json.load(f)

        # Merge enrichments into litellm.model_cost
        for model_key, metadata in enrichments.items():
            if model_key in litellm.model_cost:
                # Update existing entry with our metadata
                litellm.model_cost[model_key].update(metadata)
            else:
                # Model not in litellm.model_cost - add it with just our metadata
                litellm.model_cost[model_key] = metadata

        logger.info(f"Loaded model metadata enrichments for {len(enrichments)} models")

        # Clear the model name parser cache since enrichments are now loaded
        # This ensures any parsing done before enrichments were loaded gets refreshed
        try:
            from onyx.llm.model_name_parser import parse_litellm_model_name

            parse_litellm_model_name.cache_clear()
        except ImportError:
            pass  # Parser not yet imported, no cache to clear
    except Exception as e:
        logger.error(f"Failed to load model metadata enrichments: {e}")


def initialize_litellm() -> None:
    configure_litellm_settings()
    register_ollama_models()
    load_model_metadata_enrichments()


================================================
FILE: backend/onyx/llm/litellm_singleton/monkey_patches.py
================================================
"""
LiteLLM Monkey Patches

This module addresses the following issues in LiteLLM:

Status checked against LiteLLM v1.81.6-nightly (2026-02-02):

1. Ollama Streaming Reasoning Content (_patch_ollama_chunk_parser):
   - LiteLLM's chunk_parser doesn't properly handle reasoning content in streaming
     responses from Ollama
   - Processes native "thinking" field from Ollama responses
   - Also handles <think>...</think> tags in content for models that use that format
   - Tracks reasoning state to properly separate thinking from regular content
   STATUS: STILL NEEDED - LiteLLM has a bug where it only yields thinking content on
           the first two chunks, then stops (lines 504-510). Our patch correctly yields
           ALL thinking chunks. The upstream logic sets finished_reasoning_content=True
           on the second chunk instead of when regular content starts.

2. OpenAI Responses API Parallel Tool Calls (_patch_openai_responses_parallel_tool_calls):
   - LiteLLM's translate_responses_chunk_to_openai_stream hardcodes index=0 for all tool calls
   - This breaks parallel tool calls where multiple functions are called simultaneously
   - The OpenAI Responses API provides output_index in streaming events to track which
     tool call each event belongs to
   STATUS: STILL NEEDED - LiteLLM hardcodes index=0 in translate_responses_chunk_to_openai_stream
           for response.output_item.added (line 962), response.function_call_arguments.delta
           (line 989), and response.output_item.done (line 1033). Our patch uses output_index
           from the event to properly track parallel tool calls.

3. OpenAI Responses API Non-Streaming (_patch_openai_responses_transform_response):
   - LiteLLM's transform_response doesn't properly concatenate multiple reasoning
     summary parts in non-streaming responses
   - Multiple ReasoningSummaryItem objects should be joined with newlines
   STATUS: STILL NEEDED - LiteLLM's _convert_response_output_to_choices (lines 366-370)
           only keeps the LAST summary item text, discarding earlier parts. Our patch
           concatenates all summary texts with double newlines.

4. Azure Responses API Fake Streaming (_patch_azure_responses_should_fake_stream):
   - LiteLLM uses "fake streaming" (MockResponsesAPIStreamingIterator) for models
     not in its database, which buffers the entire response before yielding
   - This causes poor time-to-first-token for Azure custom model deployments
   - Azure's Responses API supports native streaming, so we force real streaming
   STATUS: STILL NEEDED - AzureOpenAIResponsesAPIConfig does NOT override should_fake_stream,
           so it inherits from OpenAIResponsesAPIConfig which returns True for models not
           in litellm.utils.supports_native_streaming(). Custom Azure deployments will
           still use fake streaming without this patch.

# Note: 5 and 6 are to supress a warning and may fix usage info but is not strictly required for the app to run
5. Responses API Usage Format Mismatch (_patch_responses_api_usage_format):
   - LiteLLM uses model_construct as a fallback in multiple places when
     ResponsesAPIResponse validation fails
   - This bypasses the usage validator, allowing chat completion format usage
     (completion_tokens, prompt_tokens) to be stored instead of Responses API format
     (input_tokens, output_tokens)
   - When model_dump() is later called, Pydantic emits a serialization warning
   STATUS: STILL NEEDED - Multiple files use model_construct which bypasses validation:
           openai/responses/transformation.py, chatgpt/responses/transformation.py,
           manus/responses/transformation.py, volcengine/responses/transformation.py,
           and handler.py. Our patch wraps ResponsesAPIResponse.model_construct itself
           to transform usage in all code paths.

6. Logging Usage Transformation Warning (_patch_logging_assembled_streaming_response):
   - LiteLLM's _get_assembled_streaming_response in litellm_logging.py transforms
     ResponseAPIUsage to chat completion format and sets it as a dict on the
     ResponsesAPIResponse.usage field
   - This replaces the proper ResponseAPIUsage object with a dict, causing Pydantic
     to emit a serialization warning when model_dump() is called later
   STATUS: STILL NEEDED - litellm_core_utils/litellm_logging.py lines 3185-3199 set
           usage as a dict with chat completion format instead of keeping it as
           ResponseAPIUsage. Our patch creates a deep copy before modification.

7. Responses API metadata=None TypeError (_patch_responses_metadata_none):
   - LiteLLM's @client decorator wrapper in utils.py uses kwargs.get("metadata", {})
     to check for router calls, but when metadata is explicitly None (key exists with
     value None), the default {} is not used
   - This causes "argument of type 'NoneType' is not iterable" TypeError which swallows
     the real exception (e.g. AuthenticationError for wrong API key)
   - Surfaces as: APIConnectionError: OpenAIException - argument of type 'NoneType' is
     not iterable
   STATUS: STILL NEEDED - litellm/utils.py wrapper function (line 1721) does not guard
           against metadata being explicitly None. Triggered when Responses API bridge
           passes **litellm_params containing metadata=None.
"""

import time
import uuid
from typing import Any
from typing import cast
from typing import List
from typing import Optional

from litellm.completion_extras.litellm_responses_transformation.transformation import (
    LiteLLMResponsesTransformationHandler,
)
from litellm.completion_extras.litellm_responses_transformation.transformation import (
    OpenAiResponsesToChatCompletionStreamIterator,
)
from litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator
from litellm.llms.ollama.common_utils import OllamaError
from litellm.types.utils import ChatCompletionUsageBlock
from litellm.types.utils import ModelResponseStream


def _patch_ollama_chunk_parser() -> None:
    """
    Patches OllamaChatCompletionResponseIterator.chunk_parser to properly handle
    reasoning content and content in streaming responses.
    """
    if (
        getattr(OllamaChatCompletionResponseIterator.chunk_parser, "__name__", "")
        == "_patched_chunk_parser"
    ):
        return

    def _patched_chunk_parser(self: Any, chunk: dict) -> ModelResponseStream:
        try:
            """
            Expected chunk format:
            {
                "model": "llama3.1",
                "created_at": "2025-05-24T02:12:05.859654Z",
                "message": {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [{
                        "function": {
                            "name": "get_latest_album_ratings",
                            "arguments": {
                                "artist_name": "Taylor Swift"
                            }
                        }
                    }]
                },
                "done_reason": "stop",
                "done": true,
                ...
            }
            Need to:
            - convert 'message' to 'delta'
            - return finish_reason when done is true
            - return usage when done is true
            """
            from litellm.types.utils import Delta
            from litellm.types.utils import StreamingChoices

            # process tool calls - if complete function arg - add id to tool call
            tool_calls = chunk["message"].get("tool_calls")
            if tool_calls is not None:
                for tool_call in tool_calls:
                    function_args = tool_call.get("function").get("arguments")
                    if function_args is not None and len(function_args) > 0:
                        is_function_call_complete = self._is_function_call_complete(
                            function_args
                        )
                        if is_function_call_complete:
                            tool_call["id"] = str(uuid.uuid4())

            # PROCESS REASONING CONTENT
            reasoning_content: Optional[str] = None
            content: Optional[str] = None
            thinking_content = chunk["message"].get("thinking")
            if thinking_content:  # Truthy check: skips None and empty string ""
                reasoning_content = thinking_content
                if self.started_reasoning_content is False:
                    self.started_reasoning_content = True
            if chunk["message"].get("content") is not None:
                message_content = chunk["message"].get("content")
                # Track whether we are inside <think>...</think> tagged content.
                in_think_tag_block = bool(getattr(self, "_in_think_tag_block", False))
                if "<think>" in message_content:
                    message_content = message_content.replace("<think>", "")
                    self.started_reasoning_content = True
                    self.finished_reasoning_content = False
                    in_think_tag_block = True
                if "</think>" in message_content and self.started_reasoning_content:
                    message_content = message_content.replace("</think>", "")
                    self.finished_reasoning_content = True
                    in_think_tag_block = False

                # For native Ollama "thinking" streams, content without active
                # think tags indicates a transition into regular assistant output.
                if (
                    self.started_reasoning_content
                    and not self.finished_reasoning_content
                    and not in_think_tag_block
                    and not thinking_content
                ):
                    self.finished_reasoning_content = True

                self._in_think_tag_block = in_think_tag_block

                # When Ollama returns both "thinking" and "content" in the same
                # chunk, preserve both instead of classifying content as reasoning.
                if thinking_content and not in_think_tag_block:
                    content = message_content
                elif (
                    self.started_reasoning_content
                    and not self.finished_reasoning_content
                ):
                    reasoning_content = message_content
                else:
                    content = message_content

            delta = Delta(
                content=content,
                reasoning_content=reasoning_content,
                tool_calls=tool_calls,
            )
            if chunk["done"] is True:
                finish_reason = chunk.get("done_reason", "stop")
                choices = [
                    StreamingChoices(
                        delta=delta,
                        finish_reason=finish_reason,
                    )
                ]
            else:
                choices = [
                    StreamingChoices(
                        delta=delta,
                    )
                ]

            usage = ChatCompletionUsageBlock(
                prompt_tokens=chunk.get("prompt_eval_count", 0),
                completion_tokens=chunk.get("eval_count", 0),
                total_tokens=chunk.get("prompt_eval_count", 0)
                + chunk.get("eval_count", 0),
            )

            return ModelResponseStream(
                id=str(uuid.uuid4()),
                object="chat.completion.chunk",
                created=int(time.time()),  # ollama created_at is in UTC
                usage=usage,
                model=chunk["model"],
                choices=choices,
            )
        except KeyError as e:
            raise OllamaError(
                message=f"KeyError: {e}, Got unexpected response from Ollama: {chunk}",
                status_code=400,
                headers={"Content-Type": "application/json"},
            )
        except Exception as e:
            raise e

    OllamaChatCompletionResponseIterator.chunk_parser = _patched_chunk_parser  # type: ignore[method-assign]


def _patch_openai_responses_parallel_tool_calls() -> None:
    """
    Patches OpenAiResponsesToChatCompletionStreamIterator to properly handle:
    1. Parallel tool calls by using output_index from streaming events
    2. Reasoning summary sections by inserting newlines between different summary indices

    LiteLLM's implementation hardcodes index=0 for all tool calls, breaking parallel tool calls.
    The OpenAI Responses API provides output_index in each event to track which tool call
    the event belongs to.

    STATUS: STILL NEEDED - LiteLLM hardcodes index=0 in translate_responses_chunk_to_openai_stream
            for response.output_item.added (line 962), response.function_call_arguments.delta
            (line 989), and response.output_item.done (line 1033). Our patch uses output_index
            from the event to properly track parallel tool calls.
    """
    if (
        getattr(
            OpenAiResponsesToChatCompletionStreamIterator.chunk_parser,
            "__name__",
            "",
        )
        == "_patched_responses_chunk_parser"
    ):
        return

    def _patched_responses_chunk_parser(
        self: Any, chunk: dict
    ) -> "ModelResponseStream":
        from pydantic import BaseModel

        from litellm.types.llms.openai import (
            ChatCompletionToolCallFunctionChunk,
            ResponsesAPIStreamEvents,
        )
        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            Delta,
            ModelResponseStream,
            StreamingChoices,
        )

        parsed_chunk = chunk
        if not parsed_chunk:
            raise ValueError("Chat provider: Empty parsed_chunk")

        if isinstance(parsed_chunk, BaseModel):
            parsed_chunk = parsed_chunk.model_dump()
        if not isinstance(parsed_chunk, dict):
            raise ValueError(f"Chat provider: Invalid chunk type {type(parsed_chunk)}")

        event_type = parsed_chunk.get("type")
        if isinstance(event_type, ResponsesAPIStreamEvents):
            event_type = event_type.value

        # Get the output_index for proper parallel tool call tracking
        output_index = parsed_chunk.get("output_index", 0)

        if event_type == "response.output_item.added":
            output_item = parsed_chunk.get("item", {})
            if output_item.get("type") == "function_call":
                provider_specific_fields = output_item.get("provider_specific_fields")
                if provider_specific_fields and not isinstance(
                    provider_specific_fields, dict
                ):
                    provider_specific_fields = (
                        dict(provider_specific_fields)
                        if hasattr(provider_specific_fields, "__dict__")
                        else {}
                    )

                function_chunk = ChatCompletionToolCallFunctionChunk(
                    name=output_item.get("name", None),
                    arguments=parsed_chunk.get("arguments", ""),
                )
                if provider_specific_fields:
                    function_chunk["provider_specific_fields"] = (
                        provider_specific_fields
                    )

                tool_call_chunk = ChatCompletionToolCallChunk(
                    id=output_item.get("call_id"),
                    index=output_index,  # Use output_index for parallel tool calls
                    type="function",
                    function=function_chunk,
                )
                if provider_specific_fields:
                    tool_call_chunk.provider_specific_fields = provider_specific_fields  # type: ignore

                return ModelResponseStream(
                    choices=[
                        StreamingChoices(
                            index=0,
                            delta=Delta(tool_calls=[tool_call_chunk]),
                            finish_reason=None,
                        )
                    ]
                )

        elif event_type == "response.function_call_arguments.delta":
            content_part: Optional[str] = parsed_chunk.get("delta", None)
            if content_part:
                return ModelResponseStream(
                    choices=[
                        StreamingChoices(
                            index=0,
                            delta=Delta(
                                tool_calls=[
                                    ChatCompletionToolCallChunk(
                                        id=None,
                                        index=output_index,  # Use output_index for parallel tool calls
                                        type="function",
                                        function=ChatCompletionToolCallFunctionChunk(
                                            name=None, arguments=content_part
                                        ),
                                    )
                                ]
                            ),
                            finish_reason=None,
                        )
                    ]
                )
            else:
                raise ValueError(
                    f"Chat provider: Invalid function argument delta {parsed_chunk}"
                )

        elif event_type == "response.output_item.done":
            output_item = parsed_chunk.get("item", {})
            if output_item.get("type") == "function_call":
                provider_specific_fields = output_item.get("provider_specific_fields")
                if provider_specific_fields and not isinstance(
                    provider_specific_fields, dict
                ):
                    provider_specific_fields = (
                        dict(provider_specific_fields)
                        if hasattr(provider_specific_fields, "__dict__")
                        else {}
                    )

                function_chunk = ChatCompletionToolCallFunctionChunk(
                    name=output_item.get("name", None),
                    arguments="",  # responses API sends everything again, we don't need it
                )
                if provider_specific_fields:
                    function_chunk["provider_specific_fields"] = (
                        provider_specific_fields
                    )

                tool_call_chunk = ChatCompletionToolCallChunk(
                    id=output_item.get("call_id"),
                    index=output_index,  # Use output_index for parallel tool calls
                    type="function",
                    function=function_chunk,
                )
                if provider_specific_fields:
                    tool_call_chunk.provider_specific_fields = provider_specific_fields  # type: ignore

                return ModelResponseStream(
                    choices=[
                        StreamingChoices(
                            index=0,
                            delta=Delta(tool_calls=[tool_call_chunk]),
                            finish_reason="tool_calls",
                        )
                    ]
                )

        elif event_type == "response.reasoning_summary_text.delta":
            # Handle reasoning summary with newlines between sections
            content_part = parsed_chunk.get("delta", None)
            if content_part:
                summary_index = parsed_chunk.get("summary_index", 0)

                # Track the last summary index to insert newlines between parts
                last_summary_index = getattr(
                    self, "_last_reasoning_summary_index", None
                )
                if (
                    last_summary_index is not None
                    and summary_index != last_summary_index
                ):
                    # New summary part started, prepend newlines to separate them
                    content_part = "\n\n" + content_part
                self._last_reasoning_summary_index = summary_index

                return ModelResponseStream(
                    choices=[
                        StreamingChoices(
                            index=cast(int, summary_index),
                            delta=Delta(reasoning_content=content_part),
                        )
                    ]
                )

        # For all other event types, use the original static method
        return OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
            parsed_chunk
        )

    _patched_responses_chunk_parser.__name__ = "_patched_responses_chunk_parser"
    OpenAiResponsesToChatCompletionStreamIterator.chunk_parser = _patched_responses_chunk_parser  # type: ignore[method-assign]


def _patch_openai_responses_transform_response() -> None:
    """
    Patches LiteLLMResponsesTransformationHandler.transform_response to properly
    concatenate multiple reasoning summary parts with newlines in non-streaming responses.
    """
    # Store the original method
    original_transform_response = (
        LiteLLMResponsesTransformationHandler.transform_response
    )

    if (
        getattr(
            original_transform_response,
            "__name__",
            "",
        )
        == "_patched_transform_response"
    ):
        return

    def _patched_transform_response(
        self: Any,
        model: str,
        raw_response: Any,
        model_response: Any,
        logging_obj: Any,
        request_data: dict,
        messages: List[Any],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> Any:
        """
        Patched transform_response that properly concatenates reasoning summary parts
        with newlines.
        """
        from openai.types.responses.response import Response as ResponsesAPIResponse
        from openai.types.responses.response_reasoning_item import ResponseReasoningItem

        # Check if raw_response has reasoning items that need concatenation
        if isinstance(raw_response, ResponsesAPIResponse) and raw_response.output:
            for item in raw_response.output:
                if isinstance(item, ResponseReasoningItem) and item.summary:
                    # Concatenate summary texts with double newlines
                    summary_texts = []
                    for summary_item in item.summary:
                        text = getattr(summary_item, "text", "")
                        if text:
                            summary_texts.append(text)

                    if len(summary_texts) > 1:
                        # Modify the first summary item to contain all concatenated text
                        combined_text = "\n\n".join(summary_texts)
                        if hasattr(item.summary[0], "text"):
                            # Create a modified copy of the response with concatenated text
                            # Since OpenAI types are typically frozen, we need to work around this
                            # by modifying the object after the fact or using the result
                            pass  # The fix is applied in the result processing below

        # Call the original method
        result = original_transform_response(
            self,
            model,
            raw_response,
            model_response,
            logging_obj,
            request_data,
            messages,
            optional_params,
            litellm_params,
            encoding,
            api_key,
            json_mode,
        )

        # Post-process: If there are multiple summary items, fix the reasoning_content
        if isinstance(raw_response, ResponsesAPIResponse) and raw_response.output:
            for item in raw_response.output:
                if isinstance(item, ResponseReasoningItem) and item.summary:
                    if len(item.summary) > 1:
                        # Concatenate all summary texts with double newlines
                        summary_texts = []
                        for summary_item in item.summary:
                            text = getattr(summary_item, "text", "")
                            if text:
                                summary_texts.append(text)

                        if summary_texts:
                            combined_text = "\n\n".join(summary_texts)
                            # Update the reasoning_content in the result choices
                            if hasattr(result, "choices"):
                                for choice in result.choices:
                                    if hasattr(choice, "message") and hasattr(
                                        choice.message, "reasoning_content"
                                    ):
                                        choice.message.reasoning_content = combined_text
                    break  # Only process the first reasoning item

        return result

    _patched_transform_response.__name__ = "_patched_transform_response"
    LiteLLMResponsesTransformationHandler.transform_response = _patched_transform_response  # type: ignore[method-assign]


def _patch_azure_responses_should_fake_stream() -> None:
    """
    Patches AzureOpenAIResponsesAPIConfig.should_fake_stream to always return False.

    By default, LiteLLM uses "fake streaming" (MockResponsesAPIStreamingIterator) for models
    not in its database. This causes Azure custom model deployments to buffer the entire
    response before yielding, resulting in poor time-to-first-token.

    Azure's Responses API supports native streaming, so we override this to always use
    real streaming (SyncResponsesAPIStreamingIterator).
    """
    from litellm.llms.azure.responses.transformation import (
        AzureOpenAIResponsesAPIConfig,
    )

    if (
        getattr(AzureOpenAIResponsesAPIConfig.should_fake_stream, "__name__", "")
        == "_patched_should_fake_stream"
    ):
        return

    def _patched_should_fake_stream(
        self: Any,  # noqa: ARG001
        model: Optional[str],  # noqa: ARG001
        stream: Optional[bool],  # noqa: ARG001
        custom_llm_provider: Optional[str] = None,  # noqa: ARG001
    ) -> bool:
        # Azure Responses API supports native streaming - never fake it
        return False

    _patched_should_fake_stream.__name__ = "_patched_should_fake_stream"
    AzureOpenAIResponsesAPIConfig.should_fake_stream = _patched_should_fake_stream  # type: ignore[method-assign]


def _patch_responses_api_usage_format() -> None:
    """
    Patches ResponsesAPIResponse.model_construct to properly transform usage data
    from chat completion format to Responses API format.

    LiteLLM uses model_construct as a fallback in multiple places when ResponsesAPIResponse
    validation fails. This bypasses the usage validator, allowing usage data in chat
    completion format (completion_tokens, prompt_tokens) to be stored instead of Responses
    API format (input_tokens, output_tokens), causing Pydantic serialization warnings.

    This patch wraps model_construct to transform usage before construction, ensuring
    the correct type regardless of which code path calls model_construct.

    Affected locations in LiteLLM:
    - litellm/llms/openai/responses/transformation.py (lines 183, 563)
    - litellm/llms/chatgpt/responses/transformation.py (line 153)
    - litellm/llms/manus/responses/transformation.py (lines 243, 334)
    - litellm/llms/volcengine/responses/transformation.py (line 280)
    - litellm/completion_extras/litellm_responses_transformation/handler.py (line 51)
    """
    from litellm.types.llms.openai import ResponseAPIUsage, ResponsesAPIResponse

    original_model_construct = ResponsesAPIResponse.model_construct

    if getattr(original_model_construct, "_is_patched", False):
        return

    @classmethod  # type: ignore[misc]
    def _patched_model_construct(
        cls: Any,
        _fields_set: Optional[set[str]] = None,
        **values: Any,
    ) -> "ResponsesAPIResponse":
        """
        Patched model_construct that ensures usage is a ResponseAPIUsage object.
        """
        # Transform usage if present and not already the correct type
        if "usage" in values and values["usage"] is not None:
            usage = values["usage"]
            if not isinstance(usage, ResponseAPIUsage):
                if isinstance(usage, dict):
                    values = dict(values)  # Don't mutate original
                    # Check if it's in chat completion format
                    if "prompt_tokens" in usage or "completion_tokens" in usage:
                        # Transform from chat completion format
                        values["usage"] = ResponseAPIUsage(
                            input_tokens=usage.get("prompt_tokens", 0),
                            output_tokens=usage.get("completion_tokens", 0),
                            total_tokens=usage.get("total_tokens", 0),
                        )
                    elif "input_tokens" in usage or "output_tokens" in usage:
                        # Already in Responses API format, just convert to proper type
                        values["usage"] = ResponseAPIUsage(
                            input_tokens=usage.get("input_tokens", 0),
                            output_tokens=usage.get("output_tokens", 0),
                            total_tokens=usage.get("total_tokens", 0),
                        )

        # Call original model_construct (need to call it as unbound method)
        return original_model_construct.__func__(cls, _fields_set, **values)  # type: ignore[attr-defined]

    _patched_model_construct._is_patched = True  # type: ignore[attr-defined]
    ResponsesAPIResponse.model_construct = _patched_model_construct  # type: ignore[method-assign, assignment]


def _patch_logging_assembled_streaming_response() -> None:
    """
    Patches LiteLLMLoggingObj._get_assembled_streaming_response to create a deep copy
    of the ResponsesAPIResponse before modifying its usage field.

    The original code transforms usage to chat completion format and sets it as a dict
    directly on the ResponsesAPIResponse.usage field. This mutates the original object,
    causing Pydantic serialization warnings when model_dump() is called later because
    the usage field contains a dict instead of the expected ResponseAPIUsage type.

    This patch creates a copy of the response before modification, preserving the
    original object with its proper ResponseAPIUsage type.
    """
    from litellm import LiteLLMLoggingObj
    from litellm.responses.utils import ResponseAPILoggingUtils
    from litellm.types.llms.openai import (
        ResponseAPIUsage,
        ResponseCompletedEvent,
        ResponsesAPIResponse,
    )
    from litellm.types.utils import ModelResponse, TextCompletionResponse

    original_method = LiteLLMLoggingObj._get_assembled_streaming_response

    if getattr(original_method, "_is_patched", False):
        return

    def _patched_get_assembled_streaming_response(
        self: Any,  # noqa: ARG001
        result: Any,
        start_time: Any,  # noqa: ARG001
        end_time: Any,  # noqa: ARG001
        is_async: bool,  # noqa: ARG001
        streaming_chunks: List[Any],  # noqa: ARG001
    ) -> Any:
        """
        Patched version that creates a copy before modifying usage.

        The original LiteLLM code transforms usage to chat completion format and
        sets it directly as a dict, which causes Pydantic serialization warnings.
        This patch uses model_construct to rebuild the response with the transformed
        usage, ensuring proper typing.
        """
        if isinstance(result, ModelResponse):
            return result
        elif isinstance(result, TextCompletionResponse):
            return result
        elif isinstance(result, ResponseCompletedEvent):
            # Get the original response data
            original_response = result.response
            response_data = original_response.model_dump()

            # Transform usage if present
            if isinstance(original_response.usage, ResponseAPIUsage):
                transformed_usage = (
                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
                        original_response.usage
                    )
                )
                # Put the transformed usage (in chat completion format) into response_data
                # Our patched model_construct will convert it back to ResponseAPIUsage
                response_data["usage"] = (
                    transformed_usage.model_dump()
                    if hasattr(transformed_usage, "model_dump")
                    else dict(transformed_usage)
                )

            # Rebuild using model_construct - our patch ensures usage is properly typed
            response_copy = ResponsesAPIResponse.model_construct(**response_data)

            # Copy hidden params
            if hasattr(original_response, "_hidden_params"):
                response_copy._hidden_params = dict(original_response._hidden_params)

            return response_copy
        else:
            return None

    _patched_get_assembled_streaming_response._is_patched = True  # type: ignore[attr-defined]
    LiteLLMLoggingObj._get_assembled_streaming_response = _patched_get_assembled_streaming_response  # type: ignore[method-assign]


def _patch_responses_metadata_none() -> None:
    """
    Patches litellm.responses to normalize metadata=None to metadata={} in kwargs.

    LiteLLM's @client decorator wrapper in utils.py (line 1721) does:
        _is_litellm_router_call = "model_group" in kwargs.get("metadata", {})
    When metadata is explicitly None in kwargs, kwargs.get("metadata", {}) returns
    None (the key exists, so the default is not used), causing:
        TypeError: argument of type 'NoneType' is not iterable

    This swallows the real exception (e.g. AuthenticationError) and surfaces as:
        APIConnectionError: OpenAIException - argument of type 'NoneType' is not iterable

    This happens when the Responses API bridge calls litellm.responses() with
    **litellm_params which may contain metadata=None.

    STATUS: STILL NEEDED - litellm/utils.py wrapper function uses kwargs.get("metadata", {})
            which does not guard against metadata being explicitly None. Same pattern exists
            on line 1407 for async path.
    """
    import litellm as _litellm
    from functools import wraps

    original_responses = _litellm.responses

    if getattr(original_responses, "_metadata_patched", False):
        return

    @wraps(original_responses)
    def _patched_responses(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("metadata") is None:
            kwargs["metadata"] = {}
        return original_responses(*args, **kwargs)

    _patched_responses._metadata_patched = True  # type: ignore[attr-defined]
    _litellm.responses = _patched_responses


def apply_monkey_patches() -> None:
    """
    Apply all necessary monkey patches to LiteLLM for compatibility.

    This includes:
    - Patching OllamaChatCompletionResponseIterator.chunk_parser for streaming content
    - Patching translate_responses_chunk_to_openai_stream for parallel tool calls
    - Patching LiteLLMResponsesTransformationHandler.transform_response for non-streaming responses
    - Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming
    - Patching ResponsesAPIResponse.model_construct to fix usage format in all code paths
    - Patching LiteLLMLoggingObj._get_assembled_streaming_response to avoid mutating original response
    - Patching litellm.responses to fix metadata=None causing TypeError in error handling
    """
    _patch_ollama_chunk_parser()
    _patch_openai_responses_parallel_tool_calls()
    _patch_openai_responses_transform_response()
    _patch_azure_responses_should_fake_stream()
    _patch_responses_api_usage_format()
    _patch_logging_assembled_streaming_response()
    _patch_responses_metadata_none()


================================================
FILE: backend/onyx/llm/model_metadata_enrichments.json
================================================
{
  "ai21.j2-mid-v1": {
    "display_name": "J2 Mid",
    "model_vendor": "ai21",
    "model_version": "v1"
  },
  "ai21.j2-ultra-v1": {
    "display_name": "J2 Ultra",
    "model_vendor": "ai21",
    "model_version": "v1"
  },
  "ai21.jamba-1-5-large-v1:0": {
    "display_name": "Jamba 1.5 Large",
    "model_vendor": "ai21",
    "model_version": "v1:0"
  },
  "ai21.jamba-1-5-mini-v1:0": {
    "display_name": "Jamba 1.5 Mini",
    "model_vendor": "ai21",
    "model_version": "v1:0"
  },
  "ai21.jamba-instruct-v1:0": {
    "display_name": "Jamba Instruct",
    "model_vendor": "ai21",
    "model_version": "v1:0"
  },
  "amazon.nova-lite-v1:0": {
    "display_name": "Nova Lite",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "amazon.nova-micro-v1:0": {
    "display_name": "Nova Micro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "amazon.titan-text-express-v1": {
    "display_name": "Titan Text Express",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "amazon.titan-text-lite-v1": {
    "display_name": "Titan Text Lite",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "amazon.titan-text-premier-v1:0": {
    "display_name": "Titan Text Premier",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20241022-v2:0"
  },
  "anthropic.claude-3-sonnet-20240229-v1:0": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic",
    "model_version": "20240229-v1:0"
  },
  "anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001-v1:0"
  },
  "anthropic.claude-haiku-4-5@20251001": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "anthropic.claude-opus-4-1-20250805-v1:0": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805-v1:0"
  },
  "anthropic.claude-opus-4-20250514-v1:0": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514-v1:0"
  },
  "anthropic.claude-opus-4-5-20251101-v1:0": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251101-v1:0"
  },
  "anthropic.claude-sonnet-4-20250514-v1:0": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514-v1:0"
  },
  "anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "apac.amazon.nova-lite-v1:0": {
    "display_name": "Nova Lite",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "apac.amazon.nova-micro-v1:0": {
    "display_name": "Nova Micro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "apac.amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20241022-v2:0"
  },
  "apac.anthropic.claude-3-sonnet-20240229-v1:0": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic",
    "model_version": "20240229-v1:0"
  },
  "apac.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001-v1:0"
  },
  "apac.anthropic.claude-sonnet-4-20250514-v1:0": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514-v1:0"
  },
  "au.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001-v1:0"
  },
  "au.anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "azure/claude-haiku-4-5": {
    "display_name": "Claude Haiku",
    "model_vendor": "anthropic"
  },
  "azure/claude-opus-4-1": {
    "display_name": "Claude Opus",
    "model_vendor": "anthropic"
  },
  "azure/claude-sonnet-4-5": {
    "display_name": "Claude Sonnet",
    "model_vendor": "anthropic"
  },
  "azure/codex-mini": {
    "display_name": "Codex Mini",
    "model_vendor": "openai"
  },
  "azure/command-r-plus": {
    "display_name": "Command R Plus",
    "model_vendor": "cohere",
    "model_version": "latest"
  },
  "azure/computer-use-preview": {
    "display_name": "Computer Use Preview",
    "model_vendor": "anthropic",
    "model_version": "preview"
  },
  "azure/container": {
    "display_name": "Container",
    "model_vendor": "azure",
    "model_version": "latest"
  },
  "azure/eu/gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "azure/eu/gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "azure/eu/gpt-4o-mini-2024-07-18": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai",
    "model_version": "2024-07-18"
  },
  "azure/eu/gpt-4o-mini-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Mini Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/eu/gpt-4o-realtime-preview-2024-10-01": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-10-01"
  },
  "azure/eu/gpt-4o-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/eu/gpt-5-2025-08-07": {
    "display_name": "GPT-5",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/eu/gpt-5-mini-2025-08-07": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/eu/gpt-5-nano-2025-08-07": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/eu/gpt-5.1": {
    "display_name": "GPT-5.1",
    "model_vendor": "openai"
  },
  "azure/eu/gpt-5.1-chat": {
    "display_name": "GPT-5.1 Chat",
    "model_vendor": "openai"
  },
  "azure/eu/gpt-5.1-codex": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai"
  },
  "azure/eu/gpt-5.1-codex-mini": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai"
  },
  "azure/eu/o1-2024-12-17": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/eu/o1-mini-2024-09-12": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/eu/o1-preview-2024-09-12": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/eu/o3-mini-2025-01-31": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "2025-01-31"
  },
  "azure/global-standard/gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "azure/global-standard/gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "azure/global-standard/gpt-4o-mini": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai"
  },
  "azure/global/gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "azure/global/gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "azure/global/gpt-5.1": {
    "display_name": "GPT-5.1",
    "model_vendor": "openai"
  },
  "azure/global/gpt-5.1-chat": {
    "display_name": "GPT-5.1 Chat",
    "model_vendor": "openai"
  },
  "azure/global/gpt-5.1-codex": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai"
  },
  "azure/global/gpt-5.1-codex-mini": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai"
  },
  "azure/gpt-3.5-turbo": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai"
  },
  "azure/gpt-3.5-turbo-0125": {
    "display_name": "GPT 3.5 Turbo 0125",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "azure/gpt-3.5-turbo-instruct-0914": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "0914"
  },
  "azure/gpt-35-turbo": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai"
  },
  "azure/gpt-35-turbo-0125": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "azure/gpt-35-turbo-0301": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "0301"
  },
  "azure/gpt-35-turbo-0613": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "azure/gpt-35-turbo-1106": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "azure/gpt-35-turbo-16k": {
    "display_name": "GPT-3.5 Turbo 16K",
    "model_vendor": "openai"
  },
  "azure/gpt-35-turbo-16k-0613": {
    "display_name": "GPT-3.5 Turbo 16K",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "azure/gpt-35-turbo-instruct": {
    "display_name": "GPT-3.5 Turbo Instruct",
    "model_vendor": "openai"
  },
  "azure/gpt-35-turbo-instruct-0914": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai",
    "model_version": "0914"
  },
  "azure/gpt-4": {
    "display_name": "GPT-4",
    "model_vendor": "openai"
  },
  "azure/gpt-4-0125-preview": {
    "display_name": "GPT 4 0125 Preview",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "azure/gpt-4-0613": {
    "display_name": "GPT 4 0613",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "azure/gpt-4-1106-preview": {
    "display_name": "GPT 4 1106 Preview",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "azure/gpt-4-32k": {
    "display_name": "GPT-4 32K",
    "model_vendor": "openai"
  },
  "azure/gpt-4-32k-0613": {
    "display_name": "GPT 4 32k 0613",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "azure/gpt-4-turbo": {
    "display_name": "GPT-4 Turbo",
    "model_vendor": "openai"
  },
  "azure/gpt-4-turbo-2024-04-09": {
    "display_name": "GPT-4 Turbo",
    "model_vendor": "openai",
    "model_version": "2024-04-09"
  },
  "azure/gpt-4-turbo-vision-preview": {
    "display_name": "GPT-4 Turbo Vision Preview",
    "model_vendor": "openai"
  },
  "azure/gpt-4.1": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai"
  },
  "azure/gpt-4.1-2025-04-14": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/gpt-4.1-mini": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai"
  },
  "azure/gpt-4.1-mini-2025-04-14": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/gpt-4.1-nano": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai"
  },
  "azure/gpt-4.1-nano-2025-04-14": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/gpt-4.5-preview": {
    "display_name": "GPT-4.5 Preview",
    "model_vendor": "openai"
  },
  "azure/gpt-4o": {
    "display_name": "GPT-4o",
    "model_vendor": "openai"
  },
  "azure/gpt-4o-2024-05-13": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-05-13"
  },
  "azure/gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "azure/gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "azure/gpt-4o-audio-preview-2024-12-17": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/gpt-4o-mini": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai"
  },
  "azure/gpt-4o-mini-2024-07-18": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai",
    "model_version": "2024-07-18"
  },
  "azure/gpt-4o-mini-audio-preview-2024-12-17": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Mini Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/gpt-4o-mini-transcribe": {
    "display_name": "GPT-4o Mini Transcribe",
    "model_vendor": "openai"
  },
  "azure/gpt-4o-mini-tts": {
    "display_name": "GPT-4o Mini TTS",
    "model_vendor": "openai"
  },
  "azure/gpt-4o-realtime-preview-2024-10-01": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-10-01"
  },
  "azure/gpt-4o-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/gpt-4o-transcribe": {
    "display_name": "GPT-4o Transcribe",
    "model_vendor": "openai"
  },
  "azure/gpt-4o-transcribe-diarize": {
    "display_name": "GPT-4o Transcribe Diarize",
    "model_vendor": "openai"
  },
  "azure/gpt-5": {
    "display_name": "GPT-5",
    "model_vendor": "openai"
  },
  "azure/gpt-5-2025-08-07": {
    "display_name": "GPT-5",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/gpt-5-chat": {
    "display_name": "GPT-5 Chat",
    "model_vendor": "openai"
  },
  "azure/gpt-5-chat-latest": {
    "display_name": "GPT 5 Chat",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/gpt-5-codex": {
    "display_name": "GPT-5 Codex",
    "model_vendor": "openai"
  },
  "azure/gpt-5-mini": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai"
  },
  "azure/gpt-5-mini-2025-08-07": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/gpt-5-nano": {
    "display_name": "GPT-5 Nano",
    "model_vendor": "openai"
  },
  "azure/gpt-5-nano-2025-08-07": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/gpt-5-pro": {
    "display_name": "GPT-5 Pro",
    "model_vendor": "openai"
  },
  "azure/gpt-5.1": {
    "display_name": "GPT-5.1",
    "model_vendor": "openai"
  },
  "azure/gpt-5.1-2025-11-13": {
    "display_name": "GPT 5.1",
    "model_vendor": "openai",
    "model_version": "2025-11-13"
  },
  "azure/gpt-5.1-chat": {
    "display_name": "GPT-5.1 Chat",
    "model_vendor": "openai"
  },
  "azure/gpt-5.1-chat-2025-11-13": {
    "display_name": "GPT 5.1 Chat",
    "model_vendor": "openai",
    "model_version": "2025-11-13"
  },
  "azure/gpt-5.1-codex": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai"
  },
  "azure/gpt-5.1-codex-2025-11-13": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai",
    "model_version": "2025-11-13"
  },
  "azure/gpt-5.1-codex-mini": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai"
  },
  "azure/gpt-5.1-codex-mini-2025-11-13": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai",
    "model_version": "2025-11-13"
  },
  "azure/gpt-audio-2025-08-28": {
    "display_name": "GPT Audio",
    "model_vendor": "openai",
    "model_version": "2025-08-28"
  },
  "azure/gpt-audio-mini-2025-10-06": {
    "display_name": "GPT Audio Mini",
    "model_vendor": "openai",
    "model_version": "2025-10-06"
  },
  "azure/gpt-realtime-2025-08-28": {
    "display_name": "GPT Realtime",
    "model_vendor": "openai",
    "model_version": "2025-08-28"
  },
  "azure/gpt-realtime-mini-2025-10-06": {
    "display_name": "GPT Realtime Mini",
    "model_vendor": "openai",
    "model_version": "2025-10-06"
  },
  "azure/mistral-large-2402": {
    "display_name": "Mistral Large 24.02",
    "model_vendor": "mistral",
    "model_version": "2402"
  },
  "azure/mistral-large-latest": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure/o1": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o1-2024-12-17": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/o1-mini": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o1-mini-2024-09-12": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/o1-preview": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o1-preview-2024-09-12": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/o3": {
    "display_name": "o3",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o3-2025-04-16": {
    "display_name": "o3",
    "model_vendor": "openai",
    "model_version": "2025-04-16"
  },
  "azure/o3-deep-research": {
    "display_name": "O3",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o3-mini": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o3-mini-2025-01-31": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "2025-01-31"
  },
  "azure/o3-pro": {
    "display_name": "O3",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o3-pro-2025-06-10": {
    "display_name": "O3",
    "model_vendor": "openai",
    "model_version": "2025-06-10"
  },
  "azure/o4-mini": {
    "display_name": "o4 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "azure/o4-mini-2025-04-16": {
    "display_name": "o4 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-16"
  },
  "azure/us/gpt-4.1-2025-04-14": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/us/gpt-4.1-mini-2025-04-14": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/us/gpt-4.1-nano-2025-04-14": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "azure/us/gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "azure/us/gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "azure/us/gpt-4o-mini-2024-07-18": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai",
    "model_version": "2024-07-18"
  },
  "azure/us/gpt-4o-mini-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Mini Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/us/gpt-4o-realtime-preview-2024-10-01": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-10-01"
  },
  "azure/us/gpt-4o-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/us/gpt-5-2025-08-07": {
    "display_name": "GPT-5",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/us/gpt-5-mini-2025-08-07": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/us/gpt-5-nano-2025-08-07": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "azure/us/gpt-5.1": {
    "display_name": "GPT-5.1",
    "model_vendor": "openai"
  },
  "azure/us/gpt-5.1-chat": {
    "display_name": "GPT-5.1 Chat",
    "model_vendor": "openai"
  },
  "azure/us/gpt-5.1-codex": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai"
  },
  "azure/us/gpt-5.1-codex-mini": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai"
  },
  "azure/us/o1-2024-12-17": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "azure/us/o1-mini-2024-09-12": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/us/o1-preview-2024-09-12": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "2024-09-12"
  },
  "azure/us/o3-2025-04-16": {
    "display_name": "o3",
    "model_vendor": "openai",
    "model_version": "2025-04-16"
  },
  "azure/us/o3-mini-2025-01-31": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "2025-01-31"
  },
  "azure/us/o4-mini-2025-04-16": {
    "display_name": "o4 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-16"
  },
  "azure_ai/Llama-3.2-11B-Vision-Instruct": {
    "display_name": "Llama 3.2 11B Vision Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Llama-3.2-90B-Vision-Instruct": {
    "display_name": "Llama 3.2 90B Vision Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Llama-3.3-70B-Instruct": {
    "display_name": "Llama 3.3 70B Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": {
    "display_name": "Llama 4 Maverick 17B 128E Instruct FP8",
    "model_vendor": "meta"
  },
  "azure_ai/Llama-4-Scout-17B-16E-Instruct": {
    "display_name": "Llama 4 Scout 17B 16E Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/MAI-DS-R1": {
    "display_name": "MAI-DS-R1",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Meta-Llama-3-70B-Instruct": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Meta-Llama-3.1-405B-Instruct": {
    "display_name": "Llama 3.1 405B Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Meta-Llama-3.1-70B-Instruct": {
    "display_name": "Llama 3.1 70B Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Meta-Llama-3.1-8B-Instruct": {
    "display_name": "Llama 3.1 8B Instruct",
    "model_vendor": "meta"
  },
  "azure_ai/Phi-3-medium-128k-instruct": {
    "display_name": "Phi 3 Medium 128k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3-medium-4k-instruct": {
    "display_name": "Phi 3 Medium 4k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3-mini-128k-instruct": {
    "display_name": "Phi 3 Mini 128k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3-mini-4k-instruct": {
    "display_name": "Phi 3 Mini 4k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3-small-128k-instruct": {
    "display_name": "Phi 3 Small 128k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3-small-8k-instruct": {
    "display_name": "Phi 3 Small 8k Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3.5-MoE-instruct": {
    "display_name": "Phi 3.5 MOE Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3.5-mini-instruct": {
    "display_name": "Phi 3.5 Mini Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-3.5-vision-instruct": {
    "display_name": "Phi 3.5 Vision Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-4": {
    "display_name": "Phi 4",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-4-mini-instruct": {
    "display_name": "Phi 4 Mini Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-4-mini-reasoning": {
    "display_name": "Phi 4 Mini Reasoning",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-4-multimodal-instruct": {
    "display_name": "Phi 4 Multimodal Instruct",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/Phi-4-reasoning": {
    "display_name": "Phi 4 Reasoning",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "azure_ai/deepseek-r1": {
    "display_name": "DeepSeek R1",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "azure_ai/deepseek-v3": {
    "display_name": "DeepSeek V3",
    "model_vendor": "deepseek",
    "model_version": "v3"
  },
  "azure_ai/deepseek-v3-0324": {
    "display_name": "DeepSeek v3 0324",
    "model_vendor": "deepseek",
    "model_version": "0324"
  },
  "azure_ai/global/grok-3": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/global/grok-3-mini": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-3": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-3-mini": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-4": {
    "display_name": "Grok 4",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-4-fast-non-reasoning": {
    "display_name": "Grok 4 Fast Non Reasoning",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-4-fast-reasoning": {
    "display_name": "Grok 4 Fast Reasoning",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/grok-code-fast-1": {
    "display_name": "Grok Code Fast 1",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "azure_ai/jais-30b-chat": {
    "display_name": "Jais 30B Chat",
    "model_vendor": "g42",
    "model_version": "latest"
  },
  "azure_ai/jamba-instruct": {
    "display_name": "Jamba Instruct",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "azure_ai/ministral-3b": {
    "display_name": "Ministral 3B",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure_ai/mistral-large": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure_ai/mistral-large-2407": {
    "display_name": "Mistral Large 24.07",
    "model_vendor": "mistral",
    "model_version": "2407"
  },
  "azure_ai/mistral-large-latest": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure_ai/mistral-medium-2505": {
    "display_name": "Mistral Medium 2505",
    "model_vendor": "mistral",
    "model_version": "2505"
  },
  "azure_ai/mistral-nemo": {
    "display_name": "Mistral Nemo",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure_ai/mistral-small": {
    "display_name": "Mistral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "azure_ai/mistral-small-2503": {
    "display_name": "Mistral Small 2503",
    "model_vendor": "mistral",
    "model_version": "2503"
  },
  "bedrock/*/1-month-commitment/cohere.command-light-text-v14": {
    "display_name": "Command Light Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "bedrock/*/1-month-commitment/cohere.command-text-v14": {
    "display_name": "Command Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "bedrock/*/6-month-commitment/cohere.command-light-text-v14": {
    "display_name": "Command Light Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "bedrock/*/6-month-commitment/cohere.command-text-v14": {
    "display_name": "Command Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/ap-northeast-1/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/ap-northeast-1/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/eu-central-1/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/eu-central-1/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:2"
  },
  "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": {
    "display_name": "Mistral Large 24.02",
    "model_vendor": "mistral",
    "model_version": "2402-v1:0"
  },
  "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": {
    "display_name": "Mixtral 8x7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:1"
  },
  "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-east-1/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-east-1/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:2"
  },
  "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": {
    "display_name": "Mistral Large 24.02",
    "model_vendor": "mistral",
    "model_version": "2402-v1:0"
  },
  "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": {
    "display_name": "Mixtral 8x7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:1"
  },
  "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-east-1/amazon.titan-text-express-v1": {
    "display_name": "Titan Text Express",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "bedrock/us-gov-east-1/amazon.titan-text-lite-v1": {
    "display_name": "Titan Text Lite",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "bedrock/us-gov-east-1/amazon.titan-text-premier-v1:0": {
    "display_name": "Titan Text Premier",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-west-1/amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-west-1/amazon.titan-text-express-v1": {
    "display_name": "Titan Text Express",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "bedrock/us-gov-west-1/amazon.titan-text-lite-v1": {
    "display_name": "Titan Text Lite",
    "model_vendor": "amazon",
    "model_version": "v1"
  },
  "bedrock/us-gov-west-1/amazon.titan-text-premier-v1:0": {
    "display_name": "Titan Text Premier",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-west-2/anthropic.claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/anthropic.claude-v1": {
    "display_name": "Claude",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "bedrock/us-west-2/anthropic.claude-v2:1": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "v2:1"
  },
  "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:2"
  },
  "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": {
    "display_name": "Mistral Large 24.02",
    "model_vendor": "mistral",
    "model_version": "2402-v1:0"
  },
  "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
    "display_name": "Mixtral 8x7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:1"
  },
  "chat-bison": {
    "display_name": "Chat Bison",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "chat-bison-32k": {
    "display_name": "Chat Bison 32k",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "chat-bison-32k@002": {
    "display_name": "Chat Bison 32k",
    "model_vendor": "google",
    "model_version": "002"
  },
  "chat-bison@001": {
    "display_name": "Chat Bison",
    "model_vendor": "google",
    "model_version": "001"
  },
  "chat-bison@002": {
    "display_name": "Chat Bison",
    "model_vendor": "google",
    "model_version": "002"
  },
  "chatgpt-4o-latest": {
    "display_name": "ChatGPT 4o",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "claude-3-5-sonnet-20240620": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620"
  },
  "claude-3-5-sonnet-20241022": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20241022"
  },
  "claude-3-5-sonnet-latest": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "latest"
  },
  "claude-4-opus-20250514": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "claude-4-sonnet-20250514": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "claude-haiku-4-5": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic"
  },
  "claude-haiku-4-5-20251001": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "claude-opus-4-1": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic"
  },
  "claude-opus-4-1-20250805": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805"
  },
  "claude-opus-4-1@20250805": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805"
  },
  "claude-opus-4-20250514": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "claude-opus-4-5": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic"
  },
  "claude-opus-4-6": {
    "display_name": "Claude Opus 4.6",
    "model_vendor": "anthropic"
  },
  "claude-opus-4-5-20251101": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251101"
  },
  "claude-sonnet-4-20250514": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "claude-sonnet-4-5": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic"
  },
  "claude-sonnet-4-6": {
    "display_name": "Claude Sonnet 4.6",
    "model_vendor": "anthropic"
  },
  "claude-sonnet-4-5-20250929": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929"
  },
  "claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "codechat-bison": {
    "display_name": "Codechat Bison",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "codechat-bison-32k": {
    "display_name": "Codechat Bison 32k",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "codechat-bison-32k@002": {
    "display_name": "Codechat Bison 32k",
    "model_vendor": "google",
    "model_version": "002"
  },
  "codechat-bison@001": {
    "display_name": "Codechat Bison",
    "model_vendor": "google",
    "model_version": "001"
  },
  "codechat-bison@002": {
    "display_name": "Codechat Bison",
    "model_vendor": "google",
    "model_version": "002"
  },
  "codechat-bison@latest": {
    "display_name": "Codechat Bison",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "codex-mini-latest": {
    "display_name": "Codex Mini",
    "model_vendor": "openai"
  },
  "cohere.command-light-text-v14": {
    "display_name": "Command Light Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "cohere.command-r-plus-v1:0": {
    "display_name": "Command R Plus",
    "model_vendor": "cohere",
    "model_version": "v1:0"
  },
  "cohere.command-r-v1:0": {
    "display_name": "Command R",
    "model_vendor": "cohere",
    "model_version": "v1:0"
  },
  "cohere.command-text-v14": {
    "display_name": "Command Text",
    "model_vendor": "cohere",
    "model_version": "v14"
  },
  "computer-use-preview": {
    "display_name": "Computer Use Preview",
    "model_vendor": "anthropic",
    "model_version": "preview"
  },
  "deepseek.v3-v1:0": {
    "display_name": "DeepSeek V3",
    "model_vendor": "deepseek",
    "model_version": "v1:0"
  },
  "deepseek/deepseek-chat": {
    "display_name": "DeepSeek Chat",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "deepseek/deepseek-coder": {
    "display_name": "DeepSeek Coder",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "deepseek/deepseek-r1": {
    "display_name": "DeepSeek R1",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "deepseek/deepseek-reasoner": {
    "display_name": "DeepSeek Reasoner",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "deepseek/deepseek-v3": {
    "display_name": "DeepSeek V3",
    "model_vendor": "deepseek",
    "model_version": "v3"
  },
  "eu.amazon.nova-lite-v1:0": {
    "display_name": "Nova Lite",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "eu.amazon.nova-micro-v1:0": {
    "display_name": "Nova Micro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "eu.amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620-v1:0"
  },
  "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20241022-v2:0"
  },
  "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic",
    "model_version": "20240229-v1:0"
  },
  "eu.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001-v1:0"
  },
  "eu.anthropic.claude-opus-4-1-20250805-v1:0": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805-v1:0"
  },
  "eu.anthropic.claude-opus-4-20250514-v1:0": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514-v1:0"
  },
  "eu.anthropic.claude-sonnet-4-20250514-v1:0": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514-v1:0"
  },
  "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929-v1:0"
  },
  "eu.meta.llama3-2-1b-instruct-v1:0": {
    "display_name": "Llama 3.2 1B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "eu.meta.llama3-2-3b-instruct-v1:0": {
    "display_name": "Llama 3.2 3B Instruct",
    "model_vendor": "meta",
    "model_version": "v1:0"
  },
  "eu.mistral.pixtral-large-2502-v1:0": {
    "display_name": "Pixtral Large 25.02",
    "model_vendor": "mistral",
    "model_version": "2502-v1:0"
  },
  "eu.twelvelabs.pegasus-1-2-v1:0": {
    "display_name": "Pegasus 1.2",
    "model_vendor": "twelvelabs",
    "model_version": "1.2-v1:0"
  },
  "ft:gpt-3.5-turbo": {
    "display_name": "Ft:gpt 3.5 Turbo",
    "model_vendor": "openai"
  },
  "ft:gpt-3.5-turbo-0125": {
    "display_name": "GPT-3.5 Turbo (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "ft:gpt-3.5-turbo-0613": {
    "display_name": "GPT-3.5 Turbo (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "ft:gpt-3.5-turbo-1106": {
    "display_name": "GPT-3.5 Turbo (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "ft:gpt-4-0613": {
    "display_name": "GPT-4 (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "ft:gpt-4o-2024-08-06": {
    "display_name": "GPT-4o (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "ft:gpt-4o-2024-11-20": {
    "display_name": "GPT-4o (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "ft:gpt-4o-mini-2024-07-18": {
    "display_name": "GPT-4o Mini (Fine-tuned)",
    "model_vendor": "openai",
    "model_version": "2024-07-18"
  },
  "gemini-1.0-pro": {
    "display_name": "Gemini 1.0 Pro",
    "model_vendor": "google"
  },
  "gemini-1.0-pro-001": {
    "display_name": "Gemini 1.0 Pro 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-1.0-pro-002": {
    "display_name": "Gemini 1.0 Pro 002",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini-1.0-ultra": {
    "display_name": "Gemini 1.0 Ultra",
    "model_vendor": "google"
  },
  "gemini-1.0-ultra-001": {
    "display_name": "Gemini 1.0 Ultra 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-1.5-flash": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google"
  },
  "gemini-1.5-flash-001": {
    "display_name": "Gemini 1.5 Flash 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-1.5-flash-002": {
    "display_name": "Gemini 1.5 Flash 002",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini-1.5-flash-exp-0827": {
    "display_name": "Gemini 1.5 Flash Exp 0827",
    "model_vendor": "google",
    "model_version": "0827"
  },
  "gemini-1.5-flash-preview-0514": {
    "display_name": "Gemini 1.5 Flash Preview 0514",
    "model_vendor": "google",
    "model_version": "0514"
  },
  "gemini-1.5-pro": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google"
  },
  "gemini-1.5-pro-001": {
    "display_name": "Gemini 1.5 Pro 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-1.5-pro-002": {
    "display_name": "Gemini 1.5 Pro 002",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini-1.5-pro-preview-0215": {
    "display_name": "Gemini 1.5 Pro Preview 0215",
    "model_vendor": "google",
    "model_version": "0215"
  },
  "gemini-1.5-pro-preview-0409": {
    "display_name": "Gemini 1.5 Pro Preview 0409",
    "model_vendor": "google",
    "model_version": "0409"
  },
  "gemini-1.5-pro-preview-0514": {
    "display_name": "Gemini 1.5 Pro Preview 0514",
    "model_vendor": "google",
    "model_version": "0514"
  },
  "gemini-2.0-flash": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini-2.0-flash-001": {
    "display_name": "Gemini 2.0 Flash 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-2.0-flash-exp": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini-2.0-flash-lite": {
    "display_name": "Gemini 2.0 Flash Lite",
    "model_vendor": "google"
  },
  "gemini-2.0-flash-lite-001": {
    "display_name": "Gemini 2.0 Flash Lite 001",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini-2.0-flash-live-preview-04-09": {
    "display_name": "Gemini 2.0 Flash Live Preview 04 09",
    "model_vendor": "google"
  },
  "gemini-2.0-flash-thinking-exp": {
    "display_name": "Gemini 2.0 Flash Thinking",
    "model_vendor": "google"
  },
  "gemini-2.0-flash-thinking-exp-01-21": {
    "display_name": "Gemini 2.0 Flash Thinking Exp 01 21",
    "model_vendor": "google"
  },
  "gemini-2.0-pro-exp-02-05": {
    "display_name": "Gemini 2.0 Pro Exp 02 05",
    "model_vendor": "google"
  },
  "gemini-2.5-flash": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini-2.5-flash-lite": {
    "display_name": "Gemini 2.5 Flash Lite",
    "model_vendor": "google"
  },
  "gemini-2.5-flash-lite-preview-06-17": {
    "display_name": "Gemini 2.5 Flash Lite Preview 06 17",
    "model_vendor": "google"
  },
  "gemini-2.5-flash-lite-preview-09-2025": {
    "display_name": "Gemini 2.5 Flash Lite Preview 09 2025",
    "model_vendor": "google",
    "model_version": "2025"
  },
  "gemini-2.5-flash-preview-04-17": {
    "display_name": "Gemini 2.5 Flash Preview 04 17",
    "model_vendor": "google"
  },
  "gemini-2.5-flash-preview-05-20": {
    "display_name": "Gemini 2.5 Flash Preview 05 20",
    "model_vendor": "google"
  },
  "gemini-2.5-flash-preview-09-2025": {
    "display_name": "Gemini 2.5 Flash Preview 09 2025",
    "model_vendor": "google",
    "model_version": "2025"
  },
  "gemini-2.5-pro": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini-2.5-pro-exp-03-25": {
    "display_name": "Gemini 2.5 Pro Exp 03 25",
    "model_vendor": "google"
  },
  "gemini-2.5-pro-preview-03-25": {
    "display_name": "Gemini 2.5 Pro Preview 03 25",
    "model_vendor": "google"
  },
  "gemini-2.5-pro-preview-05-06": {
    "display_name": "Gemini 2.5 Pro Preview 05 06",
    "model_vendor": "google"
  },
  "gemini-2.5-pro-preview-06-05": {
    "display_name": "Gemini 2.5 Pro Preview 06 05",
    "model_vendor": "google"
  },
  "gemini-3-pro-preview": {
    "display_name": "Gemini 3 Pro Preview",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini-3-flash-preview": {
    "display_name": "Gemini 3 Flash Preview",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini-flash-experimental": {
    "display_name": "Gemini Flash Experimental",
    "model_vendor": "google",
    "model_version": "experimental"
  },
  "gemini-pro": {
    "display_name": "Gemini Pro",
    "model_vendor": "google"
  },
  "gemini-pro-experimental": {
    "display_name": "Gemini Pro Experimental",
    "model_vendor": "google"
  },
  "gemini/gemini-1.5-flash": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-1.5-flash-001": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/gemini-1.5-flash-002": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini/gemini-1.5-flash-8b": {
    "display_name": "Gemini 1.5 Flash 8B",
    "model_vendor": "google"
  },
  "gemini/gemini-1.5-flash-8b-exp-0827": {
    "display_name": "Gemini 1.5 Flash 8B",
    "model_vendor": "google",
    "model_version": "0827"
  },
  "gemini/gemini-1.5-flash-8b-exp-0924": {
    "display_name": "Gemini 1.5 Flash 8B",
    "model_vendor": "google",
    "model_version": "0924"
  },
  "gemini/gemini-1.5-flash-exp-0827": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google",
    "model_version": "0827"
  },
  "gemini/gemini-1.5-flash-latest": {
    "display_name": "Gemini 1.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-1.5-pro": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-1.5-pro-001": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/gemini-1.5-pro-002": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini/gemini-1.5-pro-exp-0801": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google",
    "model_version": "0801"
  },
  "gemini/gemini-1.5-pro-exp-0827": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google",
    "model_version": "0827"
  },
  "gemini/gemini-1.5-pro-latest": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-001": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/gemini-2.0-flash-exp": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-lite": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-lite-preview-02-05": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-live-001": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/gemini-2.0-flash-preview-image-generation": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-thinking-exp": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-flash-thinking-exp-01-21": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.0-pro-exp-02-05": {
    "display_name": "Gemini 2.0",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-image": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-image-preview": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-lite": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-lite-preview-06-17": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-lite-preview-09-2025": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google",
    "model_version": "2025"
  },
  "gemini/gemini-2.5-flash-preview-04-17": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-preview-05-20": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-flash-preview-09-2025": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google",
    "model_version": "2025"
  },
  "gemini/gemini-2.5-flash-preview-tts": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro-exp-03-25": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro-preview-03-25": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro-preview-05-06": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro-preview-06-05": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-2.5-pro-preview-tts": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-3-pro-image-preview": {
    "display_name": "Gemini 1.0 Pro",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/gemini-3-pro-preview": {
    "display_name": "Gemini 1.0 Pro",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/gemini-embedding-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/gemini-exp-1114": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "experimental"
  },
  "gemini/gemini-exp-1206": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "experimental"
  },
  "gemini/gemini-flash-latest": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "gemini/gemini-flash-lite-latest": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "gemini/gemini-gemma-2-27b-it": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "gemini/gemini-gemma-2-9b-it": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/gemini-pro": {
    "display_name": "Gemini 1.0 Pro",
    "model_vendor": "google"
  },
  "gemini/gemini-pro-vision": {
    "display_name": "Gemini 1.0 Pro",
    "model_vendor": "google"
  },
  "gemini/gemma-3-27b-it": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "gemini/imagen-3.0-fast-generate-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/imagen-3.0-generate-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/imagen-3.0-generate-002": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "002"
  },
  "gemini/imagen-4.0-fast-generate-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/imagen-4.0-generate-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/imagen-4.0-ultra-generate-001": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/learnlm-1.5-pro-experimental": {
    "display_name": "Gemini 1.5 Pro",
    "model_vendor": "google",
    "model_version": "experimental"
  },
  "gemini/veo-2.0-generate-001": {
    "display_name": "Gemini 2.0",
    "model_vendor": "google",
    "model_version": "001"
  },
  "gemini/veo-3.0-fast-generate-preview": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/veo-3.0-generate-preview": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/veo-3.1-fast-generate-preview": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "gemini/veo-3.1-generate-preview": {
    "display_name": "Gemini",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "global.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "global.anthropic.claude-sonnet-4-20250514-v1:0": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "global.anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929"
  },
  "gpt-3.5-turbo": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai"
  },
  "gpt-3.5-turbo-0125": {
    "display_name": "GPT 3.5 Turbo 0125",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "gpt-3.5-turbo-0301": {
    "display_name": "GPT 3.5 Turbo 0301",
    "model_vendor": "openai",
    "model_version": "0301"
  },
  "gpt-3.5-turbo-0613": {
    "display_name": "GPT 3.5 Turbo 0613",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "gpt-3.5-turbo-1106": {
    "display_name": "GPT 3.5 Turbo 1106",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "gpt-3.5-turbo-16k": {
    "display_name": "GPT-3.5 Turbo 16K",
    "model_vendor": "openai"
  },
  "gpt-3.5-turbo-16k-0613": {
    "display_name": "GPT 3.5 Turbo 16k 0613",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "gpt-4": {
    "display_name": "GPT-4",
    "model_vendor": "openai"
  },
  "gpt-4-0125-preview": {
    "display_name": "GPT-4 Preview",
    "model_vendor": "openai",
    "model_version": "0125"
  },
  "gpt-4-0314": {
    "display_name": "GPT-4",
    "model_vendor": "openai",
    "model_version": "0314"
  },
  "gpt-4-0613": {
    "display_name": "GPT-4",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "gpt-4-1106-preview": {
    "display_name": "GPT-4 Preview",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "gpt-4-1106-vision-preview": {
    "display_name": "GPT-4 Vision Preview",
    "model_vendor": "openai",
    "model_version": "1106"
  },
  "gpt-4-32k": {
    "display_name": "GPT-4 32K",
    "model_vendor": "openai"
  },
  "gpt-4-32k-0314": {
    "display_name": "GPT-4 32K",
    "model_vendor": "openai",
    "model_version": "0314"
  },
  "gpt-4-32k-0613": {
    "display_name": "GPT-4 32K",
    "model_vendor": "openai",
    "model_version": "0613"
  },
  "gpt-4-turbo": {
    "display_name": "GPT-4 Turbo",
    "model_vendor": "openai"
  },
  "gpt-4-turbo-2024-04-09": {
    "display_name": "GPT-4 Turbo",
    "model_vendor": "openai",
    "model_version": "2024-04-09"
  },
  "gpt-4-turbo-preview": {
    "display_name": "GPT-4 Turbo Preview",
    "model_vendor": "openai"
  },
  "gpt-4-vision-preview": {
    "display_name": "GPT-4 Vision Preview",
    "model_vendor": "openai"
  },
  "gpt-4.1": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai"
  },
  "gpt-4.1-2025-04-14": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "gpt-4.1-mini": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai"
  },
  "gpt-4.1-mini-2025-04-14": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "gpt-4.1-nano": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai"
  },
  "gpt-4.1-nano-2025-04-14": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "gpt-4.5-preview": {
    "display_name": "GPT-4.5 Preview",
    "model_vendor": "openai"
  },
  "gpt-4.5-preview-2025-02-27": {
    "display_name": "GPT-4.5 Preview",
    "model_vendor": "openai",
    "model_version": "2025-02-27"
  },
  "gpt-4o": {
    "display_name": "GPT-4o",
    "model_vendor": "openai"
  },
  "gpt-4o-2024-05-13": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-05-13"
  },
  "gpt-4o-2024-08-06": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-08-06"
  },
  "gpt-4o-2024-11-20": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-11-20"
  },
  "gpt-4o-audio-preview": {
    "display_name": "GPT-4o Audio Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-audio-preview-2024-10-01": {
    "display_name": "GPT-4o Audio Preview",
    "model_vendor": "openai",
    "model_version": "2024-10-01"
  },
  "gpt-4o-audio-preview-2024-12-17": {
    "display_name": "GPT-4o Audio Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "gpt-4o-audio-preview-2025-06-03": {
    "display_name": "GPT-4o Audio Preview",
    "model_vendor": "openai",
    "model_version": "2025-06-03"
  },
  "gpt-4o-mini": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai"
  },
  "gpt-4o-mini-2024-07-18": {
    "display_name": "GPT-4o Mini",
    "model_vendor": "openai",
    "model_version": "2024-07-18"
  },
  "gpt-4o-mini-audio-preview": {
    "display_name": "GPT-4o Mini Audio Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-mini-audio-preview-2024-12-17": {
    "display_name": "GPT-4o Mini Audio Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "gpt-4o-mini-realtime-preview": {
    "display_name": "GPT-4o Mini Realtime Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-mini-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Mini Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "gpt-4o-mini-search-preview": {
    "display_name": "GPT 4o Mini Search Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-mini-search-preview-2025-03-11": {
    "display_name": "GPT 4o Mini Search Preview",
    "model_vendor": "openai",
    "model_version": "2025-03-11"
  },
  "gpt-4o-realtime-preview": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-realtime-preview-2024-10-01": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-10-01"
  },
  "gpt-4o-realtime-preview-2024-12-17": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "gpt-4o-realtime-preview-2025-06-03": {
    "display_name": "GPT-4o Realtime Preview",
    "model_vendor": "openai",
    "model_version": "2025-06-03"
  },
  "gpt-4o-search-preview": {
    "display_name": "GPT 4o Search Preview",
    "model_vendor": "openai"
  },
  "gpt-4o-search-preview-2025-03-11": {
    "display_name": "GPT 4o Search Preview",
    "model_vendor": "openai",
    "model_version": "2025-03-11"
  },
  "gpt-5": {
    "display_name": "GPT-5",
    "model_vendor": "openai"
  },
  "gpt-5-2025-08-07": {
    "display_name": "GPT-5",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "gpt-5-chat": {
    "display_name": "GPT 5 Chat",
    "model_vendor": "openai"
  },
  "gpt-5-chat-latest": {
    "display_name": "GPT 5 Chat",
    "model_vendor": "openai"
  },
  "gpt-5-codex": {
    "display_name": "GPT-5 Codex",
    "model_vendor": "openai"
  },
  "gpt-5-mini": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai"
  },
  "gpt-5-mini-2025-08-07": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "gpt-5-nano": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai"
  },
  "gpt-5-nano-2025-08-07": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai",
    "model_version": "2025-08-07"
  },
  "gpt-5-pro": {
    "display_name": "GPT-5 Pro",
    "model_vendor": "openai"
  },
  "gpt-5-pro-2025-10-06": {
    "display_name": "GPT-5 Pro",
    "model_vendor": "openai",
    "model_version": "2025-10-06"
  },
  "gpt-5.4": {
    "display_name": "GPT-5.4",
    "model_vendor": "openai"
  },
  "gpt-5.2-pro-2025-12-11": {
    "display_name": "GPT-5.2 Pro",
    "model_vendor": "openai",
    "model_version": "2025-12-11"
  },
  "gpt-5.2-pro": {
    "display_name": "GPT-5.2 Pro",
    "model_vendor": "openai"
  },
  "gpt-5.2-chat-latest": {
    "display_name": "GPT 5.2 Chat",
    "model_vendor": "openai"
  },
  "gpt-5.2-2025-12-11": {
    "display_name": "GPT 5.2",
    "model_vendor": "openai",
    "model_version": "2025-12-11"
  },
  "gpt-5.2": {
    "display_name": "GPT 5.2",
    "model_vendor": "openai"
  },
  "gpt-5.1": {
    "display_name": "GPT 5.1",
    "model_vendor": "openai"
  },
  "gpt-5.1-2025-11-13": {
    "display_name": "GPT 5.1",
    "model_vendor": "openai",
    "model_version": "2025-11-13"
  },
  "gpt-5.1-chat-latest": {
    "display_name": "GPT 5.1 Chat",
    "model_vendor": "openai"
  },
  "gpt-5.1-codex": {
    "display_name": "GPT-5.1 Codex",
    "model_vendor": "openai"
  },
  "gpt-5.1-codex-mini": {
    "display_name": "GPT-5.1 Codex Mini",
    "model_vendor": "openai"
  },
  "gpt-image-1-mini": {
    "display_name": "GPT Image 1 Mini",
    "model_vendor": "openai"
  },
  "gpt-realtime": {
    "display_name": "GPT Realtime",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "gpt-realtime-2025-08-28": {
    "display_name": "GPT Realtime",
    "model_vendor": "openai",
    "model_version": "2025-08-28"
  },
  "gpt-realtime-mini": {
    "display_name": "GPT Realtime Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "jp.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929"
  },
  "medlm-large": {
    "display_name": "MedLM Large",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "medlm-medium": {
    "display_name": "MedLM Medium",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "meta.llama2-13b-chat-v1": {
    "display_name": "Llama 2 13B Chat",
    "model_vendor": "meta",
    "model_version": "v1"
  },
  "meta.llama2-70b-chat-v1": {
    "display_name": "Llama 2 70B Chat",
    "model_vendor": "meta",
    "model_version": "v1"
  },
  "meta.llama3-1-405b-instruct-v1:0": {
    "display_name": "Llama 3.1 405B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-1-70b-instruct-v1:0": {
    "display_name": "Llama 3.1 70B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-1-8b-instruct-v1:0": {
    "display_name": "Llama 3.1 8B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-2-11b-instruct-v1:0": {
    "display_name": "Llama 3.2 11B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-2-1b-instruct-v1:0": {
    "display_name": "Llama 3.2 1B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-2-3b-instruct-v1:0": {
    "display_name": "Llama 3.2 3B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-2-90b-instruct-v1:0": {
    "display_name": "Llama 3.2 90B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-3-70b-instruct-v1:0": {
    "display_name": "Llama 3.3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-70b-instruct-v1:0": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama3-8b-instruct-v1:0": {
    "display_name": "Llama 3 8B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama4-maverick-17b-instruct-v1:0": {
    "display_name": "Llama 4 Maverick 17B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "meta.llama4-scout-17b-instruct-v1:0": {
    "display_name": "Llama 4 Scout 17B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "mistral.mistral-7b-instruct-v0:2": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0:2"
  },
  "mistral.mistral-large-2402-v1:0": {
    "display_name": "Mistral Large 24.02",
    "model_vendor": "mistral",
    "model_version": "1:0"
  },
  "mistral.mistral-large-2407-v1:0": {
    "display_name": "Mistral Large 24.07",
    "model_vendor": "mistral",
    "model_version": "1:0"
  },
  "mistral.mistral-small-2402-v1:0": {
    "display_name": "Mistral Small 24.02",
    "model_vendor": "mistral",
    "model_version": "1:0"
  },
  "mistral.mixtral-8x7b-instruct-v0:1": {
    "display_name": "Mixtral 8x7B Instruct",
    "model_vendor": "mistral",
    "model_version": "0:1"
  },
  "mistral/codestral-2405": {
    "display_name": "Codestral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/codestral-embed": {
    "display_name": "Codestral Embed",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/codestral-embed-2505": {
    "display_name": "Codestral Embed",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/codestral-latest": {
    "display_name": "Codestral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/codestral-mamba-latest": {
    "display_name": "Codestral Mamba",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/devstral-medium-2507": {
    "display_name": "Devstral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/devstral-small-2505": {
    "display_name": "Devstral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/devstral-small-2507": {
    "display_name": "Devstral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/magistral-medium-2506": {
    "display_name": "Magistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/magistral-medium-2509": {
    "display_name": "Magistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/magistral-medium-latest": {
    "display_name": "Magistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/magistral-small-2506": {
    "display_name": "Magistral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/magistral-small-latest": {
    "display_name": "Magistral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-embed": {
    "display_name": "Mistral Embed",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-large-2402": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-large-2407": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-large-2411": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-large-latest": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-medium": {
    "display_name": "Mistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-medium-2312": {
    "display_name": "Mistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-medium-2505": {
    "display_name": "Mistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-medium-latest": {
    "display_name": "Mistral Medium",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-ocr-2505-completion": {
    "display_name": "Mistral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-ocr-latest": {
    "display_name": "Mistral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-small": {
    "display_name": "Mistral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-small-latest": {
    "display_name": "Mistral Small",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/mistral-tiny": {
    "display_name": "Mistral Tiny",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-codestral-mamba": {
    "display_name": "Codestral Mamba",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-mistral-7b": {
    "display_name": "Open Mistral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-mistral-nemo": {
    "display_name": "Open Mistral Nemo",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-mistral-nemo-2407": {
    "display_name": "Open Mistral Nemo",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-mixtral-8x22b": {
    "display_name": "Open Mixtral 8x22B",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/open-mixtral-8x7b": {
    "display_name": "Open Mixtral 8x7B",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/pixtral-12b-2409": {
    "display_name": "Pixtral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/pixtral-large-2411": {
    "display_name": "Pixtral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "mistral/pixtral-large-latest": {
    "display_name": "Pixtral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "o1": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o1-2024-12-17": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "2024-12-17"
  },
  "o1-mini": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o1-mini-2024-09-12": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o1-preview": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o1-preview-2024-09-12": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o1-pro": {
    "display_name": "o1 Pro",
    "model_vendor": "openai"
  },
  "o1-pro-2025-03-19": {
    "display_name": "o1 Pro",
    "model_vendor": "openai",
    "model_version": "2025-03-19"
  },
  "o3": {
    "display_name": "o3",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o3-2025-04-16": {
    "display_name": "o3",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o3-deep-research": {
    "display_name": "o3 Deep Research",
    "model_vendor": "openai"
  },
  "o3-deep-research-2025-06-26": {
    "display_name": "o3 Deep Research",
    "model_vendor": "openai",
    "model_version": "2025-06-26"
  },
  "o3-mini": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o3-mini-2025-01-31": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o3-pro": {
    "display_name": "o3 Pro",
    "model_vendor": "openai"
  },
  "o3-pro-2025-06-10": {
    "display_name": "o3 Pro",
    "model_vendor": "openai",
    "model_version": "2025-06-10"
  },
  "o4-mini": {
    "display_name": "o4 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o4-mini-2025-04-16": {
    "display_name": "o4 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "o4-mini-deep-research": {
    "display_name": "o4 Mini Deep Research",
    "model_vendor": "openai"
  },
  "o4-mini-deep-research-2025-06-26": {
    "display_name": "o4 Mini Deep Research",
    "model_vendor": "openai",
    "model_version": "2025-06-26"
  },
  "ollama/codegeex4": {
    "display_name": "CodeGeeX4",
    "model_vendor": "zhipu",
    "model_version": "latest"
  },
  "ollama/codegemma": {
    "display_name": "Codegemma",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "ollama/codellama": {
    "display_name": "CodeLlama",
    "model_vendor": "meta",
    "model_version": "latest"
  },
  "ollama/deepseek-coder-v2-base": {
    "display_name": "DeepSeek Coder v2 Base",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "ollama/deepseek-coder-v2-instruct": {
    "display_name": "DeepSeek Coder v2 Instruct",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "ollama/deepseek-coder-v2-lite-base": {
    "display_name": "DeepSeek Coder v2 Lite Base",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "ollama/deepseek-coder-v2-lite-instruct": {
    "display_name": "DeepSeek Coder v2 Lite Instruct",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "ollama/deepseek-v3.1:671b-cloud": {
    "display_name": "DeepSeek V3.1:671B Cloud",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "ollama/gpt-oss:120b-cloud": {
    "display_name": "GPT Open-Source 120B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "ollama/gpt-oss:20b-cloud": {
    "display_name": "GPT Open-Source 20B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "ollama/internlm2_5-20b-chat": {
    "display_name": "InternLM 2.5 20B Chat",
    "model_vendor": "shanghai-ai-lab",
    "model_version": "latest"
  },
  "ollama/llama2": {
    "display_name": "Llama 2",
    "model_vendor": "meta"
  },
  "ollama/llama2-uncensored": {
    "display_name": "Llama 2 Uncensored",
    "model_vendor": "meta"
  },
  "ollama/llama2:13b": {
    "display_name": "Llama 2:13B",
    "model_vendor": "meta"
  },
  "ollama/llama2:70b": {
    "display_name": "Llama 2:70B",
    "model_vendor": "meta"
  },
  "ollama/llama2:7b": {
    "display_name": "Llama 2:7B",
    "model_vendor": "meta"
  },
  "ollama/llama3": {
    "display_name": "Llama 3",
    "model_vendor": "meta"
  },
  "ollama/llama3.1": {
    "display_name": "Llama 3.1",
    "model_vendor": "meta"
  },
  "ollama/llama3:70b": {
    "display_name": "Llama 3:70B",
    "model_vendor": "meta"
  },
  "ollama/llama3:8b": {
    "display_name": "Llama 3:8B",
    "model_vendor": "meta"
  },
  "ollama/mistral": {
    "display_name": "Mistral",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "ollama/mistral-7B-Instruct-v0.1": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0.1"
  },
  "ollama/mistral-7B-Instruct-v0.2": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "v0.2"
  },
  "ollama/mistral-large-instruct-2407": {
    "display_name": "Mistral Large Instruct 24.07",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "ollama/mixtral-8x22B-Instruct-v0.1": {
    "display_name": "Mixtral 8x22B Instruct V0.1",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "ollama/mixtral-8x7B-Instruct-v0.1": {
    "display_name": "Mixtral 8x7B Instruct V0.1",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "ollama/orca-mini": {
    "display_name": "Orca Mini",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "ollama/qwen3-coder:480b-cloud": {
    "display_name": "Qwen3 Coder:480B Cloud",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "ollama/vicuna": {
    "display_name": "Vicuna",
    "model_vendor": "lmsys",
    "model_version": "latest"
  },
  "openai.gpt-oss-120b-1:0": {
    "display_name": "GPT Open-Source 120B",
    "model_vendor": "openai",
    "model_version": "v1:0"
  },
  "openai.gpt-oss-20b-1:0": {
    "display_name": "GPT Open-Source 20B",
    "model_vendor": "openai",
    "model_version": "v1:0"
  },
  "openai/container": {
    "display_name": "Container",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/agentica-org/deepcoder-14b-preview": {
    "display_name": "DeepCoder 14B Preview",
    "model_vendor": "agentica"
  },
  "openrouter/ai21/jamba-1-5-large": {
    "display_name": "Jamba 1.5 Large",
    "model_vendor": "ai21"
  },
  "openrouter/ai21/jamba-1-5-mini": {
    "display_name": "Jamba 1.5 Mini",
    "model_vendor": "ai21"
  },
  "openrouter/ai21/jamba-large-1.7": {
    "display_name": "Jamba Large 1.7",
    "model_vendor": "ai21"
  },
  "openrouter/aion-labs/aion-1.0": {
    "display_name": "AION 1.0",
    "model_vendor": "aion-labs"
  },
  "openrouter/alibaba/qwen-2.5-72b-instruct": {
    "display_name": "Qwen 2.5 72B Instruct",
    "model_vendor": "alibaba"
  },
  "openrouter/alibaba/qwen-2.5-coder-32b-instruct": {
    "display_name": "Qwen 2.5 Coder 32B",
    "model_vendor": "alibaba"
  },
  "openrouter/alibaba/tongyi-deepresearch-30b-a3b": {
    "display_name": "Tongyi DeepResearch 30B",
    "model_vendor": "alibaba"
  },
  "openrouter/alibaba/tongyi-deepresearch-30b-a3b:free": {
    "display_name": "Tongyi DeepResearch 30B (Free)",
    "model_vendor": "alibaba"
  },
  "openrouter/anthropic/claude-2": {
    "display_name": "Claude 2",
    "model_vendor": "anthropic",
    "model_version": "latest"
  },
  "openrouter/anthropic/claude-3-sonnet": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic"
  },
  "openrouter/anthropic/claude-3.5-sonnet": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "latest"
  },
  "openrouter/anthropic/claude-3.5-sonnet:beta": {
    "display_name": "Claude Sonnet 3.5:beta",
    "model_vendor": "anthropic",
    "model_version": "latest"
  },
  "openrouter/anthropic/claude-haiku-4.5": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "latest"
  },
  "openrouter/anthropic/claude-instant-v1": {
    "display_name": "Claude Instant",
    "model_vendor": "anthropic",
    "model_version": "v1"
  },
  "openrouter/anthropic/claude-opus-4": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic"
  },
  "openrouter/anthropic/claude-opus-4.1": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic"
  },
  "openrouter/anthropic/claude-opus-4.5": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic"
  },
  "openrouter/anthropic/claude-sonnet-4": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic"
  },
  "openrouter/anthropic/claude-sonnet-4.5": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic"
  },
  "openrouter/baidu/ernie-4.5-300b-a47b": {
    "display_name": "ERNIE 4.5 300B",
    "model_vendor": "baidu"
  },
  "openrouter/baidu/ernie-4.5-vl-28b-a3b": {
    "display_name": "ERNIE 4.5 VL 28B",
    "model_vendor": "baidu"
  },
  "openrouter/bytedance/ui-tars-1.5-7b": {
    "display_name": "UI-TARS 1.5 7B",
    "model_vendor": "bytedance",
    "model_version": "latest"
  },
  "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
    "display_name": "Dolphin Mixtral 8x7B",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/cohere/command-a": {
    "display_name": "Command A",
    "model_vendor": "cohere"
  },
  "openrouter/cohere/command-r": {
    "display_name": "Command R",
    "model_vendor": "cohere"
  },
  "openrouter/cohere/command-r-08-2024": {
    "display_name": "Command R",
    "model_vendor": "cohere",
    "model_version": "08-2024"
  },
  "openrouter/cohere/command-r-plus": {
    "display_name": "Command R Plus",
    "model_vendor": "cohere"
  },
  "openrouter/cohere/command-r-plus-08-2024": {
    "display_name": "Command R Plus",
    "model_vendor": "cohere",
    "model_version": "08-2024"
  },
  "openrouter/databricks/dbrx-instruct": {
    "display_name": "DBRX Instruct",
    "model_vendor": "databricks",
    "model_version": "latest"
  },
  "openrouter/deepcogito/cogito-v2-preview-deepseek-671b": {
    "display_name": "Cogito V2 Preview DeepSeek 671B",
    "model_vendor": "deepcogito"
  },
  "openrouter/deepcogito/cogito-v2-preview-llama-109b-moe": {
    "display_name": "Cogito V2 Preview Llama 109B MoE",
    "model_vendor": "deepcogito"
  },
  "openrouter/deepseek/deepseek-chat": {
    "display_name": "DeepSeek Chat",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-chat-v3-0324": {
    "display_name": "DeepSeek Chat v3 0324",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-chat-v3.1": {
    "display_name": "DeepSeek Chat V3.1",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-coder": {
    "display_name": "DeepSeek Coder",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-r1": {
    "display_name": "DeepSeek R1",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-r1-0528": {
    "display_name": "DeepSeek R1 0528",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "openrouter/deepseek/deepseek-v3.2-exp": {
    "display_name": "DeepSeek V3.2",
    "model_vendor": "deepseek",
    "model_version": "experimental"
  },
  "openrouter/fireworks/firellava-13b": {
    "display_name": "FireLLaVA 13B",
    "model_vendor": "fireworks",
    "model_version": "latest"
  },
  "openrouter/google/gemini-2.0-flash-001": {
    "display_name": "Gemini 2.0 Flash",
    "model_vendor": "google",
    "model_version": "001"
  },
  "openrouter/google/gemini-2.5-flash": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "openrouter/google/gemini-2.5-pro": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "openrouter/google/gemini-3-pro-preview": {
    "display_name": "Gemini 3 Pro Preview",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "openrouter/google/gemini-pro-1.5": {
    "display_name": "Gemini Pro 1.5",
    "model_vendor": "google"
  },
  "openrouter/google/gemini-pro-vision": {
    "display_name": "Gemini Pro Vision",
    "model_vendor": "google"
  },
  "openrouter/google/gemma-2-27b-it": {
    "display_name": "Gemma 2 27B",
    "model_vendor": "google"
  },
  "openrouter/google/gemma-2-9b-it": {
    "display_name": "Gemma 2 9B",
    "model_vendor": "google"
  },
  "openrouter/google/gemma-2-9b-it:free": {
    "display_name": "Gemma 2 9B (Free)",
    "model_vendor": "google"
  },
  "openrouter/google/gemma-3n-e4b-it": {
    "display_name": "Gemma 3N E4B",
    "model_vendor": "google"
  },
  "openrouter/google/gemma-3n-e4b-it:free": {
    "display_name": "Gemma 3N E4B (Free)",
    "model_vendor": "google"
  },
  "openrouter/google/palm-2-chat-bison": {
    "display_name": "PaLM 2 Chat Bison",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "openrouter/google/palm-2-codechat-bison": {
    "display_name": "PaLM 2 Codechat Bison",
    "model_vendor": "google",
    "model_version": "latest"
  },
  "openrouter/gryphe/mythomax-l2-13b": {
    "display_name": "MythoMax L2 13B",
    "model_vendor": "gryphe",
    "model_version": "latest"
  },
  "openrouter/inclusionai/ring-1t": {
    "display_name": "Ring 1T",
    "model_vendor": "inclusionai"
  },
  "openrouter/jondurbin/airoboros-l2-70b-2.1": {
    "display_name": "Airoboros L2 70B",
    "model_vendor": "jondurbin"
  },
  "openrouter/mancer/weaver": {
    "display_name": "Weaver",
    "model_vendor": "mancer",
    "model_version": "latest"
  },
  "openrouter/meta-llama/codellama-34b-instruct": {
    "display_name": "CodeLlama 34B Instruct",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-2-13b-chat": {
    "display_name": "Llama 2 13B Chat",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-2-70b-chat": {
    "display_name": "Llama 2 70B Chat",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-3-70b-instruct": {
    "display_name": "Llama 3 70B Instruct",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-3-70b-instruct:nitro": {
    "display_name": "Llama 3 70B Instruct:nitro",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-3-8b-instruct:extended": {
    "display_name": "Llama 3 8B Instruct:extended",
    "model_vendor": "meta"
  },
  "openrouter/meta-llama/llama-3-8b-instruct:free": {
    "display_name": "Llama 3 8B Instruct:free",
    "model_vendor": "meta"
  },
  "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
    "display_name": "WizardLM 2 8x22B",
    "model_vendor": "microsoft",
    "model_version": "latest"
  },
  "openrouter/minimax/minimax-m2": {
    "display_name": "MiniMax M2",
    "model_vendor": "minimax",
    "model_version": "latest"
  },
  "openrouter/mistralai/mistral-7b-instruct": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/mistralai/mistral-7b-instruct:free": {
    "display_name": "Mistral 7B Instruct",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/mistralai/mistral-large": {
    "display_name": "Mistral Large",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/mistralai/mistral-small-3.1-24b-instruct": {
    "display_name": "Mistral Small 3.1 24B Instruct",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/mistralai/mistral-small-3.2-24b-instruct": {
    "display_name": "Mistral Small 3.2 24B Instruct",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/mistralai/mixtral-8x22b-instruct": {
    "display_name": "Mixtral 8x22B Instruct",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "openrouter/nousresearch/nous-hermes-llama2-13b": {
    "display_name": "Nous Hermes Llama 2 13B",
    "model_vendor": "meta"
  },
  "openrouter/openai/gpt-3.5-turbo": {
    "display_name": "GPT-3.5 Turbo",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-3.5-turbo-16k": {
    "display_name": "GPT-3.5 Turbo 16K",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4": {
    "display_name": "GPT-4",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4-vision-preview": {
    "display_name": "GPT-4 Vision Preview",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4.1": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4.1-2025-04-14": {
    "display_name": "GPT-4.1",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "openrouter/openai/gpt-4.1-mini": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4.1-mini-2025-04-14": {
    "display_name": "GPT-4.1 Mini",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "openrouter/openai/gpt-4.1-nano": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4.1-nano-2025-04-14": {
    "display_name": "GPT-4.1 Nano",
    "model_vendor": "openai",
    "model_version": "2025-04-14"
  },
  "openrouter/openai/gpt-4o": {
    "display_name": "GPT-4o",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-4o-2024-05-13": {
    "display_name": "GPT-4o",
    "model_vendor": "openai",
    "model_version": "2024-05-13"
  },
  "openrouter/openai/gpt-5": {
    "display_name": "GPT-5",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-5-chat": {
    "display_name": "GPT 5 Chat",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-5-codex": {
    "display_name": "GPT-5 Codex",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-5-mini": {
    "display_name": "GPT-5 Mini",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-5-nano": {
    "display_name": "GPT 5 Nano",
    "model_vendor": "openai"
  },
  "openrouter/openai/gpt-oss-120b": {
    "display_name": "GPT Open-Source 120B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/gpt-oss-20b": {
    "display_name": "GPT Open-Source 20B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o1": {
    "display_name": "o1",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o1-mini": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o1-mini-2024-09-12": {
    "display_name": "o1 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o1-preview": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o1-preview-2024-09-12": {
    "display_name": "o1 Preview",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o3-mini": {
    "display_name": "o3 Mini",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/openai/o3-mini-high": {
    "display_name": "O3 Mini High",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "openrouter/pygmalionai/mythalion-13b": {
    "display_name": "Mythalion 13B",
    "model_vendor": "pygmalionai",
    "model_version": "latest"
  },
  "openrouter/qwen/qwen-2.5-coder-32b-instruct": {
    "display_name": "Qwen 2.5 Coder 32B Instruct",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "openrouter/qwen/qwen-vl-plus": {
    "display_name": "Qwen Vl Plus",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "openrouter/qwen/qwen3-coder": {
    "display_name": "Qwen3 Coder",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "openrouter/switchpoint/router": {
    "display_name": "SwitchPoint Router",
    "model_vendor": "switchpoint",
    "model_version": "latest"
  },
  "openrouter/undi95/remm-slerp-l2-13b": {
    "display_name": "ReMM SLERP L2 13B",
    "model_vendor": "undi95",
    "model_version": "latest"
  },
  "openrouter/x-ai/grok-4": {
    "display_name": "Grok 4",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "openrouter/x-ai/grok-4-fast:free": {
    "display_name": "Grok 4 Fast:free",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "openrouter/z-ai/glm-4.6": {
    "display_name": "GLM 4.6",
    "model_vendor": "zhipu",
    "model_version": "latest"
  },
  "openrouter/z-ai/glm-4.6:exacto": {
    "display_name": "GLM 4.6 Exacto",
    "model_vendor": "zhipu",
    "model_version": "latest"
  },
  "qwen.qwen3-235b-a22b-2507-v1:0": {
    "display_name": "Qwen.qwen3 235B A22b 2507",
    "model_vendor": "alibaba",
    "model_version": "1:0"
  },
  "qwen.qwen3-32b-v1:0": {
    "display_name": "Qwen.qwen3 32B",
    "model_vendor": "alibaba",
    "model_version": "1:0"
  },
  "qwen.qwen3-coder-30b-a3b-v1:0": {
    "display_name": "Qwen.qwen3 Coder 30B A3b",
    "model_vendor": "alibaba",
    "model_version": "1:0"
  },
  "qwen.qwen3-coder-480b-a35b-v1:0": {
    "display_name": "Qwen.qwen3 Coder 480B A35b",
    "model_vendor": "alibaba",
    "model_version": "1:0"
  },
  "twelvelabs.pegasus-1-2-v1:0": {
    "display_name": "Pegasus 1.2",
    "model_vendor": "twelvelabs",
    "model_version": "v1:0"
  },
  "us.amazon.nova-lite-v1:0": {
    "display_name": "Nova Lite",
    "model_vendor": "amazon",
    "model_version": "1:0"
  },
  "us.amazon.nova-micro-v1:0": {
    "display_name": "Nova Micro",
    "model_vendor": "amazon",
    "model_version": "1:0"
  },
  "us.amazon.nova-premier-v1:0": {
    "display_name": "Nova Premier",
    "model_vendor": "amazon",
    "model_version": "v1:0"
  },
  "us.amazon.nova-pro-v1:0": {
    "display_name": "Nova Pro",
    "model_vendor": "amazon",
    "model_version": "1:0"
  },
  "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620"
  },
  "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20241022"
  },
  "us.anthropic.claude-3-sonnet-20240229-v1:0": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic",
    "model_version": "20240229"
  },
  "us.anthropic.claude-haiku-4-5-20251001-v1:0": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "us.anthropic.claude-opus-4-1-20250805-v1:0": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805"
  },
  "us.anthropic.claude-opus-4-20250514-v1:0": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "us.anthropic.claude-opus-4-5-20251101-v1:0": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251101"
  },
  "us.anthropic.claude-sonnet-4-20250514-v1:0": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "us.anthropic.claude-sonnet-4-5-20250929-v1:0": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929"
  },
  "us.deepseek.r1-v1:0": {
    "display_name": "DeepSeek R1",
    "model_vendor": "deepseek",
    "model_version": "v1:0"
  },
  "us.meta.llama3-1-405b-instruct-v1:0": {
    "display_name": "Llama 3.1 405B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-1-70b-instruct-v1:0": {
    "display_name": "Llama 3.1 70B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-1-8b-instruct-v1:0": {
    "display_name": "Llama 3.1 8B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-2-11b-instruct-v1:0": {
    "display_name": "Llama 3.2 11B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-2-1b-instruct-v1:0": {
    "display_name": "Llama 3.2 1B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-2-3b-instruct-v1:0": {
    "display_name": "Llama 3.2 3B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-2-90b-instruct-v1:0": {
    "display_name": "Llama 3.2 90B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama3-3-70b-instruct-v1:0": {
    "display_name": "Llama 3.3 70B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama4-maverick-17b-instruct-v1:0": {
    "display_name": "Llama 4 Maverick 17B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.meta.llama4-scout-17b-instruct-v1:0": {
    "display_name": "Llama 4 Scout 17B Instruct",
    "model_vendor": "meta",
    "model_version": "1:0"
  },
  "us.mistral.pixtral-large-2502-v1:0": {
    "display_name": "Pixtral Large 25.02",
    "model_vendor": "mistral",
    "model_version": "1:0"
  },
  "us.twelvelabs.pegasus-1-2-v1:0": {
    "display_name": "Pegasus 1.2",
    "model_vendor": "twelvelabs",
    "model_version": "v1:0"
  },
  "vertex_ai/claude-3-5-sonnet": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-3-5-sonnet@20240620": {
    "display_name": "Claude Sonnet 3.5",
    "model_vendor": "anthropic",
    "model_version": "20240620"
  },
  "vertex_ai/claude-3-sonnet": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-3-sonnet@20240229": {
    "display_name": "Claude Sonnet 3",
    "model_vendor": "anthropic",
    "model_version": "20240229"
  },
  "vertex_ai/claude-haiku-4-5": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-haiku-4-5@20251001": {
    "display_name": "Claude Haiku 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251001"
  },
  "vertex_ai/claude-opus-4": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-opus-4-1": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-opus-4-1@20250805": {
    "display_name": "Claude Opus 4.1",
    "model_vendor": "anthropic",
    "model_version": "20250805"
  },
  "vertex_ai/claude-opus-4-5": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-opus-4-5@20251101": {
    "display_name": "Claude Opus 4.5",
    "model_vendor": "anthropic",
    "model_version": "20251101"
  },
  "vertex_ai/claude-opus-4@20250514": {
    "display_name": "Claude Opus 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "vertex_ai/claude-sonnet-4": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-sonnet-4-5": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic"
  },
  "vertex_ai/claude-sonnet-4-5@20250929": {
    "display_name": "Claude Sonnet 4.5",
    "model_vendor": "anthropic",
    "model_version": "20250929"
  },
  "vertex_ai/claude-sonnet-4@20250514": {
    "display_name": "Claude Sonnet 4",
    "model_vendor": "anthropic",
    "model_version": "20250514"
  },
  "vertex_ai/codestral-2": {
    "display_name": "Codestral 2",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/codestral-2501": {
    "display_name": "Codestral 25.01",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/codestral-2@001": {
    "display_name": "Codestral 2@001",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/codestral@2405": {
    "display_name": "Codestral@2405",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/codestral@latest": {
    "display_name": "Codestral@latest",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/deepseek-ai/deepseek-r1-0528-maas": {
    "display_name": "DeepSeek R1 0528 Maas",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "vertex_ai/deepseek-ai/deepseek-v3.1-maas": {
    "display_name": "DeepSeek V3.1 Maas",
    "model_vendor": "deepseek",
    "model_version": "latest"
  },
  "vertex_ai/gemini-2.5-flash": {
    "display_name": "Gemini 2.5 Flash",
    "model_vendor": "google"
  },
  "vertex_ai/gemini-2.5-flash-lite": {
    "display_name": "Gemini 2.5 Flash Lite",
    "model_vendor": "google"
  },
  "vertex_ai/gemini-2.5-pro": {
    "display_name": "Gemini 2.5 Pro",
    "model_vendor": "google"
  },
  "vertex_ai/gemini-3-pro-preview": {
    "display_name": "Gemini 3 Pro Preview",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "vertex_ai/gemini-3-flash-preview": {
    "display_name": "Gemini 3 Flash Preview",
    "model_vendor": "google",
    "model_version": "preview"
  },
  "vertex_ai/jamba-1.5": {
    "display_name": "Jamba 1.5",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "vertex_ai/jamba-1.5-large": {
    "display_name": "Jamba 1.5 Large",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "vertex_ai/jamba-1.5-large@001": {
    "display_name": "Jamba 1.5 Large@001",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "vertex_ai/jamba-1.5-mini": {
    "display_name": "Jamba 1.5 Mini",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "vertex_ai/jamba-1.5-mini@001": {
    "display_name": "Jamba 1.5 Mini@001",
    "model_vendor": "ai21",
    "model_version": "latest"
  },
  "vertex_ai/meta/llama-3.1-405b-instruct-maas": {
    "display_name": "Llama 3.1 405B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-3.1-70b-instruct-maas": {
    "display_name": "Llama 3.1 70B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-3.1-8b-instruct-maas": {
    "display_name": "Llama 3.1 8B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
    "display_name": "Llama 3.2 90B Vision Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas": {
    "display_name": "Llama 4 Maverick 17B 128e Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-4-maverick-17b-16e-instruct-maas": {
    "display_name": "Llama 4 Maverick 17B 16e Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas": {
    "display_name": "Llama 4 Scout 17B 128e Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas": {
    "display_name": "Llama 4 Scout 17B 16e Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama3-405b-instruct-maas": {
    "display_name": "Llama 3 405B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama3-70b-instruct-maas": {
    "display_name": "Llama 3 70B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/meta/llama3-8b-instruct-maas": {
    "display_name": "Llama 3 8B Instruct Maas",
    "model_vendor": "meta"
  },
  "vertex_ai/minimaxai/minimax-m2-maas": {
    "display_name": "MiniMax M2",
    "model_vendor": "minimax",
    "model_version": "latest"
  },
  "vertex_ai/mistral-large-2411": {
    "display_name": "Mistral Large 24.11",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-large@2407": {
    "display_name": "Mistral Large@24.07",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-large@2411-001": {
    "display_name": "Mistral Large@24.11 001",
    "model_vendor": "mistral",
    "model_version": "001"
  },
  "vertex_ai/mistral-large@latest": {
    "display_name": "Mistral Large@latest",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-medium-3": {
    "display_name": "Mistral Medium 3",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-medium-3@001": {
    "display_name": "Mistral Medium 3@001",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-nemo@2407": {
    "display_name": "Mistral Nemo@24.07",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-nemo@latest": {
    "display_name": "Mistral Nemo@latest",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-small-2503": {
    "display_name": "Mistral Small 2503",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistral-small-2503@001": {
    "display_name": "Mistral Small 2503@001",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistralai/codestral-2": {
    "display_name": "Codestral 2",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistralai/codestral-2@001": {
    "display_name": "Codestral 2@001",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistralai/mistral-medium-3": {
    "display_name": "Mistral Medium 3",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/mistralai/mistral-medium-3@001": {
    "display_name": "Mistral Medium 3@001",
    "model_vendor": "mistral",
    "model_version": "latest"
  },
  "vertex_ai/moonshotai/kimi-k2-thinking-maas": {
    "display_name": "Kimi K2 Thinking",
    "model_vendor": "moonshot",
    "model_version": "latest"
  },
  "vertex_ai/openai/gpt-oss-120b-maas": {
    "display_name": "GPT Open-Source 120B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "vertex_ai/openai/gpt-oss-20b-maas": {
    "display_name": "GPT Open-Source 20B",
    "model_vendor": "openai",
    "model_version": "latest"
  },
  "vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas": {
    "display_name": "Qwen3 235B A22b Instruct 2507 Maas",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": {
    "display_name": "Qwen3 Coder 480B A35b Instruct Maas",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "vertex_ai/qwen/qwen3-next-80b-a3b-instruct-maas": {
    "display_name": "Qwen3 Next 80B A3b Instruct Maas",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "vertex_ai/qwen/qwen3-next-80b-a3b-thinking-maas": {
    "display_name": "Qwen3 Next 80B A3b Thinking Maas",
    "model_vendor": "alibaba",
    "model_version": "latest"
  },
  "xai/grok-2": {
    "display_name": "Grok 2",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-2-1212": {
    "display_name": "Grok 2",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-2-latest": {
    "display_name": "Grok 2",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-2-vision": {
    "display_name": "Grok 2 Vision",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-2-vision-1212": {
    "display_name": "Grok 2 Vision",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-2-vision-latest": {
    "display_name": "Grok 2 Vision",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-beta": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-fast-beta": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-fast-latest": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-latest": {
    "display_name": "Grok 3",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini-beta": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini-fast": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini-fast-beta": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini-fast-latest": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-3-mini-latest": {
    "display_name": "Grok 3 Mini",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-0709": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-1-fast": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-1-fast-non-reasoning": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-1-fast-non-reasoning-latest": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-1-fast-reasoning": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-1-fast-reasoning-latest": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-fast-non-reasoning": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-fast-reasoning": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-4-latest": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-beta": {
    "display_name": "Grok Beta",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-code-fast": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-code-fast-1": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-code-fast-1-0825": {
    "display_name": "Grok",
    "model_vendor": "xai",
    "model_version": "latest"
  },
  "xai/grok-vision-beta": {
    "display_name": "Grok Vision",
    "model_vendor": "xai",
    "model_version": "latest"
  }
}


================================================
FILE: backend/onyx/llm/model_name_parser.py
================================================
"""
LiteLLM Model Name Parser

Parses LiteLLM model strings and returns structured metadata for UI display.
All metadata comes from litellm's model_cost dictionary. Until this upstream patch to LiteLLM
is merged (https://github.com/BerriAI/litellm/pull/17330), we use the model_metadata_enrichments.json
to add these fields at server startup.

Enrichment fields:
- display_name: Human-friendly name (e.g., "Claude 3.5 Sonnet")
- model_vendor: The company that made the model (anthropic, openai, meta, etc.)
- model_version: Version string (e.g., "20241022-v2:0", "v1:0")

The parser only extracts provider and region from the model key - everything
else comes from enrichment.
"""

import re
from functools import lru_cache

from pydantic import BaseModel

from onyx.llm.constants import AGGREGATOR_PROVIDERS
from onyx.llm.constants import HYPHENATED_MODEL_NAMES
from onyx.llm.constants import LlmProviderNames
from onyx.llm.constants import MODEL_PREFIX_TO_VENDOR
from onyx.llm.constants import PROVIDER_DISPLAY_NAMES
from onyx.llm.constants import VENDOR_BRAND_NAMES


class ParsedModelName(BaseModel):
    """Structured representation of a parsed LiteLLM model name."""

    raw_name: str  # Original: "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"
    provider: str  # "bedrock", "azure", "openai", etc. (the API route)
    vendor: str | None = None  # From enrichment: "anthropic", "openai", "meta", etc.
    version: str | None = None  # From enrichment: "20241022-v2:0", "v1:0", etc.
    region: str | None = None  # Extracted: "us", "eu", or None
    display_name: str  # From enrichment: "Claude 3.5 Sonnet"
    provider_display_name: str  # Generated: "Claude (Bedrock - Anthropic)"


def _get_model_info(model_key: str) -> dict:
    """Get model info from litellm.model_cost."""
    from onyx.llm.litellm_singleton import litellm

    # Try exact key first
    info = litellm.model_cost.get(model_key)
    if info:
        return info

    # Try without provider prefix (e.g., "bedrock/anthropic.claude-..." -> "anthropic.claude-...")
    if "/" in model_key:
        return litellm.model_cost.get(model_key.split("/", 1)[-1], {})

    return {}


def _extract_provider(model_key: str) -> str:
    """Extract provider from model key prefix."""
    from onyx.llm.litellm_singleton import litellm

    if "/" in model_key:
        return model_key.split("/")[0]

    # No prefix - try to get from litellm.model_cost
    info = litellm.model_cost.get(model_key, {})
    litellm_provider = info.get("litellm_provider", "")

    if litellm_provider:
        # Normalize vertex_ai variants
        if litellm_provider.startswith(LlmProviderNames.VERTEX_AI):
            return LlmProviderNames.VERTEX_AI
        return litellm_provider

    return "unknown"


def _extract_region(model_key: str) -> str | None:
    """Extract region from model key (e.g., us., eu., apac. prefix)."""
    base = model_key.split("/")[-1].lower()

    for prefix in ["us.", "eu.", "apac.", "global.", "us-gov."]:
        if base.startswith(prefix):
            return prefix.rstrip(".")

    return None


def _format_name(name: str | None) -> str:
    """Format provider or vendor name with proper capitalization."""
    if not name:
        return "Unknown"
    return PROVIDER_DISPLAY_NAMES.get(name.lower(), name.replace("_", " ").title())


def _infer_vendor_from_model_name(model_name: str) -> str | None:
    """
    Infer vendor from model name patterns when enrichment data is missing.

    Uses MODEL_PREFIX_TO_VENDOR mapping to match model name prefixes.
    Returns lowercase vendor name for consistency with enrichment data.

    Examples:
        "gemini-3-flash-preview" → "google"
        "claude-3-5-sonnet" → "anthropic"
        "llama-3.1-70b" → "meta"
    """
    try:
        # Get the base model name (remove provider prefix if present)
        base_name = model_name.split("/")[-1].lower()

        # Try to match against known prefixes (sorted by length to match longest first)
        for prefix in sorted(MODEL_PREFIX_TO_VENDOR.keys(), key=len, reverse=True):
            if base_name.startswith(prefix):
                return MODEL_PREFIX_TO_VENDOR[prefix]
    except Exception:
        pass

    return None


def _generate_display_name_from_model(model_name: str) -> str:
    """
    Generate a human-friendly display name from a model identifier.

    Used as fallback when the model is not in enrichment data.
    Cleans up the raw model name by removing provider prefixes and
    formatting version numbers nicely.

    Examples:
        "vertex_ai/gemini-3-flash-preview" → "Gemini 3 Flash Preview"
        "gemini-2.5-pro-exp-03-25" → "Gemini 2.5 Pro"
        "claude-3-5-sonnet-20241022" → "Claude 3.5 Sonnet"
        "gpt-oss:120b" → "GPT-OSS 120B" (hyphenated exception)
    """
    try:
        # Remove provider prefix if present
        base_name = model_name.split("/")[-1]

        # Remove tag suffix (e.g., :14b, :latest) - handle separately
        size_suffix = ""
        if ":" in base_name:
            base_name, tag = base_name.rsplit(":", 1)
            # Keep size tags like "14b", "70b", "120b"
            if re.match(r"^\d+[bBmM]$", tag):
                size_suffix = f" {tag.upper()}"

        # Check if this is a hyphenated model that should keep its format
        base_name_lower = base_name.lower()
        for hyphenated in HYPHENATED_MODEL_NAMES:
            if base_name_lower.startswith(hyphenated):
                # Keep the hyphenated prefix, uppercase it
                return hyphenated.upper() + size_suffix

        # Remove common suffixes: date stamps, version numbers
        cleaned = base_name
        # Remove date stamps like -20241022, @20250219, -2024-08-06
        cleaned = re.sub(r"[-@]\d{4}-?\d{2}-?\d{2}", "", cleaned)
        # Remove experimental/preview date suffixes like -exp-03-25
        cleaned = re.sub(r"-exp-\d{2}-\d{2}", "", cleaned)
        # Remove version suffixes like -v1, -v2
        cleaned = re.sub(r"-v\d+$", "", cleaned)

        # Convert separators to spaces
        cleaned = cleaned.replace("-", " ").replace("_", " ")

        # Clean up version numbers: "3 5" → "3.5", "2 5" → "2.5"
        # But only for single digits that look like version numbers
        cleaned = re.sub(r"(\d) (\d)(?!\d)", r"\1.\2", cleaned)

        # Title case each word, preserving version numbers
        words = cleaned.split()
        result_words = []
        for word in words:
            if word.isdigit() or re.match(r"^\d+\.?\d*$", word):
                # Keep numbers as-is
                result_words.append(word)
            elif word.lower() in ("pro", "lite", "mini", "flash", "preview", "ultra"):
                # Common suffixes get title case
                result_words.append(word.title())
            else:
                # Title case other words
                result_words.append(word.title())

        return " ".join(result_words) + size_suffix
    except Exception:
        return model_name


def _generate_provider_display_name(provider: str, vendor: str | None) -> str:
    """
    Generate provider display name with model brand and vendor info.

    Examples:
        - Direct OpenAI: "GPT (OpenAI)"
        - Bedrock via Anthropic: "Claude (Bedrock - Anthropic)"
        - Vertex AI via Google: "Gemini (Vertex AI - Google)"
    """
    provider_nice = _format_name(provider)
    vendor_nice = _format_name(vendor) if vendor else None
    brand = VENDOR_BRAND_NAMES.get(vendor.lower()) if vendor else None

    # For aggregator providers, show: Brand (Provider - Vendor)
    if provider.lower() in AGGREGATOR_PROVIDERS:
        if brand and vendor_nice:
            return f"{brand} ({provider_nice} - {vendor_nice})"
        elif vendor_nice:
            return f"{provider_nice} - {vendor_nice}"
        return provider_nice

    # For direct providers, show: Brand (Provider)
    if brand:
        return f"{brand} ({provider_nice})"

    return provider_nice


@lru_cache(maxsize=1024)
def parse_litellm_model_name(raw_name: str) -> ParsedModelName:
    """
    Parse a LiteLLM model string into structured data.

    Metadata comes from enrichment when available, with fallback logic
    for models not in the enrichment data.

    Args:
        raw_name: The LiteLLM model string

    Returns:
        ParsedModelName with all components from enrichment or fallback
    """
    model_info = _get_model_info(raw_name)

    # Extract from key (not in enrichment)
    provider = _extract_provider(raw_name)
    region = _extract_region(raw_name)

    # Get from enrichment, with fallbacks for unenriched models
    vendor = model_info.get("model_vendor") or _infer_vendor_from_model_name(raw_name)
    version = model_info.get("model_version")
    display_name = model_info.get("display_name") or _generate_display_name_from_model(
        raw_name
    )

    # Generate provider display name
    provider_display_name = _generate_provider_display_name(provider, vendor)

    return ParsedModelName(
        raw_name=raw_name,
        provider=provider,
        vendor=vendor,
        version=version,
        region=region,
        display_name=display_name,
        provider_display_name=provider_display_name,
    )


================================================
FILE: backend/onyx/llm/model_response.py
================================================
from __future__ import annotations

from typing import Any
from typing import List
from typing import TYPE_CHECKING

from pydantic import BaseModel
from pydantic import Field


class FunctionCall(BaseModel):
    arguments: str | None = None
    name: str | None = None


class ChatCompletionMessageToolCall(BaseModel):
    id: str
    type: str = "function"
    function: FunctionCall


class ChatCompletionDeltaToolCall(BaseModel):
    id: str | None = None
    index: int = 0
    type: str = "function"
    function: FunctionCall | None = None


class Delta(BaseModel):
    content: str | None = None
    reasoning_content: str | None = None
    tool_calls: List[ChatCompletionDeltaToolCall] = Field(default_factory=list)


class StreamingChoice(BaseModel):
    finish_reason: str | None = None
    index: int = 0
    delta: Delta = Field(default_factory=Delta)


class Usage(BaseModel):
    completion_tokens: int
    prompt_tokens: int
    total_tokens: int
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class ModelResponseStream(BaseModel):
    id: str
    created: str
    choice: StreamingChoice
    usage: Usage | None = None


if TYPE_CHECKING:
    from litellm.types.utils import ModelResponseStream as LiteLLMModelResponseStream


class Message(BaseModel):
    content: str | None = None
    role: str = "assistant"
    tool_calls: List[ChatCompletionMessageToolCall] | None = None
    reasoning_content: str | None = None


class Choice(BaseModel):
    finish_reason: str | None = None
    index: int = 0
    message: Message = Field(default_factory=Message)


class ModelResponse(BaseModel):
    id: str
    created: str
    choice: Choice
    usage: Usage | None = None


if TYPE_CHECKING:
    from litellm.types.utils import (
        ModelResponse as LiteLLMModelResponse,
        ModelResponseStream as LiteLLMModelResponseStream,
    )


def _parse_function_call(
    function_payload: dict[str, Any] | None,
) -> FunctionCall | None:
    """Parse a function call payload into a FunctionCall object."""
    if not function_payload or not isinstance(function_payload, dict):
        return None
    return FunctionCall(
        arguments=function_payload.get("arguments"),
        name=function_payload.get("name"),
    )


def _parse_delta_tool_calls(
    tool_calls: list[dict[str, Any]] | None,
) -> list[ChatCompletionDeltaToolCall]:
    """Parse tool calls for streaming responses (delta format)."""
    if not tool_calls:
        return []

    parsed_tool_calls: list[ChatCompletionDeltaToolCall] = []
    for tool_call in tool_calls:
        parsed_tool_calls.append(
            ChatCompletionDeltaToolCall(
                id=tool_call.get("id"),
                index=tool_call.get("index", 0),
                type=tool_call.get("type", "function"),
                function=_parse_function_call(tool_call.get("function")),
            )
        )
    return parsed_tool_calls


def _parse_message_tool_calls(
    tool_calls: list[dict[str, Any]] | None,
) -> list[ChatCompletionMessageToolCall]:
    """Parse tool calls for non-streaming responses (message format)."""
    if not tool_calls:
        return []

    parsed_tool_calls: list[ChatCompletionMessageToolCall] = []
    for tool_call in tool_calls:
        function_call = _parse_function_call(tool_call.get("function"))
        if not function_call:
            continue

        parsed_tool_calls.append(
            ChatCompletionMessageToolCall(
                id=tool_call.get("id", ""),
                type=tool_call.get("type", "function"),
                function=function_call,
            )
        )
    return parsed_tool_calls


def _validate_and_extract_base_fields(
    response_data: dict[str, Any], error_prefix: str
) -> tuple[str, str, dict[str, Any]]:
    """
    Validate and extract common fields (id, created, first choice) from a LiteLLM response.

    Returns:
        Tuple of (id, created, choice_data)
    """
    response_id = response_data.get("id")
    created = response_data.get("created")
    if response_id is None or created is None:
        raise ValueError(f"{error_prefix} must include 'id' and 'created'.")

    choices: list[dict[str, Any]] = response_data.get("choices") or []
    if not choices:
        raise ValueError(f"{error_prefix} must include at least one choice.")

    return str(response_id), str(created), choices[0] or {}


def _usage_from_usage_data(usage_data: dict[str, Any]) -> Usage:
    # NOTE: sometimes the usage data dictionary has these keys and the values are None
    # hence the "or 0" instead of just using default values
    return Usage(
        completion_tokens=usage_data.get("completion_tokens") or 0,
        prompt_tokens=usage_data.get("prompt_tokens") or 0,
        total_tokens=usage_data.get("total_tokens") or 0,
        cache_creation_input_tokens=usage_data.get("cache_creation_input_tokens") or 0,
        cache_read_input_tokens=usage_data.get(
            "cache_read_input_tokens",
            (usage_data.get("prompt_tokens_details") or {}).get("cached_tokens"),
        )
        or 0,
    )


def from_litellm_model_response_stream(
    response: "LiteLLMModelResponseStream",
) -> ModelResponseStream:
    """
    Convert a LiteLLM ModelResponseStream into the simplified Onyx representation.
    """
    response_data = response.model_dump()
    response_id, created, choice_data = _validate_and_extract_base_fields(
        response_data, "LiteLLM response stream"
    )

    delta_data: dict[str, Any] = choice_data.get("delta") or {}
    parsed_delta = Delta(
        content=delta_data.get("content"),
        reasoning_content=delta_data.get("reasoning_content"),
        tool_calls=_parse_delta_tool_calls(delta_data.get("tool_calls")),
    )

    streaming_choice = StreamingChoice(
        finish_reason=choice_data.get("finish_reason"),
        index=choice_data.get("index", 0),
        delta=parsed_delta,
    )

    usage_data = response_data.get("usage")
    return ModelResponseStream(
        id=response_id,
        created=created,
        choice=streaming_choice,
        usage=(_usage_from_usage_data(usage_data) if usage_data else None),
    )


def from_litellm_model_response(
    response: "LiteLLMModelResponse",
) -> ModelResponse:
    """
    Convert a LiteLLM ModelResponse into the simplified Onyx representation.
    """
    response_data = response.model_dump()
    response_id, created, choice_data = _validate_and_extract_base_fields(
        response_data, "LiteLLM response"
    )

    message_data: dict[str, Any] = choice_data.get("message") or {}
    parsed_tool_calls = _parse_message_tool_calls(message_data.get("tool_calls"))

    message = Message(
        content=message_data.get("content"),
        role=message_data.get("role", "assistant"),
        tool_calls=parsed_tool_calls if parsed_tool_calls else None,
        reasoning_content=message_data.get("reasoning_content"),
    )

    choice = Choice(
        finish_reason=choice_data.get("finish_reason"),
        index=choice_data.get("index", 0),
        message=message,
    )

    usage_data = response_data.get("usage")
    return ModelResponse(
        id=response_id,
        created=created,
        choice=choice,
        usage=(_usage_from_usage_data(usage_data) if usage_data else None),
    )


================================================
FILE: backend/onyx/llm/models.py
================================================
from enum import Enum
from typing import Literal

from pydantic import BaseModel


class ToolChoiceOptions(str, Enum):
    REQUIRED = "required"
    AUTO = "auto"
    NONE = "none"


class ReasoningEffort(str, Enum):
    """Reasoning effort levels for models that support extended thinking.

    Different providers map these values differently:
    - OpenAI: Uses "low", "medium", "high" directly for reasoning_effort. Recently added "none" for 5 series
              which is like "minimal"
    - Claude: Uses budget_tokens with different values for each level
    - Gemini: Uses "none", "low", "medium", "high" for thinking_budget (via litellm mapping)
    """

    AUTO = "auto"
    OFF = "off"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


# OpenAI reasoning effort mapping
# Note: OpenAI API does not support "auto" - valid values are: none, minimal, low, medium, high, xhigh
OPENAI_REASONING_EFFORT: dict[ReasoningEffort, str] = {
    ReasoningEffort.AUTO: "medium",  # Default to medium when auto is requested
    ReasoningEffort.OFF: "none",
    ReasoningEffort.LOW: "low",
    ReasoningEffort.MEDIUM: "medium",
    ReasoningEffort.HIGH: "high",
}

# Anthropic reasoning effort to budget tokens mapping
# Loosely based on budgets from LiteLLM but this ensures it's not updated without our knowing from a version bump.
ANTHROPIC_REASONING_EFFORT_BUDGET: dict[ReasoningEffort, int] = {
    ReasoningEffort.AUTO: 2048,
    ReasoningEffort.LOW: 1024,
    ReasoningEffort.MEDIUM: 2048,
    ReasoningEffort.HIGH: 4096,
}


# Content part structures for multimodal messages
# The classes in this mirror the OpenAI Chat Completions message types and work well with routers like LiteLLM
class TextContentPart(BaseModel):
    type: Literal["text"] = "text"
    text: str
    # Some providers (e.g. Anthropic/Gemini) support prompt caching controls on content blocks.
    cache_control: dict | None = None


class ImageUrlDetail(BaseModel):
    url: str
    detail: Literal["auto", "low", "high"] | None = None


class ImageContentPart(BaseModel):
    type: Literal["image_url"] = "image_url"
    image_url: ImageUrlDetail


ContentPart = TextContentPart | ImageContentPart


# Tool call structures
class FunctionCall(BaseModel):
    name: str
    arguments: str


class ToolCall(BaseModel):
    type: Literal["function"] = "function"
    id: str
    function: FunctionCall


# Message types


# Base class for all cacheable messages
class CacheableMessage(BaseModel):
    # Some providers support prompt caching controls at the message level (passed through via LiteLLM).
    cache_control: dict | None = None


class SystemMessage(CacheableMessage):
    role: Literal["system"] = "system"
    content: str


class UserMessage(CacheableMessage):
    role: Literal["user"] = "user"
    content: str | list[ContentPart]


class AssistantMessage(CacheableMessage):
    role: Literal["assistant"] = "assistant"
    content: str | None = None
    tool_calls: list[ToolCall] | None = None


class ToolMessage(CacheableMessage):
    role: Literal["tool"] = "tool"
    content: str
    tool_call_id: str


# Union type for all OpenAI Chat Completions messages
ChatCompletionMessage = SystemMessage | UserMessage | AssistantMessage | ToolMessage
# Allows for passing in a string directly. This is provided for convenience and is wrapped as a UserMessage.
LanguageModelInput = list[ChatCompletionMessage] | ChatCompletionMessage


================================================
FILE: backend/onyx/llm/multi_llm.py
================================================
import os
import threading
from collections.abc import Iterator
from contextlib import contextmanager
from contextlib import nullcontext
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from typing import Union

from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.configs.chat_configs import LLM_SOCKET_READ_TIMEOUT
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.configs.model_configs import LITELLM_EXTRA_BODY
from onyx.llm.constants import LlmProviderNames
from onyx.llm.cost import calculate_llm_cost_cents
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ReasoningEffort
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.model_response import Usage
from onyx.llm.models import ANTHROPIC_REASONING_EFFORT_BUDGET
from onyx.llm.models import OPENAI_REASONING_EFFORT
from onyx.llm.request_context import get_llm_mock_response
from onyx.llm.utils import build_litellm_passthrough_kwargs
from onyx.llm.utils import is_true_openai_model
from onyx.llm.utils import model_is_reasoning_model
from onyx.llm.well_known_providers.constants import AWS_ACCESS_KEY_ID_KWARG
from onyx.llm.well_known_providers.constants import (
    AWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT,
)
from onyx.llm.well_known_providers.constants import (
    AWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT,
)
from onyx.llm.well_known_providers.constants import AWS_REGION_NAME_KWARG
from onyx.llm.well_known_providers.constants import AWS_REGION_NAME_KWARG_ENV_VAR_FORMAT
from onyx.llm.well_known_providers.constants import AWS_SECRET_ACCESS_KEY_KWARG
from onyx.llm.well_known_providers.constants import (
    AWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT,
)
from onyx.llm.well_known_providers.constants import LM_STUDIO_API_KEY_CONFIG_KEY
from onyx.llm.well_known_providers.constants import OLLAMA_API_KEY_CONFIG_KEY
from onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG
from onyx.llm.well_known_providers.constants import (
    VERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT,
)
from onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG
from onyx.utils.encryption import mask_string
from onyx.utils.logger import setup_logger

logger = setup_logger()

_env_lock = threading.Lock()

if TYPE_CHECKING:
    from litellm import CustomStreamWrapper
    from litellm import HTTPHandler


_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
LEGACY_MAX_TOKENS_KWARG = "max_tokens"
STANDARD_MAX_TOKENS_KWARG = "max_completion_tokens"
_VERTEX_ANTHROPIC_MODELS_REJECTING_OUTPUT_CONFIG = (
    "claude-opus-4-5",
    "claude-opus-4-6",
)


class LLMTimeoutError(Exception):
    """
    Exception raised when an LLM call times out.
    """


class LLMRateLimitError(Exception):
    """
    Exception raised when an LLM call is rate limited.
    """


def _prompt_to_dicts(prompt: LanguageModelInput) -> list[dict[str, Any]]:
    """Convert Pydantic message models to dictionaries for LiteLLM.

    LiteLLM expects messages to be dictionaries (with .get() method),
    not Pydantic models. This function serializes the messages.
    """
    if isinstance(prompt, list):
        return [msg.model_dump(exclude_none=True) for msg in prompt]
    return [prompt.model_dump(exclude_none=True)]


def _normalize_content(raw: Any) -> str:
    """Normalize a message content field to a plain string.

    Content can be a string, None, or a list of content-block dicts
    (e.g. [{"type": "text", "text": "..."}]).
    """
    if raw is None:
        return ""
    if isinstance(raw, str):
        return raw
    if isinstance(raw, list):
        return "\n".join(
            block.get("text", "") if isinstance(block, dict) else str(block)
            for block in raw
        )
    return str(raw)


def _strip_tool_content_from_messages(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Convert tool-related messages to plain text.

    Bedrock's Converse API requires toolConfig when messages contain
    toolUse/toolResult content blocks. When no tools are provided for the
    current request, we must convert any tool-related history into plain text
    to avoid the "toolConfig field must be defined" error.

    This is the same approach used by _OllamaHistoryMessageFormatter.
    """
    result: list[dict[str, Any]] = []
    for msg in messages:
        role = msg.get("role")
        tool_calls = msg.get("tool_calls")

        if role == "assistant" and tool_calls:
            # Convert structured tool calls to text representation
            tool_call_lines = []
            for tc in tool_calls:
                func = tc.get("function", {})
                name = func.get("name", "unknown")
                args = func.get("arguments", "{}")
                tc_id = tc.get("id", "")
                tool_call_lines.append(
                    f"[Tool Call] name={name} id={tc_id} args={args}"
                )

            existing_content = _normalize_content(msg.get("content"))
            parts = (
                [existing_content] + tool_call_lines
                if existing_content
                else tool_call_lines
            )
            new_msg = {
                "role": "assistant",
                "content": "\n".join(parts),
            }
            result.append(new_msg)

        elif role == "tool":
            # Convert tool response to user message with text content
            tool_call_id = msg.get("tool_call_id", "")
            content = _normalize_content(msg.get("content"))
            tool_result_text = f"[Tool Result] id={tool_call_id}\n{content}"
            # Merge into previous user message if it is also a converted
            # tool result to avoid consecutive user messages (Bedrock requires
            # strict user/assistant alternation).
            if (
                result
                and result[-1]["role"] == "user"
                and "[Tool Result]" in result[-1].get("content", "")
            ):
                result[-1]["content"] += "\n\n" + tool_result_text
            else:
                result.append({"role": "user", "content": tool_result_text})

        else:
            result.append(msg)

    return result


def _fix_tool_user_message_ordering(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Insert a synthetic assistant message between tool and user messages.

    Some models (e.g. Mistral on Azure) require strict message ordering where
    a user message cannot immediately follow a tool message. This function
    inserts a minimal assistant message to bridge the gap.
    """
    if len(messages) < 2:
        return messages

    result: list[dict[str, Any]] = [messages[0]]
    for msg in messages[1:]:
        prev_role = result[-1].get("role")
        curr_role = msg.get("role")
        if prev_role == "tool" and curr_role == "user":
            result.append({"role": "assistant", "content": "Noted. Continuing."})
        result.append(msg)
    return result


def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
    """Check if any messages contain tool-related content blocks."""
    for msg in messages:
        if msg.get("role") == "tool":
            return True
        if msg.get("role") == "assistant" and msg.get("tool_calls"):
            return True
    return False


def _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:
    """Check if the prompt contains any assistant messages with tool_calls.

    When Anthropic's extended thinking is enabled, the API requires every
    assistant message to start with a thinking block before any tool_use
    blocks.  Since we don't preserve thinking_blocks (they carry
    cryptographic signatures that can't be reconstructed), we must skip
    the thinking param whenever history contains prior tool-calling turns.
    """
    from onyx.llm.models import AssistantMessage

    msgs = prompt if isinstance(prompt, list) else [prompt]
    return any(isinstance(msg, AssistantMessage) and msg.tool_calls for msg in msgs)


def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
    normalized_model_name = model_name.lower()
    return any(
        blocked_model in normalized_model_name
        for blocked_model in _VERTEX_ANTHROPIC_MODELS_REJECTING_OUTPUT_CONFIG
    )


class LitellmLLM(LLM):
    """Uses Litellm library to allow easy configuration to use a multitude of LLMs
    See https://python.langchain.com/docs/integrations/chat/litellm"""

    def __init__(
        self,
        api_key: str | None,
        model_provider: str,
        model_name: str,
        max_input_tokens: int,
        timeout: int | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
        custom_llm_provider: str | None = None,
        temperature: float | None = None,
        custom_config: dict[str, str] | None = None,
        extra_headers: dict[str, str] | None = None,
        extra_body: dict | None = LITELLM_EXTRA_BODY,
        model_kwargs: dict[str, Any] | None = None,
    ):
        # Timeout in seconds for each socket read operation (i.e., max time between
        # receiving data chunks/tokens). This is NOT a total request timeout - a
        # request can run indefinitely as long as data keeps arriving within this
        # window. If the LLM pauses for longer than this timeout between chunks,
        # a ReadTimeout is raised.
        self._timeout = timeout
        if timeout is None:
            self._timeout = LLM_SOCKET_READ_TIMEOUT

        self._temperature = GEN_AI_TEMPERATURE if temperature is None else temperature

        self._model_provider = model_provider
        self._model_version = model_name
        self._api_key = api_key
        self._deployment_name = deployment_name
        self._api_base = api_base
        self._api_version = api_version
        self._custom_llm_provider = custom_llm_provider
        self._max_input_tokens = max_input_tokens
        self._custom_config = custom_config

        # Create a dictionary for model-specific arguments if it's None
        model_kwargs = model_kwargs or {}

        if custom_config:
            for k, v in custom_config.items():
                if model_provider == LlmProviderNames.VERTEX_AI:
                    if k == VERTEX_CREDENTIALS_FILE_KWARG:
                        model_kwargs[k] = v
                    elif k == VERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT:
                        model_kwargs[VERTEX_CREDENTIALS_FILE_KWARG] = v
                    elif k == VERTEX_LOCATION_KWARG:
                        model_kwargs[k] = v
                elif model_provider == LlmProviderNames.OLLAMA_CHAT:
                    if k == OLLAMA_API_KEY_CONFIG_KEY:
                        model_kwargs["api_key"] = v
                elif model_provider == LlmProviderNames.LM_STUDIO:
                    if k == LM_STUDIO_API_KEY_CONFIG_KEY:
                        model_kwargs["api_key"] = v
                elif model_provider == LlmProviderNames.BEDROCK:
                    if k == AWS_REGION_NAME_KWARG:
                        model_kwargs[k] = v
                    elif k == AWS_REGION_NAME_KWARG_ENV_VAR_FORMAT:
                        model_kwargs[AWS_REGION_NAME_KWARG] = v
                    elif k == AWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT:
                        model_kwargs["api_key"] = v
                    elif k == AWS_ACCESS_KEY_ID_KWARG:
                        model_kwargs[k] = v
                    elif k == AWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT:
                        model_kwargs[AWS_ACCESS_KEY_ID_KWARG] = v
                    elif k == AWS_SECRET_ACCESS_KEY_KWARG:
                        model_kwargs[k] = v
                    elif k == AWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT:
                        model_kwargs[AWS_SECRET_ACCESS_KEY_KWARG] = v

        # LM Studio: LiteLLM defaults to "fake-api-key" when no key is provided,
        # which LM Studio rejects. Ensure we always pass an explicit key (or empty
        # string) to prevent LiteLLM from injecting its fake default.
        if model_provider == LlmProviderNames.LM_STUDIO:
            model_kwargs.setdefault("api_key", "")

            # Users provide the server root (e.g. http://localhost:1234) but LiteLLM
            # needs /v1 for OpenAI-compatible calls.
            if self._api_base is not None:
                base = self._api_base.rstrip("/")
                self._api_base = base if base.endswith("/v1") else f"{base}/v1"
                model_kwargs["api_base"] = self._api_base

        # Default vertex_location to "global" if not provided for Vertex AI
        # Latest gemini models are only available through the global region
        if (
            model_provider == LlmProviderNames.VERTEX_AI
            and VERTEX_LOCATION_KWARG not in model_kwargs
        ):
            model_kwargs[VERTEX_LOCATION_KWARG] = "global"

        # Bifrost: OpenAI-compatible proxy that expects model names in
        # provider/model format (e.g. "anthropic/claude-sonnet-4-6").
        # We route through LiteLLM's openai provider with the Bifrost base URL,
        # and ensure /v1 is appended.
        if model_provider == LlmProviderNames.BIFROST:
            self._custom_llm_provider = "openai"
            if self._api_base is not None:
                base = self._api_base.rstrip("/")
                self._api_base = base if base.endswith("/v1") else f"{base}/v1"
                model_kwargs["api_base"] = self._api_base

        # This is needed for Ollama to do proper function calling
        if model_provider == LlmProviderNames.OLLAMA_CHAT and api_base is not None:
            model_kwargs["api_base"] = api_base
        if extra_headers:
            model_kwargs.update({"extra_headers": extra_headers})
        if extra_body:
            model_kwargs.update({"extra_body": extra_body})

        self._model_kwargs = model_kwargs

    def _safe_model_config(self) -> dict:
        dump = self.config.model_dump()
        dump["api_key"] = mask_string(dump.get("api_key") or "")
        custom_config = dump.get("custom_config")
        if isinstance(custom_config, dict):
            # Mask sensitive values in custom_config
            masked_config = {}
            for k, v in custom_config.items():
                masked_config[k] = mask_string(v) if v else v
            dump["custom_config"] = masked_config
        return dump

    def _track_llm_cost(self, usage: Usage) -> None:
        """
        Track LLM usage cost for Onyx-managed API keys.

        This is called after every LLM call completes (streaming or non-streaming).
        Cost is only tracked if:
        1. Usage limits are enabled for this deployment
        2. The API key is one of Onyx's managed default keys
        """

        from onyx.server.usage_limits import is_usage_limits_enabled

        if not is_usage_limits_enabled():
            return

        from onyx.server.usage_limits import is_onyx_managed_api_key

        if not is_onyx_managed_api_key(self._api_key):
            return
        # Import here to avoid circular imports
        from onyx.db.engine.sql_engine import get_session_with_current_tenant
        from onyx.db.usage import increment_usage
        from onyx.db.usage import UsageType

        # Calculate cost in cents
        cost_cents = calculate_llm_cost_cents(
            model_name=self._model_version,
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
        )

        if cost_cents <= 0:
            return

        try:
            with get_session_with_current_tenant() as db_session:
                increment_usage(db_session, UsageType.LLM_COST, cost_cents)
                db_session.commit()
        except Exception as e:
            # Log but don't fail the LLM call if tracking fails
            logger.warning(f"Failed to track LLM cost: {e}")

    def _completion(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None,
        tool_choice: ToolChoiceOptions | None,
        stream: bool,
        parallel_tool_calls: bool,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        user_identity: LLMUserIdentity | None = None,
        client: "HTTPHandler | None" = None,
    ) -> Union["ModelResponse", "CustomStreamWrapper"]:
        # Lazy loading to avoid memory bloat for non-inference flows
        from onyx.llm.litellm_singleton import litellm
        from litellm.exceptions import Timeout, RateLimitError

        #########################
        # Flags that modify the final arguments
        #########################
        is_claude_model = "claude" in self.config.model_name.lower()
        is_reasoning = model_is_reasoning_model(
            self.config.model_name, self.config.model_provider
        )
        # All OpenAI models will use responses API for consistency
        # Responses API is needed to get reasoning packets from OpenAI models
        is_openai_model = is_true_openai_model(
            self.config.model_provider, self.config.model_name
        )
        is_ollama = self._model_provider == LlmProviderNames.OLLAMA_CHAT
        is_mistral = self._model_provider == LlmProviderNames.MISTRAL
        is_vertex_ai = self._model_provider == LlmProviderNames.VERTEX_AI
        # Some Vertex Anthropic models reject output_config.
        # Keep this guard until LiteLLM/Vertex accept the field for these models.
        is_vertex_model_rejecting_output_config = (
            is_vertex_ai
            and _is_vertex_model_rejecting_output_config(self.config.model_name)
        )

        #########################
        # Build arguments
        #########################
        # Optional kwargs - should only be passed to LiteLLM under certain conditions
        optional_kwargs: dict[str, Any] = {}

        # Model name
        is_bifrost = self._model_provider == LlmProviderNames.BIFROST
        model_provider = (
            f"{self.config.model_provider}/responses"
            if is_openai_model  # Uses litellm's completions -> responses bridge
            else self.config.model_provider
        )
        if is_bifrost:
            # Bifrost expects model names in provider/model format
            # (e.g. "anthropic/claude-sonnet-4-6") sent directly to its
            # OpenAI-compatible endpoint. We use custom_llm_provider="openai"
            # so LiteLLM doesn't try to route based on the provider prefix.
            model = self.config.deployment_name or self.config.model_name
        else:
            model = f"{model_provider}/{self.config.deployment_name or self.config.model_name}"

        # Tool choice
        if is_claude_model and tool_choice == ToolChoiceOptions.REQUIRED:
            # Claude models will not use reasoning if tool_choice is required
            # let it choose tools automatically so reasoning can still be used
            tool_choice = ToolChoiceOptions.AUTO

        # If no tools are provided, tool_choice should be None
        if not tools:
            tool_choice = None

        # Temperature
        temperature = 1 if is_reasoning else self._temperature

        if stream and not is_vertex_model_rejecting_output_config:
            optional_kwargs["stream_options"] = {"include_usage": True}

        # Note, there is a reasoning_effort parameter in LiteLLM but it is completely jank and does not work for any
        # of the major providers. Not setting it sets it to OFF.
        if (
            is_reasoning
            # The default of this parameter not set is surprisingly not the equivalent of an Auto but is actually Off
            and reasoning_effort != ReasoningEffort.OFF
            and not is_vertex_model_rejecting_output_config
        ):
            if is_openai_model:
                # OpenAI API does not accept reasoning params for GPT 5 chat models
                # (neither reasoning nor reasoning_effort are accepted)
                # even though they are reasoning models (bug in OpenAI)
                if "-chat" not in model:
                    optional_kwargs["reasoning"] = {
                        "effort": OPENAI_REASONING_EFFORT[reasoning_effort],
                        "summary": "auto",
                    }

            elif is_claude_model:
                budget_tokens: int | None = ANTHROPIC_REASONING_EFFORT_BUDGET.get(
                    reasoning_effort
                )

                # Anthropic requires every assistant message with tool_use
                # blocks to start with a thinking block that carries a
                # cryptographic signature.  We don't preserve those blocks
                # across turns, so skip thinking when the history already
                # contains tool-calling assistant messages.  LiteLLM's
                # modify_params workaround doesn't cover all providers
                # (notably Bedrock).
                can_enable_thinking = (
                    budget_tokens is not None
                    and not _prompt_contains_tool_call_history(prompt)
                )

                if can_enable_thinking:
                    assert budget_tokens is not None  # mypy
                    if max_tokens is not None:
                        # Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
                        # and the minimum budget tokens is 1024
                        # Will note that overwriting a developer set max tokens is not ideal but is the best we can do for now
                        # It is better to allow the LLM to output more reasoning tokens even if it results in a fairly small tool
                        # call as compared to reducing the budget for reasoning.
                        max_tokens = max(budget_tokens + 1, max_tokens)
                    optional_kwargs["thinking"] = {
                        "type": "enabled",
                        "budget_tokens": budget_tokens,
                    }

                # LiteLLM just does some mapping like this anyway but is incomplete for Anthropic
                optional_kwargs.pop("reasoning_effort", None)

            else:
                # Hope for the best from LiteLLM
                if reasoning_effort in [
                    ReasoningEffort.LOW,
                    ReasoningEffort.MEDIUM,
                    ReasoningEffort.HIGH,
                ]:
                    optional_kwargs["reasoning_effort"] = reasoning_effort.value
                else:
                    optional_kwargs["reasoning_effort"] = ReasoningEffort.MEDIUM.value

        if tools:
            # OpenAI will error if parallel_tool_calls is True and tools are not specified
            optional_kwargs["parallel_tool_calls"] = parallel_tool_calls

        if structured_response_format:
            optional_kwargs["response_format"] = structured_response_format

        if not (is_claude_model or is_ollama or is_mistral) or is_bifrost:
            # Litellm bug: tool_choice is dropped silently if not specified here for OpenAI
            # However, this param breaks Anthropic and Mistral models,
            # so it must be conditionally included unless the request is
            # routed through Bifrost's OpenAI-compatible endpoint.
            # Additionally, tool_choice is not supported by Ollama and causes warnings if included.
            # See also, https://github.com/ollama/ollama/issues/11171
            optional_kwargs["allowed_openai_params"] = ["tool_choice"]

        # Passthrough kwargs
        passthrough_kwargs = build_litellm_passthrough_kwargs(
            model_kwargs=self._model_kwargs,
            user_identity=user_identity,
        )

        try:
            # NOTE: must pass in None instead of empty strings otherwise litellm
            # can have some issues with bedrock.
            # NOTE: Sometimes _model_kwargs may have an "api_key" kwarg
            # depending on what the caller passes in for custom_config. If it
            # does we allow it to clobber _api_key.
            if "api_key" not in passthrough_kwargs:
                passthrough_kwargs["api_key"] = self._api_key or None

            # We only need to set environment variables if custom config is set
            env_ctx = (
                temporary_env_and_lock(self._custom_config)
                if self._custom_config
                else nullcontext()
            )
            with env_ctx:
                messages = _prompt_to_dicts(prompt)

                # Bedrock's Converse API requires toolConfig when messages
                # contain toolUse/toolResult content blocks. When no tools are
                # provided for this request but the history contains tool
                # content from previous turns, strip it to plain text.
                is_bedrock = self._model_provider in {
                    LlmProviderNames.BEDROCK,
                    LlmProviderNames.BEDROCK_CONVERSE,
                }
                if (
                    is_bedrock
                    and not tools
                    and _messages_contain_tool_content(messages)
                ):
                    messages = _strip_tool_content_from_messages(messages)

                # Some models (e.g. Mistral) reject a user message
                # immediately after a tool message. Insert a synthetic
                # assistant bridge message to satisfy the ordering
                # constraint. Check both the provider and the deployment/
                # model name to catch Mistral hosted on Azure.
                model_or_deployment = (
                    self._deployment_name or self._model_version or ""
                ).lower()
                is_mistral_model = is_mistral or "mistral" in model_or_deployment
                if is_mistral_model:
                    messages = _fix_tool_user_message_ordering(messages)

                # Only pass tool_choice when tools are present — some providers (e.g. Fireworks)
                # reject requests where tool_choice is explicitly null.
                if tools and tool_choice is not None:
                    optional_kwargs["tool_choice"] = tool_choice

                response = litellm.completion(
                    mock_response=get_llm_mock_response() or MOCK_LLM_RESPONSE,
                    model=model,
                    base_url=self._api_base or None,
                    api_version=self._api_version or None,
                    custom_llm_provider=self._custom_llm_provider or None,
                    messages=messages,
                    tools=tools,
                    stream=stream,
                    temperature=temperature,
                    timeout=timeout_override or self._timeout,
                    max_tokens=max_tokens,
                    client=client,
                    **optional_kwargs,
                    **passthrough_kwargs,
                )
            return response
        except Exception as e:
            # for break pointing
            if isinstance(e, Timeout):
                raise LLMTimeoutError(e)

            elif isinstance(e, RateLimitError):
                raise LLMRateLimitError(e)

            raise e

    @property
    def config(self) -> LLMConfig:
        return LLMConfig(
            model_provider=self._model_provider,
            model_name=self._model_version,
            temperature=self._temperature,
            api_key=self._api_key,
            api_base=self._api_base,
            api_version=self._api_version,
            deployment_name=self._deployment_name,
            custom_config=self._custom_config,
            max_input_tokens=self._max_input_tokens,
        )

    def invoke(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        user_identity: LLMUserIdentity | None = None,
    ) -> ModelResponse:
        from litellm import HTTPHandler
        from litellm import ModelResponse as LiteLLMModelResponse

        from onyx.llm.model_response import from_litellm_model_response

        # HTTPHandler Threading & Connection Pool Notes:
        # =============================================
        # We create an isolated HTTPHandler ONLY for true OpenAI models (not OpenAI-compatible
        # providers like glm-4.7, DeepSeek, etc.). This distinction is critical:
        #
        # 1. WHY ONLY TRUE OPENAI MODELS:
        #    - True OpenAI models use litellm's "responses API" path which expects HTTPHandler
        #    - OpenAI-compatible providers (model_provider="openai" with non-OpenAI models)
        #      use the standard completion path which expects OpenAI SDK client objects
        #    - Passing HTTPHandler to OpenAI-compatible providers causes:
        #      AttributeError: 'HTTPHandler' object has no attribute 'api_key'
        #      (because _get_openai_client() calls openai_client.api_key on line ~929)
        #
        # 2. WHY ISOLATED HTTPHandler FOR OPENAI:
        #    - Prevents "Bad file descriptor" errors when multiple threads stream concurrently
        #    - Shared connection pools can have stale connections or abandoned streams that
        #      corrupt the pool state for other threads
        #    - Each request gets its own fresh httpx.Client via HTTPHandler
        #
        # 3. WHY OTHER PROVIDERS DON'T NEED THIS:
        #    - Other providers (Anthropic, Bedrock, etc.) use litellm.module_level_client
        #      which handles concurrency appropriately
        #    - httpx.Client itself IS thread-safe for concurrent requests
        #    - The issue is specific to OpenAI's responses API path and connection reuse
        #
        # 4. PITFALL - is_true_openai_model() CHECK:
        #    - Must use is_true_openai_model() NOT just check model_provider == "openai"
        #    - Many OpenAI-compatible providers set model_provider="openai" but are NOT true
        #      OpenAI models (glm-4.7, DeepSeek, local proxies, etc.)
        #    - is_true_openai_model() checks both provider AND model name patterns
        #
        # This note may not be entirely accurate as there is a lot of complexity in the LiteLLM codebase around this
        # and not every model path was traced thoroughly. It is also possible that in future versions of LiteLLM
        # they will realize that their OpenAI handling is not threadsafe. Hope they will just fix it.
        client = None
        if is_true_openai_model(self.config.model_provider, self.config.model_name):
            client = HTTPHandler(timeout=timeout_override or self._timeout)

        try:
            # When custom_config is set, env vars are temporarily injected
            # under a global lock. Using stream=True here means the lock is
            # only held during connection setup (not the full inference).
            # The chunks are then collected outside the lock and reassembled
            # into a single ModelResponse via stream_chunk_builder.
            from litellm import stream_chunk_builder
            from litellm import CustomStreamWrapper as LiteLLMCustomStreamWrapper

            stream_response = cast(
                LiteLLMCustomStreamWrapper,
                self._completion(
                    prompt=prompt,
                    tools=tools,
                    tool_choice=tool_choice,
                    stream=True,
                    structured_response_format=structured_response_format,
                    timeout_override=timeout_override,
                    max_tokens=max_tokens,
                    parallel_tool_calls=True,
                    reasoning_effort=reasoning_effort,
                    user_identity=user_identity,
                    client=client,
                ),
            )
            chunks = list(stream_response)
            response = cast(
                LiteLLMModelResponse,
                stream_chunk_builder(chunks),
            )

            model_response = from_litellm_model_response(response)

            # Track LLM cost for Onyx-managed API keys
            if model_response.usage:
                self._track_llm_cost(model_response.usage)

            return model_response
        finally:
            if client is not None:
                client.close()

    def stream(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        user_identity: LLMUserIdentity | None = None,
    ) -> Iterator[ModelResponseStream]:
        from litellm import CustomStreamWrapper as LiteLLMCustomStreamWrapper
        from litellm import HTTPHandler

        from onyx.llm.model_response import from_litellm_model_response_stream

        # HTTPHandler Threading & Connection Pool Notes:
        # =============================================
        # See invoke() method for full explanation. Key points for streaming:
        #
        # 1. SAME RESTRICTIONS APPLY:
        #    - HTTPHandler ONLY for true OpenAI models (use is_true_openai_model())
        #    - OpenAI-compatible providers will fail with AttributeError on api_key
        #
        # 2. STREAMING-SPECIFIC CONCERNS:
        #    - "Bad file descriptor" errors are MORE common during streaming because:
        #      a) Streams hold connections open longer, increasing conflict window
        #      b) Multiple concurrent streams (e.g., deep research) share the pool
        #      c) Abandoned/interrupted streams can leave connections in bad state
        #
        # 3. ABANDONED STREAM PITFALL:
        #    - If callers abandon this generator without fully consuming it (e.g.,
        #      early return, exception, or break), the finally block won't execute
        #      until the generator is garbage collected
        #    - This is acceptable because:
        #      a) CPython's refcounting typically finalizes generators promptly
        #      b) Each HTTPHandler has its own isolated connection pool
        #      c) httpx has built-in connection timeouts as a fallback
        #    - If abandoned streams become problematic, consider using contextlib
        #      or explicit stream.close() at call sites
        #
        # 4. WHY NOT USE SHARED HTTPHandler:
        #    - litellm's InMemoryCache (used for client caching) is NOT thread-safe
        #    - Shared pools can have connections corrupted by other threads
        #    - Per-request HTTPHandler eliminates cross-thread interference
        client = None
        if is_true_openai_model(self.config.model_provider, self.config.model_name):
            client = HTTPHandler(timeout=timeout_override or self._timeout)

        try:
            response = cast(
                LiteLLMCustomStreamWrapper,
                self._completion(
                    prompt=prompt,
                    tools=tools,
                    tool_choice=tool_choice,
                    stream=True,
                    structured_response_format=structured_response_format,
                    timeout_override=timeout_override,
                    max_tokens=max_tokens,
                    parallel_tool_calls=True,
                    reasoning_effort=reasoning_effort,
                    user_identity=user_identity,
                    client=client,
                ),
            )

            for chunk in response:
                model_response = from_litellm_model_response_stream(chunk)

                # Track LLM cost when usage info is available (typically in the last chunk)
                if model_response.usage:
                    self._track_llm_cost(model_response.usage)

                yield model_response
        finally:
            if client is not None:
                client.close()


@contextmanager
def temporary_env_and_lock(env_variables: dict[str, str]) -> Iterator[None]:
    """
    Temporarily sets the environment variables to the given values.
    Code path is locked while the environment variables are set.
    Then cleans up the environment and frees the lock.
    """
    with _env_lock:
        logger.debug("Acquired lock in temporary_env_and_lock")
        # Store original values (None if key didn't exist)
        original_values: dict[str, str | None] = {
            key: os.environ.get(key) for key in env_variables
        }
        try:
            os.environ.update(env_variables)
            yield
        finally:
            for key, original_value in original_values.items():
                if original_value is None:
                    os.environ.pop(key, None)  # Remove if it didn't exist before
                else:
                    os.environ[key] = original_value  # Restore original value

    logger.debug("Released lock in temporary_env_and_lock")


================================================
FILE: backend/onyx/llm/override_models.py
================================================
"""Overrides sent over the wire / stored in the DB

NOTE: these models are used in many places, so have to be
kepy in a separate file to avoid circular imports.
"""

from pydantic import BaseModel


class LLMOverride(BaseModel):
    """Per-request LLM settings that override persona defaults.

    All fields are optional — only the fields that differ from the persona's
    configured LLM need to be supplied. Used both over the wire (API requests)
    and for multi-model comparison, where one override is supplied per model.

    Attributes:
        model_provider: LLM provider slug (e.g. ``"openai"``, ``"anthropic"``).
            When ``None``, the persona's default provider is used.
        model_version: Specific model version string (e.g. ``"gpt-4o"``).
            When ``None``, the persona's default model is used.
        temperature: Sampling temperature in ``[0, 2]``. When ``None``, the
            persona's default temperature is used.
        display_name: Human-readable label shown in the UI for this model,
            e.g. ``"GPT-4 Turbo"``. Optional; falls back to ``model_version``
            when not set.
    """

    model_provider: str | None = None
    model_version: str | None = None
    temperature: float | None = None
    display_name: str | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}


class PromptOverride(BaseModel):
    system_prompt: str | None = None
    task_prompt: str | None = None


================================================
FILE: backend/onyx/llm/prompt_cache/README.md
================================================
# Prompt Caching Framework

A comprehensive prompt-caching mechanism for enabling cost savings across multiple LLM providers by leveraging provider-side prompt token caching.

## Overview

The prompt caching framework provides a unified interface for enabling prompt caching across different LLM providers. It supports both **implicit caching** (automatic provider-side caching) and **explicit caching** (with cache control parameters).

## Features

- **Provider Support**: OpenAI (implicit), Anthropic (explicit), Vertex AI (explicit)
- **Flexible Input**: Supports both `str` and `Sequence[ChatCompletionMessage]` inputs
- **Continuation Handling**: Smart merging of cacheable prefix and suffix messages
- **Best-Effort**: Gracefully degrades if caching fails
- **Tenant-Aware**: Automatic tenant isolation for multi-tenant deployments
- **Configurable**: Enable/disable via environment variable

## Quick Start

### Basic Usage

```python
from onyx.llm.prompt_cache import process_with_prompt_cache
from onyx.llm.models import SystemMessage, UserMessage

# Assume you have an LLM instance with a config property
# llm = get_your_llm_instance()

# Define cacheable prefix (static context) using Pydantic message models
cacheable_prefix = [
    SystemMessage(role="system", content="You are a helpful assistant."),
    UserMessage(role="user", content="Context: ...")  # Static context
]

# Define suffix (dynamic user input)
suffix = [UserMessage(role="user", content="What is the weather?")]

# Process with caching - pass llm_config, not the llm instance
processed_prompt, cache_metadata = process_with_prompt_cache(
    llm_config=llm.config,
    cacheable_prefix=cacheable_prefix,
    suffix=suffix,
    continuation=False,
)

# Make LLM call with processed prompt
response = llm.invoke(processed_prompt)
```

### Using String Inputs

```python
# Both prefix and suffix can be strings
cacheable_prefix = "You are a helpful assistant. Context: ..."
suffix = "What is the weather?"

processed_prompt, cache_metadata = process_with_prompt_cache(
    llm_config=llm.config,
    cacheable_prefix=cacheable_prefix,
    suffix=suffix,
    continuation=False,
)

response = llm.invoke(processed_prompt)
```

### Continuation Flag

When `continuation=True`, the suffix is appended to the last message of the cacheable prefix:

```python
# Without continuation (default)
# Result: [system_msg, prefix_user_msg, suffix_user_msg]

# With continuation=True
# Result: [system_msg, prefix_user_msg + suffix_user_msg]
processed_prompt, _ = process_with_prompt_cache(
    llm_config=llm.config,
    cacheable_prefix=cacheable_prefix,
    suffix=suffix,
    continuation=True,  # Merge suffix into last prefix message
)
```

**Note**: If `cacheable_prefix` is a string, it remains in its own content block even when `continuation=True`.

## Provider-Specific Behavior

### OpenAI
- **Caching Type**: Implicit (automatic)
- **Behavior**: No special parameters needed. Provider automatically caches prefixes >1024 tokens.
- **Cache Lifetime**: Up to 1 hour
- **Cost Savings**: 50% discount on cached tokens

### Anthropic
- **Caching Type**: Explicit (requires `cache_control` parameter)
- **Behavior**: Automatically adds `cache_control={"type": "ephemeral"}` to the **last message** of the cacheable prefix
- **Cache Lifetime**: 5 minutes (default)
- **Limitations**: Supports up to 4 cache breakpoints

### Vertex AI
- **Caching Type**: Explicit (with `cache_control` parameter)
- **Behavior**: Adds `cache_control={"type": "ephemeral"}` to **all content blocks** in cacheable messages. String content is converted to array format with the cache control attached.
- **Cache Lifetime**: 5 minutes
- **Future**: Full context caching with block number management (deferred to future PR)

## Configuration

### Environment Variables

- `ENABLE_PROMPT_CACHING`: Enable/disable prompt caching (default: `true`)
  ```bash
  export ENABLE_PROMPT_CACHING=false  # Disable caching
  ```

## Architecture

### Core Components

1. **`processor.py`**: Main entry point (`process_with_prompt_cache`)
2. **`cache_manager.py`**: Cache metadata storage and retrieval
3. **`models.py`**: Pydantic models for cache metadata (`CacheMetadata`)
4. **`providers/`**: Provider-specific adapters
5. **`utils.py`**: Shared utility functions

### Provider Adapters

Each provider has its own adapter in `providers/`:

| File | Class | Description |
|------|-------|-------------|
| `base.py` | `PromptCacheProvider` | Abstract base class for all providers |
| `openai.py` | `OpenAIPromptCacheProvider` | Implicit caching (no transformation) |
| `anthropic.py` | `AnthropicPromptCacheProvider` | Explicit caching with `cache_control` on last message |
| `vertex.py` | `VertexAIPromptCacheProvider` | Explicit caching with `cache_control` on all content blocks |
| `noop.py` | `NoOpPromptCacheProvider` | Fallback for unsupported providers |

Each adapter implements:
- `supports_caching()`: Whether caching is supported
- `prepare_messages_for_caching()`: Transform messages for caching
- `extract_cache_metadata()`: Extract metadata from responses
- `get_cache_ttl_seconds()`: Cache TTL

## Best Practices

1. **Cache Static Content**: Use cacheable prefix for system prompts, static context, and instructions that don't change between requests.

2. **Keep Dynamic Content in Suffix**: User queries, search results, and other dynamic content should be in the suffix.

3. **Monitor Cache Effectiveness**: Check logs for cache hits/misses and adjust your caching strategy accordingly.

4. **Provider Selection**: Different providers have different caching characteristics - choose based on your use case.

## Error Handling

The framework is **best-effort** - if caching fails, it gracefully falls back to non-cached behavior:

- Cache lookup failures: Logged and continue without caching
- Provider adapter failures: Fall back to no-op adapter
- Cache storage failures: Logged and continue (caching is best-effort)
- Invalid cache metadata: Cleared and proceed without cache

## Future Enhancements

- **Explicit Caching for Vertex AI**: Full block number tracking and management
- **Cache Analytics**: Detailed metrics on cache effectiveness and cost savings
- **Advanced Strategies**: More sophisticated cache key generation and invalidation
- **Distributed Caching**: Shared caches across instances

## Examples

See `backend/tests/external_dependency_unit/llm/test_prompt_caching.py` for detailed integration test examples.


================================================
FILE: backend/onyx/llm/prompt_cache/__init__.py
================================================
"""Prompt caching framework for LLM providers.

This module provides a framework for enabling prompt caching across different
LLM providers. It supports both implicit caching (automatic provider-side caching)
and explicit caching (with cache metadata management).
"""

from onyx.llm.prompt_cache.cache_manager import CacheManager
from onyx.llm.prompt_cache.cache_manager import generate_cache_key_hash
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.processor import process_with_prompt_cache
from onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.providers.factory import get_provider_adapter
from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider
from onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider
from onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider
from onyx.llm.prompt_cache.utils import combine_messages_with_continuation
from onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform

__all__ = [
    "AnthropicPromptCacheProvider",
    "CacheManager",
    "CacheMetadata",
    "combine_messages_with_continuation",
    "generate_cache_key_hash",
    "get_provider_adapter",
    "NoOpPromptCacheProvider",
    "OpenAIPromptCacheProvider",
    "prepare_messages_with_cacheable_transform",
    "process_with_prompt_cache",
    "PromptCacheProvider",
    "VertexAIPromptCacheProvider",
]


================================================
FILE: backend/onyx/llm/prompt_cache/cache_manager.py
================================================
"""Cache manager for storing and retrieving prompt cache metadata."""

import hashlib
import json
from datetime import datetime
from datetime import timezone

from onyx.configs.model_configs import PROMPT_CACHE_REDIS_TTL_MULTIPLIER
from onyx.key_value_store.store import PgRedisKVStore
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

REDIS_KEY_PREFIX = "prompt_cache:"
# Cache TTL multiplier - store caches slightly longer than provider TTL
# This allows for some clock skew and ensures we don't lose cache metadata prematurely
# Value is configurable via PROMPT_CACHE_REDIS_TTL_MULTIPLIER env var (default: 1.2)
CACHE_TTL_MULTIPLIER = PROMPT_CACHE_REDIS_TTL_MULTIPLIER


class CacheManager:
    """Manages storage and retrieval of prompt cache metadata."""

    def __init__(self, kv_store: PgRedisKVStore | None = None) -> None:
        """Initialize the cache manager.

        Args:
            kv_store: Optional key-value store. If None, creates a new PgRedisKVStore.
        """
        self._kv_store = kv_store or PgRedisKVStore()

    def _build_cache_key(
        self,
        provider: str,
        model_name: str,
        cache_key_hash: str,
        tenant_id: str | None = None,
    ) -> str:
        """Build a Redis/PostgreSQL key for cache metadata.

        Args:
            provider: LLM provider name (e.g., "openai", "anthropic")
            model_name: Model name
            cache_key_hash: Hash of the cacheable prefix content
            tenant_id: Tenant ID. If None, uses current tenant from context.

        Returns:
            Cache key string
        """
        if tenant_id is None:
            tenant_id = get_current_tenant_id()
        return f"{REDIS_KEY_PREFIX}{tenant_id}:{provider}:{model_name}:{cache_key_hash}"

    def store_cache_metadata(
        self,
        metadata: CacheMetadata,
    ) -> None:
        """Store cache metadata.

        Args:
            metadata: Cache metadata to store
            ttl_seconds: Optional TTL in seconds. If None, uses provider default.
        """
        try:
            cache_key = self._build_cache_key(
                metadata.provider,
                metadata.model_name,
                metadata.cache_key,
                metadata.tenant_id,
            )

            # Update last_accessed timestamp
            metadata.last_accessed = datetime.now(timezone.utc)

            # Serialize metadata
            metadata_dict = metadata.model_dump(mode="json")

            # Store in key-value store
            # Note: PgRedisKVStore doesn't support TTL directly, but Redis will
            # handle expiration. For PostgreSQL persistence, we rely on cleanup
            # based on last_accessed timestamp.
            self._kv_store.store(cache_key, metadata_dict, encrypt=False)

            logger.debug(
                f"Stored cache metadata: provider={metadata.provider}, "
                f"model={metadata.model_name}, cache_key={metadata.cache_key[:16]}..., "
                f"tenant_id={metadata.tenant_id}"
            )
        except Exception as e:
            # Best-effort: log and continue
            logger.warning(f"Failed to store cache metadata: {str(e)}")

    def retrieve_cache_metadata(
        self,
        provider: str,
        model_name: str,
        cache_key_hash: str,
        tenant_id: str | None = None,
    ) -> CacheMetadata | None:
        """Retrieve cache metadata.

        Args:
            provider: LLM provider name
            model_name: Model name
            cache_key_hash: Hash of the cacheable prefix content
            tenant_id: Tenant ID. If None, uses current tenant from context.

        Returns:
            CacheMetadata if found, None otherwise
        """
        try:
            cache_key = self._build_cache_key(
                provider, model_name, cache_key_hash, tenant_id
            )
            metadata_dict = self._kv_store.load(cache_key, refresh_cache=False)

            # Deserialize metadata
            metadata = CacheMetadata.model_validate(metadata_dict)

            # Update last_accessed timestamp
            metadata.last_accessed = datetime.now(timezone.utc)
            self.store_cache_metadata(metadata)

            logger.debug(
                f"Retrieved cache metadata: provider={provider}, "
                f"model={model_name}, cache_key={cache_key_hash[:16]}..., "
                f"tenant_id={tenant_id}"
            )
            return metadata
        except Exception as e:
            # Best-effort: log and continue
            logger.debug(f"Cache metadata not found or error retrieving: {str(e)}")
            return None

    def delete_cache_metadata(
        self,
        provider: str,
        model_name: str,
        cache_key_hash: str,
        tenant_id: str | None = None,
    ) -> None:
        """Delete cache metadata.

        Args:
            provider: LLM provider name
            model_name: Model name
            cache_key_hash: Hash of the cacheable prefix content
            tenant_id: Tenant ID. If None, uses current tenant from context.
        """
        try:
            cache_key = self._build_cache_key(
                provider, model_name, cache_key_hash, tenant_id
            )
            self._kv_store.delete(cache_key)
            logger.debug(
                f"Deleted cache metadata for provider={provider}, model={model_name}, cache_key={cache_key_hash[:16]}..."
            )
        except Exception as e:
            # Best-effort: log and continue
            logger.warning(f"Failed to delete cache metadata: {str(e)}")


def _make_json_serializable(obj: object) -> object:
    """Recursively convert objects to JSON-serializable types.

    Handles Pydantic models, dicts, lists, and other common types.
    """
    if hasattr(obj, "model_dump"):
        # Pydantic v2 model
        return obj.model_dump(mode="json")
    elif hasattr(obj, "dict"):
        # Pydantic v1 model or similar
        return _make_json_serializable(obj.dict())
    elif isinstance(obj, dict):
        return {k: _make_json_serializable(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [_make_json_serializable(item) for item in obj]
    elif isinstance(obj, (str, int, float, bool, type(None))):
        return obj
    else:
        # Fallback: convert to string representation
        return str(obj)


def generate_cache_key_hash(
    cacheable_prefix: LanguageModelInput,
    provider: str,
    model_name: str,
    tenant_id: str,
) -> str:
    """Generate a deterministic cache key hash from cacheable prefix.

    Args:
        cacheable_prefix: Single message or list of messages to hash
        provider: LLM provider name
        model_name: Model name
        tenant_id: Tenant ID

    Returns:
        SHA256 hash as hex string
    """
    # Normalize to list for consistent hashing; _make_json_serializable handles Pydantic models
    messages = (
        cacheable_prefix if isinstance(cacheable_prefix, list) else [cacheable_prefix]
    )
    messages_dict = [_make_json_serializable(msg) for msg in messages]

    # Serialize messages in a deterministic way
    # Include only content, roles, and order - exclude timestamps or dynamic fields
    serialized = json.dumps(
        {
            "messages": messages_dict,
            "provider": provider,
            "model": model_name,
            "tenant_id": tenant_id,
        },
        sort_keys=True,
        separators=(",", ":"),
    )
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()


================================================
FILE: backend/onyx/llm/prompt_cache/models.py
================================================
"""Interfaces and data structures for prompt caching."""

from datetime import datetime

from pydantic import BaseModel


class CacheMetadata(BaseModel):
    """Metadata for cached prompt prefixes."""

    cache_key: str
    provider: str
    model_name: str
    tenant_id: str
    created_at: datetime
    last_accessed: datetime
    # Provider-specific metadata
    # TODO: Add explicit caching support in future PR
    # vertex_block_numbers: dict[str, str] | None = None  # message_hash -> block_number
    # anthropic_cache_id: str | None = None


================================================
FILE: backend/onyx/llm/prompt_cache/processor.py
================================================
"""Main processor for prompt caching."""

from datetime import datetime
from datetime import timezone

from onyx.configs.model_configs import ENABLE_PROMPT_CACHING
from onyx.llm.interfaces import LLMConfig
from onyx.llm.models import LanguageModelInput
from onyx.llm.prompt_cache.cache_manager import generate_cache_key_hash
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.providers.factory import get_provider_adapter
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


# TODO: test with a history containing images
def process_with_prompt_cache(
    llm_config: LLMConfig,
    cacheable_prefix: LanguageModelInput | None,
    suffix: LanguageModelInput,
    continuation: bool = False,
) -> tuple[LanguageModelInput, CacheMetadata | None]:
    """Process prompt with caching support.

    This function takes a cacheable prefix and suffix, processes them according to
    the LLM provider's caching capabilities, and returns the combined messages
    ready for LLM API calls along with optional cache metadata.

    Args:
        llm: The LLM instance (used to determine provider and model)
        cacheable_prefix: Optional cacheable prefix. If None, no caching is attempted.
        suffix: The non-cacheable suffix to append
        continuation: If True, suffix should be appended to the last message
            of cacheable_prefix rather than being separate messages

    Returns:
        Tuple of (processed_prompt, cache_metadata_to_store)
        - processed_prompt: Combined and transformed messages ready for LLM API call
        - cache_metadata_to_store: Optional cache metadata for post-processing
            (currently None for implicit caching, will be populated in future PR
            for explicit caching)
    """
    # Check if prompt caching is enabled
    if not ENABLE_PROMPT_CACHING:
        logger.debug("Prompt caching is disabled via configuration")
        # Fall back to no-op behavior
        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider

        noop_adapter = NoOpPromptCacheProvider()
        combined = noop_adapter.prepare_messages_for_caching(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            cache_metadata=None,
        )
        return combined, None

    # If no cacheable prefix, return suffix unchanged
    if cacheable_prefix is None:
        logger.debug("No cacheable prefix provided, skipping caching")
        return suffix, None

    # Get provider adapter
    provider_adapter = get_provider_adapter(llm_config)

    # If provider doesn't support caching, combine and return unchanged
    if not provider_adapter.supports_caching():
        logger.debug(
            f"Provider {llm_config.model_provider} does not support caching, combining messages without caching"
        )
        # Use no-op adapter to combine messages
        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider

        noop_adapter = NoOpPromptCacheProvider()
        combined = noop_adapter.prepare_messages_for_caching(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            cache_metadata=None,
        )
        return combined, None

    # Generate cache key for cacheable prefix
    tenant_id = get_current_tenant_id()
    cache_key_hash = generate_cache_key_hash(
        cacheable_prefix=cacheable_prefix,
        provider=llm_config.model_provider,
        model_name=llm_config.model_name,
        tenant_id=tenant_id,
    )

    # For implicit caching: Skip cache lookup (providers handle caching automatically)
    # TODO (explicit caching - future PR): Look up cache metadata in CacheManager
    cache_metadata: CacheMetadata | None = None

    # Use provider adapter to prepare messages with caching
    try:
        processed_prompt = provider_adapter.prepare_messages_for_caching(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            cache_metadata=cache_metadata,
        )

        logger.debug(
            f"Processed prompt with caching: provider={llm_config.model_provider}, "
            f"model={llm_config.model_name}, cache_key={cache_key_hash[:16]}..., "
            f"continuation={continuation}"
        )

        # Create cache metadata for tracking (even for implicit caching)
        # This allows us to track cache usage and effectiveness
        cache_metadata = CacheMetadata(
            cache_key=cache_key_hash,
            provider=llm_config.model_provider,
            model_name=llm_config.model_name,
            tenant_id=tenant_id,
            created_at=datetime.now(timezone.utc),
            last_accessed=datetime.now(timezone.utc),
        )

        return processed_prompt, cache_metadata

    except Exception as e:
        # Best-effort: log error and fall back to no-op behavior
        logger.warning(
            f"Error processing prompt with caching for provider={llm_config.model_provider}: {str(e)}. "
            "Falling back to non-cached behavior."
        )
        # Fall back to no-op adapter
        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider

        noop_adapter = NoOpPromptCacheProvider()
        combined = noop_adapter.prepare_messages_for_caching(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            cache_metadata=None,
        )
        return combined, None


================================================
FILE: backend/onyx/llm/prompt_cache/providers/__init__.py
================================================
"""Provider adapters for prompt caching."""

from onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.providers.factory import get_provider_adapter
from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider
from onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider
from onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider

__all__ = [
    "AnthropicPromptCacheProvider",
    "get_provider_adapter",
    "NoOpPromptCacheProvider",
    "OpenAIPromptCacheProvider",
    "PromptCacheProvider",
    "VertexAIPromptCacheProvider",
]


================================================
FILE: backend/onyx/llm/prompt_cache/providers/anthropic.py
================================================
"""Anthropic provider adapter for prompt caching."""

from collections.abc import Sequence

from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform
from onyx.llm.prompt_cache.utils import revalidate_message_from_original


def _add_anthropic_cache_control(
    messages: Sequence[ChatCompletionMessage],
) -> Sequence[ChatCompletionMessage]:
    """Add cache_control parameter to messages for Anthropic caching.

    Args:
        messages: Messages to transform

    Returns:
        Messages with cache_control added
    """
    last_message_dict = dict(messages[-1])
    last_message_dict["cache_control"] = {"type": "ephemeral"}
    last_message = revalidate_message_from_original(
        original=messages[-1], mutated=last_message_dict
    )
    return list(messages[:-1]) + [last_message]


class AnthropicPromptCacheProvider(PromptCacheProvider):
    """Anthropic adapter for prompt caching (explicit caching with cache_control).
    implicit caching = just need to ensure byte-equivalent prefixes, and the provider
                       auto-detects and reuses them.
    explicit caching = the caller must do _something_ to enable provider-side caching.
    In this case, anthropic supports explicit caching via the cache_control parameter:
    https://platform.claude.com/docs/en/build-with-claude/prompt-caching
    """

    def supports_caching(self) -> bool:
        """Anthropic supports explicit prompt caching."""
        return True

    def prepare_messages_for_caching(
        self,
        cacheable_prefix: LanguageModelInput | None,
        suffix: LanguageModelInput,
        continuation: bool,
        cache_metadata: CacheMetadata | None,  # noqa: ARG002
    ) -> LanguageModelInput:
        """Prepare messages for Anthropic caching.

        Anthropic requires cache_control parameter on cacheable messages.
        We add cache_control={"type": "ephemeral"} to all cacheable prefix messages.

        Args:
            cacheable_prefix: Optional cacheable prefix
            suffix: Non-cacheable suffix
            continuation: Whether to append suffix to last prefix message
            cache_metadata: Cache metadata (for future explicit caching support)

        Returns:
            Combined messages with cache_control on cacheable messages
        """
        return prepare_messages_with_cacheable_transform(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            transform_cacheable=_add_anthropic_cache_control,
        )

    def extract_cache_metadata(
        self,
        response: dict,  # noqa: ARG002
        cache_key: str,  # noqa: ARG002
    ) -> CacheMetadata | None:
        """Extract cache metadata from Anthropic response.

        Anthropic may return cache identifiers in the response.
        For now, we don't extract detailed metadata (future explicit caching support).

        Args:
            response: Anthropic API response dictionary
            cache_key: Cache key used for this request

        Returns:
            CacheMetadata if extractable, None otherwise
        """
        # TODO: Extract cache identifiers from response when implementing explicit caching
        return None

    def get_cache_ttl_seconds(self) -> int:
        """Get cache TTL for Anthropic (5 minutes default)."""
        return 300


================================================
FILE: backend/onyx/llm/prompt_cache/providers/base.py
================================================
"""Base interface for provider-specific prompt caching adapters."""

from abc import ABC
from abc import abstractmethod

from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.prompt_cache.models import CacheMetadata


class PromptCacheProvider(ABC):
    """Abstract base class for provider-specific prompt caching logic."""

    @abstractmethod
    def supports_caching(self) -> bool:
        """Whether this provider supports prompt caching.

        Returns:
            True if caching is supported, False otherwise
        """
        raise NotImplementedError

    @abstractmethod
    def prepare_messages_for_caching(
        self,
        cacheable_prefix: LanguageModelInput | None,
        suffix: LanguageModelInput,
        continuation: bool,
        cache_metadata: CacheMetadata | None,
    ) -> LanguageModelInput:
        """Transform messages to enable caching.

        Args:
            cacheable_prefix: Optional cacheable prefix (can be str or Sequence[ChatCompletionMessage])
            suffix: Non-cacheable suffix (can be str or Sequence[ChatCompletionMessage])
            continuation: If True, suffix should be appended to the last message
                of cacheable_prefix rather than being separate messages.
                Note: When cacheable_prefix is a string, it should remain in its own
                content block even if continuation=True.
            cache_metadata: Optional cache metadata from previous requests

        Returns:
            Combined and transformed messages ready for LLM API call
        """
        raise NotImplementedError

    @abstractmethod
    def extract_cache_metadata(
        self,
        response: dict,  # Provider-specific response object
        cache_key: str,
    ) -> CacheMetadata | None:
        """Extract cache metadata from API response.

        Args:
            response: Provider-specific response dictionary
            cache_key: Cache key used for this request

        Returns:
            CacheMetadata if extractable, None otherwise
        """
        raise NotImplementedError

    @abstractmethod
    def get_cache_ttl_seconds(self) -> int:
        """Get cache TTL in seconds for this provider.

        Returns:
            TTL in seconds
        """
        raise NotImplementedError


================================================
FILE: backend/onyx/llm/prompt_cache/providers/factory.py
================================================
"""Factory for creating provider-specific prompt cache adapters."""

from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLMConfig
from onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider
from onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider
from onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider

ANTHROPIC_BEDROCK_TAG = "anthropic."


def get_provider_adapter(llm_config: LLMConfig) -> PromptCacheProvider:
    """Get the appropriate prompt cache provider adapter for a given provider.

    Args:
        provider: Provider name (e.g., "openai", "anthropic", "vertex_ai")

    Returns:
        PromptCacheProvider instance for the given provider
    """
    if llm_config.model_provider == LlmProviderNames.OPENAI:
        return OpenAIPromptCacheProvider()
    elif llm_config.model_provider == LlmProviderNames.ANTHROPIC or (
        llm_config.model_provider == LlmProviderNames.BEDROCK
        and ANTHROPIC_BEDROCK_TAG in llm_config.model_name
    ):
        return AnthropicPromptCacheProvider()
    elif llm_config.model_provider == LlmProviderNames.VERTEX_AI:
        return VertexAIPromptCacheProvider()
    else:
        # Default to no-op for providers without caching support
        return NoOpPromptCacheProvider()


================================================
FILE: backend/onyx/llm/prompt_cache/providers/noop.py
================================================
"""No-op provider adapter for providers without caching support."""

from onyx.llm.models import LanguageModelInput
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform


class NoOpPromptCacheProvider(PromptCacheProvider):
    """No-op adapter for providers that don't support prompt caching."""

    def supports_caching(self) -> bool:
        """No-op providers don't support caching."""
        return False

    def prepare_messages_for_caching(
        self,
        cacheable_prefix: LanguageModelInput | None,
        suffix: LanguageModelInput,
        continuation: bool,
        cache_metadata: CacheMetadata | None,  # noqa: ARG002
    ) -> LanguageModelInput:
        """Return messages unchanged (no caching support).

        Args:
            cacheable_prefix: Optional cacheable prefix (can be str or Sequence[ChatCompletionMessage])
            suffix: Non-cacheable suffix (can be str or Sequence[ChatCompletionMessage])
            continuation: Whether to append suffix to last prefix message.
                Note: When cacheable_prefix is a string, it remains in its own content block.
            cache_metadata: Cache metadata (ignored)

        Returns:
            Combined messages (prefix + suffix)
        """
        # No transformation needed for no-op provider
        return prepare_messages_with_cacheable_transform(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            transform_cacheable=None,
        )

    def extract_cache_metadata(
        self,
        response: dict,  # noqa: ARG002
        cache_key: str,  # noqa: ARG002
    ) -> CacheMetadata | None:
        """No cache metadata to extract."""
        return None

    def get_cache_ttl_seconds(self) -> int:
        """Return default TTL (not used for no-op)."""
        return 0


================================================
FILE: backend/onyx/llm/prompt_cache/providers/openai.py
================================================
"""OpenAI provider adapter for prompt caching."""

from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform


class OpenAIPromptCacheProvider(PromptCacheProvider):
    """OpenAI adapter for prompt caching (implicit caching)."""

    def supports_caching(self) -> bool:
        """OpenAI supports automatic prompt caching."""
        return True

    def prepare_messages_for_caching(
        self,
        cacheable_prefix: LanguageModelInput | None,
        suffix: LanguageModelInput,
        continuation: bool,
        cache_metadata: CacheMetadata | None,  # noqa: ARG002
    ) -> LanguageModelInput:
        """Prepare messages for OpenAI caching.

        OpenAI handles caching automatically, so we just normalize and combine
        the messages. The provider will automatically cache prefixes >1024 tokens.

        Args:
            cacheable_prefix: Optional cacheable prefix
            suffix: Non-cacheable suffix
            continuation: Whether to append suffix to last prefix message
            cache_metadata: Cache metadata (ignored for implicit caching)

        Returns:
            Combined messages ready for LLM API call
        """
        # No transformation needed for OpenAI (implicit caching)
        return prepare_messages_with_cacheable_transform(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            transform_cacheable=None,
        )

    def extract_cache_metadata(
        self,
        response: dict,  # noqa: ARG002
        cache_key: str,  # noqa: ARG002
    ) -> CacheMetadata | None:
        """Extract cache metadata from OpenAI response.

        OpenAI responses may include cached_tokens in the usage field.
        For implicit caching, we don't need to store much metadata.

        Args:
            response: OpenAI API response dictionary
            cache_key: Cache key used for this request

        Returns:
            CacheMetadata if extractable, None otherwise
        """
        # For implicit caching, OpenAI handles everything automatically
        # We could extract cached_tokens from response.get("usage", {}).get("cached_tokens")
        # but for now, we don't need to store metadata for implicit caching
        return None

    def get_cache_ttl_seconds(self) -> int:
        """Get cache TTL for OpenAI (1 hour max)."""
        return 3600


================================================
FILE: backend/onyx/llm/prompt_cache/providers/vertex.py
================================================
"""Vertex AI provider adapter for prompt caching."""

from collections.abc import Sequence

from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.prompt_cache.models import CacheMetadata
from onyx.llm.prompt_cache.providers.base import PromptCacheProvider
from onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform
from onyx.llm.prompt_cache.utils import revalidate_message_from_original


class VertexAIPromptCacheProvider(PromptCacheProvider):
    """Vertex AI adapter for prompt caching (implicit caching for this PR)."""

    def supports_caching(self) -> bool:
        """Vertex AI supports prompt caching (implicit and explicit)."""
        return True

    def prepare_messages_for_caching(
        self,
        cacheable_prefix: LanguageModelInput | None,
        suffix: LanguageModelInput,
        continuation: bool,
        cache_metadata: CacheMetadata | None,  # noqa: ARG002
    ) -> LanguageModelInput:
        """Prepare messages for Vertex AI caching.

        For implicit caching we attach cache_control={"type": "ephemeral"} to every
        cacheable prefix message so Vertex/Gemini can reuse them automatically.
        Explicit context caching (with cache blocks) will be added in a future PR.

        Args:
            cacheable_prefix: Optional cacheable prefix
            suffix: Non-cacheable suffix
            continuation: Whether to append suffix to last prefix message
            cache_metadata: Cache metadata (for future explicit caching support)

        Returns:
            Combined messages ready for LLM API call
        """
        # For implicit caching, no transformation needed (Vertex handles caching automatically)
        # TODO (explicit caching - future PR):
        # - Check cache_metadata for vertex_block_numbers
        # - Create transform function that replaces messages with cache_block_id if available
        # - Or adds cache_control parameter if not using cached blocks
        return prepare_messages_with_cacheable_transform(
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
            transform_cacheable=None,  # TODO: support explicit caching
        )

    def extract_cache_metadata(
        self,
        response: dict,  # noqa: ARG002
        cache_key: str,  # noqa: ARG002
    ) -> CacheMetadata | None:
        """Extract cache metadata from Vertex AI response.

        For this PR (implicit caching): Extract basic cache usage info if available.
        TODO (explicit caching - future PR): Extract block numbers from response
        and store in metadata.

        Args:
            response: Vertex AI API response dictionary
            cache_key: Cache key used for this request

        Returns:
            CacheMetadata if extractable, None otherwise
        """
        # For implicit caching, Vertex handles everything automatically
        # TODO (explicit caching - future PR):
        # - Extract cache block numbers from response
        # - Store in cache_metadata.vertex_block_numbers
        return None

    def get_cache_ttl_seconds(self) -> int:
        """Get cache TTL for Vertex AI (5 minutes)."""
        return 300


def _add_vertex_cache_control(
    messages: Sequence[ChatCompletionMessage],
) -> Sequence[ChatCompletionMessage]:
    """Add cache_control inside content blocks for Vertex AI/Gemini caching.

    Gemini requires cache_control to be on a content block within the content array,
    not at the message level. This function converts string content to the array format
    and adds cache_control to the last content block in each cacheable message.
    """
    # NOTE: unfortunately we need a much more sophisticated mechnism to support
    # explict caching with vertex in the presence of tools and system messages
    # (since they're supposed to be stripped out when setting cache_control)
    # so we're deferring this to a future PR.
    updated: list[ChatCompletionMessage] = []
    for message in messages:
        mutated = dict(message)
        content = mutated.get("content")

        if isinstance(content, str):
            # Convert string content to array format with cache_control
            mutated["content"] = [
                {
                    "type": "text",
                    "text": content,
                    "cache_control": {"type": "ephemeral"},
                }
            ]
        elif isinstance(content, list) and content:
            # Content is already an array - add cache_control to last block
            new_content = []
            for i, block in enumerate(content):
                if isinstance(block, dict):
                    block_copy = dict(block)
                    # Add cache_control to the last content block
                    if i == len(content) - 1:
                        block_copy["cache_control"] = {"type": "ephemeral"}
                    new_content.append(block_copy)
                else:
                    new_content.append(block)
            mutated["content"] = new_content

        updated.append(revalidate_message_from_original(message, mutated))
    return updated


================================================
FILE: backend/onyx/llm/prompt_cache/utils.py
================================================
# pyright: reportMissingTypeStubs=false
"""Utility functions for prompt caching."""

import json
from collections.abc import Callable
from collections.abc import Sequence
from typing import Any

from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import LanguageModelInput
from onyx.utils.logger import setup_logger


logger = setup_logger()


def combine_messages_with_continuation(
    prefix_msgs: Sequence[ChatCompletionMessage],
    suffix_msgs: Sequence[ChatCompletionMessage],
    continuation: bool,
) -> list[ChatCompletionMessage]:
    """Combine prefix and suffix messages, handling continuation flag.

    Args:
        prefix_msgs: Normalized cacheable prefix messages
        suffix_msgs: Normalized suffix messages
        continuation: If True, append suffix content to the last message of prefix
        was_prefix_string: Deprecated, no longer used

    Returns:
        Combined messages
    """
    if not continuation or not prefix_msgs:
        return list(prefix_msgs) + list(suffix_msgs)
    # Append suffix content to last message of prefix
    result = list(prefix_msgs)
    last_msg = dict(result[-1])
    suffix_first = dict(suffix_msgs[0]) if suffix_msgs else {}

    # Combine content
    if "content" in last_msg and "content" in suffix_first:
        if isinstance(last_msg["content"], str) and isinstance(
            suffix_first["content"], str
        ):
            last_msg["content"] = last_msg["content"] + suffix_first["content"]
        else:
            # Handle list content (multimodal)
            prefix_content = (
                last_msg["content"]
                if isinstance(last_msg["content"], list)
                else [{"type": "text", "text": last_msg["content"]}]
            )
            suffix_content = (
                suffix_first["content"]
                if isinstance(suffix_first["content"], list)
                else [{"type": "text", "text": suffix_first["content"]}]
            )
            last_msg["content"] = prefix_content + suffix_content

    result[-1] = revalidate_message_from_original(original=result[-1], mutated=last_msg)
    result.extend(suffix_msgs[1:])
    return result


def revalidate_message_from_original(
    original: ChatCompletionMessage,
    mutated: dict[str, Any],
) -> ChatCompletionMessage:
    """Rebuild a mutated message using the original BaseModel type.

    Some providers need to add cache metadata to messages. Re-run validation against
    the original message's Pydantic class so union discrimination (by role) stays
    intact.
    """
    cls = original.__class__
    try:
        return cls.model_validate_json(json.dumps(mutated))
    except Exception:
        return cls.model_validate(mutated)


def prepare_messages_with_cacheable_transform(
    cacheable_prefix: LanguageModelInput | None,
    suffix: LanguageModelInput,
    continuation: bool,
    transform_cacheable: (
        Callable[[Sequence[ChatCompletionMessage]], Sequence[ChatCompletionMessage]]
        | None
    ) = None,
) -> LanguageModelInput:
    """Prepare messages for caching with optional transformation of cacheable prefix.

    This is a shared utility that handles the common flow:
    1. Normalize inputs
    2. Optionally transform cacheable messages
    3. Combine with continuation handling

    Args:
        cacheable_prefix: Optional cacheable prefix
        suffix: Non-cacheable suffix
        continuation: Whether to append suffix to last prefix message
        transform_cacheable: Optional function to transform cacheable messages
            (e.g., add cache_control parameter). If None, messages are used as-is.

    Returns:
        Combined messages ready for LLM API call
    """
    if cacheable_prefix is None:
        return suffix

    prefix_msgs = (
        cacheable_prefix if isinstance(cacheable_prefix, list) else [cacheable_prefix]
    )
    suffix_msgs = suffix if isinstance(suffix, list) else [suffix]

    # Apply transformation to cacheable messages if provided
    if transform_cacheable is not None:
        prefix_msgs = list(transform_cacheable(prefix_msgs))

    return combine_messages_with_continuation(
        prefix_msgs=prefix_msgs, suffix_msgs=suffix_msgs, continuation=continuation
    )


================================================
FILE: backend/onyx/llm/request_context.py
================================================
import contextvars


_LLM_MOCK_RESPONSE_CONTEXTVAR: contextvars.ContextVar[str | None] = (
    contextvars.ContextVar("llm_mock_response", default=None)
)


def get_llm_mock_response() -> str | None:
    return _LLM_MOCK_RESPONSE_CONTEXTVAR.get()


def set_llm_mock_response(mock_response: str | None) -> contextvars.Token[str | None]:
    return _LLM_MOCK_RESPONSE_CONTEXTVAR.set(mock_response)


def reset_llm_mock_response(token: contextvars.Token[str | None]) -> None:
    try:
        _LLM_MOCK_RESPONSE_CONTEXTVAR.reset(token)
    except ValueError:
        # Streaming requests can cross execution contexts.
        # Best effort clear to avoid crashing request teardown in integration mode.
        _LLM_MOCK_RESPONSE_CONTEXTVAR.set(None)


================================================
FILE: backend/onyx/llm/utils.py
================================================
import copy
import re
from collections.abc import Callable
from functools import lru_cache
from typing import Any
from typing import cast
from typing import TYPE_CHECKING

from sqlalchemy import select

from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
from onyx.configs.app_configs import SEND_USER_METADATA_TO_LLM_PROVIDER
from onyx.configs.app_configs import USE_CHUNK_SUMMARY
from onyx.configs.app_configs import USE_DOCUMENT_SUMMARY
from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import LLMModelFlowType
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.model_response import ModelResponse
from onyx.llm.models import UserMessage
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_TOKEN_ESTIMATE
from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_TOKEN_ESTIMATE
from onyx.utils.logger import setup_logger
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE


if TYPE_CHECKING:
    from onyx.server.manage.llm.models import LLMProviderView


logger = setup_logger()

MAX_CONTEXT_TOKENS = 100
ONE_MILLION = 1_000_000
CHUNKS_PER_DOC_ESTIMATE = 5
MAX_LITELLM_USER_ID_LENGTH = 64
_TWELVE_LABS_PEGASUS_MODEL_NAMES = [
    "us.twelvelabs.pegasus-1-2-v1:0",
    "us.twelvelabs.pegasus-1-2-v1",
    "twelvelabs/us.twelvelabs.pegasus-1-2-v1:0",
    "twelvelabs/us.twelvelabs.pegasus-1-2-v1",
]
_TWELVE_LABS_PEGASUS_OUTPUT_TOKENS = max(512, GEN_AI_MODEL_FALLBACK_MAX_TOKENS // 4)
CUSTOM_LITELLM_MODEL_OVERRIDES: dict[str, dict[str, Any]] = {
    model_name: {
        "max_input_tokens": GEN_AI_MODEL_FALLBACK_MAX_TOKENS,
        "max_output_tokens": _TWELVE_LABS_PEGASUS_OUTPUT_TOKENS,
        "max_tokens": GEN_AI_MODEL_FALLBACK_MAX_TOKENS,
        "supports_reasoning": False,
        "supports_vision": False,
    }
    for model_name in _TWELVE_LABS_PEGASUS_MODEL_NAMES
}


def truncate_litellm_user_id(user_id: str) -> str:
    """Truncate the LiteLLM `user` field maximum length."""
    if len(user_id) <= MAX_LITELLM_USER_ID_LENGTH:
        return user_id
    logger.warning(
        "User's ID exceeds %d chars (len=%d); truncating for Litellm logging compatibility.",
        MAX_LITELLM_USER_ID_LENGTH,
        len(user_id),
    )
    return user_id[:MAX_LITELLM_USER_ID_LENGTH]


def build_litellm_passthrough_kwargs(
    model_kwargs: dict[str, Any],
    user_identity: LLMUserIdentity | None,
) -> dict[str, Any]:
    """Build kwargs passed through directly to LiteLLM.

    Returns `model_kwargs` unchanged unless we need to add user/session metadata,
    in which case a copy is returned to avoid cross-request mutation.
    """

    if not (SEND_USER_METADATA_TO_LLM_PROVIDER and user_identity):
        return model_kwargs

    passthrough_kwargs = copy.deepcopy(model_kwargs)

    if user_identity.user_id:
        passthrough_kwargs["user"] = truncate_litellm_user_id(user_identity.user_id)

    if user_identity.session_id:
        existing_metadata = passthrough_kwargs.get("metadata")
        metadata: dict[str, Any] | None
        if existing_metadata is None:
            metadata = {}
        elif isinstance(existing_metadata, dict):
            metadata = copy.deepcopy(existing_metadata)
        else:
            metadata = None

        if metadata is not None:
            metadata["session_id"] = user_identity.session_id
            passthrough_kwargs["metadata"] = metadata

    return passthrough_kwargs


def _unwrap_nested_exception(error: Exception) -> Exception:
    """
    Traverse common exception wrappers to surface the underlying LiteLLM error.
    """
    visited: set[int] = set()
    current = error
    for _ in range(100):
        visited.add(id(current))
        candidate: Exception | None = None
        cause = getattr(current, "__cause__", None)
        if isinstance(cause, Exception):
            candidate = cause
        elif (
            hasattr(current, "args")
            and len(getattr(current, "args")) == 1
            and isinstance(current.args[0], Exception)
        ):
            candidate = current.args[0]
        if candidate is None or id(candidate) in visited:
            break
        current = candidate
    return current


def litellm_exception_to_error_msg(
    e: Exception,
    llm: LLM,
    fallback_to_error_msg: bool = False,
    custom_error_msg_mappings: (
        dict[str, str] | None
    ) = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS,
) -> tuple[str, str, bool]:
    """Convert a LiteLLM exception to a user-friendly error message with classification.

    Returns:
        tuple: (error_message, error_code, is_retryable)
            - error_message: User-friendly error description
            - error_code: Categorized error code for frontend display
            - is_retryable: Whether the user should try again
    """
    from litellm.exceptions import BadRequestError
    from litellm.exceptions import AuthenticationError
    from litellm.exceptions import PermissionDeniedError
    from litellm.exceptions import NotFoundError
    from litellm.exceptions import UnprocessableEntityError
    from litellm.exceptions import RateLimitError
    from litellm.exceptions import ContextWindowExceededError
    from litellm.exceptions import APIConnectionError
    from litellm.exceptions import APIError
    from litellm.exceptions import Timeout
    from litellm.exceptions import ContentPolicyViolationError
    from litellm.exceptions import BudgetExceededError
    from litellm.exceptions import ServiceUnavailableError

    core_exception = _unwrap_nested_exception(e)
    error_msg = str(core_exception)
    error_code = "UNKNOWN_ERROR"
    is_retryable = True

    if custom_error_msg_mappings:
        for error_msg_pattern, custom_error_msg in custom_error_msg_mappings.items():
            if error_msg_pattern in error_msg:
                return custom_error_msg, "CUSTOM_ERROR", True

    if isinstance(core_exception, BadRequestError):
        error_msg = "Bad request: The server couldn't process your request. Please check your input."
        error_code = "BAD_REQUEST"
        is_retryable = True
    elif isinstance(core_exception, AuthenticationError):
        error_msg = "Authentication failed: Please check your API key and credentials."
        error_code = "AUTH_ERROR"
        is_retryable = False
    elif isinstance(core_exception, PermissionDeniedError):
        error_msg = (
            "Permission denied: You don't have the necessary permissions for this operation. "
            "Ensure you have access to this model."
        )
        error_code = "PERMISSION_DENIED"
        is_retryable = False
    elif isinstance(core_exception, NotFoundError):
        error_msg = "Resource not found: The requested resource doesn't exist."
        error_code = "NOT_FOUND"
        is_retryable = False
    elif isinstance(core_exception, UnprocessableEntityError):
        error_msg = "Unprocessable entity: The server couldn't process your request due to semantic errors."
        error_code = "UNPROCESSABLE_ENTITY"
        is_retryable = True
    elif isinstance(core_exception, RateLimitError):
        provider_name = (
            llm.config.model_provider
            if llm is not None and llm.config.model_provider
            else "The LLM provider"
        )
        upstream_detail: str | None = None
        message_attr = getattr(core_exception, "message", None)
        if message_attr:
            upstream_detail = str(message_attr)
        elif hasattr(core_exception, "api_error"):
            api_error = core_exception.api_error
            if isinstance(api_error, dict):
                upstream_detail = (
                    api_error.get("message")
                    or api_error.get("detail")
                    or api_error.get("error")
                )
        if not upstream_detail:
            upstream_detail = str(core_exception)
        upstream_detail = str(upstream_detail).strip()
        if ":" in upstream_detail and upstream_detail.lower().startswith(
            "ratelimiterror"
        ):
            upstream_detail = upstream_detail.split(":", 1)[1].strip()
        upstream_detail_lower = upstream_detail.lower()
        if (
            "insufficient_quota" in upstream_detail_lower
            or "exceeded your current quota" in upstream_detail_lower
        ):
            error_msg = (
                f"{provider_name} quota exceeded: {upstream_detail}"
                if upstream_detail
                else f"{provider_name} quota exceeded: Verify billing and quota for this API key."
            )
            error_code = "BUDGET_EXCEEDED"
            is_retryable = False
        else:
            error_msg = (
                f"{provider_name} rate limit: {upstream_detail}"
                if upstream_detail
                else f"{provider_name} rate limit exceeded: Please slow down your requests and try again later."
            )
            error_code = "RATE_LIMIT"
            is_retryable = True
    elif isinstance(core_exception, ServiceUnavailableError):
        provider_name = (
            llm.config.model_provider
            if llm is not None and llm.config.model_provider
            else "The LLM provider"
        )
        # Check if this is specifically the Bedrock "Too many connections" error
        if "Too many connections" in error_msg or "BedrockException" in error_msg:
            error_msg = (
                f"{provider_name} is experiencing high connection volume and cannot process your request right now. "
                "This typically happens when there are too many simultaneous requests to the AI model. "
                "Please wait a moment and try again. If this persists, contact your system administrator "
                "to review connection limits and retry configurations."
            )
        else:
            # Generic 503 Service Unavailable
            error_msg = f"{provider_name} service error: {str(core_exception)}"
        error_code = "SERVICE_UNAVAILABLE"
        is_retryable = True
    elif isinstance(core_exception, ContextWindowExceededError):
        error_msg = (
            "Context window exceeded: Your input is too long for the model to process."
        )
        if llm is not None:
            try:
                max_context = get_max_input_tokens(
                    model_name=llm.config.model_name,
                    model_provider=llm.config.model_provider,
                )
                error_msg += f" Your invoked model ({llm.config.model_name}) has a maximum context size of {max_context}."
            except Exception:
                logger.warning(
                    "Unable to get maximum input token for LiteLLM exception handling"
                )
        error_code = "CONTEXT_TOO_LONG"
        is_retryable = False
    elif isinstance(core_exception, ContentPolicyViolationError):
        error_msg = "Content policy violation: Your request violates the content policy. Please revise your input."
        error_code = "CONTENT_POLICY"
        is_retryable = False
    elif isinstance(core_exception, APIConnectionError):
        error_msg = "API connection error: Failed to connect to the API. Please check your internet connection."
        error_code = "CONNECTION_ERROR"
        is_retryable = True
    elif isinstance(core_exception, BudgetExceededError):
        error_msg = (
            "Budget exceeded: You've exceeded your allocated budget for API usage."
        )
        error_code = "BUDGET_EXCEEDED"
        is_retryable = False
    elif isinstance(core_exception, Timeout):
        error_msg = "Request timed out: The operation took too long to complete. Please try again."
        error_code = "CONNECTION_ERROR"
        is_retryable = True
    elif isinstance(core_exception, APIError):
        error_msg = f"API error: An error occurred while communicating with the API. Details: {str(core_exception)}"
        error_code = "API_ERROR"
        is_retryable = True
    elif not fallback_to_error_msg:
        error_msg = "An unexpected error occurred while processing your request. Please try again later."
        error_code = "UNKNOWN_ERROR"
        is_retryable = True

    return error_msg, error_code, is_retryable


def llm_response_to_string(message: ModelResponse) -> str:
    if not isinstance(message.choice.message.content, str):
        raise RuntimeError("LLM message not in expected format.")

    return message.choice.message.content


def check_number_of_tokens(
    text: str, encode_fn: Callable[[str], list] | None = None
) -> int:
    """Gets the number of tokens in the provided text, using the provided encoding
    function. If none is provided, default to the tiktoken encoder used by GPT-3.5
    and GPT-4.
    """
    import tiktoken

    if encode_fn is None:
        encode_fn = tiktoken.get_encoding("cl100k_base").encode

    return len(encode_fn(text))


def test_llm(llm: LLM) -> str | None:
    # try for up to 2 timeouts (e.g. 10 seconds in total)
    error_msg = None
    for _ in range(2):
        try:
            llm.invoke(UserMessage(content="Do not respond"), max_tokens=50)
            return None
        except Exception as e:
            error_msg = str(e)
            logger.warning(f"Failed to call LLM with the following error: {error_msg}")

    return error_msg


@lru_cache(maxsize=1)  # the copy.deepcopy is expensive, so we cache the result
def get_model_map() -> dict:
    import litellm

    DIVIDER = "/"

    original_map = cast(dict[str, dict], litellm.model_cost)
    starting_map = copy.deepcopy(original_map)
    for key in original_map:
        if DIVIDER in key:
            truncated_key = key.split(DIVIDER)[-1]
            # make sure not to overwrite an original key
            if truncated_key in original_map:
                continue

            # if there are multiple possible matches, choose the most "detailed"
            # one as a heuristic. "detailed" = the description of the model
            # has the most filled out fields.
            existing_truncated_value = starting_map.get(truncated_key)
            potential_truncated_value = original_map[key]
            if not existing_truncated_value or len(potential_truncated_value) > len(
                existing_truncated_value
            ):
                starting_map[truncated_key] = potential_truncated_value

    for model_name, model_metadata in CUSTOM_LITELLM_MODEL_OVERRIDES.items():
        if model_name in starting_map:
            continue
        starting_map[model_name] = copy.deepcopy(model_metadata)

    # NOTE: outside of the explicit CUSTOM_LITELLM_MODEL_OVERRIDES,
    # we avoid hard-coding additional models here. Ollama, for example,
    # allows the user to specify their desired max context window, and it's
    # unlikely to be standard across users even for the same model
    # (it heavily depends on their hardware). For those cases, we rely on
    # GEN_AI_MODEL_FALLBACK_MAX_TOKENS to cover this.
    # for model_name in [
    #     "llama3.2",
    #     "llama3.2:1b",
    #     "llama3.2:3b",
    #     "llama3.2:11b",
    #     "llama3.2:90b",
    # ]:
    #     starting_map[f"ollama/{model_name}"] = {
    #         "max_tokens": 128000,
    #         "max_input_tokens": 128000,
    #         "max_output_tokens": 128000,
    #     }

    return starting_map


def _strip_extra_provider_from_model_name(model_name: str) -> str:
    return model_name.split("/")[1] if "/" in model_name else model_name


def _strip_colon_from_model_name(model_name: str) -> str:
    return ":".join(model_name.split(":")[:-1]) if ":" in model_name else model_name


def find_model_obj(model_map: dict, provider: str, model_name: str) -> dict | None:
    stripped_model_name = _strip_extra_provider_from_model_name(model_name)

    model_names = [
        model_name,
        _strip_extra_provider_from_model_name(model_name),
        # Remove leading extra provider. Usually for cases where user has a
        # customer model proxy which appends another prefix
        # remove :XXXX from the end, if present. Needed for ollama.
        _strip_colon_from_model_name(model_name),
        _strip_colon_from_model_name(stripped_model_name),
    ]

    # Filter out None values and deduplicate model names
    filtered_model_names = [name for name in model_names if name]

    # First try all model names with provider prefix
    for model_name in filtered_model_names:
        model_obj = model_map.get(f"{provider}/{model_name}")
        if model_obj:
            return model_obj

    # Then try all model names without provider prefix
    for model_name in filtered_model_names:
        model_obj = model_map.get(model_name)
        if model_obj:
            return model_obj

    return None


def get_llm_contextual_cost(
    llm: LLM,
) -> float:
    """
    Approximate the cost of using the given LLM for indexing with Contextual RAG.

    We use a precomputed estimate for the number of tokens in the contextualizing prompts,
    and we assume that every chunk is maximized in terms of content and context.
    We also assume that every document is maximized in terms of content, as currently if
    a document is longer than a certain length, its summary is used instead of the full content.

    We expect that the first assumption will overestimate more than the second one
    underestimates, so this should be a fairly conservative price estimate. Also,
    this does not account for the cost of documents that fit within a single chunk
    which do not get contextualized.
    """

    import litellm

    # calculate input costs
    num_tokens = ONE_MILLION
    num_input_chunks = num_tokens // DOC_EMBEDDING_CONTEXT_SIZE

    # We assume that the documents are MAX_TOKENS_FOR_FULL_INCLUSION tokens long
    # on average.
    num_docs = num_tokens // MAX_TOKENS_FOR_FULL_INCLUSION

    num_input_tokens = 0
    num_output_tokens = 0

    if not USE_CHUNK_SUMMARY and not USE_DOCUMENT_SUMMARY:
        return 0

    if USE_CHUNK_SUMMARY:
        # Each per-chunk prompt includes:
        # - The prompt tokens
        # - the document tokens
        # - the chunk tokens

        # for each chunk, we prompt the LLM with the contextual RAG prompt
        # and the full document content (or the doc summary, so this is an overestimate)
        num_input_tokens += num_input_chunks * (
            CONTEXTUAL_RAG_TOKEN_ESTIMATE + MAX_TOKENS_FOR_FULL_INCLUSION
        )

        # in aggregate, each chunk content is used as a prompt input once
        # so the full input size is covered
        num_input_tokens += num_tokens

        # A single MAX_CONTEXT_TOKENS worth of output is generated per chunk
        num_output_tokens += num_input_chunks * MAX_CONTEXT_TOKENS

    # going over each doc once means all the tokens, plus the prompt tokens for
    # the summary prompt. This CAN happen even when USE_DOCUMENT_SUMMARY is false,
    # since doc summaries are used for longer documents when USE_CHUNK_SUMMARY is true.
    # So, we include this unconditionally to overestimate.
    num_input_tokens += num_tokens + num_docs * DOCUMENT_SUMMARY_TOKEN_ESTIMATE
    num_output_tokens += num_docs * MAX_CONTEXT_TOKENS

    try:
        usd_per_prompt, usd_per_completion = litellm.cost_per_token(
            model=llm.config.model_name,
            prompt_tokens=num_input_tokens,
            completion_tokens=num_output_tokens,
        )
    except Exception:
        logger.exception(
            "An unexpected error occurred while calculating cost for model "
            f"{llm.config.model_name} (potentially due to malformed name). "
            "Assuming cost is 0."
        )
        return 0

    # Costs are in USD dollars per million tokens
    return usd_per_prompt + usd_per_completion


def llm_max_input_tokens(
    model_map: dict,
    model_name: str,
    model_provider: str,
) -> int:
    """Best effort attempt to get the max input tokens for the LLM."""
    if GEN_AI_MAX_TOKENS:
        # This is an override, so always return this
        logger.info(f"Using override GEN_AI_MAX_TOKENS: {GEN_AI_MAX_TOKENS}")
        return GEN_AI_MAX_TOKENS

    model_obj = find_model_obj(
        model_map,
        model_provider,
        model_name,
    )
    if not model_obj:
        logger.warning(
            f"Model '{model_name}' not found in LiteLLM. Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens."
        )
        return GEN_AI_MODEL_FALLBACK_MAX_TOKENS

    if "max_input_tokens" in model_obj:
        return model_obj["max_input_tokens"]

    if "max_tokens" in model_obj:
        return model_obj["max_tokens"]

    logger.warning(
        f"No max tokens found for '{model_name}'. Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens."
    )
    return GEN_AI_MODEL_FALLBACK_MAX_TOKENS


def get_llm_max_output_tokens(
    model_map: dict,
    model_name: str,
    model_provider: str,
) -> int:
    """Best effort attempt to get the max output tokens for the LLM."""
    default_output_tokens = int(GEN_AI_MODEL_FALLBACK_MAX_TOKENS)

    model_obj = model_map.get(f"{model_provider}/{model_name}")
    if not model_obj:
        model_obj = model_map.get(model_name)

    if not model_obj:
        logger.warning(
            f"Model '{model_name}' not found in LiteLLM. Falling back to {default_output_tokens} output tokens."
        )
        return default_output_tokens

    if "max_output_tokens" in model_obj:
        return model_obj["max_output_tokens"]

    # Fallback to a fraction of max_tokens if max_output_tokens is not specified
    if "max_tokens" in model_obj:
        return int(model_obj["max_tokens"] * 0.1)

    logger.warning(
        f"No max output tokens found for '{model_name}'. Falling back to {default_output_tokens} output tokens."
    )
    return default_output_tokens


def get_max_input_tokens(
    model_name: str,
    model_provider: str,
    output_tokens: int = GEN_AI_NUM_RESERVED_OUTPUT_TOKENS,
) -> int:
    # NOTE: we previously used `litellm.get_max_tokens()`, but despite the name, this actually
    # returns the max OUTPUT tokens. Under the hood, this uses the `litellm.model_cost` dict,
    # and there is no other interface to get what we want. This should be okay though, since the
    # `model_cost` dict is a named public interface:
    # https://litellm.vercel.app/docs/completion/token_usage#7-model_cost
    # model_map is  litellm.model_cost
    litellm_model_map = get_model_map()

    input_toks = (
        llm_max_input_tokens(
            model_name=model_name,
            model_provider=model_provider,
            model_map=litellm_model_map,
        )
        - output_tokens
    )

    if input_toks <= 0:
        return GEN_AI_MODEL_FALLBACK_MAX_TOKENS

    return input_toks


def get_max_input_tokens_from_llm_provider(
    llm_provider: "LLMProviderView",
    model_name: str,
) -> int:
    """Get max input tokens for a model, with fallback chain.

    Fallback order:
    1. Use max_input_tokens from model_configuration (populated from source APIs
       like OpenRouter, Ollama, or our Bedrock mapping)
    2. Look up in litellm.model_cost dictionary
    3. Fall back to GEN_AI_MODEL_FALLBACK_MAX_TOKENS (32000)

    Most dynamic providers (OpenRouter, Ollama) provide context_length via their
    APIs. Bedrock doesn't expose this, so we parse from model ID suffix (:200k)
    or use BEDROCK_MODEL_TOKEN_LIMITS mapping. The 32000 fallback is only hit for
    unknown models not in any of these sources.
    """
    max_input_tokens = None
    for model_configuration in llm_provider.model_configurations:
        if model_configuration.name == model_name:
            max_input_tokens = model_configuration.max_input_tokens
    return (
        max_input_tokens
        if max_input_tokens
        else get_max_input_tokens(
            model_provider=llm_provider.name,
            model_name=model_name,
        )
    )


def get_bedrock_token_limit(model_id: str) -> int:
    """Look up token limit for a Bedrock model.

    AWS Bedrock API doesn't expose token limits directly. This function
    attempts to determine the limit from multiple sources.

    Lookup order:
    1. Parse from model ID suffix (e.g., ":200k" → 200000)
    2. Check LiteLLM's model_cost dictionary
    3. Fall back to our hardcoded BEDROCK_MODEL_TOKEN_LIMITS mapping
    4. Default to 32000 if not found anywhere
    """
    from onyx.llm.constants import BEDROCK_MODEL_TOKEN_LIMITS

    model_id_lower = model_id.lower()

    # 1. Try to parse context length from model ID suffix
    # Format: "model-name:version:NNNk" where NNN is the context length in thousands
    # Examples: ":200k", ":128k", ":1000k", ":8k", ":4k"
    context_match = re.search(r":(\d+)k\b", model_id_lower)
    if context_match:
        return int(context_match.group(1)) * 1000

    # 2. Check LiteLLM's model_cost dictionary
    try:
        model_map = get_model_map()
        # Try with bedrock/ prefix first, then without
        for key in [f"bedrock/{model_id}", model_id]:
            if key in model_map:
                model_info = model_map[key]
                if "max_input_tokens" in model_info:
                    return model_info["max_input_tokens"]
                if "max_tokens" in model_info:
                    return model_info["max_tokens"]
    except Exception:
        pass  # Fall through to mapping

    # 3. Try our hardcoded mapping (longest match first)
    for pattern, limit in sorted(
        BEDROCK_MODEL_TOKEN_LIMITS.items(), key=lambda x: -len(x[0])
    ):
        if pattern in model_id_lower:
            return limit

    # 4. Default fallback
    return GEN_AI_MODEL_FALLBACK_MAX_TOKENS


def model_supports_image_input(model_name: str, model_provider: str) -> bool:
    # First, try to read an explicit configuration from the model_configuration table
    try:
        with get_session_with_current_tenant() as db_session:
            model_config = db_session.scalar(
                select(ModelConfiguration)
                .join(
                    LLMProvider,
                    ModelConfiguration.llm_provider_id == LLMProvider.id,
                )
                .where(
                    ModelConfiguration.name == model_name,
                    LLMProvider.provider == model_provider,
                )
            )
            if (
                model_config
                and LLMModelFlowType.VISION in model_config.llm_model_flow_types
            ):
                return True
    except Exception as e:
        logger.warning(
            f"Failed to query database for {model_provider} model {model_name} image support: {e}"
        )

    # Fallback to looking up the model in the litellm model_cost dict
    return litellm_thinks_model_supports_image_input(model_name, model_provider)


def litellm_thinks_model_supports_image_input(
    model_name: str, model_provider: str
) -> bool:
    """Generally should call `model_supports_image_input` unless you already know that
    `model_supports_image_input` from the DB is not set OR you need to avoid the performance
    hit of querying the DB."""
    try:
        model_obj = find_model_obj(get_model_map(), model_provider, model_name)
        if not model_obj:
            logger.warning(
                f"No litellm entry found for {model_provider}/{model_name}, this model may or may not support image input."
            )
            return False
        # The or False here is because sometimes the dict contains the key but the value is None
        return model_obj.get("supports_vision", False) or False
    except Exception:
        logger.exception(
            f"Failed to get model object for {model_provider}/{model_name}"
        )
        return False


def model_is_reasoning_model(model_name: str, model_provider: str) -> bool:
    import litellm

    model_map = get_model_map()
    try:
        model_obj = find_model_obj(
            model_map,
            model_provider,
            model_name,
        )
        if model_obj and "supports_reasoning" in model_obj:
            return model_obj["supports_reasoning"]

        # Fallback: try using litellm.supports_reasoning() for newer models
        try:
            # logger.debug("Falling back to `litellm.supports_reasoning`")
            full_model_name = (
                f"{model_provider}/{model_name}"
                if model_provider not in model_name
                else model_name
            )
            return litellm.supports_reasoning(model=full_model_name)
        except Exception:
            logger.exception(
                f"Failed to check if {model_provider}/{model_name} supports reasoning"
            )
            return False

    except Exception:
        logger.exception(
            f"Failed to get model object for {model_provider}/{model_name}"
        )
        return False


def is_true_openai_model(model_provider: str, model_name: str) -> bool:
    """
    Determines if a model is a true OpenAI model or just using OpenAI-compatible API.

    LiteLLM uses the "openai" provider for any OpenAI-compatible server (e.g. vLLM, LiteLLM proxy),
    but this function checks if the model is actually from OpenAI's model registry.

    This function is used primarily to determine if we should use the responses API.
    OpenAI models from OpenAI and Azure should use responses.
    """

    if model_provider not in {
        LlmProviderNames.OPENAI,
        LlmProviderNames.LITELLM_PROXY,
        LlmProviderNames.AZURE,
    }:
        return False

    model_map = get_model_map()

    def _check_if_model_name_is_openai_provider(model_name: str) -> bool:
        if model_name not in model_map:
            return False
        return model_map[model_name].get("litellm_provider") == LlmProviderNames.OPENAI

    try:
        # Check if any model exists in litellm's registry with openai prefix
        # If it's registered as "openai/model-name", it's a real OpenAI model
        if f"{LlmProviderNames.OPENAI}/{model_name}" in model_map:
            return True

        if _check_if_model_name_is_openai_provider(model_name):
            return True

        if model_name.startswith(f"{LlmProviderNames.AZURE}/"):
            model_name_with_azure_removed = "/".join(model_name.split("/")[1:])
            if _check_if_model_name_is_openai_provider(model_name_with_azure_removed):
                return True

        return False

    except Exception:
        logger.exception(
            f"Failed to determine if {model_provider}/{model_name} is a true OpenAI model"
        )
        return False


def model_needs_formatting_reenabled(model_name: str) -> bool:
    # See https://simonwillison.net/tags/markdown/ for context on why this is needed
    # for OpenAI reasoning models to have correct markdown generation

    # Models that need formatting re-enabled
    model_names = ["gpt-5.1", "gpt-5", "o3", "o1"]

    # Pattern matches if any of these model names appear with word boundaries
    # Word boundaries include: start/end of string, space, hyphen, or forward slash
    pattern = (
        r"(?:^|[\s\-/])("
        + "|".join(re.escape(name) for name in model_names)
        + r")(?:$|[\s\-/])"
    )

    if re.search(pattern, model_name):
        return True

    return False


================================================
FILE: backend/onyx/llm/well_known_providers/auto_update_models.py
================================================
"""Pydantic models for GitHub-hosted Auto LLM configuration."""

from datetime import datetime
from typing import Any

from pydantic import BaseModel
from pydantic import field_validator

from onyx.llm.well_known_providers.models import SimpleKnownModel


class LLMProviderRecommendation(BaseModel):
    """Configuration for a single provider in the GitHub config.

    Schema matches the plan:
    - default_model: The default model config (can be string or object with name)
    - additional_visible_models: List of additional visible model configs
    """

    default_model: SimpleKnownModel
    additional_visible_models: list[SimpleKnownModel] = []

    @field_validator("default_model", mode="before")
    @classmethod
    def normalize_default_model(cls, v: Any) -> dict[str, Any]:
        """Allow default_model to be a string (model name) or object."""
        if isinstance(v, str):
            return {"name": v}
        return v


class LLMRecommendations(BaseModel):
    """Root configuration object fetched from GitHub."""

    version: str
    updated_at: datetime
    providers: dict[str, LLMProviderRecommendation]

    def get_visible_models(self, provider_name: str) -> list[SimpleKnownModel]:
        """Get the set of models that should be visible by default for a provider."""
        if provider_name in self.providers:
            provider_config = self.providers[provider_name]
            return [provider_config.default_model] + list(
                provider_config.additional_visible_models
            )
        return []

    def get_default_model(self, provider_name: str) -> SimpleKnownModel | None:
        """Get the default model for a provider."""
        if provider_name in self.providers:
            provider_config = self.providers[provider_name]
            return provider_config.default_model
        return None


================================================
FILE: backend/onyx/llm/well_known_providers/auto_update_service.py
================================================
"""Service for fetching and syncing LLM model configurations from GitHub.

This service manages Auto mode LLM providers, where models and configuration
are managed centrally via a GitHub-hosted JSON file. In Auto mode:
- Model list is controlled by GitHub config
- Model visibility is controlled by GitHub config
- Default model is controlled by GitHub config
- Admin only needs to provide API credentials
"""

from datetime import datetime

import httpx
from sqlalchemy.orm import Session

from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.db.llm import fetch_auto_mode_providers
from onyx.db.llm import sync_auto_mode_models
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.utils.logger import setup_logger

logger = setup_logger()

_CACHE_KEY_LAST_UPDATED_AT = "auto_llm_update:last_updated_at"
_CACHE_TTL_SECONDS = 60 * 60 * 24  # 24 hours


def _get_cached_last_updated_at() -> datetime | None:
    try:
        value = get_cache_backend().get(_CACHE_KEY_LAST_UPDATED_AT)
        if value is not None:
            return datetime.fromisoformat(value.decode("utf-8"))
    except Exception as e:
        logger.warning(f"Failed to get cached last_updated_at: {e}")
    return None


def _set_cached_last_updated_at(updated_at: datetime) -> None:
    try:
        get_cache_backend().set(
            _CACHE_KEY_LAST_UPDATED_AT,
            updated_at.isoformat(),
            ex=_CACHE_TTL_SECONDS,
        )
    except Exception as e:
        logger.warning(f"Failed to set cached last_updated_at: {e}")


def fetch_llm_recommendations_from_github(
    timeout: float = 30.0,
) -> LLMRecommendations | None:
    """Fetch LLM configuration from GitHub.

    Returns:
        GitHubLLMConfig if successful, None on error.
    """
    if not AUTO_LLM_CONFIG_URL:
        logger.debug("AUTO_LLM_CONFIG_URL not configured, skipping fetch")
        return None

    try:
        with httpx.Client(timeout=timeout) as client:
            response = client.get(AUTO_LLM_CONFIG_URL)
            response.raise_for_status()

            data = response.json()
            return LLMRecommendations.model_validate(data)
    except httpx.HTTPError as e:
        logger.error(f"Failed to fetch LLM config from GitHub: {e}")
        return None
    except Exception as e:
        logger.error(f"Error parsing LLM config: {e}")
        return None


def sync_llm_models_from_github(
    db_session: Session,
    force: bool = False,
) -> dict[str, int]:
    """Sync models from GitHub config to database for all Auto mode providers.

    In Auto mode, EVERYTHING is controlled by GitHub config:
    - Model list
    - Model visibility (is_visible)
    - Default model
    - Fast default model

    Args:
        db_session: Database session
        config: GitHub LLM configuration
        force: If True, skip the updated_at check and force sync

    Returns:
        Dict of provider_name -> number of changes made.
    """
    results: dict[str, int] = {}

    # Get all providers in Auto mode
    auto_providers = fetch_auto_mode_providers(db_session)
    if not auto_providers:
        logger.debug("No providers in Auto mode found")
        return {}

    # Fetch config from GitHub
    config = fetch_llm_recommendations_from_github()
    if not config:
        logger.warning("Failed to fetch GitHub config")
        return {}

    # Skip if we've already processed this version (unless forced)
    last_updated_at = _get_cached_last_updated_at()
    if not force and last_updated_at and config.updated_at <= last_updated_at:
        logger.debug("GitHub config unchanged, skipping sync")
        _set_cached_last_updated_at(config.updated_at)
        return {}

    for provider in auto_providers:
        provider_type = provider.provider  # e.g., "openai", "anthropic"

        if provider_type not in config.providers:
            logger.debug(
                f"No config for provider type '{provider_type}' in GitHub config"
            )
            continue

        # Sync models - this replaces the model list entirely for Auto mode
        changes = sync_auto_mode_models(
            db_session=db_session,
            provider=provider,
            llm_recommendations=config,
        )

        if changes > 0:
            results[provider.name] = changes
            logger.info(
                f"Applied {changes} model changes to provider '{provider.name}'"
            )

    _set_cached_last_updated_at(config.updated_at)
    return results


def reset_cache() -> None:
    """Reset the cache timestamp. Useful for testing."""
    try:
        get_cache_backend().delete(_CACHE_KEY_LAST_UPDATED_AT)
    except Exception as e:
        logger.warning(f"Failed to reset cache: {e}")


================================================
FILE: backend/onyx/llm/well_known_providers/constants.py
================================================
from onyx.llm.constants import LlmProviderNames

OPENAI_PROVIDER_NAME = "openai"

BEDROCK_PROVIDER_NAME = "bedrock"


OLLAMA_PROVIDER_NAME = "ollama_chat"
OLLAMA_API_KEY_CONFIG_KEY = "OLLAMA_API_KEY"

LM_STUDIO_PROVIDER_NAME = "lm_studio"
LM_STUDIO_API_KEY_CONFIG_KEY = "LM_STUDIO_API_KEY"

LITELLM_PROXY_PROVIDER_NAME = "litellm_proxy"

BIFROST_PROVIDER_NAME = "bifrost"

# Providers that use optional Bearer auth from custom_config
PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING: dict[str, str] = {
    LlmProviderNames.OLLAMA_CHAT: OLLAMA_API_KEY_CONFIG_KEY,
    LlmProviderNames.LM_STUDIO: LM_STUDIO_API_KEY_CONFIG_KEY,
}

# OpenRouter
OPENROUTER_PROVIDER_NAME = "openrouter"

ANTHROPIC_PROVIDER_NAME = "anthropic"

AZURE_PROVIDER_NAME = "azure"


VERTEXAI_PROVIDER_NAME = "vertex_ai"
VERTEX_CREDENTIALS_FILE_KWARG = "vertex_credentials"
VERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT = "CREDENTIALS_FILE"
VERTEX_LOCATION_KWARG = "vertex_location"

AWS_REGION_NAME_KWARG = "aws_region_name"
AWS_REGION_NAME_KWARG_ENV_VAR_FORMAT = "AWS_REGION_NAME"
AWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT = "AWS_BEARER_TOKEN_BEDROCK"
AWS_ACCESS_KEY_ID_KWARG = "aws_access_key_id"
AWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT = "AWS_ACCESS_KEY_ID"
AWS_SECRET_ACCESS_KEY_KWARG = "aws_secret_access_key"
AWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT = "AWS_SECRET_ACCESS_KEY"


================================================
FILE: backend/onyx/llm/well_known_providers/llm_provider_options.py
================================================
import json
import pathlib
import threading
import time

from onyx.llm.constants import LlmProviderNames
from onyx.llm.constants import PROVIDER_DISPLAY_NAMES
from onyx.llm.constants import WELL_KNOWN_PROVIDER_NAMES
from onyx.llm.utils import get_max_input_tokens
from onyx.llm.utils import model_supports_image_input
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.llm.well_known_providers.auto_update_service import (
    fetch_llm_recommendations_from_github,
)
from onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import AZURE_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import BEDROCK_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import BIFROST_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import LITELLM_PROXY_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import LM_STUDIO_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OLLAMA_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
from onyx.llm.well_known_providers.models import WellKnownLLMProviderDescriptor
from onyx.server.manage.llm.models import ModelConfigurationView
from onyx.utils.logger import setup_logger

logger = setup_logger()

_RECOMMENDATIONS_CACHE_TTL_SECONDS = 300
_recommendations_cache_lock = threading.Lock()
_cached_recommendations: LLMRecommendations | None = None
_cached_recommendations_time: float = 0.0


def _get_provider_to_models_map() -> dict[str, list[str]]:
    """Lazy-load provider model mappings to avoid importing litellm at module level.

    Dynamic providers (Bedrock, Ollama, OpenRouter) return empty lists here
    because their models are fetched directly from the source API, which is
    more up-to-date than LiteLLM's static lists.
    """
    return {
        OPENAI_PROVIDER_NAME: get_openai_model_names(),
        BEDROCK_PROVIDER_NAME: [],  # Dynamic - fetched from AWS API
        ANTHROPIC_PROVIDER_NAME: get_anthropic_model_names(),
        VERTEXAI_PROVIDER_NAME: get_vertexai_model_names(),
        OLLAMA_PROVIDER_NAME: [],  # Dynamic - fetched from Ollama API
        LM_STUDIO_PROVIDER_NAME: [],  # Dynamic - fetched from LM Studio API
        OPENROUTER_PROVIDER_NAME: [],  # Dynamic - fetched from OpenRouter API
        LITELLM_PROXY_PROVIDER_NAME: [],  # Dynamic - fetched from LiteLLM proxy API
        BIFROST_PROVIDER_NAME: [],  # Dynamic - fetched from Bifrost API
    }


def _load_bundled_recommendations() -> LLMRecommendations:
    json_path = pathlib.Path(__file__).parent / "recommended-models.json"
    with open(json_path, "r") as f:
        json_config = json.load(f)
    return LLMRecommendations.model_validate(json_config)


def get_recommendations() -> LLMRecommendations:
    """Get the recommendations, with an in-memory cache to avoid
    hitting GitHub on every API request."""
    global _cached_recommendations, _cached_recommendations_time

    now = time.monotonic()
    if (
        _cached_recommendations is not None
        and (now - _cached_recommendations_time) < _RECOMMENDATIONS_CACHE_TTL_SECONDS
    ):
        return _cached_recommendations

    with _recommendations_cache_lock:
        # Double-check after acquiring lock
        if (
            _cached_recommendations is not None
            and (time.monotonic() - _cached_recommendations_time)
            < _RECOMMENDATIONS_CACHE_TTL_SECONDS
        ):
            return _cached_recommendations

        recommendations_from_github = fetch_llm_recommendations_from_github()
        result = recommendations_from_github or _load_bundled_recommendations()

        _cached_recommendations = result
        _cached_recommendations_time = time.monotonic()
        return result


def is_obsolete_model(model_name: str, provider: str) -> bool:
    """Check if a model is obsolete and should be filtered out.

    Filters models that are 2+ major versions behind or deprecated.
    This is the single source of truth for obsolete model detection.
    """
    model_lower = model_name.lower()

    # OpenAI obsolete models
    if provider == LlmProviderNames.OPENAI:
        # GPT-3 models are obsolete
        if "gpt-3" in model_lower:
            return True
        # Legacy models
        deprecated = {
            "text-davinci-003",
            "text-davinci-002",
            "text-curie-001",
            "text-babbage-001",
            "text-ada-001",
            "davinci",
            "curie",
            "babbage",
            "ada",
        }
        if model_lower in deprecated:
            return True

    # Anthropic obsolete models
    if provider == LlmProviderNames.ANTHROPIC:
        if "claude-2" in model_lower or "claude-instant" in model_lower:
            return True

    # Vertex AI obsolete models
    if provider == LlmProviderNames.VERTEX_AI:
        if "gemini-1.0" in model_lower:
            return True
        if "palm" in model_lower or "bison" in model_lower:
            return True

    return False


def get_openai_model_names() -> list[str]:
    """Get OpenAI model names dynamically from litellm."""
    import re
    import litellm

    # TODO: remove these lists once we have a comprehensive model configuration page
    # The ideal flow should be: fetch all available models --> filter by type
    # --> allow user to modify filters and select models based on current context
    non_chat_model_terms = {
        "embed",
        "audio",
        "tts",
        "whisper",
        "dall-e",
        "image",
        "moderation",
        "sora",
        "container",
    }
    deprecated_model_terms = {"babbage", "davinci", "gpt-3.5", "gpt-4-"}
    excluded_terms = non_chat_model_terms | deprecated_model_terms

    # NOTE: We are explicitly excluding all "timestamped" models
    # because they are mostly just noise in the admin configuration panel
    # e.g. gpt-4o-2025-07-16, gpt-3.5-turbo-0613, etc.
    date_pattern = re.compile(r"-\d{4}")

    def is_valid_model(model: str) -> bool:
        model_lower = model.lower()
        return not any(
            ex in model_lower for ex in excluded_terms
        ) and not date_pattern.search(model)

    return sorted(
        (
            model.removeprefix("openai/")
            for model in litellm.open_ai_chat_completion_models
            if is_valid_model(model)
        ),
        reverse=True,
    )


def get_anthropic_model_names() -> list[str]:
    """Get Anthropic model names dynamically from litellm."""
    import litellm

    # Models to exclude from Anthropic's model list (deprecated or duplicates)
    _IGNORABLE_ANTHROPIC_MODELS = {
        "claude-2",
        "claude-instant-1",
        "anthropic/claude-3-5-sonnet-20241022",
    }

    return sorted(
        [
            model
            for model in litellm.anthropic_models
            if model not in _IGNORABLE_ANTHROPIC_MODELS
            and not is_obsolete_model(model, LlmProviderNames.ANTHROPIC)
        ],
        reverse=True,
    )


def get_vertexai_model_names() -> list[str]:
    """Get Vertex AI model names dynamically from litellm model_cost."""
    import litellm

    # Combine all vertex model sets
    vertex_models: set[str] = set()
    vertex_model_sets = [
        "vertex_chat_models",
        "vertex_language_models",
        "vertex_anthropic_models",
        "vertex_llama3_models",
        "vertex_mistral_models",
        "vertex_ai_ai21_models",
        "vertex_deepseek_models",
    ]
    for attr in vertex_model_sets:
        if hasattr(litellm, attr):
            vertex_models.update(getattr(litellm, attr))

    # Also extract from model_cost for any models not in the sets
    for key in litellm.model_cost.keys():
        if key.startswith("vertex_ai/"):
            model_name = key.replace("vertex_ai/", "")
            vertex_models.add(model_name)

    return sorted(
        [
            model
            for model in vertex_models
            if "embed" not in model.lower()
            and "image" not in model.lower()
            and "video" not in model.lower()
            and "code" not in model.lower()
            and "veo" not in model.lower()  # video generation
            and "live" not in model.lower()  # live/streaming models
            and "tts" not in model.lower()  # text-to-speech
            and "native-audio" not in model.lower()  # audio models
            and "/" not in model  # filter out prefixed models like openai/gpt-oss
            and "search_api" not in model.lower()  # not a model
            and "-maas" not in model.lower()  # marketplace models
            and not is_obsolete_model(model, LlmProviderNames.VERTEX_AI)
        ],
        reverse=True,
    )


def model_configurations_for_provider(
    provider_name: str, llm_recommendations: LLMRecommendations
) -> list[ModelConfigurationView]:
    recommended_visible_models = llm_recommendations.get_visible_models(provider_name)
    recommended_visible_models_names = [m.name for m in recommended_visible_models]

    # Preserve provider-defined ordering while de-duplicating.
    model_names: list[str] = []
    seen_model_names: set[str] = set()
    for model_name in (
        fetch_models_for_provider(provider_name) + recommended_visible_models_names
    ):
        if model_name in seen_model_names:
            continue
        seen_model_names.add(model_name)
        model_names.append(model_name)

    # Vertex model list can be large and mixed-vendor; alphabetical ordering
    # makes model discovery easier in admin selection UIs.
    if provider_name == VERTEXAI_PROVIDER_NAME:
        model_names = sorted(model_names, key=str.lower)

    return [
        ModelConfigurationView(
            name=model_name,
            is_visible=model_name in recommended_visible_models_names,
            max_input_tokens=get_max_input_tokens(model_name, provider_name),
            supports_image_input=model_supports_image_input(model_name, provider_name),
        )
        for model_name in model_names
    ]


def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
    llm_recommendations = get_recommendations()

    well_known_llms = []
    for provider_name in WELL_KNOWN_PROVIDER_NAMES:
        model_configurations = model_configurations_for_provider(
            provider_name, llm_recommendations
        )
        well_known_llms.append(
            WellKnownLLMProviderDescriptor(
                name=provider_name,
                known_models=model_configurations,
                recommended_default_model=llm_recommendations.get_default_model(
                    provider_name
                ),
            )
        )
    return well_known_llms


def fetch_models_for_provider(provider_name: str) -> list[str]:
    return _get_provider_to_models_map().get(provider_name, [])


def fetch_model_names_for_provider_as_set(provider_name: str) -> set[str] | None:
    model_names = fetch_models_for_provider(provider_name)
    return set(model_names) if model_names else None


def fetch_visible_model_names_for_provider_as_set(
    provider_name: str,
) -> set[str] | None:
    """Get visible model names for a provider.

    Note: Since we no longer maintain separate visible model lists,
    this returns all models (same as fetch_model_names_for_provider_as_set).
    Kept for backwards compatibility with alembic migrations.
    """
    return fetch_model_names_for_provider_as_set(provider_name)


def get_provider_display_name(provider_name: str) -> str:
    """Get human-friendly display name for an Onyx-supported provider.

    First checks Onyx-specific display names, then falls back to
    PROVIDER_DISPLAY_NAMES from constants.
    """
    # Display names for Onyx-supported LLM providers (used in admin UI provider selection).
    # These override PROVIDER_DISPLAY_NAMES for Onyx-specific branding.
    _ONYX_PROVIDER_DISPLAY_NAMES: dict[str, str] = {
        OPENAI_PROVIDER_NAME: "ChatGPT (OpenAI)",
        OLLAMA_PROVIDER_NAME: "Ollama",
        LM_STUDIO_PROVIDER_NAME: "LM Studio",
        ANTHROPIC_PROVIDER_NAME: "Claude (Anthropic)",
        AZURE_PROVIDER_NAME: "Azure OpenAI",
        BEDROCK_PROVIDER_NAME: "Amazon Bedrock",
        VERTEXAI_PROVIDER_NAME: "Google Vertex AI",
        OPENROUTER_PROVIDER_NAME: "OpenRouter",
        LITELLM_PROXY_PROVIDER_NAME: "LiteLLM Proxy",
    }

    if provider_name in _ONYX_PROVIDER_DISPLAY_NAMES:
        return _ONYX_PROVIDER_DISPLAY_NAMES[provider_name]
    return PROVIDER_DISPLAY_NAMES.get(
        provider_name.lower(), provider_name.replace("_", " ").title()
    )


def fetch_default_model_for_provider(provider_name: str) -> str | None:
    """Fetch the default model for a provider.

    First checks the GitHub-hosted recommended-models.json config (via fetch_github_config),
    then falls back to hardcoded defaults if unavailable.
    """
    llm_recommendations = get_recommendations()
    default_model = llm_recommendations.get_default_model(provider_name)
    return default_model.name if default_model else None


================================================
FILE: backend/onyx/llm/well_known_providers/models.py
================================================
from enum import Enum

from pydantic import BaseModel
from pydantic import Field

from onyx.server.manage.llm.models import ModelConfigurationView


class CustomConfigKeyType(str, Enum):
    # used for configuration values that require manual input
    # i.e., textual API keys (e.g., "abcd1234")
    TEXT_INPUT = "text_input"

    # used for configuration values that require a file to be selected/drag-and-dropped
    # i.e., file based credentials (e.g., "/path/to/credentials/file.json")
    FILE_INPUT = "file_input"

    # used for configuration values that require a selection from predefined options
    SELECT = "select"


class SimpleKnownModel(BaseModel):
    name: str
    display_name: str | None = None


class WellKnownLLMProviderDescriptor(BaseModel):
    name: str

    # NOTE: the recommended visible models are encoded in the known_models list
    known_models: list[ModelConfigurationView] = Field(default_factory=list)
    recommended_default_model: SimpleKnownModel | None = None


================================================
FILE: backend/onyx/llm/well_known_providers/recommended-models.json
================================================
{
  "version": "1.1",
  "updated_at": "2026-03-05T00:00:00Z",
  "providers": {
    "openai": {
      "default_model": { "name": "gpt-5.4" },
      "additional_visible_models": [
        { "name": "gpt-5.4" },
        { "name": "gpt-5.2" }
      ]
    },
    "anthropic": {
      "default_model": "claude-opus-4-6",
      "additional_visible_models": [
        {
          "name": "claude-opus-4-6",
          "display_name": "Claude Opus 4.6"
        },
        {
          "name": "claude-sonnet-4-6",
          "display_name": "Claude Sonnet 4.6"
        },
        {
          "name": "claude-opus-4-5",
          "display_name": "Claude Opus 4.5"
        },
        {
          "name": "claude-sonnet-4-5",
          "display_name": "Claude Sonnet 4.5"
        }
      ]
    },
    "vertex_ai": {
      "default_model": "gemini-3-pro-preview",
      "additional_visible_models": [
        {
          "name": "gemini-3-pro-preview",
          "display_name": "Gemini 3 Pro"
        },
        {
          "name": "gemini-3-flash-preview",
          "display_name": "Gemini 3 Flash"
        }
      ]
    },
    "openrouter": {
      "default_model": "z-ai/glm-4.7",
      "additional_visible_models": [
        {
          "name": "z-ai/glm-4.7",
          "display_name": "GLM 4.7"
        },
        {
          "name": "deepseek/deepseek-v3.2",
          "display_name": "DeepSeek V3.2"
        },
        {
          "name": "qwen/qwen3-235b-a22b-2507",
          "display_name": "Qwen3 235B A22B Instruct 2507"
        },
        {
          "name": "moonshotai/kimi-k2-0905",
          "display_name": "Kimi K2 0905"
        }
      ]
    }
  }
}


================================================
FILE: backend/onyx/main.py
================================================
import logging
import sys
import traceback
import warnings
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from typing import Any
from typing import cast

import sentry_sdk
import uvicorn
from fastapi import APIRouter
from fastapi import FastAPI
from fastapi import HTTPException
from fastapi import Request
from fastapi import status
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.routing import APIRoute
from httpx_oauth.clients.google import GoogleOAuth2
from httpx_oauth.clients.openid import BASE_SCOPES
from httpx_oauth.clients.openid import OpenID
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from starlette.types import Lifespan

from onyx import __version__
from onyx.auth.schemas import UserCreate
from onyx.auth.schemas import UserRead
from onyx.auth.schemas import UserUpdate
from onyx.auth.users import auth_backend
from onyx.auth.users import create_onyx_oauth_router
from onyx.auth.users import fastapi_users
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import APP_API_PREFIX
from onyx.configs.app_configs import APP_HOST
from onyx.configs.app_configs import APP_PORT
from onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import CACHE_BACKEND
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import LOG_ENDPOINT_LATENCY
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import OAUTH_ENABLED
from onyx.configs.app_configs import OIDC_PKCE_ENABLED
from onyx.configs.app_configs import OIDC_SCOPE_OVERRIDE
from onyx.configs.app_configs import OPENID_CONFIG_URL
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from onyx.configs.app_configs import POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE
from onyx.configs.app_configs import SYSTEM_RECURSION_LIMIT
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.configs.constants import POSTGRES_WEB_APP_NAME
from onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine
from onyx.db.engine.connection_warmup import warm_up_connections
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.error_handling.exceptions import register_onyx_exception_handlers
from onyx.file_store.file_store import get_default_file_store
from onyx.hooks.registry import validate_registry
from onyx.server.api_key.api import router as api_key_router
from onyx.server.auth_check import check_router_auth
from onyx.server.documents.cc_pair import router as cc_pair_router
from onyx.server.documents.connector import router as connector_router
from onyx.server.documents.credential import router as credential_router
from onyx.server.documents.document import router as document_router
from onyx.server.documents.standard_oauth import router as standard_oauth_router
from onyx.server.features.build.api.api import public_build_router
from onyx.server.features.build.api.api import router as build_router
from onyx.server.features.default_assistant.api import (
    router as default_assistant_router,
)
from onyx.server.features.document_set.api import router as document_set_router
from onyx.server.features.hierarchy.api import router as hierarchy_router
from onyx.server.features.input_prompt.api import (
    admin_router as admin_input_prompt_router,
)
from onyx.server.features.input_prompt.api import (
    basic_router as input_prompt_router,
)
from onyx.server.features.mcp.api import admin_router as mcp_admin_router
from onyx.server.features.mcp.api import router as mcp_router
from onyx.server.features.notifications.api import router as notification_router
from onyx.server.features.oauth_config.api import (
    admin_router as admin_oauth_config_router,
)
from onyx.server.features.oauth_config.api import router as oauth_config_router
from onyx.server.features.password.api import router as password_router
from onyx.server.features.persona.api import admin_agents_router
from onyx.server.features.persona.api import admin_router as admin_persona_router
from onyx.server.features.persona.api import agents_router
from onyx.server.features.persona.api import basic_router as persona_router
from onyx.server.features.projects.api import router as projects_router
from onyx.server.features.tool.api import admin_router as admin_tool_router
from onyx.server.features.tool.api import router as tool_router
from onyx.server.features.user_oauth_token.api import router as user_oauth_token_router
from onyx.server.features.web_search.api import router as web_search_router
from onyx.server.federated.api import router as federated_router
from onyx.server.kg.api import admin_router as kg_admin_router
from onyx.server.manage.administrative import router as admin_router
from onyx.server.manage.code_interpreter.api import (
    admin_router as code_interpreter_admin_router,
)
from onyx.server.manage.discord_bot.api import router as discord_bot_router
from onyx.server.manage.embedding.api import admin_router as embedding_admin_router
from onyx.server.manage.embedding.api import basic_router as embedding_router
from onyx.server.manage.get_state import router as state_router
from onyx.server.manage.image_generation.api import (
    admin_router as image_generation_admin_router,
)
from onyx.server.manage.llm.api import admin_router as llm_admin_router
from onyx.server.manage.llm.api import basic_router as llm_router
from onyx.server.manage.opensearch_migration.api import (
    admin_router as opensearch_migration_admin_router,
)
from onyx.server.manage.search_settings import router as search_settings_router
from onyx.server.manage.slack_bot import router as slack_bot_management_router
from onyx.server.manage.users import router as user_router
from onyx.server.manage.voice.api import admin_router as voice_admin_router
from onyx.server.manage.voice.user_api import router as voice_router
from onyx.server.manage.voice.websocket_api import router as voice_websocket_router
from onyx.server.manage.web_search.api import (
    admin_router as web_search_admin_router,
)
from onyx.server.metrics.postgres_connection_pool import (
    setup_postgres_connection_pool_metrics,
)
from onyx.server.metrics.prometheus_setup import setup_prometheus_metrics
from onyx.server.middleware.latency_logging import add_latency_logging_middleware
from onyx.server.middleware.rate_limiting import close_auth_limiter
from onyx.server.middleware.rate_limiting import get_auth_rate_limiters
from onyx.server.middleware.rate_limiting import setup_auth_limiter
from onyx.server.onyx_api.ingestion import router as onyx_api_router
from onyx.server.pat.api import router as pat_router
from onyx.server.query_and_chat.chat_backend import router as chat_router
from onyx.server.query_and_chat.query_backend import (
    admin_router as admin_query_router,
)
from onyx.server.query_and_chat.query_backend import basic_router as query_router
from onyx.server.saml import router as saml_router
from onyx.server.settings.api import admin_router as settings_admin_router
from onyx.server.settings.api import basic_router as settings_router
from onyx.server.token_rate_limits.api import (
    router as token_rate_limit_settings_router,
)
from onyx.server.utils import BasicAuthenticationError
from onyx.setup import setup_multitenant_onyx
from onyx.setup import setup_onyx
from onyx.tracing.setup import setup_tracing
from onyx.utils.logger import setup_logger
from onyx.utils.logger import setup_uvicorn_logger
from onyx.utils.middleware import add_endpoint_context_middleware
from onyx.utils.middleware import add_onyx_request_id_middleware
from onyx.utils.telemetry import get_or_generate_uuid
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
from shared_configs.configs import CORS_ALLOWED_ORIGIN
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SENTRY_DSN
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

warnings.filterwarnings(
    "ignore", category=ResourceWarning, message=r"Unclosed client session"
)
warnings.filterwarnings(
    "ignore", category=ResourceWarning, message=r"Unclosed connector"
)

logger = setup_logger()

file_handlers = [
    h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)
]

setup_uvicorn_logger(shared_file_handlers=file_handlers)


def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    if not isinstance(exc, RequestValidationError):
        logger.error(
            f"Unexpected exception type in validation_exception_handler - {type(exc)}"
        )
        raise exc

    exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
    logger.exception(f"{request}: {exc_str}")
    content = {"status_code": 422, "message": exc_str, "data": None}
    return JSONResponse(content=content, status_code=422)


def value_error_handler(_: Request, exc: Exception) -> JSONResponse:
    if not isinstance(exc, ValueError):
        logger.error(f"Unexpected exception type in value_error_handler - {type(exc)}")
        raise exc

    try:
        raise (exc)
    except Exception:
        # log stacktrace
        logger.exception("ValueError")
    return JSONResponse(
        status_code=400,
        content={"message": str(exc)},
    )


def use_route_function_names_as_operation_ids(app: FastAPI) -> None:
    """
    OpenAPI generation defaults to naming the operation with the
    function + route + HTTP method, which usually looks very redundant.

    This function changes the operation IDs to be just the function name.

    Should be called only after all routes have been added.
    """
    for route in app.routes:
        if isinstance(route, APIRoute):
            route.operation_id = route.name


def include_router_with_global_prefix_prepended(
    application: FastAPI, router: APIRouter, **kwargs: Any
) -> None:
    """Adds the global prefix to all routes in the router."""
    processed_global_prefix = f"/{APP_API_PREFIX.strip('/')}" if APP_API_PREFIX else ""

    passed_in_prefix = cast(str | None, kwargs.get("prefix"))
    if passed_in_prefix:
        final_prefix = f"{processed_global_prefix}/{passed_in_prefix.strip('/')}"
    else:
        final_prefix = f"{processed_global_prefix}"
    final_kwargs: dict[str, Any] = {
        **kwargs,
        "prefix": final_prefix,
    }

    application.include_router(router, **final_kwargs)


def include_auth_router_with_prefix(
    application: FastAPI,
    router: APIRouter,
    prefix: str | None = None,
    tags: list[str] | None = None,
) -> None:
    """Wrapper function to include an 'auth' router with prefix + rate-limiting dependencies."""
    final_tags = tags or ["auth"]
    include_router_with_global_prefix_prepended(
        application,
        router,
        prefix=prefix,
        tags=final_tags,
        dependencies=get_auth_rate_limiters(),
    )


def validate_cache_backend_settings() -> None:
    """Validate that CACHE_BACKEND=postgres is only used with DISABLE_VECTOR_DB.

    The Postgres cache backend eliminates the Redis dependency, but only works
    when Celery is not running (which requires DISABLE_VECTOR_DB=true).
    """
    if CACHE_BACKEND == CacheBackendType.POSTGRES and not DISABLE_VECTOR_DB:
        raise RuntimeError(
            "CACHE_BACKEND=postgres requires DISABLE_VECTOR_DB=true. "
            "The Postgres cache backend is only supported in no-vector-DB "
            "deployments where Celery is replaced by the in-process task runner."
        )


def validate_no_vector_db_settings() -> None:
    """Validate that DISABLE_VECTOR_DB is not combined with incompatible settings.

    Raises RuntimeError if DISABLE_VECTOR_DB is set alongside MULTI_TENANT or ENABLE_CRAFT,
    since these modes require infrastructure that is removed in no-vector-DB deployments.
    """
    if not DISABLE_VECTOR_DB:
        return

    if MULTI_TENANT:
        raise RuntimeError(
            "DISABLE_VECTOR_DB cannot be used with MULTI_TENANT. "
            "Multi-tenant deployments require the vector database for "
            "per-tenant document indexing and search. Run in single-tenant "
            "mode when disabling the vector database."
        )

    from onyx.server.features.build.configs import ENABLE_CRAFT

    if ENABLE_CRAFT:
        raise RuntimeError(
            "DISABLE_VECTOR_DB cannot be used with ENABLE_CRAFT. "
            "Onyx Craft requires background workers for sandbox lifecycle "
            "management, which are removed in no-vector-DB deployments. "
            "Disable Craft (ENABLE_CRAFT=false) when disabling the vector database."
        )


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:  # noqa: ARG001
    validate_no_vector_db_settings()
    validate_cache_backend_settings()
    validate_registry()

    # Set recursion limit
    if SYSTEM_RECURSION_LIMIT is not None:
        sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
        logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}")

    SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)

    SqlEngine.init_engine(
        pool_size=POSTGRES_API_SERVER_POOL_SIZE,
        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
    )
    SqlEngine.get_engine()

    SqlEngine.init_readonly_engine(
        pool_size=POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE,
        max_overflow=POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW,
    )

    # Register pool metrics now that engines are created.
    # HTTP instrumentation is set up earlier in get_application() since it
    # adds middleware (which Starlette forbids after the app has started).
    setup_postgres_connection_pool_metrics(
        engines={
            "sync": SqlEngine.get_engine(),
            "async": get_sqlalchemy_async_engine(),
            "readonly": SqlEngine.get_readonly_engine(),
        },
    )

    verify_auth = fetch_versioned_implementation(
        "onyx.auth.users", "verify_auth_setting"
    )

    # Will throw exception if an issue is found
    verify_auth()

    if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
        logger.notice("Both OAuth Client ID and Secret are configured.")

    # Initialize tracing if credentials are provided
    setup_tracing()

    # fill up Postgres connection pools
    await warm_up_connections()

    if not MULTI_TENANT:
        # We cache this at the beginning so there is no delay in the first telemetry
        CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
        get_or_generate_uuid()

        # If we are multi-tenant, we need to only set up initial public tables
        with get_session_with_current_tenant() as db_session:
            setup_onyx(db_session, POSTGRES_DEFAULT_SCHEMA)
            # set up the file store (e.g. create bucket if needed). On multi-tenant,
            # this is done via IaC
            get_default_file_store().initialize()
    else:
        setup_multitenant_onyx()

    if not MULTI_TENANT:
        # don't emit a metric for every pod rollover/restart
        optional_telemetry(
            record_type=RecordType.VERSION, data={"version": __version__}
        )

    if AUTH_RATE_LIMITING_ENABLED:
        await setup_auth_limiter()

    if DISABLE_VECTOR_DB:
        from onyx.background.periodic_poller import recover_stuck_user_files
        from onyx.background.periodic_poller import start_periodic_poller

        recover_stuck_user_files(POSTGRES_DEFAULT_SCHEMA)
        start_periodic_poller(POSTGRES_DEFAULT_SCHEMA)

    yield

    if DISABLE_VECTOR_DB:
        from onyx.background.periodic_poller import stop_periodic_poller

        stop_periodic_poller()

    SqlEngine.reset_engine()

    if AUTH_RATE_LIMITING_ENABLED:
        await close_auth_limiter()


def log_http_error(request: Request, exc: Exception) -> JSONResponse:
    status_code = getattr(exc, "status_code", 500)

    if isinstance(exc, BasicAuthenticationError):
        # For BasicAuthenticationError, just log a brief message without stack trace
        # (almost always spammy)
        logger.debug(f"Authentication failed: {str(exc)}")

    elif status_code == 404 and request.url.path == "/metrics":
        # Log 404 errors for the /metrics endpoint with debug level
        logger.debug(f"404 error for /metrics endpoint: {str(exc)}")

    elif status_code >= 400:
        error_msg = f"{str(exc)}\n"
        error_msg += "".join(traceback.format_tb(exc.__traceback__))
        logger.error(error_msg)

    detail = exc.detail if isinstance(exc, HTTPException) else str(exc)
    return JSONResponse(
        status_code=status_code,
        content={"detail": detail},
    )


def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
    application = FastAPI(
        title="Onyx Backend",
        version=__version__,
        description="Onyx API for AI-powered chat with search, document indexing, agents, actions, and more",
        servers=[
            {"url": f"{WEB_DOMAIN.rstrip('/')}/api", "description": "Onyx API Server"}
        ],
        lifespan=lifespan_override or lifespan,
    )
    if SENTRY_DSN:
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
        logger.debug("Sentry DSN not provided, skipping Sentry initialization")

    application.add_exception_handler(status.HTTP_400_BAD_REQUEST, log_http_error)
    application.add_exception_handler(status.HTTP_401_UNAUTHORIZED, log_http_error)
    application.add_exception_handler(status.HTTP_403_FORBIDDEN, log_http_error)
    application.add_exception_handler(status.HTTP_404_NOT_FOUND, log_http_error)
    application.add_exception_handler(
        status.HTTP_500_INTERNAL_SERVER_ERROR, log_http_error
    )

    register_onyx_exception_handlers(application)

    include_router_with_global_prefix_prepended(application, password_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, document_router)
    include_router_with_global_prefix_prepended(application, user_router)
    include_router_with_global_prefix_prepended(application, admin_query_router)
    include_router_with_global_prefix_prepended(application, admin_router)
    include_router_with_global_prefix_prepended(application, connector_router)
    include_router_with_global_prefix_prepended(application, credential_router)
    include_router_with_global_prefix_prepended(application, input_prompt_router)
    include_router_with_global_prefix_prepended(application, admin_input_prompt_router)
    include_router_with_global_prefix_prepended(application, cc_pair_router)
    include_router_with_global_prefix_prepended(application, projects_router)
    include_router_with_global_prefix_prepended(application, public_build_router)
    include_router_with_global_prefix_prepended(application, build_router)
    include_router_with_global_prefix_prepended(application, document_set_router)
    include_router_with_global_prefix_prepended(application, hierarchy_router)
    include_router_with_global_prefix_prepended(application, search_settings_router)
    include_router_with_global_prefix_prepended(
        application, slack_bot_management_router
    )
    include_router_with_global_prefix_prepended(application, discord_bot_router)
    include_router_with_global_prefix_prepended(application, persona_router)
    include_router_with_global_prefix_prepended(application, admin_persona_router)
    include_router_with_global_prefix_prepended(application, agents_router)
    include_router_with_global_prefix_prepended(application, admin_agents_router)
    include_router_with_global_prefix_prepended(application, default_assistant_router)
    include_router_with_global_prefix_prepended(application, notification_router)
    include_router_with_global_prefix_prepended(application, tool_router)
    include_router_with_global_prefix_prepended(application, admin_tool_router)
    include_router_with_global_prefix_prepended(application, oauth_config_router)
    include_router_with_global_prefix_prepended(application, admin_oauth_config_router)
    include_router_with_global_prefix_prepended(application, user_oauth_token_router)
    include_router_with_global_prefix_prepended(application, state_router)
    include_router_with_global_prefix_prepended(application, onyx_api_router)
    include_router_with_global_prefix_prepended(application, settings_router)
    include_router_with_global_prefix_prepended(application, settings_admin_router)
    include_router_with_global_prefix_prepended(application, llm_admin_router)
    include_router_with_global_prefix_prepended(application, kg_admin_router)
    include_router_with_global_prefix_prepended(application, llm_router)
    include_router_with_global_prefix_prepended(
        application, code_interpreter_admin_router
    )
    include_router_with_global_prefix_prepended(
        application, image_generation_admin_router
    )
    include_router_with_global_prefix_prepended(application, embedding_admin_router)
    include_router_with_global_prefix_prepended(application, embedding_router)
    include_router_with_global_prefix_prepended(application, web_search_router)
    include_router_with_global_prefix_prepended(application, web_search_admin_router)
    include_router_with_global_prefix_prepended(application, voice_admin_router)
    include_router_with_global_prefix_prepended(application, voice_router)
    include_router_with_global_prefix_prepended(application, voice_websocket_router)
    include_router_with_global_prefix_prepended(
        application, opensearch_migration_admin_router
    )
    include_router_with_global_prefix_prepended(
        application, token_rate_limit_settings_router
    )
    include_router_with_global_prefix_prepended(application, api_key_router)
    include_router_with_global_prefix_prepended(application, standard_oauth_router)
    include_router_with_global_prefix_prepended(application, federated_router)
    include_router_with_global_prefix_prepended(application, mcp_router)
    include_router_with_global_prefix_prepended(application, mcp_admin_router)

    include_router_with_global_prefix_prepended(application, pat_router)

    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_auth_router(auth_backend),
            prefix="/auth",
        )

        include_auth_router_with_prefix(
            application,
            fastapi_users.get_register_router(UserRead, UserCreate),
            prefix="/auth",
        )

        include_auth_router_with_prefix(
            application,
            fastapi_users.get_reset_password_router(),
            prefix="/auth",
        )
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_verify_router(UserRead),
            prefix="/auth",
        )
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_users_router(UserRead, UserUpdate),
            prefix="/users",
        )

    # Register Google OAuth when AUTH_TYPE is GOOGLE_OAUTH, or when
    # AUTH_TYPE is BASIC and OAuth credentials are configured
    if AUTH_TYPE == AuthType.GOOGLE_OAUTH or (
        AUTH_TYPE == AuthType.BASIC and OAUTH_ENABLED
    ):
        oauth_client = GoogleOAuth2(
            OAUTH_CLIENT_ID,
            OAUTH_CLIENT_SECRET,
            scopes=["openid", "email", "profile"],
        )
        include_auth_router_with_prefix(
            application,
            create_onyx_oauth_router(
                oauth_client,
                auth_backend,
                USER_AUTH_SECRET,
                associate_by_email=True,
                is_verified_by_default=True,
                redirect_url=f"{WEB_DOMAIN}/auth/oauth/callback",
            ),
            prefix="/auth/oauth",
        )

        # Need logout router for GOOGLE_OAUTH only (BASIC already has it from above)
        if AUTH_TYPE == AuthType.GOOGLE_OAUTH:
            include_auth_router_with_prefix(
                application,
                fastapi_users.get_logout_router(auth_backend),
                prefix="/auth",
            )

    if AUTH_TYPE == AuthType.OIDC:
        # Ensure we request offline_access for refresh tokens
        try:
            oidc_scopes = list(OIDC_SCOPE_OVERRIDE or BASE_SCOPES)
            if "offline_access" not in oidc_scopes:
                oidc_scopes.append("offline_access")
        except Exception as e:
            logger.warning(f"Error configuring OIDC scopes: {e}")
            # Fall back to default scopes if there's an error
            oidc_scopes = BASE_SCOPES

        include_auth_router_with_prefix(
            application,
            create_onyx_oauth_router(
                OpenID(
                    OAUTH_CLIENT_ID,
                    OAUTH_CLIENT_SECRET,
                    OPENID_CONFIG_URL,
                    # Use the configured scopes
                    base_scopes=oidc_scopes,
                ),
                auth_backend,
                USER_AUTH_SECRET,
                associate_by_email=True,
                is_verified_by_default=True,
                redirect_url=f"{WEB_DOMAIN}/auth/oidc/callback",
                enable_pkce=OIDC_PKCE_ENABLED,
            ),
            prefix="/auth/oidc",
        )

        # need basic auth router for `logout` endpoint
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_auth_router(auth_backend),
            prefix="/auth",
        )

    elif AUTH_TYPE == AuthType.SAML:
        include_auth_router_with_prefix(
            application,
            saml_router,
        )

    if (
        AUTH_TYPE == AuthType.CLOUD
        or AUTH_TYPE == AuthType.BASIC
        or AUTH_TYPE == AuthType.GOOGLE_OAUTH
        or AUTH_TYPE == AuthType.OIDC
    ):
        # Add refresh token endpoint for OAuth as well
        include_auth_router_with_prefix(
            application,
            fastapi_users.get_refresh_router(auth_backend),
            prefix="/auth",
        )

    application.add_exception_handler(
        RequestValidationError, validation_exception_handler
    )

    application.add_exception_handler(ValueError, value_error_handler)

    application.add_middleware(
        CORSMiddleware,
        allow_origins=CORS_ALLOWED_ORIGIN,  # Configurable via environment variable
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    if LOG_ENDPOINT_LATENCY:
        add_latency_logging_middleware(application, logger)

    add_onyx_request_id_middleware(application, "API", logger)

    # Set endpoint context for per-endpoint DB pool attribution metrics.
    # Must be registered after all routes are added.
    add_endpoint_context_middleware(application)

    # HTTP request metrics (latency histograms, in-progress gauge, slow request
    # counter). Must be called here — before the app starts — because the
    # instrumentator adds middleware via app.add_middleware().
    setup_prometheus_metrics(application)

    # Ensure all routes have auth enabled or are explicitly marked as public
    check_router_auth(application)

    use_route_function_names_as_operation_ids(application)

    return application


# NOTE: needs to be outside of the `if __name__ == "__main__"` block so that the
# app is exportable
set_is_ee_based_on_env_variable()
app = fetch_versioned_implementation(module="onyx.main", attribute="get_application")


if __name__ == "__main__":
    logger.notice(
        f"Starting Onyx Backend version {__version__} on http://{APP_HOST}:{str(APP_PORT)}/"
    )

    if global_version.is_ee_version():
        logger.notice("Running Enterprise Edition")

    uvicorn.run(app, host=APP_HOST, port=APP_PORT)


================================================
FILE: backend/onyx/mcp_server/README.md
================================================
# Onyx MCP Server

## Overview

The Onyx MCP server allows LLMs to connect to your Onyx instance and access its knowledge base and search capabilities through the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/).

With the Onyx MCP Server, you can search your knowledgebase,
give your LLMs web search, and upload and manage documents in Onyx.

All access controls are managed within the main Onyx application.

### Authentication

Provide an Onyx Personal Access Token or API Key in the `Authorization` header as a Bearer token.
The MCP server quickly validates and passes through the token on every request.

Depending on usage, the MCP Server may support OAuth and stdio in the future.

### Default Configuration
- **Transport**: HTTP POST (MCP over HTTP)
- **Port**: 8090 (shares domain with API server)
- **Framework**: FastMCP with FastAPI wrapper
- **Database**: None (all work delegates to the API server)

### Architecture

The MCP server is built on [FastMCP](https://github.com/jlowin/fastmcp) and runs alongside the main Onyx API server:

```
┌─────────────────┐
│  LLM Client     │
│  (Claude, etc)  │
└────────┬────────┘
         │ MCP over HTTP
         │ (POST with bearer)
         ▼
┌─────────────────┐
│  MCP Server     │
│  Port 8090      │
│  ├─ Auth        │
│  ├─ Tools       │
│  └─ Resources   │
└────────┬────────┘
         │ Internal HTTP
         │ (authenticated)
         ▼
┌─────────────────┐
│  API Server     │
│  Port 8080      │
│  ├─ /me (auth)  │
│  ├─ Search APIs │
│  └─ ACL checks  │
└─────────────────┘
```

## Configuring MCP Clients

### Claude Desktop

Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):

```json
{
  "mcpServers": {
    "onyx": {
      "url": "https://[YOUR_ONYX_DOMAIN]:8090/",
      "transport": "http",
      "headers": {
        "Authorization": "Bearer YOUR_ONYX_TOKEN_HERE"
      }
    }
  }
}
```

### Other MCP Clients

Most MCP clients support HTTP transport with custom headers. Refer to your client's documentation for configuration details.

## Capabilities

### Tools

The server provides three tools for searching and retrieving information:

1. `search_indexed_documents`
Search the user's private knowledge base indexed in Onyx. Returns ranked documents with content snippets, scores, and metadata.

2. `search_web`
Search the public internet for current events and general knowledge. Returns web search results with titles, URLs, and snippets.

3. `open_urls`
Retrieve the complete text content from specific web URLs. Useful for fetching full page content after finding relevant URLs via `search_web`.

### Resources

1. `indexed_sources`
Lists all document sources currently indexed in the tenant (e.g., `"confluence"`, `"github"`). Use these values to filter results when calling `search_indexed_documents`.

## Local Development

### Running the MCP Server

The MCP Server automatically launches with the `Run All Onyx Services` task from the default launch.json.

You can also independently launch the Server via the vscode debugger.

### Testing with MCP Inspector

The [MCP Inspector](https://github.com/modelcontextprotocol/inspector) is a debugging tool for MCP servers:

```bash
npx @modelcontextprotocol/inspector http://localhost:8090/
```

**Setup in Inspector:**

1. Ignore the OAuth configuration menus
2. Open the **Authentication** tab
3. Select **Bearer Token** authentication
4. Paste your Onyx bearer token
5. Click **Connect**

Once connected, you can:
- Browse available tools
- Test tool calls with different parameters
- View request/response payloads
- Debug authentication issues

### Health Check

Verify the server is running:

```bash
curl http://localhost:8090/health
```

Expected response:
```json
{
  "status": "healthy",
  "service": "mcp_server"
}
```

### Environment Variables

**MCP Server Configuration:**
- `MCP_SERVER_ENABLED`: Enable MCP server (set to "true" to enable, default: disabled)
- `MCP_SERVER_PORT`: Port for MCP server (default: 8090)
- `MCP_SERVER_CORS_ORIGINS`: Comma-separated CORS origins (optional)

**API Server Connection:**
- `API_SERVER_PROTOCOL`: Protocol for API server connection (default: "http")
- `API_SERVER_HOST`: Hostname for API server connection (default: "127.0.0.1")
- `API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS`: Optional override URL. If set, takes precedence over the protocol/host variables. Used for self-hosting the MCP server with Onyx Cloud as the backend.

================================================
FILE: backend/onyx/mcp_server/api.py
================================================
"""MCP server with FastAPI wrapper."""

from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi import Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.responses import Response
from fastmcp import FastMCP
from starlette.datastructures import MutableHeaders
from starlette.middleware.base import RequestResponseEndpoint
from starlette.types import Receive
from starlette.types import Scope
from starlette.types import Send

from onyx.configs.app_configs import MCP_SERVER_CORS_ORIGINS
from onyx.mcp_server.auth import OnyxTokenVerifier
from onyx.mcp_server.utils import shutdown_http_client
from onyx.utils.logger import setup_logger

logger = setup_logger()

logger.info("Creating Onyx MCP Server...")

mcp_server = FastMCP(
    name="Onyx MCP Server",
    version="1.0.0",
    auth=OnyxTokenVerifier(),
)

# Import tools and resources AFTER mcp_server is created to avoid circular imports
# Components register themselves via decorators on the shared mcp_server instance
from onyx.mcp_server.tools import search  # noqa: E402, F401
from onyx.mcp_server.resources import indexed_sources  # noqa: E402, F401

logger.info("MCP server instance created")


def create_mcp_fastapi_app() -> FastAPI:
    """Create FastAPI app wrapping MCP server with auth and shared client lifecycle."""
    mcp_asgi_app = mcp_server.http_app(path="/")

    async def _ensure_streamable_accept_header(
        scope: Scope, receive: Receive, send: Send
    ) -> None:
        """Ensure Accept header includes types required by FastMCP streamable HTTP."""
        if scope.get("type") == "http":
            headers = MutableHeaders(scope=scope)
            accept = headers.get("accept", "")
            accept_lower = accept.lower()

            if (
                not accept
                or accept == "*/*"
                or "application/json" not in accept_lower
                or "text/event-stream" not in accept_lower
            ):
                headers["accept"] = "application/json, text/event-stream"

        await mcp_asgi_app(scope, receive, send)

    @asynccontextmanager
    async def combined_lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
        """Initializes MCP session manager."""
        logger.info("MCP server starting up")

        try:
            async with mcp_asgi_app.lifespan(app):
                yield
        finally:
            logger.info("MCP server shutting down")
            await shutdown_http_client()

    app = FastAPI(
        title="Onyx MCP Server",
        description="HTTP POST transport with bearer auth delegated to API /me",
        version="1.0.0",
        lifespan=combined_lifespan,
    )

    # Public health check endpoint (bypasses MCP auth)
    @app.middleware("http")
    async def health_check(
        request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        if request.url.path.rstrip("/") == "/health":
            return JSONResponse({"status": "healthy", "service": "mcp_server"})
        return await call_next(request)

    # Authentication is handled by FastMCP's OnyxTokenVerifier (see auth.py)

    if MCP_SERVER_CORS_ORIGINS:
        logger.info(f"CORS origins: {MCP_SERVER_CORS_ORIGINS}")
        app.add_middleware(
            CORSMiddleware,
            allow_origins=MCP_SERVER_CORS_ORIGINS,
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    app.mount("/", _ensure_streamable_accept_header)

    return app


mcp_app = create_mcp_fastapi_app()


================================================
FILE: backend/onyx/mcp_server/auth.py
================================================
"""Authentication helpers for the Onyx MCP server."""

from typing import Optional

from fastmcp.server.auth.auth import AccessToken
from fastmcp.server.auth.auth import TokenVerifier

from onyx.mcp_server.utils import get_http_client
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests

logger = setup_logger()


class OnyxTokenVerifier(TokenVerifier):
    """Validates bearer tokens by delegating to the API server."""

    async def verify_token(self, token: str) -> Optional[AccessToken]:
        """Call API /me to verify the token, return minimal AccessToken on success."""
        try:
            response = await get_http_client().get(
                f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/me",
                headers={"Authorization": f"Bearer {token}"},
            )
        except Exception as exc:
            logger.error(
                "MCP server failed to reach API /me for authentication: %s",
                exc,
                exc_info=True,
            )
            return None

        if response.status_code != 200:
            logger.warning(
                "API server rejected MCP auth token with status %s",
                response.status_code,
            )
            return None

        return AccessToken(
            token=token,
            client_id="mcp",
            scopes=["mcp:use"],
            expires_at=None,
            resource=None,
            claims={},
        )


================================================
FILE: backend/onyx/mcp_server/mcp.json.template
================================================
{
    "mcpServers": {
      "Onyx": {
        "url": "https://cloud.onyx.app/mcp",
        "headers": {
          "Authorization": "Bearer [YOUR PAT OR API KEY HERE]"
        }
      }
    }
  }

================================================
FILE: backend/onyx/mcp_server/resources/__init__.py
================================================
"""Resource registrations for the Onyx MCP server."""

# Import resource modules so decorators execute when the package loads.
from onyx.mcp_server.resources import indexed_sources  # noqa: F401


================================================
FILE: backend/onyx/mcp_server/resources/indexed_sources.py
================================================
"""Resources that expose metadata for the Onyx MCP server."""

from __future__ import annotations

from typing import Any

from onyx.mcp_server.api import mcp_server
from onyx.mcp_server.utils import get_indexed_sources
from onyx.mcp_server.utils import require_access_token
from onyx.utils.logger import setup_logger

logger = setup_logger()


@mcp_server.resource(
    "resource://indexed_sources",
    name="indexed_sources",
    description=(
        "Enumerate the user's document sources that are currently indexed in Onyx."
        "This can be used to discover filters for the `search_indexed_documents` tool."
    ),
    mime_type="application/json",
)
async def indexed_sources_resource() -> dict[str, Any]:
    """Return the list of indexed source types for search filtering."""

    access_token = require_access_token()

    sources = await get_indexed_sources(access_token)

    logger.info(
        "Onyx MCP Server: indexed_sources resource returning %s entries",
        len(sources),
    )

    return {
        "indexed_sources": sorted(sources),
    }


================================================
FILE: backend/onyx/mcp_server/tools/__init__.py
================================================
"""Tool registrations for the Onyx MCP server."""

# Import tool modules so decorators execute when the package is imported.
from onyx.mcp_server.tools import search  # noqa: F401


================================================
FILE: backend/onyx/mcp_server/tools/search.py
================================================
"""Search tools for MCP server - document and web search."""

from datetime import datetime
from typing import Any

from onyx.configs.constants import DocumentSource
from onyx.mcp_server.api import mcp_server
from onyx.mcp_server.utils import get_http_client
from onyx.mcp_server.utils import get_indexed_sources
from onyx.mcp_server.utils import require_access_token
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests
from onyx.utils.variable_functionality import global_version

logger = setup_logger()


@mcp_server.tool()
async def search_indexed_documents(
    query: str,
    source_types: list[str] | None = None,
    time_cutoff: str | None = None,
    limit: int = 10,
) -> dict[str, Any]:
    """
    Search the user's knowledge base indexed in Onyx.
    Use this tool for information that is not public knowledge and specific to the user,
    their team, their work, or their organization/company.

    Note: In CE mode, this tool uses the chat endpoint internally which invokes an LLM
    on every call, consuming tokens and adding latency.
    Additionally, CE callers receive a truncated snippet (blurb) instead of a full document chunk,
    but this should still be sufficient for most use cases. CE mode functionality should be swapped
    when a dedicated CE search endpoint is implemented.

    In EE mode, the dedicated search endpoint is used instead.

    To find a list of available sources, use the `indexed_sources` resource.
    Returns chunks of text as search results with snippets, scores, and metadata.

    Example usage:
    ```
    {
        "query": "What is the latest status of PROJ-1234 and what is the next development item?",
        "source_types": ["jira", "google_drive", "github"],
        "time_cutoff": "2025-11-24T00:00:00Z",
        "limit": 10,
    }
    ```
    """
    logger.info(
        f"Onyx MCP Server: document search: query='{query}', sources={source_types}, limit={limit}"
    )

    # Parse time_cutoff string to datetime if provided
    time_cutoff_dt: datetime | None = None
    if time_cutoff:
        try:
            time_cutoff_dt = datetime.fromisoformat(time_cutoff.replace("Z", "+00:00"))
        except ValueError as e:
            logger.warning(
                f"Onyx MCP Server: Invalid time_cutoff format '{time_cutoff}': {e}. Continuing without time filter."
            )
            # Continue with no time_cutoff instead of returning an error
            time_cutoff_dt = None

    # Initialize source_type_enums early to avoid UnboundLocalError
    source_type_enums: list[DocumentSource] | None = None

    # Get authenticated user from FastMCP's access token
    access_token = require_access_token()

    try:
        sources = await get_indexed_sources(access_token)
    except Exception as e:
        # Error fetching sources (network error, API failure, etc.)
        logger.error(
            "Onyx MCP Server: Error checking indexed sources: %s",
            e,
            exc_info=True,
        )
        return {
            "documents": [],
            "total_results": 0,
            "query": query,
            "error": (f"Failed to check indexed sources: {str(e)}. "),
        }

    if not sources:
        logger.info("Onyx MCP Server: No indexed sources available for tenant")
        return {
            "documents": [],
            "total_results": 0,
            "query": query,
            "message": (
                "No document sources are indexed yet. Add connectors or upload data "
                "through Onyx before calling onyx_search_documents."
            ),
        }

    # Convert source_types strings to DocumentSource enums if provided
    # Invalid values will be handled by the API server
    if source_types is not None:
        source_type_enums = []
        for src in source_types:
            try:
                source_type_enums.append(DocumentSource(src.lower()))
            except ValueError:
                logger.warning(
                    f"Onyx MCP Server: Invalid source type '{src}' - will be ignored by server"
                )

    # Build filters dict only with non-None values
    filters: dict[str, Any] | None = None
    if source_type_enums or time_cutoff_dt:
        filters = {}
        if source_type_enums:
            filters["source_type"] = [src.value for src in source_type_enums]
        if time_cutoff_dt:
            filters["time_cutoff"] = time_cutoff_dt.isoformat()

    is_ee = global_version.is_ee_version()
    base_url = build_api_server_url_for_http_requests(respect_env_override_if_set=True)
    auth_headers = {"Authorization": f"Bearer {access_token.token}"}

    search_request: dict[str, Any]
    if is_ee:
        # EE: use the dedicated search endpoint (no LLM invocation)
        search_request = {
            "search_query": query,
            "filters": filters,
            "num_docs_fed_to_llm_selection": limit,
            "run_query_expansion": False,
            "include_content": True,
            "stream": False,
        }
        endpoint = f"{base_url}/search/send-search-message"
        error_key = "error"
        docs_key = "search_docs"
        content_field = "content"
    else:
        # CE: fall back to the chat endpoint (invokes LLM, consumes tokens)
        search_request = {
            "message": query,
            "stream": False,
            "chat_session_info": {},
        }
        if filters:
            search_request["internal_search_filters"] = filters
        endpoint = f"{base_url}/chat/send-chat-message"
        error_key = "error_msg"
        docs_key = "top_documents"
        content_field = "blurb"

    try:
        response = await get_http_client().post(
            endpoint,
            json=search_request,
            headers=auth_headers,
        )
        response.raise_for_status()
        result = response.json()

        # Check for error in response
        if result.get(error_key):
            return {
                "documents": [],
                "total_results": 0,
                "query": query,
                "error": result.get(error_key),
            }

        documents = [
            {
                "semantic_identifier": doc.get("semantic_identifier"),
                "content": doc.get(content_field),
                "source_type": doc.get("source_type"),
                "link": doc.get("link"),
                "score": doc.get("score"),
            }
            for doc in result.get(docs_key, [])
        ]

        # NOTE: search depth is controlled by the backend persona defaults, not `limit`.
        # `limit` only caps the returned list; fewer results may be returned if the
        # backend retrieves fewer documents than requested.
        documents = documents[:limit]

        logger.info(
            f"Onyx MCP Server: Internal search returned {len(documents)} results"
        )
        return {
            "documents": documents,
            "total_results": len(documents),
            "query": query,
        }
    except Exception as e:
        logger.error(f"Onyx MCP Server: Document search error: {e}", exc_info=True)
        return {
            "error": f"Document search failed: {str(e)}",
            "documents": [],
            "query": query,
        }


@mcp_server.tool()
async def search_web(
    query: str,
    limit: int = 5,
) -> dict[str, Any]:
    """
    Search the public internet for general knowledge, current events, and publicly available information.
    Use this tool for information that is publicly available on the web,
    such as news, documentation, general facts, or when the user's private knowledge base doesn't contain relevant information.

    Returns web search results with titles, URLs, and snippets (NOT full content). Use `open_urls` to fetch full page content.

    Example usage:
    ```
    {
        "query": "React 19 migration guide to use react compiler",
        "limit": 5
    }
    ```
    """
    logger.info(f"Onyx MCP Server: Web search: query='{query}', limit={limit}")

    access_token = require_access_token()

    try:
        request_payload = {"queries": [query], "max_results": limit}
        response = await get_http_client().post(
            f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/search-lite",
            json=request_payload,
            headers={"Authorization": f"Bearer {access_token.token}"},
        )
        response.raise_for_status()
        response_payload = response.json()
        results = response_payload.get("results", [])
        return {
            "results": results,
            "query": query,
        }
    except Exception as e:
        logger.error(f"Onyx MCP Server: Web search error: {e}", exc_info=True)
        return {
            "error": f"Web search failed: {str(e)}",
            "results": [],
            "query": query,
        }


@mcp_server.tool()
async def open_urls(
    urls: list[str],
) -> dict[str, Any]:
    """
    Retrieve the complete text content from specific web URLs.
    Use this tool when you need to access full content from known URLs,
    such as documentation pages or articles returned by the `search_web` tool.

    Useful for following up on web search results when snippets do not provide enough information.

    Returns the full text content of each URL along with metadata like title and content type.

    Example usage:
    ```
    {
        "urls": ["https://react.dev/versions", "https://react.dev/learn/react-compiler","https://react.dev/learn/react-compiler/introduction"]
    }
    ```
    """
    logger.info(f"Onyx MCP Server: Open URL: fetching {len(urls)} URLs")

    access_token = require_access_token()

    try:
        response = await get_http_client().post(
            f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/open-urls",
            json={"urls": urls},
            headers={"Authorization": f"Bearer {access_token.token}"},
        )
        response.raise_for_status()
        response_payload = response.json()
        results = response_payload.get("results", [])
        return {
            "results": results,
        }
    except Exception as e:
        logger.error(f"Onyx MCP Server: URL fetch error: {e}", exc_info=True)
        return {
            "error": f"URL fetch failed: {str(e)}",
            "results": [],
        }


================================================
FILE: backend/onyx/mcp_server/utils.py
================================================
"""Utility helpers for the Onyx MCP server."""

from __future__ import annotations

import httpx
from fastmcp.server.auth.auth import AccessToken
from fastmcp.server.dependencies import get_access_token

from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests

logger = setup_logger()

# Shared HTTP client reused across requests
_http_client: httpx.AsyncClient | None = None


def require_access_token() -> AccessToken:
    """
    Get and validate the access token from the current request.

    Raises:
        ValueError: If no access token is present in the request.

    Returns:
        AccessToken: The validated access token.
    """
    access_token = get_access_token()
    if not access_token:
        raise ValueError(
            "MCP Server requires an Onyx access token to authenticate your request"
        )
    return access_token


def get_http_client() -> httpx.AsyncClient:
    """Return a shared async HTTP client."""
    global _http_client
    if _http_client is None:
        _http_client = httpx.AsyncClient(timeout=60.0)
    return _http_client


async def shutdown_http_client() -> None:
    """Close the shared HTTP client when the server shuts down."""
    global _http_client
    if _http_client is not None:
        await _http_client.aclose()
        _http_client = None


async def get_indexed_sources(
    access_token: AccessToken,
) -> list[str]:
    """
    Fetch indexed document sources for the current user/tenant.

    Returns:
        List of indexed source strings. Empty list if no sources are indexed.
    """
    headers = {"Authorization": f"Bearer {access_token.token}"}
    try:
        response = await get_http_client().get(
            f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/manage/indexed-sources",
            headers=headers,
        )
        response.raise_for_status()
        payload = response.json()
        sources = payload.get("sources", [])
        if not isinstance(sources, list):
            raise ValueError("Unexpected response shape for indexed sources")
        return [str(source) for source in sources]
    except (httpx.HTTPStatusError, httpx.RequestError, ValueError):
        # Re-raise known exception types (httpx errors and validation errors)
        logger.error(
            "Onyx MCP Server: Failed to fetch indexed sources",
            exc_info=True,
        )
        raise
    except Exception as exc:
        # Wrap unexpected exceptions
        logger.error(
            "Onyx MCP Server: Unexpected error fetching indexed sources",
            exc_info=True,
        )
        raise RuntimeError(f"Failed to fetch indexed sources: {exc}") from exc


================================================
FILE: backend/onyx/mcp_server_main.py
================================================
"""Entry point for MCP server - HTTP POST transport with API key auth."""

import uvicorn

from onyx.configs.app_configs import MCP_SERVER_ENABLED
from onyx.configs.app_configs import MCP_SERVER_HOST
from onyx.configs.app_configs import MCP_SERVER_PORT
from onyx.utils.logger import setup_logger

logger = setup_logger()


def main() -> None:
    """Run the MCP server."""
    if not MCP_SERVER_ENABLED:
        logger.info("MCP server is disabled (MCP_SERVER_ENABLED=false)")
        return

    logger.info(f"Starting MCP server on {MCP_SERVER_HOST}:{MCP_SERVER_PORT}")

    from onyx.mcp_server.api import mcp_app

    uvicorn.run(
        mcp_app,
        host=MCP_SERVER_HOST,
        port=MCP_SERVER_PORT,
        log_config=None,
    )


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/natural_language_processing/__init__.py
================================================


================================================
FILE: backend/onyx/natural_language_processing/constants.py
================================================
"""
Constants for natural language processing, including embedding and reranking models.

This file contains constants moved from model_server to support the gradual migration
of API-based calls to bypass the model server.
"""

from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType


# Default model names for different providers
DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
DEFAULT_VERTEX_MODEL = "text-embedding-005"


class EmbeddingModelTextType:
    """Mapping of Onyx text types to provider-specific text types."""

    PROVIDER_TEXT_TYPE_MAP = {
        EmbeddingProvider.COHERE: {
            EmbedTextType.QUERY: "search_query",
            EmbedTextType.PASSAGE: "search_document",
        },
        EmbeddingProvider.VOYAGE: {
            EmbedTextType.QUERY: "query",
            EmbedTextType.PASSAGE: "document",
        },
        EmbeddingProvider.GOOGLE: {
            EmbedTextType.QUERY: "RETRIEVAL_QUERY",
            EmbedTextType.PASSAGE: "RETRIEVAL_DOCUMENT",
        },
    }

    @staticmethod
    def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str:
        """Get provider-specific text type string."""
        return EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider][text_type]


================================================
FILE: backend/onyx/natural_language_processing/english_stopwords.py
================================================
import re

ENGLISH_STOPWORDS = [
    "a",
    "about",
    "above",
    "after",
    "again",
    "against",
    "ain",
    "all",
    "am",
    "an",
    "and",
    "any",
    "are",
    "aren",
    "aren't",
    "as",
    "at",
    "be",
    "because",
    "been",
    "before",
    "being",
    "below",
    "between",
    "both",
    "but",
    "by",
    "can",
    "couldn",
    "couldn't",
    "d",
    "did",
    "didn",
    "didn't",
    "do",
    "does",
    "doesn",
    "doesn't",
    "doing",
    "don",
    "don't",
    "down",
    "during",
    "each",
    "few",
    "for",
    "from",
    "further",
    "had",
    "hadn",
    "hadn't",
    "has",
    "hasn",
    "hasn't",
    "have",
    "haven",
    "haven't",
    "having",
    "he",
    "he'd",
    "he'll",
    "he's",
    "her",
    "here",
    "hers",
    "herself",
    "him",
    "himself",
    "his",
    "how",
    "i",
    "i'd",
    "i'll",
    "i'm",
    "i've",
    "if",
    "in",
    "into",
    "is",
    "isn",
    "isn't",
    "it",
    "it'd",
    "it'll",
    "it's",
    "its",
    "itself",
    "just",
    "ll",
    "m",
    "ma",
    "me",
    "mightn",
    "mightn't",
    "more",
    "most",
    "mustn",
    "mustn't",
    "my",
    "myself",
    "needn",
    "needn't",
    "no",
    "nor",
    "not",
    "now",
    "o",
    "of",
    "off",
    "on",
    "once",
    "only",
    "or",
    "other",
    "our",
    "ours",
    "ourselves",
    "out",
    "over",
    "own",
    "re",
    "s",
    "same",
    "shan",
    "shan't",
    "she",
    "she'd",
    "she'll",
    "she's",
    "should",
    "should've",
    "shouldn",
    "shouldn't",
    "so",
    "some",
    "such",
    "t",
    "than",
    "that",
    "that'll",
    "the",
    "their",
    "theirs",
    "them",
    "themselves",
    "then",
    "there",
    "these",
    "they",
    "they'd",
    "they'll",
    "they're",
    "they've",
    "this",
    "those",
    "through",
    "to",
    "too",
    "under",
    "until",
    "up",
    "ve",
    "very",
    "was",
    "wasn",
    "wasn't",
    "we",
    "we'd",
    "we'll",
    "we're",
    "we've",
    "were",
    "weren",
    "weren't",
    "what",
    "when",
    "where",
    "which",
    "while",
    "who",
    "whom",
    "why",
    "will",
    "with",
    "won",
    "won't",
    "wouldn",
    "wouldn't",
    "y",
    "you",
    "you'd",
    "you'll",
    "you're",
    "you've",
    "your",
    "yours",
    "yourself",
    "yourselves",
]

ENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)


def strip_stopwords(text: str) -> list[str]:
    """Remove English stopwords from text.

    Matching is case-insensitive and ignores leading/trailing punctuation
    on each word. Internal punctuation (like apostrophes in contractions)
    is preserved for matching, so "you're" matches the stopword "you're"
    but "youre" would not.
    """
    words = text.split()
    result = []

    for word in words:
        # Strip leading/trailing punctuation to get the core word for comparison
        # This preserves internal punctuation like apostrophes
        core = re.sub(r"^[^\w']+|[^\w']+$", "", word)
        if core.lower() not in ENGLISH_STOPWORDS_SET:
            result.append(word)

    return result


================================================
FILE: backend/onyx/natural_language_processing/exceptions.py
================================================
class ModelServerRateLimitError(Exception):
    """
    Exception raised for rate limiting errors from the model server.
    """


class CohereBillingLimitError(Exception):
    """
    Raised when Cohere rejects requests because the billing cap is reached.
    """


================================================
FILE: backend/onyx/natural_language_processing/search_nlp_models.py
================================================
import asyncio
import json
import os
import threading
import time
from collections.abc import Callable
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from functools import wraps
from types import TracebackType
from typing import Any
from typing import cast

import aioboto3  # type: ignore
import httpx
import requests
import voyageai  # type: ignore[import-untyped]
from cohere import AsyncClient as CohereAsyncClient
from cohere.core.api_error import ApiError
from google.oauth2 import service_account
from httpx import HTTPError
from requests import JSONDecodeError
from requests import RequestException
from requests import Response
from retry import retry

from onyx.configs.app_configs import INDEXING_EMBEDDING_MODEL_NUM_THREADS
from onyx.configs.app_configs import LARGE_CHUNK_RATIO
from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
from onyx.configs.model_configs import (
    BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
)
from onyx.connectors.models import ConnectorStopSignal
from onyx.db.models import SearchSettings
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.natural_language_processing.constants import DEFAULT_COHERE_MODEL
from onyx.natural_language_processing.constants import DEFAULT_OPENAI_MODEL
from onyx.natural_language_processing.constants import DEFAULT_VERTEX_MODEL
from onyx.natural_language_processing.constants import DEFAULT_VOYAGE_MODEL
from onyx.natural_language_processing.constants import EmbeddingModelTextType
from onyx.natural_language_processing.exceptions import CohereBillingLimitError
from onyx.natural_language_processing.exceptions import ModelServerRateLimitError
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.natural_language_processing.utils import tokenizer_trim_content
from onyx.utils.logger import setup_logger
from onyx.utils.search_nlp_models_utils import pass_aws_key
from onyx.utils.text_processing import remove_invalid_unicode_chars
from onyx.utils.timing import log_function_time
from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
from shared_configs.configs import SKIP_WARM_UP
from shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType
from shared_configs.enums import RerankerProvider
from shared_configs.model_server_models import Embedding
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse
from shared_configs.model_server_models import IntentRequest
from shared_configs.model_server_models import IntentResponse
from shared_configs.model_server_models import RerankRequest
from shared_configs.model_server_models import RerankResponse
from shared_configs.utils import batch_list

logger = setup_logger()

# If we are not only indexing, dont want retry very long
_RETRY_DELAY = 10 if INDEXING_ONLY else 0.1
_RETRY_TRIES = 10 if INDEXING_ONLY else 2

# OpenAI only allows 2048 embeddings to be computed at once
_OPENAI_MAX_INPUT_LEN = 2048
# Cohere allows up to 96 embeddings in a single embedding calling
_COHERE_MAX_INPUT_LEN = 96

# Authentication error string constants
_AUTH_ERROR_401 = "401"
_AUTH_ERROR_UNAUTHORIZED = "unauthorized"
_AUTH_ERROR_INVALID_API_KEY = "invalid api key"
_AUTH_ERROR_PERMISSION = "permission"

# Thread-local storage for event loops
# This prevents creating thousands of event loops during batch processing,
# which was causing severe memory leaks with API-based embedding providers
_thread_local = threading.local()


def _get_or_create_event_loop() -> asyncio.AbstractEventLoop:
    """Get or create a thread-local event loop for API embedding calls.

    This prevents creating a new event loop for every batch during embedding,
    which was causing memory leaks. Instead, each thread reuses the same loop.

    Returns:
        asyncio.AbstractEventLoop: The thread-local event loop
    """
    if (
        not hasattr(_thread_local, "loop")
        or _thread_local.loop is None
        or _thread_local.loop.is_closed()
    ):
        _thread_local.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(_thread_local.loop)
    return _thread_local.loop


def cleanup_embedding_thread_locals() -> None:
    """Clean up thread-local event loops to prevent memory leaks.

    This should be called after each task completes to ensure that
    event loops and their associated resources are properly released.
    Thread-local storage persists across Celery tasks when using the
    thread pool, so explicit cleanup is necessary.

    NOTE: This must be called from the SAME thread that created the event loop.
    For ThreadPoolExecutor-based embedding, this cleanup happens automatically
    via the _cleanup_thread_local wrapper.
    """
    if hasattr(_thread_local, "loop") and _thread_local.loop is not None:
        loop = _thread_local.loop
        if not loop.is_closed():
            # Cancel all pending tasks in the event loop
            try:
                # Ensure loop is set as current event loop before accessing tasks
                asyncio.set_event_loop(loop)
                pending = asyncio.all_tasks(loop)
                if pending:
                    logger.debug(
                        f"Cleaning up event loop with {len(pending)} pending tasks in thread {threading.current_thread().name}"
                    )
                    for task in pending:
                        task.cancel()
                    # Run the loop briefly to allow cancelled tasks to complete
                    loop.run_until_complete(
                        asyncio.gather(*pending, return_exceptions=True)
                    )
            except Exception as e:
                # If gathering tasks fails, just close the loop
                logger.debug(f"Error gathering tasks during cleanup: {e}")

            # Close the event loop
            loop.close()
            logger.debug(
                f"Closed event loop in thread {threading.current_thread().name}"
            )

        # Clear the thread-local reference
        _thread_local.loop = None


def _cleanup_thread_local(func: Callable) -> Callable:
    """Decorator to ensure thread-local cleanup after function execution.

    This wraps functions that run in ThreadPoolExecutor threads to ensure
    that thread-local event loops are cleaned up after each execution,
    preventing memory leaks from persistent thread-local storage.
    """

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        try:
            return func(*args, **kwargs)
        finally:
            # Clean up thread-local event loop after this thread's work is done
            cleanup_embedding_thread_locals()

    return wrapper


WARM_UP_STRINGS = [
    "Onyx is amazing!",
    "Check out our easy deployment guide at",
    "https://docs.onyx.app/deployment/getting_started/quickstart",
]


def clean_model_name(model_str: str) -> str:
    return model_str.replace("/", "_").replace("-", "_").replace(".", "_")


def build_model_server_url(
    model_server_host: str,
    model_server_port: int,
) -> str:
    model_server_url = f"{model_server_host}:{model_server_port}"

    # use protocol if provided
    if "http" in model_server_url:
        return model_server_url

    # otherwise default to http
    return f"http://{model_server_url}"


def is_authentication_error(error: Exception) -> bool:
    """Check if an exception is related to authentication issues.

    Args:
        error: The exception to check

    Returns:
        bool: True if the error appears to be authentication-related
    """
    error_str = str(error).lower()
    return (
        _AUTH_ERROR_401 in error_str
        or _AUTH_ERROR_UNAUTHORIZED in error_str
        or _AUTH_ERROR_INVALID_API_KEY in error_str
        or _AUTH_ERROR_PERMISSION in error_str
    )


def format_embedding_error(
    error: Exception,
    service_name: str,
    model: str | None,
    provider: EmbeddingProvider,
    sanitized_api_key: str | None = None,
    status_code: int | None = None,
) -> str:
    """
    Format a standardized error string for embedding errors.
    """
    detail = f"Status {status_code}" if status_code else f"{type(error)}"

    return (
        f"{'HTTP error' if status_code else 'Exception'} embedding text with {service_name} - {detail}: "
        f"Model: {model} "
        f"Provider: {provider} "
        f"API Key: {sanitized_api_key} "
        f"Exception: {error}"
    )


# Custom exception for authentication errors
class AuthenticationError(Exception):
    """Raised when authentication fails with a provider."""

    def __init__(self, provider: str, message: str = "API key is invalid or expired"):
        self.provider = provider
        self.message = message
        super().__init__(f"{provider} authentication failed: {message}")


class CloudEmbedding:
    def __init__(
        self,
        api_key: str,
        provider: EmbeddingProvider,
        api_url: str | None = None,
        api_version: str | None = None,
        timeout: int = API_BASED_EMBEDDING_TIMEOUT,
    ) -> None:
        self.provider = provider
        self.api_key = api_key
        self.api_url = api_url
        self.api_version = api_version
        self.timeout = timeout
        self.http_client = httpx.AsyncClient(timeout=timeout)
        self._closed = False
        self.sanitized_api_key = api_key[:4] + "********" + api_key[-4:]

    async def _embed_openai(
        self, texts: list[str], model: str | None, reduced_dimension: int | None
    ) -> list[Embedding]:
        if not model:
            model = DEFAULT_OPENAI_MODEL

        import openai

        # Use the OpenAI specific timeout for this one
        client = openai.AsyncOpenAI(
            api_key=self.api_key, timeout=OPENAI_EMBEDDING_TIMEOUT
        )

        final_embeddings: list[Embedding] = []

        for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN):
            response = await client.embeddings.create(
                input=text_batch,
                model=model,
                dimensions=reduced_dimension or openai.omit,
            )
            final_embeddings.extend(
                [embedding.embedding for embedding in response.data]
            )
        return final_embeddings

    async def _embed_cohere(
        self, texts: list[str], model: str | None, embedding_type: str
    ) -> list[Embedding]:
        if not model:
            model = DEFAULT_COHERE_MODEL

        client = CohereAsyncClient(api_key=self.api_key)

        final_embeddings: list[Embedding] = []
        for text_batch in batch_list(texts, _COHERE_MAX_INPUT_LEN):
            # Does not use the same tokenizer as the Onyx API server but it's approximately the same
            # empirically it's only off by a very few tokens so it's not a big deal
            response = await client.embed(
                texts=text_batch,
                model=model,
                input_type=embedding_type,
                truncate="END",
            )
            final_embeddings.extend(cast(list[Embedding], response.embeddings))
        return final_embeddings

    async def _embed_voyage(
        self, texts: list[str], model: str | None, embedding_type: str
    ) -> list[Embedding]:
        if not model:
            model = DEFAULT_VOYAGE_MODEL

        client = voyageai.AsyncClient(
            api_key=self.api_key, timeout=API_BASED_EMBEDDING_TIMEOUT
        )

        response = await client.embed(
            texts=texts,
            model=model,
            input_type=embedding_type,
            truncation=True,
        )
        return response.embeddings

    async def _embed_azure(
        self, texts: list[str], model: str | None
    ) -> list[Embedding]:
        from litellm import aembedding

        response = await aembedding(
            model=model,
            input=texts,
            timeout=API_BASED_EMBEDDING_TIMEOUT,
            api_key=self.api_key,
            api_base=self.api_url,
            api_version=self.api_version,
        )
        embeddings = [embedding["embedding"] for embedding in response.data]
        return embeddings

    async def _embed_vertex(
        self,
        texts: list[str],
        model: str | None,
        embedding_type: str,
        reduced_dimension: int | None,
    ) -> list[Embedding]:
        from google import genai
        from google.genai import types as genai_types

        if not model:
            model = DEFAULT_VERTEX_MODEL

        service_account_info = json.loads(self.api_key)
        credentials = service_account.Credentials.from_service_account_info(
            service_account_info,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
        project_id = service_account_info["project_id"]
        location = (
            service_account_info.get("location")
            or os.environ.get("GOOGLE_CLOUD_LOCATION")
            or "us-central1"
        )

        client = genai.Client(
            vertexai=True,
            project=project_id,
            location=location,
            credentials=credentials,
        )

        embed_config = genai_types.EmbedContentConfig(
            task_type=embedding_type,
            output_dimensionality=reduced_dimension,
            auto_truncate=True,
        )

        async def _embed_batch(batch_texts: list[str]) -> list[Embedding]:
            content_requests: list[Any] = [
                genai_types.Content(parts=[genai_types.Part(text=text)])
                for text in batch_texts
            ]
            response = await client.aio.models.embed_content(
                model=model,
                contents=content_requests,
                config=embed_config,
            )

            if not response.embeddings:
                raise RuntimeError("Received empty embeddings from Google GenAI.")

            embeddings: list[Embedding] = []
            for idx, embedding in enumerate(response.embeddings):
                if embedding.values is None:
                    raise RuntimeError(
                        f"Missing embedding values for input at index {idx}."
                    )
                embeddings.append(embedding.values)
            return embeddings

        # Process VertexAI batches sequentially to avoid additional intra-task fanout.
        # The higher-level thread pool already provides concurrency; running these
        # requests in parallel here was causing excessive memory usage.
        batches = [
            texts[i : i + VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE]
            for i in range(0, len(texts), VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE)
        ]
        all_embeddings: list[Embedding] = []

        logger.debug(
            f"VertexAI embedding: processing {len(texts)} texts in {len(batches)} batches "
            f"(batch_size={VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE})"
        )

        try:
            for batch_idx, batch in enumerate(batches):
                batch_embeddings = await _embed_batch(batch)
                all_embeddings.extend(batch_embeddings)

                # Log progress for large batches to track memory usage patterns
                if batch_idx % 10 == 0 and batch_idx > 0:
                    logger.debug(
                        f"VertexAI embedding progress: batch {batch_idx}/{len(batches)}, total_embeddings={len(all_embeddings)}"
                    )

            logger.debug(
                f"VertexAI embedding completed: {len(all_embeddings)} embeddings generated"
            )
            return all_embeddings
        finally:
            # Ensure client is closed with a timeout to prevent hanging on stuck sessions
            try:
                await asyncio.wait_for(client.aio.aclose(), timeout=5.0)
            except asyncio.TimeoutError:
                logger.warning("Google GenAI client aclose() timed out after 5s")
            except Exception as e:
                logger.warning(f"Error closing Google GenAI client: {e}")

    async def _embed_litellm_proxy(
        self, texts: list[str], model_name: str | None
    ) -> list[Embedding]:
        if not model_name:
            raise ValueError("Model name is required for LiteLLM proxy embedding.")

        if not self.api_url:
            raise ValueError("API URL is required for LiteLLM proxy embedding.")

        headers = (
            {} if not self.api_key else {"Authorization": f"Bearer {self.api_key}"}
        )

        response = await self.http_client.post(
            self.api_url,
            json={
                "model": model_name,
                "input": texts,
            },
            headers=headers,
        )
        response.raise_for_status()
        result = response.json()
        return [embedding["embedding"] for embedding in result["data"]]

    @retry(tries=_RETRY_TRIES, delay=_RETRY_DELAY)
    async def embed(
        self,
        *,
        texts: list[str],
        text_type: EmbedTextType,
        model_name: str | None = None,
        deployment_name: str | None = None,
        reduced_dimension: int | None = None,
    ) -> list[Embedding]:
        import openai

        try:
            if self.provider == EmbeddingProvider.OPENAI:
                return await self._embed_openai(texts, model_name, reduced_dimension)
            elif self.provider == EmbeddingProvider.AZURE:
                return await self._embed_azure(texts, f"azure/{deployment_name}")
            elif self.provider == EmbeddingProvider.LITELLM:
                return await self._embed_litellm_proxy(texts, model_name)

            embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type)
            if self.provider == EmbeddingProvider.COHERE:
                return await self._embed_cohere(texts, model_name, embedding_type)
            elif self.provider == EmbeddingProvider.VOYAGE:
                return await self._embed_voyage(texts, model_name, embedding_type)
            elif self.provider == EmbeddingProvider.GOOGLE:
                return await self._embed_vertex(
                    texts, model_name, embedding_type, reduced_dimension
                )
            else:
                raise ValueError(f"Unsupported provider: {self.provider}")
        except openai.AuthenticationError:
            raise AuthenticationError(provider="OpenAI")
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 401:
                raise AuthenticationError(provider=str(self.provider))

            error_string = format_embedding_error(
                e,
                str(self.provider),
                model_name or deployment_name,
                self.provider,
                sanitized_api_key=self.sanitized_api_key,
                status_code=e.response.status_code,
            )
            logger.error(error_string)
            logger.debug(f"Exception texts: {texts}")

            raise RuntimeError(error_string)
        except Exception as e:
            if is_authentication_error(e):
                raise AuthenticationError(provider=str(self.provider))

            error_string = format_embedding_error(
                e,
                str(self.provider),
                model_name or deployment_name,
                self.provider,
                sanitized_api_key=self.sanitized_api_key,
            )
            logger.error(error_string)
            logger.debug(f"Exception texts: {texts}")

            raise RuntimeError(error_string)

    @staticmethod
    def create(
        api_key: str,
        provider: EmbeddingProvider,
        api_url: str | None = None,
        api_version: str | None = None,
    ) -> "CloudEmbedding":
        logger.debug(f"Creating Embedding instance for provider: {provider}")
        return CloudEmbedding(api_key, provider, api_url, api_version)

    async def aclose(self) -> None:
        """Explicitly close the client."""
        if not self._closed:
            await self.http_client.aclose()
            self._closed = True

    async def __aenter__(self) -> "CloudEmbedding":
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        await self.aclose()

    def __del__(self) -> None:
        """Finalizer to warn about unclosed clients."""
        if not self._closed:
            logger.warning(
                "CloudEmbedding was not properly closed. Use 'async with' or call aclose()"
            )


# API-based reranking functions (moved from model server)
async def cohere_rerank_api(
    query: str, docs: list[str], model_name: str, api_key: str
) -> list[float]:
    cohere_client = CohereAsyncClient(api_key=api_key)
    try:
        response = await cohere_client.rerank(
            query=query, documents=docs, model=model_name
        )
    except ApiError as err:
        if err.status_code == 402:
            logger.warning(
                "Cohere rerank request rejected due to billing cap. Falling back to retrieval ordering until billing resets."
            )
            raise CohereBillingLimitError(
                "Cohere billing limit reached for reranking"
            ) from err
        raise
    results = response.results
    sorted_results = sorted(results, key=lambda item: item.index)
    return [result.relevance_score for result in sorted_results]


async def cohere_rerank_aws(
    query: str,
    docs: list[str],
    model_name: str,
    region_name: str,
    aws_access_key_id: str,
    aws_secret_access_key: str,
) -> list[float]:
    session = aioboto3.Session(
        aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key
    )
    async with session.client(
        "bedrock-runtime", region_name=region_name
    ) as bedrock_client:
        body = json.dumps(
            {
                "query": query,
                "documents": docs,
                "api_version": 2,
            }
        )
        # Invoke the Bedrock model asynchronously
        response = await bedrock_client.invoke_model(
            modelId=model_name,
            accept="application/json",
            contentType="application/json",
            body=body,
        )

        # Read the response asynchronously
        response_body = json.loads(await response["body"].read())

        # Extract and sort the results
        results = response_body.get("results", [])
        sorted_results = sorted(results, key=lambda item: item["index"])

        return [result["relevance_score"] for result in sorted_results]


async def litellm_rerank(
    query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None
) -> list[float]:
    headers = {} if not api_key else {"Authorization": f"Bearer {api_key}"}
    async with httpx.AsyncClient() as client:
        response = await client.post(
            api_url,
            json={
                "model": model_name,
                "query": query,
                "documents": docs,
            },
            headers=headers,
        )
        response.raise_for_status()
        result = response.json()
        return [
            item["relevance_score"]
            for item in sorted(result["results"], key=lambda x: x["index"])
        ]


class EmbeddingModel:
    def __init__(
        self,
        server_host: str,  # Changes depending on indexing or inference
        server_port: int,
        model_name: str | None,
        normalize: bool,
        query_prefix: str | None,
        passage_prefix: str | None,
        api_key: str | None,
        api_url: str | None,
        provider_type: EmbeddingProvider | None,
        retrim_content: bool = False,
        callback: IndexingHeartbeatInterface | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
        reduced_dimension: int | None = None,
    ) -> None:
        self.api_key = api_key
        self.provider_type = provider_type
        self.query_prefix = query_prefix
        self.passage_prefix = passage_prefix
        self.normalize = normalize
        self.model_name = model_name
        self.retrim_content = retrim_content
        self.api_url = api_url
        self.api_version = api_version
        self.deployment_name = deployment_name
        self.reduced_dimension = reduced_dimension
        self.tokenizer = get_tokenizer(
            model_name=model_name, provider_type=provider_type
        )
        self.callback = callback

        # Only build model server endpoint for local models
        if self.provider_type is None:
            model_server_url = build_model_server_url(server_host, server_port)
            self.embed_server_endpoint: str | None = (
                f"{model_server_url}/encoder/bi-encoder-embed"
            )
        else:
            # API providers don't need model server endpoint
            self.embed_server_endpoint = None

    async def _make_direct_api_call(
        self,
        embed_request: EmbedRequest,
        tenant_id: str | None = None,  # noqa: ARG002
        request_id: str | None = None,  # noqa: ARG002
    ) -> EmbedResponse:
        """Make direct API call to cloud provider, bypassing model server."""
        if self.provider_type is None:
            raise ValueError("Provider type is required for direct API calls")

        if self.api_key is None:
            logger.error("API key not provided for cloud model")
            raise RuntimeError("API key not provided for cloud model")

        # Check for prefix usage with cloud models
        if embed_request.manual_query_prefix or embed_request.manual_passage_prefix:
            logger.warning("Prefix provided for cloud model, which is not supported")
            raise ValueError(
                "Prefix string is not valid for cloud models. Cloud models take an explicit text type instead."
            )

        if not all(embed_request.texts):
            logger.error("Empty strings provided for embedding")
            raise ValueError("Empty strings are not allowed for embedding.")

        if not embed_request.texts:
            logger.error("No texts provided for embedding")
            raise ValueError("No texts provided for embedding.")

        start_time = time.monotonic()
        total_chars = sum(len(text) for text in embed_request.texts)

        logger.info(
            f"Embedding {len(embed_request.texts)} texts with {total_chars} total characters with provider: {self.provider_type}"
        )

        async with CloudEmbedding(
            api_key=self.api_key,
            provider=self.provider_type,
            api_url=self.api_url,
            api_version=self.api_version,
        ) as cloud_model:
            embeddings = await cloud_model.embed(
                texts=embed_request.texts,
                model_name=embed_request.model_name,
                deployment_name=embed_request.deployment_name,
                text_type=embed_request.text_type,
                reduced_dimension=embed_request.reduced_dimension,
            )

        if any(embedding is None for embedding in embeddings):
            error_message = "Embeddings contain None values\n"
            error_message += "Corresponding texts:\n"
            error_message += "\n".join(embed_request.texts)
            logger.error(error_message)
            raise ValueError(error_message)

        elapsed = time.monotonic() - start_time
        logger.info(
            f"event=embedding_provider "
            f"texts={len(embed_request.texts)} "
            f"chars={total_chars} "
            f"provider={self.provider_type} "
            f"elapsed={elapsed:.2f}"
        )

        return EmbedResponse(embeddings=embeddings)

    def _make_model_server_request(
        self,
        embed_request: EmbedRequest,
        tenant_id: str | None = None,
        request_id: str | None = None,
    ) -> EmbedResponse:
        if self.embed_server_endpoint is None:
            raise ValueError("Model server endpoint is not configured for local models")

        # Store the endpoint in a local variable to help mypy understand it's not None
        endpoint = self.embed_server_endpoint

        def _make_request() -> Response:
            headers = {}
            if tenant_id:
                headers["X-Onyx-Tenant-ID"] = tenant_id

            if request_id:
                headers["X-Onyx-Request-ID"] = request_id

            response = requests.post(
                endpoint,
                headers=headers,
                json=embed_request.model_dump(),
            )
            # signify that this is a rate limit error
            if response.status_code == 429:
                raise ModelServerRateLimitError(response.text)

            response.raise_for_status()
            return response

        final_make_request_func = _make_request

        # if the text type is a passage, add some default
        # retries + handling for rate limiting
        if embed_request.text_type == EmbedTextType.PASSAGE:
            final_make_request_func = retry(
                tries=3,
                delay=5,
                exceptions=(RequestException, ValueError, JSONDecodeError),
            )(final_make_request_func)
            # use 10 second delay as per Azure suggestion
            final_make_request_func = retry(
                tries=10, delay=10, exceptions=ModelServerRateLimitError
            )(final_make_request_func)

        response: Response | None = None

        try:
            response = final_make_request_func()
            return EmbedResponse(**response.json())
        except requests.HTTPError as e:
            if not response:
                raise HTTPError("HTTP error occurred - response is None.") from e

            try:
                error_detail = response.json().get("detail", str(e))
            except Exception:
                error_detail = response.text
            raise HTTPError(f"HTTP error occurred: {error_detail}") from e
        except requests.RequestException as e:
            raise HTTPError(f"Request failed: {str(e)}") from e

    def _batch_encode_texts(
        self,
        texts: list[str],
        text_type: EmbedTextType,
        batch_size: int,
        max_seq_length: int,
        num_threads: int = INDEXING_EMBEDDING_MODEL_NUM_THREADS,
        tenant_id: str | None = None,
        request_id: str | None = None,
    ) -> list[Embedding]:
        text_batches = batch_list(texts, batch_size)

        logger.debug(f"Encoding {len(texts)} texts in {len(text_batches)} batches")

        embeddings: list[Embedding] = []

        @_cleanup_thread_local
        def process_batch(
            batch_idx: int,
            batch_len: int,
            text_batch: list[str],
            tenant_id: str | None = None,
            request_id: str | None = None,
        ) -> tuple[int, list[Embedding]]:
            if self.callback:
                if self.callback.should_stop():
                    raise ConnectorStopSignal(
                        "_batch_encode_texts detected stop signal"
                    )

            embed_request = EmbedRequest(
                model_name=self.model_name,
                texts=text_batch,
                api_version=self.api_version,
                deployment_name=self.deployment_name,
                max_context_length=max_seq_length,
                normalize_embeddings=self.normalize,
                api_key=self.api_key,
                provider_type=self.provider_type,
                text_type=text_type,
                manual_query_prefix=self.query_prefix,
                manual_passage_prefix=self.passage_prefix,
                api_url=self.api_url,
                reduced_dimension=self.reduced_dimension,
            )

            start_time = time.monotonic()

            # Route between direct API calls and model server calls
            if self.provider_type is not None:
                # For API providers, make direct API call
                # Use thread-local event loop to prevent memory leaks from creating
                # thousands of event loops during batch processing
                loop = _get_or_create_event_loop()
                response = loop.run_until_complete(
                    self._make_direct_api_call(
                        embed_request, tenant_id=tenant_id, request_id=request_id
                    )
                )
            else:
                # For local models, use model server
                response = self._make_model_server_request(
                    embed_request, tenant_id=tenant_id, request_id=request_id
                )

            end_time = time.monotonic()

            processing_time = end_time - start_time
            logger.debug(
                f"EmbeddingModel.process_batch: Batch {batch_idx}/{batch_len} processing time: {processing_time:.2f} seconds"
            )

            return batch_idx, response.embeddings

        # only multi thread if:
        #   1. num_threads is greater than 1
        #   2. we are using an API-based embedding model (provider_type is not None)
        #   3. there are more than 1 batch (no point in threading if only 1)
        if num_threads >= 1 and self.provider_type and len(text_batches) > 1:
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                future_to_batch = {
                    executor.submit(
                        partial(
                            process_batch,
                            idx,
                            len(text_batches),
                            batch,
                            tenant_id=tenant_id,
                            request_id=request_id,
                        )
                    ): idx
                    for idx, batch in enumerate(text_batches, start=1)
                }

                # Collect results in order
                batch_results: list[tuple[int, list[Embedding]]] = []
                for future in as_completed(future_to_batch):
                    try:
                        result = future.result()
                        batch_results.append(result)
                    except Exception as e:
                        logger.exception("Embedding model failed to process batch")
                        raise e

                # Sort by batch index and extend embeddings
                batch_results.sort(key=lambda x: x[0])
                for _, batch_embeddings in batch_results:
                    embeddings.extend(batch_embeddings)
        else:
            # Original sequential processing
            for idx, text_batch in enumerate(text_batches, start=1):
                _, batch_embeddings = process_batch(
                    idx,
                    len(text_batches),
                    text_batch,
                    tenant_id=tenant_id,
                    request_id=request_id,
                )
                embeddings.extend(batch_embeddings)

        return embeddings

    @log_function_time(print_only=True, debug_only=True)
    def encode(
        self,
        texts: list[str],
        text_type: EmbedTextType,
        large_chunks_present: bool = False,
        local_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS,
        api_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
        max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE,
        tenant_id: str | None = None,
        request_id: str | None = None,
    ) -> list[Embedding]:
        if not texts or not all(texts):
            raise ValueError(f"Empty or missing text for embedding: {texts}")

        if large_chunks_present:
            max_seq_length *= LARGE_CHUNK_RATIO

        if self.retrim_content:
            # This is applied during indexing as a catchall for overly long titles (or other uncapped fields)
            # Note that this uses just the default tokenizer which may also lead to very minor miscountings
            # However this slight miscounting is very unlikely to have any material impact.
            texts = [
                tokenizer_trim_content(
                    content=text,
                    desired_length=max_seq_length,
                    tokenizer=self.tokenizer,
                )
                for text in texts
            ]

        # Remove invalid Unicode characters (e.g., unpaired surrogates from malformed documents)
        # that would cause UTF-8 encoding errors when sent to embedding providers
        texts = [remove_invalid_unicode_chars(text) or "<>" for text in texts]

        batch_size = (
            api_embedding_batch_size
            if self.provider_type
            else local_embedding_batch_size
        )

        return self._batch_encode_texts(
            texts=texts,
            text_type=text_type,
            batch_size=batch_size,
            max_seq_length=max_seq_length,
            tenant_id=tenant_id,
            request_id=request_id,
        )

    @classmethod
    def from_db_model(
        cls,
        search_settings: SearchSettings,
        server_host: str,  # Changes depending on indexing or inference
        server_port: int,
        retrim_content: bool = False,
    ) -> "EmbeddingModel":
        return cls(
            server_host=server_host,
            server_port=server_port,
            model_name=search_settings.model_name,
            normalize=search_settings.normalize,
            query_prefix=search_settings.query_prefix,
            passage_prefix=search_settings.passage_prefix,
            api_key=search_settings.api_key,
            provider_type=search_settings.provider_type,
            api_url=search_settings.api_url,
            retrim_content=retrim_content,
            api_version=search_settings.api_version,
            deployment_name=search_settings.deployment_name,
            reduced_dimension=search_settings.reduced_dimension,
        )


class RerankingModel:
    def __init__(
        self,
        model_name: str,
        provider_type: RerankerProvider | None,
        api_key: str | None,
        api_url: str | None,
        model_server_host: str = MODEL_SERVER_HOST,
        model_server_port: int = MODEL_SERVER_PORT,
    ) -> None:
        self.model_name = model_name
        self.provider_type = provider_type
        self.api_key = api_key
        self.api_url = api_url

        # Only build model server endpoint for local models
        if self.provider_type is None:
            model_server_url = build_model_server_url(
                model_server_host, model_server_port
            )
            self.rerank_server_endpoint: str | None = (
                model_server_url + "/encoder/cross-encoder-scores"
            )
        else:
            # API providers don't need model server endpoint
            self.rerank_server_endpoint = None

    async def _make_direct_rerank_call(
        self, query: str, passages: list[str]
    ) -> list[float]:
        """Make direct API call to cloud provider, bypassing model server."""
        if self.provider_type is None:
            raise ValueError("Provider type is required for direct API calls")

        if self.api_key is None:
            raise ValueError("API key is required for cloud provider")

        if self.provider_type == RerankerProvider.COHERE:
            return await cohere_rerank_api(
                query, passages, self.model_name, self.api_key
            )
        elif self.provider_type == RerankerProvider.BEDROCK:
            aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(
                self.api_key
            )
            return await cohere_rerank_aws(
                query,
                passages,
                self.model_name,
                aws_region,
                aws_access_key_id,
                aws_secret_access_key,
            )
        elif self.provider_type == RerankerProvider.LITELLM:
            if self.api_url is None:
                raise ValueError("API URL is required for LiteLLM reranking.")
            return await litellm_rerank(
                query, passages, self.api_url, self.model_name, self.api_key
            )
        else:
            raise ValueError(f"Unsupported reranking provider: {self.provider_type}")

    def predict(self, query: str, passages: list[str]) -> list[float]:
        # Route between direct API calls and model server calls
        if self.provider_type is not None:
            # For API providers, make direct API call
            loop = asyncio.new_event_loop()
            try:
                asyncio.set_event_loop(loop)
                return loop.run_until_complete(
                    self._make_direct_rerank_call(query, passages)
                )
            finally:
                loop.close()
        else:
            # For local models, use model server
            if self.rerank_server_endpoint is None:
                raise ValueError(
                    "Rerank server endpoint is not configured for local models"
                )

            rerank_request = RerankRequest(
                query=query,
                documents=passages,
                model_name=self.model_name,
                provider_type=self.provider_type,
                api_key=self.api_key,
                api_url=self.api_url,
            )

            response = requests.post(
                self.rerank_server_endpoint, json=rerank_request.model_dump()
            )
            response.raise_for_status()

            return RerankResponse(**response.json()).scores


class QueryAnalysisModel:
    def __init__(
        self,
        model_server_host: str = MODEL_SERVER_HOST,
        model_server_port: int = MODEL_SERVER_PORT,
        # Lean heavily towards not throwing out keywords
        keyword_percent_threshold: float = 0.1,
        # Lean towards semantic which is the default
        semantic_percent_threshold: float = 0.4,
    ) -> None:
        model_server_url = build_model_server_url(model_server_host, model_server_port)
        self.intent_server_endpoint = model_server_url + "/custom/query-analysis"
        self.keyword_percent_threshold = keyword_percent_threshold
        self.semantic_percent_threshold = semantic_percent_threshold

    def predict(
        self,
        query: str,
    ) -> tuple[bool, list[str]]:
        intent_request = IntentRequest(
            query=query,
            keyword_percent_threshold=self.keyword_percent_threshold,
            semantic_percent_threshold=self.semantic_percent_threshold,
        )

        response = requests.post(
            self.intent_server_endpoint, json=intent_request.model_dump()
        )
        response.raise_for_status()

        response_model = IntentResponse(**response.json())

        return response_model.is_keyword, response_model.keywords


def warm_up_retry(
    func: Callable[..., Any],
    tries: int = 20,
    delay: int = 5,
    *args: Any,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> Callable[..., Any]:
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        exceptions = []
        for attempt in range(tries):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                exceptions.append(e)
                logger.info(
                    f"Attempt {attempt + 1}/{tries} failed; retrying in {delay} seconds..."
                )
                time.sleep(delay)
        raise Exception(f"All retries failed: {exceptions}")

    return wrapper


def warm_up_bi_encoder(
    embedding_model: EmbeddingModel,
    non_blocking: bool = False,
) -> None:
    if SKIP_WARM_UP:
        return

    warm_up_str = " ".join(WARM_UP_STRINGS)

    logger.debug(f"Warming up encoder model: {embedding_model.model_name}")
    get_tokenizer(
        model_name=embedding_model.model_name,
        provider_type=embedding_model.provider_type,
    ).encode(warm_up_str)

    def _warm_up() -> None:
        try:
            embedding_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)
            logger.debug(
                f"Warm-up complete for encoder model: {embedding_model.model_name}"
            )
        except Exception as e:
            logger.warning(
                f"Warm-up request failed for encoder model {embedding_model.model_name}: {e}"
            )

    if non_blocking:
        threading.Thread(target=_warm_up, daemon=True).start()
        logger.debug(
            f"Started non-blocking warm-up for encoder model: {embedding_model.model_name}"
        )
    else:
        retry_encode = warm_up_retry(embedding_model.encode)
        retry_encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)


# No longer used
def warm_up_cross_encoder(
    rerank_model_name: str,
    non_blocking: bool = False,
) -> None:
    if SKIP_WARM_UP:
        return

    logger.debug(f"Warming up reranking model: {rerank_model_name}")

    reranking_model = RerankingModel(
        model_name=rerank_model_name,
        provider_type=None,
        api_url=None,
        api_key=None,
    )

    def _warm_up() -> None:
        try:
            reranking_model.predict(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])
            logger.debug(f"Warm-up complete for reranking model: {rerank_model_name}")
        except Exception as e:
            logger.warning(
                f"Warm-up request failed for reranking model {rerank_model_name}: {e}"
            )

    if non_blocking:
        threading.Thread(target=_warm_up, daemon=True).start()
        logger.debug(
            f"Started non-blocking warm-up for reranking model: {rerank_model_name}"
        )
    else:
        retry_rerank = warm_up_retry(reranking_model.predict)
        retry_rerank(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])


================================================
FILE: backend/onyx/natural_language_processing/utils.py
================================================
import os
from abc import ABC
from abc import abstractmethod
from copy import copy

from tokenizers import Encoding  # type: ignore[import-untyped]
from tokenizers import Tokenizer

from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
from onyx.context.search.models import InferenceChunk
from onyx.utils.logger import setup_logger
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
from shared_configs.enums import EmbeddingProvider

TRIM_SEP_PAT = "\n... {n} tokens removed...\n"

logger = setup_logger()
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"


class BaseTokenizer(ABC):
    @abstractmethod
    def encode(self, string: str) -> list[int]:
        pass

    @abstractmethod
    def tokenize(self, string: str) -> list[str]:
        pass

    @abstractmethod
    def decode(self, tokens: list[int]) -> str:
        pass


class TiktokenTokenizer(BaseTokenizer):
    _instances: dict[str, "TiktokenTokenizer"] = {}

    def __new__(cls, model_name: str) -> "TiktokenTokenizer":
        if model_name not in cls._instances:
            cls._instances[model_name] = super(TiktokenTokenizer, cls).__new__(cls)
        return cls._instances[model_name]

    def __init__(self, model_name: str):
        if not hasattr(self, "encoder"):
            import tiktoken

            self.encoder = tiktoken.encoding_for_model(model_name)

    def encode(self, string: str) -> list[int]:
        # this ignores special tokens that the model is trained on, see encode_ordinary for details
        return self.encoder.encode_ordinary(string)

    def tokenize(self, string: str) -> list[str]:
        encoded = self.encode(string)
        decoded = [self.encoder.decode([token]) for token in encoded]

        if len(decoded) != len(encoded):
            logger.warning(
                f"OpenAI tokenized length {len(decoded)} does not match encoded length {len(encoded)} for string: {string}"
            )

        return decoded

    def decode(self, tokens: list[int]) -> str:
        return self.encoder.decode(tokens)


class HuggingFaceTokenizer(BaseTokenizer):
    def __init__(self, model_name: str):
        self.encoder: Tokenizer = Tokenizer.from_pretrained(model_name)

    def _safer_encode(self, string: str) -> Encoding:
        """
        Encode a string using the HuggingFaceTokenizer, but if it fails,
        encode the string as ASCII and decode it back to a string. This helps
        in cases where the string has weird characters like \udeb4.
        """
        try:
            return self.encoder.encode(string, add_special_tokens=False)
        except Exception:
            return self.encoder.encode(
                string.encode("ascii", "ignore").decode(), add_special_tokens=False
            )

    def encode(self, string: str) -> list[int]:
        # this returns no special tokens
        return self._safer_encode(string).ids

    def tokenize(self, string: str) -> list[str]:
        return self._safer_encode(string).tokens

    def decode(self, tokens: list[int]) -> str:
        return self.encoder.decode(tokens)


_TOKENIZER_CACHE: dict[tuple[EmbeddingProvider | None, str | None], BaseTokenizer] = {}


def _check_tokenizer_cache(
    model_provider: EmbeddingProvider | None, model_name: str | None
) -> BaseTokenizer:
    global _TOKENIZER_CACHE
    id_tuple = (model_provider, model_name)

    if id_tuple not in _TOKENIZER_CACHE:
        tokenizer = None

        if model_name:
            tokenizer = _try_initialize_tokenizer(model_name, model_provider)

        if not tokenizer:
            logger.info(
                f"Falling back to default embedding model tokenizer: {DOCUMENT_ENCODER_MODEL}"
            )
            tokenizer = _get_default_tokenizer()

        _TOKENIZER_CACHE[id_tuple] = tokenizer

    return _TOKENIZER_CACHE[id_tuple]


def _try_initialize_tokenizer(
    model_name: str, model_provider: EmbeddingProvider | None
) -> BaseTokenizer | None:
    tokenizer: BaseTokenizer | None = None

    if model_provider is not None:
        # Try using TiktokenTokenizer first if model_provider exists
        try:
            tokenizer = TiktokenTokenizer(model_name)
            logger.info(f"Initialized TiktokenTokenizer for: {model_name}")
            return tokenizer
        except Exception as tiktoken_error:
            logger.debug(
                f"TiktokenTokenizer not available for model {model_name}: {tiktoken_error}"
            )
    else:
        # If no provider specified, try HuggingFaceTokenizer
        try:
            tokenizer = HuggingFaceTokenizer(model_name)
            logger.info(f"Initialized HuggingFaceTokenizer for: {model_name}")
            return tokenizer
        except Exception as hf_error:
            logger.warning(
                f"Failed to initialize HuggingFaceTokenizer for {model_name}: {hf_error}"
            )

    # If both initializations fail, return None
    return None


_DEFAULT_TOKENIZER: BaseTokenizer | None = None


def _get_default_tokenizer() -> BaseTokenizer:
    """Lazy-load the default tokenizer to avoid loading it at module import time."""
    global _DEFAULT_TOKENIZER
    if _DEFAULT_TOKENIZER is None:
        _DEFAULT_TOKENIZER = HuggingFaceTokenizer(DOCUMENT_ENCODER_MODEL)
    return _DEFAULT_TOKENIZER


def get_tokenizer(
    model_name: str | None, provider_type: EmbeddingProvider | str | None
) -> BaseTokenizer:
    if isinstance(provider_type, str):
        try:
            provider_type = EmbeddingProvider(provider_type)
        except ValueError:
            logger.debug(
                f"Invalid provider_type '{provider_type}'. Falling back to default tokenizer."
            )
            return _get_default_tokenizer()
    return _check_tokenizer_cache(provider_type, model_name)


# Max characters per encode() call.
_ENCODE_CHUNK_SIZE = 500_000


def count_tokens(
    text: str,
    tokenizer: BaseTokenizer,
    token_limit: int | None = None,
) -> int:
    """Count tokens, chunking the input to avoid tiktoken stack overflow.

    If token_limit is provided and the text is large enough to require
    multiple chunks (> 500k chars), stops early once the count exceeds it.
    When early-exiting, the returned value exceeds token_limit but may be
    less than the true full token count.
    """
    if len(text) <= _ENCODE_CHUNK_SIZE:
        return len(tokenizer.encode(text))
    total = 0
    for start in range(0, len(text), _ENCODE_CHUNK_SIZE):
        total += len(tokenizer.encode(text[start : start + _ENCODE_CHUNK_SIZE]))
        if token_limit is not None and total > token_limit:
            return total  # Already over — skip remaining chunks
    return total


def tokenizer_trim_content(
    content: str, desired_length: int, tokenizer: BaseTokenizer
) -> str:
    tokens = tokenizer.encode(content)
    if len(tokens) <= desired_length:
        return content

    return tokenizer.decode(tokens[:desired_length])


def tokenizer_trim_middle(
    tokens: list[int], desired_length: int, tokenizer: BaseTokenizer
) -> str:
    if len(tokens) <= desired_length:
        return tokenizer.decode(tokens)
    sep_str = TRIM_SEP_PAT.format(n=len(tokens) - desired_length)
    sep_tokens = tokenizer.encode(sep_str)
    slice_size = (desired_length - len(sep_tokens)) // 2
    assert slice_size > 0, "Slice size is not positive, desired length is too short"
    return (
        tokenizer.decode(tokens[:slice_size])
        + sep_str
        + tokenizer.decode(tokens[-slice_size:])
    )


def tokenizer_trim_chunks(
    chunks: list[InferenceChunk],
    tokenizer: BaseTokenizer,
    max_chunk_toks: int = DOC_EMBEDDING_CONTEXT_SIZE,
) -> list[InferenceChunk]:
    new_chunks = copy(chunks)
    for ind, chunk in enumerate(new_chunks):
        new_content = tokenizer_trim_content(chunk.content, max_chunk_toks, tokenizer)
        if len(new_content) != len(chunk.content):
            new_chunk = copy(chunk)
            new_chunk.content = new_content
            new_chunks[ind] = new_chunk
    return new_chunks


================================================
FILE: backend/onyx/onyxbot/discord/DISCORD_MULTITENANT_README.md
================================================
# Discord Bot Multitenant Architecture

This document analyzes how the Discord cache manager and API client coordinate to handle multitenant API keys from a single Discord client.

## Overview

The Discord bot uses a **single-client, multi-tenant** architecture where one `OnyxDiscordClient` instance serves multiple tenants (organizations) simultaneously. Tenant isolation is achieved through:

- **Cache Manager**: Maps Discord guilds to tenants and stores per-tenant API keys
- **API Client**: Stateless HTTP client that accepts dynamic API keys per request

```
┌─────────────────────────────────────────────────────────────────────┐
│                      OnyxDiscordClient                              │
│                                                                     │
│  ┌─────────────────────────┐    ┌─────────────────────────────┐    │
│  │   DiscordCacheManager   │    │      OnyxAPIClient          │    │
│  │                         │    │                             │    │
│  │  guild_id → tenant_id   │───▶│  send_chat_message(         │    │
│  │  tenant_id → api_key    │    │    message,                 │    │
│  │                         │    │    api_key=<per-tenant>,    │    │
│  └─────────────────────────┘    │    persona_id=...           │    │
│                                 │  )                          │    │
│                                 └─────────────────────────────┘    │
└─────────────────────────────────────────────────────────────────────┘
```

---

## Component Details

### 1. Cache Manager (`backend/onyx/onyxbot/discord/cache.py`)

The `DiscordCacheManager` maintains two critical in-memory mappings:

```python
class DiscordCacheManager:
    _guild_tenants: dict[int, str]   # guild_id → tenant_id
    _api_keys: dict[str, str]        # tenant_id → api_key
    _lock: asyncio.Lock              # Concurrency control
```

#### Key Responsibilities

| Function | Purpose |
|----------|---------|
| `get_tenant(guild_id)` | O(1) lookup: guild → tenant |
| `get_api_key(tenant_id)` | O(1) lookup: tenant → API key |
| `refresh_all()` | Full cache rebuild from database |
| `refresh_guild()` | Incremental update for single guild |

#### API Key Provisioning Strategy

API keys are **lazily provisioned** - only created when first needed:

```python
async def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:
    needs_key = tenant_id not in self._api_keys

    with get_session_with_tenant(tenant_id) as db:
        # Load guild configs
        configs = get_discord_bot_configs(db)
        guild_ids = [c.guild_id for c in configs if c.enabled]

        # Only provision API key if not already cached
        api_key = None
        if needs_key:
            api_key = get_or_create_discord_service_api_key(db, tenant_id)

    return guild_ids, api_key
```

This optimization avoids repeated database calls for API key generation.

#### Concurrency Control

All write operations acquire an async lock to prevent race conditions:

```python
async def refresh_all(self) -> None:
    async with self._lock:
        # Safe to modify _guild_tenants and _api_keys
        for tenant_id in get_all_tenant_ids():
            guild_ids, api_key = await self._load_tenant_data(tenant_id)
            # Update mappings...
```

Read operations (`get_tenant`, `get_api_key`) are lock-free since Python dict lookups are atomic.

---

### 2. API Client (`backend/onyx/onyxbot/discord/api_client.py`)

The `OnyxAPIClient` is a **stateless async HTTP client** that communicates with Onyx API pods.

#### Key Design: Per-Request API Key Injection

```python
class OnyxAPIClient:
    async def send_chat_message(
        self,
        message: str,
        api_key: str,           # Injected per-request
        persona_id: int | None,
        ...
    ) -> ChatFullResponse:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",  # Tenant-specific auth
        }
        # Make request...
```

The client accepts `api_key` as a parameter to each method, enabling **dynamic tenant selection at request time**. This design allows a single client instance to serve multiple tenants:

```python
# Same client, different tenants
await api_client.send_chat_message(msg, api_key=key_for_tenant_1, ...)
await api_client.send_chat_message(msg, api_key=key_for_tenant_2, ...)
```

---

## Coordination Flow

### Message Processing Pipeline

When a Discord message arrives, the client coordinates cache and API client:

```python
async def on_message(self, message: Message) -> None:
    guild_id = message.guild.id

    # Step 1: Cache lookup - guild → tenant
    tenant_id = self.cache.get_tenant(guild_id)
    if not tenant_id:
        return  # Guild not registered

    # Step 2: Cache lookup - tenant → API key
    api_key = self.cache.get_api_key(tenant_id)
    if not api_key:
        logger.warning(f"No API key for tenant {tenant_id}")
        return

    # Step 3: API call with tenant-specific credentials
    await process_chat_message(
        message=message,
        api_key=api_key,              # Tenant-specific
        persona_id=persona_id,         # Tenant-specific
        api_client=self.api_client,
    )
```

### Startup Sequence

```python
async def setup_hook(self) -> None:
    # 1. Initialize API client (create aiohttp session)
    await self.api_client.initialize()

    # 2. Populate cache with all tenants
    await self.cache.refresh_all()

    # 3. Start background refresh task
    self._cache_refresh_task = self.loop.create_task(
        self._periodic_cache_refresh()  # Every 60 seconds
    )
```

### Shutdown Sequence

```python
async def close(self) -> None:
    # 1. Cancel background refresh
    if self._cache_refresh_task:
        self._cache_refresh_task.cancel()

    # 2. Close Discord connection
    await super().close()

    # 3. Close API client session
    await self.api_client.close()

    # 4. Clear cache
    self.cache.clear()
```

---

## Tenant Isolation Mechanisms

### 1. Per-Tenant API Keys

Each tenant has a dedicated service API key:

```python
# backend/onyx/db/discord_bot.py
def get_or_create_discord_service_api_key(db_session: Session, tenant_id: str) -> str:
    existing = get_discord_service_api_key(db_session)
    if existing:
        return regenerate_key(existing)

    # Create LIMITED role key (chat-only permissions)
    return insert_api_key(
        db_session=db_session,
        api_key_args=APIKeyArgs(
            name=DISCORD_SERVICE_API_KEY_NAME,
            role=UserRole.LIMITED,  # Minimal permissions
        ),
        user_id=None,  # Service account (system-owned)
    ).api_key
```

### 2. Database Context Variables

The cache uses context variables for proper tenant-scoped DB sessions:

```python
context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
    with get_session_with_tenant(tenant_id) as db:
        # All DB operations scoped to this tenant
        ...
finally:
    CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)
```

### 3. Enterprise Gating Support

Gated tenants are filtered during cache refresh:

```python
gated_tenants = fetch_ee_implementation_or_noop(
    "onyx.server.tenants.product_gating",
    "get_gated_tenants",
    set(),
)()

for tenant_id in get_all_tenant_ids():
    if tenant_id in gated_tenants:
        continue  # Skip gated tenants
```

---

## Cache Refresh Strategy

| Trigger | Method | Scope |
|---------|--------|-------|
| Startup | `refresh_all()` | All tenants |
| Periodic (60s) | `refresh_all()` | All tenants |
| Guild registration | `refresh_guild()` | Single tenant |

### Error Handling

- **Tenant-level errors**: Logged and skipped (doesn't stop other tenants)
- **Missing API key**: Bot silently ignores messages from that guild
- **Network errors**: Logged, cache continues with stale data until next refresh

---

## Key Design Insights

1. **Single Client, Multiple Tenants**: One `OnyxAPIClient` and one `DiscordCacheManager` instance serves all tenants via dynamic API key injection.

2. **Cache-First Architecture**: Guild lookups are O(1) in-memory; API keys are cached after first provisioning to avoid repeated DB calls.

3. **Graceful Degradation**: If an API key is missing or stale, the bot simply doesn't respond (no crash or error propagation).

4. **Thread Safety Without Blocking**: `asyncio.Lock` prevents race conditions while maintaining async concurrency for reads.

5. **Lazy Provisioning**: API keys are only created when first needed, then cached for performance.

6. **Stateless API Client**: The HTTP client holds no tenant state - all tenant context is injected per-request via the `api_key` parameter.

---

## File References

| Component | Path |
|-----------|------|
| Cache Manager | `backend/onyx/onyxbot/discord/cache.py` |
| API Client | `backend/onyx/onyxbot/discord/api_client.py` |
| Discord Client | `backend/onyx/onyxbot/discord/client.py` |
| API Key DB Operations | `backend/onyx/db/discord_bot.py` |
| Cache Manager Tests | `backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py` |
| API Client Tests | `backend/tests/unit/onyx/onyxbot/discord/test_api_client.py` |

================================================
FILE: backend/onyx/onyxbot/discord/api_client.py
================================================
"""Async HTTP client for communicating with Onyx API pods."""

import aiohttp

from onyx.chat.models import ChatFullResponse
from onyx.onyxbot.discord.constants import API_REQUEST_TIMEOUT
from onyx.onyxbot.discord.exceptions import APIConnectionError
from onyx.onyxbot.discord.exceptions import APIResponseError
from onyx.onyxbot.discord.exceptions import APITimeoutError
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import MessageOrigin
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests

logger = setup_logger()


class OnyxAPIClient:
    """Async HTTP client for sending chat requests to Onyx API pods.

    This client manages an aiohttp session for making non-blocking HTTP
    requests to the Onyx API server. It handles authentication with per-tenant
    API keys and multi-tenant routing.

    Usage:
        client = OnyxAPIClient()
        await client.initialize()
        try:
            response = await client.send_chat_message(
                message="What is our deployment process?",
                tenant_id="tenant_123",
                api_key="dn_xxx...",
                persona_id=1,
            )
            print(response.answer)
        finally:
            await client.close()
    """

    def __init__(
        self,
        timeout: int = API_REQUEST_TIMEOUT,
    ) -> None:
        """Initialize the API client.

        Args:
            timeout: Request timeout in seconds.
        """
        # Helm chart uses API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS to set the base URL
        # TODO: Ideally, this override is only used when someone is launching an Onyx service independently
        self._base_url = build_api_server_url_for_http_requests(
            respect_env_override_if_set=True
        ).rstrip("/")
        self._timeout = timeout
        self._session: aiohttp.ClientSession | None = None

    async def initialize(self) -> None:
        """Create the aiohttp session.

        Must be called before making any requests. The session is created
        with a total timeout and connection timeout.
        """
        if self._session is not None:
            logger.warning("API client session already initialized")
            return

        timeout = aiohttp.ClientTimeout(
            total=self._timeout,
            connect=30,  # 30 seconds to establish connection
        )
        self._session = aiohttp.ClientSession(timeout=timeout)
        logger.info(f"API client initialized with base URL: {self._base_url}")

    async def close(self) -> None:
        """Close the aiohttp session.

        Should be called when shutting down the bot to properly release
        resources.
        """
        if self._session is not None:
            await self._session.close()
            self._session = None
            logger.info("API client session closed")

    @property
    def is_initialized(self) -> bool:
        """Check if the session is initialized."""
        return self._session is not None

    async def send_chat_message(
        self,
        message: str,
        api_key: str,
        persona_id: int | None = None,
    ) -> ChatFullResponse:
        """Send a chat message to the Onyx API server and get a response.

        This method sends a non-streaming chat request to the API server. The response
        contains the complete answer with any citations and metadata.

        Args:
            message: The user's message to process.
            api_key: The API key for authentication.
            persona_id: Optional persona ID to use for the response.

        Returns:
            ChatFullResponse containing the answer, citations, and metadata.

        Raises:
            APIConnectionError: If unable to connect to the API.
            APITimeoutError: If the request times out.
            APIResponseError: If the API returns an error response.
        """
        if self._session is None:
            raise APIConnectionError(
                "API client not initialized. Call initialize() first."
            )

        url = f"{self._base_url}/chat/send-chat-message"

        # Build request payload
        request = SendMessageRequest(
            message=message,
            stream=False,
            origin=MessageOrigin.DISCORDBOT,
            chat_session_info=ChatSessionCreationRequest(
                persona_id=persona_id if persona_id is not None else 0,
            ),
        )

        # Build headers
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        try:
            async with self._session.post(
                url,
                json=request.model_dump(mode="json"),
                headers=headers,
            ) as response:
                if response.status == 401:
                    raise APIResponseError(
                        "Authentication failed - invalid API key",
                        status_code=401,
                    )
                elif response.status == 403:
                    raise APIResponseError(
                        "Access denied - insufficient permissions",
                        status_code=403,
                    )
                elif response.status == 404:
                    raise APIResponseError(
                        "API endpoint not found",
                        status_code=404,
                    )
                elif response.status >= 500:
                    error_text = await response.text()
                    raise APIResponseError(
                        f"Server error: {error_text}",
                        status_code=response.status,
                    )
                elif response.status >= 400:
                    error_text = await response.text()
                    raise APIResponseError(
                        f"Request error: {error_text}",
                        status_code=response.status,
                    )

                # Parse successful response
                data = await response.json()
                response_obj = ChatFullResponse.model_validate(data)

                if response_obj.error_msg:
                    logger.warning(f"Chat API returned error: {response_obj.error_msg}")

                return response_obj

        except aiohttp.ClientConnectorError as e:
            logger.error(f"Failed to connect to API: {e}")
            raise APIConnectionError(
                f"Failed to connect to API at {self._base_url}: {e}"
            ) from e

        except TimeoutError as e:
            logger.error(f"API request timed out after {self._timeout}s")
            raise APITimeoutError(
                f"Request timed out after {self._timeout} seconds"
            ) from e

        except aiohttp.ClientError as e:
            logger.error(f"HTTP client error: {e}")
            raise APIConnectionError(f"HTTP client error: {e}") from e

    async def health_check(self) -> bool:
        """Check if the API server is healthy.

        Returns:
            True if the API server is reachable and healthy, False otherwise.
        """
        if self._session is None:
            logger.warning("API client not initialized. Call initialize() first.")
            return False

        try:
            url = f"{self._base_url}/health"
            async with self._session.get(
                url, timeout=aiohttp.ClientTimeout(total=10)
            ) as response:
                return response.status == 200
        except Exception as e:
            logger.warning(f"API server health check failed: {e}")
            return False


================================================
FILE: backend/onyx/onyxbot/discord/cache.py
================================================
"""Multi-tenant cache for Discord bot guild-tenant mappings and API keys."""

import asyncio

from onyx.db.discord_bot import get_guild_configs
from onyx.db.discord_bot import get_or_create_discord_service_api_key
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.onyxbot.discord.exceptions import CacheError
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


class DiscordCacheManager:
    """Caches guild->tenant mappings and tenant->API key mappings.

    Refreshed on startup, periodically (every 60s), and when guilds register.
    """

    def __init__(self) -> None:
        self._guild_tenants: dict[int, str] = {}  # guild_id -> tenant_id
        self._api_keys: dict[str, str] = {}  # tenant_id -> api_key
        self._lock = asyncio.Lock()
        self._initialized = False

    @property
    def is_initialized(self) -> bool:
        return self._initialized

    async def refresh_all(self) -> None:
        """Full cache refresh from all tenants."""
        async with self._lock:
            logger.info("Starting Discord cache refresh")

            new_guild_tenants: dict[int, str] = {}
            new_api_keys: dict[str, str] = {}

            try:
                gated = fetch_ee_implementation_or_noop(
                    "onyx.server.tenants.product_gating",
                    "get_gated_tenants",
                    set(),
                )()

                tenant_ids = await asyncio.to_thread(get_all_tenant_ids)
                for tenant_id in tenant_ids:
                    if tenant_id in gated:
                        continue

                    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
                    try:
                        guild_ids, api_key = await self._load_tenant_data(tenant_id)
                        if not guild_ids:
                            logger.debug(f"No guilds found for tenant {tenant_id}")
                            continue

                        if not api_key:
                            logger.warning(
                                "Discord service API key missing for tenant that has registered guilds. "
                                f"{tenant_id} will not be handled in this refresh cycle."
                            )
                            continue

                        for guild_id in guild_ids:
                            new_guild_tenants[guild_id] = tenant_id

                        new_api_keys[tenant_id] = api_key
                    except Exception as e:
                        logger.warning(f"Failed to refresh tenant {tenant_id}: {e}")
                    finally:
                        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)

                self._guild_tenants = new_guild_tenants
                self._api_keys = new_api_keys
                self._initialized = True

                logger.info(
                    f"Cache refresh complete: {len(new_guild_tenants)} guilds, {len(new_api_keys)} tenants"
                )

            except Exception as e:
                logger.error(f"Cache refresh failed: {e}")
                raise CacheError(f"Failed to refresh cache: {e}") from e

    async def refresh_guild(self, guild_id: int, tenant_id: str) -> None:
        """Add a single guild to cache after registration."""
        async with self._lock:
            logger.info(f"Refreshing cache for guild {guild_id} (tenant: {tenant_id})")

            guild_ids, api_key = await self._load_tenant_data(tenant_id)

            if guild_id in guild_ids:
                self._guild_tenants[guild_id] = tenant_id
                if api_key:
                    self._api_keys[tenant_id] = api_key
                logger.info(f"Cache updated for guild {guild_id}")
            else:
                logger.warning(f"Guild {guild_id} not found or disabled")

    async def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:
        """Load guild IDs and provision API key if needed.

        Returns:
            (active_guild_ids, api_key) - api_key is the cached key if available,
            otherwise a newly created key. Returns None if no guilds found.
        """
        cached_key = self._api_keys.get(tenant_id)

        def _sync() -> tuple[list[int], str | None]:
            with get_session_with_tenant(tenant_id=tenant_id) as db:
                configs = get_guild_configs(db)
                guild_ids = [
                    config.guild_id
                    for config in configs
                    if config.enabled and config.guild_id is not None
                ]

                if not guild_ids:
                    return [], None

                if not cached_key:
                    new_key = get_or_create_discord_service_api_key(db, tenant_id)
                    db.commit()
                    return guild_ids, new_key

                return guild_ids, cached_key

        return await asyncio.to_thread(_sync)

    def get_tenant(self, guild_id: int) -> str | None:
        """Get tenant ID for a guild."""
        return self._guild_tenants.get(guild_id)

    def get_api_key(self, tenant_id: str) -> str | None:
        """Get API key for a tenant."""
        return self._api_keys.get(tenant_id)

    def remove_guild(self, guild_id: int) -> None:
        """Remove a guild from cache."""
        self._guild_tenants.pop(guild_id, None)

    def get_all_guild_ids(self) -> list[int]:
        """Get all cached guild IDs."""
        return list(self._guild_tenants.keys())

    def clear(self) -> None:
        """Clear all caches."""
        self._guild_tenants.clear()
        self._api_keys.clear()
        self._initialized = False


================================================
FILE: backend/onyx/onyxbot/discord/client.py
================================================
"""Discord bot client with integrated message handling."""

import asyncio
import time

import discord
from discord.ext import commands

from onyx.configs.app_configs import DISCORD_BOT_INVOKE_CHAR
from onyx.onyxbot.discord.api_client import OnyxAPIClient
from onyx.onyxbot.discord.cache import DiscordCacheManager
from onyx.onyxbot.discord.constants import CACHE_REFRESH_INTERVAL
from onyx.onyxbot.discord.handle_commands import handle_dm
from onyx.onyxbot.discord.handle_commands import handle_registration_command
from onyx.onyxbot.discord.handle_commands import handle_sync_channels_command
from onyx.onyxbot.discord.handle_message import process_chat_message
from onyx.onyxbot.discord.handle_message import should_respond
from onyx.onyxbot.discord.utils import get_bot_token
from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxDiscordClient(commands.Bot):
    """Discord bot client with integrated cache, API client, and message handling.

    This client handles:
    - Guild registration via !register command
    - Message processing with persona-based responses
    - Thread context for conversation continuity
    - Multi-tenant support via cached API keys
    """

    def __init__(self, command_prefix: str = DISCORD_BOT_INVOKE_CHAR) -> None:
        intents = discord.Intents.default()
        intents.message_content = True
        intents.members = True

        super().__init__(command_prefix=command_prefix, intents=intents)

        self.ready = False
        self.cache = DiscordCacheManager()
        self.api_client = OnyxAPIClient()
        self._cache_refresh_task: asyncio.Task | None = None

    # -------------------------------------------------------------------------
    # Lifecycle Methods
    # -------------------------------------------------------------------------

    async def setup_hook(self) -> None:
        """Called before on_ready. Initialize components."""
        logger.info("Initializing Discord bot components...")

        # Initialize API client
        await self.api_client.initialize()

        # Initial cache load
        await self.cache.refresh_all()

        # Start periodic cache refresh
        self._cache_refresh_task = self.loop.create_task(self._periodic_cache_refresh())

        logger.info("Discord bot components initialized")

    async def _periodic_cache_refresh(self) -> None:
        """Background task to refresh cache periodically."""
        while not self.is_closed():
            await asyncio.sleep(CACHE_REFRESH_INTERVAL)
            try:
                await self.cache.refresh_all()
            except Exception as e:
                logger.error(f"Cache refresh failed: {e}")

    async def on_ready(self) -> None:
        """Bot connected and ready."""
        if self.ready:
            return

        if not self.user:
            raise RuntimeError("Critical error: Discord Bot user not found")

        logger.info(f"Discord Bot connected as {self.user} (ID: {self.user.id})")
        logger.info(f"Connected to {len(self.guilds)} guild(s)")
        logger.info(f"Cached {len(self.cache.get_all_guild_ids())} registered guild(s)")

        self.ready = True

    async def close(self) -> None:
        """Graceful shutdown."""
        logger.info("Shutting down Discord bot...")

        # Cancel cache refresh task
        if self._cache_refresh_task:
            self._cache_refresh_task.cancel()
            try:
                await self._cache_refresh_task
            except asyncio.CancelledError:
                pass

        # Close Discord connection first - stops new commands from triggering cache ops
        if not self.is_closed():
            await super().close()

        # Close API client
        await self.api_client.close()

        # Clear cache (safe now - no concurrent operations possible)
        self.cache.clear()

        self.ready = False
        logger.info("Discord bot shutdown complete")

    # -------------------------------------------------------------------------
    # Message Handling
    # -------------------------------------------------------------------------

    async def on_message(self, message: discord.Message) -> None:
        """Main message handler."""
        # mypy
        if not self.user:
            raise RuntimeError("Critical error: Discord Bot user not found")

        try:
            # Ignore bot messages
            if message.author.bot:
                return

            # Ignore thread starter messages (empty reference nodes that don't contain content)
            if message.type == discord.MessageType.thread_starter_message:
                return

            # Handle DMs
            if isinstance(message.channel, discord.DMChannel):
                await handle_dm(message)
                return

            # Must have a guild
            if not message.guild or not message.guild.id:
                return

            guild_id = message.guild.id

            # Check for registration command first
            if await handle_registration_command(message, self.cache):
                return

            # Look up guild in cache
            tenant_id = self.cache.get_tenant(guild_id)

            # Check for sync-channels command (requires registered guild)
            if await handle_sync_channels_command(message, tenant_id, self):
                return

            if not tenant_id:
                # Guild not registered, ignore
                return

            # Get API key
            api_key = self.cache.get_api_key(tenant_id)
            if not api_key:
                logger.warning(f"No API key cached for tenant {tenant_id}")
                return

            # Check if bot should respond
            should_respond_context = await should_respond(message, tenant_id, self.user)

            if not should_respond_context.should_respond:
                return

            logger.debug(
                f"Processing message: '{message.content[:50]}' in "
                f"#{getattr(message.channel, 'name', 'unknown')} ({message.guild.name}), "
                f"persona_id={should_respond_context.persona_id}"
            )

            # Process the message
            await process_chat_message(
                message=message,
                api_key=api_key,
                persona_id=should_respond_context.persona_id,
                thread_only_mode=should_respond_context.thread_only_mode,
                api_client=self.api_client,
                bot_user=self.user,
            )

        except Exception as e:
            logger.exception(f"Error processing message: {e}")


# -----------------------------------------------------------------------------
# Entry Point
# -----------------------------------------------------------------------------


def main() -> None:
    """Main entry point for Discord bot."""
    from onyx.db.engine.sql_engine import SqlEngine
    from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

    logger.info("Starting Onyx Discord Bot...")

    # Initialize the database engine (required before any DB operations)
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    # Initialize EE features based on environment
    set_is_ee_based_on_env_variable()

    counter = 0
    while True:
        token = get_bot_token()
        if not token:
            if counter % 180 == 0:
                logger.info(
                    "Discord bot is dormant. Waiting for token configuration..."
                )
            counter += 1
            time.sleep(5)
            continue
        counter = 0
        bot = OnyxDiscordClient()

        try:
            # bot.run() handles SIGINT/SIGTERM and calls close() automatically
            bot.run(token)

        except Exception:
            logger.exception("Fatal error in Discord bot")
            raise


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/onyxbot/discord/constants.py
================================================
"""Discord bot constants."""

# API settings
API_REQUEST_TIMEOUT: int = 3 * 60  # 3 minutes

# Cache settings
CACHE_REFRESH_INTERVAL: int = 60  # 1 minute

# Message settings
MAX_MESSAGE_LENGTH: int = 2000  # Discord's character limit
MAX_CONTEXT_MESSAGES: int = 10  # Max messages to include in conversation context
# Note: Discord.py's add_reaction() requires unicode emoji, not :name: format
THINKING_EMOJI: str = "🤔"  # U+1F914 - Thinking Face
SUCCESS_EMOJI: str = "✅"  # U+2705 - White Heavy Check Mark
ERROR_EMOJI: str = "❌"  # U+274C - Cross Mark

# Command prefix
REGISTER_COMMAND: str = "register"
SYNC_CHANNELS_COMMAND: str = "sync-channels"


================================================
FILE: backend/onyx/onyxbot/discord/exceptions.py
================================================
"""Custom exception classes for Discord bot."""


class DiscordBotError(Exception):
    """Base exception for Discord bot errors."""


class RegistrationError(DiscordBotError):
    """Error during guild registration."""


class SyncChannelsError(DiscordBotError):
    """Error during channel sync."""


class APIError(DiscordBotError):
    """Base API error."""


class CacheError(DiscordBotError):
    """Error during cache operations."""


class APIConnectionError(APIError):
    """Failed to connect to API."""


class APITimeoutError(APIError):
    """Request timed out."""


class APIResponseError(APIError):
    """API returned an error response."""

    def __init__(self, message: str, status_code: int | None = None):
        super().__init__(message)
        self.status_code = status_code


================================================
FILE: backend/onyx/onyxbot/discord/handle_commands.py
================================================
"""Discord bot command handlers for registration and channel sync."""

import asyncio
from datetime import datetime
from datetime import timezone

import discord

from onyx.configs.app_configs import DISCORD_BOT_INVOKE_CHAR
from onyx.configs.constants import ONYX_DISCORD_URL
from onyx.db.discord_bot import bulk_create_channel_configs
from onyx.db.discord_bot import get_guild_config_by_discord_id
from onyx.db.discord_bot import get_guild_config_by_internal_id
from onyx.db.discord_bot import get_guild_config_by_registration_key
from onyx.db.discord_bot import sync_channel_configs
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.utils import DiscordChannelView
from onyx.onyxbot.discord.cache import DiscordCacheManager
from onyx.onyxbot.discord.constants import REGISTER_COMMAND
from onyx.onyxbot.discord.constants import SYNC_CHANNELS_COMMAND
from onyx.onyxbot.discord.exceptions import RegistrationError
from onyx.onyxbot.discord.exceptions import SyncChannelsError
from onyx.server.manage.discord_bot.utils import parse_discord_registration_key
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


async def handle_dm(message: discord.Message) -> None:
    """Handle direct messages."""
    dm_response = (
        "**I can't respond to DMs** :sweat:\n\n"
        f"Please chat with me in a server channel, or join the official "
        f"[Onyx Discord]({ONYX_DISCORD_URL}) for help!"
    )
    await message.channel.send(dm_response)


# -------------------------------------------------------------------------
# Helper functions for error handling
# -------------------------------------------------------------------------


async def _try_dm_author(message: discord.Message, content: str) -> bool:
    """Attempt to DM the message author. Returns True if successful."""
    logger.debug(f"Responding in Discord DM with {content}")
    try:
        await message.author.send(content)
        return True
    except (discord.Forbidden, discord.HTTPException) as e:
        # User has DMs disabled or other error
        logger.warning(f"Failed to DM author {message.author.id}: {e}")
    except Exception as e:
        logger.exception(f"Unexpected error DMing author {message.author.id}: {e}")
    return False


async def _try_delete_message(message: discord.Message) -> bool:
    """Attempt to delete a message. Returns True if successful."""
    logger.debug(f"Deleting potentially sensitive message {message.id}")
    try:
        await message.delete()
        return True
    except (discord.Forbidden, discord.HTTPException) as e:
        # Bot lacks permission or other error
        logger.warning(f"Failed to delete message {message.id}: {e}")
    except Exception as e:
        logger.exception(f"Unexpected error deleting message {message.id}: {e}")
    return False


async def _try_react_x(message: discord.Message) -> bool:
    """Attempt to react to a message with ❌. Returns True if successful."""
    try:
        await message.add_reaction("❌")
        return True
    except (discord.Forbidden, discord.HTTPException) as e:
        # Bot lacks permission or other error
        logger.warning(f"Failed to react to message {message.id}: {e}")
    except Exception as e:
        logger.exception(f"Unexpected error reacting to message {message.id}: {e}")
    return False


# -------------------------------------------------------------------------
# Registration
# -------------------------------------------------------------------------


async def handle_registration_command(
    message: discord.Message,
    cache: DiscordCacheManager,
) -> bool:
    """Handle !register command. Returns True if command was handled."""
    content = message.content.strip()

    # Check for !register command
    if not content.startswith(f"{DISCORD_BOT_INVOKE_CHAR}{REGISTER_COMMAND}"):
        return False

    # Must be in a server
    if not message.guild:
        await _try_dm_author(
            message, "This command can only be used in a server channel."
        )
        return True

    guild_name = message.guild.name
    logger.info(f"Registration command received: {guild_name}")

    try:
        # Parse the registration key
        parts = content.split(maxsplit=1)
        if len(parts) < 2:
            raise RegistrationError(
                "Invalid registration key format. Please check the key and try again."
            )

        registration_key = parts[1].strip()

        if not message.author or not isinstance(message.author, discord.Member):
            raise RegistrationError(
                "You need to be a server administrator to register the bot."
            )

        # Check permissions - require admin or manage_guild
        if not message.author.guild_permissions.administrator:
            if not message.author.guild_permissions.manage_guild:
                raise RegistrationError(
                    "You need **Administrator** or **Manage Server** permissions to register this bot."
                )

        await _register_guild(message, registration_key, cache)
        logger.info(f"Registration successful: {guild_name}")
        await message.reply(
            ":white_check_mark: **Successfully registered!**\n\n"
            "This server is now connected to Onyx. "
            "I'll respond to messages based on your server and channel settings set in Onyx."
        )
    except RegistrationError as e:
        logger.debug(f"Registration failed: {guild_name}, error={e}")
        await _try_dm_author(message, f":x: **Registration failed.**\n\n{e}")
        await _try_delete_message(message)
    except Exception:
        logger.exception(f"Registration failed unexpectedly: {guild_name}")
        await _try_dm_author(
            message,
            ":x: **Registration failed.**\n\nAn unexpected error occurred. Please try again later.",
        )
        await _try_delete_message(message)

    return True


async def _register_guild(
    message: discord.Message,
    registration_key: str,
    cache: DiscordCacheManager,
) -> None:
    """Register a guild with a registration key."""
    if not message.guild:
        # mypy, even though we already know that message.guild is not None
        raise RegistrationError("This command can only be used in a server.")

    logger.info(f"Guild '{message.guild.name}' attempting to register Discord bot")
    registration_key = registration_key.strip()

    # Parse tenant_id from registration key
    parsed = parse_discord_registration_key(registration_key)
    if parsed is None:
        raise RegistrationError(
            "Invalid registration key format. Please check the key and try again."
        )

    tenant_id = parsed

    logger.info(f"Parsed tenant_id {tenant_id} from registration key")

    # Check if this guild is already registered to any tenant
    guild_id = message.guild.id
    existing_tenant = cache.get_tenant(guild_id)
    if existing_tenant is not None:
        logger.warning(
            f"Guild {guild_id} is already registered to tenant {existing_tenant}"
        )
        raise RegistrationError(
            "This server is already registered.\n\nOnyxBot can only connect one Discord server to one Onyx workspace."
        )

    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        guild = message.guild
        guild_name = guild.name

        # Collect all text channels from the guild
        channels = get_text_channels(guild)
        logger.info(f"Found {len(channels)} text channels in guild '{guild_name}'")

        # Validate and update in database
        def _sync_register() -> int:
            with get_session_with_tenant(tenant_id=tenant_id) as db:
                # Find the guild config by registration key
                config = get_guild_config_by_registration_key(db, registration_key)
                if not config:
                    raise RegistrationError(
                        "Registration key not found.\n\n"
                        "The key may have expired or been deleted. "
                        "Please generate a new one from the Onyx admin panel."
                    )

                # Check if already used
                if config.guild_id is not None:
                    raise RegistrationError(
                        "This registration key has already been used.\n\n"
                        "Each key can only be used once. "
                        "Please generate a new key from the Onyx admin panel."
                    )

                # Update the guild config
                config.guild_id = guild_id
                config.guild_name = guild_name
                config.registered_at = datetime.now(timezone.utc)

                # Create channel configs for all text channels
                bulk_create_channel_configs(db, config.id, channels)

                db.commit()
                return config.id

        await asyncio.to_thread(_sync_register)

        # Refresh cache for this guild
        await cache.refresh_guild(guild_id, tenant_id)

        logger.info(
            f"Guild '{guild_name}' registered with {len(channels)} channel configs"
        )
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)


def get_text_channels(guild: discord.Guild) -> list[DiscordChannelView]:
    """Get all text channels from a guild as DiscordChannelView objects."""
    channels: list[DiscordChannelView] = []
    for channel in guild.channels:
        # Include text channels and forum channels (where threads can be created)
        if isinstance(channel, (discord.TextChannel, discord.ForumChannel)):
            # Check if channel is private (not visible to @everyone)
            everyone_perms = channel.permissions_for(guild.default_role)
            is_private = not everyone_perms.view_channel

            logger.debug(
                f"Found channel: #{channel.name}, type={channel.type.name}, is_private={is_private}"
            )

            channels.append(
                DiscordChannelView(
                    channel_id=channel.id,
                    channel_name=channel.name,
                    channel_type=channel.type.name,  # "text" or "forum"
                    is_private=is_private,
                )
            )

    logger.debug(f"Retrieved {len(channels)} channels from guild '{guild.name}'")
    return channels


# -------------------------------------------------------------------------
# Sync Channels
# -------------------------------------------------------------------------


async def handle_sync_channels_command(
    message: discord.Message,
    tenant_id: str | None,
    bot: discord.Client,
) -> bool:
    """Handle !sync-channels command. Returns True if command was handled."""
    content = message.content.strip()

    # Check for !sync-channels command
    if not content.startswith(f"{DISCORD_BOT_INVOKE_CHAR}{SYNC_CHANNELS_COMMAND}"):
        return False

    # Must be in a server
    if not message.guild:
        await _try_dm_author(
            message, "This command can only be used in a server channel."
        )
        return True

    guild_name = message.guild.name
    logger.info(f"Sync-channels command received: {guild_name}")

    try:
        # Must be registered
        if not tenant_id:
            raise SyncChannelsError(
                "This server is not registered. Please register it first."
            )

        # Check permissions - require admin or manage_guild
        if not message.author or not isinstance(message.author, discord.Member):
            raise SyncChannelsError(
                "You need to be a server administrator to sync channels."
            )

        if not message.author.guild_permissions.administrator:
            if not message.author.guild_permissions.manage_guild:
                raise SyncChannelsError(
                    "You need **Administrator** or **Manage Server** permissions to sync channels."
                )

        # Get guild config ID
        def _get_guild_config_id() -> int | None:
            with get_session_with_tenant(tenant_id=tenant_id) as db:
                if not message.guild:
                    raise SyncChannelsError(
                        "Server not found. This shouldn't happen. Please contact Onyx support."
                    )
                config = get_guild_config_by_discord_id(db, message.guild.id)
                return config.id if config else None

        guild_config_id = await asyncio.to_thread(_get_guild_config_id)

        if not guild_config_id:
            raise SyncChannelsError(
                "Server config not found. This shouldn't happen. Please contact Onyx support."
            )

        # Perform the sync
        added, removed, updated = await sync_guild_channels(
            guild_config_id, tenant_id, bot
        )
        logger.info(
            f"Sync-channels successful: {guild_name}, added={added}, removed={removed}, updated={updated}"
        )
        await message.reply(
            f":white_check_mark: **Channel sync complete!**\n\n"
            f"* **{added}** new channel(s) added\n"
            f"* **{removed}** deleted channel(s) removed\n"
            f"* **{updated}** channel name(s) updated\n\n"
            "New channels are disabled by default. Enable them in the Onyx admin panel."
        )
    except SyncChannelsError as e:
        logger.debug(f"Sync-channels failed: {guild_name}, error={e}")
        await _try_dm_author(message, f":x: **Channel sync failed.**\n\n{e}")
        await _try_react_x(message)
    except Exception:
        logger.exception(f"Sync-channels failed unexpectedly: {guild_name}")
        await _try_dm_author(
            message,
            ":x: **Channel sync failed.**\n\nAn unexpected error occurred. Please try again later.",
        )
        await _try_react_x(message)

    return True


async def sync_guild_channels(
    guild_config_id: int,
    tenant_id: str,
    bot: discord.Client,
) -> tuple[int, int, int]:
    """Sync channel configs with current Discord channels for a guild.

    Fetches current channels from Discord and syncs with database:
    - Creates configs for new channels (disabled by default)
    - Removes configs for deleted channels
    - Updates names for existing channels if changed

    Args:
        guild_config_id: Internal ID of the guild config
        tenant_id: Tenant ID for database access
        bot: Discord bot client

    Returns:
        (added_count, removed_count, updated_count)

    Raises:
        ValueError: If guild config not found or guild not registered
    """
    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        # Get guild_id from config
        def _get_guild_id() -> int | None:
            with get_session_with_tenant(tenant_id=tenant_id) as db:
                config = get_guild_config_by_internal_id(db, guild_config_id)
                if not config:
                    return None
                return config.guild_id

        guild_id = await asyncio.to_thread(_get_guild_id)

        if guild_id is None:
            raise ValueError(
                f"Guild config {guild_config_id} not found or not registered"
            )

        # Get the guild from Discord
        guild = bot.get_guild(guild_id)
        if not guild:
            raise ValueError(f"Guild {guild_id} not found in Discord cache")

        # Get current channels from Discord
        channels = get_text_channels(guild)
        logger.info(f"Syncing {len(channels)} channels for guild '{guild.name}'")

        # Sync with database
        def _sync() -> tuple[int, int, int]:
            with get_session_with_tenant(tenant_id=tenant_id) as db:
                added, removed, updated = sync_channel_configs(
                    db, guild_config_id, channels
                )
                db.commit()
                return added, removed, updated

        added, removed, updated = await asyncio.to_thread(_sync)

        logger.info(
            f"Channel sync complete for guild '{guild.name}': added={added}, removed={removed}, updated={updated}"
        )

        return added, removed, updated

    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)


================================================
FILE: backend/onyx/onyxbot/discord/handle_message.py
================================================
"""Discord bot message handling and response logic."""

import asyncio

import discord
from pydantic import BaseModel

from onyx.chat.models import ChatFullResponse
from onyx.db.discord_bot import get_channel_config_by_discord_ids
from onyx.db.discord_bot import get_guild_config_by_discord_id
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import DiscordChannelConfig
from onyx.db.models import DiscordGuildConfig
from onyx.onyxbot.discord.api_client import OnyxAPIClient
from onyx.onyxbot.discord.constants import MAX_CONTEXT_MESSAGES
from onyx.onyxbot.discord.constants import MAX_MESSAGE_LENGTH
from onyx.onyxbot.discord.constants import THINKING_EMOJI
from onyx.onyxbot.discord.exceptions import APIError
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Message types with actual content (excludes system notifications like "user joined")
CONTENT_MESSAGE_TYPES = (
    discord.MessageType.default,
    discord.MessageType.reply,
    discord.MessageType.thread_starter_message,
)


class ShouldRespondContext(BaseModel):
    """Context for whether the bot should respond to a message."""

    should_respond: bool
    persona_id: int | None
    thread_only_mode: bool


# -------------------------------------------------------------------------
# Response Logic
# -------------------------------------------------------------------------


async def should_respond(
    message: discord.Message,
    tenant_id: str,
    bot_user: discord.ClientUser,
) -> ShouldRespondContext:
    """Determine if bot should respond and which persona to use."""
    if not message.guild:
        logger.warning("Received a message that isn't in a server.")
        return ShouldRespondContext(
            should_respond=False, persona_id=None, thread_only_mode=False
        )

    guild_id = message.guild.id
    channel_id = message.channel.id
    bot_mentioned = bot_user in message.mentions

    def _get_configs() -> tuple[DiscordGuildConfig | None, DiscordChannelConfig | None]:
        with get_session_with_tenant(tenant_id=tenant_id) as db:
            guild_config = get_guild_config_by_discord_id(db, guild_id)
            if not guild_config or not guild_config.enabled:
                return None, None

            # For threads, use parent channel ID
            actual_channel_id = channel_id
            if isinstance(message.channel, discord.Thread) and message.channel.parent:
                actual_channel_id = message.channel.parent.id

            channel_config = get_channel_config_by_discord_ids(
                db, guild_id, actual_channel_id
            )
            return guild_config, channel_config

    guild_config, channel_config = await asyncio.to_thread(_get_configs)

    if not guild_config or not channel_config or not channel_config.enabled:
        return ShouldRespondContext(
            should_respond=False, persona_id=None, thread_only_mode=False
        )

    # Determine persona (channel override or guild default)
    persona_id = channel_config.persona_override_id or guild_config.default_persona_id

    # Check mention requirement (with exceptions for implicit invocation)
    if channel_config.require_bot_invocation and not bot_mentioned:
        if not await check_implicit_invocation(message, bot_user):
            return ShouldRespondContext(
                should_respond=False, persona_id=None, thread_only_mode=False
            )

    return ShouldRespondContext(
        should_respond=True,
        persona_id=persona_id,
        thread_only_mode=channel_config.thread_only_mode,
    )


async def check_implicit_invocation(
    message: discord.Message,
    bot_user: discord.ClientUser,
) -> bool:
    """Check if the bot should respond without explicit mention.

    Returns True if:
    1. User is replying to a bot message
    2. User is in a thread owned by the bot
    3. User is in a thread created from a bot message
    """
    # Check if replying to a bot message
    if message.reference and message.reference.message_id:
        try:
            referenced_msg = await message.channel.fetch_message(
                message.reference.message_id
            )
            if referenced_msg.author.id == bot_user.id:
                logger.debug(
                    f"Implicit invocation via reply: '{message.content[:50]}...'"
                )
                return True
        except (discord.NotFound, discord.HTTPException):
            pass

    # Check thread-related conditions
    if isinstance(message.channel, discord.Thread):
        thread = message.channel

        # Bot owns the thread
        if thread.owner_id == bot_user.id:
            logger.debug(
                f"Implicit invocation via bot-owned thread: '{message.content[:50]}...' in #{thread.name}"
            )
            return True

        # Thread was created from a bot message
        if thread.parent and not isinstance(thread.parent, discord.ForumChannel):
            try:
                starter = await thread.parent.fetch_message(thread.id)
                if starter.author.id == bot_user.id:
                    logger.debug(
                        f"Implicit invocation via bot-started thread: '{message.content[:50]}...' in #{thread.name}"
                    )
                    return True
            except (discord.NotFound, discord.HTTPException):
                pass

    return False


# -------------------------------------------------------------------------
# Message Processing
# -------------------------------------------------------------------------


async def process_chat_message(
    message: discord.Message,
    api_key: str,
    persona_id: int | None,
    thread_only_mode: bool,
    api_client: OnyxAPIClient,
    bot_user: discord.ClientUser,
) -> None:
    """Process a message and send response."""
    try:
        await message.add_reaction(THINKING_EMOJI)
    except discord.DiscordException:
        logger.warning(
            f"Failed to add thinking reaction to message: '{message.content[:50]}...'"
        )

    try:
        # Build conversation context
        context = await _build_conversation_context(message, bot_user)

        # Prepare full message content
        parts = []
        if context:
            parts.append(context)
        if isinstance(message.channel, discord.Thread):
            if isinstance(message.channel.parent, discord.ForumChannel):
                parts.append(f"Forum post title: {message.channel.name}")
        parts.append(
            f"Current message from @{message.author.display_name}: {format_message_content(message)}"
        )

        # Send to API
        response = await api_client.send_chat_message(
            message="\n\n".join(parts),
            api_key=api_key,
            persona_id=persona_id,
        )

        # Format response with citations
        answer = response.answer or "I couldn't generate a response."
        answer = _append_citations(answer, response)

        await send_response(message, answer, thread_only_mode)

        try:
            await message.remove_reaction(THINKING_EMOJI, bot_user)
        except discord.DiscordException:
            pass

    except APIError as e:
        logger.error(f"API error processing message: {e}")
        await send_error_response(message, bot_user)
    except Exception as e:
        logger.exception(f"Error processing chat message: {e}")
        await send_error_response(message, bot_user)


async def _build_conversation_context(
    message: discord.Message,
    bot_user: discord.ClientUser,
) -> str | None:
    """Build conversation context from thread history or reply chain."""
    if isinstance(message.channel, discord.Thread):
        return await _build_thread_context(message, bot_user)
    elif message.reference:
        return await _build_reply_chain_context(message, bot_user)
    return None


def _append_citations(answer: str, response: ChatFullResponse) -> str:
    """Append citation sources to the answer if present."""
    if not response.citation_info or not response.top_documents:
        return answer

    cited_docs: list[tuple[int, str, str | None]] = []
    for citation in response.citation_info:
        doc = next(
            (
                d
                for d in response.top_documents
                if d.document_id == citation.document_id
            ),
            None,
        )
        if doc:
            cited_docs.append(
                (
                    citation.citation_number,
                    doc.semantic_identifier or "Source",
                    doc.link,
                )
            )

    if not cited_docs:
        return answer

    cited_docs.sort(key=lambda x: x[0])
    citations = "\n\n**Sources:**\n"
    for num, name, link in cited_docs[:5]:
        if link:
            citations += f"{num}. [{name}](<{link}>)\n"
        else:
            citations += f"{num}. {name}\n"

    return answer + citations


# -------------------------------------------------------------------------
# Context Building
# -------------------------------------------------------------------------


async def _build_reply_chain_context(
    message: discord.Message,
    bot_user: discord.ClientUser,
) -> str | None:
    """Build context by following the reply chain backwards."""
    if not message.reference or not message.reference.message_id:
        return None

    try:
        messages: list[discord.Message] = []
        current = message

        # Follow reply chain backwards up to MAX_CONTEXT_MESSAGES
        while (
            current.reference
            and current.reference.message_id
            and len(messages) < MAX_CONTEXT_MESSAGES
        ):
            try:
                parent = await message.channel.fetch_message(
                    current.reference.message_id
                )
                messages.append(parent)
                current = parent
            except (discord.NotFound, discord.HTTPException):
                break

        if not messages:
            return None

        messages.reverse()  # Chronological order

        logger.debug(
            f"Built reply chain context: {len(messages)} messages in #{getattr(message.channel, 'name', 'unknown')}"
        )

        return _format_messages_as_context(messages, bot_user)

    except Exception as e:
        logger.warning(f"Failed to build reply chain context: {e}")
        return None


async def _build_thread_context(
    message: discord.Message,
    bot_user: discord.ClientUser,
) -> str | None:
    """Build context from thread message history."""
    if not isinstance(message.channel, discord.Thread):
        return None

    try:
        thread = message.channel
        messages: list[discord.Message] = []

        # Fetch recent messages (excluding current)
        async for msg in thread.history(limit=MAX_CONTEXT_MESSAGES, oldest_first=False):
            if msg.id != message.id:
                messages.append(msg)

        # Include thread starter message and its reply chain if not already present
        if thread.parent and not isinstance(thread.parent, discord.ForumChannel):
            try:
                starter = await thread.parent.fetch_message(thread.id)
                if starter.id != message.id and not any(
                    m.id == starter.id for m in messages
                ):
                    messages.append(starter)

                # Trace back through the starter's reply chain for more context
                current = starter
                while (
                    current.reference
                    and current.reference.message_id
                    and len(messages) < MAX_CONTEXT_MESSAGES
                ):
                    try:
                        parent = await thread.parent.fetch_message(
                            current.reference.message_id
                        )
                        if not any(m.id == parent.id for m in messages):
                            messages.append(parent)
                        current = parent
                    except (discord.NotFound, discord.HTTPException):
                        break
            except (discord.NotFound, discord.HTTPException):
                pass

        if not messages:
            return None

        messages.sort(key=lambda m: m.id)  # Chronological order
        logger.debug(
            f"Built thread context: {len(messages)} messages in #{thread.name}"
        )

        return _format_messages_as_context(messages, bot_user)

    except Exception as e:
        logger.warning(f"Failed to build thread context: {e}")
        return None


def _format_messages_as_context(
    messages: list[discord.Message],
    bot_user: discord.ClientUser,
) -> str | None:
    """Format a list of messages into a conversation context string."""
    formatted = []
    for msg in messages:
        if msg.type not in CONTENT_MESSAGE_TYPES:
            continue

        sender = (
            "OnyxBot" if msg.author.id == bot_user.id else f"@{msg.author.display_name}"
        )
        formatted.append(f"{sender}: {format_message_content(msg)}")

    if not formatted:
        return None

    return (
        "You are a Discord bot named OnyxBot.\n"
        'Always assume that [user] is the same as the "Current message" author.'
        "Conversation history:\n"
        "---\n" + "\n".join(formatted) + "\n---"
    )


# -------------------------------------------------------------------------
# Message Formatting
# -------------------------------------------------------------------------


def format_message_content(message: discord.Message) -> str:
    """Format message content with readable mentions."""
    content = message.content

    for user in message.mentions:
        content = content.replace(f"<@{user.id}>", f"@{user.display_name}")
        content = content.replace(f"<@!{user.id}>", f"@{user.display_name}")

    for role in message.role_mentions:
        content = content.replace(f"<@&{role.id}>", f"@{role.name}")

    for channel in message.channel_mentions:
        content = content.replace(f"<#{channel.id}>", f"#{channel.name}")

    return content


# -------------------------------------------------------------------------
# Response Sending
# -------------------------------------------------------------------------


async def send_response(
    message: discord.Message,
    content: str,
    thread_only_mode: bool,
) -> None:
    """Send response based on thread_only_mode setting."""
    chunks = _split_message(content)

    if isinstance(message.channel, discord.Thread):
        for chunk in chunks:
            await message.channel.send(chunk)
    elif thread_only_mode:
        thread_name = f"OnyxBot <> {message.author.display_name}"[:100]
        thread = await message.create_thread(name=thread_name)
        for chunk in chunks:
            await thread.send(chunk)
    else:
        for i, chunk in enumerate(chunks):
            if i == 0:
                await message.reply(chunk)
            else:
                await message.channel.send(chunk)


def _split_message(content: str) -> list[str]:
    """Split content into chunks that fit Discord's message limit."""
    chunks = []
    while content:
        if len(content) <= MAX_MESSAGE_LENGTH:
            chunks.append(content)
            break

        # Find a good split point
        split_at = MAX_MESSAGE_LENGTH
        for sep in ["\n\n", "\n", ". ", " "]:
            idx = content.rfind(sep, 0, MAX_MESSAGE_LENGTH)
            if idx > MAX_MESSAGE_LENGTH // 2:
                split_at = idx + len(sep)
                break

        chunks.append(content[:split_at])
        content = content[split_at:]

    return chunks


async def send_error_response(
    message: discord.Message,
    bot_user: discord.ClientUser,
) -> None:
    """Send error response and clean up reaction."""
    try:
        await message.remove_reaction(THINKING_EMOJI, bot_user)
    except discord.DiscordException:
        pass

    error_msg = "Sorry, I encountered an error processing your message. You may want to contact Onyx for support :sweat_smile:"

    try:
        if isinstance(message.channel, discord.Thread):
            await message.channel.send(error_msg)
        else:
            thread = await message.create_thread(
                name=f"Response to {message.author.display_name}"[:100]
            )
            await thread.send(error_msg)
    except discord.DiscordException:
        pass


================================================
FILE: backend/onyx/onyxbot/discord/utils.py
================================================
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISCORD_BOT_TOKEN
from onyx.configs.constants import AuthType
from onyx.db.discord_bot import get_discord_bot_config
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.utils.logger import setup_logger
from onyx.utils.sensitive import SensitiveValue
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

logger = setup_logger()


def get_bot_token() -> str | None:
    """Get Discord bot token from env var or database.

    Priority:
    1. DISCORD_BOT_TOKEN env var (always takes precedence)
    2. For self-hosted: DiscordBotConfig in database (default tenant)
    3. For Cloud: should always have env var set

    Returns:
        Bot token string, or None if not configured.
    """
    # Environment variable takes precedence
    if DISCORD_BOT_TOKEN:
        return DISCORD_BOT_TOKEN

    # Cloud should always have env var; if not, return None
    if AUTH_TYPE == AuthType.CLOUD:
        logger.warning("Cloud deployment missing DISCORD_BOT_TOKEN env var")
        return None

    # Self-hosted: check database for bot config
    try:
        with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db:
            config = get_discord_bot_config(db)
    except Exception as e:
        logger.error(f"Failed to get bot token from database: {e}")
        return None
    if config and config.bot_token:
        if isinstance(config.bot_token, SensitiveValue):
            return config.bot_token.get_value(apply_mask=False)
        return config.bot_token
    return None


================================================
FILE: backend/onyx/onyxbot/slack/blocks.py
================================================
from datetime import datetime
from typing import cast

import pytz
import timeago  # type: ignore
from slack_sdk.models.blocks import ActionsBlock
from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import ButtonElement
from slack_sdk.models.blocks import ContextBlock
from slack_sdk.models.blocks import DividerBlock
from slack_sdk.models.blocks import HeaderBlock
from slack_sdk.models.blocks import Option
from slack_sdk.models.blocks import RadioButtonsElement
from slack_sdk.models.blocks import SectionBlock
from slack_sdk.models.blocks.basic_components import MarkdownTextObject
from slack_sdk.models.blocks.block_elements import ImageElement

from onyx.chat.models import ChatBasicResponse
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import SearchFeedbackType
from onyx.configs.onyxbot_configs import ONYX_BOT_NUM_DOCS_TO_DISPLAY
from onyx.context.search.models import SearchDoc
from onyx.db.chat import get_chat_session_by_message_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ChannelConfig
from onyx.onyxbot.slack.constants import CONTINUE_IN_WEB_UI_ACTION_ID
from onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
from onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
from onyx.onyxbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
from onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID
from onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID
from onyx.onyxbot.slack.formatting import format_slack_message
from onyx.onyxbot.slack.icons import source_to_github_img_link
from onyx.onyxbot.slack.models import ActionValuesEphemeralMessage
from onyx.onyxbot.slack.models import ActionValuesEphemeralMessageChannelConfig
from onyx.onyxbot.slack.models import ActionValuesEphemeralMessageMessageInfo
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.utils import build_continue_in_web_ui_id
from onyx.onyxbot.slack.utils import build_feedback_id
from onyx.onyxbot.slack.utils import build_publish_ephemeral_message_id
from onyx.onyxbot.slack.utils import remove_slack_text_interactions
from onyx.onyxbot.slack.utils import translate_vespa_highlight_to_slack
from onyx.utils.text_processing import decode_escapes

_MAX_BLURB_LEN = 45


def _format_doc_updated_at(updated_at: datetime | None) -> str | None:
    """Convert document timestamps to a human friendly relative string."""
    if updated_at is None:
        return None

    if updated_at.tzinfo is None or updated_at.tzinfo.utcoffset(updated_at) is None:
        aware_updated_at = updated_at.replace(tzinfo=pytz.utc)
    else:
        aware_updated_at = updated_at.astimezone(pytz.utc)

    return timeago.format(aware_updated_at, datetime.now(pytz.utc))


def get_feedback_reminder_blocks(thread_link: str, include_followup: bool) -> Block:
    text = (
        f"Please provide feedback on <{thread_link}|this answer>. "
        "This is essential to help us to improve the quality of the answers. "
        "Please rate it by clicking the `Helpful` or `Not helpful` button. "
    )
    if include_followup:
        text += "\n\nIf you need more help, click the `I need more help from a human!` button. "

    text += "\n\nThanks!"

    return SectionBlock(text=text)


def _split_text(text: str, limit: int = 3000) -> list[str]:
    if len(text) <= limit:
        return [text]

    chunks = []
    while text:
        if len(text) <= limit:
            chunks.append(text)
            break

        # Find the nearest space before the limit to avoid splitting a word
        split_at = text.rfind(" ", 0, limit)
        if split_at == -1:  # No spaces found, force split
            split_at = limit

        chunk = text[:split_at]
        chunks.append(chunk)
        text = text[split_at:].lstrip()  # Remove leading spaces from the next chunk

    return chunks


def _clean_markdown_link_text(text: str) -> str:
    # Remove any newlines within the text
    return format_slack_message(text).replace("\n", " ").strip()


def _build_qa_feedback_block(
    message_id: int, feedback_reminder_id: str | None = None
) -> Block:
    return ActionsBlock(
        block_id=build_feedback_id(message_id),
        elements=[
            ButtonElement(
                action_id=LIKE_BLOCK_ACTION_ID,
                text="👍 Helpful",
                value=feedback_reminder_id,
            ),
            ButtonElement(
                action_id=DISLIKE_BLOCK_ACTION_ID,
                text="👎 Not helpful",
                value=feedback_reminder_id,
            ),
        ],
    )


def _build_ephemeral_publication_block(
    channel_id: str,  # noqa: ARG001
    chat_message_id: int,
    message_info: SlackMessageInfo,
    original_question_ts: str,
    channel_conf: ChannelConfig,
    feedback_reminder_id: str | None = None,
) -> Block:
    # check whether the message is in a thread
    if (
        message_info is not None
        and message_info.msg_to_respond is not None
        and message_info.thread_to_respond is not None
        and (message_info.msg_to_respond == message_info.thread_to_respond)
    ):
        respond_ts = None
    else:
        respond_ts = original_question_ts

    action_values_ephemeral_message_channel_config = (
        ActionValuesEphemeralMessageChannelConfig(
            channel_name=channel_conf.get("channel_name"),
            respond_tag_only=channel_conf.get("respond_tag_only"),
            respond_to_bots=channel_conf.get("respond_to_bots"),
            is_ephemeral=channel_conf.get("is_ephemeral", False),
            respond_member_group_list=channel_conf.get("respond_member_group_list"),
            answer_filters=channel_conf.get("answer_filters"),
            follow_up_tags=channel_conf.get("follow_up_tags"),
            show_continue_in_web_ui=channel_conf.get("show_continue_in_web_ui", False),
        )
    )

    action_values_ephemeral_message_message_info = (
        ActionValuesEphemeralMessageMessageInfo(
            bypass_filters=message_info.bypass_filters,
            channel_to_respond=message_info.channel_to_respond,
            msg_to_respond=message_info.msg_to_respond,
            email=message_info.email,
            sender_id=message_info.sender_id,
            thread_messages=[],
            is_slash_command=message_info.is_slash_command,
            is_bot_dm=message_info.is_bot_dm,
            thread_to_respond=respond_ts,
        )
    )

    action_values_ephemeral_message = ActionValuesEphemeralMessage(
        original_question_ts=original_question_ts,
        feedback_reminder_id=feedback_reminder_id,
        chat_message_id=chat_message_id,
        message_info=action_values_ephemeral_message_message_info,
        channel_conf=action_values_ephemeral_message_channel_config,
    )

    return ActionsBlock(
        block_id=build_publish_ephemeral_message_id(original_question_ts),
        elements=[
            ButtonElement(
                action_id=SHOW_EVERYONE_ACTION_ID,
                text="📢 Share with Everyone",
                value=action_values_ephemeral_message.model_dump_json(),
            ),
            ButtonElement(
                action_id=KEEP_TO_YOURSELF_ACTION_ID,
                text="🤫  Keep to Yourself",
                value=action_values_ephemeral_message.model_dump_json(),
            ),
        ],
    )


def get_document_feedback_blocks() -> Block:
    return SectionBlock(
        text=(
            "- 'Up-Boost' if this document is a good source of information and should be "
            "shown more often.\n"
            "- 'Down-boost' if this document is a poor source of information and should be "
            "shown less often.\n"
            "- 'Hide' if this document is deprecated and should never be shown anymore."
        ),
        accessory=RadioButtonsElement(
            options=[
                Option(
                    text=":thumbsup: Up-Boost",
                    value=SearchFeedbackType.ENDORSE.value,
                ),
                Option(
                    text=":thumbsdown: Down-Boost",
                    value=SearchFeedbackType.REJECT.value,
                ),
                Option(
                    text=":x: Hide",
                    value=SearchFeedbackType.HIDE.value,
                ),
            ]
        ),
    )


def _build_doc_feedback_block(
    message_id: int,
    document_id: str,
    document_rank: int,
) -> ButtonElement:
    feedback_id = build_feedback_id(message_id, document_id, document_rank)
    return ButtonElement(
        action_id=FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID,
        value=feedback_id,
        text="Give Feedback",
    )


def get_restate_blocks(
    msg: str,
    is_slash_command: bool,
) -> list[Block]:
    # Only the slash command needs this context because the user doesn't see their own input
    if not is_slash_command:
        return []

    return [
        HeaderBlock(text="Responding to the Query"),
        SectionBlock(text=f"```{msg}```"),
    ]


def _build_documents_blocks(
    documents: list[SearchDoc],
    message_id: int | None,
    num_docs_to_display: int = ONYX_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
    header_text = "Reference Documents"
    seen_docs_identifiers = set()
    section_blocks: list[Block] = [HeaderBlock(text=header_text)]
    included_docs = 0
    for rank, d in enumerate(documents):
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

        # Strip newlines from the semantic identifier for Slackbot formatting
        doc_sem_id = d.semantic_identifier.replace("\n", " ")
        if d.source_type == DocumentSource.SLACK.value:
            doc_sem_id = "#" + doc_sem_id

        used_chars = len(doc_sem_id) + 3
        match_str = translate_vespa_highlight_to_slack(d.match_highlights, used_chars)

        included_docs += 1

        header_line = f"{doc_sem_id}\n"
        if d.link:
            header_line = f"<{d.link}|{doc_sem_id}>\n"

        updated_at_line = ""
        updated_at_str = _format_doc_updated_at(d.updated_at)
        if updated_at_str:
            updated_at_line = f"_Updated {updated_at_str}_\n"

        body_text = f">{remove_slack_text_interactions(match_str)}"

        block_text = header_line + updated_at_line + body_text

        feedback: ButtonElement | dict = {}
        if message_id is not None:
            feedback = _build_doc_feedback_block(
                message_id=message_id,
                document_id=d.document_id,
                document_rank=rank,
            )

        section_blocks.append(
            SectionBlock(text=block_text, accessory=feedback),
        )

        section_blocks.append(DividerBlock())

        if included_docs >= num_docs_to_display:
            break

    return section_blocks


def _build_sources_blocks(
    cited_documents: list[tuple[int, SearchDoc]],
    num_docs_to_display: int = ONYX_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
    if not cited_documents:
        return [
            SectionBlock(
                text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔"
            )
        ]

    seen_docs_identifiers = set()
    section_blocks: list[Block] = [SectionBlock(text="*Sources:*")]
    included_docs = 0
    for citation_num, d in cited_documents:
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

        doc_sem_id = d.semantic_identifier
        if d.source_type == DocumentSource.SLACK.value:
            # for legacy reasons, before the switch to how Slack semantic identifiers are constructed
            if "#" not in doc_sem_id:
                doc_sem_id = "#" + doc_sem_id

        # this is needed to try and prevent the line from overflowing
        # if it does overflow, the image gets placed above the title and it
        # looks bad
        doc_sem_id = (
            doc_sem_id[:_MAX_BLURB_LEN] + "..."
            if len(doc_sem_id) > _MAX_BLURB_LEN
            else doc_sem_id
        )

        owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None
        days_ago_str = _format_doc_updated_at(d.updated_at)
        final_metadata_str = " | ".join(
            ([owner_str] if owner_str else [])
            + ([days_ago_str] if days_ago_str else [])
        )

        document_title = _clean_markdown_link_text(doc_sem_id)
        img_link = source_to_github_img_link(d.source_type)

        section_blocks.append(
            ContextBlock(
                elements=(
                    [
                        ImageElement(
                            image_url=img_link,
                            alt_text=f"{d.source_type.value} logo",
                        )
                    ]
                    if img_link
                    else []
                )
                + [
                    (
                        MarkdownTextObject(text=f"{document_title}")
                        if d.link == ""
                        else MarkdownTextObject(
                            text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
                        )
                    ),
                ]
            )
        )

        if included_docs >= num_docs_to_display:
            break

    return section_blocks


def _priority_ordered_documents_blocks(
    answer: ChatBasicResponse,
) -> list[Block]:
    top_docs = answer.top_documents if answer.top_documents else None
    if not top_docs:
        return []

    document_blocks = _build_documents_blocks(
        documents=top_docs,
        message_id=answer.message_id,
    )
    if document_blocks:
        document_blocks = [DividerBlock()] + document_blocks
    return document_blocks


def _build_citations_blocks(
    answer: ChatBasicResponse,
) -> list[Block]:
    top_docs = answer.top_documents
    citations = answer.citation_info or []
    cited_docs: list[tuple[int, SearchDoc]] = []
    for citation_info in citations:
        matching_doc = next(
            (d for d in top_docs if d.document_id == citation_info.document_id),
            None,
        )
        if matching_doc:
            cited_docs.append((citation_info.citation_number, matching_doc))

    cited_docs.sort()
    citations_block = _build_sources_blocks(cited_documents=cited_docs)
    return citations_block


def _build_main_response_blocks(
    answer: ChatBasicResponse,
) -> list[Block]:
    # TODO: add back in later when auto-filtering is implemented
    # if (
    #     retrieval_info.applied_time_cutoff
    #     or retrieval_info.recency_bias_multiplier > 1
    #     or retrieval_info.applied_source_filters
    # ):
    #     filter_text = "Filters: "
    #     if retrieval_info.applied_source_filters:
    #         sources_str = ", ".join(
    #             [s.value for s in retrieval_info.applied_source_filters]
    #         )
    #         filter_text += f"`Sources in [{sources_str}]`"
    #         if (
    #             retrieval_info.applied_time_cutoff
    #             or retrieval_info.recency_bias_multiplier > 1
    #         ):
    #             filter_text += " and "
    #     if retrieval_info.applied_time_cutoff is not None:
    #         time_str = retrieval_info.applied_time_cutoff.strftime("%b %d, %Y")
    #         filter_text += f"`Docs Updated >= {time_str}` "
    #     if retrieval_info.recency_bias_multiplier > 1:
    #         if retrieval_info.applied_time_cutoff is not None:
    #             filter_text += "+ "
    #         filter_text += "`Prioritize Recently Updated Docs`"

    #     filter_block = SectionBlock(text=f"_{filter_text}_")

    # replaces markdown links with slack format links
    formatted_answer = format_slack_message(answer.answer)
    answer_processed = decode_escapes(remove_slack_text_interactions(formatted_answer))
    answer_blocks = [SectionBlock(text=text) for text in _split_text(answer_processed)]

    return cast(list[Block], answer_blocks)


def _build_continue_in_web_ui_block(
    message_id: int | None,
) -> Block:
    if message_id is None:
        raise ValueError("No message id provided to build continue in web ui block")
    with get_session_with_current_tenant() as db_session:
        chat_session = get_chat_session_by_message_id(
            db_session=db_session,
            message_id=message_id,
        )
        return ActionsBlock(
            block_id=build_continue_in_web_ui_id(message_id),
            elements=[
                ButtonElement(
                    action_id=CONTINUE_IN_WEB_UI_ACTION_ID,
                    text="Continue Chat in Onyx!",
                    style="primary",
                    url=f"{WEB_DOMAIN}/chat?slackChatId={chat_session.id}",
                ),
            ],
        )


def _build_follow_up_block(message_id: int | None) -> ActionsBlock:
    return ActionsBlock(
        block_id=build_feedback_id(message_id) if message_id is not None else None,
        elements=[
            ButtonElement(
                action_id=IMMEDIATE_RESOLVED_BUTTON_ACTION_ID,
                style="primary",
                text="I'm all set!",
            ),
            ButtonElement(
                action_id=FOLLOWUP_BUTTON_ACTION_ID,
                style="danger",
                text="I need more help from a human!",
            ),
        ],
    )


def build_follow_up_resolved_blocks(
    tag_ids: list[str], group_ids: list[str]
) -> list[Block]:
    tag_str = " ".join([f"<@{tag}>" for tag in tag_ids])
    if tag_str:
        tag_str += " "

    group_str = " ".join([f"<!subteam^{group_id}|>" for group_id in group_ids])
    if group_str:
        group_str += " "

    text = (
        tag_str
        + group_str
        + "Someone has requested more help.\n\n:point_down:Please mark this resolved after answering!"
    )
    text_block = SectionBlock(text=text)
    button_block = ActionsBlock(
        elements=[
            ButtonElement(
                action_id=FOLLOWUP_BUTTON_RESOLVED_ACTION_ID,
                style="primary",
                text="Mark Resolved",
            )
        ]
    )
    return [text_block, button_block]


def build_slack_response_blocks(
    answer: ChatBasicResponse,
    message_info: SlackMessageInfo,
    channel_conf: ChannelConfig | None,
    feedback_reminder_id: str | None,
    skip_ai_feedback: bool = False,
    offer_ephemeral_publication: bool = False,
    skip_restated_question: bool = False,
) -> list[Block]:
    """
    This function is a top level function that builds all the blocks for the Slack response.
    It also handles combining all the blocks together.
    """
    # If called with the OnyxBot slash command, the question is lost so we have to reshow it
    if not skip_restated_question:
        restate_question_block = get_restate_blocks(
            message_info.thread_messages[-1].message, message_info.is_slash_command
        )
    else:
        restate_question_block = []

    answer_blocks = _build_main_response_blocks(answer)

    web_follow_up_block = []
    if channel_conf and channel_conf.get("show_continue_in_web_ui"):
        web_follow_up_block.append(
            _build_continue_in_web_ui_block(
                message_id=answer.message_id,
            )
        )

    follow_up_block = []
    if (
        channel_conf
        and channel_conf.get("follow_up_tags") is not None
        and not channel_conf.get("is_ephemeral", False)
    ):
        follow_up_block.append(_build_follow_up_block(message_id=answer.message_id))

    publish_ephemeral_message_block = []

    if (
        offer_ephemeral_publication
        and answer.message_id is not None
        and message_info.msg_to_respond is not None
        and channel_conf is not None
    ):
        publish_ephemeral_message_block.append(
            _build_ephemeral_publication_block(
                channel_id=message_info.channel_to_respond,
                chat_message_id=answer.message_id,
                original_question_ts=message_info.msg_to_respond,
                message_info=message_info,
                channel_conf=channel_conf,
                feedback_reminder_id=feedback_reminder_id,
            )
        )

    ai_feedback_block: list[Block] = []

    if answer.message_id is not None and not skip_ai_feedback:
        ai_feedback_block.append(
            _build_qa_feedback_block(
                message_id=answer.message_id,
                feedback_reminder_id=feedback_reminder_id,
            )
        )

    citations_blocks = []
    if answer.citation_info:
        citations_blocks = _build_citations_blocks(answer)

    citations_divider = [DividerBlock()] if citations_blocks else []
    buttons_divider = [DividerBlock()] if web_follow_up_block or follow_up_block else []

    all_blocks = (
        restate_question_block
        + answer_blocks
        + publish_ephemeral_message_block
        + ai_feedback_block
        + citations_divider
        + citations_blocks
        + buttons_divider
        + web_follow_up_block
        + follow_up_block
    )

    return all_blocks


================================================
FILE: backend/onyx/onyxbot/slack/config.py
================================================
import os

from sqlalchemy.orm import Session

from onyx.db.models import SlackChannelConfig
from onyx.db.slack_channel_config import (
    fetch_slack_channel_config_for_channel_or_default,
)
from onyx.db.slack_channel_config import fetch_slack_channel_configs

VALID_SLACK_FILTERS = [
    "answerable_prefilter",
    "well_answered_postfilter",
    "questionmark_prefilter",
]


def get_slack_channel_config_for_bot_and_channel(
    db_session: Session,
    slack_bot_id: int,
    channel_name: str | None,
) -> SlackChannelConfig:
    slack_bot_config = fetch_slack_channel_config_for_channel_or_default(
        db_session=db_session, slack_bot_id=slack_bot_id, channel_name=channel_name
    )
    if not slack_bot_config:
        raise ValueError(
            "No default configuration has been set for this Slack bot. This should not be possible."
        )

    return slack_bot_config


def validate_channel_name(
    db_session: Session,
    current_slack_bot_id: int,
    channel_name: str,
    current_slack_channel_config_id: int | None,
) -> str:
    """Make sure that this channel_name does not exist in other Slack channel configs.
    Returns a cleaned up channel name (e.g. '#' removed if present)"""
    slack_bot_configs = fetch_slack_channel_configs(
        db_session=db_session,
        slack_bot_id=current_slack_bot_id,
    )
    cleaned_channel_name = channel_name.lstrip("#").lower()
    for slack_channel_config in slack_bot_configs:
        if slack_channel_config.id == current_slack_channel_config_id:
            continue

        if cleaned_channel_name == slack_channel_config.channel_config["channel_name"]:
            raise ValueError(
                f"Channel name '{channel_name}' already exists in "
                "another Slack channel config with in Slack Bot with name: "
                f"{slack_channel_config.slack_bot.name}"
            )

    return cleaned_channel_name


# Scaling configurations for multi-tenant Slack channel handling
TENANT_LOCK_EXPIRATION = 1800  # How long a pod can hold exclusive access to a tenant before other pods can acquire it
TENANT_HEARTBEAT_INTERVAL = (
    15  # How often pods send heartbeats to indicate they are still processing a tenant
)
TENANT_HEARTBEAT_EXPIRATION = (
    60  # How long before a tenant's heartbeat expires, allowing other pods to take over
)
TENANT_ACQUISITION_INTERVAL = 60  # How often pods attempt to acquire unprocessed tenants and checks for new tokens

MAX_TENANTS_PER_POD = int(os.getenv("MAX_TENANTS_PER_POD", 50))


================================================
FILE: backend/onyx/onyxbot/slack/constants.py
================================================
import re
from enum import Enum

# Matches Slack channel references like <#C097NBWMY8Y> or <#C097NBWMY8Y|channel-name>
SLACK_CHANNEL_REF_PATTERN = re.compile(r"<#([A-Z0-9]+)(?:\|([^>]+))?>")

LIKE_BLOCK_ACTION_ID = "feedback-like"
DISLIKE_BLOCK_ACTION_ID = "feedback-dislike"
SHOW_EVERYONE_ACTION_ID = "show-everyone"
KEEP_TO_YOURSELF_ACTION_ID = "keep-to-yourself"
CONTINUE_IN_WEB_UI_ACTION_ID = "continue-in-web-ui"
FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID = "feedback-doc-button"
IMMEDIATE_RESOLVED_BUTTON_ACTION_ID = "immediate-resolved-button"
FOLLOWUP_BUTTON_ACTION_ID = "followup-button"
FOLLOWUP_BUTTON_RESOLVED_ACTION_ID = "followup-resolved-button"
VIEW_DOC_FEEDBACK_ID = "view-doc-feedback"
GENERATE_ANSWER_BUTTON_ACTION_ID = "generate-answer-button"


class FeedbackVisibility(str, Enum):
    PRIVATE = "private"
    ANONYMOUS = "anonymous"
    PUBLIC = "public"


================================================
FILE: backend/onyx/onyxbot/slack/formatting.py
================================================
import re
from collections.abc import Callable
from typing import Any

from mistune import create_markdown
from mistune import HTMLRenderer

# Tags that should be replaced with a newline (line-break and block-level elements)
_HTML_NEWLINE_TAG_PATTERN = re.compile(
    r"<br\s*/?>|</(?:p|div|li|h[1-6]|tr|blockquote|section|article)>",
    re.IGNORECASE,
)

# Strips HTML tags but excludes autolinks like <https://...> and <mailto:...>
_HTML_TAG_PATTERN = re.compile(
    r"<(?!https?://|mailto:)/?[a-zA-Z][^>]*>",
)

# Matches fenced code blocks (``` ... ```) so we can skip sanitization inside them
_FENCED_CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```")

# Matches the start of any markdown link: [text]( or [[n]](
# The inner group handles nested brackets for citation links like [[1]](.
_MARKDOWN_LINK_PATTERN = re.compile(r"\[(?:[^\[\]]|\[[^\]]*\])*\]\(")

# Matches Slack-style links <url|text> that LLMs sometimes output directly.
# Mistune doesn't recognise this syntax, so text() would escape the angle
# brackets and Slack would render them as literal text instead of links.
_SLACK_LINK_PATTERN = re.compile(r"<(https?://[^|>]+)\|([^>]+)>")


def _sanitize_html(text: str) -> str:
    """Strip HTML tags from a text fragment.

    Block-level closing tags and <br> are converted to newlines.
    All other HTML tags are removed. Autolinks (<https://...>) are preserved.
    """
    text = _HTML_NEWLINE_TAG_PATTERN.sub("\n", text)
    text = _HTML_TAG_PATTERN.sub("", text)
    return text


def _transform_outside_code_blocks(
    message: str, transform: Callable[[str], str]
) -> str:
    """Apply *transform* only to text outside fenced code blocks."""
    parts = _FENCED_CODE_BLOCK_PATTERN.split(message)
    code_blocks = _FENCED_CODE_BLOCK_PATTERN.findall(message)

    result: list[str] = []
    for i, part in enumerate(parts):
        result.append(transform(part))
        if i < len(code_blocks):
            result.append(code_blocks[i])

    return "".join(result)


def _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:
    """Extract markdown link destination, allowing nested parentheses in the URL."""
    depth = 0
    i = start_idx

    while i < len(message):
        curr = message[i]
        if curr == "\\":
            i += 2
            continue

        if curr == "(":
            depth += 1
        elif curr == ")":
            if depth == 0:
                return message[start_idx:i], i
            depth -= 1
        i += 1

    return message[start_idx:], None


def _normalize_link_destinations(message: str) -> str:
    """Wrap markdown link URLs in angle brackets so the parser handles special chars safely.

    Markdown link syntax [text](url) breaks when the URL contains unescaped
    parentheses, spaces, or other special characters. Wrapping the URL in angle
    brackets — [text](<url>) — tells the parser to treat everything inside as
    a literal URL. This applies to all links, not just citations.
    """
    if "](" not in message:
        return message

    normalized_parts: list[str] = []
    cursor = 0

    while match := _MARKDOWN_LINK_PATTERN.search(message, cursor):
        normalized_parts.append(message[cursor : match.end()])
        destination_start = match.end()
        destination, end_idx = _extract_link_destination(message, destination_start)
        if end_idx is None:
            normalized_parts.append(message[destination_start:])
            return "".join(normalized_parts)

        already_wrapped = destination.startswith("<") and destination.endswith(">")
        if destination and not already_wrapped:
            destination = f"<{destination}>"

        normalized_parts.append(destination)
        normalized_parts.append(")")
        cursor = end_idx + 1

    normalized_parts.append(message[cursor:])
    return "".join(normalized_parts)


def _convert_slack_links_to_markdown(message: str) -> str:
    """Convert Slack-style <url|text> links to standard markdown [text](url).

    LLMs sometimes emit Slack mrkdwn link syntax directly. Mistune doesn't
    recognise it, so the angle brackets would be escaped by text() and Slack
    would render the link as literal text instead of a clickable link.
    """
    return _transform_outside_code_blocks(
        message, lambda text: _SLACK_LINK_PATTERN.sub(r"[\2](\1)", text)
    )


def format_slack_message(message: str | None) -> str:
    if message is None:
        return ""
    message = _transform_outside_code_blocks(message, _sanitize_html)
    message = _convert_slack_links_to_markdown(message)
    normalized_message = _normalize_link_destinations(message)
    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough", "table"])
    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
    return result.rstrip("\n")


class SlackRenderer(HTMLRenderer):
    """Renders markdown as Slack mrkdwn format instead of HTML.

    Overrides all HTMLRenderer methods that produce HTML tags to ensure
    no raw HTML ever appears in Slack messages.
    """

    SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}

    def __init__(self) -> None:
        super().__init__()
        self._table_headers: list[str] = []
        self._current_row_cells: list[str] = []

    def escape_special(self, text: str) -> str:
        for special, replacement in self.SPECIALS.items():
            text = text.replace(special, replacement)
        return text

    def heading(self, text: str, level: int, **attrs: Any) -> str:  # noqa: ARG002
        return f"*{text}*\n\n"

    def emphasis(self, text: str) -> str:
        return f"_{text}_"

    def strong(self, text: str) -> str:
        return f"*{text}*"

    def strikethrough(self, text: str) -> str:
        return f"~{text}~"

    def list(self, text: str, ordered: bool, **attrs: Any) -> str:  # noqa: ARG002
        lines = text.split("\n")
        count = 0
        for i, line in enumerate(lines):
            if line.startswith("li: "):
                count += 1
                prefix = f"{count}. " if ordered else "• "
                lines[i] = f"{prefix}{line[4:]}"
        return "\n".join(lines) + "\n"

    def list_item(self, text: str) -> str:
        return f"li: {text}\n"

    def link(self, text: str, url: str, title: str | None = None) -> str:
        escaped_url = self.escape_special(url)
        if text:
            return f"<{escaped_url}|{text}>"
        if title:
            return f"<{escaped_url}|{title}>"
        return f"<{escaped_url}>"

    def image(self, text: str, url: str, title: str | None = None) -> str:
        escaped_url = self.escape_special(url)
        display_text = title or text
        return f"<{escaped_url}|{display_text}>" if display_text else f"<{escaped_url}>"

    def codespan(self, text: str) -> str:
        return f"`{text}`"

    def block_code(self, code: str, info: str | None = None) -> str:  # noqa: ARG002
        return f"```\n{code.rstrip(chr(10))}\n```\n\n"

    def linebreak(self) -> str:
        return "\n"

    def thematic_break(self) -> str:
        return "---\n\n"

    def block_quote(self, text: str) -> str:
        lines = text.strip().split("\n")
        quoted = "\n".join(f">{line}" for line in lines)
        return quoted + "\n\n"

    def block_html(self, html: str) -> str:
        return _sanitize_html(html) + "\n\n"

    def block_error(self, text: str) -> str:
        return f"```\n{text}\n```\n\n"

    def text(self, text: str) -> str:
        # Only escape the three entities Slack recognizes: & < >
        # HTMLRenderer.text() also escapes " to &quot; which Slack renders
        # as literal &quot; text since Slack doesn't recognize that entity.
        return self.escape_special(text)

    # -- Table rendering (converts markdown tables to vertical cards) --

    def table_cell(
        self,
        text: str,
        align: str | None = None,  # noqa: ARG002
        head: bool = False,  # noqa: ARG002
    ) -> str:
        if head:
            self._table_headers.append(text.strip())
        else:
            self._current_row_cells.append(text.strip())
        return ""

    def table_head(self, text: str) -> str:  # noqa: ARG002
        self._current_row_cells = []
        return ""

    def table_row(self, text: str) -> str:  # noqa: ARG002
        cells = self._current_row_cells
        self._current_row_cells = []
        # First column becomes the bold title, remaining columns are bulleted fields
        lines: list[str] = []
        if cells:
            title = cells[0]
            if title:
                # Avoid double-wrapping if cell already contains bold markup
                if title.startswith("*") and title.endswith("*") and len(title) > 1:
                    lines.append(title)
                else:
                    lines.append(f"*{title}*")
            for i, cell in enumerate(cells[1:], start=1):
                if i < len(self._table_headers):
                    lines.append(f"  • {self._table_headers[i]}: {cell}")
                else:
                    lines.append(f"  • {cell}")
        return "\n".join(lines) + "\n\n"

    def table_body(self, text: str) -> str:
        return text

    def table(self, text: str) -> str:
        self._table_headers = []
        self._current_row_cells = []
        return text + "\n"

    def paragraph(self, text: str) -> str:
        return f"{text}\n\n"


================================================
FILE: backend/onyx/onyxbot/slack/handlers/__init__.py
================================================


================================================
FILE: backend/onyx/onyxbot/slack/handlers/handle_buttons.py
================================================
import json
from typing import Any
from typing import cast

from slack_sdk import WebClient
from slack_sdk.models.blocks import SectionBlock
from slack_sdk.models.views import View
from slack_sdk.socket_mode.request import SocketModeRequest
from slack_sdk.webhook import WebhookClient

from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import remove_answer_citations
from onyx.configs.constants import MessageType
from onyx.configs.constants import SearchFeedbackType
from onyx.configs.onyxbot_configs import ONYX_BOT_FOLLOWUP_EMOJI
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.db.chat import get_chat_message
from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.feedback import create_chat_message_feedback
from onyx.db.feedback import create_doc_retrieval_feedback
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import build_follow_up_resolved_blocks
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
from onyx.onyxbot.slack.blocks import get_document_feedback_blocks
from onyx.onyxbot.slack.config import get_slack_channel_config_for_bot_and_channel
from onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import FeedbackVisibility
from onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID
from onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID
from onyx.onyxbot.slack.constants import VIEW_DOC_FEEDBACK_ID
from onyx.onyxbot.slack.handlers.handle_message import (
    remove_scheduled_feedback_reminder,
)
from onyx.onyxbot.slack.handlers.handle_regular_answer import (
    handle_regular_answer,
)
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.utils import build_feedback_id
from onyx.onyxbot.slack.utils import decompose_action_id
from onyx.onyxbot.slack.utils import fetch_group_ids_from_names
from onyx.onyxbot.slack.utils import fetch_slack_user_ids_from_emails
from onyx.onyxbot.slack.utils import get_channel_name_from_id
from onyx.onyxbot.slack.utils import get_feedback_visibility
from onyx.onyxbot.slack.utils import read_slack_thread
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import TenantSocketModeClient
from onyx.onyxbot.slack.utils import update_emote_react
from onyx.server.query_and_chat.models import ChatMessageDetail
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.utils.logger import setup_logger


logger = setup_logger()


def _convert_document_ids_to_citation_info(
    citation_dict: dict[int, str], top_documents: list[SavedSearchDoc]
) -> list[CitationInfo]:
    citation_list_with_document_id = []
    # Build a set of valid document_ids from top_documents for validation
    valid_document_ids = {doc.document_id for doc in top_documents}

    for citation_num, document_id in citation_dict.items():
        if document_id is not None and document_id in valid_document_ids:
            citation_list_with_document_id.append(
                CitationInfo(
                    citation_number=citation_num,
                    document_id=document_id,
                )
            )
    return citation_list_with_document_id


def _build_citation_list(chat_message_detail: ChatMessageDetail) -> list[CitationInfo]:
    citation_dict = chat_message_detail.citations
    if citation_dict is None:
        return []
    else:
        top_documents = (
            chat_message_detail.context_docs if chat_message_detail.context_docs else []
        )
        citation_list = _convert_document_ids_to_citation_info(
            citation_dict, top_documents
        )
        return citation_list


def handle_doc_feedback_button(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
) -> None:
    if not (actions := req.payload.get("actions")):
        logger.error("Missing actions. Unable to build the source feedback view")
        return

    # Extracts the feedback_id coming from the 'source feedback' button
    # and generates a new one for the View, to keep track of the doc info
    query_event_id, doc_id, doc_rank = decompose_action_id(actions[0].get("value"))
    external_id = build_feedback_id(query_event_id, doc_id, doc_rank)

    channel_id = req.payload["container"]["channel_id"]
    thread_ts = req.payload["container"].get("thread_ts", None)

    data = View(
        type="modal",
        callback_id=VIEW_DOC_FEEDBACK_ID,
        external_id=external_id,
        # We use the private metadata to keep track of the channel id and thread ts
        private_metadata=f"{channel_id}_{thread_ts}",
        title="Give Feedback",
        blocks=[get_document_feedback_blocks()],
        submit="send",
        close="cancel",
    )

    client.web_client.views_open(
        trigger_id=req.payload["trigger_id"], view=data.to_dict()
    )


def handle_generate_answer_button(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
) -> None:
    channel_id = req.payload["channel"]["id"]
    channel_name = req.payload["channel"]["name"]
    message_ts = req.payload["message"]["ts"]
    thread_ts = req.payload["container"].get("thread_ts", None)
    user_id = req.payload["user"]["id"]
    expert_info = expert_info_from_slack_id(user_id, client.web_client, user_cache={})
    email = expert_info.email if expert_info else None

    if not thread_ts:
        raise ValueError("Missing thread_ts in the payload")

    thread_messages = read_slack_thread(
        tenant_id=client._tenant_id,
        channel=channel_id,
        thread=thread_ts,
        client=client.web_client,
    )
    # remove all assistant messages till we get to the last user message
    # we want the new answer to be generated off of the last "question" in
    # the thread
    for i in range(len(thread_messages) - 1, -1, -1):
        if thread_messages[i].role == MessageType.USER:
            break
        if thread_messages[i].role == MessageType.ASSISTANT:
            thread_messages.pop(i)

    # tell the user that we're working on it
    # Send an ephemeral message to the user that we're generating the answer
    respond_in_thread_or_channel(
        client=client.web_client,
        channel=channel_id,
        receiver_ids=[user_id],
        text="I'm working on generating a full answer for you. This may take a moment...",
        thread_ts=thread_ts,
    )

    with get_session_with_current_tenant() as db_session:
        slack_channel_config = get_slack_channel_config_for_bot_and_channel(
            db_session=db_session,
            slack_bot_id=client.slack_bot_id,
            channel_name=channel_name,
        )

        handle_regular_answer(
            message_info=SlackMessageInfo(
                thread_messages=thread_messages,
                channel_to_respond=channel_id,
                msg_to_respond=cast(str, message_ts or thread_ts),
                thread_to_respond=cast(str, thread_ts or message_ts),
                sender_id=user_id or None,
                email=email or None,
                bypass_filters=True,
                is_slash_command=False,
                is_bot_dm=False,
            ),
            slack_channel_config=slack_channel_config,
            receiver_ids=None,
            client=client.web_client,
            channel=channel_id,
            logger=logger,
            feedback_reminder_id=None,
        )


def handle_publish_ephemeral_message_button(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
    action_id: str,
) -> None:
    """
    This function handles the Share with Everyone/Keep for Yourself buttons
    for ephemeral messages.
    """
    channel_id = req.payload["channel"]["id"]
    ephemeral_message_ts = req.payload["container"]["message_ts"]

    slack_sender_id = req.payload["user"]["id"]
    response_url = req.payload["response_url"]
    webhook = WebhookClient(url=response_url)

    # The additional data required that was added to buttons.
    # Specifically, this contains the message_info, channel_conf information
    # and some additional attributes.
    value_dict = json.loads(req.payload["actions"][0]["value"])

    original_question_ts = value_dict.get("original_question_ts")
    if not original_question_ts:
        raise ValueError("Missing original_question_ts in the payload")
    if not ephemeral_message_ts:
        raise ValueError("Missing ephemeral_message_ts in the payload")

    feedback_reminder_id = value_dict.get("feedback_reminder_id")

    slack_message_info = SlackMessageInfo(**value_dict["message_info"])
    channel_conf = value_dict.get("channel_conf")

    user_email = value_dict.get("message_info", {}).get("email")

    chat_message_id = value_dict.get("chat_message_id")

    # Obtain onyx_user and chat_message information
    if not chat_message_id:
        raise ValueError("Missing chat_message_id in the payload")

    with get_session_with_current_tenant() as db_session:
        onyx_user = get_user_by_email(user_email, db_session)
        if not onyx_user:
            raise ValueError("Cannot determine onyx_user_id from email in payload")
        try:
            chat_message = get_chat_message(chat_message_id, onyx_user.id, db_session)
        except ValueError:
            chat_message = get_chat_message(
                chat_message_id, None, db_session
            )  # is this good idea?
        except Exception as e:
            logger.error(f"Failed to get chat message: {e}")
            raise e

        chat_message_detail = translate_db_message_to_chat_message_detail(chat_message)

        # construct the proper citation format and then the answer in the suitable format
        # we need to construct the blocks.
        citation_list = _build_citation_list(chat_message_detail)

        if chat_message_detail.context_docs:
            top_documents: list[SearchDoc] = [
                SearchDoc.from_saved_search_doc(doc)
                for doc in chat_message_detail.context_docs
            ]
        else:
            top_documents = []

        onyx_bot_answer = ChatBasicResponse(
            answer=chat_message_detail.message,
            answer_citationless=remove_answer_citations(chat_message_detail.message),
            top_documents=top_documents,
            message_id=chat_message_id,
            error_msg=None,
            citation_info=citation_list,
        )

    # Note: we need to use the webhook and the respond_url to update/delete ephemeral messages
    if action_id == SHOW_EVERYONE_ACTION_ID:
        # Convert to non-ephemeral message in thread
        try:
            webhook.send(
                response_type="ephemeral",
                text="",
                blocks=[],
                replace_original=True,
                delete_original=True,
            )
        except Exception as e:
            logger.error(f"Failed to send webhook: {e}")

        # remove handling of empheremal block and add AI feedback.
        all_blocks = build_slack_response_blocks(
            answer=onyx_bot_answer,
            message_info=slack_message_info,
            channel_conf=channel_conf,
            feedback_reminder_id=feedback_reminder_id,
            skip_ai_feedback=False,
            offer_ephemeral_publication=False,
            skip_restated_question=True,
        )
        try:
            # Post in thread as non-ephemeral message
            respond_in_thread_or_channel(
                client=client.web_client,
                channel=channel_id,
                receiver_ids=None,  # If respond_member_group_list is set, send to them. TODO: check!
                text="Hello! Onyx has some results for you!",
                blocks=all_blocks,
                thread_ts=original_question_ts,
                # don't unfurl, since otherwise we will have 5+ previews which makes the message very long
                unfurl=False,
                send_as_ephemeral=False,
            )
        except Exception as e:
            logger.error(f"Failed to publish ephemeral message: {e}")
            raise e

    elif action_id == KEEP_TO_YOURSELF_ACTION_ID:
        # Keep as ephemeral message in channel or thread, but remove the publish button and add feedback button

        changed_blocks = build_slack_response_blocks(
            answer=onyx_bot_answer,
            message_info=slack_message_info,
            channel_conf=channel_conf,
            feedback_reminder_id=feedback_reminder_id,
            skip_ai_feedback=False,
            offer_ephemeral_publication=False,
            skip_restated_question=True,
        )

        try:
            if slack_message_info.thread_to_respond is not None:
                # There seems to be a bug in slack where an update within the thread
                # actually leads to the update to be posted in the channel. Therefore,
                # for now we delete the original ephemeral message and post a new one
                # if the ephemeral message is in a thread.
                webhook.send(
                    response_type="ephemeral",
                    text="",
                    blocks=[],
                    replace_original=True,
                    delete_original=True,
                )

                respond_in_thread_or_channel(
                    client=client.web_client,
                    channel=channel_id,
                    receiver_ids=[slack_sender_id],
                    text="Your personal response, sent as an ephemeral message.",
                    blocks=changed_blocks,
                    thread_ts=original_question_ts,
                    # don't unfurl, since otherwise we will have 5+ previews which makes the message very long
                    unfurl=False,
                    send_as_ephemeral=True,
                )
            else:
                # This works fine if the ephemeral message is in the channel
                webhook.send(
                    response_type="ephemeral",
                    text="Your personal response, sent as an ephemeral message.",
                    blocks=changed_blocks,
                    replace_original=True,
                    delete_original=False,
                )
        except Exception as e:
            logger.error(f"Failed to send webhook: {e}")


def handle_slack_feedback(
    feedback_id: str,
    feedback_type: str,
    feedback_msg_reminder: str,
    client: WebClient,
    user_id_to_post_confirmation: str,
    channel_id_to_post_confirmation: str,
    thread_ts_to_post_confirmation: str,
) -> None:
    message_id, doc_id, doc_rank = decompose_action_id(feedback_id)

    # Get Onyx user from Slack ID
    expert_info = expert_info_from_slack_id(
        user_id_to_post_confirmation, client, user_cache={}
    )
    email = expert_info.email if expert_info else None

    with get_session_with_current_tenant() as db_session:
        onyx_user = get_user_by_email(email, db_session) if email else None
        if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]:
            create_chat_message_feedback(
                is_positive=feedback_type == LIKE_BLOCK_ACTION_ID,
                feedback_text="",
                chat_message_id=message_id,
                user_id=onyx_user.id if onyx_user else None,
                db_session=db_session,
            )
            remove_scheduled_feedback_reminder(
                client=client,
                channel=user_id_to_post_confirmation,
                msg_id=feedback_msg_reminder,
            )
        elif feedback_type in [
            SearchFeedbackType.ENDORSE.value,
            SearchFeedbackType.REJECT.value,
            SearchFeedbackType.HIDE.value,
        ]:
            if doc_id is None or doc_rank is None:
                raise ValueError("Missing information for Document Feedback")

            if feedback_type == SearchFeedbackType.ENDORSE.value:
                feedback = SearchFeedbackType.ENDORSE
            elif feedback_type == SearchFeedbackType.REJECT.value:
                feedback = SearchFeedbackType.REJECT
            else:
                feedback = SearchFeedbackType.HIDE

            create_doc_retrieval_feedback(
                message_id=message_id,
                document_id=doc_id,
                document_rank=doc_rank,
                db_session=db_session,
                clicked=False,  # Not tracking this for Slack
                feedback=feedback,
            )
        else:
            logger.error(f"Feedback type '{feedback_type}' not supported")

    if get_feedback_visibility() == FeedbackVisibility.PRIVATE or feedback_type not in [
        LIKE_BLOCK_ACTION_ID,
        DISLIKE_BLOCK_ACTION_ID,
    ]:
        client.chat_postEphemeral(
            channel=channel_id_to_post_confirmation,
            user=user_id_to_post_confirmation,
            thread_ts=thread_ts_to_post_confirmation,
            text="Thanks for your feedback!",
        )
    else:
        feedback_response_txt = (
            "liked" if feedback_type == LIKE_BLOCK_ACTION_ID else "disliked"
        )

        if get_feedback_visibility() == FeedbackVisibility.ANONYMOUS:
            msg = f"A user has {feedback_response_txt} the AI Answer"
        else:
            msg = f"<@{user_id_to_post_confirmation}> has {feedback_response_txt} the AI Answer"

        respond_in_thread_or_channel(
            client=client,
            channel=channel_id_to_post_confirmation,
            text=msg,
            thread_ts=thread_ts_to_post_confirmation,
            unfurl=False,
        )


def handle_followup_button(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
) -> None:
    action_id = None
    if actions := req.payload.get("actions"):
        action = cast(dict[str, Any], actions[0])
        action_id = cast(str, action.get("block_id"))

    channel_id = req.payload["container"]["channel_id"]
    thread_ts = req.payload["container"].get("thread_ts", None)

    update_emote_react(
        emoji=ONYX_BOT_FOLLOWUP_EMOJI,
        channel=channel_id,
        message_ts=thread_ts,
        remove=False,
        client=client.web_client,
    )

    tag_ids: list[str] = []
    group_ids: list[str] = []
    with get_session_with_current_tenant() as db_session:
        channel_name, is_dm = get_channel_name_from_id(
            client=client.web_client, channel_id=channel_id
        )
        slack_channel_config = get_slack_channel_config_for_bot_and_channel(
            db_session=db_session,
            slack_bot_id=client.slack_bot_id,
            channel_name=channel_name,
        )
        if slack_channel_config:
            tag_names = slack_channel_config.channel_config.get("follow_up_tags")
            remaining = None
            if tag_names:
                tag_ids, remaining = fetch_slack_user_ids_from_emails(
                    tag_names, client.web_client
                )
            if remaining:
                group_ids, _ = fetch_group_ids_from_names(remaining, client.web_client)

    blocks = build_follow_up_resolved_blocks(tag_ids=tag_ids, group_ids=group_ids)

    respond_in_thread_or_channel(
        client=client.web_client,
        channel=channel_id,
        text="Received your request for more help",
        blocks=blocks,
        thread_ts=thread_ts,
        unfurl=False,
    )

    if action_id is not None:
        message_id, _, _ = decompose_action_id(action_id)

        create_chat_message_feedback(
            is_positive=None,
            feedback_text="",
            chat_message_id=message_id,
            user_id=None,  # no "user" for Slack bot for now
            db_session=db_session,
            required_followup=True,
        )


def get_clicker_name(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
) -> str:
    clicker_name = req.payload.get("user", {}).get("name", "Someone")
    clicker_real_name = None
    try:
        clicker = client.web_client.users_info(user=req.payload["user"]["id"])
        clicker_real_name = (
            cast(dict, clicker.data).get("user", {}).get("profile", {}).get("real_name")
        )
    except Exception:
        # Likely a scope issue
        pass

    if clicker_real_name:
        clicker_name = clicker_real_name

    return clicker_name


def handle_followup_resolved_button(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
    immediate: bool = False,
) -> None:
    channel_id = req.payload["container"]["channel_id"]
    message_ts = req.payload["container"]["message_ts"]
    thread_ts = req.payload["container"].get("thread_ts", None)

    clicker_name = get_clicker_name(req, client)

    update_emote_react(
        emoji=ONYX_BOT_FOLLOWUP_EMOJI,
        channel=channel_id,
        message_ts=thread_ts,
        remove=True,
        client=client.web_client,
    )

    # Delete the message with the option to mark resolved
    if not immediate:
        response = client.web_client.chat_delete(
            channel=channel_id,
            ts=message_ts,
        )

        if not response.get("ok"):
            logger.error("Unable to delete message for resolved")

    if immediate:
        msg_text = f"{clicker_name} has marked this question as resolved!"
    else:
        msg_text = (
            f"{clicker_name} has marked this question as resolved! "
            f'\n\n You can always click the "I need more help button" to let the team '
            f"know that your problem still needs attention."
        )

    resolved_block = SectionBlock(text=msg_text)

    respond_in_thread_or_channel(
        client=client.web_client,
        channel=channel_id,
        text="Your request for help as been addressed!",
        blocks=[resolved_block],
        thread_ts=thread_ts,
        unfurl=False,
    )


================================================
FILE: backend/onyx/onyxbot/slack/handlers/handle_message.py
================================================
import datetime

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

from onyx.configs.onyxbot_configs import ONYX_BOT_FEEDBACK_REMINDER
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccountType
from onyx.db.models import SlackChannelConfig
from onyx.db.user_preferences import activate_user
from onyx.db.users import add_slack_user_if_not_exists
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import get_feedback_reminder_blocks
from onyx.onyxbot.slack.handlers.handle_regular_answer import (
    handle_regular_answer,
)
from onyx.onyxbot.slack.handlers.handle_standard_answers import (
    handle_standard_answers,
)
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.utils import fetch_slack_user_ids_from_emails
from onyx.onyxbot.slack.utils import fetch_user_ids_from_groups
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import slack_usage_report
from onyx.onyxbot.slack.utils import update_emote_react
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.configs import SLACK_CHANNEL_ID

logger_base = setup_logger()


def send_msg_ack_to_user(details: SlackMessageInfo, client: WebClient) -> None:
    if details.is_slash_command and details.sender_id:
        respond_in_thread_or_channel(
            client=client,
            channel=details.channel_to_respond,
            thread_ts=details.msg_to_respond,
            receiver_ids=[details.sender_id],
            text="Hi, we're evaluating your query :face_with_monocle:",
        )
        return

    update_emote_react(
        emoji=ONYX_BOT_REACT_EMOJI,
        channel=details.channel_to_respond,
        message_ts=details.msg_to_respond,
        remove=False,
        client=client,
    )


def schedule_feedback_reminder(
    details: SlackMessageInfo, include_followup: bool, client: WebClient
) -> str | None:
    logger = setup_logger(extra={SLACK_CHANNEL_ID: details.channel_to_respond})

    if not ONYX_BOT_FEEDBACK_REMINDER:
        logger.info("Scheduled feedback reminder disabled...")
        return None

    try:
        permalink = client.chat_getPermalink(
            channel=details.channel_to_respond,
            message_ts=details.msg_to_respond,  # type:ignore
        )
    except SlackApiError as e:
        logger.error(f"Unable to generate the feedback reminder permalink: {e}")
        return None

    now = datetime.datetime.now()
    future = now + datetime.timedelta(minutes=ONYX_BOT_FEEDBACK_REMINDER)

    try:
        response = client.chat_scheduleMessage(
            channel=details.sender_id,  # type:ignore
            post_at=int(future.timestamp()),
            blocks=[
                get_feedback_reminder_blocks(
                    thread_link=permalink.data["permalink"],  # type:ignore
                    include_followup=include_followup,
                )
            ],
            text="",
        )
        logger.info("Scheduled feedback reminder configured")
        return response.data["scheduled_message_id"]  # type:ignore
    except SlackApiError as e:
        logger.error(f"Unable to generate the feedback reminder message: {e}")
        return None


def remove_scheduled_feedback_reminder(
    client: WebClient, channel: str | None, msg_id: str
) -> None:
    logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})

    try:
        client.chat_deleteScheduledMessage(
            channel=channel,  # type:ignore
            scheduled_message_id=msg_id,
        )
        logger.info("Scheduled feedback reminder deleted")
    except SlackApiError as e:
        if e.response["error"] == "invalid_scheduled_message_id":
            logger.info(
                "Unable to delete the scheduled message. It must have already been posted"
            )


def handle_message(
    message_info: SlackMessageInfo,
    slack_channel_config: SlackChannelConfig,
    client: WebClient,
    feedback_reminder_id: str | None,
) -> bool:
    """Potentially respond to the user message depending on filters and if an answer was generated

    Returns True if need to respond with an additional message to the user(s) after this
    function is finished. True indicates an unexpected failure that needs to be communicated
    Query thrown out by filters due to config does not count as a failure that should be notified
    Onyx failing to answer/retrieve docs does count and should be notified
    """
    channel = message_info.channel_to_respond

    logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})

    messages = message_info.thread_messages
    sender_id = message_info.sender_id
    bypass_filters = message_info.bypass_filters
    is_slash_command = message_info.is_slash_command
    is_bot_dm = message_info.is_bot_dm

    action = "slack_message"
    if is_slash_command:
        action = "slack_slash_message"
    elif bypass_filters:
        action = "slack_tag_message"
    elif is_bot_dm:
        action = "slack_dm_message"
    slack_usage_report(action=action, sender_id=sender_id, client=client)

    document_set_names: list[str] | None = None
    persona = slack_channel_config.persona if slack_channel_config else None
    if persona:
        document_set_names = [
            document_set.name for document_set in persona.document_sets
        ]

    respond_tag_only = False
    respond_member_group_list = None

    channel_conf = None
    if slack_channel_config and slack_channel_config.channel_config:
        channel_conf = slack_channel_config.channel_config
        if not bypass_filters and "answer_filters" in channel_conf:
            if (
                "questionmark_prefilter" in channel_conf["answer_filters"]
                and "?" not in messages[-1].message
            ):
                logger.info(
                    "Skipping message since it does not contain a question mark"
                )
                return False

        logger.info(
            "Found slack bot config for channel. Restricting bot to use document "
            f"sets: {document_set_names}, "
            f"validity checks enabled: {channel_conf.get('answer_filters', 'NA')}"
        )

        respond_tag_only = channel_conf.get("respond_tag_only") or False
        respond_member_group_list = channel_conf.get("respond_member_group_list", None)

    # Only default config can be disabled.
    # If channel config is disabled, bot should not respond to this message (including DMs)
    if slack_channel_config.channel_config.get("disabled"):
        logger.info("Skipping message: OnyxBot is disabled for this channel")
        return False

    # If bot should only respond to tags and is not tagged nor in a DM, skip message
    if respond_tag_only and not bypass_filters and not is_bot_dm:
        logger.info("Skipping message: OnyxBot only responds to tags in this channel")
        return False

    # List of user id to send message to, if None, send to everyone in channel
    send_to: list[str] | None = None
    missing_users: list[str] | None = None
    if respond_member_group_list:
        send_to, missing_ids = fetch_slack_user_ids_from_emails(
            respond_member_group_list, client
        )

        user_ids, missing_users = fetch_user_ids_from_groups(missing_ids, client)
        send_to = list(set(send_to + user_ids)) if send_to else user_ids

        if missing_users:
            logger.warning(f"Failed to find these users/groups: {missing_users}")

    # If configured to respond to team members only, then cannot be used with a /OnyxBot command
    # which would just respond to the sender
    if send_to and is_slash_command:
        if sender_id:
            respond_in_thread_or_channel(
                client=client,
                channel=channel,
                receiver_ids=[sender_id],
                text="The OnyxBot slash command is not enabled for this channel",
                thread_ts=None,
            )

    try:
        send_msg_ack_to_user(message_info, client)
    except SlackApiError as e:
        logger.error(f"Was not able to react to user message due to: {e}")

    with get_session_with_current_tenant() as db_session:
        if message_info.email:
            existing_user = get_user_by_email(message_info.email, db_session)
            if existing_user is None:
                # New user — check seat availability before creating
                check_seat_fn = fetch_ee_implementation_or_noop(
                    "onyx.db.license",
                    "check_seat_availability",
                    None,
                )
                # noop returns None when called; real function returns SeatAvailabilityResult
                seat_result = check_seat_fn(db_session=db_session)
                if seat_result is not None and not seat_result.available:
                    logger.info(
                        f"Blocked new Slack user {message_info.email}: {seat_result.error_message}"
                    )
                    respond_in_thread_or_channel(
                        client=client,
                        channel=channel,
                        thread_ts=message_info.msg_to_respond,
                        text=(
                            "We weren't able to respond because your organization "
                            "has reached its user seat limit. Since this is your "
                            "first time interacting with the bot, a new account "
                            "could not be created for you. Please contact your "
                            "Onyx administrator to add more seats."
                        ),
                    )
                    return False

            elif (
                not existing_user.is_active
                and existing_user.account_type == AccountType.BOT
            ):
                check_seat_fn = fetch_ee_implementation_or_noop(
                    "onyx.db.license",
                    "check_seat_availability",
                    None,
                )
                seat_result = check_seat_fn(db_session=db_session)
                if seat_result is not None and not seat_result.available:
                    logger.info(
                        f"Blocked inactive Slack user {message_info.email}: {seat_result.error_message}"
                    )
                    respond_in_thread_or_channel(
                        client=client,
                        channel=channel,
                        thread_ts=message_info.msg_to_respond,
                        text=(
                            "We weren't able to respond because your organization "
                            "has reached its user seat limit. Your account is "
                            "currently deactivated and cannot be reactivated "
                            "until more seats are available. Please contact "
                            "your Onyx administrator."
                        ),
                    )
                    return False

                activate_user(existing_user, db_session)
                invalidate_license_cache_fn = fetch_ee_implementation_or_noop(
                    "onyx.db.license",
                    "invalidate_license_cache",
                    None,
                )
                invalidate_license_cache_fn()
                logger.info(f"Reactivated inactive Slack user {message_info.email}")

            add_slack_user_if_not_exists(db_session, message_info.email)

        # first check if we need to respond with a standard answer
        # standard answers should be published in a thread
        used_standard_answer = handle_standard_answers(
            message_info=message_info,
            receiver_ids=send_to,
            slack_channel_config=slack_channel_config,
            logger=logger,
            client=client,
            db_session=db_session,
        )
        if used_standard_answer:
            return False

        # if no standard answer applies, try a regular answer
        issue_with_regular_answer = handle_regular_answer(
            message_info=message_info,
            slack_channel_config=slack_channel_config,
            receiver_ids=send_to,
            client=client,
            channel=channel,
            logger=logger,
            feedback_reminder_id=feedback_reminder_id,
        )
        return issue_with_regular_answer


================================================
FILE: backend/onyx/onyxbot/slack/handlers/handle_regular_answer.py
================================================
import functools
from collections.abc import Callable
from typing import Any
from typing import Optional
from typing import TypeVar

from retry import retry
from slack_sdk import WebClient

from onyx.auth.users import get_anonymous_user
from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import handle_stream_message_objects
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import MessageType
from onyx.configs.onyxbot_configs import ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER
from onyx.configs.onyxbot_configs import ONYX_BOT_DISPLAY_ERROR_MSGS
from onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import Tag
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.models import ThreadMessage
from onyx.onyxbot.slack.utils import get_channel_from_id
from onyx.onyxbot.slack.utils import get_channel_name_from_id
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import SlackRateLimiter
from onyx.onyxbot.slack.utils import update_emote_react
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import MessageOrigin
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.utils.logger import OnyxLoggingAdapter

srl = SlackRateLimiter()

RT = TypeVar("RT")  # return type


def resolve_channel_references(
    message: str,
    client: WebClient,
    logger: OnyxLoggingAdapter,
) -> tuple[str, list[Tag]]:
    """Parse Slack channel references from a message, resolve IDs to names,
    replace the raw markup with readable #channel-name, and return channel tags
    for search filtering."""
    tags: list[Tag] = []
    channel_matches = SLACK_CHANNEL_REF_PATTERN.findall(message)
    seen_channel_ids: set[str] = set()

    for channel_id, channel_name_from_markup in channel_matches:
        if channel_id in seen_channel_ids:
            continue
        seen_channel_ids.add(channel_id)

        channel_name = channel_name_from_markup or None

        if not channel_name:
            try:
                channel_info = get_channel_from_id(client=client, channel_id=channel_id)
                channel_name = channel_info.get("name") or None
            except Exception:
                logger.warning(f"Failed to resolve channel name for ID: {channel_id}")

            if not channel_name:
                continue

        # Replace raw Slack markup with readable channel name
        if channel_name_from_markup:
            message = message.replace(
                f"<#{channel_id}|{channel_name_from_markup}>",
                f"#{channel_name}",
            )
        else:
            message = message.replace(
                f"<#{channel_id}>",
                f"#{channel_name}",
            )
        tags.append(Tag(tag_key="Channel", tag_value=channel_name))

    return message, tags


def rate_limits(
    client: WebClient, channel: str, thread_ts: Optional[str]
) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
    def decorator(func: Callable[..., RT]) -> Callable[..., RT]:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> RT:
            if not srl.is_available():
                func_randid, position = srl.init_waiter()
                srl.notify(client, channel, position, thread_ts)
                while not srl.is_available():
                    srl.waiter(func_randid)
            srl.acquire_slot()
            return func(*args, **kwargs)

        return wrapper

    return decorator


def build_slack_context_str(
    messages: list[ThreadMessage], channel_name: str | None
) -> str | None:
    if not messages:
        return None

    if channel_name:
        slack_context_str = f"The following is a thread in Slack in channel {channel_name}:\n====================\n"
    else:
        slack_context_str = (
            "The following is a thread from Slack:\n====================\n"
        )

    message_strs: list[str] = []
    for message in messages:
        if message.role == MessageType.USER:
            message_text = f"{message.sender or 'Unknown User'}:\n{message.message}"
        elif message.role == MessageType.ASSISTANT:
            message_text = f"AI:\n{message.message}"
        else:
            message_text = f"{message.role.value.upper()}:\n{message.message}"
        message_strs.append(message_text)

    return slack_context_str + "\n\n".join(message_strs)


def handle_regular_answer(
    message_info: SlackMessageInfo,
    slack_channel_config: SlackChannelConfig,
    receiver_ids: list[str] | None,
    client: WebClient,
    channel: str,
    logger: OnyxLoggingAdapter,
    feedback_reminder_id: str | None,
    num_retries: int = ONYX_BOT_NUM_RETRIES,
    should_respond_with_error_msgs: bool = ONYX_BOT_DISPLAY_ERROR_MSGS,
    disable_docs_only_answer: bool = ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER,
) -> bool:
    channel_conf = slack_channel_config.channel_config

    messages = message_info.thread_messages

    message_ts_to_respond_to = message_info.msg_to_respond
    is_slash_command = message_info.is_slash_command

    # Capture whether response mode for channel is ephemeral. Even if the channel is set
    # to respond with an ephemeral message, we still send as non-ephemeral if
    # the message is a dm with the Onyx bot.
    send_as_ephemeral = (
        slack_channel_config.channel_config.get("is_ephemeral", False)
        or message_info.is_slash_command
    ) and not message_info.is_bot_dm

    # If the channel is configured to respond with an ephemeral message,
    # or the message is a dm to the Onyx bot, we should use the proper onyx user from the email.
    # This will make documents privately accessible to the user available to Onyx Bot answers.
    # Otherwise - if not ephemeral or DM to Onyx Bot - we use anonymous user to restrict
    # to public docs.

    if message_info.email:
        with get_session_with_current_tenant() as db_session:
            found_user = get_user_by_email(message_info.email, db_session)
            user = found_user if found_user else get_anonymous_user()
    else:
        user = get_anonymous_user()

    target_thread_ts = (
        None
        if send_as_ephemeral and len(message_info.thread_messages) < 2
        else message_ts_to_respond_to
    )
    target_receiver_ids = (
        [message_info.sender_id]
        if message_info.sender_id and send_as_ephemeral
        else receiver_ids
    )

    document_set_names: list[str] | None = None
    # If no persona is specified, use the default search based persona
    # This way slack flow always has a persona
    persona = slack_channel_config.persona
    if not persona:
        logger.warning("No persona found for channel config, using default persona")
        with get_session_with_current_tenant() as db_session:
            persona = get_persona_by_id(DEFAULT_PERSONA_ID, user, db_session)
            document_set_names = [
                document_set.name for document_set in persona.document_sets
            ]
    else:
        logger.info(f"Using persona {persona.name} for channel config")
        document_set_names = [
            document_set.name for document_set in persona.document_sets
        ]

    user_message = messages[-1]
    history_messages = messages[:-1]

    # Resolve any <#CHANNEL_ID> references in the user message to readable
    # channel names and extract channel tags for search filtering
    resolved_message, channel_tags = resolve_channel_references(
        message=user_message.message,
        client=client,
        logger=logger,
    )

    user_message = ThreadMessage(
        message=resolved_message,
        sender=user_message.sender,
        role=user_message.role,
    )

    channel_name, _ = get_channel_name_from_id(
        client=client,
        channel_id=channel,
    )

    # NOTE: only the message history will contain the person asking. This is likely
    # fine since the most common use case for this info is when referring to a user
    # who previously posted in the thread.
    slack_context_str = build_slack_context_str(history_messages, channel_name)

    if not message_ts_to_respond_to and not is_slash_command:
        # if the message is not "/onyx" command, then it should have a message ts to respond to
        raise RuntimeError(
            "No message timestamp to respond to in `handle_message`. This should never happen."
        )

    @retry(
        tries=num_retries,
        delay=0.25,
        backoff=2,
    )
    @rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to)
    def _get_slack_answer(
        new_message_request: SendMessageRequest,
        slack_context_str: str | None,
        onyx_user: User,
    ) -> ChatBasicResponse:
        with get_session_with_current_tenant() as db_session:
            packets = handle_stream_message_objects(
                new_msg_req=new_message_request,
                user=onyx_user,
                db_session=db_session,
                bypass_acl=False,
                additional_context=slack_context_str,
                slack_context=message_info.slack_context,
            )
            answer = gather_stream(packets)

        if answer.error_msg:
            raise RuntimeError(answer.error_msg)

        return answer

    try:
        # By leaving time_cutoff and favor_recent as None, and setting enable_auto_detect_filters
        # it allows the slack flow to extract out filters from the user query
        filters = BaseFilters(
            source_type=None,
            document_set=document_set_names,
            time_cutoff=None,
            tags=channel_tags if channel_tags else None,
        )

        new_message_request = SendMessageRequest(
            message=user_message.message,
            allowed_tool_ids=None,
            forced_tool_id=None,
            file_descriptors=[],
            internal_search_filters=filters,
            deep_research=False,
            origin=MessageOrigin.SLACKBOT,
            chat_session_info=ChatSessionCreationRequest(
                persona_id=persona.id,
            ),
        )

        # if it's a DM or ephemeral message, answer based on private documents.
        # otherwise, answer based on public documents ONLY as to not leak information.
        can_search_over_private_docs = message_info.is_bot_dm or send_as_ephemeral
        answer = _get_slack_answer(
            new_message_request=new_message_request,
            onyx_user=user if can_search_over_private_docs else get_anonymous_user(),
            slack_context_str=slack_context_str,
        )

        # If a channel filter was applied but no results were found, override
        # the LLM response to avoid hallucinated answers about unindexed channels
        if channel_tags and not answer.citation_info and not answer.top_documents:
            channel_names = ", ".join(f"#{tag.tag_value}" for tag in channel_tags)
            answer.answer = (
                f"No indexed data found for {channel_names}. "
                "This channel may not be indexed, or there may be no messages "
                "matching your query within it."
            )

    except Exception as e:
        logger.exception(
            f"Unable to process message - did not successfully answer in {num_retries} attempts"
        )
        # Optionally, respond in thread with the error message, Used primarily
        # for debugging purposes
        if should_respond_with_error_msgs:
            respond_in_thread_or_channel(
                client=client,
                channel=channel,
                receiver_ids=target_receiver_ids,
                text=f"Encountered exception when trying to answer: \n\n```{e}```",
                thread_ts=target_thread_ts,
                send_as_ephemeral=send_as_ephemeral,
            )

        # In case of failures, don't keep the reaction there permanently
        update_emote_react(
            emoji=ONYX_BOT_REACT_EMOJI,
            channel=message_info.channel_to_respond,
            message_ts=message_info.msg_to_respond,
            remove=True,
            client=client,
        )

        return True

    # Got an answer at this point, can remove reaction and give results
    if not is_slash_command:  # Slash commands don't have reactions
        update_emote_react(
            emoji=ONYX_BOT_REACT_EMOJI,
            channel=message_info.channel_to_respond,
            message_ts=message_info.msg_to_respond,
            remove=True,
            client=client,
        )

    if not answer.answer and disable_docs_only_answer:
        logger.notice(
            "Unable to find answer - not responding since the `ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER` env variable is set"
        )
        return True

    only_respond_if_citations = (
        channel_conf
        and "well_answered_postfilter" in channel_conf.get("answer_filters", [])
    )

    if (
        only_respond_if_citations
        and not answer.citation_info
        and not message_info.bypass_filters
        and not channel_tags
    ):
        logger.error(
            f"Unable to find citations to answer: '{answer.answer}' - not answering!"
        )
        # Optionally, respond in thread with the error message
        # Used primarily for debugging purposes
        if should_respond_with_error_msgs:
            respond_in_thread_or_channel(
                client=client,
                channel=channel,
                receiver_ids=target_receiver_ids,
                text="Found no citations or quotes when trying to answer.",
                thread_ts=target_thread_ts,
                send_as_ephemeral=send_as_ephemeral,
            )
        return True

    if (
        send_as_ephemeral
        and target_receiver_ids is not None
        and len(target_receiver_ids) == 1
    ):
        offer_ephemeral_publication = True
        skip_ai_feedback = True
    else:
        offer_ephemeral_publication = False
        skip_ai_feedback = False

    all_blocks = build_slack_response_blocks(
        message_info=message_info,
        answer=answer,
        channel_conf=channel_conf,
        feedback_reminder_id=feedback_reminder_id,
        offer_ephemeral_publication=offer_ephemeral_publication,
        skip_ai_feedback=skip_ai_feedback,
    )

    # NOTE(rkuo): Slack has a maximum block list size of 50.
    # we should modify build_slack_response_blocks to respect the max
    # but enforcing the hard limit here is the last resort.
    all_blocks = all_blocks[:50]

    try:
        respond_in_thread_or_channel(
            client=client,
            channel=channel,
            receiver_ids=target_receiver_ids,
            text="Hello! Onyx has some results for you!",
            blocks=all_blocks,
            thread_ts=target_thread_ts,
            # don't unfurl, since otherwise we will have 5+ previews which makes the message very long
            unfurl=False,
            send_as_ephemeral=send_as_ephemeral,
        )

        # For DM (ephemeral message), we need to create a thread via a normal message so the user can see
        # the ephemeral message. This also will give the user a notification which ephemeral message does not.
        # if there is no message_ts_to_respond_to, and we have made it this far, then this is a /onyx message
        # so we shouldn't send_team_member_message
        if (
            target_receiver_ids
            and message_ts_to_respond_to is not None
            and not send_as_ephemeral
            and target_thread_ts is not None
        ):
            send_team_member_message(
                client=client,
                channel=channel,
                thread_ts=target_thread_ts,
                receiver_ids=target_receiver_ids,
                send_as_ephemeral=send_as_ephemeral,
            )

        return False

    except Exception:
        logger.exception(
            f"Unable to process message - could not respond in slack in {num_retries} attempts"
        )
        return True


================================================
FILE: backend/onyx/onyxbot/slack/handlers/handle_standard_answers.py
================================================
from slack_sdk import WebClient
from sqlalchemy.orm import Session

from onyx.db.models import SlackChannelConfig
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.utils.logger import OnyxLoggingAdapter
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()


def handle_standard_answers(
    message_info: SlackMessageInfo,
    receiver_ids: list[str] | None,
    slack_channel_config: SlackChannelConfig,
    logger: OnyxLoggingAdapter,
    client: WebClient,
    db_session: Session,
) -> bool:
    """Returns whether one or more Standard Answer message blocks were
    emitted by the Slack bot"""
    versioned_handle_standard_answers = fetch_versioned_implementation(
        "onyx.onyxbot.slack.handlers.handle_standard_answers",
        "_handle_standard_answers",
    )
    return versioned_handle_standard_answers(
        message_info=message_info,
        receiver_ids=receiver_ids,
        slack_channel_config=slack_channel_config,
        logger=logger,
        client=client,
        db_session=db_session,
    )


def _handle_standard_answers(
    message_info: SlackMessageInfo,  # noqa: ARG001
    receiver_ids: list[str] | None,  # noqa: ARG001
    slack_channel_config: SlackChannelConfig,  # noqa: ARG001
    logger: OnyxLoggingAdapter,  # noqa: ARG001
    client: WebClient,  # noqa: ARG001
    db_session: Session,  # noqa: ARG001
) -> bool:
    """
    Standard Answers are a paid Enterprise Edition feature. This is the fallback
    function handling the case where EE features are not enabled.

    Always returns false i.e. since EE features are not enabled, we NEVER create any
    Slack message blocks.
    """
    return False


================================================
FILE: backend/onyx/onyxbot/slack/handlers/utils.py
================================================
from slack_sdk import WebClient

from onyx.onyxbot.slack.utils import respond_in_thread_or_channel


def send_team_member_message(
    client: WebClient,
    channel: str,
    thread_ts: str,
    receiver_ids: list[str] | None = None,  # noqa: ARG001
    send_as_ephemeral: bool = False,
) -> None:
    respond_in_thread_or_channel(
        client=client,
        channel=channel,
        text=(
            "👋 Hi, we've just gathered and forwarded the relevant "
            + "information to the team. They'll get back to you shortly!"
        ),
        thread_ts=thread_ts,
        receiver_ids=None,
        send_as_ephemeral=send_as_ephemeral,
    )


================================================
FILE: backend/onyx/onyxbot/slack/icons.py
================================================
from onyx.configs.constants import DocumentSource


def source_to_github_img_link(source: DocumentSource) -> str | None:
    # TODO: store these images somewhere better
    if source == DocumentSource.WEB.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Web.png"
    if source == DocumentSource.FILE.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png"
    if source == DocumentSource.GOOGLE_SITES.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/GoogleSites.png"
    if source == DocumentSource.SLACK.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Slack.png"
    if source == DocumentSource.GMAIL.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gmail.png"
    if source == DocumentSource.GOOGLE_DRIVE.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/GoogleDrive.png"
    if source == DocumentSource.GITHUB.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Github.png"
    if source == DocumentSource.GITLAB.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gitlab.png"
    if source == DocumentSource.CONFLUENCE.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Confluence.png"
    if source == DocumentSource.JIRA.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Jira.png"
    if source == DocumentSource.NOTION.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Notion.png"
    if source == DocumentSource.ZENDESK.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Zendesk.png"
    if source == DocumentSource.GONG.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gong.png"
    if source == DocumentSource.LINEAR.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Linear.png"
    if source == DocumentSource.PRODUCTBOARD.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Productboard.webp"
    if source == DocumentSource.SLAB.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/SlabLogo.png"
    if source == DocumentSource.ZULIP.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Zulip.png"
    if source == DocumentSource.GURU.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Guru.png"
    if source == DocumentSource.HUBSPOT.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/HubSpot.png"
    if source == DocumentSource.DOCUMENT360.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Document360.png"
    if source == DocumentSource.BOOKSTACK.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Bookstack.png"
    if source == DocumentSource.OUTLINE.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Outline.png"
    if source == DocumentSource.LOOPIO.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Loopio.png"
    if source == DocumentSource.SHAREPOINT.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Sharepoint.png"
    if source == DocumentSource.REQUESTTRACKER.value:
        # just use file icon for now
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png"
    if source == DocumentSource.INGESTION_API.value:
        return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png"

    return "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png"


================================================
FILE: backend/onyx/onyxbot/slack/listener.py
================================================
import os
import signal
import sys
import threading
import time
from collections.abc import Callable
from contextvars import Token
from threading import Event
from types import FrameType
from typing import Any
from typing import cast
from typing import Dict

import psycopg2.errors
from prometheus_client import Gauge
from prometheus_client import start_http_server
from redis.lock import Lock
from redis.lock import Lock as RedisLock
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.http_retry import ConnectionErrorRetryHandler
from slack_sdk.http_retry import RateLimitErrorRetryHandler
from slack_sdk.http_retry import RetryHandler
from slack_sdk.socket_mode.request import SocketModeRequest
from slack_sdk.socket_mode.response import SocketModeResponse
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import POD_NAME
from onyx.configs.app_configs import POD_NAMESPACE
from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.models import SlackBot
from onyx.db.search_settings import get_current_search_settings
from onyx.db.slack_bot import fetch_slack_bot
from onyx.db.slack_bot import fetch_slack_bots
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.onyxbot.slack.config import get_slack_channel_config_for_bot_and_channel
from onyx.onyxbot.slack.config import MAX_TENANTS_PER_POD
from onyx.onyxbot.slack.config import TENANT_ACQUISITION_INTERVAL
from onyx.onyxbot.slack.config import TENANT_HEARTBEAT_EXPIRATION
from onyx.onyxbot.slack.config import TENANT_HEARTBEAT_INTERVAL
from onyx.onyxbot.slack.config import TENANT_LOCK_EXPIRATION
from onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
from onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
from onyx.onyxbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID
from onyx.onyxbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
from onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID
from onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID
from onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID
from onyx.onyxbot.slack.constants import VIEW_DOC_FEEDBACK_ID
from onyx.onyxbot.slack.handlers.handle_buttons import handle_doc_feedback_button
from onyx.onyxbot.slack.handlers.handle_buttons import handle_followup_button
from onyx.onyxbot.slack.handlers.handle_buttons import (
    handle_followup_resolved_button,
)
from onyx.onyxbot.slack.handlers.handle_buttons import (
    handle_generate_answer_button,
)
from onyx.onyxbot.slack.handlers.handle_buttons import (
    handle_publish_ephemeral_message_button,
)
from onyx.onyxbot.slack.handlers.handle_buttons import handle_slack_feedback
from onyx.onyxbot.slack.handlers.handle_message import handle_message
from onyx.onyxbot.slack.handlers.handle_message import (
    remove_scheduled_feedback_reminder,
)
from onyx.onyxbot.slack.handlers.handle_message import schedule_feedback_reminder
from onyx.onyxbot.slack.models import SlackContext
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.models import ThreadMessage
from onyx.onyxbot.slack.utils import check_message_limit
from onyx.onyxbot.slack.utils import decompose_action_id
from onyx.onyxbot.slack.utils import get_channel_name_from_id
from onyx.onyxbot.slack.utils import get_channel_type_from_id
from onyx.onyxbot.slack.utils import get_onyx_bot_auth_ids
from onyx.onyxbot.slack.utils import read_slack_thread
from onyx.onyxbot.slack.utils import remove_onyx_bot_tag
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import TenantSocketModeClient
from onyx.redis.redis_pool import get_redis_client
from onyx.server.manage.models import SlackBotTokens
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
from shared_configs.configs import DISALLOWED_SLACK_BOT_TENANT_LIST
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SLACK_CHANNEL_ID
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

# Prometheus metric for HPA
active_tenants_gauge = Gauge(
    "active_tenants",
    "Number of active tenants handled by this pod",
    ["namespace", "pod"],
)

# In rare cases, some users have been experiencing a massive amount of trivial messages coming through
# to the Slack Bot with trivial messages. Adding this to avoid exploding LLM costs while we track down
# the cause.
_SLACK_GREETINGS_TO_IGNORE = {
    "Welcome back!",
    "It's going to be a great day.",
    "Salutations!",
    "Greetings!",
    "Feeling great!",
    "Hi there",
    ":wave:",
}

# This is always (currently) the user id of Slack's official slackbot
_OFFICIAL_SLACKBOT_USER_ID = "USLACKBOT"

# Fields to exclude from Slack payload logging
# Intention is to not log slack message content
_EXCLUDED_SLACK_PAYLOAD_FIELDS = {"text", "blocks"}


class SlackbotHandler:
    def __init__(self) -> None:
        logger.info("Initializing SlackbotHandler")
        self.tenant_ids: set[str] = set()
        # The keys for these dictionaries are tuples of (tenant_id, slack_bot_id)
        self.socket_clients: Dict[tuple[str, int], TenantSocketModeClient] = {}
        self.slack_bot_tokens: Dict[tuple[str, int], SlackBotTokens] = {}

        # Store Redis lock objects here so we can release them properly
        self.redis_locks: Dict[str, Lock] = {}

        self.running = True
        self.pod_id = os.environ.get("HOSTNAME", "unknown_pod")
        self._shutdown_event = Event()

        self._lock = threading.Lock()

        logger.info(f"Pod ID: {self.pod_id}")

        # Set up signal handlers for graceful shutdown
        signal.signal(signal.SIGTERM, self.shutdown)
        signal.signal(signal.SIGINT, self.shutdown)
        logger.info("Signal handlers registered")

        # Start the Prometheus metrics server
        logger.info("Starting Prometheus metrics server")
        start_http_server(8000)
        logger.info("Prometheus metrics server started")

        # Start background threads
        logger.info("Starting background threads")
        self.acquire_thread = threading.Thread(
            target=self.acquire_tenants_loop, daemon=True
        )
        self.heartbeat_thread = threading.Thread(
            target=self.heartbeat_loop, daemon=True
        )

        self.acquire_thread.start()
        self.heartbeat_thread.start()

        logger.info("Background threads started")

    def acquire_tenants_loop(self) -> None:
        while not self._shutdown_event.is_set():
            try:
                self.acquire_tenants()

                # After we finish acquiring and managing Slack bots,
                # set the gauge to the number of active tenants (those with Slack bots).
                active_tenants_gauge.labels(namespace=POD_NAMESPACE, pod=POD_NAME).set(
                    len(self.tenant_ids)
                )
                logger.debug(
                    f"Current active tenants with Slack bots: {len(self.tenant_ids)}"
                )
            except Exception as e:
                logger.exception(f"Error in Slack acquisition: {e}")
            self._shutdown_event.wait(timeout=TENANT_ACQUISITION_INTERVAL)

    def heartbeat_loop(self) -> None:
        """This heartbeats into redis.

        NOTE(rkuo): this is not thread-safe with acquire_tenants_loop and will
        occasionally exception. Fix it!
        """
        while not self._shutdown_event.is_set():
            try:
                with self._lock:
                    tenant_ids = self.tenant_ids.copy()

                SlackbotHandler.send_heartbeats(self.pod_id, tenant_ids)
                logger.debug(f"Sent heartbeats for {len(tenant_ids)} active tenants")
            except Exception as e:
                logger.exception(f"Error in heartbeat loop: {e}")
            self._shutdown_event.wait(timeout=TENANT_HEARTBEAT_INTERVAL)

    def _manage_clients_per_tenant(
        self, db_session: Session, tenant_id: str, bot: SlackBot
    ) -> None:
        """
        - If the tokens are missing or empty, close the socket client and remove them.
        - If the tokens have changed, close the existing socket client and reconnect.
        - If the tokens are new, warm up the model and start a new socket client.
        """
        tenant_bot_pair = (tenant_id, bot.id)

        # If the tokens are missing or empty, close the socket client and remove them.
        if not bot.bot_token or not bot.app_token:
            logger.debug(
                f"No Slack bot tokens found for tenant={tenant_id}, bot {bot.id}"
            )
            if tenant_bot_pair in self.socket_clients:
                self.socket_clients[tenant_bot_pair].close()
                del self.socket_clients[tenant_bot_pair]
                del self.slack_bot_tokens[tenant_bot_pair]
            return

        slack_bot_tokens = SlackBotTokens(
            bot_token=bot.bot_token.get_value(apply_mask=False),
            app_token=bot.app_token.get_value(apply_mask=False),
        )

        tokens_exist = tenant_bot_pair in self.slack_bot_tokens
        tokens_changed = (
            tokens_exist and slack_bot_tokens != self.slack_bot_tokens[tenant_bot_pair]
        )
        if not tokens_exist or tokens_changed:
            if tokens_exist:
                logger.info(
                    f"Slack Bot tokens changed for tenant={tenant_id}, bot {bot.id}; reconnecting"
                )
            else:
                # Warm up the model if needed
                search_settings = get_current_search_settings(db_session)
                embedding_model = EmbeddingModel.from_db_model(
                    search_settings=search_settings,
                    server_host=MODEL_SERVER_HOST,
                    server_port=MODEL_SERVER_PORT,
                )
                warm_up_bi_encoder(embedding_model=embedding_model)

            self.slack_bot_tokens[tenant_bot_pair] = slack_bot_tokens

            # Close any existing connection first
            if tenant_bot_pair in self.socket_clients:
                self.socket_clients[tenant_bot_pair].close()

            socket_client = self.start_socket_client(
                bot.id, tenant_id, slack_bot_tokens
            )
            if socket_client:
                # Ensure tenant is tracked as active
                self.socket_clients[tenant_id, bot.id] = socket_client

                logger.info(
                    f"Started SocketModeClient: {tenant_id=} {socket_client.bot_name=} {bot.id=}"
                )

            self.tenant_ids.add(tenant_id)

    def acquire_tenants(self) -> None:
        """
        - Attempt to acquire a Redis lock for each tenant.
        - If acquired, check if that tenant actually has Slack bots.
        - If yes, store them in self.tenant_ids and manage the socket connections.
        - If a tenant in self.tenant_ids no longer has Slack bots, remove it (and release the lock in this scope).
        """

        token: Token[str | None]

        # tenants that are disabled (e.g. their trial is over and haven't subscribed)
        # for non-cloud, this will return an empty set
        gated_tenants = fetch_ee_implementation_or_noop(
            "onyx.server.tenants.product_gating",
            "get_gated_tenants",
            set(),
        )()
        all_active_tenants = [
            tenant_id
            for tenant_id in get_all_tenant_ids()
            if tenant_id not in gated_tenants
        ]

        # 1) Try to acquire locks for new tenants
        for tenant_id in all_active_tenants:
            if (
                DISALLOWED_SLACK_BOT_TENANT_LIST is not None
                and tenant_id in DISALLOWED_SLACK_BOT_TENANT_LIST
            ):
                logger.debug(f"Tenant {tenant_id} is disallowed; skipping.")
                continue

            # Already acquired in a previous loop iteration?
            if tenant_id in self.tenant_ids:
                continue

            # Respect max tenant limit per pod
            if len(self.tenant_ids) >= MAX_TENANTS_PER_POD:
                logger.info(
                    f"Max tenants per pod reached, not acquiring more: {MAX_TENANTS_PER_POD=}"
                )
                break

            redis_client = get_redis_client(tenant_id=tenant_id)
            # Acquire a Redis lock (non-blocking)
            # thread_local=False because the shutdown event is handled
            # on an arbitrary thread
            rlock: RedisLock = redis_client.lock(
                OnyxRedisLocks.SLACK_BOT_LOCK,
                timeout=TENANT_LOCK_EXPIRATION,
                thread_local=False,
            )
            lock_acquired = rlock.acquire(blocking=False)

            if not lock_acquired and not DEV_MODE:
                logger.debug(
                    f"Another pod holds the lock for tenant {tenant_id}, skipping."
                )
                continue

            if lock_acquired:
                logger.debug(f"Acquired lock for tenant {tenant_id}.")
                self.redis_locks[tenant_id] = rlock
            else:
                # DEV_MODE will skip the lock acquisition guard
                logger.debug(
                    f"Running in DEV_MODE. Not enforcing lock for {tenant_id}."
                )

            # Now check if this tenant actually has Slack bots
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(
                tenant_id or POSTGRES_DEFAULT_SCHEMA
            )
            try:
                with get_session_with_tenant(tenant_id=tenant_id) as db_session:
                    bots: list[SlackBot] = []
                    try:
                        bots = list(fetch_slack_bots(db_session=db_session))
                    except KvKeyNotFoundError:
                        # No Slackbot tokens, pass
                        pass
                    except psycopg2.errors.UndefinedTable:
                        logger.error(
                            "Undefined table error in fetch_slack_bots. Tenant schema may need fixing."
                        )
                    except Exception as e:
                        logger.exception(
                            f"Error fetching Slack bots for tenant {tenant_id}: {e}"
                        )

                    if bots:
                        # Mark as active tenant
                        self.tenant_ids.add(tenant_id)
                        for bot in bots:
                            self._manage_clients_per_tenant(
                                db_session=db_session,
                                tenant_id=tenant_id,
                                bot=bot,
                            )
                    else:
                        # If no Slack bots, release lock immediately (unless in DEV_MODE)
                        if lock_acquired and not DEV_MODE:
                            rlock.release()
                            del self.redis_locks[tenant_id]
                        logger.debug(
                            f"No Slack bots for tenant {tenant_id}; lock released (if held)."
                        )
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        # 2) Make sure tenants we're handling still have Slack bots
        #    and haven't been suspended (gated)
        for tenant_id in list(self.tenant_ids):
            if tenant_id in gated_tenants:
                logger.info(
                    f"Tenant {tenant_id} is now gated (suspended). Disconnecting."
                )
                self._remove_tenant(tenant_id)
                if tenant_id in self.redis_locks and not DEV_MODE:
                    try:
                        self.redis_locks[tenant_id].release()
                        del self.redis_locks[tenant_id]
                    except Exception as e:
                        logger.error(
                            f"Error releasing lock for gated tenant {tenant_id}: {e}"
                        )
                continue

            token = CURRENT_TENANT_ID_CONTEXTVAR.set(
                tenant_id or POSTGRES_DEFAULT_SCHEMA
            )
            redis_client = get_redis_client(tenant_id=tenant_id)

            try:
                with get_session_with_current_tenant() as db_session:
                    # Attempt to fetch Slack bots
                    try:
                        bots = list(fetch_slack_bots(db_session=db_session))
                    except KvKeyNotFoundError:
                        # No Slackbot tokens, pass (and remove below)
                        bots = []
                    except Exception as e:
                        logger.exception(f"Error handling tenant {tenant_id}: {e}")
                        bots = []

                    if not bots:
                        logger.info(
                            f"Tenant {tenant_id} no longer has Slack bots. Removing."
                        )
                        self._remove_tenant(tenant_id)

                        # NOTE: We release the lock here (in the same scope it was acquired)
                        if tenant_id in self.redis_locks and not DEV_MODE:
                            try:
                                self.redis_locks[tenant_id].release()
                                del self.redis_locks[tenant_id]
                                logger.info(f"Released lock for tenant {tenant_id}")
                            except Exception as e:
                                logger.error(
                                    f"Error releasing lock for tenant {tenant_id}: {e}"
                                )
                    else:
                        # Manage or reconnect Slack bot sockets
                        for bot in bots:
                            self._manage_clients_per_tenant(
                                db_session=db_session,
                                tenant_id=tenant_id,
                                bot=bot,
                            )
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

    def _remove_tenant(self, tenant_id: str) -> None:
        """
        Helper to remove a tenant from `self.tenant_ids` and close any socket clients.
        (Lock release now happens in `acquire_tenants()`, not here.)
        """
        socket_client_list = list(self.socket_clients.items())
        # Close all socket clients for this tenant
        for (t_id, slack_bot_id), client in socket_client_list:
            if t_id == tenant_id:
                client.close()
                del self.socket_clients[(t_id, slack_bot_id)]
                del self.slack_bot_tokens[(t_id, slack_bot_id)]
                logger.info(
                    f"Stopped SocketModeClient for tenant: {t_id}, app: {slack_bot_id}"
                )

        # Remove from active set
        if tenant_id in self.tenant_ids:
            self.tenant_ids.remove(tenant_id)

    @staticmethod
    def send_heartbeats(pod_id: str, tenant_ids: set[str]) -> None:
        current_time = int(time.time())
        logger.debug(f"Sending heartbeats for {len(tenant_ids)} active tenants")
        for tenant_id in tenant_ids:
            redis_client = get_redis_client(tenant_id=tenant_id)
            heartbeat_key = f"{OnyxRedisLocks.SLACK_BOT_HEARTBEAT_PREFIX}:{pod_id}"
            redis_client.set(
                heartbeat_key, current_time, ex=TENANT_HEARTBEAT_EXPIRATION
            )

    @staticmethod
    def start_socket_client(
        slack_bot_id: int, tenant_id: str, slack_bot_tokens: SlackBotTokens
    ) -> TenantSocketModeClient | None:
        """Returns the socket client if this succeeds"""
        socket_client: TenantSocketModeClient = _get_socket_client(
            slack_bot_tokens, tenant_id, slack_bot_id
        )

        try:
            bot_info = socket_client.web_client.auth_test()

            if bot_info["ok"]:
                bot_user_id = bot_info["user_id"]
                user_info = socket_client.web_client.users_info(user=bot_user_id)
                if user_info["ok"]:
                    bot_name = (
                        user_info["user"]["real_name"] or user_info["user"]["name"]
                    )
                    socket_client.bot_name = bot_name
                    # logger.info(
                    #     f"Started socket client for Slackbot with name '{bot_name}' (tenant: {tenant_id}, app: {slack_bot_id})"
                    # )
        except SlackApiError as e:
            # Only error out if we get a not_authed error
            if "not_authed" in str(e):
                # for some reason we want to add the tenant to the list when this happens?
                logger.error(
                    f"Authentication error - Invalid or expired credentials: {tenant_id=} {slack_bot_id=}. Error: {e}"
                )
                return None

            # Log other Slack API errors but continue
            logger.error(
                f"Slack API error fetching bot info: {e} for tenant: {tenant_id}, app: {slack_bot_id}"
            )
        except Exception as e:
            # Log other exceptions but continue
            logger.error(
                f"Error fetching bot info: {e} for tenant: {tenant_id}, app: {slack_bot_id}"
            )

        # Append the event handler
        process_slack_event = create_process_slack_event()
        socket_client.socket_mode_request_listeners.append(process_slack_event)  # type: ignore

        # Establish a WebSocket connection to the Socket Mode servers
        # logger.debug(
        #     f"Connecting socket client for tenant: {tenant_id}, app: {slack_bot_id}"
        # )
        socket_client.connect()
        # logger.info(
        #     f"Started SocketModeClient for tenant: {tenant_id}, app: {slack_bot_id}"
        # )

        return socket_client

    @staticmethod
    def stop_socket_clients(
        pod_id: str, socket_clients: Dict[tuple[str, int], TenantSocketModeClient]
    ) -> None:
        socket_client_list = list(socket_clients.items())
        length = len(socket_client_list)

        x = 0
        for (tenant_id, slack_bot_id), client in socket_client_list:
            x += 1
            client.close()
            logger.info(
                f"Stopped SocketModeClient {x}/{length}: {pod_id=} {tenant_id=} {slack_bot_id=}"
            )

    def shutdown(
        self,
        signum: int | None,  # noqa: ARG002
        frame: FrameType | None,  # noqa: ARG002
    ) -> None:
        if not self.running:
            return

        logger.info("Shutting down gracefully")
        self.running = False
        self._shutdown_event.set()  # set the shutdown event

        # wait for threads to detect the event and exit
        self.acquire_thread.join(timeout=60.0)
        self.heartbeat_thread.join(timeout=60.0)

        # Stop all socket clients
        logger.info(f"Stopping {len(self.socket_clients)} socket clients")
        SlackbotHandler.stop_socket_clients(self.pod_id, self.socket_clients)

        # Release locks for all tenants we currently hold
        logger.info(f"Releasing locks for {len(self.tenant_ids)} tenants")
        for tenant_id in list(self.tenant_ids):
            if tenant_id in self.redis_locks:
                try:
                    self.redis_locks[tenant_id].release()
                    logger.info(f"Released lock for tenant {tenant_id}")
                except Exception as e:
                    logger.error(f"Error releasing lock for tenant {tenant_id}: {e}")
                finally:
                    del self.redis_locks[tenant_id]

        # Wait for background threads to finish (with a timeout)
        logger.info("Waiting for background threads to finish...")
        self.acquire_thread.join(timeout=5)
        self.heartbeat_thread.join(timeout=5)

        logger.info("Shutdown complete")
        sys.exit(0)


def sanitize_slack_payload(payload: dict) -> dict:
    """Remove message content from Slack payload for logging"""
    sanitized = {
        k: v for k, v in payload.items() if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
    }
    if "event" in sanitized and isinstance(sanitized["event"], dict):
        sanitized["event"] = {
            k: v
            for k, v in sanitized["event"].items()
            if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
        }
    return sanitized


def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -> bool:
    """True to keep going, False to ignore this Slack request"""

    # skip cases where the bot is disabled in the web UI
    tenant_id = get_current_tenant_id()

    bot_token_user_id, bot_token_bot_id = get_onyx_bot_auth_ids(
        tenant_id, client.web_client
    )
    logger.info(f"prefilter_requests: {bot_token_user_id=} {bot_token_bot_id=}")

    with get_session_with_current_tenant() as db_session:
        slack_bot = fetch_slack_bot(
            db_session=db_session, slack_bot_id=client.slack_bot_id
        )
        if not slack_bot:
            logger.error(
                f"Slack bot with ID '{client.slack_bot_id}' not found. Skipping request."
            )
            return False

        if not slack_bot.enabled:
            logger.info(
                f"Slack bot with ID '{client.slack_bot_id}' is disabled. Skipping request."
            )
            return False

    if req.type == "events_api":
        # Verify channel is valid
        event = cast(dict[str, Any], req.payload.get("event", {}))
        msg = cast(str | None, event.get("text"))
        channel = cast(str | None, event.get("channel"))
        channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})

        # This should never happen, but we can't continue without a channel since
        # we can't send a response without it
        if not channel:
            channel_specific_logger.warning("Found message without channel - skipping")
            return False

        if not msg:
            channel_specific_logger.warning(
                "Cannot respond to empty message - skipping"
            )
            return False

        if (
            req.payload.setdefault("event", {}).get("user", "")
            == _OFFICIAL_SLACKBOT_USER_ID
        ):
            channel_specific_logger.info(
                "Ignoring messages from Slack's official Slackbot"
            )
            return False

        if (
            msg in _SLACK_GREETINGS_TO_IGNORE
            or remove_onyx_bot_tag(tenant_id, msg, client=client.web_client)
            in _SLACK_GREETINGS_TO_IGNORE
        ):
            channel_specific_logger.error(
                f"Ignoring weird Slack greeting message: '{msg}'"
            )
            channel_specific_logger.error(
                f"Weird Slack greeting message payload: '{req.payload}'"
            )
            return False

        # Ensure that the message is a new message of expected type
        event_type = event.get("type")
        event.get("channel_type")

        if event_type not in ["app_mention", "message"]:
            return False

        bot_token_user_id, bot_token_bot_id = get_onyx_bot_auth_ids(
            tenant_id, client.web_client
        )
        if event_type == "message":
            is_onyx_bot_msg = False
            is_tagged = False

            event_user = event.get("user", "")
            event_bot_id = event.get("bot_id", "")

            is_dm = event.get("channel_type") == "im"
            if bot_token_user_id and f"<@{bot_token_user_id}>" in msg:
                is_tagged = True

            if bot_token_user_id and bot_token_user_id in event_user:
                is_onyx_bot_msg = True

            if bot_token_bot_id and bot_token_bot_id in event_bot_id:
                is_onyx_bot_msg = True

            # OnyxBot should never respond to itself
            if is_onyx_bot_msg:
                logger.info("Ignoring message from OnyxBot (self-message)")
                return False

            # DMs with the bot don't pick up the @OnyxBot so we have to keep the
            # caught events_api
            if is_tagged and not is_dm:
                # Let the tag flow handle this case, don't reply twice
                return False

        # Check if this is a bot message (either via bot_profile or bot_message subtype)
        is_bot_message = bool(
            event.get("bot_profile") or event.get("subtype") == "bot_message"
        )
        if is_bot_message:
            channel_name, _ = get_channel_name_from_id(
                client=client.web_client, channel_id=channel
            )
            with get_session_with_current_tenant() as db_session:
                slack_channel_config = get_slack_channel_config_for_bot_and_channel(
                    db_session=db_session,
                    slack_bot_id=client.slack_bot_id,
                    channel_name=channel_name,
                )

            # If OnyxBot is not specifically tagged and the channel is not set to respond to bots, ignore the message
            if (not bot_token_user_id or bot_token_user_id not in msg) and (
                not slack_channel_config
                or not slack_channel_config.channel_config.get("respond_to_bots")
            ):
                channel_specific_logger.info(
                    "Ignoring message from bot since respond_to_bots is disabled"
                )
                return False

        # Ignore things like channel_join, channel_leave, etc.
        # NOTE: "file_share" is just a message with a file attachment, so we
        # should not ignore it
        message_subtype = event.get("subtype")
        if message_subtype not in [None, "file_share", "bot_message"]:
            channel_specific_logger.info(
                f"Ignoring message with subtype '{message_subtype}' since it is a special message type"
            )
            return False

        message_ts = event.get("ts")
        thread_ts = event.get("thread_ts")
        # Pick the root of the thread (if a thread exists)
        # Can respond in thread if it's an "im" directly to Onyx or @OnyxBot is tagged
        if (
            thread_ts
            and message_ts != thread_ts
            and event_type != "app_mention"
            and event.get("channel_type") != "im"
        ):
            channel_specific_logger.debug(
                "Skipping message since it is not the root of a thread"
            )
            return False

        msg = cast(str, event.get("text", ""))
        if not msg:
            channel_specific_logger.error("Unable to process empty message")
            return False

    if req.type == "slash_commands":
        # Verify that there's an associated channel
        channel = req.payload.get("channel_id")
        channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})

        if not channel:
            channel_specific_logger.error(
                "Received OnyxBot command without channel - skipping"
            )
            return False

        sender = req.payload.get("user_id")
        if not sender:
            channel_specific_logger.error(
                "Cannot respond to OnyxBot command without sender to respond to."
            )
            return False

    if not check_message_limit():
        return False

    # Don't log Slack message content
    logger.debug(
        f"Handling Slack request: {client.bot_name=} '{sanitize_slack_payload(req.payload)=}'"
    )
    return True


def process_feedback(req: SocketModeRequest, client: TenantSocketModeClient) -> None:
    if actions := req.payload.get("actions"):
        action = cast(dict[str, Any], actions[0])
        feedback_type = cast(str, action.get("action_id"))
        feedback_msg_reminder = cast(str, action.get("value"))
        feedback_id = cast(str, action.get("block_id"))
        channel_id = cast(str, req.payload["container"]["channel_id"])
        thread_ts = cast(
            str,
            req.payload["container"].get("thread_ts")
            or req.payload["container"].get("message_ts"),
        )
    else:
        logger.error("Unable to process feedback. Action not found")
        return

    user_id = cast(str, req.payload["user"]["id"])

    handle_slack_feedback(
        feedback_id=feedback_id,
        feedback_type=feedback_type,
        feedback_msg_reminder=feedback_msg_reminder,
        client=client.web_client,
        user_id_to_post_confirmation=user_id,
        channel_id_to_post_confirmation=channel_id,
        thread_ts_to_post_confirmation=thread_ts,
    )

    query_event_id, _, _ = decompose_action_id(feedback_id)
    logger.info(f"Successfully handled QA feedback for event: {query_event_id}")


def build_request_details(
    req: SocketModeRequest, client: TenantSocketModeClient
) -> SlackMessageInfo:
    tagged: bool = False

    tenant_id = get_current_tenant_id()
    if req.type == "events_api":
        event = cast(dict[str, Any], req.payload["event"])
        msg = cast(str, event["text"])
        channel = cast(str, event["channel"])

        # Check for both app_mention events and messages containing bot tag
        bot_token_user_id, _ = get_onyx_bot_auth_ids(tenant_id, client.web_client)
        message_ts = event.get("ts")
        thread_ts = event.get("thread_ts")
        sender_id = event.get("user") or None
        expert_info = expert_info_from_slack_id(
            sender_id, client.web_client, user_cache={}
        )
        email = expert_info.email if expert_info else None

        msg = remove_onyx_bot_tag(tenant_id, msg, client=client.web_client)

        logger.info(f"Received Slack message: {msg}")

        event_type = event.get("type")
        if event_type == "app_mention":
            tagged = True

        if event_type == "message":
            if bot_token_user_id:
                if f"<@{bot_token_user_id}>" in msg:
                    tagged = True

        if tagged:
            logger.debug("User tagged OnyxBot")

        # Build Slack context for federated search
        # Get proper channel type from Slack API instead of relying on event.channel_type
        channel_type = get_channel_type_from_id(client.web_client, channel)

        slack_context = SlackContext(
            channel_type=channel_type,
            channel_id=channel,
            user_id=sender_id or "unknown",
            message_ts=message_ts,
        )
        logger.info(
            f"build_request_details: Capturing Slack context: "
            f"channel_type={channel_type} channel_id={channel} message_ts={message_ts}"
        )

        if thread_ts != message_ts and thread_ts is not None:
            thread_messages: list[ThreadMessage] = read_slack_thread(
                tenant_id=tenant_id,
                channel=channel,
                thread=thread_ts,
                client=client.web_client,
            )
        else:
            sender_display_name = None
            if expert_info:
                sender_display_name = expert_info.display_name
                if sender_display_name is None:
                    sender_display_name = (
                        f"{expert_info.first_name} {expert_info.last_name}"
                        if expert_info.last_name
                        else expert_info.first_name
                    )
                if sender_display_name is None:
                    sender_display_name = expert_info.email
            thread_messages = [
                ThreadMessage(
                    message=msg, sender=sender_display_name, role=MessageType.USER
                )
            ]

        return SlackMessageInfo(
            thread_messages=thread_messages,
            channel_to_respond=channel,
            msg_to_respond=cast(str, message_ts or thread_ts),
            thread_to_respond=cast(str, thread_ts or message_ts),
            sender_id=sender_id,
            email=email,
            bypass_filters=tagged,
            is_slash_command=False,
            is_bot_dm=event.get("channel_type") == "im",
            slack_context=slack_context,  # Add Slack context for federated search
        )

    elif req.type == "slash_commands":
        channel = req.payload["channel_id"]
        channel_name = req.payload["channel_name"]
        msg = req.payload["text"]
        sender = req.payload["user_id"]
        expert_info = expert_info_from_slack_id(
            sender, client.web_client, user_cache={}
        )
        email = expert_info.email if expert_info else None

        # Get proper channel type for slash commands too
        channel_type = get_channel_type_from_id(client.web_client, channel)

        slack_context = SlackContext(
            channel_type=channel_type,
            channel_id=channel,
            user_id=sender,
            message_ts=None,  # Slash commands don't have a message timestamp
        )
        logger.info(
            f"build_request_details: Capturing Slack context for slash command: channel_type={channel_type} channel_id={channel}"
        )

        single_msg = ThreadMessage(message=msg, sender=None, role=MessageType.USER)

        return SlackMessageInfo(
            thread_messages=[single_msg],
            channel_to_respond=channel,
            msg_to_respond=None,
            thread_to_respond=None,
            sender_id=sender,
            email=email,
            bypass_filters=True,
            is_slash_command=True,
            is_bot_dm=channel_name == "directmessage",
            slack_context=slack_context,  # Add Slack context for federated search
        )

    raise RuntimeError("Programming fault, this should never happen.")


def apologize_for_fail(
    details: SlackMessageInfo,
    client: TenantSocketModeClient,
) -> None:
    respond_in_thread_or_channel(
        client=client.web_client,
        channel=details.channel_to_respond,
        thread_ts=details.msg_to_respond,
        text="Sorry, we weren't able to find anything relevant :cold_sweat:",
    )


def process_message(
    req: SocketModeRequest,
    client: TenantSocketModeClient,
    notify_no_answer: bool = NOTIFY_SLACKBOT_NO_ANSWER,
) -> None:
    tenant_id = get_current_tenant_id()
    if req.type == "events_api":
        event = cast(dict[str, Any], req.payload["event"])
        event_type = event.get("type")
        logger.info(
            f"process_message start: {tenant_id=} {req.type=} {req.envelope_id=} {event_type=}"
        )
    else:
        logger.info(
            f"process_message start: {tenant_id=} {req.type=} {req.envelope_id=}"
        )

    # Throw out requests that can't or shouldn't be handled
    if not prefilter_requests(req, client):
        logger.info(
            f"process_message prefiltered: {tenant_id=} {req.type=} {req.envelope_id=}"
        )
        return

    details = build_request_details(req, client)
    channel = details.channel_to_respond
    channel_name, is_dm = get_channel_name_from_id(
        client=client.web_client, channel_id=channel
    )

    with get_session_with_current_tenant() as db_session:
        slack_channel_config = get_slack_channel_config_for_bot_and_channel(
            db_session=db_session,
            slack_bot_id=client.slack_bot_id,
            channel_name=channel_name,
        )

        follow_up = bool(
            slack_channel_config.channel_config
            and slack_channel_config.channel_config.get("follow_up_tags") is not None
        )

        feedback_reminder_id = schedule_feedback_reminder(
            details=details, client=client.web_client, include_followup=follow_up
        )

        failed = handle_message(
            message_info=details,
            slack_channel_config=slack_channel_config,
            client=client.web_client,
            feedback_reminder_id=feedback_reminder_id,
        )

        if failed:
            if feedback_reminder_id:
                remove_scheduled_feedback_reminder(
                    client=client.web_client,
                    channel=details.sender_id,
                    msg_id=feedback_reminder_id,
                )
            # Skipping answering due to pre-filtering is not considered a failure
            if notify_no_answer:
                apologize_for_fail(details, client)

    logger.info(
        f"process_message finished: success={not failed} {tenant_id=} {req.type=} {req.envelope_id=}"
    )


def acknowledge_message(req: SocketModeRequest, client: TenantSocketModeClient) -> None:
    response = SocketModeResponse(envelope_id=req.envelope_id)
    client.send_socket_mode_response(response)


def action_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None:
    if actions := req.payload.get("actions"):
        action = cast(dict[str, Any], actions[0])

        if action["action_id"] in [DISLIKE_BLOCK_ACTION_ID, LIKE_BLOCK_ACTION_ID]:
            # AI Answer feedback
            return process_feedback(req, client)
        elif action["action_id"] in [
            SHOW_EVERYONE_ACTION_ID,
            KEEP_TO_YOURSELF_ACTION_ID,
        ]:
            # Publish ephemeral message or keep hidden in main channel
            return handle_publish_ephemeral_message_button(
                req, client, action["action_id"]
            )
        elif action["action_id"] == FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID:
            # Activation of the "source feedback" button
            return handle_doc_feedback_button(req, client)
        elif action["action_id"] == FOLLOWUP_BUTTON_ACTION_ID:
            return handle_followup_button(req, client)
        elif action["action_id"] == IMMEDIATE_RESOLVED_BUTTON_ACTION_ID:
            return handle_followup_resolved_button(req, client, immediate=True)
        elif action["action_id"] == FOLLOWUP_BUTTON_RESOLVED_ACTION_ID:
            return handle_followup_resolved_button(req, client, immediate=False)
        elif action["action_id"] == GENERATE_ANSWER_BUTTON_ACTION_ID:
            return handle_generate_answer_button(req, client)


def view_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None:
    if view := req.payload.get("view"):
        if view["callback_id"] == VIEW_DOC_FEEDBACK_ID:
            return process_feedback(req, client)


def _extract_channel_from_request(req: SocketModeRequest) -> str | None:
    """Best-effort channel extraction from any Slack request type."""
    if req.type == "events_api":
        return cast(dict[str, Any], req.payload.get("event", {})).get("channel")
    elif req.type == "slash_commands":
        return req.payload.get("channel_id")
    elif req.type == "interactive":
        container = req.payload.get("container", {})
        return container.get("channel_id") or req.payload.get("channel", {}).get("id")
    return None


def _check_tenant_gated(client: TenantSocketModeClient, req: SocketModeRequest) -> bool:
    """Check if the current tenant is gated (suspended or license expired).

    Multi-tenant: checks the gated tenants Redis set (populated by control plane).
    Self-hosted: checks the cached license metadata for expiry.

    Returns True if blocked.
    """
    from onyx.server.settings.models import ApplicationStatus

    # Multi-tenant path: control plane marks gated tenants in Redis
    is_gated: bool = fetch_ee_implementation_or_noop(
        "onyx.server.tenants.product_gating",
        "is_tenant_gated",
        False,
    )(get_current_tenant_id())

    # Self-hosted path: check license metadata cache
    if not is_gated:
        get_cached_metadata = fetch_ee_implementation_or_noop(
            "onyx.db.license",
            "get_cached_license_metadata",
            None,
        )
        metadata = get_cached_metadata()
        if metadata is not None:
            if metadata.status == ApplicationStatus.GATED_ACCESS:
                is_gated = True

    if not is_gated:
        return False

    # Only notify once per user action:
    # - Skip bot messages (avoids feedback loop from our own response)
    # - Skip app_mention events (Slack fires both app_mention AND message
    #   for @mentions; we respond on the message event only)
    event = req.payload.get("event", {}) if req.type == "events_api" else {}
    is_bot_event = bool(
        event.get("bot_id")
        or event.get("bot_profile")
        or event.get("subtype") == "bot_message"
    )
    is_duplicate_mention = event.get("type") == "app_mention"
    if not is_bot_event and not is_duplicate_mention:
        channel = _extract_channel_from_request(req)
        thread_ts = event.get("thread_ts") or event.get("ts")
        if channel:
            respond_in_thread_or_channel(
                client=client.web_client,
                channel=channel,
                thread_ts=thread_ts,
                text=(
                    "Your organization's subscription has expired. Please contact your Onyx administrator to restore access."
                ),
            )
    logger.info(f"Blocked Slack request for gated tenant {get_current_tenant_id()}")
    return True


def create_process_slack_event() -> (
    Callable[[TenantSocketModeClient, SocketModeRequest], None]
):
    def process_slack_event(
        client: TenantSocketModeClient, req: SocketModeRequest
    ) -> None:
        # Always respond right away, if Slack doesn't receive these frequently enough
        # it will assume the Bot is DEAD!!! :(
        acknowledge_message(req, client)

        if _check_tenant_gated(client, req):
            return

        try:
            if req.type == "interactive":
                if req.payload.get("type") == "block_actions":
                    return action_routing(req, client)
                elif req.payload.get("type") == "view_submission":
                    return view_routing(req, client)
            elif req.type == "events_api" or req.type == "slash_commands":
                return process_message(req, client)
        except Exception:
            logger.exception("Failed to process slack event")

    return process_slack_event


def _get_socket_client(
    slack_bot_tokens: SlackBotTokens, tenant_id: str, slack_bot_id: int
) -> TenantSocketModeClient:
    # For more info on how to set this up, checkout the docs:
    # https://docs.onyx.app/admins/getting_started/slack_bot_setup

    # use the retry handlers built into the slack sdk
    connection_error_retry_handler = ConnectionErrorRetryHandler()
    rate_limit_error_retry_handler = RateLimitErrorRetryHandler(max_retry_count=7)
    slack_retry_handlers: list[RetryHandler] = [
        connection_error_retry_handler,
        rate_limit_error_retry_handler,
    ]

    return TenantSocketModeClient(
        # This app-level token will be used only for establishing a connection
        app_token=slack_bot_tokens.app_token,
        web_client=WebClient(
            token=slack_bot_tokens.bot_token, retry_handlers=slack_retry_handlers
        ),
        tenant_id=tenant_id,
        slack_bot_id=slack_bot_id,
    )


if __name__ == "__main__":
    # Initialize the SqlEngine
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    # Initialize the tenant handler which will manage tenant connections
    logger.info("Starting SlackbotHandler")
    tenant_handler = SlackbotHandler()

    set_is_ee_based_on_env_variable()

    try:
        # Keep the main thread alive
        while tenant_handler.running:
            time.sleep(1)

    except Exception:
        logger.exception("Fatal error in main thread")
        tenant_handler.shutdown(None, None)


================================================
FILE: backend/onyx/onyxbot/slack/models.py
================================================
from enum import Enum
from typing import Literal

from pydantic import BaseModel

from onyx.configs.constants import MessageType


class ChannelType(str, Enum):
    """Slack channel types."""

    IM = "im"  # Direct message
    MPIM = "mpim"  # Multi-person direct message
    PRIVATE_CHANNEL = "private_channel"  # Private channel
    PUBLIC_CHANNEL = "public_channel"  # Public channel
    UNKNOWN = "unknown"  # Unknown channel type


class SlackContext(BaseModel):
    """Context information for Slack bot interactions."""

    channel_type: ChannelType
    channel_id: str
    user_id: str
    message_ts: str | None = None  # Used as request ID for log correlation


class ThreadMessage(BaseModel):
    message: str
    sender: str | None = None
    role: MessageType = MessageType.USER


class SlackMessageInfo(BaseModel):
    thread_messages: list[ThreadMessage]
    channel_to_respond: str
    msg_to_respond: str | None
    thread_to_respond: str | None
    sender_id: str | None
    email: str | None
    bypass_filters: bool  # User has tagged @OnyxBot
    is_slash_command: bool  # User is using /OnyxBot
    is_bot_dm: bool  # User is direct messaging to OnyxBot
    slack_context: SlackContext | None = None


# Models used to encode the relevant data for the ephemeral message actions
class ActionValuesEphemeralMessageMessageInfo(BaseModel):
    bypass_filters: bool | None
    channel_to_respond: str | None
    msg_to_respond: str | None
    email: str | None
    sender_id: str | None
    thread_messages: list[ThreadMessage] | None
    is_slash_command: bool | None
    is_bot_dm: bool | None
    thread_to_respond: str | None


class ActionValuesEphemeralMessageChannelConfig(BaseModel):
    channel_name: str | None
    respond_tag_only: bool | None
    respond_to_bots: bool | None
    is_ephemeral: bool
    respond_member_group_list: list[str] | None
    answer_filters: (
        list[Literal["well_answered_postfilter", "questionmark_prefilter"]] | None
    )
    follow_up_tags: list[str] | None
    show_continue_in_web_ui: bool


class ActionValuesEphemeralMessage(BaseModel):
    original_question_ts: str | None
    feedback_reminder_id: str | None
    chat_message_id: int
    message_info: ActionValuesEphemeralMessageMessageInfo
    channel_conf: ActionValuesEphemeralMessageChannelConfig


================================================
FILE: backend/onyx/onyxbot/slack/utils.py
================================================
import logging
import random
import re
import string
import threading
import time
import uuid
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from typing import cast

from retry import retry
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import SectionBlock
from slack_sdk.models.metadata import Metadata
from slack_sdk.socket_mode import SocketModeClient

from onyx.configs.app_configs import DISABLE_TELEMETRY
from onyx.configs.constants import ID_SEPARATOR
from onyx.configs.constants import MessageType
from onyx.configs.onyxbot_configs import ONYX_BOT_FEEDBACK_VISIBILITY
from onyx.configs.onyxbot_configs import ONYX_BOT_MAX_QPM
from onyx.configs.onyxbot_configs import ONYX_BOT_MAX_WAIT_TIME
from onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES
from onyx.configs.onyxbot_configs import (
    ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD,
)
from onyx.configs.onyxbot_configs import (
    ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS,
)
from onyx.connectors.slack.utils import SlackTextCleaner
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.constants import FeedbackVisibility
from onyx.onyxbot.slack.models import ChannelType
from onyx.onyxbot.slack.models import ThreadMessage
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from onyx.utils.text_processing import replace_whitespaces_w_space
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()

slack_token_user_ids: dict[str, str | None] = {}
slack_token_bot_ids: dict[str, str | None] = {}
slack_token_lock = threading.Lock()

_ONYX_BOT_MESSAGE_COUNT: int = 0
_ONYX_BOT_COUNT_START_TIME: float = time.time()


def get_onyx_bot_auth_ids(
    tenant_id: str, web_client: WebClient
) -> tuple[str | None, str | None]:
    """Returns a tuple of user_id and bot_id."""

    user_id: str | None
    bot_id: str | None

    global slack_token_user_ids
    global slack_token_bot_ids

    with slack_token_lock:
        user_id = slack_token_user_ids.get(tenant_id)
        bot_id = slack_token_bot_ids.get(tenant_id)

    if user_id is None or bot_id is None:
        response = web_client.auth_test()
        user_id = response.get("user_id")
        bot_id = response.get("bot_id")
        with slack_token_lock:
            slack_token_user_ids[tenant_id] = user_id
            slack_token_bot_ids[tenant_id] = bot_id

    return user_id, bot_id


def get_channel_type_from_id(web_client: WebClient, channel_id: str) -> ChannelType:
    """
    Get the channel type from a channel ID using Slack API.
    Returns: ChannelType enum value
    """
    try:
        channel_info = web_client.conversations_info(channel=channel_id)
        if channel_info.get("ok") and channel_info.get("channel"):
            channel: dict[str, Any] = channel_info.get("channel", {})

            if channel.get("is_im"):
                return ChannelType.IM  # Direct message
            elif channel.get("is_mpim"):
                return ChannelType.MPIM  # Multi-person direct message
            elif channel.get("is_private"):
                return ChannelType.PRIVATE_CHANNEL  # Private channel
            elif channel.get("is_channel"):
                return ChannelType.PUBLIC_CHANNEL  # Public channel
            else:
                logger.warning(
                    f"Could not determine channel type for {channel_id}, defaulting to unknown"
                )
                return ChannelType.UNKNOWN
        else:
            logger.warning(f"Invalid channel info response for {channel_id}")
            return ChannelType.UNKNOWN
    except Exception as e:
        logger.warning(
            f"Error getting channel info for {channel_id}, defaulting to unknown: {e}"
        )
        return ChannelType.UNKNOWN


def check_message_limit() -> bool:
    """
    This isnt a perfect solution.
    High traffic at the end of one period and start of another could cause
    the limit to be exceeded.
    """
    if ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD <= 0:
        return True
    global _ONYX_BOT_MESSAGE_COUNT
    global _ONYX_BOT_COUNT_START_TIME
    time_since_start = time.time() - _ONYX_BOT_COUNT_START_TIME
    if time_since_start > ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS:
        _ONYX_BOT_MESSAGE_COUNT = 0
        _ONYX_BOT_COUNT_START_TIME = time.time()
    if (_ONYX_BOT_MESSAGE_COUNT + 1) > ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD:
        logger.error(
            f"OnyxBot has reached the message limit {ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD}"
            f" for the time period {ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS} seconds."
            " These limits are configurable in backend/onyx/configs/onyxbot_configs.py"
        )
        return False
    _ONYX_BOT_MESSAGE_COUNT += 1
    return True


def update_emote_react(
    emoji: str,
    channel: str,
    message_ts: str | None,
    remove: bool,
    client: WebClient,
) -> None:
    if not message_ts:
        action = "remove" if remove else "add"
        logger.error(f"update_emote_react - no message specified: {channel=} {action=}")
        return

    if remove:
        try:
            client.reactions_remove(
                name=emoji,
                channel=channel,
                timestamp=message_ts,
            )
        except SlackApiError as e:
            logger.error(f"Failed to remove Reaction due to: {e}")

        return

    try:
        client.reactions_add(
            name=emoji,
            channel=channel,
            timestamp=message_ts,
        )
    except SlackApiError as e:
        logger.error(f"Was not able to react to user message due to: {e}")

    return


def remove_onyx_bot_tag(tenant_id: str, message_str: str, client: WebClient) -> str:
    bot_token_user_id, _ = get_onyx_bot_auth_ids(tenant_id, web_client=client)
    return re.sub(rf"<@{bot_token_user_id}>\s*", "", message_str)


def _check_for_url_in_block(block: Block) -> bool:
    """
    Check if the block has a key that contains "url" in it
    """
    block_dict = block.to_dict()

    def check_dict_for_url(d: dict) -> bool:
        for key, value in d.items():
            if "url" in key.lower():
                return True
            if isinstance(value, dict):
                if check_dict_for_url(value):
                    return True
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict) and check_dict_for_url(item):
                        return True
        return False

    return check_dict_for_url(block_dict)


def _build_error_block(error_message: str) -> Block:
    """
    Build an error block to display in slack so that the user can see
    the error without completely breaking
    """
    display_text = (
        "There was an error displaying all of the Onyx answers."
        f" Please let an admin or an onyx developer know. Error: {error_message}"
    )
    return SectionBlock(text=display_text)


@retry(
    tries=ONYX_BOT_NUM_RETRIES,
    delay=0.25,
    backoff=2,
    logger=cast(logging.Logger, logger),
)
def respond_in_thread_or_channel(
    client: WebClient,
    channel: str,
    thread_ts: str | None,
    text: str | None = None,
    blocks: list[Block] | None = None,
    receiver_ids: list[str] | None = None,
    metadata: Metadata | None = None,
    unfurl: bool = True,
    send_as_ephemeral: bool | None = True,  # noqa: ARG001
) -> list[str]:
    if not text and not blocks:
        raise ValueError("One of `text` or `blocks` must be provided")

    message_ids: list[str] = []
    if not receiver_ids:
        try:
            response = client.chat_postMessage(
                channel=channel,
                text=text,
                blocks=blocks,
                thread_ts=thread_ts,
                metadata=metadata,
                unfurl_links=unfurl,
                unfurl_media=unfurl,
            )
        except Exception as e:
            blocks_str = str(blocks)[:1024]  # truncate block logging
            logger.warning(f"Failed to post message: {e} \n blocks: {blocks_str}")
            logger.warning("Trying again without blocks that have urls")

            if not blocks:
                raise e

            blocks_without_urls = [
                block for block in blocks if not _check_for_url_in_block(block)
            ]
            blocks_without_urls.append(_build_error_block(str(e)))

            # Try again wtihout blocks containing url
            response = client.chat_postMessage(
                channel=channel,
                text=text,
                blocks=blocks_without_urls,
                thread_ts=thread_ts,
                metadata=metadata,
                unfurl_links=unfurl,
                unfurl_media=unfurl,
            )

        message_ids.append(response["message_ts"])
    else:
        for receiver in receiver_ids:
            try:
                response = client.chat_postEphemeral(
                    channel=channel,
                    user=receiver,
                    text=text,
                    blocks=blocks,
                    thread_ts=thread_ts,
                    metadata=metadata,
                    unfurl_links=unfurl,
                    unfurl_media=unfurl,
                )
            except Exception as e:
                blocks_str = str(blocks)[:1024]  # truncate block logging
                logger.warning(f"Failed to post message: {e} \n blocks: {blocks_str}")
                logger.warning("Trying again without blocks that have urls")

                if not blocks:
                    raise e

                blocks_without_urls = [
                    block for block in blocks if not _check_for_url_in_block(block)
                ]
                blocks_without_urls.append(_build_error_block(str(e)))

                # Try again wtihout blocks containing url
                response = client.chat_postEphemeral(
                    channel=channel,
                    user=receiver,
                    text=text,
                    blocks=blocks_without_urls,
                    thread_ts=thread_ts,
                    metadata=metadata,
                    unfurl_links=unfurl,
                    unfurl_media=unfurl,
                )

            message_ids.append(response["message_ts"])

    return message_ids


def build_feedback_id(
    message_id: int,
    document_id: str | None = None,
    document_rank: int | None = None,
) -> str:
    unique_prefix = "".join(random.choice(string.ascii_letters) for _ in range(10))
    if document_id is not None:
        if not document_id or document_rank is None:
            raise ValueError("Invalid document, missing information")
        if ID_SEPARATOR in document_id:
            raise ValueError(
                "Separator pattern should not already exist in document id"
            )
        feedback_id = ID_SEPARATOR.join(
            [str(message_id), document_id, str(document_rank)]
        )
    else:
        feedback_id = str(message_id)

    return unique_prefix + ID_SEPARATOR + feedback_id


def build_publish_ephemeral_message_id(
    original_question_ts: str,
) -> str:
    return "publish_ephemeral_message__" + original_question_ts


def build_continue_in_web_ui_id(
    message_id: int,
) -> str:
    unique_prefix = str(uuid.uuid4())[:10]
    return unique_prefix + ID_SEPARATOR + str(message_id)


def decompose_action_id(feedback_id: str) -> tuple[int, str | None, int | None]:
    """Decompose into query_id, document_id, document_rank, see above function"""
    try:
        components = feedback_id.split(ID_SEPARATOR)
        if len(components) != 2 and len(components) != 4:
            raise ValueError("Feedback ID does not contain right number of elements")

        if len(components) == 2:
            return int(components[-1]), None, None

        return int(components[1]), components[2], int(components[3])

    except Exception as e:
        logger.error(e)
        raise ValueError("Received invalid Feedback Identifier")


def get_view_values(state_values: dict[str, Any]) -> dict[str, str]:
    """Extract view values

    Args:
        state_values (dict): The Slack view-submission values

    Returns:
        dict: keys/values of the view state content
    """
    view_values = {}
    for _, view_data in state_values.items():
        for k, v in view_data.items():
            if (
                "selected_option" in v
                and isinstance(v["selected_option"], dict)
                and "value" in v["selected_option"]
            ):
                view_values[k] = v["selected_option"]["value"]
            elif "selected_options" in v and isinstance(v["selected_options"], list):
                view_values[k] = [
                    x["value"] for x in v["selected_options"] if "value" in x
                ]
            elif "selected_date" in v:
                view_values[k] = v["selected_date"]
            elif "value" in v:
                view_values[k] = v["value"]
    return view_values


def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
    def _replace_highlight(s: str) -> str:
        s = re.sub(r"(?<=[^\s])<hi>(.*?)</hi>", r"\1", s)
        s = s.replace("</hi>", "*").replace("<hi>", "*")
        return s

    final_matches = [
        replace_whitespaces_w_space(_replace_highlight(match_str)).strip()
        for match_str in match_strs
        if match_str
    ]
    combined = "... ".join(final_matches)

    # Slack introduces "Show More" after 300 on desktop which is ugly
    # But don't trim the message if there is still a highlight after 300 chars
    remaining = 300 - used_chars
    if len(combined) > remaining and "*" not in combined[remaining:]:
        combined = combined[: remaining - 3] + "..."

    return combined


def remove_slack_text_interactions(slack_str: str) -> str:
    slack_str = SlackTextCleaner.replace_tags_basic(slack_str)
    slack_str = SlackTextCleaner.replace_channels_basic(slack_str)
    slack_str = SlackTextCleaner.replace_special_mentions(slack_str)
    slack_str = SlackTextCleaner.replace_special_catchall(slack_str)
    slack_str = SlackTextCleaner.add_zero_width_whitespace_after_tag(slack_str)
    return slack_str


def get_channel_from_id(client: WebClient, channel_id: str) -> dict[str, Any]:
    response = client.conversations_info(channel=channel_id)
    response.validate()
    return response["channel"]


def get_channel_name_from_id(
    client: WebClient, channel_id: str
) -> tuple[str | None, bool]:
    try:
        channel_info = get_channel_from_id(client, channel_id)
        name = channel_info.get("name")
        is_dm = any([channel_info.get("is_im"), channel_info.get("is_mpim")])
        return name, is_dm
    except SlackApiError as e:
        logger.exception(f"Couldn't fetch channel name from id: {channel_id}")
        raise e


def fetch_slack_user_ids_from_emails(
    user_emails: list[str], client: WebClient
) -> tuple[list[str], list[str]]:
    user_ids: list[str] = []
    failed_to_find: list[str] = []
    for email in user_emails:
        try:
            user = client.users_lookupByEmail(email=email)
            user_ids.append(user.data["user"]["id"])  # type: ignore
        except Exception:
            logger.error(f"Was not able to find slack user by email: {email}")
            failed_to_find.append(email)

    return user_ids, failed_to_find


def fetch_user_ids_from_groups(
    given_names: list[str], client: WebClient
) -> tuple[list[str], list[str]]:
    user_ids: list[str] = []
    failed_to_find: list[str] = []
    try:
        response = client.usergroups_list()
        if not isinstance(response.data, dict):
            logger.error("Error fetching user groups")
            return user_ids, given_names

        all_group_data = response.data.get("usergroups", [])
        name_id_map = {d["name"]: d["id"] for d in all_group_data}
        handle_id_map = {d["handle"]: d["id"] for d in all_group_data}
        for given_name in given_names:
            group_id = name_id_map.get(given_name) or handle_id_map.get(
                given_name.lstrip("@")
            )
            if not group_id:
                failed_to_find.append(given_name)
                continue
            try:
                response = client.usergroups_users_list(usergroup=group_id)
                if isinstance(response.data, dict):
                    user_ids.extend(response.data.get("users", []))
                else:
                    failed_to_find.append(given_name)
            except Exception as e:
                logger.error(f"Error fetching user group ids: {str(e)}")
                failed_to_find.append(given_name)
    except Exception as e:
        logger.error(f"Error fetching user groups: {str(e)}")
        failed_to_find = given_names

    return user_ids, failed_to_find


def fetch_group_ids_from_names(
    given_names: list[str], client: WebClient
) -> tuple[list[str], list[str]]:
    group_data: list[str] = []
    failed_to_find: list[str] = []

    try:
        response = client.usergroups_list()
        if not isinstance(response.data, dict):
            logger.error("Error fetching user groups")
            return group_data, given_names

        all_group_data = response.data.get("usergroups", [])

        name_id_map = {d["name"]: d["id"] for d in all_group_data}
        handle_id_map = {d["handle"]: d["id"] for d in all_group_data}

        for given_name in given_names:
            id = handle_id_map.get(given_name.lstrip("@"))
            id = id or name_id_map.get(given_name)
            if id:
                group_data.append(id)
            else:
                failed_to_find.append(given_name)
    except Exception as e:
        failed_to_find = given_names
        logger.error(f"Error fetching user groups: {str(e)}")

    return group_data, failed_to_find


def fetch_user_semantic_id_from_id(
    user_id: str | None, client: WebClient
) -> str | None:
    if not user_id:
        return None

    response = client.users_info(user=user_id)
    if not response["ok"]:
        return None

    user: dict = cast(dict[Any, dict], response.data).get("user", {})

    return (
        user.get("real_name")
        or user.get("name")
        or user.get("profile", {}).get("email")
    )


def read_slack_thread(
    tenant_id: str, channel: str, thread: str, client: WebClient
) -> list[ThreadMessage]:
    thread_messages: list[ThreadMessage] = []
    response = client.conversations_replies(channel=channel, ts=thread)
    replies = cast(dict, response.data).get("messages", [])
    for reply in replies:
        if "user" in reply and "bot_id" not in reply:
            message = reply["text"]
            user_sem_id = (
                fetch_user_semantic_id_from_id(reply.get("user"), client)
                or "Unknown User"
            )
            message_type = MessageType.USER
        else:
            blocks: Any
            is_onyx_bot_response = False

            reply_user = reply.get("user")
            reply_bot_id = reply.get("bot_id")

            self_slack_bot_user_id, self_slack_bot_bot_id = get_onyx_bot_auth_ids(
                tenant_id, client
            )
            if reply_user is not None and reply_user == self_slack_bot_user_id:
                is_onyx_bot_response = True

            if reply_bot_id is not None and reply_bot_id == self_slack_bot_bot_id:
                is_onyx_bot_response = True

            if is_onyx_bot_response:
                # OnyxBot response
                message_type = MessageType.ASSISTANT
                user_sem_id = "Assistant"

                # OnyxBot responses have both text and blocks
                # The useful content is in the blocks, specifically the first block unless there are
                # auto-detected filters
                blocks = reply.get("blocks")
                if not blocks:
                    logger.warning(f"OnyxBot response has no blocks: {reply}")
                    continue

                message = blocks[0].get("text", {}).get("text")

                # If auto-detected filters are on, use the second block for the actual answer
                # The first block is the auto-detected filters
                if message is not None and message.startswith("_Filters"):
                    if len(blocks) < 2:
                        logger.warning(f"Only filter blocks found: {reply}")
                        continue
                    # This is the OnyxBot answer format, if there is a change to how we respond,
                    # this will need to be updated to get the correct "answer" portion
                    message = reply["blocks"][1].get("text", {}).get("text")
            else:
                # Other bots are not counted as the LLM response which only comes from Onyx
                message_type = MessageType.USER
                bot_user_name = fetch_user_semantic_id_from_id(
                    reply.get("user"), client
                )
                user_sem_id = bot_user_name or "Unknown" + " Bot"

                # For other bots, just use the text as we have no way of knowing that the
                # useful portion is
                message = reply.get("text")
                if not message:
                    message = blocks[0].get("text", {}).get("text")

            if not message:
                logger.warning("Skipping Slack thread message, no text found")
                continue

        message = remove_onyx_bot_tag(tenant_id, message, client=client)
        thread_messages.append(
            ThreadMessage(message=message, sender=user_sem_id, role=message_type)
        )

    return thread_messages


def slack_usage_report(action: str, sender_id: str | None, client: WebClient) -> None:
    if DISABLE_TELEMETRY:
        return

    onyx_user = None
    sender_email = None
    try:
        sender_email = client.users_info(user=sender_id).data["user"]["profile"]["email"]  # type: ignore
    except Exception:
        logger.warning("Unable to find sender email")

    if sender_email is not None:
        with get_session_with_current_tenant() as db_session:
            onyx_user = get_user_by_email(email=sender_email, db_session=db_session)

    optional_telemetry(
        record_type=RecordType.USAGE,
        data={"action": action},
        user_id=str(onyx_user.id) if onyx_user else "Non-Onyx-Or-No-Auth-User",
    )


class SlackRateLimiter:
    def __init__(self) -> None:
        self.max_qpm: int | None = ONYX_BOT_MAX_QPM
        self.max_wait_time = ONYX_BOT_MAX_WAIT_TIME
        self.active_question = 0
        self.last_reset_time = time.time()
        self.waiting_questions: list[int] = []

    def refill(self) -> None:
        # If elapsed time is greater than the period, reset the active question count
        if (time.time() - self.last_reset_time) > 60:
            self.active_question = 0
            self.last_reset_time = time.time()

    def notify(
        self, client: WebClient, channel: str, position: int, thread_ts: str | None
    ) -> None:
        respond_in_thread_or_channel(
            client=client,
            channel=channel,
            receiver_ids=None,
            text=f"Your question has been queued. You are in position {position}.\nPlease wait a moment :hourglass_flowing_sand:",
            thread_ts=thread_ts,
        )

    def is_available(self) -> bool:
        if self.max_qpm is None:
            return True

        self.refill()
        return self.active_question < self.max_qpm

    def acquire_slot(self) -> None:
        self.active_question += 1

    def init_waiter(self) -> tuple[int, int]:
        func_randid = random.getrandbits(128)
        self.waiting_questions.append(func_randid)
        position = self.waiting_questions.index(func_randid) + 1

        return func_randid, position

    def waiter(self, func_randid: int) -> None:
        if self.max_qpm is None:
            return

        wait_time = 0
        while (
            self.active_question >= self.max_qpm
            or self.waiting_questions[0] != func_randid
        ):
            if wait_time > self.max_wait_time:
                raise TimeoutError
            time.sleep(2)
            wait_time += 2
            self.refill()

        del self.waiting_questions[0]


def get_feedback_visibility() -> FeedbackVisibility:
    try:
        return FeedbackVisibility(ONYX_BOT_FEEDBACK_VISIBILITY.lower())
    except ValueError:
        return FeedbackVisibility.PRIVATE


class TenantSocketModeClient(SocketModeClient):
    def __init__(self, tenant_id: str, slack_bot_id: int, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)
        self._tenant_id = tenant_id
        self.slack_bot_id = slack_bot_id
        self.bot_name: str = "Unnamed"

    @contextmanager
    def _set_tenant_context(self) -> Generator[None, None, None]:
        token = None
        try:
            if self._tenant_id:
                token = CURRENT_TENANT_ID_CONTEXTVAR.set(self._tenant_id)
            yield
        finally:
            if token:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

    def enqueue_message(self, message: str) -> None:
        with self._set_tenant_context():
            super().enqueue_message(message)

    def process_message(self) -> None:
        with self._set_tenant_context():
            super().process_message()

    def run_message_listeners(self, message: dict, raw_message: str) -> None:
        with self._set_tenant_context():
            super().run_message_listeners(message, raw_message)


================================================
FILE: backend/onyx/prompts/__init__.py
================================================


================================================
FILE: backend/onyx/prompts/basic_memory.py
================================================
# ruff: noqa: E501, W605 start

# Note that the user_basic_information is only included if we have at least 1 of the following: user_name, user_email, user_role
# This is included because sometimes we need to know the user's name or basic info to best generate the memory.
FULL_MEMORY_UPDATE_PROMPT = """
You are a memory update agent that helps the user add or update memories. You are given a list of existing memories and a new memory to add. \
Just as context, you are also given the last few user messages from the conversation which generated the new memory. You must determine if the memory is brand new or if it is related to an existing memory. \
If the new memory is an update to an existing memory or contradicts an existing memory, it should be treated as an update and you should reference the existing memory by memory_id (see below). \
The memory should omit the user's name and direct reference to the user - for example, a memory like "Yuhong prefers dark mode." should be modified to "Prefers dark mode." (if the user's name is Yuhong).

# Truncated chat history
{chat_history}{user_basic_information}

# User's existing memories
{existing_memories}

# New memory the user wants to insert
{new_memory}

# Response Style
You MUST respond in a json which follows the following format and keys:
```json
{{
    "operation": "add or update",
    "memory_id": "if the operation is update, the id of the memory to update, otherwise null",
    "memory_text": "the text of the memory to add or update"
}}
```
""".strip()
# ruff: noqa: E501, W605 end

MEMORY_USER_BASIC_INFORMATION_PROMPT = """

# User Basic Information
User name: {user_name}
User email: {user_email}
User role: {user_role}
"""


================================================
FILE: backend/onyx/prompts/chat_prompts.py
================================================
# ruff: noqa: E501, W605 start

from onyx.prompts.constants import REMINDER_TAG_NO_HEADER


DATETIME_REPLACEMENT_PAT = "{{CURRENT_DATETIME}}"
CITATION_GUIDANCE_REPLACEMENT_PAT = "{{CITATION_GUIDANCE}}"
REMINDER_TAG_REPLACEMENT_PAT = "{{REMINDER_TAG_DESCRIPTION}}"


# Note this uses a string pattern replacement so the user can also include it in their custom prompts. Keeps the replacement logic simple
# This is editable by the user in the admin UI.
# The first line is intended to help guide the general feel/behavior of the system.
DEFAULT_SYSTEM_PROMPT = f"""
You are an expert assistant who is truthful, nuanced, insightful, and efficient. \
Your goal is to deeply understand the user's intent, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. \
Whenever there is any ambiguity around the user's query (or more information would be helpful), you use available tools (if any) to get more context.

The current date is {DATETIME_REPLACEMENT_PAT}.{CITATION_GUIDANCE_REPLACEMENT_PAT}

# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.

{REMINDER_TAG_REPLACEMENT_PAT}
""".lstrip()


COMPANY_NAME_BLOCK = """
The user is at an organization called `{company_name}`.
"""

COMPANY_DESCRIPTION_BLOCK = """
Organization description: {company_description}
"""

# This is added to the system prompt prior to the tools section and is applied only if search tools have been run
REQUIRE_CITATION_GUIDANCE = """

CRITICAL: If referencing knowledge from searches, cite relevant statements INLINE using the format [1], [2], [3], etc. to reference the "document" field. \
DO NOT provide any links following the citations. Cite inline as opposed to leaving all citations until the very end of the response.
"""


# Reminder message if any search tool has been run anytime in the chat turn
CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc. based on the "document" field of the documents.
""".strip()

LAST_CYCLE_CITATION_REMINDER = """
You are on your last cycle and no longer have any tool calls available. You must answer the query now to the best of your ability.
""".strip()


# Reminder message that replaces the usual reminder if web_search was the last tool call
OPEN_URL_REMINDER = """
Remember that after using web_search, you are encouraged to open some pages to get more context unless the query is completely answered by the snippets.
Open the pages that look the most promising and high quality by calling the open_url tool with an array of URLs. Open as many as you want.

If you do have enough to answer, remember to provide INLINE citations using the "document" field in the format [1], [2], [3], etc.
""".strip()


IMAGE_GEN_REMINDER = """
Very briefly describe the image(s) generated. Do not include any links or attachments.
""".strip()


FILE_REMINDER = """
Your code execution generated file(s) with download links.
If you reference or share these files, use the exact markdown format [filename](file_link) with the file_link from the execution result.
""".strip()


# Specifically for OpenAI models, this prefix needs to be in place for the model to output markdown and correct styling
CODE_BLOCK_MARKDOWN = "Formatting re-enabled. "

# This is just for Slack context today
ADDITIONAL_CONTEXT_PROMPT = """
Here is some additional context which may be relevant to the user query:

{additional_context}
""".strip()


TOOL_CALL_RESPONSE_CROSS_MESSAGE = """
This tool call completed but the results are no longer accessible.
""".strip()

# This is used to add the current date and time to the prompt in the case where the Agent should be aware of the current
# date and time but the replacement pattern is not present in the prompt.
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."


CHAT_NAMING_SYSTEM_PROMPT = f"""
Given the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \
Make sure the name is in the same language as the user's first message.

{REMINDER_TAG_NO_HEADER}

IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()


CHAT_NAMING_REMINDER = """
Provide a short name for the conversation. Refer to other messages in the conversation (not including this one) to determine the language of the name.

IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/chat_tools.py
================================================
# These prompts are to support tool calling. Currently not used in the main flow or via any configs
# The current generation of LLM is too unreliable for this task.
# Onyx retrieval call as a tool option
DANSWER_TOOL_NAME = "Current Search"
DANSWER_TOOL_DESCRIPTION = "A search tool that can find information on any topic including up to date and proprietary knowledge."


# Tool calling format inspired from LangChain
TOOL_TEMPLATE = """
TOOLS
------
You can use tools to look up information that may be helpful in answering the user's \
original question. The available tools are:

{tool_overviews}

RESPONSE FORMAT INSTRUCTIONS
----------------------------
When responding to me, please output a response in one of two formats:

**Option 1:**
Use this if you want to use a tool. Markdown code snippet formatted in the following schema:

```json
{{
    "action": string, \\ The action to take. {tool_names}
    "action_input": string \\ The input to the action
}}
```

**Option #2:**
Use this if you want to respond directly to the user. Markdown code snippet formatted in the following schema:

```json
{{
    "action": "Final Answer",
    "action_input": string \\ You should put what you want to return to use here
}}
```
"""

# For the case where the user has not configured any tools to call, but still using the tool-flow
# expected format
TOOL_LESS_PROMPT = """
Respond with a markdown code snippet in the following schema:

```json
{{
    "action": "Final Answer",
    "action_input": string \\ You should put what you want to return to use here
}}
```
"""


# Second part of the prompt to include the user query
USER_INPUT = """
USER'S INPUT
--------------------
Here is the user's input \
(remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):

{user_input}
"""


# After the tool call, this is the following message to get a final answer
# Tools are not chained currently, the system must provide an answer after calling a tool
TOOL_FOLLOWUP = """
TOOL RESPONSE:
---------------------
{tool_output}

USER'S INPUT
--------------------
Okay, so what is the response to my last comment? If using information obtained from the tools you must \
mention it explicitly without mentioning the tool names - I have forgotten all TOOL RESPONSES!
If the tool response is not useful, ignore it completely.
{optional_reminder}{hint}
IMPORTANT! You MUST respond with a markdown code snippet of a json blob with a single action, and NOTHING else.
"""


# If no tools were used, but retrieval is enabled, then follow up with this message to get the final answer
TOOL_LESS_FOLLOWUP = """
Refer to the following documents when responding to my final query. Ignore any documents that are not relevant.

CONTEXT DOCUMENTS:
---------------------
{context_str}

FINAL QUERY:
--------------------
{user_query}

{hint_text}
"""


================================================
FILE: backend/onyx/prompts/compression_prompts.py
================================================
# Prompts for chat history compression via summarization.

# ruff: noqa: E501, W605 start
# Cutoff marker helps the LLM focus on summarizing only messages before this point.
# This improves "needle in haystack" accuracy by explicitly marking where to stop with an exact pattern which is also placed in locations easily attended to by the LLM (last user message and system prompt).
CONTEXT_CUTOFF_START_MARKER = "<context_cutoff>"
CONTEXT_CUTOFF_END_MARKER = "</context_cutoff>"

SUMMARIZATION_CUTOFF_MARKER = f"{CONTEXT_CUTOFF_START_MARKER} Stop summarizing the rest of the conversation past this point. {CONTEXT_CUTOFF_END_MARKER}"

SUMMARIZATION_PROMPT = f"""
You are a summarization system. Your task is to produce a detailed and accurate summary of a chat conversation up to a specified cutoff message. The cutoff will be marked by the string {CONTEXT_CUTOFF_START_MARKER}. \
IMPORTANT: Do not explicitly mention anything about the cutoff in your response. Do not situate the summary with respect to the cutoff. The context cutoff is only a system injected marker.

# Guidelines
- Only consider messages that occur at or before the cutoff point. Use the messages after it purely as context without including any of it in the summary.
- Preserve factual correctness and intent; do not infer or speculate.
- The summary should be information dense and detailed.
- The summary should be in paragraph format and long enough to capture all of the most prominent details.

# Focus on
- Key topics discussed.
- Decisions made, tools used, and conclusions reached.
- Open questions or unresolved items.
- Important constraints, preferences, or assumptions stated.
- Omit small talk, repetition, and stylistic filler unless it affects meaning.
""".strip()

PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK = """

# Existing summary
There is a previous summary of the conversation. Build on top of this when constructing the new overall summary of the conversation:
{previous_summary}
""".rstrip()

USER_REMINDER = f"Help summarize the conversation up to the cutoff point (do not mention anything related to the cutoff directly in your response). It should be a long form summary of the conversation up to the cutoff point as marked by {CONTEXT_CUTOFF_START_MARKER}. Be thorough."

PROGRESSIVE_USER_REMINDER = f"Update the existing summary by incorporating the new messages up to the cutoff point as marked by {CONTEXT_CUTOFF_START_MARKER} (do not mention anything related to the cutoff directly in your response). Be thorough and maintain the long form summary format."
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/constants.py
================================================
# ruff: noqa: E501, W605 start
CODE_BLOCK_PAT = "```\n{}\n```"
TRIPLE_BACKTICK = "```"
SYSTEM_REMINDER_TAG_OPEN = "<system-reminder>"
SYSTEM_REMINDER_TAG_CLOSE = "</system-reminder>"

# Tags format inspired by Anthropic and OpenCode
REMINDER_TAG_NO_HEADER = f"""
User messages may include {SYSTEM_REMINDER_TAG_OPEN} and {SYSTEM_REMINDER_TAG_CLOSE} tags. These {SYSTEM_REMINDER_TAG_OPEN} tags contain useful information and reminders. \
They are automatically added by the system and are not actual user inputs. Behave in accordance to these instructions if relevant, and continue normally if they are not.
""".strip()

REMINDER_TAG_DESCRIPTION = f"""
# System Reminders
{REMINDER_TAG_NO_HEADER}
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/contextual_retrieval.py
================================================
# NOTE: the prompt separation is partially done for efficiency; previously I tried
# to do it all in one prompt with sequential format() calls but this will cause a backend
# error when the document contains any {} as python will expect the {} to be filled by
# format() arguments

# ruff: noqa: E501, W605 start
CONTEXTUAL_RAG_PROMPT1 = """<document>
{document}
</document>
Here is the chunk we want to situate within the whole document"""

CONTEXTUAL_RAG_PROMPT2 = """<chunk>
{chunk}
</chunk>
Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.
""".rstrip()

CONTEXTUAL_RAG_TOKEN_ESTIMATE = 64  # 19 + 45

DOCUMENT_SUMMARY_PROMPT = """<document>
{document}
</document>
Please give a short succinct summary of the entire document. Answer only with the succinct summary and nothing else.
""".rstrip()

DOCUMENT_SUMMARY_TOKEN_ESTIMATE = 50
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/deep_research/__init__.py
================================================


================================================
FILE: backend/onyx/prompts/deep_research/dr_tool_prompts.py
================================================
GENERATE_PLAN_TOOL_NAME = "generate_plan"


GENERATE_REPORT_TOOL_NAME = "generate_report"


RESEARCH_AGENT_TOOL_NAME = "research_agent"


# This is to ensure that even the non-reasoning models can have an ok time with this more complex flow.
THINK_TOOL_NAME = "think_tool"


# ruff: noqa: E501, W605 start

# Hard for the open_url tool to be called for a ton of search results all at once so limit to 3
WEB_SEARCH_TOOL_DESCRIPTION = """

## web_search
Use the `web_search` tool to get search results from the web. You should use this tool to get context for your research. These should be optimized for search engines like Google. \
Use concise and specific queries and avoid merging multiple queries into one. You can call web_search with multiple queries at once (3 max) but generally only do this when there is a clear opportunity for parallel searching. \
If you use multiple queries, ensure that the queries are related in topic but not similar such that the results would be redundant.
"""

# This one is mostly similar to the one for the main flow but there won't be any user specified URLs to open.
OPEN_URLS_TOOL_DESCRIPTION = f"""

## open_urls
Use the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \
You should almost always use open_urls after a web_search call and sometimes after reasoning with the {THINK_TOOL_NAME} tool.
"""

OPEN_URLS_TOOL_DESCRIPTION_REASONING = """

## open_urls
Use the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \
You should almost always use open_urls after a web_search call.
"""

# NOTE: Internal search tool uses the same description as the default flow, not duplicating here.

# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/deep_research/orchestration_layer.py
================================================
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_PLAN_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import RESEARCH_AGENT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME


# ruff: noqa: E501, W605 start
CLARIFICATION_PROMPT = f"""
You are a clarification agent that runs prior to deep research. Assess whether you need to ask clarifying questions, or if the user has already provided enough information for you to start research. \
CRITICAL - Never directly answer the user's query, you must only ask clarifying questions or call the `{GENERATE_PLAN_TOOL_NAME}` tool.

If the user query is already very detailed or lengthy (more than 3 sentences), do not ask for clarification and instead call the `{GENERATE_PLAN_TOOL_NAME}` tool.

For context, the date is {{current_datetime}}.

Be conversational and friendly, prefer saying "could you" rather than "I need" etc.

If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Respond in the same language as the user's query.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}
- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.
""".strip()


INTERNAL_SEARCH_CLARIFICATION_GUIDANCE = """
- The deep research system is connected with organization internal document search and web search capabilities. In cases where it is unclear which source is more appropriate, ask the user to clarify.
"""

# Here there is a bit of combating model behavior which during alignment may be overly tuned to be cautious about access to data and feasibility.
# Sometimes the model will just apologize and claim the task is not possible, hence the long section following CRITICAL.
RESEARCH_PLAN_PROMPT = """
You are a research planner agent that generates the high level approach for deep research on a user query. Analyze the query carefully and break it down into main concepts and areas of exploration. \
Stick closely to the user query and stay on topic but be curious and avoid duplicate or overlapping exploration directions. \
Be sure to take into account the time sensitive aspects of the research topic and make sure to emphasize up to date information where appropriate. \
Focus on providing thorough research of the user's query over being helpful.

CRITICAL - You MUST only output the research plan for the deep research flow and nothing else, you are not responding to the user. \
Do not worry about the feasibility of the plan or access to data or tools, a different deep research flow will handle that.

For context, the date is {current_datetime}.

The research plan should be formatted as a numbered list of steps and have 6 or less individual steps.

Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.

Output only the numbered list of steps with no additional prefix or suffix.
""".strip()


# Specifically for some models, it really struggles to not just answer the user when there are questions about internal knowledge.
# A reminder (specifically the fact that it's also a User type message) helps to prevent this.
RESEARCH_PLAN_REMINDER = """
Remember to only output the research plan and nothing else. Do not worry about the feasibility of the plan or data access.

Your response must only be a numbered list of steps with no additional prefix or suffix.
""".strip()


ORCHESTRATOR_PROMPT = f"""
You are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \
This delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.

For context, the date is {{current_datetime}}.

Before calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched. \
There are cases where new discoveries from research may lead to a deviation from the original research plan.
In these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.

NEVER output normal response tokens, you must only call tools.

# Tools
You have currently used {{current_cycle_count}} of {{max_cycles}} max research cycles. You do not need to use all cycles.

## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.

CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}

You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

You are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the research tasks are not dependent on each other, which is typically the case. NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.

## {GENERATE_REPORT_TOOL_NAME}
You should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:
- You have researched all of the relevant topics of the research plan.
- You have shifted away from the original research plan and believe that you are done.
- You have all of the information needed to thoroughly answer all aspects of the user's query.
- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.

## {THINK_TOOL_NAME}
CRITICAL - use the {THINK_TOOL_NAME} to reason between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. You should treat this as chain-of-thought reasoning to think deeply on what to do next. \
Be curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format, do not use bullet points or lists.

NEVER use the {THINK_TOOL_NAME} in parallel with other {RESEARCH_AGENT_TOOL_NAME} or {GENERATE_REPORT_TOOL_NAME}.

Before calling {GENERATE_REPORT_TOOL_NAME}, double check that all aspects of the user's query have been researched and that all key topics around the plan have been researched (unless you have gone in a different direction).

# Research Plan
{{research_plan}}
""".strip()


INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE = """
 If necessary, clarify if the research agent should focus mostly on organization internal searches, web searches, or a combination of both. If the task doesn't require a clear priority, don't add sourcing guidance.
""".strip(
    "\n"
)


USER_ORCHESTRATOR_PROMPT = """
Remember to refer to the system prompt and follow how to use the tools. Call the {THINK_TOOL_NAME} between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. Never run more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.

Don't mention this reminder or underlying details about the system.
""".strip()


FINAL_REPORT_PROMPT = """
You are the final answer generator for a deep research task. Your job is to produce a thorough, balanced, and comprehensive answer on the research question provided by the user. \
You have access to high-quality, diverse sources collected by secondary research agents as well as their analysis of the sources.

IMPORTANT - You get straight to the point, never providing a title and avoiding lengthy introductions/preambles.

For context, the date is {current_datetime}.

Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long. \
Structure your response logically into relevant sections. You may find it helpful to reference the research plan to help structure your response but do not limit yourself to what is contained in the plan.

You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.

Provide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents.
""".strip()


USER_FINAL_REPORT_QUERY = f"""
The original research plan is included below (use it as a helpful reference but do not limit yourself to this):
```
{{research_plan}}
```

Based on all of the context provided in the research history, provide a comprehensive, well structured, and insightful answer to the user's previous query. \
CRITICAL: be extremely thorough in your response and address all relevant aspects of the query.

Ignore the format styles of the intermediate {RESEARCH_AGENT_TOOL_NAME} reports, those are not end user facing and different from your task.

Provide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents. The citations should be just a number in a bracket, nothing additional.
""".strip()


# Reasoning Model Variants of the prompts
ORCHESTRATOR_PROMPT_REASONING = f"""
You are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \
This delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.

For context, the date is {{current_datetime}}.

Before calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched.
There are cases where new discoveries from research may lead to a deviation from the original research plan. In these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.

Between calls, think deeply on what to do next. Be curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format for your reasoning, do not use bullet points or lists.

NEVER output normal response tokens, you must only call tools.

# Tools
You have currently used {{current_cycle_count}} of {{max_cycles}} max research cycles. You do not need to use all cycles.

## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.

CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}

You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

You are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the research tasks are not dependent on each other, which is typically the case. NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.

## {GENERATE_REPORT_TOOL_NAME}
You should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:
- You have researched all of the relevant topics of the research plan.
- You have shifted away from the original research plan and believe that you are done.
- You have all of the information needed to thoroughly answer all aspects of the user's query.
- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.

# Research Plan
{{research_plan}}
""".strip()


USER_ORCHESTRATOR_PROMPT_REASONING = """
Remember to refer to the system prompt and follow how to use the tools. \
You are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel when the research tasks are not dependent on each other, but never call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.

Don't mention this reminder or underlying details about the system.
""".strip()


# Only for the first cycle, we encourage the model to research more, since it is unlikely that it has already addressed all parts of the plan at this point.
FIRST_CYCLE_REMINDER_TOKENS = 100
FIRST_CYCLE_REMINDER = """
Make sure all parts of the user question and the plan have been thoroughly explored before calling generate_report. If new interesting angles have been revealed from the research, you may deviate from the plan to research new directions.
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/deep_research/research_agent.py
================================================
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME


MAX_RESEARCH_CYCLES = 8

# ruff: noqa: E501, W605 start
RESEARCH_AGENT_PROMPT = f"""
You are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \
You iteratively call the tools available to you including {{available_tools}} until you have completed your research at which point you call the {GENERATE_REPORT_TOOL_NAME} tool.

NEVER output normal response tokens, you must only call tools.

For context, the date is {{current_datetime}}.

# Tools
You have a limited number of cycles to complete your research and you do not have to use all cycles. You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\
{{optional_internal_search_tool_description}}\
{{optional_web_search_tool_description}}\
{{optional_open_url_tool_description}}
## {THINK_TOOL_NAME}
CRITICAL - use the think tool after every set of searches and reads (so search, read some pages, then think and repeat). \
You MUST use the {THINK_TOOL_NAME} before calling the web_search tool for all calls to web_search except for the first call. \
Use the {THINK_TOOL_NAME} before calling the {GENERATE_REPORT_TOOL_NAME} tool.

After a set of searches + reads, use the {THINK_TOOL_NAME} to analyze the results and plan the next steps.
- Reflect on the key information found with relation to the task.
- Reason thoroughly about what could be missing, the knowledge gaps, and what queries might address them, \
or why there is enough information to answer the research task comprehensively.

## {GENERATE_REPORT_TOOL_NAME}
Once you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool. \
You should only call this tool after you have fully researched the topic. \
Consider other potential areas of research and weigh that against the materials already gathered before calling this tool.
""".strip()


RESEARCH_REPORT_PROMPT = """
You are a highly capable and precise research sub-agent that has conducted research on a specific topic. \
Your job is now to organize the findings to return a comprehensive report that preserves all relevant statements and information that has been gathered in the existing messages. \
The report will be seen by another agent instead of a user so keep it free of formatting or commentary and instead focus on the facts only. \
Do not give it a title, do not break it down into sections, and do not provide any of your own conclusions/analysis.

You may see a list of tool calls in the history but you do not have access to tools anymore. You should only use the information in the history to create the report.

CRITICAL - This report should be as long as necessary to return ALL of the information that the researcher has gathered. It should be several pages long so as to capture as much detail as possible from the research. \
It cannot be stressed enough that this report must be EXTREMELY THOROUGH and COMPREHENSIVE. Only this report is going to be returned, so it's CRUCIAL that you don't lose any details from the raw messages.

Remove any obviously irrelevant or duplicative information.

If a statement seems not trustworthy or is contradictory to other statements, it is important to flag it.

Write the report in the same language as the provided task.

Cite all sources INLINE using the format [1], [2], [3], etc. based on the `document` field of the source. \
Cite inline as opposed to leaving all citations until the very end of the response.
"""


USER_REPORT_QUERY = """
Please write me a comprehensive report on the research topic given the context above. As a reminder, the original topic was:
{research_topic}

Remember to include AS MUCH INFORMATION AS POSSIBLE and as faithful to the original sources as possible. \
Keep it free of formatting and focus on the facts only. Be sure to include all context for each fact to avoid misinterpretation or misattribution. \
Respond in the same language as the topic provided above.

Cite every fact INLINE using the format [1], [2], [3], etc. based on the `document` field of the source.

CRITICAL - BE EXTREMELY THOROUGH AND COMPREHENSIVE, YOUR RESPONSE SHOULD BE SEVERAL PAGES LONG.
"""


# Reasoning Model Variants of the prompts
RESEARCH_AGENT_PROMPT_REASONING = f"""
You are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \
You iteratively call the tools available to you including {{available_tools}} until you have completed your research at which point you call the {GENERATE_REPORT_TOOL_NAME} tool. Between calls, think about the results of the previous tool call and plan the next steps. \
Reason thoroughly about what could be missing, identify knowledge gaps, and what queries might address them. Or consider why there is enough information to answer the research task comprehensively.

Once you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool.

NEVER output normal response tokens, you must only call tools.

For context, the date is {{current_datetime}}.

# Tools
You have a limited number of cycles to complete your research and you do not have to use all cycles. You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\
{{optional_internal_search_tool_description}}\
{{optional_web_search_tool_description}}\
{{optional_open_url_tool_description}}
## {GENERATE_REPORT_TOOL_NAME}
Once you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool. You should only call this tool after you have fully researched the topic.
""".strip()


OPEN_URL_REMINDER_RESEARCH_AGENT = """
Remember that after using web_search, you are encouraged to open some pages to get more context unless the query is completely answered by the snippets.
Open the pages that look the most promising and high quality by calling the open_url tool with an array of URLs.
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/federated_search.py
================================================
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS

SLACK_QUERY_EXPANSION_PROMPT = f"""
Rewrite the user's query into at most {MAX_SLACK_QUERY_EXPANSIONS} keyword-only queries for Slack's keyword search.

Slack search behavior:
- Pure keyword AND search (no semantics)
- More words = fewer matches, so keep queries concise (1-3 words)

ALWAYS include:
- Person names (e.g., "Sarah Chen", "Mike Johnson") - people search for messages from/about specific people
- Project/product names, technical terms, proper nouns
- Actual content words: "performance", "bug", "deployment", "API", "error"

DO NOT include:
- Meta-words: "topics", "conversations", "discussed", "summary", "messages"
- Temporal: "today", "yesterday", "week", "month", "recent", "last"
- Channel names: "general", "eng-general", "random"

Examples:

Query: "what are the big topics in eng-general this week?"
Output:

Query: "messages with Sarah about the deployment"
Output:
Sarah deployment
Sarah
deployment

Query: "what did Mike say about the budget?"
Output:
Mike budget
Mike
budget

Query: "performance issues in eng-general"
Output:
performance issues
performance
issues

Query: "what did we discuss about the API migration?"
Output:
API migration
API
migration

Now process this query:

{{query}}

Output (keywords only, one per line, NO explanations or commentary):
"""

SLACK_DATE_EXTRACTION_PROMPT = """
Extract the date range from the user's query and return it in a structured format.

Current date context:
- Today: {today}
- Current time: {current_time}

Guidelines:
1. Return a JSON object with "days_back" (integer) indicating how many days back to search
2. If no date/time is mentioned, return {{"days_back": null}}
3. Interpret relative dates accurately:
   - "today" or "today's" = 0 days back
   - "yesterday" = 1 day back
   - "last week" = 7 days back
   - "last month" = 30 days back
   - "last X days" = X days back
   - "past X days" = X days back
   - "this week" = 7 days back
   - "this month" = 30 days back
4. For creative expressions, interpret intent:
   - "recent" = 7 days back
   - "recently" = 7 days back
   - "lately" = 14 days back
5. Always be conservative - if uncertain, use a longer time range

User query: {query}

Return ONLY a valid JSON object in this format: {{"days_back": <integer or null>}}
Nothing else.
"""


================================================
FILE: backend/onyx/prompts/filter_extration.py
================================================
# The following prompts are used for extracting filters to apply along with the query in the
# document index. For example, a filter for dates or a filter by source type such as GitHub
# or Slack
SOURCES_KEY = "sources"

# Smaller followup prompts in time_filter.py
TIME_FILTER_PROMPT = """
You are a tool to identify time filters to apply to a user query for a downstream search \
application. The downstream application is able to use a recency bias or apply a hard cutoff to \
remove all documents before the cutoff. Identify the correct filters to apply for the user query.

The current day and time is {current_day_time_str}.

Always answer with ONLY a json which contains the keys "filter_type", "filter_value", \
"value_multiple" and "date".

The valid values for "filter_type" are "hard cutoff", "favors recent", or "not time sensitive".
The valid values for "filter_value" are "day", "week", "month", "quarter", "half", or "year".
The valid values for "value_multiple" is any number.
The valid values for "date" is a date in format MM/DD/YYYY, ALWAYS follow this format.
""".strip()


# Smaller followup prompts in source_filter.py
# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contains "web" but not
# "confluence" and the user asks for confluence related things, the LLM will select "web" since
# confluence is accessed as a website. This cannot be fixed without also reducing the capability
# to match things like repository->github, website->web, etc.
# This is generally not a big issue though as if the company has confluence, hopefully they add
# a connector for it or the user is aware that confluence has not been added.
SOURCE_FILTER_PROMPT = f"""
Given a user query, extract relevant source filters for use in a downstream search tool.
Respond with a json containing the source filters or null if no specific sources are referenced.
ONLY extract sources when the user is explicitly limiting the scope of where information is \
coming from.
The user may provide invalid source filters, ignore those.

The valid sources are:
{{valid_sources}}
{{web_source_warning}}
{{file_source_warning}}


ALWAYS answer with ONLY a json with the key "{SOURCES_KEY}". \
The value for "{SOURCES_KEY}" must be null or a list of valid sources.

Sample Response:
{{sample_response}}
""".strip()

WEB_SOURCE_WARNING = """
Note: The "web" source only applies to when the user specifies "website" in the query. \
It does not apply to tools such as Confluence, GitHub, etc. that have a website.
""".strip()

FILE_SOURCE_WARNING = """
Note: The "file" source only applies to when the user refers to uploaded files in the query.
""".strip()


# Use the following for easy viewing of prompts
if __name__ == "__main__":
    print(TIME_FILTER_PROMPT)
    print("------------------")
    print(SOURCE_FILTER_PROMPT)


================================================
FILE: backend/onyx/prompts/image_analysis.py
================================================
# Used for creating embeddings of images for vector search
DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT = """
You are an assistant for summarizing images for retrieval.
Summarize the content of the following image and be as precise as possible.
The summary will be embedded and used to retrieve the original image.
Therefore, write a concise summary of the image that is optimized for retrieval.
"""

# Prompt for generating image descriptions with filename context
DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT = """
Describe precisely and concisely what the image shows.
"""


# Used for analyzing images in response to user queries at search time
DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT = (
    "You are an AI assistant specialized in describing images.\n"
    "You will receive a user question plus an image URL. Provide a concise textual answer.\n"
    "Focus on aspects of the image that are relevant to the user's question.\n"
    "Be specific and detailed about visual elements that directly address the query.\n"
)


================================================
FILE: backend/onyx/prompts/kg_prompts.py
================================================
# Standards
SEPARATOR_LINE = "-------"
SEPARATOR_LINE_LONG = "---------------"
NO_EXTRACTION = "No extraction of knowledge graph objects was feasible."
YES = "yes"
NO = "no"

# Framing/Support/Template Prompts
ENTITY_TYPE_SETTING_PROMPT = f"""
{SEPARATOR_LINE}
{{entity_types}}
{SEPARATOR_LINE}
""".strip()

RELATIONSHIP_TYPE_SETTING_PROMPT = f"""
Here are the types of relationships:
{SEPARATOR_LINE}
{{relationship_types}}
{SEPARATOR_LINE}
""".strip()

EXTRACTION_FORMATTING_PROMPT = r"""
{{"entities": [<a list of entities of the prescribed entity types that you can reliably identify in the text, \
formatted as '<ENTITY_TYPE_NAME>::<entity_name>' (please use that capitalization). If allowed options \
are provided above, you can only extract those types of entities! Again, there should be an 'Other' \
option. Pick this if none of the others apply.>],
"relationships": [<a list of IMPORTANT relationships between the identified entities, formatted as \
'<SOURCE_ENTITY_TYPE_NAME>::<source_entity_name>__<a word or two that captures the nature \
of the relationship (if appropriate, include a judgment, as in 'likes' or 'dislikes' vs. 'uses', etc.). \
Common relationships may be: 'likes', 'dislikes', 'uses', 'is interested in', 'mentions', 'addresses', \
'participates in', etc., but look at the text to find the most appropriate relationship. \
Use spaces here for word separation. DO NOT INCLUDE RELATIONSHIPS THAT ARE SIMPLY MENTIONED, BUT ONLY \
THOSE THAT ARE CENTRAL TO THE CONTENT! >\
__<TARGET_ENTITY_TYPE_NAME>::<target_entity_name>'>],
"terms": [<a comma-separated list of high-level terms (each one one or two words) that you can reliably \
identify in the text, each formatted simply as '<term>'>]
}}
""".strip()

QUERY_ENTITY_EXTRACTION_FORMATTING_PROMPT = r"""
{{"entities": [<a list of entities of the prescribed entity types that you can reliably identify in the text, \
formatted as '<ENTITY_TYPE_NAME>::<entity_name>' (please use that capitalization)>. Each entity \
also should be followed by a list of comma-separated attribute filters for the entity, if referred to in the \
question for that entity. CRITICAL: you can only use attributes that are mentioned above for the \
entity type in question. Example: 'ACCOUNT::* -- [account_type: customer, status: active]' if the question is \
'list all customer accounts', and ACCOUNT was an entity type with these attribute key/values allowed.] \
"time_filter": <if needed, a SQL-like filter for a field called 'event_date'. Do not select anything here \
unless you are sure that the question asks for that filter. Only apply a time_filter if the question explicitly \
mentions a specific date, time period, or event that can be directly translated into a date filter. Do not assume \
the current date, if given, as the event date or to imply that it should be a filter. Do not make assumptions here \
but only use the information provided to infer whether there should be a time_filter, and if so, what it should be.>
}}
""".strip()

QUERY_RELATIONSHIP_EXTRACTION_FORMATTING_PROMPT = r"""
{{"relationships": [<a list of relationships between the identified entities, formatted as \
'<SOURCE_ENTITY_TYPE_NAME>::<source_entity_name>__<a word or two that captures the nature \
of the relationship (if appropriate, include a judgment, as in 'likes' or 'dislikes' vs. 'uses', etc.)>\
__<TARGET_ENTITY_TYPE_NAME>::<target_entity_name>'>]
}}
""".strip()

EXAMPLE_1 = r"""
{{"entities": ["ACCOUNT::Nike", "CONCERN::*"],
    "relationships": ["ACCOUNT::Nike__had__CONCERN::*"], "terms": []}}
""".strip()

EXAMPLE_2 = r"""
{{"entities": ["ACCOUNT::Nike", "CONCERN::performance"],
    "relationships": ["ACCOUNT::*__had_issues__CONCERN::performance"], "terms": ["performance issue"]}}
""".strip()

EXAMPLE_3 = r"""
{{"entities": ["ACCOUNT::Nike", "CONCERN::performance", "CONCERN::user_experience"],
    "relationships": ["ACCOUNT::Nike__had__CONCERN::performance",
                      "ACCOUNT::Nike__solved__CONCERN::user_experience"],
    "terms": ["performance", "user experience"]}}
""".strip()

EXAMPLE_4 = r"""
{{"entities": ["ACCOUNT::Nike", "FEATURE::dashboard", "CONCERN::performance"],
    "relationships": ["ACCOUNT::Nike__had__CONCERN::performance",
                      "ACCOUNT::Nike__had_issues__FEATURE::dashboard",
                      "ACCOUNT::NIKE__gets_value_from__FEATURE::dashboard"],
    "terms": ["value", "performance"]}}
""".strip()

RELATIONSHIP_EXAMPLE_1 = r"""
'Which issues did Nike report?' and the extracted entities were found to be:

  "ACCOUNT::Nike", "CONCERN::*"

then a valid relationship extraction could be:

{{"relationships": ["ACCOUNT::Nike__had__CONCERN::*"]}}
""".strip()

RELATIONSHIP_EXAMPLE_2 = r"""
'Did Nike say anything about performance issues?' and the extracted entities were found to be:

"ACCOUNT::Nike", "CONCERN::performance"

then a much more suitable relationship extraction could be:
{{"relationships": ["ACCOUNT::*__had_issues__CONCERN::performance"]}}
""".strip()

RELATIONSHIP_EXAMPLE_3 = r"""
'Did Nike report some performance issues with our solution? And were they happy that the user experience issue got solved?', \
and the extracted entities were found to be:

"ACCOUNT::Nike", "CONCERN::performance", "CONCERN::user_experience"

then a valid relationship extraction could be:

{{"relationships": ["ACCOUNT::Nike__had__CONCERN::performance",
                      "ACCOUNT::Nike__solved__CONCERN::user_experience"]}}
""".strip()

RELATIONSHIP_EXAMPLE_4 = r"""
'Nike reported some performance issues with our dashboard solution, but do they think it delivers great value nevertheless?' \
and the extracted entities were found to be:

"ACCOUNT::Nike", "FEATURE::dashboard", "CONCERN::performance"

then a valid relationship extraction could be:
Example 4:

{{"relationships": ["ACCOUNT::Nike__had__CONCERN::performance",
                      "ACCOUNT::Nike__had_issues__FEATURE::dashboard",
                      "ACCOUNT::NIKE__gets_value_from__FEATURE::dashboard"]}}

Explanation:
 - Nike did report performance concerns
 - Nike had problems with the dashboard, which is a feature
 - We are interested in the value relationship between Nike and the dashboard feature

""".strip()

RELATIONSHIP_EXAMPLE_5 = r"""
'In which emails did Nike discuss their issues with the dashboard?' \
and the extracted entities were found to be:

"ACCOUNT::Nike", "FEATURE::dashboard", "EMAIL::*"

then a valid relationship extraction could be:

{{"relationships": ["ACCOUNT::Nike__had__CONCERN::*",
                      "ACCOUNT::Nike__had_issues__FEATURE::dashboard",
                      "ACCOUNT::NIKE__in__EMAIL::*",
                      "EMAIL::*__discusses__FEATURE::dashboard",
                      "EMAIL::*Nike__had__CONCERN::* "]}}
Explanation:
 - Nike did report unspecified concerns
 - Nike had problems with the dashboard, which is a feature
 - We are interested in emails that Nike exchanged with us
""".strip()

RELATIONSHIP_EXAMPLE_6 = r"""
'List the last 5 emails that Lisa exchanged with Nike:' \
and the extracted entities were found to be:

"ACCOUNT::Nike", "EMAIL::*", "EMPLOYEE::Lisa"

then a valid relationship extraction could be:

{{"relationships": ["ACCOUNT::Nike__had__CONCERN::*",
                      "ACCOUNT::Nike__had_issues__FEATURE::dashboard",
                      "ACCOUNT::NIKE__in__EMAIL::*"]}}
Explanation:
 - Nike did report unspecified concerns
 - Nike had problems with the dashboard, which is a feature
 - We are interested in emails that Nike exchanged with us
""".strip()


ENTITY_EXAMPLE_1 = r"""
{{"entities": ["ACCOUNT::Nike--[]", "CONCERN::*--[]"]}}
""".strip()

ENTITY_EXAMPLE_2 = r"""
{{"entities": ["ACCOUNT::Nike--[]", "CONCERN::performance--[]"]}}
""".strip()

ENTITY_EXAMPLE_3 = r"""
{{"entities": ["ACCOUNT::*--[]", "CONCERN::performance--[]", "CONCERN::user_experience--[]"]}}
""".strip()

ENTITY_EXAMPLE_4 = r"""
{{"entities": ["ACCOUNT::*--[]", "CONCERN::performance--[degree: severe]"]}}
""".strip()

MASTER_EXTRACTION_PROMPT = f"""
You are an expert in the area of knowledge extraction in order to construct a knowledge graph. You are given a text \
and asked to extract entities, relationships, and terms from it that you can reliably identify.

Here are the entity types that are available for extraction. Some of them may have a description, others \
should be obvious. Also, for a given entity allowed options may be provided. If allowed options are provided, \
you can only extract those types of entities! If no allowed options are provided, take your best guess.

You can ONLY extract entities of these types and relationships between objects of these types:
{SEPARATOR_LINE}
{ENTITY_TYPE_SETTING_PROMPT}
{SEPARATOR_LINE}
Please format your answer in this format:
{SEPARATOR_LINE}
{EXTRACTION_FORMATTING_PROMPT}
{SEPARATOR_LINE}

The list above here is the exclusive, only list of entities you can choose from!

Here are some important additional instructions. (For the purpose of illustration, assume that ]
 "ACCOUNT", "CONCERN", and "FEATURE" are all in the list of entity types above, and shown actual \
entities fall into allowed options. Note that this \
is just assumed for these examples, but you MUST use only the entities above for the actual extraction!)

- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.
* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.
As an example, if the text would say:
 'Nike reported that they had issues'
then a valid extraction could be:
Example 1:
{EXAMPLE_1}

* If on the other hand the text would say:
'Nike reported that they had performance issues'
then a much more suitable extraction could be:
Example 2:
{EXAMPLE_2}

- You can extract multiple relationships between the same two entity types.
As an example, if the text would say:
'Nike reported some performance issues with our solution, but they are very happy that the user experience issue got solved.'
then a valid extraction could be:
Example 3:
{EXAMPLE_3}

- You can extract multiple relationships between the same two actual entities if you think that \
there are multiple relationships between them based on the text.
As an example, if the text would say:
'Nike reported some performance issues with our dashboard solution, but they think it delivers great value.'
then a valid extraction could be:
Example 4:
{EXAMPLE_4}

Note that effectively a three-way relationship (Nike - performance issues - dashboard) extracted as two individual \
relationships.

- Again,
   -  you should only extract entities belonging to the entity types above - but do extract all that you \
can reliably identify in the text
   - use refer to 'all' entities in an entity type listed above by using '*' as the entity name
   - only extract important relationships that signify something non-trivial, expressing things like \
needs, wants, likes, dislikes, plans, interests, lack of interests, problems the account is having, etc.
   - you MUST only use the initial list of entities provided! Ignore the entities in the examples unless \
they are also part of the initial list of entities! This is essential!
   - only extract relationships between the entities extracted first!


{SEPARATOR_LINE}

Here is the text you are asked to extract knowledge from, if needed with additional information about any participants:
{SEPARATOR_LINE}
---content---
{SEPARATOR_LINE}
""".strip()


QUERY_ENTITY_EXTRACTION_PROMPT = f"""
You are an expert in the area of knowledge extraction and using knowledge graphs. You are given a question \
and asked to extract entities (with attributes if applicable) that you can reliably identify, which will then
be matched with a known entity in the knowledge graph. You are also asked to extract time constraints information \
from the QUESTION. Some time constraints will be captured by entity attributes if \
the entity type has a fitting attribute (example: 'created_at' could be a candidate for that), other times
we will extract an explicit time filter if no attribute fits. (Note regarding 'last', 'first', etc.: DO NOT \
imply the need for a time filter just because the question asks for something that is not the current date. \
They will relate to ordering that we will handle separately later).

In case useful, today is ---today_date--- and the user asking is ---user_name---, which may or may not be relevant.
Here are the entity types that are available for extraction. Some of them may have \
a description, others should be obvious. Also, notice that some may have attributes associated with them, which will \
be important later.
You can ONLY extract entities of these types:
{SEPARATOR_LINE}
{ENTITY_TYPE_SETTING_PROMPT}
{SEPARATOR_LINE}

The list above here is the exclusive, only list of entities you can choose from!

Also, note that there are fixed relationship types between these entities. Please consider those \
as well so to make sure that you are not missing implicit entities! Implicit entities are often \
in verbs ('emailed to', 'talked to', ...). Also, they may be used to connect entities that are \
clearly in the question.

{SEPARATOR_LINE}
{RELATIONSHIP_TYPE_SETTING_PROMPT}
{SEPARATOR_LINE}

Here are some important additional instructions. (For the purpose of illustration, assume that \
 "ACCOUNT", "CONCERN", "EMAIL", and "FEATURE" are all in the list of entity types above, and the \
attribute options for "CONCERN" include 'degree' with possible values that include 'severe'. Note that this \
is just assumed for these examples, but you MUST use only the entities above for the actual extraction!)

- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.
* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.
As an example, if the question would say:
 'Which issues did Nike report?'
then a valid entity and term extraction could be:
Example 1:
{ENTITY_EXAMPLE_1}

* If on the other hand the question would say:
'Did Nike say anything about performance issues?'
then a much more suitable entity and term extraction could be:
Example 2:
{ENTITY_EXAMPLE_2}

* Then, if the question is:
'Who reported performance issues?'
then a suitable entity and term extraction could be:
Example 3:
{ENTITY_EXAMPLE_3}

* Then, if we inquire about an entity with a specific attribute :
'Who reported severe performance issues?'
then a suitable entity and term extraction could be:
Example 3:
{ENTITY_EXAMPLE_4}

- Again,
   -  you should only extract entities belonging to the entity types above - but do extract all that you \
can reliably identify in the text
   - if you refer to all/any/an unspecified entity of an entity type listed above, use '*' as the entity name
   - similarly, if a specific entity type is referred to in general, you should use '*' as the entity name
   - you MUST only use the initial list of entities provided! Ignore the entities in the examples unless \
they are also part of the initial list of entities! This is essential!
   - don't forget to provide answers also to the event filtering and whether documents need to be inspected!
   - 'who' often refers to individuals or accounts.
   - see whether any of the entities are supposed to be narrowed down by an attribute value. The precise attribute \
and the value would need to be taken from the specification, as the question may use different words and the \
actual attribute may be implied.
   - don't just look at the entities that are mentioned in the question but also those that the question \
may be about.
  - be very careful that you only extract attributes that are listed above for the entity type in question! Do \
not make up attributes even if they are implied! Particularly if there is a relationship type that would \
actually represent that information, you MUST not extract the information as an attribute. We \
will extract the relationship type later.
  - For the values of attributes, look at the possible values above! For example 'open' may refer to \
'backlog', 'todo', 'in progress', etc. In cases like that construct a ';'-separated list of values that you think may fit \
what is implied in the question (in the exanple: 'open; backlog; todo; in progress').

Also, if you think the name or the title of an entity is given but name or title are not mentioned \
explicitly as an attribute, then you should indeed extract the name/title as the entity name.

{SEPARATOR_LINE}

Here is the question you are asked to extract desired entities and time filters from:
{SEPARATOR_LINE}
---content---
{SEPARATOR_LINE}

Please format your answer in this format:
{SEPARATOR_LINE}
{QUERY_ENTITY_EXTRACTION_FORMATTING_PROMPT}
{SEPARATOR_LINE}

""".strip()


QUERY_RELATIONSHIP_EXTRACTION_PROMPT = f"""
You are an expert in the area of knowledge extraction and using knowledge graphs. You are given a question \
and previously you were asked to identify known entities in the question. Now you are asked to extract \
the relationships between the entities you have identified earlier.

First off as background, here are the entity types that are known to the system:
{SEPARATOR_LINE}
---entity_types---
{SEPARATOR_LINE}


Here are the entities you have identified earlier:
{SEPARATOR_LINE}
---identified_entities---
{SEPARATOR_LINE}

Note that the notation for the entities is <ENTITY_TYPE>::<ENTITY_NAME>.

Here are the options for the relationship types(!) between the entities you have identified earlier \
as well as relationship types between the identified entities and other entities \
not explicitly mentioned:
{SEPARATOR_LINE}
---relationship_type_options---
{SEPARATOR_LINE}

These types are, if any were identified, formatted as \
<SOURCE_ENTITY_TYPE>__<RELATIONSHIP_SHORTHAND>__<TARGET_ENTITY_TYPE>, and they \
limit the allowed relationships that you can extract. You would then though use the actual full entities as in:

<SOURCE_ENTITY_TYPE>::<SOURCE_ENTITY_NAME>__<RELATIONSHIP_SHORTHAND>__<TARGET_ENTITY_TYPE>::<TARGET_ENTITY_NAME>.

Note: <RELATIONSHIP_SHORTHAND> should be a word or two that captures the nature \
of the relationship. Common relationships may be: 'likes', 'dislikes', 'uses', 'is interested in', 'mentions', \
'addresses', 'participates in', etc., but look at the text to find the most appropriate relationship. \
Use spaces here for word separation.

Please format your answer in this format:
{SEPARATOR_LINE}
{QUERY_RELATIONSHIP_EXTRACTION_FORMATTING_PROMPT}
{SEPARATOR_LINE}

The list above here is the exclusive, only list of entities and relationship types you can choose from!

Here are some important additional instructions. (For the purpose of illustration, assume that ]
 "ACCOUNT", "CONCERN", and "FEATURE" are all in the list of entity types above. Note that this \
is just assumed for these examples, but you MUST use only the entities above for the actual extraction!)

- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.
* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.

As an example, if the question would say:

{RELATIONSHIP_EXAMPLE_1}

* If on the other hand the question would say:

{RELATIONSHIP_EXAMPLE_2}

- You can extract multiple relationships between the same two entity types.
For example 3, if the question would say:

{RELATIONSHIP_EXAMPLE_3}

- You can extract multiple relationships between the same two actual entities if you think that \
there are multiple relationships between them based on the question.
As an example, if the question would say:

{RELATIONSHIP_EXAMPLE_4}

Note that effectively a three-way relationship (Nike - performance issues - dashboard) extracted as two individual \
relationships.

- Again,
   - you can only extract relationships between the entities extracted earlier
   - you can only extract the relationships that match the listed relationship types
   - if in doubt and there are multiple relationships between the same two entities, you can extract \
all of those that may fit with the question.
   - be really thinking through the question which type of relationships should be extracted and which should not.

Other important notes:
 - For questions that really try to explore in general what a certain entity was involved in like 'what did Paul Smith do \
in the last 3 months?', and Paul Smith has been extracted i.e. as an entity of type 'EMPLOYEE', then you need to extract \
all of the possible relationships an empoyee Paul Smith could have.
 - You are not forced to use all or any of the relationship types listed above. Really look at the question to \
 determine which relationships are explicitly or implicitly referred to in the question.

{SEPARATOR_LINE}

Here is the question you are asked to extract desired entities, relationships, and terms from:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}
""".strip()


GENERAL_CHUNK_PREPROCESSING_PROMPT = """
This is a part of a document that you need to extract information (entities, relationships) from.

Note: when you extract relationships, please make sure that:
  - if you see a relationship for one of our employees, you should extract the relationship both for the employee AND \
    VENDOR::{vendor}.
  - if you see a relationship for one of the representatives of other accounts, you should extract the relationship \
only for the account ACCOUNT::<account_name>!

--
And here is the content:
{content}
""".strip()


### Source-specific prompts

CALL_CHUNK_PREPROCESSING_PROMPT = """
This is a call between employees of the VENDOR's company and representatives of one or more ACCOUNTs (usually one). \
When you extract information based on the instructions, please make sure that you properly attribute the information \
to the correct employee and account. \

Here are the participants (name component of email) from us ({vendor}):
{participant_string}

Here are the participants (name component of email) from the other account(s):
{account_participant_string}

In the text it should be easy to associate a name with the email, and then with the account ('us' vs 'them'). If in doubt, \
look at the context and try to identify whether the statement comes from the other account. If you are not sure, ignore.

Note: when you extract relationships, please make sure that:
  - if you see a relationship for one of our employees, you should extract the relationship both for the employee AND \
    VENDOR::{vendor}.
  - if you see a relationship for one of the representatives of other accounts, you should extract the relationship \
only for the account ACCOUNT::<account_name>!

--
And here is the content:
{content}
""".strip()


CALL_DOCUMENT_CLASSIFICATION_PROMPT = """
This is the beginning of a call between employees of the VENDOR's company ({vendor}) and other participants.

Your task is to classify the call into one of the following categories:
{category_options}

Please also consider the participants when you perform your classification task - they can be important indicators \
for the category.

Please format your answer as a string in the format:

REASONING: <your reasoning for the classification> - CATEGORY: <the category you have chosen. Only use {category_list}>

--
And here is the beginning of the call, including title and participants:

{beginning_of_call_content}
""".strip()


STRATEGY_GENERATION_PROMPT = f"""
Now you need to decide what type of strategy to use to answer a given question, how ultimately \
the answer should be formatted to match the user's expectation, and what an appropriate question \
to/about 'one object or one set of objects' may be, should the answer logically benefit from a divide \
and conquer strategy, or it naturally relates to one or few individual objects. Also, you are \
supposed to determine whether a divide and conquer strategy would be appropriate.


Here are the entity types that are available in the knowledge graph:
{SEPARATOR_LINE}
---possible_entities---
{SEPARATOR_LINE}

Here are the relationship types that are available in the knowledge graph:
{SEPARATOR_LINE}
---possible_relationships---
{SEPARATOR_LINE}

Here is the question whose answer is ultimately sought:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

And here are the entities and relationships that have been extracted earlier from this question:
{SEPARATOR_LINE}
---entities---
---relationships---
{SEPARATOR_LINE}

Here are more instructions:

a) Regarding the strategy, there are three aspects to it:

a1) "Search Type":
Should the question be answered as a SEARCH ('filtered search'), or as a SQL ('SQL query search')?

The options are:
1. SEARCH: A filtered search simply uses the entities and relationships that you extracted earlier and \
applies them as filters to search the underlying documents, which are properly indexed. Examples are \
'what did Nike say about the Analyzer product?', or 'what did I say in my calls with Nike about pricing?'. So this \
is used really when there is *no implicit or explicit constraint or requirements* on underlying source documents \
outside of filters, and there is no ordering, no limiting their number, etc. So use this for a question that \
tries to get information *across* documents which may be filtered by their related relationships and entities, but without \
other constraints.

2. SQL: Choose this option if the question either requires counting of entities (e.g. 'how many calls...'), or \
if the query refers to specific entities that first need to be identified and then analyzed/searched/listed. \
Examples here are 'what did I say about pricing in my call with Nike last week?' (the specific call needs to \
be identified first and then analyzed),  \
'what are the next steps of our two largest opportunities?', or 'summarize my 3 most recent customer calls'. So \
this is used if there *are implicit constraints* on the underlying source documents beyond filtering, including \
ordering, limiting, etc. Use this also if the answer expects to analyze each source independently as part \
of the overall answer.

Note:
 - here, you should look at the extracted entities and relationships and judge whether using them as filters \
(using an *and*) would be appropriate to identify the range of underlying sources, or whether more \
calculations would be needed to find the underlying sources ('last 2...', etc.) .
 - It is also *critical* to look at the attributes of the entities! You only can use the given attributes (and their
 values, if given) as where conditions etc in a SQL statement. So if you think you would 'want
 to' have a where condition but there is not appropriate attribute, then you should not use the SQL strategy
 but the SEARCH strategy. (A Search can always look through data and see what is the best fit, SQL needs to
 be more specific.). On the other hand, if the question maps well to the entities and attributes, then
 SQL may be a good choice.
 - Likely, if there are questions 'about something', then this only is used in a SQL statement or a filter \
 if it shows up as an entity or relationship in the extracted entities and relationships. Otherwise, it will \
 be part of the analysis/search. not the document identification.
 - again, note that we can only FILTER (SEARCH) or COMPUTE (SQL) using the extracted entities (and their attributes)
 and relationships. \
 So do not think that if there is another term in the question, it should be included in the SQL statement. \
 It cannot.


a2) "Search Strategy":
If a SQL search is chosen, i.e., documents have to be identified first, there are two approaches:
1. SIMPLE: You think you can answer the question using a database that is aware of the entities, relationships \
above, and is generally suitable if it is enough to either list or count entities, return dates, etc. Usually, \
'SIMPLE' is chosen for questions of the form 'how many...' (always), or 'list the...' (often), 'when was...', \
'what did (someone) work on...'etc. Often it is also used in cases like 'what did John work on since April?'. Here, \
the user would expect to just see the list. So chose 'SIMPLE' here unless there are REALLY CLEAR \
follow-up instructions for each item (like 'summarize...' , 'analyze...', 'what are the main points of...'.) If \
it is a 'what did...'-type question, choose 'SIMPLE'!

2. DEEP: You think you really should ALSO leverage the actual text of sources to answer the question, which sits \
in a vector database. Examples are 'what is discussed in...', 'summarize', 'what is the discussion about...',\
'how does... relate to...', 'are there any mentions of... in..', 'what are the main points in...', \
'what are the next steps...', etc. Those are usually questions 'about' \
the entities retrieved from the knowledge graph, or questions about the underlying sources.

Your task is to decide which of the two strategies to use.

a3) "Relationship Detection":
You need to evaluate whether the question involves any relationships between entities (of the same type) \
or between entities and relationships.  Respond with 'RELATIONSHIPS' or 'NO_RELATIONSHIPS'.

b) Regarding the format of the answer: there are also two types of formats available to you:

1. LIST: The user would expect an answer as a bullet point list of objects, likely with text associated with each \
bullet point (or sub-bullet). This will be clearer once the data is available.
2. TEXT: The user would expect the questions to be answered in text form.

Your task is to decide which of the two formats to use.


c) Regarding the broken down question for one object:

Always generate a broken_down_question if the question pertains ultimately to a specific objects, even if it seems to be \
a singular object.

- If the question is of type 'how many...', or similar, then imagine that the individual objects have been \
found and you want to ask each object something that illustrates why/in what what that object relates to the \
question. (question: 'How many cars are fast?' -> broken_down_question: 'How fast is this car?')

- Assume the answer would either i) best be generated by first analyzing one object at a time, then aggregating \
the results, or ii) directly relates to one or few objects found through matching suitable criteria.

- The key is to drop any filtering/criteria matching as the objects are already filtered by the criteria. Also, do not \
try to verify here whether the object in question actually satisfies a filter criteria, but rather see \
what it says/does etc. In other words, use this to identify more details about the object, as it relates \
to the original question.
(Example: question: 'What did our oil & gas customers say about the new product?' -> broken_down_question: \
'What did this customer say about the new product?',
or:
question: 'What was in the email from Frank?' -> broken_down_question: 'What is in this email?')


d) Regarding the divide and conquer strategy:

You are supposed to decide whether a divide and conquer strategy would be appropriate. That means, do you think \
that in order to answer the question, it would be good to first analyze one object at a time, and then aggregate the \
results? Or should the information rather be analyzed as a whole? This would be 'yes' or 'no'.

Please answer in json format in this form:

{{
    "search_type": <see search-type instructions above, answer with "SEARCH" or "SQL">,
    "search_strategy": <see search-strategy instructions above, answer with "DEEP" or "SIMPLE">,
    "relationship_detection": <see relationship-detection instructions above, answer with "RELATIONSHIPS" or "NO_RELATIONSHIPS">,
    "format": <see format instructions above, answer with "LIST" or "TEXT">,
    "broken_down_question": <see broken-down-question instructions above, answer with the question \
that should be used to analyze each object/each source (or 'the object' that fits all criteria).>,
    "divide_and_conquer": <see divide-and-conquer instructions above, answer with "yes" or "no">
}}

Do not include any other text or explanations.
"""

SOURCE_DETECTION_PROMPT = f"""
You are an expert in generating, understanding and analyzing SQL statements.

You are given an original SQL statement that returns a list of entities from a table or \
an aggregation of entities from a table. Your task will be to \
identify the source documents that are relevant to what the SQL statement is returning.

The task is actually quite simple. There are two tables involved - relationship_table and entity_table. \
relationship_table was used to generate the original SQL statement. Again, returning entities \
or aggregations of entities. The second table, entity_table contains the entities and \
the corresponding source_documents. All you need to do is to appropriately join the \
entity_table table on the entities that would be retrieved from the original SQL statement, \
and then return the source_documents from the entity_table table.

For your orientation, the relationship_table table has this structure:
 - Table name: relationship_table
 - Columns:
   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \
It is of the form \
<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \
[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!
   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]
   - source_entity_attributes (json): the attributes of the source entity/node [example: {{"account_type": "customer"}}]
   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]
   - target_entity_attributes (json): the attributes of the target entity/node [example: {{"degree": "severe"}}]
   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \
   below are valid.
   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \
   below are valid.
   - relationship_type (str): the type of the relationship, formatted as  \
<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \
been removed. [example: ACCOUNT__has__CONCERN]
   - source_date (str): the 'event' date of the source document [example: 2021-01-01]

The second table, entity_table, has this structure:
 - Table name: entity_table
 - Columns:
   - entity (str): The name of the ENTITY, which is unique in this table. source_entity and target_entity \
in the relationship_table table are the same as entity in this table.
   - source_document (str): the id of the document that contains the entity.

Again, ultimately, your task is to join the entity_table table on the entities that would be retrieved from the \
original SQL statement, and then return the source_documents from the entity_table table.

The way to do that is to create a common table expression for the original SQL statement and join the \
entity_table table suitably on the entities.

Here is the *original* SQL statement:
{SEPARATOR_LINE}
---original_sql_statement---
{SEPARATOR_LINE}

Please structure your answer using <reasoning>, </reasoning>,<sql>, </sql> start and end tags as in:

<reasoning>[think very briefly through the problem step by step, not more than 2-3 sentences]</reasoning> \
<sql>[the new SQL statement that returns the source documents involved in the original SQL statement]</sql>
""".strip()

ENTITY_SOURCE_DETECTION_PROMPT = f"""
You are an expert in generating, understanding and analyzing SQL statements.

You are given a SQL statement that returned an aggregation of entities in a table. \
Your task will be to identify the source documents for the entities involved in \
the answer. For example, should the original SQL statement be \
'SELECT COUNT(entity) FROM entity_table where entity_type = "ACCOUNT"' \
then you should return the source documents that contain the entities of type 'ACCOUNT'.

The table has this structure:
 - Table name: entity_table
 - Columns:
   - entity (str): The name of the ENTITY, combining the nature of the entity and the id of the entity. \
It is of the form <entity_type>::<entity_name> [example: ACCOUNT::625482894].
   - entity_type (str): the type of the entity [example: ACCOUNT].
   - entity_attributes (json): the attributes of the entity [example: {{"priority": "high", "status": "active"}}]
   - source_document (str): the id of the document that contains the entity. Note that the combination of \
id_name and source_document IS UNIQUE!
   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]

Specifically, the table contains the 'source_document' column, which is the id of the source document that \
contains the core information about the entity. Make sure that you do not return more documents, i.e. if there \
is a limit on source documents in the original SQL statement, the new SQL statement needs to have \
the same limit.

CRITICAL NOTES:
 - Only return source documents and nothing else!

Your task is then to create a new SQL statement that returns the source documents that are relevant to what the \
original SQL statement is returning. So the source document of every row used in the original SQL statement should \
be included in the result of the new SQL statement, and then you should apply a 'distinct'.

Here is the *original* SQL statement:
{SEPARATOR_LINE}
---original_sql_statement---
{SEPARATOR_LINE}

Please structure your answer using <reasoning>, </reasoning>,<sql>, </sql> start and end tags as in:

<reasoning>[think very briefly through the problem step by step, not more than 2-3 sentences]</reasoning> \
<sql>[the new SQL statement that returns the source documents involved in the original SQL statement]</sql>
""".strip()


ENTITY_TABLE_DESCRIPTION = f"""\
 - Table name: entity_table
 - Columns:
   - entity (str): The name of the ENTITY, combining the nature of the entity and the id of the entity. \
It is of the form <entity_type>::<entity_name> [example: ACCOUNT::625482894].
   - entity_type (str): the type of the entity [example: ACCOUNT].
   - entity_attributes (json): the attributes of the entity [example: {{"priority": "high", "status": "active"}}]
   - source_document (str): the id of the document that contains the entity. Note that the combination of \
id_name and source_document IS UNIQUE!
   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]

{SEPARATOR_LINE}

Importantly, here are the entity (node) types that you can use, with a short description of what they mean. You may need to \
identify the proper entity type through its description. Also notice the allowed attributes for each entity type and \
their values, if provided. Of particular importance is the 'subtype' attribute, if provided, as this is how \
the entity type may also often be referred to.
{SEPARATOR_LINE}
---entity_types---
{SEPARATOR_LINE}
"""

RELATIONSHIP_TABLE_DESCRIPTION = f"""\
 - Table name: relationship_table
 - Columns:
   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \
It is of the form \
<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \
[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!
   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]
   - source_entity_attributes (json): the attributes of the source entity/node [example: {{"account_type": "customer"}}]
   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]
   - target_entity_attributes (json): the attributes of the target entity/node [example: {{"degree": "severe"}}]
   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \
   below are valid.
   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \
   below are valid.
   - relationship_type (str): the type of the relationship, formatted as  \
<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \
been removed. [example: ACCOUNT__has__CONCERN]
   - source_document (str): the id of the document that contains the relationship. Note that the combination of \
id_name and source_document IS UNIQUE!
   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]

{SEPARATOR_LINE}

Importantly, here are the entity (node) types that you can use, with a short description of what they mean. You may need to \
identify the proper entity type through its description. Also notice the allowed attributes for each entity type and \
their values, if provided. Of particular importance is the 'subtype' attribute, if provided, as this is how \
the entity type may also often be referred to.
{SEPARATOR_LINE}
---entity_types---
{SEPARATOR_LINE}

Here are the relationship types that are in the table, denoted as <source_entity_type>__<relationship_type>__<target_entity_type>.
In the table, the actual relationships are not quite of this form, but each <entity_type> is followed by '::<entity_name>' \
in the relationship id as shown above.
{SEPARATOR_LINE}
---relationship_types---
{SEPARATOR_LINE}
"""


SIMPLE_SQL_PROMPT = f"""
You are an expert in generating a SQL statement that only uses ONE TABLE that captures RELATIONSHIPS \
between TWO ENTITIES. The table has the following structure:

{SEPARATOR_LINE}
{RELATIONSHIP_TABLE_DESCRIPTION}

Here is the question you are supposed to translate into a SQL statement:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

To help you, we already have identified the entities and relationships that the SQL statement likely *should* use (but note the \
exception below!). The entities also contain the list of attributes and attribute values that should specify the entity. \
The format is <entity_type>::<entity_name>--[<attribute_name_1>:<attribute_value_1>, \
<attribute_name_2>:<attribute_value_2>, ...].
{SEPARATOR_LINE}
Identified entities with attributes in query:

---query_entities_with_attributes---

These are the entities that should be used in the SQL statement. However, \
note that these are the entities (with potential attributes) that were *matches* of Knowledge Graph identified with the \
entities originally identified in the original question. A such, they may have id names that may not mean much by themselves, \
eg ACCOUNT::a74f332. Here is the mapping of entities originally identified (whose role in the query should be obvious) with \
the entities that were matched to them in the Knowledge Graph:

---entity_explanation_string---

--

Here are relationships that were identified as explicitly or implicitly referred to in the question:

---query_relationships---

(Again, if applicable, the entities contained in the relationships are the same as the entities in the \
query_entities_with_attributes, and those are the correct ones to use in the SQL statement.)

{SEPARATOR_LINE}

CRITICAL SPECIAL CASE:
  - if an identified entity is of the form <entity_type>::*, or an identified relationship contains an \
entity of this form, this refers to *any* entity of that type. Correspondingly, the SQL query should use the *entity type*, \
and possibly the relationship type, but not the entity with the * itself. \
Example: if you see 'ACCOUNT::*', that means any account matches. So if you are supposed to count the 'ACCOUNT::*', \
you should count the entities of entity_type 'ACCOUNT'.


IMPORTANT NOTES:
- The id_name of each relationship has the format \
<source_entity_id_name>__<relationship_type>__<target_entity_id_name>.
- The relationship id_names are NOT UNIQUE, only the combinations of relationship id_name and source_document_id are unique. \
That is because each relationship is extracted from a document. So make sure you use the proper 'distinct's!
- If the SQL contains a 'SELECT DISTINCT' clause and an ORDER BY clause, then you MUST include the columns from the ORDER BY \
clause ALSO IN THE SELECT DISTINCT CLAUSE! This is very important! (This is a postgres db., so this is a MUST!). \
You MUST NOT have a column in the ORDER BY clause that is not ALSO in the SELECT DISTINCT clause!
- If you join the relationship table on itself using the source_node or target_node, you need to make sure that you also \
join on the source_document_id.
- The id_name of each node/entity has the format <entity_type_id_name>::<name>, where 'entity_type_id_name' \
and 'name' are columns and \
  the values <entity_type_id_name> and <name> can be used for filtering.
- The table can be joined on itself on source nodes and/or target nodes if needed.
- the SQL statement MUST ultimately only return NODES/ENTITIES (not relationships!), or aggregations of \
entities/nodes(count, avg, max, min, etc.). \
Again, DO NOT compose a SQL statement that returns id_name of relationships.
- You CAN ONLY return ENTITIES or COUNTS (or other aggregations) of ENTITIES, or you can return \
source_date (but only if the question asks for event dates or times). DO NOT return \
source documents or counts of source documents, or relationships or counts of relationships! \
Those can only appear in where clauses, ordering etc., but they cannot be returned or ultimately \
counted here! source_date and date operations can appear in select statements, particularly if \
there is time ordering or grouping involved.
- ENTITIES can be target_entity or source_entity. Think about the allowed relationships and the \
question to decide which one you want!
- It is ok to generate nested SQL as long as it is correct postgres syntax!
- Attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \
"attributes ->> '<attribute>' = '<attribute value>'".
-  The SELECT clause MUST only contain entities or aggregations/counts of entities, or, in cases the \
question was about dates or times, then it can also include source_date. But source_document MUST NEVER appear \
in the SELECT clause!
- Again, NEVER count or retrieve source documents in SELECT CLAUSE, whether it is in combination with \
entities, with a distinct, etc. NO source_document in SELECT CLAUSE! So NEVER produce a \
'SELECT COUNT(source_entity, source_document)...'
- Please think about whether you are interested in source entities or target entities! For that purpose, \
consider the allowed relationship types to make sure you select or count the correct one!
- Again, ALWAYS make sure that EACH COLUMN in an ORDER-BY clause IS ALSO IN THE SELECT CLAUSE! Remind yourself \
of that in the reasoning.
- Be careful with dates! Often a date will refer to the source data, which is the date when \
an underlying piece of information was updated. However, if the attributes of an entity contain \
time information as well (like 'started_at', 'completed_at', etc.), then you should really look at \
the wording to see whether you should use a date in the attributes or the event date.
- Dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \
So please use that format, particularly if you use data comparisons (>, <, ...)
- Again, NO 'relationship' or 'source_document' in the SELECT CLAUSE, be it as direct columns are in aggregations!
- Careful with SORT! Really think in which order you want to sort if you have multiple columns you \
want to sort by. If the sorting is time-based and there is a limit for example, then you do want to have a suitable date \
variable as the first column to sort by.
- When doing a SORT on an attribute value of an entity, you MUST also apply a WHERE clause to filter \
for entities that have the attribute value set. For example, if you want to sort the target entity \
by the attribute 'created_date', you must also have a WHERE clause that checks whether the target \
entity attribute contains 'created_date'. This is vital for proper ordering with null values.
- Usually, you will want to retrieve or count entities, maybe with attributes. But you almost always want to \
have entities involved in the SELECT clause.
- Questions like 'What did Paul work on last week?' should generally be handled by finding all entities \
that reasonably relate to 'work entities' that are i) related to Paul, and ii) that were created or \
updated (by him) last week. So this would likely be a UNION of multiple queries.
- If you do joins consider the possibility that the second entity does not exist for all examples. \
Therefore joins should generally be LEFT joins (or RIGHT joins) as appropriate. Think about which \
entities you are interested in, and which ones provides attributes.
Another important note:
 - For questions that really try to explore what a certain entity was involved in like 'what did Paul Smith do \
in the last 3 months?', and Paul Smith has been extracted ie as an entity of type 'EMPLOYEE', you will \
want to consider all entities that Paul Smith may be related to that satisfy any potential other conditions.
- Joins should always be made on entities, not source documents!
- Try to be as efficient as possible.

APPROACH:
Please think through this step by step. Make sure that you include all columns in the ORDER BY clause \
also in the SELECT DISTINCT clause, \
if applicable! And again, joins should generally be LEFT JOINS!

Also, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.

Please structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:

<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>
<sql>[the SQL statement that you generate to satisfy the task]</sql>
""".strip()

# TODO: remove following before merging after enough testing
SIMPLE_SQL_CORRECTION_PROMPT = f"""
You are an expert in reviewing and fixing SQL statements.

Here is a draft SQL statement that you should consider as generally capturing the information intended. \
However, it may or may not be syntactically 100% for our postgresql database.

Guidance:
 - Think about whether attributes should be numbers or strings. You may need to convert them.
 - If we use SELECT DISTINCT we need to have the ORDER BY columns in the \
SELECT statement as well! And it needs to be in the EXACT FORM! So if a \
conversion took place, make sure to include the conversion in the SELECT and the ORDER BY clause!
 - never should 'source_document' be in the SELECT clause! Remove if present!
 - if there are joins, they must be on entities, never source documents
 - if there are joins, consider the possibility that the second entity does not exist for all examples.\
 Therefore consider using LEFT joins (or RIGHT joins) as appropriate.

Draft SQL:
{SEPARATOR_LINE}
---draft_sql---
{SEPARATOR_LINE}

Please structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:

<reasoning>[think briefly through the problem step by step]</reasoning>
<sql>[the corrected (or original one, if correct) SQL statement]</sql>
""".strip()

SIMPLE_ENTITY_SQL_PROMPT = f"""
You are an expert in generating a SQL statement that only uses ONE TABLE that captures ENTITIES \
and their attributes and other data. The table has the following structure:

{SEPARATOR_LINE}
{ENTITY_TABLE_DESCRIPTION}

Here is the question you are supposed to translate into a SQL statement:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

To help you, we already have identified the entities that the SQL statement likely *should* use (but note the \
exception below!). The entities as written below also contain the list of attributes and attribute values \
that should specify the entity. \
The format is <entity_type>::<entity_name>--[<attribute_name_1>:<attribute_value_1>, \
<attribute_name_2>:<attribute_value_2>, ...].
{SEPARATOR_LINE}
Identified entities with attributes in query:

---query_entities_with_attributes---

These are the entities that should be used in the SQL statement. However, \
note that these are the entities (with potential attributes) that were *matches* of Knowledge Graph identified with the \
entities originally identified in the original question. As such, they may have id names that may not mean much by themselves, \
eg ACCOUNT::a74f332. Here is the mapping of entities originally identified (whose role in the query should be obvious) with \
the entities that were matched to them in the Knowledge Graph:

---entity_explanation_string---

--


{SEPARATOR_LINE}

CRITICAL SPECIAL CASE:
  - if an identified entity is of the form <entity_type>::*, or an identified relationship contains an \
entity of this form, this refers to *any* entity of that type. Correspondingly, the SQL query should use the *entity type*, \
but not the entity with the * itself. \
Example: if you see 'ACCOUNT::*', that means any account matches. So if you are supposed to count the 'ACCOUNT::*', \
you should count the entities of entity_type 'ACCOUNT'.


IMPORTANT NOTES:
- The entities are unique in the table.
- If the SQL contains a 'SELECT DISTINCT' clause and an ORDER BY clause, then you MUST include the columns from the ORDER BY \
clause ALSO IN THE SELECT DISTINCT CLAUSE! This is very important! (This is a postgres db., so this is a MUST!). \
You MUST NOT have a column in the ORDER BY clause that is not ALSO in the SELECT DISTINCT clause!
- The table cannot be joined on itself.
- You CAN ONLY return ENTITIES or COUNTS (or other aggregations) of ENTITIES, or you can return \
source_date (but only if the question asks for event dates or times, and then the \
corresponding entity must also be returned).
- Generally, the query can only return ENTITIES or aggregations of ENTITIES:
   - if individual entities are returned, then you MUST also return the source_document. \
If the source date was requested, you can return that too.
   - if aggregations of entities are returned, then you can only aggregate the entities.
- Attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \
"attributes ->> '<attribute>' = '<attribute value>'".
- Again, ALWAYS make sure that EACH COLUMN in an ORDER-BY clause IS ALSO IN THE SELECT CLAUSE! Remind yourself \
of that in the reasoning.
- Be careful with dates! Often a date will refer to the source data, which is the date when \
an underlying piece of information was updated. However, if the attributes of an entity may contain \
time information as well (like 'started_at', 'completed_at', etc.), then you should really look at \
the wording to see whether you should use a date in the attributes or the event date.
- Dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \
So please use that format, particularly if you use data comparisons (>, <, ...)
- Careful with SORT! Really think in which order you want to sort if you have multiple columns you \
want to sort by. If the sorting is time-based and there is a limit for example, then you do want to have a suitable date \
variable as the first column to sort by.
- When doing a SORT on an attribute value of an entity, you MUST also apply a WHERE clause to filter \
for entities that have the attribute value set. For example, if you want to sort the target entity \
by the attribute 'created_date', you must also have a WHERE clause that checks whether the target \
entity attribute contains 'created_date'. This is vital for proper ordering with null values.
- Usually, you will want to retrieve or count entities, maybe with attributes. But you almost always want to \
have entities involved in the SELECT clause.
- You MUST ONLY rely on the entity attributes provided! This is essential! Do not assume \
other attributes exist...they don't! Note that there will often be a search using the results \
of this query. So if there is information in the question that does not fit the provided attributes, \
you should not use it here but rely on the later search!
- Try to be as efficient as possible.

APPROACH:
Please think through this step by step. Make sure that you include all columns in the ORDER BY clause \
also in the SELECT DISTINCT clause, \
if applicable!

Also, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.

Please structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:

<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>
<sql>[the SQL statement that you generate to satisfy the task]</sql>
""".strip()

SIMPLE_SQL_ERROR_FIX_PROMPT = f"""
You are an expert at fixing SQL statements. You will be provided with a SQL statement that aims to address \
a question, but it contains an error. Your task is to fix the SQL statement, based on the error message.

Here is the description of the table that the SQL statement is supposed to use:
---table_description---

Here is the question you are supposed to translate into a SQL statement:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

Here is the SQL statement that you should fix:
{SEPARATOR_LINE}
---sql_statement---
{SEPARATOR_LINE}

Here is the error message that was returned:
{SEPARATOR_LINE}
---error_message---
{SEPARATOR_LINE}

Note that in the case the error states the sql statement did not return any results, it is possible that the \
sql statement is correct, but the question is not addressable with the information in the knowledge graph. \
If you are absolutely certain that is the case, you may return the original sql statement.

Here are a couple common errors that you may encounter:
- source_document is in the SELECT clause -> remove it
- columns used in ORDER BY must also appear in the SELECT DISTINCT clause
- consider carefully the type of the columns you are using, especially for attributes. You may need to cast them
- dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \
So please use that format, particularly if you use data comparisons (>, <, ...)
- attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \
"attributes ->> '<attribute>' = '<attribute value>'" (or "attributes ? '<attribute>'" to check for existence).
- if you are using joins and the sql returned no joins, make sure you are using the appropriate join type (LEFT, RIGHT, etc.) \
it is possible that the second entity does not exist for all examples.
- (ignore if using entity_table) if using the relationship_table and the sql returned no results, make sure you are \
selecting the correct column! Use the available relationship types to determine whether to use the source or target entity.

APPROACH:
Please think through this step by step. Please also bear in mind that the sql statement is written in postgres syntax.

Also, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.

Please structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:

<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>
<sql>[the SQL statement that you generate to satisfy the task]</sql>
"""


SEARCH_FILTER_CONSTRUCTION_PROMPT = f"""
You need to prepare a search across text segments that contain the information necessary to \
answer a question. The text segments have tags that can be used to filter for the relevant segments. \
Key are suitable entities and relationships of a knowledge graph, as well as underlying source documents.

Your overall task is to find the filters and structures that are needed to filtering a database to \
properly address a user question.

You will be given:
  - the user question
  - a description of all of the potential entity types involved
  - a list of 'global' entities and relationships that should be filtered by, given the question
  - the structure of a schema that was used to derive additional entity filters
  - a SQL statement that was generated to derive those filters
  - the results that were generated using the SQL statement. This can have multiple rows, \
and those will be the 'local' filters (which will later mean that each retrieved result will \
need to match at least one of the conditions that you will generate).
  - the results of another query that asked for the underlying source documents that resulted \
in the answers of the SQL statement


Here is the information:

1) The overall user question
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

2) Here is a description of all of the entity types:
{SEPARATOR_LINE}
---entity_type_descriptions---
{SEPARATOR_LINE}

3) Here are the lists of entity and relationship filters that were derived from the question:
{SEPARATOR_LINE}
Entity filters:

---entity_filters---

--

Relationship filters:

---relationship_filters---

{SEPARATOR_LINE}

4) Here are the columns of a table in a database that has a lot of knowledge about the \
data:
{SEPARATOR_LINE}
   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \
It is of the form \
<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \
[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!
   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]
   - source_entity_attributes (json): the attributes of the source entity/node [example: {{"account_type": "customer"}}]
   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]
   - target_entity_attributes (json): the attributes of the target entity/node [example: {{"degree": "severe"}}]
   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \
   below are valid.
   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \
   below are valid.
   - relationship_type (str): the type of the relationship, formatted as  \
<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \
been removed. [example: ACCOUNT__has__CONCERN]
   - source_document (str): the id of the document that contains the relationship. Note that the combination of \
id_name and source_document IS UNIQUE!
   - source_date (str): the 'event' date of the source document [example: 2021-01-01]

{SEPARATOR_LINE}

5) Here is a query that was generated for that table to provide additional filters:
{SEPARATOR_LINE}
---sql_query---
{SEPARATOR_LINE}

6) Here are the results of that SQL query. (Consider the schema description and the \
structure of the entities to interpret the results)
{SEPARATOR_LINE}
---sql_results---
{SEPARATOR_LINE}

7) Here are the results of the other query that provided the underlying source documents \
using the schema:
{SEPARATOR_LINE}
---source_document_results---
{SEPARATOR_LINE}

Here is the detailed set of tasks that you should perform, including the proper output format for you:

Please reply as a json dictionary in this form:

{{
    "global_entity_filters": <a list of entity filters>,
    "global_relationship_filters": <a list of relationship filters, derived from the 'global' \
relationship filers above.>,
    "local_entity_filters": <a list of lists of 'local' entity filters, which were obtained from the \
SQL results in 6 above. Each inner list can have one or more entities, which will correspond to the \
rows in the sql results in point 6 above.>,
    "source_document_filters": <a list of strings, derived from the source document filters above. \
You are essentially only formatting here, so do not change the content of the strings.>,
    "structure": <a list of entity ids (entity_type::uuid) that the user maybe want to know more about. \
More specifically, think about how (and if) the user would naturally want the answer to be divided up in \
*equivalent and parallel* sub-investigations. For example, if the question was something like 'what was discussed \
in the last 5 calls', the user probably expects to see a bullet point list, one bullet point for each call that \
then shows the summary. In that case for this part of the task, your response for the structure should be the \
list of call entities from the sql results in 6 above. (The actual 'what was discussed' will be addressed later). \
In other words, respond with a list of entity ids that you think the user would like to have independently analyzed
and the results reported for each of those entities.>
}}

Again - DO NOT FORGET - here is the user question that motivates this whole task:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

Your json dictionary answer:
""".strip()

OUTPUT_FORMAT_NO_EXAMPLES_PROMPT = f"""
You need to format an answer to a research question. \
You will see what the desired output is, the original question, and the unformatted answer to the research question. \
Your purpose is to generate the answer respecting the desired format.

Notes:
 - Note that you are a language model and that answers may or may not be perfect. To communicate \
this to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \
I found...]
- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.

Here is the unformatted answer to the research question:
{SEPARATOR_LINE}
---introductory_answer---
{SEPARATOR_LINE}

Here is the original question:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

And finally, here is the desired output format:
{SEPARATOR_LINE}
---output_format---
{SEPARATOR_LINE}

Please start generating the answer, without any explanation. There should be no real modifications to \
the text, after all, all you need to do here is formatting. \

Your Answer:
""".strip()


OUTPUT_FORMAT_PROMPT = f"""
You need to format the answers to a research question that was generated using one or more objects. \
An overall introductory answer may be provided to you, as well as the research results for each individual object. \
You will also be provided with the original question as background, and the desired format. \

Your purpose is to generate a consolidated and FORMATTED answer that starts of with the introductory \
answer, and then formats the research results for each individual object in the desired format. \
Do not add any other text please!

Notes:
 - Note that you are a language model and that answers may or may not be perfect. To communicate \
this to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \
I found...]
- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.
- DO NOT add any content to the introductory answer!


Here is the original question for your background:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

Here is the desired output format:
{SEPARATOR_LINE}
---output_format---
{SEPARATOR_LINE}

Here is the introductory answer:
{SEPARATOR_LINE}
---introductory_answer---
{SEPARATOR_LINE}

Here are the research results that you should - respecting the target format- return in a formatted way:
{SEPARATOR_LINE}
---research_results---
{SEPARATOR_LINE}

Please start generating the answer, without any explanation. After all, all you need to do here is formatting. \


Your Answer:
""".strip()

OUTPUT_FORMAT_NO_OVERALL_ANSWER_PROMPT = f"""
You need to format the return of research on multiple objects. The research results will be given \
to you as a string. You will also see what the desired output is, as well as the original question. \
Your purpose is to generate the answer respecting the desired format.

Notes:
 - Note that you are a language model and that answers may or may not be perfect. To communicate \
this to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \
I found...]
- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.
 - Often, you are also provided with a list of explicit examples. If  - AND ONLY IF - the list is not \
empty, then these should be listed at the end with the text:
'...
Here are some examples of what I found:
<bullet point list of examples>
...'
 - Again if the list of examples is an empty string then skip this section! Do not use the \
results data for this purpose instead! (They will already be handled in the answer.)
- Even if the desired output format is 'text', make sure that you keep the individual research results \
separated by bullet points, and mention the object name first, followed by a new line. The object name \
is at the beginning of the research result, and should be in the format <object_type>::<object_name>.


Here is the original question:
{SEPARATOR_LINE}
---question---
{SEPARATOR_LINE}

And finally, here is the desired output format:
{SEPARATOR_LINE}
---output_format---
{SEPARATOR_LINE}

Here are the research results that you should properly format:
{SEPARATOR_LINE}
---research_results---
{SEPARATOR_LINE}

Please start generating the answer, without any explanation. After all, all you need to do here is formatting. \


Your Answer:
""".strip()

KG_OBJECT_SOURCE_RESEARCH_PROMPT = f"""
You are an expert in extracting relevant structured information from a list of documents that \
should relate to one object. You are presented with a list of documents that have been determined to be \
relevant to the task of interest. Your goal is to extract the information asked around these topics:
You should look at the documents - in no particular order! - and extract the information that relates \
to a question:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}

Here are the documents you are supposed to search through:
--
{{document_text}}
{SEPARATOR_LINE}
Note: in this case, please do NOT cite your sources. This is very important!

Please now generate the answer to the question given the documents:
""".strip()

KG_SEARCH_PROMPT = f"""
You are an expert in extracting relevant structured information from a list of documents that \
should relate to one object. You are presented with a list of documents that have been determined to be \
relevant to the task of interest. Your goal is to extract the information asked around these topics:
You should look at the documents and extract the information that relates \
to a question:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}

Here are the documents you are supposed to search through:
--
{{document_text}}
{SEPARATOR_LINE}
Note: in this case, please DO cite your sources. This is very important! \
Use the format [<document number>]. Ie, use [1], [2], and NOT [1,2] if \
there are two documents to cite, etc. \


Please now generate the answer to the question given the documents:
""".strip()

# KG Beta Assistant System Prompt
KG_BETA_ASSISTANT_SYSTEM_PROMPT = """"You are a knowledge graph assistant that helps users explore and \
understand relationships between entities."""

KG_BETA_ASSISTANT_TASK_PROMPT = """"Help users explore and understand the knowledge graph by answering \
questions about entities and their relationships."""


# Just in case, for best practice, send a system message with key rules.
# (The db user permissions executing the SQL will avoid issues anyway,
# but it does not hurt to to put multiple checks in place.)
SQL_INSTRUCTIONS_RELATIONSHIP_PROMPT = """
You are an expert at generating SQL queries to answer questions about a knowledge graph.

You will be given a lot of instructions later, but here rules that MUST BE FOLLOWED:
  - the SQL generated MUST only use the table one table named 'relationship_table'. \
This table is not a table that can be defined or overwritten by the user and the resulting SQL \
statement, it MUST be seen as an existing table in the database.
  - self-joins of the 'relationship_table' are allowed, as well as common table expressions \
  that reference only the 'relationship_table'.
  - no other table or view can in any way or shape be \
involved in the generated SQL.
  - no other database operations can be generated except for those that query the 'relationship_table'. \
(WHERE, GROUP BY, etc. are certainly allowed, but no other database table can be used in the generated SQL.)
"""

SQL_INSTRUCTIONS_ENTITY_PROMPT = """
You are an expert at generating SQL queries to answer questions about a knowledge graph.

You will be given a lot of instructions later, but here rules that MUST BE FOLLOWED:
  - the SQL generated MUST only use the table one table named 'entity_table'. \
This table is not a table that can be defined or overwritten by the user and the resulting SQL \
statement, it MUST be seen as an existing table in the database.
  - common table expressions that reference only the 'entity_table' are allowed.
  - no other table or view of a potential underlying schema can in any way or shape be \
involved in the generated SQL.
  - no other database operations can be generated except for those that query the 'entity_table'. \
(WHERE, GROUP BY, etc. are certainly allowed, but no other database table can be used in the generated SQL.)
"""


================================================
FILE: backend/onyx/prompts/prompt_template.py
================================================
import re

from onyx.prompts.prompt_utils import replace_current_datetime_tag


class PromptTemplate:
    """
    A class for building prompt templates with placeholders.
    Useful when building templates with json schemas, as {} will not work with f-strings.
    Unlike string.replace, this class will raise an error if the fields are missing.
    """

    DEFAULT_PATTERN = r"---([a-zA-Z0-9_]+)---"

    def __init__(self, template: str, pattern: str = DEFAULT_PATTERN):
        self._pattern_str = pattern
        self._pattern = re.compile(pattern)
        self._template = template
        self._fields: set[str] = set(self._pattern.findall(template))

    def build(self, **kwargs: str) -> str:
        """
        Build the prompt template with the given fields.
        Will raise an error if the fields are missing.
        Will ignore fields that are not in the template.
        """
        missing = self._fields - set(kwargs.keys())
        if missing:
            raise ValueError(f"Missing required fields: {missing}.")
        built = self._replace_fields(kwargs)
        return self._postprocess(built)

    def partial_build(self, **kwargs: str) -> "PromptTemplate":
        """
        Returns another PromptTemplate with the given fields replaced.
        Will ignore fields that are not in the template.
        """
        new_template = self._replace_fields(kwargs)
        return PromptTemplate(new_template, self._pattern_str)

    def _replace_fields(self, field_vals: dict[str, str]) -> str:
        def repl(match: re.Match) -> str:
            key = match.group(1)
            return field_vals.get(key, match.group(0))

        return self._pattern.sub(repl, self._template)

    def _postprocess(self, text: str) -> str:
        """Apply global replacements such as [[CURRENT_DATETIME]]."""
        if not text:
            return text
        # Ensure [[CURRENT_DATETIME]] matches shared prompt formatting
        return replace_current_datetime_tag(
            text,
            full_sentence=True,
            include_day_of_week=True,
        )


================================================
FILE: backend/onyx/prompts/prompt_utils.py
================================================
from datetime import datetime
from typing import cast

from langchain_core.messages import BaseMessage

from onyx.configs.constants import DocumentSource
from onyx.prompts.chat_prompts import ADDITIONAL_INFO
from onyx.prompts.chat_prompts import CITATION_GUIDANCE_REPLACEMENT_PAT
from onyx.prompts.chat_prompts import COMPANY_DESCRIPTION_BLOCK
from onyx.prompts.chat_prompts import COMPANY_NAME_BLOCK
from onyx.prompts.chat_prompts import DATETIME_REPLACEMENT_PAT
from onyx.prompts.chat_prompts import REMINDER_TAG_REPLACEMENT_PAT
from onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE
from onyx.prompts.constants import CODE_BLOCK_PAT
from onyx.prompts.constants import REMINDER_TAG_DESCRIPTION
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger


logger = setup_logger()


_BASIC_TIME_STR = "The current date is {datetime_info}."


def get_current_llm_day_time(
    include_day_of_week: bool = True,
    full_sentence: bool = True,
    include_hour_min: bool = False,
) -> str:
    current_datetime = datetime.now()
    # Format looks like: "October 16, 2023 14:30" if include_hour_min, otherwise "October 16, 2023"
    formatted_datetime = (
        current_datetime.strftime("%B %d, %Y %H:%M")
        if include_hour_min
        else current_datetime.strftime("%B %d, %Y")
    )
    day_of_week = current_datetime.strftime("%A")
    if full_sentence:
        return f"The current day and time is {day_of_week} {formatted_datetime}"
    if include_day_of_week:
        return f"{day_of_week} {formatted_datetime}"
    return f"{formatted_datetime}"


def replace_current_datetime_tag(
    prompt_str: str,
    *,
    full_sentence: bool = False,
    include_day_of_week: bool = True,
) -> str:
    datetime_str = get_current_llm_day_time(
        full_sentence=full_sentence,
        include_day_of_week=include_day_of_week,
    )

    if DATETIME_REPLACEMENT_PAT in prompt_str:
        prompt_str = prompt_str.replace(DATETIME_REPLACEMENT_PAT, datetime_str)

    return prompt_str


def replace_citation_guidance_tag(
    prompt_str: str,
    *,
    should_cite_documents: bool = False,
    include_all_guidance: bool = False,
) -> tuple[str, bool]:
    """
    Replace {{CITATION_GUIDANCE}} placeholder with citation guidance if needed.

    Returns:
        tuple[str, bool]: (prompt_with_replacement, should_append_fallback)
        - prompt_with_replacement: The prompt with placeholder replaced (or unchanged if not present)
        - should_append_fallback: True if citation guidance should be appended
            (placeholder is not present and citations are needed)
    """
    placeholder_was_present = CITATION_GUIDANCE_REPLACEMENT_PAT in prompt_str

    if not placeholder_was_present:
        # Placeholder not present - caller should append if citations are needed
        should_append = (
            should_cite_documents or include_all_guidance
        ) and REQUIRE_CITATION_GUIDANCE not in prompt_str
        return prompt_str, should_append

    citation_guidance = (
        REQUIRE_CITATION_GUIDANCE
        if should_cite_documents or include_all_guidance
        else ""
    )

    prompt_str = prompt_str.replace(
        CITATION_GUIDANCE_REPLACEMENT_PAT,
        citation_guidance,
    )

    return prompt_str, False


def replace_reminder_tag(prompt_str: str) -> str:
    """Replace {{REMINDER_TAG_DESCRIPTION}} with the reminder tag content."""
    if REMINDER_TAG_REPLACEMENT_PAT in prompt_str:
        prompt_str = prompt_str.replace(
            REMINDER_TAG_REPLACEMENT_PAT, REMINDER_TAG_DESCRIPTION
        )

    return prompt_str


def handle_onyx_date_awareness(
    prompt_str: str,
    # We always replace the pattern {{CURRENT_DATETIME}} if it shows up
    # but if it doesn't show up and the prompt is datetime aware, add it to the prompt at the end.
    datetime_aware: bool = False,
) -> str:
    """
    If there is a {{CURRENT_DATETIME}} tag, replace it with the current date and time no matter what.
    If the prompt is datetime aware, and there are no datetime tags, add it to the prompt.
    Do nothing otherwise.
    This can later be expanded to support other tags.
    """

    prompt_with_datetime = replace_current_datetime_tag(
        prompt_str,
        full_sentence=False,
        include_day_of_week=True,
    )
    if prompt_with_datetime != prompt_str:
        return prompt_with_datetime

    if datetime_aware:
        return prompt_str + ADDITIONAL_INFO.format(
            datetime_info=_BASIC_TIME_STR.format(
                datetime_info=get_current_llm_day_time()
            )
        )

    return prompt_str


def get_company_context() -> str | None:
    prompt_str = None
    try:
        workspace_settings = load_settings()
        company_name = workspace_settings.company_name
        company_description = workspace_settings.company_description

        if not company_name and not company_description:
            return None

        prompt_str = ""
        if company_name:
            prompt_str += COMPANY_NAME_BLOCK.format(company_name=company_name)
        if company_description:
            prompt_str += COMPANY_DESCRIPTION_BLOCK.format(
                company_description=company_description
            )
        return prompt_str
    except Exception as e:
        logger.error(f"Error handling company awareness: {e}")
        return None


# Maps connector enum string to a more natural language representation for the LLM
# If not on the list, uses the original but slightly cleaned up, see below
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    if source_str in CONNECTOR_NAME_MAP:
        return CONNECTOR_NAME_MAP[source_str]
    return source_str.replace("_", " ").title()


def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    context_str = ""
    if include_metadata:
        context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
        context_str += f"Source: {clean_up_source(source_type)}\n"

        for k, v in metadata_dict.items():
            if isinstance(v, list):
                v_str = ", ".join(v)
                context_str += f"{k.capitalize()}: {v_str}\n"
            else:
                context_str += f"{k.capitalize()}: {v}\n"

        if updated_at:
            update_str = updated_at.strftime("%B %d, %Y %H:%M")
            context_str += f"Updated: {update_str}\n"
    context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n"
    return context_str


_PER_MESSAGE_TOKEN_BUFFER = 7


def find_last_index(lst: list[int], max_prompt_tokens: int) -> int:
    """From the back, find the index of the last element to include
    before the list exceeds the maximum"""
    running_sum = 0

    if not lst:
        logger.warning("Empty message history passed to find_last_index")
        return 0

    last_ind = 0
    for i in range(len(lst) - 1, -1, -1):
        running_sum += lst[i] + _PER_MESSAGE_TOKEN_BUFFER
        if running_sum > max_prompt_tokens:
            last_ind = i + 1
            break

    if last_ind >= len(lst):
        logger.error(
            f"Last message alone is too large! max_prompt_tokens: {max_prompt_tokens}, message_token_counts: {lst}"
        )
        raise ValueError("Last message alone is too large!")

    return last_ind


def drop_messages_history_overflow(
    messages_with_token_cnts: list[tuple[BaseMessage, int]],
    max_allowed_tokens: int,
) -> list[BaseMessage]:
    """As message history grows, messages need to be dropped starting from the furthest in the past.
    The System message should be kept if at all possible and the latest user input which is inserted in the
    prompt template must be included"""

    final_messages: list[BaseMessage] = []
    messages, token_counts = cast(
        tuple[list[BaseMessage], list[int]], zip(*messages_with_token_cnts)
    )
    system_msg = (
        final_messages[0]
        if final_messages and final_messages[0].type == "system"
        else None
    )

    history_msgs = messages[:-1]
    final_msg = messages[-1]
    if final_msg.type != "human":
        if final_msg.type != "tool":
            raise ValueError("Last message must be user input OR a tool result")
        else:
            final_msgs = messages[-3:]
            history_msgs = messages[:-3]
    else:
        final_msgs = [final_msg]

    # Start dropping from the history if necessary
    ind_prev_msg_start = find_last_index(
        token_counts, max_prompt_tokens=max_allowed_tokens
    )

    if system_msg and ind_prev_msg_start <= len(history_msgs):
        final_messages.append(system_msg)

    final_messages.extend(history_msgs[ind_prev_msg_start:])
    final_messages.extend(final_msgs)

    return final_messages


================================================
FILE: backend/onyx/prompts/search_prompts.py
================================================
# How it works and rationale:
# First - this works best empirically across multiple LLMs, some of this is back-explaining reasons based on results.
#
# The system prompt is kept simple and as similar to typical system prompts as possible to stay within training distribution.
# The history is passed through as a list of messages, this should allow the LLM to more easily understand what is going on.
# The special tokens and separators let the LLM more easily disregard no longer relevant past messages.
# The last message is dynamically created and has a detailed description of the actual task.
# This is based on the assumption that users give much more varied requests in their prompts and LLMs are well adjusted to this.
# The proximity of the instructions and the lack of any breaks should also let the LLM follow the task more clearly.
#
# For document verification, the history is not included as the queries should ideally be standalone enough.
# To keep it simple, it is just a single simple prompt.


SEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT = """
You are an assistant that reformulates the last user message into a standalone, self-contained query suitable for \
semantic search. Your goal is to output a single natural language query that captures the full meaning of the user's \
most recent message. It should be fully semantic and natural language unless the user query is already a keyword query. \
When relevant, you bring in context from the history or knowledge about the user.

The current date is {current_date}.
"""

SEMANTIC_QUERY_REPHRASE_USER_PROMPT = """
Given the chat history above (if any) and the final user query (provided below), provide a standalone query that is as
representative of the user query as possible. In most cases, it should be exactly the same as the last user query. \
It should be fully semantic and natural language unless the user query is already a keyword query. \
Focus on the last user message, in most cases the history and extra context should be ignored.

For a query like "What are the use cases for product X", your output should remain "What are the use cases for product X". \
It should remain semantic, and as close to the original query as possible. There is nothing additional needed \
from the history or that should be removed / replaced from the query.

For modifications, you can:
1. Insert relevant context from the chat history. For example:
"How do I set it up?" -> "How do I set up software Y?" (assuming the conversation was about software Y)

2. Remove asks or requests not related to the searching. For example:
"Can you summarize the calls with example company" -> "calls with example company"
"Can you find me the document that goes over all of the software to set up on an engineer's first day?" -> \
"all of the software to set up on an engineer's first day"

3. Fill in relevant information about the user. For example:
"What document did I write last week?" -> "What document did John Doe write last week?" (assuming the user is John Doe)
{additional_context}
=========================
CRITICAL: ONLY provide the standalone query and nothing else.

Final user query:
{user_query}
""".strip()


KEYWORD_REPHRASE_SYSTEM_PROMPT = """
You are an assistant that reformulates the last user message into a set of standalone keyword queries suitable for a keyword \
search engine. Your goal is to output keyword queries that optimize finding relevant documents to answer the user query. \
When relevant, you bring in context from the history or knowledge about the user.

The current date is {current_date}.
"""


KEYWORD_REPHRASE_USER_PROMPT = """
Given the chat history above (if any) and the final user query (provided below), provide a set of keyword only queries that can
help find relevant documents. Provide a single query per line (where each query consists of one or more keywords). \
The queries must be purely keywords and not contain any natural language. \
Each query should have as few keywords as necessary to represent the user's search intent.

Guidelines:
- Do not provide more than 3 queries.
- Do not replace or expand niche, proprietary, or obscure terms
- Focus on the last user message, in most cases the history and any extra context should be ignored.
{additional_context}
=========================
CRITICAL: ONLY provide the keyword queries, one set of keywords per line and nothing else.

Final user query:
{user_query}
""".strip()


REPHRASE_CONTEXT_PROMPT = """
In most cases the following additional context is not needed. If relevant, here is some information about the user:
{user_info}

Here are some memories about the user:
{memories}
"""


# This prompt is intended to be fairly lenient since there are additional filters downstream.
# There are now multiple places for misleading docs to get dropped so each one can be a bit more lax.
# As models get better, it's likely better to include more context than not, some questionably
# useful stuff may be helpful downstream.
# Adding the ! option to allow better models to handle questions where all of the documents are
# necessary to make a good determination.
# If a document is by far the best and is a very obvious inclusion, add a ! after the section_id to indicate that it should \
# be included in full. Example output: [8, 2!, 5].
DOCUMENT_SELECTION_PROMPT = """
Select the most relevant document sections for the user's query (maximum {max_sections}).{extra_instructions}

# Document Sections
```
{formatted_doc_sections}
```

# User Query
```
{user_query}
```

# Selection Criteria
- Choose sections most relevant to answering the query, if at all in doubt, include the section.
- Even if only a tiny part of the section is relevant, include it.
- It is ok to select multiple sections from the same document.
- Consider indirect connections and supporting context to be valuable.
- If the section is not directly helpful but the document seems relevant, there is an opportunity \
later to expand the section and read more from the document so include the section.

# Output Format
Return ONLY section_ids as a comma-separated list, ordered by relevance:
[most_relevant_section_id, second_most_relevant_section_id, ...]

Section IDs:
""".strip()

TRY_TO_FILL_TO_MAX_INSTRUCTIONS = """
Try to fill the list to the maximum number of sections if possible without including non-relevant or misleading sections.
"""


# Some models are trained heavily to reason in the actual output so we allow some flexibility in the prompt.
# Downstream of the model, we will attempt to parse the output to extract the number.
# This inference will not have a system prompt as it's a single message task more like the traditional ones.
# LLMs should do better with just this type of next word prediction.
# Opted to not include metadata here as the doc was already selected by the previous step that has it.
# Also hopefully it leans not throwing out documents as there are not many bad ones that make it to this stage.
# If anything, it's mostly because of something misleading, otherwise this step should be treated as 95% expansion/filtering.
DOCUMENT_CONTEXT_SELECTION_PROMPT = """
Analyze the relevance of document sections to a search query and classify according to the categories \
described at the end of the prompt.

# Document Title / Metadata
```
{document_title}
```

# Section Above:
```
{section_above}
```

# Main Section:
```
{main_section}
```

# Section Below:
```
{section_below}
```

# User Query:
```
{user_query}
```

# Classification Categories:
**0 - NOT_RELEVANT**
- Main section and surrounding sections do not help answer the query or provide meaningful, relevant information.
- Appears on topic but refers to a different context or subject (could lead to potential confusion or misdirection). \
It is important to avoid conflating different contexts and subjects - if the document is related to the query but not about \
the correct subject. Example: "How much did we quote ACME for project X", "ACME paid us $100,000 for project Y".

**1 - MAIN_SECTION_ONLY**
- Main section contains useful information relevant to the query.
- Adjacent sections do not provide additional directly relevant information.

**2 - INCLUDE_ADJACENT_SECTIONS**
- The main section AND adjacent sections are all useful for answering the user query.
- The surrounding sections provide relevant information that does not exist in the main section.
- Even if only 1 of the adjacent sections is useful or there is a small piece in either that is useful.
- Additional unseen sections are unlikely to contain valuable related information.

**3 - INCLUDE_FULL_DOCUMENT**
- Additional unseen sections are likely to contain valuable related information to the query.

## Additional Decision Notes
- If only a small piece of the document is useful - use classification 1 or 2, do not use 0.
- If the document is on topic and provides additional context that might be useful in \
combination with other documents - use classification 1, 2 or 3, do not use 0.

CRITICAL: ONLY output the NUMBER of the situation most applicable to the query and sections provided (0, 1, 2, or 3).

Situation Number:
""".strip()


================================================
FILE: backend/onyx/prompts/tool_prompts.py
================================================
# ruff: noqa: E501, W605 start
# If there are any tools, this section is included, the sections below are for the available tools
TOOL_SECTION_HEADER = "\n# Tools\n\n"


# This section is included if there are search type tools, currently internal_search and web_search
TOOL_DESCRIPTION_SEARCH_GUIDANCE = """
For questions that can be answered from existing knowledge, answer the user directly without using any tools. \
If you suspect your knowledge is outdated or for topics where things are rapidly changing, use search tools to get more context. \
For statements that may be describing or referring to a document, run a search for the document. \
In ambiguous cases, favor searching to get more context.

When using any search type tool, do not make any assumptions and stay as faithful to the user's query as possible. \
Between internal and web search (if both are available), think about if the user's query is likely better answered by team internal sources or online web pages. \
When searching for information, if the initial results cannot fully answer the user's query, try again with different tools or arguments. \
Do not repeat the same or very similar queries if it already has been run in the chat history.

If it is unclear which tool to use, consider using multiple in parallel to be efficient with time.
""".lstrip()


INTERNAL_SEARCH_GUIDANCE = """
## internal_search
Use the `internal_search` tool to search connected applications for information. Some examples of when to use `internal_search` include:
- Internal information: any time where there may be some information stored in internal applications that could help better answer the query.
- Niche/Specific information: information that is likely not found in public sources, things specific to a project or product, team, process, etc.
- Keyword Queries: queries that are heavily keyword based are often internal document search queries.
- Ambiguity: questions about something that is not widely known or understood.
Never provide more than 3 queries at once to `internal_search`.
""".lstrip()


WEB_SEARCH_GUIDANCE = """
## web_search
Use the `web_search` tool to access up-to-date information from the web. Some examples of when to use `web_search` include:
- Freshness: when the answer might be enhanced by up-to-date information on a topic. Very important for topics that are changing or evolving.
- Accuracy: if the cost of outdated/inaccurate information is high.
- Niche Information: when detailed info is not widely known or understood (but is likely found on the internet).{site_colon_disabled}
""".lstrip()

WEB_SEARCH_SITE_DISABLED_GUIDANCE = """
Do not use the "site:" operator in your web search queries.
""".lstrip()


OPEN_URLS_GUIDANCE = """
## open_url
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs. \
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \
Do not open URLs that are image files like .png, .jpg, etc.
You should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.
""".lstrip()

PYTHON_TOOL_GUIDANCE = """
## python
Use the `python` tool to execute Python code in an isolated sandbox. The tool will respond with the output of the execution or time out after 60.0 seconds.
Any files uploaded to the chat will be automatically be available in the execution environment's current directory. \
The current directory in the file system can be used to save and persist user files. Files written to the current directory will be returned with a `file_link`. \
Use this to give the user a way to download the file OR to display generated images.
Internet access for this session is disabled. Do not make external web requests or API calls as they will fail.
Use `openpyxl` to read and write Excel files. You have access to libraries like numpy, pandas, scipy, matplotlib, and PIL.
IMPORTANT: each call to this tool is independent. Variables from previous calls will NOT be available in the current call.
""".lstrip()

GENERATE_IMAGE_GUIDANCE = """
## generate_image
NEVER use generate_image unless the user specifically requests an image.
For edits/variations of a previously generated image, pass `reference_image_file_ids` with
the `file_id` values returned by earlier `generate_image` tool results.
""".lstrip()

MEMORY_GUIDANCE = """
## add_memory
Use the `add_memory` tool for facts shared by the user that should be remembered for future conversations. \
Only add memories that are specific, likely to remain true, and likely to be useful later. \
Focus on enduring preferences, long-term goals, stable constraints, and explicit "remember this" type requests.
""".lstrip()

TOOL_CALL_FAILURE_PROMPT = """
LLM attempted to call a tool but failed. Most likely the tool name or arguments were misspelled.
""".strip()
# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/prompts/user_info.py
================================================
# ruff: noqa: E501, W605 start
USER_INFORMATION_HEADER = "\n# User Information\n\n"

BASIC_INFORMATION_PROMPT = """
## Basic Information
User name: {user_name}
User email: {user_email}{user_role}
""".lstrip()

# This line only shows up if the user has configured their role.
USER_ROLE_PROMPT = """
User role: {user_role}
""".lstrip()

# Team information should be a paragraph style description of the user's team.
TEAM_INFORMATION_PROMPT = """
## Team Information
{team_information}
""".lstrip()

# User preferences should be a paragraph style description of the user's preferences.
USER_PREFERENCES_PROMPT = """
## User Preferences
{user_preferences}
""".lstrip()

# User memories should look something like:
# - Memory 1
# - Memory 2
# - Memory 3
USER_MEMORIES_PROMPT = """
## User Memories
{user_memories}
""".lstrip()

# ruff: noqa: E501, W605 end


================================================
FILE: backend/onyx/redis/iam_auth.py
================================================
"""
Redis IAM Authentication Module
This module provides Redis IAM authentication functionality for AWS ElastiCache.
Unlike RDS IAM auth, Redis IAM auth relies on IAM roles and policies rather than
generating authentication tokens.
Key functions:
- configure_redis_iam_auth: Configure Redis connection parameters for IAM auth
- create_redis_ssl_context_if_iam: Create SSL context for secure connections
"""

import ssl
from typing import Any


def configure_redis_iam_auth(connection_kwargs: dict[str, Any]) -> None:
    """
    Configure Redis connection parameters for IAM authentication.
    Modifies the connection_kwargs dict in-place to:
    1. Remove password (not needed with IAM)
    2. Enable SSL with system CA certificates
    3. Set proper SSL context for secure connections
    """
    # Remove password as it's not needed with IAM authentication
    if "password" in connection_kwargs:
        del connection_kwargs["password"]

    # Ensure SSL is enabled for IAM authentication
    connection_kwargs["ssl"] = True
    connection_kwargs["ssl_context"] = create_redis_ssl_context_if_iam()


def create_redis_ssl_context_if_iam() -> ssl.SSLContext:
    """Create an SSL context for Redis IAM authentication using system CA certificates."""
    # Use system CA certificates by default - no need for additional CA files
    ssl_context = ssl.create_default_context()
    ssl_context.check_hostname = True
    ssl_context.verify_mode = ssl.CERT_REQUIRED
    return ssl_context


================================================
FILE: backend/onyx/redis/redis_connector.py
================================================
import redis

from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_stop import RedisConnectorStop
from onyx.redis.redis_pool import get_redis_client


# TODO: reduce dependence on redis
class RedisConnector:
    """Composes several classes to simplify interacting with a connector and its
    associated background tasks / associated redis interactions."""

    def __init__(self, tenant_id: str, cc_pair_id: int) -> None:
        """id: a connector credential pair id"""

        self.tenant_id: str = tenant_id
        self.cc_pair_id: int = cc_pair_id
        self.redis: redis.Redis = get_redis_client(tenant_id=tenant_id)

        self.stop = RedisConnectorStop(tenant_id, cc_pair_id, self.redis)
        self.prune = RedisConnectorPrune(tenant_id, cc_pair_id, self.redis)
        self.delete = RedisConnectorDelete(tenant_id, cc_pair_id, self.redis)
        self.permissions = RedisConnectorPermissionSync(
            tenant_id, cc_pair_id, self.redis
        )
        self.external_group_sync = RedisConnectorExternalGroupSync(
            tenant_id, cc_pair_id, self.redis
        )

    @staticmethod
    def get_id_from_fence_key(key: str) -> str | None:
        """
        Extracts the object ID from a fence key in the format `PREFIX_fence_X`.

        Args:
            key (str): The fence key string.

        Returns:
            Optional[int]: The extracted ID if the key is in the correct format, otherwise None.
        """
        parts = key.split("_")
        if len(parts) != 3:
            return None

        object_id = parts[2]
        return object_id

    @staticmethod
    def get_id_from_task_id(task_id: str) -> str | None:
        """
        Extracts the object ID from a task ID string.

        This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:
        - `prefix` is an arbitrary string (e.g., the name of the task or entity),
        - `objectid` is the ID you want to extract,
        - `suffix` is another arbitrary string (e.g., a UUID).

        Example:
            If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,
            this method will return the string `"1"`.

        Args:
            task_id (str): The task ID string from which to extract the object ID.

        Returns:
            str | None: The extracted object ID if the task ID is in the correct format, otherwise None.
        """
        # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc
        parts = task_id.split("_")
        if len(parts) != 3:
            return None

        object_id = parts[1]
        return object_id

    def db_lock_key(self, search_settings_id: int) -> str:
        """
        Key for the db lock for an indexing attempt.
        Prevents multiple modifications to the current indexing attempt row
        from multiple docfetching/docprocessing tasks.
        """
        return f"da_lock:indexing:db_{self.cc_pair_id}/{search_settings_id}"


================================================
FILE: backend/onyx/redis/redis_connector_delete.py
================================================
import time
from datetime import datetime
from typing import cast
from uuid import uuid4

import redis
from celery import Celery
from pydantic import BaseModel
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DB_YIELD_PER_DEFAULT
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import construct_document_id_select_for_connector_credential_pair


class RedisConnectorDeletePayload(BaseModel):
    num_tasks: int | None
    submitted: datetime


class RedisConnectorDelete:
    """Manages interactions with redis for deletion tasks. Should only be accessed
    through RedisConnector."""

    PREFIX = "connectordeletion"
    FENCE_PREFIX = f"{PREFIX}_fence"  # "connectordeletion_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks
    TASKSET_PREFIX = f"{PREFIX}_taskset"  # "connectordeletion_taskset"
    TASKSET_TTL = FENCE_TTL

    # used to signal the overall workflow is still active
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
    ACTIVE_TTL = 3600

    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str = tenant_id
        self.id = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.taskset_key = f"{self.TASKSET_PREFIX}_{id}"

        self.active_key = f"{self.ACTIVE_PREFIX}_{id}"

    def taskset_clear(self) -> None:
        self.redis.delete(self.taskset_key)

    def get_remaining(self) -> int:
        # todo: move into fence
        remaining = cast(int, self.redis.scard(self.taskset_key))
        return remaining

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> RedisConnectorDeletePayload | None:
        # read related data and evaluate/print task progress
        fence_bytes = cast(bytes, self.redis.get(self.fence_key))
        if fence_bytes is None:
            return None

        fence_str = fence_bytes.decode("utf-8")
        payload = RedisConnectorDeletePayload.model_validate_json(cast(str, fence_str))

        return payload

    def set_fence(self, payload: RedisConnectorDeletePayload | None) -> None:
        if not payload:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    def set_active(self) -> None:
        """This sets a signal to keep the permissioning flow from getting cleaned up within
        the expiration time.

        The slack in timing is needed to avoid race conditions where simply checking
        the celery queue and task status could result in race conditions."""
        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)

    def active(self) -> bool:
        return bool(self.redis.exists(self.active_key))

    def _generate_task_id(self) -> str:
        # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
        # we prefix the task id so it's easier to keep track of who created the task
        # aka "connectordeletion_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"

        return f"{self.PREFIX}_{self.id}_{uuid4()}"

    def generate_tasks(
        self,
        celery_app: Celery,
        db_session: Session,
        lock: RedisLock,
    ) -> int | None:
        """Returns None if the cc_pair doesn't exist.
        Otherwise, returns an int with the number of generated tasks."""
        last_lock_time = time.monotonic()

        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=int(self.id),
        )
        if not cc_pair:
            return None

        num_tasks_sent = 0

        stmt = construct_document_id_select_for_connector_credential_pair(
            cc_pair.connector_id, cc_pair.credential_id
        )
        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
            doc_id = cast(str, doc_id)
            current_time = time.monotonic()
            if current_time - last_lock_time >= (
                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
            ):
                lock.reacquire()
                last_lock_time = current_time

            custom_task_id = self._generate_task_id()

            # add to the tracking taskset in redis BEFORE creating the celery task.
            # note that for the moment we are using a single taskset key, not differentiated by cc_pair id
            self.redis.sadd(self.taskset_key, custom_task_id)
            self.redis.expire(self.taskset_key, self.TASKSET_TTL)

            # Priority on sync's triggered by new indexing should be medium
            celery_app.send_task(
                OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,
                kwargs=dict(
                    document_id=doc_id,
                    connector_id=cc_pair.connector_id,
                    credential_id=cc_pair.credential_id,
                    tenant_id=self.tenant_id,
                ),
                queue=OnyxCeleryQueues.CONNECTOR_DELETION,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
                ignore_result=True,
            )

            num_tasks_sent += 1

        return num_tasks_sent

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.active_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:
        taskset_key = f"{RedisConnectorDelete.TASKSET_PREFIX}_{id}"
        r.srem(taskset_key, task_id)
        return

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Deletes all redis values for all connectors"""
        for key in r.scan_iter(RedisConnectorDelete.ACTIVE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorDelete.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_connector_doc_perm_sync.py
================================================
import time
from datetime import datetime
from logging import Logger
from typing import Any
from typing import cast
from typing import NamedTuple

import redis
from pydantic import BaseModel
from redis.lock import Lock as RedisLock

from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import OnyxRedisConstants
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.utils.variable_functionality import fetch_versioned_implementation


class PermissionSyncResult(NamedTuple):
    """Result of a permission sync operation.

    Attributes:
        num_updated: Number of documents successfully updated
        num_errors: Number of documents that failed to update
    """

    num_updated: int
    num_errors: int


class RedisConnectorPermissionSyncPayload(BaseModel):
    id: str
    submitted: datetime
    started: datetime | None
    celery_task_id: str | None


class RedisConnectorPermissionSync:
    """Manages interactions with redis for doc permission sync tasks. Should only be accessed
    through RedisConnector."""

    PREFIX = "connectordocpermissionsync"

    FENCE_PREFIX = f"{PREFIX}_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks

    # phase 1 - geneartor task and progress signals
    GENERATORTASK_PREFIX = f"{PREFIX}+generator"  # connectorpermissions+generator
    GENERATOR_PROGRESS_PREFIX = (
        PREFIX + "_generator_progress"
    )  # connectorpermissions_generator_progress
    GENERATOR_COMPLETE_PREFIX = (
        PREFIX + "_generator_complete"
    )  # connectorpermissions_generator_complete

    TASKSET_PREFIX = f"{PREFIX}_taskset"  # connectorpermissions_taskset
    SUBTASK_PREFIX = f"{PREFIX}+sub"  # connectorpermissions+sub

    # used to signal the overall workflow is still active
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
    ACTIVE_TTL = CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT * 2

    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str = tenant_id
        self.id = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.generator_task_key = f"{self.GENERATORTASK_PREFIX}_{id}"
        self.generator_progress_key = f"{self.GENERATOR_PROGRESS_PREFIX}_{id}"
        self.generator_complete_key = f"{self.GENERATOR_COMPLETE_PREFIX}_{id}"

        self.taskset_key = f"{self.TASKSET_PREFIX}_{id}"

        self.subtask_prefix: str = f"{self.SUBTASK_PREFIX}_{id}"
        self.active_key = f"{self.ACTIVE_PREFIX}_{id}"

    def taskset_clear(self) -> None:
        self.redis.delete(self.taskset_key)

    def generator_clear(self) -> None:
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)

    def get_remaining(self) -> int:
        remaining = cast(int, self.redis.scard(self.taskset_key))
        return remaining

    def get_active_task_count(self) -> int:
        """Count of active permission sync tasks"""
        count = 0
        for _ in self.redis.sscan_iter(
            OnyxRedisConstants.ACTIVE_FENCES,
            RedisConnectorPermissionSync.FENCE_PREFIX + "*",
            count=SCAN_ITER_COUNT_DEFAULT,
        ):
            count += 1
        return count

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> RedisConnectorPermissionSyncPayload | None:
        # read related data and evaluate/print task progress
        fence_bytes = cast(Any, self.redis.get(self.fence_key))
        if fence_bytes is None:
            return None

        fence_str = fence_bytes.decode("utf-8")
        payload = RedisConnectorPermissionSyncPayload.model_validate_json(
            cast(str, fence_str)
        )

        return payload

    def set_fence(
        self,
        payload: RedisConnectorPermissionSyncPayload | None,
    ) -> None:
        if not payload:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    def set_active(self) -> None:
        """This sets a signal to keep the permissioning flow from getting cleaned up within
        the expiration time.

        The slack in timing is needed to avoid race conditions where simply checking
        the celery queue and task status could result in race conditions."""
        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)

    def active(self) -> bool:
        return bool(self.redis.exists(self.active_key))

    @property
    def generator_complete(self) -> int | None:
        """the fence payload is an int representing the starting number of
        permission sync tasks to be processed ... just after the generator completes."""
        fence_bytes = self.redis.get(self.generator_complete_key)
        if fence_bytes is None:
            return None

        if fence_bytes == b"None":
            return None

        fence_int = int(cast(bytes, fence_bytes).decode())
        return fence_int

    @generator_complete.setter
    def generator_complete(self, payload: int | None) -> None:
        """Set the payload to an int to set the fence, otherwise if None it will
        be deleted"""
        if payload is None:
            self.redis.delete(self.generator_complete_key)
            return

        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)

    def update_db(
        self,
        lock: RedisLock | None,
        new_permissions: list[ElementExternalAccess],
        source_string: str,
        connector_id: int,
        credential_id: int,
        task_logger: Logger | None = None,
    ) -> PermissionSyncResult:
        """Update permissions for documents and hierarchy nodes.

        Returns:
            PermissionSyncResult containing counts of successful updates and errors
        """
        last_lock_time = time.monotonic()

        element_update_permissions_fn = fetch_versioned_implementation(
            "onyx.background.celery.tasks.doc_permission_syncing.tasks",
            "element_update_permissions",
        )

        num_permissions = 0
        num_errors = 0
        # Create a task for each permission sync
        for permissions in new_permissions:
            current_time = time.monotonic()
            if lock and current_time - last_lock_time >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
            ):
                lock.reacquire()
                last_lock_time = current_time

            if (
                permissions.external_access.num_entries
                > permissions.external_access.MAX_NUM_ENTRIES
            ):
                if task_logger:
                    num_users = len(permissions.external_access.external_user_emails)
                    num_groups = len(
                        permissions.external_access.external_user_group_ids
                    )
                    element_id = (
                        permissions.doc_id
                        if isinstance(permissions, DocExternalAccess)
                        else permissions.raw_node_id
                    )
                    task_logger.warning(
                        f"Permissions length exceeded, skipping...: "
                        f"{element_id} "
                        f"{num_users=} {num_groups=} "
                        f"{permissions.external_access.MAX_NUM_ENTRIES=}"
                    )
                continue

            # NOTE(rkuo): this used to fire a task instead of directly writing to the DB,
            # but the permissions can be excessively large if sent over the wire.
            # On the other hand, the downside of doing db updates here is that we can
            # block and fail if we can't make the calls to the DB ... but that's probably
            # a rare enough case to be acceptable.

            # This can internally exception due to db issues but still continue
            # Catch exceptions per-element to avoid breaking the entire sync
            try:
                element_update_permissions_fn(
                    self.tenant_id,
                    permissions,
                    source_string,
                    connector_id,
                    credential_id,
                )

                num_permissions += 1
            except Exception:
                num_errors += 1
                if task_logger:
                    element_id = (
                        permissions.doc_id
                        if isinstance(permissions, DocExternalAccess)
                        else permissions.raw_node_id
                    )
                    task_logger.exception(
                        f"Failed to update permissions for element {element_id}"
                    )
                # Continue processing other elements

        return PermissionSyncResult(num_updated=num_permissions, num_errors=num_errors)

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.active_key)
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:
        taskset_key = f"{RedisConnectorPermissionSync.TASKSET_PREFIX}_{id}"
        r.srem(taskset_key, task_id)
        return

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Deletes all redis values for all connectors"""
        for key in r.scan_iter(RedisConnectorPermissionSync.ACTIVE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPermissionSync.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(
            RedisConnectorPermissionSync.GENERATOR_COMPLETE_PREFIX + "*"
        ):
            r.delete(key)

        for key in r.scan_iter(
            RedisConnectorPermissionSync.GENERATOR_PROGRESS_PREFIX + "*"
        ):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPermissionSync.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_connector_ext_group_sync.py
================================================
from datetime import datetime
from typing import cast

import redis
from celery import Celery
from pydantic import BaseModel
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.constants import OnyxRedisConstants
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT


class RedisConnectorExternalGroupSyncPayload(BaseModel):
    id: str
    submitted: datetime
    started: datetime | None
    celery_task_id: str | None


class RedisConnectorExternalGroupSync:
    """Manages interactions with redis for external group syncing tasks. Should only be accessed
    through RedisConnector."""

    PREFIX = "connectorexternalgroupsync"

    FENCE_PREFIX = f"{PREFIX}_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks

    # phase 1 - geneartor task and progress signals
    GENERATORTASK_PREFIX = f"{PREFIX}+generator"  # connectorexternalgroupsync+generator
    GENERATOR_PROGRESS_PREFIX = (
        PREFIX + "_generator_progress"
    )  # connectorexternalgroupsync_generator_progress
    GENERATOR_COMPLETE_PREFIX = (
        PREFIX + "_generator_complete"
    )  # connectorexternalgroupsync_generator_complete

    TASKSET_PREFIX = f"{PREFIX}_taskset"  # connectorexternalgroupsync_taskset
    SUBTASK_PREFIX = f"{PREFIX}+sub"  # connectorexternalgroupsync+sub

    # used to signal the overall workflow is still active
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
    ACTIVE_TTL = 3600

    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str = tenant_id
        self.id = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.generator_task_key = f"{self.GENERATORTASK_PREFIX}_{id}"
        self.generator_progress_key = f"{self.GENERATOR_PROGRESS_PREFIX}_{id}"
        self.generator_complete_key = f"{self.GENERATOR_COMPLETE_PREFIX}_{id}"

        self.taskset_key = f"{self.TASKSET_PREFIX}_{id}"

        self.subtask_prefix: str = f"{self.SUBTASK_PREFIX}_{id}"
        self.active_key = f"{self.ACTIVE_PREFIX}_{id}"

    def taskset_clear(self) -> None:
        self.redis.delete(self.taskset_key)

    def generator_clear(self) -> None:
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)

    def get_remaining(self) -> int:
        # todo: move into fence
        remaining = cast(int, self.redis.scard(self.taskset_key))
        return remaining

    def get_active_task_count(self) -> int:
        """Count of active external group syncing tasks"""
        count = 0
        for _ in self.redis.sscan_iter(
            OnyxRedisConstants.ACTIVE_FENCES,
            RedisConnectorExternalGroupSync.FENCE_PREFIX + "*",
            count=SCAN_ITER_COUNT_DEFAULT,
        ):
            count += 1
        return count

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> RedisConnectorExternalGroupSyncPayload | None:
        # read related data and evaluate/print task progress
        fence_raw = self.redis.get(self.fence_key)
        if fence_raw is None:
            return None

        fence_bytes = cast(bytes, fence_raw)
        fence_str = fence_bytes.decode("utf-8")
        payload = RedisConnectorExternalGroupSyncPayload.model_validate_json(
            cast(str, fence_str)
        )

        return payload

    def set_fence(
        self,
        payload: RedisConnectorExternalGroupSyncPayload | None,
    ) -> None:
        if not payload:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    def set_active(self) -> None:
        """This sets a signal to keep the permissioning flow from getting cleaned up within
        the expiration time.

        The slack in timing is needed to avoid race conditions where simply checking
        the celery queue and task status could result in race conditions."""
        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)

    def active(self) -> bool:
        return bool(self.redis.exists(self.active_key))

    @property
    def generator_complete(self) -> int | None:
        """the fence payload is an int representing the starting number of
        external group syncing tasks to be processed ... just after the generator completes.
        """
        fence_bytes = self.redis.get(self.generator_complete_key)
        if fence_bytes is None:
            return None

        if fence_bytes == b"None":
            return None

        fence_int = int(cast(bytes, fence_bytes).decode())
        return fence_int

    @generator_complete.setter
    def generator_complete(self, payload: int | None) -> None:
        """Set the payload to an int to set the fence, otherwise if None it will
        be deleted"""
        if payload is None:
            self.redis.delete(self.generator_complete_key)
            return

        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)

    def generate_tasks(
        self,
        celery_app: Celery,
        db_session: Session,
        lock: RedisLock | None,
    ) -> int | None:
        pass

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.active_key)
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:
        taskset_key = f"{RedisConnectorExternalGroupSync.TASKSET_PREFIX}_{id}"
        r.srem(taskset_key, task_id)
        return

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Deletes all redis values for all connectors"""
        for key in r.scan_iter(RedisConnectorExternalGroupSync.ACTIVE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorExternalGroupSync.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(
            RedisConnectorExternalGroupSync.GENERATOR_COMPLETE_PREFIX + "*"
        ):
            r.delete(key)

        for key in r.scan_iter(
            RedisConnectorExternalGroupSync.GENERATOR_PROGRESS_PREFIX + "*"
        ):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorExternalGroupSync.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_connector_index.py
================================================
from datetime import datetime

from pydantic import BaseModel


class RedisConnectorIndexPayload(BaseModel):
    index_attempt_id: int | None
    started: datetime | None
    submitted: datetime
    celery_task_id: str | None


================================================
FILE: backend/onyx/redis/redis_connector_prune.py
================================================
import time
from datetime import datetime
from typing import cast
from uuid import uuid4

import redis
from celery import Celery
from pydantic import BaseModel
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT


class RedisConnectorPrunePayload(BaseModel):
    id: str
    submitted: datetime
    started: datetime | None
    celery_task_id: str | None


class RedisConnectorPrune:
    """Manages interactions with redis for pruning tasks. Should only be accessed
    through RedisConnector."""

    PREFIX = "connectorpruning"

    FENCE_PREFIX = f"{PREFIX}_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks

    # phase 1 - geneartor task and progress signals
    GENERATORTASK_PREFIX = f"{PREFIX}+generator"  # connectorpruning+generator
    GENERATOR_PROGRESS_PREFIX = (
        PREFIX + "_generator_progress"
    )  # connectorpruning_generator_progress
    GENERATOR_COMPLETE_PREFIX = (
        PREFIX + "_generator_complete"
    )  # connectorpruning_generator_complete

    TASKSET_PREFIX = f"{PREFIX}_taskset"  # connectorpruning_taskset
    TASKSET_TTL = FENCE_TTL
    SUBTASK_PREFIX = f"{PREFIX}+sub"  # connectorpruning+sub

    # used to signal the overall workflow is still active
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
    ACTIVE_TTL = CELERY_PRUNING_LOCK_TIMEOUT * 2

    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str = tenant_id
        self.id = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.generator_task_key = f"{self.GENERATORTASK_PREFIX}_{id}"
        self.generator_progress_key = f"{self.GENERATOR_PROGRESS_PREFIX}_{id}"
        self.generator_complete_key = f"{self.GENERATOR_COMPLETE_PREFIX}_{id}"

        self.taskset_key = f"{self.TASKSET_PREFIX}_{id}"

        self.subtask_prefix: str = f"{self.SUBTASK_PREFIX}_{id}"
        self.active_key = f"{self.ACTIVE_PREFIX}_{id}"

    def taskset_clear(self) -> None:
        self.redis.delete(self.taskset_key)

    def generator_clear(self) -> None:
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)

    def get_remaining(self) -> int:
        # todo: move into fence
        remaining = cast(int, self.redis.scard(self.taskset_key))
        return remaining

    def get_active_task_count(self) -> int:
        """Count of active pruning tasks"""
        count = 0
        for _ in self.redis.sscan_iter(
            OnyxRedisConstants.ACTIVE_FENCES,
            RedisConnectorPrune.FENCE_PREFIX + "*",
            count=SCAN_ITER_COUNT_DEFAULT,
        ):
            count += 1
        return count

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> RedisConnectorPrunePayload | None:
        # read related data and evaluate/print task progress
        fence_bytes = cast(bytes, self.redis.get(self.fence_key))
        if fence_bytes is None:
            return None

        fence_str = fence_bytes.decode("utf-8")
        payload = RedisConnectorPrunePayload.model_validate_json(cast(str, fence_str))

        return payload

    def set_fence(
        self,
        payload: RedisConnectorPrunePayload | None,
    ) -> None:
        if not payload:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    def set_active(self) -> None:
        """This sets a signal to keep the permissioning flow from getting cleaned up within
        the expiration time.

        The slack in timing is needed to avoid race conditions where simply checking
        the celery queue and task status could result in race conditions."""
        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)

    def active(self) -> bool:
        return bool(self.redis.exists(self.active_key))

    @property
    def generator_complete(self) -> int | None:
        """the fence payload is an int representing the starting number of
        pruning tasks to be processed ... just after the generator completes."""
        fence_bytes = self.redis.get(self.generator_complete_key)
        if fence_bytes is None:
            return None

        fence_int = int(cast(bytes, fence_bytes))
        return fence_int

    @generator_complete.setter
    def generator_complete(self, payload: int | None) -> None:
        """Set the payload to an int to set the fence, otherwise if None it will
        be deleted"""
        if payload is None:
            self.redis.delete(self.generator_complete_key)
            return

        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)

    def generate_tasks(
        self,
        documents_to_prune: set[str],
        celery_app: Celery,
        db_session: Session,
        lock: RedisLock | None,
    ) -> int | None:
        last_lock_time = time.monotonic()

        async_results = []
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=int(self.id),
        )
        if not cc_pair:
            return None

        for doc_id in documents_to_prune:
            current_time = time.monotonic()
            if lock and current_time - last_lock_time >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
            ):
                lock.reacquire()
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.subtask_prefix}_{uuid4()}"

            # add to the tracking taskset in redis BEFORE creating the celery task.
            self.redis.sadd(self.taskset_key, custom_task_id)
            self.redis.expire(self.taskset_key, self.TASKSET_TTL)

            # Priority on sync's triggered by new indexing should be medium
            result = celery_app.send_task(
                OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,
                kwargs=dict(
                    document_id=doc_id,
                    connector_id=cc_pair.connector_id,
                    credential_id=cc_pair.credential_id,
                    tenant_id=self.tenant_id,
                ),
                queue=OnyxCeleryQueues.CONNECTOR_DELETION,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
                ignore_result=True,
            )

            async_results.append(result)

        return len(async_results)

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.active_key)
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:
        taskset_key = f"{RedisConnectorPrune.TASKSET_PREFIX}_{id}"
        r.srem(taskset_key, task_id)
        return

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Deletes all redis values for all connectors"""
        for key in r.scan_iter(RedisConnectorPrune.ACTIVE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPrune.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPrune.GENERATOR_COMPLETE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPrune.GENERATOR_PROGRESS_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_connector_stop.py
================================================
import redis


class RedisConnectorStop:
    """Manages interactions with redis for stop signaling. Should only be accessed
    through RedisConnector."""

    PREFIX = "connectorstop"
    FENCE_PREFIX = f"{PREFIX}_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks

    # if this timeout is exceeded, the caller may decide to take more
    # drastic measures
    TIMEOUT_PREFIX = f"{PREFIX}_timeout"
    TIMEOUT_TTL = 300

    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str = tenant_id
        self.id: int = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.timeout_key: str = f"{self.TIMEOUT_PREFIX}_{id}"

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    def set_fence(self, value: bool) -> None:
        if not value:
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, 0, ex=self.FENCE_TTL)

    @property
    def timed_out(self) -> bool:
        return not bool(self.redis.exists(self.timeout_key))

    def set_timeout(self) -> None:
        """After calling this, call timed_out to determine if the timeout has been
        exceeded."""
        self.redis.set(f"{self.timeout_key}", 0, ex=self.TIMEOUT_TTL)

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        for key in r.scan_iter(RedisConnectorStop.FENCE_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisConnectorStop.TIMEOUT_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_connector_utils.py
================================================
from sqlalchemy.orm import Session

from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import TaskStatus
from onyx.db.models import TaskQueueState
from onyx.redis.redis_connector import RedisConnector
from onyx.server.documents.models import DeletionAttemptSnapshot


def _get_deletion_status(
    connector_id: int,
    credential_id: int,
    db_session: Session,
    tenant_id: str,
) -> TaskQueueState | None:
    """We no longer store TaskQueueState in the DB for a deletion attempt.
    This function populates TaskQueueState by just checking redis.
    """
    cc_pair = get_connector_credential_pair(
        connector_id=connector_id, credential_id=credential_id, db_session=db_session
    )
    if not cc_pair:
        return None

    redis_connector = RedisConnector(tenant_id, cc_pair.id)
    if redis_connector.delete.fenced:
        return TaskQueueState(
            task_id="",
            task_name=redis_connector.delete.fence_key,
            status=TaskStatus.STARTED,
        )

    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
        return TaskQueueState(
            task_id="",
            task_name=redis_connector.delete.fence_key,
            status=TaskStatus.PENDING,
        )

    return None


def get_deletion_attempt_snapshot(
    connector_id: int,
    credential_id: int,
    db_session: Session,
    tenant_id: str,
) -> DeletionAttemptSnapshot | None:
    deletion_task = _get_deletion_status(
        connector_id, credential_id, db_session, tenant_id
    )
    if not deletion_task:
        return None

    return DeletionAttemptSnapshot(
        connector_id=connector_id,
        credential_id=credential_id,
        status=deletion_task.status,
    )


================================================
FILE: backend/onyx/redis/redis_document_set.py
================================================
import time
from typing import cast
from uuid import uuid4

import redis
from celery import Celery
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DB_YIELD_PER_DEFAULT
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.document_set import construct_document_id_select_by_docset
from onyx.redis.redis_object_helper import RedisObjectHelper


class RedisDocumentSet(RedisObjectHelper):
    PREFIX = "documentset"
    FENCE_PREFIX = PREFIX + "_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks
    TASKSET_PREFIX = PREFIX + "_taskset"
    TASKSET_TTL = FENCE_TTL

    def __init__(self, tenant_id: str, id: int) -> None:
        super().__init__(tenant_id, str(id))

    @property
    def fenced(self) -> bool:
        return bool(self.redis.exists(self.fence_key))

    def set_fence(self, payload: int | None) -> None:
        if payload is None:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload, ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    @property
    def payload(self) -> int | None:
        bytes = self.redis.get(self.fence_key)
        if bytes is None:
            return None

        progress = int(cast(int, bytes))
        return progress

    def generate_tasks(
        self,
        max_tasks: int,  # noqa: ARG002
        celery_app: Celery,
        db_session: Session,
        redis_client: Redis,
        lock: RedisLock,
        tenant_id: str,
    ) -> tuple[int, int] | None:
        """Max tasks is ignored for now until we can build the logic to mark the
        document set up to date over multiple batches.
        """
        last_lock_time = time.monotonic()

        num_tasks_sent = 0

        stmt = construct_document_id_select_by_docset(int(self._id), current_only=False)
        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
            doc_id = cast(str, doc_id)
            current_time = time.monotonic()
            if current_time - last_lock_time >= (
                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
            ):
                lock.reacquire()
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"

            # add to the set BEFORE creating the task.
            redis_client.sadd(self.taskset_key, custom_task_id)
            redis_client.expire(self.taskset_key, self.TASKSET_TTL)

            celery_app.send_task(
                OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
                kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
                queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
            )

            num_tasks_sent += 1

        return num_tasks_sent, num_tasks_sent

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_hierarchy.py
================================================
"""Redis cache operations for hierarchy node ancestor resolution.

This module provides a Redis-based cache for hierarchy node parent relationships,
enabling fast ancestor path resolution without repeated database queries.

The cache stores node_id -> parent_id mappings for all hierarchy nodes of a given
source type. When resolving ancestors for a document, we walk up the tree using
Redis lookups instead of database queries.

Cache Strategy:
- Nodes are cached per source type with a 6-hour TTL
- During docfetching, nodes are added to cache as they're upserted to Postgres
- If the cache is stale (TTL expired during long-running job), one worker does
  a full refresh from DB while others wait
- If a node is still not found after refresh, we log an error and fall back to
  using only the SOURCE-type node as the ancestor
"""

from typing import cast
from typing import TYPE_CHECKING

from pydantic import BaseModel
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.enums import HierarchyNodeType
from onyx.db.hierarchy import ensure_source_node_exists as db_ensure_source_node_exists
from onyx.db.hierarchy import get_all_hierarchy_nodes_for_source
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    from onyx.db.models import HierarchyNode as DBHierarchyNode

logger = setup_logger()

# Cache TTL: 6 hours in seconds
HIERARCHY_CACHE_TTL_SECONDS = 6 * 60 * 60

# Lock timeout for cache refresh: 5 minutes
HIERARCHY_CACHE_LOCK_TIMEOUT_SECONDS = 5 * 60

# Lock acquisition timeout: 60 seconds
HIERARCHY_CACHE_LOCK_ACQUIRE_TIMEOUT_SECONDS = 60

MAX_DEPTH = 1000


class HierarchyNodeCacheEntry(BaseModel):
    """Represents a hierarchy node for caching purposes."""

    node_id: int
    parent_id: int | None
    node_type: HierarchyNodeType
    raw_node_id: str

    @classmethod
    def from_db_model(cls, node: "DBHierarchyNode") -> "HierarchyNodeCacheEntry":
        """Create a cache entry from a SQLAlchemy HierarchyNode model."""
        return cls(
            node_id=node.id,
            parent_id=node.parent_id,
            node_type=node.node_type,
            raw_node_id=node.raw_node_id,
        )


def _cache_key(source: DocumentSource) -> str:
    """Get the Redis hash key for hierarchy node cache of a given source.

    This hash stores: node_id -> "parent_id:node_type"
    """
    return f"hierarchy_cache:{source.value}"


def _raw_id_cache_key(source: DocumentSource) -> str:
    """Get the Redis hash key for raw_node_id -> node_id mapping.

    This hash stores: raw_node_id -> node_id
    """
    return f"hierarchy_cache_rawid:{source.value}"


def _source_node_key(source: DocumentSource) -> str:
    """Get the Redis key for the SOURCE-type node ID of a given source.

    This is a simple string key storing the database ID of the SOURCE node.
    """
    return f"hierarchy_source_node:{source.value}"


def _loading_lock_key(source: DocumentSource) -> str:
    """Get the Redis lock key for cache loading of a given source."""
    return f"hierarchy_cache_loading:{source.value}"


def _construct_parent_value(parent_id: int | None, node_type: HierarchyNodeType) -> str:
    """Construct the cached value string from parent_id and node_type.

    Format: "parent_id:node_type" where parent_id is empty string if None.
    """
    parent_str = str(parent_id) if parent_id is not None else ""
    return f"{parent_str}:{node_type.value}"


def _unpack_parent_value(value: str) -> tuple[int | None, HierarchyNodeType | None]:
    """Unpack a cached value string back into (parent_id, node_type).

    Returns None for invalid values.
    """
    parts = value.split(":", 1)
    parent_str = parts[0]
    node_type_str = parts[1] if len(parts) > 1 else ""
    parent_id = int(parent_str) if parent_str else None

    node_type = HierarchyNodeType(node_type_str) if node_type_str else None

    return parent_id, node_type


def cache_hierarchy_node(
    redis_client: Redis,
    source: DocumentSource,
    entry: HierarchyNodeCacheEntry,
) -> None:
    """
    Add or update a single hierarchy node in the Redis cache.

    Called during docfetching when nodes are upserted to Postgres.
    Stores the parent chain mapping, raw_id -> node_id mapping, and
    SOURCE node ID (if this is a SOURCE-type node).

    Args:
        redis_client: Redis client with tenant prefixing
        source: The document source (e.g., CONFLUENCE, GOOGLE_DRIVE)
        entry: The hierarchy node cache entry
    """
    cache_key = _cache_key(source)
    raw_id_key = _raw_id_cache_key(source)

    # Store parent chain: node_id -> "parent_id:node_type"
    value = _construct_parent_value(entry.parent_id, entry.node_type)
    redis_client.hset(cache_key, str(entry.node_id), value)

    # Store raw_id -> node_id mapping
    redis_client.hset(raw_id_key, entry.raw_node_id, str(entry.node_id))

    # If this is the SOURCE node, store its ID in the dedicated key
    if entry.node_type == HierarchyNodeType.SOURCE:
        source_node_key = _source_node_key(source)
        redis_client.set(source_node_key, str(entry.node_id))
        redis_client.expire(source_node_key, HIERARCHY_CACHE_TTL_SECONDS)

    # Refresh TTL on every write (ensures cache stays alive during long indexing)
    redis_client.expire(cache_key, HIERARCHY_CACHE_TTL_SECONDS)
    redis_client.expire(raw_id_key, HIERARCHY_CACHE_TTL_SECONDS)


def cache_hierarchy_nodes_batch(
    redis_client: Redis,
    source: DocumentSource,
    entries: list[HierarchyNodeCacheEntry],
) -> None:
    """
    Add or update multiple hierarchy nodes in the Redis cache.

    Args:
        redis_client: Redis client with tenant prefixing
        source: The document source
        entries: List of HierarchyNodeCacheEntry objects
    """
    if not entries:
        return

    cache_key = _cache_key(source)
    raw_id_key = _raw_id_cache_key(source)
    source_node_key = _source_node_key(source)

    # Build mappings for batch insert
    parent_mapping: dict[str, str] = {}
    raw_id_mapping: dict[str, str] = {}
    source_node_id: int | None = None

    for entry in entries:
        parent_mapping[str(entry.node_id)] = _construct_parent_value(
            entry.parent_id, entry.node_type
        )
        raw_id_mapping[entry.raw_node_id] = str(entry.node_id)

        # Track the SOURCE node if we encounter it
        if entry.node_type == HierarchyNodeType.SOURCE:
            source_node_id = entry.node_id

    # Use hset with mapping for batch insert
    redis_client.hset(cache_key, mapping=parent_mapping)
    redis_client.hset(raw_id_key, mapping=raw_id_mapping)

    # Cache the SOURCE node ID if found
    if source_node_id is not None:
        redis_client.set(source_node_key, str(source_node_id))
        redis_client.expire(source_node_key, HIERARCHY_CACHE_TTL_SECONDS)

    redis_client.expire(cache_key, HIERARCHY_CACHE_TTL_SECONDS)
    redis_client.expire(raw_id_key, HIERARCHY_CACHE_TTL_SECONDS)


def evict_hierarchy_nodes_from_cache(
    redis_client: Redis,
    source: DocumentSource,
    raw_node_ids: list[str],
) -> None:
    """Remove specific hierarchy nodes from the Redis cache.

    Deletes entries from both the parent-chain hash and the raw_id→node_id hash.
    """
    if not raw_node_ids:
        return

    cache_key = _cache_key(source)
    raw_id_key = _raw_id_cache_key(source)

    # Look up node_ids so we can remove them from the parent-chain hash
    raw_values = cast(list[str | None], redis_client.hmget(raw_id_key, raw_node_ids))
    node_id_strs = [v for v in raw_values if v is not None]

    if node_id_strs:
        redis_client.hdel(cache_key, *node_id_strs)
    redis_client.hdel(raw_id_key, *raw_node_ids)


def get_node_id_from_raw_id(
    redis_client: Redis,
    source: DocumentSource,
    raw_node_id: str,
) -> tuple[int | None, bool]:
    """
    Get the database node_id for a raw_node_id from the cache.

    Returns:
        Tuple of (node_id or None, found_in_cache)
        - If found_in_cache is False, the raw_id doesn't exist in cache
        - If found_in_cache is True, node_id is the database ID
    """
    raw_id_key = _raw_id_cache_key(source)
    value = redis_client.hget(raw_id_key, raw_node_id)

    if value is None:
        return None, False

    # Decode bytes if needed
    value_str: str
    if isinstance(value, bytes):
        value_str = value.decode("utf-8")
    else:
        value_str = str(value)

    return int(value_str), True


def get_parent_id_from_cache(
    redis_client: Redis,
    source: DocumentSource,
    node_id: int,
) -> tuple[int | None, bool]:
    """
    Get the parent_id for a node from the cache.

    Returns:
        Tuple of (parent_id or None, found_in_cache)
        - If found_in_cache is False, the node doesn't exist in cache
        - If found_in_cache is True, parent_id is the actual parent (or None for root)
    """
    cache_key = _cache_key(source)
    value = redis_client.hget(cache_key, str(node_id))

    if value is None:
        return None, False

    # Decode bytes if needed
    value_str: str
    if isinstance(value, bytes):
        value_str = value.decode("utf-8")
    else:
        value_str = str(value)

    parent_id, _ = _unpack_parent_value(value_str)
    return parent_id, True


def is_cache_populated(redis_client: Redis, source: DocumentSource) -> bool:
    """Check if the cache has any entries for this source."""
    cache_key = _cache_key(source)
    # redis.exists returns int (number of keys that exist)
    exists_result: int = redis_client.exists(cache_key)  # type: ignore[assignment]
    return exists_result > 0


def refresh_hierarchy_cache_from_db(
    redis_client: Redis,
    db_session: Session,
    source: DocumentSource,
) -> None:
    """
    Refresh the entire hierarchy cache for a source from the database.

    This function acquires a distributed lock to ensure only one worker
    performs the refresh. Other workers will wait for the refresh to complete.

    Args:
        redis_client: Redis client with tenant prefixing
        db_session: SQLAlchemy session for database access
        source: The document source to refresh
    """

    lock_key = _loading_lock_key(source)

    # Try to acquire lock - if we can't get it, someone else is refreshing
    lock: RedisLock = redis_client.lock(
        lock_key,
        timeout=HIERARCHY_CACHE_LOCK_TIMEOUT_SECONDS,
        blocking=True,
        blocking_timeout=HIERARCHY_CACHE_LOCK_ACQUIRE_TIMEOUT_SECONDS,
    )

    acquired = lock.acquire(blocking=True)
    if not acquired:
        logger.warning(
            f"Could not acquire lock for hierarchy cache refresh for source {source.value} - another worker may be refreshing"
        )
        return

    try:
        # Always refresh from DB when called - new nodes may have been added
        # since the cache was last populated. The lock ensures only one worker
        # does the refresh at a time.
        logger.info(f"Refreshing hierarchy cache for source {source.value} from DB")

        # Load all nodes for this source from DB
        nodes = get_all_hierarchy_nodes_for_source(db_session, source)

        if not nodes:
            logger.warning(f"No hierarchy nodes found in DB for source {source.value}")
            return

        # Batch insert into cache
        cache_entries = [HierarchyNodeCacheEntry.from_db_model(node) for node in nodes]
        cache_hierarchy_nodes_batch(redis_client, source, cache_entries)

        logger.info(
            f"Refreshed hierarchy cache for {source.value} with {len(nodes)} nodes"
        )

    finally:
        try:
            lock.release()
        except Exception as e:
            logger.warning(f"Error releasing hierarchy cache lock: {e}")


def _walk_ancestor_chain(
    redis_client: Redis,
    source: DocumentSource,
    start_node_id: int,
    db_session: Session,
) -> list[int]:
    """
    Walk up the hierarchy tree from a node, collecting all ancestor IDs.

    Internal helper used by both get_ancestors_from_node_id and
    get_ancestors_from_raw_id.
    """
    ancestors: list[int] = []
    current_id: int | None = start_node_id
    visited: set[int] = set()

    while current_id is not None and len(ancestors) < MAX_DEPTH:
        if current_id in visited:
            logger.error(
                f"Cycle detected in hierarchy for source {source.value} at node {current_id}. Ancestors so far: {ancestors}"
            )
            break

        visited.add(current_id)
        ancestors.append(current_id)

        parent_id, found = get_parent_id_from_cache(redis_client, source, current_id)

        if not found:
            logger.debug(
                f"Cache miss for hierarchy node {current_id} of source {source.value}, attempting refresh"
            )
            refresh_hierarchy_cache_from_db(redis_client, db_session, source)
            parent_id, found = get_parent_id_from_cache(
                redis_client, source, current_id
            )

            if not found:
                logger.error(
                    f"Hierarchy node {current_id} not found in cache for source {source.value} even after refresh."
                )
                break

        current_id = parent_id

    if len(ancestors) >= MAX_DEPTH:
        logger.error(
            f"Hit max depth {MAX_DEPTH} traversing hierarchy for source "
            f"{source.value}. Possible infinite loop or very deep hierarchy."
        )

    return ancestors


def get_ancestors_from_raw_id(
    redis_client: Redis,
    source: DocumentSource,
    parent_hierarchy_raw_node_id: str | None,
    db_session: Session,
) -> list[int]:
    """
    Get all ancestor hierarchy node IDs from a raw_node_id.

    This is the main entry point for getting ancestors from a document's
    parent_hierarchy_raw_node_id. It resolves the raw_id to a database ID
    via Redis cache, then walks up the tree.

    No DB calls are made unless the cache is stale.

    Args:
        redis_client: Redis client with tenant prefixing
        source: The document source
        parent_hierarchy_raw_node_id: The document's parent raw node ID (from connector)
        db_session: DB session for cache refresh if needed

    Returns:
        List of ancestor hierarchy node IDs from parent to root (inclusive).
        Returns list with just SOURCE node ID if parent is None or not found.
    """
    # If no parent specified, return just the SOURCE node
    if parent_hierarchy_raw_node_id is None:
        source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
        return [source_node_id] if source_node_id else []

    # Resolve raw_id to node_id via Redis
    node_id, found = get_node_id_from_raw_id(
        redis_client, source, parent_hierarchy_raw_node_id
    )

    if not found:
        # Cache miss - try refresh
        logger.debug(
            f"Cache miss for raw_node_id '{parent_hierarchy_raw_node_id}' of source {source.value}, attempting refresh"
        )
        refresh_hierarchy_cache_from_db(redis_client, db_session, source)
        node_id, found = get_node_id_from_raw_id(
            redis_client, source, parent_hierarchy_raw_node_id
        )

    if not found or node_id is None:
        logger.error(
            f"Raw node ID '{parent_hierarchy_raw_node_id}' not found in cache "
            f"for source {source.value}. Falling back to SOURCE node only."
        )
        source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
        return [source_node_id] if source_node_id else []

    # Walk up the ancestor chain
    return _walk_ancestor_chain(redis_client, source, node_id, db_session)


def get_source_node_id_from_cache(
    redis_client: Redis,
    db_session: Session,
    source: DocumentSource,
) -> int | None:
    """
    Get the SOURCE-type node ID for a given source from cache.

    If not in cache and db_session is provided, refreshes from DB.

    Returns:
        The ID of the SOURCE node, or None if not found.
    """
    source_node_key = _source_node_key(source)

    # Try to get from dedicated SOURCE node key
    value = redis_client.get(source_node_key)
    if value is not None:
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        if not isinstance(value, str):
            raise ValueError(f"SOURCE node value is not a string: {value}")
        return int(value)

    # Not in cache - try refresh from DB
    refresh_hierarchy_cache_from_db(redis_client, db_session, source)

    # Try again after refresh
    value = redis_client.get(source_node_key)
    if value is not None:
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        if not isinstance(value, str):
            raise ValueError(f"SOURCE node value is not a string: {value}")
        return int(value)

    logger.error(f"SOURCE node not found for source {source.value}")
    return None


def clear_hierarchy_cache(redis_client: Redis, source: DocumentSource) -> None:
    """Clear the hierarchy cache for a source (useful for testing)."""
    cache_key = _cache_key(source)
    raw_id_key = _raw_id_cache_key(source)
    source_node_key = _source_node_key(source)
    redis_client.delete(cache_key)
    redis_client.delete(raw_id_key)
    redis_client.delete(source_node_key)


def ensure_source_node_exists(
    redis_client: Redis,
    db_session: Session,
    source: DocumentSource,
) -> int:
    """
    Ensure that a SOURCE-type hierarchy node exists for the given source and cache it.

    This is the primary entry point for ensuring hierarchy infrastructure is set up
    for a source before processing documents. It should be called early in the
    indexing pipeline (e.g., at the start of docfetching or hierarchy fetching).

    The function:
    1. Checks Redis cache for existing SOURCE node ID
    2. If not cached, ensures the SOURCE node exists in the database
    3. Caches the SOURCE node in Redis for fast subsequent lookups

    This is idempotent and safe to call multiple times concurrently.

    Args:
        redis_client: Redis client with tenant prefixing
        db_session: SQLAlchemy session for database operations
        source: The document source type (e.g., GOOGLE_DRIVE, CONFLUENCE)

    Returns:
        The database ID of the SOURCE-type hierarchy node
    """
    # First check if we already have it cached
    source_node_key = _source_node_key(source)
    cached_value = redis_client.get(source_node_key)

    if cached_value is not None:
        value_str: str
        if isinstance(cached_value, bytes):
            value_str = cached_value.decode("utf-8")
        else:
            value_str = str(cached_value)
        return int(value_str)

    # Not cached - ensure it exists in DB and cache it
    source_node = db_ensure_source_node_exists(db_session, source, commit=True)

    # Cache the SOURCE node
    cache_entry = HierarchyNodeCacheEntry.from_db_model(source_node)
    cache_hierarchy_node(redis_client, source, cache_entry)

    logger.info(
        f"Ensured SOURCE node exists and cached for {source.value}: id={source_node.id}"
    )

    return source_node.id


================================================
FILE: backend/onyx/redis/redis_object_helper.py
================================================
from abc import ABC
from abc import abstractmethod

from celery import Celery
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.redis.redis_pool import get_redis_client


class RedisObjectHelper(ABC):
    PREFIX = "base"
    FENCE_PREFIX = PREFIX + "_fence"
    TASKSET_PREFIX = PREFIX + "_taskset"

    def __init__(self, tenant_id: str, id: str):
        self._tenant_id: str = tenant_id
        self._id: str = id
        self.redis = get_redis_client(tenant_id=tenant_id)

    @property
    def task_id_prefix(self) -> str:
        return f"{self.PREFIX}_{self._id}"

    @property
    def fence_key(self) -> str:
        # example: documentset_fence_1
        return f"{self.FENCE_PREFIX}_{self._id}"

    @property
    def taskset_key(self) -> str:
        # example: documentset_taskset_1
        return f"{self.TASKSET_PREFIX}_{self._id}"

    @staticmethod
    def get_id_from_fence_key(key: str) -> str | None:
        """
        Extracts the object ID from a fence key in the format `PREFIX_fence_X`.

        Args:
            key (str): The fence key string.

        Returns:
            Optional[int]: The extracted ID if the key is in the correct format, otherwise None.
        """
        parts = key.split("_")
        if len(parts) != 3:
            return None

        object_id = parts[2]
        return object_id

    @staticmethod
    def get_id_from_task_id(task_id: str) -> str | None:
        """
        Extracts the object ID from a task ID string.

        This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:
        - `prefix` is an arbitrary string (e.g., the name of the task or entity),
        - `objectid` is the ID you want to extract,
        - `suffix` is another arbitrary string (e.g., a UUID).

        Example:
            If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,
            this method will return the string `"1"`.

        Args:
            task_id (str): The task ID string from which to extract the object ID.

        Returns:
            str | None: The extracted object ID if the task ID is in the correct format, otherwise None.
        """
        # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc
        parts = task_id.split("_")
        if len(parts) != 3:
            return None

        object_id = parts[1]
        return object_id

    @abstractmethod
    def generate_tasks(
        self,
        max_tasks: int,
        celery_app: Celery,
        db_session: Session,
        redis_client: Redis,
        lock: RedisLock,
        tenant_id: str,
    ) -> tuple[int, int] | None:
        """First element should be the number of actual tasks generated, second should
        be the number of docs that were candidates to be synced for the cc pair.

        The need for this is when we are syncing stale docs referenced by multiple
        connectors. In a single pass across multiple cc pairs, we only want a task
        for be created for a particular document id the first time we see it.
        The rest can be skipped."""


================================================
FILE: backend/onyx/redis/redis_pool.py
================================================
import asyncio
import functools
import json
import ssl
import threading
from collections.abc import Callable
from typing import Any
from typing import cast
from typing import Optional

import redis
from fastapi import Request
from redis import asyncio as aioredis
from redis.client import Redis
from redis.lock import Lock as RedisLock

from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PASSWORD
from onyx.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS
from onyx.configs.app_configs import REDIS_PORT
from onyx.configs.app_configs import REDIS_REPLICA_HOST
from onyx.configs.app_configs import REDIS_SSL
from onyx.configs.app_configs import REDIS_SSL_CA_CERTS
from onyx.configs.app_configs import REDIS_SSL_CERT_REQS
from onyx.configs.app_configs import USE_REDIS_IAM_AUTH
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
from onyx.redis.iam_auth import configure_redis_iam_auth
from onyx.redis.iam_auth import create_redis_ssl_context_if_iam
from onyx.utils.logger import setup_logger
from shared_configs.configs import DEFAULT_REDIS_PREFIX
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

SCAN_ITER_COUNT_DEFAULT = 4096


class TenantRedis(redis.Redis):
    def __init__(self, tenant_id: str, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.tenant_id: str = tenant_id

    def _prefixed(self, key: str | bytes | memoryview) -> str | bytes | memoryview:
        prefix: str = f"{self.tenant_id}:"
        if isinstance(key, str):
            if key.startswith(prefix):
                return key
            else:
                return prefix + key
        elif isinstance(key, bytes):
            prefix_bytes = prefix.encode()
            if key.startswith(prefix_bytes):
                return key
            else:
                return prefix_bytes + key
        elif isinstance(key, memoryview):
            key_bytes = key.tobytes()
            prefix_bytes = prefix.encode()
            if key_bytes.startswith(prefix_bytes):
                return key
            else:
                return memoryview(prefix_bytes + key_bytes)
        else:
            raise TypeError(f"Unsupported key type: {type(key)}")

    def _prefix_method(self, method: Callable) -> Callable:
        @functools.wraps(method)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if "name" in kwargs:
                kwargs["name"] = self._prefixed(kwargs["name"])
            elif len(args) > 0:
                args = (self._prefixed(args[0]),) + args[1:]
            return method(*args, **kwargs)

        return wrapper

    def _prefix_scan_iter(self, method: Callable) -> Callable:
        @functools.wraps(method)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Prefix the match pattern if provided
            if "match" in kwargs:
                kwargs["match"] = self._prefixed(kwargs["match"])
            elif len(args) > 0:
                args = (self._prefixed(args[0]),) + args[1:]

            # Get the iterator
            iterator = method(*args, **kwargs)

            # Remove prefix from returned keys
            prefix = f"{self.tenant_id}:".encode()
            prefix_len = len(prefix)

            for key in iterator:
                if isinstance(key, bytes) and key.startswith(prefix):
                    yield key[prefix_len:]
                else:
                    yield key

        return wrapper

    def __getattribute__(self, item: str) -> Any:
        original_attr = super().__getattribute__(item)
        methods_to_wrap = [
            "lock",
            "unlock",
            "get",
            "set",
            "setex",
            "delete",
            "exists",
            "incrby",
            "hset",
            "hget",
            "getset",
            "owned",
            "reacquire",
            "create_lock",
            "startswith",
            "smembers",
            "sismember",
            "sadd",
            "srem",
            "scard",
            "hexists",
            "hset",
            "hdel",
            "ttl",
            "pttl",
        ]  # Regular methods that need simple prefixing

        if item == "scan_iter" or item == "sscan_iter":
            return self._prefix_scan_iter(original_attr)
        elif item in methods_to_wrap and callable(original_attr):
            return self._prefix_method(original_attr)
        return original_attr


class RedisPool:
    _instance: Optional["RedisPool"] = None
    _lock: threading.Lock = threading.Lock()
    _pool: redis.BlockingConnectionPool
    _replica_pool: redis.BlockingConnectionPool

    def __new__(cls) -> "RedisPool":
        if not cls._instance:
            with cls._lock:
                if not cls._instance:
                    cls._instance = super(RedisPool, cls).__new__(cls)
                    cls._instance._init_pools()
        return cls._instance

    def _init_pools(self) -> None:
        self._pool = RedisPool.create_pool(ssl=REDIS_SSL)
        self._replica_pool = RedisPool.create_pool(
            host=REDIS_REPLICA_HOST, ssl=REDIS_SSL
        )

    def get_client(self, tenant_id: str) -> Redis:
        return TenantRedis(tenant_id, connection_pool=self._pool)

    def get_replica_client(self, tenant_id: str) -> Redis:
        return TenantRedis(tenant_id, connection_pool=self._replica_pool)

    def get_raw_client(self) -> Redis:
        """
        Returns a Redis client with direct access to the primary connection pool,
        without tenant prefixing.
        """
        return redis.Redis(connection_pool=self._pool)

    def get_raw_replica_client(self) -> Redis:
        """
        Returns a Redis client with direct access to the replica connection pool,
        without tenant prefixing.
        """
        return redis.Redis(connection_pool=self._replica_pool)

    @staticmethod
    def create_pool(
        host: str = REDIS_HOST,
        port: int = REDIS_PORT,
        db: int = REDIS_DB_NUMBER,
        password: str = REDIS_PASSWORD,
        max_connections: int = REDIS_POOL_MAX_CONNECTIONS,
        ssl_ca_certs: str | None = REDIS_SSL_CA_CERTS,
        ssl_cert_reqs: str = REDIS_SSL_CERT_REQS,
        ssl: bool = False,
    ) -> redis.BlockingConnectionPool:
        """
        Create a Redis connection pool with appropriate SSL configuration.
        SSL Configuration Priority:
        1. IAM Authentication (USE_REDIS_IAM_AUTH=true): Uses system CA certificates
        2. Regular SSL (REDIS_SSL=true): Uses custom SSL configuration
        3. No SSL: Standard connection without encryption
        Note: IAM authentication automatically enables SSL and takes precedence
        over regular SSL configuration to ensure proper security.

        We use BlockingConnectionPool because it will block and wait for a connection
        rather than error if max_connections is reached. This is far more deterministic
        behavior and aligned with how we want to use Redis."""

        # Using ConnectionPool is not well documented.
        # Useful examples: https://github.com/redis/redis-py/issues/780

        # Handle IAM authentication
        if USE_REDIS_IAM_AUTH:
            # For IAM authentication, we don't use password
            # and ensure SSL is enabled with proper context
            ssl_context = create_redis_ssl_context_if_iam()
            return redis.BlockingConnectionPool(
                host=host,
                port=port,
                db=db,
                password=None,  # No password with IAM auth
                max_connections=max_connections,
                timeout=None,
                health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
                socket_keepalive=True,
                socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
                connection_class=redis.SSLConnection,
                ssl_context=ssl_context,  # Use IAM auth SSL context
            )

        if ssl:
            return redis.BlockingConnectionPool(
                host=host,
                port=port,
                db=db,
                password=password,
                max_connections=max_connections,
                timeout=None,
                health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
                socket_keepalive=True,
                socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
                connection_class=redis.SSLConnection,
                ssl_ca_certs=ssl_ca_certs,
                ssl_cert_reqs=ssl_cert_reqs,
            )

        return redis.BlockingConnectionPool(
            host=host,
            port=port,
            db=db,
            password=password,
            max_connections=max_connections,
            timeout=None,
            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
            socket_keepalive=True,
            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
        )


redis_pool = RedisPool()


# # Usage example
# redis_pool = RedisPool()
# redis_client = redis_pool.get_client()

# # Example of setting and getting a value
# redis_client.set('key', 'value')
# value = redis_client.get('key')
# print(value.decode())  # Output: 'value'


def get_redis_client(
    *,
    #  This argument will be deprecated in the future
    tenant_id: str | None = None,
) -> Redis:
    """
    Returns a Redis client with tenant-specific key prefixing.

    This ensures proper data isolation between tenants by automatically
    prefixing all Redis keys with the tenant ID.

    Use this when working with tenant-specific data that should be
    isolated from other tenants.
    """
    if tenant_id is None:
        tenant_id = get_current_tenant_id()

    return redis_pool.get_client(tenant_id)


def get_redis_replica_client(
    *,
    # this argument will be deprecated in the future
    tenant_id: str | None = None,
) -> Redis:
    """
    Returns a Redis replica client with tenant-specific key prefixing.

    Similar to get_redis_client(), but connects to a read replica when available.
    This ensures proper data isolation between tenants by automatically
    prefixing all Redis keys with the tenant ID.

    Use this for read-heavy operations on tenant-specific data.
    """
    if tenant_id is None:
        tenant_id = get_current_tenant_id()

    return redis_pool.get_replica_client(tenant_id)


def get_shared_redis_client() -> Redis:
    """
    Returns a Redis client with a shared namespace prefix.

    Unlike tenant-specific clients, this uses a common prefix for all keys,
    creating a shared namespace accessible across all tenants.

    Use this for data that should be shared across the application and
    isn't specific to any individual tenant.
    """
    return redis_pool.get_client(DEFAULT_REDIS_PREFIX)


def get_shared_redis_replica_client() -> Redis:
    """
    Returns a Redis replica client with a shared namespace prefix.

    Similar to get_shared_redis_client(), but connects to a read replica when available.
    Uses a common prefix for all keys, creating a shared namespace.

    Use this for read-heavy operations on data that should be shared
    across the application.
    """
    return redis_pool.get_replica_client(DEFAULT_REDIS_PREFIX)


def get_raw_redis_client() -> Redis:
    """
    Returns a Redis client that doesn't apply tenant prefixing to keys.

    Use this only when you need to access Redis directly without tenant isolation
    or any key prefixing. Typically needed for integrating with external systems
    or libraries that have inflexible key requirements.

    Warning: Be careful with this client as it bypasses tenant isolation.
    """
    return redis_pool.get_raw_client()


def get_raw_redis_replica_client() -> Redis:
    """
    Returns a Redis replica client that doesn't apply tenant prefixing to keys.

    Similar to get_raw_redis_client(), but connects to a read replica when available.
    Use this for read-heavy operations that need direct Redis access without
    tenant isolation or key prefixing.

    Warning: Be careful with this client as it bypasses tenant isolation.
    """
    return redis_pool.get_raw_replica_client()


SSL_CERT_REQS_MAP = {
    "none": ssl.CERT_NONE,
    "optional": ssl.CERT_OPTIONAL,
    "required": ssl.CERT_REQUIRED,
}


_async_redis_connection: aioredis.Redis | None = None
_async_lock = asyncio.Lock()


async def get_async_redis_connection() -> aioredis.Redis:
    """
    Provides a shared async Redis connection, using the same configs (host, port, SSL, etc.).
    Ensures that the connection is created only once (lazily) and reused for all future calls.
    """
    global _async_redis_connection

    # If we haven't yet created an async Redis connection, we need to create one
    if _async_redis_connection is None:
        # Acquire the lock to ensure that only one coroutine attempts to create the connection
        async with _async_lock:
            # Double-check inside the lock to avoid race conditions
            if _async_redis_connection is None:
                # Load env vars or your config variables

                connection_kwargs: dict[str, Any] = {
                    "host": REDIS_HOST,
                    "port": REDIS_PORT,
                    "db": REDIS_DB_NUMBER,
                    "password": REDIS_PASSWORD,
                    "max_connections": REDIS_POOL_MAX_CONNECTIONS,
                    "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
                    "socket_keepalive": True,
                    "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS,
                }

                if USE_REDIS_IAM_AUTH:
                    configure_redis_iam_auth(connection_kwargs)
                elif REDIS_SSL:
                    ssl_context = ssl.create_default_context()

                    if REDIS_SSL_CA_CERTS:
                        ssl_context.load_verify_locations(REDIS_SSL_CA_CERTS)
                    ssl_context.check_hostname = False

                    # Map your string to the proper ssl.CERT_* constant
                    ssl_context.verify_mode = SSL_CERT_REQS_MAP.get(
                        REDIS_SSL_CERT_REQS, ssl.CERT_NONE
                    )

                    connection_kwargs["ssl"] = ssl_context

                # Create a new Redis connection (or connection pool) with SSL configuration
                _async_redis_connection = aioredis.Redis(**connection_kwargs)

    # Return the established connection (or pool) for all future operations
    return _async_redis_connection


async def retrieve_auth_token_data(token: str) -> dict | None:
    """Validate auth token against Redis and return token data.

    Args:
        token: The raw authentication token string.

    Returns:
        Token data dict if valid, None if invalid/expired.
    """
    try:
        redis = await get_async_redis_connection()
        redis_key = REDIS_AUTH_KEY_PREFIX + token
        token_data_str = await redis.get(redis_key)

        if not token_data_str:
            logger.debug(f"Token key {redis_key} not found or expired in Redis")
            return None

        return json.loads(token_data_str)
    except json.JSONDecodeError:
        logger.error("Error decoding token data from Redis")
        return None
    except Exception as e:
        logger.error(f"Unexpected error in retrieve_auth_token_data: {str(e)}")
        raise ValueError(f"Unexpected error in retrieve_auth_token_data: {str(e)}")


async def retrieve_auth_token_data_from_redis(request: Request) -> dict | None:
    """Validate auth token from request cookie. Wrapper for backwards compatibility."""
    token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)
    if not token:
        logger.debug("No auth token cookie found")
        return None
    return await retrieve_auth_token_data(token)


# WebSocket token prefix (separate from regular auth tokens)
REDIS_WS_TOKEN_PREFIX = "ws_token:"
# WebSocket tokens expire after 60 seconds
WS_TOKEN_TTL_SECONDS = 60
# Rate limit: max tokens per user per window
WS_TOKEN_RATE_LIMIT_MAX = 10
WS_TOKEN_RATE_LIMIT_WINDOW_SECONDS = 60
REDIS_WS_TOKEN_RATE_LIMIT_PREFIX = "ws_token_rate:"


class WsTokenRateLimitExceeded(Exception):
    """Raised when a user exceeds the WS token generation rate limit."""


async def store_ws_token(token: str, user_id: str) -> None:
    """Store a short-lived WebSocket authentication token in Redis.

    Args:
        token: The generated WS token.
        user_id: The user ID to associate with this token.

    Raises:
        WsTokenRateLimitExceeded: If the user has exceeded the rate limit.
    """
    redis = await get_async_redis_connection()

    # Atomically increment and check rate limit to avoid TOCTOU races
    rate_limit_key = REDIS_WS_TOKEN_RATE_LIMIT_PREFIX + user_id
    pipe = redis.pipeline()
    pipe.incr(rate_limit_key)
    pipe.expire(rate_limit_key, WS_TOKEN_RATE_LIMIT_WINDOW_SECONDS)
    results = await pipe.execute()
    new_count = results[0]

    if new_count > WS_TOKEN_RATE_LIMIT_MAX:
        # Over limit — decrement back since we won't use this slot
        await redis.decr(rate_limit_key)
        logger.warning(f"WS token rate limit exceeded for user {user_id}")
        raise WsTokenRateLimitExceeded(
            f"Rate limit exceeded. Maximum {WS_TOKEN_RATE_LIMIT_MAX} tokens per minute."
        )

    # Store the actual token
    redis_key = REDIS_WS_TOKEN_PREFIX + token
    token_data = json.dumps({"sub": user_id})
    await redis.set(redis_key, token_data, ex=WS_TOKEN_TTL_SECONDS)


async def retrieve_ws_token_data(token: str) -> dict | None:
    """Validate a WebSocket token and return the token data.

    This uses GETDEL for atomic get-and-delete to prevent race conditions
    where the same token could be used twice.

    Args:
        token: The WS token to validate.

    Returns:
        Token data dict with 'sub' (user ID) if valid, None if invalid/expired.
    """
    try:
        redis = await get_async_redis_connection()
        redis_key = REDIS_WS_TOKEN_PREFIX + token

        # Atomic get-and-delete to prevent race conditions (Redis 6.2+)
        token_data_str = await redis.getdel(redis_key)

        if not token_data_str:
            return None

        return json.loads(token_data_str)
    except json.JSONDecodeError:
        logger.error("Error decoding WS token data from Redis")
        return None
    except Exception as e:
        logger.error(f"Unexpected error in retrieve_ws_token_data: {str(e)}")
        return None


def redis_lock_dump(lock: RedisLock, r: Redis) -> None:
    # diagnostic logging for lock errors
    name = lock.name
    ttl = r.ttl(name)
    locked = lock.locked()
    owned = lock.owned()
    local_token: str | None = lock.local.token

    remote_token_raw = r.get(lock.name)
    if remote_token_raw:
        remote_token_bytes = cast(bytes, remote_token_raw)
        remote_token = remote_token_bytes.decode("utf-8")
    else:
        remote_token = None

    logger.warning(
        f"RedisLock diagnostic: "
        f"name={name} "
        f"locked={locked} "
        f"owned={owned} "
        f"local_token={local_token} "
        f"remote_token={remote_token} "
        f"ttl={ttl}"
    )


================================================
FILE: backend/onyx/redis/redis_usergroup.py
================================================
import time
from typing import cast
from uuid import uuid4

import redis
from celery import Celery
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session

from onyx.configs.app_configs import DB_YIELD_PER_DEFAULT
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.redis.redis_object_helper import RedisObjectHelper
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


class RedisUserGroup(RedisObjectHelper):
    PREFIX = "usergroup"
    FENCE_PREFIX = PREFIX + "_fence"
    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks
    TASKSET_PREFIX = PREFIX + "_taskset"
    TASKSET_TTL = FENCE_TTL

    def __init__(self, tenant_id: str, id: int) -> None:
        super().__init__(tenant_id, str(id))

    @property
    def fenced(self) -> bool:
        if self.redis.exists(self.fence_key):
            return True

        return False

    def set_fence(self, payload: int | None) -> None:
        if payload is None:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload, ex=self.FENCE_TTL)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    @property
    def payload(self) -> int | None:
        bytes = self.redis.get(self.fence_key)
        if bytes is None:
            return None

        progress = int(cast(int, bytes))
        return progress

    def generate_tasks(
        self,
        max_tasks: int,  # noqa: ARG002
        celery_app: Celery,
        db_session: Session,
        redis_client: Redis,
        lock: RedisLock,
        tenant_id: str,
    ) -> tuple[int, int] | None:
        """Max tasks is ignored for now until we can build the logic to mark the
        user group up to date over multiple batches.
        """
        last_lock_time = time.monotonic()
        num_tasks_sent = 0

        if not global_version.is_ee_version():
            return 0, 0

        try:
            construct_document_id_select_by_usergroup = fetch_versioned_implementation(
                "onyx.db.user_group",
                "construct_document_id_select_by_usergroup",
            )
        except ModuleNotFoundError:
            return 0, 0

        stmt = construct_document_id_select_by_usergroup(int(self._id))
        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
            doc_id = cast(str, doc_id)
            current_time = time.monotonic()
            if current_time - last_lock_time >= (
                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
            ):
                lock.reacquire()
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"

            # add to the set BEFORE creating the task.
            redis_client.sadd(self.taskset_key, custom_task_id)
            redis_client.expire(self.taskset_key, self.TASKSET_TTL)

            celery_app.send_task(
                OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
                kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
                queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
            )

            num_tasks_sent += 1

        return num_tasks_sent, num_tasks_sent

    def reset(self) -> None:
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + "*"):
            r.delete(key)

        for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
            r.delete(key)


================================================
FILE: backend/onyx/redis/redis_utils.py
================================================
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_usergroup import RedisUserGroup


def is_fence(key_bytes: bytes) -> bool:
    key_str = key_bytes.decode("utf-8")
    if key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
        return True
    if key_str.startswith(RedisUserGroup.FENCE_PREFIX):
        return True
    if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
        return True
    if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
        return True
    if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
        return True

    return False


================================================
FILE: backend/onyx/secondary_llm_flows/__init__.py
================================================


================================================
FILE: backend/onyx/secondary_llm_flows/chat_session_naming.py
================================================
from onyx.chat.llm_step import translate_history_to_llm_format
from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.llm.interfaces import LLM
from onyx.llm.models import ReasoningEffort
from onyx.llm.utils import llm_response_to_string
from onyx.prompts.chat_prompts import CHAT_NAMING_REMINDER
from onyx.prompts.chat_prompts import CHAT_NAMING_SYSTEM_PROMPT
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger

logger = setup_logger()


def generate_chat_session_name(
    chat_history: list[ChatMessageSimple],
    llm: LLM,
) -> str:
    system_prompt = ChatMessageSimple(
        message=CHAT_NAMING_SYSTEM_PROMPT,
        token_count=100,
        message_type=MessageType.SYSTEM,
    )

    reminder_prompt = ChatMessageSimple(
        message=CHAT_NAMING_REMINDER,
        token_count=100,
        message_type=MessageType.USER_REMINDER,
    )

    complete_message_history = [system_prompt] + chat_history + [reminder_prompt]

    llm_facing_history = translate_history_to_llm_format(
        complete_message_history, llm.config
    )

    # Call LLM with Braintrust tracing
    with llm_generation_span(
        llm=llm, flow="chat_session_naming", input_messages=llm_facing_history
    ) as span_generation:
        response = llm.invoke(llm_facing_history, reasoning_effort=ReasoningEffort.OFF)
        record_llm_response(span_generation, response)
        new_name_raw = llm_response_to_string(response)

    return new_name_raw.strip().strip('"')


================================================
FILE: backend/onyx/secondary_llm_flows/document_filter.py
================================================
import json
import re

from onyx.context.search.models import ContextExpansionType
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.llm.interfaces import LLM
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.prompts.search_prompts import DOCUMENT_CONTEXT_SELECTION_PROMPT
from onyx.prompts.search_prompts import DOCUMENT_SELECTION_PROMPT
from onyx.prompts.search_prompts import TRY_TO_FILL_TO_MAX_INSTRUCTIONS
from onyx.tools.tool_implementations.search.constants import (
    MAX_CHUNKS_FOR_RELEVANCE,
)
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger

logger = setup_logger()


def select_chunks_for_relevance(
    section: InferenceSection,
    max_chunks: int = MAX_CHUNKS_FOR_RELEVANCE,
) -> list[InferenceChunk]:
    """Select a subset of chunks from a section based on center chunk position.

    Logic:
    - Always include the center chunk
    - If there are chunks directly next to it by index, grab the preceding and following
    - Otherwise grab 2 in the direction that does exist (2 before or 2 after)
    - If there are not enough in either direction, just grab what's available
    - If there are no other chunks, just use the central chunk

    Args:
        section: InferenceSection with center_chunk and chunks
        max_chunks: Maximum number of chunks to select (default: MAX_CHUNKS_FOR_RELEVANCE)

    Returns:
        List of selected InferenceChunks ordered by position
    """
    if max_chunks <= 0:
        return []

    center_chunk = section.center_chunk
    all_chunks = section.chunks

    # Find the index of the center chunk in the chunks list
    try:
        center_index = next(
            i
            for i, chunk in enumerate(all_chunks)
            if chunk.chunk_id == center_chunk.chunk_id
        )
    except StopIteration:
        # If center chunk not found in chunks list, just return center chunk
        return [center_chunk]

    if max_chunks == 1:
        return [center_chunk]

    # Calculate how many chunks to take before and after
    chunks_needed = max_chunks - 1  # minus 1 for center chunk

    # Determine available chunks before and after center
    chunks_before_available = center_index
    chunks_after_available = len(all_chunks) - center_index - 1

    # Start with balanced distribution (1 before, 1 after for max_chunks=3)
    chunks_before = min(chunks_needed // 2, chunks_before_available)
    chunks_after = min(chunks_needed // 2, chunks_after_available)

    # Allocate remaining chunks to whichever direction has availability
    remaining = chunks_needed - chunks_before - chunks_after
    if remaining > 0:
        # Try to add more chunks before center if available
        if chunks_before_available > chunks_before:
            additional_before = min(remaining, chunks_before_available - chunks_before)
            chunks_before += additional_before
            remaining -= additional_before
        # Try to add more chunks after center if available
        if remaining > 0 and chunks_after_available > chunks_after:
            additional_after = min(remaining, chunks_after_available - chunks_after)
            chunks_after += additional_after

    # Select the chunks
    start_index = center_index - chunks_before
    end_index = center_index + chunks_after + 1  # +1 to include center and chunks after

    return all_chunks[start_index:end_index]


def classify_section_relevance(
    document_title: str,
    section_text: str,
    user_query: str,
    llm: LLM,
    section_above_text: str | None,
    section_below_text: str | None,
) -> ContextExpansionType:
    """Use LLM to classify section relevance and determine context expansion type.

    Args:
        section_text: The text content of the section to classify
        user_query: The user's search query
        llm: LLM instance to use for classification
        section_above_text: Text content from chunks above the section
        section_below_text: Text content from chunks below the section

    Returns:
        ContextExpansionType indicating how the section should be expanded
    """
    # Build the prompt
    prompt_text = DOCUMENT_CONTEXT_SELECTION_PROMPT.format(
        document_title=document_title,
        main_section=section_text,
        section_above=section_above_text if section_above_text else "N/A",
        section_below=section_below_text if section_below_text else "N/A",
        user_query=user_query,
    )

    # Default to MAIN_SECTION_ONLY
    default_classification = ContextExpansionType.MAIN_SECTION_ONLY

    # Call LLM for classification with Braintrust tracing
    try:
        prompt_msg = UserMessage(content=prompt_text)
        with llm_generation_span(
            llm=llm, flow="classify_section_relevance", input_messages=[prompt_msg]
        ) as span_generation:
            response = llm.invoke(
                prompt=prompt_msg,
                reasoning_effort=ReasoningEffort.OFF,
            )
            record_llm_response(span_generation, response)
            llm_response = response.choice.message.content

        if not llm_response:
            logger.warning(
                "LLM returned empty response for context selection, defaulting to MAIN_SECTION_ONLY"
            )
            classification = default_classification
        else:
            # Parse the response to extract the situation number (0-3)
            numbers = re.findall(r"\b[0-3]\b", llm_response)
            if numbers:
                situation = int(numbers[-1])
                # Map situation number to ContextExpansionType
                situation_to_type = {
                    0: ContextExpansionType.NOT_RELEVANT,
                    1: ContextExpansionType.MAIN_SECTION_ONLY,
                    2: ContextExpansionType.INCLUDE_ADJACENT_SECTIONS,
                    3: ContextExpansionType.FULL_DOCUMENT,
                }
                classification = situation_to_type.get(
                    situation, default_classification
                )
            else:
                logger.warning(
                    f"Could not parse situation number from LLM response: {llm_response}"
                )
                classification = default_classification

    except Exception as e:
        logger.error(f"Error calling LLM for context selection: {e}")
        classification = default_classification

    # To save some effort down the line, if there is nothing surrounding, don't allow a classification of adjacent or whole doc
    if (
        not section_above_text
        and not section_below_text
        and classification != ContextExpansionType.NOT_RELEVANT
    ):
        classification = ContextExpansionType.MAIN_SECTION_ONLY

    return classification


def select_sections_for_expansion(
    sections: list[InferenceSection],
    user_query: str,
    llm: LLM,
    max_sections: int = 10,
    max_chunks_per_section: int | None = MAX_CHUNKS_FOR_RELEVANCE,
    try_to_fill_to_max: bool = False,
) -> tuple[list[InferenceSection], list[str] | None]:
    """Use LLM to select the most relevant document sections for expansion.

    Args:
        sections: List of InferenceSection objects to select from
        user_query: The user's search query
        llm: LLM instance to use for selection
        max_sections: Maximum number of sections to select (default: 10)
        max_chunks_per_section: Maximum chunks to consider per section (default: MAX_CHUNKS_FOR_RELEVANCE)

    Returns:
        A tuple of:
        - Filtered list of InferenceSection objects selected by the LLM
        - List of document IDs for sections marked with "!" by the LLM, or None if none.
          Note: The "!" marker support exists in parsing but is not currently used because
          the prompt does not instruct the LLM to use it.
    """
    if not sections:
        return [], None

    # Create a mapping of section ID to section
    section_map: dict[str, InferenceSection] = {}
    sections_dict: list[dict[str, str | int | list[str]]] = []

    for idx, section in enumerate(sections):
        # Create a unique ID for each section
        section_id = f"{idx}"
        section_map[section_id] = section

        # Format the section for the LLM
        chunk = section.center_chunk

        # Combine primary and secondary owners for authors
        authors = None
        if chunk.primary_owners or chunk.secondary_owners:
            authors = []
            if chunk.primary_owners:
                authors.extend(chunk.primary_owners)
            if chunk.secondary_owners:
                authors.extend(chunk.secondary_owners)

        # Format updated_at as ISO string if available
        updated_at_str = None
        if chunk.updated_at:
            updated_at_str = chunk.updated_at.isoformat()

        # Convert metadata to JSON string
        metadata_str = json.dumps(chunk.metadata)

        # Select only the most relevant chunks from the section to avoid flooding
        # the LLM with too much content from documents with many matching sections
        if max_chunks_per_section is not None:
            selected_chunks = select_chunks_for_relevance(
                section, max_chunks_per_section
            )
            selected_content = " ".join(chunk.content for chunk in selected_chunks)
        else:
            selected_content = section.combined_content

        section_dict: dict[str, str | int | list[str]] = {
            "section_id": idx,
            "title": chunk.semantic_identifier,
        }

        # Only include updated_at if not None
        if updated_at_str is not None:
            section_dict["updated_at"] = updated_at_str

        # Only include authors if not None
        if authors is not None:
            section_dict["authors"] = authors

        section_dict["source_type"] = str(chunk.source_type)
        section_dict["metadata"] = metadata_str
        section_dict["content"] = selected_content

        sections_dict.append(section_dict)

    # Build the prompt
    extra_instructions = TRY_TO_FILL_TO_MAX_INSTRUCTIONS if try_to_fill_to_max else ""
    prompt_text = UserMessage(
        content=DOCUMENT_SELECTION_PROMPT.format(
            max_sections=max_sections,
            extra_instructions=extra_instructions,
            formatted_doc_sections=json.dumps(sections_dict, indent=2),
            user_query=user_query,
        )
    )

    # Call LLM for selection with Braintrust tracing
    try:
        with llm_generation_span(
            llm=llm, flow="select_sections_for_expansion", input_messages=[prompt_text]
        ) as span_generation:
            response = llm.invoke(
                prompt=[prompt_text], reasoning_effort=ReasoningEffort.OFF
            )
            record_llm_response(span_generation, response)
            llm_response = response.choice.message.content

        if not llm_response:
            logger.warning(
                "LLM returned empty response for document selection, returning first max_sections"
            )
            return sections[:max_sections], None

        # Parse the response to extract section IDs
        # Look for patterns like [1, 2, 3] or [1,2,3] with flexible whitespace/newlines
        # Also handle unbracketed comma-separated lists like "1, 2, 3"
        # Track which sections have "!" marker (e.g., "1, 2!, 3" or "[1, 2!, 3]")
        section_ids = []
        sections_with_exclamation = set()  # Track section IDs that have "!" marker

        # First try to find a bracketed list
        bracket_pattern = r"\[([^\]]+)\]"
        bracket_match = re.search(bracket_pattern, llm_response)

        if bracket_match:
            # Extract the content between brackets
            list_content = bracket_match.group(1)
            # Split by comma, preserving the parts
            parts = [part.strip() for part in list_content.split(",")]
            for part in parts:
                # Check if this part has an exclamation mark
                has_exclamation = "!" in part
                # Extract the number (digits only)
                numbers = re.findall(r"\d+", part)
                if numbers:
                    section_id = numbers[0]
                    section_ids.append(section_id)
                    if has_exclamation:
                        sections_with_exclamation.add(section_id)
        else:
            # Try to find an unbracketed comma-separated list
            # Look for patterns like "1, 2, 3" or "1, 2!, 3"
            # This regex finds sequences of digits optionally followed by "!" and separated by commas
            comma_list_pattern = r"\b\d+!?\b(?:\s*,\s*\b\d+!?\b)*"
            comma_match = re.search(comma_list_pattern, llm_response)

            if comma_match:
                # Extract the matched comma-separated list
                list_content = comma_match.group(0)
                parts = [part.strip() for part in list_content.split(",")]
                for part in parts:
                    # Check if this part has an exclamation mark
                    has_exclamation = "!" in part
                    # Extract the number (digits only)
                    numbers = re.findall(r"\d+", part)
                    if numbers:
                        section_id = numbers[0]
                        section_ids.append(section_id)
                        if has_exclamation:
                            sections_with_exclamation.add(section_id)
            else:
                # Fallback: try to extract all numbers from the response
                # Also check for "!" after numbers
                number_pattern = r"\b(\d+)(!)?\b"
                matches = re.finditer(number_pattern, llm_response)
                for match in matches:
                    section_id = match.group(1)
                    has_exclamation = match.group(2) == "!"
                    section_ids.append(section_id)
                    if has_exclamation:
                        sections_with_exclamation.add(section_id)

        if not section_ids:
            logger.warning(
                f"Could not parse section IDs from LLM response: {llm_response}"
            )
            return sections[:max_sections], None

        # Filter sections based on LLM selection
        # Skip out-of-range IDs and don't count them toward max_sections
        selected_sections = []
        document_ids_with_exclamation = []  # Collect document_ids for sections with "!"
        num_sections = len(sections)

        for section_id_str in section_ids:
            # Convert to int
            try:
                section_id_int = int(section_id_str)
            except ValueError:
                logger.warning(f"Could not convert section ID to int: {section_id_str}")
                continue

            # Check if in valid range
            if section_id_int < 0 or section_id_int >= num_sections:
                logger.warning(
                    f"Section ID {section_id_int} is out of range [0, {num_sections - 1}], skipping"
                )
                continue

            # Convert back to string for section_map lookup
            section_id = str(section_id_int)
            if section_id in section_map:
                section = section_map[section_id]
                selected_sections.append(section)

                # If this section has an exclamation mark, collect its document_id
                if section_id_str in sections_with_exclamation:
                    document_id = section.center_chunk.document_id
                    if document_id not in document_ids_with_exclamation:
                        document_ids_with_exclamation.append(document_id)

            # Stop if we've reached max_sections valid selections
            if len(selected_sections) >= max_sections:
                break

        if not selected_sections:
            logger.warning(
                "No valid sections selected from LLM response, returning first max_sections"
            )
            return sections[:max_sections], None

        # Collect all selected document IDs
        selected_document_ids = [
            section.center_chunk.document_id for section in selected_sections
        ]

        logger.debug(
            f"LLM selected {len(selected_sections)} valid sections from {len(sections)} total candidates. "
            f"Selected document IDs: {selected_document_ids}. "
            f"Document IDs with exclamation: {document_ids_with_exclamation if document_ids_with_exclamation else []}"
        )

        # Return document_ids if any sections had exclamation marks, otherwise None
        return selected_sections, (
            document_ids_with_exclamation if document_ids_with_exclamation else None
        )

    except Exception as e:
        logger.error(f"Error calling LLM for document selection: {e}")
        return sections[:max_sections], None


================================================
FILE: backend/onyx/secondary_llm_flows/memory_update.py
================================================
from onyx.configs.constants import MessageType
from onyx.llm.interfaces import LLM
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.prompts.basic_memory import FULL_MEMORY_UPDATE_PROMPT
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import parse_llm_json_response

logger = setup_logger()

# Maximum number of user messages to include
MAX_USER_MESSAGES = 3
MAX_CHARS_PER_MESSAGE = 500


def _format_chat_history(chat_history: list[ChatMinimalTextMessage]) -> str:
    user_messages = [
        msg for msg in chat_history if msg.message_type == MessageType.USER
    ]

    if not user_messages:
        return "No chat history available."

    # Take the last N user messages
    recent_user_messages = user_messages[-MAX_USER_MESSAGES:]

    formatted_parts = []
    for i, msg in enumerate(recent_user_messages, start=1):
        if len(msg.message) > MAX_CHARS_PER_MESSAGE:
            truncated_message = msg.message[:MAX_CHARS_PER_MESSAGE] + "[...truncated]"
        else:
            truncated_message = msg.message
        formatted_parts.append(f"\nUser message:\n{truncated_message}\n")

    return "".join(formatted_parts).strip()


def _format_existing_memories(existing_memories: list[str]) -> str:
    """Format existing memories as a numbered list (1-indexed for readability)."""
    if not existing_memories:
        return "No existing memories."

    formatted_lines = []
    for i, memory in enumerate(existing_memories, start=1):
        formatted_lines.append(f"{i}. {memory}")

    return "\n".join(formatted_lines)


def _format_user_basic_information(
    user_name: str | None,
    user_email: str | None,
    user_role: str | None,
) -> str:
    """Format user basic information, only including fields that have values."""
    lines = []
    if user_name:
        lines.append(f"User name: {user_name}")
    if user_email:
        lines.append(f"User email: {user_email}")
    if user_role:
        lines.append(f"User role: {user_role}")

    if not lines:
        return ""

    return "\n\n# User Basic Information\n" + "\n".join(lines)


def process_memory_update(
    new_memory: str,
    existing_memories: list[str],
    chat_history: list[ChatMinimalTextMessage],
    llm: LLM,
    user_name: str | None = None,
    user_email: str | None = None,
    user_role: str | None = None,
) -> tuple[str, int | None]:
    """
    Determine if a memory should be added or updated.

    Uses the LLM to analyze the new memory against existing memories and
    determine whether to add it as new or update an existing memory.

    Args:
        new_memory: The new memory text from the memory tool
        existing_memories: List of existing memory strings
        chat_history: Recent chat history for context
        llm: LLM instance to use for the decision
        user_name: Optional user name for context
        user_email: Optional user email for context
        user_role: Optional user role for context

    Returns:
        Tuple of (memory_text, index_to_replace)
        - memory_text: The final memory text to store
        - index_to_replace: Index in existing_memories to replace, or None if adding new
    """
    # Format inputs for the prompt
    formatted_chat_history = _format_chat_history(chat_history)
    formatted_memories = _format_existing_memories(existing_memories)
    formatted_user_info = _format_user_basic_information(
        user_name, user_email, user_role
    )

    # Build the prompt
    prompt = FULL_MEMORY_UPDATE_PROMPT.format(
        chat_history=formatted_chat_history,
        user_basic_information=formatted_user_info,
        existing_memories=formatted_memories,
        new_memory=new_memory,
    )

    # Call LLM with Braintrust tracing
    try:
        prompt_msg = UserMessage(content=prompt)
        with llm_generation_span(
            llm=llm, flow="memory_update", input_messages=[prompt_msg]
        ) as span_generation:
            response = llm.invoke(
                prompt=prompt_msg, reasoning_effort=ReasoningEffort.OFF
            )
            record_llm_response(span_generation, response)
            content = response.choice.message.content
    except Exception as e:
        logger.warning(f"LLM invocation failed for memory update: {e}")
        return (new_memory, None)

    # Handle empty response
    if not content:
        logger.warning(
            "LLM returned empty response for memory update, defaulting to add"
        )
        return (new_memory, None)

    # Parse JSON response
    parsed_response = parse_llm_json_response(content)

    if not parsed_response:
        logger.warning(
            f"Failed to parse JSON from LLM response: {content[:200]}..., defaulting to add"
        )
        return (new_memory, None)

    # Extract fields from response
    operation = parsed_response.get("operation", "add").lower()
    memory_id = parsed_response.get("memory_id")
    memory_text = parsed_response.get("memory_text", new_memory)

    # Ensure memory_text is valid
    if not memory_text or not isinstance(memory_text, str):
        memory_text = new_memory

    # Handle add operation
    if operation == "add":
        logger.debug("Memory update operation: add")
        return (memory_text, None)

    # Handle update operation
    if operation == "update":
        # Validate memory_id
        if memory_id is None:
            logger.warning("Update operation specified but no memory_id provided")
            return (memory_text, None)

        # Convert memory_id to integer if it's a string
        try:
            memory_id_int = int(memory_id)
        except (ValueError, TypeError):
            logger.warning(f"Invalid memory_id format: {memory_id}")
            return (memory_text, None)

        # Convert from 1-indexed (LLM response) to 0-indexed (internal)
        index_to_replace = memory_id_int - 1

        # Validate index is in range
        if index_to_replace < 0 or index_to_replace >= len(existing_memories):
            logger.warning(
                f"memory_id {memory_id_int} out of range (1-{len(existing_memories)}), defaulting to add"
            )
            return (memory_text, None)

        logger.debug(f"Memory update operation: update at index {index_to_replace}")
        return (memory_text, index_to_replace)

    # Unknown operation, default to add
    logger.warning(f"Unknown operation '{operation}', defaulting to add")
    return (memory_text, None)


================================================
FILE: backend/onyx/secondary_llm_flows/query_expansion.py
================================================
from onyx.configs.constants import MessageType
from onyx.llm.interfaces import LLM
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.prompts.prompt_utils import get_current_llm_day_time
from onyx.prompts.search_prompts import KEYWORD_REPHRASE_SYSTEM_PROMPT
from onyx.prompts.search_prompts import KEYWORD_REPHRASE_USER_PROMPT
from onyx.prompts.search_prompts import REPHRASE_CONTEXT_PROMPT
from onyx.prompts.search_prompts import SEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT
from onyx.prompts.search_prompts import SEMANTIC_QUERY_REPHRASE_USER_PROMPT
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _build_additional_context(
    user_info: str | None = None,
    memories: list[str] | None = None,
) -> str:
    """Build additional context section for query rephrasing/expansion.

    Returns empty string if both user_info and memories are None/empty.
    Otherwise returns formatted context with "N/A" for missing fields.
    """
    has_user_info = user_info and user_info.strip()
    has_memories = memories and any(m.strip() for m in memories)

    if not has_user_info and not has_memories:
        return ""

    formatted_user_info = user_info if has_user_info else "N/A"
    formatted_memories = (
        "\n".join(f"- {memory}" for memory in memories)
        if has_memories and memories
        else "N/A"
    )

    return REPHRASE_CONTEXT_PROMPT.format(
        user_info=formatted_user_info,
        memories=formatted_memories,
    )


def _build_message_history(
    history: list[ChatMinimalTextMessage],
) -> list[ChatCompletionMessage]:
    """Convert ChatMinimalTextMessage list to ChatCompletionMessage list."""
    messages: list[ChatCompletionMessage] = []

    for msg in history:
        if msg.message_type == MessageType.USER:
            user_msg = UserMessage(content=msg.message)
            messages.append(user_msg)
        elif msg.message_type == MessageType.ASSISTANT:
            assistant_msg = AssistantMessage(content=msg.message)
            messages.append(assistant_msg)

    return messages


def semantic_query_rephrase(
    history: list[ChatMinimalTextMessage],
    llm: LLM,
    user_info: str | None = None,
    memories: list[str] | None = None,
) -> str:
    """Rephrase a query into a standalone query using chat history context.

    Converts the user's query into a self-contained search query that incorporates
    relevant context from the chat history and optional user information/memories.

    Args:
        history: Chat message history. Must contain at least one user message.
        llm: Language model to use for rephrasing
        user_info: Optional user information for personalization
        memories: Optional user memories for personalization

    Returns:
        Rephrased standalone query string

    Raises:
        ValueError: If history is empty or contains no user messages
        RuntimeError: If LLM fails to generate a rephrased query
    """
    if not history:
        raise ValueError("History cannot be empty for query rephrasing")

    # Find the last user message in the history
    last_user_message_idx = None
    for i in range(len(history) - 1, -1, -1):
        if history[i].message_type == MessageType.USER:
            last_user_message_idx = i
            break

    if last_user_message_idx is None:
        raise ValueError("History must contain at least one user message")

    # Extract the last user query
    user_query = history[last_user_message_idx].message

    # Build additional context section
    additional_context = _build_additional_context(user_info, memories)

    current_datetime_str = get_current_llm_day_time(
        include_day_of_week=True, full_sentence=False
    )

    # Build system message with current date
    system_msg = SystemMessage(
        content=SEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT.format(
            current_date=current_datetime_str
        )
    )

    # Convert chat history to message format (excluding the last user message and everything after it)
    messages: list[ChatCompletionMessage] = [system_msg]
    messages.extend(_build_message_history(history[:last_user_message_idx]))

    # Add the last message as the user prompt with instructions
    final_user_msg = UserMessage(
        content=SEMANTIC_QUERY_REPHRASE_USER_PROMPT.format(
            additional_context=additional_context, user_query=user_query
        )
    )
    messages.append(final_user_msg)

    # Call LLM and return result with Braintrust tracing
    with llm_generation_span(
        llm=llm, flow="semantic_query_rephrase", input_messages=messages
    ) as span_generation:
        response = llm.invoke(prompt=messages, reasoning_effort=ReasoningEffort.OFF)
        record_llm_response(span_generation, response)
        final_query = response.choice.message.content

    if not final_query:
        # It's ok if some other queries fail, this one is likely the best one
        # It also can't fail in parsing so we should be able to guarantee a valid query here.
        raise RuntimeError("LLM failed to generate a rephrased query")

    return final_query


def keyword_query_expansion(
    history: list[ChatMinimalTextMessage],
    llm: LLM,
    user_info: str | None = None,
    memories: list[str] | None = None,
) -> list[str] | None:
    """Expand a query into multiple keyword-only queries using chat history context.

    Converts the user's query into a set of keyword-based search queries (max 3)
    that incorporate relevant context from the chat history and optional user
    information/memories. Returns a list of keyword queries.

    Args:
        history: Chat message history. Must contain at least one user message.
        llm: Language model to use for keyword expansion
        user_info: Optional user information for personalization
        memories: Optional user memories for personalization

    Returns:
        List of keyword-only query strings (max 3), or empty list if generation fails

    Raises:
        ValueError: If history is empty or contains no user messages
    """
    if not history:
        raise ValueError("History cannot be empty for keyword query expansion")

    # Find the last user message in the history
    last_user_message_idx = None
    for i in range(len(history) - 1, -1, -1):
        if history[i].message_type == MessageType.USER:
            last_user_message_idx = i
            break

    if last_user_message_idx is None:
        raise ValueError("History must contain at least one user message")

    # Extract the last user query
    user_query = history[last_user_message_idx].message

    # Build additional context section
    additional_context = _build_additional_context(user_info, memories)

    current_datetime_str = get_current_llm_day_time(
        include_day_of_week=True, full_sentence=False
    )

    # Build system message with current date
    system_msg = SystemMessage(
        content=KEYWORD_REPHRASE_SYSTEM_PROMPT.format(current_date=current_datetime_str)
    )

    # Convert chat history to message format (excluding the last user message and everything after it)
    messages: list[ChatCompletionMessage] = [system_msg]
    messages.extend(_build_message_history(history[:last_user_message_idx]))

    # Add the last message as the user prompt with instructions
    final_user_msg = UserMessage(
        content=KEYWORD_REPHRASE_USER_PROMPT.format(
            additional_context=additional_context, user_query=user_query
        )
    )
    messages.append(final_user_msg)

    # Call LLM and return result with Braintrust tracing
    with llm_generation_span(
        llm=llm, flow="keyword_query_expansion", input_messages=messages
    ) as span_generation:
        response = llm.invoke(prompt=messages, reasoning_effort=ReasoningEffort.OFF)
        record_llm_response(span_generation, response)
        content = response.choice.message.content

    # Parse the response - each line is a separate keyword query
    if not content:
        return []

    queries = [line.strip() for line in content.strip().split("\n") if line.strip()]
    return queries


================================================
FILE: backend/onyx/secondary_llm_flows/source_filter.py
================================================
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger

logger = setup_logger()


def strings_to_document_sources(source_strs: list[str]) -> list[DocumentSource]:
    sources = []
    for s in source_strs:
        try:
            sources.append(DocumentSource(s))
        except ValueError:
            logger.warning(f"Failed to translate {s} to a DocumentSource")
    return sources


def extract_source_filter(
    query: str, llm: LLM, db_session: Session
) -> list[DocumentSource] | None:
    # Can reference onyx/prompts/filter_extration.py for previous implementation prompts
    raise NotImplementedError("This function should not be getting called right now")


================================================
FILE: backend/onyx/secondary_llm_flows/time_filter.py
================================================
from datetime import datetime
from datetime import timezone

from dateutil.parser import parse

from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger

logger = setup_logger()


def best_match_time(time_str: str) -> datetime | None:
    preferred_formats = ["%m/%d/%Y", "%m-%d-%Y"]

    for fmt in preferred_formats:
        try:
            # As we don't know if the user is interacting with the API server from
            # the same timezone as the API server, just assume the queries are UTC time
            # the few hours offset (if any) shouldn't make any significant difference
            dt = datetime.strptime(time_str, fmt)
            return dt.replace(tzinfo=timezone.utc)
        except ValueError:
            continue

    # If the above formats don't match, try using dateutil's parser
    try:
        dt = parse(time_str)
        return (
            dt.astimezone(timezone.utc)
            if dt.tzinfo
            else dt.replace(tzinfo=timezone.utc)
        )
    except ValueError:
        return None


def extract_time_filter(query: str, llm: LLM) -> tuple[datetime | None, bool]:
    """Returns a datetime if a hard time filter should be applied for the given query
    Additionally returns a bool, True if more recently updated Documents should be
    heavily favored"""
    raise NotImplementedError("This function should not be getting called right now")


#     def _get_time_filter_messages(query: str) -> list[dict[str, str]]:
#         messages = [
#             {
#                 "role": "system",
#                 "content": TIME_FILTER_PROMPT.format(
#                     current_day_time_str=get_current_llm_day_time()
#                 ),
#             },
#             {
#                 "role": "user",
#                 "content": "What documents in Confluence were written in the last two quarters",
#             },
#             {
#                 "role": "assistant",
#                 "content": json.dumps(
#                     {
#                         "filter_type": "hard cutoff",
#                         "filter_value": "quarter",
#                         "value_multiple": 2,
#                     }
#                 ),
#             },
#             {"role": "user", "content": "What's the latest on project Corgies?"},
#             {
#                 "role": "assistant",
#                 "content": json.dumps({"filter_type": "favor recent"}),
#             },
#             {
#                 "role": "user",
#                 "content": "Which customer asked about security features in February of 2022?",
#             },
#             {
#                 "role": "assistant",
#                 "content": json.dumps(
#                     {"filter_type": "hard cutoff", "date": "02/01/2022"}
#                 ),
#             },
#             {"role": "user", "content": query},
#         ]
#         return messages

#     def _extract_time_filter_from_llm_out(
#         model_out: str,
#     ) -> tuple[datetime | None, bool]:
#         """Returns a datetime for a hard cutoff and a bool for if the"""
#         try:
#             model_json = json.loads(model_out, strict=False)
#         except json.JSONDecodeError:
#             return None, False

#         # If filter type is not present, just assume something has gone wrong
#         # Potentially model has identified a date and just returned that but
#         # better to be conservative and not identify the wrong filter.
#         if "filter_type" not in model_json:
#             return None, False

#         if "hard" in model_json["filter_type"] or "recent" in model_json["filter_type"]:
#             favor_recent = "recent" in model_json["filter_type"]

#             if "date" in model_json:
#                 extracted_time = best_match_time(model_json["date"])
#                 if extracted_time is not None:
#                     # LLM struggles to understand the concept of not sensitive within a time range
#                     # So if a time is extracted, just go with that alone
#                     return extracted_time, False

#             time_diff = None
#             multiplier = 1.0

#             if "value_multiple" in model_json:
#                 try:
#                     multiplier = float(model_json["value_multiple"])
#                 except ValueError:
#                     pass

#             if "filter_value" in model_json:
#                 filter_value = model_json["filter_value"]
#                 if "day" in filter_value:
#                     time_diff = timedelta(days=multiplier)
#                 elif "week" in filter_value:
#                     time_diff = timedelta(weeks=multiplier)
#                 elif "month" in filter_value:
#                     # Have to just use the average here, too complicated to calculate exact day
#                     # based on current day etc.
#                     time_diff = timedelta(days=multiplier * 30.437)
#                 elif "quarter" in filter_value:
#                     time_diff = timedelta(days=multiplier * 91.25)
#                 elif "year" in filter_value:
#                     time_diff = timedelta(days=multiplier * 365)

#             if time_diff is not None:
#                 current = datetime.now(timezone.utc)
#                 # LLM struggles to understand the concept of not sensitive within a time range
#                 # So if a time is extracted, just go with that alone
#                 return current - time_diff, False

#             # If we failed to extract a hard filter, just pass back the value of favor recent
#             return None, favor_recent

#         return None, False

#     messages = _get_time_filter_messages(query)
#     filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
#     model_output = message_to_string(llm.invoke_langchain(filled_llm_prompt))
#     logger.debug(model_output)

#     return _extract_time_filter_from_llm_out(model_output)


================================================
FILE: backend/onyx/seeding/__init__.py
================================================


================================================
FILE: backend/onyx/server/__init__.py
================================================


================================================
FILE: backend/onyx/server/api_key/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.api_key import ApiKeyDescriptor
from onyx.db.api_key import fetch_api_keys
from onyx.db.api_key import insert_api_key
from onyx.db.api_key import regenerate_api_key
from onyx.db.api_key import remove_api_key
from onyx.db.api_key import update_api_key
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.api_key.models import APIKeyArgs


router = APIRouter(prefix="/admin/api-key")


@router.get("")
def list_api_keys(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ApiKeyDescriptor]:
    return fetch_api_keys(db_session)


@router.post("")
def create_api_key(
    api_key_args: APIKeyArgs,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ApiKeyDescriptor:
    return insert_api_key(db_session, api_key_args, user.id)


@router.post("/{api_key_id}/regenerate")
def regenerate_existing_api_key(
    api_key_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ApiKeyDescriptor:
    return regenerate_api_key(db_session, api_key_id)


@router.patch("/{api_key_id}")
def update_existing_api_key(
    api_key_id: int,
    api_key_args: APIKeyArgs,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ApiKeyDescriptor:
    return update_api_key(db_session, api_key_id, api_key_args)


@router.delete("/{api_key_id}")
def delete_api_key(
    api_key_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    remove_api_key(db_session, api_key_id)


================================================
FILE: backend/onyx/server/api_key/models.py
================================================
from pydantic import BaseModel

from onyx.auth.schemas import UserRole


class APIKeyArgs(BaseModel):
    name: str | None = None
    role: UserRole = UserRole.BASIC


================================================
FILE: backend/onyx/server/api_key_usage.py
================================================
"""API key and PAT usage tracking for cloud usage limits."""

from fastapi import Depends
from fastapi import Request
from sqlalchemy.orm import Session

from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.pat import get_hashed_pat_from_request
from onyx.db.engine.sql_engine import get_session
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
from onyx.server.usage_limits import check_usage_and_raise
from onyx.server.usage_limits import is_usage_limits_enabled
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


def check_api_key_usage(
    request: Request,
    db_session: Session = Depends(get_session),
) -> None:
    """
    FastAPI dependency that checks and tracks API key/PAT usage limits.

    This should be added as a dependency to endpoints that accept API key
    or PAT authentication and should be usage-limited.
    """
    if not is_usage_limits_enabled():
        return

    # Check if request is authenticated via API key or PAT
    is_api_key_request = get_hashed_api_key_from_request(request) is not None
    is_pat_request = get_hashed_pat_from_request(request) is not None

    if not is_api_key_request and not is_pat_request:
        return

    tenant_id = get_current_tenant_id()

    # Check usage limit
    check_usage_and_raise(
        db_session=db_session,
        usage_type=UsageType.API_CALLS,
        tenant_id=tenant_id,
        pending_amount=1,
    )

    # Increment usage counter
    increment_usage(
        db_session=db_session,
        usage_type=UsageType.API_CALLS,
        amount=1,
    )
    db_session.commit()


================================================
FILE: backend/onyx/server/auth_check.py
================================================
from typing import cast

from fastapi import FastAPI
from fastapi.dependencies.models import Dependant
from starlette.routing import BaseRoute

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_limited_user
from onyx.auth.users import current_user
from onyx.auth.users import current_user_from_websocket
from onyx.auth.users import current_user_with_expired_token
from onyx.configs.app_configs import APP_API_PREFIX
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop


PUBLIC_ENDPOINT_SPECS = [
    # built-in documentation functions
    ("/openapi.json", {"GET", "HEAD"}),
    ("/docs", {"GET", "HEAD"}),
    ("/docs/oauth2-redirect", {"GET", "HEAD"}),
    ("/redoc", {"GET", "HEAD"}),
    # should always be callable, will just return 401 if not authenticated
    ("/me", {"GET"}),
    # just returns 200 to validate that the server is up
    ("/health", {"GET"}),
    # just returns auth type, needs to be accessible before the user is logged
    # in to determine what flow to give the user
    ("/auth/type", {"GET"}),
    # just gets the version of Onyx (e.g. 0.3.11)
    ("/version", {"GET"}),
    # Gets stable and beta versions for Onyx docker images
    ("/versions", {"GET"}),
    # stuff related to basic auth
    ("/auth/refresh", {"POST"}),
    ("/auth/register", {"POST"}),
    ("/auth/login", {"POST"}),
    ("/auth/logout", {"POST"}),
    ("/auth/forgot-password", {"POST"}),
    ("/auth/reset-password", {"POST"}),
    ("/auth/request-verify-token", {"POST"}),
    ("/auth/verify", {"POST"}),
    ("/users/me", {"GET"}),
    ("/users/me", {"PATCH"}),
    ("/users/{id}", {"GET"}),
    ("/users/{id}", {"PATCH"}),
    ("/users/{id}", {"DELETE"}),
    # oauth
    ("/auth/oauth/authorize", {"GET"}),
    ("/auth/oauth/callback", {"GET"}),
    # oidc
    ("/auth/oidc/authorize", {"GET"}),
    ("/auth/oidc/callback", {"GET"}),
    # saml
    ("/auth/saml/authorize", {"GET"}),
    ("/auth/saml/callback", {"POST"}),
    ("/auth/saml/callback", {"GET"}),
    ("/auth/saml/logout", {"POST"}),
    # anonymous user on cloud
    ("/tenants/anonymous-user", {"POST"}),
    ("/metrics", {"GET"}),  # added by prometheus_fastapi_instrumentator
    # craft webapp proxy — access enforced per-session via sharing_scope in handler
    ("/build/sessions/{session_id}/webapp", {"GET"}),
    ("/build/sessions/{session_id}/webapp/{path:path}", {"GET"}),
]


def is_route_in_spec_list(
    route: BaseRoute, public_endpoint_specs: list[tuple[str, set[str]]]
) -> bool:
    if not hasattr(route, "path") or not hasattr(route, "methods"):
        return False

    # try adding the prefix AND not adding the prefix, since some endpoints
    # are not prefixed (e.g. /openapi.json)
    if (route.path, route.methods) in public_endpoint_specs:
        return True

    processed_global_prefix = f"/{APP_API_PREFIX.strip('/')}" if APP_API_PREFIX else ""
    if not processed_global_prefix:
        return False

    for endpoint_spec in public_endpoint_specs:
        base_path, methods = endpoint_spec
        prefixed_path = f"{processed_global_prefix}/{base_path.strip('/')}"

        if prefixed_path == route.path and route.methods == methods:
            return True

    return False


def check_router_auth(
    application: FastAPI,
    public_endpoint_specs: list[tuple[str, set[str]]] = PUBLIC_ENDPOINT_SPECS,
) -> None:
    """Ensures that all endpoints on the passed in application either
    (1) have auth enabled OR
    (2) are explicitly marked as a public endpoint
    """

    control_plane_dep = fetch_ee_implementation_or_noop(
        "onyx.server.tenants.access", "control_plane_dep"
    )
    current_cloud_superuser = fetch_ee_implementation_or_noop(
        "onyx.auth.users", "current_cloud_superuser"
    )
    verify_scim_token = fetch_ee_implementation_or_noop(
        "onyx.server.scim.auth", "verify_scim_token"
    )

    for route in application.routes:
        # explicitly marked as public
        if is_route_in_spec_list(route, public_endpoint_specs):
            continue

        # check for auth
        found_auth = False
        route_dependant_obj = cast(
            Dependant | None, route.dependant if hasattr(route, "dependant") else None
        )
        if route_dependant_obj:
            for dependency in route_dependant_obj.dependencies:
                depends_fn = dependency.cache_key[0]
                if (
                    depends_fn == current_limited_user
                    or depends_fn == current_user
                    or depends_fn == current_admin_user
                    or depends_fn == current_curator_or_admin_user
                    or depends_fn == current_user_with_expired_token
                    or depends_fn == current_chat_accessible_user
                    or depends_fn == current_user_from_websocket
                    or depends_fn == control_plane_dep
                    or depends_fn == current_cloud_superuser
                    or depends_fn == verify_scim_token
                ):
                    found_auth = True
                    break

        if not found_auth:
            # uncomment to print out all route(s) that are missing auth
            # print(f"(\"{route.path}\", {set(route.methods)}),")

            raise RuntimeError(
                f"Did not find user dependency in private route - {route}"
            )


================================================
FILE: backend/onyx/server/documents/__init__.py
================================================


================================================
FILE: backend/onyx/server/documents/cc_pair.py
================================================
from datetime import datetime
from http import HTTPStatus

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi.responses import JSONResponse
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.tasks.pruning.tasks import (
    try_creating_prune_generator_task,
)
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.connectors.exceptions import ValidationError
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import delete_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
    get_connector_credential_pair_from_id_for_user,
)
from onyx.db.connector_credential_pair import remove_credential_from_connector
from onyx.db.connector_credential_pair import (
    update_connector_credential_pair_from_id,
)
from onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair
from onyx.db.document import get_document_counts_for_cc_pairs
from onyx.db.document import get_documents_for_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.index_attempt import count_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import count_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.index_attempt import (
    get_latest_successful_index_attempt_for_cc_pair_id,
)
from onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
from onyx.db.models import User
from onyx.db.permission_sync_attempt import (
    get_latest_doc_permission_sync_attempt_for_cc_pair,
)
from onyx.db.permission_sync_attempt import (
    get_recent_doc_permission_sync_attempts_for_cc_pair,
)
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_utils import get_deletion_attempt_snapshot
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import CCPropertyUpdateRequest
from onyx.server.documents.models import CCStatusUpdateRequest
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import ConnectorCredentialPairMetadata
from onyx.server.documents.models import DocumentSyncStatus
from onyx.server.documents.models import IndexAttemptSnapshot
from onyx.server.documents.models import PaginatedReturn
from onyx.server.documents.models import PermissionSyncAttemptSnapshot
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()
router = APIRouter(prefix="/manage")


@router.get("/admin/cc-pair/{cc_pair_id}/index-attempts", tags=PUBLIC_API_TAGS)
def get_cc_pair_index_attempts(
    cc_pair_id: int,
    page_num: int = Query(0, ge=0),
    page_size: int = Query(10, ge=1, le=1000),
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> PaginatedReturn[IndexAttemptSnapshot]:
    if user:
        user_has_access = verify_user_has_access_to_cc_pair(
            cc_pair_id, db_session, user, get_editable=False
        )
        if not user_has_access:
            raise HTTPException(
                status_code=400, detail="CC Pair not found for current user permissions"
            )

    total_count = count_index_attempts_for_cc_pair(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    index_attempts = get_paginated_index_attempts_for_cc_pair_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
        page=page_num,
        page_size=page_size,
    )
    return PaginatedReturn(
        items=[
            IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt)
            for index_attempt in index_attempts
        ],
        total_items=total_count,
    )


@router.get("/admin/cc-pair/{cc_pair_id}/permission-sync-attempts")
def get_cc_pair_permission_sync_attempts(
    cc_pair_id: int,
    page_num: int = Query(0, ge=0),
    page_size: int = Query(10, ge=1, le=1000),
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> PaginatedReturn[PermissionSyncAttemptSnapshot]:
    if user:
        user_has_access = verify_user_has_access_to_cc_pair(
            cc_pair_id, db_session, user, get_editable=False
        )
        if not user_has_access:
            raise HTTPException(
                status_code=400, detail="CC Pair not found for current user permissions"
            )

    # Get all permission sync attempts for this cc pair
    all_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(
        cc_pair_id=cc_pair_id,
        limit=1000,
        db_session=db_session,
    )

    start_idx = page_num * page_size
    end_idx = start_idx + page_size
    paginated_attempts = all_attempts[start_idx:end_idx]
    items = [
        PermissionSyncAttemptSnapshot.from_permission_sync_attempt_db_model(attempt)
        for attempt in paginated_attempts
    ]

    return PaginatedReturn(
        items=items,
        total_items=len(all_attempts),
    )


@router.get("/admin/cc-pair/{cc_pair_id}", tags=PUBLIC_API_TAGS)
def get_cc_pair_full_info(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> CCPairFullInfo:
    tenant_id = get_current_tenant_id()

    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id, db_session, user, get_editable=False
    )
    if not cc_pair:
        raise HTTPException(
            status_code=404, detail="CC Pair not found for current user permissions"
        )
    editable_cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id, db_session, user, get_editable=True
    )
    is_editable_for_current_user = editable_cc_pair is not None

    document_count_info_list = list(
        get_document_counts_for_cc_pairs(
            db_session=db_session,
            cc_pairs=[
                ConnectorCredentialPairIdentifier(
                    connector_id=cc_pair.connector_id,
                    credential_id=cc_pair.credential_id,
                )
            ],
        )
    )
    documents_indexed = (
        document_count_info_list[0][-1] if document_count_info_list else 0
    )

    latest_attempt = get_latest_index_attempt_for_cc_pair_id(
        db_session=db_session,
        connector_credential_pair_id=cc_pair_id,
        secondary_index=False,
        only_finished=False,
    )

    latest_successful_attempt = get_latest_successful_index_attempt_for_cc_pair_id(
        db_session=db_session,
        connector_credential_pair_id=cc_pair_id,
    )

    # Get latest permission sync attempt for status
    latest_permission_sync_attempt = None
    if cc_pair.access_type == AccessType.SYNC:
        latest_permission_sync_attempt = (
            get_latest_doc_permission_sync_attempt_for_cc_pair(
                db_session=db_session,
                connector_credential_pair_id=cc_pair_id,
            )
        )

    return CCPairFullInfo.from_models(
        cc_pair_model=cc_pair,
        number_of_index_attempts=count_index_attempts_for_cc_pair(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
        ),
        last_index_attempt=latest_attempt,
        last_successful_index_time=(
            latest_successful_attempt.time_started
            if latest_successful_attempt
            else None
        ),
        latest_deletion_attempt=get_deletion_attempt_snapshot(
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
            db_session=db_session,
            tenant_id=tenant_id,
        ),
        num_docs_indexed=documents_indexed,
        is_editable_for_current_user=is_editable_for_current_user,
        indexing=bool(
            latest_attempt and latest_attempt.status == IndexingStatus.IN_PROGRESS
        ),
        last_permission_sync_attempt_status=(
            latest_permission_sync_attempt.status
            if latest_permission_sync_attempt
            else None
        ),
        permission_syncing=bool(
            latest_permission_sync_attempt
            and latest_permission_sync_attempt.status
            == PermissionSyncStatus.IN_PROGRESS
        ),
        last_permission_sync_attempt_finished=(
            latest_permission_sync_attempt.time_finished
            if latest_permission_sync_attempt
            else None
        ),
        last_permission_sync_attempt_error_message=(
            latest_permission_sync_attempt.error_message
            if latest_permission_sync_attempt
            else None
        ),
    )


@router.put("/admin/cc-pair/{cc_pair_id}/status", tags=PUBLIC_API_TAGS)
def update_cc_pair_status(
    cc_pair_id: int,
    status_update_request: CCStatusUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> JSONResponse:
    """This method returns nearly immediately. It simply sets some signals and
    optimistically assumes any running background processes will clean themselves up.
    This is done to improve the perceived end user experience.

    Returns HTTPStatus.OK if everything finished.
    """
    tenant_id = get_current_tenant_id()

    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=True,
    )

    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="Connection not found for current user's permissions",
        )

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if status_update_request.status == ConnectorCredentialPairStatus.PAUSED:
        redis_connector.stop.set_fence(True)

        # Request cancellation for any active indexing attempts for this cc_pair
        active_attempts = (
            db_session.execute(
                select(IndexAttempt).where(
                    IndexAttempt.connector_credential_pair_id == cc_pair_id,
                    IndexAttempt.status.in_(
                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
                    ),
                )
            )
            .scalars()
            .all()
        )

        for attempt in active_attempts:
            try:
                IndexingCoordination.request_cancellation(db_session, attempt.id)
                # Revoke the task to prevent it from running
                if attempt.celery_task_id:
                    client_app.control.revoke(attempt.celery_task_id)
                logger.info(
                    f"Requested cancellation for active indexing attempt {attempt.id} "
                    f"due to connector pause: cc_pair={cc_pair_id}"
                )
            except Exception:
                logger.exception(
                    f"Failed to request cancellation for indexing attempt {attempt.id}"
                )

    else:
        redis_connector.stop.set_fence(False)

    update_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
        status=status_update_request.status,
    )

    db_session.commit()

    # this speeds up the start of indexing by firing the check immediately
    client_app.send_task(
        OnyxCeleryTask.CHECK_FOR_INDEXING,
        kwargs=dict(tenant_id=tenant_id),
        priority=OnyxCeleryPriority.HIGH,
    )

    return JSONResponse(
        status_code=HTTPStatus.OK, content={"message": str(HTTPStatus.OK)}
    )


@router.put("/admin/cc-pair/{cc_pair_id}/name")
def update_cc_pair_name(
    cc_pair_id: int,
    new_name: str,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=True,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400, detail="CC Pair not found for current user's permissions"
        )

    try:
        cc_pair.name = new_name
        db_session.commit()
        return StatusResponse(
            success=True, message="Name updated successfully", data=cc_pair_id
        )
    except IntegrityError:
        db_session.rollback()
        raise HTTPException(status_code=400, detail="Name must be unique")


@router.put("/admin/cc-pair/{cc_pair_id}/property")
def update_cc_pair_property(
    cc_pair_id: int,
    update_request: CCPropertyUpdateRequest,  # in seconds
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=True,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400, detail="CC Pair not found for current user's permissions"
        )

    # Can we centralize logic for updating connector properties
    # so that we don't need to manually validate everywhere?
    if update_request.name == "refresh_frequency":
        cc_pair.connector.refresh_freq = int(update_request.value)
        cc_pair.connector.validate_refresh_freq()
        db_session.commit()

        msg = "Refresh frequency updated successfully"
    elif update_request.name == "pruning_frequency":
        cc_pair.connector.prune_freq = int(update_request.value)
        cc_pair.connector.validate_prune_freq()
        db_session.commit()

        msg = "Pruning frequency updated successfully"
    else:
        raise HTTPException(
            status_code=400, detail=f"Property name {update_request.name} is not valid."
        )

    return StatusResponse(success=True, message=msg, data=cc_pair_id)


@router.get("/admin/cc-pair/{cc_pair_id}/last_pruned")
def get_cc_pair_last_pruned(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> datetime | None:
    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="cc_pair not found for current user's permissions",
        )

    return cc_pair.last_pruned


@router.post("/admin/cc-pair/{cc_pair_id}/prune", tags=PUBLIC_API_TAGS)
def prune_cc_pair(
    cc_pair_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[list[int]]:
    """Triggers pruning on a particular cc_pair immediately"""
    tenant_id = get_current_tenant_id()

    cc_pair = get_connector_credential_pair_from_id_for_user(
        cc_pair_id=cc_pair_id,
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    if not cc_pair:
        raise HTTPException(
            status_code=400,
            detail="Connection not found for current user's permissions",
        )

    r = get_redis_client()

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    if redis_connector.prune.fenced:
        raise HTTPException(
            status_code=HTTPStatus.CONFLICT,
            detail="Pruning task already in progress.",
        )

    logger.info(
        f"Pruning cc_pair: cc_pair={cc_pair_id} "
        f"connector={cc_pair.connector_id} "
        f"credential={cc_pair.credential_id} "
        f"{cc_pair.connector.name} connector."
    )
    payload_id = try_creating_prune_generator_task(
        client_app, cc_pair, db_session, r, tenant_id
    )
    if not payload_id:
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail="Pruning task creation failed.",
        )

    logger.info(f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}")

    return StatusResponse(
        success=True,
        message="Successfully created the pruning task.",
    )


@router.get("/admin/cc-pair/{cc_pair_id}/get-docs-sync-status")
def get_docs_sync_status(
    cc_pair_id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[DocumentSyncStatus]:
    all_docs_for_cc_pair = get_documents_for_cc_pair(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )
    return [DocumentSyncStatus.from_model(doc) for doc in all_docs_for_cc_pair]


@router.get("/admin/cc-pair/{cc_pair_id}/errors", tags=PUBLIC_API_TAGS)
def get_cc_pair_indexing_errors(
    cc_pair_id: int,
    include_resolved: bool = Query(False),
    page_num: int = Query(0, ge=0),
    page_size: int = Query(10, ge=1, le=100),
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> PaginatedReturn[IndexAttemptErrorPydantic]:
    """Gives back all errors for a given CC Pair. Allows pagination based on page and page_size params.

    Args:
        cc_pair_id: ID of the connector-credential pair to get errors for
        include_resolved: Whether to include resolved errors in the results
        page_num: Page number for pagination, starting at 0
        page_size: Number of errors to return per page
        _: Current user, must be curator or admin
        db_session: Database session

    Returns:
        Paginated list of indexing errors for the CC pair.
    """
    total_count = count_index_attempt_errors_for_cc_pair(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
        unresolved_only=not include_resolved,
    )

    index_attempt_errors = get_index_attempt_errors_for_cc_pair(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
        unresolved_only=not include_resolved,
        page=page_num,
        page_size=page_size,
    )
    return PaginatedReturn(
        items=[IndexAttemptErrorPydantic.from_model(e) for e in index_attempt_errors],
        total_items=total_count,
    )


@router.put(
    "/connector/{connector_id}/credential/{credential_id}", tags=PUBLIC_API_TAGS
)
def associate_credential_to_connector(
    connector_id: int,
    credential_id: int,
    metadata: ConnectorCredentialPairMetadata,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str = Depends(get_current_tenant_id),
) -> StatusResponse[int]:
    """NOTE(rkuo): internally discussed and the consensus is this endpoint
    and create_connector_with_mock_credential should be combined.

    The intent of this endpoint is to handle connectors that actually need credentials.
    """

    fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )(
        db_session=db_session,
        user=user,
        target_group_ids=metadata.groups,
        object_is_public=metadata.access_type == AccessType.PUBLIC,
        object_is_perm_sync=metadata.access_type == AccessType.SYNC,
        object_is_new=True,
    )

    try:
        validate_ccpair_for_user(
            connector_id, credential_id, metadata.access_type, db_session
        )

        response = add_credential_to_connector(
            db_session=db_session,
            user=user,
            connector_id=connector_id,
            credential_id=credential_id,
            cc_pair_name=metadata.name,
            access_type=metadata.access_type,
            auto_sync_options=metadata.auto_sync_options,
            groups=metadata.groups,
            processing_mode=metadata.processing_mode,
        )

        # trigger indexing immediately
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_INDEXING,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )

        logger.info(
            f"associate_credential_to_connector - running check_for_indexing: cc_pair={response.data}"
        )

        return response
    except ValidationError as e:
        # If validation fails, delete the connector and commit the changes
        # Ensures we don't leave invalid connectors in the database
        # NOTE: consensus is that it makes sense to unify connector and ccpair creation flows
        # which would rid us of needing to handle cases like these
        delete_connector(db_session, connector_id)
        db_session.commit()

        raise HTTPException(
            status_code=400, detail="Connector validation error: " + str(e)
        )
    except IntegrityError as e:
        logger.error(f"IntegrityError: {e}")
        delete_connector(db_session, connector_id)
        db_session.commit()

        raise HTTPException(status_code=400, detail="Name must be unique")

    except Exception as e:
        logger.exception(f"Unexpected error: {e}")

        raise HTTPException(status_code=500, detail="Unexpected error")


@router.delete(
    "/connector/{connector_id}/credential/{credential_id}", tags=PUBLIC_API_TAGS
)
def dissociate_credential_from_connector(
    connector_id: int,
    credential_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    return remove_credential_from_connector(
        connector_id, credential_id, user, db_session
    )


================================================
FILE: backend/onyx/server/documents/connector.py
================================================
import json
import math
import mimetypes
import os
import zipfile
from datetime import datetime
from io import BytesIO
from typing import Any
from typing import cast

from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from google.oauth2.credentials import Credentials
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.email_utils import send_email
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.tasks.pruning.tasks import (
    try_creating_prune_generator_task,
)
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import ONYX_METADATA_FILENAME
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.connectors.google_utils.google_auth import (
    get_google_oauth_creds,
)
from onyx.connectors.google_utils.google_kv import (
    build_service_account_creds,
)
from onyx.connectors.google_utils.google_kv import (
    delete_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
    delete_service_account_key,
)
from onyx.connectors.google_utils.google_kv import get_auth_url
from onyx.connectors.google_utils.google_kv import (
    get_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
    get_service_account_key,
)
from onyx.connectors.google_utils.google_kv import (
    update_credential_access_tokens,
)
from onyx.connectors.google_utils.google_kv import (
    upsert_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
    upsert_service_account_key,
)
from onyx.connectors.google_utils.google_kv import verify_csrf
from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TOKEN_KEY
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from onyx.db.connector import create_connector
from onyx.db.connector import delete_connector
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector import fetch_connectors
from onyx.db.connector import fetch_unique_document_sources
from onyx.db.connector import get_connector_credential_ids
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector import update_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
    fetch_connector_credential_pair_for_connector,
)
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user
from onyx.db.connector_credential_pair import (
    get_connector_credential_pairs_for_user_parallel,
)
from onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair
from onyx.db.credentials import cleanup_gmail_credentials
from onyx.db.credentials import cleanup_google_drive_credentials
from onyx.db.credentials import create_credential
from onyx.db.credentials import delete_service_account_credentials
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.deletion_attempt import check_deletion_attempt_is_allowed
from onyx.db.document import get_document_counts_for_all_cc_pairs
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingMode
from onyx.db.enums import ProcessingMode
from onyx.db.federated import fetch_all_federated_connectors_parallel
from onyx.db.index_attempt import get_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempts_by_status
from onyx.db.index_attempt import get_latest_index_attempts_parallel
from onyx.db.index_attempt import (
    get_latest_successful_index_attempts_parallel,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import FederatedConnector
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.file_processing.file_types import PLAIN_TEXT_MIME_TYPE
from onyx.file_processing.file_types import WORD_PROCESSING_MIME_TYPE
from onyx.file_store.file_store import FileStore
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import AuthStatus
from onyx.server.documents.models import AuthUrl
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import ConnectorFileInfo
from onyx.server.documents.models import ConnectorFilesResponse
from onyx.server.documents.models import ConnectorIndexingStatusLite
from onyx.server.documents.models import ConnectorIndexingStatusLiteResponse
from onyx.server.documents.models import ConnectorRequestSubmission
from onyx.server.documents.models import ConnectorSnapshot
from onyx.server.documents.models import ConnectorStatus
from onyx.server.documents.models import ConnectorUpdateRequest
from onyx.server.documents.models import CredentialBase
from onyx.server.documents.models import CredentialSnapshot
from onyx.server.documents.models import DocsCountOperator
from onyx.server.documents.models import FailedConnectorIndexingStatus
from onyx.server.documents.models import FileUploadResponse
from onyx.server.documents.models import GDriveCallback
from onyx.server.documents.models import GmailCallback
from onyx.server.documents.models import GoogleAppCredentials
from onyx.server.documents.models import GoogleServiceAccountCredentialRequest
from onyx.server.documents.models import GoogleServiceAccountKey
from onyx.server.documents.models import IndexedSourcesResponse
from onyx.server.documents.models import IndexingStatusRequest
from onyx.server.documents.models import ObjectCreationIdResponse
from onyx.server.documents.models import RunConnectorRequest
from onyx.server.documents.models import SourceSummary
from onyx.server.federated.models import FederatedConnectorStatus
from onyx.server.models import StatusResponse
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.threadpool_concurrency import CallableProtocol
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

_GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id"
_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id"
_INDEXING_STATUS_PAGE_SIZE = 10

SEEN_ZIP_DETAIL = "Only one zip file is allowed per file connector, \
use the ingestion APIs for multiple files"

router = APIRouter(prefix="/manage", dependencies=[Depends(require_vector_db)])


"""Admin only API endpoints"""


@router.get("/admin/connector/gmail/app-credential")
def check_google_app_gmail_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    try:
        return {"client_id": get_google_app_cred(DocumentSource.GMAIL).web.client_id}
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")


@router.put("/admin/connector/gmail/app-credential")
def upsert_google_app_gmail_credentials(
    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)
) -> StatusResponse:
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GMAIL)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )


@router.delete("/admin/connector/gmail/app-credential")
def delete_google_app_gmail_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    try:
        delete_google_app_cred(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully deleted Google App Credentials"
    )


@router.get("/admin/connector/google-drive/app-credential")
def check_google_app_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    try:
        return {
            "client_id": get_google_app_cred(DocumentSource.GOOGLE_DRIVE).web.client_id
        }
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")


@router.put("/admin/connector/google-drive/app-credential")
def upsert_google_app_credentials(
    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)
) -> StatusResponse:
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GOOGLE_DRIVE)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )


@router.delete("/admin/connector/google-drive/app-credential")
def delete_google_app_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    try:
        delete_google_app_cred(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully deleted Google App Credentials"
    )


@router.get("/admin/connector/gmail/service-account-key")
def check_google_service_gmail_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    try:
        return {
            "service_account_email": get_service_account_key(
                DocumentSource.GMAIL
            ).client_email
        }
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )


@router.put("/admin/connector/gmail/service-account-key")
def upsert_google_service_gmail_account_key(
    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)
) -> StatusResponse:
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GMAIL)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully saved Google Service Account Key"
    )


@router.delete("/admin/connector/gmail/service-account-key")
def delete_google_service_gmail_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    try:
        delete_service_account_key(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully deleted Google Service Account Key"
    )


@router.get("/admin/connector/google-drive/service-account-key")
def check_google_service_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    try:
        return {
            "service_account_email": get_service_account_key(
                DocumentSource.GOOGLE_DRIVE
            ).client_email
        }
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )


@router.put("/admin/connector/google-drive/service-account-key")
def upsert_google_service_account_key(
    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)
) -> StatusResponse:
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GOOGLE_DRIVE)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully saved Google Service Account Key"
    )


@router.delete("/admin/connector/google-drive/service-account-key")
def delete_google_service_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    try:
        delete_service_account_key(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return StatusResponse(
        success=True, message="Successfully deleted Google Service Account Key"
    )


@router.put("/admin/connector/google-drive/service-account-credential")
def upsert_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Special API which allows the creation of a credential for a service account.
    Combines the input with the saved service account key to create an entry in the
    `Credential` table."""
    try:
        credential_base = build_service_account_creds(
            DocumentSource.GOOGLE_DRIVE,
            primary_admin_email=service_account_credential_request.google_primary_admin,
            name="Service Account (uploaded)",
        )
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # first delete all existing service account credentials
    delete_service_account_credentials(user, db_session, DocumentSource.GOOGLE_DRIVE)
    # `user=None` since this credential is not a personal credential
    credential = create_credential(
        credential_data=credential_base, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=credential.id)


@router.put("/admin/connector/gmail/service-account-credential")
def upsert_gmail_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Special API which allows the creation of a credential for a service account.
    Combines the input with the saved service account key to create an entry in the
    `Credential` table."""
    try:
        credential_base = build_service_account_creds(
            DocumentSource.GMAIL,
            primary_admin_email=service_account_credential_request.google_primary_admin,
        )
    except KvKeyNotFoundError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # first delete all existing service account credentials
    delete_service_account_credentials(user, db_session, DocumentSource.GMAIL)
    # `user=None` since this credential is not a personal credential
    credential = create_credential(
        credential_data=credential_base, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=credential.id)


@router.get("/admin/connector/google-drive/check-auth/{credential_id}")
def check_drive_tokens(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> AuthStatus:
    db_credentials = fetch_credential_by_id_for_user(credential_id, user, db_session)
    if not db_credentials or not db_credentials.credential_json:
        return AuthStatus(authenticated=False)

    credential_json = db_credentials.credential_json.get_value(apply_mask=False)
    if DB_CREDENTIALS_DICT_TOKEN_KEY not in credential_json:
        return AuthStatus(authenticated=False)
    token_json_str = str(credential_json[DB_CREDENTIALS_DICT_TOKEN_KEY])
    google_drive_creds = get_google_oauth_creds(
        token_json_str=token_json_str,
        source=DocumentSource.GOOGLE_DRIVE,
    )
    if google_drive_creds is None:
        return AuthStatus(authenticated=False)
    return AuthStatus(authenticated=True)


def save_zip_metadata_to_file_store(
    zf: zipfile.ZipFile, file_store: FileStore
) -> str | None:
    """
    Extract .onyx_metadata.json from zip and save to file store.
    Returns the file_id or None if no metadata file exists.
    """
    try:
        metadata_file_info = zf.getinfo(ONYX_METADATA_FILENAME)
        with zf.open(metadata_file_info, "r") as metadata_file:
            metadata_bytes = metadata_file.read()

            # Validate that it's valid JSON before saving
            try:
                json.loads(metadata_bytes)
            except json.JSONDecodeError as e:
                logger.warning(f"Unable to load {ONYX_METADATA_FILENAME}: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"Unable to load {ONYX_METADATA_FILENAME}: {e}",
                )

            # Save to file store
            file_id = file_store.save_file(
                content=BytesIO(metadata_bytes),
                display_name=ONYX_METADATA_FILENAME,
                file_origin=FileOrigin.CONNECTOR_METADATA,
                file_type="application/json",
            )
            return file_id
    except KeyError:
        logger.info(f"No {ONYX_METADATA_FILENAME} file")
        return None


def is_zip_file(file: UploadFile) -> bool:
    """
    Check if the file is a zip file by content type or filename.
    """
    return bool(
        (
            file.content_type
            and file.content_type.startswith(
                (
                    "application/zip",
                    "application/x-zip-compressed",  # May be this in Windows
                    "application/x-zip",
                    "multipart/x-zip",
                )
            )
        )
        or (file.filename and file.filename.lower().endswith(".zip"))
    )


def upload_files(
    files: list[UploadFile],
    file_origin: FileOrigin = FileOrigin.CONNECTOR,
    unzip: bool = True,
) -> FileUploadResponse:

    # Skip directories and known macOS metadata entries
    def should_process_file(file_path: str) -> bool:
        normalized_path = os.path.normpath(file_path)
        return not any(part.startswith(".") for part in normalized_path.split(os.sep))

    deduped_file_paths = []
    deduped_file_names = []
    zip_metadata_file_id: str | None = None
    try:
        file_store = get_default_file_store()
        seen_zip = False
        for file in files:
            if not file.filename:
                logger.warning("File has no filename, skipping")
                continue

            if is_zip_file(file):
                if seen_zip:
                    raise HTTPException(status_code=400, detail=SEEN_ZIP_DETAIL)
                seen_zip = True

                # Validate the zip by opening it (catches corrupt/non-zip files)
                with zipfile.ZipFile(file.file, "r") as zf:
                    if unzip:
                        zip_metadata_file_id = save_zip_metadata_to_file_store(
                            zf, file_store
                        )
                        for file_info in zf.namelist():
                            if zf.getinfo(file_info).is_dir():
                                continue

                            if not should_process_file(file_info):
                                continue

                            sub_file_bytes = zf.read(file_info)

                            mime_type, __ = mimetypes.guess_type(file_info)
                            if mime_type is None:
                                mime_type = "application/octet-stream"

                            file_id = file_store.save_file(
                                content=BytesIO(sub_file_bytes),
                                display_name=os.path.basename(file_info),
                                file_origin=file_origin,
                                file_type=mime_type,
                            )
                            deduped_file_paths.append(file_id)
                            deduped_file_names.append(os.path.basename(file_info))
                        continue

                # Store the zip as-is (unzip=False)
                file.file.seek(0)
                file_id = file_store.save_file(
                    content=file.file,
                    display_name=file.filename,
                    file_origin=file_origin,
                    file_type=file.content_type or "application/zip",
                )
                deduped_file_paths.append(file_id)
                deduped_file_names.append(file.filename)
                continue

            # Since we can't render docx files in the UI,
            # we store them in the file store as plain text
            if file.content_type == WORD_PROCESSING_MIME_TYPE:
                # Lazy load to avoid importing markitdown when not needed
                from onyx.file_processing.extract_file_text import read_docx_file

                text, _ = read_docx_file(file.file, file.filename)
                file_id = file_store.save_file(
                    content=BytesIO(text.encode("utf-8")),
                    display_name=file.filename,
                    file_origin=file_origin,
                    file_type=PLAIN_TEXT_MIME_TYPE,
                )

            else:
                file_id = file_store.save_file(
                    content=file.file,
                    display_name=file.filename,
                    file_origin=file_origin,
                    file_type=file.content_type or "text/plain",
                )
            deduped_file_paths.append(file_id)
            deduped_file_names.append(file.filename)

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return FileUploadResponse(
        file_paths=deduped_file_paths,
        file_names=deduped_file_names,
        zip_metadata_file_id=zip_metadata_file_id,
    )


def _normalize_file_names_for_backwards_compatibility(
    file_locations: list[str], file_names: list[str]
) -> list[str]:
    """
    Ensures file_names list is the same length as file_locations for backwards compatibility.
    In legacy data, file_names might not exist or be shorter than file_locations.
    If file_names is shorter, pads it with corresponding file_locations values.
    """
    return file_names + file_locations[len(file_names) :]


def _fetch_and_check_file_connector_cc_pair_permissions(
    connector_id: int,
    user: User,
    db_session: Session,
    require_editable: bool,
) -> ConnectorCredentialPair:
    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
    if cc_pair is None:
        raise HTTPException(
            status_code=404,
            detail="No Connector-Credential Pair found for this connector",
        )

    has_requested_access = verify_user_has_access_to_cc_pair(
        cc_pair_id=cc_pair.id,
        db_session=db_session,
        user=user,
        get_editable=require_editable,
    )
    if has_requested_access:
        return cc_pair

    # Special case: global curators should be able to manage files
    # for public file connectors even when they are not the creator.
    if (
        require_editable
        and user.role == UserRole.GLOBAL_CURATOR
        and cc_pair.access_type == AccessType.PUBLIC
    ):
        return cc_pair

    raise HTTPException(
        status_code=403,
        detail="Access denied. User cannot manage files for this connector.",
    )


@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
def upload_files_api(
    files: list[UploadFile],
    unzip: bool = True,
    _: User = Depends(current_curator_or_admin_user),
) -> FileUploadResponse:
    return upload_files(files, FileOrigin.OTHER, unzip=unzip)


@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)
def list_connector_files(
    connector_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorFilesResponse:
    """List all files in a file connector."""
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector not found")

    if connector.source != DocumentSource.FILE:
        raise HTTPException(
            status_code=400, detail="This endpoint only works with file connectors"
        )

    _ = _fetch_and_check_file_connector_cc_pair_permissions(
        connector_id=connector_id,
        user=user,
        db_session=db_session,
        require_editable=False,
    )

    file_locations = connector.connector_specific_config.get("file_locations", [])
    file_names = connector.connector_specific_config.get("file_names", [])

    # Normalize file_names for backwards compatibility with legacy data
    file_names = _normalize_file_names_for_backwards_compatibility(
        file_locations, file_names
    )

    file_store = get_default_file_store()
    files = []

    for file_id, file_name in zip(file_locations, file_names):
        try:
            file_record = file_store.read_file_record(file_id)
            file_size = None
            upload_date = None
            if file_record:
                file_size = file_store.get_file_size(file_id)
                upload_date = (
                    file_record.created_at.isoformat()
                    if file_record.created_at
                    else None
                )
            files.append(
                ConnectorFileInfo(
                    file_id=file_id,
                    file_name=file_name,
                    file_size=file_size,
                    upload_date=upload_date,
                )
            )
        except Exception as e:
            logger.warning(f"Error reading file record for {file_id}: {e}")
            # Include file with basic info even if record fetch fails
            files.append(
                ConnectorFileInfo(
                    file_id=file_id,
                    file_name=file_name,
                )
            )

    return ConnectorFilesResponse(files=files)


@router.post("/admin/connector/{connector_id}/files/update", tags=PUBLIC_API_TAGS)
def update_connector_files(
    connector_id: int,
    files: list[UploadFile] | None = File(None),
    file_ids_to_remove: str = Form("[]"),
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FileUploadResponse:
    """
    Update files in a connector by adding new files and/or removing existing ones.
    This is an atomic operation that validates, updates the connector config, and triggers indexing.
    """
    files = files or []
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector not found")

    if connector.source != DocumentSource.FILE:
        raise HTTPException(
            status_code=400, detail="This endpoint only works with file connectors"
        )

    # Get the connector-credential pair for indexing/pruning triggers
    # and validate user permissions for file management.
    cc_pair = _fetch_and_check_file_connector_cc_pair_permissions(
        connector_id=connector_id,
        user=user,
        db_session=db_session,
        require_editable=True,
    )

    # Parse file IDs to remove
    try:
        file_ids_list = json.loads(file_ids_to_remove)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Invalid file_ids_to_remove format")

    if not isinstance(file_ids_list, list):
        raise HTTPException(
            status_code=400,
            detail="file_ids_to_remove must be a JSON-encoded list",
        )

    # Get current connector config
    current_config = connector.connector_specific_config
    current_file_locations = current_config.get("file_locations", [])
    current_file_names = current_config.get("file_names", [])
    current_zip_metadata_file_id = current_config.get("zip_metadata_file_id")

    # Load existing metadata from file store if available
    file_store = get_default_file_store()
    current_zip_metadata: dict[str, Any] = {}
    if current_zip_metadata_file_id:
        try:
            metadata_io = file_store.read_file(
                file_id=current_zip_metadata_file_id, mode="b"
            )
            metadata_bytes = metadata_io.read()
            loaded_metadata = json.loads(metadata_bytes)
            if isinstance(loaded_metadata, list):
                current_zip_metadata = {d["filename"]: d for d in loaded_metadata}
            else:
                current_zip_metadata = loaded_metadata
        except Exception as e:
            logger.warning(f"Failed to load existing metadata file: {e}")
            raise HTTPException(
                status_code=500,
                detail="Failed to load existing connector metadata file",
            )

    # Upload new files if any
    new_file_paths = []
    new_file_names_list = []
    new_zip_metadata_file_id: str | None = None
    new_zip_metadata: dict[str, Any] = {}

    if files and len(files) > 0:
        upload_response = upload_files(files, FileOrigin.CONNECTOR)
        new_file_paths = upload_response.file_paths
        new_file_names_list = upload_response.file_names
        new_zip_metadata_file_id = upload_response.zip_metadata_file_id

        # Load new metadata from file store if available
        if new_zip_metadata_file_id:
            try:
                metadata_io = file_store.read_file(
                    file_id=new_zip_metadata_file_id, mode="b"
                )
                metadata_bytes = metadata_io.read()
                loaded_metadata = json.loads(metadata_bytes)
                if isinstance(loaded_metadata, list):
                    new_zip_metadata = {d["filename"]: d for d in loaded_metadata}
                else:
                    new_zip_metadata = loaded_metadata
            except Exception as e:
                logger.warning(f"Failed to load new metadata file: {e}")

    # Remove specified files
    files_to_remove_set = set(file_ids_list)

    # Normalize file_names for backwards compatibility with legacy data
    current_file_names = _normalize_file_names_for_backwards_compatibility(
        current_file_locations, current_file_names
    )

    remaining_file_locations = []
    remaining_file_names = []
    removed_file_names = set()

    for file_id, file_name in zip(current_file_locations, current_file_names):
        if file_id not in files_to_remove_set:
            remaining_file_locations.append(file_id)
            remaining_file_names.append(file_name)
        else:
            removed_file_names.add(file_name)

    # Combine remaining files with new files
    final_file_locations = remaining_file_locations + new_file_paths
    final_file_names = remaining_file_names + new_file_names_list

    # Validate that at least one file remains
    if not final_file_locations:
        raise HTTPException(
            status_code=400,
            detail="Cannot remove all files from connector. At least one file must remain.",
        )

    # Merge and filter metadata (remove metadata for deleted files)
    final_zip_metadata = {
        key: value
        for key, value in current_zip_metadata.items()
        if key not in removed_file_names
    }
    final_zip_metadata.update(new_zip_metadata)

    # Save merged metadata to file store if we have any metadata
    final_zip_metadata_file_id: str | None = None
    if final_zip_metadata:
        final_zip_metadata_file_id = file_store.save_file(
            content=BytesIO(json.dumps(final_zip_metadata).encode("utf-8")),
            display_name=ONYX_METADATA_FILENAME,
            file_origin=FileOrigin.CONNECTOR_METADATA,
            file_type="application/json",
        )

    # Update connector config
    updated_config = {
        **current_config,
        "file_locations": final_file_locations,
        "file_names": final_file_names,
        "zip_metadata_file_id": final_zip_metadata_file_id,
    }
    # Remove old zip_metadata dict if present (backwards compatibility cleanup)
    updated_config.pop("zip_metadata", None)

    connector_base = ConnectorBase(
        name=connector.name,
        source=connector.source,
        input_type=connector.input_type,
        connector_specific_config=updated_config,
        refresh_freq=connector.refresh_freq,
        prune_freq=connector.prune_freq,
        indexing_start=connector.indexing_start,
    )

    updated_connector = update_connector(connector_id, connector_base, db_session)
    if updated_connector is None:
        raise HTTPException(
            status_code=500, detail="Failed to update connector configuration"
        )

    # Trigger re-indexing for new files and pruning for removed files
    try:
        tenant_id = get_current_tenant_id()

        # If files were added, mark for UPDATE indexing (only new docs)
        if new_file_paths:
            mark_ccpair_with_indexing_trigger(
                cc_pair.id, IndexingMode.UPDATE, db_session
            )

            # Send task to check for indexing immediately
            client_app.send_task(
                OnyxCeleryTask.CHECK_FOR_INDEXING,
                kwargs={"tenant_id": tenant_id},
                priority=OnyxCeleryPriority.HIGH,
            )
            logger.info(
                f"Marked cc_pair {cc_pair.id} for UPDATE indexing (new files) for connector {connector_id}"
            )

        # If files were removed, trigger pruning immediately
        if file_ids_list:
            r = get_redis_client()
            payload_id = try_creating_prune_generator_task(
                client_app, cc_pair, db_session, r, tenant_id
            )
            if payload_id:
                logger.info(
                    f"Triggered pruning for cc_pair {cc_pair.id} (removed files) for connector "
                    f"{connector_id}, payload_id={payload_id}"
                )
            else:
                logger.warning(
                    f"Failed to trigger pruning for cc_pair {cc_pair.id} (removed files) for connector {connector_id}"
                )
    except Exception as e:
        logger.error(f"Failed to trigger re-indexing after file update: {e}")

    return FileUploadResponse(
        file_paths=final_file_locations,
        file_names=final_file_names,
        zip_metadata_file_id=final_zip_metadata_file_id,
    )


@router.get("/admin/connector", tags=PUBLIC_API_TAGS)
def get_connectors_by_credential(
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    credential: int | None = None,
) -> list[ConnectorSnapshot]:
    """Get a list of connectors. Allow filtering by a specific credential id."""

    connectors = fetch_connectors(db_session)

    filtered_connectors = []
    for connector in connectors:
        if connector.source == DocumentSource.INGESTION_API:
            # don't include INGESTION_API, as it's a system level
            # connector not manageable by the user
            continue

        if credential is not None:
            found = False
            for cc_pair in connector.credentials:
                if credential == cc_pair.credential_id:
                    found = True
                    break

            if not found:
                continue

        filtered_connectors.append(ConnectorSnapshot.from_connector_db_model(connector))

    return filtered_connectors


# Retrieves most recent failure cases for connectors that are currently failing
@router.get("/admin/connector/failed-indexing-status", tags=PUBLIC_API_TAGS)
def get_currently_failed_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[FailedConnectorIndexingStatus]:
    # Get the latest failed indexing attempts
    latest_failed_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.FAILED,
    )

    # Get the latest successful indexing attempts
    latest_successful_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.SUCCESS,
    )

    # Get all connector credential pairs
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=get_editable,
    )

    # Filter out failed attempts that have a more recent successful attempt
    filtered_failed_attempts = [
        failed_attempt
        for failed_attempt in latest_failed_indexing_attempts
        if not any(
            success_attempt.connector_credential_pair_id
            == failed_attempt.connector_credential_pair_id
            and success_attempt.time_updated > failed_attempt.time_updated
            for success_attempt in latest_successful_indexing_attempts
        )
    ]

    # Filter cc_pairs to include only those with failed attempts
    cc_pairs = [
        cc_pair
        for cc_pair in cc_pairs
        if any(
            attempt.connector_credential_pair == cc_pair
            for attempt in filtered_failed_attempts
        )
    ]

    # Create a mapping of cc_pair_id to its latest failed index attempt
    cc_pair_to_latest_index_attempt = {
        attempt.connector_credential_pair_id: attempt
        for attempt in filtered_failed_attempts
    }

    indexing_statuses = []

    for cc_pair in cc_pairs:
        # Skip DefaultCCPair
        if cc_pair.name == "DefaultCCPair":
            continue

        latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)

        indexing_statuses.append(
            FailedConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                error_msg=(
                    latest_index_attempt.error_msg if latest_index_attempt else None
                ),
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                is_deletable=check_deletion_attempt_is_allowed(
                    connector_credential_pair=cc_pair,
                    db_session=db_session,
                    allow_scheduled=True,
                )
                is None,
            )
        )

    return indexing_statuses


@router.get("/admin/connector/status", tags=PUBLIC_API_TAGS)
def get_connector_status(
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorStatus]:
    # This method is only used document set and group creation/editing
    # Therefore, it is okay to get non-editable, but public cc_pairs
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        eager_load_connector=True,
        eager_load_credential=True,
        eager_load_user=True,
        get_editable=False,
    )

    group_cc_pair_relationships = get_cc_pair_groups_for_ids(
        db_session=db_session,
        cc_pair_ids=[cc_pair.id for cc_pair in cc_pairs],
    )
    group_cc_pair_relationships_dict: dict[int, list[int]] = {}
    for relationship in group_cc_pair_relationships:
        group_cc_pair_relationships_dict.setdefault(relationship.cc_pair_id, []).append(
            relationship.user_group_id
        )

    # Pre-compute credential_ids per connector to avoid N+1 lazy loads
    connector_to_credential_ids: dict[int, list[int]] = {}
    for cc_pair in cc_pairs:
        connector_to_credential_ids.setdefault(cc_pair.connector_id, []).append(
            cc_pair.credential_id
        )

    return [
        ConnectorStatus(
            cc_pair_id=cc_pair.id,
            name=cc_pair.name,
            connector=ConnectorSnapshot.from_connector_db_model(
                cc_pair.connector,
                credential_ids=connector_to_credential_ids.get(
                    cc_pair.connector_id, []
                ),
            ),
            credential=CredentialSnapshot.from_credential_db_model(cc_pair.credential),
            access_type=cc_pair.access_type,
            groups=group_cc_pair_relationships_dict.get(cc_pair.id, []),
        )
        for cc_pair in cc_pairs
        if cc_pair.name != "DefaultCCPair" and cc_pair.connector and cc_pair.credential
    ]


@router.post("/admin/connector/indexing-status", tags=PUBLIC_API_TAGS)
def get_connector_indexing_status(
    request: IndexingStatusRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorIndexingStatusLiteResponse]:
    tenant_id = get_current_tenant_id()

    # NOTE: If the connector is deleting behind the scenes,
    # accessing cc_pairs can be inconsistent and members like
    # connector or credential may be None.
    # Additional checks are done to make sure the connector and credential still exist.
    # TODO: make this one query ... possibly eager load or wrap in a read transaction
    # to avoid the complexity of trying to error check throughout the function

    # see https://stackoverflow.com/questions/75758327/
    # sqlalchemy-method-connection-for-bind-is-already-in-progress
    # for why we can't pass in the current db_session to these functions

    if MOCK_CONNECTOR_FILE_PATH:
        import json

        with open(MOCK_CONNECTOR_FILE_PATH, "r") as f:
            raw_data = json.load(f)
            connector_indexing_statuses = [
                ConnectorIndexingStatusLite(**status) for status in raw_data
            ]
        return [
            ConnectorIndexingStatusLiteResponse(
                source=DocumentSource.FILE,
                summary=SourceSummary(
                    total_connectors=100,
                    active_connectors=100,
                    public_connectors=100,
                    total_docs_indexed=100000,
                ),
                current_page=1,
                total_pages=1,
                indexing_statuses=connector_indexing_statuses,
            )
        ]

    parallel_functions: list[tuple[CallableProtocol, tuple[Any, ...]]] = [
        # Get editable connector/credential pairs
        (
            lambda: get_connector_credential_pairs_for_user_parallel(
                user, True, None, True, True, False, True, request.source
            ),
            (),
        ),
        # Get federated connectors
        (fetch_all_federated_connectors_parallel, ()),
        # Get most recent index attempts
        (
            lambda: get_latest_index_attempts_parallel(
                request.secondary_index, True, False
            ),
            (),
        ),
        # Get most recent finished index attempts
        (
            lambda: get_latest_index_attempts_parallel(
                request.secondary_index, True, True
            ),
            (),
        ),
        # Get most recent successful index attempts
        (
            lambda: get_latest_successful_index_attempts_parallel(
                request.secondary_index,
            ),
            (),
        ),
    ]

    if user and user.role == UserRole.ADMIN:
        (
            editable_cc_pairs,
            federated_connectors,
            latest_index_attempts,
            latest_finished_index_attempts,
            latest_successful_index_attempts,
        ) = run_functions_tuples_in_parallel(parallel_functions)
        non_editable_cc_pairs = []
    else:
        parallel_functions.append(
            (
                lambda: get_connector_credential_pairs_for_user_parallel(
                    user, False, None, True, True, False, True, request.source
                ),
                (),
            ),
        )

        (
            editable_cc_pairs,
            federated_connectors,
            latest_index_attempts,
            latest_finished_index_attempts,
            latest_successful_index_attempts,
            non_editable_cc_pairs,
        ) = run_functions_tuples_in_parallel(parallel_functions)

    # Cast results to proper types
    non_editable_cc_pairs = cast(list[ConnectorCredentialPair], non_editable_cc_pairs)
    editable_cc_pairs = cast(list[ConnectorCredentialPair], editable_cc_pairs)
    federated_connectors = cast(list[FederatedConnector], federated_connectors)
    latest_index_attempts = cast(list[IndexAttempt], latest_index_attempts)
    latest_finished_index_attempts = cast(
        list[IndexAttempt], latest_finished_index_attempts
    )
    latest_successful_index_attempts = cast(
        list[IndexAttempt], latest_successful_index_attempts
    )

    document_count_info = get_document_counts_for_all_cc_pairs(db_session)

    # Create lookup dictionaries for efficient access
    cc_pair_to_document_cnt: dict[tuple[int, int], int] = {
        (connector_id, credential_id): cnt
        for connector_id, credential_id, cnt in document_count_info
    }

    def _attempt_lookup(
        attempts: list[IndexAttempt],
    ) -> dict[int, IndexAttempt]:
        return {attempt.connector_credential_pair_id: attempt for attempt in attempts}

    cc_pair_to_latest_index_attempt = _attempt_lookup(latest_index_attempts)
    cc_pair_to_latest_finished_index_attempt = _attempt_lookup(
        latest_finished_index_attempts
    )
    cc_pair_to_latest_successful_index_attempt = _attempt_lookup(
        latest_successful_index_attempts
    )

    def build_connector_indexing_status(
        cc_pair: ConnectorCredentialPair,
        is_editable: bool,
    ) -> ConnectorIndexingStatusLite | None:
        if cc_pair.name == "DefaultCCPair":
            return None

        latest_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
        latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(
            cc_pair.id
        )
        latest_successful_attempt = cc_pair_to_latest_successful_index_attempt.get(
            cc_pair.id
        )
        doc_count = cc_pair_to_document_cnt.get(
            (cc_pair.connector_id, cc_pair.credential_id), 0
        )

        return _get_connector_indexing_status_lite(
            cc_pair,
            latest_attempt,
            latest_finished_attempt,
            (
                latest_successful_attempt.time_started
                if latest_successful_attempt
                else None
            ),
            is_editable,
            doc_count,
        )

    # Process editable cc_pairs
    editable_statuses: list[ConnectorIndexingStatusLite] = []
    for cc_pair in editable_cc_pairs:
        status = build_connector_indexing_status(cc_pair, True)
        if status:
            editable_statuses.append(status)

    # Process non-editable cc_pairs
    non_editable_statuses: list[ConnectorIndexingStatusLite] = []
    for cc_pair in non_editable_cc_pairs:
        status = build_connector_indexing_status(cc_pair, False)
        if status:
            non_editable_statuses.append(status)

    # Process federated connectors
    federated_statuses: list[FederatedConnectorStatus] = []
    for federated_connector in federated_connectors:
        federated_status = FederatedConnectorStatus(
            id=federated_connector.id,
            source=federated_connector.source,
            name=f"{federated_connector.source.replace('_', ' ').title()}",
        )

        federated_statuses.append(federated_status)

    source_to_summary: dict[DocumentSource, SourceSummary] = {}

    # Apply filters only if any are provided
    has_filters = bool(
        request.access_type_filters
        or request.last_status_filters
        or (
            request.docs_count_operator is not None
            and request.docs_count_value is not None
        )
        or request.name_filter
    )

    if has_filters:
        editable_statuses = _apply_connector_status_filters(
            editable_statuses,
            request.access_type_filters,
            request.last_status_filters,
            request.docs_count_operator,
            request.docs_count_value,
            request.name_filter,
        )
        non_editable_statuses = _apply_connector_status_filters(
            non_editable_statuses,
            request.access_type_filters,
            request.last_status_filters,
            request.docs_count_operator,
            request.docs_count_value,
            request.name_filter,
        )
        federated_statuses = _apply_federated_connector_status_filters(
            federated_statuses,
            request.name_filter,
        )

    # Calculate source summary
    for connector_status in (
        editable_statuses + non_editable_statuses + federated_statuses
    ):
        if isinstance(connector_status, FederatedConnectorStatus):
            source = connector_status.source.to_non_federated_source()
        else:
            source = connector_status.source

        # Skip if source is None (federated connectors without mapping)
        if source is None:
            continue

        if source not in source_to_summary:
            source_to_summary[source] = SourceSummary(
                total_connectors=0,
                active_connectors=0,
                public_connectors=0,
                total_docs_indexed=0,
            )
        source_to_summary[source].total_connectors += 1
        if isinstance(connector_status, ConnectorIndexingStatusLite):
            if connector_status.cc_pair_status == ConnectorCredentialPairStatus.ACTIVE:
                source_to_summary[source].active_connectors += 1
            if connector_status.access_type == AccessType.PUBLIC:
                source_to_summary[source].public_connectors += 1
            source_to_summary[
                source
            ].total_docs_indexed += connector_status.docs_indexed

    # Track admin page visit for analytics
    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=str(user.id),
        event=MilestoneRecordType.VISITED_ADMIN_PAGE,
    )

    # Group statuses by source for pagination
    source_to_all_statuses: dict[
        DocumentSource, list[ConnectorIndexingStatusLite | FederatedConnectorStatus]
    ] = {}
    # Group by source
    for connector_status in (
        editable_statuses + non_editable_statuses + federated_statuses
    ):
        if isinstance(connector_status, FederatedConnectorStatus):
            source = connector_status.source.to_non_federated_source()
        else:
            source = connector_status.source

        # Skip if source is None (federated connectors without mapping)
        if source is None:
            continue

        if source not in source_to_all_statuses:
            source_to_all_statuses[source] = []
        source_to_all_statuses[source].append(connector_status)

    # Create paginated response objects by source
    response_list: list[ConnectorIndexingStatusLiteResponse] = []

    source_list = list(source_to_all_statuses.keys())
    source_list.sort()

    for source in source_list:
        statuses = source_to_all_statuses[source]
        # Get current page for this source (default to page 1, 1-indexed)
        current_page = request.source_to_page.get(source, 1)

        # Calculate start and end indices for pagination (convert to 0-indexed)
        start_idx = (current_page - 1) * _INDEXING_STATUS_PAGE_SIZE
        end_idx = start_idx + _INDEXING_STATUS_PAGE_SIZE

        if request.get_all_connectors:
            page_statuses = statuses
        else:
            # Get the page slice for this source
            page_statuses = statuses[start_idx:end_idx]

        # Create response object for this source
        if page_statuses:  # Only include sources that have data on this page
            response_list.append(
                ConnectorIndexingStatusLiteResponse(
                    source=source,
                    summary=source_to_summary[source],
                    current_page=current_page,
                    total_pages=math.ceil(len(statuses) / _INDEXING_STATUS_PAGE_SIZE),
                    indexing_statuses=page_statuses,
                )
            )

    return response_list


def _get_connector_indexing_status_lite(
    cc_pair: ConnectorCredentialPair,
    latest_index_attempt: IndexAttempt | None,
    latest_finished_index_attempt: IndexAttempt | None,
    last_successful_index_time: datetime | None,
    is_editable: bool,
    document_cnt: int,
) -> ConnectorIndexingStatusLite | None:
    # TODO remove this to enable ingestion API
    if cc_pair.name == "DefaultCCPair":
        return None

    connector = cc_pair.connector
    credential = cc_pair.credential
    if not connector or not credential:
        # This may happen if background deletion is happening
        return None

    in_progress = bool(
        latest_index_attempt
        and latest_index_attempt.status == IndexingStatus.IN_PROGRESS
    )

    return ConnectorIndexingStatusLite(
        cc_pair_id=cc_pair.id,
        name=cc_pair.name,
        source=cc_pair.connector.source,
        access_type=cc_pair.access_type,
        cc_pair_status=cc_pair.status,
        is_editable=is_editable,
        in_progress=in_progress,
        in_repeated_error_state=cc_pair.in_repeated_error_state,
        last_finished_status=(
            latest_finished_index_attempt.status
            if latest_finished_index_attempt
            else None
        ),
        last_status=latest_index_attempt.status if latest_index_attempt else None,
        last_success=last_successful_index_time,
        docs_indexed=document_cnt,
        latest_index_attempt_docs_indexed=(
            latest_index_attempt.total_docs_indexed if latest_index_attempt else None
        ),
    )


def _apply_connector_status_filters(
    statuses: list[ConnectorIndexingStatusLite],
    access_type_filters: list[AccessType],
    last_status_filters: list[IndexingStatus],
    docs_count_operator: DocsCountOperator | None,
    docs_count_value: int | None,
    name_filter: str | None,
) -> list[ConnectorIndexingStatusLite]:
    """Apply filters to a list of ConnectorIndexingStatusLite objects"""
    filtered_statuses: list[ConnectorIndexingStatusLite] = []

    for status in statuses:
        # Filter by access type
        if access_type_filters and status.access_type not in access_type_filters:
            continue

        # Filter by last status
        if last_status_filters and status.last_status not in last_status_filters:
            continue

        # Filter by document count
        if docs_count_operator and docs_count_value is not None:
            if docs_count_operator == DocsCountOperator.GREATER_THAN and not (
                status.docs_indexed > docs_count_value
            ):
                continue
            elif docs_count_operator == DocsCountOperator.LESS_THAN and not (
                status.docs_indexed < docs_count_value
            ):
                continue
            elif (
                docs_count_operator == DocsCountOperator.EQUAL_TO
                and status.docs_indexed != docs_count_value
            ):
                continue

        # Filter by name
        if status.name:
            if name_filter and name_filter.lower() not in status.name.lower():
                continue
        else:
            if name_filter:
                continue

        filtered_statuses.append(status)

    return filtered_statuses


def _apply_federated_connector_status_filters(
    statuses: list[FederatedConnectorStatus],
    name_filter: str | None,
) -> list[FederatedConnectorStatus]:
    filtered_statuses: list[FederatedConnectorStatus] = []

    for status in statuses:
        if name_filter and name_filter.lower() not in status.name.lower():
            continue

        filtered_statuses.append(status)

    return filtered_statuses


def _validate_connector_allowed(source: DocumentSource) -> None:
    valid_connectors = [
        x for x in ENABLED_CONNECTOR_TYPES.replace("_", "").split(",") if x
    ]
    if not valid_connectors:
        return
    for connector_type in valid_connectors:
        if source.value.lower().replace("_", "") == connector_type:
            return

    raise ValueError(
        "This connector type has been disabled by your system admin. Please contact them to get it enabled if you wish to use it."
    )


@router.post("/admin/connector", tags=PUBLIC_API_TAGS)
def create_connector_from_model(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    tenant_id = get_current_tenant_id()

    try:
        _validate_connector_allowed(connector_data.source)

        fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
            object_is_new=True,
        )
        connector_base = connector_data.to_connector_base()
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_base,
        )

        mt_cloud_telemetry(
            tenant_id=tenant_id,
            distinct_id=str(user.id),
            event=MilestoneRecordType.CREATED_CONNECTOR,
        )

        return connector_response
    except ValueError as e:
        logger.error(f"Error creating connector: {e}")
        raise HTTPException(status_code=400, detail=str(e))


@router.post("/admin/connector-with-mock-credential")
def create_connector_with_mock_credential(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    tenant_id = get_current_tenant_id()

    fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )(
        db_session=db_session,
        user=user,
        target_group_ids=connector_data.groups,
        object_is_public=connector_data.access_type == AccessType.PUBLIC,
        object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
    )
    try:
        _validate_connector_allowed(connector_data.source)
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )

        mock_credential = CredentialBase(
            credential_json={},
            admin_public=True,
            source=connector_data.source,
        )
        credential = create_credential(
            credential_data=mock_credential,
            user=user,
            db_session=db_session,
        )

        # Store the created connector and credential IDs
        connector_id = cast(int, connector_response.id)
        credential_id = credential.id

        validate_ccpair_for_user(
            connector_id=connector_id,
            credential_id=credential_id,
            access_type=connector_data.access_type,
            db_session=db_session,
        )
        response = add_credential_to_connector(
            db_session=db_session,
            user=user,
            connector_id=connector_id,
            credential_id=credential_id,
            access_type=connector_data.access_type,
            cc_pair_name=connector_data.name,
            groups=connector_data.groups,
        )

        # trigger indexing immediately
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_INDEXING,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )

        logger.info(
            f"create_connector_with_mock_credential - running check_for_indexing: cc_pair={response.data}"
        )

        mt_cloud_telemetry(
            tenant_id=tenant_id,
            distinct_id=str(user.id),
            event=MilestoneRecordType.CREATED_CONNECTOR,
        )
        return response

    except ConnectorValidationError as e:
        raise HTTPException(
            status_code=400, detail="Connector validation error: " + str(e)
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@router.patch("/admin/connector/{connector_id}", tags=PUBLIC_API_TAGS)
def update_connector_from_model(
    connector_id: int,
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
    try:
        _validate_connector_allowed(connector_data.source)
        fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
            object_is_owned_by_user=cc_pair and user and cc_pair.creator_id == user.id,
        )
        connector_base = connector_data.to_connector_base()
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    updated_connector = update_connector(connector_id, connector_base, db_session)
    if updated_connector is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    return ConnectorSnapshot(
        id=updated_connector.id,
        name=updated_connector.name,
        source=updated_connector.source,
        input_type=updated_connector.input_type,
        connector_specific_config=updated_connector.connector_specific_config,
        refresh_freq=updated_connector.refresh_freq,
        prune_freq=updated_connector.prune_freq,
        credential_ids=[
            association.credential.id for association in updated_connector.credentials
        ],
        indexing_start=updated_connector.indexing_start,
        time_created=updated_connector.time_created,
        time_updated=updated_connector.time_updated,
    )


@router.delete(
    "/admin/connector/{connector_id}",
    response_model=StatusResponse[int],
    tags=PUBLIC_API_TAGS,
)
def delete_connector_by_id(
    connector_id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    try:
        with db_session.begin():
            return delete_connector(
                db_session=db_session,
                connector_id=connector_id,
            )
    except AssertionError:
        raise HTTPException(status_code=400, detail="Connector is not deletable")


@router.post("/admin/connector/run-once", tags=PUBLIC_API_TAGS)
def connector_run_once(
    run_info: RunConnectorRequest,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Used to trigger indexing on a set of cc_pairs associated with a
    single connector."""
    tenant_id = get_current_tenant_id()

    connector_id = run_info.connector_id
    specified_credential_ids = run_info.credential_ids

    try:
        possible_credential_ids = get_connector_credential_ids(
            run_info.connector_id, db_session
        )
    except ValueError:
        raise HTTPException(
            status_code=404,
            detail=f"Connector by id {connector_id} does not exist.",
        )

    if not specified_credential_ids:
        credential_ids = possible_credential_ids
    else:
        if set(specified_credential_ids).issubset(set(possible_credential_ids)):
            credential_ids = specified_credential_ids
        else:
            raise HTTPException(
                status_code=400,
                detail="Not all specified credentials are associated with connector",
            )

    if not credential_ids:
        raise HTTPException(
            status_code=400,
            detail="Connector has no valid credentials, cannot create index attempts.",
        )
    try:
        num_triggers = trigger_indexing_for_cc_pair(
            credential_ids,
            connector_id,
            run_info.from_beginning,
            tenant_id,
            db_session,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    logger.info("connector_run_once - running check_for_indexing")

    msg = f"Marked {num_triggers} index attempts with indexing triggers."
    return StatusResponse(
        success=True,
        message=msg,
        data=num_triggers,
    )


"""Endpoints for basic users"""


@router.get("/connector/gmail/authorize/{credential_id}")
def gmail_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GMAIL_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(auth_url=get_auth_url(int(credential_id), DocumentSource.GMAIL))


@router.get("/connector/google-drive/authorize/{credential_id}")
def google_drive_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(
        auth_url=get_auth_url(int(credential_id), DocumentSource.GOOGLE_DRIVE)
    )


@router.get("/connector/gmail/callback")
def gmail_callback(
    request: Request,
    callback: GmailCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    credential_id_cookie = request.cookies.get(_GMAIL_CREDENTIAL_ID_COOKIE_NAME)
    if credential_id_cookie is None or not credential_id_cookie.isdigit():
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )
    credential_id = int(credential_id_cookie)
    verify_csrf(credential_id, callback.state)
    credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GMAIL,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if credentials is None:
        raise HTTPException(
            status_code=500, detail="Unable to fetch Gmail access tokens"
        )

    return StatusResponse(success=True, message="Updated Gmail access tokens")


@router.get("/connector/google-drive/callback")
def google_drive_callback(
    request: Request,
    callback: GDriveCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    credential_id_cookie = request.cookies.get(_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME)
    if credential_id_cookie is None or not credential_id_cookie.isdigit():
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )
    credential_id = int(credential_id_cookie)
    verify_csrf(credential_id, callback.state)

    credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GOOGLE_DRIVE,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if credentials is None:
        raise HTTPException(
            status_code=500, detail="Unable to fetch Google Drive access tokens"
        )

    return StatusResponse(success=True, message="Updated Google Drive access tokens")


@router.get("/connector", tags=PUBLIC_API_TAGS)
def get_connectors(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorSnapshot]:
    connectors = fetch_connectors(db_session)
    return [
        ConnectorSnapshot.from_connector_db_model(connector)
        for connector in connectors
        # don't include INGESTION_API, as it's not a "real"
        # connector like those created by the user
        if connector.source != DocumentSource.INGESTION_API
    ]


@router.get("/indexed-sources", tags=PUBLIC_API_TAGS)
def get_indexed_sources(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> IndexedSourcesResponse:
    sources = sorted(
        fetch_unique_document_sources(db_session), key=lambda source: source.value
    )
    return IndexedSourcesResponse(sources=sources)


@router.get("/connector/{connector_id}", tags=PUBLIC_API_TAGS)
def get_connector_by_id(
    connector_id: int,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    return ConnectorSnapshot(
        id=connector.id,
        name=connector.name,
        source=connector.source,
        indexing_start=connector.indexing_start,
        input_type=connector.input_type,
        connector_specific_config=connector.connector_specific_config,
        refresh_freq=connector.refresh_freq,
        prune_freq=connector.prune_freq,
        credential_ids=[
            association.credential.id for association in connector.credentials
        ],
        time_created=connector.time_created,
        time_updated=connector.time_updated,
    )


@router.post("/connector-request")
def submit_connector_request(
    request_data: ConnectorRequestSubmission,
    user: User = Depends(current_user),
) -> StatusResponse:
    """
    Submit a connector request for Cloud deployments.
    Tracks via PostHog telemetry and sends email to hello@onyx.app.
    """
    tenant_id = get_current_tenant_id()
    connector_name = request_data.connector_name.strip()

    if not connector_name:
        raise HTTPException(status_code=400, detail="Connector name cannot be empty")

    user_email = user.email

    # Track connector request via PostHog telemetry (Cloud only)
    from shared_configs.configs import MULTI_TENANT

    if MULTI_TENANT:
        mt_cloud_telemetry(
            tenant_id=tenant_id,
            distinct_id=str(user.id),
            event=MilestoneRecordType.REQUESTED_CONNECTOR,
            properties={
                "connector_name": connector_name,
                "user_email": user.email,
            },
        )

    # Send email notification (if email is configured)
    if EMAIL_CONFIGURED:
        try:
            subject = "Onyx Craft Connector Request"
            email_body_text = f"""A new connector request has been submitted:

Connector Name: {connector_name}
User Email: {user_email or "Not provided (anonymous user)"}
Tenant ID: {tenant_id}
"""
            email_body_html = f"""<html>
<body>
<p>A new connector request has been submitted:</p>
<ul>
<li><strong>Connector Name:</strong> {connector_name}</li>
<li><strong>User Email:</strong> {user_email or "Not provided (anonymous user)"}</li>
<li><strong>Tenant ID:</strong> {tenant_id}</li>
</ul>
</body>
</html>"""

            send_email(
                user_email="hello@onyx.app",
                subject=subject,
                html_body=email_body_html,
                text_body=email_body_text,
            )
            logger.info(
                f"Connector request email sent to hello@onyx.app for connector: {connector_name}"
            )
        except Exception as e:
            # Log error but don't fail the request if email fails
            logger.error(
                f"Failed to send connector request email for {connector_name}: {e}"
            )

    logger.info(
        f"Connector request submitted: {connector_name} by user {user_email or 'anonymous'} (tenant: {tenant_id})"
    )

    return StatusResponse(
        success=True,
        message="Connector request submitted successfully. We'll prioritize popular requests!",
    )


class BasicCCPairInfo(BaseModel):
    has_successful_run: bool
    source: DocumentSource
    status: ConnectorCredentialPairStatus


@router.get("/connector-status", tags=PUBLIC_API_TAGS)
def get_basic_connector_indexing_status(
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> list[BasicCCPairInfo]:
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        eager_load_connector=True,
        get_editable=False,
        user=user,
    )

    # NOTE: This endpoint excludes Craft connectors
    return [
        BasicCCPairInfo(
            has_successful_run=cc_pair.last_successful_index_time is not None,
            source=cc_pair.connector.source,
            status=cc_pair.status,
        )
        for cc_pair in cc_pairs
        if cc_pair.connector.source != DocumentSource.INGESTION_API
        and cc_pair.processing_mode == ProcessingMode.REGULAR
    ]


def trigger_indexing_for_cc_pair(
    specified_credential_ids: list[int],
    connector_id: int,
    from_beginning: bool,
    tenant_id: str,
    db_session: Session,
) -> int:
    try:
        possible_credential_ids = get_connector_credential_ids(connector_id, db_session)
    except ValueError as e:
        raise ValueError(f"Connector by id {connector_id} does not exist: {str(e)}")

    if not specified_credential_ids:
        credential_ids = possible_credential_ids
    else:
        if set(specified_credential_ids).issubset(set(possible_credential_ids)):
            credential_ids = specified_credential_ids
        else:
            raise ValueError(
                "Not all specified credentials are associated with connector"
            )

    if not credential_ids:
        raise ValueError(
            "Connector has no valid credentials, cannot create index attempts."
        )

    # Prevents index attempts for cc pairs that already have an index attempt currently running
    skipped_credentials = [
        credential_id
        for credential_id in credential_ids
        if get_index_attempts_for_cc_pair(
            cc_pair_identifier=ConnectorCredentialPairIdentifier(
                connector_id=connector_id,
                credential_id=credential_id,
            ),
            only_current=True,
            db_session=db_session,
            disinclude_finished=True,
        )
    ]

    connector_credential_pairs = [
        get_connector_credential_pair(
            db_session=db_session,
            connector_id=connector_id,
            credential_id=credential_id,
        )
        for credential_id in credential_ids
        if credential_id not in skipped_credentials
    ]

    num_triggers = 0
    for cc_pair in connector_credential_pairs:
        if cc_pair is not None:
            indexing_mode = IndexingMode.UPDATE
            if from_beginning:
                indexing_mode = IndexingMode.REINDEX

            mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)
            num_triggers += 1

            logger.info(
                f"connector_run_once - marking cc_pair with indexing trigger: "
                f"connector={connector_id} "
                f"cc_pair={cc_pair.id} "
                f"indexing_trigger={indexing_mode}"
            )

    priority = OnyxCeleryPriority.HIGH

    # run the beat task to pick up the triggers immediately
    logger.info(f"Sending indexing check task with priority {priority}")
    client_app.send_task(
        OnyxCeleryTask.CHECK_FOR_INDEXING,
        priority=priority,
        kwargs={"tenant_id": tenant_id},
    )

    return num_triggers


================================================
FILE: backend/onyx/server/documents/credential.py
================================================
import json

from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.credentials import alter_credential
from onyx.db.credentials import cleanup_gmail_credentials
from onyx.db.credentials import create_credential
from onyx.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE
from onyx.db.credentials import delete_credential
from onyx.db.credentials import delete_credential_for_user
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.credentials import fetch_credentials_by_source_for_user
from onyx.db.credentials import fetch_credentials_for_user
from onyx.db.credentials import swap_credentials_connector
from onyx.db.credentials import update_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import DocumentSource
from onyx.db.models import User
from onyx.server.documents.models import CredentialBase
from onyx.server.documents.models import CredentialDataUpdateRequest
from onyx.server.documents.models import CredentialSnapshot
from onyx.server.documents.models import CredentialSwapRequest
from onyx.server.documents.models import ObjectCreationIdResponse
from onyx.server.documents.private_key_types import FILE_TYPE_TO_FILE_PROCESSOR
from onyx.server.documents.private_key_types import PrivateKeyFileTypes
from onyx.server.documents.private_key_types import ProcessPrivateKeyFileProtocol
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

logger = setup_logger()


router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)


def _ignore_credential_permissions(source: DocumentSource) -> bool:
    return source in CREDENTIAL_PERMISSIONS_TO_IGNORE


"""Admin-only endpoints"""


@router.get("/admin/credential")
def list_credentials_admin(
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[CredentialSnapshot]:
    """Lists all public credentials"""
    credentials = fetch_credentials_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
    )
    return [
        CredentialSnapshot.from_credential_db_model(credential)
        for credential in credentials
    ]


@router.get("/admin/similar-credentials/{source_type}")
def get_cc_source_full_info(
    source_type: DocumentSource,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable credentials"
    ),
) -> list[CredentialSnapshot]:
    credentials = fetch_credentials_by_source_for_user(
        db_session=db_session,
        user=user,
        document_source=source_type,
        get_editable=get_editable,
    )

    return [
        CredentialSnapshot.from_credential_db_model(credential)
        for credential in credentials
    ]


@router.delete("/admin/credential/{credential_id}")
def delete_credential_by_id_admin(
    credential_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Same as the user endpoint, but can delete any credential (not just the user's own)"""
    delete_credential(db_session=db_session, credential_id=credential_id)
    return StatusResponse(
        success=True, message="Credential deleted successfully", data=credential_id
    )


@router.put("/admin/credential/swap")
def swap_credentials_for_connector(
    credential_swap_req: CredentialSwapRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    validate_ccpair_for_user(
        credential_swap_req.connector_id,
        credential_swap_req.new_credential_id,
        credential_swap_req.access_type,
        db_session,
    )

    connector_credential_pair = swap_credentials_connector(
        new_credential_id=credential_swap_req.new_credential_id,
        connector_id=credential_swap_req.connector_id,
        db_session=db_session,
        user=user,
    )

    return StatusResponse(
        success=True,
        message="Credential swapped successfully",
        data=connector_credential_pair.id,
    )


@router.post("/credential")
def create_credential_from_model(
    credential_info: CredentialBase,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    if not _ignore_credential_permissions(credential_info.source):
        fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )(
            db_session=db_session,
            user=user,
            target_group_ids=credential_info.groups,
            object_is_public=credential_info.curator_public,
        )

    # Temporary fix for empty Google App credentials
    if credential_info.source == DocumentSource.GMAIL:
        cleanup_gmail_credentials(db_session=db_session)

    credential = create_credential(credential_info, user, db_session)
    return ObjectCreationIdResponse(
        id=credential.id,
        credential=CredentialSnapshot.from_credential_db_model(credential),
    )


@router.post("/credential/private-key")
def create_credential_with_private_key(
    credential_json: str = Form(...),
    admin_public: bool = Form(False),
    curator_public: bool = Form(False),
    groups: list[int] = Form([]),
    name: str | None = Form(None),
    source: str = Form(...),
    user: User = Depends(current_curator_or_admin_user),
    uploaded_file: UploadFile = File(...),
    field_key: str = Form(...),
    type_definition_key: str = Form(...),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    try:
        credential_data = json.loads(credential_json)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON in credential_json: {str(e)}",
        )

    private_key_processor: ProcessPrivateKeyFileProtocol | None = (
        FILE_TYPE_TO_FILE_PROCESSOR.get(PrivateKeyFileTypes(type_definition_key))
    )
    if private_key_processor is None:
        raise HTTPException(
            status_code=400,
            detail="Invalid type definition key for private key file",
        )
    private_key_content: str = private_key_processor(uploaded_file)

    credential_data[field_key] = private_key_content

    credential_info = CredentialBase(
        credential_json=credential_data,
        admin_public=admin_public,
        curator_public=curator_public,
        groups=groups,
        name=name,
        source=DocumentSource(source),
    )

    if not _ignore_credential_permissions(DocumentSource(source)):
        fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )(
            db_session=db_session,
            user=user,
            target_group_ids=groups,
            object_is_public=curator_public,
        )

    # Temporary fix for empty Google App credentials
    if DocumentSource(source) == DocumentSource.GMAIL:
        cleanup_gmail_credentials(db_session=db_session)

    credential = create_credential(credential_info, user, db_session)
    return ObjectCreationIdResponse(
        id=credential.id,
        credential=CredentialSnapshot.from_credential_db_model(credential),
    )


"""Endpoints for all"""


@router.get("/credential")
def list_credentials(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[CredentialSnapshot]:
    credentials = fetch_credentials_for_user(db_session=db_session, user=user)
    return [
        CredentialSnapshot.from_credential_db_model(credential)
        for credential in credentials
    ]


@router.get("/credential/{credential_id}")
def get_credential_by_id(
    credential_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CredentialSnapshot | StatusResponse[int]:
    credential = fetch_credential_by_id_for_user(
        credential_id,
        user,
        db_session,
        get_editable=False,
    )
    if credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )

    return CredentialSnapshot.from_credential_db_model(credential)


@router.put("/admin/credential/{credential_id}")
def update_credential_data(
    credential_id: int,
    credential_update: CredentialDataUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CredentialBase:
    credential = alter_credential(
        credential_id,
        credential_update.name,
        credential_update.credential_json,
        user,
        db_session,
    )

    if credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )

    return CredentialSnapshot.from_credential_db_model(credential)


@router.put("/admin/credential/private-key/{credential_id}")
def update_credential_private_key(
    credential_id: int,
    name: str = Form(...),
    credential_json: str = Form(...),
    uploaded_file: UploadFile = File(...),
    field_key: str = Form(...),
    type_definition_key: str = Form(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CredentialBase:
    try:
        credential_data = json.loads(credential_json)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON in credential_json: {str(e)}",
        )

    private_key_processor: ProcessPrivateKeyFileProtocol | None = (
        FILE_TYPE_TO_FILE_PROCESSOR.get(PrivateKeyFileTypes(type_definition_key))
    )
    if private_key_processor is None:
        raise HTTPException(
            status_code=400,
            detail="Invalid type definition key for private key file",
        )
    private_key_content: str = private_key_processor(uploaded_file)
    credential_data[field_key] = private_key_content

    credential = alter_credential(
        credential_id,
        name,
        credential_data,
        user,
        db_session,
    )

    if credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )

    return CredentialSnapshot.from_credential_db_model(credential)


@router.patch("/credential/{credential_id}")
def update_credential_from_model(
    credential_id: int,
    credential_data: CredentialBase,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CredentialSnapshot | StatusResponse[int]:
    updated_credential = update_credential(
        credential_id, credential_data, user, db_session
    )
    if updated_credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )

    # Get credential_json value - use masking for API responses
    credential_json_value = (
        updated_credential.credential_json.get_value(apply_mask=True)
        if updated_credential.credential_json
        else {}
    )

    return CredentialSnapshot(
        source=updated_credential.source,
        id=updated_credential.id,
        credential_json=credential_json_value,
        user_id=updated_credential.user_id,
        name=updated_credential.name,
        admin_public=updated_credential.admin_public,
        time_created=updated_credential.time_created,
        time_updated=updated_credential.time_updated,
        curator_public=updated_credential.curator_public,
    )


@router.delete("/credential/{credential_id}")
def delete_credential_by_id(
    credential_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    delete_credential_for_user(
        credential_id,
        user,
        db_session,
    )

    return StatusResponse(
        success=True, message="Credential deleted successfully", data=credential_id
    )


@router.delete("/credential/force/{credential_id}")
def force_delete_credential_by_id(
    credential_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    delete_credential_for_user(credential_id, user, db_session, True)

    return StatusResponse(
        success=True, message="Credential deleted successfully", data=credential_id
    )


================================================
FILE: backend/onyx/server/documents/document.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.context.search.models import IndexFilters
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.prompt_utils import build_doc_context_str
from onyx.server.documents.models import ChunkInfo
from onyx.server.documents.models import DocumentInfo
from onyx.server.utils_vector_db import require_vector_db


router = APIRouter(prefix="/document")


# Have to use a query parameter as FastAPI is interpreting the URL type document_ids
# as a different path
@router.get("/document-size-info", dependencies=[Depends(require_vector_db)])
def get_document_info(
    document_id: str = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DocumentInfo:
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)

    user_acl_filters = build_access_filters_for_user(user, db_session)
    inference_chunks = document_index.id_based_retrieval(
        chunk_requests=[VespaChunkRequest(document_id=document_id)],
        filters=IndexFilters(access_control_list=user_acl_filters),
    )

    if not inference_chunks:
        raise HTTPException(status_code=404, detail="Document not found")

    contents = [chunk.content for chunk in inference_chunks]

    combined_contents = "\n".join(contents)

    # get actual document context used for LLM
    first_chunk = inference_chunks[0]
    tokenizer_encode = get_tokenizer(
        provider_type=search_settings.provider_type,
        model_name=search_settings.model_name,
    ).encode
    full_context_str = build_doc_context_str(
        semantic_identifier=first_chunk.semantic_identifier,
        source_type=first_chunk.source_type,
        content=combined_contents,
        metadata_dict=first_chunk.metadata,
        updated_at=first_chunk.updated_at,
        ind=0,
    )

    return DocumentInfo(
        num_chunks=len(inference_chunks),
        num_tokens=len(tokenizer_encode(full_context_str)),
    )


@router.get("/chunk-info", dependencies=[Depends(require_vector_db)])
def get_chunk_info(
    document_id: str = Query(...),
    chunk_id: int = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ChunkInfo:
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)

    user_acl_filters = build_access_filters_for_user(user, db_session)
    chunk_request = VespaChunkRequest(
        document_id=document_id,
        min_chunk_ind=chunk_id,
        max_chunk_ind=chunk_id,
    )

    inference_chunks = document_index.id_based_retrieval(
        chunk_requests=[chunk_request],
        filters=IndexFilters(access_control_list=user_acl_filters),
        batch_retrieval=True,
    )

    if not inference_chunks:
        raise HTTPException(status_code=404, detail="Chunk not found")

    chunk_content = inference_chunks[0].content

    tokenizer_encode = get_tokenizer(
        provider_type=search_settings.provider_type,
        model_name=search_settings.model_name,
    ).encode

    return ChunkInfo(
        content=chunk_content, num_tokens=len(tokenizer_encode(chunk_content))
    )


================================================
FILE: backend/onyx/server/documents/document_utils.py
================================================
from cryptography.hazmat.primitives.serialization import pkcs12

from onyx.utils.logger import setup_logger

logger = setup_logger()


def _is_password_related_error(error: Exception) -> bool:
    """
    Check if the exception indicates a password-related issue rather than a format issue.
    """
    error_msg = str(error).lower()
    password_keywords = ["mac", "integrity", "password", "authentication", "verify"]
    return any(keyword in error_msg for keyword in password_keywords)


def validate_pkcs12_content(file_bytes: bytes) -> bool:
    """
    Validate that the file content is actually a PKCS#12 file.
    This performs basic format validation without requiring passwords.
    """
    try:
        # Basic file size check
        if len(file_bytes) < 10:
            logger.debug("File too small to be a valid PKCS#12 file")
            return False

        # Check for PKCS#12 magic bytes/ASN.1 structure
        # PKCS#12 files start with ASN.1 SEQUENCE tag (0x30)
        if file_bytes[0] != 0x30:
            logger.debug("File does not start with ASN.1 SEQUENCE tag")
            return False

        # Try to parse the outer ASN.1 structure without password validation
        # This checks if the file has the basic PKCS#12 structure
        try:
            # Attempt to load just to validate the basic format
            # We expect this to fail due to password, but it should fail with a specific error
            pkcs12.load_key_and_certificates(file_bytes, password=None)
            return True
        except ValueError as e:
            # Check if the error is related to password (expected) vs format issues
            if _is_password_related_error(e):
                # These errors indicate the file format is correct but password is wrong/missing
                logger.debug(
                    f"PKCS#12 format appears valid, password-related error: {e}"
                )
                return True
            else:
                # Other ValueError likely indicates format issues
                logger.debug(f"PKCS#12 format validation failed: {e}")
                return False
        except Exception as e:
            # Try with empty password as fallback
            try:
                pkcs12.load_key_and_certificates(file_bytes, password=b"")
                return True
            except ValueError as e2:
                if _is_password_related_error(e2):
                    logger.debug(
                        f"PKCS#12 format appears valid with empty password attempt: {e2}"
                    )
                    return True
                else:
                    logger.debug(
                        f"PKCS#12 validation failed on both attempts: {e}, {e2}"
                    )
                    return False
            except Exception:
                logger.debug(f"PKCS#12 validation failed: {e}")
                return False

    except Exception as e:
        logger.debug(f"Unexpected error during PKCS#12 validation: {e}")
        return False


================================================
FILE: backend/onyx/server/documents/models.py
================================================
from collections.abc import Sequence
from datetime import datetime
from datetime import timezone
from datetime import UTC
from enum import Enum
from typing import Any
from typing import Generic
from typing import TypeVar
from uuid import UUID

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field

from onyx.configs.app_configs import MASK_CREDENTIAL_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.enums import ProcessingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import DocPermissionSyncAttempt
from onyx.db.models import Document as DbDocument
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.db.models import TaskStatus
from onyx.server.federated.models import FederatedConnectorStatus
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop


class DocumentSyncStatus(BaseModel):
    doc_id: str
    last_synced: datetime | None
    last_modified: datetime | None

    @classmethod
    def from_model(cls, doc: DbDocument) -> "DocumentSyncStatus":
        return DocumentSyncStatus(
            doc_id=doc.id,
            last_synced=doc.last_synced,
            last_modified=doc.last_modified,
        )


class DocumentInfo(BaseModel):
    num_chunks: int
    num_tokens: int


class ChunkInfo(BaseModel):
    content: str
    num_tokens: int


class IndexedSourcesResponse(BaseModel):
    model_config = ConfigDict(use_enum_values=True)
    sources: list[DocumentSource]


class DeletionAttemptSnapshot(BaseModel):
    connector_id: int
    credential_id: int
    status: TaskStatus


class ConnectorBase(BaseModel):
    name: str
    source: DocumentSource
    input_type: InputType
    connector_specific_config: dict[str, Any]
    # In seconds, None for one time index with no refresh
    refresh_freq: int | None = None
    prune_freq: int | None = None
    indexing_start: datetime | None = None


class ConnectorUpdateRequest(ConnectorBase):
    access_type: AccessType
    groups: list[int] = Field(default_factory=list)

    def to_connector_base(self) -> ConnectorBase:
        return ConnectorBase(**self.model_dump(exclude={"access_type", "groups"}))


class ConnectorSnapshot(ConnectorBase):
    id: int
    credential_ids: list[int]
    time_created: datetime
    time_updated: datetime
    source: DocumentSource

    @classmethod
    def from_connector_db_model(
        cls, connector: Connector, credential_ids: list[int] | None = None
    ) -> "ConnectorSnapshot":
        return ConnectorSnapshot(
            id=connector.id,
            name=connector.name,
            source=connector.source,
            input_type=connector.input_type,
            connector_specific_config=connector.connector_specific_config,
            refresh_freq=connector.refresh_freq,
            prune_freq=connector.prune_freq,
            credential_ids=(
                credential_ids
                or [association.credential.id for association in connector.credentials]
            ),
            indexing_start=connector.indexing_start,
            time_created=connector.time_created,
            time_updated=connector.time_updated,
        )


class CredentialSwapRequest(BaseModel):
    new_credential_id: int
    connector_id: int
    access_type: AccessType


class CredentialDataUpdateRequest(BaseModel):
    name: str
    credential_json: dict[str, Any]


class CredentialBase(BaseModel):
    credential_json: dict[str, Any]
    # if `true`, then all Admins will have access to the credential
    admin_public: bool
    source: DocumentSource
    name: str | None = None
    curator_public: bool = False
    groups: list[int] = Field(default_factory=list)


class CredentialSnapshot(CredentialBase):
    id: int
    user_id: UUID | None
    user_email: str | None = None
    time_created: datetime
    time_updated: datetime

    @classmethod
    def from_credential_db_model(cls, credential: Credential) -> "CredentialSnapshot":
        # Get the credential_json value with appropriate masking
        if credential.credential_json is None:
            credential_json_value: dict[str, Any] = {}
        elif MASK_CREDENTIAL_PREFIX:
            credential_json_value = credential.credential_json.get_value(
                apply_mask=True
            )
        else:
            credential_json_value = credential.credential_json.get_value(
                apply_mask=False
            )

        return CredentialSnapshot(
            id=credential.id,
            credential_json=credential_json_value,
            user_id=credential.user_id,
            user_email=credential.user.email if credential.user else None,
            admin_public=credential.admin_public,
            time_created=credential.time_created,
            time_updated=credential.time_updated,
            source=credential.source or DocumentSource.NOT_APPLICABLE,
            name=credential.name,
            curator_public=credential.curator_public,
        )


class IndexAttemptSnapshot(BaseModel):
    id: int
    status: IndexingStatus | None
    from_beginning: bool
    new_docs_indexed: int  # only includes completely new docs
    total_docs_indexed: int  # includes docs that are updated
    docs_removed_from_index: int
    error_msg: str | None
    error_count: int
    full_exception_trace: str | None
    time_started: str | None
    time_updated: str
    poll_range_start: datetime | None = None
    poll_range_end: datetime | None = None

    @classmethod
    def from_index_attempt_db_model(
        cls, index_attempt: IndexAttempt
    ) -> "IndexAttemptSnapshot":
        return IndexAttemptSnapshot(
            id=index_attempt.id,
            status=index_attempt.status,
            from_beginning=index_attempt.from_beginning,
            new_docs_indexed=index_attempt.new_docs_indexed or 0,
            total_docs_indexed=index_attempt.total_docs_indexed or 0,
            docs_removed_from_index=index_attempt.docs_removed_from_index or 0,
            error_msg=index_attempt.error_msg,
            error_count=len(index_attempt.error_rows),
            full_exception_trace=index_attempt.full_exception_trace,
            time_started=(
                index_attempt.time_started.isoformat()
                if index_attempt.time_started
                else None
            ),
            time_updated=index_attempt.time_updated.isoformat(),
            poll_range_start=index_attempt.poll_range_start,
            poll_range_end=index_attempt.poll_range_end,
        )


# These are the types currently supported by the pagination hook
# More api endpoints can be refactored and be added here for use with the pagination hook
PaginatedType = TypeVar("PaginatedType", bound=BaseModel)


class PermissionSyncAttemptSnapshot(BaseModel):
    id: int
    status: PermissionSyncStatus
    error_message: str | None
    total_docs_synced: int
    docs_with_permission_errors: int
    time_created: str
    time_started: str | None
    time_finished: str | None

    @classmethod
    def from_permission_sync_attempt_db_model(
        cls, attempt: DocPermissionSyncAttempt
    ) -> "PermissionSyncAttemptSnapshot":
        return PermissionSyncAttemptSnapshot(
            id=attempt.id,
            status=attempt.status,
            error_message=attempt.error_message,
            total_docs_synced=attempt.total_docs_synced or 0,
            docs_with_permission_errors=attempt.docs_with_permission_errors or 0,
            time_created=attempt.time_created.isoformat(),
            time_started=(
                attempt.time_started.isoformat() if attempt.time_started else None
            ),
            time_finished=(
                attempt.time_finished.isoformat() if attempt.time_finished else None
            ),
        )


class PaginatedReturn(BaseModel, Generic[PaginatedType]):
    items: list[PaginatedType]
    total_items: int


class CCPairFullInfo(BaseModel):
    id: int
    name: str
    status: ConnectorCredentialPairStatus
    in_repeated_error_state: bool
    num_docs_indexed: int
    connector: ConnectorSnapshot
    credential: CredentialSnapshot
    number_of_index_attempts: int
    last_index_attempt_status: IndexingStatus | None
    latest_deletion_attempt: DeletionAttemptSnapshot | None
    access_type: AccessType
    is_editable_for_current_user: bool
    deletion_failure_message: str | None
    indexing: bool
    creator: UUID | None
    creator_email: str | None

    # information on syncing/indexing
    last_indexed: datetime | None
    last_pruned: datetime | None
    # accounts for both doc sync and group sync
    last_full_permission_sync: datetime | None
    overall_indexing_speed: float | None
    latest_checkpoint_description: str | None

    # permission sync attempt status
    last_permission_sync_attempt_status: PermissionSyncStatus | None
    permission_syncing: bool
    last_permission_sync_attempt_finished: datetime | None
    last_permission_sync_attempt_error_message: str | None

    @classmethod
    def _get_last_full_permission_sync(
        cls, cc_pair_model: ConnectorCredentialPair
    ) -> datetime | None:
        check_if_source_requires_external_group_sync = fetch_ee_implementation_or_noop(
            "onyx.external_permissions.sync_params",
            "source_requires_external_group_sync",
            noop_return_value=False,
        )
        check_if_source_requires_doc_sync = fetch_ee_implementation_or_noop(
            "onyx.external_permissions.sync_params",
            "source_requires_doc_sync",
            noop_return_value=False,
        )

        needs_group_sync = check_if_source_requires_external_group_sync(
            cc_pair_model.connector.source
        )
        needs_doc_sync = check_if_source_requires_doc_sync(
            cc_pair_model.connector.source
        )

        last_group_sync = (
            cc_pair_model.last_time_external_group_sync
            if needs_group_sync
            else datetime.now(UTC)
        )
        last_doc_sync = (
            cc_pair_model.last_time_perm_sync if needs_doc_sync else datetime.now(UTC)
        )

        # if either is still None at this point, it means sync is necessary but
        # has never completed.
        if last_group_sync is None or last_doc_sync is None:
            return None

        return min(last_group_sync, last_doc_sync)

    @classmethod
    def from_models(
        cls,
        cc_pair_model: ConnectorCredentialPair,
        latest_deletion_attempt: DeletionAttemptSnapshot | None,
        number_of_index_attempts: int,
        last_index_attempt: IndexAttempt | None,
        num_docs_indexed: int,  # not ideal, but this must be computed separately
        is_editable_for_current_user: bool,
        indexing: bool,
        last_successful_index_time: datetime | None = None,
        last_permission_sync_attempt_status: PermissionSyncStatus | None = None,
        permission_syncing: bool = False,
        last_permission_sync_attempt_finished: datetime | None = None,
        last_permission_sync_attempt_error_message: str | None = None,
    ) -> "CCPairFullInfo":
        # figure out if we need to artificially deflate the number of docs indexed.
        # This is required since the total number of docs indexed by a CC Pair is
        # updated before the new docs for an indexing attempt. If we don't do this,
        # there is a mismatch between these two numbers which may confuse users.
        last_indexing_status = last_index_attempt.status if last_index_attempt else None
        if (
            # only need to do this if the last indexing attempt is still in progress
            last_indexing_status == IndexingStatus.IN_PROGRESS
            and number_of_index_attempts == 1
            and last_index_attempt
            and last_index_attempt.new_docs_indexed
        ):
            num_docs_indexed = (
                last_index_attempt.new_docs_indexed if last_index_attempt else 0
            )

        overall_indexing_speed = num_docs_indexed / (
            (
                datetime.now(tz=timezone.utc) - cc_pair_model.connector.time_created
            ).total_seconds()
            / 60
        )

        return cls(
            id=cc_pair_model.id,
            name=cc_pair_model.name,
            status=cc_pair_model.status,
            in_repeated_error_state=cc_pair_model.in_repeated_error_state,
            num_docs_indexed=num_docs_indexed,
            connector=ConnectorSnapshot.from_connector_db_model(
                cc_pair_model.connector,
                credential_ids=[cc_pair_model.credential_id],
            ),
            credential=CredentialSnapshot.from_credential_db_model(
                cc_pair_model.credential
            ),
            number_of_index_attempts=number_of_index_attempts,
            last_index_attempt_status=last_indexing_status,
            latest_deletion_attempt=latest_deletion_attempt,
            access_type=cc_pair_model.access_type,
            is_editable_for_current_user=is_editable_for_current_user,
            deletion_failure_message=cc_pair_model.deletion_failure_message,
            indexing=indexing,
            creator=cc_pair_model.creator_id,
            creator_email=(
                cc_pair_model.creator.email if cc_pair_model.creator else None
            ),
            last_indexed=last_successful_index_time,
            last_pruned=cc_pair_model.last_pruned,
            last_full_permission_sync=cls._get_last_full_permission_sync(cc_pair_model),
            overall_indexing_speed=overall_indexing_speed,
            latest_checkpoint_description=None,
            last_permission_sync_attempt_status=last_permission_sync_attempt_status,
            permission_syncing=permission_syncing,
            last_permission_sync_attempt_finished=last_permission_sync_attempt_finished,
            last_permission_sync_attempt_error_message=last_permission_sync_attempt_error_message,
        )


class CeleryTaskStatus(BaseModel):
    id: str
    name: str
    status: TaskStatus
    start_time: datetime | None
    register_time: datetime | None


class FailedConnectorIndexingStatus(BaseModel):
    """Simplified version of ConnectorIndexingStatus for failed indexing attempts"""

    cc_pair_id: int
    name: str
    error_msg: str | None
    is_deletable: bool
    connector_id: int
    credential_id: int


class ConnectorStatus(BaseModel):
    """
    Represents the status of a connector,
    including indexing status elated information
    """

    cc_pair_id: int
    name: str
    connector: ConnectorSnapshot
    credential: CredentialSnapshot
    access_type: AccessType
    groups: list[int]


class ConnectorIndexingStatus(ConnectorStatus):
    """Represents the full indexing status of a connector"""

    cc_pair_status: ConnectorCredentialPairStatus
    # this is separate from the `status` above, since a connector can be `INITIAL_INDEXING`, `ACTIVE`,
    # or `PAUSED` and still be in a repeated error state.
    in_repeated_error_state: bool
    owner: str
    last_finished_status: IndexingStatus | None
    last_status: IndexingStatus | None
    last_success: datetime | None
    latest_index_attempt: IndexAttemptSnapshot | None
    docs_indexed: int
    in_progress: bool


class DocsCountOperator(str, Enum):
    GREATER_THAN = ">"
    LESS_THAN = "<"
    EQUAL_TO = "="


class ConnectorIndexingStatusLite(BaseModel):
    cc_pair_id: int
    name: str
    source: DocumentSource
    access_type: AccessType
    cc_pair_status: ConnectorCredentialPairStatus
    in_progress: bool
    in_repeated_error_state: bool
    last_finished_status: IndexingStatus | None
    last_status: IndexingStatus | None
    last_success: datetime | None
    is_editable: bool
    docs_indexed: int
    latest_index_attempt_docs_indexed: int | None


class SourceSummary(BaseModel):
    total_connectors: int
    active_connectors: int
    public_connectors: int
    total_docs_indexed: int


class ConnectorIndexingStatusLiteResponse(BaseModel):
    source: DocumentSource
    summary: SourceSummary
    current_page: int
    total_pages: int
    indexing_statuses: Sequence[ConnectorIndexingStatusLite | FederatedConnectorStatus]


class ConnectorCredentialPairIdentifier(BaseModel):
    connector_id: int
    credential_id: int


class ConnectorCredentialPairMetadata(BaseModel):
    name: str
    access_type: AccessType
    auto_sync_options: dict[str, Any] | None = None
    groups: list[int] = Field(default_factory=list)
    processing_mode: ProcessingMode = ProcessingMode.REGULAR


class CCStatusUpdateRequest(BaseModel):
    status: ConnectorCredentialPairStatus


class ConnectorCredentialPairDescriptor(BaseModel):
    id: int
    name: str
    connector: ConnectorSnapshot
    credential: CredentialSnapshot
    access_type: AccessType


class CCPairSummary(BaseModel):
    """Simplified connector-credential pair information with just essential data"""

    id: int
    name: str
    source: DocumentSource
    access_type: AccessType

    @classmethod
    def from_cc_pair_descriptor(
        cls, descriptor: ConnectorCredentialPairDescriptor
    ) -> "CCPairSummary":
        return cls(
            id=descriptor.id,
            name=descriptor.name,
            source=descriptor.connector.source,
            access_type=descriptor.access_type,
        )


class RunConnectorRequest(BaseModel):
    connector_id: int
    credential_ids: list[int] | None = None
    from_beginning: bool = False


class ConnectorRequestSubmission(BaseModel):
    connector_name: str


class CCPropertyUpdateRequest(BaseModel):
    name: str
    value: str


"""Connectors Models"""


class GoogleAppWebCredentials(BaseModel):
    client_id: str
    project_id: str
    auth_uri: str
    token_uri: str
    auth_provider_x509_cert_url: str
    client_secret: str
    redirect_uris: list[str]
    javascript_origins: list[str]


class GoogleAppCredentials(BaseModel):
    web: GoogleAppWebCredentials


class GoogleServiceAccountKey(BaseModel):
    type: str
    project_id: str
    private_key_id: str
    private_key: str
    client_email: str
    client_id: str
    auth_uri: str
    token_uri: str
    auth_provider_x509_cert_url: str
    client_x509_cert_url: str
    universe_domain: str


class GoogleServiceAccountCredentialRequest(BaseModel):
    google_primary_admin: str | None = None  # email of user to impersonate


class FileUploadResponse(BaseModel):
    file_paths: list[str]
    file_names: list[str]
    zip_metadata_file_id: str | None  # File ID pointing to metadata in file store


class ConnectorFileInfo(BaseModel):
    file_id: str
    file_name: str
    file_size: int | None = None
    upload_date: str | None = None


class ConnectorFilesResponse(BaseModel):
    files: list[ConnectorFileInfo]


class ObjectCreationIdResponse(BaseModel):
    id: int
    credential: CredentialSnapshot | None = None


class AuthStatus(BaseModel):
    authenticated: bool


class AuthUrl(BaseModel):
    auth_url: str


class GmailCallback(BaseModel):
    state: str
    code: str


class GDriveCallback(BaseModel):
    state: str
    code: str


class IndexingStatusRequest(BaseModel):
    secondary_index: bool = False
    source: DocumentSource | None = None
    access_type_filters: list[AccessType] = Field(default_factory=list)
    last_status_filters: list[IndexingStatus] = Field(default_factory=list)
    docs_count_operator: DocsCountOperator | None = None
    docs_count_value: int | None = None
    name_filter: str | None = None
    source_to_page: dict[DocumentSource, int] = Field(default_factory=dict)
    get_all_connectors: bool = False


================================================
FILE: backend/onyx/server/documents/private_key_types.py
================================================
import base64
from enum import Enum
from typing import Protocol

from fastapi import HTTPException
from fastapi import UploadFile

from onyx.server.documents.document_utils import validate_pkcs12_content


class ProcessPrivateKeyFileProtocol(Protocol):
    def __call__(self, file: UploadFile) -> str:
        """
        Accepts a file-like object, validates the file (e.g., checks extension and content),
        and returns its contents as a base64-encoded string if valid.
        Raises an exception if validation fails.
        """
        ...


class PrivateKeyFileTypes(Enum):
    SHAREPOINT_PFX_FILE = "sharepoint_pfx_file"


def process_sharepoint_private_key_file(file: UploadFile) -> str:
    """
    Process and validate a private key file upload.

    Validates both the file extension and file content to ensure it's a valid PKCS#12 file.
    Content validation prevents attacks that rely on file extension spoofing.
    """
    # First check file extension (basic filter)
    if not (file.filename and file.filename.lower().endswith(".pfx")):
        raise HTTPException(
            status_code=400, detail="Invalid file type. Only .pfx files are supported."
        )

    # Read file content for validation and processing
    private_key_bytes = file.file.read()

    # Validate file content to prevent extension spoofing attacks
    if not validate_pkcs12_content(private_key_bytes):
        raise HTTPException(
            status_code=400,
            detail="Invalid file content. The uploaded file does not appear to be a valid PKCS#12 (.pfx) file.",
        )

    # Convert to base64 if validation passes
    pfx_64 = base64.b64encode(private_key_bytes).decode("ascii")
    return pfx_64


FILE_TYPE_TO_FILE_PROCESSOR: dict[
    PrivateKeyFileTypes, ProcessPrivateKeyFileProtocol
] = {
    PrivateKeyFileTypes.SHAREPOINT_PFX_FILE: process_sharepoint_private_key_file,
}


================================================
FILE: backend/onyx/server/documents/standard_oauth.py
================================================
import json
import uuid
from typing import Annotated
from typing import cast

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from pydantic import BaseModel
from pydantic import ValidationError
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import OAuthConnector
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger
from onyx.utils.subclasses import find_all_subclasses_in_package
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/connector/oauth")

_OAUTH_STATE_KEY_FMT = "oauth_state:{state}"
_OAUTH_STATE_EXPIRATION_SECONDS = 10 * 60  # 10 minutes
_DESIRED_RETURN_URL_KEY = "desired_return_url"
_ADDITIONAL_KWARGS_KEY = "additional_kwargs"

# Cache for OAuth connectors, populated at module load time
_OAUTH_CONNECTORS: dict[DocumentSource, type[OAuthConnector]] = {}


def _discover_oauth_connectors() -> dict[DocumentSource, type[OAuthConnector]]:
    """Walk through the connectors package to find all OAuthConnector implementations"""
    global _OAUTH_CONNECTORS
    if _OAUTH_CONNECTORS:  # Return cached connectors if already discovered
        return _OAUTH_CONNECTORS

    # Import submodules using package-based discovery to avoid sys.path mutations
    oauth_connectors = find_all_subclasses_in_package(
        cast(type[OAuthConnector], OAuthConnector), "onyx.connectors"
    )

    _OAUTH_CONNECTORS = {cls.oauth_id(): cls for cls in oauth_connectors}
    return _OAUTH_CONNECTORS


# Discover OAuth connectors at module load time
_discover_oauth_connectors()


def _get_additional_kwargs(
    request: Request, connector_cls: type[OAuthConnector], args_to_ignore: list[str]
) -> dict[str, str]:
    # get additional kwargs from request
    # e.g. anything except for desired_return_url
    additional_kwargs_dict = {
        k: v for k, v in request.query_params.items() if k not in args_to_ignore
    }
    try:
        # validate
        connector_cls.AdditionalOauthKwargs(**additional_kwargs_dict)
    except ValidationError:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Invalid additional kwargs. Got {additional_kwargs_dict}, expected "
                f"{connector_cls.AdditionalOauthKwargs.model_json_schema()}"
            ),
        )

    return additional_kwargs_dict


class AuthorizeResponse(BaseModel):
    redirect_url: str


@router.get("/authorize/{source}")
def oauth_authorize(
    request: Request,
    source: DocumentSource,
    desired_return_url: Annotated[str | None, Query()] = None,
    _: User = Depends(current_user),
) -> AuthorizeResponse:
    """Initiates the OAuth flow by redirecting to the provider's auth page"""

    tenant_id = get_current_tenant_id()
    oauth_connectors = _discover_oauth_connectors()

    if source not in oauth_connectors:
        raise HTTPException(status_code=400, detail=f"Unknown OAuth source: {source}")

    connector_cls = oauth_connectors[source]
    base_url = WEB_DOMAIN

    # get additional kwargs from request
    # e.g. anything except for desired_return_url
    additional_kwargs = _get_additional_kwargs(
        request, connector_cls, ["desired_return_url"]
    )

    # store state in redis
    if not desired_return_url:
        desired_return_url = f"{base_url}/admin/connectors/{source}?step=0"
    redis_client = get_redis_client(tenant_id=tenant_id)
    state = str(uuid.uuid4())
    redis_client.set(
        _OAUTH_STATE_KEY_FMT.format(state=state),
        json.dumps(
            {
                _DESIRED_RETURN_URL_KEY: desired_return_url,
                _ADDITIONAL_KWARGS_KEY: additional_kwargs,
            }
        ),
        ex=_OAUTH_STATE_EXPIRATION_SECONDS,
    )

    return AuthorizeResponse(
        redirect_url=connector_cls.oauth_authorization_url(
            base_url, state, additional_kwargs
        )
    )


class CallbackResponse(BaseModel):
    redirect_url: str


@router.get("/callback/{source}")
def oauth_callback(
    source: DocumentSource,
    code: Annotated[str, Query()],
    state: Annotated[str, Query()],
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> CallbackResponse:
    """Handles the OAuth callback and exchanges the code for tokens"""
    oauth_connectors = _discover_oauth_connectors()

    if source not in oauth_connectors:
        raise HTTPException(status_code=400, detail=f"Unknown OAuth source: {source}")

    connector_cls = oauth_connectors[source]

    # get state from redis
    redis_client = get_redis_client()
    oauth_state_bytes = cast(
        bytes, redis_client.get(_OAUTH_STATE_KEY_FMT.format(state=state))
    )
    if not oauth_state_bytes:
        raise HTTPException(status_code=400, detail="Invalid OAuth state")
    oauth_state = json.loads(oauth_state_bytes.decode("utf-8"))

    desired_return_url = cast(str, oauth_state[_DESIRED_RETURN_URL_KEY])
    additional_kwargs = cast(dict[str, str], oauth_state[_ADDITIONAL_KWARGS_KEY])

    base_url = WEB_DOMAIN
    token_info = connector_cls.oauth_code_to_token(base_url, code, additional_kwargs)

    # Create a new credential with the token info
    credential_data = CredentialBase(
        credential_json=token_info,
        admin_public=True,  # Or based on some logic/parameter
        source=source,
        name=f"{source.title()} OAuth Credential",
    )

    credential = create_credential(
        credential_data=credential_data,
        user=user,
        db_session=db_session,
    )

    # TODO: use a library for url handling
    sep = "&" if "?" in desired_return_url else "?"
    return CallbackResponse(
        redirect_url=f"{desired_return_url}{sep}credentialId={credential.id}"
    )


class OAuthAdditionalKwargDescription(BaseModel):
    name: str
    display_name: str
    description: str


class OAuthDetails(BaseModel):
    oauth_enabled: bool
    additional_kwargs: list[OAuthAdditionalKwargDescription]


@router.get("/details/{source}")
def oauth_details(
    source: DocumentSource,
    _: User = Depends(current_user),
) -> OAuthDetails:
    oauth_connectors = _discover_oauth_connectors()

    if source not in oauth_connectors:
        return OAuthDetails(
            oauth_enabled=False,
            additional_kwargs=[],
        )

    connector_cls = oauth_connectors[source]

    additional_kwarg_descriptions = []
    for key, value in connector_cls.AdditionalOauthKwargs.model_json_schema()[
        "properties"
    ].items():
        additional_kwarg_descriptions.append(
            OAuthAdditionalKwargDescription(
                name=key,
                display_name=value.get("title", key),
                description=value.get("description", ""),
            )
        )

    return OAuthDetails(
        oauth_enabled=True,
        additional_kwargs=additional_kwarg_descriptions,
    )


================================================
FILE: backend/onyx/server/evals/__init__.py
================================================


================================================
FILE: backend/onyx/server/evals/models.py
================================================
from pydantic import BaseModel


class EvalRunAck(BaseModel):
    """Response model for evaluation runs"""

    success: bool


================================================
FILE: backend/onyx/server/features/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/build/.gitignore
================================================
sandbox/kubernetes/docker/templates/venv/**
sandbox/kubernetes/docker/demo_data/**


================================================
FILE: backend/onyx/server/features/build/AGENTS.template.md
================================================
# AGENTS.md

You are an AI agent powering **Onyx Craft**. You create interactive web applications, dashboards, and documents from company knowledge. You run in a secure sandbox with access to the user's knowledge sources. The knowledge sources you have are organization context like meeting notes, emails, slack messages, and other organizational data that you must use to answer your question.

{{USER_CONTEXT}}

## Configuration

- **LLM**: {{LLM_PROVIDER_NAME}} / {{LLM_MODEL_NAME}}
- **Next.js**: Running on port {{NEXTJS_PORT}} (already started — do NOT run `npm run dev`)
  {{DISABLED_TOOLS_SECTION}}

## Environment

Ephemeral VM with Python 3.11 and Node v22. Virtual environment at `.venv/` includes numpy, pandas, matplotlib, scipy.

Install packages: `pip install <pkg>` or `npm install <pkg>` (from `outputs/web`).

{{ORG_INFO_SECTION}}

## Skills

{{AVAILABLE_SKILLS_SECTION}}

Read the relevant SKILL.md before starting work that the skill covers.

## Recommended Task Approach Methodology

When presented with a task, you typically:

1. Analyze the request to understand what's being asked
2. Break down complex problems into manageable steps and sub-questions
3. Use appropriate tools and methods to address each step
4. Provide clear communication throughout the process
5. Deliver results in a helpful and organized manner

Follow this two-step pattern for most tasks:

### Step 1: Information Retrieval

1. **Search** knowledge sources using `find`, `grep`, or direct file reads. Start your search at the root of the `files/` directory
to get a general grasp of what subdirectories to further explore, especially when looking for a person. their name may be a proper noun
or strictly lowercase.
2. **Extract** relevant data from JSON documents
3. **Summarize** key findings before proceeding

**Tip**: Use `find`, `grep`, or `glob` to search files directly rather than navigating directories one at a time.

### Step 2: Output Generation

1. **Choose format**: Web app for interactive/visual, Markdown for reports, or direct response for quick answers
2. **Build** the output using retrieved information
3. **Verify** the output renders correctly and includes accurate data

## Behavior Guidelines

- **Accuracy**: Do not make any assumptions about the user. Any conclusions you reach must be supported by the provided data.

- **Completeness**: For any tasks requiring data from the knowledge sources, you should make sure to look at ALL sources that may be relevant to the user's questions and use that in your final response. Make sure you check Google Drive if applicable
  - **Explicitly state** which sources were checked and which had no relevant data
  - **Search ALL knowledge sources** for the person's name/email, not just the obvious ones when answering questions about a person's activites.

- **Task Management**: For any non-trivial task involving multiple steps, you should organize your work and track progress. This helps users understand what you're doing and ensures nothing is missed.

- **Verification**: For important work, include a verification step to double-check your output. This could involve testing functionality, reviewing for accuracy, or validating against requirements.

- Critical execution rule: If you say you're about to do something, actually do it in the same turn (run the tool call right after).

- Check off completed TODOs before reporting progress.

- Your main goal is to follow the USER's instructions at each message

- Don't mention tool names to the user; describe actions naturally.

## Knowledge Sources

The `files/` directory contains JSON documents from various knowledge sources. Here's what's available:

{{KNOWLEDGE_SOURCES_SECTION}}

### Document Format

Files are JSON with: `title`, `source`, `metadata`, `sections[{text, link}]`.

**Important**: The `files/` directory is read-only. Do NOT attempt to write to it.

## Outputs

All outputs go in the `outputs/` directory.

| Format       | Use For                                  |
| ------------ | ---------------------------------------- |
| **Web App**  | Interactive dashboards, data exploration |
| **Markdown** | Reports, analyses, documentation         |
| **Response** | Quick answers, lookups                   |

You can also generate other output formats if you think they more directly answer the user's question

### Web Apps

Use `outputs/web` with Next.js 16.1.1, React v19, Tailwind, Recharts, shadcn/ui.

<!-- **⚠️ Read `outputs/web/AGENTS.md` for webapp technical specs and styling rules. For all other output types, this is unneccessary. ** -->

### Markdown

Save to `outputs/markdown/*.md`. Use clear headings and tables.

## Questions to Ask

- Did you check all relevant sources that could be useful in addressing the user's question?
- Did you generate the correct output format that the user requested?
- Did you answer the user's question thoroughly?


================================================
FILE: backend/onyx/server/features/build/__init__.py
================================================
# Build feature module


================================================
FILE: backend/onyx/server/features/build/api/api.py
================================================
import re
from collections.abc import Iterator
from pathlib import Path
from uuid import UUID

import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi.responses import RedirectResponse
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.auth.users import optional_user
from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import ProcessingMode
from onyx.db.enums import SharingScope
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.models import BuildSession
from onyx.db.models import User
from onyx.server.features.build.api.messages_api import router as messages_router
from onyx.server.features.build.api.models import BuildConnectorInfo
from onyx.server.features.build.api.models import BuildConnectorListResponse
from onyx.server.features.build.api.models import BuildConnectorStatus
from onyx.server.features.build.api.models import RateLimitResponse
from onyx.server.features.build.api.rate_limit import get_user_rate_limit_status
from onyx.server.features.build.api.sessions_api import router as sessions_router
from onyx.server.features.build.api.user_library import router as user_library_router
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.sandbox import get_sandbox_manager
from onyx.server.features.build.session.manager import SessionManager
from onyx.server.features.build.utils import is_onyx_craft_enabled
from onyx.utils.logger import setup_logger

logger = setup_logger()

_TEMPLATES_DIR = Path(__file__).parent / "templates"
_WEBAPP_HMR_FIXER_TEMPLATE = (_TEMPLATES_DIR / "webapp_hmr_fixer.js").read_text()


def require_onyx_craft_enabled(user: User = Depends(current_user)) -> User:
    """
    Dependency that checks if Onyx Craft is enabled for the user.
    Raises HTTP 403 if Onyx Craft is disabled via feature flag.
    """
    if not is_onyx_craft_enabled(user):
        raise HTTPException(
            status_code=403,
            detail="Onyx Craft is not available",
        )
    return user


router = APIRouter(prefix="/build", dependencies=[Depends(require_onyx_craft_enabled)])

# Include sub-routers for sessions, messages, and user library
router.include_router(sessions_router, tags=["build"])
router.include_router(messages_router, tags=["build"])
router.include_router(user_library_router, tags=["build"])


# -----------------------------------------------------------------------------
# Rate Limiting
# -----------------------------------------------------------------------------


@router.get("/limit", response_model=RateLimitResponse)
def get_rate_limit(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> RateLimitResponse:
    """Get rate limit information for the current user."""
    return get_user_rate_limit_status(user, db_session)


# -----------------------------------------------------------------------------
# Build Connectors
# -----------------------------------------------------------------------------


@router.get("/connectors", response_model=BuildConnectorListResponse)
def get_build_connectors(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> BuildConnectorListResponse:
    """Get all connectors for the build admin panel.

    Returns connector-credential pairs with simplified status information.
    On the build configure page, all users (including admins) only see connectors
    they own/created. Users can create new connectors if they don't have one of a type.
    """
    # Fetch both FILE_SYSTEM (standard connectors) and RAW_BINARY (User Library) connectors
    file_system_cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.FILE_SYSTEM,
    )
    raw_binary_cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.RAW_BINARY,
    )
    cc_pairs = file_system_cc_pairs + raw_binary_cc_pairs

    # Filter to only show connectors created by the current user
    # All users (including admins) must create their own connectors on the build configure page
    if user:
        cc_pairs = [cc_pair for cc_pair in cc_pairs if cc_pair.creator_id == user.id]

    connectors: list[BuildConnectorInfo] = []
    for cc_pair in cc_pairs:
        # Skip ingestion API connectors and default pairs
        if cc_pair.connector.source == DocumentSource.INGESTION_API:
            continue
        if cc_pair.name == "DefaultCCPair":
            continue

        # Determine status
        error_message: str | None = None
        has_ever_succeeded = cc_pair.last_successful_index_time is not None

        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
            status = BuildConnectorStatus.DELETING
        elif cc_pair.status == ConnectorCredentialPairStatus.INVALID:
            # If connector has succeeded before but credentials are now invalid,
            # show as connected_with_errors so user can still disable demo data
            if has_ever_succeeded:
                status = BuildConnectorStatus.CONNECTED_WITH_ERRORS
                error_message = "Connector credentials are invalid"
            else:
                status = BuildConnectorStatus.ERROR
                error_message = "Connector credentials are invalid"
        else:
            # Check latest index attempt for errors
            latest_attempt = get_latest_index_attempt_for_cc_pair_id(
                db_session=db_session,
                connector_credential_pair_id=cc_pair.id,
                secondary_index=False,
                only_finished=True,
            )

            if latest_attempt and latest_attempt.status == IndexingStatus.FAILED:
                # If connector has succeeded before but latest attempt failed,
                # show as connected_with_errors
                if has_ever_succeeded:
                    status = BuildConnectorStatus.CONNECTED_WITH_ERRORS
                else:
                    status = BuildConnectorStatus.ERROR
                error_message = latest_attempt.error_msg
            elif (
                latest_attempt
                and latest_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS
            ):
                # Completed with errors - if it has succeeded before, show as connected_with_errors
                if has_ever_succeeded:
                    status = BuildConnectorStatus.CONNECTED_WITH_ERRORS
                else:
                    status = BuildConnectorStatus.ERROR
                error_message = "Indexing completed with errors"
            elif cc_pair.status == ConnectorCredentialPairStatus.PAUSED:
                status = BuildConnectorStatus.CONNECTED
            elif cc_pair.last_successful_index_time is None:
                # Never successfully indexed - check if currently indexing
                # First check cc_pair status for scheduled/initial indexing
                if cc_pair.status in (
                    ConnectorCredentialPairStatus.SCHEDULED,
                    ConnectorCredentialPairStatus.INITIAL_INDEXING,
                ):
                    status = BuildConnectorStatus.INDEXING
                else:
                    in_progress_attempt = get_latest_index_attempt_for_cc_pair_id(
                        db_session=db_session,
                        connector_credential_pair_id=cc_pair.id,
                        secondary_index=False,
                        only_finished=False,
                    )
                    if (
                        in_progress_attempt
                        and in_progress_attempt.status == IndexingStatus.IN_PROGRESS
                    ):
                        status = BuildConnectorStatus.INDEXING
                    elif (
                        in_progress_attempt
                        and in_progress_attempt.status == IndexingStatus.NOT_STARTED
                    ):
                        status = BuildConnectorStatus.INDEXING
                    else:
                        # Has a finished attempt but never succeeded - likely error
                        status = BuildConnectorStatus.ERROR
                        error_message = (
                            latest_attempt.error_msg
                            if latest_attempt
                            else "Initial indexing failed"
                        )
            else:
                status = BuildConnectorStatus.CONNECTED

        connectors.append(
            BuildConnectorInfo(
                cc_pair_id=cc_pair.id,
                connector_id=cc_pair.connector.id,
                credential_id=cc_pair.credential.id,
                source=cc_pair.connector.source.value,
                name=cc_pair.name or cc_pair.connector.name or "Unnamed",
                status=status,
                docs_indexed=0,  # Would need to query for this
                last_indexed=cc_pair.last_successful_index_time,
                error_message=error_message,
            )
        )

    return BuildConnectorListResponse(connectors=connectors)


# Headers to skip when proxying.
# Hop-by-hop headers must not be forwarded, and set-cookie is stripped to
# prevent LLM-generated apps from setting cookies on the parent Onyx domain.
EXCLUDED_HEADERS = {
    "content-encoding",
    "content-length",
    "transfer-encoding",
    "connection",
    "set-cookie",
}


def _stream_response(response: httpx.Response) -> Iterator[bytes]:
    """Stream the response content in chunks."""
    for chunk in response.iter_bytes(chunk_size=8192):
        yield chunk


def _inject_hmr_fixer(content: bytes, session_id: str) -> bytes:
    """Inject a script that stubs root-scoped Next HMR websocket connections."""
    base = f"/api/build/sessions/{session_id}/webapp"
    script = f"<script>{_WEBAPP_HMR_FIXER_TEMPLATE.replace('__WEBAPP_BASE__', base)}</script>"
    text = content.decode("utf-8")
    text = re.sub(
        r"(<head\b[^>]*>)",
        lambda m: m.group(0) + script,
        text,
        count=1,
        flags=re.IGNORECASE,
    )
    return text.encode("utf-8")


def _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:
    """Rewrite Next.js asset paths to go through the proxy."""
    webapp_base_path = f"/api/build/sessions/{session_id}/webapp"
    escaped_webapp_base_path = webapp_base_path.replace("/", r"\/")
    hmr_paths = ("/_next/webpack-hmr", "/_next/hmr")

    text = content.decode("utf-8")
    # Anchor on delimiter so already-prefixed URLs (from assetPrefix) aren't double-rewritten.
    for delim in ('"', "'", "("):
        text = text.replace(f"{delim}/_next/", f"{delim}{webapp_base_path}/_next/")
        text = re.sub(
            rf"{re.escape(delim)}https?://[^/\"')]+/_next/",
            f"{delim}{webapp_base_path}/_next/",
            text,
        )
        text = re.sub(
            rf"{re.escape(delim)}wss?://[^/\"')]+/_next/",
            f"{delim}{webapp_base_path}/_next/",
            text,
        )
    text = text.replace(r"\/_next\/", rf"{escaped_webapp_base_path}\/_next\/")
    text = re.sub(
        r"https?:\\\/\\\/[^\"']+?\\\/_next\\\/",
        rf"{escaped_webapp_base_path}\/_next\/",
        text,
    )
    text = re.sub(
        r"wss?:\\\/\\\/[^\"']+?\\\/_next\\\/",
        rf"{escaped_webapp_base_path}\/_next\/",
        text,
    )
    for hmr_path in hmr_paths:
        escaped_hmr_path = hmr_path.replace("/", r"\/")
        text = text.replace(
            f"{webapp_base_path}{hmr_path}",
            hmr_path,
        )
        text = text.replace(
            f"{escaped_webapp_base_path}{escaped_hmr_path}",
            escaped_hmr_path,
        )
    text = re.sub(
        r'"(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\.json)"',
        f'"{webapp_base_path}\\1"',
        text,
    )
    text = re.sub(
        r"'(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\.json)'",
        f"'{webapp_base_path}\\1'",
        text,
    )
    text = text.replace('"/favicon.ico', f'"{webapp_base_path}/favicon.ico')
    return text.encode("utf-8")


def _rewrite_proxy_response_headers(
    headers: dict[str, str], session_id: str
) -> dict[str, str]:
    """Rewrite response headers that can leak root-scoped asset URLs."""
    link = headers.get("link")
    if link:
        webapp_base_path = f"/api/build/sessions/{session_id}/webapp"
        rewritten_link = re.sub(
            r"<https?://[^>]+/_next/",
            f"<{webapp_base_path}/_next/",
            link,
        )
        rewritten_link = rewritten_link.replace(
            "</_next/", f"<{webapp_base_path}/_next/"
        )
        headers["link"] = rewritten_link
    return headers


# Content types that may contain asset path references that need rewriting
REWRITABLE_CONTENT_TYPES = {
    "text/html",
    "text/css",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
}


def _get_sandbox_url(session_id: UUID, db_session: Session) -> str:
    """Get the internal URL for a session's Next.js server.

    Uses the sandbox manager to get the correct URL for both local and
    Kubernetes environments.

    Args:
        session_id: The build session ID
        db_session: Database session

    Returns:
        Internal URL to proxy requests to

    Raises:
        HTTPException: If session not found, port not allocated, or sandbox not found
    """

    session = db_session.get(BuildSession, session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    if session.nextjs_port is None:
        raise HTTPException(status_code=503, detail="Session port not allocated")
    if session.user_id is None:
        raise HTTPException(status_code=404, detail="User not found")

    sandbox = get_sandbox_by_user_id(db_session, session.user_id)
    if sandbox is None:
        raise HTTPException(status_code=404, detail="Sandbox not found")

    sandbox_manager = get_sandbox_manager()
    return sandbox_manager.get_webapp_url(sandbox.id, session.nextjs_port)


def _proxy_request(
    path: str, request: Request, session_id: UUID, db_session: Session
) -> StreamingResponse | Response:
    """Proxy a request to the sandbox's Next.js server."""
    base_url = _get_sandbox_url(session_id, db_session)

    # Build the target URL
    target_url = f"{base_url}/{path.lstrip('/')}"

    # Include query params if present
    if request.query_params:
        target_url = f"{target_url}?{request.query_params}"

    logger.debug(f"Proxying request to: {target_url}")

    try:
        # Make the request to the target URL
        with httpx.Client(timeout=30.0, follow_redirects=True) as client:
            response = client.get(
                target_url,
                headers={
                    key: value
                    for key, value in request.headers.items()
                    if key.lower() not in ("host", "content-length")
                },
            )

            # Build response headers, excluding hop-by-hop headers
            response_headers = {
                key: value
                for key, value in response.headers.items()
                if key.lower() not in EXCLUDED_HEADERS
            }
            response_headers = _rewrite_proxy_response_headers(
                response_headers, str(session_id)
            )

            content_type = response.headers.get("content-type", "")

            # For HTML/CSS/JS responses, rewrite asset paths
            if any(ct in content_type for ct in REWRITABLE_CONTENT_TYPES):
                content = _rewrite_asset_paths(response.content, str(session_id))
                if "text/html" in content_type:
                    content = _inject_hmr_fixer(content, str(session_id))
                return Response(
                    content=content,
                    status_code=response.status_code,
                    headers=response_headers,
                    media_type=content_type,
                )

            return StreamingResponse(
                content=_stream_response(response),
                status_code=response.status_code,
                headers=response_headers,
                media_type=content_type or None,
            )

    except httpx.TimeoutException:
        logger.error(f"Timeout while proxying request to {target_url}")
        raise HTTPException(status_code=504, detail="Gateway timeout")
    except httpx.RequestError as e:
        logger.error(f"Error proxying request to {target_url}: {e}")
        raise HTTPException(status_code=502, detail="Bad gateway")


def _check_webapp_access(
    session_id: UUID, user: User | None, db_session: Session
) -> BuildSession:
    """Check if user can access a session's webapp.

    - public_global: accessible by anyone (no auth required)
    - public_org: accessible by any authenticated user
    - private: only accessible by the session owner
    """
    session = db_session.get(BuildSession, session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    if session.sharing_scope == SharingScope.PUBLIC_GLOBAL:
        return session
    if user is None:
        raise HTTPException(status_code=401, detail="Authentication required")
    if session.sharing_scope == SharingScope.PRIVATE and session.user_id != user.id:
        raise HTTPException(status_code=404, detail="Session not found")
    return session


_OFFLINE_HTML_PATH = _TEMPLATES_DIR / "webapp_offline.html"


def _offline_html_response() -> Response:
    """Return a branded Craft HTML page when the sandbox is not reachable.

    Design mirrors the default Craft web template (outputs/web/app/page.tsx):
    terminal window aesthetic with Minecraft-themed typing animation.

    """
    html = _OFFLINE_HTML_PATH.read_text()
    return Response(content=html, status_code=503, media_type="text/html")


# Public router for webapp proxy — no authentication required
# (access controlled per-session via sharing_scope)
public_build_router = APIRouter(prefix="/build")


@public_build_router.get("/sessions/{session_id}/webapp", response_model=None)
@public_build_router.get(
    "/sessions/{session_id}/webapp/{path:path}", response_model=None
)
def get_webapp(
    session_id: UUID,
    request: Request,
    path: str = "",
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> StreamingResponse | Response:
    """Proxy the webapp for a specific session (root and subpaths).

    Accessible without authentication when sharing_scope is public_global.
    Returns a friendly offline page when the sandbox is not running.
    """
    try:
        _check_webapp_access(session_id, user, db_session)
    except HTTPException as e:
        if e.status_code == 401:
            return RedirectResponse(url="/auth/login", status_code=302)
        raise
    try:
        return _proxy_request(path, request, session_id, db_session)
    except HTTPException as e:
        if e.status_code in (502, 503, 504):
            return _offline_html_response()
        raise


# =============================================================================
# Sandbox Management Endpoints
# =============================================================================


@router.post("/sandbox/reset", response_model=None)
def reset_sandbox(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Reset the user's sandbox by terminating it and cleaning up all sessions.

    This endpoint terminates the user's shared sandbox container/pod and
    cleans up all session workspaces. Useful for "start fresh" functionality.

    After calling this endpoint, the next session creation will provision a
    new sandbox.
    """
    session_manager = SessionManager(db_session)

    try:
        success = session_manager.terminate_user_sandbox(user.id)
        if not success:
            raise HTTPException(
                status_code=404,
                detail="No sandbox found for user",
            )
        db_session.commit()
    except HTTPException:
        raise
    except Exception as e:
        db_session.rollback()
        logger.error(f"Failed to reset sandbox for user {user.id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to reset sandbox: {e}",
        )

    return Response(status_code=204)


================================================
FILE: backend/onyx/server/features/build/api/messages_api.py
================================================
"""API endpoints for Build Mode message management."""

from collections.abc import Generator
from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.server.features.build.api.models import MessageListResponse
from onyx.server.features.build.api.models import MessageRequest
from onyx.server.features.build.api.models import MessageResponse
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.db.sandbox import update_sandbox_heartbeat
from onyx.server.features.build.session.manager import RateLimitError
from onyx.server.features.build.session.manager import SessionManager
from onyx.utils.logger import setup_logger

logger = setup_logger()


router = APIRouter()


def check_build_rate_limits(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """
    Dependency to check build mode rate limits before processing the request.

    Raises HTTPException(429) if rate limit is exceeded.
    Follows the same pattern as chat's check_token_rate_limits.
    """
    session_manager = SessionManager(db_session)

    try:
        session_manager.check_rate_limit(user)
    except RateLimitError as e:
        raise HTTPException(
            status_code=429,
            detail=str(e),
        )


@router.get("/sessions/{session_id}/messages", tags=PUBLIC_API_TAGS)
def list_messages(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageListResponse:
    """Get all messages for a build session."""
    session_manager = SessionManager(db_session)

    messages = session_manager.list_messages(session_id, user.id)

    if messages is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return MessageListResponse(
        messages=[MessageResponse.from_model(msg) for msg in messages]
    )


@router.post("/sessions/{session_id}/send-message", tags=PUBLIC_API_TAGS)
def send_message(
    session_id: UUID,
    request: MessageRequest,
    user: User = Depends(current_user),
    _rate_limit_check: None = Depends(check_build_rate_limits),
) -> StreamingResponse:
    """
    Send a message to the CLI agent and stream the response.

    Enforces rate limiting before executing the agent (via dependency).
    Returns a Server-Sent Events (SSE) stream with the agent's response.

    Follows the same pattern as /chat/send-chat-message for consistency.
    """

    def stream_generator() -> Generator[str, None, None]:
        """Stream generator that manages its own database session.

        This is necessary because StreamingResponse consumes the generator
        AFTER the endpoint returns, at which point FastAPI's dependency-injected
        db_session has already been closed. By creating a new session inside
        the generator, we ensure the session remains open for the entire
        streaming duration.
        """
        # Capture user info needed for streaming (user object may not be available
        # after the endpoint returns due to dependency cleanup)
        user_id = user.id
        message_content = request.content

        with get_session_with_current_tenant() as db_session:
            # Update sandbox heartbeat - this is the only place we track activity
            # for determining when a sandbox should be put to sleep
            sandbox = get_sandbox_by_user_id(db_session, user.id)
            if sandbox and sandbox.status.is_active():
                update_sandbox_heartbeat(db_session, sandbox.id)

            session_manager = SessionManager(db_session)
            yield from session_manager.send_message(
                session_id, user_id, message_content
            )

    # Stream the CLI agent's response
    return StreamingResponse(
        stream_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
        },
    )


================================================
FILE: backend/onyx/server/features/build/api/models.py
================================================
from datetime import datetime
from enum import Enum
from typing import Any
from typing import TYPE_CHECKING
from typing import Union

from pydantic import BaseModel

from onyx.configs.constants import MessageType
from onyx.db.enums import ArtifactType
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.enums import SharingScope
from onyx.server.features.build.sandbox.models import (
    FilesystemEntry as FileSystemEntry,
)

if TYPE_CHECKING:
    from onyx.db.models import Sandbox
    from onyx.db.models import BuildSession


# ===== Session Models =====
class SessionCreateRequest(BaseModel):
    """Request to create a new build session."""

    name: str | None = None  # Optional session name
    demo_data_enabled: bool = True  # Whether to enable demo org_info data in sandbox
    user_work_area: str | None = None  # User's work area (e.g., "engineering")
    user_level: str | None = None  # User's level (e.g., "ic", "manager")
    # LLM selection from user's cookie
    llm_provider_type: str | None = None  # Provider type (e.g., "anthropic", "openai")
    llm_model_name: str | None = None  # Model name (e.g., "claude-opus-4-5")


class SessionUpdateRequest(BaseModel):
    """Request to update a build session.

    If name is None, the session name will be auto-generated using LLM.
    """

    name: str | None = None


class SessionNameGenerateResponse(BaseModel):
    """Response containing a generated session name."""

    name: str


class SandboxResponse(BaseModel):
    """Sandbox metadata in session response."""

    id: str
    status: SandboxStatus
    container_id: str | None
    created_at: datetime
    last_heartbeat: datetime | None

    @classmethod
    def from_model(cls, sandbox: Any) -> "SandboxResponse":
        """Convert Sandbox ORM model to response."""
        return cls(
            id=str(sandbox.id),
            status=sandbox.status,
            container_id=sandbox.container_id,
            created_at=sandbox.created_at,
            last_heartbeat=sandbox.last_heartbeat,
        )


class ArtifactResponse(BaseModel):
    """Artifact metadata in session response."""

    id: str
    session_id: str
    type: ArtifactType
    name: str
    path: str
    preview_url: str | None
    created_at: datetime
    updated_at: datetime

    @classmethod
    def from_model(cls, artifact: Any) -> "ArtifactResponse":
        """Convert Artifact ORM model to response."""
        return cls(
            id=str(artifact.id),
            session_id=str(artifact.session_id),
            type=artifact.type,
            name=artifact.name,
            path=artifact.path,
            preview_url=getattr(artifact, "preview_url", None),
            created_at=artifact.created_at,
            updated_at=artifact.updated_at,
        )


class SessionResponse(BaseModel):
    """Response containing session details."""

    id: str
    user_id: str | None
    name: str | None
    status: BuildSessionStatus
    created_at: datetime
    last_activity_at: datetime
    nextjs_port: int | None
    sandbox: SandboxResponse | None
    artifacts: list[ArtifactResponse]
    sharing_scope: SharingScope

    @classmethod
    def from_model(
        cls, session: "BuildSession", sandbox: Union["Sandbox", None] = None
    ) -> "SessionResponse":
        """Convert BuildSession ORM model to response.

        Args:
            session: BuildSession ORM model
            sandbox: Optional Sandbox ORM model. Since sandboxes are now user-owned
                     (not session-owned), the sandbox must be passed separately.
        """
        return cls(
            id=str(session.id),
            user_id=str(session.user_id) if session.user_id else None,
            name=session.name,
            status=session.status,
            created_at=session.created_at,
            last_activity_at=session.last_activity_at,
            nextjs_port=session.nextjs_port,
            sandbox=(SandboxResponse.from_model(sandbox) if sandbox else None),
            artifacts=[ArtifactResponse.from_model(a) for a in session.artifacts],
            sharing_scope=session.sharing_scope,
        )


class DetailedSessionResponse(SessionResponse):
    """Extended session response with sandbox state details.

    Used for single-session endpoints where we compute expensive fields
    like session_loaded_in_sandbox.
    """

    session_loaded_in_sandbox: bool

    @classmethod
    def from_session_response(
        cls,
        base: SessionResponse,
        session_loaded_in_sandbox: bool,
    ) -> "DetailedSessionResponse":
        return cls(
            **base.model_dump(),
            session_loaded_in_sandbox=session_loaded_in_sandbox,
        )


class SessionListResponse(BaseModel):
    """Response containing list of sessions."""

    sessions: list[SessionResponse]


class SetSessionSharingRequest(BaseModel):
    """Request to set the sharing scope of a session."""

    sharing_scope: SharingScope


class SetSessionSharingResponse(BaseModel):
    """Response after setting session sharing scope."""

    session_id: str
    sharing_scope: SharingScope


# ===== Message Models =====
class MessageRequest(BaseModel):
    """Request to send a message to the CLI agent."""

    content: str


class MessageResponse(BaseModel):
    """Response containing message details.

    All message data is stored in message_metadata as JSON (the raw ACP packet).
    The turn_index groups all assistant responses under the user prompt they respond to.

    Packet types in message_metadata:
    - user_message: {type: "user_message", content: {...}}
    - agent_message: {type: "agent_message", content: {...}}
    - agent_thought: {type: "agent_thought", content: {...}}
    - tool_call_progress: {type: "tool_call_progress", status: "completed", ...}
    - agent_plan_update: {type: "agent_plan_update", entries: [...]}
    """

    id: str
    session_id: str
    turn_index: int
    type: MessageType
    message_metadata: dict[str, Any]
    created_at: datetime

    @classmethod
    def from_model(cls, message: Any) -> "MessageResponse":
        """Convert BuildMessage ORM model to response."""
        return cls(
            id=str(message.id),
            session_id=str(message.session_id),
            turn_index=message.turn_index,
            type=message.type,
            message_metadata=message.message_metadata,
            created_at=message.created_at,
        )


class MessageListResponse(BaseModel):
    """Response containing list of messages."""

    messages: list[MessageResponse]


# ===== Legacy Models (for compatibility with other code) =====
class CreateSessionRequest(BaseModel):
    task: str
    available_sources: list[str] | None = None


class CreateSessionResponse(BaseModel):
    session_id: str


class ExecuteRequest(BaseModel):
    task: str
    context: str | None = None


class ArtifactInfo(BaseModel):
    artifact_type: str  # "webapp", "file", "markdown", "image"
    path: str
    filename: str
    mime_type: str | None = None


class SessionStatus(BaseModel):
    session_id: str
    status: str  # "idle", "running", "completed", "failed"
    webapp_url: str | None = None


class DirectoryListing(BaseModel):
    path: str  # Current directory path
    entries: list[FileSystemEntry]  # Contents


class WebappInfo(BaseModel):
    has_webapp: bool  # Whether a webapp exists in outputs/web
    webapp_url: str | None  # URL to access the webapp (e.g., http://localhost:3015)
    status: str  # Sandbox status (running, terminated, etc.)
    ready: bool  # Whether the NextJS dev server is actually responding
    sharing_scope: SharingScope


# ===== File Upload Models =====
class UploadResponse(BaseModel):
    """Response after successful file upload."""

    filename: str  # Sanitized filename
    path: str  # Relative path in sandbox (e.g., "attachments/doc.pdf")
    size_bytes: int  # File size in bytes


# ===== Rate Limit Models =====
class RateLimitResponse(BaseModel):
    """Rate limit information."""

    is_limited: bool
    limit_type: str  # "weekly" or "total"
    messages_used: int
    limit: int
    reset_timestamp: str | None = None


# ===== Pre-Provisioned Session Check Models =====
class PreProvisionedCheckResponse(BaseModel):
    """Response for checking if a pre-provisioned session is still valid (empty)."""

    valid: bool  # True if session exists and has no messages
    session_id: str | None = None  # Session ID if valid, None otherwise


# ===== Build Connector Models =====
class BuildConnectorStatus(str, Enum):
    """Status of a build connector."""

    NOT_CONNECTED = "not_connected"
    CONNECTED = "connected"
    CONNECTED_WITH_ERRORS = "connected_with_errors"
    INDEXING = "indexing"
    ERROR = "error"
    DELETING = "deleting"


class BuildConnectorInfo(BaseModel):
    """Simplified connector info for build admin panel."""

    cc_pair_id: int
    connector_id: int
    credential_id: int
    source: str
    name: str
    status: BuildConnectorStatus
    docs_indexed: int
    last_indexed: datetime | None
    error_message: str | None = None


class BuildConnectorListResponse(BaseModel):
    """List of build connectors."""

    connectors: list[BuildConnectorInfo]


# ===== Suggestion Bubble Models =====
class SuggestionTheme(str, Enum):
    """Theme/category of a follow-up suggestion."""

    ADD = "add"
    QUESTION = "question"


class SuggestionBubble(BaseModel):
    """A single follow-up suggestion bubble."""

    theme: SuggestionTheme
    text: str


class GenerateSuggestionsRequest(BaseModel):
    """Request to generate follow-up suggestions."""

    user_message: str  # First user message
    assistant_message: str  # First assistant text response (accumulated)


class GenerateSuggestionsResponse(BaseModel):
    """Response containing generated suggestions."""

    suggestions: list[SuggestionBubble]


class PptxPreviewResponse(BaseModel):
    """Response with PPTX slide preview metadata."""

    slide_count: int
    slide_paths: list[str]  # Relative paths to slide JPEGs within session workspace
    cached: bool  # Whether result was served from cache


================================================
FILE: backend/onyx/server/features/build/api/packet_logger.py
================================================
"""Comprehensive packet and ACP event logger for build mode debugging.

Logs all packets, JSON-RPC messages, and ACP events during build mode streaming.
Provides detailed tracing for the entire agent loop and communication flow.

Log output locations (in priority order):
1. /var/log/onyx/packets.log (for Docker - mounted to host via docker-compose volumes)
2. backend/log/packets.log (for local dev without Docker)
3. backend/onyx/server/features/build/packets.log (fallback)

Enable logging by setting LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true.

Features:
- Rotating log with max 5000 lines (configurable via BUILD_PACKET_LOG_MAX_LINES)
- Automatically trims oldest entries when limit is exceeded
- Visual separators between message streams for easy reading
"""

import json
import logging
import os
import threading
import time
from pathlib import Path
from typing import Any
from uuid import UUID

# Default max lines to keep in the log file (acts like a deque)
DEFAULT_MAX_LOG_LINES = 5000


class PacketLogger:
    """Comprehensive logger for ACP/OpenCode communication and packet streaming.

    Logs:
    - All JSON-RPC requests sent to the agent
    - All JSON-RPC responses/notifications received from the agent
    - All ACP events emitted during streaming
    - Session and sandbox lifecycle events
    - Timing information for debugging performance

    The log file is kept to a maximum number of lines (default 5000) to prevent
    unbounded growth. When the limit is exceeded, the oldest lines are trimmed.
    """

    _instance: "PacketLogger | None" = None
    _initialized: bool

    def __new__(cls) -> "PacketLogger":
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        if self._initialized:
            return

        self._initialized = True
        # Enable via LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true
        log_level = os.getenv("LOG_LEVEL", "").upper()
        packet_logging = os.getenv("BUILD_PACKET_LOGGING", "").lower()
        self._enabled = log_level == "DEBUG" or packet_logging in ("true", "1", "yes")
        self._logger: logging.Logger | None = None
        self._log_file_path: Path | None = None
        self._session_start_times: dict[str, float] = {}

        # Max lines to keep in log file
        try:
            self._max_lines = int(
                os.getenv("BUILD_PACKET_LOG_MAX_LINES", str(DEFAULT_MAX_LOG_LINES))
            )
        except ValueError:
            self._max_lines = DEFAULT_MAX_LOG_LINES

        # Lock for thread-safe file operations
        self._file_lock = threading.Lock()

        # Track approximate line count to avoid reading file too often
        self._approx_line_count = 0
        self._lines_since_last_trim = 0
        # Trim every N lines written to avoid constant file reads
        self._trim_interval = 500

        if self._enabled:
            self._setup_logger()

    def _get_log_file_path(self) -> Path:
        """Determine the best log file path based on environment.

        Priority:
        1. /var/log/onyx/packets.log - Docker environment (mounted to host)
        2. backend/log/packets.log - Local dev (same dir as other logs)
        3. backend/onyx/server/features/build/packets.log - Fallback
        """
        # Option 1: Docker environment - use /var/log/onyx which is mounted
        docker_log_dir = Path("/var/log/onyx")
        if docker_log_dir.exists() and docker_log_dir.is_dir():
            return docker_log_dir / "packets.log"

        # Option 2: Local dev - use backend/log directory (same as other debug logs)
        # Navigate from this file to backend/log
        backend_dir = Path(__file__).parents[4]  # up to backend/
        local_log_dir = backend_dir / "log"
        if local_log_dir.exists() and local_log_dir.is_dir():
            return local_log_dir / "packets.log"

        # Option 3: Fallback to build directory
        build_dir = Path(__file__).parents[1]
        return build_dir / "packets.log"

    def _setup_logger(self) -> None:
        """Set up the file handler for packet logging."""
        self._log_file_path = self._get_log_file_path()

        # Ensure parent directory exists
        self._log_file_path.parent.mkdir(parents=True, exist_ok=True)

        self._logger = logging.getLogger("build.packets")
        self._logger.setLevel(logging.DEBUG)
        self._logger.propagate = False

        self._logger.handlers.clear()

        # Use append mode
        handler = logging.FileHandler(self._log_file_path, mode="a", encoding="utf-8")
        handler.setLevel(logging.DEBUG)
        # Include timestamp in each log entry
        handler.setFormatter(
            logging.Formatter(
                "%(asctime)s.%(msecs)03d | %(message)s", "%Y-%m-%d %H:%M:%S"
            )
        )

        self._logger.addHandler(handler)

        # Initialize line count from existing file
        self._init_line_count()

    def _init_line_count(self) -> None:
        """Initialize the approximate line count from the existing log file."""
        if not self._log_file_path or not self._log_file_path.exists():
            self._approx_line_count = 0
            return

        try:
            with open(self._log_file_path, "r", encoding="utf-8", errors="ignore") as f:
                self._approx_line_count = sum(1 for _ in f)
        except Exception:
            self._approx_line_count = 0

    def _maybe_trim_log(self) -> None:
        """Trim the log file if it exceeds the max line limit.

        This is called periodically (every _trim_interval lines) to avoid
        reading the file on every write.
        """
        self._lines_since_last_trim += 1

        if self._lines_since_last_trim < self._trim_interval:
            return

        self._lines_since_last_trim = 0
        self._trim_log_file()

    def _trim_log_file(self) -> None:
        """Trim the log file to keep only the last max_lines."""
        if not self._log_file_path or not self._log_file_path.exists():
            return

        with self._file_lock:
            try:
                # Read all lines
                with open(
                    self._log_file_path, "r", encoding="utf-8", errors="ignore"
                ) as f:
                    lines = f.readlines()

                current_count = len(lines)
                self._approx_line_count = current_count

                # If under limit, nothing to do
                if current_count <= self._max_lines:
                    return

                # Keep only the last max_lines
                lines_to_keep = lines[-self._max_lines :]

                # Close the logger's file handler temporarily
                if self._logger:
                    for handler in self._logger.handlers:
                        handler.close()

                # Rewrite the file with trimmed content
                with open(self._log_file_path, "w", encoding="utf-8") as f:
                    f.writelines(lines_to_keep)

                # Reopen the handler
                if self._logger:
                    self._logger.handlers.clear()
                    handler = logging.FileHandler(
                        self._log_file_path, mode="a", encoding="utf-8"
                    )
                    handler.setLevel(logging.DEBUG)
                    handler.setFormatter(
                        logging.Formatter(
                            "%(asctime)s.%(msecs)03d | %(message)s", "%Y-%m-%d %H:%M:%S"
                        )
                    )
                    self._logger.addHandler(handler)

                self._approx_line_count = len(lines_to_keep)

            except Exception:
                pass  # Silently ignore errors during trim

    def clear_log_file(self) -> None:
        """Clear the log file contents.

        Note: With the rotating log approach, this is optional. The log will
        automatically trim itself. But this can still be useful to start fresh.
        """
        if not self._enabled or not self._log_file_path:
            return

        with self._file_lock:
            try:
                # Close the logger's file handler temporarily
                if self._logger:
                    for handler in self._logger.handlers:
                        handler.close()

                # Truncate the file
                with open(self._log_file_path, "w", encoding="utf-8") as f:
                    f.write("")  # Empty the file

                # Reopen the handler
                if self._logger:
                    self._logger.handlers.clear()
                    handler = logging.FileHandler(
                        self._log_file_path, mode="a", encoding="utf-8"
                    )
                    handler.setLevel(logging.DEBUG)
                    handler.setFormatter(
                        logging.Formatter(
                            "%(asctime)s.%(msecs)03d | %(message)s", "%Y-%m-%d %H:%M:%S"
                        )
                    )
                    self._logger.addHandler(handler)

                self._approx_line_count = 0
                self._lines_since_last_trim = 0

            except Exception:
                pass  # Silently ignore errors

    @property
    def is_enabled(self) -> bool:
        """Check if logging is enabled."""
        return self._enabled and self._logger is not None

    def _format_uuid(self, value: Any) -> str:
        """Format UUID for logging (shortened for readability)."""
        if isinstance(value, UUID):
            return str(value)[:8]
        if isinstance(value, str) and len(value) >= 8:
            return value[:8]
        return str(value)

    def _write_log(self, message: str) -> None:
        """Internal method to write a log message and trigger trim check.

        Args:
            message: The formatted log message
        """
        if not self._logger:
            return

        self._logger.debug(message)
        self._maybe_trim_log()

    def log(self, packet_type: str, payload: dict[str, Any] | None = None) -> None:
        """Log a packet as JSON.

        Args:
            packet_type: The type of packet
            payload: The packet payload
        """
        if not self._enabled or not self._logger:
            return

        try:
            output = json.dumps(payload, indent=2, default=str) if payload else "{}"
            self._write_log(f"[PACKET] {packet_type}\n{output}")
        except Exception:
            self._write_log(f"[PACKET] {packet_type}\n{payload}")

    def log_raw(self, label: str, data: Any) -> None:
        """Log raw data with a label.

        Args:
            label: A label for this log entry
            data: Any data to log
        """
        if not self._enabled or not self._logger:
            return

        try:
            if isinstance(data, (dict, list)):
                output = json.dumps(data, indent=2, default=str)
            else:
                output = str(data)
            self._write_log(f"[RAW] {label}\n{output}")
        except Exception:
            self._write_log(f"[RAW] {label}\n{data}")

    # =========================================================================
    # JSON-RPC Communication Logging
    # =========================================================================

    def log_jsonrpc_request(
        self,
        method: str,
        request_id: int | None,
        params: dict[str, Any] | None = None,
        context: str = "",
    ) -> None:
        """Log a JSON-RPC request being sent to the agent.

        Args:
            method: The JSON-RPC method name
            request_id: The request ID (None for notifications)
            params: The request parameters
            context: Additional context (e.g., "local", "k8s")
        """
        if not self._enabled or not self._logger:
            return

        try:
            req_type = "REQUEST" if request_id is not None else "NOTIFICATION"
            ctx_prefix = f"[{context}] " if context else ""
            params_str = json.dumps(params, indent=2, default=str) if params else "{}"
            id_str = f" id={request_id}" if request_id is not None else ""
            self._write_log(
                f"{ctx_prefix}[JSONRPC-OUT] {req_type} {method}{id_str}\n{params_str}"
            )
        except Exception as e:
            self._write_log(f"[JSONRPC-OUT] {method} (logging error: {e})")

    def log_jsonrpc_response(
        self,
        request_id: int | None,
        result: dict[str, Any] | None = None,
        error: dict[str, Any] | None = None,
        context: str = "",
    ) -> None:
        """Log a JSON-RPC response received from the agent.

        Args:
            request_id: The request ID this is responding to
            result: The result payload (if success)
            error: The error payload (if error)
            context: Additional context (e.g., "local", "k8s")
        """
        if not self._enabled or not self._logger:
            return

        try:
            ctx_prefix = f"[{context}] " if context else ""
            id_str = f" id={request_id}" if request_id is not None else ""
            if error:
                error_str = json.dumps(error, indent=2, default=str)
                self._write_log(
                    f"{ctx_prefix}[JSONRPC-IN] RESPONSE{id_str} ERROR\n{error_str}"
                )
            else:
                result_str = (
                    json.dumps(result, indent=2, default=str) if result else "{}"
                )
                self._write_log(
                    f"{ctx_prefix}[JSONRPC-IN] RESPONSE{id_str}\n{result_str}"
                )
        except Exception as e:
            self._write_log(f"[JSONRPC-IN] RESPONSE (logging error: {e})")

    def log_jsonrpc_notification(
        self,
        method: str,
        params: dict[str, Any] | None = None,
        context: str = "",
    ) -> None:
        """Log a JSON-RPC notification received from the agent.

        Args:
            method: The notification method name
            params: The notification parameters
            context: Additional context (e.g., "local", "k8s")
        """
        if not self._enabled or not self._logger:
            return

        try:
            ctx_prefix = f"[{context}] " if context else ""
            params_str = json.dumps(params, indent=2, default=str) if params else "{}"
            self._write_log(
                f"{ctx_prefix}[JSONRPC-IN] NOTIFICATION {method}\n{params_str}"
            )
        except Exception as e:
            self._write_log(f"[JSONRPC-IN] NOTIFICATION {method} (logging error: {e})")

    def log_jsonrpc_raw_message(
        self,
        direction: str,
        message: dict[str, Any] | str,
        context: str = "",
    ) -> None:
        """Log a raw JSON-RPC message (for debugging parsing issues).

        Args:
            direction: "IN" or "OUT"
            message: The raw message (dict or string)
            context: Additional context
        """
        if not self._enabled or not self._logger:
            return

        try:
            ctx_prefix = f"[{context}] " if context else ""
            if isinstance(message, dict):
                msg_str = json.dumps(message, indent=2, default=str)
            else:
                msg_str = str(message)
            self._write_log(f"{ctx_prefix}[JSONRPC-RAW-{direction}]\n{msg_str}")
        except Exception as e:
            self._write_log(f"[JSONRPC-RAW-{direction}] (logging error: {e})")

    # =========================================================================
    # ACP Event Logging
    # =========================================================================

    def log_acp_event(
        self,
        event_type: str,
        event_data: dict[str, Any],
        sandbox_id: UUID | str | None = None,
        session_id: UUID | str | None = None,
    ) -> None:
        """Log an ACP event being emitted.

        Args:
            event_type: The ACP event type (e.g., "agent_message_chunk")
            event_data: The full event data
            sandbox_id: The sandbox ID (optional, for context)
            session_id: The session ID (optional, for context)
        """
        if not self._enabled or not self._logger:
            return

        try:
            ctx_parts = []
            if sandbox_id:
                ctx_parts.append(f"sandbox={self._format_uuid(sandbox_id)}")
            if session_id:
                ctx_parts.append(f"session={self._format_uuid(session_id)}")
            ctx = f" ({', '.join(ctx_parts)})" if ctx_parts else ""

            # For message chunks, show truncated content for readability
            display_data = event_data.copy()
            if event_type in ("agent_message_chunk", "agent_thought_chunk"):
                content = display_data.get("content", {})
                if isinstance(content, dict) and "text" in content:
                    text = content.get("text", "")
                    if len(text) > 200:
                        display_data["content"] = {
                            **content,
                            "text": text[:200] + f"... ({len(text)} chars total)",
                        }

            event_str = json.dumps(display_data, indent=2, default=str)
            self._write_log(f"[ACP-EVENT] {event_type}{ctx}\n{event_str}")
        except Exception as e:
            self._write_log(f"[ACP-EVENT] {event_type} (logging error: {e})")

    def log_acp_event_yielded(
        self,
        event_type: str,
        event_obj: Any,
        sandbox_id: UUID | str | None = None,
        session_id: UUID | str | None = None,
    ) -> None:
        """Log an ACP event object being yielded from the generator.

        Args:
            event_type: The ACP event type
            event_obj: The Pydantic event object
            sandbox_id: The sandbox ID (optional)
            session_id: The session ID (optional)
        """
        if not self._enabled or not self._logger:
            return

        try:
            if hasattr(event_obj, "model_dump"):
                event_data = event_obj.model_dump(mode="json", by_alias=True)
            else:
                event_data = {"raw": str(event_obj)}
            self.log_acp_event(event_type, event_data, sandbox_id, session_id)
        except Exception as e:
            self._write_log(f"[ACP-EVENT] {event_type} (logging error: {e})")

    # =========================================================================
    # Session and Sandbox Lifecycle Logging
    # =========================================================================

    def log_session_start(
        self,
        session_id: UUID | str,
        sandbox_id: UUID | str,
        message_preview: str = "",
    ) -> None:
        """Log the start of a message streaming session.

        Args:
            session_id: The session ID
            sandbox_id: The sandbox ID
            message_preview: First 100 chars of the user message
        """
        if not self._enabled or not self._logger:
            return

        session_key = str(session_id)
        self._session_start_times[session_key] = time.time()

        preview = (
            message_preview[:100] + "..."
            if len(message_preview) > 100
            else message_preview
        )
        self._write_log(
            f"[SESSION-START] session={self._format_uuid(session_id)} "
            f"sandbox={self._format_uuid(sandbox_id)}\n"
            f"  message: {preview}"
        )

    def log_session_end(
        self,
        session_id: UUID | str,
        success: bool = True,
        error: str | None = None,
        events_count: int = 0,
    ) -> None:
        """Log the end of a message streaming session.

        Args:
            session_id: The session ID
            success: Whether the session completed successfully
            error: Error message if failed
            events_count: Number of events emitted
        """
        if not self._enabled or not self._logger:
            return

        session_key = str(session_id)
        start_time = self._session_start_times.pop(session_key, None)
        duration_ms = (time.time() - start_time) * 1000 if start_time else 0

        status = "SUCCESS" if success else "FAILED"
        error_str = f"\n  error: {error}" if error else ""
        self._write_log(
            f"[SESSION-END] session={self._format_uuid(session_id)} "
            f"status={status} duration={duration_ms:.0f}ms events={events_count}"
            f"{error_str}"
        )

    def log_acp_client_start(
        self,
        sandbox_id: UUID | str,
        session_id: UUID | str,
        cwd: str,
        context: str = "",
    ) -> None:
        """Log ACP client initialization.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            cwd: Working directory
            context: "local" or "k8s"
        """
        if not self._enabled or not self._logger:
            return

        ctx_prefix = f"[{context}] " if context else ""
        self._write_log(
            f"{ctx_prefix}[ACP-CLIENT-START] "
            f"sandbox={self._format_uuid(sandbox_id)} "
            f"session={self._format_uuid(session_id)}\n"
            f"  cwd: {cwd}"
        )

    def log_acp_client_stop(
        self,
        sandbox_id: UUID | str,
        session_id: UUID | str,
        context: str = "",
    ) -> None:
        """Log ACP client shutdown.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            context: "local" or "k8s"
        """
        if not self._enabled or not self._logger:
            return

        ctx_prefix = f"[{context}] " if context else ""
        self._write_log(
            f"{ctx_prefix}[ACP-CLIENT-STOP] sandbox={self._format_uuid(sandbox_id)} session={self._format_uuid(session_id)}"
        )

    # =========================================================================
    # Streaming State Logging
    # =========================================================================

    def log_streaming_state_update(
        self,
        session_id: UUID | str,
        state_type: str,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Log streaming state changes.

        Args:
            session_id: The session ID
            state_type: Type of state change (e.g., "chunk_accumulated", "saved_to_db")
            details: Additional details
        """
        if not self._enabled or not self._logger:
            return

        try:
            details_str = ""
            if details:
                details_str = "\n" + json.dumps(details, indent=2, default=str)
            self._write_log(
                f"[STREAMING-STATE] session={self._format_uuid(session_id)} type={state_type}{details_str}"
            )
        except Exception as e:
            self._write_log(f"[STREAMING-STATE] {state_type} (logging error: {e})")

    def log_sse_emit(
        self,
        event_type: str,
        session_id: UUID | str | None = None,
    ) -> None:
        """Log SSE event being emitted to frontend.

        Args:
            event_type: The event type being emitted
            session_id: The session ID
        """
        if not self._enabled or not self._logger:
            return

        session_str = f" session={self._format_uuid(session_id)}" if session_id else ""
        self._write_log(f"[SSE-EMIT] {event_type}{session_str}")


# Singleton instance
_packet_logger: PacketLogger | None = None


def get_packet_logger() -> PacketLogger:
    """Get the singleton packet logger instance."""
    global _packet_logger
    if _packet_logger is None:
        _packet_logger = PacketLogger()
    return _packet_logger


def log_separator(label: str = "") -> None:
    """Log a visual separator for readability in the log file.

    Args:
        label: Optional label for the separator
    """
    logger = get_packet_logger()
    if not logger.is_enabled or not logger._logger:
        return

    separator = "=" * 80
    if label:
        logger._write_log(f"\n{separator}\n{label}\n{separator}")
    else:
        logger._write_log(f"\n{separator}")


================================================
FILE: backend/onyx/server/features/build/api/packets.py
================================================
"""Build Mode packet types for streaming agent responses.

This module defines CUSTOM Onyx packet types that extend ACP (Agent Client Protocol).
ACP events are passed through directly from the agent - this module only contains
Onyx-specific extensions like artifacts and file operations.

All packets use SSE (Server-Sent Events) format with `event: message` and include
a `type` field to distinguish packet types.

ACP events (passed through directly from acp.schema):
- agent_message_chunk: Text/image content from agent
- agent_thought_chunk: Agent's internal reasoning
- tool_call_start: Tool invocation started
- tool_call_progress: Tool execution progress/result
- agent_plan_update: Agent's execution plan
- current_mode_update: Agent mode change
- prompt_response: Agent finished processing
- error: An error occurred

Custom Onyx packets (defined here):
- error: Onyx-specific errors (e.g., session not found)

Based on:
- Agent Client Protocol (ACP): https://agentclientprotocol.com
"""

from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Literal

from pydantic import BaseModel
from pydantic import Field


# =============================================================================
# Base Packet Type
# =============================================================================


class BasePacket(BaseModel):
    """Base packet with common fields for all custom Onyx packet types."""

    type: str
    timestamp: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )


# =============================================================================
# Custom Onyx Packets
# =============================================================================


class ErrorPacket(BasePacket):
    """An Onyx-specific error occurred (e.g., session not found, sandbox not running)."""

    type: Literal["error"] = "error"
    message: str
    code: int | None = None
    details: dict[str, Any] | None = None


# =============================================================================
# Union Type for Custom Onyx Packets
# =============================================================================

BuildPacket = ErrorPacket


================================================
FILE: backend/onyx/server/features/build/api/rate_limit.py
================================================
"""Rate limiting logic for Build Mode."""

from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Literal

from sqlalchemy.orm import Session

from onyx.db.models import User
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.server.features.build.api.models import RateLimitResponse
from onyx.server.features.build.api.subscription_check import is_user_subscribed
from onyx.server.features.build.configs import CRAFT_PAID_USER_RATE_LIMIT
from onyx.server.features.build.db.rate_limit import count_user_messages_in_window
from onyx.server.features.build.db.rate_limit import count_user_messages_total
from onyx.server.features.build.db.rate_limit import get_oldest_message_timestamp
from onyx.server.features.build.utils import CRAFT_HAS_USAGE_LIMITS
from shared_configs.configs import MULTI_TENANT

# Default limit for free/non-subscribed users (not configurable)
FREE_USER_RATE_LIMIT = 5


def _should_skip_rate_limiting(user: User) -> bool:
    """
    Check if rate limiting should be skipped for this user.

    Currently grants unlimited usage to dev tenant users (tenant_dev).
    Controlled via PostHog feature flag.

    Returns:
        True to skip rate limiting (unlimited), False to apply normal limits
    """
    # NOTE: We can modify the posthog flag to return more detail about a limit
    # i.e. can set variable limits per user and tenant via PostHog instead of env vars
    # to avoid re-deploying on every limit change

    feature_flag_provider = get_default_feature_flag_provider()
    # Flag returns True for users who SHOULD be rate limited
    # We negate to get: True = skip rate limiting
    has_rate_limit = feature_flag_provider.feature_enabled(
        CRAFT_HAS_USAGE_LIMITS,
        user.id,
    )
    return not has_rate_limit


def get_user_rate_limit_status(
    user: User,
    db_session: Session,
) -> RateLimitResponse:
    """
    Get the rate limit status for a user.

    Rate limits:
        - Cloud (MULTI_TENANT=true):
            - Subscribed users: CRAFT_PAID_USER_RATE_LIMIT messages per week
              (configurable, default 25)
            - Non-subscribed users: 5 messages (lifetime total)
            - Per-user overrides via PostHog feature flag
        - Self-hosted (MULTI_TENANT=false):
            - Unlimited (no rate limiting)

    Args:
        user: The authenticated user
        db_session: Database session

    Returns:
        RateLimitResponse with current limit status
    """
    # Self-hosted deployments have no rate limits
    if not MULTI_TENANT:
        return RateLimitResponse(
            is_limited=False,
            limit_type="weekly",
            messages_used=0,
            limit=0,  # 0 indicates unlimited
            reset_timestamp=None,
        )

    # Check if user should skip rate limiting (e.g., dev tenant users)
    if _should_skip_rate_limiting(user):
        return RateLimitResponse(
            is_limited=False,
            limit_type="weekly",
            messages_used=-1,
            limit=0,  # 0 indicates unlimited
            reset_timestamp=None,
        )

    # Determine subscription status
    is_subscribed = is_user_subscribed(user, db_session)

    # Get limit based on subscription status
    limit = CRAFT_PAID_USER_RATE_LIMIT if is_subscribed else FREE_USER_RATE_LIMIT

    # Limit type: weekly for subscribed users, total for free
    limit_type: Literal["weekly", "total"] = "weekly" if is_subscribed else "total"

    # Count messages
    if limit_type == "weekly":
        # Subscribed: rolling 7-day window
        cutoff_time = datetime.now(tz=timezone.utc) - timedelta(days=7)
        messages_used = count_user_messages_in_window(user.id, cutoff_time, db_session)

        # Calculate reset timestamp (when oldest message ages out)
        # Only show reset time if user is at or over the limit
        if messages_used >= limit:
            oldest_msg = get_oldest_message_timestamp(user.id, cutoff_time, db_session)
            if oldest_msg:
                reset_time = oldest_msg + timedelta(days=7)
                reset_timestamp = reset_time.isoformat()
            else:
                reset_timestamp = None
        else:
            reset_timestamp = None
    else:
        # Non-subscribed: lifetime total
        messages_used = count_user_messages_total(user.id, db_session)
        reset_timestamp = None

    return RateLimitResponse(
        is_limited=messages_used >= limit,
        limit_type=limit_type,
        messages_used=messages_used,
        limit=limit,
        reset_timestamp=reset_timestamp,
    )


================================================
FILE: backend/onyx/server/features/build/api/sessions_api.py
================================================
"""API endpoints for Build Mode session management."""

from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import Response
from fastapi import UploadFile
from sqlalchemy import exists
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.models import BuildMessage
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.api.models import ArtifactResponse
from onyx.server.features.build.api.models import DetailedSessionResponse
from onyx.server.features.build.api.models import DirectoryListing
from onyx.server.features.build.api.models import GenerateSuggestionsRequest
from onyx.server.features.build.api.models import GenerateSuggestionsResponse
from onyx.server.features.build.api.models import PptxPreviewResponse
from onyx.server.features.build.api.models import PreProvisionedCheckResponse
from onyx.server.features.build.api.models import SessionCreateRequest
from onyx.server.features.build.api.models import SessionListResponse
from onyx.server.features.build.api.models import SessionNameGenerateResponse
from onyx.server.features.build.api.models import SessionResponse
from onyx.server.features.build.api.models import SessionUpdateRequest
from onyx.server.features.build.api.models import SetSessionSharingRequest
from onyx.server.features.build.api.models import SetSessionSharingResponse
from onyx.server.features.build.api.models import SuggestionBubble
from onyx.server.features.build.api.models import SuggestionTheme
from onyx.server.features.build.api.models import UploadResponse
from onyx.server.features.build.api.models import WebappInfo
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.db.build_session import allocate_nextjs_port
from onyx.server.features.build.db.build_session import get_build_session
from onyx.server.features.build.db.build_session import set_build_session_sharing_scope
from onyx.server.features.build.db.sandbox import get_latest_snapshot_for_session
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.db.sandbox import update_sandbox_heartbeat
from onyx.server.features.build.db.sandbox import update_sandbox_status__no_commit
from onyx.server.features.build.sandbox import get_sandbox_manager
from onyx.server.features.build.session.manager import SessionManager
from onyx.server.features.build.session.manager import UploadLimitExceededError
from onyx.server.features.build.utils import sanitize_filename
from onyx.server.features.build.utils import validate_file
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/sessions")


# =============================================================================
# Session Management Endpoints
# =============================================================================


@router.get("", response_model=SessionListResponse)
def list_sessions(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionListResponse:
    """List all build sessions for the current user."""
    session_manager = SessionManager(db_session)

    sessions = session_manager.list_sessions(user.id)

    # Get the user's sandbox (shared across all sessions)
    sandbox = get_sandbox_by_user_id(db_session, user.id)

    return SessionListResponse(
        sessions=[SessionResponse.from_model(session, sandbox) for session in sessions]
    )


# Lock timeout for session creation (should be longer than max provision time)
SESSION_CREATE_LOCK_TIMEOUT_SECONDS = 300


@router.post("", response_model=DetailedSessionResponse)
def create_session(
    request: SessionCreateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DetailedSessionResponse:
    """
    Create or get an existing empty build session.

    Creates a sandbox with the necessary file structure and returns a session ID.
    Uses SessionManager for session and sandbox provisioning.

    This endpoint is atomic - if sandbox provisioning fails, no database
    records are created (transaction is rolled back).

    Uses Redis lock to prevent race conditions when multiple requests try to
    create/provision a session for the same user concurrently.
    """
    tenant_id = get_current_tenant_id()
    redis_client = get_redis_client(tenant_id=tenant_id)

    # Lock on user_id to prevent concurrent session creation for the same user
    # This prevents race conditions where two requests both see sandbox as SLEEPING
    # and both try to provision, with one deleting the other's work
    lock_key = f"session_create:{user.id}"
    lock = redis_client.lock(lock_key, timeout=SESSION_CREATE_LOCK_TIMEOUT_SECONDS)

    # blocking=True means wait if another create is in progress
    acquired = lock.acquire(
        blocking=True, blocking_timeout=SESSION_CREATE_LOCK_TIMEOUT_SECONDS
    )
    if not acquired:
        raise HTTPException(
            status_code=503,
            detail="Session creation timed out waiting for lock",
        )

    try:
        session_manager = SessionManager(db_session)
        build_session = session_manager.get_or_create_empty_session(
            user.id,
            user_work_area=(
                request.user_work_area if request.demo_data_enabled else None
            ),
            user_level=request.user_level if request.demo_data_enabled else None,
            llm_provider_type=request.llm_provider_type,
            llm_model_name=request.llm_model_name,
            demo_data_enabled=request.demo_data_enabled,
        )
        db_session.commit()

        sandbox = get_sandbox_by_user_id(db_session, user.id)
        base_response = SessionResponse.from_model(build_session, sandbox)
        return DetailedSessionResponse.from_session_response(
            base_response, session_loaded_in_sandbox=True
        )
    except ValueError as e:
        logger.exception("Session creation failed")
        db_session.rollback()
        raise HTTPException(status_code=429, detail=str(e))
    except Exception as e:
        db_session.rollback()
        logger.error(f"Session creation failed: {e}")
        raise HTTPException(status_code=500, detail=f"Session creation failed: {e}")
    finally:
        if lock.owned():
            lock.release()


@router.get("/{session_id}", response_model=DetailedSessionResponse)
def get_session_details(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DetailedSessionResponse:
    """
    Get details of a specific build session.

    Returns session_loaded_in_sandbox to indicate if the session workspace
    exists in the running sandbox.
    """
    session_manager = SessionManager(db_session)

    session = session_manager.get_session(session_id, user.id)

    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Get the user's sandbox to include in response
    sandbox = get_sandbox_by_user_id(db_session, user.id)

    # Check if session workspace exists in the sandbox
    session_loaded = False
    if sandbox and sandbox.status == SandboxStatus.RUNNING:
        sandbox_manager = get_sandbox_manager()
        session_loaded = sandbox_manager.session_workspace_exists(
            sandbox.id, session_id
        )

    base_response = SessionResponse.from_model(session, sandbox)
    return DetailedSessionResponse.from_session_response(
        base_response, session_loaded_in_sandbox=session_loaded
    )


@router.get(
    "/{session_id}/pre-provisioned-check", response_model=PreProvisionedCheckResponse
)
def check_pre_provisioned_session(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> PreProvisionedCheckResponse:
    """
    Check if a pre-provisioned session is still valid (empty).

    Used by the frontend to poll and detect when another tab has used
    the session. A session is considered valid if it has no messages yet.

    Returns:
        - valid=True, session_id=<id> if the session is still empty
        - valid=False, session_id=None if the session has messages or doesn't exist
    """
    session = get_build_session(session_id, user.id, db_session)

    if session is None:
        return PreProvisionedCheckResponse(valid=False, session_id=None)

    # Check if session is still empty (no messages = pre-provisioned)
    has_messages = db_session.query(
        exists().where(BuildMessage.session_id == session_id)
    ).scalar()

    if not has_messages:
        return PreProvisionedCheckResponse(valid=True, session_id=str(session_id))

    # Session has messages - it's no longer a valid pre-provisioned session
    return PreProvisionedCheckResponse(valid=False, session_id=None)


@router.post("/{session_id}/generate-name", response_model=SessionNameGenerateResponse)
def generate_session_name(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionNameGenerateResponse:
    """Generate a session name using LLM based on the first user message."""
    session_manager = SessionManager(db_session)

    generated_name = session_manager.generate_session_name(session_id, user.id)

    if generated_name is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return SessionNameGenerateResponse(name=generated_name)


@router.post(
    "/{session_id}/generate-suggestions", response_model=GenerateSuggestionsResponse
)
def generate_suggestions(
    session_id: UUID,
    request: GenerateSuggestionsRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> GenerateSuggestionsResponse:
    """Generate follow-up suggestions based on the first exchange in a session."""
    session_manager = SessionManager(db_session)

    # Verify session exists and belongs to user
    session = session_manager.get_session(session_id, user.id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Generate suggestions
    suggestions_data = session_manager.generate_followup_suggestions(
        user_message=request.user_message,
        assistant_message=request.assistant_message,
    )

    # Convert to response model
    suggestions = [
        SuggestionBubble(
            theme=SuggestionTheme(item["theme"]),
            text=item["text"],
        )
        for item in suggestions_data
    ]

    return GenerateSuggestionsResponse(suggestions=suggestions)


@router.put("/{session_id}/name", response_model=SessionResponse)
def update_session_name(
    session_id: UUID,
    request: SessionUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SessionResponse:
    """Update the name of a build session."""
    session_manager = SessionManager(db_session)

    session = session_manager.update_session_name(session_id, user.id, request.name)

    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Get the user's sandbox to include in response
    sandbox = get_sandbox_by_user_id(db_session, user.id)
    return SessionResponse.from_model(session, sandbox)


@router.patch("/{session_id}/public")
def set_session_public(
    session_id: UUID,
    request: SetSessionSharingRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SetSessionSharingResponse:
    """Set the sharing scope of a build session's webapp."""
    updated = set_build_session_sharing_scope(
        session_id, user.id, request.sharing_scope, db_session
    )
    if not updated:
        raise HTTPException(status_code=404, detail="Session not found")
    return SetSessionSharingResponse(
        session_id=str(session_id),
        sharing_scope=updated.sharing_scope,
    )


@router.delete("/{session_id}", response_model=None)
def delete_session(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Delete a build session and all associated data.

    This endpoint is atomic - if sandbox termination fails, the session
    is NOT deleted (transaction is rolled back).
    """
    session_manager = SessionManager(db_session)

    try:
        success = session_manager.delete_session(session_id, user.id)
        if not success:
            raise HTTPException(status_code=404, detail="Session not found")
        db_session.commit()
    except HTTPException:
        # Re-raise HTTP exceptions (like 404) without rollback
        raise
    except Exception as e:
        # Sandbox termination failed - rollback to preserve session
        db_session.rollback()
        logger.error(f"Failed to delete session {session_id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete session: {e}",
        )

    return Response(status_code=204)


# Lock timeout should be longer than max restore time (5 minutes)
RESTORE_LOCK_TIMEOUT_SECONDS = 300


@router.post("/{session_id}/restore", response_model=DetailedSessionResponse)
def restore_session(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DetailedSessionResponse:
    """Restore sandbox and load session snapshot. Blocks until complete.

    Uses Redis lock to ensure only one restore runs per sandbox at a time.
    If another restore is in progress, waits for it to complete.

    Handles two cases:
    1. Sandbox is SLEEPING: Re-provision pod, then load session snapshot
    2. Sandbox is RUNNING but session not loaded: Just load session snapshot

    Returns immediately if session workspace already exists in pod.
    Always returns session_loaded_in_sandbox=True on success.
    """
    session = get_build_session(session_id, user.id, db_session)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    sandbox = get_sandbox_by_user_id(db_session, user.id)
    if not sandbox:
        raise HTTPException(status_code=404, detail="Sandbox not found")

    # If sandbox is already running, check if session workspace exists
    sandbox_manager = get_sandbox_manager()
    tenant_id = get_current_tenant_id()

    # Need to do some work - acquire Redis lock
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock_key = f"sandbox_restore:{sandbox.id}"
    lock = redis_client.lock(lock_key, timeout=RESTORE_LOCK_TIMEOUT_SECONDS)

    # Non-blocking: if another restore is already running, return 409 immediately
    # instead of making the user wait. The frontend will retry.
    acquired = lock.acquire(blocking=False)
    if not acquired:
        raise HTTPException(
            status_code=409,
            detail="Restore already in progress",
        )

    try:
        # Re-fetch sandbox status (may have changed while waiting for lock)
        db_session.refresh(sandbox)

        # Also re-check if session workspace exists (another request may have
        # restored it while we were waiting)
        if sandbox.status == SandboxStatus.RUNNING:
            is_healthy = sandbox_manager.health_check(sandbox.id, timeout=10.0)
            if is_healthy and sandbox_manager.session_workspace_exists(
                sandbox.id, session_id
            ):
                session.status = BuildSessionStatus.ACTIVE
                update_sandbox_heartbeat(db_session, sandbox.id)
                base_response = SessionResponse.from_model(session, sandbox)
                return DetailedSessionResponse.from_session_response(
                    base_response, session_loaded_in_sandbox=True
                )

            if not is_healthy:
                logger.warning(
                    f"Sandbox {sandbox.id} marked as RUNNING but pod is unhealthy/missing. Entering recovery mode."
                )
                # Terminate to clean up any lingering K8s resources
                sandbox_manager.terminate(sandbox.id)

                update_sandbox_status__no_commit(
                    db_session, sandbox.id, SandboxStatus.TERMINATED
                )
                db_session.commit()
                db_session.refresh(sandbox)
                # Fall through to TERMINATED handling below

        session_manager = SessionManager(db_session)
        llm_config = session_manager._get_llm_config(None, None)

        if sandbox.status in (SandboxStatus.SLEEPING, SandboxStatus.TERMINATED):
            # Mark as PROVISIONING before the long-running provision() call
            # so other requests know work is in progress
            update_sandbox_status__no_commit(
                db_session, sandbox.id, SandboxStatus.PROVISIONING
            )
            db_session.commit()

            sandbox_manager.provision(
                sandbox_id=sandbox.id,
                user_id=user.id,
                tenant_id=tenant_id,
                llm_config=llm_config,
            )

            # Mark as RUNNING after successful provision
            update_sandbox_status__no_commit(
                db_session, sandbox.id, SandboxStatus.RUNNING
            )
            db_session.commit()

        # 2. Check if session workspace needs to be loaded
        if sandbox.status == SandboxStatus.RUNNING:
            workspace_exists = sandbox_manager.session_workspace_exists(
                sandbox.id, session_id
            )

            if not workspace_exists:
                # Allocate port if not already set (needed for both snapshot restore and fresh setup)
                if not session.nextjs_port:
                    session.nextjs_port = allocate_nextjs_port(db_session)
                    # Commit port allocation before long-running operations
                    db_session.commit()

                # Only Kubernetes backend supports snapshot restoration
                snapshot = None
                if SANDBOX_BACKEND == SandboxBackend.KUBERNETES:
                    snapshot = get_latest_snapshot_for_session(db_session, session_id)

                if snapshot:
                    try:
                        sandbox_manager.restore_snapshot(
                            sandbox_id=sandbox.id,
                            session_id=session_id,
                            snapshot_storage_path=snapshot.storage_path,
                            tenant_id=tenant_id,
                            nextjs_port=session.nextjs_port,
                            llm_config=llm_config,
                            use_demo_data=session.demo_data_enabled,
                        )
                        session.status = BuildSessionStatus.ACTIVE
                        db_session.commit()
                    except Exception as e:
                        logger.error(
                            f"Snapshot restore failed for session {session_id}: {e}"
                        )
                        session.nextjs_port = None
                        db_session.commit()
                        raise
                else:
                    # No snapshot - set up fresh workspace
                    sandbox_manager.setup_session_workspace(
                        sandbox_id=sandbox.id,
                        session_id=session_id,
                        llm_config=llm_config,
                        nextjs_port=session.nextjs_port,
                    )
                    session.status = BuildSessionStatus.ACTIVE
                    db_session.commit()
        else:
            logger.warning(
                f"Sandbox {sandbox.id} status is {sandbox.status} after re-provision, expected RUNNING"
            )

    except Exception as e:
        logger.error(f"Failed to restore session {session_id}: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to restore session: {e}",
        )
    finally:
        if lock.owned():
            lock.release()

    # Update heartbeat to mark sandbox as active after successful restore
    update_sandbox_heartbeat(db_session, sandbox.id)

    base_response = SessionResponse.from_model(session, sandbox)
    return DetailedSessionResponse.from_session_response(
        base_response, session_loaded_in_sandbox=True
    )


# =============================================================================
# Artifact Endpoints
# =============================================================================


@router.get(
    "/{session_id}/artifacts",
    response_model=list[ArtifactResponse],
)
def list_artifacts(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[dict]:
    """List artifacts generated in the session."""
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    artifacts = session_manager.list_artifacts(session_id, user_id)
    if artifacts is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return artifacts


@router.get("/{session_id}/files", response_model=DirectoryListing)
def list_directory(
    session_id: UUID,
    path: str = "",
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DirectoryListing:
    """
    List files and directories in the sandbox.

    Args:
        session_id: The session ID
        path: Relative path from sandbox root (empty string for root)

    Returns:
        DirectoryListing with sorted entries (directories first, then files)
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    try:
        listing = session_manager.list_directory(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied")
        elif "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail="Directory not found")
        elif "not a directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Path is not a directory")
        raise HTTPException(status_code=400, detail=error_message)

    if listing is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return listing


@router.get("/{session_id}/artifacts/{path:path}")
def download_artifact(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Download a specific artifact file."""
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    try:
        result = session_manager.download_artifact(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        if (
            "path traversal" in error_message.lower()
            or "access denied" in error_message.lower()
        ):
            raise HTTPException(status_code=403, detail="Access denied")
        elif "directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Cannot download directory")
        raise HTTPException(status_code=400, detail=error_message)

    if result is None:
        raise HTTPException(status_code=404, detail="Artifact not found")

    content, mime_type, filename = result

    # Handle Unicode filenames in Content-Disposition header
    # HTTP headers require Latin-1 encoding, so we use RFC 5987 for Unicode
    try:
        # Try Latin-1 encoding first (ASCII-compatible filenames)
        filename.encode("latin-1")
        content_disposition = f'attachment; filename="{filename}"'
    except UnicodeEncodeError:
        # Use RFC 5987 encoding for Unicode filenames
        from urllib.parse import quote

        encoded_filename = quote(filename, safe="")
        content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"

    return Response(
        content=content,
        media_type=mime_type,
        headers={
            "Content-Disposition": content_disposition,
        },
    )


@router.get("/{session_id}/export-docx/{path:path}")
def export_docx(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Export a markdown file as DOCX."""
    session_manager = SessionManager(db_session)

    try:
        result = session_manager.export_docx(session_id, user.id, path)
    except ValueError as e:
        error_message = str(e)
        if (
            "path traversal" in error_message.lower()
            or "access denied" in error_message.lower()
        ):
            raise HTTPException(status_code=403, detail="Access denied")
        raise HTTPException(status_code=400, detail=error_message)

    if result is None:
        raise HTTPException(status_code=404, detail="File not found")

    docx_bytes, filename = result

    try:
        filename.encode("latin-1")
        content_disposition = f'attachment; filename="{filename}"'
    except UnicodeEncodeError:
        from urllib.parse import quote

        encoded_filename = quote(filename, safe="")
        content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"

    return Response(
        content=docx_bytes,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        headers={"Content-Disposition": content_disposition},
    )


@router.get("/{session_id}/pptx-preview/{path:path}")
def get_pptx_preview(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> PptxPreviewResponse:
    """Generate slide image previews for a PPTX file."""
    session_manager = SessionManager(db_session)

    try:
        result = session_manager.get_pptx_preview(session_id, user.id, path)
    except ValueError as e:
        error_message = str(e)
        if (
            "path traversal" in error_message.lower()
            or "access denied" in error_message.lower()
        ):
            raise HTTPException(status_code=403, detail="Access denied")
        raise HTTPException(status_code=400, detail=error_message)

    if result is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return PptxPreviewResponse(**result)


@router.get("/{session_id}/webapp-info", response_model=WebappInfo)
def get_webapp_info(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> WebappInfo:
    """
    Get webapp information for a session.

    Returns whether a webapp exists, its URL, and the sandbox status.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    webapp_info = session_manager.get_webapp_info(session_id, user_id)

    if webapp_info is None:
        raise HTTPException(status_code=404, detail="Session not found")

    return WebappInfo(**webapp_info)


@router.get("/{session_id}/webapp-download")
def download_webapp(
    session_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """
    Download the webapp directory as a zip file.

    Returns the entire outputs/web directory as a zip archive.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    result = session_manager.download_webapp_zip(session_id, user_id)

    if result is None:
        raise HTTPException(status_code=404, detail="Webapp not found")

    zip_bytes, filename = result

    return Response(
        content=zip_bytes,
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )


@router.get("/{session_id}/download-directory/{path:path}")
def download_directory(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """
    Download a directory as a zip file.

    Returns the specified directory as a zip archive.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    try:
        result = session_manager.download_directory(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied")
        raise HTTPException(status_code=400, detail=error_message)

    if result is None:
        raise HTTPException(status_code=404, detail="Directory not found")

    zip_bytes, filename = result

    return Response(
        content=zip_bytes,
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )


@router.post("/{session_id}/upload", response_model=UploadResponse)
def upload_file_endpoint(
    session_id: UUID,
    file: UploadFile = File(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload a file to the session's sandbox.

    The file will be placed in the sandbox's attachments directory.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    if not file.filename:
        raise HTTPException(status_code=400, detail="File has no filename")

    # Read file content (use sync file interface)
    content = file.file.read()

    # Validate file (extension, mime type, size)
    is_valid, error = validate_file(file.filename, file.content_type, len(content))
    if not is_valid:
        raise HTTPException(status_code=400, detail=error)

    # Sanitize filename
    safe_filename = sanitize_filename(file.filename)

    try:
        relative_path, _ = session_manager.upload_file(
            session_id=session_id,
            user_id=user_id,
            filename=safe_filename,
            content=content,
        )
    except UploadLimitExceededError as e:
        # Return 429 for limit exceeded errors
        raise HTTPException(status_code=429, detail=str(e))
    except ValueError as e:
        error_message = str(e)
        if "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail=error_message)
        raise HTTPException(status_code=400, detail=error_message)

    return UploadResponse(
        filename=safe_filename,
        path=relative_path,
        size_bytes=len(content),
    )


@router.delete("/{session_id}/files/{path:path}", response_model=None)
def delete_file_endpoint(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Delete a file from the session's sandbox.

    Args:
        session_id: The session ID
        path: Relative path to the file (e.g., "attachments/doc.pdf")
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    try:
        deleted = session_manager.delete_file(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied")
        elif "not found" in error_message.lower():
            raise HTTPException(status_code=404, detail=error_message)
        elif "directory" in error_message.lower():
            raise HTTPException(status_code=400, detail="Cannot delete directory")
        raise HTTPException(status_code=400, detail=error_message)

    if not deleted:
        raise HTTPException(status_code=404, detail="File not found")

    return Response(status_code=204)


================================================
FILE: backend/onyx/server/features/build/api/subscription_check.py
================================================
"""Subscription detection for Build Mode rate limiting."""

from sqlalchemy.orm import Session

from onyx.configs.app_configs import DEV_MODE
from onyx.db.models import User
from onyx.server.usage_limits import is_tenant_on_trial_fn
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


def is_user_subscribed(user: User, db_session: Session) -> bool:  # noqa: ARG001
    """
    Check if a user has an active subscription.

    For cloud (MULTI_TENANT=true):
        - Checks Stripe billing via control plane
        - Returns True if tenant is NOT on trial (subscribed = NOT on trial)

    For self-hosted (MULTI_TENANT=false):
        - Checks license metadata
        - Returns True if license status is ACTIVE

    Args:
        user: The user object (None for unauthenticated users)
        db_session: Database session

    Returns:
        True if user has active subscription, False otherwise
    """
    if DEV_MODE:
        return True

    if user is None:
        return False

    if MULTI_TENANT:
        # Cloud: check Stripe billing via control plane
        tenant_id = get_current_tenant_id()
        try:
            on_trial = is_tenant_on_trial_fn(tenant_id)
            # Subscribed = NOT on trial
            return not on_trial
        except Exception as e:
            logger.warning(f"Subscription check failed for tenant {tenant_id}: {e}")
            # Default to non-subscribed (safer/more restrictive)
            return False

    return True


================================================
FILE: backend/onyx/server/features/build/api/templates/webapp_hmr_fixer.js
================================================
(function () {
  var WEBAPP_BASE = "__WEBAPP_BASE__";
  var PROXIED_NEXT_PREFIX = WEBAPP_BASE + "/_next/";
  var PROXIED_HMR_PREFIX = WEBAPP_BASE + "/_next/webpack-hmr";
  var PROXIED_ALT_HMR_PREFIX = WEBAPP_BASE + "/_next/hmr";

  function isHmrWebSocketUrl(url) {
    if (!url) return false;
    try {
      var parsedUrl = new URL(String(url), window.location.href);
      return (
        parsedUrl.pathname.indexOf("/_next/webpack-hmr") === 0 ||
        parsedUrl.pathname.indexOf("/_next/hmr") === 0 ||
        parsedUrl.pathname.indexOf(PROXIED_HMR_PREFIX) === 0 ||
        parsedUrl.pathname.indexOf(PROXIED_ALT_HMR_PREFIX) === 0
      );
    } catch (e) {}
    if (typeof url === "string") {
      return (
        url.indexOf("/_next/webpack-hmr") === 0 ||
        url.indexOf("/_next/hmr") === 0 ||
        url.indexOf(PROXIED_HMR_PREFIX) === 0 ||
        url.indexOf(PROXIED_ALT_HMR_PREFIX) === 0
      );
    }
    return false;
  }

  function rewriteNextAssetUrl(url) {
    if (!url) return url;
    try {
      var parsedUrl = new URL(String(url), window.location.href);
      if (parsedUrl.pathname.indexOf(PROXIED_NEXT_PREFIX) === 0) {
        return parsedUrl.pathname + parsedUrl.search + parsedUrl.hash;
      }
      if (parsedUrl.pathname.indexOf("/_next/") === 0) {
        return (
          WEBAPP_BASE + parsedUrl.pathname + parsedUrl.search + parsedUrl.hash
        );
      }
    } catch (e) {}
    if (typeof url === "string") {
      if (url.indexOf(PROXIED_NEXT_PREFIX) === 0) {
        return url;
      }
      if (url.indexOf("/_next/") === 0) {
        return WEBAPP_BASE + url;
      }
    }
    return url;
  }

  function createEvent(eventType) {
    return typeof Event === "function"
      ? new Event(eventType)
      : { type: eventType };
  }

  function MockHmrWebSocket(url) {
    this.url = String(url);
    this.readyState = 1;
    this.bufferedAmount = 0;
    this.extensions = "";
    this.protocol = "";
    this.binaryType = "blob";
    this.onopen = null;
    this.onmessage = null;
    this.onerror = null;
    this.onclose = null;
    this._l = {};
    var socket = this;
    setTimeout(function () {
      socket._d("open", createEvent("open"));
    }, 0);
  }

  MockHmrWebSocket.CONNECTING = 0;
  MockHmrWebSocket.OPEN = 1;
  MockHmrWebSocket.CLOSING = 2;
  MockHmrWebSocket.CLOSED = 3;

  MockHmrWebSocket.prototype.addEventListener = function (eventType, callback) {
    (this._l[eventType] || (this._l[eventType] = [])).push(callback);
  };

  MockHmrWebSocket.prototype.removeEventListener = function (
    eventType,
    callback,
  ) {
    var listeners = this._l[eventType] || [];
    this._l[eventType] = listeners.filter(function (listener) {
      return listener !== callback;
    });
  };

  MockHmrWebSocket.prototype._d = function (eventType, eventValue) {
    var listeners = this._l[eventType] || [];
    for (var i = 0; i < listeners.length; i++) {
      listeners[i].call(this, eventValue);
    }
    var handler = this["on" + eventType];
    if (typeof handler === "function") {
      handler.call(this, eventValue);
    }
  };

  MockHmrWebSocket.prototype.send = function () {};

  MockHmrWebSocket.prototype.close = function (code, reason) {
    if (this.readyState >= 2) return;
    this.readyState = 3;
    var closeEvent = createEvent("close");
    closeEvent.code = code === undefined ? 1000 : code;
    closeEvent.reason = reason || "";
    closeEvent.wasClean = true;
    this._d("close", closeEvent);
  };

  if (window.WebSocket) {
    var OriginalWebSocket = window.WebSocket;
    window.WebSocket = function (url, protocols) {
      if (isHmrWebSocketUrl(url)) {
        return new MockHmrWebSocket(rewriteNextAssetUrl(url));
      }
      return protocols === undefined
        ? new OriginalWebSocket(url)
        : new OriginalWebSocket(url, protocols);
    };
    window.WebSocket.prototype = OriginalWebSocket.prototype;
    Object.setPrototypeOf(window.WebSocket, OriginalWebSocket);
    ["CONNECTING", "OPEN", "CLOSING", "CLOSED"].forEach(function (stateKey) {
      window.WebSocket[stateKey] = OriginalWebSocket[stateKey];
    });
  }
})();


================================================
FILE: backend/onyx/server/features/build/api/templates/webapp_offline.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="refresh" content="15" />
    <title>Craft — Starting up</title>
    <style>
      *,
      *::before,
      *::after {
        box-sizing: border-box;
        margin: 0;
        padding: 0;
      }

      body {
        font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas,
          monospace;
        background: linear-gradient(to bottom right, #030712, #111827, #030712);
        min-height: 100vh;
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        gap: 1.5rem;
        padding: 2rem;
      }

      .terminal {
        width: 100%;
        max-width: 580px;
        border: 2px solid #374151;
        border-radius: 2px;
      }

      .titlebar {
        background: #1f2937;
        padding: 0.5rem 0.75rem;
        display: flex;
        align-items: center;
        gap: 0.5rem;
        border-bottom: 1px solid #374151;
      }

      .btn {
        width: 12px;
        height: 12px;
        border-radius: 2px;
        flex-shrink: 0;
      }
      .btn-red {
        background: #ef4444;
      }
      .btn-yellow {
        background: #eab308;
      }
      .btn-green {
        background: #22c55e;
      }

      .title-label {
        flex: 1;
        text-align: center;
        font-size: 0.75rem;
        color: #6b7280;
        margin-right: 36px;
      }

      .body {
        background: #111827;
        padding: 1.5rem;
        min-height: 200px;
        font-size: 0.875rem;
        color: #d1d5db;
        display: flex;
        align-items: flex-start;
        gap: 0.375rem;
      }

      .prompt {
        color: #10b981;
        user-select: none;
      }

      .tagline {
        font-size: 0.8125rem;
        color: #4b5563;
        text-align: center;
      }
    </style>
  </head>
  <body>
    <div class="terminal">
      <div class="titlebar">
        <div class="btn btn-red"></div>
        <div class="btn btn-yellow"></div>
        <div class="btn btn-green"></div>
        <span class="title-label">crafting_table</span>
      </div>
      <div class="body">
        <span class="prompt">/&gt;</span>
        <span>Sandbox is asleep...</span>
      </div>
    </div>
    <p class="tagline">
      Ask the owner to open their Craft session to wake it up.
    </p>
  </body>
</html>


================================================
FILE: backend/onyx/server/features/build/api/user_library.py
================================================
"""API endpoints for User Library file management in Craft.

This module provides endpoints for uploading and managing raw binary files
(xlsx, pptx, docx, csv, etc.) that are stored directly in S3 for sandbox access.

Files are stored at:
    s3://{bucket}/{tenant_id}/knowledge/{user_id}/user_library/{path}

And synced to sandbox at:
    /workspace/files/user_library/{path}

Known Issues / TODOs:
    - Memory: Upload endpoints read entire file content into memory (up to 500MB).
      Should be refactored to stream uploads directly to S3 via multipart upload
      for better memory efficiency under concurrent load.
    - Transaction safety: Multi-file uploads are not atomic. If the endpoint fails
      mid-batch (e.g., file 3 of 5 exceeds storage quota), files 1-2 are already
      persisted to S3 and DB. A partial upload is not catastrophic but the response
      implies atomicity that doesn't exist.
"""

import hashlib
import mimetypes
import re
import zipfile
from datetime import datetime
from datetime import timezone
from io import BytesIO
from typing import Any

from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.client import app as celery_app
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.document import upsert_documents
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentMetadata
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
from onyx.server.features.build.configs import USER_LIBRARY_SOURCE_DIR
from onyx.server.features.build.db.user_library import get_or_create_craft_connector
from onyx.server.features.build.db.user_library import get_user_storage_bytes
from onyx.server.features.build.indexing.persistent_document_writer import (
    get_persistent_document_writer,
)
from onyx.server.features.build.indexing.persistent_document_writer import (
    PersistentDocumentWriter,
)
from onyx.server.features.build.indexing.persistent_document_writer import (
    S3PersistentDocumentWriter,
)
from onyx.server.features.build.utils import sanitize_filename as api_sanitize_filename
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/user-library")


# =============================================================================
# Pydantic Models
# =============================================================================


class LibraryEntryResponse(BaseModel):
    """Response for a single library entry (file or directory)."""

    id: str  # document_id
    name: str
    path: str
    is_directory: bool
    file_size: int | None
    mime_type: str | None
    sync_enabled: bool
    created_at: datetime
    children: list["LibraryEntryResponse"] | None = None


class CreateDirectoryRequest(BaseModel):
    """Request to create a virtual directory."""

    name: str
    parent_path: str = "/"


class UploadResponse(BaseModel):
    """Response after successful file upload."""

    entries: list[LibraryEntryResponse]
    total_uploaded: int
    total_size_bytes: int


class ToggleSyncResponse(BaseModel):
    """Response after toggling file sync."""

    success: bool
    sync_enabled: bool


class DeleteFileResponse(BaseModel):
    """Response after deleting a file."""

    success: bool
    deleted: str


# =============================================================================
# Helper Functions
# =============================================================================


def _sanitize_path(path: str) -> str:
    """Sanitize a file path, removing traversal attempts and normalizing.

    Removes '..' and '.' segments and ensures the path starts with '/'.
    Only allows alphanumeric characters, hyphens, underscores, dots, spaces,
    and forward slashes. All other characters are stripped.
    """
    parts = path.split("/")
    sanitized_parts: list[str] = []
    for p in parts:
        if not p or p == ".." or p == ".":
            continue
        # Strip any character not in the whitelist
        cleaned = re.sub(r"[^a-zA-Z0-9\-_. ]", "", p)
        if cleaned:
            sanitized_parts.append(cleaned)
    return "/" + "/".join(sanitized_parts)


def _build_document_id(user_id: str, path: str) -> str:
    """Build a document ID for a craft file.

    Deterministic: re-uploading the same file to the same path will produce the
    same document ID, allowing upsert to overwrite the previous record.

    Uses a hash of the path to avoid collisions from separator replacement
    (e.g., "/a/b_c" vs "/a_b/c" would collide with naive slash-to-underscore).
    """
    path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]
    return f"CRAFT_FILE__{user_id}__{path_hash}"


def _trigger_sandbox_sync(
    user_id: str, tenant_id: str, source: str | None = None
) -> None:
    """Trigger sandbox file sync task.

    Args:
        user_id: The user ID whose sandbox should be synced
        tenant_id: The tenant ID for S3 path construction
        source: Optional source type (e.g., "user_library"). If specified,
                only syncs that source's directory with --delete flag.
    """
    celery_app.send_task(
        OnyxCeleryTask.SANDBOX_FILE_SYNC,
        kwargs={"user_id": user_id, "tenant_id": tenant_id, "source": source},
        queue=OnyxCeleryQueues.SANDBOX,
    )


def _validate_zip_contents(
    zip_file: zipfile.ZipFile,
    existing_usage: int,
) -> None:
    """Validate zip file contents before extraction.

    Checks file count limit and total decompressed size against storage quota.
    Raises HTTPException on validation failure.
    """
    if len(zip_file.namelist()) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
        raise HTTPException(
            status_code=400,
            detail=f"Zip contains too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD}.",
        )

    # Zip bomb protection: check total decompressed size before extracting
    declared_total = sum(
        info.file_size for info in zip_file.infolist() if not info.is_dir()
    )
    if existing_usage + declared_total > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Zip decompressed size ({declared_total // (1024 * 1024)}MB) would exceed storage limit."
            ),
        )


def _verify_ownership_and_get_document(
    document_id: str,
    user: User,
    db_session: Session,
) -> Any:
    """Verify the user owns the document and return it.

    Raises HTTPException on authorization failure or if document not found.
    """
    from onyx.db.document import get_document

    user_prefix = f"CRAFT_FILE__{user.id}__"
    if not document_id.startswith(user_prefix):
        raise HTTPException(
            status_code=403, detail="Not authorized to modify this file"
        )

    doc = get_document(document_id, db_session)
    if doc is None:
        raise HTTPException(status_code=404, detail="File not found")

    return doc


def _store_and_track_file(
    *,
    writer: "PersistentDocumentWriter | S3PersistentDocumentWriter",
    file_path: str,
    content: bytes,
    content_type: str | None,
    user_id: str,
    connector_id: int,
    credential_id: int,
    db_session: Session,
) -> tuple[str, str]:
    """Write a file to storage and upsert its document record.

    Returns:
        Tuple of (document_id, storage_key)
    """
    storage_key = writer.write_raw_file(
        path=file_path,
        content=content,
        content_type=content_type,
    )

    doc_id = _build_document_id(user_id, file_path)
    doc_metadata = DocumentMetadata(
        connector_id=connector_id,
        credential_id=credential_id,
        document_id=doc_id,
        semantic_identifier=f"{USER_LIBRARY_SOURCE_DIR}{file_path}",
        first_link=storage_key,
        doc_metadata={
            "storage_key": storage_key,
            "file_path": file_path,
            "file_size": len(content),
            "mime_type": content_type,
            "is_directory": False,
        },
    )
    upsert_documents(db_session, [doc_metadata])
    upsert_document_by_connector_credential_pair(
        db_session, connector_id, credential_id, [doc_id]
    )

    return doc_id, storage_key


# =============================================================================
# API Endpoints
# =============================================================================


@router.get("/tree")
def get_library_tree(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[LibraryEntryResponse]:
    """Get user's uploaded files as a tree structure.

    Returns all CRAFT_FILE documents for the user, organized hierarchically.
    """
    from onyx.db.document import get_documents_by_source

    # Get CRAFT_FILE documents for this user (filtered at SQL level)
    user_docs = get_documents_by_source(
        db_session=db_session,
        source=DocumentSource.CRAFT_FILE,
        creator_id=user.id,
    )

    # Build tree structure
    entries: list[LibraryEntryResponse] = []
    now = datetime.now(timezone.utc)
    for doc in user_docs:
        doc_metadata = doc.doc_metadata or {}
        entries.append(
            LibraryEntryResponse(
                id=doc.id,
                name=doc.semantic_id.split("/")[-1] if doc.semantic_id else "unknown",
                path=doc.semantic_id or "",
                is_directory=doc_metadata.get("is_directory", False),
                file_size=doc_metadata.get("file_size"),
                mime_type=doc_metadata.get("mime_type"),
                sync_enabled=not doc_metadata.get("sync_disabled", False),
                created_at=doc.last_modified or now,
            )
        )

    return entries


@router.post("/upload")
async def upload_files(
    files: list[UploadFile] = File(...),
    path: str = Form("/"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload files directly to S3 and track in PostgreSQL.

    Files are stored as raw binary (no text extraction) for access by
    the sandbox agent using Python libraries like openpyxl, python-pptx, etc.
    """
    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Validate file count
    if len(files) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD} per upload.",
        )

    # Check cumulative storage usage
    existing_usage = get_user_storage_bytes(db_session, user.id)

    # Get or create connector
    connector_id, credential_id = get_or_create_craft_connector(db_session, user)

    # Get the persistent document writer
    writer = get_persistent_document_writer(
        user_id=str(user.id),
        tenant_id=tenant_id,
    )

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
    now = datetime.now(timezone.utc)

    # Sanitize the base path
    base_path = _sanitize_path(path)

    for file in files:
        # TODO: Stream directly to S3 via multipart upload instead of reading
        # entire file into memory. With 500MB max file size, this can OOM under
        # concurrent uploads.
        content = await file.read()
        file_size = len(content)

        # Validate individual file size
        if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
            raise HTTPException(
                status_code=400,
                detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
            )

        # Validate cumulative storage (existing + this upload batch)
        total_size += file_size
        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
            raise HTTPException(
                status_code=400,
                detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB",
            )

        # Sanitize filename
        safe_filename = api_sanitize_filename(file.filename or "unnamed")
        file_path = f"{base_path}/{safe_filename}".replace("//", "/")

        doc_id, _ = _store_and_track_file(
            writer=writer,
            file_path=file_path,
            content=content,
            content_type=file.content_type,
            user_id=str(user.id),
            connector_id=connector_id,
            credential_id=credential_id,
            db_session=db_session,
        )

        uploaded_entries.append(
            LibraryEntryResponse(
                id=doc_id,
                name=safe_filename,
                path=file_path,
                is_directory=False,
                file_size=file_size,
                mime_type=file.content_type,
                sync_enabled=True,
                created_at=now,
            )
        )

    # Mark connector as having succeeded (sets last_successful_index_time)
    # This allows the demo data toggle to be disabled
    update_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        status=ConnectorCredentialPairStatus.ACTIVE,
        net_docs=len(uploaded_entries),
        run_dt=now,
    )

    # Trigger sandbox sync for user_library source only
    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)

    logger.info(
        f"Uploaded {len(uploaded_entries)} files ({total_size} bytes) for user {user.id}"
    )

    return UploadResponse(
        entries=uploaded_entries,
        total_uploaded=len(uploaded_entries),
        total_size_bytes=total_size,
    )


@router.post("/upload-zip")
async def upload_zip(
    file: UploadFile = File(...),
    path: str = Form("/"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UploadResponse:
    """Upload and extract a zip file, storing each extracted file to S3.

    Preserves the directory structure from the zip file.
    """
    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    # Read zip content
    content = await file.read()
    if len(content) > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"Zip file exceeds maximum size of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB",
        )

    # Check cumulative storage usage
    existing_usage = get_user_storage_bytes(db_session, user.id)

    # Get or create connector
    connector_id, credential_id = get_or_create_craft_connector(db_session, user)

    # Get the persistent document writer
    writer = get_persistent_document_writer(
        user_id=str(user.id),
        tenant_id=tenant_id,
    )

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0

    # Extract zip contents into a subfolder named after the zip file
    zip_name = api_sanitize_filename(file.filename or "upload")
    if zip_name.lower().endswith(".zip"):
        zip_name = zip_name[:-4]
    folder_path = f"{_sanitize_path(path)}/{zip_name}".replace("//", "/")
    base_path = folder_path

    now = datetime.now(timezone.utc)

    # Track all directory paths we need to create records for
    directory_paths: set[str] = set()

    try:
        with zipfile.ZipFile(BytesIO(content), "r") as zip_file:
            _validate_zip_contents(zip_file, existing_usage)

            for zip_info in zip_file.infolist():
                # Skip hidden files and __MACOSX
                if (
                    zip_info.filename.startswith("__MACOSX")
                    or "/." in zip_info.filename
                ):
                    continue

                # Skip directories - we'll create records from file paths below
                if zip_info.is_dir():
                    continue

                # Read file content
                file_content = zip_file.read(zip_info.filename)
                file_size = len(file_content)

                # Validate individual file size
                if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:
                    logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
                    continue

                total_size += file_size

                # Validate cumulative storage
                if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB",
                    )

                # Build path preserving zip structure
                sanitized_zip_path = _sanitize_path(zip_info.filename)
                file_path = f"{base_path}{sanitized_zip_path}".replace("//", "/")
                file_name = file_path.split("/")[-1]

                # Collect all intermediate directories for this file
                parts = file_path.split("/")
                for i in range(
                    2, len(parts)
                ):  # start at 2 to skip empty + first segment
                    directory_paths.add("/".join(parts[:i]))

                # Guess content type
                content_type, _ = mimetypes.guess_type(file_name)

                doc_id, _ = _store_and_track_file(
                    writer=writer,
                    file_path=file_path,
                    content=file_content,
                    content_type=content_type,
                    user_id=str(user.id),
                    connector_id=connector_id,
                    credential_id=credential_id,
                    db_session=db_session,
                )

                uploaded_entries.append(
                    LibraryEntryResponse(
                        id=doc_id,
                        name=file_name,
                        path=file_path,
                        is_directory=False,
                        file_size=file_size,
                        mime_type=content_type,
                        sync_enabled=True,
                        created_at=now,
                    )
                )

    except zipfile.BadZipFile:
        raise HTTPException(status_code=400, detail="Invalid zip file")

    # Create directory document records so they appear in the tree view
    if directory_paths:
        dir_doc_ids: list[str] = []
        for dir_path in sorted(directory_paths):
            dir_doc_id = _build_document_id(str(user.id), dir_path)
            dir_doc_ids.append(dir_doc_id)
            dir_metadata = DocumentMetadata(
                connector_id=connector_id,
                credential_id=credential_id,
                document_id=dir_doc_id,
                semantic_identifier=f"{USER_LIBRARY_SOURCE_DIR}{dir_path}",
                first_link="",
                doc_metadata={"is_directory": True},
            )
            upsert_documents(db_session, [dir_metadata])
        upsert_document_by_connector_credential_pair(
            db_session, connector_id, credential_id, dir_doc_ids
        )

    # Mark connector as having succeeded (sets last_successful_index_time)
    # This allows the demo data toggle to be disabled
    update_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        status=ConnectorCredentialPairStatus.ACTIVE,
        net_docs=len(uploaded_entries),
        run_dt=now,
    )

    # Trigger sandbox sync for user_library source only
    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)

    logger.info(
        f"Extracted {len(uploaded_entries)} files ({total_size} bytes) from zip for user {user.id}"
    )

    return UploadResponse(
        entries=uploaded_entries,
        total_uploaded=len(uploaded_entries),
        total_size_bytes=total_size,
    )


@router.post("/directories")
def create_directory(
    request: CreateDirectoryRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> LibraryEntryResponse:
    """Create a virtual directory.

    Directories are tracked as documents with is_directory=True.
    No S3 object is created (S3 doesn't have real directories).
    """
    # Get or create connector
    connector_id, credential_id = get_or_create_craft_connector(db_session, user)

    # Build path
    parent_path = _sanitize_path(request.parent_path)
    safe_name = api_sanitize_filename(request.name)
    dir_path = f"{parent_path}/{safe_name}".replace("//", "/")

    # Track in document table
    doc_id = _build_document_id(str(user.id), dir_path)
    doc_metadata = DocumentMetadata(
        connector_id=connector_id,
        credential_id=credential_id,
        document_id=doc_id,
        semantic_identifier=f"{USER_LIBRARY_SOURCE_DIR}{dir_path}",
        first_link="",
        doc_metadata={
            "is_directory": True,
        },
    )
    upsert_documents(db_session, [doc_metadata])
    upsert_document_by_connector_credential_pair(
        db_session, connector_id, credential_id, [doc_id]
    )
    db_session.commit()

    return LibraryEntryResponse(
        id=doc_id,
        name=safe_name,
        path=dir_path,
        is_directory=True,
        file_size=None,
        mime_type=None,
        sync_enabled=True,
        created_at=datetime.now(timezone.utc),
    )


@router.patch("/files/{document_id}/toggle")
def toggle_file_sync(
    document_id: str,
    enabled: bool = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ToggleSyncResponse:
    """Enable/disable syncing a file to sandboxes.

    When sync is disabled, the file's metadata is updated with sync_disabled=True.
    The sandbox sync task will exclude these files when syncing to the sandbox.

    If the item is a directory, all children are also toggled.
    """
    from onyx.db.document import get_documents_by_source
    from onyx.db.document import update_document_metadata__no_commit

    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    doc = _verify_ownership_and_get_document(document_id, user, db_session)

    # Update metadata for this document
    new_metadata = dict(doc.doc_metadata or {})
    new_metadata["sync_disabled"] = not enabled
    update_document_metadata__no_commit(db_session, document_id, new_metadata)

    # If this is a directory, also toggle all children
    doc_metadata = doc.doc_metadata or {}
    if doc_metadata.get("is_directory"):
        folder_path = doc.semantic_id
        if folder_path:
            all_docs = get_documents_by_source(
                db_session=db_session,
                source=DocumentSource.CRAFT_FILE,
                creator_id=user.id,
            )
            for child_doc in all_docs:
                if child_doc.semantic_id and child_doc.semantic_id.startswith(
                    folder_path + "/"
                ):
                    child_metadata = dict(child_doc.doc_metadata or {})
                    child_metadata["sync_disabled"] = not enabled
                    update_document_metadata__no_commit(
                        db_session, child_doc.id, child_metadata
                    )

    db_session.commit()

    return ToggleSyncResponse(success=True, sync_enabled=enabled)


@router.delete("/files/{document_id}")
def delete_file(
    document_id: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> DeleteFileResponse:
    """Delete a file from both S3 and the document table."""
    from onyx.db.document import delete_document_by_id__no_commit

    tenant_id = get_current_tenant_id()
    if tenant_id is None:
        raise HTTPException(status_code=500, detail="Tenant ID not found")

    doc = _verify_ownership_and_get_document(document_id, user, db_session)

    # Delete from storage if it's a file (not directory)
    doc_metadata = doc.doc_metadata or {}
    if not doc_metadata.get("is_directory"):
        file_path = doc_metadata.get("file_path")
        if file_path:
            writer = get_persistent_document_writer(
                user_id=str(user.id),
                tenant_id=tenant_id,
            )
            try:
                if isinstance(writer, S3PersistentDocumentWriter):
                    writer.delete_raw_file_by_path(file_path)
                else:
                    writer.delete_raw_file(file_path)
            except Exception as e:
                logger.warning(f"Failed to delete file at path {file_path}: {e}")
        else:
            # Fallback for documents created before file_path was stored
            storage_key = doc_metadata.get("storage_key") or doc_metadata.get("s3_key")
            if storage_key:
                writer = get_persistent_document_writer(
                    user_id=str(user.id),
                    tenant_id=tenant_id,
                )
                try:
                    if isinstance(writer, S3PersistentDocumentWriter):
                        writer.delete_raw_file(storage_key)
                    else:
                        logger.warning(
                            f"Cannot delete file in local mode without file_path: {document_id}"
                        )
                except Exception as e:
                    logger.warning(
                        f"Failed to delete storage object {storage_key}: {e}"
                    )

    # Delete from document table
    delete_document_by_id__no_commit(db_session, document_id)
    db_session.commit()

    # Trigger sync to apply changes
    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)

    return DeleteFileResponse(success=True, deleted=document_id)


================================================
FILE: backend/onyx/server/features/build/configs.py
================================================
import os
from enum import Enum
from pathlib import Path


class SandboxBackend(str, Enum):
    """Backend mode for sandbox operations.

    LOCAL: Development mode - no snapshots, no automatic cleanup
    KUBERNETES: Production mode - full snapshots and cleanup
    """

    LOCAL = "local"
    KUBERNETES = "kubernetes"


# Sandbox backend mode (controls snapshot and cleanup behavior)
# "local" = no snapshots, no cleanup (for development)
# "kubernetes" = full snapshots and cleanup (for production)
SANDBOX_BACKEND = SandboxBackend(os.environ.get("SANDBOX_BACKEND", "local"))

# Base directory path for persistent document storage (local filesystem)
# Example: /var/onyx/file-system or /app/file-system
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
    "PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/file-system"
)

# Demo Data Path
# Local: Source tree path (relative to this file)
# Kubernetes: Baked into container image at /workspace/demo_data
_THIS_FILE = Path(__file__)
DEMO_DATA_PATH = str(
    _THIS_FILE.parent / "sandbox" / "kubernetes" / "docker" / "demo_data"
)

# Sandbox filesystem paths
SANDBOX_BASE_PATH = os.environ.get("SANDBOX_BASE_PATH", "/tmp/onyx-sandboxes")
OUTPUTS_TEMPLATE_PATH = os.environ.get("OUTPUTS_TEMPLATE_PATH", "/templates/outputs")
VENV_TEMPLATE_PATH = os.environ.get("VENV_TEMPLATE_PATH", "/templates/venv")

# Sandbox agent configuration
SANDBOX_AGENT_COMMAND = os.environ.get("SANDBOX_AGENT_COMMAND", "opencode").split()

# OpenCode disabled tools (comma-separated list)
# Available tools: bash, edit, write, read, grep, glob, list, lsp, patch,
#                  skill, todowrite, todoread, webfetch, question
# Example: "question,webfetch" to disable user questions and web fetching
_disabled_tools_str = os.environ.get("OPENCODE_DISABLED_TOOLS", "question")
OPENCODE_DISABLED_TOOLS: list[str] = [
    t.strip() for t in _disabled_tools_str.split(",") if t.strip()
]

# Sandbox lifecycle configuration
SANDBOX_IDLE_TIMEOUT_SECONDS = int(
    os.environ.get("SANDBOX_IDLE_TIMEOUT_SECONDS", "3600")
)
SANDBOX_MAX_CONCURRENT_PER_ORG = int(
    os.environ.get("SANDBOX_MAX_CONCURRENT_PER_ORG", "10")
)

# Sandbox snapshot storage
SANDBOX_SNAPSHOTS_BUCKET = os.environ.get(
    "SANDBOX_SNAPSHOTS_BUCKET", "sandbox-snapshots"
)

# Next.js preview server port range
SANDBOX_NEXTJS_PORT_START = int(os.environ.get("SANDBOX_NEXTJS_PORT_START", "3010"))
SANDBOX_NEXTJS_PORT_END = int(os.environ.get("SANDBOX_NEXTJS_PORT_END", "3100"))

# File upload configuration
MAX_UPLOAD_FILE_SIZE_MB = int(os.environ.get("BUILD_MAX_UPLOAD_FILE_SIZE_MB", "50"))
MAX_UPLOAD_FILE_SIZE_BYTES = MAX_UPLOAD_FILE_SIZE_MB * 1024 * 1024
MAX_UPLOAD_FILES_PER_SESSION = int(
    os.environ.get("BUILD_MAX_UPLOAD_FILES_PER_SESSION", "20")
)
MAX_TOTAL_UPLOAD_SIZE_MB = int(os.environ.get("BUILD_MAX_TOTAL_UPLOAD_SIZE_MB", "200"))
MAX_TOTAL_UPLOAD_SIZE_BYTES = MAX_TOTAL_UPLOAD_SIZE_MB * 1024 * 1024
ATTACHMENTS_DIRECTORY = "attachments"

# ============================================================================
# Kubernetes Sandbox Configuration
# Only used when SANDBOX_BACKEND = "kubernetes"
# ============================================================================

# Namespace where sandbox pods are created
SANDBOX_NAMESPACE = os.environ.get("SANDBOX_NAMESPACE", "onyx-sandboxes")

# Container image for sandbox pods
# Should include Next.js template, opencode CLI, and demo_data zip
SANDBOX_CONTAINER_IMAGE = os.environ.get(
    "SANDBOX_CONTAINER_IMAGE", "onyxdotapp/sandbox:v0.1.5"
)

# S3 bucket for sandbox file storage (snapshots, knowledge files, uploads)
# Path structure: s3://{bucket}/{tenant_id}/snapshots/{session_id}/{snapshot_id}.tar.gz
#                 s3://{bucket}/{tenant_id}/knowledge/{user_id}/
#                 s3://{bucket}/{tenant_id}/uploads/{session_id}/
SANDBOX_S3_BUCKET = os.environ.get("SANDBOX_S3_BUCKET", "onyx-sandbox-files")

# Service account for sandbox pods (NO IRSA - no AWS API access)
SANDBOX_SERVICE_ACCOUNT_NAME = os.environ.get(
    "SANDBOX_SERVICE_ACCOUNT_NAME", "sandbox-runner"
)

# Service account for init container (has IRSA for S3 access)
SANDBOX_FILE_SYNC_SERVICE_ACCOUNT = os.environ.get(
    "SANDBOX_FILE_SYNC_SERVICE_ACCOUNT", "sandbox-file-sync"
)

ENABLE_CRAFT = os.environ.get("ENABLE_CRAFT", "false").lower() == "true"

# ============================================================================
# SSE Streaming Configuration
# ============================================================================

# SSE keepalive interval in seconds - send keepalive comment if no events
SSE_KEEPALIVE_INTERVAL = float(os.environ.get("SSE_KEEPALIVE_INTERVAL", "15.0"))

# ============================================================================
# ACP (Agent Communication Protocol) Configuration
# ============================================================================

# Timeout for ACP message processing in seconds
# This is the maximum time to wait for a complete response from the agent
ACP_MESSAGE_TIMEOUT = float(os.environ.get("ACP_MESSAGE_TIMEOUT", "900.0"))

# ============================================================================
# Rate Limiting Configuration
# ============================================================================

# Base rate limit for paid/subscribed users (messages per week)
# Free users always get 5 messages total (not configurable)
# Per-user overrides are managed via PostHog feature flag "craft-has-usage-limits"
CRAFT_PAID_USER_RATE_LIMIT = int(os.environ.get("CRAFT_PAID_USER_RATE_LIMIT", "25"))

# ============================================================================
# User Library Configuration
# For user-uploaded raw files (xlsx, pptx, docx, etc.) in Craft
# ============================================================================

# Maximum size per file in MB (default 500MB)
USER_LIBRARY_MAX_FILE_SIZE_MB = int(
    os.environ.get("USER_LIBRARY_MAX_FILE_SIZE_MB", "500")
)
USER_LIBRARY_MAX_FILE_SIZE_BYTES = USER_LIBRARY_MAX_FILE_SIZE_MB * 1024 * 1024

# Maximum total storage per user in GB (default 10GB)
USER_LIBRARY_MAX_TOTAL_SIZE_GB = int(
    os.environ.get("USER_LIBRARY_MAX_TOTAL_SIZE_GB", "10")
)
USER_LIBRARY_MAX_TOTAL_SIZE_BYTES = USER_LIBRARY_MAX_TOTAL_SIZE_GB * 1024 * 1024 * 1024

# Maximum files per single upload request (default 100)
USER_LIBRARY_MAX_FILES_PER_UPLOAD = int(
    os.environ.get("USER_LIBRARY_MAX_FILES_PER_UPLOAD", "100")
)

# String constants for User Library entities
USER_LIBRARY_CONNECTOR_NAME = "User Library"
USER_LIBRARY_CREDENTIAL_NAME = "User Library Credential"
USER_LIBRARY_SOURCE_DIR = "user_library"


================================================
FILE: backend/onyx/server/features/build/db/__init__.py
================================================
# Database operations for the build feature


================================================
FILE: backend/onyx/server/features/build/db/build_session.py
================================================
"""Database operations for Build Mode sessions."""

from datetime import datetime
from typing import Any
from uuid import UUID

from sqlalchemy import desc
from sqlalchemy import exists
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session

from onyx.configs.constants import MessageType
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.enums import SharingScope
from onyx.db.models import Artifact
from onyx.db.models import BuildMessage
from onyx.db.models import BuildSession
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import Sandbox
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_END
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START
from onyx.server.manage.llm.models import LLMProviderView
from onyx.utils.logger import setup_logger

logger = setup_logger()


def create_build_session__no_commit(
    user_id: UUID,
    db_session: Session,
    name: str | None = None,
    demo_data_enabled: bool = True,
) -> BuildSession:
    """Create a new build session for the given user.

    NOTE: This function uses flush() instead of commit(). The caller is
    responsible for committing the transaction when ready.

    Args:
        user_id: The user ID
        db_session: Database session
        name: Optional session name
        demo_data_enabled: Whether this session uses demo data (default True)
    """
    session = BuildSession(
        user_id=user_id,
        name=name,
        status=BuildSessionStatus.ACTIVE,
        demo_data_enabled=demo_data_enabled,
    )
    db_session.add(session)
    db_session.flush()

    logger.info(
        f"Created build session {session.id} for user {user_id} (demo_data={demo_data_enabled})"
    )
    return session


def get_build_session(
    session_id: UUID,
    user_id: UUID,
    db_session: Session,
) -> BuildSession | None:
    """Get a build session by ID, ensuring it belongs to the user."""
    return (
        db_session.query(BuildSession)
        .filter(
            BuildSession.id == session_id,
            BuildSession.user_id == user_id,
        )
        .one_or_none()
    )


def get_user_build_sessions(
    user_id: UUID,
    db_session: Session,
    limit: int = 100,
) -> list[BuildSession]:
    """Get all build sessions for a user that have at least one message.

    Excludes empty (pre-provisioned) sessions from the listing.
    """
    # Subquery to check if session has any messages
    has_messages = exists().where(BuildMessage.session_id == BuildSession.id)

    return (
        db_session.query(BuildSession)
        .filter(
            BuildSession.user_id == user_id,
            has_messages,  # Only sessions with messages
        )
        .order_by(desc(BuildSession.created_at))
        .limit(limit)
        .all()
    )


def get_empty_session_for_user(
    user_id: UUID,
    db_session: Session,
    demo_data_enabled: bool | None = None,
) -> BuildSession | None:
    """Get an empty (pre-provisioned) session for the user if one exists.

    Returns a session with no messages, or None if all sessions have messages.

    Args:
        user_id: The user ID
        db_session: Database session
        demo_data_enabled: Match sessions with this demo_data setting.
                          If None, matches any session regardless of setting.
    """
    # Subquery to check if session has any messages
    has_messages = exists().where(BuildMessage.session_id == BuildSession.id)

    query = db_session.query(BuildSession).filter(
        BuildSession.user_id == user_id,
        ~has_messages,  # Sessions with no messages only
    )

    if demo_data_enabled is not None:
        query = query.filter(BuildSession.demo_data_enabled == demo_data_enabled)

    return query.first()


def update_session_activity(
    session_id: UUID,
    db_session: Session,
) -> None:
    """Update the last activity timestamp for a session."""
    session = (
        db_session.query(BuildSession)
        .filter(BuildSession.id == session_id)
        .one_or_none()
    )
    if session:
        session.last_activity_at = datetime.utcnow()
        db_session.commit()


def update_session_status(
    session_id: UUID,
    status: BuildSessionStatus,
    db_session: Session,
) -> None:
    """Update the status of a build session."""
    session = (
        db_session.query(BuildSession)
        .filter(BuildSession.id == session_id)
        .one_or_none()
    )
    if session:
        session.status = status
        db_session.commit()
        logger.info(f"Updated build session {session_id} status to {status}")


def set_build_session_sharing_scope(
    session_id: UUID,
    user_id: UUID,
    sharing_scope: SharingScope,
    db_session: Session,
) -> BuildSession | None:
    """Set the sharing scope of a build session.

    Only the session owner can change this setting.
    Returns the updated session, or None if not found/unauthorized.
    """
    session = get_build_session(session_id, user_id, db_session)
    if not session:
        return None
    session.sharing_scope = sharing_scope
    db_session.commit()
    logger.info(f"Set build session {session_id} sharing_scope={sharing_scope}")
    return session


def delete_build_session__no_commit(
    session_id: UUID,
    user_id: UUID,
    db_session: Session,
) -> bool:
    """Delete a build session and all related data.

    NOTE: This function uses flush() instead of commit(). The caller is
    responsible for committing the transaction when ready.
    """
    session = get_build_session(session_id, user_id, db_session)
    if not session:
        return False

    db_session.delete(session)
    db_session.flush()
    logger.info(f"Deleted build session {session_id}")
    return True


# Sandbox operations
# NOTE: Most sandbox operations have moved to sandbox.py
# These remain here for convenience in session-related workflows


def update_sandbox_status(
    sandbox_id: UUID,
    status: SandboxStatus,
    db_session: Session,
    container_id: str | None = None,
) -> None:
    """Update the status of a sandbox."""
    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()
    if sandbox:
        sandbox.status = status
        if container_id is not None:
            sandbox.container_id = container_id
        sandbox.last_heartbeat = datetime.utcnow()
        db_session.commit()
        logger.info(f"Updated sandbox {sandbox_id} status to {status}")


def update_sandbox_heartbeat(
    sandbox_id: UUID,
    db_session: Session,
) -> None:
    """Update the heartbeat timestamp for a sandbox."""
    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()
    if sandbox:
        sandbox.last_heartbeat = datetime.utcnow()
        db_session.commit()


# Artifact operations
def create_artifact(
    session_id: UUID,
    artifact_type: str,
    path: str,
    name: str,
    db_session: Session,
) -> Artifact:
    """Create a new artifact record."""
    artifact = Artifact(
        session_id=session_id,
        type=artifact_type,
        path=path,
        name=name,
    )
    db_session.add(artifact)
    db_session.commit()
    db_session.refresh(artifact)

    logger.info(f"Created artifact {artifact.id} for session {session_id}")
    return artifact


def get_session_artifacts(
    session_id: UUID,
    db_session: Session,
) -> list[Artifact]:
    """Get all artifacts for a session."""
    return (
        db_session.query(Artifact)
        .filter(Artifact.session_id == session_id)
        .order_by(desc(Artifact.created_at))
        .all()
    )


def update_artifact(
    artifact_id: UUID,
    db_session: Session,
    path: str | None = None,
    name: str | None = None,
) -> None:
    """Update artifact metadata."""
    artifact = (
        db_session.query(Artifact).filter(Artifact.id == artifact_id).one_or_none()
    )
    if artifact:
        if path is not None:
            artifact.path = path
        if name is not None:
            artifact.name = name
        artifact.updated_at = datetime.utcnow()
        db_session.commit()
        logger.info(f"Updated artifact {artifact_id}")


# Message operations
def create_message(
    session_id: UUID,
    message_type: MessageType,
    turn_index: int,
    message_metadata: dict[str, Any],
    db_session: Session,
) -> BuildMessage:
    """Create a new message in a build session.

    All message data is stored in message_metadata as JSON.

    Args:
        session_id: Session UUID
        message_type: Type of message (USER, ASSISTANT, SYSTEM)
        turn_index: 0-indexed user message number this message belongs to
        message_metadata: Required structured data (the raw ACP packet JSON)
        db_session: Database session
    """
    message = BuildMessage(
        session_id=session_id,
        turn_index=turn_index,
        type=message_type,
        message_metadata=message_metadata,
    )
    db_session.add(message)
    db_session.commit()
    db_session.refresh(message)

    logger.info(
        f"Created {message_type.value} message {message.id} for session {session_id} "
        f"turn={turn_index} type={message_metadata.get('type')}"
    )
    return message


def update_message(
    message_id: UUID,
    message_metadata: dict[str, Any],
    db_session: Session,
) -> BuildMessage | None:
    """Update an existing message's metadata.

    Used for upserting agent_plan_update messages.

    Args:
        message_id: The message UUID to update
        message_metadata: New metadata to set
        db_session: Database session

    Returns:
        Updated BuildMessage or None if not found
    """
    message = (
        db_session.query(BuildMessage).filter(BuildMessage.id == message_id).first()
    )
    if message is None:
        return None

    message.message_metadata = message_metadata
    db_session.commit()
    db_session.refresh(message)

    logger.info(
        f"Updated message {message_id} metadata type={message_metadata.get('type')}"
    )
    return message


def upsert_agent_plan(
    session_id: UUID,
    turn_index: int,
    plan_metadata: dict[str, Any],
    db_session: Session,
    existing_plan_id: UUID | None = None,
) -> BuildMessage:
    """Upsert an agent plan - update if exists, create if not.

    Each session/turn should only have one agent_plan_update message.
    This function updates the existing plan message or creates a new one.

    Args:
        session_id: Session UUID
        turn_index: Current turn index
        plan_metadata: The agent_plan_update packet data
        db_session: Database session
        existing_plan_id: ID of existing plan message to update (if known)

    Returns:
        The created or updated BuildMessage
    """
    if existing_plan_id:
        # Fast path: we know the plan ID
        updated = update_message(existing_plan_id, plan_metadata, db_session)
        if updated:
            return updated

    # Check if a plan already exists for this session/turn
    existing_plan = (
        db_session.query(BuildMessage)
        .filter(
            BuildMessage.session_id == session_id,
            BuildMessage.turn_index == turn_index,
            BuildMessage.message_metadata["type"].astext == "agent_plan_update",
        )
        .first()
    )

    if existing_plan:
        existing_plan.message_metadata = plan_metadata
        db_session.commit()
        db_session.refresh(existing_plan)
        logger.info(
            f"Updated agent_plan_update message {existing_plan.id} for session {session_id}"
        )
        return existing_plan

    # Create new plan message
    return create_message(
        session_id=session_id,
        message_type=MessageType.ASSISTANT,
        turn_index=turn_index,
        message_metadata=plan_metadata,
        db_session=db_session,
    )


def get_session_messages(
    session_id: UUID,
    db_session: Session,
) -> list[BuildMessage]:
    """Get all messages for a session, ordered by turn index and creation time."""
    return (
        db_session.query(BuildMessage)
        .filter(BuildMessage.session_id == session_id)
        .order_by(BuildMessage.turn_index, BuildMessage.created_at)
        .all()
    )


def _is_port_available(port: int) -> bool:
    """Check if a port is available by attempting to bind to it.

    Checks both IPv4 and IPv6 wildcard addresses to properly detect
    if anything is listening on the port, regardless of address family.
    """
    import socket

    logger.debug(f"Checking if port {port} is available")

    # Check IPv4 wildcard (0.0.0.0) - this will detect any IPv4 listener
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(("0.0.0.0", port))
            logger.debug(f"Port {port} IPv4 wildcard bind successful")
    except OSError as e:
        logger.debug(f"Port {port} IPv4 wildcard not available: {e}")
        return False

    # Check IPv6 wildcard (::) - this will detect any IPv6 listener
    try:
        with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as sock:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # IPV6_V6ONLY must be False to allow dual-stack behavior
            sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            sock.bind(("::", port))
            logger.debug(f"Port {port} IPv6 wildcard bind successful")
    except OSError as e:
        logger.debug(f"Port {port} IPv6 wildcard not available: {e}")
        return False

    logger.debug(f"Port {port} is available")
    return True


def allocate_nextjs_port(db_session: Session) -> int:
    """Allocate an available port for a new session.

    Finds the first available port in the configured range by checking
    both database allocations and system-level port availability.

    Args:
        db_session: Database session for querying allocated ports

    Returns:
        An available port number

    Raises:
        RuntimeError: If no ports are available in the configured range
    """
    from onyx.db.models import BuildSession

    # Get all currently allocated ports from active sessions
    allocated_ports = set(
        db_session.query(BuildSession.nextjs_port)
        .filter(BuildSession.nextjs_port.isnot(None))
        .all()
    )
    allocated_ports = {port[0] for port in allocated_ports if port[0] is not None}

    # Find first port that's not in DB and not currently bound
    for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):
        if port not in allocated_ports and _is_port_available(port):
            return port

    raise RuntimeError(
        f"No available ports in range [{SANDBOX_NEXTJS_PORT_START}, {SANDBOX_NEXTJS_PORT_END})"
    )


def mark_user_sessions_idle__no_commit(db_session: Session, user_id: UUID) -> int:
    """Mark all ACTIVE sessions for a user as IDLE.

    Called when a sandbox goes to sleep so the frontend knows these sessions
    need restoration before they can be used again.

    Args:
        db_session: Database session
        user_id: The user whose sessions should be marked idle

    Returns:
        Number of sessions updated
    """
    result = (
        db_session.query(BuildSession)
        .filter(
            BuildSession.user_id == user_id,
            BuildSession.status == BuildSessionStatus.ACTIVE,
        )
        .update({BuildSession.status: BuildSessionStatus.IDLE})
    )
    db_session.flush()
    logger.info(f"Marked {result} sessions as IDLE for user {user_id}")
    return result


def clear_nextjs_ports_for_user(db_session: Session, user_id: UUID) -> int:
    """Clear nextjs_port for all sessions belonging to a user.

    Called when sandbox goes to sleep to release port allocations.

    Args:
        db_session: Database session
        user_id: The user whose sessions should have ports cleared

    Returns:
        Number of sessions updated
    """
    result = (
        db_session.query(BuildSession)
        .filter(
            BuildSession.user_id == user_id,
            BuildSession.nextjs_port.isnot(None),
        )
        .update({BuildSession.nextjs_port: None})
    )
    db_session.flush()
    logger.info(f"Cleared {result} nextjs_port allocations for user {user_id}")
    return result


def fetch_llm_provider_by_type_for_build_mode(
    db_session: Session, provider_type: str
) -> LLMProviderView | None:
    """Fetch an LLM provider by its provider type (e.g., "anthropic", "openai").

    Resolution priority:
    1. First try to find a provider named "build-mode-{type}" (e.g., "build-mode-anthropic")
    2. If not found, fall back to any provider that matches the type

    Args:
        db_session: Database session
        provider_type: The provider type (e.g., "anthropic", "openai", "openrouter")

    Returns:
        LLMProviderView if found, None otherwise
    """
    from onyx.db.llm import fetch_existing_llm_provider

    # First try to find a "build-mode-{type}" provider
    build_mode_name = f"build-mode-{provider_type}"
    provider_model = fetch_existing_llm_provider(
        name=build_mode_name, db_session=db_session
    )

    # If not found, fall back to any provider that matches the type
    if not provider_model:
        provider_model = db_session.scalar(
            select(LLMProviderModel)
            .where(LLMProviderModel.provider == provider_type)
            .options(
                selectinload(LLMProviderModel.model_configurations),
                selectinload(LLMProviderModel.groups),
                selectinload(LLMProviderModel.personas),
            )
        )

    if not provider_model:
        return None
    return LLMProviderView.from_model(provider_model)


================================================
FILE: backend/onyx/server/features/build/db/rate_limit.py
================================================
"""Database queries for Build Mode rate limiting."""

from datetime import datetime
from uuid import UUID

from sqlalchemy import func
from sqlalchemy.orm import Session

from onyx.configs.constants import MessageType
from onyx.db.models import BuildMessage
from onyx.db.models import BuildSession


def count_user_messages_in_window(
    user_id: UUID,
    cutoff_time: datetime,
    db_session: Session,
) -> int:
    """
    Count USER messages for a user since cutoff_time.

    Args:
        user_id: The user's UUID
        cutoff_time: Only count messages created at or after this time
        db_session: Database session

    Returns:
        Number of USER messages in the time window
    """
    return (
        db_session.query(func.count(BuildMessage.id))
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
            BuildMessage.created_at >= cutoff_time,
        )
        .scalar()
        or 0
    )


def count_user_messages_total(user_id: UUID, db_session: Session) -> int:
    """
    Count all USER messages for a user (lifetime total).

    Args:
        user_id: The user's UUID
        db_session: Database session

    Returns:
        Total number of USER messages
    """
    return (
        db_session.query(func.count(BuildMessage.id))
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
        )
        .scalar()
        or 0
    )


def get_oldest_message_timestamp(
    user_id: UUID,
    cutoff_time: datetime,
    db_session: Session,
) -> datetime | None:
    """
    Get the timestamp of the oldest USER message in the time window.

    Used to calculate when the rate limit will reset (when the oldest
    message ages out of the rolling window).

    Args:
        user_id: The user's UUID
        cutoff_time: Only consider messages created at or after this time
        db_session: Database session

    Returns:
        Timestamp of oldest message in window, or None if no messages
    """
    return (
        db_session.query(BuildMessage.created_at)
        .join(BuildSession, BuildMessage.session_id == BuildSession.id)
        .filter(
            BuildSession.user_id == user_id,
            BuildMessage.type == MessageType.USER,
            BuildMessage.created_at >= cutoff_time,
        )
        .order_by(BuildMessage.created_at.asc())
        .limit(1)
        .scalar()
    )


================================================
FILE: backend/onyx/server/features/build/db/sandbox.py
================================================
"""Database operations for CLI agent sandbox management."""

import datetime
from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.enums import SandboxStatus
from onyx.db.models import Sandbox
from onyx.db.models import Snapshot
from onyx.utils.logger import setup_logger

logger = setup_logger()


def create_sandbox__no_commit(
    db_session: Session,
    user_id: UUID,
) -> Sandbox:
    """Create a new sandbox record for a user.

    Sets last_heartbeat to now so that:
    1. The sandbox has a proper idle timeout baseline from creation
    2. Long-running provisioning doesn't cause the sandbox to appear "old"
       when it transitions to RUNNING

    NOTE: This function uses flush() instead of commit(). The caller is
    responsible for committing the transaction when ready.
    """
    sandbox = Sandbox(
        user_id=user_id,
        status=SandboxStatus.PROVISIONING,
        last_heartbeat=datetime.datetime.now(datetime.timezone.utc),
    )
    db_session.add(sandbox)
    db_session.flush()
    return sandbox


def get_sandbox_by_user_id(db_session: Session, user_id: UUID) -> Sandbox | None:
    """Get sandbox by user ID (primary lookup method)."""
    stmt = select(Sandbox).where(Sandbox.user_id == user_id)
    return db_session.execute(stmt).scalar_one_or_none()


def get_sandbox_by_session_id(db_session: Session, session_id: UUID) -> Sandbox | None:
    """Get sandbox by session ID (compatibility function).

    This function provides backwards compatibility during the transition to
    user-owned sandboxes. It looks up the session's user_id, then finds the
    user's sandbox.

    NOTE: This will be removed in a future phase when all callers are updated
    to use get_sandbox_by_user_id() directly.
    """
    from onyx.db.models import BuildSession

    stmt = select(BuildSession.user_id).where(BuildSession.id == session_id)
    result = db_session.execute(stmt).scalar_one_or_none()
    if result is None:
        return None

    return get_sandbox_by_user_id(db_session, result)


def get_sandbox_by_id(db_session: Session, sandbox_id: UUID) -> Sandbox | None:
    """Get sandbox by its ID."""
    stmt = select(Sandbox).where(Sandbox.id == sandbox_id)
    return db_session.execute(stmt).scalar_one_or_none()


def update_sandbox_status__no_commit(
    db_session: Session,
    sandbox_id: UUID,
    status: SandboxStatus,
) -> Sandbox:
    """Update sandbox status.

    When transitioning to RUNNING, also sets last_heartbeat to now. This ensures
    newly provisioned sandboxes have a proper idle timeout baseline (rather than
    being immediately considered idle due to NULL heartbeat).

    NOTE: This function uses flush() instead of commit(). The caller is
    responsible for committing the transaction when ready.
    """
    sandbox = get_sandbox_by_id(db_session, sandbox_id)
    if not sandbox:
        raise ValueError(f"Sandbox {sandbox_id} not found")

    sandbox.status = status

    # Set heartbeat when sandbox becomes active to establish idle timeout baseline
    if status == SandboxStatus.RUNNING:
        sandbox.last_heartbeat = datetime.datetime.now(datetime.timezone.utc)

    db_session.flush()
    return sandbox


def update_sandbox_heartbeat(db_session: Session, sandbox_id: UUID) -> Sandbox:
    """Update sandbox last_heartbeat to now."""
    sandbox = get_sandbox_by_id(db_session, sandbox_id)
    if not sandbox:
        raise ValueError(f"Sandbox {sandbox_id} not found")

    sandbox.last_heartbeat = datetime.datetime.now(datetime.timezone.utc)
    db_session.commit()
    return sandbox


def get_idle_sandboxes(
    db_session: Session, idle_threshold_seconds: int
) -> list[Sandbox]:
    """Get sandboxes that have been idle longer than threshold.

    Also includes sandboxes with NULL heartbeat, but only if they were created
    before the threshold (to avoid sweeping up brand-new sandboxes that may have
    NULL heartbeat due to edge cases like older rows or manual inserts).
    """
    threshold_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        seconds=idle_threshold_seconds
    )

    stmt = select(Sandbox).where(
        Sandbox.status == SandboxStatus.RUNNING,
        or_(
            Sandbox.last_heartbeat < threshold_time,
            and_(
                Sandbox.last_heartbeat.is_(None),
                Sandbox.created_at < threshold_time,
            ),
        ),
    )
    return list(db_session.execute(stmt).scalars().all())


def get_running_sandbox_count_by_tenant(
    db_session: Session,
    tenant_id: str,  # noqa: ARG001
) -> int:
    """Get count of running sandboxes for a tenant (for limit enforcement).

    Note: tenant_id parameter is kept for API compatibility but is not used
    since Sandbox model no longer has tenant_id. This function returns
    the count of all running sandboxes.
    """
    stmt = select(func.count(Sandbox.id)).where(Sandbox.status == SandboxStatus.RUNNING)
    result = db_session.execute(stmt).scalar()
    return result or 0


def create_snapshot__no_commit(
    db_session: Session,
    session_id: UUID,
    storage_path: str,
    size_bytes: int,
) -> Snapshot:
    """Create a snapshot record for a session.

    NOTE: Uses flush() instead of commit(). The caller (cleanup task) is
    responsible for committing after all snapshots + status updates are done,
    so the entire operation is atomic.
    """
    snapshot = Snapshot(
        session_id=session_id,
        storage_path=storage_path,
        size_bytes=size_bytes,
    )
    db_session.add(snapshot)
    db_session.flush()
    return snapshot


def get_latest_snapshot_for_session(
    db_session: Session, session_id: UUID
) -> Snapshot | None:
    """Get most recent snapshot for a session."""
    stmt = (
        select(Snapshot)
        .where(Snapshot.session_id == session_id)
        .order_by(Snapshot.created_at.desc())
        .limit(1)
    )
    return db_session.execute(stmt).scalar_one_or_none()


def get_snapshots_for_session(db_session: Session, session_id: UUID) -> list[Snapshot]:
    """Get all snapshots for a session, ordered by creation time descending."""
    stmt = (
        select(Snapshot)
        .where(Snapshot.session_id == session_id)
        .order_by(Snapshot.created_at.desc())
    )
    return list(db_session.execute(stmt).scalars().all())


def delete_old_snapshots(
    db_session: Session,
    tenant_id: str,  # noqa: ARG001
    retention_days: int,
) -> int:
    """Delete snapshots older than retention period, return count deleted.

    Note: tenant_id parameter is kept for API compatibility but is not used
    since Snapshot model no longer has tenant_id. This function deletes
    all snapshots older than the retention period.
    """
    cutoff_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        days=retention_days
    )

    stmt = select(Snapshot).where(
        Snapshot.created_at < cutoff_time,
    )
    old_snapshots = db_session.execute(stmt).scalars().all()

    count = 0
    for snapshot in old_snapshots:
        db_session.delete(snapshot)
        count += 1

    if count > 0:
        db_session.commit()

    return count


def delete_snapshot(db_session: Session, snapshot_id: UUID) -> bool:
    """Delete a specific snapshot by ID. Returns True if deleted, False if not found."""
    stmt = select(Snapshot).where(Snapshot.id == snapshot_id)
    snapshot = db_session.execute(stmt).scalar_one_or_none()

    if not snapshot:
        return False

    db_session.delete(snapshot)
    db_session.commit()
    return True


================================================
FILE: backend/onyx/server/features/build/db/user_library.py
================================================
"""Database operations for User Library (CRAFT_FILE connector).

Handles storage quota queries and connector/credential setup for the
User Library feature in Craft.
"""

from uuid import UUID

from sqlalchemy import and_
from sqlalchemy import cast
from sqlalchemy import func
from sqlalchemy import Integer
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector import fetch_connectors
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
    get_connector_credential_pairs_for_user,
)
from onyx.db.credentials import create_credential
from onyx.db.credentials import fetch_credentials_for_user
from onyx.db.enums import AccessType
from onyx.db.enums import ProcessingMode
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document as DbDocument
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import User
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.features.build.configs import USER_LIBRARY_CONNECTOR_NAME
from onyx.server.features.build.configs import USER_LIBRARY_CREDENTIAL_NAME
from onyx.utils.logger import setup_logger

logger = setup_logger()


def get_user_storage_bytes(db_session: Session, user_id: UUID) -> int:
    """Get total storage usage for a user's library files.

    Uses SQL aggregation to sum file_size from doc_metadata JSONB for all
    CRAFT_FILE documents owned by this user, avoiding loading all documents
    into Python memory.
    """
    stmt = (
        select(
            func.coalesce(
                func.sum(
                    cast(
                        DbDocument.doc_metadata["file_size"].as_string(),
                        Integer,
                    )
                ),
                0,
            )
        )
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .where(Connector.source == DocumentSource.CRAFT_FILE)
        .where(ConnectorCredentialPair.creator_id == user_id)
        .where(DbDocument.doc_metadata["is_directory"].as_boolean().is_not(True))
    )
    result = db_session.execute(stmt).scalar()
    return int(result or 0)


def get_or_create_craft_connector(db_session: Session, user: User) -> tuple[int, int]:
    """Get or create the CRAFT_FILE connector for a user.

    Returns:
        Tuple of (connector_id, credential_id)

    Note: We need to create a credential even though CRAFT_FILE doesn't require
    authentication. This is because Onyx's connector-credential pair system
    requires a credential for all connectors. The credential is empty ({}).

    This function handles recovery from partial creation failures by detecting
    orphaned connectors (connectors without cc_pairs) and completing their setup.
    """
    # Check if user already has a complete CRAFT_FILE cc_pair
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=False,
        eager_load_connector=True,
        eager_load_credential=True,
        processing_mode=ProcessingMode.RAW_BINARY,
    )

    for cc_pair in cc_pairs:
        if (
            cc_pair.connector.source == DocumentSource.CRAFT_FILE
            and cc_pair.creator_id == user.id
        ):
            return cc_pair.connector.id, cc_pair.credential.id

    # No cc_pair for this user — find or create the shared CRAFT_FILE connector
    existing_connectors = fetch_connectors(
        db_session, sources=[DocumentSource.CRAFT_FILE]
    )
    connector_id: int | None = None
    for conn in existing_connectors:
        if conn.name == USER_LIBRARY_CONNECTOR_NAME:
            connector_id = conn.id
            break

    if connector_id is None:
        connector_data = ConnectorBase(
            name=USER_LIBRARY_CONNECTOR_NAME,
            source=DocumentSource.CRAFT_FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={"disabled_paths": []},
            refresh_freq=None,
            prune_freq=None,
        )
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )
        connector_id = connector_response.id

    # Try to reuse an existing User Library credential for this user
    existing_credentials = fetch_credentials_for_user(
        db_session=db_session,
        user=user,
    )
    credential = None
    for cred in existing_credentials:
        if (
            cred.source == DocumentSource.CRAFT_FILE
            and cred.name == USER_LIBRARY_CREDENTIAL_NAME
        ):
            credential = cred
            break

    if credential is None:
        credential_data = CredentialBase(
            credential_json={},
            admin_public=False,
            source=DocumentSource.CRAFT_FILE,
            name=USER_LIBRARY_CREDENTIAL_NAME,
        )
        credential = create_credential(
            credential_data=credential_data,
            user=user,
            db_session=db_session,
        )

    # Link them with RAW_BINARY processing mode
    add_credential_to_connector(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential.id,
        user=user,
        cc_pair_name=USER_LIBRARY_CONNECTOR_NAME,
        access_type=AccessType.PRIVATE,
        groups=None,
        processing_mode=ProcessingMode.RAW_BINARY,
    )

    db_session.commit()
    return connector_id, credential.id


================================================
FILE: backend/onyx/server/features/build/indexing/persistent_document_writer.py
================================================
"""
Persistent Document Writer for writing indexed documents to local filesystem or S3 with
hierarchical directory structure that mirrors the source organization.

Local mode (SandboxBackend.LOCAL):
    Writes to local filesystem at {PERSISTENT_DOCUMENT_STORAGE_PATH}/{tenant_id}/knowledge/{user_id}/...

Kubernetes mode (SandboxBackend.KUBERNETES):
    Writes to S3 at s3://{SANDBOX_S3_BUCKET}/{tenant_id}/knowledge/{user_id}/...
    This is the same location that kubernetes_sandbox_manager.py reads from when
    provisioning sandboxes.

Both modes use consistent tenant/user-segregated paths for multi-tenant isolation.
"""

import hashlib
import json
import unicodedata
from pathlib import Path
from typing import Any

from botocore.exceptions import ClientError
from mypy_boto3_s3.client import S3Client

from onyx.connectors.models import Document
from onyx.server.features.build.configs import PERSISTENT_DOCUMENT_STORAGE_PATH
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SANDBOX_S3_BUCKET
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.s3.s3_client import build_s3_client
from onyx.utils.logger import setup_logger

logger = setup_logger()


# =============================================================================
# Shared Utilities for Path Building
# =============================================================================


def sanitize_path_component(component: str, replace_slash: bool = True) -> str:
    """Sanitize a path component for file system / S3 key safety.

    Args:
        component: The path component to sanitize
        replace_slash: If True, replaces forward slashes (needed for local filesystem).
                      Set to False for S3 where `/` is a valid delimiter.

    Returns:
        Sanitized path component safe for use in file paths or S3 keys
    """
    # First, normalize Unicode to decomposed form and remove combining characters
    # This handles cases like accented characters, while also filtering format chars
    normalized = unicodedata.normalize("NFKD", component)

    # Filter out Unicode format/control characters (categories Cf, Cc)
    # This removes invisible chars like U+2060 (WORD JOINER), zero-width spaces, etc.
    sanitized = "".join(
        c for c in normalized if unicodedata.category(c) not in ("Cf", "Cc")
    )

    # Replace spaces with underscores
    sanitized = sanitized.replace(" ", "_")
    # Replace problematic characters
    if replace_slash:
        sanitized = sanitized.replace("/", "_")
    sanitized = sanitized.replace("\\", "_").replace(":", "_")
    sanitized = sanitized.replace("<", "_").replace(">", "_").replace("|", "_")
    sanitized = sanitized.replace('"', "_").replace("?", "_").replace("*", "_")
    return sanitized.strip() or "unnamed"


def sanitize_filename(name: str, replace_slash: bool = True) -> str:
    """Sanitize name for use as filename.

    Args:
        name: The filename to sanitize
        replace_slash: Passed through to sanitize_path_component

    Returns:
        Sanitized filename, truncated with hash suffix if too long
    """
    sanitized = sanitize_path_component(name, replace_slash=replace_slash)
    if len(sanitized) > 200:
        # Keep first 150 chars + hash suffix for uniqueness
        hash_suffix = hashlib.sha256(name.encode()).hexdigest()[:16]
        return f"{sanitized[:150]}_{hash_suffix}"
    return sanitized


def normalize_leading_slash(path: str) -> str:
    """Ensure a path starts with exactly one leading slash."""
    return "/" + path.lstrip("/")


def get_base_filename(doc: Document, replace_slash: bool = True) -> str:
    """Get base filename from document, preferring semantic identifier.

    Args:
        doc: The document to get filename for
        replace_slash: Passed through to sanitize_filename

    Returns:
        Sanitized base filename (without extension)
    """
    name = doc.semantic_identifier or doc.title or doc.id
    return sanitize_filename(name, replace_slash=replace_slash)


def build_document_subpath(doc: Document, replace_slash: bool = True) -> list[str]:
    """Build the source/hierarchy path components from a document.

    Returns path components like: [source, hierarchy_part1, hierarchy_part2, ...]

    This is the common part of the path that comes after user/tenant segregation.

    Args:
        doc: The document to build path for
        replace_slash: Passed through to sanitize_path_component

    Returns:
        List of sanitized path components
    """
    parts: list[str] = []

    # Source type (e.g., "google_drive", "confluence")
    parts.append(doc.source.value)

    # Get hierarchy from doc_metadata
    hierarchy: dict[str, Any] = (
        doc.doc_metadata.get("hierarchy", {}) if doc.doc_metadata else {}
    )
    source_path: list[str] = hierarchy.get("source_path", [])

    if source_path:
        parts.extend(
            [
                sanitize_path_component(p, replace_slash=replace_slash)
                for p in source_path
            ]
        )

    return parts


def resolve_duplicate_filename(
    doc: Document,
    base_filename: str,
    has_duplicates: bool,
    replace_slash: bool = True,
) -> str:
    """Resolve filename, appending ID suffix if there are duplicates.

    Args:
        doc: The document (for ID extraction)
        base_filename: The base filename without extension
        has_duplicates: Whether there are other docs with the same base filename
        replace_slash: Passed through to sanitize_path_component

    Returns:
        Final filename with .json extension
    """
    if has_duplicates:
        id_suffix = sanitize_path_component(doc.id, replace_slash=replace_slash)
        if len(id_suffix) > 50:
            id_suffix = hashlib.sha256(doc.id.encode()).hexdigest()[:16]
        return f"{base_filename}_{id_suffix}.json"
    return f"{base_filename}.json"


def serialize_document(doc: Document) -> dict[str, Any]:
    """Serialize a document to a dictionary for JSON storage.

    Args:
        doc: The document to serialize

    Returns:
        Dictionary representation of the document
    """
    return {
        "id": doc.id,
        "semantic_identifier": doc.semantic_identifier,
        "title": doc.title,
        "source": doc.source.value,
        "doc_updated_at": (
            doc.doc_updated_at.isoformat() if doc.doc_updated_at else None
        ),
        "metadata": doc.metadata,
        "doc_metadata": doc.doc_metadata,
        "sections": [
            {"text": s.text if hasattr(s, "text") else None, "link": s.link}
            for s in doc.sections
        ],
        "primary_owners": [o.model_dump() for o in (doc.primary_owners or [])],
        "secondary_owners": [o.model_dump() for o in (doc.secondary_owners or [])],
    }


# =============================================================================
# Classes
# =============================================================================


class PersistentDocumentWriter:
    """Writes indexed documents to local filesystem with hierarchical structure.

    Documents are stored in tenant/user-segregated paths:
    {base_path}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/document.json

    This enables per-tenant and per-user isolation for sandbox access control.
    """

    def __init__(
        self,
        base_path: str,
        tenant_id: str,
        user_id: str,
    ):
        self.base_path = Path(base_path)
        self.tenant_id = tenant_id
        self.user_id = user_id

    def write_documents(self, documents: list[Document]) -> list[str]:
        """Write documents to local filesystem, returns written file paths."""
        written_paths: list[str] = []

        # Build a map of base filenames to detect duplicates
        # Key: (directory_path, base_filename) -> list of docs with that name
        filename_map: dict[tuple[Path, str], list[Document]] = {}

        for doc in documents:
            dir_path = self._build_directory_path(doc)
            base_filename = get_base_filename(doc, replace_slash=True)
            key = (dir_path, base_filename)
            if key not in filename_map:
                filename_map[key] = []
            filename_map[key].append(doc)

        # Now write documents, appending ID if there are duplicates
        for (dir_path, base_filename), docs in filename_map.items():
            has_duplicates = len(docs) > 1
            for doc in docs:
                filename = resolve_duplicate_filename(
                    doc, base_filename, has_duplicates, replace_slash=True
                )
                path = dir_path / filename
                self._write_document(doc, path)
                written_paths.append(str(path))

        return written_paths

    def _build_directory_path(self, doc: Document) -> Path:
        """Build directory path from document metadata.

        Documents are stored under tenant/user-segregated paths:
        {base_path}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/

        This enables per-tenant and per-user isolation for sandbox access control.
        """
        # Tenant and user segregation prefix (matches S3 path structure)
        parts = [self.tenant_id, "knowledge", self.user_id]
        # Add source and hierarchy from document
        parts.extend(build_document_subpath(doc, replace_slash=True))

        return self.base_path / "/".join(parts)

    def _write_document(self, doc: Document, path: Path) -> None:
        """Serialize and write document to filesystem."""
        content = serialize_document(doc)

        # Create parent directories if they don't exist
        path.parent.mkdir(parents=True, exist_ok=True)

        # Write the JSON file
        with open(path, "w", encoding="utf-8") as f:
            json.dump(content, f, indent=2, default=str)

        logger.debug(f"Wrote document to {path}")

    def write_raw_file(
        self,
        path: str,
        content: bytes,
        content_type: str | None = None,  # noqa: ARG002
    ) -> str:
        """Write a raw binary file to local filesystem (for User Library).

        Unlike write_documents which serializes Document objects to JSON, this method
        writes raw binary content directly. Used for user-uploaded files like xlsx, pptx.

        Args:
            path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
            content: Raw binary content to write
            content_type: MIME type of the file (stored as metadata, unused locally)

        Returns:
            Full filesystem path where file was written
        """
        # Build full path: {base_path}/{tenant}/knowledge/{user}/user_library/{path}
        normalized_path = normalize_leading_slash(path)
        full_path = (
            self.base_path
            / self.tenant_id
            / "knowledge"
            / self.user_id
            / "user_library"
            / normalized_path.lstrip("/")
        )

        # Create parent directories if they don't exist
        full_path.parent.mkdir(parents=True, exist_ok=True)

        # Write the raw binary content
        with open(full_path, "wb") as f:
            f.write(content)

        logger.debug(f"Wrote raw file to {full_path}")
        return str(full_path)

    def delete_raw_file(self, path: str) -> None:
        """Delete a raw file from local filesystem.

        Args:
            path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
        """
        # Build full path
        normalized_path = normalize_leading_slash(path)
        full_path = (
            self.base_path
            / self.tenant_id
            / "knowledge"
            / self.user_id
            / "user_library"
            / normalized_path.lstrip("/")
        )

        if full_path.exists():
            full_path.unlink()
            logger.debug(f"Deleted raw file at {full_path}")
        else:
            logger.warning(f"File not found for deletion: {full_path}")


class S3PersistentDocumentWriter:
    """Writes indexed documents to S3 with hierarchical structure.

    Documents are stored in tenant/user-segregated paths:
    s3://{bucket}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/document.json

    This matches the location that KubernetesSandboxManager reads from when
    provisioning sandboxes (via the sidecar container's s5cmd sync command).
    """

    def __init__(self, tenant_id: str, user_id: str):
        """Initialize S3PersistentDocumentWriter.

        Args:
            tenant_id: Tenant identifier for multi-tenant isolation
            user_id: User ID for user-segregated storage paths
        """
        self.tenant_id = tenant_id
        self.user_id = user_id
        self.bucket = SANDBOX_S3_BUCKET
        self._s3_client: S3Client | None = None

    def _get_s3_client(self) -> S3Client:
        """Lazily initialize S3 client.

        Uses the craft-specific boto3 client which only supports IAM roles (IRSA).
        """
        if self._s3_client is None:
            self._s3_client = build_s3_client()
        return self._s3_client

    def write_documents(self, documents: list[Document]) -> list[str]:
        """Write documents to S3, returns written S3 keys.

        Args:
            documents: List of documents to write

        Returns:
            List of S3 keys that were written
        """
        written_keys: list[str] = []

        # Build a map of base keys to detect duplicates
        # Key: (directory_prefix, base_filename) -> list of docs with that name
        key_map: dict[tuple[str, str], list[Document]] = {}

        for doc in documents:
            dir_prefix = self._build_directory_path(doc)
            base_filename = get_base_filename(doc, replace_slash=False)
            key = (dir_prefix, base_filename)
            if key not in key_map:
                key_map[key] = []
            key_map[key].append(doc)

        # Now write documents, appending ID if there are duplicates
        s3_client = self._get_s3_client()

        for (dir_prefix, base_filename), docs in key_map.items():
            has_duplicates = len(docs) > 1
            for doc in docs:
                filename = resolve_duplicate_filename(
                    doc, base_filename, has_duplicates, replace_slash=False
                )
                s3_key = f"{dir_prefix}/{filename}"
                self._write_document(s3_client, doc, s3_key)
                written_keys.append(s3_key)

        return written_keys

    def _build_directory_path(self, doc: Document) -> str:
        """Build S3 key prefix from document metadata.

        Documents are stored under tenant/user-segregated paths:
        {tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/

        This matches the path that KubernetesSandboxManager syncs from:
        s5cmd sync "s3://{bucket}/{tenant_id}/knowledge/{user_id}/*" /workspace/files/
        """
        # Tenant and user segregation (matches K8s sandbox init container path)
        parts = [self.tenant_id, "knowledge", self.user_id]
        # Add source and hierarchy from document
        parts.extend(build_document_subpath(doc, replace_slash=False))

        return "/".join(parts)

    def _write_document(self, s3_client: S3Client, doc: Document, s3_key: str) -> None:
        """Serialize and write document to S3."""
        content = serialize_document(doc)
        json_content = json.dumps(content, indent=2, default=str)

        try:
            s3_client.put_object(
                Bucket=self.bucket,
                Key=s3_key,
                Body=json_content.encode("utf-8"),
                ContentType="application/json",
            )
            logger.debug(f"Wrote document to s3://{self.bucket}/{s3_key}")
        except ClientError as e:
            logger.error(f"Failed to write to S3: {e}")
            raise

    def write_raw_file(
        self,
        path: str,
        content: bytes,
        content_type: str | None = None,
    ) -> str:
        """Write a raw binary file to S3 (for User Library).

        Unlike write_documents which serializes Document objects to JSON, this method
        writes raw binary content directly. Used for user-uploaded files like xlsx, pptx.

        Args:
            path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
            content: Raw binary content to write
            content_type: MIME type of the file

        Returns:
            S3 key where file was written
        """
        # Build S3 key: {tenant}/knowledge/{user}/user_library/{path}
        normalized_path = path.lstrip("/")
        s3_key = (
            f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{normalized_path}"
        )

        s3_client = self._get_s3_client()

        try:
            s3_client.put_object(
                Bucket=self.bucket,
                Key=s3_key,
                Body=content,
                ContentType=content_type or "application/octet-stream",
            )
            logger.debug(f"Wrote raw file to s3://{self.bucket}/{s3_key}")
            return s3_key
        except ClientError as e:
            logger.error(f"Failed to write raw file to S3: {e}")
            raise

    def delete_raw_file(self, s3_key: str) -> None:
        """Delete a raw file from S3.

        Args:
            s3_key: Full S3 key of the file to delete
        """
        s3_client = self._get_s3_client()

        try:
            s3_client.delete_object(Bucket=self.bucket, Key=s3_key)
            logger.debug(f"Deleted raw file at s3://{self.bucket}/{s3_key}")
        except ClientError as e:
            logger.error(f"Failed to delete raw file from S3: {e}")
            raise

    def delete_raw_file_by_path(self, path: str) -> None:
        """Delete a raw file from S3 by its relative path.

        Args:
            path: Relative path within user's library (e.g., "/project-data/financials.xlsx")
        """
        normalized_path = path.lstrip("/")
        s3_key = (
            f"{self.tenant_id}/knowledge/{self.user_id}/user_library/{normalized_path}"
        )
        self.delete_raw_file(s3_key)


def get_persistent_document_writer(
    user_id: str,
    tenant_id: str,
) -> PersistentDocumentWriter | S3PersistentDocumentWriter:
    """Factory function to create a PersistentDocumentWriter with default configuration.

    Args:
        user_id: User ID for user-segregated storage paths.
        tenant_id: Tenant ID for multi-tenant isolation.

    Both local and S3 modes use consistent tenant/user-segregated paths:
        - Local: {base_path}/{tenant_id}/knowledge/{user_id}/...
        - S3: s3://{bucket}/{tenant_id}/knowledge/{user_id}/...

    Returns:
        PersistentDocumentWriter for local mode, S3PersistentDocumentWriter for K8s mode
    """
    if SANDBOX_BACKEND == SandboxBackend.LOCAL:
        return PersistentDocumentWriter(
            base_path=PERSISTENT_DOCUMENT_STORAGE_PATH,
            tenant_id=tenant_id,
            user_id=user_id,
        )
    elif SANDBOX_BACKEND == SandboxBackend.KUBERNETES:
        return S3PersistentDocumentWriter(
            tenant_id=tenant_id,
            user_id=user_id,
        )
    else:
        raise ValueError(f"Unknown sandbox backend: {SANDBOX_BACKEND}")


================================================
FILE: backend/onyx/server/features/build/s3/s3_client.py
================================================
import boto3
from mypy_boto3_s3.client import S3Client

from onyx.configs.app_configs import AWS_REGION_NAME


def build_s3_client() -> S3Client:
    """Build an S3 client using IAM roles (IRSA)"""
    return boto3.client("s3", region_name=AWS_REGION_NAME)


================================================
FILE: backend/onyx/server/features/build/sandbox/README.md
================================================
# Onyx Sandbox System

This directory contains the implementation of Onyx's sandbox system for running OpenCode agents in isolated environments.

## Overview

The sandbox system provides isolated execution environments where OpenCode agents can build web applications, run code, and interact with knowledge files. Each sandbox includes:

- **Next.js development environment** - Lightweight Next.js scaffold with shadcn/ui and Recharts for building UIs
- **Python virtual environment** - Pre-installed packages for data processing
- **OpenCode agent** - AI coding agent with access to tools and MCP servers
- **Knowledge files** - Access to indexed documents and user uploads

## Architecture

### Deployment Modes

1. **Local Mode** (`SANDBOX_BACKEND=local`)
   - Sandboxes run as directories on the local filesystem
   - No automatic cleanup or snapshots
   - Suitable for development and testing

2. **Kubernetes Mode** (`SANDBOX_BACKEND=kubernetes`)
   - Sandboxes run as Kubernetes pods
   - Automatic snapshots to S3
   - Auto-cleanup of idle sandboxes
   - Production-ready with resource isolation

### Directory Structure

```
/workspace/                          # Sandbox root (in container)
├── outputs/                         # Working directory
│   ├── web/                        # Lightweight Next.js app (shadcn/ui, Recharts)
│   ├── slides/                     # Generated presentations
│   ├── markdown/                   # Generated documents
│   └── graphs/                     # Generated visualizations
├── .venv/                          # Python virtual environment
├── files/                          # Symlink to knowledge files
├── attachments/                    # User uploads
├── AGENTS.md                       # Agent instructions
└── .opencode/
    └── skills/                     # Agent skills
```

## Setup

### Running via Docker/Kubernetes (Zero Setup!) 🎉

**No setup required!** Just build and deploy:

```bash
# Build backend image (includes both templates)
cd backend
docker build -f Dockerfile.sandbox-templates -t onyxdotapp/backend:latest .

# Build sandbox container (lightweight runner)
cd onyx/server/features/build/sandbox/kubernetes/docker
docker build -t onyxdotapp/sandbox:latest .

# Deploy with docker-compose or kubectl - sandboxes work immediately!
```

**How it works:**

- **Backend image**: Contains both templates at build time:
  - Web template at `/templates/outputs/web` (lightweight Next.js scaffold, ~2MB)
  - Python venv template at `/templates/venv` (pre-installed packages, ~50MB)
- **Init container** (Kubernetes only): Syncs knowledge files from S3
- **Sandbox startup**: Runs `npm install` (for fresh dependency locks) + `next dev`

### Running Backend Directly (Without Docker)

**Only needed if you're running the Onyx backend outside of Docker.** Most developers use Docker and can skip this section.

If you're running the backend Python process directly on your machine, you need templates at `/templates/`:

#### Web Template

The web template is a lightweight Next.js app (Next.js 16, React 19, shadcn/ui, Recharts) checked into the codebase at `backend/onyx/server/features/build/templates/outputs/web/`.

For local development, create a symlink to this template:

```bash
sudo mkdir -p /templates/outputs
sudo ln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web /templates/outputs/web
```

#### Python Venv Template

If you don't have a venv template, create it:

```bash
# Use the utility script
cd backend
python -m onyx.server.features.build.sandbox.util.build_venv_template

# Or manually
python3 -m venv /templates/venv
/templates/venv/bin/pip install -r backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt
```

#### System Dependencies (for PPTX skill)

The PPTX skill requires LibreOffice and Poppler for PDF conversion and thumbnail generation:

**macOS:**

```bash
brew install poppler
brew install --cask libreoffice
```

Ensure `soffice` is on your PATH:

```bash
export PATH="/Applications/LibreOffice.app/Contents/MacOS:$PATH"
```

**Linux (Debian/Ubuntu):**

```bash
sudo apt-get install libreoffice-impress poppler-utils
```

**That's it!** When sandboxes are created:

1. Web template is copied from `/templates/outputs/web`
2. Python venv is copied from `/templates/venv`
3. `npm install` runs automatically to install fresh Next.js dependencies

## OpenCode Configuration

Each sandbox includes an OpenCode agent configured with:

- **LLM Provider**: Anthropic, OpenAI, Google, Bedrock, or Azure
- **Extended thinking**: High reasoning effort / thinking budgets for complex tasks
- **Tool permissions**: File operations, bash commands, web access
- **Disabled tools**: Configurable via `OPENCODE_DISABLED_TOOLS` env var

Configuration is generated dynamically in `templates/opencode_config.py`.

## Key Components

### Managers

- **`base.py`** - Abstract base class defining the sandbox interface
- **`local/manager.py`** - Filesystem-based sandbox manager for local development
- **`kubernetes/manager.py`** - Kubernetes-based sandbox manager for production

### Managers (Shared)

- **`manager/directory_manager.py`** - Creates sandbox directory structure and copies templates
- **`manager/snapshot_manager.py`** - Handles snapshot creation and restoration

### Utilities

- **`util/opencode_config.py`** - Generates OpenCode configuration with MCP support
- **`util/agent_instructions.py`** - Generates agent instructions (AGENTS.md)
- **`util/build_venv_template.py`** - Utility to build Python venv template for local development

### Templates

- **`../templates/outputs/web/`** - Lightweight Next.js scaffold (shadcn/ui, Recharts) versioned with the backend code

### Kubernetes Specific

- **`kubernetes/docker/Dockerfile`** - Sandbox container image (runs Next.js + OpenCode)
- **`kubernetes/docker/entrypoint.sh`** - Container startup script

## Environment Variables

### Core Settings

```bash
# Sandbox backend mode
SANDBOX_BACKEND=local|kubernetes           # Default: local

# Template paths (local mode)
OUTPUTS_TEMPLATE_PATH=/templates/outputs   # Default: /templates/outputs
VENV_TEMPLATE_PATH=/templates/venv        # Default: /templates/venv

# Sandbox base path (local mode)
SANDBOX_BASE_PATH=/tmp/onyx-sandboxes     # Default: /tmp/onyx-sandboxes

# OpenCode configuration
OPENCODE_DISABLED_TOOLS=question          # Comma-separated list, default: question
```

### Kubernetes Settings

```bash
# Kubernetes namespace
SANDBOX_NAMESPACE=onyx-sandboxes          # Default: onyx-sandboxes

# Container image
SANDBOX_CONTAINER_IMAGE=onyxdotapp/sandbox:latest

# S3 bucket for snapshots and files
SANDBOX_S3_BUCKET=onyx-sandbox-files      # Default: onyx-sandbox-files

# Service accounts
SANDBOX_SERVICE_ACCOUNT_NAME=sandbox-runner          # No AWS access
SANDBOX_FILE_SYNC_SERVICE_ACCOUNT=sandbox-file-sync  # Has S3 access via IRSA
```

### Lifecycle Settings

```bash
# Idle timeout before cleanup (seconds)
SANDBOX_IDLE_TIMEOUT_SECONDS=900          # Default: 900 (15 minutes)

# Max concurrent sandboxes per organization
SANDBOX_MAX_CONCURRENT_PER_ORG=10         # Default: 10

# Next.js port range (local mode)
SANDBOX_NEXTJS_PORT_START=3010            # Default: 3010
SANDBOX_NEXTJS_PORT_END=3100              # Default: 3100
```

## Testing

### Integration Tests

```bash
# Test local sandbox provisioning
uv run pytest backend/tests/integration/sandbox/test_local_sandbox.py

# Test Kubernetes sandbox provisioning (requires k8s cluster)
uv run pytest backend/tests/integration/sandbox/test_kubernetes_sandbox.py
```

### Manual Testing

```bash
# Start a local sandbox session
curl -X POST http://localhost:3000/api/build/session \
  -H "Content-Type: application/json" \
  -d '{
    "user_id": "user-123",
    "file_system_path": "/path/to/files"
  }'

# Send a message to the agent
curl -X POST http://localhost:3000/api/build/session/{session_id}/message \
  -H "Content-Type: application/json" \
  -d '{
    "message": "Create a simple web page"
  }'
```

## Troubleshooting

### Sandbox Stuck in PROVISIONING (Kubernetes)

**Symptoms**: Sandbox status never changes from `PROVISIONING`

**Solutions**:

- Check pod logs: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id}`
- Check init container: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id} -c file-sync`
- Verify init container completed: `kubectl describe pod -n onyx-sandboxes sandbox-{sandbox-id}`
- Check S3 bucket access: Ensure init container service account has IRSA configured

### Next.js Server Won't Start

**Symptoms**: Sandbox provisioned but web preview doesn't load

**Solutions**:

- **Local mode**: Check if port is already in use
- **Docker/K8s**: Check container logs: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id}`
- Verify npm install succeeded (check entrypoint.sh logs)
- Check that web template was copied: `kubectl exec -n onyx-sandboxes sandbox-{sandbox-id} -- ls /workspace/outputs/web`

### Templates Not Found (Local Mode)

**Symptoms**: `RuntimeError: Sandbox templates are missing`

**Solution**: Set up templates as described in the "Local Development" section above:

```bash
# Symlink web template
sudo ln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web /templates/outputs/web

# Create Python venv
python3 -m venv /templates/venv
/templates/venv/bin/pip install -r backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt
```

### Permission Denied

**Symptoms**: `Permission denied` error accessing `/templates/`

**Solution**: Either use sudo when creating symlinks, or use custom paths:

```bash
export OUTPUTS_TEMPLATE_PATH=$HOME/.onyx/templates/outputs
export VENV_TEMPLATE_PATH=$HOME/.onyx/templates/venv

# Then symlink to your home directory
mkdir -p $HOME/.onyx/templates/outputs
ln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web $HOME/.onyx/templates/outputs/web
```

## Security Considerations

### Sandbox Isolation

- **Kubernetes pods** run with restricted security context (non-root, no privilege escalation)
- **Init containers** have S3 access for file sync, but main sandbox container does NOT
- **Network policies** can restrict sandbox egress traffic
- **Resource limits** prevent resource exhaustion

### Credentials Management

- LLM API keys are passed as environment variables (not stored in sandbox)
- User file access is read-only via symlinks
- Snapshots are isolated per tenant in S3

## Development

### Adding New MCP Servers

1. Add MCP configuration to `templates/opencode_config.py`:

   ```python
   config["mcp"] = {
       "my-mcp": {
           "type": "local",
           "command": ["npx", "@my/mcp@latest"],
           "enabled": True,
       }
   }
   ```

2. Install required npm packages in web template (if needed)

3. Rebuild Docker image and templates

### Modifying Agent Instructions

Edit `AGENTS.template.md` in the build directory. This is populated with dynamic content by `templates/agent_instructions.py`.

### Adding New Tools/Permissions

Update `templates/opencode_config.py` to add/remove tool permissions in the `permission` section.

## Template Details

### Web Template

The lightweight Next.js template (`backend/onyx/server/features/build/templates/outputs/web/`) includes:

- **Framework**: Next.js 16.1.4 with React 19.2.3
- **UI Library**: shadcn/ui components with Radix UI primitives
- **Styling**: Tailwind CSS v4 with custom theming support
- **Charts**: Recharts for data visualization
- **Size**: ~2MB (excluding node_modules, which are installed fresh per sandbox)

This template provides a modern development environment without the complexity of the full Onyx application, allowing agents to build custom UIs quickly.

### Python Venv Template

The Python venv (`/templates/venv/`) includes packages from `initial-requirements.txt`:

- Data processing: pandas, numpy, polars
- HTTP clients: requests, httpx
- Utilities: python-dotenv, pydantic

## References

- [OpenCode Documentation](https://docs.opencode.ai)
- [Next.js Documentation](https://nextjs.org/docs)
- [shadcn/ui Components](https://ui.shadcn.com)


================================================
FILE: backend/onyx/server/features/build/sandbox/__init__.py
================================================
"""
Sandbox module for CLI agent filesystem-based isolation.

This module provides lightweight sandbox management for CLI-based AI agent sessions.
Each sandbox is a directory on the local filesystem or a Kubernetes pod.

Usage:
    from onyx.server.features.build.sandbox import get_sandbox_manager

    # Get the appropriate sandbox manager based on SANDBOX_BACKEND config
    sandbox_manager = get_sandbox_manager()

    # Use the sandbox manager
    sandbox_info = sandbox_manager.provision(...)

Module structure:
    - base.py: SandboxManager ABC and get_sandbox_manager() factory
    - models.py: Shared Pydantic models
    - local/: Local filesystem-based implementation for development
    - kubernetes/: Kubernetes pod-based implementation for production
    - internal/: Shared internal utilities (snapshot manager)
"""

from onyx.server.features.build.sandbox.base import get_sandbox_manager
from onyx.server.features.build.sandbox.base import SandboxManager
from onyx.server.features.build.sandbox.local.local_sandbox_manager import (
    LocalSandboxManager,
)
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotInfo

__all__ = [
    # Factory function (preferred)
    "get_sandbox_manager",
    # Interface
    "SandboxManager",
    # Implementations
    "LocalSandboxManager",
    # Models
    "SandboxInfo",
    "SnapshotInfo",
    "FilesystemEntry",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/base.py
================================================
"""Abstract base class and factory for sandbox operations.

SandboxManager is the abstract interface for sandbox lifecycle management.
Use get_sandbox_manager() to get the appropriate implementation based on SANDBOX_BACKEND.

IMPORTANT: SandboxManager implementations must NOT interface with the database directly.
All database operations should be handled by the caller (SessionManager, Celery tasks, etc.).

Architecture Note (User-Shared Sandbox Model):
- One sandbox (container/pod) is shared across all of a user's sessions
- provision() creates the user's sandbox with shared files/ directory
- setup_session_workspace() creates per-session workspace within the sandbox
- cleanup_session_workspace() removes session workspace on session delete
- terminate() destroys the entire sandbox (all sessions)
"""

import threading
from abc import ABC
from abc import abstractmethod
from collections.abc import Generator
from typing import Any
from uuid import UUID

from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotResult
from onyx.utils.logger import setup_logger

logger = setup_logger()

# ACPEvent is a union type defined in both local and kubernetes modules
# Using Any here to avoid circular imports - the actual type checking
# happens in the implementation modules
ACPEvent = Any


class SandboxManager(ABC):
    """Abstract interface for sandbox operations.

    Defines the contract for sandbox lifecycle management including:
    - Provisioning and termination (user-level)
    - Session workspace setup and cleanup (session-level)
    - Snapshot creation (session-level)
    - Health checks
    - Agent communication (session-level)
    - Filesystem operations (session-level)

    Directory Structure:
        $SANDBOX_ROOT/
        ├── files/                     # SHARED - symlink to user's persistent documents
        └── sessions/
            ├── $session_id_1/         # Per-session workspace
            │   ├── outputs/           # Agent output for this session
            │   │   └── web/           # Next.js app
            │   ├── venv/              # Python virtual environment
            │   ├── skills/            # Opencode skills
            │   ├── AGENTS.md          # Agent instructions
            │   ├── opencode.json      # LLM config
            │   └── attachments/
            └── $session_id_2/
                └── ...

    IMPORTANT: Implementations must NOT interface with the database directly.
    All database operations should be handled by the caller.

    Use get_sandbox_manager() to get the appropriate implementation.
    """

    @abstractmethod
    def provision(
        self,
        sandbox_id: UUID,
        user_id: UUID,
        tenant_id: str,
        llm_config: LLMProviderConfig,
    ) -> SandboxInfo:
        """Provision a new sandbox for a user.

        Creates the sandbox container/directory with:
        - sessions/ directory for per-session workspaces

        NOTE: This does NOT set up session-specific workspaces.
        Call setup_session_workspace() after provisioning to create a session workspace.

        Args:
            sandbox_id: Unique identifier for the sandbox
            user_id: User identifier who owns this sandbox
            tenant_id: Tenant identifier for multi-tenant isolation
            llm_config: LLM provider configuration (for default config)

        Returns:
            SandboxInfo with the provisioned sandbox details

        Raises:
            RuntimeError: If provisioning fails
        """
        ...

    @abstractmethod
    def terminate(self, sandbox_id: UUID) -> None:
        """Terminate a sandbox and clean up all resources.

        Destroys the entire sandbox including all session workspaces.
        Use cleanup_session_workspace() to remove individual sessions.

        Args:
            sandbox_id: The sandbox ID to terminate
        """
        ...

    @abstractmethod
    def setup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        llm_config: LLMProviderConfig,
        nextjs_port: int,
        file_system_path: str | None = None,
        snapshot_path: str | None = None,
        user_name: str | None = None,
        user_role: str | None = None,
        user_work_area: str | None = None,
        user_level: str | None = None,
        use_demo_data: bool = False,
        excluded_user_library_paths: list[str] | None = None,
    ) -> None:
        """Set up a session workspace within an existing sandbox.

        Creates the per-session directory structure:
        - sessions/$session_id/outputs/ (from snapshot or template)
        - sessions/$session_id/venv/
        - sessions/$session_id/skills/
        - sessions/$session_id/files/ (symlink to demo data or user files)
        - sessions/$session_id/AGENTS.md
        - sessions/$session_id/opencode.json
        - sessions/$session_id/attachments/
        - sessions/$session_id/org_info/ (if demo data enabled)

        Args:
            sandbox_id: The sandbox ID (must be provisioned)
            session_id: The session ID for this workspace
            llm_config: LLM provider configuration for opencode.json
            file_system_path: Path to user's knowledge/source files
            snapshot_path: Optional storage path to restore outputs from
            user_name: User's name for personalization in AGENTS.md
            user_role: User's role/title for personalization in AGENTS.md
            user_work_area: User's work area for demo persona (e.g., "engineering")
            user_level: User's level for demo persona (e.g., "ic", "manager")
            use_demo_data: If True, symlink files/ to demo data; else to user files
            excluded_user_library_paths: List of paths within user_library to exclude
                from the sandbox (e.g., ["/data/file.xlsx"]). Only applies when
                use_demo_data=False. Files at these paths won't be accessible.

        Raises:
            RuntimeError: If workspace setup fails
        """
        ...

    @abstractmethod
    def cleanup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        nextjs_port: int | None = None,
    ) -> None:
        """Clean up a session workspace (on session delete).

        1. Stop the Next.js dev server if running on nextjs_port
        2. Remove the session directory: sessions/$session_id/

        Does NOT terminate the sandbox - other sessions may still be using it.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to clean up
            nextjs_port: Optional port where Next.js server is running
        """
        ...

    @abstractmethod
    def create_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        tenant_id: str,
    ) -> SnapshotResult | None:
        """Create a snapshot of a session's outputs and attachments directories.

        Captures session-specific user data:
        - sessions/$session_id/outputs/ (generated artifacts, web apps)
        - sessions/$session_id/attachments/ (user uploaded files)

        Does NOT include: venv, skills, AGENTS.md, opencode.json, files symlink
        (these are regenerated during restore)

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to snapshot
            tenant_id: Tenant identifier for storage path

        Returns:
            SnapshotResult with storage path and size, or None if:
            - Snapshots are disabled for this backend
            - No outputs directory exists (nothing to snapshot)

        Raises:
            RuntimeError: If snapshot creation fails
        """
        ...

    @abstractmethod
    def restore_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        snapshot_storage_path: str,
        tenant_id: str,
        nextjs_port: int,
        llm_config: LLMProviderConfig,
        use_demo_data: bool = False,
    ) -> None:
        """Restore a session workspace from a snapshot.

        For Kubernetes: Downloads and extracts the snapshot, regenerates config files.
        For Local: No-op since workspaces persist on disk (no snapshots).

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to restore
            snapshot_storage_path: Path to the snapshot in storage
            tenant_id: Tenant identifier for storage access
            nextjs_port: Port number for the NextJS dev server
            llm_config: LLM provider configuration for opencode.json
            use_demo_data: If True, symlink files/ to demo data

        Raises:
            RuntimeError: If snapshot restoration fails
        """
        ...

    @abstractmethod
    def session_workspace_exists(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> bool:
        """Check if a session's workspace directory exists in the sandbox.

        Used to determine if we need to restore from snapshot.
        Checks for sessions/$session_id/outputs/ directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to check

        Returns:
            True if the session workspace exists, False otherwise
        """
        ...

    @abstractmethod
    def health_check(self, sandbox_id: UUID, timeout: float = 60.0) -> bool:
        """Check if the sandbox is healthy.

        Args:
            sandbox_id: The sandbox ID to check

        Returns:
            True if sandbox is healthy, False otherwise
        """
        ...

    @abstractmethod
    def send_message(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        message: str,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to the CLI agent and stream typed ACP events.

        The agent runs in the session-specific workspace:
        sessions/$session_id/

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID (determines workspace directory)
            message: The message content to send

        Yields:
            Typed ACP schema event objects

        Raises:
            RuntimeError: If agent communication fails
        """
        ...

    @abstractmethod
    def list_directory(
        self, sandbox_id: UUID, session_id: UUID, path: str
    ) -> list[FilesystemEntry]:
        """List contents of a directory in the session's outputs directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/outputs/

        Returns:
            List of FilesystemEntry objects sorted by directory first, then name

        Raises:
            ValueError: If path traversal attempted or path is not a directory
        """
        ...

    @abstractmethod
    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:
        """Read a file from the session's workspace.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/

        Returns:
            File contents as bytes

        Raises:
            ValueError: If path traversal attempted or path is not a file
        """
        ...

    @abstractmethod
    def upload_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        filename: str,
        content: bytes,
    ) -> str:
        """Upload a file to the session's attachments directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            filename: Sanitized filename
            content: File content as bytes

        Returns:
            Relative path where file was saved (e.g., "attachments/doc.pdf")

        Raises:
            RuntimeError: If upload fails
        """
        ...

    @abstractmethod
    def delete_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        path: str,
    ) -> bool:
        """Delete a file from the session's workspace.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path to the file (e.g., "attachments/doc.pdf")

        Returns:
            True if file was deleted, False if not found

        Raises:
            ValueError: If path traversal attempted
        """
        ...

    @abstractmethod
    def get_upload_stats(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> tuple[int, int]:
        """Get current file count and total size for a session's attachments.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID

        Returns:
            Tuple of (file_count, total_size_bytes)
        """
        ...

    @abstractmethod
    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:
        """Get the webapp URL for a session's Next.js server.

        Returns the appropriate URL based on the backend:
        - Local: Returns localhost URL with port
        - Kubernetes: Returns internal cluster service URL

        Args:
            sandbox_id: The sandbox ID
            port: The session's allocated Next.js port

        Returns:
            URL to access the webapp
        """
        ...

    @abstractmethod
    def generate_pptx_preview(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        pptx_path: str,
        cache_dir: str,
    ) -> tuple[list[str], bool]:
        """Convert PPTX to slide JPEG images for preview, with caching.

        Checks if cache_dir already has slides. If the PPTX is newer than the
        cached images (or no cache exists), runs soffice -> pdftoppm pipeline.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            pptx_path: Relative path to the PPTX file within the session workspace
            cache_dir: Relative path for the cache directory
                       (e.g., "outputs/.pptx-preview/abc123")

        Returns:
            Tuple of (slide_paths, cached) where slide_paths is a list of
            relative paths to slide JPEG images (within session workspace)
            and cached indicates whether the result was served from cache.

        Raises:
            ValueError: If file not found or conversion fails
        """
        ...

    @abstractmethod
    def sync_files(
        self,
        sandbox_id: UUID,
        user_id: UUID,
        tenant_id: str,
        source: str | None = None,
    ) -> bool:
        """Sync files from S3 to the sandbox's /workspace/files directory.

        For Kubernetes backend: Executes `s5cmd sync` in the file-sync sidecar container.
        For Local backend: No-op since files are directly accessible via symlink.

        This is idempotent - only downloads changed files. File visibility in
        sessions is controlled via filtered symlinks in setup_session_workspace(),
        not at the sync level.

        Args:
            sandbox_id: The sandbox UUID
            user_id: The user ID (for S3 path construction)
            tenant_id: The tenant ID (for S3 path construction)
            source: Optional source type (e.g., "gmail", "google_drive").
                    If None, syncs all sources. If specified, only syncs
                    that source's directory.

        Returns:
            True if sync was successful, False otherwise.
        """
        ...

    def ensure_nextjs_running(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        nextjs_port: int,
    ) -> None:
        """Ensure the Next.js server is running for a session.

        Default is a no-op — only meaningful for local backends that manage
        process lifecycles directly (e.g., LocalSandboxManager).

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            nextjs_port: The port the Next.js server should be listening on
        """


# Singleton instance cache for the factory
_sandbox_manager_instance: SandboxManager | None = None
_sandbox_manager_lock = threading.Lock()


def get_sandbox_manager() -> SandboxManager:
    """Get the appropriate SandboxManager implementation based on SANDBOX_BACKEND.

    Returns:
        SandboxManager instance:
        - LocalSandboxManager for local backend (development)
        - KubernetesSandboxManager for kubernetes backend (production)
    """
    global _sandbox_manager_instance

    if _sandbox_manager_instance is None:
        with _sandbox_manager_lock:
            if _sandbox_manager_instance is None:
                if SANDBOX_BACKEND == SandboxBackend.LOCAL:
                    from onyx.server.features.build.sandbox.local.local_sandbox_manager import (
                        LocalSandboxManager,
                    )

                    _sandbox_manager_instance = LocalSandboxManager()
                elif SANDBOX_BACKEND == SandboxBackend.KUBERNETES:
                    from onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (
                        KubernetesSandboxManager,
                    )

                    _sandbox_manager_instance = KubernetesSandboxManager()
                    logger.info("Using KubernetesSandboxManager for sandbox operations")
                else:
                    raise ValueError(f"Unknown sandbox backend: {SANDBOX_BACKEND}")

    return _sandbox_manager_instance


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/__init__.py
================================================
"""Kubernetes-based sandbox implementation.

This module provides the KubernetesSandboxManager for production deployments
that run sandboxes as isolated Kubernetes pods.

Internal implementation details (acp_http_client) are in the internal/
subdirectory and should not be used directly.
"""

from onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (
    KubernetesSandboxManager,
)

__all__ = [
    "KubernetesSandboxManager",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile
================================================
# Sandbox Container Image
#
# User-shared sandbox model:
# - One pod per user, shared across all user's sessions
# - Session workspaces created via kubectl exec (setup_session_workspace)
# - OpenCode agent runs via kubectl exec when needed
#
# Directory structure (created by init container + session setup):
#   /workspace/
#   ├── demo_data/       # Demo data (baked into image, for demo sessions)
#   ├── files/           # User's knowledge files (synced from S3)
#   ├── skills/          # Agent skills (baked into image, copied per-session)
#   ├── templates/       # Output templates (baked into image)
#   └── sessions/        # Per-session workspaces (created via exec)
#       └── $session_id/
#           ├── files/   # Symlink to /workspace/demo_data or /workspace/files
#           ├── outputs/
#           ├── AGENTS.md
#           └── opencode.json

FROM node:20-slim

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    python3-venv \
    curl \
    git \
    procps \
    unzip \
    \
    libreoffice-core \
    libreoffice-common \
    libreoffice-impress \
    libreoffice-draw \
    poppler-utils \
    gcc \
    libc6-dev \
    fontconfig \
    fonts-dejavu-core \
    fonts-liberation \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user (matches pod securityContext)
# Handle existing user/group with UID/GID 1000 in base image
RUN EXISTING_USER=$(id -nu 1000 2>/dev/null || echo ""); \
    EXISTING_GROUP=$(getent group 1000 | cut -d: -f1 2>/dev/null || echo ""); \
    if [ -n "$EXISTING_GROUP" ] && [ "$EXISTING_GROUP" != "sandbox" ]; then \
    groupmod -n sandbox $EXISTING_GROUP; \
    elif [ -z "$EXISTING_GROUP" ]; then \
    groupadd -g 1000 sandbox; \
    fi; \
    if [ -n "$EXISTING_USER" ] && [ "$EXISTING_USER" != "sandbox" ]; then \
    usermod -l sandbox -g sandbox $EXISTING_USER; \
    usermod -d /home/sandbox -m sandbox; \
    usermod -s /bin/bash sandbox; \
    elif [ -z "$EXISTING_USER" ]; then \
    useradd -u 1000 -g sandbox -m -s /bin/bash sandbox; \
    fi

# Create workspace directories
RUN mkdir -p workspace/sessions /workspace/files /workspace/templates /workspace/demo_data && \
    chown -R sandbox:sandbox /workspace

# Copy outputs template (web app scaffold, without node_modules)
COPY --exclude=.next --exclude=node_modules templates/outputs /workspace/templates/outputs
RUN chown -R sandbox:sandbox /workspace/templates

# Copy and extract demo data from zip file
# Zip contains demo_data/ as root folder
COPY demo_data.zip /tmp/demo_data.zip
RUN unzip -q /tmp/demo_data.zip -d /workspace && \
    rm /tmp/demo_data.zip && \
    chown -R sandbox:sandbox /workspace/demo_data

# Copy and install Python requirements into a venv
COPY initial-requirements.txt /tmp/initial-requirements.txt
RUN python3 -m venv /workspace/.venv && \
    /workspace/.venv/bin/pip install --upgrade pip && \
    /workspace/.venv/bin/pip install -r /tmp/initial-requirements.txt && \
    rm /tmp/initial-requirements.txt && \
    chown -R sandbox:sandbox /workspace/.venv

# Add venv to PATH so python/pip use it by default
ENV PATH="/workspace/.venv/bin:${PATH}"

# Install pptxgenjs globally for creating presentations from scratch
RUN npm install -g pptxgenjs

# Install opencode CLI as sandbox user so it goes to their home directory
USER sandbox
RUN curl -fsSL https://opencode.ai/install | bash
USER root

# Add opencode to PATH (installs to ~/.opencode/bin)
ENV PATH="/home/sandbox/.opencode/bin:${PATH}"

# Copy agent skills (symlinked into each session's .opencode/skills/ at setup time)
COPY --exclude=__pycache__ skills/ /workspace/skills/

# Set ownership
RUN chown -R sandbox:sandbox /workspace

# Copy scripts
COPY generate_agents_md.py /usr/local/bin/generate_agents_md.py
RUN chmod +x /usr/local/bin/generate_agents_md.py

# Switch to non-root user
USER sandbox
WORKDIR /workspace

# Expose ports
# - 3000: Next.js dev server (started per-session if needed)
# - 8081: OpenCode ACP HTTP server (started via exec)
EXPOSE 3000 8081

# Keep container alive - all work done via kubectl exec
CMD ["sleep", "infinity"]


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/README.md
================================================
# Sandbox Container Image

This directory contains the Dockerfile and resources for building the Onyx Craft sandbox container image.

## Directory Structure

```
docker/
├── Dockerfile              # Main container image definition
├── demo_data.zip           # Demo data (extracted to /workspace/demo_data)
├── skills/                 # Agent skills (image-generation, pptx, etc.)
├── templates/
│   └── outputs/            # Web app scaffold template (Next.js)
├── initial-requirements.txt # Python packages pre-installed in sandbox
├── generate_agents_md.py   # Script to generate AGENTS.md for sessions
└── README.md               # This file
```

## Building the Image

The sandbox image must be built for **amd64** architecture since our Kubernetes cluster runs on x86_64 nodes.

### Build for amd64 only (fastest)

```bash
cd backend/onyx/server/features/build/sandbox/kubernetes/docker
docker build --platform linux/amd64 -t onyxdotapp/sandbox:v0.1.x .
docker push onyxdotapp/sandbox:v0.1.x
```

### Build multi-arch (recommended for flexibility)

```bash
docker buildx build --platform linux/amd64,linux/arm64 \
  -t onyxdotapp/sandbox:v0.1.x \
  --push .
```

### Update the `latest` tag

After pushing a versioned tag, update `latest`:

```bash
docker tag onyxdotapp/sandbox:v0.1.x onyxdotapp/sandbox:latest
docker push onyxdotapp/sandbox:latest
```

Or with buildx:

```bash
docker buildx build --platform linux/amd64,linux/arm64 \
  -t onyxdotapp/sandbox:v0.1.x \
  -t onyxdotapp/sandbox:latest \
  --push .
```

## Deploying a New Version

1. **Build and push** the new image (see above)

2. **Update the ConfigMap** in `cloud-deployment-yamls/danswer/configmap/env-configmap.yaml`:
   ```yaml
   SANDBOX_CONTAINER_IMAGE: "onyxdotapp/sandbox:v0.1.x"
   ```

3. **Apply the ConfigMap**:
   ```bash
   kubectl apply -f configmap/env-configmap.yaml
   ```

4. **Restart the API server** to pick up the new config:
   ```bash
   kubectl rollout restart deployment/api-server -n danswer
   ```

5. **Delete existing sandbox pods** (they will be recreated with the new image):
   ```bash
   kubectl delete pods -n onyx-sandboxes -l app.kubernetes.io/component=sandbox
   ```

## What's Baked Into the Image

- **Base**: `node:20-slim` (Debian-based)
- **Demo data**: `/workspace/demo_data/` - sample files for demo sessions
- **Skills**: `/workspace/skills/` - agent skills (image-generation, pptx, etc.)
- **Templates**: `/workspace/templates/outputs/` - Next.js web app scaffold
- **Python venv**: `/workspace/.venv/` with packages from `initial-requirements.txt`
- **OpenCode CLI**: Installed in `/home/sandbox/.opencode/bin/`

## Runtime Directory Structure

When a session is created, the following structure is set up in the pod:

```
/workspace/
├── demo_data/              # Baked into image
├── files/                  # Mounted volume, synced from S3
├── skills/                 # Baked into image (agent skills)
├── templates/              # Baked into image
└── sessions/
    └── $session_id/
        ├── .opencode/
        │   └── skills/     # Symlink to /workspace/skills
        ├── files/          # Symlink to /workspace/demo_data or /workspace/files
        ├── outputs/        # Copied from templates, contains web app
        ├── attachments/    # User-uploaded files
        ├── org_info/       # Demo persona info (if demo mode)
        ├── AGENTS.md       # Instructions for the AI agent
        └── opencode.json   # OpenCode configuration
```

## Troubleshooting

### Verify image exists on Docker Hub

```bash
curl -s "https://hub.docker.com/v2/repositories/onyxdotapp/sandbox/tags" | jq '.results[].name'
```

### Check what image a pod is using

```bash
kubectl get pod <pod-name> -n onyx-sandboxes -o jsonpath='{.spec.containers[?(@.name=="sandbox")].image}'
```


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/generate_agents_md.py
================================================
#!/usr/bin/env python3
"""Generate AGENTS.md by scanning the files directory and populating the template.

This script runs during session setup, AFTER files have been synced from S3
and the files symlink has been created. It reads an existing AGENTS.md (which
contains the {{KNOWLEDGE_SOURCES_SECTION}} placeholder), replaces the
placeholder by scanning the knowledge source directory, and writes it back.

Usage:
    python3 generate_agents_md.py <agents_md_path> <files_path>

Arguments:
    agents_md_path: Path to the AGENTS.md file to update in place
    files_path: Path to the files directory to scan for knowledge sources
"""

import sys
from pathlib import Path

# Type alias for connector info entries
ConnectorInfoEntry = dict[str, str | int]

# Connector information for generating knowledge sources section
# Keys are normalized (lowercase, underscores) directory names
# Each entry has: summary (with optional {subdirs}), file_pattern, scan_depth
# NOTE: This is duplicated from agent_instructions.py to avoid circular imports
CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
    "google_drive": {
        "summary": "Documents and files from Google Drive. This may contain information about a user and work they have done.",
        "file_pattern": "`FILE_NAME.json`",
        "scan_depth": 0,
    },
    "gmail": {
        "summary": "Email conversations and threads",
        "file_pattern": "`FILE_NAME.json`",
        "scan_depth": 0,
    },
    "linear": {
        "summary": "Engineering tickets from teams: {subdirs}",
        "file_pattern": "`[TEAM]/[TICKET_ID]_TICKET_TITLE.json`",
        "scan_depth": 2,
    },
    "slack": {
        "summary": "Team messages from channels: {subdirs}",
        "file_pattern": "`[CHANNEL]/[AUTHOR]_in_[CHANNEL]__[MSG].json`",
        "scan_depth": 1,
    },
    "github": {
        "summary": "Pull requests and code from: {subdirs}",
        "file_pattern": "`[ORG]/[REPO]/pull_requests/[PR_NUMBER]__[PR_TITLE].json`",
        "scan_depth": 2,
    },
    "fireflies": {
        "summary": "Meeting transcripts from: {subdirs}",
        "file_pattern": "`[YYYY-MM]/CALL_TITLE.json`",
        "scan_depth": 1,
    },
    "hubspot": {
        "summary": "CRM data including: {subdirs}",
        "file_pattern": "`[TYPE]/[RECORD_NAME].json`",
        "scan_depth": 1,
    },
    "notion": {
        "summary": "Documentation and notes: {subdirs}",
        "file_pattern": "`PAGE_TITLE.json`",
        "scan_depth": 1,
    },
    "user_library": {
        "summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
        "file_pattern": "Any file format",
        "scan_depth": 1,
    },
}
DEFAULT_SCAN_DEPTH = 1


def _normalize_connector_name(name: str) -> str:
    """Normalize a connector directory name for lookup."""
    return name.lower().replace(" ", "_").replace("-", "_")


def _scan_directory_to_depth(
    directory: Path, current_depth: int, max_depth: int, indent: str = "  "
) -> list[str]:
    """Recursively scan directory up to max_depth levels."""
    if current_depth >= max_depth:
        return []

    lines: list[str] = []
    try:
        subdirs = sorted(
            d for d in directory.iterdir() if d.is_dir() and not d.name.startswith(".")
        )

        for subdir in subdirs[:10]:  # Limit to 10 per level
            lines.append(f"{indent}- {subdir.name}/")

            # Recurse if we haven't hit max depth
            if current_depth + 1 < max_depth:
                nested = _scan_directory_to_depth(
                    subdir, current_depth + 1, max_depth, indent + "  "
                )
                lines.extend(nested)

        if len(subdirs) > 10:
            lines.append(f"{indent}- ... and {len(subdirs) - 10} more")
    except Exception:
        pass

    return lines


def build_knowledge_sources_section(files_path: Path) -> str:
    """Build combined knowledge sources section with summary, structure, and file patterns.

    This creates a single section per connector that includes:
    - What kind of data it contains (with actual subdirectory names)
    - The directory structure
    - The file naming pattern

    Args:
        files_path: Path to the files directory

    Returns:
        Formatted knowledge sources section
    """
    if not files_path.exists():
        return "No knowledge sources available."

    sections: list[str] = []
    try:
        for item in sorted(files_path.iterdir()):
            if not item.is_dir() or item.name.startswith("."):
                continue

            normalized = _normalize_connector_name(item.name)
            info = CONNECTOR_INFO.get(normalized, {})

            # Get subdirectory names
            subdirs: list[str] = []
            try:
                subdirs = sorted(
                    d.name
                    for d in item.iterdir()
                    if d.is_dir() and not d.name.startswith(".")
                )[:5]
            except Exception:
                pass

            # Build summary with subdirs
            summary_template = str(info.get("summary", f"Data from {item.name}"))
            if "{subdirs}" in summary_template and subdirs:
                subdir_str = ", ".join(subdirs)
                if len(subdirs) == 5:
                    subdir_str += ", ..."
                summary = summary_template.format(subdirs=subdir_str)
            elif "{subdirs}" in summary_template:
                summary = summary_template.replace(": {subdirs}", "").replace(
                    " {subdirs}", ""
                )
            else:
                summary = summary_template

            # Build connector section
            file_pattern = str(info.get("file_pattern", ""))
            scan_depth = int(info.get("scan_depth", DEFAULT_SCAN_DEPTH))

            lines = [f"### {item.name}/"]
            lines.append(f"{summary}.\n")
            # Add directory structure if depth > 0
            if scan_depth > 0:
                lines.append("Directory structure:\n")
                nested = _scan_directory_to_depth(item, 0, scan_depth, "")
                if nested:
                    lines.append("")
                    lines.extend(nested)

            lines.append(f"\nFile format: {file_pattern}")

            sections.append("\n".join(lines))
    except Exception as e:
        print(
            f"Warning: Error building knowledge sources section: {e}", file=sys.stderr
        )
        return "Error scanning knowledge sources."

    if not sections:
        return "No knowledge sources available."

    return "\n\n".join(sections)


def main() -> None:
    """Main entry point for container startup script.

    Reads an existing AGENTS.md, replaces the {{KNOWLEDGE_SOURCES_SECTION}}
    placeholder by scanning the files directory, and writes it back.

    Usage:
        python3 generate_agents_md.py <agents_md_path> <files_path>
    """
    if len(sys.argv) != 3:
        print(
            f"Usage: {sys.argv[0]} <agents_md_path> <files_path>",
            file=sys.stderr,
        )
        sys.exit(1)

    agents_md_path = Path(sys.argv[1])
    files_path = Path(sys.argv[2])

    if not agents_md_path.exists():
        print(f"Error: {agents_md_path} not found", file=sys.stderr)
        sys.exit(1)

    template = agents_md_path.read_text()

    # Resolve symlinks (handles both direct symlinks and dirs containing symlinks)
    resolved_files_path = files_path.resolve()

    knowledge_sources_section = build_knowledge_sources_section(resolved_files_path)

    # Replace placeholder and write back
    content = template.replace(
        "{{KNOWLEDGE_SOURCES_SECTION}}", knowledge_sources_section
    )
    agents_md_path.write_text(content)
    print(f"Populated knowledge sources in {agents_md_path}")


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt
================================================
defusedxml>=0.7.1
google-genai>=1.0.0
lxml>=5.0.0
markitdown>=0.1.2
matplotlib==3.9.1
matplotlib-inline>=0.1.7
matplotlib-venn>=1.1.2
numpy==1.26.4
opencv-python>=4.11.0.86
openpyxl>=3.1.5
pandas==2.2.2
pdfplumber>=0.11.7
Pillow>=10.0.0
pydantic>=2.11.9
python-pptx>=1.0.2
scikit-image>=0.25.2
scikit-learn>=1.7.2
scipy>=1.16.2
seaborn>=0.13.2
xgboost>=3.0.5

================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/run-test.sh
================================================
#!/bin/bash
# Run Kubernetes sandbox integration tests
#
# This script:
# 1. Builds the onyx-backend Docker image
# 2. Loads it into the kind cluster
# 3. Deletes/recreates the test pod
# 4. Waits for the pod to be ready
# 5. Runs the pytest command inside the pod
#
# Usage:
#   ./run-test.sh [test_name]
#
# Examples:
#   ./run-test.sh                                    # Run all tests
#   ./run-test.sh test_kubernetes_sandbox_provision  # Run specific test

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../../../../../.." && pwd)"
NAMESPACE="onyx-sandboxes"
POD_NAME="sandbox-test"
IMAGE_NAME="onyxdotapp/onyx-backend:latest"
TEST_FILE="onyx/server/features/build/sandbox/kubernetes/test_kubernetes_sandbox.py"
ENV_FILE="$PROJECT_ROOT/.vscode/.env"

ORIGINAL_TEST_FILE="$PROJECT_ROOT/backend/tests/external_dependency_unit/craft/test_kubernetes_sandbox.py"
cp "$ORIGINAL_TEST_FILE" "$PROJECT_ROOT/backend/$TEST_FILE"

# Optional: specific test to run
TEST_NAME="${1:-}"

# Build env var arguments from .vscode/.env file for passing to the container
ENV_VARS=()
if [ -f "$ENV_FILE" ]; then
    echo "=== Loading environment variables from .vscode/.env ==="
    while IFS= read -r line || [ -n "$line" ]; do
        # Skip empty lines and comments
        [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue
        # Skip lines without =
        [[ "$line" != *"="* ]] && continue
        # Add to env vars array
        ENV_VARS+=("$line")
    done < "$ENV_FILE"
    echo "Loaded ${#ENV_VARS[@]} environment variables"
else
    echo "Warning: .vscode/.env not found, running without additional env vars"
fi

echo "=== Building onyx-backend Docker image ==="
cd "$PROJECT_ROOT/backend"
docker build -t "$IMAGE_NAME" -f Dockerfile .

rm "$PROJECT_ROOT/backend/$TEST_FILE"

echo "=== Loading image into kind cluster ==="
kind load docker-image "$IMAGE_NAME" --name onyx 2>/dev/null || \
    kind load docker-image "$IMAGE_NAME" 2>/dev/null || \
    echo "Warning: Could not load into kind. If using minikube, run: minikube image load $IMAGE_NAME"

echo "=== Deleting existing test pod (if any) ==="
kubectl delete pod "$POD_NAME" -n "$NAMESPACE" --ignore-not-found=true

echo "=== Creating test pod ==="
kubectl apply -f "$SCRIPT_DIR/test-job.yaml"

echo "=== Waiting for pod to be ready ==="
kubectl wait --for=condition=Ready pod/"$POD_NAME" -n "$NAMESPACE" --timeout=120s

echo "=== Running tests ==="
if [ -n "$TEST_NAME" ]; then
    kubectl exec -it "$POD_NAME" -n "$NAMESPACE" -- \
        env "${ENV_VARS[@]}" pytest "$TEST_FILE::$TEST_NAME" -v -s
else
    kubectl exec -it "$POD_NAME" -n "$NAMESPACE" -- \
        env "${ENV_VARS[@]}" pytest "$TEST_FILE" -v -s
fi

echo "=== Tests complete ==="


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/image-generation/SKILL.md
================================================
---
name: image-generation
description: Generate images using nano banana.
---

# Image Generation Skill

Generate images using Nano Banana (Google Gemini Image API). Supports text-to-image and image-to-image generation with configurable options.

## Setup

### Dependencies

```bash
pip install google-genai Pillow
```

### Environment Variable

Set your API key:

```bash
export GEMINI_API_KEY="your_api_key_here"
```

## Usage

### Basic Text-to-Image

```bash
python scripts/generate.py --prompt "A futuristic city at sunset with neon lights" --output city.png
```

### With Aspect Ratio

```bash
python scripts/generate.py \
  --prompt "Mountain landscape with a lake" \
  --output landscape.png \
  --aspect-ratio 16:9
```

### Image-to-Image Mode

Use a reference image to guide generation:

```bash
python scripts/generate.py \
  --prompt "Make it look like a watercolor painting" \
  --input-image original.png \
  --output watercolor.png
```

### Generate Multiple Images

```bash
python scripts/generate.py \
  --prompt "Abstract colorful art" \
  --output art.png \
  --num-images 3
```

## Arguments

| Argument | Short | Required | Default | Description |
|----------|-------|----------|---------|-------------|
| `--prompt` | `-p` | Yes | — | Text prompt describing the desired image |
| `--output` | `-o` | No | `output.png` | Output path for the generated image |
| `--model` | `-m` | No | `gemini-2.0-flash-preview-image-generation` | Model to use for generation |
| `--input-image` | `-i` | No | — | Reference image for image-to-image mode |
| `--aspect-ratio` | `-a` | No | — | Aspect ratio: `1:1`, `16:9`, `9:16`, `4:3`, `3:4` |
| `--num-images` | `-n` | No | `1` | Number of images to generate |

## Available Models

- `gemini-2.0-flash-preview-image-generation` - Fast, optimized for speed and lower latency
- `imagen-3.0-generate-002` - High quality image generation

## Programmatic Usage

Import the function directly in Python:

```python
from scripts.generate import generate_image

paths = generate_image(
    prompt="A serene mountain lake under moonlight",
    output_path="./outputs/lake.png",
    aspect_ratio="16:9",
    num_images=2,
)
```

## Tips

- **Detailed prompts work better**: Instead of "a cat", try "a fluffy orange tabby cat sitting on a windowsill, soft morning light, photorealistic"
- **Specify style**: Include style keywords like "digital art", "oil painting", "photorealistic", "anime style"
- **Use aspect ratios**: Match the aspect ratio to your intended use (16:9 for landscapes, 9:16 for portraits/mobile)
- **Image-to-image**: Great for style transfer, variations, or guided modifications of existing images


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/image-generation/scripts/generate.py
================================================
#!/usr/bin/env python3
"""
Image generation script using Nano Banana (Google Gemini Image API).

Supports text-to-image and image-to-image generation with configurable options.
"""

import argparse
import base64
import os
import sys
from io import BytesIO
from pathlib import Path

from PIL import Image


def load_image_as_base64(image_path: str) -> tuple[str, str]:
    """Load an image file and return base64 data and mime type."""
    path = Path(image_path)
    if not path.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Determine mime type from extension
    ext = path.suffix.lower()
    mime_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    mime_type = mime_types.get(ext, "image/png")

    with open(image_path, "rb") as f:
        data = base64.b64encode(f.read()).decode("utf-8")

    return data, mime_type


def generate_image(
    prompt: str,
    output_path: str,
    model: str = "gemini-3-pro-image-preview",
    input_image: str | None = None,
    aspect_ratio: str | None = None,  # noqa: ARG001
    num_images: int = 1,
) -> list[str]:
    """
    Generate image(s) using Google Gemini / Nano Banana API.

    Args:
        prompt: Text description for image generation.
        output_path: Path to save the generated image(s).
        model: Model ID to use for generation.
        input_image: Optional path to reference image for image-to-image mode.
        aspect_ratio: Aspect ratio (e.g., "1:1", "16:9", "9:16", "4:3", "3:4").
        num_images: Number of images to generate.

    Returns:
        List of paths to saved images.
    """
    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GENAI_API_KEY")
    if not api_key:
        raise ValueError(
            "API key not found. Set GEMINI_API_KEY or GENAI_API_KEY environment variable."
        )

    # lazy importing since very heavy libs
    from google import genai
    from google.genai import types

    client = genai.Client(api_key=api_key)

    # Build content parts
    parts: list[types.Part] = []

    # Add reference image if provided (image-to-image mode)
    if input_image:
        img_data, mime_type = load_image_as_base64(input_image)
        parts.append(
            types.Part.from_bytes(
                data=base64.b64decode(img_data),
                mime_type=mime_type,
            )
        )

    # Add text prompt
    parts.append(types.Part.from_text(text=prompt))

    # Build generation config
    generate_config = types.GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
    )

    saved_paths: list[str] = []
    output_dir = Path(output_path).parent
    output_dir.mkdir(parents=True, exist_ok=True)

    base_name = Path(output_path).stem
    extension = Path(output_path).suffix or ".png"

    for i in range(num_images):
        response = client.models.generate_content(
            model=model,
            contents=types.Content(parts=parts),
            config=generate_config,
        )

        # Validate response
        if not response.candidates:
            raise ValueError("No candidates returned from the API")

        candidate = response.candidates[0]
        if not candidate.content or not candidate.content.parts:
            raise ValueError("No content parts returned from the API")

        # Process response parts
        image_count = 0
        for part in candidate.content.parts:
            if part.inline_data is not None and part.inline_data.data is not None:
                # Extract and save the image
                image_data = part.inline_data.data
                image = Image.open(BytesIO(image_data))

                # Generate output filename
                if num_images == 1 and image_count == 0:
                    save_path = output_path
                else:
                    save_path = str(
                        output_dir / f"{base_name}_{i + 1}_{image_count + 1}{extension}"
                    )

                image.save(save_path)
                saved_paths.append(save_path)
                print(f"Saved: {save_path}")
                image_count += 1
            elif part.text:
                # Print any text response from the model
                print(f"Model response: {part.text}")

    return saved_paths


def main() -> None:
    """Main entry point for CLI usage."""
    parser = argparse.ArgumentParser(
        description="Generate images using Nano Banana (Google Gemini Image API).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic text-to-image generation
  python generate.py --prompt "A futuristic city at sunset" --output city.png

  # Generate with specific aspect ratio
  python generate.py --prompt "Mountain landscape" --output landscape.png --aspect-ratio 16:9

  # Image-to-image mode (use reference image)
  python generate.py --prompt "Make it more colorful" --input-image ref.png --output colorful.png

  # Generate multiple images
  python generate.py --prompt "Abstract art" --output art.png --num-images 3
""",
    )

    parser.add_argument(
        "--prompt",
        "-p",
        type=str,
        required=True,
        help="Text prompt describing the desired image.",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        default="output.png",
        help="Output path for the generated image (default: output.png).",
    )
    parser.add_argument(
        "--model",
        "-m",
        type=str,
        default="gemini-3-pro-image-preview",
        help="Model to use (default: gemini-3-pro-image-preview).",
    )
    parser.add_argument(
        "--input-image",
        "-i",
        type=str,
        help="Path to reference image for image-to-image generation.",
    )
    parser.add_argument(
        "--aspect-ratio",
        "-a",
        type=str,
        choices=["1:1", "16:9", "9:16", "4:3", "3:4"],
        help="Aspect ratio for the generated image.",
    )
    parser.add_argument(
        "--num-images",
        "-n",
        type=int,
        default=1,
        help="Number of images to generate (default: 1).",
    )

    args = parser.parse_args()

    try:
        saved_paths = generate_image(
            prompt=args.prompt,
            output_path=args.output,
            model=args.model,
            input_image=args.input_image,
            aspect_ratio=args.aspect_ratio,
            num_images=args.num_images,
        )

        print(f"\nSuccessfully generated {len(saved_paths)} image(s):")
        for path in saved_paths:
            print(f"  - {path}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/SKILL.md
================================================
---
name: pptx
description: "Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in an email or summary); editing, modifying, or updating existing presentations; combining or splitting slide files; working with templates, layouts, speaker notes, or comments. Trigger whenever the user mentions \"deck,\" \"slides,\" \"presentation,\" or references a .pptx filename, regardless of what they plan to do with the content afterward. If a .pptx file needs to be opened, created, or touched, use this skill."
license: Proprietary. LICENSE.txt has complete terms
---

# PPTX Skill

> **Path convention**: All commands run from the **session workspace** (your working directory). Never `cd` into the skill directory. Prefix all skill scripts with `.opencode/skills/pptx/`. All generated files (unpacked dirs, output presentations, thumbnails, PDFs, images) go in `outputs/`.

## Quick Reference

| Task | Guide |
|------|-------|
| Read/analyze content | `python -m markitdown presentation.pptx` |
| Edit or create from template | Read [editing.md](editing.md) |
| Create from scratch | Read [pptxgenjs.md](pptxgenjs.md) |

---

## Reading Content

```bash
# Text extraction
python -m markitdown presentation.pptx

# Visual overview
python .opencode/skills/pptx/scripts/thumbnail.py presentation.pptx

# Raw XML
python .opencode/skills/pptx/scripts/office/unpack.py presentation.pptx outputs/unpacked/
```

---

## Editing Workflow

**Read [editing.md](editing.md) for full details.**

1. Analyze template with `thumbnail.py`
2. Unpack → manipulate slides → edit content → clean → pack

---

## Creating from Scratch

**Read [pptxgenjs.md](pptxgenjs.md) for full details.**

Use when no template or reference presentation is available.

---

## Design Ideas

**Don't create boring slides.** Plain bullets on a white background won't impress anyone. Consider ideas from this list for each slide.

### Before Starting

- **Pick a bold, content-informed color palette**: The palette should feel designed for THIS topic. If swapping your colors into a completely different presentation would still "work," you haven't made specific enough choices.
- **Dominance over equality**: One color should dominate (60-70% visual weight), with 1-2 supporting tones and one sharp accent. Never give all colors equal weight.
- **Dark/light contrast**: Dark backgrounds for title + conclusion slides, light for content ("sandwich" structure). Or commit to dark throughout for a premium feel.
- **Commit to a visual motif**: Pick ONE distinctive element and repeat it — rounded image frames, icons in colored circles, thick single-side borders. Carry it across every slide.

### Color Palettes

Choose colors that match your topic — don't default to generic blue. Use these palettes as inspiration:

| Theme | Primary | Secondary | Accent |
|-------|---------|-----------|--------|
| **Midnight Executive** | `1E2761` (navy) | `CADCFC` (ice blue) | `FFFFFF` (white) |
| **Forest & Moss** | `2C5F2D` (forest) | `97BC62` (moss) | `F5F5F5` (cream) |
| **Coral Energy** | `F96167` (coral) | `F9E795` (gold) | `2F3C7E` (navy) |
| **Warm Terracotta** | `B85042` (terracotta) | `E7E8D1` (sand) | `A7BEAE` (sage) |
| **Ocean Gradient** | `065A82` (deep blue) | `1C7293` (teal) | `21295C` (midnight) |
| **Charcoal Minimal** | `36454F` (charcoal) | `F2F2F2` (off-white) | `212121` (black) |
| **Teal Trust** | `028090` (teal) | `00A896` (seafoam) | `02C39A` (mint) |
| **Berry & Cream** | `6D2E46` (berry) | `A26769` (dusty rose) | `ECE2D0` (cream) |
| **Sage Calm** | `84B59F` (sage) | `69A297` (eucalyptus) | `50808E` (slate) |
| **Cherry Bold** | `990011` (cherry) | `FCF6F5` (off-white) | `2F3C7E` (navy) |

### For Each Slide

**Every slide needs a visual element** — image, chart, icon, or shape. Text-only slides are forgettable.

**Layout options:**
- Two-column (text left, illustration on right)
- Icon + text rows (icon in colored circle, bold header, description below)
- 2x2 or 2x3 grid (image on one side, grid of content blocks on other)
- Half-bleed image (full left or right side) with content overlay

**Data display:**
- Large stat callouts (big numbers 60-72pt with small labels below)
- Comparison columns (before/after, pros/cons, side-by-side options)
- Timeline or process flow (numbered steps, arrows)

**Visual polish:**
- Icons in small colored circles next to section headers
- Italic accent text for key stats or taglines

### Typography

**Choose an interesting font pairing** — don't default to Arial. Pick a header font with personality and pair it with a clean body font.

| Header Font | Body Font |
|-------------|-----------|
| Georgia | Calibri |
| Arial Black | Arial |
| Calibri | Calibri Light |
| Cambria | Calibri |
| Trebuchet MS | Calibri |
| Impact | Arial |
| Palatino | Garamond |
| Consolas | Calibri |

| Element | Size |
|---------|------|
| Slide title | 36-44pt bold |
| Section header | 20-24pt bold |
| Body text | 14-16pt |
| Captions | 10-12pt muted |

### Spacing

- 0.5" minimum margins
- 0.3-0.5" between content blocks
- Leave breathing room—don't fill every inch

### Avoid (Common Mistakes)

- **Don't repeat the same layout** — vary columns, cards, and callouts across slides
- **Don't center body text** — left-align paragraphs and lists; center only titles
- **Don't skimp on size contrast** — titles need 36pt+ to stand out from 14-16pt body
- **Don't default to blue** — pick colors that reflect the specific topic
- **Don't mix spacing randomly** — choose 0.3" or 0.5" gaps and use consistently
- **Don't style one slide and leave the rest plain** — commit fully or keep it simple throughout
- **Don't create text-only slides** — add images, icons, charts, or visual elements; avoid plain title + bullets
- **Don't forget text box padding** — when aligning lines or shapes with text edges, set `margin: 0` on the text box or offset the shape to account for padding
- **Don't use low-contrast elements** — icons AND text need strong contrast against the background; avoid light text on light backgrounds or dark text on dark backgrounds
- **NEVER use accent lines under titles** — these are a hallmark of AI-generated slides; use whitespace or background color instead

---

## QA (Required)

**Assume there are problems. Your job is to find them.**

Your first render is almost never correct. Approach QA as a bug hunt, not a confirmation step. If you found zero issues on first inspection, you weren't looking hard enough.

### Content QA

```bash
python -m markitdown output.pptx
```

Check for missing content, typos, wrong order.

**When using templates, check for leftover placeholder text:**

```bash
python -m markitdown output.pptx | grep -iE "xxxx|lorem|ipsum|this.*(page|slide).*layout"
```

If grep returns results, fix them before declaring success.

### Visual QA

**⚠️ USE SUBAGENTS** — even for 2-3 slides. You've been staring at the code and will see what you expect, not what's there. Subagents have fresh eyes.

Convert slides to images (see [Converting to Images](#converting-to-images)), then use this prompt:

```
Visually inspect these slides. Assume there are issues — find them.

Look for:
- Overlapping elements (text through shapes, lines through words, stacked elements)
- Text overflow or cut off at edges/box boundaries
- Decorative lines positioned for single-line text but title wrapped to two lines
- Source citations or footers colliding with content above
- Elements too close (< 0.3" gaps) or cards/sections nearly touching
- Uneven gaps (large empty area in one place, cramped in another)
- Insufficient margin from slide edges (< 0.5")
- Columns or similar elements not aligned consistently
- Low-contrast text (e.g., light gray text on cream-colored background)
- Low-contrast icons (e.g., dark icons on dark backgrounds without a contrasting circle)
- Text boxes too narrow causing excessive wrapping
- Leftover placeholder content

For each slide, list issues or areas of concern, even if minor.

Read and analyze these images:
1. /path/to/slide-01.jpg (Expected: [brief description])
2. /path/to/slide-02.jpg (Expected: [brief description])

Report ALL issues found, including minor ones.
```

### Verification Loop

1. Generate slides → Convert to images → Inspect
2. **List issues found** (if none found, look again more critically)
3. Fix issues
4. **Re-verify affected slides** — one fix often creates another problem
5. Repeat until a full pass reveals no new issues

**Do not declare success until you've completed at least one fix-and-verify cycle.**

---

## Converting to Images

Convert presentations to individual slide images for visual inspection:

```bash
python .opencode/skills/pptx/scripts/office/soffice.py --headless --convert-to pdf outputs/output.pptx
pdftoppm -jpeg -r 150 outputs/output.pdf outputs/slide
```

This creates `slide-01.jpg`, `slide-02.jpg`, etc.

To re-render specific slides after fixes:

```bash
pdftoppm -jpeg -r 150 -f N -l N outputs/output.pdf outputs/slide-fixed
```

---

## Dependencies

- `pip install "markitdown[pptx]"` - text extraction
- `pip install Pillow` - thumbnail grids
- `npm install -g pptxgenjs` - creating from scratch
- LibreOffice (`soffice`) - PDF conversion (auto-configured for sandboxed environments via `.opencode/skills/pptx/scripts/office/soffice.py`)
- Poppler (`pdftoppm`) - PDF to images


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/editing.md
================================================
# Editing Presentations

> **Path convention**: All commands run from the **session workspace**. Never `cd` into the skill directory. Prefix all skill scripts with `.opencode/skills/pptx/`. All generated files go in `outputs/`.

## Template-Based Workflow

When using an existing presentation as a template:

1. **Analyze existing slides**:
   ```bash
   python .opencode/skills/pptx/scripts/thumbnail.py template.pptx outputs/thumbnails
   python -m markitdown template.pptx
   ```
   Review `outputs/thumbnails.jpg` to see layouts, and markitdown output to see placeholder text.

2. **Plan slide mapping**: For each content section, choose a template slide.

   ⚠️ **USE VARIED LAYOUTS** — monotonous presentations are a common failure mode. Don't default to basic title + bullet slides. Actively seek out:
   - Multi-column layouts (2-column, 3-column)
   - Image + text combinations
   - Full-bleed images with text overlay
   - Quote or callout slides
   - Section dividers
   - Stat/number callouts
   - Icon grids or icon + text rows

   **Avoid:** Repeating the same text-heavy layout for every slide.

   Match content type to layout style (e.g., key points → bullet slide, team info → multi-column, testimonials → quote slide).

3. **Unpack**: `python .opencode/skills/pptx/scripts/office/unpack.py template.pptx outputs/unpacked/`

4. **Build presentation** (do this yourself, not with subagents):
   - Delete unwanted slides (remove from `<p:sldIdLst>`)
   - Duplicate slides you want to reuse (`add_slide.py`)
   - Reorder slides in `<p:sldIdLst>`
   - **Complete all structural changes before step 5**

5. **Edit content**: Update text in each `slide{N}.xml`.
   **Use subagents here if available** — slides are separate XML files, so subagents can edit in parallel.

6. **Clean**: `python .opencode/skills/pptx/scripts/clean.py outputs/unpacked/`

7. **Pack**: `python .opencode/skills/pptx/scripts/office/pack.py outputs/unpacked/ outputs/output.pptx --original template.pptx`

---

## Scripts

| Script | Purpose |
|--------|---------|
| `unpack.py` | Extract and pretty-print PPTX |
| `add_slide.py` | Duplicate slide or create from layout |
| `clean.py` | Remove orphaned files |
| `pack.py` | Repack with validation |
| `thumbnail.py` | Create visual grid of slides |

### unpack.py

```bash
python .opencode/skills/pptx/scripts/office/unpack.py input.pptx outputs/unpacked/
```

Extracts PPTX, pretty-prints XML, escapes smart quotes.

### add_slide.py

```bash
python .opencode/skills/pptx/scripts/add_slide.py outputs/unpacked/ slide2.xml      # Duplicate slide
python .opencode/skills/pptx/scripts/add_slide.py outputs/unpacked/ slideLayout2.xml # From layout
```

Prints `<p:sldId>` to add to `<p:sldIdLst>` at desired position.

### clean.py

```bash
python .opencode/skills/pptx/scripts/clean.py outputs/unpacked/
```

Removes slides not in `<p:sldIdLst>`, unreferenced media, orphaned rels.

### pack.py

```bash
python .opencode/skills/pptx/scripts/office/pack.py outputs/unpacked/ outputs/output.pptx --original input.pptx
```

Validates, repairs, condenses XML, re-encodes smart quotes.

### thumbnail.py

```bash
python .opencode/skills/pptx/scripts/thumbnail.py input.pptx outputs/thumbnails [--cols N]
```

Creates `outputs/thumbnails.jpg` with slide filenames as labels. Default 3 columns, max 12 per grid.

**Use for template analysis only** (choosing layouts). For visual QA, use `soffice` + `pdftoppm` to create full-resolution individual slide images—see SKILL.md.

---

## Slide Operations

Slide order is in `outputs/unpacked/ppt/presentation.xml` → `<p:sldIdLst>`.

**Reorder**: Rearrange `<p:sldId>` elements.

**Delete**: Remove `<p:sldId>`, then run `clean.py`.

**See available layouts**: `ls outputs/unpacked/ppt/slideLayouts/`

**Add**: Use `add_slide.py`. Never manually copy slide files—the script handles notes references, Content_Types.xml, and relationship IDs that manual copying misses.

---

## Editing Content

**Subagents:** If available, use them here (after completing step 4). Each slide is a separate XML file, so subagents can edit in parallel. In your prompt to subagents, include:
- The slide file path(s) to edit
- **"Use the Edit tool for all changes"**
- The formatting rules and common pitfalls below

For each slide:
1. Read the slide's XML
2. Identify ALL placeholder content—text, images, charts, icons, captions
3. Replace each placeholder with final content

**Use the Edit tool, not sed or Python scripts.** The Edit tool forces specificity about what to replace and where, yielding better reliability.

### Formatting Rules

- **Bold all headers, subheadings, and inline labels**: Use `b="1"` on `<a:rPr>`. This includes:
  - Slide titles
  - Section headers within a slide
  - Inline labels like (e.g.: "Status:", "Description:") at the start of a line
- **Never use unicode bullets (•)**: Use proper list formatting with `<a:buChar>` or `<a:buAutoNum>`
- **Bullet consistency**: Let bullets inherit from the layout. Only specify `<a:buChar>` or `<a:buNone>`.

---

## Common Pitfalls

### Template Adaptation

When source content has fewer items than the template:
- **Remove excess elements entirely** (images, shapes, text boxes), don't just clear text
- Check for orphaned visuals after clearing text content
- Run visual QA to catch mismatched counts

When replacing text with different length content:
- **Shorter replacements**: Usually safe
- **Longer replacements**: May overflow or wrap unexpectedly
- Test with visual QA after text changes
- Consider truncating or splitting content to fit the template's design constraints

**Template slots ≠ Source items**: If template has 4 team members but source has 3 users, delete the 4th member's entire group (image + text boxes), not just the text.

### Multi-Item Content

If source has multiple items (numbered lists, multiple sections), create separate `<a:p>` elements for each — **never concatenate into one string**.

**❌ WRONG** — all items in one paragraph:
```xml
<a:p>
  <a:r><a:rPr .../><a:t>Step 1: Do the first thing. Step 2: Do the second thing.</a:t></a:r>
</a:p>
```

**✅ CORRECT** — separate paragraphs with bold headers:
```xml
<a:p>
  <a:pPr algn="l"><a:lnSpc><a:spcPts val="3919"/></a:lnSpc></a:pPr>
  <a:r><a:rPr lang="en-US" sz="2799" b="1" .../><a:t>Step 1</a:t></a:r>
</a:p>
<a:p>
  <a:pPr algn="l"><a:lnSpc><a:spcPts val="3919"/></a:lnSpc></a:pPr>
  <a:r><a:rPr lang="en-US" sz="2799" .../><a:t>Do the first thing.</a:t></a:r>
</a:p>
<a:p>
  <a:pPr algn="l"><a:lnSpc><a:spcPts val="3919"/></a:lnSpc></a:pPr>
  <a:r><a:rPr lang="en-US" sz="2799" b="1" .../><a:t>Step 2</a:t></a:r>
</a:p>
<!-- continue pattern -->
```

Copy `<a:pPr>` from the original paragraph to preserve line spacing. Use `b="1"` on headers.

### Smart Quotes

Handled automatically by unpack/pack. But the Edit tool converts smart quotes to ASCII.

**When adding new text with quotes, use XML entities:**

```xml
<a:t>the &#x201C;Agreement&#x201D;</a:t>
```

| Character | Name | Unicode | XML Entity |
|-----------|------|---------|------------|
| `“` | Left double quote | U+201C | `&#x201C;` |
| `”` | Right double quote | U+201D | `&#x201D;` |
| `‘` | Left single quote | U+2018 | `&#x2018;` |
| `’` | Right single quote | U+2019 | `&#x2019;` |

### Other

- **Whitespace**: Use `xml:space="preserve"` on `<a:t>` with leading/trailing spaces
- **XML parsing**: Use `defusedxml.minidom`, not `xml.etree.ElementTree` (corrupts namespaces)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/pptxgenjs.md
================================================
# PptxGenJS Tutorial

## Setup & Basic Structure

```javascript
const pptxgen = require("pptxgenjs");

let pres = new pptxgen();
pres.layout = 'LAYOUT_16x9';  // or 'LAYOUT_16x10', 'LAYOUT_4x3', 'LAYOUT_WIDE'
pres.author = 'Your Name';
pres.title = 'Presentation Title';

let slide = pres.addSlide();
slide.addText("Hello World!", { x: 0.5, y: 0.5, fontSize: 36, color: "363636" });

pres.writeFile({ fileName: "Presentation.pptx" });
```

## Layout Dimensions

Slide dimensions (coordinates in inches):
- `LAYOUT_16x9`: 10" × 5.625" (default)
- `LAYOUT_16x10`: 10" × 6.25"
- `LAYOUT_4x3`: 10" × 7.5"
- `LAYOUT_WIDE`: 13.3" × 7.5"

---

## Text & Formatting

```javascript
// Basic text
slide.addText("Simple Text", {
  x: 1, y: 1, w: 8, h: 2, fontSize: 24, fontFace: "Arial",
  color: "363636", bold: true, align: "center", valign: "middle"
});

// Character spacing (use charSpacing, not letterSpacing which is silently ignored)
slide.addText("SPACED TEXT", { x: 1, y: 1, w: 8, h: 1, charSpacing: 6 });

// Rich text arrays
slide.addText([
  { text: "Bold ", options: { bold: true } },
  { text: "Italic ", options: { italic: true } }
], { x: 1, y: 3, w: 8, h: 1 });

// Multi-line text (requires breakLine: true)
slide.addText([
  { text: "Line 1", options: { breakLine: true } },
  { text: "Line 2", options: { breakLine: true } },
  { text: "Line 3" }  // Last item doesn't need breakLine
], { x: 0.5, y: 0.5, w: 8, h: 2 });

// Text box margin (internal padding)
slide.addText("Title", {
  x: 0.5, y: 0.3, w: 9, h: 0.6,
  margin: 0  // Use 0 when aligning text with other elements like shapes or icons
});
```

**Tip:** Text boxes have internal margin by default. Set `margin: 0` when you need text to align precisely with shapes, lines, or icons at the same x-position.

---

## Lists & Bullets

```javascript
// ✅ CORRECT: Multiple bullets
slide.addText([
  { text: "First item", options: { bullet: true, breakLine: true } },
  { text: "Second item", options: { bullet: true, breakLine: true } },
  { text: "Third item", options: { bullet: true } }
], { x: 0.5, y: 0.5, w: 8, h: 3 });

// ❌ WRONG: Never use unicode bullets
slide.addText("• First item", { ... });  // Creates double bullets

// Sub-items and numbered lists
{ text: "Sub-item", options: { bullet: true, indentLevel: 1 } }
{ text: "First", options: { bullet: { type: "number" }, breakLine: true } }
```

---

## Shapes

```javascript
slide.addShape(pres.shapes.RECTANGLE, {
  x: 0.5, y: 0.8, w: 1.5, h: 3.0,
  fill: { color: "FF0000" }, line: { color: "000000", width: 2 }
});

slide.addShape(pres.shapes.OVAL, { x: 4, y: 1, w: 2, h: 2, fill: { color: "0000FF" } });

slide.addShape(pres.shapes.LINE, {
  x: 1, y: 3, w: 5, h: 0, line: { color: "FF0000", width: 3, dashType: "dash" }
});

// With transparency
slide.addShape(pres.shapes.RECTANGLE, {
  x: 1, y: 1, w: 3, h: 2,
  fill: { color: "0088CC", transparency: 50 }
});

// Rounded rectangle (rectRadius only works with ROUNDED_RECTANGLE, not RECTANGLE)
// ⚠️ Don't pair with rectangular accent overlays — they won't cover rounded corners. Use RECTANGLE instead.
slide.addShape(pres.shapes.ROUNDED_RECTANGLE, {
  x: 1, y: 1, w: 3, h: 2,
  fill: { color: "FFFFFF" }, rectRadius: 0.1
});

// With shadow
slide.addShape(pres.shapes.RECTANGLE, {
  x: 1, y: 1, w: 3, h: 2,
  fill: { color: "FFFFFF" },
  shadow: { type: "outer", color: "000000", blur: 6, offset: 2, angle: 135, opacity: 0.15 }
});
```

Shadow options:

| Property | Type | Range | Notes |
|----------|------|-------|-------|
| `type` | string | `"outer"`, `"inner"` | |
| `color` | string | 6-char hex (e.g. `"000000"`) | No `#` prefix, no 8-char hex — see Common Pitfalls |
| `blur` | number | 0-100 pt | |
| `offset` | number | 0-200 pt | **Must be non-negative** — negative values corrupt the file |
| `angle` | number | 0-359 degrees | Direction the shadow falls (135 = bottom-right, 270 = upward) |
| `opacity` | number | 0.0-1.0 | Use this for transparency, never encode in color string |

To cast a shadow upward (e.g. on a footer bar), use `angle: 270` with a positive offset — do **not** use a negative offset.

**Note**: Gradient fills are not natively supported. Use a gradient image as a background instead.

---

## Images

### Image Sources

```javascript
// From file path
slide.addImage({ path: "images/chart.png", x: 1, y: 1, w: 5, h: 3 });

// From URL
slide.addImage({ path: "https://example.com/image.jpg", x: 1, y: 1, w: 5, h: 3 });

// From base64 (faster, no file I/O)
slide.addImage({ data: "image/png;base64,iVBORw0KGgo...", x: 1, y: 1, w: 5, h: 3 });
```

### Image Options

```javascript
slide.addImage({
  path: "image.png",
  x: 1, y: 1, w: 5, h: 3,
  rotate: 45,              // 0-359 degrees
  rounding: true,          // Circular crop
  transparency: 50,        // 0-100
  flipH: true,             // Horizontal flip
  flipV: false,            // Vertical flip
  altText: "Description",  // Accessibility
  hyperlink: { url: "https://example.com" }
});
```

### Image Sizing Modes

```javascript
// Contain - fit inside, preserve ratio
{ sizing: { type: 'contain', w: 4, h: 3 } }

// Cover - fill area, preserve ratio (may crop)
{ sizing: { type: 'cover', w: 4, h: 3 } }

// Crop - cut specific portion
{ sizing: { type: 'crop', x: 0.5, y: 0.5, w: 2, h: 2 } }
```

### Calculate Dimensions (preserve aspect ratio)

```javascript
const origWidth = 1978, origHeight = 923, maxHeight = 3.0;
const calcWidth = maxHeight * (origWidth / origHeight);
const centerX = (10 - calcWidth) / 2;

slide.addImage({ path: "image.png", x: centerX, y: 1.2, w: calcWidth, h: maxHeight });
```

### Supported Formats

- **Standard**: PNG, JPG, GIF (animated GIFs work in Microsoft 365)
- **SVG**: Works in modern PowerPoint/Microsoft 365

---

## Icons

Use react-icons to generate SVG icons, then rasterize to PNG for universal compatibility.

### Setup

```javascript
const React = require("react");
const ReactDOMServer = require("react-dom/server");
const sharp = require("sharp");
const { FaCheckCircle, FaChartLine } = require("react-icons/fa");

function renderIconSvg(IconComponent, color = "#000000", size = 256) {
  return ReactDOMServer.renderToStaticMarkup(
    React.createElement(IconComponent, { color, size: String(size) })
  );
}

async function iconToBase64Png(IconComponent, color, size = 256) {
  const svg = renderIconSvg(IconComponent, color, size);
  const pngBuffer = await sharp(Buffer.from(svg)).png().toBuffer();
  return "image/png;base64," + pngBuffer.toString("base64");
}
```

### Add Icon to Slide

```javascript
const iconData = await iconToBase64Png(FaCheckCircle, "#4472C4", 256);

slide.addImage({
  data: iconData,
  x: 1, y: 1, w: 0.5, h: 0.5  // Size in inches
});
```

**Note**: Use size 256 or higher for crisp icons. The size parameter controls the rasterization resolution, not the display size on the slide (which is set by `w` and `h` in inches).

### Icon Libraries

Install: `npm install -g react-icons react react-dom sharp`

Popular icon sets in react-icons:
- `react-icons/fa` - Font Awesome
- `react-icons/md` - Material Design
- `react-icons/hi` - Heroicons
- `react-icons/bi` - Bootstrap Icons

---

## Slide Backgrounds

```javascript
// Solid color
slide.background = { color: "F1F1F1" };

// Color with transparency
slide.background = { color: "FF3399", transparency: 50 };

// Image from URL
slide.background = { path: "https://example.com/bg.jpg" };

// Image from base64
slide.background = { data: "image/png;base64,iVBORw0KGgo..." };
```

---

## Tables

```javascript
slide.addTable([
  ["Header 1", "Header 2"],
  ["Cell 1", "Cell 2"]
], {
  x: 1, y: 1, w: 8, h: 2,
  border: { pt: 1, color: "999999" }, fill: { color: "F1F1F1" }
});

// Advanced with merged cells
let tableData = [
  [{ text: "Header", options: { fill: { color: "6699CC" }, color: "FFFFFF", bold: true } }, "Cell"],
  [{ text: "Merged", options: { colspan: 2 } }]
];
slide.addTable(tableData, { x: 1, y: 3.5, w: 8, colW: [4, 4] });
```

---

## Charts

```javascript
// Bar chart
slide.addChart(pres.charts.BAR, [{
  name: "Sales", labels: ["Q1", "Q2", "Q3", "Q4"], values: [4500, 5500, 6200, 7100]
}], {
  x: 0.5, y: 0.6, w: 6, h: 3, barDir: 'col',
  showTitle: true, title: 'Quarterly Sales'
});

// Line chart
slide.addChart(pres.charts.LINE, [{
  name: "Temp", labels: ["Jan", "Feb", "Mar"], values: [32, 35, 42]
}], { x: 0.5, y: 4, w: 6, h: 3, lineSize: 3, lineSmooth: true });

// Pie chart
slide.addChart(pres.charts.PIE, [{
  name: "Share", labels: ["A", "B", "Other"], values: [35, 45, 20]
}], { x: 7, y: 1, w: 5, h: 4, showPercent: true });
```

### Better-Looking Charts

Default charts look dated. Apply these options for a modern, clean appearance:

```javascript
slide.addChart(pres.charts.BAR, chartData, {
  x: 0.5, y: 1, w: 9, h: 4, barDir: "col",

  // Custom colors (match your presentation palette)
  chartColors: ["0D9488", "14B8A6", "5EEAD4"],

  // Clean background
  chartArea: { fill: { color: "FFFFFF" }, roundedCorners: true },

  // Muted axis labels
  catAxisLabelColor: "64748B",
  valAxisLabelColor: "64748B",

  // Subtle grid (value axis only)
  valGridLine: { color: "E2E8F0", size: 0.5 },
  catGridLine: { style: "none" },

  // Data labels on bars
  showValue: true,
  dataLabelPosition: "outEnd",
  dataLabelColor: "1E293B",

  // Hide legend for single series
  showLegend: false,
});
```

**Key styling options:**
- `chartColors: [...]` - hex colors for series/segments
- `chartArea: { fill, border, roundedCorners }` - chart background
- `catGridLine/valGridLine: { color, style, size }` - grid lines (`style: "none"` to hide)
- `lineSmooth: true` - curved lines (line charts)
- `legendPos: "r"` - legend position: "b", "t", "l", "r", "tr"

---

## Slide Masters

```javascript
pres.defineSlideMaster({
  title: 'TITLE_SLIDE', background: { color: '283A5E' },
  objects: [{
    placeholder: { options: { name: 'title', type: 'title', x: 1, y: 2, w: 8, h: 2 } }
  }]
});

let titleSlide = pres.addSlide({ masterName: "TITLE_SLIDE" });
titleSlide.addText("My Title", { placeholder: "title" });
```

---

## Common Pitfalls

⚠️ These issues cause file corruption, visual bugs, or broken output. Avoid them.

1. **NEVER use "#" with hex colors** - causes file corruption
   ```javascript
   color: "FF0000"      // ✅ CORRECT
   color: "#FF0000"     // ❌ WRONG
   ```

2. **NEVER encode opacity in hex color strings** - 8-char colors (e.g., `"00000020"`) corrupt the file. Use the `opacity` property instead.
   ```javascript
   shadow: { type: "outer", blur: 6, offset: 2, color: "00000020" }          // ❌ CORRUPTS FILE
   shadow: { type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.12 }  // ✅ CORRECT
   ```

3. **Use `bullet: true`** - NEVER unicode symbols like "•" (creates double bullets)

4. **Use `breakLine: true`** between array items or text runs together

5. **Avoid `lineSpacing` with bullets** - causes excessive gaps; use `paraSpaceAfter` instead

6. **Each presentation needs fresh instance** - don't reuse `pptxgen()` objects

7. **NEVER reuse option objects across calls** - PptxGenJS mutates objects in-place (e.g. converting shadow values to EMU). Sharing one object between multiple calls corrupts the second shape.
   ```javascript
   const shadow = { type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.15 };
   slide.addShape(pres.shapes.RECTANGLE, { shadow, ... });  // ❌ second call gets already-converted values
   slide.addShape(pres.shapes.RECTANGLE, { shadow, ... });

   const makeShadow = () => ({ type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.15 });
   slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... });  // ✅ fresh object each time
   slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... });
   ```

8. **Don't use `ROUNDED_RECTANGLE` with accent borders** - rectangular overlay bars won't cover rounded corners. Use `RECTANGLE` instead.
   ```javascript
   // ❌ WRONG: Accent bar doesn't cover rounded corners
   slide.addShape(pres.shapes.ROUNDED_RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: "FFFFFF" } });
   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: "0891B2" } });

   // ✅ CORRECT: Use RECTANGLE for clean alignment
   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: "FFFFFF" } });
   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: "0891B2" } });
   ```

---

## Quick Reference

- **Shapes**: RECTANGLE, OVAL, LINE, ROUNDED_RECTANGLE
- **Charts**: BAR, LINE, PIE, DOUGHNUT, SCATTER, BUBBLE, RADAR
- **Layouts**: LAYOUT_16x9 (10"×5.625"), LAYOUT_16x10, LAYOUT_4x3, LAYOUT_WIDE
- **Alignment**: "left", "center", "right"
- **Chart data labels**: "outEnd", "inEnd", "center"


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/add_slide.py
================================================
"""Add a new slide to an unpacked PPTX directory.

Usage: python add_slide.py <unpacked_dir> <source>

The source can be:
  - A slide file (e.g., slide2.xml) - duplicates the slide
  - A layout file (e.g., slideLayout2.xml) - creates from layout

Examples:
    python add_slide.py unpacked/ slide2.xml
    # Duplicates slide2, creates slide5.xml

    python add_slide.py unpacked/ slideLayout2.xml
    # Creates slide5.xml from slideLayout2.xml

To see available layouts: ls unpacked/ppt/slideLayouts/

Prints the <p:sldId> element to add to presentation.xml.
"""

import re
import shutil
import sys
from pathlib import Path


def get_next_slide_number(slides_dir: Path) -> int:
    existing = [
        int(m.group(1))
        for f in slides_dir.glob("slide*.xml")
        if (m := re.match(r"slide(\d+)\.xml", f.name))
    ]
    return max(existing) + 1 if existing else 1


def create_slide_from_layout(unpacked_dir: Path, layout_file: str) -> None:
    slides_dir = unpacked_dir / "ppt" / "slides"
    rels_dir = slides_dir / "_rels"
    layouts_dir = unpacked_dir / "ppt" / "slideLayouts"

    layout_path = layouts_dir / layout_file
    if not layout_path.exists():
        print(f"Error: {layout_path} not found", file=sys.stderr)
        sys.exit(1)

    next_num = get_next_slide_number(slides_dir)
    dest = f"slide{next_num}.xml"
    dest_slide = slides_dir / dest
    dest_rels = rels_dir / f"{dest}.rels"

    slide_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
  <p:cSld>
    <p:spTree>
      <p:nvGrpSpPr>
        <p:cNvPr id="1" name=""/>
        <p:cNvGrpSpPr/>
        <p:nvPr/>
      </p:nvGrpSpPr>
      <p:grpSpPr>
        <a:xfrm>
          <a:off x="0" y="0"/>
          <a:ext cx="0" cy="0"/>
          <a:chOff x="0" y="0"/>
          <a:chExt cx="0" cy="0"/>
        </a:xfrm>
      </p:grpSpPr>
    </p:spTree>
  </p:cSld>
  <p:clrMapOvr>
    <a:masterClrMapping/>
  </p:clrMapOvr>
</p:sld>"""
    dest_slide.write_text(slide_xml, encoding="utf-8")

    rels_dir.mkdir(exist_ok=True)
    rels_xml = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1"
    Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
    Target="../slideLayouts/{layout_file}"/>
</Relationships>"""
    dest_rels.write_text(rels_xml, encoding="utf-8")

    _add_to_content_types(unpacked_dir, dest)

    rid = _add_to_presentation_rels(unpacked_dir, dest)

    next_slide_id = _get_next_slide_id(unpacked_dir)

    print(f"Created {dest} from {layout_file}")
    print(
        f'Add to presentation.xml <p:sldIdLst>: <p:sldId id="{next_slide_id}" r:id="{rid}"/>'
    )


def duplicate_slide(unpacked_dir: Path, source: str) -> None:
    slides_dir = unpacked_dir / "ppt" / "slides"
    rels_dir = slides_dir / "_rels"

    source_slide = slides_dir / source

    if not source_slide.exists():
        print(f"Error: {source_slide} not found", file=sys.stderr)
        sys.exit(1)

    next_num = get_next_slide_number(slides_dir)
    dest = f"slide{next_num}.xml"
    dest_slide = slides_dir / dest

    source_rels = rels_dir / f"{source}.rels"
    dest_rels = rels_dir / f"{dest}.rels"

    shutil.copy2(source_slide, dest_slide)

    if source_rels.exists():
        shutil.copy2(source_rels, dest_rels)

        rels_content = dest_rels.read_text(encoding="utf-8")
        rels_content = re.sub(
            r'\s*<Relationship[^>]*Type="[^"]*notesSlide"[^>]*/>\s*',
            "\n",
            rels_content,
        )
        dest_rels.write_text(rels_content, encoding="utf-8")

    _add_to_content_types(unpacked_dir, dest)

    rid = _add_to_presentation_rels(unpacked_dir, dest)

    next_slide_id = _get_next_slide_id(unpacked_dir)

    print(f"Created {dest} from {source}")
    print(
        f'Add to presentation.xml <p:sldIdLst>: <p:sldId id="{next_slide_id}" r:id="{rid}"/>'
    )


def _add_to_content_types(unpacked_dir: Path, dest: str) -> None:
    content_types_path = unpacked_dir / "[Content_Types].xml"
    content_types = content_types_path.read_text(encoding="utf-8")

    content_type = (
        "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"
    )
    new_override = (
        f'<Override PartName="/ppt/slides/{dest}" ContentType="{content_type}"/>'
    )

    if f"/ppt/slides/{dest}" not in content_types:
        content_types = content_types.replace("</Types>", f"  {new_override}\n</Types>")
        content_types_path.write_text(content_types, encoding="utf-8")


def _add_to_presentation_rels(unpacked_dir: Path, dest: str) -> str:
    pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels"
    pres_rels = pres_rels_path.read_text(encoding="utf-8")

    rids = [int(m) for m in re.findall(r'Id="rId(\d+)"', pres_rels)]
    next_rid = max(rids) + 1 if rids else 1
    rid = f"rId{next_rid}"

    slide_type = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"
    )
    new_rel = f'<Relationship Id="{rid}" Type="{slide_type}" Target="slides/{dest}"/>'

    if f"slides/{dest}" not in pres_rels:
        pres_rels = pres_rels.replace(
            "</Relationships>", f"  {new_rel}\n</Relationships>"
        )
        pres_rels_path.write_text(pres_rels, encoding="utf-8")

    return rid


def _get_next_slide_id(unpacked_dir: Path) -> int:
    pres_path = unpacked_dir / "ppt" / "presentation.xml"
    pres_content = pres_path.read_text(encoding="utf-8")
    slide_ids = [int(m) for m in re.findall(r'<p:sldId[^>]*id="(\d+)"', pres_content)]
    return max(slide_ids) + 1 if slide_ids else 256


def parse_source(source: str) -> tuple[str, str | None]:
    if source.startswith("slideLayout") and source.endswith(".xml"):
        return ("layout", source)

    return ("slide", None)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python add_slide.py <unpacked_dir> <source>", file=sys.stderr)
        print("", file=sys.stderr)
        print("Source can be:", file=sys.stderr)
        print("  slide2.xml        - duplicate an existing slide", file=sys.stderr)
        print("  slideLayout2.xml  - create from a layout template", file=sys.stderr)
        print("", file=sys.stderr)
        print(
            "To see available layouts: ls <unpacked_dir>/ppt/slideLayouts/",
            file=sys.stderr,
        )
        sys.exit(1)

    unpacked_dir = Path(sys.argv[1])
    source = sys.argv[2]

    if not unpacked_dir.exists():
        print(f"Error: {unpacked_dir} not found", file=sys.stderr)
        sys.exit(1)

    source_type, layout_file = parse_source(source)

    if source_type == "layout" and layout_file is not None:
        create_slide_from_layout(unpacked_dir, layout_file)
    else:
        duplicate_slide(unpacked_dir, source)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/clean.py
================================================
"""Remove unreferenced files from an unpacked PPTX directory.

Usage: python clean.py <unpacked_dir>

Example:
    python clean.py unpacked/

This script removes:
- Orphaned slides (not in sldIdLst) and their relationships
- [trash] directory (unreferenced files)
- Orphaned .rels files for deleted resources
- Unreferenced media, embeddings, charts, diagrams, drawings, ink files
- Unreferenced theme files
- Unreferenced notes slides
- Content-Type overrides for deleted files
"""

import re
import sys
from pathlib import Path

import defusedxml.minidom


def get_slides_in_sldidlst(unpacked_dir: Path) -> set[str]:
    pres_path = unpacked_dir / "ppt" / "presentation.xml"
    pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels"

    if not pres_path.exists() or not pres_rels_path.exists():
        return set()

    rels_dom = defusedxml.minidom.parse(str(pres_rels_path))
    rid_to_slide = {}
    for rel in rels_dom.getElementsByTagName("Relationship"):
        rid = rel.getAttribute("Id")
        target = rel.getAttribute("Target")
        rel_type = rel.getAttribute("Type")
        if "slide" in rel_type and target.startswith("slides/"):
            rid_to_slide[rid] = target.replace("slides/", "")

    pres_content = pres_path.read_text(encoding="utf-8")
    referenced_rids = set(re.findall(r'<p:sldId[^>]*r:id="([^"]+)"', pres_content))

    return {rid_to_slide[rid] for rid in referenced_rids if rid in rid_to_slide}


def remove_orphaned_slides(unpacked_dir: Path) -> list[str]:
    slides_dir = unpacked_dir / "ppt" / "slides"
    slides_rels_dir = slides_dir / "_rels"
    pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels"

    if not slides_dir.exists():
        return []

    referenced_slides = get_slides_in_sldidlst(unpacked_dir)
    removed = []

    for slide_file in slides_dir.glob("slide*.xml"):
        if slide_file.name not in referenced_slides:
            rel_path = slide_file.relative_to(unpacked_dir)
            slide_file.unlink()
            removed.append(str(rel_path))

            rels_file = slides_rels_dir / f"{slide_file.name}.rels"
            if rels_file.exists():
                rels_file.unlink()
                removed.append(str(rels_file.relative_to(unpacked_dir)))

    if removed and pres_rels_path.exists():
        rels_dom = defusedxml.minidom.parse(str(pres_rels_path))
        changed = False

        for rel in list(rels_dom.getElementsByTagName("Relationship")):
            target = rel.getAttribute("Target")
            if target.startswith("slides/"):
                slide_name = target.replace("slides/", "")
                if slide_name not in referenced_slides:
                    if rel.parentNode:
                        rel.parentNode.removeChild(rel)
                        changed = True

        if changed:
            with open(pres_rels_path, "wb") as f:
                f.write(rels_dom.toxml(encoding="utf-8"))

    return removed


def remove_trash_directory(unpacked_dir: Path) -> list[str]:
    trash_dir = unpacked_dir / "[trash]"
    removed = []

    if trash_dir.exists() and trash_dir.is_dir():
        for file_path in trash_dir.iterdir():
            if file_path.is_file():
                rel_path = file_path.relative_to(unpacked_dir)
                removed.append(str(rel_path))
                file_path.unlink()
        trash_dir.rmdir()

    return removed


def get_slide_referenced_files(unpacked_dir: Path) -> set:
    referenced = set()
    slides_rels_dir = unpacked_dir / "ppt" / "slides" / "_rels"

    if not slides_rels_dir.exists():
        return referenced

    for rels_file in slides_rels_dir.glob("*.rels"):
        dom = defusedxml.minidom.parse(str(rels_file))
        for rel in dom.getElementsByTagName("Relationship"):
            target = rel.getAttribute("Target")
            if not target:
                continue
            target_path = (rels_file.parent.parent / target).resolve()
            try:
                referenced.add(target_path.relative_to(unpacked_dir.resolve()))
            except ValueError:
                pass

    return referenced


def remove_orphaned_rels_files(unpacked_dir: Path) -> list[str]:
    resource_dirs = ["charts", "diagrams", "drawings"]
    removed = []
    slide_referenced = get_slide_referenced_files(unpacked_dir)

    for dir_name in resource_dirs:
        rels_dir = unpacked_dir / "ppt" / dir_name / "_rels"
        if not rels_dir.exists():
            continue

        for rels_file in rels_dir.glob("*.rels"):
            resource_file = rels_dir.parent / rels_file.name.replace(".rels", "")
            try:
                resource_rel_path = resource_file.resolve().relative_to(
                    unpacked_dir.resolve()
                )
            except ValueError:
                continue

            if not resource_file.exists() or resource_rel_path not in slide_referenced:
                rels_file.unlink()
                rel_path = rels_file.relative_to(unpacked_dir)
                removed.append(str(rel_path))

    return removed


def get_referenced_files(unpacked_dir: Path) -> set:
    referenced = set()

    for rels_file in unpacked_dir.rglob("*.rels"):
        dom = defusedxml.minidom.parse(str(rels_file))
        for rel in dom.getElementsByTagName("Relationship"):
            target = rel.getAttribute("Target")
            if not target:
                continue
            target_path = (rels_file.parent.parent / target).resolve()
            try:
                referenced.add(target_path.relative_to(unpacked_dir.resolve()))
            except ValueError:
                pass

    return referenced


def remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[str]:
    resource_dirs = [
        "media",
        "embeddings",
        "charts",
        "diagrams",
        "tags",
        "drawings",
        "ink",
    ]
    removed = []

    for dir_name in resource_dirs:
        dir_path = unpacked_dir / "ppt" / dir_name
        if not dir_path.exists():
            continue

        for file_path in dir_path.glob("*"):
            if not file_path.is_file():
                continue
            rel_path = file_path.relative_to(unpacked_dir)
            if rel_path not in referenced:
                file_path.unlink()
                removed.append(str(rel_path))

    theme_dir = unpacked_dir / "ppt" / "theme"
    if theme_dir.exists():
        for file_path in theme_dir.glob("theme*.xml"):
            rel_path = file_path.relative_to(unpacked_dir)
            if rel_path not in referenced:
                file_path.unlink()
                removed.append(str(rel_path))
                theme_rels = theme_dir / "_rels" / f"{file_path.name}.rels"
                if theme_rels.exists():
                    theme_rels.unlink()
                    removed.append(str(theme_rels.relative_to(unpacked_dir)))

    notes_dir = unpacked_dir / "ppt" / "notesSlides"
    if notes_dir.exists():
        for file_path in notes_dir.glob("*.xml"):
            if not file_path.is_file():
                continue
            rel_path = file_path.relative_to(unpacked_dir)
            if rel_path not in referenced:
                file_path.unlink()
                removed.append(str(rel_path))

        notes_rels_dir = notes_dir / "_rels"
        if notes_rels_dir.exists():
            for file_path in notes_rels_dir.glob("*.rels"):
                notes_file = notes_dir / file_path.name.replace(".rels", "")
                if not notes_file.exists():
                    file_path.unlink()
                    removed.append(str(file_path.relative_to(unpacked_dir)))

    return removed


def update_content_types(unpacked_dir: Path, removed_files: list[str]) -> None:
    ct_path = unpacked_dir / "[Content_Types].xml"
    if not ct_path.exists():
        return

    dom = defusedxml.minidom.parse(str(ct_path))
    changed = False

    for override in list(dom.getElementsByTagName("Override")):
        part_name = override.getAttribute("PartName").lstrip("/")
        if part_name in removed_files:
            if override.parentNode:
                override.parentNode.removeChild(override)
                changed = True

    if changed:
        with open(ct_path, "wb") as f:
            f.write(dom.toxml(encoding="utf-8"))


def clean_unused_files(unpacked_dir: Path) -> list[str]:
    all_removed = []

    slides_removed = remove_orphaned_slides(unpacked_dir)
    all_removed.extend(slides_removed)

    trash_removed = remove_trash_directory(unpacked_dir)
    all_removed.extend(trash_removed)

    while True:
        removed_rels = remove_orphaned_rels_files(unpacked_dir)
        referenced = get_referenced_files(unpacked_dir)
        removed_files = remove_orphaned_files(unpacked_dir, referenced)

        total_removed = removed_rels + removed_files
        if not total_removed:
            break

        all_removed.extend(total_removed)

    if all_removed:
        update_content_types(unpacked_dir, all_removed)

    return all_removed


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python clean.py <unpacked_dir>", file=sys.stderr)
        print("Example: python clean.py unpacked/", file=sys.stderr)
        sys.exit(1)

    unpacked_dir = Path(sys.argv[1])

    if not unpacked_dir.exists():
        print(f"Error: {unpacked_dir} not found", file=sys.stderr)
        sys.exit(1)

    removed = clean_unused_files(unpacked_dir)

    if removed:
        print(f"Removed {len(removed)} unreferenced files:")
        for f in removed:
            print(f"  {f}")
    else:
        print("No unreferenced files found")


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/merge_runs.py
================================================
"""Merge adjacent runs with identical formatting in DOCX.

Merges adjacent <w:r> elements that have identical <w:rPr> properties.
Works on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).

Also:
- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
- Removes proofErr elements (spell/grammar markers that block merging)
"""

from pathlib import Path

import defusedxml.minidom


def merge_runs(input_dir: str) -> tuple[int, str]:
    doc_xml = Path(input_dir) / "word" / "document.xml"

    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    try:
        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
        root = dom.documentElement

        _remove_elements(root, "proofErr")
        _strip_run_rsid_attrs(root)

        containers = {run.parentNode for run in _find_elements(root, "r")}

        merge_count = 0
        for container in containers:
            merge_count += _merge_runs_in(container)

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Merged {merge_count} runs"

    except Exception as e:
        return 0, f"Error: {e}"


def _find_elements(root, tag: str) -> list:
    results = []

    def traverse(node):
        if node.nodeType == node.ELEMENT_NODE:
            name = node.localName or node.tagName
            if name == tag or name.endswith(f":{tag}"):
                results.append(node)
            for child in node.childNodes:
                traverse(child)

    traverse(root)
    return results


def _get_child(parent, tag: str):
    for child in parent.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            name = child.localName or child.tagName
            if name == tag or name.endswith(f":{tag}"):
                return child
    return None


def _get_children(parent, tag: str) -> list:
    results = []
    for child in parent.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            name = child.localName or child.tagName
            if name == tag or name.endswith(f":{tag}"):
                results.append(child)
    return results


def _is_adjacent(elem1, elem2) -> bool:
    node = elem1.nextSibling
    while node:
        if node == elem2:
            return True
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling
    return False


def _remove_elements(root, tag: str):
    for elem in _find_elements(root, tag):
        if elem.parentNode:
            elem.parentNode.removeChild(elem)


def _strip_run_rsid_attrs(root):
    for run in _find_elements(root, "r"):
        for attr in list(run.attributes.values()):
            if "rsid" in attr.name.lower():
                run.removeAttribute(attr.name)


def _merge_runs_in(container) -> int:
    merge_count = 0
    run = _first_child_run(container)

    while run:
        while True:
            next_elem = _next_element_sibling(run)
            if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):
                _merge_run_content(run, next_elem)
                container.removeChild(next_elem)
                merge_count += 1
            else:
                break

        _consolidate_text(run)
        run = _next_sibling_run(run)

    return merge_count


def _first_child_run(container):
    for child in container.childNodes:
        if child.nodeType == child.ELEMENT_NODE and _is_run(child):
            return child
    return None


def _next_element_sibling(node):
    sibling = node.nextSibling
    while sibling:
        if sibling.nodeType == sibling.ELEMENT_NODE:
            return sibling
        sibling = sibling.nextSibling
    return None


def _next_sibling_run(node):
    sibling = node.nextSibling
    while sibling:
        if sibling.nodeType == sibling.ELEMENT_NODE:
            if _is_run(sibling):
                return sibling
        sibling = sibling.nextSibling
    return None


def _is_run(node) -> bool:
    name = node.localName or node.tagName
    return name == "r" or name.endswith(":r")


def _can_merge(run1, run2) -> bool:
    rpr1 = _get_child(run1, "rPr")
    rpr2 = _get_child(run2, "rPr")

    if (rpr1 is None) != (rpr2 is None):
        return False
    if rpr1 is None:
        return True
    return rpr1.toxml() == rpr2.toxml()


def _merge_run_content(target, source):
    for child in list(source.childNodes):
        if child.nodeType == child.ELEMENT_NODE:
            name = child.localName or child.tagName
            if name != "rPr" and not name.endswith(":rPr"):
                target.appendChild(child)


def _consolidate_text(run):
    t_elements = _get_children(run, "t")

    for i in range(len(t_elements) - 1, 0, -1):
        curr, prev = t_elements[i], t_elements[i - 1]

        if _is_adjacent(prev, curr):
            prev_text = prev.firstChild.data if prev.firstChild else ""
            curr_text = curr.firstChild.data if curr.firstChild else ""
            merged = prev_text + curr_text

            if prev.firstChild:
                prev.firstChild.data = merged
            else:
                prev.appendChild(run.ownerDocument.createTextNode(merged))

            if merged.startswith(" ") or merged.endswith(" "):
                prev.setAttribute("xml:space", "preserve")
            elif prev.hasAttribute("xml:space"):
                prev.removeAttribute("xml:space")

            run.removeChild(curr)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/simplify_redlines.py
================================================
"""Simplify tracked changes by merging adjacent w:ins or w:del elements.

Merges adjacent <w:ins> elements from the same author into a single element.
Same for <w:del> elements. This makes heavily-redlined documents easier to
work with by reducing the number of tracked change wrappers.

Rules:
- Only merges w:ins with w:ins, w:del with w:del (same element type)
- Only merges if same author (ignores timestamp differences)
- Only merges if truly adjacent (only whitespace between them)
"""

import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

import defusedxml.minidom

WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def simplify_redlines(input_dir: str) -> tuple[int, str]:
    doc_xml = Path(input_dir) / "word" / "document.xml"

    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    try:
        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
        root = dom.documentElement

        merge_count = 0

        containers = _find_elements(root, "p") + _find_elements(root, "tc")

        for container in containers:
            merge_count += _merge_tracked_changes_in(container, "ins")
            merge_count += _merge_tracked_changes_in(container, "del")

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Simplified {merge_count} tracked changes"

    except Exception as e:
        return 0, f"Error: {e}"


def _merge_tracked_changes_in(container, tag: str) -> int:
    merge_count = 0

    tracked = [
        child
        for child in container.childNodes
        if child.nodeType == child.ELEMENT_NODE and _is_element(child, tag)
    ]

    if len(tracked) < 2:
        return 0

    i = 0
    while i < len(tracked) - 1:
        curr = tracked[i]
        next_elem = tracked[i + 1]

        if _can_merge_tracked(curr, next_elem):
            _merge_tracked_content(curr, next_elem)
            container.removeChild(next_elem)
            tracked.pop(i + 1)
            merge_count += 1
        else:
            i += 1

    return merge_count


def _is_element(node, tag: str) -> bool:
    name = node.localName or node.tagName
    return name == tag or name.endswith(f":{tag}")


def _get_author(elem) -> str:
    author = elem.getAttribute("w:author")
    if not author:
        for attr in elem.attributes.values():
            if attr.localName == "author" or attr.name.endswith(":author"):
                return attr.value
    return author


def _can_merge_tracked(elem1, elem2) -> bool:
    if _get_author(elem1) != _get_author(elem2):
        return False

    node = elem1.nextSibling
    while node and node != elem2:
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling

    return True


def _merge_tracked_content(target, source):
    while source.firstChild:
        child = source.firstChild
        source.removeChild(child)
        target.appendChild(child)


def _find_elements(root, tag: str) -> list:
    results = []

    def traverse(node):
        if node.nodeType == node.ELEMENT_NODE:
            name = node.localName or node.tagName
            if name == tag or name.endswith(f":{tag}"):
                results.append(node)
            for child in node.childNodes:
                traverse(child)

    traverse(root)
    return results


def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    if not doc_xml_path.exists():
        return {}

    try:
        tree = ET.parse(doc_xml_path)
        root = tree.getroot()
    except ET.ParseError:
        return {}

    namespaces = {"w": WORD_NS}
    author_attr = f"{{{WORD_NS}}}author"

    authors: dict[str, int] = {}
    for tag in ["ins", "del"]:
        for elem in root.findall(f".//w:{tag}", namespaces):
            author = elem.get(author_attr)
            if author:
                authors[author] = authors.get(author, 0) + 1

    return authors


def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
    try:
        with zipfile.ZipFile(docx_path, "r") as zf:
            if "word/document.xml" not in zf.namelist():
                return {}
            with zf.open("word/document.xml") as f:
                tree = ET.parse(f)
                root = tree.getroot()

                namespaces = {"w": WORD_NS}
                author_attr = f"{{{WORD_NS}}}author"

                authors: dict[str, int] = {}
                for tag in ["ins", "del"]:
                    for elem in root.findall(f".//w:{tag}", namespaces):
                        author = elem.get(author_attr)
                        if author:
                            authors[author] = authors.get(author, 0) + 1
                return authors
    except (zipfile.BadZipFile, ET.ParseError):
        return {}


def infer_author(
    modified_dir: Path, original_docx: Path, default: str = "Claude"
) -> str:
    modified_xml = modified_dir / "word" / "document.xml"
    modified_authors = get_tracked_change_authors(modified_xml)

    if not modified_authors:
        return default

    original_authors = _get_authors_from_docx(original_docx)

    new_changes: dict[str, int] = {}
    for author, count in modified_authors.items():
        original_count = original_authors.get(author, 0)
        diff = count - original_count
        if diff > 0:
            new_changes[author] = diff

    if not new_changes:
        return default

    if len(new_changes) == 1:
        return next(iter(new_changes))

    raise ValueError(
        f"Multiple authors added new changes: {new_changes}. Cannot infer which author to validate."
    )


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/pack.py
================================================
"""Pack a directory into a DOCX, PPTX, or XLSX file.

Validates with auto-repair, condenses XML formatting, and creates the Office file.

Usage:
    python pack.py <input_directory> <output_file> [--original <file>] [--validate true|false]

Examples:
    python pack.py unpacked/ output.docx --original input.docx
    python pack.py unpacked/ output.pptx --validate false
"""

import argparse
import shutil
import sys
import tempfile
import zipfile
from pathlib import Path

import defusedxml.minidom
from validators import DOCXSchemaValidator
from validators import PPTXSchemaValidator
from validators import RedliningValidator


def pack(
    input_directory: str,
    output_file: str,
    original_file: str | None = None,
    validate: bool = True,
    infer_author_func=None,
) -> tuple[None, str]:
    input_dir = Path(input_directory)
    output_path = Path(output_file)
    suffix = output_path.suffix.lower()

    if not input_dir.is_dir():
        return None, f"Error: {input_dir} is not a directory"

    if suffix not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"

    if validate and original_file:
        original_path = Path(original_file)
        if original_path.exists():
            success, output = _run_validation(
                input_dir, original_path, suffix, infer_author_func
            )
            if output:
                print(output)
            if not success:
                return None, f"Error: Validation failed for {input_dir}"

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_content_dir = Path(temp_dir) / "content"
        shutil.copytree(input_dir, temp_content_dir)

        for pattern in ["*.xml", "*.rels"]:
            for xml_file in temp_content_dir.rglob(pattern):
                _condense_xml(xml_file)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for f in temp_content_dir.rglob("*"):
                if f.is_file():
                    zf.write(f, f.relative_to(temp_content_dir))

    return None, f"Successfully packed {input_dir} to {output_file}"


def _run_validation(
    unpacked_dir: Path,
    original_file: Path,
    suffix: str,
    infer_author_func=None,
) -> tuple[bool, str | None]:
    output_lines = []
    validators = []

    if suffix == ".docx":
        author = "Claude"
        if infer_author_func:
            try:
                author = infer_author_func(unpacked_dir, original_file)
            except ValueError as e:
                print(f"Warning: {e} Using default author 'Claude'.", file=sys.stderr)

        validators = [
            DOCXSchemaValidator(unpacked_dir, original_file),
            RedliningValidator(unpacked_dir, original_file, author=author),
        ]
    elif suffix == ".pptx":
        validators = [PPTXSchemaValidator(unpacked_dir, original_file)]

    if not validators:
        return True, None

    total_repairs = sum(v.repair() for v in validators)
    if total_repairs:
        output_lines.append(f"Auto-repaired {total_repairs} issue(s)")

    success = all(v.validate() for v in validators)

    if success:
        output_lines.append("All validations PASSED!")

    return success, "\n".join(output_lines) if output_lines else None


def _condense_xml(xml_file: Path) -> None:
    try:
        with open(xml_file, encoding="utf-8") as f:
            dom = defusedxml.minidom.parse(f)

        for element in dom.getElementsByTagName("*"):
            if element.tagName.endswith(":t"):
                continue

            for child in list(element.childNodes):
                if (
                    child.nodeType == child.TEXT_NODE
                    and child.nodeValue
                    and child.nodeValue.strip() == ""
                ) or child.nodeType == child.COMMENT_NODE:
                    element.removeChild(child)

        xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as e:
        print(f"ERROR: Failed to parse {xml_file.name}: {e}", file=sys.stderr)
        raise


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Pack a directory into a DOCX, PPTX, or XLSX file"
    )
    parser.add_argument("input_directory", help="Unpacked Office document directory")
    parser.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    parser.add_argument(
        "--original",
        help="Original file for validation comparison",
    )
    parser.add_argument(
        "--validate",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Run validation with auto-repair (default: true)",
    )
    args = parser.parse_args()

    _, message = pack(
        args.input_directory,
        args.output_file,
        original_file=args.original,
        validate=args.validate,
    )
    print(message)

    if "Error" in message:
        sys.exit(1)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/chart"
  xmlns:cdr="http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/chart"
  elementFormDefault="qualified" attributeFormDefault="unqualified" blockDefault="#all">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
    schemaLocation="dml-chartDrawing.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:complexType name="CT_Boolean">
    <xsd:attribute name="val" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Double">
    <xsd:attribute name="val" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_UnsignedInt">
    <xsd:attribute name="val" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RelId">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Extension">
    <xsd:sequence>
      <xsd:any processContents="lax"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="xsd:token"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExtensionList">
    <xsd:sequence>
      <xsd:element name="ext" type="CT_Extension" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NumVal">
    <xsd:sequence>
      <xsd:element name="v" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="idx" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="formatCode" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NumData">
    <xsd:sequence>
      <xsd:element name="formatCode" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ptCount" type="CT_UnsignedInt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pt" type="CT_NumVal" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NumRef">
    <xsd:sequence>
      <xsd:element name="f" type="xsd:string" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="numCache" type="CT_NumData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NumDataSource">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="numRef" type="CT_NumRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="numLit" type="CT_NumData" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StrVal">
    <xsd:sequence>
      <xsd:element name="v" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="idx" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StrData">
    <xsd:sequence>
      <xsd:element name="ptCount" type="CT_UnsignedInt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pt" type="CT_StrVal" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StrRef">
    <xsd:sequence>
      <xsd:element name="f" type="xsd:string" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="strCache" type="CT_StrData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Tx">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="strRef" type="CT_StrRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="rich" type="a:CT_TextBody" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TextLanguageID">
    <xsd:attribute name="val" type="s:ST_Lang" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Lvl">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_StrVal" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MultiLvlStrData">
    <xsd:sequence>
      <xsd:element name="ptCount" type="CT_UnsignedInt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl" type="CT_Lvl" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MultiLvlStrRef">
    <xsd:sequence>
      <xsd:element name="f" type="xsd:string" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="multiLvlStrCache" type="CT_MultiLvlStrData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AxDataSource">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="multiLvlStrRef" type="CT_MultiLvlStrRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="numRef" type="CT_NumRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="numLit" type="CT_NumData" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="strRef" type="CT_StrRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="strLit" type="CT_StrData" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SerTx">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="strRef" type="CT_StrRef" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="v" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_LayoutTarget">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="inner"/>
      <xsd:enumeration value="outer"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LayoutTarget">
    <xsd:attribute name="val" type="ST_LayoutTarget" default="outer"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LayoutMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="edge"/>
      <xsd:enumeration value="factor"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LayoutMode">
    <xsd:attribute name="val" type="ST_LayoutMode" default="factor"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ManualLayout">
    <xsd:sequence>
      <xsd:element name="layoutTarget" type="CT_LayoutTarget" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="xMode" type="CT_LayoutMode" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="yMode" type="CT_LayoutMode" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="wMode" type="CT_LayoutMode" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hMode" type="CT_LayoutMode" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="x" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="y" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="w" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="h" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Layout">
    <xsd:sequence>
      <xsd:element name="manualLayout" type="CT_ManualLayout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Title">
    <xsd:sequence>
      <xsd:element name="tx" type="CT_Tx" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="overlay" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_RotX">
    <xsd:restriction base="xsd:byte">
      <xsd:minInclusive value="-90"/>
      <xsd:maxInclusive value="90"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_RotX">
    <xsd:attribute name="val" type="ST_RotX" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HPercent">
    <xsd:union memberTypes="ST_HPercentWithSymbol ST_HPercentUShort"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HPercentWithSymbol">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([5-9])|([1-9][0-9])|([1-4][0-9][0-9])|500)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HPercentUShort">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="5"/>
      <xsd:maxInclusive value="500"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_HPercent">
    <xsd:attribute name="val" type="ST_HPercent" default="100%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_RotY">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="360"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_RotY">
    <xsd:attribute name="val" type="ST_RotY" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DepthPercent">
    <xsd:union memberTypes="ST_DepthPercentWithSymbol ST_DepthPercentUShort"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DepthPercentWithSymbol">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([2-9][0-9])|([1-9][0-9][0-9])|(1[0-9][0-9][0-9])|2000)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DepthPercentUShort">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="20"/>
      <xsd:maxInclusive value="2000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DepthPercent">
    <xsd:attribute name="val" type="ST_DepthPercent" default="100%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Perspective">
    <xsd:restriction base="xsd:unsignedByte">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="240"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Perspective">
    <xsd:attribute name="val" type="ST_Perspective" default="30"/>
  </xsd:complexType>
  <xsd:complexType name="CT_View3D">
    <xsd:sequence>
      <xsd:element name="rotX" type="CT_RotX" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hPercent" type="CT_HPercent" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rotY" type="CT_RotY" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="depthPercent" type="CT_DepthPercent" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rAngAx" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="perspective" type="CT_Perspective" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Surface">
    <xsd:sequence>
      <xsd:element name="thickness" type="CT_Thickness" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pictureOptions" type="CT_PictureOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Thickness">
    <xsd:union memberTypes="ST_ThicknessPercent xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ThicknessPercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="([0-9]+)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Thickness">
    <xsd:attribute name="val" type="ST_Thickness" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DTable">
    <xsd:sequence>
      <xsd:element name="showHorzBorder" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showVertBorder" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showOutline" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showKeys" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_GapAmount">
    <xsd:union memberTypes="ST_GapAmountPercent ST_GapAmountUShort"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_GapAmountPercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([0-9])|([1-9][0-9])|([1-4][0-9][0-9])|500)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_GapAmountUShort">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="500"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_GapAmount">
    <xsd:attribute name="val" type="ST_GapAmount" default="150%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Overlap">
    <xsd:union memberTypes="ST_OverlapPercent ST_OverlapByte"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OverlapPercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="(-?0*(([0-9])|([1-9][0-9])|100))%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OverlapByte">
    <xsd:restriction base="xsd:byte">
      <xsd:minInclusive value="-100"/>
      <xsd:maxInclusive value="100"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Overlap">
    <xsd:attribute name="val" type="ST_Overlap" default="0%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BubbleScale">
    <xsd:union memberTypes="ST_BubbleScalePercent ST_BubbleScaleUInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BubbleScalePercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([0-9])|([1-9][0-9])|([1-2][0-9][0-9])|300)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BubbleScaleUInt">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="300"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BubbleScale">
    <xsd:attribute name="val" type="ST_BubbleScale" default="100%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SizeRepresents">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="area"/>
      <xsd:enumeration value="w"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SizeRepresents">
    <xsd:attribute name="val" type="ST_SizeRepresents" default="area"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FirstSliceAng">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="360"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FirstSliceAng">
    <xsd:attribute name="val" type="ST_FirstSliceAng" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HoleSize">
    <xsd:union memberTypes="ST_HoleSizePercent ST_HoleSizeUByte"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HoleSizePercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*([1-9]|([1-8][0-9])|90)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HoleSizeUByte">
    <xsd:restriction base="xsd:unsignedByte">
      <xsd:minInclusive value="1"/>
      <xsd:maxInclusive value="90"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_HoleSize">
    <xsd:attribute name="val" type="ST_HoleSize" default="10%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SplitType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="cust"/>
      <xsd:enumeration value="percent"/>
      <xsd:enumeration value="pos"/>
      <xsd:enumeration value="val"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SplitType">
    <xsd:attribute name="val" type="ST_SplitType" default="auto"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustSplit">
    <xsd:sequence>
      <xsd:element name="secondPiePt" type="CT_UnsignedInt" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_SecondPieSize">
    <xsd:union memberTypes="ST_SecondPieSizePercent ST_SecondPieSizeUShort"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SecondPieSizePercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([5-9])|([1-9][0-9])|(1[0-9][0-9])|200)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SecondPieSizeUShort">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="5"/>
      <xsd:maxInclusive value="200"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SecondPieSize">
    <xsd:attribute name="val" type="ST_SecondPieSize" default="75%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NumFmt">
    <xsd:attribute name="formatCode" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="sourceLinked" type="xsd:boolean"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LblAlgn">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LblAlgn">
    <xsd:attribute name="val" type="ST_LblAlgn" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DLblPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="bestFit"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="inBase"/>
      <xsd:enumeration value="inEnd"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="outEnd"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="t"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DLblPos">
    <xsd:attribute name="val" type="ST_DLblPos" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_DLblShared">
    <xsd:sequence>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dLblPos" type="CT_DLblPos" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showLegendKey" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showVal" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showCatName" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showSerName" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showPercent" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showBubbleSize" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="separator" type="xsd:string" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:group name="Group_DLbl">
    <xsd:sequence>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tx" type="CT_Tx" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_DLblShared" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_DLbl">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:choice>
        <xsd:element name="delete" type="CT_Boolean" minOccurs="1" maxOccurs="1"/>
        <xsd:group ref="Group_DLbl" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="Group_DLbls">
    <xsd:sequence>
      <xsd:group ref="EG_DLblShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="showLeaderLines" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="leaderLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_DLbls">
    <xsd:sequence>
      <xsd:element name="dLbl" type="CT_DLbl" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:choice>
        <xsd:element name="delete" type="CT_Boolean" minOccurs="1" maxOccurs="1"/>
        <xsd:group ref="Group_DLbls" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_MarkerStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="circle"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="diamond"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="picture"/>
      <xsd:enumeration value="plus"/>
      <xsd:enumeration value="square"/>
      <xsd:enumeration value="star"/>
      <xsd:enumeration value="triangle"/>
      <xsd:enumeration value="x"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MarkerStyle">
    <xsd:attribute name="val" type="ST_MarkerStyle" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MarkerSize">
    <xsd:restriction base="xsd:unsignedByte">
      <xsd:minInclusive value="2"/>
      <xsd:maxInclusive value="72"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MarkerSize">
    <xsd:attribute name="val" type="ST_MarkerSize" default="5"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Marker">
    <xsd:sequence>
      <xsd:element name="symbol" type="CT_MarkerStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="size" type="CT_MarkerSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DPt">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="invertIfNegative" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Marker" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bubble3D" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="explosion" type="CT_UnsignedInt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pictureOptions" type="CT_PictureOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TrendlineType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="exp"/>
      <xsd:enumeration value="linear"/>
      <xsd:enumeration value="log"/>
      <xsd:enumeration value="movingAvg"/>
      <xsd:enumeration value="poly"/>
      <xsd:enumeration value="power"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TrendlineType">
    <xsd:attribute name="val" type="ST_TrendlineType" default="linear"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Order">
    <xsd:restriction base="xsd:unsignedByte">
      <xsd:minInclusive value="2"/>
      <xsd:maxInclusive value="6"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Order">
    <xsd:attribute name="val" type="ST_Order" default="2"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Period">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Period">
    <xsd:attribute name="val" type="ST_Period" default="2"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TrendlineLbl">
    <xsd:sequence>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tx" type="CT_Tx" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Trendline">
    <xsd:sequence>
      <xsd:element name="name" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendlineType" type="CT_TrendlineType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="order" type="CT_Order" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="period" type="CT_Period" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="forward" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="backward" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="intercept" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dispRSqr" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dispEq" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendlineLbl" type="CT_TrendlineLbl" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_ErrDir">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="x"/>
      <xsd:enumeration value="y"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ErrDir">
    <xsd:attribute name="val" type="ST_ErrDir" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ErrBarType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="both"/>
      <xsd:enumeration value="minus"/>
      <xsd:enumeration value="plus"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ErrBarType">
    <xsd:attribute name="val" type="ST_ErrBarType" default="both"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ErrValType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="cust"/>
      <xsd:enumeration value="fixedVal"/>
      <xsd:enumeration value="percentage"/>
      <xsd:enumeration value="stdDev"/>
      <xsd:enumeration value="stdErr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ErrValType">
    <xsd:attribute name="val" type="ST_ErrValType" default="fixedVal"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ErrBars">
    <xsd:sequence>
      <xsd:element name="errDir" type="CT_ErrDir" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="errBarType" type="CT_ErrBarType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="errValType" type="CT_ErrValType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="noEndCap" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="plus" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minus" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_UpDownBar">
    <xsd:sequence>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_UpDownBars">
    <xsd:sequence>
      <xsd:element name="gapWidth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="upBars" type="CT_UpDownBar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="downBars" type="CT_UpDownBar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_SerShared">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="order" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tx" type="CT_SerTx" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_LineSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Marker" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendline" type="CT_Trendline" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="errBars" type="CT_ErrBars" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smooth" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ScatterSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Marker" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendline" type="CT_Trendline" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="errBars" type="CT_ErrBars" minOccurs="0" maxOccurs="2"/>
      <xsd:element name="xVal" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="yVal" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smooth" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_RadarSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Marker" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BarSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="invertIfNegative" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pictureOptions" type="CT_PictureOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendline" type="CT_Trendline" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="errBars" type="CT_ErrBars" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AreaSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="pictureOptions" type="CT_PictureOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendline" type="CT_Trendline" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="errBars" type="CT_ErrBars" minOccurs="0" maxOccurs="2"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PieSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="explosion" type="CT_UnsignedInt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BubbleSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="invertIfNegative" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dPt" type="CT_DPt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trendline" type="CT_Trendline" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="errBars" type="CT_ErrBars" minOccurs="0" maxOccurs="2"/>
      <xsd:element name="xVal" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="yVal" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bubbleSize" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bubble3D" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SurfaceSer">
    <xsd:sequence>
      <xsd:group ref="EG_SerShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cat" type="CT_AxDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="val" type="CT_NumDataSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Grouping">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="percentStacked"/>
      <xsd:enumeration value="standard"/>
      <xsd:enumeration value="stacked"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Grouping">
    <xsd:attribute name="val" type="ST_Grouping" default="standard"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartLines">
    <xsd:sequence>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_LineChartShared">
    <xsd:sequence>
      <xsd:element name="grouping" type="CT_Grouping" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_LineSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dropLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_LineChart">
    <xsd:sequence>
      <xsd:group ref="EG_LineChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hiLowLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="upDownBars" type="CT_UpDownBars" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smooth" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Line3DChart">
    <xsd:sequence>
      <xsd:group ref="EG_LineChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gapDepth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="3" maxOccurs="3"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StockChart">
    <xsd:sequence>
      <xsd:element name="ser" type="CT_LineSer" minOccurs="3" maxOccurs="4"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dropLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hiLowLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="upDownBars" type="CT_UpDownBars" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_ScatterStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="line"/>
      <xsd:enumeration value="lineMarker"/>
      <xsd:enumeration value="marker"/>
      <xsd:enumeration value="smooth"/>
      <xsd:enumeration value="smoothMarker"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ScatterStyle">
    <xsd:attribute name="val" type="ST_ScatterStyle" default="marker"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ScatterChart">
    <xsd:sequence>
      <xsd:element name="scatterStyle" type="CT_ScatterStyle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_ScatterSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_RadarStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="standard"/>
      <xsd:enumeration value="marker"/>
      <xsd:enumeration value="filled"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_RadarStyle">
    <xsd:attribute name="val" type="ST_RadarStyle" default="standard"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RadarChart">
    <xsd:sequence>
      <xsd:element name="radarStyle" type="CT_RadarStyle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_RadarSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_BarGrouping">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="percentStacked"/>
      <xsd:enumeration value="clustered"/>
      <xsd:enumeration value="standard"/>
      <xsd:enumeration value="stacked"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BarGrouping">
    <xsd:attribute name="val" type="ST_BarGrouping" default="clustered"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BarDir">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="bar"/>
      <xsd:enumeration value="col"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BarDir">
    <xsd:attribute name="val" type="ST_BarDir" default="col"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Shape">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="cone"/>
      <xsd:enumeration value="coneToMax"/>
      <xsd:enumeration value="box"/>
      <xsd:enumeration value="cylinder"/>
      <xsd:enumeration value="pyramid"/>
      <xsd:enumeration value="pyramidToMax"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Shape">
    <xsd:attribute name="val" type="ST_Shape" default="box"/>
  </xsd:complexType>
  <xsd:group name="EG_BarChartShared">
    <xsd:sequence>
      <xsd:element name="barDir" type="CT_BarDir" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grouping" type="CT_BarGrouping" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_BarSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_BarChart">
    <xsd:sequence>
      <xsd:group ref="EG_BarChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gapWidth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="overlap" type="CT_Overlap" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="serLines" type="CT_ChartLines" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Bar3DChart">
    <xsd:sequence>
      <xsd:group ref="EG_BarChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gapWidth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="gapDepth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="3"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_AreaChartShared">
    <xsd:sequence>
      <xsd:element name="grouping" type="CT_Grouping" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_AreaSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dropLines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_AreaChart">
    <xsd:sequence>
      <xsd:group ref="EG_AreaChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Area3DChart">
    <xsd:sequence>
      <xsd:group ref="EG_AreaChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gapDepth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="3"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_PieChartShared">
    <xsd:sequence>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_PieSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_PieChart">
    <xsd:sequence>
      <xsd:group ref="EG_PieChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="firstSliceAng" type="CT_FirstSliceAng" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Pie3DChart">
    <xsd:sequence>
      <xsd:group ref="EG_PieChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DoughnutChart">
    <xsd:sequence>
      <xsd:group ref="EG_PieChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="firstSliceAng" type="CT_FirstSliceAng" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="holeSize" type="CT_HoleSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_OfPieType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="pie"/>
      <xsd:enumeration value="bar"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OfPieType">
    <xsd:attribute name="val" type="ST_OfPieType" default="pie"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OfPieChart">
    <xsd:sequence>
      <xsd:element name="ofPieType" type="CT_OfPieType" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_PieChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gapWidth" type="CT_GapAmount" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="splitType" type="CT_SplitType" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="splitPos" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="custSplit" type="CT_CustSplit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="secondPieSize" type="CT_SecondPieSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="serLines" type="CT_ChartLines" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BubbleChart">
    <xsd:sequence>
      <xsd:element name="varyColors" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_BubbleSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dLbls" type="CT_DLbls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bubble3D" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bubbleScale" type="CT_BubbleScale" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showNegBubbles" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sizeRepresents" type="CT_SizeRepresents" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BandFmt">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BandFmts">
    <xsd:sequence>
      <xsd:element name="bandFmt" type="CT_BandFmt" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_SurfaceChartShared">
    <xsd:sequence>
      <xsd:element name="wireframe" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ser" type="CT_SurfaceSer" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="bandFmts" type="CT_BandFmts" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_SurfaceChart">
    <xsd:sequence>
      <xsd:group ref="EG_SurfaceChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="2" maxOccurs="3"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Surface3DChart">
    <xsd:sequence>
      <xsd:group ref="EG_SurfaceChartShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="3" maxOccurs="3"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_AxPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="t"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_AxPos">
    <xsd:attribute name="val" type="ST_AxPos" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Crosses">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="autoZero"/>
      <xsd:enumeration value="max"/>
      <xsd:enumeration value="min"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Crosses">
    <xsd:attribute name="val" type="ST_Crosses" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CrossBetween">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="between"/>
      <xsd:enumeration value="midCat"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_CrossBetween">
    <xsd:attribute name="val" type="ST_CrossBetween" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TickMark">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="cross"/>
      <xsd:enumeration value="in"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="out"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TickMark">
    <xsd:attribute name="val" type="ST_TickMark" default="cross"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TickLblPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="high"/>
      <xsd:enumeration value="low"/>
      <xsd:enumeration value="nextTo"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TickLblPos">
    <xsd:attribute name="val" type="ST_TickLblPos" default="nextTo"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Skip">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Skip">
    <xsd:attribute name="val" type="ST_Skip" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TimeUnit">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="days"/>
      <xsd:enumeration value="months"/>
      <xsd:enumeration value="years"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TimeUnit">
    <xsd:attribute name="val" type="ST_TimeUnit" default="days"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_AxisUnit">
    <xsd:restriction base="xsd:double">
      <xsd:minExclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_AxisUnit">
    <xsd:attribute name="val" type="ST_AxisUnit" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BuiltInUnit">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="hundreds"/>
      <xsd:enumeration value="thousands"/>
      <xsd:enumeration value="tenThousands"/>
      <xsd:enumeration value="hundredThousands"/>
      <xsd:enumeration value="millions"/>
      <xsd:enumeration value="tenMillions"/>
      <xsd:enumeration value="hundredMillions"/>
      <xsd:enumeration value="billions"/>
      <xsd:enumeration value="trillions"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BuiltInUnit">
    <xsd:attribute name="val" type="ST_BuiltInUnit" default="thousands"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PictureFormat">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="stretch"/>
      <xsd:enumeration value="stack"/>
      <xsd:enumeration value="stackScale"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PictureFormat">
    <xsd:attribute name="val" type="ST_PictureFormat" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PictureStackUnit">
    <xsd:restriction base="xsd:double">
      <xsd:minExclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PictureStackUnit">
    <xsd:attribute name="val" type="ST_PictureStackUnit" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PictureOptions">
    <xsd:sequence>
      <xsd:element name="applyToFront" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="applyToSides" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="applyToEnd" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pictureFormat" type="CT_PictureFormat" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pictureStackUnit" type="CT_PictureStackUnit" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DispUnitsLbl">
    <xsd:sequence>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tx" type="CT_Tx" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DispUnits">
    <xsd:sequence>
      <xsd:choice>
        <xsd:element name="custUnit" type="CT_Double" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="builtInUnit" type="CT_BuiltInUnit" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="dispUnitsLbl" type="CT_DispUnitsLbl" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Orientation">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="maxMin"/>
      <xsd:enumeration value="minMax"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Orientation">
    <xsd:attribute name="val" type="ST_Orientation" default="minMax"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LogBase">
    <xsd:restriction base="xsd:double">
      <xsd:minInclusive value="2"/>
      <xsd:maxInclusive value="1000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LogBase">
    <xsd:attribute name="val" type="ST_LogBase" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Scaling">
    <xsd:sequence>
      <xsd:element name="logBase" type="CT_LogBase" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="orientation" type="CT_Orientation" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="max" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="min" type="CT_Double" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_LblOffset">
    <xsd:union memberTypes="ST_LblOffsetPercent ST_LblOffsetUShort"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LblOffsetPercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(([0-9])|([1-9][0-9])|([1-9][0-9][0-9])|1000)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LblOffsetUShort">
    <xsd:restriction base="xsd:unsignedShort">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="1000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LblOffset">
    <xsd:attribute name="val" type="ST_LblOffset" default="100%"/>
  </xsd:complexType>
  <xsd:group name="EG_AxShared">
    <xsd:sequence>
      <xsd:element name="axId" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="scaling" type="CT_Scaling" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="delete" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="axPos" type="CT_AxPos" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="majorGridlines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minorGridlines" type="CT_ChartLines" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="title" type="CT_Title" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="majorTickMark" type="CT_TickMark" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minorTickMark" type="CT_TickMark" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tickLblPos" type="CT_TickLblPos" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="crossAx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="crosses" type="CT_Crosses" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="crossesAt" type="CT_Double" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_CatAx">
    <xsd:sequence>
      <xsd:group ref="EG_AxShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="auto" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lblAlgn" type="CT_LblAlgn" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lblOffset" type="CT_LblOffset" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tickLblSkip" type="CT_Skip" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tickMarkSkip" type="CT_Skip" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="noMultiLvlLbl" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DateAx">
    <xsd:sequence>
      <xsd:group ref="EG_AxShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="auto" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lblOffset" type="CT_LblOffset" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="baseTimeUnit" type="CT_TimeUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="majorUnit" type="CT_AxisUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="majorTimeUnit" type="CT_TimeUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minorUnit" type="CT_AxisUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minorTimeUnit" type="CT_TimeUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SerAx">
    <xsd:sequence>
      <xsd:group ref="EG_AxShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tickLblSkip" type="CT_Skip" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tickMarkSkip" type="CT_Skip" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ValAx">
    <xsd:sequence>
      <xsd:group ref="EG_AxShared" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="crossBetween" type="CT_CrossBetween" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="majorUnit" type="CT_AxisUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="minorUnit" type="CT_AxisUnit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dispUnits" type="CT_DispUnits" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PlotArea">
    <xsd:sequence>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:choice minOccurs="1" maxOccurs="unbounded">
        <xsd:element name="areaChart" type="CT_AreaChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="area3DChart" type="CT_Area3DChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="lineChart" type="CT_LineChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="line3DChart" type="CT_Line3DChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="stockChart" type="CT_StockChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="radarChart" type="CT_RadarChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="scatterChart" type="CT_ScatterChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="pieChart" type="CT_PieChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="pie3DChart" type="CT_Pie3DChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="doughnutChart" type="CT_DoughnutChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="barChart" type="CT_BarChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="bar3DChart" type="CT_Bar3DChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="ofPieChart" type="CT_OfPieChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="surfaceChart" type="CT_SurfaceChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="surface3DChart" type="CT_Surface3DChart" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="bubbleChart" type="CT_BubbleChart" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="valAx" type="CT_ValAx" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="catAx" type="CT_CatAx" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="dateAx" type="CT_DateAx" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="serAx" type="CT_SerAx" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="dTable" type="CT_DTable" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotFmt">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="marker" type="CT_Marker" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dLbl" type="CT_DLbl" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotFmts">
    <xsd:sequence>
      <xsd:element name="pivotFmt" type="CT_PivotFmt" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_LegendPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="tr"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="t"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LegendPos">
    <xsd:attribute name="val" type="ST_LegendPos" default="r"/>
  </xsd:complexType>
  <xsd:group name="EG_LegendEntryData">
    <xsd:sequence>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_LegendEntry">
    <xsd:sequence>
      <xsd:element name="idx" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:choice>
        <xsd:element name="delete" type="CT_Boolean" minOccurs="1" maxOccurs="1"/>
        <xsd:group ref="EG_LegendEntryData" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Legend">
    <xsd:sequence>
      <xsd:element name="legendPos" type="CT_LegendPos" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legendEntry" type="CT_LegendEntry" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="layout" type="CT_Layout" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="overlay" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_DispBlanksAs">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="span"/>
      <xsd:enumeration value="gap"/>
      <xsd:enumeration value="zero"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DispBlanksAs">
    <xsd:attribute name="val" type="ST_DispBlanksAs" default="zero"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Chart">
    <xsd:sequence>
      <xsd:element name="title" type="CT_Title" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="autoTitleDeleted" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pivotFmts" type="CT_PivotFmts" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="view3D" type="CT_View3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="floor" type="CT_Surface" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sideWall" type="CT_Surface" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="backWall" type="CT_Surface" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="plotArea" type="CT_PlotArea" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="legend" type="CT_Legend" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="plotVisOnly" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dispBlanksAs" type="CT_DispBlanksAs" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showDLblsOverMax" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Style">
    <xsd:restriction base="xsd:unsignedByte">
      <xsd:minInclusive value="1"/>
      <xsd:maxInclusive value="48"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Style">
    <xsd:attribute name="val" type="ST_Style" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotSource">
    <xsd:sequence>
      <xsd:element name="name" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fmtId" type="CT_UnsignedInt" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Protection">
    <xsd:sequence>
      <xsd:element name="chartObject" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="data" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="formatting" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="selection" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="userInterface" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_HeaderFooter">
    <xsd:sequence>
      <xsd:element name="oddHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oddFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="evenHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="evenFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="alignWithMargins" type="xsd:boolean" default="true"/>
    <xsd:attribute name="differentOddEven" type="xsd:boolean" default="false"/>
    <xsd:attribute name="differentFirst" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageMargins">
    <xsd:attribute name="l" type="xsd:double" use="required"/>
    <xsd:attribute name="r" type="xsd:double" use="required"/>
    <xsd:attribute name="t" type="xsd:double" use="required"/>
    <xsd:attribute name="b" type="xsd:double" use="required"/>
    <xsd:attribute name="header" type="xsd:double" use="required"/>
    <xsd:attribute name="footer" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PageSetupOrientation">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="portrait"/>
      <xsd:enumeration value="landscape"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ExternalData">
    <xsd:sequence>
      <xsd:element name="autoUpdate" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageSetup">
    <xsd:attribute name="paperSize" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="paperHeight" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="paperWidth" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="firstPageNumber" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="orientation" type="ST_PageSetupOrientation" use="optional"
      default="default"/>
    <xsd:attribute name="blackAndWhite" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="draft" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="useFirstPageNumber" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="horizontalDpi" type="xsd:int" use="optional" default="600"/>
    <xsd:attribute name="verticalDpi" type="xsd:int" use="optional" default="600"/>
    <xsd:attribute name="copies" type="xsd:unsignedInt" use="optional" default="1"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PrintSettings">
    <xsd:sequence>
      <xsd:element name="headerFooter" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageMargins" type="CT_PageMargins" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetup" type="CT_PageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawingHF" type="CT_RelId" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartSpace">
    <xsd:sequence>
      <xsd:element name="date1904" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lang" type="CT_TextLanguageID" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="roundedCorners" type="CT_Boolean" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="style" type="CT_Style" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="clrMapOvr" type="a:CT_ColorMapping" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pivotSource" type="CT_PivotSource" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="protection" type="CT_Protection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="chart" type="CT_Chart" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="externalData" type="CT_ExternalData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="printSettings" type="CT_PrintSettings" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="userShapes" type="CT_RelId" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="chartSpace" type="CT_ChartSpace"/>
  <xsd:element name="userShapes" type="cdr:CT_Drawing"/>
  <xsd:element name="chart" type="CT_RelId"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:complexType name="CT_ShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvSpPr" type="a:CT_NonVisualDrawingShapeProps" minOccurs="1" maxOccurs="1"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Shape">
    <xsd:sequence>
      <xsd:element name="nvSpPr" type="CT_ShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txBody" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="textlink" type="xsd:string" use="optional"/>
    <xsd:attribute name="fLocksText" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConnectorNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvCxnSpPr" type="a:CT_NonVisualConnectorProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Connector">
    <xsd:sequence>
      <xsd:element name="nvCxnSpPr" type="CT_ConnectorNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PictureNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvPicPr" type="a:CT_NonVisualPictureProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Picture">
    <xsd:sequence>
      <xsd:element name="nvPicPr" type="CT_PictureNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="a:CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicFrameNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="a:CT_NonVisualGraphicFrameProperties"
        minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicFrame">
    <xsd:sequence>
      <xsd:element name="nvGraphicFramePr" type="CT_GraphicFrameNonVisual" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="xfrm" type="a:CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGrpSpPr" type="a:CT_NonVisualGroupDrawingShapeProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShape">
    <xsd:sequence>
      <xsd:element name="nvGrpSpPr" type="CT_GroupShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grpSpPr" type="a:CT_GroupShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="sp" type="CT_Shape"/>
        <xsd:element name="grpSp" type="CT_GroupShape"/>
        <xsd:element name="graphicFrame" type="CT_GraphicFrame"/>
        <xsd:element name="cxnSp" type="CT_Connector"/>
        <xsd:element name="pic" type="CT_Picture"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_ObjectChoices">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="sp" type="CT_Shape"/>
        <xsd:element name="grpSp" type="CT_GroupShape"/>
        <xsd:element name="graphicFrame" type="CT_GraphicFrame"/>
        <xsd:element name="cxnSp" type="CT_Connector"/>
        <xsd:element name="pic" type="CT_Picture"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:group>
  <xsd:simpleType name="ST_MarkerCoordinate">
    <xsd:restriction base="xsd:double">
      <xsd:minInclusive value="0.0"/>
      <xsd:maxInclusive value="1.0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Marker">
    <xsd:sequence>
      <xsd:element name="x" type="ST_MarkerCoordinate" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="y" type="ST_MarkerCoordinate" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_RelSizeAnchor">
    <xsd:sequence>
      <xsd:element name="from" type="CT_Marker"/>
      <xsd:element name="to" type="CT_Marker"/>
      <xsd:group ref="EG_ObjectChoices"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AbsSizeAnchor">
    <xsd:sequence>
      <xsd:element name="from" type="CT_Marker"/>
      <xsd:element name="ext" type="a:CT_PositiveSize2D"/>
      <xsd:group ref="EG_ObjectChoices"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_Anchor">
    <xsd:choice>
      <xsd:element name="relSizeAnchor" type="CT_RelSizeAnchor"/>
      <xsd:element name="absSizeAnchor" type="CT_AbsSizeAnchor"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_Drawing">
    <xsd:sequence>
      <xsd:group ref="EG_Anchor" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/diagram"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/diagram"
  elementFormDefault="qualified" attributeFormDefault="unqualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:complexType name="CT_CTName">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CTDescription">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CTCategory">
    <xsd:attribute name="type" type="xsd:anyURI" use="required"/>
    <xsd:attribute name="pri" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CTCategories">
    <xsd:sequence minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="cat" type="CT_CTCategory" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_ClrAppMethod">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="span"/>
      <xsd:enumeration value="cycle"/>
      <xsd:enumeration value="repeat"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HueDir">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="cw"/>
      <xsd:enumeration value="ccw"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Colors">
    <xsd:sequence>
      <xsd:group ref="a:EG_ColorChoice" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="meth" type="ST_ClrAppMethod" use="optional" default="span"/>
    <xsd:attribute name="hueDir" type="ST_HueDir" use="optional" default="cw"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CTStyleLabel">
    <xsd:sequence>
      <xsd:element name="fillClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="linClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="effectClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txLinClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txFillClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txEffectClrLst" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorTransform">
    <xsd:sequence>
      <xsd:element name="title" type="CT_CTName" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_CTDescription" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_CTCategories" minOccurs="0"/>
      <xsd:element name="styleLbl" type="CT_CTStyleLabel" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:element name="colorsDef" type="CT_ColorTransform"/>
  <xsd:complexType name="CT_ColorTransformHeader">
    <xsd:sequence>
      <xsd:element name="title" type="CT_CTName" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_CTDescription" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_CTCategories" minOccurs="0"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="required"/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
    <xsd:attribute name="resId" type="xsd:int" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:element name="colorsDefHdr" type="CT_ColorTransformHeader"/>
  <xsd:complexType name="CT_ColorTransformHeaderLst">
    <xsd:sequence>
      <xsd:element name="colorsDefHdr" type="CT_ColorTransformHeader" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="colorsDefHdrLst" type="CT_ColorTransformHeaderLst"/>
  <xsd:simpleType name="ST_PtType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="node"/>
      <xsd:enumeration value="asst"/>
      <xsd:enumeration value="doc"/>
      <xsd:enumeration value="pres"/>
      <xsd:enumeration value="parTrans"/>
      <xsd:enumeration value="sibTrans"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Pt">
    <xsd:sequence>
      <xsd:element name="prSet" type="CT_ElemPropSet" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="t" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="modelId" type="ST_ModelId" use="required"/>
    <xsd:attribute name="type" type="ST_PtType" use="optional" default="node"/>
    <xsd:attribute name="cxnId" type="ST_ModelId" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PtList">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_Pt" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_CxnType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="parOf"/>
      <xsd:enumeration value="presOf"/>
      <xsd:enumeration value="presParOf"/>
      <xsd:enumeration value="unknownRelationship"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Cxn">
    <xsd:sequence>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="modelId" type="ST_ModelId" use="required"/>
    <xsd:attribute name="type" type="ST_CxnType" use="optional" default="parOf"/>
    <xsd:attribute name="srcId" type="ST_ModelId" use="required"/>
    <xsd:attribute name="destId" type="ST_ModelId" use="required"/>
    <xsd:attribute name="srcOrd" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="destOrd" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="parTransId" type="ST_ModelId" use="optional" default="0"/>
    <xsd:attribute name="sibTransId" type="ST_ModelId" use="optional" default="0"/>
    <xsd:attribute name="presId" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_CxnList">
    <xsd:sequence>
      <xsd:element name="cxn" type="CT_Cxn" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DataModel">
    <xsd:sequence>
      <xsd:element name="ptLst" type="CT_PtList"/>
      <xsd:element name="cxnLst" type="CT_CxnList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bg" type="a:CT_BackgroundFormatting" minOccurs="0"/>
      <xsd:element name="whole" type="a:CT_WholeE2oFormatting" minOccurs="0"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="dataModel" type="CT_DataModel"/>
  <xsd:attributeGroup name="AG_IteratorAttributes">
    <xsd:attribute name="axis" type="ST_AxisTypes" use="optional" default="none"/>
    <xsd:attribute name="ptType" type="ST_ElementTypes" use="optional" default="all"/>
    <xsd:attribute name="hideLastTrans" type="ST_Booleans" use="optional" default="true"/>
    <xsd:attribute name="st" type="ST_Ints" use="optional" default="1"/>
    <xsd:attribute name="cnt" type="ST_UnsignedInts" use="optional" default="0"/>
    <xsd:attribute name="step" type="ST_Ints" use="optional" default="1"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_ConstraintAttributes">
    <xsd:attribute name="type" type="ST_ConstraintType" use="required"/>
    <xsd:attribute name="for" type="ST_ConstraintRelationship" use="optional" default="self"/>
    <xsd:attribute name="forName" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="ptType" type="ST_ElementType" use="optional" default="all"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_ConstraintRefAttributes">
    <xsd:attribute name="refType" type="ST_ConstraintType" use="optional" default="none"/>
    <xsd:attribute name="refFor" type="ST_ConstraintRelationship" use="optional" default="self"/>
    <xsd:attribute name="refForName" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="refPtType" type="ST_ElementType" use="optional" default="all"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_Constraint">
    <xsd:sequence>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_ConstraintAttributes"/>
    <xsd:attributeGroup ref="AG_ConstraintRefAttributes"/>
    <xsd:attribute name="op" type="ST_BoolOperator" use="optional" default="none"/>
    <xsd:attribute name="val" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="fact" type="xsd:double" use="optional" default="1"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Constraints">
    <xsd:sequence>
      <xsd:element name="constr" type="CT_Constraint" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NumericRule">
    <xsd:sequence>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_ConstraintAttributes"/>
    <xsd:attribute name="val" type="xsd:double" use="optional" default="NaN"/>
    <xsd:attribute name="fact" type="xsd:double" use="optional" default="NaN"/>
    <xsd:attribute name="max" type="xsd:double" use="optional" default="NaN"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Rules">
    <xsd:sequence>
      <xsd:element name="rule" type="CT_NumericRule" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PresentationOf">
    <xsd:sequence>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_IteratorAttributes"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LayoutShapeType" final="restriction">
    <xsd:union memberTypes="a:ST_ShapeType ST_OutputShapeType"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Index1">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Adj">
    <xsd:attribute name="idx" type="ST_Index1" use="required"/>
    <xsd:attribute name="val" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AdjLst">
    <xsd:sequence>
      <xsd:element name="adj" type="CT_Adj" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Shape">
    <xsd:sequence>
      <xsd:element name="adjLst" type="CT_AdjLst" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rot" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="type" type="ST_LayoutShapeType" use="optional" default="none"/>
    <xsd:attribute ref="r:blip" use="optional"/>
    <xsd:attribute name="zOrderOff" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="hideGeom" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="lkTxEntry" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="blipPhldr" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Parameter">
    <xsd:attribute name="type" type="ST_ParameterId" use="required"/>
    <xsd:attribute name="val" type="ST_ParameterVal" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Algorithm">
    <xsd:sequence>
      <xsd:element name="param" type="CT_Parameter" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_AlgorithmType" use="required"/>
    <xsd:attribute name="rev" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LayoutNode">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="alg" type="CT_Algorithm" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="presOf" type="CT_PresentationOf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="constrLst" type="CT_Constraints" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ruleLst" type="CT_Rules" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="varLst" type="CT_LayoutVariablePropertySet" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="forEach" type="CT_ForEach"/>
      <xsd:element name="layoutNode" type="CT_LayoutNode"/>
      <xsd:element name="choose" type="CT_Choose"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="styleLbl" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="chOrder" type="ST_ChildOrderType" use="optional" default="b"/>
    <xsd:attribute name="moveWith" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_ForEach">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="alg" type="CT_Algorithm" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="presOf" type="CT_PresentationOf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="constrLst" type="CT_Constraints" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ruleLst" type="CT_Rules" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="forEach" type="CT_ForEach"/>
      <xsd:element name="layoutNode" type="CT_LayoutNode"/>
      <xsd:element name="choose" type="CT_Choose"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="ref" type="xsd:string" use="optional" default=""/>
    <xsd:attributeGroup ref="AG_IteratorAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_When">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="alg" type="CT_Algorithm" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="presOf" type="CT_PresentationOf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="constrLst" type="CT_Constraints" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ruleLst" type="CT_Rules" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="forEach" type="CT_ForEach"/>
      <xsd:element name="layoutNode" type="CT_LayoutNode"/>
      <xsd:element name="choose" type="CT_Choose"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
    <xsd:attributeGroup ref="AG_IteratorAttributes"/>
    <xsd:attribute name="func" type="ST_FunctionType" use="required"/>
    <xsd:attribute name="arg" type="ST_FunctionArgument" use="optional" default="none"/>
    <xsd:attribute name="op" type="ST_FunctionOperator" use="required"/>
    <xsd:attribute name="val" type="ST_FunctionValue" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Otherwise">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="alg" type="CT_Algorithm" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shape" type="CT_Shape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="presOf" type="CT_PresentationOf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="constrLst" type="CT_Constraints" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ruleLst" type="CT_Rules" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="forEach" type="CT_ForEach"/>
      <xsd:element name="layoutNode" type="CT_LayoutNode"/>
      <xsd:element name="choose" type="CT_Choose"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_Choose">
    <xsd:sequence>
      <xsd:element name="if" type="CT_When" maxOccurs="unbounded"/>
      <xsd:element name="else" type="CT_Otherwise" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_SampleData">
    <xsd:sequence>
      <xsd:element name="dataModel" type="CT_DataModel" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="useDef" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Category">
    <xsd:attribute name="type" type="xsd:anyURI" use="required"/>
    <xsd:attribute name="pri" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Categories">
    <xsd:sequence>
      <xsd:element name="cat" type="CT_Category" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Name">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Description">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DiagramDefinition">
    <xsd:sequence>
      <xsd:element name="title" type="CT_Name" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_Description" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_Categories" minOccurs="0"/>
      <xsd:element name="sampData" type="CT_SampleData" minOccurs="0"/>
      <xsd:element name="styleData" type="CT_SampleData" minOccurs="0"/>
      <xsd:element name="clrData" type="CT_SampleData" minOccurs="0"/>
      <xsd:element name="layoutNode" type="CT_LayoutNode"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
    <xsd:attribute name="defStyle" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:element name="layoutDef" type="CT_DiagramDefinition"/>
  <xsd:complexType name="CT_DiagramDefinitionHeader">
    <xsd:sequence>
      <xsd:element name="title" type="CT_Name" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_Description" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_Categories" minOccurs="0"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="required"/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
    <xsd:attribute name="defStyle" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="resId" type="xsd:int" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:element name="layoutDefHdr" type="CT_DiagramDefinitionHeader"/>
  <xsd:complexType name="CT_DiagramDefinitionHeaderLst">
    <xsd:sequence>
      <xsd:element name="layoutDefHdr" type="CT_DiagramDefinitionHeader" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="layoutDefHdrLst" type="CT_DiagramDefinitionHeaderLst"/>
  <xsd:complexType name="CT_RelIds">
    <xsd:attribute ref="r:dm" use="required"/>
    <xsd:attribute ref="r:lo" use="required"/>
    <xsd:attribute ref="r:qs" use="required"/>
    <xsd:attribute ref="r:cs" use="required"/>
  </xsd:complexType>
  <xsd:element name="relIds" type="CT_RelIds"/>
  <xsd:simpleType name="ST_ParameterVal">
    <xsd:union
      memberTypes="ST_DiagramHorizontalAlignment ST_VerticalAlignment ST_ChildDirection ST_ChildAlignment ST_SecondaryChildAlignment ST_LinearDirection ST_SecondaryLinearDirection ST_StartingElement ST_BendPoint ST_ConnectorRouting ST_ArrowheadStyle ST_ConnectorDimension ST_RotationPath ST_CenterShapeMapping ST_NodeHorizontalAlignment ST_NodeVerticalAlignment ST_FallbackDimension ST_TextDirection ST_PyramidAccentPosition ST_PyramidAccentTextMargin ST_TextBlockDirection ST_TextAnchorHorizontal ST_TextAnchorVertical ST_DiagramTextAlignment ST_AutoTextRotation ST_GrowDirection ST_FlowDirection ST_ContinueDirection ST_Breakpoint ST_Offset ST_HierarchyAlignment xsd:int xsd:double xsd:boolean xsd:string ST_ConnectorPoint"
    />
  </xsd:simpleType>
  <xsd:simpleType name="ST_ModelId">
    <xsd:union memberTypes="xsd:int s:ST_Guid"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PrSetCustVal">
    <xsd:union memberTypes="s:ST_Percentage xsd:int"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_ElemPropSet">
    <xsd:sequence>
      <xsd:element name="presLayoutVars" type="CT_LayoutVariablePropertySet" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="presAssocID" type="ST_ModelId" use="optional"/>
    <xsd:attribute name="presName" type="xsd:string" use="optional"/>
    <xsd:attribute name="presStyleLbl" type="xsd:string" use="optional"/>
    <xsd:attribute name="presStyleIdx" type="xsd:int" use="optional"/>
    <xsd:attribute name="presStyleCnt" type="xsd:int" use="optional"/>
    <xsd:attribute name="loTypeId" type="xsd:string" use="optional"/>
    <xsd:attribute name="loCatId" type="xsd:string" use="optional"/>
    <xsd:attribute name="qsTypeId" type="xsd:string" use="optional"/>
    <xsd:attribute name="qsCatId" type="xsd:string" use="optional"/>
    <xsd:attribute name="csTypeId" type="xsd:string" use="optional"/>
    <xsd:attribute name="csCatId" type="xsd:string" use="optional"/>
    <xsd:attribute name="coherent3DOff" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="phldrT" type="xsd:string" use="optional"/>
    <xsd:attribute name="phldr" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="custAng" type="xsd:int" use="optional"/>
    <xsd:attribute name="custFlipVert" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="custFlipHor" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="custSzX" type="xsd:int" use="optional"/>
    <xsd:attribute name="custSzY" type="xsd:int" use="optional"/>
    <xsd:attribute name="custScaleX" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custScaleY" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custT" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="custLinFactX" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custLinFactY" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custLinFactNeighborX" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custLinFactNeighborY" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custRadScaleRad" type="ST_PrSetCustVal" use="optional"/>
    <xsd:attribute name="custRadScaleInc" type="ST_PrSetCustVal" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Direction" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="norm"/>
      <xsd:enumeration value="rev"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HierBranchStyle" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="hang"/>
      <xsd:enumeration value="std"/>
      <xsd:enumeration value="init"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnimOneStr" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="one"/>
      <xsd:enumeration value="branch"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnimLvlStr" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="lvl"/>
      <xsd:enumeration value="ctr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OrgChart">
    <xsd:attribute name="val" type="xsd:boolean" default="false" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_NodeCount">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="-1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ChildMax">
    <xsd:attribute name="val" type="ST_NodeCount" default="-1" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ChildPref">
    <xsd:attribute name="val" type="ST_NodeCount" default="-1" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BulletEnabled">
    <xsd:attribute name="val" type="xsd:boolean" default="false" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Direction">
    <xsd:attribute name="val" type="ST_Direction" default="norm" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_HierBranchStyle">
    <xsd:attribute name="val" type="ST_HierBranchStyle" default="std" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AnimOne">
    <xsd:attribute name="val" type="ST_AnimOneStr" default="one" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AnimLvl">
    <xsd:attribute name="val" type="ST_AnimLvlStr" default="none" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ResizeHandlesStr" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="exact"/>
      <xsd:enumeration value="rel"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ResizeHandles">
    <xsd:attribute name="val" type="ST_ResizeHandlesStr" default="rel" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LayoutVariablePropertySet">
    <xsd:sequence>
      <xsd:element name="orgChart" type="CT_OrgChart" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="chMax" type="CT_ChildMax" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="chPref" type="CT_ChildPref" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bulletEnabled" type="CT_BulletEnabled" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dir" type="CT_Direction" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hierBranch" type="CT_HierBranchStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="animOne" type="CT_AnimOne" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="animLvl" type="CT_AnimLvl" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="resizeHandles" type="CT_ResizeHandles" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SDName">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SDDescription">
    <xsd:attribute name="lang" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SDCategory">
    <xsd:attribute name="type" type="xsd:anyURI" use="required"/>
    <xsd:attribute name="pri" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SDCategories">
    <xsd:sequence minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="cat" type="CT_SDCategory" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TextProps">
    <xsd:sequence>
      <xsd:group ref="a:EG_Text3D" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StyleLabel">
    <xsd:sequence>
      <xsd:element name="scene3d" type="a:CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sp3d" type="a:CT_Shape3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txPr" type="CT_TextProps" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StyleDefinition">
    <xsd:sequence>
      <xsd:element name="title" type="CT_SDName" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_SDDescription" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_SDCategories" minOccurs="0"/>
      <xsd:element name="scene3d" type="a:CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="styleLbl" type="CT_StyleLabel" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:element name="styleDef" type="CT_StyleDefinition"/>
  <xsd:complexType name="CT_StyleDefinitionHeader">
    <xsd:sequence>
      <xsd:element name="title" type="CT_SDName" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="desc" type="CT_SDDescription" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="catLst" type="CT_SDCategories" minOccurs="0"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueId" type="xsd:string" use="required"/>
    <xsd:attribute name="minVer" type="xsd:string" use="optional"/>
    <xsd:attribute name="resId" type="xsd:int" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:element name="styleDefHdr" type="CT_StyleDefinitionHeader"/>
  <xsd:complexType name="CT_StyleDefinitionHeaderLst">
    <xsd:sequence>
      <xsd:element name="styleDefHdr" type="CT_StyleDefinitionHeader" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="styleDefHdrLst" type="CT_StyleDefinitionHeaderLst"/>
  <xsd:simpleType name="ST_AlgorithmType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="composite"/>
      <xsd:enumeration value="conn"/>
      <xsd:enumeration value="cycle"/>
      <xsd:enumeration value="hierChild"/>
      <xsd:enumeration value="hierRoot"/>
      <xsd:enumeration value="pyra"/>
      <xsd:enumeration value="lin"/>
      <xsd:enumeration value="sp"/>
      <xsd:enumeration value="tx"/>
      <xsd:enumeration value="snake"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AxisType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="self"/>
      <xsd:enumeration value="ch"/>
      <xsd:enumeration value="des"/>
      <xsd:enumeration value="desOrSelf"/>
      <xsd:enumeration value="par"/>
      <xsd:enumeration value="ancst"/>
      <xsd:enumeration value="ancstOrSelf"/>
      <xsd:enumeration value="followSib"/>
      <xsd:enumeration value="precedSib"/>
      <xsd:enumeration value="follow"/>
      <xsd:enumeration value="preced"/>
      <xsd:enumeration value="root"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AxisTypes">
    <xsd:list itemType="ST_AxisType"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BoolOperator" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="equ"/>
      <xsd:enumeration value="gte"/>
      <xsd:enumeration value="lte"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ChildOrderType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="t"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConstraintType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="alignOff"/>
      <xsd:enumeration value="begMarg"/>
      <xsd:enumeration value="bendDist"/>
      <xsd:enumeration value="begPad"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="bMarg"/>
      <xsd:enumeration value="bOff"/>
      <xsd:enumeration value="ctrX"/>
      <xsd:enumeration value="ctrXOff"/>
      <xsd:enumeration value="ctrY"/>
      <xsd:enumeration value="ctrYOff"/>
      <xsd:enumeration value="connDist"/>
      <xsd:enumeration value="diam"/>
      <xsd:enumeration value="endMarg"/>
      <xsd:enumeration value="endPad"/>
      <xsd:enumeration value="h"/>
      <xsd:enumeration value="hArH"/>
      <xsd:enumeration value="hOff"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="lMarg"/>
      <xsd:enumeration value="lOff"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="rMarg"/>
      <xsd:enumeration value="rOff"/>
      <xsd:enumeration value="primFontSz"/>
      <xsd:enumeration value="pyraAcctRatio"/>
      <xsd:enumeration value="secFontSz"/>
      <xsd:enumeration value="sibSp"/>
      <xsd:enumeration value="secSibSp"/>
      <xsd:enumeration value="sp"/>
      <xsd:enumeration value="stemThick"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="tMarg"/>
      <xsd:enumeration value="tOff"/>
      <xsd:enumeration value="userA"/>
      <xsd:enumeration value="userB"/>
      <xsd:enumeration value="userC"/>
      <xsd:enumeration value="userD"/>
      <xsd:enumeration value="userE"/>
      <xsd:enumeration value="userF"/>
      <xsd:enumeration value="userG"/>
      <xsd:enumeration value="userH"/>
      <xsd:enumeration value="userI"/>
      <xsd:enumeration value="userJ"/>
      <xsd:enumeration value="userK"/>
      <xsd:enumeration value="userL"/>
      <xsd:enumeration value="userM"/>
      <xsd:enumeration value="userN"/>
      <xsd:enumeration value="userO"/>
      <xsd:enumeration value="userP"/>
      <xsd:enumeration value="userQ"/>
      <xsd:enumeration value="userR"/>
      <xsd:enumeration value="userS"/>
      <xsd:enumeration value="userT"/>
      <xsd:enumeration value="userU"/>
      <xsd:enumeration value="userV"/>
      <xsd:enumeration value="userW"/>
      <xsd:enumeration value="userX"/>
      <xsd:enumeration value="userY"/>
      <xsd:enumeration value="userZ"/>
      <xsd:enumeration value="w"/>
      <xsd:enumeration value="wArH"/>
      <xsd:enumeration value="wOff"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConstraintRelationship" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="self"/>
      <xsd:enumeration value="ch"/>
      <xsd:enumeration value="des"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ElementType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="all"/>
      <xsd:enumeration value="doc"/>
      <xsd:enumeration value="node"/>
      <xsd:enumeration value="norm"/>
      <xsd:enumeration value="nonNorm"/>
      <xsd:enumeration value="asst"/>
      <xsd:enumeration value="nonAsst"/>
      <xsd:enumeration value="parTrans"/>
      <xsd:enumeration value="pres"/>
      <xsd:enumeration value="sibTrans"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ElementTypes">
    <xsd:list itemType="ST_ElementType"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ParameterId" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="horzAlign"/>
      <xsd:enumeration value="vertAlign"/>
      <xsd:enumeration value="chDir"/>
      <xsd:enumeration value="chAlign"/>
      <xsd:enumeration value="secChAlign"/>
      <xsd:enumeration value="linDir"/>
      <xsd:enumeration value="secLinDir"/>
      <xsd:enumeration value="stElem"/>
      <xsd:enumeration value="bendPt"/>
      <xsd:enumeration value="connRout"/>
      <xsd:enumeration value="begSty"/>
      <xsd:enumeration value="endSty"/>
      <xsd:enumeration value="dim"/>
      <xsd:enumeration value="rotPath"/>
      <xsd:enumeration value="ctrShpMap"/>
      <xsd:enumeration value="nodeHorzAlign"/>
      <xsd:enumeration value="nodeVertAlign"/>
      <xsd:enumeration value="fallback"/>
      <xsd:enumeration value="txDir"/>
      <xsd:enumeration value="pyraAcctPos"/>
      <xsd:enumeration value="pyraAcctTxMar"/>
      <xsd:enumeration value="txBlDir"/>
      <xsd:enumeration value="txAnchorHorz"/>
      <xsd:enumeration value="txAnchorVert"/>
      <xsd:enumeration value="txAnchorHorzCh"/>
      <xsd:enumeration value="txAnchorVertCh"/>
      <xsd:enumeration value="parTxLTRAlign"/>
      <xsd:enumeration value="parTxRTLAlign"/>
      <xsd:enumeration value="shpTxLTRAlignCh"/>
      <xsd:enumeration value="shpTxRTLAlignCh"/>
      <xsd:enumeration value="autoTxRot"/>
      <xsd:enumeration value="grDir"/>
      <xsd:enumeration value="flowDir"/>
      <xsd:enumeration value="contDir"/>
      <xsd:enumeration value="bkpt"/>
      <xsd:enumeration value="off"/>
      <xsd:enumeration value="hierAlign"/>
      <xsd:enumeration value="bkPtFixedVal"/>
      <xsd:enumeration value="stBulletLvl"/>
      <xsd:enumeration value="stAng"/>
      <xsd:enumeration value="spanAng"/>
      <xsd:enumeration value="ar"/>
      <xsd:enumeration value="lnSpPar"/>
      <xsd:enumeration value="lnSpAfParP"/>
      <xsd:enumeration value="lnSpCh"/>
      <xsd:enumeration value="lnSpAfChP"/>
      <xsd:enumeration value="rtShortDist"/>
      <xsd:enumeration value="alignTx"/>
      <xsd:enumeration value="pyraLvlNode"/>
      <xsd:enumeration value="pyraAcctBkgdNode"/>
      <xsd:enumeration value="pyraAcctTxNode"/>
      <xsd:enumeration value="srcNode"/>
      <xsd:enumeration value="dstNode"/>
      <xsd:enumeration value="begPts"/>
      <xsd:enumeration value="endPts"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Ints">
    <xsd:list itemType="xsd:int"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UnsignedInts">
    <xsd:list itemType="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Booleans">
    <xsd:list itemType="xsd:boolean"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FunctionType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="cnt"/>
      <xsd:enumeration value="pos"/>
      <xsd:enumeration value="revPos"/>
      <xsd:enumeration value="posEven"/>
      <xsd:enumeration value="posOdd"/>
      <xsd:enumeration value="var"/>
      <xsd:enumeration value="depth"/>
      <xsd:enumeration value="maxDepth"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FunctionOperator" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="equ"/>
      <xsd:enumeration value="neq"/>
      <xsd:enumeration value="gt"/>
      <xsd:enumeration value="lt"/>
      <xsd:enumeration value="gte"/>
      <xsd:enumeration value="lte"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DiagramHorizontalAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VerticalAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="mid"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ChildDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="horz"/>
      <xsd:enumeration value="vert"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ChildAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SecondaryChildAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LinearDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="fromL"/>
      <xsd:enumeration value="fromR"/>
      <xsd:enumeration value="fromT"/>
      <xsd:enumeration value="fromB"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SecondaryLinearDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="fromL"/>
      <xsd:enumeration value="fromR"/>
      <xsd:enumeration value="fromT"/>
      <xsd:enumeration value="fromB"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StartingElement" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="node"/>
      <xsd:enumeration value="trans"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RotationPath" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="alongPath"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CenterShapeMapping" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="fNode"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BendPoint" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="beg"/>
      <xsd:enumeration value="def"/>
      <xsd:enumeration value="end"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConnectorRouting" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="stra"/>
      <xsd:enumeration value="bend"/>
      <xsd:enumeration value="curve"/>
      <xsd:enumeration value="longCurve"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ArrowheadStyle" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="arr"/>
      <xsd:enumeration value="noArr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConnectorDimension" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="1D"/>
      <xsd:enumeration value="2D"/>
      <xsd:enumeration value="cust"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConnectorPoint" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="bCtr"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="midL"/>
      <xsd:enumeration value="midR"/>
      <xsd:enumeration value="tCtr"/>
      <xsd:enumeration value="bL"/>
      <xsd:enumeration value="bR"/>
      <xsd:enumeration value="tL"/>
      <xsd:enumeration value="tR"/>
      <xsd:enumeration value="radial"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_NodeHorizontalAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_NodeVerticalAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="mid"/>
      <xsd:enumeration value="b"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FallbackDimension" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="1D"/>
      <xsd:enumeration value="2D"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="fromT"/>
      <xsd:enumeration value="fromB"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PyramidAccentPosition" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="bef"/>
      <xsd:enumeration value="aft"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PyramidAccentTextMargin" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="step"/>
      <xsd:enumeration value="stack"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextBlockDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="horz"/>
      <xsd:enumeration value="vert"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextAnchorHorizontal" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="ctr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextAnchorVertical" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="mid"/>
      <xsd:enumeration value="b"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DiagramTextAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AutoTextRotation" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="upr"/>
      <xsd:enumeration value="grav"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_GrowDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="tL"/>
      <xsd:enumeration value="tR"/>
      <xsd:enumeration value="bL"/>
      <xsd:enumeration value="bR"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FlowDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="row"/>
      <xsd:enumeration value="col"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ContinueDirection" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="revDir"/>
      <xsd:enumeration value="sameDir"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Breakpoint" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="endCnv"/>
      <xsd:enumeration value="bal"/>
      <xsd:enumeration value="fixed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Offset" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="off"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HierarchyAlignment" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="tL"/>
      <xsd:enumeration value="tR"/>
      <xsd:enumeration value="tCtrCh"/>
      <xsd:enumeration value="tCtrDes"/>
      <xsd:enumeration value="bL"/>
      <xsd:enumeration value="bR"/>
      <xsd:enumeration value="bCtrCh"/>
      <xsd:enumeration value="bCtrDes"/>
      <xsd:enumeration value="lT"/>
      <xsd:enumeration value="lB"/>
      <xsd:enumeration value="lCtrCh"/>
      <xsd:enumeration value="lCtrDes"/>
      <xsd:enumeration value="rT"/>
      <xsd:enumeration value="rB"/>
      <xsd:enumeration value="rCtrCh"/>
      <xsd:enumeration value="rCtrDes"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FunctionValue" final="restriction">
    <xsd:union
      memberTypes="xsd:int xsd:boolean ST_Direction ST_HierBranchStyle ST_AnimOneStr ST_AnimLvlStr ST_ResizeHandlesStr"
    />
  </xsd:simpleType>
  <xsd:simpleType name="ST_VariableType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="orgChart"/>
      <xsd:enumeration value="chMax"/>
      <xsd:enumeration value="chPref"/>
      <xsd:enumeration value="bulEnabled"/>
      <xsd:enumeration value="dir"/>
      <xsd:enumeration value="hierBranch"/>
      <xsd:enumeration value="animOne"/>
      <xsd:enumeration value="animLvl"/>
      <xsd:enumeration value="resizeHandles"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FunctionArgument" final="restriction">
    <xsd:union memberTypes="ST_VariableType"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OutputShapeType" final="restriction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="conn"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  elementFormDefault="qualified"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas">
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:element name="lockedCanvas" type="a:CT_GvmlGroupShape"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/main"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/main"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/diagram"
    schemaLocation="dml-diagram.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/chart"
    schemaLocation="dml-chart.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/picture"
    schemaLocation="dml-picture.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas"
    schemaLocation="dml-lockedCanvas.xsd"/>
  <xsd:complexType name="CT_AudioFile">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:link" use="required"/>
    <xsd:attribute name="contentType" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VideoFile">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:link" use="required"/>
    <xsd:attribute name="contentType" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_QuickTimeFile">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:link" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AudioCDTime">
    <xsd:attribute name="track" type="xsd:unsignedByte" use="required"/>
    <xsd:attribute name="time" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AudioCD">
    <xsd:sequence>
      <xsd:element name="st" type="CT_AudioCDTime" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="end" type="CT_AudioCDTime" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_Media">
    <xsd:choice>
      <xsd:element name="audioCd" type="CT_AudioCD"/>
      <xsd:element name="wavAudioFile" type="CT_EmbeddedWAVAudioFile"/>
      <xsd:element name="audioFile" type="CT_AudioFile"/>
      <xsd:element name="videoFile" type="CT_VideoFile"/>
      <xsd:element name="quickTimeFile" type="CT_QuickTimeFile"/>
    </xsd:choice>
  </xsd:group>
  <xsd:element name="videoFile" type="CT_VideoFile"/>
  <xsd:simpleType name="ST_StyleMatrixColumnIndex">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FontCollectionIndex">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="major"/>
      <xsd:enumeration value="minor"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ColorSchemeIndex">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="dk1"/>
      <xsd:enumeration value="lt1"/>
      <xsd:enumeration value="dk2"/>
      <xsd:enumeration value="lt2"/>
      <xsd:enumeration value="accent1"/>
      <xsd:enumeration value="accent2"/>
      <xsd:enumeration value="accent3"/>
      <xsd:enumeration value="accent4"/>
      <xsd:enumeration value="accent5"/>
      <xsd:enumeration value="accent6"/>
      <xsd:enumeration value="hlink"/>
      <xsd:enumeration value="folHlink"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ColorScheme">
    <xsd:sequence>
      <xsd:element name="dk1" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lt1" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="dk2" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lt2" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent1" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent2" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent3" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent4" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent5" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="accent6" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hlink" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="folHlink" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_SupplementalFont">
    <xsd:attribute name="script" type="xsd:string" use="required"/>
    <xsd:attribute name="typeface" type="ST_TextTypeface" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomColorList">
    <xsd:sequence>
      <xsd:element name="custClr" type="CT_CustomColor" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FontCollection">
    <xsd:sequence>
      <xsd:element name="latin" type="CT_TextFont" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="ea" type="CT_TextFont" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cs" type="CT_TextFont" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="font" type="CT_SupplementalFont" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EffectStyleItem">
    <xsd:sequence>
      <xsd:group ref="EG_EffectProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="scene3d" type="CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sp3d" type="CT_Shape3D" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FontScheme">
    <xsd:sequence>
      <xsd:element name="majorFont" type="CT_FontCollection" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="minorFont" type="CT_FontCollection" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FillStyleList">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="3" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LineStyleList">
    <xsd:sequence>
      <xsd:element name="ln" type="CT_LineProperties" minOccurs="3" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EffectStyleList">
    <xsd:sequence>
      <xsd:element name="effectStyle" type="CT_EffectStyleItem" minOccurs="3" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BackgroundFillStyleList">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="3" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StyleMatrix">
    <xsd:sequence>
      <xsd:element name="fillStyleLst" type="CT_FillStyleList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lnStyleLst" type="CT_LineStyleList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="effectStyleLst" type="CT_EffectStyleList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="bgFillStyleLst" type="CT_BackgroundFillStyleList" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_BaseStyles">
    <xsd:sequence>
      <xsd:element name="clrScheme" type="CT_ColorScheme" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fontScheme" type="CT_FontScheme" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fmtScheme" type="CT_StyleMatrix" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OfficeArtExtension">
    <xsd:sequence>
      <xsd:any processContents="lax" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="xsd:token" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Coordinate">
    <xsd:union memberTypes="ST_CoordinateUnqualified s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CoordinateUnqualified">
    <xsd:restriction base="xsd:long">
      <xsd:minInclusive value="-27273042329600"/>
      <xsd:maxInclusive value="27273042316900"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Coordinate32">
    <xsd:union memberTypes="ST_Coordinate32Unqualified s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Coordinate32Unqualified">
    <xsd:restriction base="xsd:int"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveCoordinate">
    <xsd:restriction base="xsd:long">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="27273042316900"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveCoordinate32">
    <xsd:restriction base="ST_Coordinate32Unqualified">
      <xsd:minInclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Angle">
    <xsd:restriction base="xsd:int"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Angle">
    <xsd:attribute name="val" type="ST_Angle" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FixedAngle">
    <xsd:restriction base="ST_Angle">
      <xsd:minExclusive value="-5400000"/>
      <xsd:maxExclusive value="5400000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveFixedAngle">
    <xsd:restriction base="ST_Angle">
      <xsd:minInclusive value="0"/>
      <xsd:maxExclusive value="21600000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PositiveFixedAngle">
    <xsd:attribute name="val" type="ST_PositiveFixedAngle" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Percentage">
    <xsd:union memberTypes="ST_PercentageDecimal s:ST_Percentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PercentageDecimal">
    <xsd:restriction base="xsd:int"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Percentage">
    <xsd:attribute name="val" type="ST_Percentage" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PositivePercentage">
    <xsd:union memberTypes="ST_PositivePercentageDecimal s:ST_PositivePercentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositivePercentageDecimal">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PositivePercentage">
    <xsd:attribute name="val" type="ST_PositivePercentage" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FixedPercentage">
    <xsd:union memberTypes="ST_FixedPercentageDecimal s:ST_FixedPercentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FixedPercentageDecimal">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="-100000"/>
      <xsd:maxInclusive value="100000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FixedPercentage">
    <xsd:attribute name="val" type="ST_FixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PositiveFixedPercentage">
    <xsd:union memberTypes="ST_PositiveFixedPercentageDecimal s:ST_PositiveFixedPercentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveFixedPercentageDecimal">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="100000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PositiveFixedPercentage">
    <xsd:attribute name="val" type="ST_PositiveFixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Ratio">
    <xsd:attribute name="n" type="xsd:long" use="required"/>
    <xsd:attribute name="d" type="xsd:long" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Point2D">
    <xsd:attribute name="x" type="ST_Coordinate" use="required"/>
    <xsd:attribute name="y" type="ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PositiveSize2D">
    <xsd:attribute name="cx" type="ST_PositiveCoordinate" use="required"/>
    <xsd:attribute name="cy" type="ST_PositiveCoordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ComplementTransform"/>
  <xsd:complexType name="CT_InverseTransform"/>
  <xsd:complexType name="CT_GrayscaleTransform"/>
  <xsd:complexType name="CT_GammaTransform"/>
  <xsd:complexType name="CT_InverseGammaTransform"/>
  <xsd:group name="EG_ColorTransform">
    <xsd:choice>
      <xsd:element name="tint" type="CT_PositiveFixedPercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="shade" type="CT_PositiveFixedPercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="comp" type="CT_ComplementTransform" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="inv" type="CT_InverseTransform" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gray" type="CT_GrayscaleTransform" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alpha" type="CT_PositiveFixedPercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaOff" type="CT_FixedPercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaMod" type="CT_PositivePercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hue" type="CT_PositiveFixedAngle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hueOff" type="CT_Angle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hueMod" type="CT_PositivePercentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sat" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="satOff" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="satMod" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lum" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lumOff" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lumMod" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="red" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="redOff" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="redMod" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="green" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="greenOff" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="greenMod" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blue" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blueOff" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blueMod" type="CT_Percentage" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gamma" type="CT_GammaTransform" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="invGamma" type="CT_InverseGammaTransform" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_ScRgbColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="ST_Percentage" use="required"/>
    <xsd:attribute name="g" type="ST_Percentage" use="required"/>
    <xsd:attribute name="b" type="ST_Percentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SRgbColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="val" type="s:ST_HexColorRGB" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_HslColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="hue" type="ST_PositiveFixedAngle" use="required"/>
    <xsd:attribute name="sat" type="ST_Percentage" use="required"/>
    <xsd:attribute name="lum" type="ST_Percentage" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SystemColorVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="scrollBar"/>
      <xsd:enumeration value="background"/>
      <xsd:enumeration value="activeCaption"/>
      <xsd:enumeration value="inactiveCaption"/>
      <xsd:enumeration value="menu"/>
      <xsd:enumeration value="window"/>
      <xsd:enumeration value="windowFrame"/>
      <xsd:enumeration value="menuText"/>
      <xsd:enumeration value="windowText"/>
      <xsd:enumeration value="captionText"/>
      <xsd:enumeration value="activeBorder"/>
      <xsd:enumeration value="inactiveBorder"/>
      <xsd:enumeration value="appWorkspace"/>
      <xsd:enumeration value="highlight"/>
      <xsd:enumeration value="highlightText"/>
      <xsd:enumeration value="btnFace"/>
      <xsd:enumeration value="btnShadow"/>
      <xsd:enumeration value="grayText"/>
      <xsd:enumeration value="btnText"/>
      <xsd:enumeration value="inactiveCaptionText"/>
      <xsd:enumeration value="btnHighlight"/>
      <xsd:enumeration value="3dDkShadow"/>
      <xsd:enumeration value="3dLight"/>
      <xsd:enumeration value="infoText"/>
      <xsd:enumeration value="infoBk"/>
      <xsd:enumeration value="hotLight"/>
      <xsd:enumeration value="gradientActiveCaption"/>
      <xsd:enumeration value="gradientInactiveCaption"/>
      <xsd:enumeration value="menuHighlight"/>
      <xsd:enumeration value="menuBar"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SystemColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="val" type="ST_SystemColorVal" use="required"/>
    <xsd:attribute name="lastClr" type="s:ST_HexColorRGB" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SchemeColorVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="bg1"/>
      <xsd:enumeration value="tx1"/>
      <xsd:enumeration value="bg2"/>
      <xsd:enumeration value="tx2"/>
      <xsd:enumeration value="accent1"/>
      <xsd:enumeration value="accent2"/>
      <xsd:enumeration value="accent3"/>
      <xsd:enumeration value="accent4"/>
      <xsd:enumeration value="accent5"/>
      <xsd:enumeration value="accent6"/>
      <xsd:enumeration value="hlink"/>
      <xsd:enumeration value="folHlink"/>
      <xsd:enumeration value="phClr"/>
      <xsd:enumeration value="dk1"/>
      <xsd:enumeration value="lt1"/>
      <xsd:enumeration value="dk2"/>
      <xsd:enumeration value="lt2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SchemeColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="val" type="ST_SchemeColorVal" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PresetColorVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="aliceBlue"/>
      <xsd:enumeration value="antiqueWhite"/>
      <xsd:enumeration value="aqua"/>
      <xsd:enumeration value="aquamarine"/>
      <xsd:enumeration value="azure"/>
      <xsd:enumeration value="beige"/>
      <xsd:enumeration value="bisque"/>
      <xsd:enumeration value="black"/>
      <xsd:enumeration value="blanchedAlmond"/>
      <xsd:enumeration value="blue"/>
      <xsd:enumeration value="blueViolet"/>
      <xsd:enumeration value="brown"/>
      <xsd:enumeration value="burlyWood"/>
      <xsd:enumeration value="cadetBlue"/>
      <xsd:enumeration value="chartreuse"/>
      <xsd:enumeration value="chocolate"/>
      <xsd:enumeration value="coral"/>
      <xsd:enumeration value="cornflowerBlue"/>
      <xsd:enumeration value="cornsilk"/>
      <xsd:enumeration value="crimson"/>
      <xsd:enumeration value="cyan"/>
      <xsd:enumeration value="darkBlue"/>
      <xsd:enumeration value="darkCyan"/>
      <xsd:enumeration value="darkGoldenrod"/>
      <xsd:enumeration value="darkGray"/>
      <xsd:enumeration value="darkGrey"/>
      <xsd:enumeration value="darkGreen"/>
      <xsd:enumeration value="darkKhaki"/>
      <xsd:enumeration value="darkMagenta"/>
      <xsd:enumeration value="darkOliveGreen"/>
      <xsd:enumeration value="darkOrange"/>
      <xsd:enumeration value="darkOrchid"/>
      <xsd:enumeration value="darkRed"/>
      <xsd:enumeration value="darkSalmon"/>
      <xsd:enumeration value="darkSeaGreen"/>
      <xsd:enumeration value="darkSlateBlue"/>
      <xsd:enumeration value="darkSlateGray"/>
      <xsd:enumeration value="darkSlateGrey"/>
      <xsd:enumeration value="darkTurquoise"/>
      <xsd:enumeration value="darkViolet"/>
      <xsd:enumeration value="dkBlue"/>
      <xsd:enumeration value="dkCyan"/>
      <xsd:enumeration value="dkGoldenrod"/>
      <xsd:enumeration value="dkGray"/>
      <xsd:enumeration value="dkGrey"/>
      <xsd:enumeration value="dkGreen"/>
      <xsd:enumeration value="dkKhaki"/>
      <xsd:enumeration value="dkMagenta"/>
      <xsd:enumeration value="dkOliveGreen"/>
      <xsd:enumeration value="dkOrange"/>
      <xsd:enumeration value="dkOrchid"/>
      <xsd:enumeration value="dkRed"/>
      <xsd:enumeration value="dkSalmon"/>
      <xsd:enumeration value="dkSeaGreen"/>
      <xsd:enumeration value="dkSlateBlue"/>
      <xsd:enumeration value="dkSlateGray"/>
      <xsd:enumeration value="dkSlateGrey"/>
      <xsd:enumeration value="dkTurquoise"/>
      <xsd:enumeration value="dkViolet"/>
      <xsd:enumeration value="deepPink"/>
      <xsd:enumeration value="deepSkyBlue"/>
      <xsd:enumeration value="dimGray"/>
      <xsd:enumeration value="dimGrey"/>
      <xsd:enumeration value="dodgerBlue"/>
      <xsd:enumeration value="firebrick"/>
      <xsd:enumeration value="floralWhite"/>
      <xsd:enumeration value="forestGreen"/>
      <xsd:enumeration value="fuchsia"/>
      <xsd:enumeration value="gainsboro"/>
      <xsd:enumeration value="ghostWhite"/>
      <xsd:enumeration value="gold"/>
      <xsd:enumeration value="goldenrod"/>
      <xsd:enumeration value="gray"/>
      <xsd:enumeration value="grey"/>
      <xsd:enumeration value="green"/>
      <xsd:enumeration value="greenYellow"/>
      <xsd:enumeration value="honeydew"/>
      <xsd:enumeration value="hotPink"/>
      <xsd:enumeration value="indianRed"/>
      <xsd:enumeration value="indigo"/>
      <xsd:enumeration value="ivory"/>
      <xsd:enumeration value="khaki"/>
      <xsd:enumeration value="lavender"/>
      <xsd:enumeration value="lavenderBlush"/>
      <xsd:enumeration value="lawnGreen"/>
      <xsd:enumeration value="lemonChiffon"/>
      <xsd:enumeration value="lightBlue"/>
      <xsd:enumeration value="lightCoral"/>
      <xsd:enumeration value="lightCyan"/>
      <xsd:enumeration value="lightGoldenrodYellow"/>
      <xsd:enumeration value="lightGray"/>
      <xsd:enumeration value="lightGrey"/>
      <xsd:enumeration value="lightGreen"/>
      <xsd:enumeration value="lightPink"/>
      <xsd:enumeration value="lightSalmon"/>
      <xsd:enumeration value="lightSeaGreen"/>
      <xsd:enumeration value="lightSkyBlue"/>
      <xsd:enumeration value="lightSlateGray"/>
      <xsd:enumeration value="lightSlateGrey"/>
      <xsd:enumeration value="lightSteelBlue"/>
      <xsd:enumeration value="lightYellow"/>
      <xsd:enumeration value="ltBlue"/>
      <xsd:enumeration value="ltCoral"/>
      <xsd:enumeration value="ltCyan"/>
      <xsd:enumeration value="ltGoldenrodYellow"/>
      <xsd:enumeration value="ltGray"/>
      <xsd:enumeration value="ltGrey"/>
      <xsd:enumeration value="ltGreen"/>
      <xsd:enumeration value="ltPink"/>
      <xsd:enumeration value="ltSalmon"/>
      <xsd:enumeration value="ltSeaGreen"/>
      <xsd:enumeration value="ltSkyBlue"/>
      <xsd:enumeration value="ltSlateGray"/>
      <xsd:enumeration value="ltSlateGrey"/>
      <xsd:enumeration value="ltSteelBlue"/>
      <xsd:enumeration value="ltYellow"/>
      <xsd:enumeration value="lime"/>
      <xsd:enumeration value="limeGreen"/>
      <xsd:enumeration value="linen"/>
      <xsd:enumeration value="magenta"/>
      <xsd:enumeration value="maroon"/>
      <xsd:enumeration value="medAquamarine"/>
      <xsd:enumeration value="medBlue"/>
      <xsd:enumeration value="medOrchid"/>
      <xsd:enumeration value="medPurple"/>
      <xsd:enumeration value="medSeaGreen"/>
      <xsd:enumeration value="medSlateBlue"/>
      <xsd:enumeration value="medSpringGreen"/>
      <xsd:enumeration value="medTurquoise"/>
      <xsd:enumeration value="medVioletRed"/>
      <xsd:enumeration value="mediumAquamarine"/>
      <xsd:enumeration value="mediumBlue"/>
      <xsd:enumeration value="mediumOrchid"/>
      <xsd:enumeration value="mediumPurple"/>
      <xsd:enumeration value="mediumSeaGreen"/>
      <xsd:enumeration value="mediumSlateBlue"/>
      <xsd:enumeration value="mediumSpringGreen"/>
      <xsd:enumeration value="mediumTurquoise"/>
      <xsd:enumeration value="mediumVioletRed"/>
      <xsd:enumeration value="midnightBlue"/>
      <xsd:enumeration value="mintCream"/>
      <xsd:enumeration value="mistyRose"/>
      <xsd:enumeration value="moccasin"/>
      <xsd:enumeration value="navajoWhite"/>
      <xsd:enumeration value="navy"/>
      <xsd:enumeration value="oldLace"/>
      <xsd:enumeration value="olive"/>
      <xsd:enumeration value="oliveDrab"/>
      <xsd:enumeration value="orange"/>
      <xsd:enumeration value="orangeRed"/>
      <xsd:enumeration value="orchid"/>
      <xsd:enumeration value="paleGoldenrod"/>
      <xsd:enumeration value="paleGreen"/>
      <xsd:enumeration value="paleTurquoise"/>
      <xsd:enumeration value="paleVioletRed"/>
      <xsd:enumeration value="papayaWhip"/>
      <xsd:enumeration value="peachPuff"/>
      <xsd:enumeration value="peru"/>
      <xsd:enumeration value="pink"/>
      <xsd:enumeration value="plum"/>
      <xsd:enumeration value="powderBlue"/>
      <xsd:enumeration value="purple"/>
      <xsd:enumeration value="red"/>
      <xsd:enumeration value="rosyBrown"/>
      <xsd:enumeration value="royalBlue"/>
      <xsd:enumeration value="saddleBrown"/>
      <xsd:enumeration value="salmon"/>
      <xsd:enumeration value="sandyBrown"/>
      <xsd:enumeration value="seaGreen"/>
      <xsd:enumeration value="seaShell"/>
      <xsd:enumeration value="sienna"/>
      <xsd:enumeration value="silver"/>
      <xsd:enumeration value="skyBlue"/>
      <xsd:enumeration value="slateBlue"/>
      <xsd:enumeration value="slateGray"/>
      <xsd:enumeration value="slateGrey"/>
      <xsd:enumeration value="snow"/>
      <xsd:enumeration value="springGreen"/>
      <xsd:enumeration value="steelBlue"/>
      <xsd:enumeration value="tan"/>
      <xsd:enumeration value="teal"/>
      <xsd:enumeration value="thistle"/>
      <xsd:enumeration value="tomato"/>
      <xsd:enumeration value="turquoise"/>
      <xsd:enumeration value="violet"/>
      <xsd:enumeration value="wheat"/>
      <xsd:enumeration value="white"/>
      <xsd:enumeration value="whiteSmoke"/>
      <xsd:enumeration value="yellow"/>
      <xsd:enumeration value="yellowGreen"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PresetColor">
    <xsd:sequence>
      <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="val" type="ST_PresetColorVal" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_OfficeArtExtensionList">
    <xsd:sequence>
      <xsd:element name="ext" type="CT_OfficeArtExtension" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_OfficeArtExtensionList">
    <xsd:sequence>
      <xsd:group ref="EG_OfficeArtExtensionList" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Scale2D">
    <xsd:sequence>
      <xsd:element name="sx" type="CT_Ratio" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sy" type="CT_Ratio" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Transform2D">
    <xsd:sequence>
      <xsd:element name="off" type="CT_Point2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ext" type="CT_PositiveSize2D" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rot" type="ST_Angle" use="optional" default="0"/>
    <xsd:attribute name="flipH" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="flipV" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupTransform2D">
    <xsd:sequence>
      <xsd:element name="off" type="CT_Point2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ext" type="CT_PositiveSize2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="chOff" type="CT_Point2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="chExt" type="CT_PositiveSize2D" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rot" type="ST_Angle" use="optional" default="0"/>
    <xsd:attribute name="flipH" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="flipV" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Point3D">
    <xsd:attribute name="x" type="ST_Coordinate" use="required"/>
    <xsd:attribute name="y" type="ST_Coordinate" use="required"/>
    <xsd:attribute name="z" type="ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Vector3D">
    <xsd:attribute name="dx" type="ST_Coordinate" use="required"/>
    <xsd:attribute name="dy" type="ST_Coordinate" use="required"/>
    <xsd:attribute name="dz" type="ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SphereCoords">
    <xsd:attribute name="lat" type="ST_PositiveFixedAngle" use="required"/>
    <xsd:attribute name="lon" type="ST_PositiveFixedAngle" use="required"/>
    <xsd:attribute name="rev" type="ST_PositiveFixedAngle" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RelativeRect">
    <xsd:attribute name="l" type="ST_Percentage" use="optional" default="0%"/>
    <xsd:attribute name="t" type="ST_Percentage" use="optional" default="0%"/>
    <xsd:attribute name="r" type="ST_Percentage" use="optional" default="0%"/>
    <xsd:attribute name="b" type="ST_Percentage" use="optional" default="0%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_RectAlignment">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="tl"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="tr"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="bl"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="br"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:group name="EG_ColorChoice">
    <xsd:choice>
      <xsd:element name="scrgbClr" type="CT_ScRgbColor" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="srgbClr" type="CT_SRgbColor" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hslClr" type="CT_HslColor" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sysClr" type="CT_SystemColor" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="schemeClr" type="CT_SchemeColor" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="prstClr" type="CT_PresetColor" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_Color">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorMRU">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_BlackWhiteMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="clr"/>
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="gray"/>
      <xsd:enumeration value="ltGray"/>
      <xsd:enumeration value="invGray"/>
      <xsd:enumeration value="grayWhite"/>
      <xsd:enumeration value="blackGray"/>
      <xsd:enumeration value="blackWhite"/>
      <xsd:enumeration value="black"/>
      <xsd:enumeration value="white"/>
      <xsd:enumeration value="hidden"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:attributeGroup name="AG_Blob">
    <xsd:attribute ref="r:embed" use="optional" default=""/>
    <xsd:attribute ref="r:link" use="optional" default=""/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_EmbeddedWAVAudioFile">
    <xsd:attribute ref="r:embed" use="required"/>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_Hyperlink">
    <xsd:sequence>
      <xsd:element name="snd" type="CT_EmbeddedWAVAudioFile" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="invalidUrl" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="action" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="tgtFrame" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="tooltip" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="history" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="highlightClick" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="endSnd" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DrawingElementId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:attributeGroup name="AG_Locking">
    <xsd:attribute name="noGrp" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noSelect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noRot" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noChangeAspect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noMove" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noResize" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noEditPoints" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noAdjustHandles" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noChangeArrowheads" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noChangeShapeType" type="xsd:boolean" use="optional" default="false"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_ConnectorLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Locking"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Locking"/>
    <xsd:attribute name="noTextEdit" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PictureLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Locking"/>
    <xsd:attribute name="noCrop" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="noGrp" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noUngrp" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noSelect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noRot" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noChangeAspect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noMove" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noResize" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectFrameLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="noGrp" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noDrilldown" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noSelect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noChangeAspect" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noMove" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="noResize" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ContentPartLocking">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Locking"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualDrawingProps">
    <xsd:sequence>
      <xsd:element name="hlinkClick" type="CT_Hyperlink" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hlinkHover" type="CT_Hyperlink" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_DrawingElementId" use="required"/>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
    <xsd:attribute name="descr" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="title" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualDrawingShapeProps">
    <xsd:sequence>
      <xsd:element name="spLocks" type="CT_ShapeLocking" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="txBox" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualConnectorProperties">
    <xsd:sequence>
      <xsd:element name="cxnSpLocks" type="CT_ConnectorLocking" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="stCxn" type="CT_Connection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="endCxn" type="CT_Connection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualPictureProperties">
    <xsd:sequence>
      <xsd:element name="picLocks" type="CT_PictureLocking" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="preferRelativeResize" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualGroupDrawingShapeProps">
    <xsd:sequence>
      <xsd:element name="grpSpLocks" type="CT_GroupLocking" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualGraphicFrameProperties">
    <xsd:sequence>
      <xsd:element name="graphicFrameLocks" type="CT_GraphicalObjectFrameLocking" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NonVisualContentPartProperties">
    <xsd:sequence>
      <xsd:element name="cpLocks" type="CT_ContentPartLocking" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="isComment" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectData">
    <xsd:sequence>
      <xsd:any minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="xsd:token" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObject">
    <xsd:sequence>
      <xsd:element name="graphicData" type="CT_GraphicalObjectData"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="graphic" type="CT_GraphicalObject"/>
  <xsd:simpleType name="ST_ChartBuildStep">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="category"/>
      <xsd:enumeration value="ptInCategory"/>
      <xsd:enumeration value="series"/>
      <xsd:enumeration value="ptInSeries"/>
      <xsd:enumeration value="allPts"/>
      <xsd:enumeration value="gridLegend"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DgmBuildStep">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sp"/>
      <xsd:enumeration value="bg"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_AnimationDgmElement">
    <xsd:attribute name="id" type="s:ST_Guid" use="optional"
      default="{00000000-0000-0000-0000-000000000000}"/>
    <xsd:attribute name="bldStep" type="ST_DgmBuildStep" use="optional" default="sp"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AnimationChartElement">
    <xsd:attribute name="seriesIdx" type="xsd:int" use="optional" default="-1"/>
    <xsd:attribute name="categoryIdx" type="xsd:int" use="optional" default="-1"/>
    <xsd:attribute name="bldStep" type="ST_ChartBuildStep" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AnimationElementChoice">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="dgm" type="CT_AnimationDgmElement"/>
      <xsd:element name="chart" type="CT_AnimationChartElement"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_AnimationBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="allAtOnce"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnimationDgmOnlyBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="one"/>
      <xsd:enumeration value="lvlOne"/>
      <xsd:enumeration value="lvlAtOnce"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnimationDgmBuildType">
    <xsd:union memberTypes="ST_AnimationBuildType ST_AnimationDgmOnlyBuildType"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_AnimationDgmBuildProperties">
    <xsd:attribute name="bld" type="ST_AnimationDgmBuildType" use="optional" default="allAtOnce"/>
    <xsd:attribute name="rev" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_AnimationChartOnlyBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="series"/>
      <xsd:enumeration value="category"/>
      <xsd:enumeration value="seriesEl"/>
      <xsd:enumeration value="categoryEl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnimationChartBuildType">
    <xsd:union memberTypes="ST_AnimationBuildType ST_AnimationChartOnlyBuildType"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_AnimationChartBuildProperties">
    <xsd:attribute name="bld" type="ST_AnimationChartBuildType" use="optional" default="allAtOnce"/>
    <xsd:attribute name="animBg" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AnimationGraphicalObjectBuildProperties">
    <xsd:choice>
      <xsd:element name="bldDgm" type="CT_AnimationDgmBuildProperties"/>
      <xsd:element name="bldChart" type="CT_AnimationChartBuildProperties"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_BackgroundFormatting">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_WholeE2oFormatting">
    <xsd:sequence>
      <xsd:element name="ln" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlUseShapeRectangle"/>
  <xsd:complexType name="CT_GvmlTextShape">
    <xsd:sequence>
      <xsd:element name="txBody" type="CT_TextBody" minOccurs="1" maxOccurs="1"/>
      <xsd:choice>
        <xsd:element name="useSpRect" type="CT_GvmlUseShapeRectangle" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="xfrm" type="CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvSpPr" type="CT_NonVisualDrawingShapeProps" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlShape">
    <xsd:sequence>
      <xsd:element name="nvSpPr" type="CT_GvmlShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="txSp" type="CT_GvmlTextShape" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="style" type="CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlConnectorNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvCxnSpPr" type="CT_NonVisualConnectorProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlConnector">
    <xsd:sequence>
      <xsd:element name="nvCxnSpPr" type="CT_GvmlConnectorNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlPictureNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvPicPr" type="CT_NonVisualPictureProperties" minOccurs="1" maxOccurs="1"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlPicture">
    <xsd:sequence>
      <xsd:element name="nvPicPr" type="CT_GvmlPictureNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlGraphicFrameNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="CT_NonVisualGraphicFrameProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlGraphicalObjectFrame">
    <xsd:sequence>
      <xsd:element name="nvGraphicFramePr" type="CT_GvmlGraphicFrameNonVisual" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element ref="graphic" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="xfrm" type="CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlGroupShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGrpSpPr" type="CT_NonVisualGroupDrawingShapeProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GvmlGroupShape">
    <xsd:sequence>
      <xsd:element name="nvGrpSpPr" type="CT_GvmlGroupShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grpSpPr" type="CT_GroupShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="txSp" type="CT_GvmlTextShape"/>
        <xsd:element name="sp" type="CT_GvmlShape"/>
        <xsd:element name="cxnSp" type="CT_GvmlConnector"/>
        <xsd:element name="pic" type="CT_GvmlPicture"/>
        <xsd:element name="graphicFrame" type="CT_GvmlGraphicalObjectFrame"/>
        <xsd:element name="grpSp" type="CT_GvmlGroupShape"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_PresetCameraType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="legacyObliqueTopLeft"/>
      <xsd:enumeration value="legacyObliqueTop"/>
      <xsd:enumeration value="legacyObliqueTopRight"/>
      <xsd:enumeration value="legacyObliqueLeft"/>
      <xsd:enumeration value="legacyObliqueFront"/>
      <xsd:enumeration value="legacyObliqueRight"/>
      <xsd:enumeration value="legacyObliqueBottomLeft"/>
      <xsd:enumeration value="legacyObliqueBottom"/>
      <xsd:enumeration value="legacyObliqueBottomRight"/>
      <xsd:enumeration value="legacyPerspectiveTopLeft"/>
      <xsd:enumeration value="legacyPerspectiveTop"/>
      <xsd:enumeration value="legacyPerspectiveTopRight"/>
      <xsd:enumeration value="legacyPerspectiveLeft"/>
      <xsd:enumeration value="legacyPerspectiveFront"/>
      <xsd:enumeration value="legacyPerspectiveRight"/>
      <xsd:enumeration value="legacyPerspectiveBottomLeft"/>
      <xsd:enumeration value="legacyPerspectiveBottom"/>
      <xsd:enumeration value="legacyPerspectiveBottomRight"/>
      <xsd:enumeration value="orthographicFront"/>
      <xsd:enumeration value="isometricTopUp"/>
      <xsd:enumeration value="isometricTopDown"/>
      <xsd:enumeration value="isometricBottomUp"/>
      <xsd:enumeration value="isometricBottomDown"/>
      <xsd:enumeration value="isometricLeftUp"/>
      <xsd:enumeration value="isometricLeftDown"/>
      <xsd:enumeration value="isometricRightUp"/>
      <xsd:enumeration value="isometricRightDown"/>
      <xsd:enumeration value="isometricOffAxis1Left"/>
      <xsd:enumeration value="isometricOffAxis1Right"/>
      <xsd:enumeration value="isometricOffAxis1Top"/>
      <xsd:enumeration value="isometricOffAxis2Left"/>
      <xsd:enumeration value="isometricOffAxis2Right"/>
      <xsd:enumeration value="isometricOffAxis2Top"/>
      <xsd:enumeration value="isometricOffAxis3Left"/>
      <xsd:enumeration value="isometricOffAxis3Right"/>
      <xsd:enumeration value="isometricOffAxis3Bottom"/>
      <xsd:enumeration value="isometricOffAxis4Left"/>
      <xsd:enumeration value="isometricOffAxis4Right"/>
      <xsd:enumeration value="isometricOffAxis4Bottom"/>
      <xsd:enumeration value="obliqueTopLeft"/>
      <xsd:enumeration value="obliqueTop"/>
      <xsd:enumeration value="obliqueTopRight"/>
      <xsd:enumeration value="obliqueLeft"/>
      <xsd:enumeration value="obliqueRight"/>
      <xsd:enumeration value="obliqueBottomLeft"/>
      <xsd:enumeration value="obliqueBottom"/>
      <xsd:enumeration value="obliqueBottomRight"/>
      <xsd:enumeration value="perspectiveFront"/>
      <xsd:enumeration value="perspectiveLeft"/>
      <xsd:enumeration value="perspectiveRight"/>
      <xsd:enumeration value="perspectiveAbove"/>
      <xsd:enumeration value="perspectiveBelow"/>
      <xsd:enumeration value="perspectiveAboveLeftFacing"/>
      <xsd:enumeration value="perspectiveAboveRightFacing"/>
      <xsd:enumeration value="perspectiveContrastingLeftFacing"/>
      <xsd:enumeration value="perspectiveContrastingRightFacing"/>
      <xsd:enumeration value="perspectiveHeroicLeftFacing"/>
      <xsd:enumeration value="perspectiveHeroicRightFacing"/>
      <xsd:enumeration value="perspectiveHeroicExtremeLeftFacing"/>
      <xsd:enumeration value="perspectiveHeroicExtremeRightFacing"/>
      <xsd:enumeration value="perspectiveRelaxed"/>
      <xsd:enumeration value="perspectiveRelaxedModerately"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FOVAngle">
    <xsd:restriction base="ST_Angle">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="10800000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Camera">
    <xsd:sequence>
      <xsd:element name="rot" type="CT_SphereCoords" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prst" type="ST_PresetCameraType" use="required"/>
    <xsd:attribute name="fov" type="ST_FOVAngle" use="optional"/>
    <xsd:attribute name="zoom" type="ST_PositivePercentage" use="optional" default="100%"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LightRigDirection">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="tl"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="tr"/>
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="bl"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="br"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LightRigType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="legacyFlat1"/>
      <xsd:enumeration value="legacyFlat2"/>
      <xsd:enumeration value="legacyFlat3"/>
      <xsd:enumeration value="legacyFlat4"/>
      <xsd:enumeration value="legacyNormal1"/>
      <xsd:enumeration value="legacyNormal2"/>
      <xsd:enumeration value="legacyNormal3"/>
      <xsd:enumeration value="legacyNormal4"/>
      <xsd:enumeration value="legacyHarsh1"/>
      <xsd:enumeration value="legacyHarsh2"/>
      <xsd:enumeration value="legacyHarsh3"/>
      <xsd:enumeration value="legacyHarsh4"/>
      <xsd:enumeration value="threePt"/>
      <xsd:enumeration value="balanced"/>
      <xsd:enumeration value="soft"/>
      <xsd:enumeration value="harsh"/>
      <xsd:enumeration value="flood"/>
      <xsd:enumeration value="contrasting"/>
      <xsd:enumeration value="morning"/>
      <xsd:enumeration value="sunrise"/>
      <xsd:enumeration value="sunset"/>
      <xsd:enumeration value="chilly"/>
      <xsd:enumeration value="freezing"/>
      <xsd:enumeration value="flat"/>
      <xsd:enumeration value="twoPt"/>
      <xsd:enumeration value="glow"/>
      <xsd:enumeration value="brightRoom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LightRig">
    <xsd:sequence>
      <xsd:element name="rot" type="CT_SphereCoords" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rig" type="ST_LightRigType" use="required"/>
    <xsd:attribute name="dir" type="ST_LightRigDirection" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Scene3D">
    <xsd:sequence>
      <xsd:element name="camera" type="CT_Camera" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lightRig" type="CT_LightRig" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="backdrop" type="CT_Backdrop" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Backdrop">
    <xsd:sequence>
      <xsd:element name="anchor" type="CT_Point3D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="norm" type="CT_Vector3D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="up" type="CT_Vector3D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_BevelPresetType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="relaxedInset"/>
      <xsd:enumeration value="circle"/>
      <xsd:enumeration value="slope"/>
      <xsd:enumeration value="cross"/>
      <xsd:enumeration value="angle"/>
      <xsd:enumeration value="softRound"/>
      <xsd:enumeration value="convex"/>
      <xsd:enumeration value="coolSlant"/>
      <xsd:enumeration value="divot"/>
      <xsd:enumeration value="riblet"/>
      <xsd:enumeration value="hardEdge"/>
      <xsd:enumeration value="artDeco"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Bevel">
    <xsd:attribute name="w" type="ST_PositiveCoordinate" use="optional" default="76200"/>
    <xsd:attribute name="h" type="ST_PositiveCoordinate" use="optional" default="76200"/>
    <xsd:attribute name="prst" type="ST_BevelPresetType" use="optional" default="circle"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PresetMaterialType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="legacyMatte"/>
      <xsd:enumeration value="legacyPlastic"/>
      <xsd:enumeration value="legacyMetal"/>
      <xsd:enumeration value="legacyWireframe"/>
      <xsd:enumeration value="matte"/>
      <xsd:enumeration value="plastic"/>
      <xsd:enumeration value="metal"/>
      <xsd:enumeration value="warmMatte"/>
      <xsd:enumeration value="translucentPowder"/>
      <xsd:enumeration value="powder"/>
      <xsd:enumeration value="dkEdge"/>
      <xsd:enumeration value="softEdge"/>
      <xsd:enumeration value="clear"/>
      <xsd:enumeration value="flat"/>
      <xsd:enumeration value="softmetal"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Shape3D">
    <xsd:sequence>
      <xsd:element name="bevelT" type="CT_Bevel" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bevelB" type="CT_Bevel" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extrusionClr" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="contourClr" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="z" type="ST_Coordinate" use="optional" default="0"/>
    <xsd:attribute name="extrusionH" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="contourW" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="prstMaterial" type="ST_PresetMaterialType" use="optional"
      default="warmMatte"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FlatText">
    <xsd:attribute name="z" type="ST_Coordinate" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:group name="EG_Text3D">
    <xsd:choice>
      <xsd:element name="sp3d" type="CT_Shape3D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="flatTx" type="CT_FlatText" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_AlphaBiLevelEffect">
    <xsd:attribute name="thresh" type="ST_PositiveFixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AlphaCeilingEffect"/>
  <xsd:complexType name="CT_AlphaFloorEffect"/>
  <xsd:complexType name="CT_AlphaInverseEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AlphaModulateFixedEffect">
    <xsd:attribute name="amt" type="ST_PositivePercentage" use="optional" default="100%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AlphaOutsetEffect">
    <xsd:attribute name="rad" type="ST_Coordinate" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AlphaReplaceEffect">
    <xsd:attribute name="a" type="ST_PositiveFixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BiLevelEffect">
    <xsd:attribute name="thresh" type="ST_PositiveFixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BlurEffect">
    <xsd:attribute name="rad" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="grow" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorChangeEffect">
    <xsd:sequence>
      <xsd:element name="clrFrom" type="CT_Color" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="clrTo" type="CT_Color" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="useA" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorReplaceEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DuotoneEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="2" maxOccurs="2"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GlowEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rad" type="ST_PositiveCoordinate" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GrayscaleEffect"/>
  <xsd:complexType name="CT_HSLEffect">
    <xsd:attribute name="hue" type="ST_PositiveFixedAngle" use="optional" default="0"/>
    <xsd:attribute name="sat" type="ST_FixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="lum" type="ST_FixedPercentage" use="optional" default="0%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_InnerShadowEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="blurRad" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dist" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dir" type="ST_PositiveFixedAngle" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LuminanceEffect">
    <xsd:attribute name="bright" type="ST_FixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="contrast" type="ST_FixedPercentage" use="optional" default="0%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OuterShadowEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="blurRad" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dist" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dir" type="ST_PositiveFixedAngle" use="optional" default="0"/>
    <xsd:attribute name="sx" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="sy" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="kx" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="ky" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="algn" type="ST_RectAlignment" use="optional" default="b"/>
    <xsd:attribute name="rotWithShape" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PresetShadowVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="shdw1"/>
      <xsd:enumeration value="shdw2"/>
      <xsd:enumeration value="shdw3"/>
      <xsd:enumeration value="shdw4"/>
      <xsd:enumeration value="shdw5"/>
      <xsd:enumeration value="shdw6"/>
      <xsd:enumeration value="shdw7"/>
      <xsd:enumeration value="shdw8"/>
      <xsd:enumeration value="shdw9"/>
      <xsd:enumeration value="shdw10"/>
      <xsd:enumeration value="shdw11"/>
      <xsd:enumeration value="shdw12"/>
      <xsd:enumeration value="shdw13"/>
      <xsd:enumeration value="shdw14"/>
      <xsd:enumeration value="shdw15"/>
      <xsd:enumeration value="shdw16"/>
      <xsd:enumeration value="shdw17"/>
      <xsd:enumeration value="shdw18"/>
      <xsd:enumeration value="shdw19"/>
      <xsd:enumeration value="shdw20"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PresetShadowEffect">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prst" type="ST_PresetShadowVal" use="required"/>
    <xsd:attribute name="dist" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dir" type="ST_PositiveFixedAngle" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ReflectionEffect">
    <xsd:attribute name="blurRad" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="stA" type="ST_PositiveFixedPercentage" use="optional" default="100%"/>
    <xsd:attribute name="stPos" type="ST_PositiveFixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="endA" type="ST_PositiveFixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="endPos" type="ST_PositiveFixedPercentage" use="optional" default="100%"/>
    <xsd:attribute name="dist" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="dir" type="ST_PositiveFixedAngle" use="optional" default="0"/>
    <xsd:attribute name="fadeDir" type="ST_PositiveFixedAngle" use="optional" default="5400000"/>
    <xsd:attribute name="sx" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="sy" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="kx" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="ky" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="algn" type="ST_RectAlignment" use="optional" default="b"/>
    <xsd:attribute name="rotWithShape" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RelativeOffsetEffect">
    <xsd:attribute name="tx" type="ST_Percentage" use="optional" default="0%"/>
    <xsd:attribute name="ty" type="ST_Percentage" use="optional" default="0%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SoftEdgesEffect">
    <xsd:attribute name="rad" type="ST_PositiveCoordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TintEffect">
    <xsd:attribute name="hue" type="ST_PositiveFixedAngle" use="optional" default="0"/>
    <xsd:attribute name="amt" type="ST_FixedPercentage" use="optional" default="0%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TransformEffect">
    <xsd:attribute name="sx" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="sy" type="ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="kx" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="ky" type="ST_FixedAngle" use="optional" default="0"/>
    <xsd:attribute name="tx" type="ST_Coordinate" use="optional" default="0"/>
    <xsd:attribute name="ty" type="ST_Coordinate" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NoFillProperties"/>
  <xsd:complexType name="CT_SolidColorFillProperties">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LinearShadeProperties">
    <xsd:attribute name="ang" type="ST_PositiveFixedAngle" use="optional"/>
    <xsd:attribute name="scaled" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PathShadeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="shape"/>
      <xsd:enumeration value="circle"/>
      <xsd:enumeration value="rect"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PathShadeProperties">
    <xsd:sequence>
      <xsd:element name="fillToRect" type="CT_RelativeRect" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="path" type="ST_PathShadeType" use="optional"/>
  </xsd:complexType>
  <xsd:group name="EG_ShadeProperties">
    <xsd:choice>
      <xsd:element name="lin" type="CT_LinearShadeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="path" type="CT_PathShadeProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_TileFlipMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="x"/>
      <xsd:enumeration value="y"/>
      <xsd:enumeration value="xy"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_GradientStop">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="pos" type="ST_PositiveFixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GradientStopList">
    <xsd:sequence>
      <xsd:element name="gs" type="CT_GradientStop" minOccurs="2" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GradientFillProperties">
    <xsd:sequence>
      <xsd:element name="gsLst" type="CT_GradientStopList" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ShadeProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tileRect" type="CT_RelativeRect" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="flip" type="ST_TileFlipMode" use="optional" default="none"/>
    <xsd:attribute name="rotWithShape" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TileInfoProperties">
    <xsd:attribute name="tx" type="ST_Coordinate" use="optional"/>
    <xsd:attribute name="ty" type="ST_Coordinate" use="optional"/>
    <xsd:attribute name="sx" type="ST_Percentage" use="optional"/>
    <xsd:attribute name="sy" type="ST_Percentage" use="optional"/>
    <xsd:attribute name="flip" type="ST_TileFlipMode" use="optional" default="none"/>
    <xsd:attribute name="algn" type="ST_RectAlignment" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StretchInfoProperties">
    <xsd:sequence>
      <xsd:element name="fillRect" type="CT_RelativeRect" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_FillModeProperties">
    <xsd:choice>
      <xsd:element name="tile" type="CT_TileInfoProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="stretch" type="CT_StretchInfoProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_BlipCompression">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="email"/>
      <xsd:enumeration value="screen"/>
      <xsd:enumeration value="print"/>
      <xsd:enumeration value="hqprint"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Blip">
    <xsd:sequence>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="alphaBiLevel" type="CT_AlphaBiLevelEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="alphaCeiling" type="CT_AlphaCeilingEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="alphaFloor" type="CT_AlphaFloorEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="alphaInv" type="CT_AlphaInverseEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="alphaMod" type="CT_AlphaModulateEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="alphaModFix" type="CT_AlphaModulateFixedEffect" minOccurs="1"
          maxOccurs="1"/>
        <xsd:element name="alphaRepl" type="CT_AlphaReplaceEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="biLevel" type="CT_BiLevelEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="blur" type="CT_BlurEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="clrChange" type="CT_ColorChangeEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="clrRepl" type="CT_ColorReplaceEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="duotone" type="CT_DuotoneEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="fillOverlay" type="CT_FillOverlayEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="grayscl" type="CT_GrayscaleEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="hsl" type="CT_HSLEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="lum" type="CT_LuminanceEffect" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="tint" type="CT_TintEffect" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Blob"/>
    <xsd:attribute name="cstate" type="ST_BlipCompression" use="optional" default="none"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BlipFillProperties">
    <xsd:sequence>
      <xsd:element name="blip" type="CT_Blip" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="srcRect" type="CT_RelativeRect" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_FillModeProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="dpi" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rotWithShape" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PresetPatternVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="pct5"/>
      <xsd:enumeration value="pct10"/>
      <xsd:enumeration value="pct20"/>
      <xsd:enumeration value="pct25"/>
      <xsd:enumeration value="pct30"/>
      <xsd:enumeration value="pct40"/>
      <xsd:enumeration value="pct50"/>
      <xsd:enumeration value="pct60"/>
      <xsd:enumeration value="pct70"/>
      <xsd:enumeration value="pct75"/>
      <xsd:enumeration value="pct80"/>
      <xsd:enumeration value="pct90"/>
      <xsd:enumeration value="horz"/>
      <xsd:enumeration value="vert"/>
      <xsd:enumeration value="ltHorz"/>
      <xsd:enumeration value="ltVert"/>
      <xsd:enumeration value="dkHorz"/>
      <xsd:enumeration value="dkVert"/>
      <xsd:enumeration value="narHorz"/>
      <xsd:enumeration value="narVert"/>
      <xsd:enumeration value="dashHorz"/>
      <xsd:enumeration value="dashVert"/>
      <xsd:enumeration value="cross"/>
      <xsd:enumeration value="dnDiag"/>
      <xsd:enumeration value="upDiag"/>
      <xsd:enumeration value="ltDnDiag"/>
      <xsd:enumeration value="ltUpDiag"/>
      <xsd:enumeration value="dkDnDiag"/>
      <xsd:enumeration value="dkUpDiag"/>
      <xsd:enumeration value="wdDnDiag"/>
      <xsd:enumeration value="wdUpDiag"/>
      <xsd:enumeration value="dashDnDiag"/>
      <xsd:enumeration value="dashUpDiag"/>
      <xsd:enumeration value="diagCross"/>
      <xsd:enumeration value="smCheck"/>
      <xsd:enumeration value="lgCheck"/>
      <xsd:enumeration value="smGrid"/>
      <xsd:enumeration value="lgGrid"/>
      <xsd:enumeration value="dotGrid"/>
      <xsd:enumeration value="smConfetti"/>
      <xsd:enumeration value="lgConfetti"/>
      <xsd:enumeration value="horzBrick"/>
      <xsd:enumeration value="diagBrick"/>
      <xsd:enumeration value="solidDmnd"/>
      <xsd:enumeration value="openDmnd"/>
      <xsd:enumeration value="dotDmnd"/>
      <xsd:enumeration value="plaid"/>
      <xsd:enumeration value="sphere"/>
      <xsd:enumeration value="weave"/>
      <xsd:enumeration value="divot"/>
      <xsd:enumeration value="shingle"/>
      <xsd:enumeration value="wave"/>
      <xsd:enumeration value="trellis"/>
      <xsd:enumeration value="zigZag"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PatternFillProperties">
    <xsd:sequence>
      <xsd:element name="fgClr" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bgClr" type="CT_Color" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prst" type="ST_PresetPatternVal" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupFillProperties"/>
  <xsd:group name="EG_FillProperties">
    <xsd:choice>
      <xsd:element name="noFill" type="CT_NoFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="solidFill" type="CT_SolidColorFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gradFill" type="CT_GradientFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="pattFill" type="CT_PatternFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grpFill" type="CT_GroupFillProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_FillProperties">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FillEffect">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_BlendMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="over"/>
      <xsd:enumeration value="mult"/>
      <xsd:enumeration value="screen"/>
      <xsd:enumeration value="darken"/>
      <xsd:enumeration value="lighten"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FillOverlayEffect">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="blend" type="ST_BlendMode" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_EffectReference">
    <xsd:attribute name="ref" type="xsd:token" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_Effect">
    <xsd:choice>
      <xsd:element name="cont" type="CT_EffectContainer" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="effect" type="CT_EffectReference" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaBiLevel" type="CT_AlphaBiLevelEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaCeiling" type="CT_AlphaCeilingEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaFloor" type="CT_AlphaFloorEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaInv" type="CT_AlphaInverseEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaMod" type="CT_AlphaModulateEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaModFix" type="CT_AlphaModulateFixedEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaOutset" type="CT_AlphaOutsetEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="alphaRepl" type="CT_AlphaReplaceEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="biLevel" type="CT_BiLevelEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blend" type="CT_BlendEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blur" type="CT_BlurEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="clrChange" type="CT_ColorChangeEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="clrRepl" type="CT_ColorReplaceEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="duotone" type="CT_DuotoneEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fill" type="CT_FillEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fillOverlay" type="CT_FillOverlayEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="glow" type="CT_GlowEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grayscl" type="CT_GrayscaleEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hsl" type="CT_HSLEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="innerShdw" type="CT_InnerShadowEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lum" type="CT_LuminanceEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="outerShdw" type="CT_OuterShadowEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="prstShdw" type="CT_PresetShadowEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="reflection" type="CT_ReflectionEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="relOff" type="CT_RelativeOffsetEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="softEdge" type="CT_SoftEdgesEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tint" type="CT_TintEffect" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="xfrm" type="CT_TransformEffect" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_EffectContainerType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sib"/>
      <xsd:enumeration value="tree"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_EffectContainer">
    <xsd:group ref="EG_Effect" minOccurs="0" maxOccurs="unbounded"/>
    <xsd:attribute name="type" type="ST_EffectContainerType" use="optional" default="sib"/>
    <xsd:attribute name="name" type="xsd:token" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AlphaModulateEffect">
    <xsd:sequence>
      <xsd:element name="cont" type="CT_EffectContainer" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BlendEffect">
    <xsd:sequence>
      <xsd:element name="cont" type="CT_EffectContainer" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="blend" type="ST_BlendMode" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_EffectList">
    <xsd:sequence>
      <xsd:element name="blur" type="CT_BlurEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fillOverlay" type="CT_FillOverlayEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="glow" type="CT_GlowEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="innerShdw" type="CT_InnerShadowEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="outerShdw" type="CT_OuterShadowEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="prstShdw" type="CT_PresetShadowEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="reflection" type="CT_ReflectionEffect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="softEdge" type="CT_SoftEdgesEffect" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_EffectProperties">
    <xsd:choice>
      <xsd:element name="effectLst" type="CT_EffectList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="effectDag" type="CT_EffectContainer" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_EffectProperties">
    <xsd:sequence>
      <xsd:group ref="EG_EffectProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="blip" type="CT_Blip"/>
  <xsd:simpleType name="ST_ShapeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="line"/>
      <xsd:enumeration value="lineInv"/>
      <xsd:enumeration value="triangle"/>
      <xsd:enumeration value="rtTriangle"/>
      <xsd:enumeration value="rect"/>
      <xsd:enumeration value="diamond"/>
      <xsd:enumeration value="parallelogram"/>
      <xsd:enumeration value="trapezoid"/>
      <xsd:enumeration value="nonIsoscelesTrapezoid"/>
      <xsd:enumeration value="pentagon"/>
      <xsd:enumeration value="hexagon"/>
      <xsd:enumeration value="heptagon"/>
      <xsd:enumeration value="octagon"/>
      <xsd:enumeration value="decagon"/>
      <xsd:enumeration value="dodecagon"/>
      <xsd:enumeration value="star4"/>
      <xsd:enumeration value="star5"/>
      <xsd:enumeration value="star6"/>
      <xsd:enumeration value="star7"/>
      <xsd:enumeration value="star8"/>
      <xsd:enumeration value="star10"/>
      <xsd:enumeration value="star12"/>
      <xsd:enumeration value="star16"/>
      <xsd:enumeration value="star24"/>
      <xsd:enumeration value="star32"/>
      <xsd:enumeration value="roundRect"/>
      <xsd:enumeration value="round1Rect"/>
      <xsd:enumeration value="round2SameRect"/>
      <xsd:enumeration value="round2DiagRect"/>
      <xsd:enumeration value="snipRoundRect"/>
      <xsd:enumeration value="snip1Rect"/>
      <xsd:enumeration value="snip2SameRect"/>
      <xsd:enumeration value="snip2DiagRect"/>
      <xsd:enumeration value="plaque"/>
      <xsd:enumeration value="ellipse"/>
      <xsd:enumeration value="teardrop"/>
      <xsd:enumeration value="homePlate"/>
      <xsd:enumeration value="chevron"/>
      <xsd:enumeration value="pieWedge"/>
      <xsd:enumeration value="pie"/>
      <xsd:enumeration value="blockArc"/>
      <xsd:enumeration value="donut"/>
      <xsd:enumeration value="noSmoking"/>
      <xsd:enumeration value="rightArrow"/>
      <xsd:enumeration value="leftArrow"/>
      <xsd:enumeration value="upArrow"/>
      <xsd:enumeration value="downArrow"/>
      <xsd:enumeration value="stripedRightArrow"/>
      <xsd:enumeration value="notchedRightArrow"/>
      <xsd:enumeration value="bentUpArrow"/>
      <xsd:enumeration value="leftRightArrow"/>
      <xsd:enumeration value="upDownArrow"/>
      <xsd:enumeration value="leftUpArrow"/>
      <xsd:enumeration value="leftRightUpArrow"/>
      <xsd:enumeration value="quadArrow"/>
      <xsd:enumeration value="leftArrowCallout"/>
      <xsd:enumeration value="rightArrowCallout"/>
      <xsd:enumeration value="upArrowCallout"/>
      <xsd:enumeration value="downArrowCallout"/>
      <xsd:enumeration value="leftRightArrowCallout"/>
      <xsd:enumeration value="upDownArrowCallout"/>
      <xsd:enumeration value="quadArrowCallout"/>
      <xsd:enumeration value="bentArrow"/>
      <xsd:enumeration value="uturnArrow"/>
      <xsd:enumeration value="circularArrow"/>
      <xsd:enumeration value="leftCircularArrow"/>
      <xsd:enumeration value="leftRightCircularArrow"/>
      <xsd:enumeration value="curvedRightArrow"/>
      <xsd:enumeration value="curvedLeftArrow"/>
      <xsd:enumeration value="curvedUpArrow"/>
      <xsd:enumeration value="curvedDownArrow"/>
      <xsd:enumeration value="swooshArrow"/>
      <xsd:enumeration value="cube"/>
      <xsd:enumeration value="can"/>
      <xsd:enumeration value="lightningBolt"/>
      <xsd:enumeration value="heart"/>
      <xsd:enumeration value="sun"/>
      <xsd:enumeration value="moon"/>
      <xsd:enumeration value="smileyFace"/>
      <xsd:enumeration value="irregularSeal1"/>
      <xsd:enumeration value="irregularSeal2"/>
      <xsd:enumeration value="foldedCorner"/>
      <xsd:enumeration value="bevel"/>
      <xsd:enumeration value="frame"/>
      <xsd:enumeration value="halfFrame"/>
      <xsd:enumeration value="corner"/>
      <xsd:enumeration value="diagStripe"/>
      <xsd:enumeration value="chord"/>
      <xsd:enumeration value="arc"/>
      <xsd:enumeration value="leftBracket"/>
      <xsd:enumeration value="rightBracket"/>
      <xsd:enumeration value="leftBrace"/>
      <xsd:enumeration value="rightBrace"/>
      <xsd:enumeration value="bracketPair"/>
      <xsd:enumeration value="bracePair"/>
      <xsd:enumeration value="straightConnector1"/>
      <xsd:enumeration value="bentConnector2"/>
      <xsd:enumeration value="bentConnector3"/>
      <xsd:enumeration value="bentConnector4"/>
      <xsd:enumeration value="bentConnector5"/>
      <xsd:enumeration value="curvedConnector2"/>
      <xsd:enumeration value="curvedConnector3"/>
      <xsd:enumeration value="curvedConnector4"/>
      <xsd:enumeration value="curvedConnector5"/>
      <xsd:enumeration value="callout1"/>
      <xsd:enumeration value="callout2"/>
      <xsd:enumeration value="callout3"/>
      <xsd:enumeration value="accentCallout1"/>
      <xsd:enumeration value="accentCallout2"/>
      <xsd:enumeration value="accentCallout3"/>
      <xsd:enumeration value="borderCallout1"/>
      <xsd:enumeration value="borderCallout2"/>
      <xsd:enumeration value="borderCallout3"/>
      <xsd:enumeration value="accentBorderCallout1"/>
      <xsd:enumeration value="accentBorderCallout2"/>
      <xsd:enumeration value="accentBorderCallout3"/>
      <xsd:enumeration value="wedgeRectCallout"/>
      <xsd:enumeration value="wedgeRoundRectCallout"/>
      <xsd:enumeration value="wedgeEllipseCallout"/>
      <xsd:enumeration value="cloudCallout"/>
      <xsd:enumeration value="cloud"/>
      <xsd:enumeration value="ribbon"/>
      <xsd:enumeration value="ribbon2"/>
      <xsd:enumeration value="ellipseRibbon"/>
      <xsd:enumeration value="ellipseRibbon2"/>
      <xsd:enumeration value="leftRightRibbon"/>
      <xsd:enumeration value="verticalScroll"/>
      <xsd:enumeration value="horizontalScroll"/>
      <xsd:enumeration value="wave"/>
      <xsd:enumeration value="doubleWave"/>
      <xsd:enumeration value="plus"/>
      <xsd:enumeration value="flowChartProcess"/>
      <xsd:enumeration value="flowChartDecision"/>
      <xsd:enumeration value="flowChartInputOutput"/>
      <xsd:enumeration value="flowChartPredefinedProcess"/>
      <xsd:enumeration value="flowChartInternalStorage"/>
      <xsd:enumeration value="flowChartDocument"/>
      <xsd:enumeration value="flowChartMultidocument"/>
      <xsd:enumeration value="flowChartTerminator"/>
      <xsd:enumeration value="flowChartPreparation"/>
      <xsd:enumeration value="flowChartManualInput"/>
      <xsd:enumeration value="flowChartManualOperation"/>
      <xsd:enumeration value="flowChartConnector"/>
      <xsd:enumeration value="flowChartPunchedCard"/>
      <xsd:enumeration value="flowChartPunchedTape"/>
      <xsd:enumeration value="flowChartSummingJunction"/>
      <xsd:enumeration value="flowChartOr"/>
      <xsd:enumeration value="flowChartCollate"/>
      <xsd:enumeration value="flowChartSort"/>
      <xsd:enumeration value="flowChartExtract"/>
      <xsd:enumeration value="flowChartMerge"/>
      <xsd:enumeration value="flowChartOfflineStorage"/>
      <xsd:enumeration value="flowChartOnlineStorage"/>
      <xsd:enumeration value="flowChartMagneticTape"/>
      <xsd:enumeration value="flowChartMagneticDisk"/>
      <xsd:enumeration value="flowChartMagneticDrum"/>
      <xsd:enumeration value="flowChartDisplay"/>
      <xsd:enumeration value="flowChartDelay"/>
      <xsd:enumeration value="flowChartAlternateProcess"/>
      <xsd:enumeration value="flowChartOffpageConnector"/>
      <xsd:enumeration value="actionButtonBlank"/>
      <xsd:enumeration value="actionButtonHome"/>
      <xsd:enumeration value="actionButtonHelp"/>
      <xsd:enumeration value="actionButtonInformation"/>
      <xsd:enumeration value="actionButtonForwardNext"/>
      <xsd:enumeration value="actionButtonBackPrevious"/>
      <xsd:enumeration value="actionButtonEnd"/>
      <xsd:enumeration value="actionButtonBeginning"/>
      <xsd:enumeration value="actionButtonReturn"/>
      <xsd:enumeration value="actionButtonDocument"/>
      <xsd:enumeration value="actionButtonSound"/>
      <xsd:enumeration value="actionButtonMovie"/>
      <xsd:enumeration value="gear6"/>
      <xsd:enumeration value="gear9"/>
      <xsd:enumeration value="funnel"/>
      <xsd:enumeration value="mathPlus"/>
      <xsd:enumeration value="mathMinus"/>
      <xsd:enumeration value="mathMultiply"/>
      <xsd:enumeration value="mathDivide"/>
      <xsd:enumeration value="mathEqual"/>
      <xsd:enumeration value="mathNotEqual"/>
      <xsd:enumeration value="cornerTabs"/>
      <xsd:enumeration value="squareTabs"/>
      <xsd:enumeration value="plaqueTabs"/>
      <xsd:enumeration value="chartX"/>
      <xsd:enumeration value="chartStar"/>
      <xsd:enumeration value="chartPlus"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextShapeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="textNoShape"/>
      <xsd:enumeration value="textPlain"/>
      <xsd:enumeration value="textStop"/>
      <xsd:enumeration value="textTriangle"/>
      <xsd:enumeration value="textTriangleInverted"/>
      <xsd:enumeration value="textChevron"/>
      <xsd:enumeration value="textChevronInverted"/>
      <xsd:enumeration value="textRingInside"/>
      <xsd:enumeration value="textRingOutside"/>
      <xsd:enumeration value="textArchUp"/>
      <xsd:enumeration value="textArchDown"/>
      <xsd:enumeration value="textCircle"/>
      <xsd:enumeration value="textButton"/>
      <xsd:enumeration value="textArchUpPour"/>
      <xsd:enumeration value="textArchDownPour"/>
      <xsd:enumeration value="textCirclePour"/>
      <xsd:enumeration value="textButtonPour"/>
      <xsd:enumeration value="textCurveUp"/>
      <xsd:enumeration value="textCurveDown"/>
      <xsd:enumeration value="textCanUp"/>
      <xsd:enumeration value="textCanDown"/>
      <xsd:enumeration value="textWave1"/>
      <xsd:enumeration value="textWave2"/>
      <xsd:enumeration value="textDoubleWave1"/>
      <xsd:enumeration value="textWave4"/>
      <xsd:enumeration value="textInflate"/>
      <xsd:enumeration value="textDeflate"/>
      <xsd:enumeration value="textInflateBottom"/>
      <xsd:enumeration value="textDeflateBottom"/>
      <xsd:enumeration value="textInflateTop"/>
      <xsd:enumeration value="textDeflateTop"/>
      <xsd:enumeration value="textDeflateInflate"/>
      <xsd:enumeration value="textDeflateInflateDeflate"/>
      <xsd:enumeration value="textFadeRight"/>
      <xsd:enumeration value="textFadeLeft"/>
      <xsd:enumeration value="textFadeUp"/>
      <xsd:enumeration value="textFadeDown"/>
      <xsd:enumeration value="textSlantUp"/>
      <xsd:enumeration value="textSlantDown"/>
      <xsd:enumeration value="textCascadeUp"/>
      <xsd:enumeration value="textCascadeDown"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_GeomGuideName">
    <xsd:restriction base="xsd:token"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_GeomGuideFormula">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_GeomGuide">
    <xsd:attribute name="name" type="ST_GeomGuideName" use="required"/>
    <xsd:attribute name="fmla" type="ST_GeomGuideFormula" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GeomGuideList">
    <xsd:sequence>
      <xsd:element name="gd" type="CT_GeomGuide" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_AdjCoordinate">
    <xsd:union memberTypes="ST_Coordinate ST_GeomGuideName"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AdjAngle">
    <xsd:union memberTypes="ST_Angle ST_GeomGuideName"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_AdjPoint2D">
    <xsd:attribute name="x" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="y" type="ST_AdjCoordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GeomRect">
    <xsd:attribute name="l" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="t" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="r" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="b" type="ST_AdjCoordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_XYAdjustHandle">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_AdjPoint2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="gdRefX" type="ST_GeomGuideName" use="optional"/>
    <xsd:attribute name="minX" type="ST_AdjCoordinate" use="optional"/>
    <xsd:attribute name="maxX" type="ST_AdjCoordinate" use="optional"/>
    <xsd:attribute name="gdRefY" type="ST_GeomGuideName" use="optional"/>
    <xsd:attribute name="minY" type="ST_AdjCoordinate" use="optional"/>
    <xsd:attribute name="maxY" type="ST_AdjCoordinate" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PolarAdjustHandle">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_AdjPoint2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="gdRefR" type="ST_GeomGuideName" use="optional"/>
    <xsd:attribute name="minR" type="ST_AdjCoordinate" use="optional"/>
    <xsd:attribute name="maxR" type="ST_AdjCoordinate" use="optional"/>
    <xsd:attribute name="gdRefAng" type="ST_GeomGuideName" use="optional"/>
    <xsd:attribute name="minAng" type="ST_AdjAngle" use="optional"/>
    <xsd:attribute name="maxAng" type="ST_AdjAngle" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConnectionSite">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_AdjPoint2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ang" type="ST_AdjAngle" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AdjustHandleList">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="ahXY" type="CT_XYAdjustHandle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="ahPolar" type="CT_PolarAdjustHandle" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_ConnectionSiteList">
    <xsd:sequence>
      <xsd:element name="cxn" type="CT_ConnectionSite" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Connection">
    <xsd:attribute name="id" type="ST_DrawingElementId" use="required"/>
    <xsd:attribute name="idx" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DMoveTo">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_AdjPoint2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DLineTo">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_AdjPoint2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DArcTo">
    <xsd:attribute name="wR" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="hR" type="ST_AdjCoordinate" use="required"/>
    <xsd:attribute name="stAng" type="ST_AdjAngle" use="required"/>
    <xsd:attribute name="swAng" type="ST_AdjAngle" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DQuadBezierTo">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_AdjPoint2D" minOccurs="2" maxOccurs="2"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DCubicBezierTo">
    <xsd:sequence>
      <xsd:element name="pt" type="CT_AdjPoint2D" minOccurs="3" maxOccurs="3"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DClose"/>
  <xsd:simpleType name="ST_PathFillMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="norm"/>
      <xsd:enumeration value="lighten"/>
      <xsd:enumeration value="lightenLess"/>
      <xsd:enumeration value="darken"/>
      <xsd:enumeration value="darkenLess"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Path2D">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="close" type="CT_Path2DClose" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="moveTo" type="CT_Path2DMoveTo" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lnTo" type="CT_Path2DLineTo" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="arcTo" type="CT_Path2DArcTo" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="quadBezTo" type="CT_Path2DQuadBezierTo" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cubicBezTo" type="CT_Path2DCubicBezierTo" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="w" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="h" type="ST_PositiveCoordinate" use="optional" default="0"/>
    <xsd:attribute name="fill" type="ST_PathFillMode" use="optional" default="norm"/>
    <xsd:attribute name="stroke" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="extrusionOk" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Path2DList">
    <xsd:sequence>
      <xsd:element name="path" type="CT_Path2D" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PresetGeometry2D">
    <xsd:sequence>
      <xsd:element name="avLst" type="CT_GeomGuideList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prst" type="ST_ShapeType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PresetTextShape">
    <xsd:sequence>
      <xsd:element name="avLst" type="CT_GeomGuideList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prst" type="ST_TextShapeType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomGeometry2D">
    <xsd:sequence>
      <xsd:element name="avLst" type="CT_GeomGuideList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="gdLst" type="CT_GeomGuideList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ahLst" type="CT_AdjustHandleList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cxnLst" type="CT_ConnectionSiteList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rect" type="CT_GeomRect" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pathLst" type="CT_Path2DList" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_Geometry">
    <xsd:choice>
      <xsd:element name="custGeom" type="CT_CustomGeometry2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="prstGeom" type="CT_PresetGeometry2D" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_TextGeometry">
    <xsd:choice>
      <xsd:element name="custGeom" type="CT_CustomGeometry2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="prstTxWarp" type="CT_PresetTextShape" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_LineEndType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="triangle"/>
      <xsd:enumeration value="stealth"/>
      <xsd:enumeration value="diamond"/>
      <xsd:enumeration value="oval"/>
      <xsd:enumeration value="arrow"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LineEndWidth">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sm"/>
      <xsd:enumeration value="med"/>
      <xsd:enumeration value="lg"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LineEndLength">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sm"/>
      <xsd:enumeration value="med"/>
      <xsd:enumeration value="lg"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LineEndProperties">
    <xsd:attribute name="type" type="ST_LineEndType" use="optional" default="none"/>
    <xsd:attribute name="w" type="ST_LineEndWidth" use="optional"/>
    <xsd:attribute name="len" type="ST_LineEndLength" use="optional"/>
  </xsd:complexType>
  <xsd:group name="EG_LineFillProperties">
    <xsd:choice>
      <xsd:element name="noFill" type="CT_NoFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="solidFill" type="CT_SolidColorFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gradFill" type="CT_GradientFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="pattFill" type="CT_PatternFillProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_LineJoinBevel"/>
  <xsd:complexType name="CT_LineJoinRound"/>
  <xsd:complexType name="CT_LineJoinMiterProperties">
    <xsd:attribute name="lim" type="ST_PositivePercentage" use="optional"/>
  </xsd:complexType>
  <xsd:group name="EG_LineJoinProperties">
    <xsd:choice>
      <xsd:element name="round" type="CT_LineJoinRound" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="bevel" type="CT_LineJoinBevel" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="miter" type="CT_LineJoinMiterProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_PresetLineDashVal">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="lgDash"/>
      <xsd:enumeration value="dashDot"/>
      <xsd:enumeration value="lgDashDot"/>
      <xsd:enumeration value="lgDashDotDot"/>
      <xsd:enumeration value="sysDash"/>
      <xsd:enumeration value="sysDot"/>
      <xsd:enumeration value="sysDashDot"/>
      <xsd:enumeration value="sysDashDotDot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PresetLineDashProperties">
    <xsd:attribute name="val" type="ST_PresetLineDashVal" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DashStop">
    <xsd:attribute name="d" type="ST_PositivePercentage" use="required"/>
    <xsd:attribute name="sp" type="ST_PositivePercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DashStopList">
    <xsd:sequence>
      <xsd:element name="ds" type="CT_DashStop" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_LineDashProperties">
    <xsd:choice>
      <xsd:element name="prstDash" type="CT_PresetLineDashProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="custDash" type="CT_DashStopList" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_LineCap">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="rnd"/>
      <xsd:enumeration value="sq"/>
      <xsd:enumeration value="flat"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LineWidth">
    <xsd:restriction base="ST_Coordinate32Unqualified">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="20116800"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PenAlignment">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="in"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CompoundLine">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sng"/>
      <xsd:enumeration value="dbl"/>
      <xsd:enumeration value="thickThin"/>
      <xsd:enumeration value="thinThick"/>
      <xsd:enumeration value="tri"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LineProperties">
    <xsd:sequence>
      <xsd:group ref="EG_LineFillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_LineDashProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_LineJoinProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headEnd" type="CT_LineEndProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tailEnd" type="CT_LineEndProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="w" type="ST_LineWidth" use="optional"/>
    <xsd:attribute name="cap" type="ST_LineCap" use="optional"/>
    <xsd:attribute name="cmpd" type="ST_CompoundLine" use="optional"/>
    <xsd:attribute name="algn" type="ST_PenAlignment" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ShapeID">
    <xsd:restriction base="xsd:token"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_ShapeProperties">
    <xsd:sequence>
      <xsd:element name="xfrm" type="CT_Transform2D" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_Geometry" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ln" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scene3d" type="CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sp3d" type="CT_Shape3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bwMode" type="ST_BlackWhiteMode" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShapeProperties">
    <xsd:sequence>
      <xsd:element name="xfrm" type="CT_GroupTransform2D" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scene3d" type="CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bwMode" type="ST_BlackWhiteMode" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StyleMatrixReference">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="idx" type="ST_StyleMatrixColumnIndex" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontReference">
    <xsd:sequence>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="idx" type="ST_FontCollectionIndex" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeStyle">
    <xsd:sequence>
      <xsd:element name="lnRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fillRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="effectRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fontRef" type="CT_FontReference" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DefaultShapeDefinition">
    <xsd:sequence>
      <xsd:element name="spPr" type="CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="bodyPr" type="CT_TextBodyProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lstStyle" type="CT_TextListStyle" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ObjectStyleDefaults">
    <xsd:sequence>
      <xsd:element name="spDef" type="CT_DefaultShapeDefinition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnDef" type="CT_DefaultShapeDefinition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txDef" type="CT_DefaultShapeDefinition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EmptyElement"/>
  <xsd:complexType name="CT_ColorMapping">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bg1" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="tx1" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="bg2" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="tx2" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent1" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent2" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent3" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent4" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent5" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="accent6" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="hlink" type="ST_ColorSchemeIndex" use="required"/>
    <xsd:attribute name="folHlink" type="ST_ColorSchemeIndex" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorMappingOverride">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="masterClrMapping" type="CT_EmptyElement"/>
        <xsd:element name="overrideClrMapping" type="CT_ColorMapping"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorSchemeAndMapping">
    <xsd:sequence>
      <xsd:element name="clrScheme" type="CT_ColorScheme" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="clrMap" type="CT_ColorMapping" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorSchemeList">
    <xsd:sequence>
      <xsd:element name="extraClrScheme" type="CT_ColorSchemeAndMapping" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OfficeStyleSheet">
    <xsd:sequence>
      <xsd:element name="themeElements" type="CT_BaseStyles" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="objectDefaults" type="CT_ObjectStyleDefaults" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extraClrSchemeLst" type="CT_ColorSchemeList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="custClrLst" type="CT_CustomColorList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_BaseStylesOverride">
    <xsd:sequence>
      <xsd:element name="clrScheme" type="CT_ColorScheme" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fontScheme" type="CT_FontScheme" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fmtScheme" type="CT_StyleMatrix" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ClipboardStyleSheet">
    <xsd:sequence>
      <xsd:element name="themeElements" type="CT_BaseStyles" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="clrMap" type="CT_ColorMapping" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="theme" type="CT_OfficeStyleSheet"/>
  <xsd:element name="themeOverride" type="CT_BaseStylesOverride"/>
  <xsd:element name="themeManager" type="CT_EmptyElement"/>
  <xsd:complexType name="CT_TableCellProperties">
    <xsd:sequence>
      <xsd:element name="lnL" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnR" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnT" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnB" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnTlToBr" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lnBlToTr" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cell3D" type="CT_Cell3D" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headers" type="CT_Headers" minOccurs="0"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="marL" type="ST_Coordinate32" use="optional" default="91440"/>
    <xsd:attribute name="marR" type="ST_Coordinate32" use="optional" default="91440"/>
    <xsd:attribute name="marT" type="ST_Coordinate32" use="optional" default="45720"/>
    <xsd:attribute name="marB" type="ST_Coordinate32" use="optional" default="45720"/>
    <xsd:attribute name="vert" type="ST_TextVerticalType" use="optional" default="horz"/>
    <xsd:attribute name="anchor" type="ST_TextAnchoringType" use="optional" default="t"/>
    <xsd:attribute name="anchorCtr" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="horzOverflow" type="ST_TextHorzOverflowType" use="optional" default="clip"
    />
  </xsd:complexType>
  <xsd:complexType name="CT_Headers">
    <xsd:sequence minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="header" type="xsd:string"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TableCol">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="w" type="ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableGrid">
    <xsd:sequence>
      <xsd:element name="gridCol" type="CT_TableCol" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TableCell">
    <xsd:sequence>
      <xsd:element name="txBody" type="CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcPr" type="CT_TableCellProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rowSpan" type="xsd:int" use="optional" default="1"/>
    <xsd:attribute name="gridSpan" type="xsd:int" use="optional" default="1"/>
    <xsd:attribute name="hMerge" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="vMerge" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="id" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableRow">
    <xsd:sequence>
      <xsd:element name="tc" type="CT_TableCell" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="h" type="ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableProperties">
    <xsd:sequence>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="tableStyle" type="CT_TableStyle"/>
        <xsd:element name="tableStyleId" type="s:ST_Guid"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rtl" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="firstRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="firstCol" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="lastRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="lastCol" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="bandRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="bandCol" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Table">
    <xsd:sequence>
      <xsd:element name="tblPr" type="CT_TableProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblGrid" type="CT_TableGrid" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tr" type="CT_TableRow" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="tbl" type="CT_Table"/>
  <xsd:complexType name="CT_Cell3D">
    <xsd:sequence>
      <xsd:element name="bevel" type="CT_Bevel" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lightRig" type="CT_LightRig" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prstMaterial" type="ST_PresetMaterialType" use="optional" default="plastic"
    />
  </xsd:complexType>
  <xsd:group name="EG_ThemeableFillStyle">
    <xsd:choice>
      <xsd:element name="fill" type="CT_FillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fillRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_ThemeableLineStyle">
    <xsd:choice>
      <xsd:element name="ln" type="CT_LineProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lnRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:group name="EG_ThemeableEffectStyle">
    <xsd:choice>
      <xsd:element name="effect" type="CT_EffectProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="effectRef" type="CT_StyleMatrixReference" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_ThemeableFontStyles">
    <xsd:choice>
      <xsd:element name="font" type="CT_FontCollection" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="fontRef" type="CT_FontReference" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_OnOffStyleType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="on"/>
      <xsd:enumeration value="off"/>
      <xsd:enumeration value="def"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TableStyleTextStyle">
    <xsd:sequence>
      <xsd:group ref="EG_ThemeableFontStyles" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ColorChoice" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="b" type="ST_OnOffStyleType" use="optional" default="def"/>
    <xsd:attribute name="i" type="ST_OnOffStyleType" use="optional" default="def"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableCellBorderStyle">
    <xsd:sequence>
      <xsd:element name="left" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="right" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="top" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bottom" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="insideH" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="insideV" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tl2br" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tr2bl" type="CT_ThemeableLineStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TableBackgroundStyle">
    <xsd:sequence>
      <xsd:group ref="EG_ThemeableFillStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ThemeableEffectStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyleCellStyle">
    <xsd:sequence>
      <xsd:element name="tcBdr" type="CT_TableCellBorderStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ThemeableFillStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cell3D" type="CT_Cell3D" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TablePartStyle">
    <xsd:sequence>
      <xsd:element name="tcTxStyle" type="CT_TableStyleTextStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcStyle" type="CT_TableStyleCellStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyle">
    <xsd:sequence>
      <xsd:element name="tblBg" type="CT_TableBackgroundStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="wholeTbl" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="band1H" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="band2H" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="band1V" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="band2V" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lastCol" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstCol" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lastRow" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="seCell" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="swCell" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstRow" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="neCell" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="nwCell" type="CT_TablePartStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="styleId" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="styleName" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyleList">
    <xsd:sequence>
      <xsd:element name="tblStyle" type="CT_TableStyle" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="def" type="s:ST_Guid" use="required"/>
  </xsd:complexType>
  <xsd:element name="tblStyleLst" type="CT_TableStyleList"/>
  <xsd:complexType name="CT_TextParagraph">
    <xsd:sequence>
      <xsd:element name="pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextRun" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="endParaRPr" type="CT_TextCharacterProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextAnchoringType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="just"/>
      <xsd:enumeration value="dist"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextVertOverflowType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="overflow"/>
      <xsd:enumeration value="ellipsis"/>
      <xsd:enumeration value="clip"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextHorzOverflowType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="overflow"/>
      <xsd:enumeration value="clip"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextVerticalType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="horz"/>
      <xsd:enumeration value="vert"/>
      <xsd:enumeration value="vert270"/>
      <xsd:enumeration value="wordArtVert"/>
      <xsd:enumeration value="eaVert"/>
      <xsd:enumeration value="mongolianVert"/>
      <xsd:enumeration value="wordArtVertRtl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextWrappingType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="square"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextColumnCount">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="1"/>
      <xsd:maxInclusive value="16"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextListStyle">
    <xsd:sequence>
      <xsd:element name="defPPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl1pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl2pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl3pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl4pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl5pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl6pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl7pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl8pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="lvl9pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextFontScalePercentOrPercentString">
    <xsd:union memberTypes="ST_TextFontScalePercent s:ST_Percentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextFontScalePercent">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="1000"/>
      <xsd:maxInclusive value="100000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextNormalAutofit">
    <xsd:attribute name="fontScale" type="ST_TextFontScalePercentOrPercentString" use="optional"
      default="100%"/>
    <xsd:attribute name="lnSpcReduction" type="ST_TextSpacingPercentOrPercentString" use="optional"
      default="0%"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextShapeAutofit"/>
  <xsd:complexType name="CT_TextNoAutofit"/>
  <xsd:group name="EG_TextAutofit">
    <xsd:choice>
      <xsd:element name="noAutofit" type="CT_TextNoAutofit"/>
      <xsd:element name="normAutofit" type="CT_TextNormalAutofit"/>
      <xsd:element name="spAutoFit" type="CT_TextShapeAutofit"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_TextBodyProperties">
    <xsd:sequence>
      <xsd:element name="prstTxWarp" type="CT_PresetTextShape" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextAutofit" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scene3d" type="CT_Scene3D" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_Text3D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="rot" type="ST_Angle" use="optional"/>
    <xsd:attribute name="spcFirstLastPara" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="vertOverflow" type="ST_TextVertOverflowType" use="optional"/>
    <xsd:attribute name="horzOverflow" type="ST_TextHorzOverflowType" use="optional"/>
    <xsd:attribute name="vert" type="ST_TextVerticalType" use="optional"/>
    <xsd:attribute name="wrap" type="ST_TextWrappingType" use="optional"/>
    <xsd:attribute name="lIns" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="tIns" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="rIns" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="bIns" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="numCol" type="ST_TextColumnCount" use="optional"/>
    <xsd:attribute name="spcCol" type="ST_PositiveCoordinate32" use="optional"/>
    <xsd:attribute name="rtlCol" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="fromWordArt" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="anchor" type="ST_TextAnchoringType" use="optional"/>
    <xsd:attribute name="anchorCtr" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="forceAA" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="upright" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="compatLnSpc" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextBody">
    <xsd:sequence>
      <xsd:element name="bodyPr" type="CT_TextBodyProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lstStyle" type="CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="p" type="CT_TextParagraph" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextBulletStartAtNum">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="1"/>
      <xsd:maxInclusive value="32767"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextAutonumberScheme">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="alphaLcParenBoth"/>
      <xsd:enumeration value="alphaUcParenBoth"/>
      <xsd:enumeration value="alphaLcParenR"/>
      <xsd:enumeration value="alphaUcParenR"/>
      <xsd:enumeration value="alphaLcPeriod"/>
      <xsd:enumeration value="alphaUcPeriod"/>
      <xsd:enumeration value="arabicParenBoth"/>
      <xsd:enumeration value="arabicParenR"/>
      <xsd:enumeration value="arabicPeriod"/>
      <xsd:enumeration value="arabicPlain"/>
      <xsd:enumeration value="romanLcParenBoth"/>
      <xsd:enumeration value="romanUcParenBoth"/>
      <xsd:enumeration value="romanLcParenR"/>
      <xsd:enumeration value="romanUcParenR"/>
      <xsd:enumeration value="romanLcPeriod"/>
      <xsd:enumeration value="romanUcPeriod"/>
      <xsd:enumeration value="circleNumDbPlain"/>
      <xsd:enumeration value="circleNumWdBlackPlain"/>
      <xsd:enumeration value="circleNumWdWhitePlain"/>
      <xsd:enumeration value="arabicDbPeriod"/>
      <xsd:enumeration value="arabicDbPlain"/>
      <xsd:enumeration value="ea1ChsPeriod"/>
      <xsd:enumeration value="ea1ChsPlain"/>
      <xsd:enumeration value="ea1ChtPeriod"/>
      <xsd:enumeration value="ea1ChtPlain"/>
      <xsd:enumeration value="ea1JpnChsDbPeriod"/>
      <xsd:enumeration value="ea1JpnKorPlain"/>
      <xsd:enumeration value="ea1JpnKorPeriod"/>
      <xsd:enumeration value="arabic1Minus"/>
      <xsd:enumeration value="arabic2Minus"/>
      <xsd:enumeration value="hebrew2Minus"/>
      <xsd:enumeration value="thaiAlphaPeriod"/>
      <xsd:enumeration value="thaiAlphaParenR"/>
      <xsd:enumeration value="thaiAlphaParenBoth"/>
      <xsd:enumeration value="thaiNumPeriod"/>
      <xsd:enumeration value="thaiNumParenR"/>
      <xsd:enumeration value="thaiNumParenBoth"/>
      <xsd:enumeration value="hindiAlphaPeriod"/>
      <xsd:enumeration value="hindiNumPeriod"/>
      <xsd:enumeration value="hindiNumParenR"/>
      <xsd:enumeration value="hindiAlpha1Period"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextBulletColorFollowText"/>
  <xsd:group name="EG_TextBulletColor">
    <xsd:choice>
      <xsd:element name="buClrTx" type="CT_TextBulletColorFollowText" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="buClr" type="CT_Color" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_TextBulletSize">
    <xsd:union memberTypes="ST_TextBulletSizePercent ST_TextBulletSizeDecimal"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextBulletSizePercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*((2[5-9])|([3-9][0-9])|([1-3][0-9][0-9])|400)%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextBulletSizeDecimal">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="25000"/>
      <xsd:maxInclusive value="400000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextBulletSizeFollowText"/>
  <xsd:complexType name="CT_TextBulletSizePercent">
    <xsd:attribute name="val" type="ST_TextBulletSizePercent" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextBulletSizePoint">
    <xsd:attribute name="val" type="ST_TextFontSize" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_TextBulletSize">
    <xsd:choice>
      <xsd:element name="buSzTx" type="CT_TextBulletSizeFollowText"/>
      <xsd:element name="buSzPct" type="CT_TextBulletSizePercent"/>
      <xsd:element name="buSzPts" type="CT_TextBulletSizePoint"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_TextBulletTypefaceFollowText"/>
  <xsd:group name="EG_TextBulletTypeface">
    <xsd:choice>
      <xsd:element name="buFontTx" type="CT_TextBulletTypefaceFollowText"/>
      <xsd:element name="buFont" type="CT_TextFont"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_TextAutonumberBullet">
    <xsd:attribute name="type" type="ST_TextAutonumberScheme" use="required"/>
    <xsd:attribute name="startAt" type="ST_TextBulletStartAtNum" use="optional" default="1"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextCharBullet">
    <xsd:attribute name="char" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextBlipBullet">
    <xsd:sequence>
      <xsd:element name="blip" type="CT_Blip" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TextNoBullet"/>
  <xsd:group name="EG_TextBullet">
    <xsd:choice>
      <xsd:element name="buNone" type="CT_TextNoBullet"/>
      <xsd:element name="buAutoNum" type="CT_TextAutonumberBullet"/>
      <xsd:element name="buChar" type="CT_TextCharBullet"/>
      <xsd:element name="buBlip" type="CT_TextBlipBullet"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_TextPoint">
    <xsd:union memberTypes="ST_TextPointUnqualified s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextPointUnqualified">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="-400000"/>
      <xsd:maxInclusive value="400000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextNonNegativePoint">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="400000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextFontSize">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="100"/>
      <xsd:maxInclusive value="400000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextTypeface">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PitchFamily">
   <xsd:restriction base="xsd:byte">
     <xsd:enumeration value="00"/>
     <xsd:enumeration value="01"/>
     <xsd:enumeration value="02"/>
     <xsd:enumeration value="16"/>
     <xsd:enumeration value="17"/>
     <xsd:enumeration value="18"/>
     <xsd:enumeration value="32"/>
     <xsd:enumeration value="33"/>
     <xsd:enumeration value="34"/>
     <xsd:enumeration value="48"/>
     <xsd:enumeration value="49"/>
     <xsd:enumeration value="50"/>
     <xsd:enumeration value="64"/>
     <xsd:enumeration value="65"/>
     <xsd:enumeration value="66"/>
     <xsd:enumeration value="80"/>
     <xsd:enumeration value="81"/>
     <xsd:enumeration value="82"/>
   </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_TextFont">
    <xsd:attribute name="typeface" type="ST_TextTypeface" use="required"/>
    <xsd:attribute name="panose" type="s:ST_Panose" use="optional"/>
    <xsd:attribute name="pitchFamily" type="ST_PitchFamily" use="optional" default="0"/>
    <xsd:attribute name="charset" type="xsd:byte" use="optional" default="1"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextUnderlineType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="words"/>
      <xsd:enumeration value="sng"/>
      <xsd:enumeration value="dbl"/>
      <xsd:enumeration value="heavy"/>
      <xsd:enumeration value="dotted"/>
      <xsd:enumeration value="dottedHeavy"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="dashHeavy"/>
      <xsd:enumeration value="dashLong"/>
      <xsd:enumeration value="dashLongHeavy"/>
      <xsd:enumeration value="dotDash"/>
      <xsd:enumeration value="dotDashHeavy"/>
      <xsd:enumeration value="dotDotDash"/>
      <xsd:enumeration value="dotDotDashHeavy"/>
      <xsd:enumeration value="wavy"/>
      <xsd:enumeration value="wavyHeavy"/>
      <xsd:enumeration value="wavyDbl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextUnderlineLineFollowText"/>
  <xsd:complexType name="CT_TextUnderlineFillFollowText"/>
  <xsd:complexType name="CT_TextUnderlineFillGroupWrapper">
    <xsd:group ref="EG_FillProperties" minOccurs="1" maxOccurs="1"/>
  </xsd:complexType>
  <xsd:group name="EG_TextUnderlineLine">
    <xsd:choice>
      <xsd:element name="uLnTx" type="CT_TextUnderlineLineFollowText"/>
      <xsd:element name="uLn" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_TextUnderlineFill">
    <xsd:choice>
      <xsd:element name="uFillTx" type="CT_TextUnderlineFillFollowText"/>
      <xsd:element name="uFill" type="CT_TextUnderlineFillGroupWrapper"/>
    </xsd:choice>
  </xsd:group>
  <xsd:simpleType name="ST_TextStrikeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="noStrike"/>
      <xsd:enumeration value="sngStrike"/>
      <xsd:enumeration value="dblStrike"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextCapsType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="small"/>
      <xsd:enumeration value="all"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextCharacterProperties">
    <xsd:sequence>
      <xsd:element name="ln" type="CT_LineProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_FillProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="highlight" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextUnderlineLine" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextUnderlineFill" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="latin" type="CT_TextFont" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ea" type="CT_TextFont" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cs" type="CT_TextFont" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sym" type="CT_TextFont" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hlinkClick" type="CT_Hyperlink" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hlinkMouseOver" type="CT_Hyperlink" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rtl" type="CT_Boolean" minOccurs="0"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="kumimoji" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="lang" type="s:ST_Lang" use="optional"/>
    <xsd:attribute name="altLang" type="s:ST_Lang" use="optional"/>
    <xsd:attribute name="sz" type="ST_TextFontSize" use="optional"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="u" type="ST_TextUnderlineType" use="optional"/>
    <xsd:attribute name="strike" type="ST_TextStrikeType" use="optional"/>
    <xsd:attribute name="kern" type="ST_TextNonNegativePoint" use="optional"/>
    <xsd:attribute name="cap" type="ST_TextCapsType" use="optional" default="none"/>
    <xsd:attribute name="spc" type="ST_TextPoint" use="optional"/>
    <xsd:attribute name="normalizeH" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="baseline" type="ST_Percentage" use="optional"/>
    <xsd:attribute name="noProof" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="dirty" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="err" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="smtClean" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="smtId" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="bmk" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Boolean">
    <xsd:attribute name="val" type="s:ST_OnOff" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextSpacingPoint">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="158400"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextSpacingPercentOrPercentString">
    <xsd:union memberTypes="ST_TextSpacingPercent s:ST_Percentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextSpacingPercent">
    <xsd:restriction base="ST_PercentageDecimal">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="13200000"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextSpacingPercent">
    <xsd:attribute name="val" type="ST_TextSpacingPercentOrPercentString" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextSpacingPoint">
    <xsd:attribute name="val" type="ST_TextSpacingPoint" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextMargin">
    <xsd:restriction base="ST_Coordinate32Unqualified">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="51206400"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextIndent">
    <xsd:restriction base="ST_Coordinate32Unqualified">
      <xsd:minInclusive value="-51206400"/>
      <xsd:maxInclusive value="51206400"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextTabAlignType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="dec"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextTabStop">
    <xsd:attribute name="pos" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="algn" type="ST_TextTabAlignType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextTabStopList">
    <xsd:sequence>
      <xsd:element name="tab" type="CT_TextTabStop" minOccurs="0" maxOccurs="32"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TextLineBreak">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_TextCharacterProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TextSpacing">
    <xsd:choice>
      <xsd:element name="spcPct" type="CT_TextSpacingPercent"/>
      <xsd:element name="spcPts" type="CT_TextSpacingPoint"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextAlignType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="just"/>
      <xsd:enumeration value="justLow"/>
      <xsd:enumeration value="dist"/>
      <xsd:enumeration value="thaiDist"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextFontAlignType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="ctr"/>
      <xsd:enumeration value="base"/>
      <xsd:enumeration value="b"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextIndentLevelType">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="8"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextParagraphProperties">
    <xsd:sequence>
      <xsd:element name="lnSpc" type="CT_TextSpacing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spcBef" type="CT_TextSpacing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spcAft" type="CT_TextSpacing" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextBulletColor" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextBulletSize" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextBulletTypeface" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_TextBullet" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tabLst" type="CT_TextTabStopList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="defRPr" type="CT_TextCharacterProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="marL" type="ST_TextMargin" use="optional"/>
    <xsd:attribute name="marR" type="ST_TextMargin" use="optional"/>
    <xsd:attribute name="lvl" type="ST_TextIndentLevelType" use="optional"/>
    <xsd:attribute name="indent" type="ST_TextIndent" use="optional"/>
    <xsd:attribute name="algn" type="ST_TextAlignType" use="optional"/>
    <xsd:attribute name="defTabSz" type="ST_Coordinate32" use="optional"/>
    <xsd:attribute name="rtl" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="eaLnBrk" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="fontAlgn" type="ST_TextFontAlignType" use="optional"/>
    <xsd:attribute name="latinLnBrk" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="hangingPunct" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextField">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_TextCharacterProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pPr" type="CT_TextParagraphProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="t" type="xsd:string" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="type" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:group name="EG_TextRun">
    <xsd:choice>
      <xsd:element name="r" type="CT_RegularTextRun"/>
      <xsd:element name="br" type="CT_TextLineBreak"/>
      <xsd:element name="fld" type="CT_TextField"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_RegularTextRun">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_TextCharacterProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="t" type="xsd:string" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/picture"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" elementFormDefault="qualified"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/picture">
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:complexType name="CT_PictureNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvPicPr" type="a:CT_NonVisualPictureProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Picture">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="nvPicPr" type="CT_PictureNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="a:CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="pic" type="CT_Picture"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:import schemaLocation="shared-relationshipReference.xsd"
    namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"/>
  <xsd:element name="from" type="CT_Marker"/>
  <xsd:element name="to" type="CT_Marker"/>
  <xsd:complexType name="CT_AnchorClientData">
    <xsd:attribute name="fLocksWithSheet" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fPrintsWithSheet" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvSpPr" type="a:CT_NonVisualDrawingShapeProps" minOccurs="1" maxOccurs="1"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Shape">
    <xsd:sequence>
      <xsd:element name="nvSpPr" type="CT_ShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txBody" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="textlink" type="xsd:string" use="optional"/>
    <xsd:attribute name="fLocksText" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConnectorNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvCxnSpPr" type="a:CT_NonVisualConnectorProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Connector">
    <xsd:sequence>
      <xsd:element name="nvCxnSpPr" type="CT_ConnectorNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PictureNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvPicPr" type="a:CT_NonVisualPictureProperties" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Picture">
    <xsd:sequence>
      <xsd:element name="nvPicPr" type="CT_PictureNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="a:CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectFrameNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="a:CT_NonVisualGraphicFrameProperties"
        minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectFrame">
    <xsd:sequence>
      <xsd:element name="nvGraphicFramePr" type="CT_GraphicalObjectFrameNonVisual" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="xfrm" type="a:CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="macro" type="xsd:string" use="optional"/>
    <xsd:attribute name="fPublished" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGrpSpPr" type="a:CT_NonVisualGroupDrawingShapeProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShape">
    <xsd:sequence>
      <xsd:element name="nvGrpSpPr" type="CT_GroupShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grpSpPr" type="a:CT_GroupShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="sp" type="CT_Shape"/>
        <xsd:element name="grpSp" type="CT_GroupShape"/>
        <xsd:element name="graphicFrame" type="CT_GraphicalObjectFrame"/>
        <xsd:element name="cxnSp" type="CT_Connector"/>
        <xsd:element name="pic" type="CT_Picture"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_ObjectChoices">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="sp" type="CT_Shape"/>
        <xsd:element name="grpSp" type="CT_GroupShape"/>
        <xsd:element name="graphicFrame" type="CT_GraphicalObjectFrame"/>
        <xsd:element name="cxnSp" type="CT_Connector"/>
        <xsd:element name="pic" type="CT_Picture"/>
        <xsd:element name="contentPart" type="CT_Rel"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_Rel">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ColID">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RowID">
    <xsd:restriction base="xsd:int">
      <xsd:minInclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Marker">
    <xsd:sequence>
      <xsd:element name="col" type="ST_ColID"/>
      <xsd:element name="colOff" type="a:ST_Coordinate"/>
      <xsd:element name="row" type="ST_RowID"/>
      <xsd:element name="rowOff" type="a:ST_Coordinate"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_EditAs">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="twoCell"/>
      <xsd:enumeration value="oneCell"/>
      <xsd:enumeration value="absolute"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TwoCellAnchor">
    <xsd:sequence>
      <xsd:element name="from" type="CT_Marker"/>
      <xsd:element name="to" type="CT_Marker"/>
      <xsd:group ref="EG_ObjectChoices"/>
      <xsd:element name="clientData" type="CT_AnchorClientData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="editAs" type="ST_EditAs" use="optional" default="twoCell"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OneCellAnchor">
    <xsd:sequence>
      <xsd:element name="from" type="CT_Marker"/>
      <xsd:element name="ext" type="a:CT_PositiveSize2D"/>
      <xsd:group ref="EG_ObjectChoices"/>
      <xsd:element name="clientData" type="CT_AnchorClientData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AbsoluteAnchor">
    <xsd:sequence>
      <xsd:element name="pos" type="a:CT_Point2D"/>
      <xsd:element name="ext" type="a:CT_PositiveSize2D"/>
      <xsd:group ref="EG_ObjectChoices"/>
      <xsd:element name="clientData" type="CT_AnchorClientData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_Anchor">
    <xsd:choice>
      <xsd:element name="twoCellAnchor" type="CT_TwoCellAnchor"/>
      <xsd:element name="oneCellAnchor" type="CT_OneCellAnchor"/>
      <xsd:element name="absoluteAnchor" type="CT_AbsoluteAnchor"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_Drawing">
    <xsd:sequence>
      <xsd:group ref="EG_Anchor" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="wsDr" type="CT_Drawing"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  xmlns:dpct="http://schemas.openxmlformats.org/drawingml/2006/picture"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
  targetNamespace="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:import schemaLocation="wml.xsd"
    namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/picture"
    schemaLocation="dml-picture.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:complexType name="CT_EffectExtent">
    <xsd:attribute name="l" type="a:ST_Coordinate" use="required"/>
    <xsd:attribute name="t" type="a:ST_Coordinate" use="required"/>
    <xsd:attribute name="r" type="a:ST_Coordinate" use="required"/>
    <xsd:attribute name="b" type="a:ST_Coordinate" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_WrapDistance">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Inline">
    <xsd:sequence>
      <xsd:element name="extent" type="a:CT_PositiveSize2D"/>
      <xsd:element name="effectExtent" type="CT_EffectExtent" minOccurs="0"/>
      <xsd:element name="docPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="a:CT_NonVisualGraphicFrameProperties"
        minOccurs="0" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="distT" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distB" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distL" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distR" type="ST_WrapDistance" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_WrapText">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="bothSides"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="largest"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_WrapPath">
    <xsd:sequence>
      <xsd:element name="start" type="a:CT_Point2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="lineTo" type="a:CT_Point2D" minOccurs="2" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="edited" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WrapNone"/>
  <xsd:complexType name="CT_WrapSquare">
    <xsd:sequence>
      <xsd:element name="effectExtent" type="CT_EffectExtent" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="wrapText" type="ST_WrapText" use="required"/>
    <xsd:attribute name="distT" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distB" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distL" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distR" type="ST_WrapDistance" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WrapTight">
    <xsd:sequence>
      <xsd:element name="wrapPolygon" type="CT_WrapPath" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="wrapText" type="ST_WrapText" use="required"/>
    <xsd:attribute name="distL" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distR" type="ST_WrapDistance" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WrapThrough">
    <xsd:sequence>
      <xsd:element name="wrapPolygon" type="CT_WrapPath" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="wrapText" type="ST_WrapText" use="required"/>
    <xsd:attribute name="distL" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distR" type="ST_WrapDistance" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WrapTopBottom">
    <xsd:sequence>
      <xsd:element name="effectExtent" type="CT_EffectExtent" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="distT" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distB" type="ST_WrapDistance" use="optional"/>
  </xsd:complexType>
  <xsd:group name="EG_WrapType">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="wrapNone" type="CT_WrapNone" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="wrapSquare" type="CT_WrapSquare" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="wrapTight" type="CT_WrapTight" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="wrapThrough" type="CT_WrapThrough" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="wrapTopAndBottom" type="CT_WrapTopBottom" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:group>
  <xsd:simpleType name="ST_PositionOffset">
    <xsd:restriction base="xsd:int"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AlignH">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="inside"/>
      <xsd:enumeration value="outside"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RelFromH">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="column"/>
      <xsd:enumeration value="character"/>
      <xsd:enumeration value="leftMargin"/>
      <xsd:enumeration value="rightMargin"/>
      <xsd:enumeration value="insideMargin"/>
      <xsd:enumeration value="outsideMargin"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PosH">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="align" type="ST_AlignH" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="posOffset" type="ST_PositionOffset" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
    <xsd:attribute name="relativeFrom" type="ST_RelFromH" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_AlignV">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="inside"/>
      <xsd:enumeration value="outside"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RelFromV">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="paragraph"/>
      <xsd:enumeration value="line"/>
      <xsd:enumeration value="topMargin"/>
      <xsd:enumeration value="bottomMargin"/>
      <xsd:enumeration value="insideMargin"/>
      <xsd:enumeration value="outsideMargin"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PosV">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="align" type="ST_AlignV" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="posOffset" type="ST_PositionOffset" minOccurs="1" maxOccurs="1"/>
      </xsd:choice>
    </xsd:sequence>
    <xsd:attribute name="relativeFrom" type="ST_RelFromV" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Anchor">
    <xsd:sequence>
      <xsd:element name="simplePos" type="a:CT_Point2D"/>
      <xsd:element name="positionH" type="CT_PosH"/>
      <xsd:element name="positionV" type="CT_PosV"/>
      <xsd:element name="extent" type="a:CT_PositiveSize2D"/>
      <xsd:element name="effectExtent" type="CT_EffectExtent" minOccurs="0"/>
      <xsd:group ref="EG_WrapType"/>
      <xsd:element name="docPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="a:CT_NonVisualGraphicFrameProperties"
        minOccurs="0" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="distT" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distB" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distL" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="distR" type="ST_WrapDistance" use="optional"/>
    <xsd:attribute name="simplePos" type="xsd:boolean"/>
    <xsd:attribute name="relativeHeight" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="behindDoc" type="xsd:boolean" use="required"/>
    <xsd:attribute name="locked" type="xsd:boolean" use="required"/>
    <xsd:attribute name="layoutInCell" type="xsd:boolean" use="required"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="allowOverlap" type="xsd:boolean" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TxbxContent">
    <xsd:group ref="w:EG_BlockLevelElts" minOccurs="1" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextboxInfo">
    <xsd:sequence>
      <xsd:element name="txbxContent" type="CT_TxbxContent" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedShort" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LinkedTextboxInformation">
    <xsd:sequence>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedShort" use="required"/>
    <xsd:attribute name="seq" type="xsd:unsignedShort" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WordprocessingShape">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="0" maxOccurs="1"/>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="cNvSpPr" type="a:CT_NonVisualDrawingShapeProps" minOccurs="1"
          maxOccurs="1"/>
        <xsd:element name="cNvCnPr" type="a:CT_NonVisualConnectorProperties" minOccurs="1"
          maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="txbx" type="CT_TextboxInfo" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="linkedTxbx" type="CT_LinkedTextboxInformation" minOccurs="1"
          maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="bodyPr" type="a:CT_TextBodyProperties" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="normalEastAsianFlow" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicFrame">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvFrPr" type="a:CT_NonVisualGraphicFrameProperties" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="xfrm" type="a:CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_WordprocessingContentPartNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cNvContentPartPr" type="a:CT_NonVisualContentPartProperties" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_WordprocessingContentPart">
    <xsd:sequence>
      <xsd:element name="nvContentPartPr" type="CT_WordprocessingContentPartNonVisual" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="xfrm" type="a:CT_Transform2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bwMode" type="a:ST_BlackWhiteMode" use="optional"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WordprocessingGroup">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cNvGrpSpPr" type="a:CT_NonVisualGroupDrawingShapeProps" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="grpSpPr" type="a:CT_GroupShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element ref="wsp"/>
        <xsd:element name="grpSp" type="CT_WordprocessingGroup"/>
        <xsd:element name="graphicFrame" type="CT_GraphicFrame"/>
        <xsd:element ref="dpct:pic"/>
        <xsd:element name="contentPart" type="CT_WordprocessingContentPart"/>
      </xsd:choice>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_WordprocessingCanvas">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="bg" type="a:CT_BackgroundFormatting" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="whole" type="a:CT_WholeE2oFormatting" minOccurs="0" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element ref="wsp"/>
        <xsd:element ref="dpct:pic"/>
        <xsd:element name="contentPart" type="CT_WordprocessingContentPart"/>
        <xsd:element ref="wgp"/>
        <xsd:element name="graphicFrame" type="CT_GraphicFrame"/>
      </xsd:choice>
      <xsd:element name="extLst" type="a:CT_OfficeArtExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="wpc" type="CT_WordprocessingCanvas"/>
  <xsd:element name="wgp" type="CT_WordprocessingGroup"/>
  <xsd:element name="wsp" type="CT_WordprocessingShape"/>
  <xsd:element name="inline" type="CT_Inline"/>
  <xsd:element name="anchor" type="CT_Anchor"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/presentationml/2006/main"
  xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  elementFormDefault="qualified"
  targetNamespace="http://schemas.openxmlformats.org/presentationml/2006/main">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main"
    schemaLocation="dml-main.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:simpleType name="ST_TransitionSideDirectionType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="l"/>
      <xsd:enumeration value="u"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="d"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TransitionCornerDirectionType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="lu"/>
      <xsd:enumeration value="ru"/>
      <xsd:enumeration value="ld"/>
      <xsd:enumeration value="rd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TransitionInOutDirectionType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="out"/>
      <xsd:enumeration value="in"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SideDirectionTransition">
    <xsd:attribute name="dir" type="ST_TransitionSideDirectionType" use="optional" default="l"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CornerDirectionTransition">
    <xsd:attribute name="dir" type="ST_TransitionCornerDirectionType" use="optional" default="lu"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TransitionEightDirectionType">
    <xsd:union memberTypes="ST_TransitionSideDirectionType ST_TransitionCornerDirectionType"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_EightDirectionTransition">
    <xsd:attribute name="dir" type="ST_TransitionEightDirectionType" use="optional" default="l"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OrientationTransition">
    <xsd:attribute name="dir" type="ST_Direction" use="optional" default="horz"/>
  </xsd:complexType>
  <xsd:complexType name="CT_InOutTransition">
    <xsd:attribute name="dir" type="ST_TransitionInOutDirectionType" use="optional" default="out"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OptionalBlackTransition">
    <xsd:attribute name="thruBlk" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SplitTransition">
    <xsd:attribute name="orient" type="ST_Direction" use="optional" default="horz"/>
    <xsd:attribute name="dir" type="ST_TransitionInOutDirectionType" use="optional" default="out"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WheelTransition">
    <xsd:attribute name="spokes" type="xsd:unsignedInt" use="optional" default="4"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TransitionStartSoundAction">
    <xsd:sequence>
      <xsd:element minOccurs="1" maxOccurs="1" name="snd" type="a:CT_EmbeddedWAVAudioFile"/>
    </xsd:sequence>
    <xsd:attribute name="loop" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TransitionSoundAction">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="stSnd" type="CT_TransitionStartSoundAction"/>
      <xsd:element name="endSnd" type="CT_Empty"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TransitionSpeed">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="slow"/>
      <xsd:enumeration value="med"/>
      <xsd:enumeration value="fast"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideTransition">
    <xsd:sequence>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="blinds" type="CT_OrientationTransition"/>
        <xsd:element name="checker" type="CT_OrientationTransition"/>
        <xsd:element name="circle" type="CT_Empty"/>
        <xsd:element name="dissolve" type="CT_Empty"/>
        <xsd:element name="comb" type="CT_OrientationTransition"/>
        <xsd:element name="cover" type="CT_EightDirectionTransition"/>
        <xsd:element name="cut" type="CT_OptionalBlackTransition"/>
        <xsd:element name="diamond" type="CT_Empty"/>
        <xsd:element name="fade" type="CT_OptionalBlackTransition"/>
        <xsd:element name="newsflash" type="CT_Empty"/>
        <xsd:element name="plus" type="CT_Empty"/>
        <xsd:element name="pull" type="CT_EightDirectionTransition"/>
        <xsd:element name="push" type="CT_SideDirectionTransition"/>
        <xsd:element name="random" type="CT_Empty"/>
        <xsd:element name="randomBar" type="CT_OrientationTransition"/>
        <xsd:element name="split" type="CT_SplitTransition"/>
        <xsd:element name="strips" type="CT_CornerDirectionTransition"/>
        <xsd:element name="wedge" type="CT_Empty"/>
        <xsd:element name="wheel" type="CT_WheelTransition"/>
        <xsd:element name="wipe" type="CT_SideDirectionTransition"/>
        <xsd:element name="zoom" type="CT_InOutTransition"/>
      </xsd:choice>
      <xsd:element name="sndAc" minOccurs="0" maxOccurs="1" type="CT_TransitionSoundAction"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="spd" type="ST_TransitionSpeed" use="optional" default="fast"/>
    <xsd:attribute name="advClick" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="advTm" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLTimeIndefinite">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="indefinite"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTime">
    <xsd:union memberTypes="xsd:unsignedInt ST_TLTimeIndefinite"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeID">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLIterateIntervalTime">
    <xsd:attribute name="val" type="ST_TLTime" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLIterateIntervalPercentage">
    <xsd:attribute name="val" type="a:ST_PositivePercentage" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_IterateType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="el"/>
      <xsd:enumeration value="wd"/>
      <xsd:enumeration value="lt"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLIterateData">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="tmAbs" type="CT_TLIterateIntervalTime"/>
      <xsd:element name="tmPct" type="CT_TLIterateIntervalPercentage"/>
    </xsd:choice>
    <xsd:attribute name="type" type="ST_IterateType" use="optional" default="el"/>
    <xsd:attribute name="backwards" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLSubShapeId">
    <xsd:attribute name="spid" type="a:ST_ShapeID" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTextTargetElement">
    <xsd:choice minOccurs="0" maxOccurs="1">
      <xsd:element name="charRg" type="CT_IndexRange"/>
      <xsd:element name="pRg" type="CT_IndexRange"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLChartSubelementType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="gridLegend"/>
      <xsd:enumeration value="series"/>
      <xsd:enumeration value="category"/>
      <xsd:enumeration value="ptInSeries"/>
      <xsd:enumeration value="ptInCategory"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLOleChartTargetElement">
    <xsd:attribute name="type" type="ST_TLChartSubelementType" use="required"/>
    <xsd:attribute name="lvl" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLShapeTargetElement">
    <xsd:choice minOccurs="0" maxOccurs="1">
      <xsd:element name="bg" type="CT_Empty"/>
      <xsd:element name="subSp" type="CT_TLSubShapeId"/>
      <xsd:element name="oleChartEl" type="CT_TLOleChartTargetElement"/>
      <xsd:element name="txEl" type="CT_TLTextTargetElement"/>
      <xsd:element name="graphicEl" type="a:CT_AnimationElementChoice"/>
    </xsd:choice>
    <xsd:attribute name="spid" type="a:ST_DrawingElementId" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTimeTargetElement">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="sldTgt" type="CT_Empty"/>
      <xsd:element name="sndTgt" type="a:CT_EmbeddedWAVAudioFile"/>
      <xsd:element name="spTgt" type="CT_TLShapeTargetElement"/>
      <xsd:element name="inkTgt" type="CT_TLSubShapeId"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTriggerTimeNodeID">
    <xsd:attribute name="val" type="ST_TLTimeNodeID" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLTriggerRuntimeNode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="first"/>
      <xsd:enumeration value="last"/>
      <xsd:enumeration value="all"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLTriggerRuntimeNode">
    <xsd:attribute name="val" type="ST_TLTriggerRuntimeNode" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLTriggerEvent">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="onBegin"/>
      <xsd:enumeration value="onEnd"/>
      <xsd:enumeration value="begin"/>
      <xsd:enumeration value="end"/>
      <xsd:enumeration value="onClick"/>
      <xsd:enumeration value="onDblClick"/>
      <xsd:enumeration value="onMouseOver"/>
      <xsd:enumeration value="onMouseOut"/>
      <xsd:enumeration value="onNext"/>
      <xsd:enumeration value="onPrev"/>
      <xsd:enumeration value="onStopAudio"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLTimeCondition">
    <xsd:choice minOccurs="0" maxOccurs="1">
      <xsd:element name="tgtEl" type="CT_TLTimeTargetElement"/>
      <xsd:element name="tn" type="CT_TLTriggerTimeNodeID"/>
      <xsd:element name="rtn" type="CT_TLTriggerRuntimeNode"/>
    </xsd:choice>
    <xsd:attribute name="evt" use="optional" type="ST_TLTriggerEvent"/>
    <xsd:attribute name="delay" type="ST_TLTime" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTimeConditionList">
    <xsd:sequence>
      <xsd:element name="cond" type="CT_TLTimeCondition" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TimeNodeList">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element name="par" type="CT_TLTimeNodeParallel"/>
      <xsd:element name="seq" type="CT_TLTimeNodeSequence"/>
      <xsd:element name="excl" type="CT_TLTimeNodeExclusive"/>
      <xsd:element name="anim" type="CT_TLAnimateBehavior"/>
      <xsd:element name="animClr" type="CT_TLAnimateColorBehavior"/>
      <xsd:element name="animEffect" type="CT_TLAnimateEffectBehavior"/>
      <xsd:element name="animMotion" type="CT_TLAnimateMotionBehavior"/>
      <xsd:element name="animRot" type="CT_TLAnimateRotationBehavior"/>
      <xsd:element name="animScale" type="CT_TLAnimateScaleBehavior"/>
      <xsd:element name="cmd" type="CT_TLCommandBehavior"/>
      <xsd:element name="set" type="CT_TLSetBehavior"/>
      <xsd:element name="audio" type="CT_TLMediaNodeAudio"/>
      <xsd:element name="video" type="CT_TLMediaNodeVideo"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLTimeNodePresetClassType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="entr"/>
      <xsd:enumeration value="exit"/>
      <xsd:enumeration value="emph"/>
      <xsd:enumeration value="path"/>
      <xsd:enumeration value="verb"/>
      <xsd:enumeration value="mediacall"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeRestartType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="always"/>
      <xsd:enumeration value="whenNotActive"/>
      <xsd:enumeration value="never"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeFillType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="remove"/>
      <xsd:enumeration value="freeze"/>
      <xsd:enumeration value="hold"/>
      <xsd:enumeration value="transition"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeSyncType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="canSlip"/>
      <xsd:enumeration value="locked"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeMasterRelation">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sameClick"/>
      <xsd:enumeration value="lastClick"/>
      <xsd:enumeration value="nextClick"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLTimeNodeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="clickEffect"/>
      <xsd:enumeration value="withEffect"/>
      <xsd:enumeration value="afterEffect"/>
      <xsd:enumeration value="mainSeq"/>
      <xsd:enumeration value="interactiveSeq"/>
      <xsd:enumeration value="clickPar"/>
      <xsd:enumeration value="withGroup"/>
      <xsd:enumeration value="afterGroup"/>
      <xsd:enumeration value="tmRoot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLCommonTimeNodeData">
    <xsd:sequence>
      <xsd:element name="stCondLst" type="CT_TLTimeConditionList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="endCondLst" type="CT_TLTimeConditionList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="endSync" type="CT_TLTimeCondition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="iterate" type="CT_TLIterateData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="childTnLst" type="CT_TimeNodeList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="subTnLst" type="CT_TimeNodeList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_TLTimeNodeID" use="optional"/>
    <xsd:attribute name="presetID" type="xsd:int" use="optional"/>
    <xsd:attribute name="presetClass" type="ST_TLTimeNodePresetClassType" use="optional"/>
    <xsd:attribute name="presetSubtype" type="xsd:int" use="optional"/>
    <xsd:attribute name="dur" type="ST_TLTime" use="optional"/>
    <xsd:attribute name="repeatCount" type="ST_TLTime" use="optional" default="1000"/>
    <xsd:attribute name="repeatDur" type="ST_TLTime" use="optional"/>
    <xsd:attribute name="spd" type="a:ST_Percentage" use="optional" default="100%"/>
    <xsd:attribute name="accel" type="a:ST_PositiveFixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="decel" type="a:ST_PositiveFixedPercentage" use="optional" default="0%"/>
    <xsd:attribute name="autoRev" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="restart" type="ST_TLTimeNodeRestartType" use="optional"/>
    <xsd:attribute name="fill" type="ST_TLTimeNodeFillType" use="optional"/>
    <xsd:attribute name="syncBehavior" type="ST_TLTimeNodeSyncType" use="optional"/>
    <xsd:attribute name="tmFilter" type="xsd:string" use="optional"/>
    <xsd:attribute name="evtFilter" type="xsd:string" use="optional"/>
    <xsd:attribute name="display" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="masterRel" type="ST_TLTimeNodeMasterRelation" use="optional"/>
    <xsd:attribute name="bldLvl" type="xsd:int" use="optional"/>
    <xsd:attribute name="grpId" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="afterEffect" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="nodeType" type="ST_TLTimeNodeType" use="optional"/>
    <xsd:attribute name="nodePh" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTimeNodeParallel">
    <xsd:sequence>
      <xsd:element name="cTn" type="CT_TLCommonTimeNodeData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLNextActionType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="seek"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLPreviousActionType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="skipTimed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLTimeNodeSequence">
    <xsd:sequence>
      <xsd:element name="cTn" type="CT_TLCommonTimeNodeData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="prevCondLst" type="CT_TLTimeConditionList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="nextCondLst" type="CT_TLTimeConditionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="concurrent" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="prevAc" type="ST_TLPreviousActionType" use="optional"/>
    <xsd:attribute name="nextAc" type="ST_TLNextActionType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTimeNodeExclusive">
    <xsd:sequence>
      <xsd:element name="cTn" type="CT_TLCommonTimeNodeData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TLBehaviorAttributeNameList">
    <xsd:sequence>
      <xsd:element name="attrName" type="xsd:string" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLBehaviorAdditiveType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="base"/>
      <xsd:enumeration value="sum"/>
      <xsd:enumeration value="repl"/>
      <xsd:enumeration value="mult"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLBehaviorAccumulateType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="always"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLBehaviorTransformType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="pt"/>
      <xsd:enumeration value="img"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLBehaviorOverrideType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="childStyle"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLCommonBehaviorData">
    <xsd:sequence>
      <xsd:element name="cTn" type="CT_TLCommonTimeNodeData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tgtEl" type="CT_TLTimeTargetElement" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="attrNameLst" type="CT_TLBehaviorAttributeNameList" minOccurs="0"
        maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="additive" type="ST_TLBehaviorAdditiveType" use="optional"/>
    <xsd:attribute name="accumulate" type="ST_TLBehaviorAccumulateType" use="optional"/>
    <xsd:attribute name="xfrmType" type="ST_TLBehaviorTransformType" use="optional"/>
    <xsd:attribute name="from" type="xsd:string" use="optional"/>
    <xsd:attribute name="to" type="xsd:string" use="optional"/>
    <xsd:attribute name="by" type="xsd:string" use="optional"/>
    <xsd:attribute name="rctx" type="xsd:string" use="optional"/>
    <xsd:attribute name="override" type="ST_TLBehaviorOverrideType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimVariantBooleanVal">
    <xsd:attribute name="val" type="xsd:boolean" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimVariantIntegerVal">
    <xsd:attribute name="val" type="xsd:int" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimVariantFloatVal">
    <xsd:attribute name="val" type="xsd:float" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimVariantStringVal">
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimVariant">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="boolVal" type="CT_TLAnimVariantBooleanVal"/>
      <xsd:element name="intVal" type="CT_TLAnimVariantIntegerVal"/>
      <xsd:element name="fltVal" type="CT_TLAnimVariantFloatVal"/>
      <xsd:element name="strVal" type="CT_TLAnimVariantStringVal"/>
      <xsd:element name="clrVal" type="a:CT_Color"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLTimeAnimateValueTime">
    <xsd:union memberTypes="a:ST_PositiveFixedPercentage ST_TLTimeIndefinite"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLTimeAnimateValue">
    <xsd:sequence>
      <xsd:element name="val" type="CT_TLAnimVariant" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="tm" type="ST_TLTimeAnimateValueTime" use="optional" default="indefinite"/>
    <xsd:attribute name="fmla" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTimeAnimateValueList">
    <xsd:sequence>
      <xsd:element name="tav" type="CT_TLTimeAnimateValue" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLAnimateBehaviorCalcMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="discrete"/>
      <xsd:enumeration value="lin"/>
      <xsd:enumeration value="fmla"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLAnimateBehaviorValueType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="str"/>
      <xsd:enumeration value="num"/>
      <xsd:enumeration value="clr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLAnimateBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tavLst" type="CT_TLTimeAnimateValueList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="by" type="xsd:string" use="optional"/>
    <xsd:attribute name="from" type="xsd:string" use="optional"/>
    <xsd:attribute name="to" type="xsd:string" use="optional"/>
    <xsd:attribute name="calcmode" type="ST_TLAnimateBehaviorCalcMode" use="optional"/>
    <xsd:attribute name="valueType" type="ST_TLAnimateBehaviorValueType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLByRgbColorTransform">
    <xsd:attribute name="r" type="a:ST_FixedPercentage" use="required"/>
    <xsd:attribute name="g" type="a:ST_FixedPercentage" use="required"/>
    <xsd:attribute name="b" type="a:ST_FixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLByHslColorTransform">
    <xsd:attribute name="h" type="a:ST_Angle" use="required"/>
    <xsd:attribute name="s" type="a:ST_FixedPercentage" use="required"/>
    <xsd:attribute name="l" type="a:ST_FixedPercentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLByAnimateColorTransform">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="rgb" type="CT_TLByRgbColorTransform"/>
      <xsd:element name="hsl" type="CT_TLByHslColorTransform"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLAnimateColorSpace">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="rgb"/>
      <xsd:enumeration value="hsl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLAnimateColorDirection">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="cw"/>
      <xsd:enumeration value="ccw"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLAnimateColorBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="by" type="CT_TLByAnimateColorTransform" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="from" type="a:CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="to" type="a:CT_Color" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="clrSpc" type="ST_TLAnimateColorSpace" use="optional"/>
    <xsd:attribute name="dir" type="ST_TLAnimateColorDirection" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLAnimateEffectTransition">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="in"/>
      <xsd:enumeration value="out"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLAnimateEffectBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="progress" type="CT_TLAnimVariant" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="transition" type="ST_TLAnimateEffectTransition" default="in" use="optional"/>
    <xsd:attribute name="filter" type="xsd:string" use="optional"/>
    <xsd:attribute name="prLst" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLAnimateMotionBehaviorOrigin">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="parent"/>
      <xsd:enumeration value="layout"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TLAnimateMotionPathEditMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="relative"/>
      <xsd:enumeration value="fixed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLPoint">
    <xsd:attribute name="x" type="a:ST_Percentage" use="required"/>
    <xsd:attribute name="y" type="a:ST_Percentage" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimateMotionBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="by" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="from" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="to" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rCtr" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="origin" type="ST_TLAnimateMotionBehaviorOrigin" use="optional"/>
    <xsd:attribute name="path" type="xsd:string" use="optional"/>
    <xsd:attribute name="pathEditMode" type="ST_TLAnimateMotionPathEditMode" use="optional"/>
    <xsd:attribute name="rAng" type="a:ST_Angle" use="optional"/>
    <xsd:attribute name="ptsTypes" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimateRotationBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="by" type="a:ST_Angle" use="optional"/>
    <xsd:attribute name="from" type="a:ST_Angle" use="optional"/>
    <xsd:attribute name="to" type="a:ST_Angle" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLAnimateScaleBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="by" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="from" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="to" type="CT_TLPoint" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="zoomContents" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLCommandType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="evt"/>
      <xsd:enumeration value="call"/>
      <xsd:enumeration value="verb"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLCommandBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute type="ST_TLCommandType" name="type" use="optional"/>
    <xsd:attribute name="cmd" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLSetBehavior">
    <xsd:sequence>
      <xsd:element name="cBhvr" type="CT_TLCommonBehaviorData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="to" type="CT_TLAnimVariant" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TLCommonMediaNodeData">
    <xsd:sequence>
      <xsd:element name="cTn" type="CT_TLCommonTimeNodeData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tgtEl" type="CT_TLTimeTargetElement" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="vol" type="a:ST_PositiveFixedPercentage" default="50%" use="optional"/>
    <xsd:attribute name="mute" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="numSld" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="showWhenStopped" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLMediaNodeAudio">
    <xsd:sequence>
      <xsd:element name="cMediaNode" type="CT_TLCommonMediaNodeData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="isNarration" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLMediaNodeVideo">
    <xsd:sequence>
      <xsd:element name="cMediaNode" type="CT_TLCommonMediaNodeData" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="fullScrn" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:attributeGroup name="AG_TLBuild">
    <xsd:attribute name="spid" type="a:ST_DrawingElementId" use="required"/>
    <xsd:attribute name="grpId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="uiExpand" type="xsd:boolean" use="optional" default="false"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_TLTemplate">
    <xsd:sequence>
      <xsd:element name="tnLst" type="CT_TimeNodeList" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="lvl" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLTemplateList">
    <xsd:sequence>
      <xsd:element name="tmpl" type="CT_TLTemplate" minOccurs="0" maxOccurs="9"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLParaBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="allAtOnce"/>
      <xsd:enumeration value="p"/>
      <xsd:enumeration value="cust"/>
      <xsd:enumeration value="whole"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLBuildParagraph">
    <xsd:sequence>
      <xsd:element name="tmplLst" type="CT_TLTemplateList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_TLBuild"/>
    <xsd:attribute name="build" type="ST_TLParaBuildType" use="optional" default="whole"/>
    <xsd:attribute name="bldLvl" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="animBg" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoUpdateAnimBg" type="xsd:boolean" default="true" use="optional"/>
    <xsd:attribute name="rev" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="advAuto" type="ST_TLTime" use="optional" default="indefinite"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLDiagramBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="whole"/>
      <xsd:enumeration value="depthByNode"/>
      <xsd:enumeration value="depthByBranch"/>
      <xsd:enumeration value="breadthByNode"/>
      <xsd:enumeration value="breadthByLvl"/>
      <xsd:enumeration value="cw"/>
      <xsd:enumeration value="cwIn"/>
      <xsd:enumeration value="cwOut"/>
      <xsd:enumeration value="ccw"/>
      <xsd:enumeration value="ccwIn"/>
      <xsd:enumeration value="ccwOut"/>
      <xsd:enumeration value="inByRing"/>
      <xsd:enumeration value="outByRing"/>
      <xsd:enumeration value="up"/>
      <xsd:enumeration value="down"/>
      <xsd:enumeration value="allAtOnce"/>
      <xsd:enumeration value="cust"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLBuildDiagram">
    <xsd:attributeGroup ref="AG_TLBuild"/>
    <xsd:attribute name="bld" type="ST_TLDiagramBuildType" use="optional" default="whole"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TLOleChartBuildType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="allAtOnce"/>
      <xsd:enumeration value="series"/>
      <xsd:enumeration value="category"/>
      <xsd:enumeration value="seriesEl"/>
      <xsd:enumeration value="categoryEl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TLOleBuildChart">
    <xsd:attributeGroup ref="AG_TLBuild"/>
    <xsd:attribute name="bld" type="ST_TLOleChartBuildType" use="optional" default="allAtOnce"/>
    <xsd:attribute name="animBg" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TLGraphicalObjectBuild">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="bldAsOne" type="CT_Empty"/>
      <xsd:element name="bldSub" type="a:CT_AnimationGraphicalObjectBuildProperties"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_TLBuild"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BuildList">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element name="bldP" type="CT_TLBuildParagraph"/>
      <xsd:element name="bldDgm" type="CT_TLBuildDiagram"/>
      <xsd:element name="bldOleChart" type="CT_TLOleBuildChart"/>
      <xsd:element name="bldGraphic" type="CT_TLGraphicalObjectBuild"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideTiming">
    <xsd:sequence>
      <xsd:element name="tnLst" type="CT_TimeNodeList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bldLst" type="CT_BuildList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Empty"/>
  <xsd:simpleType name="ST_Name">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Direction">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="horz"/>
      <xsd:enumeration value="vert"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Index">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_IndexRange">
    <xsd:attribute name="st" type="ST_Index" use="required"/>
    <xsd:attribute name="end" type="ST_Index" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideRelationshipListEntry">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideRelationshipList">
    <xsd:sequence>
      <xsd:element name="sld" type="CT_SlideRelationshipListEntry" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomShowId">
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_SlideListChoice">
    <xsd:choice>
      <xsd:element name="sldAll" type="CT_Empty"/>
      <xsd:element name="sldRg" type="CT_IndexRange"/>
      <xsd:element name="custShow" type="CT_CustomShowId"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_CustomerData">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TagsData">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomerDataList">
    <xsd:sequence minOccurs="0" maxOccurs="1">
      <xsd:element name="custData" type="CT_CustomerData" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="tags" type="CT_TagsData" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Extension">
    <xsd:sequence>
      <xsd:any processContents="lax" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="xsd:token" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_ExtensionList">
    <xsd:sequence>
      <xsd:element name="ext" type="CT_Extension" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_ExtensionList">
    <xsd:sequence>
      <xsd:group ref="EG_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExtensionListModify">
    <xsd:sequence>
      <xsd:group ref="EG_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="mod" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommentAuthor">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="name" type="ST_Name" use="required"/>
    <xsd:attribute name="initials" type="ST_Name" use="required"/>
    <xsd:attribute name="lastIdx" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="clrIdx" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommentAuthorList">
    <xsd:sequence>
      <xsd:element name="cmAuthor" type="CT_CommentAuthor" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="cmAuthorLst" type="CT_CommentAuthorList"/>
  <xsd:complexType name="CT_Comment">
    <xsd:sequence>
      <xsd:element name="pos" type="a:CT_Point2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="text" type="xsd:string" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="authorId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="dt" type="xsd:dateTime" use="optional"/>
    <xsd:attribute name="idx" type="ST_Index" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommentList">
    <xsd:sequence>
      <xsd:element name="cm" type="CT_Comment" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="cmLst" type="CT_CommentList"/>
  <xsd:attributeGroup name="AG_Ole">
    <xsd:attribute name="spid" type="a:ST_ShapeID" use="optional"/>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="showAsIcon" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="imgW" type="a:ST_PositiveCoordinate32" use="optional"/>
    <xsd:attribute name="imgH" type="a:ST_PositiveCoordinate32" use="optional"/>
  </xsd:attributeGroup>
  <xsd:simpleType name="ST_OleObjectFollowColorScheme">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="full"/>
      <xsd:enumeration value="textAndBackground"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OleObjectEmbed">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="followColorScheme" type="ST_OleObjectFollowColorScheme" use="optional"
      default="none"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OleObjectLink">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="updateAutomatic" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OleObject">
    <xsd:sequence>
      <xsd:choice minOccurs="1" maxOccurs="1">
        <xsd:element name="embed" type="CT_OleObjectEmbed"/>
        <xsd:element name="link" type="CT_OleObjectLink"/>
      </xsd:choice>
      <xsd:element name="pic" type="CT_Picture" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Ole"/>
    <xsd:attribute name="progId" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:element name="oleObj" type="CT_OleObject"/>
  <xsd:complexType name="CT_Control">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pic" type="CT_Picture" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Ole"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ControlList">
    <xsd:sequence>
      <xsd:element name="control" type="CT_Control" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_SlideId">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="256"/>
      <xsd:maxExclusive value="2147483648"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideIdListEntry">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_SlideId" use="required"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideIdList">
    <xsd:sequence>
      <xsd:element name="sldId" type="CT_SlideIdListEntry" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_SlideMasterId">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="2147483648"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideMasterIdListEntry">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_SlideMasterId" use="optional"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideMasterIdList">
    <xsd:sequence>
      <xsd:element name="sldMasterId" type="CT_SlideMasterIdListEntry" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NotesMasterIdListEntry">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NotesMasterIdList">
    <xsd:sequence>
      <xsd:element name="notesMasterId" type="CT_NotesMasterIdListEntry" minOccurs="0" maxOccurs="1"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_HandoutMasterIdListEntry">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_HandoutMasterIdList">
    <xsd:sequence>
      <xsd:element name="handoutMasterId" type="CT_HandoutMasterIdListEntry" minOccurs="0"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EmbeddedFontDataId">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_EmbeddedFontListEntry">
    <xsd:sequence>
      <xsd:element name="font" type="a:CT_TextFont" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="regular" type="CT_EmbeddedFontDataId" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bold" type="CT_EmbeddedFontDataId" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="italic" type="CT_EmbeddedFontDataId" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="boldItalic" type="CT_EmbeddedFontDataId" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EmbeddedFontList">
    <xsd:sequence>
      <xsd:element name="embeddedFont" type="CT_EmbeddedFontListEntry" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SmartTags">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomShow">
    <xsd:sequence>
      <xsd:element name="sldLst" type="CT_SlideRelationshipList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="ST_Name" use="required"/>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomShowList">
    <xsd:sequence>
      <xsd:element name="custShow" type="CT_CustomShow" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_PhotoAlbumLayout">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="fitToSlide"/>
      <xsd:enumeration value="1pic"/>
      <xsd:enumeration value="2pic"/>
      <xsd:enumeration value="4pic"/>
      <xsd:enumeration value="1picTitle"/>
      <xsd:enumeration value="2picTitle"/>
      <xsd:enumeration value="4picTitle"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PhotoAlbumFrameShape">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="frameStyle1"/>
      <xsd:enumeration value="frameStyle2"/>
      <xsd:enumeration value="frameStyle3"/>
      <xsd:enumeration value="frameStyle4"/>
      <xsd:enumeration value="frameStyle5"/>
      <xsd:enumeration value="frameStyle6"/>
      <xsd:enumeration value="frameStyle7"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PhotoAlbum">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bw" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showCaptions" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="layout" type="ST_PhotoAlbumLayout" use="optional" default="fitToSlide"/>
    <xsd:attribute name="frame" type="ST_PhotoAlbumFrameShape" use="optional" default="frameStyle1"
    />
  </xsd:complexType>
  <xsd:simpleType name="ST_SlideSizeCoordinate">
    <xsd:restriction base="a:ST_PositiveCoordinate32">
      <xsd:minInclusive value="914400"/>
      <xsd:maxInclusive value="51206400"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SlideSizeType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="screen4x3"/>
      <xsd:enumeration value="letter"/>
      <xsd:enumeration value="A4"/>
      <xsd:enumeration value="35mm"/>
      <xsd:enumeration value="overhead"/>
      <xsd:enumeration value="banner"/>
      <xsd:enumeration value="custom"/>
      <xsd:enumeration value="ledger"/>
      <xsd:enumeration value="A3"/>
      <xsd:enumeration value="B4ISO"/>
      <xsd:enumeration value="B5ISO"/>
      <xsd:enumeration value="B4JIS"/>
      <xsd:enumeration value="B5JIS"/>
      <xsd:enumeration value="hagakiCard"/>
      <xsd:enumeration value="screen16x9"/>
      <xsd:enumeration value="screen16x10"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideSize">
    <xsd:attribute name="cx" type="ST_SlideSizeCoordinate" use="required"/>
    <xsd:attribute name="cy" type="ST_SlideSizeCoordinate" use="required"/>
    <xsd:attribute name="type" type="ST_SlideSizeType" use="optional" default="custom"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Kinsoku">
    <xsd:attribute name="lang" type="xsd:string" use="optional"/>
    <xsd:attribute name="invalStChars" type="xsd:string" use="required"/>
    <xsd:attribute name="invalEndChars" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BookmarkIdSeed">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="1"/>
      <xsd:maxExclusive value="2147483648"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ModifyVerifier">
    <xsd:attribute name="algorithmName" type="xsd:string" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinValue" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cryptProviderType" type="s:ST_CryptProv" use="optional"/>
    <xsd:attribute name="cryptAlgorithmClass" type="s:ST_AlgClass" use="optional"/>
    <xsd:attribute name="cryptAlgorithmType" type="s:ST_AlgType" use="optional"/>
    <xsd:attribute name="cryptAlgorithmSid" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="spinCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="saltData" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="hashData" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="cryptProvider" type="xsd:string" use="optional"/>
    <xsd:attribute name="algIdExt" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="algIdExtSource" type="xsd:string" use="optional"/>
    <xsd:attribute name="cryptProviderTypeExt" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cryptProviderTypeExtSource" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Presentation">
    <xsd:sequence>
      <xsd:element name="sldMasterIdLst" type="CT_SlideMasterIdList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="notesMasterIdLst" type="CT_NotesMasterIdList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="handoutMasterIdLst" type="CT_HandoutMasterIdList" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="sldIdLst" type="CT_SlideIdList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sldSz" type="CT_SlideSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="notesSz" type="a:CT_PositiveSize2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="smartTags" type="CT_SmartTags" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="embeddedFontLst" type="CT_EmbeddedFontList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="custShowLst" type="CT_CustomShowList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="photoAlbum" type="CT_PhotoAlbum" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="custDataLst" type="CT_CustomerDataList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="kinsoku" type="CT_Kinsoku" minOccurs="0"/>
      <xsd:element name="defaultTextStyle" type="a:CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="modifyVerifier" type="CT_ModifyVerifier" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="serverZoom" type="a:ST_Percentage" use="optional" default="50%"/>
    <xsd:attribute name="firstSlideNum" type="xsd:int" use="optional" default="1"/>
    <xsd:attribute name="showSpecialPlsOnTitleSld" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="rtl" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="removePersonalInfoOnSave" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="compatMode" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="strictFirstAndLastChars" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="embedTrueTypeFonts" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="saveSubsetFonts" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoCompressPictures" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="bookmarkIdSeed" type="ST_BookmarkIdSeed" use="optional" default="1"/>
    <xsd:attribute name="conformance" type="s:ST_ConformanceClass"/>
  </xsd:complexType>
  <xsd:element name="presentation" type="CT_Presentation"/>
  <xsd:complexType name="CT_HtmlPublishProperties">
    <xsd:sequence>
      <xsd:group ref="EG_SlideListChoice" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="showSpeakerNotes" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="target" type="xsd:string" use="optional"/>
    <xsd:attribute name="title" type="xsd:string" use="optional" default=""/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_WebColorType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="browser"/>
      <xsd:enumeration value="presentationText"/>
      <xsd:enumeration value="presentationAccent"/>
      <xsd:enumeration value="whiteTextOnBlack"/>
      <xsd:enumeration value="blackTextOnWhite"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_WebScreenSize">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="544x376"/>
      <xsd:enumeration value="640x480"/>
      <xsd:enumeration value="720x512"/>
      <xsd:enumeration value="800x600"/>
      <xsd:enumeration value="1024x768"/>
      <xsd:enumeration value="1152x882"/>
      <xsd:enumeration value="1152x900"/>
      <xsd:enumeration value="1280x1024"/>
      <xsd:enumeration value="1600x1200"/>
      <xsd:enumeration value="1800x1400"/>
      <xsd:enumeration value="1920x1200"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_WebEncoding">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_WebProperties">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="showAnimation" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="resizeGraphics" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="allowPng" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="relyOnVml" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="organizeInFolders" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="useLongFilenames" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="imgSz" type="ST_WebScreenSize" use="optional" default="800x600"/>
    <xsd:attribute name="encoding" type="ST_WebEncoding" use="optional" default=""/>
    <xsd:attribute name="clr" type="ST_WebColorType" use="optional" default="whiteTextOnBlack"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PrintWhat">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="slides"/>
      <xsd:enumeration value="handouts1"/>
      <xsd:enumeration value="handouts2"/>
      <xsd:enumeration value="handouts3"/>
      <xsd:enumeration value="handouts4"/>
      <xsd:enumeration value="handouts6"/>
      <xsd:enumeration value="handouts9"/>
      <xsd:enumeration value="notes"/>
      <xsd:enumeration value="outline"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PrintColorMode">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="bw"/>
      <xsd:enumeration value="gray"/>
      <xsd:enumeration value="clr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PrintProperties">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="prnWhat" type="ST_PrintWhat" use="optional" default="slides"/>
    <xsd:attribute name="clrMode" type="ST_PrintColorMode" use="optional" default="clr"/>
    <xsd:attribute name="hiddenSlides" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="scaleToFitPaper" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="frameSlides" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShowInfoBrowse">
    <xsd:attribute name="showScrollbar" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShowInfoKiosk">
    <xsd:attribute name="restart" type="xsd:unsignedInt" use="optional" default="300000"/>
  </xsd:complexType>
  <xsd:group name="EG_ShowType">
    <xsd:choice>
      <xsd:element name="present" type="CT_Empty"/>
      <xsd:element name="browse" type="CT_ShowInfoBrowse"/>
      <xsd:element name="kiosk" type="CT_ShowInfoKiosk"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_ShowProperties">
    <xsd:sequence minOccurs="0" maxOccurs="1">
      <xsd:group ref="EG_ShowType" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_SlideListChoice" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="penClr" type="a:CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="loop" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showNarration" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showAnimation" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="useTimings" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PresentationProperties">
    <xsd:sequence>
      <xsd:element name="htmlPubPr" type="CT_HtmlPublishProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="webPr" type="CT_WebProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="prnPr" type="CT_PrintProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="showPr" type="CT_ShowProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="clrMru" type="a:CT_ColorMRU" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="presentationPr" type="CT_PresentationProperties"/>
  <xsd:complexType name="CT_HeaderFooter">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="sldNum" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="hdr" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="ftr" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="dt" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PlaceholderType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="title"/>
      <xsd:enumeration value="body"/>
      <xsd:enumeration value="ctrTitle"/>
      <xsd:enumeration value="subTitle"/>
      <xsd:enumeration value="dt"/>
      <xsd:enumeration value="sldNum"/>
      <xsd:enumeration value="ftr"/>
      <xsd:enumeration value="hdr"/>
      <xsd:enumeration value="obj"/>
      <xsd:enumeration value="chart"/>
      <xsd:enumeration value="tbl"/>
      <xsd:enumeration value="clipArt"/>
      <xsd:enumeration value="dgm"/>
      <xsd:enumeration value="media"/>
      <xsd:enumeration value="sldImg"/>
      <xsd:enumeration value="pic"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PlaceholderSize">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="full"/>
      <xsd:enumeration value="half"/>
      <xsd:enumeration value="quarter"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Placeholder">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_PlaceholderType" use="optional" default="obj"/>
    <xsd:attribute name="orient" type="ST_Direction" use="optional" default="horz"/>
    <xsd:attribute name="sz" type="ST_PlaceholderSize" use="optional" default="full"/>
    <xsd:attribute name="idx" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="hasCustomPrompt" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ApplicationNonVisualDrawingProps">
    <xsd:sequence>
      <xsd:element name="ph" type="CT_Placeholder" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="a:EG_Media" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="custDataLst" type="CT_CustomerDataList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="isPhoto" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="userDrawn" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvSpPr" type="a:CT_NonVisualDrawingShapeProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="nvPr" type="CT_ApplicationNonVisualDrawingProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Shape">
    <xsd:sequence>
      <xsd:element name="nvSpPr" type="CT_ShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txBody" type="a:CT_TextBody" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="useBgFill" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConnectorNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvCxnSpPr" type="a:CT_NonVisualConnectorProperties" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="nvPr" type="CT_ApplicationNonVisualDrawingProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Connector">
    <xsd:sequence>
      <xsd:element name="nvCxnSpPr" type="CT_ConnectorNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PictureNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvPicPr" type="a:CT_NonVisualPictureProperties" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="nvPr" type="CT_ApplicationNonVisualDrawingProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Picture">
    <xsd:sequence>
      <xsd:element name="nvPicPr" type="CT_PictureNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="blipFill" type="a:CT_BlipFillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="spPr" type="a:CT_ShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="style" type="a:CT_ShapeStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectFrameNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGraphicFramePr" type="a:CT_NonVisualGraphicFrameProperties"
        minOccurs="1" maxOccurs="1"/>
      <xsd:element name="nvPr" type="CT_ApplicationNonVisualDrawingProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GraphicalObjectFrame">
    <xsd:sequence>
      <xsd:element name="nvGraphicFramePr" type="CT_GraphicalObjectFrameNonVisual" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="xfrm" type="a:CT_Transform2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element ref="a:graphic" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="bwMode" type="a:ST_BlackWhiteMode" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShapeNonVisual">
    <xsd:sequence>
      <xsd:element name="cNvPr" type="a:CT_NonVisualDrawingProps" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cNvGrpSpPr" type="a:CT_NonVisualGroupDrawingShapeProps" minOccurs="1"
        maxOccurs="1"/>
      <xsd:element name="nvPr" type="CT_ApplicationNonVisualDrawingProps" minOccurs="1"
        maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupShape">
    <xsd:sequence>
      <xsd:element name="nvGrpSpPr" type="CT_GroupShapeNonVisual" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="grpSpPr" type="a:CT_GroupShapeProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="sp" type="CT_Shape"/>
        <xsd:element name="grpSp" type="CT_GroupShape"/>
        <xsd:element name="graphicFrame" type="CT_GraphicalObjectFrame"/>
        <xsd:element name="cxnSp" type="CT_Connector"/>
        <xsd:element name="pic" type="CT_Picture"/>
        <xsd:element name="contentPart" type="CT_Rel"/>
      </xsd:choice>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Rel">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_TopLevelSlide">
    <xsd:sequence>
      <xsd:element name="clrMap" type="a:CT_ColorMapping" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:group name="EG_ChildSlide">
    <xsd:sequence>
      <xsd:element name="clrMapOvr" type="a:CT_ColorMappingOverride" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:attributeGroup name="AG_ChildSlide">
    <xsd:attribute name="showMasterSp" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showMasterPhAnim" type="xsd:boolean" use="optional" default="true"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_BackgroundProperties">
    <xsd:sequence>
      <xsd:group ref="a:EG_FillProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="a:EG_EffectProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="shadeToTitle" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:group name="EG_Background">
    <xsd:choice>
      <xsd:element name="bgPr" type="CT_BackgroundProperties"/>
      <xsd:element name="bgRef" type="a:CT_StyleMatrixReference"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_Background">
    <xsd:sequence>
      <xsd:group ref="EG_Background"/>
    </xsd:sequence>
    <xsd:attribute name="bwMode" type="a:ST_BlackWhiteMode" use="optional" default="white"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommonSlideData">
    <xsd:sequence>
      <xsd:element name="bg" type="CT_Background" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="spTree" type="CT_GroupShape" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="custDataLst" type="CT_CustomerDataList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="controls" type="CT_ControlList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_Slide">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_ChildSlide" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="transition" type="CT_SlideTransition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="timing" type="CT_SlideTiming" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_ChildSlide"/>
    <xsd:attribute name="show" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:element name="sld" type="CT_Slide"/>
  <xsd:simpleType name="ST_SlideLayoutType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="title"/>
      <xsd:enumeration value="tx"/>
      <xsd:enumeration value="twoColTx"/>
      <xsd:enumeration value="tbl"/>
      <xsd:enumeration value="txAndChart"/>
      <xsd:enumeration value="chartAndTx"/>
      <xsd:enumeration value="dgm"/>
      <xsd:enumeration value="chart"/>
      <xsd:enumeration value="txAndClipArt"/>
      <xsd:enumeration value="clipArtAndTx"/>
      <xsd:enumeration value="titleOnly"/>
      <xsd:enumeration value="blank"/>
      <xsd:enumeration value="txAndObj"/>
      <xsd:enumeration value="objAndTx"/>
      <xsd:enumeration value="objOnly"/>
      <xsd:enumeration value="obj"/>
      <xsd:enumeration value="txAndMedia"/>
      <xsd:enumeration value="mediaAndTx"/>
      <xsd:enumeration value="objOverTx"/>
      <xsd:enumeration value="txOverObj"/>
      <xsd:enumeration value="txAndTwoObj"/>
      <xsd:enumeration value="twoObjAndTx"/>
      <xsd:enumeration value="twoObjOverTx"/>
      <xsd:enumeration value="fourObj"/>
      <xsd:enumeration value="vertTx"/>
      <xsd:enumeration value="clipArtAndVertTx"/>
      <xsd:enumeration value="vertTitleAndTx"/>
      <xsd:enumeration value="vertTitleAndTxOverChart"/>
      <xsd:enumeration value="twoObj"/>
      <xsd:enumeration value="objAndTwoObj"/>
      <xsd:enumeration value="twoObjAndObj"/>
      <xsd:enumeration value="cust"/>
      <xsd:enumeration value="secHead"/>
      <xsd:enumeration value="twoTxTwoObj"/>
      <xsd:enumeration value="objTx"/>
      <xsd:enumeration value="picTx"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideLayout">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_ChildSlide" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="transition" type="CT_SlideTransition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="timing" type="CT_SlideTiming" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hf" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_ChildSlide"/>
    <xsd:attribute name="matchingName" type="xsd:string" use="optional" default=""/>
    <xsd:attribute name="type" type="ST_SlideLayoutType" use="optional" default="cust"/>
    <xsd:attribute name="preserve" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="userDrawn" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:element name="sldLayout" type="CT_SlideLayout"/>
  <xsd:complexType name="CT_SlideMasterTextStyles">
    <xsd:sequence>
      <xsd:element name="titleStyle" type="a:CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bodyStyle" type="a:CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="otherStyle" type="a:CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_SlideLayoutId">
    <xsd:restriction base="xsd:unsignedInt">
      <xsd:minInclusive value="2147483648"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SlideLayoutIdListEntry">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_SlideLayoutId" use="optional"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideLayoutIdList">
    <xsd:sequence>
      <xsd:element name="sldLayoutId" type="CT_SlideLayoutIdListEntry" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideMaster">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_TopLevelSlide" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sldLayoutIdLst" type="CT_SlideLayoutIdList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="transition" type="CT_SlideTransition" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="timing" type="CT_SlideTiming" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hf" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="txStyles" type="CT_SlideMasterTextStyles" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="preserve" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:element name="sldMaster" type="CT_SlideMaster"/>
  <xsd:complexType name="CT_HandoutMaster">
    <xsd:sequence>
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_TopLevelSlide" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hf" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="handoutMaster" type="CT_HandoutMaster"/>
  <xsd:complexType name="CT_NotesMaster">
    <xsd:sequence>
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_TopLevelSlide" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="hf" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="notesStyle" type="a:CT_TextListStyle" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="notesMaster" type="CT_NotesMaster"/>
  <xsd:complexType name="CT_NotesSlide">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cSld" type="CT_CommonSlideData" minOccurs="1" maxOccurs="1"/>
      <xsd:group ref="EG_ChildSlide" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionListModify" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_ChildSlide"/>
  </xsd:complexType>
  <xsd:element name="notes" type="CT_NotesSlide"/>
  <xsd:complexType name="CT_SlideSyncProperties">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="serverSldId" type="xsd:string" use="required"/>
    <xsd:attribute name="serverSldModifiedTime" type="xsd:dateTime" use="required"/>
    <xsd:attribute name="clientInsertedTime" type="xsd:dateTime" use="required"/>
  </xsd:complexType>
  <xsd:element name="sldSyncPr" type="CT_SlideSyncProperties"/>
  <xsd:complexType name="CT_StringTag">
    <xsd:attribute name="name" type="xsd:string" use="required"/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TagList">
    <xsd:sequence>
      <xsd:element name="tag" type="CT_StringTag" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="tagLst" type="CT_TagList"/>
  <xsd:simpleType name="ST_SplitterBarState">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="minimized"/>
      <xsd:enumeration value="restored"/>
      <xsd:enumeration value="maximized"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ViewType">
    <xsd:restriction base="xsd:token">
      <xsd:enumeration value="sldView"/>
      <xsd:enumeration value="sldMasterView"/>
      <xsd:enumeration value="notesView"/>
      <xsd:enumeration value="handoutView"/>
      <xsd:enumeration value="notesMasterView"/>
      <xsd:enumeration value="outlineView"/>
      <xsd:enumeration value="sldSorterView"/>
      <xsd:enumeration value="sldThumbnailView"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_NormalViewPortion">
    <xsd:attribute name="sz" type="a:ST_PositiveFixedPercentage" use="required"/>
    <xsd:attribute name="autoAdjust" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NormalViewProperties">
    <xsd:sequence>
      <xsd:element name="restoredLeft" type="CT_NormalViewPortion" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="restoredTop" type="CT_NormalViewPortion" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="showOutlineIcons" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="snapVertSplitter" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="vertBarState" type="ST_SplitterBarState" use="optional" default="restored"/>
    <xsd:attribute name="horzBarState" type="ST_SplitterBarState" use="optional" default="restored"/>
    <xsd:attribute name="preferSingleView" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommonViewProperties">
    <xsd:sequence>
      <xsd:element name="scale" type="a:CT_Scale2D" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="origin" type="a:CT_Point2D" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="varScale" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NotesTextViewProperties">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cViewPr" type="CT_CommonViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OutlineViewSlideEntry">
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="collapse" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OutlineViewSlideList">
    <xsd:sequence>
      <xsd:element name="sld" type="CT_OutlineViewSlideEntry" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OutlineViewProperties">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cViewPr" type="CT_CommonViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sldLst" type="CT_OutlineViewSlideList" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideSorterViewProperties">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element name="cViewPr" type="CT_CommonViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="showFormatting" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Guide">
    <xsd:attribute name="orient" type="ST_Direction" use="optional" default="vert"/>
    <xsd:attribute name="pos" type="a:ST_Coordinate32" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GuideList">
    <xsd:sequence minOccurs="0" maxOccurs="1">
      <xsd:element name="guide" type="CT_Guide" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CommonSlideViewProperties">
    <xsd:sequence>
      <xsd:element name="cViewPr" type="CT_CommonViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="guideLst" type="CT_GuideList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="snapToGrid" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="snapToObjects" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showGuides" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SlideViewProperties">
    <xsd:sequence>
      <xsd:element name="cSldViewPr" type="CT_CommonSlideViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NotesViewProperties">
    <xsd:sequence>
      <xsd:element name="cSldViewPr" type="CT_CommonSlideViewProperties" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ViewProperties">
    <xsd:sequence minOccurs="0" maxOccurs="1">
      <xsd:element name="normalViewPr" type="CT_NormalViewProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="slideViewPr" type="CT_SlideViewProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="outlineViewPr" type="CT_OutlineViewProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="notesTextViewPr" type="CT_NotesTextViewProperties" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="sorterViewPr" type="CT_SlideSorterViewProperties" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="notesViewPr" type="CT_NotesViewProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="gridSpacing" type="a:CT_PositiveSize2D" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="lastView" type="ST_ViewType" use="optional" default="sldView"/>
    <xsd:attribute name="showComments" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:element name="viewPr" type="CT_ViewProperties"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/characteristics"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/characteristics"
  elementFormDefault="qualified">
  <xsd:complexType name="CT_AdditionalCharacteristics">
    <xsd:sequence>
      <xsd:element name="characteristic" type="CT_Characteristic" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Characteristic">
    <xsd:attribute name="name" type="xsd:string" use="required"/>
    <xsd:attribute name="relation" type="ST_Relation" use="required"/>
    <xsd:attribute name="val" type="xsd:string" use="required"/>
    <xsd:attribute name="vocabulary" type="xsd:anyURI" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Relation">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="ge"/>
      <xsd:enumeration value="le"/>
      <xsd:enumeration value="gt"/>
      <xsd:enumeration value="lt"/>
      <xsd:enumeration value="eq"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="additionalCharacteristics" type="CT_AdditionalCharacteristics"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:simpleType name="ST_SourceType">
    <xsd:restriction base="s:ST_String">
      <xsd:enumeration value="ArticleInAPeriodical"/>
      <xsd:enumeration value="Book"/>
      <xsd:enumeration value="BookSection"/>
      <xsd:enumeration value="JournalArticle"/>
      <xsd:enumeration value="ConferenceProceedings"/>
      <xsd:enumeration value="Report"/>
      <xsd:enumeration value="SoundRecording"/>
      <xsd:enumeration value="Performance"/>
      <xsd:enumeration value="Art"/>
      <xsd:enumeration value="DocumentFromInternetSite"/>
      <xsd:enumeration value="InternetSite"/>
      <xsd:enumeration value="Film"/>
      <xsd:enumeration value="Interview"/>
      <xsd:enumeration value="Patent"/>
      <xsd:enumeration value="ElectronicSource"/>
      <xsd:enumeration value="Case"/>
      <xsd:enumeration value="Misc"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_NameListType">
    <xsd:sequence>
      <xsd:element name="Person" type="CT_PersonType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PersonType">
    <xsd:sequence>
      <xsd:element name="Last" type="s:ST_String" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="First" type="s:ST_String" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="Middle" type="s:ST_String" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NameType">
    <xsd:sequence>
      <xsd:element name="NameList" type="CT_NameListType" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NameOrCorporateType">
    <xsd:sequence>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="NameList" type="CT_NameListType" minOccurs="1" maxOccurs="1"/>
        <xsd:element name="Corporate" minOccurs="1" maxOccurs="1" type="s:ST_String"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AuthorType">
    <xsd:sequence>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="Artist" type="CT_NameType"/>
        <xsd:element name="Author" type="CT_NameOrCorporateType"/>
        <xsd:element name="BookAuthor" type="CT_NameType"/>
        <xsd:element name="Compiler" type="CT_NameType"/>
        <xsd:element name="Composer" type="CT_NameType"/>
        <xsd:element name="Conductor" type="CT_NameType"/>
        <xsd:element name="Counsel" type="CT_NameType"/>
        <xsd:element name="Director" type="CT_NameType"/>
        <xsd:element name="Editor" type="CT_NameType"/>
        <xsd:element name="Interviewee" type="CT_NameType"/>
        <xsd:element name="Interviewer" type="CT_NameType"/>
        <xsd:element name="Inventor" type="CT_NameType"/>
        <xsd:element name="Performer" type="CT_NameOrCorporateType"/>
        <xsd:element name="ProducerName" type="CT_NameType"/>
        <xsd:element name="Translator" type="CT_NameType"/>
        <xsd:element name="Writer" type="CT_NameType"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SourceType">
    <xsd:sequence>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="AbbreviatedCaseNumber" type="s:ST_String"/>
        <xsd:element name="AlbumTitle" type="s:ST_String"/>
        <xsd:element name="Author" type="CT_AuthorType"/>
        <xsd:element name="BookTitle" type="s:ST_String"/>
        <xsd:element name="Broadcaster" type="s:ST_String"/>
        <xsd:element name="BroadcastTitle" type="s:ST_String"/>
        <xsd:element name="CaseNumber" type="s:ST_String"/>
        <xsd:element name="ChapterNumber" type="s:ST_String"/>
        <xsd:element name="City" type="s:ST_String"/>
        <xsd:element name="Comments" type="s:ST_String"/>
        <xsd:element name="ConferenceName" type="s:ST_String"/>
        <xsd:element name="CountryRegion" type="s:ST_String"/>
        <xsd:element name="Court" type="s:ST_String"/>
        <xsd:element name="Day" type="s:ST_String"/>
        <xsd:element name="DayAccessed" type="s:ST_String"/>
        <xsd:element name="Department" type="s:ST_String"/>
        <xsd:element name="Distributor" type="s:ST_String"/>
        <xsd:element name="Edition" type="s:ST_String"/>
        <xsd:element name="Guid" type="s:ST_String"/>
        <xsd:element name="Institution" type="s:ST_String"/>
        <xsd:element name="InternetSiteTitle" type="s:ST_String"/>
        <xsd:element name="Issue" type="s:ST_String"/>
        <xsd:element name="JournalName" type="s:ST_String"/>
        <xsd:element name="LCID" type="s:ST_Lang"/>
        <xsd:element name="Medium" type="s:ST_String"/>
        <xsd:element name="Month" type="s:ST_String"/>
        <xsd:element name="MonthAccessed" type="s:ST_String"/>
        <xsd:element name="NumberVolumes" type="s:ST_String"/>
        <xsd:element name="Pages" type="s:ST_String"/>
        <xsd:element name="PatentNumber" type="s:ST_String"/>
        <xsd:element name="PeriodicalTitle" type="s:ST_String"/>
        <xsd:element name="ProductionCompany" type="s:ST_String"/>
        <xsd:element name="PublicationTitle" type="s:ST_String"/>
        <xsd:element name="Publisher" type="s:ST_String"/>
        <xsd:element name="RecordingNumber" type="s:ST_String"/>
        <xsd:element name="RefOrder" type="s:ST_String"/>
        <xsd:element name="Reporter" type="s:ST_String"/>
        <xsd:element name="SourceType" type="ST_SourceType"/>
        <xsd:element name="ShortTitle" type="s:ST_String"/>
        <xsd:element name="StandardNumber" type="s:ST_String"/>
        <xsd:element name="StateProvince" type="s:ST_String"/>
        <xsd:element name="Station" type="s:ST_String"/>
        <xsd:element name="Tag" type="s:ST_String"/>
        <xsd:element name="Theater" type="s:ST_String"/>
        <xsd:element name="ThesisType" type="s:ST_String"/>
        <xsd:element name="Title" type="s:ST_String"/>
        <xsd:element name="Type" type="s:ST_String"/>
        <xsd:element name="URL" type="s:ST_String"/>
        <xsd:element name="Version" type="s:ST_String"/>
        <xsd:element name="Volume" type="s:ST_String"/>
        <xsd:element name="Year" type="s:ST_String"/>
        <xsd:element name="YearAccessed" type="s:ST_String"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="Sources" type="CT_Sources"/>
  <xsd:complexType name="CT_Sources">
    <xsd:sequence>
      <xsd:element name="Source" type="CT_SourceType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="SelectedStyle" type="s:ST_String"/>
    <xsd:attribute name="StyleName" type="s:ST_String"/>
    <xsd:attribute name="URI" type="s:ST_String"/>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  elementFormDefault="qualified">
  <xsd:simpleType name="ST_Lang">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HexColorRGB">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="3" fixed="true"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Panose">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="10"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CalendarType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="gregorian"/>
      <xsd:enumeration value="gregorianUs"/>
      <xsd:enumeration value="gregorianMeFrench"/>
      <xsd:enumeration value="gregorianArabic"/>
      <xsd:enumeration value="hijri"/>
      <xsd:enumeration value="hebrew"/>
      <xsd:enumeration value="taiwan"/>
      <xsd:enumeration value="japan"/>
      <xsd:enumeration value="thai"/>
      <xsd:enumeration value="korea"/>
      <xsd:enumeration value="saka"/>
      <xsd:enumeration value="gregorianXlitEnglish"/>
      <xsd:enumeration value="gregorianXlitFrench"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AlgClass">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="hash"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CryptProv">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="rsaAES"/>
      <xsd:enumeration value="rsaFull"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AlgType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="typeAny"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ColorType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Guid">
    <xsd:restriction base="xsd:token">
      <xsd:pattern value="\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\}"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OnOff">
    <xsd:union memberTypes="xsd:boolean ST_OnOff1"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OnOff1">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="on"/>
      <xsd:enumeration value="off"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_String">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_XmlName">
    <xsd:restriction base="xsd:NCName">
      <xsd:minLength value="1"/>
      <xsd:maxLength value="255"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TrueFalse">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="f"/>
      <xsd:enumeration value="true"/>
      <xsd:enumeration value="false"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TrueFalseBlank">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="f"/>
      <xsd:enumeration value="true"/>
      <xsd:enumeration value="false"/>
      <xsd:enumeration value=""/>
      <xsd:enumeration value="True"/>
      <xsd:enumeration value="False"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UnsignedDecimalNumber">
    <xsd:restriction base="xsd:decimal">
      <xsd:minInclusive value="0"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TwipsMeasure">
    <xsd:union memberTypes="ST_UnsignedDecimalNumber ST_PositiveUniversalMeasure"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VerticalAlignRun">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="baseline"/>
      <xsd:enumeration value="superscript"/>
      <xsd:enumeration value="subscript"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Xstring">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_XAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="inside"/>
      <xsd:enumeration value="outside"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_YAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="inline"/>
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="inside"/>
      <xsd:enumeration value="outside"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConformanceClass">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="strict"/>
      <xsd:enumeration value="transitional"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UniversalMeasure">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="-?[0-9]+(\.[0-9]+)?(mm|cm|in|pt|pc|pi)"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveUniversalMeasure">
    <xsd:restriction base="ST_UniversalMeasure">
      <xsd:pattern value="[0-9]+(\.[0-9]+)?(mm|cm|in|pt|pc|pi)"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Percentage">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="-?[0-9]+(\.[0-9]+)?%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FixedPercentage">
    <xsd:restriction base="ST_Percentage">
      <xsd:pattern value="-?((100)|([0-9][0-9]?))(\.[0-9][0-9]?)?%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositivePercentage">
    <xsd:restriction base="ST_Percentage">
      <xsd:pattern value="[0-9]+(\.[0-9]+)?%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PositiveFixedPercentage">
    <xsd:restriction base="ST_Percentage">
      <xsd:pattern value="((100)|([0-9][0-9]?))(\.[0-9][0-9]?)?%"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/customXml"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/customXml"
  elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:complexType name="CT_DatastoreSchemaRef">
    <xsd:attribute name="uri" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DatastoreSchemaRefs">
    <xsd:sequence>
      <xsd:element name="schemaRef" type="CT_DatastoreSchemaRef" minOccurs="0" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DatastoreItem">
    <xsd:sequence>
      <xsd:element name="schemaRefs" type="CT_DatastoreSchemaRefs" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="itemID" type="s:ST_Guid" use="required"/>
  </xsd:complexType>
  <xsd:element name="datastoreItem" type="CT_DatastoreItem"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/schemaLibrary/2006/main"
  targetNamespace="http://schemas.openxmlformats.org/schemaLibrary/2006/main"
  attributeFormDefault="qualified" elementFormDefault="qualified">
  <xsd:complexType name="CT_Schema">
    <xsd:attribute name="uri" type="xsd:string" default=""/>
    <xsd:attribute name="manifestLocation" type="xsd:string"/>
    <xsd:attribute name="schemaLocation" type="xsd:string"/>
    <xsd:attribute name="schemaLanguage" type="xsd:token"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SchemaLibrary">
    <xsd:sequence>
      <xsd:element name="schema" type="CT_Schema" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="schemaLibrary" type="CT_SchemaLibrary"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
  xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
  blockDefault="#all" elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    schemaLocation="shared-documentPropertiesVariantTypes.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:element name="Properties" type="CT_Properties"/>
  <xsd:complexType name="CT_Properties">
    <xsd:sequence>
      <xsd:element name="property" minOccurs="0" maxOccurs="unbounded" type="CT_Property"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Property">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element ref="vt:vector"/>
      <xsd:element ref="vt:array"/>
      <xsd:element ref="vt:blob"/>
      <xsd:element ref="vt:oblob"/>
      <xsd:element ref="vt:empty"/>
      <xsd:element ref="vt:null"/>
      <xsd:element ref="vt:i1"/>
      <xsd:element ref="vt:i2"/>
      <xsd:element ref="vt:i4"/>
      <xsd:element ref="vt:i8"/>
      <xsd:element ref="vt:int"/>
      <xsd:element ref="vt:ui1"/>
      <xsd:element ref="vt:ui2"/>
      <xsd:element ref="vt:ui4"/>
      <xsd:element ref="vt:ui8"/>
      <xsd:element ref="vt:uint"/>
      <xsd:element ref="vt:r4"/>
      <xsd:element ref="vt:r8"/>
      <xsd:element ref="vt:decimal"/>
      <xsd:element ref="vt:lpstr"/>
      <xsd:element ref="vt:lpwstr"/>
      <xsd:element ref="vt:bstr"/>
      <xsd:element ref="vt:date"/>
      <xsd:element ref="vt:filetime"/>
      <xsd:element ref="vt:bool"/>
      <xsd:element ref="vt:cy"/>
      <xsd:element ref="vt:error"/>
      <xsd:element ref="vt:stream"/>
      <xsd:element ref="vt:ostream"/>
      <xsd:element ref="vt:storage"/>
      <xsd:element ref="vt:ostorage"/>
      <xsd:element ref="vt:vstream"/>
      <xsd:element ref="vt:clsid"/>
    </xsd:choice>
    <xsd:attribute name="fmtid" use="required" type="s:ST_Guid"/>
    <xsd:attribute name="pid" use="required" type="xsd:int"/>
    <xsd:attribute name="name" use="optional" type="xsd:string"/>
    <xsd:attribute name="linkTarget" use="optional" type="xsd:string"/>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
  xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
  elementFormDefault="qualified" blockDefault="#all">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    schemaLocation="shared-documentPropertiesVariantTypes.xsd"/>
  <xsd:element name="Properties" type="CT_Properties"/>
  <xsd:complexType name="CT_Properties">
    <xsd:all>
      <xsd:element name="Template" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="Manager" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="Company" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="Pages" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="Words" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="Characters" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="PresentationFormat" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="Lines" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="Paragraphs" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="Slides" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="Notes" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="TotalTime" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="HiddenSlides" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="MMClips" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="ScaleCrop" minOccurs="0" maxOccurs="1" type="xsd:boolean"/>
      <xsd:element name="HeadingPairs" minOccurs="0" maxOccurs="1" type="CT_VectorVariant"/>
      <xsd:element name="TitlesOfParts" minOccurs="0" maxOccurs="1" type="CT_VectorLpstr"/>
      <xsd:element name="LinksUpToDate" minOccurs="0" maxOccurs="1" type="xsd:boolean"/>
      <xsd:element name="CharactersWithSpaces" minOccurs="0" maxOccurs="1" type="xsd:int"/>
      <xsd:element name="SharedDoc" minOccurs="0" maxOccurs="1" type="xsd:boolean"/>
      <xsd:element name="HyperlinkBase" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="HLinks" minOccurs="0" maxOccurs="1" type="CT_VectorVariant"/>
      <xsd:element name="HyperlinksChanged" minOccurs="0" maxOccurs="1" type="xsd:boolean"/>
      <xsd:element name="DigSig" minOccurs="0" maxOccurs="1" type="CT_DigSigBlob"/>
      <xsd:element name="Application" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="AppVersion" minOccurs="0" maxOccurs="1" type="xsd:string"/>
      <xsd:element name="DocSecurity" minOccurs="0" maxOccurs="1" type="xsd:int"/>
    </xsd:all>
  </xsd:complexType>
  <xsd:complexType name="CT_VectorVariant">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element ref="vt:vector"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_VectorLpstr">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element ref="vt:vector"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DigSigBlob">
    <xsd:sequence minOccurs="1" maxOccurs="1">
      <xsd:element ref="vt:blob"/>
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
  blockDefault="#all" elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:simpleType name="ST_VectorBaseType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="variant"/>
      <xsd:enumeration value="i1"/>
      <xsd:enumeration value="i2"/>
      <xsd:enumeration value="i4"/>
      <xsd:enumeration value="i8"/>
      <xsd:enumeration value="ui1"/>
      <xsd:enumeration value="ui2"/>
      <xsd:enumeration value="ui4"/>
      <xsd:enumeration value="ui8"/>
      <xsd:enumeration value="r4"/>
      <xsd:enumeration value="r8"/>
      <xsd:enumeration value="lpstr"/>
      <xsd:enumeration value="lpwstr"/>
      <xsd:enumeration value="bstr"/>
      <xsd:enumeration value="date"/>
      <xsd:enumeration value="filetime"/>
      <xsd:enumeration value="bool"/>
      <xsd:enumeration value="cy"/>
      <xsd:enumeration value="error"/>
      <xsd:enumeration value="clsid"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ArrayBaseType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="variant"/>
      <xsd:enumeration value="i1"/>
      <xsd:enumeration value="i2"/>
      <xsd:enumeration value="i4"/>
      <xsd:enumeration value="int"/>
      <xsd:enumeration value="ui1"/>
      <xsd:enumeration value="ui2"/>
      <xsd:enumeration value="ui4"/>
      <xsd:enumeration value="uint"/>
      <xsd:enumeration value="r4"/>
      <xsd:enumeration value="r8"/>
      <xsd:enumeration value="decimal"/>
      <xsd:enumeration value="bstr"/>
      <xsd:enumeration value="date"/>
      <xsd:enumeration value="bool"/>
      <xsd:enumeration value="cy"/>
      <xsd:enumeration value="error"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Cy">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="\s*[0-9]*\.[0-9]{4}\s*"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Error">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="\s*0x[0-9A-Za-z]{8}\s*"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Empty"/>
  <xsd:complexType name="CT_Null"/>
  <xsd:complexType name="CT_Vector">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element ref="variant"/>
      <xsd:element ref="i1"/>
      <xsd:element ref="i2"/>
      <xsd:element ref="i4"/>
      <xsd:element ref="i8"/>
      <xsd:element ref="ui1"/>
      <xsd:element ref="ui2"/>
      <xsd:element ref="ui4"/>
      <xsd:element ref="ui8"/>
      <xsd:element ref="r4"/>
      <xsd:element ref="r8"/>
      <xsd:element ref="lpstr"/>
      <xsd:element ref="lpwstr"/>
      <xsd:element ref="bstr"/>
      <xsd:element ref="date"/>
      <xsd:element ref="filetime"/>
      <xsd:element ref="bool"/>
      <xsd:element ref="cy"/>
      <xsd:element ref="error"/>
      <xsd:element ref="clsid"/>
    </xsd:choice>
    <xsd:attribute name="baseType" type="ST_VectorBaseType" use="required"/>
    <xsd:attribute name="size" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Array">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element ref="variant"/>
      <xsd:element ref="i1"/>
      <xsd:element ref="i2"/>
      <xsd:element ref="i4"/>
      <xsd:element ref="int"/>
      <xsd:element ref="ui1"/>
      <xsd:element ref="ui2"/>
      <xsd:element ref="ui4"/>
      <xsd:element ref="uint"/>
      <xsd:element ref="r4"/>
      <xsd:element ref="r8"/>
      <xsd:element ref="decimal"/>
      <xsd:element ref="bstr"/>
      <xsd:element ref="date"/>
      <xsd:element ref="bool"/>
      <xsd:element ref="error"/>
      <xsd:element ref="cy"/>
    </xsd:choice>
    <xsd:attribute name="lBounds" type="xsd:int" use="required"/>
    <xsd:attribute name="uBounds" type="xsd:int" use="required"/>
    <xsd:attribute name="baseType" type="ST_ArrayBaseType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Variant">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element ref="variant"/>
      <xsd:element ref="vector"/>
      <xsd:element ref="array"/>
      <xsd:element ref="blob"/>
      <xsd:element ref="oblob"/>
      <xsd:element ref="empty"/>
      <xsd:element ref="null"/>
      <xsd:element ref="i1"/>
      <xsd:element ref="i2"/>
      <xsd:element ref="i4"/>
      <xsd:element ref="i8"/>
      <xsd:element ref="int"/>
      <xsd:element ref="ui1"/>
      <xsd:element ref="ui2"/>
      <xsd:element ref="ui4"/>
      <xsd:element ref="ui8"/>
      <xsd:element ref="uint"/>
      <xsd:element ref="r4"/>
      <xsd:element ref="r8"/>
      <xsd:element ref="decimal"/>
      <xsd:element ref="lpstr"/>
      <xsd:element ref="lpwstr"/>
      <xsd:element ref="bstr"/>
      <xsd:element ref="date"/>
      <xsd:element ref="filetime"/>
      <xsd:element ref="bool"/>
      <xsd:element ref="cy"/>
      <xsd:element ref="error"/>
      <xsd:element ref="stream"/>
      <xsd:element ref="ostream"/>
      <xsd:element ref="storage"/>
      <xsd:element ref="ostorage"/>
      <xsd:element ref="vstream"/>
      <xsd:element ref="clsid"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_Vstream">
    <xsd:simpleContent>
      <xsd:extension base="xsd:base64Binary">
        <xsd:attribute name="version" type="s:ST_Guid"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:element name="variant" type="CT_Variant"/>
  <xsd:element name="vector" type="CT_Vector"/>
  <xsd:element name="array" type="CT_Array"/>
  <xsd:element name="blob" type="xsd:base64Binary"/>
  <xsd:element name="oblob" type="xsd:base64Binary"/>
  <xsd:element name="empty" type="CT_Empty"/>
  <xsd:element name="null" type="CT_Null"/>
  <xsd:element name="i1" type="xsd:byte"/>
  <xsd:element name="i2" type="xsd:short"/>
  <xsd:element name="i4" type="xsd:int"/>
  <xsd:element name="i8" type="xsd:long"/>
  <xsd:element name="int" type="xsd:int"/>
  <xsd:element name="ui1" type="xsd:unsignedByte"/>
  <xsd:element name="ui2" type="xsd:unsignedShort"/>
  <xsd:element name="ui4" type="xsd:unsignedInt"/>
  <xsd:element name="ui8" type="xsd:unsignedLong"/>
  <xsd:element name="uint" type="xsd:unsignedInt"/>
  <xsd:element name="r4" type="xsd:float"/>
  <xsd:element name="r8" type="xsd:double"/>
  <xsd:element name="decimal" type="xsd:decimal"/>
  <xsd:element name="lpstr" type="xsd:string"/>
  <xsd:element name="lpwstr" type="xsd:string"/>
  <xsd:element name="bstr" type="xsd:string"/>
  <xsd:element name="date" type="xsd:dateTime"/>
  <xsd:element name="filetime" type="xsd:dateTime"/>
  <xsd:element name="bool" type="xsd:boolean"/>
  <xsd:element name="cy" type="ST_Cy"/>
  <xsd:element name="error" type="ST_Error"/>
  <xsd:element name="stream" type="xsd:base64Binary"/>
  <xsd:element name="ostream" type="xsd:base64Binary"/>
  <xsd:element name="storage" type="xsd:base64Binary"/>
  <xsd:element name="ostorage" type="xsd:base64Binary"/>
  <xsd:element name="vstream" type="CT_Vstream"/>
  <xsd:element name="clsid" type="s:ST_Guid"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/math"
  xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
  xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/math">
  <xsd:import namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    schemaLocation="wml.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="xml.xsd"/>
  <xsd:simpleType name="ST_Integer255">
    <xsd:restriction base="xsd:integer">
      <xsd:minInclusive value="1"/>
      <xsd:maxInclusive value="255"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Integer255">
    <xsd:attribute name="val" type="ST_Integer255" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Integer2">
    <xsd:restriction base="xsd:integer">
      <xsd:minInclusive value="-2"/>
      <xsd:maxInclusive value="2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Integer2">
    <xsd:attribute name="val" type="ST_Integer2" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SpacingRule">
    <xsd:restriction base="xsd:integer">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="4"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SpacingRule">
    <xsd:attribute name="val" type="ST_SpacingRule" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_UnSignedInteger">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_UnSignedInteger">
    <xsd:attribute name="val" type="ST_UnSignedInteger" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Char">
    <xsd:restriction base="xsd:string">
      <xsd:maxLength value="1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Char">
    <xsd:attribute name="val" type="ST_Char" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OnOff">
    <xsd:attribute name="val" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:complexType name="CT_String">
    <xsd:attribute name="val" type="s:ST_String"/>
  </xsd:complexType>
  <xsd:complexType name="CT_XAlign">
    <xsd:attribute name="val" type="s:ST_XAlign" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_YAlign">
    <xsd:attribute name="val" type="s:ST_YAlign" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Shp">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="centered"/>
      <xsd:enumeration value="match"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Shp">
    <xsd:attribute name="val" type="ST_Shp" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="bar"/>
      <xsd:enumeration value="skw"/>
      <xsd:enumeration value="lin"/>
      <xsd:enumeration value="noBar"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FType">
    <xsd:attribute name="val" type="ST_FType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LimLoc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="undOvr"/>
      <xsd:enumeration value="subSup"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LimLoc">
    <xsd:attribute name="val" type="ST_LimLoc" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TopBot">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="bot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TopBot">
    <xsd:attribute name="val" type="ST_TopBot" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Script">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="roman"/>
      <xsd:enumeration value="script"/>
      <xsd:enumeration value="fraktur"/>
      <xsd:enumeration value="double-struck"/>
      <xsd:enumeration value="sans-serif"/>
      <xsd:enumeration value="monospace"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Script">
    <xsd:attribute name="val" type="ST_Script"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Style">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="p"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="i"/>
      <xsd:enumeration value="bi"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Style">
    <xsd:attribute name="val" type="ST_Style"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ManualBreak">
    <xsd:attribute name="alnAt" type="ST_Integer255"/>
  </xsd:complexType>
  <xsd:group name="EG_ScriptStyle">
    <xsd:sequence>
      <xsd:element name="scr" minOccurs="0" type="CT_Script"/>
      <xsd:element name="sty" minOccurs="0" type="CT_Style"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_RPR">
    <xsd:sequence>
      <xsd:element name="lit" minOccurs="0" type="CT_OnOff"/>
      <xsd:choice>
        <xsd:element name="nor" minOccurs="0" type="CT_OnOff"/>
        <xsd:sequence>
          <xsd:group ref="EG_ScriptStyle"/>
        </xsd:sequence>
      </xsd:choice>
      <xsd:element name="brk" minOccurs="0" type="CT_ManualBreak"/>
      <xsd:element name="aln" minOccurs="0" type="CT_OnOff"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Text">
    <xsd:simpleContent>
      <xsd:extension base="s:ST_String">
        <xsd:attribute ref="xml:space" use="optional"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:complexType name="CT_R">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_RPR" minOccurs="0"/>
      <xsd:group ref="w:EG_RPr" minOccurs="0"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:group ref="w:EG_RunInnerContent"/>
        <xsd:element name="t" type="CT_Text" minOccurs="0"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CtrlPr">
    <xsd:sequence>
      <xsd:group ref="w:EG_RPrMath" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_AccPr">
    <xsd:sequence>
      <xsd:element name="chr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Acc">
    <xsd:sequence>
      <xsd:element name="accPr" type="CT_AccPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BarPr">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_TopBot" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Bar">
    <xsd:sequence>
      <xsd:element name="barPr" type="CT_BarPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BoxPr">
    <xsd:sequence>
      <xsd:element name="opEmu" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noBreak" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="diff" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="brk" type="CT_ManualBreak" minOccurs="0"/>
      <xsd:element name="aln" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Box">
    <xsd:sequence>
      <xsd:element name="boxPr" type="CT_BoxPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BorderBoxPr">
    <xsd:sequence>
      <xsd:element name="hideTop" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hideBot" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hideLeft" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hideRight" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="strikeH" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="strikeV" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="strikeBLTR" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="strikeTLBR" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BorderBox">
    <xsd:sequence>
      <xsd:element name="borderBoxPr" type="CT_BorderBoxPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DPr">
    <xsd:sequence>
      <xsd:element name="begChr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="sepChr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="endChr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="grow" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="shp" type="CT_Shp" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_D">
    <xsd:sequence>
      <xsd:element name="dPr" type="CT_DPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EqArrPr">
    <xsd:sequence>
      <xsd:element name="baseJc" type="CT_YAlign" minOccurs="0"/>
      <xsd:element name="maxDist" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="objDist" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="rSpRule" type="CT_SpacingRule" minOccurs="0"/>
      <xsd:element name="rSp" type="CT_UnSignedInteger" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EqArr">
    <xsd:sequence>
      <xsd:element name="eqArrPr" type="CT_EqArrPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FPr">
    <xsd:sequence>
      <xsd:element name="type" type="CT_FType" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_F">
    <xsd:sequence>
      <xsd:element name="fPr" type="CT_FPr" minOccurs="0"/>
      <xsd:element name="num" type="CT_OMathArg"/>
      <xsd:element name="den" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FuncPr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Func">
    <xsd:sequence>
      <xsd:element name="funcPr" type="CT_FuncPr" minOccurs="0"/>
      <xsd:element name="fName" type="CT_OMathArg"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupChrPr">
    <xsd:sequence>
      <xsd:element name="chr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="pos" type="CT_TopBot" minOccurs="0"/>
      <xsd:element name="vertJc" type="CT_TopBot" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupChr">
    <xsd:sequence>
      <xsd:element name="groupChrPr" type="CT_GroupChrPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LimLowPr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LimLow">
    <xsd:sequence>
      <xsd:element name="limLowPr" type="CT_LimLowPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
      <xsd:element name="lim" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LimUppPr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_LimUpp">
    <xsd:sequence>
      <xsd:element name="limUppPr" type="CT_LimUppPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
      <xsd:element name="lim" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MCPr">
    <xsd:sequence>
      <xsd:element name="count" type="CT_Integer255" minOccurs="0"/>
      <xsd:element name="mcJc" type="CT_XAlign" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MC">
    <xsd:sequence>
      <xsd:element name="mcPr" type="CT_MCPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MCS">
    <xsd:sequence>
      <xsd:element name="mc" type="CT_MC" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MPr">
    <xsd:sequence>
      <xsd:element name="baseJc" type="CT_YAlign" minOccurs="0"/>
      <xsd:element name="plcHide" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="rSpRule" type="CT_SpacingRule" minOccurs="0"/>
      <xsd:element name="cGpRule" type="CT_SpacingRule" minOccurs="0"/>
      <xsd:element name="rSp" type="CT_UnSignedInteger" minOccurs="0"/>
      <xsd:element name="cSp" type="CT_UnSignedInteger" minOccurs="0"/>
      <xsd:element name="cGp" type="CT_UnSignedInteger" minOccurs="0"/>
      <xsd:element name="mcs" type="CT_MCS" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MR">
    <xsd:sequence>
      <xsd:element name="e" type="CT_OMathArg" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_M">
    <xsd:sequence>
      <xsd:element name="mPr" type="CT_MPr" minOccurs="0"/>
      <xsd:element name="mr" type="CT_MR" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NaryPr">
    <xsd:sequence>
      <xsd:element name="chr" type="CT_Char" minOccurs="0"/>
      <xsd:element name="limLoc" type="CT_LimLoc" minOccurs="0"/>
      <xsd:element name="grow" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="subHide" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="supHide" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Nary">
    <xsd:sequence>
      <xsd:element name="naryPr" type="CT_NaryPr" minOccurs="0"/>
      <xsd:element name="sub" type="CT_OMathArg"/>
      <xsd:element name="sup" type="CT_OMathArg"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PhantPr">
    <xsd:sequence>
      <xsd:element name="show" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="zeroWid" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="zeroAsc" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="zeroDesc" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="transp" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Phant">
    <xsd:sequence>
      <xsd:element name="phantPr" type="CT_PhantPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_RadPr">
    <xsd:sequence>
      <xsd:element name="degHide" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Rad">
    <xsd:sequence>
      <xsd:element name="radPr" type="CT_RadPr" minOccurs="0"/>
      <xsd:element name="deg" type="CT_OMathArg"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SPrePr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SPre">
    <xsd:sequence>
      <xsd:element name="sPrePr" type="CT_SPrePr" minOccurs="0"/>
      <xsd:element name="sub" type="CT_OMathArg"/>
      <xsd:element name="sup" type="CT_OMathArg"/>
      <xsd:element name="e" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSubPr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSub">
    <xsd:sequence>
      <xsd:element name="sSubPr" type="CT_SSubPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
      <xsd:element name="sub" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSubSupPr">
    <xsd:sequence>
      <xsd:element name="alnScr" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSubSup">
    <xsd:sequence>
      <xsd:element name="sSubSupPr" type="CT_SSubSupPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
      <xsd:element name="sub" type="CT_OMathArg"/>
      <xsd:element name="sup" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSupPr">
    <xsd:sequence>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SSup">
    <xsd:sequence>
      <xsd:element name="sSupPr" type="CT_SSupPr" minOccurs="0"/>
      <xsd:element name="e" type="CT_OMathArg"/>
      <xsd:element name="sup" type="CT_OMathArg"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_OMathMathElements">
    <xsd:choice>
      <xsd:element name="acc" type="CT_Acc"/>
      <xsd:element name="bar" type="CT_Bar"/>
      <xsd:element name="box" type="CT_Box"/>
      <xsd:element name="borderBox" type="CT_BorderBox"/>
      <xsd:element name="d" type="CT_D"/>
      <xsd:element name="eqArr" type="CT_EqArr"/>
      <xsd:element name="f" type="CT_F"/>
      <xsd:element name="func" type="CT_Func"/>
      <xsd:element name="groupChr" type="CT_GroupChr"/>
      <xsd:element name="limLow" type="CT_LimLow"/>
      <xsd:element name="limUpp" type="CT_LimUpp"/>
      <xsd:element name="m" type="CT_M"/>
      <xsd:element name="nary" type="CT_Nary"/>
      <xsd:element name="phant" type="CT_Phant"/>
      <xsd:element name="rad" type="CT_Rad"/>
      <xsd:element name="sPre" type="CT_SPre"/>
      <xsd:element name="sSub" type="CT_SSub"/>
      <xsd:element name="sSubSup" type="CT_SSubSup"/>
      <xsd:element name="sSup" type="CT_SSup"/>
      <xsd:element name="r" type="CT_R"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_OMathElements">
    <xsd:choice>
      <xsd:group ref="EG_OMathMathElements"/>
      <xsd:group ref="w:EG_PContentMath"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_OMathArgPr">
    <xsd:sequence>
      <xsd:element name="argSz" type="CT_Integer2" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OMathArg">
    <xsd:sequence>
      <xsd:element name="argPr" type="CT_OMathArgPr" minOccurs="0"/>
      <xsd:group ref="EG_OMathElements" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="ctrlPr" type="CT_CtrlPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Jc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="centerGroup"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OMathJc">
    <xsd:attribute name="val" type="ST_Jc"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OMathParaPr">
    <xsd:sequence>
      <xsd:element name="jc" type="CT_OMathJc" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TwipsMeasure">
    <xsd:attribute name="val" type="s:ST_TwipsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BreakBin">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="before"/>
      <xsd:enumeration value="after"/>
      <xsd:enumeration value="repeat"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BreakBin">
    <xsd:attribute name="val" type="ST_BreakBin"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BreakBinSub">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="--"/>
      <xsd:enumeration value="-+"/>
      <xsd:enumeration value="+-"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BreakBinSub">
    <xsd:attribute name="val" type="ST_BreakBinSub"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MathPr">
    <xsd:sequence>
      <xsd:element name="mathFont" type="CT_String" minOccurs="0"/>
      <xsd:element name="brkBin" type="CT_BreakBin" minOccurs="0"/>
      <xsd:element name="brkBinSub" type="CT_BreakBinSub" minOccurs="0"/>
      <xsd:element name="smallFrac" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="dispDef" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="lMargin" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="rMargin" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="defJc" type="CT_OMathJc" minOccurs="0"/>
      <xsd:element name="preSp" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="postSp" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="interSp" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="intraSp" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:choice minOccurs="0">
        <xsd:element name="wrapIndent" type="CT_TwipsMeasure"/>
        <xsd:element name="wrapRight" type="CT_OnOff"/>
      </xsd:choice>
      <xsd:element name="intLim" type="CT_LimLoc" minOccurs="0"/>
      <xsd:element name="naryLim" type="CT_LimLoc" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="mathPr" type="CT_MathPr"/>
  <xsd:complexType name="CT_OMathPara">
    <xsd:sequence>
      <xsd:element name="oMathParaPr" type="CT_OMathParaPr" minOccurs="0"/>
      <xsd:element name="oMath" type="CT_OMath" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OMath">
    <xsd:sequence>
      <xsd:group ref="EG_OMathElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="oMathPara" type="CT_OMathPara"/>
  <xsd:element name="oMath" type="CT_OMath"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  elementFormDefault="qualified"
  targetNamespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  blockDefault="#all">
  <xsd:simpleType name="ST_RelationshipId">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:attribute name="id" type="ST_RelationshipId"/>
  <xsd:attribute name="embed" type="ST_RelationshipId"/>
  <xsd:attribute name="link" type="ST_RelationshipId"/>
  <xsd:attribute name="dm" type="ST_RelationshipId" default=""/>
  <xsd:attribute name="lo" type="ST_RelationshipId" default=""/>
  <xsd:attribute name="qs" type="ST_RelationshipId" default=""/>
  <xsd:attribute name="cs" type="ST_RelationshipId" default=""/>
  <xsd:attribute name="blip" type="ST_RelationshipId" default=""/>
  <xsd:attribute name="pict" type="ST_RelationshipId"/>
  <xsd:attribute name="href" type="ST_RelationshipId"/>
  <xsd:attribute name="topLeft" type="ST_RelationshipId"/>
  <xsd:attribute name="topRight" type="ST_RelationshipId"/>
  <xsd:attribute name="bottomLeft" type="ST_RelationshipId"/>
  <xsd:attribute name="bottomRight" type="ST_RelationshipId"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
  elementFormDefault="qualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:import 
    namespace="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"
    schemaLocation="dml-spreadsheetDrawing.xsd"/>
  <xsd:complexType name="CT_AutoFilter">
    <xsd:sequence>
      <xsd:element name="filterColumn" minOccurs="0" maxOccurs="unbounded" type="CT_FilterColumn"/>
      <xsd:element name="sortState" minOccurs="0" maxOccurs="1" type="CT_SortState"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ref" type="ST_Ref"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FilterColumn">
    <xsd:choice minOccurs="0" maxOccurs="1">
      <xsd:element name="filters" type="CT_Filters" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="top10" type="CT_Top10" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customFilters" type="CT_CustomFilters" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dynamicFilter" type="CT_DynamicFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="colorFilter" type="CT_ColorFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="iconFilter" minOccurs="0" maxOccurs="1" type="CT_IconFilter"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="colId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="hiddenButton" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showButton" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Filters">
    <xsd:sequence>
      <xsd:element name="filter" type="CT_Filter" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="dateGroupItem" type="CT_DateGroupItem" minOccurs="0" maxOccurs="unbounded"
      />
    </xsd:sequence>
    <xsd:attribute name="blank" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="calendarType" type="s:ST_CalendarType" use="optional" default="none"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Filter">
    <xsd:attribute name="val" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomFilters">
    <xsd:sequence>
      <xsd:element name="customFilter" type="CT_CustomFilter" minOccurs="1" maxOccurs="2"/>
    </xsd:sequence>
    <xsd:attribute name="and" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomFilter">
    <xsd:attribute name="operator" type="ST_FilterOperator" default="equal" use="optional"/>
    <xsd:attribute name="val" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Top10">
    <xsd:attribute name="top" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="percent" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="val" type="xsd:double" use="required"/>
    <xsd:attribute name="filterVal" type="xsd:double" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorFilter">
    <xsd:attribute name="dxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="cellColor" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_IconFilter">
    <xsd:attribute name="iconSet" type="ST_IconSetType" use="required"/>
    <xsd:attribute name="iconId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FilterOperator">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="equal"/>
      <xsd:enumeration value="lessThan"/>
      <xsd:enumeration value="lessThanOrEqual"/>
      <xsd:enumeration value="notEqual"/>
      <xsd:enumeration value="greaterThanOrEqual"/>
      <xsd:enumeration value="greaterThan"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DynamicFilter">
    <xsd:attribute name="type" type="ST_DynamicFilterType" use="required"/>
    <xsd:attribute name="val" type="xsd:double" use="optional"/>
    <xsd:attribute name="valIso" type="xsd:dateTime" use="optional"/>
    <xsd:attribute name="maxVal" type="xsd:double" use="optional"/>
    <xsd:attribute name="maxValIso" type="xsd:dateTime" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DynamicFilterType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="null"/>
      <xsd:enumeration value="aboveAverage"/>
      <xsd:enumeration value="belowAverage"/>
      <xsd:enumeration value="tomorrow"/>
      <xsd:enumeration value="today"/>
      <xsd:enumeration value="yesterday"/>
      <xsd:enumeration value="nextWeek"/>
      <xsd:enumeration value="thisWeek"/>
      <xsd:enumeration value="lastWeek"/>
      <xsd:enumeration value="nextMonth"/>
      <xsd:enumeration value="thisMonth"/>
      <xsd:enumeration value="lastMonth"/>
      <xsd:enumeration value="nextQuarter"/>
      <xsd:enumeration value="thisQuarter"/>
      <xsd:enumeration value="lastQuarter"/>
      <xsd:enumeration value="nextYear"/>
      <xsd:enumeration value="thisYear"/>
      <xsd:enumeration value="lastYear"/>
      <xsd:enumeration value="yearToDate"/>
      <xsd:enumeration value="Q1"/>
      <xsd:enumeration value="Q2"/>
      <xsd:enumeration value="Q3"/>
      <xsd:enumeration value="Q4"/>
      <xsd:enumeration value="M1"/>
      <xsd:enumeration value="M2"/>
      <xsd:enumeration value="M3"/>
      <xsd:enumeration value="M4"/>
      <xsd:enumeration value="M5"/>
      <xsd:enumeration value="M6"/>
      <xsd:enumeration value="M7"/>
      <xsd:enumeration value="M8"/>
      <xsd:enumeration value="M9"/>
      <xsd:enumeration value="M10"/>
      <xsd:enumeration value="M11"/>
      <xsd:enumeration value="M12"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_IconSetType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="3Arrows"/>
      <xsd:enumeration value="3ArrowsGray"/>
      <xsd:enumeration value="3Flags"/>
      <xsd:enumeration value="3TrafficLights1"/>
      <xsd:enumeration value="3TrafficLights2"/>
      <xsd:enumeration value="3Signs"/>
      <xsd:enumeration value="3Symbols"/>
      <xsd:enumeration value="3Symbols2"/>
      <xsd:enumeration value="4Arrows"/>
      <xsd:enumeration value="4ArrowsGray"/>
      <xsd:enumeration value="4RedToBlack"/>
      <xsd:enumeration value="4Rating"/>
      <xsd:enumeration value="4TrafficLights"/>
      <xsd:enumeration value="5Arrows"/>
      <xsd:enumeration value="5ArrowsGray"/>
      <xsd:enumeration value="5Rating"/>
      <xsd:enumeration value="5Quarters"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SortState">
    <xsd:sequence>
      <xsd:element name="sortCondition" minOccurs="0" maxOccurs="64" type="CT_SortCondition"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="columnSort" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="caseSensitive" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="sortMethod" type="ST_SortMethod" use="optional" default="none"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SortCondition">
    <xsd:attribute name="descending" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="sortBy" type="ST_SortBy" use="optional" default="value"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute name="customList" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="dxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="iconSet" type="ST_IconSetType" use="optional" default="3Arrows"/>
    <xsd:attribute name="iconId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SortBy">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="value"/>
      <xsd:enumeration value="cellColor"/>
      <xsd:enumeration value="fontColor"/>
      <xsd:enumeration value="icon"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_SortMethod">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="stroke"/>
      <xsd:enumeration value="pinYin"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DateGroupItem">
    <xsd:attribute name="year" type="xsd:unsignedShort" use="required"/>
    <xsd:attribute name="month" type="xsd:unsignedShort" use="optional"/>
    <xsd:attribute name="day" type="xsd:unsignedShort" use="optional"/>
    <xsd:attribute name="hour" type="xsd:unsignedShort" use="optional"/>
    <xsd:attribute name="minute" type="xsd:unsignedShort" use="optional"/>
    <xsd:attribute name="second" type="xsd:unsignedShort" use="optional"/>
    <xsd:attribute name="dateTimeGrouping" type="ST_DateTimeGrouping" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DateTimeGrouping">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="year"/>
      <xsd:enumeration value="month"/>
      <xsd:enumeration value="day"/>
      <xsd:enumeration value="hour"/>
      <xsd:enumeration value="minute"/>
      <xsd:enumeration value="second"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CellRef">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Ref">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RefA">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Sqref">
    <xsd:list itemType="ST_Ref"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Formula">
    <xsd:restriction base="s:ST_Xstring"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UnsignedIntHex">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="4"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UnsignedShortHex">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_XStringElement">
    <xsd:attribute name="v" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Extension">
    <xsd:sequence>
      <xsd:any processContents="lax"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="xsd:token"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ObjectAnchor">
    <xsd:sequence>
      <xsd:element ref="xdr:from" minOccurs="1" maxOccurs="1"/>
      <xsd:element ref="xdr:to" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="moveWithCells" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="sizeWithCells" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:group name="EG_ExtensionList">
    <xsd:sequence>
      <xsd:element name="ext" type="CT_Extension" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_ExtensionList">
    <xsd:sequence>
      <xsd:group ref="EG_ExtensionList" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="calcChain" type="CT_CalcChain"/>
  <xsd:complexType name="CT_CalcChain">
    <xsd:sequence>
      <xsd:element name="c" type="CT_CalcCell" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CalcCell">
    <xsd:attribute name="r" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="ref" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="i" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="s" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="l" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="t" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="a" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:element name="comments" type="CT_Comments"/>
  <xsd:complexType name="CT_Comments">
    <xsd:sequence>
      <xsd:element name="authors" type="CT_Authors" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="commentList" type="CT_CommentList" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Authors">
    <xsd:sequence>
      <xsd:element name="author" type="s:ST_Xstring" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CommentList">
    <xsd:sequence>
      <xsd:element name="comment" type="CT_Comment" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Comment">
    <xsd:sequence>
      <xsd:element name="text" type="CT_Rst" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="commentPr" type="CT_CommentPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute name="authorId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="guid" type="s:ST_Guid" use="optional"/>
    <xsd:attribute name="shapeId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CommentPr">
    <xsd:sequence>
      <xsd:element name="anchor" type="CT_ObjectAnchor" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="locked" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="defaultSize" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="print" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="disabled" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoFill" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoLine" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="altText" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="textHAlign" type="ST_TextHAlign" use="optional" default="left"/>
    <xsd:attribute name="textVAlign" type="ST_TextVAlign" use="optional" default="top"/>
    <xsd:attribute name="lockText" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="justLastX" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoScale" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextHAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="justify"/>
      <xsd:enumeration value="distributed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextVAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="justify"/>
      <xsd:enumeration value="distributed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="MapInfo" type="CT_MapInfo"/>
  <xsd:complexType name="CT_MapInfo">
    <xsd:sequence>
      <xsd:element name="Schema" type="CT_Schema" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="Map" type="CT_Map" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="SelectionNamespaces" type="xsd:string" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Schema" mixed="true">
    <xsd:sequence>
      <xsd:any/>
    </xsd:sequence>
    <xsd:attribute name="ID" type="xsd:string" use="required"/>
    <xsd:attribute name="SchemaRef" type="xsd:string" use="optional"/>
    <xsd:attribute name="Namespace" type="xsd:string" use="optional"/>
    <xsd:attribute name="SchemaLanguage" type="xsd:token" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Map">
    <xsd:sequence>
      <xsd:element name="DataBinding" type="CT_DataBinding" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ID" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="Name" type="xsd:string" use="required"/>
    <xsd:attribute name="RootElement" type="xsd:string" use="required"/>
    <xsd:attribute name="SchemaID" type="xsd:string" use="required"/>
    <xsd:attribute name="ShowImportExportValidationErrors" type="xsd:boolean" use="required"/>
    <xsd:attribute name="AutoFit" type="xsd:boolean" use="required"/>
    <xsd:attribute name="Append" type="xsd:boolean" use="required"/>
    <xsd:attribute name="PreserveSortAFLayout" type="xsd:boolean" use="required"/>
    <xsd:attribute name="PreserveFormat" type="xsd:boolean" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataBinding">
    <xsd:sequence>
      <xsd:any/>
    </xsd:sequence>
    <xsd:attribute name="DataBindingName" type="xsd:string" use="optional"/>
    <xsd:attribute name="FileBinding" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="ConnectionID" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="FileBindingName" type="xsd:string" use="optional"/>
    <xsd:attribute name="DataBindingLoadMode" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:element name="connections" type="CT_Connections"/>
  <xsd:complexType name="CT_Connections">
    <xsd:sequence>
      <xsd:element name="connection" minOccurs="1" maxOccurs="unbounded" type="CT_Connection"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Connection">
    <xsd:sequence>
      <xsd:element name="dbPr" minOccurs="0" maxOccurs="1" type="CT_DbPr"/>
      <xsd:element name="olapPr" minOccurs="0" maxOccurs="1" type="CT_OlapPr"/>
      <xsd:element name="webPr" minOccurs="0" maxOccurs="1" type="CT_WebPr"/>
      <xsd:element name="textPr" minOccurs="0" maxOccurs="1" type="CT_TextPr"/>
      <xsd:element name="parameters" minOccurs="0" maxOccurs="1" type="CT_Parameters"/>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="id" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="sourceFile" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="odcFile" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="keepAlive" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="interval" use="optional" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="name" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="description" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="type" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="reconnectionMethod" use="optional" type="xsd:unsignedInt" default="1"/>
    <xsd:attribute name="refreshedVersion" use="required" type="xsd:unsignedByte"/>
    <xsd:attribute name="minRefreshableVersion" use="optional" type="xsd:unsignedByte" default="0"/>
    <xsd:attribute name="savePassword" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="new" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="deleted" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="onlyUseConnectionFile" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="background" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="refreshOnLoad" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="saveData" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="credentials" use="optional" type="ST_CredMethod" default="integrated"/>
    <xsd:attribute name="singleSignOnId" use="optional" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CredMethod">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="integrated"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="stored"/>
      <xsd:enumeration value="prompt"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DbPr">
    <xsd:attribute name="connection" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="command" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="serverCommand" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="commandType" use="optional" type="xsd:unsignedInt" default="2"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OlapPr">
    <xsd:attribute name="local" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="localConnection" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="localRefresh" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="sendLocale" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="rowDrillCount" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="serverFill" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="serverNumberFormat" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="serverFont" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="serverFontColor" use="optional" type="xsd:boolean" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WebPr">
    <xsd:sequence>
      <xsd:element name="tables" minOccurs="0" maxOccurs="1" type="CT_Tables"/>
    </xsd:sequence>
    <xsd:attribute name="xml" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="sourceData" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="parsePre" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="consecutive" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="firstRow" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="xl97" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="textDates" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="xl2000" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="url" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="post" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="htmlTables" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="htmlFormat" use="optional" type="ST_HtmlFmt" default="none"/>
    <xsd:attribute name="editPage" use="optional" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HtmlFmt">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="rtf"/>
      <xsd:enumeration value="all"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Parameters">
    <xsd:sequence>
      <xsd:element name="parameter" minOccurs="1" maxOccurs="unbounded" type="CT_Parameter"/>
    </xsd:sequence>
    <xsd:attribute name="count" use="optional" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Parameter">
    <xsd:attribute name="name" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="sqlType" use="optional" type="xsd:int" default="0"/>
    <xsd:attribute name="parameterType" use="optional" type="ST_ParameterType" default="prompt"/>
    <xsd:attribute name="refreshOnChange" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="prompt" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="boolean" use="optional" type="xsd:boolean"/>
    <xsd:attribute name="double" use="optional" type="xsd:double"/>
    <xsd:attribute name="integer" use="optional" type="xsd:int"/>
    <xsd:attribute name="string" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="cell" use="optional" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ParameterType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="prompt"/>
      <xsd:enumeration value="value"/>
      <xsd:enumeration value="cell"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Tables">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element name="m" type="CT_TableMissing"/>
      <xsd:element name="s" type="CT_XStringElement"/>
      <xsd:element name="x" type="CT_Index"/>
    </xsd:choice>
    <xsd:attribute name="count" use="optional" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableMissing"/>
  <xsd:complexType name="CT_TextPr">
    <xsd:sequence>
      <xsd:element name="textFields" minOccurs="0" maxOccurs="1" type="CT_TextFields"/>
    </xsd:sequence>
    <xsd:attribute name="prompt" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="fileType" use="optional" type="ST_FileType" default="win"/>
    <xsd:attribute name="codePage" use="optional" type="xsd:unsignedInt" default="1252"/>
    <xsd:attribute name="characterSet" use="optional" type="xsd:string"/>
    <xsd:attribute name="firstRow" use="optional" type="xsd:unsignedInt" default="1"/>
    <xsd:attribute name="sourceFile" use="optional" type="s:ST_Xstring" default=""/>
    <xsd:attribute name="delimited" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="decimal" use="optional" type="s:ST_Xstring" default="."/>
    <xsd:attribute name="thousands" use="optional" type="s:ST_Xstring" default=","/>
    <xsd:attribute name="tab" use="optional" type="xsd:boolean" default="true"/>
    <xsd:attribute name="space" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="comma" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="semicolon" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="consecutive" use="optional" type="xsd:boolean" default="false"/>
    <xsd:attribute name="qualifier" use="optional" type="ST_Qualifier" default="doubleQuote"/>
    <xsd:attribute name="delimiter" use="optional" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FileType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="mac"/>
      <xsd:enumeration value="win"/>
      <xsd:enumeration value="dos"/>
      <xsd:enumeration value="lin"/>
      <xsd:enumeration value="other"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Qualifier">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="doubleQuote"/>
      <xsd:enumeration value="singleQuote"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextFields">
    <xsd:sequence>
      <xsd:element name="textField" minOccurs="1" maxOccurs="unbounded" type="CT_TextField"/>
    </xsd:sequence>
    <xsd:attribute name="count" use="optional" type="xsd:unsignedInt" default="1"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextField">
    <xsd:attribute name="type" use="optional" type="ST_ExternalConnectionType" default="general"/>
    <xsd:attribute name="position" use="optional" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ExternalConnectionType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="general"/>
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="MDY"/>
      <xsd:enumeration value="DMY"/>
      <xsd:enumeration value="YMD"/>
      <xsd:enumeration value="MYD"/>
      <xsd:enumeration value="DYM"/>
      <xsd:enumeration value="YDM"/>
      <xsd:enumeration value="skip"/>
      <xsd:enumeration value="EMD"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="pivotCacheDefinition" type="CT_PivotCacheDefinition"/>
  <xsd:element name="pivotCacheRecords" type="CT_PivotCacheRecords"/>
  <xsd:element name="pivotTableDefinition" type="CT_pivotTableDefinition"/>
  <xsd:complexType name="CT_PivotCacheDefinition">
    <xsd:sequence>
      <xsd:element name="cacheSource" type="CT_CacheSource" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cacheFields" type="CT_CacheFields" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="cacheHierarchies" minOccurs="0" type="CT_CacheHierarchies"/>
      <xsd:element name="kpis" minOccurs="0" type="CT_PCDKPIs"/>
      <xsd:element name="tupleCache" minOccurs="0" type="CT_TupleCache"/>
      <xsd:element name="calculatedItems" minOccurs="0" type="CT_CalculatedItems"/>
      <xsd:element name="calculatedMembers" type="CT_CalculatedMembers" minOccurs="0"/>
      <xsd:element name="dimensions" type="CT_Dimensions" minOccurs="0"/>
      <xsd:element name="measureGroups" type="CT_MeasureGroups" minOccurs="0"/>
      <xsd:element name="maps" type="CT_MeasureDimensionMaps" minOccurs="0"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="invalid" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="saveData" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="refreshOnLoad" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="optimizeMemory" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="enableRefresh" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="refreshedBy" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="refreshedDate" type="xsd:double" use="optional"/>
    <xsd:attribute name="refreshedDateIso" type="xsd:dateTime" use="optional"/>
    <xsd:attribute name="backgroundQuery" type="xsd:boolean" default="false"/>
    <xsd:attribute name="missingItemsLimit" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="createdVersion" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="refreshedVersion" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="minRefreshableVersion" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="recordCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="upgradeOnRefresh" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="tupleCache" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="supportSubquery" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="supportAdvancedDrill" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CacheFields">
    <xsd:sequence>
      <xsd:element name="cacheField" type="CT_CacheField" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CacheField">
    <xsd:sequence>
      <xsd:element name="sharedItems" type="CT_SharedItems" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fieldGroup" minOccurs="0" type="CT_FieldGroup"/>
      <xsd:element name="mpMap" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="caption" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="propertyName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="serverField" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="uniqueList" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
    <xsd:attribute name="formula" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sqlType" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="hierarchy" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="level" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="databaseField" type="xsd:boolean" default="true"/>
    <xsd:attribute name="mappingCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="memberPropertyField" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CacheSource">
    <xsd:choice minOccurs="0" maxOccurs="1">
      <xsd:element name="worksheetSource" type="CT_WorksheetSource" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="consolidation" type="CT_Consolidation" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0"/>
    </xsd:choice>
    <xsd:attribute name="type" type="ST_SourceType" use="required"/>
    <xsd:attribute name="connectionId" type="xsd:unsignedInt" default="0" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SourceType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="worksheet"/>
      <xsd:enumeration value="external"/>
      <xsd:enumeration value="consolidation"/>
      <xsd:enumeration value="scenario"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_WorksheetSource">
    <xsd:attribute name="ref" type="ST_Ref" use="optional"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sheet" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Consolidation">
    <xsd:sequence>
      <xsd:element name="pages" type="CT_Pages" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rangeSets" type="CT_RangeSets" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="autoPage" type="xsd:boolean" default="true" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Pages">
    <xsd:sequence>
      <xsd:element name="page" type="CT_PCDSCPage" minOccurs="1" maxOccurs="4"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PCDSCPage">
    <xsd:sequence>
      <xsd:element name="pageItem" type="CT_PageItem" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageItem">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RangeSets">
    <xsd:sequence>
      <xsd:element name="rangeSet" type="CT_RangeSet" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RangeSet">
    <xsd:attribute name="i1" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="i2" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="i3" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="i4" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="ref" type="ST_Ref" use="optional"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sheet" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SharedItems">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="m" type="CT_Missing" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="n" type="CT_Number" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="b" type="CT_Boolean" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="e" type="CT_Error" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="s" type="CT_String" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="d" type="CT_DateTime" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>
    <xsd:attribute name="containsSemiMixedTypes" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="containsNonDate" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="containsDate" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="containsString" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="containsBlank" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="containsMixedTypes" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="containsNumber" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="containsInteger" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="minValue" type="xsd:double" use="optional"/>
    <xsd:attribute name="maxValue" type="xsd:double" use="optional"/>
    <xsd:attribute name="minDate" type="xsd:dateTime" use="optional"/>
    <xsd:attribute name="maxDate" type="xsd:dateTime" use="optional"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="longText" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Missing">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" maxOccurs="unbounded" type="CT_Tuples"/>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
    <xsd:attribute name="in" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="bc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="fc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="un" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="st" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Number">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" maxOccurs="unbounded" type="CT_Tuples"/>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="v" use="required" type="xsd:double"/>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
    <xsd:attribute name="in" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="bc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="fc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="un" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="st" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Boolean">
    <xsd:sequence>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="v" use="required" type="xsd:boolean"/>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Error">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" type="CT_Tuples"/>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="v" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
    <xsd:attribute name="in" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="bc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="fc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="un" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="st" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_String">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" maxOccurs="unbounded" type="CT_Tuples"/>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="v" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
    <xsd:attribute name="in" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="bc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="fc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="un" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="st" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DateTime">
    <xsd:sequence>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="v" use="required" type="xsd:dateTime"/>
    <xsd:attribute name="u" type="xsd:boolean"/>
    <xsd:attribute name="f" type="xsd:boolean"/>
    <xsd:attribute name="c" type="s:ST_Xstring"/>
    <xsd:attribute name="cp" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FieldGroup">
    <xsd:sequence>
      <xsd:element name="rangePr" minOccurs="0" type="CT_RangePr"/>
      <xsd:element name="discretePr" minOccurs="0" type="CT_DiscretePr"/>
      <xsd:element name="groupItems" minOccurs="0" type="CT_GroupItems"/>
    </xsd:sequence>
    <xsd:attribute name="par" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="base" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RangePr">
    <xsd:attribute name="autoStart" type="xsd:boolean" default="true"/>
    <xsd:attribute name="autoEnd" type="xsd:boolean" default="true"/>
    <xsd:attribute name="groupBy" type="ST_GroupBy" default="range"/>
    <xsd:attribute name="startNum" type="xsd:double"/>
    <xsd:attribute name="endNum" type="xsd:double"/>
    <xsd:attribute name="startDate" type="xsd:dateTime"/>
    <xsd:attribute name="endDate" type="xsd:dateTime"/>
    <xsd:attribute name="groupInterval" type="xsd:double" default="1"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_GroupBy">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="range"/>
      <xsd:enumeration value="seconds"/>
      <xsd:enumeration value="minutes"/>
      <xsd:enumeration value="hours"/>
      <xsd:enumeration value="days"/>
      <xsd:enumeration value="months"/>
      <xsd:enumeration value="quarters"/>
      <xsd:enumeration value="years"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DiscretePr">
    <xsd:sequence>
      <xsd:element name="x" maxOccurs="unbounded" type="CT_Index"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupItems">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="m" type="CT_Missing"/>
      <xsd:element name="n" type="CT_Number"/>
      <xsd:element name="b" type="CT_Boolean"/>
      <xsd:element name="e" type="CT_Error"/>
      <xsd:element name="s" type="CT_String"/>
      <xsd:element name="d" type="CT_DateTime"/>
    </xsd:choice>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotCacheRecords">
    <xsd:sequence>
      <xsd:element name="r" minOccurs="0" maxOccurs="unbounded" type="CT_Record"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Record">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="m" type="CT_Missing"/>
      <xsd:element name="n" type="CT_Number"/>
      <xsd:element name="b" type="CT_Boolean"/>
      <xsd:element name="e" type="CT_Error"/>
      <xsd:element name="s" type="CT_String"/>
      <xsd:element name="d" type="CT_DateTime"/>
      <xsd:element name="x" type="CT_Index"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_PCDKPIs">
    <xsd:sequence>
      <xsd:element name="kpi" minOccurs="0" maxOccurs="unbounded" type="CT_PCDKPI"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PCDKPI">
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="displayFolder" type="s:ST_Xstring"/>
    <xsd:attribute name="measureGroup" type="s:ST_Xstring"/>
    <xsd:attribute name="parent" type="s:ST_Xstring"/>
    <xsd:attribute name="value" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="goal" type="s:ST_Xstring"/>
    <xsd:attribute name="status" type="s:ST_Xstring"/>
    <xsd:attribute name="trend" type="s:ST_Xstring"/>
    <xsd:attribute name="weight" type="s:ST_Xstring"/>
    <xsd:attribute name="time" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CacheHierarchies">
    <xsd:sequence>
      <xsd:element name="cacheHierarchy" minOccurs="0" maxOccurs="unbounded"
        type="CT_CacheHierarchy"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CacheHierarchy">
    <xsd:sequence>
      <xsd:element name="fieldsUsage" minOccurs="0" type="CT_FieldsUsage"/>
      <xsd:element name="groupLevels" minOccurs="0" type="CT_GroupLevels"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="measure" type="xsd:boolean" default="false"/>
    <xsd:attribute name="set" type="xsd:boolean" default="false"/>
    <xsd:attribute name="parentSet" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="iconSet" type="xsd:int" default="0"/>
    <xsd:attribute name="attribute" type="xsd:boolean" default="false"/>
    <xsd:attribute name="time" type="xsd:boolean" default="false"/>
    <xsd:attribute name="keyAttribute" type="xsd:boolean" default="false"/>
    <xsd:attribute name="defaultMemberUniqueName" type="s:ST_Xstring"/>
    <xsd:attribute name="allUniqueName" type="s:ST_Xstring"/>
    <xsd:attribute name="allCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="dimensionUniqueName" type="s:ST_Xstring"/>
    <xsd:attribute name="displayFolder" type="s:ST_Xstring"/>
    <xsd:attribute name="measureGroup" type="s:ST_Xstring"/>
    <xsd:attribute name="measures" type="xsd:boolean" default="false"/>
    <xsd:attribute name="count" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="oneField" type="xsd:boolean" default="false"/>
    <xsd:attribute name="memberValueDatatype" use="optional" type="xsd:unsignedShort"/>
    <xsd:attribute name="unbalanced" use="optional" type="xsd:boolean"/>
    <xsd:attribute name="unbalancedGroup" use="optional" type="xsd:boolean"/>
    <xsd:attribute name="hidden" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FieldsUsage">
    <xsd:sequence>
      <xsd:element name="fieldUsage" minOccurs="0" maxOccurs="unbounded" type="CT_FieldUsage"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FieldUsage">
    <xsd:attribute name="x" use="required" type="xsd:int"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupLevels">
    <xsd:sequence>
      <xsd:element name="groupLevel" maxOccurs="unbounded" type="CT_GroupLevel"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupLevel">
    <xsd:sequence>
      <xsd:element name="groups" minOccurs="0" type="CT_Groups"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="user" type="xsd:boolean" default="false"/>
    <xsd:attribute name="customRollUp" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Groups">
    <xsd:sequence>
      <xsd:element name="group" maxOccurs="unbounded" type="CT_LevelGroup"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LevelGroup">
    <xsd:sequence>
      <xsd:element name="groupMembers" type="CT_GroupMembers"/>
    </xsd:sequence>
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="uniqueParent" type="s:ST_Xstring"/>
    <xsd:attribute name="id" type="xsd:int"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupMembers">
    <xsd:sequence>
      <xsd:element name="groupMember" maxOccurs="unbounded" type="CT_GroupMember"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GroupMember">
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="group" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TupleCache">
    <xsd:sequence>
      <xsd:element name="entries" minOccurs="0" type="CT_PCDSDTCEntries"/>
      <xsd:element name="sets" minOccurs="0" type="CT_Sets"/>
      <xsd:element name="queryCache" minOccurs="0" type="CT_QueryCache"/>
      <xsd:element name="serverFormats" minOccurs="0" maxOccurs="1" type="CT_ServerFormats"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ServerFormat">
    <xsd:attribute name="culture" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="format" use="optional" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ServerFormats">
    <xsd:sequence>
      <xsd:element name="serverFormat" type="CT_ServerFormat" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PCDSDTCEntries">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="m" type="CT_Missing"/>
      <xsd:element name="n" type="CT_Number"/>
      <xsd:element name="e" type="CT_Error"/>
      <xsd:element name="s" type="CT_String"/>
    </xsd:choice>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Tuples">
    <xsd:sequence>
      <xsd:element name="tpl" type="CT_Tuple" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="c" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Tuple">
    <xsd:attribute name="fld" type="xsd:unsignedInt"/>
    <xsd:attribute name="hier" type="xsd:unsignedInt"/>
    <xsd:attribute name="item" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Sets">
    <xsd:sequence>
      <xsd:element name="set" maxOccurs="unbounded" type="CT_Set"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Set">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" maxOccurs="unbounded" type="CT_Tuples"/>
      <xsd:element name="sortByTuple" minOccurs="0" type="CT_Tuples"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
    <xsd:attribute name="maxRank" use="required" type="xsd:int"/>
    <xsd:attribute name="setDefinition" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="sortType" type="ST_SortType" default="none"/>
    <xsd:attribute name="queryFailed" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SortType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="ascending"/>
      <xsd:enumeration value="descending"/>
      <xsd:enumeration value="ascendingAlpha"/>
      <xsd:enumeration value="descendingAlpha"/>
      <xsd:enumeration value="ascendingNatural"/>
      <xsd:enumeration value="descendingNatural"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_QueryCache">
    <xsd:sequence>
      <xsd:element name="query" maxOccurs="unbounded" type="CT_Query"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Query">
    <xsd:sequence>
      <xsd:element name="tpls" minOccurs="0" type="CT_Tuples"/>
    </xsd:sequence>
    <xsd:attribute name="mdx" use="required" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalculatedItems">
    <xsd:sequence>
      <xsd:element name="calculatedItem" maxOccurs="unbounded" type="CT_CalculatedItem"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalculatedItem">
    <xsd:sequence>
      <xsd:element name="pivotArea" type="CT_PivotArea"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="field" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="formula" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalculatedMembers">
    <xsd:sequence>
      <xsd:element name="calculatedMember" maxOccurs="unbounded" type="CT_CalculatedMember"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalculatedMember">
    <xsd:sequence minOccurs="0">
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="mdx" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="memberName" type="s:ST_Xstring"/>
    <xsd:attribute name="hierarchy" type="s:ST_Xstring"/>
    <xsd:attribute name="parent" type="s:ST_Xstring"/>
    <xsd:attribute name="solveOrder" type="xsd:int" default="0"/>
    <xsd:attribute name="set" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_pivotTableDefinition">
    <xsd:sequence>
      <xsd:element name="location" type="CT_Location"/>
      <xsd:element name="pivotFields" type="CT_PivotFields" minOccurs="0"/>
      <xsd:element name="rowFields" type="CT_RowFields" minOccurs="0"/>
      <xsd:element name="rowItems" type="CT_rowItems" minOccurs="0"/>
      <xsd:element name="colFields" type="CT_ColFields" minOccurs="0"/>
      <xsd:element name="colItems" type="CT_colItems" minOccurs="0"/>
      <xsd:element name="pageFields" type="CT_PageFields" minOccurs="0"/>
      <xsd:element name="dataFields" type="CT_DataFields" minOccurs="0"/>
      <xsd:element name="formats" type="CT_Formats" minOccurs="0"/>
      <xsd:element name="conditionalFormats" type="CT_ConditionalFormats" minOccurs="0"/>
      <xsd:element name="chartFormats" type="CT_ChartFormats" minOccurs="0"/>
      <xsd:element name="pivotHierarchies" type="CT_PivotHierarchies" minOccurs="0"/>
      <xsd:element name="pivotTableStyleInfo" minOccurs="0" maxOccurs="1" type="CT_PivotTableStyle"/>
      <xsd:element name="filters" minOccurs="0" maxOccurs="1" type="CT_PivotFilters"/>
      <xsd:element name="rowHierarchiesUsage" type="CT_RowHierarchiesUsage" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="colHierarchiesUsage" type="CT_ColHierarchiesUsage" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="cacheId" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="dataOnRows" type="xsd:boolean" default="false"/>
    <xsd:attribute name="dataPosition" type="xsd:unsignedInt" use="optional"/>
    <xsd:attributeGroup ref="AG_AutoFormat"/>
    <xsd:attribute name="dataCaption" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="grandTotalCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="errorCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="showError" type="xsd:boolean" default="false"/>
    <xsd:attribute name="missingCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="showMissing" type="xsd:boolean" default="true"/>
    <xsd:attribute name="pageStyle" type="s:ST_Xstring"/>
    <xsd:attribute name="pivotTableStyle" type="s:ST_Xstring"/>
    <xsd:attribute name="vacatedStyle" type="s:ST_Xstring"/>
    <xsd:attribute name="tag" type="s:ST_Xstring"/>
    <xsd:attribute name="updatedVersion" type="xsd:unsignedByte" default="0"/>
    <xsd:attribute name="minRefreshableVersion" type="xsd:unsignedByte" default="0"/>
    <xsd:attribute name="asteriskTotals" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showItems" type="xsd:boolean" default="true"/>
    <xsd:attribute name="editData" type="xsd:boolean" default="false"/>
    <xsd:attribute name="disableFieldList" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showCalcMbrs" type="xsd:boolean" default="true"/>
    <xsd:attribute name="visualTotals" type="xsd:boolean" default="true"/>
    <xsd:attribute name="showMultipleLabel" type="xsd:boolean" default="true"/>
    <xsd:attribute name="showDataDropDown" type="xsd:boolean" default="true"/>
    <xsd:attribute name="showDrill" type="xsd:boolean" default="true"/>
    <xsd:attribute name="printDrill" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showMemberPropertyTips" type="xsd:boolean" default="true"/>
    <xsd:attribute name="showDataTips" type="xsd:boolean" default="true"/>
    <xsd:attribute name="enableWizard" type="xsd:boolean" default="true"/>
    <xsd:attribute name="enableDrill" type="xsd:boolean" default="true"/>
    <xsd:attribute name="enableFieldProperties" type="xsd:boolean" default="true"/>
    <xsd:attribute name="preserveFormatting" type="xsd:boolean" default="true"/>
    <xsd:attribute name="useAutoFormatting" type="xsd:boolean" default="false"/>
    <xsd:attribute name="pageWrap" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="pageOverThenDown" type="xsd:boolean" default="false"/>
    <xsd:attribute name="subtotalHiddenItems" type="xsd:boolean" default="false"/>
    <xsd:attribute name="rowGrandTotals" type="xsd:boolean" default="true"/>
    <xsd:attribute name="colGrandTotals" type="xsd:boolean" default="true"/>
    <xsd:attribute name="fieldPrintTitles" type="xsd:boolean" default="false"/>
    <xsd:attribute name="itemPrintTitles" type="xsd:boolean" default="false"/>
    <xsd:attribute name="mergeItem" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showDropZones" type="xsd:boolean" default="true"/>
    <xsd:attribute name="createdVersion" type="xsd:unsignedByte" default="0"/>
    <xsd:attribute name="indent" type="xsd:unsignedInt" default="1"/>
    <xsd:attribute name="showEmptyRow" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showEmptyCol" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showHeaders" type="xsd:boolean" default="true"/>
    <xsd:attribute name="compact" type="xsd:boolean" default="true"/>
    <xsd:attribute name="outline" type="xsd:boolean" default="false"/>
    <xsd:attribute name="outlineData" type="xsd:boolean" default="false"/>
    <xsd:attribute name="compactData" type="xsd:boolean" default="true"/>
    <xsd:attribute name="published" type="xsd:boolean" default="false"/>
    <xsd:attribute name="gridDropZones" type="xsd:boolean" default="false"/>
    <xsd:attribute name="immersive" type="xsd:boolean" default="true"/>
    <xsd:attribute name="multipleFieldFilters" type="xsd:boolean" default="true"/>
    <xsd:attribute name="chartFormat" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="rowHeaderCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="colHeaderCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="fieldListSortAscending" type="xsd:boolean" default="false"/>
    <xsd:attribute name="mdxSubqueries" type="xsd:boolean" default="false"/>
    <xsd:attribute name="customListSort" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Location">
    <xsd:attribute name="ref" use="required" type="ST_Ref"/>
    <xsd:attribute name="firstHeaderRow" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="firstDataRow" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="firstDataCol" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="rowPageCount" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="colPageCount" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotFields">
    <xsd:sequence>
      <xsd:element name="pivotField" maxOccurs="unbounded" type="CT_PivotField"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotField">
    <xsd:sequence>
      <xsd:element name="items" minOccurs="0" type="CT_Items"/>
      <xsd:element name="autoSortScope" minOccurs="0" type="CT_AutoSortScope"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring"/>
    <xsd:attribute name="axis" use="optional" type="ST_Axis"/>
    <xsd:attribute name="dataField" type="xsd:boolean" default="false"/>
    <xsd:attribute name="subtotalCaption" type="s:ST_Xstring"/>
    <xsd:attribute name="showDropDowns" type="xsd:boolean" default="true"/>
    <xsd:attribute name="hiddenLevel" type="xsd:boolean" default="false"/>
    <xsd:attribute name="uniqueMemberProperty" type="s:ST_Xstring"/>
    <xsd:attribute name="compact" type="xsd:boolean" default="true"/>
    <xsd:attribute name="allDrilled" type="xsd:boolean" default="false"/>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
    <xsd:attribute name="outline" type="xsd:boolean" default="true"/>
    <xsd:attribute name="subtotalTop" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToRow" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToCol" type="xsd:boolean" default="true"/>
    <xsd:attribute name="multipleItemSelectionAllowed" type="xsd:boolean" default="false"/>
    <xsd:attribute name="dragToPage" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToData" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragOff" type="xsd:boolean" default="true"/>
    <xsd:attribute name="showAll" type="xsd:boolean" default="true"/>
    <xsd:attribute name="insertBlankRow" type="xsd:boolean" default="false"/>
    <xsd:attribute name="serverField" type="xsd:boolean" default="false"/>
    <xsd:attribute name="insertPageBreak" type="xsd:boolean" default="false"/>
    <xsd:attribute name="autoShow" type="xsd:boolean" default="false"/>
    <xsd:attribute name="topAutoShow" type="xsd:boolean" default="true"/>
    <xsd:attribute name="hideNewItems" type="xsd:boolean" default="false"/>
    <xsd:attribute name="measureFilter" type="xsd:boolean" default="false"/>
    <xsd:attribute name="includeNewItemsInFilter" type="xsd:boolean" default="false"/>
    <xsd:attribute name="itemPageCount" type="xsd:unsignedInt" default="10"/>
    <xsd:attribute name="sortType" type="ST_FieldSortType" default="manual"/>
    <xsd:attribute name="dataSourceSort" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="nonAutoSortDefault" type="xsd:boolean" default="false"/>
    <xsd:attribute name="rankBy" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="defaultSubtotal" type="xsd:boolean" default="true"/>
    <xsd:attribute name="sumSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="countASubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="avgSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="maxSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="minSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="productSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="countSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="stdDevSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="stdDevPSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="varSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="varPSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showPropCell" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showPropTip" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showPropAsCaption" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="defaultAttributeDrillState" type="xsd:boolean" use="optional"
      default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AutoSortScope">
    <xsd:sequence>
      <xsd:element name="pivotArea" type="CT_PivotArea"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Items">
    <xsd:sequence>
      <xsd:element name="item" maxOccurs="unbounded" type="CT_Item"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Item">
    <xsd:attribute name="n" type="s:ST_Xstring"/>
    <xsd:attribute name="t" type="ST_ItemType" default="data"/>
    <xsd:attribute name="h" type="xsd:boolean" default="false"/>
    <xsd:attribute name="s" type="xsd:boolean" default="false"/>
    <xsd:attribute name="sd" type="xsd:boolean" default="true"/>
    <xsd:attribute name="f" type="xsd:boolean" default="false"/>
    <xsd:attribute name="m" type="xsd:boolean" default="false"/>
    <xsd:attribute name="c" type="xsd:boolean" default="false"/>
    <xsd:attribute name="x" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="d" type="xsd:boolean" default="false"/>
    <xsd:attribute name="e" type="xsd:boolean" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageFields">
    <xsd:sequence>
      <xsd:element name="pageField" maxOccurs="unbounded" type="CT_PageField"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageField">
    <xsd:sequence minOccurs="0">
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="fld" use="required" type="xsd:int"/>
    <xsd:attribute name="item" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="hier" type="xsd:int"/>
    <xsd:attribute name="name" type="s:ST_Xstring"/>
    <xsd:attribute name="cap" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataFields">
    <xsd:sequence>
      <xsd:element name="dataField" maxOccurs="unbounded" type="CT_DataField"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataField">
    <xsd:sequence>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" use="optional" type="s:ST_Xstring"/>
    <xsd:attribute name="fld" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="subtotal" type="ST_DataConsolidateFunction" default="sum"/>
    <xsd:attribute name="showDataAs" type="ST_ShowDataAs" default="normal"/>
    <xsd:attribute name="baseField" type="xsd:int" default="-1"/>
    <xsd:attribute name="baseItem" type="xsd:unsignedInt" default="1048832"/>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_rowItems">
    <xsd:sequence>
      <xsd:element name="i" maxOccurs="unbounded" type="CT_I"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_colItems">
    <xsd:sequence>
      <xsd:element name="i" maxOccurs="unbounded" type="CT_I"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_I">
    <xsd:sequence>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_X"/>
    </xsd:sequence>
    <xsd:attribute name="t" type="ST_ItemType" default="data"/>
    <xsd:attribute name="r" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="i" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_X">
    <xsd:attribute name="v" type="xsd:int" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RowFields">
    <xsd:sequence>
      <xsd:element name="field" maxOccurs="unbounded" type="CT_Field"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColFields">
    <xsd:sequence>
      <xsd:element name="field" maxOccurs="unbounded" type="CT_Field"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Field">
    <xsd:attribute name="x" type="xsd:int" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Formats">
    <xsd:sequence>
      <xsd:element name="format" maxOccurs="unbounded" type="CT_Format"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Format">
    <xsd:sequence>
      <xsd:element name="pivotArea" type="CT_PivotArea"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="action" type="ST_FormatAction" default="formatting"/>
    <xsd:attribute name="dxfId" type="ST_DxfId" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConditionalFormats">
    <xsd:sequence>
      <xsd:element name="conditionalFormat" maxOccurs="unbounded" type="CT_ConditionalFormat"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ConditionalFormat">
    <xsd:sequence>
      <xsd:element name="pivotAreas" type="CT_PivotAreas"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="scope" type="ST_Scope" default="selection"/>
    <xsd:attribute name="type" type="ST_Type" default="none"/>
    <xsd:attribute name="priority" use="required" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotAreas">
    <xsd:sequence>
      <xsd:element name="pivotArea" minOccurs="0" maxOccurs="unbounded" type="CT_PivotArea"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Scope">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="selection"/>
      <xsd:enumeration value="data"/>
      <xsd:enumeration value="field"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Type">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="all"/>
      <xsd:enumeration value="row"/>
      <xsd:enumeration value="column"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ChartFormats">
    <xsd:sequence>
      <xsd:element name="chartFormat" maxOccurs="unbounded" type="CT_ChartFormat"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartFormat">
    <xsd:sequence>
      <xsd:element name="pivotArea" type="CT_PivotArea"/>
    </xsd:sequence>
    <xsd:attribute name="chart" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="format" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="series" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotHierarchies">
    <xsd:sequence>
      <xsd:element name="pivotHierarchy" maxOccurs="unbounded" type="CT_PivotHierarchy"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotHierarchy">
    <xsd:sequence>
      <xsd:element name="mps" minOccurs="0" type="CT_MemberProperties"/>
      <xsd:element name="members" minOccurs="0" maxOccurs="unbounded" type="CT_Members"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="outline" type="xsd:boolean" default="false"/>
    <xsd:attribute name="multipleItemSelectionAllowed" type="xsd:boolean" default="false"/>
    <xsd:attribute name="subtotalTop" type="xsd:boolean" default="false"/>
    <xsd:attribute name="showInFieldList" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToRow" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToCol" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToPage" type="xsd:boolean" default="true"/>
    <xsd:attribute name="dragToData" type="xsd:boolean" default="false"/>
    <xsd:attribute name="dragOff" type="xsd:boolean" default="true"/>
    <xsd:attribute name="includeNewItemsInFilter" type="xsd:boolean" default="false"/>
    <xsd:attribute name="caption" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RowHierarchiesUsage">
    <xsd:sequence>
      <xsd:element name="rowHierarchyUsage" minOccurs="1" maxOccurs="unbounded"
        type="CT_HierarchyUsage"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColHierarchiesUsage">
    <xsd:sequence>
      <xsd:element name="colHierarchyUsage" minOccurs="1" maxOccurs="unbounded"
        type="CT_HierarchyUsage"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_HierarchyUsage">
    <xsd:attribute name="hierarchyUsage" type="xsd:int" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MemberProperties">
    <xsd:sequence>
      <xsd:element name="mp" maxOccurs="unbounded" type="CT_MemberProperty"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MemberProperty">
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="showCell" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showTip" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showAsCaption" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="nameLen" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="pPos" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="pLen" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="level" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="field" use="required" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Members">
    <xsd:sequence>
      <xsd:element name="member" maxOccurs="unbounded" type="CT_Member"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
    <xsd:attribute name="level" use="optional" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Member">
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Dimensions">
    <xsd:sequence>
      <xsd:element name="dimension" minOccurs="0" maxOccurs="unbounded" type="CT_PivotDimension"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotDimension">
    <xsd:attribute name="measure" type="xsd:boolean" default="false"/>
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="uniqueName" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="required" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MeasureGroups">
    <xsd:sequence>
      <xsd:element name="measureGroup" minOccurs="0" maxOccurs="unbounded" type="CT_MeasureGroup"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MeasureDimensionMaps">
    <xsd:sequence>
      <xsd:element name="map" minOccurs="0" maxOccurs="unbounded" type="CT_MeasureDimensionMap"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MeasureGroup">
    <xsd:attribute name="name" use="required" type="s:ST_Xstring"/>
    <xsd:attribute name="caption" use="required" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MeasureDimensionMap">
    <xsd:attribute name="measureGroup" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="dimension" use="optional" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotTableStyle">
    <xsd:attribute name="name" type="xsd:string"/>
    <xsd:attribute name="showRowHeaders" type="xsd:boolean"/>
    <xsd:attribute name="showColHeaders" type="xsd:boolean"/>
    <xsd:attribute name="showRowStripes" type="xsd:boolean"/>
    <xsd:attribute name="showColStripes" type="xsd:boolean"/>
    <xsd:attribute name="showLastColumn" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotFilters">
    <xsd:sequence>
      <xsd:element name="filter" minOccurs="0" maxOccurs="unbounded" type="CT_PivotFilter"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotFilter">
    <xsd:sequence>
      <xsd:element name="autoFilter" minOccurs="1" maxOccurs="1" type="CT_AutoFilter"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="fld" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="mpFld" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="type" use="required" type="ST_PivotFilterType"/>
    <xsd:attribute name="evalOrder" use="optional" type="xsd:int" default="0"/>
    <xsd:attribute name="id" use="required" type="xsd:unsignedInt"/>
    <xsd:attribute name="iMeasureHier" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="iMeasureFld" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="name" type="s:ST_Xstring"/>
    <xsd:attribute name="description" type="s:ST_Xstring"/>
    <xsd:attribute name="stringValue1" type="s:ST_Xstring"/>
    <xsd:attribute name="stringValue2" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ShowDataAs">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="difference"/>
      <xsd:enumeration value="percent"/>
      <xsd:enumeration value="percentDiff"/>
      <xsd:enumeration value="runTotal"/>
      <xsd:enumeration value="percentOfRow"/>
      <xsd:enumeration value="percentOfCol"/>
      <xsd:enumeration value="percentOfTotal"/>
      <xsd:enumeration value="index"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ItemType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="data"/>
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="sum"/>
      <xsd:enumeration value="countA"/>
      <xsd:enumeration value="avg"/>
      <xsd:enumeration value="max"/>
      <xsd:enumeration value="min"/>
      <xsd:enumeration value="product"/>
      <xsd:enumeration value="count"/>
      <xsd:enumeration value="stdDev"/>
      <xsd:enumeration value="stdDevP"/>
      <xsd:enumeration value="var"/>
      <xsd:enumeration value="varP"/>
      <xsd:enumeration value="grand"/>
      <xsd:enumeration value="blank"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FormatAction">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="blank"/>
      <xsd:enumeration value="formatting"/>
      <xsd:enumeration value="drill"/>
      <xsd:enumeration value="formula"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FieldSortType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="manual"/>
      <xsd:enumeration value="ascending"/>
      <xsd:enumeration value="descending"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PivotFilterType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="unknown"/>
      <xsd:enumeration value="count"/>
      <xsd:enumeration value="percent"/>
      <xsd:enumeration value="sum"/>
      <xsd:enumeration value="captionEqual"/>
      <xsd:enumeration value="captionNotEqual"/>
      <xsd:enumeration value="captionBeginsWith"/>
      <xsd:enumeration value="captionNotBeginsWith"/>
      <xsd:enumeration value="captionEndsWith"/>
      <xsd:enumeration value="captionNotEndsWith"/>
      <xsd:enumeration value="captionContains"/>
      <xsd:enumeration value="captionNotContains"/>
      <xsd:enumeration value="captionGreaterThan"/>
      <xsd:enumeration value="captionGreaterThanOrEqual"/>
      <xsd:enumeration value="captionLessThan"/>
      <xsd:enumeration value="captionLessThanOrEqual"/>
      <xsd:enumeration value="captionBetween"/>
      <xsd:enumeration value="captionNotBetween"/>
      <xsd:enumeration value="valueEqual"/>
      <xsd:enumeration value="valueNotEqual"/>
      <xsd:enumeration value="valueGreaterThan"/>
      <xsd:enumeration value="valueGreaterThanOrEqual"/>
      <xsd:enumeration value="valueLessThan"/>
      <xsd:enumeration value="valueLessThanOrEqual"/>
      <xsd:enumeration value="valueBetween"/>
      <xsd:enumeration value="valueNotBetween"/>
      <xsd:enumeration value="dateEqual"/>
      <xsd:enumeration value="dateNotEqual"/>
      <xsd:enumeration value="dateOlderThan"/>
      <xsd:enumeration value="dateOlderThanOrEqual"/>
      <xsd:enumeration value="dateNewerThan"/>
      <xsd:enumeration value="dateNewerThanOrEqual"/>
      <xsd:enumeration value="dateBetween"/>
      <xsd:enumeration value="dateNotBetween"/>
      <xsd:enumeration value="tomorrow"/>
      <xsd:enumeration value="today"/>
      <xsd:enumeration value="yesterday"/>
      <xsd:enumeration value="nextWeek"/>
      <xsd:enumeration value="thisWeek"/>
      <xsd:enumeration value="lastWeek"/>
      <xsd:enumeration value="nextMonth"/>
      <xsd:enumeration value="thisMonth"/>
      <xsd:enumeration value="lastMonth"/>
      <xsd:enumeration value="nextQuarter"/>
      <xsd:enumeration value="thisQuarter"/>
      <xsd:enumeration value="lastQuarter"/>
      <xsd:enumeration value="nextYear"/>
      <xsd:enumeration value="thisYear"/>
      <xsd:enumeration value="lastYear"/>
      <xsd:enumeration value="yearToDate"/>
      <xsd:enumeration value="Q1"/>
      <xsd:enumeration value="Q2"/>
      <xsd:enumeration value="Q3"/>
      <xsd:enumeration value="Q4"/>
      <xsd:enumeration value="M1"/>
      <xsd:enumeration value="M2"/>
      <xsd:enumeration value="M3"/>
      <xsd:enumeration value="M4"/>
      <xsd:enumeration value="M5"/>
      <xsd:enumeration value="M6"/>
      <xsd:enumeration value="M7"/>
      <xsd:enumeration value="M8"/>
      <xsd:enumeration value="M9"/>
      <xsd:enumeration value="M10"/>
      <xsd:enumeration value="M11"/>
      <xsd:enumeration value="M12"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PivotArea">
    <xsd:sequence>
      <xsd:element name="references" minOccurs="0" type="CT_PivotAreaReferences"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="field" use="optional" type="xsd:int"/>
    <xsd:attribute name="type" type="ST_PivotAreaType" default="normal"/>
    <xsd:attribute name="dataOnly" type="xsd:boolean" default="true"/>
    <xsd:attribute name="labelOnly" type="xsd:boolean" default="false"/>
    <xsd:attribute name="grandRow" type="xsd:boolean" default="false"/>
    <xsd:attribute name="grandCol" type="xsd:boolean" default="false"/>
    <xsd:attribute name="cacheIndex" type="xsd:boolean" default="false"/>
    <xsd:attribute name="outline" type="xsd:boolean" default="true"/>
    <xsd:attribute name="offset" type="ST_Ref"/>
    <xsd:attribute name="collapsedLevelsAreSubtotals" type="xsd:boolean" default="false"/>
    <xsd:attribute name="axis" type="ST_Axis" use="optional"/>
    <xsd:attribute name="fieldPosition" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PivotAreaType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="data"/>
      <xsd:enumeration value="all"/>
      <xsd:enumeration value="origin"/>
      <xsd:enumeration value="button"/>
      <xsd:enumeration value="topEnd"/>
      <xsd:enumeration value="topRight"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PivotAreaReferences">
    <xsd:sequence>
      <xsd:element name="reference" maxOccurs="unbounded" type="CT_PivotAreaReference"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotAreaReference">
    <xsd:sequence>
      <xsd:element name="x" minOccurs="0" maxOccurs="unbounded" type="CT_Index"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="field" use="optional" type="xsd:unsignedInt"/>
    <xsd:attribute name="count" type="xsd:unsignedInt"/>
    <xsd:attribute name="selected" type="xsd:boolean" default="true"/>
    <xsd:attribute name="byPosition" type="xsd:boolean" default="false"/>
    <xsd:attribute name="relative" type="xsd:boolean" default="false"/>
    <xsd:attribute name="defaultSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="sumSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="countASubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="avgSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="maxSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="minSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="productSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="countSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="stdDevSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="stdDevPSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="varSubtotal" type="xsd:boolean" default="false"/>
    <xsd:attribute name="varPSubtotal" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Index">
    <xsd:attribute name="v" use="required" type="xsd:unsignedInt"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Axis">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="axisRow"/>
      <xsd:enumeration value="axisCol"/>
      <xsd:enumeration value="axisPage"/>
      <xsd:enumeration value="axisValues"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="queryTable" type="CT_QueryTable"/>
  <xsd:complexType name="CT_QueryTable">
    <xsd:sequence>
      <xsd:element name="queryTableRefresh" type="CT_QueryTableRefresh" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="headers" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="rowNumbers" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="disableRefresh" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="backgroundRefresh" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="firstBackgroundRefresh" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="refreshOnLoad" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="growShrinkType" type="ST_GrowShrinkType" use="optional"
      default="insertDelete"/>
    <xsd:attribute name="fillFormulas" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="removeDataOnSave" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="disableEdit" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="preserveFormatting" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="adjustColumnWidth" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="intermediate" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="connectionId" type="xsd:unsignedInt" use="required"/>
    <xsd:attributeGroup ref="AG_AutoFormat"/>
  </xsd:complexType>
  <xsd:complexType name="CT_QueryTableRefresh">
    <xsd:sequence>
      <xsd:element name="queryTableFields" type="CT_QueryTableFields" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="queryTableDeletedFields" type="CT_QueryTableDeletedFields" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="sortState" minOccurs="0" maxOccurs="1" type="CT_SortState"/>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="preserveSortFilterLayout" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fieldIdWrapped" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="headersInLastRefresh" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="minimumVersion" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="nextId" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="unboundColumnsLeft" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="unboundColumnsRight" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_QueryTableDeletedFields">
    <xsd:sequence>
      <xsd:element name="deletedField" type="CT_DeletedField" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DeletedField">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_QueryTableFields">
    <xsd:sequence>
      <xsd:element name="queryTableField" type="CT_QueryTableField" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_QueryTableField">
    <xsd:sequence minOccurs="0">
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="dataBound" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="rowNumbers" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="fillFormulas" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="clipped" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="tableColumnId" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_GrowShrinkType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="insertDelete"/>
      <xsd:enumeration value="insertClear"/>
      <xsd:enumeration value="overwriteClear"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="sst" type="CT_Sst"/>
  <xsd:complexType name="CT_Sst">
    <xsd:sequence>
      <xsd:element name="si" type="CT_Rst" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="uniqueCount" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PhoneticType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="halfwidthKatakana"/>
      <xsd:enumeration value="fullwidthKatakana"/>
      <xsd:enumeration value="Hiragana"/>
      <xsd:enumeration value="noConversion"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PhoneticAlignment">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="noControl"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="distributed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PhoneticRun">
    <xsd:sequence>
      <xsd:element name="t" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="sb" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="eb" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RElt">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_RPrElt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="t" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_RPrElt">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="rFont" type="CT_FontName" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="charset" type="CT_IntProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="family" type="CT_IntProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="b" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="i" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="strike" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="outline" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shadow" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="condense" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extend" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="color" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sz" type="CT_FontSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="u" type="CT_UnderlineProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="vertAlign" type="CT_VerticalAlignFontProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scheme" type="CT_FontScheme" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_Rst">
    <xsd:sequence>
      <xsd:element name="t" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="r" type="CT_RElt" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rPh" type="CT_PhoneticRun" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="phoneticPr" minOccurs="0" maxOccurs="1" type="CT_PhoneticPr"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PhoneticPr">
    <xsd:attribute name="fontId" type="ST_FontId" use="required"/>
    <xsd:attribute name="type" type="ST_PhoneticType" use="optional" default="fullwidthKatakana"/>
    <xsd:attribute name="alignment" type="ST_PhoneticAlignment" use="optional" default="left"/>
  </xsd:complexType>
  <xsd:element name="headers" type="CT_RevisionHeaders"/>
  <xsd:element name="revisions" type="CT_Revisions"/>
  <xsd:complexType name="CT_RevisionHeaders">
    <xsd:sequence>
      <xsd:element name="header" type="CT_RevisionHeader" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="lastGuid" type="s:ST_Guid" use="optional"/>
    <xsd:attribute name="shared" type="xsd:boolean" default="true"/>
    <xsd:attribute name="diskRevisions" type="xsd:boolean" default="false"/>
    <xsd:attribute name="history" type="xsd:boolean" default="true"/>
    <xsd:attribute name="trackRevisions" type="xsd:boolean" default="true"/>
    <xsd:attribute name="exclusive" type="xsd:boolean" default="false"/>
    <xsd:attribute name="revisionId" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="version" type="xsd:int" default="1"/>
    <xsd:attribute name="keepChangeHistory" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="protected" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="preserveHistory" type="xsd:unsignedInt" default="30"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Revisions">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="rrc" type="CT_RevisionRowColumn" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rm" type="CT_RevisionMove" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcv" type="CT_RevisionCustomView" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rsnm" type="CT_RevisionSheetRename" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="ris" type="CT_RevisionInsertSheet" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcc" type="CT_RevisionCellChange" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rfmt" type="CT_RevisionFormatting" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="raf" type="CT_RevisionAutoFormatting" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rdn" type="CT_RevisionDefinedName" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcmt" type="CT_RevisionComment" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rqt" type="CT_RevisionQueryTableField" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcft" type="CT_RevisionConflict" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:attributeGroup name="AG_RevData">
    <xsd:attribute name="rId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="ua" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ra" type="xsd:boolean" use="optional" default="false"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_RevisionHeader">
    <xsd:sequence>
      <xsd:element name="sheetIdMap" minOccurs="1" maxOccurs="1" type="CT_SheetIdMap"/>
      <xsd:element name="reviewedList" minOccurs="0" maxOccurs="1" type="CT_ReviewedRevisions"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="dateTime" type="xsd:dateTime" use="required"/>
    <xsd:attribute name="maxSheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="userName" type="s:ST_Xstring" use="required"/>
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="minRId" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="maxRId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetIdMap">
    <xsd:sequence>
      <xsd:element name="sheetId" type="CT_SheetId" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetId">
    <xsd:attribute name="val" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ReviewedRevisions">
    <xsd:sequence>
      <xsd:element name="reviewed" type="CT_Reviewed" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Reviewed">
    <xsd:attribute name="rId" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_UndoInfo">
    <xsd:attribute name="index" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="exp" type="ST_FormulaExpression" use="required"/>
    <xsd:attribute name="ref3D" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="array" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="v" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="nf" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="cs" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="dr" type="ST_RefA" use="required"/>
    <xsd:attribute name="dn" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="r" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="sId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionRowColumn">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="undo" type="CT_UndoInfo" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcc" type="CT_RevisionCellChange" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rfmt" type="CT_RevisionFormatting" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="eol" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute name="action" type="ST_rwColActionType" use="required"/>
    <xsd:attribute name="edge" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionMove">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="undo" type="CT_UndoInfo" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rcc" type="CT_RevisionCellChange" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rfmt" type="CT_RevisionFormatting" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="source" type="ST_Ref" use="required"/>
    <xsd:attribute name="destination" type="ST_Ref" use="required"/>
    <xsd:attribute name="sourceSheetId" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionCustomView">
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="action" type="ST_RevisionAction" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionSheetRename">
    <xsd:sequence>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="oldName" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="newName" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionInsertSheet">
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="sheetPosition" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionCellChange">
    <xsd:sequence>
      <xsd:element name="oc" type="CT_Cell" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="nc" type="CT_Cell" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="odxf" type="CT_Dxf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ndxf" type="CT_Dxf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="odxf" type="xsd:boolean" default="false"/>
    <xsd:attribute name="xfDxf" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="s" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="dxf" type="xsd:boolean" default="false"/>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
    <xsd:attribute name="quotePrefix" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="oldQuotePrefix" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ph" type="xsd:boolean" default="false"/>
    <xsd:attribute name="oldPh" type="xsd:boolean" default="false"/>
    <xsd:attribute name="endOfListFormulaUpdate" type="xsd:boolean" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionFormatting">
    <xsd:sequence>
      <xsd:element name="dxf" type="CT_Dxf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="xfDxf" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="s" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="sqref" type="ST_Sqref" use="required"/>
    <xsd:attribute name="start" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="length" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionAutoFormatting">
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attributeGroup ref="AG_AutoFormat"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionComment">
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="cell" type="ST_CellRef" use="required"/>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="action" type="ST_RevisionAction" default="add"/>
    <xsd:attribute name="alwaysShow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="old" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="hiddenRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="hiddenColumn" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="author" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="oldLength" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="newLength" type="xsd:unsignedInt" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionDefinedName">
    <xsd:sequence>
      <xsd:element name="formula" type="ST_Formula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oldFormula" type="ST_Formula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="localSheetId" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="customView" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="function" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="oldFunction" type="xsd:boolean" default="false"/>
    <xsd:attribute name="functionGroupId" type="xsd:unsignedByte" use="optional"/>
    <xsd:attribute name="oldFunctionGroupId" type="xsd:unsignedByte" use="optional"/>
    <xsd:attribute name="shortcutKey" type="xsd:unsignedByte" use="optional"/>
    <xsd:attribute name="oldShortcutKey" type="xsd:unsignedByte" use="optional"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="oldHidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="customMenu" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oldCustomMenu" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="description" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oldDescription" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="help" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oldHelp" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="statusBar" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oldStatusBar" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="comment" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oldComment" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionConflict">
    <xsd:attributeGroup ref="AG_RevData"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RevisionQueryTableField">
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute name="fieldId" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_rwColActionType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="insertRow"/>
      <xsd:enumeration value="deleteRow"/>
      <xsd:enumeration value="insertCol"/>
      <xsd:enumeration value="deleteCol"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RevisionAction">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="add"/>
      <xsd:enumeration value="delete"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FormulaExpression">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="ref"/>
      <xsd:enumeration value="refError"/>
      <xsd:enumeration value="area"/>
      <xsd:enumeration value="areaError"/>
      <xsd:enumeration value="computedArea"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="users" type="CT_Users"/>
  <xsd:complexType name="CT_Users">
    <xsd:sequence>
      <xsd:element name="userInfo" minOccurs="0" maxOccurs="256" type="CT_SharedUser"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SharedUser">
    <xsd:sequence>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="id" type="xsd:int" use="required"/>
    <xsd:attribute name="dateTime" type="xsd:dateTime" use="required"/>
  </xsd:complexType>
  <xsd:element name="worksheet" type="CT_Worksheet"/>
  <xsd:element name="chartsheet" type="CT_Chartsheet"/>
  <xsd:element name="dialogsheet" type="CT_Dialogsheet"/>
  <xsd:complexType name="CT_Macrosheet">
    <xsd:sequence>
      <xsd:element name="sheetPr" type="CT_SheetPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dimension" type="CT_SheetDimension" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetViews" type="CT_SheetViews" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetFormatPr" type="CT_SheetFormatPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cols" type="CT_Cols" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="sheetData" type="CT_SheetData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sheetProtection" type="CT_SheetProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="autoFilter" type="CT_AutoFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sortState" type="CT_SortState" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dataConsolidate" type="CT_DataConsolidate" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customSheetViews" type="CT_CustomSheetViews" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="phoneticPr" type="CT_PhoneticPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="conditionalFormatting" type="CT_ConditionalFormatting" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="printOptions" type="CT_PrintOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageMargins" type="CT_PageMargins" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetup" type="CT_PageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headerFooter" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rowBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="colBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customProperties" type="CT_CustomProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawing" type="CT_Drawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawing" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawingHF" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawingHF" type="CT_DrawingHF" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="picture" type="CT_SheetBackgroundPicture" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oleObjects" type="CT_OleObjects" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Dialogsheet">
    <xsd:sequence>
      <xsd:element name="sheetPr" minOccurs="0" type="CT_SheetPr"/>
      <xsd:element name="sheetViews" minOccurs="0" type="CT_SheetViews"/>
      <xsd:element name="sheetFormatPr" minOccurs="0" type="CT_SheetFormatPr"/>
      <xsd:element name="sheetProtection" type="CT_SheetProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customSheetViews" minOccurs="0" type="CT_CustomSheetViews"/>
      <xsd:element name="printOptions" minOccurs="0" type="CT_PrintOptions"/>
      <xsd:element name="pageMargins" minOccurs="0" type="CT_PageMargins"/>
      <xsd:element name="pageSetup" minOccurs="0" type="CT_PageSetup"/>
      <xsd:element name="headerFooter" minOccurs="0" type="CT_HeaderFooter"/>
      <xsd:element name="drawing" minOccurs="0" type="CT_Drawing"/>
      <xsd:element name="legacyDrawing" minOccurs="0" type="CT_LegacyDrawing"/>
      <xsd:element name="legacyDrawingHF" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawingHF" type="CT_DrawingHF" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oleObjects" type="CT_OleObjects" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="controls" type="CT_Controls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Worksheet">
    <xsd:sequence>
      <xsd:element name="sheetPr" type="CT_SheetPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dimension" type="CT_SheetDimension" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetViews" type="CT_SheetViews" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetFormatPr" type="CT_SheetFormatPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cols" type="CT_Cols" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="sheetData" type="CT_SheetData" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sheetCalcPr" type="CT_SheetCalcPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetProtection" type="CT_SheetProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="protectedRanges" type="CT_ProtectedRanges" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scenarios" type="CT_Scenarios" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="autoFilter" type="CT_AutoFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sortState" type="CT_SortState" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dataConsolidate" type="CT_DataConsolidate" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customSheetViews" type="CT_CustomSheetViews" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="mergeCells" type="CT_MergeCells" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="phoneticPr" type="CT_PhoneticPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="conditionalFormatting" type="CT_ConditionalFormatting" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="dataValidations" type="CT_DataValidations" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hyperlinks" type="CT_Hyperlinks" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="printOptions" type="CT_PrintOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageMargins" type="CT_PageMargins" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetup" type="CT_PageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headerFooter" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rowBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="colBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customProperties" type="CT_CustomProperties" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cellWatches" type="CT_CellWatches" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ignoredErrors" type="CT_IgnoredErrors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smartTags" type="CT_SmartTags" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawing" type="CT_Drawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawing" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawingHF" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawingHF" type="CT_DrawingHF" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="picture" type="CT_SheetBackgroundPicture" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oleObjects" type="CT_OleObjects" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="controls" type="CT_Controls" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="webPublishItems" type="CT_WebPublishItems" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tableParts" type="CT_TableParts" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetData">
    <xsd:sequence>
      <xsd:element name="row" type="CT_Row" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetCalcPr">
    <xsd:attribute name="fullCalcOnLoad" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetFormatPr">
    <xsd:attribute name="baseColWidth" type="xsd:unsignedInt" use="optional" default="8"/>
    <xsd:attribute name="defaultColWidth" type="xsd:double" use="optional"/>
    <xsd:attribute name="defaultRowHeight" type="xsd:double" use="required"/>
    <xsd:attribute name="customHeight" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="zeroHeight" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="thickTop" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="thickBottom" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="outlineLevelRow" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="outlineLevelCol" type="xsd:unsignedByte" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Cols">
    <xsd:sequence>
      <xsd:element name="col" type="CT_Col" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Col">
    <xsd:attribute name="min" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="max" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="width" type="xsd:double" use="optional"/>
    <xsd:attribute name="style" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="bestFit" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="customWidth" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="phonetic" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="outlineLevel" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="collapsed" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CellSpan">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CellSpans">
    <xsd:list itemType="ST_CellSpan"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Row">
    <xsd:sequence>
      <xsd:element name="c" type="CT_Cell" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="spans" type="ST_CellSpans" use="optional"/>
    <xsd:attribute name="s" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="customFormat" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ht" type="xsd:double" use="optional"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="customHeight" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="outlineLevel" type="xsd:unsignedByte" use="optional" default="0"/>
    <xsd:attribute name="collapsed" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="thickTop" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="thickBot" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ph" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Cell">
    <xsd:sequence>
      <xsd:element name="f" type="CT_CellFormula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="v" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="is" type="CT_Rst" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="s" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="t" type="ST_CellType" use="optional" default="n"/>
    <xsd:attribute name="cm" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="vm" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="ph" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CellType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="n"/>
      <xsd:enumeration value="e"/>
      <xsd:enumeration value="s"/>
      <xsd:enumeration value="str"/>
      <xsd:enumeration value="inlineStr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CellFormulaType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="array"/>
      <xsd:enumeration value="dataTable"/>
      <xsd:enumeration value="shared"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SheetPr">
    <xsd:sequence>
      <xsd:element name="tabColor" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="outlinePr" type="CT_OutlinePr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetUpPr" type="CT_PageSetUpPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="syncHorizontal" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="syncVertical" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="syncRef" type="ST_Ref" use="optional"/>
    <xsd:attribute name="transitionEvaluation" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="transitionEntry" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="published" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="codeName" type="xsd:string" use="optional"/>
    <xsd:attribute name="filterMode" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="enableFormatConditionsCalculation" type="xsd:boolean" use="optional"
      default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetDimension">
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetViews">
    <xsd:sequence>
      <xsd:element name="sheetView" type="CT_SheetView" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetView">
    <xsd:sequence>
      <xsd:element name="pane" type="CT_Pane" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="selection" type="CT_Selection" minOccurs="0" maxOccurs="4"/>
      <xsd:element name="pivotSelection" type="CT_PivotSelection" minOccurs="0" maxOccurs="4"/>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="windowProtection" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showFormulas" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showGridLines" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showRowColHeaders" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showZeros" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="rightToLeft" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="tabSelected" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showRuler" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showOutlineSymbols" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="defaultGridColor" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showWhiteSpace" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="view" type="ST_SheetViewType" use="optional" default="normal"/>
    <xsd:attribute name="topLeftCell" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="colorId" type="xsd:unsignedInt" use="optional" default="64"/>
    <xsd:attribute name="zoomScale" type="xsd:unsignedInt" use="optional" default="100"/>
    <xsd:attribute name="zoomScaleNormal" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="zoomScaleSheetLayoutView" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="zoomScalePageLayoutView" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="workbookViewId" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Pane">
    <xsd:attribute name="xSplit" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="ySplit" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="topLeftCell" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="activePane" type="ST_Pane" use="optional" default="topLeft"/>
    <xsd:attribute name="state" type="ST_PaneState" use="optional" default="split"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotSelection">
    <xsd:sequence>
      <xsd:element name="pivotArea" type="CT_PivotArea"/>
    </xsd:sequence>
    <xsd:attribute name="pane" type="ST_Pane" use="optional" default="topLeft"/>
    <xsd:attribute name="showHeader" type="xsd:boolean" default="false"/>
    <xsd:attribute name="label" type="xsd:boolean" default="false"/>
    <xsd:attribute name="data" type="xsd:boolean" default="false"/>
    <xsd:attribute name="extendable" type="xsd:boolean" default="false"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="axis" type="ST_Axis" use="optional"/>
    <xsd:attribute name="dimension" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="start" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="min" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="max" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="activeRow" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="activeCol" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="previousRow" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="previousCol" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute name="click" type="xsd:unsignedInt" default="0"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Selection">
    <xsd:attribute name="pane" type="ST_Pane" use="optional" default="topLeft"/>
    <xsd:attribute name="activeCell" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="activeCellId" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="sqref" type="ST_Sqref" use="optional" default="A1"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Pane">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="bottomRight"/>
      <xsd:enumeration value="topRight"/>
      <xsd:enumeration value="bottomLeft"/>
      <xsd:enumeration value="topLeft"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PageBreak">
    <xsd:sequence>
      <xsd:element name="brk" type="CT_Break" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="manualBreakCount" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Break">
    <xsd:attribute name="id" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="min" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="max" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="man" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pt" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SheetViewType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="pageBreakPreview"/>
      <xsd:enumeration value="pageLayout"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OutlinePr">
    <xsd:attribute name="applyStyles" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="summaryBelow" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="summaryRight" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showOutlineSymbols" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageSetUpPr">
    <xsd:attribute name="autoPageBreaks" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fitToPage" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataConsolidate">
    <xsd:sequence>
      <xsd:element name="dataRefs" type="CT_DataRefs" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="function" type="ST_DataConsolidateFunction" use="optional" default="sum"/>
    <xsd:attribute name="startLabels" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="leftLabels" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="topLabels" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="link" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DataConsolidateFunction">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="average"/>
      <xsd:enumeration value="count"/>
      <xsd:enumeration value="countNums"/>
      <xsd:enumeration value="max"/>
      <xsd:enumeration value="min"/>
      <xsd:enumeration value="product"/>
      <xsd:enumeration value="stdDev"/>
      <xsd:enumeration value="stdDevp"/>
      <xsd:enumeration value="sum"/>
      <xsd:enumeration value="var"/>
      <xsd:enumeration value="varp"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DataRefs">
    <xsd:sequence>
      <xsd:element name="dataRef" type="CT_DataRef" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataRef">
    <xsd:attribute name="ref" type="ST_Ref" use="optional"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sheet" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MergeCells">
    <xsd:sequence>
      <xsd:element name="mergeCell" type="CT_MergeCell" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MergeCell">
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SmartTags">
    <xsd:sequence>
      <xsd:element name="cellSmartTags" type="CT_CellSmartTags" minOccurs="1" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CellSmartTags">
    <xsd:sequence>
      <xsd:element name="cellSmartTag" type="CT_CellSmartTag" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="ST_CellRef" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellSmartTag">
    <xsd:sequence>
      <xsd:element name="cellSmartTagPr" minOccurs="0" maxOccurs="unbounded"
        type="CT_CellSmartTagPr"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="deleted" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="xmlBased" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellSmartTagPr">
    <xsd:attribute name="key" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="val" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Drawing">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LegacyDrawing">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DrawingHF">
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="lho" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="lhe" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="lhf" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cho" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="che" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="chf" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rho" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rhe" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rhf" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="lfo" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="lfe" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="lff" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cfo" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cfe" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="cff" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rfo" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rfe" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rff" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomSheetViews">
    <xsd:sequence>
      <xsd:element name="customSheetView" minOccurs="1" maxOccurs="unbounded"
        type="CT_CustomSheetView"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomSheetView">
    <xsd:sequence>
      <xsd:element name="pane" type="CT_Pane" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="selection" type="CT_Selection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rowBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="colBreaks" type="CT_PageBreak" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageMargins" type="CT_PageMargins" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="printOptions" type="CT_PrintOptions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetup" type="CT_PageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headerFooter" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="autoFilter" type="CT_AutoFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="scale" type="xsd:unsignedInt" default="100"/>
    <xsd:attribute name="colorId" type="xsd:unsignedInt" default="64"/>
    <xsd:attribute name="showPageBreaks" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showFormulas" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showGridLines" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showRowCol" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="outlineSymbols" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="zeroValues" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="fitToPage" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="printArea" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="filter" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showAutoFilter" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="hiddenRows" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="hiddenColumns" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="state" type="ST_SheetState" default="visible"/>
    <xsd:attribute name="filterUnique" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="view" type="ST_SheetViewType" default="normal"/>
    <xsd:attribute name="showRuler" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="topLeftCell" type="ST_CellRef" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataValidations">
    <xsd:sequence>
      <xsd:element name="dataValidation" type="CT_DataValidation" minOccurs="1"
        maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="disablePrompts" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="xWindow" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="yWindow" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataValidation">
    <xsd:sequence>
      <xsd:element name="formula1" type="ST_Formula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="formula2" type="ST_Formula" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_DataValidationType" use="optional" default="none"/>
    <xsd:attribute name="errorStyle" type="ST_DataValidationErrorStyle" use="optional"
      default="stop"/>
    <xsd:attribute name="imeMode" type="ST_DataValidationImeMode" use="optional" default="noControl"/>
    <xsd:attribute name="operator" type="ST_DataValidationOperator" use="optional" default="between"/>
    <xsd:attribute name="allowBlank" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showDropDown" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showInputMessage" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showErrorMessage" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="errorTitle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="error" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="promptTitle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="prompt" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sqref" type="ST_Sqref" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DataValidationType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="whole"/>
      <xsd:enumeration value="decimal"/>
      <xsd:enumeration value="list"/>
      <xsd:enumeration value="date"/>
      <xsd:enumeration value="time"/>
      <xsd:enumeration value="textLength"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DataValidationOperator">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="between"/>
      <xsd:enumeration value="notBetween"/>
      <xsd:enumeration value="equal"/>
      <xsd:enumeration value="notEqual"/>
      <xsd:enumeration value="lessThan"/>
      <xsd:enumeration value="lessThanOrEqual"/>
      <xsd:enumeration value="greaterThan"/>
      <xsd:enumeration value="greaterThanOrEqual"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DataValidationErrorStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="stop"/>
      <xsd:enumeration value="warning"/>
      <xsd:enumeration value="information"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DataValidationImeMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="noControl"/>
      <xsd:enumeration value="off"/>
      <xsd:enumeration value="on"/>
      <xsd:enumeration value="disabled"/>
      <xsd:enumeration value="hiragana"/>
      <xsd:enumeration value="fullKatakana"/>
      <xsd:enumeration value="halfKatakana"/>
      <xsd:enumeration value="fullAlpha"/>
      <xsd:enumeration value="halfAlpha"/>
      <xsd:enumeration value="fullHangul"/>
      <xsd:enumeration value="halfHangul"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CfType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="expression"/>
      <xsd:enumeration value="cellIs"/>
      <xsd:enumeration value="colorScale"/>
      <xsd:enumeration value="dataBar"/>
      <xsd:enumeration value="iconSet"/>
      <xsd:enumeration value="top10"/>
      <xsd:enumeration value="uniqueValues"/>
      <xsd:enumeration value="duplicateValues"/>
      <xsd:enumeration value="containsText"/>
      <xsd:enumeration value="notContainsText"/>
      <xsd:enumeration value="beginsWith"/>
      <xsd:enumeration value="endsWith"/>
      <xsd:enumeration value="containsBlanks"/>
      <xsd:enumeration value="notContainsBlanks"/>
      <xsd:enumeration value="containsErrors"/>
      <xsd:enumeration value="notContainsErrors"/>
      <xsd:enumeration value="timePeriod"/>
      <xsd:enumeration value="aboveAverage"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TimePeriod">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="today"/>
      <xsd:enumeration value="yesterday"/>
      <xsd:enumeration value="tomorrow"/>
      <xsd:enumeration value="last7Days"/>
      <xsd:enumeration value="thisMonth"/>
      <xsd:enumeration value="lastMonth"/>
      <xsd:enumeration value="nextMonth"/>
      <xsd:enumeration value="thisWeek"/>
      <xsd:enumeration value="lastWeek"/>
      <xsd:enumeration value="nextWeek"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConditionalFormattingOperator">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="lessThan"/>
      <xsd:enumeration value="lessThanOrEqual"/>
      <xsd:enumeration value="equal"/>
      <xsd:enumeration value="notEqual"/>
      <xsd:enumeration value="greaterThanOrEqual"/>
      <xsd:enumeration value="greaterThan"/>
      <xsd:enumeration value="between"/>
      <xsd:enumeration value="notBetween"/>
      <xsd:enumeration value="containsText"/>
      <xsd:enumeration value="notContains"/>
      <xsd:enumeration value="beginsWith"/>
      <xsd:enumeration value="endsWith"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CfvoType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="num"/>
      <xsd:enumeration value="percent"/>
      <xsd:enumeration value="max"/>
      <xsd:enumeration value="min"/>
      <xsd:enumeration value="formula"/>
      <xsd:enumeration value="percentile"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ConditionalFormatting">
    <xsd:sequence>
      <xsd:element name="cfRule" type="CT_CfRule" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="pivot" type="xsd:boolean" default="false"/>
    <xsd:attribute name="sqref" type="ST_Sqref"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CfRule">
    <xsd:sequence>
      <xsd:element name="formula" type="ST_Formula" minOccurs="0" maxOccurs="3"/>
      <xsd:element name="colorScale" type="CT_ColorScale" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dataBar" type="CT_DataBar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="iconSet" type="CT_IconSet" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_CfType"/>
    <xsd:attribute name="dxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="priority" type="xsd:int" use="required"/>
    <xsd:attribute name="stopIfTrue" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="aboveAverage" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="percent" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="bottom" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="operator" type="ST_ConditionalFormattingOperator" use="optional"/>
    <xsd:attribute name="text" type="xsd:string" use="optional"/>
    <xsd:attribute name="timePeriod" type="ST_TimePeriod" use="optional"/>
    <xsd:attribute name="rank" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="stdDev" type="xsd:int" use="optional"/>
    <xsd:attribute name="equalAverage" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Hyperlinks">
    <xsd:sequence>
      <xsd:element name="hyperlink" type="CT_Hyperlink" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Hyperlink">
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="location" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="tooltip" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="display" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellFormula">
    <xsd:simpleContent>
      <xsd:extension base="ST_Formula">
        <xsd:attribute name="t" type="ST_CellFormulaType" use="optional" default="normal"/>
        <xsd:attribute name="aca" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="ref" type="ST_Ref" use="optional"/>
        <xsd:attribute name="dt2D" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="dtr" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="del1" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="del2" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="r1" type="ST_CellRef" use="optional"/>
        <xsd:attribute name="r2" type="ST_CellRef" use="optional"/>
        <xsd:attribute name="ca" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="si" type="xsd:unsignedInt" use="optional"/>
        <xsd:attribute name="bx" type="xsd:boolean" use="optional" default="false"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorScale">
    <xsd:sequence>
      <xsd:element name="cfvo" type="CT_Cfvo" minOccurs="2" maxOccurs="unbounded"/>
      <xsd:element name="color" type="CT_Color" minOccurs="2" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DataBar">
    <xsd:sequence>
      <xsd:element name="cfvo" type="CT_Cfvo" minOccurs="2" maxOccurs="2"/>
      <xsd:element name="color" type="CT_Color" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="minLength" type="xsd:unsignedInt" use="optional" default="10"/>
    <xsd:attribute name="maxLength" type="xsd:unsignedInt" use="optional" default="90"/>
    <xsd:attribute name="showValue" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_IconSet">
    <xsd:sequence>
      <xsd:element name="cfvo" type="CT_Cfvo" minOccurs="2" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="iconSet" type="ST_IconSetType" use="optional" default="3TrafficLights1"/>
    <xsd:attribute name="showValue" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="percent" type="xsd:boolean" default="true"/>
    <xsd:attribute name="reverse" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Cfvo">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_CfvoType" use="required"/>
    <xsd:attribute name="val" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="gte" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageMargins">
    <xsd:attribute name="left" type="xsd:double" use="required"/>
    <xsd:attribute name="right" type="xsd:double" use="required"/>
    <xsd:attribute name="top" type="xsd:double" use="required"/>
    <xsd:attribute name="bottom" type="xsd:double" use="required"/>
    <xsd:attribute name="header" type="xsd:double" use="required"/>
    <xsd:attribute name="footer" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PrintOptions">
    <xsd:attribute name="horizontalCentered" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="verticalCentered" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="headings" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="gridLines" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="gridLinesSet" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageSetup">
    <xsd:attribute name="paperSize" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="paperHeight" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="paperWidth" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="scale" type="xsd:unsignedInt" use="optional" default="100"/>
    <xsd:attribute name="firstPageNumber" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="fitToWidth" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="fitToHeight" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="pageOrder" type="ST_PageOrder" use="optional" default="downThenOver"/>
    <xsd:attribute name="orientation" type="ST_Orientation" use="optional" default="default"/>
    <xsd:attribute name="usePrinterDefaults" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="blackAndWhite" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="draft" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="cellComments" type="ST_CellComments" use="optional" default="none"/>
    <xsd:attribute name="useFirstPageNumber" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="errors" type="ST_PrintError" use="optional" default="displayed"/>
    <xsd:attribute name="horizontalDpi" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="verticalDpi" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="copies" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PageOrder">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="downThenOver"/>
      <xsd:enumeration value="overThenDown"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Orientation">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="portrait"/>
      <xsd:enumeration value="landscape"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CellComments">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="asDisplayed"/>
      <xsd:enumeration value="atEnd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_HeaderFooter">
    <xsd:sequence>
      <xsd:element name="oddHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oddFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="evenHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="evenFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstHeader" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="firstFooter" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="differentOddEven" type="xsd:boolean" default="false"/>
    <xsd:attribute name="differentFirst" type="xsd:boolean" default="false"/>
    <xsd:attribute name="scaleWithDoc" type="xsd:boolean" default="true"/>
    <xsd:attribute name="alignWithMargins" type="xsd:boolean" default="true"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PrintError">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="displayed"/>
      <xsd:enumeration value="blank"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="NA"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Scenarios">
    <xsd:sequence>
      <xsd:element name="scenario" type="CT_Scenario" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="current" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="show" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="sqref" type="ST_Sqref" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetProtection">
    <xsd:attribute name="password" type="ST_UnsignedShortHex" use="optional"/>
    <xsd:attribute name="algorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="sheet" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="objects" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="scenarios" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="formatCells" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="formatColumns" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="formatRows" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="insertColumns" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="insertRows" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="insertHyperlinks" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="deleteColumns" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="deleteRows" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="selectLockedCells" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="sort" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoFilter" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="pivotTables" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="selectUnlockedCells" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ProtectedRanges">
    <xsd:sequence>
      <xsd:element name="protectedRange" type="CT_ProtectedRange" minOccurs="1"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ProtectedRange">
    <xsd:sequence>
      <xsd:element name="securityDescriptor" type="xsd:string" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="password" type="ST_UnsignedShortHex" use="optional"/>
    <xsd:attribute name="sqref" type="ST_Sqref" use="required"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="securityDescriptor" type="xsd:string" use="optional"/>
    <xsd:attribute name="algorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinCount" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Scenario">
    <xsd:sequence>
      <xsd:element name="inputCells" type="CT_InputCells" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="locked" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="user" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="comment" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_InputCells">
    <xsd:attribute name="r" type="ST_CellRef" use="required"/>
    <xsd:attribute name="deleted" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="undone" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="val" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellWatches">
    <xsd:sequence>
      <xsd:element name="cellWatch" type="CT_CellWatch" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CellWatch">
    <xsd:attribute name="r" type="ST_CellRef" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Chartsheet">
    <xsd:sequence>
      <xsd:element name="sheetPr" type="CT_ChartsheetPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetViews" type="CT_ChartsheetViews" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="sheetProtection" type="CT_ChartsheetProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customSheetViews" type="CT_CustomChartsheetViews" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="pageMargins" minOccurs="0" type="CT_PageMargins"/>
      <xsd:element name="pageSetup" type="CT_CsPageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headerFooter" minOccurs="0" type="CT_HeaderFooter"/>
      <xsd:element name="drawing" type="CT_Drawing" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="legacyDrawing" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="legacyDrawingHF" type="CT_LegacyDrawing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="drawingHF" type="CT_DrawingHF" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="picture" type="CT_SheetBackgroundPicture" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="webPublishItems" type="CT_WebPublishItems" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartsheetPr">
    <xsd:sequence>
      <xsd:element name="tabColor" type="CT_Color" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="published" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="codeName" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartsheetViews">
    <xsd:sequence>
      <xsd:element name="sheetView" type="CT_ChartsheetView" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartsheetView">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="tabSelected" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="zoomScale" type="xsd:unsignedInt" default="100" use="optional"/>
    <xsd:attribute name="workbookViewId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="zoomToFit" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ChartsheetProtection">
    <xsd:attribute name="password" type="ST_UnsignedShortHex" use="optional"/>
    <xsd:attribute name="algorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="content" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="objects" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CsPageSetup">
    <xsd:attribute name="paperSize" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="paperHeight" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="paperWidth" type="s:ST_PositiveUniversalMeasure" use="optional"/>
    <xsd:attribute name="firstPageNumber" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="orientation" type="ST_Orientation" use="optional" default="default"/>
    <xsd:attribute name="usePrinterDefaults" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="blackAndWhite" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="draft" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="useFirstPageNumber" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="horizontalDpi" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="verticalDpi" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="copies" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomChartsheetViews">
    <xsd:sequence>
      <xsd:element name="customSheetView" minOccurs="0" maxOccurs="unbounded"
        type="CT_CustomChartsheetView"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomChartsheetView">
    <xsd:sequence>
      <xsd:element name="pageMargins" type="CT_PageMargins" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pageSetup" type="CT_CsPageSetup" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="headerFooter" type="CT_HeaderFooter" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="scale" type="xsd:unsignedInt" default="100"/>
    <xsd:attribute name="state" type="ST_SheetState" default="visible"/>
    <xsd:attribute name="zoomToFit" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomProperties">
    <xsd:sequence>
      <xsd:element name="customPr" type="CT_CustomProperty" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomProperty">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OleObjects">
    <xsd:sequence>
      <xsd:element name="oleObject" type="CT_OleObject" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OleObject">
    <xsd:sequence>
      <xsd:element name="objectPr" type="CT_ObjectPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="progId" type="xsd:string" use="optional"/>
    <xsd:attribute name="dvAspect" type="ST_DvAspect" use="optional" default="DVASPECT_CONTENT"/>
    <xsd:attribute name="link" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="oleUpdate" type="ST_OleUpdate" use="optional"/>
    <xsd:attribute name="autoLoad" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="shapeId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ObjectPr">
    <xsd:sequence>
      <xsd:element name="anchor" type="CT_ObjectAnchor" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="locked" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="defaultSize" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="print" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="disabled" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="uiObject" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoFill" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoLine" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoPict" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="macro" type="ST_Formula" use="optional"/>
    <xsd:attribute name="altText" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="dde" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DvAspect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="DVASPECT_CONTENT"/>
      <xsd:enumeration value="DVASPECT_ICON"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OleUpdate">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="OLEUPDATE_ALWAYS"/>
      <xsd:enumeration value="OLEUPDATE_ONCALL"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_WebPublishItems">
    <xsd:sequence>
      <xsd:element name="webPublishItem" type="CT_WebPublishItem" minOccurs="1"
        maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WebPublishItem">
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="divId" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="sourceType" type="ST_WebSourceType" use="required"/>
    <xsd:attribute name="sourceRef" type="ST_Ref" use="optional"/>
    <xsd:attribute name="sourceObject" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="destinationFile" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="title" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="autoRepublish" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Controls">
    <xsd:sequence>
      <xsd:element name="control" type="CT_Control" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Control">
    <xsd:sequence>
      <xsd:element name="controlPr" type="CT_ControlPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="shapeId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="name" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ControlPr">
    <xsd:sequence>
      <xsd:element name="anchor" type="CT_ObjectAnchor" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="locked" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="defaultSize" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="print" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="disabled" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="recalcAlways" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="uiObject" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoFill" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoLine" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="autoPict" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="macro" type="ST_Formula" use="optional"/>
    <xsd:attribute name="altText" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="linkedCell" type="ST_Formula" use="optional"/>
    <xsd:attribute name="listFillRange" type="ST_Formula" use="optional"/>
    <xsd:attribute name="cf" type="s:ST_Xstring" use="optional" default="pict"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_WebSourceType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="sheet"/>
      <xsd:enumeration value="printArea"/>
      <xsd:enumeration value="autoFilter"/>
      <xsd:enumeration value="range"/>
      <xsd:enumeration value="chart"/>
      <xsd:enumeration value="pivotTable"/>
      <xsd:enumeration value="query"/>
      <xsd:enumeration value="label"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_IgnoredErrors">
    <xsd:sequence>
      <xsd:element name="ignoredError" type="CT_IgnoredError" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_IgnoredError">
    <xsd:attribute name="sqref" type="ST_Sqref" use="required"/>
    <xsd:attribute name="evalError" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="twoDigitTextYear" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="numberStoredAsText" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="formula" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="formulaRange" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="unlockedFormula" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="emptyCellReference" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="listDataValidation" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="calculatedColumn" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PaneState">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="split"/>
      <xsd:enumeration value="frozen"/>
      <xsd:enumeration value="frozenSplit"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TableParts">
    <xsd:sequence>
      <xsd:element name="tablePart" type="CT_TablePart" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TablePart">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:element name="metadata" type="CT_Metadata"/>
  <xsd:complexType name="CT_Metadata">
    <xsd:sequence>
      <xsd:element name="metadataTypes" type="CT_MetadataTypes" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="metadataStrings" type="CT_MetadataStrings" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="mdxMetadata" type="CT_MdxMetadata" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="futureMetadata" type="CT_FutureMetadata" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="cellMetadata" type="CT_MetadataBlocks" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="valueMetadata" type="CT_MetadataBlocks" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataTypes">
    <xsd:sequence>
      <xsd:element name="metadataType" type="CT_MetadataType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataType">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="minSupportedVersion" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="ghostRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="ghostCol" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="edit" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="delete" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="copy" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteAll" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteFormulas" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteValues" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteFormats" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteComments" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteDataValidation" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteBorders" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteColWidths" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pasteNumberFormats" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="merge" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="splitFirst" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="splitAll" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="rowColShift" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="clearAll" type="xsd:boolean" default="false"/>
    <xsd:attribute name="clearFormats" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="clearContents" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="clearComments" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="assign" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="coerce" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="adjust" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="cellMeta" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataBlocks">
    <xsd:sequence>
      <xsd:element name="bk" type="CT_MetadataBlock" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataBlock">
    <xsd:sequence>
      <xsd:element name="rc" type="CT_MetadataRecord" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataRecord">
    <xsd:attribute name="t" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="v" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FutureMetadata">
    <xsd:sequence>
      <xsd:element name="bk" type="CT_FutureMetadataBlock" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FutureMetadataBlock">
    <xsd:sequence>
      <xsd:element name="extLst" minOccurs="0" maxOccurs="1" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MdxMetadata">
    <xsd:sequence>
      <xsd:element name="mdx" type="CT_Mdx" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Mdx">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="t" type="CT_MdxTuple"/>
      <xsd:element name="ms" type="CT_MdxSet"/>
      <xsd:element name="p" type="CT_MdxMemeberProp"/>
      <xsd:element name="k" type="CT_MdxKPI"/>
    </xsd:choice>
    <xsd:attribute name="n" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="f" type="ST_MdxFunctionType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MdxFunctionType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="m"/>
      <xsd:enumeration value="v"/>
      <xsd:enumeration value="s"/>
      <xsd:enumeration value="c"/>
      <xsd:enumeration value="r"/>
      <xsd:enumeration value="p"/>
      <xsd:enumeration value="k"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MdxTuple">
    <xsd:sequence>
      <xsd:element name="n" type="CT_MetadataStringIndex" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="c" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="ct" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="si" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="fi" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="bc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="fc" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="i" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="u" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="st" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="b" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MdxSet">
    <xsd:sequence>
      <xsd:element name="n" type="CT_MetadataStringIndex" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="ns" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="c" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="o" type="ST_MdxSetOrder" use="optional" default="u"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MdxSetOrder">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="u"/>
      <xsd:enumeration value="a"/>
      <xsd:enumeration value="d"/>
      <xsd:enumeration value="aa"/>
      <xsd:enumeration value="ad"/>
      <xsd:enumeration value="na"/>
      <xsd:enumeration value="nd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MdxMemeberProp">
    <xsd:attribute name="n" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="np" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MdxKPI">
    <xsd:attribute name="n" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="np" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="p" type="ST_MdxKPIProperty" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MdxKPIProperty">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="v"/>
      <xsd:enumeration value="g"/>
      <xsd:enumeration value="s"/>
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="w"/>
      <xsd:enumeration value="m"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MetadataStringIndex">
    <xsd:attribute name="x" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="s" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_MetadataStrings">
    <xsd:sequence>
      <xsd:element name="s" type="CT_XStringElement" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:element name="singleXmlCells" type="CT_SingleXmlCells"/>
  <xsd:complexType name="CT_SingleXmlCells">
    <xsd:sequence>
      <xsd:element name="singleXmlCell" type="CT_SingleXmlCell" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SingleXmlCell">
    <xsd:sequence>
      <xsd:element name="xmlCellPr" type="CT_XmlCellPr" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="r" type="ST_CellRef" use="required"/>
    <xsd:attribute name="connectionId" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_XmlCellPr">
    <xsd:sequence>
      <xsd:element name="xmlPr" type="CT_XmlPr" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="uniqueName" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_XmlPr">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="mapId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="xpath" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="xmlDataType" type="ST_XmlDataType" use="required"/>
  </xsd:complexType>
  <xsd:element name="styleSheet" type="CT_Stylesheet"/>
  <xsd:complexType name="CT_Stylesheet">
    <xsd:sequence>
      <xsd:element name="numFmts" type="CT_NumFmts" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fonts" type="CT_Fonts" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fills" type="CT_Fills" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="borders" type="CT_Borders" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cellStyleXfs" type="CT_CellStyleXfs" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cellXfs" type="CT_CellXfs" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cellStyles" type="CT_CellStyles" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="dxfs" type="CT_Dxfs" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tableStyles" type="CT_TableStyles" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="colors" type="CT_Colors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CellAlignment">
    <xsd:attribute name="horizontal" type="ST_HorizontalAlignment" use="optional"/>
    <xsd:attribute name="vertical" type="ST_VerticalAlignment" default="bottom" use="optional"/>
    <xsd:attribute name="textRotation" type="ST_TextRotation" use="optional"/>
    <xsd:attribute name="wrapText" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="indent" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="relativeIndent" type="xsd:int" use="optional"/>
    <xsd:attribute name="justifyLastLine" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="shrinkToFit" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="readingOrder" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextRotation">
    <xsd:union>
      <xsd:simpleType>
        <xsd:restriction base="xsd:nonNegativeInteger">
          <xsd:maxInclusive value="180"/>
        </xsd:restriction>
      </xsd:simpleType>
      <xsd:simpleType>
        <xsd:restriction base="xsd:nonNegativeInteger">
          <xsd:enumeration value="255"/>
        </xsd:restriction>
      </xsd:simpleType>
    </xsd:union>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BorderStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="thin"/>
      <xsd:enumeration value="medium"/>
      <xsd:enumeration value="dashed"/>
      <xsd:enumeration value="dotted"/>
      <xsd:enumeration value="thick"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="hair"/>
      <xsd:enumeration value="mediumDashed"/>
      <xsd:enumeration value="dashDot"/>
      <xsd:enumeration value="mediumDashDot"/>
      <xsd:enumeration value="dashDotDot"/>
      <xsd:enumeration value="mediumDashDotDot"/>
      <xsd:enumeration value="slantDashDot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Borders">
    <xsd:sequence>
      <xsd:element name="border" type="CT_Border" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Border">
    <xsd:sequence>
      <xsd:element name="start" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="end" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="left" type="CT_BorderPr" minOccurs="0"/>
      <xsd:element name="right" type="CT_BorderPr" minOccurs="0"/>
      <xsd:element name="top" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bottom" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="diagonal" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="vertical" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="horizontal" type="CT_BorderPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="diagonalUp" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="diagonalDown" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="outline" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BorderPr">
    <xsd:sequence>
      <xsd:element name="color" type="CT_Color" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="style" type="ST_BorderStyle" use="optional" default="none"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellProtection">
    <xsd:attribute name="locked" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Fonts">
    <xsd:sequence>
      <xsd:element name="font" type="CT_Font" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Fills">
    <xsd:sequence>
      <xsd:element name="fill" type="CT_Fill" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Fill">
    <xsd:choice minOccurs="1" maxOccurs="1">
      <xsd:element name="patternFill" type="CT_PatternFill" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="gradientFill" type="CT_GradientFill" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_PatternFill">
    <xsd:sequence>
      <xsd:element name="fgColor" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bgColor" type="CT_Color" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="patternType" type="ST_PatternType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Color">
    <xsd:attribute name="auto" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="indexed" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="rgb" type="ST_UnsignedIntHex" use="optional"/>
    <xsd:attribute name="theme" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="tint" type="xsd:double" use="optional" default="0.0"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PatternType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="mediumGray"/>
      <xsd:enumeration value="darkGray"/>
      <xsd:enumeration value="lightGray"/>
      <xsd:enumeration value="darkHorizontal"/>
      <xsd:enumeration value="darkVertical"/>
      <xsd:enumeration value="darkDown"/>
      <xsd:enumeration value="darkUp"/>
      <xsd:enumeration value="darkGrid"/>
      <xsd:enumeration value="darkTrellis"/>
      <xsd:enumeration value="lightHorizontal"/>
      <xsd:enumeration value="lightVertical"/>
      <xsd:enumeration value="lightDown"/>
      <xsd:enumeration value="lightUp"/>
      <xsd:enumeration value="lightGrid"/>
      <xsd:enumeration value="lightTrellis"/>
      <xsd:enumeration value="gray125"/>
      <xsd:enumeration value="gray0625"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_GradientFill">
    <xsd:sequence>
      <xsd:element name="stop" type="CT_GradientStop" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_GradientType" use="optional" default="linear"/>
    <xsd:attribute name="degree" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="left" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="right" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="top" type="xsd:double" use="optional" default="0"/>
    <xsd:attribute name="bottom" type="xsd:double" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_GradientStop">
    <xsd:sequence>
      <xsd:element name="color" type="CT_Color" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="position" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_GradientType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="linear"/>
      <xsd:enumeration value="path"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HorizontalAlignment">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="general"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="fill"/>
      <xsd:enumeration value="justify"/>
      <xsd:enumeration value="centerContinuous"/>
      <xsd:enumeration value="distributed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VerticalAlignment">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="justify"/>
      <xsd:enumeration value="distributed"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_NumFmts">
    <xsd:sequence>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NumFmt">
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="required"/>
    <xsd:attribute name="formatCode" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellStyleXfs">
    <xsd:sequence>
      <xsd:element name="xf" type="CT_Xf" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellXfs">
    <xsd:sequence>
      <xsd:element name="xf" type="CT_Xf" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Xf">
    <xsd:sequence>
      <xsd:element name="alignment" type="CT_CellAlignment" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="protection" type="CT_CellProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="numFmtId" type="ST_NumFmtId" use="optional"/>
    <xsd:attribute name="fontId" type="ST_FontId" use="optional"/>
    <xsd:attribute name="fillId" type="ST_FillId" use="optional"/>
    <xsd:attribute name="borderId" type="ST_BorderId" use="optional"/>
    <xsd:attribute name="xfId" type="ST_CellStyleXfId" use="optional"/>
    <xsd:attribute name="quotePrefix" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="pivotButton" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="applyNumberFormat" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="applyFont" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="applyFill" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="applyBorder" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="applyAlignment" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="applyProtection" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellStyles">
    <xsd:sequence>
      <xsd:element name="cellStyle" type="CT_CellStyle" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CellStyle">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="xfId" type="ST_CellStyleXfId" use="required"/>
    <xsd:attribute name="builtinId" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="iLevel" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="hidden" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="customBuiltin" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Dxfs">
    <xsd:sequence>
      <xsd:element name="dxf" type="CT_Dxf" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Dxf">
    <xsd:sequence>
      <xsd:element name="font" type="CT_Font" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fill" type="CT_Fill" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="alignment" type="CT_CellAlignment" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="border" type="CT_Border" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="protection" type="CT_CellProtection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_NumFmtId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FontId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FillId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BorderId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CellStyleXfId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DxfId">
    <xsd:restriction base="xsd:unsignedInt"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Colors">
    <xsd:sequence>
      <xsd:element name="indexedColors" type="CT_IndexedColors" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="mruColors" type="CT_MRUColors" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_IndexedColors">
    <xsd:sequence>
      <xsd:element name="rgbColor" type="CT_RgbColor" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MRUColors">
    <xsd:sequence>
      <xsd:element name="color" type="CT_Color" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_RgbColor">
    <xsd:attribute name="rgb" type="ST_UnsignedIntHex" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyles">
    <xsd:sequence>
      <xsd:element name="tableStyle" type="CT_TableStyle" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="defaultTableStyle" type="xsd:string" use="optional"/>
    <xsd:attribute name="defaultPivotStyle" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyle">
    <xsd:sequence>
      <xsd:element name="tableStyleElement" type="CT_TableStyleElement" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string" use="required"/>
    <xsd:attribute name="pivot" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="table" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableStyleElement">
    <xsd:attribute name="type" type="ST_TableStyleType" use="required"/>
    <xsd:attribute name="size" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="dxfId" type="ST_DxfId" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TableStyleType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="wholeTable"/>
      <xsd:enumeration value="headerRow"/>
      <xsd:enumeration value="totalRow"/>
      <xsd:enumeration value="firstColumn"/>
      <xsd:enumeration value="lastColumn"/>
      <xsd:enumeration value="firstRowStripe"/>
      <xsd:enumeration value="secondRowStripe"/>
      <xsd:enumeration value="firstColumnStripe"/>
      <xsd:enumeration value="secondColumnStripe"/>
      <xsd:enumeration value="firstHeaderCell"/>
      <xsd:enumeration value="lastHeaderCell"/>
      <xsd:enumeration value="firstTotalCell"/>
      <xsd:enumeration value="lastTotalCell"/>
      <xsd:enumeration value="firstSubtotalColumn"/>
      <xsd:enumeration value="secondSubtotalColumn"/>
      <xsd:enumeration value="thirdSubtotalColumn"/>
      <xsd:enumeration value="firstSubtotalRow"/>
      <xsd:enumeration value="secondSubtotalRow"/>
      <xsd:enumeration value="thirdSubtotalRow"/>
      <xsd:enumeration value="blankRow"/>
      <xsd:enumeration value="firstColumnSubheading"/>
      <xsd:enumeration value="secondColumnSubheading"/>
      <xsd:enumeration value="thirdColumnSubheading"/>
      <xsd:enumeration value="firstRowSubheading"/>
      <xsd:enumeration value="secondRowSubheading"/>
      <xsd:enumeration value="thirdRowSubheading"/>
      <xsd:enumeration value="pageFieldLabels"/>
      <xsd:enumeration value="pageFieldValues"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_BooleanProperty">
    <xsd:attribute name="val" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontSize">
    <xsd:attribute name="val" type="xsd:double" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_IntProperty">
    <xsd:attribute name="val" type="xsd:int" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontName">
    <xsd:attribute name="val" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VerticalAlignFontProperty">
    <xsd:attribute name="val" type="s:ST_VerticalAlignRun" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontScheme">
    <xsd:attribute name="val" type="ST_FontScheme" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FontScheme">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="major"/>
      <xsd:enumeration value="minor"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_UnderlineProperty">
    <xsd:attribute name="val" type="ST_UnderlineValues" use="optional" default="single"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_UnderlineValues">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="singleAccounting"/>
      <xsd:enumeration value="doubleAccounting"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Font">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="name" type="CT_FontName" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="charset" type="CT_IntProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="family" type="CT_FontFamily" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="b" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="i" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="strike" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="outline" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shadow" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="condense" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extend" type="CT_BooleanProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="color" type="CT_Color" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sz" type="CT_FontSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="u" type="CT_UnderlineProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="vertAlign" type="CT_VerticalAlignFontProperty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="scheme" type="CT_FontScheme" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_FontFamily">
    <xsd:attribute name="val" type="ST_FontFamily" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FontFamily">
    <xsd:restriction base="xsd:integer">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="14"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:attributeGroup name="AG_AutoFormat">
    <xsd:attribute name="autoFormatId" type="xsd:unsignedInt"/>
    <xsd:attribute name="applyNumberFormats" type="xsd:boolean"/>
    <xsd:attribute name="applyBorderFormats" type="xsd:boolean"/>
    <xsd:attribute name="applyFontFormats" type="xsd:boolean"/>
    <xsd:attribute name="applyPatternFormats" type="xsd:boolean"/>
    <xsd:attribute name="applyAlignmentFormats" type="xsd:boolean"/>
    <xsd:attribute name="applyWidthHeightFormats" type="xsd:boolean"/>
  </xsd:attributeGroup>
  <xsd:element name="externalLink" type="CT_ExternalLink"/>
  <xsd:complexType name="CT_ExternalLink">
    <xsd:sequence>
      <xsd:choice>
        <xsd:element name="externalBook" type="CT_ExternalBook" minOccurs="0" maxOccurs="1"/>
        <xsd:element name="ddeLink" type="CT_DdeLink" minOccurs="0" maxOccurs="1"/>
        <xsd:element name="oleLink" type="CT_OleLink" minOccurs="0" maxOccurs="1"/>
      </xsd:choice>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalBook">
    <xsd:sequence>
      <xsd:element name="sheetNames" type="CT_ExternalSheetNames" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="definedNames" type="CT_ExternalDefinedNames" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheetDataSet" type="CT_ExternalSheetDataSet" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalSheetNames">
    <xsd:sequence>
      <xsd:element name="sheetName" minOccurs="1" maxOccurs="unbounded" type="CT_ExternalSheetName"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalSheetName">
    <xsd:attribute name="val" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalDefinedNames">
    <xsd:sequence>
      <xsd:element name="definedName" type="CT_ExternalDefinedName" minOccurs="0"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalDefinedName">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="refersTo" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalSheetDataSet">
    <xsd:sequence>
      <xsd:element name="sheetData" type="CT_ExternalSheetData" minOccurs="1" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalSheetData">
    <xsd:sequence>
      <xsd:element name="row" type="CT_ExternalRow" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="refreshError" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalRow">
    <xsd:sequence>
      <xsd:element name="cell" type="CT_ExternalCell" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalCell">
    <xsd:sequence>
      <xsd:element name="v" type="s:ST_Xstring" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="r" type="ST_CellRef" use="optional"/>
    <xsd:attribute name="t" type="ST_CellType" use="optional" default="n"/>
    <xsd:attribute name="vm" type="xsd:unsignedInt" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DdeLink">
    <xsd:sequence>
      <xsd:element name="ddeItems" type="CT_DdeItems" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ddeService" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="ddeTopic" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DdeItems">
    <xsd:sequence>
      <xsd:element name="ddeItem" type="CT_DdeItem" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DdeItem">
    <xsd:sequence>
      <xsd:element name="values" type="CT_DdeValues" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" default="0"/>
    <xsd:attribute name="ole" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="advise" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="preferPic" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DdeValues">
    <xsd:sequence>
      <xsd:element name="value" minOccurs="1" maxOccurs="unbounded" type="CT_DdeValue"/>
    </xsd:sequence>
    <xsd:attribute name="rows" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="cols" type="xsd:unsignedInt" use="optional" default="1"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DdeValue">
    <xsd:sequence>
      <xsd:element name="val" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="t" type="ST_DdeValueType" use="optional" default="n"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DdeValueType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="nil"/>
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="n"/>
      <xsd:enumeration value="e"/>
      <xsd:enumeration value="str"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_OleLink">
    <xsd:sequence>
      <xsd:element name="oleItems" type="CT_OleItems" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="progId" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OleItems">
    <xsd:sequence>
      <xsd:element name="oleItem" type="CT_OleItem" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_OleItem">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="icon" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="advise" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="preferPic" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:element name="table" type="CT_Table"/>
  <xsd:complexType name="CT_Table">
    <xsd:sequence>
      <xsd:element name="autoFilter" type="CT_AutoFilter" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sortState" type="CT_SortState" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tableColumns" type="CT_TableColumns" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="tableStyleInfo" type="CT_TableStyleInfo" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="displayName" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="comment" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
    <xsd:attribute name="tableType" type="ST_TableType" use="optional" default="worksheet"/>
    <xsd:attribute name="headerRowCount" type="xsd:unsignedInt" use="optional" default="1"/>
    <xsd:attribute name="insertRow" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="insertRowShift" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="totalsRowCount" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="totalsRowShown" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="published" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="headerRowDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="dataDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="totalsRowDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="headerRowBorderDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="tableBorderDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="totalsRowBorderDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="headerRowCellStyle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="dataCellStyle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="totalsRowCellStyle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="connectionId" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TableType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="worksheet"/>
      <xsd:enumeration value="xml"/>
      <xsd:enumeration value="queryTable"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TableStyleInfo">
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="showFirstColumn" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="showLastColumn" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="showRowStripes" type="xsd:boolean" use="optional"/>
    <xsd:attribute name="showColumnStripes" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableColumns">
    <xsd:sequence>
      <xsd:element name="tableColumn" type="CT_TableColumn" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableColumn">
    <xsd:sequence>
      <xsd:element name="calculatedColumnFormula" type="CT_TableFormula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="totalsRowFormula" type="CT_TableFormula" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="xmlColumnPr" type="CT_XmlColumnPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="uniqueName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="totalsRowFunction" type="ST_TotalsRowFunction" use="optional"
      default="none"/>
    <xsd:attribute name="totalsRowLabel" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="queryTableFieldId" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="headerRowDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="dataDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="totalsRowDxfId" type="ST_DxfId" use="optional"/>
    <xsd:attribute name="headerRowCellStyle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="dataCellStyle" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="totalsRowCellStyle" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TableFormula">
    <xsd:simpleContent>
      <xsd:extension base="ST_Formula">
        <xsd:attribute name="array" type="xsd:boolean" default="false"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:simpleType name="ST_TotalsRowFunction">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="sum"/>
      <xsd:enumeration value="min"/>
      <xsd:enumeration value="max"/>
      <xsd:enumeration value="average"/>
      <xsd:enumeration value="count"/>
      <xsd:enumeration value="countNums"/>
      <xsd:enumeration value="stdDev"/>
      <xsd:enumeration value="var"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_XmlColumnPr">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="mapId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="xpath" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="denormalized" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="xmlDataType" type="ST_XmlDataType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_XmlDataType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:element name="volTypes" type="CT_VolTypes"/>
  <xsd:complexType name="CT_VolTypes">
    <xsd:sequence>
      <xsd:element name="volType" type="CT_VolType" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_VolType">
    <xsd:sequence>
      <xsd:element name="main" type="CT_VolMain" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_VolDepType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VolMain">
    <xsd:sequence>
      <xsd:element name="tp" type="CT_VolTopic" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="first" type="s:ST_Xstring" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VolTopic">
    <xsd:sequence>
      <xsd:element name="v" type="s:ST_Xstring" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="stp" type="s:ST_Xstring" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="tr" type="CT_VolTopicRef" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="t" type="ST_VolValueType" use="optional" default="n"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VolTopicRef">
    <xsd:attribute name="r" type="ST_CellRef" use="required"/>
    <xsd:attribute name="s" type="xsd:unsignedInt" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_VolDepType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="realTimeData"/>
      <xsd:enumeration value="olapFunctions"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VolValueType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="b"/>
      <xsd:enumeration value="n"/>
      <xsd:enumeration value="e"/>
      <xsd:enumeration value="s"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="workbook" type="CT_Workbook"/>
  <xsd:complexType name="CT_Workbook">
    <xsd:sequence>
      <xsd:element name="fileVersion" type="CT_FileVersion" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fileSharing" type="CT_FileSharing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="workbookPr" type="CT_WorkbookPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="workbookProtection" type="CT_WorkbookProtection" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="bookViews" type="CT_BookViews" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sheets" type="CT_Sheets" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="functionGroups" type="CT_FunctionGroups" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="externalReferences" type="CT_ExternalReferences" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="definedNames" type="CT_DefinedNames" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="calcPr" type="CT_CalcPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="oleSize" type="CT_OleSize" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="customWorkbookViews" type="CT_CustomWorkbookViews" minOccurs="0"
        maxOccurs="1"/>
      <xsd:element name="pivotCaches" type="CT_PivotCaches" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smartTagPr" type="CT_SmartTagPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="smartTagTypes" type="CT_SmartTagTypes" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="webPublishing" type="CT_WebPublishing" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="fileRecoveryPr" type="CT_FileRecoveryPr" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="webPublishObjects" type="CT_WebPublishObjects" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="conformance" type="s:ST_ConformanceClass"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FileVersion">
    <xsd:attribute name="appName" type="xsd:string" use="optional"/>
    <xsd:attribute name="lastEdited" type="xsd:string" use="optional"/>
    <xsd:attribute name="lowestEdited" type="xsd:string" use="optional"/>
    <xsd:attribute name="rupBuild" type="xsd:string" use="optional"/>
    <xsd:attribute name="codeName" type="s:ST_Guid" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BookViews">
    <xsd:sequence>
      <xsd:element name="workbookView" type="CT_BookView" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_BookView">
    <xsd:sequence>
      <xsd:element name="extLst" type="CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="visibility" type="ST_Visibility" use="optional" default="visible"/>
    <xsd:attribute name="minimized" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showHorizontalScroll" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showVerticalScroll" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showSheetTabs" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="xWindow" type="xsd:int" use="optional"/>
    <xsd:attribute name="yWindow" type="xsd:int" use="optional"/>
    <xsd:attribute name="windowWidth" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="windowHeight" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="tabRatio" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="firstSheet" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="activeTab" type="xsd:unsignedInt" use="optional" default="0"/>
    <xsd:attribute name="autoFilterDateGrouping" type="xsd:boolean" use="optional" default="true"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Visibility">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="visible"/>
      <xsd:enumeration value="hidden"/>
      <xsd:enumeration value="veryHidden"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_CustomWorkbookViews">
    <xsd:sequence>
      <xsd:element name="customWorkbookView" minOccurs="1" maxOccurs="unbounded"
        type="CT_CustomWorkbookView"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomWorkbookView">
    <xsd:sequence>
      <xsd:element name="extLst" minOccurs="0" type="CT_ExtensionList"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="guid" type="s:ST_Guid" use="required"/>
    <xsd:attribute name="autoUpdate" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="mergeInterval" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="changesSavedWin" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="onlySync" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="personalView" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="includePrintSettings" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="includeHiddenRowCol" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="maximized" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="minimized" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showHorizontalScroll" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showVerticalScroll" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showSheetTabs" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="xWindow" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="yWindow" type="xsd:int" use="optional" default="0"/>
    <xsd:attribute name="windowWidth" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="windowHeight" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="tabRatio" type="xsd:unsignedInt" use="optional" default="600"/>
    <xsd:attribute name="activeSheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="showFormulaBar" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showStatusbar" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="showComments" type="ST_Comments" use="optional" default="commIndicator"/>
    <xsd:attribute name="showObjects" type="ST_Objects" use="optional" default="all"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Comments">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="commNone"/>
      <xsd:enumeration value="commIndicator"/>
      <xsd:enumeration value="commIndAndComment"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Objects">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="all"/>
      <xsd:enumeration value="placeholders"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Sheets">
    <xsd:sequence>
      <xsd:element name="sheet" type="CT_Sheet" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Sheet">
    <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="sheetId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="state" type="ST_SheetState" use="optional" default="visible"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SheetState">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="visible"/>
      <xsd:enumeration value="hidden"/>
      <xsd:enumeration value="veryHidden"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_WorkbookPr">
    <xsd:attribute name="date1904" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showObjects" type="ST_Objects" use="optional" default="all"/>
    <xsd:attribute name="showBorderUnselectedTables" type="xsd:boolean" use="optional"
      default="true"/>
    <xsd:attribute name="filterPrivacy" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="promptedSolutions" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showInkAnnotation" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="backupFile" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="saveExternalLinkValues" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="updateLinks" type="ST_UpdateLinks" use="optional" default="userSet"/>
    <xsd:attribute name="codeName" type="xsd:string" use="optional"/>
    <xsd:attribute name="hidePivotFieldList" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="showPivotChartFilter" type="xsd:boolean" default="false"/>
    <xsd:attribute name="allowRefreshQuery" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="publishItems" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="checkCompatibility" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="autoCompressPictures" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="refreshAllConnections" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="defaultThemeVersion" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_UpdateLinks">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="userSet"/>
      <xsd:enumeration value="never"/>
      <xsd:enumeration value="always"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SmartTagPr">
    <xsd:attribute name="embed" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="show" type="ST_SmartTagShow" use="optional" default="all"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SmartTagShow">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="all"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="noIndicator"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SmartTagTypes">
    <xsd:sequence>
      <xsd:element name="smartTagType" type="CT_SmartTagType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SmartTagType">
    <xsd:attribute name="namespaceUri" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="name" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="url" type="s:ST_Xstring" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FileRecoveryPr">
    <xsd:attribute name="autoRecover" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="crashSave" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="dataExtractLoad" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="repairLoad" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalcPr">
    <xsd:attribute name="calcId" type="xsd:unsignedInt"/>
    <xsd:attribute name="calcMode" type="ST_CalcMode" use="optional" default="auto"/>
    <xsd:attribute name="fullCalcOnLoad" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="refMode" type="ST_RefMode" use="optional" default="A1"/>
    <xsd:attribute name="iterate" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="iterateCount" type="xsd:unsignedInt" use="optional" default="100"/>
    <xsd:attribute name="iterateDelta" type="xsd:double" use="optional" default="0.001"/>
    <xsd:attribute name="fullPrecision" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="calcCompleted" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="calcOnSave" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="concurrentCalc" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="concurrentManualCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="forceFullCalc" type="xsd:boolean" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CalcMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="manual"/>
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="autoNoTable"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_RefMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="A1"/>
      <xsd:enumeration value="R1C1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DefinedNames">
    <xsd:sequence>
      <xsd:element name="definedName" type="CT_DefinedName" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DefinedName">
    <xsd:simpleContent>
      <xsd:extension base="ST_Formula">
        <xsd:attribute name="name" type="s:ST_Xstring" use="required"/>
        <xsd:attribute name="comment" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="customMenu" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="description" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="help" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="statusBar" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="localSheetId" type="xsd:unsignedInt" use="optional"/>
        <xsd:attribute name="hidden" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="function" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="vbProcedure" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="xlm" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="functionGroupId" type="xsd:unsignedInt" use="optional"/>
        <xsd:attribute name="shortcutKey" type="s:ST_Xstring" use="optional"/>
        <xsd:attribute name="publishToServer" type="xsd:boolean" use="optional" default="false"/>
        <xsd:attribute name="workbookParameter" type="xsd:boolean" use="optional" default="false"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalReferences">
    <xsd:sequence>
      <xsd:element name="externalReference" type="CT_ExternalReference" minOccurs="1"
        maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ExternalReference">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SheetBackgroundPicture">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotCaches">
    <xsd:sequence>
      <xsd:element name="pivotCache" type="CT_PivotCache" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PivotCache">
    <xsd:attribute name="cacheId" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FileSharing">
    <xsd:attribute name="readOnlyRecommended" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="userName" type="s:ST_Xstring"/>
    <xsd:attribute name="reservationPassword" type="ST_UnsignedShortHex"/>
    <xsd:attribute name="algorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinCount" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OleSize">
    <xsd:attribute name="ref" type="ST_Ref" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WorkbookProtection">
    <xsd:attribute name="workbookPassword" type="ST_UnsignedShortHex" use="optional"/>
    <xsd:attribute name="workbookPasswordCharacterSet" type="xsd:string" use="optional"/>
    <xsd:attribute name="revisionsPassword" type="ST_UnsignedShortHex" use="optional"/>
    <xsd:attribute name="revisionsPasswordCharacterSet" type="xsd:string" use="optional"/>
    <xsd:attribute name="lockStructure" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="lockWindows" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="lockRevision" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="revisionsAlgorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="revisionsHashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="revisionsSaltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="revisionsSpinCount" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="workbookAlgorithmName" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="workbookHashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="workbookSaltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="workbookSpinCount" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WebPublishing">
    <xsd:attribute name="css" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="thicket" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="longFileNames" type="xsd:boolean" use="optional" default="true"/>
    <xsd:attribute name="vml" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="allowPng" type="xsd:boolean" use="optional" default="false"/>
    <xsd:attribute name="targetScreenSize" type="ST_TargetScreenSize" use="optional"
      default="800x600"/>
    <xsd:attribute name="dpi" type="xsd:unsignedInt" use="optional" default="96"/>
    <xsd:attribute name="codePage" type="xsd:unsignedInt" use="optional"/>
    <xsd:attribute name="characterSet" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TargetScreenSize">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="544x376"/>
      <xsd:enumeration value="640x480"/>
      <xsd:enumeration value="720x512"/>
      <xsd:enumeration value="800x600"/>
      <xsd:enumeration value="1024x768"/>
      <xsd:enumeration value="1152x882"/>
      <xsd:enumeration value="1152x900"/>
      <xsd:enumeration value="1280x1024"/>
      <xsd:enumeration value="1600x1200"/>
      <xsd:enumeration value="1800x1440"/>
      <xsd:enumeration value="1920x1200"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FunctionGroups">
    <xsd:sequence maxOccurs="unbounded">
      <xsd:element name="functionGroup" type="CT_FunctionGroup" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="builtInGroupCount" type="xsd:unsignedInt" default="16" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FunctionGroup">
    <xsd:attribute name="name" type="s:ST_Xstring"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WebPublishObjects">
    <xsd:sequence>
      <xsd:element name="webPublishObject" type="CT_WebPublishObject" minOccurs="1"
        maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="count" type="xsd:unsignedInt" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WebPublishObject">
    <xsd:attribute name="id" type="xsd:unsignedInt" use="required"/>
    <xsd:attribute name="divId" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="sourceObject" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="destinationFile" type="s:ST_Xstring" use="required"/>
    <xsd:attribute name="title" type="s:ST_Xstring" use="optional"/>
    <xsd:attribute name="autoRepublish" type="xsd:boolean" use="optional" default="false"/>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="urn:schemas-microsoft-com:vml"
  xmlns:pvml="urn:schemas-microsoft-com:office:powerpoint"
  xmlns:o="urn:schemas-microsoft-com:office:office"
  xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  xmlns:w10="urn:schemas-microsoft-com:office:word"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:x="urn:schemas-microsoft-com:office:excel"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="urn:schemas-microsoft-com:vml" elementFormDefault="qualified"
  attributeFormDefault="unqualified">
  <xsd:import namespace="urn:schemas-microsoft-com:office:office"
    schemaLocation="vml-officeDrawing.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    schemaLocation="wml.xsd"/>
  <xsd:import namespace="urn:schemas-microsoft-com:office:word"
    schemaLocation="vml-wordprocessingDrawing.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="urn:schemas-microsoft-com:office:excel"
    schemaLocation="vml-spreadsheetDrawing.xsd"/>
  <xsd:import namespace="urn:schemas-microsoft-com:office:powerpoint"
    schemaLocation="vml-presentationDrawing.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:attributeGroup name="AG_Id">
    <xsd:attribute name="id" type="xsd:string" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Style">
    <xsd:attribute name="style" type="xsd:string" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Type">
    <xsd:attribute name="type" type="xsd:string" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Adj">
    <xsd:attribute name="adj" type="xsd:string" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Path">
    <xsd:attribute name="path" type="xsd:string" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Fill">
    <xsd:attribute name="filled" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="fillcolor" type="s:ST_ColorType" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Chromakey">
    <xsd:attribute name="chromakey" type="s:ST_ColorType" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_Ext">
    <xsd:attribute name="ext" form="qualified" type="ST_Ext"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_CoreAttributes">
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_Style"/>
    <xsd:attribute name="href" type="xsd:string" use="optional"/>
    <xsd:attribute name="target" type="xsd:string" use="optional"/>
    <xsd:attribute name="class" type="xsd:string" use="optional"/>
    <xsd:attribute name="title" type="xsd:string" use="optional"/>
    <xsd:attribute name="alt" type="xsd:string" use="optional"/>
    <xsd:attribute name="coordsize" type="xsd:string" use="optional"/>
    <xsd:attribute name="coordorigin" type="xsd:string" use="optional"/>
    <xsd:attribute name="wrapcoords" type="xsd:string" use="optional"/>
    <xsd:attribute name="print" type="s:ST_TrueFalse" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_ShapeAttributes">
    <xsd:attributeGroup ref="AG_Chromakey"/>
    <xsd:attributeGroup ref="AG_Fill"/>
    <xsd:attribute name="opacity" type="xsd:string" use="optional"/>
    <xsd:attribute name="stroked" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="strokecolor" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="strokeweight" type="xsd:string" use="optional"/>
    <xsd:attribute name="insetpen" type="s:ST_TrueFalse" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_OfficeCoreAttributes">
    <xsd:attribute ref="o:spid"/>
    <xsd:attribute ref="o:oned"/>
    <xsd:attribute ref="o:regroupid"/>
    <xsd:attribute ref="o:doubleclicknotify"/>
    <xsd:attribute ref="o:button"/>
    <xsd:attribute ref="o:userhidden"/>
    <xsd:attribute ref="o:bullet"/>
    <xsd:attribute ref="o:hr"/>
    <xsd:attribute ref="o:hrstd"/>
    <xsd:attribute ref="o:hrnoshade"/>
    <xsd:attribute ref="o:hrpct"/>
    <xsd:attribute ref="o:hralign"/>
    <xsd:attribute ref="o:allowincell"/>
    <xsd:attribute ref="o:allowoverlap"/>
    <xsd:attribute ref="o:userdrawn"/>
    <xsd:attribute ref="o:bordertopcolor"/>
    <xsd:attribute ref="o:borderleftcolor"/>
    <xsd:attribute ref="o:borderbottomcolor"/>
    <xsd:attribute ref="o:borderrightcolor"/>
    <xsd:attribute ref="o:dgmlayout"/>
    <xsd:attribute ref="o:dgmnodekind"/>
    <xsd:attribute ref="o:dgmlayoutmru"/>
    <xsd:attribute ref="o:insetmode"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_OfficeShapeAttributes">
    <xsd:attribute ref="o:spt"/>
    <xsd:attribute ref="o:connectortype"/>
    <xsd:attribute ref="o:bwmode"/>
    <xsd:attribute ref="o:bwpure"/>
    <xsd:attribute ref="o:bwnormal"/>
    <xsd:attribute ref="o:forcedash"/>
    <xsd:attribute ref="o:oleicon"/>
    <xsd:attribute ref="o:ole"/>
    <xsd:attribute ref="o:preferrelative"/>
    <xsd:attribute ref="o:cliptowrap"/>
    <xsd:attribute ref="o:clip"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_AllCoreAttributes">
    <xsd:attributeGroup ref="AG_CoreAttributes"/>
    <xsd:attributeGroup ref="AG_OfficeCoreAttributes"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_AllShapeAttributes">
    <xsd:attributeGroup ref="AG_ShapeAttributes"/>
    <xsd:attributeGroup ref="AG_OfficeShapeAttributes"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_ImageAttributes">
    <xsd:attribute name="src" type="xsd:string" use="optional"/>
    <xsd:attribute name="cropleft" type="xsd:string" use="optional"/>
    <xsd:attribute name="croptop" type="xsd:string" use="optional"/>
    <xsd:attribute name="cropright" type="xsd:string" use="optional"/>
    <xsd:attribute name="cropbottom" type="xsd:string" use="optional"/>
    <xsd:attribute name="gain" type="xsd:string" use="optional"/>
    <xsd:attribute name="blacklevel" type="xsd:string" use="optional"/>
    <xsd:attribute name="gamma" type="xsd:string" use="optional"/>
    <xsd:attribute name="grayscale" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="bilevel" type="s:ST_TrueFalse" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_StrokeAttributes">
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="weight" type="xsd:string" use="optional"/>
    <xsd:attribute name="color" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="opacity" type="xsd:string" use="optional"/>
    <xsd:attribute name="linestyle" type="ST_StrokeLineStyle" use="optional"/>
    <xsd:attribute name="miterlimit" type="xsd:decimal" use="optional"/>
    <xsd:attribute name="joinstyle" type="ST_StrokeJoinStyle" use="optional"/>
    <xsd:attribute name="endcap" type="ST_StrokeEndCap" use="optional"/>
    <xsd:attribute name="dashstyle" type="xsd:string" use="optional"/>
    <xsd:attribute name="filltype" type="ST_FillType" use="optional"/>
    <xsd:attribute name="src" type="xsd:string" use="optional"/>
    <xsd:attribute name="imageaspect" type="ST_ImageAspect" use="optional"/>
    <xsd:attribute name="imagesize" type="xsd:string" use="optional"/>
    <xsd:attribute name="imagealignshape" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="color2" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="startarrow" type="ST_StrokeArrowType" use="optional"/>
    <xsd:attribute name="startarrowwidth" type="ST_StrokeArrowWidth" use="optional"/>
    <xsd:attribute name="startarrowlength" type="ST_StrokeArrowLength" use="optional"/>
    <xsd:attribute name="endarrow" type="ST_StrokeArrowType" use="optional"/>
    <xsd:attribute name="endarrowwidth" type="ST_StrokeArrowWidth" use="optional"/>
    <xsd:attribute name="endarrowlength" type="ST_StrokeArrowLength" use="optional"/>
    <xsd:attribute ref="o:href"/>
    <xsd:attribute ref="o:althref"/>
    <xsd:attribute ref="o:title"/>
    <xsd:attribute ref="o:forcedash"/>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="insetpen" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute ref="o:relid"/>
  </xsd:attributeGroup>
  <xsd:group name="EG_ShapeElements">
    <xsd:choice>
      <xsd:element ref="path"/>
      <xsd:element ref="formulas"/>
      <xsd:element ref="handles"/>
      <xsd:element ref="fill"/>
      <xsd:element ref="stroke"/>
      <xsd:element ref="shadow"/>
      <xsd:element ref="textbox"/>
      <xsd:element ref="textpath"/>
      <xsd:element ref="imagedata"/>
      <xsd:element ref="o:skew"/>
      <xsd:element ref="o:extrusion"/>
      <xsd:element ref="o:callout"/>
      <xsd:element ref="o:lock"/>
      <xsd:element ref="o:clippath"/>
      <xsd:element ref="o:signatureline"/>
      <xsd:element ref="w10:wrap"/>
      <xsd:element ref="w10:anchorlock"/>
      <xsd:element ref="w10:bordertop"/>
      <xsd:element ref="w10:borderbottom"/>
      <xsd:element ref="w10:borderleft"/>
      <xsd:element ref="w10:borderright"/>
      <xsd:element ref="x:ClientData" minOccurs="0"/>
      <xsd:element ref="pvml:textdata" minOccurs="0"/>
    </xsd:choice>
  </xsd:group>
  <xsd:element name="shape" type="CT_Shape"/>
  <xsd:element name="shapetype" type="CT_Shapetype"/>
  <xsd:element name="group" type="CT_Group"/>
  <xsd:element name="background" type="CT_Background"/>
  <xsd:complexType name="CT_Shape">
    <xsd:choice maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements"/>
      <xsd:element ref="o:ink"/>
      <xsd:element ref="pvml:iscomment"/>
      <xsd:element ref="o:equationxml"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attributeGroup ref="AG_Type"/>
    <xsd:attributeGroup ref="AG_Adj"/>
    <xsd:attributeGroup ref="AG_Path"/>
    <xsd:attribute ref="o:gfxdata"/>
    <xsd:attribute name="equationxml" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Shapetype">
    <xsd:sequence>
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element ref="o:complex" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attributeGroup ref="AG_Adj"/>
    <xsd:attributeGroup ref="AG_Path"/>
    <xsd:attribute ref="o:master"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Group">
    <xsd:choice maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements"/>
      <xsd:element ref="group"/>
      <xsd:element ref="shape"/>
      <xsd:element ref="shapetype"/>
      <xsd:element ref="arc"/>
      <xsd:element ref="curve"/>
      <xsd:element ref="image"/>
      <xsd:element ref="line"/>
      <xsd:element ref="oval"/>
      <xsd:element ref="polyline"/>
      <xsd:element ref="rect"/>
      <xsd:element ref="roundrect"/>
      <xsd:element ref="o:diagram"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_Fill"/>
    <xsd:attribute name="editas" type="ST_EditAs" use="optional"/>
    <xsd:attribute ref="o:tableproperties"/>
    <xsd:attribute ref="o:tablelimits"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Background">
    <xsd:sequence>
      <xsd:element ref="fill" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_Fill"/>
    <xsd:attribute ref="o:bwmode"/>
    <xsd:attribute ref="o:bwpure"/>
    <xsd:attribute ref="o:bwnormal"/>
    <xsd:attribute ref="o:targetscreensize"/>
  </xsd:complexType>
  <xsd:element name="fill" type="CT_Fill"/>
  <xsd:element name="formulas" type="CT_Formulas"/>
  <xsd:element name="handles" type="CT_Handles"/>
  <xsd:element name="imagedata" type="CT_ImageData"/>
  <xsd:element name="path" type="CT_Path"/>
  <xsd:element name="textbox" type="CT_Textbox"/>
  <xsd:element name="shadow" type="CT_Shadow"/>
  <xsd:element name="stroke" type="CT_Stroke"/>
  <xsd:element name="textpath" type="CT_TextPath"/>
  <xsd:complexType name="CT_Fill">
    <xsd:sequence>
      <xsd:element ref="o:fill" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attribute name="type" type="ST_FillType" use="optional"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="color" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="opacity" type="xsd:string" use="optional"/>
    <xsd:attribute name="color2" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="src" type="xsd:string" use="optional"/>
    <xsd:attribute ref="o:href"/>
    <xsd:attribute ref="o:althref"/>
    <xsd:attribute name="size" type="xsd:string" use="optional"/>
    <xsd:attribute name="origin" type="xsd:string" use="optional"/>
    <xsd:attribute name="position" type="xsd:string" use="optional"/>
    <xsd:attribute name="aspect" type="ST_ImageAspect" use="optional"/>
    <xsd:attribute name="colors" type="xsd:string" use="optional"/>
    <xsd:attribute name="angle" type="xsd:decimal" use="optional"/>
    <xsd:attribute name="alignshape" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="focus" type="xsd:string" use="optional"/>
    <xsd:attribute name="focussize" type="xsd:string" use="optional"/>
    <xsd:attribute name="focusposition" type="xsd:string" use="optional"/>
    <xsd:attribute name="method" type="ST_FillMethod" use="optional"/>
    <xsd:attribute ref="o:detectmouseclick"/>
    <xsd:attribute ref="o:title"/>
    <xsd:attribute ref="o:opacity2"/>
    <xsd:attribute name="recolor" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="rotate" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute ref="o:relid" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Formulas">
    <xsd:sequence>
      <xsd:element name="f" type="CT_F" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_F">
    <xsd:attribute name="eqn" type="xsd:string"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Handles">
    <xsd:sequence>
      <xsd:element name="h" type="CT_H" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_H">
    <xsd:attribute name="position" type="xsd:string"/>
    <xsd:attribute name="polar" type="xsd:string"/>
    <xsd:attribute name="map" type="xsd:string"/>
    <xsd:attribute name="invx" type="s:ST_TrueFalse"/>
    <xsd:attribute name="invy" type="s:ST_TrueFalse"/>
    <xsd:attribute name="switch" type="s:ST_TrueFalseBlank"/>
    <xsd:attribute name="xrange" type="xsd:string"/>
    <xsd:attribute name="yrange" type="xsd:string"/>
    <xsd:attribute name="radiusrange" type="xsd:string"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ImageData">
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_ImageAttributes"/>
    <xsd:attributeGroup ref="AG_Chromakey"/>
    <xsd:attribute name="embosscolor" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="recolortarget" type="s:ST_ColorType"/>
    <xsd:attribute ref="o:href"/>
    <xsd:attribute ref="o:althref"/>
    <xsd:attribute ref="o:title"/>
    <xsd:attribute ref="o:oleid"/>
    <xsd:attribute ref="o:detectmouseclick"/>
    <xsd:attribute ref="o:movie"/>
    <xsd:attribute ref="o:relid"/>
    <xsd:attribute ref="r:id"/>
    <xsd:attribute ref="r:pict"/>
    <xsd:attribute ref="r:href"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Path">
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attribute name="v" type="xsd:string" use="optional"/>
    <xsd:attribute name="limo" type="xsd:string" use="optional"/>
    <xsd:attribute name="textboxrect" type="xsd:string" use="optional"/>
    <xsd:attribute name="fillok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="strokeok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="shadowok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="arrowok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="gradientshapeok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="textpathok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="insetpenok" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute ref="o:connecttype"/>
    <xsd:attribute ref="o:connectlocs"/>
    <xsd:attribute ref="o:connectangles"/>
    <xsd:attribute ref="o:extrusionok"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Shadow">
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="type" type="ST_ShadowType" use="optional"/>
    <xsd:attribute name="obscured" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="color" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="opacity" type="xsd:string" use="optional"/>
    <xsd:attribute name="offset" type="xsd:string" use="optional"/>
    <xsd:attribute name="color2" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="offset2" type="xsd:string" use="optional"/>
    <xsd:attribute name="origin" type="xsd:string" use="optional"/>
    <xsd:attribute name="matrix" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Stroke">
    <xsd:sequence>
      <xsd:element ref="o:left" minOccurs="0"/>
      <xsd:element ref="o:top" minOccurs="0"/>
      <xsd:element ref="o:right" minOccurs="0"/>
      <xsd:element ref="o:bottom" minOccurs="0"/>
      <xsd:element ref="o:column" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_StrokeAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Textbox">
    <xsd:choice>
      <xsd:element ref="w:txbxContent" minOccurs="0"/>
      <xsd:any namespace="##local" processContents="skip"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_Style"/>
    <xsd:attribute name="inset" type="xsd:string" use="optional"/>
    <xsd:attribute ref="o:singleclick"/>
    <xsd:attribute ref="o:insetmode"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TextPath">
    <xsd:attributeGroup ref="AG_Id"/>
    <xsd:attributeGroup ref="AG_Style"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="fitshape" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="fitpath" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="trim" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="xscale" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="string" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:element name="arc" type="CT_Arc"/>
  <xsd:element name="curve" type="CT_Curve"/>
  <xsd:element name="image" type="CT_Image"/>
  <xsd:element name="line" type="CT_Line"/>
  <xsd:element name="oval" type="CT_Oval"/>
  <xsd:element name="polyline" type="CT_PolyLine"/>
  <xsd:element name="rect" type="CT_Rect"/>
  <xsd:element name="roundrect" type="CT_RoundRect"/>
  <xsd:complexType name="CT_Arc">
    <xsd:sequence>
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attribute name="startAngle" type="xsd:decimal" use="optional"/>
    <xsd:attribute name="endAngle" type="xsd:decimal" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Curve">
    <xsd:sequence>
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attribute name="from" type="xsd:string" use="optional"/>
    <xsd:attribute name="control1" type="xsd:string" use="optional"/>
    <xsd:attribute name="control2" type="xsd:string" use="optional"/>
    <xsd:attribute name="to" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Image">
    <xsd:sequence>
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attributeGroup ref="AG_ImageAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Line">
    <xsd:sequence>
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attribute name="from" type="xsd:string" use="optional"/>
    <xsd:attribute name="to" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Oval">
    <xsd:choice maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PolyLine">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements"/>
      <xsd:element ref="o:ink"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attribute name="points" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Rect">
    <xsd:choice maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RoundRect">
    <xsd:choice maxOccurs="unbounded">
      <xsd:group ref="EG_ShapeElements" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attributeGroup ref="AG_AllCoreAttributes"/>
    <xsd:attributeGroup ref="AG_AllShapeAttributes"/>
    <xsd:attribute name="arcsize" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Ext">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="view"/>
      <xsd:enumeration value="edit"/>
      <xsd:enumeration value="backwardCompatible"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FillType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="gradient"/>
      <xsd:enumeration value="gradientRadial"/>
      <xsd:enumeration value="tile"/>
      <xsd:enumeration value="pattern"/>
      <xsd:enumeration value="frame"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FillMethod">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="linear"/>
      <xsd:enumeration value="sigma"/>
      <xsd:enumeration value="any"/>
      <xsd:enumeration value="linear sigma"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ShadowType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="emboss"/>
      <xsd:enumeration value="perspective"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeLineStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="thinThin"/>
      <xsd:enumeration value="thinThick"/>
      <xsd:enumeration value="thickThin"/>
      <xsd:enumeration value="thickBetweenThin"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeJoinStyle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="round"/>
      <xsd:enumeration value="bevel"/>
      <xsd:enumeration value="miter"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeEndCap">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="flat"/>
      <xsd:enumeration value="square"/>
      <xsd:enumeration value="round"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeArrowLength">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="short"/>
      <xsd:enumeration value="medium"/>
      <xsd:enumeration value="long"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeArrowWidth">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="narrow"/>
      <xsd:enumeration value="medium"/>
      <xsd:enumeration value="wide"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_StrokeArrowType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="block"/>
      <xsd:enumeration value="classic"/>
      <xsd:enumeration value="oval"/>
      <xsd:enumeration value="diamond"/>
      <xsd:enumeration value="open"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ImageAspect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="ignore"/>
      <xsd:enumeration value="atMost"/>
      <xsd:enumeration value="atLeast"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_EditAs">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="canvas"/>
      <xsd:enumeration value="orgchart"/>
      <xsd:enumeration value="radial"/>
      <xsd:enumeration value="cycle"/>
      <xsd:enumeration value="stacked"/>
      <xsd:enumeration value="venn"/>
      <xsd:enumeration value="bullseye"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="urn:schemas-microsoft-com:office:office" xmlns:v="urn:schemas-microsoft-com:vml"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="urn:schemas-microsoft-com:office:office" elementFormDefault="qualified"
  attributeFormDefault="unqualified">
  <xsd:import namespace="urn:schemas-microsoft-com:vml" schemaLocation="vml-main.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:attribute name="bwmode" type="ST_BWMode"/>
  <xsd:attribute name="bwpure" type="ST_BWMode"/>
  <xsd:attribute name="bwnormal" type="ST_BWMode"/>
  <xsd:attribute name="targetscreensize" type="ST_ScreenSize"/>
  <xsd:attribute name="insetmode" type="ST_InsetMode" default="custom"/>
  <xsd:attribute name="spt" type="xsd:float"/>
  <xsd:attribute name="wrapcoords" type="xsd:string"/>
  <xsd:attribute name="oned" type="s:ST_TrueFalse"/>
  <xsd:attribute name="regroupid" type="xsd:integer"/>
  <xsd:attribute name="doubleclicknotify" type="s:ST_TrueFalse"/>
  <xsd:attribute name="connectortype" type="ST_ConnectorType" default="straight"/>
  <xsd:attribute name="button" type="s:ST_TrueFalse"/>
  <xsd:attribute name="userhidden" type="s:ST_TrueFalse"/>
  <xsd:attribute name="forcedash" type="s:ST_TrueFalse"/>
  <xsd:attribute name="oleicon" type="s:ST_TrueFalse"/>
  <xsd:attribute name="ole" type="s:ST_TrueFalseBlank"/>
  <xsd:attribute name="preferrelative" type="s:ST_TrueFalse"/>
  <xsd:attribute name="cliptowrap" type="s:ST_TrueFalse"/>
  <xsd:attribute name="clip" type="s:ST_TrueFalse"/>
  <xsd:attribute name="bullet" type="s:ST_TrueFalse"/>
  <xsd:attribute name="hr" type="s:ST_TrueFalse"/>
  <xsd:attribute name="hrstd" type="s:ST_TrueFalse"/>
  <xsd:attribute name="hrnoshade" type="s:ST_TrueFalse"/>
  <xsd:attribute name="hrpct" type="xsd:float"/>
  <xsd:attribute name="hralign" type="ST_HrAlign" default="left"/>
  <xsd:attribute name="allowincell" type="s:ST_TrueFalse"/>
  <xsd:attribute name="allowoverlap" type="s:ST_TrueFalse"/>
  <xsd:attribute name="userdrawn" type="s:ST_TrueFalse"/>
  <xsd:attribute name="bordertopcolor" type="xsd:string"/>
  <xsd:attribute name="borderleftcolor" type="xsd:string"/>
  <xsd:attribute name="borderbottomcolor" type="xsd:string"/>
  <xsd:attribute name="borderrightcolor" type="xsd:string"/>
  <xsd:attribute name="connecttype" type="ST_ConnectType"/>
  <xsd:attribute name="connectlocs" type="xsd:string"/>
  <xsd:attribute name="connectangles" type="xsd:string"/>
  <xsd:attribute name="master" type="xsd:string"/>
  <xsd:attribute name="extrusionok" type="s:ST_TrueFalse"/>
  <xsd:attribute name="href" type="xsd:string"/>
  <xsd:attribute name="althref" type="xsd:string"/>
  <xsd:attribute name="title" type="xsd:string"/>
  <xsd:attribute name="singleclick" type="s:ST_TrueFalse"/>
  <xsd:attribute name="oleid" type="xsd:float"/>
  <xsd:attribute name="detectmouseclick" type="s:ST_TrueFalse"/>
  <xsd:attribute name="movie" type="xsd:float"/>
  <xsd:attribute name="spid" type="xsd:string"/>
  <xsd:attribute name="opacity2" type="xsd:string"/>
  <xsd:attribute name="relid" type="r:ST_RelationshipId"/>
  <xsd:attribute name="dgmlayout" type="ST_DiagramLayout"/>
  <xsd:attribute name="dgmnodekind" type="xsd:integer"/>
  <xsd:attribute name="dgmlayoutmru" type="ST_DiagramLayout"/>
  <xsd:attribute name="gfxdata" type="xsd:base64Binary"/>
  <xsd:attribute name="tableproperties" type="xsd:string"/>
  <xsd:attribute name="tablelimits" type="xsd:string"/>
  <xsd:element name="shapedefaults" type="CT_ShapeDefaults"/>
  <xsd:element name="shapelayout" type="CT_ShapeLayout"/>
  <xsd:element name="signatureline" type="CT_SignatureLine"/>
  <xsd:element name="ink" type="CT_Ink"/>
  <xsd:element name="diagram" type="CT_Diagram"/>
  <xsd:element name="equationxml" type="CT_EquationXml"/>
  <xsd:complexType name="CT_ShapeDefaults">
    <xsd:all minOccurs="0">
      <xsd:element ref="v:fill" minOccurs="0"/>
      <xsd:element ref="v:stroke" minOccurs="0"/>
      <xsd:element ref="v:textbox" minOccurs="0"/>
      <xsd:element ref="v:shadow" minOccurs="0"/>
      <xsd:element ref="skew" minOccurs="0"/>
      <xsd:element ref="extrusion" minOccurs="0"/>
      <xsd:element ref="callout" minOccurs="0"/>
      <xsd:element ref="lock" minOccurs="0"/>
      <xsd:element name="colormru" minOccurs="0" type="CT_ColorMru"/>
      <xsd:element name="colormenu" minOccurs="0" type="CT_ColorMenu"/>
    </xsd:all>
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="spidmax" type="xsd:integer" use="optional"/>
    <xsd:attribute name="style" type="xsd:string" use="optional"/>
    <xsd:attribute name="fill" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="fillcolor" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="stroke" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="strokecolor" type="s:ST_ColorType"/>
    <xsd:attribute name="allowincell" form="qualified" type="s:ST_TrueFalse"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Ink">
    <xsd:sequence/>
    <xsd:attribute name="i" type="xsd:string"/>
    <xsd:attribute name="annotation" type="s:ST_TrueFalse"/>
    <xsd:attribute name="contentType" type="ST_ContentType" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SignatureLine">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="issignatureline" type="s:ST_TrueFalse"/>
    <xsd:attribute name="id" type="s:ST_Guid"/>
    <xsd:attribute name="provid" type="s:ST_Guid"/>
    <xsd:attribute name="signinginstructionsset" type="s:ST_TrueFalse"/>
    <xsd:attribute name="allowcomments" type="s:ST_TrueFalse"/>
    <xsd:attribute name="showsigndate" type="s:ST_TrueFalse"/>
    <xsd:attribute name="suggestedsigner" type="xsd:string" form="qualified"/>
    <xsd:attribute name="suggestedsigner2" type="xsd:string" form="qualified"/>
    <xsd:attribute name="suggestedsigneremail" type="xsd:string" form="qualified"/>
    <xsd:attribute name="signinginstructions" type="xsd:string"/>
    <xsd:attribute name="addlxml" type="xsd:string"/>
    <xsd:attribute name="sigprovurl" type="xsd:string"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeLayout">
    <xsd:all>
      <xsd:element name="idmap" type="CT_IdMap" minOccurs="0"/>
      <xsd:element name="regrouptable" type="CT_RegroupTable" minOccurs="0"/>
      <xsd:element name="rules" type="CT_Rules" minOccurs="0"/>
    </xsd:all>
    <xsd:attributeGroup ref="v:AG_Ext"/>
  </xsd:complexType>
  <xsd:complexType name="CT_IdMap">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="data" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RegroupTable">
    <xsd:sequence>
      <xsd:element name="entry" type="CT_Entry" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="v:AG_Ext"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Entry">
    <xsd:attribute name="new" type="xsd:int" use="optional"/>
    <xsd:attribute name="old" type="xsd:int" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Rules">
    <xsd:sequence>
      <xsd:element name="r" type="CT_R" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="v:AG_Ext"/>
  </xsd:complexType>
  <xsd:complexType name="CT_R">
    <xsd:sequence>
      <xsd:element name="proxy" type="CT_Proxy" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:string" use="required"/>
    <xsd:attribute name="type" type="ST_RType" use="optional"/>
    <xsd:attribute name="how" type="ST_How" use="optional"/>
    <xsd:attribute name="idref" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Proxy">
    <xsd:attribute name="start" type="s:ST_TrueFalseBlank" use="optional" default="false"/>
    <xsd:attribute name="end" type="s:ST_TrueFalseBlank" use="optional" default="false"/>
    <xsd:attribute name="idref" type="xsd:string" use="optional"/>
    <xsd:attribute name="connectloc" type="xsd:int" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Diagram">
    <xsd:sequence>
      <xsd:element name="relationtable" type="CT_RelationTable" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="dgmstyle" type="xsd:integer" use="optional"/>
    <xsd:attribute name="autoformat" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="reverse" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="autolayout" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="dgmscalex" type="xsd:integer" use="optional"/>
    <xsd:attribute name="dgmscaley" type="xsd:integer" use="optional"/>
    <xsd:attribute name="dgmfontsize" type="xsd:integer" use="optional"/>
    <xsd:attribute name="constrainbounds" type="xsd:string" use="optional"/>
    <xsd:attribute name="dgmbasetextscale" type="xsd:integer" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_EquationXml">
    <xsd:sequence>
      <xsd:any namespace="##any"/>
    </xsd:sequence>
    <xsd:attribute name="contentType" type="ST_AlternateMathContentType" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_AlternateMathContentType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_RelationTable">
    <xsd:sequence>
      <xsd:element name="rel" type="CT_Relation" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="v:AG_Ext"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Relation">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="idsrc" type="xsd:string" use="optional"/>
    <xsd:attribute name="iddest" type="xsd:string" use="optional"/>
    <xsd:attribute name="idcntr" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorMru">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="colors" type="xsd:string"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ColorMenu">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="strokecolor" type="s:ST_ColorType"/>
    <xsd:attribute name="fillcolor" type="s:ST_ColorType"/>
    <xsd:attribute name="shadowcolor" type="s:ST_ColorType"/>
    <xsd:attribute name="extrusioncolor" type="s:ST_ColorType"/>
  </xsd:complexType>
  <xsd:element name="skew" type="CT_Skew"/>
  <xsd:element name="extrusion" type="CT_Extrusion"/>
  <xsd:element name="callout" type="CT_Callout"/>
  <xsd:element name="lock" type="CT_Lock"/>
  <xsd:element name="OLEObject" type="CT_OLEObject"/>
  <xsd:element name="complex" type="CT_Complex"/>
  <xsd:element name="left" type="CT_StrokeChild"/>
  <xsd:element name="top" type="CT_StrokeChild"/>
  <xsd:element name="right" type="CT_StrokeChild"/>
  <xsd:element name="bottom" type="CT_StrokeChild"/>
  <xsd:element name="column" type="CT_StrokeChild"/>
  <xsd:element name="clippath" type="CT_ClipPath"/>
  <xsd:element name="fill" type="CT_Fill"/>
  <xsd:complexType name="CT_Skew">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="id" type="xsd:string" use="optional"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="offset" type="xsd:string" use="optional"/>
    <xsd:attribute name="origin" type="xsd:string" use="optional"/>
    <xsd:attribute name="matrix" type="xsd:string" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Extrusion">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="type" type="ST_ExtrusionType" default="parallel" use="optional"/>
    <xsd:attribute name="render" type="ST_ExtrusionRender" default="solid" use="optional"/>
    <xsd:attribute name="viewpointorigin" type="xsd:string" use="optional"/>
    <xsd:attribute name="viewpoint" type="xsd:string" use="optional"/>
    <xsd:attribute name="plane" type="ST_ExtrusionPlane" default="XY" use="optional"/>
    <xsd:attribute name="skewangle" type="xsd:float" use="optional"/>
    <xsd:attribute name="skewamt" type="xsd:string" use="optional"/>
    <xsd:attribute name="foredepth" type="xsd:string" use="optional"/>
    <xsd:attribute name="backdepth" type="xsd:string" use="optional"/>
    <xsd:attribute name="orientation" type="xsd:string" use="optional"/>
    <xsd:attribute name="orientationangle" type="xsd:float" use="optional"/>
    <xsd:attribute name="lockrotationcenter" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="autorotationcenter" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="rotationcenter" type="xsd:string" use="optional"/>
    <xsd:attribute name="rotationangle" type="xsd:string" use="optional"/>
    <xsd:attribute name="colormode" type="ST_ColorMode" use="optional"/>
    <xsd:attribute name="color" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="shininess" type="xsd:float" use="optional"/>
    <xsd:attribute name="specularity" type="xsd:string" use="optional"/>
    <xsd:attribute name="diffusity" type="xsd:string" use="optional"/>
    <xsd:attribute name="metal" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="edge" type="xsd:string" use="optional"/>
    <xsd:attribute name="facet" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightface" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="brightness" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightposition" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightlevel" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightharsh" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="lightposition2" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightlevel2" type="xsd:string" use="optional"/>
    <xsd:attribute name="lightharsh2" type="s:ST_TrueFalse" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Callout">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="type" type="xsd:string" use="optional"/>
    <xsd:attribute name="gap" type="xsd:string" use="optional"/>
    <xsd:attribute name="angle" type="ST_Angle" use="optional"/>
    <xsd:attribute name="dropauto" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="drop" type="ST_CalloutDrop" use="optional"/>
    <xsd:attribute name="distance" type="xsd:string" use="optional"/>
    <xsd:attribute name="lengthspecified" type="s:ST_TrueFalse" default="f" use="optional"/>
    <xsd:attribute name="length" type="xsd:string" use="optional"/>
    <xsd:attribute name="accentbar" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="textborder" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="minusx" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="minusy" type="s:ST_TrueFalse" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Lock">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="position" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="selection" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="grouping" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="ungrouping" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="rotation" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="cropping" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="verticies" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="adjusthandles" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="text" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="aspectratio" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="shapetype" type="s:ST_TrueFalse" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OLEObject">
    <xsd:sequence>
      <xsd:element name="LinkType" type="ST_OLELinkType" minOccurs="0"/>
      <xsd:element name="LockedField" type="s:ST_TrueFalseBlank" minOccurs="0"/>
      <xsd:element name="FieldCodes" type="xsd:string" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="Type" type="ST_OLEType" use="optional"/>
    <xsd:attribute name="ProgID" type="xsd:string" use="optional"/>
    <xsd:attribute name="ShapeID" type="xsd:string" use="optional"/>
    <xsd:attribute name="DrawAspect" type="ST_OLEDrawAspect" use="optional"/>
    <xsd:attribute name="ObjectID" type="xsd:string" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="UpdateMode" type="ST_OLEUpdateMode" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Complex">
    <xsd:attributeGroup ref="v:AG_Ext"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StrokeChild">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="on" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="weight" type="xsd:string" use="optional"/>
    <xsd:attribute name="color" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="color2" type="s:ST_ColorType" use="optional"/>
    <xsd:attribute name="opacity" type="xsd:string" use="optional"/>
    <xsd:attribute name="linestyle" type="v:ST_StrokeLineStyle" use="optional"/>
    <xsd:attribute name="miterlimit" type="xsd:decimal" use="optional"/>
    <xsd:attribute name="joinstyle" type="v:ST_StrokeJoinStyle" use="optional"/>
    <xsd:attribute name="endcap" type="v:ST_StrokeEndCap" use="optional"/>
    <xsd:attribute name="dashstyle" type="xsd:string" use="optional"/>
    <xsd:attribute name="insetpen" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="filltype" type="v:ST_FillType" use="optional"/>
    <xsd:attribute name="src" type="xsd:string" use="optional"/>
    <xsd:attribute name="imageaspect" type="v:ST_ImageAspect" use="optional"/>
    <xsd:attribute name="imagesize" type="xsd:string" use="optional"/>
    <xsd:attribute name="imagealignshape" type="s:ST_TrueFalse" use="optional"/>
    <xsd:attribute name="startarrow" type="v:ST_StrokeArrowType" use="optional"/>
    <xsd:attribute name="startarrowwidth" type="v:ST_StrokeArrowWidth" use="optional"/>
    <xsd:attribute name="startarrowlength" type="v:ST_StrokeArrowLength" use="optional"/>
    <xsd:attribute name="endarrow" type="v:ST_StrokeArrowType" use="optional"/>
    <xsd:attribute name="endarrowwidth" type="v:ST_StrokeArrowWidth" use="optional"/>
    <xsd:attribute name="endarrowlength" type="v:ST_StrokeArrowLength" use="optional"/>
    <xsd:attribute ref="href"/>
    <xsd:attribute ref="althref"/>
    <xsd:attribute ref="title"/>
    <xsd:attribute ref="forcedash"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ClipPath">
    <xsd:attribute name="v" type="xsd:string" use="required" form="qualified"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Fill">
    <xsd:attributeGroup ref="v:AG_Ext"/>
    <xsd:attribute name="type" type="ST_FillType"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_RType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="arc"/>
      <xsd:enumeration value="callout"/>
      <xsd:enumeration value="connector"/>
      <xsd:enumeration value="align"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_How">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="middle"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="right"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BWMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="color"/>
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="grayScale"/>
      <xsd:enumeration value="lightGrayscale"/>
      <xsd:enumeration value="inverseGray"/>
      <xsd:enumeration value="grayOutline"/>
      <xsd:enumeration value="highContrast"/>
      <xsd:enumeration value="black"/>
      <xsd:enumeration value="white"/>
      <xsd:enumeration value="hide"/>
      <xsd:enumeration value="undrawn"/>
      <xsd:enumeration value="blackTextAndLines"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ScreenSize">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="544,376"/>
      <xsd:enumeration value="640,480"/>
      <xsd:enumeration value="720,512"/>
      <xsd:enumeration value="800,600"/>
      <xsd:enumeration value="1024,768"/>
      <xsd:enumeration value="1152,862"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_InsetMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ColorMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ContentType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DiagramLayout">
    <xsd:restriction base="xsd:integer">
      <xsd:enumeration value="0"/>
      <xsd:enumeration value="1"/>
      <xsd:enumeration value="2"/>
      <xsd:enumeration value="3"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ExtrusionType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="perspective"/>
      <xsd:enumeration value="parallel"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ExtrusionRender">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="wireFrame"/>
      <xsd:enumeration value="boundingCube"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ExtrusionPlane">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="XY"/>
      <xsd:enumeration value="ZX"/>
      <xsd:enumeration value="YZ"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Angle">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="any"/>
      <xsd:enumeration value="30"/>
      <xsd:enumeration value="45"/>
      <xsd:enumeration value="60"/>
      <xsd:enumeration value="90"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CalloutDrop">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_CalloutPlacement">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="user"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConnectorType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="straight"/>
      <xsd:enumeration value="elbow"/>
      <xsd:enumeration value="curved"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HrAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="center"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ConnectType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="rect"/>
      <xsd:enumeration value="segments"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OLELinkType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OLEType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="Embed"/>
      <xsd:enumeration value="Link"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OLEDrawAspect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="Content"/>
      <xsd:enumeration value="Icon"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_OLEUpdateMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="Always"/>
      <xsd:enumeration value="OnCall"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FillType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="gradientCenter"/>
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="pattern"/>
      <xsd:enumeration value="tile"/>
      <xsd:enumeration value="frame"/>
      <xsd:enumeration value="gradientUnscaled"/>
      <xsd:enumeration value="gradientRadial"/>
      <xsd:enumeration value="gradient"/>
      <xsd:enumeration value="background"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="urn:schemas-microsoft-com:office:powerpoint"
  targetNamespace="urn:schemas-microsoft-com:office:powerpoint" elementFormDefault="qualified"
  attributeFormDefault="unqualified">
  <xsd:element name="iscomment" type="CT_Empty"/>
  <xsd:element name="textdata" type="CT_Rel"/>
  <xsd:complexType name="CT_Empty"/>
  <xsd:complexType name="CT_Rel">
    <xsd:attribute name="id" type="xsd:string"/>
  </xsd:complexType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="urn:schemas-microsoft-com:office:excel"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  targetNamespace="urn:schemas-microsoft-com:office:excel" elementFormDefault="qualified"
  attributeFormDefault="unqualified">
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:element name="ClientData" type="CT_ClientData"/>
  <xsd:complexType name="CT_ClientData">
    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="MoveWithCells" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="SizeWithCells" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Anchor" type="xsd:string"/>
      <xsd:element name="Locked" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="DefaultSize" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="PrintObject" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Disabled" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="AutoFill" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="AutoLine" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="AutoPict" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="FmlaMacro" type="xsd:string"/>
      <xsd:element name="TextHAlign" type="xsd:string"/>
      <xsd:element name="TextVAlign" type="xsd:string"/>
      <xsd:element name="LockText" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="JustLastX" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="SecretEdit" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Default" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Help" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Cancel" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Dismiss" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Accel" type="xsd:integer"/>
      <xsd:element name="Accel2" type="xsd:integer"/>
      <xsd:element name="Row" type="xsd:integer"/>
      <xsd:element name="Column" type="xsd:integer"/>
      <xsd:element name="Visible" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="RowHidden" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="ColHidden" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="VTEdit" type="xsd:integer"/>
      <xsd:element name="MultiLine" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="VScroll" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="ValidIds" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="FmlaRange" type="xsd:string"/>
      <xsd:element name="WidthMin" type="xsd:integer"/>
      <xsd:element name="Sel" type="xsd:integer"/>
      <xsd:element name="NoThreeD2" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="SelType" type="xsd:string"/>
      <xsd:element name="MultiSel" type="xsd:string"/>
      <xsd:element name="LCT" type="xsd:string"/>
      <xsd:element name="ListItem" type="xsd:string"/>
      <xsd:element name="DropStyle" type="xsd:string"/>
      <xsd:element name="Colored" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="DropLines" type="xsd:integer"/>
      <xsd:element name="Checked" type="xsd:integer"/>
      <xsd:element name="FmlaLink" type="xsd:string"/>
      <xsd:element name="FmlaPict" type="xsd:string"/>
      <xsd:element name="NoThreeD" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="FirstButton" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="FmlaGroup" type="xsd:string"/>
      <xsd:element name="Val" type="xsd:integer"/>
      <xsd:element name="Min" type="xsd:integer"/>
      <xsd:element name="Max" type="xsd:integer"/>
      <xsd:element name="Inc" type="xsd:integer"/>
      <xsd:element name="Page" type="xsd:integer"/>
      <xsd:element name="Horiz" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="Dx" type="xsd:integer"/>
      <xsd:element name="MapOCX" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="CF" type="ST_CF"/>
      <xsd:element name="Camera" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="RecalcAlways" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="AutoScale" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="DDE" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="UIObj" type="s:ST_TrueFalseBlank"/>
      <xsd:element name="ScriptText" type="xsd:string"/>
      <xsd:element name="ScriptExtended" type="xsd:string"/>
      <xsd:element name="ScriptLanguage" type="xsd:nonNegativeInteger"/>
      <xsd:element name="ScriptLocation" type="xsd:nonNegativeInteger"/>
      <xsd:element name="FmlaTxbx" type="xsd:string"/>
    </xsd:choice>
    <xsd:attribute name="ObjectType" type="ST_ObjectType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CF">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_ObjectType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="Button"/>
      <xsd:enumeration value="Checkbox"/>
      <xsd:enumeration value="Dialog"/>
      <xsd:enumeration value="Drop"/>
      <xsd:enumeration value="Edit"/>
      <xsd:enumeration value="GBox"/>
      <xsd:enumeration value="Label"/>
      <xsd:enumeration value="LineA"/>
      <xsd:enumeration value="List"/>
      <xsd:enumeration value="Movie"/>
      <xsd:enumeration value="Note"/>
      <xsd:enumeration value="Pict"/>
      <xsd:enumeration value="Radio"/>
      <xsd:enumeration value="RectA"/>
      <xsd:enumeration value="Scroll"/>
      <xsd:enumeration value="Spin"/>
      <xsd:enumeration value="Shape"/>
      <xsd:enumeration value="Group"/>
      <xsd:enumeration value="Rect"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns="urn:schemas-microsoft-com:office:word"
  targetNamespace="urn:schemas-microsoft-com:office:word" elementFormDefault="qualified"
  attributeFormDefault="unqualified">
  <xsd:element name="bordertop" type="CT_Border"/>
  <xsd:element name="borderleft" type="CT_Border"/>
  <xsd:element name="borderright" type="CT_Border"/>
  <xsd:element name="borderbottom" type="CT_Border"/>
  <xsd:complexType name="CT_Border">
    <xsd:attribute name="type" type="ST_BorderType" use="optional"/>
    <xsd:attribute name="width" type="xsd:positiveInteger" use="optional"/>
    <xsd:attribute name="shadow" type="ST_BorderShadow" use="optional"/>
  </xsd:complexType>
  <xsd:element name="wrap" type="CT_Wrap"/>
  <xsd:complexType name="CT_Wrap">
    <xsd:attribute name="type" type="ST_WrapType" use="optional"/>
    <xsd:attribute name="side" type="ST_WrapSide" use="optional"/>
    <xsd:attribute name="anchorx" type="ST_HorizontalAnchor" use="optional"/>
    <xsd:attribute name="anchory" type="ST_VerticalAnchor" use="optional"/>
  </xsd:complexType>
  <xsd:element name="anchorlock" type="CT_AnchorLock"/>
  <xsd:complexType name="CT_AnchorLock"/>
  <xsd:simpleType name="ST_BorderType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="thick"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="hairline"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="dotDash"/>
      <xsd:enumeration value="dashDotDot"/>
      <xsd:enumeration value="triple"/>
      <xsd:enumeration value="thinThickSmall"/>
      <xsd:enumeration value="thickThinSmall"/>
      <xsd:enumeration value="thickBetweenThinSmall"/>
      <xsd:enumeration value="thinThick"/>
      <xsd:enumeration value="thickThin"/>
      <xsd:enumeration value="thickBetweenThin"/>
      <xsd:enumeration value="thinThickLarge"/>
      <xsd:enumeration value="thickThinLarge"/>
      <xsd:enumeration value="thickBetweenThinLarge"/>
      <xsd:enumeration value="wave"/>
      <xsd:enumeration value="doubleWave"/>
      <xsd:enumeration value="dashedSmall"/>
      <xsd:enumeration value="dashDotStroked"/>
      <xsd:enumeration value="threeDEmboss"/>
      <xsd:enumeration value="threeDEngrave"/>
      <xsd:enumeration value="HTMLOutset"/>
      <xsd:enumeration value="HTMLInset"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BorderShadow">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="t"/>
      <xsd:enumeration value="true"/>
      <xsd:enumeration value="f"/>
      <xsd:enumeration value="false"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_WrapType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="topAndBottom"/>
      <xsd:enumeration value="square"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="tight"/>
      <xsd:enumeration value="through"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_WrapSide">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="both"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="largest"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HorizontalAnchor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="char"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VerticalAnchor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="line"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
  xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main"
  xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
  xmlns="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
  xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
  elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all"
  targetNamespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <xsd:import namespace="http://schemas.openxmlformats.org/markup-compatibility/2006" schemaLocation="../mce/mc.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    schemaLocation="dml-wordprocessingDrawing.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/math"
    schemaLocation="shared-math.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    schemaLocation="shared-relationshipReference.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
    schemaLocation="shared-commonSimpleTypes.xsd"/>
  <xsd:import namespace="http://schemas.openxmlformats.org/schemaLibrary/2006/main"
    schemaLocation="shared-customXmlSchemaProperties.xsd"/>
  <xsd:import namespace="http://www.w3.org/XML/1998/namespace"/>
  <xsd:complexType name="CT_Empty"/>
  <xsd:complexType name="CT_OnOff">
    <xsd:attribute name="val" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LongHexNumber">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="4"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LongHexNumber">
    <xsd:attribute name="val" type="ST_LongHexNumber" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ShortHexNumber">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UcharHexNumber">
    <xsd:restriction base="xsd:hexBinary">
      <xsd:length value="1"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Charset">
    <xsd:attribute name="val" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="characterSet" type="s:ST_String" use="optional" default="ISO-8859-1"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DecimalNumberOrPercent">
    <xsd:union memberTypes="ST_UnqualifiedPercentage s:ST_Percentage"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_UnqualifiedPercentage">
    <xsd:restriction base="xsd:decimal"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DecimalNumber">
    <xsd:restriction base="xsd:integer"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_DecimalNumber">
    <xsd:attribute name="val" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_UnsignedDecimalNumber">
    <xsd:attribute name="val" type="s:ST_UnsignedDecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DecimalNumberOrPrecent">
    <xsd:attribute name="val" type="ST_DecimalNumberOrPercent" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TwipsMeasure">
    <xsd:attribute name="val" type="s:ST_TwipsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SignedTwipsMeasure">
    <xsd:union memberTypes="xsd:integer s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_SignedTwipsMeasure">
    <xsd:attribute name="val" type="ST_SignedTwipsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PixelsMeasure">
    <xsd:restriction base="s:ST_UnsignedDecimalNumber"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_PixelsMeasure">
    <xsd:attribute name="val" type="ST_PixelsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HpsMeasure">
    <xsd:union memberTypes="s:ST_UnsignedDecimalNumber s:ST_PositiveUniversalMeasure"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_HpsMeasure">
    <xsd:attribute name="val" type="ST_HpsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SignedHpsMeasure">
    <xsd:union memberTypes="xsd:integer s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_SignedHpsMeasure">
    <xsd:attribute name="val" type="ST_SignedHpsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DateTime">
    <xsd:restriction base="xsd:dateTime"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_MacroName">
    <xsd:restriction base="xsd:string">
      <xsd:maxLength value="33"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MacroName">
    <xsd:attribute name="val" use="required" type="ST_MacroName"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_EighthPointMeasure">
    <xsd:restriction base="s:ST_UnsignedDecimalNumber"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PointMeasure">
    <xsd:restriction base="s:ST_UnsignedDecimalNumber"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_String">
    <xsd:attribute name="val" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextScale">
    <xsd:union memberTypes="ST_TextScalePercent ST_TextScaleDecimal"/>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextScalePercent">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="0*(600|([0-5]?[0-9]?[0-9]))%"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TextScaleDecimal">
    <xsd:restriction base="xsd:integer">
      <xsd:minInclusive value="0"/>
      <xsd:maxInclusive value="600"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextScale">
    <xsd:attribute name="val" type="ST_TextScale"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HighlightColor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="black"/>
      <xsd:enumeration value="blue"/>
      <xsd:enumeration value="cyan"/>
      <xsd:enumeration value="green"/>
      <xsd:enumeration value="magenta"/>
      <xsd:enumeration value="red"/>
      <xsd:enumeration value="yellow"/>
      <xsd:enumeration value="white"/>
      <xsd:enumeration value="darkBlue"/>
      <xsd:enumeration value="darkCyan"/>
      <xsd:enumeration value="darkGreen"/>
      <xsd:enumeration value="darkMagenta"/>
      <xsd:enumeration value="darkRed"/>
      <xsd:enumeration value="darkYellow"/>
      <xsd:enumeration value="darkGray"/>
      <xsd:enumeration value="lightGray"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Highlight">
    <xsd:attribute name="val" type="ST_HighlightColor" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HexColorAuto">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HexColor">
    <xsd:union memberTypes="ST_HexColorAuto s:ST_HexColorRGB"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_Color">
    <xsd:attribute name="val" type="ST_HexColor" use="required"/>
    <xsd:attribute name="themeColor" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeShade" type="ST_UcharHexNumber" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Lang">
    <xsd:attribute name="val" type="s:ST_Lang" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Guid">
    <xsd:attribute name="val" type="s:ST_Guid"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Underline">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="words"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="thick"/>
      <xsd:enumeration value="dotted"/>
      <xsd:enumeration value="dottedHeavy"/>
      <xsd:enumeration value="dash"/>
      <xsd:enumeration value="dashedHeavy"/>
      <xsd:enumeration value="dashLong"/>
      <xsd:enumeration value="dashLongHeavy"/>
      <xsd:enumeration value="dotDash"/>
      <xsd:enumeration value="dashDotHeavy"/>
      <xsd:enumeration value="dotDotDash"/>
      <xsd:enumeration value="dashDotDotHeavy"/>
      <xsd:enumeration value="wave"/>
      <xsd:enumeration value="wavyHeavy"/>
      <xsd:enumeration value="wavyDouble"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Underline">
    <xsd:attribute name="val" type="ST_Underline" use="optional"/>
    <xsd:attribute name="color" type="ST_HexColor" use="optional" default="auto"/>
    <xsd:attribute name="themeColor" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeShade" type="ST_UcharHexNumber" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextEffect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="blinkBackground"/>
      <xsd:enumeration value="lights"/>
      <xsd:enumeration value="antsBlack"/>
      <xsd:enumeration value="antsRed"/>
      <xsd:enumeration value="shimmer"/>
      <xsd:enumeration value="sparkle"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextEffect">
    <xsd:attribute name="val" type="ST_TextEffect" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Border">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="nil"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="single"/>
      <xsd:enumeration value="thick"/>
      <xsd:enumeration value="double"/>
      <xsd:enumeration value="dotted"/>
      <xsd:enumeration value="dashed"/>
      <xsd:enumeration value="dotDash"/>
      <xsd:enumeration value="dotDotDash"/>
      <xsd:enumeration value="triple"/>
      <xsd:enumeration value="thinThickSmallGap"/>
      <xsd:enumeration value="thickThinSmallGap"/>
      <xsd:enumeration value="thinThickThinSmallGap"/>
      <xsd:enumeration value="thinThickMediumGap"/>
      <xsd:enumeration value="thickThinMediumGap"/>
      <xsd:enumeration value="thinThickThinMediumGap"/>
      <xsd:enumeration value="thinThickLargeGap"/>
      <xsd:enumeration value="thickThinLargeGap"/>
      <xsd:enumeration value="thinThickThinLargeGap"/>
      <xsd:enumeration value="wave"/>
      <xsd:enumeration value="doubleWave"/>
      <xsd:enumeration value="dashSmallGap"/>
      <xsd:enumeration value="dashDotStroked"/>
      <xsd:enumeration value="threeDEmboss"/>
      <xsd:enumeration value="threeDEngrave"/>
      <xsd:enumeration value="outset"/>
      <xsd:enumeration value="inset"/>
      <xsd:enumeration value="apples"/>
      <xsd:enumeration value="archedScallops"/>
      <xsd:enumeration value="babyPacifier"/>
      <xsd:enumeration value="babyRattle"/>
      <xsd:enumeration value="balloons3Colors"/>
      <xsd:enumeration value="balloonsHotAir"/>
      <xsd:enumeration value="basicBlackDashes"/>
      <xsd:enumeration value="basicBlackDots"/>
      <xsd:enumeration value="basicBlackSquares"/>
      <xsd:enumeration value="basicThinLines"/>
      <xsd:enumeration value="basicWhiteDashes"/>
      <xsd:enumeration value="basicWhiteDots"/>
      <xsd:enumeration value="basicWhiteSquares"/>
      <xsd:enumeration value="basicWideInline"/>
      <xsd:enumeration value="basicWideMidline"/>
      <xsd:enumeration value="basicWideOutline"/>
      <xsd:enumeration value="bats"/>
      <xsd:enumeration value="birds"/>
      <xsd:enumeration value="birdsFlight"/>
      <xsd:enumeration value="cabins"/>
      <xsd:enumeration value="cakeSlice"/>
      <xsd:enumeration value="candyCorn"/>
      <xsd:enumeration value="celticKnotwork"/>
      <xsd:enumeration value="certificateBanner"/>
      <xsd:enumeration value="chainLink"/>
      <xsd:enumeration value="champagneBottle"/>
      <xsd:enumeration value="checkedBarBlack"/>
      <xsd:enumeration value="checkedBarColor"/>
      <xsd:enumeration value="checkered"/>
      <xsd:enumeration value="christmasTree"/>
      <xsd:enumeration value="circlesLines"/>
      <xsd:enumeration value="circlesRectangles"/>
      <xsd:enumeration value="classicalWave"/>
      <xsd:enumeration value="clocks"/>
      <xsd:enumeration value="compass"/>
      <xsd:enumeration value="confetti"/>
      <xsd:enumeration value="confettiGrays"/>
      <xsd:enumeration value="confettiOutline"/>
      <xsd:enumeration value="confettiStreamers"/>
      <xsd:enumeration value="confettiWhite"/>
      <xsd:enumeration value="cornerTriangles"/>
      <xsd:enumeration value="couponCutoutDashes"/>
      <xsd:enumeration value="couponCutoutDots"/>
      <xsd:enumeration value="crazyMaze"/>
      <xsd:enumeration value="creaturesButterfly"/>
      <xsd:enumeration value="creaturesFish"/>
      <xsd:enumeration value="creaturesInsects"/>
      <xsd:enumeration value="creaturesLadyBug"/>
      <xsd:enumeration value="crossStitch"/>
      <xsd:enumeration value="cup"/>
      <xsd:enumeration value="decoArch"/>
      <xsd:enumeration value="decoArchColor"/>
      <xsd:enumeration value="decoBlocks"/>
      <xsd:enumeration value="diamondsGray"/>
      <xsd:enumeration value="doubleD"/>
      <xsd:enumeration value="doubleDiamonds"/>
      <xsd:enumeration value="earth1"/>
      <xsd:enumeration value="earth2"/>
      <xsd:enumeration value="earth3"/>
      <xsd:enumeration value="eclipsingSquares1"/>
      <xsd:enumeration value="eclipsingSquares2"/>
      <xsd:enumeration value="eggsBlack"/>
      <xsd:enumeration value="fans"/>
      <xsd:enumeration value="film"/>
      <xsd:enumeration value="firecrackers"/>
      <xsd:enumeration value="flowersBlockPrint"/>
      <xsd:enumeration value="flowersDaisies"/>
      <xsd:enumeration value="flowersModern1"/>
      <xsd:enumeration value="flowersModern2"/>
      <xsd:enumeration value="flowersPansy"/>
      <xsd:enumeration value="flowersRedRose"/>
      <xsd:enumeration value="flowersRoses"/>
      <xsd:enumeration value="flowersTeacup"/>
      <xsd:enumeration value="flowersTiny"/>
      <xsd:enumeration value="gems"/>
      <xsd:enumeration value="gingerbreadMan"/>
      <xsd:enumeration value="gradient"/>
      <xsd:enumeration value="handmade1"/>
      <xsd:enumeration value="handmade2"/>
      <xsd:enumeration value="heartBalloon"/>
      <xsd:enumeration value="heartGray"/>
      <xsd:enumeration value="hearts"/>
      <xsd:enumeration value="heebieJeebies"/>
      <xsd:enumeration value="holly"/>
      <xsd:enumeration value="houseFunky"/>
      <xsd:enumeration value="hypnotic"/>
      <xsd:enumeration value="iceCreamCones"/>
      <xsd:enumeration value="lightBulb"/>
      <xsd:enumeration value="lightning1"/>
      <xsd:enumeration value="lightning2"/>
      <xsd:enumeration value="mapPins"/>
      <xsd:enumeration value="mapleLeaf"/>
      <xsd:enumeration value="mapleMuffins"/>
      <xsd:enumeration value="marquee"/>
      <xsd:enumeration value="marqueeToothed"/>
      <xsd:enumeration value="moons"/>
      <xsd:enumeration value="mosaic"/>
      <xsd:enumeration value="musicNotes"/>
      <xsd:enumeration value="northwest"/>
      <xsd:enumeration value="ovals"/>
      <xsd:enumeration value="packages"/>
      <xsd:enumeration value="palmsBlack"/>
      <xsd:enumeration value="palmsColor"/>
      <xsd:enumeration value="paperClips"/>
      <xsd:enumeration value="papyrus"/>
      <xsd:enumeration value="partyFavor"/>
      <xsd:enumeration value="partyGlass"/>
      <xsd:enumeration value="pencils"/>
      <xsd:enumeration value="people"/>
      <xsd:enumeration value="peopleWaving"/>
      <xsd:enumeration value="peopleHats"/>
      <xsd:enumeration value="poinsettias"/>
      <xsd:enumeration value="postageStamp"/>
      <xsd:enumeration value="pumpkin1"/>
      <xsd:enumeration value="pushPinNote2"/>
      <xsd:enumeration value="pushPinNote1"/>
      <xsd:enumeration value="pyramids"/>
      <xsd:enumeration value="pyramidsAbove"/>
      <xsd:enumeration value="quadrants"/>
      <xsd:enumeration value="rings"/>
      <xsd:enumeration value="safari"/>
      <xsd:enumeration value="sawtooth"/>
      <xsd:enumeration value="sawtoothGray"/>
      <xsd:enumeration value="scaredCat"/>
      <xsd:enumeration value="seattle"/>
      <xsd:enumeration value="shadowedSquares"/>
      <xsd:enumeration value="sharksTeeth"/>
      <xsd:enumeration value="shorebirdTracks"/>
      <xsd:enumeration value="skyrocket"/>
      <xsd:enumeration value="snowflakeFancy"/>
      <xsd:enumeration value="snowflakes"/>
      <xsd:enumeration value="sombrero"/>
      <xsd:enumeration value="southwest"/>
      <xsd:enumeration value="stars"/>
      <xsd:enumeration value="starsTop"/>
      <xsd:enumeration value="stars3d"/>
      <xsd:enumeration value="starsBlack"/>
      <xsd:enumeration value="starsShadowed"/>
      <xsd:enumeration value="sun"/>
      <xsd:enumeration value="swirligig"/>
      <xsd:enumeration value="tornPaper"/>
      <xsd:enumeration value="tornPaperBlack"/>
      <xsd:enumeration value="trees"/>
      <xsd:enumeration value="triangleParty"/>
      <xsd:enumeration value="triangles"/>
      <xsd:enumeration value="triangle1"/>
      <xsd:enumeration value="triangle2"/>
      <xsd:enumeration value="triangleCircle1"/>
      <xsd:enumeration value="triangleCircle2"/>
      <xsd:enumeration value="shapes1"/>
      <xsd:enumeration value="shapes2"/>
      <xsd:enumeration value="twistedLines1"/>
      <xsd:enumeration value="twistedLines2"/>
      <xsd:enumeration value="vine"/>
      <xsd:enumeration value="waveline"/>
      <xsd:enumeration value="weavingAngles"/>
      <xsd:enumeration value="weavingBraid"/>
      <xsd:enumeration value="weavingRibbon"/>
      <xsd:enumeration value="weavingStrips"/>
      <xsd:enumeration value="whiteFlowers"/>
      <xsd:enumeration value="woodwork"/>
      <xsd:enumeration value="xIllusions"/>
      <xsd:enumeration value="zanyTriangles"/>
      <xsd:enumeration value="zigZag"/>
      <xsd:enumeration value="zigZagStitch"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Border">
    <xsd:attribute name="val" type="ST_Border" use="required"/>
    <xsd:attribute name="color" type="ST_HexColor" use="optional" default="auto"/>
    <xsd:attribute name="themeColor" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeShade" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="sz" type="ST_EighthPointMeasure" use="optional"/>
    <xsd:attribute name="space" type="ST_PointMeasure" use="optional" default="0"/>
    <xsd:attribute name="shadow" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="frame" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Shd">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="nil"/>
      <xsd:enumeration value="clear"/>
      <xsd:enumeration value="solid"/>
      <xsd:enumeration value="horzStripe"/>
      <xsd:enumeration value="vertStripe"/>
      <xsd:enumeration value="reverseDiagStripe"/>
      <xsd:enumeration value="diagStripe"/>
      <xsd:enumeration value="horzCross"/>
      <xsd:enumeration value="diagCross"/>
      <xsd:enumeration value="thinHorzStripe"/>
      <xsd:enumeration value="thinVertStripe"/>
      <xsd:enumeration value="thinReverseDiagStripe"/>
      <xsd:enumeration value="thinDiagStripe"/>
      <xsd:enumeration value="thinHorzCross"/>
      <xsd:enumeration value="thinDiagCross"/>
      <xsd:enumeration value="pct5"/>
      <xsd:enumeration value="pct10"/>
      <xsd:enumeration value="pct12"/>
      <xsd:enumeration value="pct15"/>
      <xsd:enumeration value="pct20"/>
      <xsd:enumeration value="pct25"/>
      <xsd:enumeration value="pct30"/>
      <xsd:enumeration value="pct35"/>
      <xsd:enumeration value="pct37"/>
      <xsd:enumeration value="pct40"/>
      <xsd:enumeration value="pct45"/>
      <xsd:enumeration value="pct50"/>
      <xsd:enumeration value="pct55"/>
      <xsd:enumeration value="pct60"/>
      <xsd:enumeration value="pct62"/>
      <xsd:enumeration value="pct65"/>
      <xsd:enumeration value="pct70"/>
      <xsd:enumeration value="pct75"/>
      <xsd:enumeration value="pct80"/>
      <xsd:enumeration value="pct85"/>
      <xsd:enumeration value="pct87"/>
      <xsd:enumeration value="pct90"/>
      <xsd:enumeration value="pct95"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Shd">
    <xsd:attribute name="val" type="ST_Shd" use="required"/>
    <xsd:attribute name="color" type="ST_HexColor" use="optional"/>
    <xsd:attribute name="themeColor" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeShade" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="fill" type="ST_HexColor" use="optional"/>
    <xsd:attribute name="themeFill" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeFillTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeFillShade" type="ST_UcharHexNumber" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_VerticalAlignRun">
    <xsd:attribute name="val" type="s:ST_VerticalAlignRun" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FitText">
    <xsd:attribute name="val" type="s:ST_TwipsMeasure" use="required"/>
    <xsd:attribute name="id" type="ST_DecimalNumber" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Em">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="comma"/>
      <xsd:enumeration value="circle"/>
      <xsd:enumeration value="underDot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Em">
    <xsd:attribute name="val" type="ST_Em" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Language">
    <xsd:attribute name="val" type="s:ST_Lang" use="optional"/>
    <xsd:attribute name="eastAsia" type="s:ST_Lang" use="optional"/>
    <xsd:attribute name="bidi" type="s:ST_Lang" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_CombineBrackets">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="round"/>
      <xsd:enumeration value="square"/>
      <xsd:enumeration value="angle"/>
      <xsd:enumeration value="curly"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_EastAsianLayout">
    <xsd:attribute name="id" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="combine" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="combineBrackets" type="ST_CombineBrackets" use="optional"/>
    <xsd:attribute name="vert" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="vertCompress" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HeightRule">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="exact"/>
      <xsd:enumeration value="atLeast"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Wrap">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="notBeside"/>
      <xsd:enumeration value="around"/>
      <xsd:enumeration value="tight"/>
      <xsd:enumeration value="through"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_VAnchor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_HAnchor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="page"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DropCap">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="drop"/>
      <xsd:enumeration value="margin"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FramePr">
    <xsd:attribute name="dropCap" type="ST_DropCap" use="optional"/>
    <xsd:attribute name="lines" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="w" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="h" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="vSpace" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="hSpace" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="wrap" type="ST_Wrap" use="optional"/>
    <xsd:attribute name="hAnchor" type="ST_HAnchor" use="optional"/>
    <xsd:attribute name="vAnchor" type="ST_VAnchor" use="optional"/>
    <xsd:attribute name="x" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="xAlign" type="s:ST_XAlign" use="optional"/>
    <xsd:attribute name="y" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="yAlign" type="s:ST_YAlign" use="optional"/>
    <xsd:attribute name="hRule" type="ST_HeightRule" use="optional"/>
    <xsd:attribute name="anchorLock" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TabJc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="clear"/>
      <xsd:enumeration value="start"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="end"/>
      <xsd:enumeration value="decimal"/>
      <xsd:enumeration value="bar"/>
      <xsd:enumeration value="num"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_TabTlc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="hyphen"/>
      <xsd:enumeration value="underscore"/>
      <xsd:enumeration value="heavy"/>
      <xsd:enumeration value="middleDot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TabStop">
    <xsd:attribute name="val" type="ST_TabJc" use="required"/>
    <xsd:attribute name="leader" type="ST_TabTlc" use="optional"/>
    <xsd:attribute name="pos" type="ST_SignedTwipsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LineSpacingRule">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="auto"/>
      <xsd:enumeration value="exact"/>
      <xsd:enumeration value="atLeast"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Spacing">
    <xsd:attribute name="before" type="s:ST_TwipsMeasure" use="optional" default="0"/>
    <xsd:attribute name="beforeLines" type="ST_DecimalNumber" use="optional" default="0"/>
    <xsd:attribute name="beforeAutospacing" type="s:ST_OnOff" use="optional" default="off"/>
    <xsd:attribute name="after" type="s:ST_TwipsMeasure" use="optional" default="0"/>
    <xsd:attribute name="afterLines" type="ST_DecimalNumber" use="optional" default="0"/>
    <xsd:attribute name="afterAutospacing" type="s:ST_OnOff" use="optional" default="off"/>
    <xsd:attribute name="line" type="ST_SignedTwipsMeasure" use="optional" default="0"/>
    <xsd:attribute name="lineRule" type="ST_LineSpacingRule" use="optional" default="auto"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Ind">
    <xsd:attribute name="start" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="startChars" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="end" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="endChars" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="left" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="leftChars" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="right" type="ST_SignedTwipsMeasure" use="optional"/>
    <xsd:attribute name="rightChars" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="hanging" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="hangingChars" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="firstLine" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="firstLineChars" type="ST_DecimalNumber" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Jc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="start"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="end"/>
      <xsd:enumeration value="both"/>
      <xsd:enumeration value="mediumKashida"/>
      <xsd:enumeration value="distribute"/>
      <xsd:enumeration value="numTab"/>
      <xsd:enumeration value="highKashida"/>
      <xsd:enumeration value="lowKashida"/>
      <xsd:enumeration value="thaiDistribute"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_JcTable">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="end"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="start"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Jc">
    <xsd:attribute name="val" type="ST_Jc" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_JcTable">
    <xsd:attribute name="val" type="ST_JcTable" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_View">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="print"/>
      <xsd:enumeration value="outline"/>
      <xsd:enumeration value="masterPages"/>
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="web"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_View">
    <xsd:attribute name="val" type="ST_View" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Zoom">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="fullPage"/>
      <xsd:enumeration value="bestFit"/>
      <xsd:enumeration value="textFit"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Zoom">
    <xsd:attribute name="val" type="ST_Zoom" use="optional"/>
    <xsd:attribute name="percent" type="ST_DecimalNumberOrPercent" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WritingStyle">
    <xsd:attribute name="lang" type="s:ST_Lang" use="required"/>
    <xsd:attribute name="vendorID" type="s:ST_String" use="required"/>
    <xsd:attribute name="dllVersion" type="s:ST_String" use="required"/>
    <xsd:attribute name="nlCheck" type="s:ST_OnOff" use="optional" default="off"/>
    <xsd:attribute name="checkStyle" type="s:ST_OnOff" use="required"/>
    <xsd:attribute name="appName" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Proof">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="clean"/>
      <xsd:enumeration value="dirty"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Proof">
    <xsd:attribute name="spelling" type="ST_Proof" use="optional"/>
    <xsd:attribute name="grammar" type="ST_Proof" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DocType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_DocType">
    <xsd:attribute name="val" type="ST_DocType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DocProtect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="readOnly"/>
      <xsd:enumeration value="comments"/>
      <xsd:enumeration value="trackedChanges"/>
      <xsd:enumeration value="forms"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:attributeGroup name="AG_Password">
    <xsd:attribute name="algorithmName" type="s:ST_String" use="optional"/>
    <xsd:attribute name="hashValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="saltValue" type="xsd:base64Binary" use="optional"/>
    <xsd:attribute name="spinCount" type="ST_DecimalNumber" use="optional"/>
  </xsd:attributeGroup>
  <xsd:attributeGroup name="AG_TransitionalPassword">
    <xsd:attribute name="cryptProviderType" type="s:ST_CryptProv"/>
    <xsd:attribute name="cryptAlgorithmClass" type="s:ST_AlgClass"/>
    <xsd:attribute name="cryptAlgorithmType" type="s:ST_AlgType"/>
    <xsd:attribute name="cryptAlgorithmSid" type="ST_DecimalNumber"/>
    <xsd:attribute name="cryptSpinCount" type="ST_DecimalNumber"/>
    <xsd:attribute name="cryptProvider" type="s:ST_String"/>
    <xsd:attribute name="algIdExt" type="ST_LongHexNumber"/>
    <xsd:attribute name="algIdExtSource" type="s:ST_String"/>
    <xsd:attribute name="cryptProviderTypeExt" type="ST_LongHexNumber"/>
    <xsd:attribute name="cryptProviderTypeExtSource" type="s:ST_String"/>
    <xsd:attribute name="hash" type="xsd:base64Binary"/>
    <xsd:attribute name="salt" type="xsd:base64Binary"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_DocProtect">
    <xsd:attribute name="edit" type="ST_DocProtect" use="optional"/>
    <xsd:attribute name="formatting" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="enforcement" type="s:ST_OnOff"/>
    <xsd:attributeGroup ref="AG_Password"/>
    <xsd:attributeGroup ref="AG_TransitionalPassword"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MailMergeDocType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="catalog"/>
      <xsd:enumeration value="envelopes"/>
      <xsd:enumeration value="mailingLabels"/>
      <xsd:enumeration value="formLetters"/>
      <xsd:enumeration value="email"/>
      <xsd:enumeration value="fax"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MailMergeDocType">
    <xsd:attribute name="val" type="ST_MailMergeDocType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MailMergeDataType">
    <xsd:restriction base="xsd:string"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_MailMergeDataType">
    <xsd:attribute name="val" type="ST_MailMergeDataType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MailMergeDest">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="newDocument"/>
      <xsd:enumeration value="printer"/>
      <xsd:enumeration value="email"/>
      <xsd:enumeration value="fax"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MailMergeDest">
    <xsd:attribute name="val" type="ST_MailMergeDest" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MailMergeOdsoFMDFieldType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="null"/>
      <xsd:enumeration value="dbColumn"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MailMergeOdsoFMDFieldType">
    <xsd:attribute name="val" type="ST_MailMergeOdsoFMDFieldType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TrackChangesView">
    <xsd:attribute name="markup" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="comments" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="insDel" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="formatting" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="inkAnnotations" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Kinsoku">
    <xsd:attribute name="lang" type="s:ST_Lang" use="required"/>
    <xsd:attribute name="val" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextDirection">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="tb"/>
      <xsd:enumeration value="rl"/>
      <xsd:enumeration value="lr"/>
      <xsd:enumeration value="tbV"/>
      <xsd:enumeration value="rlV"/>
      <xsd:enumeration value="lrV"/>
      <xsd:enumeration value="btLr"/>
      <xsd:enumeration value="lrTb"/>
      <xsd:enumeration value="lrTbV"/>
      <xsd:enumeration value="tbLrV"/>
      <xsd:enumeration value="tbRl"/>
      <xsd:enumeration value="tbRlV"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextDirection">
    <xsd:attribute name="val" type="ST_TextDirection" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextAlignment">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="baseline"/>
      <xsd:enumeration value="bottom"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextAlignment">
    <xsd:attribute name="val" type="ST_TextAlignment" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DisplacedByCustomXml">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="next"/>
      <xsd:enumeration value="prev"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_AnnotationVMerge">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="cont"/>
      <xsd:enumeration value="rest"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Markup">
    <xsd:attribute name="id" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TrackChange">
    <xsd:complexContent>
      <xsd:extension base="CT_Markup">
        <xsd:attribute name="author" type="s:ST_String" use="required"/>
        <xsd:attribute name="date" type="ST_DateTime" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_CellMergeTrackChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:attribute name="vMerge" type="ST_AnnotationVMerge" use="optional"/>
        <xsd:attribute name="vMergeOrig" type="ST_AnnotationVMerge" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TrackChangeRange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:attribute name="displacedByCustomXml" type="ST_DisplacedByCustomXml" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_MarkupRange">
    <xsd:complexContent>
      <xsd:extension base="CT_Markup">
        <xsd:attribute name="displacedByCustomXml" type="ST_DisplacedByCustomXml" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_BookmarkRange">
    <xsd:complexContent>
      <xsd:extension base="CT_MarkupRange">
        <xsd:attribute name="colFirst" type="ST_DecimalNumber" use="optional"/>
        <xsd:attribute name="colLast" type="ST_DecimalNumber" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Bookmark">
    <xsd:complexContent>
      <xsd:extension base="CT_BookmarkRange">
        <xsd:attribute name="name" type="s:ST_String" use="required"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_MoveBookmark">
    <xsd:complexContent>
      <xsd:extension base="CT_Bookmark">
        <xsd:attribute name="author" type="s:ST_String" use="required"/>
        <xsd:attribute name="date" type="ST_DateTime" use="required"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Comment">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:group ref="EG_BlockLevelElts" minOccurs="0" maxOccurs="unbounded"/>
        </xsd:sequence>
        <xsd:attribute name="initials" type="s:ST_String" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TrackChangeNumbering">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:attribute name="original" type="s:ST_String" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPrExChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="tblPrEx" type="CT_TblPrExBase" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TcPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="tcPr" type="CT_TcPrInner" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TrPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="trPr" type="CT_TrPrBase" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TblGridChange">
    <xsd:complexContent>
      <xsd:extension base="CT_Markup">
        <xsd:sequence>
          <xsd:element name="tblGrid" type="CT_TblGridBase"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="tblPr" type="CT_TblPrBase"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_SectPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="sectPr" type="CT_SectPrBase" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_PPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="pPr" type="CT_PPrBase" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_RPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="rPr" type="CT_RPrOriginal" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_ParaRPrChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:sequence>
          <xsd:element name="rPr" type="CT_ParaRPrOriginal" minOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_RunTrackChange">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:choice minOccurs="0" maxOccurs="unbounded">
          <xsd:group ref="EG_ContentRunContent"/>
          <xsd:group ref="m:EG_OMathMathElements"/>
        </xsd:choice>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:group name="EG_PContentMath">
    <xsd:choice>
      <xsd:group ref="EG_PContentBase" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:group ref="EG_ContentRunContentBase" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_PContentBase">
    <xsd:choice>
      <xsd:element name="customXml" type="CT_CustomXmlRun"/>
      <xsd:element name="fldSimple" type="CT_SimpleField" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="hyperlink" type="CT_Hyperlink"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_ContentRunContentBase">
    <xsd:choice>
      <xsd:element name="smartTag" type="CT_SmartTagRun"/>
      <xsd:element name="sdt" type="CT_SdtRun"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_CellMarkupElements">
    <xsd:choice>
      <xsd:element name="cellIns" type="CT_TrackChange" minOccurs="0"/>
      <xsd:element name="cellDel" type="CT_TrackChange" minOccurs="0"/>
      <xsd:element name="cellMerge" type="CT_CellMergeTrackChange" minOccurs="0"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_RangeMarkupElements">
    <xsd:choice>
      <xsd:element name="bookmarkStart" type="CT_Bookmark"/>
      <xsd:element name="bookmarkEnd" type="CT_MarkupRange"/>
      <xsd:element name="moveFromRangeStart" type="CT_MoveBookmark"/>
      <xsd:element name="moveFromRangeEnd" type="CT_MarkupRange"/>
      <xsd:element name="moveToRangeStart" type="CT_MoveBookmark"/>
      <xsd:element name="moveToRangeEnd" type="CT_MarkupRange"/>
      <xsd:element name="commentRangeStart" type="CT_MarkupRange"/>
      <xsd:element name="commentRangeEnd" type="CT_MarkupRange"/>
      <xsd:element name="customXmlInsRangeStart" type="CT_TrackChange"/>
      <xsd:element name="customXmlInsRangeEnd" type="CT_Markup"/>
      <xsd:element name="customXmlDelRangeStart" type="CT_TrackChange"/>
      <xsd:element name="customXmlDelRangeEnd" type="CT_Markup"/>
      <xsd:element name="customXmlMoveFromRangeStart" type="CT_TrackChange"/>
      <xsd:element name="customXmlMoveFromRangeEnd" type="CT_Markup"/>
      <xsd:element name="customXmlMoveToRangeStart" type="CT_TrackChange"/>
      <xsd:element name="customXmlMoveToRangeEnd" type="CT_Markup"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_NumPr">
    <xsd:sequence>
      <xsd:element name="ilvl" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="numId" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="numberingChange" type="CT_TrackChangeNumbering" minOccurs="0"/>
      <xsd:element name="ins" type="CT_TrackChange" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PBdr">
    <xsd:sequence>
      <xsd:element name="top" type="CT_Border" minOccurs="0"/>
      <xsd:element name="left" type="CT_Border" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_Border" minOccurs="0"/>
      <xsd:element name="right" type="CT_Border" minOccurs="0"/>
      <xsd:element name="between" type="CT_Border" minOccurs="0"/>
      <xsd:element name="bar" type="CT_Border" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Tabs">
    <xsd:sequence>
      <xsd:element name="tab" type="CT_TabStop" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TextboxTightWrap">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="allLines"/>
      <xsd:enumeration value="firstAndLastLine"/>
      <xsd:enumeration value="firstLineOnly"/>
      <xsd:enumeration value="lastLineOnly"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TextboxTightWrap">
    <xsd:attribute name="val" type="ST_TextboxTightWrap" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PPrBase">
    <xsd:sequence>
      <xsd:element name="pStyle" type="CT_String" minOccurs="0"/>
      <xsd:element name="keepNext" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="keepLines" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="pageBreakBefore" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="framePr" type="CT_FramePr" minOccurs="0"/>
      <xsd:element name="widowControl" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="numPr" type="CT_NumPr" minOccurs="0"/>
      <xsd:element name="suppressLineNumbers" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="pBdr" type="CT_PBdr" minOccurs="0"/>
      <xsd:element name="shd" type="CT_Shd" minOccurs="0"/>
      <xsd:element name="tabs" type="CT_Tabs" minOccurs="0"/>
      <xsd:element name="suppressAutoHyphens" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="kinsoku" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="wordWrap" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="overflowPunct" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="topLinePunct" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="autoSpaceDE" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="autoSpaceDN" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bidi" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="adjustRightInd" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="snapToGrid" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="spacing" type="CT_Spacing" minOccurs="0"/>
      <xsd:element name="ind" type="CT_Ind" minOccurs="0"/>
      <xsd:element name="contextualSpacing" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="mirrorIndents" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressOverlap" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="jc" type="CT_Jc" minOccurs="0"/>
      <xsd:element name="textDirection" type="CT_TextDirection" minOccurs="0"/>
      <xsd:element name="textAlignment" type="CT_TextAlignment" minOccurs="0"/>
      <xsd:element name="textboxTightWrap" type="CT_TextboxTightWrap" minOccurs="0"/>
      <xsd:element name="outlineLvl" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="divId" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="cnfStyle" type="CT_Cnf" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PPr">
    <xsd:complexContent>
      <xsd:extension base="CT_PPrBase">
        <xsd:sequence>
          <xsd:element name="rPr" type="CT_ParaRPr" minOccurs="0"/>
          <xsd:element name="sectPr" type="CT_SectPr" minOccurs="0"/>
          <xsd:element name="pPrChange" type="CT_PPrChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_PPrGeneral">
    <xsd:complexContent>
      <xsd:extension base="CT_PPrBase">
        <xsd:sequence>
          <xsd:element name="pPrChange" type="CT_PPrChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Control">
    <xsd:attribute name="name" type="s:ST_String" use="optional"/>
    <xsd:attribute name="shapeid" type="s:ST_String" use="optional"/>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Background">
    <xsd:sequence>
      <xsd:sequence maxOccurs="unbounded">
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:vml" minOccurs="0"
          maxOccurs="unbounded"/>
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:office:office"
          minOccurs="0" maxOccurs="unbounded"/>
      </xsd:sequence>
      <xsd:element name="drawing" type="CT_Drawing" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="color" type="ST_HexColor" use="optional" default="auto"/>
    <xsd:attribute name="themeColor" type="ST_ThemeColor" use="optional"/>
    <xsd:attribute name="themeTint" type="ST_UcharHexNumber" use="optional"/>
    <xsd:attribute name="themeShade" type="ST_UcharHexNumber" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Rel">
    <xsd:attribute ref="r:id" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Object">
    <xsd:sequence>
      <xsd:sequence maxOccurs="unbounded">
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:vml" minOccurs="0"
          maxOccurs="unbounded"/>
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:office:office"
          minOccurs="0" maxOccurs="unbounded"/>
      </xsd:sequence>
      <xsd:element name="drawing" type="CT_Drawing" minOccurs="0"/>
      <xsd:choice minOccurs="0">
        <xsd:element name="control" type="CT_Control"/>
        <xsd:element name="objectLink" type="CT_ObjectLink"/>
        <xsd:element name="objectEmbed" type="CT_ObjectEmbed"/>
        <xsd:element name="movie" type="CT_Rel"/>
      </xsd:choice>
    </xsd:sequence>
    <xsd:attribute name="dxaOrig" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="dyaOrig" type="s:ST_TwipsMeasure" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Picture">
    <xsd:sequence>
      <xsd:sequence maxOccurs="unbounded">
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:vml" minOccurs="0"
          maxOccurs="unbounded"/>
        <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:office:office"
          minOccurs="0" maxOccurs="unbounded"/>
      </xsd:sequence>
      <xsd:element name="movie" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="control" type="CT_Control" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ObjectEmbed">
    <xsd:attribute name="drawAspect" type="ST_ObjectDrawAspect" use="optional"/>
    <xsd:attribute ref="r:id" use="required"/>
    <xsd:attribute name="progId" type="s:ST_String" use="optional"/>
    <xsd:attribute name="shapeId" type="s:ST_String" use="optional"/>
    <xsd:attribute name="fieldCodes" type="s:ST_String" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ObjectDrawAspect">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="content"/>
      <xsd:enumeration value="icon"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ObjectLink">
    <xsd:complexContent>
      <xsd:extension base="CT_ObjectEmbed">
        <xsd:attribute name="updateMode" type="ST_ObjectUpdateMode" use="required"/>
        <xsd:attribute name="lockedField" type="s:ST_OnOff" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:simpleType name="ST_ObjectUpdateMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="always"/>
      <xsd:enumeration value="onCall"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Drawing">
    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element ref="wp:anchor" minOccurs="0"/>
      <xsd:element ref="wp:inline" minOccurs="0"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_SimpleField">
    <xsd:sequence>
      <xsd:element name="fldData" type="CT_Text" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="instr" type="s:ST_String" use="required"/>
    <xsd:attribute name="fldLock" type="s:ST_OnOff"/>
    <xsd:attribute name="dirty" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FldCharType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="begin"/>
      <xsd:enumeration value="separate"/>
      <xsd:enumeration value="end"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_InfoTextType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="autoText"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FFHelpTextVal">
    <xsd:restriction base="xsd:string">
      <xsd:maxLength value="256"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FFStatusTextVal">
    <xsd:restriction base="xsd:string">
      <xsd:maxLength value="140"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FFName">
    <xsd:restriction base="xsd:string">
      <xsd:maxLength value="65"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FFTextType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="regular"/>
      <xsd:enumeration value="number"/>
      <xsd:enumeration value="date"/>
      <xsd:enumeration value="currentTime"/>
      <xsd:enumeration value="currentDate"/>
      <xsd:enumeration value="calculated"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FFTextType">
    <xsd:attribute name="val" type="ST_FFTextType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FFName">
    <xsd:attribute name="val" type="ST_FFName"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FldChar">
    <xsd:choice>
      <xsd:element name="fldData" type="CT_Text" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="ffData" type="CT_FFData" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="numberingChange" type="CT_TrackChangeNumbering" minOccurs="0"/>
    </xsd:choice>
    <xsd:attribute name="fldCharType" type="ST_FldCharType" use="required"/>
    <xsd:attribute name="fldLock" type="s:ST_OnOff"/>
    <xsd:attribute name="dirty" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Hyperlink">
    <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    <xsd:attribute name="tgtFrame" type="s:ST_String" use="optional"/>
    <xsd:attribute name="tooltip" type="s:ST_String" use="optional"/>
    <xsd:attribute name="docLocation" type="s:ST_String" use="optional"/>
    <xsd:attribute name="history" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="anchor" type="s:ST_String" use="optional"/>
    <xsd:attribute ref="r:id"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FFData">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="name" type="CT_FFName"/>
      <xsd:element name="label" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="tabIndex" type="CT_UnsignedDecimalNumber" minOccurs="0"/>
      <xsd:element name="enabled" type="CT_OnOff"/>
      <xsd:element name="calcOnExit" type="CT_OnOff"/>
      <xsd:element name="entryMacro" type="CT_MacroName" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="exitMacro" type="CT_MacroName" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="helpText" type="CT_FFHelpText" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="statusText" type="CT_FFStatusText" minOccurs="0" maxOccurs="1"/>
      <xsd:choice>
        <xsd:element name="checkBox" type="CT_FFCheckBox"/>
        <xsd:element name="ddList" type="CT_FFDDList"/>
        <xsd:element name="textInput" type="CT_FFTextInput"/>
      </xsd:choice>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_FFHelpText">
    <xsd:attribute name="type" type="ST_InfoTextType"/>
    <xsd:attribute name="val" type="ST_FFHelpTextVal"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FFStatusText">
    <xsd:attribute name="type" type="ST_InfoTextType"/>
    <xsd:attribute name="val" type="ST_FFStatusTextVal"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FFCheckBox">
    <xsd:sequence>
      <xsd:choice>
        <xsd:element name="size" type="CT_HpsMeasure"/>
        <xsd:element name="sizeAuto" type="CT_OnOff"/>
      </xsd:choice>
      <xsd:element name="default" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="checked" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FFDDList">
    <xsd:sequence>
      <xsd:element name="result" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="default" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="listEntry" type="CT_String" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FFTextInput">
    <xsd:sequence>
      <xsd:element name="type" type="CT_FFTextType" minOccurs="0"/>
      <xsd:element name="default" type="CT_String" minOccurs="0"/>
      <xsd:element name="maxLength" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="format" type="CT_String" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_SectionMark">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="nextPage"/>
      <xsd:enumeration value="nextColumn"/>
      <xsd:enumeration value="continuous"/>
      <xsd:enumeration value="evenPage"/>
      <xsd:enumeration value="oddPage"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SectType">
    <xsd:attribute name="val" type="ST_SectionMark"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PaperSource">
    <xsd:attribute name="first" type="ST_DecimalNumber"/>
    <xsd:attribute name="other" type="ST_DecimalNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_NumberFormat">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="decimal"/>
      <xsd:enumeration value="upperRoman"/>
      <xsd:enumeration value="lowerRoman"/>
      <xsd:enumeration value="upperLetter"/>
      <xsd:enumeration value="lowerLetter"/>
      <xsd:enumeration value="ordinal"/>
      <xsd:enumeration value="cardinalText"/>
      <xsd:enumeration value="ordinalText"/>
      <xsd:enumeration value="hex"/>
      <xsd:enumeration value="chicago"/>
      <xsd:enumeration value="ideographDigital"/>
      <xsd:enumeration value="japaneseCounting"/>
      <xsd:enumeration value="aiueo"/>
      <xsd:enumeration value="iroha"/>
      <xsd:enumeration value="decimalFullWidth"/>
      <xsd:enumeration value="decimalHalfWidth"/>
      <xsd:enumeration value="japaneseLegal"/>
      <xsd:enumeration value="japaneseDigitalTenThousand"/>
      <xsd:enumeration value="decimalEnclosedCircle"/>
      <xsd:enumeration value="decimalFullWidth2"/>
      <xsd:enumeration value="aiueoFullWidth"/>
      <xsd:enumeration value="irohaFullWidth"/>
      <xsd:enumeration value="decimalZero"/>
      <xsd:enumeration value="bullet"/>
      <xsd:enumeration value="ganada"/>
      <xsd:enumeration value="chosung"/>
      <xsd:enumeration value="decimalEnclosedFullstop"/>
      <xsd:enumeration value="decimalEnclosedParen"/>
      <xsd:enumeration value="decimalEnclosedCircleChinese"/>
      <xsd:enumeration value="ideographEnclosedCircle"/>
      <xsd:enumeration value="ideographTraditional"/>
      <xsd:enumeration value="ideographZodiac"/>
      <xsd:enumeration value="ideographZodiacTraditional"/>
      <xsd:enumeration value="taiwaneseCounting"/>
      <xsd:enumeration value="ideographLegalTraditional"/>
      <xsd:enumeration value="taiwaneseCountingThousand"/>
      <xsd:enumeration value="taiwaneseDigital"/>
      <xsd:enumeration value="chineseCounting"/>
      <xsd:enumeration value="chineseLegalSimplified"/>
      <xsd:enumeration value="chineseCountingThousand"/>
      <xsd:enumeration value="koreanDigital"/>
      <xsd:enumeration value="koreanCounting"/>
      <xsd:enumeration value="koreanLegal"/>
      <xsd:enumeration value="koreanDigital2"/>
      <xsd:enumeration value="vietnameseCounting"/>
      <xsd:enumeration value="russianLower"/>
      <xsd:enumeration value="russianUpper"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="numberInDash"/>
      <xsd:enumeration value="hebrew1"/>
      <xsd:enumeration value="hebrew2"/>
      <xsd:enumeration value="arabicAlpha"/>
      <xsd:enumeration value="arabicAbjad"/>
      <xsd:enumeration value="hindiVowels"/>
      <xsd:enumeration value="hindiConsonants"/>
      <xsd:enumeration value="hindiNumbers"/>
      <xsd:enumeration value="hindiCounting"/>
      <xsd:enumeration value="thaiLetters"/>
      <xsd:enumeration value="thaiNumbers"/>
      <xsd:enumeration value="thaiCounting"/>
      <xsd:enumeration value="bahtText"/>
      <xsd:enumeration value="dollarText"/>
      <xsd:enumeration value="custom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PageOrientation">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="portrait"/>
      <xsd:enumeration value="landscape"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PageSz">
    <xsd:attribute name="w" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="h" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="orient" type="ST_PageOrientation" use="optional"/>
    <xsd:attribute name="code" type="ST_DecimalNumber" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageMar">
    <xsd:attribute name="top" type="ST_SignedTwipsMeasure" use="required"/>
    <xsd:attribute name="right" type="s:ST_TwipsMeasure" use="required"/>
    <xsd:attribute name="bottom" type="ST_SignedTwipsMeasure" use="required"/>
    <xsd:attribute name="left" type="s:ST_TwipsMeasure" use="required"/>
    <xsd:attribute name="header" type="s:ST_TwipsMeasure" use="required"/>
    <xsd:attribute name="footer" type="s:ST_TwipsMeasure" use="required"/>
    <xsd:attribute name="gutter" type="s:ST_TwipsMeasure" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PageBorderZOrder">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="front"/>
      <xsd:enumeration value="back"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PageBorderDisplay">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="allPages"/>
      <xsd:enumeration value="firstPage"/>
      <xsd:enumeration value="notFirstPage"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PageBorderOffset">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="text"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PageBorders">
    <xsd:sequence>
      <xsd:element name="top" type="CT_TopPageBorder" minOccurs="0"/>
      <xsd:element name="left" type="CT_PageBorder" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_BottomPageBorder" minOccurs="0"/>
      <xsd:element name="right" type="CT_PageBorder" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="zOrder" type="ST_PageBorderZOrder" use="optional" default="front"/>
    <xsd:attribute name="display" type="ST_PageBorderDisplay" use="optional"/>
    <xsd:attribute name="offsetFrom" type="ST_PageBorderOffset" use="optional" default="text"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageBorder">
    <xsd:complexContent>
      <xsd:extension base="CT_Border">
        <xsd:attribute ref="r:id" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_BottomPageBorder">
    <xsd:complexContent>
      <xsd:extension base="CT_PageBorder">
        <xsd:attribute ref="r:bottomLeft" use="optional"/>
        <xsd:attribute ref="r:bottomRight" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TopPageBorder">
    <xsd:complexContent>
      <xsd:extension base="CT_PageBorder">
        <xsd:attribute ref="r:topLeft" use="optional"/>
        <xsd:attribute ref="r:topRight" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:simpleType name="ST_ChapterSep">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="hyphen"/>
      <xsd:enumeration value="period"/>
      <xsd:enumeration value="colon"/>
      <xsd:enumeration value="emDash"/>
      <xsd:enumeration value="enDash"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_LineNumberRestart">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="newPage"/>
      <xsd:enumeration value="newSection"/>
      <xsd:enumeration value="continuous"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LineNumber">
    <xsd:attribute name="countBy" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="start" type="ST_DecimalNumber" use="optional" default="1"/>
    <xsd:attribute name="distance" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="restart" type="ST_LineNumberRestart" use="optional" default="newPage"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PageNumber">
    <xsd:attribute name="fmt" type="ST_NumberFormat" use="optional" default="decimal"/>
    <xsd:attribute name="start" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="chapStyle" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="chapSep" type="ST_ChapterSep" use="optional" default="hyphen"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Column">
    <xsd:attribute name="w" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="space" type="s:ST_TwipsMeasure" use="optional" default="0"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Columns">
    <xsd:sequence minOccurs="0">
      <xsd:element name="col" type="CT_Column" maxOccurs="45"/>
    </xsd:sequence>
    <xsd:attribute name="equalWidth" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="space" type="s:ST_TwipsMeasure" use="optional" default="720"/>
    <xsd:attribute name="num" type="ST_DecimalNumber" use="optional" default="1"/>
    <xsd:attribute name="sep" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_VerticalJc">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="top"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="both"/>
      <xsd:enumeration value="bottom"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_VerticalJc">
    <xsd:attribute name="val" type="ST_VerticalJc" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DocGrid">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="lines"/>
      <xsd:enumeration value="linesAndChars"/>
      <xsd:enumeration value="snapToChars"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DocGrid">
    <xsd:attribute name="type" type="ST_DocGrid"/>
    <xsd:attribute name="linePitch" type="ST_DecimalNumber"/>
    <xsd:attribute name="charSpace" type="ST_DecimalNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_HdrFtr">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="even"/>
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="first"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_FtnEdn">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="separator"/>
      <xsd:enumeration value="continuationSeparator"/>
      <xsd:enumeration value="continuationNotice"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_HdrFtrRef">
    <xsd:complexContent>
      <xsd:extension base="CT_Rel">
        <xsd:attribute name="type" type="ST_HdrFtr" use="required"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:group name="EG_HdrFtrReferences">
    <xsd:choice>
      <xsd:element name="headerReference" type="CT_HdrFtrRef" minOccurs="0"/>
      <xsd:element name="footerReference" type="CT_HdrFtrRef" minOccurs="0"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_HdrFtr">
    <xsd:group ref="EG_BlockLevelElts" minOccurs="1" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:group name="EG_SectPrContents">
    <xsd:sequence>
      <xsd:element name="footnotePr" type="CT_FtnProps" minOccurs="0"/>
      <xsd:element name="endnotePr" type="CT_EdnProps" minOccurs="0"/>
      <xsd:element name="type" type="CT_SectType" minOccurs="0"/>
      <xsd:element name="pgSz" type="CT_PageSz" minOccurs="0"/>
      <xsd:element name="pgMar" type="CT_PageMar" minOccurs="0"/>
      <xsd:element name="paperSrc" type="CT_PaperSource" minOccurs="0"/>
      <xsd:element name="pgBorders" type="CT_PageBorders" minOccurs="0"/>
      <xsd:element name="lnNumType" type="CT_LineNumber" minOccurs="0"/>
      <xsd:element name="pgNumType" type="CT_PageNumber" minOccurs="0"/>
      <xsd:element name="cols" type="CT_Columns" minOccurs="0"/>
      <xsd:element name="formProt" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="vAlign" type="CT_VerticalJc" minOccurs="0"/>
      <xsd:element name="noEndnote" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="titlePg" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="textDirection" type="CT_TextDirection" minOccurs="0"/>
      <xsd:element name="bidi" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="rtlGutter" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="docGrid" type="CT_DocGrid" minOccurs="0"/>
      <xsd:element name="printerSettings" type="CT_Rel" minOccurs="0"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:attributeGroup name="AG_SectPrAttributes">
    <xsd:attribute name="rsidRPr" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidDel" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidR" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidSect" type="ST_LongHexNumber"/>
  </xsd:attributeGroup>
  <xsd:complexType name="CT_SectPrBase">
    <xsd:sequence>
      <xsd:group ref="EG_SectPrContents" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_SectPrAttributes"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SectPr">
    <xsd:sequence>
      <xsd:group ref="EG_HdrFtrReferences" minOccurs="0" maxOccurs="6"/>
      <xsd:group ref="EG_SectPrContents" minOccurs="0"/>
      <xsd:element name="sectPrChange" type="CT_SectPrChange" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attributeGroup ref="AG_SectPrAttributes"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_BrType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="page"/>
      <xsd:enumeration value="column"/>
      <xsd:enumeration value="textWrapping"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_BrClear">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="all"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Br">
    <xsd:attribute name="type" type="ST_BrType" use="optional"/>
    <xsd:attribute name="clear" type="ST_BrClear" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_PTabAlignment">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="right"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PTabRelativeTo">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="margin"/>
      <xsd:enumeration value="indent"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_PTabLeader">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="dot"/>
      <xsd:enumeration value="hyphen"/>
      <xsd:enumeration value="underscore"/>
      <xsd:enumeration value="middleDot"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_PTab">
    <xsd:attribute name="alignment" type="ST_PTabAlignment" use="required"/>
    <xsd:attribute name="relativeTo" type="ST_PTabRelativeTo" use="required"/>
    <xsd:attribute name="leader" type="ST_PTabLeader" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Sym">
    <xsd:attribute name="font" type="s:ST_String"/>
    <xsd:attribute name="char" type="ST_ShortHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ProofErr">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="spellStart"/>
      <xsd:enumeration value="spellEnd"/>
      <xsd:enumeration value="gramStart"/>
      <xsd:enumeration value="gramEnd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ProofErr">
    <xsd:attribute name="type" type="ST_ProofErr" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_EdGrp">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="everyone"/>
      <xsd:enumeration value="administrators"/>
      <xsd:enumeration value="contributors"/>
      <xsd:enumeration value="editors"/>
      <xsd:enumeration value="owners"/>
      <xsd:enumeration value="current"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Perm">
    <xsd:attribute name="id" type="s:ST_String" use="required"/>
    <xsd:attribute name="displacedByCustomXml" type="ST_DisplacedByCustomXml" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_PermStart">
    <xsd:complexContent>
      <xsd:extension base="CT_Perm">
        <xsd:attribute name="edGrp" type="ST_EdGrp" use="optional"/>
        <xsd:attribute name="ed" type="s:ST_String" use="optional"/>
        <xsd:attribute name="colFirst" type="ST_DecimalNumber" use="optional"/>
        <xsd:attribute name="colLast" type="ST_DecimalNumber" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Text">
    <xsd:simpleContent>
      <xsd:extension base="s:ST_String">
        <xsd:attribute ref="xml:space" use="optional"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
  <xsd:group name="EG_RunInnerContent">
    <xsd:choice>
      <xsd:element name="br" type="CT_Br"/>
      <xsd:element name="t" type="CT_Text"/>
      <xsd:element name="contentPart" type="CT_Rel"/>
      <xsd:element name="delText" type="CT_Text"/>
      <xsd:element name="instrText" type="CT_Text"/>
      <xsd:element name="delInstrText" type="CT_Text"/>
      <xsd:element name="noBreakHyphen" type="CT_Empty"/>
      <xsd:element name="softHyphen" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="dayShort" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="monthShort" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="yearShort" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="dayLong" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="monthLong" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="yearLong" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="annotationRef" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="footnoteRef" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="endnoteRef" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="separator" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="continuationSeparator" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="sym" type="CT_Sym" minOccurs="0"/>
      <xsd:element name="pgNum" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="cr" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="tab" type="CT_Empty" minOccurs="0"/>
      <xsd:element name="object" type="CT_Object"/>
      <xsd:element name="pict" type="CT_Picture"/>
      <xsd:element name="fldChar" type="CT_FldChar"/>
      <xsd:element name="ruby" type="CT_Ruby"/>
      <xsd:element name="footnoteReference" type="CT_FtnEdnRef"/>
      <xsd:element name="endnoteReference" type="CT_FtnEdnRef"/>
      <xsd:element name="commentReference" type="CT_Markup"/>
      <xsd:element name="drawing" type="CT_Drawing"/>
      <xsd:element name="ptab" type="CT_PTab" minOccurs="0"/>
      <xsd:element name="lastRenderedPageBreak" type="CT_Empty" minOccurs="0" maxOccurs="1"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_R">
    <xsd:sequence>
      <xsd:group ref="EG_RPr" minOccurs="0"/>
      <xsd:group ref="EG_RunInnerContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="rsidRPr" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidDel" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidR" type="ST_LongHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Hint">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="eastAsia"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_Theme">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="majorEastAsia"/>
      <xsd:enumeration value="majorBidi"/>
      <xsd:enumeration value="majorAscii"/>
      <xsd:enumeration value="majorHAnsi"/>
      <xsd:enumeration value="minorEastAsia"/>
      <xsd:enumeration value="minorBidi"/>
      <xsd:enumeration value="minorAscii"/>
      <xsd:enumeration value="minorHAnsi"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Fonts">
    <xsd:attribute name="hint" type="ST_Hint"/>
    <xsd:attribute name="ascii" type="s:ST_String"/>
    <xsd:attribute name="hAnsi" type="s:ST_String"/>
    <xsd:attribute name="eastAsia" type="s:ST_String"/>
    <xsd:attribute name="cs" type="s:ST_String"/>
    <xsd:attribute name="asciiTheme" type="ST_Theme"/>
    <xsd:attribute name="hAnsiTheme" type="ST_Theme"/>
    <xsd:attribute name="eastAsiaTheme" type="ST_Theme"/>
    <xsd:attribute name="cstheme" type="ST_Theme"/>
  </xsd:complexType>
  <xsd:group name="EG_RPrBase">
    <xsd:choice>
      <xsd:element name="rStyle" type="CT_String"/>
      <xsd:element name="rFonts" type="CT_Fonts"/>
      <xsd:element name="b" type="CT_OnOff"/>
      <xsd:element name="bCs" type="CT_OnOff"/>
      <xsd:element name="i" type="CT_OnOff"/>
      <xsd:element name="iCs" type="CT_OnOff"/>
      <xsd:element name="caps" type="CT_OnOff"/>
      <xsd:element name="smallCaps" type="CT_OnOff"/>
      <xsd:element name="strike" type="CT_OnOff"/>
      <xsd:element name="dstrike" type="CT_OnOff"/>
      <xsd:element name="outline" type="CT_OnOff"/>
      <xsd:element name="shadow" type="CT_OnOff"/>
      <xsd:element name="emboss" type="CT_OnOff"/>
      <xsd:element name="imprint" type="CT_OnOff"/>
      <xsd:element name="noProof" type="CT_OnOff"/>
      <xsd:element name="snapToGrid" type="CT_OnOff"/>
      <xsd:element name="vanish" type="CT_OnOff"/>
      <xsd:element name="webHidden" type="CT_OnOff"/>
      <xsd:element name="color" type="CT_Color"/>
      <xsd:element name="spacing" type="CT_SignedTwipsMeasure"/>
      <xsd:element name="w" type="CT_TextScale"/>
      <xsd:element name="kern" type="CT_HpsMeasure"/>
      <xsd:element name="position" type="CT_SignedHpsMeasure"/>
      <xsd:element name="sz" type="CT_HpsMeasure"/>
      <xsd:element name="szCs" type="CT_HpsMeasure"/>
      <xsd:element name="highlight" type="CT_Highlight"/>
      <xsd:element name="u" type="CT_Underline"/>
      <xsd:element name="effect" type="CT_TextEffect"/>
      <xsd:element name="bdr" type="CT_Border"/>
      <xsd:element name="shd" type="CT_Shd"/>
      <xsd:element name="fitText" type="CT_FitText"/>
      <xsd:element name="vertAlign" type="CT_VerticalAlignRun"/>
      <xsd:element name="rtl" type="CT_OnOff"/>
      <xsd:element name="cs" type="CT_OnOff"/>
      <xsd:element name="em" type="CT_Em"/>
      <xsd:element name="lang" type="CT_Language"/>
      <xsd:element name="eastAsianLayout" type="CT_EastAsianLayout"/>
      <xsd:element name="specVanish" type="CT_OnOff"/>
      <xsd:element name="oMath" type="CT_OnOff"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_RPrContent">
    <xsd:sequence>
      <xsd:group ref="EG_RPrBase" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rPrChange" type="CT_RPrChange" minOccurs="0"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_RPr">
    <xsd:sequence>
      <xsd:group ref="EG_RPrContent" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_RPr">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:group name="EG_RPrMath">
    <xsd:choice>
      <xsd:group ref="EG_RPr"/>
      <xsd:element name="ins" type="CT_MathCtrlIns"/>
      <xsd:element name="del" type="CT_MathCtrlDel"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_MathCtrlIns">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:choice minOccurs="0">
          <xsd:element name="del" type="CT_RPrChange" minOccurs="1"/>
          <xsd:element name="rPr" type="CT_RPr" minOccurs="1"/>
        </xsd:choice>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_MathCtrlDel">
    <xsd:complexContent>
      <xsd:extension base="CT_TrackChange">
        <xsd:choice minOccurs="0">
          <xsd:element name="rPr" type="CT_RPr" minOccurs="1"/>
        </xsd:choice>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_RPrOriginal">
    <xsd:sequence>
      <xsd:group ref="EG_RPrBase" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ParaRPrOriginal">
    <xsd:sequence>
      <xsd:group ref="EG_ParaRPrTrackChanges" minOccurs="0"/>
      <xsd:group ref="EG_RPrBase" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ParaRPr">
    <xsd:sequence>
      <xsd:group ref="EG_ParaRPrTrackChanges" minOccurs="0"/>
      <xsd:group ref="EG_RPrBase" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="rPrChange" type="CT_ParaRPrChange" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_ParaRPrTrackChanges">
    <xsd:sequence>
      <xsd:element name="ins" type="CT_TrackChange" minOccurs="0"/>
      <xsd:element name="del" type="CT_TrackChange" minOccurs="0"/>
      <xsd:element name="moveFrom" type="CT_TrackChange" minOccurs="0"/>
      <xsd:element name="moveTo" type="CT_TrackChange" minOccurs="0"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_AltChunk">
    <xsd:sequence>
      <xsd:element name="altChunkPr" type="CT_AltChunkPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute ref="r:id" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AltChunkPr">
    <xsd:sequence>
      <xsd:element name="matchSrc" type="CT_OnOff" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_RubyAlign">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="center"/>
      <xsd:enumeration value="distributeLetter"/>
      <xsd:enumeration value="distributeSpace"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
      <xsd:enumeration value="rightVertical"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_RubyAlign">
    <xsd:attribute name="val" type="ST_RubyAlign" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RubyPr">
    <xsd:sequence>
      <xsd:element name="rubyAlign" type="CT_RubyAlign"/>
      <xsd:element name="hps" type="CT_HpsMeasure"/>
      <xsd:element name="hpsRaise" type="CT_HpsMeasure"/>
      <xsd:element name="hpsBaseText" type="CT_HpsMeasure"/>
      <xsd:element name="lid" type="CT_Lang"/>
      <xsd:element name="dirty" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_RubyContent">
    <xsd:choice>
      <xsd:element name="r" type="CT_R"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_RubyContent">
    <xsd:group ref="EG_RubyContent" minOccurs="0" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Ruby">
    <xsd:sequence>
      <xsd:element name="rubyPr" type="CT_RubyPr"/>
      <xsd:element name="rt" type="CT_RubyContent"/>
      <xsd:element name="rubyBase" type="CT_RubyContent"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Lock">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="sdtLocked"/>
      <xsd:enumeration value="contentLocked"/>
      <xsd:enumeration value="unlocked"/>
      <xsd:enumeration value="sdtContentLocked"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Lock">
    <xsd:attribute name="val" type="ST_Lock"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtListItem">
    <xsd:attribute name="displayText" type="s:ST_String"/>
    <xsd:attribute name="value" type="s:ST_String"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_SdtDateMappingType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="date"/>
      <xsd:enumeration value="dateTime"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SdtDateMappingType">
    <xsd:attribute name="val" type="ST_SdtDateMappingType"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CalendarType">
    <xsd:attribute name="val" type="s:ST_CalendarType"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtDate">
    <xsd:sequence>
      <xsd:element name="dateFormat" type="CT_String" minOccurs="0"/>
      <xsd:element name="lid" type="CT_Lang" minOccurs="0"/>
      <xsd:element name="storeMappedDataAs" type="CT_SdtDateMappingType" minOccurs="0"/>
      <xsd:element name="calendar" type="CT_CalendarType" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="fullDate" type="ST_DateTime" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtComboBox">
    <xsd:sequence>
      <xsd:element name="listItem" type="CT_SdtListItem" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="lastValue" type="s:ST_String" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtDocPart">
    <xsd:sequence>
      <xsd:element name="docPartGallery" type="CT_String" minOccurs="0"/>
      <xsd:element name="docPartCategory" type="CT_String" minOccurs="0"/>
      <xsd:element name="docPartUnique" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtDropDownList">
    <xsd:sequence>
      <xsd:element name="listItem" type="CT_SdtListItem" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="lastValue" type="s:ST_String" use="optional" default=""/>
  </xsd:complexType>
  <xsd:complexType name="CT_Placeholder">
    <xsd:sequence>
      <xsd:element name="docPart" type="CT_String"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtText">
    <xsd:attribute name="multiLine" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DataBinding">
    <xsd:attribute name="prefixMappings" type="s:ST_String"/>
    <xsd:attribute name="xpath" type="s:ST_String" use="required"/>
    <xsd:attribute name="storeItemID" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtPr">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
      <xsd:element name="alias" type="CT_String" minOccurs="0"/>
      <xsd:element name="tag" type="CT_String" minOccurs="0"/>
      <xsd:element name="id" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="lock" type="CT_Lock" minOccurs="0"/>
      <xsd:element name="placeholder" type="CT_Placeholder" minOccurs="0"/>
      <xsd:element name="temporary" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="showingPlcHdr" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="dataBinding" type="CT_DataBinding" minOccurs="0"/>
      <xsd:element name="label" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="tabIndex" type="CT_UnsignedDecimalNumber" minOccurs="0"/>
      <xsd:choice minOccurs="0" maxOccurs="1">
        <xsd:element name="equation" type="CT_Empty"/>
        <xsd:element name="comboBox" type="CT_SdtComboBox"/>
        <xsd:element name="date" type="CT_SdtDate"/>
        <xsd:element name="docPartObj" type="CT_SdtDocPart"/>
        <xsd:element name="docPartList" type="CT_SdtDocPart"/>
        <xsd:element name="dropDownList" type="CT_SdtDropDownList"/>
        <xsd:element name="picture" type="CT_Empty"/>
        <xsd:element name="richText" type="CT_Empty"/>
        <xsd:element name="text" type="CT_SdtText"/>
        <xsd:element name="citation" type="CT_Empty"/>
        <xsd:element name="group" type="CT_Empty"/>
        <xsd:element name="bibliography" type="CT_Empty"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtEndPr">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:group name="EG_ContentRunContent">
    <xsd:choice>
      <xsd:element name="customXml" type="CT_CustomXmlRun"/>
      <xsd:element name="smartTag" type="CT_SmartTagRun"/>
      <xsd:element name="sdt" type="CT_SdtRun"/>
      <xsd:element name="dir" type="CT_DirContentRun"/>
      <xsd:element name="bdo" type="CT_BdoContentRun"/>
      <xsd:element name="r" type="CT_R"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_DirContentRun">
    <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    <xsd:attribute name="val" type="ST_Direction" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_BdoContentRun">
    <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    <xsd:attribute name="val" type="ST_Direction" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Direction">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="ltr"/>
      <xsd:enumeration value="rtl"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_SdtContentRun">
    <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:group name="EG_ContentBlockContent">
    <xsd:choice>
      <xsd:element name="customXml" type="CT_CustomXmlBlock"/>
      <xsd:element name="sdt" type="CT_SdtBlock"/>
      <xsd:element name="p" type="CT_P" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="tbl" type="CT_Tbl" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_SdtContentBlock">
    <xsd:group ref="EG_ContentBlockContent" minOccurs="0" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:group name="EG_ContentRowContent">
    <xsd:choice>
      <xsd:element name="tr" type="CT_Row" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="customXml" type="CT_CustomXmlRow"/>
      <xsd:element name="sdt" type="CT_SdtRow"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_SdtContentRow">
    <xsd:group ref="EG_ContentRowContent" minOccurs="0" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:group name="EG_ContentCellContent">
    <xsd:choice>
      <xsd:element name="tc" type="CT_Tc" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="customXml" type="CT_CustomXmlCell"/>
      <xsd:element name="sdt" type="CT_SdtCell"/>
      <xsd:group ref="EG_RunLevelElts" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_SdtContentCell">
    <xsd:group ref="EG_ContentCellContent" minOccurs="0" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtBlock">
    <xsd:sequence>
      <xsd:element name="sdtPr" type="CT_SdtPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtEndPr" type="CT_SdtEndPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtContent" type="CT_SdtContentBlock" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtRun">
    <xsd:sequence>
      <xsd:element name="sdtPr" type="CT_SdtPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtEndPr" type="CT_SdtEndPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtContent" type="CT_SdtContentRun" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtCell">
    <xsd:sequence>
      <xsd:element name="sdtPr" type="CT_SdtPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtEndPr" type="CT_SdtEndPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtContent" type="CT_SdtContentCell" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_SdtRow">
    <xsd:sequence>
      <xsd:element name="sdtPr" type="CT_SdtPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtEndPr" type="CT_SdtEndPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sdtContent" type="CT_SdtContentRow" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Attr">
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
    <xsd:attribute name="val" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomXmlRun">
    <xsd:sequence>
      <xsd:element name="customXmlPr" type="CT_CustomXmlPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="element" type="s:ST_XmlName" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SmartTagRun">
    <xsd:sequence>
      <xsd:element name="smartTagPr" type="CT_SmartTagPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="element" type="s:ST_XmlName" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomXmlBlock">
    <xsd:sequence>
      <xsd:element name="customXmlPr" type="CT_CustomXmlPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ContentBlockContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="element" type="s:ST_XmlName" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomXmlPr">
    <xsd:sequence>
      <xsd:element name="placeholder" type="CT_String" minOccurs="0"/>
      <xsd:element name="attr" type="CT_Attr" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomXmlRow">
    <xsd:sequence>
      <xsd:element name="customXmlPr" type="CT_CustomXmlPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ContentRowContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="element" type="s:ST_XmlName" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_CustomXmlCell">
    <xsd:sequence>
      <xsd:element name="customXmlPr" type="CT_CustomXmlPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ContentCellContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="element" type="s:ST_XmlName" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SmartTagPr">
    <xsd:sequence>
      <xsd:element name="attr" type="CT_Attr" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:group name="EG_PContent">
    <xsd:choice>
      <xsd:group ref="EG_ContentRunContent" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="fldSimple" type="CT_SimpleField" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="hyperlink" type="CT_Hyperlink"/>
      <xsd:element name="subDoc" type="CT_Rel"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_P">
    <xsd:sequence>
      <xsd:element name="pPr" type="CT_PPr" minOccurs="0"/>
      <xsd:group ref="EG_PContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="rsidRPr" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidR" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidDel" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidP" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidRDefault" type="ST_LongHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TblWidth">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="nil"/>
      <xsd:enumeration value="pct"/>
      <xsd:enumeration value="dxa"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Height">
    <xsd:attribute name="val" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="hRule" type="ST_HeightRule"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MeasurementOrPercent">
    <xsd:union memberTypes="ST_DecimalNumberOrPercent s:ST_UniversalMeasure"/>
  </xsd:simpleType>
  <xsd:complexType name="CT_TblWidth">
    <xsd:attribute name="w" type="ST_MeasurementOrPercent"/>
    <xsd:attribute name="type" type="ST_TblWidth"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TblGridCol">
    <xsd:attribute name="w" type="s:ST_TwipsMeasure"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TblGridBase">
    <xsd:sequence>
      <xsd:element name="gridCol" type="CT_TblGridCol" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblGrid">
    <xsd:complexContent>
      <xsd:extension base="CT_TblGridBase">
        <xsd:sequence>
          <xsd:element name="tblGridChange" type="CT_TblGridChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TcBorders">
    <xsd:sequence>
      <xsd:element name="top" type="CT_Border" minOccurs="0"/>
      <xsd:element name="start" type="CT_Border" minOccurs="0"/>
      <xsd:element name="left" type="CT_Border" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_Border" minOccurs="0"/>
      <xsd:element name="end" type="CT_Border" minOccurs="0"/>
      <xsd:element name="right" type="CT_Border" minOccurs="0"/>
      <xsd:element name="insideH" type="CT_Border" minOccurs="0"/>
      <xsd:element name="insideV" type="CT_Border" minOccurs="0"/>
      <xsd:element name="tl2br" type="CT_Border" minOccurs="0"/>
      <xsd:element name="tr2bl" type="CT_Border" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TcMar">
    <xsd:sequence>
      <xsd:element name="top" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="start" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="left" type="CT_TblWidth" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="end" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="right" type="CT_TblWidth" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_Merge">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="continue"/>
      <xsd:enumeration value="restart"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_VMerge">
    <xsd:attribute name="val" type="ST_Merge"/>
  </xsd:complexType>
  <xsd:complexType name="CT_HMerge">
    <xsd:attribute name="val" type="ST_Merge"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TcPrBase">
    <xsd:sequence>
      <xsd:element name="cnfStyle" type="CT_Cnf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcW" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="gridSpan" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="hMerge" type="CT_HMerge" minOccurs="0"/>
      <xsd:element name="vMerge" type="CT_VMerge" minOccurs="0"/>
      <xsd:element name="tcBorders" type="CT_TcBorders" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shd" type="CT_Shd" minOccurs="0"/>
      <xsd:element name="noWrap" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="tcMar" type="CT_TcMar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="textDirection" type="CT_TextDirection" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcFitText" type="CT_OnOff" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="vAlign" type="CT_VerticalJc" minOccurs="0"/>
      <xsd:element name="hideMark" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="headers" type="CT_Headers" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TcPr">
    <xsd:complexContent>
      <xsd:extension base="CT_TcPrInner">
        <xsd:sequence>
          <xsd:element name="tcPrChange" type="CT_TcPrChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TcPrInner">
    <xsd:complexContent>
      <xsd:extension base="CT_TcPrBase">
        <xsd:sequence>
          <xsd:group ref="EG_CellMarkupElements" minOccurs="0" maxOccurs="1"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Tc">
    <xsd:sequence>
      <xsd:element name="tcPr" type="CT_TcPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_BlockLevelElts" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="s:ST_String" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Cnf">
    <xsd:restriction base="xsd:string">
      <xsd:length value="12"/>
      <xsd:pattern value="[01]*"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Cnf">
    <xsd:attribute name="val" type="ST_Cnf"/>
    <xsd:attribute name="firstRow" type="s:ST_OnOff"/>
    <xsd:attribute name="lastRow" type="s:ST_OnOff"/>
    <xsd:attribute name="firstColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="lastColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="oddVBand" type="s:ST_OnOff"/>
    <xsd:attribute name="evenVBand" type="s:ST_OnOff"/>
    <xsd:attribute name="oddHBand" type="s:ST_OnOff"/>
    <xsd:attribute name="evenHBand" type="s:ST_OnOff"/>
    <xsd:attribute name="firstRowFirstColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="firstRowLastColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="lastRowFirstColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="lastRowLastColumn" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Headers">
    <xsd:sequence minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="header" type="CT_String"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TrPrBase">
    <xsd:choice maxOccurs="unbounded">
      <xsd:element name="cnfStyle" type="CT_Cnf" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="divId" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="gridBefore" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="gridAfter" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="wBefore" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="wAfter" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="cantSplit" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="trHeight" type="CT_Height" minOccurs="0"/>
      <xsd:element name="tblHeader" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="tblCellSpacing" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="jc" type="CT_JcTable" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="hidden" type="CT_OnOff" minOccurs="0"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_TrPr">
    <xsd:complexContent>
      <xsd:extension base="CT_TrPrBase">
        <xsd:sequence>
          <xsd:element name="ins" type="CT_TrackChange" minOccurs="0"/>
          <xsd:element name="del" type="CT_TrackChange" minOccurs="0"/>
          <xsd:element name="trPrChange" type="CT_TrPrChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Row">
    <xsd:sequence>
      <xsd:element name="tblPrEx" type="CT_TblPrEx" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trPr" type="CT_TrPr" minOccurs="0" maxOccurs="1"/>
      <xsd:group ref="EG_ContentCellContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="rsidRPr" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidR" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidDel" type="ST_LongHexNumber"/>
    <xsd:attribute name="rsidTr" type="ST_LongHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TblLayoutType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="fixed"/>
      <xsd:enumeration value="autofit"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TblLayoutType">
    <xsd:attribute name="type" type="ST_TblLayoutType"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_TblOverlap">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="never"/>
      <xsd:enumeration value="overlap"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TblOverlap">
    <xsd:attribute name="val" type="ST_TblOverlap" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPPr">
    <xsd:attribute name="leftFromText" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="rightFromText" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="topFromText" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="bottomFromText" type="s:ST_TwipsMeasure"/>
    <xsd:attribute name="vertAnchor" type="ST_VAnchor"/>
    <xsd:attribute name="horzAnchor" type="ST_HAnchor"/>
    <xsd:attribute name="tblpXSpec" type="s:ST_XAlign"/>
    <xsd:attribute name="tblpX" type="ST_SignedTwipsMeasure"/>
    <xsd:attribute name="tblpYSpec" type="s:ST_YAlign"/>
    <xsd:attribute name="tblpY" type="ST_SignedTwipsMeasure"/>
  </xsd:complexType>
  <xsd:complexType name="CT_TblCellMar">
    <xsd:sequence>
      <xsd:element name="top" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="start" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="left" type="CT_TblWidth" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="end" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="right" type="CT_TblWidth" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblBorders">
    <xsd:sequence>
      <xsd:element name="top" type="CT_Border" minOccurs="0"/>
      <xsd:element name="start" type="CT_Border" minOccurs="0"/>
      <xsd:element name="left" type="CT_Border" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_Border" minOccurs="0"/>
      <xsd:element name="end" type="CT_Border" minOccurs="0"/>
      <xsd:element name="right" type="CT_Border" minOccurs="0"/>
      <xsd:element name="insideH" type="CT_Border" minOccurs="0"/>
      <xsd:element name="insideV" type="CT_Border" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPrBase">
    <xsd:sequence>
      <xsd:element name="tblStyle" type="CT_String" minOccurs="0"/>
      <xsd:element name="tblpPr" type="CT_TblPPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblOverlap" type="CT_TblOverlap" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="bidiVisual" type="CT_OnOff" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblStyleRowBandSize" type="CT_DecimalNumber" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblStyleColBandSize" type="CT_DecimalNumber" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblW" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="jc" type="CT_JcTable" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblCellSpacing" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblInd" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblBorders" type="CT_TblBorders" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shd" type="CT_Shd" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblLayout" type="CT_TblLayoutType" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblCellMar" type="CT_TblCellMar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblLook" type="CT_TblLook" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblCaption" type="CT_String" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblDescription" type="CT_String" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPr">
    <xsd:complexContent>
      <xsd:extension base="CT_TblPrBase">
        <xsd:sequence>
          <xsd:element name="tblPrChange" type="CT_TblPrChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPrExBase">
    <xsd:sequence>
      <xsd:element name="tblW" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="jc" type="CT_JcTable" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblCellSpacing" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblInd" type="CT_TblWidth" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblBorders" type="CT_TblBorders" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shd" type="CT_Shd" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblLayout" type="CT_TblLayoutType" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblCellMar" type="CT_TblCellMar" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblLook" type="CT_TblLook" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblPrEx">
    <xsd:complexContent>
      <xsd:extension base="CT_TblPrExBase">
        <xsd:sequence>
          <xsd:element name="tblPrExChange" type="CT_TblPrExChange" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Tbl">
    <xsd:sequence>
      <xsd:group ref="EG_RangeMarkupElements" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="tblPr" type="CT_TblPr"/>
      <xsd:element name="tblGrid" type="CT_TblGrid"/>
      <xsd:group ref="EG_ContentRowContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TblLook">
    <xsd:attribute name="firstRow" type="s:ST_OnOff"/>
    <xsd:attribute name="lastRow" type="s:ST_OnOff"/>
    <xsd:attribute name="firstColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="lastColumn" type="s:ST_OnOff"/>
    <xsd:attribute name="noHBand" type="s:ST_OnOff"/>
    <xsd:attribute name="noVBand" type="s:ST_OnOff"/>
    <xsd:attribute name="val" type="ST_ShortHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FtnPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="pageBottom"/>
      <xsd:enumeration value="beneathText"/>
      <xsd:enumeration value="sectEnd"/>
      <xsd:enumeration value="docEnd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FtnPos">
    <xsd:attribute name="val" type="ST_FtnPos" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_EdnPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="sectEnd"/>
      <xsd:enumeration value="docEnd"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_EdnPos">
    <xsd:attribute name="val" type="ST_EdnPos" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NumFmt">
    <xsd:attribute name="val" type="ST_NumberFormat" use="required"/>
    <xsd:attribute name="format" type="s:ST_String" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_RestartNumber">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="continuous"/>
      <xsd:enumeration value="eachSect"/>
      <xsd:enumeration value="eachPage"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_NumRestart">
    <xsd:attribute name="val" type="ST_RestartNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FtnEdnRef">
    <xsd:attribute name="customMarkFollows" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="id" use="required" type="ST_DecimalNumber"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FtnEdnSepRef">
    <xsd:attribute name="id" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FtnEdn">
    <xsd:sequence>
      <xsd:group ref="EG_BlockLevelElts" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_FtnEdn" use="optional"/>
    <xsd:attribute name="id" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:group name="EG_FtnEdnNumProps">
    <xsd:sequence>
      <xsd:element name="numStart" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="numRestart" type="CT_NumRestart" minOccurs="0"/>
    </xsd:sequence>
  </xsd:group>
  <xsd:complexType name="CT_FtnProps">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_FtnPos" minOccurs="0"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0"/>
      <xsd:group ref="EG_FtnEdnNumProps" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_EdnProps">
    <xsd:sequence>
      <xsd:element name="pos" type="CT_EdnPos" minOccurs="0"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0"/>
      <xsd:group ref="EG_FtnEdnNumProps" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_FtnDocProps">
    <xsd:complexContent>
      <xsd:extension base="CT_FtnProps">
        <xsd:sequence>
          <xsd:element name="footnote" type="CT_FtnEdnSepRef" minOccurs="0" maxOccurs="3"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_EdnDocProps">
    <xsd:complexContent>
      <xsd:extension base="CT_EdnProps">
        <xsd:sequence>
          <xsd:element name="endnote" type="CT_FtnEdnSepRef" minOccurs="0" maxOccurs="3"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_RecipientData">
    <xsd:sequence>
      <xsd:element name="active" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="column" type="CT_DecimalNumber" minOccurs="1"/>
      <xsd:element name="uniqueTag" type="CT_Base64Binary" minOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Base64Binary">
    <xsd:attribute name="val" type="xsd:base64Binary" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Recipients">
    <xsd:sequence>
      <xsd:element name="recipientData" type="CT_RecipientData" minOccurs="1" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="recipients" type="CT_Recipients"/>
  <xsd:complexType name="CT_OdsoFieldMapData">
    <xsd:sequence>
      <xsd:element name="type" type="CT_MailMergeOdsoFMDFieldType" minOccurs="0"/>
      <xsd:element name="name" type="CT_String" minOccurs="0"/>
      <xsd:element name="mappedName" type="CT_String" minOccurs="0"/>
      <xsd:element name="column" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="lid" type="CT_Lang" minOccurs="0"/>
      <xsd:element name="dynamicAddress" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_MailMergeSourceType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="database"/>
      <xsd:enumeration value="addressBook"/>
      <xsd:enumeration value="document1"/>
      <xsd:enumeration value="document2"/>
      <xsd:enumeration value="text"/>
      <xsd:enumeration value="email"/>
      <xsd:enumeration value="native"/>
      <xsd:enumeration value="legacy"/>
      <xsd:enumeration value="master"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MailMergeSourceType">
    <xsd:attribute name="val" use="required" type="ST_MailMergeSourceType"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Odso">
    <xsd:sequence>
      <xsd:element name="udl" type="CT_String" minOccurs="0"/>
      <xsd:element name="table" type="CT_String" minOccurs="0"/>
      <xsd:element name="src" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="colDelim" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="type" type="CT_MailMergeSourceType" minOccurs="0"/>
      <xsd:element name="fHdr" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="fieldMapData" type="CT_OdsoFieldMapData" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="recipientData" type="CT_Rel" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_MailMerge">
    <xsd:sequence>
      <xsd:element name="mainDocumentType" type="CT_MailMergeDocType" minOccurs="1"/>
      <xsd:element name="linkToQuery" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="dataType" type="CT_MailMergeDataType" minOccurs="1"/>
      <xsd:element name="connectString" type="CT_String" minOccurs="0"/>
      <xsd:element name="query" type="CT_String" minOccurs="0"/>
      <xsd:element name="dataSource" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="headerSource" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="doNotSuppressBlankLines" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="destination" type="CT_MailMergeDest" minOccurs="0"/>
      <xsd:element name="addressFieldName" type="CT_String" minOccurs="0"/>
      <xsd:element name="mailSubject" type="CT_String" minOccurs="0"/>
      <xsd:element name="mailAsAttachment" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="viewMergedData" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="activeRecord" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="checkErrors" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="odso" type="CT_Odso" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TargetScreenSz">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="544x376"/>
      <xsd:enumeration value="640x480"/>
      <xsd:enumeration value="720x512"/>
      <xsd:enumeration value="800x600"/>
      <xsd:enumeration value="1024x768"/>
      <xsd:enumeration value="1152x882"/>
      <xsd:enumeration value="1152x900"/>
      <xsd:enumeration value="1280x1024"/>
      <xsd:enumeration value="1600x1200"/>
      <xsd:enumeration value="1800x1440"/>
      <xsd:enumeration value="1920x1200"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TargetScreenSz">
    <xsd:attribute name="val" type="ST_TargetScreenSz" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Compat">
    <xsd:sequence>
      <xsd:element name="useSingleBorderforContiguousCells" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="wpJustification" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noTabHangInd" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noLeading" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="spaceForUL" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noColumnBalance" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="balanceSingleByteDoubleByteWidth" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noExtraLineSpacing" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotLeaveBackslashAlone" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ulTrailSpace" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotExpandShiftReturn" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="spacingInWholePoints" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="lineWrapLikeWord6" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="printBodyTextBeforeHeader" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="printColBlack" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="wpSpaceWidth" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="showBreaksInFrames" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="subFontBySize" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressBottomSpacing" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressTopSpacing" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressSpacingAtTopOfPage" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressTopSpacingWP" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suppressSpBfAfterPgBrk" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="swapBordersFacingPages" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="convMailMergeEsc" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="truncateFontHeightsLikeWP6" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="mwSmallCaps" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="usePrinterMetrics" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotSuppressParagraphBorders" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="wrapTrailSpaces" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="footnoteLayoutLikeWW8" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="shapeLayoutLikeWW8" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="alignTablesRowByRow" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="forgetLastTabAlignment" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="adjustLineHeightInTable" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="autoSpaceLikeWord95" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noSpaceRaiseLower" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotUseHTMLParagraphAutoSpacing" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="layoutRawTableWidth" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="layoutTableRowsApart" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useWord97LineBreakRules" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotBreakWrappedTables" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotSnapToGridInCell" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="selectFldWithFirstOrLastChar" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="applyBreakingRules" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotWrapTextWithPunct" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotUseEastAsianBreakRules" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useWord2002TableStyleRules" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="growAutofit" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useFELayout" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useNormalStyleForList" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotUseIndentAsNumberingTabStop" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useAltKinsokuLineBreakRules" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="allowSpaceOfSameStyleInTable" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotSuppressIndentation" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotAutofitConstrainedTables" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="autofitToFirstFixedWidthCell" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="underlineTabInNumList" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="displayHangulFixedWidth" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="splitPgBreakAndParaMark" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotVertAlignCellWithSp" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotBreakConstrainedForcedTable" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotVertAlignInTxbx" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useAnsiKerningPairs" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="cachedColBalance" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="compatSetting" type="CT_CompatSetting" minOccurs="0" maxOccurs="unbounded"
      />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_CompatSetting">
    <xsd:attribute name="name" type="s:ST_String"/>
    <xsd:attribute name="uri" type="s:ST_String"/>
    <xsd:attribute name="val" type="s:ST_String"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocVar">
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
    <xsd:attribute name="val" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocVars">
    <xsd:sequence>
      <xsd:element name="docVar" type="CT_DocVar" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DocRsids">
    <xsd:sequence>
      <xsd:element name="rsidRoot" type="CT_LongHexNumber" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rsid" type="CT_LongHexNumber" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_CharacterSpacing">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="doNotCompress"/>
      <xsd:enumeration value="compressPunctuation"/>
      <xsd:enumeration value="compressPunctuationAndJapaneseKana"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_CharacterSpacing">
    <xsd:attribute name="val" type="ST_CharacterSpacing" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_SaveThroughXslt">
    <xsd:attribute ref="r:id" use="optional"/>
    <xsd:attribute name="solutionID" type="s:ST_String" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_RPrDefault">
    <xsd:sequence>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_PPrDefault">
    <xsd:sequence>
      <xsd:element name="pPr" type="CT_PPrGeneral" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DocDefaults">
    <xsd:sequence>
      <xsd:element name="rPrDefault" type="CT_RPrDefault" minOccurs="0"/>
      <xsd:element name="pPrDefault" type="CT_PPrDefault" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_WmlColorSchemeIndex">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="dark1"/>
      <xsd:enumeration value="light1"/>
      <xsd:enumeration value="dark2"/>
      <xsd:enumeration value="light2"/>
      <xsd:enumeration value="accent1"/>
      <xsd:enumeration value="accent2"/>
      <xsd:enumeration value="accent3"/>
      <xsd:enumeration value="accent4"/>
      <xsd:enumeration value="accent5"/>
      <xsd:enumeration value="accent6"/>
      <xsd:enumeration value="hyperlink"/>
      <xsd:enumeration value="followedHyperlink"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_ColorSchemeMapping">
    <xsd:attribute name="bg1" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="t1" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="bg2" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="t2" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent1" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent2" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent3" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent4" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent5" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="accent6" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="hyperlink" type="ST_WmlColorSchemeIndex"/>
    <xsd:attribute name="followedHyperlink" type="ST_WmlColorSchemeIndex"/>
  </xsd:complexType>
  <xsd:complexType name="CT_ReadingModeInkLockDown">
    <xsd:attribute name="actualPg" type="s:ST_OnOff" use="required"/>
    <xsd:attribute name="w" type="ST_PixelsMeasure" use="required"/>
    <xsd:attribute name="h" type="ST_PixelsMeasure" use="required"/>
    <xsd:attribute name="fontSz" type="ST_DecimalNumberOrPercent" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_WriteProtection">
    <xsd:attribute name="recommended" type="s:ST_OnOff" use="optional"/>
    <xsd:attributeGroup ref="AG_Password"/>
    <xsd:attributeGroup ref="AG_TransitionalPassword"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Settings">
    <xsd:sequence>
      <xsd:element name="writeProtection" type="CT_WriteProtection" minOccurs="0"/>
      <xsd:element name="view" type="CT_View" minOccurs="0"/>
      <xsd:element name="zoom" type="CT_Zoom" minOccurs="0"/>
      <xsd:element name="removePersonalInformation" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="removeDateAndTime" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotDisplayPageBoundaries" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="displayBackgroundShape" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="printPostScriptOverText" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="printFractionalCharacterWidth" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="printFormsData" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="embedTrueTypeFonts" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="embedSystemFonts" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="saveSubsetFonts" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="saveFormsData" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="mirrorMargins" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="alignBordersAndEdges" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bordersDoNotSurroundHeader" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bordersDoNotSurroundFooter" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="gutterAtTop" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hideSpellingErrors" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hideGrammaticalErrors" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="activeWritingStyle" type="CT_WritingStyle" minOccurs="0"
        maxOccurs="unbounded"/>
      <xsd:element name="proofState" type="CT_Proof" minOccurs="0"/>
      <xsd:element name="formsDesign" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="attachedTemplate" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="linkStyles" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="stylePaneFormatFilter" type="CT_StylePaneFilter" minOccurs="0"/>
      <xsd:element name="stylePaneSortMethod" type="CT_StyleSort" minOccurs="0"/>
      <xsd:element name="documentType" type="CT_DocType" minOccurs="0"/>
      <xsd:element name="mailMerge" type="CT_MailMerge" minOccurs="0"/>
      <xsd:element name="revisionView" type="CT_TrackChangesView" minOccurs="0"/>
      <xsd:element name="trackRevisions" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotTrackMoves" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotTrackFormatting" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="documentProtection" type="CT_DocProtect" minOccurs="0"/>
      <xsd:element name="autoFormatOverride" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="styleLockTheme" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="styleLockQFSet" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="defaultTabStop" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="autoHyphenation" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="consecutiveHyphenLimit" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="hyphenationZone" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="doNotHyphenateCaps" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="showEnvelope" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="summaryLength" type="CT_DecimalNumberOrPrecent" minOccurs="0"/>
      <xsd:element name="clickAndTypeStyle" type="CT_String" minOccurs="0"/>
      <xsd:element name="defaultTableStyle" type="CT_String" minOccurs="0"/>
      <xsd:element name="evenAndOddHeaders" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bookFoldRevPrinting" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bookFoldPrinting" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bookFoldPrintingSheets" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="drawingGridHorizontalSpacing" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="drawingGridVerticalSpacing" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="displayHorizontalDrawingGridEvery" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="displayVerticalDrawingGridEvery" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="doNotUseMarginsForDrawingGridOrigin" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="drawingGridHorizontalOrigin" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="drawingGridVerticalOrigin" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="doNotShadeFormData" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noPunctuationKerning" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="characterSpacingControl" type="CT_CharacterSpacing" minOccurs="0"/>
      <xsd:element name="printTwoOnOne" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="strictFirstAndLastChars" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="noLineBreaksAfter" type="CT_Kinsoku" minOccurs="0"/>
      <xsd:element name="noLineBreaksBefore" type="CT_Kinsoku" minOccurs="0"/>
      <xsd:element name="savePreviewPicture" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotValidateAgainstSchema" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="saveInvalidXml" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="ignoreMixedContent" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="alwaysShowPlaceholderText" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotDemarcateInvalidXml" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="saveXmlDataOnly" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="useXSLTWhenSaving" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="saveThroughXslt" type="CT_SaveThroughXslt" minOccurs="0"/>
      <xsd:element name="showXMLTags" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="alwaysMergeEmptyNamespace" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="updateFields" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hdrShapeDefaults" type="CT_ShapeDefaults" minOccurs="0"/>
      <xsd:element name="footnotePr" type="CT_FtnDocProps" minOccurs="0"/>
      <xsd:element name="endnotePr" type="CT_EdnDocProps" minOccurs="0"/>
      <xsd:element name="compat" type="CT_Compat" minOccurs="0"/>
      <xsd:element name="docVars" type="CT_DocVars" minOccurs="0"/>
      <xsd:element name="rsids" type="CT_DocRsids" minOccurs="0"/>
      <xsd:element ref="m:mathPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="attachedSchema" type="CT_String" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="themeFontLang" type="CT_Language" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="clrSchemeMapping" type="CT_ColorSchemeMapping" minOccurs="0"/>
      <xsd:element name="doNotIncludeSubdocsInStats" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotAutoCompressPictures" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="forceUpgrade" type="CT_Empty" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="captions" type="CT_Captions" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="readModeInkLockDown" type="CT_ReadingModeInkLockDown" minOccurs="0"/>
      <xsd:element name="smartTagType" type="CT_SmartTagType" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element ref="sl:schemaLibrary" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="shapeDefaults" type="CT_ShapeDefaults" minOccurs="0"/>
      <xsd:element name="doNotEmbedSmartTags" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="decimalSymbol" type="CT_String" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="listSeparator" type="CT_String" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_StyleSort">
    <xsd:attribute name="val" type="ST_StyleSort" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_StylePaneFilter">
    <xsd:attribute name="allStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="customStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="latentStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="stylesInUse" type="s:ST_OnOff"/>
    <xsd:attribute name="headingStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="numberingStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="tableStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="directFormattingOnRuns" type="s:ST_OnOff"/>
    <xsd:attribute name="directFormattingOnParagraphs" type="s:ST_OnOff"/>
    <xsd:attribute name="directFormattingOnNumbering" type="s:ST_OnOff"/>
    <xsd:attribute name="directFormattingOnTables" type="s:ST_OnOff"/>
    <xsd:attribute name="clearFormatting" type="s:ST_OnOff"/>
    <xsd:attribute name="top3HeadingStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="visibleStyles" type="s:ST_OnOff"/>
    <xsd:attribute name="alternateStyleNames" type="s:ST_OnOff"/>
    <xsd:attribute name="val" type="ST_ShortHexNumber"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_StyleSort">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="name"/>
      <xsd:enumeration value="priority"/>
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="font"/>
      <xsd:enumeration value="basedOn"/>
      <xsd:enumeration value="type"/>
      <xsd:enumeration value="0000"/>
      <xsd:enumeration value="0001"/>
      <xsd:enumeration value="0002"/>
      <xsd:enumeration value="0003"/>
      <xsd:enumeration value="0004"/>
      <xsd:enumeration value="0005"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_WebSettings">
    <xsd:sequence>
      <xsd:element name="frameset" type="CT_Frameset" minOccurs="0"/>
      <xsd:element name="divs" type="CT_Divs" minOccurs="0"/>
      <xsd:element name="encoding" type="CT_String" minOccurs="0"/>
      <xsd:element name="optimizeForBrowser" type="CT_OptimizeForBrowser" minOccurs="0"/>
      <xsd:element name="relyOnVML" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="allowPNG" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotRelyOnCSS" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotSaveAsSingleFile" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotOrganizeInFolder" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="doNotUseLongFileNames" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="pixelsPerInch" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="targetScreenSz" type="CT_TargetScreenSz" minOccurs="0"/>
      <xsd:element name="saveSmartTagsAsXml" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_FrameScrollbar">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="on"/>
      <xsd:enumeration value="off"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FrameScrollbar">
    <xsd:attribute name="val" type="ST_FrameScrollbar" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_OptimizeForBrowser">
    <xsd:complexContent>
      <xsd:extension base="CT_OnOff">
        <xsd:attribute name="target" type="s:ST_String" use="optional"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Frame">
    <xsd:sequence>
      <xsd:element name="sz" type="CT_String" minOccurs="0"/>
      <xsd:element name="name" type="CT_String" minOccurs="0"/>
      <xsd:element name="title" type="CT_String" minOccurs="0"/>
      <xsd:element name="longDesc" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="sourceFileName" type="CT_Rel" minOccurs="0"/>
      <xsd:element name="marW" type="CT_PixelsMeasure" minOccurs="0"/>
      <xsd:element name="marH" type="CT_PixelsMeasure" minOccurs="0"/>
      <xsd:element name="scrollbar" type="CT_FrameScrollbar" minOccurs="0"/>
      <xsd:element name="noResizeAllowed" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="linkedToFile" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_FrameLayout">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="rows"/>
      <xsd:enumeration value="cols"/>
      <xsd:enumeration value="none"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FrameLayout">
    <xsd:attribute name="val" type="ST_FrameLayout" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FramesetSplitbar">
    <xsd:sequence>
      <xsd:element name="w" type="CT_TwipsMeasure" minOccurs="0"/>
      <xsd:element name="color" type="CT_Color" minOccurs="0"/>
      <xsd:element name="noBorder" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="flatBorders" type="CT_OnOff" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Frameset">
    <xsd:sequence>
      <xsd:element name="sz" type="CT_String" minOccurs="0"/>
      <xsd:element name="framesetSplitbar" type="CT_FramesetSplitbar" minOccurs="0"/>
      <xsd:element name="frameLayout" type="CT_FrameLayout" minOccurs="0"/>
      <xsd:element name="title" type="CT_String" minOccurs="0"/>
      <xsd:choice minOccurs="0" maxOccurs="unbounded">
        <xsd:element name="frameset" type="CT_Frameset" minOccurs="0" maxOccurs="unbounded"/>
        <xsd:element name="frame" type="CT_Frame" minOccurs="0" maxOccurs="unbounded"/>
      </xsd:choice>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_NumPicBullet">
    <xsd:choice>
      <xsd:element name="pict" type="CT_Picture"/>
      <xsd:element name="drawing" type="CT_Drawing"/>
    </xsd:choice>
    <xsd:attribute name="numPicBulletId" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_LevelSuffix">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="tab"/>
      <xsd:enumeration value="space"/>
      <xsd:enumeration value="nothing"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_LevelSuffix">
    <xsd:attribute name="val" type="ST_LevelSuffix" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LevelText">
    <xsd:attribute name="val" type="s:ST_String" use="optional"/>
    <xsd:attribute name="null" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LvlLegacy">
    <xsd:attribute name="legacy" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="legacySpace" type="s:ST_TwipsMeasure" use="optional"/>
    <xsd:attribute name="legacyIndent" type="ST_SignedTwipsMeasure" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Lvl">
    <xsd:sequence>
      <xsd:element name="start" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="numFmt" type="CT_NumFmt" minOccurs="0"/>
      <xsd:element name="lvlRestart" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="pStyle" type="CT_String" minOccurs="0"/>
      <xsd:element name="isLgl" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="suff" type="CT_LevelSuffix" minOccurs="0"/>
      <xsd:element name="lvlText" type="CT_LevelText" minOccurs="0"/>
      <xsd:element name="lvlPicBulletId" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="legacy" type="CT_LvlLegacy" minOccurs="0"/>
      <xsd:element name="lvlJc" type="CT_Jc" minOccurs="0"/>
      <xsd:element name="pPr" type="CT_PPrGeneral" minOccurs="0"/>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
    </xsd:sequence>
    <xsd:attribute name="ilvl" type="ST_DecimalNumber" use="required"/>
    <xsd:attribute name="tplc" type="ST_LongHexNumber" use="optional"/>
    <xsd:attribute name="tentative" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_MultiLevelType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="singleLevel"/>
      <xsd:enumeration value="multilevel"/>
      <xsd:enumeration value="hybridMultilevel"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_MultiLevelType">
    <xsd:attribute name="val" type="ST_MultiLevelType" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AbstractNum">
    <xsd:sequence>
      <xsd:element name="nsid" type="CT_LongHexNumber" minOccurs="0"/>
      <xsd:element name="multiLevelType" type="CT_MultiLevelType" minOccurs="0"/>
      <xsd:element name="tmpl" type="CT_LongHexNumber" minOccurs="0"/>
      <xsd:element name="name" type="CT_String" minOccurs="0"/>
      <xsd:element name="styleLink" type="CT_String" minOccurs="0"/>
      <xsd:element name="numStyleLink" type="CT_String" minOccurs="0"/>
      <xsd:element name="lvl" type="CT_Lvl" minOccurs="0" maxOccurs="9"/>
    </xsd:sequence>
    <xsd:attribute name="abstractNumId" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_NumLvl">
    <xsd:sequence>
      <xsd:element name="startOverride" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="lvl" type="CT_Lvl" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="ilvl" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Num">
    <xsd:sequence>
      <xsd:element name="abstractNumId" type="CT_DecimalNumber" minOccurs="1"/>
      <xsd:element name="lvlOverride" type="CT_NumLvl" minOccurs="0" maxOccurs="9"/>
    </xsd:sequence>
    <xsd:attribute name="numId" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Numbering">
    <xsd:sequence>
      <xsd:element name="numPicBullet" type="CT_NumPicBullet" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="abstractNum" type="CT_AbstractNum" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="num" type="CT_Num" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="numIdMacAtCleanup" type="CT_DecimalNumber" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:simpleType name="ST_TblStyleOverrideType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="wholeTable"/>
      <xsd:enumeration value="firstRow"/>
      <xsd:enumeration value="lastRow"/>
      <xsd:enumeration value="firstCol"/>
      <xsd:enumeration value="lastCol"/>
      <xsd:enumeration value="band1Vert"/>
      <xsd:enumeration value="band2Vert"/>
      <xsd:enumeration value="band1Horz"/>
      <xsd:enumeration value="band2Horz"/>
      <xsd:enumeration value="neCell"/>
      <xsd:enumeration value="nwCell"/>
      <xsd:enumeration value="seCell"/>
      <xsd:enumeration value="swCell"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_TblStylePr">
    <xsd:sequence>
      <xsd:element name="pPr" type="CT_PPrGeneral" minOccurs="0"/>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0"/>
      <xsd:element name="tblPr" type="CT_TblPrBase" minOccurs="0"/>
      <xsd:element name="trPr" type="CT_TrPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcPr" type="CT_TcPr" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_TblStyleOverrideType" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_StyleType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="paragraph"/>
      <xsd:enumeration value="character"/>
      <xsd:enumeration value="table"/>
      <xsd:enumeration value="numbering"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Style">
    <xsd:sequence>
      <xsd:element name="name" type="CT_String" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="aliases" type="CT_String" minOccurs="0"/>
      <xsd:element name="basedOn" type="CT_String" minOccurs="0"/>
      <xsd:element name="next" type="CT_String" minOccurs="0"/>
      <xsd:element name="link" type="CT_String" minOccurs="0"/>
      <xsd:element name="autoRedefine" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="hidden" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="uiPriority" type="CT_DecimalNumber" minOccurs="0"/>
      <xsd:element name="semiHidden" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="unhideWhenUsed" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="qFormat" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="locked" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="personal" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="personalCompose" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="personalReply" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="rsid" type="CT_LongHexNumber" minOccurs="0"/>
      <xsd:element name="pPr" type="CT_PPrGeneral" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="rPr" type="CT_RPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblPr" type="CT_TblPrBase" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="trPr" type="CT_TrPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tcPr" type="CT_TcPr" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="tblStylePr" type="CT_TblStylePr" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="type" type="ST_StyleType" use="optional"/>
    <xsd:attribute name="styleId" type="s:ST_String" use="optional"/>
    <xsd:attribute name="default" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="customStyle" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LsdException">
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
    <xsd:attribute name="locked" type="s:ST_OnOff"/>
    <xsd:attribute name="uiPriority" type="ST_DecimalNumber"/>
    <xsd:attribute name="semiHidden" type="s:ST_OnOff"/>
    <xsd:attribute name="unhideWhenUsed" type="s:ST_OnOff"/>
    <xsd:attribute name="qFormat" type="s:ST_OnOff"/>
  </xsd:complexType>
  <xsd:complexType name="CT_LatentStyles">
    <xsd:sequence>
      <xsd:element name="lsdException" type="CT_LsdException" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="defLockedState" type="s:ST_OnOff"/>
    <xsd:attribute name="defUIPriority" type="ST_DecimalNumber"/>
    <xsd:attribute name="defSemiHidden" type="s:ST_OnOff"/>
    <xsd:attribute name="defUnhideWhenUsed" type="s:ST_OnOff"/>
    <xsd:attribute name="defQFormat" type="s:ST_OnOff"/>
    <xsd:attribute name="count" type="ST_DecimalNumber"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Styles">
    <xsd:sequence>
      <xsd:element name="docDefaults" type="CT_DocDefaults" minOccurs="0"/>
      <xsd:element name="latentStyles" type="CT_LatentStyles" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="style" type="CT_Style" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Panose">
    <xsd:attribute name="val" type="s:ST_Panose" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_FontFamily">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="decorative"/>
      <xsd:enumeration value="modern"/>
      <xsd:enumeration value="roman"/>
      <xsd:enumeration value="script"/>
      <xsd:enumeration value="swiss"/>
      <xsd:enumeration value="auto"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_FontFamily">
    <xsd:attribute name="val" type="ST_FontFamily" use="required"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_Pitch">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="fixed"/>
      <xsd:enumeration value="variable"/>
      <xsd:enumeration value="default"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Pitch">
    <xsd:attribute name="val" type="ST_Pitch" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontSig">
    <xsd:attribute name="usb0" use="required" type="ST_LongHexNumber"/>
    <xsd:attribute name="usb1" use="required" type="ST_LongHexNumber"/>
    <xsd:attribute name="usb2" use="required" type="ST_LongHexNumber"/>
    <xsd:attribute name="usb3" use="required" type="ST_LongHexNumber"/>
    <xsd:attribute name="csb0" use="required" type="ST_LongHexNumber"/>
    <xsd:attribute name="csb1" use="required" type="ST_LongHexNumber"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontRel">
    <xsd:complexContent>
      <xsd:extension base="CT_Rel">
        <xsd:attribute name="fontKey" type="s:ST_Guid"/>
        <xsd:attribute name="subsetted" type="s:ST_OnOff"/>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_Font">
    <xsd:sequence>
      <xsd:element name="altName" type="CT_String" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="panose1" type="CT_Panose" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="charset" type="CT_Charset" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="family" type="CT_FontFamily" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="notTrueType" type="CT_OnOff" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="pitch" type="CT_Pitch" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="sig" type="CT_FontSig" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="embedRegular" type="CT_FontRel" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="embedBold" type="CT_FontRel" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="embedItalic" type="CT_FontRel" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="embedBoldItalic" type="CT_FontRel" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_FontsList">
    <xsd:sequence>
      <xsd:element name="font" type="CT_Font" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DivBdr">
    <xsd:sequence>
      <xsd:element name="top" type="CT_Border" minOccurs="0"/>
      <xsd:element name="left" type="CT_Border" minOccurs="0"/>
      <xsd:element name="bottom" type="CT_Border" minOccurs="0"/>
      <xsd:element name="right" type="CT_Border" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Div">
    <xsd:sequence>
      <xsd:element name="blockQuote" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="bodyDiv" type="CT_OnOff" minOccurs="0"/>
      <xsd:element name="marLeft" type="CT_SignedTwipsMeasure"/>
      <xsd:element name="marRight" type="CT_SignedTwipsMeasure"/>
      <xsd:element name="marTop" type="CT_SignedTwipsMeasure"/>
      <xsd:element name="marBottom" type="CT_SignedTwipsMeasure"/>
      <xsd:element name="divBdr" type="CT_DivBdr" minOccurs="0"/>
      <xsd:element name="divsChild" type="CT_Divs" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="ST_DecimalNumber" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_Divs">
    <xsd:sequence minOccurs="1" maxOccurs="unbounded">
      <xsd:element name="div" type="CT_Div"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_TxbxContent">
    <xsd:group ref="EG_BlockLevelElts" minOccurs="1" maxOccurs="unbounded"/>
  </xsd:complexType>
  <xsd:element name="txbxContent" type="CT_TxbxContent"/>
  <xsd:group name="EG_MathContent">
    <xsd:choice>
      <xsd:element ref="m:oMathPara"/>
      <xsd:element ref="m:oMath"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_BlockLevelChunkElts">
    <xsd:choice>
      <xsd:group ref="EG_ContentBlockContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_BlockLevelElts">
    <xsd:choice>
      <xsd:group ref="EG_BlockLevelChunkElts" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="altChunk" type="CT_AltChunk" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:group name="EG_RunLevelElts">
    <xsd:choice>
      <xsd:element name="proofErr" minOccurs="0" type="CT_ProofErr"/>
      <xsd:element name="permStart" minOccurs="0" type="CT_PermStart"/>
      <xsd:element name="permEnd" minOccurs="0" type="CT_Perm"/>
      <xsd:group ref="EG_RangeMarkupElements" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="ins" type="CT_RunTrackChange" minOccurs="0"/>
      <xsd:element name="del" type="CT_RunTrackChange" minOccurs="0"/>
      <xsd:element name="moveFrom" type="CT_RunTrackChange"/>
      <xsd:element name="moveTo" type="CT_RunTrackChange"/>
      <xsd:group ref="EG_MathContent" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:group>
  <xsd:complexType name="CT_Body">
    <xsd:sequence>
      <xsd:group ref="EG_BlockLevelElts" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="sectPr" minOccurs="0" maxOccurs="1" type="CT_SectPr"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_ShapeDefaults">
    <xsd:choice maxOccurs="unbounded">
      <xsd:any processContents="lax" namespace="urn:schemas-microsoft-com:office:office"
        minOccurs="0" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:complexType name="CT_Comments">
    <xsd:sequence>
      <xsd:element name="comment" type="CT_Comment" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="comments" type="CT_Comments"/>
  <xsd:complexType name="CT_Footnotes">
    <xsd:sequence maxOccurs="unbounded">
      <xsd:element name="footnote" type="CT_FtnEdn" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="footnotes" type="CT_Footnotes"/>
  <xsd:complexType name="CT_Endnotes">
    <xsd:sequence maxOccurs="unbounded">
      <xsd:element name="endnote" type="CT_FtnEdn" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="endnotes" type="CT_Endnotes"/>
  <xsd:element name="hdr" type="CT_HdrFtr"/>
  <xsd:element name="ftr" type="CT_HdrFtr"/>
  <xsd:complexType name="CT_SmartTagType">
    <xsd:attribute name="namespaceuri" type="s:ST_String"/>
    <xsd:attribute name="name" type="s:ST_String"/>
    <xsd:attribute name="url" type="s:ST_String"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_ThemeColor">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="dark1"/>
      <xsd:enumeration value="light1"/>
      <xsd:enumeration value="dark2"/>
      <xsd:enumeration value="light2"/>
      <xsd:enumeration value="accent1"/>
      <xsd:enumeration value="accent2"/>
      <xsd:enumeration value="accent3"/>
      <xsd:enumeration value="accent4"/>
      <xsd:enumeration value="accent5"/>
      <xsd:enumeration value="accent6"/>
      <xsd:enumeration value="hyperlink"/>
      <xsd:enumeration value="followedHyperlink"/>
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="background1"/>
      <xsd:enumeration value="text1"/>
      <xsd:enumeration value="background2"/>
      <xsd:enumeration value="text2"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:simpleType name="ST_DocPartBehavior">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="content"/>
      <xsd:enumeration value="p"/>
      <xsd:enumeration value="pg"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DocPartBehavior">
    <xsd:attribute name="val" use="required" type="ST_DocPartBehavior"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPartBehaviors">
    <xsd:choice>
      <xsd:element name="behavior" type="CT_DocPartBehavior" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:simpleType name="ST_DocPartType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="none"/>
      <xsd:enumeration value="normal"/>
      <xsd:enumeration value="autoExp"/>
      <xsd:enumeration value="toolbar"/>
      <xsd:enumeration value="speller"/>
      <xsd:enumeration value="formFld"/>
      <xsd:enumeration value="bbPlcHdr"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DocPartType">
    <xsd:attribute name="val" use="required" type="ST_DocPartType"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPartTypes">
    <xsd:choice>
      <xsd:element name="type" type="CT_DocPartType" maxOccurs="unbounded"/>
    </xsd:choice>
    <xsd:attribute name="all" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:simpleType name="ST_DocPartGallery">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="placeholder"/>
      <xsd:enumeration value="any"/>
      <xsd:enumeration value="default"/>
      <xsd:enumeration value="docParts"/>
      <xsd:enumeration value="coverPg"/>
      <xsd:enumeration value="eq"/>
      <xsd:enumeration value="ftrs"/>
      <xsd:enumeration value="hdrs"/>
      <xsd:enumeration value="pgNum"/>
      <xsd:enumeration value="tbls"/>
      <xsd:enumeration value="watermarks"/>
      <xsd:enumeration value="autoTxt"/>
      <xsd:enumeration value="txtBox"/>
      <xsd:enumeration value="pgNumT"/>
      <xsd:enumeration value="pgNumB"/>
      <xsd:enumeration value="pgNumMargins"/>
      <xsd:enumeration value="tblOfContents"/>
      <xsd:enumeration value="bib"/>
      <xsd:enumeration value="custQuickParts"/>
      <xsd:enumeration value="custCoverPg"/>
      <xsd:enumeration value="custEq"/>
      <xsd:enumeration value="custFtrs"/>
      <xsd:enumeration value="custHdrs"/>
      <xsd:enumeration value="custPgNum"/>
      <xsd:enumeration value="custTbls"/>
      <xsd:enumeration value="custWatermarks"/>
      <xsd:enumeration value="custAutoTxt"/>
      <xsd:enumeration value="custTxtBox"/>
      <xsd:enumeration value="custPgNumT"/>
      <xsd:enumeration value="custPgNumB"/>
      <xsd:enumeration value="custPgNumMargins"/>
      <xsd:enumeration value="custTblOfContents"/>
      <xsd:enumeration value="custBib"/>
      <xsd:enumeration value="custom1"/>
      <xsd:enumeration value="custom2"/>
      <xsd:enumeration value="custom3"/>
      <xsd:enumeration value="custom4"/>
      <xsd:enumeration value="custom5"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_DocPartGallery">
    <xsd:attribute name="val" type="ST_DocPartGallery" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPartCategory">
    <xsd:sequence>
      <xsd:element name="name" type="CT_String" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="gallery" type="CT_DocPartGallery" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPartName">
    <xsd:attribute name="val" type="s:ST_String" use="required"/>
    <xsd:attribute name="decorated" type="s:ST_OnOff" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPartPr">
    <xsd:all>
      <xsd:element name="name" type="CT_DocPartName" minOccurs="1"/>
      <xsd:element name="style" type="CT_String" minOccurs="0"/>
      <xsd:element name="category" type="CT_DocPartCategory" minOccurs="0"/>
      <xsd:element name="types" type="CT_DocPartTypes" minOccurs="0"/>
      <xsd:element name="behaviors" type="CT_DocPartBehaviors" minOccurs="0"/>
      <xsd:element name="description" type="CT_String" minOccurs="0"/>
      <xsd:element name="guid" type="CT_Guid" minOccurs="0"/>
    </xsd:all>
  </xsd:complexType>
  <xsd:complexType name="CT_DocPart">
    <xsd:sequence>
      <xsd:element name="docPartPr" type="CT_DocPartPr" minOccurs="0"/>
      <xsd:element name="docPartBody" type="CT_Body" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DocParts">
    <xsd:choice>
      <xsd:element name="docPart" type="CT_DocPart" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:choice>
  </xsd:complexType>
  <xsd:element name="settings" type="CT_Settings"/>
  <xsd:element name="webSettings" type="CT_WebSettings"/>
  <xsd:element name="fonts" type="CT_FontsList"/>
  <xsd:element name="numbering" type="CT_Numbering"/>
  <xsd:element name="styles" type="CT_Styles"/>
  <xsd:simpleType name="ST_CaptionPos">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="above"/>
      <xsd:enumeration value="below"/>
      <xsd:enumeration value="left"/>
      <xsd:enumeration value="right"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:complexType name="CT_Caption">
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
    <xsd:attribute name="pos" type="ST_CaptionPos" use="optional"/>
    <xsd:attribute name="chapNum" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="heading" type="ST_DecimalNumber" use="optional"/>
    <xsd:attribute name="noLabel" type="s:ST_OnOff" use="optional"/>
    <xsd:attribute name="numFmt" type="ST_NumberFormat" use="optional"/>
    <xsd:attribute name="sep" type="ST_ChapterSep" use="optional"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AutoCaption">
    <xsd:attribute name="name" type="s:ST_String" use="required"/>
    <xsd:attribute name="caption" type="s:ST_String" use="required"/>
  </xsd:complexType>
  <xsd:complexType name="CT_AutoCaptions">
    <xsd:sequence>
      <xsd:element name="autoCaption" type="CT_AutoCaption" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Captions">
    <xsd:sequence>
      <xsd:element name="caption" type="CT_Caption" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="autoCaptions" type="CT_AutoCaptions" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_DocumentBase">
    <xsd:sequence>
      <xsd:element name="background" type="CT_Background" minOccurs="0"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="CT_Document">
    <xsd:complexContent>
      <xsd:extension base="CT_DocumentBase">
        <xsd:sequence>
          <xsd:element name="body" type="CT_Body" minOccurs="0" maxOccurs="1"/>
        </xsd:sequence>
        <xsd:attribute name="conformance" type="s:ST_ConformanceClass"/>
        <xsd:attribute ref="mc:Ignorable" use="optional" />
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:complexType name="CT_GlossaryDocument">
    <xsd:complexContent>
      <xsd:extension base="CT_DocumentBase">
        <xsd:sequence>
          <xsd:element name="docParts" type="CT_DocParts" minOccurs="0"/>
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>
  <xsd:element name="document" type="CT_Document"/>
  <xsd:element name="glossaryDocument" type="CT_GlossaryDocument"/>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
================================================
<?xml version='1.0'?>
<xs:schema targetNamespace="http://www.w3.org/XML/1998/namespace" xmlns:xs="http://www.w3.org/2001/XMLSchema" xml:lang="en">

 <xs:annotation>
  <xs:documentation>
   See http://www.w3.org/XML/1998/namespace.html and
   http://www.w3.org/TR/REC-xml for information about this namespace.

    This schema document describes the XML namespace, in a form
    suitable for import by other schema documents.  

    Note that local names in this namespace are intended to be defined
    only by the World Wide Web Consortium or its subgroups.  The
    following names are currently defined in this namespace and should
    not be used with conflicting semantics by any Working Group,
    specification, or document instance:

    base (as an attribute name): denotes an attribute whose value
         provides a URI to be used as the base for interpreting any
         relative URIs in the scope of the element on which it
         appears; its value is inherited.  This name is reserved
         by virtue of its definition in the XML Base specification.

    lang (as an attribute name): denotes an attribute whose value
         is a language code for the natural language of the content of
         any element; its value is inherited.  This name is reserved
         by virtue of its definition in the XML specification.
  
    space (as an attribute name): denotes an attribute whose
         value is a keyword indicating what whitespace processing
         discipline is intended for the content of the element; its
         value is inherited.  This name is reserved by virtue of its
         definition in the XML specification.

    Father (in any context at all): denotes Jon Bosak, the chair of 
         the original XML Working Group.  This name is reserved by 
         the following decision of the W3C XML Plenary and 
         XML Coordination groups:

             In appreciation for his vision, leadership and dedication
             the W3C XML Plenary on this 10th day of February, 2000
             reserves for Jon Bosak in perpetuity the XML name
             xml:Father
  </xs:documentation>
 </xs:annotation>

 <xs:annotation>
  <xs:documentation>This schema defines attributes and an attribute group
        suitable for use by
        schemas wishing to allow xml:base, xml:lang or xml:space attributes
        on elements they define.

        To enable this, such a schema must import this schema
        for the XML namespace, e.g. as follows:
        &lt;schema . . .>
         . . .
         &lt;import namespace="http://www.w3.org/XML/1998/namespace"
                    schemaLocation="http://www.w3.org/2001/03/xml.xsd"/>

        Subsequently, qualified reference to any of the attributes
        or the group defined below will have the desired effect, e.g.

        &lt;type . . .>
         . . .
         &lt;attributeGroup ref="xml:specialAttrs"/>
 
         will define a type which will schema-validate an instance
         element with any of those attributes</xs:documentation>
 </xs:annotation>

 <xs:annotation>
  <xs:documentation>In keeping with the XML Schema WG's standard versioning
   policy, this schema document will persist at
   http://www.w3.org/2001/03/xml.xsd.
   At the date of issue it can also be found at
   http://www.w3.org/2001/xml.xsd.
   The schema document at that URI may however change in the future,
   in order to remain compatible with the latest version of XML Schema
   itself.  In other words, if the XML Schema namespace changes, the version
   of this document at
   http://www.w3.org/2001/xml.xsd will change
   accordingly; the version at
   http://www.w3.org/2001/03/xml.xsd will not change.
  </xs:documentation>
 </xs:annotation>

 <xs:attribute name="lang" type="xs:language">
  <xs:annotation>
   <xs:documentation>In due course, we should install the relevant ISO 2- and 3-letter
         codes as the enumerated possible values . . .</xs:documentation>
  </xs:annotation>
 </xs:attribute>

 <xs:attribute name="space" default="preserve">
  <xs:simpleType>
   <xs:restriction base="xs:NCName">
    <xs:enumeration value="default"/>
    <xs:enumeration value="preserve"/>
   </xs:restriction>
  </xs:simpleType>
 </xs:attribute>

 <xs:attribute name="base" type="xs:anyURI">
  <xs:annotation>
   <xs:documentation>See http://www.w3.org/TR/xmlbase/ for
                     information about this attribute.</xs:documentation>
  </xs:annotation>
 </xs:attribute>

 <xs:attributeGroup name="specialAttrs">
  <xs:attribute ref="xml:base"/>
  <xs:attribute ref="xml:lang"/>
  <xs:attribute ref="xml:space"/>
 </xs:attributeGroup>

</xs:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
================================================
﻿<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<xs:schema xmlns="http://schemas.openxmlformats.org/package/2006/content-types"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  targetNamespace="http://schemas.openxmlformats.org/package/2006/content-types"
  elementFormDefault="qualified" attributeFormDefault="unqualified" blockDefault="#all">

  <xs:element name="Types" type="CT_Types"/>
  <xs:element name="Default" type="CT_Default"/>
  <xs:element name="Override" type="CT_Override"/>

  <xs:complexType name="CT_Types">
    <xs:choice minOccurs="0" maxOccurs="unbounded">
      <xs:element ref="Default"/>
      <xs:element ref="Override"/>
    </xs:choice>
  </xs:complexType>

  <xs:complexType name="CT_Default">
    <xs:attribute name="Extension" type="ST_Extension" use="required"/>
    <xs:attribute name="ContentType" type="ST_ContentType" use="required"/>
  </xs:complexType>

  <xs:complexType name="CT_Override">
    <xs:attribute name="ContentType" type="ST_ContentType" use="required"/>
    <xs:attribute name="PartName" type="xs:anyURI" use="required"/>
  </xs:complexType>

  <xs:simpleType name="ST_ContentType">
    <xs:restriction base="xs:string">
      <xs:pattern
        value="(((([\p{IsBasicLatin}-[\p{Cc}&#127;\(\)&lt;&gt;@,;:\\&quot;/\[\]\?=\{\}\s\t]])+))/((([\p{IsBasicLatin}-[\p{Cc}&#127;\(\)&lt;&gt;@,;:\\&quot;/\[\]\?=\{\}\s\t]])+))((\s+)*;(\s+)*(((([\p{IsBasicLatin}-[\p{Cc}&#127;\(\)&lt;&gt;@,;:\\&quot;/\[\]\?=\{\}\s\t]])+))=((([\p{IsBasicLatin}-[\p{Cc}&#127;\(\)&lt;&gt;@,;:\\&quot;/\[\]\?=\{\}\s\t]])+)|(&quot;(([\p{IsLatin-1Supplement}\p{IsBasicLatin}-[\p{Cc}&#127;&quot;\n\r]]|(\s+))|(\\[\p{IsBasicLatin}]))*&quot;))))*)"
      />
    </xs:restriction>
  </xs:simpleType>

  <xs:simpleType name="ST_Extension">
    <xs:restriction base="xs:string">
      <xs:pattern
        value="([!$&amp;'\(\)\*\+,:=]|(%[0-9a-fA-F][0-9a-fA-F])|[:@]|[a-zA-Z0-9\-_~])+"/>
    </xs:restriction>
  </xs:simpleType>
</xs:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
================================================
﻿<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  xmlns="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:dcterms="http://purl.org/dc/terms/" elementFormDefault="qualified" blockDefault="#all">

  <xs:import namespace="http://purl.org/dc/elements/1.1/"
    schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd"/>
  <xs:import namespace="http://purl.org/dc/terms/"
    schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dcterms.xsd"/>
  <xs:import id="xml" namespace="http://www.w3.org/XML/1998/namespace"/>

  <xs:element name="coreProperties" type="CT_CoreProperties"/>

  <xs:complexType name="CT_CoreProperties">
    <xs:all>
      <xs:element name="category" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="contentStatus" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element ref="dcterms:created" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="dc:creator" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="dc:description" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="dc:identifier" minOccurs="0" maxOccurs="1"/>
      <xs:element name="keywords" minOccurs="0" maxOccurs="1" type="CT_Keywords"/>
      <xs:element ref="dc:language" minOccurs="0" maxOccurs="1"/>
      <xs:element name="lastModifiedBy" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="lastPrinted" minOccurs="0" maxOccurs="1" type="xs:dateTime"/>
      <xs:element ref="dcterms:modified" minOccurs="0" maxOccurs="1"/>
      <xs:element name="revision" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element ref="dc:subject" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="dc:title" minOccurs="0" maxOccurs="1"/>
      <xs:element name="version" minOccurs="0" maxOccurs="1" type="xs:string"/>
    </xs:all>
  </xs:complexType>

  <xs:complexType name="CT_Keywords" mixed="true">
    <xs:sequence>
      <xs:element name="value" minOccurs="0" maxOccurs="unbounded" type="CT_Keyword"/>
    </xs:sequence>
    <xs:attribute ref="xml:lang" use="optional"/>
  </xs:complexType>

  <xs:complexType name="CT_Keyword">
    <xs:simpleContent>
      <xs:extension base="xs:string">
        <xs:attribute ref="xml:lang" use="optional"/>
      </xs:extension>
    </xs:simpleContent>
  </xs:complexType>

</xs:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
================================================
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns="http://schemas.openxmlformats.org/package/2006/digital-signature"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  targetNamespace="http://schemas.openxmlformats.org/package/2006/digital-signature"
  elementFormDefault="qualified" attributeFormDefault="unqualified" blockDefault="#all">

  <xsd:element name="SignatureTime" type="CT_SignatureTime"/>
  <xsd:element name="RelationshipReference" type="CT_RelationshipReference"/>
  <xsd:element name="RelationshipsGroupReference" type="CT_RelationshipsGroupReference"/>

  <xsd:complexType name="CT_SignatureTime">
    <xsd:sequence>
      <xsd:element name="Format" type="ST_Format"/>
      <xsd:element name="Value" type="ST_Value"/>
    </xsd:sequence>
  </xsd:complexType>

  <xsd:complexType name="CT_RelationshipReference">
    <xsd:simpleContent>
      <xsd:extension base="xsd:string">
        <xsd:attribute name="SourceId" type="xsd:string" use="required"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>

  <xsd:complexType name="CT_RelationshipsGroupReference">
    <xsd:simpleContent>
      <xsd:extension base="xsd:string">
        <xsd:attribute name="SourceType" type="xsd:anyURI" use="required"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>

  <xsd:simpleType name="ST_Format">
    <xsd:restriction base="xsd:string">
      <xsd:pattern
        value="(YYYY)|(YYYY-MM)|(YYYY-MM-DD)|(YYYY-MM-DDThh:mmTZD)|(YYYY-MM-DDThh:mm:ssTZD)|(YYYY-MM-DDThh:mm:ss.sTZD)"
      />
    </xsd:restriction>
  </xsd:simpleType>

  <xsd:simpleType name="ST_Value">
    <xsd:restriction base="xsd:string">
      <xsd:pattern
        value="(([0-9][0-9][0-9][0-9]))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2))))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1))))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))(((\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))(((\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])):(((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))\.[0-9])(((\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))"
      />
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
================================================
﻿<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<xsd:schema xmlns="http://schemas.openxmlformats.org/package/2006/relationships"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  targetNamespace="http://schemas.openxmlformats.org/package/2006/relationships"
  elementFormDefault="qualified" attributeFormDefault="unqualified" blockDefault="#all">

  <xsd:element name="Relationships" type="CT_Relationships"/>
  <xsd:element name="Relationship" type="CT_Relationship"/>

  <xsd:complexType name="CT_Relationships">
    <xsd:sequence>
      <xsd:element ref="Relationship" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>

  <xsd:complexType name="CT_Relationship">
    <xsd:simpleContent>
      <xsd:extension base="xsd:string">
        <xsd:attribute name="TargetMode" type="ST_TargetMode" use="optional"/>
        <xsd:attribute name="Target" type="xsd:anyURI" use="required"/>
        <xsd:attribute name="Type" type="xsd:anyURI" use="required"/>
        <xsd:attribute name="Id" type="xsd:ID" use="required"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>

  <xsd:simpleType name="ST_TargetMode">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="External"/>
      <xsd:enumeration value="Internal"/>
    </xsd:restriction>
  </xsd:simpleType>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/mce/mc.xsd
================================================
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
	attributeFormDefault="unqualified" elementFormDefault="qualified"
	targetNamespace="http://schemas.openxmlformats.org/markup-compatibility/2006"
	xmlns:xsd="http://www.w3.org/2001/XMLSchema">

  <!--
    This XSD is a modified version of the one found at:
    https://github.com/plutext/docx4j/blob/master/xsd/mce/markup-compatibility-2006-MINIMAL.xsd

    This XSD has 2 objectives:

        1. round tripping @mc:Ignorable

			<w:document
			            xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
			            xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
			            mc:Ignorable="w14 w15 wp14">

        2. enabling AlternateContent to be manipulated in certain elements
           (in the unusual case where the content model is xsd:any, it doesn't have to be explicitly added)

		See further ECMA-376, 4th Edition, Office Open XML File Formats
		Part 3 : Markup Compatibility and Extensibility
   -->

  <!--  Objective 1 -->
  <xsd:attribute name="Ignorable" type="xsd:string" />

  <!--  Objective 2 -->
	<xsd:attribute name="MustUnderstand" type="xsd:string"  />
	<xsd:attribute name="ProcessContent" type="xsd:string"  />

<!-- An AlternateContent element shall contain one or more Choice child elements, optionally followed by a
Fallback child element. If present, there shall be only one Fallback element, and it shall follow all Choice
elements. -->
	<xsd:element name="AlternateContent">
		<xsd:complexType>
			<xsd:sequence>
				<xsd:element name="Choice" minOccurs="0" maxOccurs="unbounded">
					<xsd:complexType>
						<xsd:sequence>
							<xsd:any minOccurs="0" maxOccurs="unbounded"
								processContents="strict">
							</xsd:any>
						</xsd:sequence>
						<xsd:attribute name="Requires" type="xsd:string" use="required" />
						<xsd:attribute ref="mc:Ignorable" use="optional" />
						<xsd:attribute ref="mc:MustUnderstand" use="optional" />
						<xsd:attribute ref="mc:ProcessContent" use="optional" />
					</xsd:complexType>
				</xsd:element>
				<xsd:element name="Fallback" minOccurs="0" maxOccurs="1">
					<xsd:complexType>
						<xsd:sequence>
							<xsd:any minOccurs="0" maxOccurs="unbounded"
								processContents="strict">
							</xsd:any>
						</xsd:sequence>
						<xsd:attribute ref="mc:Ignorable" use="optional" />
						<xsd:attribute ref="mc:MustUnderstand" use="optional" />
						<xsd:attribute ref="mc:ProcessContent" use="optional" />
					</xsd:complexType>
				</xsd:element>
			</xsd:sequence>
			<!-- AlternateContent elements might include the attributes Ignorable,
				MustUnderstand and ProcessContent described in this Part of ECMA-376. These
				attributes’ qualified names shall be prefixed when associated with an AlternateContent
				element. -->
			<xsd:attribute ref="mc:Ignorable" use="optional" />
			<xsd:attribute ref="mc:MustUnderstand" use="optional" />
			<xsd:attribute ref="mc:ProcessContent" use="optional" />
		</xsd:complexType>
	</xsd:element>
</xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns="http://schemas.microsoft.com/office/word/2010/wordml" targetNamespace="http://schemas.microsoft.com/office/word/2010/wordml">
   <!-- <xsd:import id="rel" namespace="http://schemas.openxmlformats.org/officeDocument/2006/relationships" schemaLocation="orel.xsd"/> -->
   <xsd:import id="w" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <!-- <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main" schemaLocation="oartbasetypes.xsd"/>
   <xsd:import namespace="http://schemas.openxmlformats.org/drawingml/2006/main" schemaLocation="oartsplineproperties.xsd"/> -->
   <xsd:complexType name="CT_LongHexNumber">
     <xsd:attribute name="val" type="w:ST_LongHexNumber" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_OnOff">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="true"/>
       <xsd:enumeration value="false"/>
       <xsd:enumeration value="0"/>
       <xsd:enumeration value="1"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_OnOff">
     <xsd:attribute name="val" type="ST_OnOff"/>
   </xsd:complexType>
   <xsd:element name="docId" type="CT_LongHexNumber"/>
   <xsd:element name="conflictMode" type="CT_OnOff"/>
   <xsd:attributeGroup name="AG_Parids">
     <xsd:attribute name="paraId" type="w:ST_LongHexNumber"/>
     <xsd:attribute name="textId" type="w:ST_LongHexNumber"/>
   </xsd:attributeGroup>
   <xsd:attribute name="anchorId" type="w:ST_LongHexNumber"/>
   <xsd:attribute name="noSpellErr" type="ST_OnOff"/>
   <xsd:element name="customXmlConflictInsRangeStart" type="w:CT_TrackChange"/>
   <xsd:element name="customXmlConflictInsRangeEnd" type="w:CT_Markup"/>
   <xsd:element name="customXmlConflictDelRangeStart" type="w:CT_TrackChange"/>
   <xsd:element name="customXmlConflictDelRangeEnd" type="w:CT_Markup"/>
   <xsd:group name="EG_RunLevelConflicts">
     <xsd:sequence>
       <xsd:element name="conflictIns" type="w:CT_RunTrackChange" minOccurs="0"/>
       <xsd:element name="conflictDel" type="w:CT_RunTrackChange" minOccurs="0"/>
     </xsd:sequence>
   </xsd:group>
   <xsd:group name="EG_Conflicts">
     <xsd:choice>
       <xsd:element name="conflictIns" type="w:CT_TrackChange" minOccurs="0"/>
       <xsd:element name="conflictDel" type="w:CT_TrackChange" minOccurs="0"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_Percentage">
     <xsd:attribute name="val" type="a:ST_Percentage" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_PositiveFixedPercentage">
     <xsd:attribute name="val" type="a:ST_PositiveFixedPercentage" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_PositivePercentage">
     <xsd:attribute name="val" type="a:ST_PositivePercentage" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_SchemeColorVal">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="bg1"/>
       <xsd:enumeration value="tx1"/>
       <xsd:enumeration value="bg2"/>
       <xsd:enumeration value="tx2"/>
       <xsd:enumeration value="accent1"/>
       <xsd:enumeration value="accent2"/>
       <xsd:enumeration value="accent3"/>
       <xsd:enumeration value="accent4"/>
       <xsd:enumeration value="accent5"/>
       <xsd:enumeration value="accent6"/>
       <xsd:enumeration value="hlink"/>
       <xsd:enumeration value="folHlink"/>
       <xsd:enumeration value="dk1"/>
       <xsd:enumeration value="lt1"/>
       <xsd:enumeration value="dk2"/>
       <xsd:enumeration value="lt2"/>
       <xsd:enumeration value="phClr"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_RectAlignment">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="none"/>
       <xsd:enumeration value="tl"/>
       <xsd:enumeration value="t"/>
       <xsd:enumeration value="tr"/>
       <xsd:enumeration value="l"/>
       <xsd:enumeration value="ctr"/>
       <xsd:enumeration value="r"/>
       <xsd:enumeration value="bl"/>
       <xsd:enumeration value="b"/>
       <xsd:enumeration value="br"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_PathShadeType">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="shape"/>
       <xsd:enumeration value="circle"/>
       <xsd:enumeration value="rect"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_LineCap">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="rnd"/>
       <xsd:enumeration value="sq"/>
       <xsd:enumeration value="flat"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_PresetLineDashVal">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="solid"/>
       <xsd:enumeration value="dot"/>
       <xsd:enumeration value="sysDot"/>
       <xsd:enumeration value="dash"/>
       <xsd:enumeration value="sysDash"/>
       <xsd:enumeration value="lgDash"/>
       <xsd:enumeration value="dashDot"/>
       <xsd:enumeration value="sysDashDot"/>
       <xsd:enumeration value="lgDashDot"/>
       <xsd:enumeration value="lgDashDotDot"/>
       <xsd:enumeration value="sysDashDotDot"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_PenAlignment">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="ctr"/>
       <xsd:enumeration value="in"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_CompoundLine">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="sng"/>
       <xsd:enumeration value="dbl"/>
       <xsd:enumeration value="thickThin"/>
       <xsd:enumeration value="thinThick"/>
       <xsd:enumeration value="tri"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_RelativeRect">
     <xsd:attribute name="l" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="t" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="r" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="b" use="optional" type="a:ST_Percentage"/>
   </xsd:complexType>
   <xsd:group name="EG_ColorTransform">
     <xsd:choice>
       <xsd:element name="tint" type="CT_PositiveFixedPercentage"/>
       <xsd:element name="shade" type="CT_PositiveFixedPercentage"/>
       <xsd:element name="alpha" type="CT_PositiveFixedPercentage"/>
       <xsd:element name="hueMod" type="CT_PositivePercentage"/>
       <xsd:element name="sat" type="CT_Percentage"/>
       <xsd:element name="satOff" type="CT_Percentage"/>
       <xsd:element name="satMod" type="CT_Percentage"/>
       <xsd:element name="lum" type="CT_Percentage"/>
       <xsd:element name="lumOff" type="CT_Percentage"/>
       <xsd:element name="lumMod" type="CT_Percentage"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_SRgbColor">
     <xsd:sequence>
       <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
     <xsd:attribute name="val" type="s:ST_HexColorRGB" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_SchemeColor">
     <xsd:sequence>
       <xsd:group ref="EG_ColorTransform" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
     <xsd:attribute name="val" type="ST_SchemeColorVal" use="required"/>
   </xsd:complexType>
   <xsd:group name="EG_ColorChoice">
     <xsd:choice>
       <xsd:element name="srgbClr" type="CT_SRgbColor"/>
       <xsd:element name="schemeClr" type="CT_SchemeColor"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_Color">
     <xsd:sequence>
       <xsd:group ref="EG_ColorChoice"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_GradientStop">
     <xsd:sequence>
       <xsd:group ref="EG_ColorChoice"/>
     </xsd:sequence>
     <xsd:attribute name="pos" type="a:ST_PositiveFixedPercentage" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_GradientStopList">
     <xsd:sequence>
       <xsd:element name="gs" type="CT_GradientStop" minOccurs="2" maxOccurs="10"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_LinearShadeProperties">
     <xsd:attribute name="ang" type="a:ST_PositiveFixedAngle" use="optional"/>
     <xsd:attribute name="scaled" type="ST_OnOff" use="optional"/>
   </xsd:complexType>
   <xsd:complexType name="CT_PathShadeProperties">
     <xsd:sequence>
       <xsd:element name="fillToRect" type="CT_RelativeRect" minOccurs="0"/>
     </xsd:sequence>
     <xsd:attribute name="path" type="ST_PathShadeType" use="optional"/>
   </xsd:complexType>
   <xsd:group name="EG_ShadeProperties">
     <xsd:choice>
       <xsd:element name="lin" type="CT_LinearShadeProperties"/>
       <xsd:element name="path" type="CT_PathShadeProperties"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_SolidColorFillProperties">
     <xsd:sequence>
       <xsd:group ref="EG_ColorChoice" minOccurs="0"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_GradientFillProperties">
     <xsd:sequence>
       <xsd:element name="gsLst" type="CT_GradientStopList" minOccurs="0"/>
       <xsd:group ref="EG_ShadeProperties" minOccurs="0"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:group name="EG_FillProperties">
     <xsd:choice>
       <xsd:element name="noFill" type="w:CT_Empty"/>
       <xsd:element name="solidFill" type="CT_SolidColorFillProperties"/>
       <xsd:element name="gradFill" type="CT_GradientFillProperties"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_PresetLineDashProperties">
     <xsd:attribute name="val" type="ST_PresetLineDashVal" use="optional"/>
   </xsd:complexType>
   <xsd:group name="EG_LineDashProperties">
     <xsd:choice>
       <xsd:element name="prstDash" type="CT_PresetLineDashProperties"/>
     </xsd:choice>
   </xsd:group>
   <xsd:complexType name="CT_LineJoinMiterProperties">
     <xsd:attribute name="lim" type="a:ST_PositivePercentage" use="optional"/>
   </xsd:complexType>
   <xsd:group name="EG_LineJoinProperties">
     <xsd:choice>
       <xsd:element name="round" type="w:CT_Empty"/>
       <xsd:element name="bevel" type="w:CT_Empty"/>
       <xsd:element name="miter" type="CT_LineJoinMiterProperties"/>
     </xsd:choice>
   </xsd:group>
   <xsd:simpleType name="ST_PresetCameraType">
     <xsd:restriction base="xsd:token">
       <xsd:enumeration value="legacyObliqueTopLeft"/>
       <xsd:enumeration value="legacyObliqueTop"/>
       <xsd:enumeration value="legacyObliqueTopRight"/>
       <xsd:enumeration value="legacyObliqueLeft"/>
       <xsd:enumeration value="legacyObliqueFront"/>
       <xsd:enumeration value="legacyObliqueRight"/>
       <xsd:enumeration value="legacyObliqueBottomLeft"/>
       <xsd:enumeration value="legacyObliqueBottom"/>
       <xsd:enumeration value="legacyObliqueBottomRight"/>
       <xsd:enumeration value="legacyPerspectiveTopLeft"/>
       <xsd:enumeration value="legacyPerspectiveTop"/>
       <xsd:enumeration value="legacyPerspectiveTopRight"/>
       <xsd:enumeration value="legacyPerspectiveLeft"/>
       <xsd:enumeration value="legacyPerspectiveFront"/>
       <xsd:enumeration value="legacyPerspectiveRight"/>
       <xsd:enumeration value="legacyPerspectiveBottomLeft"/>
       <xsd:enumeration value="legacyPerspectiveBottom"/>
       <xsd:enumeration value="legacyPerspectiveBottomRight"/>
       <xsd:enumeration value="orthographicFront"/>
       <xsd:enumeration value="isometricTopUp"/>
       <xsd:enumeration value="isometricTopDown"/>
       <xsd:enumeration value="isometricBottomUp"/>
       <xsd:enumeration value="isometricBottomDown"/>
       <xsd:enumeration value="isometricLeftUp"/>
       <xsd:enumeration value="isometricLeftDown"/>
       <xsd:enumeration value="isometricRightUp"/>
       <xsd:enumeration value="isometricRightDown"/>
       <xsd:enumeration value="isometricOffAxis1Left"/>
       <xsd:enumeration value="isometricOffAxis1Right"/>
       <xsd:enumeration value="isometricOffAxis1Top"/>
       <xsd:enumeration value="isometricOffAxis2Left"/>
       <xsd:enumeration value="isometricOffAxis2Right"/>
       <xsd:enumeration value="isometricOffAxis2Top"/>
       <xsd:enumeration value="isometricOffAxis3Left"/>
       <xsd:enumeration value="isometricOffAxis3Right"/>
       <xsd:enumeration value="isometricOffAxis3Bottom"/>
       <xsd:enumeration value="isometricOffAxis4Left"/>
       <xsd:enumeration value="isometricOffAxis4Right"/>
       <xsd:enumeration value="isometricOffAxis4Bottom"/>
       <xsd:enumeration value="obliqueTopLeft"/>
       <xsd:enumeration value="obliqueTop"/>
       <xsd:enumeration value="obliqueTopRight"/>
       <xsd:enumeration value="obliqueLeft"/>
       <xsd:enumeration value="obliqueRight"/>
       <xsd:enumeration value="obliqueBottomLeft"/>
       <xsd:enumeration value="obliqueBottom"/>
       <xsd:enumeration value="obliqueBottomRight"/>
       <xsd:enumeration value="perspectiveFront"/>
       <xsd:enumeration value="perspectiveLeft"/>
       <xsd:enumeration value="perspectiveRight"/>
       <xsd:enumeration value="perspectiveAbove"/>
       <xsd:enumeration value="perspectiveBelow"/>
       <xsd:enumeration value="perspectiveAboveLeftFacing"/>
       <xsd:enumeration value="perspectiveAboveRightFacing"/>
       <xsd:enumeration value="perspectiveContrastingLeftFacing"/>
       <xsd:enumeration value="perspectiveContrastingRightFacing"/>
       <xsd:enumeration value="perspectiveHeroicLeftFacing"/>
       <xsd:enumeration value="perspectiveHeroicRightFacing"/>
       <xsd:enumeration value="perspectiveHeroicExtremeLeftFacing"/>
       <xsd:enumeration value="perspectiveHeroicExtremeRightFacing"/>
       <xsd:enumeration value="perspectiveRelaxed"/>
       <xsd:enumeration value="perspectiveRelaxedModerately"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_Camera">
     <xsd:attribute name="prst" use="required" type="ST_PresetCameraType"/>
   </xsd:complexType>
   <xsd:complexType name="CT_SphereCoords">
     <xsd:attribute name="lat" type="a:ST_PositiveFixedAngle" use="required"/>
     <xsd:attribute name="lon" type="a:ST_PositiveFixedAngle" use="required"/>
     <xsd:attribute name="rev" type="a:ST_PositiveFixedAngle" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_LightRigType">
     <xsd:restriction base="xsd:token">
       <xsd:enumeration value="legacyFlat1"/>
       <xsd:enumeration value="legacyFlat2"/>
       <xsd:enumeration value="legacyFlat3"/>
       <xsd:enumeration value="legacyFlat4"/>
       <xsd:enumeration value="legacyNormal1"/>
       <xsd:enumeration value="legacyNormal2"/>
       <xsd:enumeration value="legacyNormal3"/>
       <xsd:enumeration value="legacyNormal4"/>
       <xsd:enumeration value="legacyHarsh1"/>
       <xsd:enumeration value="legacyHarsh2"/>
       <xsd:enumeration value="legacyHarsh3"/>
       <xsd:enumeration value="legacyHarsh4"/>
       <xsd:enumeration value="threePt"/>
       <xsd:enumeration value="balanced"/>
       <xsd:enumeration value="soft"/>
       <xsd:enumeration value="harsh"/>
       <xsd:enumeration value="flood"/>
       <xsd:enumeration value="contrasting"/>
       <xsd:enumeration value="morning"/>
       <xsd:enumeration value="sunrise"/>
       <xsd:enumeration value="sunset"/>
       <xsd:enumeration value="chilly"/>
       <xsd:enumeration value="freezing"/>
       <xsd:enumeration value="flat"/>
       <xsd:enumeration value="twoPt"/>
       <xsd:enumeration value="glow"/>
       <xsd:enumeration value="brightRoom"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:simpleType name="ST_LightRigDirection">
     <xsd:restriction base="xsd:token">
       <xsd:enumeration value="tl"/>
       <xsd:enumeration value="t"/>
       <xsd:enumeration value="tr"/>
       <xsd:enumeration value="l"/>
       <xsd:enumeration value="r"/>
       <xsd:enumeration value="bl"/>
       <xsd:enumeration value="b"/>
       <xsd:enumeration value="br"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_LightRig">
     <xsd:sequence>
       <xsd:element name="rot" type="CT_SphereCoords" minOccurs="0"/>
     </xsd:sequence>
     <xsd:attribute name="rig" type="ST_LightRigType" use="required"/>
     <xsd:attribute name="dir" type="ST_LightRigDirection" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_BevelPresetType">
     <xsd:restriction base="xsd:token">
       <xsd:enumeration value="relaxedInset"/>
       <xsd:enumeration value="circle"/>
       <xsd:enumeration value="slope"/>
       <xsd:enumeration value="cross"/>
       <xsd:enumeration value="angle"/>
       <xsd:enumeration value="softRound"/>
       <xsd:enumeration value="convex"/>
       <xsd:enumeration value="coolSlant"/>
       <xsd:enumeration value="divot"/>
       <xsd:enumeration value="riblet"/>
       <xsd:enumeration value="hardEdge"/>
       <xsd:enumeration value="artDeco"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_Bevel">
     <xsd:attribute name="w" type="a:ST_PositiveCoordinate" use="optional"/>
     <xsd:attribute name="h" type="a:ST_PositiveCoordinate" use="optional"/>
     <xsd:attribute name="prst" type="ST_BevelPresetType" use="optional"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_PresetMaterialType">
     <xsd:restriction base="xsd:token">
       <xsd:enumeration value="legacyMatte"/>
       <xsd:enumeration value="legacyPlastic"/>
       <xsd:enumeration value="legacyMetal"/>
       <xsd:enumeration value="legacyWireframe"/>
       <xsd:enumeration value="matte"/>
       <xsd:enumeration value="plastic"/>
       <xsd:enumeration value="metal"/>
       <xsd:enumeration value="warmMatte"/>
       <xsd:enumeration value="translucentPowder"/>
       <xsd:enumeration value="powder"/>
       <xsd:enumeration value="dkEdge"/>
       <xsd:enumeration value="softEdge"/>
       <xsd:enumeration value="clear"/>
       <xsd:enumeration value="flat"/>
       <xsd:enumeration value="softmetal"/>
       <xsd:enumeration value="none"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_Glow">
     <xsd:sequence>
       <xsd:group ref="EG_ColorChoice"/>
     </xsd:sequence>
     <xsd:attribute name="rad" use="optional" type="a:ST_PositiveCoordinate"/>
   </xsd:complexType>
   <xsd:complexType name="CT_Shadow">
     <xsd:sequence>
       <xsd:group ref="EG_ColorChoice"/>
     </xsd:sequence>
     <xsd:attribute name="blurRad" use="optional" type="a:ST_PositiveCoordinate"/>
     <xsd:attribute name="dist" use="optional" type="a:ST_PositiveCoordinate"/>
     <xsd:attribute name="dir" use="optional" type="a:ST_PositiveFixedAngle"/>
     <xsd:attribute name="sx" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="sy" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="kx" use="optional" type="a:ST_FixedAngle"/>
     <xsd:attribute name="ky" use="optional" type="a:ST_FixedAngle"/>
     <xsd:attribute name="algn" use="optional" type="ST_RectAlignment"/>
   </xsd:complexType>
   <xsd:complexType name="CT_Reflection">
     <xsd:attribute name="blurRad" use="optional" type="a:ST_PositiveCoordinate"/>
     <xsd:attribute name="stA" use="optional" type="a:ST_PositiveFixedPercentage"/>
     <xsd:attribute name="stPos" use="optional" type="a:ST_PositiveFixedPercentage"/>
     <xsd:attribute name="endA" use="optional" type="a:ST_PositiveFixedPercentage"/>
     <xsd:attribute name="endPos" use="optional" type="a:ST_PositiveFixedPercentage"/>
     <xsd:attribute name="dist" use="optional" type="a:ST_PositiveCoordinate"/>
     <xsd:attribute name="dir" use="optional" type="a:ST_PositiveFixedAngle"/>
     <xsd:attribute name="fadeDir" use="optional" type="a:ST_PositiveFixedAngle"/>
     <xsd:attribute name="sx" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="sy" use="optional" type="a:ST_Percentage"/>
     <xsd:attribute name="kx" use="optional" type="a:ST_FixedAngle"/>
     <xsd:attribute name="ky" use="optional" type="a:ST_FixedAngle"/>
     <xsd:attribute name="algn" use="optional" type="ST_RectAlignment"/>
   </xsd:complexType>
   <xsd:complexType name="CT_FillTextEffect">
     <xsd:sequence>
       <xsd:group ref="EG_FillProperties" minOccurs="0"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_TextOutlineEffect">
     <xsd:sequence>
       <xsd:group ref="EG_FillProperties" minOccurs="0"/>
       <xsd:group ref="EG_LineDashProperties" minOccurs="0"/>
       <xsd:group ref="EG_LineJoinProperties" minOccurs="0"/>
     </xsd:sequence>
     <xsd:attribute name="w" use="optional" type="a:ST_LineWidth"/>
     <xsd:attribute name="cap" use="optional" type="ST_LineCap"/>
     <xsd:attribute name="cmpd" use="optional" type="ST_CompoundLine"/>
     <xsd:attribute name="algn" use="optional" type="ST_PenAlignment"/>
   </xsd:complexType>
   <xsd:complexType name="CT_Scene3D">
     <xsd:sequence>
       <xsd:element name="camera" type="CT_Camera"/>
       <xsd:element name="lightRig" type="CT_LightRig"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_Props3D">
     <xsd:sequence>
       <xsd:element name="bevelT" type="CT_Bevel" minOccurs="0"/>
       <xsd:element name="bevelB" type="CT_Bevel" minOccurs="0"/>
       <xsd:element name="extrusionClr" type="CT_Color" minOccurs="0"/>
       <xsd:element name="contourClr" type="CT_Color" minOccurs="0"/>
     </xsd:sequence>
     <xsd:attribute name="extrusionH" type="a:ST_PositiveCoordinate" use="optional"/>
     <xsd:attribute name="contourW" type="a:ST_PositiveCoordinate" use="optional"/>
     <xsd:attribute name="prstMaterial" type="ST_PresetMaterialType" use="optional"/>
   </xsd:complexType>
   <xsd:group name="EG_RPrTextEffects">
     <xsd:sequence>
       <xsd:element name="glow" minOccurs="0" type="CT_Glow"/>
       <xsd:element name="shadow" minOccurs="0" type="CT_Shadow"/>
       <xsd:element name="reflection" minOccurs="0" type="CT_Reflection"/>
       <xsd:element name="textOutline" minOccurs="0" type="CT_TextOutlineEffect"/>
       <xsd:element name="textFill" minOccurs="0" type="CT_FillTextEffect"/>
       <xsd:element name="scene3d" minOccurs="0" type="CT_Scene3D"/>
       <xsd:element name="props3d" minOccurs="0" type="CT_Props3D"/>
     </xsd:sequence>
   </xsd:group>
   <xsd:simpleType name="ST_Ligatures">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="none"/>
       <xsd:enumeration value="standard"/>
       <xsd:enumeration value="contextual"/>
       <xsd:enumeration value="historical"/>
       <xsd:enumeration value="discretional"/>
       <xsd:enumeration value="standardContextual"/>
       <xsd:enumeration value="standardHistorical"/>
       <xsd:enumeration value="contextualHistorical"/>
       <xsd:enumeration value="standardDiscretional"/>
       <xsd:enumeration value="contextualDiscretional"/>
       <xsd:enumeration value="historicalDiscretional"/>
       <xsd:enumeration value="standardContextualHistorical"/>
       <xsd:enumeration value="standardContextualDiscretional"/>
       <xsd:enumeration value="standardHistoricalDiscretional"/>
       <xsd:enumeration value="contextualHistoricalDiscretional"/>
       <xsd:enumeration value="all"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_Ligatures">
     <xsd:attribute name="val" type="ST_Ligatures" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_NumForm">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="default"/>
       <xsd:enumeration value="lining"/>
       <xsd:enumeration value="oldStyle"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_NumForm">
     <xsd:attribute name="val" type="ST_NumForm" use="required"/>
   </xsd:complexType>
   <xsd:simpleType name="ST_NumSpacing">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="default"/>
       <xsd:enumeration value="proportional"/>
       <xsd:enumeration value="tabular"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_NumSpacing">
     <xsd:attribute name="val" type="ST_NumSpacing" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_StyleSet">
     <xsd:attribute name="id" type="s:ST_UnsignedDecimalNumber" use="required"/>
     <xsd:attribute name="val" type="ST_OnOff" use="optional"/>
   </xsd:complexType>
   <xsd:complexType name="CT_StylisticSets">
     <xsd:sequence minOccurs="0">
       <xsd:element name="styleSet" minOccurs="0" maxOccurs="unbounded" type="CT_StyleSet"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:group name="EG_RPrOpenType">
     <xsd:sequence>
       <xsd:element name="ligatures" minOccurs="0" type="CT_Ligatures"/>
       <xsd:element name="numForm" minOccurs="0" type="CT_NumForm"/>
       <xsd:element name="numSpacing" minOccurs="0" type="CT_NumSpacing"/>
       <xsd:element name="stylisticSets" minOccurs="0" type="CT_StylisticSets"/>
       <xsd:element name="cntxtAlts" minOccurs="0" type="CT_OnOff"/>
     </xsd:sequence>
   </xsd:group>
   <xsd:element name="discardImageEditingData" type="CT_OnOff"/>
   <xsd:element name="defaultImageDpi" type="CT_DefaultImageDpi"/>
   <xsd:complexType name="CT_DefaultImageDpi">
     <xsd:attribute name="val" type="w:ST_DecimalNumber" use="required"/>
   </xsd:complexType>
   <xsd:element name="entityPicker" type="w:CT_Empty"/>
   <xsd:complexType name="CT_SdtCheckboxSymbol">
     <xsd:attribute name="font" type="s:ST_String"/>
     <xsd:attribute name="val" type="w:ST_ShortHexNumber"/>
   </xsd:complexType>
   <xsd:complexType name="CT_SdtCheckbox">
     <xsd:sequence>
       <xsd:element name="checked" type="CT_OnOff" minOccurs="0"/>
       <xsd:element name="checkedState" type="CT_SdtCheckboxSymbol" minOccurs="0"/>
       <xsd:element name="uncheckedState" type="CT_SdtCheckboxSymbol" minOccurs="0"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:element name="checkbox" type="CT_SdtCheckbox"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2012/wordml" targetNamespace="http://schemas.microsoft.com/office/word/2012/wordml">
   <xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:import namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" schemaLocation="../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd"/>
   <xsd:element name="color" type="w12:CT_Color"/>
   <xsd:simpleType name="ST_SdtAppearance">
     <xsd:restriction base="xsd:string">
       <xsd:enumeration value="boundingBox"/>
       <xsd:enumeration value="tags"/>
       <xsd:enumeration value="hidden"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:element name="dataBinding" type="w12:CT_DataBinding"/>
   <xsd:complexType name="CT_SdtAppearance">
     <xsd:attribute name="val" type="ST_SdtAppearance"/>
   </xsd:complexType>
   <xsd:element name="appearance" type="CT_SdtAppearance"/>
   <xsd:complexType name="CT_CommentsEx">
     <xsd:sequence>
       <xsd:element name="commentEx" type="CT_CommentEx" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_CommentEx">
     <xsd:attribute name="paraId" type="w12:ST_LongHexNumber" use="required"/>
     <xsd:attribute name="paraIdParent" type="w12:ST_LongHexNumber" use="optional"/>
     <xsd:attribute name="done" type="s:ST_OnOff" use="optional"/>
   </xsd:complexType>
   <xsd:element name="commentsEx" type="CT_CommentsEx"/>
   <xsd:complexType name="CT_People">
     <xsd:sequence>
       <xsd:element name="person" type="CT_Person" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_PresenceInfo">
     <xsd:attribute name="providerId" type="xsd:string" use="required"/>
     <xsd:attribute name="userId" type="xsd:string" use="required"/>
   </xsd:complexType>
   <xsd:complexType name="CT_Person">
     <xsd:sequence>
       <xsd:element name="presenceInfo" type="CT_PresenceInfo" minOccurs="0" maxOccurs="1"/>
     </xsd:sequence>
     <xsd:attribute name="author" type="s:ST_String" use="required"/>
   </xsd:complexType>
   <xsd:element name="people" type="CT_People"/>
   <xsd:complexType name="CT_SdtRepeatedSection">
     <xsd:sequence>
       <xsd:element name="sectionTitle" type="w12:CT_String" minOccurs="0"/>
       <xsd:element name="doNotAllowInsertDeleteSection" type="w12:CT_OnOff" minOccurs="0"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:simpleType name="ST_Guid">
     <xsd:restriction base="xsd:token">
       <xsd:pattern value="\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\}"/>
     </xsd:restriction>
   </xsd:simpleType>
   <xsd:complexType name="CT_Guid">
     <xsd:attribute name="val" type="ST_Guid"/>
   </xsd:complexType>
   <xsd:element name="repeatingSection" type="CT_SdtRepeatedSection"/>
   <xsd:element name="repeatingSectionItem" type="w12:CT_Empty"/>
   <xsd:element name="chartTrackingRefBased" type="w12:CT_OnOff"/>
   <xsd:element name="collapsed" type="w12:CT_OnOff"/>
   <xsd:element name="docId" type="CT_Guid"/>
   <xsd:element name="footnoteColumns" type="w12:CT_DecimalNumber"/>
   <xsd:element name="webExtensionLinked" type="w12:CT_OnOff"/>
   <xsd:element name="webExtensionCreated" type="w12:CT_OnOff"/>
   <xsd:attribute name="restartNumberingAfterBreak" type="s:ST_OnOff"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2018/wordml" targetNamespace="http://schemas.microsoft.com/office/word/2018/wordml">
   <xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:complexType name="CT_Extension">
     <xsd:sequence>
       <xsd:any processContents="lax"/>
     </xsd:sequence>
     <xsd:attribute name="uri" type="xsd:token"/>
   </xsd:complexType>
   <xsd:complexType name="CT_ExtensionList">
     <xsd:sequence>
       <xsd:element name="ext" type="CT_Extension" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
   </xsd:complexType>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2018/wordml/cex" targetNamespace="http://schemas.microsoft.com/office/word/2018/wordml/cex">
   <xsd:import id="w16" namespace="http://schemas.microsoft.com/office/word/2018/wordml" schemaLocation="wml-2018.xsd"/>
   <xsd:import id="w" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:import id="s" namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" schemaLocation="../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd"/>
   <xsd:complexType name="CT_CommentsExtensible">
     <xsd:sequence>
       <xsd:element name="commentExtensible" type="CT_CommentExtensible" minOccurs="0" maxOccurs="unbounded"/>
       <xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_CommentExtensible">
     <xsd:sequence>
       <xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
     </xsd:sequence>
     <xsd:attribute name="durableId" type="w:ST_LongHexNumber" use="required"/>
     <xsd:attribute name="dateUtc" type="w:ST_DateTime" use="optional"/>
     <xsd:attribute name="intelligentPlaceholder" type="s:ST_OnOff" use="optional"/>
   </xsd:complexType>
   <xsd:element name="commentsExtensible" type="CT_CommentsExtensible"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2016/wordml/cid" targetNamespace="http://schemas.microsoft.com/office/word/2016/wordml/cid">
   <xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:complexType name="CT_CommentsIds">
     <xsd:sequence>
       <xsd:element name="commentId" type="CT_CommentId" minOccurs="0" maxOccurs="unbounded"/>
     </xsd:sequence>
   </xsd:complexType>
   <xsd:complexType name="CT_CommentId">
     <xsd:attribute name="paraId" type="w12:ST_LongHexNumber" use="required"/>
     <xsd:attribute name="durableId" type="w12:ST_LongHexNumber" use="required"/>
   </xsd:complexType>
   <xsd:element name="commentsIds" type="CT_CommentsIds"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" targetNamespace="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash">
   <xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:attribute name="storeItemChecksum" type="w12:ST_String"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
================================================
 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2015/wordml/symex" targetNamespace="http://schemas.microsoft.com/office/word/2015/wordml/symex">
   <xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
   <xsd:complexType name="CT_SymEx">
     <xsd:attribute name="font" type="w12:ST_String"/>
     <xsd:attribute name="char" type="w12:ST_LongHexNumber"/>
   </xsd:complexType>
   <xsd:element name="symEx" type="CT_SymEx"/>
 </xsd:schema>


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/soffice.py
================================================
"""
Helper for running LibreOffice (soffice) in environments where AF_UNIX
sockets may be blocked (e.g., sandboxed VMs).  Detects the restriction
at runtime and applies an LD_PRELOAD shim if needed.

Usage:
    from office.soffice import run_soffice, get_soffice_env

    # Option 1 – run soffice directly
    result = run_soffice(["--headless", "--convert-to", "pdf", "input.docx"])

    # Option 2 – get env dict for your own subprocess calls
    env = get_soffice_env()
    subprocess.run(["soffice", ...], env=env)
"""

import os
import socket
import subprocess
import tempfile
from pathlib import Path


def get_soffice_env() -> dict:
    env = os.environ.copy()
    env["SAL_USE_VCLPLUGIN"] = "svp"

    if _needs_shim():
        shim = _ensure_shim()
        env["LD_PRELOAD"] = str(shim)

    return env


def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:
    env = get_soffice_env()
    return subprocess.run(["soffice"] + args, env=env, **kwargs)


_SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so"


def _needs_shim() -> bool:
    try:
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        s.close()
        return False
    except OSError:
        return True


def _ensure_shim() -> Path:
    if _SHIM_SO.exists():
        return _SHIM_SO

    src = Path(tempfile.gettempdir()) / "lo_socket_shim.c"
    src.write_text(_SHIM_SOURCE)
    subprocess.run(
        ["gcc", "-shared", "-fPIC", "-o", str(_SHIM_SO), str(src), "-ldl"],
        check=True,
        capture_output=True,
    )
    src.unlink()
    return _SHIM_SO


_SHIM_SOURCE = r"""
#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <unistd.h>

static int (*real_socket)(int, int, int);
static int (*real_socketpair)(int, int, int, int[2]);
static int (*real_listen)(int, int);
static int (*real_accept)(int, struct sockaddr *, socklen_t *);
static int (*real_close)(int);
static int (*real_read)(int, void *, size_t);

/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */
static int is_shimmed[1024];
static int peer_of[1024];
static int wake_r[1024];            /* accept() blocks reading this */
static int wake_w[1024];            /* close()  writes to this      */
static int listener_fd = -1;        /* FD that received listen()    */

__attribute__((constructor))
static void init(void) {
    real_socket     = dlsym(RTLD_NEXT, "socket");
    real_socketpair = dlsym(RTLD_NEXT, "socketpair");
    real_listen     = dlsym(RTLD_NEXT, "listen");
    real_accept     = dlsym(RTLD_NEXT, "accept");
    real_close      = dlsym(RTLD_NEXT, "close");
    real_read       = dlsym(RTLD_NEXT, "read");
    for (int i = 0; i < 1024; i++) {
        peer_of[i] = -1;
        wake_r[i]  = -1;
        wake_w[i]  = -1;
    }
}

/* ---- socket ---------------------------------------------------------- */
int socket(int domain, int type, int protocol) {
    if (domain == AF_UNIX) {
        int fd = real_socket(domain, type, protocol);
        if (fd >= 0) return fd;
        /* socket(AF_UNIX) blocked – fall back to socketpair(). */
        int sv[2];
        if (real_socketpair(domain, type, protocol, sv) == 0) {
            if (sv[0] >= 0 && sv[0] < 1024) {
                is_shimmed[sv[0]] = 1;
                peer_of[sv[0]]    = sv[1];
                int wp[2];
                if (pipe(wp) == 0) {
                    wake_r[sv[0]] = wp[0];
                    wake_w[sv[0]] = wp[1];
                }
            }
            return sv[0];
        }
        errno = EPERM;
        return -1;
    }
    return real_socket(domain, type, protocol);
}

/* ---- listen ---------------------------------------------------------- */
int listen(int sockfd, int backlog) {
    if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
        listener_fd = sockfd;
        return 0;
    }
    return real_listen(sockfd, backlog);
}

/* ---- accept ---------------------------------------------------------- */
int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
    if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
        /* Block until close() writes to the wake pipe. */
        if (wake_r[sockfd] >= 0) {
            char buf;
            real_read(wake_r[sockfd], &buf, 1);
        }
        errno = ECONNABORTED;
        return -1;
    }
    return real_accept(sockfd, addr, addrlen);
}

/* ---- close ----------------------------------------------------------- */
int close(int fd) {
    if (fd >= 0 && fd < 1024 && is_shimmed[fd]) {
        int was_listener = (fd == listener_fd);
        is_shimmed[fd] = 0;

        if (wake_w[fd] >= 0) {              /* unblock accept() */
            char c = 0;
            write(wake_w[fd], &c, 1);
            real_close(wake_w[fd]);
            wake_w[fd] = -1;
        }
        if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd]  = -1; }
        if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; }

        if (was_listener)
            _exit(0);                        /* conversion done – exit */
    }
    return real_close(fd);
}
"""


if __name__ == "__main__":
    import sys

    result = run_soffice(sys.argv[1:])
    sys.exit(result.returncode)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/unpack.py
================================================
"""Unpack Office files (DOCX, PPTX, XLSX) for editing.

Extracts the ZIP archive, pretty-prints XML files, and optionally:
- Merges adjacent runs with identical formatting (DOCX only)
- Simplifies adjacent tracked changes from same author (DOCX only)

Usage:
    python unpack.py <office_file> <output_dir> [options]

Examples:
    python unpack.py document.docx unpacked/
    python unpack.py presentation.pptx unpacked/
    python unpack.py document.docx unpacked/ --merge-runs false
"""

import argparse
import sys
import zipfile
from pathlib import Path

import defusedxml.minidom
from helpers.merge_runs import merge_runs as do_merge_runs
from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines

SMART_QUOTE_REPLACEMENTS = {
    "\u201c": "&#x201C;",
    "\u201d": "&#x201D;",
    "\u2018": "&#x2018;",
    "\u2019": "&#x2019;",
}


def unpack(
    input_file: str,
    output_directory: str,
    merge_runs: bool = True,
    simplify_redlines: bool = True,
) -> tuple[None, str]:
    input_path = Path(input_file)
    output_path = Path(output_directory)
    suffix = input_path.suffix.lower()

    if not input_path.exists():
        return None, f"Error: {input_file} does not exist"

    if suffix not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"

    try:
        output_path.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(input_path, "r") as zf:
            zf.extractall(output_path)

        xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
        for xml_file in xml_files:
            _pretty_print_xml(xml_file)

        message = f"Unpacked {input_file} ({len(xml_files)} XML files)"

        if suffix == ".docx":
            if simplify_redlines:
                simplify_count, _ = do_simplify_redlines(str(output_path))
                message += f", simplified {simplify_count} tracked changes"

            if merge_runs:
                merge_count, _ = do_merge_runs(str(output_path))
                message += f", merged {merge_count} runs"

        for xml_file in xml_files:
            _escape_smart_quotes(xml_file)

        return None, message

    except zipfile.BadZipFile:
        return None, f"Error: {input_file} is not a valid Office file"
    except Exception as e:
        return None, f"Error unpacking: {e}"


def _pretty_print_xml(xml_file: Path) -> None:
    try:
        content = xml_file.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(content)
        xml_file.write_bytes(dom.toprettyxml(indent="  ", encoding="utf-8"))
    except Exception:
        pass


def _escape_smart_quotes(xml_file: Path) -> None:
    try:
        content = xml_file.read_text(encoding="utf-8")
        for char, entity in SMART_QUOTE_REPLACEMENTS.items():
            content = content.replace(char, entity)
        xml_file.write_text(content, encoding="utf-8")
    except Exception:
        pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    parser.add_argument("input_file", help="Office file to unpack")
    parser.add_argument("output_directory", help="Output directory")
    parser.add_argument(
        "--merge-runs",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    parser.add_argument(
        "--simplify-redlines",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    args = parser.parse_args()

    _, message = unpack(
        args.input_file,
        args.output_directory,
        merge_runs=args.merge_runs,
        simplify_redlines=args.simplify_redlines,
    )
    print(message)

    if "Error" in message:
        sys.exit(1)


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validate.py
================================================
"""
Command line tool to validate Office document XML files against XSD schemas and tracked changes.

Usage:
    python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]

The first argument can be either:
- An unpacked directory containing the Office document XML files
- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory

Auto-repair fixes:
- paraId/durableId values that exceed OOXML limits
- Missing xml:space="preserve" on w:t elements with whitespace
"""

import argparse
import sys
import tempfile
import zipfile
from pathlib import Path

from validators import DOCXSchemaValidator
from validators import PPTXSchemaValidator
from validators import RedliningValidator


def main():
    parser = argparse.ArgumentParser(description="Validate Office document XML files")
    parser.add_argument(
        "path",
        help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",
    )
    parser.add_argument(
        "--original",
        required=False,
        default=None,
        help=(
            "Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors "
            "are reported and redlining validation is skipped."
        ),
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose output",
    )
    parser.add_argument(
        "--auto-repair",
        action="store_true",
        help="Automatically repair common issues (hex IDs, whitespace preservation)",
    )
    parser.add_argument(
        "--author",
        default="Claude",
        help="Author name for redlining validation (default: Claude)",
    )
    args = parser.parse_args()

    path = Path(args.path)
    assert path.exists(), f"Error: {path} does not exist"

    original_file = None
    if args.original:
        original_file = Path(args.original)
        assert original_file.is_file(), f"Error: {original_file} is not a file"
        assert original_file.suffix.lower() in [
            ".docx",
            ".pptx",
            ".xlsx",
        ], f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"

    file_extension = (original_file or path).suffix.lower()
    assert file_extension in [
        ".docx",
        ".pptx",
        ".xlsx",
    ], f"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file."

    if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:
        temp_dir = tempfile.mkdtemp()
        with zipfile.ZipFile(path, "r") as zf:
            zf.extractall(temp_dir)
        unpacked_dir = Path(temp_dir)
    else:
        assert path.is_dir(), f"Error: {path} is not a directory or Office file"
        unpacked_dir = path

    match file_extension:
        case ".docx":
            validators = [
                DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
            ]
            if original_file:
                validators.append(
                    RedliningValidator(
                        unpacked_dir,
                        original_file,
                        verbose=args.verbose,
                        author=args.author,
                    )
                )
        case ".pptx":
            validators = [
                PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
            ]
        case _:
            print(f"Error: Validation not supported for file type {file_extension}")
            sys.exit(1)

    if args.auto_repair:
        total_repairs = sum(v.repair() for v in validators)
        if total_repairs:
            print(f"Auto-repaired {total_repairs} issue(s)")

    success = all(v.validate() for v in validators)

    if success:
        print("All validations PASSED!")

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/__init__.py
================================================
"""
Validation modules for Word document processing.
"""

from .base import BaseSchemaValidator
from .docx import DOCXSchemaValidator
from .pptx import PPTXSchemaValidator
from .redlining import RedliningValidator

__all__ = [
    "BaseSchemaValidator",
    "DOCXSchemaValidator",
    "PPTXSchemaValidator",
    "RedliningValidator",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/base.py
================================================
"""
Base validator with common validation logic for document files.
"""

import re
from pathlib import Path

import defusedxml.minidom
import lxml.etree


class BaseSchemaValidator:
    IGNORED_VALIDATION_ERRORS = [
        "hyphenationZone",
        "purl.org/dc/terms",
    ]

    UNIQUE_ID_REQUIREMENTS = {
        "comment": ("id", "file"),
        "commentrangestart": ("id", "file"),
        "commentrangeend": ("id", "file"),
        "bookmarkstart": ("id", "file"),
        "bookmarkend": ("id", "file"),
        "sldid": ("id", "file"),
        "sldmasterid": ("id", "global"),
        "sldlayoutid": ("id", "global"),
        "cm": ("authorid", "file"),
        "sheet": ("sheetid", "file"),
        "definedname": ("id", "file"),
        "cxnsp": ("id", "file"),
        "sp": ("id", "file"),
        "pic": ("id", "file"),
        "grpsp": ("id", "file"),
    }

    EXCLUDED_ID_CONTAINERS = {
        "sectionlst",
    }

    ELEMENT_RELATIONSHIP_TYPES = {}

    SCHEMA_MAPPINGS = {
        "word": "ISO-IEC29500-4_2016/wml.xsd",
        "ppt": "ISO-IEC29500-4_2016/pml.xsd",
        "xl": "ISO-IEC29500-4_2016/sml.xsd",
        "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
        "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
        "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
        "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
        ".rels": "ecma/fouth-edition/opc-relationships.xsd",
        "people.xml": "microsoft/wml-2012.xsd",
        "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
        "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
        "commentsExtended.xml": "microsoft/wml-2012.xsd",
        "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
        "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
        "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
    }

    MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
    XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    PACKAGE_RELATIONSHIPS_NAMESPACE = (
        "http://schemas.openxmlformats.org/package/2006/relationships"
    )
    OFFICE_RELATIONSHIPS_NAMESPACE = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    )
    CONTENT_TYPES_NAMESPACE = (
        "http://schemas.openxmlformats.org/package/2006/content-types"
    )

    MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}

    OOXML_NAMESPACES = {
        "http://schemas.openxmlformats.org/officeDocument/2006/math",
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
        "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
        "http://schemas.openxmlformats.org/drawingml/2006/main",
        "http://schemas.openxmlformats.org/drawingml/2006/chart",
        "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
        "http://schemas.openxmlformats.org/drawingml/2006/diagram",
        "http://schemas.openxmlformats.org/drawingml/2006/picture",
        "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
        "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
        "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
        "http://schemas.openxmlformats.org/presentationml/2006/main",
        "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
        "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
        "http://www.w3.org/XML/1998/namespace",
    }

    def __init__(self, unpacked_dir, original_file=None, verbose=False):
        self.unpacked_dir = Path(unpacked_dir).resolve()
        self.original_file = Path(original_file) if original_file else None
        self.verbose = verbose

        self.schemas_dir = Path(__file__).parent.parent / "schemas"

        patterns = ["*.xml", "*.rels"]
        self.xml_files = [
            f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)
        ]

        if not self.xml_files:
            print(f"Warning: No XML files found in {self.unpacked_dir}")

    def validate(self):
        raise NotImplementedError("Subclasses must implement the validate method")

    def repair(self) -> int:
        return self.repair_whitespace_preservation()

    def repair_whitespace_preservation(self) -> int:
        repairs = 0

        for xml_file in self.xml_files:
            try:
                content = xml_file.read_text(encoding="utf-8")
                dom = defusedxml.minidom.parseString(content)
                modified = False

                for elem in dom.getElementsByTagName("*"):
                    if elem.tagName.endswith(":t") and elem.firstChild:
                        text = elem.firstChild.nodeValue
                        if text and (
                            text.startswith((" ", "\t")) or text.endswith((" ", "\t"))
                        ):
                            if elem.getAttribute("xml:space") != "preserve":
                                elem.setAttribute("xml:space", "preserve")
                                text_preview = (
                                    repr(text[:30]) + "..."
                                    if len(text) > 30
                                    else repr(text)
                                )
                                print(
                                    f"  Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}"
                                )
                                repairs += 1
                                modified = True

                if modified:
                    xml_file.write_bytes(dom.toxml(encoding="UTF-8"))

            except Exception:
                pass

        return repairs

    def validate_xml(self):
        errors = []

        for xml_file in self.xml_files:
            try:
                lxml.etree.parse(str(xml_file))
            except lxml.etree.XMLSyntaxError as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Line {e.lineno}: {e.msg}"
                )
            except Exception as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Unexpected error: {str(e)}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} XML violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All XML files are well-formed")
            return True

    def validate_namespaces(self):
        errors = []

        for xml_file in self.xml_files:
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                declared = set(root.nsmap.keys()) - {None}

                for attr_val in [
                    v for k, v in root.attrib.items() if k.endswith("Ignorable")
                ]:
                    undeclared = set(attr_val.split()) - declared
                    errors.extend(
                        f"  {xml_file.relative_to(self.unpacked_dir)}: Namespace '{ns}' in Ignorable but not declared"
                        for ns in undeclared
                    )
            except lxml.etree.XMLSyntaxError:
                continue

        if errors:
            print(f"FAILED - {len(errors)} namespace issues:")
            for error in errors:
                print(error)
            return False
        if self.verbose:
            print("PASSED - All namespace prefixes properly declared")
        return True

    def validate_unique_ids(self):
        errors = []
        global_ids = {}

        for xml_file in self.xml_files:
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                file_ids = {}

                mc_elements = root.xpath(
                    ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
                )
                for elem in mc_elements:
                    elem.getparent().remove(elem)

                for elem in root.iter():
                    tag = (
                        elem.tag.split("}")[-1].lower()
                        if "}" in elem.tag
                        else elem.tag.lower()
                    )

                    if tag in self.UNIQUE_ID_REQUIREMENTS:
                        in_excluded_container = any(
                            ancestor.tag.split("}")[-1].lower()
                            in self.EXCLUDED_ID_CONTAINERS
                            for ancestor in elem.iterancestors()
                        )
                        if in_excluded_container:
                            continue

                        attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]

                        id_value = None
                        for attr, value in elem.attrib.items():
                            attr_local = (
                                attr.split("}")[-1].lower()
                                if "}" in attr
                                else attr.lower()
                            )
                            if attr_local == attr_name:
                                id_value = value
                                break

                        if id_value is not None:
                            if scope == "global":
                                if id_value in global_ids:
                                    prev_file, prev_line, prev_tag = global_ids[
                                        id_value
                                    ]
                                    errors.append(
                                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
                                        f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
                                        f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
                                    )
                                else:
                                    global_ids[id_value] = (
                                        xml_file.relative_to(self.unpacked_dir),
                                        elem.sourceline,
                                        tag,
                                    )
                            elif scope == "file":
                                key = (tag, attr_name)
                                if key not in file_ids:
                                    file_ids[key] = {}

                                if id_value in file_ids[key]:
                                    prev_line = file_ids[key][id_value]
                                    errors.append(
                                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
                                        f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
                                        f"(first occurrence at line {prev_line})"
                                    )
                                else:
                                    file_ids[key][id_value] = elem.sourceline

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All required IDs are unique")
            return True

    def validate_file_references(self):
        errors = []

        rels_files = list(self.unpacked_dir.rglob("*.rels"))

        if not rels_files:
            if self.verbose:
                print("PASSED - No .rels files found")
            return True

        all_files = []
        for file_path in self.unpacked_dir.rglob("*"):
            if (
                file_path.is_file()
                and file_path.name != "[Content_Types].xml"
                and not file_path.name.endswith(".rels")
            ):
                all_files.append(file_path.resolve())

        all_referenced_files = set()

        if self.verbose:
            print(
                f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
            )

        for rels_file in rels_files:
            try:
                rels_root = lxml.etree.parse(str(rels_file)).getroot()

                rels_dir = rels_file.parent

                referenced_files = set()
                broken_refs = []

                for rel in rels_root.findall(
                    ".//ns:Relationship",
                    namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
                ):
                    target = rel.get("Target")
                    if target and not target.startswith(("http", "mailto:")):
                        if target.startswith("/"):
                            target_path = self.unpacked_dir / target.lstrip("/")
                        elif rels_file.name == ".rels":
                            target_path = self.unpacked_dir / target
                        else:
                            base_dir = rels_dir.parent
                            target_path = base_dir / target

                        try:
                            target_path = target_path.resolve()
                            if target_path.exists() and target_path.is_file():
                                referenced_files.add(target_path)
                                all_referenced_files.add(target_path)
                            else:
                                broken_refs.append((target, rel.sourceline))
                        except (OSError, ValueError):
                            broken_refs.append((target, rel.sourceline))

                if broken_refs:
                    rel_path = rels_file.relative_to(self.unpacked_dir)
                    for broken_ref, line_num in broken_refs:
                        errors.append(
                            f"  {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
                        )

            except Exception as e:
                rel_path = rels_file.relative_to(self.unpacked_dir)
                errors.append(f"  Error parsing {rel_path}: {e}")

        unreferenced_files = set(all_files) - all_referenced_files

        if unreferenced_files:
            for unref_file in sorted(unreferenced_files):
                unref_rel_path = unref_file.relative_to(self.unpacked_dir)
                errors.append(f"  Unreferenced file: {unref_rel_path}")

        if errors:
            print(f"FAILED - Found {len(errors)} relationship validation errors:")
            for error in errors:
                print(error)
            print(
                "CRITICAL: These errors will cause the document to appear corrupt. "
                + "Broken references MUST be fixed, "
                + "and unreferenced files MUST be referenced or removed."
            )
            return False
        else:
            if self.verbose:
                print(
                    "PASSED - All references are valid and all files are properly referenced"
                )
            return True

    def validate_all_relationship_ids(self):
        import lxml.etree

        errors = []

        for xml_file in self.xml_files:
            if xml_file.suffix == ".rels":
                continue

            rels_dir = xml_file.parent / "_rels"
            rels_file = rels_dir / f"{xml_file.name}.rels"

            if not rels_file.exists():
                continue

            try:
                rels_root = lxml.etree.parse(str(rels_file)).getroot()
                rid_to_type = {}

                for rel in rels_root.findall(
                    f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                ):
                    rid = rel.get("Id")
                    rel_type = rel.get("Type", "")
                    if rid:
                        if rid in rid_to_type:
                            rels_rel_path = rels_file.relative_to(self.unpacked_dir)
                            errors.append(
                                f"  {rels_rel_path}: Line {rel.sourceline}: "
                                f"Duplicate relationship ID '{rid}' (IDs must be unique)"
                            )
                        type_name = (
                            rel_type.split("/")[-1] if "/" in rel_type else rel_type
                        )
                        rid_to_type[rid] = type_name

                xml_root = lxml.etree.parse(str(xml_file)).getroot()

                r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE
                rid_attrs_to_check = ["id", "embed", "link"]
                for elem in xml_root.iter():
                    for attr_name in rid_attrs_to_check:
                        rid_attr = elem.get(f"{{{r_ns}}}{attr_name}")
                        if not rid_attr:
                            continue
                        xml_rel_path = xml_file.relative_to(self.unpacked_dir)
                        elem_name = (
                            elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
                        )

                        if rid_attr not in rid_to_type:
                            errors.append(
                                f"  {xml_rel_path}: Line {elem.sourceline}: "
                                f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' "
                                f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' if len(rid_to_type) > 5 else ''})"
                            )
                        elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES:
                            expected_type = self._get_expected_relationship_type(
                                elem_name
                            )
                            if expected_type:
                                actual_type = rid_to_type[rid_attr]
                                if expected_type not in actual_type.lower():
                                    errors.append(
                                        f"  {xml_rel_path}: Line {elem.sourceline}: "
                                        f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' "
                                        f"but should point to a '{expected_type}' relationship"
                                    )

            except Exception as e:
                xml_rel_path = xml_file.relative_to(self.unpacked_dir)
                errors.append(f"  Error processing {xml_rel_path}: {e}")

        if errors:
            print(f"FAILED - Found {len(errors)} relationship ID reference errors:")
            for error in errors:
                print(error)
            print("\nThese ID mismatches will cause the document to appear corrupt!")
            return False
        else:
            if self.verbose:
                print("PASSED - All relationship ID references are valid")
            return True

    def _get_expected_relationship_type(self, element_name):
        elem_lower = element_name.lower()

        if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES:
            return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower]

        if elem_lower.endswith("id") and len(elem_lower) > 2:
            prefix = elem_lower[:-2]
            if prefix.endswith("master"):
                return prefix.lower()
            elif prefix.endswith("layout"):
                return prefix.lower()
            else:
                if prefix == "sld":
                    return "slide"
                return prefix.lower()

        if elem_lower.endswith("reference") and len(elem_lower) > 9:
            prefix = elem_lower[:-9]
            return prefix.lower()

        return None

    def validate_content_types(self):
        errors = []

        content_types_file = self.unpacked_dir / "[Content_Types].xml"
        if not content_types_file.exists():
            print("FAILED - [Content_Types].xml file not found")
            return False

        try:
            root = lxml.etree.parse(str(content_types_file)).getroot()
            declared_parts = set()
            declared_extensions = set()

            for override in root.findall(
                f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override"
            ):
                part_name = override.get("PartName")
                if part_name is not None:
                    declared_parts.add(part_name.lstrip("/"))

            for default in root.findall(
                f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default"
            ):
                extension = default.get("Extension")
                if extension is not None:
                    declared_extensions.add(extension.lower())

            declarable_roots = {
                "sld",
                "sldLayout",
                "sldMaster",
                "presentation",
                "document",
                "workbook",
                "worksheet",
                "theme",
            }

            media_extensions = {
                "png": "image/png",
                "jpg": "image/jpeg",
                "jpeg": "image/jpeg",
                "gif": "image/gif",
                "bmp": "image/bmp",
                "tiff": "image/tiff",
                "wmf": "image/x-wmf",
                "emf": "image/x-emf",
            }

            all_files = list(self.unpacked_dir.rglob("*"))
            all_files = [f for f in all_files if f.is_file()]

            for xml_file in self.xml_files:
                path_str = str(xml_file.relative_to(self.unpacked_dir)).replace(
                    "\\", "/"
                )

                if any(
                    skip in path_str
                    for skip in [".rels", "[Content_Types]", "docProps/", "_rels/"]
                ):
                    continue

                try:
                    root_tag = lxml.etree.parse(str(xml_file)).getroot().tag
                    root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag

                    if root_name in declarable_roots and path_str not in declared_parts:
                        errors.append(
                            f"  {path_str}: File with <{root_name}> root not declared in [Content_Types].xml"
                        )

                except Exception:
                    continue

            for file_path in all_files:
                if file_path.suffix.lower() in {".xml", ".rels"}:
                    continue
                if file_path.name == "[Content_Types].xml":
                    continue
                if "_rels" in file_path.parts or "docProps" in file_path.parts:
                    continue

                extension = file_path.suffix.lstrip(".").lower()
                if extension and extension not in declared_extensions:
                    if extension in media_extensions:
                        relative_path = file_path.relative_to(self.unpacked_dir)
                        msg = (
                            f"  {relative_path}: File with extension '{extension}' "
                            f"not declared in [Content_Types].xml - should add: "
                            f'<Default Extension="{extension}" '
                            f'ContentType="{media_extensions[extension]}"/>'
                        )
                        errors.append(msg)

        except Exception as e:
            errors.append(f"  Error parsing [Content_Types].xml: {e}")

        if errors:
            print(f"FAILED - Found {len(errors)} content type declaration errors:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print(
                    "PASSED - All content files are properly declared in [Content_Types].xml"
                )
            return True

    def validate_file_against_xsd(self, xml_file, verbose=False):
        xml_file = Path(xml_file).resolve()
        unpacked_dir = self.unpacked_dir.resolve()

        is_valid, current_errors = self._validate_single_file_xsd(
            xml_file, unpacked_dir
        )

        if is_valid is None:
            return None, set()
        elif is_valid:
            return True, set()

        original_errors = self._get_original_file_errors(xml_file)

        assert current_errors is not None
        new_errors = current_errors - original_errors

        new_errors = {
            e
            for e in new_errors
            if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS)
        }

        if new_errors:
            if verbose:
                relative_path = xml_file.relative_to(unpacked_dir)
                print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)")
                for error in list(new_errors)[:3]:
                    truncated = error[:250] + "..." if len(error) > 250 else error
                    print(f"  - {truncated}")
            return False, new_errors
        else:
            if verbose:
                print(
                    f"PASSED - No new errors (original had {len(current_errors)} errors)"
                )
            return True, set()

    def validate_against_xsd(self):
        new_errors = []
        original_error_count = 0
        valid_count = 0
        skipped_count = 0

        for xml_file in self.xml_files:
            relative_path = str(xml_file.relative_to(self.unpacked_dir))
            is_valid, new_file_errors = self.validate_file_against_xsd(
                xml_file, verbose=False
            )

            if is_valid is None:
                skipped_count += 1
                continue
            elif is_valid and not new_file_errors:
                valid_count += 1
                continue
            elif is_valid:
                original_error_count += 1
                valid_count += 1
                continue

            new_errors.append(f"  {relative_path}: {len(new_file_errors)} new error(s)")
            for error in list(new_file_errors)[:3]:
                new_errors.append(
                    f"    - {error[:250]}..." if len(error) > 250 else f"    - {error}"
                )

        if self.verbose:
            print(f"Validated {len(self.xml_files)} files:")
            print(f"  - Valid: {valid_count}")
            print(f"  - Skipped (no schema): {skipped_count}")
            if original_error_count:
                print(f"  - With original errors (ignored): {original_error_count}")
            print(
                f"  - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith('    ')]) or 0}"
            )

        if new_errors:
            print("\nFAILED - Found NEW validation errors:")
            for error in new_errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("\nPASSED - No new XSD validation errors introduced")
            return True

    def _get_schema_path(self, xml_file):
        if xml_file.name in self.SCHEMA_MAPPINGS:
            return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name]

        if xml_file.suffix == ".rels":
            return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"]

        if "charts/" in str(xml_file) and xml_file.name.startswith("chart"):
            return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"]

        if "theme/" in str(xml_file) and xml_file.name.startswith("theme"):
            return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"]

        if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS:
            return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name]

        return None

    def _clean_ignorable_namespaces(self, xml_doc):
        xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
        xml_copy = lxml.etree.fromstring(xml_string)

        for elem in xml_copy.iter():
            attrs_to_remove = []

            for attr in elem.attrib:
                if "{" in attr:
                    ns = attr.split("}")[0][1:]
                    if ns not in self.OOXML_NAMESPACES:
                        attrs_to_remove.append(attr)

            for attr in attrs_to_remove:
                del elem.attrib[attr]

        self._remove_ignorable_elements(xml_copy)

        return lxml.etree.ElementTree(xml_copy)

    def _remove_ignorable_elements(self, root):
        elements_to_remove = []

        for elem in list(root):
            if not hasattr(elem, "tag") or callable(elem.tag):
                continue

            tag_str = str(elem.tag)
            if tag_str.startswith("{"):
                ns = tag_str.split("}")[0][1:]
                if ns not in self.OOXML_NAMESPACES:
                    elements_to_remove.append(elem)
                    continue

            self._remove_ignorable_elements(elem)

        for elem in elements_to_remove:
            root.remove(elem)

    def _preprocess_for_mc_ignorable(self, xml_doc):
        root = xml_doc.getroot()

        if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib:
            del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"]

        return xml_doc

    def _validate_single_file_xsd(self, xml_file, base_path):
        schema_path = self._get_schema_path(xml_file)
        if not schema_path:
            return None, None

        try:
            with open(schema_path, "rb") as xsd_file:
                parser = lxml.etree.XMLParser()
                xsd_doc = lxml.etree.parse(
                    xsd_file, parser=parser, base_url=str(schema_path)
                )
                schema = lxml.etree.XMLSchema(xsd_doc)

            with open(xml_file, "r") as f:
                xml_doc = lxml.etree.parse(f)

            xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
            xml_doc = self._preprocess_for_mc_ignorable(xml_doc)

            relative_path = xml_file.relative_to(base_path)
            if (
                relative_path.parts
                and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
            ):
                xml_doc = self._clean_ignorable_namespaces(xml_doc)

            if schema.validate(xml_doc):
                return True, set()
            else:
                errors = set()
                for error in schema.error_log:
                    errors.add(error.message)
                return False, errors

        except Exception as e:
            return False, {str(e)}

    def _get_original_file_errors(self, xml_file):
        if self.original_file is None:
            return set()

        import tempfile
        import zipfile

        xml_file = Path(xml_file).resolve()
        unpacked_dir = self.unpacked_dir.resolve()
        relative_path = xml_file.relative_to(unpacked_dir)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            with zipfile.ZipFile(self.original_file, "r") as zip_ref:
                zip_ref.extractall(temp_path)

            original_xml_file = temp_path / relative_path

            if not original_xml_file.exists():
                return set()

            is_valid, errors = self._validate_single_file_xsd(
                original_xml_file, temp_path
            )
            return errors if errors else set()

    def _remove_template_tags_from_text_nodes(self, xml_doc):
        warnings = []
        template_pattern = re.compile(r"\{\{[^}]*\}\}")

        xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
        xml_copy = lxml.etree.fromstring(xml_string)

        def process_text_content(text, content_type):
            if not text:
                return text
            matches = list(template_pattern.finditer(text))
            if matches:
                for match in matches:
                    warnings.append(
                        f"Found template tag in {content_type}: {match.group()}"
                    )
                return template_pattern.sub("", text)
            return text

        for elem in xml_copy.iter():
            if not hasattr(elem, "tag") or callable(elem.tag):
                continue
            tag_str = str(elem.tag)
            if tag_str.endswith("}t") or tag_str == "t":
                continue

            elem.text = process_text_content(elem.text, "text content")
            elem.tail = process_text_content(elem.tail, "tail content")

        return lxml.etree.ElementTree(xml_copy), warnings


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/docx.py
================================================
"""
Validator for Word document XML files against XSD schemas.
"""

import random
import re
import tempfile
import zipfile

import defusedxml.minidom
import lxml.etree

from .base import BaseSchemaValidator


class DOCXSchemaValidator(BaseSchemaValidator):
    WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
    W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"

    ELEMENT_RELATIONSHIP_TYPES = {}

    def validate(self):
        if not self.validate_xml():
            return False

        all_valid = True
        if not self.validate_namespaces():
            all_valid = False

        if not self.validate_unique_ids():
            all_valid = False

        if not self.validate_file_references():
            all_valid = False

        if not self.validate_content_types():
            all_valid = False

        if not self.validate_against_xsd():
            all_valid = False

        if not self.validate_whitespace_preservation():
            all_valid = False

        if not self.validate_deletions():
            all_valid = False

        if not self.validate_insertions():
            all_valid = False

        if not self.validate_all_relationship_ids():
            all_valid = False

        if not self.validate_id_constraints():
            all_valid = False

        if not self.validate_comment_markers():
            all_valid = False

        self.compare_paragraph_counts()

        return all_valid

    def validate_whitespace_preservation(self):
        errors = []

        for xml_file in self.xml_files:
            if xml_file.name != "document.xml":
                continue

            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"):
                    if elem.text:
                        text = elem.text
                        if re.search(r"^[ \t\n\r]", text) or re.search(
                            r"[ \t\n\r]$", text
                        ):
                            xml_space_attr = f"{{{self.XML_NAMESPACE}}}space"
                            if (
                                xml_space_attr not in elem.attrib
                                or elem.attrib[xml_space_attr] != "preserve"
                            ):
                                text_preview = (
                                    repr(text)[:50] + "..."
                                    if len(repr(text)) > 50
                                    else repr(text)
                                )
                                errors.append(
                                    f"  {xml_file.relative_to(self.unpacked_dir)}: "
                                    f"Line {elem.sourceline}: w:t element with whitespace "
                                    f"missing xml:space='preserve': {text_preview}"
                                )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} whitespace preservation violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All whitespace is properly preserved")
            return True

    def validate_deletions(self):
        errors = []

        for xml_file in self.xml_files:
            if xml_file.name != "document.xml":
                continue

            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                namespaces = {"w": self.WORD_2006_NAMESPACE}

                for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces):
                    if t_elem.text:
                        text_preview = (
                            repr(t_elem.text)[:50] + "..."
                            if len(repr(t_elem.text)) > 50
                            else repr(t_elem.text)
                        )
                        errors.append(
                            f"  {xml_file.relative_to(self.unpacked_dir)}: "
                            f"Line {t_elem.sourceline}: <w:t> found within <w:del>: {text_preview}"
                        )

                for instr_elem in root.xpath(
                    ".//w:del//w:instrText", namespaces=namespaces
                ):
                    text_preview = (
                        repr(instr_elem.text or "")[:50] + "..."
                        if len(repr(instr_elem.text or "")) > 50
                        else repr(instr_elem.text or "")
                    )
                    errors.append(
                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {instr_elem.sourceline}: <w:instrText> found within <w:del> (use <w:delInstrText>): {text_preview}"
                    )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} deletion validation violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - No w:t elements found within w:del elements")
            return True

    def count_paragraphs_in_unpacked(self):
        count = 0

        for xml_file in self.xml_files:
            if xml_file.name != "document.xml":
                continue

            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
                count = len(paragraphs)
            except Exception as e:
                print(f"Error counting paragraphs in unpacked document: {e}")

        return count

    def count_paragraphs_in_original(self):
        original = self.original_file
        if original is None:
            return 0

        count = 0

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                with zipfile.ZipFile(original, "r") as zip_ref:
                    zip_ref.extractall(temp_dir)

                doc_xml_path = temp_dir + "/word/document.xml"
                root = lxml.etree.parse(doc_xml_path).getroot()

                paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
                count = len(paragraphs)

        except Exception as e:
            print(f"Error counting paragraphs in original document: {e}")

        return count

    def validate_insertions(self):
        errors = []

        for xml_file in self.xml_files:
            if xml_file.name != "document.xml":
                continue

            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                namespaces = {"w": self.WORD_2006_NAMESPACE}

                invalid_elements = root.xpath(
                    ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces
                )

                for elem in invalid_elements:
                    text_preview = (
                        repr(elem.text or "")[:50] + "..."
                        if len(repr(elem.text or "")) > 50
                        else repr(elem.text or "")
                    )
                    errors.append(
                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {elem.sourceline}: <w:delText> within <w:ins>: {text_preview}"
                    )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} insertion validation violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - No w:delText elements within w:ins elements")
            return True

    def compare_paragraph_counts(self):
        original_count = self.count_paragraphs_in_original()
        new_count = self.count_paragraphs_in_unpacked()

        diff = new_count - original_count
        diff_str = f"+{diff}" if diff > 0 else str(diff)
        print(f"\nParagraphs: {original_count} → {new_count} ({diff_str})")

    def _parse_id_value(self, val: str, base: int = 16) -> int:
        return int(val, base)

    def validate_id_constraints(self):
        errors = []
        para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId"
        durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId"

        for xml_file in self.xml_files:
            try:
                for elem in lxml.etree.parse(str(xml_file)).iter():
                    if val := elem.get(para_id_attr):
                        if self._parse_id_value(val, base=16) >= 0x80000000:
                            errors.append(
                                f"  {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000"
                            )

                    if val := elem.get(durable_id_attr):
                        if xml_file.name == "numbering.xml":
                            try:
                                if self._parse_id_value(val, base=10) >= 0x7FFFFFFF:
                                    errors.append(
                                        f"  {xml_file.name}:{elem.sourceline}: durableId={val} >= 0x7FFFFFFF"
                                    )
                            except ValueError:
                                errors.append(
                                    f"  {xml_file.name}:{elem.sourceline}: durableId={val} must be decimal in numbering.xml"
                                )
                        else:
                            if self._parse_id_value(val, base=16) >= 0x7FFFFFFF:
                                errors.append(
                                    f"  {xml_file.name}:{elem.sourceline}: durableId={val} >= 0x7FFFFFFF"
                                )
            except Exception:
                pass

        if errors:
            print(f"FAILED - {len(errors)} ID constraint violations:")
            for e in errors:
                print(e)
        elif self.verbose:
            print("PASSED - All paraId/durableId values within constraints")
        return not errors

    def validate_comment_markers(self):
        errors = []

        document_xml = None
        comments_xml = None
        for xml_file in self.xml_files:
            if xml_file.name == "document.xml" and "word" in str(xml_file):
                document_xml = xml_file
            elif xml_file.name == "comments.xml":
                comments_xml = xml_file

        if not document_xml:
            if self.verbose:
                print("PASSED - No document.xml found (skipping comment validation)")
            return True

        try:
            doc_root = lxml.etree.parse(str(document_xml)).getroot()
            namespaces = {"w": self.WORD_2006_NAMESPACE}

            range_starts = {
                elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
                for elem in doc_root.xpath(
                    ".//w:commentRangeStart", namespaces=namespaces
                )
            }
            range_ends = {
                elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
                for elem in doc_root.xpath(
                    ".//w:commentRangeEnd", namespaces=namespaces
                )
            }
            references = {
                elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
                for elem in doc_root.xpath(
                    ".//w:commentReference", namespaces=namespaces
                )
            }

            orphaned_ends = range_ends - range_starts
            for comment_id in sorted(
                orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0
            ):
                errors.append(
                    f'  document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart'
                )

            orphaned_starts = range_starts - range_ends
            for comment_id in sorted(
                orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0
            ):
                errors.append(
                    f'  document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd'
                )

            comment_ids = set()
            if comments_xml and comments_xml.exists():
                comments_root = lxml.etree.parse(str(comments_xml)).getroot()
                comment_ids = {
                    elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
                    for elem in comments_root.xpath(
                        ".//w:comment", namespaces=namespaces
                    )
                }

                marker_ids = range_starts | range_ends | references
                invalid_refs = marker_ids - comment_ids
                for comment_id in sorted(
                    invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0
                ):
                    if comment_id:
                        errors.append(
                            f'  document.xml: marker id="{comment_id}" references non-existent comment'
                        )

        except (lxml.etree.XMLSyntaxError, Exception) as e:
            errors.append(f"  Error parsing XML: {e}")

        if errors:
            print(f"FAILED - {len(errors)} comment marker violations:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All comment markers properly paired")
            return True

    def repair(self) -> int:
        repairs = super().repair()
        repairs += self.repair_durableId()
        return repairs

    def repair_durableId(self) -> int:
        repairs = 0

        for xml_file in self.xml_files:
            try:
                content = xml_file.read_text(encoding="utf-8")
                dom = defusedxml.minidom.parseString(content)
                modified = False

                for elem in dom.getElementsByTagName("*"):
                    if not elem.hasAttribute("w16cid:durableId"):
                        continue

                    durable_id = elem.getAttribute("w16cid:durableId")
                    needs_repair = False

                    if xml_file.name == "numbering.xml":
                        try:
                            needs_repair = (
                                self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF
                            )
                        except ValueError:
                            needs_repair = True
                    else:
                        try:
                            needs_repair = (
                                self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF
                            )
                        except ValueError:
                            needs_repair = True

                    if needs_repair:
                        value = random.randint(1, 0x7FFFFFFE)
                        if xml_file.name == "numbering.xml":
                            new_id = str(value)
                        else:
                            new_id = f"{value:08X}"

                        elem.setAttribute("w16cid:durableId", new_id)
                        print(
                            f"  Repaired: {xml_file.name}: durableId {durable_id} → {new_id}"
                        )
                        repairs += 1
                        modified = True

                if modified:
                    xml_file.write_bytes(dom.toxml(encoding="UTF-8"))

            except Exception:
                pass

        return repairs


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/pptx.py
================================================
"""
Validator for PowerPoint presentation XML files against XSD schemas.
"""

import re

from .base import BaseSchemaValidator


class PPTXSchemaValidator(BaseSchemaValidator):
    PRESENTATIONML_NAMESPACE = (
        "http://schemas.openxmlformats.org/presentationml/2006/main"
    )

    ELEMENT_RELATIONSHIP_TYPES = {
        "sldid": "slide",
        "sldmasterid": "slidemaster",
        "notesmasterid": "notesmaster",
        "sldlayoutid": "slidelayout",
        "themeid": "theme",
        "tablestyleid": "tablestyles",
    }

    def validate(self):
        if not self.validate_xml():
            return False

        all_valid = True
        if not self.validate_namespaces():
            all_valid = False

        if not self.validate_unique_ids():
            all_valid = False

        if not self.validate_uuid_ids():
            all_valid = False

        if not self.validate_file_references():
            all_valid = False

        if not self.validate_slide_layout_ids():
            all_valid = False

        if not self.validate_content_types():
            all_valid = False

        if not self.validate_against_xsd():
            all_valid = False

        if not self.validate_notes_slide_references():
            all_valid = False

        if not self.validate_all_relationship_ids():
            all_valid = False

        if not self.validate_no_duplicate_slide_layouts():
            all_valid = False

        return all_valid

    def validate_uuid_ids(self):
        import lxml.etree

        errors = []
        uuid_pattern = re.compile(
            r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
        )

        for xml_file in self.xml_files:
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                for elem in root.iter():
                    for attr, value in elem.attrib.items():
                        attr_name = attr.split("}")[-1].lower()
                        if attr_name == "id" or attr_name.endswith("id"):
                            if self._looks_like_uuid(value):
                                if not uuid_pattern.match(value):
                                    errors.append(
                                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
                                        f"Line {elem.sourceline}: ID '{value}' appears to be "
                                        "a UUID but contains invalid hex characters"
                                    )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} UUID ID validation errors:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All UUID-like IDs contain valid hex values")
            return True

    def _looks_like_uuid(self, value):
        clean_value = value.strip("{}()").replace("-", "")
        return len(clean_value) == 32 and all(c.isalnum() for c in clean_value)

    def validate_slide_layout_ids(self):
        import lxml.etree

        errors = []

        slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))

        if not slide_masters:
            if self.verbose:
                print("PASSED - No slide masters found")
            return True

        for slide_master in slide_masters:
            try:
                root = lxml.etree.parse(str(slide_master)).getroot()

                rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels"

                if not rels_file.exists():
                    errors.append(
                        f"  {slide_master.relative_to(self.unpacked_dir)}: "
                        f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}"
                    )
                    continue

                rels_root = lxml.etree.parse(str(rels_file)).getroot()

                valid_layout_rids = set()
                for rel in rels_root.findall(
                    f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                ):
                    rel_type = rel.get("Type", "")
                    if "slideLayout" in rel_type:
                        valid_layout_rids.add(rel.get("Id"))

                for sld_layout_id in root.findall(
                    f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId"
                ):
                    r_id = sld_layout_id.get(
                        f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id"
                    )
                    layout_id = sld_layout_id.get("id")

                    if r_id and r_id not in valid_layout_rids:
                        errors.append(
                            f"  {slide_master.relative_to(self.unpacked_dir)}: "
                            f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' "
                            f"references r:id='{r_id}' which is not found in slide layout relationships"
                        )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {slide_master.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} slide layout ID validation errors:")
            for error in errors:
                print(error)
            print(
                "Remove invalid references or add missing slide layouts to the relationships file."
            )
            return False
        else:
            if self.verbose:
                print("PASSED - All slide layout IDs reference valid slide layouts")
            return True

    def validate_no_duplicate_slide_layouts(self):
        import lxml.etree

        errors = []
        slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))

        for rels_file in slide_rels_files:
            try:
                root = lxml.etree.parse(str(rels_file)).getroot()

                layout_rels = [
                    rel
                    for rel in root.findall(
                        f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                    )
                    if "slideLayout" in rel.get("Type", "")
                ]

                if len(layout_rels) > 1:
                    errors.append(
                        f"  {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references"
                    )

            except Exception as e:
                errors.append(
                    f"  {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print("FAILED - Found slides with duplicate slideLayout references:")
            for error in errors:
                print(error)
            return False
        else:
            if self.verbose:
                print("PASSED - All slides have exactly one slideLayout reference")
            return True

    def validate_notes_slide_references(self):
        import lxml.etree

        errors = []
        notes_slide_references = {}

        slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))

        if not slide_rels_files:
            if self.verbose:
                print("PASSED - No slide relationship files found")
            return True

        for rels_file in slide_rels_files:
            try:
                root = lxml.etree.parse(str(rels_file)).getroot()

                for rel in root.findall(
                    f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                ):
                    rel_type = rel.get("Type", "")
                    if "notesSlide" in rel_type:
                        target = rel.get("Target", "")
                        if target:
                            normalized_target = target.replace("../", "")

                            slide_name = rels_file.stem.replace(".xml", "")

                            if normalized_target not in notes_slide_references:
                                notes_slide_references[normalized_target] = []
                            notes_slide_references[normalized_target].append(
                                (slide_name, rels_file)
                            )

            except (lxml.etree.XMLSyntaxError, Exception) as e:
                errors.append(
                    f"  {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        for target, references in notes_slide_references.items():
            if len(references) > 1:
                slide_names = [ref[0] for ref in references]
                errors.append(
                    f"  Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}"
                )
                for slide_name, rels_file in references:
                    errors.append(f"    - {rels_file.relative_to(self.unpacked_dir)}")

        if errors:
            print(
                f"FAILED - Found {len([e for e in errors if not e.startswith('    ')])} notes slide reference validation errors:"
            )
            for error in errors:
                print(error)
            print("Each slide may optionally have its own slide file.")
            return False
        else:
            if self.verbose:
                print("PASSED - All notes slide references are unique")
            return True


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/redlining.py
================================================
"""
Validator for tracked changes in Word documents.
"""

import subprocess
import tempfile
import zipfile
from pathlib import Path


class RedliningValidator:
    def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
        self.unpacked_dir = Path(unpacked_dir)
        self.original_docx = Path(original_docx)
        self.verbose = verbose
        self.author = author
        self.namespaces = {
            "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        }

    def repair(self) -> int:
        return 0

    def validate(self):
        modified_file = self.unpacked_dir / "word" / "document.xml"
        if not modified_file.exists():
            print(f"FAILED - Modified document.xml not found at {modified_file}")
            return False

        try:
            import xml.etree.ElementTree as ET

            tree = ET.parse(modified_file)
            root = tree.getroot()

            del_elements = root.findall(".//w:del", self.namespaces)
            ins_elements = root.findall(".//w:ins", self.namespaces)

            author_del_elements = [
                elem
                for elem in del_elements
                if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
            ]
            author_ins_elements = [
                elem
                for elem in ins_elements
                if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
            ]

            if not author_del_elements and not author_ins_elements:
                if self.verbose:
                    print(f"PASSED - No tracked changes by {self.author} found.")
                return True

        except Exception:
            pass

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            try:
                with zipfile.ZipFile(self.original_docx, "r") as zip_ref:
                    zip_ref.extractall(temp_path)
            except Exception as e:
                print(f"FAILED - Error unpacking original docx: {e}")
                return False

            original_file = temp_path / "word" / "document.xml"
            if not original_file.exists():
                print(
                    f"FAILED - Original document.xml not found in {self.original_docx}"
                )
                return False

            try:
                import xml.etree.ElementTree as ET

                modified_tree = ET.parse(modified_file)
                modified_root = modified_tree.getroot()
                original_tree = ET.parse(original_file)
                original_root = original_tree.getroot()
            except ET.ParseError as e:
                print(f"FAILED - Error parsing XML files: {e}")
                return False

            self._remove_author_tracked_changes(original_root)
            self._remove_author_tracked_changes(modified_root)

            modified_text = self._extract_text_content(modified_root)
            original_text = self._extract_text_content(original_root)

            if modified_text != original_text:
                error_message = self._generate_detailed_diff(
                    original_text, modified_text
                )
                print(error_message)
                return False

            if self.verbose:
                print(f"PASSED - All changes by {self.author} are properly tracked")
            return True

    def _generate_detailed_diff(self, original_text, modified_text):
        error_parts = [
            f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
            "",
            "Likely causes:",
            "  1. Modified text inside another author's <w:ins> or <w:del> tags",
            "  2. Made edits without proper tracked changes",
            "  3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
            "",
            "For pre-redlined documents, use correct patterns:",
            "  - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
            "  - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
            "",
        ]

        git_diff = self._get_git_word_diff(original_text, modified_text)
        if git_diff:
            error_parts.extend(["Differences:", "============", git_diff])
        else:
            error_parts.append("Unable to generate word diff (git not available)")

        return "\n".join(error_parts)

    def _get_git_word_diff(self, original_text, modified_text):
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                original_file = temp_path / "original.txt"
                modified_file = temp_path / "modified.txt"

                original_file.write_text(original_text, encoding="utf-8")
                modified_file.write_text(modified_text, encoding="utf-8")

                result = subprocess.run(
                    [
                        "git",
                        "diff",
                        "--word-diff=plain",
                        "--word-diff-regex=.",
                        "-U0",
                        "--no-index",
                        str(original_file),
                        str(modified_file),
                    ],
                    capture_output=True,
                    text=True,
                )

                if result.stdout.strip():
                    lines = result.stdout.split("\n")
                    content_lines = []
                    in_content = False
                    for line in lines:
                        if line.startswith("@@"):
                            in_content = True
                            continue
                        if in_content and line.strip():
                            content_lines.append(line)

                    if content_lines:
                        return "\n".join(content_lines)

                result = subprocess.run(
                    [
                        "git",
                        "diff",
                        "--word-diff=plain",
                        "-U0",
                        "--no-index",
                        str(original_file),
                        str(modified_file),
                    ],
                    capture_output=True,
                    text=True,
                )

                if result.stdout.strip():
                    lines = result.stdout.split("\n")
                    content_lines = []
                    in_content = False
                    for line in lines:
                        if line.startswith("@@"):
                            in_content = True
                            continue
                        if in_content and line.strip():
                            content_lines.append(line)
                    return "\n".join(content_lines)

        except (subprocess.CalledProcessError, FileNotFoundError, Exception):
            pass

        return None

    def _remove_author_tracked_changes(self, root):
        ins_tag = f"{{{self.namespaces['w']}}}ins"
        del_tag = f"{{{self.namespaces['w']}}}del"
        author_attr = f"{{{self.namespaces['w']}}}author"

        for parent in root.iter():
            to_remove = []
            for child in parent:
                if child.tag == ins_tag and child.get(author_attr) == self.author:
                    to_remove.append(child)
            for elem in to_remove:
                parent.remove(elem)

        deltext_tag = f"{{{self.namespaces['w']}}}delText"
        t_tag = f"{{{self.namespaces['w']}}}t"

        for parent in root.iter():
            to_process = []
            for child in parent:
                if child.tag == del_tag and child.get(author_attr) == self.author:
                    to_process.append((child, list(parent).index(child)))

            for del_elem, del_index in reversed(to_process):
                for elem in del_elem.iter():
                    if elem.tag == deltext_tag:
                        elem.tag = t_tag

                for child in reversed(list(del_elem)):
                    parent.insert(del_index, child)
                parent.remove(del_elem)

    def _extract_text_content(self, root):
        p_tag = f"{{{self.namespaces['w']}}}p"
        t_tag = f"{{{self.namespaces['w']}}}t"

        paragraphs = []
        for p_elem in root.findall(f".//{p_tag}"):
            text_parts = []
            for t_elem in p_elem.findall(f".//{t_tag}"):
                if t_elem.text:
                    text_parts.append(t_elem.text)
            paragraph_text = "".join(text_parts)
            if paragraph_text:
                paragraphs.append(paragraph_text)

        return "\n".join(paragraphs)


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/preview.py
================================================
"""Generate slide preview images from a PowerPoint file.

Converts PPTX -> PDF -> JPEG slides with caching. If cached slides
already exist and are up-to-date, returns them without reconverting.

Output protocol (stdout):
    Line 1: status — one of CACHED, GENERATED, ERROR_NOT_FOUND, ERROR_NO_PDF
    Lines 2+: sorted absolute paths to slide-*.jpg files

Usage:
    python preview.py /path/to/file.pptx /path/to/cache_dir
"""

import os
import subprocess
import sys
from pathlib import Path

# Allow importing office.soffice from the scripts directory
sys.path.insert(0, str(Path(__file__).resolve().parent))

from office.soffice import run_soffice

CONVERSION_DPI = 150


def _find_slides(directory: Path) -> list[str]:
    """Find slide-*.jpg files in directory, sorted by page number."""
    slides = list(directory.glob("slide-*.jpg"))
    slides.sort(key=lambda p: int(p.stem.split("-")[-1]))
    return [str(s) for s in slides]


def main() -> None:
    if len(sys.argv) != 3:
        print(f"Usage: {sys.argv[0]} <pptx_path> <cache_dir>", file=sys.stderr)
        sys.exit(1)

    pptx_path = Path(sys.argv[1])
    cache_dir = Path(sys.argv[2])

    if not pptx_path.is_file():
        print("ERROR_NOT_FOUND")
        return

    # Check cache: if slides exist and are at least as new as the PPTX, reuse them
    cached_slides = _find_slides(cache_dir)
    if cached_slides:
        pptx_mtime = os.path.getmtime(pptx_path)
        oldest_slide_mtime = min(os.path.getmtime(s) for s in cached_slides)
        if oldest_slide_mtime >= pptx_mtime:
            print("CACHED")
            for slide in cached_slides:
                print(slide)
            return
        # Stale cache — remove old slides
        for slide in cached_slides:
            os.remove(slide)

    cache_dir.mkdir(parents=True, exist_ok=True)

    # Convert PPTX -> PDF via LibreOffice
    result = run_soffice(
        [
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(cache_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print("CONVERSION_ERROR", file=sys.stderr)
        sys.exit(1)

    # Find the generated PDF
    pdfs = sorted(cache_dir.glob("*.pdf"))
    if not pdfs:
        print("ERROR_NO_PDF")
        return

    pdf_file = pdfs[0]

    # Convert PDF -> JPEG slides
    result = subprocess.run(
        [
            "pdftoppm",
            "-jpeg",
            "-r",
            str(CONVERSION_DPI),
            str(pdf_file),
            str(cache_dir / "slide"),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print("CONVERSION_ERROR", file=sys.stderr)
        sys.exit(1)

    # Clean up PDF
    pdf_file.unlink(missing_ok=True)

    slides = _find_slides(cache_dir)
    print("GENERATED")
    for slide in slides:
        print(slide)


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/thumbnail.py
================================================
"""Create thumbnail grids from PowerPoint presentation slides.

Creates a grid layout of slide thumbnails for quick visual analysis.
Labels each thumbnail with its XML filename (e.g., slide1.xml).
Hidden slides are shown with a placeholder pattern.

Usage:
    python thumbnail.py input.pptx [output_prefix] [--cols N]

Examples:
    python thumbnail.py presentation.pptx
    # Creates: thumbnails.jpg

    python thumbnail.py template.pptx grid --cols 4
    # Creates: grid.jpg (or grid-1.jpg, grid-2.jpg for large decks)
"""

import argparse
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path

import defusedxml.minidom
from office.soffice import get_soffice_env
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont

THUMBNAIL_WIDTH = 300
CONVERSION_DPI = 100
MAX_COLS = 6
DEFAULT_COLS = 3
JPEG_QUALITY = 95
GRID_PADDING = 20
BORDER_WIDTH = 2
FONT_SIZE_RATIO = 0.10
LABEL_PADDING_RATIO = 0.4


def main():
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )

    args = parser.parse_args()

    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS}")

    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}", file=sys.stderr)
        sys.exit(1)

    output_path = Path(f"{args.output_prefix}.jpg")

    try:
        slide_info = get_slide_info(input_path)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            visible_images = convert_to_images(input_path, temp_path)

            if not visible_images and not any(s["hidden"] for s in slide_info):
                print("Error: No slides found", file=sys.stderr)
                sys.exit(1)

            slides = build_slide_list(slide_info, visible_images, temp_path)

            grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)

            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  {grid_file}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


def get_slide_info(pptx_path: Path) -> list[dict]:
    with zipfile.ZipFile(pptx_path, "r") as zf:
        rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
        rels_dom = defusedxml.minidom.parseString(rels_content)

        rid_to_slide = {}
        for rel in rels_dom.getElementsByTagName("Relationship"):
            rid = rel.getAttribute("Id")
            target = rel.getAttribute("Target")
            rel_type = rel.getAttribute("Type")
            if "slide" in rel_type and target.startswith("slides/"):
                rid_to_slide[rid] = target.replace("slides/", "")

        pres_content = zf.read("ppt/presentation.xml").decode("utf-8")
        pres_dom = defusedxml.minidom.parseString(pres_content)

        slides = []
        for sld_id in pres_dom.getElementsByTagName("p:sldId"):
            rid = sld_id.getAttribute("r:id")
            if rid in rid_to_slide:
                hidden = sld_id.getAttribute("show") == "0"
                slides.append({"name": rid_to_slide[rid], "hidden": hidden})

        return slides


def build_slide_list(
    slide_info: list[dict],
    visible_images: list[Path],
    temp_dir: Path,
) -> list[tuple[Path, str]]:
    if visible_images:
        with Image.open(visible_images[0]) as img:
            placeholder_size = img.size
    else:
        placeholder_size = (1920, 1080)

    slides = []
    visible_idx = 0

    for info in slide_info:
        if info["hidden"]:
            placeholder_path = temp_dir / f"hidden-{info['name']}.jpg"
            placeholder_img = create_hidden_placeholder(placeholder_size)
            placeholder_img.save(placeholder_path, "JPEG")
            slides.append((placeholder_path, f"{info['name']} (hidden)"))
        else:
            if visible_idx < len(visible_images):
                slides.append((visible_images[visible_idx], info["name"]))
                visible_idx += 1

    return slides


def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:
    img = Image.new("RGB", size, color="#F0F0F0")
    draw = ImageDraw.Draw(img)
    line_width = max(5, min(size) // 100)
    draw.line([(0, 0), size], fill="#CCCCCC", width=line_width)
    draw.line([(size[0], 0), (0, size[1])], fill="#CCCCCC", width=line_width)
    return img


def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:
    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
        env=get_soffice_env(),
    )
    if result.returncode != 0 or not pdf_path.exists():
        raise RuntimeError("PDF conversion failed")

    result = subprocess.run(
        [
            "pdftoppm",
            "-jpeg",
            "-r",
            str(CONVERSION_DPI),
            str(pdf_path),
            str(temp_dir / "slide"),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError("Image conversion failed")

    return sorted(temp_dir.glob("slide-*.jpg"))


def create_grids(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
    output_path: Path,
) -> list[str]:
    max_per_grid = cols * (cols + 1)
    grid_files = []

    for chunk_idx, start_idx in enumerate(range(0, len(slides), max_per_grid)):
        end_idx = min(start_idx + max_per_grid, len(slides))
        chunk_slides = slides[start_idx:end_idx]

        grid = create_grid(chunk_slides, cols, width)

        if len(slides) <= max_per_grid:
            grid_filename = output_path
        else:
            stem = output_path.stem
            suffix = output_path.suffix
            grid_filename = output_path.parent / f"{stem}-{chunk_idx + 1}{suffix}"

        grid_filename.parent.mkdir(parents=True, exist_ok=True)
        grid.save(str(grid_filename), quality=JPEG_QUALITY)
        grid_files.append(str(grid_filename))

    return grid_files


def create_grid(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
) -> Image.Image:
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    with Image.open(slides[0][0]) as img:
        aspect = img.height / img.width
    height = int(width * aspect)

    rows = (len(slides) + cols - 1) // cols
    grid_w = cols * width + (cols + 1) * GRID_PADDING
    grid_h = rows * (height + font_size + label_padding * 2) + (rows + 1) * GRID_PADDING

    grid = Image.new("RGB", (grid_w, grid_h), "white")
    draw = ImageDraw.Draw(grid)

    try:
        font = ImageFont.load_default(size=font_size)
    except Exception:
        font = ImageFont.load_default()

    for i, (img_path, slide_name) in enumerate(slides):
        row, col = i // cols, i % cols
        x = col * width + (col + 1) * GRID_PADDING
        y_base = (
            row * (height + font_size + label_padding * 2) + (row + 1) * GRID_PADDING
        )

        label = slide_name
        bbox = draw.textbbox((0, 0), label, font=font)
        text_w = bbox[2] - bbox[0]
        draw.text(
            (x + (width - text_w) // 2, y_base + label_padding),
            label,
            fill="black",
            font=font,
        )

        y_thumbnail = y_base + label_padding + font_size + label_padding

        with Image.open(img_path) as img:
            img.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = img.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            grid.paste(img, (tx, ty))

            if BORDER_WIDTH > 0:
                draw.rectangle(
                    [
                        (tx - BORDER_WIDTH, ty - BORDER_WIDTH),
                        (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1),
                    ],
                    outline="gray",
                    width=BORDER_WIDTH,
                )

    return grid


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/.gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files (can opt-in for committing if needed)
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/AGENTS.md
================================================
# AGENTS.md

This file provides guidance to AI agents when working on the web application within this directory.

## Important Notes

- **The development server is already running** at a dynamically allocated port. Do NOT run `npm run dev` yourself.
- **We do NOT use a `src` directory** - all code lives directly in the root folders (`app/`, `components/`, `lib/`, etc.)
- If the app needs pre-computation (data processing, API calls, etc.), create a bash or python script called `prepare.sh`/`prepare.py` at the root of this directory
- **CRITICAL: Create small, modular components** - Do NOT write everything in `page.tsx`. Break your UI into small, reusable components in the `components/` directory. Each component should have a single responsibility and be in its own file.

## Data Preparation Scripts

**CRITICAL: Always re-run data scripts after modifying them.**

If a `prepare.sh` or `prepare.py` script exists at the root of this directory, it is responsible for generating/loading data that the frontend consumes. 

### When to Run the Script

You MUST run the data preparation script:
1. **After creating** the script for the first time
2. **After modifying** the script logic (new data sources, changed processing, etc.)
3. **After updating** any data files the script reads from
4. **Before testing** the frontend if you're unsure if data is fresh

### How to Run

```bash
# For bash scripts
bash prepare.sh

# For python scripts
python prepare.py
```

### Common Mistake

❌ **Updating the script but forgetting to run it** - This leaves stale data in place and the frontend won't reflect your changes. Always run the script immediately after modifying it.

## Commands

```bash
npm run dev      # Start development server (DO NOT RUN - already running)
npm run lint     # Run ESLint
```

## Architecture

This is a **Next.js 16.1.1** application using the **App Router** with **React 19** and **TypeScript**. It serves as a component showcase/template built on shadcn/ui.

### File Organization Philosophy

**Prioritize small, incremental file writes.** Break your application into many small components rather than monolithic page files.

#### Component Organization

```
components/
├── dashboard/           # Feature-specific components
│   ├── stats-card.tsx
│   ├── activity-feed.tsx
│   └── recent-items.tsx
├── charts/             # Chart components
│   ├── line-chart.tsx
│   ├── bar-chart.tsx
│   └── pie-chart.tsx
├── data/               # Data display components
│   ├── data-table.tsx
│   ├── filter-bar.tsx
│   └── sort-controls.tsx
└── layout/             # Layout components
    ├── header.tsx
    ├── sidebar.tsx
    └── footer.tsx
```

#### Page Structure

Pages (`app/page.tsx`) should be **thin orchestration layers** that compose components:

```typescript
// ✅ GOOD - page.tsx is just composition
import { StatsCard } from "@/components/dashboard/stats-card";
import { ActivityFeed } from "@/components/dashboard/activity-feed";
import { RecentItems } from "@/components/dashboard/recent-items";

export default function DashboardPage() {
  return (
    <div className="container py-6 space-y-6">
      <h1 className="text-3xl font-bold">Dashboard</h1>
      <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
        <StatsCard title="Total Users" value={1234} />
        <StatsCard title="Active Sessions" value={56} />
        <StatsCard title="Revenue" value="$12,345" />
      </div>
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        <ActivityFeed />
        <RecentItems />
      </div>
    </div>
  );
}

// ❌ BAD - Everything in page.tsx (500+ lines of mixed logic)
export default function DashboardPage() {
  // ... 500 lines of component logic, state, handlers, JSX ...
}
```

#### Component Granularity

Create a new component file when:
- A UI section has distinct functionality (e.g., `user-profile-card.tsx`)
- Logic exceeds ~50-100 lines
- A pattern is reused 2+ times
- Testing/maintenance would benefit from isolation

**Example: Dashboard Feature**

Instead of writing everything in `app/page.tsx`:

```typescript
// components/dashboard/stats-card.tsx
export function StatsCard({ title, value, trend }: StatsCardProps) {
  return (
    <Card>
      <CardHeader>
        <CardTitle className="text-sm font-medium">{title}</CardTitle>
      </CardHeader>
      <CardContent>
        <div className="text-2xl font-bold">{value}</div>
        {trend && <p className="text-xs text-muted-foreground">{trend}</p>}
      </CardContent>
    </Card>
  );
}

// components/dashboard/activity-feed.tsx
export function ActivityFeed() {
  // Activity feed logic here
}

// components/dashboard/recent-items.tsx
export function RecentItems() {
  // Recent items logic here
}
```

#### Benefits of Small Components

1. **Incremental Development**: Write one component at a time, test, iterate
2. **Better Diffs**: Smaller files = clearer git diffs and easier reviews
3. **Reusability**: Components can be imported across pages
4. **Maintainability**: Easier to locate and fix issues
5. **Hot Reload Efficiency**: Changes to small files reload faster
6. **Parallel Development**: Multiple features can be worked on independently

### Tech Stack

- **Framework**: Next.js 16.1.1 with App Router
- **React**: React 19
- **Language**: TypeScript
- **Styling**: Tailwind CSS v4 with CSS variables in OKLCH color space
- **Charts**: recharts for data visualization
- **UI Components**: shadcn/ui (53 components) built on Radix UI primitives
- **Variants**: class-variance-authority (CVA) for component variants
- **Class Merging**: `cn()` utility in `lib/utils.ts` (clsx + tailwind-merge)
- **Theme**: Dark mode enforced (via `dark` class on `<html>`)

### Key Directories

- `app/` - Next.js App Router pages and layouts
- `components/ui/` - shadcn/ui component library (Button, Card, Dialog, etc.)
- `components/` - App-specific components
- `hooks/` - Custom React hooks (e.g., `use-mobile.ts`)
- `lib/` - Utilities (`cn()` function)

### Component Patterns

- **Compound Components**: Components like `DropdownMenu`, `Dialog`, `Select` export multiple sub-components (Trigger, Content, Item)
- **Variants via CVA**: Use `variants` prop for size/style variations (e.g., `buttonVariants`)
- **Radix UI Primitives**: UI components wrap Radix for accessibility

### Path Aliases

All imports use `@/` alias (e.g., `@/components/ui/button`, `@/lib/utils`)

### shadcn/ui Configuration

Located in `components.json`:

- Style: `radix-nova`
- RSC enabled
- Icons: lucide-react

### Theme Variables

Global CSS variables defined in `app/globals.css` control colors, radius, and spacing. **Dark mode is enforced site-wide** via the `dark` class on the `<html>` element in `app/layout.tsx`. All styling should assume dark mode is active.

### Dark Mode Priority

- **Dark mode is the default and only theme** - do not design for light mode
- The `dark` class is permanently set on `<html>` in `layout.tsx`
- Use dark-appropriate colors: `bg-background`, `text-foreground`, etc.
- Ensure sufficient contrast for dark backgrounds
- Test all components in dark mode only

## Styling Guidelines

### CRITICAL: Use Only shadcn/ui Components

**MINIMIZE freestyling and creating custom components.** This application uses a complete, professionally designed component library (shadcn/ui). You MUST use the existing components from `components/ui/` for most UI needs.

#### Available shadcn/ui Components

All components are in `components/ui/`. Import using `@/components/ui/component-name`.

**Layout & Structure:**

- `Card` (`card.tsx`) - Content containers with CardHeader, CardTitle, CardDescription, CardContent, CardFooter
- `Separator` (`separator.tsx`) - Horizontal/vertical dividers
- `Tabs` (`tabs.tsx`) - Tabbed interfaces with Tabs, TabsList, TabsTrigger, TabsContent
- `ScrollArea` (`scroll-area.tsx`) - Styled scrollable regions
- `Resizable` (`resizable.tsx`) - Resizable panel layouts
- `Drawer` (`drawer.tsx`) - Bottom/side drawer overlays
- `Sidebar` (`sidebar.tsx`) - Application sidebar layout
- `AspectRatio` (`aspect-ratio.tsx`) - Maintain aspect ratios

**Forms & Inputs:**

- `Button` (`button.tsx`) - Primary, secondary, destructive, outline, ghost, link variants
- `ButtonGroup` (`button-group.tsx`) - Group of related buttons
- `Input` (`input.tsx`) - Text inputs with various states
- `InputGroup` (`input-group.tsx`) - Input with addons/icons
- `Textarea` (`textarea.tsx`) - Multi-line text input
- `Checkbox` (`checkbox.tsx`) - Checkboxes with indeterminate state
- `RadioGroup` (`radio-group.tsx`) - Radio button groups
- `Switch` (`switch.tsx`) - Toggle switches
- `Select` (`select.tsx`) - Dropdown select menus
- `NativeSelect` (`native-select.tsx`) - Native HTML select
- `Combobox` (`combobox.tsx`) - Autocomplete select with search
- `Command` (`command.tsx`) - Command palette/search interface
- `Field` (`field.tsx`) - Form field wrapper with label and error
- `Label` (`label.tsx`) - Form labels with proper accessibility
- `Slider` (`slider.tsx`) - Range sliders
- `Calendar` (`calendar.tsx`) - Date picker calendar
- `Toggle` (`toggle.tsx`) - Toggle button
- `ToggleGroup` (`toggle-group.tsx`) - Group of toggle buttons

**Navigation:**

- `NavigationMenu` (`navigation-menu.tsx`) - Complex navigation menus
- `Menubar` (`menubar.tsx`) - Application menu bar
- `Breadcrumb` (`breadcrumb.tsx`) - Breadcrumb navigation
- `Pagination` (`pagination.tsx`) - Page navigation controls

**Feedback & Overlays:**

- `Dialog` (`dialog.tsx`) - Modal dialogs
- `AlertDialog` (`alert-dialog.tsx`) - Confirmation dialogs
- `Sheet` (`sheet.tsx`) - Side sheets/panels
- `Popover` (`popover.tsx`) - Floating popovers
- `HoverCard` (`hover-card.tsx`) - Hover-triggered cards
- `Tooltip` (`tooltip.tsx`) - Tooltips on hover
- `Sonner` (`sonner.tsx`) - Toast notifications
- `Alert` (`alert.tsx`) - Static alert messages
- `Progress` (`progress.tsx`) - Progress bars
- `Skeleton` (`skeleton.tsx`) - Loading skeletons
- `Spinner` (`spinner.tsx`) - Loading spinners
- `Empty` (`empty.tsx`) - Empty state placeholder

**Menus & Dropdowns:**

- `DropdownMenu` (`dropdown-menu.tsx`) - Dropdown menus with submenus
- `ContextMenu` (`context-menu.tsx`) - Right-click context menus

**Data Display:**

- `Table` (`table.tsx`) - Data tables with Table, TableHeader, TableBody, TableRow, TableCell, etc.
- `Badge` (`badge.tsx`) - Status badges and tags
- `Avatar` (`avatar.tsx`) - User avatars with fallbacks
- `Accordion` (`accordion.tsx`) - Collapsible content sections
- `Collapsible` (`collapsible.tsx`) - Simple collapse/expand
- `Carousel` (`carousel.tsx`) - Image/content carousels
- `Item` (`item.tsx`) - List item component
- `Kbd` (`kbd.tsx`) - Keyboard shortcut display

**Data Visualization:**

- `Chart` (`chart.tsx`) - Chart wrapper with ChartContainer, ChartTooltip, ChartTooltipContent, ChartLegend, ChartLegendContent

### Component Usage Principles

#### 1. **Never Create Custom Components**

```typescript
// ❌ WRONG - Do not create freestyle components
function CustomCard({ title, children }) {
  return (
    <div className="rounded-lg border p-4">
      <h3 className="font-bold">{title}</h3>
      {children}
    </div>
  );
}

// ✅ CORRECT - Use shadcn Card
import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";

function MyComponent() {
  return (
    <Card>
      <CardHeader>
        <CardTitle>Title</CardTitle>
      </CardHeader>
      <CardContent>Content here</CardContent>
    </Card>
  );
}
```

#### 2. **Use Component Variants, Don't Style Directly**

```typescript
// ❌ WRONG - Applying custom Tailwind classes
<button className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded">
  Click me
</button>

// ✅ CORRECT - Use Button variants
import { Button } from "@/components/ui/button";

<Button variant="default">Click me</Button>
<Button variant="destructive">Delete</Button>
<Button variant="outline">Cancel</Button>
<Button variant="ghost">Subtle Action</Button>
<Button size="sm">Small</Button>
<Button size="lg">Large</Button>
```

#### 3. **Compose Compound Components**

Many shadcn components export multiple sub-components. Use them as designed:

```typescript
// ✅ Dropdown Menu Composition
import {
  DropdownMenu,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuSeparator,
  DropdownMenuLabel,
} from "@/components/ui/dropdown-menu";

<DropdownMenu>
  <DropdownMenuTrigger asChild>
    <Button variant="outline">Options</Button>
  </DropdownMenuTrigger>
  <DropdownMenuContent>
    <DropdownMenuLabel>Actions</DropdownMenuLabel>
    <DropdownMenuSeparator />
    <DropdownMenuItem>Edit</DropdownMenuItem>
    <DropdownMenuItem>Delete</DropdownMenuItem>
  </DropdownMenuContent>
</DropdownMenu>
```

#### 4. **Use Layout Components for Structure**

```typescript
// ✅ Use Card for content sections
import { Card, CardHeader, CardTitle, CardDescription, CardContent, CardFooter } from "@/components/ui/card";

<Card>
  <CardHeader>
    <CardTitle>Dashboard</CardTitle>
    <CardDescription>Overview of your data</CardDescription>
  </CardHeader>
  <CardContent>
    {/* Your content */}
  </CardContent>
  <CardFooter>
    <Button>Action</Button>
  </CardFooter>
</Card>
```

### Styling Rules

#### 1. **Spacing & Layout**

Use Tailwind's utility classes for spacing, but stick to the design system:

- Gap: `gap-2`, `gap-4`, `gap-6`, `gap-8`
- Padding: `p-2`, `p-4`, `p-6`, `p-8`
- Margins: Prefer `gap` and `space-y-*` over margins

#### 2. **Colors**

All colors come from CSS variables in `app/globals.css`. Use semantic color classes:

- `bg-background`, `bg-foreground`
- `bg-card`, `text-card-foreground`
- `bg-primary`, `text-primary-foreground`
- `bg-secondary`, `text-secondary-foreground`
- `bg-muted`, `text-muted-foreground`
- `bg-accent`, `text-accent-foreground`
- `bg-destructive`, `text-destructive-foreground`
- `border-border`, `border-input`
- `ring-ring`

**DO NOT use arbitrary color values** like `bg-blue-500` or `text-red-600`.

#### **CRITICAL: Color Contrast Pairing Rules**

**Always pair background colors with their matching foreground colors.** The color system uses paired variables where each background has a corresponding text color designed for proper contrast.

| Background Class | Text Class to Use | Description |
|-----------------|-------------------|-------------|
| `bg-background` | `text-foreground` | Main page background |
| `bg-card` | `text-card-foreground` | Card containers |
| `bg-primary` | `text-primary-foreground` | Primary buttons/accents |
| `bg-secondary` | `text-secondary-foreground` | Secondary elements |
| `bg-muted` | `text-muted-foreground` | Muted/subtle areas |
| `bg-accent` | `text-accent-foreground` | Accent highlights |
| `bg-destructive` | `text-destructive-foreground` | Error/delete actions |

**Examples:**

```typescript
// ✅ CORRECT - Matching background and foreground pairs
<div className="bg-card text-card-foreground">Content</div>
<Button className="bg-primary text-primary-foreground">Click</Button>
<div className="bg-muted text-muted-foreground">Subtle text</div>

// ❌ WRONG - Mismatched colors causing contrast issues
<div className="bg-background text-background">Invisible text!</div>
<div className="bg-card text-foreground">May have poor contrast</div>
<Button className="bg-primary text-primary">White on white!</Button>
```

**Key Rules:**

1. **Never use the same color for background and text** (e.g., `bg-foreground text-foreground`)
2. **Always use the `-foreground` variant for text** when using a colored background
3. **For text on `bg-background`**, use `text-foreground` (primary) or `text-muted-foreground` (secondary)
4. **Test visually** - if text is hard to read, you have a contrast problem

#### 3. **Typography**

Use Tailwind text utilities (no separate Typography component):

- Headings: `text-xl font-semibold`, `text-2xl font-bold`, etc.
- Body: `text-sm`, `text-base`
- Secondary text: `text-muted-foreground`
- Use semantic HTML: `<h1>`, `<h2>`, `<p>`, etc.
- **Always wrap text** - Use `max-w-prose` or `max-w-xl` for readable line lengths
- **Prevent overflow** - Use `break-words` or `truncate` for long text that might overflow containers

#### 4. **Responsive Design**

Use Tailwind's responsive prefixes:

```typescript
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
  {/* Responsive grid */}
</div>
```

#### 5. **Icons**

Use Lucide React icons (already configured):

```typescript
import { Check, X, ChevronDown, User } from "lucide-react";

<Button>
  <Check className="mr-2 h-4 w-4" />
  Confirm
</Button>
```

### Data Visualization

For charts and data visualization, use the **shadcn/ui Chart components** (`@/components/ui/chart`) which wrap recharts with consistent theming. Charts should be **elegant, informative, and digestible at a glance**.

#### Chart Design Principles

1. **Clarity over complexity** - A chart should communicate ONE key insight immediately
2. **Minimal visual noise** - Remove anything that doesn't add information
3. **Consistent styling** - Use `ChartConfig` for colors, not arbitrary values
4. **Responsive** - Always use `ChartContainer` (includes ResponsiveContainer)
5. **Accessible** - Use `ChartTooltip` with `ChartTooltipContent` for proper styling

#### Chart Type Selection

| Data Type | Recommended Chart | Use Case |
|-----------|-------------------|----------|
| Trend over time | `LineChart` or `AreaChart` | Stock prices, user growth, metrics over days/months |
| Comparing categories | `BarChart` | Revenue by product, users by region |
| Part of whole | `PieChart` or `RadialBarChart` | Market share, budget allocation |
| Distribution | `BarChart` (horizontal) | Survey responses, rating distribution |
| Correlation | `ScatterChart` | Price vs. quality, age vs. income |

#### shadcn/ui Chart Components

Always import from the shadcn chart component:

```typescript
import {
  ChartContainer,
  ChartTooltip,
  ChartTooltipContent,
  ChartLegend,
  ChartLegendContent,
  type ChartConfig,
} from "@/components/ui/chart";
import { LineChart, Line, XAxis, YAxis, CartesianGrid } from "recharts";
```

#### ChartConfig - Define Colors and Labels

The `ChartConfig` object defines colors and labels for your data series. This ensures consistent theming:

```typescript
const chartConfig = {
  revenue: {
    label: "Revenue",
    color: "var(--chart-1)",
  },
  expenses: {
    label: "Expenses", 
    color: "var(--chart-2)",
  },
} satisfies ChartConfig;
```

#### Basic Line Chart Template

```typescript
import {
  ChartContainer,
  ChartTooltip,
  ChartTooltipContent,
  type ChartConfig,
} from "@/components/ui/chart";
import { LineChart, Line, XAxis, YAxis, CartesianGrid } from "recharts";

const chartConfig = {
  value: {
    label: "Value",
    color: "var(--chart-1)",
  },
} satisfies ChartConfig;

<ChartContainer config={chartConfig} className="h-[300px] w-full">
  <LineChart data={data} accessibilityLayer>
    <CartesianGrid vertical={false} />
    <XAxis
      dataKey="month"
      tickLine={false}
      axisLine={false}
      tickMargin={8}
    />
    <YAxis tickLine={false} axisLine={false} tickMargin={8} />
    <ChartTooltip content={<ChartTooltipContent />} />
    <Line
      type="monotone"
      dataKey="value"
      stroke="var(--color-value)"
      strokeWidth={2}
      dot={false}
    />
  </LineChart>
</ChartContainer>
```

#### Bar Chart with Multiple Series

```typescript
const chartConfig = {
  revenue: {
    label: "Revenue",
    color: "var(--chart-1)",
  },
  expenses: {
    label: "Expenses",
    color: "var(--chart-2)",
  },
} satisfies ChartConfig;

<ChartContainer config={chartConfig} className="h-[300px] w-full">
  <BarChart data={data} accessibilityLayer>
    <CartesianGrid vertical={false} />
    <XAxis dataKey="month" tickLine={false} axisLine={false} tickMargin={8} />
    <YAxis tickLine={false} axisLine={false} tickMargin={8} />
    <ChartTooltip content={<ChartTooltipContent />} />
    <ChartLegend content={<ChartLegendContent />} />
    <Bar dataKey="revenue" fill="var(--color-revenue)" radius={4} />
    <Bar dataKey="expenses" fill="var(--color-expenses)" radius={4} />
  </BarChart>
</ChartContainer>
```

#### Pie/Donut Chart

```typescript
const chartConfig = {
  desktop: { label: "Desktop", color: "var(--chart-1)" },
  mobile: { label: "Mobile", color: "var(--chart-2)" },
  tablet: { label: "Tablet", color: "var(--chart-3)" },
} satisfies ChartConfig;

<ChartContainer config={chartConfig} className="h-[300px] w-full">
  <PieChart>
    <ChartTooltip content={<ChartTooltipContent hideLabel />} />
    <Pie
      data={data}
      dataKey="value"
      nameKey="name"
      innerRadius={60}  // Remove for solid pie, keep for donut
      strokeWidth={5}
    />
    <ChartLegend content={<ChartLegendContent nameKey="name" />} />
  </PieChart>
</ChartContainer>
```

#### Chart Styling Rules

**Colors (use CSS variables from globals.css):**
- `var(--chart-1)` through `var(--chart-5)` - Primary chart colors
- `var(--primary)` - For single-series emphasis
- `var(--muted)` - For de-emphasized data

**Color References in Charts:**
- In `ChartConfig`: Use `color: "var(--chart-1)"`
- In chart elements: Use `fill="var(--color-keyname)"` or `stroke="var(--color-keyname)"`
- The `keyname` matches the key in your `ChartConfig`

**Visual Cleanup:**
- Set `tickLine={false}` and `axisLine={false}` on axes for cleaner look
- Use `vertical={false}` on `CartesianGrid` for horizontal-only grid lines
- Use `dot={false}` on line charts unless individual points matter
- Add `radius={4}` to bars for rounded corners
- Limit to 3-5 data series maximum per chart

**Avoid:**
- ❌ 3D effects
- ❌ More than 5-6 colors in one chart
- ❌ Legends with more than 5 items (simplify the data instead)
- ❌ Dual Y-axes (confusing - use two separate charts)
- ❌ Pie charts with more than 5-6 slices
- ❌ Custom tooltip styling - use `ChartTooltipContent`

#### Fallback to Raw Recharts

If shadcn/ui Chart components don't support a specific chart type (e.g., ScatterChart, ComposedChart, RadarChart), you can use recharts directly:

```typescript
import { ScatterChart, Scatter, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer } from "recharts";

<ResponsiveContainer width="100%" height={300}>
  <ScatterChart>
    <CartesianGrid strokeDasharray="3 3" stroke="var(--border)" />
    <XAxis dataKey="x" stroke="var(--muted-foreground)" fontSize={12} tickLine={false} axisLine={false} />
    <YAxis dataKey="y" stroke="var(--muted-foreground)" fontSize={12} tickLine={false} axisLine={false} />
    <Tooltip 
      contentStyle={{ 
        backgroundColor: "var(--card)", 
        border: "1px solid var(--border)", 
        borderRadius: "6px" 
      }} 
    />
    <Scatter data={data} fill="var(--chart-1)" />
  </ScatterChart>
</ResponsiveContainer>
```

**When using raw recharts:**
- Still use CSS variables for colors (`var(--chart-1)`, etc.)
- Match styling to shadcn conventions (tickLine={false}, axisLine={false})
- Style tooltips to match the design system

#### Data Accuracy Checklist

Before displaying a chart, verify:
- [ ] `ChartConfig` keys match your data's `dataKey` values
- [ ] Data values are correctly mapped to the right axes
- [ ] Axis labels match the data units (%, $, count, etc.)
- [ ] Time series data is sorted chronologically
- [ ] No missing data points that would break the visualization
- [ ] `ChartTooltip` with `ChartTooltipContent` is included
- [ ] Chart title/context makes the insight clear

### Common Patterns

#### Loading States

```typescript
import { Skeleton } from "@/components/ui/skeleton";

{isLoading ? (
  <Skeleton className="h-12 w-full" />
) : (
  <Content />
)}
```

#### Empty States

```typescript
import { Empty, EmptyHeader, EmptyTitle, EmptyDescription, EmptyMedia } from "@/components/ui/empty";
import { Inbox } from "lucide-react";

<Empty>
  <EmptyHeader>
    <EmptyMedia variant="icon">
      <Inbox />
    </EmptyMedia>
    <EmptyTitle>No data available</EmptyTitle>
    <EmptyDescription>
      There's nothing to display yet. Add some items to get started.
    </EmptyDescription>
  </EmptyHeader>
</Empty>
```

#### Interactive Lists

```typescript
import { ScrollArea } from "@/components/ui/scroll-area";
import { ItemGroup, Item, ItemContent, ItemTitle, ItemDescription, ItemMedia } from "@/components/ui/item";
import { FileText } from "lucide-react";

<ScrollArea className="h-[400px]">
  <ItemGroup>
    {items.map((item) => (
      <Item key={item.id} variant="outline">
        <ItemMedia variant="icon">
          <FileText />
        </ItemMedia>
        <ItemContent>
          <ItemTitle>{item.name}</ItemTitle>
          <ItemDescription>{item.description}</ItemDescription>
        </ItemContent>
      </Item>
    ))}
  </ItemGroup>
</ScrollArea>
```

#### Form Fields

```typescript
import { Field, FieldLabel, FieldDescription, FieldError, FieldGroup } from "@/components/ui/field";
import { Input } from "@/components/ui/input";
import { Button } from "@/components/ui/button";

<FieldGroup>
  <Field>
    <FieldLabel>Email</FieldLabel>
    <Input type="email" placeholder="you@example.com" />
    <FieldDescription>We'll never share your email.</FieldDescription>
  </Field>
  <Field>
    <FieldLabel>Password</FieldLabel>
    <Input type="password" />
    <FieldError>Password must be at least 8 characters.</FieldError>
  </Field>
  <Button type="submit">Sign up</Button>
</FieldGroup>
```

### What NOT To Do

❌ **Don't create custom styled divs when a component exists**
❌ **Don't use arbitrary Tailwind colors** (use CSS variables)
❌ **Don't import UI libraries** like Material-UI, Ant Design, etc.
❌ **Don't use inline styles** except for dynamic values
❌ **Don't create custom form inputs** (use Field, Input, Select, etc. from components/ui)
❌ **Don't add new dependencies** without checking if shadcn covers it
❌ **Don't write everything in page.tsx** - break into separate component files
❌ **Don't design for light mode** - this site is dark mode only
❌ **Don't use `dark:` variants** - dark mode is always active, use base classes

### Development Workflow

1. **Plan the component structure** - Identify logical UI sections before writing code
2. **Create components incrementally** - Write one small component file at a time
3. **Test each component** - Verify it works before moving to the next
4. **Compose in page.tsx** - Import and arrange your components in the page
5. **Iterate** - Refine individual components without touching others

### Summary

This application has a **complete, production-ready component library**. Your job is to:
1. **Compose** shadcn/ui components (from `components/ui/`)
2. **Create small, focused component files** (in `components/`)
3. **Keep pages thin** - pages should orchestrate components, not contain implementation

Think of yourself as assembling LEGO blocks—all the UI pieces you need already exist in `components/ui/`, and you create small, organized structures by composing them into feature-specific components.


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/globals.css
================================================
@import "tailwindcss";
@import "tw-animate-css";
@import "shadcn/tailwind.css";

@custom-variant dark (&:is(.dark *));

@theme inline {
  --color-background: var(--background);
  --color-foreground: var(--foreground);
  --font-sans: var(--font-sans);
  --font-mono: var(--font-geist-mono);
  --color-sidebar-ring: var(--sidebar-ring);
  --color-sidebar-border: var(--sidebar-border);
  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
  --color-sidebar-accent: var(--sidebar-accent);
  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
  --color-sidebar-primary: var(--sidebar-primary);
  --color-sidebar-foreground: var(--sidebar-foreground);
  --color-sidebar: var(--sidebar);
  --color-chart-5: var(--chart-5);
  --color-chart-4: var(--chart-4);
  --color-chart-3: var(--chart-3);
  --color-chart-2: var(--chart-2);
  --color-chart-1: var(--chart-1);
  --color-ring: var(--ring);
  --color-input: var(--input);
  --color-border: var(--border);
  --color-destructive: var(--destructive);
  --color-accent-foreground: var(--accent-foreground);
  --color-accent: var(--accent);
  --color-muted-foreground: var(--muted-foreground);
  --color-muted: var(--muted);
  --color-secondary-foreground: var(--secondary-foreground);
  --color-secondary: var(--secondary);
  --color-primary-foreground: var(--primary-foreground);
  --color-primary: var(--primary);
  --color-popover-foreground: var(--popover-foreground);
  --color-popover: var(--popover);
  --color-card-foreground: var(--card-foreground);
  --color-card: var(--card);
  --radius-sm: calc(var(--radius) - 4px);
  --radius-md: calc(var(--radius) - 2px);
  --radius-lg: var(--radius);
  --radius-xl: calc(var(--radius) + 4px);
  --radius-2xl: calc(var(--radius) + 8px);
  --radius-3xl: calc(var(--radius) + 12px);
  --radius-4xl: calc(var(--radius) + 16px);
}

:root {
  --background: oklch(1 0 0);
  --foreground: oklch(0.145 0 0);
  --card: oklch(1 0 0);
  --card-foreground: oklch(0.145 0 0);
  --popover: oklch(1 0 0);
  --popover-foreground: oklch(0.145 0 0);
  --primary: oklch(0.67 0.16 58);
  --primary-foreground: oklch(0.99 0.02 95);
  --secondary: oklch(0.967 0.001 286.375);
  --secondary-foreground: oklch(0.21 0.006 285.885);
  --muted: oklch(0.97 0 0);
  --muted-foreground: oklch(0.556 0 0);
  --accent: oklch(0.97 0 0);
  --accent-foreground: oklch(0.205 0 0);
  --destructive: oklch(0.58 0.22 27);
  --border: oklch(0.922 0 0);
  --input: oklch(0.922 0 0);
  --ring: oklch(0.708 0 0);
  --chart-1: oklch(0.88 0.15 92);
  --chart-2: oklch(0.77 0.16 70);
  --chart-3: oklch(0.67 0.16 58);
  --chart-4: oklch(0.56 0.15 49);
  --chart-5: oklch(0.47 0.12 46);
  --radius: 0.625rem;
  --sidebar: oklch(0.985 0 0);
  --sidebar-foreground: oklch(0.145 0 0);
  --sidebar-primary: oklch(0.67 0.16 58);
  --sidebar-primary-foreground: oklch(0.99 0.02 95);
  --sidebar-accent: oklch(0.97 0 0);
  --sidebar-accent-foreground: oklch(0.205 0 0);
  --sidebar-border: oklch(0.922 0 0);
  --sidebar-ring: oklch(0.708 0 0);
}

.dark {
  --background: oklch(0.145 0 0);
  --foreground: oklch(0.985 0 0);
  --card: oklch(0.205 0 0);
  --card-foreground: oklch(0.985 0 0);
  --popover: oklch(0.205 0 0);
  --popover-foreground: oklch(0.985 0 0);
  --primary: oklch(0.77 0.16 70);
  --primary-foreground: oklch(0.28 0.07 46);
  --secondary: oklch(0.274 0.006 286.033);
  --secondary-foreground: oklch(0.985 0 0);
  --muted: oklch(0.269 0 0);
  --muted-foreground: oklch(0.708 0 0);
  --accent: oklch(0.371 0 0);
  --accent-foreground: oklch(0.985 0 0);
  --destructive: oklch(0.704 0.191 22.216);
  --border: oklch(1 0 0 / 10%);
  --input: oklch(1 0 0 / 15%);
  --ring: oklch(0.556 0 0);
  /* Chart colors optimized for dark backgrounds - brighter and more vibrant */
  --chart-1: oklch(0.82 0.18 140);
  --chart-2: oklch(0.75 0.2 200);
  --chart-3: oklch(0.7 0.22 280);
  --chart-4: oklch(0.78 0.18 50);
  --chart-5: oklch(0.72 0.2 330);
  --sidebar: oklch(0.205 0 0);
  --sidebar-foreground: oklch(0.985 0 0);
  --sidebar-primary: oklch(0.77 0.16 70);
  --sidebar-primary-foreground: oklch(0.28 0.07 46);
  --sidebar-accent: oklch(0.269 0 0);
  --sidebar-accent-foreground: oklch(0.985 0 0);
  --sidebar-border: oklch(1 0 0 / 10%);
  --sidebar-ring: oklch(0.556 0 0);
}

@layer base {
  * {
    @apply border-border outline-ring/50;
  }
  body {
    @apply bg-background text-foreground;
  }
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/layout.tsx
================================================
import type { Metadata } from "next";
import { Geist, Geist_Mono, Inter } from "next/font/google";
import "./globals.css";

const inter = Inter({ subsets: ["latin"], variable: "--font-sans" });

const geistSans = Geist({
  variable: "--font-geist-sans",
  subsets: ["latin"],
});

const geistMono = Geist_Mono({
  variable: "--font-geist-mono",
  subsets: ["latin"],
});

export const metadata: Metadata = {
  title: "Onyx Craft",
  description: "Crafting your next great idea.",
};

export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  return (
    <html lang="en" className={`${inter.variable} dark`}>
      <body
        className={`${geistSans.variable} ${geistMono.variable} antialiased`}
      >
        {children}
      </body>
    </html>
  );
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/page.tsx
================================================
"use client";

import { useState, useEffect, useRef } from "react";

const messages = [
  "Punching wood...",
  "Gathering resources...",
  "Placing blocks...",
  "Crafting your workspace...",
  "Mining for dependencies...",
  "Smelting the code...",
  "Enchanting with magic...",
  "World generation complete...",
  "/gamemode 1",
];

const MESSAGE_COUNT = messages.length;
const TYPE_DELAY = 40;
const LINE_PAUSE = 800;
const RESET_DELAY = 2000;

export default function CraftingLoader() {
  const [display, setDisplay] = useState({
    lines: [] as string[],
    currentText: "",
  });

  const lineIndexRef = useRef(0);
  const charIndexRef = useRef(0);
  const lastUpdateRef = useRef(0);
  const timeoutRef = useRef<NodeJS.Timeout | undefined>(undefined);
  const rafRef = useRef<number | undefined>(undefined);

  useEffect(() => {
    let isActive = true;

    const update = (now: number) => {
      if (!isActive) return;

      const lineIdx = lineIndexRef.current;
      const charIdx = charIndexRef.current;

      if (lineIdx >= MESSAGE_COUNT) {
        timeoutRef.current = setTimeout(() => {
          if (!isActive) return;
          lineIndexRef.current = 0;
          charIndexRef.current = 0;
          setDisplay({ lines: [], currentText: "" });
          lastUpdateRef.current = performance.now();
          rafRef.current = requestAnimationFrame(update);
        }, RESET_DELAY);
        return;
      }

      const msg = messages[lineIdx];
      if (!msg) return;

      const elapsed = now - lastUpdateRef.current;

      if (charIdx < msg.length) {
        if (elapsed >= TYPE_DELAY) {
          charIndexRef.current = charIdx + 1;
          setDisplay((prev) => ({
            lines: prev.lines,
            currentText: msg.substring(0, charIdx + 1),
          }));
          lastUpdateRef.current = now;
        }
      } else if (elapsed >= LINE_PAUSE) {
        setDisplay((prev) => ({
          lines: [...prev.lines, msg],
          currentText: "",
        }));
        lineIndexRef.current = lineIdx + 1;
        charIndexRef.current = 0;
        lastUpdateRef.current = now;
      }

      rafRef.current = requestAnimationFrame(update);
    };

    lastUpdateRef.current = performance.now();
    rafRef.current = requestAnimationFrame(update);

    return () => {
      isActive = false;
      if (rafRef.current !== undefined) cancelAnimationFrame(rafRef.current);
      if (timeoutRef.current !== undefined) clearTimeout(timeoutRef.current);
    };
  }, []);

  const { lines, currentText } = display;
  const hasCurrentText = currentText.length > 0;

  return (
    <div className="min-h-screen bg-gradient-to-br from-neutral-950 via-neutral-900 to-neutral-950 flex flex-col items-center justify-center p-4">
      <div className="w-full max-w-md rounded-sm overflow-hidden shadow-2xl border-2 border-neutral-700">
        <div className="bg-neutral-800 px-4 py-3 flex items-center gap-2 border-b-2 border-neutral-700">
          <div className="w-3 h-3 rounded-none bg-red-500" />
          <div className="w-3 h-3 rounded-none bg-yellow-500" />
          <div className="w-3 h-3 rounded-none bg-green-500" />
          <span className="ml-4 text-neutral-500 text-sm font-mono">
            crafting_table
          </span>
        </div>

        <div className="bg-neutral-900 p-6 min-h-[250px] font-mono text-sm">
          {lines.map((line, i) => (
            <div key={i} className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span>{line}</span>
            </div>
          ))}
          {hasCurrentText && (
            <div className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span>{currentText}</span>
              <span className="w-2 h-5 bg-emerald-500 animate-pulse ml-0.5" />
            </div>
          )}
          {!hasCurrentText && (
            <div className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span className="w-2 h-5 bg-emerald-500 animate-pulse" />
            </div>
          )}
        </div>
      </div>

      <p className="mt-6 text-neutral-500 text-sm font-mono">
        Crafting your next great idea...
      </p>
    </div>
  );
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/site.webmanifest
================================================
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}

================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/component-example.tsx
================================================
"use client";

import * as React from "react";

import { Example, ExampleWrapper } from "@/components/example";
import {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogMedia,
  AlertDialogTitle,
  AlertDialogTrigger,
} from "@/components/ui/alert-dialog";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Card,
  CardAction,
  CardContent,
  CardDescription,
  CardFooter,
  CardHeader,
  CardTitle,
} from "@/components/ui/card";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  DropdownMenu,
  DropdownMenuCheckboxItem,
  DropdownMenuContent,
  DropdownMenuGroup,
  DropdownMenuItem,
  DropdownMenuLabel,
  DropdownMenuPortal,
  DropdownMenuRadioGroup,
  DropdownMenuRadioItem,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuSub,
  DropdownMenuSubContent,
  DropdownMenuSubTrigger,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { Field, FieldGroup, FieldLabel } from "@/components/ui/field";
import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectGroup,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import {
  PlusIcon,
  BluetoothIcon,
  MoreVerticalIcon,
  FileIcon,
  FolderIcon,
  FolderOpenIcon,
  FileCodeIcon,
  MoreHorizontalIcon,
  FolderSearchIcon,
  SaveIcon,
  DownloadIcon,
  EyeIcon,
  LayoutIcon,
  PaletteIcon,
  SunIcon,
  MoonIcon,
  MonitorIcon,
  UserIcon,
  CreditCardIcon,
  SettingsIcon,
  KeyboardIcon,
  LanguagesIcon,
  BellIcon,
  MailIcon,
  ShieldIcon,
  HelpCircleIcon,
  FileTextIcon,
  LogOutIcon,
} from "lucide-react";

export function ComponentExample() {
  return (
    <ExampleWrapper>
      <CardExample />
      <FormExample />
    </ExampleWrapper>
  );
}

function CardExample() {
  return (
    <Example title="Card" className="items-center justify-center">
      <Card className="relative w-full max-w-sm overflow-hidden pt-0">
        <div className="bg-primary absolute inset-0 z-30 aspect-video opacity-50 mix-blend-color" />
        <img
          src="https://images.unsplash.com/photo-1604076850742-4c7221f3101b?q=80&w=1887&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
          alt="Photo by mymind on Unsplash"
          title="Photo by mymind on Unsplash"
          className="relative z-20 aspect-video w-full object-cover brightness-60 grayscale"
        />
        <CardHeader>
          <CardTitle>Observability Plus is replacing Monitoring</CardTitle>
          <CardDescription>
            Switch to the improved way to explore your data, with natural
            language. Monitoring will no longer be available on the Pro plan in
            November, 2025
          </CardDescription>
        </CardHeader>
        <CardFooter>
          <AlertDialog>
            <AlertDialogTrigger asChild>
              <Button>
                <PlusIcon data-icon="inline-start" />
                Show Dialog
              </Button>
            </AlertDialogTrigger>
            <AlertDialogContent size="sm">
              <AlertDialogHeader>
                <AlertDialogMedia>
                  <BluetoothIcon />
                </AlertDialogMedia>
                <AlertDialogTitle>Allow accessory to connect?</AlertDialogTitle>
                <AlertDialogDescription>
                  Do you want to allow the USB accessory to connect to this
                  device?
                </AlertDialogDescription>
              </AlertDialogHeader>
              <AlertDialogFooter>
                <AlertDialogCancel>Don&apos;t allow</AlertDialogCancel>
                <AlertDialogAction>Allow</AlertDialogAction>
              </AlertDialogFooter>
            </AlertDialogContent>
          </AlertDialog>
          <Badge variant="secondary" className="ml-auto">
            Warning
          </Badge>
        </CardFooter>
      </Card>
    </Example>
  );
}

const frameworks = [
  "Next.js",
  "SvelteKit",
  "Nuxt.js",
  "Remix",
  "Astro",
] as const;

function FormExample() {
  const [notifications, setNotifications] = React.useState({
    email: true,
    sms: false,
    push: true,
  });
  const [theme, setTheme] = React.useState("light");

  return (
    <Example title="Form">
      <Card className="w-full max-w-md">
        <CardHeader>
          <CardTitle>User Information</CardTitle>
          <CardDescription>Please fill in your details below</CardDescription>
          <CardAction>
            <DropdownMenu>
              <DropdownMenuTrigger asChild>
                <Button variant="ghost" size="icon">
                  <MoreVerticalIcon />
                  <span className="sr-only">More options</span>
                </Button>
              </DropdownMenuTrigger>
              <DropdownMenuContent align="end" className="w-56">
                <DropdownMenuGroup>
                  <DropdownMenuLabel>File</DropdownMenuLabel>
                  <DropdownMenuItem>
                    <FileIcon />
                    New File
                    <DropdownMenuShortcut>⌘N</DropdownMenuShortcut>
                  </DropdownMenuItem>
                  <DropdownMenuItem>
                    <FolderIcon />
                    New Folder
                    <DropdownMenuShortcut>⇧⌘N</DropdownMenuShortcut>
                  </DropdownMenuItem>
                  <DropdownMenuSub>
                    <DropdownMenuSubTrigger>
                      <FolderOpenIcon />
                      Open Recent
                    </DropdownMenuSubTrigger>
                    <DropdownMenuPortal>
                      <DropdownMenuSubContent>
                        <DropdownMenuGroup>
                          <DropdownMenuLabel>Recent Projects</DropdownMenuLabel>
                          <DropdownMenuItem>
                            <FileCodeIcon />
                            Project Alpha
                          </DropdownMenuItem>
                          <DropdownMenuItem>
                            <FileCodeIcon />
                            Project Beta
                          </DropdownMenuItem>
                          <DropdownMenuSub>
                            <DropdownMenuSubTrigger>
                              <MoreHorizontalIcon />
                              More Projects
                            </DropdownMenuSubTrigger>
                            <DropdownMenuPortal>
                              <DropdownMenuSubContent>
                                <DropdownMenuItem>
                                  <FileCodeIcon />
                                  Project Gamma
                                </DropdownMenuItem>
                                <DropdownMenuItem>
                                  <FileCodeIcon />
                                  Project Delta
                                </DropdownMenuItem>
                              </DropdownMenuSubContent>
                            </DropdownMenuPortal>
                          </DropdownMenuSub>
                        </DropdownMenuGroup>
                        <DropdownMenuSeparator />
                        <DropdownMenuGroup>
                          <DropdownMenuItem>
                            <FolderSearchIcon />
                            Browse...
                          </DropdownMenuItem>
                        </DropdownMenuGroup>
                      </DropdownMenuSubContent>
                    </DropdownMenuPortal>
                  </DropdownMenuSub>
                  <DropdownMenuSeparator />
                  <DropdownMenuItem>
                    <SaveIcon />
                    Save
                    <DropdownMenuShortcut>⌘S</DropdownMenuShortcut>
                  </DropdownMenuItem>
                  <DropdownMenuItem>
                    <DownloadIcon />
                    Export
                    <DropdownMenuShortcut>⇧⌘E</DropdownMenuShortcut>
                  </DropdownMenuItem>
                </DropdownMenuGroup>
                <DropdownMenuSeparator />
                <DropdownMenuGroup>
                  <DropdownMenuLabel>View</DropdownMenuLabel>
                  <DropdownMenuCheckboxItem
                    checked={notifications.email}
                    onCheckedChange={(checked) =>
                      setNotifications({
                        ...notifications,
                        email: checked === true,
                      })
                    }
                  >
                    <EyeIcon />
                    Show Sidebar
                  </DropdownMenuCheckboxItem>
                  <DropdownMenuCheckboxItem
                    checked={notifications.sms}
                    onCheckedChange={(checked) =>
                      setNotifications({
                        ...notifications,
                        sms: checked === true,
                      })
                    }
                  >
                    <LayoutIcon />
                    Show Status Bar
                  </DropdownMenuCheckboxItem>
                  <DropdownMenuSub>
                    <DropdownMenuSubTrigger>
                      <PaletteIcon />
                      Theme
                    </DropdownMenuSubTrigger>
                    <DropdownMenuPortal>
                      <DropdownMenuSubContent>
                        <DropdownMenuGroup>
                          <DropdownMenuLabel>Appearance</DropdownMenuLabel>
                          <DropdownMenuRadioGroup
                            value={theme}
                            onValueChange={setTheme}
                          >
                            <DropdownMenuRadioItem value="light">
                              <SunIcon />
                              Light
                            </DropdownMenuRadioItem>
                            <DropdownMenuRadioItem value="dark">
                              <MoonIcon />
                              Dark
                            </DropdownMenuRadioItem>
                            <DropdownMenuRadioItem value="system">
                              <MonitorIcon />
                              System
                            </DropdownMenuRadioItem>
                          </DropdownMenuRadioGroup>
                        </DropdownMenuGroup>
                      </DropdownMenuSubContent>
                    </DropdownMenuPortal>
                  </DropdownMenuSub>
                </DropdownMenuGroup>
                <DropdownMenuSeparator />
                <DropdownMenuGroup>
                  <DropdownMenuLabel>Account</DropdownMenuLabel>
                  <DropdownMenuItem>
                    <UserIcon />
                    Profile
                    <DropdownMenuShortcut>⇧⌘P</DropdownMenuShortcut>
                  </DropdownMenuItem>
                  <DropdownMenuItem>
                    <CreditCardIcon />
                    Billing
                  </DropdownMenuItem>
                  <DropdownMenuSub>
                    <DropdownMenuSubTrigger>
                      <SettingsIcon />
                      Settings
                    </DropdownMenuSubTrigger>
                    <DropdownMenuPortal>
                      <DropdownMenuSubContent>
                        <DropdownMenuGroup>
                          <DropdownMenuLabel>Preferences</DropdownMenuLabel>
                          <DropdownMenuItem>
                            <KeyboardIcon />
                            Keyboard Shortcuts
                          </DropdownMenuItem>
                          <DropdownMenuItem>
                            <LanguagesIcon />
                            Language
                          </DropdownMenuItem>
                          <DropdownMenuSub>
                            <DropdownMenuSubTrigger>
                              <BellIcon />
                              Notifications
                            </DropdownMenuSubTrigger>
                            <DropdownMenuPortal>
                              <DropdownMenuSubContent>
                                <DropdownMenuGroup>
                                  <DropdownMenuLabel>
                                    Notification Types
                                  </DropdownMenuLabel>
                                  <DropdownMenuCheckboxItem
                                    checked={notifications.push}
                                    onCheckedChange={(checked) =>
                                      setNotifications({
                                        ...notifications,
                                        push: checked === true,
                                      })
                                    }
                                  >
                                    <BellIcon />
                                    Push Notifications
                                  </DropdownMenuCheckboxItem>
                                  <DropdownMenuCheckboxItem
                                    checked={notifications.email}
                                    onCheckedChange={(checked) =>
                                      setNotifications({
                                        ...notifications,
                                        email: checked === true,
                                      })
                                    }
                                  >
                                    <MailIcon />
                                    Email Notifications
                                  </DropdownMenuCheckboxItem>
                                </DropdownMenuGroup>
                              </DropdownMenuSubContent>
                            </DropdownMenuPortal>
                          </DropdownMenuSub>
                        </DropdownMenuGroup>
                        <DropdownMenuSeparator />
                        <DropdownMenuGroup>
                          <DropdownMenuItem>
                            <ShieldIcon />
                            Privacy & Security
                          </DropdownMenuItem>
                        </DropdownMenuGroup>
                      </DropdownMenuSubContent>
                    </DropdownMenuPortal>
                  </DropdownMenuSub>
                </DropdownMenuGroup>
                <DropdownMenuSeparator />
                <DropdownMenuGroup>
                  <DropdownMenuItem>
                    <HelpCircleIcon />
                    Help & Support
                  </DropdownMenuItem>
                  <DropdownMenuItem>
                    <FileTextIcon />
                    Documentation
                  </DropdownMenuItem>
                </DropdownMenuGroup>
                <DropdownMenuSeparator />
                <DropdownMenuGroup>
                  <DropdownMenuItem variant="destructive">
                    <LogOutIcon />
                    Sign Out
                    <DropdownMenuShortcut>⇧⌘Q</DropdownMenuShortcut>
                  </DropdownMenuItem>
                </DropdownMenuGroup>
              </DropdownMenuContent>
            </DropdownMenu>
          </CardAction>
        </CardHeader>
        <CardContent>
          <form>
            <FieldGroup>
              <div className="grid grid-cols-2 gap-4">
                <Field>
                  <FieldLabel htmlFor="small-form-name">Name</FieldLabel>
                  <Input
                    id="small-form-name"
                    placeholder="Enter your name"
                    required
                  />
                </Field>
                <Field>
                  <FieldLabel htmlFor="small-form-role">Role</FieldLabel>
                  <Select defaultValue="">
                    <SelectTrigger id="small-form-role">
                      <SelectValue placeholder="Select a role" />
                    </SelectTrigger>
                    <SelectContent>
                      <SelectGroup>
                        <SelectItem value="developer">Developer</SelectItem>
                        <SelectItem value="designer">Designer</SelectItem>
                        <SelectItem value="manager">Manager</SelectItem>
                        <SelectItem value="other">Other</SelectItem>
                      </SelectGroup>
                    </SelectContent>
                  </Select>
                </Field>
              </div>
              <Field>
                <FieldLabel htmlFor="small-form-framework">
                  Framework
                </FieldLabel>
                <Combobox items={frameworks}>
                  <ComboboxInput
                    id="small-form-framework"
                    placeholder="Select a framework"
                    required
                  />
                  <ComboboxContent>
                    <ComboboxEmpty>No frameworks found.</ComboboxEmpty>
                    <ComboboxList>
                      {(item) => (
                        <ComboboxItem key={item} value={item}>
                          {item}
                        </ComboboxItem>
                      )}
                    </ComboboxList>
                  </ComboboxContent>
                </Combobox>
              </Field>
              <Field>
                <FieldLabel htmlFor="small-form-comments">Comments</FieldLabel>
                <Textarea
                  id="small-form-comments"
                  placeholder="Add any additional comments"
                />
              </Field>
              <Field orientation="horizontal">
                <Button type="submit">Submit</Button>
                <Button variant="outline" type="button">
                  Cancel
                </Button>
              </Field>
            </FieldGroup>
          </form>
        </CardContent>
      </Card>
    </Example>
  );
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/example.tsx
================================================
import { cn } from "@/lib/utils";

function ExampleWrapper({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div className="bg-background w-full">
      <div
        data-slot="example-wrapper"
        className={cn(
          "mx-auto grid min-h-screen w-full max-w-5xl min-w-0 content-center items-start gap-8 p-4 pt-2 sm:gap-12 sm:p-6 md:grid-cols-2 md:gap-8 lg:p-12 2xl:max-w-6xl",
          className,
        )}
        {...props}
      />
    </div>
  );
}

function Example({
  title,
  children,
  className,
  containerClassName,
  ...props
}: React.ComponentProps<"div"> & {
  title?: string;
  containerClassName?: string;
}) {
  return (
    <div
      data-slot="example"
      className={cn(
        "mx-auto flex w-full max-w-lg min-w-0 flex-col gap-1 self-stretch lg:max-w-none",
        containerClassName,
      )}
      {...props}
    >
      {title && (
        <div className="text-muted-foreground px-1.5 py-2 text-xs font-medium">
          {title}
        </div>
      )}
      <div
        data-slot="example-content"
        className={cn(
          "bg-background text-foreground flex min-w-0 flex-1 flex-col items-start gap-6 border border-dashed p-4 sm:p-6 *:[div:not([class*='w-'])]:w-full",
          className,
        )}
      >
        {children}
      </div>
    </div>
  );
}

export { ExampleWrapper, Example };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/accordion.tsx
================================================
"use client";

import * as React from "react";
import { Accordion as AccordionPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { ChevronDownIcon, ChevronUpIcon } from "lucide-react";

function Accordion({
  className,
  ...props
}: React.ComponentProps<typeof AccordionPrimitive.Root>) {
  return (
    <AccordionPrimitive.Root
      data-slot="accordion"
      className={cn("flex w-full flex-col", className)}
      {...props}
    />
  );
}

function AccordionItem({
  className,
  ...props
}: React.ComponentProps<typeof AccordionPrimitive.Item>) {
  return (
    <AccordionPrimitive.Item
      data-slot="accordion-item"
      className={cn("not-last:border-b", className)}
      {...props}
    />
  );
}

function AccordionTrigger({
  className,
  children,
  ...props
}: React.ComponentProps<typeof AccordionPrimitive.Trigger>) {
  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger
        data-slot="accordion-trigger"
        className={cn(
          "focus-visible:ring-ring/50 focus-visible:border-ring focus-visible:after:border-ring **:data-[slot=accordion-trigger-icon]:text-muted-foreground rounded-lg py-2.5 text-left text-sm font-medium hover:underline focus-visible:ring-[3px] **:data-[slot=accordion-trigger-icon]:ml-auto **:data-[slot=accordion-trigger-icon]:size-4 group/accordion-trigger relative flex flex-1 items-start justify-between border border-transparent transition-all outline-none disabled:pointer-events-none disabled:opacity-50",
          className,
        )}
        {...props}
      >
        {children}
        <ChevronDownIcon
          data-slot="accordion-trigger-icon"
          className="pointer-events-none shrink-0 group-aria-expanded/accordion-trigger:hidden"
        />
        <ChevronUpIcon
          data-slot="accordion-trigger-icon"
          className="pointer-events-none hidden shrink-0 group-aria-expanded/accordion-trigger:inline"
        />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  );
}

function AccordionContent({
  className,
  children,
  ...props
}: React.ComponentProps<typeof AccordionPrimitive.Content>) {
  return (
    <AccordionPrimitive.Content
      data-slot="accordion-content"
      className="data-open:animate-accordion-down data-closed:animate-accordion-up text-sm overflow-hidden"
      {...props}
    >
      <div
        className={cn(
          "pt-0 pb-2.5 [&_a]:hover:text-foreground h-(--radix-accordion-content-height) [&_a]:underline [&_a]:underline-offset-3 [&_p:not(:last-child)]:mb-4",
          className,
        )}
      >
        {children}
      </div>
    </AccordionPrimitive.Content>
  );
}

export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/alert-dialog.tsx
================================================
"use client";

import * as React from "react";
import { AlertDialog as AlertDialogPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";

function AlertDialog({
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Root>) {
  return <AlertDialogPrimitive.Root data-slot="alert-dialog" {...props} />;
}

function AlertDialogTrigger({
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Trigger>) {
  return (
    <AlertDialogPrimitive.Trigger data-slot="alert-dialog-trigger" {...props} />
  );
}

function AlertDialogPortal({
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Portal>) {
  return (
    <AlertDialogPrimitive.Portal data-slot="alert-dialog-portal" {...props} />
  );
}

function AlertDialogOverlay({
  className,
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Overlay>) {
  return (
    <AlertDialogPrimitive.Overlay
      data-slot="alert-dialog-overlay"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogContent({
  className,
  size = "default",
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Content> & {
  size?: "default" | "sm";
}) {
  return (
    <AlertDialogPortal>
      <AlertDialogOverlay />
      <AlertDialogPrimitive.Content
        data-slot="alert-dialog-content"
        data-size={size}
        className={cn(
          "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 bg-background ring-foreground/10 gap-4 rounded-xl p-4 ring-1 duration-100 data-[size=default]:max-w-xs data-[size=sm]:max-w-xs data-[size=default]:sm:max-w-sm group/alert-dialog-content fixed top-1/2 left-1/2 z-50 grid w-full -translate-x-1/2 -translate-y-1/2 outline-none",
          className,
        )}
        {...props}
      />
    </AlertDialogPortal>
  );
}

function AlertDialogHeader({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-dialog-header"
      className={cn(
        "grid grid-rows-[auto_1fr] place-items-center gap-1.5 text-center has-data-[slot=alert-dialog-media]:grid-rows-[auto_auto_1fr] has-data-[slot=alert-dialog-media]:gap-x-4 sm:group-data-[size=default]/alert-dialog-content:place-items-start sm:group-data-[size=default]/alert-dialog-content:text-left sm:group-data-[size=default]/alert-dialog-content:has-data-[slot=alert-dialog-media]:grid-rows-[auto_1fr]",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogFooter({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-dialog-footer"
      className={cn(
        "bg-muted/50 -mx-4 -mb-4 rounded-b-xl border-t p-4 flex flex-col-reverse gap-2 group-data-[size=sm]/alert-dialog-content:grid group-data-[size=sm]/alert-dialog-content:grid-cols-2 sm:flex-row sm:justify-end",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogMedia({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-dialog-media"
      className={cn(
        "bg-muted mb-2 inline-flex size-10 items-center justify-center rounded-md sm:group-data-[size=default]/alert-dialog-content:row-span-2 *:[svg:not([class*='size-'])]:size-6",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogTitle({
  className,
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Title>) {
  return (
    <AlertDialogPrimitive.Title
      data-slot="alert-dialog-title"
      className={cn(
        "text-base font-medium sm:group-data-[size=default]/alert-dialog-content:group-has-data-[slot=alert-dialog-media]/alert-dialog-content:col-start-2",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogDescription({
  className,
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Description>) {
  return (
    <AlertDialogPrimitive.Description
      data-slot="alert-dialog-description"
      className={cn(
        "text-muted-foreground *:[a]:hover:text-foreground text-sm text-balance md:text-pretty *:[a]:underline *:[a]:underline-offset-3",
        className,
      )}
      {...props}
    />
  );
}

function AlertDialogAction({
  className,
  variant = "default",
  size = "default",
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Action> &
  Pick<React.ComponentProps<typeof Button>, "variant" | "size">) {
  return (
    <Button variant={variant} size={size} asChild>
      <AlertDialogPrimitive.Action
        data-slot="alert-dialog-action"
        className={cn(className)}
        {...props}
      />
    </Button>
  );
}

function AlertDialogCancel({
  className,
  variant = "outline",
  size = "default",
  ...props
}: React.ComponentProps<typeof AlertDialogPrimitive.Cancel> &
  Pick<React.ComponentProps<typeof Button>, "variant" | "size">) {
  return (
    <Button variant={variant} size={size} asChild>
      <AlertDialogPrimitive.Cancel
        data-slot="alert-dialog-cancel"
        className={cn(className)}
        {...props}
      />
    </Button>
  );
}

export {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogMedia,
  AlertDialogOverlay,
  AlertDialogPortal,
  AlertDialogTitle,
  AlertDialogTrigger,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/alert.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";

import { cn } from "@/lib/utils";

const alertVariants = cva(
  "grid gap-0.5 rounded-lg border px-2.5 py-2 text-left text-sm has-data-[slot=alert-action]:relative has-data-[slot=alert-action]:pr-18 has-[>svg]:grid-cols-[auto_1fr] has-[>svg]:gap-x-2 *:[svg]:row-span-2 *:[svg]:translate-y-0.5 *:[svg]:text-current *:[svg:not([class*='size-'])]:size-4 w-full relative group/alert",
  {
    variants: {
      variant: {
        default: "bg-card text-card-foreground",
        destructive:
          "text-destructive bg-card *:data-[slot=alert-description]:text-destructive/90 *:[svg]:text-current",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

function Alert({
  className,
  variant,
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>) {
  return (
    <div
      data-slot="alert"
      role="alert"
      className={cn(alertVariants({ variant }), className)}
      {...props}
    />
  );
}

function AlertTitle({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-title"
      className={cn(
        "font-medium group-has-[>svg]/alert:col-start-2 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3",
        className,
      )}
      {...props}
    />
  );
}

function AlertDescription({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-description"
      className={cn(
        "text-muted-foreground text-sm text-balance md:text-pretty [&_p:not(:last-child)]:mb-4 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3",
        className,
      )}
      {...props}
    />
  );
}

function AlertAction({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="alert-action"
      className={cn("absolute top-2 right-2", className)}
      {...props}
    />
  );
}

export { Alert, AlertTitle, AlertDescription, AlertAction };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/aspect-ratio.tsx
================================================
"use client";

import { AspectRatio as AspectRatioPrimitive } from "radix-ui";

function AspectRatio({
  ...props
}: React.ComponentProps<typeof AspectRatioPrimitive.Root>) {
  return <AspectRatioPrimitive.Root data-slot="aspect-ratio" {...props} />;
}

export { AspectRatio };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/avatar.tsx
================================================
"use client";

import * as React from "react";
import { Avatar as AvatarPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Avatar({
  className,
  size = "default",
  ...props
}: React.ComponentProps<typeof AvatarPrimitive.Root> & {
  size?: "default" | "sm" | "lg";
}) {
  return (
    <AvatarPrimitive.Root
      data-slot="avatar"
      data-size={size}
      className={cn(
        "size-8 rounded-full after:rounded-full data-[size=lg]:size-10 data-[size=sm]:size-6 after:border-border group/avatar relative flex shrink-0 select-none after:absolute after:inset-0 after:border after:mix-blend-darken dark:after:mix-blend-lighten",
        className,
      )}
      {...props}
    />
  );
}

function AvatarImage({
  className,
  ...props
}: React.ComponentProps<typeof AvatarPrimitive.Image>) {
  return (
    <AvatarPrimitive.Image
      data-slot="avatar-image"
      className={cn(
        "rounded-full aspect-square size-full object-cover",
        className,
      )}
      {...props}
    />
  );
}

function AvatarFallback({
  className,
  ...props
}: React.ComponentProps<typeof AvatarPrimitive.Fallback>) {
  return (
    <AvatarPrimitive.Fallback
      data-slot="avatar-fallback"
      className={cn(
        "bg-muted text-muted-foreground rounded-full flex size-full items-center justify-center text-sm group-data-[size=sm]/avatar:text-xs",
        className,
      )}
      {...props}
    />
  );
}

function AvatarBadge({ className, ...props }: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="avatar-badge"
      className={cn(
        "bg-primary text-primary-foreground ring-background absolute right-0 bottom-0 z-10 inline-flex items-center justify-center rounded-full bg-blend-color ring-2 select-none",
        "group-data-[size=sm]/avatar:size-2 group-data-[size=sm]/avatar:[&>svg]:hidden",
        "group-data-[size=default]/avatar:size-2.5 group-data-[size=default]/avatar:[&>svg]:size-2",
        "group-data-[size=lg]/avatar:size-3 group-data-[size=lg]/avatar:[&>svg]:size-2",
        className,
      )}
      {...props}
    />
  );
}

function AvatarGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="avatar-group"
      className={cn(
        "*:data-[slot=avatar]:ring-background group/avatar-group flex -space-x-2 *:data-[slot=avatar]:ring-2",
        className,
      )}
      {...props}
    />
  );
}

function AvatarGroupCount({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="avatar-group-count"
      className={cn(
        "bg-muted text-muted-foreground size-8 rounded-full text-sm group-has-data-[size=lg]/avatar-group:size-10 group-has-data-[size=sm]/avatar-group:size-6 [&>svg]:size-4 group-has-data-[size=lg]/avatar-group:[&>svg]:size-5 group-has-data-[size=sm]/avatar-group:[&>svg]:size-3 ring-background relative flex shrink-0 items-center justify-center ring-2",
        className,
      )}
      {...props}
    />
  );
}

export {
  Avatar,
  AvatarImage,
  AvatarFallback,
  AvatarGroup,
  AvatarGroupCount,
  AvatarBadge,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/badge.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";

const badgeVariants = cva(
  "h-5 gap-1 rounded-4xl border border-transparent px-2 py-0.5 text-xs font-medium transition-all has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&>svg]:size-3! inline-flex items-center justify-center w-fit whitespace-nowrap shrink-0 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive overflow-hidden group/badge",
  {
    variants: {
      variant: {
        default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
        secondary:
          "bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80",
        destructive:
          "bg-destructive/10 [a]:hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 text-destructive dark:bg-destructive/20",
        outline:
          "border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground",
        ghost:
          "hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50",
        link: "text-primary underline-offset-4 hover:underline",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

function Badge({
  className,
  variant = "default",
  asChild = false,
  ...props
}: React.ComponentProps<"span"> &
  VariantProps<typeof badgeVariants> & { asChild?: boolean }) {
  const Comp = asChild ? Slot.Root : "span";

  return (
    <Comp
      data-slot="badge"
      data-variant={variant}
      className={cn(badgeVariants({ variant }), className)}
      {...props}
    />
  );
}

export { Badge, badgeVariants };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/breadcrumb.tsx
================================================
import * as React from "react";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";
import { ChevronRightIcon, MoreHorizontalIcon } from "lucide-react";

function Breadcrumb({ className, ...props }: React.ComponentProps<"nav">) {
  return (
    <nav
      aria-label="breadcrumb"
      data-slot="breadcrumb"
      className={cn(className)}
      {...props}
    />
  );
}

function BreadcrumbList({ className, ...props }: React.ComponentProps<"ol">) {
  return (
    <ol
      data-slot="breadcrumb-list"
      className={cn(
        "text-muted-foreground gap-1.5 text-sm flex flex-wrap items-center break-words",
        className,
      )}
      {...props}
    />
  );
}

function BreadcrumbItem({ className, ...props }: React.ComponentProps<"li">) {
  return (
    <li
      data-slot="breadcrumb-item"
      className={cn("gap-1 inline-flex items-center", className)}
      {...props}
    />
  );
}

function BreadcrumbLink({
  asChild,
  className,
  ...props
}: React.ComponentProps<"a"> & {
  asChild?: boolean;
}) {
  const Comp = asChild ? Slot.Root : "a";

  return (
    <Comp
      data-slot="breadcrumb-link"
      className={cn("hover:text-foreground transition-colors", className)}
      {...props}
    />
  );
}

function BreadcrumbPage({ className, ...props }: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="breadcrumb-page"
      role="link"
      aria-disabled="true"
      aria-current="page"
      className={cn("text-foreground font-normal", className)}
      {...props}
    />
  );
}

function BreadcrumbSeparator({
  children,
  className,
  ...props
}: React.ComponentProps<"li">) {
  return (
    <li
      data-slot="breadcrumb-separator"
      role="presentation"
      aria-hidden="true"
      className={cn("[&>svg]:size-3.5", className)}
      {...props}
    >
      {children ?? <ChevronRightIcon />}
    </li>
  );
}

function BreadcrumbEllipsis({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="breadcrumb-ellipsis"
      role="presentation"
      aria-hidden="true"
      className={cn(
        "size-5 [&>svg]:size-4 flex items-center justify-center",
        className,
      )}
      {...props}
    >
      <MoreHorizontalIcon />
      <span className="sr-only">More</span>
    </span>
  );
}

export {
  Breadcrumb,
  BreadcrumbList,
  BreadcrumbItem,
  BreadcrumbLink,
  BreadcrumbPage,
  BreadcrumbSeparator,
  BreadcrumbEllipsis,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/button-group.tsx
================================================
import { cva, type VariantProps } from "class-variance-authority";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";
import { Separator } from "@/components/ui/separator";

const buttonGroupVariants = cva(
  "has-[>[data-slot=button-group]]:gap-2 has-[select[aria-hidden=true]:last-child]:[&>[data-slot=select-trigger]:last-of-type]:rounded-r-lg flex w-fit items-stretch [&>*]:focus-visible:z-10 [&>*]:focus-visible:relative [&>[data-slot=select-trigger]:not([class*='w-'])]:w-fit [&>input]:flex-1",
  {
    variants: {
      orientation: {
        horizontal:
          "[&>[data-slot]:not(:has(~[data-slot]))]:rounded-r-lg! [&>*:not(:first-child)]:rounded-l-none [&>*:not(:first-child)]:border-l-0 [&>*:not(:last-child)]:rounded-r-none",
        vertical:
          "[&>[data-slot]:not(:has(~[data-slot]))]:rounded-b-lg! flex-col [&>*:not(:first-child)]:rounded-t-none [&>*:not(:first-child)]:border-t-0 [&>*:not(:last-child)]:rounded-b-none",
      },
    },
    defaultVariants: {
      orientation: "horizontal",
    },
  },
);

function ButtonGroup({
  className,
  orientation,
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof buttonGroupVariants>) {
  return (
    <div
      role="group"
      data-slot="button-group"
      data-orientation={orientation}
      className={cn(buttonGroupVariants({ orientation }), className)}
      {...props}
    />
  );
}

function ButtonGroupText({
  className,
  asChild = false,
  ...props
}: React.ComponentProps<"div"> & {
  asChild?: boolean;
}) {
  const Comp = asChild ? Slot.Root : "div";

  return (
    <Comp
      className={cn(
        "bg-muted gap-2 rounded-lg border px-2.5 text-sm font-medium [&_svg:not([class*='size-'])]:size-4 flex items-center [&_svg]:pointer-events-none",
        className,
      )}
      {...props}
    />
  );
}

function ButtonGroupSeparator({
  className,
  orientation = "vertical",
  ...props
}: React.ComponentProps<typeof Separator>) {
  return (
    <Separator
      data-slot="button-group-separator"
      orientation={orientation}
      className={cn(
        "bg-input relative self-stretch data-[orientation=horizontal]:mx-px data-[orientation=horizontal]:w-auto data-[orientation=vertical]:my-px data-[orientation=vertical]:h-auto",
        className,
      )}
      {...props}
    />
  );
}

export {
  ButtonGroup,
  ButtonGroupSeparator,
  ButtonGroupText,
  buttonGroupVariants,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/button.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";

const buttonVariants = cva(
  "focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 rounded-lg border border-transparent bg-clip-padding text-sm font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] [&_svg:not([class*='size-'])]:size-4 inline-flex items-center justify-center whitespace-nowrap transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none shrink-0 [&_svg]:shrink-0 outline-none group/button select-none",
  {
    variants: {
      variant: {
        default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
        outline:
          "border-border bg-background hover:bg-muted hover:text-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50 aria-expanded:bg-muted aria-expanded:text-foreground",
        secondary:
          "bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground",
        ghost:
          "hover:bg-muted hover:text-foreground dark:hover:bg-muted/50 aria-expanded:bg-muted aria-expanded:text-foreground",
        destructive:
          "bg-destructive/10 hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/20 text-destructive focus-visible:border-destructive/40 dark:hover:bg-destructive/30",
        link: "text-primary underline-offset-4 hover:underline",
      },
      size: {
        default:
          "h-8 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
        xs: "h-6 gap-1 rounded-[min(var(--radius-md),10px)] px-2 text-xs in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3",
        sm: "h-7 gap-1 rounded-[min(var(--radius-md),12px)] px-2.5 text-[0.8rem] in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3.5",
        lg: "h-9 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-3 has-data-[icon=inline-start]:pl-3",
        icon: "size-8",
        "icon-xs":
          "size-6 rounded-[min(var(--radius-md),10px)] in-data-[slot=button-group]:rounded-lg [&_svg:not([class*='size-'])]:size-3",
        "icon-sm":
          "size-7 rounded-[min(var(--radius-md),12px)] in-data-[slot=button-group]:rounded-lg",
        "icon-lg": "size-9",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

function Button({
  className,
  variant = "default",
  size = "default",
  asChild = false,
  ...props
}: React.ComponentProps<"button"> &
  VariantProps<typeof buttonVariants> & {
    asChild?: boolean;
  }) {
  const Comp = asChild ? Slot.Root : "button";

  return (
    <Comp
      data-slot="button"
      data-variant={variant}
      data-size={size}
      className={cn(buttonVariants({ variant, size, className }))}
      {...props}
    />
  );
}

export { Button, buttonVariants };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/calendar.tsx
================================================
"use client";

import * as React from "react";
import {
  DayPicker,
  getDefaultClassNames,
  type DayButton,
} from "react-day-picker";

import { cn } from "@/lib/utils";
import { Button, buttonVariants } from "@/components/ui/button";
import {
  ChevronLeftIcon,
  ChevronRightIcon,
  ChevronDownIcon,
} from "lucide-react";

function Calendar({
  className,
  classNames,
  showOutsideDays = true,
  captionLayout = "label",
  buttonVariant = "ghost",
  formatters,
  components,
  ...props
}: React.ComponentProps<typeof DayPicker> & {
  buttonVariant?: React.ComponentProps<typeof Button>["variant"];
}) {
  const defaultClassNames = getDefaultClassNames();

  return (
    <DayPicker
      showOutsideDays={showOutsideDays}
      className={cn(
        "p-2 [--cell-radius:var(--radius-md)] [--cell-size:--spacing(7)] bg-background group/calendar [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent",
        String.raw`rtl:**:[.rdp-button\_next>svg]:rotate-180`,
        String.raw`rtl:**:[.rdp-button\_previous>svg]:rotate-180`,
        className,
      )}
      captionLayout={captionLayout}
      formatters={{
        formatMonthDropdown: (date) =>
          date.toLocaleString("default", { month: "short" }),
        ...formatters,
      }}
      classNames={{
        root: cn("w-fit", defaultClassNames.root),
        months: cn(
          "flex gap-4 flex-col md:flex-row relative",
          defaultClassNames.months,
        ),
        month: cn("flex flex-col w-full gap-4", defaultClassNames.month),
        nav: cn(
          "flex items-center gap-1 w-full absolute top-0 inset-x-0 justify-between",
          defaultClassNames.nav,
        ),
        button_previous: cn(
          buttonVariants({ variant: buttonVariant }),
          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
          defaultClassNames.button_previous,
        ),
        button_next: cn(
          buttonVariants({ variant: buttonVariant }),
          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
          defaultClassNames.button_next,
        ),
        month_caption: cn(
          "flex items-center justify-center h-(--cell-size) w-full px-(--cell-size)",
          defaultClassNames.month_caption,
        ),
        dropdowns: cn(
          "w-full flex items-center text-sm font-medium justify-center h-(--cell-size) gap-1.5",
          defaultClassNames.dropdowns,
        ),
        dropdown_root: cn(
          "relative cn-calendar-dropdown-root rounded-(--cell-radius)",
          defaultClassNames.dropdown_root,
        ),
        dropdown: cn(
          "absolute bg-popover inset-0 opacity-0",
          defaultClassNames.dropdown,
        ),
        caption_label: cn(
          "select-none font-medium",
          captionLayout === "label"
            ? "text-sm"
            : "cn-calendar-caption-label rounded-(--cell-radius) flex items-center gap-1 text-sm  [&>svg]:text-muted-foreground [&>svg]:size-3.5",
          defaultClassNames.caption_label,
        ),
        table: "w-full border-collapse",
        weekdays: cn("flex", defaultClassNames.weekdays),
        weekday: cn(
          "text-muted-foreground rounded-(--cell-radius) flex-1 font-normal text-[0.8rem] select-none",
          defaultClassNames.weekday,
        ),
        week: cn("flex w-full mt-2", defaultClassNames.week),
        week_number_header: cn(
          "select-none w-(--cell-size)",
          defaultClassNames.week_number_header,
        ),
        week_number: cn(
          "text-[0.8rem] select-none text-muted-foreground",
          defaultClassNames.week_number,
        ),
        day: cn(
          "relative w-full rounded-(--cell-radius) h-full p-0 text-center [&:last-child[data-selected=true]_button]:rounded-r-(--cell-radius) group/day aspect-square select-none",
          props.showWeekNumber
            ? "[&:nth-child(2)[data-selected=true]_button]:rounded-l-(--cell-radius)"
            : "[&:first-child[data-selected=true]_button]:rounded-l-(--cell-radius)",
          defaultClassNames.day,
        ),
        range_start: cn(
          "rounded-l-(--cell-radius) bg-muted relative after:bg-muted after:absolute after:inset-y-0 after:w-4 after:right-0 -z-0 isolate",
          defaultClassNames.range_start,
        ),
        range_middle: cn("rounded-none", defaultClassNames.range_middle),
        range_end: cn(
          "rounded-r-(--cell-radius) bg-muted relative after:bg-muted-200 after:absolute after:inset-y-0 after:w-4 after:left-0 -z-0 isolate",
          defaultClassNames.range_end,
        ),
        today: cn(
          "bg-muted text-foreground rounded-(--cell-radius) data-[selected=true]:rounded-none",
          defaultClassNames.today,
        ),
        outside: cn(
          "text-muted-foreground aria-selected:text-muted-foreground",
          defaultClassNames.outside,
        ),
        disabled: cn(
          "text-muted-foreground opacity-50",
          defaultClassNames.disabled,
        ),
        hidden: cn("invisible", defaultClassNames.hidden),
        ...classNames,
      }}
      components={{
        Root: ({ className, rootRef, ...props }) => {
          return (
            <div
              data-slot="calendar"
              ref={rootRef}
              className={cn(className)}
              {...props}
            />
          );
        },
        Chevron: ({ className, orientation, ...props }) => {
          if (orientation === "left") {
            return (
              <ChevronLeftIcon className={cn("size-4", className)} {...props} />
            );
          }

          if (orientation === "right") {
            return (
              <ChevronRightIcon
                className={cn("size-4", className)}
                {...props}
              />
            );
          }

          return (
            <ChevronDownIcon className={cn("size-4", className)} {...props} />
          );
        },
        DayButton: CalendarDayButton,
        WeekNumber: ({ children, ...props }) => {
          return (
            <td {...props}>
              <div className="flex size-(--cell-size) items-center justify-center text-center">
                {children}
              </div>
            </td>
          );
        },
        ...components,
      }}
      {...props}
    />
  );
}

function CalendarDayButton({
  className,
  day,
  modifiers,
  ...props
}: React.ComponentProps<typeof DayButton>) {
  const defaultClassNames = getDefaultClassNames();

  const ref = React.useRef<HTMLButtonElement>(null);
  React.useEffect(() => {
    if (modifiers.focused) ref.current?.focus();
  }, [modifiers.focused]);

  return (
    <Button
      ref={ref}
      variant="ghost"
      size="icon"
      data-day={day.date.toLocaleDateString()}
      data-selected-single={
        modifiers.selected &&
        !modifiers.range_start &&
        !modifiers.range_end &&
        !modifiers.range_middle
      }
      data-range-start={modifiers.range_start}
      data-range-end={modifiers.range_end}
      data-range-middle={modifiers.range_middle}
      className={cn(
        "data-[selected-single=true]:bg-primary data-[selected-single=true]:text-primary-foreground data-[range-middle=true]:bg-muted data-[range-middle=true]:text-foreground data-[range-start=true]:bg-primary data-[range-start=true]:text-primary-foreground data-[range-end=true]:bg-primary data-[range-end=true]:text-primary-foreground group-data-[focused=true]/day:border-ring group-data-[focused=true]/day:ring-ring/50 dark:hover:text-foreground relative isolate z-10 flex aspect-square size-auto w-full min-w-(--cell-size) flex-col gap-1 border-0 leading-none font-normal group-data-[focused=true]/day:relative group-data-[focused=true]/day:z-10 group-data-[focused=true]/day:ring-[3px] data-[range-end=true]:rounded-(--cell-radius) data-[range-end=true]:rounded-r-(--cell-radius) data-[range-middle=true]:rounded-none data-[range-start=true]:rounded-(--cell-radius) data-[range-start=true]:rounded-l-(--cell-radius) [&>span]:text-xs [&>span]:opacity-70",
        defaultClassNames.day,
        className,
      )}
      {...props}
    />
  );
}

export { Calendar, CalendarDayButton };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/card.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

function Card({
  className,
  size = "default",
  ...props
}: React.ComponentProps<"div"> & { size?: "default" | "sm" }) {
  return (
    <div
      data-slot="card"
      data-size={size}
      className={cn(
        "ring-foreground/10 bg-card text-card-foreground gap-4 overflow-hidden rounded-xl py-4 text-sm ring-1 has-data-[slot=card-footer]:pb-0 has-[>img:first-child]:pt-0 data-[size=sm]:gap-3 data-[size=sm]:py-3 data-[size=sm]:has-data-[slot=card-footer]:pb-0 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl group/card flex flex-col",
        className,
      )}
      {...props}
    />
  );
}

function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-header"
      className={cn(
        "gap-1 rounded-t-xl px-4 group-data-[size=sm]/card:px-3 [.border-b]:pb-4 group-data-[size=sm]/card:[.border-b]:pb-3 group/card-header @container/card-header grid auto-rows-min items-start has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto]",
        className,
      )}
      {...props}
    />
  );
}

function CardTitle({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-title"
      className={cn(
        "text-base leading-snug font-medium group-data-[size=sm]/card:text-sm",
        className,
      )}
      {...props}
    />
  );
}

function CardDescription({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-description"
      className={cn("text-muted-foreground text-sm", className)}
      {...props}
    />
  );
}

function CardAction({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-action"
      className={cn(
        "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
        className,
      )}
      {...props}
    />
  );
}

function CardContent({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-content"
      className={cn("px-4 group-data-[size=sm]/card:px-3", className)}
      {...props}
    />
  );
}

function CardFooter({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="card-footer"
      className={cn(
        "bg-muted/50 rounded-b-xl border-t p-4 group-data-[size=sm]/card:p-3 flex items-center",
        className,
      )}
      {...props}
    />
  );
}

export {
  Card,
  CardHeader,
  CardFooter,
  CardTitle,
  CardAction,
  CardDescription,
  CardContent,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/carousel.tsx
================================================
"use client";

import * as React from "react";
import useEmblaCarousel, {
  type UseEmblaCarouselType,
} from "embla-carousel-react";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { ChevronLeftIcon, ChevronRightIcon } from "lucide-react";

type CarouselApi = UseEmblaCarouselType[1];
type UseCarouselParameters = Parameters<typeof useEmblaCarousel>;
type CarouselOptions = UseCarouselParameters[0];
type CarouselPlugin = UseCarouselParameters[1];

type CarouselProps = {
  opts?: CarouselOptions;
  plugins?: CarouselPlugin;
  orientation?: "horizontal" | "vertical";
  setApi?: (api: CarouselApi) => void;
};

type CarouselContextProps = {
  carouselRef: ReturnType<typeof useEmblaCarousel>[0];
  api: ReturnType<typeof useEmblaCarousel>[1];
  scrollPrev: () => void;
  scrollNext: () => void;
  canScrollPrev: boolean;
  canScrollNext: boolean;
} & CarouselProps;

const CarouselContext = React.createContext<CarouselContextProps | null>(null);

function useCarousel() {
  const context = React.useContext(CarouselContext);

  if (!context) {
    throw new Error("useCarousel must be used within a <Carousel />");
  }

  return context;
}

function Carousel({
  orientation = "horizontal",
  opts,
  setApi,
  plugins,
  className,
  children,
  ...props
}: React.ComponentProps<"div"> & CarouselProps) {
  const [carouselRef, api] = useEmblaCarousel(
    {
      ...opts,
      axis: orientation === "horizontal" ? "x" : "y",
    },
    plugins,
  );
  const [canScrollPrev, setCanScrollPrev] = React.useState(false);
  const [canScrollNext, setCanScrollNext] = React.useState(false);

  const onSelect = React.useCallback((api: CarouselApi) => {
    if (!api) return;
    setCanScrollPrev(api.canScrollPrev());
    setCanScrollNext(api.canScrollNext());
  }, []);

  const scrollPrev = React.useCallback(() => {
    api?.scrollPrev();
  }, [api]);

  const scrollNext = React.useCallback(() => {
    api?.scrollNext();
  }, [api]);

  const handleKeyDown = React.useCallback(
    (event: React.KeyboardEvent<HTMLDivElement>) => {
      if (event.key === "ArrowLeft") {
        event.preventDefault();
        scrollPrev();
      } else if (event.key === "ArrowRight") {
        event.preventDefault();
        scrollNext();
      }
    },
    [scrollPrev, scrollNext],
  );

  React.useEffect(() => {
    if (!api || !setApi) return;
    setApi(api);
  }, [api, setApi]);

  React.useEffect(() => {
    if (!api) return;
    onSelect(api);
    api.on("reInit", onSelect);
    api.on("select", onSelect);

    return () => {
      api?.off("select", onSelect);
    };
  }, [api, onSelect]);

  return (
    <CarouselContext.Provider
      value={{
        carouselRef,
        api: api,
        opts,
        orientation:
          orientation || (opts?.axis === "y" ? "vertical" : "horizontal"),
        scrollPrev,
        scrollNext,
        canScrollPrev,
        canScrollNext,
      }}
    >
      <div
        onKeyDownCapture={handleKeyDown}
        className={cn("relative", className)}
        role="region"
        aria-roledescription="carousel"
        data-slot="carousel"
        {...props}
      >
        {children}
      </div>
    </CarouselContext.Provider>
  );
}

function CarouselContent({ className, ...props }: React.ComponentProps<"div">) {
  const { carouselRef, orientation } = useCarousel();

  return (
    <div
      ref={carouselRef}
      className="overflow-hidden"
      data-slot="carousel-content"
    >
      <div
        className={cn(
          "flex",
          orientation === "horizontal" ? "-ml-4" : "-mt-4 flex-col",
          className,
        )}
        {...props}
      />
    </div>
  );
}

function CarouselItem({ className, ...props }: React.ComponentProps<"div">) {
  const { orientation } = useCarousel();

  return (
    <div
      role="group"
      aria-roledescription="slide"
      data-slot="carousel-item"
      className={cn(
        "min-w-0 shrink-0 grow-0 basis-full",
        orientation === "horizontal" ? "pl-4" : "pt-4",
        className,
      )}
      {...props}
    />
  );
}

function CarouselPrevious({
  className,
  variant = "outline",
  size = "icon-sm",
  ...props
}: React.ComponentProps<typeof Button>) {
  const { orientation, scrollPrev, canScrollPrev } = useCarousel();

  return (
    <Button
      data-slot="carousel-previous"
      variant={variant}
      size={size}
      className={cn(
        "rounded-full absolute touch-manipulation",
        orientation === "horizontal"
          ? "top-1/2 -left-12 -translate-y-1/2"
          : "-top-12 left-1/2 -translate-x-1/2 rotate-90",
        className,
      )}
      disabled={!canScrollPrev}
      onClick={scrollPrev}
      {...props}
    >
      <ChevronLeftIcon />
      <span className="sr-only">Previous slide</span>
    </Button>
  );
}

function CarouselNext({
  className,
  variant = "outline",
  size = "icon-sm",
  ...props
}: React.ComponentProps<typeof Button>) {
  const { orientation, scrollNext, canScrollNext } = useCarousel();

  return (
    <Button
      data-slot="carousel-next"
      variant={variant}
      size={size}
      className={cn(
        "rounded-full absolute touch-manipulation",
        orientation === "horizontal"
          ? "top-1/2 -right-12 -translate-y-1/2"
          : "-bottom-12 left-1/2 -translate-x-1/2 rotate-90",
        className,
      )}
      disabled={!canScrollNext}
      onClick={scrollNext}
      {...props}
    >
      <ChevronRightIcon />
      <span className="sr-only">Next slide</span>
    </Button>
  );
}

export {
  type CarouselApi,
  Carousel,
  CarouselContent,
  CarouselItem,
  CarouselPrevious,
  CarouselNext,
  useCarousel,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/chart.tsx
================================================
"use client";

import * as React from "react";
import * as RechartsPrimitive from "recharts";

import { cn } from "@/lib/utils";

// Format: { THEME_NAME: CSS_SELECTOR }
const THEMES = { light: "", dark: ".dark" } as const;

export type ChartConfig = {
  [k in string]: {
    label?: React.ReactNode;
    icon?: React.ComponentType;
  } & (
    | { color?: string; theme?: never }
    | { color?: never; theme: Record<keyof typeof THEMES, string> }
  );
};

type ChartContextProps = {
  config: ChartConfig;
};

const ChartContext = React.createContext<ChartContextProps | null>(null);

function useChart() {
  const context = React.useContext(ChartContext);

  if (!context) {
    throw new Error("useChart must be used within a <ChartContainer />");
  }

  return context;
}

function ChartContainer({
  id,
  className,
  children,
  config,
  ...props
}: React.ComponentProps<"div"> & {
  config: ChartConfig;
  children: React.ComponentProps<
    typeof RechartsPrimitive.ResponsiveContainer
  >["children"];
}) {
  const uniqueId = React.useId();
  const chartId = `chart-${id || uniqueId.replace(/:/g, "")}`;

  return (
    <ChartContext.Provider value={{ config }}>
      <div
        data-slot="chart"
        data-chart={chartId}
        className={cn(
          "[&_.recharts-cartesian-axis-tick_text]:fill-muted-foreground [&_.recharts-cartesian-grid_line[stroke='#ccc']]:stroke-border/50 [&_.recharts-curve.recharts-tooltip-cursor]:stroke-border [&_.recharts-polar-grid_[stroke='#ccc']]:stroke-border [&_.recharts-radial-bar-background-sector]:fill-muted [&_.recharts-rectangle.recharts-tooltip-cursor]:fill-muted [&_.recharts-reference-line_[stroke='#ccc']]:stroke-border flex aspect-video justify-center text-xs [&_.recharts-dot[stroke='#fff']]:stroke-transparent [&_.recharts-layer]:outline-hidden [&_.recharts-sector]:outline-hidden [&_.recharts-sector[stroke='#fff']]:stroke-transparent [&_.recharts-surface]:outline-hidden",
          className,
        )}
        {...props}
      >
        <ChartStyle id={chartId} config={config} />
        <RechartsPrimitive.ResponsiveContainer>
          {children}
        </RechartsPrimitive.ResponsiveContainer>
      </div>
    </ChartContext.Provider>
  );
}

const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => {
  const colorConfig = Object.entries(config).filter(
    ([, config]) => config.theme || config.color,
  );

  if (!colorConfig.length) {
    return null;
  }

  return (
    <style
      dangerouslySetInnerHTML={{
        __html: Object.entries(THEMES)
          .map(
            ([theme, prefix]) => `
${prefix} [data-chart=${id}] {
${colorConfig
  .map(([key, itemConfig]) => {
    const color =
      itemConfig.theme?.[theme as keyof typeof itemConfig.theme] ||
      itemConfig.color;
    return color ? `  --color-${key}: ${color};` : null;
  })
  .join("\n")}
}
`,
          )
          .join("\n"),
      }}
    />
  );
};

const ChartTooltip = RechartsPrimitive.Tooltip;

function ChartTooltipContent({
  active,
  payload,
  className,
  indicator = "dot",
  hideLabel = false,
  hideIndicator = false,
  label,
  labelFormatter,
  labelClassName,
  formatter,
  color,
  nameKey,
  labelKey,
}: React.ComponentProps<typeof RechartsPrimitive.Tooltip> &
  React.ComponentProps<"div"> & {
    hideLabel?: boolean;
    hideIndicator?: boolean;
    indicator?: "line" | "dot" | "dashed";
    nameKey?: string;
    labelKey?: string;
  }) {
  const { config } = useChart();

  const tooltipLabel = React.useMemo(() => {
    if (hideLabel || !payload?.length) {
      return null;
    }

    const [item] = payload;
    const key = `${labelKey || item?.dataKey || item?.name || "value"}`;
    const itemConfig = getPayloadConfigFromPayload(config, item, key);
    const value =
      !labelKey && typeof label === "string"
        ? config[label as keyof typeof config]?.label || label
        : itemConfig?.label;

    if (labelFormatter) {
      return (
        <div className={cn("font-medium", labelClassName)}>
          {labelFormatter(value, payload)}
        </div>
      );
    }

    if (!value) {
      return null;
    }

    return <div className={cn("font-medium", labelClassName)}>{value}</div>;
  }, [
    label,
    labelFormatter,
    payload,
    hideLabel,
    labelClassName,
    config,
    labelKey,
  ]);

  if (!active || !payload?.length) {
    return null;
  }

  const nestLabel = payload.length === 1 && indicator !== "dot";

  return (
    <div
      className={cn(
        "border-border/50 bg-background gap-1.5 rounded-lg border px-2.5 py-1.5 text-xs shadow-xl grid min-w-[8rem] items-start",
        className,
      )}
    >
      {!nestLabel ? tooltipLabel : null}
      <div className="grid gap-1.5">
        {payload
          .filter((item) => item.type !== "none")
          .map((item, index) => {
            const key = `${nameKey || item.name || item.dataKey || "value"}`;
            const itemConfig = getPayloadConfigFromPayload(config, item, key);
            const indicatorColor = color || item.payload.fill || item.color;

            return (
              <div
                key={item.dataKey}
                className={cn(
                  "[&>svg]:text-muted-foreground flex w-full flex-wrap items-stretch gap-2 [&>svg]:h-2.5 [&>svg]:w-2.5",
                  indicator === "dot" && "items-center",
                )}
              >
                {formatter && item?.value !== undefined && item.name ? (
                  formatter(item.value, item.name, item, index, item.payload)
                ) : (
                  <>
                    {itemConfig?.icon ? (
                      <itemConfig.icon />
                    ) : (
                      !hideIndicator && (
                        <div
                          className={cn(
                            "shrink-0 rounded-[2px] border-(--color-border) bg-(--color-bg)",
                            {
                              "h-2.5 w-2.5": indicator === "dot",
                              "w-1": indicator === "line",
                              "w-0 border-[1.5px] border-dashed bg-transparent":
                                indicator === "dashed",
                              "my-0.5": nestLabel && indicator === "dashed",
                            },
                          )}
                          style={
                            {
                              "--color-bg": indicatorColor,
                              "--color-border": indicatorColor,
                            } as React.CSSProperties
                          }
                        />
                      )
                    )}
                    <div
                      className={cn(
                        "flex flex-1 justify-between leading-none",
                        nestLabel ? "items-end" : "items-center",
                      )}
                    >
                      <div className="grid gap-1.5">
                        {nestLabel ? tooltipLabel : null}
                        <span className="text-muted-foreground">
                          {itemConfig?.label || item.name}
                        </span>
                      </div>
                      {item.value && (
                        <span className="text-foreground font-mono font-medium tabular-nums">
                          {item.value.toLocaleString()}
                        </span>
                      )}
                    </div>
                  </>
                )}
              </div>
            );
          })}
      </div>
    </div>
  );
}

const ChartLegend = RechartsPrimitive.Legend;

function ChartLegendContent({
  className,
  hideIcon = false,
  payload,
  verticalAlign = "bottom",
  nameKey,
}: React.ComponentProps<"div"> &
  Pick<RechartsPrimitive.LegendProps, "payload" | "verticalAlign"> & {
    hideIcon?: boolean;
    nameKey?: string;
  }) {
  const { config } = useChart();

  if (!payload?.length) {
    return null;
  }

  return (
    <div
      className={cn(
        "flex items-center justify-center gap-4",
        verticalAlign === "top" ? "pb-3" : "pt-3",
        className,
      )}
    >
      {payload
        .filter((item) => item.type !== "none")
        .map((item) => {
          const key = `${nameKey || item.dataKey || "value"}`;
          const itemConfig = getPayloadConfigFromPayload(config, item, key);

          return (
            <div
              key={item.value}
              className={cn(
                "[&>svg]:text-muted-foreground flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3",
              )}
            >
              {itemConfig?.icon && !hideIcon ? (
                <itemConfig.icon />
              ) : (
                <div
                  className="h-2 w-2 shrink-0 rounded-[2px]"
                  style={{
                    backgroundColor: item.color,
                  }}
                />
              )}
              {itemConfig?.label}
            </div>
          );
        })}
    </div>
  );
}

function getPayloadConfigFromPayload(
  config: ChartConfig,
  payload: unknown,
  key: string,
) {
  if (typeof payload !== "object" || payload === null) {
    return undefined;
  }

  const payloadPayload =
    "payload" in payload &&
    typeof payload.payload === "object" &&
    payload.payload !== null
      ? payload.payload
      : undefined;

  let configLabelKey: string = key;

  if (
    key in payload &&
    typeof payload[key as keyof typeof payload] === "string"
  ) {
    configLabelKey = payload[key as keyof typeof payload] as string;
  } else if (
    payloadPayload &&
    key in payloadPayload &&
    typeof payloadPayload[key as keyof typeof payloadPayload] === "string"
  ) {
    configLabelKey = payloadPayload[
      key as keyof typeof payloadPayload
    ] as string;
  }

  return configLabelKey in config
    ? config[configLabelKey]
    : config[key as keyof typeof config];
}

export {
  ChartContainer,
  ChartTooltip,
  ChartTooltipContent,
  ChartLegend,
  ChartLegendContent,
  ChartStyle,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/checkbox.tsx
================================================
"use client";

import * as React from "react";
import { Checkbox as CheckboxPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { CheckIcon } from "lucide-react";

function Checkbox({
  className,
  ...props
}: React.ComponentProps<typeof CheckboxPrimitive.Root>) {
  return (
    <CheckboxPrimitive.Root
      data-slot="checkbox"
      className={cn(
        "border-input dark:bg-input/30 data-checked:bg-primary data-checked:text-primary-foreground dark:data-checked:bg-primary data-checked:border-primary aria-invalid:aria-checked:border-primary aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 flex size-4 items-center justify-center rounded-[4px] border transition-colors group-has-disabled/field:opacity-50 focus-visible:ring-[3px] aria-invalid:ring-[3px] peer relative shrink-0 outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50",
        className,
      )}
      {...props}
    >
      <CheckboxPrimitive.Indicator
        data-slot="checkbox-indicator"
        className="[&>svg]:size-3.5 grid place-content-center text-current transition-none"
      >
        <CheckIcon />
      </CheckboxPrimitive.Indicator>
    </CheckboxPrimitive.Root>
  );
}

export { Checkbox };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/collapsible.tsx
================================================
"use client";

import { Collapsible as CollapsiblePrimitive } from "radix-ui";

function Collapsible({
  ...props
}: React.ComponentProps<typeof CollapsiblePrimitive.Root>) {
  return <CollapsiblePrimitive.Root data-slot="collapsible" {...props} />;
}

function CollapsibleTrigger({
  ...props
}: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleTrigger>) {
  return (
    <CollapsiblePrimitive.CollapsibleTrigger
      data-slot="collapsible-trigger"
      {...props}
    />
  );
}

function CollapsibleContent({
  ...props
}: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleContent>) {
  return (
    <CollapsiblePrimitive.CollapsibleContent
      data-slot="collapsible-content"
      {...props}
    />
  );
}

export { Collapsible, CollapsibleTrigger, CollapsibleContent };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/combobox.tsx
================================================
"use client";

import * as React from "react";
import { Combobox as ComboboxPrimitive } from "@base-ui/react";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupButton,
  InputGroupInput,
} from "@/components/ui/input-group";
import { ChevronDownIcon, XIcon, CheckIcon } from "lucide-react";

const Combobox = ComboboxPrimitive.Root;

function ComboboxValue({ ...props }: ComboboxPrimitive.Value.Props) {
  return <ComboboxPrimitive.Value data-slot="combobox-value" {...props} />;
}

function ComboboxTrigger({
  className,
  children,
  ...props
}: ComboboxPrimitive.Trigger.Props) {
  return (
    <ComboboxPrimitive.Trigger
      data-slot="combobox-trigger"
      className={cn("[&_svg:not([class*='size-'])]:size-4", className)}
      {...props}
    >
      {children}
      <ChevronDownIcon className="text-muted-foreground size-4 pointer-events-none" />
    </ComboboxPrimitive.Trigger>
  );
}

function ComboboxClear({ className, ...props }: ComboboxPrimitive.Clear.Props) {
  return (
    <ComboboxPrimitive.Clear
      data-slot="combobox-clear"
      render={<InputGroupButton variant="ghost" size="icon-xs" />}
      className={cn(className)}
      {...props}
    >
      <XIcon className="pointer-events-none" />
    </ComboboxPrimitive.Clear>
  );
}

function ComboboxInput({
  className,
  children,
  disabled = false,
  showTrigger = true,
  showClear = false,
  ...props
}: ComboboxPrimitive.Input.Props & {
  showTrigger?: boolean;
  showClear?: boolean;
}) {
  return (
    <InputGroup className={cn("w-auto", className)}>
      <ComboboxPrimitive.Input
        render={<InputGroupInput disabled={disabled} />}
        {...props}
      />
      <InputGroupAddon align="inline-end">
        {showTrigger && (
          <InputGroupButton
            size="icon-xs"
            variant="ghost"
            asChild
            data-slot="input-group-button"
            className="group-has-data-[slot=combobox-clear]/input-group:hidden data-pressed:bg-transparent"
            disabled={disabled}
          >
            <ComboboxTrigger />
          </InputGroupButton>
        )}
        {showClear && <ComboboxClear disabled={disabled} />}
      </InputGroupAddon>
      {children}
    </InputGroup>
  );
}

function ComboboxContent({
  className,
  side = "bottom",
  sideOffset = 6,
  align = "start",
  alignOffset = 0,
  anchor,
  ...props
}: ComboboxPrimitive.Popup.Props &
  Pick<
    ComboboxPrimitive.Positioner.Props,
    "side" | "align" | "sideOffset" | "alignOffset" | "anchor"
  >) {
  return (
    <ComboboxPrimitive.Portal>
      <ComboboxPrimitive.Positioner
        side={side}
        sideOffset={sideOffset}
        align={align}
        alignOffset={alignOffset}
        anchor={anchor}
        className="isolate z-50"
      >
        <ComboboxPrimitive.Popup
          data-slot="combobox-content"
          data-chips={!!anchor}
          className={cn(
            "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 *:data-[slot=input-group]:bg-input/30 *:data-[slot=input-group]:border-input/30 max-h-72 min-w-36 overflow-hidden rounded-lg shadow-md ring-1 duration-100 *:data-[slot=input-group]:m-1 *:data-[slot=input-group]:mb-0 *:data-[slot=input-group]:h-8 *:data-[slot=input-group]:shadow-none group/combobox-content relative max-h-(--available-height) w-(--anchor-width) max-w-(--available-width) min-w-[calc(var(--anchor-width)+--spacing(7))] origin-(--transform-origin) data-[chips=true]:min-w-(--anchor-width)",
            className,
          )}
          {...props}
        />
      </ComboboxPrimitive.Positioner>
    </ComboboxPrimitive.Portal>
  );
}

function ComboboxList({ className, ...props }: ComboboxPrimitive.List.Props) {
  return (
    <ComboboxPrimitive.List
      data-slot="combobox-list"
      className={cn(
        "no-scrollbar max-h-[min(calc(--spacing(72)---spacing(9)),calc(var(--available-height)---spacing(9)))] scroll-py-1 overflow-y-auto p-1 data-empty:p-0 overflow-y-auto overscroll-contain",
        className,
      )}
      {...props}
    />
  );
}

function ComboboxItem({
  className,
  children,
  ...props
}: ComboboxPrimitive.Item.Props) {
  return (
    <ComboboxPrimitive.Item
      data-slot="combobox-item"
      className={cn(
        "data-highlighted:bg-accent data-highlighted:text-accent-foreground not-data-[variant=destructive]:data-highlighted:**:text-accent-foreground gap-2 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex w-full cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      {children}
      <ComboboxPrimitive.ItemIndicator
        render={
          <span className="pointer-events-none absolute right-2 flex size-4 items-center justify-center" />
        }
      >
        <CheckIcon className="pointer-events-none" />
      </ComboboxPrimitive.ItemIndicator>
    </ComboboxPrimitive.Item>
  );
}

function ComboboxGroup({ className, ...props }: ComboboxPrimitive.Group.Props) {
  return (
    <ComboboxPrimitive.Group
      data-slot="combobox-group"
      className={cn(className)}
      {...props}
    />
  );
}

function ComboboxLabel({
  className,
  ...props
}: ComboboxPrimitive.GroupLabel.Props) {
  return (
    <ComboboxPrimitive.GroupLabel
      data-slot="combobox-label"
      className={cn("text-muted-foreground px-2 py-1.5 text-xs", className)}
      {...props}
    />
  );
}

function ComboboxCollection({ ...props }: ComboboxPrimitive.Collection.Props) {
  return (
    <ComboboxPrimitive.Collection data-slot="combobox-collection" {...props} />
  );
}

function ComboboxEmpty({ className, ...props }: ComboboxPrimitive.Empty.Props) {
  return (
    <ComboboxPrimitive.Empty
      data-slot="combobox-empty"
      className={cn(
        "text-muted-foreground hidden w-full justify-center py-2 text-center text-sm group-data-empty/combobox-content:flex",
        className,
      )}
      {...props}
    />
  );
}

function ComboboxSeparator({
  className,
  ...props
}: ComboboxPrimitive.Separator.Props) {
  return (
    <ComboboxPrimitive.Separator
      data-slot="combobox-separator"
      className={cn("bg-border -mx-1 my-1 h-px", className)}
      {...props}
    />
  );
}

function ComboboxChips({
  className,
  ...props
}: React.ComponentPropsWithRef<typeof ComboboxPrimitive.Chips> &
  ComboboxPrimitive.Chips.Props) {
  return (
    <ComboboxPrimitive.Chips
      data-slot="combobox-chips"
      className={cn(
        "dark:bg-input/30 border-input focus-within:border-ring focus-within:ring-ring/50 has-aria-invalid:ring-destructive/20 dark:has-aria-invalid:ring-destructive/40 has-aria-invalid:border-destructive dark:has-aria-invalid:border-destructive/50 flex min-h-8 flex-wrap items-center gap-1 rounded-lg border bg-transparent bg-clip-padding px-2.5 py-1 text-sm transition-colors focus-within:ring-[3px] has-aria-invalid:ring-[3px] has-data-[slot=combobox-chip]:px-1",
        className,
      )}
      {...props}
    />
  );
}

function ComboboxChip({
  className,
  children,
  showRemove = true,
  ...props
}: ComboboxPrimitive.Chip.Props & {
  showRemove?: boolean;
}) {
  return (
    <ComboboxPrimitive.Chip
      data-slot="combobox-chip"
      className={cn(
        "bg-muted text-foreground flex h-[calc(--spacing(5.25))] w-fit items-center justify-center gap-1 rounded-sm px-1.5 text-xs font-medium whitespace-nowrap has-data-[slot=combobox-chip-remove]:pr-0 has-disabled:pointer-events-none has-disabled:cursor-not-allowed has-disabled:opacity-50",
        className,
      )}
      {...props}
    >
      {children}
      {showRemove && (
        <ComboboxPrimitive.ChipRemove
          render={<Button variant="ghost" size="icon-xs" />}
          className="-ml-1 opacity-50 hover:opacity-100"
          data-slot="combobox-chip-remove"
        >
          <XIcon className="pointer-events-none" />
        </ComboboxPrimitive.ChipRemove>
      )}
    </ComboboxPrimitive.Chip>
  );
}

function ComboboxChipsInput({
  className,
  ...props
}: ComboboxPrimitive.Input.Props) {
  return (
    <ComboboxPrimitive.Input
      data-slot="combobox-chip-input"
      className={cn("min-w-16 flex-1 outline-none", className)}
      {...props}
    />
  );
}

function useComboboxAnchor() {
  return React.useRef<HTMLDivElement | null>(null);
}

export {
  Combobox,
  ComboboxInput,
  ComboboxContent,
  ComboboxList,
  ComboboxItem,
  ComboboxGroup,
  ComboboxLabel,
  ComboboxCollection,
  ComboboxEmpty,
  ComboboxSeparator,
  ComboboxChips,
  ComboboxChip,
  ComboboxChipsInput,
  ComboboxTrigger,
  ComboboxValue,
  useComboboxAnchor,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/command.tsx
================================================
"use client";

import * as React from "react";
import { Command as CommandPrimitive } from "cmdk";

import { cn } from "@/lib/utils";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { InputGroup, InputGroupAddon } from "@/components/ui/input-group";
import { SearchIcon, CheckIcon } from "lucide-react";

function Command({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive>) {
  return (
    <CommandPrimitive
      data-slot="command"
      className={cn(
        "bg-popover text-popover-foreground rounded-xl! p-1 flex size-full flex-col overflow-hidden",
        className,
      )}
      {...props}
    />
  );
}

function CommandDialog({
  title = "Command Palette",
  description = "Search for a command to run...",
  children,
  className,
  showCloseButton = false,
  ...props
}: React.ComponentProps<typeof Dialog> & {
  title?: string;
  description?: string;
  className?: string;
  showCloseButton?: boolean;
}) {
  return (
    <Dialog {...props}>
      <DialogHeader className="sr-only">
        <DialogTitle>{title}</DialogTitle>
        <DialogDescription>{description}</DialogDescription>
      </DialogHeader>
      <DialogContent
        className={cn(
          "rounded-xl! top-1/3 translate-y-0 overflow-hidden p-0",
          className,
        )}
        showCloseButton={showCloseButton}
      >
        {children}
      </DialogContent>
    </Dialog>
  );
}

function CommandInput({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.Input>) {
  return (
    <div data-slot="command-input-wrapper" className="p-1 pb-0">
      <InputGroup className="bg-input/30 border-input/30 h-8! rounded-lg! shadow-none! *:data-[slot=input-group-addon]:pl-2!">
        <CommandPrimitive.Input
          data-slot="command-input"
          className={cn(
            "w-full text-sm outline-hidden disabled:cursor-not-allowed disabled:opacity-50",
            className,
          )}
          {...props}
        />
        <InputGroupAddon>
          <SearchIcon className="size-4 shrink-0 opacity-50" />
        </InputGroupAddon>
      </InputGroup>
    </div>
  );
}

function CommandList({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.List>) {
  return (
    <CommandPrimitive.List
      data-slot="command-list"
      className={cn(
        "no-scrollbar max-h-72 scroll-py-1 outline-none overflow-x-hidden overflow-y-auto",
        className,
      )}
      {...props}
    />
  );
}

function CommandEmpty({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.Empty>) {
  return (
    <CommandPrimitive.Empty
      data-slot="command-empty"
      className={cn("py-6 text-center text-sm", className)}
      {...props}
    />
  );
}

function CommandGroup({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.Group>) {
  return (
    <CommandPrimitive.Group
      data-slot="command-group"
      className={cn(
        "text-foreground [&_[cmdk-group-heading]]:text-muted-foreground overflow-hidden p-1 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium",
        className,
      )}
      {...props}
    />
  );
}

function CommandSeparator({
  className,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.Separator>) {
  return (
    <CommandPrimitive.Separator
      data-slot="command-separator"
      className={cn("bg-border -mx-1 h-px", className)}
      {...props}
    />
  );
}

function CommandItem({
  className,
  children,
  ...props
}: React.ComponentProps<typeof CommandPrimitive.Item>) {
  return (
    <CommandPrimitive.Item
      data-slot="command-item"
      className={cn(
        "data-selected:bg-muted data-selected:text-foreground data-selected:*:[svg]:text-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none [&_svg:not([class*='size-'])]:size-4 [[data-slot=dialog-content]_&]:rounded-lg! group/command-item data-[disabled=true]:pointer-events-none data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      {children}
      <CheckIcon className="ml-auto opacity-0 group-has-[[data-slot=command-shortcut]]/command-item:hidden group-data-[checked=true]/command-item:opacity-100" />
    </CommandPrimitive.Item>
  );
}

function CommandShortcut({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="command-shortcut"
      className={cn(
        "text-muted-foreground group-data-selected/command-item:text-foreground ml-auto text-xs tracking-widest",
        className,
      )}
      {...props}
    />
  );
}

export {
  Command,
  CommandDialog,
  CommandInput,
  CommandList,
  CommandEmpty,
  CommandGroup,
  CommandItem,
  CommandShortcut,
  CommandSeparator,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/context-menu.tsx
================================================
"use client";

import * as React from "react";
import { ContextMenu as ContextMenuPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { ChevronRightIcon, CheckIcon } from "lucide-react";

function ContextMenu({
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Root>) {
  return <ContextMenuPrimitive.Root data-slot="context-menu" {...props} />;
}

function ContextMenuTrigger({
  className,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Trigger>) {
  return (
    <ContextMenuPrimitive.Trigger
      data-slot="context-menu-trigger"
      className={cn("select-none", className)}
      {...props}
    />
  );
}

function ContextMenuGroup({
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Group>) {
  return (
    <ContextMenuPrimitive.Group data-slot="context-menu-group" {...props} />
  );
}

function ContextMenuPortal({
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Portal>) {
  return (
    <ContextMenuPrimitive.Portal data-slot="context-menu-portal" {...props} />
  );
}

function ContextMenuSub({
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Sub>) {
  return <ContextMenuPrimitive.Sub data-slot="context-menu-sub" {...props} />;
}

function ContextMenuRadioGroup({
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.RadioGroup>) {
  return (
    <ContextMenuPrimitive.RadioGroup
      data-slot="context-menu-radio-group"
      {...props}
    />
  );
}

function ContextMenuContent({
  className,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Content> & {
  side?: "top" | "right" | "bottom" | "left";
}) {
  return (
    <ContextMenuPrimitive.Portal>
      <ContextMenuPrimitive.Content
        data-slot="context-menu-content"
        className={cn(
          "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-36 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 max-h-(--radix-context-menu-content-available-height) origin-(--radix-context-menu-content-transform-origin) overflow-x-hidden overflow-y-auto",
          className,
        )}
        {...props}
      />
    </ContextMenuPrimitive.Portal>
  );
}

function ContextMenuItem({
  className,
  inset,
  variant = "default",
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Item> & {
  inset?: boolean;
  variant?: "default" | "destructive";
}) {
  return (
    <ContextMenuPrimitive.Item
      data-slot="context-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive focus:*:[svg]:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 group/context-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

function ContextMenuSubTrigger({
  className,
  inset,
  children,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.SubTrigger> & {
  inset?: boolean;
}) {
  return (
    <ContextMenuPrimitive.SubTrigger
      data-slot="context-menu-sub-trigger"
      data-inset={inset}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      {children}
      <ChevronRightIcon className="ml-auto" />
    </ContextMenuPrimitive.SubTrigger>
  );
}

function ContextMenuSubContent({
  className,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.SubContent>) {
  return (
    <ContextMenuPrimitive.SubContent
      data-slot="context-menu-sub-content"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 bg-popover text-popover-foreground min-w-32 rounded-lg border p-1 shadow-lg duration-100 z-50 origin-(--radix-context-menu-content-transform-origin) overflow-hidden",
        className,
      )}
      {...props}
    />
  );
}

function ContextMenuCheckboxItem({
  className,
  children,
  checked,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.CheckboxItem>) {
  return (
    <ContextMenuPrimitive.CheckboxItem
      data-slot="context-menu-checkbox-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      checked={checked}
      {...props}
    >
      <span className="absolute right-2 pointer-events-none">
        <ContextMenuPrimitive.ItemIndicator>
          <CheckIcon />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.CheckboxItem>
  );
}

function ContextMenuRadioItem({
  className,
  children,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.RadioItem>) {
  return (
    <ContextMenuPrimitive.RadioItem
      data-slot="context-menu-radio-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      <span className="absolute right-2 pointer-events-none">
        <ContextMenuPrimitive.ItemIndicator>
          <CheckIcon />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.RadioItem>
  );
}

function ContextMenuLabel({
  className,
  inset,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Label> & {
  inset?: boolean;
}) {
  return (
    <ContextMenuPrimitive.Label
      data-slot="context-menu-label"
      data-inset={inset}
      className={cn(
        "text-muted-foreground px-1.5 py-1 text-xs font-medium data-[inset]:pl-8",
        className,
      )}
      {...props}
    />
  );
}

function ContextMenuSeparator({
  className,
  ...props
}: React.ComponentProps<typeof ContextMenuPrimitive.Separator>) {
  return (
    <ContextMenuPrimitive.Separator
      data-slot="context-menu-separator"
      className={cn("bg-border -mx-1 my-1 h-px", className)}
      {...props}
    />
  );
}

function ContextMenuShortcut({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="context-menu-shortcut"
      className={cn(
        "text-muted-foreground group-focus/context-menu-item:text-accent-foreground ml-auto text-xs tracking-widest",
        className,
      )}
      {...props}
    />
  );
}

export {
  ContextMenu,
  ContextMenuTrigger,
  ContextMenuContent,
  ContextMenuItem,
  ContextMenuCheckboxItem,
  ContextMenuRadioItem,
  ContextMenuLabel,
  ContextMenuSeparator,
  ContextMenuShortcut,
  ContextMenuGroup,
  ContextMenuPortal,
  ContextMenuSub,
  ContextMenuSubContent,
  ContextMenuSubTrigger,
  ContextMenuRadioGroup,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/dialog.tsx
================================================
"use client";

import * as React from "react";
import { Dialog as DialogPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { XIcon } from "lucide-react";

function Dialog({
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Root>) {
  return <DialogPrimitive.Root data-slot="dialog" {...props} />;
}

function DialogTrigger({
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Trigger>) {
  return <DialogPrimitive.Trigger data-slot="dialog-trigger" {...props} />;
}

function DialogPortal({
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Portal>) {
  return <DialogPrimitive.Portal data-slot="dialog-portal" {...props} />;
}

function DialogClose({
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Close>) {
  return <DialogPrimitive.Close data-slot="dialog-close" {...props} />;
}

function DialogOverlay({
  className,
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Overlay>) {
  return (
    <DialogPrimitive.Overlay
      data-slot="dialog-overlay"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 isolate z-50",
        className,
      )}
      {...props}
    />
  );
}

function DialogContent({
  className,
  children,
  showCloseButton = true,
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Content> & {
  showCloseButton?: boolean;
}) {
  return (
    <DialogPortal>
      <DialogOverlay />
      <DialogPrimitive.Content
        data-slot="dialog-content"
        className={cn(
          "bg-background data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 ring-foreground/10 grid max-w-[calc(100%-2rem)] gap-4 rounded-xl p-4 text-sm ring-1 duration-100 sm:max-w-sm fixed top-1/2 left-1/2 z-50 w-full -translate-x-1/2 -translate-y-1/2",
          className,
        )}
        {...props}
      >
        {children}
        {showCloseButton && (
          <DialogPrimitive.Close data-slot="dialog-close" asChild>
            <Button
              variant="ghost"
              className="absolute top-2 right-2"
              size="icon-sm"
            >
              <XIcon />
              <span className="sr-only">Close</span>
            </Button>
          </DialogPrimitive.Close>
        )}
      </DialogPrimitive.Content>
    </DialogPortal>
  );
}

function DialogHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="dialog-header"
      className={cn("gap-2 flex flex-col", className)}
      {...props}
    />
  );
}

function DialogFooter({
  className,
  showCloseButton = false,
  children,
  ...props
}: React.ComponentProps<"div"> & {
  showCloseButton?: boolean;
}) {
  return (
    <div
      data-slot="dialog-footer"
      className={cn(
        "bg-muted/50 -mx-4 -mb-4 rounded-b-xl border-t p-4 flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
        className,
      )}
      {...props}
    >
      {children}
      {showCloseButton && (
        <DialogPrimitive.Close asChild>
          <Button variant="outline">Close</Button>
        </DialogPrimitive.Close>
      )}
    </div>
  );
}

function DialogTitle({
  className,
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Title>) {
  return (
    <DialogPrimitive.Title
      data-slot="dialog-title"
      className={cn("text-base leading-none font-medium", className)}
      {...props}
    />
  );
}

function DialogDescription({
  className,
  ...props
}: React.ComponentProps<typeof DialogPrimitive.Description>) {
  return (
    <DialogPrimitive.Description
      data-slot="dialog-description"
      className={cn(
        "text-muted-foreground *:[a]:hover:text-foreground text-sm *:[a]:underline *:[a]:underline-offset-3",
        className,
      )}
      {...props}
    />
  );
}

export {
  Dialog,
  DialogClose,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogOverlay,
  DialogPortal,
  DialogTitle,
  DialogTrigger,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/drawer.tsx
================================================
"use client";

import * as React from "react";
import { Drawer as DrawerPrimitive } from "vaul";

import { cn } from "@/lib/utils";

function Drawer({
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Root>) {
  return <DrawerPrimitive.Root data-slot="drawer" {...props} />;
}

function DrawerTrigger({
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Trigger>) {
  return <DrawerPrimitive.Trigger data-slot="drawer-trigger" {...props} />;
}

function DrawerPortal({
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Portal>) {
  return <DrawerPrimitive.Portal data-slot="drawer-portal" {...props} />;
}

function DrawerClose({
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Close>) {
  return <DrawerPrimitive.Close data-slot="drawer-close" {...props} />;
}

function DrawerOverlay({
  className,
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Overlay>) {
  return (
    <DrawerPrimitive.Overlay
      data-slot="drawer-overlay"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50",
        className,
      )}
      {...props}
    />
  );
}

function DrawerContent({
  className,
  children,
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Content>) {
  return (
    <DrawerPortal data-slot="drawer-portal">
      <DrawerOverlay />
      <DrawerPrimitive.Content
        data-slot="drawer-content"
        className={cn(
          "bg-background flex h-auto flex-col text-sm data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[80vh] data-[vaul-drawer-direction=bottom]:rounded-t-xl data-[vaul-drawer-direction=bottom]:border-t data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:rounded-r-xl data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:rounded-l-xl data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-xl data-[vaul-drawer-direction=top]:border-b data-[vaul-drawer-direction=left]:sm:max-w-sm data-[vaul-drawer-direction=right]:sm:max-w-sm group/drawer-content fixed z-50",
          className,
        )}
        {...props}
      >
        <div className="bg-muted mx-auto mt-4 hidden h-1 w-[100px] shrink-0 rounded-full group-data-[vaul-drawer-direction=bottom]/drawer-content:block bg-muted mx-auto hidden shrink-0 group-data-[vaul-drawer-direction=bottom]/drawer-content:block" />
        {children}
      </DrawerPrimitive.Content>
    </DrawerPortal>
  );
}

function DrawerHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="drawer-header"
      className={cn(
        "gap-0.5 p-4 group-data-[vaul-drawer-direction=bottom]/drawer-content:text-center group-data-[vaul-drawer-direction=top]/drawer-content:text-center md:gap-0.5 md:text-left flex flex-col",
        className,
      )}
      {...props}
    />
  );
}

function DrawerFooter({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="drawer-footer"
      className={cn("gap-2 p-4 mt-auto flex flex-col", className)}
      {...props}
    />
  );
}

function DrawerTitle({
  className,
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Title>) {
  return (
    <DrawerPrimitive.Title
      data-slot="drawer-title"
      className={cn("text-foreground text-base font-medium", className)}
      {...props}
    />
  );
}

function DrawerDescription({
  className,
  ...props
}: React.ComponentProps<typeof DrawerPrimitive.Description>) {
  return (
    <DrawerPrimitive.Description
      data-slot="drawer-description"
      className={cn("text-muted-foreground text-sm", className)}
      {...props}
    />
  );
}

export {
  Drawer,
  DrawerPortal,
  DrawerOverlay,
  DrawerTrigger,
  DrawerClose,
  DrawerContent,
  DrawerHeader,
  DrawerFooter,
  DrawerTitle,
  DrawerDescription,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/dropdown-menu.tsx
================================================
"use client";

import * as React from "react";
import { DropdownMenu as DropdownMenuPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { CheckIcon, ChevronRightIcon } from "lucide-react";

function DropdownMenu({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Root>) {
  return <DropdownMenuPrimitive.Root data-slot="dropdown-menu" {...props} />;
}

function DropdownMenuPortal({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Portal>) {
  return (
    <DropdownMenuPrimitive.Portal data-slot="dropdown-menu-portal" {...props} />
  );
}

function DropdownMenuTrigger({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Trigger>) {
  return (
    <DropdownMenuPrimitive.Trigger
      data-slot="dropdown-menu-trigger"
      {...props}
    />
  );
}

function DropdownMenuContent({
  className,
  align = "start",
  sideOffset = 4,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Content>) {
  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        data-slot="dropdown-menu-content"
        sideOffset={sideOffset}
        align={align}
        className={cn(
          "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-32 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 max-h-(--radix-dropdown-menu-content-available-height) w-(--radix-dropdown-menu-trigger-width) origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto data-[state=closed]:overflow-hidden",
          className,
        )}
        {...props}
      />
    </DropdownMenuPrimitive.Portal>
  );
}

function DropdownMenuGroup({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Group>) {
  return (
    <DropdownMenuPrimitive.Group data-slot="dropdown-menu-group" {...props} />
  );
}

function DropdownMenuItem({
  className,
  inset,
  variant = "default",
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Item> & {
  inset?: boolean;
  variant?: "default" | "destructive";
}) {
  return (
    <DropdownMenuPrimitive.Item
      data-slot="dropdown-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 group/dropdown-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

function DropdownMenuCheckboxItem({
  className,
  children,
  checked,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.CheckboxItem>) {
  return (
    <DropdownMenuPrimitive.CheckboxItem
      data-slot="dropdown-menu-checkbox-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      checked={checked}
      {...props}
    >
      <span
        className="pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none"
        data-slot="dropdown-menu-checkbox-item-indicator"
      >
        <DropdownMenuPrimitive.ItemIndicator>
          <CheckIcon />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  );
}

function DropdownMenuRadioGroup({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.RadioGroup>) {
  return (
    <DropdownMenuPrimitive.RadioGroup
      data-slot="dropdown-menu-radio-group"
      {...props}
    />
  );
}

function DropdownMenuRadioItem({
  className,
  children,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.RadioItem>) {
  return (
    <DropdownMenuPrimitive.RadioItem
      data-slot="dropdown-menu-radio-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      <span
        className="pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none"
        data-slot="dropdown-menu-radio-item-indicator"
      >
        <DropdownMenuPrimitive.ItemIndicator>
          <CheckIcon />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.RadioItem>
  );
}

function DropdownMenuLabel({
  className,
  inset,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Label> & {
  inset?: boolean;
}) {
  return (
    <DropdownMenuPrimitive.Label
      data-slot="dropdown-menu-label"
      data-inset={inset}
      className={cn(
        "text-muted-foreground px-1.5 py-1 text-xs font-medium data-[inset]:pl-8",
        className,
      )}
      {...props}
    />
  );
}

function DropdownMenuSeparator({
  className,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Separator>) {
  return (
    <DropdownMenuPrimitive.Separator
      data-slot="dropdown-menu-separator"
      className={cn("bg-border -mx-1 my-1 h-px", className)}
      {...props}
    />
  );
}

function DropdownMenuShortcut({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="dropdown-menu-shortcut"
      className={cn(
        "text-muted-foreground group-focus/dropdown-menu-item:text-accent-foreground ml-auto text-xs tracking-widest",
        className,
      )}
      {...props}
    />
  );
}

function DropdownMenuSub({
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.Sub>) {
  return <DropdownMenuPrimitive.Sub data-slot="dropdown-menu-sub" {...props} />;
}

function DropdownMenuSubTrigger({
  className,
  inset,
  children,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.SubTrigger> & {
  inset?: boolean;
}) {
  return (
    <DropdownMenuPrimitive.SubTrigger
      data-slot="dropdown-menu-sub-trigger"
      data-inset={inset}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      {children}
      <ChevronRightIcon className="ml-auto" />
    </DropdownMenuPrimitive.SubTrigger>
  );
}

function DropdownMenuSubContent({
  className,
  ...props
}: React.ComponentProps<typeof DropdownMenuPrimitive.SubContent>) {
  return (
    <DropdownMenuPrimitive.SubContent
      data-slot="dropdown-menu-sub-content"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-[96px] rounded-md p-1 shadow-lg ring-1 duration-100 z-50 origin-(--radix-dropdown-menu-content-transform-origin) overflow-hidden",
        className,
      )}
      {...props}
    />
  );
}

export {
  DropdownMenu,
  DropdownMenuPortal,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuGroup,
  DropdownMenuLabel,
  DropdownMenuItem,
  DropdownMenuCheckboxItem,
  DropdownMenuRadioGroup,
  DropdownMenuRadioItem,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuSub,
  DropdownMenuSubTrigger,
  DropdownMenuSubContent,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/empty.tsx
================================================
import { cva, type VariantProps } from "class-variance-authority";

import { cn } from "@/lib/utils";

function Empty({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="empty"
      className={cn(
        "gap-4 rounded-lg border-dashed p-6 flex w-full min-w-0 flex-1 flex-col items-center justify-center text-center text-balance",
        className,
      )}
      {...props}
    />
  );
}

function EmptyHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="empty-header"
      className={cn("gap-2 flex max-w-sm flex-col items-center", className)}
      {...props}
    />
  );
}

const emptyMediaVariants = cva(
  "mb-2 flex shrink-0 items-center justify-center [&_svg]:pointer-events-none [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        default: "bg-transparent",
        icon: "bg-muted text-foreground flex size-8 shrink-0 items-center justify-center rounded-lg [&_svg:not([class*='size-'])]:size-4",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

function EmptyMedia({
  className,
  variant = "default",
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof emptyMediaVariants>) {
  return (
    <div
      data-slot="empty-icon"
      data-variant={variant}
      className={cn(emptyMediaVariants({ variant, className }))}
      {...props}
    />
  );
}

function EmptyTitle({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="empty-title"
      className={cn("text-sm font-medium tracking-tight", className)}
      {...props}
    />
  );
}

function EmptyDescription({ className, ...props }: React.ComponentProps<"p">) {
  return (
    <div
      data-slot="empty-description"
      className={cn(
        "text-sm/relaxed text-muted-foreground [&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4",
        className,
      )}
      {...props}
    />
  );
}

function EmptyContent({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="empty-content"
      className={cn(
        "gap-2.5 text-sm flex w-full max-w-sm min-w-0 flex-col items-center text-balance",
        className,
      )}
      {...props}
    />
  );
}

export {
  Empty,
  EmptyHeader,
  EmptyTitle,
  EmptyDescription,
  EmptyContent,
  EmptyMedia,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/field.tsx
================================================
"use client";

import { useMemo } from "react";
import { cva, type VariantProps } from "class-variance-authority";

import { cn } from "@/lib/utils";
import { Label } from "@/components/ui/label";
import { Separator } from "@/components/ui/separator";

function FieldSet({ className, ...props }: React.ComponentProps<"fieldset">) {
  return (
    <fieldset
      data-slot="field-set"
      className={cn(
        "gap-4 has-[>[data-slot=checkbox-group]]:gap-3 has-[>[data-slot=radio-group]]:gap-3 flex flex-col",
        className,
      )}
      {...props}
    />
  );
}

function FieldLegend({
  className,
  variant = "legend",
  ...props
}: React.ComponentProps<"legend"> & { variant?: "legend" | "label" }) {
  return (
    <legend
      data-slot="field-legend"
      data-variant={variant}
      className={cn(
        "mb-1.5 font-medium data-[variant=label]:text-sm data-[variant=legend]:text-base",
        className,
      )}
      {...props}
    />
  );
}

function FieldGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="field-group"
      className={cn(
        "gap-5 data-[slot=checkbox-group]:gap-3 [&>[data-slot=field-group]]:gap-4 group/field-group @container/field-group flex w-full flex-col",
        className,
      )}
      {...props}
    />
  );
}

const fieldVariants = cva(
  "data-[invalid=true]:text-destructive gap-2 group/field flex w-full",
  {
    variants: {
      orientation: {
        vertical: "flex-col [&>*]:w-full [&>.sr-only]:w-auto",
        horizontal:
          "flex-row items-center [&>[data-slot=field-label]]:flex-auto has-[>[data-slot=field-content]]:items-start has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px",
        responsive:
          "flex-col [&>*]:w-full [&>.sr-only]:w-auto @md/field-group:flex-row @md/field-group:items-center @md/field-group:[&>*]:w-auto @md/field-group:[&>[data-slot=field-label]]:flex-auto @md/field-group:has-[>[data-slot=field-content]]:items-start @md/field-group:has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px",
      },
    },
    defaultVariants: {
      orientation: "vertical",
    },
  },
);

function Field({
  className,
  orientation = "vertical",
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof fieldVariants>) {
  return (
    <div
      role="group"
      data-slot="field"
      data-orientation={orientation}
      className={cn(fieldVariants({ orientation }), className)}
      {...props}
    />
  );
}

function FieldContent({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="field-content"
      className={cn(
        "gap-0.5 group/field-content flex flex-1 flex-col leading-snug",
        className,
      )}
      {...props}
    />
  );
}

function FieldLabel({
  className,
  ...props
}: React.ComponentProps<typeof Label>) {
  return (
    <Label
      data-slot="field-label"
      className={cn(
        "has-data-checked:bg-primary/5 has-data-checked:border-primary dark:has-data-checked:bg-primary/10 gap-2 group-data-[disabled=true]/field:opacity-50 has-[>[data-slot=field]]:rounded-lg has-[>[data-slot=field]]:border [&>*]:data-[slot=field]:p-2.5 group/field-label peer/field-label flex w-fit leading-snug",
        "has-[>[data-slot=field]]:w-full has-[>[data-slot=field]]:flex-col",
        className,
      )}
      {...props}
    />
  );
}

function FieldTitle({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="field-label"
      className={cn(
        "gap-2 text-sm font-medium group-data-[disabled=true]/field:opacity-50 flex w-fit items-center leading-snug",
        className,
      )}
      {...props}
    />
  );
}

function FieldDescription({ className, ...props }: React.ComponentProps<"p">) {
  return (
    <p
      data-slot="field-description"
      className={cn(
        "text-muted-foreground text-left text-sm [[data-variant=legend]+&]:-mt-1.5 leading-normal font-normal group-has-[[data-orientation=horizontal]]/field:text-balance",
        "last:mt-0 nth-last-2:-mt-1",
        "[&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4",
        className,
      )}
      {...props}
    />
  );
}

function FieldSeparator({
  children,
  className,
  ...props
}: React.ComponentProps<"div"> & {
  children?: React.ReactNode;
}) {
  return (
    <div
      data-slot="field-separator"
      data-content={!!children}
      className={cn(
        "-my-2 h-5 text-sm group-data-[variant=outline]/field-group:-mb-2 relative",
        className,
      )}
      {...props}
    >
      <Separator className="absolute inset-0 top-1/2" />
      {children && (
        <span
          className="text-muted-foreground px-2 bg-background relative mx-auto block w-fit"
          data-slot="field-separator-content"
        >
          {children}
        </span>
      )}
    </div>
  );
}

function FieldError({
  className,
  children,
  errors,
  ...props
}: React.ComponentProps<"div"> & {
  errors?: Array<{ message?: string } | undefined>;
}) {
  const content = useMemo(() => {
    if (children) {
      return children;
    }

    if (!errors?.length) {
      return null;
    }

    const uniqueErrors = [
      ...new Map(errors.map((error) => [error?.message, error])).values(),
    ];

    if (uniqueErrors?.length == 1) {
      return uniqueErrors[0]?.message;
    }

    return (
      <ul className="ml-4 flex list-disc flex-col gap-1">
        {uniqueErrors.map(
          (error, index) =>
            error?.message && <li key={index}>{error.message}</li>,
        )}
      </ul>
    );
  }, [children, errors]);

  if (!content) {
    return null;
  }

  return (
    <div
      role="alert"
      data-slot="field-error"
      className={cn("text-destructive text-sm font-normal", className)}
      {...props}
    >
      {content}
    </div>
  );
}

export {
  Field,
  FieldLabel,
  FieldDescription,
  FieldError,
  FieldGroup,
  FieldLegend,
  FieldSeparator,
  FieldSet,
  FieldContent,
  FieldTitle,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/hover-card.tsx
================================================
"use client";

import * as React from "react";
import { HoverCard as HoverCardPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function HoverCard({
  ...props
}: React.ComponentProps<typeof HoverCardPrimitive.Root>) {
  return <HoverCardPrimitive.Root data-slot="hover-card" {...props} />;
}

function HoverCardTrigger({
  ...props
}: React.ComponentProps<typeof HoverCardPrimitive.Trigger>) {
  return (
    <HoverCardPrimitive.Trigger data-slot="hover-card-trigger" {...props} />
  );
}

function HoverCardContent({
  className,
  align = "center",
  sideOffset = 4,
  ...props
}: React.ComponentProps<typeof HoverCardPrimitive.Content>) {
  return (
    <HoverCardPrimitive.Portal data-slot="hover-card-portal">
      <HoverCardPrimitive.Content
        data-slot="hover-card-content"
        align={align}
        sideOffset={sideOffset}
        className={cn(
          "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground w-64 rounded-lg p-2.5 text-sm shadow-md ring-1 duration-100 z-50 origin-(--radix-hover-card-content-transform-origin) outline-hidden",
          className,
        )}
        {...props}
      />
    </HoverCardPrimitive.Portal>
  );
}

export { HoverCard, HoverCardTrigger, HoverCardContent };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/input-group.tsx
================================================
"use client";

import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";

function InputGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="input-group"
      role="group"
      className={cn(
        "border-input dark:bg-input/30 has-[[data-slot=input-group-control]:focus-visible]:border-ring has-[[data-slot=input-group-control]:focus-visible]:ring-ring/50 has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[[data-slot][aria-invalid=true]]:border-destructive dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40 has-disabled:bg-input/50 dark:has-disabled:bg-input/80 h-8 rounded-lg border transition-colors has-disabled:opacity-50 has-[[data-slot=input-group-control]:focus-visible]:ring-[3px] has-[[data-slot][aria-invalid=true]]:ring-[3px] has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-start]]:h-auto has-[>[data-align=block-start]]:flex-col has-[>[data-align=block-end]]:[&>input]:pt-3 has-[>[data-align=block-start]]:[&>input]:pb-3 has-[>[data-align=inline-end]]:[&>input]:pr-1.5 has-[>[data-align=inline-start]]:[&>input]:pl-1.5 [[data-slot=combobox-content]_&]:focus-within:border-inherit [[data-slot=combobox-content]_&]:focus-within:ring-0 group/input-group relative flex w-full min-w-0 items-center outline-none has-[>textarea]:h-auto",
        className,
      )}
      {...props}
    />
  );
}

const inputGroupAddonVariants = cva(
  "text-muted-foreground h-auto gap-2 py-1.5 text-sm font-medium group-data-[disabled=true]/input-group:opacity-50 [&>kbd]:rounded-[calc(var(--radius)-5px)] [&>svg:not([class*='size-'])]:size-4 flex cursor-text items-center justify-center select-none",
  {
    variants: {
      align: {
        "inline-start":
          "pl-2 has-[>button]:ml-[-0.3rem] has-[>kbd]:ml-[-0.15rem] order-first",
        "inline-end":
          "pr-2 has-[>button]:mr-[-0.3rem] has-[>kbd]:mr-[-0.15rem] order-last",
        "block-start":
          "px-2.5 pt-2 group-has-[>input]/input-group:pt-2 [.border-b]:pb-2 order-first w-full justify-start",
        "block-end":
          "px-2.5 pb-2 group-has-[>input]/input-group:pb-2 [.border-t]:pt-2 order-last w-full justify-start",
      },
    },
    defaultVariants: {
      align: "inline-start",
    },
  },
);

function InputGroupAddon({
  className,
  align = "inline-start",
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof inputGroupAddonVariants>) {
  return (
    <div
      role="group"
      data-slot="input-group-addon"
      data-align={align}
      className={cn(inputGroupAddonVariants({ align }), className)}
      onClick={(e) => {
        if ((e.target as HTMLElement).closest("button")) {
          return;
        }
        e.currentTarget.parentElement?.querySelector("input")?.focus();
      }}
      {...props}
    />
  );
}

const inputGroupButtonVariants = cva(
  "gap-2 text-sm shadow-none flex items-center",
  {
    variants: {
      size: {
        xs: "h-6 gap-1 rounded-[calc(var(--radius)-3px)] px-1.5 [&>svg:not([class*='size-'])]:size-3.5",
        sm: "",
        "icon-xs":
          "size-6 rounded-[calc(var(--radius)-3px)] p-0 has-[>svg]:p-0",
        "icon-sm": "size-8 p-0 has-[>svg]:p-0",
      },
    },
    defaultVariants: {
      size: "xs",
    },
  },
);

function InputGroupButton({
  className,
  type = "button",
  variant = "ghost",
  size = "xs",
  ...props
}: Omit<React.ComponentProps<typeof Button>, "size"> &
  VariantProps<typeof inputGroupButtonVariants>) {
  return (
    <Button
      type={type}
      data-size={size}
      variant={variant}
      className={cn(inputGroupButtonVariants({ size }), className)}
      {...props}
    />
  );
}

function InputGroupText({ className, ...props }: React.ComponentProps<"span">) {
  return (
    <span
      className={cn(
        "text-muted-foreground gap-2 text-sm [&_svg:not([class*='size-'])]:size-4 flex items-center [&_svg]:pointer-events-none",
        className,
      )}
      {...props}
    />
  );
}

function InputGroupInput({
  className,
  ...props
}: React.ComponentProps<"input">) {
  return (
    <Input
      data-slot="input-group-control"
      className={cn(
        "rounded-none border-0 bg-transparent shadow-none ring-0 focus-visible:ring-0 disabled:bg-transparent aria-invalid:ring-0 dark:bg-transparent dark:disabled:bg-transparent flex-1",
        className,
      )}
      {...props}
    />
  );
}

function InputGroupTextarea({
  className,
  ...props
}: React.ComponentProps<"textarea">) {
  return (
    <Textarea
      data-slot="input-group-control"
      className={cn(
        "rounded-none border-0 bg-transparent py-2 shadow-none ring-0 focus-visible:ring-0 disabled:bg-transparent aria-invalid:ring-0 dark:bg-transparent dark:disabled:bg-transparent flex-1 resize-none",
        className,
      )}
      {...props}
    />
  );
}

export {
  InputGroup,
  InputGroupAddon,
  InputGroupButton,
  InputGroupText,
  InputGroupInput,
  InputGroupTextarea,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/input.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

function Input({ className, type, ...props }: React.ComponentProps<"input">) {
  return (
    <input
      type={type}
      data-slot="input"
      className={cn(
        "dark:bg-input/30 border-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 disabled:bg-input/50 dark:disabled:bg-input/80 h-8 rounded-lg border bg-transparent px-2.5 py-1 text-base transition-colors file:h-6 file:text-sm file:font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm file:text-foreground placeholder:text-muted-foreground w-full min-w-0 outline-none file:inline-flex file:border-0 file:bg-transparent disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50",
        className,
      )}
      {...props}
    />
  );
}

export { Input };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/item.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";
import { Separator } from "@/components/ui/separator";

function ItemGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      role="list"
      data-slot="item-group"
      className={cn(
        "gap-4 has-[[data-size=sm]]:gap-2.5 has-[[data-size=xs]]:gap-2 group/item-group flex w-full flex-col",
        className,
      )}
      {...props}
    />
  );
}

function ItemSeparator({
  className,
  ...props
}: React.ComponentProps<typeof Separator>) {
  return (
    <Separator
      data-slot="item-separator"
      orientation="horizontal"
      className={cn("my-2", className)}
      {...props}
    />
  );
}

const itemVariants = cva(
  "[a]:hover:bg-muted rounded-lg border text-sm w-full group/item focus-visible:border-ring focus-visible:ring-ring/50 flex items-center flex-wrap outline-none transition-colors duration-100 focus-visible:ring-[3px] [a]:transition-colors",
  {
    variants: {
      variant: {
        default: "border-transparent",
        outline: "border-border",
        muted: "bg-muted/50 border-transparent",
      },
      size: {
        default: "gap-2.5 px-3 py-2.5",
        sm: "gap-2.5 px-3 py-2.5",
        xs: "gap-2 px-2.5 py-2 [[data-slot=dropdown-menu-content]_&]:p-0",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

function Item({
  className,
  variant = "default",
  size = "default",
  asChild = false,
  ...props
}: React.ComponentProps<"div"> &
  VariantProps<typeof itemVariants> & { asChild?: boolean }) {
  const Comp = asChild ? Slot.Root : "div";
  return (
    <Comp
      data-slot="item"
      data-variant={variant}
      data-size={size}
      className={cn(itemVariants({ variant, size, className }))}
      {...props}
    />
  );
}

const itemMediaVariants = cva(
  "gap-2 group-has-[[data-slot=item-description]]/item:translate-y-0.5 group-has-[[data-slot=item-description]]/item:self-start flex shrink-0 items-center justify-center [&_svg]:pointer-events-none",
  {
    variants: {
      variant: {
        default: "bg-transparent",
        icon: "[&_svg:not([class*='size-'])]:size-4",
        image:
          "size-10 overflow-hidden rounded-sm group-data-[size=sm]/item:size-8 group-data-[size=xs]/item:size-6 [&_img]:size-full [&_img]:object-cover",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

function ItemMedia({
  className,
  variant = "default",
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof itemMediaVariants>) {
  return (
    <div
      data-slot="item-media"
      data-variant={variant}
      className={cn(itemMediaVariants({ variant, className }))}
      {...props}
    />
  );
}

function ItemContent({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="item-content"
      className={cn(
        "gap-1 group-data-[size=xs]/item:gap-0 flex flex-1 flex-col [&+[data-slot=item-content]]:flex-none",
        className,
      )}
      {...props}
    />
  );
}

function ItemTitle({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="item-title"
      className={cn(
        "gap-2 text-sm leading-snug font-medium underline-offset-4 line-clamp-1 flex w-fit items-center",
        className,
      )}
      {...props}
    />
  );
}

function ItemDescription({ className, ...props }: React.ComponentProps<"p">) {
  return (
    <p
      data-slot="item-description"
      className={cn(
        "text-muted-foreground text-left text-sm leading-normal group-data-[size=xs]/item:text-xs [&>a:hover]:text-primary line-clamp-2 font-normal [&>a]:underline [&>a]:underline-offset-4",
        className,
      )}
      {...props}
    />
  );
}

function ItemActions({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="item-actions"
      className={cn("gap-2 flex items-center", className)}
      {...props}
    />
  );
}

function ItemHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="item-header"
      className={cn(
        "gap-2 flex basis-full items-center justify-between",
        className,
      )}
      {...props}
    />
  );
}

function ItemFooter({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="item-footer"
      className={cn(
        "gap-2 flex basis-full items-center justify-between",
        className,
      )}
      {...props}
    />
  );
}

export {
  Item,
  ItemMedia,
  ItemContent,
  ItemActions,
  ItemGroup,
  ItemSeparator,
  ItemTitle,
  ItemDescription,
  ItemHeader,
  ItemFooter,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/kbd.tsx
================================================
import { cn } from "@/lib/utils";

function Kbd({ className, ...props }: React.ComponentProps<"kbd">) {
  return (
    <kbd
      data-slot="kbd"
      className={cn(
        "bg-muted text-muted-foreground [[data-slot=tooltip-content]_&]:bg-background/20 [[data-slot=tooltip-content]_&]:text-background dark:[[data-slot=tooltip-content]_&]:bg-background/10 h-5 w-fit min-w-5 gap-1 rounded-sm px-1 font-sans text-xs font-medium [&_svg:not([class*='size-'])]:size-3 pointer-events-none inline-flex items-center justify-center select-none",
        className,
      )}
      {...props}
    />
  );
}

function KbdGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <kbd
      data-slot="kbd-group"
      className={cn("gap-1 inline-flex items-center", className)}
      {...props}
    />
  );
}

export { Kbd, KbdGroup };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/label.tsx
================================================
"use client";

import * as React from "react";
import { Label as LabelPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Label({
  className,
  ...props
}: React.ComponentProps<typeof LabelPrimitive.Root>) {
  return (
    <LabelPrimitive.Root
      data-slot="label"
      className={cn(
        "gap-2 text-sm leading-none font-medium group-data-[disabled=true]:opacity-50 peer-disabled:opacity-50 flex items-center select-none group-data-[disabled=true]:pointer-events-none peer-disabled:cursor-not-allowed",
        className,
      )}
      {...props}
    />
  );
}

export { Label };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/menubar.tsx
================================================
"use client";

import * as React from "react";
import { Menubar as MenubarPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { CheckIcon, ChevronRightIcon } from "lucide-react";

function Menubar({
  className,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Root>) {
  return (
    <MenubarPrimitive.Root
      data-slot="menubar"
      className={cn(
        "bg-background h-8 gap-0.5 rounded-lg border p-1 flex items-center",
        className,
      )}
      {...props}
    />
  );
}

function MenubarMenu({
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Menu>) {
  return <MenubarPrimitive.Menu data-slot="menubar-menu" {...props} />;
}

function MenubarGroup({
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Group>) {
  return <MenubarPrimitive.Group data-slot="menubar-group" {...props} />;
}

function MenubarPortal({
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Portal>) {
  return <MenubarPrimitive.Portal data-slot="menubar-portal" {...props} />;
}

function MenubarRadioGroup({
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.RadioGroup>) {
  return (
    <MenubarPrimitive.RadioGroup data-slot="menubar-radio-group" {...props} />
  );
}

function MenubarTrigger({
  className,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Trigger>) {
  return (
    <MenubarPrimitive.Trigger
      data-slot="menubar-trigger"
      className={cn(
        "hover:bg-muted aria-expanded:bg-muted rounded-sm px-1.5 py-px text-sm font-medium flex items-center outline-hidden select-none",
        className,
      )}
      {...props}
    />
  );
}

function MenubarContent({
  className,
  align = "start",
  alignOffset = -4,
  sideOffset = 8,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Content>) {
  return (
    <MenubarPortal>
      <MenubarPrimitive.Content
        data-slot="menubar-content"
        align={align}
        alignOffset={alignOffset}
        sideOffset={sideOffset}
        className={cn(
          "bg-popover text-popover-foreground data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-36 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden",
          className,
        )}
        {...props}
      />
    </MenubarPortal>
  );
}

function MenubarItem({
  className,
  inset,
  variant = "default",
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Item> & {
  inset?: boolean;
  variant?: "default" | "destructive";
}) {
  return (
    <MenubarPrimitive.Item
      data-slot="menubar-item"
      data-inset={inset}
      data-variant={variant}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 group/menubar-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

function MenubarCheckboxItem({
  className,
  children,
  checked,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.CheckboxItem>) {
  return (
    <MenubarPrimitive.CheckboxItem
      data-slot="menubar-checkbox-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-1.5 pl-7 text-sm data-disabled:opacity-50 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      checked={checked}
      {...props}
    >
      <span className="left-1.5 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CheckIcon />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.CheckboxItem>
  );
}

function MenubarRadioItem({
  className,
  children,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.RadioItem>) {
  return (
    <MenubarPrimitive.RadioItem
      data-slot="menubar-radio-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-1.5 pl-7 text-sm data-disabled:opacity-50 [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      <span className="left-1.5 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CheckIcon />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.RadioItem>
  );
}

function MenubarLabel({
  className,
  inset,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Label> & {
  inset?: boolean;
}) {
  return (
    <MenubarPrimitive.Label
      data-slot="menubar-label"
      data-inset={inset}
      className={cn(
        "px-1.5 py-1 text-sm font-medium data-[inset]:pl-8",
        className,
      )}
      {...props}
    />
  );
}

function MenubarSeparator({
  className,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Separator>) {
  return (
    <MenubarPrimitive.Separator
      data-slot="menubar-separator"
      className={cn("bg-border -mx-1 my-1 h-px", className)}
      {...props}
    />
  );
}

function MenubarShortcut({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="menubar-shortcut"
      className={cn(
        "text-muted-foreground group-focus/menubar-item:text-accent-foreground text-xs tracking-widest ml-auto",
        className,
      )}
      {...props}
    />
  );
}

function MenubarSub({
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Sub>) {
  return <MenubarPrimitive.Sub data-slot="menubar-sub" {...props} />;
}

function MenubarSubTrigger({
  className,
  inset,
  children,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.SubTrigger> & {
  inset?: boolean;
}) {
  return (
    <MenubarPrimitive.SubTrigger
      data-slot="menubar-sub-trigger"
      data-inset={inset}
      className={cn(
        "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-none select-none",
        className,
      )}
      {...props}
    >
      {children}
      <ChevronRightIcon className="ml-auto size-4" />
    </MenubarPrimitive.SubTrigger>
  );
}

function MenubarSubContent({
  className,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.SubContent>) {
  return (
    <MenubarPrimitive.SubContent
      data-slot="menubar-sub-content"
      className={cn(
        "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-32 rounded-lg p-1 shadow-lg ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden",
        className,
      )}
      {...props}
    />
  );
}

export {
  Menubar,
  MenubarPortal,
  MenubarMenu,
  MenubarTrigger,
  MenubarContent,
  MenubarGroup,
  MenubarSeparator,
  MenubarLabel,
  MenubarItem,
  MenubarShortcut,
  MenubarCheckboxItem,
  MenubarRadioGroup,
  MenubarRadioItem,
  MenubarSub,
  MenubarSubTrigger,
  MenubarSubContent,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/native-select.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";
import { ChevronDownIcon } from "lucide-react";

type NativeSelectProps = Omit<React.ComponentProps<"select">, "size"> & {
  size?: "sm" | "default";
};

function NativeSelect({
  className,
  size = "default",
  ...props
}: NativeSelectProps) {
  return (
    <div
      className={cn(
        "group/native-select relative w-fit has-[select:disabled]:opacity-50",
        className,
      )}
      data-slot="native-select-wrapper"
      data-size={size}
    >
      <select
        data-slot="native-select"
        data-size={size}
        className="border-input placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 dark:hover:bg-input/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 h-8 w-full min-w-0 appearance-none rounded-lg border bg-transparent py-1 pr-8 pl-2.5 text-sm transition-colors select-none focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=sm]:h-7 data-[size=sm]:rounded-[min(var(--radius-md),10px)] data-[size=sm]:py-0.5 outline-none disabled:pointer-events-none disabled:cursor-not-allowed"
        {...props}
      />
      <ChevronDownIcon
        className="text-muted-foreground top-1/2 right-2.5 size-4 -translate-y-1/2 pointer-events-none absolute select-none"
        aria-hidden="true"
        data-slot="native-select-icon"
      />
    </div>
  );
}

function NativeSelectOption({ ...props }: React.ComponentProps<"option">) {
  return <option data-slot="native-select-option" {...props} />;
}

function NativeSelectOptGroup({
  className,
  ...props
}: React.ComponentProps<"optgroup">) {
  return (
    <optgroup
      data-slot="native-select-optgroup"
      className={cn(className)}
      {...props}
    />
  );
}

export { NativeSelect, NativeSelectOptGroup, NativeSelectOption };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/navigation-menu.tsx
================================================
import * as React from "react";
import { cva } from "class-variance-authority";
import { NavigationMenu as NavigationMenuPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { ChevronDownIcon } from "lucide-react";

function NavigationMenu({
  className,
  children,
  viewport = true,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Root> & {
  viewport?: boolean;
}) {
  return (
    <NavigationMenuPrimitive.Root
      data-slot="navigation-menu"
      data-viewport={viewport}
      className={cn(
        "max-w-max group/navigation-menu relative flex max-w-max flex-1 items-center justify-center",
        className,
      )}
      {...props}
    >
      {children}
      {viewport && <NavigationMenuViewport />}
    </NavigationMenuPrimitive.Root>
  );
}

function NavigationMenuList({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.List>) {
  return (
    <NavigationMenuPrimitive.List
      data-slot="navigation-menu-list"
      className={cn(
        "gap-0 group flex flex-1 list-none items-center justify-center",
        className,
      )}
      {...props}
    />
  );
}

function NavigationMenuItem({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Item>) {
  return (
    <NavigationMenuPrimitive.Item
      data-slot="navigation-menu-item"
      className={cn("relative", className)}
      {...props}
    />
  );
}

const navigationMenuTriggerStyle = cva(
  "bg-background hover:bg-muted focus:bg-muted data-open:hover:bg-muted data-open:focus:bg-muted data-open:bg-muted/50 focus-visible:ring-ring/50 data-popup-open:bg-muted/50 data-popup-open:hover:bg-muted rounded-lg px-2.5 py-1.5 text-sm font-medium transition-all focus-visible:ring-[3px] focus-visible:outline-1 disabled:opacity-50 group/navigation-menu-trigger inline-flex h-9 w-max items-center justify-center disabled:pointer-events-none outline-none",
);

function NavigationMenuTrigger({
  className,
  children,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Trigger>) {
  return (
    <NavigationMenuPrimitive.Trigger
      data-slot="navigation-menu-trigger"
      className={cn(navigationMenuTriggerStyle(), "group", className)}
      {...props}
    >
      {children}{" "}
      <ChevronDownIcon
        className="relative top-[1px] ml-1 size-3 transition duration-300 group-data-open/navigation-menu-trigger:rotate-180 group-data-popup-open/navigation-menu-trigger:rotate-180"
        aria-hidden="true"
      />
    </NavigationMenuPrimitive.Trigger>
  );
}

function NavigationMenuContent({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Content>) {
  return (
    <NavigationMenuPrimitive.Content
      data-slot="navigation-menu-content"
      className={cn(
        "data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 group-data-[viewport=false]/navigation-menu:bg-popover group-data-[viewport=false]/navigation-menu:text-popover-foreground group-data-[viewport=false]/navigation-menu:data-open:animate-in group-data-[viewport=false]/navigation-menu:data-closed:animate-out group-data-[viewport=false]/navigation-menu:data-closed:zoom-out-95 group-data-[viewport=false]/navigation-menu:data-open:zoom-in-95 group-data-[viewport=false]/navigation-menu:data-open:fade-in-0 group-data-[viewport=false]/navigation-menu:data-closed:fade-out-0 group-data-[viewport=false]/navigation-menu:ring-foreground/10 p-1 ease-[cubic-bezier(0.22,1,0.36,1)] group-data-[viewport=false]/navigation-menu:rounded-lg group-data-[viewport=false]/navigation-menu:shadow group-data-[viewport=false]/navigation-menu:ring-1 group-data-[viewport=false]/navigation-menu:duration-300 top-0 left-0 w-full group-data-[viewport=false]/navigation-menu:top-full group-data-[viewport=false]/navigation-menu:mt-1.5 group-data-[viewport=false]/navigation-menu:overflow-hidden **:data-[slot=navigation-menu-link]:focus:ring-0 **:data-[slot=navigation-menu-link]:focus:outline-none md:absolute md:w-auto",
        className,
      )}
      {...props}
    />
  );
}

function NavigationMenuViewport({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Viewport>) {
  return (
    <div
      className={cn(
        "absolute top-full left-0 isolate z-50 flex justify-center",
      )}
    >
      <NavigationMenuPrimitive.Viewport
        data-slot="navigation-menu-viewport"
        className={cn(
          "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:zoom-out-95 data-open:zoom-in-90 ring-foreground/10 rounded-lg shadow ring-1 duration-100 origin-top-center relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden md:w-[var(--radix-navigation-menu-viewport-width)]",
          className,
        )}
        {...props}
      />
    </div>
  );
}

function NavigationMenuLink({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Link>) {
  return (
    <NavigationMenuPrimitive.Link
      data-slot="navigation-menu-link"
      className={cn(
        "data-active:focus:bg-muted data-active:hover:bg-muted data-active:bg-muted/50 focus-visible:ring-ring/50 hover:bg-muted focus:bg-muted flex items-center gap-2 rounded-lg p-2 text-sm transition-all outline-none focus-visible:ring-[3px] focus-visible:outline-1 [&_svg:not([class*='size-'])]:size-4 [[data-slot=navigation-menu-content]_&]:rounded-md",
        className,
      )}
      {...props}
    />
  );
}

function NavigationMenuIndicator({
  className,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Indicator>) {
  return (
    <NavigationMenuPrimitive.Indicator
      data-slot="navigation-menu-indicator"
      className={cn(
        "data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden",
        className,
      )}
      {...props}
    >
      <div className="bg-border rounded-tl-sm shadow-md relative top-[60%] h-2 w-2 rotate-45" />
    </NavigationMenuPrimitive.Indicator>
  );
}

export {
  NavigationMenu,
  NavigationMenuList,
  NavigationMenuItem,
  NavigationMenuContent,
  NavigationMenuTrigger,
  NavigationMenuLink,
  NavigationMenuIndicator,
  NavigationMenuViewport,
  navigationMenuTriggerStyle,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/pagination.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import {
  ChevronLeftIcon,
  ChevronRightIcon,
  MoreHorizontalIcon,
} from "lucide-react";

function Pagination({ className, ...props }: React.ComponentProps<"nav">) {
  return (
    <nav
      role="navigation"
      aria-label="pagination"
      data-slot="pagination"
      className={cn("mx-auto flex w-full justify-center", className)}
      {...props}
    />
  );
}

function PaginationContent({
  className,
  ...props
}: React.ComponentProps<"ul">) {
  return (
    <ul
      data-slot="pagination-content"
      className={cn("gap-0.5 flex items-center", className)}
      {...props}
    />
  );
}

function PaginationItem({ ...props }: React.ComponentProps<"li">) {
  return <li data-slot="pagination-item" {...props} />;
}

type PaginationLinkProps = {
  isActive?: boolean;
} & Pick<React.ComponentProps<typeof Button>, "size"> &
  React.ComponentProps<"a">;

function PaginationLink({
  className,
  isActive,
  size = "icon",
  ...props
}: PaginationLinkProps) {
  return (
    <Button
      asChild
      variant={isActive ? "outline" : "ghost"}
      size={size}
      className={cn(className)}
    >
      <a
        aria-current={isActive ? "page" : undefined}
        data-slot="pagination-link"
        data-active={isActive}
        {...props}
      />
    </Button>
  );
}

function PaginationPrevious({
  className,
  ...props
}: React.ComponentProps<typeof PaginationLink>) {
  return (
    <PaginationLink
      aria-label="Go to previous page"
      size="default"
      className={cn("pl-1.5!", className)}
      {...props}
    >
      <ChevronLeftIcon data-icon="inline-start" />
      <span className="hidden sm:block">Previous</span>
    </PaginationLink>
  );
}

function PaginationNext({
  className,
  ...props
}: React.ComponentProps<typeof PaginationLink>) {
  return (
    <PaginationLink
      aria-label="Go to next page"
      size="default"
      className={cn("pr-1.5!", className)}
      {...props}
    >
      <span className="hidden sm:block">Next</span>
      <ChevronRightIcon data-icon="inline-end" />
    </PaginationLink>
  );
}

function PaginationEllipsis({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      aria-hidden
      data-slot="pagination-ellipsis"
      className={cn(
        "size-8 items-center justify-center [&_svg:not([class*='size-'])]:size-4 flex items-center justify-center",
        className,
      )}
      {...props}
    >
      <MoreHorizontalIcon />
      <span className="sr-only">More pages</span>
    </span>
  );
}

export {
  Pagination,
  PaginationContent,
  PaginationEllipsis,
  PaginationItem,
  PaginationLink,
  PaginationNext,
  PaginationPrevious,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/popover.tsx
================================================
"use client";

import * as React from "react";
import { Popover as PopoverPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Popover({
  ...props
}: React.ComponentProps<typeof PopoverPrimitive.Root>) {
  return <PopoverPrimitive.Root data-slot="popover" {...props} />;
}

function PopoverTrigger({
  ...props
}: React.ComponentProps<typeof PopoverPrimitive.Trigger>) {
  return <PopoverPrimitive.Trigger data-slot="popover-trigger" {...props} />;
}

function PopoverContent({
  className,
  align = "center",
  sideOffset = 4,
  ...props
}: React.ComponentProps<typeof PopoverPrimitive.Content>) {
  return (
    <PopoverPrimitive.Portal>
      <PopoverPrimitive.Content
        data-slot="popover-content"
        align={align}
        sideOffset={sideOffset}
        className={cn(
          "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 flex flex-col gap-2.5 rounded-lg p-2.5 text-sm shadow-md ring-1 duration-100 z-50 w-72 origin-(--radix-popover-content-transform-origin) outline-hidden",
          className,
        )}
        {...props}
      />
    </PopoverPrimitive.Portal>
  );
}

function PopoverAnchor({
  ...props
}: React.ComponentProps<typeof PopoverPrimitive.Anchor>) {
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...props} />;
}

function PopoverHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="popover-header"
      className={cn("flex flex-col gap-0.5 text-sm", className)}
      {...props}
    />
  );
}

function PopoverTitle({ className, ...props }: React.ComponentProps<"h2">) {
  return (
    <div
      data-slot="popover-title"
      className={cn("font-medium", className)}
      {...props}
    />
  );
}

function PopoverDescription({
  className,
  ...props
}: React.ComponentProps<"p">) {
  return (
    <p
      data-slot="popover-description"
      className={cn("text-muted-foreground", className)}
      {...props}
    />
  );
}

export {
  Popover,
  PopoverAnchor,
  PopoverContent,
  PopoverDescription,
  PopoverHeader,
  PopoverTitle,
  PopoverTrigger,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/progress.tsx
================================================
"use client";

import * as React from "react";
import { Progress as ProgressPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Progress({
  className,
  value,
  ...props
}: React.ComponentProps<typeof ProgressPrimitive.Root>) {
  return (
    <ProgressPrimitive.Root
      data-slot="progress"
      className={cn(
        "bg-muted h-1 rounded-full relative flex w-full items-center overflow-x-hidden",
        className,
      )}
      {...props}
    >
      <ProgressPrimitive.Indicator
        data-slot="progress-indicator"
        className="bg-primary size-full flex-1 transition-all"
        style={{ transform: `translateX(-${100 - (value || 0)}%)` }}
      />
    </ProgressPrimitive.Root>
  );
}

export { Progress };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/radio-group.tsx
================================================
"use client";

import * as React from "react";
import { RadioGroup as RadioGroupPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { CircleIcon } from "lucide-react";

function RadioGroup({
  className,
  ...props
}: React.ComponentProps<typeof RadioGroupPrimitive.Root>) {
  return (
    <RadioGroupPrimitive.Root
      data-slot="radio-group"
      className={cn("grid gap-2 w-full", className)}
      {...props}
    />
  );
}

function RadioGroupItem({
  className,
  ...props
}: React.ComponentProps<typeof RadioGroupPrimitive.Item>) {
  return (
    <RadioGroupPrimitive.Item
      data-slot="radio-group-item"
      className={cn(
        "border-input text-primary dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 flex size-4 rounded-full focus-visible:ring-[3px] aria-invalid:ring-[3px] group/radio-group-item peer relative aspect-square shrink-0 border outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50",
        className,
      )}
      {...props}
    >
      <RadioGroupPrimitive.Indicator
        data-slot="radio-group-indicator"
        className="group-aria-invalid/radio-group-item:text-destructive text-primary flex size-4 items-center justify-center"
      >
        <CircleIcon className="absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2 fill-current" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  );
}

export { RadioGroup, RadioGroupItem };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/resizable.tsx
================================================
"use client";

import * as React from "react";
import * as ResizablePrimitive from "react-resizable-panels";

import { cn } from "@/lib/utils";

function ResizablePanelGroup({
  className,
  ...props
}: React.ComponentProps<typeof ResizablePrimitive.PanelGroup>) {
  return (
    <ResizablePrimitive.PanelGroup
      data-slot="resizable-panel-group"
      className={cn(
        "flex h-full w-full data-[panel-group-direction=vertical]:flex-col",
        className,
      )}
      {...props}
    />
  );
}

function ResizablePanel({
  ...props
}: React.ComponentProps<typeof ResizablePrimitive.Panel>) {
  return <ResizablePrimitive.Panel data-slot="resizable-panel" {...props} />;
}

function ResizableHandle({
  withHandle,
  className,
  ...props
}: React.ComponentProps<typeof ResizablePrimitive.PanelResizeHandle> & {
  withHandle?: boolean;
}) {
  return (
    <ResizablePrimitive.PanelResizeHandle
      data-slot="resizable-handle"
      className={cn(
        "bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:translate-x-0 data-[panel-group-direction=vertical]:after:-translate-y-1/2 [&[data-panel-group-direction=vertical]>div]:rotate-90",
        className,
      )}
      {...props}
    >
      {withHandle && (
        <div className="bg-border h-6 w-1 rounded-lg z-10 flex shrink-0" />
      )}
    </ResizablePrimitive.PanelResizeHandle>
  );
}

export { ResizablePanelGroup, ResizablePanel, ResizableHandle };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/scroll-area.tsx
================================================
"use client";

import * as React from "react";
import { ScrollArea as ScrollAreaPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function ScrollArea({
  className,
  children,
  ...props
}: React.ComponentProps<typeof ScrollAreaPrimitive.Root>) {
  return (
    <ScrollAreaPrimitive.Root
      data-slot="scroll-area"
      className={cn("relative", className)}
      {...props}
    >
      <ScrollAreaPrimitive.Viewport
        data-slot="scroll-area-viewport"
        className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
      >
        {children}
      </ScrollAreaPrimitive.Viewport>
      <ScrollBar />
      <ScrollAreaPrimitive.Corner />
    </ScrollAreaPrimitive.Root>
  );
}

function ScrollBar({
  className,
  orientation = "vertical",
  ...props
}: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>) {
  return (
    <ScrollAreaPrimitive.ScrollAreaScrollbar
      data-slot="scroll-area-scrollbar"
      data-orientation={orientation}
      orientation={orientation}
      className={cn(
        "data-horizontal:h-2.5 data-horizontal:flex-col data-horizontal:border-t data-horizontal:border-t-transparent data-vertical:h-full data-vertical:w-2.5 data-vertical:border-l data-vertical:border-l-transparent flex touch-none p-px transition-colors select-none",
        className,
      )}
      {...props}
    >
      <ScrollAreaPrimitive.ScrollAreaThumb
        data-slot="scroll-area-thumb"
        className="rounded-full bg-border relative flex-1"
      />
    </ScrollAreaPrimitive.ScrollAreaScrollbar>
  );
}

export { ScrollArea, ScrollBar };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/select.tsx
================================================
"use client";

import * as React from "react";
import { Select as SelectPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { ChevronDownIcon, CheckIcon, ChevronUpIcon } from "lucide-react";

function Select({
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Root>) {
  return <SelectPrimitive.Root data-slot="select" {...props} />;
}

function SelectGroup({
  className,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Group>) {
  return (
    <SelectPrimitive.Group
      data-slot="select-group"
      className={cn("scroll-my-1 p-1", className)}
      {...props}
    />
  );
}

function SelectValue({
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Value>) {
  return <SelectPrimitive.Value data-slot="select-value" {...props} />;
}

function SelectTrigger({
  className,
  size = "default",
  children,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Trigger> & {
  size?: "sm" | "default";
}) {
  return (
    <SelectPrimitive.Trigger
      data-slot="select-trigger"
      data-size={size}
      className={cn(
        "border-input data-[placeholder]:text-muted-foreground dark:bg-input/30 dark:hover:bg-input/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 gap-1.5 rounded-lg border bg-transparent py-2 pr-2 pl-2.5 text-sm transition-colors select-none focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-8 data-[size=sm]:h-7 data-[size=sm]:rounded-[min(var(--radius-md),10px)] *:data-[slot=select-value]:flex *:data-[slot=select-value]:gap-1.5 [&_svg:not([class*='size-'])]:size-4 flex w-fit items-center justify-between whitespace-nowrap outline-none disabled:cursor-not-allowed disabled:opacity-50 *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      {children}
      <SelectPrimitive.Icon asChild>
        <ChevronDownIcon className="text-muted-foreground size-4 pointer-events-none" />
      </SelectPrimitive.Icon>
    </SelectPrimitive.Trigger>
  );
}

function SelectContent({
  className,
  children,
  position = "item-aligned",
  align = "center",
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Content>) {
  return (
    <SelectPrimitive.Portal>
      <SelectPrimitive.Content
        data-slot="select-content"
        className={cn(
          "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-36 rounded-lg shadow-md ring-1 duration-100 relative z-50 max-h-(--radix-select-content-available-height) origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto",
          position === "popper" &&
            "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
          className,
        )}
        position={position}
        align={align}
        {...props}
      >
        <SelectScrollUpButton />
        <SelectPrimitive.Viewport
          data-position={position}
          className={cn(
            "data-[position=popper]:h-[var(--radix-select-trigger-height)] data-[position=popper]:w-full data-[position=popper]:min-w-[var(--radix-select-trigger-width)]",
            position === "popper" && "",
          )}
        >
          {children}
        </SelectPrimitive.Viewport>
        <SelectScrollDownButton />
      </SelectPrimitive.Content>
    </SelectPrimitive.Portal>
  );
}

function SelectLabel({
  className,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Label>) {
  return (
    <SelectPrimitive.Label
      data-slot="select-label"
      className={cn("text-muted-foreground px-1.5 py-1 text-xs", className)}
      {...props}
    />
  );
}

function SelectItem({
  className,
  children,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Item>) {
  return (
    <SelectPrimitive.Item
      data-slot="select-item"
      className={cn(
        "focus:bg-accent focus:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2 relative flex w-full cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        className,
      )}
      {...props}
    >
      <span className="pointer-events-none absolute right-2 flex size-4 items-center justify-center">
        <SelectPrimitive.ItemIndicator>
          <CheckIcon className="pointer-events-none" />
        </SelectPrimitive.ItemIndicator>
      </span>
      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
    </SelectPrimitive.Item>
  );
}

function SelectSeparator({
  className,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Separator>) {
  return (
    <SelectPrimitive.Separator
      data-slot="select-separator"
      className={cn("bg-border -mx-1 my-1 h-px pointer-events-none", className)}
      {...props}
    />
  );
}

function SelectScrollUpButton({
  className,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.ScrollUpButton>) {
  return (
    <SelectPrimitive.ScrollUpButton
      data-slot="select-scroll-up-button"
      className={cn(
        "bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4",
        className,
      )}
      {...props}
    >
      <ChevronUpIcon />
    </SelectPrimitive.ScrollUpButton>
  );
}

function SelectScrollDownButton({
  className,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.ScrollDownButton>) {
  return (
    <SelectPrimitive.ScrollDownButton
      data-slot="select-scroll-down-button"
      className={cn(
        "bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4",
        className,
      )}
      {...props}
    >
      <ChevronDownIcon />
    </SelectPrimitive.ScrollDownButton>
  );
}

export {
  Select,
  SelectContent,
  SelectGroup,
  SelectItem,
  SelectLabel,
  SelectScrollDownButton,
  SelectScrollUpButton,
  SelectSeparator,
  SelectTrigger,
  SelectValue,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/separator.tsx
================================================
"use client";

import * as React from "react";
import { Separator as SeparatorPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Separator({
  className,
  orientation = "horizontal",
  decorative = true,
  ...props
}: React.ComponentProps<typeof SeparatorPrimitive.Root>) {
  return (
    <SeparatorPrimitive.Root
      data-slot="separator"
      decorative={decorative}
      orientation={orientation}
      className={cn(
        "bg-border shrink-0 data-[orientation=horizontal]:h-px data-[orientation=horizontal]:w-full data-[orientation=vertical]:w-px data-[orientation=vertical]:self-stretch",
        className,
      )}
      {...props}
    />
  );
}

export { Separator };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sheet.tsx
================================================
"use client";

import * as React from "react";
import { Dialog as SheetPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { XIcon } from "lucide-react";

function Sheet({ ...props }: React.ComponentProps<typeof SheetPrimitive.Root>) {
  return <SheetPrimitive.Root data-slot="sheet" {...props} />;
}

function SheetTrigger({
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Trigger>) {
  return <SheetPrimitive.Trigger data-slot="sheet-trigger" {...props} />;
}

function SheetClose({
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Close>) {
  return <SheetPrimitive.Close data-slot="sheet-close" {...props} />;
}

function SheetPortal({
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Portal>) {
  return <SheetPrimitive.Portal data-slot="sheet-portal" {...props} />;
}

function SheetOverlay({
  className,
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Overlay>) {
  return (
    <SheetPrimitive.Overlay
      data-slot="sheet-overlay"
      className={cn(
        "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 data-ending-style:opacity-0 data-starting-style:opacity-0 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50",
        className,
      )}
      {...props}
    />
  );
}

function SheetContent({
  className,
  children,
  side = "right",
  showCloseButton = true,
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Content> & {
  side?: "top" | "right" | "bottom" | "left";
  showCloseButton?: boolean;
}) {
  return (
    <SheetPortal>
      <SheetOverlay />
      <SheetPrimitive.Content
        data-slot="sheet-content"
        data-side={side}
        className={cn(
          "bg-background data-open:animate-in data-closed:animate-out data-[side=right]:data-closed:slide-out-to-right-10 data-[side=right]:data-open:slide-in-from-right-10 data-[side=left]:data-closed:slide-out-to-left-10 data-[side=left]:data-open:slide-in-from-left-10 data-[side=top]:data-closed:slide-out-to-top-10 data-[side=top]:data-open:slide-in-from-top-10 data-closed:fade-out-0 data-open:fade-in-0 data-[side=bottom]:data-closed:slide-out-to-bottom-10 data-[side=bottom]:data-open:slide-in-from-bottom-10 fixed z-50 flex flex-col gap-4 bg-clip-padding text-sm shadow-lg transition duration-200 ease-in-out data-[side=bottom]:inset-x-0 data-[side=bottom]:bottom-0 data-[side=bottom]:h-auto data-[side=bottom]:border-t data-[side=left]:inset-y-0 data-[side=left]:left-0 data-[side=left]:h-full data-[side=left]:w-3/4 data-[side=left]:border-r data-[side=right]:inset-y-0 data-[side=right]:right-0 data-[side=right]:h-full data-[side=right]:w-3/4 data-[side=right]:border-l data-[side=top]:inset-x-0 data-[side=top]:top-0 data-[side=top]:h-auto data-[side=top]:border-b data-[side=left]:sm:max-w-sm data-[side=right]:sm:max-w-sm",
          className,
        )}
        {...props}
      >
        {children}
        {showCloseButton && (
          <SheetPrimitive.Close data-slot="sheet-close" asChild>
            <Button
              variant="ghost"
              className="absolute top-3 right-3"
              size="icon-sm"
            >
              <XIcon />
              <span className="sr-only">Close</span>
            </Button>
          </SheetPrimitive.Close>
        )}
      </SheetPrimitive.Content>
    </SheetPortal>
  );
}

function SheetHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sheet-header"
      className={cn("gap-0.5 p-4 flex flex-col", className)}
      {...props}
    />
  );
}

function SheetFooter({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sheet-footer"
      className={cn("gap-2 p-4 mt-auto flex flex-col", className)}
      {...props}
    />
  );
}

function SheetTitle({
  className,
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Title>) {
  return (
    <SheetPrimitive.Title
      data-slot="sheet-title"
      className={cn("text-foreground text-base font-medium", className)}
      {...props}
    />
  );
}

function SheetDescription({
  className,
  ...props
}: React.ComponentProps<typeof SheetPrimitive.Description>) {
  return (
    <SheetPrimitive.Description
      data-slot="sheet-description"
      className={cn("text-muted-foreground text-sm", className)}
      {...props}
    />
  );
}

export {
  Sheet,
  SheetTrigger,
  SheetClose,
  SheetContent,
  SheetHeader,
  SheetFooter,
  SheetTitle,
  SheetDescription,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sidebar.tsx
================================================
"use client";

import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Slot } from "radix-ui";

import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Separator } from "@/components/ui/separator";
import {
  Sheet,
  SheetContent,
  SheetDescription,
  SheetHeader,
  SheetTitle,
} from "@/components/ui/sheet";
import { Skeleton } from "@/components/ui/skeleton";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { useIsMobile } from "@/hooks/use-mobile";
import { PanelLeftIcon } from "lucide-react";

const SIDEBAR_COOKIE_NAME = "sidebar_state";
const SIDEBAR_COOKIE_MAX_AGE = 60 * 60 * 24 * 7;
const SIDEBAR_WIDTH = "16rem";
const SIDEBAR_WIDTH_MOBILE = "18rem";
const SIDEBAR_WIDTH_ICON = "3rem";
const SIDEBAR_KEYBOARD_SHORTCUT = "b";

type SidebarContextProps = {
  state: "expanded" | "collapsed";
  open: boolean;
  setOpen: (open: boolean) => void;
  openMobile: boolean;
  setOpenMobile: (open: boolean) => void;
  isMobile: boolean;
  toggleSidebar: () => void;
};

const SidebarContext = React.createContext<SidebarContextProps | null>(null);

function useSidebar() {
  const context = React.useContext(SidebarContext);
  if (!context) {
    throw new Error("useSidebar must be used within a SidebarProvider.");
  }

  return context;
}

function SidebarProvider({
  defaultOpen = true,
  open: openProp,
  onOpenChange: setOpenProp,
  className,
  style,
  children,
  ...props
}: React.ComponentProps<"div"> & {
  defaultOpen?: boolean;
  open?: boolean;
  onOpenChange?: (open: boolean) => void;
}) {
  const isMobile = useIsMobile();
  const [openMobile, setOpenMobile] = React.useState(false);

  // This is the internal state of the sidebar.
  // We use openProp and setOpenProp for control from outside the component.
  const [_open, _setOpen] = React.useState(defaultOpen);
  const open = openProp ?? _open;
  const setOpen = React.useCallback(
    (value: boolean | ((value: boolean) => boolean)) => {
      const openState = typeof value === "function" ? value(open) : value;
      if (setOpenProp) {
        setOpenProp(openState);
      } else {
        _setOpen(openState);
      }

      // This sets the cookie to keep the sidebar state.
      document.cookie = `${SIDEBAR_COOKIE_NAME}=${openState}; path=/; max-age=${SIDEBAR_COOKIE_MAX_AGE}`;
    },
    [setOpenProp, open],
  );

  // Helper to toggle the sidebar.
  const toggleSidebar = React.useCallback(() => {
    return isMobile ? setOpenMobile((open) => !open) : setOpen((open) => !open);
  }, [isMobile, setOpen, setOpenMobile]);

  // Adds a keyboard shortcut to toggle the sidebar.
  React.useEffect(() => {
    const handleKeyDown = (event: KeyboardEvent) => {
      if (
        event.key === SIDEBAR_KEYBOARD_SHORTCUT &&
        (event.metaKey || event.ctrlKey)
      ) {
        event.preventDefault();
        toggleSidebar();
      }
    };

    window.addEventListener("keydown", handleKeyDown);
    return () => window.removeEventListener("keydown", handleKeyDown);
  }, [toggleSidebar]);

  // We add a state so that we can do data-state="expanded" or "collapsed".
  // This makes it easier to style the sidebar with Tailwind classes.
  const state = open ? "expanded" : "collapsed";

  const contextValue = React.useMemo<SidebarContextProps>(
    () => ({
      state,
      open,
      setOpen,
      isMobile,
      openMobile,
      setOpenMobile,
      toggleSidebar,
    }),
    [state, open, setOpen, isMobile, openMobile, setOpenMobile, toggleSidebar],
  );

  return (
    <SidebarContext.Provider value={contextValue}>
      <div
        data-slot="sidebar-wrapper"
        style={
          {
            "--sidebar-width": SIDEBAR_WIDTH,
            "--sidebar-width-icon": SIDEBAR_WIDTH_ICON,
            ...style,
          } as React.CSSProperties
        }
        className={cn(
          "group/sidebar-wrapper has-data-[variant=inset]:bg-sidebar flex min-h-svh w-full",
          className,
        )}
        {...props}
      >
        {children}
      </div>
    </SidebarContext.Provider>
  );
}

function Sidebar({
  side = "left",
  variant = "sidebar",
  collapsible = "offExamples",
  className,
  children,
  ...props
}: React.ComponentProps<"div"> & {
  side?: "left" | "right";
  variant?: "sidebar" | "floating" | "inset";
  collapsible?: "offExamples" | "icon" | "none";
}) {
  const { isMobile, state, openMobile, setOpenMobile } = useSidebar();

  if (collapsible === "none") {
    return (
      <div
        data-slot="sidebar"
        className={cn(
          "bg-sidebar text-sidebar-foreground flex h-full w-(--sidebar-width) flex-col",
          className,
        )}
        {...props}
      >
        {children}
      </div>
    );
  }

  if (isMobile) {
    return (
      <Sheet open={openMobile} onOpenChange={setOpenMobile} {...props}>
        <SheetContent
          data-sidebar="sidebar"
          data-slot="sidebar"
          data-mobile="true"
          className="bg-sidebar text-sidebar-foreground w-(--sidebar-width) p-0 [&>button]:hidden"
          style={
            {
              "--sidebar-width": SIDEBAR_WIDTH_MOBILE,
            } as React.CSSProperties
          }
          side={side}
        >
          <SheetHeader className="sr-only">
            <SheetTitle>Sidebar</SheetTitle>
            <SheetDescription>Displays the mobile sidebar.</SheetDescription>
          </SheetHeader>
          <div className="flex h-full w-full flex-col">{children}</div>
        </SheetContent>
      </Sheet>
    );
  }

  return (
    <div
      className="group peer text-sidebar-foreground hidden md:block"
      data-state={state}
      data-collapsible={state === "collapsed" ? collapsible : ""}
      data-variant={variant}
      data-side={side}
      data-slot="sidebar"
    >
      {/* This is what handles the sidebar gap on desktop */}
      <div
        data-slot="sidebar-gap"
        className={cn(
          "transition-[width] duration-200 ease-linear relative w-(--sidebar-width) bg-transparent",
          "group-data-[collapsible=offExamples]:w-0",
          "group-data-[side=right]:rotate-180",
          variant === "floating" || variant === "inset"
            ? "group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4)))]"
            : "group-data-[collapsible=icon]:w-(--sidebar-width-icon)",
        )}
      />
      <div
        data-slot="sidebar-container"
        className={cn(
          "fixed inset-y-0 z-10 hidden h-svh w-(--sidebar-width) transition-[left,right,width] duration-200 ease-linear md:flex",
          side === "left"
            ? "left-0 group-data-[collapsible=offExamples]:left-[calc(var(--sidebar-width)*-1)]"
            : "right-0 group-data-[collapsible=offExamples]:right-[calc(var(--sidebar-width)*-1)]",
          // Adjust the padding for floating and inset variants.
          variant === "floating" || variant === "inset"
            ? "p-2 group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4))+2px)]"
            : "group-data-[collapsible=icon]:w-(--sidebar-width-icon) group-data-[side=left]:border-r group-data-[side=right]:border-l",
          className,
        )}
        {...props}
      >
        <div
          data-sidebar="sidebar"
          data-slot="sidebar-inner"
          className="bg-sidebar group-data-[variant=floating]:ring-sidebar-border group-data-[variant=floating]:rounded-lg group-data-[variant=floating]:shadow-sm group-data-[variant=floating]:ring-1 flex size-full flex-col"
        >
          {children}
        </div>
      </div>
    </div>
  );
}

function SidebarTrigger({
  className,
  onClick,
  ...props
}: React.ComponentProps<typeof Button>) {
  const { toggleSidebar } = useSidebar();

  return (
    <Button
      data-sidebar="trigger"
      data-slot="sidebar-trigger"
      variant="ghost"
      size="icon-sm"
      className={cn(className)}
      onClick={(event) => {
        onClick?.(event);
        toggleSidebar();
      }}
      {...props}
    >
      <PanelLeftIcon />
      <span className="sr-only">Toggle Sidebar</span>
    </Button>
  );
}

function SidebarRail({ className, ...props }: React.ComponentProps<"button">) {
  const { toggleSidebar } = useSidebar();

  return (
    <button
      data-sidebar="rail"
      data-slot="sidebar-rail"
      aria-label="Toggle Sidebar"
      tabIndex={-1}
      onClick={toggleSidebar}
      title="Toggle Sidebar"
      className={cn(
        "hover:after:bg-sidebar-border absolute inset-y-0 z-20 hidden w-4 -translate-x-1/2 transition-all ease-linear group-data-[side=left]:-right-4 group-data-[side=right]:left-0 after:absolute after:inset-y-0 after:left-1/2 after:w-[2px] sm:flex",
        "in-data-[side=left]:cursor-w-resize in-data-[side=right]:cursor-e-resize",
        "[[data-side=left][data-state=collapsed]_&]:cursor-e-resize [[data-side=right][data-state=collapsed]_&]:cursor-w-resize",
        "hover:group-data-[collapsible=offExamples]:bg-sidebar group-data-[collapsible=offExamples]:translate-x-0 group-data-[collapsible=offExamples]:after:left-full",
        "[[data-side=left][data-collapsible=offExamples]_&]:-right-2",
        "[[data-side=right][data-collapsible=offExamples]_&]:-left-2",
        className,
      )}
      {...props}
    />
  );
}

function SidebarInset({ className, ...props }: React.ComponentProps<"main">) {
  return (
    <main
      data-slot="sidebar-inset"
      className={cn(
        "bg-background md:peer-data-[variant=inset]:m-2 md:peer-data-[variant=inset]:ml-0 md:peer-data-[variant=inset]:rounded-xl md:peer-data-[variant=inset]:shadow-sm md:peer-data-[variant=inset]:peer-data-[state=collapsed]:ml-2 relative flex w-full flex-1 flex-col",
        className,
      )}
      {...props}
    />
  );
}

function SidebarInput({
  className,
  ...props
}: React.ComponentProps<typeof Input>) {
  return (
    <Input
      data-slot="sidebar-input"
      data-sidebar="input"
      className={cn("bg-background h-8 w-full shadow-none", className)}
      {...props}
    />
  );
}

function SidebarHeader({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-header"
      data-sidebar="header"
      className={cn("gap-2 p-2 flex flex-col", className)}
      {...props}
    />
  );
}

function SidebarFooter({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-footer"
      data-sidebar="footer"
      className={cn("gap-2 p-2 flex flex-col", className)}
      {...props}
    />
  );
}

function SidebarSeparator({
  className,
  ...props
}: React.ComponentProps<typeof Separator>) {
  return (
    <Separator
      data-slot="sidebar-separator"
      data-sidebar="separator"
      className={cn("bg-sidebar-border mx-2 w-auto", className)}
      {...props}
    />
  );
}

function SidebarContent({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-content"
      data-sidebar="content"
      className={cn(
        "no-scrollbar gap-0 flex min-h-0 flex-1 flex-col overflow-auto group-data-[collapsible=icon]:overflow-hidden",
        className,
      )}
      {...props}
    />
  );
}

function SidebarGroup({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-group"
      data-sidebar="group"
      className={cn("p-2 relative flex w-full min-w-0 flex-col", className)}
      {...props}
    />
  );
}

function SidebarGroupLabel({
  className,
  asChild = false,
  ...props
}: React.ComponentProps<"div"> & { asChild?: boolean }) {
  const Comp = asChild ? Slot.Root : "div";

  return (
    <Comp
      data-slot="sidebar-group-label"
      data-sidebar="group-label"
      className={cn(
        "text-sidebar-foreground/70 ring-sidebar-ring h-8 rounded-md px-2 text-xs font-medium transition-[margin,opacity] duration-200 ease-linear group-data-[collapsible=icon]:-mt-8 group-data-[collapsible=icon]:opacity-0 focus-visible:ring-2 [&>svg]:size-4 flex shrink-0 items-center outline-hidden [&>svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

function SidebarGroupAction({
  className,
  asChild = false,
  ...props
}: React.ComponentProps<"button"> & { asChild?: boolean }) {
  const Comp = asChild ? Slot.Root : "button";

  return (
    <Comp
      data-slot="sidebar-group-action"
      data-sidebar="group-action"
      className={cn(
        "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground absolute top-3.5 right-3 w-5 rounded-md p-0 focus-visible:ring-2 [&>svg]:size-4 flex aspect-square items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

function SidebarGroupContent({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-group-content"
      data-sidebar="group-content"
      className={cn("text-sm w-full", className)}
      {...props}
    />
  );
}

function SidebarMenu({ className, ...props }: React.ComponentProps<"ul">) {
  return (
    <ul
      data-slot="sidebar-menu"
      data-sidebar="menu"
      className={cn("gap-0 flex w-full min-w-0 flex-col", className)}
      {...props}
    />
  );
}

function SidebarMenuItem({ className, ...props }: React.ComponentProps<"li">) {
  return (
    <li
      data-slot="sidebar-menu-item"
      data-sidebar="menu-item"
      className={cn("group/menu-item relative", className)}
      {...props}
    />
  );
}

const sidebarMenuButtonVariants = cva(
  "ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground data-open:hover:bg-sidebar-accent data-open:hover:text-sidebar-accent-foreground gap-2 rounded-md p-2 text-left text-sm transition-[width,height,padding] group-has-data-[sidebar=menu-action]/menu-item:pr-8 group-data-[collapsible=icon]:size-8! group-data-[collapsible=icon]:p-2! focus-visible:ring-2 data-active:font-medium peer/menu-button flex w-full items-center overflow-hidden outline-hidden disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&_svg]:size-4 [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        default: "hover:bg-sidebar-accent hover:text-sidebar-accent-foreground",
        outline:
          "bg-background hover:bg-sidebar-accent hover:text-sidebar-accent-foreground shadow-[0_0_0_1px_hsl(var(--sidebar-border))] hover:shadow-[0_0_0_1px_hsl(var(--sidebar-accent))]",
      },
      size: {
        default: "h-8 text-sm",
        sm: "h-7 text-xs",
        lg: "h-12 text-sm group-data-[collapsible=icon]:p-0!",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

function SidebarMenuButton({
  asChild = false,
  isActive = false,
  variant = "default",
  size = "default",
  tooltip,
  className,
  ...props
}: React.ComponentProps<"button"> & {
  asChild?: boolean;
  isActive?: boolean;
  tooltip?: string | React.ComponentProps<typeof TooltipContent>;
} & VariantProps<typeof sidebarMenuButtonVariants>) {
  const Comp = asChild ? Slot.Root : "button";
  const { isMobile, state } = useSidebar();

  const button = (
    <Comp
      data-slot="sidebar-menu-button"
      data-sidebar="menu-button"
      data-size={size}
      data-active={isActive}
      className={cn(sidebarMenuButtonVariants({ variant, size }), className)}
      {...props}
    />
  );

  if (!tooltip) {
    return button;
  }

  if (typeof tooltip === "string") {
    tooltip = {
      children: tooltip,
    };
  }

  return (
    <Tooltip>
      <TooltipTrigger asChild>{button}</TooltipTrigger>
      <TooltipContent
        side="right"
        align="center"
        hidden={state !== "collapsed" || isMobile}
        {...tooltip}
      />
    </Tooltip>
  );
}

function SidebarMenuAction({
  className,
  asChild = false,
  showOnHover = false,
  ...props
}: React.ComponentProps<"button"> & {
  asChild?: boolean;
  showOnHover?: boolean;
}) {
  const Comp = asChild ? Slot.Root : "button";

  return (
    <Comp
      data-slot="sidebar-menu-action"
      data-sidebar="menu-action"
      className={cn(
        "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground peer-hover/menu-button:text-sidebar-accent-foreground absolute top-1.5 right-1 aspect-square w-5 rounded-md p-0 peer-data-[size=default]/menu-button:top-1.5 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 focus-visible:ring-2 [&>svg]:size-4 flex items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0",
        showOnHover &&
          "peer-data-active/menu-button:text-sidebar-accent-foreground group-focus-within/menu-item:opacity-100 group-hover/menu-item:opacity-100 data-open:opacity-100 md:opacity-0",
        className,
      )}
      {...props}
    />
  );
}

function SidebarMenuBadge({
  className,
  ...props
}: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="sidebar-menu-badge"
      data-sidebar="menu-badge"
      className={cn(
        "text-sidebar-foreground peer-hover/menu-button:text-sidebar-accent-foreground peer-data-active/menu-button:text-sidebar-accent-foreground pointer-events-none absolute right-1 flex h-5 min-w-5 rounded-md px-1 text-xs font-medium peer-data-[size=default]/menu-button:top-1.5 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 flex items-center justify-center tabular-nums select-none group-data-[collapsible=icon]:hidden",
        className,
      )}
      {...props}
    />
  );
}

function SidebarMenuSkeleton({
  className,
  showIcon = false,
  ...props
}: React.ComponentProps<"div"> & {
  showIcon?: boolean;
}) {
  // Random width between 50 to 90%.
  const [width] = React.useState(() => {
    return `${Math.floor(Math.random() * 40) + 50}%`;
  });

  return (
    <div
      data-slot="sidebar-menu-skeleton"
      data-sidebar="menu-skeleton"
      className={cn("h-8 gap-2 rounded-md px-2 flex items-center", className)}
      {...props}
    >
      {showIcon && (
        <Skeleton
          className="size-4 rounded-md"
          data-sidebar="menu-skeleton-icon"
        />
      )}
      <Skeleton
        className="h-4 max-w-(--skeleton-width) flex-1"
        data-sidebar="menu-skeleton-text"
        style={
          {
            "--skeleton-width": width,
          } as React.CSSProperties
        }
      />
    </div>
  );
}

function SidebarMenuSub({ className, ...props }: React.ComponentProps<"ul">) {
  return (
    <ul
      data-slot="sidebar-menu-sub"
      data-sidebar="menu-sub"
      className={cn(
        "border-sidebar-border mx-3.5 translate-x-px gap-1 border-l px-2.5 py-0.5 group-data-[collapsible=icon]:hidden flex min-w-0 flex-col",
        className,
      )}
      {...props}
    />
  );
}

function SidebarMenuSubItem({
  className,
  ...props
}: React.ComponentProps<"li">) {
  return (
    <li
      data-slot="sidebar-menu-sub-item"
      data-sidebar="menu-sub-item"
      className={cn("group/menu-sub-item relative", className)}
      {...props}
    />
  );
}

function SidebarMenuSubButton({
  asChild = false,
  size = "md",
  isActive = false,
  className,
  ...props
}: React.ComponentProps<"a"> & {
  asChild?: boolean;
  size?: "sm" | "md";
  isActive?: boolean;
}) {
  const Comp = asChild ? Slot.Root : "a";

  return (
    <Comp
      data-slot="sidebar-menu-sub-button"
      data-sidebar="menu-sub-button"
      data-size={size}
      data-active={isActive}
      className={cn(
        "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground [&>svg]:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground h-7 gap-2 rounded-md px-2 focus-visible:ring-2 data-[size=md]:text-sm data-[size=sm]:text-xs [&>svg]:size-4 flex min-w-0 -translate-x-px items-center overflow-hidden outline-hidden group-data-[collapsible=icon]:hidden disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&>svg]:shrink-0",
        className,
      )}
      {...props}
    />
  );
}

export {
  Sidebar,
  SidebarContent,
  SidebarFooter,
  SidebarGroup,
  SidebarGroupAction,
  SidebarGroupContent,
  SidebarGroupLabel,
  SidebarHeader,
  SidebarInput,
  SidebarInset,
  SidebarMenu,
  SidebarMenuAction,
  SidebarMenuBadge,
  SidebarMenuButton,
  SidebarMenuItem,
  SidebarMenuSkeleton,
  SidebarMenuSub,
  SidebarMenuSubButton,
  SidebarMenuSubItem,
  SidebarProvider,
  SidebarRail,
  SidebarSeparator,
  SidebarTrigger,
  useSidebar,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/skeleton.tsx
================================================
import { cn } from "@/lib/utils";

function Skeleton({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="skeleton"
      className={cn("bg-muted rounded-md animate-pulse", className)}
      {...props}
    />
  );
}

export { Skeleton };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/slider.tsx
================================================
"use client";

import * as React from "react";
import { Slider as SliderPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Slider({
  className,
  defaultValue,
  value,
  min = 0,
  max = 100,
  ...props
}: React.ComponentProps<typeof SliderPrimitive.Root>) {
  const _values = React.useMemo(
    () =>
      Array.isArray(value)
        ? value
        : Array.isArray(defaultValue)
          ? defaultValue
          : [min, max],
    [value, defaultValue, min, max],
  );

  return (
    <SliderPrimitive.Root
      data-slot="slider"
      defaultValue={defaultValue}
      value={value}
      min={min}
      max={max}
      className={cn(
        "data-vertical:min-h-40 relative flex w-full touch-none items-center select-none data-disabled:opacity-50 data-vertical:h-full data-vertical:w-auto data-vertical:flex-col",
        className,
      )}
      {...props}
    >
      <SliderPrimitive.Track
        data-slot="slider-track"
        className="bg-muted rounded-full data-horizontal:h-1 data-horizontal:w-full data-vertical:h-full data-vertical:w-1 bg-muted relative grow overflow-hidden data-horizontal:w-full data-vertical:h-full"
      >
        <SliderPrimitive.Range
          data-slot="slider-range"
          className="bg-primary absolute select-none data-horizontal:h-full data-vertical:w-full"
        />
      </SliderPrimitive.Track>
      {Array.from({ length: _values.length }, (_, index) => (
        <SliderPrimitive.Thumb
          data-slot="slider-thumb"
          key={index}
          className="border-ring ring-ring/50 relative size-3 rounded-full border bg-white transition-[color,box-shadow] after:absolute after:-inset-2 hover:ring-[3px] focus-visible:ring-[3px] focus-visible:outline-hidden active:ring-[3px] block shrink-0 select-none disabled:pointer-events-none disabled:opacity-50"
        />
      ))}
    </SliderPrimitive.Root>
  );
}

export { Slider };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sonner.tsx
================================================
"use client";

import { useTheme } from "next-themes";
import { Toaster as Sonner, type ToasterProps } from "sonner";
import {
  CircleCheckIcon,
  InfoIcon,
  TriangleAlertIcon,
  OctagonXIcon,
  Loader2Icon,
} from "lucide-react";

const Toaster = ({ ...props }: ToasterProps) => {
  const { theme = "system" } = useTheme();

  return (
    <Sonner
      theme={theme as ToasterProps["theme"]}
      className="toaster group"
      icons={{
        success: <CircleCheckIcon className="size-4" />,
        info: <InfoIcon className="size-4" />,
        warning: <TriangleAlertIcon className="size-4" />,
        error: <OctagonXIcon className="size-4" />,
        loading: <Loader2Icon className="size-4 animate-spin" />,
      }}
      style={
        {
          "--normal-bg": "var(--popover)",
          "--normal-text": "var(--popover-foreground)",
          "--normal-border": "var(--border)",
          "--border-radius": "var(--radius)",
        } as React.CSSProperties
      }
      toastOptions={{
        classNames: {
          toast: "cn-toast",
        },
      }}
      {...props}
    />
  );
};

export { Toaster };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/spinner.tsx
================================================
import { cn } from "@/lib/utils";
import { Loader2Icon } from "lucide-react";

function Spinner({ className, ...props }: React.ComponentProps<"svg">) {
  return (
    <Loader2Icon
      role="status"
      aria-label="Loading"
      className={cn("size-4 animate-spin", className)}
      {...props}
    />
  );
}

export { Spinner };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/switch.tsx
================================================
"use client";

import * as React from "react";
import { Switch as SwitchPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Switch({
  className,
  size = "default",
  ...props
}: React.ComponentProps<typeof SwitchPrimitive.Root> & {
  size?: "sm" | "default";
}) {
  return (
    <SwitchPrimitive.Root
      data-slot="switch"
      data-size={size}
      className={cn(
        "data-checked:bg-primary data-unchecked:bg-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 dark:data-unchecked:bg-input/80 shrink-0 rounded-full border border-transparent focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-[18.4px] data-[size=default]:w-[32px] data-[size=sm]:h-[14px] data-[size=sm]:w-[24px] peer group/switch relative inline-flex items-center transition-all outline-none after:absolute after:-inset-x-3 after:-inset-y-2 data-disabled:cursor-not-allowed data-disabled:opacity-50",
        className,
      )}
      {...props}
    >
      <SwitchPrimitive.Thumb
        data-slot="switch-thumb"
        className="bg-background dark:data-unchecked:bg-foreground dark:data-checked:bg-primary-foreground rounded-full group-data-[size=default]/switch:size-4 group-data-[size=sm]/switch:size-3 group-data-[size=default]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=sm]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=default]/switch:data-unchecked:translate-x-0 group-data-[size=sm]/switch:data-unchecked:translate-x-0 pointer-events-none block ring-0 transition-transform"
      />
    </SwitchPrimitive.Root>
  );
}

export { Switch };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/table.tsx
================================================
"use client";

import * as React from "react";

import { cn } from "@/lib/utils";

function Table({ className, ...props }: React.ComponentProps<"table">) {
  return (
    <div
      data-slot="table-container"
      className="relative w-full overflow-x-auto"
    >
      <table
        data-slot="table"
        className={cn("w-full caption-bottom text-sm", className)}
        {...props}
      />
    </div>
  );
}

function TableHeader({ className, ...props }: React.ComponentProps<"thead">) {
  return (
    <thead
      data-slot="table-header"
      className={cn("[&_tr]:border-b", className)}
      {...props}
    />
  );
}

function TableBody({ className, ...props }: React.ComponentProps<"tbody">) {
  return (
    <tbody
      data-slot="table-body"
      className={cn("[&_tr:last-child]:border-0", className)}
      {...props}
    />
  );
}

function TableFooter({ className, ...props }: React.ComponentProps<"tfoot">) {
  return (
    <tfoot
      data-slot="table-footer"
      className={cn(
        "bg-muted/50 border-t font-medium [&>tr]:last:border-b-0",
        className,
      )}
      {...props}
    />
  );
}

function TableRow({ className, ...props }: React.ComponentProps<"tr">) {
  return (
    <tr
      data-slot="table-row"
      className={cn(
        "hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors",
        className,
      )}
      {...props}
    />
  );
}

function TableHead({ className, ...props }: React.ComponentProps<"th">) {
  return (
    <th
      data-slot="table-head"
      className={cn(
        "text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&:has([role=checkbox])]:pr-0",
        className,
      )}
      {...props}
    />
  );
}

function TableCell({ className, ...props }: React.ComponentProps<"td">) {
  return (
    <td
      data-slot="table-cell"
      className={cn(
        "p-2 align-middle whitespace-nowrap [&:has([role=checkbox])]:pr-0",
        className,
      )}
      {...props}
    />
  );
}

function TableCaption({
  className,
  ...props
}: React.ComponentProps<"caption">) {
  return (
    <caption
      data-slot="table-caption"
      className={cn("text-muted-foreground mt-4 text-sm", className)}
      {...props}
    />
  );
}

export {
  Table,
  TableHeader,
  TableBody,
  TableFooter,
  TableHead,
  TableRow,
  TableCell,
  TableCaption,
};


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/tabs.tsx
================================================
"use client";

import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Tabs as TabsPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function Tabs({
  className,
  orientation = "horizontal",
  ...props
}: React.ComponentProps<typeof TabsPrimitive.Root>) {
  return (
    <TabsPrimitive.Root
      data-slot="tabs"
      data-orientation={orientation}
      className={cn(
        "gap-2 group/tabs flex data-[orientation=horizontal]:flex-col",
        className,
      )}
      {...props}
    />
  );
}

const tabsListVariants = cva(
  "rounded-lg p-[3px] group-data-horizontal/tabs:h-8 data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col",
  {
    variants: {
      variant: {
        default: "bg-muted",
        line: "gap-1 bg-transparent",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

function TabsList({
  className,
  variant = "default",
  ...props
}: React.ComponentProps<typeof TabsPrimitive.List> &
  VariantProps<typeof tabsListVariants>) {
  return (
    <TabsPrimitive.List
      data-slot="tabs-list"
      data-variant={variant}
      className={cn(tabsListVariants({ variant }), className)}
      {...props}
    />
  );
}

function TabsTrigger({
  className,
  ...props
}: React.ComponentProps<typeof TabsPrimitive.Trigger>) {
  return (
    <TabsPrimitive.Trigger
      data-slot="tabs-trigger"
      className={cn(
        "gap-1.5 rounded-md border border-transparent px-1.5 py-0.5 text-sm font-medium group-data-[variant=default]/tabs-list:data-active:shadow-sm group-data-[variant=line]/tabs-list:data-active:shadow-none [&_svg:not([class*='size-'])]:size-4 focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring text-foreground/60 hover:text-foreground dark:text-muted-foreground dark:hover:text-foreground relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center whitespace-nowrap transition-all group-data-[orientation=vertical]/tabs:w-full group-data-[orientation=vertical]/tabs:justify-start focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
        "group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-active:bg-transparent dark:group-data-[variant=line]/tabs-list:data-active:border-transparent dark:group-data-[variant=line]/tabs-list:data-active:bg-transparent",
        "data-active:bg-background dark:data-active:text-foreground dark:data-active:border-input dark:data-active:bg-input/30 data-active:text-foreground",
        "after:bg-foreground after:absolute after:opacity-0 after:transition-opacity group-data-[orientation=horizontal]/tabs:after:inset-x-0 group-data-[orientation=horizontal]/tabs:after:bottom-[-5px] group-data-[orientation=horizontal]/tabs:after:h-0.5 group-data-[orientation=vertical]/tabs:after:inset-y-0 group-data-[orientation=vertical]/tabs:after:-right-1 group-data-[orientation=vertical]/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-active:after:opacity-100",
        className,
      )}
      {...props}
    />
  );
}

function TabsContent({
  className,
  ...props
}: React.ComponentProps<typeof TabsPrimitive.Content>) {
  return (
    <TabsPrimitive.Content
      data-slot="tabs-content"
      className={cn("text-sm flex-1 outline-none", className)}
      {...props}
    />
  );
}

export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/textarea.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

function Textarea({ className, ...props }: React.ComponentProps<"textarea">) {
  return (
    <textarea
      data-slot="textarea"
      className={cn(
        "border-input dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 disabled:bg-input/50 dark:disabled:bg-input/80 rounded-lg border bg-transparent px-2.5 py-2 text-base transition-colors focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm placeholder:text-muted-foreground flex field-sizing-content min-h-16 w-full outline-none disabled:cursor-not-allowed disabled:opacity-50",
        className,
      )}
      {...props}
    />
  );
}

export { Textarea };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/toggle-group.tsx
================================================
"use client";

import * as React from "react";
import { type VariantProps } from "class-variance-authority";
import { ToggleGroup as ToggleGroupPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";
import { toggleVariants } from "@/components/ui/toggle";

const ToggleGroupContext = React.createContext<
  VariantProps<typeof toggleVariants> & {
    spacing?: number;
    orientation?: "horizontal" | "vertical";
  }
>({
  size: "default",
  variant: "default",
  spacing: 0,
  orientation: "horizontal",
});

function ToggleGroup({
  className,
  variant,
  size,
  spacing = 0,
  orientation = "horizontal",
  children,
  ...props
}: React.ComponentProps<typeof ToggleGroupPrimitive.Root> &
  VariantProps<typeof toggleVariants> & {
    spacing?: number;
    orientation?: "horizontal" | "vertical";
  }) {
  return (
    <ToggleGroupPrimitive.Root
      data-slot="toggle-group"
      data-variant={variant}
      data-size={size}
      data-spacing={spacing}
      data-orientation={orientation}
      style={{ "--gap": spacing } as React.CSSProperties}
      className={cn(
        "rounded-lg data-[size=sm]:rounded-[min(var(--radius-md),10px)] group/toggle-group flex w-fit flex-row items-center gap-[--spacing(var(--gap))] data-[orientation=vertical]:flex-col data-[orientation=vertical]:items-stretch",
        className,
      )}
      {...props}
    >
      <ToggleGroupContext.Provider
        value={{ variant, size, spacing, orientation }}
      >
        {children}
      </ToggleGroupContext.Provider>
    </ToggleGroupPrimitive.Root>
  );
}

function ToggleGroupItem({
  className,
  children,
  variant = "default",
  size = "default",
  ...props
}: React.ComponentProps<typeof ToggleGroupPrimitive.Item> &
  VariantProps<typeof toggleVariants>) {
  const context = React.useContext(ToggleGroupContext);

  return (
    <ToggleGroupPrimitive.Item
      data-slot="toggle-group-item"
      data-variant={context.variant || variant}
      data-size={context.size || size}
      data-spacing={context.spacing}
      className={cn(
        "group-data-[spacing=0]/toggle-group:rounded-none group-data-[spacing=0]/toggle-group:px-2 group-data-horizontal/toggle-group:data-[spacing=0]:first:rounded-l-lg group-data-vertical/toggle-group:data-[spacing=0]:first:rounded-t-lg group-data-horizontal/toggle-group:data-[spacing=0]:last:rounded-r-lg group-data-vertical/toggle-group:data-[spacing=0]:last:rounded-b-lg shrink-0 focus:z-10 focus-visible:z-10 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:border-l-0 group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:border-t-0 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-l group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-t",
        toggleVariants({
          variant: context.variant || variant,
          size: context.size || size,
        }),
        className,
      )}
      {...props}
    >
      {children}
    </ToggleGroupPrimitive.Item>
  );
}

export { ToggleGroup, ToggleGroupItem };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/toggle.tsx
================================================
"use client";

import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { Toggle as TogglePrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

const toggleVariants = cva(
  "hover:text-foreground aria-pressed:bg-muted focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive data-[state=on]:bg-muted gap-1 rounded-lg text-sm font-medium transition-all [&_svg:not([class*='size-'])]:size-4 group/toggle hover:bg-muted inline-flex items-center justify-center whitespace-nowrap outline-none focus-visible:ring-[3px] disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        default: "bg-transparent",
        outline: "border-input hover:bg-muted border bg-transparent",
      },
      size: {
        default: "h-8 min-w-8 px-2",
        sm: "h-7 min-w-7 rounded-[min(var(--radius-md),12px)] px-1.5 text-[0.8rem]",
        lg: "h-9 min-w-9 px-2.5",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

function Toggle({
  className,
  variant = "default",
  size = "default",
  ...props
}: React.ComponentProps<typeof TogglePrimitive.Root> &
  VariantProps<typeof toggleVariants>) {
  return (
    <TogglePrimitive.Root
      data-slot="toggle"
      className={cn(toggleVariants({ variant, size, className }))}
      {...props}
    />
  );
}

export { Toggle, toggleVariants };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/tooltip.tsx
================================================
"use client";

import * as React from "react";
import { Tooltip as TooltipPrimitive } from "radix-ui";

import { cn } from "@/lib/utils";

function TooltipProvider({
  delayDuration = 0,
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Provider>) {
  return (
    <TooltipPrimitive.Provider
      data-slot="tooltip-provider"
      delayDuration={delayDuration}
      {...props}
    />
  );
}

function Tooltip({
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Root>) {
  return (
    <TooltipProvider>
      <TooltipPrimitive.Root data-slot="tooltip" {...props} />
    </TooltipProvider>
  );
}

function TooltipTrigger({
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Trigger>) {
  return <TooltipPrimitive.Trigger data-slot="tooltip-trigger" {...props} />;
}

function TooltipContent({
  className,
  sideOffset = 0,
  children,
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Content>) {
  return (
    <TooltipPrimitive.Portal>
      <TooltipPrimitive.Content
        data-slot="tooltip-content"
        sideOffset={sideOffset}
        className={cn(
          "data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[state=delayed-open]:animate-in data-[state=delayed-open]:fade-in-0 data-[state=delayed-open]:zoom-in-95 data-closed:animate-out data-closed:fade-out-0 data-closed:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 rounded-md px-3 py-1.5 text-xs bg-foreground text-background z-50 w-fit max-w-xs origin-(--radix-tooltip-content-transform-origin)",
          className,
        )}
        {...props}
      >
        {children}
        <TooltipPrimitive.Arrow className="size-2.5 translate-y-[calc(-50%_-_2px)] rotate-45 rounded-[2px] bg-foreground fill-foreground z-50 translate-y-[calc(-50%_-_2px)]" />
      </TooltipPrimitive.Content>
    </TooltipPrimitive.Portal>
  );
}

export { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger };


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components.json
================================================
{
  "$schema": "https://ui.shadcn.com/schema.json",
  "style": "radix-nova",
  "rsc": true,
  "tsx": true,
  "tailwind": {
    "config": "",
    "css": "app/globals.css",
    "baseColor": "neutral",
    "cssVariables": true,
    "prefix": ""
  },
  "iconLibrary": "lucide",
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils",
    "ui": "@/components/ui",
    "lib": "@/lib",
    "hooks": "@/hooks"
  },
  "menuColor": "default",
  "menuAccent": "subtle",
  "registries": {}
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/eslint.config.mjs
================================================
import { defineConfig, globalIgnores } from "eslint/config";
import nextVitals from "eslint-config-next/core-web-vitals";
import nextTs from "eslint-config-next/typescript";

const eslintConfig = defineConfig([
  ...nextVitals,
  ...nextTs,
  // Override default ignores of eslint-config-next.
  globalIgnores([
    // Default ignores of eslint-config-next:
    ".next/**",
    "out/**",
    "build/**",
    "next-env.d.ts",
  ]),
]);

export default eslintConfig;


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/hooks/use-mobile.ts
================================================
import * as React from "react";

const MOBILE_BREAKPOINT = 768;

export function useIsMobile() {
  const [isMobile, setIsMobile] = React.useState<boolean | undefined>(
    undefined,
  );

  React.useEffect(() => {
    const mql = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`);
    const onChange = () => {
      setIsMobile(window.innerWidth < MOBILE_BREAKPOINT);
    };
    mql.addEventListener("change", onChange);
    setIsMobile(window.innerWidth < MOBILE_BREAKPOINT);
    return () => mql.removeEventListener("change", onChange);
  }, []);

  return !!isMobile;
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/lib/utils.ts
================================================
import { clsx, type ClassValue } from "clsx";
import { twMerge } from "tailwind-merge";

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/next.config.ts
================================================
import type { NextConfig } from "next";

const nextConfig: NextConfig = {
  /* config options here */
};

export default nextConfig;


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package.json
================================================
{
  "name": "web",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "eslint"
  },
  "dependencies": {
    "@base-ui/react": "^1.1.0",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
    "date-fns": "^4.1.0",
    "embla-carousel-react": "^8.6.0",
    "lucide-react": "^0.562.0",
    "next": "16.1.7",
    "next-themes": "^0.4.6",
    "radix-ui": "^1.4.3",
    "react": "19.2.3",
    "react-day-picker": "^9.13.0",
    "react-dom": "19.2.3",
    "react-resizable-panels": "^4.4.1",
    "recharts": "^2.15.4",
    "shadcn": "^3.7.0",
    "sonner": "^2.0.7",
    "tailwind-merge": "^3.4.0",
    "tw-animate-css": "^1.4.0",
    "vaul": "^1.1.2"
  },
  "devDependencies": {
    "@tailwindcss/postcss": "^4",
    "@types/node": "^20",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "eslint": "^9",
    "eslint-config-next": "16.1.4",
    "tailwindcss": "^4",
    "typescript": "^5"
  }
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/postcss.config.mjs
================================================
const config = {
  plugins: {
    "@tailwindcss/postcss": {},
  },
};

export default config;


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "react-jsx",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": [
    "next-env.d.ts",
    "**/*.ts",
    "**/*.tsx",
    ".next/types/**/*.ts",
    ".next/dev/types/**/*.ts",
    "**/*.mts"
  ],
  "exclude": ["node_modules"]
}


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/docker/test-job.yaml
================================================
# Kubernetes Job to run sandbox integration tests
#
# This runs the test pod inside the cluster so it can access sandbox services.
#
# Usage:
#   kubectl apply -f test-job.yaml
#   kubectl logs -f job/sandbox-test -n onyx-sandboxes
#   kubectl delete job sandbox-test -n onyx-sandboxes

apiVersion: v1
kind: Pod
metadata:
  name: sandbox-test
  namespace: onyx-sandboxes
spec:
  serviceAccountName: sandbox-runner  # Needs permissions to create/delete pods
  containers:
  - name: test
    image: onyxdotapp/onyx-backend:latest
    imagePullPolicy: Never  # Use local image, don't try to pull from registry
    command: ["sleep", "infinity"]
    env:
    - name: SANDBOX_BACKEND
      value: "kubernetes"
    - name: SANDBOX_NAMESPACE
      value: "onyx-sandboxes"
    # Add any other required env vars (API keys, DB connection, etc.)
    # - name: OPENAI_API_KEY
    #   valueFrom:
    #     secretKeyRef:
    #       name: openai-secrets
    #       key: api-key
    resources:
      requests:
        cpu: "500m"
        memory: "512Mi"
      limits:
        cpu: "1000m"
        memory: "1Gi"


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/internal/__init__.py
================================================
"""Internal implementation details for Kubernetes sandbox management.

These modules are implementation details and should only be used by KubernetesSandboxManager.
"""

from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (
    ACPEvent,
)

__all__ = [
    "ACPEvent",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/internal/acp_exec_client.py
================================================
"""ACP client that communicates via kubectl exec into the sandbox pod.

This client runs `opencode acp` directly in the sandbox pod via kubernetes exec,
using stdin/stdout for JSON-RPC communication. This bypasses the HTTP server
and uses the native ACP subprocess protocol.

Each message creates an ephemeral client (start → resume_or_create_session →
send_message → stop) to prevent concurrent processes from corrupting
opencode's flat file session storage.

Usage:
    client = ACPExecClient(
        pod_name="sandbox-abc123",
        namespace="onyx-sandboxes",
    )
    client.start(cwd="/workspace")
    session_id = client.resume_or_create_session(cwd="/workspace/sessions/abc")
    for event in client.send_message("What files are here?", session_id=session_id):
        print(event)
    client.stop()
"""

import json
import shlex
import threading
import time
from collections.abc import Generator
from dataclasses import dataclass
from dataclasses import field
from queue import Empty
from queue import Queue
from typing import Any
from typing import cast

from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart
from kubernetes import client  # type: ignore
from kubernetes import config
from kubernetes.stream import stream as k8s_stream  # type: ignore
from kubernetes.stream.ws_client import WSClient  # type: ignore
from pydantic import BaseModel
from pydantic import ValidationError

from onyx.server.features.build.api.packet_logger import get_packet_logger
from onyx.server.features.build.configs import ACP_MESSAGE_TIMEOUT
from onyx.server.features.build.configs import SSE_KEEPALIVE_INTERVAL
from onyx.utils.logger import setup_logger

logger = setup_logger()

# ACP Protocol version
ACP_PROTOCOL_VERSION = 1

# Default client info
DEFAULT_CLIENT_INFO = {
    "name": "onyx-sandbox-k8s-exec",
    "title": "Onyx Sandbox Agent Client (K8s Exec)",
    "version": "1.0.0",
}


@dataclass
class SSEKeepalive:
    """Marker event to signal that an SSE keepalive should be sent.

    This is yielded when no ACP events have been received for SSE_KEEPALIVE_INTERVAL
    seconds, allowing the SSE stream to send a comment to keep the connection alive.

    Note: This is an internal event type - it's consumed by session/manager.py and
    converted to an SSE comment before leaving that layer. It should not be exposed
    to external consumers.
    """


# Union type for all possible events from send_message
ACPEvent = (
    AgentMessageChunk
    | AgentThoughtChunk
    | ToolCallStart
    | ToolCallProgress
    | AgentPlanUpdate
    | CurrentModeUpdate
    | PromptResponse
    | Error
    | SSEKeepalive
)


@dataclass
class ACPSession:
    """Represents an active ACP session."""

    session_id: str
    cwd: str


@dataclass
class ACPClientState:
    """Internal state for the ACP client."""

    initialized: bool = False
    sessions: dict[str, ACPSession] = field(default_factory=dict)
    next_request_id: int = 0
    agent_capabilities: dict[str, Any] = field(default_factory=dict)
    agent_info: dict[str, Any] = field(default_factory=dict)


class ACPExecClient:
    """ACP client that communicates via kubectl exec.

    Runs `opencode acp` in the sandbox pod and communicates via stdin/stdout
    through the kubernetes exec stream.
    """

    def __init__(
        self,
        pod_name: str,
        namespace: str,
        container: str = "sandbox",
        client_info: dict[str, Any] | None = None,
        client_capabilities: dict[str, Any] | None = None,
    ) -> None:
        """Initialize the exec-based ACP client.

        Args:
            pod_name: Name of the sandbox pod
            namespace: Kubernetes namespace
            container: Container name within the pod
            client_info: Client identification info
            client_capabilities: Client capabilities to advertise
        """
        self._pod_name = pod_name
        self._namespace = namespace
        self._container = container
        self._client_info = client_info or DEFAULT_CLIENT_INFO
        self._client_capabilities = client_capabilities or {
            "fs": {"readTextFile": True, "writeTextFile": True},
            "terminal": True,
        }
        self._state = ACPClientState()
        self._ws_client: WSClient | None = None
        self._response_queue: Queue[dict[str, Any]] = Queue()
        self._reader_thread: threading.Thread | None = None
        self._stop_reader = threading.Event()
        self._k8s_client: client.CoreV1Api | None = None

    def _get_k8s_client(self) -> client.CoreV1Api:
        """Get or create kubernetes client."""
        if self._k8s_client is None:
            try:
                config.load_incluster_config()
            except config.ConfigException:
                config.load_kube_config()
            self._k8s_client = client.CoreV1Api()
        return self._k8s_client

    def start(self, cwd: str = "/workspace", timeout: float = 30.0) -> None:
        """Start the agent process via exec and initialize the ACP connection.

        Only performs the ACP `initialize` handshake. Sessions are created
        separately via `resume_or_create_session()`.

        Args:
            cwd: Working directory for the `opencode acp` process
            timeout: Timeout for initialization

        Raises:
            RuntimeError: If startup fails
        """
        if self._ws_client is not None:
            raise RuntimeError("Client already started. Call stop() first.")

        k8s = self._get_k8s_client()

        # Start opencode acp via exec.
        # Set XDG_DATA_HOME so opencode stores session data on the shared
        # workspace volume (accessible from file-sync container for snapshots)
        # instead of the container-local ~/.local/share/ filesystem.
        data_dir = shlex.quote(f"{cwd}/.opencode-data")
        safe_cwd = shlex.quote(cwd)
        exec_command = [
            "/bin/sh",
            "-c",
            f"XDG_DATA_HOME={data_dir} exec opencode acp --cwd {safe_cwd}",
        ]

        logger.info(f"[ACP] Starting client: pod={self._pod_name} cwd={cwd}")

        try:
            self._ws_client = k8s_stream(
                k8s.connect_get_namespaced_pod_exec,
                name=self._pod_name,
                namespace=self._namespace,
                container=self._container,
                command=exec_command,
                stdin=True,
                stdout=True,
                stderr=True,
                tty=False,
                _preload_content=False,
                _request_timeout=900,  # 15 minute timeout for long-running sessions
            )

            # Start reader thread
            self._stop_reader.clear()
            self._reader_thread = threading.Thread(
                target=self._read_responses, daemon=True
            )
            self._reader_thread.start()

            # Give process a moment to start
            time.sleep(0.5)

            # Initialize ACP connection (no session creation)
            self._initialize(timeout=timeout)

            logger.info(f"[ACP] Client started: pod={self._pod_name}")
        except Exception as e:
            logger.error(f"[ACP] Client start failed: pod={self._pod_name} error={e}")
            self.stop()
            raise RuntimeError(f"Failed to start ACP exec client: {e}") from e

    def _read_responses(self) -> None:
        """Background thread to read responses from the exec stream."""
        buffer = ""
        packet_logger = get_packet_logger()

        while not self._stop_reader.is_set():
            if self._ws_client is None:
                break

            try:
                if self._ws_client.is_open():
                    self._ws_client.update(timeout=0.1)

                    # Read stderr - log any agent errors
                    stderr_data = self._ws_client.read_stderr(timeout=0.01)
                    if stderr_data:
                        logger.warning(
                            f"[ACP] stderr pod={self._pod_name}: {stderr_data.strip()[:500]}"
                        )

                    # Read stdout
                    data = self._ws_client.read_stdout(timeout=0.1)
                    if data:
                        buffer += data

                        while "\n" in buffer:
                            line, buffer = buffer.split("\n", 1)
                            line = line.strip()
                            if line:
                                try:
                                    message = json.loads(line)
                                    packet_logger.log_jsonrpc_raw_message(
                                        "IN", message, context="k8s"
                                    )
                                    self._response_queue.put(message)
                                except json.JSONDecodeError:
                                    logger.warning(
                                        f"[ACP] Invalid JSON from agent: {line[:100]}"
                                    )

                else:
                    logger.warning(f"[ACP] WebSocket closed: pod={self._pod_name}")
                    break

            except Exception as e:
                if not self._stop_reader.is_set():
                    logger.warning(f"[ACP] Reader error: {e}, pod={self._pod_name}")
                break

    def stop(self) -> None:
        """Stop the exec session and clean up."""
        session_ids = list(self._state.sessions.keys())
        logger.info(
            f"[ACP] Stopping client: pod={self._pod_name} sessions={session_ids}"
        )
        self._stop_reader.set()

        if self._ws_client is not None:
            try:
                self._ws_client.close()
            except Exception:
                pass
            self._ws_client = None

        if self._reader_thread is not None:
            self._reader_thread.join(timeout=2.0)
            self._reader_thread = None

        self._state = ACPClientState()

    def _get_next_id(self) -> int:
        """Get the next request ID."""
        request_id = self._state.next_request_id
        self._state.next_request_id += 1
        return request_id

    def _send_request(self, method: str, params: dict[str, Any] | None = None) -> int:
        """Send a JSON-RPC request."""
        if self._ws_client is None or not self._ws_client.is_open():
            raise RuntimeError("Exec session not open")

        request_id = self._get_next_id()
        request: dict[str, Any] = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
        }
        if params is not None:
            request["params"] = params

        # Log the outgoing request
        packet_logger = get_packet_logger()
        packet_logger.log_jsonrpc_request(method, request_id, params, context="k8s")

        message = json.dumps(request) + "\n"
        self._ws_client.write_stdin(message)

        return request_id

    def _send_notification(
        self, method: str, params: dict[str, Any] | None = None
    ) -> None:
        """Send a JSON-RPC notification (no response expected)."""
        if self._ws_client is None or not self._ws_client.is_open():
            return

        notification: dict[str, Any] = {
            "jsonrpc": "2.0",
            "method": method,
        }
        if params is not None:
            notification["params"] = params

        # Log the outgoing notification
        packet_logger = get_packet_logger()
        packet_logger.log_jsonrpc_request(method, None, params, context="k8s")

        message = json.dumps(notification) + "\n"
        self._ws_client.write_stdin(message)

    def _wait_for_response(
        self, request_id: int, timeout: float = 30.0
    ) -> dict[str, Any]:
        """Wait for a response to a specific request."""
        start_time = time.time()

        while True:
            remaining = timeout - (time.time() - start_time)
            if remaining <= 0:
                raise RuntimeError(
                    f"Timeout waiting for response to request {request_id}"
                )

            try:
                message = self._response_queue.get(timeout=min(remaining, 1.0))

                if message.get("id") == request_id:
                    if "error" in message:
                        error = message["error"]
                        raise RuntimeError(
                            f"ACP error {error.get('code')}: {error.get('message')}"
                        )
                    return message.get("result", {})

                # Put back messages that aren't our response
                self._response_queue.put(message)

            except Empty:
                continue

    def _initialize(self, timeout: float = 30.0) -> dict[str, Any]:
        """Initialize the ACP connection."""
        params = {
            "protocolVersion": ACP_PROTOCOL_VERSION,
            "clientCapabilities": self._client_capabilities,
            "clientInfo": self._client_info,
        }

        request_id = self._send_request("initialize", params)
        result = self._wait_for_response(request_id, timeout)

        self._state.initialized = True
        self._state.agent_capabilities = result.get("agentCapabilities", {})
        self._state.agent_info = result.get("agentInfo", {})

        return result

    def _create_session(self, cwd: str, timeout: float = 30.0) -> str:
        """Create a new ACP session."""
        params = {
            "cwd": cwd,
            "mcpServers": [],
        }

        request_id = self._send_request("session/new", params)
        result = self._wait_for_response(request_id, timeout)

        session_id = result.get("sessionId")
        if not session_id:
            raise RuntimeError("No session ID returned from session/new")

        self._state.sessions[session_id] = ACPSession(session_id=session_id, cwd=cwd)
        logger.info(f"[ACP] Created session: acp_session={session_id} cwd={cwd}")

        return session_id

    def _list_sessions(self, cwd: str, timeout: float = 10.0) -> list[dict[str, Any]]:
        """List available ACP sessions, filtered by working directory.

        Returns:
            List of session info dicts with keys like 'sessionId', 'cwd', 'title'.
            Empty list if session/list is not supported or fails.
        """
        try:
            request_id = self._send_request("session/list", {"cwd": cwd})
            result = self._wait_for_response(request_id, timeout)
            sessions = result.get("sessions", [])
            logger.info(f"[ACP] session/list: {len(sessions)} sessions for cwd={cwd}")
            return sessions
        except Exception as e:
            logger.info(f"[ACP] session/list unavailable: {e}")
            return []

    def _resume_session(self, session_id: str, cwd: str, timeout: float = 30.0) -> str:
        """Resume an existing ACP session.

        Args:
            session_id: The ACP session ID to resume
            cwd: Working directory for the session
            timeout: Timeout for the resume request

        Returns:
            The session ID

        Raises:
            RuntimeError: If resume fails
        """
        params = {
            "sessionId": session_id,
            "cwd": cwd,
            "mcpServers": [],
        }

        request_id = self._send_request("session/resume", params)
        result = self._wait_for_response(request_id, timeout)

        # The response should contain the session ID
        resumed_id = result.get("sessionId", session_id)
        self._state.sessions[resumed_id] = ACPSession(session_id=resumed_id, cwd=cwd)

        logger.info(f"[ACP] Resumed session: acp_session={resumed_id} cwd={cwd}")
        return resumed_id

    def _try_resume_existing_session(self, cwd: str, timeout: float) -> str | None:
        """Try to find and resume an existing session for this workspace.

        When multiple API server replicas connect to the same sandbox pod,
        a previous replica may have already created an ACP session for this
        workspace. This method discovers and resumes that session so the
        agent retains conversation context.

        Args:
            cwd: Working directory to search for sessions
            timeout: Timeout for ACP requests

        Returns:
            The resumed session ID, or None if no session could be resumed
        """
        # List sessions for this workspace directory
        sessions = self._list_sessions(cwd, timeout=min(timeout, 10.0))
        if not sessions:
            return None

        # Pick the most recent session (first in list, assuming sorted)
        target = sessions[0]
        target_id = target.get("sessionId")
        if not target_id:
            logger.warning("[ACP] session/list returned session without sessionId")
            return None

        logger.info(
            f"[ACP] Resuming existing session: acp_session={target_id} (found {len(sessions)})"
        )

        try:
            return self._resume_session(target_id, cwd, timeout)
        except Exception as e:
            logger.warning(
                f"[ACP] session/resume failed for {target_id}: {e}, falling back to session/new"
            )
            return None

    def resume_or_create_session(self, cwd: str, timeout: float = 30.0) -> str:
        """Resume a session from opencode's on-disk storage, or create a new one.

        With ephemeral clients (one process per message), this always hits disk.
        Tries resume first to preserve conversation context, falls back to new.

        Args:
            cwd: Working directory for the session
            timeout: Timeout for ACP requests

        Returns:
            The ACP session ID
        """
        if not self._state.initialized:
            raise RuntimeError("Client not initialized. Call start() first.")

        # Try to resume from opencode's persisted storage
        resumed_id = self._try_resume_existing_session(cwd, timeout)
        if resumed_id:
            return resumed_id

        # Create a new session
        return self._create_session(cwd=cwd, timeout=timeout)

    def send_message(
        self,
        message: str,
        session_id: str,
        timeout: float = ACP_MESSAGE_TIMEOUT,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to a specific session and stream response events.

        Args:
            message: The message content to send
            session_id: The ACP session ID to send the message to
            timeout: Maximum time to wait for complete response (defaults to ACP_MESSAGE_TIMEOUT env var)

        Yields:
            Typed ACP schema event objects
        """
        if session_id not in self._state.sessions:
            raise RuntimeError(
                f"Unknown session {session_id}. Known sessions: {list(self._state.sessions.keys())}"
            )
        packet_logger = get_packet_logger()

        logger.info(
            f"[ACP] Sending prompt: acp_session={session_id} pod={self._pod_name} queue_backlog={self._response_queue.qsize()}"
        )

        prompt_content = [{"type": "text", "text": message}]
        params = {
            "sessionId": session_id,
            "prompt": prompt_content,
        }

        request_id = self._send_request("session/prompt", params)
        start_time = time.time()
        last_event_time = time.time()
        events_yielded = 0
        keepalive_count = 0
        completion_reason = "unknown"

        while True:
            remaining = timeout - (time.time() - start_time)
            if remaining <= 0:
                completion_reason = "timeout"
                logger.warning(
                    f"[ACP] Prompt timeout: acp_session={session_id} events={events_yielded}, sending session/cancel"
                )
                try:
                    self.cancel(session_id=session_id)
                except Exception as cancel_err:
                    logger.warning(
                        f"[ACP] session/cancel failed on timeout: {cancel_err}"
                    )
                yield Error(code=-1, message="Timeout waiting for response")
                break

            try:
                message_data = self._response_queue.get(timeout=min(remaining, 1.0))
                last_event_time = time.time()
            except Empty:
                # Send SSE keepalive if idle
                idle_time = time.time() - last_event_time
                if idle_time >= SSE_KEEPALIVE_INTERVAL:
                    keepalive_count += 1
                    yield SSEKeepalive()
                    last_event_time = time.time()
                continue

            # Check for JSON-RPC response to our prompt request.
            msg_id = message_data.get("id")
            is_response = "method" not in message_data and (
                msg_id == request_id
                or (msg_id is not None and str(msg_id) == str(request_id))
            )
            if is_response:
                completion_reason = "jsonrpc_response"
                if "error" in message_data:
                    error_data = message_data["error"]
                    completion_reason = "jsonrpc_error"
                    logger.warning(f"[ACP] Prompt error: {error_data}")
                    packet_logger.log_jsonrpc_response(
                        request_id, error=error_data, context="k8s"
                    )
                    yield Error(
                        code=error_data.get("code", -1),
                        message=error_data.get("message", "Unknown error"),
                    )
                else:
                    result = message_data.get("result", {})
                    packet_logger.log_jsonrpc_response(
                        request_id, result=result, context="k8s"
                    )
                    try:
                        prompt_response = PromptResponse.model_validate(result)
                        events_yielded += 1
                        yield prompt_response
                    except ValidationError as e:
                        logger.error(f"[ACP] PromptResponse validation failed: {e}")

                elapsed_ms = (time.time() - start_time) * 1000
                logger.info(
                    f"[ACP] Prompt complete: "
                    f"reason={completion_reason} acp_session={session_id} "
                    f"events={events_yielded} elapsed={elapsed_ms:.0f}ms"
                )
                break

            # Handle notifications (session/update)
            if message_data.get("method") == "session/update":
                params_data = message_data.get("params", {})
                update = params_data.get("update", {})

                prompt_complete = False
                for event in self._process_session_update(update):
                    events_yielded += 1
                    yield event
                    if isinstance(event, PromptResponse):
                        prompt_complete = True
                        break

                if prompt_complete:
                    completion_reason = "prompt_response_via_notification"
                    elapsed_ms = (time.time() - start_time) * 1000
                    logger.info(
                        f"[ACP] Prompt complete: "
                        f"reason={completion_reason} acp_session={session_id} "
                        f"events={events_yielded} elapsed={elapsed_ms:.0f}ms"
                    )
                    break

            # Handle requests from agent - send error response
            elif "method" in message_data and "id" in message_data:
                logger.debug(
                    f"[ACP] Unsupported agent request: method={message_data['method']}"
                )
                self._send_error_response(
                    message_data["id"],
                    -32601,
                    f"Method not supported: {message_data['method']}",
                )

            else:
                logger.warning(
                    f"[ACP] Unhandled message: "
                    f"id={message_data.get('id')} "
                    f"method={message_data.get('method')} "
                    f"keys={list(message_data.keys())}"
                )

    def _process_session_update(
        self, update: dict[str, Any]
    ) -> Generator[ACPEvent, None, None]:
        """Process a session/update notification and yield typed ACP schema objects."""
        update_type = update.get("sessionUpdate")
        if not isinstance(update_type, str):
            return

        # Map update types to their ACP schema classes.
        # Note: prompt_response is included because ACP sometimes sends it as a
        # notification WITHOUT a corresponding JSON-RPC response. We accept
        # either signal as turn completion (first one wins).
        type_map: dict[str, type[BaseModel]] = {
            "agent_message_chunk": AgentMessageChunk,
            "agent_thought_chunk": AgentThoughtChunk,
            "tool_call": ToolCallStart,
            "tool_call_update": ToolCallProgress,
            "plan": AgentPlanUpdate,
            "current_mode_update": CurrentModeUpdate,
            "prompt_response": PromptResponse,
        }

        model_class = type_map.get(update_type)
        if model_class is not None:
            try:
                yield cast(ACPEvent, model_class.model_validate(update))
            except ValidationError as e:
                logger.warning(f"[ACP] Validation error for {update_type}: {e}")
        elif update_type not in (
            "user_message_chunk",
            "available_commands_update",
            "session_info_update",
            "usage_update",
        ):
            logger.debug(f"[ACP] Unknown update type: {update_type}")

    def _send_error_response(self, request_id: int, code: int, message: str) -> None:
        """Send an error response to an agent request."""
        if self._ws_client is None or not self._ws_client.is_open():
            return

        response = {
            "jsonrpc": "2.0",
            "id": request_id,
            "error": {"code": code, "message": message},
        }

        self._ws_client.write_stdin(json.dumps(response) + "\n")

    def cancel(self, session_id: str | None = None) -> None:
        """Cancel the current operation on a session.

        Args:
            session_id: The ACP session ID to cancel. If None, cancels all sessions.
        """
        if session_id:
            if session_id in self._state.sessions:
                self._send_notification(
                    "session/cancel",
                    {"sessionId": session_id},
                )
        else:
            for sid in self._state.sessions:
                self._send_notification(
                    "session/cancel",
                    {"sessionId": sid},
                )

    def health_check(self, timeout: float = 5.0) -> bool:  # noqa: ARG002
        """Check if we can exec into the pod."""
        try:
            k8s = self._get_k8s_client()
            result = k8s_stream(
                k8s.connect_get_namespaced_pod_exec,
                name=self._pod_name,
                namespace=self._namespace,
                container=self._container,
                command=["echo", "ok"],
                stdin=False,
                stdout=True,
                stderr=False,
                tty=False,
            )
            return "ok" in result
        except Exception:
            return False

    @property
    def is_running(self) -> bool:
        """Check if the exec session is running."""
        return self._ws_client is not None and self._ws_client.is_open()

    def __enter__(self) -> "ACPExecClient":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit - ensures cleanup."""
        self.stop()


================================================
FILE: backend/onyx/server/features/build/sandbox/kubernetes/kubernetes_sandbox_manager.py
================================================
"""Kubernetes-based sandbox manager for production deployments.

KubernetesSandboxManager provisions sandboxes as Kubernetes pods with true
container isolation. Each sandbox runs in its own pod with dedicated resources.

Key features:
- Pod-based isolation (not process-level)
- S3-based snapshots via init containers
- Cluster-native service discovery
- RBAC-controlled resource management
- User-shared sandbox model with per-session workspaces

Architecture Note (User-Shared Sandbox Model):
- One pod per user (shared across all user's sessions)
- provision() creates the pod with shared files/ directory
- setup_session_workspace() creates per-session workspace via kubectl exec
- cleanup_session_workspace() removes session workspace via kubectl exec
- terminate() destroys the entire pod (all sessions)

Directory Structure (inside pod):
    /workspace/
    ├── files/                     # SHARED - synced from S3
    └── sessions/
        ├── $session_id_1/         # Per-session workspace
        │   ├── outputs/
        │   ├── AGENTS.md
        │   └── ...
        └── $session_id_2/
            └── ...

IMPORTANT: This manager does NOT interface with the database directly.
All database operations should be handled by the caller (SessionManager, Celery tasks, etc.).

Use get_sandbox_manager() from base.py to get the appropriate implementation.
"""

import base64
import binascii
import io
import json
import mimetypes
import os
import re
import shlex
import tarfile
import threading
import time
from collections.abc import Generator
from pathlib import Path
from uuid import UUID
from uuid import uuid4

from acp.schema import PromptResponse
from kubernetes import client  # type: ignore
from kubernetes import config
from kubernetes.client.rest import ApiException  # type: ignore
from kubernetes.stream import stream as k8s_stream  # type: ignore

from onyx.db.enums import SandboxStatus
from onyx.server.features.build.api.packet_logger import get_packet_logger
from onyx.server.features.build.configs import OPENCODE_DISABLED_TOOLS
from onyx.server.features.build.configs import SANDBOX_CONTAINER_IMAGE
from onyx.server.features.build.configs import SANDBOX_FILE_SYNC_SERVICE_ACCOUNT
from onyx.server.features.build.configs import SANDBOX_NAMESPACE
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_END
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START
from onyx.server.features.build.configs import SANDBOX_S3_BUCKET
from onyx.server.features.build.configs import SANDBOX_SERVICE_ACCOUNT_NAME
from onyx.server.features.build.sandbox.base import SandboxManager
from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (
    ACPEvent,
)
from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (
    ACPExecClient,
)
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotResult
from onyx.server.features.build.sandbox.util.agent_instructions import (
    ATTACHMENTS_SECTION_CONTENT,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    generate_agent_instructions,
)
from onyx.server.features.build.sandbox.util.opencode_config import (
    build_opencode_config,
)
from onyx.server.features.build.sandbox.util.persona_mapping import (
    generate_user_identity_content,
)
from onyx.server.features.build.sandbox.util.persona_mapping import get_persona_info
from onyx.server.features.build.sandbox.util.persona_mapping import ORG_INFO_AGENTS_MD
from onyx.server.features.build.sandbox.util.persona_mapping import (
    ORGANIZATION_STRUCTURE,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()

# API server pod hostname — used to identify which replica is handling a request.
# In K8s, HOSTNAME is set to the pod name (e.g., "api-server-dpgg7").
_API_SERVER_HOSTNAME = os.environ.get("HOSTNAME", "unknown")

# Constants for pod configuration
# Note: Next.js ports are dynamically allocated from SANDBOX_NEXTJS_PORT_START to
# SANDBOX_NEXTJS_PORT_END range, with one port per session.
AGENT_PORT = 8081
POD_READY_TIMEOUT_SECONDS = 120
POD_READY_POLL_INTERVAL_SECONDS = 2

# Resource deletion timeout and polling interval
# Kubernetes deletes are async - we need to wait for resources to actually be gone
RESOURCE_DELETION_TIMEOUT_SECONDS = 30
RESOURCE_DELETION_POLL_INTERVAL_SECONDS = 0.5


def _build_nextjs_start_script(
    session_path: str,
    nextjs_port: int,
    check_node_modules: bool = False,
) -> str:
    """Build shell script to start the NextJS dev server.

    Args:
        session_path: Path to the session directory (should be shell-safe)
        nextjs_port: Port number for the NextJS dev server
        check_node_modules: If True, check for node_modules and run npm install if missing

    Returns:
        Shell script string to start the NextJS server
    """
    npm_install_check = ""
    if check_node_modules:
        npm_install_check = """
# Check if npm dependencies are installed
if [ ! -d "node_modules" ]; then
    echo "Installing npm dependencies..."
    npm install
fi
"""

    return f"""
set -e
cd {session_path}/outputs/web
{npm_install_check}
# Start npm run dev in background
echo "Starting Next.js dev server on port {nextjs_port}..."
nohup npm run dev -- -p {nextjs_port} > {session_path}/nextjs.log 2>&1 &
NEXTJS_PID=$!
echo "Next.js server started with PID $NEXTJS_PID"
echo $NEXTJS_PID > {session_path}/nextjs.pid
"""


def _get_local_aws_credential_env_vars() -> list[client.V1EnvVar]:
    """Get AWS credential environment variables from local environment.

    Checks for AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and optionally
    AWS_SESSION_TOKEN and AWS_DEFAULT_REGION in the local environment.
    If credentials are found, returns V1EnvVar objects to pass them to containers.

    This allows using local AWS credentials for development/testing while
    IRSA (IAM Roles for Service Accounts) handles credentials in production EKS.

    Returns:
        List of V1EnvVar objects for AWS credentials, empty if not set locally.
    """
    env_vars: list[client.V1EnvVar] = []

    aws_access_key = os.environ.get("AWS_ACCESS_KEY_ID")
    aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")

    # Only add credentials if both required values are present
    if aws_access_key and aws_secret_key:
        env_vars.append(client.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=aws_access_key))
        env_vars.append(
            client.V1EnvVar(name="AWS_SECRET_ACCESS_KEY", value=aws_secret_key)
        )

        # Optional: session token for temporary credentials
        aws_session_token = os.environ.get("AWS_SESSION_TOKEN")
        if aws_session_token:
            env_vars.append(
                client.V1EnvVar(name="AWS_SESSION_TOKEN", value=aws_session_token)
            )

        # Optional: default region
        aws_region = os.environ.get("AWS_DEFAULT_REGION") or os.environ.get(
            "AWS_REGION"
        )
        if aws_region:
            env_vars.append(
                client.V1EnvVar(name="AWS_DEFAULT_REGION", value=aws_region)
            )

        logger.info("Using local AWS credentials for sandbox init container")

    return env_vars


def _build_filtered_symlink_script(
    session_path: str,
    excluded_user_library_paths: list[str],
) -> str:
    """Build a shell script that creates filtered symlinks for user_library.

    Creates symlinks for all top-level directories in /workspace/files/,
    then selectively symlinks user_library files, excluding disabled paths.

    TODO: Replace this inline shell script with a standalone Python script
    that gets copied onto the pod and invoked with arguments. This would
    be easier to test and maintain.

    Args:
        session_path: The session directory path in the pod
        excluded_user_library_paths: Paths to exclude from symlinks
    """
    excluded_paths_lines = "\n".join(p.lstrip("/") for p in excluded_user_library_paths)
    heredoc_delim = f"_EXCL_{uuid4().hex[:12]}_"
    return f"""
# Create filtered files directory with exclusions
mkdir -p {session_path}/files

# Symlink all top-level directories except user_library
for item in /workspace/files/*; do
    [ -e "$item" ] || continue
    name=$(basename "$item")
    if [ "$name" != "user_library" ]; then
        ln -sf "$item" {session_path}/files/"$name"
    fi
done

# Write excluded paths to a temp file (one per line, via heredoc for safety)
EXCL_FILE=$(mktemp)
cat > "$EXCL_FILE" << '{heredoc_delim}'
{excluded_paths_lines}
{heredoc_delim}

# Check if a relative path is excluded (exact match or child of excluded dir)
is_excluded() {{
    local rel_path="$1"
    while IFS= read -r excl || [ -n "$excl" ]; do
        [ -z "$excl" ] && continue
        if [ "$rel_path" = "$excl" ]; then
            return 0
        fi
        case "$rel_path" in
            "$excl"/*) return 0 ;;
        esac
    done < "$EXCL_FILE"
    return 1
}}

# Recursively create symlinks for non-excluded files
create_filtered_symlinks() {{
    src_dir="$1"
    dst_dir="$2"
    rel_base="$3"

    for item in "$src_dir"/*; do
        [ -e "$item" ] || continue
        name=$(basename "$item")
        if [ -n "$rel_base" ]; then
            rel_path="$rel_base/$name"
        else
            rel_path="$name"
        fi

        if is_excluded "$rel_path"; then
            continue
        fi

        if [ -d "$item" ]; then
            mkdir -p "$dst_dir/$name"
            create_filtered_symlinks "$item" "$dst_dir/$name" "$rel_path"
            rmdir "$dst_dir/$name" 2>/dev/null || true
        else
            ln -sf "$item" "$dst_dir/$name"
        fi
    done
}}

if [ -d "/workspace/files/user_library" ]; then
    mkdir -p {session_path}/files/user_library
    create_filtered_symlinks /workspace/files/user_library {session_path}/files/user_library ""
    rmdir {session_path}/files/user_library 2>/dev/null || true
fi

rm -f "$EXCL_FILE"
"""


class KubernetesSandboxManager(SandboxManager):
    """Kubernetes-based sandbox manager for production deployments.

    Manages sandboxes as Kubernetes pods with:
    - Init containers for S3 file sync (snapshots, knowledge files, uploads)
    - Main sandbox container running Next.js + opencode agent
    - ClusterIP services for network access

    IMPORTANT: This manager does NOT interface with the database directly.
    All database operations should be handled by the caller.

    This is a singleton class - use get_sandbox_manager() to get the instance.
    """

    _instance: "KubernetesSandboxManager | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "KubernetesSandboxManager":
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialize()
        return cls._instance

    def _initialize(self) -> None:
        """Initialize Kubernetes client and configuration."""
        # Load Kubernetes config (in-cluster or kubeconfig)
        try:
            config.load_incluster_config()
            logger.info("Loaded in-cluster Kubernetes configuration")
        except config.ConfigException:
            try:
                config.load_kube_config()
                logger.info("Loaded kubeconfig from default location")
            except config.ConfigException as e:
                raise RuntimeError(
                    f"Failed to load Kubernetes configuration: {e}"
                ) from e

        # IMPORTANT: We use separate ApiClient instances for REST vs streaming operations.
        # The kubernetes.stream.stream function monkey-patches the ApiClient's request
        # method to use WebSocket. If we share the same ApiClient for both REST and
        # streaming, the patching can leak, causing REST calls to erroneously use
        # WebSocket (resulting in "Handshake status 200 OK" errors).
        self._rest_api_client = client.ApiClient()
        self._stream_api_client = client.ApiClient()

        # Use the REST client for standard CRUD operations
        self._core_api = client.CoreV1Api(api_client=self._rest_api_client)
        self._batch_api = client.BatchV1Api(api_client=self._rest_api_client)
        self._networking_api = client.NetworkingV1Api(api_client=self._rest_api_client)

        # Use a separate client for streaming/exec operations
        self._stream_core_api = client.CoreV1Api(api_client=self._stream_api_client)

        self._namespace = SANDBOX_NAMESPACE
        self._image = SANDBOX_CONTAINER_IMAGE
        self._s3_bucket = SANDBOX_S3_BUCKET
        self._service_account = SANDBOX_SERVICE_ACCOUNT_NAME
        self._file_sync_service_account = SANDBOX_FILE_SYNC_SERVICE_ACCOUNT

        # Load AGENTS.md template path
        build_dir = Path(__file__).parent.parent.parent  # /onyx/server/features/build/
        self._agent_instructions_template_path = build_dir / "AGENTS.template.md"
        self._skills_path = Path(__file__).parent / "docker" / "skills"

        logger.info(
            f"KubernetesSandboxManager initialized: namespace={self._namespace}, image={self._image}"
        )

    def _get_pod_name(self, sandbox_id: str) -> str:
        """Generate pod name from sandbox ID."""
        return f"sandbox-{str(sandbox_id)[:8]}"

    def _get_service_name(self, sandbox_id: str) -> str:
        """Generate service name from sandbox ID."""
        return self._get_pod_name(sandbox_id)

    def _get_nextjs_url(self, sandbox_id: str, port: int) -> str:
        """Get the internal cluster URL for a session's Next.js server.

        Args:
            sandbox_id: The sandbox ID (string)
            port: The session's allocated Next.js port

        Returns:
            Internal cluster URL for the Next.js server on the specified port
        """
        service_name = self._get_service_name(sandbox_id)
        return f"http://{service_name}.{self._namespace}.svc.cluster.local:{port}"

    def _load_agent_instructions(
        self,
        files_path: Path | None = None,
        provider: str | None = None,
        model_name: str | None = None,
        nextjs_port: int | None = None,
        disabled_tools: list[str] | None = None,
        user_name: str | None = None,
        user_role: str | None = None,
        use_demo_data: bool = False,
        include_org_info: bool = False,
    ) -> str:
        """Load and populate agent instructions from template file.


        Args:
            files_path: Path to the files directory (symlink to knowledge sources)
            provider: LLM provider type
            model_name: Model name
            nextjs_port: Next.js port
            disabled_tools: List of disabled tools
            user_name: User's name for personalization
            user_role: User's role/title for personalization
            use_demo_data: If True, exclude user context from AGENTS.md
            include_org_info: Whether to include the org_info section (demo data mode)

        Returns:
            Populated agent instructions content

        Note:
            In Kubernetes mode, files_path refers to paths inside the pod.
            Since the backend cannot access the pod filesystem, these are passed as None
            to leave placeholders intact for the container script to resolve at runtime.
        """
        return generate_agent_instructions(
            template_path=self._agent_instructions_template_path,
            skills_path=self._skills_path,
            files_path=files_path,
            provider=provider,
            model_name=model_name,
            nextjs_port=nextjs_port,
            disabled_tools=disabled_tools,
            user_name=user_name,
            user_role=user_role,
            use_demo_data=use_demo_data,
            include_org_info=include_org_info,
        )

    def _create_sandbox_pod(
        self,
        sandbox_id: str,
        user_id: str,
        tenant_id: str,
    ) -> client.V1Pod:
        """Create Pod specification for sandbox (user-level).

        Creates pod with:
        - files/ directory synced from S3 (shared across sessions)
        - sessions/ directory for per-session workspaces

        NOTE: Session-specific setup is done via setup_session_workspace().
        """
        pod_name = self._get_pod_name(sandbox_id)

        # File-sync sidecar container for S3 file sync (knowledge files only)
        # Runs as sidecar (not init container) so we can trigger incremental syncs
        # via kubectl exec after new documents are indexed
        file_sync_container = client.V1Container(
            name="file-sync",
            image="peakcom/s5cmd:v2.3.0",
            env=_get_local_aws_credential_env_vars(),
            command=["/bin/sh", "-c"],
            args=[
                f"""
# Handle signals for graceful container termination
trap 'echo "Shutting down"; exit 0' TERM INT

echo "Starting initial file sync"
echo "S3: s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/*"
echo "Local: /workspace/files/"

# s5cmd sync (default 256 workers)
# Exit codes: 0=success, 1=success with warnings
sync_exit_code=0
/s5cmd --stat sync \
    "s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/*" \
    /workspace/files/ 2>&1 || sync_exit_code=$?

echo "=== Initial sync finished (exit code: $sync_exit_code) ==="

# Handle result
if [ $sync_exit_code -eq 0 ] || [ $sync_exit_code -eq 1 ]; then
    file_count=$(find /workspace/files -type f 2>/dev/null | wc -l)
    echo "Files synced: $file_count"
    echo "Sidecar ready for incremental syncs"
else
    echo "ERROR: Initial sync failed (exit code: $sync_exit_code)"
    exit $sync_exit_code
fi

# Stay alive for incremental syncs via kubectl exec
while true; do
    sleep 30 &
    wait $!
done
"""
            ],
            volume_mounts=[
                client.V1VolumeMount(name="files", mount_path="/workspace/files"),
                # Mount sessions directory so file-sync can create snapshots
                client.V1VolumeMount(
                    name="workspace", mount_path="/workspace/sessions"
                ),
            ],
            resources=client.V1ResourceRequirements(
                # Reduced resources since sidecar is mostly idle (sleeping)
                requests={"cpu": "250m", "memory": "256Mi"},
                limits={"cpu": "4000m", "memory": "8Gi"},
            ),
        )

        # Main sandbox container
        # Note: Container ports are informational only in K8s. Each session's Next.js
        # server binds to its allocated port from the SANDBOX_NEXTJS_PORT_START-END range.
        # We declare all ports for documentation, tooling, and network policies.
        container_ports = [
            client.V1ContainerPort(name="agent", container_port=AGENT_PORT),
        ]
        # Add ports for session Next.js servers (one port per potential session)
        for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):
            container_ports.append(
                client.V1ContainerPort(
                    name=f"nextjs-{port}",
                    container_port=port,
                )
            )

        sandbox_container = client.V1Container(
            name="sandbox",
            image=self._image,
            image_pull_policy="IfNotPresent",
            ports=container_ports,
            volume_mounts=[
                client.V1VolumeMount(
                    name="files", mount_path="/workspace/files", read_only=True
                ),
                # Mount sessions directory (shared with file-sync for snapshots)
                client.V1VolumeMount(
                    name="workspace", mount_path="/workspace/sessions"
                ),
            ],
            resources=client.V1ResourceRequirements(
                requests={"cpu": "1000m", "memory": "2Gi"},
                limits={"cpu": "2000m", "memory": "10Gi"},
            ),
            # TODO: Re-enable probes when sandbox container runs actual services.
            # Note: Next.js ports are now per-session (dynamic), so container-level
            # probes would need to check the agent port or use a different approach.
            # liveness_probe=client.V1Probe(
            #     http_get=client.V1HTTPGetAction(path="/global/health", port=AGENT_PORT),
            #     initial_delay_seconds=30,
            #     period_seconds=30,
            #     timeout_seconds=5,
            #     failure_threshold=3,
            # ),
            security_context=client.V1SecurityContext(
                allow_privilege_escalation=False,
                read_only_root_filesystem=False,
                privileged=False,
                capabilities=client.V1Capabilities(drop=["ALL"]),
            ),
        )

        # Volumes - workspace holds sessions/, files is shared read-only
        volumes = [
            client.V1Volume(
                name="workspace",
                # Increased size: holds sessions/ directory with per-session outputs
                empty_dir=client.V1EmptyDirVolumeSource(size_limit="50Gi"),
            ),
            client.V1Volume(
                name="files",
                empty_dir=client.V1EmptyDirVolumeSource(size_limit="5Gi"),
            ),
        ]

        # Pod spec
        # Note: file_sync_container runs as sidecar (not init container) so we can
        # trigger incremental S3 syncs via kubectl exec after new documents are indexed
        pod_spec = client.V1PodSpec(
            service_account_name=self._file_sync_service_account,
            containers=[sandbox_container, file_sync_container],
            volumes=volumes,
            restart_policy="Never",
            termination_grace_period_seconds=10,  # Fast pod termination
            # CRITICAL: Disable service environment variable injection
            # Without this, Kubernetes injects env vars for ALL services in the namespace,
            # which can exceed ARG_MAX (2.6MB) when there are many sandbox pods.
            # With 40+ sandboxes × 100 ports × 4 env vars each = ~16k env vars (~2.2MB)
            # This causes "exec /bin/sh: argument list too long" errors.
            enable_service_links=False,
            # Node selection for sandbox nodes
            node_selector={"onyx.app/workload": "sandbox"},
            tolerations=[
                client.V1Toleration(
                    key="workload",
                    operator="Equal",
                    value="sandbox",
                    effect="NoSchedule",
                ),
            ],
            # Security context for pod
            security_context=client.V1PodSecurityContext(
                run_as_non_root=True,
                run_as_user=1000,
                fs_group=1000,
                seccomp_profile=client.V1SeccompProfile(type="RuntimeDefault"),
            ),
            # Disable host access
            host_network=False,
            host_pid=False,
            host_ipc=False,
        )

        return client.V1Pod(
            api_version="v1",
            kind="Pod",
            metadata=client.V1ObjectMeta(
                name=pod_name,
                namespace=self._namespace,
                labels={
                    "app.kubernetes.io/component": "sandbox",
                    "app.kubernetes.io/managed-by": "onyx",
                    "onyx.app/sandbox-id": sandbox_id,
                    "onyx.app/tenant-id": tenant_id,
                },
            ),
            spec=pod_spec,
        )

    def _create_sandbox_service(
        self,
        sandbox_id: UUID,
        tenant_id: str,
    ) -> client.V1Service:
        """Create ClusterIP Service for sandbox pod.

        Exposes the agent port and a range of ports for per-session Next.js servers.
        The port range matches SANDBOX_NEXTJS_PORT_START to SANDBOX_NEXTJS_PORT_END.
        """
        # Convert UUID objects to strings if needed (Kubernetes client requires strings)
        sandbox_id_str: str = str(sandbox_id)
        tenant_id_str: str = str(tenant_id)

        service_name = self._get_service_name(sandbox_id_str)

        # Build port list: agent port + all session Next.js ports
        ports = [
            client.V1ServicePort(name="agent", port=AGENT_PORT, target_port=AGENT_PORT),
        ]

        # Add ports for session Next.js servers (one port per potential session)
        for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):
            ports.append(
                client.V1ServicePort(
                    name=f"nextjs-{port}",
                    port=port,
                    target_port=port,
                )
            )

        return client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(
                name=service_name,
                namespace=self._namespace,
                labels={
                    "app.kubernetes.io/component": "sandbox",
                    "app.kubernetes.io/managed-by": "onyx",
                    "onyx.app/sandbox-id": sandbox_id_str,
                    "onyx.app/tenant-id": tenant_id_str,
                },
            ),
            spec=client.V1ServiceSpec(
                type="ClusterIP",
                selector={"onyx.app/sandbox-id": sandbox_id_str},
                ports=ports,
            ),
        )

    def _ensure_service_exists(
        self,
        sandbox_id: UUID,
        tenant_id: str,
    ) -> None:
        """Ensure a ClusterIP service exists for the sandbox pod.

        Handles the case where a service is in Terminating state (has a
        deletion_timestamp) by waiting for deletion and recreating it.
        This prevents a race condition where provision reuses an existing pod
        but the old service is still being deleted.
        """
        service_name = self._get_service_name(str(sandbox_id))

        try:
            svc = self._core_api.read_namespaced_service(
                name=service_name,
                namespace=self._namespace,
            )
            # Service exists - check if it's being deleted
            if svc.metadata.deletion_timestamp:
                logger.info(
                    f"Service {service_name} is terminating, waiting for deletion"
                )
                self._wait_for_resource_deletion("service", service_name)
                # Now create a fresh service
                service = self._create_sandbox_service(sandbox_id, tenant_id)
                self._core_api.create_namespaced_service(
                    namespace=self._namespace,
                    body=service,
                )
                logger.info(f"Recreated Service {service_name} after termination")
            else:
                logger.debug(f"Service {service_name} already exists and is active")

        except ApiException as e:
            if e.status == 404:
                # Service doesn't exist, create it
                logger.info(f"Creating missing Service {service_name}")
                service = self._create_sandbox_service(sandbox_id, tenant_id)
                try:
                    self._core_api.create_namespaced_service(
                        namespace=self._namespace,
                        body=service,
                    )
                except ApiException as svc_e:
                    if svc_e.status != 409:  # Ignore AlreadyExists
                        raise
                    logger.debug(
                        f"Service {service_name} was created by another request"
                    )
            else:
                raise

    def _get_init_container_logs(self, pod_name: str, container_name: str) -> str:
        """Get logs from an init container.

        Args:
            pod_name: Name of the pod
            container_name: Name of the init container

        Returns:
            Log output from the init container, or error message if logs cannot be retrieved
        """
        try:
            logs = self._core_api.read_namespaced_pod_log(
                name=pod_name,
                namespace=self._namespace,
                container=container_name,
                tail_lines=100,  # Get last 100 lines
            )
            return logs if logs else "(no logs available)"
        except ApiException as e:
            return f"(failed to retrieve logs: {e})"

    def _check_init_container_status(self, pod: client.V1Pod) -> str | None:
        """Check if any init containers have failed.

        Args:
            pod: The pod object

        Returns:
            Error message if an init container failed, None otherwise
        """
        if not pod.status.init_container_statuses:
            return None

        for init_status in pod.status.init_container_statuses:
            if init_status.state:
                # Check for terminated state with non-zero exit code
                if init_status.state.terminated:
                    if init_status.state.terminated.exit_code != 0:
                        container_name = init_status.name
                        logs = self._get_init_container_logs(
                            pod.metadata.name, container_name
                        )
                        return (
                            f"Init container '{container_name}' failed with exit code "
                            f"{init_status.state.terminated.exit_code}. "
                            f"Logs:\n{logs}"
                        )
                # Check for waiting state with error reason
                elif init_status.state.waiting:
                    if init_status.state.waiting.reason in [
                        "Error",
                        "CrashLoopBackOff",
                    ]:
                        container_name = init_status.name
                        reason = init_status.state.waiting.reason
                        message = init_status.state.waiting.message or ""
                        return f"Init container '{container_name}' is in '{reason}' state. Message: {message}"

        return None

    def _wait_for_pod_ready(
        self,
        pod_name: str,
        timeout: float = POD_READY_TIMEOUT_SECONDS,
    ) -> bool:
        """Wait for pod to become ready.

        Args:
            pod_name: Name of the pod to wait for
            timeout: Maximum time to wait in seconds

        Returns:
            True if pod is ready, False if timeout

        Raises:
            RuntimeError: If pod fails or is deleted
        """
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                pod = self._core_api.read_namespaced_pod(
                    name=pod_name,
                    namespace=self._namespace,
                )

                # Check init container status first (they run before main container)
                init_error = self._check_init_container_status(pod)
                if init_error:
                    raise RuntimeError(f"Pod {pod_name} failed to start: {init_error}")

                phase = pod.status.phase

                # Check for failure conditions
                if phase == "Failed":
                    # Try to get more details about the failure
                    init_error = self._check_init_container_status(pod)
                    error_msg = f"Pod {pod_name} failed to start"
                    if init_error:
                        error_msg += f": {init_error}"
                    raise RuntimeError(error_msg)

                if phase == "Succeeded":
                    raise RuntimeError(
                        f"Pod {pod_name} completed unexpectedly (sandbox pods should run indefinitely)"
                    )

                # Check if running and ready
                if phase == "Running":
                    conditions = pod.status.conditions or []
                    for condition in conditions:
                        if condition.type == "Ready" and condition.status == "True":
                            logger.info(f"Pod {pod_name} is ready")
                            return True

                logger.debug(f"Pod {pod_name} status: {phase}, waiting...")

            except ApiException as e:
                if e.status == 404:
                    raise RuntimeError(f"Pod {pod_name} was deleted")
                logger.warning(f"Error checking pod status: {e}")

            time.sleep(POD_READY_POLL_INTERVAL_SECONDS)

        # On timeout, check one more time for init container failures
        try:
            pod = self._core_api.read_namespaced_pod(
                name=pod_name,
                namespace=self._namespace,
            )
            init_error = self._check_init_container_status(pod)
            if init_error:
                raise RuntimeError(f"Pod {pod_name} failed to start: {init_error}")
        except ApiException:
            pass  # Pod might be deleted, ignore

        logger.warning(f"Timeout waiting for pod {pod_name} to become ready")
        return False

    def _pod_exists_and_healthy(self, pod_name: str) -> bool:
        """Check if a pod exists and is in a healthy/running state.

        Args:
            pod_name: Name of the pod to check

        Returns:
            True if pod exists and is running/ready, False otherwise
        """
        try:
            pod = self._core_api.read_namespaced_pod(
                name=pod_name,
                namespace=self._namespace,
            )
            phase = pod.status.phase

            # Check if running and ready
            if phase == "Running":
                conditions = pod.status.conditions or []
                for condition in conditions:
                    if condition.type == "Ready" and condition.status == "True":
                        return True

            # Pending is OK too - pod is being created by another request
            if phase == "Pending":
                return True

            return False
        except ApiException as e:
            if e.status == 404:
                return False
            raise

    def provision(
        self,
        sandbox_id: UUID,
        user_id: UUID,
        tenant_id: str,
        llm_config: LLMProviderConfig,  # noqa: ARG002
    ) -> SandboxInfo:
        """Provision a new sandbox as a Kubernetes pod (user-level).

        This method is idempotent - if a pod already exists and is healthy,
        it will be reused. This prevents race conditions when multiple requests
        try to provision the same sandbox concurrently.

        Creates pod with:
        1. Init container syncs files/ from S3
        2. Creates sessions/ directory for per-session workspaces
        3. Main container runs the sandbox environment

        NOTE: This does NOT set up session-specific workspaces.
        Call setup_session_workspace() to create session workspaces.

        Args:
            sandbox_id: Unique identifier for the sandbox
            user_id: User identifier who owns this sandbox
            tenant_id: Tenant identifier for multi-tenant isolation
            llm_config: LLM provider configuration

        Returns:
            SandboxInfo with the provisioned sandbox details

        Raises:
            RuntimeError: If provisioning fails
        """
        logger.info(
            f"Starting Kubernetes sandbox provisioning for sandbox {sandbox_id}, user {user_id}, tenant {tenant_id}"
        )

        pod_name = self._get_pod_name(str(sandbox_id))

        # Check if pod already exists and is healthy (idempotency check)
        if self._pod_exists_and_healthy(pod_name):
            logger.info(
                f"Pod {pod_name} already exists and is healthy, reusing existing pod"
            )
            # Ensure service exists and is not terminating
            self._ensure_service_exists(sandbox_id, tenant_id)

            # Wait for pod to be ready if it's still pending
            logger.info(f"Waiting for existing pod {pod_name} to become ready...")
            if not self._wait_for_pod_ready(pod_name):
                raise RuntimeError(
                    f"Timeout waiting for existing sandbox pod {pod_name} to become ready"
                )

            logger.info(
                f"Reusing existing Kubernetes sandbox {sandbox_id}, pod: {pod_name}"
            )
            return SandboxInfo(
                sandbox_id=sandbox_id,
                directory_path=f"k8s://{self._namespace}/{pod_name}",
                status=SandboxStatus.RUNNING,
                last_heartbeat=None,
            )

        try:
            # 1. Create Pod (user-level only, no session setup)
            logger.debug(f"Creating Pod {pod_name}")
            pod = self._create_sandbox_pod(
                sandbox_id=str(sandbox_id),
                user_id=str(user_id),
                tenant_id=tenant_id,
            )
            try:
                self._core_api.create_namespaced_pod(
                    namespace=self._namespace,
                    body=pod,
                )
            except ApiException as e:
                if e.status == 409:
                    # Pod was created by another concurrent request
                    # Check if it's healthy and reuse it
                    logger.warning(
                        f"Pod {pod_name} already exists (409 conflict, this shouldn't normally happen), "
                        "checking if it's healthy..."
                    )
                    if self._pod_exists_and_healthy(pod_name):
                        logger.warning(
                            f"During provisioning, discovered that pod {pod_name} already exists. Reusing"
                        )
                        # Continue to ensure service exists and wait for ready
                    else:
                        # Pod exists but is not healthy - this shouldn't happen often
                        # but could occur if a previous provision failed mid-way
                        logger.warning(
                            f"Pod {pod_name} exists but is not healthy, waiting for it to become ready or fail"
                        )
                else:
                    raise

            # 2. Create Service (handles terminating services)
            self._ensure_service_exists(sandbox_id, tenant_id)

            # 3. Wait for pod to be ready
            logger.info(f"Waiting for pod {pod_name} to become ready...")
            if not self._wait_for_pod_ready(pod_name):
                raise RuntimeError(
                    f"Timeout waiting for sandbox pod {pod_name} to become ready"
                )

            logger.info(
                f"Provisioned Kubernetes sandbox {sandbox_id}, pod: {pod_name} (no sessions yet)"
            )

            return SandboxInfo(
                sandbox_id=sandbox_id,
                directory_path=f"k8s://{self._namespace}/{pod_name}",
                status=SandboxStatus.RUNNING,
                last_heartbeat=None,
            )

        except Exception as e:
            # Only cleanup if we're sure the pod is not being used by another request
            # Check if pod is healthy - if so, don't clean up (another request may own it)
            if self._pod_exists_and_healthy(pod_name):
                logger.warning(
                    f"Kubernetes sandbox provisioning failed for sandbox {sandbox_id}: {e}, "
                    "but pod is healthy (likely owned by concurrent request), not cleaning up"
                )
            else:
                logger.error(
                    f"Kubernetes sandbox provisioning failed for sandbox {sandbox_id}: {e}",
                    exc_info=True,
                )
                self._cleanup_kubernetes_resources(str(sandbox_id))
            raise

    def _wait_for_resource_deletion(
        self,
        resource_type: str,
        name: str,
        timeout: float = RESOURCE_DELETION_TIMEOUT_SECONDS,
    ) -> bool:
        """Wait for a Kubernetes resource to be fully deleted.

        Kubernetes delete calls are asynchronous - the API returns immediately
        but the resource may still exist in a 'Terminating' state. This method
        polls until the resource returns 404 (not found).

        Args:
            resource_type: Type of resource ("pod" or "service")
            name: Name of the resource
            timeout: Maximum time to wait in seconds

        Returns:
            True if resource was deleted, False if timeout
        """
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                if resource_type == "pod":
                    self._core_api.read_namespaced_pod(
                        name=name,
                        namespace=self._namespace,
                    )
                elif resource_type == "service":
                    self._core_api.read_namespaced_service(
                        name=name,
                        namespace=self._namespace,
                    )
                else:
                    raise ValueError(f"Unknown resource type: {resource_type}")

                # Resource still exists, wait and retry
                logger.debug(f"Waiting for {resource_type} {name} to be deleted...")
                time.sleep(RESOURCE_DELETION_POLL_INTERVAL_SECONDS)

            except ApiException as e:
                if e.status == 404:
                    # Resource is gone
                    logger.debug(f"{resource_type.capitalize()} {name} fully deleted")
                    return True
                # Other error, log and continue waiting
                logger.warning(f"Error checking {resource_type} {name} status: {e}")
                time.sleep(RESOURCE_DELETION_POLL_INTERVAL_SECONDS)

        logger.warning(
            f"Timeout waiting for {resource_type} {name} to be deleted after {timeout}s"
        )
        return False

    def _cleanup_kubernetes_resources(
        self,
        sandbox_id: str,
        wait_for_deletion: bool = True,
    ) -> None:
        """Clean up Kubernetes resources for a sandbox.

        Args:
            sandbox_id: The sandbox ID to clean up
            wait_for_deletion: If True, wait for resources to be fully deleted
                before returning. This prevents 409 conflicts when immediately
                re-provisioning with the same sandbox ID.
        """
        # Convert UUID objects to strings if needed (Kubernetes client requires strings)
        sandbox_id = str(sandbox_id)

        pod_name = self._get_pod_name(sandbox_id)
        service_name = self._get_service_name(sandbox_id)

        # Delete in reverse order of creation
        service_deleted = False
        try:
            self._core_api.delete_namespaced_service(
                name=service_name,
                namespace=self._namespace,
            )
            logger.debug(f"Deleted Service {service_name}")
            service_deleted = True
        except ApiException as e:
            if e.status == 404:
                # Already deleted
                service_deleted = True
            else:
                logger.error(f"Error deleting Service {service_name}: {e}")
                raise

        pod_deleted = False
        try:
            self._core_api.delete_namespaced_pod(
                name=pod_name,
                namespace=self._namespace,
            )
            logger.debug(f"Deleted Pod {pod_name}")
            pod_deleted = True
        except ApiException as e:
            if e.status == 404:
                # Already deleted
                pod_deleted = True
            else:
                logger.error(f"Error deleting Pod {pod_name}: {e}")
                raise

        # Wait for resources to be fully deleted to prevent 409 conflicts
        # on immediate re-provisioning
        if wait_for_deletion:
            if service_deleted:
                self._wait_for_resource_deletion("service", service_name)
            if pod_deleted:
                self._wait_for_resource_deletion("pod", pod_name)

    def terminate(self, sandbox_id: UUID) -> None:
        """Terminate a sandbox and clean up Kubernetes resources.

        Removes session mappings for this sandbox, then deletes the
        Service and Pod. ACP clients are ephemeral (created per message),
        so there's nothing to stop here.

        Args:
            sandbox_id: The sandbox ID to terminate
        """
        # Clean up Kubernetes resources (needs string for pod/service names)
        self._cleanup_kubernetes_resources(str(sandbox_id))

        logger.info(f"Terminated Kubernetes sandbox {sandbox_id}")

    def setup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        llm_config: LLMProviderConfig,
        nextjs_port: int,
        file_system_path: str | None = None,  # noqa: ARG002
        snapshot_path: str | None = None,
        user_name: str | None = None,
        user_role: str | None = None,
        user_work_area: str | None = None,
        user_level: str | None = None,
        use_demo_data: bool = False,
        excluded_user_library_paths: list[str] | None = None,
    ) -> None:
        """Set up a session workspace within an existing sandbox pod.

        Executes kubectl exec to:
        1. Create sessions/$session_id/ directory
        2. Create files/ symlink (to demo data or S3-synced user files)
        3. Copy outputs template from local templates (downloaded during init)
        4. Write AGENTS.md
        5. Write opencode.json with LLM config
        6. Create org_info/ directory with user identity file (if demo data enabled)
        7. Start Next.js dev server

        Note: Snapshot restoration is not supported in Kubernetes mode since the
        main container doesn't have S3 access. Snapshots would need to be
        pre-downloaded during pod provisioning if needed.

        Args:
            sandbox_id: The sandbox ID (must be provisioned)
            session_id: The session ID for this workspace
            llm_config: LLM provider configuration for opencode.json
            file_system_path: Path to user's S3-synced knowledge files (/workspace/files)
            snapshot_path: Optional S3 path - logged but ignored (no S3 access)
            user_name: User's name for personalization in AGENTS.md
            user_role: User's role/title for personalization in AGENTS.md
            user_work_area: User's work area for demo persona (e.g., "engineering")
            user_level: User's level for demo persona (e.g., "ic", "manager")
            use_demo_data: If True, symlink files/ to /workspace/demo_data;
                          else to /workspace/files (S3-synced user files)
            excluded_user_library_paths: List of paths within user_library/ to exclude
                (e.g., ["/data/file.xlsx"]). These files won't be accessible in the session.

        Raises:
            RuntimeError: If workspace setup fails
        """
        if snapshot_path:
            logger.warning(
                f"Snapshot restoration requested but not supported in Kubernetes mode. "
                f"Snapshot path {snapshot_path} will be ignored. "
                f"Session {session_id} will start with fresh outputs template."
            )

        pod_name = self._get_pod_name(str(sandbox_id))
        session_path = f"/workspace/sessions/{session_id}"

        # Paths inside the pod (created during workspace setup below):
        # - {session_path}/files: symlink to knowledge sources
        # - {session_path}/attachments: user-uploaded files
        #
        # Note: files_path=None leaves {{KNOWLEDGE_SOURCES_SECTION}} placeholder intact
        # for generate_agents_md.py to resolve at container runtime by scanning /workspace/files.
        # Attachments section is injected dynamically when first file is uploaded.
        agent_instructions = self._load_agent_instructions(
            files_path=None,  # Container script handles this at runtime
            provider=llm_config.provider,
            model_name=llm_config.model_name,
            nextjs_port=nextjs_port,
            disabled_tools=OPENCODE_DISABLED_TOOLS,
            user_name=user_name,
            user_role=user_role,
            use_demo_data=use_demo_data,
            include_org_info=use_demo_data,
        )

        # Build opencode config JSON using shared config builder
        opencode_config = build_opencode_config(
            provider=llm_config.provider,
            model_name=llm_config.model_name,
            api_key=llm_config.api_key if llm_config.api_key else None,
            api_base=llm_config.api_base,
            disabled_tools=OPENCODE_DISABLED_TOOLS,
        )

        opencode_json = json.dumps(opencode_config)
        # Escape for shell
        opencode_json_escaped = opencode_json.replace("'", "'\\''")
        agent_instructions_escaped = agent_instructions.replace("'", "'\\''")

        # Build org_info setup script if persona is set
        # Uses shared constants from persona_mapping module as single source of truth
        org_info_setup = ""
        if user_work_area:
            persona = get_persona_info(user_work_area, user_level)
            if persona:
                # Escape content for shell (single quotes)
                agents_md_escaped = ORG_INFO_AGENTS_MD.replace("'", "'\\''")
                identity_escaped = generate_user_identity_content(persona).replace(
                    "'", "'\\''"
                )
                org_structure_escaped = json.dumps(
                    ORGANIZATION_STRUCTURE, indent=2
                ).replace("'", "'\\''")

                org_info_setup = f"""
# Create org_info directory with all files
mkdir -p {session_path}/org_info
printf '%s' '{agents_md_escaped}' > {session_path}/org_info/AGENTS.md
printf '%s' '{identity_escaped}' > {session_path}/org_info/user_identity_profile.txt
printf '%s' '{org_structure_escaped}' > {session_path}/org_info/organization_structure.json
"""

        # Build files symlink setup
        # Choose between demo data (baked in image) or user's S3-synced files
        if use_demo_data:
            # Demo mode: symlink to demo data baked into the container image
            symlink_target = "/workspace/demo_data"
            files_symlink_setup = f"""
# Create files symlink to demo data (baked into image)
echo "Creating files symlink to demo data: {symlink_target}"
ln -sf {symlink_target} {session_path}/files
"""
        elif excluded_user_library_paths:
            files_symlink_setup = _build_filtered_symlink_script(
                session_path, excluded_user_library_paths
            )
        else:
            # Normal mode: symlink to user's S3-synced knowledge files
            symlink_target = "/workspace/files"
            files_symlink_setup = f"""
# Create files symlink to user's knowledge files (synced from S3)
echo "Creating files symlink to user files: {symlink_target}"
ln -sf {symlink_target} {session_path}/files
"""

        # Copy outputs template from baked-in location and install npm dependencies
        outputs_setup = f"""
# Copy outputs template (baked into image at build time)
echo "Copying outputs template"
if [ -d /workspace/templates/outputs ]; then
    cp -r /workspace/templates/outputs/* {session_path}/outputs/
    # Install npm dependencies
    echo "Installing npm dependencies..."
    cd {session_path}/outputs/web && npm install
else
    echo "Warning: outputs template not found at /workspace/templates/outputs"
    mkdir -p {session_path}/outputs/web
fi
"""

        # Build NextJS startup script (npm install already done in outputs_setup)
        nextjs_start_script = _build_nextjs_start_script(
            session_path, nextjs_port, check_node_modules=False
        )

        setup_script = f"""
set -e

# Create session directory structure
echo "Creating session directory: {session_path}"
mkdir -p {session_path}/outputs
mkdir -p {session_path}/attachments
{files_symlink_setup}
# Setup outputs
{outputs_setup}

# Symlink skills (baked into image at /workspace/skills/)
if [ -d /workspace/skills ]; then
    mkdir -p {session_path}/.opencode
    ln -sf /workspace/skills {session_path}/.opencode/skills
    echo "Linked skills to /workspace/skills"
fi

# Write agent instructions
echo "Writing AGENTS.md"
printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md

# Populate knowledge sources by scanning the files directory
python3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true

# Write opencode config
echo "Writing opencode.json"
printf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json
{org_info_setup}
# Start Next.js dev server
{nextjs_start_script}

echo "Session workspace setup complete"
"""

        logger.info(
            f"Setting up session workspace {session_id} in sandbox {sandbox_id}"
        )

        try:
            # Execute setup script in the pod
            exec_response = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                command=["/bin/sh", "-c", setup_script],
                container="sandbox",
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            logger.debug(f"Session setup output: {exec_response}")
            logger.info(
                f"Set up session workspace {session_id} in sandbox {sandbox_id}"
            )

        except Exception as e:
            logger.error(
                f"Failed to setup session workspace {session_id} in sandbox {sandbox_id}: {e}",
                exc_info=True,
            )
            raise RuntimeError(
                f"Failed to setup session workspace {session_id}: {e}"
            ) from e

    def cleanup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        nextjs_port: int | None = None,  # noqa: ARG002
    ) -> None:
        """Clean up a session workspace (on session delete).

        Removes the ACP session mapping and executes kubectl exec to remove
        the session directory. The shared ACP client persists for other sessions.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to clean up
            nextjs_port: Optional port where Next.js server is running (unused in K8s,
                        we use PID file instead)
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        session_path = f"/workspace/sessions/{session_id}"

        cleanup_script = f"""
set -e

# Kill Next.js server if running
if [ -f {session_path}/nextjs.pid ]; then
    NEXTJS_PID=$(cat {session_path}/nextjs.pid)
    echo "Stopping Next.js server (PID: $NEXTJS_PID)"
    kill $NEXTJS_PID 2>/dev/null || true
fi

echo "Removing session directory: {session_path}"
rm -rf {session_path}
echo "Session cleanup complete"
"""

        logger.info(
            f"Cleaning up session workspace {session_id} in sandbox {sandbox_id}"
        )

        try:
            exec_response = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                command=["/bin/sh", "-c", cleanup_script],
                container="sandbox",
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            logger.debug(f"Session cleanup output: {exec_response}")
            logger.info(
                f"Cleaned up session workspace {session_id} in sandbox {sandbox_id}"
            )

        except ApiException as e:
            if e.status == 404:
                # Pod not found, nothing to clean up
                logger.debug(f"Pod {pod_name} not found, skipping cleanup")
            else:
                logger.warning(f"Error cleaning up session workspace {session_id}: {e}")
        except Exception as e:
            logger.warning(f"Error cleaning up session workspace {session_id}: {e}")

    def create_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        tenant_id: str,
    ) -> SnapshotResult | None:
        """Create a snapshot of a session's outputs and attachments directories.

        For Kubernetes backend, we exec into the file-sync container to create
        the snapshot and upload to S3. Captures:
        - sessions/$session_id/outputs/ (generated artifacts, web apps)
        - sessions/$session_id/attachments/ (user uploaded files)
        - sessions/$session_id/.opencode-data/ (opencode session data for resumption)

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to snapshot
            tenant_id: Tenant identifier for storage path

        Returns:
            SnapshotResult with storage path and size, or None if nothing to snapshot

        Raises:
            RuntimeError: If snapshot creation fails
        """
        sandbox_id_str = str(sandbox_id)
        session_id_str = str(session_id)
        pod_name = self._get_pod_name(sandbox_id_str)
        snapshot_id = str(uuid4())

        # Use shlex.quote for safety (UUIDs are safe but good practice)
        safe_session_path = shlex.quote(f"/workspace/sessions/{session_id_str}")
        s3_path = f"s3://{self._s3_bucket}/{tenant_id}/snapshots/{session_id_str}/{snapshot_id}.tar.gz"

        # Create tar and upload to S3 via file-sync container.
        # .opencode-data/ is already on the shared workspace volume because we set
        # XDG_DATA_HOME to the session directory when starting opencode (see
        # ACPExecClient.start()). No cross-container copy needed.
        exec_command = [
            "/bin/sh",
            "-c",
            f"""
set -eo pipefail
cd {safe_session_path}
if [ ! -d outputs ]; then
    echo "EMPTY_SNAPSHOT"
    exit 0
fi
dirs="outputs"
[ -d attachments ] && [ "$(ls -A attachments 2>/dev/null)" ] && dirs="$dirs attachments"
[ -d .opencode-data ] && [ "$(ls -A .opencode-data 2>/dev/null)" ] && dirs="$dirs .opencode-data"
tar -czf - $dirs | /s5cmd pipe {s3_path}
echo "SNAPSHOT_CREATED"
""",
        ]

        try:
            # Use exec to run snapshot command in file-sync container (has s5cmd)
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="file-sync",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            logger.debug(f"Snapshot exec output: {resp}")

            # Check if nothing was snapshotted
            if "EMPTY_SNAPSHOT" in resp:
                logger.info(
                    f"No outputs or attachments to snapshot for session {session_id}"
                )
                return None

            # Verify upload succeeded
            if "SNAPSHOT_CREATED" not in resp:
                raise RuntimeError(f"Snapshot upload may have failed. Output: {resp}")

        except ApiException as e:
            raise RuntimeError(f"Failed to create snapshot: {e}") from e

        # Estimate size (we can't easily get exact size from streamed tar)
        # In production, you might want to query S3 for the actual size
        size_bytes = 0

        # Storage path must match the S3 upload path (without s3://bucket/ prefix)
        storage_path = f"{tenant_id}/snapshots/{session_id_str}/{snapshot_id}.tar.gz"

        logger.info(f"Created snapshot for session {session_id}")

        return SnapshotResult(
            storage_path=storage_path,
            size_bytes=size_bytes,
        )

    def session_workspace_exists(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> bool:
        """Check if a session's workspace directory exists in the pod.

        Execs into pod to check for /workspace/sessions/{session_id}/outputs/.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to check

        Returns:
            True if the session workspace exists, False otherwise
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        session_path = f"/workspace/sessions/{session_id}/outputs"

        # Use exec to check if directory exists
        exec_command = [
            "/bin/sh",
            "-c",
            f'[ -d "{session_path}" ] && echo "WORKSPACE_FOUND" || echo "WORKSPACE_MISSING"',
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            result = "WORKSPACE_FOUND" in resp
            logger.info(
                f"[WORKSPACE_CHECK] session={session_id}, path={session_path}, raw_resp={resp!r}, result={result}"
            )
            return result

        except ApiException as e:
            logger.warning(
                f"Failed to check session workspace exists for {session_id}: {e}"
            )
            return False

    def restore_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        snapshot_storage_path: str,
        tenant_id: str,  # noqa: ARG002
        nextjs_port: int,
        llm_config: LLMProviderConfig,
        use_demo_data: bool = False,
    ) -> None:
        """Download snapshot from S3 via s5cmd, extract, regenerate config, and start NextJS.

        Uses the file-sync sidecar container (which has s5cmd + S3 credentials
        via IRSA) to stream the snapshot directly from S3 into the session
        directory. This avoids downloading to the backend server and the
        base64 encoding overhead of piping through kubectl exec.

        Steps:
        1. Exec s5cmd cat in file-sync container to stream snapshot from S3
        2. Pipe directly to tar for extraction in the shared workspace volume
           (.opencode-data/ is restored automatically since XDG_DATA_HOME points here)
        3. Regenerate configuration files (AGENTS.md, opencode.json, files symlink)
        4. Start the NextJS dev server

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to restore
            snapshot_storage_path: Path to the snapshot in S3 (relative path)
            tenant_id: Tenant identifier for storage access
            nextjs_port: Port number for the NextJS dev server
            llm_config: LLM provider configuration for opencode.json
            use_demo_data: If True, symlink files/ to demo data; else to user files

        Raises:
            RuntimeError: If snapshot restoration fails
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        session_path = f"/workspace/sessions/{session_id}"
        safe_session_path = shlex.quote(session_path)

        s3_path = f"s3://{self._s3_bucket}/{snapshot_storage_path}"

        # Stream snapshot directly from S3 via s5cmd in file-sync container.
        # Mirrors the upload pattern: upload uses `tar | s5cmd pipe`,
        # restore uses `s5cmd cat | tar`. Both run in file-sync container
        # which has s5cmd and S3 credentials (IRSA). The shared workspace
        # volume makes extracted files immediately visible to the sandbox
        # container.
        restore_script = f"""
set -eo pipefail
mkdir -p {safe_session_path}
/s5cmd cat {s3_path} | tar -xzf - -C {safe_session_path}
echo "SNAPSHOT_RESTORED"
"""

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="file-sync",
                command=["/bin/sh", "-c", restore_script],
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            if "SNAPSHOT_RESTORED" not in resp:
                raise RuntimeError(f"Snapshot restore may have failed. Output: {resp}")

            # Regenerate configuration files that aren't in the snapshot
            # These are regenerated to ensure they match the current system state
            self._regenerate_session_config(
                pod_name=pod_name,
                session_path=safe_session_path,
                llm_config=llm_config,
                nextjs_port=nextjs_port,
                use_demo_data=use_demo_data,
            )

            # Start NextJS dev server (check node_modules since restoring from snapshot)
            start_script = _build_nextjs_start_script(
                safe_session_path, nextjs_port, check_node_modules=True
            )
            k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=["/bin/sh", "-c", start_script],
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )
        except ApiException as e:
            raise RuntimeError(f"Failed to restore snapshot: {e}") from e

    def _regenerate_session_config(
        self,
        pod_name: str,
        session_path: str,
        llm_config: LLMProviderConfig,
        nextjs_port: int,
        use_demo_data: bool,
    ) -> None:
        """Regenerate session configuration files after snapshot restore.

        Creates:
        - AGENTS.md (agent instructions)
        - opencode.json (LLM configuration)
        - files symlink (to demo data or user files)

        Args:
            pod_name: The pod name to exec into
            session_path: Path to the session directory (already shlex.quoted)
            llm_config: LLM provider configuration
            nextjs_port: Port for NextJS (used in AGENTS.md)
            use_demo_data: Whether to use demo data or user files
        """
        # Generate AGENTS.md content
        agent_instructions = self._load_agent_instructions(
            files_path=None,  # Container script handles this at runtime
            provider=llm_config.provider,
            model_name=llm_config.model_name,
            nextjs_port=nextjs_port,
            disabled_tools=OPENCODE_DISABLED_TOOLS,
            user_name=None,  # Not stored, regenerate without personalization
            user_role=None,
            use_demo_data=use_demo_data,
            include_org_info=False,  # Don't include org_info for restored sessions
        )

        # Generate opencode.json
        opencode_config = build_opencode_config(
            provider=llm_config.provider,
            model_name=llm_config.model_name,
            api_key=llm_config.api_key if llm_config.api_key else None,
            api_base=llm_config.api_base,
            disabled_tools=OPENCODE_DISABLED_TOOLS,
        )
        opencode_json = json.dumps(opencode_config)

        # Escape for shell (single quotes)
        opencode_json_escaped = opencode_json.replace("'", "'\\''")
        agent_instructions_escaped = agent_instructions.replace("'", "'\\''")

        # Build files symlink setup
        if use_demo_data:
            symlink_target = "/workspace/demo_data"
        else:
            symlink_target = "/workspace/files"

        config_script = f"""
set -e

# Create files symlink
echo "Creating files symlink to {symlink_target}"
ln -sf {symlink_target} {session_path}/files

# Write agent instructions
echo "Writing AGENTS.md"
printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md

# Populate knowledge sources by scanning the files directory
python3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true

# Write opencode config
echo "Writing opencode.json"
printf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json

echo "Session config regeneration complete"
"""

        logger.info("Regenerating session configuration files")
        k8s_stream(
            self._stream_core_api.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=self._namespace,
            container="sandbox",
            command=["/bin/sh", "-c", config_script],
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        logger.info("Session configuration files regenerated")

    def health_check(self, sandbox_id: UUID, timeout: float = 60.0) -> bool:
        """Check if the sandbox pod is healthy (can exec into it).

        Args:
            sandbox_id: The sandbox ID to check
            timeout: Health check timeout in seconds

        Returns:
            True if sandbox is healthy, False otherwise
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        exec_client = ACPExecClient(
            pod_name=pod_name,
            namespace=self._namespace,
            container="sandbox",
        )
        return exec_client.health_check(timeout=timeout)

    def _create_ephemeral_acp_client(
        self, sandbox_id: UUID, session_path: str
    ) -> ACPExecClient:
        """Create a new ephemeral ACP client for a single message exchange.

        Each call starts a fresh `opencode acp` process in the sandbox pod.
        The process is short-lived — stopped after the message completes.
        This prevents the bug where multiple long-lived processes (one per
        API replica) operate on the same session's flat file storage
        concurrently, causing the JSON-RPC response to be silently lost.

        Args:
            sandbox_id: The sandbox ID
            session_path: Working directory for the session (e.g. /workspace/sessions/{id}).
                XDG_DATA_HOME is set relative to this so opencode's session data
                lives inside the snapshot directory.

        Returns:
            A running ACPExecClient (caller must stop it when done)
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        acp_client = ACPExecClient(
            pod_name=pod_name,
            namespace=self._namespace,
            container="sandbox",
        )
        acp_client.start(cwd=session_path)

        logger.info(
            f"[SANDBOX-ACP] Created ephemeral ACP client: sandbox={sandbox_id} pod={pod_name} api_pod={_API_SERVER_HOSTNAME}"
        )
        return acp_client

    def send_message(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        message: str,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to the CLI agent and stream ACP events.

        Creates an ephemeral `opencode acp` process for each message.
        The process resumes the session from opencode's on-disk storage,
        handles the prompt, then is stopped. This ensures only one process
        operates on a session's flat files at a time, preventing the bug
        where multiple long-lived processes (one per API replica) corrupt
        each other's in-memory state.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID (determines workspace directory)
            message: The message content to send

        Yields:
            Typed ACP schema event objects
        """
        packet_logger = get_packet_logger()
        session_path = f"/workspace/sessions/{session_id}"

        # Create an ephemeral ACP client for this message
        acp_client = self._create_ephemeral_acp_client(sandbox_id, session_path)

        try:
            # Resume (or create) the ACP session from opencode's on-disk storage
            acp_session_id = acp_client.resume_or_create_session(cwd=session_path)

            logger.info(
                f"[SANDBOX-ACP] Sending message: session={session_id} acp_session={acp_session_id} api_pod={_API_SERVER_HOSTNAME}"
            )

            # Log the send_message call at sandbox manager level
            packet_logger.log_session_start(session_id, sandbox_id, message)

            events_count = 0
            got_prompt_response = False
            try:
                for event in acp_client.send_message(
                    message, session_id=acp_session_id
                ):
                    events_count += 1
                    if isinstance(event, PromptResponse):
                        got_prompt_response = True
                    yield event

                logger.info(
                    f"[SANDBOX-ACP] send_message completed: "
                    f"session={session_id} events={events_count} "
                    f"got_prompt_response={got_prompt_response}"
                )
                packet_logger.log_session_end(
                    session_id, success=True, events_count=events_count
                )
            except GeneratorExit:
                logger.warning(
                    f"[SANDBOX-ACP] GeneratorExit: session={session_id} events={events_count}, sending session/cancel"
                )
                try:
                    acp_client.cancel(session_id=acp_session_id)
                except Exception as cancel_err:
                    logger.warning(
                        f"[SANDBOX-ACP] session/cancel failed on GeneratorExit: {cancel_err}"
                    )
                packet_logger.log_session_end(
                    session_id,
                    success=False,
                    error="GeneratorExit: Client disconnected or stream closed by consumer",
                    events_count=events_count,
                )
                raise
            except Exception as e:
                logger.error(
                    f"[SANDBOX-ACP] Exception: session={session_id} events={events_count} error={e}, sending session/cancel"
                )
                try:
                    acp_client.cancel(session_id=acp_session_id)
                except Exception as cancel_err:
                    logger.warning(
                        f"[SANDBOX-ACP] session/cancel failed on Exception: {cancel_err}"
                    )
                packet_logger.log_session_end(
                    session_id,
                    success=False,
                    error=f"Exception: {str(e)}",
                    events_count=events_count,
                )
                raise
            except BaseException as e:
                logger.error(
                    f"[SANDBOX-ACP] {type(e).__name__}: session={session_id} error={e}"
                )
                packet_logger.log_session_end(
                    session_id,
                    success=False,
                    error=f"{type(e).__name__}: {str(e) if str(e) else 'System-level interruption'}",
                    events_count=events_count,
                )
                raise
        finally:
            # Always stop the ephemeral ACP client to kill the opencode process.
            # This ensures no stale processes linger in the sandbox container.
            try:
                acp_client.stop()
            except Exception as e:
                logger.warning(
                    f"[SANDBOX-ACP] Failed to stop ephemeral ACP client: session={session_id} error={e}"
                )

    def list_directory(
        self, sandbox_id: UUID, session_id: UUID, path: str
    ) -> list[FilesystemEntry]:
        """List contents of a directory in the session's outputs directory.

        For Kubernetes backend, we exec into the pod to list files.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/outputs/

        Returns:
            List of FilesystemEntry objects sorted by directory first, then name

        Raises:
            ValueError: If path traversal attempted or path is not a directory
        """
        # _get_pod_name needs string
        pod_name = self._get_pod_name(str(sandbox_id))

        # Security: sanitize path by removing '..' components individually
        path_obj = Path(path.lstrip("/"))
        clean_parts = [p for p in path_obj.parts if p != ".."]
        clean_path = str(Path(*clean_parts)) if clean_parts else "."
        target_path = f"/workspace/sessions/{session_id}/{clean_path}"
        # Use shlex.quote to prevent command injection
        quoted_path = shlex.quote(target_path)

        logger.info(f"Listing directory {target_path} in pod {pod_name}")

        # Use exec to list directory
        # -L follows symlinks (important for files/ -> /workspace/demo_data)
        exec_command = [
            "/bin/sh",
            "-c",
            f"ls -laL --time-style=+%s {quoted_path} 2>/dev/null || echo 'ERROR_NOT_FOUND'",
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            if "ERROR_NOT_FOUND" in resp:
                raise ValueError(f"Path not found or not a directory: {path}")

            entries = self._parse_ls_output(resp, clean_path)
            return sorted(entries, key=lambda e: (not e.is_directory, e.name.lower()))

        except ApiException as e:
            raise RuntimeError(f"Failed to list directory: {e}") from e

    def _parse_ls_output(self, ls_output: str, base_path: str) -> list[FilesystemEntry]:
        """Parse ls -la output into FilesystemEntry objects.

        Handles regular files, directories, and symlinks. Symlinks to directories
        are treated as directories for navigation purposes.
        """
        entries = []
        lines = ls_output.strip().split("\n")

        logger.debug(f"Parsing {len(lines)} lines of ls output for {base_path}")

        for line in lines:
            logger.debug(f"Parsing line: {line}")

            # Skip header line and . / .. entries
            if line.startswith("total") or not line:
                continue

            parts = line.split()
            # ls -la --time-style=+%s format: perms links owner group size timestamp name
            # Minimum 7 parts for a simple filename
            if len(parts) < 7:
                continue

            # Handle symlinks: format is "name -> target"
            # For symlinks, parts[-1] is the target, not the name
            is_symlink = line.startswith("l")
            if is_symlink and " -> " in line:
                # Extract name from the "name -> target" portion
                # Filename starts at index 6 (after perms, links, owner, group, size, timestamp)
                try:
                    # Rejoin from index 6 onwards to handle names with spaces
                    name_and_target = " ".join(parts[6:])
                    if " -> " in name_and_target:
                        name = name_and_target.split(" -> ")[0]
                    else:
                        name = parts[-1]
                except (IndexError, ValueError):
                    name = parts[-1]
            else:
                # For regular files/directories, name is at index 6 or later (with spaces)
                name = " ".join(parts[6:])

            if name in (".", ".."):
                continue

            # Directories start with 'd', symlinks start with 'l'
            # Treat symlinks as directories (they typically point to directories
            # in our sandbox setup, like files/ -> /workspace/demo_data)
            is_directory = line.startswith("d") or is_symlink
            size_str = parts[4]

            try:
                size = int(size_str) if not is_directory else None
            except ValueError:
                size = None

            # Guess MIME type for files based on extension
            mime_type = mimetypes.guess_type(name)[0] if not is_directory else None

            entry_path = f"{base_path}/{name}".lstrip("/")
            entries.append(
                FilesystemEntry(
                    name=name,
                    path=entry_path,
                    is_directory=is_directory,
                    size=size,
                    mime_type=mime_type,
                )
            )

        return entries

    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:
        """Read a file from the session's workspace.

        For Kubernetes backend, we exec into the pod to read the file.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/

        Returns:
            File contents as bytes

        Raises:
            ValueError: If path traversal attempted or path is not a file
        """
        # _get_pod_name needs string
        pod_name = self._get_pod_name(str(sandbox_id))

        # Security: sanitize path by removing '..' components individually
        path_obj = Path(path.lstrip("/"))
        clean_parts = [p for p in path_obj.parts if p != ".."]
        clean_path = str(Path(*clean_parts)) if clean_parts else "."
        target_path = f"/workspace/sessions/{session_id}/{clean_path}"
        # Use shlex.quote to prevent command injection
        quoted_path = shlex.quote(target_path)

        # Use exec to read file with base64 encoding to handle binary data
        # Base64 encode the output to safely transport binary content
        exec_command = [
            "/bin/sh",
            "-c",
            f"if [ -f {quoted_path} ]; then base64 {quoted_path}; else echo 'ERROR_NOT_FOUND'; fi",
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            if "ERROR_NOT_FOUND" in resp:
                raise ValueError(f"File not found: {path}")

            # Decode base64 content
            try:
                content = base64.b64decode(resp.strip())
            except binascii.Error as e:
                logger.error(f"Failed to decode base64 content: {e}")
                raise RuntimeError(f"Failed to decode file content: {e}") from e

            return content

        except ApiException as e:
            raise RuntimeError(f"Failed to read file: {e}") from e

    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:
        """Get the webapp URL for a session's Next.js server.

        For Kubernetes backend, returns internal cluster service URL.

        Args:
            sandbox_id: The sandbox ID
            port: The session's allocated Next.js port

        Returns:
            Internal cluster URL for the Next.js server on the specified port
        """
        return self._get_nextjs_url(str(sandbox_id), port)

    def generate_pptx_preview(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        pptx_path: str,
        cache_dir: str,
    ) -> tuple[list[str], bool]:
        """Convert PPTX to slide images using soffice + pdftoppm in the pod.

        Runs preview.py in the sandbox container which:
        1. Checks if cached slides exist and are newer than the PPTX
        2. If not, converts PPTX -> PDF -> JPEG slides
        3. Returns list of slide image paths
        """
        pod_name = self._get_pod_name(str(sandbox_id))

        # Security: sanitize paths
        pptx_path_obj = Path(pptx_path.lstrip("/"))
        pptx_clean_parts = [p for p in pptx_path_obj.parts if p != ".."]
        clean_pptx = str(Path(*pptx_clean_parts)) if pptx_clean_parts else "."

        cache_path_obj = Path(cache_dir.lstrip("/"))
        cache_clean_parts = [p for p in cache_path_obj.parts if p != ".."]
        clean_cache = str(Path(*cache_clean_parts)) if cache_clean_parts else "."

        session_root = f"/workspace/sessions/{session_id}"
        pptx_abs = f"{session_root}/{clean_pptx}"
        cache_abs = f"{session_root}/{clean_cache}"

        exec_command = [
            "python",
            "/workspace/skills/pptx/scripts/preview.py",
            pptx_abs,
            cache_abs,
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )

            lines = [line.strip() for line in resp.strip().split("\n") if line.strip()]

            if not lines:
                raise ValueError("Empty response from PPTX conversion")

            if lines[0] == "ERROR_NOT_FOUND":
                raise ValueError(f"File not found: {pptx_path}")

            if lines[0] == "ERROR_NO_PDF":
                raise ValueError("soffice did not produce a PDF file")

            cached = lines[0] == "CACHED"
            # Skip the status line, rest are file paths
            abs_paths = lines[1:] if lines[0] in ("CACHED", "GENERATED") else lines

            # Convert absolute paths to session-relative paths
            prefix = f"{session_root}/"
            rel_paths = []
            for p in abs_paths:
                if p.startswith(prefix):
                    rel_paths.append(p[len(prefix) :])
                elif p.endswith(".jpg"):
                    rel_paths.append(p)

            return (rel_paths, cached)

        except ApiException as e:
            raise RuntimeError(f"Failed to generate PPTX preview: {e}") from e

    def sync_files(
        self,
        sandbox_id: UUID,
        user_id: UUID,
        tenant_id: str,
        source: str | None = None,
    ) -> bool:
        """Sync files from S3 to the running pod via the file-sync sidecar.

        Executes `s5cmd sync` in the file-sync sidecar container to download
        any new or changed files from S3 to /workspace/files/.

        This is safe to call multiple times - s5cmd sync is idempotent.

        Note: For user_library source, --delete is NOT used since deletions
        are handled explicitly by the delete_file API endpoint. File visibility
        in sessions is controlled via filtered symlinks in setup_session_workspace().

        Args:
            sandbox_id: The sandbox UUID
            user_id: The user ID (for S3 path construction)
            tenant_id: The tenant ID (for S3 path construction)
            source: Optional source type (e.g., "gmail", "google_drive").
                    If None, syncs all sources. If specified, only syncs
                    that source's directory.

        Returns:
            True if sync was successful, False otherwise.
        """
        pod_name = self._get_pod_name(str(sandbox_id))

        # Build S3 path based on whether source is specified
        if source:
            # Sync only the specific source directory
            s3_path = f"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/{source}/*"
            local_path = f"/workspace/files/{source}/"
        else:
            # Sync all sources (original behavior)
            s3_path = f"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/*"
            local_path = "/workspace/files/"

        # s5cmd sync with --delete for external connectors only.
        # timeout: prevent zombie processes from kubectl exec disconnections
        # trap: kill child processes on exit/disconnect
        source_info = f" (source={source})" if source else ""

        # Sources where --delete is explicitly forbidden (deletions handled via API)
        NO_DELETE_SOURCES = {"user_library"}
        use_delete = source is not None and source not in NO_DELETE_SOURCES
        delete_flag = " --delete" if use_delete else ""

        sync_script = f"""
# Kill child processes on exit/disconnect to prevent zombie s5cmd workers
cleanup() {{ pkill -P $$ 2>/dev/null || true; }}
trap cleanup EXIT INT TERM

echo "Starting incremental file sync{source_info}"
echo "S3: {s3_path}"
echo "Local: {local_path}"

# Ensure destination exists (needed for source-specific syncs)
mkdir -p "{local_path}"

# Run s5cmd with 5-minute timeout (SIGKILL after 10s if SIGTERM ignored)
# Exit codes: 0=success, 1=success with warnings, 124=timeout
sync_exit_code=0
timeout --signal=TERM --kill-after=10s 5m \
    /s5cmd --stat sync{delete_flag} "{s3_path}" "{local_path}" 2>&1 || sync_exit_code=$?

echo "=== Sync finished (exit code: $sync_exit_code) ==="

# Handle result
if [ $sync_exit_code -eq 0 ] || [ $sync_exit_code -eq 1 ]; then
    file_count=$(find "{local_path}" -type f 2>/dev/null | wc -l)
    echo "Files in {local_path}: $file_count"
    echo "SYNC_SUCCESS"
elif [ $sync_exit_code -eq 124 ]; then
    echo "ERROR: Sync timed out after 5 minutes"
    echo "SYNC_FAILED"
    exit 1
else
    echo "ERROR: Sync failed (exit code: $sync_exit_code)"
    echo "SYNC_FAILED"
    exit $sync_exit_code
fi
"""
        sync_command = ["/bin/sh", "-c", sync_script]
        resp = k8s_stream(
            self._stream_core_api.connect_get_namespaced_pod_exec,
            pod_name,
            self._namespace,
            container="file-sync",  # Execute in sidecar, not sandbox container
            command=sync_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        logger.debug(f"File sync response: {resp}")

        # Check if sync succeeded based on output markers
        if "SYNC_FAILED" in resp:
            logger.warning(f"File sync failed for sandbox {sandbox_id}")
            return False
        return True

    def _ensure_agents_md_attachments_section(
        self, sandbox_id: UUID, session_id: UUID
    ) -> None:
        """Ensure AGENTS.md has the attachments section.

        Called after uploading a file. Only adds the section if it doesn't exist.
        Inserts the section above ## Skills for better document flow.
        This is a fire-and-forget operation - failures are logged but not raised.
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        session_path = f"/workspace/sessions/{session_id}"
        agents_md_path = f"{session_path}/AGENTS.md"

        # Base64 encode the content for safe shell handling
        attachments_content_b64 = base64.b64encode(
            ATTACHMENTS_SECTION_CONTENT.encode()
        ).decode()

        # Script: add section before ## Skills if not present
        # Uses a temp file approach for safe insertion
        script = f"""
if [ -f "{agents_md_path}" ]; then
    if ! grep -q "## Attachments (PRIORITY)" "{agents_md_path}" 2>/dev/null; then
        # Check if ## Skills exists
        if grep -q "## Skills" "{agents_md_path}" 2>/dev/null; then
            # Insert before ## Skills using awk
            awk -v content="$(echo "{attachments_content_b64}" | base64 -d)" '
                /^## Skills/ {{ print content; print ""; }}
                {{ print }}
            ' "{agents_md_path}" > "{agents_md_path}.tmp" && mv "{agents_md_path}.tmp" "{agents_md_path}"
            echo "ADDED_BEFORE_SKILLS"
        else
            # Fallback: append to end
            echo "" >> "{agents_md_path}"
            echo "" >> "{agents_md_path}"
            echo "{attachments_content_b64}" | base64 -d >> "{agents_md_path}"
            echo "ADDED_AT_END"
        fi
    else
        echo "EXISTS"
    fi
else
    echo "NO_AGENTS_MD"
fi
"""

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=["/bin/sh", "-c", script],
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )
            logger.debug(
                f"Ensure AGENTS.md attachments section for session {session_id}: {resp.strip()}"
            )
        except ApiException as e:
            logger.warning(f"Failed to ensure AGENTS.md attachments section: {e}")

    def upload_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        filename: str,
        content: bytes,
    ) -> str:
        """Upload a file to the session's attachments directory.

        Uses tar streaming via stdin with explicit byte count to avoid EOF issues.
        The K8s Python client cannot close stdin without closing the entire WebSocket
        connection, so we use `head -c <size>` to read exactly the expected bytes
        instead of waiting for EOF.

        Handles filename collisions atomically within the shell script.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            filename: Sanitized filename
            content: File content as bytes

        Returns:
            Relative path where file was saved (e.g., "attachments/doc.pdf")

        Raises:
            RuntimeError: If upload fails
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        target_dir = f"/workspace/sessions/{session_id}/attachments"

        # Create tar archive in memory
        tar_buffer = io.BytesIO()
        with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
            tarinfo = tarfile.TarInfo(name=filename)
            tarinfo.size = len(content)
            tar.addfile(tarinfo, io.BytesIO(content))
        tar_data = tar_buffer.getvalue()
        tar_size = len(tar_data)

        # Shell script that:
        # 1. Creates target directory and temp extraction directory
        # 2. Reads exactly tar_size bytes from stdin (avoids needing EOF signal)
        # 3. Extracts tar to temp directory
        # 4. Moves file to target with collision handling
        # 5. Cleans up temp directory
        # 6. Outputs final filename
        script = f"""
set -e
target_dir="{target_dir}"
tmpdir=$(mktemp -d)
trap 'rm -rf "$tmpdir"' EXIT

mkdir -p "$target_dir"

# Read exactly {tar_size} bytes and extract (avoids waiting for EOF)
head -c {tar_size} | tar xf - -C "$tmpdir"

# Find the extracted file (first file in tmpdir)
original=$(ls -1 "$tmpdir" | head -1)
base="$original"

cd "$target_dir"
if [ -f "$base" ]; then
    stem="${{base%.*}}"
    ext="${{base##*.}}"
    [ "$stem" = "$base" ] && ext="" || ext=".$ext"
    i=1
    while [ -f "${{stem}}_${{i}}${{ext}}" ]; do i=$((i+1)); done
    base="${{stem}}_${{i}}${{ext}}"
fi

mv "$tmpdir/$original" "$target_dir/$base"
chmod 644 "$target_dir/$base"
echo "$base"
"""

        try:
            # Open WebSocket connection with stdin enabled
            ws_client = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=["/bin/sh", "-c", script],
                stdin=True,
                stdout=True,
                stderr=True,
                tty=False,
                _preload_content=False,  # Return WSClient instead of string
            )

            # Write tar data to stdin
            ws_client.write_stdin(tar_data)

            # Read response - head -c will read exactly tar_size bytes and proceed,
            # so we don't need to close stdin to signal EOF
            stdout_data = ""
            stderr_data = ""
            while ws_client.is_open():
                ws_client.update(timeout=30)
                if ws_client.peek_stdout():
                    stdout_data += ws_client.read_stdout()
                if ws_client.peek_stderr():
                    stderr_data += ws_client.read_stderr()

            # Get any remaining data
            stdout_data += ws_client.read_stdout() or ""
            stderr_data += ws_client.read_stderr() or ""

            if stderr_data.strip():
                logger.warning(f"Upload stderr: {stderr_data.strip()}")

            # Last line of output is the final filename
            final_filename = stdout_data.strip().split("\n")[-1]

            if not final_filename:
                raise RuntimeError(
                    f"Upload failed - no filename returned. stderr: {stderr_data}"
                )

            logger.info(
                f"Uploaded file to session {session_id}: attachments/{final_filename} ({len(content)} bytes)"
            )

            # Ensure AGENTS.md has the attachments section
            self._ensure_agents_md_attachments_section(sandbox_id, session_id)

            return f"attachments/{final_filename}"

        except ApiException as e:
            raise RuntimeError(f"Failed to upload file: {e}") from e

    def delete_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        path: str,
    ) -> bool:
        """Delete a file from the session's workspace.

        Uses kubectl exec to delete the file from the pod.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path to the file (e.g., "attachments/doc.pdf")

        Returns:
            True if file was deleted, False if not found

        Raises:
            ValueError: If path traversal attempted or invalid characters
        """
        pod_name = self._get_pod_name(str(sandbox_id))

        # Security: robust path sanitization
        # Reject paths with traversal patterns, URL-encoded characters, or null bytes
        if re.search(r"\.\.", path) or "%" in path or "\x00" in path:
            raise ValueError("Invalid path: potential path traversal detected")

        # Reject paths with shell metacharacters that could be exploited
        if re.search(r'[;&|`$(){}[\]<>\'"\n\r\\]', path):
            raise ValueError("Invalid path: contains disallowed characters")

        clean_path = path.lstrip("/")

        # Verify path only contains safe characters (alphanumeric, dash, underscore, dot, forward slash)
        if not re.match(r"^[a-zA-Z0-9_\-./]+$", clean_path):
            raise ValueError("Invalid path: contains disallowed characters")

        target_path = f"/workspace/sessions/{session_id}/{clean_path}"

        # Use exec to delete file
        exec_command = [
            "/bin/sh",
            "-c",
            f'[ -f "{target_path}" ] && rm "{target_path}" && echo "DELETED" || echo "NOT_FOUND"',
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stdin=False,
                stdout=True,
                stderr=True,
                tty=False,
            )

            deleted = "DELETED" in resp
            if deleted:
                logger.info(f"Deleted file from session {session_id}: {path}")
            else:
                logger.debug(
                    f"File not found for deletion in session {session_id}: {path}"
                )

            return deleted

        except ApiException as e:
            raise RuntimeError(f"Failed to delete file: {e}") from e

    def get_upload_stats(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> tuple[int, int]:
        """Get current file count and total size for a session's attachments.

        Uses kubectl exec to query the pod's attachments directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID

        Returns:
            Tuple of (file_count, total_size_bytes)
        """
        pod_name = self._get_pod_name(str(sandbox_id))
        target_dir = f"/workspace/sessions/{session_id}/attachments"

        # Get file count and total size in one command
        # Uses find to list files, wc -l for count, and du for size
        exec_command = [
            "/bin/sh",
            "-c",
            f"""
if [ -d "{target_dir}" ]; then
    count=$(find "{target_dir}" -maxdepth 1 -type f 2>/dev/null | wc -l)
    size=$(du -sb "{target_dir}" 2>/dev/null | cut -f1)
    echo "$count $size"
else
    echo "0 0"
fi
""",
        ]

        try:
            resp = k8s_stream(
                self._stream_core_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=self._namespace,
                container="sandbox",
                command=exec_command,
                stdin=False,
                stdout=True,
                stderr=True,
                tty=False,
            )

            # Parse response: "count size"
            parts = resp.strip().split()
            if len(parts) >= 2:
                try:
                    file_count = int(parts[0])
                    # du includes directory overhead, but for limits this is fine
                    total_size = int(parts[1])
                    return file_count, total_size
                except ValueError:
                    logger.warning(f"Failed to parse upload stats: {resp}")
                    return 0, 0

            return 0, 0

        except ApiException as e:
            logger.warning(f"Failed to get upload stats: {e}")
            return 0, 0


================================================
FILE: backend/onyx/server/features/build/sandbox/local/__init__.py
================================================
"""Local filesystem-based sandbox implementation.

This module provides the LocalSandboxManager for development and single-node
deployments that run sandboxes as directories on the local filesystem.
"""

from onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient
from onyx.server.features.build.sandbox.local.agent_client import ACPEvent
from onyx.server.features.build.sandbox.local.local_sandbox_manager import (
    LocalSandboxManager,
)
from onyx.server.features.build.sandbox.local.process_manager import ProcessManager

__all__ = [
    "ACPAgentClient",
    "ACPEvent",
    "LocalSandboxManager",
    "ProcessManager",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/local/agent_client.py
================================================
"""Communication with CLI agent subprocess using ACP (Agent Client Protocol).

ACP is a JSON-RPC 2.0 based protocol for communicating with coding agents.
See: https://agentclientprotocol.com

This module includes comprehensive logging for debugging ACP communication.
Enable logging by setting LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true.

Usage:
    # Simple usage with context manager
    with ACPAgentClient(cwd="/path/to/project") as client:
        for packet in client.send_message("What files are here?"):
            print(packet)

    # Manual lifecycle management
    client = ACPAgentClient()
    client.start(cwd="/path/to/project")
    for packet in client.send_message("Hello"):
        print(packet)
    client.stop()
"""

import json
import os
import select
import shutil
import subprocess
import threading
import time
from collections.abc import Generator
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from typing import Any

from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart
from pydantic import ValidationError

from onyx.server.features.build.api.packet_logger import get_packet_logger


# ACP Protocol version
ACP_PROTOCOL_VERSION = 1

# Default client info
DEFAULT_CLIENT_INFO = {
    "name": "onyx-sandbox",
    "title": "Onyx Sandbox Agent Client",
    "version": "1.0.0",
}

SESSION_CREATION_TIMEOUT = 30.0  # 30 seconds
TIMEOUT = 900.0  # 15 minutes
SINGLE_READ_TIMEOUT = 10.0  # 10 seconds


# =============================================================================
# Response Event Types (from acp.schema + custom completion/error types)
# =============================================================================

# Union type for all possible events from send_message
# Uses ACP schema types for session updates, plus our completion type
ACPEvent = (
    AgentMessageChunk  # Text/image content from agent
    | AgentThoughtChunk  # Agent's internal reasoning
    | ToolCallStart  # Tool invocation started
    | ToolCallProgress  # Tool execution progress/result
    | AgentPlanUpdate  # Agent's execution plan
    | CurrentModeUpdate  # Agent mode change
    | PromptResponse  # Agent finished (contains stop_reason)
    | Error  # An error occurred
)


# =============================================================================
# Internal State Types
# =============================================================================


@dataclass
class ACPSession:
    """Represents an active ACP session."""

    session_id: str
    cwd: str


@dataclass
class ACPClientState:
    """Internal state for the ACP client."""

    initialized: bool = False
    current_session: ACPSession | None = None
    next_request_id: int = 0
    agent_capabilities: dict[str, Any] = field(default_factory=dict)
    agent_info: dict[str, Any] = field(default_factory=dict)


def _find_opencode_binary() -> str | None:
    """Find the opencode binary path.

    Returns:
        Path to opencode binary, or None if not found
    """
    # Check PATH first
    opencode_path = shutil.which("opencode")
    if opencode_path:
        return opencode_path

    # Try common installation paths
    common_paths = [
        Path.home() / ".opencode" / "bin" / "opencode",
        Path("/usr/local/bin/opencode"),
    ]
    for path in common_paths:
        if path.exists():
            return str(path)

    return None


class ACPAgentClient:
    """ACP (Agent Client Protocol) client for communication with CLI agents.

    Implements JSON-RPC 2.0 over stdin/stdout as specified by ACP.
    Manages the agent subprocess lifecycle internally.

    Usage:
        # With context manager (recommended)
        with ACPAgentClient(cwd="/path/to/project") as client:
            for packet in client.send_message("Hello"):
                print(packet)

        # Manual lifecycle
        client = ACPAgentClient()
        client.start(cwd="/path/to/project")
        try:
            for packet in client.send_message("Hello"):
                print(packet)
        finally:
            client.stop()
    """

    def __init__(
        self,
        cwd: str | None = None,
        opencode_path: str | None = None,
        client_info: dict[str, Any] | None = None,
        client_capabilities: dict[str, Any] | None = None,
        auto_start: bool = True,
    ) -> None:
        """Initialize the ACP client.

        Args:
            cwd: Working directory for the agent. If provided and auto_start=True,
                 the agent will be started immediately.
            opencode_path: Path to opencode binary. Auto-detected if not provided.
            client_info: Client identification info (name, title, version)
            client_capabilities: Client capabilities to advertise
            auto_start: If True and cwd is provided, start the agent immediately
        """
        self._opencode_path = opencode_path or _find_opencode_binary()
        self._client_info = client_info or DEFAULT_CLIENT_INFO
        self._client_capabilities = client_capabilities or {
            "fs": {
                "readTextFile": True,
                "writeTextFile": True,
            },
            "terminal": True,
        }
        self._state = ACPClientState()
        self._process: subprocess.Popen[str] | None = None
        self._read_lock = threading.Lock()
        self._cwd: str | None = None

        # Auto-start if cwd provided
        if cwd and auto_start:
            self.start(cwd=cwd)

    def __enter__(self) -> "ACPAgentClient":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit - ensures cleanup."""
        self.stop()

    def start(
        self,
        cwd: str | None = None,
        mcp_servers: list[dict[str, Any]] | None = None,
        timeout: float = 30.0,
    ) -> str:
        """Start the agent process and initialize a session.

        This method:
        1. Starts the opencode acp subprocess
        2. Sends the initialize handshake
        3. Creates a new session

        Args:
            cwd: Working directory for the agent (defaults to current directory)
            mcp_servers: Optional MCP server configurations
            timeout: Timeout for initialization and session creation

        Returns:
            The session ID

        Raises:
            RuntimeError: If opencode is not found or startup fails
        """
        if self._process is not None:
            raise RuntimeError("Agent already started. Call stop() first.")

        if not self._opencode_path:
            raise RuntimeError(
                "opencode binary not found. Install opencode or provide opencode_path."
            )

        self._cwd = cwd or os.getcwd()

        # Start the opencode acp process
        self._process = subprocess.Popen(
            [self._opencode_path, "acp", "--cwd", self._cwd],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        try:
            # Initialize the ACP connection
            self._initialize(timeout=timeout)

            # Create a session
            session_id = self._create_session(
                cwd=self._cwd,
                mcp_servers=mcp_servers,
                timeout=timeout,
            )

            return session_id

        except Exception:
            # Clean up on failure
            self.stop()
            raise

    def stop(self) -> None:
        """Stop the agent process and clean up resources."""
        if self._process is not None:
            if self._process.poll() is None:
                self._process.terminate()
                try:
                    self._process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    self._process.kill()

            self._process = None

        # Reset state
        self._state = ACPClientState()

    def _get_next_id(self) -> int:
        """Get the next request ID."""
        request_id = self._state.next_request_id
        self._state.next_request_id += 1
        return request_id

    def _ensure_running(self) -> subprocess.Popen[str]:
        """Ensure the process is running and return it.

        Raises:
            RuntimeError: If process is not running
        """
        if self._process is None:
            raise RuntimeError("Agent not started. Call start() first.")

        if self._process.poll() is not None:
            raise RuntimeError(
                f"Agent process has terminated with code {self._process.returncode}"
            )

        return self._process

    def _send_request(
        self,
        method: str,
        params: dict[str, Any] | None = None,
    ) -> int:
        """Send a JSON-RPC request to the agent.

        Args:
            method: The RPC method name
            params: Optional parameters for the method

        Returns:
            The request ID

        Raises:
            RuntimeError: If the process has terminated or pipe is broken
        """
        process = self._ensure_running()

        if process.stdin is None:
            raise RuntimeError("Process stdin is not available")

        request_id = self._get_next_id()
        request: dict[str, Any] = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
        }
        if params is not None:
            request["params"] = params

        # Log the outgoing request
        packet_logger = get_packet_logger()
        packet_logger.log_jsonrpc_request(method, request_id, params, context="local")

        try:
            process.stdin.write(json.dumps(request) + "\n")
            process.stdin.flush()
        except BrokenPipeError:
            raise RuntimeError("Agent process stdin pipe is broken")

        return request_id

    def _send_notification(
        self,
        method: str,
        params: dict[str, Any] | None = None,
    ) -> None:
        """Send a JSON-RPC notification (no response expected).

        Args:
            method: The notification method name
            params: Optional parameters

        Raises:
            RuntimeError: If the process has terminated or pipe is broken
        """
        process = self._ensure_running()

        if process.stdin is None:
            raise RuntimeError("Process stdin is not available")

        notification: dict[str, Any] = {
            "jsonrpc": "2.0",
            "method": method,
        }
        if params is not None:
            notification["params"] = params

        # Log the outgoing notification
        packet_logger = get_packet_logger()
        packet_logger.log_jsonrpc_request(method, None, params, context="local")

        try:
            process.stdin.write(json.dumps(notification) + "\n")
            process.stdin.flush()
        except BrokenPipeError:
            raise RuntimeError("Agent process stdin pipe is broken")

    def _read_message(
        self,
        timeout: float | None = None,
    ) -> dict[str, Any] | None:
        """Read a single JSON-RPC message from the agent.

        Args:
            timeout: Optional timeout in seconds

        Returns:
            The parsed JSON message, or None if timeout/EOF

        Raises:
            RuntimeError: If process stdout is not available
        """
        process = self._ensure_running()

        if process.stdout is None:
            raise RuntimeError("Process stdout is not available")

        packet_logger = get_packet_logger()

        with self._read_lock:
            if timeout is not None:
                stdout_fd = process.stdout.fileno()
                readable, _, _ = select.select([stdout_fd], [], [], timeout)
                if not readable:
                    return None

            line = process.stdout.readline()
            if not line:
                return None

            line = line.strip()
            if not line:
                return None

            try:
                message = json.loads(line)
                # Log the raw incoming message
                packet_logger.log_jsonrpc_raw_message("IN", message, context="local")
                return message
            except json.JSONDecodeError:
                packet_logger.log_raw(
                    "JSONRPC-PARSE-ERROR",
                    {"raw_line": line[:500], "error": "JSON decode failed"},
                )
                return {
                    "jsonrpc": "2.0",
                    "error": {
                        "code": -32700,
                        "message": f"Parse error: {line[:100]}",
                    },
                }

    def _wait_for_response(
        self,
        request_id: int,
        timeout: float = 30.0,
    ) -> dict[str, Any]:
        """Wait for a response to a specific request.

        Args:
            request_id: The request ID to wait for
            timeout: Maximum time to wait

        Returns:
            The response result

        Raises:
            RuntimeError: If timeout, error response, or process dies
        """
        import time

        start_time = time.time()

        while True:
            remaining = timeout - (time.time() - start_time)
            if remaining <= 0:
                raise RuntimeError(
                    f"Timeout waiting for response to request {request_id}"
                )

            message = self._read_message(timeout=min(remaining, 1.0))

            if message is None:
                process = self._ensure_running()
                if process.poll() is not None:
                    raise RuntimeError(
                        f"Agent process terminated with code {process.returncode}"
                    )
                continue

            # Check if this is the response we're waiting for
            if message.get("id") == request_id:
                if "error" in message:
                    error = message["error"]
                    raise RuntimeError(
                        f"ACP error {error.get('code')}: {error.get('message')}"
                    )
                return message.get("result", {})

    def _initialize(self, timeout: float = SESSION_CREATION_TIMEOUT) -> dict[str, Any]:
        """Initialize the ACP connection (internal).

        Args:
            timeout: Maximum time to wait for response

        Returns:
            The agent's capabilities and info
        """
        params = {
            "protocolVersion": ACP_PROTOCOL_VERSION,
            "clientCapabilities": self._client_capabilities,
            "clientInfo": self._client_info,
        }

        request_id = self._send_request("initialize", params)
        result = self._wait_for_response(request_id, timeout)

        self._state.initialized = True
        self._state.agent_capabilities = result.get("agentCapabilities", {})
        self._state.agent_info = result.get("agentInfo", {})

        return result

    def _create_session(
        self,
        cwd: str,
        mcp_servers: list[dict[str, Any]] | None = None,
        timeout: float = SESSION_CREATION_TIMEOUT,
    ) -> str:
        """Create a new ACP session (internal).

        Args:
            cwd: Working directory for the session
            mcp_servers: Optional MCP server configurations
            timeout: Maximum time to wait for response

        Returns:
            The session ID
        """
        # Note: opencode requires cwd and mcpServers
        params: dict[str, Any] = {
            "cwd": cwd,
            "mcpServers": mcp_servers or [],
        }

        request_id = self._send_request("session/new", params)
        result = self._wait_for_response(request_id, timeout)

        session_id = result.get("sessionId")
        if not session_id:
            raise RuntimeError("No session ID returned from session/new")

        self._state.current_session = ACPSession(
            session_id=session_id,
            cwd=cwd,
        )

        return session_id

    def send_message(
        self,
        message: str,
        timeout: float = TIMEOUT,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message and stream response events.

        Args:
            message: The message content to send
            timeout: Maximum time to wait for complete response

        Yields:
            Typed ACP schema event objects (ACPEvent union):
            - AgentMessageChunk: Text/image content from the agent
            - AgentThoughtChunk: Agent's internal reasoning
            - ToolCallStart: Tool invocation started
            - ToolCallProgress: Tool execution progress/result
            - AgentPlanUpdate: Agent's execution plan
            - CurrentModeUpdate: Agent mode change
            - PromptResponse: Agent finished (has stop_reason)
            - Error: An error occurred

        Raises:
            RuntimeError: If no session or prompt fails
        """
        if self._state.current_session is None:
            raise RuntimeError("No active session. Call start() first.")

        session_id = self._state.current_session.session_id
        process = self._ensure_running()
        packet_logger = get_packet_logger()

        # Log the start of message processing
        packet_logger.log_raw(
            "ACP-SEND-MESSAGE-START",
            {
                "session_id": session_id,
                "message_preview": (
                    message[:200] + "..." if len(message) > 200 else message
                ),
                "timeout": timeout,
            },
        )

        # Build prompt content blocks
        prompt_content = [{"type": "text", "text": message}]

        params = {
            "sessionId": session_id,
            "prompt": prompt_content,
        }

        request_id = self._send_request("session/prompt", params)
        start_time = time.time()
        events_yielded = 0

        while True:
            remaining = timeout - (time.time() - start_time)
            if remaining <= 0:
                packet_logger.log_raw(
                    "ACP-TIMEOUT",
                    {
                        "session_id": session_id,
                        "elapsed_ms": (time.time() - start_time) * 1000,
                    },
                )
                yield Error(code=-1, message="Timeout waiting for response")
                break

            message_data = self._read_message(
                timeout=min(remaining, SINGLE_READ_TIMEOUT)
            )

            if message_data is None:
                if process.poll() is not None:
                    packet_logger.log_raw(
                        "ACP-PROCESS-TERMINATED",
                        {"session_id": session_id, "exit_code": process.returncode},
                    )
                    yield Error(
                        code=-1,
                        message=f"Agent process terminated with code {process.returncode}",
                    )
                    break
                continue

            # Check for response to our prompt request
            if message_data.get("id") == request_id:
                if "error" in message_data:
                    error_data = message_data["error"]
                    packet_logger.log_jsonrpc_response(
                        request_id, error=error_data, context="local"
                    )
                    yield Error(
                        code=error_data.get("code", -1),
                        message=error_data.get("message", "Unknown error"),
                    )
                else:
                    result = message_data.get("result", {})
                    packet_logger.log_jsonrpc_response(
                        request_id, result=result, context="local"
                    )
                    prompt_response = PromptResponse.model_validate(result)
                    packet_logger.log_acp_event_yielded(
                        "prompt_response", prompt_response
                    )
                    events_yielded += 1
                    yield prompt_response

                # Log completion summary
                elapsed_ms = (time.time() - start_time) * 1000
                packet_logger.log_raw(
                    "ACP-SEND-MESSAGE-COMPLETE",
                    {
                        "session_id": session_id,
                        "events_yielded": events_yielded,
                        "elapsed_ms": elapsed_ms,
                    },
                )
                break

            # Handle notifications (session/update)
            if message_data.get("method") == "session/update":
                params_data = message_data.get("params", {})
                update = params_data.get("update", {})

                # Log the notification
                packet_logger.log_jsonrpc_notification(
                    "session/update",
                    {"update_type": update.get("sessionUpdate")},
                    context="local",
                )

                for event in self._process_session_update(update):
                    events_yielded += 1
                    # Log each yielded event
                    event_type = self._get_event_type_name(event)
                    packet_logger.log_acp_event_yielded(event_type, event)
                    yield event

            # Handle requests from agent (e.g., fs/readTextFile)
            elif "method" in message_data and "id" in message_data:
                packet_logger.log_raw(
                    "ACP-UNSUPPORTED-REQUEST",
                    {"method": message_data["method"], "id": message_data["id"]},
                )
                self._send_error_response(
                    message_data["id"],
                    -32601,
                    f"Method not supported: {message_data['method']}",
                )

    def _get_event_type_name(self, event: ACPEvent) -> str:
        """Get the type name for an ACP event."""
        if isinstance(event, AgentMessageChunk):
            return "agent_message_chunk"
        elif isinstance(event, AgentThoughtChunk):
            return "agent_thought_chunk"
        elif isinstance(event, ToolCallStart):
            return "tool_call_start"
        elif isinstance(event, ToolCallProgress):
            return "tool_call_progress"
        elif isinstance(event, AgentPlanUpdate):
            return "agent_plan_update"
        elif isinstance(event, CurrentModeUpdate):
            return "current_mode_update"
        elif isinstance(event, PromptResponse):
            return "prompt_response"
        elif isinstance(event, Error):
            return "error"
        return "unknown"

    def _process_session_update(
        self, update: dict[str, Any]
    ) -> Generator[ACPEvent, None, None]:
        """Process a session/update notification and yield typed ACP schema objects.

        Validates and returns the actual ACP schema types directly.
        Invalid updates are logged and skipped.
        """
        update_type = update.get("sessionUpdate")
        packet_logger = get_packet_logger()

        if update_type == "agent_message_chunk":
            try:
                yield AgentMessageChunk.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "agent_thought_chunk":
            try:
                yield AgentThoughtChunk.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "user_message_chunk":
            # Echo of user message - skip but log
            packet_logger.log_raw("ACP-SKIPPED-UPDATE", {"type": "user_message_chunk"})

        elif update_type == "tool_call":
            try:
                yield ToolCallStart.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "tool_call_update":
            try:
                yield ToolCallProgress.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "plan":
            try:
                yield AgentPlanUpdate.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "available_commands_update":
            # Skip command updates - not relevant for consumers
            packet_logger.log_raw(
                "ACP-SKIPPED-UPDATE", {"type": "available_commands_update"}
            )

        elif update_type == "current_mode_update":
            try:
                yield CurrentModeUpdate.model_validate(update)
            except ValidationError as e:
                packet_logger.log_raw(
                    "ACP-VALIDATION-ERROR",
                    {"update_type": update_type, "error": str(e), "update": update},
                )

        elif update_type == "session_info_update":
            # Skip session info updates - internal bookkeeping
            packet_logger.log_raw("ACP-SKIPPED-UPDATE", {"type": "session_info_update"})

        else:
            # Unknown update types are logged
            packet_logger.log_raw(
                "ACP-UNKNOWN-UPDATE-TYPE",
                {"update_type": update_type, "update": update},
            )

    def _send_error_response(
        self,
        request_id: int,
        code: int,
        message: str,
    ) -> None:
        """Send an error response to an agent request."""
        process = self._process
        if process is None or process.stdin is None:
            return

        response = {
            "jsonrpc": "2.0",
            "id": request_id,
            "error": {
                "code": code,
                "message": message,
            },
        }

        try:
            process.stdin.write(json.dumps(response) + "\n")
            process.stdin.flush()
        except BrokenPipeError:
            pass

    def cancel(self) -> None:
        """Cancel the current operation."""
        if self._state.current_session is None:
            return

        self._send_notification(
            "session/cancel",
            {"sessionId": self._state.current_session.session_id},
        )

    @property
    def is_running(self) -> bool:
        """Check if the agent process is running."""
        return self._process is not None and self._process.poll() is None

    @property
    def session_id(self) -> str | None:
        """Get the current session ID, if any."""
        if self._state.current_session:
            return self._state.current_session.session_id
        return None

    @property
    def agent_info(self) -> dict[str, Any]:
        """Get the agent's info from initialization."""
        return self._state.agent_info

    @property
    def agent_capabilities(self) -> dict[str, Any]:
        """Get the agent's capabilities from initialization."""
        return self._state.agent_capabilities


================================================
FILE: backend/onyx/server/features/build/sandbox/local/local_sandbox_manager.py
================================================
"""Filesystem-based sandbox manager for local/dev environments.

LocalSandboxManager manages sandboxes as directories on the local filesystem.
Suitable for development, testing, and single-node deployments.

IMPORTANT: This manager does NOT interface with the database directly.
All database operations should be handled by the caller (SessionManager, Celery tasks, etc.).
"""

import mimetypes
import re
import subprocess
import threading
from collections.abc import Generator
from pathlib import Path
from uuid import UUID

import httpx

from onyx.db.enums import SandboxStatus
from onyx.file_store.file_store import get_default_file_store
from onyx.server.features.build.configs import DEMO_DATA_PATH
from onyx.server.features.build.configs import OPENCODE_DISABLED_TOOLS
from onyx.server.features.build.configs import OUTPUTS_TEMPLATE_PATH
from onyx.server.features.build.configs import SANDBOX_BASE_PATH
from onyx.server.features.build.configs import VENV_TEMPLATE_PATH
from onyx.server.features.build.sandbox.base import SandboxManager
from onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient
from onyx.server.features.build.sandbox.local.agent_client import ACPEvent
from onyx.server.features.build.sandbox.local.process_manager import ProcessManager
from onyx.server.features.build.sandbox.manager.directory_manager import (
    DirectoryManager,
)
from onyx.server.features.build.sandbox.manager.snapshot_manager import SnapshotManager
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.models import SandboxInfo
from onyx.server.features.build.sandbox.models import SnapshotResult
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import ThreadSafeSet

logger = setup_logger()


class LocalSandboxManager(SandboxManager):
    """Filesystem-based sandbox manager for local/dev environments.

    Manages sandboxes as directories on the local filesystem.
    Suitable for development, testing, and single-node deployments.

    Key characteristics:
    - Sandboxes are directories under SANDBOX_BASE_PATH
    - No container isolation (process-level only)
    - No automatic cleanup of idle sandboxes

    IMPORTANT: This manager does NOT interface with the database directly.
    All database operations should be handled by the caller.

    This is a singleton class - use get_sandbox_manager() to get the instance.
    """

    _instance: "LocalSandboxManager | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "LocalSandboxManager":
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialize()
        return cls._instance

    def _initialize(self) -> None:
        """Initialize managers."""
        # Paths for templates
        build_dir = Path(__file__).parent.parent.parent  # /onyx/server/features/build/
        skills_path = build_dir / "sandbox" / "kubernetes" / "docker" / "skills"
        agent_instructions_template_path = build_dir / "AGENTS.template.md"

        self._directory_manager = DirectoryManager(
            base_path=Path(SANDBOX_BASE_PATH),
            outputs_template_path=Path(OUTPUTS_TEMPLATE_PATH),
            venv_template_path=Path(VENV_TEMPLATE_PATH),
            skills_path=skills_path,
            agent_instructions_template_path=agent_instructions_template_path,
        )
        self._process_manager = ProcessManager()
        self._snapshot_manager = SnapshotManager(get_default_file_store())

        # Track ACP clients in memory - keyed by (sandbox_id, session_id) tuple
        # Each session within a sandbox has its own ACP client
        self._acp_clients: dict[tuple[UUID, UUID], ACPAgentClient] = {}

        # Track Next.js processes - keyed by (sandbox_id, session_id) tuple
        # Used for clean shutdown when sessions are deleted.
        # Mutated from background threads; all access must hold _nextjs_lock.
        self._nextjs_processes: dict[tuple[UUID, UUID], subprocess.Popen[bytes]] = {}

        # Track sessions currently being (re)started - prevents concurrent restarts.
        # ThreadSafeSet allows atomic check-and-add without holding _nextjs_lock.
        self._nextjs_starting: ThreadSafeSet[tuple[UUID, UUID]] = ThreadSafeSet()

        # Lock guarding _nextjs_processes (shared across sessions; hold briefly only)
        self._nextjs_lock = threading.Lock()

        # Validate templates exist (raises RuntimeError if missing)
        self._validate_templates()

    def _validate_templates(self) -> None:
        """Validate that sandbox templates exist.

        Raises RuntimeError if templates are missing.
        Templates are required for sandbox functionality.

        Raises:
            RuntimeError: If outputs or venv templates are missing
        """
        outputs_path = Path(OUTPUTS_TEMPLATE_PATH)
        venv_path = Path(VENV_TEMPLATE_PATH)

        missing_templates: list[str] = []

        if not outputs_path.exists():
            missing_templates.append(f"Outputs template not found at {outputs_path}")

        if not venv_path.exists():
            missing_templates.append(f"Venv template not found at {venv_path}")

        if missing_templates:
            error_msg = (
                "Sandbox templates are missing. "
                "Please build templates using:\n"
                "  python -m onyx.server.features.build.sandbox.util.build_venv_template\n"
                "Or use Docker image built with Dockerfile.sandbox-templates.\n\n"
                "Missing templates:\n"
            )
            error_msg += "\n".join(f"  - {template}" for template in missing_templates)
            raise RuntimeError(error_msg)

        logger.debug(f"Outputs template found at {outputs_path}")
        logger.debug(f"Venv template found at {venv_path}")

    def _get_sandbox_path(self, sandbox_id: str | UUID) -> Path:
        """Get the filesystem path for a sandbox based on sandbox_id.

        Args:
            sandbox_id: The sandbox ID (can be string or UUID)

        Returns:
            Path to the sandbox directory
        """
        return Path(SANDBOX_BASE_PATH) / str(sandbox_id)

    def _get_session_path(self, sandbox_id: str | UUID, session_id: str | UUID) -> Path:
        """Get the filesystem path for a session workspace.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID

        Returns:
            Path to the session workspace directory (sessions/$session_id/)
        """
        return self._get_sandbox_path(sandbox_id) / "sessions" / str(session_id)

    def _setup_filtered_files(
        self,
        session_path: Path,
        source_path: Path,
        excluded_paths: list[str],
    ) -> None:
        """Set up files directory with filtered symlinks based on exclusions.

        Instead of symlinking the entire source directory, this creates a files/
        directory structure where:
        - Top-level items (except user_library) are symlinked directly
        - user_library/ is created as a real directory with filtered symlinks

        Args:
            session_path: Path to the session directory
            source_path: Path to the user's knowledge files (e.g., /storage/tenant/knowledge/user/)
            excluded_paths: List of paths within user_library to exclude
                (e.g., ["/data/file.xlsx", "/reports/old.pdf"])
        """
        files_dir = session_path / "files"
        files_dir.mkdir(parents=True, exist_ok=True)

        # Normalize excluded paths for comparison (remove leading slash)
        excluded_set = {p.lstrip("/") for p in excluded_paths}

        if not source_path.exists():
            logger.warning(f"Source path does not exist: {source_path}")
            return

        # Iterate through top-level items in source
        for item in source_path.iterdir():
            target_link = files_dir / item.name

            if item.name == "user_library":
                # user_library needs filtered handling
                self._setup_filtered_user_library(
                    target_dir=target_link,
                    source_dir=item,
                    excluded_set=excluded_set,
                    base_path="",
                )
            else:
                # Other directories/files: symlink directly
                if not target_link.exists():
                    target_link.symlink_to(item, target_is_directory=item.is_dir())

    def _setup_filtered_user_library(
        self,
        target_dir: Path,
        source_dir: Path,
        excluded_set: set[str],
        base_path: str,
    ) -> bool:
        """Recursively set up user_library with filtered symlinks.

        Creates directory structure and symlinks only non-excluded files.
        Only creates directories if they will contain at least one enabled file.

        Args:
            target_dir: Where to create the filtered structure
            source_dir: Source user_library directory
            excluded_set: Set of excluded relative paths (e.g., {"data/file.xlsx"})
            base_path: Current path relative to user_library root (for recursion)

        Returns:
            True if any content was created (files or non-empty subdirectories)
        """
        if not source_dir.exists():
            return False

        has_content = False

        for item in source_dir.iterdir():
            # Build relative path for exclusion check
            rel_path = (
                f"{base_path}/{item.name}".lstrip("/") if base_path else item.name
            )
            target_link = target_dir / item.name

            if item.is_dir():
                # Check if entire directory is excluded
                if rel_path in excluded_set:
                    logger.debug(f"Excluding directory: user_library/{rel_path}")
                    continue

                # Recurse into directory - only create if it has content
                subdir_has_content = self._setup_filtered_user_library(
                    target_dir=target_link,
                    source_dir=item,
                    excluded_set=excluded_set,
                    base_path=rel_path,
                )
                if subdir_has_content:
                    has_content = True
            else:
                # Check if file is excluded
                if rel_path in excluded_set:
                    logger.debug(f"Excluding file: user_library/{rel_path}")
                    continue

                # Create parent directory if needed (lazy creation)
                if not target_dir.exists():
                    target_dir.mkdir(parents=True, exist_ok=True)

                # Create symlink to file
                if not target_link.exists():
                    target_link.symlink_to(item)
                has_content = True

        return has_content

    def provision(
        self,
        sandbox_id: UUID,
        user_id: UUID,
        tenant_id: str,
        llm_config: LLMProviderConfig,  # noqa: ARG002
    ) -> SandboxInfo:
        """Provision a new sandbox for a user.

        Creates user-level sandbox structure:
        1. Create sandbox directory with sessions/ subdirectory

        NOTE: This does NOT set up session-specific workspaces or start Next.js.
        Call setup_session_workspace() to create session workspaces.
        Next.js server is started per-session in setup_session_workspace().

        Args:
            sandbox_id: Unique identifier for the sandbox
            user_id: User identifier who owns this sandbox
            tenant_id: Tenant identifier for multi-tenant isolation
            llm_config: LLM provider configuration (stored for default config)

        Returns:
            SandboxInfo with the provisioned sandbox details

        Raises:
            RuntimeError: If provisioning fails
        """
        logger.info(
            f"Starting sandbox provisioning for sandbox {sandbox_id}, user {user_id}, tenant {tenant_id}"
        )

        # Create sandbox directory structure (user-level only)
        logger.info(f"Creating sandbox directory structure for sandbox {sandbox_id}")
        sandbox_path = self._directory_manager.create_sandbox_directory(str(sandbox_id))
        logger.debug(f"Sandbox directory created at {sandbox_path}")

        logger.info(
            f"Provisioned sandbox {sandbox_id} at {sandbox_path} (no sessions yet)"
        )

        return SandboxInfo(
            sandbox_id=sandbox_id,
            directory_path=str(self._get_sandbox_path(sandbox_id)),
            status=SandboxStatus.RUNNING,
            last_heartbeat=None,
        )

    def terminate(self, sandbox_id: UUID) -> None:
        """Terminate a sandbox and clean up all resources.

        1. Stop all Next.js processes for this sandbox
        2. Stop all ACP clients for this sandbox (terminates agent subprocesses)
        3. Cleanup sandbox directory

        Args:
            sandbox_id: The sandbox ID to terminate

        Raises:
            RuntimeError: If termination fails
        """
        # Stop all Next.js processes for this sandbox (keyed by (sandbox_id, session_id))
        with self._nextjs_lock:
            processes_to_stop = [
                (key, process)
                for key, process in self._nextjs_processes.items()
                if key[0] == sandbox_id
            ]
        for key, process in processes_to_stop:
            session_id = key[1]
            try:
                self._stop_nextjs_process(process, session_id)
                with self._nextjs_lock:
                    self._nextjs_processes.pop(key, None)
            except Exception as e:
                logger.warning(
                    f"Failed to stop Next.js for sandbox {sandbox_id}, session {session_id}: {e}"
                )

        # Stop all ACP clients for this sandbox (keyed by (sandbox_id, session_id))
        clients_to_stop = [
            (key, client)
            for key, client in self._acp_clients.items()
            if key[0] == sandbox_id
        ]
        for key, client in clients_to_stop:
            try:
                client.stop()
                del self._acp_clients[key]
            except Exception as e:
                logger.warning(
                    f"Failed to stop ACP client for sandbox {sandbox_id}, session {key[1]}: {e}"
                )

        # Cleanup directory
        sandbox_path = self._get_sandbox_path(sandbox_id)
        try:
            self._directory_manager.cleanup_sandbox_directory(sandbox_path)
        except Exception as e:
            raise RuntimeError(
                f"Failed to cleanup sandbox directory {sandbox_path}: {e}"
            ) from e

        logger.info(f"Terminated sandbox {sandbox_id}")

    def setup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        llm_config: LLMProviderConfig,
        nextjs_port: int,
        file_system_path: str | None = None,
        snapshot_path: str | None = None,  # noqa: ARG002
        user_name: str | None = None,
        user_role: str | None = None,
        user_work_area: str | None = None,
        user_level: str | None = None,
        use_demo_data: bool = False,
        excluded_user_library_paths: list[str] | None = None,
    ) -> None:
        """Set up a session workspace within an existing sandbox.

        Creates per-session directory structure with:
        1. sessions/$session_id/ directory
        2. outputs/ (from snapshot or template)
        3. .venv/ (from template)
        4. AGENTS.md
        5. .agent/skills/
        6. files/ (symlink to demo data OR filtered user files)
        7. opencode.json
        8. org_info/ (if demo_data is enabled, the org structure and user identity for the user's demo persona)
        9. attachments/
        10. Start Next.js dev server for this session

        Args:
            sandbox_id: The sandbox ID (must be provisioned)
            session_id: The session ID for this workspace
            llm_config: LLM provider configuration for opencode.json
            file_system_path: Path to user's knowledge/source files
            snapshot_path: Optional storage path to restore outputs from
            user_name: User's name for personalization in AGENTS.md
            user_role: User's role/title for personalization in AGENTS.md
            user_work_area: User's work area for demo persona (e.g., "engineering")
            user_level: User's level for demo persona (e.g., "ic", "manager")
            use_demo_data: If True, symlink files/ to demo data; else to user files
            excluded_user_library_paths: List of paths within user_library/ to exclude
                (e.g., ["/data/file.xlsx"]). These files won't be linked in the sandbox.

        Raises:
            RuntimeError: If workspace setup fails
        """
        sandbox_path = self._get_sandbox_path(sandbox_id)

        if not self._directory_manager.directory_exists(sandbox_path):
            raise RuntimeError(
                f"Sandbox {sandbox_id} not provisioned - provision() first"
            )

        logger.info(
            f"Setting up session workspace for session {session_id} in sandbox {sandbox_id}"
        )

        # Create session directory
        session_path = self._directory_manager.create_session_directory(
            sandbox_path, str(session_id)
        )
        logger.debug(f"Session directory created at {session_path}")

        try:
            # Setup files access - choose between demo data or user files
            if use_demo_data:
                # Demo mode: symlink to demo data directory
                symlink_target = Path(DEMO_DATA_PATH)
                if not symlink_target.exists():
                    logger.warning(
                        f"Demo data directory does not exist: {symlink_target}"
                    )
                logger.info(f"Setting up files symlink to demo data: {symlink_target}")
                self._directory_manager.setup_files_symlink(
                    session_path, symlink_target
                )
            elif file_system_path:
                source_path = Path(file_system_path)
                # Check if we have exclusions for user_library
                if excluded_user_library_paths:
                    # Create filtered file structure with symlinks to enabled files only
                    logger.debug(
                        f"Setting up filtered files with {len(excluded_user_library_paths)} exclusions"
                    )
                    self._setup_filtered_files(
                        session_path=session_path,
                        source_path=source_path,
                        excluded_paths=excluded_user_library_paths,
                    )
                else:
                    # No exclusions: simple symlink to entire directory
                    logger.debug(
                        f"Setting up files symlink to user files: {source_path}"
                    )
                    self._directory_manager.setup_files_symlink(
                        session_path, source_path
                    )
            else:
                raise ValueError("No files symlink target provided")
            logger.debug("Files ready")

            # Setup org_info directory with user identity (at session root)
            if user_work_area:
                logger.debug(f"Setting up org_info for {user_work_area}/{user_level}")
                self._directory_manager.setup_org_info(
                    session_path, user_work_area, user_level
                )

            logger.debug("Setting up outputs directory from template")
            self._directory_manager.setup_outputs_directory(session_path)
            logger.debug("Outputs directory ready")

            logger.debug("Setting up skills")
            self._directory_manager.setup_skills(session_path)
            logger.debug("Skills ready")

            # Setup attachments directory
            logger.debug("Setting up attachments directory")
            self._directory_manager.setup_attachments_directory(session_path)
            logger.debug("Attachments directory ready")

            # Setup opencode.json with LLM provider configuration
            logger.debug(
                f"Setting up opencode config with provider: {llm_config.provider}, model: {llm_config.model_name}"
            )
            self._directory_manager.setup_opencode_config(
                sandbox_path=session_path,
                provider=llm_config.provider,
                model_name=llm_config.model_name,
                api_key=llm_config.api_key,
                api_base=llm_config.api_base,
                disabled_tools=OPENCODE_DISABLED_TOOLS,
            )
            logger.debug("Opencode config ready")

            # Start Next.js server on pre-allocated port
            web_dir = self._directory_manager.get_web_path(
                sandbox_path, str(session_id)
            )
            logger.info(f"Starting Next.js server at {web_dir} on port {nextjs_port}")

            nextjs_process = self._process_manager.start_nextjs_server(
                web_dir, nextjs_port
            )
            # Store process for clean shutdown on session delete
            with self._nextjs_lock:
                self._nextjs_processes[(sandbox_id, session_id)] = nextjs_process
            logger.info("Next.js server started successfully")

            # Setup venv and AGENTS.md
            logger.debug("Setting up virtual environment")
            self._directory_manager.setup_venv(session_path)
            logger.debug("Virtual environment ready")

            logger.debug("Setting up agent instructions (AGENTS.md)")
            self._directory_manager.setup_agent_instructions(
                sandbox_path=session_path,
                provider=llm_config.provider,
                model_name=llm_config.model_name,
                nextjs_port=nextjs_port,
                disabled_tools=OPENCODE_DISABLED_TOOLS,
                user_name=user_name,
                user_role=user_role,
                use_demo_data=use_demo_data,
                include_org_info=use_demo_data,
            )
            logger.debug("Agent instructions ready")

            logger.info(f"Set up session workspace {session_id} at {session_path}")

        except Exception as e:
            # Cleanup on failure
            logger.error(
                f"Session workspace setup failed for session {session_id}: {e}",
                exc_info=True,
            )
            logger.info(f"Cleaning up session directory at {session_path}")
            self._directory_manager.cleanup_session_directory(
                sandbox_path, str(session_id)
            )
            raise RuntimeError(
                f"Failed to set up session workspace {session_id}: {e}"
            ) from e

    def cleanup_session_workspace(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        nextjs_port: int | None = None,
    ) -> None:
        """Clean up a session workspace (on session delete).

        1. Stop Next.js dev server if running
        2. Stop ACP client for this session
        3. Remove session directory

        Does NOT terminate the sandbox - other sessions may still be using it.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to clean up
            nextjs_port: Optional port where Next.js server is running (fallback only)
        """
        # Stop Next.js dev server - try stored process first, then fallback to port lookup
        process_key = (sandbox_id, session_id)
        with self._nextjs_lock:
            nextjs_process = self._nextjs_processes.pop(process_key, None)
        if nextjs_process is not None:
            self._stop_nextjs_process(nextjs_process, session_id)
        elif nextjs_port is not None:
            # Fallback: find by port (e.g., if server was restarted)
            self._stop_nextjs_server_on_port(nextjs_port, session_id)

        # Stop ACP client for this session
        client_key = (sandbox_id, session_id)
        client = self._acp_clients.pop(client_key, None)
        if client:
            try:
                client.stop()
                logger.debug(f"Stopped ACP client for session {session_id}")
            except Exception as e:
                logger.warning(
                    f"Failed to stop ACP client for session {session_id}: {e}"
                )

        # Cleanup session directory
        sandbox_path = self._get_sandbox_path(sandbox_id)
        self._directory_manager.cleanup_session_directory(sandbox_path, str(session_id))
        logger.info(f"Cleaned up session workspace {session_id}")

    def _stop_nextjs_process(
        self, process: subprocess.Popen[bytes], session_id: UUID
    ) -> None:
        """Stop a Next.js dev server process gracefully.

        Args:
            process: The subprocess.Popen object for the Next.js server
            session_id: The session ID (for logging)
        """
        if process.poll() is not None:
            # Process already terminated
            logger.debug(
                f"Next.js server for session {session_id} already terminated (exit code: {process.returncode})"
            )
            return

        try:
            logger.info(
                f"Stopping Next.js server (PID {process.pid}) for session {session_id}"
            )
            self._process_manager.terminate_process(process.pid)
            logger.debug(f"Next.js server stopped for session {session_id}")
        except Exception as e:
            logger.warning(
                f"Failed to stop Next.js server for session {session_id}: {e}"
            )

    def _stop_nextjs_server_on_port(self, port: int, session_id: UUID) -> None:
        """Stop Next.js dev server running on a specific port (fallback method).

        Finds the process listening on the port and terminates it gracefully.
        Used when the process object is not available (e.g., after backend restart).

        Args:
            port: The port number where Next.js is running
            session_id: The session ID (for logging)
        """
        # Try lsof first - it's the most reliable cross-platform way
        # Timeout to prevent hanging if system is slow or unresponsive
        LSOF_TIMEOUT_SECONDS = 5.0
        try:
            result = subprocess.run(
                ["lsof", "-ti", f":{port}"],
                capture_output=True,
                text=True,
                timeout=LSOF_TIMEOUT_SECONDS,
            )
            if result.returncode == 0 and result.stdout.strip():
                # lsof can return multiple PIDs - stop all processes on this port
                pids = [
                    int(pid.strip())
                    for pid in result.stdout.strip().split("\n")
                    if pid.strip()
                ]
                if pids:
                    logger.info(
                        f"Found {len(pids)} process(es) on port {port} for session {session_id}, stopping all"
                    )
                    for pid in pids:
                        try:
                            logger.debug(
                                f"Stopping Next.js server (PID {pid}) on port {port} for session {session_id}"
                            )
                            self._process_manager.terminate_process(pid)
                        except Exception as e:
                            logger.warning(
                                f"Failed to stop process {pid} on port {port}: {e}"
                            )
                    return
            else:
                logger.debug(
                    f"No process found on port {port} for session {session_id}"
                )
        except subprocess.TimeoutExpired:
            logger.warning(
                f"lsof timed out after {LSOF_TIMEOUT_SECONDS}s while looking for process on port {port} for session {session_id}"
            )
        except FileNotFoundError:
            # lsof not available, try psutil
            try:
                import psutil

                # Use net_connections to find process by port
                # Collect all PIDs on this port (handle multiple processes)
                pids_to_stop = set()
                for conn in psutil.net_connections(kind="inet"):
                    # laddr can be empty tuple for some connection states
                    # Check if it's a tuple with at least 2 elements (host, port)
                    if (
                        conn.laddr
                        and isinstance(conn.laddr, tuple)
                        and len(conn.laddr) >= 2
                        and conn.pid
                    ):
                        if conn.laddr[1] == port:
                            pids_to_stop.add(conn.pid)

                if pids_to_stop:
                    logger.info(
                        f"Found {len(pids_to_stop)} process(es) on port {port} for session {session_id}, stopping all"
                    )
                    for pid in pids_to_stop:
                        try:
                            logger.debug(
                                f"Stopping Next.js server (PID {pid}) on port {port} for session {session_id}"
                            )
                            self._process_manager.terminate_process(pid)
                        except Exception as e:
                            logger.warning(
                                f"Failed to stop process {pid} on port {port}: {e}"
                            )
                    return

                logger.debug(
                    f"No process found on port {port} for session {session_id}"
                )
            except ImportError:
                logger.warning(
                    f"Neither lsof nor psutil available to find process on port {port}"
                )
            except Exception as e:
                logger.warning(f"Failed to find process on port {port}: {e}")
        except Exception as e:
            logger.warning(
                f"Failed to stop Next.js server on port {port} for session {session_id}: {e}"
            )

    def create_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        tenant_id: str,
    ) -> SnapshotResult | None:
        """Not implemented for local backend - workspaces persist on disk.

        Local sandboxes don't use snapshots since the filesystem persists.
        This should never be called for local backend.
        """
        raise NotImplementedError(
            "create_snapshot is not supported for local backend. Local sandboxes persist on disk and don't use snapshots."
        )

    def session_workspace_exists(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> bool:
        """Check if a session's workspace directory exists.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID to check

        Returns:
            True if the session workspace exists, False otherwise
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        outputs_path = session_path / "outputs"
        return outputs_path.exists()

    def ensure_nextjs_running(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        nextjs_port: int,
    ) -> None:
        """Start Next.js server for a session if not already running.

        Called when the server is detected as unreachable (e.g., after API server restart).
        Returns immediately — the actual startup runs in a background daemon thread.
        A per-session guard prevents concurrent restarts from racing.

        Lock design: _nextjs_lock is shared across ALL sessions. Holding it during
        httpx (1s) or start_nextjs_server (several seconds) would block every other
        session's status checks and restarts. We only hold the lock for fast
        in-memory ops (dict get, check_and_add). The slow I/O runs in the background
        thread without holding any lock.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            nextjs_port: The port number for the Next.js server
        """
        process_key = (sandbox_id, session_id)

        with self._nextjs_lock:
            existing = self._nextjs_processes.get(process_key)
            if existing is not None and existing.poll() is None:
                return

        # Atomic check-and-add: returns True if already in set (another thread is starting)
        if self._nextjs_starting.check_and_add(process_key):
            return

        def _start_in_background() -> None:
            try:
                # Port check in background to avoid blocking the main thread
                try:
                    with httpx.Client(timeout=1.0) as client:
                        client.get(f"http://localhost:{nextjs_port}")
                    logger.info(
                        f"Port {nextjs_port} already alive for session {session_id} (orphan process) — skipping restart"
                    )
                    return
                except Exception:
                    pass  # Port is dead; proceed with restart

                logger.info(
                    f"Starting Next.js for session {session_id} on port {nextjs_port}"
                )
                sandbox_path = self._get_sandbox_path(sandbox_id)
                web_dir = self._directory_manager.get_web_path(
                    sandbox_path, str(session_id)
                )
                if not web_dir.exists():
                    logger.warning(
                        f"Web dir missing for session {session_id}: {web_dir} — cannot restart Next.js"
                    )
                    return
                process = self._process_manager.start_nextjs_server(
                    web_dir, nextjs_port
                )
                with self._nextjs_lock:
                    self._nextjs_processes[process_key] = process
                logger.info(
                    f"Auto-restarted Next.js for session {session_id} on port {nextjs_port}"
                )
            except Exception as e:
                logger.error(
                    f"Failed to auto-restart Next.js for session {session_id}: {e}"
                )
            finally:
                self._nextjs_starting.discard(process_key)

        threading.Thread(target=_start_in_background, daemon=True).start()

    def restore_snapshot(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        snapshot_storage_path: str,
        tenant_id: str,  # noqa: ARG002
        nextjs_port: int,
        llm_config: LLMProviderConfig,
        use_demo_data: bool = False,
    ) -> None:
        """Not implemented for local backend - workspaces persist on disk.

        Local sandboxes don't use snapshots since the filesystem persists.
        This should never be called for local backend.
        """
        raise NotImplementedError(
            "restore_snapshot is not supported for local backend. Local sandboxes persist on disk and don't use snapshots."
        )

    def health_check(
        self,
        sandbox_id: UUID,
        timeout: float = 60.0,  # noqa: ARG002
    ) -> bool:
        """Check if the sandbox is healthy (folder exists).

        Args:
            sandbox_id: The sandbox ID to check
            timeout: Health check timeout in seconds

        Returns:
            True if sandbox is healthy, False otherwise
        """
        # assume healthy if no port is specified
        sandbox_path = self._get_sandbox_path(sandbox_id)
        if not sandbox_path.exists():
            return False
        return True

    def send_message(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        message: str,
    ) -> Generator[ACPEvent, None, None]:
        """Send a message to the CLI agent and stream typed ACP events.

        The agent runs in the session-specific workspace:
        sessions/$session_id/

        Yields ACPEvent objects:
        - AgentMessageChunk: Text/image content from agent
        - AgentThoughtChunk: Agent's internal reasoning
        - ToolCallStart: Tool invocation started
        - ToolCallProgress: Tool execution progress/result
        - AgentPlanUpdate: Agent's execution plan
        - CurrentModeUpdate: Agent mode change
        - PromptResponse: Agent finished (has stop_reason)
        - Error: An error occurred

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID (determines workspace directory)
            message: The message content to send

        Yields:
            Typed ACP schema event objects
        """
        from onyx.server.features.build.api.packet_logger import get_packet_logger

        packet_logger = get_packet_logger()

        # Get or create ACP client for this session
        client_key = (sandbox_id, session_id)
        client = self._acp_clients.get(client_key)

        if client is None or not client.is_running:
            session_path = self._get_session_path(sandbox_id, session_id)

            # Log client creation
            packet_logger.log_acp_client_start(
                sandbox_id, session_id, str(session_path), context="local"
            )
            logger.info(
                f"Creating new ACP client for sandbox {sandbox_id}, session {session_id}"
            )

            # Create and start ACP client for this session
            client = ACPAgentClient(cwd=str(session_path))
            self._acp_clients[client_key] = client

        # Log the send_message call at sandbox manager level
        packet_logger.log_session_start(session_id, sandbox_id, message)

        events_count = 0
        try:
            for event in client.send_message(message):
                events_count += 1
                yield event

            # Log successful completion
            packet_logger.log_session_end(
                session_id, success=True, events_count=events_count
            )
        except Exception as e:
            # Log failure
            packet_logger.log_session_end(
                session_id, success=False, error=str(e), events_count=events_count
            )
            raise

    def _sanitize_path(self, path: str) -> str:
        """Sanitize a user-provided path to prevent path traversal attacks.

        Removes '..' components and normalizes the path to prevent attacks like
        'files/../../../../etc/passwd'.

        Args:
            path: User-provided relative path

        Returns:
            Sanitized path string with '..' components removed
        """
        # Parse the path and filter out '..' components
        path_obj = Path(path.lstrip("/"))
        clean_parts = [p for p in path_obj.parts if p != ".."]
        return str(Path(*clean_parts)) if clean_parts else "."

    def _is_path_allowed(self, session_path: Path, target_path: Path) -> bool:
        """Check if target_path is allowed for access.

        Allows paths within session_path OR within the files/ symlink.
        The files/ symlink intentionally points outside session_path to
        provide access to knowledge files.

        Args:
            session_path: The session's root directory
            target_path: The path being accessed

        Returns:
            True if access is allowed, False otherwise
        """
        files_symlink = session_path / "files"

        # Check if path is within the files/ symlink (or is the symlink itself)
        if files_symlink.is_symlink():
            try:
                # Use lexical check (without resolving symlinks)
                # This handles both the symlink itself (returns '.') and paths within it
                target_path.relative_to(files_symlink)
                return True
            except ValueError:
                pass

        # Standard check: path must be within session directory
        try:
            target_path.resolve().relative_to(session_path.resolve())
            return True
        except ValueError:
            return False

    def list_directory(
        self, sandbox_id: UUID, session_id: UUID, path: str
    ) -> list[FilesystemEntry]:
        """List contents of a directory in the session's outputs directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/outputs/

        Returns:
            List of FilesystemEntry objects sorted by directory first, then name

        Raises:
            ValueError: If path traversal attempted or path is not a directory
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        # Security: sanitize path to remove path traversal attempts
        clean_path = self._sanitize_path(path)
        target_path = session_path / clean_path

        # Security check
        if not self._is_path_allowed(session_path, target_path):
            raise ValueError("Path traversal not allowed")

        if not target_path.is_dir():
            raise ValueError(f"Not a directory: {path}")

        entries = []
        for item in target_path.iterdir():
            stat = item.stat()
            is_file = item.is_file()
            mime_type = mimetypes.guess_type(str(item))[0] if is_file else None
            entries.append(
                FilesystemEntry(
                    name=item.name,
                    path=str(item.relative_to(session_path)),
                    is_directory=item.is_dir(),
                    size=stat.st_size if is_file else None,
                    mime_type=mime_type,
                )
            )

        return sorted(entries, key=lambda e: (not e.is_directory, e.name.lower()))

    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:
        """Read a file from the session's outputs directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path within sessions/$session_id/outputs/

        Returns:
            File contents as bytes

        Raises:
            ValueError: If path traversal attempted or path is not a file
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        # Security: sanitize path to remove path traversal attempts
        clean_path = self._sanitize_path(path)
        target_path = session_path / clean_path

        # Security check
        if not self._is_path_allowed(session_path, target_path):
            raise ValueError("Path traversal not allowed")

        if not target_path.is_file():
            raise ValueError(f"Not a file: {path}")

        return target_path.read_bytes()

    def upload_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        filename: str,
        content: bytes,
    ) -> str:
        """Upload a file to the session's attachments directory.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            filename: Sanitized filename
            content: File content as bytes

        Returns:
            Relative path where file was saved (e.g., "attachments/doc.pdf")

        Raises:
            RuntimeError: If upload fails
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        attachments_dir = session_path / "attachments"
        attachments_dir.mkdir(parents=True, exist_ok=True)

        # Handle filename collisions by appending a number
        target_path = attachments_dir / filename
        if target_path.exists():
            stem = target_path.stem
            suffix = target_path.suffix
            counter = 1
            while target_path.exists():
                target_path = attachments_dir / f"{stem}_{counter}{suffix}"
                counter += 1
            filename = target_path.name

        target_path.write_bytes(content)
        target_path.chmod(0o644)

        logger.info(
            f"Uploaded file to session {session_id}: attachments/{filename} ({len(content)} bytes)"
        )

        # Inject attachments section into AGENTS.md if not already present
        self._ensure_agents_md_attachments_section(session_path)

        return f"attachments/{filename}"

    def _ensure_agents_md_attachments_section(self, session_path: Path) -> None:
        """Ensure AGENTS.md has the attachments section.

        Called after uploading a file. Only adds the section if it doesn't exist.
        Inserts the section above ## Skills for better document flow.
        """
        from onyx.server.features.build.sandbox.util.agent_instructions import (
            ATTACHMENTS_SECTION_CONTENT,
        )

        agents_md_path = session_path / "AGENTS.md"
        if not agents_md_path.exists():
            return

        current_content = agents_md_path.read_text()
        section_marker = "## Attachments (PRIORITY)"

        if section_marker not in current_content:
            # Insert before ## Skills if it exists, otherwise append
            skills_marker = "## Skills"
            if skills_marker in current_content:
                updated_content = current_content.replace(
                    skills_marker,
                    ATTACHMENTS_SECTION_CONTENT + "\n\n" + skills_marker,
                )
            else:
                updated_content = (
                    current_content.rstrip() + "\n\n" + ATTACHMENTS_SECTION_CONTENT
                )
            agents_md_path.write_text(updated_content)
            logger.debug("Added attachments section to AGENTS.md")

    def delete_file(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        path: str,
    ) -> bool:
        """Delete a file from the session's workspace.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID
            path: Relative path to the file (e.g., "attachments/doc.pdf")

        Returns:
            True if file was deleted, False if not found

        Raises:
            ValueError: If path traversal attempted or trying to delete a directory
        """
        session_path = self._get_session_path(sandbox_id, session_id)

        # Security: robust path sanitization (consistent with K8s implementation)
        # Reject paths with traversal patterns, URL-encoded characters, or null bytes
        if re.search(r"\.\.", path) or "%" in path or "\x00" in path:
            raise ValueError("Invalid path: potential path traversal detected")

        # Reject paths with shell metacharacters (consistency with K8s implementation)
        if re.search(r'[;&|`$(){}[\]<>\'"\n\r\\]', path):
            raise ValueError("Invalid path: contains disallowed characters")

        clean_path = path.lstrip("/")

        # Verify path only contains safe characters
        if not re.match(r"^[a-zA-Z0-9_\-./]+$", clean_path):
            raise ValueError("Invalid path: contains disallowed characters")

        file_path = session_path / clean_path

        # Verify path stays within session (defense in depth)
        try:
            file_path.resolve().relative_to(session_path.resolve())
        except ValueError:
            raise ValueError("Path traversal not allowed")

        if not file_path.exists():
            logger.debug(f"File not found for deletion in session {session_id}: {path}")
            return False

        if file_path.is_dir():
            raise ValueError("Cannot delete directory")

        file_path.unlink()
        logger.info(f"Deleted file from session {session_id}: {path}")

        return True

    def get_upload_stats(
        self,
        sandbox_id: UUID,
        session_id: UUID,
    ) -> tuple[int, int]:
        """Get current file count and total size for a session's attachments.

        Args:
            sandbox_id: The sandbox ID
            session_id: The session ID

        Returns:
            Tuple of (file_count, total_size_bytes)
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        attachments_path = session_path / "attachments"

        if not attachments_path.exists():
            return 0, 0

        file_count = 0
        total_size = 0
        for item in attachments_path.iterdir():
            if item.is_file():
                file_count += 1
                total_size += item.stat().st_size

        return file_count, total_size

    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:  # noqa: ARG002
        """Get the webapp URL for a session's Next.js server.

        For local backend, returns localhost URL with port.

        Args:
            sandbox_id: The sandbox ID (not used in local backend)
            port: The session's allocated Next.js port

        Returns:
            URL to access the webapp (e.g., http://localhost:3015)
        """
        return f"http://localhost:{port}"

    def generate_pptx_preview(
        self,
        sandbox_id: UUID,
        session_id: UUID,
        pptx_path: str,
        cache_dir: str,
    ) -> tuple[list[str], bool]:
        """Convert PPTX to slide images using soffice + pdftoppm.

        Uses local filesystem and subprocess for conversion.
        """
        session_path = self._get_session_path(sandbox_id, session_id)
        clean_pptx = self._sanitize_path(pptx_path)
        clean_cache = self._sanitize_path(cache_dir)
        pptx_abs = session_path / clean_pptx
        cache_abs = session_path / clean_cache

        if not pptx_abs.is_file():
            raise ValueError(f"File not found: {pptx_path}")

        # Check cache - if slides exist and are newer than the PPTX, use them
        cached = False
        if cache_abs.is_dir():
            existing = sorted(cache_abs.glob("slide-*.jpg"))
            if existing:
                pptx_mtime = pptx_abs.stat().st_mtime
                cache_mtime = existing[0].stat().st_mtime
                if cache_mtime >= pptx_mtime:
                    cached = True
                    return (
                        [str(f.relative_to(session_path)) for f in existing],
                        cached,
                    )
                # Stale cache - remove old slides
                for f in existing:
                    f.unlink()

        cache_abs.mkdir(parents=True, exist_ok=True)

        # Convert PPTX -> PDF using soffice
        try:
            import os

            env = os.environ.copy()
            env["SAL_USE_VCLPLUGIN"] = "svp"
            subprocess.run(
                [
                    "soffice",
                    "--headless",
                    "--convert-to",
                    "pdf",
                    "--outdir",
                    str(cache_abs),
                    str(pptx_abs),
                ],
                env=env,
                check=True,
                capture_output=True,
                timeout=120,
            )
        except FileNotFoundError:
            raise ValueError(
                "LibreOffice (soffice) is not installed. PPTX preview requires LibreOffice."
            )
        except subprocess.TimeoutExpired:
            raise ValueError("PPTX conversion timed out")
        except subprocess.CalledProcessError as e:
            raise ValueError(f"PPTX conversion failed: {e.stderr.decode()}")

        # Find the generated PDF
        pdf_files = list(cache_abs.glob("*.pdf"))
        if not pdf_files:
            raise ValueError("soffice did not produce a PDF file")
        pdf_path = pdf_files[0]

        # Convert PDF -> JPEG slides using pdftoppm
        try:
            subprocess.run(
                [
                    "pdftoppm",
                    "-jpeg",
                    "-r",
                    "150",
                    str(pdf_path),
                    str(cache_abs / "slide"),
                ],
                check=True,
                capture_output=True,
                timeout=120,
            )
        except FileNotFoundError:
            raise ValueError(
                "pdftoppm (poppler-utils) is not installed. PPTX preview requires poppler."
            )
        except subprocess.CalledProcessError as e:
            raise ValueError(f"PDF to image conversion failed: {e.stderr.decode()}")

        # Clean up PDF
        pdf_path.unlink(missing_ok=True)

        # Collect slide images
        slides = sorted(cache_abs.glob("slide-*.jpg"))
        return (
            [str(f.relative_to(session_path)) for f in slides],
            False,
        )

    def sync_files(
        self,
        sandbox_id: UUID,
        user_id: UUID,  # noqa: ARG002
        tenant_id: str,  # noqa: ARG002
        source: str | None = None,  # noqa: ARG002
    ) -> bool:
        """No-op for local mode - files are directly accessible via symlink.

        In local mode, the sandbox's files/ directory is a symlink to the
        local persistent document storage, so no sync is needed. File visibility
        in sessions is controlled via filtered symlinks in setup_session_workspace().

        Args:
            sandbox_id: The sandbox UUID (unused)
            user_id: The user ID (unused)
            tenant_id: The tenant ID (unused)
            source: The source type (unused in local mode)

        Returns:
            True (always succeeds since no sync is needed)
        """
        source_info = f" source={source}" if source else ""
        logger.debug(
            f"sync_files called for local sandbox {sandbox_id}{source_info} - no-op"
        )
        return True


================================================
FILE: backend/onyx/server/features/build/sandbox/local/process_manager.py
================================================
"""Process management for Next.js server subprocesses."""

import os
import shutil
import signal
import subprocess
import time
import urllib.error
import urllib.request
from pathlib import Path

from onyx.utils.logger import setup_logger

logger = setup_logger()


class ProcessManager:
    """Manages Next.js server subprocess lifecycle.

    Responsible for:
    - Starting Next.js dev servers
    - Checking process status
    - Gracefully terminating processes
    """

    def start_nextjs_server(
        self,
        web_dir: Path,
        port: int,
        timeout: float = 180.0,
    ) -> subprocess.Popen[bytes]:
        """Start Next.js dev server.

        1. Clear .next cache to avoid stale paths from template
        2. Start npm run dev on specified port
        3. Wait for server to be ready

        Args:
            web_dir: Path to the Next.js web directory
            port: Port number to run the server on
            timeout: Maximum time to wait for server to start

        Returns:
            The subprocess.Popen object for the Next.js server

        Raises:
            RuntimeError: If server fails to start within timeout
        """
        logger.info(f"Starting Next.js server in {web_dir} on port {port}")

        # Clear Next.js cache to avoid stale paths from template
        next_cache = web_dir / ".next"
        if next_cache.exists():
            logger.debug(f"Clearing Next.js cache at {next_cache}")
            shutil.rmtree(next_cache)

        # Verify web_dir exists and has package.json
        if not web_dir.exists():
            logger.error(f"Web directory does not exist: {web_dir}")
            raise RuntimeError(f"Web directory does not exist: {web_dir}")

        package_json = web_dir / "package.json"
        if not package_json.exists():
            logger.error(f"package.json not found in {web_dir}")
            raise RuntimeError(f"package.json not found in {web_dir}")

        logger.debug(f"Starting npm run dev command in {web_dir}")
        # CRITICAL: Inherit stdout/stderr (None) to prevent pipe buffer overflow.
        # When PIPE is used but never drained, the buffer fills up (64KB on most systems)
        # and the subprocess blocks indefinitely on write, causing the server to freeze.
        # This was the root cause of Next.js servers dying after a few minutes.
        # Using None inherits from parent, so logs appear in the backend terminal.
        # FIXME: ideally we should drain the pipe to avoid the buffer overflow, but not for v1
        process = subprocess.Popen(
            ["npm", "run", "dev", "--", "-p", str(port)],
            cwd=web_dir,
            stdout=None,
            stderr=None,
        )
        logger.info(f"Next.js process started with PID {process.pid}")

        # Wait for server to be ready
        server_url = f"http://localhost:{port}"
        logger.info(f"Waiting for Next.js server at {server_url} (timeout: {timeout}s)")

        if not self._wait_for_server(server_url, timeout=timeout, process=process):
            # Check if process died
            if process.poll() is not None:
                logger.error(
                    f"Next.js server process died with code {process.returncode}. "
                    f"Check the terminal or logs in {web_dir} for details."
                )
                raise RuntimeError(
                    f"Next.js server process died with code {process.returncode}. Check server logs for details."
                )

            # Process still running but server not responding
            logger.error(
                f"Next.js server failed to respond within {timeout} seconds (process still running with PID {process.pid})"
            )

            raise RuntimeError(
                f"Next.js server failed to start within {timeout} seconds"
            )

        logger.info(f"Next.js server is ready at {server_url}")
        return process

    def _wait_for_server(
        self,
        url: str,
        timeout: float = 30.0,
        poll_interval: float = 0.5,
        process: subprocess.Popen[bytes] | None = None,
    ) -> bool:
        """Wait for a server to become available by polling.

        Args:
            url: URL to poll
            timeout: Maximum time to wait in seconds
            poll_interval: Time between poll attempts in seconds
            process: Optional process to check if it's still running

        Returns:
            True if server became available, False if timeout reached
        """
        start_time = time.time()
        attempt_count = 0
        last_log_time = start_time

        while time.time() - start_time < timeout:
            attempt_count += 1
            elapsed = time.time() - start_time

            # Check if process died early
            if process is not None and process.poll() is not None:
                logger.warning(
                    f"Process died during wait (exit code: {process.returncode}) "
                    f"after {elapsed:.1f}s and {attempt_count} attempts"
                )
                return False

            try:
                with urllib.request.urlopen(url, timeout=2) as response:
                    if response.status == 200:
                        logger.debug(
                            f"Server ready after {elapsed:.1f}s and {attempt_count} attempts"
                        )
                        return True
            except urllib.error.HTTPError as e:
                # Log HTTP errors (server responding but with error)
                if time.time() - last_log_time >= 10:
                    logger.debug(
                        f"HTTP error {e.code} from {url} after {elapsed:.1f}s ({attempt_count} attempts)"
                    )
                    last_log_time = time.time()
            except (urllib.error.URLError, TimeoutError) as e:
                # Log connection errors periodically (every 10 seconds)
                if time.time() - last_log_time >= 10:
                    logger.debug(
                        f"Still waiting for {url} after {elapsed:.1f}s ({attempt_count} attempts): {type(e).__name__}"
                    )
                    last_log_time = time.time()

            time.sleep(poll_interval)

        logger.warning(
            f"Server at {url} did not become available within {timeout}s ({attempt_count} attempts)"
        )
        return False

    def is_process_running(self, pid: int) -> bool:
        """Check if process with given PID is still running.

        Args:
            pid: Process ID to check

        Returns:
            True if process is running, False otherwise
        """
        try:
            os.kill(pid, 0)  # Signal 0 just checks if process exists
            return True
        except ProcessLookupError:
            return False
        except PermissionError:
            return True  # Process exists but we can't signal it

    def terminate_process(self, pid: int, timeout: float = 5.0) -> bool:
        """Gracefully terminate process.

        1. Send SIGTERM
        2. Wait up to timeout seconds
        3. If still running, send SIGKILL

        Args:
            pid: Process ID to terminate
            timeout: Maximum time to wait for graceful shutdown

        Returns:
            True if process was terminated, False if it wasn't running
        """
        if not self.is_process_running(pid):
            return False

        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            return False

        # Wait for graceful shutdown
        deadline = time.time() + timeout
        while time.time() < deadline:
            if not self.is_process_running(pid):
                return True
            time.sleep(0.1)

        # Force kill if still running
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError:
            pass

        return True

    def get_process_info(self, pid: int) -> dict[str, str | int | float] | None:
        """Get information about a running process.

        Uses psutil if available, otherwise returns basic info.

        Args:
            pid: Process ID to get info for

        Returns:
            Dictionary with process info, or None if process not running
        """
        if not self.is_process_running(pid):
            return None

        try:
            import psutil

            proc = psutil.Process(pid)
            return {
                "pid": pid,
                "status": proc.status(),
                "cpu_percent": proc.cpu_percent(),
                "memory_mb": proc.memory_info().rss / 1024 / 1024,
                "create_time": proc.create_time(),
            }
        except ImportError:
            # psutil not available, return basic info
            return {"pid": pid, "status": "unknown"}
        except Exception:
            return {"pid": pid, "status": "unknown"}


================================================
FILE: backend/onyx/server/features/build/sandbox/local/test_agent_client.py
================================================
#!/usr/bin/env python3
"""Test script for ACPAgentClient with opencode CLI.

Usage:
  # From backend directory:
  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py

  # Or with specific message:
  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py "What files are in this directory?"

  # With specific working directory:
  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py --dir /path/to/project "List files"
"""

import argparse
import shutil
import tempfile
from pathlib import Path

from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart

try:
    from onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient
except ImportError:
    from agent_client import ACPAgentClient  # type: ignore


def test_with_opencode_acp(message: str, working_dir: str | None = None) -> None:
    """Test ACPAgentClient with the opencode CLI using ACP protocol."""
    print("=" * 60)
    print("Testing ACPAgentClient with opencode acp")
    print("=" * 60)

    # Use provided working dir or create temp dir
    if working_dir:
        work_dir = Path(working_dir)
        if not work_dir.exists():
            print(f"Working directory does not exist: {working_dir}")
            return
        cleanup_dir = False
    else:
        work_dir = Path(tempfile.mkdtemp(prefix="opencode-test-"))
        cleanup_dir = True
        print(f"Created temp working directory: {work_dir}")

    try:
        print(f"\nStarting ACPAgentClient in: {work_dir}")

        # Use context manager - handles start/stop automatically
        with ACPAgentClient(cwd=str(work_dir)) as client:
            print(
                f"Agent: {client.agent_info.get('name', 'unknown')} v{client.agent_info.get('version', '?')}"
            )
            print(f"Session ID: {client.session_id}")

            print(f"\nSending message: {message}")
            print("-" * 60)

            text_buffer = ""
            event_count = 0

            for event in client.send_message(message, timeout=120.0):
                event_count += 1

                if isinstance(event, AgentMessageChunk):
                    content = event.content
                    if content.type == "text":
                        text_buffer += content.text
                        print(content.text, end="", flush=True)

                elif isinstance(event, AgentThoughtChunk):
                    content = event.content
                    if content.type == "text":
                        print(f"\n[Thought: {content.text[:100]}...]", flush=True)

                elif isinstance(event, ToolCallStart):
                    print(
                        f"\n[Tool Call: {event.title} ({event.kind}) - {event.tool_call_id}]",
                        flush=True,
                    )

                elif isinstance(event, ToolCallProgress):
                    title_str = f"{event.title} " if event.title else ""
                    print(
                        f"\n[Tool Result: {title_str}{event.status} - {event.tool_call_id}]",
                        flush=True,
                    )

                elif isinstance(event, AgentPlanUpdate):
                    steps = event.plan.entries if event.plan else []
                    print(f"\n[Plan: {len(steps)} steps]", flush=True)

                elif isinstance(event, CurrentModeUpdate):
                    print(f"\n[Mode: {event.current_mode_id}]", flush=True)

                elif isinstance(event, PromptResponse):
                    print(f"\n\n[Done - stop_reason: {event.stop_reason}]")

                elif isinstance(event, Error):
                    print(f"\n[Error: {event.message}]")

                else:
                    print(f"\n[Unknown event]: {event}", flush=True)

            print("-" * 60)
            print(f"\nReceived {event_count} events total")
            if text_buffer:
                print(f"Total text length: {len(text_buffer)} chars")

    except RuntimeError as e:
        print(f"\nError: {e}")

    except Exception as e:
        print(f"\nUnexpected error: {e}")
        import traceback

        traceback.print_exc()

    finally:
        if cleanup_dir:
            shutil.rmtree(work_dir, ignore_errors=True)
            print(f"\nCleaned up temp directory: {work_dir}")


def main() -> None:
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Test ACPAgentClient with opencode CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Test with opencode CLI (default message)
  python test_agent_client.py

  # Test with specific message
  python test_agent_client.py "What is 2+2?"

  # Test with specific working directory
  python test_agent_client.py "List files" --dir /path/to/project
        """,
    )
    parser.add_argument(
        "message",
        type=str,
        nargs="?",
        default="What is 2+2? Reply briefly with just the number.",
        help="Message to send to opencode",
    )
    parser.add_argument(
        "--dir",
        type=str,
        metavar="PATH",
        help="Working directory for opencode (default: temp dir)",
    )

    args = parser.parse_args()

    print("\nACP Agent Client Test Suite")
    print("===========================\n")

    test_with_opencode_acp(args.message, args.dir)

    print("\n\nDone!")


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/local/test_manager.py
================================================
"""Tests for SandboxManager public interface.

These are external dependency unit tests that use real DB sessions and filesystem.
Each test covers a single happy path case for the corresponding public function.

Tests for provision are not included as they require the full sandbox environment
with Next.js servers.
"""

import shutil
import tempfile
from collections.abc import Generator
from pathlib import Path
from uuid import UUID
from uuid import uuid4

import pytest
from acp.schema import PromptResponse
from acp.schema import ToolCallStart
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.models import BuildSession
from onyx.db.models import Sandbox
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.file_store.file_store import get_default_file_store
from onyx.server.features.build.configs import SANDBOX_BASE_PATH
from onyx.server.features.build.db.build_session import allocate_nextjs_port
from onyx.server.features.build.sandbox import get_sandbox_manager
from onyx.server.features.build.sandbox.local import LocalSandboxManager
from onyx.server.features.build.sandbox.local.agent_client import ACPEvent
from onyx.server.features.build.sandbox.models import FilesystemEntry
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.models import SnapshotResult
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


TEST_TENANT_ID = "public"
TEST_USER_EMAIL = "test_sandbox_user@example.com"


@pytest.fixture(scope="function")
def db_session() -> Generator[Session, None, None]:
    """Create a database session for testing."""
    SqlEngine.init_engine(pool_size=10, max_overflow=5)
    with get_session_with_current_tenant() as session:
        yield session


@pytest.fixture(scope="function")
def tenant_context() -> Generator[None, None, None]:
    """Set up tenant context for testing."""
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@pytest.fixture
def sandbox_manager() -> LocalSandboxManager:
    """Get the SandboxManager instance via factory function."""
    manager = get_sandbox_manager()
    assert isinstance(manager, LocalSandboxManager)
    return manager


@pytest.fixture
def temp_sandbox_dir() -> Generator[Path, None, None]:
    """Create a temporary directory structure for sandbox testing."""
    temp_dir = Path(tempfile.mkdtemp(prefix="sandbox_test_"))
    outputs_dir = temp_dir / "outputs"
    outputs_dir.mkdir()

    yield temp_dir

    shutil.rmtree(temp_dir, ignore_errors=True)


@pytest.fixture
def actual_sandbox_path(sandbox_record: Sandbox) -> Path:
    """Get the actual sandbox path where the manager expects it."""
    return Path(SANDBOX_BASE_PATH) / str(sandbox_record.id)


@pytest.fixture
def test_user(
    db_session: Session,
    tenant_context: None,  # noqa: ARG001
) -> Generator[User, None, None]:
    """Create or get a test user for sandbox tests."""
    from sqlalchemy import select

    # Check if user already exists
    stmt = select(User).where(User.email == TEST_USER_EMAIL)  # type: ignore[arg-type]
    existing_user = db_session.execute(stmt).unique().scalar_one_or_none()

    if existing_user:
        yield existing_user
        return

    # Create new test user with required fields
    user = User(
        id=uuid4(),
        email=TEST_USER_EMAIL,
        hashed_password="test_hashed_password",  # Required NOT NULL field
        role=UserRole.BASIC,  # Required NOT NULL field
    )
    db_session.add(user)
    db_session.commit()
    db_session.refresh(user)

    yield user

    # Cleanup
    existing = db_session.get(User, user.id)
    if existing:
        db_session.delete(existing)
        db_session.commit()


@pytest.fixture
def sandbox_record(
    db_session: Session,
    tenant_context: None,  # noqa: ARG001
    test_user: User,
) -> Generator[Sandbox, None, None]:
    """Create a real Sandbox record in the database and set up sandbox directory."""
    from sqlalchemy import select

    # Check if sandbox already exists for this user (one sandbox per user)
    stmt = select(Sandbox).where(Sandbox.user_id == test_user.id)
    existing_sandbox = db_session.execute(stmt).unique().scalar_one_or_none()

    if existing_sandbox:
        # Clean up existing sandbox directory if it exists
        existing_sandbox_path = Path(SANDBOX_BASE_PATH) / str(existing_sandbox.id)
        if existing_sandbox_path.exists():
            shutil.rmtree(existing_sandbox_path, ignore_errors=True)
        # Delete existing sandbox record
        db_session.delete(existing_sandbox)
        db_session.commit()

    # Create Sandbox with reference to User (new model: one sandbox per user)
    sandbox = Sandbox(
        id=uuid4(),
        user_id=test_user.id,
        status=SandboxStatus.RUNNING,
    )
    db_session.add(sandbox)
    db_session.commit()
    db_session.refresh(sandbox)

    yield sandbox

    # Cleanup - re-fetch in case it was deleted
    existing = db_session.get(Sandbox, sandbox.id)
    if existing:
        db_session.delete(existing)
        db_session.commit()


@pytest.fixture
def build_session_record(
    db_session: Session,
    tenant_context: None,  # noqa: ARG001
    test_user: User,
) -> Generator[BuildSession, None, None]:
    """Create a BuildSession record for testing session-specific operations."""
    build_session = BuildSession(
        id=uuid4(),
        user_id=test_user.id,
        status=BuildSessionStatus.ACTIVE,
    )
    db_session.add(build_session)
    db_session.commit()
    db_session.refresh(build_session)

    yield build_session

    # Cleanup
    existing = db_session.get(BuildSession, build_session.id)
    if existing:
        db_session.delete(existing)
        db_session.commit()


@pytest.fixture
def session_workspace(
    sandbox_manager: LocalSandboxManager,
    sandbox_record: Sandbox,
    build_session_record: BuildSession,
    db_session: Session,
) -> Generator[tuple[Sandbox, UUID], None, None]:
    """Set up a session workspace within the sandbox and return (sandbox, session_id)."""
    session_id = build_session_record.id

    # Use setup_session_workspace to create the session directory structure
    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-api-key",
        api_base=None,
    )
    # Allocate port for this test session
    nextjs_port = allocate_nextjs_port(db_session)

    sandbox_manager.provision(
        sandbox_id=sandbox_record.id,
        user_id=sandbox_record.user_id,
        tenant_id=TEST_TENANT_ID,
        llm_config=llm_config,
    )
    sandbox_manager.setup_session_workspace(
        sandbox_id=sandbox_record.id,
        session_id=session_id,
        llm_config=llm_config,
        nextjs_port=nextjs_port,
        file_system_path=SANDBOX_BASE_PATH,
    )

    yield sandbox_record, session_id

    # Cleanup session workspace
    sandbox_manager.cleanup_session_workspace(
        sandbox_id=sandbox_record.id,
        session_id=session_id,
    )

    sandbox_manager.terminate(sandbox_record.id)


@pytest.fixture
def file_store_initialized() -> Generator[None, None, None]:
    """Initialize file store for snapshot tests."""
    get_default_file_store().initialize()
    yield


class TestTerminate:
    """Tests for SandboxManager.terminate()."""

    def test_terminate_cleans_up_resources(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        sandbox_record: Sandbox,
        temp_sandbox_dir: Path,  # noqa: ARG002
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that terminate cleans up sandbox resources.

        Note: Status update is now handled by the caller (SessionManager/tasks),
        not by the SandboxManager itself.
        """
        sandbox_manager.terminate(sandbox_record.id)
        # No exception means success - resources cleaned up


class TestCreateSnapshot:
    """Tests for SandboxManager.create_snapshot()."""

    def test_create_snapshot_archives_outputs(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
        file_store_initialized: None,  # noqa: ARG002
    ) -> None:
        """Test that create_snapshot archives the session's outputs directory.

        Note: Caller is responsible for creating DB record from the SnapshotResult.
        """
        sandbox, session_id = session_workspace
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        outputs_dir = sandbox_path / "sessions" / str(session_id) / "outputs"
        (outputs_dir / "app.py").write_text("print('hello')")

        result = sandbox_manager.create_snapshot(sandbox.id, session_id, TEST_TENANT_ID)

        assert isinstance(result, SnapshotResult)
        assert result.size_bytes > 0
        assert result.storage_path is not None


class TestHealthCheck:
    """Tests for SandboxManager.health_check()."""

    def test_health_check_returns_false_when_no_processes(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        sandbox_record: Sandbox,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that health_check returns False when no processes are running.

        Note: nextjs_port is now passed by the caller instead of being fetched from DB.
        """
        result = sandbox_manager.health_check(sandbox_record.id)

        assert result is False


class TestListDirectory:
    """Tests for SandboxManager.list_directory()."""

    def test_list_directory_returns_entries(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that list_directory returns filesystem entries."""
        sandbox, session_id = session_workspace
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        outputs_dir = sandbox_path / "sessions" / str(session_id)
        (outputs_dir / "file.txt").write_text("content")
        (outputs_dir / "subdir").mkdir()

        result = sandbox_manager.list_directory(sandbox.id, session_id, "/")
        print(result)

        # .agent, .venv, AGENTS.md, opencode.json, files, outputs, attachments + 2 created files
        assert len(result) == 9
        assert all(isinstance(e, FilesystemEntry) for e in result)


class TestReadFile:
    """Tests for SandboxManager.read_file()."""

    def test_read_file_returns_contents(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that read_file returns file contents as bytes."""
        sandbox, session_id = session_workspace
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        outputs_dir = sandbox_path / "sessions" / str(session_id) / "outputs"
        (outputs_dir / "test.txt").write_bytes(b"Hello, World!")

        result = sandbox_manager.read_file(sandbox.id, session_id, "test.txt")

        assert result == b"Hello, World!"


class TestSendMessage:
    """Tests for SandboxManager.send_message()."""

    def test_send_message_streams_events(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that send_message streams ACPEvent objects and ends with PromptResponse.

        Note: Heartbeat update is now handled by the caller (SessionManager),
        not by the SandboxManager itself.
        """
        sandbox, session_id = session_workspace

        events: list[ACPEvent] = []
        for event in sandbox_manager.send_message(
            sandbox.id, session_id, "What is 2 + 2?"
        ):
            events.append(event)

        # Should have received at least one event
        assert len(events) > 0

        # Last event should be PromptResponse (success) or contain results
        last_event = events[-1]
        assert isinstance(last_event, PromptResponse)

    def test_send_message_write_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that send_message can write files and emits edit tool calls."""
        sandbox, session_id = session_workspace
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        session_path = sandbox_path / "sessions" / str(session_id)

        events: list[ACPEvent] = []
        for event in sandbox_manager.send_message(
            sandbox.id,
            session_id,
            "Create a file called hello.txt with the content 'Hello, World!'",
        ):
            events.append(event)

        # Should have at least one ToolCallStart with kind='edit'
        tool_calls = [e for e in events if isinstance(e, ToolCallStart)]
        edit_tool_calls = [tc for tc in tool_calls if tc.kind == "edit"]
        assert len(edit_tool_calls) >= 1, (
            f"Expected at least one edit tool call, got {len(edit_tool_calls)}. "
            f"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}"
        )

        # Last event should be PromptResponse
        last_event = events[-1]
        assert isinstance(last_event, PromptResponse)

        # Verify the file was actually created (agent writes relative to session root)
        created_file = session_path / "hello.txt"
        assert created_file.exists(), f"Expected file {created_file} to be created"
        assert "Hello" in created_file.read_text()

    def test_send_message_read_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that send_message can read files and emits read tool calls."""
        sandbox, session_id = session_workspace
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        session_path = sandbox_path / "sessions" / str(session_id)

        # Create a file for the agent to read (at session root, where agent has access)
        test_file = session_path / "secret.txt"
        test_file.write_text("The secret code is 12345")

        events: list[ACPEvent] = []
        for event in sandbox_manager.send_message(
            sandbox.id,
            session_id,
            "Read the file secret.txt and tell me what the secret code is",
        ):
            events.append(event)

        # Should have at least one ToolCallStart with kind='read'
        tool_calls = [e for e in events if isinstance(e, ToolCallStart)]
        read_tool_calls = [tc for tc in tool_calls if tc.kind == "read"]
        assert len(read_tool_calls) >= 1, (
            f"Expected at least one read tool call, got {len(read_tool_calls)}. "
            f"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}"
        )

        # Last event should be PromptResponse
        last_event = events[-1]
        assert isinstance(last_event, PromptResponse)


class TestUploadFile:
    """Tests for SandboxManager.upload_file()."""

    def test_upload_file_creates_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that upload_file creates a file in the attachments directory."""
        sandbox, session_id = session_workspace
        content = b"Hello, World!"

        result = sandbox_manager.upload_file(
            sandbox.id, session_id, "test.txt", content
        )

        assert result == "attachments/test.txt"

        # Verify file exists
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        file_path = (
            sandbox_path / "sessions" / str(session_id) / "attachments" / "test.txt"
        )
        assert file_path.exists()
        assert file_path.read_bytes() == content

    def test_upload_file_handles_collision(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that upload_file renames files on collision."""
        sandbox, session_id = session_workspace

        # Upload first file
        sandbox_manager.upload_file(sandbox.id, session_id, "test.txt", b"first")

        # Upload second file with same name
        result = sandbox_manager.upload_file(
            sandbox.id, session_id, "test.txt", b"second"
        )

        assert result == "attachments/test_1.txt"


class TestDeleteFile:
    """Tests for SandboxManager.delete_file()."""

    def test_delete_file_removes_file(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that delete_file removes a file."""
        sandbox, session_id = session_workspace

        # Upload a file first
        sandbox_manager.upload_file(sandbox.id, session_id, "test.txt", b"content")

        # Delete it
        result = sandbox_manager.delete_file(
            sandbox.id, session_id, "attachments/test.txt"
        )

        assert result is True

        # Verify file is gone
        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)
        file_path = (
            sandbox_path / "sessions" / str(session_id) / "attachments" / "test.txt"
        )
        assert not file_path.exists()

    def test_delete_file_returns_false_for_missing(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that delete_file returns False for non-existent file."""
        sandbox, session_id = session_workspace

        result = sandbox_manager.delete_file(
            sandbox.id, session_id, "attachments/nonexistent.txt"
        )

        assert result is False

    def test_delete_file_rejects_path_traversal(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that delete_file rejects path traversal attempts."""
        sandbox, session_id = session_workspace

        with pytest.raises(ValueError, match="path traversal"):
            sandbox_manager.delete_file(sandbox.id, session_id, "../../../etc/passwd")


class TestGetUploadStats:
    """Tests for SandboxManager.get_upload_stats()."""

    def test_get_upload_stats_empty(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test get_upload_stats returns zeros for empty directory."""
        sandbox, session_id = session_workspace

        file_count, total_size = sandbox_manager.get_upload_stats(
            sandbox.id, session_id
        )

        assert file_count == 0
        assert total_size == 0

    def test_get_upload_stats_with_files(
        self,
        sandbox_manager: LocalSandboxManager,
        db_session: Session,  # noqa: ARG002
        session_workspace: tuple[Sandbox, UUID],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test get_upload_stats returns correct count and size."""
        sandbox, session_id = session_workspace

        # Upload some files
        sandbox_manager.upload_file(
            sandbox.id, session_id, "file1.txt", b"hello"
        )  # 5 bytes
        sandbox_manager.upload_file(
            sandbox.id, session_id, "file2.txt", b"world!"
        )  # 6 bytes

        file_count, total_size = sandbox_manager.get_upload_stats(
            sandbox.id, session_id
        )

        assert file_count == 2
        assert total_size == 11  # 5 + 6


================================================
FILE: backend/onyx/server/features/build/sandbox/manager/__init__.py
================================================
"""Sandbox manager utilities.

Contains:
- DirectoryManager: Sandbox directory structure management
- SnapshotManager: Snapshot creation and restoration
"""

from onyx.server.features.build.sandbox.manager.directory_manager import (
    DirectoryManager,
)
from onyx.server.features.build.sandbox.manager.snapshot_manager import SnapshotManager

__all__ = [
    "DirectoryManager",
    "SnapshotManager",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/manager/directory_manager.py
================================================
"""Directory management for sandbox lifecycle.

Supports user-shared sandbox model where:
- One sandbox per user with shared files/ directory
- Per-session workspaces under sessions/$session_id/
"""

import json
import shutil
from pathlib import Path

from onyx.server.features.build.sandbox.util.agent_instructions import (
    generate_agent_instructions,
)
from onyx.server.features.build.sandbox.util.opencode_config import (
    build_opencode_config,
)
from onyx.server.features.build.sandbox.util.persona_mapping import (
    generate_user_identity_content,
)
from onyx.server.features.build.sandbox.util.persona_mapping import get_persona_info
from onyx.server.features.build.sandbox.util.persona_mapping import ORG_INFO_AGENTS_MD
from onyx.server.features.build.sandbox.util.persona_mapping import (
    ORGANIZATION_STRUCTURE,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()


class DirectoryManager:
    """Manages sandbox directory creation and cleanup.

    Responsible for:
    - Creating sandbox directory structure (user-level)
    - Creating session workspace directories (session-level)
    - Setting up symlinks to knowledge files
    - Copying templates (outputs, venv, skills, AGENTS.md)
    - Cleaning up sandbox/session directories on termination

    Directory Structure:
        $base_path/$sandbox_id/
        ├── files/                     # Symlink to knowledge/source files (SHARED)
        └── sessions/
            ├── $session_id_1/         # Per-session workspace
            │   ├── outputs/           # Agent output (from template or snapshot)
            │   │   └── web/           # Next.js app
            │   ├── .venv/             # Python virtual environment
            │   ├── .agent/skills/     # Opencode skills
            │   ├── files/             # Symlink to sandbox-level files/ (SHARED)
            │   ├── AGENTS.md          # Agent instructions
            │   ├── opencode.json      # LLM config
            │   └── attachments/
            └── $session_id_2/
                └── ...
    """

    def __init__(
        self,
        base_path: Path,
        outputs_template_path: Path,
        venv_template_path: Path,
        skills_path: Path,
        agent_instructions_template_path: Path,
    ) -> None:
        """Initialize DirectoryManager with template paths.

        Args:
            base_path: Root directory for all sandboxes
            outputs_template_path: Path to outputs template directory
            venv_template_path: Path to Python virtual environment template
            skills_path: Path to agent skills directory
            agent_instructions_template_path: Path to AGENTS.md template file
        """
        self._base_path = base_path
        self._outputs_template_path = outputs_template_path
        self._venv_template_path = venv_template_path
        self._skills_path = skills_path
        self._agent_instructions_template_path = agent_instructions_template_path

    def create_sandbox_directory(self, sandbox_id: str) -> Path:
        """Create sandbox directory structure (user-level).

        Creates the base directory for a user's sandbox:
        {base_path}/{sandbox_id}/
        ├── files/                      # Symlink to knowledge/source files (set up separately)
        └── sessions/                   # Container for per-session workspaces

        NOTE: This only creates the sandbox-level structure.
        Call create_session_directory() to create per-session workspaces.

        Args:
            sandbox_id: Unique identifier for the sandbox

        Returns:
            Path to the created sandbox directory
        """
        sandbox_path = self._base_path / sandbox_id
        sandbox_path.mkdir(parents=True, exist_ok=True)
        # Create sessions directory for per-session workspaces
        (sandbox_path / "sessions").mkdir(exist_ok=True)
        return sandbox_path

    def create_session_directory(self, sandbox_path: Path, session_id: str) -> Path:
        """Create session workspace directory structure.

        Creates a per-session workspace within the sandbox:
        {sandbox_path}/sessions/{session_id}/
        ├── outputs/                    # Working directory from template
        │   ├── web/                    # Next.js app
        │   ├── slides/
        │   ├── markdown/
        │   └── graphs/
        ├── .venv/                      # Python virtual environment
        ├── AGENTS.md                   # Agent instructions
        ├── opencode.json               # LLM config (set up separately)
        ├── attachments/                # User-uploaded files
        └── .opencode/
            └── skills/                 # Agent skills

        NOTE: This creates the directory structure but doesn't copy templates.
        Call setup_outputs_directory(), setup_venv(), etc. to set up contents.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Unique identifier for the session

        Returns:
            Path to the created session workspace directory
        """
        session_path = sandbox_path / "sessions" / session_id
        session_path.mkdir(parents=True, exist_ok=True)
        return session_path

    def cleanup_session_directory(self, sandbox_path: Path, session_id: str) -> None:
        """Remove session workspace directory and all contents.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Session ID to clean up
        """
        session_path = sandbox_path / "sessions" / session_id
        if session_path.exists():
            shutil.rmtree(session_path)
            logger.info(f"Cleaned up session directory: {session_path}")

    def get_session_path(self, sandbox_path: Path, session_id: str) -> Path:
        """Get path to session workspace.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Session ID

        Returns:
            Path to sessions/$session_id/
        """
        return sandbox_path / "sessions" / session_id

    def setup_files_symlink(
        self,
        sandbox_path: Path,
        file_system_path: Path,
    ) -> None:
        """Create symlink to knowledge/source files.

        Args:
            sandbox_path: Path to the sandbox directory
            file_system_path: Path to the source files to link
        """
        files_link = sandbox_path / "files"
        if not files_link.exists():
            files_link.symlink_to(file_system_path, target_is_directory=True)

    def setup_org_info(
        self,
        session_path: Path,
        user_work_area: str | None,
        user_level: str | None,
    ) -> None:
        """Create org_info directory with organizational context files.

        Creates an org_info/ directory at the session root level with:
        - AGENTS.md: Description of available org info files
        - user_identity_profile.txt: User's persona information
        - organization_structure.json: Org hierarchy with managers and reports

        Uses shared constants from persona_mapping module as single source of truth.

        Args:
            session_path: Path to the session directory
            user_work_area: User's work area (e.g., "engineering", "product")
            user_level: User's level (e.g., "ic", "manager")
        """
        # Get persona info from mapping
        persona = get_persona_info(user_work_area, user_level)
        if not persona:
            logger.debug(
                f"No persona found for work_area={user_work_area}, level={user_level}, skipping org_info setup"
            )
            return

        # Create org_info directory at session root
        org_info_dir = session_path / "org_info"
        org_info_dir.mkdir(parents=True, exist_ok=True)

        try:
            # 1. AGENTS.md - Description of org info contents
            (org_info_dir / "AGENTS.md").write_text(ORG_INFO_AGENTS_MD)

            # 2. user_identity_profile.txt - User's persona
            (org_info_dir / "user_identity_profile.txt").write_text(
                generate_user_identity_content(persona)
            )

            # 3. organization_structure.json - Org hierarchy
            (org_info_dir / "organization_structure.json").write_text(
                json.dumps(ORGANIZATION_STRUCTURE, indent=2)
            )

            logger.info(
                f"Created org_info with identity: {persona['name']} <{persona['email']}>"
            )
        except Exception as e:
            # Don't fail provisioning if org_info setup fails
            logger.warning(f"Failed to setup org_info: {e}")

    def setup_outputs_directory(self, sandbox_path: Path) -> None:
        """Copy outputs template and create additional directories.

        Copies the Next.js template and creates additional output
        directories for generated content (slides, markdown, graphs).

        Args:
            sandbox_path: Path to the sandbox directory
        """
        output_dir = sandbox_path / "outputs"
        if not output_dir.exists():
            if self._outputs_template_path.exists():
                shutil.copytree(self._outputs_template_path, output_dir, symlinks=True)
            else:
                raise RuntimeError(
                    f"Outputs template path does not exist: {self._outputs_template_path}"
                )

        # Create additional output directories for generated content
        (output_dir / "markdown").mkdir(parents=True, exist_ok=True)
        # TODO: no images for now
        # (output_dir / "slides").mkdir(parents=True, exist_ok=True)
        # TODO: No graphs for now
        # (output_dir / "graphs").mkdir(parents=True, exist_ok=True)

    def setup_venv(self, sandbox_path: Path) -> Path:
        """Copy virtual environment template.

        Args:
            sandbox_path: Path to the sandbox directory

        Returns:
            Path to the virtual environment directory
        """
        venv_path = sandbox_path / ".venv"
        if not venv_path.exists() and self._venv_template_path.exists():
            shutil.copytree(self._venv_template_path, venv_path, symlinks=True)
        return venv_path

    def setup_agent_instructions(
        self,
        sandbox_path: Path,
        provider: str | None = None,
        model_name: str | None = None,
        nextjs_port: int | None = None,
        disabled_tools: list[str] | None = None,
        user_name: str | None = None,
        user_role: str | None = None,
        use_demo_data: bool = False,
        include_org_info: bool = False,
    ) -> None:
        """Generate AGENTS.md with dynamic configuration.

        Reads the template file and replaces placeholders with actual values
        including user personalization, LLM configuration, runtime settings,
        and dynamically discovered knowledge sources.

        Args:
            sandbox_path: Path to the sandbox directory
            provider: LLM provider type (e.g., "openai", "anthropic")
            model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
            nextjs_port: Port for Next.js development server
            disabled_tools: List of disabled tools
            user_name: User's name for personalization
            user_role: User's role/title for personalization
            use_demo_data: If True, exclude user context from AGENTS.md
            include_org_info: Whether to include the org_info section (demo data mode)
        """
        agent_md_path = sandbox_path / "AGENTS.md"
        if agent_md_path.exists():
            return

        # Get the files path (symlink to knowledge sources)
        files_path = sandbox_path / "files"

        # Use shared utility to generate content
        content = generate_agent_instructions(
            template_path=self._agent_instructions_template_path,
            skills_path=self._skills_path,
            files_path=files_path if files_path.exists() else None,
            provider=provider,
            model_name=model_name,
            nextjs_port=nextjs_port,
            disabled_tools=disabled_tools,
            user_name=user_name,
            user_role=user_role,
            use_demo_data=use_demo_data,
            include_org_info=include_org_info,
        )

        # Write the generated content
        agent_md_path.write_text(content)
        logger.debug(f"Generated AGENTS.md at {agent_md_path}")

    def setup_skills(self, sandbox_path: Path, overwrite: bool = True) -> None:
        """Copy skills directory to .opencode/skills.

        Copies all skills from the source skills directory to the sandbox's
        .opencode/skills directory. If the destination already exists, it will
        be removed and recreated to ensure skills are up-to-date.

        Args:
            sandbox_path: Path to the sandbox directory
            overwrite: If True, overwrite existing skills. If False, preserve existing skills.
        """
        skills_dest = sandbox_path / ".opencode" / "skills"

        if not self._skills_path.exists():
            logger.warning(
                f"Skills path {self._skills_path} does not exist, skipping skills setup"
            )
            return

        if not overwrite and skills_dest.exists():
            logger.debug(
                f"Skills directory already exists at {skills_dest}, skipping skills setup"
            )
            return

        try:
            # Remove existing skills directory if it exists to ensure fresh copy
            if skills_dest.exists():
                shutil.rmtree(skills_dest)

            # Create parent directory and copy skills
            skills_dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copytree(self._skills_path, skills_dest)

            # Verify the copy succeeded
            if not skills_dest.exists():
                logger.error(
                    f"Skills copy failed: destination {skills_dest} does not exist after copy"
                )
        except Exception as e:
            logger.error(
                f"Failed to copy skills from {self._skills_path} to {skills_dest}: {e}",
                exc_info=True,
            )
            raise

    def setup_opencode_config(
        self,
        sandbox_path: Path,
        provider: str,
        model_name: str,
        api_key: str | None = None,
        api_base: str | None = None,
        disabled_tools: list[str] | None = None,
        overwrite: bool = True,
        dev_mode: bool = False,
    ) -> None:
        """Create opencode.json configuration file for the agent.

        Configures the opencode CLI agent with the LLM provider settings
        from Onyx's configured LLM provider.

        Args:
            sandbox_path: Path to the sandbox directory
            provider: LLM provider type (e.g., "openai", "anthropic")
            model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
            api_key: Optional API key for the provider
            api_base: Optional custom API base URL
            disabled_tools: Optional list of tools to disable (e.g., ["question", "webfetch"])
            overwrite: If True, overwrite existing config. If False, preserve existing config.
            dev_mode: If True, allow all external directories (local dev).
                      If False (default), only whitelist /workspace/files and /workspace/demo_data.
        """
        config_path = sandbox_path / "opencode.json"
        if not overwrite and config_path.exists():
            logger.debug(
                f"opencode.json already exists at {config_path}, skipping config setup"
            )
            return

        # Use shared config builder
        config = build_opencode_config(
            provider=provider,
            model_name=model_name,
            api_key=api_key,
            api_base=api_base,
            disabled_tools=disabled_tools,
            dev_mode=dev_mode,
        )

        config_json = json.dumps(config, indent=2)
        config_path.write_text(config_json)

    def cleanup_sandbox_directory(self, sandbox_path: Path) -> None:
        """Remove sandbox directory and all contents.

        Args:
            sandbox_path: Path to the sandbox directory to remove
        """
        if sandbox_path.exists():
            shutil.rmtree(sandbox_path)

    def get_outputs_path(
        self, sandbox_path: Path, session_id: str | None = None
    ) -> Path:
        """Return path to outputs directory.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Optional session ID for session-specific outputs

        Returns:
            Path to the outputs directory
        """
        if session_id:
            return sandbox_path / "sessions" / session_id / "outputs"
        return sandbox_path / "outputs"

    def get_web_path(self, sandbox_path: Path, session_id: str) -> Path:
        """Return path to Next.js web directory.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Optional session ID for session-specific web directory

        Returns:
            Path to the web directory
        """
        if session_id:
            return sandbox_path / "sessions" / session_id / "outputs" / "web"
        return sandbox_path / "outputs" / "web"

    def get_venv_path(self, sandbox_path: Path, session_id: str | None = None) -> Path:
        """Return path to virtual environment.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Optional session ID for session-specific venv

        Returns:
            Path to the .venv directory
        """
        if session_id:
            return sandbox_path / "sessions" / session_id / ".venv"
        return sandbox_path / ".venv"

    def directory_exists(self, sandbox_path: Path) -> bool:
        """Check if sandbox directory exists.

        Args:
            sandbox_path: Path to check

        Returns:
            True if directory exists and is a directory
        """
        return sandbox_path.exists() and sandbox_path.is_dir()

    def session_exists(self, sandbox_path: Path, session_id: str) -> bool:
        """Check if session workspace exists.

        Args:
            sandbox_path: Path to sandbox directory
            session_id: Session ID to check

        Returns:
            True if session directory exists
        """
        session_path = sandbox_path / "sessions" / session_id
        return session_path.exists() and session_path.is_dir()

    def setup_attachments_directory(
        self, sandbox_path: Path, session_id: str | None = None
    ) -> Path:
        """Create attachments directory for user-uploaded files.

        This directory is used to store files uploaded by the user
        through the chat interface.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Optional session ID for session-specific uploads

        Returns:
            Path to the attachments directory
        """
        if session_id:
            attachments_path = sandbox_path / "sessions" / session_id / "attachments"
        else:
            attachments_path = sandbox_path / "attachments"
        attachments_path.mkdir(parents=True, exist_ok=True)
        return attachments_path

    def get_attachments_path(
        self, sandbox_path: Path, session_id: str | None = None
    ) -> Path:
        """Return path to attachments directory.

        Args:
            sandbox_path: Path to the sandbox directory
            session_id: Optional session ID for session-specific uploads

        Returns:
            Path to the attachments directory
        """
        if session_id:
            return sandbox_path / "sessions" / session_id / "attachments"
        return sandbox_path / "attachments"


================================================
FILE: backend/onyx/server/features/build/sandbox/manager/snapshot_manager.py
================================================
"""Snapshot management for sandbox state persistence."""

import tarfile
import tempfile
from pathlib import Path
from uuid import uuid4

from onyx.configs.constants import FileOrigin
from onyx.file_store.file_store import FileStore
from onyx.utils.logger import setup_logger

logger = setup_logger()

# File type for snapshot archives
SNAPSHOT_FILE_TYPE = "application/gzip"


class SnapshotManager:
    """Manages sandbox snapshot creation and restoration.

    Snapshots are tar.gz archives of the sandbox's outputs directory,
    stored using the file store abstraction (S3-compatible storage).

    Responsible for:
    - Creating snapshots of outputs directories
    - Restoring snapshots to target directories
    - Deleting snapshots from storage
    """

    def __init__(self, file_store: FileStore) -> None:
        """Initialize SnapshotManager with a file store.

        Args:
            file_store: The file store to use for snapshot storage
        """
        self._file_store = file_store

    def create_snapshot(
        self,
        sandbox_path: Path,
        sandbox_id: str,
        tenant_id: str,
    ) -> tuple[str, str, int]:
        """Create a snapshot of the outputs directory.

        Creates a tar.gz archive of the sandbox's outputs directory
        and uploads it to the file store.

        Args:
            sandbox_path: Path to the sandbox directory
            sandbox_id: Sandbox identifier
            tenant_id: Tenant identifier for multi-tenant isolation

        Returns:
            Tuple of (snapshot_id, storage_path, size_bytes)

        Raises:
            FileNotFoundError: If outputs directory doesn't exist
            RuntimeError: If snapshot creation fails
        """
        snapshot_id = str(uuid4())
        outputs_path = sandbox_path / "outputs"

        if not outputs_path.exists():
            raise FileNotFoundError(f"Outputs directory not found: {outputs_path}")

        # Create tar.gz in temp location
        tmp_path: str | None = None
        try:
            with tempfile.NamedTemporaryFile(
                suffix=".tar.gz", delete=False
            ) as tmp_file:
                tmp_path = tmp_file.name

            # Create the tar archive
            with tarfile.open(tmp_path, "w:gz") as tar:
                tar.add(outputs_path, arcname="outputs")

            # Get size
            size_bytes = Path(tmp_path).stat().st_size

            # Generate storage path for file store
            # Format: sandbox-snapshots/{tenant_id}/{sandbox_id}/{snapshot_id}.tar.gz
            storage_path = (
                f"sandbox-snapshots/{tenant_id}/{sandbox_id}/{snapshot_id}.tar.gz"
            )
            display_name = f"sandbox-snapshot-{sandbox_id}-{snapshot_id}.tar.gz"

            # Upload to file store
            with open(tmp_path, "rb") as f:
                self._file_store.save_file(
                    content=f,
                    display_name=display_name,
                    file_origin=FileOrigin.SANDBOX_SNAPSHOT,
                    file_type=SNAPSHOT_FILE_TYPE,
                    file_id=storage_path,
                    file_metadata={
                        "sandbox_id": sandbox_id,
                        "tenant_id": tenant_id,
                        "snapshot_id": snapshot_id,
                    },
                )

            logger.info(
                f"Created snapshot {snapshot_id} for sandbox {sandbox_id}, size: {size_bytes} bytes"
            )

            return snapshot_id, storage_path, size_bytes

        except Exception as e:
            logger.error(f"Failed to create snapshot for sandbox {sandbox_id}: {e}")
            raise RuntimeError(f"Failed to create snapshot: {e}") from e
        finally:
            # Cleanup temp file
            if tmp_path:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to cleanup temp file {tmp_path}: {cleanup_error}"
                    )

    def restore_snapshot(
        self,
        storage_path: str,
        target_path: Path,
    ) -> None:
        """Restore a snapshot to target directory.

        Downloads the snapshot from file store and extracts the outputs/
        directory to the target path.

        Args:
            storage_path: The file store path of the snapshot
            target_path: Directory to extract the snapshot into

        Raises:
            FileNotFoundError: If snapshot doesn't exist in file store
            RuntimeError: If restoration fails
        """
        tmp_path: str | None = None
        file_io = None
        try:
            # Download from file store
            file_io = self._file_store.read_file(storage_path, use_tempfile=True)

            # Write to temp file for tarfile extraction
            with tempfile.NamedTemporaryFile(
                suffix=".tar.gz", delete=False
            ) as tmp_file:
                tmp_path = tmp_file.name
                # Read from the IO object and write to temp file
                content = file_io.read()
                tmp_file.write(content)

            # Ensure target path exists
            target_path.mkdir(parents=True, exist_ok=True)

            # Extract with security filter
            with tarfile.open(tmp_path, "r:gz") as tar:
                # Use data filter for safe extraction (prevents path traversal)
                # Available in Python 3.11.4+
                try:
                    tar.extractall(target_path, filter="data")
                except TypeError:
                    # Fallback for older Python versions without filter support
                    # Manually validate paths for security
                    for member in tar.getmembers():
                        # Check for path traversal attempts
                        member_path = Path(target_path) / member.name
                        try:
                            member_path.resolve().relative_to(target_path.resolve())
                        except ValueError:
                            raise RuntimeError(
                                f"Path traversal attempt detected: {member.name}"
                            )
                    tar.extractall(target_path)

            logger.info(f"Restored snapshot from {storage_path} to {target_path}")

        except Exception as e:
            logger.error(f"Failed to restore snapshot {storage_path}: {e}")
            raise RuntimeError(f"Failed to restore snapshot: {e}") from e
        finally:
            # Cleanup temp file
            if tmp_path:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to cleanup temp file {tmp_path}: {cleanup_error}"
                    )
            # Close the file IO if it's still open
            try:
                if file_io:
                    file_io.close()
            except Exception:
                pass

    def delete_snapshot(self, storage_path: str) -> None:
        """Delete snapshot from file store.

        Args:
            storage_path: The file store path of the snapshot to delete

        Raises:
            RuntimeError: If deletion fails (other than file not found)
        """
        try:
            self._file_store.delete_file(storage_path)
            logger.info(f"Deleted snapshot: {storage_path}")
        except Exception as e:
            # Log but don't fail if snapshot doesn't exist
            logger.warning(f"Failed to delete snapshot {storage_path}: {e}")
            raise RuntimeError(f"Failed to delete snapshot: {e}") from e

    def get_snapshot_size(self, storage_path: str) -> int | None:
        """Get the size of a snapshot in bytes.

        Args:
            storage_path: The file store path of the snapshot

        Returns:
            Size in bytes, or None if not available
        """
        return self._file_store.get_file_size(storage_path)


================================================
FILE: backend/onyx/server/features/build/sandbox/manager/test_directory_manager.py
================================================
"""Tests for DirectoryManager.

These are unit tests that test DirectoryManager's behavior in isolation,
focusing on the setup_opencode_config method with different provider configurations.
"""

import json
import shutil
import tempfile
from collections.abc import Generator
from pathlib import Path
from typing import Any

import pytest

from onyx.server.features.build.sandbox.manager.directory_manager import (
    DirectoryManager,
)


@pytest.fixture
def temp_base_path() -> Generator[Path, None, None]:
    """Create a temporary base path for testing."""
    temp_dir = Path(tempfile.mkdtemp(prefix="test_dir_manager_"))
    yield temp_dir
    shutil.rmtree(temp_dir, ignore_errors=True)


@pytest.fixture
def temp_templates(temp_base_path: Path) -> dict[str, Path]:
    """Create temporary template directories and files."""
    templates_dir = temp_base_path / "templates"
    templates_dir.mkdir()

    outputs_template = templates_dir / "outputs"
    outputs_template.mkdir()

    venv_template = templates_dir / "venv"
    venv_template.mkdir()

    skills_path = templates_dir / "skills"
    skills_path.mkdir()

    agent_instructions = templates_dir / "AGENTS.md"
    agent_instructions.write_text("# Agent Instructions\n")

    return {
        "outputs": outputs_template,
        "venv": venv_template,
        "skills": skills_path,
        "agent_instructions": agent_instructions,
    }


@pytest.fixture
def directory_manager(
    temp_base_path: Path, temp_templates: dict[str, Path]
) -> DirectoryManager:
    """Create a DirectoryManager instance with temporary paths."""
    return DirectoryManager(
        base_path=temp_base_path,
        outputs_template_path=temp_templates["outputs"],
        venv_template_path=temp_templates["venv"],
        skills_path=temp_templates["skills"],
        agent_instructions_template_path=temp_templates["agent_instructions"],
    )


class TestSetupOpencodeConfig:
    """Tests for DirectoryManager.setup_opencode_config()."""

    def test_openai_config_with_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that OpenAI provider includes reasoning configuration."""
        session_id = "test_openai_session"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        assert config_path.exists()

        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "openai/gpt-4o"
        assert "$schema" in config
        assert "provider" in config
        assert "openai" in config["provider"]
        assert config["provider"]["openai"]["options"]["apiKey"] == "test-api-key"

        # Verify OpenAI reasoning configuration in model config
        assert "models" in config["provider"]["openai"]
        assert "gpt-4o" in config["provider"]["openai"]["models"]
        model_options = config["provider"]["openai"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"

    def test_anthropic_config_with_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that Anthropic provider includes thinking configuration."""
        session_id = "test_anthropic_session"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="anthropic",
            model_name="claude-sonnet-4-5",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        assert config_path.exists()

        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "anthropic/claude-sonnet-4-5"
        assert "$schema" in config
        assert "provider" in config
        assert "anthropic" in config["provider"]
        assert config["provider"]["anthropic"]["options"]["apiKey"] == "test-api-key"

        # Verify Anthropic thinking configuration in model config
        assert "models" in config["provider"]["anthropic"]
        assert "claude-sonnet-4-5" in config["provider"]["anthropic"]["models"]
        model_options = config["provider"]["anthropic"]["models"]["claude-sonnet-4-5"][
            "options"
        ]
        assert "thinking" in model_options
        assert model_options["thinking"]["type"] == "enabled"
        assert model_options["thinking"]["budgetTokens"] == 16000

    def test_google_config_with_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that Google provider includes thinking configuration."""
        session_id = "test_google_session"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="google",
            model_name="gemini-3-pro",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        assert config_path.exists()

        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "google/gemini-3-pro"
        assert "$schema" in config
        assert "provider" in config
        assert "google" in config["provider"]
        assert config["provider"]["google"]["options"]["apiKey"] == "test-api-key"

        # Verify Google thinking configuration in model config
        assert "models" in config["provider"]["google"]
        assert "gemini-3-pro" in config["provider"]["google"]["models"]
        model_options = config["provider"]["google"]["models"]["gemini-3-pro"][
            "options"
        ]
        assert model_options["thinking_budget"] == 16000
        assert model_options["thinking_level"] == "high"

    def test_bedrock_config_with_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that Bedrock provider includes thinking configuration."""
        session_id = "test_bedrock_session"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="bedrock",
            model_name="anthropic.claude-v3-5-sonnet-20250219-v1:0",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        assert config_path.exists()

        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "bedrock/anthropic.claude-v3-5-sonnet-20250219-v1:0"
        assert "$schema" in config
        assert "provider" in config
        assert "bedrock" in config["provider"]
        assert config["provider"]["bedrock"]["options"]["apiKey"] == "test-api-key"

        # Verify Bedrock thinking configuration in model config (same as Anthropic)
        assert "models" in config["provider"]["bedrock"]
        model_name = "anthropic.claude-v3-5-sonnet-20250219-v1:0"
        assert model_name in config["provider"]["bedrock"]["models"]
        model_options = config["provider"]["bedrock"]["models"][model_name]["options"]
        assert "thinking" in model_options
        assert model_options["thinking"]["type"] == "enabled"
        assert model_options["thinking"]["budgetTokens"] == 16000

    def test_azure_config_with_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that Azure provider includes thinking configuration."""
        session_id = "test_azure_session"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="azure",
            model_name="gpt-4o",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        assert config_path.exists()

        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "azure/gpt-4o"
        assert "$schema" in config
        assert "provider" in config
        assert "azure" in config["provider"]
        assert config["provider"]["azure"]["options"]["apiKey"] == "test-api-key"

        # Verify Azure reasoning configuration in model config (same as OpenAI)
        assert "models" in config["provider"]["azure"]
        assert "gpt-4o" in config["provider"]["azure"]["models"]
        model_options = config["provider"]["azure"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"

    def test_openai_config_with_api_base(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test OpenAI config with custom API base URL."""
        session_id = "test_openai_api_base"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
            api_key="test-api-key",
            api_base="https://custom.api.endpoint",
        )

        config_path = sandbox_path / "opencode.json"
        config = json.loads(config_path.read_text())

        # Verify API base is included
        assert config["provider"]["openai"]["api"] == "https://custom.api.endpoint"

        # Verify thinking config is still present in model options
        assert "models" in config["provider"]["openai"]
        model_options = config["provider"]["openai"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"

    def test_anthropic_config_with_api_base(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test Anthropic config with custom API base URL."""
        session_id = "test_anthropic_api_base"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="anthropic",
            model_name="claude-sonnet-4-5",
            api_key="test-api-key",
            api_base="https://custom.anthropic.endpoint",
        )

        config_path = sandbox_path / "opencode.json"
        config = json.loads(config_path.read_text())

        # Verify API base is included
        assert (
            config["provider"]["anthropic"]["api"]
            == "https://custom.anthropic.endpoint"
        )

        # Verify thinking config is still present in model options
        assert "models" in config["provider"]["anthropic"]
        model_options = config["provider"]["anthropic"]["models"]["claude-sonnet-4-5"][
            "options"
        ]
        assert model_options["thinking"]["type"] == "enabled"

    def test_config_with_disabled_tools(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test config with disabled tools permissions."""
        session_id = "test_disabled_tools"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
            api_key="test-api-key",
            disabled_tools=["question", "webfetch"],
        )

        config_path = sandbox_path / "opencode.json"
        config = json.loads(config_path.read_text())

        # Verify disabled tools
        assert "permission" in config
        assert config["permission"]["question"] == "deny"
        assert config["permission"]["webfetch"] == "deny"

        # Verify default permissions are still present
        assert config["permission"]["read"] == "allow"
        assert config["permission"]["write"] == "allow"
        assert config["permission"]["edit"] == "allow"
        assert config["permission"]["grep"] == "allow"
        assert "bash" in config["permission"]
        assert config["permission"]["bash"]["rm"] == "deny"

        # Verify thinking config is still present in model options
        assert "models" in config["provider"]["openai"]
        model_options = config["provider"]["openai"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"

    def test_config_without_api_key(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test config without API key still includes thinking settings."""
        session_id = "test_no_api_key"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
        )

        config_path = sandbox_path / "opencode.json"
        config = json.loads(config_path.read_text())

        # Should still have provider config structure even without API key
        assert "provider" in config
        assert "openai" in config["provider"]
        # Should not have options (API key) without API key
        assert "options" not in config["provider"]["openai"]

        # But should still have thinking config in model options
        assert "models" in config["provider"]["openai"]
        assert "gpt-4o" in config["provider"]["openai"]["models"]
        model_options = config["provider"]["openai"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"

    def test_other_provider_no_thinking(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that other providers (non OpenAI/Anthropic/Google/Bedrock/Azure) don't get thinking configuration."""
        session_id = "test_other_provider"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="cohere",
            model_name="command-r-plus",
            api_key="test-api-key",
        )

        config_path = sandbox_path / "opencode.json"
        config = json.loads(config_path.read_text())

        # Verify basic structure
        assert config["model"] == "cohere/command-r-plus"
        assert "$schema" in config
        assert "provider" in config
        assert "cohere" in config["provider"]

        # Should not have model config (thinking) for other providers
        assert "models" not in config["provider"]["cohere"]

    def test_config_overwritten_if_exists(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test that existing opencode.json is overwritten with new config."""
        session_id = "test_existing_config"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        # Create existing config
        existing_config = {"model": "existing/model", "custom": "value"}
        config_path = sandbox_path / "opencode.json"
        config_path.write_text(json.dumps(existing_config, indent=2))

        # Try to setup new config
        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
            api_key="test-api-key",
        )

        # Verify config is overwritten with new config
        config = json.loads(config_path.read_text())
        assert config["model"] == "openai/gpt-4o"
        assert "custom" not in config  # Old config is replaced
        assert config["provider"]["openai"]["options"]["apiKey"] == "test-api-key"

    def test_full_config_structure_openai(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test full OpenAI config structure matches expected format."""
        session_id = "test_full_openai"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="openai",
            model_name="gpt-4o",
            api_key="test-openai-key",
            api_base="https://api.openai.com/v1",
            disabled_tools=["webfetch"],
        )

        config_path = sandbox_path / "opencode.json"
        config: dict[str, Any] = json.loads(config_path.read_text())

        # Verify key parts of structure (permission has defaults now)
        assert config["model"] == "openai/gpt-4o"
        assert config["$schema"] == "https://opencode.ai/config.json"
        assert config["provider"]["openai"]["options"]["apiKey"] == "test-openai-key"
        assert config["provider"]["openai"]["api"] == "https://api.openai.com/v1"
        assert "models" in config["provider"]["openai"]
        model_options = config["provider"]["openai"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"
        assert config["permission"]["webfetch"] == "deny"

    def test_full_config_structure_anthropic(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test full Anthropic config structure matches expected format."""
        session_id = "test_full_anthropic"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="anthropic",
            model_name="claude-sonnet-4-5",
            api_key="test-anthropic-key",
            api_base="https://api.anthropic.com",
            disabled_tools=["question"],
        )

        config_path = sandbox_path / "opencode.json"
        config: dict[str, Any] = json.loads(config_path.read_text())

        # Verify structure (permission has defaults now, so we check for overrides)
        assert config["model"] == "anthropic/claude-sonnet-4-5"
        assert config["$schema"] == "https://opencode.ai/config.json"
        assert (
            config["provider"]["anthropic"]["options"]["apiKey"] == "test-anthropic-key"
        )
        assert config["provider"]["anthropic"]["api"] == "https://api.anthropic.com"
        assert "models" in config["provider"]["anthropic"]
        model_options = config["provider"]["anthropic"]["models"]["claude-sonnet-4-5"][
            "options"
        ]
        assert model_options["thinking"]["type"] == "enabled"
        assert model_options["thinking"]["budgetTokens"] == 16000
        assert config["permission"]["question"] == "deny"

    def test_full_config_structure_google(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test full Google config structure matches expected format."""
        session_id = "test_full_google"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="google",
            model_name="gemini-3-pro",
            api_key="test-google-key",
            api_base="https://generativelanguage.googleapis.com",
            disabled_tools=["webfetch"],
        )

        config_path = sandbox_path / "opencode.json"
        config: dict[str, Any] = json.loads(config_path.read_text())

        # Verify structure
        assert config["model"] == "google/gemini-3-pro"
        assert config["$schema"] == "https://opencode.ai/config.json"
        assert config["provider"]["google"]["options"]["apiKey"] == "test-google-key"
        assert (
            config["provider"]["google"]["api"]
            == "https://generativelanguage.googleapis.com"
        )
        assert "models" in config["provider"]["google"]
        model_options = config["provider"]["google"]["models"]["gemini-3-pro"][
            "options"
        ]
        assert model_options["thinking_budget"] == 16000
        assert model_options["thinking_level"] == "high"
        assert config["permission"]["webfetch"] == "deny"

    def test_full_config_structure_bedrock(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test full Bedrock config structure matches expected format."""
        session_id = "test_full_bedrock"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="bedrock",
            model_name="anthropic.claude-v3-5-sonnet-20250219-v1:0",
            api_key="test-bedrock-key",
            disabled_tools=["question"],
        )

        config_path = sandbox_path / "opencode.json"
        config: dict[str, Any] = json.loads(config_path.read_text())

        # Verify structure
        assert config["model"] == "bedrock/anthropic.claude-v3-5-sonnet-20250219-v1:0"
        assert config["$schema"] == "https://opencode.ai/config.json"
        assert config["provider"]["bedrock"]["options"]["apiKey"] == "test-bedrock-key"
        model_name = "anthropic.claude-v3-5-sonnet-20250219-v1:0"
        assert "models" in config["provider"]["bedrock"]
        model_options = config["provider"]["bedrock"]["models"][model_name]["options"]
        assert model_options["thinking"]["type"] == "enabled"
        assert model_options["thinking"]["budgetTokens"] == 16000
        assert config["permission"]["question"] == "deny"

    def test_full_config_structure_azure(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test full Azure config structure matches expected format."""
        session_id = "test_full_azure"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="azure",
            model_name="gpt-4o",
            api_key="test-azure-key",
            api_base="https://myresource.openai.azure.com",
            disabled_tools=["bash"],
        )

        config_path = sandbox_path / "opencode.json"
        config: dict[str, Any] = json.loads(config_path.read_text())

        # Verify structure
        assert config["model"] == "azure/gpt-4o"
        assert config["$schema"] == "https://opencode.ai/config.json"
        assert config["provider"]["azure"]["options"]["apiKey"] == "test-azure-key"
        assert (
            config["provider"]["azure"]["api"] == "https://myresource.openai.azure.com"
        )
        assert "models" in config["provider"]["azure"]
        model_options = config["provider"]["azure"]["models"]["gpt-4o"]["options"]
        assert model_options["reasoningEffort"] == "high"
        assert config["permission"]["bash"] == "deny"


class TestSandboxDirectoryStructure:
    """Tests for complete sandbox directory setup."""

    def test_create_complete_sandbox(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
    ) -> None:
        """Test creating a complete sandbox with all components including opencode.json."""
        session_id = "test_complete_sandbox"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)

        # Setup all components
        directory_manager.setup_outputs_directory(sandbox_path)
        directory_manager.setup_venv(sandbox_path)
        directory_manager.setup_agent_instructions(sandbox_path)
        directory_manager.setup_skills(sandbox_path)
        directory_manager.setup_attachments_directory(sandbox_path)
        directory_manager.setup_opencode_config(
            sandbox_path=sandbox_path,
            provider="anthropic",
            model_name="claude-sonnet-4-5",
            api_key="test-key",
        )

        # Verify all components exist
        assert (sandbox_path / "outputs").exists()
        assert (sandbox_path / ".venv").exists()
        assert (sandbox_path / "AGENTS.md").exists()
        assert (sandbox_path / ".opencode" / "skills").exists()
        assert (sandbox_path / "attachments").exists()
        assert (sandbox_path / "opencode.json").exists()

        # Verify opencode.json has thinking config
        config = json.loads((sandbox_path / "opencode.json").read_text())
        model_options = config["provider"]["anthropic"]["models"]["claude-sonnet-4-5"][
            "options"
        ]
        assert model_options["thinking"]["type"] == "enabled"

    def test_setup_skills_copies_and_overwrites(
        self,
        directory_manager: DirectoryManager,
        temp_base_path: Path,  # noqa: ARG002
        temp_templates: dict[str, Path],
    ) -> None:
        """Test that setup_skills copies skills and overwrites existing ones."""
        session_id = "test_skills_setup"
        sandbox_path = directory_manager.create_sandbox_directory(session_id)
        skills_dest = sandbox_path / ".opencode" / "skills"

        # Create a test skill in the source directory
        test_skill_dir = temp_templates["skills"] / "test-skill"
        test_skill_dir.mkdir()
        test_skill_file = test_skill_dir / "SKILL.md"
        test_skill_file.write_text("# Test Skill\nOriginal content")

        # First call - should copy skills
        directory_manager.setup_skills(sandbox_path)
        assert skills_dest.exists()
        assert (skills_dest / "test-skill" / "SKILL.md").exists()
        assert (
            skills_dest / "test-skill" / "SKILL.md"
        ).read_text() == "# Test Skill\nOriginal content"

        # Update the source skill
        test_skill_file.write_text("# Test Skill\nUpdated content")

        # Second call - should overwrite existing skills
        directory_manager.setup_skills(sandbox_path)
        assert skills_dest.exists()
        assert (skills_dest / "test-skill" / "SKILL.md").exists()
        assert (
            skills_dest / "test-skill" / "SKILL.md"
        ).read_text() == "# Test Skill\nUpdated content"


================================================
FILE: backend/onyx/server/features/build/sandbox/models.py
================================================
"""Pydantic models for sandbox module communication."""

from datetime import datetime
from uuid import UUID

from pydantic import BaseModel

from onyx.db.enums import SandboxStatus


class LLMProviderConfig(BaseModel):
    """LLM provider configuration for sandbox provisioning.

    Passed to SandboxManager.provision() to configure the LLM.
    """

    provider: str
    model_name: str
    api_key: str | None
    api_base: str | None


class SandboxInfo(BaseModel):
    """Information about a sandbox instance.

    Returned by SandboxManager.provision() and other methods.
    """

    sandbox_id: UUID
    directory_path: str
    status: SandboxStatus
    last_heartbeat: datetime | None


class SnapshotResult(BaseModel):
    """Result of creating a snapshot (without DB record).

    Returned by SandboxManager.create_snapshot().
    The caller is responsible for creating the DB record.
    """

    storage_path: str
    size_bytes: int


class SnapshotInfo(BaseModel):
    """Full information about a sandbox snapshot (including DB info).

    Used when returning snapshot information to API callers.
    """

    id: str
    sandbox_id: str
    storage_path: str
    created_at: datetime
    size_bytes: int


class FilesystemEntry(BaseModel):
    """Represents a file or directory entry in the sandbox filesystem.

    Used for directory listing operations. This is the canonical model used
    by both sandbox managers and the API layer.
    """

    name: str
    path: str
    is_directory: bool
    size: int | None = None  # File size in bytes (None for directories)
    mime_type: str | None = None  # MIME type (None for directories)


================================================
FILE: backend/onyx/server/features/build/sandbox/tasks/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/build/sandbox/tasks/tasks.py
================================================
"""Celery tasks for sandbox operations (cleanup, file sync, etc.)."""

from collections.abc import Iterator
from contextlib import contextmanager
from typing import TYPE_CHECKING
from uuid import UUID

from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock

if TYPE_CHECKING:
    from sqlalchemy.orm import Session

from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import SandboxStatus
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SANDBOX_IDLE_TIMEOUT_SECONDS
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.configs import USER_LIBRARY_SOURCE_DIR
from onyx.server.features.build.db.build_session import clear_nextjs_ports_for_user
from onyx.server.features.build.db.build_session import (
    mark_user_sessions_idle__no_commit,
)
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.sandbox.base import get_sandbox_manager
from onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (
    KubernetesSandboxManager,
)


# Snapshot retention period in days
SNAPSHOT_RETENTION_DAYS = 30

# 100 minutes - snapshotting can take time
TIMEOUT_SECONDS = 6000


@shared_task(
    name=OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,
    soft_time_limit=TIMEOUT_SECONDS,
    bind=True,
    ignore_result=True,
)
def cleanup_idle_sandboxes_task(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001
    """Put idle sandboxes to sleep after snapshotting all sessions.

    This task:
    1. Finds sandboxes that have been idle longer than SANDBOX_IDLE_TIMEOUT_SECONDS
    2. Lists all session directories in the pod's /workspace/sessions/
    3. Creates a snapshot of each session's outputs to S3
    4. Terminates the pod (but keeps the sandbox record)
    5. Marks the sandbox as SLEEPING (can be restored later)

    NOTE: This task is a no-op for local backend - sandboxes persist until
    manually terminated or server restart.

    Args:
        tenant_id: The tenant ID for multi-tenant isolation
    """
    # Skip cleanup for local backend - sandboxes persist until manual termination
    if SANDBOX_BACKEND == SandboxBackend.LOCAL:
        task_logger.debug(
            "cleanup_idle_sandboxes_task skipped (local backend - cleanup disabled)"
        )
        return

    task_logger.info(f"cleanup_idle_sandboxes_task starting for tenant {tenant_id}")

    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.CLEANUP_IDLE_SANDBOXES_BEAT_LOCK,
        timeout=TIMEOUT_SECONDS,
    )

    # Prevent overlapping runs of this task
    if not lock.acquire(blocking=False):
        task_logger.info("cleanup_idle_sandboxes_task - lock not acquired, skipping")
        return

    try:
        # Import here to avoid circular imports
        from onyx.db.enums import SandboxStatus
        from onyx.server.features.build.db.sandbox import create_snapshot__no_commit
        from onyx.server.features.build.db.sandbox import get_idle_sandboxes
        from onyx.server.features.build.db.sandbox import (
            update_sandbox_status__no_commit,
        )

        sandbox_manager = get_sandbox_manager()

        # Type guard for kubernetes-specific methods
        if not isinstance(sandbox_manager, KubernetesSandboxManager):
            task_logger.debug(
                "cleanup_idle_sandboxes_task skipped (not kubernetes backend)"
            )
            return

        with get_session_with_current_tenant() as db_session:
            idle_sandboxes = get_idle_sandboxes(
                db_session, SANDBOX_IDLE_TIMEOUT_SECONDS
            )

            if not idle_sandboxes:
                task_logger.debug("No idle sandboxes found")
                return

            task_logger.info(
                f"Found {len(idle_sandboxes)} idle sandboxes to put to sleep"
            )

            for sandbox in idle_sandboxes:
                sandbox_id = sandbox.id
                sandbox_id_str = str(sandbox_id)
                task_logger.info(f"Putting sandbox {sandbox_id_str} to sleep")

                try:
                    # List session directories in the pod
                    session_ids = _list_session_directories(sandbox_manager, sandbox_id)
                    task_logger.info(
                        f"Found {len(session_ids)} sessions in sandbox {sandbox_id_str}"
                    )

                    # Snapshot each session
                    for session_id_str in session_ids:
                        try:
                            session_id = UUID(session_id_str)
                            task_logger.debug(
                                f"Creating snapshot for session {session_id_str}"
                            )
                            snapshot_result = sandbox_manager.create_snapshot(
                                sandbox_id, session_id, tenant_id
                            )
                            if snapshot_result:
                                # Create DB record for the snapshot
                                create_snapshot__no_commit(
                                    db_session,
                                    session_id,
                                    snapshot_result.storage_path,
                                    snapshot_result.size_bytes,
                                )
                                task_logger.debug(
                                    f"Snapshot created for session {session_id_str}"
                                )
                        except Exception as e:
                            task_logger.warning(
                                f"Failed to create snapshot for session {session_id_str}: {e}"
                            )
                            # Continue with other sessions even if one fails

                    # Terminate the pod (but keep sandbox record)
                    sandbox_manager.terminate(sandbox_id)

                    # Zero out nextjs ports for all sessions (ports are no longer in use)
                    cleared = clear_nextjs_ports_for_user(db_session, sandbox.user_id)
                    task_logger.debug(
                        f"Cleared {cleared} nextjs_port allocations for user {sandbox.user_id}"
                    )

                    # Mark all active sessions as IDLE
                    idled = mark_user_sessions_idle__no_commit(
                        db_session, sandbox.user_id
                    )
                    task_logger.debug(
                        f"Marked {idled} sessions as IDLE for user {sandbox.user_id}"
                    )

                    update_sandbox_status__no_commit(
                        db_session, sandbox_id, SandboxStatus.SLEEPING
                    )
                    db_session.commit()
                    task_logger.info(f"Sandbox {sandbox_id_str} is now sleeping")

                except Exception as e:
                    task_logger.error(
                        f"Failed to put sandbox {sandbox_id_str} to sleep: {e}",
                        exc_info=True,
                    )
                    db_session.rollback()

    except Exception:
        task_logger.exception("Error in cleanup_idle_sandboxes_task")
        raise

    finally:
        if lock.owned():
            lock.release()

    task_logger.info("cleanup_idle_sandboxes_task completed")


def _list_session_directories(
    sandbox_manager: KubernetesSandboxManager,
    sandbox_id: UUID,
) -> list[str]:
    """List session directory names in the pod's /workspace/sessions/.

    Args:
        sandbox_manager: The kubernetes sandbox manager
        sandbox_id: The sandbox ID

    Returns:
        List of session ID strings (directory names)
    """
    from kubernetes.client.rest import ApiException  # type: ignore
    from kubernetes.stream import stream as k8s_stream  # type: ignore

    pod_name = sandbox_manager._get_pod_name(str(sandbox_id))

    # List directories in /workspace/sessions/
    exec_command = [
        "/bin/sh",
        "-c",
        'ls -1 /workspace/sessions/ 2>/dev/null || echo ""',
    ]

    try:
        resp = k8s_stream(
            sandbox_manager._core_api.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=sandbox_manager._namespace,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )

        # Parse output - one directory name per line
        session_ids = []
        for line in resp.strip().split("\n"):
            line = line.strip()
            if line:
                # Validate it looks like a UUID
                try:
                    UUID(line)
                    session_ids.append(line)
                except ValueError:
                    # Not a valid UUID, skip
                    pass

        return session_ids

    except ApiException as e:
        task_logger.warning(f"Failed to list session directories: {e}")
        return []


@contextmanager
def _acquire_sandbox_file_sync_lock(lock: RedisLock) -> Iterator[bool]:
    """Acquire the sandbox file-sync lock with blocking timeout; release on exit."""
    acquired = lock.acquire(
        blocking_timeout=CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT,
    )
    try:
        yield acquired
    finally:
        if lock.owned():
            lock.release()


def _get_disabled_user_library_paths(db_session: "Session", user_id: str) -> list[str]:
    """Get list of disabled user library file paths for exclusion during sync.

    Queries the document table for CRAFT_FILE documents with sync_disabled=True
    and returns their relative paths within user_library/.

    Args:
        db_session: Database session
        user_id: The user ID to filter documents

    Returns:
        List of relative file paths to exclude (e.g., ["/data/file.xlsx", "/old/report.pdf"])
    """
    from uuid import UUID

    from onyx.configs.constants import DocumentSource
    from onyx.db.document import get_documents_by_source

    disabled_paths: list[str] = []

    # Get CRAFT_FILE documents for this user (filtered at SQL level)
    documents = get_documents_by_source(
        db_session=db_session,
        source=DocumentSource.CRAFT_FILE,
        creator_id=UUID(user_id),
    )

    for doc in documents:
        doc_metadata = doc.doc_metadata or {}
        if not doc_metadata.get("sync_disabled"):
            continue

        # Extract file path from semantic_id
        # semantic_id format: "user_library/path/to/file.xlsx"
        # Include both files AND directories - the shell script in
        # setup_session_workspace() handles directory exclusion by
        # checking if paths are children of an excluded directory.
        semantic_id = doc.semantic_id or ""
        if semantic_id.startswith(USER_LIBRARY_SOURCE_DIR):
            file_path = semantic_id[len(USER_LIBRARY_SOURCE_DIR) :]
            if file_path:
                disabled_paths.append(file_path)

    return disabled_paths


@shared_task(
    name=OnyxCeleryTask.SANDBOX_FILE_SYNC,
    soft_time_limit=TIMEOUT_SECONDS,
    bind=True,
    ignore_result=True,
)
def sync_sandbox_files(
    self: Task,  # noqa: ARG001
    *,
    user_id: str,
    tenant_id: str,
    source: str | None = None,
) -> bool:
    """Sync files from S3 to a user's running sandbox.

    This task is triggered after documents are written to S3 during indexing.
    It executes `s5cmd sync` in the file-sync sidecar container to download
    any new or changed files.

    Per-user locking ensures only one sync runs at a time for a given user.
    If a sync is already in progress, this task will wait until it completes.

    Note: File visibility in sessions is controlled via filtered symlinks in
    setup_session_workspace(), not at the sync level. The sync mirrors S3
    faithfully; disabled files are excluded only when creating new sessions.

    Args:
        user_id: The user ID whose sandbox should be synced
        tenant_id: The tenant ID for S3 path construction
        source: Optional source type (e.g., "gmail", "google_drive", "user_library").
                If None, syncs all sources.

    Returns:
        True if sync was successful, False if skipped or failed
    """
    source_info = f" source={source}" if source else " (all sources)"
    task_logger.info(
        f"sync_sandbox_files starting for user {user_id} in tenant {tenant_id}{source_info}"
    )

    lock_timeout = CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock = redis_client.lock(
        f"{OnyxRedisLocks.SANDBOX_FILE_SYNC_LOCK_PREFIX}:{user_id}",
        timeout=lock_timeout,
    )

    with _acquire_sandbox_file_sync_lock(lock) as acquired:
        if not acquired:
            task_logger.warning(
                f"sync_sandbox_files - failed to acquire lock for user {user_id} after {lock_timeout}s, skipping"
            )
            return False

        with get_session_with_current_tenant() as db_session:
            sandbox = get_sandbox_by_user_id(db_session, UUID(user_id))
            if sandbox is None:
                task_logger.debug(f"No sandbox found for user {user_id}, skipping sync")
                return False
            if sandbox.status != SandboxStatus.RUNNING:
                task_logger.debug(
                    f"Sandbox {sandbox.id} not running (status={sandbox.status}), skipping sync"
                )
                return False

            sandbox_manager = get_sandbox_manager()
            result = sandbox_manager.sync_files(
                sandbox_id=sandbox.id,
                user_id=UUID(user_id),
                tenant_id=tenant_id,
                source=source,
            )
            if result:
                task_logger.info(f"File sync completed for user {user_id}{source_info}")
            else:
                task_logger.warning(f"File sync failed for user {user_id}{source_info}")
            return result


# NOTE: in the future, may need to add this. For now, will do manual cleanup.
# @shared_task(
#     name=OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,
#     soft_time_limit=300,
#     bind=True,
#     ignore_result=True,
# )
# def cleanup_old_snapshots_task(self: Task, *, tenant_id: str) -> None:
#     """Delete snapshots older than the retention period.

#     This task cleans up old snapshots to manage storage usage.
#     Snapshots older than SNAPSHOT_RETENTION_DAYS are deleted.

#     NOTE: This task is a no-op for local backend since snapshots are disabled.

#     Args:
#         tenant_id: The tenant ID for multi-tenant isolation
#     """
#     # Skip for local backend - no snapshots to clean up
#     if SANDBOX_BACKEND == SandboxBackend.LOCAL:
#         task_logger.debug(
#             "cleanup_old_snapshots_task skipped (local backend - snapshots disabled)"
#         )
#         return

#     task_logger.info(f"cleanup_old_snapshots_task starting for tenant {tenant_id}")

#     redis_client = get_redis_client(tenant_id=tenant_id)
#     lock: RedisLock = redis_client.lock(
#         OnyxRedisLocks.CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK,
#         timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
#     )

#     # Prevent overlapping runs of this task
#     if not lock.acquire(blocking=False):
#         task_logger.debug("cleanup_old_snapshots_task - lock not acquired, skipping")
#         return

#     try:
#         from onyx.server.features.build.db.sandbox import delete_old_snapshots

#         with get_session_with_current_tenant() as db_session:
#             deleted_count = delete_old_snapshots(
#                 db_session, tenant_id, SNAPSHOT_RETENTION_DAYS
#             )

#             if deleted_count > 0:
#                 task_logger.info(
#                     f"Deleted {deleted_count} old snapshots for tenant {tenant_id}"
#                 )
#             else:
#                 task_logger.debug("No old snapshots to delete")

#     except Exception:
#         task_logger.exception("Error in cleanup_old_snapshots_task")
#         raise

#     finally:
#         if lock.owned():
#             lock.release()

#     task_logger.info("cleanup_old_snapshots_task completed")


================================================
FILE: backend/onyx/server/features/build/sandbox/util/__init__.py
================================================
"""Template and configuration utilities for sandbox environments.

Contains utilities for:
- Building sandbox templates (Next.js, venv)
- Generating agent instructions (AGENTS.md)
- Generating opencode configuration
"""

from onyx.server.features.build.sandbox.util.agent_instructions import (
    build_knowledge_sources_section,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    build_skills_section,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    build_user_context,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    extract_skill_description,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    generate_agent_instructions,
)
from onyx.server.features.build.sandbox.util.agent_instructions import (
    get_provider_display_name,
)
from onyx.server.features.build.sandbox.util.opencode_config import (
    build_opencode_config,
)

__all__ = [
    "build_knowledge_sources_section",
    "build_opencode_config",
    "build_skills_section",
    "build_user_context",
    "extract_skill_description",
    "generate_agent_instructions",
    "get_provider_display_name",
]


================================================
FILE: backend/onyx/server/features/build/sandbox/util/agent_instructions.py
================================================
"""Shared utilities for generating AGENTS.md content.

This module provides functions for building dynamic agent instructions
that are shared between local and kubernetes sandbox managers.
"""

import threading
from pathlib import Path

from onyx.utils.logger import setup_logger

logger = setup_logger()

# Cache for skills section (skills are static, cached indefinitely)
_skills_cache: dict[str, str] = {}
_skills_cache_lock = threading.Lock()

# Provider display name mapping
PROVIDER_DISPLAY_NAMES = {
    "openai": "OpenAI",
    "anthropic": "Anthropic",
    "azure": "Azure OpenAI",
    "google": "Google AI",
    "bedrock": "AWS Bedrock",
    "vertex": "Google Vertex AI",
}

# Type alias for connector info entries
ConnectorInfoEntry = dict[str, str | int]

# Connector information for generating knowledge sources section
# Keys are normalized (lowercase, underscores) directory names
# Each entry has: summary (with optional {subdirs}), file_pattern, scan_depth
# NOTE: This is duplicated in kubernetes/docker/generate_agents_md.py to avoid circular imports
CONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {
    "google_drive": {
        "summary": "Documents and files from Google Drive. This may contain information about a user and work they have done",
        "file_pattern": "`FILE_NAME.json`",
        "scan_depth": 0,
    },
    "gmail": {
        "summary": "Email conversations and threads",
        "file_pattern": "`FILE_NAME.json`",
        "scan_depth": 0,
    },
    "linear": {
        "summary": "Engineering tickets from teams: {subdirs}",
        "file_pattern": "`[TEAM]/[TICKET_ID]_TICKET_TITLE.json`",
        "scan_depth": 2,
    },
    "slack": {
        "summary": "Team messages from channels: {subdirs}",
        "file_pattern": "`[CHANNEL]/[AUTHOR]_in_[CHANNEL]__[MSG].json`",
        "scan_depth": 1,
    },
    "github": {
        "summary": "Pull requests and code from: {subdirs}",
        "file_pattern": "`[ORG]/[REPO]/pull_requests/[PR_NUMBER]__[PR_TITLE].json`",
        "scan_depth": 2,
    },
    "fireflies": {
        "summary": "Meeting transcripts from: {subdirs}",
        "file_pattern": "`[YYYY-MM]/CALL_TITLE.json`",
        "scan_depth": 1,
    },
    "hubspot": {
        "summary": "CRM data including: {subdirs}",
        "file_pattern": "`[TYPE]/[RECORD_NAME].json`",
        "scan_depth": 1,
    },
    "notion": {
        "summary": "Documentation and notes: {subdirs}",
        "file_pattern": "`PAGE_TITLE.json`",
        "scan_depth": 1,
    },
    "user_library": {
        "summary": "User-uploaded files (spreadsheets, documents, presentations, etc.)",
        "file_pattern": "Any file format",
        "scan_depth": 1,
    },
}
DEFAULT_SCAN_DEPTH = 1


def get_provider_display_name(provider: str | None) -> str | None:
    """Get user-friendly display name for LLM provider.

    Args:
        provider: Internal provider name

    Returns:
        User-friendly display name, or None if provider is None
    """
    if not provider:
        return None

    return PROVIDER_DISPLAY_NAMES.get(provider, provider.title())


def build_user_context(user_name: str | None, user_role: str | None) -> str:
    """Build the user context section for AGENTS.md.

    Args:
        user_name: User's name
        user_role: User's role/title

    Returns:
        Formatted user context string
    """
    if not user_name:
        return ""

    if user_role:
        return f"You are assisting **{user_name}**, {user_role}, with their work."
    return f"You are assisting **{user_name}** with their work."


# Content for the org_info section when demo data is enabled
ORG_INFO_SECTION_CONTENT = """## Organization Info

The `org_info/` directory contains information about the organization and user context:

- `AGENTS.md`: Description of available organizational information files
- `user_identity_profile.txt`: Contains the current user's name, email, and organization
  they work for. Use this information when personalizing outputs or when the user asks
  about their identity.
- `organization_structure.json`: Contains a JSON representation of the organization's
  groups, managers, and their direct reports. Use this to understand reporting
  relationships and team structures."""


# Content for the attachments section when user has uploaded files
ATTACHMENTS_SECTION_CONTENT = """## Attachments (PRIORITY)

The `attachments/` directory contains files that the user has explicitly
uploaded during this session. **These files are critically important** and
should be treated as high-priority context.

### Why Attachments Matter

- The user deliberately chose to upload these files, signaling they are directly relevant to the task
- These files often contain the specific data, requirements, or examples the user wants you to work with
- They may include spreadsheets, documents, images, or code that should inform your work

### Required Actions

**At the start of every task, you MUST:**

1. **Check for attachments**: List the contents of `attachments/` to see what the user has provided
2. **Read and analyze each file**: Thoroughly examine every attachment to understand its contents and relevance
3. **Reference attachment content**: Use the information from attachments to inform your responses and outputs

### File Handling

- Uploaded files may be in various formats: CSV, JSON, PDF, images, text files, etc.
- For spreadsheets and data files, examine the structure, columns, and sample data
- For documents, extract key information and requirements
- For images, analyze and describe their content
- For code files, understand the logic and patterns

**Do NOT ignore user uploaded files.** They are there for a reason and likely
contain exactly what you need to complete the task successfully."""


def build_org_info_section(include_org_info: bool) -> str:
    """Build the organization info section for AGENTS.md.

    Only includes the org_info section when demo data is enabled,
    since the org_info/ directory is only set up in that case.

    Args:
        include_org_info: Whether to include the org_info section

    Returns:
        Formatted org info section string, or empty string if not included
    """
    if include_org_info:
        return ORG_INFO_SECTION_CONTENT
    return ""


def extract_skill_description(skill_md_path: Path) -> str:
    """Extract a brief description from a SKILL.md file.

    If the file has YAML frontmatter (delimited by ---), uses the
    ``description`` field. Otherwise falls back to the first paragraph.

    Args:
        skill_md_path: Path to the SKILL.md file

    Returns:
        Brief description (truncated to ~120 chars)
    """
    try:
        content = skill_md_path.read_text()
        lines = content.strip().split("\n")

        # Try YAML frontmatter first
        if lines and lines[0].strip() == "---":
            for line in lines[1:]:
                if line.strip() == "---":
                    break
                if line.startswith("description:"):
                    desc = line.split(":", 1)[1].strip().strip('"').strip("'")
                    if desc:
                        if len(desc) > 120:
                            desc = desc[:117] + "..."
                        return desc

        # Fallback: first non-heading paragraph after frontmatter
        in_frontmatter = lines[0].strip() == "---" if lines else False
        description_lines: list[str] = []
        for line in lines[1:] if in_frontmatter else lines:
            stripped = line.strip()
            # Skip until end of frontmatter
            if in_frontmatter:
                if stripped == "---":
                    in_frontmatter = False
                continue
            if not stripped:
                if description_lines:
                    break
                continue
            if stripped.startswith("#"):
                continue
            description_lines.append(stripped)
            if len(" ".join(description_lines)) > 100:
                break

        description = " ".join(description_lines)
        if len(description) > 120:
            description = description[:117] + "..."
        return description or "No description available."
    except Exception:
        return "No description available."


def _scan_skills_directory(skills_path: Path) -> str:
    """Internal function to scan skills directory (not cached).

    Args:
        skills_path: Path to the skills directory

    Returns:
        Formatted skills section string
    """
    skills_list: list[str] = []
    try:
        for skill_dir in sorted(skills_path.iterdir()):
            if not skill_dir.is_dir():
                continue

            skill_md = skill_dir / "SKILL.md"
            if skill_md.exists():
                description = extract_skill_description(skill_md)
                skills_list.append(f"- **{skill_dir.name}**: {description}")
    except Exception as e:
        logger.warning(f"Error scanning skills directory: {e}")
        return "Error loading skills."

    if not skills_list:
        return "No skills available."

    return "\n".join(skills_list)


def build_skills_section(skills_path: Path) -> str:
    """Build the available skills section by scanning the skills directory.

    Skills are static, so results are cached indefinitely for performance.

    Args:
        skills_path: Path to the skills directory

    Returns:
        Formatted skills section string
    """
    if not skills_path.exists():
        return "No skills available."

    cache_key = str(skills_path)

    # Check cache first (skills are static, no TTL needed)
    with _skills_cache_lock:
        cached = _skills_cache.get(cache_key)
        if cached is not None:
            return cached

    # Cache miss - scan the directory
    result = _scan_skills_directory(skills_path)

    # Update cache
    with _skills_cache_lock:
        _skills_cache[cache_key] = result

    return result


def _normalize_connector_name(name: str) -> str:
    """Normalize a connector directory name for lookup."""
    return name.lower().replace(" ", "_").replace("-", "_")


def _scan_directory_to_depth(
    directory: Path, current_depth: int, max_depth: int, indent: str = "  "
) -> list[str]:
    """Recursively scan directory up to max_depth levels.

    Args:
        directory: Directory to scan
        current_depth: Current depth level (0 = connector root)
        max_depth: Maximum depth to scan
        indent: Indentation string for current level

    Returns:
        List of formatted directory lines
    """
    if current_depth >= max_depth:
        return []

    lines: list[str] = []
    try:
        subdirs = sorted(
            d for d in directory.iterdir() if d.is_dir() and not d.name.startswith(".")
        )

        for subdir in subdirs[:10]:  # Limit to 10 per level
            lines.append(f"{indent}- {subdir.name}/")

            # Recurse if we haven't hit max depth
            if current_depth + 1 < max_depth:
                nested = _scan_directory_to_depth(
                    subdir, current_depth + 1, max_depth, indent + "  "
                )
                lines.extend(nested)

        if len(subdirs) > 10:
            lines.append(f"{indent}- ... and {len(subdirs) - 10} more")
    except Exception:
        pass

    return lines


def build_knowledge_sources_section(files_path: Path) -> str:
    """Build combined knowledge sources section with summary, structure, and file patterns.

    This creates a single section per connector that includes:
    - What kind of data it contains (with actual subdirectory names)
    - The directory structure
    - The file naming pattern

    Args:
        files_path: Path to the files directory (symlink to knowledge sources)

    Returns:
        Formatted knowledge sources section
    """
    if not files_path.exists():
        return "No knowledge sources available."

    # Resolve the symlink to get the actual path
    try:
        actual_path = files_path.resolve()
        if not actual_path.exists():
            return "No knowledge sources available."
    except Exception:
        actual_path = files_path

    sections: list[str] = []
    try:
        for item in sorted(files_path.iterdir()):
            if not item.is_dir() or item.name.startswith("."):
                continue

            normalized = _normalize_connector_name(item.name)
            info = CONNECTOR_INFO.get(normalized, {})

            # Get subdirectory names
            subdirs: list[str] = []
            try:
                subdirs = sorted(
                    d.name
                    for d in item.iterdir()
                    if d.is_dir() and not d.name.startswith(".")
                )[:5]
            except Exception:
                pass

            # Build summary with subdirs
            summary_template = str(info.get("summary", f"Data from {item.name}"))
            if "{subdirs}" in summary_template and subdirs:
                subdir_str = ", ".join(subdirs)
                if len(subdirs) == 5:
                    subdir_str += ", ..."
                summary = summary_template.format(subdirs=subdir_str)
            elif "{subdirs}" in summary_template:
                summary = summary_template.replace(": {subdirs}", "").replace(
                    " {subdirs}", ""
                )
            else:
                summary = summary_template

            # Build connector section
            file_pattern = str(info.get("file_pattern", ""))
            scan_depth = int(info.get("scan_depth", DEFAULT_SCAN_DEPTH))

            lines = [f"### {item.name}/"]
            lines.append(f"{summary}.\n")
            # Add directory structure if depth > 0
            if scan_depth > 0:
                lines.append("Directory structure:\n")
                nested = _scan_directory_to_depth(item, 0, scan_depth, "")
                if nested:
                    lines.append("")
                    lines.extend(nested)

            lines.append(f"\nFile format: {file_pattern}")

            sections.append("\n".join(lines))
    except Exception as e:
        logger.warning(f"Error building knowledge sources section: {e}")
        return "Error scanning knowledge sources."

    if not sections:
        return "No knowledge sources available."

    return "\n\n".join(sections)


def generate_agent_instructions(
    template_path: Path,
    skills_path: Path,
    files_path: Path | None = None,
    provider: str | None = None,
    model_name: str | None = None,
    nextjs_port: int | None = None,
    disabled_tools: list[str] | None = None,
    user_name: str | None = None,
    user_role: str | None = None,
    use_demo_data: bool = False,
    include_org_info: bool = False,
) -> str:
    """Generate AGENTS.md content by populating the template with dynamic values.

    Args:
        template_path: Path to the AGENTS.template.md file
        skills_path: Path to the skills directory
        files_path: Path to the files directory (symlink to knowledge sources)
        provider: LLM provider type (e.g., "openai", "anthropic")
        model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
        nextjs_port: Port for Next.js development server
        disabled_tools: List of disabled tools
        user_name: User's name for personalization
        user_role: User's role/title for personalization
        use_demo_data: If True, exclude user context from AGENTS.md
        include_org_info: Whether to include the org_info section (demo data mode)

    Returns:
        Generated AGENTS.md content with placeholders replaced
    """
    if not template_path.exists():
        logger.warning(f"AGENTS.template.md not found at {template_path}")
        return "# Agent Instructions\n\nNo custom instructions provided."

    # Read template content
    template_content = template_path.read_text()

    # Build user context section - only include when NOT using demo data
    user_context = "" if use_demo_data else build_user_context(user_name, user_role)

    # Build LLM configuration section
    provider_display = get_provider_display_name(provider)

    # Build disabled tools section
    disabled_tools_section = ""
    if disabled_tools:
        disabled_tools_section = f"\n**Disabled Tools**: {', '.join(disabled_tools)}\n"

    # Build available skills section
    available_skills_section = build_skills_section(skills_path)

    # Build org info section (only included when demo data is enabled)
    org_info_section = build_org_info_section(include_org_info)

    # Replace placeholders
    content = template_content
    content = content.replace("{{USER_CONTEXT}}", user_context)
    content = content.replace("{{LLM_PROVIDER_NAME}}", provider_display or "Unknown")
    content = content.replace("{{LLM_MODEL_NAME}}", model_name or "Unknown")
    content = content.replace(
        "{{NEXTJS_PORT}}", str(nextjs_port) if nextjs_port else "Unknown"
    )
    content = content.replace("{{DISABLED_TOOLS_SECTION}}", disabled_tools_section)
    content = content.replace("{{AVAILABLE_SKILLS_SECTION}}", available_skills_section)
    content = content.replace("{{ORG_INFO_SECTION}}", org_info_section)

    # Only replace file-related placeholders if files_path is provided.
    # When files_path is None (e.g., Kubernetes), leave placeholders intact
    # so the container can replace them after files are synced.
    if files_path:
        knowledge_sources_section = build_knowledge_sources_section(files_path)
        content = content.replace(
            "{{KNOWLEDGE_SOURCES_SECTION}}", knowledge_sources_section
        )

    return content


================================================
FILE: backend/onyx/server/features/build/sandbox/util/build_venv_template.py
================================================
#!/usr/bin/env python3
"""Build sandbox template for Python venv."""

import argparse
import subprocess
import sys
from pathlib import Path

try:
    from onyx.server.features.build.configs import (
        OUTPUTS_TEMPLATE_PATH,
        VENV_TEMPLATE_PATH,
    )
except ImportError:
    # Fallback if running as standalone script
    import os

    OUTPUTS_TEMPLATE_PATH = os.environ.get(
        "OUTPUTS_TEMPLATE_PATH", "/templates/outputs"
    )
    VENV_TEMPLATE_PATH = os.environ.get("VENV_TEMPLATE_PATH", "/templates/venv")


def build_python_venv_template(target_path: Path, requirements_path: Path) -> None:
    """Build Python venv template with required packages.

    Creates a Python virtual environment and installs packages from requirements file.

    Args:
        target_path: Path where the venv should be created
        requirements_path: Path to requirements.txt file

    Raises:
        RuntimeError: If venv creation or package installation fails
    """
    if not requirements_path.exists():
        raise FileNotFoundError(f"Requirements file not found: {requirements_path}")

    # Create venv
    print("  Creating virtual environment...")
    result = subprocess.run(
        [sys.executable, "-m", "venv", str(target_path)],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Failed to create virtual environment: {result.stderr}")

    # Determine pip path based on OS
    if sys.platform == "win32":
        pip_path = target_path / "Scripts" / "pip"
    else:
        pip_path = target_path / "bin" / "pip"

    # Install requirements
    print(f"  Installing packages from {requirements_path.name}...")
    install_result = subprocess.run(
        [str(pip_path), "install", "-r", str(requirements_path)],
        capture_output=True,
        text=True,
    )
    if install_result.returncode != 0:
        raise RuntimeError(f"Failed to install packages: {install_result.stderr}")


def main() -> None:
    """Build Python venv template.

    Web template is already provided at backend/onyx/server/features/build/sandbox/templates/web
    """
    parser = argparse.ArgumentParser(
        description="Build Python venv template for sandbox (web template already provided)"
    )
    parser.add_argument(
        "--venv-dir",
        type=str,
        default=VENV_TEMPLATE_PATH,
        help=f"Output directory for Python venv template (default: {VENV_TEMPLATE_PATH})",
    )
    parser.add_argument(
        "--requirements",
        type=str,
        default=None,
        help="Path to requirements.txt (default: auto-detect)",
    )

    args = parser.parse_args()

    venv_dir = Path(args.venv_dir)

    # Find requirements file
    if args.requirements:
        requirements_file = Path(args.requirements)
    else:
        # Try to find requirements file relative to script location
        script_dir = Path(__file__).parent
        requirements_file = (
            script_dir.parent.parent
            / "sandbox"
            / "kubernetes"
            / "docker"
            / "initial-requirements.txt"
        )
        if not requirements_file.exists():
            raise FileNotFoundError(
                f"Could not find requirements file. Expected at {requirements_file} or specify with --requirements"
            )

    # Show web template location
    print(f"\nOutputs template path: {OUTPUTS_TEMPLATE_PATH}")
    print(f"Venv template path: {VENV_TEMPLATE_PATH}")

    # Build Python venv template
    print(f"\nBuilding Python venv template to {venv_dir}...")
    print("  (This may take 30-60 seconds)")
    build_python_venv_template(venv_dir, requirements_file)
    print("✅ Python venv template built successfully")

    print("\nTemplate ready! You can now create sandboxes.")


if __name__ == "__main__":
    main()


================================================
FILE: backend/onyx/server/features/build/sandbox/util/opencode_config.py
================================================
"""Shared opencode configuration generation.

This module provides a centralized way to generate opencode.json configuration
that is consistent across local and Kubernetes sandbox environments.
"""

from typing import Any


def build_opencode_config(
    provider: str,
    model_name: str,
    api_key: str | None = None,
    api_base: str | None = None,
    disabled_tools: list[str] | None = None,
    dev_mode: bool = False,
) -> dict[str, Any]:
    """Build opencode.json configuration dict.

    Creates the configuration structure for the opencode CLI agent with
    provider-specific settings for thinking/reasoning and tool permissions.

    Args:
        provider: LLM provider type (e.g., "openai", "anthropic")
        model_name: Model name (e.g., "claude-sonnet-4-5", "gpt-4o")
        api_key: Optional API key for the provider
        api_base: Optional custom API base URL
        disabled_tools: Optional list of tools to disable (e.g., ["question", "webfetch"])
        dev_mode: If True, allow all external directories. If False (Docker/Kubernetes),
                  only whitelist /workspace/files and /workspace/demo_data.

    Returns:
        Configuration dict ready to be serialized to JSON
    """
    # Build opencode model string: provider/model-name
    opencode_model = f"{provider}/{model_name}"

    # Build configuration with schema
    config: dict[str, Any] = {
        "$schema": "https://opencode.ai/config.json",
        "model": opencode_model,
        "provider": {},
    }

    # Build provider configuration
    provider_config: dict[str, Any] = {}

    # Add API key if provided
    if api_key:
        provider_config["options"] = {"apiKey": api_key}

    # Add API base if provided
    if api_base:
        provider_config["api"] = api_base

    # Build model configuration with thinking/reasoning options
    options: dict[str, Any] = {}

    if provider == "openai":
        options["reasoningEffort"] = "high"
    elif provider == "anthropic":
        options["thinking"] = {
            "type": "enabled",
            "budgetTokens": 16000,
        }
    elif provider == "google":
        options["thinking_budget"] = 16000
        options["thinking_level"] = "high"
    elif provider == "bedrock":
        options["thinking"] = {
            "type": "enabled",
            "budgetTokens": 16000,
        }
    elif provider == "azure":
        options["reasoningEffort"] = "high"

    # Add model configuration to provider
    if options:
        provider_config["models"] = {
            model_name: {
                "options": options,
            }
        }

    # Add provider to config
    config["provider"][provider] = provider_config

    # Set default tool permissions
    # Order matters: last matching rule wins
    # Allow all files first, then deny specific files
    config["permission"] = {
        "bash": {
            # Dangerous commands
            "rm": "deny",
            "ssh": "deny",
            "scp": "deny",
            "sftp": "deny",
            "ftp": "deny",
            "telnet": "deny",
            "nc": "deny",
            "netcat": "deny",
            # Block file reading commands to force use of read tool with permissions
            "tac": "deny",
            "nl": "deny",
            "od": "deny",
            "xxd": "deny",
            "hexdump": "deny",
            "strings": "deny",
            "base64": "deny",
            "*": "allow",  # Allow other bash commands
        },
        "edit": {
            "opencode.json": "deny",
            "**/opencode.json": "deny",
            "*": "allow",
        },
        "write": {
            "opencode.json": "deny",
            "**/opencode.json": "deny",
            "*": "allow",
        },
        "read": {
            "*": "allow",
            "opencode.json": "deny",
            "**/opencode.json": "deny",
        },
        "grep": {
            "*": "allow",
            "opencode.json": "deny",
            "**/opencode.json": "deny",
        },
        "glob": {
            "*": "allow",
            "opencode.json": "deny",
            "**/opencode.json": "deny",
        },
        "list": "allow",
        "lsp": "allow",
        "patch": "allow",
        "skill": "allow",
        "question": "allow",
        "webfetch": "allow",
        # External directory permissions:
        # - dev_mode: Allow all external directories for local development
        # - Docker/Kubernetes: Whitelist only specific directories
        "external_directory": (
            "allow"
            if dev_mode
            else {
                "*": "deny",  # Deny all external directories by default
                "/workspace/files": "allow",  # Allow files directory
                "/workspace/files/**": "allow",  # Allow files directory contents
                "/workspace/demo_data": "allow",  # Allow demo data directory
                "/workspace/demo_data/**": "allow",  # Allow demo data directory contents
            }
        ),
    }

    # Disable specified tools via permissions
    if disabled_tools:
        for tool in disabled_tools:
            config["permission"][tool] = "deny"

    return config


================================================
FILE: backend/onyx/server/features/build/sandbox/util/persona_mapping.py
================================================
"""Persona mapping utility for demo user identities and org structure.

Maps frontend persona selections (work_area + level) to demo user profiles
with name and email for sandbox provisioning.

Also provides organizational structure data and content generators for org_info files.
Single source of truth for both local and Kubernetes sandbox provisioning.
"""

from typing import TypedDict


class PersonaInfo(TypedDict):
    """Type for persona information."""

    name: str
    email: str


# Persona mapping: work_area -> level -> PersonaInfo
PERSONA_MAPPING: dict[str, dict[str, PersonaInfo]] = {
    "engineering": {
        "ic": {
            "name": "Jiwon Kang",
            "email": "jiwon_kang@netherite-extraction.onyx.app",
        },
        "manager": {
            "name": "Javier Morales",
            "email": "javier_morales@netherite-extraction.onyx.app",
        },
    },
    "sales": {
        "ic": {
            "name": "Megan Foster",
            "email": "megan_foster@netherite-extraction.onyx.app",
        },
        "manager": {
            "name": "Valeria Cruz",
            "email": "valeria_cruz@netherite-extraction.onyx.app",
        },
    },
    "product": {
        "ic": {
            "name": "Michael Anderson",
            "email": "michael_anderson@netherite-extraction.onyx.app",
        },
        "manager": {
            "name": "David Liu",
            "email": "david_liu@netherite-extraction.onyx.app",
        },
    },
    "marketing": {
        "ic": {
            "name": "Rahul Patel",
            "email": "rahul_patel@netherite-extraction.onyx.app",
        },
        "manager": {
            "name": "Olivia Reed",
            "email": "olivia_reed@netherite-extraction.onyx.app",
        },
    },
    "executives": {
        "ic": {
            "name": "Sarah Mitchell",
            "email": "sarah_mitchell@netherite-extraction.onyx.app",
        },
        "manager": {
            "name": "Sarah Mitchell",
            "email": "sarah_mitchell@netherite-extraction.onyx.app",
        },
    },
    "other": {
        "manager": {
            "name": "Ralf Schroeder",
            "email": "ralf_schroeder@netherite-extraction.onyx.app",
        },
        "ic": {
            "name": "John Carpenter",
            "email": "john_carpenter@netherite-extraction.onyx.app",
        },
    },
}

# Organization structure - maps managers to their direct reports
ORGANIZATION_STRUCTURE: dict[str, dict[str, list[str]]] = {
    "engineering": {
        "javier_morales@netherite-extraction.onyx.app": [
            "tyler_jenkins@netherite-extraction.onyx.app",
            "jiwon_kang@netherite-extraction.onyx.app",
            "brooke_spencer@netherite-extraction.onyx.app",
            "andre_robinson@netherite-extraction.onyx.app",
        ],
        "isabella_torres@netherite-extraction.onyx.app": [
            "ryan_murphy@netherite-extraction.onyx.app",
            "jason_morris@netherite-extraction.onyx.app",
            "kevin_sullivan@netherite-extraction.onyx.app",
        ],
    },
    "sales": {
        "valeria_cruz@netherite-extraction.onyx.app": [
            "megan_foster@netherite-extraction.onyx.app",
            "mina_park@netherite-extraction.onyx.app",
            "james_choi@netherite-extraction.onyx.app",
            "camila_vega@netherite-extraction.onyx.app",
        ],
        "layla_farah@netherite-extraction.onyx.app": [
            "arjun_mehta@netherite-extraction.onyx.app",
            "sneha_reddy@netherite-extraction.onyx.app",
            "irene_shen@netherite-extraction.onyx.app",
        ],
    },
    "product": {
        "david_liu@netherite-extraction.onyx.app": [
            "michael_anderson@netherite-extraction.onyx.app",
            "kenji_watanabe@netherite-extraction.onyx.app",
            "sofia_ramirez@netherite-extraction.onyx.app",
        ],
    },
    "marketing": {
        "olivia_reed@netherite-extraction.onyx.app": [
            "rahul_patel@netherite-extraction.onyx.app",
            "yuna_lee@netherite-extraction.onyx.app",
            "peter_yamamoto@netherite-extraction.onyx.app",
        ],
    },
    "executives": {
        "sarah_mitchell@netherite-extraction.onyx.app": [
            "daniel_hughes@netherite-extraction.onyx.app",
            "amanda_brooks@netherite-extraction.onyx.app",
            "ananya_gupta@netherite-extraction.onyx.app",
        ],
    },
    "other": {
        "ralf_schroeder@netherite-extraction.onyx.app": [
            "john_carpenter@netherite-extraction.onyx.app",
        ],
    },
}

# AGENTS.md content for org_info directory
ORG_INFO_AGENTS_MD = """# AGENTS.md

This file provides information about which organizational information sources are available:

There are two files available that provide important information about the user's company and the user themselves.


## User Identity

The file `user_identity_profile.txt` contains the user's profile.

## Organizational Structure

The file `organization_structure.json` contains a json with the organization's groups, managers, and their reports.
"""


def get_persona_info(work_area: str | None, level: str | None) -> PersonaInfo | None:
    """Get persona info from work area and level.

    Args:
        work_area: User's work area (e.g., "engineering", "product", "sales")
        level: User's level (e.g., "ic", "manager")

    Returns:
        PersonaInfo with name and email, or None if no matching persona
    """
    if not work_area:
        return None

    work_area_lower = work_area.lower().strip()
    level_lower = (level or "manager").lower().strip()

    work_area_mapping = PERSONA_MAPPING.get(work_area_lower)
    if not work_area_mapping:
        return None

    return work_area_mapping.get(level_lower)


def generate_user_identity_content(persona: PersonaInfo) -> str:
    """Generate user identity profile content.

    Args:
        persona: PersonaInfo with name and email

    Returns:
        Content for user_identity_profile.txt
    """
    return f"Your name is {persona['name']}. Your email is {persona['email']}. You are working at Netherite Extraction Corp.\n"


================================================
FILE: backend/onyx/server/features/build/session/__init__.py
================================================
"""Session management for Build Mode."""

from onyx.server.features.build.session.manager import RateLimitError
from onyx.server.features.build.session.manager import SessionManager

__all__ = ["SessionManager", "RateLimitError"]


================================================
FILE: backend/onyx/server/features/build/session/manager.py
================================================
"""Public interface for session operations.

SessionManager is the main entry point for build session lifecycle management.
It orchestrates session CRUD, message handling, artifact management, and file system access.
"""

import io
import json
import mimetypes
import zipfile
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Any
from uuid import UUID

from acp.schema import AgentMessageChunk
from acp.schema import AgentPlanUpdate
from acp.schema import AgentThoughtChunk
from acp.schema import CurrentModeUpdate
from acp.schema import Error as ACPError
from acp.schema import PromptResponse
from acp.schema import ToolCallProgress
from acp.schema import ToolCallStart
from sqlalchemy.orm import Session as DBSession

from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import MessageType
from onyx.db.enums import SandboxStatus
from onyx.db.llm import fetch_default_llm_model
from onyx.db.models import BuildMessage
from onyx.db.models import BuildSession
from onyx.db.models import User
from onyx.db.users import fetch_user_by_id
from onyx.llm.factory import get_default_llm
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.server.features.build.api.models import DirectoryListing
from onyx.server.features.build.api.models import FileSystemEntry
from onyx.server.features.build.api.packet_logger import get_packet_logger
from onyx.server.features.build.api.packet_logger import log_separator
from onyx.server.features.build.api.packets import BuildPacket
from onyx.server.features.build.api.packets import ErrorPacket
from onyx.server.features.build.api.rate_limit import get_user_rate_limit_status
from onyx.server.features.build.configs import MAX_TOTAL_UPLOAD_SIZE_BYTES
from onyx.server.features.build.configs import MAX_UPLOAD_FILES_PER_SESSION
from onyx.server.features.build.configs import PERSISTENT_DOCUMENT_STORAGE_PATH
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.db.build_session import allocate_nextjs_port
from onyx.server.features.build.db.build_session import create_build_session__no_commit
from onyx.server.features.build.db.build_session import create_message
from onyx.server.features.build.db.build_session import delete_build_session__no_commit
from onyx.server.features.build.db.build_session import (
    fetch_llm_provider_by_type_for_build_mode,
)
from onyx.server.features.build.db.build_session import get_build_session
from onyx.server.features.build.db.build_session import get_empty_session_for_user
from onyx.server.features.build.db.build_session import get_session_messages
from onyx.server.features.build.db.build_session import get_user_build_sessions
from onyx.server.features.build.db.build_session import update_session_activity
from onyx.server.features.build.db.build_session import upsert_agent_plan
from onyx.server.features.build.db.sandbox import create_sandbox__no_commit
from onyx.server.features.build.db.sandbox import get_running_sandbox_count_by_tenant
from onyx.server.features.build.db.sandbox import get_sandbox_by_session_id
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
from onyx.server.features.build.db.sandbox import get_snapshots_for_session
from onyx.server.features.build.db.sandbox import update_sandbox_heartbeat
from onyx.server.features.build.db.sandbox import update_sandbox_status__no_commit
from onyx.server.features.build.sandbox import get_sandbox_manager
from onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (
    SSEKeepalive,
)
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.server.features.build.sandbox.tasks.tasks import (
    _get_disabled_user_library_paths,
)
from onyx.server.features.build.session.prompts import BUILD_NAMING_SYSTEM_PROMPT
from onyx.server.features.build.session.prompts import BUILD_NAMING_USER_PROMPT
from onyx.server.features.build.session.prompts import (
    FOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT,
)
from onyx.server.features.build.session.prompts import FOLLOWUP_SUGGESTIONS_USER_PROMPT
from onyx.tracing.framework.create import ensure_trace
from onyx.tracing.llm_utils import llm_generation_span
from onyx.tracing.llm_utils import record_llm_response
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


class UploadLimitExceededError(ValueError):
    """Raised when file upload limits are exceeded."""


class BuildStreamingState:
    """Container for accumulating state during ACP streaming.

    Similar to ChatStateContainer but adapted for ACP packet types.
    Accumulates chunks and tracks pending tool calls until completion.

    Usage:
        state = BuildStreamingState(turn_index=0)

        # During streaming:
        for packet in stream:
            if packet.type == "agent_message_chunk":
                state.add_message_chunk(packet.content.text)
            elif packet.type == "tool_call_progress" and packet.status == "completed":
                state.add_completed_tool_call(packet_data)
            # etc.

        # At end of streaming, call finalize methods and save
    """

    def __init__(self, turn_index: int) -> None:
        """Initialize streaming state for a turn.

        Args:
            turn_index: The 0-indexed user message number this turn belongs to
        """
        self.turn_index = turn_index

        # Accumulated text chunks (similar to answer_tokens in ChatStateContainer)
        self.message_chunks: list[str] = []
        self.thought_chunks: list[str] = []

        # For upserting agent_plan_update - track ID so we can update in place
        self.plan_message_id: UUID | None = None

        # Track what type of chunk we were last receiving
        self._last_chunk_type: str | None = None

    def add_message_chunk(self, text: str) -> None:
        """Accumulate message text."""
        self.message_chunks.append(text)
        self._last_chunk_type = "message"

    def add_thought_chunk(self, text: str) -> None:
        """Accumulate thought text."""
        self.thought_chunks.append(text)
        self._last_chunk_type = "thought"

    def finalize_message_chunks(self) -> dict[str, Any] | None:
        """Build a synthetic packet with accumulated message text.

        Returns:
            A synthetic agent_message packet or None if no chunks accumulated
        """
        if not self.message_chunks:
            return None

        full_text = "".join(self.message_chunks)
        result = {
            "type": "agent_message",
            "content": {"type": "text", "text": full_text},
            "sessionUpdate": "agent_message",
        }
        self.message_chunks.clear()
        return result

    def finalize_thought_chunks(self) -> dict[str, Any] | None:
        """Build a synthetic packet with accumulated thought text.

        Returns:
            A synthetic agent_thought packet or None if no chunks accumulated
        """
        if not self.thought_chunks:
            return None

        full_text = "".join(self.thought_chunks)
        result = {
            "type": "agent_thought",
            "content": {"type": "text", "text": full_text},
            "sessionUpdate": "agent_thought",
        }
        self.thought_chunks.clear()
        return result

    def should_finalize_chunks(self, new_packet_type: str) -> bool:
        """Check if we should finalize pending chunks before processing new packet.

        We finalize when the packet type changes from message/thought chunks
        to something else (or to a different chunk type).
        """
        if self._last_chunk_type is None:
            return False

        # If we were receiving message chunks and now get something else
        if (
            self._last_chunk_type == "message"
            and new_packet_type != "agent_message_chunk"
        ):
            return True

        # If we were receiving thought chunks and now get something else
        if (
            self._last_chunk_type == "thought"
            and new_packet_type != "agent_thought_chunk"
        ):
            return True

        return False

    def clear_last_chunk_type(self) -> None:
        """Clear the last chunk type tracking after finalization."""
        self._last_chunk_type = None


# Hidden directories/files to filter from listings
HIDDEN_PATTERNS = {
    ".venv",
    ".git",
    ".next",
    "__pycache__",
    "node_modules",
    ".DS_Store",
    "opencode.json",
    ".env",
    ".gitignore",
}


class RateLimitError(Exception):
    """Exception raised when rate limit is exceeded."""

    def __init__(
        self,
        message: str,
        messages_used: int,
        limit: int,
        reset_timestamp: str | None = None,
    ):
        super().__init__(message)
        self.messages_used = messages_used
        self.limit = limit
        self.reset_timestamp = reset_timestamp


class SessionManager:
    """Public interface for session operations.

    Orchestrates session lifecycle, messaging, artifacts, and file access.
    Uses SandboxManager internally for sandbox-related operations.

    Unlike SandboxManager, this is NOT a singleton - each instance is bound
    to a specific database session for the duration of a request.

    Usage:
        session_manager = SessionManager(db_session)
        sessions = session_manager.list_sessions(user_id)
    """

    def __init__(self, db_session: DBSession) -> None:
        """Initialize the SessionManager with a database session.

        Args:
            db_session: The SQLAlchemy database session to use for all operations
        """
        self._db_session = db_session
        self._sandbox_manager = get_sandbox_manager()

    # =========================================================================
    # Rate Limiting
    # =========================================================================

    def check_rate_limit(self, user: User) -> None:
        """
        Check build mode rate limits for a user.

        Args:
            user: The user to check rate limits for

        Raises:
            RateLimitError: If rate limit is exceeded
        """
        # Skip rate limiting for self-hosted deployments
        if not MULTI_TENANT:
            return

        rate_limit_status = get_user_rate_limit_status(user, self._db_session)
        if rate_limit_status.is_limited:
            raise RateLimitError(
                message=(
                    f"Rate limit exceeded. You have used "
                    f"{rate_limit_status.messages_used}/{rate_limit_status.limit} messages. "
                    f"Limit resets at {rate_limit_status.reset_timestamp}."
                    if rate_limit_status.reset_timestamp
                    else "This is a lifetime limit."
                ),
                messages_used=rate_limit_status.messages_used,
                limit=rate_limit_status.limit,
                reset_timestamp=rate_limit_status.reset_timestamp,
            )

    # =========================================================================
    # LLM Configuration
    # =========================================================================

    def _get_llm_config(
        self,
        requested_provider_type: str | None,
        requested_model_name: str | None,
    ) -> LLMProviderConfig:
        """Get LLM config for sandbox provisioning.

        Resolution priority:
        1. User's requested provider/model (from cookie)
        2. System default provider

        Args:
            requested_provider_type: Provider type from user's cookie (e.g., "anthropic", "openai")
            requested_model_name: Model name from user's cookie (e.g., "claude-opus-4-5")

        Returns:
            LLMProviderConfig for sandbox provisioning

        Raises:
            ValueError: If no LLM provider is configured
        """
        if requested_provider_type and requested_model_name:
            # Look up provider by type (e.g., "anthropic", "openai", "openrouter")
            provider = fetch_llm_provider_by_type_for_build_mode(
                self._db_session, requested_provider_type
            )
            if provider:
                # Use the requested model directly - the provider's API will
                # reject invalid models. This allows users to use models that
                # aren't explicitly configured as "visible" in the admin UI.
                return LLMProviderConfig(
                    provider=provider.provider,
                    model_name=requested_model_name,
                    api_key=provider.api_key,
                    api_base=provider.api_base,
                )
            else:
                logger.warning(
                    f"Requested provider type {requested_provider_type} not found, falling back to default"
                )

        # Fallback to system default
        default_model = fetch_default_llm_model(self._db_session)
        if not default_model:
            raise ValueError("No default LLM model found")

        return LLMProviderConfig(
            provider=default_model.llm_provider.provider,
            model_name=default_model.name,
            api_key=(
                default_model.llm_provider.api_key.get_value(apply_mask=False)
                if default_model.llm_provider.api_key
                else None
            ),
            api_base=default_model.llm_provider.api_base,
        )

    # =========================================================================
    # Session CRUD Operations
    # =========================================================================

    def list_sessions(
        self,
        user_id: UUID,
    ) -> list[BuildSession]:
        """Get all build sessions for a user.

        Args:
            user_id: The user ID

        Returns:
            List of BuildSession models ordered by most recent first
        """
        return get_user_build_sessions(user_id, self._db_session)

    def create_session__no_commit(
        self,
        user_id: UUID,
        name: str | None = None,
        user_work_area: str | None = None,
        user_level: str | None = None,
        llm_provider_type: str | None = None,
        llm_model_name: str | None = None,
        demo_data_enabled: bool = True,
    ) -> BuildSession:
        """
        Create a new build session with a sandbox.

        NOTE: This method does NOT commit the transaction. The caller is
        responsible for committing after this method returns successfully.
        This allows the entire operation to be atomic at the endpoint level.

        Args:
            user_id: The user ID
            name: Optional session name
            user_work_area: User's work area for demo persona (e.g., "engineering")
            user_level: User's level for demo persona (e.g., "ic", "manager")
            llm_provider_type: Provider type from user's cookie (e.g., "anthropic", "openai")
            llm_model_name: Model name from user's cookie (e.g., "claude-opus-4-5")
            demo_data_enabled: Explicit flag for demo data mode. Defaults to True if not provided.

        Returns:
            The created BuildSession model

        Raises:
            ValueError: If max concurrent sandboxes reached or no LLM provider
            RuntimeError: If sandbox provisioning fails
        """
        tenant_id = get_current_tenant_id()

        # Check sandbox limits for multi-tenant deployments
        if MULTI_TENANT:
            from onyx.server.features.build.configs import (
                SANDBOX_MAX_CONCURRENT_PER_ORG,
            )

            running_count = get_running_sandbox_count_by_tenant(
                self._db_session, tenant_id
            )
            if running_count >= SANDBOX_MAX_CONCURRENT_PER_ORG:
                raise ValueError(
                    f"Maximum concurrent sandboxes ({SANDBOX_MAX_CONCURRENT_PER_ORG}) reached"
                )

        # Get LLM config (uses user's selection or falls back to default)
        llm_config = self._get_llm_config(llm_provider_type, llm_model_name)

        # Build tenant/user-specific path for FILE_SYSTEM documents (sandbox isolation)
        # Each user's sandbox can only access documents they created
        # Path structure: {base_path}/{tenant_id}/knowledge/{user_id}/
        # This matches the path structure used by PersistentDocumentWriter
        if PERSISTENT_DOCUMENT_STORAGE_PATH:
            user_file_system_path = str(
                Path(PERSISTENT_DOCUMENT_STORAGE_PATH)
                / tenant_id
                / "knowledge"
                / str(user_id)
            )
        else:
            # Fallback for local development without persistent storage
            user_file_system_path = "/tmp/onyx-files"

        # Ensure the user's document directory exists (if local)
        if SANDBOX_BACKEND == SandboxBackend.LOCAL:
            Path(user_file_system_path).mkdir(parents=True, exist_ok=True)

        # Allocate port for this session (per-session port allocation)
        # Both LOCAL and KUBERNETES backends use the same port allocation strategy
        nextjs_port = allocate_nextjs_port(self._db_session)

        # Create BuildSession record with allocated port (uses flush, caller commits)
        build_session = create_build_session__no_commit(
            user_id, self._db_session, name=name, demo_data_enabled=demo_data_enabled
        )
        build_session.nextjs_port = nextjs_port
        self._db_session.flush()
        session_id = str(build_session.id)
        logger.info(
            f"Created build session {session_id} for user {user_id} (port: {nextjs_port})"
        )

        # Check if user already has a sandbox (one sandbox per user model)
        existing_sandbox = get_sandbox_by_user_id(self._db_session, user_id)

        if existing_sandbox:
            # User already has a sandbox - check if it needs re-provisioning
            sandbox = existing_sandbox
            sandbox_id = sandbox.id

            if sandbox.status in (
                SandboxStatus.TERMINATED,
                SandboxStatus.SLEEPING,
                SandboxStatus.FAILED,
            ):
                # Re-provision sandbox (pod doesn't exist or failed)
                logger.info(
                    f"Re-provisioning {sandbox.status.value} sandbox {sandbox_id} for user {user_id}"
                )
                sandbox_info = self._sandbox_manager.provision(
                    sandbox_id=sandbox_id,
                    user_id=user_id,
                    tenant_id=tenant_id,
                    llm_config=llm_config,
                )
                # Use update function to also set heartbeat when transitioning to RUNNING
                update_sandbox_status__no_commit(
                    self._db_session, sandbox_id, sandbox_info.status
                )
            elif sandbox.status.is_active():
                # Verify pod is healthy before reusing (use short timeout for quick check)
                if not self._sandbox_manager.health_check(sandbox_id, timeout=5.0):
                    logger.warning(
                        f"Sandbox {sandbox_id} marked as {sandbox.status} but pod is unhealthy/missing. Entering recovery mode."
                    )
                    # Terminate to clean up any lingering K8s resources
                    self._sandbox_manager.terminate(sandbox_id)

                    # Mark as terminated and re-provision
                    update_sandbox_status__no_commit(
                        self._db_session, sandbox_id, SandboxStatus.TERMINATED
                    )

                    logger.info(
                        f"Re-provisioning sandbox {sandbox_id} for user {user_id}"
                    )
                    sandbox_info = self._sandbox_manager.provision(
                        sandbox_id=sandbox_id,
                        user_id=user_id,
                        tenant_id=tenant_id,
                        llm_config=llm_config,
                    )
                    # Use update function to also set heartbeat when transitioning to RUNNING
                    update_sandbox_status__no_commit(
                        self._db_session, sandbox_id, sandbox_info.status
                    )
                else:
                    logger.info(
                        f"Reusing existing sandbox {sandbox_id} (status: {sandbox.status}) for new session {session_id}"
                    )
            else:
                # PROVISIONING status - sandbox is being created by another request
                # Just fail this request
                msg = (
                    f"Sandbox {sandbox_id} has status {sandbox.status.value} and is being "
                    f"created by another request for new session {session_id}"
                )
                logger.error(msg)
                raise RuntimeError(msg)
        else:
            # Create new Sandbox record for the user (uses flush, caller commits)
            sandbox = create_sandbox__no_commit(
                db_session=self._db_session,
                user_id=user_id,
            )
            sandbox_id = sandbox.id
            logger.info(f"Created sandbox record {sandbox_id} for session {session_id}")

            # Provision sandbox (no DB operations inside)
            sandbox_info = self._sandbox_manager.provision(
                sandbox_id=sandbox_id,
                user_id=user_id,
                tenant_id=tenant_id,
                llm_config=llm_config,
            )

            # Update sandbox status (also refreshes heartbeat when transitioning to RUNNING)
            update_sandbox_status__no_commit(
                self._db_session, sandbox_id, sandbox_info.status
            )

        # Set up session workspace within the sandbox
        logger.info(
            f"Setting up session workspace {session_id} in sandbox {sandbox.id}"
        )
        # Fetch user data for personalization in AGENTS.md
        user = fetch_user_by_id(self._db_session, user_id)
        user_name = user.personal_name if user else None
        user_role = user.personal_role if user else None

        # Get excluded user library paths (files with sync_disabled=True)
        # Only query if not using demo data (user library only applies to user files)
        excluded_user_library_paths: list[str] | None = None
        if not demo_data_enabled:
            excluded_user_library_paths = _get_disabled_user_library_paths(
                self._db_session, str(user_id)
            )
            if excluded_user_library_paths:
                logger.debug(
                    f"Excluding {len(excluded_user_library_paths)} disabled user library paths"
                )

        self._sandbox_manager.setup_session_workspace(
            sandbox_id=sandbox.id,
            session_id=build_session.id,
            llm_config=llm_config,
            nextjs_port=nextjs_port,
            file_system_path=user_file_system_path,
            snapshot_path=None,  # TODO: Support restoring from snapshot
            user_name=user_name,
            user_role=user_role,
            user_work_area=user_work_area,
            user_level=user_level,
            use_demo_data=demo_data_enabled,
            excluded_user_library_paths=excluded_user_library_paths,
        )

        sandbox_id = sandbox.id
        logger.info(
            f"Successfully created session {session_id} with workspace in sandbox {sandbox.id}"
        )

        return build_session

    def get_or_create_empty_session(
        self,
        user_id: UUID,
        user_work_area: str | None = None,
        user_level: str | None = None,
        llm_provider_type: str | None = None,
        llm_model_name: str | None = None,
        demo_data_enabled: bool = True,
    ) -> BuildSession:
        """Get existing empty session or create a new one with provisioned sandbox.

        Used for pre-provisioning sandboxes when user lands on /build/v1.
        Returns existing recent empty session if one exists, has a healthy sandbox,
        AND has matching demo_data_enabled setting. Otherwise creates new.
        If an empty session exists but its sandbox is unhealthy/terminated/missing,
        the stale session is deleted and a fresh one is created (which will handle
        sandbox recovery/re-provisioning).

        Args:
            user_id: The user ID
            user_work_area: User's work area for demo persona (e.g., "engineering")
            user_level: User's level for demo persona (e.g., "ic", "manager")
            llm_provider_type: Provider type from user's cookie (e.g., "anthropic", "openai")
            llm_model_name: Model name from user's cookie (e.g., "claude-opus-4-5")
            demo_data_enabled: Explicit flag for demo data mode. Defaults to True if not provided.

        Returns:
            BuildSession (existing empty or newly created)

        Raises:
            ValueError: If max concurrent sandboxes reached
            RuntimeError: If sandbox provisioning fails
        """
        # Look for existing empty session with matching demo_data setting
        existing = get_empty_session_for_user(
            user_id, self._db_session, demo_data_enabled=demo_data_enabled
        )
        if existing:
            logger.info(
                f"Existing empty session {existing.id} found for user {user_id}"
            )
            # Verify sandbox is healthy before returning existing session
            sandbox = get_sandbox_by_user_id(self._db_session, user_id)

            if sandbox and sandbox.status.is_active():
                # Quick health check to verify sandbox is actually responsive
                # AND verify the session workspace still exists on disk
                # (it may have been wiped if the sandbox was re-provisioned)
                is_healthy = self._sandbox_manager.health_check(sandbox.id, timeout=5.0)
                workspace_exists = (
                    is_healthy
                    and self._sandbox_manager.session_workspace_exists(
                        sandbox.id, existing.id
                    )
                )
                if is_healthy and workspace_exists:
                    logger.info(
                        f"Returning existing empty session {existing.id} for user {user_id}"
                    )
                    return existing
                elif not is_healthy:
                    logger.warning(
                        f"Empty session {existing.id} has unhealthy sandbox {sandbox.id}. Deleting and creating fresh session."
                    )
                else:
                    logger.warning(
                        f"Empty session {existing.id} workspace missing in sandbox "
                        f"{sandbox.id}. Deleting and creating fresh session."
                    )
            else:
                logger.warning(
                    f"Empty session {existing.id} has no active sandbox "
                    f"(sandbox={'missing' if not sandbox else sandbox.status}). "
                    f"Deleting and creating fresh session."
                )

            # Delete the stale empty session - create_session__no_commit will
            # handle sandbox recovery/re-provisioning
            delete_build_session__no_commit(existing.id, user_id, self._db_session)

        return self.create_session__no_commit(
            user_id=user_id,
            user_work_area=user_work_area,
            user_level=user_level,
            llm_provider_type=llm_provider_type,
            llm_model_name=llm_model_name,
            demo_data_enabled=demo_data_enabled,
        )

    def delete_empty_session(self, user_id: UUID) -> bool:
        """Delete user's pre-provisioned (empty) session if one exists.

        A session is considered "empty" if it has no messages.
        This is called when user changes LLM selection or toggles demo data
        so the session can be re-created with the new LLM configuration.

        Args:
            user_id: The user ID

        Returns:
            True if a session was deleted, False if none found
        """
        empty_session = get_empty_session_for_user(user_id, self._db_session)

        if not empty_session:
            logger.info(f"No empty session found for user {user_id}")
            return False

        session_id = empty_session.id

        # Get user's sandbox to clean up session workspace
        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox and sandbox.status.is_active():
            try:
                self._sandbox_manager.cleanup_session_workspace(
                    sandbox_id=sandbox.id,
                    session_id=session_id,
                    nextjs_port=empty_session.nextjs_port,
                )
                logger.info(
                    f"Cleaned up session workspace {session_id} in sandbox {sandbox.id}"
                )
            except Exception as e:
                # Log but don't fail - session can still be deleted
                logger.warning(f"Failed to cleanup session workspace {session_id}: {e}")

        # Delete session (cascade deletes artifacts)
        delete_build_session__no_commit(session_id, user_id, self._db_session)
        logger.info(f"Deleted empty session {session_id} for user {user_id}")

        return True

    def get_session(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> BuildSession | None:
        """
        Get a specific build session.

        Also updates the last activity timestamp.

        Args:
            session_id: The session UUID
            user_id: The user ID

        Returns:
            BuildSession model or None if not found
        """
        session = get_build_session(session_id, user_id, self._db_session)
        if session:
            update_session_activity(session_id, self._db_session)
            self._db_session.refresh(session)
        return session

    def generate_session_name(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> str | None:
        """
        Generate a session name using LLM based on the first user message.

        Args:
            session_id: The session UUID
            user_id: The user ID (for ownership verification)

        Returns:
            Generated session name or None if session not found
        """
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        return self._generate_session_name(session_id)

    def update_session_name(
        self,
        session_id: UUID,
        user_id: UUID,
        name: str | None = None,
    ) -> BuildSession | None:
        """
        Update the name of a build session.

        If name is None, auto-generates a name using LLM based on the first
        user message in the session.

        Args:
            session_id: The session UUID
            user_id: The user ID
            name: The new session name (if None, auto-generates using LLM)

        Returns:
            Updated BuildSession model or None if not found
        """
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        if name is not None:
            # Manual rename
            session.name = name
        else:
            # Auto-generate name from first user message using LLM
            session.name = self._generate_session_name(session_id)

        update_session_activity(session_id, self._db_session)
        self._db_session.commit()
        self._db_session.refresh(session)
        return session

    def _generate_session_name(self, session_id: UUID) -> str:
        """
        Generate a session name using LLM based on the first user message.

        Args:
            session_id: The session UUID

        Returns:
            Generated session name or fallback name
        """
        # Get messages to find first user message
        messages = get_session_messages(session_id, self._db_session)
        first_user_msg = next((m for m in messages if m.type == MessageType.USER), None)

        if not first_user_msg:
            return f"Build Session {str(session_id)[:8]}"

        # Extract text from message_metadata
        metadata = first_user_msg.message_metadata
        if not metadata:
            return f"Build Session {str(session_id)[:8]}"

        # Handle user_message packet structure: {type: "user_message", content: {type: "text", text: "..."}}
        content = metadata.get("content", {})
        if isinstance(content, dict):
            user_message = content.get("text", "")
        else:
            user_message = str(content) if content else ""

        if not user_message:
            return f"Build Session {str(session_id)[:8]}"

        # Use LLM to generate a concise session name with Braintrust tracing
        try:
            llm = get_default_llm()
            prompt_messages: LanguageModelInput = [
                SystemMessage(content=BUILD_NAMING_SYSTEM_PROMPT),
                UserMessage(
                    content=BUILD_NAMING_USER_PROMPT.format(
                        user_message=user_message[:500]  # Limit input size
                    )
                ),
            ]
            with ensure_trace(
                "build_session_naming",
                group_id=str(session_id),
                metadata={"session_id": str(session_id)},
            ):
                with llm_generation_span(
                    llm=llm,
                    flow="build_session_naming",
                    input_messages=prompt_messages,
                ) as span_generation:
                    response = llm.invoke(
                        prompt_messages, reasoning_effort=ReasoningEffort.OFF
                    )
                    record_llm_response(span_generation, response)
                    generated_name = llm_response_to_string(response).strip().strip('"')

            # Ensure the name isn't too long (max 50 chars)
            if len(generated_name) > 50:
                generated_name = generated_name[:47] + "..."

            return (
                generated_name
                if generated_name
                else f"Build Session {str(session_id)[:8]}"
            )
        except Exception as e:
            logger.warning(f"Failed to generate session name with LLM: {e}")
            # Fallback to simple truncation
            return user_message[:40].strip() + ("..." if len(user_message) > 40 else "")

    def generate_followup_suggestions(
        self,
        user_message: str,
        assistant_message: str,
    ) -> list[dict[str, str]]:
        """
        Generate follow-up suggestions based on the first exchange.

        Args:
            user_message: The first user message content
            assistant_message: The first assistant response (text only, no tool calls)

        Returns:
            List of suggestion dicts with "theme" and "text" keys, or empty list on failure
        """
        if not user_message or not assistant_message:
            return []

        try:
            llm = get_default_llm()
            prompt_messages: LanguageModelInput = [
                SystemMessage(content=FOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT),
                UserMessage(
                    content=FOLLOWUP_SUGGESTIONS_USER_PROMPT.format(
                        user_message=user_message[:1000],  # Limit input size
                        assistant_message=assistant_message[:2000],
                    )
                ),
            ]
            # Call LLM with Braintrust tracing
            with ensure_trace("build_followup_suggestions"):
                with llm_generation_span(
                    llm=llm,
                    flow="build_followup_suggestions",
                    input_messages=prompt_messages,
                ) as span_generation:
                    response = llm.invoke(
                        prompt_messages,
                        reasoning_effort=ReasoningEffort.OFF,
                        max_tokens=500,
                    )
                    record_llm_response(span_generation, response)
                    raw_output = llm_response_to_string(response).strip()

            return self._parse_suggestions(raw_output)
        except Exception as e:
            logger.warning(f"Failed to generate follow-up suggestions with LLM: {e}")
            return []

    def _parse_suggestions(self, raw_output: str) -> list[dict[str, str]]:
        """
        Parse suggestions from LLM output with multiple fallback strategies.

        Args:
            raw_output: Raw LLM response string

        Returns:
            List of suggestion dicts or empty list on parse failure
        """
        import re

        # Strategy 1: Try direct JSON parse
        try:
            # Strip common LLM artifacts (code fences, etc.)
            cleaned = raw_output.strip()
            if cleaned.startswith("```"):
                # Extract content between code fences
                parts = cleaned.split("```")
                if len(parts) >= 2:
                    cleaned = parts[1]
                    if cleaned.startswith("json"):
                        cleaned = cleaned[4:]
                    cleaned = cleaned.strip()

            data = json.loads(cleaned)
            if isinstance(data, list) and len(data) >= 2:
                suggestions = []
                for item in data[:2]:
                    if isinstance(item, dict) and "theme" in item and "text" in item:
                        theme = item["theme"].lower()
                        if theme in ("add", "question"):
                            text = str(item["text"])[:150]  # Truncate to max length
                            suggestions.append({"theme": theme, "text": text})
                if len(suggestions) == 2:
                    return suggestions
        except (json.JSONDecodeError, KeyError, TypeError):
            pass

        # Strategy 2: Regex extraction for common patterns
        # Handles: "theme": "add", "text": "..." patterns
        suggestions = []
        for theme in ["add", "question"]:
            # Match "theme": "add" followed by "text": "..."
            pattern = rf'"theme"\s*:\s*"{theme}"[^}}]*"text"\s*:\s*"([^"]+)"'
            match = re.search(pattern, raw_output, re.IGNORECASE | re.DOTALL)
            if match:
                text = match.group(1)[:150]
                suggestions.append({"theme": theme, "text": text})

        if len(suggestions) == 2:
            return suggestions

        # Strategy 3: Alternative pattern - theme and text in any order
        suggestions = []
        for theme in ["add", "question"]:
            pattern = rf'"text"\s*:\s*"([^"]+)"[^}}]*"theme"\s*:\s*"{theme}"'
            match = re.search(pattern, raw_output, re.IGNORECASE | re.DOTALL)
            if match:
                text = match.group(1)[:150]
                suggestions.append({"theme": theme, "text": text})

        if len(suggestions) == 2:
            return suggestions

        # Silent fail - return empty list
        logger.warning(
            f"Failed to parse suggestions from LLM output: {raw_output[:200]}"
        )
        return []

    def delete_session(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> bool:
        """
        Delete a build session and all associated data.

        Cleans up session workspace but does NOT terminate the sandbox
        (sandbox is user-owned and shared across sessions).

        NOTE: This method does NOT commit the transaction. The caller is
        responsible for committing after this method returns successfully.

        Args:
            session_id: The session UUID
            user_id: The user ID

        Returns:
            True if deleted, False if not found
        """
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return False

        # Get user's sandbox to clean up session workspace
        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox and sandbox.status.is_active():
            # Clean up session workspace (but don't terminate sandbox)
            try:
                self._sandbox_manager.cleanup_session_workspace(
                    sandbox_id=sandbox.id,
                    session_id=session_id,
                    nextjs_port=session.nextjs_port,
                )
                logger.info(
                    f"Cleaned up session workspace {session_id} in sandbox {sandbox.id}"
                )
            except Exception as e:
                # Log but don't fail - session can still be deleted even if
                # workspace cleanup fails (e.g., if pod is already terminated)
                logger.warning(f"Failed to cleanup session workspace {session_id}: {e}")

        # Delete snapshot files from S3 before removing DB records
        snapshots = get_snapshots_for_session(self._db_session, session_id)
        if snapshots:
            from onyx.file_store.file_store import get_default_file_store
            from onyx.server.features.build.sandbox.manager.snapshot_manager import (
                SnapshotManager,
            )

            snapshot_manager = SnapshotManager(get_default_file_store())
            for snapshot in snapshots:
                try:
                    snapshot_manager.delete_snapshot(snapshot.storage_path)
                except Exception as e:
                    logger.warning(
                        f"Failed to delete snapshot file {snapshot.storage_path}: {e}"
                    )

        # Delete session (uses flush, caller commits)
        return delete_build_session__no_commit(session_id, user_id, self._db_session)

    # =========================================================================
    # Message Operations
    # =========================================================================

    def list_messages(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> list[BuildMessage] | None:
        """
        Get all messages for a session.

        Args:
            session_id: The session UUID
            user_id: The user ID

        Returns:
            List of BuildMessage models or None if session not found
        """
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None
        return get_session_messages(session_id, self._db_session)

    def send_message(
        self,
        session_id: UUID,
        user_id: UUID,
        content: str,
    ) -> Generator[str, None, None]:
        """
        Send a message to the CLI agent and stream the response as SSE events.

        Validates session, saves user message, streams agent response,
        and saves assistant response to database.

        Args:
            session_id: The session UUID
            user_id: The user ID
            content: The message content

        Yields:
            SSE formatted event strings
        """
        yield from self._stream_cli_agent_response(session_id, content, user_id)

    def _stream_cli_agent_response(
        self,
        session_id: UUID,
        user_message_content: str,
        user_id: UUID,
    ) -> Generator[str, None, None]:
        """
        Stream the CLI agent's response using SSE format.

        Executes the agent via SandboxManager and streams events back to the client.
        Uses BuildStreamingState to accumulate chunks and track tool calls.
        At the end of streaming, saves accumulated state to the database.

        Storage behavior:
        - User message: Saved immediately at start
        - agent_message_chunk: Accumulated, saved as one synthetic packet at end/type change
        - agent_thought_chunk: Accumulated, saved as one synthetic packet at end/type change
        - tool_call_start: Streamed to frontend only, not saved
        - tool_call_progress: Only saved when status="completed"
        - agent_plan_update: Upserted (only latest plan kept per turn)
        """

        def _serialize_acp_event(event: Any, event_type: str) -> str:
            """Serialize an ACP event to SSE format, preserving ALL ACP data."""
            if hasattr(event, "model_dump"):
                data = event.model_dump(mode="json", by_alias=True, exclude_none=False)
            else:
                data = {"raw": str(event)}

            data["type"] = event_type
            data["timestamp"] = datetime.now(tz=timezone.utc).isoformat()

            return f"event: message\ndata: {json.dumps(data)}\n\n"

        def _format_packet_event(packet: BuildPacket) -> str:
            """Format a BuildPacket as SSE."""
            return f"event: message\ndata: {packet.model_dump_json(by_alias=True)}\n\n"

        def _extract_text_from_content(content: Any) -> str:
            """Extract text from ACP content structure."""
            if content is None:
                return ""
            if hasattr(content, "type") and content.type == "text":
                return getattr(content, "text", "") or ""
            if isinstance(content, list):
                texts = []
                for block in content:
                    if hasattr(block, "type") and block.type == "text":
                        texts.append(getattr(block, "text", "") or "")
                return "".join(texts)
            return ""

        def _save_pending_chunks(state: BuildStreamingState) -> None:
            """Save any pending accumulated chunks to the database."""
            # Finalize message chunks
            message_packet = state.finalize_message_chunks()
            if message_packet:
                create_message(
                    session_id=session_id,
                    message_type=MessageType.ASSISTANT,
                    turn_index=state.turn_index,
                    message_metadata=message_packet,
                    db_session=self._db_session,
                )

            # Finalize thought chunks
            thought_packet = state.finalize_thought_chunks()
            if thought_packet:
                create_message(
                    session_id=session_id,
                    message_type=MessageType.ASSISTANT,
                    turn_index=state.turn_index,
                    message_metadata=thought_packet,
                    db_session=self._db_session,
                )

            state.clear_last_chunk_type()

        def _save_build_turn(state: BuildStreamingState) -> None:
            """Save all accumulated state at the end of streaming.

            Similar to save_chat_turn() in the main chat flow.
            """
            # 1. Save any remaining accumulated chunks
            _save_pending_chunks(state)

        # Initialize packet logging
        packet_logger = get_packet_logger()

        # The log file auto-rotates to keep only the last N lines (default 5000).
        # Add a prominent separator for visual identification of new message streams.
        log_separator(
            f"NEW MESSAGE STREAM - Session: {str(session_id)[:8]} - User: {str(user_id)[:8]}"
        )
        packet_logger.log_raw(
            "STREAM-START",
            {
                "session_id": str(session_id),
                "user_id": str(user_id),
                "message_preview": user_message_content[:200]
                + ("..." if len(user_message_content) > 200 else ""),
            },
        )

        try:
            # Verify session exists and belongs to user
            session = get_build_session(session_id, user_id, self._db_session)
            if session is None:
                error_packet = ErrorPacket(message="Session not found")
                packet_logger.log("error", error_packet.model_dump())
                yield _format_packet_event(error_packet)
                return

            # Get the user's sandbox (now user-owned, not session-owned)
            sandbox = get_sandbox_by_user_id(self._db_session, user_id)

            # Check if sandbox is running
            if not sandbox or sandbox.status != SandboxStatus.RUNNING:
                error_packet = ErrorPacket(
                    message="Sandbox is not running. Please wait for it to start."
                )
                packet_logger.log("error", error_packet.model_dump())
                yield _format_packet_event(error_packet)
                return

            # Update last activity timestamp
            update_session_activity(session_id, self._db_session)

            # Calculate turn_index BEFORE saving user message
            # turn_index = count of existing USER messages (this will be the Nth user message)

            # Get count of user messages to determine turn index
            existing_user_count = (
                self._db_session.query(BuildMessage)
                .filter(
                    BuildMessage.session_id == session_id,
                    BuildMessage.type == MessageType.USER,
                )
                .count()
            )
            turn_index = existing_user_count  # This user message is the Nth (0-indexed)

            # Save user message to database
            user_message_metadata = {
                "type": "user_message",
                "content": {"type": "text", "text": user_message_content},
            }
            create_message(
                session_id=session_id,
                message_type=MessageType.USER,
                turn_index=turn_index,
                message_metadata=user_message_metadata,
                db_session=self._db_session,
            )

            # Initialize streaming state for this turn
            state = BuildStreamingState(turn_index=turn_index)

            # Get sandbox
            sandbox = get_sandbox_by_session_id(self._db_session, session_id)
            if sandbox is None:
                error_packet = ErrorPacket(message="Sandbox not found")
                packet_logger.log("error", error_packet.model_dump())
                yield _format_packet_event(error_packet)
                return

            sandbox_id = sandbox.id
            events_emitted = 0

            packet_logger.log_raw(
                "STREAM-BEGIN-AGENT-LOOP",
                {
                    "session_id": str(session_id),
                    "sandbox_id": str(sandbox_id),
                    "turn_index": turn_index,
                },
            )

            # Stream ACP events directly to frontend
            for acp_event in self._sandbox_manager.send_message(
                sandbox_id, session_id, user_message_content
            ):
                # Handle SSE keepalive - send comment to keep connection alive
                if isinstance(acp_event, SSEKeepalive):
                    # SSE comments start with : and are ignored by EventSource
                    # but keep the HTTP connection alive
                    packet_logger.log_sse_emit("keepalive", session_id)
                    yield ": keepalive\n\n"
                    continue

                # Check if we need to finalize pending chunks before processing
                event_type = self._get_event_type(acp_event)
                if state.should_finalize_chunks(event_type):
                    _save_pending_chunks(state)

                events_emitted += 1

                # Pass through ACP events with snake_case type names
                if isinstance(acp_event, AgentMessageChunk):
                    text = _extract_text_from_content(acp_event.content)
                    if text:
                        state.add_message_chunk(text)
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "agent_message_chunk"
                    packet_logger.log("agent_message_chunk", event_data)
                    packet_logger.log_sse_emit("agent_message_chunk", session_id)
                    yield _serialize_acp_event(acp_event, "agent_message_chunk")

                elif isinstance(acp_event, AgentThoughtChunk):
                    text = _extract_text_from_content(acp_event.content)
                    if text:
                        state.add_thought_chunk(text)
                    packet_logger.log(
                        "agent_thought_chunk",
                        acp_event.model_dump(mode="json", by_alias=True),
                    )
                    packet_logger.log_sse_emit("agent_thought_chunk", session_id)
                    yield _serialize_acp_event(acp_event, "agent_thought_chunk")

                elif isinstance(acp_event, ToolCallStart):
                    # Stream to frontend but don't save - wait for completion
                    packet_logger.log(
                        "tool_call_start",
                        acp_event.model_dump(mode="json", by_alias=True),
                    )
                    packet_logger.log_sse_emit("tool_call_start", session_id)
                    yield _serialize_acp_event(acp_event, "tool_call_start")

                elif isinstance(acp_event, ToolCallProgress):
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "tool_call_progress"
                    event_data["timestamp"] = datetime.now(tz=timezone.utc).isoformat()

                    # Check if this is a TodoWrite tool call
                    tool_name = (event_data.get("title") or "").lower()
                    is_todo_write = tool_name in ("todowrite", "todo_write")

                    # Check if this is a Task (subagent) tool call
                    raw_input = event_data.get("rawInput") or {}
                    is_task_tool = (
                        tool_name == "task"
                        or raw_input.get("subagent_type") is not None
                        or raw_input.get("subagentType") is not None
                    )

                    # Save to DB:
                    # - For TodoWrite: Save every progress update (todos change frequently)
                    # - For other tools: Only save when status="completed"
                    if is_todo_write or acp_event.status == "completed":
                        create_message(
                            session_id=session_id,
                            message_type=MessageType.ASSISTANT,
                            turn_index=state.turn_index,
                            message_metadata=event_data,
                            db_session=self._db_session,
                        )

                    # For completed Task tools, also save the output as an agent_message
                    # This allows the task output to be rendered as assistant text on reload
                    if is_task_tool and acp_event.status == "completed":
                        raw_output = event_data.get("rawOutput") or {}
                        task_output = raw_output.get("output")
                        if task_output and isinstance(task_output, str):
                            # Strip task_metadata from the output
                            metadata_idx = task_output.find("<task_metadata>")
                            if metadata_idx >= 0:
                                task_output = task_output[:metadata_idx].strip()

                            if task_output:
                                # Create agent_message packet for the task output
                                task_output_packet = {
                                    "type": "agent_message",
                                    "content": {"type": "text", "text": task_output},
                                    "source": "task_output",
                                    "timestamp": datetime.now(
                                        tz=timezone.utc
                                    ).isoformat(),
                                }
                                create_message(
                                    session_id=session_id,
                                    message_type=MessageType.ASSISTANT,
                                    turn_index=state.turn_index,
                                    message_metadata=task_output_packet,
                                    db_session=self._db_session,
                                )

                    # Log full event to packet logger (can handle large payloads)
                    packet_logger.log("tool_call_progress", event_data)
                    packet_logger.log_sse_emit("tool_call_progress", session_id)
                    yield _serialize_acp_event(acp_event, "tool_call_progress")

                elif isinstance(acp_event, AgentPlanUpdate):
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "agent_plan_update"
                    event_data["timestamp"] = datetime.now(tz=timezone.utc).isoformat()

                    # Upsert plan immediately
                    plan_msg = upsert_agent_plan(
                        session_id=session_id,
                        turn_index=state.turn_index,
                        plan_metadata=event_data,
                        db_session=self._db_session,
                        existing_plan_id=state.plan_message_id,
                    )
                    state.plan_message_id = plan_msg.id

                    packet_logger.log("agent_plan_update", event_data)
                    packet_logger.log_sse_emit("agent_plan_update", session_id)
                    yield _serialize_acp_event(acp_event, "agent_plan_update")

                elif isinstance(acp_event, CurrentModeUpdate):
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "current_mode_update"
                    packet_logger.log("current_mode_update", event_data)
                    packet_logger.log_sse_emit("current_mode_update", session_id)
                    yield _serialize_acp_event(acp_event, "current_mode_update")

                elif isinstance(acp_event, PromptResponse):
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "prompt_response"
                    packet_logger.log("prompt_response", event_data)
                    packet_logger.log_sse_emit("prompt_response", session_id)
                    yield _serialize_acp_event(acp_event, "prompt_response")

                elif isinstance(acp_event, ACPError):
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = "error"
                    packet_logger.log("error", event_data)
                    packet_logger.log_sse_emit("error", session_id)
                    yield _serialize_acp_event(acp_event, "error")

                else:
                    # Unrecognized packet type - log it but don't stream to frontend
                    event_type_name = type(acp_event).__name__
                    event_data = acp_event.model_dump(
                        mode="json", by_alias=True, exclude_none=False
                    )
                    event_data["type"] = f"unrecognized_{event_type_name.lower()}"
                    packet_logger.log(
                        f"unrecognized_{event_type_name.lower()}", event_data
                    )

            # Save all accumulated state at end of streaming
            _save_build_turn(state)

            # Log streaming completion
            packet_logger.log_raw(
                "STREAM-COMPLETE",
                {
                    "session_id": str(session_id),
                    "sandbox_id": str(sandbox_id),
                    "turn_index": turn_index,
                    "events_emitted": events_emitted,
                    "message_chunks_accumulated": len(state.message_chunks),
                    "thought_chunks_accumulated": len(state.thought_chunks),
                },
            )

            # Update heartbeat after successful message exchange
            update_sandbox_heartbeat(self._db_session, sandbox_id)

        except ValueError as e:
            error_packet = ErrorPacket(message=str(e))
            packet_logger.log("error", error_packet.model_dump())
            packet_logger.log_raw(
                "STREAM-ERROR",
                {
                    "session_id": str(session_id),
                    "error_type": "ValueError",
                    "error": str(e),
                },
            )
            logger.exception("ValueError in build message streaming")
            yield _format_packet_event(error_packet)
        except RuntimeError as e:
            error_packet = ErrorPacket(message=str(e))
            packet_logger.log("error", error_packet.model_dump())
            packet_logger.log_raw(
                "STREAM-ERROR",
                {
                    "session_id": str(session_id),
                    "error_type": "RuntimeError",
                    "error": str(e),
                },
            )
            logger.exception(f"RuntimeError in build message streaming: {e}")
            yield _format_packet_event(error_packet)
        except Exception as e:
            error_packet = ErrorPacket(message=str(e))
            packet_logger.log("error", error_packet.model_dump())
            packet_logger.log_raw(
                "STREAM-ERROR",
                {
                    "session_id": str(session_id),
                    "error_type": type(e).__name__,
                    "error": str(e),
                },
            )
            logger.exception("Unexpected error in build message streaming")
            yield _format_packet_event(error_packet)

    def _get_event_type(self, acp_event: Any) -> str:
        """Get the event type string for an ACP event."""
        if isinstance(acp_event, AgentMessageChunk):
            return "agent_message_chunk"
        elif isinstance(acp_event, AgentThoughtChunk):
            return "agent_thought_chunk"
        elif isinstance(acp_event, ToolCallStart):
            return "tool_call_start"
        elif isinstance(acp_event, ToolCallProgress):
            return "tool_call_progress"
        elif isinstance(acp_event, AgentPlanUpdate):
            return "agent_plan_update"
        elif isinstance(acp_event, CurrentModeUpdate):
            return "current_mode_update"
        elif isinstance(acp_event, PromptResponse):
            return "prompt_response"
        elif isinstance(acp_event, ACPError):
            return "error"
        return "unknown"

    # =========================================================================
    # Artifact Operations
    # =========================================================================

    def list_artifacts(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> list[dict[str, Any]] | None:
        """
        List artifacts generated in a session.

        Returns artifacts in the format expected by the frontend (matching ArtifactResponse).

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership

        Returns:
            List of artifact dicts or None if session not found or user doesn't own session
        """
        import uuid

        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        artifacts: list[dict[str, Any]] = []
        now = datetime.now(timezone.utc)

        # Check for outputs directory using sandbox manager
        try:
            output_entries = self._sandbox_manager.list_directory(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path="outputs",
            )
        except ValueError:
            # Directory doesn't exist
            return artifacts

        # Check for webapp (web directory in outputs)
        has_webapp = any(
            entry.is_directory and entry.name == "web" for entry in output_entries
        )

        if has_webapp:
            artifacts.append(
                {
                    "id": str(uuid.uuid4()),
                    "session_id": str(session_id),
                    "type": "web_app",  # Use web_app to match streaming packet type
                    "name": "Web Application",
                    "path": "outputs/web",
                    "preview_url": None,  # Preview is via webapp URL, not artifact preview
                    "created_at": now.isoformat(),
                    "updated_at": now.isoformat(),
                }
            )

        return artifacts

    def download_artifact(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> tuple[bytes, str, str] | None:
        """
        Download a specific artifact file.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path to the artifact (within session workspace)

        Returns:
            Tuple of (content, mime_type, filename) or None if not found

        Raises:
            ValueError: If path traversal attempted or path is a directory
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        # Extract filename from path
        filename = Path(path).name

        # Filter out opencode.json files
        if filename == "opencode.json":
            return None

        # Use sandbox manager to read file (works for both local and K8s)
        try:
            content = self._sandbox_manager.read_file(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path=path,
            )
        except ValueError as e:
            # read_file raises ValueError for not found or directory
            if "Not a file" in str(e):
                raise ValueError("Cannot download directory")
            return None

        mime_type, _ = mimetypes.guess_type(filename)

        return (content, mime_type or "application/octet-stream", filename)

    def export_docx(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> tuple[bytes, str] | None:
        """
        Export a markdown file as DOCX.

        Reads the markdown file and converts it to DOCX using pypandoc.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path to the markdown file

        Returns:
            Tuple of (docx_bytes, filename) or None if not found

        Raises:
            ValueError: If path traversal attempted, file is not markdown, etc.
        """
        result = self.download_artifact(session_id, user_id, path)
        if result is None:
            return None

        content_bytes, _mime_type, filename = result

        if not filename.lower().endswith(".md"):
            raise ValueError("Only markdown (.md) files can be exported as DOCX")

        import tempfile
        import pypandoc  # type: ignore

        md_text = content_bytes.decode("utf-8")

        with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as tmp:
            pypandoc.convert_text(md_text, "docx", format="md", outputfile=tmp.name)
            docx_bytes = tmp.read()

        docx_filename = filename.rsplit(".", 1)[0] + ".docx"
        return (docx_bytes, docx_filename)

    def get_pptx_preview(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> dict[str, Any] | None:
        """
        Generate slide image previews for a PPTX file.

        Converts the PPTX to individual JPEG slide images using
        soffice + pdftoppm, with caching to avoid re-conversion.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path to the PPTX file within session workspace

        Returns:
            Dict with slide_count, slide_paths, and cached flag,
            or None if session not found.

        Raises:
            ValueError: If path is invalid or conversion fails
        """
        import hashlib

        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        # Validate file extension
        if not path.lower().endswith(".pptx"):
            raise ValueError("Only .pptx files are supported for preview")

        # Compute cache directory from path hash
        path_hash = hashlib.sha256(path.encode()).hexdigest()[:12]
        cache_dir = f"outputs/.pptx-preview/{path_hash}"

        slide_paths, cached = self._sandbox_manager.generate_pptx_preview(
            sandbox_id=sandbox.id,
            session_id=session_id,
            pptx_path=path,
            cache_dir=cache_dir,
        )

        return {
            "slide_count": len(slide_paths),
            "slide_paths": slide_paths,
            "cached": cached,
        }

    def get_webapp_info(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> dict[str, Any] | None:
        """
        Get webapp information for a session.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership

        Returns:
            Dict with has_webapp, webapp_url, status, and ready,
            or None if session not found
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return {
                "has_webapp": False,
                "webapp_url": None,
                "status": "no_sandbox",
                "ready": False,
                "sharing_scope": session.sharing_scope,
            }

        # Return the proxy URL - the proxy handles routing to the correct sandbox
        # for both local and Kubernetes environments
        webapp_url = None
        ready = False
        if session.nextjs_port:
            webapp_url = f"{WEB_DOMAIN}/api/build/sessions/{session_id}/webapp"

            # Quick health check: can the API server reach the NextJS dev server?
            ready = self._check_nextjs_ready(sandbox.id, session.nextjs_port)

            # If not ready, ask the sandbox manager to ensure Next.js is running.
            # For the local backend this triggers a background restart so that the
            # frontend poll loop eventually sees ready=True without the user having
            # to manually recreate the session.
            if not ready:
                self._sandbox_manager.ensure_nextjs_running(
                    sandbox.id, session_id, session.nextjs_port
                )

        return {
            "has_webapp": session.nextjs_port is not None,
            "webapp_url": webapp_url,
            "status": sandbox.status.value,
            "ready": ready,
            "sharing_scope": session.sharing_scope,
        }

    def _check_nextjs_ready(self, sandbox_id: UUID, port: int) -> bool:
        """Check if the NextJS dev server is responding.

        Does a quick HTTP GET to the sandbox's internal URL with a short timeout.
        Returns True if the server responds with any status code, False on timeout
        or connection error.
        """
        import httpx

        from onyx.server.features.build.sandbox.base import get_sandbox_manager

        try:
            sandbox_manager = get_sandbox_manager()
            internal_url = sandbox_manager.get_webapp_url(sandbox_id, port)
            with httpx.Client(timeout=2.0) as client:
                resp = client.get(internal_url)
                # Any response (even 500) means the server is up
                return resp.status_code < 500
        except (httpx.TimeoutException, httpx.ConnectError, Exception):
            return False

    def download_webapp_zip(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> tuple[bytes, str] | None:
        """
        Create a zip file of the webapp directory.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership

        Returns:
            Tuple of (zip_bytes, filename) or None if session/webapp not found
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        # Check if web directory exists using sandbox manager
        try:
            self._sandbox_manager.list_directory(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path="outputs/web",
            )
        except ValueError:
            # Directory doesn't exist
            return None

        # Recursively collect all files in the web directory
        def collect_files(dir_path: str) -> list[tuple[str, str]]:
            """Collect all files recursively, returning (full_path, relative_path) tuples."""
            files: list[tuple[str, str]] = []
            try:
                entries = self._sandbox_manager.list_directory(
                    sandbox_id=sandbox.id,
                    session_id=session_id,
                    path=dir_path,
                )
                for entry in entries:
                    if entry.is_directory:
                        # Recursively collect files from subdirectory
                        files.extend(collect_files(entry.path))
                    else:
                        # entry.path is relative to session root (e.g., "outputs/web/file.txt")
                        # arcname should be relative to web dir (e.g., "file.txt")
                        arcname = entry.path.replace("outputs/web/", "", 1)
                        files.append((entry.path, arcname))
            except ValueError:
                pass  # Directory doesn't exist, skip
            return files

        file_list = collect_files("outputs/web")

        # Create zip file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for full_path, arcname in file_list:
                try:
                    content = self._sandbox_manager.read_file(
                        sandbox_id=sandbox.id,
                        session_id=session_id,
                        path=full_path,
                    )
                    zip_file.writestr(arcname, content)
                except ValueError:
                    # Skip files that can't be read
                    pass

        zip_buffer.seek(0)

        # Create filename with session name or ID
        session_name = session.name or f"session-{str(session_id)[:8]}"
        # Sanitize filename
        safe_name = "".join(
            c if c.isalnum() or c in ("-", "_") else "_" for c in session_name
        )
        filename = f"{safe_name}-webapp.zip"

        return zip_buffer.getvalue(), filename

    def download_directory(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> tuple[bytes, str] | None:
        """
        Create a zip file of an arbitrary directory in the session workspace.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path to the directory (within session workspace)

        Returns:
            Tuple of (zip_bytes, filename) or None if session not found

        Raises:
            ValueError: If path traversal attempted or path is not a directory
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        # Check if directory exists
        try:
            self._sandbox_manager.list_directory(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path=path,
            )
        except ValueError:
            return None

        # Recursively collect all files
        def collect_files(dir_path: str) -> list[tuple[str, str]]:
            """Collect all files recursively, returning (full_path, arcname) tuples."""
            files: list[tuple[str, str]] = []
            try:
                entries = self._sandbox_manager.list_directory(
                    sandbox_id=sandbox.id,
                    session_id=session_id,
                    path=dir_path,
                )
                for entry in entries:
                    if entry.is_directory:
                        files.extend(collect_files(entry.path))
                    else:
                        # arcname is relative to the target directory
                        prefix_len = len(path) + 1  # +1 for trailing slash
                        arcname = entry.path[prefix_len:]
                        files.append((entry.path, arcname))
            except ValueError:
                pass
            return files

        file_list = collect_files(path)

        # Create zip file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for full_path, arcname in file_list:
                try:
                    content = self._sandbox_manager.read_file(
                        sandbox_id=sandbox.id,
                        session_id=session_id,
                        path=full_path,
                    )
                    zip_file.writestr(arcname, content)
                except ValueError:
                    pass

        zip_buffer.seek(0)

        # Use the directory name for the zip filename
        dir_name = Path(path).name
        safe_name = "".join(
            c if c.isalnum() or c in ("-", "_", ".") else "_" for c in dir_name
        )
        filename = f"{safe_name}.zip"

        return zip_buffer.getvalue(), filename

    # =========================================================================
    # File System Operations
    # =========================================================================

    def list_directory(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> DirectoryListing | None:
        """
        List files and directories in the session workspace.

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path from session workspace root (empty string for root)

        Returns:
            DirectoryListing with sorted entries (directories first) or None if not found

        Raises:
            ValueError: If path traversal attempted or path is not a directory
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            return None

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return None

        # Use sandbox manager to list directory (works for both local and K8s)
        # If the directory doesn't exist (e.g., session workspace not yet loaded),
        # return an empty listing rather than erroring out.
        try:
            raw_entries = self._sandbox_manager.list_directory(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path=path,
            )
        except ValueError as e:
            if "path traversal" in str(e).lower():
                raise
            return DirectoryListing(path=path, entries=[])

        # Filter hidden files and directories
        entries: list[FileSystemEntry] = [
            entry
            for entry in raw_entries
            if entry.name not in HIDDEN_PATTERNS and not entry.name.startswith(".")
        ]

        # Sort: directories first, then files, both alphabetically
        entries.sort(key=lambda e: (not e.is_directory, e.name.lower()))

        return DirectoryListing(path=path, entries=entries)

    def get_upload_stats(
        self,
        session_id: UUID,
        user_id: UUID,
    ) -> tuple[int, int]:
        """Get current file count and total size for a session's uploads.

        Delegates to SandboxManager for the actual filesystem query (supports both
        local filesystem and Kubernetes pods).

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership

        Returns:
            Tuple of (file_count, total_size_bytes)

        Raises:
            ValueError: If session not found
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            raise ValueError("Session not found")

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            raise ValueError("Sandbox not found")

        # Delegate to sandbox manager (handles both local and K8s)
        return self._sandbox_manager.get_upload_stats(
            sandbox_id=sandbox.id,
            session_id=session_id,
        )

    def upload_file(
        self,
        session_id: UUID,
        user_id: UUID,
        filename: str,
        content: bytes,
    ) -> tuple[str, int]:
        """Upload a file to the session's workspace.

        Delegates to SandboxManager for the actual file write (supports both
        local filesystem and Kubernetes pods).

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            filename: Sanitized filename (validation done at API layer)
            content: File content as bytes

        Returns:
            Tuple of (relative_path, size_bytes) where the file was saved

        Raises:
            ValueError: If session not found or upload limits exceeded
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            raise ValueError("Session not found")

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            raise ValueError("Sandbox not found")

        # Check upload limits
        file_count, total_size = self.get_upload_stats(session_id, user_id)

        if file_count >= MAX_UPLOAD_FILES_PER_SESSION:
            raise UploadLimitExceededError(
                f"Maximum number of files ({MAX_UPLOAD_FILES_PER_SESSION}) reached"
            )

        if total_size + len(content) > MAX_TOTAL_UPLOAD_SIZE_BYTES:
            max_mb = MAX_TOTAL_UPLOAD_SIZE_BYTES // (1024 * 1024)
            raise UploadLimitExceededError(
                f"Total upload size limit ({max_mb}MB) exceeded"
            )

        # Delegate to sandbox manager (handles both local and K8s)
        relative_path = self._sandbox_manager.upload_file(
            sandbox_id=sandbox.id,
            session_id=session_id,
            filename=filename,
            content=content,
        )

        # Update heartbeat - file upload is user activity that keeps sandbox alive
        update_sandbox_heartbeat(self._db_session, sandbox.id)

        return relative_path, len(content)

    def delete_file(
        self,
        session_id: UUID,
        user_id: UUID,
        path: str,
    ) -> bool:
        """Delete a file from the session's workspace.

        Delegates to SandboxManager for the actual file delete (supports both
        local filesystem and Kubernetes pods).

        Args:
            session_id: The session UUID
            user_id: The user ID to verify ownership
            path: Relative path to the file (e.g., "attachments/doc.pdf")

        Returns:
            True if file was deleted, False if not found

        Raises:
            ValueError: If session not found or path traversal attempted
        """
        # Verify session ownership
        session = get_build_session(session_id, user_id, self._db_session)
        if session is None:
            raise ValueError("Session not found")

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            raise ValueError("Sandbox not found")

        # Delegate to sandbox manager (handles both local and K8s)
        deleted = self._sandbox_manager.delete_file(
            sandbox_id=sandbox.id,
            session_id=session_id,
            path=path,
        )

        if deleted:
            # SandboxManager already logs the deletion details
            # Update heartbeat - file deletion is user activity that keeps sandbox alive
            update_sandbox_heartbeat(self._db_session, sandbox.id)

        return deleted

    # =========================================================================
    # Sandbox Management Operations
    # =========================================================================

    def terminate_user_sandbox(self, user_id: UUID) -> bool:
        """Terminate the user's sandbox and clean up all session workspaces.

        Used for explicit "start fresh" functionality.

        Args:
            user_id: The user ID

        Returns:
            True if sandbox was terminated, False if user had no sandbox
        """
        from onyx.server.features.build.db.sandbox import (
            update_sandbox_status__no_commit,
        )

        sandbox = get_sandbox_by_user_id(self._db_session, user_id)
        if sandbox is None:
            return False

        if sandbox.status == SandboxStatus.TERMINATED:
            logger.info(f"Sandbox {sandbox.id} already terminated")
            return True

        try:
            # Terminate the sandbox (this cleans up all resources)
            self._sandbox_manager.terminate(sandbox.id)
            logger.info(f"Terminated sandbox {sandbox.id} for user {user_id}")

            # Update status in database
            update_sandbox_status__no_commit(
                self._db_session, sandbox.id, SandboxStatus.TERMINATED
            )
            self._db_session.flush()

            return True

        except Exception as e:
            logger.error(f"Failed to terminate sandbox {sandbox.id}: {e}")
            raise RuntimeError(f"Failed to terminate sandbox: {e}") from e


================================================
FILE: backend/onyx/server/features/build/session/prompts.py
================================================
"""Prompts used for build session operations."""

# Build session naming prompts (similar to chat naming)
BUILD_NAMING_SYSTEM_PROMPT = """
Given the user's build request, provide a SHORT name for the build session. \
Focus on the main task or goal the user wants to accomplish.

IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. \
NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()

BUILD_NAMING_USER_PROMPT = """
User's request: {user_message}

Provide a short name for this build session.
""".strip()


# Follow-up suggestion prompts
FOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT = """You generate follow-up suggestions for an AI workplace assistant conversation.

Given the user's initial request and the assistant's response, generate exactly 2 suggestions:

1. ADD: A suggestion to extend or enhance what was built.
Start with "Great! Now add..." or similar positive acknowledgment + extension.

2. QUESTION: A follow-up question the user might want to ask about the implementation or to explore further.
Start with something like "Can you explain..." or "How does...".

IMPORTANT:
- Keep each suggestion SHORT (under 100 characters preferred, max 150)
- Make them specific to the actual request and response
- They should feel natural, like what a user might actually type
- Output ONLY a JSON array with objects containing "theme" and "text" fields
- Do NOT wrap in code fences or add any other text

Example output:
[{"theme": "add", "text": "Great! Now add form validation for the email field"},
{"theme": "question", "text": "Can you explain how the authentication flow works?"}]""".strip()

FOLLOWUP_SUGGESTIONS_USER_PROMPT = """User's request:
{user_message}

Assistant's response:
{assistant_message}

Generate 2 follow-up suggestions (add, question) as a JSON array:""".strip()


================================================
FILE: backend/onyx/server/features/build/utils.py
================================================
"""Utility functions for Build Mode feature announcements and file validation."""

import re
from pathlib import Path

from sqlalchemy.orm import Session

from onyx.configs.constants import NotificationType
from onyx.db.models import User
from onyx.db.notification import create_notification
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.feature_flags.interface import NoOpFeatureFlagProvider
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.server.features.build.configs import ENABLE_CRAFT
from onyx.server.features.build.configs import MAX_UPLOAD_FILE_SIZE_BYTES
from onyx.utils.logger import setup_logger

logger = setup_logger()

# =============================================================================
# File Upload Validation
# =============================================================================

# Additional extensions for code files (safe to read, not execute)
CODE_FILE_EXTENSIONS: set[str] = {
    ".py",
    ".js",
    ".ts",
    ".tsx",
    ".jsx",
    ".css",
    ".scss",
    ".less",
    ".java",
    ".go",
    ".rs",
    ".cpp",
    ".c",
    ".h",
    ".hpp",
    ".cs",
    ".rb",
    ".php",
    ".swift",
    ".kt",
    ".scala",
    ".sh",
    ".bash",
    ".zsh",
    ".env",
    ".ini",
    ".toml",
    ".cfg",
    ".properties",
}

# Additional MIME types for code files
CODE_MIME_TYPES: set[str] = {
    "text/x-python",
    "text/x-java",
    "text/x-c",
    "text/x-c++",
    "text/x-go",
    "text/x-rust",
    "text/x-shellscript",
    "text/css",
    "text/javascript",
    "application/javascript",
    "application/typescript",
    "application/octet-stream",  # Generic (for code files with unknown type)
}

# Combine base Onyx extensions with code file extensions
ALLOWED_EXTENSIONS: set[str] = (
    OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS | CODE_FILE_EXTENSIONS
)

# Combine base Onyx MIME types with code MIME types
ALLOWED_MIME_TYPES: set[str] = OnyxMimeTypes.ALLOWED_MIME_TYPES | CODE_MIME_TYPES

# Blocked extensions (executable/dangerous files)
BLOCKED_EXTENSIONS: set[str] = {
    # Windows executables
    ".exe",
    ".dll",
    ".msi",
    ".scr",
    ".com",
    ".bat",
    ".cmd",
    ".ps1",
    # macOS
    ".app",
    ".dmg",
    ".pkg",
    # Linux
    ".deb",
    ".rpm",
    ".so",
    # Cross-platform
    ".jar",
    ".war",
    ".ear",
    # Other potentially dangerous
    ".vbs",
    ".vbe",
    ".wsf",
    ".wsh",
    ".hta",
    ".cpl",
    ".reg",
    ".lnk",
    ".pif",
}

# Regex for sanitizing filenames (allow alphanumeric, dash, underscore, period)
SAFE_FILENAME_PATTERN = re.compile(r"[^a-zA-Z0-9._-]")


def validate_file_extension(filename: str) -> tuple[bool, str | None]:
    """Validate file extension against allowlist.

    Args:
        filename: The filename to validate

    Returns:
        Tuple of (is_valid, error_message)
    """
    ext = Path(filename).suffix.lower()

    if not ext:
        return False, "File must have an extension"

    if ext in BLOCKED_EXTENSIONS:
        return False, f"File type '{ext}' is not allowed for security reasons"

    if ext not in ALLOWED_EXTENSIONS:
        return False, f"File type '{ext}' is not supported"

    return True, None


def validate_mime_type(content_type: str | None) -> bool:
    """Validate MIME type against allowlist.

    Args:
        content_type: The Content-Type header value

    Returns:
        True if the MIME type is allowed, False otherwise
    """
    if not content_type:
        # Allow missing content type - we'll validate by extension
        return True

    # Extract base MIME type (ignore charset etc.)
    mime_type = content_type.split(";")[0].strip().lower()

    if mime_type not in ALLOWED_MIME_TYPES:
        return False

    return True


def validate_file_size(size: int) -> bool:
    """Validate file size against limit.

    Args:
        size: File size in bytes

    Returns:
        True if the file size is allowed, False otherwise
    """
    if size <= 0:
        return False

    if size > MAX_UPLOAD_FILE_SIZE_BYTES:
        return False

    return True


def sanitize_filename(filename: str) -> str:
    """Sanitize filename to prevent path traversal and other issues.

    Args:
        filename: The original filename

    Returns:
        Sanitized filename safe for filesystem use
    """
    # Remove any path components (prevent path traversal)
    filename = Path(filename).name

    # Remove null bytes
    filename = filename.replace("\x00", "")

    # Replace unsafe characters with underscore
    filename = SAFE_FILENAME_PATTERN.sub("_", filename)

    # Remove leading/trailing dots and spaces
    filename = filename.strip(". ")

    # Ensure filename is not empty
    if not filename:
        filename = "unnamed_file"

    # Ensure filename doesn't start with a dot (hidden file)
    if filename.startswith("."):
        filename = "_" + filename[1:]

    # Limit length (preserve extension)
    max_length = 255
    if len(filename) > max_length:
        stem = Path(filename).stem
        ext = Path(filename).suffix
        max_stem_length = max_length - len(ext)
        filename = stem[:max_stem_length] + ext

    return filename


def validate_file(
    filename: str,
    content_type: str | None,
    size: int,
) -> tuple[bool, str | None]:
    """Validate a file for upload.

    Performs all validation checks:
    - Extension validation
    - MIME type validation
    - Size validation

    Args:
        filename: The filename to validate
        content_type: The Content-Type header value
        size: File size in bytes

    Returns:
        Tuple of (is_valid, error_message). error_message is None if valid.
    """
    # Validate extension
    ext_valid, ext_error = validate_file_extension(filename)
    if not ext_valid:
        return False, ext_error

    # Validate MIME type
    if not validate_mime_type(content_type):
        return False, f"MIME type '{content_type}' is not supported"

    # Validate file size
    if not validate_file_size(size):
        return (
            False,
            f"File size exceeds maximum allowed size of {MAX_UPLOAD_FILE_SIZE_BYTES} bytes",
        )

    return True, None


# =============================================================================
# Build Mode Feature Announcements
# =============================================================================

# PostHog feature flag key for enabling Onyx Craft (cloud rollout control)
# Flag logic: True = enabled, False/null/not found = disabled
ONYX_CRAFT_ENABLED_FLAG = "onyx-craft-enabled"

# PostHog feature flag key for controlling whether a user has usage limits
# Flag logic: True = user has usage limits (rate limits apply), False/null/not found = no limits (unlimited usage)
CRAFT_HAS_USAGE_LIMITS = "craft-has-usage-limits"

# Feature identifier in additional_data
BUILD_MODE_FEATURE_ID = "build_mode"


def is_onyx_craft_enabled(user: User) -> bool:
    """
    Check if Onyx Craft (Build Mode) is enabled for the user.

    Flag logic for "onyx-craft-enabled":
    - Flag = True → enabled (Onyx Craft is available)
    - Flag = False → disabled (Onyx Craft is not available)
    - Flag = null/not found → disabled (Onyx Craft is not available)

    Only explicit True enables the feature.
    """
    feature_flag_provider = get_default_feature_flag_provider()

    # If no PostHog configured (NoOp provider), use ENABLE_CRAFT env var
    if isinstance(feature_flag_provider, NoOpFeatureFlagProvider):
        return ENABLE_CRAFT

    # Use the feature flag provider
    is_enabled = feature_flag_provider.feature_enabled(
        ONYX_CRAFT_ENABLED_FLAG,
        user.id,
    )

    if is_enabled:
        logger.debug("Onyx Craft enabled via PostHog feature flag")
        return True
    else:
        logger.debug("Onyx Craft disabled via PostHog feature flag")
        return False


def ensure_build_mode_intro_notification(user: User, db_session: Session) -> None:
    """
    Create Build Mode intro notification for user if enabled and not already exists.

    Called from /api/notifications endpoint. Uses notification deduplication
    to ensure each user only gets one notification.
    """
    # PostHog feature flag check - only show notification if Onyx Craft is enabled
    if not is_onyx_craft_enabled(user):
        return

    # Create notification (will be skipped if already exists due to deduplication)
    create_notification(
        user_id=user.id,
        notif_type=NotificationType.FEATURE_ANNOUNCEMENT,
        db_session=db_session,
        title="Introducing Onyx Craft",
        description="Unleash Onyx to create dashboards, slides, documents, and more with your connected data.",
        additional_data={"feature": BUILD_MODE_FEATURE_ID},
    )


================================================
FILE: backend/onyx/server/features/default_assistant/api.py
================================================
"""API endpoints for default assistant configuration."""

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.persona import get_default_assistant
from onyx.db.persona import update_default_assistant_configuration
from onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT
from onyx.server.features.default_assistant.models import DefaultAssistantConfiguration
from onyx.server.features.default_assistant.models import DefaultAssistantUpdateRequest
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/admin/default-assistant")


@router.get("/configuration")
def get_default_assistant_configuration(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DefaultAssistantConfiguration:
    """Get the current default assistant configuration.

    Returns:
        DefaultAssistantConfiguration with current tool IDs and system prompt
    """
    persona = get_default_assistant(db_session)
    if not persona:
        raise HTTPException(status_code=404, detail="Default assistant not found")

    # Extract DB tool IDs from the persona's tools
    tool_ids = [tool.id for tool in persona.tools]

    return DefaultAssistantConfiguration(
        tool_ids=tool_ids,
        system_prompt=persona.system_prompt,
        default_system_prompt=DEFAULT_SYSTEM_PROMPT,
    )


@router.patch("")
def update_default_assistant(
    update_request: DefaultAssistantUpdateRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DefaultAssistantConfiguration:
    """Update the default assistant configuration.

    Args:
        update_request: Request with optional tool_ids and system_prompt

    Returns:
        Updated DefaultAssistantConfiguration

    Raises:
        400: If invalid tool IDs are provided
        404: If default assistant not found
    """
    # Validate tool IDs if provided
    try:
        # Check if system_prompt was explicitly provided in the request
        # This allows distinguishing "not provided" from "explicitly set to null"
        update_system_prompt = "system_prompt" in update_request.model_fields_set

        # Update the default assistant
        updated_persona = update_default_assistant_configuration(
            db_session=db_session,
            tool_ids=update_request.tool_ids,
            system_prompt=update_request.system_prompt,
            update_system_prompt=update_system_prompt,
        )

        # Return the updated configuration
        tool_ids = [tool.id for tool in updated_persona.tools]
        return DefaultAssistantConfiguration(
            tool_ids=tool_ids,
            system_prompt=updated_persona.system_prompt,
            default_system_prompt=DEFAULT_SYSTEM_PROMPT,
        )

    except ValueError as e:
        if "Default assistant not found" in str(e):
            raise HTTPException(status_code=404, detail=str(e))
        raise HTTPException(status_code=400, detail=str(e))


================================================
FILE: backend/onyx/server/features/default_assistant/models.py
================================================
"""Models for default assistant configuration API."""

from pydantic import BaseModel
from pydantic import Field


class DefaultAssistantConfiguration(BaseModel):
    """Simplified view of default assistant configuration for admin UI."""

    tool_ids: list[int] = Field(
        default_factory=list, description="List of enabled tool IDs"
    )
    system_prompt: str | None = Field(
        ...,
        description="System prompt (instructions) for the assistant. None means use default.",
    )
    default_system_prompt: str = Field(
        ..., description="The default system prompt used when system_prompt is null."
    )


class DefaultAssistantUpdateRequest(BaseModel):
    """Request model for updating default assistant configuration."""

    tool_ids: list[int] | None = Field(
        default=None,
        description="List of tool IDs to enable for the default assistant",
    )
    system_prompt: str | None = Field(
        default=None,
        description="New system prompt (instructions). None resets to default, empty string is allowed.",
    )


3


================================================
FILE: backend/onyx/server/features/document_set/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/document_set/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from sqlalchemy.orm import Session

from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document_set import check_document_sets_are_public
from onyx.db.document_set import delete_document_set as db_delete_document_set
from onyx.db.document_set import fetch_all_document_sets_for_user
from onyx.db.document_set import get_document_set_by_id
from onyx.db.document_set import insert_document_set
from onyx.db.document_set import mark_document_set_as_to_be_deleted
from onyx.db.document_set import update_document_set
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.features.document_set.models import CheckDocSetPublicRequest
from onyx.server.features.document_set.models import CheckDocSetPublicResponse
from onyx.server.features.document_set.models import DocumentSetCreationRequest
from onyx.server.features.document_set.models import DocumentSetSummary
from onyx.server.features.document_set.models import DocumentSetUpdateRequest
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from shared_configs.contextvars import get_current_tenant_id


router = APIRouter(prefix="/manage")


@router.post("/admin/document-set")
def create_document_set(
    document_set_creation_request: DocumentSetCreationRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str = Depends(get_current_tenant_id),
) -> int:
    fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )(
        db_session=db_session,
        user=user,
        target_group_ids=document_set_creation_request.groups,
        object_is_public=document_set_creation_request.is_public,
        object_is_new=True,
    )
    try:
        document_set_db_model, _ = insert_document_set(
            document_set_creation_request=document_set_creation_request,
            user_id=user.id,
            db_session=db_session,
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not DISABLE_VECTOR_DB:
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
            kwargs={"tenant_id": tenant_id},
            priority=OnyxCeleryPriority.HIGH,
        )

    return document_set_db_model.id


@router.patch("/admin/document-set")
def patch_document_set(
    document_set_update_request: DocumentSetUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str = Depends(get_current_tenant_id),
) -> None:
    document_set = get_document_set_by_id(db_session, document_set_update_request.id)
    if document_set is None:
        raise HTTPException(
            status_code=404,
            detail=f"Document set {document_set_update_request.id} does not exist",
        )

    fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )(
        db_session=db_session,
        user=user,
        target_group_ids=document_set_update_request.groups,
        object_is_public=document_set_update_request.is_public,
        object_is_owned_by_user=user
        and (document_set.user_id is None or document_set.user_id == user.id),
    )
    try:
        update_document_set(
            document_set_update_request=document_set_update_request,
            db_session=db_session,
            user=user,
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not DISABLE_VECTOR_DB:
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
            kwargs={"tenant_id": tenant_id},
            priority=OnyxCeleryPriority.HIGH,
        )


@router.delete("/admin/document-set/{document_set_id}")
def delete_document_set(
    document_set_id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    tenant_id: str = Depends(get_current_tenant_id),
) -> None:
    document_set = get_document_set_by_id(db_session, document_set_id)
    if document_set is None:
        raise HTTPException(
            status_code=404,
            detail=f"Document set {document_set_id} does not exist",
        )

    # check if the user has "edit" access to the document set.
    # `validate_object_creation_for_user` is poorly named, but this
    # is the right function to use here
    fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )(
        db_session=db_session,
        user=user,
        object_is_public=document_set.is_public,
        object_is_owned_by_user=user
        and (document_set.user_id is None or document_set.user_id == user.id),
    )

    try:
        mark_document_set_as_to_be_deleted(
            db_session=db_session,
            document_set_id=document_set_id,
            user=user,
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    if DISABLE_VECTOR_DB:
        db_session.refresh(document_set)
        db_delete_document_set(document_set, db_session)
    else:
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
            kwargs={"tenant_id": tenant_id},
            priority=OnyxCeleryPriority.HIGH,
        )


"""Endpoints for non-admins"""


@router.get("/document-set")
def list_document_sets_for_user(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[DocumentSetSummary]:
    document_sets = fetch_all_document_sets_for_user(
        db_session=db_session, user=user, get_editable=get_editable
    )
    return [DocumentSetSummary.from_model(ds) for ds in document_sets]


@router.get("/document-set-public")
def document_set_public(
    check_public_request: CheckDocSetPublicRequest,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CheckDocSetPublicResponse:
    is_public = check_document_sets_are_public(
        document_set_ids=check_public_request.document_set_ids, db_session=db_session
    )
    return CheckDocSetPublicResponse(is_public=is_public)


================================================
FILE: backend/onyx/server/features/document_set/models.py
================================================
from typing import Any
from uuid import UUID

from pydantic import BaseModel
from pydantic import Field

from onyx.db.models import DocumentSet as DocumentSetDBModel
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.server.documents.models import CCPairSummary
from onyx.server.documents.models import ConnectorCredentialPairDescriptor
from onyx.server.documents.models import ConnectorSnapshot
from onyx.server.documents.models import CredentialSnapshot
from onyx.server.federated.models import FederatedConnectorSummary


class FederatedConnectorConfig(BaseModel):
    """Configuration for adding a federated connector to a document set"""

    federated_connector_id: int
    entities: dict[str, Any]


class FederatedConnectorDescriptor(BaseModel):
    """Descriptor for a federated connector in a document set"""

    id: int
    name: str
    source: str
    entities: dict[str, Any]

    @classmethod
    def from_federated_connector_mapping(
        cls, fc_mapping: "FederatedConnector__DocumentSet"
    ) -> "FederatedConnectorDescriptor":
        """Create a descriptor from a federated connector mapping"""
        return cls(
            id=fc_mapping.federated_connector_id,
            name=(
                f"{fc_mapping.federated_connector.source.replace('_', ' ').title()}"
                if fc_mapping.federated_connector
                else "Unknown"
            ),
            source=(
                fc_mapping.federated_connector.source
                if fc_mapping.federated_connector
                else "unknown"
            ),
            entities=fc_mapping.entities,
        )


class DocumentSetCreationRequest(BaseModel):
    name: str
    description: str
    cc_pair_ids: list[int]
    is_public: bool
    # For Private Document Sets, who should be able to access these
    users: list[UUID] = Field(default_factory=list)
    groups: list[int] = Field(default_factory=list)
    # Federated connectors to include in the document set
    federated_connectors: list[FederatedConnectorConfig] = Field(default_factory=list)


class DocumentSetUpdateRequest(BaseModel):
    id: int
    description: str
    cc_pair_ids: list[int]
    is_public: bool
    # For Private Document Sets, who should be able to access these
    users: list[UUID]
    groups: list[int]
    # Federated connectors to include in the document set
    federated_connectors: list[FederatedConnectorConfig] = Field(default_factory=list)


class CheckDocSetPublicRequest(BaseModel):
    """Note that this does not mean that the Document Set itself is to be viewable by everyone
    Rather, this refers to the CC-Pairs in the Document Set, and if every CC-Pair is public
    """

    document_set_ids: list[int]


class CheckDocSetPublicResponse(BaseModel):
    is_public: bool


class DocumentSet(BaseModel):
    id: int
    name: str
    description: str | None
    cc_pair_descriptors: list[ConnectorCredentialPairDescriptor]
    is_up_to_date: bool
    is_public: bool
    # For Private Document Sets, who should be able to access these
    users: list[UUID]
    groups: list[int]
    # Federated connectors in the document set
    federated_connectors: list[FederatedConnectorDescriptor] = Field(
        default_factory=list
    )

    @classmethod
    def from_model(cls, document_set_model: DocumentSetDBModel) -> "DocumentSet":
        return cls(
            id=document_set_model.id,
            name=document_set_model.name,
            description=document_set_model.description,
            cc_pair_descriptors=[
                ConnectorCredentialPairDescriptor(
                    id=cc_pair.id,
                    name=cc_pair.name,
                    connector=ConnectorSnapshot.from_connector_db_model(
                        cc_pair.connector,
                        credential_ids=[cc_pair.credential_id],
                    ),
                    credential=CredentialSnapshot.from_credential_db_model(
                        cc_pair.credential
                    ),
                    access_type=cc_pair.access_type,
                )
                for cc_pair in document_set_model.connector_credential_pairs
            ],
            is_up_to_date=document_set_model.is_up_to_date,
            is_public=document_set_model.is_public,
            users=[user.id for user in document_set_model.users],
            groups=[group.id for group in document_set_model.groups],
            federated_connectors=[
                FederatedConnectorDescriptor.from_federated_connector_mapping(
                    fc_mapping
                )
                for fc_mapping in document_set_model.federated_connectors
            ],
        )


class DocumentSetSummary(BaseModel):
    """Simplified document set model with minimal data for list views"""

    id: int
    name: str
    description: str | None
    cc_pair_summaries: list[CCPairSummary]
    is_up_to_date: bool
    is_public: bool
    users: list[UUID]
    groups: list[int]
    federated_connector_summaries: list[FederatedConnectorSummary] = Field(
        default_factory=list
    )

    @classmethod
    def from_model(cls, document_set: DocumentSetDBModel) -> "DocumentSetSummary":
        """Create a summary from a DocumentSet database model"""
        return cls(
            id=document_set.id,
            name=document_set.name,
            description=document_set.description,
            cc_pair_summaries=[
                CCPairSummary(
                    id=cc_pair.id,
                    name=cc_pair.name,
                    source=cc_pair.connector.source,
                    access_type=cc_pair.access_type,
                )
                for cc_pair in document_set.connector_credential_pairs
            ],
            is_up_to_date=document_set.is_up_to_date,
            is_public=document_set.is_public,
            users=[user.id for user in document_set.users],
            groups=[group.id for group in document_set.groups],
            federated_connector_summaries=[
                FederatedConnectorSummary(
                    id=fc_mapping.federated_connector_id,
                    name=f"{fc_mapping.federated_connector.source.replace('_', ' ').title()}",
                    source=fc_mapping.federated_connector.source,
                    entities=fc_mapping.entities,
                )
                for fc_mapping in document_set.federated_connectors
                if fc_mapping.federated_connector is not None
            ],
        )


================================================
FILE: backend/onyx/server/features/hierarchy/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.access.hierarchy_access import get_user_external_group_ids
from onyx.auth.users import current_user
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import DocumentSource
from onyx.db.document import get_accessible_documents_for_hierarchy_node_paginated
from onyx.db.engine.sql_engine import get_session
from onyx.db.hierarchy import get_accessible_hierarchy_nodes_for_source
from onyx.db.models import User
from onyx.db.opensearch_migration import get_opensearch_retrieval_state
from onyx.server.features.hierarchy.constants import DOCUMENT_PAGE_SIZE
from onyx.server.features.hierarchy.constants import HIERARCHY_NODE_DOCUMENTS_PATH
from onyx.server.features.hierarchy.constants import HIERARCHY_NODES_LIST_PATH
from onyx.server.features.hierarchy.constants import HIERARCHY_NODES_PREFIX
from onyx.server.features.hierarchy.models import DocumentPageCursor
from onyx.server.features.hierarchy.models import DocumentSortDirection
from onyx.server.features.hierarchy.models import DocumentSortField
from onyx.server.features.hierarchy.models import DocumentSummary
from onyx.server.features.hierarchy.models import HierarchyNodeDocumentsRequest
from onyx.server.features.hierarchy.models import HierarchyNodeDocumentsResponse
from onyx.server.features.hierarchy.models import HierarchyNodesResponse
from onyx.server.features.hierarchy.models import HierarchyNodeSummary

OPENSEARCH_NOT_ENABLED_MESSAGE = "Per-source knowledge selection is coming soon in v3.0! OpenSearch indexing must be enabled to use this feature."

MIGRATION_STATUS_MESSAGE = (
    "Our records indicate that the transition to OpenSearch is still in progress. "
    "OpenSearch retrieval is necessary to use this feature. "
    "You can still use Document Sets, though! "
    "If you would like to manually switch to OpenSearch, "
    'Go to the "Document Index Migration" section in the Admin panel.'
)

router = APIRouter(prefix=HIERARCHY_NODES_PREFIX)


def _require_opensearch(db_session: Session) -> None:
    if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        raise HTTPException(
            status_code=403,
            detail=OPENSEARCH_NOT_ENABLED_MESSAGE,
        )
    if not get_opensearch_retrieval_state(db_session):
        raise HTTPException(
            status_code=403,
            detail=MIGRATION_STATUS_MESSAGE,
        )


def _get_user_access_info(user: User, db_session: Session) -> tuple[str, list[str]]:
    return user.email, get_user_external_group_ids(db_session, user)


@router.get(HIERARCHY_NODES_LIST_PATH)
def list_accessible_hierarchy_nodes(
    source: DocumentSource,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> HierarchyNodesResponse:
    _require_opensearch(db_session)
    user_email, external_group_ids = _get_user_access_info(user, db_session)
    nodes = get_accessible_hierarchy_nodes_for_source(
        db_session=db_session,
        source=source,
        user_email=user_email,
        external_group_ids=external_group_ids,
    )
    return HierarchyNodesResponse(
        nodes=[
            HierarchyNodeSummary(
                id=node.id,
                title=node.display_name,
                link=node.link,
                parent_id=node.parent_id,
            )
            for node in nodes
        ]
    )


@router.post(HIERARCHY_NODE_DOCUMENTS_PATH)
def list_accessible_hierarchy_node_documents(
    documents_request: HierarchyNodeDocumentsRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> HierarchyNodeDocumentsResponse:
    _require_opensearch(db_session)
    user_email, external_group_ids = _get_user_access_info(user, db_session)
    cursor = documents_request.cursor
    sort_field = documents_request.sort_field
    sort_direction = documents_request.sort_direction

    sort_by_name = sort_field == DocumentSortField.NAME
    sort_ascending = sort_direction == DocumentSortDirection.ASC

    documents = get_accessible_documents_for_hierarchy_node_paginated(
        db_session=db_session,
        parent_hierarchy_node_id=documents_request.parent_hierarchy_node_id,
        user_email=user_email,
        external_group_ids=external_group_ids,
        limit=DOCUMENT_PAGE_SIZE + 1,
        sort_by_name=sort_by_name,
        sort_ascending=sort_ascending,
        cursor_last_modified=cursor.last_modified if cursor else None,
        cursor_last_synced=cursor.last_synced if cursor else None,
        cursor_name=cursor.name if cursor else None,
        cursor_document_id=cursor.document_id if cursor else None,
    )
    document_summaries = [
        DocumentSummary(
            id=document.id,
            title=document.semantic_id,
            link=document.link,
            parent_id=document.parent_hierarchy_node_id,
            last_modified=document.last_modified,
            last_synced=document.last_synced,
        )
        for document in documents[:DOCUMENT_PAGE_SIZE]
    ]
    next_cursor = None
    if len(documents) > DOCUMENT_PAGE_SIZE and document_summaries:
        last_document = document_summaries[-1]
        # For name sorting, we always have a title; for last_updated, we need last_modified
        can_create_cursor = sort_by_name or last_document.last_modified is not None
        if can_create_cursor:
            next_cursor = DocumentPageCursor.from_document(last_document, sort_field)
    return HierarchyNodeDocumentsResponse(
        documents=document_summaries,
        next_cursor=next_cursor,
        sort_field=sort_field,
        sort_direction=sort_direction,
        folder_position=documents_request.folder_position,
    )


================================================
FILE: backend/onyx/server/features/hierarchy/constants.py
================================================
HIERARCHY_NODES_PREFIX = "/hierarchy-nodes"
HIERARCHY_NODES_LIST_PATH = ""
HIERARCHY_NODE_DOCUMENTS_PATH = "/documents"

DOCUMENT_PAGE_SIZE = 50


================================================
FILE: backend/onyx/server/features/hierarchy/models.py
================================================
from datetime import datetime
from enum import Enum

from pydantic import BaseModel

from onyx.configs.constants import DocumentSource
from onyx.server.features.hierarchy.constants import DOCUMENT_PAGE_SIZE


class DocumentSortField(str, Enum):
    NAME = "name"
    LAST_UPDATED = "last_updated"


class DocumentSortDirection(str, Enum):
    ASC = "asc"
    DESC = "desc"


class FolderPosition(str, Enum):
    ON_TOP = "on_top"
    MIXED = "mixed"


class HierarchyNodesRequest(BaseModel):
    source: DocumentSource


class HierarchyNodeSummary(BaseModel):
    id: int
    title: str
    link: str | None
    parent_id: int | None


class HierarchyNodesResponse(BaseModel):
    nodes: list[HierarchyNodeSummary]


class DocumentPageCursor(BaseModel):
    # Fields for last_updated sorting
    last_modified: datetime | None = None
    last_synced: datetime | None = None
    # Field for name sorting
    name: str | None = None
    # Document ID for tie-breaking (always required when cursor is set)
    document_id: str

    @classmethod
    def from_document(
        cls,
        document: "DocumentSummary",
        sort_field: DocumentSortField,
    ) -> "DocumentPageCursor":
        if sort_field == DocumentSortField.NAME:
            return cls(
                name=document.title,
                document_id=document.id,
            )
        # Default: LAST_UPDATED
        return cls(
            last_modified=document.last_modified,
            last_synced=document.last_synced,
            document_id=document.id,
        )


class HierarchyNodeDocumentsRequest(BaseModel):
    parent_hierarchy_node_id: int
    cursor: DocumentPageCursor | None = None
    sort_field: DocumentSortField = DocumentSortField.LAST_UPDATED
    sort_direction: DocumentSortDirection = DocumentSortDirection.DESC
    folder_position: FolderPosition = FolderPosition.ON_TOP


class DocumentSummary(BaseModel):
    id: str
    title: str
    link: str | None
    parent_id: int | None
    last_modified: datetime | None
    last_synced: datetime | None


class HierarchyNodeDocumentsResponse(BaseModel):
    documents: list[DocumentSummary]
    next_cursor: DocumentPageCursor | None
    page_size: int = DOCUMENT_PAGE_SIZE
    sort_field: DocumentSortField = DocumentSortField.LAST_UPDATED
    sort_direction: DocumentSortDirection = DocumentSortDirection.DESC
    folder_position: FolderPosition = FolderPosition.ON_TOP


================================================
FILE: backend/onyx/server/features/hooks/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/input_prompt/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.input_prompt import disable_input_prompt_for_user
from onyx.db.input_prompt import fetch_input_prompt_by_id
from onyx.db.input_prompt import fetch_input_prompts_by_user
from onyx.db.input_prompt import insert_input_prompt
from onyx.db.input_prompt import remove_input_prompt
from onyx.db.input_prompt import remove_public_input_prompt
from onyx.db.input_prompt import update_input_prompt
from onyx.db.models import InputPrompt__User
from onyx.db.models import User
from onyx.server.features.input_prompt.models import CreateInputPromptRequest
from onyx.server.features.input_prompt.models import InputPromptSnapshot
from onyx.server.features.input_prompt.models import UpdateInputPromptRequest
from onyx.utils.logger import setup_logger

logger = setup_logger()

basic_router = APIRouter(prefix="/input_prompt")
admin_router = APIRouter(prefix="/admin/input_prompt")


@basic_router.get("")
def list_input_prompts(
    user: User = Depends(current_user),
    include_public: bool = True,
    db_session: Session = Depends(get_session),
) -> list[InputPromptSnapshot]:
    user_prompts = fetch_input_prompts_by_user(
        user_id=user.id,
        db_session=db_session,
        include_public=include_public,
    )
    return [InputPromptSnapshot.from_model(prompt) for prompt in user_prompts]


@basic_router.get("/{input_prompt_id}")
def get_input_prompt(
    input_prompt_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> InputPromptSnapshot:
    input_prompt = fetch_input_prompt_by_id(
        id=input_prompt_id,
        user_id=user.id,
        db_session=db_session,
    )

    return InputPromptSnapshot.from_model(input_prompt=input_prompt)


@basic_router.post("")
def create_input_prompt(
    create_input_prompt_request: CreateInputPromptRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> InputPromptSnapshot:
    input_prompt = insert_input_prompt(
        prompt=create_input_prompt_request.prompt,
        content=create_input_prompt_request.content,
        is_public=False,
        user=user,
        db_session=db_session,
    )

    input_prompt_user = InputPrompt__User(
        input_prompt_id=input_prompt.id, user_id=user.id
    )
    db_session.add(input_prompt_user)
    db_session.commit()

    return InputPromptSnapshot.from_model(input_prompt)


@basic_router.patch("/{input_prompt_id}")
def patch_input_prompt(
    input_prompt_id: int,
    update_input_prompt_request: UpdateInputPromptRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> InputPromptSnapshot:
    try:
        updated_input_prompt = update_input_prompt(
            user=user,
            input_prompt_id=input_prompt_id,
            prompt=update_input_prompt_request.prompt,
            content=update_input_prompt_request.content,
            active=update_input_prompt_request.active,
            db_session=db_session,
        )
    except ValueError as e:
        error_msg = "Error occurred while updated input prompt"
        logger.warn(f"{error_msg}. Stack trace: {e}")
        raise HTTPException(status_code=404, detail=error_msg)

    return InputPromptSnapshot.from_model(updated_input_prompt)


@basic_router.delete("/{input_prompt_id}")
def delete_input_prompt(
    input_prompt_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
    delete_public: bool = False,
) -> None:
    try:
        remove_input_prompt(
            user, input_prompt_id, db_session, delete_public=delete_public
        )

    except ValueError as e:
        error_msg = "Error occurred while deleting input prompt"
        logger.warn(f"{error_msg}. Stack trace: {e}")
        raise HTTPException(status_code=404, detail=error_msg)


@admin_router.delete("/{input_prompt_id}")
def delete_public_input_prompt(
    input_prompt_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        remove_public_input_prompt(input_prompt_id, db_session)

    except ValueError as e:
        error_msg = "Error occurred while deleting input prompt"
        logger.warn(f"{error_msg}. Stack trace: {e}")
        raise HTTPException(status_code=404, detail=error_msg)


@basic_router.post("/{input_prompt_id}/hide")
def hide_input_prompt_for_user(
    input_prompt_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """
    Endpoint that marks a seed (or any) prompt as disabled for the current user,
    so it won't show up in their subsequent queries.
    """
    disable_input_prompt_for_user(input_prompt_id, user.id, db_session)


================================================
FILE: backend/onyx/server/features/input_prompt/models.py
================================================
from uuid import UUID

from pydantic import BaseModel

from onyx.db.models import InputPrompt
from onyx.utils.logger import setup_logger

logger = setup_logger()


class CreateInputPromptRequest(BaseModel):
    prompt: str
    content: str
    is_public: bool


class UpdateInputPromptRequest(BaseModel):
    prompt: str
    content: str
    active: bool


class InputPromptResponse(BaseModel):
    id: int
    prompt: str
    content: str
    active: bool


class InputPromptSnapshot(BaseModel):
    id: int
    prompt: str
    content: str
    active: bool
    user_id: UUID | None
    is_public: bool

    @classmethod
    def from_model(cls, input_prompt: InputPrompt) -> "InputPromptSnapshot":
        return InputPromptSnapshot(
            id=input_prompt.id,
            prompt=input_prompt.prompt,
            content=input_prompt.content,
            active=input_prompt.active,
            user_id=input_prompt.user_id,
            is_public=input_prompt.is_public,
        )


================================================
FILE: backend/onyx/server/features/mcp/api.py
================================================
import asyncio
import base64
import datetime
import hashlib
import json
from collections.abc import Awaitable
from enum import Enum
from secrets import token_urlsafe
from typing import cast
from typing import Literal
from urllib.parse import urlparse

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from mcp.client.auth import OAuthClientProvider
from mcp.client.auth import TokenStorage
from mcp.shared.auth import OAuthClientInformationFull
from mcp.shared.auth import OAuthClientMetadata
from mcp.shared.auth import OAuthToken
from mcp.types import InitializeResult
from mcp.types import Tool as MCPLibTool
from pydantic import AnyUrl
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.schemas import UserRole
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPServerStatus
from onyx.db.enums import MCPTransport
from onyx.db.mcp import create_connection_config
from onyx.db.mcp import create_mcp_server__no_commit
from onyx.db.mcp import delete_all_user_connection_configs_for_server_no_commit
from onyx.db.mcp import delete_connection_config
from onyx.db.mcp import delete_mcp_server
from onyx.db.mcp import delete_user_connection_configs_for_server
from onyx.db.mcp import extract_connection_data
from onyx.db.mcp import get_all_mcp_servers
from onyx.db.mcp import get_connection_config_by_id
from onyx.db.mcp import get_mcp_server_by_id
from onyx.db.mcp import get_mcp_servers_for_persona
from onyx.db.mcp import get_server_auth_template
from onyx.db.mcp import get_user_connection_config
from onyx.db.mcp import update_connection_config
from onyx.db.mcp import update_mcp_server__no_commit
from onyx.db.mcp import upsert_user_connection_config
from onyx.db.models import MCPConnectionConfig
from onyx.db.models import MCPServer as DbMCPServer
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.tools import create_tool__no_commit
from onyx.db.tools import delete_tool__no_commit
from onyx.db.tools import get_tools_by_mcp_server_id
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.mcp.models import MCPApiKeyResponse
from onyx.server.features.mcp.models import MCPAuthTemplate
from onyx.server.features.mcp.models import MCPConnectionData
from onyx.server.features.mcp.models import MCPOAuthCallbackResponse
from onyx.server.features.mcp.models import MCPOAuthKeys
from onyx.server.features.mcp.models import MCPServer
from onyx.server.features.mcp.models import MCPServerCreateResponse
from onyx.server.features.mcp.models import MCPServerSimpleCreateRequest
from onyx.server.features.mcp.models import MCPServerSimpleUpdateRequest
from onyx.server.features.mcp.models import MCPServersResponse
from onyx.server.features.mcp.models import MCPServerUpdateResponse
from onyx.server.features.mcp.models import MCPToolCreateRequest
from onyx.server.features.mcp.models import MCPToolListResponse
from onyx.server.features.mcp.models import MCPToolUpdateRequest
from onyx.server.features.mcp.models import MCPUserCredentialsRequest
from onyx.server.features.mcp.models import MCPUserOAuthConnectRequest
from onyx.server.features.mcp.models import MCPUserOAuthConnectResponse
from onyx.server.features.tool.models import ToolSnapshot
from onyx.tools.tool_implementations.mcp.mcp_client import discover_mcp_tools
from onyx.tools.tool_implementations.mcp.mcp_client import initialize_mcp_client
from onyx.tools.tool_implementations.mcp.mcp_client import log_exception_group
from onyx.utils.encryption import mask_string
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _truncate_description(description: str | None, max_length: int = 500) -> str:
    """Truncate description to max_length characters, adding ellipsis if truncated."""
    if not description:
        return ""
    if len(description) <= max_length:
        return description
    return description[: max_length - 3] + "..."


router = APIRouter(prefix="/mcp")
admin_router = APIRouter(prefix="/admin/mcp")
STATE_TTL_SECONDS = 60 * 5  # 5 minutes
OAUTH_WAIT_SECONDS = 30  # Give the user 30 seconds to complete the OAuth flow
UNUSED_RETURN_PATH = "unused_path"

HEADER_SUBSTITUTIONS: Literal["header_substitutions"] = "header_substitutions"


def key_auth_url(user_id: str) -> str:
    return f"mcp:oauth:{user_id}:auth_url"


def key_state(user_id: str) -> str:
    return f"mcp:oauth:{user_id}:state"


def key_code(user_id: str, state: str) -> str:
    return f"mcp:oauth:{user_id}:{state}:codes"


def key_tokens(user_id: str) -> str:
    return f"mcp:oauth:{user_id}:tokens"


def key_client_info(user_id: str) -> str:
    return f"mcp:oauth:{user_id}:client_info"


REQUESTED_SCOPE: str | None = None


class OnyxTokenStorage(TokenStorage):
    """
    store auth info in a particular user's connection config in postgres
    """

    def __init__(self, connection_config_id: int, alt_config_id: int | None = None):
        self.alt_config_id = alt_config_id
        self.connection_config_id = connection_config_id

    def _ensure_connection_config(self, db_session: Session) -> MCPConnectionConfig:
        config = get_connection_config_by_id(self.connection_config_id, db_session)
        if config is None:
            raise HTTPException(status_code=404, detail="Connection config not found")
        return config

    async def get_tokens(self) -> OAuthToken | None:
        with get_session_with_current_tenant() as db_session:
            config = self._ensure_connection_config(db_session)
            config_data = extract_connection_data(config)
            tokens_raw = config_data.get(MCPOAuthKeys.TOKENS.value)
            if tokens_raw:
                return OAuthToken.model_validate(tokens_raw)
            return None

    async def set_tokens(self, tokens: OAuthToken) -> None:
        with get_session_with_current_tenant() as db_session:
            config = self._ensure_connection_config(db_session)
            config_data = extract_connection_data(config)
            config_data[MCPOAuthKeys.TOKENS.value] = tokens.model_dump(mode="json")
            config_data["headers"] = {
                "Authorization": f"{tokens.token_type} {tokens.access_token}"
            }
            update_connection_config(config.id, db_session, config_data)
            if self.alt_config_id:
                update_connection_config(self.alt_config_id, db_session, config_data)

                # signal the oauth callback that token exchange is complete
                r = get_redis_client()
                r.rpush(key_tokens(str(self.alt_config_id)), tokens.model_dump_json())
                r.expire(key_tokens(str(self.alt_config_id)), OAUTH_WAIT_SECONDS)

    async def get_client_info(self) -> OAuthClientInformationFull | None:
        with get_session_with_current_tenant() as db_session:
            config = self._ensure_connection_config(db_session)
            config_data = extract_connection_data(config)
            client_info_raw = config_data.get(MCPOAuthKeys.CLIENT_INFO.value)
            if client_info_raw:
                return OAuthClientInformationFull.model_validate(client_info_raw)
            if self.alt_config_id:
                alt_config = get_connection_config_by_id(self.alt_config_id, db_session)
                if alt_config:
                    alt_config_data = extract_connection_data(alt_config)
                    alt_client_info = alt_config_data.get(
                        MCPOAuthKeys.CLIENT_INFO.value
                    )
                    if alt_client_info:
                        # Cache the admin client info on the user config for future calls
                        config_data[MCPOAuthKeys.CLIENT_INFO.value] = alt_client_info
                        update_connection_config(config.id, db_session, config_data)
                        return OAuthClientInformationFull.model_validate(
                            alt_client_info
                        )
            return None

    async def set_client_info(self, info: OAuthClientInformationFull) -> None:
        with get_session_with_current_tenant() as db_session:
            config = self._ensure_connection_config(db_session)
            config_data = extract_connection_data(config)
            config_data[MCPOAuthKeys.CLIENT_INFO.value] = info.model_dump(mode="json")
            update_connection_config(config.id, db_session, config_data)
            if self.alt_config_id:
                update_connection_config(self.alt_config_id, db_session, config_data)


def make_oauth_provider(
    mcp_server: DbMCPServer,
    user_id: str,
    return_path: str,
    connection_config_id: int,
    admin_config_id: int | None,
) -> OAuthClientProvider:
    async def redirect_handler(auth_url: str) -> None:
        if return_path == UNUSED_RETURN_PATH:
            raise ValueError("Please Reconnect to the server")
        r = get_redis_client()
        # The SDK generated & embedded 'state' in the auth_url; extract & store it.
        parsed = urlparse(auth_url)
        qs = dict([p.split("=", 1) for p in parsed.query.split("&") if "=" in p])
        state = qs.get("state")
        if not state:
            # Defensive: some providers encode state differently; adapt if needed.
            raise RuntimeError("Missing state in authorization_url")

        # Save for the frontend & for callback validation
        state_obj = MCPOauthState(
            server_id=mcp_server.id,
            return_path=return_path,
            is_admin=admin_config_id is not None,
            state=state,
        )
        r.rpush(key_auth_url(user_id), auth_url)
        r.expire(key_auth_url(user_id), OAUTH_WAIT_SECONDS)
        r.set(key_state(user_id), state_obj.model_dump_json(), ex=STATE_TTL_SECONDS)

        # Return immediately; the HTTP layer will read the stored URL and send it to the browser.

    async def callback_handler() -> tuple[str, str | None]:
        r = get_redis_client()
        # Wait up to TTL for the code published by the /oauth/callback route
        state = r.get(key_state(user_id))
        if isinstance(state, Awaitable):
            state = await state
        if not state:
            raise RuntimeError("No pending OAuth state for user")
        state_obj = MCPOauthState.model_validate_json(state)

        # Block on Redis for (code, state). BLPOP returns (key, value).
        key = key_code(user_id, state_obj.state)

        # requests CAN block here for up to a minute if the user doesn't resolve the OAuth flow
        # Run the blocking blpop operation in a thread pool to avoid blocking the event loop
        loop = asyncio.get_running_loop()
        pop = await loop.run_in_executor(
            None, lambda: r.blpop([key], timeout=OAUTH_WAIT_SECONDS)
        )
        # TODO: gracefully handle "user says no"
        if not pop:
            raise RuntimeError("Timed out waiting for OAuth callback")

        code_state_bytes = cast(tuple[bytes, bytes], pop)

        code_state_dict = json.loads(code_state_bytes[1].decode())

        code = code_state_dict["code"]

        if code_state_dict["state"] != state_obj.state:
            raise RuntimeError("Invalid state in OAuth callback")

        # Optional: cleanup
        r.delete(key_auth_url(user_id), key_state(user_id))
        return code, state_obj.state

    return OAuthClientProvider(
        server_url=mcp_server.server_url,
        client_metadata=OAuthClientMetadata(
            client_name=f"Onyx - {mcp_server.name}",
            redirect_uris=[AnyUrl(f"{WEB_DOMAIN}/mcp/oauth/callback")],
            grant_types=["authorization_code", "refresh_token"],
            response_types=["code"],
            scope=REQUESTED_SCOPE,  # TODO: do we need to pass this in? maybe make configurable
        ),
        storage=OnyxTokenStorage(connection_config_id, admin_config_id),
        redirect_handler=redirect_handler,
        callback_handler=callback_handler,
    )


def _build_headers_from_template(
    template_data: MCPAuthTemplate, credentials: dict[str, str], user_email: str
) -> dict[str, str]:
    """Build headers dict from template and credentials"""
    headers = {}
    template_headers = template_data.headers

    for name, value_template in template_headers.items():
        # Replace placeholders
        value = value_template
        for key, cred_value in credentials.items():
            value = value.replace(f"{{{key}}}", cred_value)
        value = value.replace("{user_email}", user_email)

        if name:
            headers[name] = value

    return headers


def test_mcp_server_credentials(
    server_url: str,
    connection_headers: dict[str, str] | None,
    auth: OAuthClientProvider | None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
) -> tuple[bool, str]:
    """Test if credentials work by calling the MCP server's tools/list endpoint"""
    try:
        # Attempt to discover tools using the provided credentials
        tools = discover_mcp_tools(
            server_url, connection_headers, transport=transport, auth=auth
        )

        if (
            tools is not None and len(tools) >= 0
        ):  # Even 0 tools is a successful connection
            return True, f"Successfully connected. Found {len(tools)} tools."
        else:
            return False, "Failed to retrieve tools list from server."

    except Exception as e:
        logger.error(f"Failed to test MCP server credentials: {e}")
        return False, f"Connection failed: {str(e)}"


def b64url(b: bytes) -> str:
    return base64.urlsafe_b64encode(b).rstrip(b"=").decode("ascii")


def make_pkce_pair() -> tuple[str, str]:
    verifier = b64url(token_urlsafe(64).encode())
    challenge = b64url(hashlib.sha256(verifier.encode("ascii")).digest())
    return verifier, challenge


class MCPOauthState(BaseModel):
    server_id: int
    return_path: str
    is_admin: bool
    state: str


@admin_router.post("/oauth/connect", response_model=MCPUserOAuthConnectResponse)
async def connect_admin_oauth(
    request: MCPUserOAuthConnectRequest,
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPUserOAuthConnectResponse:
    """Connect OAuth flow for admin MCP server authentication"""
    return await _connect_oauth(request, db, is_admin=True, user=user)


@router.post("/oauth/connect", response_model=MCPUserOAuthConnectResponse)
async def connect_user_oauth(
    request: MCPUserOAuthConnectRequest,
    db: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPUserOAuthConnectResponse:
    return await _connect_oauth(request, db, is_admin=False, user=user)


async def _connect_oauth(
    request: MCPUserOAuthConnectRequest,
    db: Session,
    is_admin: bool,
    user: User,
) -> MCPUserOAuthConnectResponse:
    """Connect OAuth flow for per-user MCP server authentication"""

    logger.info(f"Initiating per-user OAuth for server: {request.server_id}")

    try:
        server_id = int(request.server_id)
        mcp_server = get_mcp_server_by_id(server_id, db)
    except Exception:
        raise HTTPException(status_code=404, detail="MCP server not found")

    if is_admin:
        _ensure_mcp_server_owner_or_admin(mcp_server, user)

    if mcp_server.auth_type != MCPAuthenticationType.OAUTH:
        auth_type_str = mcp_server.auth_type.value if mcp_server.auth_type else "None"
        raise HTTPException(
            status_code=400,
            detail=f"Server was configured with authentication type {auth_type_str}",
        )

    # Create admin config with client info if provided
    config_data = MCPConnectionData(headers={})
    if request.oauth_client_id and request.oauth_client_secret:
        client_info = OAuthClientInformationFull(
            client_id=request.oauth_client_id,
            client_secret=request.oauth_client_secret,
            redirect_uris=[AnyUrl(f"{WEB_DOMAIN}/mcp/oauth/callback")],
            grant_types=["authorization_code", "refresh_token"],
            response_types=["code"],
            scope=REQUESTED_SCOPE,  # TODO: allow specifying scopes?
            # Must specify auth method so client_secret is actually sent during token exchange
            token_endpoint_auth_method="client_secret_post",
        )
        config_data[MCPOAuthKeys.CLIENT_INFO.value] = client_info.model_dump(
            mode="json"
        )

    if mcp_server.admin_connection_config_id is None:
        if not is_admin:
            raise HTTPException(
                status_code=400,
                detail="Admin connection config not found for this server",
            )

        admin_config = create_connection_config(
            config_data=config_data,
            mcp_server_id=mcp_server.id,
            user_email="",
            db_session=db,
        )
        mcp_server.admin_connection_config = admin_config
        mcp_server.admin_connection_config_id = (
            admin_config.id
        )  # might not have to do this
    elif is_admin:  # only update admin config if we're an admin
        update_connection_config(mcp_server.admin_connection_config_id, db, config_data)

    connection_config = get_user_connection_config(mcp_server.id, user.email, db)

    if connection_config is None:
        connection_config = create_connection_config(
            config_data=config_data,
            mcp_server_id=mcp_server.id,
            user_email=user.email,
            db_session=db,
        )
    else:
        update_connection_config(connection_config.id, db, config_data)

    db.commit()

    connection_config_dict = extract_connection_data(
        connection_config, apply_mask=False
    )
    is_connected = (
        MCPOAuthKeys.CLIENT_INFO.value in connection_config_dict
        and connection_config_dict.get("headers")
    )
    # Step 1: make unauthenticated request and parse returned www authenticate header
    # Ensure we have a trailing slash for the MCP endpoint

    if mcp_server.transport is None:
        raise HTTPException(
            status_code=400,
            detail="MCP server transport is not configured",
        )

    # always make a http request for the initial probe
    transport = mcp_server.transport if is_connected else MCPTransport.STREAMABLE_HTTP
    probe_url = mcp_server.server_url
    logger.info(f"Probing OAuth server at: {probe_url}")

    oauth_auth = make_oauth_provider(
        mcp_server,
        str(user.id),
        request.return_path,
        connection_config.id,
        mcp_server.admin_connection_config_id,
    )

    # start the oauth handshake in the background
    # the background task will block on the callback handler after setting
    # the auth_url for us to send to the frontend. The callback handler waits for
    # the auth code to be available in redis; this code gets set by our callback endpoint
    # which is called by the frontend after the user goes through the login flow.
    async def tmp_func() -> InitializeResult:
        try:
            x = await initialize_mcp_client(
                probe_url,
                connection_headers=connection_config_dict.get("headers", {}),
                transport=transport,
                auth=oauth_auth,
            )
            logger.info(f"OAuth initialization completed successfully: {x}")
            return x
        except Exception:
            logger.exception("OAuth initialization failed")
            raise

    init_task = asyncio.create_task(tmp_func())

    # Wait for whichever happens first:
    # 1) The OAuth redirect URL becomes available in Redis (we should return it)
    # 2) The initialize task completes (tokens already valid) — return to the provided return_path
    r = get_redis_client()
    loop = asyncio.get_running_loop()

    async def wait_auth_url() -> str | None:
        raw = await loop.run_in_executor(
            None,
            lambda: r.blpop([key_auth_url(str(user.id))], timeout=OAUTH_WAIT_SECONDS),
        )
        if raw is None:
            return None
        tup = cast(tuple[bytes, bytes], raw)
        return tup[1].decode()

    auth_task = None if is_connected else asyncio.create_task(wait_auth_url())

    done, pending = await asyncio.wait(
        [init_task] + ([auth_task] if auth_task else []),
        return_when=asyncio.FIRST_COMPLETED,
    )

    # If we got an auth URL first, return it
    if auth_task in done:
        oauth_url = await auth_task
        # If no URL was retrieved within the timeout, treat as error
        if not oauth_url:
            # If initialization also finished, treat as already authenticated
            if init_task.done() and not init_task.cancelled():
                try:
                    init_result = init_task.result()
                    logger.info(
                        f"OAuth initialization completed during timeout: {init_result}"
                    )
                    return MCPUserOAuthConnectResponse(
                        server_id=int(request.server_id),
                        oauth_url=request.return_path,
                    )
                except Exception as e:
                    logger.error(f"OAuth initialization failed during timeout: {e}")
                    raise HTTPException(
                        status_code=400, detail=f"OAuth initialization failed: {str(e)}"
                    )
            raise HTTPException(status_code=400, detail="Auth URL retrieval timed out")

        logger.info(
            f"Connected to auth url: {oauth_url} for mcp server: {mcp_server.name}"
        )
        return MCPUserOAuthConnectResponse(
            server_id=int(request.server_id), oauth_url=oauth_url
        )

    # Otherwise, initialization finished first — no redirect needed; go back to return_path
    for t in pending:
        t.cancel()
    try:
        init_result = init_task.result()
        logger.info(f"OAuth initialization completed without redirect: {init_result}")
    except Exception as e:
        if isinstance(e, ExceptionGroup):
            saved_e = log_exception_group(e)
        else:
            saved_e = e
        logger.error(f"OAuth initialization failed: {saved_e}")
        # If initialize failed and we also didn't get an auth URL, surface an error
        raise HTTPException(
            status_code=400, detail=f"Failed to initialize OAuth client: {str(saved_e)}"
        )

    return MCPUserOAuthConnectResponse(
        server_id=int(request.server_id),
        oauth_url=request.return_path,
    )


@router.post("/oauth/callback", response_model=MCPOAuthCallbackResponse)
async def process_oauth_callback(
    request: Request,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPOAuthCallbackResponse:
    """Complete OAuth flow by exchanging code for tokens and storing them.

    Notes:
    - For demo/test servers (like run_mcp_server_oauth.py), the token endpoint
      and parameters may be fixed. In production, use the server's metadata
      (e.g., well-known endpoints) to discover token URL and scopes.
    """

    # Get callback data from query parameters (like federated OAuth does)
    callback_data = dict(request.query_params)

    redis_client = get_redis_client()
    state = callback_data.get("state")
    code = callback_data.get("code")
    user_id = str(user.id)
    if not state:
        raise HTTPException(status_code=400, detail="Missing state parameter")
    if not code:
        raise HTTPException(status_code=400, detail="Missing code parameter")
    stored_data = cast(bytes, redis_client.get(key_state(user_id)))
    if not stored_data:
        raise HTTPException(
            status_code=400, detail="Invalid or expired state parameter"
        )
    state_data = MCPOauthState.model_validate_json(stored_data)
    try:
        server_id = state_data.server_id
        mcp_server = get_mcp_server_by_id(server_id, db_session)
    except Exception:
        raise HTTPException(status_code=404, detail="MCP server not found")

    user_id = str(user.id)

    r = get_redis_client()

    # Unblock the callback_handler in the asyncio background task
    r.rpush(key_code(user_id, state), json.dumps({"code": code, "state": state}))
    r.expire(key_code(user_id, state), OAUTH_WAIT_SECONDS)

    admin_config = mcp_server.admin_connection_config
    if admin_config is None:
        raise HTTPException(
            status_code=400,
            detail="Server referenced by callback is not configured, try recreating",
        )

    # Run the blocking blpop operation in a thread pool to avoid blocking the event loop
    # Wait until set_tokens is called
    admin_config_id = admin_config.id
    loop = asyncio.get_running_loop()
    tokens_raw = await loop.run_in_executor(
        None,
        lambda: r.blpop([key_tokens(str(admin_config_id))], timeout=OAUTH_WAIT_SECONDS),
    )
    if tokens_raw is None:
        raise HTTPException(status_code=400, detail="No tokens found")
    tokens_bytes = cast(tuple[bytes, bytes], tokens_raw)
    tokens = OAuthToken.model_validate_json(tokens_bytes[1].decode())

    if not tokens.access_token:
        raise HTTPException(status_code=400, detail="No access_token in OAuth response")

    db_session.commit()

    logger.info(
        f"server_id={str(mcp_server.id)} server_name={mcp_server.name} return_path={state_data.return_path}"
    )

    return MCPOAuthCallbackResponse(
        success=True,
        server_id=mcp_server.id,
        server_name=mcp_server.name,
        message=f"OAuth authorization completed successfully for {mcp_server.name}",
        redirect_url=state_data.return_path,
    )


@router.post("/user-credentials", response_model=MCPApiKeyResponse)
def save_user_credentials(
    request: MCPUserCredentialsRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPApiKeyResponse:
    """Save user credentials for template-based MCP server authentication"""

    logger.info(f"Saving user credentials for server: {request.server_id}")

    try:
        server_id = request.server_id
        mcp_server = get_mcp_server_by_id(server_id, db_session)
    except Exception:
        raise HTTPException(status_code=404, detail="MCP server not found")

    if mcp_server.auth_type == "none":
        raise HTTPException(
            status_code=400,
            detail="Server does not require authentication",
        )

    email = user.email

    # Get the authentication template for this server
    auth_template = get_server_auth_template(server_id, db_session)
    if not auth_template:
        # Fallback to simple API key storage for servers without templates
        if "api_key" not in request.credentials:
            raise HTTPException(
                status_code=400,
                detail="No authentication template found and no api_key provided",
            )
        config_data = MCPConnectionData(
            headers={"Authorization": f"Bearer {request.credentials['api_key']}"},
        )
    else:
        # Use template to create the full connection config
        try:
            # TODO: fix and/or type correctly w/base model
            auth_template_dict = extract_connection_data(
                auth_template, apply_mask=False
            )
            config_data = MCPConnectionData(
                headers=auth_template_dict.get("headers", {}),
                header_substitutions=request.credentials,
            )
            for oauth_field_key in MCPOAuthKeys:
                field_key: Literal["client_info", "tokens", "metadata"] = (
                    oauth_field_key.value
                )
                if field_val := auth_template_dict.get(field_key):
                    config_data[field_key] = field_val

        except Exception as e:
            logger.error(f"Failed to process authentication template: {e}")
            raise HTTPException(
                status_code=400,
                detail=f"Failed to process authentication template: {str(e)}",
            )

    # Test the credentials before saving
    validation_tested = False
    validation_message = "Credentials saved successfully"

    try:
        auth = None
        if mcp_server.auth_type == MCPAuthenticationType.OAUTH:
            # should only be saving user creds if an admin config exists
            assert mcp_server.admin_connection_config_id is not None
            auth = make_oauth_provider(
                mcp_server,
                email,
                UNUSED_RETURN_PATH,
                mcp_server.admin_connection_config_id,
                None,
            )

        if HEADER_SUBSTITUTIONS in config_data:
            for key, value in config_data[HEADER_SUBSTITUTIONS].items():
                for k, v in config_data["headers"].items():
                    config_data["headers"][k] = v.replace(f"{{{key}}}", value)

        server_url = mcp_server.server_url
        is_valid, test_message = test_mcp_server_credentials(
            server_url,
            config_data["headers"],
            transport=MCPTransport(request.transport.replace("-", "_").upper()),
            auth=auth,
        )
        validation_tested = True

        if not is_valid:
            raise HTTPException(
                status_code=400,
                detail=f"Credentials validation failed: {test_message}",
            )
        else:
            validation_message = (
                f"Credentials saved and validated successfully. {test_message}"
            )

    except HTTPException:
        raise  # Re-raise HTTP exceptions
    except Exception as e:
        logger.warning(
            f"Could not validate credentials for server {mcp_server.name}: {e}"
        )
        validation_message = "Credentials saved but could not be validated"

    try:
        # Save the processed credentials
        upsert_user_connection_config(
            server_id=server_id,
            user_email=email,
            config_data=config_data,
            db_session=db_session,
        )

        logger.info(
            f"User credentials saved for server {mcp_server.name} and user {email}"
        )
        db_session.commit()

        return MCPApiKeyResponse(
            success=True,
            message=validation_message,
            server_id=request.server_id,
            server_name=mcp_server.name,
            authenticated=True,
            validation_tested=validation_tested,
        )

    except Exception as e:
        logger.error(f"Failed to save user credentials: {e}")
        raise HTTPException(status_code=500, detail="Failed to save user credentials")


class MCPToolDescription(BaseModel):
    id: int
    name: str
    display_name: str
    description: str


class ServerToolsResponse(BaseModel):
    server_id: int
    server_name: str
    server_url: str
    tools: list[MCPToolDescription]


def _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User) -> None:
    logger.info(
        f"Ensuring MCP server owner or admin: {server.name} {user} {user.role} server.owner={server.owner}"
    )
    if user.role == UserRole.ADMIN:
        return

    logger.info(f"User email: {user.email} server.owner={server.owner}")
    if server.owner != user.email:
        raise HTTPException(
            status_code=403,
            detail="Curators can only modify MCP servers that they have created.",
        )


def _db_mcp_server_to_api_mcp_server(
    db_server: DbMCPServer,
    db: Session,
    request_user: User | None,
    include_auth_config: bool = False,
) -> MCPServer:
    """Convert database MCP server to API model"""

    email = request_user.email if request_user else ""

    # Check if user has authentication configured and extract credentials
    auth_performer = db_server.auth_performer
    user_authenticated: bool | None = None
    user_credentials = None
    admin_credentials = None
    can_view_admin_credentials = bool(include_auth_config) and (
        request_user is not None
        and (
            request_user.role == UserRole.ADMIN
            or (request_user.email and request_user.email == db_server.owner)
        )
    )
    if db_server.auth_type == MCPAuthenticationType.NONE:
        user_authenticated = True  # No auth required
    elif auth_performer == MCPAuthenticationPerformer.ADMIN:
        user_authenticated = db_server.admin_connection_config is not None
        if (
            can_view_admin_credentials
            and db_server.admin_connection_config is not None
            and include_auth_config
        ):
            admin_config_dict = extract_connection_data(
                db_server.admin_connection_config, apply_mask=False
            )
            if db_server.auth_type == MCPAuthenticationType.API_TOKEN:
                raw_api_key = admin_config_dict["headers"]["Authorization"].split(" ")[
                    -1
                ]
                admin_credentials = {
                    "api_key": mask_string(raw_api_key),
                }
            elif db_server.auth_type == MCPAuthenticationType.OAUTH:
                user_authenticated = False
                client_info = None
                client_info_raw = admin_config_dict.get(MCPOAuthKeys.CLIENT_INFO.value)
                if client_info_raw:
                    client_info = OAuthClientInformationFull.model_validate(
                        client_info_raw
                    )
                if client_info:
                    if not client_info.client_id:
                        raise ValueError("Stored client info had empty client ID")
                    admin_credentials = {
                        "client_id": mask_string(client_info.client_id),
                    }
                    if client_info.client_secret:
                        admin_credentials["client_secret"] = mask_string(
                            client_info.client_secret
                        )
                else:
                    admin_credentials = {}
                    logger.warning(
                        f"No admin client info found for server {db_server.name}"
                    )
    else:  # currently: per user auth using api key OR oauth
        user_config = get_user_connection_config(db_server.id, email, db)
        user_authenticated = user_config is not None

        if user_authenticated and user_config:
            # Avoid hitting the MCP server when assembling response data.
            if (
                include_auth_config
                and db_server.auth_type != MCPAuthenticationType.OAUTH
            ):
                user_config_dict = extract_connection_data(user_config, apply_mask=True)
                user_credentials = user_config_dict.get(HEADER_SUBSTITUTIONS, {})

        if (
            db_server.auth_type == MCPAuthenticationType.OAUTH
            and db_server.admin_connection_config
        ):
            client_info = None
            oauth_admin_config_dict = extract_connection_data(
                db_server.admin_connection_config, apply_mask=False
            )
            client_info_raw = oauth_admin_config_dict.get(
                MCPOAuthKeys.CLIENT_INFO.value
            )
            if client_info_raw:
                client_info = OAuthClientInformationFull.model_validate(client_info_raw)
            if client_info:
                if not client_info.client_id:
                    raise ValueError("Stored client info had empty client ID")
                if can_view_admin_credentials:
                    admin_credentials = {
                        "client_id": mask_string(client_info.client_id),
                    }
                    if client_info.client_secret:
                        admin_credentials["client_secret"] = mask_string(
                            client_info.client_secret
                        )
            elif can_view_admin_credentials:
                admin_credentials = {}
                logger.warning(f"No client info found for server {db_server.name}")

    # Get auth template if this is a per-user auth server
    auth_template = None
    if auth_performer == MCPAuthenticationPerformer.PER_USER:
        try:
            template_config = db_server.admin_connection_config
            if template_config:
                template_config_dict = extract_connection_data(
                    template_config, apply_mask=False
                )
                headers = template_config_dict.get("headers", {})
                auth_template = MCPAuthTemplate(
                    headers=headers,
                    required_fields=[],  # would need to regex, not worth it
                )
        except Exception as e:
            logger.warning(
                f"Failed to parse auth template for server {db_server.name}: {e}"
            )

    is_authenticated: bool = (
        db_server.auth_type == MCPAuthenticationType.NONE.value
        # Pass-through OAuth: user is authenticated via their login OAuth token
        or db_server.auth_type == MCPAuthenticationType.PT_OAUTH
        or (
            auth_performer == MCPAuthenticationPerformer.ADMIN
            and db_server.auth_type != MCPAuthenticationType.OAUTH
            and db_server.admin_connection_config_id is not None
        )
        or (
            auth_performer == MCPAuthenticationPerformer.PER_USER and user_authenticated
        )
    )

    # Calculate tool count from the relationship
    tool_count = len(db_server.current_actions) if db_server.current_actions else 0

    return MCPServer(
        id=db_server.id,
        name=db_server.name,
        description=db_server.description,
        server_url=db_server.server_url,
        owner=db_server.owner,
        transport=db_server.transport,
        auth_type=db_server.auth_type,
        auth_performer=auth_performer,
        is_authenticated=is_authenticated,
        user_authenticated=user_authenticated,
        status=db_server.status,
        last_refreshed_at=db_server.last_refreshed_at,
        tool_count=tool_count,
        auth_template=auth_template,
        user_credentials=user_credentials,
        admin_credentials=admin_credentials,
    )


@router.get("/servers/persona/{assistant_id}", response_model=MCPServersResponse)
def get_mcp_servers_for_assistant(
    assistant_id: str,
    db: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPServersResponse:
    """Get MCP servers for an assistant"""

    logger.info(f"Fetching MCP servers for assistant: {assistant_id}")

    try:
        persona_id = int(assistant_id)
        db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)

        # Convert to API model format with opportunistic token refresh for OAuth
        mcp_servers = [
            _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
            for db_server in db_mcp_servers
        ]

        return MCPServersResponse(assistant_id=assistant_id, mcp_servers=mcp_servers)

    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid assistant ID")
    except Exception as e:
        logger.error(f"Failed to fetch MCP servers: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")


@router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_user(
    db: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPServersResponse:
    """List all MCP servers for use in agent configuration and chat UI.

    This endpoint is intentionally available to all authenticated users so they
    can attach MCP actions to assistants. Sensitive admin credentials are never
    returned.
    """
    db_mcp_servers = get_all_mcp_servers(db)
    mcp_servers = [
        _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
        for db_server in db_mcp_servers
    ]
    return MCPServersResponse(mcp_servers=mcp_servers)


def _get_connection_config(
    mcp_server: DbMCPServer,
    is_admin: bool,  # noqa: ARG001
    user: User,
    db_session: Session,
) -> MCPConnectionConfig | None:
    """
    Get the connection config for an MCP server.
    is_admin is true when we want the config used for the admin panel

    """
    if mcp_server.auth_type == MCPAuthenticationType.NONE:
        return None

    # Pass-through OAuth uses the user's login OAuth token, not a stored config
    if mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:
        return None

    if (
        mcp_server.auth_type == MCPAuthenticationType.API_TOKEN
        and mcp_server.auth_performer == MCPAuthenticationPerformer.ADMIN
    ):
        connection_config = mcp_server.admin_connection_config
    else:
        connection_config = get_user_connection_config(
            server_id=mcp_server.id, user_email=user.email, db_session=db_session
        )

    if not connection_config:
        raise HTTPException(
            status_code=401,
            detail="Authentication required for this MCP server",
        )

    return connection_config


@admin_router.get("/server/{server_id}/tools")
def admin_list_mcp_tools_by_id(
    server_id: int,
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPToolListResponse:
    return _list_mcp_tools_by_id(server_id, db, True, user)


class ToolSnapshotSource(str, Enum):
    DB = "db"
    MCP = "mcp"


@admin_router.get("/server/{server_id}/tools/snapshots")
def get_mcp_server_tools_snapshots(
    server_id: int,
    source: ToolSnapshotSource = ToolSnapshotSource.DB,
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> list[ToolSnapshot]:
    """
    Get tools for an MCP server as ToolSnapshot objects.

    Query Parameters:
    - source: "db" (default) - fetch from database only, "mcp" - discover from MCP server and sync to DB

    Returns: List of ToolSnapshot objects
    """
    from onyx.db.tools import get_tools_by_mcp_server_id

    try:
        # Verify the server exists
        mcp_server = get_mcp_server_by_id(server_id, db)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(mcp_server, user)

    if source == ToolSnapshotSource.MCP:
        try:
            # Discover tools from MCP server and sync to DB
            _list_mcp_tools_by_id(server_id, db, True, user)

            # Successfully discovered tools, update status to CONNECTED
            update_mcp_server__no_commit(
                server_id=server_id,
                db_session=db,
                status=MCPServerStatus.CONNECTED,
                last_refreshed_at=datetime.datetime.now(datetime.timezone.utc),
            )
            db.commit()
        except Exception as e:
            update_mcp_server__no_commit(
                server_id=server_id,
                db_session=db,
                status=MCPServerStatus.AWAITING_AUTH,
            )
            db.commit()

            if isinstance(e, HTTPException):
                # Re-raise HTTP exceptions (e.g. 401, 400) so they are returned to client
                raise e

            logger.error(f"Failed to discover tools for MCP server: {e}")
            raise HTTPException(status_code=500, detail="Failed to discover tools")

    # Fetch and return tools from database
    mcp_tools = get_tools_by_mcp_server_id(server_id, db, order_by_id=True)
    return [ToolSnapshot.from_model(tool) for tool in mcp_tools]


@router.get("/server/{server_id}/tools")
def user_list_mcp_tools_by_id(
    server_id: int,
    db: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> MCPToolListResponse:
    return _list_mcp_tools_by_id(server_id, db, False, user)


def _upsert_db_tools(
    discovered_tools: list[MCPLibTool],
    existing_by_name: dict[str, Tool],
    processed_names: set[str],
    mcp_server_id: int,
    db: Session,
) -> bool:
    db_dirty = False

    for tool in discovered_tools:
        tool_name = tool.name
        if not tool_name:
            continue

        processed_names.add(tool_name)
        description = tool.description or ""
        annotations_title = tool.annotations.title if tool.annotations else None
        display_name = tool.title or annotations_title or tool_name
        input_schema = tool.inputSchema

        if existing_tool := existing_by_name.get(tool_name):
            if existing_tool.description != description:
                existing_tool.description = description
                db_dirty = True
            if existing_tool.display_name != display_name:
                existing_tool.display_name = display_name
                db_dirty = True
            if existing_tool.mcp_input_schema != input_schema:
                existing_tool.mcp_input_schema = input_schema
                db_dirty = True
            continue

        new_tool = create_tool__no_commit(
            name=tool_name,
            description=description,
            openapi_schema=None,
            custom_headers=None,
            user_id=None,
            db_session=db,
            passthrough_auth=False,
            mcp_server_id=mcp_server_id,
            enabled=True,
        )
        new_tool.display_name = display_name
        new_tool.mcp_input_schema = input_schema
        db_dirty = True
    return db_dirty


def _list_mcp_tools_by_id(
    server_id: int,
    db: Session,
    is_admin: bool,
    user: User,
) -> MCPToolListResponse:
    """List available tools from an existing MCP server"""
    logger.info(f"Listing tools for MCP server: {server_id}")

    try:
        # Get the MCP server
        mcp_server = get_mcp_server_by_id(server_id, db)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    if is_admin:
        _ensure_mcp_server_owner_or_admin(mcp_server, user)

    # Get connection config based on auth type
    # TODO: for now, only the admin that set up a per-user api key server can
    # see their configuration. This is probably not ideal. Other admins
    # can of course put their own credentials in and list the tools.
    connection_config = _get_connection_config(mcp_server, is_admin, user, db)

    # Allow access for NONE and PT_OAUTH (which use user's login token at runtime)
    if not connection_config and mcp_server.auth_type not in (
        MCPAuthenticationType.NONE,
        MCPAuthenticationType.PT_OAUTH,
    ):
        raise HTTPException(
            status_code=401,
            detail="This MCP server is not configured yet",
        )

    user_id = str(user.id)
    # Discover tools from the MCP server
    auth = None
    headers: dict[str, str] = {}

    if mcp_server.auth_type == MCPAuthenticationType.OAUTH:
        # TODO: just pass this in, but should work when auth is set already
        assert connection_config  # for mypy
        auth = make_oauth_provider(
            mcp_server,
            user_id,
            UNUSED_RETURN_PATH,
            connection_config.id,
            None,
        )
    elif mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:
        # Pass-through OAuth: use the user's login OAuth token
        if user.oauth_accounts:
            user_oauth_token = user.oauth_accounts[0].access_token
            headers["Authorization"] = f"Bearer {user_oauth_token}"
        else:
            raise HTTPException(
                status_code=401,
                detail="Pass-through OAuth requires a user logged in with OAuth",
            )

    if connection_config:
        connection_config_dict = extract_connection_data(
            connection_config, apply_mask=False
        )
        headers.update(connection_config_dict.get("headers", {}))

    import time

    t1 = time.time()
    logger.info(f"Discovering tools for MCP server: {mcp_server.name}: {t1}")
    server_url = mcp_server.server_url

    if mcp_server.transport is None:
        raise HTTPException(
            status_code=400,
            detail="MCP server transport is not configured",
        )

    discovered_tools = discover_mcp_tools(
        server_url,
        headers,
        transport=mcp_server.transport,
        auth=auth,
    )
    logger.info(
        f"Discovered {len(discovered_tools)} tools for MCP server: {mcp_server.name}: {time.time() - t1}"
    )
    update_mcp_server__no_commit(
        server_id=server_id,
        db_session=db,
        status=MCPServerStatus.CONNECTED,
    )
    db.commit()

    if is_admin:
        existing_tools = get_tools_by_mcp_server_id(mcp_server.id, db)
        existing_by_name = {db_tool.name: db_tool for db_tool in existing_tools}
        processed_names: set[str] = set()

        db_dirty = _upsert_db_tools(
            discovered_tools, existing_by_name, processed_names, mcp_server.id, db
        )

        for name, db_tool in existing_by_name.items():
            if name not in processed_names:
                delete_tool__no_commit(db_tool.id, db)
                db_dirty = True

        if db_dirty:
            db.commit()

    # Truncate tool descriptions to prevent overly long responses
    for tool in discovered_tools:
        if tool.description:
            tool.description = _truncate_description(tool.description)

    # TODO: Also list resources from the MCP server
    # resources = discover_mcp_resources(mcp_server, connection_config)

    return MCPToolListResponse(
        server_id=server_id,
        server_name=mcp_server.name,
        server_url=mcp_server.server_url,
        tools=discovered_tools,
    )


def _upsert_mcp_server(
    request: MCPToolCreateRequest,
    db_session: Session,
    user: User,
) -> DbMCPServer:
    """
    Creates a new or edits an existing MCP server. Returns the DB model
    """
    mcp_server = None
    admin_config = None

    changing_connection_config = True

    # Handle existing server update
    if request.existing_server_id:
        try:
            mcp_server = get_mcp_server_by_id(request.existing_server_id, db_session)
        except ValueError:
            raise HTTPException(
                status_code=404,
                detail=f"MCP server with ID {request.existing_server_id} not found",
            )
        _ensure_mcp_server_owner_or_admin(mcp_server, user)
        client_info = None
        if mcp_server.admin_connection_config:
            existing_admin_config_dict = extract_connection_data(
                mcp_server.admin_connection_config, apply_mask=False
            )
            client_info_raw = existing_admin_config_dict.get(
                MCPOAuthKeys.CLIENT_INFO.value
            )
            if client_info_raw:
                client_info = OAuthClientInformationFull.model_validate(client_info_raw)

        changing_connection_config = (
            not mcp_server.admin_connection_config
            or (
                request.auth_type == MCPAuthenticationType.OAUTH
                and (
                    client_info is None
                    or request.oauth_client_id != client_info.client_id
                    or request.oauth_client_secret != (client_info.client_secret or "")
                )
            )
            or (request.auth_type == MCPAuthenticationType.API_TOKEN)
            or (request.transport != mcp_server.transport)
        )

        # Cleanup: Delete existing connection configs
        # If the auth type is OAUTH, delete all user connection configs
        # If the auth type is API_TOKEN, delete the admin connection config and the admin user connection configs
        if (
            changing_connection_config
            and mcp_server.admin_connection_config_id
            and request.auth_type == MCPAuthenticationType.OAUTH
        ):
            delete_all_user_connection_configs_for_server_no_commit(
                mcp_server.id, db_session
            )
        elif (
            changing_connection_config
            and mcp_server.admin_connection_config_id
            and request.auth_type == MCPAuthenticationType.API_TOKEN
        ):
            delete_connection_config(mcp_server.admin_connection_config_id, db_session)
            if user.email:
                delete_user_connection_configs_for_server(
                    mcp_server.id, user.email, db_session
                )

        # Update the server with new values
        mcp_server = update_mcp_server__no_commit(
            server_id=request.existing_server_id,
            db_session=db_session,
            name=request.name,
            description=request.description,
            server_url=request.server_url,
            auth_type=request.auth_type,
            auth_performer=request.auth_performer,
            transport=request.transport,
        )

        logger.info(
            f"Updated existing MCP server '{request.name}' with ID {mcp_server.id}"
        )

    else:
        # Handle new server creation
        # Prevent duplicate server creation with same URL
        normalized_url = (request.server_url or "").strip()
        if not normalized_url:
            raise HTTPException(status_code=400, detail="server_url is required")

        if not user.email:
            raise HTTPException(
                status_code=400,
                detail="Authenticated user email required to create MCP servers",
            )

        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=request.name,
            description=request.description,
            server_url=request.server_url,
            auth_type=request.auth_type,
            auth_performer=request.auth_performer,
            transport=request.transport or MCPTransport.STREAMABLE_HTTP,
            db_session=db_session,
        )

        logger.info(f"Created new MCP server '{request.name}' with ID {mcp_server.id}")

    # PT_OAUTH doesn't need stored connection config (uses user's login token)
    if (
        not changing_connection_config
        or request.auth_type == MCPAuthenticationType.NONE
        or request.auth_type == MCPAuthenticationType.PT_OAUTH
    ):
        return mcp_server

    # Create connection configs
    admin_connection_config_id = None
    if request.auth_performer == MCPAuthenticationPerformer.ADMIN and request.api_token:
        # Admin-managed server: create admin config with API token
        admin_config = create_connection_config(
            config_data=MCPConnectionData(
                headers={"Authorization": f"Bearer {request.api_token}"},
            ),
            mcp_server_id=mcp_server.id,
            db_session=db_session,
        )
        admin_connection_config_id = admin_config.id

    elif request.auth_performer == MCPAuthenticationPerformer.PER_USER:
        if request.auth_type == MCPAuthenticationType.API_TOKEN:
            # handled by model validation, this is just for mypy
            assert request.auth_template and request.admin_credentials

            # Per-user server: create template and save creator's per-user config
            template_data = request.auth_template

            # Create template config: faithful representation of what's in the admin panel
            template_config = create_connection_config(
                config_data=MCPConnectionData(
                    headers=template_data.headers,
                    header_substitutions=request.admin_credentials,
                ),
                mcp_server_id=mcp_server.id,
                user_email="",
                db_session=db_session,
            )

            # seed the user config for this admin user
            user_config = create_connection_config(
                config_data=MCPConnectionData(
                    headers=_build_headers_from_template(
                        template_data, request.admin_credentials, user.email
                    ),
                    header_substitutions=request.admin_credentials,
                ),
                mcp_server_id=mcp_server.id,
                user_email=user.email,
                db_session=db_session,
            )
            user_config.mcp_server_id = mcp_server.id
            admin_connection_config_id = template_config.id
        elif request.auth_type == MCPAuthenticationType.OAUTH:
            # Create initial admin config. If client credentials were provided,
            # seed client_info so the OAuth provider can skip dynamic
            # registration; otherwise, the provider will attempt it.
            cfg: MCPConnectionData = MCPConnectionData(headers={})
            if request.oauth_client_id:
                client_info = OAuthClientInformationFull(
                    client_id=request.oauth_client_id,
                    client_secret=request.oauth_client_secret,
                    redirect_uris=[AnyUrl(f"{WEB_DOMAIN}/mcp/oauth/callback")],
                    grant_types=["authorization_code", "refresh_token"],
                    response_types=["code"],
                    scope=REQUESTED_SCOPE,  # TODO: allow specifying scopes?
                    # default token_endpoint_auth_method is client_secret_post
                )
                cfg[MCPOAuthKeys.CLIENT_INFO.value] = client_info.model_dump(
                    mode="json"
                )

            admin_config = create_connection_config(
                config_data=cfg,
                mcp_server_id=mcp_server.id,
                user_email="",
                db_session=db_session,
            )
            admin_connection_config_id = admin_config.id

            # create user connection config
            create_connection_config(
                config_data=cfg,
                mcp_server_id=mcp_server.id,
                user_email=user.email,
                db_session=db_session,
            )
    elif request.auth_performer == MCPAuthenticationPerformer.ADMIN:
        raise HTTPException(
            status_code=400,
            detail="Admin authentication is not yet supported for MCP servers: user per-user",
        )

    # Update server with config IDs
    if admin_connection_config_id is not None:
        mcp_server = update_mcp_server__no_commit(
            server_id=mcp_server.id,
            db_session=db_session,
            admin_connection_config_id=admin_connection_config_id,
        )

    db_session.commit()
    return mcp_server


def _sync_tools_for_server(
    mcp_server: DbMCPServer,
    selected_tools: set[str],
    db_session: Session,
) -> int:
    """Toggle enabled state for MCP tools that exist for the server.
    Updates to the db model of a tool all happen when the user Lists Tools.
    This ensures that the the tools added to the db match what the user sees in the UI,
    even if the underlying tool has changed on the server after list tools is called.
    That's a corner case anyways; the admin should go back and update the server by re-listing tools.
    """

    updated_tools = 0

    existing_tools = get_tools_by_mcp_server_id(mcp_server.id, db_session)
    existing_by_name = {tool.name: tool for tool in existing_tools}

    # Disable any existing tools that were not processed above
    for tool_name, db_tool in existing_by_name.items():
        should_enable = tool_name in selected_tools
        if db_tool.enabled != should_enable:
            db_tool.enabled = should_enable
            updated_tools += 1

    return updated_tools


@admin_router.get("/servers/{server_id}", response_model=MCPServer)
def get_mcp_server_detail(
    server_id: int,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServer:
    """Return details for one MCP server if user has access"""
    try:
        server = get_mcp_server_by_id(server_id, db_session)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(server, user)

    # TODO: user permissions per mcp server not yet implemented, for now
    # permissions are based on access to assistants
    # # Quick permission check – admin or user has access
    # if user and server not in user.accessible_mcp_servers and not user.is_superuser:
    #     raise HTTPException(status_code=403, detail="Forbidden")

    return _db_mcp_server_to_api_mcp_server(
        server,
        db_session,
        include_auth_config=True,
        request_user=user,
    )


@admin_router.get("/tools")
def get_all_mcp_tools(
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001
) -> list:
    """Get all tools associated with MCP servers, including both enabled and disabled tools"""
    from sqlalchemy import select
    from onyx.db.models import Tool

    # Query MCP tools ordered by ID to maintain consistent ordering
    stmt = select(Tool).where(Tool.mcp_server_id.is_not(None)).order_by(Tool.id)

    mcp_tools = db.scalars(stmt).all()

    # Convert to ToolSnapshot format
    return [ToolSnapshot.from_model(tool) for tool in mcp_tools]


@admin_router.patch("/server/{server_id}/status")
def update_mcp_server_status(
    server_id: int,
    status: MCPServerStatus,
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Update the status of an MCP server"""
    logger.info(f"Updating MCP server {server_id} status to {status}")

    try:
        mcp_server = get_mcp_server_by_id(server_id, db)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(mcp_server, user)

    update_mcp_server__no_commit(
        server_id=server_id,
        db_session=db,
        status=status,
    )
    db.commit()

    logger.info(f"Successfully updated MCP server {server_id} status to {status}")
    return {"message": f"Server status updated to {status.value}"}


@admin_router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_admin(
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServersResponse:
    """Get all MCP servers for admin display"""

    logger.info("Fetching all MCP servers for admin display")

    try:
        db_mcp_servers = get_all_mcp_servers(db)

        # Convert to API model format
        mcp_servers = [
            _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
            for db_server in db_mcp_servers
        ]

        return MCPServersResponse(mcp_servers=mcp_servers)

    except Exception as e:
        logger.error(f"Failed to fetch MCP servers for admin: {type(e)}:{e}")
        raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")


@admin_router.get("/server/{server_id}/db-tools")
def get_mcp_server_db_tools(
    server_id: int,
    db: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> ServerToolsResponse:
    """Get existing database tools created for an MCP server"""
    logger.info(f"Getting database tools for MCP server: {server_id}")

    try:
        # Verify the server exists
        mcp_server = get_mcp_server_by_id(server_id, db)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(mcp_server, user)

    # Get all tools associated with this MCP server
    mcp_tools = get_tools_by_mcp_server_id(server_id, db)

    # Convert to response format
    tools_data = []
    for tool in mcp_tools:
        # Extract the tool name from the full name (remove server prefix)
        tool_name = tool.name
        if tool.mcp_server and tool_name.startswith(f"{tool.mcp_server.name}_"):
            tool_name = tool_name[len(f"{tool.mcp_server.name}_") :]

        tools_data.append(
            MCPToolDescription(
                id=tool.id,
                name=tool_name,
                display_name=tool.display_name or tool_name,
                description=_truncate_description(tool.description),
            )
        )

    return ServerToolsResponse(
        server_id=server_id,
        server_name=mcp_server.name,
        server_url=mcp_server.server_url,
        tools=tools_data,
    )


@admin_router.post("/servers/create", response_model=MCPServerCreateResponse)
def upsert_mcp_server(
    request: MCPToolCreateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServerCreateResponse:
    """Create or update an MCP server (no tools yet)"""

    # Validate auth_performer for non-none auth types
    if request.auth_type != MCPAuthenticationType.NONE and not request.auth_performer:
        raise HTTPException(
            status_code=400, detail="auth_performer is required for non-none auth types"
        )

    try:
        mcp_server = _upsert_mcp_server(request, db_session, user)

        if (
            request.auth_type
            not in (MCPAuthenticationType.NONE, MCPAuthenticationType.PT_OAUTH)
            and mcp_server.admin_connection_config_id is None
        ):
            raise HTTPException(
                status_code=500, detail="Failed to set admin connection config"
            )
        db_session.commit()

        action_verb = "Updated" if request.existing_server_id else "Created"
        logger.info(
            f"{action_verb} MCP server '{request.name}' with ID {mcp_server.id}"
        )

        if mcp_server.auth_type is None:
            raise HTTPException(
                status_code=500, detail="MCP server auth_type not configured"
            )
        auth_type_str = mcp_server.auth_type.value

        return MCPServerCreateResponse(
            server_id=mcp_server.id,
            server_name=mcp_server.name,
            server_url=mcp_server.server_url,
            auth_type=auth_type_str,
            auth_performer=(
                request.auth_performer.value if request.auth_performer else None
            ),
            is_authenticated=(
                mcp_server.auth_type == MCPAuthenticationType.NONE.value
                or request.auth_performer == MCPAuthenticationPerformer.ADMIN
            ),
        )

    except HTTPException:
        # Re-raise HTTP exceptions as-is
        raise
    except Exception as e:
        logger.exception("Failed to create/update MCP tool")
        raise HTTPException(
            status_code=500, detail=f"Failed to create/update MCP tool: {str(e)}"
        )


@admin_router.post("/servers/update", response_model=MCPServerUpdateResponse)
def update_mcp_server_with_tools(
    request: MCPToolUpdateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServerUpdateResponse:
    """Update an MCP server and associated tools"""

    try:
        mcp_server = get_mcp_server_by_id(request.server_id, db_session)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(mcp_server, user)

    if mcp_server.admin_connection_config_id is None and mcp_server.auth_type not in (
        MCPAuthenticationType.NONE,
        MCPAuthenticationType.PT_OAUTH,
    ):
        raise HTTPException(
            status_code=400, detail="MCP server has no admin connection config"
        )

    name_changed = request.name is not None and request.name != mcp_server.name
    description_changed = (
        request.description is not None
        and request.description != mcp_server.description
    )
    if name_changed or description_changed:
        mcp_server = update_mcp_server__no_commit(
            server_id=mcp_server.id,
            db_session=db_session,
            name=request.name if name_changed else None,
            description=request.description if description_changed else None,
        )

    selected_names = set(request.selected_tools or [])
    updated_tools = _sync_tools_for_server(
        mcp_server,
        selected_names,
        db_session,
    )

    db_session.commit()

    return MCPServerUpdateResponse(
        server_id=mcp_server.id,
        server_name=mcp_server.name,
        updated_tools=updated_tools,
    )


@admin_router.post("/server", response_model=MCPServer)
def create_mcp_server_simple(
    request: MCPServerSimpleCreateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServer:
    """Create MCP server with minimal information - auth to be configured later"""

    mcp_server = create_mcp_server__no_commit(
        owner_email=user.email,
        name=request.name,
        description=request.description,
        server_url=request.server_url,
        auth_type=None,  # To be configured later
        transport=None,  # To be configured later
        auth_performer=None,  # To be configured later
        db_session=db_session,
    )

    db_session.commit()

    return MCPServer(
        id=mcp_server.id,
        name=mcp_server.name,
        description=mcp_server.description,
        server_url=mcp_server.server_url,
        owner=mcp_server.owner,
        transport=mcp_server.transport,
        auth_type=mcp_server.auth_type,
        auth_performer=mcp_server.auth_performer,
        is_authenticated=False,  # Not authenticated yet
        status=mcp_server.status,
        tool_count=0,  # New server, no tools yet
        auth_template=None,
        user_credentials=None,
        admin_credentials=None,
    )


@admin_router.patch("/server/{server_id}", response_model=MCPServer)
def update_mcp_server_simple(
    server_id: int,
    request: MCPServerSimpleUpdateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> MCPServer:
    """Update MCP server basic information (name, description, URL)"""
    try:
        mcp_server = get_mcp_server_by_id(server_id, db_session)
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")

    _ensure_mcp_server_owner_or_admin(mcp_server, user)

    # Update only provided fields
    updated_server = update_mcp_server__no_commit(
        server_id=server_id,
        db_session=db_session,
        name=request.name,
        description=request.description,
        server_url=request.server_url,
    )

    db_session.commit()

    # Return the updated server in API format
    return _db_mcp_server_to_api_mcp_server(
        updated_server, db_session, request_user=user
    )


@admin_router.delete("/server/{server_id}")
def delete_mcp_server_admin(
    server_id: int,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> dict:
    """Delete an MCP server and cascading related objects (tools, configs)."""
    try:
        # Ensure it exists
        server = get_mcp_server_by_id(server_id, db_session)

        _ensure_mcp_server_owner_or_admin(server, user)

        # Log tools that will be deleted for debugging
        tools_to_delete = get_tools_by_mcp_server_id(server_id, db_session)
        logger.info(
            f"Deleting MCP server {server_id} ({server.name}) with {len(tools_to_delete)} tools"
        )
        for tool in tools_to_delete:
            logger.debug(f"  - Tool to delete: {tool.name} (ID: {tool.id})")

        # Cascade behavior handled by FK ondelete in DB
        delete_mcp_server(server_id, db_session)

        # Verify tools were deleted
        remaining_tools = get_tools_by_mcp_server_id(server_id, db_session)
        if remaining_tools:
            logger.error(
                f"WARNING: {len(remaining_tools)} tools still exist after deleting MCP server {server_id}"
            )
            # Manually delete them as a fallback
            for tool in remaining_tools:
                logger.info(
                    f"Manually deleting orphaned tool: {tool.name} (ID: {tool.id})"
                )
                delete_tool__no_commit(tool.id, db_session)
        db_session.commit()

        return {"success": True}
    except ValueError:
        raise HTTPException(status_code=404, detail="MCP server not found")
    except Exception as e:
        logger.error(f"Failed to delete MCP server {server_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to delete MCP server")


================================================
FILE: backend/onyx/server/features/mcp/models.py
================================================
import datetime
from enum import Enum
from typing import Any
from typing import List
from typing import NotRequired
from typing import Optional
from typing import TypedDict

from mcp.types import Tool as MCPLibTool
from pydantic import BaseModel
from pydantic import Field
from pydantic import model_validator

from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPServerStatus
from onyx.db.enums import MCPTransport


# This should be updated along with MCPConnectionData
class MCPOAuthKeys(str, Enum):
    """MCP OAuth keys types"""

    CLIENT_INFO = "client_info"
    TOKENS = "tokens"
    METADATA = "metadata"


class MCPConnectionData(TypedDict):
    """TypedDict to allow use as a type hint for a JSONB column
    in Postgres"""

    headers: dict[str, str]
    header_substitutions: NotRequired[dict[str, str]]

    # For OAuth only
    # Note: Update MCPOAuthKeys if necessary when modifying these
    # Unfortunately we can't use the actual models here because basemodels aren't compatible
    # with SQLAlchemy
    client_info: NotRequired[dict[str, Any]]  # OAuthClientInformationFull
    tokens: NotRequired[dict[str, Any]]  # OAuthToken
    metadata: NotRequired[dict[str, Any]]  # OAuthClientMetadata

    # the actual models are defined in mcp.shared.auth
    # from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken


class MCPAuthTemplate(BaseModel):
    """Template for per-user authentication configuration"""

    headers: dict[str, str] = Field(
        default_factory=dict,
        description="Map of header names to templates with placeholders",
    )
    # request_body_params: List[dict[str, str]] = Field(
    #     default_factory=list,
    #     description="List of request body parameter templates with path/value pairs",
    # ) # not used yet
    required_fields: List[str] = Field(
        default_factory=list,
        description="List of required field names that users must provide",
    )


class MCPToolCreateRequest(BaseModel):
    name: str = Field(..., description="Name of the MCP tool")
    description: Optional[str] = Field(None, description="Description of the MCP tool")
    server_url: str = Field(..., description="URL of the MCP server")
    auth_type: MCPAuthenticationType = Field(..., description="Authentication type")
    auth_performer: MCPAuthenticationPerformer = Field(
        ..., description="Who performs authentication"
    )
    api_token: Optional[str] = Field(
        None, description="API token for api_token auth type"
    )
    oauth_client_id: Optional[str] = Field(None, description="OAuth client ID")
    oauth_client_secret: Optional[str] = Field(None, description="OAuth client secret")
    transport: MCPTransport | None = Field(
        None, description="MCP transport type (STREAMABLE_HTTP or SSE)"
    )
    auth_template: Optional[MCPAuthTemplate] = Field(
        None, description="Template configuration for per-user authentication"
    )
    admin_credentials: Optional[dict[str, str]] = Field(
        None,
        description="Admin's credential key-value pairs for template substitution and storage",
    )
    existing_server_id: Optional[int] = Field(
        None, description="ID of existing server to update (for editing)"
    )

    @model_validator(mode="after")
    def validate_auth_configuration(self) -> "MCPToolCreateRequest":
        # Validate API token requirements for admin auth
        if (
            self.auth_type == MCPAuthenticationType.API_TOKEN
            and self.auth_performer == MCPAuthenticationPerformer.ADMIN
            and not self.api_token
        ):
            raise ValueError(
                "api_token is required when auth_type is 'api_token' and auth_performer is 'admin'"
            )

        # Validate that API token is not provided for per-user auth
        if (
            self.auth_type == MCPAuthenticationType.API_TOKEN
            and self.auth_performer == MCPAuthenticationPerformer.PER_USER
            and self.api_token
            and self.api_token.strip()
        ):
            raise ValueError(
                "api_token should not be provided when auth_performer is 'per_user'. Users will provide their own credentials."
            )

        # Validate that auth_template is provided for per-user auth
        if (
            self.auth_type == MCPAuthenticationType.API_TOKEN
            and self.auth_performer == MCPAuthenticationPerformer.PER_USER
        ):
            if not self.auth_template:
                raise ValueError(
                    "auth_template is required when auth_performer is 'per_user'"
                )
            if not self.admin_credentials:
                raise ValueError(
                    "admin_credentials is required when auth_performer is 'per_user'"
                )

        # OAuth client ID/secret are optional. If provided, they will seed the
        # OAuth client info; otherwise, the MCP client will attempt dynamic
        # client registration.

        return self


class MCPToolUpdateRequest(BaseModel):
    server_id: int = Field(..., description="ID of the MCP server")
    name: Optional[str] = Field(None, description="Updated name of the MCP server")
    description: Optional[str] = Field(
        None, description="Updated description of the MCP server"
    )
    selected_tools: Optional[List[str]] = Field(
        None, description="List of selected tool names to create"
    )


class MCPServerSimpleCreateRequest(BaseModel):
    name: str = Field(..., description="Name of the MCP server")
    description: Optional[str] = Field(
        None, description="Description of the MCP server"
    )
    server_url: str = Field(..., description="URL of the MCP server")


class MCPServerSimpleUpdateRequest(BaseModel):
    name: Optional[str] = Field(None, description="Name of the MCP server")
    description: Optional[str] = Field(
        None, description="Description of the MCP server"
    )
    server_url: Optional[str] = Field(None, description="URL of the MCP server")


class MCPToolResponse(BaseModel):
    id: int
    name: str
    display_name: str
    description: str
    definition: Optional[dict] = None  # MCP tools don't use OpenAPI definitions
    custom_headers: List[dict] = []
    in_code_tool_id: Optional[str] = None
    passthrough_auth: bool = False
    # MCP-specific fields
    server_url: str
    auth_type: str
    auth_performer: Optional[str] = None
    is_authenticated: bool


class MCPOAuthConnectRequest(BaseModel):
    name: str = Field(..., description="Name of the MCP tool")
    description: Optional[str] = Field(None, description="Description of the MCP tool")
    server_url: str = Field(..., description="URL of the MCP server")
    selected_tools: Optional[List[str]] = Field(
        None, description="List of selected tool names to create"
    )
    existing_server_id: Optional[int] = Field(
        None, description="ID of existing server to update (for editing)"
    )


class MCPOAuthConnectResponse(BaseModel):
    oauth_url: str = Field(..., description="OAuth URL to redirect user to")
    state: str = Field(..., description="OAuth state parameter")
    pending_tool: dict = Field(..., description="Pending tool configuration")


class MCPUserOAuthConnectRequest(BaseModel):
    server_id: int = Field(..., description="ID of the MCP server")
    return_path: str = Field(..., description="Path to redirect to after callback")
    include_resource_param: bool = Field(..., description="Include resource parameter")
    oauth_client_id: str | None = Field(
        None, description="OAuth client ID (optional for DCR)"
    )
    oauth_client_secret: str | None = Field(
        None, description="OAuth client secret (optional for DCR)"
    )

    @model_validator(mode="after")
    def validate_return_path(self) -> "MCPUserOAuthConnectRequest":
        if not self.return_path.startswith("/"):
            raise ValueError("return_path must start with a slash")
        return self


class MCPUserOAuthConnectResponse(BaseModel):
    server_id: int
    oauth_url: str = Field(..., description="OAuth URL to redirect user to")


class MCPOAuthCallbackRequest(BaseModel):
    """Request payload for completing OAuth flow (authorization code exchange)."""

    code: str = Field(..., description="Authorization code returned by the IdP")
    state: Optional[str] = Field(
        None, description="State parameter for CSRF protection"
    )


class MCPOAuthCallbackResponse(BaseModel):
    success: bool
    message: str
    server_id: int
    server_name: str
    redirect_url: str


class MCPDynamicClientRegistrationRequest(BaseModel):
    """Request for dynamic client registration per RFC 7591"""

    server_id: int = Field(..., description="MCP server ID")
    authorization_server_url: str = Field(
        ...,
        description="Authorization server URL discovered from WWW-Authenticate or metadata",
    )


class MCPDynamicClientRegistrationResponse(BaseModel):
    """Response from dynamic client registration"""

    client_id: str = Field(..., description="Registered client ID")
    client_secret: Optional[str] = Field(
        None, description="Client secret if confidential client"
    )
    registration_access_token: Optional[str] = Field(
        None, description="Token for managing this client registration"
    )
    registration_client_uri: Optional[str] = Field(
        None, description="URI for managing this client registration"
    )


class MCPApiKeyRequest(BaseModel):
    server_id: int = Field(..., description="ID of the MCP server")
    api_key: str = Field(..., description="API key to store")
    transport: str = Field(..., description="Transport type")


class MCPUserCredentialsRequest(BaseModel):
    """Enhanced request for template-based user credentials"""

    server_id: int = Field(..., description="ID of the MCP server")
    credentials: dict[str, str] = Field(
        ..., description="User-provided credentials (api_key, custom_token, etc.)"
    )
    transport: str = Field(..., description="Transport type")


class MCPApiKeyResponse(BaseModel):
    success: bool
    message: str
    server_id: int
    server_name: str
    authenticated: bool
    validation_tested: bool = Field(
        default=False, description="Whether credentials were tested against MCP server"
    )


class MCPServer(BaseModel):
    id: int
    name: str
    description: Optional[str] = None
    server_url: str
    owner: str
    transport: Optional[MCPTransport] = None
    auth_type: Optional[MCPAuthenticationType] = None
    auth_performer: Optional[MCPAuthenticationPerformer] = None
    is_authenticated: bool
    user_authenticated: Optional[bool] = None
    status: MCPServerStatus
    last_refreshed_at: Optional[datetime.datetime] = None
    tool_count: int = Field(
        default=0, description="Number of tools associated with this server"
    )
    auth_template: Optional[MCPAuthTemplate] = Field(
        None, description="Authentication template for per-user auth"
    )
    user_credentials: Optional[dict[str, str]] = Field(
        None, description="User's existing credentials for pre-filling forms"
    )
    admin_credentials: Optional[dict[str, str]] = Field(
        None,
        description="Admin's credential key-value pairs for template substitution and storage",
    )


class MCPServersResponse(BaseModel):
    assistant_id: str | None = None
    mcp_servers: List[MCPServer]


class MCPServerCreateResponse(BaseModel):
    """Response for creating multiple MCP tools"""

    server_id: int
    server_name: str
    server_url: str
    auth_type: str
    auth_performer: Optional[str]
    is_authenticated: bool


class MCPServerUpdateResponse(BaseModel):
    """Response for updating multiple MCP tools"""

    server_id: int
    server_name: str
    updated_tools: int


class MCPToolListResponse(BaseModel):
    server_id: int
    server_name: str
    server_url: str
    tools: list[MCPLibTool]


================================================
FILE: backend/onyx/server/features/notifications/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.notification import dismiss_notification
from onyx.db.notification import get_notification_by_id
from onyx.db.notification import get_notifications
from onyx.server.features.build.utils import ensure_build_mode_intro_notification
from onyx.server.features.release_notes.utils import (
    ensure_release_notes_fresh_and_notify,
)
from onyx.server.settings.models import Notification as NotificationModel
from onyx.utils.logger import setup_logger

logger = setup_logger()
router = APIRouter(prefix="/notifications")


@router.get("")
def get_notifications_api(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[NotificationModel]:
    """
    Get all undismissed notifications for the current user.

    Note: also executes background checks that should create notifications.

    Examples of checks that create new notifications:
    - Checking for new release notes the user hasn't seen
    - Checking for misconfigurations due to version changes
    - Explicitly announcing breaking changes
    """
    # Background checks that create notifications
    try:
        ensure_build_mode_intro_notification(user, db_session)
    except Exception:
        logger.exception(
            "Failed to check for build mode intro in notifications endpoint"
        )

    try:
        ensure_release_notes_fresh_and_notify(db_session)
    except Exception:
        logger.exception("Failed to check for release notes in notifications endpoint")

    notifications = [
        NotificationModel.from_model(notif)
        for notif in get_notifications(user, db_session, include_dismissed=True)
    ]
    return notifications


@router.post("/{notification_id}/dismiss")
def dismiss_notification_endpoint(
    notification_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        notification = get_notification_by_id(notification_id, user, db_session)
    except PermissionError:
        raise HTTPException(
            status_code=403, detail="Not authorized to dismiss this notification"
        )
    except ValueError:
        raise HTTPException(status_code=404, detail="Notification not found")

    dismiss_notification(notification, db_session)


================================================
FILE: backend/onyx/server/features/oauth_config/__init__.py
================================================
"""OAuth configuration feature module."""

from onyx.server.features.oauth_config.api import admin_router
from onyx.server.features.oauth_config.api import router

__all__ = ["admin_router", "router"]


================================================
FILE: backend/onyx/server/features/oauth_config/api.py
================================================
"""API endpoints for OAuth configuration management."""

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.oauth_token_manager import OAuthTokenManager
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import OAuthConfig
from onyx.db.models import User
from onyx.db.oauth_config import create_oauth_config
from onyx.db.oauth_config import delete_oauth_config
from onyx.db.oauth_config import delete_user_oauth_token
from onyx.db.oauth_config import get_oauth_config
from onyx.db.oauth_config import get_oauth_configs
from onyx.db.oauth_config import get_tools_by_oauth_config
from onyx.db.oauth_config import update_oauth_config
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.federated_connectors.oauth_utils import generate_oauth_state
from onyx.federated_connectors.oauth_utils import verify_oauth_state
from onyx.server.features.oauth_config.models import OAuthCallbackResponse
from onyx.server.features.oauth_config.models import OAuthConfigCreate
from onyx.server.features.oauth_config.models import OAuthConfigSnapshot
from onyx.server.features.oauth_config.models import OAuthConfigUpdate
from onyx.server.features.oauth_config.models import OAuthInitiateRequest
from onyx.server.features.oauth_config.models import OAuthInitiateResponse
from onyx.utils.logger import setup_logger

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/oauth-config")
router = APIRouter(prefix="/oauth-config")


def _oauth_config_to_snapshot(
    oauth_config: OAuthConfig, db_session: Session
) -> OAuthConfigSnapshot:
    """Convert OAuthConfig model to API snapshot."""
    tools = get_tools_by_oauth_config(oauth_config.id, db_session)
    return OAuthConfigSnapshot(
        id=oauth_config.id,
        name=oauth_config.name,
        authorization_url=oauth_config.authorization_url,
        token_url=oauth_config.token_url,
        scopes=oauth_config.scopes,
        has_client_credentials=bool(
            oauth_config.client_id and oauth_config.client_secret
        ),
        tool_count=len(tools),
        created_at=oauth_config.created_at,
        updated_at=oauth_config.updated_at,
    )


"""Admin endpoints for OAuth configuration management"""


@admin_router.post("/create")
def create_oauth_config_endpoint(
    oauth_data: OAuthConfigCreate,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_curator_or_admin_user),
) -> OAuthConfigSnapshot:
    """Create a new OAuth configuration (admin only)."""
    try:
        oauth_config = create_oauth_config(
            name=oauth_data.name,
            authorization_url=oauth_data.authorization_url,
            token_url=oauth_data.token_url,
            client_id=oauth_data.client_id,
            client_secret=oauth_data.client_secret,
            scopes=oauth_data.scopes,
            additional_params=oauth_data.additional_params,
            db_session=db_session,
        )
        return _oauth_config_to_snapshot(oauth_config, db_session)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@admin_router.get("")
def list_oauth_configs(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_curator_or_admin_user),
) -> list[OAuthConfigSnapshot]:
    """List all OAuth configurations (admin only)."""
    oauth_configs = get_oauth_configs(db_session)
    return [_oauth_config_to_snapshot(config, db_session) for config in oauth_configs]


@admin_router.get("/{oauth_config_id}")
def get_oauth_config_endpoint(
    oauth_config_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_curator_or_admin_user),
) -> OAuthConfigSnapshot:
    """Retrieve a single OAuth configuration (admin only)."""
    oauth_config = get_oauth_config(oauth_config_id, db_session)
    if not oauth_config:
        raise HTTPException(
            status_code=404, detail=f"OAuth config with id {oauth_config_id} not found"
        )
    return _oauth_config_to_snapshot(oauth_config, db_session)


@admin_router.put("/{oauth_config_id}")
def update_oauth_config_endpoint(
    oauth_config_id: int,
    oauth_data: OAuthConfigUpdate,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_curator_or_admin_user),
) -> OAuthConfigSnapshot:
    """Update an OAuth configuration (admin only)."""
    try:
        updated_config = update_oauth_config(
            oauth_config_id=oauth_config_id,
            db_session=db_session,
            name=oauth_data.name,
            authorization_url=oauth_data.authorization_url,
            token_url=oauth_data.token_url,
            client_id=oauth_data.client_id,
            client_secret=oauth_data.client_secret,
            scopes=oauth_data.scopes,
            additional_params=oauth_data.additional_params,
            clear_client_id=oauth_data.clear_client_id,
            clear_client_secret=oauth_data.clear_client_secret,
        )
        return _oauth_config_to_snapshot(updated_config, db_session)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@admin_router.delete("/{oauth_config_id}")
def delete_oauth_config_endpoint(
    oauth_config_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Delete an OAuth configuration (admin only)."""
    try:
        delete_oauth_config(oauth_config_id, db_session)
        return {"message": "OAuth configuration deleted successfully"}
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


"""User endpoints for OAuth flow"""


@router.post("/initiate")
def initiate_oauth_flow(
    request: OAuthInitiateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> OAuthInitiateResponse:
    """
    Initiate OAuth flow for the current user.

    Returns an authorization URL that the frontend should redirect the user to.
    """
    # Get OAuth config
    oauth_config = get_oauth_config(request.oauth_config_id, db_session)
    if not oauth_config:
        raise HTTPException(
            status_code=404,
            detail=f"OAuth config with id {request.oauth_config_id} not found",
        )

    # Generate state parameter and store in Redis
    state = generate_oauth_state(
        federated_connector_id=request.oauth_config_id,
        user_id=str(user.id),
        redirect_uri=request.return_path,
        additional_data={"oauth_config_id": request.oauth_config_id},
    )

    # Build authorization URL
    redirect_uri = f"{WEB_DOMAIN}/oauth-config/callback"
    authorization_url = OAuthTokenManager.build_authorization_url(
        oauth_config, redirect_uri, state
    )

    return OAuthInitiateResponse(authorization_url=authorization_url, state=state)


@router.post("/callback")
def handle_oauth_callback(
    code: str,
    state: str,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> OAuthCallbackResponse:
    """
    Handle OAuth callback after user authorizes the application.

    Exchanges the authorization code for an access token and stores it.
    Accepts code and state as query parameters (standard OAuth flow).
    """
    try:
        # Verify state and retrieve session data
        session = verify_oauth_state(state)

        # Verify the user_id matches
        if str(user.id) != session.user_id:
            raise HTTPException(
                status_code=403, detail="User mismatch in OAuth callback"
            )

        # Extract oauth_config_id from session (stored during initiate)
        oauth_config_id = session.federated_connector_id

        # Get OAuth config
        oauth_config = get_oauth_config(oauth_config_id, db_session)
        if not oauth_config:
            raise HTTPException(
                status_code=404,
                detail=f"OAuth config with id {oauth_config_id} not found",
            )

        # Exchange code for token
        redirect_uri = f"{WEB_DOMAIN}/oauth-config/callback"
        token_manager = OAuthTokenManager(oauth_config, user.id, db_session)
        token_data = token_manager.exchange_code_for_token(code, redirect_uri)

        # Store token
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Return success with redirect
        return_path = session.redirect_uri or "/chat"
        return OAuthCallbackResponse(
            redirect_url=return_path,
        )

    except ValueError as e:
        logger.error(f"OAuth callback error: {e}")
        return OAuthCallbackResponse(
            redirect_url="/chat",
            error=str(e),
        )
    except Exception as e:
        logger.error(f"Unexpected OAuth callback error: {e}")
        return OAuthCallbackResponse(
            redirect_url="/chat",
            error="An unexpected error occurred during OAuth callback",
        )


@router.delete("/{oauth_config_id}/token")
def revoke_oauth_token(
    oauth_config_id: int,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> dict[str, str]:
    """
    Revoke (delete) the current user's OAuth token for a specific OAuth config.
    """
    try:
        delete_user_oauth_token(oauth_config_id, user.id, db_session)
        return {"message": "OAuth token revoked successfully"}
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


================================================
FILE: backend/onyx/server/features/oauth_config/models.py
================================================
from datetime import datetime
from typing import Any

from pydantic import BaseModel


class OAuthConfigCreate(BaseModel):
    name: str
    authorization_url: str
    token_url: str
    client_id: str
    client_secret: str
    scopes: list[str] | None = None
    additional_params: dict[str, Any] | None = None


class OAuthConfigUpdate(BaseModel):
    name: str | None = None
    authorization_url: str | None = None
    token_url: str | None = None
    client_id: str | None = None
    client_secret: str | None = None
    scopes: list[str] | None = None
    additional_params: dict[str, Any] | None = None
    clear_client_id: bool = False
    clear_client_secret: bool = False


class OAuthConfigSnapshot(BaseModel):
    id: int
    name: str
    authorization_url: str
    token_url: str
    scopes: list[str] | None
    has_client_credentials: bool  # NEVER expose actual client_id or client_secret
    tool_count: int  # Number of tools using this config
    created_at: datetime
    updated_at: datetime


class OAuthInitiateRequest(BaseModel):
    oauth_config_id: int
    return_path: str = "/chat"  # Where to redirect after OAuth flow


class OAuthInitiateResponse(BaseModel):
    authorization_url: str  # URL to redirect user to
    state: str  # OAuth state parameter for CSRF protection


class OAuthCallbackResponse(BaseModel):
    redirect_url: str
    error: str | None = None


class OAuthTokenStatus(BaseModel):
    oauth_config_id: int
    oauth_config_name: str
    has_token: bool
    expires_at: int | None  # Unix timestamp
    is_expired: bool


================================================
FILE: backend/onyx/server/features/password/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi_users.exceptions import InvalidPasswordException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import get_user_manager
from onyx.auth.users import User
from onyx.auth.users import UserManager
from onyx.db.engine.sql_engine import get_session
from onyx.db.users import get_user_by_email
from onyx.server.features.password.models import ChangePasswordRequest
from onyx.server.features.password.models import UserResetRequest
from onyx.server.features.password.models import UserResetResponse

router = APIRouter(prefix="/password")


@router.post("/change-password")
async def change_my_password(
    form_data: ChangePasswordRequest,
    user_manager: UserManager = Depends(get_user_manager),
    current_user: User = Depends(current_user),
) -> None:
    """
    Change the password for the current user.
    """
    try:
        await user_manager.change_password_if_old_matches(
            user=current_user,
            old_password=form_data.old_password,
            new_password=form_data.new_password,
        )
    except InvalidPasswordException as e:
        raise HTTPException(status_code=400, detail=str(e.reason))
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"An unexpected error occurred: {str(e)}"
        )


@router.post("/reset_password")
async def admin_reset_user_password(
    user_reset_request: UserResetRequest,
    user_manager: UserManager = Depends(get_user_manager),
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> UserResetResponse:
    """
    Reset the password for a user (admin only).
    """
    user = get_user_by_email(user_reset_request.user_email, db_session)
    if not user:
        raise HTTPException(status_code=404, detail="User not found")
    new_password = await user_manager.reset_password_as_admin(user.id)
    return UserResetResponse(
        user_id=str(user.id),
        new_password=new_password,
    )


================================================
FILE: backend/onyx/server/features/password/models.py
================================================
from pydantic import BaseModel


class UserResetRequest(BaseModel):
    user_email: str


class UserResetResponse(BaseModel):
    user_id: str
    new_password: str


class ChangePasswordRequest(BaseModel):
    old_password: str
    new_password: str


================================================
FILE: backend/onyx/server/features/persona/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/persona/api.py
================================================
from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_limited_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.persona import create_assistant_label
from onyx.db.persona import create_update_persona
from onyx.db.persona import delete_persona_label
from onyx.db.persona import get_assistant_labels
from onyx.db.persona import get_minimal_persona_snapshots_for_user
from onyx.db.persona import get_minimal_persona_snapshots_paginated
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import get_persona_count_for_user
from onyx.db.persona import get_persona_snapshots_for_user
from onyx.db.persona import get_persona_snapshots_paginated
from onyx.db.persona import mark_persona_as_deleted
from onyx.db.persona import mark_persona_as_not_deleted
from onyx.db.persona import update_persona_featured
from onyx.db.persona import update_persona_label
from onyx.db.persona import update_persona_public_status
from onyx.db.persona import update_persona_shared
from onyx.db.persona import update_persona_visibility
from onyx.db.persona import update_personas_display_priority
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.persona.constants import ADMIN_AGENTS_RESOURCE
from onyx.server.features.persona.constants import AGENTS_RESOURCE
from onyx.server.features.persona.models import FullPersonaSnapshot
from onyx.server.features.persona.models import MinimalPersonaSnapshot
from onyx.server.features.persona.models import PersonaLabelCreate
from onyx.server.features.persona.models import PersonaLabelResponse
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.manage.llm.api import get_valid_model_names_for_persona
from onyx.server.models import DisplayPriorityRequest
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


def _validate_user_knowledge_enabled(
    persona_upsert_request: PersonaUpsertRequest, action: str
) -> None:
    """Check if user knowledge is enabled when user files/projects are provided."""
    settings = load_settings()
    if not settings.user_knowledge_enabled:
        # Only user files are supported going forward; keep getattr for backward compat
        if persona_upsert_request.user_file_ids or getattr(
            persona_upsert_request, "user_project_ids", None
        ):
            raise HTTPException(
                status_code=400,
                detail=f"User Knowledge is disabled. Cannot {action} assistant with user files or projects.",
            )


def _validate_vector_db_knowledge(
    persona_upsert_request: PersonaUpsertRequest,
) -> None:
    """Reject connector-sourced knowledge types when vector DB is disabled.

    document_sets, hierarchy_nodes, and attached_documents all depend on
    the vector DB for search filtering. user_files are still allowed because
    they use the FileReaderTool path instead.
    """
    if not DISABLE_VECTOR_DB:
        return

    if persona_upsert_request.document_set_ids:
        raise HTTPException(
            status_code=400,
            detail=(
                "Cannot attach document sets to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set)."
            ),
        )
    if persona_upsert_request.hierarchy_node_ids:
        raise HTTPException(
            status_code=400,
            detail=(
                "Cannot attach hierarchy nodes to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set)."
            ),
        )
    if persona_upsert_request.document_ids:
        raise HTTPException(
            status_code=400,
            detail=(
                "Cannot attach documents to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set)."
            ),
        )


admin_router = APIRouter(prefix="/admin/persona")
basic_router = APIRouter(prefix="/persona")

# NOTE: Users know this functionality as "agents", so we want to start moving
# nomenclature of these REST resources to match that.
admin_agents_router = APIRouter(prefix=ADMIN_AGENTS_RESOURCE)
agents_router = APIRouter(prefix=AGENTS_RESOURCE)


class IsListedRequest(BaseModel):
    is_listed: bool


class IsPublicRequest(BaseModel):
    is_public: bool


class IsFeaturedRequest(BaseModel):
    is_featured: bool


@admin_router.patch("/{persona_id}/listed")
def patch_persona_visibility(
    persona_id: int,
    is_listed_request: IsListedRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_persona_visibility(
        persona_id=persona_id,
        is_listed=is_listed_request.is_listed,
        db_session=db_session,
        user=user,
    )


@basic_router.patch("/{persona_id}/public")
def patch_user_persona_public_status(
    persona_id: int,
    is_public_request: IsPublicRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        update_persona_public_status(
            persona_id=persona_id,
            is_public=is_public_request.is_public,
            db_session=db_session,
            user=user,
        )
    except ValueError as e:
        logger.exception("Failed to update persona public status")
        raise HTTPException(status_code=403, detail=str(e))


@admin_router.patch("/{persona_id}/featured")
def patch_persona_featured_status(
    persona_id: int,
    is_featured_request: IsFeaturedRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        update_persona_featured(
            persona_id=persona_id,
            is_featured=is_featured_request.is_featured,
            db_session=db_session,
            user=user,
        )
    except ValueError as e:
        logger.exception("Failed to update persona featured status")
        raise HTTPException(status_code=403, detail=str(e))


@admin_agents_router.patch("/display-priorities")
def patch_agents_display_priorities(
    display_priority_request: DisplayPriorityRequest,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        update_personas_display_priority(
            display_priority_map=display_priority_request.display_priority_map,
            db_session=db_session,
            user=user,
            commit_db_txn=True,
        )
    except ValueError as e:
        logger.exception("Failed to update agent display priorities.")
        raise HTTPException(status_code=403, detail=str(e))


@admin_router.get("", tags=PUBLIC_API_TAGS)
def list_personas_admin(
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    include_deleted: bool = False,
    get_editable: bool = Query(False, description="If true, return editable personas"),
) -> list[PersonaSnapshot]:
    return get_persona_snapshots_for_user(
        user=user,
        db_session=db_session,
        get_editable=get_editable,
        include_deleted=include_deleted,
    )


@admin_agents_router.get("", tags=PUBLIC_API_TAGS)
def get_agents_admin_paginated(
    page_num: int = Query(0, ge=0, description="Page number (0-indexed)."),
    page_size: int = Query(10, ge=1, le=1000, description="Items per page."),
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    include_deleted: bool = Query(
        False, description="If true, includes deleted personas."
    ),
    get_editable: bool = Query(
        False, description="If true, only returns editable personas."
    ),
    include_default: bool = Query(
        True, description="If true, includes builtin/default personas."
    ),
) -> PaginatedReturn[PersonaSnapshot]:
    """Paginated endpoint for listing agents (formerly personas) (admin view).

    Returns items for the requested page plus total count.
    Agents are ordered by display_priority (ASC, nulls last) then by ID (ASC).
    """
    agents = get_persona_snapshots_paginated(
        user=user,
        db_session=db_session,
        page_num=page_num,
        page_size=page_size,
        get_editable=get_editable,
        include_default=include_default,
        include_deleted=include_deleted,
    )

    total_count = get_persona_count_for_user(
        user=user,
        db_session=db_session,
        get_editable=get_editable,
        include_default=include_default,
        include_deleted=include_deleted,
    )

    return PaginatedReturn(
        items=agents,
        total_items=total_count,
    )


@admin_router.patch("/{persona_id}/undelete", tags=PUBLIC_API_TAGS)
def undelete_persona(
    persona_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    mark_persona_as_not_deleted(
        persona_id=persona_id,
        user=user,
        db_session=db_session,
    )


# used for assistant profile pictures
@admin_router.post("/upload-image")
def upload_file(
    file: UploadFile,
    _: User = Depends(current_user),
) -> dict[str, str]:
    file_store = get_default_file_store()
    file_type = ChatFileType.IMAGE
    file_id = file_store.save_file(
        content=file.file,
        display_name=file.filename,
        file_origin=FileOrigin.CHAT_UPLOAD,
        file_type=file.content_type or file_type.value,
    )
    return {"file_id": file_id}


"""Endpoints for all"""


@basic_router.post("", tags=PUBLIC_API_TAGS)
def create_persona(
    persona_upsert_request: PersonaUpsertRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> PersonaSnapshot:
    tenant_id = get_current_tenant_id()

    _validate_user_knowledge_enabled(persona_upsert_request, "create")
    _validate_vector_db_knowledge(persona_upsert_request)

    persona_snapshot = create_update_persona(
        persona_id=None,
        create_persona_request=persona_upsert_request,
        user=user,
        db_session=db_session,
    )
    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=str(user.id),
        event=MilestoneRecordType.CREATED_ASSISTANT,
    )

    return persona_snapshot


# NOTE: This endpoint cannot update persona configuration options that
# are core to the persona, such as its display priority and
# whether or not the assistant is a built-in / default assistant
@basic_router.patch("/{persona_id}", tags=PUBLIC_API_TAGS)
def update_persona(
    persona_id: int,
    persona_upsert_request: PersonaUpsertRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> PersonaSnapshot:
    _validate_user_knowledge_enabled(persona_upsert_request, "update")
    _validate_vector_db_knowledge(persona_upsert_request)

    persona_snapshot = create_update_persona(
        persona_id=persona_id,
        create_persona_request=persona_upsert_request,
        user=user,
        db_session=db_session,
    )
    return persona_snapshot


class PersonaLabelPatchRequest(BaseModel):
    label_name: str


@basic_router.get("/labels")
def get_labels(
    db: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> list[PersonaLabelResponse]:
    return [
        PersonaLabelResponse.from_model(label)
        for label in get_assistant_labels(db_session=db)
    ]


@basic_router.post("/labels")
def create_label(
    label: PersonaLabelCreate,
    db: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> PersonaLabelResponse:
    """Create a new assistant label"""
    try:
        label_model = create_assistant_label(name=label.name, db_session=db)
        return PersonaLabelResponse.from_model(label_model)
    except IntegrityError:
        raise HTTPException(
            status_code=400,
            detail=f"Label with name '{label.name}' already exists. Please choose a different name.",
        )


@admin_router.patch("/label/{label_id}")
def patch_persona_label(
    label_id: int,
    persona_label_patch_request: PersonaLabelPatchRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_persona_label(
        label_id=label_id,
        label_name=persona_label_patch_request.label_name,
        db_session=db_session,
    )


@admin_router.delete("/label/{label_id}")
def delete_label(
    label_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    delete_persona_label(label_id=label_id, db_session=db_session)


class PersonaShareRequest(BaseModel):
    user_ids: list[UUID] | None = None
    group_ids: list[int] | None = None
    is_public: bool | None = None
    label_ids: list[int] | None = None


# We notify each user when a user is shared with them
@basic_router.patch("/{persona_id}/share")
def share_persona(
    persona_id: int,
    persona_share_request: PersonaShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        update_persona_shared(
            persona_id=persona_id,
            user=user,
            db_session=db_session,
            user_ids=persona_share_request.user_ids,
            group_ids=persona_share_request.group_ids,
            is_public=persona_share_request.is_public,
            label_ids=persona_share_request.label_ids,
        )
    except PermissionError as e:
        logger.exception("Failed to share persona")
        raise HTTPException(status_code=403, detail=str(e))
    except ValueError as e:
        logger.exception("Failed to share persona")
        raise HTTPException(status_code=400, detail=str(e))


@basic_router.delete("/{persona_id}", tags=PUBLIC_API_TAGS)
def delete_persona(
    persona_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    mark_persona_as_deleted(
        persona_id=persona_id,
        user=user,
        db_session=db_session,
    )


@basic_router.get("")
def list_personas(
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
    include_deleted: bool = False,
    persona_ids: list[int] = Query(None),
) -> list[MinimalPersonaSnapshot]:
    personas = get_minimal_persona_snapshots_for_user(
        user=user,
        include_deleted=include_deleted,
        db_session=db_session,
        get_editable=False,
    )

    if persona_ids:
        personas = [p for p in personas if p.id in persona_ids]

    return personas


@agents_router.get("", tags=PUBLIC_API_TAGS)
def get_agents_paginated(
    page_num: int = Query(0, ge=0, description="Page number (0-indexed)."),
    page_size: int = Query(10, ge=1, le=1000, description="Items per page."),
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
    include_deleted: bool = Query(
        False, description="If true, includes deleted personas."
    ),
    get_editable: bool = Query(
        False, description="If true, only returns editable personas."
    ),
    include_default: bool = Query(
        True, description="If true, includes builtin/default personas."
    ),
) -> PaginatedReturn[MinimalPersonaSnapshot]:
    """Paginated endpoint for listing agents available to the user.

    Returns items for the requested page plus total count.
    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC).

    NOTE: persona_ids filter is not supported with pagination. Use the
    non-paginated endpoint if filtering by specific IDs is needed.
    """
    agents = get_minimal_persona_snapshots_paginated(
        user=user,
        db_session=db_session,
        page_num=page_num,
        page_size=page_size,
        get_editable=get_editable,
        include_default=include_default,
        include_deleted=include_deleted,
    )

    total_count = get_persona_count_for_user(
        user=user,
        db_session=db_session,
        get_editable=get_editable,
        include_default=include_default,
        include_deleted=include_deleted,
    )

    return PaginatedReturn(
        items=agents,
        total_items=total_count,
    )


@basic_router.get("/{persona_id}", tags=PUBLIC_API_TAGS)
def get_persona(
    persona_id: int,
    user: User = Depends(current_limited_user),
    db_session: Session = Depends(get_session),
) -> FullPersonaSnapshot:
    persona = get_persona_by_id(
        persona_id=persona_id,
        user=user,
        db_session=db_session,
        is_for_edit=False,
    )

    # Validate and fix default model if it's no longer valid for this persona's restrictions
    if persona.llm_model_version_override:
        valid_models = get_valid_model_names_for_persona(persona_id, user, db_session)

        # If current default model is not in the valid list, update to first valid or None
        if persona.llm_model_version_override not in valid_models:
            persona.llm_model_version_override = (
                valid_models[0] if valid_models else None
            )
            db_session.commit()

    return FullPersonaSnapshot.from_model(persona)


================================================
FILE: backend/onyx/server/features/persona/constants.py
================================================
# NOTE: Users know this functionality as "agents", so we want to start moving
# nomenclature of these REST resources to match that.
ADMIN_AGENTS_RESOURCE = "/admin/agents"
AGENTS_RESOURCE = "/agents"


================================================
FILE: backend/onyx/server/features/persona/models.py
================================================
from datetime import datetime
from uuid import UUID

from pydantic import BaseModel
from pydantic import Field

from onyx.configs.constants import DocumentSource
from onyx.db.enums import HierarchyNodeType
from onyx.db.models import Document
from onyx.db.models import HierarchyNode
from onyx.db.models import Persona
from onyx.db.models import PersonaLabel
from onyx.db.models import StarterMessage
from onyx.server.features.document_set.models import DocumentSetSummary
from onyx.server.features.tool.models import ToolSnapshot
from onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe
from onyx.server.models import MinimalUserSnapshot
from onyx.utils.logger import setup_logger


logger = setup_logger()


class HierarchyNodeSnapshot(BaseModel):
    """Minimal representation of a hierarchy node for persona responses."""

    id: int
    raw_node_id: str
    display_name: str
    link: str | None
    source: DocumentSource
    node_type: HierarchyNodeType

    @classmethod
    def from_model(cls, node: HierarchyNode) -> "HierarchyNodeSnapshot":
        return HierarchyNodeSnapshot(
            id=node.id,
            raw_node_id=node.raw_node_id,
            display_name=node.display_name,
            link=node.link,
            source=node.source,
            node_type=node.node_type,
        )


class AttachedDocumentSnapshot(BaseModel):
    """Minimal representation of an attached document for persona responses."""

    id: str
    title: str
    link: str | None
    parent_id: int | None
    last_modified: datetime | None
    last_synced: datetime | None
    source: DocumentSource | None

    @classmethod
    def from_model(cls, doc: Document) -> "AttachedDocumentSnapshot":
        return AttachedDocumentSnapshot(
            id=doc.id,
            title=doc.semantic_id,
            link=doc.link,
            parent_id=doc.parent_hierarchy_node_id,
            last_modified=doc.doc_updated_at,
            last_synced=doc.last_synced,
            source=(
                doc.parent_hierarchy_node.source if doc.parent_hierarchy_node else None
            ),  # TODO(evan) we really should just store this in the document table directly
        )


class PromptSnapshot(BaseModel):
    id: int
    name: str
    description: str
    system_prompt: str
    task_prompt: str
    datetime_aware: bool
    # Not including persona info, not needed

    @classmethod
    def from_model(cls, persona: Persona) -> "PromptSnapshot":
        """Create PromptSnapshot from persona's embedded prompt fields"""
        if persona.deleted:
            raise ValueError("Persona has been deleted")

        return PromptSnapshot(
            id=persona.id,
            name=persona.name,
            description=persona.description,
            system_prompt=persona.system_prompt or "",
            task_prompt=persona.task_prompt or "",
            datetime_aware=persona.datetime_aware,
        )


# More minimal request for generating a persona prompt
class GenerateStarterMessageRequest(BaseModel):
    name: str
    description: str
    instructions: str
    document_set_ids: list[int]
    generation_count: int


class PersonaUpsertRequest(BaseModel):
    name: str
    description: str
    document_set_ids: list[int]
    is_public: bool
    llm_model_provider_override: str | None = None
    llm_model_version_override: str | None = None
    starter_messages: list[StarterMessage] | None = None
    # For Private Personas, who should be able to access these
    users: list[UUID] = Field(default_factory=list)
    groups: list[int] = Field(default_factory=list)
    # e.g. ID of SearchTool or ImageGenerationTool or <USER_DEFINED_TOOL>
    tool_ids: list[int]
    remove_image: bool | None = None
    uploaded_image_id: str | None = None  # New field for uploaded image
    icon_name: str | None = (
        None  # New field that is custom chosen during agent creation/editing
    )
    search_start_date: datetime | None = None
    label_ids: list[int] | None = None
    is_featured: bool = False
    display_priority: int | None = None
    # Accept string UUIDs from frontend
    user_file_ids: list[str] | None = None
    # Hierarchy nodes (folders, spaces, channels) attached for scoped search
    hierarchy_node_ids: list[int] = Field(default_factory=list)
    # Individual documents attached for scoped search
    document_ids: list[str] = Field(default_factory=list)

    # prompt fields
    system_prompt: str
    replace_base_system_prompt: bool = False
    task_prompt: str
    datetime_aware: bool


class MinimalPersonaSnapshot(BaseModel):
    """Minimal persona model optimized for ChatPage.tsx - only includes fields actually used"""

    # Core fields used by ChatPage
    id: int
    name: str
    description: str
    # Used for retrieval capability checking
    tools: list[ToolSnapshot]
    starter_messages: list[StarterMessage] | None

    # only show document sets in the UI that the assistant has access to
    document_sets: list[DocumentSetSummary]
    # Counts for knowledge sources (used to determine if search tool should be enabled)
    hierarchy_node_count: int
    attached_document_count: int
    # Unique sources from all knowledge (document sets + hierarchy nodes)
    # Used to populate source filters in chat
    knowledge_sources: list[DocumentSource]
    llm_model_version_override: str | None
    llm_model_provider_override: str | None

    uploaded_image_id: str | None
    icon_name: str | None

    is_public: bool
    is_listed: bool
    display_priority: int | None
    is_featured: bool
    builtin_persona: bool

    # Used for filtering
    labels: list["PersonaLabelSnapshot"]

    # Used to display ownership
    owner: MinimalUserSnapshot | None

    @classmethod
    def from_model(cls, persona: Persona) -> "MinimalPersonaSnapshot":
        # Collect unique sources from document sets, hierarchy nodes, and attached documents
        sources: set[DocumentSource] = set()

        # Sources from document sets
        for doc_set in persona.document_sets:
            for cc_pair in doc_set.connector_credential_pairs:
                sources.add(cc_pair.connector.source)

        # Sources from hierarchy nodes
        for node in persona.hierarchy_nodes:
            sources.add(node.source)

        # Sources from attached documents (via their parent hierarchy node)
        for doc in persona.attached_documents:
            if doc.parent_hierarchy_node:
                sources.add(doc.parent_hierarchy_node.source)

        return MinimalPersonaSnapshot(
            # Core fields actually used by ChatPage
            id=persona.id,
            name=persona.name,
            description=persona.description,
            tools=[
                ToolSnapshot.from_model(tool)
                for tool in persona.tools
                if should_expose_tool_to_fe(tool)
            ],
            starter_messages=persona.starter_messages,
            document_sets=[
                DocumentSetSummary.from_model(document_set)
                for document_set in persona.document_sets
            ],
            hierarchy_node_count=len(persona.hierarchy_nodes),
            attached_document_count=len(persona.attached_documents),
            knowledge_sources=list(sources),
            llm_model_version_override=persona.llm_model_version_override,
            llm_model_provider_override=persona.llm_model_provider_override,
            uploaded_image_id=persona.uploaded_image_id,
            icon_name=persona.icon_name,
            is_public=persona.is_public,
            is_listed=persona.is_listed,
            display_priority=persona.display_priority,
            is_featured=persona.is_featured,
            builtin_persona=persona.builtin_persona,
            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],
            owner=(
                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)
                if persona.user
                else None
            ),
        )


class PersonaSnapshot(BaseModel):
    id: int
    name: str
    description: str
    is_public: bool
    is_listed: bool
    uploaded_image_id: str | None
    icon_name: str | None
    # Return string UUIDs to frontend for consistency
    user_file_ids: list[str]
    display_priority: int | None
    is_featured: bool
    builtin_persona: bool
    starter_messages: list[StarterMessage] | None
    tools: list[ToolSnapshot]
    labels: list["PersonaLabelSnapshot"]
    owner: MinimalUserSnapshot | None
    users: list[MinimalUserSnapshot]
    groups: list[int]
    document_sets: list[DocumentSetSummary]
    llm_model_provider_override: str | None
    llm_model_version_override: str | None
    # Hierarchy nodes attached for scoped search
    hierarchy_nodes: list[HierarchyNodeSnapshot] = Field(default_factory=list)
    # Individual documents attached for scoped search
    attached_documents: list[AttachedDocumentSnapshot] = Field(default_factory=list)

    # Embedded prompt fields (no longer separate prompt_ids)
    system_prompt: str | None = None
    replace_base_system_prompt: bool = False
    task_prompt: str | None = None
    datetime_aware: bool = True

    @classmethod
    def from_model(cls, persona: Persona) -> "PersonaSnapshot":
        return PersonaSnapshot(
            id=persona.id,
            name=persona.name,
            description=persona.description,
            is_public=persona.is_public,
            is_listed=persona.is_listed,
            uploaded_image_id=persona.uploaded_image_id,
            icon_name=persona.icon_name,
            user_file_ids=[str(file.id) for file in persona.user_files],
            display_priority=persona.display_priority,
            is_featured=persona.is_featured,
            builtin_persona=persona.builtin_persona,
            starter_messages=persona.starter_messages,
            tools=[
                ToolSnapshot.from_model(tool)
                for tool in persona.tools
                if should_expose_tool_to_fe(tool)
            ],
            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],
            hierarchy_nodes=[
                HierarchyNodeSnapshot.from_model(node)
                for node in persona.hierarchy_nodes
            ],
            attached_documents=[
                AttachedDocumentSnapshot.from_model(doc)
                for doc in persona.attached_documents
            ],
            owner=(
                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)
                if persona.user
                else None
            ),
            users=[
                MinimalUserSnapshot(id=user.id, email=user.email)
                for user in persona.users
            ],
            groups=[user_group.id for user_group in persona.groups],
            document_sets=[
                DocumentSetSummary.from_model(document_set_model)
                for document_set_model in persona.document_sets
            ],
            llm_model_provider_override=persona.llm_model_provider_override,
            llm_model_version_override=persona.llm_model_version_override,
            system_prompt=persona.system_prompt,
            replace_base_system_prompt=persona.replace_base_system_prompt,
            task_prompt=persona.task_prompt,
            datetime_aware=persona.datetime_aware,
        )


# Model with full context on persona's internal settings
# This is used for flows which need to know all settings
class FullPersonaSnapshot(PersonaSnapshot):
    search_start_date: datetime | None = None

    @classmethod
    def from_model(
        cls, persona: Persona, allow_deleted: bool = False
    ) -> "FullPersonaSnapshot":
        if persona.deleted:
            error_msg = f"Persona with ID {persona.id} has been deleted"
            if not allow_deleted:
                raise ValueError(error_msg)
            else:
                logger.warning(error_msg)

        return FullPersonaSnapshot(
            id=persona.id,
            name=persona.name,
            description=persona.description,
            is_public=persona.is_public,
            is_listed=persona.is_listed,
            uploaded_image_id=persona.uploaded_image_id,
            icon_name=persona.icon_name,
            user_file_ids=[str(file.id) for file in persona.user_files],
            display_priority=persona.display_priority,
            is_featured=persona.is_featured,
            builtin_persona=persona.builtin_persona,
            starter_messages=persona.starter_messages,
            users=[
                MinimalUserSnapshot(id=user.id, email=user.email)
                for user in persona.users
            ],
            groups=[user_group.id for user_group in persona.groups],
            tools=[
                ToolSnapshot.from_model(tool)
                for tool in persona.tools
                if should_expose_tool_to_fe(tool)
            ],
            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],
            hierarchy_nodes=[
                HierarchyNodeSnapshot.from_model(node)
                for node in persona.hierarchy_nodes
            ],
            attached_documents=[
                AttachedDocumentSnapshot.from_model(doc)
                for doc in persona.attached_documents
            ],
            owner=(
                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)
                if persona.user
                else None
            ),
            document_sets=[
                DocumentSetSummary.from_model(document_set_model)
                for document_set_model in persona.document_sets
            ],
            search_start_date=persona.search_start_date,
            llm_model_provider_override=persona.llm_model_provider_override,
            llm_model_version_override=persona.llm_model_version_override,
            system_prompt=persona.system_prompt,
            replace_base_system_prompt=persona.replace_base_system_prompt,
            task_prompt=persona.task_prompt,
            datetime_aware=persona.datetime_aware,
        )


class PromptTemplateResponse(BaseModel):
    final_prompt_template: str


class PersonaSharedNotificationData(BaseModel):
    persona_id: int


class ImageGenerationToolStatus(BaseModel):
    is_available: bool


class PersonaLabelCreate(BaseModel):
    name: str


class PersonaLabelResponse(BaseModel):
    id: int
    name: str

    @classmethod
    def from_model(cls, category: PersonaLabel) -> "PersonaLabelResponse":
        return PersonaLabelResponse(
            id=category.id,
            name=category.name,
        )


class PersonaLabelSnapshot(BaseModel):
    id: int
    name: str

    @classmethod
    def from_model(cls, label: PersonaLabel) -> "PersonaLabelSnapshot":
        return PersonaLabelSnapshot(
            id=label.id,
            name=label.name,
        )


================================================
FILE: backend/onyx/server/features/projects/api.py
================================================
import json
from uuid import UUID

from fastapi import APIRouter
from fastapi import BackgroundTasks
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Response
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import UserFileStatus
from onyx.db.models import ChatSession
from onyx.db.models import Project__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.db.persona import get_personas_by_ids
from onyx.db.projects import get_project_token_count
from onyx.db.projects import upload_files_to_user_files_with_indexing
from onyx.server.features.projects.models import CategorizedFilesSnapshot
from onyx.server.features.projects.models import ChatSessionRequest
from onyx.server.features.projects.models import TokenCountResponse
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.features.projects.models import UserProjectSnapshot
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


router = APIRouter(prefix="/user/projects")


class UserFileDeleteResult(BaseModel):
    has_associations: bool
    project_names: list[str] = []
    assistant_names: list[str] = []


def _trigger_user_file_project_sync(
    user_file_id: UUID,
    tenant_id: str,
    background_tasks: BackgroundTasks | None = None,
) -> None:
    if DISABLE_VECTOR_DB and background_tasks is not None:
        from onyx.background.task_utils import drain_project_sync_loop

        background_tasks.add_task(drain_project_sync_loop, tenant_id)
        logger.info(f"Queued in-process project sync for user_file_id={user_file_id}")
        return

    from onyx.background.celery.tasks.user_file_processing.tasks import (
        enqueue_user_file_project_sync_task,
    )
    from onyx.background.celery.tasks.user_file_processing.tasks import (
        get_user_file_project_sync_queue_depth,
    )
    from onyx.background.celery.versioned_apps.client import app as client_app
    from onyx.redis.redis_pool import get_redis_client

    queue_depth = get_user_file_project_sync_queue_depth(client_app)
    if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:
        logger.warning(
            f"Skipping immediate project sync for user_file_id={user_file_id} due to "
            f"queue depth {queue_depth}>{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}. "
            "It will be picked up by beat later."
        )
        return

    redis_client = get_redis_client(tenant_id=tenant_id)
    enqueued = enqueue_user_file_project_sync_task(
        celery_app=client_app,
        redis_client=redis_client,
        user_file_id=user_file_id,
        tenant_id=tenant_id,
        priority=OnyxCeleryPriority.HIGHEST,
    )
    if not enqueued:
        logger.info(
            f"Skipped duplicate project sync enqueue for user_file_id={user_file_id}"
        )
        return

    logger.info(f"Triggered project sync for user_file_id={user_file_id}")


@router.get("", tags=PUBLIC_API_TAGS)
def get_projects(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserProjectSnapshot]:
    user_id = user.id
    projects = (
        db_session.query(UserProject).filter(UserProject.user_id == user_id).all()
    )
    return [UserProjectSnapshot.from_model(project) for project in projects]


@router.post("/create", tags=PUBLIC_API_TAGS)
def create_project(
    name: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserProjectSnapshot:
    if name == "":
        raise HTTPException(status_code=400, detail="Project name cannot be empty")
    user_id = user.id
    project = UserProject(name=name, user_id=user_id)
    db_session.add(project)
    db_session.commit()
    return UserProjectSnapshot.from_model(project)


@router.post("/file/upload", tags=PUBLIC_API_TAGS)
def upload_user_files(
    bg_tasks: BackgroundTasks,
    files: list[UploadFile] = File(...),
    project_id: int | None = Form(None),
    temp_id_map: str | None = Form(None),  # JSON string mapping hashed key -> temp_id
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CategorizedFilesSnapshot:
    try:
        parsed_temp_id_map: dict[str, str] | None = None
        if temp_id_map:
            try:
                parsed = json.loads(temp_id_map)
                if isinstance(parsed, dict):
                    # Ensure all keys/values are strings
                    parsed_temp_id_map = {str(k): str(v) for k, v in parsed.items()}
                else:
                    parsed_temp_id_map = None
            except json.JSONDecodeError:
                parsed_temp_id_map = None

        # Use our consolidated function that handles indexing properly
        categorized_files_result = upload_files_to_user_files_with_indexing(
            files=files,
            project_id=project_id,
            user=user,
            temp_id_map=parsed_temp_id_map,
            db_session=db_session,
            background_tasks=bg_tasks if DISABLE_VECTOR_DB else None,
        )

        return CategorizedFilesSnapshot.from_result(categorized_files_result)

    except Exception as e:
        logger.exception(f"Error uploading files - {type(e).__name__}: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail="Failed to upload files. Please try again or contact support if the issue persists.",
        )


@router.get("/{project_id}", tags=PUBLIC_API_TAGS)
def get_project(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserProjectSnapshot:
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")
    return UserProjectSnapshot.from_model(project)


@router.get("/files/{project_id}", tags=PUBLIC_API_TAGS)
def get_files_in_project(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    user_id = user.id
    user_files = (
        db_session.query(UserFile)
        .join(Project__UserFile, UserFile.id == Project__UserFile.user_file_id)
        .filter(
            Project__UserFile.project_id == project_id,
            UserFile.user_id == user_id,
            UserFile.status != UserFileStatus.FAILED,
        )
        .order_by(Project__UserFile.created_at.desc())
        .all()
    )
    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]


@router.delete("/{project_id}/files/{file_id}", tags=PUBLIC_API_TAGS)
def unlink_user_file_from_project(
    project_id: int,
    file_id: UUID,
    bg_tasks: BackgroundTasks,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Unlink an existing user file from a specific project for the current user.

    Does not delete the underlying file; only removes the association.
    """
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    user_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == user_id)
        .one_or_none()
    )
    if user_file is None:
        raise HTTPException(status_code=404, detail="File not found")

    # Remove the association if it exists
    if user_file in project.user_files:
        project.user_files.remove(user_file)
        user_file.needs_project_sync = True
        db_session.commit()

    tenant_id = get_current_tenant_id()
    _trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)

    return Response(status_code=204)


@router.post(
    "/{project_id}/files/{file_id}",
    response_model=UserFileSnapshot,
    tags=PUBLIC_API_TAGS,
)
def link_user_file_to_project(
    project_id: int,
    file_id: UUID,
    bg_tasks: BackgroundTasks,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Link an existing user file to a specific project for the current user.

    Creates the association in the Project__UserFile join table if it does not exist.
    Returns the linked user file snapshot.
    """
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    user_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == user_id)
        .one_or_none()
    )
    if user_file is None:
        raise HTTPException(status_code=404, detail="File not found")

    if user_file not in project.user_files:
        user_file.needs_project_sync = True
        project.user_files.append(user_file)
        db_session.commit()

    tenant_id = get_current_tenant_id()
    _trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)

    return UserFileSnapshot.from_model(user_file)


class ProjectInstructionsResponse(BaseModel):
    instructions: str | None


@router.get(
    "/{project_id}/instructions",
    response_model=ProjectInstructionsResponse,
    tags=PUBLIC_API_TAGS,
)
def get_project_instructions(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ProjectInstructionsResponse:
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )

    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    return ProjectInstructionsResponse(instructions=project.instructions)


class UpsertProjectInstructionsRequest(BaseModel):
    instructions: str


@router.post(
    "/{project_id}/instructions",
    response_model=ProjectInstructionsResponse,
    tags=PUBLIC_API_TAGS,
)
def upsert_project_instructions(
    project_id: int,
    body: UpsertProjectInstructionsRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ProjectInstructionsResponse:
    """Create or update this project's instructions stored on the project itself."""
    # Ensure the project exists and belongs to the user
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")
    project.instructions = body.instructions

    db_session.commit()
    db_session.refresh(project)
    return ProjectInstructionsResponse(instructions=project.instructions)


class ProjectPayload(BaseModel):
    project: UserProjectSnapshot
    files: list[UserFileSnapshot] | None = None
    persona_id_to_is_featured: dict[int, bool] | None = None


@router.get(
    "/{project_id}/details", response_model=ProjectPayload, tags=PUBLIC_API_TAGS
)
def get_project_details(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ProjectPayload:
    project = get_project(project_id, user, db_session)
    files = get_files_in_project(project_id, user, db_session)
    persona_ids = [
        session.persona_id
        for session in project.chat_sessions
        if session.persona_id is not None
    ]
    personas = get_personas_by_ids(persona_ids, db_session)
    persona_id_to_is_featured = {
        persona.id: persona.is_featured for persona in personas
    }
    return ProjectPayload(
        project=project,
        files=files,
        persona_id_to_is_featured=persona_id_to_is_featured,
    )


class UpdateProjectRequest(BaseModel):
    name: str | None = None
    description: str | None = None


@router.patch("/{project_id}", response_model=UserProjectSnapshot, tags=PUBLIC_API_TAGS)
def update_project(
    project_id: int,
    body: UpdateProjectRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserProjectSnapshot:
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    if body.name is not None:
        project.name = body.name
    if body.description is not None:
        project.description = body.description

    db_session.commit()
    db_session.refresh(project)
    return UserProjectSnapshot.from_model(project)


@router.delete("/{project_id}", tags=PUBLIC_API_TAGS)
def delete_project(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    # Unlink chat sessions from this project
    for chat in project.chat_sessions:
        chat.project_id = None

    # Unlink many-to-many user files association (Project__UserFile)
    for uf in list(project.user_files):
        project.user_files.remove(uf)

    db_session.delete(project)
    db_session.commit()
    return Response(status_code=204)


@router.delete("/file/{file_id}", tags=PUBLIC_API_TAGS)
def delete_user_file(
    file_id: UUID,
    bg_tasks: BackgroundTasks,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileDeleteResult:
    """Delete a user file belonging to the current user.

    This will also remove any project associations for the file.
    """
    user_id = user.id
    user_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == user_id)
        .one_or_none()
    )
    if user_file is None:
        raise HTTPException(status_code=404, detail="File not found")

    # Check associations with projects and assistants (personas)
    project_names = [project.name for project in user_file.projects]
    assistant_names = [assistant.name for assistant in user_file.assistants]

    if len(project_names) > 0 or len(assistant_names) > 0:
        return UserFileDeleteResult(
            has_associations=True,
            project_names=project_names,
            assistant_names=assistant_names,
        )

    # No associations found; mark as DELETING and enqueue delete task
    user_file.status = UserFileStatus.DELETING
    db_session.commit()

    tenant_id = get_current_tenant_id()
    if DISABLE_VECTOR_DB:
        from onyx.background.task_utils import drain_delete_loop

        bg_tasks.add_task(drain_delete_loop, tenant_id)
        logger.info(f"Queued in-process delete for user_file_id={user_file.id}")
    else:
        from onyx.background.celery.versioned_apps.client import app as client_app

        task = client_app.send_task(
            OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
            kwargs={"user_file_id": str(user_file.id), "tenant_id": tenant_id},
            queue=OnyxCeleryQueues.USER_FILE_DELETE,
            priority=OnyxCeleryPriority.HIGH,
        )
        logger.info(
            f"Triggered delete for user_file_id={user_file.id} with task_id={task.id}"
        )

    return UserFileDeleteResult(
        has_associations=False, project_names=[], assistant_names=[]
    )


@router.get("/file/{file_id}", response_model=UserFileSnapshot, tags=PUBLIC_API_TAGS)
def get_user_file(
    file_id: UUID,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Fetch a single user file by ID for the current user.

    Includes files in any status (including FAILED) to allow status polling.
    """
    user_id = user.id
    user_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == user_id)
        .filter(UserFile.status != UserFileStatus.DELETING)
        .one_or_none()
    )
    if user_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    return UserFileSnapshot.from_model(user_file)


class UserFileIdsRequest(BaseModel):
    file_ids: list[UUID]


@router.post(
    "/file/statuses", response_model=list[UserFileSnapshot], tags=PUBLIC_API_TAGS
)
def get_user_file_statuses(
    body: UserFileIdsRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Fetch statuses for a set of user file IDs owned by the current user.

    Includes files in any status so the client can detect transitions to FAILED.
    """
    if not body.file_ids:
        return []

    user_id = user.id
    user_files = (
        db_session.query(UserFile)
        .filter(UserFile.user_id == user_id)
        .filter(UserFile.id.in_(body.file_ids))
        .filter(UserFile.status != UserFileStatus.DELETING)
        .all()
    )

    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]


@router.post("/{project_id}/move_chat_session")
def move_chat_session(
    project_id: int,
    body: ChatSessionRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    user_id = user.id
    chat_session = (
        db_session.query(ChatSession)
        .filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user_id)
        .one_or_none()
    )
    if chat_session is None:
        raise HTTPException(status_code=404, detail="Chat session not found")
    chat_session.project_id = project_id
    db_session.commit()
    return Response(status_code=204)


@router.post("/remove_chat_session")
def remove_chat_session(
    body: ChatSessionRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    user_id = user.id
    chat_session = (
        db_session.query(ChatSession)
        .filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user_id)
        .one_or_none()
    )
    if chat_session is None:
        raise HTTPException(status_code=404, detail="Chat session not found")
    chat_session.project_id = None
    db_session.commit()
    return Response(status_code=204)


@router.get("/session/{chat_session_id}/token-count", response_model=TokenCountResponse)
def get_chat_session_project_token_count(
    chat_session_id: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> TokenCountResponse:
    """Return sum of token_count for all user files in the project linked to the given chat session.

    If the chat session has no project, returns 0.
    """
    user_id = user.id
    chat_session = (
        db_session.query(ChatSession)
        .filter(ChatSession.id == chat_session_id, ChatSession.user_id == user_id)
        .one_or_none()
    )
    if chat_session is None:
        raise HTTPException(status_code=404, detail="Chat session not found")

    total_tokens = get_project_token_count(
        project_id=chat_session.project_id,
        user_id=user_id,
        db_session=db_session,
    )

    return TokenCountResponse(total_tokens=total_tokens)


@router.get("/session/{chat_session_id}/files", tags=PUBLIC_API_TAGS)
def get_chat_session_project_files(
    chat_session_id: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Return user files for the project linked to the given chat session.

    If the chat session has no project, returns an empty list.
    Only returns files owned by the current user and not FAILED.
    """
    user_id = user.id

    chat_session = (
        db_session.query(ChatSession)
        .filter(ChatSession.id == chat_session_id, ChatSession.user_id == user_id)
        .one_or_none()
    )
    if chat_session is None:
        raise HTTPException(status_code=404, detail="Chat session not found")

    if chat_session.project_id is None:
        return []

    user_files = (
        db_session.query(UserFile)
        .filter(
            UserFile.projects.any(id=chat_session.project_id),
            UserFile.user_id == user_id,
            UserFile.status != UserFileStatus.FAILED,
        )
        .order_by(UserFile.created_at.desc())
        .all()
    )

    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]


@router.get("/{project_id}/token-count", response_model=TokenCountResponse)
def get_project_total_token_count(
    project_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> TokenCountResponse:
    """Return sum of token_count for all user files in the given project for the current user."""

    # Verify the project belongs to the current user
    user_id = user.id
    project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .one_or_none()
    )
    if project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    total_tokens = get_project_token_count(
        project_id=project_id,
        user_id=user_id,
        db_session=db_session,
    )

    return TokenCountResponse(total_tokens=total_tokens)


================================================
FILE: backend/onyx/server/features/projects/models.py
================================================
from datetime import datetime
from uuid import UUID

from pydantic import BaseModel

from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.db.projects import CategorizedFilesResult
from onyx.file_store.models import ChatFileType
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.server.query_and_chat.models import ChatSessionDetails


class UserFileSnapshot(BaseModel):
    id: UUID
    temp_id: str | None = None  # Client-side temporary ID for optimistic updates
    name: str
    project_id: int | None = None
    user_id: UUID | None
    file_id: str
    created_at: datetime
    status: UserFileStatus
    last_accessed_at: datetime | None
    file_type: str | None
    chat_file_type: ChatFileType
    token_count: int | None
    chunk_count: int | None

    @classmethod
    def from_model(
        cls, model: UserFile, temp_id_map: dict[str, str] = {}
    ) -> "UserFileSnapshot":
        return cls(
            id=model.id,
            temp_id=temp_id_map.get(str(model.id)),
            name=model.name,
            project_id=None,
            user_id=model.user_id,
            file_id=model.file_id,
            created_at=model.created_at,
            status=model.status,
            last_accessed_at=model.last_accessed_at,
            file_type=model.content_type,
            chat_file_type=mime_type_to_chat_file_type(model.content_type),
            token_count=model.token_count,
            chunk_count=model.chunk_count,
        )


class TokenCountResponse(BaseModel):
    total_tokens: int


class RejectedFile(BaseModel):
    file_name: str
    reason: str


class CategorizedFilesSnapshot(BaseModel):
    user_files: list[UserFileSnapshot]
    rejected_files: list[RejectedFile]

    @classmethod
    def from_result(cls, result: CategorizedFilesResult) -> "CategorizedFilesSnapshot":
        return cls(
            user_files=[
                UserFileSnapshot.from_model(user_file, temp_id_map=result.id_to_temp_id)
                for user_file in result.user_files
            ],
            rejected_files=[
                RejectedFile(
                    file_name=rejected_file.filename,
                    reason=rejected_file.reason,
                )
                for rejected_file in result.rejected_files
            ],
        )


class UserProjectSnapshot(BaseModel):
    id: int
    name: str
    description: str | None
    created_at: datetime
    user_id: UUID | None
    instructions: str | None = None
    chat_sessions: list[ChatSessionDetails]

    @classmethod
    def from_model(cls, model: UserProject) -> "UserProjectSnapshot":
        return cls(
            id=model.id,
            name=model.name,
            description=model.description,
            created_at=model.created_at,
            user_id=model.user_id,
            instructions=model.instructions,
            chat_sessions=[
                ChatSessionDetails.from_model(chat)
                for chat in model.chat_sessions
                if not chat.deleted
            ],
        )


class ChatSessionRequest(BaseModel):
    chat_session_id: str


================================================
FILE: backend/onyx/server/features/projects/projects_file_utils.py
================================================
from math import ceil

from fastapi import UploadFile
from PIL import Image
from PIL import ImageOps
from PIL import UnidentifiedImageError
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy.orm import Session

from onyx.db.llm import fetch_default_llm_model
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.password_validation import is_file_password_protected
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger


logger = setup_logger()
UNKNOWN_FILENAME = "[unknown_file]"  # More descriptive than empty string


def get_safe_filename(upload: UploadFile) -> str:
    """Get filename from upload, with fallback to UNKNOWN_FILENAME if None."""
    if not upload.filename:
        logger.warning("Received upload with no filename")
        return UNKNOWN_FILENAME
    return upload.filename


def get_upload_size_bytes(upload: UploadFile) -> int | None:
    """Best-effort file size in bytes without consuming the stream."""
    if upload.size is not None:
        return upload.size

    try:
        current_pos = upload.file.tell()
        upload.file.seek(0, 2)
        size = upload.file.tell()
        upload.file.seek(current_pos)
        return size
    except Exception as e:
        logger.warning(
            "Could not determine upload size via stream seek "
            f"(filename='{get_safe_filename(upload)}', "
            f"error_type={type(e).__name__}, error={e})"
        )
        return None


def is_upload_too_large(upload: UploadFile, max_bytes: int) -> bool:
    """Return True when upload size is known and exceeds max_bytes."""
    size_bytes = get_upload_size_bytes(upload)
    if size_bytes is None:
        logger.warning(
            f"Could not determine upload size; skipping size-limit check for '{get_safe_filename(upload)}'"
        )
        return False
    return size_bytes > max_bytes


# Guard against extremely large images
Image.MAX_IMAGE_PIXELS = 12000 * 12000


class RejectedFile(BaseModel):
    filename: str = Field(default="")
    reason: str = Field(default="")


class CategorizedFiles(BaseModel):
    acceptable: list[UploadFile] = Field(default_factory=list)
    rejected: list[RejectedFile] = Field(default_factory=list)
    acceptable_file_to_token_count: dict[str, int] = Field(default_factory=dict)
    # Filenames within `acceptable` that should be stored but not indexed.
    skip_indexing: set[str] = Field(default_factory=set)

    # Allow FastAPI UploadFile instances
    model_config = ConfigDict(arbitrary_types_allowed=True)


def _skip_token_threshold(extension: str) -> bool:
    """Return True if this file extension should bypass the token limit."""
    return extension.lower() in OnyxFileExtensions.TABULAR_EXTENSIONS


def _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:
    if max(width, height) <= cap:
        return width, height
    scale = cap / max(width, height)
    new_w = max(1, int(round(width * scale)))
    new_h = max(1, int(round(height * scale)))
    return new_w, new_h


def _estimate_image_tokens(
    width: int, height: int, patch_size: int, overhead: int
) -> int:
    patches_w = ceil(width / patch_size)
    patches_h = ceil(height / patch_size)
    patches = patches_w * patches_h
    return patches + overhead


def estimate_image_tokens_for_upload(
    upload: UploadFile,
    cap_long_side: int = 2048,
    patch_size: int = 16,
    overhead_tokens: int = 32,
) -> int:
    """Open the uploaded image, normalize orientation, cap long side, and estimate tokens.

    Parameters
    - cap_long_side: Maximum pixels allowed on the image's longer side before estimating.
      Rationale: Many vision-language encoders downsample images so the longer side is
      bounded (commonly around 1024–2048px). Capping avoids unbounded patch counts and
      keeps costs predictable while preserving most semantic content for typical UI/docs.
      Default 2048 is a balanced choice between fidelity and token cost.

    - patch_size: The pixel size of square patches used in a rough ViT-style estimate.
      Rationale: Modern vision backbones (e.g., ViT variants) commonly operate on 14–16px
      patches. Using 16 simplifies the estimate and aligns with widely used configurations.
      Each patch approximately maps to one visual token in this heuristic.

    - overhead_tokens: Fixed per-image overhead to account for special tokens, metadata,
      and prompt framing added by providers. Rationale: Real models add tens of tokens per
      image beyond pure patch count. 32 is a conservative, stable default that avoids
      undercounting.

    Notes
    - This is a heuristic estimation for budgeting and gating. Actual tokenization varies
      by model/provider and may differ slightly.

    Always resets the file pointer before returning.
    """
    try:
        img = Image.open(upload.file)
        img = ImageOps.exif_transpose(img)
        width, height = img.size
        capped_w, capped_h = _apply_long_side_cap(width, height, cap=cap_long_side)
        return _estimate_image_tokens(
            capped_w, capped_h, patch_size=patch_size, overhead=overhead_tokens
        )
    finally:
        try:
            upload.file.seek(0)
        except Exception:
            pass


def categorize_uploaded_files(
    files: list[UploadFile], db_session: Session
) -> CategorizedFiles:
    """
    Categorize uploaded files based on text extractability and tokenized length.

    - Images are estimated for token cost via a patch-based heuristic.
    - All other files are run through extract_file_text, which handles known
      document formats (.pdf, .docx, …) and falls back to a text-detection
      heuristic for unknown extensions (.py, .js, .rs, …).
    - Uses default tokenizer to compute token length.
    - If token length exceeds the admin-configured threshold, reject file.
    - If extension unsupported or text cannot be extracted, reject file.
    - Otherwise marked as acceptable.
    """

    results = CategorizedFiles()
    default_model = fetch_default_llm_model(db_session)

    model_name = default_model.name if default_model else None
    provider_type = default_model.llm_provider.provider if default_model else None
    tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)

    # Derive limits from admin-configurable settings.
    # For upload size: load_settings() resolves 0/None to a positive default.
    # For token threshold: 0 means "no limit" (converted to None below).
    settings = load_settings()
    max_upload_size_mb = (
        settings.user_file_max_upload_size_mb
    )  # always positive after load_settings()
    max_upload_size_bytes = (
        max_upload_size_mb * 1024 * 1024 if max_upload_size_mb else None
    )
    token_threshold_k = settings.file_token_count_threshold_k
    token_threshold = (
        token_threshold_k * 1000 if token_threshold_k else None
    )  # 0 → None = no limit

    for upload in files:
        try:
            filename = get_safe_filename(upload)

            # Size limit is a hard safety cap.
            if max_upload_size_bytes is not None and is_upload_too_large(
                upload, max_upload_size_bytes
            ):
                results.rejected.append(
                    RejectedFile(
                        filename=filename,
                        reason=f"Exceeds {max_upload_size_mb} MB file size limit",
                    )
                )
                continue

            extension = get_file_ext(filename)

            # If image, estimate tokens via dedicated method first
            if extension in OnyxFileExtensions.IMAGE_EXTENSIONS:
                try:
                    token_count = estimate_image_tokens_for_upload(upload)
                except (UnidentifiedImageError, OSError) as e:
                    logger.warning(
                        f"Failed to process image file '{filename}': {str(e)}"
                    )
                    results.rejected.append(
                        RejectedFile(
                            filename=filename, reason="Unsupported file contents"
                        )
                    )
                    continue

                if token_threshold is not None and token_count > token_threshold:
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
                            reason=f"Exceeds {token_threshold_k}K token limit",
                        )
                    )
                else:
                    results.acceptable.append(upload)
                    results.acceptable_file_to_token_count[filename] = token_count
                continue

            # Handle as text/document: attempt text extraction and count tokens.
            # This accepts any file that extract_file_text can handle, including
            # code files (.py, .js, .rs, etc.) via its is_text_file() fallback.
            else:
                if is_file_password_protected(
                    file=upload.file,
                    file_name=filename,
                    extension=extension,
                ):
                    logger.warning(f"{filename} is password protected")
                    results.rejected.append(
                        RejectedFile(
                            filename=filename, reason="Document is password protected"
                        )
                    )
                    continue

                text_content = extract_file_text(
                    file=upload.file,
                    file_name=filename,
                    break_on_unprocessable=False,
                    extension=extension,
                )
                if not text_content:
                    logger.warning(f"No text content extracted from '{filename}'")
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
                            reason=f"Unsupported file type: {extension}",
                        )
                    )
                    continue

                token_count = count_tokens(
                    text_content, tokenizer, token_limit=token_threshold
                )
                exceeds_threshold = (
                    token_threshold is not None and token_count > token_threshold
                )
                if exceeds_threshold and _skip_token_threshold(extension):
                    # Exempt extensions (e.g. spreadsheets) are accepted
                    # but flagged to skip indexing — only metadata is
                    # injected into the LLM context.
                    results.acceptable.append(upload)
                    results.acceptable_file_to_token_count[filename] = token_count
                    results.skip_indexing.add(filename)
                elif exceeds_threshold:
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
                            reason=f"Exceeds {token_threshold_k}K token limit",
                        )
                    )
                else:
                    results.acceptable.append(upload)
                    results.acceptable_file_to_token_count[filename] = token_count

                # Reset file pointer for subsequent upload handling
                try:
                    upload.file.seek(0)
                except Exception as e:
                    logger.warning(
                        f"Failed to reset file pointer for '{filename}': {str(e)}"
                    )
        except Exception as e:
            logger.warning(
                f"Failed to process uploaded file '{get_safe_filename(upload)}' (error_type={type(e).__name__}, error={str(e)})"
            )
            results.rejected.append(
                RejectedFile(
                    filename=get_safe_filename(upload),
                    reason="Failed to process upload",
                )
            )

    return results


================================================
FILE: backend/onyx/server/features/release_notes/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/release_notes/constants.py
================================================
"""Constants for release notes functionality."""

# GitHub source
GITHUB_RAW_BASE_URL = (
    "https://raw.githubusercontent.com/onyx-dot-app/documentation/main"
)
GITHUB_CHANGELOG_RAW_URL = f"{GITHUB_RAW_BASE_URL}/changelog.mdx"

# Base URL for changelog documentation (used for notification links)
DOCS_CHANGELOG_BASE_URL = "https://docs.onyx.app/changelog"

FETCH_TIMEOUT = 60.0

# Redis keys (in shared namespace)
REDIS_KEY_PREFIX = "release_notes:"
REDIS_KEY_FETCHED_AT = f"{REDIS_KEY_PREFIX}fetched_at"
REDIS_KEY_ETAG = f"{REDIS_KEY_PREFIX}etag"

# Cache TTL: 24 hours
REDIS_CACHE_TTL = 60 * 60 * 24

# Auto-refresh threshold: 1 hour
AUTO_REFRESH_THRESHOLD_SECONDS = 60 * 60


================================================
FILE: backend/onyx/server/features/release_notes/models.py
================================================
"""Pydantic models for release notes."""

from pydantic import BaseModel


class ReleaseNoteEntry(BaseModel):
    """A single version's release note entry."""

    version: str  # e.g., "v2.7.0"
    date: str  # e.g., "January 7th, 2026"
    title: str  # Display title for notifications: "Onyx v2.7.0 is available!"


================================================
FILE: backend/onyx/server/features/release_notes/utils.py
================================================
"""Utility functions for release notes parsing and caching."""

import re
from datetime import datetime
from datetime import timezone

import httpx
from sqlalchemy.orm import Session

from onyx import __version__
from onyx.cache.factory import get_shared_cache_backend
from onyx.configs.app_configs import INSTANCE_TYPE
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.release_notes import create_release_notifications_for_versions
from onyx.server.features.release_notes.constants import AUTO_REFRESH_THRESHOLD_SECONDS
from onyx.server.features.release_notes.constants import FETCH_TIMEOUT
from onyx.server.features.release_notes.constants import GITHUB_CHANGELOG_RAW_URL
from onyx.server.features.release_notes.constants import REDIS_CACHE_TTL
from onyx.server.features.release_notes.constants import REDIS_KEY_ETAG
from onyx.server.features.release_notes.constants import REDIS_KEY_FETCHED_AT
from onyx.server.features.release_notes.models import ReleaseNoteEntry
from onyx.utils.logger import setup_logger

logger = setup_logger()


# ============================================================================
# Version Utilities
# ============================================================================


def is_valid_version(version: str) -> bool:
    """Check if version matches vX.Y.Z or vX.Y.Z-suffix.N pattern exactly."""
    return bool(re.match(r"^v\d+\.\d+\.\d+(-[a-zA-Z]+\.\d+)?$", version))


def parse_version_tuple(version: str) -> tuple[int, int, int]:
    """Parse version string to tuple for semantic sorting."""
    clean = re.sub(r"^v", "", version)
    clean = re.sub(r"-.*$", "", clean)
    parts = clean.split(".")
    return (
        int(parts[0]) if len(parts) > 0 else 0,
        int(parts[1]) if len(parts) > 1 else 0,
        int(parts[2]) if len(parts) > 2 else 0,
    )


def is_version_gte(v1: str, v2: str) -> bool:
    """Check if v1 >= v2. Strips suffixes like -cloud.X or -beta.X."""
    return parse_version_tuple(v1) >= parse_version_tuple(v2)


# ============================================================================
# MDX Parsing
# ============================================================================


def parse_mdx_to_release_note_entries(mdx_content: str) -> list[ReleaseNoteEntry]:
    """Parse MDX content into ReleaseNoteEntry objects."""
    all_entries = []

    update_pattern = (
        r'<Update\s+label="([^"]+)"\s+description="([^"]+)"'
        r"(?:\s+tags=\{([^}]+)\})?[^>]*>"
        r".*?"
        r"</Update>"
    )

    for match in re.finditer(update_pattern, mdx_content, re.DOTALL):
        version = match.group(1)
        date = match.group(2)

        if is_valid_version(version):
            all_entries.append(
                ReleaseNoteEntry(
                    version=version,
                    date=date,
                    title=f"Onyx {version} is available!",
                )
            )

    if not all_entries:
        raise ValueError("Could not parse any release note entries from MDX.")

    if INSTANCE_TYPE == "cloud":
        # Cloud often runs ahead of docs release tags; always notify on latest release.
        return sorted(
            all_entries, key=lambda x: parse_version_tuple(x.version), reverse=True
        )[:1]

    # Filter to valid versions >= __version__
    if __version__ and is_valid_version(__version__):
        entries = [
            entry for entry in all_entries if is_version_gte(entry.version, __version__)
        ]
    elif "nightly" in __version__:
        # Just show the latest entry for nightly versions
        entries = sorted(
            all_entries, key=lambda x: parse_version_tuple(x.version), reverse=True
        )[:1]
    else:
        # If not recognized version
        # likely `development` and we should show all entries
        entries = all_entries

    return entries


# ============================================================================
# Cache Helpers (ETag + timestamp only)
# ============================================================================


def get_cached_etag() -> str | None:
    cache = get_shared_cache_backend()
    try:
        etag = cache.get(REDIS_KEY_ETAG)
        if etag:
            return etag.decode("utf-8")
        return None
    except Exception as e:
        logger.error(f"Failed to get cached etag: {e}")
        return None


def get_last_fetch_time() -> datetime | None:
    cache = get_shared_cache_backend()
    try:
        raw = cache.get(REDIS_KEY_FETCHED_AT)
        if not raw:
            return None

        last_fetch = datetime.fromisoformat(raw.decode("utf-8"))
        if last_fetch.tzinfo is None:
            last_fetch = last_fetch.replace(tzinfo=timezone.utc)
        else:
            last_fetch = last_fetch.astimezone(timezone.utc)

        return last_fetch
    except Exception as e:
        logger.error(f"Failed to get last fetch time from cache: {e}")
        return None


def save_fetch_metadata(etag: str | None) -> None:
    cache = get_shared_cache_backend()
    now = datetime.now(timezone.utc)

    try:
        cache.set(REDIS_KEY_FETCHED_AT, now.isoformat(), ex=REDIS_CACHE_TTL)
        if etag:
            cache.set(REDIS_KEY_ETAG, etag, ex=REDIS_CACHE_TTL)
    except Exception as e:
        logger.error(f"Failed to save fetch metadata to cache: {e}")


def is_cache_stale() -> bool:
    """Check if we should fetch from GitHub."""
    last_fetch = get_last_fetch_time()
    if last_fetch is None:
        return True
    age = datetime.now(timezone.utc) - last_fetch
    return age.total_seconds() > AUTO_REFRESH_THRESHOLD_SECONDS


# ============================================================================
# Main Function
# ============================================================================


def ensure_release_notes_fresh_and_notify(db_session: Session) -> None:
    """
    Check for new release notes and create notifications if needed.

    Called from /api/notifications endpoint. Uses ETag for efficient
    GitHub requests. Database handles notification deduplication.

    Since all users will trigger this via notification fetch,
    uses Redis lock to prevent concurrent GitHub requests when cache is stale.
    """
    if not is_cache_stale():
        return

    cache = get_shared_cache_backend()
    lock = cache.lock(
        OnyxRedisLocks.RELEASE_NOTES_FETCH_LOCK,
        timeout=90,
    )

    # Non-blocking acquire - if we can't get the lock, another request is handling it
    acquired = lock.acquire(blocking=False)
    if not acquired:
        logger.debug("Another request is already fetching release notes, skipping.")
        return

    try:
        logger.debug("Checking GitHub for release notes updates.")

        # Use ETag for conditional request
        headers: dict[str, str] = {}
        etag = get_cached_etag()
        if etag:
            headers["If-None-Match"] = etag

        try:
            response = httpx.get(
                GITHUB_CHANGELOG_RAW_URL,
                headers=headers,
                timeout=FETCH_TIMEOUT,
                follow_redirects=True,
            )

            if response.status_code == 304:
                # Content unchanged, just update timestamp
                logger.debug("Release notes unchanged (304).")
                save_fetch_metadata(etag)
                return

            response.raise_for_status()

            # Parse and create notifications
            entries = parse_mdx_to_release_note_entries(response.text)
            new_etag = response.headers.get("ETag")
            save_fetch_metadata(new_etag)

            # Create notifications, sorted semantically to create them in chronological order
            entries = sorted(entries, key=lambda x: parse_version_tuple(x.version))
            create_release_notifications_for_versions(db_session, entries)

        except Exception as e:
            logger.error(f"Failed to check release notes: {e}")
            # Update timestamp even on failure to prevent retry storms
            # We don't save etag on failure to allow retry with conditional request
            save_fetch_metadata(None)
    finally:
        # Always release the lock
        if lock.owned():
            lock.release()


================================================
FILE: backend/onyx/server/features/tool/api.py
================================================
from typing import Any

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.schemas import UserRole
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.tools import create_tool__no_commit
from onyx.db.tools import delete_tool__no_commit
from onyx.db.tools import get_tool_by_id
from onyx.db.tools import get_tools
from onyx.db.tools import get_tools_by_ids
from onyx.db.tools import update_tool
from onyx.server.features.tool.models import CustomToolCreate
from onyx.server.features.tool.models import CustomToolUpdate
from onyx.server.features.tool.models import ToolSnapshot
from onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe
from onyx.tools.built_in_tools import get_built_in_tool_by_id
from onyx.tools.tool_implementations.custom.openapi_parsing import MethodSpec
from onyx.tools.tool_implementations.custom.openapi_parsing import (
    openapi_to_method_specs,
)
from onyx.tools.tool_implementations.custom.openapi_parsing import (
    validate_openapi_schema,
)

router = APIRouter(prefix="/tool")
admin_router = APIRouter(prefix="/admin/tool")


def _validate_tool_definition(definition: dict[str, Any]) -> None:
    try:
        validate_openapi_schema(definition)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


def _validate_auth_settings(tool_data: CustomToolCreate | CustomToolUpdate) -> None:
    if tool_data.passthrough_auth and tool_data.custom_headers:
        for header in tool_data.custom_headers:
            if header.key.lower() == "authorization":
                raise HTTPException(
                    status_code=400,
                    detail="Cannot use passthrough auth with custom authorization headers",
                )


def _get_editable_custom_tool(tool_id: int, db_session: Session, user: User) -> Tool:
    """Fetch a custom tool and ensure the caller has permission to edit it."""
    try:
        tool = get_tool_by_id(tool_id, db_session)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    if tool.in_code_tool_id is not None:
        raise HTTPException(
            status_code=400,
            detail="Built-in tools cannot be modified through this endpoint.",
        )

    # Admins can always make changes; non-admins must own the tool.
    if user.role == UserRole.ADMIN:
        return tool

    if tool.user_id is None or tool.user_id != user.id:
        raise HTTPException(
            status_code=403,
            detail="You can only modify actions that you created.",
        )

    return tool


@admin_router.post("/custom", tags=PUBLIC_API_TAGS)
def create_custom_tool(
    tool_data: CustomToolCreate,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> ToolSnapshot:
    _validate_tool_definition(tool_data.definition)
    _validate_auth_settings(tool_data)
    tool = create_tool__no_commit(
        name=tool_data.name,
        description=tool_data.description,
        openapi_schema=tool_data.definition,
        custom_headers=tool_data.custom_headers,
        user_id=user.id,
        db_session=db_session,
        passthrough_auth=tool_data.passthrough_auth,
        oauth_config_id=tool_data.oauth_config_id,
        enabled=True,
    )
    db_session.commit()
    return ToolSnapshot.from_model(tool)


@admin_router.put("/custom/{tool_id}", tags=PUBLIC_API_TAGS)
def update_custom_tool(
    tool_id: int,
    tool_data: CustomToolUpdate,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> ToolSnapshot:
    existing_tool = _get_editable_custom_tool(tool_id, db_session, user)
    if tool_data.definition:
        _validate_tool_definition(tool_data.definition)
    _validate_auth_settings(tool_data)
    updated_tool = update_tool(
        tool_id=tool_id,
        name=tool_data.name,
        description=tool_data.description,
        openapi_schema=tool_data.definition,
        custom_headers=tool_data.custom_headers,
        user_id=existing_tool.user_id,
        db_session=db_session,
        passthrough_auth=tool_data.passthrough_auth,
        oauth_config_id=tool_data.oauth_config_id,
    )
    return ToolSnapshot.from_model(updated_tool)


@admin_router.delete("/custom/{tool_id}", tags=PUBLIC_API_TAGS)
def delete_custom_tool(
    tool_id: int,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),
) -> None:
    _ = _get_editable_custom_tool(tool_id, db_session, user)
    try:
        delete_tool__no_commit(tool_id, db_session)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        # handles case where tool is still used by an Assistant
        raise HTTPException(status_code=400, detail=str(e))
    db_session.commit()


class ToolStatusUpdateRequest(BaseModel):
    tool_ids: list[int]
    enabled: bool


class ToolStatusUpdateResponse(BaseModel):
    updated_count: int
    tool_ids: list[int]


@admin_router.patch("/status")
def update_tools_status(
    update_data: ToolStatusUpdateRequest,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001
) -> ToolStatusUpdateResponse:
    """Enable or disable one or more tools.

    Pass a single tool ID in the list to update one tool, or multiple IDs for
    bulk updates.
    """
    if not update_data.tool_ids:
        raise HTTPException(status_code=400, detail="No tool IDs provided")

    tools = get_tools_by_ids(update_data.tool_ids, db_session)
    tools_by_id = {tool.id: tool for tool in tools}

    updated_tools = []
    missing_tools = []

    for tool_id in update_data.tool_ids:
        tool = tools_by_id.get(tool_id)
        if tool:
            tool.enabled = update_data.enabled
            updated_tools.append(tool_id)
        else:
            missing_tools.append(tool_id)

    if missing_tools:
        raise HTTPException(
            status_code=404, detail=f"Tools with IDs {missing_tools} not found"
        )

    db_session.commit()

    return ToolStatusUpdateResponse(
        updated_count=len(updated_tools),
        tool_ids=updated_tools,
    )


class ValidateToolRequest(BaseModel):
    definition: dict[str, Any]


class ValidateToolResponse(BaseModel):
    methods: list[MethodSpec]


@admin_router.post("/custom/validate", tags=PUBLIC_API_TAGS)
def validate_tool(
    tool_data: ValidateToolRequest,
    _: User = Depends(current_curator_or_admin_user),
) -> ValidateToolResponse:
    _validate_tool_definition(tool_data.definition)
    method_specs = openapi_to_method_specs(tool_data.definition)
    return ValidateToolResponse(methods=method_specs)


"""Endpoints for all"""


@router.get("/openapi", tags=PUBLIC_API_TAGS)
def list_openapi_tools(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> list[ToolSnapshot]:
    tools = get_tools(db_session, only_openapi=True)

    openapi_tools: list[ToolSnapshot] = []
    for tool in tools:
        if not should_expose_tool_to_fe(tool):
            continue

        openapi_tools.append(ToolSnapshot.from_model(tool))

    return openapi_tools


@router.get("/{tool_id}", tags=PUBLIC_API_TAGS)
def get_custom_tool(
    tool_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> ToolSnapshot:
    try:
        tool = get_tool_by_id(tool_id, db_session)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    return ToolSnapshot.from_model(tool)


@router.get("", tags=PUBLIC_API_TAGS)
def list_tools(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_user),
) -> list[ToolSnapshot]:
    tools = get_tools(db_session, only_enabled=True, only_connected_mcp=True)

    filtered_tools: list[ToolSnapshot] = []
    for tool in tools:
        if not should_expose_tool_to_fe(tool):
            continue

        # Check if it's a built-in tool and if it's available
        if tool.in_code_tool_id:
            try:
                tool_cls = get_built_in_tool_by_id(tool.in_code_tool_id)
                if not tool_cls.is_available(db_session):
                    continue
            except KeyError:
                # If tool ID not found in registry, include it by default
                pass

        # All custom tools and available built-in tools are included
        filtered_tools.append(ToolSnapshot.from_model(tool))

    return filtered_tools


================================================
FILE: backend/onyx/server/features/tool/models.py
================================================
from typing import Any

from pydantic import BaseModel

from onyx.db.models import Tool
from onyx.server.features.tool.tool_visibility import get_tool_visibility_config


class ToolSnapshot(BaseModel):
    id: int
    name: str
    description: str
    definition: dict[str, Any] | None
    display_name: str
    in_code_tool_id: str | None
    custom_headers: list[Any] | None
    passthrough_auth: bool
    mcp_server_id: int | None = None
    user_id: str | None = None
    oauth_config_id: int | None = None
    oauth_config_name: str | None = None
    enabled: bool = True

    # Visibility settings computed from TOOL_VISIBILITY_CONFIG
    chat_selectable: bool = True
    agent_creation_selectable: bool = True
    default_enabled: bool = False

    @classmethod
    def from_model(cls, tool: Tool) -> "ToolSnapshot":
        # Get visibility config for this tool
        config = get_tool_visibility_config(tool)

        return cls(
            id=tool.id,
            name=tool.name,
            description=tool.description or "",
            definition=tool.openapi_schema,
            display_name=tool.display_name or tool.name,
            in_code_tool_id=tool.in_code_tool_id,
            custom_headers=tool.custom_headers,
            passthrough_auth=tool.passthrough_auth,
            mcp_server_id=tool.mcp_server_id,
            user_id=str(tool.user_id) if tool.user_id else None,
            oauth_config_id=tool.oauth_config_id,
            oauth_config_name=tool.oauth_config.name if tool.oauth_config else None,
            enabled=tool.enabled,
            # Populate visibility settings from config or use defaults
            chat_selectable=config.chat_selectable if config else True,
            agent_creation_selectable=(
                config.agent_creation_selectable if config else True
            ),
            default_enabled=config.default_enabled if config else False,
        )


class Header(BaseModel):
    key: str
    value: str


class CustomToolCreate(BaseModel):
    name: str
    description: str | None = None
    definition: dict[str, Any]
    custom_headers: list[Header] | None = None
    passthrough_auth: bool
    oauth_config_id: int | None = None


class CustomToolUpdate(BaseModel):
    name: str | None = None
    description: str | None = None
    definition: dict[str, Any] | None = None
    custom_headers: list[Header] | None = None
    passthrough_auth: bool | None = None
    oauth_config_id: int | None = None


================================================
FILE: backend/onyx/server/features/tool/tool_visibility.py
================================================
"""Tool visibility configuration and utility functions."""

from pydantic import BaseModel

from onyx.db.models import Tool
from onyx.tools.constants import MEMORY_TOOL_ID
from onyx.tools.constants import OPEN_URL_TOOL_ID

# Tool class name constant for OktaProfileTool (not in main constants.py as it's hidden)
OKTA_PROFILE_TOOL_ID = "OktaProfileTool"


class ToolVisibilitySettings(BaseModel):
    """Configuration for tool visibility across different UI contexts."""

    chat_selectable: bool = True  # Whether tool appears in chat input bar dropdown
    agent_creation_selectable: bool = (
        True  # Whether tool appears in agent creation/default behavior pages
    )
    default_enabled: bool = False  # Whether tool is enabled by default
    expose_to_frontend: bool = True  # Whether tool should be sent to frontend at all


# Centralized configuration for tool visibility across different contexts
# This allows for easy extension with new tools that need custom visibility rules
TOOL_VISIBILITY_CONFIG: dict[str, ToolVisibilitySettings] = {
    OPEN_URL_TOOL_ID: ToolVisibilitySettings(
        chat_selectable=False,
        agent_creation_selectable=True,
        default_enabled=True,
        expose_to_frontend=True,
    ),
    OKTA_PROFILE_TOOL_ID: ToolVisibilitySettings(
        chat_selectable=False,
        agent_creation_selectable=False,
        default_enabled=False,
        expose_to_frontend=False,  # Completely hidden from frontend
    ),
    MEMORY_TOOL_ID: ToolVisibilitySettings(
        chat_selectable=False,
        agent_creation_selectable=False,
        default_enabled=False,
        expose_to_frontend=False,
    ),
    # Future tools can be added here with custom visibility rules
}


def should_expose_tool_to_fe(tool: Tool) -> bool:
    """Return True when the given tool should be sent to the frontend."""
    if tool.in_code_tool_id is None:
        # Custom tools are always exposed to frontend
        return True

    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)
    return config.expose_to_frontend if config else True


def is_chat_selectable(tool: Tool) -> bool:
    """Return True if the tool should appear in the chat input bar dropdown.

    Tools can be excluded from the chat dropdown while remaining available
    in agent creation and configuration pages.
    """
    if tool.in_code_tool_id is None:
        # Custom tools are always chat selectable
        return True

    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)

    return config.chat_selectable if config else True


def is_agent_creation_selectable(tool: Tool) -> bool:
    """Return True if the tool should appear in agent creation/default behavior pages.

    Most tools should be visible in these admin contexts.
    """
    if tool.in_code_tool_id is None:
        # Custom tools are always agent creation selectable
        return True

    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)
    return config.agent_creation_selectable if config else True


def get_tool_visibility_config(tool: Tool) -> ToolVisibilitySettings | None:
    """Get visibility configuration for a tool, or None if not configured."""
    if tool.in_code_tool_id is None:
        return None
    return TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)


================================================
FILE: backend/onyx/server/features/user_oauth_token/__init__.py
================================================


================================================
FILE: backend/onyx/server/features/user_oauth_token/api.py
================================================
"""API endpoints for user OAuth token management."""

from fastapi import APIRouter
from fastapi import Depends
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.oauth_token_manager import OAuthTokenManager
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.oauth_config import get_all_user_oauth_tokens

router = APIRouter(prefix="/user-oauth-token")


class OAuthTokenStatus(BaseModel):
    oauth_config_id: int
    expires_at: int | None  # Unix timestamp
    is_expired: bool


@router.get("/status")
def get_user_oauth_token_status(
    db_session: Session = Depends(get_session),
    user: User = Depends(current_user),
) -> list[OAuthTokenStatus]:
    """
    Get the OAuth token status for the current user across all OAuth configs.

    Returns information about which OAuth configs the user has authenticated with
    and whether their tokens are expired.
    """
    user_tokens = get_all_user_oauth_tokens(user.id, db_session)
    result = []
    for token in user_tokens:
        token_data = (
            token.token_data.get_value(apply_mask=False) if token.token_data else {}
        )
        result.append(
            OAuthTokenStatus(
                oauth_config_id=token.oauth_config_id,
                expires_at=OAuthTokenManager.token_expiration_time(token_data),
                is_expired=OAuthTokenManager.is_token_expired(token_data),
            )
        )
    return result


================================================
FILE: backend/onyx/server/features/web_search/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.web_search import fetch_active_web_content_provider
from onyx.db.web_search import fetch_active_web_search_provider
from onyx.server.features.web_search.models import OpenUrlsToolRequest
from onyx.server.features.web_search.models import OpenUrlsToolResponse
from onyx.server.features.web_search.models import WebSearchToolRequest
from onyx.server.features.web_search.models import WebSearchToolResponse
from onyx.server.features.web_search.models import WebSearchWithContentResponse
from onyx.server.manage.web_search.models import WebContentProviderView
from onyx.server.manage.web_search.models import WebSearchProviderView
from onyx.tools.models import LlmOpenUrlResult
from onyx.tools.models import LlmWebSearchResult
from onyx.tools.tool_implementations.open_url.models import WebContentProvider
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_MAX_HTML_SIZE_BYTES,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_MAX_PDF_SIZE_BYTES,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    OnyxWebCrawler,
)
from onyx.tools.tool_implementations.open_url.utils import (
    filter_web_contents_with_no_title_or_content,
)
from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from onyx.tools.tool_implementations.web_search.models import WebSearchProvider
from onyx.tools.tool_implementations.web_search.providers import (
    build_content_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.utils import (
    filter_web_search_results_with_no_title_or_snippet,
)
from onyx.tools.tool_implementations.web_search.utils import (
    truncate_search_result_content,
)
from onyx.utils.logger import setup_logger
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType

router = APIRouter(prefix="/web-search", tags=PUBLIC_API_TAGS)
logger = setup_logger()


DOCUMENT_CITATION_NUMBER_EMPTY_VALUE = -1


def _get_active_search_provider(
    db_session: Session,
) -> tuple[WebSearchProviderView, WebSearchProvider]:
    provider_model = fetch_active_web_search_provider(db_session)
    if provider_model is None:
        raise HTTPException(
            status_code=400,
            detail="No web search provider configured.",
        )

    provider_view = WebSearchProviderView(
        id=provider_model.id,
        name=provider_model.name,
        provider_type=WebSearchProviderType(provider_model.provider_type),
        is_active=provider_model.is_active,
        config=provider_model.config or {},
        has_api_key=bool(provider_model.api_key),
    )

    if provider_model.api_key is None:
        raise HTTPException(
            status_code=400,
            detail="Web search provider requires an API key.",
        )

    try:
        provider: WebSearchProvider = build_search_provider_from_config(
            provider_type=provider_view.provider_type,
            api_key=provider_model.api_key.get_value(apply_mask=False),
            config=provider_model.config or {},
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    return provider_view, provider


def _get_active_content_provider(
    db_session: Session,
) -> tuple[WebContentProviderView | None, WebContentProvider]:
    provider_model = fetch_active_web_content_provider(db_session)

    if provider_model is None:
        # Default to the built-in crawler if nothing is configured. Always available.
        # NOTE: the OnyxWebCrawler is not stored in the content provider table,
        # so we need to return it directly.

        return None, OnyxWebCrawler(
            max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,
            max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,
        )

    if provider_model.api_key is None:
        # TODO - this is not a great error, in fact, this key should not be nullable.
        raise HTTPException(
            status_code=400,
            detail="Web content provider requires an API key.",
        )

    try:
        provider_type = WebContentProviderType(provider_model.provider_type)
        config = provider_model.config or WebContentProviderConfig()

        provider: WebContentProvider | None = build_content_provider_from_config(
            provider_type=provider_type,
            api_key=provider_model.api_key.get_value(apply_mask=False),
            config=config,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    if provider is None:
        raise HTTPException(
            status_code=400,
            detail="Unable to initialize the configured web content provider.",
        )

    provider_view = WebContentProviderView(
        id=provider_model.id,
        name=provider_model.name,
        provider_type=provider_type,
        is_active=provider_model.is_active,
        config=provider_model.config or WebContentProviderConfig(),
        has_api_key=bool(provider_model.api_key),
    )

    return provider_view, provider


def _run_web_search(
    request: WebSearchToolRequest, db_session: Session
) -> tuple[WebSearchProviderType, list[LlmWebSearchResult]]:
    provider_view, provider = _get_active_search_provider(db_session)

    results: list[LlmWebSearchResult] = []
    for query in request.queries:
        try:
            search_results = provider.search(query)
        except HTTPException:
            raise
        except Exception as exc:
            logger.exception("Web search provider failed for query '%s'", query)
            raise HTTPException(
                status_code=502, detail="Web search provider failed to execute query."
            ) from exc

        filtered_results = filter_web_search_results_with_no_title_or_snippet(
            list(search_results)
        )
        trimmed_results = list(filtered_results)[: request.max_results]
        for search_result in trimmed_results:
            results.append(
                LlmWebSearchResult(
                    document_citation_number=DOCUMENT_CITATION_NUMBER_EMPTY_VALUE,
                    url=search_result.link,
                    title=search_result.title,
                    snippet=search_result.snippet or "",
                    unique_identifier_to_strip_away=search_result.link,
                )
            )
    return provider_view.provider_type, results


def _open_urls(
    urls: list[str],
    db_session: Session,
) -> tuple[WebContentProviderType | None, list[LlmOpenUrlResult]]:
    # SSRF protection is handled inside the content provider (OnyxWebCrawler)
    # which uses ssrf_safe_get() to validate and fetch atomically,
    # preventing DNS rebinding attacks
    provider_view, provider = _get_active_content_provider(db_session)

    try:
        docs = filter_web_contents_with_no_title_or_content(
            list(provider.contents(urls))
        )
    except HTTPException:
        raise
    except Exception as exc:
        logger.exception("Web content provider failed to fetch URLs")
        raise HTTPException(
            status_code=502, detail="Web content provider failed to fetch URLs."
        ) from exc

    results: list[LlmOpenUrlResult] = []
    for doc in docs:
        results.append(
            LlmOpenUrlResult(
                document_citation_number=DOCUMENT_CITATION_NUMBER_EMPTY_VALUE,
                content=truncate_search_result_content(doc.full_content),
                unique_identifier_to_strip_away=doc.link,
            )
        )
    provider_type = (
        provider_view.provider_type
        if provider_view
        else WebContentProviderType.ONYX_WEB_CRAWLER
    )
    return provider_type, results


@router.post("/search", response_model=WebSearchWithContentResponse)
def execute_web_search(
    request: WebSearchToolRequest,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> WebSearchWithContentResponse:
    """
    Perform a web search and immediately fetch content for the returned URLs.

    Use this when you want both snippets and page contents from one call.

    If you want to selectively fetch content (i.e. let the LLM decide which URLs to read),
    use `/search-lite` and then call `/open-urls` separately.
    """
    search_provider_type, search_results = _run_web_search(request, db_session)

    if not search_results:
        return WebSearchWithContentResponse(
            search_provider_type=search_provider_type,
            content_provider_type=None,
            search_results=[],
            full_content_results=[],
        )

    # Fetch contents for unique URLs in the order they appear
    seen: set[str] = set()
    urls_to_fetch: list[str] = []
    for result in search_results:
        url = result.url
        if url not in seen:
            seen.add(url)
            urls_to_fetch.append(url)

    content_provider_type, full_content_results = _open_urls(urls_to_fetch, db_session)

    return WebSearchWithContentResponse(
        search_provider_type=search_provider_type,
        content_provider_type=content_provider_type,
        search_results=search_results,
        full_content_results=full_content_results,
    )


@router.post("/search-lite", response_model=WebSearchToolResponse)
def execute_web_search_lite(
    request: WebSearchToolRequest,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> WebSearchToolResponse:
    """
    Lightweight search-only endpoint. Returns search snippets and URLs without
    fetching page contents. Pair with `/open-urls` if you need to fetch content
    later.
    """
    provider_type, search_results = _run_web_search(request, db_session)

    return WebSearchToolResponse(results=search_results, provider_type=provider_type)


@router.post("/open-urls", response_model=OpenUrlsToolResponse)
def execute_open_urls(
    request: OpenUrlsToolRequest,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> OpenUrlsToolResponse:
    """
    Fetch content for specific URLs using the configured content provider.
    Intended to complement `/search-lite` when you need content for a subset of URLs.
    """
    provider_type, results = _open_urls(request.urls, db_session)
    return OpenUrlsToolResponse(results=results, provider_type=provider_type)


================================================
FILE: backend/onyx/server/features/web_search/models.py
================================================
from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator

from onyx.tools.models import LlmOpenUrlResult
from onyx.tools.models import LlmWebSearchResult
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType


class WebSearchToolRequest(BaseModel):
    queries: list[str] = Field(
        ...,
        min_length=1,
        description="List of search queries to send to the configured provider.",
    )
    max_results: int | None = Field(
        default=10,
        description=(
            "Optional cap on number of results to return per query. Defaults to 10."
        ),
    )

    @field_validator("queries")
    @classmethod
    def _strip_and_validate_queries(cls, queries: list[str]) -> list[str]:
        cleaned_queries = [q.strip() for q in queries if q and q.strip()]
        if not cleaned_queries:
            raise ValueError("queries must include at least one non-empty value")
        return cleaned_queries

    @field_validator("max_results")
    @classmethod
    def _default_and_validate_max_results(cls, max_results: int | None) -> int:
        # Default to 10 when not provided
        max_results = 10 if max_results is None else max_results
        if max_results < 1:
            raise ValueError("max_results must be at least 1")
        return max_results


class WebSearchToolResponse(BaseModel):
    results: list[LlmWebSearchResult]
    provider_type: WebSearchProviderType


class WebSearchWithContentResponse(BaseModel):
    search_provider_type: WebSearchProviderType
    content_provider_type: WebContentProviderType | None = None
    search_results: list[LlmWebSearchResult]
    full_content_results: list[LlmOpenUrlResult]


class OpenUrlsToolRequest(BaseModel):
    urls: list[str] = Field(
        ...,
        min_length=1,
        description="URLs to fetch using the configured content provider.",
    )

    @field_validator("urls")
    @classmethod
    def _strip_and_validate_urls(cls, urls: list[str]) -> list[str]:
        cleaned_urls = [url.strip() for url in urls if url and url.strip()]
        if not cleaned_urls:
            raise ValueError("urls must include at least one non-empty value")
        return cleaned_urls


class OpenUrlsToolResponse(BaseModel):
    results: list[LlmOpenUrlResult]
    provider_type: WebContentProviderType | None = None


================================================
FILE: backend/onyx/server/federated/api.py
================================================
import json
from typing import Any
from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from sqlalchemy.orm import Session

from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import FederatedConnectorSource
from onyx.db.engine.sql_engine import get_session
from onyx.db.federated import (
    create_federated_connector as db_create_federated_connector,
)
from onyx.db.federated import delete_federated_connector
from onyx.db.federated import fetch_all_federated_connectors
from onyx.db.federated import fetch_federated_connector_by_id
from onyx.db.federated import update_federated_connector
from onyx.db.federated import update_federated_connector_oauth_token
from onyx.db.federated import validate_federated_connector_credentials
from onyx.db.models import User
from onyx.federated_connectors.factory import get_federated_connector
from onyx.federated_connectors.factory import get_federated_connector_cls
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.oauth_utils import add_state_to_oauth_url
from onyx.federated_connectors.oauth_utils import generate_oauth_state
from onyx.federated_connectors.oauth_utils import get_oauth_callback_uri
from onyx.federated_connectors.oauth_utils import verify_oauth_state
from onyx.server.federated.models import AuthorizeUrlResponse
from onyx.server.federated.models import ConfigurationSchemaResponse
from onyx.server.federated.models import CredentialSchemaResponse
from onyx.server.federated.models import EntitySpecResponse
from onyx.server.federated.models import FederatedConnectorCredentials
from onyx.server.federated.models import FederatedConnectorDetail
from onyx.server.federated.models import FederatedConnectorRequest
from onyx.server.federated.models import FederatedConnectorResponse
from onyx.server.federated.models import FederatedConnectorStatus
from onyx.server.federated.models import FederatedConnectorUpdateRequest
from onyx.server.federated.models import OAuthCallbackResult
from onyx.server.federated.models import UserOAuthStatus
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/federated")


def _get_federated_connector_instance(
    source: FederatedConnectorSource,
    credentials: dict[str, Any],
) -> FederatedConnector:
    """Factory function to get the appropriate federated connector instance."""
    try:
        return get_federated_connector(source, credentials)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@router.post("")
def create_federated_connector(
    federated_connector_data: FederatedConnectorRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FederatedConnectorResponse:
    """Create a new federated connector"""
    tenant_id = get_current_tenant_id()

    logger.info(
        f"Creating federated connector: source={federated_connector_data.source}, user={user.email}, tenant_id={tenant_id}"
    )

    try:
        # Create the federated connector with validation
        federated_connector = db_create_federated_connector(
            db_session=db_session,
            source=federated_connector_data.source,
            credentials=federated_connector_data.credentials.model_dump(),
            config=federated_connector_data.config,
        )

        logger.info(
            f"Successfully created federated connector with id={federated_connector.id}"
        )

        return FederatedConnectorResponse(
            id=federated_connector.id,
            source=federated_connector.source,
        )

    except ValueError as e:
        logger.warning(f"Validation error creating federated connector: {e}")
        db_session.rollback()
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error creating federated connector: {e}")
        db_session.rollback()
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/{id}/entities")
def get_entities(
    id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> EntitySpecResponse:
    """Fetch allowed entities for the source type"""
    try:
        federated_connector = fetch_federated_connector_by_id(id, db_session)
        if not federated_connector:
            raise HTTPException(status_code=404, detail="Federated connector not found")
        if federated_connector.credentials is None:
            raise HTTPException(
                status_code=400, detail="Federated connector has no credentials"
            )

        connector_instance = _get_federated_connector_instance(
            federated_connector.source,
            federated_connector.credentials.get_value(apply_mask=False),
        )
        entities_spec = connector_instance.configuration_schema()

        # Convert EntityField objects to a dictionary format for the API response
        entities_dict = {}
        for key, field in entities_spec.items():
            entities_dict[key] = {
                "type": field.type,
                "description": field.description,
                "required": field.required,
                "default": field.default,
                "example": field.example,
            }

        return EntitySpecResponse(entities=entities_dict)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching entities for federated connector {id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/{id}/credentials/schema")
def get_credentials_schema(
    id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> CredentialSchemaResponse:
    """Fetch credential schema for the source type"""
    try:
        federated_connector = fetch_federated_connector_by_id(id, db_session)
        if not federated_connector:
            raise HTTPException(status_code=404, detail="Federated connector not found")
        if federated_connector.credentials is None:
            raise HTTPException(
                status_code=400, detail="Federated connector has no credentials"
            )

        connector_instance = _get_federated_connector_instance(
            federated_connector.source,
            federated_connector.credentials.get_value(apply_mask=False),
        )
        credentials_spec = connector_instance.credentials_schema()

        # Convert CredentialField objects to a dictionary format for the API response
        credentials_dict = {}
        for key, field in credentials_spec.items():
            credentials_dict[key] = {
                "type": field.type,
                "description": field.description,
                "required": field.required,
                "default": field.default,
                "example": field.example,
                "secret": field.secret,
            }

        return CredentialSchemaResponse(credentials=credentials_dict)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            f"Error fetching credentials schema for federated connector {id}: {e}"
        )
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/sources/{source}/configuration/schema")
def get_configuration_schema_by_source(
    source: FederatedConnectorSource,
    _: User = Depends(current_curator_or_admin_user),
) -> ConfigurationSchemaResponse:
    """Fetch configuration schema for a specific source type (for setup/edit forms)"""
    try:
        connector_cls = get_federated_connector_cls(source)
        entities_spec = connector_cls.configuration_schema()

        # Convert EntityField objects to a dictionary format for the API response
        configuration_dict = {}
        for key, field in entities_spec.items():
            configuration_dict[key] = {
                "type": field.type,
                "description": field.description,
                "required": field.required,
                "default": field.default,
                "example": field.example,
            }

        return ConfigurationSchemaResponse(configuration=configuration_dict)

    except Exception as e:
        logger.error(f"Error fetching configuration schema for source {source}: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/sources/{source}/credentials/schema")
def get_credentials_schema_by_source(
    source: FederatedConnectorSource,
    _: User = Depends(current_curator_or_admin_user),
) -> CredentialSchemaResponse:
    """Fetch credential schema for a specific source type (for setup forms)"""
    try:
        connector_cls = get_federated_connector_cls(source)
        credentials_spec = connector_cls.credentials_schema()

        # Convert CredentialField objects to a dictionary format for the API response
        credentials_dict = {}
        for key, field in credentials_spec.items():
            credentials_dict[key] = {
                "type": field.type,
                "description": field.description,
                "required": field.required,
                "default": field.default,
                "example": field.example,
                "secret": field.secret,
            }

        return CredentialSchemaResponse(credentials=credentials_dict)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching credentials schema for source {source}: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/sources/{source}/credentials/validate")
def validate_credentials(
    source: FederatedConnectorSource,
    credentials: FederatedConnectorCredentials,
    _: User = Depends(current_curator_or_admin_user),
) -> bool:
    """Validate credentials for a specific source type"""
    try:
        is_valid = validate_federated_connector_credentials(
            source, credentials.model_dump()
        )

        if not is_valid:
            raise HTTPException(status_code=400, detail="Credentials are invalid")

        return is_valid

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error validating credentials for source {source}: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.head("/{id}/entities/validate")
def validate_entities(
    id: int,
    request: Request,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Validate specified entities for source type"""
    try:
        federated_connector = fetch_federated_connector_by_id(id, db_session)
        if not federated_connector:
            raise HTTPException(status_code=404, detail="Federated connector not found")
        if federated_connector.credentials is None:
            return Response(status_code=400)

        # For HEAD requests, we'll expect entities as query parameters
        # since HEAD requests shouldn't have request bodies
        entities_dict = {}
        query_params = dict(request.query_params)
        if "entities" in query_params:
            try:
                entities_dict = json.loads(query_params["entities"])
            except json.JSONDecodeError:
                logger.warning("Could not parse entities from query parameters")
                return Response(status_code=400)

        connector_instance = _get_federated_connector_instance(
            federated_connector.source,
            federated_connector.credentials.get_value(apply_mask=False),
        )
        is_valid = connector_instance.validate_entities(entities_dict)

        if is_valid:
            return Response(status_code=200)
        else:
            return Response(status_code=400)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error validating entities for federated connector {id}: {e}")
        return Response(status_code=500)


@router.get("/{id}/authorize")
def get_authorize_url(
    id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> AuthorizeUrlResponse:
    """Get URL to send the user for OAuth"""
    # Validate that the ID is not None or invalid
    if id is None or id <= 0:
        raise HTTPException(status_code=400, detail="Invalid federated connector ID")

    federated_connector = fetch_federated_connector_by_id(id, db_session)
    if not federated_connector:
        raise HTTPException(status_code=404, detail="Federated connector not found")
    if federated_connector.credentials is None:
        raise HTTPException(
            status_code=400, detail="Federated connector has no credentials"
        )

    # Update credentials to include the correct redirect URI with the connector ID
    updated_credentials = federated_connector.credentials.get_value(
        apply_mask=False
    ).copy()
    if "redirect_uri" in updated_credentials and updated_credentials["redirect_uri"]:
        # Replace the {id} placeholder with the actual federated connector ID
        updated_credentials["redirect_uri"] = updated_credentials[
            "redirect_uri"
        ].replace("{id}", str(id))

    connector_instance = _get_federated_connector_instance(
        federated_connector.source, updated_credentials
    )
    base_authorize_url = connector_instance.authorize(get_oauth_callback_uri())

    # Generate state parameter and store session info
    logger.info(
        f"Generating OAuth state for federated_connector_id={id}, user_id={user.id}"
    )
    state = generate_oauth_state(
        federated_connector_id=id,
        user_id=str(user.id),
    )

    # Add state to the OAuth URL
    authorize_url = add_state_to_oauth_url(base_authorize_url, state)
    logger.info(f"Generated OAuth authorize URL with state for connector {id}")
    return AuthorizeUrlResponse(authorize_url=authorize_url)


@router.post("/callback")
def handle_oauth_callback_generic(
    request: Request,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> OAuthCallbackResult:
    """Handle callback for any federated connector using state parameter"""
    # Get callback data from request (query parameters)
    callback_data = dict(request.query_params)

    # Verify state parameter and get session info
    state = callback_data.get("state")
    if not state:
        raise HTTPException(status_code=400, detail="Missing state parameter")

    try:
        oauth_session = verify_oauth_state(state)
    except ValueError:
        logger.exception("Error verifying OAuth state")
        raise HTTPException(
            status_code=400, detail="Invalid or expired state parameter"
        )

    if not oauth_session:
        raise HTTPException(
            status_code=400, detail="Invalid or expired state parameter"
        )

    # Get federated connector ID from the state
    federated_connector_id = oauth_session.federated_connector_id

    # Validate federated_connector_id is not None
    if federated_connector_id is None:
        logger.error("OAuth session has null federated_connector_id")
        raise HTTPException(
            status_code=400,
            detail="Invalid OAuth session: missing federated connector ID",
        )

    federated_connector = fetch_federated_connector_by_id(
        federated_connector_id, db_session
    )
    if not federated_connector:
        raise HTTPException(status_code=404, detail="Federated connector not found")
    if federated_connector.credentials is None:
        raise HTTPException(
            status_code=400, detail="Federated connector has no credentials"
        )

    connector_instance = _get_federated_connector_instance(
        federated_connector.source,
        federated_connector.credentials.get_value(apply_mask=False),
    )
    oauth_result = connector_instance.callback(callback_data, get_oauth_callback_uri())

    # Convert OAuthResult to OAuthCallbackResult for API response
    oauth_result_dict = oauth_result.model_dump()
    oauth_callback_result = OAuthCallbackResult(**oauth_result_dict)

    # Add source information to the response
    oauth_callback_result.source = federated_connector.source

    # Store OAuth token in database if we have an access token
    if oauth_result.access_token:
        logger.info(
            f"Storing OAuth token for federated_connector_id={federated_connector_id}, user_id={oauth_session.user_id}"
        )
        update_federated_connector_oauth_token(
            db_session=db_session,
            federated_connector_id=federated_connector_id,
            user_id=UUID(oauth_session.user_id),
            token=oauth_result.access_token,
            expires_at=oauth_result.expires_at,
        )

    return oauth_callback_result


@router.get("")
def get_federated_connectors(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[FederatedConnectorStatus]:
    """Get all federated connectors for display in the status table"""
    federated_connectors = fetch_all_federated_connectors(db_session)

    result = []
    for fc in federated_connectors:
        status_data = FederatedConnectorStatus(
            id=fc.id,
            source=fc.source,
            name=f"{fc.source.replace('_', ' ').title()}",
        )
        result.append(status_data)

    return result


@router.get("/oauth-status")
def get_user_oauth_status(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserOAuthStatus]:
    """Get OAuth status for all federated connectors for the current user"""
    federated_connectors = fetch_all_federated_connectors(db_session)

    result = []
    for fc in federated_connectors:
        # Check if user has OAuth token for this connector
        oauth_token = None
        for token in fc.oauth_tokens:
            if token.user_id == user.id:
                oauth_token = token
                break

        # Generate authorize URL if needed
        authorize_url = None
        if not oauth_token and fc.credentials is not None:
            connector_instance = _get_federated_connector_instance(
                fc.source, fc.credentials.get_value(apply_mask=False)
            )
            base_authorize_url = connector_instance.authorize(get_oauth_callback_uri())

            # Generate state parameter and add to URL
            state = generate_oauth_state(
                federated_connector_id=fc.id,
                user_id=str(user.id),
            )
            authorize_url = add_state_to_oauth_url(base_authorize_url, state)

        status_data = UserOAuthStatus(
            federated_connector_id=fc.id,
            source=fc.source,
            name=f"{fc.source.replace('_', ' ').title()}",
            has_oauth_token=oauth_token is not None,
            oauth_token_expires_at=oauth_token.expires_at if oauth_token else None,
            authorize_url=authorize_url,
        )
        result.append(status_data)

    return result


@router.get("/{id}")
def get_federated_connector_detail(
    id: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FederatedConnectorDetail:
    """Get detailed information about a specific federated connector"""
    federated_connector = fetch_federated_connector_by_id(id, db_session)
    if not federated_connector:
        raise HTTPException(status_code=404, detail="Federated connector not found")
    if federated_connector.credentials is None:
        raise HTTPException(
            status_code=400, detail="Federated connector has no credentials"
        )

    # Get OAuth token information for the current user
    oauth_token = None
    for token in federated_connector.oauth_tokens:
        if token.user_id == user.id:
            oauth_token = token
            break

    # Get document set mappings
    document_sets = []
    for mapping in federated_connector.document_sets:
        document_sets.append(
            {
                "id": mapping.document_set_id,
                "name": (
                    mapping.document_set.name if mapping.document_set else "Unknown"
                ),
                "entities": mapping.entities,
            }
        )

    return FederatedConnectorDetail(
        id=federated_connector.id,
        source=federated_connector.source,
        name=f"{federated_connector.source.replace('_', ' ').title()}",
        credentials=FederatedConnectorCredentials(
            **federated_connector.credentials.get_value(apply_mask=True)
        ),
        config=federated_connector.config,
        oauth_token_exists=oauth_token is not None,
        oauth_token_expires_at=oauth_token.expires_at if oauth_token else None,
        document_sets=document_sets,
    )


@router.put("/{id}")
def update_federated_connector_endpoint(
    id: int,
    update_request: FederatedConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FederatedConnectorDetail:
    """Update a federated connector's configuration"""
    try:
        # Update the federated connector
        updated_connector = update_federated_connector(
            db_session=db_session,
            federated_connector_id=id,
            credentials=(
                update_request.credentials.model_dump()
                if update_request.credentials
                else None
            ),
            config=update_request.config,
        )

        if not updated_connector:
            raise HTTPException(status_code=404, detail="Federated connector not found")

        # Return updated connector details
        return get_federated_connector_detail(id, user, db_session)

    except ValueError as e:
        logger.warning(f"Validation error updating federated connector {id}: {e}")
        raise HTTPException(status_code=400, detail=str(e))


@router.delete("/{id}")
def delete_federated_connector_endpoint(
    id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> bool:
    """Delete a federated connector"""
    success = delete_federated_connector(
        db_session=db_session,
        federated_connector_id=id,
    )

    if not success:
        raise HTTPException(status_code=404, detail="Federated connector not found")

    return True


@router.delete("/{id}/oauth")
def disconnect_oauth_token(
    id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> bool:
    """Disconnect OAuth token for the current user from a federated connector"""
    # Check if the federated connector exists
    federated_connector = fetch_federated_connector_by_id(id, db_session)
    if not federated_connector:
        raise HTTPException(status_code=404, detail="Federated connector not found")

    # Find and delete the user's OAuth token
    oauth_token = None
    for token in federated_connector.oauth_tokens:
        if token.user_id == user.id:
            oauth_token = token
            break

    if oauth_token:
        db_session.delete(oauth_token)
        db_session.commit()
        return True
    else:
        raise HTTPException(
            status_code=404, detail="No OAuth token found for this user"
        )


================================================
FILE: backend/onyx/server/federated/models.py
================================================
from datetime import datetime
from typing import Any

from pydantic import BaseModel
from pydantic import Field

from onyx.configs.constants import FederatedConnectorSource


class FederatedConnectorCredentials(BaseModel):
    """Credentials for federated connector"""

    client_id: str | None = None
    client_secret: str | None = None
    redirect_uri: str | None = None


class FederatedConnectorRequest(BaseModel):
    source: FederatedConnectorSource
    credentials: FederatedConnectorCredentials
    config: dict[str, Any] = Field(default_factory=dict)


class FederatedConnectorResponse(BaseModel):
    id: int
    source: FederatedConnectorSource


class AuthorizeUrlResponse(BaseModel):
    authorize_url: str


class OAuthCallbackResult(BaseModel):
    access_token: str | None = None
    expires_at: datetime | None = None
    refresh_token: str | None = None
    token_type: str | None = None
    scope: str | None = None
    source: FederatedConnectorSource | None = None


class FederatedConnectorStatus(BaseModel):
    id: int
    source: FederatedConnectorSource
    name: str


class UserOAuthStatus(BaseModel):
    """OAuth status for a specific user and federated connector"""

    federated_connector_id: int
    source: FederatedConnectorSource
    name: str
    has_oauth_token: bool
    oauth_token_expires_at: datetime | None = None
    authorize_url: str | None = None


class FederatedConnectorDetail(BaseModel):
    id: int
    source: FederatedConnectorSource
    name: str
    credentials: FederatedConnectorCredentials
    config: dict[str, Any] = Field(default_factory=dict)
    oauth_token_exists: bool
    oauth_token_expires_at: datetime | None = None
    document_sets: list[dict[str, Any]] = Field(default_factory=list)


class FederatedConnectorSummary(BaseModel):
    """Simplified federated connector information with just essential data"""

    id: int
    name: str
    source: FederatedConnectorSource
    entities: dict[str, Any]

    @classmethod
    def from_federated_connector_detail(
        cls, detail: FederatedConnectorDetail, entities: dict[str, Any]
    ) -> "FederatedConnectorSummary":
        return cls(
            id=detail.id,
            name=detail.name,
            source=detail.source,
            entities=entities,
        )


class FederatedConnectorUpdateRequest(BaseModel):
    credentials: FederatedConnectorCredentials | None = None
    config: dict[str, Any] | None = None


class EntitySpecResponse(BaseModel):
    """Response for entity specification"""

    entities: dict[str, Any]


class ConfigurationSchemaResponse(BaseModel):
    """Response for configuration schema specification"""

    configuration: dict[str, Any]


class CredentialSchemaResponse(BaseModel):
    """Response for credential schema specification"""

    credentials: dict[str, Any]


================================================
FILE: backend/onyx/server/kg/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
from onyx.configs.kg_configs import KG_BETA_ASSISTANT_DESCRIPTION
from onyx.db.engine.sql_engine import get_session
from onyx.db.entities import get_entity_stats_by_grounded_source_name
from onyx.db.entity_type import get_configured_entity_types
from onyx.db.entity_type import update_entity_types_and_related_connectors__commit
from onyx.db.kg_config import disable_kg
from onyx.db.kg_config import enable_kg
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import set_kg_config_settings
from onyx.db.models import User
from onyx.db.persona import create_update_persona
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import mark_persona_as_deleted
from onyx.db.persona import mark_persona_as_not_deleted
from onyx.db.tools import get_builtin_tool
from onyx.kg.resets.reset_index import reset_full_kg_index__commit
from onyx.kg.setup.kg_default_entity_definitions import (
    populate_missing_default_entity_types__commit,
)
from onyx.prompts.kg_prompts import KG_BETA_ASSISTANT_SYSTEM_PROMPT
from onyx.prompts.kg_prompts import KG_BETA_ASSISTANT_TASK_PROMPT
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.kg.models import DisableKGConfigRequest
from onyx.server.kg.models import EnableKGConfigRequest
from onyx.server.kg.models import EntityType
from onyx.server.kg.models import KGConfig
from onyx.server.kg.models import KGConfig as KGConfigAPIModel
from onyx.server.kg.models import SourceAndEntityTypeView
from onyx.server.kg.models import SourceStatistics
from onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (
    KnowledgeGraphTool,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool


admin_router = APIRouter(prefix="/admin/kg")


# exposed
# Controls whether or not kg is viewable in the first place.


@admin_router.get("/exposed")
def get_kg_exposed(_: User = Depends(current_admin_user)) -> bool:
    kg_config_settings = get_kg_config_settings()
    return kg_config_settings.KG_EXPOSED


# global resets


@admin_router.put("/reset")
def reset_kg(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> SourceAndEntityTypeView:
    reset_full_kg_index__commit(db_session)
    populate_missing_default_entity_types__commit(db_session=db_session)
    return get_kg_entity_types(db_session=db_session)


# configurations


@admin_router.get("/config")
def get_kg_config(_: User = Depends(current_admin_user)) -> KGConfig:
    config = get_kg_config_settings()
    return KGConfigAPIModel.from_kg_config_settings(config)


@admin_router.put("/config")
def enable_or_disable_kg(
    req: EnableKGConfigRequest | DisableKGConfigRequest,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    if isinstance(req, DisableKGConfigRequest):
        # Get the KG Beta persona ID and delete it
        kg_config_settings = get_kg_config_settings()
        persona_id = kg_config_settings.KG_BETA_PERSONA_ID
        if persona_id is not None:
            mark_persona_as_deleted(
                persona_id=persona_id,
                user=user,
                db_session=db_session,
            )
        disable_kg()
        return

    # Enable KG
    enable_kg(enable_req=req)
    populate_missing_default_entity_types__commit(db_session=db_session)

    # Get the search and knowledge graph tools
    search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)
    kg_tool = get_builtin_tool(db_session=db_session, tool_type=KnowledgeGraphTool)

    # Check if we have a previously created persona
    kg_config_settings = get_kg_config_settings()
    persona_id = kg_config_settings.KG_BETA_PERSONA_ID

    if persona_id is not None:
        # Try to restore the existing persona
        try:
            persona = get_persona_by_id(
                persona_id=persona_id,
                user=user,
                db_session=db_session,
                include_deleted=True,
            )
            if persona.deleted:
                mark_persona_as_not_deleted(
                    persona_id=persona_id,
                    user=user,
                    db_session=db_session,
                )
            return

        except ValueError:
            # If persona doesn't exist or can't be restored, create a new one below
            pass

    # Create KG Beta persona (private to the admin who enabled KG)
    persona_request = PersonaUpsertRequest(
        name=TMP_DRALPHA_PERSONA_NAME,
        description=KG_BETA_ASSISTANT_DESCRIPTION,
        system_prompt=KG_BETA_ASSISTANT_SYSTEM_PROMPT,
        task_prompt=KG_BETA_ASSISTANT_TASK_PROMPT,
        datetime_aware=False,
        is_public=False,
        document_set_ids=[],
        tool_ids=[search_tool.id, kg_tool.id],
        llm_model_provider_override=None,
        llm_model_version_override=None,
        starter_messages=None,
        users=[user.id],
        groups=[],
        label_ids=[],
        is_featured=False,
        display_priority=0,
        user_file_ids=[],
    )

    persona_snapshot = create_update_persona(
        persona_id=None,
        create_persona_request=persona_request,
        user=user,
        db_session=db_session,
    )
    # Store the persona ID in the KG config
    kg_config_settings.KG_BETA_PERSONA_ID = persona_snapshot.id
    set_kg_config_settings(kg_config_settings)


# entity-types


@admin_router.get("/entity-types")
def get_kg_entity_types(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> SourceAndEntityTypeView:
    # when using for the first time, populate with default entity types
    entity_types = {
        source_name: [EntityType.from_model(et) for et in ets]
        for source_name, ets in get_configured_entity_types(
            db_session=db_session
        ).items()
    }

    source_statistics = {
        source_name: SourceStatistics(
            source_name=source_name,
            last_updated=last_updated,
            entities_count=entities_count,
        )
        for source_name, (
            last_updated,
            entities_count,
        ) in get_entity_stats_by_grounded_source_name(db_session=db_session).items()
    }

    return SourceAndEntityTypeView(
        source_statistics=source_statistics, entity_types=entity_types
    )


@admin_router.put("/entity-types")
def update_kg_entity_types(
    updates: list[EntityType],
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_entity_types_and_related_connectors__commit(
        db_session=db_session, updates=updates
    )


================================================
FILE: backend/onyx/server/kg/models.py
================================================
from datetime import datetime

from pydantic import ConfigDict
from pydantic.main import BaseModel

from onyx.db.models import KGEntityType
from onyx.kg.models import KGConfigSettings


class KGConfig(BaseModel):
    enabled: bool
    vendor: str | None
    vendor_domains: list[str] | None
    ignore_domains: list[str] | None
    coverage_start: datetime | None

    @classmethod
    def from_kg_config_settings(
        cls,
        kg_config_settings: KGConfigSettings,
    ) -> "KGConfig":
        return cls(
            enabled=kg_config_settings.KG_ENABLED,
            vendor=kg_config_settings.KG_VENDOR,
            vendor_domains=kg_config_settings.KG_VENDOR_DOMAINS,
            ignore_domains=kg_config_settings.KG_IGNORE_EMAIL_DOMAINS,
            coverage_start=kg_config_settings.KG_COVERAGE_START_DATE,
        )


class EnableKGConfigRequest(BaseModel):
    vendor: str
    vendor_domains: list[str]
    ignore_domains: list[str] = []
    coverage_start: datetime

    model_config = ConfigDict(
        extra="forbid",
    )


class DisableKGConfigRequest(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )


class EntityType(BaseModel):
    name: str
    description: str
    active: bool
    grounded_source_name: str | None = None

    @classmethod
    def from_model(
        cls,
        model: KGEntityType,
    ) -> "EntityType":
        return cls(
            name=model.id_name,
            description=model.description or "",
            active=model.active,
            grounded_source_name=model.grounded_source_name,
        )


class SourceStatistics(BaseModel):
    source_name: str
    last_updated: datetime
    entities_count: int


class SourceAndEntityTypeView(BaseModel):
    source_statistics: dict[str, SourceStatistics]
    entity_types: dict[str, list[EntityType]]


================================================
FILE: backend/onyx/server/manage/__init__.py
================================================


================================================
FILE: backend/onyx/server/manage/administrative.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import cast

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import KV_GEN_AI_KEY_CHECK_TIME
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.connector_credential_pair import get_connector_credential_pair_for_user
from onyx.db.connector_credential_pair import (
    update_connector_credential_pair_from_id,
)
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.feedback import fetch_docs_ranked_by_boost_for_user
from onyx.db.feedback import update_document_boost_for_user
from onyx.db.feedback import update_document_hidden_for_user
from onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair
from onyx.db.models import User
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.llm.factory import get_default_llm
from onyx.llm.utils import test_llm
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.manage.models import BoostDoc
from onyx.server.manage.models import BoostUpdateRequest
from onyx.server.manage.models import HiddenUpdateRequest
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

router = APIRouter(prefix="/manage")
logger = setup_logger()

"""Admin only API endpoints"""


@router.get("/admin/doc-boosts")
def get_most_boosted_docs(
    ascending: bool,
    limit: int,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[BoostDoc]:
    boost_docs = fetch_docs_ranked_by_boost_for_user(
        ascending=ascending,
        limit=limit,
        db_session=db_session,
        user=user,
    )
    return [
        BoostDoc(
            document_id=doc.id,
            semantic_id=doc.semantic_id,
            # source=doc.source,
            link=doc.link or "",
            boost=doc.boost,
            hidden=doc.hidden,
        )
        for doc in boost_docs
    ]


@router.post("/admin/doc-boosts")
def document_boost_update(
    boost_update: BoostUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    update_document_boost_for_user(
        db_session=db_session,
        document_id=boost_update.document_id,
        boost=boost_update.boost,
        user=user,
    )
    return StatusResponse(success=True, message="Updated document boost")


@router.post("/admin/doc-hidden")
def document_hidden_update(
    hidden_update: HiddenUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    update_document_hidden_for_user(
        db_session=db_session,
        document_id=hidden_update.document_id,
        hidden=hidden_update.hidden,
        user=user,
    )
    return StatusResponse(success=True, message="Updated document boost")


@router.get("/admin/genai-api-key/validate")
def validate_existing_genai_api_key(
    _: User = Depends(current_admin_user),
) -> None:
    # Only validate every so often
    kv_store = get_kv_store()
    curr_time = datetime.now(tz=timezone.utc)
    try:
        last_check = datetime.fromtimestamp(
            cast(float, kv_store.load(KV_GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc
        )
        check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ)
        if curr_time - last_check < check_freq_sec:
            return
    except KvKeyNotFoundError:
        # First time checking the key, nothing unusual
        pass

    try:
        llm = get_default_llm(timeout=10)
    except ValueError:
        raise HTTPException(status_code=404, detail="LLM not setup")

    error = test_llm(llm)
    if error:
        raise HTTPException(status_code=400, detail=error)

    # Mark check as successful
    curr_time = datetime.now(tz=timezone.utc)
    kv_store.store(KV_GEN_AI_KEY_CHECK_TIME, curr_time.timestamp())


@router.post("/admin/deletion-attempt", tags=PUBLIC_API_TAGS)
def create_deletion_attempt_for_connector_id(
    connector_credential_pair_identifier: ConnectorCredentialPairIdentifier,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    tenant_id = get_current_tenant_id()

    connector_id = connector_credential_pair_identifier.connector_id
    credential_id = connector_credential_pair_identifier.credential_id

    cc_pair = get_connector_credential_pair_for_user(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
        user=user,
        get_editable=True,
    )
    if cc_pair is None:
        error = f"Connector with ID '{connector_id}' and credential ID '{credential_id}' does not exist. Has it already been deleted?"
        logger.error(error)
        raise HTTPException(
            status_code=404,
            detail=error,
        )

    # Cancel any scheduled indexing attempts
    cancel_indexing_attempts_for_ccpair(
        cc_pair_id=cc_pair.id, db_session=db_session, include_secondary_index=True
    )

    # TODO(rkuo): 2024-10-24 - check_deletion_attempt_is_allowed shouldn't be necessary
    # any more due to background locking improvements.
    # Remove the below permanently if everything is behaving for 30 days.

    # Check if the deletion attempt should be allowed
    # deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed(
    #     connector_credential_pair=cc_pair, db_session=db_session
    # )
    # if deletion_attempt_disallowed_reason:
    #     raise HTTPException(
    #         status_code=400,
    #         detail=deletion_attempt_disallowed_reason,
    #     )

    # mark as deleting
    update_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair.id,
        status=ConnectorCredentialPairStatus.DELETING,
    )

    db_session.commit()

    # run the beat task to pick up this deletion from the db immediately
    client_app.send_task(
        OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
        priority=OnyxCeleryPriority.HIGH,
        kwargs={"tenant_id": tenant_id},
    )

    logger.info(
        f"create_deletion_attempt_for_connector_id - running check_for_connector_deletion: cc_pair={cc_pair.id}"
    )

    if cc_pair.connector.source == DocumentSource.FILE:
        connector = cc_pair.connector
        file_store = get_default_file_store()
        for file_id in connector.connector_specific_config.get("file_locations", []):
            file_store.delete_file(file_id)


================================================
FILE: backend/onyx/server/manage/code_interpreter/__init__.py
================================================


================================================
FILE: backend/onyx/server/manage/code_interpreter/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.code_interpreter import fetch_code_interpreter_server
from onyx.db.code_interpreter import update_code_interpreter_server_enabled
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.manage.code_interpreter.models import CodeInterpreterServer
from onyx.server.manage.code_interpreter.models import CodeInterpreterServerHealth
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    CodeInterpreterClient,
)

admin_router = APIRouter(prefix="/admin/code-interpreter")


@admin_router.get("/health")
def get_code_interpreter_health(
    _: User = Depends(current_admin_user),
) -> CodeInterpreterServerHealth:
    try:
        client = CodeInterpreterClient()
        return CodeInterpreterServerHealth(healthy=client.health())
    except ValueError:
        return CodeInterpreterServerHealth(healthy=False)


@admin_router.get("")
def get_code_interpreter(
    _: User = Depends(current_admin_user), db_session: Session = Depends(get_session)
) -> CodeInterpreterServer:
    ci_server = fetch_code_interpreter_server(db_session)
    return CodeInterpreterServer(enabled=ci_server.server_enabled)


@admin_router.put("")
def update_code_interpreter(
    update: CodeInterpreterServer,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_code_interpreter_server_enabled(
        db_session=db_session,
        enabled=update.enabled,
    )


================================================
FILE: backend/onyx/server/manage/code_interpreter/models.py
================================================
from pydantic import BaseModel


class CodeInterpreterServer(BaseModel):
    enabled: bool


class CodeInterpreterServerHealth(BaseModel):
    healthy: bool


================================================
FILE: backend/onyx/server/manage/discord_bot/api.py
================================================
"""Discord bot admin API endpoints."""

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import status
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISCORD_BOT_TOKEN
from onyx.configs.constants import AuthType
from onyx.db.discord_bot import create_discord_bot_config
from onyx.db.discord_bot import create_guild_config
from onyx.db.discord_bot import delete_discord_bot_config
from onyx.db.discord_bot import delete_discord_service_api_key
from onyx.db.discord_bot import delete_guild_config
from onyx.db.discord_bot import get_channel_config_by_internal_ids
from onyx.db.discord_bot import get_channel_configs
from onyx.db.discord_bot import get_discord_bot_config
from onyx.db.discord_bot import get_guild_config_by_internal_id
from onyx.db.discord_bot import get_guild_configs
from onyx.db.discord_bot import update_discord_channel_config
from onyx.db.discord_bot import update_guild_config
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.manage.discord_bot.models import DiscordBotConfigCreateRequest
from onyx.server.manage.discord_bot.models import DiscordBotConfigResponse
from onyx.server.manage.discord_bot.models import DiscordChannelConfigResponse
from onyx.server.manage.discord_bot.models import DiscordChannelConfigUpdateRequest
from onyx.server.manage.discord_bot.models import DiscordGuildConfigCreateResponse
from onyx.server.manage.discord_bot.models import DiscordGuildConfigResponse
from onyx.server.manage.discord_bot.models import DiscordGuildConfigUpdateRequest
from onyx.server.manage.discord_bot.utils import (
    generate_discord_registration_key,
)
from shared_configs.contextvars import get_current_tenant_id

router = APIRouter(prefix="/manage/admin/discord-bot")


def _check_bot_config_api_access() -> None:
    """Raise 403 if bot config cannot be managed via API.

    Bot config endpoints are disabled:
    - On Cloud (managed by Onyx)
    - When DISCORD_BOT_TOKEN env var is set (managed via env)
    """
    if AUTH_TYPE == AuthType.CLOUD:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Discord bot configuration is managed by Onyx on Cloud.",
        )
    if DISCORD_BOT_TOKEN:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Discord bot is configured via environment variables. API access disabled.",
        )


# === Bot Config ===


@router.get("/config", response_model=DiscordBotConfigResponse)
def get_bot_config(
    _: None = Depends(_check_bot_config_api_access),
    __: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordBotConfigResponse:
    """Get Discord bot config. Returns 403 on Cloud or if env vars set."""
    config = get_discord_bot_config(db_session)
    if not config:
        return DiscordBotConfigResponse(configured=False)

    return DiscordBotConfigResponse(
        configured=True,
        created_at=config.created_at,
    )


@router.post("/config", response_model=DiscordBotConfigResponse)
def create_bot_request(
    request: DiscordBotConfigCreateRequest,
    _: None = Depends(_check_bot_config_api_access),
    __: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordBotConfigResponse:
    """Create Discord bot config. Returns 403 on Cloud or if env vars set."""
    try:
        config = create_discord_bot_config(
            db_session,
            bot_token=request.bot_token,
        )
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Discord bot config already exists. Delete it first to create a new one.",
        )

    db_session.commit()

    return DiscordBotConfigResponse(
        configured=True,
        created_at=config.created_at,
    )


@router.delete("/config")
def delete_bot_config_endpoint(
    _: None = Depends(_check_bot_config_api_access),
    __: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Delete Discord bot config.

    Also deletes the Discord service API key since the bot is being removed.
    """
    deleted = delete_discord_bot_config(db_session)
    if not deleted:
        raise HTTPException(status_code=404, detail="Bot config not found")

    # Also delete the service API key used by the Discord bot
    delete_discord_service_api_key(db_session)

    db_session.commit()
    return {"deleted": True}


# === Service API Key ===


@router.delete("/service-api-key")
def delete_service_api_key_endpoint(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Delete the Discord service API key.

    This endpoint allows manual deletion of the service API key used by the
    Discord bot to authenticate with the Onyx API. The key is also automatically
    deleted when:
    - Bot config is deleted (self-hosted)
    - All guild configs are deleted (Cloud)
    """
    deleted = delete_discord_service_api_key(db_session)
    if not deleted:
        raise HTTPException(status_code=404, detail="Service API key not found")
    db_session.commit()
    return {"deleted": True}


# === Guild Config ===


@router.get("/guilds", response_model=list[DiscordGuildConfigResponse])
def list_guild_configs(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[DiscordGuildConfigResponse]:
    """List all guild configs (pending and registered)."""
    configs = get_guild_configs(db_session)
    return [DiscordGuildConfigResponse.model_validate(c) for c in configs]


@router.post("/guilds", response_model=DiscordGuildConfigCreateResponse)
def create_guild_request(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordGuildConfigCreateResponse:
    """Create new guild config with registration key. Key shown once."""
    tenant_id = get_current_tenant_id()
    registration_key = generate_discord_registration_key(tenant_id)

    config = create_guild_config(db_session, registration_key)
    db_session.commit()

    return DiscordGuildConfigCreateResponse(
        id=config.id,
        registration_key=registration_key,  # Shown once!
    )


@router.get("/guilds/{config_id}", response_model=DiscordGuildConfigResponse)
def get_guild_config(
    config_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordGuildConfigResponse:
    """Get specific guild config."""
    config = get_guild_config_by_internal_id(db_session, internal_id=config_id)
    if not config:
        raise HTTPException(status_code=404, detail="Guild config not found")
    return DiscordGuildConfigResponse.model_validate(config)


@router.patch("/guilds/{config_id}", response_model=DiscordGuildConfigResponse)
def update_guild_request(
    config_id: int,
    request: DiscordGuildConfigUpdateRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordGuildConfigResponse:
    """Update guild config."""
    config = get_guild_config_by_internal_id(db_session, internal_id=config_id)
    if not config:
        raise HTTPException(status_code=404, detail="Guild config not found")

    config = update_guild_config(
        db_session,
        config,
        enabled=request.enabled,
        default_persona_id=request.default_persona_id,
    )
    db_session.commit()

    return DiscordGuildConfigResponse.model_validate(config)


@router.delete("/guilds/{config_id}")
def delete_guild_request(
    config_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Delete guild config (invalidates registration key).

    On Cloud, if this was the last guild config, also deletes the service API key.
    """
    deleted = delete_guild_config(db_session, config_id)
    if not deleted:
        raise HTTPException(status_code=404, detail="Guild config not found")

    # On Cloud, delete service API key when all guilds are removed
    if AUTH_TYPE == AuthType.CLOUD:
        remaining_guilds = get_guild_configs(db_session)
        if not remaining_guilds:
            delete_discord_service_api_key(db_session)

    db_session.commit()
    return {"deleted": True}


# === Channel Config ===


@router.get(
    "/guilds/{config_id}/channels", response_model=list[DiscordChannelConfigResponse]
)
def list_channel_configs(
    config_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[DiscordChannelConfigResponse]:
    """List whitelisted channels for a guild."""
    guild_config = get_guild_config_by_internal_id(db_session, internal_id=config_id)
    if not guild_config:
        raise HTTPException(status_code=404, detail="Guild config not found")
    if not guild_config.guild_id:
        raise HTTPException(status_code=400, detail="Guild not yet registered")

    configs = get_channel_configs(db_session, config_id)
    return [DiscordChannelConfigResponse.model_validate(c) for c in configs]


@router.patch(
    "/guilds/{guild_config_id}/channels/{channel_config_id}",
    response_model=DiscordChannelConfigResponse,
)
def update_channel_request(
    guild_config_id: int,
    channel_config_id: int,
    request: DiscordChannelConfigUpdateRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> DiscordChannelConfigResponse:
    """Update channel config."""
    config = get_channel_config_by_internal_ids(
        db_session, guild_config_id, channel_config_id
    )
    if not config:
        raise HTTPException(status_code=404, detail="Channel config not found")

    config = update_discord_channel_config(
        db_session,
        config,
        channel_name=config.channel_name,  # Keep existing name, only Discord can update
        thread_only_mode=request.thread_only_mode,
        require_bot_invocation=request.require_bot_invocation,
        persona_override_id=request.persona_override_id,
        enabled=request.enabled,
    )
    db_session.commit()

    return DiscordChannelConfigResponse.model_validate(config)


================================================
FILE: backend/onyx/server/manage/discord_bot/models.py
================================================
"""Pydantic models for Discord bot API."""

from datetime import datetime

from pydantic import BaseModel


# === Bot Config ===


class DiscordBotConfigResponse(BaseModel):
    configured: bool
    created_at: datetime | None = None

    class Config:
        from_attributes = True


class DiscordBotConfigCreateRequest(BaseModel):
    bot_token: str


# === Guild Config ===


class DiscordGuildConfigResponse(BaseModel):
    id: int
    guild_id: int | None
    guild_name: str | None
    registered_at: datetime | None
    default_persona_id: int | None
    enabled: bool

    class Config:
        from_attributes = True


class DiscordGuildConfigCreateResponse(BaseModel):
    id: int
    registration_key: str  # Shown once!


class DiscordGuildConfigUpdateRequest(BaseModel):
    enabled: bool
    default_persona_id: int | None


# === Channel Config ===


class DiscordChannelConfigResponse(BaseModel):
    id: int
    guild_config_id: int
    channel_id: int
    channel_name: str
    channel_type: str
    is_private: bool
    require_bot_invocation: bool
    thread_only_mode: bool
    persona_override_id: int | None
    enabled: bool

    class Config:
        from_attributes = True


class DiscordChannelConfigUpdateRequest(BaseModel):
    require_bot_invocation: bool
    persona_override_id: int | None
    enabled: bool
    thread_only_mode: bool


================================================
FILE: backend/onyx/server/manage/discord_bot/utils.py
================================================
"""Discord registration key generation and parsing."""

import secrets
from urllib.parse import quote
from urllib.parse import unquote

from onyx.utils.logger import setup_logger

logger = setup_logger()

REGISTRATION_KEY_PREFIX: str = "discord_"


def generate_discord_registration_key(tenant_id: str) -> str:
    """Generate a one-time registration key with embedded tenant_id.

    Format: discord_<url_encoded_tenant_id>.<random_token>

    Follows the same pattern as API keys for consistency.
    """
    encoded_tenant = quote(tenant_id)
    random_token = secrets.token_urlsafe(16)

    logger.info(f"Generated Discord registration key for tenant {tenant_id}")
    return f"{REGISTRATION_KEY_PREFIX}{encoded_tenant}.{random_token}"


def parse_discord_registration_key(key: str) -> str | None:
    """Parse registration key to extract tenant_id.

    Returns tenant_id or None if invalid format.
    """
    if not key.startswith(REGISTRATION_KEY_PREFIX):
        return None

    try:
        key_body = key.removeprefix(REGISTRATION_KEY_PREFIX)
        parts = key_body.split(".", 1)
        if len(parts) != 2:
            return None

        encoded_tenant = parts[0]
        tenant_id = unquote(encoded_tenant)
        return tenant_id
    except Exception:
        return None


================================================
FILE: backend/onyx/server/manage/embedding/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.llm import fetch_existing_embedding_providers
from onyx.db.llm import remove_embedding_provider
from onyx.db.llm import upsert_cloud_embedding_provider
from onyx.db.models import User
from onyx.db.search_settings import get_all_search_settings
from onyx.db.search_settings import get_current_db_embedding_provider
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.indexing.models import EmbeddingModelDetail
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.server.manage.embedding.models import CloudEmbeddingProvider
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.embedding.models import TestEmbeddingRequest
from onyx.utils.logger import setup_logger
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType


logger = setup_logger()


admin_router = APIRouter(prefix="/admin/embedding")
basic_router = APIRouter(prefix="/embedding")


@admin_router.post("/test-embedding")
def test_embedding_configuration(
    test_llm_request: TestEmbeddingRequest,
    _: User = Depends(current_admin_user),
) -> None:
    try:
        test_model = EmbeddingModel(
            server_host=MODEL_SERVER_HOST,
            server_port=MODEL_SERVER_PORT,
            api_key=test_llm_request.api_key,
            api_url=test_llm_request.api_url,
            provider_type=test_llm_request.provider_type,
            model_name=test_llm_request.model_name,
            api_version=test_llm_request.api_version,
            deployment_name=test_llm_request.deployment_name,
            normalize=False,
            query_prefix=None,
            passage_prefix=None,
        )
        test_model.encode(["Testing Embedding"], text_type=EmbedTextType.QUERY)

    except ValueError as e:
        error_msg = f"Not a valid embedding model. Exception thrown: {e}"
        logger.error(error_msg)
        raise ValueError(error_msg)

    except Exception as e:
        error_msg = "An error occurred while testing your embedding model. Please check your configuration."
        logger.error(f"{error_msg} Error message: {e}", exc_info=True)
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)


@admin_router.get("", response_model=list[EmbeddingModelDetail])
def list_embedding_models(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[EmbeddingModelDetail]:
    search_settings = get_all_search_settings(db_session)
    return [EmbeddingModelDetail.from_db_model(setting) for setting in search_settings]


@admin_router.get("/embedding-provider")
def list_embedding_providers(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[CloudEmbeddingProvider]:
    return [
        CloudEmbeddingProvider.from_request(embedding_provider_model)
        for embedding_provider_model in fetch_existing_embedding_providers(db_session)
    ]


@admin_router.delete("/embedding-provider/{provider_type}")
def delete_embedding_provider(
    provider_type: EmbeddingProvider,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    embedding_provider = get_current_db_embedding_provider(db_session=db_session)
    if (
        embedding_provider is not None
        and provider_type == embedding_provider.provider_type
    ):
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "You can't delete a currently active model",
        )

    remove_embedding_provider(db_session, provider_type=provider_type)


@admin_router.put("/embedding-provider")
def put_cloud_embedding_provider(
    provider: CloudEmbeddingProviderCreationRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CloudEmbeddingProvider:
    return upsert_cloud_embedding_provider(db_session, provider)


================================================
FILE: backend/onyx/server/manage/embedding/models.py
================================================
from typing import TYPE_CHECKING

from pydantic import BaseModel

from shared_configs.enums import EmbeddingProvider

if TYPE_CHECKING:
    from onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel


class SearchSettingsDeleteRequest(BaseModel):
    search_settings_id: int


class TestEmbeddingRequest(BaseModel):
    provider_type: EmbeddingProvider
    api_key: str | None = None
    api_url: str | None = None
    model_name: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}


class CloudEmbeddingProvider(BaseModel):
    provider_type: EmbeddingProvider
    api_key: str | None = None
    api_url: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None

    @classmethod
    def from_request(
        cls, cloud_provider_model: "CloudEmbeddingProviderModel"
    ) -> "CloudEmbeddingProvider":
        return cls(
            provider_type=cloud_provider_model.provider_type,
            api_key=(
                cloud_provider_model.api_key.get_value(apply_mask=True)
                if cloud_provider_model.api_key
                else None
            ),
            api_url=cloud_provider_model.api_url,
            api_version=cloud_provider_model.api_version,
            deployment_name=cloud_provider_model.deployment_name,
        )


class CloudEmbeddingProviderCreationRequest(BaseModel):
    provider_type: EmbeddingProvider
    api_key: str | None = None
    api_url: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None


================================================
FILE: backend/onyx/server/manage/get_state.py
================================================
import concurrent.futures
import re

import requests
from fastapi import APIRouter
from fastapi import HTTPException

from onyx import __version__
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import user_needs_to_be_verified
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import OAUTH_ENABLED
from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.constants import AuthType
from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.db.auth import get_user_count
from onyx.server.manage.models import AllVersions
from onyx.server.manage.models import AuthTypeResponse
from onyx.server.manage.models import ContainerVersions
from onyx.server.manage.models import VersionResponse
from onyx.server.models import StatusResponse

router = APIRouter()


@router.get("/health", tags=PUBLIC_API_TAGS)
async def healthcheck() -> StatusResponse:
    return StatusResponse(success=True, message="ok")


@router.get("/auth/type", tags=PUBLIC_API_TAGS)
async def get_auth_type() -> AuthTypeResponse:
    # NOTE: This endpoint is critical for the multi-tenant flow and is hit before there is a tenant context
    # The reason is this is used during the login flow, but we don't know which tenant the user is supposed to be
    # associated with until they auth.
    has_users = True
    if AUTH_TYPE != AuthType.CLOUD:
        user_count = await get_user_count()
        has_users = user_count > 0

    return AuthTypeResponse(
        auth_type=AUTH_TYPE,
        requires_verification=user_needs_to_be_verified(),
        anonymous_user_enabled=anonymous_user_enabled(),
        password_min_length=PASSWORD_MIN_LENGTH,
        has_users=has_users,
        oauth_enabled=OAUTH_ENABLED,
    )


@router.get("/version", tags=PUBLIC_API_TAGS)
def get_version() -> VersionResponse:
    return VersionResponse(backend_version=__version__)


@router.get("/versions", tags=PUBLIC_API_TAGS)
def get_versions() -> AllVersions:
    """
    Fetches the latest stable and beta versions of Onyx Docker images.
    Since DockerHub does not explicitly flag stable and beta images,
    this endpoint can be used to programmatically check for new images.
    """
    # Fetch the latest tags from DockerHub for each Onyx component
    dockerhub_repos = [
        "onyxdotapp/onyx-model-server",
        "onyxdotapp/onyx-backend",
        "onyxdotapp/onyx-web-server",
    ]

    # For good measure, we fetch 10 pages of tags
    def get_dockerhub_tags(repo: str, pages: int = 10) -> list[str]:
        url = f"https://hub.docker.com/v2/repositories/{repo}/tags"
        tags = []
        for _ in range(pages):
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            tags.extend(
                [
                    tag["name"]
                    for tag in data["results"]
                    if re.match(r"^v\d", tag["name"])
                ]
            )
            url = data.get("next")
            if not url:
                break
        return tags

    # Get tags for all repos in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        all_tags = list(
            executor.map(lambda repo: set(get_dockerhub_tags(repo)), dockerhub_repos)
        )

    # Find common tags across all repos
    common_tags = set.intersection(*all_tags)

    # Filter tags by strict version patterns
    dev_tags = [tag for tag in common_tags if DEV_VERSION_PATTERN.match(tag)]
    stable_tags = [tag for tag in common_tags if STABLE_VERSION_PATTERN.match(tag)]

    # Ensure we have at least one tag of each type
    if not dev_tags:
        raise HTTPException(
            status_code=500,
            detail="No valid dev versions found matching pattern v(number).(number).(number)-beta.(number)",
        )
    if not stable_tags:
        raise HTTPException(
            status_code=500,
            detail="No valid stable versions found matching pattern v(number).(number).(number)",
        )

    # Sort common tags and get the latest one
    def version_key(version: str) -> tuple[int, int, int, int]:
        """Extract major, minor, patch, beta as integers for sorting"""
        # Remove 'v' prefix
        clean_version = version[1:]

        # Check if it's a beta version
        if "-beta." in clean_version:
            # Split on '-beta.' to separate version and beta number
            base_version, beta_num = clean_version.split("-beta.")
            parts = base_version.split(".")
            return (int(parts[0]), int(parts[1]), int(parts[2]), int(beta_num))
        else:
            # Stable version - no beta number
            parts = clean_version.split(".")
            return (int(parts[0]), int(parts[1]), int(parts[2]), 0)

    latest_dev_version = sorted(dev_tags, key=version_key, reverse=True)[0]
    latest_stable_version = sorted(stable_tags, key=version_key, reverse=True)[0]

    return AllVersions(
        stable=ContainerVersions(
            onyx=latest_stable_version,
            relational_db="postgres:15.2-alpine",
            index="vespaengine/vespa:8.277.17",
            nginx="nginx:1.25.5-alpine",
        ),
        dev=ContainerVersions(
            onyx=latest_dev_version,
            relational_db="postgres:15.2-alpine",
            index="vespaengine/vespa:8.277.17",
            nginx="nginx:1.25.5-alpine",
        ),
        migration=ContainerVersions(
            onyx="airgapped-intfloat-nomic-migration",
            relational_db="postgres:15.2-alpine",
            index="vespaengine/vespa:8.277.17",
            nginx="nginx:1.25.5-alpine",
        ),
    )


================================================
FILE: backend/onyx/server/manage/image_generation/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.image_generation import create_image_generation_config__no_commit
from onyx.db.image_generation import delete_image_generation_config__no_commit
from onyx.db.image_generation import get_all_image_generation_configs
from onyx.db.image_generation import get_image_generation_config
from onyx.db.image_generation import set_default_image_generation_config
from onyx.db.image_generation import unset_default_image_generation_config
from onyx.db.llm import remove_llm_provider__no_commit
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import ModelConfiguration
from onyx.db.models import User
from onyx.image_gen.exceptions import ImageProviderCredentialsError
from onyx.image_gen.factory import get_image_generation_provider
from onyx.image_gen.factory import validate_credentials
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.utils import get_max_input_tokens
from onyx.server.manage.image_generation.models import ImageGenerationConfigCreate
from onyx.server.manage.image_generation.models import ImageGenerationConfigUpdate
from onyx.server.manage.image_generation.models import ImageGenerationConfigView
from onyx.server.manage.image_generation.models import ImageGenerationCredentials
from onyx.server.manage.image_generation.models import TestImageGenerationRequest
from onyx.server.manage.llm.api import _validate_llm_provider_change
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.utils.logger import setup_logger

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/image-generation")


def _get_test_quality_for_model(model_name: str) -> str | None:
    """Returns the fastest quality setting for credential testing.

    - gpt-image-1: 'low' (fastest)
    - dall-e-3: 'standard' (faster than 'hd')
    - Other models: None (use API default)
    """
    model_lower = model_name.lower()

    if "gpt-image-1" in model_lower:
        return "low"
    elif "dall-e-3" in model_lower or "dalle-3" in model_lower:
        return "standard"
    return None


def _build_llm_provider_request(
    db_session: Session,
    image_provider_id: str,
    model_name: str,
    source_llm_provider_id: int | None,
    provider: str | None,
    api_key: str | None,
    api_base: str | None,
    api_version: str | None,
    deployment_name: str | None,
    custom_config: dict[str, str] | None,
) -> LLMProviderUpsertRequest:
    """Build LLM provider request for image generation config.

    Supports two modes:
    1. Clone mode: source_llm_provider_id provided - uses API key from source
    2. New credentials mode: api_key + provider provided

    """
    if source_llm_provider_id is not None:
        # Clone mode: Only use API key from source provider
        source_provider = db_session.get(LLMProviderModel, source_llm_provider_id)
        if not source_provider:
            raise HTTPException(
                status_code=404,
                detail=f"Source LLM provider with id {source_llm_provider_id} not found",
            )

        _validate_llm_provider_change(
            existing_api_base=source_provider.api_base,
            existing_custom_config=source_provider.custom_config,
            new_api_base=api_base,
            new_custom_config=custom_config,
            api_key_changed=False,  # Using stored key from source provider
        )

        return LLMProviderUpsertRequest(
            name=f"Image Gen - {image_provider_id}",
            provider=source_provider.provider,
            api_key=(
                source_provider.api_key.get_value(apply_mask=False)
                if source_provider.api_key
                else None
            ),  # Only this from source
            api_base=api_base,  # From request
            api_version=api_version,  # From request
            deployment_name=deployment_name,  # From request
            is_public=True,
            groups=[],
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=model_name,
                    is_visible=True,
                )
            ],
            custom_config=custom_config,
        )

    if not provider:
        raise HTTPException(
            status_code=400,
            detail="No provider or source llm provided",
        )

    credentials = ImageGenerationProviderCredentials(
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        deployment_name=deployment_name,
        custom_config=custom_config,
    )

    if not validate_credentials(provider, credentials):
        raise HTTPException(
            status_code=400,
            detail=f"Incorrect credentials for {provider}",
        )

    return LLMProviderUpsertRequest(
        name=f"Image Gen - {image_provider_id}",
        provider=provider,
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        deployment_name=deployment_name,
        is_public=True,
        groups=[],
        model_configurations=[
            ModelConfigurationUpsertRequest(
                name=model_name,
                is_visible=True,
            )
        ],
        custom_config=custom_config,
    )


def _create_image_gen_llm_provider__no_commit(
    db_session: Session,
    provider_request: LLMProviderUpsertRequest,
    model_name: str,
) -> int:
    """Create a new LLM provider for image generation. Returns model_config_id.

    Unlike upsert_llm_provider, this always creates a new provider and never
    deletes existing model configurations (which would cascade-delete ImageGenerationConfig).
    """

    # Always create a new provider (don't look up by name to avoid upsert behavior)
    new_provider = LLMProviderModel(
        name=provider_request.name,
        provider=provider_request.provider,
        api_key=provider_request.api_key,
        api_base=provider_request.api_base,
        api_version=provider_request.api_version,
        deployment_name=provider_request.deployment_name,
        is_public=provider_request.is_public,
        custom_config=provider_request.custom_config,
    )
    db_session.add(new_provider)
    db_session.flush()  # Get the ID

    # Create model configuration
    max_input_tokens = get_max_input_tokens(
        model_name=model_name,
        model_provider=provider_request.provider,
    )

    model_config = ModelConfiguration(
        llm_provider_id=new_provider.id,
        name=model_name,
        is_visible=True,
        max_input_tokens=max_input_tokens,
    )
    db_session.add(model_config)
    db_session.flush()

    return model_config.id


@admin_router.post("/test")
def test_image_generation(
    test_request: TestImageGenerationRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Test if an API key is valid for image generation.

    Makes a minimal image generation request to verify credentials using LiteLLM.

    Two modes:
    1. Direct: api_key + provider provided
    2. From existing provider: source_llm_provider_id provided (fetches API key from DB)
    """
    api_key = test_request.api_key
    provider = test_request.provider

    # Resolve API key and provider
    if test_request.source_llm_provider_id is not None:
        # Fetch API key from existing provider
        source_provider = db_session.get(
            LLMProviderModel, test_request.source_llm_provider_id
        )
        if not source_provider:
            raise HTTPException(
                status_code=404,
                detail=f"Source LLM provider with id {test_request.source_llm_provider_id} not found",
            )

        _validate_llm_provider_change(
            existing_api_base=source_provider.api_base,
            existing_custom_config=source_provider.custom_config,
            new_api_base=test_request.api_base,
            new_custom_config=test_request.custom_config,
            api_key_changed=False,  # Using stored key from source provider
        )

        api_key = (
            source_provider.api_key.get_value(apply_mask=False)
            if source_provider.api_key
            else None
        )
        provider = source_provider.provider

    if provider is None:
        raise HTTPException(
            status_code=400,
            detail="No provider or source llm provided",
        )

    try:
        # Build image provider from credentials
        # If incorrect credentials are provided, this will raise an exception
        image_provider = get_image_generation_provider(
            provider=provider,
            credentials=ImageGenerationProviderCredentials(
                api_key=api_key,
                api_base=test_request.api_base,
                api_version=test_request.api_version,
                deployment_name=(
                    test_request.deployment_name or test_request.model_name
                ),
                custom_config=test_request.custom_config,
            ),
        )
    except ValueError:
        raise HTTPException(
            status_code=404,
            detail=f"Invalid image generation provider: {provider}",
        )
    except ImageProviderCredentialsError:
        raise HTTPException(
            status_code=401,
            detail="Invalid image generation credentials",
        )

    quality = _get_test_quality_for_model(test_request.model_name)
    try:
        image_provider.generate_image(
            prompt="a simple blue circle on white background",
            model=test_request.model_name,
            size="1024x1024",
            n=1,
            quality=quality,
        )
    except HTTPException:
        raise
    except Exception as e:
        # Log only exception type to avoid exposing sensitive data
        # (LiteLLM errors may contain URLs with API keys or auth tokens)
        logger.warning(f"Image generation test failed: {type(e).__name__}")
        raise HTTPException(
            status_code=400,
            detail=f"Image generation test failed: {type(e).__name__}",
        )


@admin_router.post("/config")
def create_config(
    config_create: ImageGenerationConfigCreate,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ImageGenerationConfigView:
    """Create a new image generation configuration.

    Both modes create a new LLM provider + model config + image config:

    1. Clone mode: source_llm_provider_id provided
       → Extract api key from existing provider, create new provider

    2. New credentials mode: api_key + provider provided
       → Create new provider with given credentials
    """
    # Check if image_provider_id already exists
    existing_config = get_image_generation_config(
        db_session, config_create.image_provider_id
    )
    if existing_config:
        raise HTTPException(
            status_code=400,
            detail=f"ImageGenerationConfig with image_provider_id '{config_create.image_provider_id}' already exists",
        )

    try:
        # Build and create LLM provider
        provider_request = _build_llm_provider_request(
            db_session=db_session,
            image_provider_id=config_create.image_provider_id,
            model_name=config_create.model_name,
            source_llm_provider_id=config_create.source_llm_provider_id,
            provider=config_create.provider,
            api_key=config_create.api_key,
            api_base=config_create.api_base,
            api_version=config_create.api_version,
            deployment_name=config_create.deployment_name,
            custom_config=config_create.custom_config,
        )

        model_configuration_id = _create_image_gen_llm_provider__no_commit(
            db_session=db_session,
            provider_request=provider_request,
            model_name=config_create.model_name,
        )

        # Create the ImageGenerationConfig
        config = create_image_generation_config__no_commit(
            db_session=db_session,
            image_provider_id=config_create.image_provider_id,
            model_configuration_id=model_configuration_id,
            is_default=config_create.is_default,
        )
        db_session.commit()
        db_session.refresh(config)
        return ImageGenerationConfigView.from_model(config)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


@admin_router.get("/config")
def get_all_configs(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ImageGenerationConfigView]:
    """Get all image generation configurations."""
    configs = get_all_image_generation_configs(db_session)
    return [ImageGenerationConfigView.from_model(config) for config in configs]


@admin_router.get("/config/{image_provider_id}/credentials")
def get_config_credentials(
    image_provider_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ImageGenerationCredentials:
    """Get the credentials for an image generation config (for edit mode).

    Returns the unmasked API key and other credential fields.
    """
    config = get_image_generation_config(db_session, image_provider_id)
    if not config:
        raise HTTPException(
            status_code=404,
            detail=f"ImageGenerationConfig with image_provider_id {image_provider_id} not found",
        )

    return ImageGenerationCredentials.from_model(config)


@admin_router.put("/config/{image_provider_id}")
def update_config(
    image_provider_id: str,
    config_update: ImageGenerationConfigUpdate,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ImageGenerationConfigView:
    """Update an image generation configuration.

    Flow:
    1. Get existing config and its LLM provider
    2. Rename old LLM provider to free up the name (avoids unique constraint)
    3. Create new LLM provider + model config (same as create flow)
    4. Update ImageGenerationConfig to point to new model config
    5. Delete old LLM provider (safe now - nothing references it)
    """
    try:
        # 1. Get existing config
        existing_config = get_image_generation_config(db_session, image_provider_id)
        if not existing_config:
            raise HTTPException(
                status_code=404,
                detail=f"ImageGenerationConfig with image_provider_id {image_provider_id} not found",
            )

        old_llm_provider_id = existing_config.model_configuration.llm_provider_id

        # 2. Rename old LLM provider to free up the name
        # (Can't delete first due to cascade: LLMProvider -> ModelConfig -> ImageGenConfig)
        old_provider = db_session.get(LLMProviderModel, old_llm_provider_id)
        if old_provider:
            old_provider.name = f"{old_provider.name}-old-{old_llm_provider_id}"
            db_session.flush()

        # Determine actual API key to use:
        # - Clone mode (source_llm_provider_id): API key comes from source provider
        # - New credentials mode: Use provided api_key, or preserve existing if not changed
        actual_api_key = config_update.api_key
        if config_update.source_llm_provider_id is None and old_provider:
            # Check if we should preserve existing API key:
            # - api_key_changed=False AND (key is None/empty OR looks masked)
            provided_key_is_masked = (
                config_update.api_key and "****" in config_update.api_key
            )
            if not config_update.api_key_changed and (
                not config_update.api_key or provided_key_is_masked
            ):
                _validate_llm_provider_change(
                    existing_api_base=old_provider.api_base,
                    existing_custom_config=old_provider.custom_config,
                    new_api_base=config_update.api_base,
                    new_custom_config=config_update.custom_config,
                    api_key_changed=False,
                )
                # Preserve existing API key when user didn't change it
                actual_api_key = (
                    old_provider.api_key.get_value(apply_mask=False)
                    if old_provider.api_key
                    else None
                )

        # 3. Build and create new LLM provider
        provider_request = _build_llm_provider_request(
            db_session=db_session,
            image_provider_id=image_provider_id,
            model_name=config_update.model_name,
            source_llm_provider_id=config_update.source_llm_provider_id,
            provider=config_update.provider,
            api_key=actual_api_key,
            api_base=config_update.api_base,
            api_version=config_update.api_version,
            deployment_name=config_update.deployment_name,
            custom_config=config_update.custom_config,
        )

        new_model_config_id = _create_image_gen_llm_provider__no_commit(
            db_session=db_session,
            provider_request=provider_request,
            model_name=config_update.model_name,
        )

        # 4. Update the ImageGenerationConfig to point to new model config
        existing_config.model_configuration_id = new_model_config_id

        # 5. Delete old LLM provider (safe now - nothing references it)
        remove_llm_provider__no_commit(db_session, old_llm_provider_id)

        db_session.commit()
        db_session.refresh(existing_config)
        return ImageGenerationConfigView.from_model(existing_config)

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


@admin_router.delete("/config/{image_provider_id}")
def delete_config(
    image_provider_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Delete an image generation configuration and its associated LLM provider."""
    try:
        # Get the config first to find the associated LLM provider
        existing_config = get_image_generation_config(db_session, image_provider_id)
        if not existing_config:
            raise HTTPException(
                status_code=404,
                detail=f"ImageGenerationConfig with image_provider_id {image_provider_id} not found",
            )

        llm_provider_id = existing_config.model_configuration.llm_provider_id

        # Delete the image generation config first
        delete_image_generation_config__no_commit(db_session, image_provider_id)

        # Clean up the orphaned LLM provider (it was exclusively for image gen)
        remove_llm_provider__no_commit(db_session, llm_provider_id)

        db_session.commit()
    except HTTPException:
        raise
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@admin_router.post("/config/{image_provider_id}/default")
def set_config_as_default(
    image_provider_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Set a configuration as the default for image generation."""
    try:
        set_default_image_generation_config(db_session, image_provider_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@admin_router.delete("/config/{image_provider_id}/default")
def unset_config_as_default(
    image_provider_id: str,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Unset a configuration as the default for image generation."""
    try:
        unset_default_image_generation_config(db_session, image_provider_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


================================================
FILE: backend/onyx/server/manage/image_generation/models.py
================================================
from typing import TYPE_CHECKING

from pydantic import BaseModel

if TYPE_CHECKING:
    from onyx.db.models import ImageGenerationConfig as ImageGenerationConfigModel


def _mask_api_key(api_key: str | None) -> str | None:
    """Mask API key, showing first 4 and last 4 characters."""
    if not api_key:
        return None
    if len(api_key) <= 8:
        return "****"
    return api_key[:4] + "****" + api_key[-4:]


class TestImageGenerationRequest(BaseModel):
    """Request model for testing image generation API key.

    Two modes:
    1. Direct API key: Provide api_key + provider
    2. From existing provider: Provide source_llm_provider_id (backend fetches API key)
    """

    model_name: str  # e.g., "gpt-image-1", "dall-e-3"

    # Option 1: Direct API key
    provider: str | None = None  # e.g., "openai", "azure"
    api_key: str | None = None

    # Option 2: Use API key from existing provider
    source_llm_provider_id: int | None = None

    # Additional fields for custom config
    custom_config: dict[str, str] | None = None

    # Additional fields for Azure
    api_base: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None


class ImageGenerationConfigCreate(BaseModel):
    """Request model for creating an image generation config.

    Two creation modes (backend always creates new LLM provider + model config):

    1. Clone mode: Provide source_llm_provider_id + model_name
       → Backend extracts credentials from existing provider and creates new provider

    2. New credentials mode: Provide api_key + provider + model_name (+ optional fields)
       → Backend creates new provider with provided credentials
    """

    # Required for both modes
    image_provider_id: str  # Static unique key (e.g., "openai_gpt_image_1")
    model_name: str  # e.g., "gpt-image-1", "dall-e-3"

    # Option 1: Clone mode - use credentials from existing provider
    source_llm_provider_id: int | None = None

    # Option 2: New credentials mode
    provider: str | None = None  # e.g., "openai", "azure"
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None
    custom_config: dict[str, str] | None = None

    is_default: bool = False


class ImageGenerationConfigUpdate(BaseModel):
    """Request model for updating an image generation config.

    Same modes as create - either clone from existing provider or use new credentials.
    Backend will delete old LLM provider and create new one.
    """

    # Required
    model_name: str  # e.g., "gpt-image-1", "dall-e-3"
    # Note: image_provider_id cannot be changed during update

    # Option 1: Clone mode - use credentials from existing provider
    source_llm_provider_id: int | None = None

    # Option 2: New credentials mode
    provider: str | None = None  # e.g., "openai", "azure"
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    deployment_name: str | None = None
    custom_config: dict[str, str] | None = None

    # If False and using new credentials mode, preserve existing API key from DB
    api_key_changed: bool = False


class ImageGenerationConfigView(BaseModel):
    """Response model for image generation config with related data."""

    image_provider_id: str  # Primary key - static unique key for UI-DB mapping
    model_configuration_id: int
    model_name: str  # From model_configuration.name
    llm_provider_id: int  # From model_configuration.llm_provider_id
    llm_provider_name: str  # From model_configuration.llm_provider.name
    is_default: bool

    @classmethod
    def from_model(
        cls, config: "ImageGenerationConfigModel"
    ) -> "ImageGenerationConfigView":
        """Convert database model to view model."""
        return cls(
            image_provider_id=config.image_provider_id,
            model_configuration_id=config.model_configuration_id,
            model_name=config.model_configuration.name,
            llm_provider_id=config.model_configuration.llm_provider_id,
            llm_provider_name=config.model_configuration.llm_provider.name,
            is_default=config.is_default,
        )


class ImageGenerationCredentials(BaseModel):
    """Response model for image generation config credentials (edit mode)."""

    api_key: str | None
    api_base: str | None
    api_version: str | None
    deployment_name: str | None

    @classmethod
    def from_model(
        cls, config: "ImageGenerationConfigModel"
    ) -> "ImageGenerationCredentials":
        """Convert database model to credentials model.

        Note: API key is masked for security - only first 4 and last 4 chars shown.
        """
        llm_provider = config.model_configuration.llm_provider
        return cls(
            api_key=_mask_api_key(
                llm_provider.api_key.get_value(apply_mask=False)
                if llm_provider.api_key
                else None
            ),
            api_base=llm_provider.api_base,
            api_version=llm_provider.api_version,
            deployment_name=llm_provider.deployment_name,
        )


class DefaultImageGenerationConfig(BaseModel):
    """Contains all info needed for image generation tool."""

    model_configuration_id: int
    model_name: str  # From model_configuration.name
    provider: str  # e.g., "openai", "azure" - from llm_provider.provider
    api_key: str | None
    api_base: str | None
    api_version: str | None
    deployment_name: str | None

    @classmethod
    def from_model(
        cls, config: "ImageGenerationConfigModel"
    ) -> "DefaultImageGenerationConfig":
        """Convert database model to default config model."""
        llm_provider = config.model_configuration.llm_provider
        return cls(
            model_configuration_id=config.model_configuration_id,
            model_name=config.model_configuration.name,
            provider=llm_provider.provider,
            api_key=(
                llm_provider.api_key.get_value(apply_mask=False)
                if llm_provider.api_key
                else None
            ),
            api_base=llm_provider.api_base,
            api_version=llm_provider.api_version,
            deployment_name=llm_provider.deployment_name,
        )


================================================
FILE: backend/onyx/server/manage/llm/api.py
================================================
import os
from collections import defaultdict
from datetime import datetime
from datetime import timezone
from typing import Any

import boto3
import httpx
from botocore.exceptions import BotoCoreError
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
from fastapi import APIRouter
from fastapi import Depends
from fastapi import Query
from pydantic import ValidationError
from sqlalchemy.orm import Session

from onyx.auth.schemas import UserRole
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import can_user_access_llm_provider
from onyx.db.llm import fetch_default_llm_model
from onyx.db.llm import fetch_default_vision_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import fetch_existing_llm_provider_by_id
from onyx.db.llm import fetch_existing_llm_providers
from onyx.db.llm import fetch_existing_models
from onyx.db.llm import fetch_persona_with_groups
from onyx.db.llm import fetch_user_group_ids
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import sync_model_configurations
from onyx.db.llm import update_default_provider
from onyx.db.llm import update_default_vision_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.llm import validate_persona_ids_exist
from onyx.db.models import User
from onyx.db.persona import user_can_access_persona
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.llm.factory import get_default_llm
from onyx.llm.factory import get_llm
from onyx.llm.factory import get_max_input_tokens_from_llm_provider
from onyx.llm.utils import get_bedrock_token_limit
from onyx.llm.utils import get_llm_contextual_cost
from onyx.llm.utils import test_llm
from onyx.llm.well_known_providers.auto_update_service import (
    fetch_llm_recommendations_from_github,
)
from onyx.llm.well_known_providers.constants import LM_STUDIO_API_KEY_CONFIG_KEY
from onyx.llm.well_known_providers.llm_provider_options import (
    fetch_available_well_known_llms,
)
from onyx.llm.well_known_providers.llm_provider_options import (
    WellKnownLLMProviderDescriptor,
)
from onyx.server.manage.llm.models import BedrockFinalModelResponse
from onyx.server.manage.llm.models import BedrockModelsRequest
from onyx.server.manage.llm.models import BifrostFinalModelResponse
from onyx.server.manage.llm.models import BifrostModelsRequest
from onyx.server.manage.llm.models import DefaultModel
from onyx.server.manage.llm.models import LitellmFinalModelResponse
from onyx.server.manage.llm.models import LitellmModelDetails
from onyx.server.manage.llm.models import LitellmModelsRequest
from onyx.server.manage.llm.models import LLMCost
from onyx.server.manage.llm.models import LLMProviderDescriptor
from onyx.server.manage.llm.models import LLMProviderResponse
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import LMStudioFinalModelResponse
from onyx.server.manage.llm.models import LMStudioModelsRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.llm.models import OllamaFinalModelResponse
from onyx.server.manage.llm.models import OllamaModelDetails
from onyx.server.manage.llm.models import OllamaModelsRequest
from onyx.server.manage.llm.models import OpenRouterFinalModelResponse
from onyx.server.manage.llm.models import OpenRouterModelDetails
from onyx.server.manage.llm.models import OpenRouterModelsRequest
from onyx.server.manage.llm.models import SyncModelEntry
from onyx.server.manage.llm.models import TestLLMRequest
from onyx.server.manage.llm.models import VisionProviderResponse
from onyx.server.manage.llm.utils import generate_bedrock_display_name
from onyx.server.manage.llm.utils import generate_ollama_display_name
from onyx.server.manage.llm.utils import infer_vision_support
from onyx.server.manage.llm.utils import is_embedding_model
from onyx.server.manage.llm.utils import is_reasoning_model
from onyx.server.manage.llm.utils import is_valid_bedrock_model
from onyx.server.manage.llm.utils import ModelMetadata
from onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix
from onyx.utils.encryption import mask_string as mask_with_ellipsis
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/llm")
basic_router = APIRouter(prefix="/llm")


def _mask_string(value: str) -> str:
    """Mask a string, showing first 4 and last 4 characters."""
    if len(value) <= 8:
        return "****"
    return value[:4] + "****" + value[-4:]


def _sync_fetched_models(
    db_session: Session,
    provider_name: str,
    models: list[SyncModelEntry],
    source_label: str,
) -> None:
    """Sync fetched models to DB for the given provider.

    Args:
        db_session: Database session
        provider_name: Name of the LLM provider
        models: List of SyncModelEntry objects describing the fetched models
        source_label: Human-readable label for log messages (e.g. "Bedrock", "LiteLLM")
    """
    try:
        new_count = sync_model_configurations(
            db_session=db_session,
            provider_name=provider_name,
            models=models,
        )
        if new_count > 0:
            logger.info(
                f"Added {new_count} new {source_label} models to provider '{provider_name}'"
            )
    except ValueError as e:
        logger.warning(f"Failed to sync {source_label} models to DB: {e}")


# Keys in custom_config that contain sensitive credentials
_SENSITIVE_CONFIG_KEYS = {
    "vertex_credentials",
    "aws_secret_access_key",
    "aws_access_key_id",
    "aws_bearer_token_bedrock",
    "private_key",
    "api_key",
    "secret",
    "password",
    "token",
    "credential",
}


def _mask_provider_credentials(provider_view: LLMProviderView) -> None:
    """Mask sensitive credentials in provider view including api_key and custom_config."""
    # Mask the API key
    if provider_view.api_key:
        provider_view.api_key = _mask_string(provider_view.api_key)

    # Mask sensitive values in custom_config
    if provider_view.custom_config:
        masked_config: dict[str, Any] = {}
        for key, value in provider_view.custom_config.items():
            # Check if key matches any sensitive pattern (case-insensitive)
            key_lower = key.lower()
            is_sensitive = any(
                sensitive_key in key_lower for sensitive_key in _SENSITIVE_CONFIG_KEYS
            )
            if is_sensitive and isinstance(value, str) and value:
                masked_config[key] = _mask_string(value)
            else:
                masked_config[key] = value
        provider_view.custom_config = masked_config


def _is_sensitive_custom_config_key(key: str) -> bool:
    key_lower = key.lower()
    return any(sensitive_key in key_lower for sensitive_key in _SENSITIVE_CONFIG_KEYS)


def _is_masked_value_for_existing(
    incoming_value: str, existing_value: str, key: str
) -> bool:
    """Return True when incoming_value is a masked round-trip of existing_value."""
    if not _is_sensitive_custom_config_key(key):
        return False

    masked_candidates = {
        _mask_string(existing_value),
        mask_with_ellipsis(existing_value),
        "****",
        "••••••••••••",
        "***REDACTED***",
    }
    return incoming_value in masked_candidates


def _restore_masked_custom_config_values(
    existing_custom_config: dict[str, str] | None,
    new_custom_config: dict[str, str] | None,
) -> dict[str, str] | None:
    """Restore sensitive custom config values when clients send masked placeholders."""
    if not existing_custom_config or not new_custom_config:
        return new_custom_config

    restored_config = dict(new_custom_config)

    for key, incoming_value in restored_config.items():
        existing_value = existing_custom_config.get(key)
        if not isinstance(incoming_value, str) or not isinstance(existing_value, str):
            continue
        if _is_masked_value_for_existing(incoming_value, existing_value, key):
            restored_config[key] = existing_value

    return restored_config


def _validate_llm_provider_change(
    existing_api_base: str | None,
    existing_custom_config: dict[str, str] | None,
    new_api_base: str | None,
    new_custom_config: dict[str, str] | None,
    api_key_changed: bool,
) -> None:
    """Validate that api_base and custom_config changes are safe.

    When using a stored API key (api_key_changed=False), we must ensure api_base and
    custom_config match the stored values.

    Only enforced in MULTI_TENANT mode.

    Raises:
        OnyxError: If api_base or custom_config changed without changing API key
    """
    if not MULTI_TENANT or api_key_changed:
        return

    normalized_existing_api_base = existing_api_base or None
    normalized_new_api_base = new_api_base or None

    api_base_changed = normalized_new_api_base != normalized_existing_api_base
    custom_config_changed = (
        new_custom_config and new_custom_config != existing_custom_config
    )

    if api_base_changed or custom_config_changed:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "API base and/or custom config cannot be changed without changing the API key",
        )


@admin_router.get("/built-in/options")
def fetch_llm_options(
    _: User = Depends(current_admin_user),
) -> list[WellKnownLLMProviderDescriptor]:
    return fetch_available_well_known_llms()


@admin_router.get("/built-in/options/{provider_name}")
def fetch_llm_provider_options(
    provider_name: str,
    _: User = Depends(current_admin_user),
) -> WellKnownLLMProviderDescriptor:
    well_known_llms = fetch_available_well_known_llms()
    for well_known_llm in well_known_llms:
        if well_known_llm.name == provider_name:
            return well_known_llm
    raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Provider {provider_name} not found")


@admin_router.post("/test")
def test_llm_configuration(
    test_llm_request: TestLLMRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Test LLM configuration settings"""

    # the api key is sanitized if we are testing a provider already in the system

    test_api_key = test_llm_request.api_key
    test_custom_config = test_llm_request.custom_config
    if test_llm_request.id:
        existing_provider = fetch_existing_llm_provider_by_id(
            id=test_llm_request.id, db_session=db_session
        )
        if existing_provider:
            test_custom_config = _restore_masked_custom_config_values(
                existing_custom_config=existing_provider.custom_config,
                new_custom_config=test_custom_config,
            )
        # if an API key is not provided, use the existing provider's API key
        if existing_provider and not test_llm_request.api_key_changed:
            _validate_llm_provider_change(
                existing_api_base=existing_provider.api_base,
                existing_custom_config=existing_provider.custom_config,
                new_api_base=test_llm_request.api_base,
                new_custom_config=test_custom_config,
                api_key_changed=False,
            )
            test_api_key = (
                existing_provider.api_key.get_value(apply_mask=False)
                if existing_provider.api_key
                else None
            )
        if existing_provider and not test_llm_request.custom_config_changed:
            test_custom_config = existing_provider.custom_config

    # For this "testing" workflow, we do *not* need the actual `max_input_tokens`.
    # Therefore, instead of performing additional, more complex logic, we just use a dummy value
    max_input_tokens = -1

    llm = get_llm(
        provider=test_llm_request.provider,
        model=test_llm_request.model,
        api_key=test_api_key,
        api_base=test_llm_request.api_base,
        api_version=test_llm_request.api_version,
        custom_config=test_custom_config,
        deployment_name=test_llm_request.deployment_name,
        max_input_tokens=max_input_tokens,
    )

    error_msg = test_llm(llm)

    if error_msg:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)


@admin_router.post("/test/default")
def test_default_provider(
    _: User = Depends(current_admin_user),
) -> None:
    try:
        llm = get_default_llm()
    except ValueError:
        logger.exception("Failed to fetch default LLM Provider")
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No LLM Provider setup")

    error = test_llm(llm)
    if error:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(error))


@admin_router.get("/provider")
def list_llm_providers(
    include_image_gen: bool = Query(False),
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LLMProviderResponse[LLMProviderView]:
    start_time = datetime.now(timezone.utc)
    logger.debug("Starting to fetch LLM providers")

    llm_provider_list: list[LLMProviderView] = []
    for llm_provider_model in fetch_existing_llm_providers(
        db_session=db_session,
        flow_type_filter=[],
        exclude_image_generation_providers=not include_image_gen,
    ):
        from_model_start = datetime.now(timezone.utc)
        full_llm_provider = LLMProviderView.from_model(llm_provider_model)
        from_model_end = datetime.now(timezone.utc)
        from_model_duration = (from_model_end - from_model_start).total_seconds()
        logger.debug(
            f"LLMProviderView.from_model took {from_model_duration:.2f} seconds"
        )

        _mask_provider_credentials(full_llm_provider)
        llm_provider_list.append(full_llm_provider)

    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds()
    logger.debug(f"Completed fetching LLM providers in {duration:.2f} seconds")

    return LLMProviderResponse[LLMProviderView].from_models(
        providers=llm_provider_list,
        default_text=DefaultModel.from_model_config(
            fetch_default_llm_model(db_session)
        ),
        default_vision=DefaultModel.from_model_config(
            fetch_default_vision_model(db_session)
        ),
    )


@admin_router.put("/provider")
def put_llm_provider(
    llm_provider_upsert_request: LLMProviderUpsertRequest,
    is_creation: bool = Query(
        False,
        description="True if creating a new one, False if updating an existing provider",
    ),
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LLMProviderView:
    # validate request (e.g. if we're intending to create but the name already exists we should throw an error)
    # NOTE: may involve duplicate fetching to Postgres, but we're assuming SQLAlchemy is smart enough to cache
    # the result
    existing_provider = None
    if llm_provider_upsert_request.id:
        existing_provider = fetch_existing_llm_provider_by_id(
            id=llm_provider_upsert_request.id, db_session=db_session
        )

    # Check name constraints
    # TODO: Once port from name to id is complete, unique name will no longer be required
    if existing_provider and llm_provider_upsert_request.name != existing_provider.name:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "Renaming providers is not currently supported",
        )

    found_provider = fetch_existing_llm_provider(
        name=llm_provider_upsert_request.name, db_session=db_session
    )
    if found_provider is not None and found_provider is not existing_provider:
        raise OnyxError(
            OnyxErrorCode.DUPLICATE_RESOURCE,
            f"Provider with name={llm_provider_upsert_request.name} already exists",
        )

    if existing_provider and is_creation:
        raise OnyxError(
            OnyxErrorCode.DUPLICATE_RESOURCE,
            f"LLM Provider with name {llm_provider_upsert_request.name} and id={llm_provider_upsert_request.id} already exists",
        )
    elif not existing_provider and not is_creation:
        raise OnyxError(
            OnyxErrorCode.NOT_FOUND,
            f"LLM Provider with name {llm_provider_upsert_request.name} and id={llm_provider_upsert_request.id} does not exist",
        )

    # SSRF Protection: Validate api_base and custom_config match stored values
    if existing_provider:
        llm_provider_upsert_request.custom_config = (
            _restore_masked_custom_config_values(
                existing_custom_config=existing_provider.custom_config,
                new_custom_config=llm_provider_upsert_request.custom_config,
            )
        )
        _validate_llm_provider_change(
            existing_api_base=existing_provider.api_base,
            existing_custom_config=existing_provider.custom_config,
            new_api_base=llm_provider_upsert_request.api_base,
            new_custom_config=llm_provider_upsert_request.custom_config,
            api_key_changed=llm_provider_upsert_request.api_key_changed,
        )

    persona_ids = llm_provider_upsert_request.personas
    if persona_ids:
        _fetched_persona_ids, missing_personas = validate_persona_ids_exist(
            db_session, persona_ids
        )
        if missing_personas:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                f"Invalid persona IDs: {', '.join(map(str, missing_personas))}",
            )
        # Remove duplicates while preserving order
        seen: set[int] = set()
        deduplicated_personas: list[int] = []
        for persona_id in persona_ids:
            if persona_id not in seen:
                seen.add(persona_id)
                deduplicated_personas.append(persona_id)
        llm_provider_upsert_request.personas = deduplicated_personas

    # the llm api key is sanitized when returned to clients, so the only time we
    # should get a real key is when it is explicitly changed
    if existing_provider and not llm_provider_upsert_request.api_key_changed:
        llm_provider_upsert_request.api_key = (
            existing_provider.api_key.get_value(apply_mask=False)
            if existing_provider.api_key
            else None
        )
    if existing_provider and not llm_provider_upsert_request.custom_config_changed:
        llm_provider_upsert_request.custom_config = existing_provider.custom_config

    # Check if we're transitioning to Auto mode
    transitioning_to_auto_mode = llm_provider_upsert_request.is_auto_mode and (
        not existing_provider or not existing_provider.is_auto_mode
    )

    # When transitioning to auto mode, preserve existing model configurations
    # so the upsert doesn't try to delete them (which would trip the default
    # model protection guard). sync_auto_mode_models will handle the model
    # lifecycle afterward — adding new models, hiding removed ones, and
    # updating the default. This is safe even if sync fails: the provider
    # keeps its old models and default rather than losing them.
    if transitioning_to_auto_mode and existing_provider:
        llm_provider_upsert_request.model_configurations = [
            ModelConfigurationUpsertRequest.from_model(mc)
            for mc in existing_provider.model_configurations
        ]

    try:
        result = upsert_llm_provider(
            llm_provider_upsert_request=llm_provider_upsert_request,
            db_session=db_session,
        )

        # If newly enabling Auto mode, sync models immediately from GitHub config
        if transitioning_to_auto_mode:
            from onyx.db.llm import sync_auto_mode_models

            config = fetch_llm_recommendations_from_github()
            if config and llm_provider_upsert_request.provider in config.providers:
                updated_provider = fetch_existing_llm_provider_by_id(
                    id=result.id, db_session=db_session
                )
                if updated_provider:
                    sync_auto_mode_models(
                        db_session,
                        updated_provider,
                        config,
                    )
                    # Refresh result with synced models
                    result = LLMProviderView.from_model(updated_provider)

        _mask_provider_credentials(result)
        return result
    except ValueError as e:
        logger.exception("Failed to upsert LLM Provider")
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))


@admin_router.delete("/provider/{provider_id}")
def delete_llm_provider(
    provider_id: int,
    force: bool = Query(False),
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    if not force:
        model = fetch_default_llm_model(db_session)

        if model and model.llm_provider_id == provider_id:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                "Cannot delete the default LLM provider",
            )

    try:
        remove_llm_provider(db_session, provider_id)
    except ValueError as e:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, str(e))


@admin_router.post("/default")
def set_provider_as_default(
    default_model_request: DefaultModel,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_default_provider(
        provider_id=default_model_request.provider_id,
        model_name=default_model_request.model_name,
        db_session=db_session,
    )


@admin_router.post("/default-vision")
def set_provider_as_default_vision(
    default_model: DefaultModel,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_default_vision_provider(
        provider_id=default_model.provider_id,
        vision_model=default_model.model_name,
        db_session=db_session,
    )


@admin_router.get("/auto-config")
def get_auto_config(
    _: User = Depends(current_admin_user),
) -> dict:
    """Get the current Auto mode configuration from GitHub.

    Returns the available models and default configurations for each
    supported provider type when using Auto mode.
    """
    config = fetch_llm_recommendations_from_github()
    if not config:
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Failed to fetch configuration from GitHub",
        )
    return config.model_dump()


@admin_router.get("/vision-providers")
def get_vision_capable_providers(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> LLMProviderResponse[VisionProviderResponse]:
    """Return a list of LLM providers and their models that support image input"""
    vision_models = fetch_existing_models(
        db_session=db_session, flow_types=[LLMModelFlowType.VISION]
    )

    # Group vision models by provider ID (using ID as key since it's hashable)
    provider_models: dict[int, list[str]] = defaultdict(list)
    providers_by_id: dict[int, LLMProviderView] = {}

    for vision_model in vision_models:
        provider_id = vision_model.llm_provider.id
        provider_models[provider_id].append(vision_model.name)
        # Only create the view once per provider
        if provider_id not in providers_by_id:
            provider_view = LLMProviderView.from_model(vision_model.llm_provider)
            _mask_provider_credentials(provider_view)
            providers_by_id[provider_id] = provider_view

    # Build response list
    vision_provider_response = [
        VisionProviderResponse(
            **providers_by_id[provider_id].model_dump(),
            vision_models=model_names,
        )
        for provider_id, model_names in provider_models.items()
    ]

    logger.debug(f"Found {len(vision_provider_response)} vision-capable providers")

    return LLMProviderResponse[VisionProviderResponse].from_models(
        providers=vision_provider_response,
        default_vision=DefaultModel.from_model_config(
            fetch_default_vision_model(db_session)
        ),
    )


"""Endpoints for all"""


@basic_router.get("/provider")
def list_llm_provider_basics(
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> LLMProviderResponse[LLMProviderDescriptor]:
    """Get LLM providers accessible to the current user.

    Returns:
    - All public providers (is_public=True) - Always included
    - Restricted providers user can access via their group memberships

    For anonymous users or no_auth mode: returns only public providers
    This ensures backward compatibility while providing better UX for authenticated users.
    """
    start_time = datetime.now(timezone.utc)
    logger.debug("Starting to fetch user-accessible LLM providers")

    all_providers = fetch_existing_llm_providers(db_session, [])
    user_group_ids = fetch_user_group_ids(db_session, user)
    is_admin = user.role == UserRole.ADMIN

    accessible_providers = []

    for provider in all_providers:
        # Use centralized access control logic with persona=None since we're
        # listing providers without a specific persona context. This correctly:
        # - Includes public providers WITHOUT persona restrictions
        # - Includes providers user can access via group membership
        # - Excludes providers with persona restrictions (requires specific persona)
        # - Excludes non-public providers with no restrictions (admin-only)
        if can_user_access_llm_provider(
            provider, user_group_ids, persona=None, is_admin=is_admin
        ):
            accessible_providers.append(LLMProviderDescriptor.from_model(provider))

    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds()
    logger.debug(
        f"Completed fetching {len(accessible_providers)} user-accessible providers in {duration:.2f} seconds"
    )

    return LLMProviderResponse[LLMProviderDescriptor].from_models(
        providers=accessible_providers,
        default_text=DefaultModel.from_model_config(
            fetch_default_llm_model(db_session)
        ),
        default_vision=DefaultModel.from_model_config(
            fetch_default_vision_model(db_session)
        ),
    )


def get_valid_model_names_for_persona(
    persona_id: int,
    user: User,
    db_session: Session,
) -> list[str]:
    """Get all valid model names that a user can access for this persona.

    Returns a list of model names (e.g., ["gpt-4o", "claude-3-5-sonnet"]) that are
    available to the user when using this persona, respecting all RBAC restrictions.
    Public providers are included unless they have persona restrictions that exclude this persona.
    """
    persona = fetch_persona_with_groups(db_session, persona_id)
    if not persona:
        return []

    is_admin = user.role == UserRole.ADMIN
    all_providers = fetch_existing_llm_providers(
        db_session, [LLMModelFlowType.CHAT, LLMModelFlowType.VISION]
    )
    user_group_ids = set() if is_admin else fetch_user_group_ids(db_session, user)

    valid_models = []
    for llm_provider_model in all_providers:
        # Check access with persona context — respects all RBAC restrictions
        if can_user_access_llm_provider(
            llm_provider_model, user_group_ids, persona, is_admin=is_admin
        ):
            # Collect all model names from this provider
            for model_config in llm_provider_model.model_configurations:
                if model_config.is_visible:
                    valid_models.append(model_config.name)

    return valid_models


@basic_router.get("/persona/{persona_id}/providers")
def list_llm_providers_for_persona(
    persona_id: int,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> LLMProviderResponse[LLMProviderDescriptor]:
    """Get LLM providers for a specific persona.

    Returns providers that the user can access when using this persona:
    - Public providers (respecting persona restrictions if set)
    - Restricted providers user can access via group/persona restrictions

    This endpoint is used for background fetching of restricted providers
    and should NOT block the UI.
    """
    start_time = datetime.now(timezone.utc)
    logger.debug(f"Starting to fetch LLM providers for persona {persona_id}")

    persona = fetch_persona_with_groups(db_session, persona_id)
    if not persona:
        raise OnyxError(OnyxErrorCode.PERSONA_NOT_FOUND, "Persona not found")

    # Verify user has access to this persona
    if not user_can_access_persona(db_session, persona_id, user, get_editable=False):
        raise OnyxError(
            OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
            "You don't have access to this assistant",
        )

    is_admin = user.role == UserRole.ADMIN
    all_providers = fetch_existing_llm_providers(
        db_session, [LLMModelFlowType.CHAT, LLMModelFlowType.VISION]
    )
    user_group_ids = set() if is_admin else fetch_user_group_ids(db_session, user)

    llm_provider_list: list[LLMProviderDescriptor] = []

    for llm_provider_model in all_providers:
        # Check access with persona context — respects persona restrictions
        if can_user_access_llm_provider(
            llm_provider_model, user_group_ids, persona, is_admin=is_admin
        ):
            llm_provider_list.append(
                LLMProviderDescriptor.from_model(llm_provider_model)
            )

    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds()
    logger.debug(
        f"Completed fetching {len(llm_provider_list)} LLM providers for persona {persona_id} in {duration:.2f} seconds"
    )

    # Get the default model and vision model for the persona
    # TODO: Port persona's over to use ID
    persona_default_provider = persona.llm_model_provider_override
    persona_default_model = persona.llm_model_version_override

    default_text_model = fetch_default_llm_model(db_session)
    default_vision_model = fetch_default_vision_model(db_session)

    # Build default_text and default_vision using persona overrides when available,
    # falling back to the global defaults.
    default_text = DefaultModel.from_model_config(default_text_model)
    default_vision = DefaultModel.from_model_config(default_vision_model)

    if persona_default_provider:
        provider = fetch_existing_llm_provider(persona_default_provider, db_session)
        if provider and can_user_access_llm_provider(
            provider, user_group_ids, persona, is_admin=is_admin
        ):
            if persona_default_model:
                # Persona specifies both provider and model — use them directly
                default_text = DefaultModel(
                    provider_id=provider.id,
                    model_name=persona_default_model,
                )
            else:
                # Persona specifies only the provider — pick a visible (public) model,
                # falling back to any model on this provider
                visible_model = next(
                    (mc for mc in provider.model_configurations if mc.is_visible),
                    None,
                )
                fallback_model = visible_model or next(
                    iter(provider.model_configurations), None
                )
                if fallback_model:
                    default_text = DefaultModel(
                        provider_id=provider.id,
                        model_name=fallback_model.name,
                    )

    return LLMProviderResponse[LLMProviderDescriptor].from_models(
        providers=llm_provider_list,
        default_text=default_text,
        default_vision=default_vision,
    )


@admin_router.get("/provider-contextual-cost")
def get_provider_contextual_cost(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[LLMCost]:
    """
    Get the cost of Re-indexing all documents for contextual retrieval.

    See https://docs.litellm.ai/docs/completion/token_usage#5-cost_per_token
    This includes:
    - The cost of invoking the LLM on each chunk-document pair to get
      - the doc_summary
      - the chunk_context
    - The per-token cost of the LLM used to generate the doc_summary and chunk_context
    """
    providers = fetch_existing_llm_providers(db_session, [LLMModelFlowType.CHAT])
    costs = []
    for provider in providers:
        for model_configuration in provider.model_configurations:
            llm_provider = LLMProviderView.from_model(provider)
            llm = get_llm(
                provider=provider.provider,
                model=model_configuration.name,
                deployment_name=provider.deployment_name,
                api_key=(
                    provider.api_key.get_value(apply_mask=False)
                    if provider.api_key
                    else None
                ),
                api_base=provider.api_base,
                api_version=provider.api_version,
                custom_config=provider.custom_config,
                max_input_tokens=get_max_input_tokens_from_llm_provider(
                    llm_provider=llm_provider, model_name=model_configuration.name
                ),
            )
            cost = get_llm_contextual_cost(llm)
            costs.append(
                LLMCost(
                    provider=provider.name,
                    model_name=model_configuration.name,
                    cost=cost,
                )
            )

    return costs


@admin_router.post("/bedrock/available-models")
def get_bedrock_available_models(
    request: BedrockModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[BedrockFinalModelResponse]:
    """Fetch available Bedrock models for a specific region and credentials.

    Returns model IDs with display names from AWS. Prefers inference profiles
    (for cross-region support) over base models when available.
    """
    try:
        # Precedence: bearer → keys → IAM
        if request.aws_bearer_token_bedrock:
            try:
                os.environ["AWS_BEARER_TOKEN_BEDROCK"] = (
                    request.aws_bearer_token_bedrock
                )
                session = boto3.Session(region_name=request.aws_region_name)
            finally:
                os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None)
        elif request.aws_access_key_id and request.aws_secret_access_key:
            session = boto3.Session(
                aws_access_key_id=request.aws_access_key_id,
                aws_secret_access_key=request.aws_secret_access_key,
                region_name=request.aws_region_name,
            )
        else:
            session = boto3.Session(region_name=request.aws_region_name)

        try:
            bedrock = session.client("bedrock")
        except Exception as e:
            raise OnyxError(
                OnyxErrorCode.CREDENTIAL_INVALID,
                f"Failed to create Bedrock client: {e}. Check AWS credentials and region.",
            )

        # Build model info dict from foundation models (modelId -> metadata)
        model_summaries = bedrock.list_foundation_models().get("modelSummaries", [])
        model_info: dict[str, ModelMetadata] = {}
        available_models: set[str] = set()

        for model in model_summaries:
            model_id = model.get("modelId", "")
            # Skip invalid or non-LLM models (embeddings, image gen, non-streaming)
            if not is_valid_bedrock_model(
                model_id, model.get("responseStreamingSupported", False)
            ):
                continue

            available_models.add(model_id)
            input_modalities = model.get("inputModalities", [])
            model_info[model_id] = {
                "display_name": model.get("modelName", model_id),
                "supports_image_input": "IMAGE" in input_modalities,
            }

        # Get inference profiles (cross-region) - these are preferred over base models
        profile_ids: set[str] = set()
        cross_region_models: set[str] = set()
        try:
            inference_profiles = bedrock.list_inference_profiles(
                typeEquals="SYSTEM_DEFINED"
            ).get("inferenceProfileSummaries", [])
            for profile in inference_profiles:
                if not (profile_id := profile.get("inferenceProfileId")):
                    continue
                # Skip non-LLM inference profiles
                if not is_valid_bedrock_model(profile_id):
                    continue

                profile_ids.add(profile_id)

                # Extract base model ID (everything after first period)
                # e.g., "us.anthropic.claude-3-5-sonnet-..." -> "anthropic.claude-3-5-sonnet-..."
                if "." in profile_id:
                    base_model_id = profile_id.split(".", 1)[1]
                    cross_region_models.add(base_model_id)
                    region = profile_id.split(".")[0]

                    # Copy model info from base model to profile, with region suffix
                    if base_model_id in model_info:
                        base_info = model_info[base_model_id]
                        model_info[profile_id] = {
                            "display_name": f"{base_info['display_name']} ({region})",
                            "supports_image_input": base_info["supports_image_input"],
                        }
                    else:
                        # Base model not in region - infer metadata from profile
                        profile_name = profile.get("inferenceProfileName", "")
                        model_info[profile_id] = {
                            "display_name": (
                                f"{profile_name} ({region})"
                                if profile_name
                                else generate_bedrock_display_name(profile_id)
                            ),
                            # Infer vision support from known vision models
                            "supports_image_input": infer_vision_support(profile_id),
                        }
        except Exception as e:
            logger.warning(f"Couldn't fetch inference profiles for Bedrock: {e}")

        # Prefer profiles: de-dupe available models, then add profile IDs
        candidates = (available_models - cross_region_models) | profile_ids

        # Build response with display names
        results: list[BedrockFinalModelResponse] = []
        for model_id in sorted(candidates, reverse=True):
            info: ModelMetadata | None = model_info.get(model_id)
            display_name = info["display_name"] if info else None

            # Fallback: generate display name from model ID if not available
            if not display_name or display_name == model_id:
                display_name = generate_bedrock_display_name(model_id)

            results.append(
                BedrockFinalModelResponse(
                    name=model_id,
                    display_name=display_name,
                    max_input_tokens=get_bedrock_token_limit(model_id),
                    supports_image_input=(
                        info["supports_image_input"] if info else False
                    ),
                )
            )

        # Sync new models to DB if provider_name is specified
        if request.provider_name:
            _sync_fetched_models(
                db_session=db_session,
                provider_name=request.provider_name,
                models=[
                    SyncModelEntry(
                        name=r.name,
                        display_name=r.display_name,
                        max_input_tokens=r.max_input_tokens,
                        supports_image_input=r.supports_image_input,
                    )
                    for r in results
                ],
                source_label="Bedrock",
            )

        return results

    except (ClientError, NoCredentialsError, BotoCoreError) as e:
        raise OnyxError(
            OnyxErrorCode.CREDENTIAL_INVALID,
            f"Failed to connect to AWS Bedrock: {e}",
        )
    except Exception as e:
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            f"Unexpected error fetching Bedrock models: {e}",
        )


def _get_ollama_available_model_names(api_base: str) -> set[str]:
    """Fetch available model names from Ollama server."""
    tags_url = f"{api_base}/api/tags"
    try:
        response = httpx.get(tags_url, timeout=5.0)
        response.raise_for_status()
        response_json = response.json()
    except Exception as e:
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            f"Failed to fetch Ollama models: {e}",
        )

    models = response_json.get("models", [])
    return {model.get("name") for model in models if model.get("name")}


@admin_router.post("/ollama/available-models")
def get_ollama_available_models(
    request: OllamaModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[OllamaFinalModelResponse]:
    """Fetch the list of available models from an Ollama server."""

    cleaned_api_base = request.api_base.strip().rstrip("/")
    if not cleaned_api_base:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "API base URL is required to fetch Ollama models.",
        )

    # NOTE: most people run Ollama locally, so we don't disallow internal URLs
    # the only way this could be used for SSRF is if there's another endpoint that
    # is not protected + exposes sensitive information on the `/api/tags` endpoint
    # with the same response format
    model_names = _get_ollama_available_model_names(cleaned_api_base)
    if not model_names:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No models found from your Ollama server",
        )

    all_models_with_context_size_and_vision: list[OllamaFinalModelResponse] = []
    show_url = f"{cleaned_api_base}/api/show"

    for model_name in model_names:
        context_limit: int | None = None
        supports_image_input: bool | None = None
        try:
            show_response = httpx.post(
                show_url,
                json={"model": model_name},
                timeout=5.0,
            )
            show_response.raise_for_status()
            show_response_json = show_response.json()

            # Parse the response into the expected format
            ollama_model_details = OllamaModelDetails.model_validate(show_response_json)

            # Check if this model supports completion/chat
            if not ollama_model_details.supports_completion():
                continue

            # Optimistically access. Context limit is stored as "model_architecture.context" = int
            architecture = ollama_model_details.model_info.get(
                "general.architecture", ""
            )
            context_limit = ollama_model_details.model_info.get(
                architecture + ".context_length", None
            )
            supports_image_input = ollama_model_details.supports_image_input()
        except ValidationError as e:
            logger.warning(
                "Invalid model details from Ollama server",
                extra={"model": model_name, "validation_error": str(e)},
            )
        except Exception as e:
            logger.warning(
                "Failed to fetch Ollama model details",
                extra={"model": model_name, "error": str(e)},
            )

        # Note: context_limit may be None if Ollama API doesn't provide it.
        # The runtime will use LiteLLM fallback logic to determine max tokens.
        all_models_with_context_size_and_vision.append(
            OllamaFinalModelResponse(
                name=model_name,
                display_name=generate_ollama_display_name(model_name),
                max_input_tokens=context_limit,
                supports_image_input=supports_image_input or False,
            )
        )

    sorted_results = sorted(
        all_models_with_context_size_and_vision,
        key=lambda m: m.name.lower(),
    )

    # Sync new models to DB if provider_name is specified
    if request.provider_name:
        _sync_fetched_models(
            db_session=db_session,
            provider_name=request.provider_name,
            models=[
                SyncModelEntry(
                    name=r.name,
                    display_name=r.display_name,
                    max_input_tokens=r.max_input_tokens,
                    supports_image_input=r.supports_image_input,
                )
                for r in sorted_results
            ],
            source_label="Ollama",
        )

    return sorted_results


def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
    """Perform GET to OpenRouter /models and return parsed JSON."""
    cleaned_api_base = api_base.strip().rstrip("/")
    url = f"{cleaned_api_base}/models"
    headers = {
        "Authorization": f"Bearer {api_key}",
        # Optional headers recommended by OpenRouter for attribution
        "HTTP-Referer": "https://onyx.app",
        "X-Title": "Onyx",
    }
    try:
        response = httpx.get(url, headers=headers, timeout=10.0)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            f"Failed to fetch OpenRouter models: {e}",
        )


@admin_router.post("/openrouter/available-models")
def get_openrouter_available_models(
    request: OpenRouterModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[OpenRouterFinalModelResponse]:
    """Fetch available models from OpenRouter `/models` endpoint.

    Parses id, name (display), context_length, and architecture.input_modalities.
    """

    response_json = _get_openrouter_models_response(
        api_base=request.api_base, api_key=request.api_key
    )

    data = response_json.get("data", [])
    if not isinstance(data, list) or len(data) == 0:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No models found from your OpenRouter endpoint",
        )

    results: list[OpenRouterFinalModelResponse] = []
    for item in data:
        try:
            model_details = OpenRouterModelDetails.model_validate(item)

            # NOTE: This should be removed if we ever support dynamically fetching embedding models.
            if model_details.is_embedding_model:
                continue

            # Strip vendor prefix since we group by vendor (e.g., "Microsoft: Phi 4" → "Phi 4")
            display_name = strip_openrouter_vendor_prefix(
                model_details.display_name, model_details.id
            )

            # Treat context_length of 0 as unknown (None)
            context_length = model_details.context_length or None

            results.append(
                OpenRouterFinalModelResponse(
                    name=model_details.id,
                    display_name=display_name,
                    max_input_tokens=context_length,
                    supports_image_input=model_details.supports_image_input,
                )
            )
        except Exception as e:
            logger.warning(
                "Failed to parse OpenRouter model entry",
                extra={"error": str(e), "item": str(item)[:1000]},
            )

    if not results:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No compatible models found from OpenRouter",
        )

    sorted_results = sorted(results, key=lambda m: m.name.lower())

    # Sync new models to DB if provider_name is specified
    if request.provider_name:
        _sync_fetched_models(
            db_session=db_session,
            provider_name=request.provider_name,
            models=[
                SyncModelEntry(
                    name=r.name,
                    display_name=r.display_name,
                    max_input_tokens=r.max_input_tokens,
                    supports_image_input=r.supports_image_input,
                )
                for r in sorted_results
            ],
            source_label="OpenRouter",
        )

    return sorted_results


@admin_router.post("/lm-studio/available-models")
def get_lm_studio_available_models(
    request: LMStudioModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[LMStudioFinalModelResponse]:
    """Fetch available models from an LM Studio server.

    Uses the LM Studio-native /api/v1/models endpoint which exposes
    rich metadata including capabilities (vision, reasoning),
    display names, and context lengths.
    """
    cleaned_api_base = request.api_base.strip().rstrip("/")
    # Strip /v1 suffix that users may copy from OpenAI-compatible tool configs;
    # the native metadata endpoint lives at /api/v1/models, not /v1/api/v1/models.
    cleaned_api_base = cleaned_api_base.removesuffix("/v1")
    if not cleaned_api_base:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "API base URL is required to fetch LM Studio models.",
        )

    # If provider_name is given and the api_key hasn't been changed by the user,
    # fall back to the stored API key from the database (the form value is masked).
    api_key = request.api_key
    if request.provider_name and not request.api_key_changed:
        existing_provider = fetch_existing_llm_provider(
            name=request.provider_name, db_session=db_session
        )
        if existing_provider and existing_provider.custom_config:
            api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)

    url = f"{cleaned_api_base}/api/v1/models"
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        response = httpx.get(url, headers=headers, timeout=10.0)
        response.raise_for_status()
        response_json = response.json()
    except Exception as e:
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            f"Failed to fetch LM Studio models: {e}",
        )

    models = response_json.get("models", [])
    if not isinstance(models, list) or len(models) == 0:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No models found from your LM Studio server.",
        )

    results: list[LMStudioFinalModelResponse] = []
    for item in models:
        # Filter to LLM-type models only (skip embeddings, etc.)
        if item.get("type") != "llm":
            continue

        model_key = item.get("key")
        if not model_key:
            continue

        display_name = item.get("display_name") or model_key
        max_context_length = item.get("max_context_length")
        capabilities = item.get("capabilities") or {}

        results.append(
            LMStudioFinalModelResponse(
                name=model_key,
                display_name=display_name,
                max_input_tokens=max_context_length,
                supports_image_input=capabilities.get("vision", False),
                supports_reasoning=capabilities.get("reasoning", False)
                or is_reasoning_model(model_key, display_name),
            )
        )

    if not results:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No compatible models found from LM Studio server.",
        )

    sorted_results = sorted(results, key=lambda m: m.name.lower())

    # Sync new models to DB if provider_name is specified
    if request.provider_name:
        _sync_fetched_models(
            db_session=db_session,
            provider_name=request.provider_name,
            models=[
                SyncModelEntry(
                    name=r.name,
                    display_name=r.display_name,
                    max_input_tokens=r.max_input_tokens,
                    supports_image_input=r.supports_image_input,
                )
                for r in sorted_results
            ],
            source_label="LM Studio",
        )

    return sorted_results


@admin_router.post("/litellm/available-models")
def get_litellm_available_models(
    request: LitellmModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[LitellmFinalModelResponse]:
    """Fetch available models from Litellm proxy /v1/models endpoint."""
    response_json = _get_litellm_models_response(
        api_key=request.api_key, api_base=request.api_base
    )

    models = response_json.get("data", [])
    if not isinstance(models, list) or len(models) == 0:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No models found from your Litellm endpoint",
        )

    results: list[LitellmFinalModelResponse] = []
    for model in models:
        try:
            model_details = LitellmModelDetails.model_validate(model)

            # Skip embedding models
            if is_embedding_model(model_details.id):
                continue

            results.append(
                LitellmFinalModelResponse(
                    provider_name=model_details.owned_by,
                    model_name=model_details.id,
                )
            )
        except Exception as e:
            logger.warning(
                "Failed to parse Litellm model entry",
                extra={"error": str(e), "item": str(model)[:1000]},
            )

    if not results:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No compatible models found from Litellm",
        )

    sorted_results = sorted(results, key=lambda m: m.model_name.lower())

    # Sync new models to DB if provider_name is specified
    if request.provider_name:
        _sync_fetched_models(
            db_session=db_session,
            provider_name=request.provider_name,
            models=[
                SyncModelEntry(
                    name=r.model_name,
                    display_name=r.model_name,
                )
                for r in sorted_results
            ],
            source_label="LiteLLM",
        )

    return sorted_results


def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
    """Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
    cleaned_api_base = api_base.strip().rstrip("/")
    url = f"{cleaned_api_base}/v1/models"

    return _get_openai_compatible_models_response(
        url=url,
        source_name="LiteLLM proxy",
        api_key=api_key,
    )


def _get_openai_compatible_models_response(
    url: str,
    source_name: str,
    api_key: str | None = None,
) -> dict:
    """Fetch model metadata from an OpenAI-compatible `/models` endpoint."""
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://onyx.app",
        "X-Title": "Onyx",
    }
    if not api_key:
        headers.pop("Authorization")

    try:
        response = httpx.get(url, headers=headers, timeout=10.0)
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 401:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                f"Authentication failed: invalid or missing API key for {source_name}.",
            )
        elif e.response.status_code == 404:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                f"{source_name} models endpoint not found at {url}. Please verify the API base URL.",
            )
        else:
            raise OnyxError(
                OnyxErrorCode.BAD_GATEWAY,
                f"Failed to fetch {source_name} models: {e}",
            )
    except httpx.RequestError as e:
        logger.warning(
            "Failed to fetch models from OpenAI-compatible endpoint",
            extra={"source": source_name, "url": url, "error": str(e)},
            exc_info=True,
        )
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            f"Failed to fetch {source_name} models: {e}",
        )
    except ValueError as e:
        logger.warning(
            "Received invalid model response from OpenAI-compatible endpoint",
            extra={"source": source_name, "url": url, "error": str(e)},
            exc_info=True,
        )
        raise OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            f"Failed to fetch {source_name} models: {e}",
        )


@admin_router.post("/bifrost/available-models")
def get_bifrost_available_models(
    request: BifrostModelsRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[BifrostFinalModelResponse]:
    """Fetch available models from Bifrost gateway /v1/models endpoint."""
    response_json = _get_bifrost_models_response(
        api_base=request.api_base, api_key=request.api_key
    )

    models = response_json.get("data", [])
    if not isinstance(models, list) or len(models) == 0:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No models found from your Bifrost endpoint",
        )

    results: list[BifrostFinalModelResponse] = []
    for model in models:
        try:
            model_id = model.get("id", "")
            model_name = model.get("name", model_id)

            if not model_id:
                continue

            # Skip embedding models
            if is_embedding_model(model_id):
                continue

            results.append(
                BifrostFinalModelResponse(
                    name=model_id,
                    display_name=model_name,
                    max_input_tokens=model.get("context_length"),
                    supports_image_input=infer_vision_support(model_id),
                    supports_reasoning=is_reasoning_model(model_id, model_name),
                )
            )
        except Exception as e:
            logger.warning(
                "Failed to parse Bifrost model entry",
                extra={"error": str(e), "item": str(model)[:1000]},
            )

    if not results:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No compatible models found from Bifrost",
        )

    sorted_results = sorted(results, key=lambda m: m.name.lower())

    # Sync new models to DB if provider_name is specified
    if request.provider_name:
        _sync_fetched_models(
            db_session=db_session,
            provider_name=request.provider_name,
            models=[
                SyncModelEntry(
                    name=r.name,
                    display_name=r.display_name,
                    max_input_tokens=r.max_input_tokens,
                    supports_image_input=r.supports_image_input,
                )
                for r in sorted_results
            ],
            source_label="Bifrost",
        )

    return sorted_results


def _get_bifrost_models_response(api_base: str, api_key: str | None = None) -> dict:
    """Perform GET to Bifrost /v1/models and return parsed JSON."""
    cleaned_api_base = api_base.strip().rstrip("/")
    # Ensure we hit /v1/models
    if cleaned_api_base.endswith("/v1"):
        url = f"{cleaned_api_base}/models"
    else:
        url = f"{cleaned_api_base}/v1/models"

    return _get_openai_compatible_models_response(
        url=url,
        source_name="Bifrost",
        api_key=api_key,
    )


================================================
FILE: backend/onyx/server/manage/llm/models.py
================================================
from __future__ import annotations

from typing import Any
from typing import Generic
from typing import TYPE_CHECKING
from typing import TypeVar

from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator

from onyx.db.enums import LLMModelFlowType
from onyx.llm.utils import get_max_input_tokens
from onyx.llm.utils import litellm_thinks_model_supports_image_input
from onyx.llm.utils import model_is_reasoning_model
from onyx.server.manage.llm.utils import DYNAMIC_LLM_PROVIDERS
from onyx.server.manage.llm.utils import extract_vendor_from_model_name
from onyx.server.manage.llm.utils import filter_model_configurations
from onyx.server.manage.llm.utils import is_reasoning_model


if TYPE_CHECKING:
    from onyx.db.models import (
        LLMProvider as LLMProviderModel,
        ModelConfiguration as ModelConfigurationModel,
    )

T = TypeVar("T", "LLMProviderDescriptor", "LLMProviderView", "VisionProviderResponse")


class TestLLMRequest(BaseModel):
    # provider level
    id: int | None = None
    provider: str
    model: str
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    custom_config: dict[str, str] | None = None

    # model level
    deployment_name: str | None = None

    # if try and use the existing API/custom config key
    api_key_changed: bool
    custom_config_changed: bool

    @field_validator("provider", mode="before")
    @classmethod
    def normalize_provider(cls, value: str) -> str:
        """Normalize provider name by stripping whitespace and lowercasing."""
        return value.strip().lower()


class LLMProviderDescriptor(BaseModel):
    """A descriptor for an LLM provider that can be safely viewed by
    non-admin users. Used when giving a list of available LLMs."""

    id: int
    name: str
    provider: str
    provider_display_name: str  # Human-friendly name like "Claude (Anthropic)"
    model_configurations: list["ModelConfigurationView"]

    @classmethod
    def from_model(
        cls,
        llm_provider_model: "LLMProviderModel",
    ) -> "LLMProviderDescriptor":
        from onyx.llm.well_known_providers.llm_provider_options import (
            get_provider_display_name,
        )

        provider = llm_provider_model.provider

        return cls(
            id=llm_provider_model.id,
            name=llm_provider_model.name,
            provider=provider,
            provider_display_name=get_provider_display_name(provider),
            model_configurations=filter_model_configurations(
                llm_provider_model.model_configurations, provider
            ),
        )


class LLMProvider(BaseModel):
    name: str
    provider: str
    api_key: str | None = None
    api_base: str | None = None
    api_version: str | None = None
    custom_config: dict[str, str] | None = None
    is_public: bool = True
    is_auto_mode: bool = False
    groups: list[int] = Field(default_factory=list)
    personas: list[int] = Field(default_factory=list)
    deployment_name: str | None = None


class LLMProviderUpsertRequest(LLMProvider):
    # should only be used for a "custom" provider
    # for default providers, the built-in model names are used
    id: int | None = None
    api_key_changed: bool = False
    custom_config_changed: bool = False
    model_configurations: list["ModelConfigurationUpsertRequest"] = []

    @field_validator("provider", mode="before")
    @classmethod
    def normalize_provider(cls, value: str) -> str:
        """Normalize provider name by stripping whitespace and lowercasing."""
        return value.strip().lower()


class LLMProviderView(LLMProvider):
    """Stripped down representation of LLMProvider for display / limited access info only"""

    id: int
    model_configurations: list["ModelConfigurationView"]

    @classmethod
    def from_model(
        cls,
        llm_provider_model: "LLMProviderModel",
    ) -> "LLMProviderView":
        # Safely get groups - handle detached instance case
        try:
            groups = [group.id for group in llm_provider_model.groups]
        except Exception:
            # If groups relationship can't be loaded (detached instance), use empty list
            groups = []
        # Safely get personas - similar handling as groups
        try:
            personas = [persona.id for persona in llm_provider_model.personas]
        except Exception:
            personas = []

        provider = llm_provider_model.provider

        return cls(
            id=llm_provider_model.id,
            name=llm_provider_model.name,
            provider=provider,
            api_key=(
                llm_provider_model.api_key.get_value(apply_mask=False)
                if llm_provider_model.api_key
                else None
            ),
            api_base=llm_provider_model.api_base,
            api_version=llm_provider_model.api_version,
            custom_config=llm_provider_model.custom_config,
            is_public=llm_provider_model.is_public,
            is_auto_mode=llm_provider_model.is_auto_mode,
            groups=groups,
            personas=personas,
            deployment_name=llm_provider_model.deployment_name,
            model_configurations=filter_model_configurations(
                llm_provider_model.model_configurations, provider
            ),
        )


class ModelConfigurationUpsertRequest(BaseModel):
    name: str
    is_visible: bool
    max_input_tokens: int | None = None
    supports_image_input: bool | None = None
    display_name: str | None = None  # For dynamic providers, from source API

    @classmethod
    def from_model(
        cls, model_configuration_model: "ModelConfigurationModel"
    ) -> "ModelConfigurationUpsertRequest":
        return cls(
            name=model_configuration_model.name,
            is_visible=model_configuration_model.is_visible,
            max_input_tokens=model_configuration_model.max_input_tokens,
            supports_image_input=model_configuration_model.supports_image_input,
            display_name=model_configuration_model.display_name,
        )


class ModelConfigurationView(BaseModel):
    name: str
    is_visible: bool
    max_input_tokens: int | None = None
    supports_image_input: bool
    supports_reasoning: bool = False
    display_name: str | None = None
    provider_display_name: str | None = None
    vendor: str | None = None
    version: str | None = None
    region: str | None = None

    @classmethod
    def from_model(
        cls,
        model_configuration_model: "ModelConfigurationModel",
        provider_name: str,
    ) -> "ModelConfigurationView":
        # For dynamic providers (OpenRouter, Bedrock, Ollama), use the display_name
        # stored in DB from the source API. Skip LiteLLM parsing entirely.
        if (
            provider_name in DYNAMIC_LLM_PROVIDERS
            and model_configuration_model.display_name
        ):
            # Extract vendor from model name for grouping (e.g., "Anthropic", "OpenAI")
            vendor = extract_vendor_from_model_name(
                model_configuration_model.name, provider_name
            )

            return cls(
                name=model_configuration_model.name,
                is_visible=model_configuration_model.is_visible,
                max_input_tokens=model_configuration_model.max_input_tokens,
                supports_image_input=(
                    LLMModelFlowType.VISION
                    in model_configuration_model.llm_model_flow_types
                ),
                # Infer reasoning support from model name/display name
                supports_reasoning=is_reasoning_model(
                    model_configuration_model.name,
                    model_configuration_model.display_name or "",
                ),
                display_name=model_configuration_model.display_name,
                provider_display_name=None,  # Not needed for dynamic providers
                vendor=vendor,
                version=None,
                region=None,
            )

        # For static providers (OpenAI, Anthropic, etc.), use LiteLLM enrichments
        from onyx.llm.model_name_parser import parse_litellm_model_name

        # Parse the model name to get display information
        # Include provider prefix if not already present (enrichments use full keys like "vertex_ai/...")
        model_name = model_configuration_model.name
        if provider_name and not model_name.startswith(f"{provider_name}/"):
            model_name = f"{provider_name}/{model_name}"
        parsed = parse_litellm_model_name(model_name)

        # Include region in display name for Bedrock cross-region models
        display_name = (
            f"{parsed.display_name} ({parsed.region})"
            if parsed.region
            else parsed.display_name
        )

        return cls(
            name=model_configuration_model.name,
            is_visible=model_configuration_model.is_visible,
            max_input_tokens=(
                model_configuration_model.max_input_tokens
                or get_max_input_tokens(
                    model_name=model_configuration_model.name,
                    model_provider=provider_name,
                )
            ),
            supports_image_input=(
                True
                if LLMModelFlowType.VISION
                in model_configuration_model.llm_model_flow_types
                else litellm_thinks_model_supports_image_input(
                    model_configuration_model.name, provider_name
                )
            ),
            supports_reasoning=model_is_reasoning_model(
                model_configuration_model.name, provider_name
            ),
            # Populate display fields from parsed model name
            display_name=display_name,
            provider_display_name=parsed.provider_display_name,
            vendor=parsed.vendor,
            version=parsed.version,
            region=parsed.region,
        )


class VisionProviderResponse(LLMProviderView):
    """Response model for vision providers endpoint, including vision-specific fields."""

    vision_models: list[str]


class LLMCost(BaseModel):
    provider: str
    model_name: str
    cost: float


class BedrockModelsRequest(BaseModel):
    aws_region_name: str
    aws_access_key_id: str | None = None
    aws_secret_access_key: str | None = None
    aws_bearer_token_bedrock: str | None = None
    provider_name: str | None = None  # Optional: to save models to existing provider


class BedrockFinalModelResponse(BaseModel):
    name: str  # Model ID (e.g., "anthropic.claude-3-5-sonnet-20241022-v2:0")
    display_name: str  # Human-readable name from AWS (e.g., "Claude 3.5 Sonnet v2")
    max_input_tokens: int  # From LiteLLM, our mapping, or default 32000
    supports_image_input: bool


class OllamaModelsRequest(BaseModel):
    api_base: str
    provider_name: str | None = None  # Optional: to save models to existing provider


class OllamaFinalModelResponse(BaseModel):
    name: str
    display_name: str  # Generated from model name (e.g., "llama3:7b" → "Llama 3 7B")
    max_input_tokens: int | None  # From Ollama API or None if unavailable
    supports_image_input: bool


class OllamaModelDetails(BaseModel):
    """Response model for Ollama /api/show endpoint"""

    model_info: dict[str, Any]
    capabilities: list[str] = []

    def supports_completion(self) -> bool:
        """Check if this model supports completion/chat"""
        return "completion" in self.capabilities

    def supports_image_input(self) -> bool:
        """Check if this model supports image input"""
        return "vision" in self.capabilities


# OpenRouter dynamic models fetch
class OpenRouterModelsRequest(BaseModel):
    api_base: str
    api_key: str
    provider_name: str | None = None  # Optional: to save models to existing provider


class OpenRouterModelDetails(BaseModel):
    """Response model for OpenRouter /api/v1/models endpoint"""

    # This is used to ignore any extra fields that are returned from the API
    model_config = {"extra": "ignore"}

    id: str
    # OpenRouter API returns "name" but we use "display_name" for consistency
    display_name: str = Field(alias="name")
    # context_length may be missing or 0 for some models
    context_length: int | None = None
    architecture: dict[str, Any] = {}  # Contains 'input_modalities' key

    @property
    def supports_image_input(self) -> bool:
        input_modalities = self.architecture.get("input_modalities", [])
        return isinstance(input_modalities, list) and "image" in input_modalities

    @property
    def is_embedding_model(self) -> bool:
        output_modalities = self.architecture.get("output_modalities", [])
        return isinstance(output_modalities, list) and "embeddings" in output_modalities


class OpenRouterFinalModelResponse(BaseModel):
    name: str  # Model ID (e.g., "openai/gpt-5-pro")
    display_name: str  # Human-readable name from OpenRouter API
    max_input_tokens: (
        int | None
    )  # From OpenRouter API context_length (may be missing for some models)
    supports_image_input: bool


# LM Studio dynamic models fetch
class LMStudioModelsRequest(BaseModel):
    api_base: str
    api_key: str | None = None
    api_key_changed: bool = False
    provider_name: str | None = None  # Optional: to save models to existing provider


class LMStudioFinalModelResponse(BaseModel):
    name: str  # Model ID from LM Studio (e.g., "lmstudio-community/Meta-Llama-3-8B")
    display_name: str  # Human-readable name
    max_input_tokens: int | None  # From LM Studio API or None if unavailable
    supports_image_input: bool
    supports_reasoning: bool


class DefaultModel(BaseModel):
    provider_id: int
    model_name: str

    @classmethod
    def from_model_config(
        cls, model_config: ModelConfigurationModel | None
    ) -> DefaultModel | None:
        if not model_config:
            return None
        return cls(
            provider_id=model_config.llm_provider_id,
            model_name=model_config.name,
        )


class LLMProviderResponse(BaseModel, Generic[T]):
    providers: list[T]
    default_text: DefaultModel | None = None
    default_vision: DefaultModel | None = None

    @classmethod
    def from_models(
        cls,
        providers: list[T],
        default_text: DefaultModel | None = None,
        default_vision: DefaultModel | None = None,
    ) -> LLMProviderResponse[T]:
        return cls(
            providers=providers,
            default_text=default_text,
            default_vision=default_vision,
        )


class SyncModelEntry(BaseModel):
    """Typed model for syncing fetched models to the DB."""

    name: str
    display_name: str
    max_input_tokens: int | None = None
    supports_image_input: bool = False


class LitellmModelsRequest(BaseModel):
    api_key: str
    api_base: str
    provider_name: str | None = None  # Optional: to save models to existing provider


class LitellmModelDetails(BaseModel):
    """Response model for Litellm proxy /api/v1/models endpoint"""

    id: str  # Model ID (e.g. "gpt-4o")
    object: str  # "model"
    created: int  # Unix timestamp in seconds
    owned_by: str  # Provider name (e.g. "openai")


class LitellmFinalModelResponse(BaseModel):
    provider_name: str  # Provider name (e.g. "openai")
    model_name: str  # Model ID (e.g. "gpt-4o")


# Bifrost dynamic models fetch
class BifrostModelsRequest(BaseModel):
    api_base: str
    api_key: str | None = None
    provider_name: str | None = None  # Optional: to save models to existing provider


class BifrostFinalModelResponse(BaseModel):
    name: str  # Model ID in provider/model format (e.g. "anthropic/claude-sonnet-4-6")
    display_name: str  # Human-readable name from Bifrost API
    max_input_tokens: int | None
    supports_image_input: bool
    supports_reasoning: bool


================================================
FILE: backend/onyx/server/manage/llm/utils.py
================================================
"""
LLM Provider Utilities

Utilities for dynamic LLM providers (Bedrock, Ollama, OpenRouter):
- Display name generation from model identifiers
- Model validation and filtering
- Vision/reasoning capability inference
"""

import re
from typing import TypedDict

from onyx.llm.constants import BEDROCK_MODEL_NAME_MAPPINGS
from onyx.llm.constants import LlmProviderNames
from onyx.llm.constants import MODEL_PREFIX_TO_VENDOR
from onyx.llm.constants import OLLAMA_MODEL_NAME_MAPPINGS
from onyx.llm.constants import OLLAMA_MODEL_TO_VENDOR
from onyx.llm.constants import PROVIDER_DISPLAY_NAMES


# Dynamic providers fetch models directly from source APIs (not LiteLLM)
DYNAMIC_LLM_PROVIDERS = frozenset(
    {
        LlmProviderNames.OPENROUTER,
        LlmProviderNames.BEDROCK,
        LlmProviderNames.OLLAMA_CHAT,
        LlmProviderNames.LM_STUDIO,
        LlmProviderNames.BIFROST,
    }
)


class ModelMetadata(TypedDict):
    """Metadata about a model from the provider API."""

    display_name: str
    supports_image_input: bool


# Non-LLM model patterns to filter out (image gen, embeddings, etc.)
NON_LLM_PATTERNS = frozenset({"embed", "stable-", "titan-image", "titan-embed"})

# Known Bedrock vision-capable models (for fallback when base model not in region)
BEDROCK_VISION_MODELS = frozenset(
    {
        "anthropic.claude-3",
        "anthropic.claude-4",
        "amazon.nova-pro",
        "amazon.nova-lite",
        "amazon.nova-premier",
    }
)

# Known Bifrost/OpenAI-compatible vision-capable model families where the
# source API does not expose this metadata directly.
BIFROST_VISION_MODEL_FAMILIES = frozenset(
    {
        "anthropic/claude-3",
        "anthropic/claude-4",
        "amazon/nova-pro",
        "amazon/nova-lite",
        "amazon/nova-premier",
        "openai/gpt-4o",
        "openai/gpt-4.1",
        "google/gemini",
        "meta-llama/llama-3.2",
        "mistral/pixtral",
        "qwen/qwen2.5-vl",
        "qwen/qwen-vl",
    }
)


def is_valid_bedrock_model(
    model_id: str,
    supports_streaming: bool = True,
) -> bool:
    """Check if a Bedrock model ID is a valid LLM model.

    Args:
        model_id: The model ID to check
        supports_streaming: Whether the model supports streaming (required for LLMs)

    Returns:
        True if the model is a valid LLM, False otherwise
    """
    if not model_id:
        return False
    if any(pattern in model_id.lower() for pattern in NON_LLM_PATTERNS):
        return False
    if not supports_streaming:
        return False
    return True


def infer_vision_support(model_id: str) -> bool:
    """Infer vision support from model ID when base model metadata unavailable.

    Used for providers like Bedrock and Bifrost where vision support may
    need to be inferred from vendor/model naming conventions.
    """
    model_id_lower = model_id.lower()
    if any(vision_model in model_id_lower for vision_model in BEDROCK_VISION_MODELS):
        return True

    normalized_model_id = model_id_lower.replace(".", "/")
    return any(
        vision_model in normalized_model_id
        for vision_model in BIFROST_VISION_MODEL_FAMILIES
    )


def generate_bedrock_display_name(model_id: str) -> str:
    """Generate a human-friendly display name for a Bedrock model ID.

    Examples:
        "anthropic.claude-3-5-sonnet-20241022-v2:0" → "Claude 3.5 Sonnet v2"
        "us.anthropic.claude-3-5-sonnet-..." → "Claude 3.5 Sonnet (us)"
        "meta.llama3-70b-instruct-v1:0" → "Llama 3 70B Instruct"
    """
    # Check for region prefix (us., eu., global., etc.)
    region = None
    if "." in model_id:
        parts = model_id.split(".", 1)
        if parts[0] in ("us", "eu", "global", "ap", "apac"):
            region = parts[0]
            model_id = parts[1]

    # Remove provider prefix (anthropic., meta., amazon., etc.)
    if "." in model_id:
        model_id = model_id.split(".", 1)[1]

    # Remove version suffix (:0, :1, etc.) and date stamps
    model_id = re.sub(r":\d+$", "", model_id)
    model_id = re.sub(r"-\d{8}-v\d+", "", model_id)  # -20241022-v2
    model_id = re.sub(r"-v\d+:\d+$", "", model_id)  # -v1:0
    model_id = re.sub(r"-v\d+$", "", model_id)  # -v1

    # Convert to display name
    display_name = model_id.replace("-", " ").replace("_", " ")

    # Apply proper casing for known models
    display_lower = display_name.lower()
    for key, proper_name in BEDROCK_MODEL_NAME_MAPPINGS.items():
        if key in display_lower:
            # Find and replace with proper casing
            pattern = re.compile(re.escape(key), re.IGNORECASE)
            display_name = pattern.sub(proper_name, display_name)
            break

    # Clean up version numbers (e.g., "3 5" -> "3.5")
    display_name = re.sub(r"(\d) (\d)", r"\1.\2", display_name)

    # Title case and clean up
    words = display_name.split()
    result_words = []
    for word in words:
        if word.lower() in BEDROCK_MODEL_NAME_MAPPINGS:
            result_words.append(BEDROCK_MODEL_NAME_MAPPINGS[word.lower()])
        elif word.isdigit() or re.match(r"^\d+[bBkKmM]?$", word):
            result_words.append(word.upper() if word[-1:].lower() in "bkm" else word)
        elif word.lower() in ("instruct", "chat", "pro", "lite", "mini", "premier"):
            result_words.append(word.title())
        else:
            result_words.append(word.title() if not word[0].isupper() else word)

    display_name = " ".join(result_words)

    # Add region suffix if present
    if region:
        display_name = f"{display_name} ({region})"

    return display_name


def generate_ollama_display_name(model_name: str) -> str:
    """Generate a human-friendly display name for an Ollama model.

    Examples:
        "llama3:latest" → "Llama 3"
        "llama3.3:70b" → "Llama 3.3 70B"
        "qwen2.5:7b" → "Qwen 2.5 7B"
        "mistral:latest" → "Mistral"
        "deepseek-r1:14b" → "DeepSeek R1 14B"
    """
    # Split into base name and tag
    if ":" in model_name:
        base, tag = model_name.rsplit(":", 1)
    else:
        base, tag = model_name, ""

    # Try to match known model families and apply proper casing
    display_name = base
    base_lower = base.lower()
    for key, proper_name in OLLAMA_MODEL_NAME_MAPPINGS.items():
        if base_lower.startswith(key):
            # Replace the matched part with proper casing, keep the rest
            suffix = base[len(key) :]
            # Handle version numbers like "3", "3.3", "2.5"
            if suffix and suffix[0].isdigit():
                suffix = " " + suffix
            # Handle dashes like "-r1", "-coder"
            elif suffix.startswith("-"):
                suffix = " " + suffix[1:].title()
            display_name = proper_name + suffix
            break
    else:
        # Default: Title case with dashes converted to spaces
        display_name = base.replace("-", " ").title()

    # Process tag to extract size info (skip "latest")
    if tag and tag.lower() != "latest":
        # Extract size like "7b", "70b", "14b"
        size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])", tag)
        if size_match:
            size = size_match.group(1).upper()
            display_name = f"{display_name} {size}"

    return display_name


def strip_openrouter_vendor_prefix(display_name: str, model_id: str) -> str:
    """Strip redundant vendor prefix from OpenRouter display names.

    OpenRouter returns names like "Microsoft: Phi 4" but we already group
    by vendor, so strip the prefix to avoid redundancy.

    Examples:
        ("Microsoft: Phi 4", "microsoft/phi-4") → "Phi 4"
        ("Mistral: Mixtral 8x7B Instruct", "mistralai/mixtral-8x7b") → "Mixtral 8x7B Instruct"
        ("Claude 3.5 Sonnet", "anthropic/claude-3.5-sonnet") → "Claude 3.5 Sonnet" (no prefix)
    """
    # Extract vendor from model ID (first part before "/")
    if "/" not in model_id:
        return display_name

    vendor_from_id = model_id.split("/")[0].lower()

    # Check if display name starts with "Vendor: " pattern
    if ": " in display_name:
        prefix, rest = display_name.split(": ", 1)
        # Normalize both for comparison (remove spaces, dashes, underscores)
        prefix_normalized = prefix.lower().replace(" ", "").replace("-", "")
        vendor_normalized = vendor_from_id.replace("-", "").replace("_", "")

        # Match if prefix matches vendor (handles "Mistral" vs "mistralai", etc.)
        if (
            prefix_normalized == vendor_normalized
            or prefix_normalized.startswith(vendor_normalized)
            or vendor_normalized.startswith(prefix_normalized)
        ):
            return rest

    return display_name


# Reasoning model patterns for OpenRouter
REASONING_MODEL_PATTERNS = frozenset(
    {
        "o1",
        "o3",
        "o4",
        "gpt-5",
        "thinking",
        "reason",
        "deepseek-r1",
        "qwq",
    }
)


def is_reasoning_model(model_id: str, display_name: str) -> bool:
    """Check if a model is a reasoning/thinking model based on its ID or name.

    Used for OpenRouter and other dynamic providers where we need to infer
    reasoning capability from model identifiers.
    """
    combined = f"{model_id} {display_name}".lower()
    return any(pattern in combined for pattern in REASONING_MODEL_PATTERNS)


def extract_base_model_name(model: str) -> str | None:
    """Extract base model name by removing date suffixes.

    Returns None if no date suffix was found.
    """
    patterns = [
        r"-\d{8}$",  # -20250929
        r"-\d{4}-\d{2}-\d{2}$",  # -2024-08-06
        r"@\d{8}$",  # @20250219
    ]
    for pattern in patterns:
        if re.search(pattern, model):
            return re.sub(pattern, "", model)
    return None


def should_filter_as_dated_duplicate(
    model_name: str, all_model_names: set[str]
) -> bool:
    """Check if this model is a dated variant and a non-dated version exists."""
    base = extract_base_model_name(model_name)
    if base and base in all_model_names:
        return True
    return False


def filter_model_configurations(
    model_configurations: list,
    provider: str,
) -> list:
    """Filter out obsolete and dated duplicate models from configurations.

    Args:
        model_configurations: List of ModelConfiguration DB models
        provider: The provider name (e.g., "openai", "anthropic")

    Returns:
        List of ModelConfigurationView objects with obsolete/duplicate models removed
    """
    # Import here to avoid circular imports
    from onyx.llm.well_known_providers.llm_provider_options import is_obsolete_model
    from onyx.server.manage.llm.models import ModelConfigurationView

    all_model_names = {mc.name for mc in model_configurations}

    filtered_configs = []
    for model_configuration in model_configurations:
        # Skip obsolete models
        if is_obsolete_model(model_configuration.name, provider):
            continue
        # Skip dated duplicates when non-dated version exists
        if should_filter_as_dated_duplicate(model_configuration.name, all_model_names):
            continue
        filtered_configs.append(
            ModelConfigurationView.from_model(model_configuration, provider)
        )

    return filtered_configs


def extract_vendor_from_model_name(model_name: str, provider: str) -> str | None:
    """Extract vendor from model name for aggregator providers.

    Examples:
        - OpenRouter: "anthropic/claude-3-5-sonnet" → "Anthropic"
        - Bedrock: "anthropic.claude-3-5-sonnet-..." → "Anthropic"
        - Bedrock: "us.anthropic.claude-..." → "Anthropic"
        - Ollama: "llama3:70b" → "Meta"
        - Ollama: "qwen2.5:7b" → "Alibaba"
    """
    if provider in (LlmProviderNames.OPENROUTER, LlmProviderNames.BIFROST):
        # Format: "vendor/model-name" e.g., "anthropic/claude-3-5-sonnet"
        if "/" in model_name:
            vendor_key = model_name.split("/")[0].lower()
            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())

    elif provider == LlmProviderNames.BEDROCK:
        # Format: "vendor.model-name" or "region.vendor.model-name"
        parts = model_name.split(".")
        if len(parts) >= 2:
            # Check if first part is a region (us, eu, global, etc.)
            if parts[0] in ("us", "eu", "global", "ap", "apac"):
                vendor_key = parts[1].lower() if len(parts) > 2 else parts[0].lower()
            else:
                vendor_key = parts[0].lower()
            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())

    elif provider == LlmProviderNames.OLLAMA_CHAT:
        # Format: "model-name:tag" e.g., "llama3:70b", "qwen2.5:7b"
        # Extract base name (before colon)
        base_name = model_name.split(":")[0].lower()
        # Match against known model prefixes
        for prefix, vendor in OLLAMA_MODEL_TO_VENDOR.items():
            if base_name.startswith(prefix):
                return vendor
        # Fallback: capitalize the base name as vendor
        return base_name.split("-")[0].title()

    elif provider == LlmProviderNames.LM_STUDIO:
        # LM Studio model IDs can be paths like "publisher/model-name"
        # or simple names. Use MODEL_PREFIX_TO_VENDOR for matching.

        model_lower = model_name.lower()
        # Check for slash-separated vendor prefix first
        if "/" in model_lower:
            vendor_key = model_lower.split("/")[0]
            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())
        # Fallback to model prefix matching
        for prefix, vendor in MODEL_PREFIX_TO_VENDOR.items():
            if model_lower.startswith(prefix):
                return PROVIDER_DISPLAY_NAMES.get(vendor, vendor.title())
        return None

    return None


def is_embedding_model(model_name: str) -> bool:
    """Checks for if a model is an embedding model"""
    from litellm import get_model_info

    try:
        # get_model_info raises on unknown models
        # default to False
        model_info = get_model_info(model_name)
    except Exception:
        return False
    is_embedding_mode = model_info.get("mode") == "embedding"

    return is_embedding_mode


================================================
FILE: backend/onyx/server/manage/models.py
================================================
import re
from datetime import datetime
from enum import Enum
from typing import Any
from typing import TYPE_CHECKING

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator

from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY
from onyx.configs.constants import AuthType
from onyx.context.search.models import SavedSearchSettings
from onyx.db.enums import DefaultAppMode
from onyx.db.enums import ThemePreference
from onyx.db.memory import MAX_MEMORIES_PER_USER
from onyx.db.models import AllowedAnswerFilters
from onyx.db.models import ChannelConfig
from onyx.db.models import SlackBot as SlackAppModel
from onyx.db.models import SlackChannelConfig as SlackChannelConfigModel
from onyx.db.models import StandardAnswer as StandardAnswerModel
from onyx.db.models import StandardAnswerCategory as StandardAnswerCategoryModel
from onyx.db.models import User
from onyx.onyxbot.slack.config import VALID_SLACK_FILTERS
from onyx.server.features.persona.models import FullPersonaSnapshot
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.models import FullUserSnapshot
from onyx.server.models import InvitedUserSnapshot


if TYPE_CHECKING:
    pass


class EmailInviteStatus(str, Enum):
    SENT = "SENT"
    NOT_CONFIGURED = "NOT_CONFIGURED"
    SEND_FAILED = "SEND_FAILED"
    DISABLED = "DISABLED"


class BulkInviteResponse(BaseModel):
    invited_count: int
    email_invite_status: EmailInviteStatus


class VersionResponse(BaseModel):
    backend_version: str


class AuthTypeResponse(BaseModel):
    auth_type: AuthType
    # specifies whether the current auth setup requires
    # users to have verified emails
    requires_verification: bool
    anonymous_user_enabled: bool | None = None
    password_min_length: int
    # whether there are any users in the system
    has_users: bool = True
    oauth_enabled: bool = False


class UserSpecificAssistantPreference(BaseModel):
    disabled_tool_ids: list[int]


UserSpecificAssistantPreferences = dict[int, UserSpecificAssistantPreference]


class UserPreferences(BaseModel):
    chosen_assistants: list[int] | None = None
    hidden_assistants: list[int] = []
    visible_assistants: list[int] = []
    default_model: str | None = None
    pinned_assistants: list[int] | None = None
    shortcut_enabled: bool | None = None

    # These will default to workspace settings on the frontend if not set
    auto_scroll: bool | None = None
    temperature_override_enabled: bool | None = None
    theme_preference: ThemePreference | None = None
    chat_background: str | None = None
    default_app_mode: DefaultAppMode = DefaultAppMode.CHAT

    # Voice preferences
    voice_auto_send: bool | None = None
    voice_auto_playback: bool | None = None
    voice_playback_speed: float | None = None

    # controls which tools are enabled for the user for a specific assistant
    assistant_specific_configs: UserSpecificAssistantPreferences | None = None


class MemoryItem(BaseModel):
    id: int | None = None
    content: str


class UserPersonalization(BaseModel):
    name: str = ""
    role: str = ""
    use_memories: bool = True
    enable_memory_tool: bool = True
    memories: list[MemoryItem] = Field(default_factory=list)
    user_preferences: str = ""


class TenantSnapshot(BaseModel):
    tenant_id: str
    number_of_users: int


class TenantInfo(BaseModel):
    invitation: TenantSnapshot | None = None
    new_tenant: TenantSnapshot | None = None


class UserInfo(BaseModel):
    id: str
    email: str
    is_active: bool
    is_superuser: bool
    is_verified: bool
    role: UserRole
    preferences: UserPreferences
    personalization: UserPersonalization = Field(default_factory=UserPersonalization)
    oidc_expiry: datetime | None = None
    current_token_created_at: datetime | None = None
    current_token_expiry_length: int | None = None
    is_cloud_superuser: bool = False
    team_name: str | None = None
    is_anonymous_user: bool | None = None
    password_configured: bool | None = None
    tenant_info: TenantInfo | None = None

    @classmethod
    def from_model(
        cls,
        user: User,
        current_token_created_at: datetime | None = None,
        expiry_length: int | None = None,
        is_cloud_superuser: bool = False,
        team_name: str | None = None,
        is_anonymous_user: bool | None = None,
        tenant_info: TenantInfo | None = None,
        assistant_specific_configs: UserSpecificAssistantPreferences | None = None,
        memories: list[MemoryItem] | None = None,
    ) -> "UserInfo":
        return cls(
            id=str(user.id),
            email=user.email,
            is_active=user.is_active,
            is_superuser=user.is_superuser,
            is_verified=user.is_verified,
            role=user.role,
            password_configured=user.password_configured,
            preferences=(
                UserPreferences(
                    shortcut_enabled=user.shortcut_enabled,
                    chosen_assistants=user.chosen_assistants,
                    default_model=user.default_model,
                    hidden_assistants=user.hidden_assistants,
                    pinned_assistants=user.pinned_assistants,
                    visible_assistants=user.visible_assistants,
                    auto_scroll=user.auto_scroll,
                    temperature_override_enabled=user.temperature_override_enabled,
                    theme_preference=user.theme_preference,
                    chat_background=user.chat_background,
                    default_app_mode=user.default_app_mode,
                    voice_auto_send=user.voice_auto_send,
                    voice_auto_playback=user.voice_auto_playback,
                    voice_playback_speed=user.voice_playback_speed,
                    assistant_specific_configs=assistant_specific_configs,
                )
            ),
            team_name=team_name,
            # set to None if TRACK_EXTERNAL_IDP_EXPIRY is False so that we avoid cases
            # where they previously had this set + used OIDC, and now they switched to
            # basic auth are now constantly getting redirected back to the login page
            # since their "oidc_expiry is old"
            oidc_expiry=user.oidc_expiry if TRACK_EXTERNAL_IDP_EXPIRY else None,
            current_token_created_at=current_token_created_at,
            current_token_expiry_length=expiry_length,
            is_cloud_superuser=is_cloud_superuser,
            is_anonymous_user=is_anonymous_user,
            tenant_info=tenant_info,
            personalization=UserPersonalization(
                name=user.personal_name or "",
                role=user.personal_role or "",
                use_memories=user.use_memories,
                enable_memory_tool=user.enable_memory_tool,
                memories=memories or [],
                user_preferences=user.user_preferences or "",
            ),
        )


class UserByEmail(BaseModel):
    user_email: str


class UserRoleUpdateRequest(BaseModel):
    user_email: str
    new_role: UserRole
    explicit_override: bool = False


class UserRoleResponse(BaseModel):
    role: str


class BoostDoc(BaseModel):
    document_id: str
    semantic_id: str
    link: str
    boost: int
    hidden: bool


class BoostUpdateRequest(BaseModel):
    document_id: str
    boost: int


class HiddenUpdateRequest(BaseModel):
    document_id: str
    hidden: bool


class AutoScrollRequest(BaseModel):
    auto_scroll: bool | None


class ThemePreferenceRequest(BaseModel):
    theme_preference: ThemePreference


class DefaultAppModeRequest(BaseModel):
    default_app_mode: DefaultAppMode


class ChatBackgroundRequest(BaseModel):
    chat_background: str | None


class VoiceSettingsUpdateRequest(BaseModel):
    auto_send: bool | None = None
    auto_playback: bool | None = None
    playback_speed: float | None = Field(default=None, ge=0.5, le=2.0)


class PersonalizationUpdateRequest(BaseModel):
    name: str | None = None
    role: str | None = None
    use_memories: bool | None = None
    enable_memory_tool: bool | None = None
    memories: list[MemoryItem] | None = None
    user_preferences: str | None = Field(default=None, max_length=500)

    @field_validator("memories", mode="before")
    @classmethod
    def validate_memory_count(
        cls, value: list[MemoryItem] | None
    ) -> list[MemoryItem] | None:
        if value is not None and len(value) > MAX_MEMORIES_PER_USER:
            raise ValueError(f"Maximum of {MAX_MEMORIES_PER_USER} memories allowed")
        return value


class SlackBotCreationRequest(BaseModel):
    name: str
    enabled: bool

    bot_token: str
    app_token: str
    user_token: str | None = None


class SlackBotTokens(BaseModel):
    bot_token: str
    app_token: str
    user_token: str | None = None
    model_config = ConfigDict(frozen=True)


# TODO No longer in use, remove later
class SlackBotResponseType(str, Enum):
    QUOTES = "quotes"
    CITATIONS = "citations"


class SlackChannelConfigCreationRequest(BaseModel):
    slack_bot_id: int
    # currently, a persona is created for each Slack channel config
    # in the future, `document_sets` will probably be replaced
    # by an optional `PersonaSnapshot` object. Keeping it like this
    # for now for simplicity / speed of development
    document_sets: list[int] | None = None

    # NOTE: only one of `document_sets` / `persona_id` should be set
    persona_id: int | None = None

    channel_name: str
    respond_tag_only: bool = False
    respond_to_bots: bool = False
    is_ephemeral: bool = False
    show_continue_in_web_ui: bool = False
    enable_auto_filters: bool = False
    # If no team members, assume respond in the channel to everyone
    respond_member_group_list: list[str] = Field(default_factory=list)
    answer_filters: list[AllowedAnswerFilters] = Field(default_factory=list)
    # list of user emails
    follow_up_tags: list[str] | None = None
    response_type: SlackBotResponseType
    # XXX this is going away soon
    standard_answer_categories: list[int] = Field(default_factory=list)
    disabled: bool = False

    @field_validator("answer_filters", mode="before")
    @classmethod
    def validate_filters(cls, value: list[str]) -> list[str]:
        if any(test not in VALID_SLACK_FILTERS for test in value):
            raise ValueError(
                f"Slack Answer filters must be one of {VALID_SLACK_FILTERS}"
            )
        return value

    @model_validator(mode="after")
    def validate_document_sets_and_persona_id(
        self,
    ) -> "SlackChannelConfigCreationRequest":
        if self.document_sets and self.persona_id:
            raise ValueError("Only one of `document_sets` / `persona_id` should be set")

        return self


class SlackChannelConfig(BaseModel):
    slack_bot_id: int
    id: int
    persona: PersonaSnapshot | None
    channel_config: ChannelConfig
    # XXX this is going away soon
    standard_answer_categories: list["StandardAnswerCategory"]
    enable_auto_filters: bool
    is_default: bool

    @classmethod
    def from_model(
        cls, slack_channel_config_model: SlackChannelConfigModel
    ) -> "SlackChannelConfig":
        return cls(
            id=slack_channel_config_model.id,
            slack_bot_id=slack_channel_config_model.slack_bot_id,
            persona=(
                FullPersonaSnapshot.from_model(
                    slack_channel_config_model.persona, allow_deleted=True
                )
                if slack_channel_config_model.persona
                else None
            ),
            channel_config=slack_channel_config_model.channel_config,
            # XXX this is going away soon
            standard_answer_categories=[
                StandardAnswerCategory.from_model(standard_answer_category_model)
                for standard_answer_category_model in slack_channel_config_model.standard_answer_categories
            ],
            enable_auto_filters=slack_channel_config_model.enable_auto_filters,
            is_default=slack_channel_config_model.is_default,
        )


class SlackBot(BaseModel):
    """
    This model is identical to the SlackAppModel, but it contains
    a `configs_count` field to make it easier to fetch the number
    of SlackChannelConfigs associated with a SlackBot.
    """

    id: int
    name: str
    enabled: bool
    configs_count: int

    bot_token: str
    app_token: str
    user_token: str | None = None

    @classmethod
    def from_model(cls, slack_bot_model: SlackAppModel) -> "SlackBot":
        return cls(
            id=slack_bot_model.id,
            name=slack_bot_model.name,
            enabled=slack_bot_model.enabled,
            configs_count=len(slack_bot_model.slack_channel_configs),
            bot_token=(
                slack_bot_model.bot_token.get_value(apply_mask=True)
                if slack_bot_model.bot_token
                else ""
            ),
            app_token=(
                slack_bot_model.app_token.get_value(apply_mask=True)
                if slack_bot_model.app_token
                else ""
            ),
            user_token=(
                slack_bot_model.user_token.get_value(apply_mask=True)
                if slack_bot_model.user_token
                else None
            ),
        )


class FullModelVersionResponse(BaseModel):
    current_settings: SavedSearchSettings
    secondary_settings: SavedSearchSettings | None


class AllUsersResponse(BaseModel):
    accepted: list[FullUserSnapshot]
    invited: list[InvitedUserSnapshot]
    slack_users: list[FullUserSnapshot]
    accepted_pages: int
    invited_pages: int
    slack_users_pages: int


class SlackChannel(BaseModel):
    id: str
    name: str


"""
Standard Answer Models

ee only, but needs to be here since it's imported by non-ee models.
"""


class StandardAnswerCategoryCreationRequest(BaseModel):
    name: str


class StandardAnswerCategory(BaseModel):
    id: int
    name: str

    @classmethod
    def from_model(
        cls, standard_answer_category: StandardAnswerCategoryModel
    ) -> "StandardAnswerCategory":
        return cls(
            id=standard_answer_category.id,
            name=standard_answer_category.name,
        )


class StandardAnswer(BaseModel):
    id: int
    keyword: str
    answer: str
    categories: list[StandardAnswerCategory]
    match_regex: bool
    match_any_keywords: bool

    @classmethod
    def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer":
        return cls(
            id=standard_answer_model.id,
            keyword=standard_answer_model.keyword,
            answer=standard_answer_model.answer,
            match_regex=standard_answer_model.match_regex,
            match_any_keywords=standard_answer_model.match_any_keywords,
            categories=[
                StandardAnswerCategory.from_model(standard_answer_category_model)
                for standard_answer_category_model in standard_answer_model.categories
            ],
        )


class StandardAnswerCreationRequest(BaseModel):
    keyword: str
    answer: str
    categories: list[int]
    match_regex: bool
    match_any_keywords: bool

    @field_validator("categories", mode="before")
    @classmethod
    def validate_categories(cls, value: list[int]) -> list[int]:
        if len(value) < 1:
            raise ValueError(
                "At least one category must be attached to a standard answer"
            )
        return value

    @model_validator(mode="after")
    def validate_only_match_any_if_not_regex(self) -> Any:
        if self.match_regex and self.match_any_keywords:
            raise ValueError(
                "Can only match any keywords in keyword mode, not regex mode"
            )

        return self

    @model_validator(mode="after")
    def validate_keyword_if_regex(self) -> Any:
        if not self.match_regex:
            # no validation for keywords
            return self

        try:
            re.compile(self.keyword)
            return self
        except re.error as err:
            if isinstance(err.pattern, bytes):
                raise ValueError(
                    f'invalid regex pattern r"{err.pattern.decode()}" in `keyword`: {err.msg}'
                )
            else:
                pattern = f'r"{err.pattern}"' if err.pattern is not None else ""
                raise ValueError(
                    " ".join(
                        ["invalid regex pattern", pattern, f"in `keyword`: {err.msg}"]
                    )
                )


class ContainerVersions(BaseModel):
    onyx: str
    relational_db: str
    index: str
    nginx: str


class AllVersions(BaseModel):
    stable: ContainerVersions
    dev: ContainerVersions
    migration: ContainerVersions


================================================
FILE: backend/onyx/server/manage/opensearch_migration/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.opensearch_migration import get_opensearch_migration_state
from onyx.db.opensearch_migration import get_opensearch_retrieval_state
from onyx.db.opensearch_migration import set_enable_opensearch_retrieval_with_commit
from onyx.server.manage.opensearch_migration.models import (
    OpenSearchMigrationStatusResponse,
)
from onyx.server.manage.opensearch_migration.models import (
    OpenSearchRetrievalStatusRequest,
)
from onyx.server.manage.opensearch_migration.models import (
    OpenSearchRetrievalStatusResponse,
)

admin_router = APIRouter(prefix="/admin/opensearch-migration")


@admin_router.get("/status")
def get_opensearch_migration_status(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> OpenSearchMigrationStatusResponse:
    (
        total_chunks_migrated,
        created_at,
        migration_completed_at,
        approx_chunk_count_in_vespa,
    ) = get_opensearch_migration_state(db_session)
    return OpenSearchMigrationStatusResponse(
        total_chunks_migrated=total_chunks_migrated,
        created_at=created_at,
        migration_completed_at=migration_completed_at,
        approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
    )


@admin_router.get("/retrieval")
def get_opensearch_retrieval_status(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> OpenSearchRetrievalStatusResponse:
    enable_opensearch_retrieval = get_opensearch_retrieval_state(db_session)
    return OpenSearchRetrievalStatusResponse(
        enable_opensearch_retrieval=enable_opensearch_retrieval,
    )


@admin_router.put("/retrieval")
def set_opensearch_retrieval_status(
    request: OpenSearchRetrievalStatusRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> OpenSearchRetrievalStatusResponse:
    set_enable_opensearch_retrieval_with_commit(
        db_session, request.enable_opensearch_retrieval
    )
    return OpenSearchRetrievalStatusResponse(
        enable_opensearch_retrieval=request.enable_opensearch_retrieval,
    )


================================================
FILE: backend/onyx/server/manage/opensearch_migration/models.py
================================================
from datetime import datetime

from pydantic import BaseModel


class OpenSearchMigrationStatusResponse(BaseModel):
    model_config = {"frozen": True}
    total_chunks_migrated: int
    created_at: datetime | None
    migration_completed_at: datetime | None
    approx_chunk_count_in_vespa: int | None


class OpenSearchRetrievalStatusRequest(BaseModel):
    model_config = {"frozen": True}
    enable_opensearch_retrieval: bool


class OpenSearchRetrievalStatusResponse(BaseModel):
    model_config = {"frozen": True}
    enable_opensearch_retrieval: bool


================================================
FILE: backend/onyx/server/manage/search_settings.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import status
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import update_no_default_contextual_rag_provider
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT

router = APIRouter(prefix="/search-settings")
logger = setup_logger()


@router.post("/set-new-search-settings", dependencies=[Depends(require_vector_db)])
def set_new_search_settings(
    search_settings_new: SearchSettingsCreationRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> IdReturn:
    """
    Creates a new SearchSettings row and cancels the previous secondary indexing
    if any exists.
    """
    if search_settings_new.index_name:
        logger.warning("Index name was specified by request, this is not suggested")

    # Disallow contextual RAG for cloud deployments.
    if MULTI_TENANT and search_settings_new.enable_contextual_rag:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Contextual RAG disabled in Onyx Cloud",
        )

    # Validate cloud provider exists or create new LiteLLM provider.
    if search_settings_new.provider_type is not None:
        cloud_provider = get_embedding_provider_from_provider_type(
            db_session, provider_type=search_settings_new.provider_type
        )

        if cloud_provider is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
            )

    validate_contextual_rag_model(
        provider_name=search_settings_new.contextual_rag_llm_provider,
        model_name=search_settings_new.contextual_rag_llm_name,
        db_session=db_session,
    )

    search_settings = get_current_search_settings(db_session)

    if search_settings_new.index_name is None:
        # We define index name here.
        index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
        if (
            search_settings_new.model_name == search_settings.model_name
            and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
        ):
            index_name += ALT_INDEX_SUFFIX
        search_values = search_settings_new.model_dump()
        search_values["index_name"] = index_name
        new_search_settings_request = SavedSearchSettings(**search_values)
    else:
        new_search_settings_request = SavedSearchSettings(
            **search_settings_new.model_dump()
        )

    secondary_search_settings = get_secondary_search_settings(db_session)

    if secondary_search_settings:
        # Cancel any background indexing jobs.
        expire_index_attempts(
            search_settings_id=secondary_search_settings.id, db_session=db_session
        )

        # Mark previous model as a past model directly.
        update_search_settings_status(
            search_settings=secondary_search_settings,
            new_status=IndexModelStatus.PAST,
            db_session=db_session,
        )

    new_search_settings = create_search_settings(
        search_settings=new_search_settings_request, db_session=db_session
    )

    # Ensure the document indices have the new index immediately.
    document_indices = get_all_document_indices(search_settings, new_search_settings)
    for document_index in document_indices:
        document_index.ensure_indices_exist(
            primary_embedding_dim=search_settings.final_embedding_dim,
            primary_embedding_precision=search_settings.embedding_precision,
            secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
            secondary_index_embedding_precision=new_search_settings.embedding_precision,
        )

    # Pause index attempts for the currently in-use index to preserve resources.
    if DISABLE_INDEX_UPDATE_ON_SWAP:
        expire_index_attempts(
            search_settings_id=search_settings.id, db_session=db_session
        )
        for cc_pair in get_connector_credential_pairs(db_session):
            resync_cc_pair(
                cc_pair=cc_pair,
                search_settings_id=new_search_settings.id,
                db_session=db_session,
            )

    db_session.commit()
    return IdReturn(id=new_search_settings.id)


@router.post("/cancel-new-embedding", dependencies=[Depends(require_vector_db)])
def cancel_new_embedding(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    secondary_search_settings = get_secondary_search_settings(db_session)

    if secondary_search_settings:
        expire_index_attempts(
            search_settings_id=secondary_search_settings.id, db_session=db_session
        )

        update_search_settings_status(
            search_settings=secondary_search_settings,
            new_status=IndexModelStatus.PAST,
            db_session=db_session,
        )

        # remove the old index from the vector db
        primary_search_settings = get_current_search_settings(db_session)
        document_index = get_default_document_index(
            primary_search_settings, None, db_session
        )
        document_index.ensure_indices_exist(
            primary_embedding_dim=primary_search_settings.final_embedding_dim,
            primary_embedding_precision=primary_search_settings.embedding_precision,
            # just finished swap, no more secondary index
            secondary_index_embedding_dim=None,
            secondary_index_embedding_precision=None,
        )


@router.delete("/delete-search-settings")
def delete_search_settings_endpoint(
    deletion_request: SearchSettingsDeleteRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        delete_search_settings(
            db_session=db_session,
            search_settings_id=deletion_request.search_settings_id,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))


@router.get("/get-current-search-settings")
def get_current_search_settings_endpoint(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SavedSearchSettings:
    current_search_settings = get_current_search_settings(db_session)
    return SavedSearchSettings.from_db_model(current_search_settings)


@router.get("/get-secondary-search-settings")
def get_secondary_search_settings_endpoint(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SavedSearchSettings | None:
    secondary_search_settings = get_secondary_search_settings(db_session)
    if not secondary_search_settings:
        return None

    return SavedSearchSettings.from_db_model(secondary_search_settings)


@router.get("/get-all-search-settings")
def get_all_search_settings(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> FullModelVersionResponse:
    current_search_settings = get_current_search_settings(db_session)
    secondary_search_settings = get_secondary_search_settings(db_session)
    return FullModelVersionResponse(
        current_settings=SavedSearchSettings.from_db_model(current_search_settings),
        secondary_settings=(
            SavedSearchSettings.from_db_model(secondary_search_settings)
            if secondary_search_settings
            else None
        ),
    )


# Updates current non-reindex search settings
@router.post("/update-inference-settings")
def update_saved_search_settings(
    search_settings: SavedSearchSettings,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    # Disallow contextual RAG for cloud deployments
    if MULTI_TENANT and search_settings.enable_contextual_rag:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Contextual RAG disabled in Onyx Cloud",
        )

    validate_contextual_rag_model(
        provider_name=search_settings.contextual_rag_llm_provider,
        model_name=search_settings.contextual_rag_llm_name,
        db_session=db_session,
    )

    update_current_search_settings(
        search_settings=search_settings, db_session=db_session
    )

    logger.info(
        f"Updated current search settings to {search_settings.model_dump_json()}"
    )

    # Re-sync default to match PRESENT search settings
    _sync_default_contextual_model(db_session)


@router.get("/unstructured-api-key-set")
def unstructured_api_key_set(
    _: User = Depends(current_admin_user),
) -> bool:
    api_key = get_unstructured_api_key()
    return api_key is not None


@router.put("/upsert-unstructured-api-key")
def upsert_unstructured_api_key(
    unstructured_api_key: str,
    _: User = Depends(current_admin_user),
) -> None:
    update_unstructured_api_key(unstructured_api_key)


@router.delete("/delete-unstructured-api-key")
def delete_unstructured_api_key_endpoint(
    _: User = Depends(current_admin_user),
) -> None:
    delete_unstructured_api_key()


def validate_contextual_rag_model(
    provider_name: str | None,
    model_name: str | None,
    db_session: Session,
) -> None:
    if error_msg := _validate_contextual_rag_model(
        provider_name=provider_name,
        model_name=model_name,
        db_session=db_session,
    ):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)


def _validate_contextual_rag_model(
    provider_name: str | None,
    model_name: str | None,
    db_session: Session,
) -> str | None:
    if provider_name is None and model_name is None:
        return None
    if not provider_name or not model_name:
        return "Provider name and model name are required"

    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)
    if not provider:
        return f"Provider {provider_name} not found"
    model_config = next(
        (mc for mc in provider.model_configurations if mc.name == model_name), None
    )
    if not model_config:
        return f"Model {model_name} not found in provider {provider_name}"

    return None


def _sync_default_contextual_model(db_session: Session) -> None:
    """Syncs the default CONTEXTUAL_RAG flow to match the PRESENT search settings."""
    primary = get_current_search_settings(db_session)

    try:
        update_default_contextual_model(
            db_session=db_session,
            enable_contextual_rag=primary.enable_contextual_rag,
            contextual_rag_llm_provider=primary.contextual_rag_llm_provider,
            contextual_rag_llm_name=primary.contextual_rag_llm_name,
        )
    except ValueError as e:
        logger.error(
            f"Error syncing default contextual model, defaulting to no contextual model: {e}"
        )
        update_no_default_contextual_rag_provider(
            db_session=db_session,
        )


================================================
FILE: backend/onyx/server/manage/slack_bot.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.configs.constants import MilestoneRecordType
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import ChannelConfig
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.slack_bot import fetch_slack_bot
from onyx.db.slack_bot import fetch_slack_bots
from onyx.db.slack_bot import insert_slack_bot
from onyx.db.slack_bot import remove_slack_bot
from onyx.db.slack_bot import update_slack_bot
from onyx.db.slack_channel_config import create_slack_channel_persona
from onyx.db.slack_channel_config import fetch_slack_channel_config
from onyx.db.slack_channel_config import fetch_slack_channel_configs
from onyx.db.slack_channel_config import insert_slack_channel_config
from onyx.db.slack_channel_config import remove_slack_channel_config
from onyx.db.slack_channel_config import update_slack_channel_config
from onyx.onyxbot.slack.config import validate_channel_name
from onyx.server.manage.models import SlackBot
from onyx.server.manage.models import SlackBotCreationRequest
from onyx.server.manage.models import SlackChannelConfig
from onyx.server.manage.models import SlackChannelConfigCreationRequest
from onyx.server.manage.validate_tokens import validate_app_token
from onyx.server.manage.validate_tokens import validate_bot_token
from onyx.server.manage.validate_tokens import validate_user_token
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from shared_configs.contextvars import get_current_tenant_id

SLACK_API_CHANNELS_PER_PAGE = 100
SLACK_MAX_RETURNED_CHANNELS = 500

logger = setup_logger()


router = APIRouter(prefix="/manage")


def _form_channel_config(
    db_session: Session,
    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,
    current_slack_channel_config_id: int | None,
) -> ChannelConfig:
    raw_channel_name = slack_channel_config_creation_request.channel_name
    respond_tag_only = slack_channel_config_creation_request.respond_tag_only
    respond_member_group_list = (
        slack_channel_config_creation_request.respond_member_group_list
    )
    answer_filters = slack_channel_config_creation_request.answer_filters
    follow_up_tags = slack_channel_config_creation_request.follow_up_tags

    try:
        cleaned_channel_name = validate_channel_name(
            db_session=db_session,
            channel_name=raw_channel_name,
            current_slack_channel_config_id=current_slack_channel_config_id,
            current_slack_bot_id=slack_channel_config_creation_request.slack_bot_id,
        )
    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail=str(e),
        )

    if respond_tag_only and respond_member_group_list:
        raise ValueError(
            "Cannot set OnyxBot to only respond to tags only and also respond to a predetermined set of users."
        )

    if (
        slack_channel_config_creation_request.is_ephemeral
        and slack_channel_config_creation_request.respond_member_group_list
    ):
        raise ValueError(
            "Cannot set OnyxBot to respond to users in a private (ephemeral) message "
            "and also respond to a selected list of users."
        )

    channel_config: ChannelConfig = {
        "channel_name": cleaned_channel_name,
    }
    if respond_tag_only is not None:
        channel_config["respond_tag_only"] = respond_tag_only
    if respond_member_group_list:
        channel_config["respond_member_group_list"] = respond_member_group_list
    if answer_filters:
        channel_config["answer_filters"] = answer_filters
    if follow_up_tags is not None:
        channel_config["follow_up_tags"] = follow_up_tags

    channel_config["show_continue_in_web_ui"] = (
        slack_channel_config_creation_request.show_continue_in_web_ui
    )

    channel_config["respond_to_bots"] = (
        slack_channel_config_creation_request.respond_to_bots
    )

    channel_config["is_ephemeral"] = slack_channel_config_creation_request.is_ephemeral

    channel_config["disabled"] = slack_channel_config_creation_request.disabled

    return channel_config


@router.post("/admin/slack-app/channel")
def create_slack_channel_config(
    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> SlackChannelConfig:
    channel_config = _form_channel_config(
        db_session=db_session,
        slack_channel_config_creation_request=slack_channel_config_creation_request,
        current_slack_channel_config_id=None,
    )

    if channel_config["channel_name"] is None:
        raise HTTPException(
            status_code=400,
            detail="Channel name is required",
        )

    persona_id = None
    if slack_channel_config_creation_request.persona_id is not None:
        persona_id = slack_channel_config_creation_request.persona_id
    elif slack_channel_config_creation_request.document_sets:
        persona_id = create_slack_channel_persona(
            db_session=db_session,
            channel_name=channel_config["channel_name"],
            document_set_ids=slack_channel_config_creation_request.document_sets,
            existing_persona_id=None,
        ).id

    slack_channel_config_model = insert_slack_channel_config(
        db_session=db_session,
        slack_bot_id=slack_channel_config_creation_request.slack_bot_id,
        persona_id=persona_id,
        channel_config=channel_config,
        standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,
        enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,
    )
    return SlackChannelConfig.from_model(slack_channel_config_model)


@router.patch("/admin/slack-app/channel/{slack_channel_config_id}")
def patch_slack_channel_config(
    slack_channel_config_id: int,
    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> SlackChannelConfig:
    channel_config = _form_channel_config(
        db_session=db_session,
        slack_channel_config_creation_request=slack_channel_config_creation_request,
        current_slack_channel_config_id=slack_channel_config_id,
    )

    persona_id = None
    if slack_channel_config_creation_request.persona_id is not None:
        persona_id = slack_channel_config_creation_request.persona_id
    elif slack_channel_config_creation_request.document_sets:
        existing_slack_channel_config = fetch_slack_channel_config(
            db_session=db_session, slack_channel_config_id=slack_channel_config_id
        )
        if existing_slack_channel_config is None:
            raise HTTPException(
                status_code=404,
                detail="Slack channel config not found",
            )

        existing_persona_id = existing_slack_channel_config.persona_id
        if existing_persona_id is not None:
            persona = get_persona_by_id(
                persona_id=existing_persona_id,
                user=None,
                db_session=db_session,
                is_for_edit=False,
            )

            if not persona.name.startswith(SLACK_BOT_PERSONA_PREFIX):
                # Don't update actual non-slackbot specific personas
                # Since this one specified document sets, we have to create a new persona
                # for this OnyxBot config
                existing_persona_id = None
            else:
                existing_persona_id = existing_slack_channel_config.persona_id

        persona_id = create_slack_channel_persona(
            db_session=db_session,
            channel_name=channel_config["channel_name"],
            document_set_ids=slack_channel_config_creation_request.document_sets,
            existing_persona_id=existing_persona_id,
        ).id

    slack_channel_config_model = update_slack_channel_config(
        db_session=db_session,
        slack_channel_config_id=slack_channel_config_id,
        persona_id=persona_id,
        channel_config=channel_config,
        standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,
        enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,
        disabled=slack_channel_config_creation_request.disabled,
    )
    return SlackChannelConfig.from_model(slack_channel_config_model)


@router.delete("/admin/slack-app/channel/{slack_channel_config_id}")
def delete_slack_channel_config(
    slack_channel_config_id: int,
    db_session: Session = Depends(get_session),
    user: User = Depends(current_admin_user),
) -> None:
    remove_slack_channel_config(
        db_session=db_session,
        slack_channel_config_id=slack_channel_config_id,
        user=user,
    )


@router.get("/admin/slack-app/channel")
def list_slack_channel_configs(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> list[SlackChannelConfig]:
    slack_channel_config_models = fetch_slack_channel_configs(db_session=db_session)
    return [
        SlackChannelConfig.from_model(slack_channel_config_model)
        for slack_channel_config_model in slack_channel_config_models
    ]


@router.post("/admin/slack-app/bots")
def create_bot(
    slack_bot_creation_request: SlackBotCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> SlackBot:
    tenant_id = get_current_tenant_id()

    validate_app_token(slack_bot_creation_request.app_token)
    validate_bot_token(slack_bot_creation_request.bot_token)
    validate_user_token(slack_bot_creation_request.user_token)

    slack_bot_model = insert_slack_bot(
        db_session=db_session,
        name=slack_bot_creation_request.name,
        enabled=slack_bot_creation_request.enabled,
        bot_token=slack_bot_creation_request.bot_token,
        app_token=slack_bot_creation_request.app_token,
        user_token=slack_bot_creation_request.user_token,
    )

    # Create a default Slack channel config
    default_channel_config = ChannelConfig(
        channel_name=None,
        respond_tag_only=True,
    )
    insert_slack_channel_config(
        db_session=db_session,
        slack_bot_id=slack_bot_model.id,
        persona_id=None,
        channel_config=default_channel_config,
        standard_answer_category_ids=[],
        enable_auto_filters=False,
        is_default=True,
    )

    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=tenant_id,
        event=MilestoneRecordType.CREATED_ONYX_BOT,
    )

    return SlackBot.from_model(slack_bot_model)


@router.patch("/admin/slack-app/bots/{slack_bot_id}")
def patch_bot(
    slack_bot_id: int,
    slack_bot_creation_request: SlackBotCreationRequest,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> SlackBot:
    validate_bot_token(slack_bot_creation_request.bot_token)
    validate_app_token(slack_bot_creation_request.app_token)
    validate_user_token(slack_bot_creation_request.user_token)
    slack_bot_model = update_slack_bot(
        db_session=db_session,
        slack_bot_id=slack_bot_id,
        name=slack_bot_creation_request.name,
        enabled=slack_bot_creation_request.enabled,
        bot_token=slack_bot_creation_request.bot_token,
        app_token=slack_bot_creation_request.app_token,
        user_token=slack_bot_creation_request.user_token,
    )
    return SlackBot.from_model(slack_bot_model)


@router.delete("/admin/slack-app/bots/{slack_bot_id}")
def delete_bot(
    slack_bot_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> None:
    remove_slack_bot(
        db_session=db_session,
        slack_bot_id=slack_bot_id,
    )


@router.get("/admin/slack-app/bots/{slack_bot_id}")
def get_bot_by_id(
    slack_bot_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> SlackBot:
    slack_bot_model = fetch_slack_bot(
        db_session=db_session,
        slack_bot_id=slack_bot_id,
    )
    return SlackBot.from_model(slack_bot_model)


@router.get("/admin/slack-app/bots")
def list_bots(
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> list[SlackBot]:
    slack_bot_models = fetch_slack_bots(db_session=db_session)
    return [
        SlackBot.from_model(slack_bot_model) for slack_bot_model in slack_bot_models
    ]


@router.get("/admin/slack-app/bots/{bot_id}/config")
def list_bot_configs(
    bot_id: int,
    db_session: Session = Depends(get_session),
    _: User = Depends(current_admin_user),
) -> list[SlackChannelConfig]:
    slack_bot_config_models = fetch_slack_channel_configs(
        db_session=db_session, slack_bot_id=bot_id
    )
    return [
        SlackChannelConfig.from_model(slack_bot_config_model)
        for slack_bot_config_model in slack_bot_config_models
    ]


================================================
FILE: backend/onyx/server/manage/users.py
================================================
import csv
import io
import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import cast
from uuid import UUID

import jwt
from email_validator import EmailNotValidError
from email_validator import EmailUndeliverableError
from email_validator import validate_email
from fastapi import APIRouter
from fastapi import Body
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.auth.anonymous_user import fetch_anonymous_user_info
from onyx.auth.email_utils import send_user_email_invite
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import remove_user_from_invited_users
from onyx.auth.invited_users import write_invited_users
from onyx.auth.permissions import get_effective_permissions
from onyx.auth.schemas import UserRole
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import enforce_seat_limit
from onyx.auth.users import optional_user
from onyx.configs.app_configs import AUTH_BACKEND
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import AuthBackend
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import ENABLE_EMAIL_INVITES
from onyx.configs.app_configs import NUM_FREE_TRIAL_USER_INVITES
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.api_key import is_api_key_email_address
from onyx.db.auth import get_live_users_count
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccountType
from onyx.db.enums import UserFileStatus
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.user_preferences import activate_user
from onyx.db.user_preferences import deactivate_user
from onyx.db.user_preferences import get_all_user_assistant_specific_configs
from onyx.db.user_preferences import get_latest_access_token_for_user
from onyx.db.user_preferences import get_memories_for_user
from onyx.db.user_preferences import update_assistant_preferences
from onyx.db.user_preferences import update_user_assistant_visibility
from onyx.db.user_preferences import update_user_auto_scroll
from onyx.db.user_preferences import update_user_chat_background
from onyx.db.user_preferences import update_user_default_app_mode
from onyx.db.user_preferences import update_user_default_model
from onyx.db.user_preferences import update_user_personalization
from onyx.db.user_preferences import update_user_pinned_assistants
from onyx.db.user_preferences import update_user_role
from onyx.db.user_preferences import update_user_shortcut_enabled
from onyx.db.user_preferences import update_user_temperature_override_enabled
from onyx.db.user_preferences import update_user_theme_preference
from onyx.db.users import batch_get_user_groups
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_all_accepted_users
from onyx.db.users import get_all_users
from onyx.db.users import get_page_of_filtered_users
from onyx.db.users import get_total_filtered_users_count
from onyx.db.users import get_user_by_email
from onyx.db.users import get_user_counts_by_role_and_status
from onyx.db.users import validate_user_role_update
from onyx.key_value_store.factory import get_kv_store
from onyx.redis.redis_pool import get_raw_redis_client
from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import BulkInviteResponse
from onyx.server.manage.models import ChatBackgroundRequest
from onyx.server.manage.models import DefaultAppModeRequest
from onyx.server.manage.models import EmailInviteStatus
from onyx.server.manage.models import MemoryItem
from onyx.server.manage.models import PersonalizationUpdateRequest
from onyx.server.manage.models import TenantInfo
from onyx.server.manage.models import TenantSnapshot
from onyx.server.manage.models import ThemePreferenceRequest
from onyx.server.manage.models import UserByEmail
from onyx.server.manage.models import UserInfo
from onyx.server.manage.models import UserPreferences
from onyx.server.manage.models import UserRoleResponse
from onyx.server.manage.models import UserRoleUpdateRequest
from onyx.server.manage.models import UserSpecificAssistantPreference
from onyx.server.manage.models import UserSpecificAssistantPreferences
from onyx.server.models import FullUserSnapshot
from onyx.server.models import InvitedUserSnapshot
from onyx.server.models import MinimalUserSnapshot
from onyx.server.models import UserGroupInfo
from onyx.server.usage_limits import is_tenant_on_trial_fn
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()
router = APIRouter()

USERS_PAGE_SIZE = 10


@router.patch("/manage/set-user-role", tags=PUBLIC_API_TAGS)
def set_user_role(
    user_role_update_request: UserRoleUpdateRequest,
    current_user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_to_update = get_user_by_email(
        email=user_role_update_request.user_email, db_session=db_session
    )
    if not user_to_update:
        raise HTTPException(status_code=404, detail="User not found")

    current_role = user_to_update.role
    requested_role = user_role_update_request.new_role
    if requested_role == current_role:
        return

    # This will raise an exception if the role update is invalid
    validate_user_role_update(
        requested_role=requested_role,
        current_role=current_role,
        current_account_type=user_to_update.account_type,
        explicit_override=user_role_update_request.explicit_override,
    )

    if user_to_update.id == current_user.id:
        raise HTTPException(
            status_code=400,
            detail="An admin cannot demote themselves from admin role!",
        )

    if requested_role == UserRole.CURATOR:
        # Remove all curator db relationships before changing role
        fetch_ee_implementation_or_noop(
            "onyx.db.user_group",
            "remove_curator_status__no_commit",
        )(db_session, user_to_update)

    update_user_role(user_to_update, requested_role, db_session)


class TestUpsertRequest(BaseModel):
    email: str


@router.post("/manage/users/test-upsert-user")
async def test_upsert_user(
    request: TestUpsertRequest,
    _: User = Depends(current_admin_user),
) -> None | FullUserSnapshot:
    """Test endpoint for upsert_saml_user. Only used for integration testing."""
    user = await fetch_ee_implementation_or_noop(
        "onyx.server.saml", "upsert_saml_user", None
    )(email=request.email)
    return FullUserSnapshot.from_user_model(user) if user else None


@router.get("/manage/users/accepted", tags=PUBLIC_API_TAGS)
def list_accepted_users(
    q: str | None = Query(default=None),
    page_num: int = Query(0, ge=0),
    page_size: int = Query(10, ge=1, le=1000),
    roles: list[UserRole] = Query(default=[]),
    is_active: bool | None = Query(default=None),
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> PaginatedReturn[FullUserSnapshot]:
    filtered_accepted_users = get_page_of_filtered_users(
        db_session=db_session,
        page_size=page_size,
        page_num=page_num,
        email_filter_string=q,
        is_active_filter=is_active,
        roles_filter=roles,
    )

    total_accepted_users_count = get_total_filtered_users_count(
        db_session=db_session,
        email_filter_string=q,
        is_active_filter=is_active,
        roles_filter=roles,
    )

    if not filtered_accepted_users:
        logger.info("No users found")
        return PaginatedReturn(
            items=[],
            total_items=0,
        )

    user_ids = [user.id for user in filtered_accepted_users]
    groups_by_user = batch_get_user_groups(db_session, user_ids)

    # Batch-fetch SCIM mappings to mark synced users
    scim_synced_ids: set[UUID] = set()
    try:
        from onyx.db.models import ScimUserMapping

        scim_mappings = db_session.scalars(
            select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))
        ).all()
        scim_synced_ids = set(scim_mappings)
    except Exception:
        logger.warning(
            "Failed to fetch SCIM mappings; marking all users as non-synced",
            exc_info=True,
        )

    return PaginatedReturn(
        items=[
            FullUserSnapshot.from_user_model(
                user,
                groups=[
                    UserGroupInfo(id=gid, name=gname)
                    for gid, gname in groups_by_user.get(user.id, [])
                ],
                is_scim_synced=user.id in scim_synced_ids,
            )
            for user in filtered_accepted_users
        ],
        total_items=total_accepted_users_count,
    )


@router.get("/manage/users/accepted/all", tags=PUBLIC_API_TAGS)
def list_all_accepted_users(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[FullUserSnapshot]:
    """Returns all accepted users without pagination.
    Used by the admin Users page for client-side filtering/sorting."""
    users = get_all_accepted_users(db_session=db_session)

    if not users:
        return []

    user_ids = [user.id for user in users]
    groups_by_user = batch_get_user_groups(db_session, user_ids)

    # Batch-fetch SCIM mappings to mark synced users
    scim_synced_ids: set[UUID] = set()
    try:
        from onyx.db.models import ScimUserMapping

        scim_mappings = db_session.scalars(
            select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))
        ).all()
        scim_synced_ids = set(scim_mappings)
    except Exception:
        logger.warning(
            "Failed to fetch SCIM mappings; marking all users as non-synced",
            exc_info=True,
        )

    return [
        FullUserSnapshot.from_user_model(
            user,
            groups=[
                UserGroupInfo(id=gid, name=gname)
                for gid, gname in groups_by_user.get(user.id, [])
            ],
            is_scim_synced=user.id in scim_synced_ids,
        )
        for user in users
    ]


@router.get("/manage/users/counts")
def get_user_counts(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, dict[str, int]]:
    return get_user_counts_by_role_and_status(db_session)


@router.get("/manage/users/invited", tags=PUBLIC_API_TAGS)
def list_invited_users(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[InvitedUserSnapshot]:
    invited_emails = get_invited_users()

    # Filter out users who are already active in the system
    active_user_emails = {user.email for user in get_all_users(db_session)}
    filtered_invited_emails = [
        email for email in invited_emails if email not in active_user_emails
    ]

    return [InvitedUserSnapshot(email=email) for email in filtered_invited_emails]


@router.get("/manage/users", tags=PUBLIC_API_TAGS)
def list_all_users(
    q: str | None = None,
    accepted_page: int | None = None,
    slack_users_page: int | None = None,
    invited_page: int | None = None,
    include_api_keys: bool = False,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> AllUsersResponse:
    users = [
        user
        for user in get_all_users(db_session, email_filter_string=q)
        if (include_api_keys or not is_api_key_email_address(user.email))
    ]

    slack_users = [user for user in users if user.account_type == AccountType.BOT]
    accepted_users = [user for user in users if user.account_type != AccountType.BOT]

    accepted_emails = {user.email for user in accepted_users}
    slack_users_emails = {user.email for user in slack_users}
    invited_emails = get_invited_users()

    # Filter out users who are already active (either accepted or slack users)
    all_active_emails = accepted_emails | slack_users_emails
    invited_emails = [
        email for email in invited_emails if email not in all_active_emails
    ]

    if q:
        invited_emails = [
            email for email in invited_emails if re.search(r"{}".format(q), email, re.I)
        ]

    accepted_count = len(accepted_emails)
    slack_users_count = len(slack_users_emails)
    invited_count = len(invited_emails)

    # If any of q, accepted_page, or invited_page is None, return all users
    if accepted_page is None or invited_page is None or slack_users_page is None:
        return AllUsersResponse(
            accepted=[
                FullUserSnapshot.from_user_model(user) for user in accepted_users
            ],
            slack_users=[
                FullUserSnapshot.from_user_model(user) for user in slack_users
            ],
            invited=[InvitedUserSnapshot(email=email) for email in invited_emails],
            accepted_pages=1,
            invited_pages=1,
            slack_users_pages=1,
        )

    # Otherwise, return paginated results
    return AllUsersResponse(
        accepted=[FullUserSnapshot.from_user_model(user) for user in accepted_users][
            accepted_page * USERS_PAGE_SIZE : (accepted_page + 1) * USERS_PAGE_SIZE
        ],
        slack_users=[FullUserSnapshot.from_user_model(user) for user in slack_users][
            slack_users_page
            * USERS_PAGE_SIZE : (slack_users_page + 1)
            * USERS_PAGE_SIZE
        ],
        invited=[InvitedUserSnapshot(email=email) for email in invited_emails][
            invited_page * USERS_PAGE_SIZE : (invited_page + 1) * USERS_PAGE_SIZE
        ],
        accepted_pages=(accepted_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,
        invited_pages=(invited_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,
        slack_users_pages=(slack_users_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,
    )


@router.get("/manage/users/download")
def download_users_csv(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StreamingResponse:
    """Download all users as a CSV file."""
    # Get all users from the database
    users = get_all_users(db_session)

    # Create CSV content in memory
    output = io.StringIO()
    writer = csv.writer(output)

    # Write CSV header
    writer.writerow(["Email", "Role", "Status"])

    # Write user data
    for user in users:
        writer.writerow(
            [
                user.email,
                user.role.value if user.role else "",
                "Active" if user.is_active else "Inactive",
            ]
        )

    # Prepare the CSV content for download
    csv_content = output.getvalue()
    output.close()

    return StreamingResponse(
        io.BytesIO(csv_content.encode("utf-8")),
        media_type="text/csv",
        headers={"Content-Disposition": "attachment;"},
    )


@router.put("/manage/admin/users", tags=PUBLIC_API_TAGS)
def bulk_invite_users(
    emails: list[str] = Body(..., embed=True),
    current_user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> BulkInviteResponse:
    """emails are string validated. If any email fails validation, no emails are
    invited and an exception is raised."""
    tenant_id = get_current_tenant_id()

    new_invited_emails = []
    email: str

    try:
        for email in emails:
            # Allow syntactically valid emails without DNS deliverability checks; tests use test domains
            email_info = validate_email(email, check_deliverability=False)
            new_invited_emails.append(email_info.normalized)

    except (EmailUndeliverableError, EmailNotValidError) as e:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid email address: {email} - {str(e)}",
        )

    # Count only new users (not already invited or existing) that need seats
    existing_users = {user.email for user in get_all_users(db_session)}
    already_invited = set(get_invited_users())
    emails_needing_seats = [
        e
        for e in new_invited_emails
        if e not in existing_users and e not in already_invited
    ]

    # Limit bulk invites for trial tenants to prevent email spam
    # Only count new invites, not re-invites of existing users
    if MULTI_TENANT and is_tenant_on_trial_fn(tenant_id):
        current_invited = len(already_invited)
        if current_invited + len(emails_needing_seats) > NUM_FREE_TRIAL_USER_INVITES:
            raise HTTPException(
                status_code=403,
                detail="You have hit your invite limit. Please upgrade for unlimited invites.",
            )

    # Check seat availability for new users
    if emails_needing_seats:
        enforce_seat_limit(db_session, seats_needed=len(emails_needing_seats))

    if MULTI_TENANT:
        try:
            fetch_ee_implementation_or_noop(
                "onyx.server.tenants.provisioning", "add_users_to_tenant", None
            )(new_invited_emails, tenant_id)

        except Exception as e:
            logger.error(f"Failed to add users to tenant {tenant_id}: {str(e)}")

    initial_invited_users = get_invited_users()

    all_emails = list(set(new_invited_emails) | set(initial_invited_users))
    number_of_invited_users = write_invited_users(all_emails)

    # send out email invitations only to new users (not already invited or existing)
    if not ENABLE_EMAIL_INVITES:
        email_invite_status = EmailInviteStatus.DISABLED
    elif not EMAIL_CONFIGURED:
        email_invite_status = EmailInviteStatus.NOT_CONFIGURED
    else:
        try:
            for email in emails_needing_seats:
                send_user_email_invite(email, current_user, AUTH_TYPE)
            email_invite_status = EmailInviteStatus.SENT
        except Exception as e:
            logger.error(f"Error sending email invite to invited users: {e}")
            email_invite_status = EmailInviteStatus.SEND_FAILED

    if MULTI_TENANT and not DEV_MODE:
        # for billing purposes, write to the control plane about the number of new users
        try:
            logger.info("Registering tenant users")
            fetch_ee_implementation_or_noop(
                "onyx.server.tenants.billing", "register_tenant_users", None
            )(tenant_id, get_live_users_count(db_session))
        except Exception as e:
            logger.error(f"Failed to register tenant users: {str(e)}")
            logger.info(
                "Reverting changes: removing users from tenant and resetting invited users"
            )
            write_invited_users(initial_invited_users)  # Reset to original state
            fetch_ee_implementation_or_noop(
                "onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
            )(new_invited_emails, tenant_id)
            raise e

    return BulkInviteResponse(
        invited_count=number_of_invited_users,
        email_invite_status=email_invite_status,
    )


@router.patch("/manage/admin/remove-invited-user", tags=PUBLIC_API_TAGS)
def remove_invited_user(
    user_email: UserByEmail,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> int:
    tenant_id = get_current_tenant_id()
    if MULTI_TENANT:
        fetch_ee_implementation_or_noop(
            "onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
        )([user_email.user_email], tenant_id)
    number_of_invited_users = remove_user_from_invited_users(user_email.user_email)

    try:
        if MULTI_TENANT and not DEV_MODE:
            fetch_ee_implementation_or_noop(
                "onyx.server.tenants.billing", "register_tenant_users", None
            )(tenant_id, get_live_users_count(db_session))
    except Exception:
        logger.error(
            "Request to update number of seats taken in control plane failed. "
            "This may cause synchronization issues/out of date enforcement of seat limits."
        )
        raise

    return number_of_invited_users


@router.patch("/manage/admin/deactivate-user", tags=PUBLIC_API_TAGS)
def deactivate_user_api(
    user_email: UserByEmail,
    current_user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    if current_user.email == user_email.user_email:
        raise HTTPException(status_code=400, detail="You cannot deactivate yourself")

    user_to_deactivate = get_user_by_email(
        email=user_email.user_email, db_session=db_session
    )

    if not user_to_deactivate:
        raise HTTPException(status_code=404, detail="User not found")

    if user_to_deactivate.is_active is False:
        logger.warning("{} is already deactivated".format(user_to_deactivate.email))

    deactivate_user(user_to_deactivate, db_session)

    # Invalidate license cache so used_seats reflects the new count
    # Only for self-hosted (non-multi-tenant) deployments
    if not MULTI_TENANT:
        fetch_ee_implementation_or_noop(
            "onyx.db.license", "invalidate_license_cache", None
        )()


@router.delete("/manage/admin/delete-user", tags=PUBLIC_API_TAGS)
async def delete_user(
    user_email: UserByEmail,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_to_delete = get_user_by_email(
        email=user_email.user_email, db_session=db_session
    )
    if not user_to_delete:
        raise HTTPException(status_code=404, detail="User not found")

    if user_to_delete.is_active is True:
        logger.warning(
            "{} must be deactivated before deleting".format(user_to_delete.email)
        )
        raise HTTPException(
            status_code=400, detail="User must be deactivated before deleting"
        )

    # Detach the user from the current session
    db_session.expunge(user_to_delete)

    try:
        tenant_id = get_current_tenant_id()
        fetch_ee_implementation_or_noop(
            "onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
        )([user_email.user_email], tenant_id)
        delete_user_from_db(user_to_delete, db_session)
        logger.info(f"Deleted user {user_to_delete.email}")

        # Invalidate license cache so used_seats reflects the new count
        # Only for self-hosted (non-multi-tenant) deployments
        if not MULTI_TENANT:
            fetch_ee_implementation_or_noop(
                "onyx.db.license", "invalidate_license_cache", None
            )()

    except Exception as e:
        db_session.rollback()
        logger.error(f"Error deleting user {user_to_delete.email}: {str(e)}")
        raise HTTPException(status_code=500, detail="Error deleting user")


@router.patch("/manage/admin/activate-user", tags=PUBLIC_API_TAGS)
def activate_user_api(
    user_email: UserByEmail,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_to_activate = get_user_by_email(
        email=user_email.user_email, db_session=db_session
    )
    if not user_to_activate:
        raise HTTPException(status_code=404, detail="User not found")

    if user_to_activate.is_active is True:
        logger.warning("{} is already activated".format(user_to_activate.email))
        return

    # Check seat availability before activating
    # Only for self-hosted (non-multi-tenant) deployments
    enforce_seat_limit(db_session)

    activate_user(user_to_activate, db_session)

    # Invalidate license cache so used_seats reflects the new count
    # Only for self-hosted (non-multi-tenant) deployments
    if not MULTI_TENANT:
        fetch_ee_implementation_or_noop(
            "onyx.db.license", "invalidate_license_cache", None
        )()


@router.get("/manage/admin/valid-domains")
def get_valid_domains(
    _: User = Depends(current_admin_user),
) -> list[str]:
    return VALID_EMAIL_DOMAINS


"""Endpoints for all"""


@router.get("/users", tags=PUBLIC_API_TAGS)
def list_all_users_basic_info(
    include_api_keys: bool = False,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[MinimalUserSnapshot]:
    users = get_all_users(db_session)
    return [
        MinimalUserSnapshot(id=user.id, email=user.email)
        for user in users
        if user.account_type != AccountType.BOT
        and (include_api_keys or not is_api_key_email_address(user.email))
    ]


@router.get("/get-user-role", tags=PUBLIC_API_TAGS)
async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse:
    return UserRoleResponse(role=user.role)


def get_current_auth_token_creation_redis(
    user: User, request: Request
) -> datetime | None:
    """Calculate the token creation time from Redis TTL information.

    This function retrieves the authentication token from cookies,
    checks its TTL in Redis, and calculates when the token was created.
    Despite the function name, it returns the token creation time, not the expiration time.
    """
    # Anonymous users don't have auth tokens
    if user.is_anonymous:
        return None
    try:
        # Get the token from the request
        token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)
        if not token:
            logger.debug("No auth token cookie found")
            return None

        # Get the Redis client
        redis = get_raw_redis_client()
        redis_key = REDIS_AUTH_KEY_PREFIX + token

        # Get the TTL of the token
        ttl = cast(int, redis.ttl(redis_key))
        if ttl <= 0:
            logger.error("Token has expired or doesn't exist in Redis")
            return None

        # Calculate the creation time based on TTL and session expiry
        # Current time minus (total session length minus remaining TTL)
        current_time = datetime.now(timezone.utc)
        token_creation_time = current_time - timedelta(
            seconds=(SESSION_EXPIRE_TIME_SECONDS - ttl)
        )

        return token_creation_time

    except Exception as e:
        logger.error(f"Error retrieving token expiration from Redis: {e}")
        return None


def get_current_token_creation_postgres(
    user: User, db_session: Session
) -> datetime | None:
    # Anonymous users don't have auth tokens
    if user.is_anonymous:
        return None

    access_token = get_latest_access_token_for_user(user.id, db_session)
    if access_token:
        return access_token.created_at
    else:
        logger.error("No AccessToken found for user")
        return None


def get_current_token_creation_jwt(user: User, request: Request) -> datetime | None:
    """Extract token creation time from the ``iat`` claim of a JWT cookie."""
    if user.is_anonymous:
        return None

    token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)
    if not token:
        return None

    try:
        payload = jwt.decode(
            token,
            USER_AUTH_SECRET,
            algorithms=["HS256"],
            audience=["fastapi-users:auth"],
        )
        iat = payload.get("iat")
        if iat is None:
            return None
        return datetime.fromtimestamp(iat, tz=timezone.utc)
    except jwt.PyJWTError:
        logger.error("Failed to decode JWT for iat claim")
        return None


def _get_token_created_at(
    user: User, request: Request, db_session: Session
) -> datetime | None:
    if AUTH_BACKEND == AuthBackend.REDIS:
        return get_current_auth_token_creation_redis(user, request)
    if AUTH_BACKEND == AuthBackend.JWT:
        return get_current_token_creation_jwt(user, request)
    return get_current_token_creation_postgres(user, db_session)


@router.get("/me/permissions", tags=PUBLIC_API_TAGS)
def get_current_user_permissions(
    user: User = Depends(current_user),
) -> list[str]:
    return sorted(p.value for p in get_effective_permissions(user))


@router.get("/me", tags=PUBLIC_API_TAGS)
def verify_user_logged_in(
    request: Request,
    user: User | None = Depends(optional_user),
    db_session: Session = Depends(get_session),
) -> UserInfo:
    tenant_id = get_current_tenant_id()

    # User can be None if not authenticated.
    # We use optional_user to allow unverified users to access this endpoint.
    if user is None:
        # If anonymous access is enabled, return anonymous user info
        if anonymous_user_enabled(tenant_id=tenant_id):
            store = get_kv_store()
            return fetch_anonymous_user_info(store)
        raise BasicAuthenticationError(detail="Unauthorized")

    if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc):
        raise BasicAuthenticationError(
            detail="Access denied. User's OIDC token has expired.",
        )

    token_created_at = _get_token_created_at(user, request, db_session)

    team_name = fetch_ee_implementation_or_noop(
        "onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
    )(user.email)

    new_tenant: TenantSnapshot | None = None
    tenant_invitation: TenantSnapshot | None = None

    if MULTI_TENANT:
        if team_name != get_current_tenant_id():
            user_count = fetch_ee_implementation_or_noop(
                "onyx.server.tenants.user_mapping", "get_tenant_count", None
            )(team_name)
            new_tenant = TenantSnapshot(tenant_id=team_name, number_of_users=user_count)

        tenant_invitation = fetch_ee_implementation_or_noop(
            "onyx.server.tenants.user_mapping", "get_tenant_invitation", None
        )(user.email)

    super_users_list = cast(
        list[str],
        fetch_versioned_implementation_with_fallback(
            "onyx.configs.app_configs",
            "SUPER_USERS",
            [],
        ),
    )
    memories = [
        MemoryItem(id=memory.id, content=memory.memory_text)
        for memory in get_memories_for_user(user.id, db_session)
    ]

    user_info = UserInfo.from_model(
        user,
        current_token_created_at=token_created_at,
        expiry_length=SESSION_EXPIRE_TIME_SECONDS,
        is_cloud_superuser=user.email in super_users_list,
        team_name=team_name,
        tenant_info=TenantInfo(
            new_tenant=new_tenant,
            invitation=tenant_invitation,
        ),
        memories=memories,
    )

    return user_info


"""APIs to adjust user preferences"""


@router.patch("/temperature-override-enabled")
def update_user_temperature_override_enabled_api(
    temperature_override_enabled: bool,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_temperature_override_enabled(
        user.id, temperature_override_enabled, db_session
    )


class ChosenDefaultModelRequest(BaseModel):
    default_model: str | None = None


@router.patch("/shortcut-enabled")
def update_user_shortcut_enabled_api(
    shortcut_enabled: bool,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_shortcut_enabled(user.id, shortcut_enabled, db_session)


@router.patch("/auto-scroll")
def update_user_auto_scroll_api(
    request: AutoScrollRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_auto_scroll(user.id, request.auto_scroll, db_session)


@router.patch("/user/theme-preference")
def update_user_theme_preference_api(
    request: ThemePreferenceRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_theme_preference(user.id, request.theme_preference, db_session)


@router.patch("/user/chat-background")
def update_user_chat_background_api(
    request: ChatBackgroundRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_chat_background(user.id, request.chat_background, db_session)


@router.patch("/user/default-app-mode")
def update_user_default_app_mode_api(
    request: DefaultAppModeRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_default_app_mode(user.id, request.default_app_mode, db_session)


@router.patch("/user/default-model")
def update_user_default_model_api(
    request: ChosenDefaultModelRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_user_default_model(user.id, request.default_model, db_session)


@router.patch("/user/personalization")
def update_user_personalization_api(
    request: PersonalizationUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    new_name = request.name if request.name is not None else user.personal_name
    new_role = request.role if request.role is not None else user.personal_role
    current_use_memories = user.use_memories
    new_use_memories = (
        request.use_memories
        if request.use_memories is not None
        else current_use_memories
    )
    new_enable_memory_tool = (
        request.enable_memory_tool
        if request.enable_memory_tool is not None
        else user.enable_memory_tool
    )
    existing_memories = [
        MemoryItem(id=memory.id, content=memory.memory_text)
        for memory in get_memories_for_user(user.id, db_session)
    ]
    new_memories = (
        request.memories if request.memories is not None else existing_memories
    )
    new_user_preferences = (
        request.user_preferences
        if request.user_preferences is not None
        else user.user_preferences
    )

    update_user_personalization(
        user.id,
        personal_name=new_name,
        personal_role=new_role,
        use_memories=new_use_memories,
        enable_memory_tool=new_enable_memory_tool,
        memories=new_memories,
        user_preferences=new_user_preferences,
        db_session=db_session,
    )


class ReorderPinnedAssistantsRequest(BaseModel):
    ordered_assistant_ids: list[int]


@router.patch("/user/pinned-assistants")
def update_user_pinned_assistants_api(
    request: ReorderPinnedAssistantsRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    ordered_assistant_ids = request.ordered_assistant_ids
    update_user_pinned_assistants(user.id, ordered_assistant_ids, db_session)


class ChosenAssistantsRequest(BaseModel):
    chosen_assistants: list[int]


def update_assistant_visibility(
    preferences: UserPreferences, assistant_id: int, show: bool
) -> UserPreferences:
    visible_assistants = preferences.visible_assistants or []
    hidden_assistants = preferences.hidden_assistants or []

    if show:
        if assistant_id not in visible_assistants:
            visible_assistants.append(assistant_id)
        if assistant_id in hidden_assistants:
            hidden_assistants.remove(assistant_id)
    else:
        if assistant_id in visible_assistants:
            visible_assistants.remove(assistant_id)
        if assistant_id not in hidden_assistants:
            hidden_assistants.append(assistant_id)

    preferences.visible_assistants = visible_assistants
    preferences.hidden_assistants = hidden_assistants
    return preferences


@router.patch("/user/assistant-list/update/{assistant_id}")
def update_user_assistant_visibility_api(
    assistant_id: int,
    show: bool,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_preferences = UserInfo.from_model(user).preferences
    updated_preferences = update_assistant_visibility(
        user_preferences, assistant_id, show
    )
    if updated_preferences.chosen_assistants is not None:
        updated_preferences.chosen_assistants.append(assistant_id)
    update_user_assistant_visibility(
        user.id,
        updated_preferences.hidden_assistants,
        updated_preferences.visible_assistants,
        updated_preferences.chosen_assistants,
        db_session,
    )


@router.get("/user/assistant/preferences")
def get_user_assistant_preferences(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserSpecificAssistantPreferences | None:
    """Fetch all assistant preferences for the user."""
    assistant_specific_configs = get_all_user_assistant_specific_configs(
        user.id, db_session
    )
    return {
        config.assistant_id: UserSpecificAssistantPreference(
            disabled_tool_ids=config.disabled_tool_ids
        )
        for config in assistant_specific_configs
    }


@router.patch("/user/assistant/{assistant_id}/preferences")
def update_assistant_preferences_for_user_api(
    assistant_id: int,
    new_assistant_preference: UserSpecificAssistantPreference,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    update_assistant_preferences(
        assistant_id, user.id, new_assistant_preference, db_session
    )
    db_session.commit()


@router.get("/user/files/recent")
def get_recent_files(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    user_id = user.id
    user_files = (
        db_session.query(UserFile)
        .filter(UserFile.user_id == user_id)
        .filter(UserFile.status != UserFileStatus.FAILED)
        .filter(UserFile.status != UserFileStatus.DELETING)
        .order_by(UserFile.last_accessed_at.desc())
        .all()
    )

    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]


================================================
FILE: backend/onyx/server/manage/validate_tokens.py
================================================
import requests
from fastapi import HTTPException

from onyx.configs.constants import SLACK_USER_TOKEN_PREFIX

SLACK_API_URL = "https://slack.com/api/auth.test"
SLACK_CONNECTIONS_OPEN_URL = "https://slack.com/api/apps.connections.open"


def validate_bot_token(bot_token: str) -> bool:
    headers = {"Authorization": f"Bearer {bot_token}"}
    response = requests.post(SLACK_API_URL, headers=headers)

    if response.status_code != 200:
        raise HTTPException(
            status_code=500, detail="Error communicating with Slack API."
        )

    data = response.json()
    if not data.get("ok", False):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid bot token: {data.get('error', 'Unknown error')}",
        )

    return True


def validate_app_token(app_token: str) -> bool:
    headers = {"Authorization": f"Bearer {app_token}"}
    response = requests.post(SLACK_CONNECTIONS_OPEN_URL, headers=headers)

    if response.status_code != 200:
        raise HTTPException(
            status_code=500, detail="Error communicating with Slack API."
        )

    data = response.json()
    if not data.get("ok", False):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid app token: {data.get('error', 'Unknown error')}",
        )

    return True


def validate_user_token(user_token: str | None) -> None:
    """
    Validate that the user_token is a valid user OAuth token (xoxp-...)
    and not a bot token (xoxb-...)
    Args:
        user_token: The user OAuth token to validate.
    Returns:
        None is valid and will return successfully.
    Raises:
        HTTPException: If the token is invalid or missing required fields
    """
    if not user_token:
        # user_token is optional, so None or empty string is valid
        return

    if not user_token.startswith(SLACK_USER_TOKEN_PREFIX):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid user token format. User OAuth tokens must start with '{SLACK_USER_TOKEN_PREFIX}'",
        )

    # Test the token with Slack API to ensure it's valid
    headers = {"Authorization": f"Bearer {user_token}"}
    response = requests.post(SLACK_API_URL, headers=headers)

    if response.status_code != 200:
        raise HTTPException(
            status_code=500, detail="Error communicating with Slack API."
        )

    data = response.json()
    if not data.get("ok", False):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid user token: {data.get('error', 'Unknown error')}",
        )


================================================
FILE: backend/onyx/server/manage/voice/__init__.py
================================================


================================================
FILE: backend/onyx/server/manage/voice/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from fastapi import Response
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import User
from onyx.db.models import VoiceProvider
from onyx.db.voice import deactivate_stt_provider
from onyx.db.voice import deactivate_tts_provider
from onyx.db.voice import delete_voice_provider
from onyx.db.voice import fetch_voice_provider_by_id
from onyx.db.voice import fetch_voice_provider_by_type
from onyx.db.voice import fetch_voice_providers
from onyx.db.voice import set_default_stt_provider
from onyx.db.voice import set_default_tts_provider
from onyx.db.voice import upsert_voice_provider
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.manage.voice.models import VoiceOption
from onyx.server.manage.voice.models import VoiceProviderTestRequest
from onyx.server.manage.voice.models import VoiceProviderUpdateSuccess
from onyx.server.manage.voice.models import VoiceProviderUpsertRequest
from onyx.server.manage.voice.models import VoiceProviderView
from onyx.utils.logger import setup_logger
from onyx.utils.url import SSRFException
from onyx.utils.url import validate_outbound_http_url
from onyx.voice.factory import get_voice_provider

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/voice")

VOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE = (
    "Connection test failed. Please verify your API key and settings."
)


def _validate_voice_api_base(provider_type: str, api_base: str | None) -> str | None:
    """Validate and normalize provider api_base / target URI."""
    if api_base is None:
        return None

    allow_private_network = provider_type.lower() == "azure"
    try:
        return validate_outbound_http_url(
            api_base, allow_private_network=allow_private_network
        )
    except (ValueError, SSRFException) as e:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            f"Invalid target URI: {str(e)}",
        ) from e


def _provider_to_view(provider: VoiceProvider) -> VoiceProviderView:
    """Convert a VoiceProvider model to a VoiceProviderView."""
    return VoiceProviderView(
        id=provider.id,
        name=provider.name,
        provider_type=provider.provider_type,
        is_default_stt=provider.is_default_stt,
        is_default_tts=provider.is_default_tts,
        stt_model=provider.stt_model,
        tts_model=provider.tts_model,
        default_voice=provider.default_voice,
        has_api_key=bool(provider.api_key),
        target_uri=provider.api_base,  # api_base stores the target URI for Azure
    )


@admin_router.get("/providers")
def list_voice_providers(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[VoiceProviderView]:
    """List all configured voice providers."""
    providers = fetch_voice_providers(db_session)
    return [_provider_to_view(provider) for provider in providers]


@admin_router.post("/providers")
async def upsert_voice_provider_endpoint(
    request: VoiceProviderUpsertRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderView:
    """Create or update a voice provider."""
    api_key = request.api_key
    api_key_changed = request.api_key_changed

    # If llm_provider_id is specified, copy the API key from that LLM provider
    if request.llm_provider_id is not None:
        llm_provider = db_session.get(LLMProviderModel, request.llm_provider_id)
        if llm_provider is None:
            raise OnyxError(
                OnyxErrorCode.NOT_FOUND,
                f"LLM provider with id {request.llm_provider_id} not found.",
            )
        if llm_provider.api_key is None:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                "Selected LLM provider has no API key configured.",
            )
        api_key = llm_provider.api_key.get_value(apply_mask=False)
        api_key_changed = True

    # Use target_uri if provided, otherwise fall back to api_base
    api_base = _validate_voice_api_base(
        request.provider_type, request.target_uri or request.api_base
    )

    provider = upsert_voice_provider(
        db_session=db_session,
        provider_id=request.id,
        name=request.name,
        provider_type=request.provider_type,
        api_key=api_key,
        api_key_changed=api_key_changed,
        api_base=api_base,
        custom_config=request.custom_config,
        stt_model=request.stt_model,
        tts_model=request.tts_model,
        default_voice=request.default_voice,
        activate_stt=request.activate_stt,
        activate_tts=request.activate_tts,
    )

    # Validate credentials before committing - rollback on failure
    try:
        voice_provider = get_voice_provider(provider)
        await voice_provider.validate_credentials()
    except OnyxError:
        db_session.rollback()
        raise
    except Exception as e:
        db_session.rollback()
        logger.error(f"Voice provider credential validation failed on save: {e}")
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            VOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE,
        ) from e

    db_session.commit()

    return _provider_to_view(provider)


@admin_router.delete(
    "/providers/{provider_id}", status_code=204, response_class=Response
)
def delete_voice_provider_endpoint(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Delete a voice provider."""
    delete_voice_provider(db_session, provider_id)
    db_session.commit()
    return Response(status_code=204)


@admin_router.post("/providers/{provider_id}/activate-stt")
def activate_stt_provider_endpoint(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderView:
    """Set a voice provider as the default STT provider."""
    provider = set_default_stt_provider(db_session=db_session, provider_id=provider_id)
    db_session.commit()
    return _provider_to_view(provider)


@admin_router.post("/providers/{provider_id}/deactivate-stt")
def deactivate_stt_provider_endpoint(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderUpdateSuccess:
    """Remove the default STT status from a voice provider."""
    deactivate_stt_provider(db_session=db_session, provider_id=provider_id)
    db_session.commit()
    return VoiceProviderUpdateSuccess()


@admin_router.post("/providers/{provider_id}/activate-tts")
def activate_tts_provider_endpoint(
    provider_id: int,
    tts_model: str | None = None,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderView:
    """Set a voice provider as the default TTS provider."""
    provider = set_default_tts_provider(
        db_session=db_session, provider_id=provider_id, tts_model=tts_model
    )
    db_session.commit()
    return _provider_to_view(provider)


@admin_router.post("/providers/{provider_id}/deactivate-tts")
def deactivate_tts_provider_endpoint(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderUpdateSuccess:
    """Remove the default TTS status from a voice provider."""
    deactivate_tts_provider(db_session=db_session, provider_id=provider_id)
    db_session.commit()
    return VoiceProviderUpdateSuccess()


@admin_router.post("/providers/test")
async def test_voice_provider(
    request: VoiceProviderTestRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> VoiceProviderUpdateSuccess:
    """Test a voice provider connection by making a real API call."""
    api_key = request.api_key

    if request.use_stored_key:
        existing_provider = fetch_voice_provider_by_type(
            db_session, request.provider_type
        )
        if existing_provider is None or not existing_provider.api_key:
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                "No stored API key found for this provider type.",
            )
        api_key = existing_provider.api_key.get_value(apply_mask=False)

    if not api_key:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "API key is required. Either provide api_key or set use_stored_key to true.",
        )

    # Use target_uri if provided, otherwise fall back to api_base
    api_base = _validate_voice_api_base(
        request.provider_type, request.target_uri or request.api_base
    )

    # Create a temporary VoiceProvider for testing (not saved to DB)
    temp_provider = VoiceProvider(
        name="__test__",
        provider_type=request.provider_type,
        api_base=api_base,
        custom_config=request.custom_config or {},
    )
    temp_provider.api_key = api_key  # type: ignore[assignment]

    try:
        provider = get_voice_provider(temp_provider)
    except ValueError as exc:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc

    # Validate credentials with a real API call
    try:
        await provider.validate_credentials()
    except OnyxError:
        raise
    except Exception as e:
        logger.error(f"Voice provider connection test failed: {e}")
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            VOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE,
        ) from e

    logger.info(f"Voice provider test succeeded for {request.provider_type}.")
    return VoiceProviderUpdateSuccess()


@admin_router.get("/providers/{provider_id}/voices")
def get_provider_voices(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[VoiceOption]:
    """Get available voices for a provider."""
    provider_db = fetch_voice_provider_by_id(db_session, provider_id)
    if provider_db is None:
        raise OnyxError(OnyxErrorCode.NOT_FOUND, "Voice provider not found.")

    if not provider_db.api_key:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR, "Provider has no API key configured."
        )

    try:
        provider = get_voice_provider(provider_db)
    except ValueError as exc:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc

    return [VoiceOption(**voice) for voice in provider.get_available_voices()]


@admin_router.get("/voices")
def get_voices_by_type(
    provider_type: str,
    _: User = Depends(current_admin_user),
) -> list[VoiceOption]:
    """Get available voices for a provider type.

    For providers like ElevenLabs and OpenAI, this fetches voices
    without requiring an existing provider configuration.
    """
    # Create a temporary VoiceProvider to get static voice list
    temp_provider = VoiceProvider(
        name="__temp__",
        provider_type=provider_type,
    )

    try:
        provider = get_voice_provider(temp_provider)
    except ValueError as exc:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc

    return [VoiceOption(**voice) for voice in provider.get_available_voices()]


================================================
FILE: backend/onyx/server/manage/voice/models.py
================================================
from typing import Any

from pydantic import BaseModel
from pydantic import Field


class VoiceProviderView(BaseModel):
    """Response model for voice provider listing."""

    id: int
    name: str
    provider_type: str  # "openai", "azure", "elevenlabs"
    is_default_stt: bool
    is_default_tts: bool
    stt_model: str | None
    tts_model: str | None
    default_voice: str | None
    has_api_key: bool = Field(
        default=False,
        description="Indicates whether an API key is stored for this provider.",
    )
    target_uri: str | None = Field(
        default=None,
        description="Target URI for Azure Speech Services.",
    )


class VoiceProviderUpdateSuccess(BaseModel):
    """Simple status response for voice provider actions."""

    status: str = "ok"


class VoiceOption(BaseModel):
    """Voice option returned by voice providers."""

    id: str
    name: str


class VoiceProviderUpsertRequest(BaseModel):
    """Request model for creating or updating a voice provider."""

    id: int | None = Field(default=None, description="Existing provider ID to update.")
    name: str
    provider_type: str  # "openai", "azure", "elevenlabs"
    api_key: str | None = Field(
        default=None,
        description="API key for the provider.",
    )
    api_key_changed: bool = Field(
        default=False,
        description="Set to true when providing a new API key for an existing provider.",
    )
    llm_provider_id: int | None = Field(
        default=None,
        description="If set, copies the API key from the specified LLM provider.",
    )
    api_base: str | None = None
    target_uri: str | None = Field(
        default=None,
        description="Target URI for Azure Speech Services (maps to api_base).",
    )
    custom_config: dict[str, Any] | None = None
    stt_model: str | None = None
    tts_model: str | None = None
    default_voice: str | None = None
    activate_stt: bool = Field(
        default=False,
        description="If true, sets this provider as the default STT provider after upsert.",
    )
    activate_tts: bool = Field(
        default=False,
        description="If true, sets this provider as the default TTS provider after upsert.",
    )


class VoiceProviderTestRequest(BaseModel):
    """Request model for testing a voice provider connection."""

    provider_type: str
    api_key: str | None = Field(
        default=None,
        description="API key for testing. If not provided, use_stored_key must be true.",
    )
    use_stored_key: bool = Field(
        default=False,
        description="If true, use the stored API key for this provider type.",
    )
    api_base: str | None = None
    target_uri: str | None = Field(
        default=None,
        description="Target URI for Azure Speech Services (maps to api_base).",
    )
    custom_config: dict[str, Any] | None = None


================================================
FILE: backend/onyx/server/manage/voice/user_api.py
================================================
import secrets
from collections.abc import AsyncIterator

from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Query
from fastapi import UploadFile
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.db.voice import fetch_default_stt_provider
from onyx.db.voice import fetch_default_tts_provider
from onyx.db.voice import update_user_voice_settings
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import store_ws_token
from onyx.redis.redis_pool import WsTokenRateLimitExceeded
from onyx.server.manage.models import VoiceSettingsUpdateRequest
from onyx.utils.logger import setup_logger
from onyx.voice.factory import get_voice_provider

logger = setup_logger()

router = APIRouter(prefix="/voice")

# Max audio file size: 25MB (Whisper limit)
MAX_AUDIO_SIZE = 25 * 1024 * 1024
# Chunk size for streaming uploads (8KB)
UPLOAD_READ_CHUNK_SIZE = 8192


class VoiceStatusResponse(BaseModel):
    stt_enabled: bool
    tts_enabled: bool


@router.get("/status")
def get_voice_status(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> VoiceStatusResponse:
    """Check whether STT and TTS providers are configured and ready."""
    stt_provider = fetch_default_stt_provider(db_session)
    tts_provider = fetch_default_tts_provider(db_session)
    return VoiceStatusResponse(
        stt_enabled=stt_provider is not None and stt_provider.api_key is not None,
        tts_enabled=tts_provider is not None and tts_provider.api_key is not None,
    )


@router.post("/transcribe")
async def transcribe_audio(
    audio: UploadFile = File(...),
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    """Transcribe audio to text using the default STT provider."""
    provider_db = fetch_default_stt_provider(db_session)
    if provider_db is None:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "No speech-to-text provider configured. Please contact your administrator.",
        )

    if not provider_db.api_key:
        raise OnyxError(
            OnyxErrorCode.VALIDATION_ERROR,
            "Voice provider API key not configured.",
        )

    # Read in chunks to enforce size limit during streaming (prevents OOM attacks)
    chunks: list[bytes] = []
    total = 0
    while chunk := await audio.read(UPLOAD_READ_CHUNK_SIZE):
        total += len(chunk)
        if total > MAX_AUDIO_SIZE:
            raise OnyxError(
                OnyxErrorCode.PAYLOAD_TOO_LARGE,
                f"Audio file too large. Maximum size is {MAX_AUDIO_SIZE // (1024 * 1024)}MB.",
            )
        chunks.append(chunk)
    audio_data = b"".join(chunks)

    # Extract format from filename
    filename = audio.filename or "audio.webm"
    audio_format = filename.rsplit(".", 1)[-1] if "." in filename else "webm"

    try:
        provider = get_voice_provider(provider_db)
    except ValueError as exc:
        raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc

    try:
        text = await provider.transcribe(audio_data, audio_format)
        return {"text": text}
    except NotImplementedError as exc:
        raise OnyxError(
            OnyxErrorCode.NOT_IMPLEMENTED,
            f"Speech-to-text not implemented for {provider_db.provider_type}.",
        ) from exc
    except Exception as exc:
        logger.error(f"Transcription failed: {exc}")
        raise OnyxError(
            OnyxErrorCode.INTERNAL_ERROR,
            "Transcription failed. Please try again.",
        ) from exc


@router.post("/synthesize")
async def synthesize_speech(
    text: str | None = Query(
        default=None, description="Text to synthesize", max_length=4096
    ),
    voice: str | None = Query(default=None, description="Voice ID to use"),
    speed: float | None = Query(
        default=None, description="Playback speed (0.5-2.0)", ge=0.5, le=2.0
    ),
    user: User = Depends(current_user),
) -> StreamingResponse:
    """
    Synthesize text to speech using the default TTS provider.

    Accepts parameters via query string for streaming compatibility.
    """
    logger.info(
        f"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}"
    )

    if not text:
        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Text is required")

    # Use short-lived session to fetch provider config, then release connection
    # before starting the long-running streaming response
    with get_session_with_current_tenant() as db_session:
        provider_db = fetch_default_tts_provider(db_session)
        if provider_db is None:
            logger.error("No TTS provider configured")
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                "No text-to-speech provider configured. Please contact your administrator.",
            )

        if not provider_db.api_key:
            logger.error("TTS provider has no API key")
            raise OnyxError(
                OnyxErrorCode.VALIDATION_ERROR,
                "Voice provider API key not configured.",
            )

        # Use request voice or provider default
        final_voice = voice or provider_db.default_voice
        # Use explicit None checks to avoid falsy float issues (0.0 would be skipped with `or`)
        final_speed = (
            speed
            if speed is not None
            else (
                user.voice_playback_speed
                if user.voice_playback_speed is not None
                else 1.0
            )
        )

        logger.info(
            f"TTS using provider: {provider_db.provider_type}, voice: {final_voice}, speed: {final_speed}"
        )

        try:
            provider = get_voice_provider(provider_db)
        except ValueError as exc:
            logger.error(f"Failed to get voice provider: {exc}")
            raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc

    # Session is now closed - streaming response won't hold DB connection
    async def audio_stream() -> AsyncIterator[bytes]:
        try:
            chunk_count = 0
            async for chunk in provider.synthesize_stream(
                text=text, voice=final_voice, speed=final_speed
            ):
                chunk_count += 1
                yield chunk
            logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
        except NotImplementedError as exc:
            logger.error(f"TTS not implemented: {exc}")
            raise
        except Exception as exc:
            logger.error(f"Synthesis failed: {exc}")
            raise

    return StreamingResponse(
        audio_stream(),
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": "inline; filename=speech.mp3",
            # Allow streaming by not setting content-length
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
        },
    )


@router.patch("/settings")
def update_voice_settings(
    request: VoiceSettingsUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    """Update user's voice settings."""
    update_user_voice_settings(
        db_session=db_session,
        user_id=user.id,
        auto_send=request.auto_send,
        auto_playback=request.auto_playback,
        playback_speed=request.playback_speed,
    )
    db_session.commit()
    return {"status": "ok"}


class WSTokenResponse(BaseModel):
    token: str


@router.post("/ws-token")
async def get_ws_token(
    user: User = Depends(current_user),
) -> WSTokenResponse:
    """
    Generate a short-lived token for WebSocket authentication.

    This token should be passed as a query parameter when connecting
    to voice WebSocket endpoints (e.g., /voice/transcribe/stream?token=xxx).

    The token expires after 60 seconds and is single-use.
    Rate limited to 10 tokens per minute per user.
    """
    token = secrets.token_urlsafe(32)
    try:
        await store_ws_token(token, str(user.id))
    except WsTokenRateLimitExceeded:
        raise OnyxError(
            OnyxErrorCode.RATE_LIMITED,
            "Too many token requests. Please wait before requesting another.",
        )
    return WSTokenResponse(token=token)


================================================
FILE: backend/onyx/server/manage/voice/websocket_api.py
================================================
"""WebSocket API for streaming speech-to-text and text-to-speech."""

import asyncio
import io
import json
import os
from collections.abc import MutableMapping
from typing import Any

from fastapi import APIRouter
from fastapi import Depends
from fastapi import WebSocket
from fastapi import WebSocketDisconnect
from sqlalchemy.orm import Session

from onyx.auth.users import current_user_from_websocket
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.db.models import User
from onyx.db.voice import fetch_default_stt_provider
from onyx.db.voice import fetch_default_tts_provider
from onyx.utils.logger import setup_logger
from onyx.voice.factory import get_voice_provider
from onyx.voice.interface import StreamingSynthesizerProtocol
from onyx.voice.interface import StreamingTranscriberProtocol
from onyx.voice.interface import TranscriptResult

logger = setup_logger()

router = APIRouter(prefix="/voice")


# Transcribe every ~0.5 seconds of audio (webm/opus is ~2-4KB/s, so ~1-2KB per 0.5s)
MIN_CHUNK_BYTES = 1500
VOICE_DISABLE_STREAMING_FALLBACK = (
    os.environ.get("VOICE_DISABLE_STREAMING_FALLBACK", "").lower() == "true"
)

# WebSocket size limits to prevent memory exhaustion attacks
WS_MAX_MESSAGE_SIZE = 64 * 1024  # 64KB per message (OWASP recommendation)
WS_MAX_TOTAL_BYTES = 25 * 1024 * 1024  # 25MB total per connection (matches REST API)
WS_MAX_TEXT_MESSAGE_SIZE = 16 * 1024  # 16KB for text/JSON messages
WS_MAX_TTS_TEXT_LENGTH = 4096  # Max text length per synthesize call (matches REST API)


class ChunkedTranscriber:
    """Fallback transcriber for providers without streaming support."""

    def __init__(self, provider: Any, audio_format: str = "webm"):
        self.provider = provider
        self.audio_format = audio_format
        self.chunk_buffer = io.BytesIO()
        self.full_audio = io.BytesIO()
        self.chunk_bytes = 0
        self.transcripts: list[str] = []

    async def add_chunk(self, chunk: bytes) -> str | None:
        """Add audio chunk. Returns transcript if enough audio accumulated."""
        self.chunk_buffer.write(chunk)
        self.full_audio.write(chunk)
        self.chunk_bytes += len(chunk)

        if self.chunk_bytes >= MIN_CHUNK_BYTES:
            return await self._transcribe_chunk()
        return None

    async def _transcribe_chunk(self) -> str | None:
        """Transcribe current chunk and append to running transcript."""
        audio_data = self.chunk_buffer.getvalue()
        if not audio_data:
            return None

        try:
            transcript = await self.provider.transcribe(audio_data, self.audio_format)
            self.chunk_buffer = io.BytesIO()
            self.chunk_bytes = 0

            if transcript and transcript.strip():
                self.transcripts.append(transcript.strip())
                return " ".join(self.transcripts)
            return None
        except Exception as e:
            logger.error(f"Transcription error: {e}")
            self.chunk_buffer = io.BytesIO()
            self.chunk_bytes = 0
            return None

    async def flush(self) -> str:
        """Get final transcript from full audio for best accuracy."""
        full_audio_data = self.full_audio.getvalue()
        if full_audio_data:
            try:
                transcript = await self.provider.transcribe(
                    full_audio_data, self.audio_format
                )
                if transcript and transcript.strip():
                    return transcript.strip()
            except Exception as e:
                logger.error(f"Final transcription error: {e}")
        return " ".join(self.transcripts)


async def handle_streaming_transcription(
    websocket: WebSocket,
    transcriber: StreamingTranscriberProtocol,
) -> None:
    """Handle transcription using native streaming API."""
    logger.info("Streaming transcription: starting handler")
    last_transcript = ""
    chunk_count = 0
    total_bytes = 0

    async def receive_transcripts() -> None:
        """Background task to receive and send transcripts."""
        nonlocal last_transcript
        logger.info("Streaming transcription: starting transcript receiver")
        while True:
            result: TranscriptResult | None = await transcriber.receive_transcript()
            if result is None:  # End of stream
                logger.info("Streaming transcription: transcript stream ended")
                break
            # Send if text changed OR if VAD detected end of speech (for auto-send trigger)
            if result.text and (result.text != last_transcript or result.is_vad_end):
                last_transcript = result.text
                logger.debug(
                    f"Streaming transcription: got transcript: {result.text[:50]}... (is_vad_end={result.is_vad_end})"
                )
                await websocket.send_json(
                    {
                        "type": "transcript",
                        "text": result.text,
                        "is_final": result.is_vad_end,
                    }
                )

    # Start receiving transcripts in background
    receive_task = asyncio.create_task(receive_transcripts())

    try:
        while True:
            message = await websocket.receive()
            msg_type = message.get("type", "unknown")

            if msg_type == "websocket.disconnect":
                logger.info(
                    f"Streaming transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)"
                )
                break

            if "bytes" in message:
                chunk_size = len(message["bytes"])

                # Enforce per-message size limit
                if chunk_size > WS_MAX_MESSAGE_SIZE:
                    logger.warning(
                        f"Streaming transcription: message too large ({chunk_size} bytes)"
                    )
                    await websocket.send_json(
                        {"type": "error", "message": "Message too large"}
                    )
                    break

                # Enforce total connection size limit
                if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:
                    logger.warning(
                        f"Streaming transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)"
                    )
                    await websocket.send_json(
                        {"type": "error", "message": "Total size limit exceeded"}
                    )
                    break

                chunk_count += 1
                total_bytes += chunk_size
                logger.debug(
                    f"Streaming transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})"
                )
                await transcriber.send_audio(message["bytes"])

            elif "text" in message:
                try:
                    data = json.loads(message["text"])
                    logger.debug(
                        f"Streaming transcription: received text message: {data}"
                    )
                    if data.get("type") == "end":
                        logger.info(
                            "Streaming transcription: end signal received, closing transcriber"
                        )
                        final_transcript = await transcriber.close()
                        receive_task.cancel()
                        logger.info(
                            "Streaming transcription: final transcript: "
                            f"{final_transcript[:100] if final_transcript else '(empty)'}..."
                        )
                        await websocket.send_json(
                            {
                                "type": "transcript",
                                "text": final_transcript,
                                "is_final": True,
                            }
                        )
                        break
                    elif data.get("type") == "reset":
                        # Reset accumulated transcript after auto-send
                        logger.info(
                            "Streaming transcription: reset signal received, clearing transcript"
                        )
                        transcriber.reset_transcript()
                except json.JSONDecodeError:
                    logger.warning(
                        f"Streaming transcription: failed to parse JSON: {message.get('text', '')[:100]}"
                    )
    except Exception as e:
        logger.error(f"Streaming transcription: error: {e}", exc_info=True)
        raise
    finally:
        receive_task.cancel()
        try:
            await receive_task
        except asyncio.CancelledError:
            pass
        logger.info(
            f"Streaming transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes"
        )


async def handle_chunked_transcription(
    websocket: WebSocket,
    transcriber: ChunkedTranscriber,
) -> None:
    """Handle transcription using chunked batch API."""
    logger.info("Chunked transcription: starting handler")
    chunk_count = 0
    total_bytes = 0

    while True:
        message = await websocket.receive()
        msg_type = message.get("type", "unknown")

        if msg_type == "websocket.disconnect":
            logger.info(
                f"Chunked transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)"
            )
            break

        if "bytes" in message:
            chunk_size = len(message["bytes"])

            # Enforce per-message size limit
            if chunk_size > WS_MAX_MESSAGE_SIZE:
                logger.warning(
                    f"Chunked transcription: message too large ({chunk_size} bytes)"
                )
                await websocket.send_json(
                    {"type": "error", "message": "Message too large"}
                )
                break

            # Enforce total connection size limit
            if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:
                logger.warning(
                    f"Chunked transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)"
                )
                await websocket.send_json(
                    {"type": "error", "message": "Total size limit exceeded"}
                )
                break

            chunk_count += 1
            total_bytes += chunk_size
            logger.debug(
                f"Chunked transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})"
            )

            transcript = await transcriber.add_chunk(message["bytes"])
            if transcript:
                logger.debug(
                    f"Chunked transcription: got transcript: {transcript[:50]}..."
                )
                await websocket.send_json(
                    {
                        "type": "transcript",
                        "text": transcript,
                        "is_final": False,
                    }
                )

        elif "text" in message:
            try:
                data = json.loads(message["text"])
                logger.debug(f"Chunked transcription: received text message: {data}")
                if data.get("type") == "end":
                    logger.info("Chunked transcription: end signal received, flushing")
                    final_transcript = await transcriber.flush()
                    logger.info(
                        f"Chunked transcription: final transcript: {final_transcript[:100] if final_transcript else '(empty)'}..."
                    )
                    await websocket.send_json(
                        {
                            "type": "transcript",
                            "text": final_transcript,
                            "is_final": True,
                        }
                    )
                    break
            except json.JSONDecodeError:
                logger.warning(
                    f"Chunked transcription: failed to parse JSON: {message.get('text', '')[:100]}"
                )

    logger.info(
        f"Chunked transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes"
    )


@router.websocket("/transcribe/stream")
async def websocket_transcribe(
    websocket: WebSocket,
    _user: User = Depends(current_user_from_websocket),
) -> None:
    """
    WebSocket endpoint for streaming speech-to-text.

    Protocol:
    - Client sends binary audio chunks
    - Server sends JSON: {"type": "transcript", "text": "...", "is_final": false}
    - Client sends JSON {"type": "end"} to signal end
    - Server responds with final transcript and closes

    Authentication:
        Requires `token` query parameter (e.g., /voice/transcribe/stream?token=xxx).
        Applies same auth checks as HTTP endpoints (verification, role checks).
    """
    logger.info("WebSocket transcribe: connection request received (authenticated)")

    try:
        await websocket.accept()
        logger.info("WebSocket transcribe: connection accepted")
    except Exception as e:
        logger.error(f"WebSocket transcribe: failed to accept connection: {e}")
        return

    streaming_transcriber = None
    provider = None

    try:
        # Get STT provider
        logger.info("WebSocket transcribe: fetching STT provider from database")
        engine = get_sqlalchemy_engine()
        with Session(engine) as db_session:
            provider_db = fetch_default_stt_provider(db_session)
            if provider_db is None:
                logger.warning(
                    "WebSocket transcribe: no default STT provider configured"
                )
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "No speech-to-text provider configured",
                    }
                )
                return

            if not provider_db.api_key:
                logger.warning("WebSocket transcribe: STT provider has no API key")
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "Speech-to-text provider has no API key configured",
                    }
                )
                return

            logger.info(
                f"WebSocket transcribe: creating voice provider: {provider_db.provider_type}"
            )
            try:
                provider = get_voice_provider(provider_db)
                logger.info(
                    f"WebSocket transcribe: voice provider created, streaming supported: {provider.supports_streaming_stt()}"
                )
            except ValueError as e:
                logger.error(
                    f"WebSocket transcribe: failed to create voice provider: {e}"
                )
                await websocket.send_json({"type": "error", "message": str(e)})
                return

        # Use native streaming if provider supports it
        if provider.supports_streaming_stt():
            logger.info("WebSocket transcribe: using native streaming STT")
            try:
                streaming_transcriber = await provider.create_streaming_transcriber()
                logger.info(
                    "WebSocket transcribe: streaming transcriber created successfully"
                )
                await handle_streaming_transcription(websocket, streaming_transcriber)
            except Exception as e:
                logger.error(
                    f"WebSocket transcribe: failed to create streaming transcriber: {e}"
                )
                if VOICE_DISABLE_STREAMING_FALLBACK:
                    await websocket.send_json(
                        {"type": "error", "message": f"Streaming STT failed: {e}"}
                    )
                    return
                logger.info("WebSocket transcribe: falling back to chunked STT")
                # Browser stream provides raw PCM16 chunks over WebSocket.
                chunked_transcriber = ChunkedTranscriber(provider, audio_format="pcm16")
                await handle_chunked_transcription(websocket, chunked_transcriber)
        else:
            # Fall back to chunked transcription
            if VOICE_DISABLE_STREAMING_FALLBACK:
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "Provider doesn't support streaming STT",
                    }
                )
                return
            logger.info(
                "WebSocket transcribe: using chunked STT (provider doesn't support streaming)"
            )
            chunked_transcriber = ChunkedTranscriber(provider, audio_format="pcm16")
            await handle_chunked_transcription(websocket, chunked_transcriber)

    except WebSocketDisconnect:
        logger.debug("WebSocket transcribe: client disconnected")
    except Exception as e:
        logger.error(f"WebSocket transcribe: unhandled error: {e}", exc_info=True)
        try:
            # Send generic error to avoid leaking sensitive details
            await websocket.send_json(
                {"type": "error", "message": "An unexpected error occurred"}
            )
        except Exception:
            pass
    finally:
        if streaming_transcriber:
            try:
                await streaming_transcriber.close()
            except Exception:
                pass
        try:
            await websocket.close()
        except Exception:
            pass
        logger.info("WebSocket transcribe: connection closed")


async def handle_streaming_synthesis(
    websocket: WebSocket,
    synthesizer: StreamingSynthesizerProtocol,
) -> None:
    """Handle TTS using native streaming API."""
    logger.info("Streaming synthesis: starting handler")

    async def send_audio() -> None:
        """Background task to send audio chunks to client."""
        chunk_count = 0
        total_bytes = 0
        try:
            while True:
                audio_chunk = await synthesizer.receive_audio()
                if audio_chunk is None:
                    logger.info(
                        f"Streaming synthesis: audio stream ended, sent {chunk_count} chunks, {total_bytes} bytes"
                    )
                    try:
                        await websocket.send_json({"type": "audio_done"})
                        logger.info("Streaming synthesis: sent audio_done to client")
                    except Exception as e:
                        logger.warning(
                            f"Streaming synthesis: failed to send audio_done: {e}"
                        )
                    break
                if audio_chunk:  # Skip empty chunks
                    chunk_count += 1
                    total_bytes += len(audio_chunk)
                    try:
                        await websocket.send_bytes(audio_chunk)
                    except Exception as e:
                        logger.warning(
                            f"Streaming synthesis: failed to send chunk: {e}"
                        )
                        break
        except asyncio.CancelledError:
            logger.info(
                f"Streaming synthesis: send_audio cancelled after {chunk_count} chunks"
            )
        except Exception as e:
            logger.error(f"Streaming synthesis: send_audio error: {e}")

    send_task: asyncio.Task | None = None
    disconnected = False

    try:
        while not disconnected:
            try:
                message = await websocket.receive()
            except WebSocketDisconnect:
                logger.info("Streaming synthesis: client disconnected")
                break

            msg_type = message.get("type", "unknown")  # type: ignore[possibly-undefined]

            if msg_type == "websocket.disconnect":
                logger.info("Streaming synthesis: client disconnected")
                disconnected = True
                break

            if "text" in message:
                # Enforce text message size limit
                msg_size = len(message["text"])
                if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:
                    logger.warning(
                        f"Streaming synthesis: text message too large ({msg_size} bytes)"
                    )
                    await websocket.send_json(
                        {"type": "error", "message": "Message too large"}
                    )
                    break

                try:
                    data = json.loads(message["text"])

                    if data.get("type") == "synthesize":
                        text = data.get("text", "")
                        # Enforce per-text size limit
                        if len(text) > WS_MAX_TTS_TEXT_LENGTH:
                            logger.warning(
                                f"Streaming synthesis: text too long ({len(text)} chars)"
                            )
                            await websocket.send_json(
                                {"type": "error", "message": "Text too long"}
                            )
                            continue
                        if text:
                            # Start audio receiver on first text chunk so playback
                            # can begin before the full assistant response completes.
                            if send_task is None:
                                send_task = asyncio.create_task(send_audio())
                            logger.debug(
                                f"Streaming synthesis: forwarding text chunk ({len(text)} chars)"
                            )
                            await synthesizer.send_text(text)

                    elif data.get("type") == "end":
                        logger.info("Streaming synthesis: end signal received")

                        # Ensure receiver is active even if no prior text chunks arrived.
                        if send_task is None:
                            send_task = asyncio.create_task(send_audio())

                        # Signal end of input
                        if hasattr(synthesizer, "flush"):
                            await synthesizer.flush()

                        # Wait for all audio to be sent
                        logger.info(
                            "Streaming synthesis: waiting for audio stream to complete"
                        )
                        try:
                            await asyncio.wait_for(send_task, timeout=60.0)
                        except asyncio.TimeoutError:
                            logger.warning(
                                "Streaming synthesis: timeout waiting for audio"
                            )
                        break

                except json.JSONDecodeError:
                    logger.warning(
                        f"Streaming synthesis: failed to parse JSON: {message.get('text', '')[:100]}"
                    )

    except WebSocketDisconnect:
        logger.debug("Streaming synthesis: client disconnected during synthesis")
    except Exception as e:
        logger.error(f"Streaming synthesis: error: {e}", exc_info=True)
    finally:
        if send_task and not send_task.done():
            logger.info("Streaming synthesis: waiting for send_task to finish")
            try:
                await asyncio.wait_for(send_task, timeout=30.0)
            except asyncio.TimeoutError:
                logger.warning("Streaming synthesis: timeout waiting for send_task")
                send_task.cancel()
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
            except asyncio.CancelledError:
                pass
        logger.info("Streaming synthesis: handler finished")


async def handle_chunked_synthesis(
    websocket: WebSocket,
    provider: Any,
    first_message: MutableMapping[str, Any] | None = None,
) -> None:
    """Fallback TTS handler using provider.synthesize_stream.

    Args:
        websocket: The WebSocket connection
        provider: Voice provider instance
        first_message: Optional first message already received (used when falling
            back from streaming mode, where the first message was already consumed)
    """
    logger.info("Chunked synthesis: starting handler")
    text_buffer: list[str] = []
    voice: str | None = None
    speed = 1.0

    # Process pre-received message if provided
    pending_message = first_message

    try:
        while True:
            if pending_message is not None:
                message = pending_message
                pending_message = None
            else:
                message = await websocket.receive()
            msg_type = message.get("type", "unknown")

            if msg_type == "websocket.disconnect":
                logger.info("Chunked synthesis: client disconnected")
                break

            if "text" not in message:
                continue

            # Enforce text message size limit
            msg_size = len(message["text"])
            if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:
                logger.warning(
                    f"Chunked synthesis: text message too large ({msg_size} bytes)"
                )
                await websocket.send_json(
                    {"type": "error", "message": "Message too large"}
                )
                break

            try:
                data = json.loads(message["text"])
            except json.JSONDecodeError:
                logger.warning(
                    f"Chunked synthesis: failed to parse JSON: {message.get('text', '')[:100]}"
                )
                continue

            msg_data_type = data.get("type")  # type: ignore[possibly-undefined]
            if msg_data_type == "synthesize":
                text = data.get("text", "")
                # Enforce per-text size limit
                if len(text) > WS_MAX_TTS_TEXT_LENGTH:
                    logger.warning(
                        f"Chunked synthesis: text too long ({len(text)} chars)"
                    )
                    await websocket.send_json(
                        {"type": "error", "message": "Text too long"}
                    )
                    continue
                if text:
                    text_buffer.append(text)
                    logger.debug(
                        f"Chunked synthesis: buffered text ({len(text)} chars), total buffered: {len(text_buffer)} chunks"
                    )
                if isinstance(data.get("voice"), str) and data["voice"]:
                    voice = data["voice"]
                if isinstance(data.get("speed"), (int, float)):
                    speed = float(data["speed"])
            elif msg_data_type == "end":
                logger.info("Chunked synthesis: end signal received")
                full_text = " ".join(text_buffer).strip()
                if not full_text:
                    await websocket.send_json({"type": "audio_done"})
                    logger.info("Chunked synthesis: no text, sent audio_done")
                    break

                chunk_count = 0
                total_bytes = 0
                logger.info(
                    f"Chunked synthesis: sending full text ({len(full_text)} chars)"
                )
                async for audio_chunk in provider.synthesize_stream(
                    full_text, voice=voice, speed=speed
                ):
                    if not audio_chunk:
                        continue
                    chunk_count += 1
                    total_bytes += len(audio_chunk)
                    await websocket.send_bytes(audio_chunk)
                await websocket.send_json({"type": "audio_done"})
                logger.info(
                    f"Chunked synthesis: sent audio_done after {chunk_count} chunks, {total_bytes} bytes"
                )
                break
    except WebSocketDisconnect:
        logger.debug("Chunked synthesis: client disconnected")
    except Exception as e:
        logger.error(f"Chunked synthesis: error: {e}", exc_info=True)
        raise
    finally:
        logger.info("Chunked synthesis: handler finished")


@router.websocket("/synthesize/stream")
async def websocket_synthesize(
    websocket: WebSocket,
    _user: User = Depends(current_user_from_websocket),
) -> None:
    """
    WebSocket endpoint for streaming text-to-speech.

    Protocol:
    - Client sends JSON: {"type": "synthesize", "text": "...", "voice": "...", "speed": 1.0}
    - Server sends binary audio chunks
    - Server sends JSON: {"type": "audio_done"} when synthesis completes
    - Client sends JSON {"type": "end"} to close connection

    Authentication:
        Requires `token` query parameter (e.g., /voice/synthesize/stream?token=xxx).
        Applies same auth checks as HTTP endpoints (verification, role checks).
    """
    logger.info("WebSocket synthesize: connection request received (authenticated)")

    try:
        await websocket.accept()
        logger.info("WebSocket synthesize: connection accepted")
    except Exception as e:
        logger.error(f"WebSocket synthesize: failed to accept connection: {e}")
        return

    streaming_synthesizer: StreamingSynthesizerProtocol | None = None
    provider = None

    try:
        # Get TTS provider
        logger.info("WebSocket synthesize: fetching TTS provider from database")
        engine = get_sqlalchemy_engine()
        with Session(engine) as db_session:
            provider_db = fetch_default_tts_provider(db_session)
            if provider_db is None:
                logger.warning(
                    "WebSocket synthesize: no default TTS provider configured"
                )
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "No text-to-speech provider configured",
                    }
                )
                return

            if not provider_db.api_key:
                logger.warning("WebSocket synthesize: TTS provider has no API key")
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "Text-to-speech provider has no API key configured",
                    }
                )
                return

            logger.info(
                f"WebSocket synthesize: creating voice provider: {provider_db.provider_type}"
            )
            try:
                provider = get_voice_provider(provider_db)
                logger.info(
                    f"WebSocket synthesize: voice provider created, streaming TTS supported: {provider.supports_streaming_tts()}"
                )
            except ValueError as e:
                logger.error(
                    f"WebSocket synthesize: failed to create voice provider: {e}"
                )
                await websocket.send_json({"type": "error", "message": str(e)})
                return

        # Use native streaming if provider supports it
        if provider.supports_streaming_tts():
            logger.info("WebSocket synthesize: using native streaming TTS")
            message = None  # Initialize to avoid UnboundLocalError in except block
            try:
                # Wait for initial config message with voice/speed
                message = await websocket.receive()
                voice = None
                speed = 1.0
                if "text" in message:
                    try:
                        data = json.loads(message["text"])
                        voice = data.get("voice")
                        speed = data.get("speed", 1.0)
                    except json.JSONDecodeError:
                        pass

                streaming_synthesizer = await provider.create_streaming_synthesizer(
                    voice=voice, speed=speed
                )
                logger.info(
                    "WebSocket synthesize: streaming synthesizer created successfully"
                )
                await handle_streaming_synthesis(websocket, streaming_synthesizer)
            except Exception as e:
                logger.error(
                    f"WebSocket synthesize: failed to create streaming synthesizer: {e}"
                )
                if VOICE_DISABLE_STREAMING_FALLBACK:
                    await websocket.send_json(
                        {"type": "error", "message": f"Streaming TTS failed: {e}"}
                    )
                    return
                logger.info(
                    "WebSocket synthesize: falling back to chunked TTS synthesis"
                )
                # Pass the first message so it's not lost in the fallback
                await handle_chunked_synthesis(
                    websocket, provider, first_message=message
                )
        else:
            if VOICE_DISABLE_STREAMING_FALLBACK:
                await websocket.send_json(
                    {
                        "type": "error",
                        "message": "Provider doesn't support streaming TTS",
                    }
                )
                return
            logger.info(
                "WebSocket synthesize: using chunked TTS (provider doesn't support streaming)"
            )
            await handle_chunked_synthesis(websocket, provider)

    except WebSocketDisconnect:
        logger.debug("WebSocket synthesize: client disconnected")
    except Exception as e:
        logger.error(f"WebSocket synthesize: unhandled error: {e}", exc_info=True)
        try:
            # Send generic error to avoid leaking sensitive details
            await websocket.send_json(
                {"type": "error", "message": "An unexpected error occurred"}
            )
        except Exception:
            pass
    finally:
        if streaming_synthesizer:
            try:
                await streaming_synthesizer.close()
            except Exception:
                pass
        try:
            await websocket.close()
        except Exception:
            pass
        logger.info("WebSocket synthesize: connection closed")


================================================
FILE: backend/onyx/server/manage/web_search/api.py
================================================
from __future__ import annotations

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import InternetContentProvider
from onyx.db.models import InternetSearchProvider
from onyx.db.models import User
from onyx.db.web_search import deactivate_web_content_provider
from onyx.db.web_search import deactivate_web_search_provider
from onyx.db.web_search import delete_web_content_provider
from onyx.db.web_search import delete_web_search_provider
from onyx.db.web_search import fetch_web_content_provider_by_name
from onyx.db.web_search import fetch_web_content_provider_by_type
from onyx.db.web_search import fetch_web_content_providers
from onyx.db.web_search import fetch_web_search_provider_by_name
from onyx.db.web_search import fetch_web_search_provider_by_type
from onyx.db.web_search import fetch_web_search_providers
from onyx.db.web_search import set_active_web_content_provider
from onyx.db.web_search import set_active_web_search_provider
from onyx.db.web_search import upsert_web_content_provider
from onyx.db.web_search import upsert_web_search_provider
from onyx.server.manage.web_search.models import WebContentProviderTestRequest
from onyx.server.manage.web_search.models import WebContentProviderUpsertRequest
from onyx.server.manage.web_search.models import WebContentProviderView
from onyx.server.manage.web_search.models import WebSearchProviderTestRequest
from onyx.server.manage.web_search.models import WebSearchProviderUpsertRequest
from onyx.server.manage.web_search.models import WebSearchProviderView
from onyx.tools.tool_implementations.open_url.utils import (
    filter_web_contents_with_no_title_or_content,
)
from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from onyx.tools.tool_implementations.web_search.providers import (
    build_content_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.providers import (
    provider_requires_api_key,
)
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/web-search")


@admin_router.get("/search-providers", response_model=list[WebSearchProviderView])
def list_search_providers(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[WebSearchProviderView]:
    providers = fetch_web_search_providers(db_session)
    return [
        WebSearchProviderView(
            id=provider.id,
            name=provider.name,
            provider_type=WebSearchProviderType(provider.provider_type),
            is_active=provider.is_active,
            config=provider.config or {},
            has_api_key=bool(provider.api_key),
        )
        for provider in providers
    ]


@admin_router.post("/search-providers", response_model=WebSearchProviderView)
def upsert_search_provider_endpoint(
    request: WebSearchProviderUpsertRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> WebSearchProviderView:
    existing_by_name = fetch_web_search_provider_by_name(request.name, db_session)
    if (
        existing_by_name
        and request.id is not None
        and existing_by_name.id != request.id
    ):
        raise HTTPException(
            status_code=400,
            detail=f"A search provider named '{request.name}' already exists.",
        )

    provider = upsert_web_search_provider(
        provider_id=request.id,
        name=request.name,
        provider_type=request.provider_type,
        api_key=request.api_key,
        api_key_changed=request.api_key_changed,
        config=request.config,
        activate=request.activate,
        db_session=db_session,
    )

    # Sync Exa key of search engine to content provider
    if (
        request.provider_type == WebSearchProviderType.EXA
        and request.api_key_changed
        and request.api_key
    ):
        stmt = (
            insert(InternetContentProvider)
            .values(
                name="Exa",
                provider_type=WebContentProviderType.EXA.value,
                api_key=request.api_key,
                is_active=False,
            )
            .on_conflict_do_update(
                index_elements=["name"],
                set_={"api_key": request.api_key},
            )
        )
        db_session.execute(stmt)
        db_session.flush()

    db_session.commit()
    return WebSearchProviderView(
        id=provider.id,
        name=provider.name,
        provider_type=WebSearchProviderType(provider.provider_type),
        is_active=provider.is_active,
        config=provider.config or {},
        has_api_key=bool(provider.api_key),
    )


@admin_router.delete(
    "/search-providers/{provider_id}", status_code=204, response_class=Response
)
def delete_search_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> Response:
    delete_web_search_provider(provider_id, db_session)
    return Response(status_code=204)


@admin_router.post("/search-providers/{provider_id}/activate")
def activate_search_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> WebSearchProviderView:
    provider = set_active_web_search_provider(
        provider_id=provider_id, db_session=db_session
    )
    db_session.commit()
    return WebSearchProviderView(
        id=provider.id,
        name=provider.name,
        provider_type=WebSearchProviderType(provider.provider_type),
        is_active=provider.is_active,
        config=provider.config or {},
        has_api_key=bool(provider.api_key),
    )


@admin_router.post("/search-providers/{provider_id}/deactivate")
def deactivate_search_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    deactivate_web_search_provider(provider_id=provider_id, db_session=db_session)
    db_session.commit()
    return {"status": "ok"}


@admin_router.post("/search-providers/test")
def test_search_provider(
    request: WebSearchProviderTestRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    requires_key = provider_requires_api_key(request.provider_type)

    # Determine which API key to use
    api_key = request.api_key
    if request.use_stored_key and requires_key:
        existing_provider = fetch_web_search_provider_by_type(
            request.provider_type, db_session
        )
        if existing_provider is None or not existing_provider.api_key:
            raise HTTPException(
                status_code=400,
                detail="No stored API key found for this provider type.",
            )
        api_key = existing_provider.api_key.get_value(apply_mask=False)

    if requires_key and not api_key:
        raise HTTPException(
            status_code=400,
            detail="API key is required. Either provide api_key or set use_stored_key to true.",
        )

    try:
        provider = build_search_provider_from_config(
            provider_type=request.provider_type,
            api_key=api_key,
            config=request.config or {},
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    if provider is None:
        raise HTTPException(
            status_code=400, detail="Unable to build provider configuration."
        )

    # Run the API client's test_connection method to ensure the connection is valid.
    try:
        return provider.test_connection()
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e


@admin_router.get("/content-providers", response_model=list[WebContentProviderView])
def list_content_providers(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[WebContentProviderView]:
    providers = fetch_web_content_providers(db_session)
    return [
        WebContentProviderView(
            id=provider.id,
            name=provider.name,
            provider_type=WebContentProviderType(provider.provider_type),
            is_active=provider.is_active,
            config=provider.config or WebContentProviderConfig(),
            has_api_key=bool(provider.api_key),
        )
        for provider in providers
    ]


@admin_router.post("/content-providers", response_model=WebContentProviderView)
def upsert_content_provider_endpoint(
    request: WebContentProviderUpsertRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> WebContentProviderView:
    existing_by_name = fetch_web_content_provider_by_name(request.name, db_session)
    if (
        existing_by_name
        and request.id is not None
        and existing_by_name.id != request.id
    ):
        raise HTTPException(
            status_code=400,
            detail=f"A content provider named '{request.name}' already exists.",
        )

    provider = upsert_web_content_provider(
        provider_id=request.id,
        name=request.name,
        provider_type=request.provider_type,
        api_key=request.api_key,
        api_key_changed=request.api_key_changed,
        config=request.config,
        activate=request.activate,
        db_session=db_session,
    )

    # Sync Exa key of content provider to search provider
    if (
        request.provider_type == WebContentProviderType.EXA
        and request.api_key_changed
        and request.api_key
    ):
        stmt = (
            insert(InternetSearchProvider)
            .values(
                name="Exa",
                provider_type=WebSearchProviderType.EXA.value,
                api_key=request.api_key,
                is_active=False,
            )
            .on_conflict_do_update(
                index_elements=["name"],
                set_={"api_key": request.api_key},
            )
        )
        db_session.execute(stmt)
        db_session.flush()

    db_session.commit()
    return WebContentProviderView(
        id=provider.id,
        name=provider.name,
        provider_type=WebContentProviderType(provider.provider_type),
        is_active=provider.is_active,
        config=provider.config or WebContentProviderConfig(),
        has_api_key=bool(provider.api_key),
    )


@admin_router.delete(
    "/content-providers/{provider_id}", status_code=204, response_class=Response
)
def delete_content_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> Response:
    delete_web_content_provider(provider_id, db_session)
    return Response(status_code=204)


@admin_router.post("/content-providers/{provider_id}/activate")
def activate_content_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> WebContentProviderView:
    provider = set_active_web_content_provider(
        provider_id=provider_id, db_session=db_session
    )
    db_session.commit()
    return WebContentProviderView(
        id=provider.id,
        name=provider.name,
        provider_type=WebContentProviderType(provider.provider_type),
        is_active=provider.is_active,
        config=provider.config or WebContentProviderConfig(),
        has_api_key=bool(provider.api_key),
    )


@admin_router.post("/content-providers/reset-default")
def reset_content_provider_default(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    providers = fetch_web_content_providers(db_session)
    active_ids = [provider.id for provider in providers if provider.is_active]

    for provider_id in active_ids:
        deactivate_web_content_provider(provider_id=provider_id, db_session=db_session)
        db_session.commit()

    return {"status": "ok"}


@admin_router.post("/content-providers/{provider_id}/deactivate")
def deactivate_content_provider(
    provider_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    deactivate_web_content_provider(provider_id=provider_id, db_session=db_session)
    db_session.commit()
    return {"status": "ok"}


@admin_router.post("/content-providers/test")
def test_content_provider(
    request: WebContentProviderTestRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    # Determine which API key to use
    api_key = request.api_key
    if request.use_stored_key:
        existing_provider = fetch_web_content_provider_by_type(
            request.provider_type, db_session
        )
        if existing_provider is None or not existing_provider.api_key:
            raise HTTPException(
                status_code=400,
                detail="No stored API key found for this provider type.",
            )
        if MULTI_TENANT:
            stored_base_url = (
                existing_provider.config.base_url if existing_provider.config else None
            )
            request_base_url = request.config.base_url
            if request_base_url != stored_base_url:
                raise HTTPException(
                    status_code=400,
                    detail="Base URL cannot differ from stored provider when using stored API key",
                )

        api_key = existing_provider.api_key.get_value(apply_mask=False)

    if not api_key:
        raise HTTPException(
            status_code=400,
            detail="API key is required. Either provide api_key or set use_stored_key to true.",
        )

    try:
        provider = build_content_provider_from_config(
            provider_type=request.provider_type,
            api_key=api_key,
            config=request.config,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    if provider is None:
        raise HTTPException(
            status_code=400, detail="Unable to build provider configuration."
        )

    # Actually test the API key by making a real content fetch call
    try:
        test_url = "https://example.com"
        test_results = filter_web_contents_with_no_title_or_content(
            list(provider.contents([test_url]))
        )
        if not test_results or not any(
            result.scrape_successful for result in test_results
        ):
            raise HTTPException(
                status_code=400,
                detail="API key validation failed: content fetch returned no results.",
            )
    except HTTPException:
        raise
    except Exception as e:
        error_msg = str(e)
        if (
            "api" in error_msg.lower()
            or "key" in error_msg.lower()
            or "auth" in error_msg.lower()
        ):
            raise HTTPException(
                status_code=400,
                detail=f"Invalid API key: {error_msg}",
            ) from e
        raise HTTPException(
            status_code=400,
            detail=f"API key validation failed: {error_msg}",
        ) from e

    logger.info(
        f"Web content provider test succeeded for {request.provider_type.value}."
    )
    return {"status": "ok"}


================================================
FILE: backend/onyx/server/manage/web_search/models.py
================================================
from typing import Any

from pydantic import BaseModel
from pydantic import Field

from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType


class WebSearchProviderView(BaseModel):
    id: int
    name: str
    provider_type: WebSearchProviderType
    is_active: bool
    config: dict[str, str] | None
    has_api_key: bool = Field(
        default=False,
        description="Indicates whether an API key is stored for this provider.",
    )


class WebSearchProviderUpsertRequest(BaseModel):
    id: int | None = Field(default=None, description="Existing provider ID to update.")
    name: str
    provider_type: WebSearchProviderType
    config: dict[str, str] | None = None
    api_key: str | None = Field(
        default=None,
        description="API key for the provider. Only required when creating or updating credentials.",
    )
    api_key_changed: bool = Field(
        default=False,
        description="Set to true when providing a new API key for an existing provider.",
    )
    activate: bool = Field(
        default=False,
        description="If true, sets this provider as the active one after upsert.",
    )


class WebContentProviderView(BaseModel):
    id: int
    name: str
    provider_type: WebContentProviderType
    is_active: bool
    config: WebContentProviderConfig | None
    has_api_key: bool = Field(default=False)


class WebContentProviderUpsertRequest(BaseModel):
    id: int | None = None
    name: str
    provider_type: WebContentProviderType
    config: WebContentProviderConfig | None = None
    api_key: str | None = None
    api_key_changed: bool = False
    activate: bool = False


class WebSearchProviderTestRequest(BaseModel):
    provider_type: WebSearchProviderType
    api_key: str | None = Field(
        default=None,
        description="API key for testing. If not provided, use_stored_key must be true.",
    )
    use_stored_key: bool = Field(
        default=False,
        description="If true, use the stored API key for this provider type instead of api_key.",
    )
    config: dict[str, Any] | None = None


class WebContentProviderTestRequest(BaseModel):
    provider_type: WebContentProviderType
    api_key: str | None = Field(
        default=None,
        description="API key for testing. If not provided, use_stored_key must be true.",
    )
    use_stored_key: bool = Field(
        default=False,
        description="If true, use the stored API key for this provider type instead of api_key.",
    )
    config: WebContentProviderConfig


================================================
FILE: backend/onyx/server/metrics/__init__.py
================================================


================================================
FILE: backend/onyx/server/metrics/celery_task_metrics.py
================================================
"""Generic Celery task lifecycle Prometheus metrics.

Provides signal handlers that track task started/completed/failed counts,
active task gauge, task duration histograms, and retry/reject/revoke counts.
These fire for ALL tasks on the worker — no per-connector enrichment
(see indexing_task_metrics.py for that).

Usage in a worker app module:
    from onyx.server.metrics.celery_task_metrics import (
        on_celery_task_prerun,
        on_celery_task_postrun,
        on_celery_task_retry,
        on_celery_task_revoked,
        on_celery_task_rejected,
    )
    # Call from the worker's existing signal handlers
"""

import threading
import time

from celery import Task
from prometheus_client import Counter
from prometheus_client import Gauge
from prometheus_client import Histogram

from onyx.utils.logger import setup_logger

logger = setup_logger()

TASK_STARTED = Counter(
    "onyx_celery_task_started_total",
    "Total Celery tasks started",
    ["task_name", "queue"],
)

TASK_COMPLETED = Counter(
    "onyx_celery_task_completed_total",
    "Total Celery tasks completed",
    ["task_name", "queue", "outcome"],
)

TASK_DURATION = Histogram(
    "onyx_celery_task_duration_seconds",
    "Celery task execution duration in seconds",
    ["task_name", "queue"],
    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
)

TASKS_ACTIVE = Gauge(
    "onyx_celery_tasks_active",
    "Currently executing Celery tasks",
    ["task_name", "queue"],
)

TASK_RETRIED = Counter(
    "onyx_celery_task_retried_total",
    "Total Celery tasks retried",
    ["task_name", "queue"],
)

TASK_REVOKED = Counter(
    "onyx_celery_task_revoked_total",
    "Total Celery tasks revoked (cancelled)",
    ["task_name"],
)

TASK_REJECTED = Counter(
    "onyx_celery_task_rejected_total",
    "Total Celery tasks rejected by worker",
    ["task_name"],
)

# task_id → (monotonic start time, metric labels)
_task_start_times: dict[str, tuple[float, dict[str, str]]] = {}

# Lock protecting _task_start_times — prerun, postrun, and eviction may
# run concurrently on thread-pool workers.
_task_start_times_lock = threading.Lock()

# Entries older than this are evicted on each prerun to prevent unbounded
# growth when tasks are killed (SIGTERM, OOM) and postrun never fires.
_MAX_START_TIME_AGE_SECONDS = 3600  # 1 hour


def _evict_stale_start_times() -> None:
    """Remove _task_start_times entries older than _MAX_START_TIME_AGE_SECONDS.

    Must be called while holding _task_start_times_lock.
    """
    now = time.monotonic()
    stale_ids = [
        tid
        for tid, (start, _labels) in _task_start_times.items()
        if now - start > _MAX_START_TIME_AGE_SECONDS
    ]
    for tid in stale_ids:
        entry = _task_start_times.pop(tid, None)
        if entry is not None:
            _labels = entry[1]
            # Decrement active gauge for evicted tasks — these tasks were
            # started but never completed (killed, OOM, etc.).
            active_gauge = TASKS_ACTIVE.labels(**_labels)
            if active_gauge._value.get() > 0:
                active_gauge.dec()


def _get_task_labels(task: Task) -> dict[str, str]:
    """Extract task_name and queue labels from a Celery Task instance."""
    task_name = task.name or "unknown"
    queue = "unknown"
    try:
        delivery_info = task.request.delivery_info
        if delivery_info:
            queue = delivery_info.get("routing_key") or "unknown"
    except AttributeError:
        pass
    return {"task_name": task_name, "queue": queue}


def on_celery_task_prerun(
    task_id: str | None,
    task: Task | None,
) -> None:
    """Record task start. Call from the worker's task_prerun signal handler."""
    if task is None or task_id is None:
        return

    try:
        labels = _get_task_labels(task)
        TASK_STARTED.labels(**labels).inc()
        TASKS_ACTIVE.labels(**labels).inc()
        with _task_start_times_lock:
            _evict_stale_start_times()
            _task_start_times[task_id] = (time.monotonic(), labels)
    except Exception:
        logger.debug("Failed to record celery task prerun metrics", exc_info=True)


def on_celery_task_postrun(
    task_id: str | None,
    task: Task | None,
    state: str | None,
) -> None:
    """Record task completion. Call from the worker's task_postrun signal handler."""
    if task is None or task_id is None:
        return

    try:
        labels = _get_task_labels(task)
        outcome = "success" if state == "SUCCESS" else "failure"
        TASK_COMPLETED.labels(**labels, outcome=outcome).inc()

        # Guard against going below 0 if postrun fires without a matching
        # prerun (e.g. after a worker restart or stale entry eviction).
        active_gauge = TASKS_ACTIVE.labels(**labels)
        if active_gauge._value.get() > 0:
            active_gauge.dec()

        with _task_start_times_lock:
            entry = _task_start_times.pop(task_id, None)
        if entry is not None:
            start_time, _stored_labels = entry
            TASK_DURATION.labels(**labels).observe(time.monotonic() - start_time)
    except Exception:
        logger.debug("Failed to record celery task postrun metrics", exc_info=True)


def on_celery_task_retry(
    _task_id: str | None,
    task: Task | None,
) -> None:
    """Record task retry. Call from the worker's task_retry signal handler."""
    if task is None:
        return
    try:
        labels = _get_task_labels(task)
        TASK_RETRIED.labels(**labels).inc()
    except Exception:
        logger.debug("Failed to record celery task retry metrics", exc_info=True)


def on_celery_task_revoked(
    _task_id: str | None,
    task_name: str | None = None,
) -> None:
    """Record task revocation. The revoked signal doesn't provide a Task
    instance, only the task name via sender."""
    if task_name is None:
        return
    try:
        TASK_REVOKED.labels(task_name=task_name).inc()
    except Exception:
        logger.debug("Failed to record celery task revoked metrics", exc_info=True)


def on_celery_task_rejected(
    _task_id: str | None,
    task_name: str | None = None,
) -> None:
    """Record task rejection."""
    if task_name is None:
        return
    try:
        TASK_REJECTED.labels(task_name=task_name).inc()
    except Exception:
        logger.debug("Failed to record celery task rejected metrics", exc_info=True)


================================================
FILE: backend/onyx/server/metrics/indexing_pipeline.py
================================================
"""Prometheus collectors for Celery queue depths and indexing pipeline state.

These collectors query Redis and Postgres at scrape time (the Collector pattern),
so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
monitoring celery worker which already has Redis and DB access.

To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
a configurable TTL (default 30s). This means metrics may be up to TTL seconds
stale, which is fine for monitoring dashboards.
"""

import json
import threading
import time
from datetime import datetime
from datetime import timezone
from typing import Any

from prometheus_client.core import GaugeMetricFamily
from prometheus_client.registry import Collector
from redis import Redis

from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.configs.constants import OnyxCeleryQueues
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Default cache TTL in seconds. Scrapes hitting within this window return
# the previous result without re-querying Redis/Postgres.
_DEFAULT_CACHE_TTL = 30.0

_QUEUE_LABEL_MAP: dict[str, str] = {
    OnyxCeleryQueues.PRIMARY: "primary",
    OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING: "docfetching",
    OnyxCeleryQueues.VESPA_METADATA_SYNC: "vespa_metadata_sync",
    OnyxCeleryQueues.CONNECTOR_DELETION: "connector_deletion",
    OnyxCeleryQueues.CONNECTOR_PRUNING: "connector_pruning",
    OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC: "permissions_sync",
    OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC: "external_group_sync",
    OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT: "permissions_upsert",
    OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING: "hierarchy_fetching",
    OnyxCeleryQueues.LLM_MODEL_UPDATE: "llm_model_update",
    OnyxCeleryQueues.CHECKPOINT_CLEANUP: "checkpoint_cleanup",
    OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP: "index_attempt_cleanup",
    OnyxCeleryQueues.CSV_GENERATION: "csv_generation",
    OnyxCeleryQueues.USER_FILE_PROCESSING: "user_file_processing",
    OnyxCeleryQueues.USER_FILE_PROJECT_SYNC: "user_file_project_sync",
    OnyxCeleryQueues.USER_FILE_DELETE: "user_file_delete",
    OnyxCeleryQueues.MONITORING: "monitoring",
    OnyxCeleryQueues.SANDBOX: "sandbox",
    OnyxCeleryQueues.OPENSEARCH_MIGRATION: "opensearch_migration",
}

# Queues where prefetched (unacked) task counts are meaningful
_UNACKED_QUEUES: list[str] = [
    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
    OnyxCeleryQueues.DOCPROCESSING,
]


class _CachedCollector(Collector):
    """Base collector with TTL-based caching.

    Subclasses implement ``_collect_fresh()`` to query the actual data source.
    The base ``collect()`` returns cached results if the TTL hasn't expired,
    avoiding repeated queries when Prometheus scrapes frequently.
    """

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        self._cache_ttl = cache_ttl
        self._cached_result: list[GaugeMetricFamily] | None = None
        self._last_collect_time: float = 0.0
        self._lock = threading.Lock()

    def collect(self) -> list[GaugeMetricFamily]:
        with self._lock:
            now = time.monotonic()
            if (
                now - self._last_collect_time < self._cache_ttl
                and self._cached_result is not None
            ):
                return self._cached_result

            try:
                result = self._collect_fresh()
                self._cached_result = result
                self._last_collect_time = now
                return result
            except Exception:
                logger.exception(f"Error in {type(self).__name__}.collect()")
                # Return stale cache on error rather than nothing — avoids
                # metrics disappearing during transient failures.
                return self._cached_result if self._cached_result is not None else []

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        raise NotImplementedError

    def describe(self) -> list[GaugeMetricFamily]:
        return []


class QueueDepthCollector(_CachedCollector):
    """Reads Celery queue lengths from the broker Redis on each scrape."""

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        self._celery_app: Any | None = None

    def set_celery_app(self, app: Any) -> None:
        """Set the Celery app for broker Redis access."""
        self._celery_app = app

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        if self._celery_app is None:
            return []

        from onyx.background.celery.celery_redis import celery_get_broker_client

        redis_client = celery_get_broker_client(self._celery_app)

        depth = GaugeMetricFamily(
            "onyx_queue_depth",
            "Number of tasks waiting in Celery queue",
            labels=["queue"],
        )
        unacked = GaugeMetricFamily(
            "onyx_queue_unacked",
            "Number of prefetched (unacked) tasks for queue",
            labels=["queue"],
        )
        queue_age = GaugeMetricFamily(
            "onyx_queue_oldest_task_age_seconds",
            "Age of the oldest task in the queue (seconds since enqueue)",
            labels=["queue"],
        )

        now = time.time()

        for queue_name, label in _QUEUE_LABEL_MAP.items():
            length = celery_get_queue_length(queue_name, redis_client)
            depth.add_metric([label], length)

            # Peek at the oldest message to get its age
            if length > 0:
                age = self._get_oldest_message_age(redis_client, queue_name, now)
                if age is not None:
                    queue_age.add_metric([label], age)

        for queue_name in _UNACKED_QUEUES:
            label = _QUEUE_LABEL_MAP[queue_name]
            task_ids = celery_get_unacked_task_ids(queue_name, redis_client)
            unacked.add_metric([label], len(task_ids))

        return [depth, unacked, queue_age]

    @staticmethod
    def _get_oldest_message_age(
        redis_client: Redis, queue_name: str, now: float
    ) -> float | None:
        """Peek at the oldest (tail) message in a Redis list queue
        and extract its timestamp to compute age.

        Note: If the Celery message contains neither ``properties.timestamp``
        nor ``headers.timestamp``, no age metric is emitted for this queue.
        This can happen with custom task producers or non-standard Celery
        protocol versions. The metric will simply be absent rather than
        inaccurate, which is the safest behavior for alerting.
        """
        try:
            raw: bytes | str | None = redis_client.lindex(queue_name, -1)  # type: ignore[assignment]
            if raw is None:
                return None
            msg = json.loads(raw)
            # Check for ETA tasks first — they are intentionally delayed,
            # so reporting their queue age would be misleading.
            headers = msg.get("headers", {})
            if headers.get("eta") is not None:
                return None
            # Celery v2 protocol: timestamp in properties
            props = msg.get("properties", {})
            ts = props.get("timestamp")
            if ts is not None:
                return now - float(ts)
            # Fallback: some Celery configurations place the timestamp in
            # headers instead of properties.
            ts = headers.get("timestamp")
            if ts is not None:
                return now - float(ts)
        except Exception:
            pass
        return None


class IndexAttemptCollector(_CachedCollector):
    """Queries Postgres for index attempt state on each scrape."""

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        self._configured: bool = False
        self._terminal_statuses: list = []

    def configure(self) -> None:
        """Call once DB engine is initialized."""
        from onyx.db.enums import IndexingStatus

        self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
        self._configured = True

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        if not self._configured:
            return []

        from onyx.db.engine.sql_engine import get_session_with_current_tenant
        from onyx.db.engine.tenant_utils import get_all_tenant_ids
        from onyx.db.index_attempt import get_active_index_attempts_for_metrics
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        attempts_gauge = GaugeMetricFamily(
            "onyx_index_attempts_active",
            "Number of non-terminal index attempts",
            labels=[
                "status",
                "source",
                "tenant_id",
                "connector_name",
                "cc_pair_id",
            ],
        )

        tenant_ids = get_all_tenant_ids()

        for tid in tenant_ids:
            # Defensive guard — get_all_tenant_ids() should never yield None,
            # but we guard here for API stability in case the contract changes.
            if tid is None:
                continue
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
            try:
                with get_session_with_current_tenant() as session:
                    rows = get_active_index_attempts_for_metrics(session)

                    for status, source, cc_id, cc_name, count in rows:
                        name_val = cc_name or f"cc_pair_{cc_id}"
                        attempts_gauge.add_metric(
                            [
                                status.value,
                                source.value,
                                tid,
                                name_val,
                                str(cc_id),
                            ],
                            count,
                        )
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        return [attempts_gauge]


class ConnectorHealthCollector(_CachedCollector):
    """Queries Postgres for connector health state on each scrape."""

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        self._configured: bool = False

    def configure(self) -> None:
        """Call once DB engine is initialized."""
        self._configured = True

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        if not self._configured:
            return []

        from onyx.db.connector_credential_pair import (
            get_connector_health_for_metrics,
        )
        from onyx.db.engine.sql_engine import get_session_with_current_tenant
        from onyx.db.engine.tenant_utils import get_all_tenant_ids
        from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
        from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        staleness_gauge = GaugeMetricFamily(
            "onyx_connector_last_success_age_seconds",
            "Seconds since last successful index for this connector",
            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
        )
        error_state_gauge = GaugeMetricFamily(
            "onyx_connector_in_error_state",
            "Whether the connector is in a repeated error state (1=yes, 0=no)",
            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
        )
        by_status_gauge = GaugeMetricFamily(
            "onyx_connectors_by_status",
            "Number of connectors grouped by status",
            labels=["tenant_id", "status"],
        )
        error_total_gauge = GaugeMetricFamily(
            "onyx_connectors_in_error_total",
            "Total number of connectors in repeated error state",
            labels=["tenant_id"],
        )
        per_connector_labels = [
            "tenant_id",
            "source",
            "cc_pair_id",
            "connector_name",
        ]
        docs_success_gauge = GaugeMetricFamily(
            "onyx_connector_docs_indexed",
            "Total new documents indexed (90-day rolling sum) per connector",
            labels=per_connector_labels,
        )
        docs_error_gauge = GaugeMetricFamily(
            "onyx_connector_error_count",
            "Total number of failed index attempts per connector",
            labels=per_connector_labels,
        )

        now = datetime.now(tz=timezone.utc)
        tenant_ids = get_all_tenant_ids()

        for tid in tenant_ids:
            # Defensive guard — get_all_tenant_ids() should never yield None,
            # but we guard here for API stability in case the contract changes.
            if tid is None:
                continue
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
            try:
                with get_session_with_current_tenant() as session:
                    pairs = get_connector_health_for_metrics(session)
                    error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
                    docs_by_cc = get_docs_indexed_by_cc_pair(session)

                    status_counts: dict[str, int] = {}
                    error_count = 0

                    for (
                        cc_id,
                        status,
                        in_error,
                        last_success,
                        cc_name,
                        source,
                    ) in pairs:
                        cc_id_str = str(cc_id)
                        source_val = source.value
                        name_val = cc_name or f"cc_pair_{cc_id}"
                        label_vals = [tid, source_val, cc_id_str, name_val]

                        if last_success is not None:
                            # Both `now` and `last_success` are timezone-aware
                            # (the DB column uses DateTime(timezone=True)),
                            # so subtraction is safe.
                            age = (now - last_success).total_seconds()
                            staleness_gauge.add_metric(label_vals, age)

                        error_state_gauge.add_metric(
                            label_vals,
                            1.0 if in_error else 0.0,
                        )
                        if in_error:
                            error_count += 1

                        docs_success_gauge.add_metric(
                            label_vals,
                            docs_by_cc.get(cc_id, 0),
                        )

                        docs_error_gauge.add_metric(
                            label_vals,
                            error_counts_by_cc.get(cc_id, 0),
                        )

                        status_val = status.value
                        status_counts[status_val] = status_counts.get(status_val, 0) + 1

                    for status_val, count in status_counts.items():
                        by_status_gauge.add_metric([tid, status_val], count)

                    error_total_gauge.add_metric([tid], error_count)
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        return [
            staleness_gauge,
            error_state_gauge,
            by_status_gauge,
            error_total_gauge,
            docs_success_gauge,
            docs_error_gauge,
        ]


class RedisHealthCollector(_CachedCollector):
    """Collects Redis server health metrics (memory, clients, etc.)."""

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        self._celery_app: Any | None = None

    def set_celery_app(self, app: Any) -> None:
        """Set the Celery app for broker Redis access."""
        self._celery_app = app

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        if self._celery_app is None:
            return []

        from onyx.background.celery.celery_redis import celery_get_broker_client

        redis_client = celery_get_broker_client(self._celery_app)

        memory_used = GaugeMetricFamily(
            "onyx_redis_memory_used_bytes",
            "Redis used memory in bytes",
        )
        memory_peak = GaugeMetricFamily(
            "onyx_redis_memory_peak_bytes",
            "Redis peak used memory in bytes",
        )
        memory_frag = GaugeMetricFamily(
            "onyx_redis_memory_fragmentation_ratio",
            "Redis memory fragmentation ratio (>1.5 indicates fragmentation)",
        )
        connected_clients = GaugeMetricFamily(
            "onyx_redis_connected_clients",
            "Number of connected Redis clients",
        )

        try:
            mem_info: dict = redis_client.info("memory")  # type: ignore[assignment]
            memory_used.add_metric([], mem_info.get("used_memory", 0))
            memory_peak.add_metric([], mem_info.get("used_memory_peak", 0))
            frag = mem_info.get("mem_fragmentation_ratio")
            if frag is not None:
                memory_frag.add_metric([], frag)

            client_info: dict = redis_client.info("clients")  # type: ignore[assignment]
            connected_clients.add_metric([], client_info.get("connected_clients", 0))
        except Exception:
            logger.debug("Failed to collect Redis health metrics", exc_info=True)

        return [memory_used, memory_peak, memory_frag, connected_clients]


class WorkerHeartbeatMonitor:
    """Monitors Celery worker health via the event stream.

    Subscribes to ``worker-heartbeat``, ``worker-online``, and
    ``worker-offline`` events via a single persistent connection.
    Runs in a daemon thread started once during worker setup.
    """

    # Consider a worker down if no heartbeat received for this long.
    _HEARTBEAT_TIMEOUT_SECONDS = 120.0

    def __init__(self, celery_app: Any) -> None:
        self._app = celery_app
        self._worker_last_seen: dict[str, float] = {}
        self._lock = threading.Lock()
        self._running = False
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        """Start the background event listener thread.

        Safe to call multiple times — only starts one thread.
        """
        if self._thread is not None and self._thread.is_alive():
            return
        self._running = True
        self._thread = threading.Thread(target=self._listen, daemon=True)
        self._thread.start()
        logger.info("WorkerHeartbeatMonitor started")

    def stop(self) -> None:
        self._running = False

    def _listen(self) -> None:
        """Background loop: connect to event stream and process heartbeats."""
        while self._running:
            try:
                with self._app.connection() as conn:
                    recv = self._app.events.Receiver(
                        conn,
                        handlers={
                            "worker-heartbeat": self._on_heartbeat,
                            "worker-online": self._on_heartbeat,
                            "worker-offline": self._on_offline,
                        },
                    )
                    recv.capture(
                        limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
                    )
            except Exception:
                if self._running:
                    logger.debug(
                        "Heartbeat listener disconnected, reconnecting in 5s",
                        exc_info=True,
                    )
                    time.sleep(5.0)
            else:
                # capture() returned normally (timeout with no events); reconnect
                if self._running:
                    logger.debug("Heartbeat capture timed out, reconnecting")
                    time.sleep(5.0)

    def _on_heartbeat(self, event: dict[str, Any]) -> None:
        hostname = event.get("hostname")
        if hostname:
            with self._lock:
                self._worker_last_seen[hostname] = time.monotonic()

    def _on_offline(self, event: dict[str, Any]) -> None:
        hostname = event.get("hostname")
        if hostname:
            with self._lock:
                self._worker_last_seen.pop(hostname, None)

    def get_worker_status(self) -> dict[str, bool]:
        """Return {hostname: is_alive} for all known workers.

        Thread-safe. Called by WorkerHealthCollector on each scrape.
        Also prunes workers that have been dead longer than 2x the
        heartbeat timeout to prevent unbounded growth.
        """
        now = time.monotonic()
        prune_threshold = self._HEARTBEAT_TIMEOUT_SECONDS * 2
        with self._lock:
            # Prune workers that have been gone for 2x the timeout
            stale = [
                h
                for h, ts in self._worker_last_seen.items()
                if (now - ts) > prune_threshold
            ]
            for h in stale:
                del self._worker_last_seen[h]

            result: dict[str, bool] = {}
            for hostname, last_seen in self._worker_last_seen.items():
                alive = (now - last_seen) < self._HEARTBEAT_TIMEOUT_SECONDS
                result[hostname] = alive
            return result


class WorkerHealthCollector(_CachedCollector):
    """Collects Celery worker health from the heartbeat monitor.

    Reads worker status from ``WorkerHeartbeatMonitor`` which listens
    to the Celery event stream via a single persistent connection.
    """

    def __init__(self, cache_ttl: float = 30.0) -> None:
        super().__init__(cache_ttl)
        self._monitor: WorkerHeartbeatMonitor | None = None

    def set_monitor(self, monitor: WorkerHeartbeatMonitor) -> None:
        """Set the heartbeat monitor instance."""
        self._monitor = monitor

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        if self._monitor is None:
            return []

        active_workers = GaugeMetricFamily(
            "onyx_celery_active_worker_count",
            "Number of active Celery workers with recent heartbeats",
        )
        worker_up = GaugeMetricFamily(
            "onyx_celery_worker_up",
            "Whether a specific Celery worker is alive (1=up, 0=down)",
            labels=["worker"],
        )

        try:
            status = self._monitor.get_worker_status()
            alive_count = sum(1 for alive in status.values() if alive)
            active_workers.add_metric([], alive_count)

            for hostname in sorted(status):
                # Use short name (before @) for single-host deployments,
                # full hostname when multiple hosts share a worker type.
                label = hostname.split("@")[0]
                worker_up.add_metric([label], 1 if status[hostname] else 0)
        except Exception:
            logger.debug("Failed to collect worker health metrics", exc_info=True)

        return [active_workers, worker_up]


================================================
FILE: backend/onyx/server/metrics/indexing_pipeline_setup.py
================================================
"""Setup function for indexing pipeline Prometheus collectors.

Called once by the monitoring celery worker after Redis and DB are ready.
"""

from celery import Celery
from prometheus_client.registry import REGISTRY

from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHeartbeatMonitor
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Module-level singletons — these are lightweight objects (no connections or DB
# state) until configure() / set_celery_app() is called. Keeping them at
# module level ensures they survive the lifetime of the worker process and are
# only registered with the Prometheus registry once.
_queue_collector = QueueDepthCollector()
_attempt_collector = IndexAttemptCollector()
_connector_collector = ConnectorHealthCollector()
_redis_health_collector = RedisHealthCollector()
_worker_health_collector = WorkerHealthCollector()
_heartbeat_monitor: WorkerHeartbeatMonitor | None = None


def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
    """Register all indexing pipeline collectors with the default registry.

    Args:
        celery_app: The Celery application instance. Used to obtain a
            broker Redis client on each scrape for queue depth metrics.
    """
    _queue_collector.set_celery_app(celery_app)
    _redis_health_collector.set_celery_app(celery_app)

    # Start the heartbeat monitor daemon thread — uses a single persistent
    # connection to receive worker-heartbeat events.
    # Module-level singleton prevents duplicate threads on re-entry.
    global _heartbeat_monitor
    if _heartbeat_monitor is None:
        _heartbeat_monitor = WorkerHeartbeatMonitor(celery_app)
        _heartbeat_monitor.start()
    _worker_health_collector.set_monitor(_heartbeat_monitor)

    _attempt_collector.configure()
    _connector_collector.configure()

    for collector in (
        _queue_collector,
        _attempt_collector,
        _connector_collector,
        _redis_health_collector,
        _worker_health_collector,
    ):
        try:
            REGISTRY.register(collector)
        except ValueError:
            logger.debug("Collector already registered: %s", type(collector).__name__)


================================================
FILE: backend/onyx/server/metrics/indexing_task_metrics.py
================================================
"""Per-connector Prometheus metrics for indexing tasks.

Enriches the two primary indexing tasks (docfetching_proxy_task and
docprocessing_task) with connector-level labels: source, tenant_id,
and cc_pair_id.

Note: connector_name is intentionally excluded from push-based per-task
counters because it is a user-defined free-form string that can create
unbounded cardinality. The pull-based collectors on the monitoring worker
(see indexing_pipeline.py) include connector_name since they have bounded
cardinality (one series per connector, not per task execution).

Uses an in-memory cache for cc_pair_id → (source, name) lookups.
Connectors never change source type, and names change rarely, so the
cache is safe to hold for the worker's lifetime.

Usage in a worker app module:
    from onyx.server.metrics.indexing_task_metrics import (
        on_indexing_task_prerun,
        on_indexing_task_postrun,
    )
"""

import threading
import time
from dataclasses import dataclass

from celery import Task
from prometheus_client import Counter
from prometheus_client import Histogram

from onyx.configs.constants import OnyxCeleryTask
from onyx.server.metrics.celery_task_metrics import _MAX_START_TIME_AGE_SECONDS
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()


@dataclass(frozen=True)
class ConnectorInfo:
    """Cached connector metadata for metric labels."""

    source: str
    name: str


_UNKNOWN_CONNECTOR = ConnectorInfo(source="unknown", name="unknown")

# (tenant_id, cc_pair_id) → ConnectorInfo (populated on first encounter).
# Keyed by tenant to avoid cross-tenant cache poisoning in multi-tenant
# deployments where different tenants can share the same cc_pair_id value.
_connector_cache: dict[tuple[str, int], ConnectorInfo] = {}

# Lock protecting _connector_cache — multiple thread-pool workers may
# resolve connectors concurrently.
_connector_cache_lock = threading.Lock()

# Only enrich these task types with per-connector labels
_INDEXING_TASK_NAMES: frozenset[str] = frozenset(
    {
        OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,
        OnyxCeleryTask.DOCPROCESSING_TASK,
    }
)

# connector_name is intentionally excluded — see module docstring.
INDEXING_TASK_STARTED = Counter(
    "onyx_indexing_task_started_total",
    "Indexing tasks started per connector",
    ["task_name", "source", "tenant_id", "cc_pair_id"],
)

INDEXING_TASK_COMPLETED = Counter(
    "onyx_indexing_task_completed_total",
    "Indexing tasks completed per connector",
    [
        "task_name",
        "source",
        "tenant_id",
        "cc_pair_id",
        "outcome",
    ],
)

INDEXING_TASK_DURATION = Histogram(
    "onyx_indexing_task_duration_seconds",
    "Indexing task duration by connector type",
    ["task_name", "source", "tenant_id"],
    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
)

# task_id → monotonic start time (for indexing tasks only)
_indexing_start_times: dict[str, float] = {}

# Lock protecting _indexing_start_times — prerun, postrun, and eviction may
# run concurrently on thread-pool workers.
_indexing_start_times_lock = threading.Lock()


def _evict_stale_start_times() -> None:
    """Remove _indexing_start_times entries older than _MAX_START_TIME_AGE_SECONDS.

    Must be called while holding _indexing_start_times_lock.
    """
    now = time.monotonic()
    stale_ids = [
        tid
        for tid, start in _indexing_start_times.items()
        if now - start > _MAX_START_TIME_AGE_SECONDS
    ]
    for tid in stale_ids:
        _indexing_start_times.pop(tid, None)


def _resolve_connector(cc_pair_id: int) -> ConnectorInfo:
    """Resolve cc_pair_id to ConnectorInfo, using cache when possible.

    On cache miss, does a single DB query with eager connector load.
    On any failure, returns _UNKNOWN_CONNECTOR without caching, so that
    subsequent calls can retry the lookup once the DB is available.

    Note on tenant_id source: we read CURRENT_TENANT_ID_CONTEXTVAR for the
    cache key. The Celery tenant-aware middleware sets this contextvar before
    task execution, and it always matches kwargs["tenant_id"] (which is set
    at task dispatch time). They are guaranteed to agree for a given task
    execution context.
    """
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get("") or ""
    cache_key = (tenant_id, cc_pair_id)

    with _connector_cache_lock:
        cached = _connector_cache.get(cache_key)
        if cached is not None:
            return cached

    try:
        from onyx.db.connector_credential_pair import (
            get_connector_credential_pair_from_id,
        )
        from onyx.db.engine.sql_engine import get_session_with_current_tenant

        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session,
                cc_pair_id,
                eager_load_connector=True,
            )
            if cc_pair is None:
                # DB lookup succeeded but cc_pair doesn't exist — don't cache,
                # it may appear later (race with connector creation).
                return _UNKNOWN_CONNECTOR

            info = ConnectorInfo(
                source=cc_pair.connector.source.value,
                name=cc_pair.name,
            )
            with _connector_cache_lock:
                _connector_cache[cache_key] = info
            return info
    except Exception:
        logger.debug(
            f"Failed to resolve connector info for cc_pair_id={cc_pair_id}",
            exc_info=True,
        )
        return _UNKNOWN_CONNECTOR


def on_indexing_task_prerun(
    task_id: str | None,
    task: Task | None,
    kwargs: dict | None,
) -> None:
    """Record per-connector metrics at task start.

    Only fires for tasks in _INDEXING_TASK_NAMES. Silently returns for
    all other tasks.
    """
    if task is None or task_id is None or kwargs is None:
        return

    task_name = task.name or ""
    if task_name not in _INDEXING_TASK_NAMES:
        return

    try:
        cc_pair_id = kwargs.get("cc_pair_id")
        tenant_id = str(kwargs.get("tenant_id", "unknown"))

        if cc_pair_id is None:
            return

        info = _resolve_connector(cc_pair_id)

        INDEXING_TASK_STARTED.labels(
            task_name=task_name,
            source=info.source,
            tenant_id=tenant_id,
            cc_pair_id=str(cc_pair_id),
        ).inc()

        with _indexing_start_times_lock:
            _evict_stale_start_times()
            _indexing_start_times[task_id] = time.monotonic()
    except Exception:
        logger.debug("Failed to record indexing task prerun metrics", exc_info=True)


def on_indexing_task_postrun(
    task_id: str | None,
    task: Task | None,
    kwargs: dict | None,
    state: str | None,
) -> None:
    """Record per-connector completion metrics.

    Only fires for tasks in _INDEXING_TASK_NAMES.
    """
    if task is None or task_id is None or kwargs is None:
        return

    task_name = task.name or ""
    if task_name not in _INDEXING_TASK_NAMES:
        return

    try:
        cc_pair_id = kwargs.get("cc_pair_id")
        tenant_id = str(kwargs.get("tenant_id", "unknown"))

        if cc_pair_id is None:
            return

        info = _resolve_connector(cc_pair_id)
        outcome = "success" if state == "SUCCESS" else "failure"

        INDEXING_TASK_COMPLETED.labels(
            task_name=task_name,
            source=info.source,
            tenant_id=tenant_id,
            cc_pair_id=str(cc_pair_id),
            outcome=outcome,
        ).inc()

        with _indexing_start_times_lock:
            start = _indexing_start_times.pop(task_id, None)
        if start is not None:
            INDEXING_TASK_DURATION.labels(
                task_name=task_name,
                source=info.source,
                tenant_id=tenant_id,
            ).observe(time.monotonic() - start)
    except Exception:
        logger.debug("Failed to record indexing task postrun metrics", exc_info=True)


================================================
FILE: backend/onyx/server/metrics/metrics_server.py
================================================
"""Standalone Prometheus metrics HTTP server for non-API processes.

The FastAPI API server already exposes /metrics via prometheus-fastapi-instrumentator.
Celery workers and other background processes use this module to expose their
own /metrics endpoint on a configurable port.

Usage:
    from onyx.server.metrics.metrics_server import start_metrics_server
    start_metrics_server("monitoring")  # reads port from env or uses default
"""

import os
import threading

from prometheus_client import start_http_server

from onyx.utils.logger import setup_logger

logger = setup_logger()

# Default ports for worker types that serve custom Prometheus metrics.
# Only add entries here when a worker actually registers collectors.
# In k8s each worker type runs in its own pod, so PROMETHEUS_METRICS_PORT
# env var can override.
_DEFAULT_PORTS: dict[str, int] = {
    "monitoring": 9096,
    "docfetching": 9092,
    "docprocessing": 9093,
}

_server_started = False
_server_lock = threading.Lock()


def start_metrics_server(worker_type: str) -> int | None:
    """Start a Prometheus metrics HTTP server in a background thread.

    Returns the port if started, None if disabled or already started.

    Port resolution order:
    1. PROMETHEUS_METRICS_PORT env var (explicit override)
    2. Default port for the worker type
    3. If worker type is unknown and no env var, skip

    Set PROMETHEUS_METRICS_ENABLED=false to disable.
    """
    global _server_started

    with _server_lock:
        if _server_started:
            logger.debug(f"Metrics server already started for {worker_type}")
            return None

        enabled = os.environ.get("PROMETHEUS_METRICS_ENABLED", "true").lower()
        if enabled in ("false", "0", "no"):
            logger.info(f"Prometheus metrics server disabled for {worker_type}")
            return None

        port_str = os.environ.get("PROMETHEUS_METRICS_PORT")
        if port_str:
            try:
                port = int(port_str)
            except ValueError:
                logger.warning(
                    f"Invalid PROMETHEUS_METRICS_PORT '{port_str}' for {worker_type}, "
                    "must be a numeric port. Skipping metrics server."
                )
                return None
        elif worker_type in _DEFAULT_PORTS:
            port = _DEFAULT_PORTS[worker_type]
        else:
            logger.info(
                f"No default metrics port for worker type '{worker_type}' "
                "and PROMETHEUS_METRICS_PORT not set. Skipping metrics server."
            )
            return None

        try:
            start_http_server(port)
            _server_started = True
            logger.info(
                f"Prometheus metrics server started on :{port} for {worker_type}"
            )
            return port
        except OSError as e:
            logger.warning(
                f"Failed to start metrics server on :{port} for {worker_type}: {e}"
            )
            return None


================================================
FILE: backend/onyx/server/metrics/opensearch_search.py
================================================
"""Prometheus metrics for OpenSearch search latency and throughput.

Tracks client-side round-trip latency, server-side execution time (from
OpenSearch's ``took`` field), total search count, and in-flight concurrency.
"""

import logging
from collections.abc import Generator
from contextlib import contextmanager

from prometheus_client import Counter
from prometheus_client import Gauge
from prometheus_client import Histogram

from onyx.document_index.opensearch.constants import OpenSearchSearchType

logger = logging.getLogger(__name__)

_SEARCH_LATENCY_BUCKETS = (
    0.005,
    0.01,
    0.025,
    0.05,
    0.1,
    0.25,
    0.5,
    1.0,
    2.5,
    5.0,
    10.0,
    25.0,
)

_client_duration = Histogram(
    "onyx_opensearch_search_client_duration_seconds",
    "Client-side end-to-end latency of OpenSearch search calls",
    ["search_type"],
    buckets=_SEARCH_LATENCY_BUCKETS,
)

_server_duration = Histogram(
    "onyx_opensearch_search_server_duration_seconds",
    "Server-side execution time reported by OpenSearch (took field)",
    ["search_type"],
    buckets=_SEARCH_LATENCY_BUCKETS,
)

_search_total = Counter(
    "onyx_opensearch_search_total",
    "Total number of search requests sent to OpenSearch",
    ["search_type"],
)

_searches_in_progress = Gauge(
    "onyx_opensearch_searches_in_progress",
    "Number of OpenSearch searches currently in-flight",
    ["search_type"],
)


def observe_opensearch_search(
    search_type: OpenSearchSearchType,
    client_duration_s: float,
    server_took_ms: int | None,
) -> None:
    """Records latency and throughput metrics for a completed OpenSearch search.

    Args:
        search_type: The type of search.
        client_duration_s: Wall-clock duration measured on the client side, in
            seconds.
        server_took_ms: The ``took`` value from the OpenSearch response, in
            milliseconds. May be ``None`` if the response did not include it.
    """
    try:
        label = search_type.value
        _search_total.labels(search_type=label).inc()
        _client_duration.labels(search_type=label).observe(client_duration_s)
        if server_took_ms is not None:
            _server_duration.labels(search_type=label).observe(server_took_ms / 1000.0)
    except Exception:
        logger.warning("Failed to record OpenSearch search metrics.", exc_info=True)


@contextmanager
def track_opensearch_search_in_progress(
    search_type: OpenSearchSearchType,
) -> Generator[None, None, None]:
    """Context manager that tracks in-flight OpenSearch searches via a Gauge."""
    incremented = False
    label = search_type.value
    try:
        _searches_in_progress.labels(search_type=label).inc()
        incremented = True
    except Exception:
        logger.warning("Failed to increment in-progress search gauge.", exc_info=True)
    try:
        yield
    finally:
        if incremented:
            try:
                _searches_in_progress.labels(search_type=label).dec()
            except Exception:
                logger.warning(
                    "Failed to decrement in-progress search gauge.", exc_info=True
                )


================================================
FILE: backend/onyx/server/metrics/per_tenant.py
================================================
"""Per-tenant request counter metric.

Increments a counter on every request, labelled by tenant, so Grafana can
answer "which tenant is generating the most traffic?"
"""

from prometheus_client import Counter
from prometheus_fastapi_instrumentator.metrics import Info

from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

_requests_by_tenant = Counter(
    "onyx_api_requests_by_tenant_total",
    "Total API requests by tenant",
    ["tenant_id", "method", "handler", "status"],
)


def per_tenant_request_callback(info: Info) -> None:
    """Increment per-tenant request counter for every request."""
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown"
    _requests_by_tenant.labels(
        tenant_id=tenant_id,
        method=info.method,
        handler=info.modified_handler,
        status=info.modified_status,
    ).inc()


================================================
FILE: backend/onyx/server/metrics/postgres_connection_pool.py
================================================
"""SQLAlchemy connection pool Prometheus metrics.

Provides production-grade visibility into database connection pool state:

- Pool state gauges (checked-out, idle, overflow, configured size)
- Pool lifecycle counters (checkouts, checkins, creates, invalidations, timeouts)
- Per-endpoint connection attribution (which endpoints hold connections, for how long)

Metrics are collected via two mechanisms:
1. A custom Prometheus Collector that reads pool snapshots on each /metrics scrape
2. SQLAlchemy pool event listeners (checkout, checkin, connect, invalidate) for
   counters, histograms, and attribution
"""

import time

from fastapi import Request
from fastapi.responses import JSONResponse
from prometheus_client import Counter
from prometheus_client import Gauge
from prometheus_client import Histogram
from prometheus_client.core import GaugeMetricFamily
from prometheus_client.registry import Collector
from prometheus_client.registry import REGISTRY
from sqlalchemy import event
from sqlalchemy.engine import Engine
from sqlalchemy.engine.interfaces import DBAPIConnection
from sqlalchemy.ext.asyncio import AsyncEngine
from sqlalchemy.pool import ConnectionPoolEntry
from sqlalchemy.pool import PoolProxiedConnection
from sqlalchemy.pool import QueuePool

from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()

# --- Pool lifecycle counters (event-driven) ---

_checkout_total = Counter(
    "onyx_db_pool_checkout_total",
    "Total connection checkouts from the pool",
    ["engine"],
)

_checkin_total = Counter(
    "onyx_db_pool_checkin_total",
    "Total connection checkins to the pool",
    ["engine"],
)

_connections_created_total = Counter(
    "onyx_db_pool_connections_created_total",
    "Total new database connections created",
    ["engine"],
)

_invalidations_total = Counter(
    "onyx_db_pool_invalidations_total",
    "Total connection invalidations",
    ["engine"],
)

_checkout_timeout_total = Counter(
    "onyx_db_pool_checkout_timeout_total",
    "Total connection checkout timeouts",
    ["engine"],
)

# --- Per-endpoint attribution (event-driven) ---

_connections_held = Gauge(
    "onyx_db_connections_held_by_endpoint",
    "Number of DB connections currently held, by endpoint and engine",
    ["handler", "engine", "tenant_id"],
)

_hold_seconds = Histogram(
    "onyx_db_connection_hold_seconds",
    "Duration a DB connection is held by an endpoint",
    ["handler", "engine"],
)


def pool_timeout_handler(
    request: Request,  # noqa: ARG001
    exc: Exception,
) -> JSONResponse:
    """Increment the checkout timeout counter and return 503."""
    _checkout_timeout_total.labels(engine="unknown").inc()
    return JSONResponse(
        status_code=503,
        content={
            "detail": "Database connection pool timeout",
            "error": str(exc),
        },
    )


class PoolStateCollector(Collector):
    """Custom Prometheus collector that reads QueuePool state on each scrape.

    Uses pool.checkedout(), pool.checkedin(), pool.overflow(), and pool.size()
    for an atomic snapshot of pool state. Registered engines are stored as
    (label, pool) tuples to avoid holding references to the full Engine.
    """

    def __init__(self) -> None:
        self._pools: list[tuple[str, QueuePool]] = []

    def add_pool(self, label: str, pool: QueuePool) -> None:
        self._pools.append((label, pool))

    def collect(self) -> list[GaugeMetricFamily]:
        checked_out = GaugeMetricFamily(
            "onyx_db_pool_checked_out",
            "Currently checked-out connections",
            labels=["engine"],
        )
        checked_in = GaugeMetricFamily(
            "onyx_db_pool_checked_in",
            "Idle connections available in the pool",
            labels=["engine"],
        )
        overflow = GaugeMetricFamily(
            "onyx_db_pool_overflow",
            "Current overflow connections beyond pool_size",
            labels=["engine"],
        )
        size = GaugeMetricFamily(
            "onyx_db_pool_size",
            "Configured pool size",
            labels=["engine"],
        )

        for label, pool in self._pools:
            checked_out.add_metric([label], pool.checkedout())
            checked_in.add_metric([label], pool.checkedin())
            overflow.add_metric([label], pool.overflow())
            size.add_metric([label], pool.size())

        return [checked_out, checked_in, overflow, size]

    def describe(self) -> list[GaugeMetricFamily]:
        # Return empty to mark this as an "unchecked" collector. Prometheus
        # skips upfront descriptor validation and just calls collect() at
        # scrape time. Required because our metrics are dynamic (engine
        # labels depend on which engines are registered at runtime).
        return []


def _register_pool_events(engine: Engine, label: str) -> None:
    """Attach pool event listeners for metrics collection.

    Listens to checkout, checkin, connect, and invalidate events.
    Stores per-connection metadata on connection_record.info for attribution.
    """

    @event.listens_for(engine, "checkout")
    def on_checkout(
        dbapi_conn: DBAPIConnection,  # noqa: ARG001
        conn_record: ConnectionPoolEntry,
        conn_proxy: PoolProxiedConnection,  # noqa: ARG001
    ) -> None:
        handler = CURRENT_ENDPOINT_CONTEXTVAR.get() or "unknown"
        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown"
        conn_record.info["_metrics_endpoint"] = handler
        conn_record.info["_metrics_tenant_id"] = tenant_id
        conn_record.info["_metrics_checkout_time"] = time.monotonic()
        _checkout_total.labels(engine=label).inc()
        _connections_held.labels(
            handler=handler, engine=label, tenant_id=tenant_id
        ).inc()

    @event.listens_for(engine, "checkin")
    def on_checkin(
        dbapi_conn: DBAPIConnection,  # noqa: ARG001
        conn_record: ConnectionPoolEntry,
    ) -> None:
        handler = conn_record.info.pop("_metrics_endpoint", "unknown")
        tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
        start = conn_record.info.pop("_metrics_checkout_time", None)
        _checkin_total.labels(engine=label).inc()
        _connections_held.labels(
            handler=handler, engine=label, tenant_id=tenant_id
        ).dec()
        if start is not None:
            _hold_seconds.labels(handler=handler, engine=label).observe(
                time.monotonic() - start
            )

    @event.listens_for(engine, "connect")
    def on_connect(
        dbapi_conn: DBAPIConnection,  # noqa: ARG001
        conn_record: ConnectionPoolEntry,  # noqa: ARG001
    ) -> None:
        _connections_created_total.labels(engine=label).inc()

    @event.listens_for(engine, "invalidate")
    def on_invalidate(
        dbapi_conn: DBAPIConnection,  # noqa: ARG001
        conn_record: ConnectionPoolEntry,
        exception: BaseException | None,  # noqa: ARG001
    ) -> None:
        _invalidations_total.labels(engine=label).inc()
        # Defensively clean up the held-connections gauge in case checkin
        # doesn't fire after invalidation (e.g. hard pool shutdown).
        handler = conn_record.info.pop("_metrics_endpoint", None)
        tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
        start = conn_record.info.pop("_metrics_checkout_time", None)
        if handler:
            _connections_held.labels(
                handler=handler, engine=label, tenant_id=tenant_id
            ).dec()
        if start is not None:
            _hold_seconds.labels(handler=handler or "unknown", engine=label).observe(
                time.monotonic() - start
            )


def setup_postgres_connection_pool_metrics(
    engines: dict[str, Engine | AsyncEngine],
) -> None:
    """Register pool metrics for all provided engines.

    Args:
        engines: Mapping of engine label to Engine or AsyncEngine.
            Example: {"sync": sync_engine, "async": async_engine, "readonly": ro_engine}

    Engines using NullPool are skipped (no pool state to monitor).
    For AsyncEngine, events are registered on the underlying sync_engine.
    """
    collector = PoolStateCollector()

    for label, engine in engines.items():
        # Resolve async engines to their underlying sync engine
        sync_engine = engine.sync_engine if isinstance(engine, AsyncEngine) else engine

        pool = sync_engine.pool
        if not isinstance(pool, QueuePool):
            logger.info(
                f"Skipping pool metrics for engine '{label}' ({type(pool).__name__} — no pool state)"
            )
            continue

        collector.add_pool(label, pool)
        _register_pool_events(sync_engine, label)
        logger.info(f"Registered pool metrics for engine '{label}'")

    REGISTRY.register(collector)


================================================
FILE: backend/onyx/server/metrics/prometheus_setup.py
================================================
"""Prometheus metrics setup for the Onyx API server.

Orchestrates HTTP request instrumentation via ``prometheus-fastapi-instrumentator``:
- Request count, latency histograms, in-progress gauges
- Pool checkout timeout exception handler
- Custom metric callbacks (e.g. slow request counting)

SQLAlchemy connection pool metrics are registered separately via
``setup_postgres_connection_pool_metrics`` during application lifespan
(after engines are created).
"""

from prometheus_fastapi_instrumentator import Instrumentator
from prometheus_fastapi_instrumentator.metrics import default as default_metrics
from sqlalchemy.exc import TimeoutError as SATimeoutError
from starlette.applications import Starlette

from onyx.server.metrics.per_tenant import per_tenant_request_callback
from onyx.server.metrics.postgres_connection_pool import pool_timeout_handler
from onyx.server.metrics.slow_requests import slow_request_callback

_EXCLUDED_HANDLERS = [
    "/health",
    "/metrics",
    "/openapi.json",
]

# Denser buckets for per-handler latency histograms. The instrumentator's
# default (0.1, 0.5, 1) is too coarse for meaningful P95/P99 computation.
_LATENCY_BUCKETS = (
    0.01,
    0.025,
    0.05,
    0.1,
    0.25,
    0.5,
    1.0,
    2.5,
    5.0,
    10.0,
)


def setup_prometheus_metrics(app: Starlette) -> None:
    """Initialize HTTP request metrics for the Onyx API server.

    Must be called in ``get_application()`` BEFORE the app starts, because
    the instrumentator adds middleware via ``app.add_middleware()``.

    Args:
        app: The FastAPI/Starlette application to instrument.
    """
    app.add_exception_handler(SATimeoutError, pool_timeout_handler)

    instrumentator = Instrumentator(
        should_group_status_codes=False,
        should_ignore_untemplated=False,
        should_group_untemplated=True,
        should_instrument_requests_inprogress=True,
        inprogress_labels=True,
        excluded_handlers=_EXCLUDED_HANDLERS,
    )

    # Explicitly create the default metrics (http_requests_total,
    # http_request_duration_seconds, etc.) and add them first.  The library
    # skips creating defaults when ANY custom instrumentations are registered
    # via .add(), so we must include them ourselves.
    default_callback = default_metrics(latency_lowr_buckets=_LATENCY_BUCKETS)
    if default_callback:
        instrumentator.add(default_callback)

    instrumentator.add(slow_request_callback)
    instrumentator.add(per_tenant_request_callback)

    instrumentator.instrument(app, latency_lowr_buckets=_LATENCY_BUCKETS).expose(app)


================================================
FILE: backend/onyx/server/metrics/slow_requests.py
================================================
"""Slow request counter metric.

Increments a counter whenever a request exceeds a configurable duration
threshold. Useful for identifying endpoints that regularly take too long.
"""

import os

from prometheus_client import Counter
from prometheus_fastapi_instrumentator.metrics import Info

SLOW_REQUEST_THRESHOLD_SECONDS: float = max(
    0.0,
    float(os.environ.get("SLOW_REQUEST_THRESHOLD_SECONDS", "1.0")),
)

_slow_requests = Counter(
    "onyx_api_slow_requests_total",
    "Total requests exceeding the slow request threshold",
    ["method", "handler", "status"],
)


def slow_request_callback(info: Info) -> None:
    """Increment slow request counter when duration exceeds threshold."""
    if info.modified_duration > SLOW_REQUEST_THRESHOLD_SECONDS:
        _slow_requests.labels(
            method=info.method,
            handler=info.modified_handler,
            status=info.modified_status,
        ).inc()


================================================
FILE: backend/onyx/server/middleware/latency_logging.py
================================================
import logging
import time
from collections.abc import Awaitable
from collections.abc import Callable

from fastapi import FastAPI
from fastapi import Request
from fastapi import Response


def add_latency_logging_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> None:
    @app.middleware("http")
    async def log_latency(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        start_time = time.monotonic()
        response = await call_next(request)
        process_time = time.monotonic() - start_time
        logger.debug(
            f"Path: {request.url.path} - Method: {request.method} - "
            f"Status Code: {response.status_code} - Time: {process_time:.4f} secs"
        )
        return response


================================================
FILE: backend/onyx/server/middleware/rate_limiting.py
================================================
from collections.abc import Callable
from typing import List

from fastapi import Depends
from fastapi import Request
from fastapi_limiter import FastAPILimiter
from fastapi_limiter.depends import RateLimiter

from onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED
from onyx.configs.app_configs import RATE_LIMIT_MAX_REQUESTS
from onyx.configs.app_configs import RATE_LIMIT_WINDOW_SECONDS
from onyx.redis.redis_pool import get_async_redis_connection


async def setup_auth_limiter() -> None:
    # Use the centralized async Redis connection
    redis = await get_async_redis_connection()
    await FastAPILimiter.init(redis)


async def close_auth_limiter() -> None:
    # This closes the FastAPILimiter connection so we don't leave open connections to Redis.
    await FastAPILimiter.close()


async def rate_limit_key(request: Request) -> str:
    # Uses both IP and User-Agent to make collisions less likely if IP is behind NAT.
    # If request.client is None, a fallback is used to avoid completely unknown keys.
    # This helps ensure we have a unique key for each 'user' in simple scenarios.
    ip_part = request.client.host if request.client else "unknown"
    ua_part = request.headers.get("user-agent", "none").replace(" ", "_")
    return f"{ip_part}-{ua_part}"


def get_auth_rate_limiters() -> List[Callable]:
    if not AUTH_RATE_LIMITING_ENABLED:
        return []

    return [
        Depends(
            RateLimiter(
                times=RATE_LIMIT_MAX_REQUESTS or 100,
                seconds=RATE_LIMIT_WINDOW_SECONDS or 60,
                # Use the custom key function to distinguish users
                identifier=rate_limit_key,
            )
        )
    ]


================================================
FILE: backend/onyx/server/models.py
================================================
import datetime
from typing import Generic
from typing import Optional
from typing import TypeVar
from uuid import UUID

from pydantic import BaseModel

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from onyx.db.models import User


DataT = TypeVar("DataT")


class StatusResponse(BaseModel, Generic[DataT]):
    success: bool
    message: Optional[str] = None
    data: Optional[DataT] = None


class ApiKey(BaseModel):
    api_key: str


class IdReturn(BaseModel):
    id: int


class MinimalUserSnapshot(BaseModel):
    id: UUID
    email: str


class UserGroupInfo(BaseModel):
    id: int
    name: str


class FullUserSnapshot(BaseModel):
    id: UUID
    email: str
    role: UserRole
    account_type: AccountType
    is_active: bool
    password_configured: bool
    personal_name: str | None
    created_at: datetime.datetime
    updated_at: datetime.datetime
    groups: list[UserGroupInfo]
    is_scim_synced: bool

    @classmethod
    def from_user_model(
        cls,
        user: User,
        groups: list[UserGroupInfo] | None = None,
        is_scim_synced: bool = False,
    ) -> "FullUserSnapshot":
        return cls(
            id=user.id,
            email=user.email,
            role=user.role,
            account_type=user.account_type,
            is_active=user.is_active,
            password_configured=user.password_configured,
            personal_name=user.personal_name,
            created_at=user.created_at,
            updated_at=user.updated_at,
            groups=groups or [],
            is_scim_synced=is_scim_synced,
        )


class DisplayPriorityRequest(BaseModel):
    display_priority_map: dict[int, int]


class InvitedUserSnapshot(BaseModel):
    email: str


================================================
FILE: backend/onyx/server/onyx_api/__init__.py
================================================


================================================
FILE: backend/onyx/server/onyx_api/ingestion.py
================================================
from datetime import datetime
from datetime import timezone

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import DEFAULT_CC_PAIR_ID
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import delete_documents_complete__no_commit
from onyx.db.document import get_document
from onyx.db.document import get_documents_by_cc_pair
from onyx.db.document import get_ingestion_documents
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.indexing.adapters.document_indexing_adapter import (
    DocumentIndexingBatchAdapter,
)
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.server.onyx_api.models import DocMinimalInfo
from onyx.server.onyx_api.models import IngestionDocument
from onyx.server.onyx_api.models import IngestionResult
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

# not using /api to avoid confusion with nginx api path routing
router = APIRouter(prefix="/onyx-api", tags=PUBLIC_API_TAGS)


@router.get("/connector-docs/{cc_pair_id}")
def get_docs_by_connector_credential_pair(
    cc_pair_id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[DocMinimalInfo]:
    db_docs = get_documents_by_cc_pair(cc_pair_id=cc_pair_id, db_session=db_session)
    return [
        DocMinimalInfo(
            document_id=doc.id,
            semantic_id=doc.semantic_id,
            link=doc.link,
        )
        for doc in db_docs
    ]


@router.get("/ingestion")
def get_ingestion_docs(
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[DocMinimalInfo]:
    db_docs = get_ingestion_documents(db_session)
    return [
        DocMinimalInfo(
            document_id=doc.id,
            semantic_id=doc.semantic_id,
            link=doc.link,
        )
        for doc in db_docs
    ]


@router.post("/ingestion", dependencies=[Depends(require_vector_db)])
def upsert_ingestion_doc(
    doc_info: IngestionDocument,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> IngestionResult:
    tenant_id = get_current_tenant_id()

    doc_info.document.from_ingestion_api = True

    if doc_info.document.doc_updated_at is None:
        doc_info.document.doc_updated_at = datetime.now(tz=timezone.utc)

    document = Document.from_base(doc_info.document)

    # TODO once the frontend is updated with this enum, remove this logic
    if document.source == DocumentSource.INGESTION_API:
        document.source = DocumentSource.FILE

    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=doc_info.cc_pair_id or DEFAULT_CC_PAIR_ID,
    )
    if cc_pair is None:
        raise HTTPException(
            status_code=400, detail="Connector-Credential Pair specified does not exist"
        )

    # Need to index for both the primary and secondary index if possible
    active_search_settings = get_active_search_settings(db_session)
    # This flow is for indexing so we get all indices.
    document_indices = get_all_document_indices(
        active_search_settings.primary,
        None,
        None,
    )

    search_settings = get_current_search_settings(db_session)

    index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
        search_settings=search_settings
    )

    # Build adapter for primary indexing
    adapter = DocumentIndexingBatchAdapter(
        db_session=db_session,
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        tenant_id=tenant_id,
        index_attempt_metadata=IndexAttemptMetadata(
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
        ),
    )

    indexing_pipeline_result = run_indexing_pipeline(
        embedder=index_embedding_model,
        document_indices=document_indices,
        ignore_time_skip=True,
        db_session=db_session,
        tenant_id=tenant_id,
        document_batch=[document],
        request_id=None,
        adapter=adapter,
    )

    # If there's a secondary index being built, index the doc but don't use it for return here
    if active_search_settings.secondary:
        sec_search_settings = get_secondary_search_settings(db_session)

        if sec_search_settings is None:
            # Should not ever happen
            raise RuntimeError(
                "Secondary index exists but no search settings configured"
            )

        new_index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
            search_settings=sec_search_settings
        )

        # This flow is for indexing so we get all indices.
        sec_document_indices = get_all_document_indices(
            active_search_settings.secondary, None, None
        )

        run_indexing_pipeline(
            embedder=new_index_embedding_model,
            document_indices=sec_document_indices,
            ignore_time_skip=True,
            db_session=db_session,
            tenant_id=tenant_id,
            document_batch=[document],
            request_id=None,
            adapter=adapter,
        )

    return IngestionResult(
        document_id=document.id,
        already_existed=indexing_pipeline_result.new_docs > 0,
    )


@router.delete("/ingestion/{document_id}", dependencies=[Depends(require_vector_db)])
def delete_ingestion_doc(
    document_id: str,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    tenant_id = get_current_tenant_id()

    # Verify the document exists and was created via the ingestion API
    document = get_document(document_id=document_id, db_session=db_session)
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")

    if not document.from_ingestion_api:
        raise HTTPException(
            status_code=400,
            detail="Document was not created via the ingestion API",
        )

    active_search_settings = get_active_search_settings(db_session)
    # This flow is for deletion so we get all indices.
    document_indices = get_all_document_indices(
        active_search_settings.primary,
        active_search_settings.secondary,
        None,
    )
    for document_index in document_indices:
        document_index.delete_single(
            doc_id=document_id,
            tenant_id=tenant_id,
            chunk_count=document.chunk_count,
        )

    # Delete from database
    delete_documents_complete__no_commit(db_session, [document_id])
    db_session.commit()


================================================
FILE: backend/onyx/server/onyx_api/models.py
================================================
from pydantic import BaseModel

from onyx.connectors.models import DocumentBase


class IngestionDocument(BaseModel):
    document: DocumentBase
    cc_pair_id: int | None = None


class IngestionResult(BaseModel):
    document_id: str
    already_existed: bool


class DocMinimalInfo(BaseModel):
    document_id: str
    semantic_id: str
    link: str | None = None


================================================
FILE: backend/onyx/server/pat/__init__.py
================================================


================================================
FILE: backend/onyx/server/pat/api.py
================================================
"""API endpoints for Personal Access Tokens."""

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.pat import create_pat
from onyx.db.pat import list_user_pats
from onyx.db.pat import revoke_pat
from onyx.server.pat.models import CreatedTokenResponse
from onyx.server.pat.models import CreateTokenRequest
from onyx.server.pat.models import TokenResponse
from onyx.utils.logger import setup_logger


logger = setup_logger()

router = APIRouter(prefix="/user/pats")


@router.get("")
def list_tokens(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[TokenResponse]:
    """List all active tokens for current user."""
    pats = list_user_pats(db_session, user.id)
    return [
        TokenResponse(
            id=pat.id,
            name=pat.name,
            token_display=pat.token_display,
            created_at=pat.created_at,
            expires_at=pat.expires_at,
            last_used_at=pat.last_used_at,
        )
        for pat in pats
    ]


@router.post("")
def create_token(
    request: CreateTokenRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> CreatedTokenResponse:
    """Create new personal access token for current user."""
    try:
        pat, raw_token = create_pat(
            db_session=db_session,
            user_id=user.id,
            name=request.name,
            expiration_days=request.expiration_days,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    logger.info(f"User {user.email} created PAT '{request.name}'")

    return CreatedTokenResponse(
        id=pat.id,
        name=pat.name,
        token_display=pat.token_display,
        token=raw_token,  # ONLY time we return the raw token!
        created_at=pat.created_at,
        expires_at=pat.expires_at,
        last_used_at=pat.last_used_at,
    )


@router.delete("/{token_id}")
def delete_token(
    token_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    """Delete (revoke) personal access token. Only owner can revoke their own tokens."""
    success = revoke_pat(db_session, token_id, user.id)
    if not success:
        raise HTTPException(
            status_code=404, detail="Token not found or not owned by user"
        )

    logger.info(f"User {user.email} revoked token {token_id}")
    return {"message": "Token deleted successfully"}


================================================
FILE: backend/onyx/server/pat/models.py
================================================
"""Pydantic models for Personal Access Token API."""

from datetime import datetime

from pydantic import BaseModel
from pydantic import Field


class CreateTokenRequest(BaseModel):
    name: str = Field(
        ..., min_length=1, max_length=100, description="Human-readable token name"
    )
    expiration_days: int | None = Field(
        None,
        ge=1,
        description="Days until expiration. Common values: 7, 30, 365, or null (no expiration). Must be >= 1 if provided.",
    )


class TokenResponse(BaseModel):
    id: int
    name: str
    token_display: str
    created_at: datetime
    expires_at: datetime | None
    last_used_at: datetime | None


class CreatedTokenResponse(TokenResponse):
    token: str  # Only returned on creation - user must copy it now!


================================================
FILE: backend/onyx/server/query_and_chat/__init__.py
================================================


================================================
FILE: backend/onyx/server/query_and_chat/chat_backend.py
================================================
import datetime
import json
from collections.abc import Generator
from datetime import timedelta
from uuid import UUID

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.pat import get_hashed_pat_from_request
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_user
from onyx.cache.factory import get_cache_backend
from onyx.chat.chat_processing_checker import is_chat_session_processing
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_utils import convert_chat_history_basic
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.chat_utils import create_chat_session_from_request
from onyx.chat.chat_utils import extract_headers
from onyx.chat.models import ChatFullResponse
from onyx.chat.models import CreateChatSessionID
from onyx.chat.process_message import gather_stream_full
from onyx.chat.process_message import handle_multi_model_stream
from onyx.chat.process_message import handle_stream_message_objects
from onyx.chat.prompt_utils import get_default_base_system_prompt
from onyx.chat.stop_signal_checker import set_fence
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.chat_configs import HARD_DELETE_CHATS
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.db.chat import add_chats_to_session_from_slack_thread
from onyx.db.chat import delete_all_chat_sessions_for_user
from onyx.db.chat import delete_chat_session
from onyx.db.chat import duplicate_chat_session_for_user_from_slack
from onyx.db.chat import get_chat_message
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
from onyx.db.chat import set_as_latest_chat_message
from onyx.db.chat import set_preferred_response
from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.chat import update_chat_session
from onyx.db.chat_search import search_chat_sessions
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.feedback import create_chat_message_feedback
from onyx.db.feedback import remove_chat_message_feedback
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
from onyx.db.user_file import get_file_id_by_user_file_id
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import get_default_llm
from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llm_token_counter
from onyx.secondary_llm_flows.chat_session_naming import generate_chat_session_name
from onyx.server.api_key_usage import check_api_key_usage
from onyx.server.query_and_chat.models import ChatFeedbackRequest
from onyx.server.query_and_chat.models import ChatMessageIdentifier
from onyx.server.query_and_chat.models import ChatRenameRequest
from onyx.server.query_and_chat.models import ChatSearchResponse
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import ChatSessionDetailResponse
from onyx.server.query_and_chat.models import ChatSessionDetails
from onyx.server.query_and_chat.models import ChatSessionGroup
from onyx.server.query_and_chat.models import ChatSessionsResponse
from onyx.server.query_and_chat.models import ChatSessionSummary
from onyx.server.query_and_chat.models import ChatSessionUpdateRequest
from onyx.server.query_and_chat.models import MessageOrigin
from onyx.server.query_and_chat.models import RenameChatSessionResponse
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.models import SetPreferredResponseRequest
from onyx.server.query_and_chat.models import UpdateChatSessionTemperatureRequest
from onyx.server.query_and_chat.models import UpdateChatSessionThreadRequest
from onyx.server.query_and_chat.session_loading import (
    translate_assistant_message_to_packets,
)
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.token_limit import check_token_rate_limits
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.server.usage_limits import check_usage_and_raise
from onyx.server.usage_limits import is_usage_limits_enabled
from onyx.server.utils import get_json_line
from onyx.tracing.framework.create import ensure_trace
from onyx.utils.headers import get_custom_tool_additional_request_headers
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

router = APIRouter(prefix="/chat")


def _get_available_tokens_for_persona(
    persona: Persona,
    db_session: Session,
    user: User,
) -> int:
    def _get_non_reserved_input_tokens(
        model_max_input_tokens: int,
        system_and_agent_prompt_tokens: int,
        num_tools: int,
        token_reserved_per_tool: int = 256,
        # Estimating for a long user input message, hard to know ahead of time
        default_reserved_tokens: int = 2000,
    ) -> int:
        return (
            model_max_input_tokens
            - system_and_agent_prompt_tokens
            - num_tools * token_reserved_per_tool
            - default_reserved_tokens
        )

    llm = get_llm_for_persona(persona=persona, user=user)
    token_counter = get_llm_token_counter(llm)

    if persona.replace_base_system_prompt and persona.system_prompt:
        # User has opted to replace the base system prompt entirely
        combined_prompt_tokens = token_counter(persona.system_prompt)
    else:
        # Default behavior: prepend custom prompt to base system prompt
        system_prompt = get_default_base_system_prompt(db_session)
        agent_prompt = persona.system_prompt + " " if persona.system_prompt else ""
        combined_prompt_tokens = token_counter(agent_prompt + system_prompt)

    return _get_non_reserved_input_tokens(
        model_max_input_tokens=llm.config.max_input_tokens,
        system_and_agent_prompt_tokens=combined_prompt_tokens,
        num_tools=len(persona.tools),
    )


@router.get("/get-user-chat-sessions", tags=PUBLIC_API_TAGS)
def get_user_chat_sessions(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
    project_id: int | None = None,
    only_non_project_chats: bool = True,
    include_failed_chats: bool = False,
    page_size: int = Query(default=50, ge=1, le=100),
    before: str | None = Query(default=None),
) -> ChatSessionsResponse:
    user_id = user.id

    try:
        before_dt = (
            datetime.datetime.fromisoformat(before) if before is not None else None
        )
    except ValueError:
        raise HTTPException(status_code=422, detail="Invalid 'before' timestamp format")

    try:
        # Fetch one extra to determine if there are more results
        chat_sessions = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            project_id=project_id,
            only_non_project_chats=only_non_project_chats,
            include_failed_chats=include_failed_chats,
            limit=page_size + 1,
            before=before_dt,
        )

    except ValueError:
        raise ValueError("Chat session does not exist or has been deleted")

    has_more = len(chat_sessions) > page_size
    chat_sessions = chat_sessions[:page_size]

    return ChatSessionsResponse(
        sessions=[
            ChatSessionDetails(
                id=chat.id,
                name=chat.description,
                persona_id=chat.persona_id,
                time_created=chat.time_created.isoformat(),
                time_updated=chat.time_updated.isoformat(),
                shared_status=chat.shared_status,
                current_alternate_model=chat.current_alternate_model,
                current_temperature_override=chat.temperature_override,
            )
            for chat in chat_sessions
        ],
        has_more=has_more,
    )


@router.put("/update-chat-session-temperature")
def update_chat_session_temperature(
    update_thread_req: UpdateChatSessionTemperatureRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    chat_session = get_chat_session_by_id(
        chat_session_id=update_thread_req.chat_session_id,
        user_id=user.id,
        db_session=db_session,
    )

    # Validate temperature_override
    if update_thread_req.temperature_override is not None:
        if (
            update_thread_req.temperature_override < 0
            or update_thread_req.temperature_override > 2
        ):
            raise HTTPException(
                status_code=400, detail="Temperature must be between 0 and 2"
            )

        # Additional check for Anthropic models
        if (
            chat_session.current_alternate_model
            and LlmProviderNames.ANTHROPIC
            in chat_session.current_alternate_model.lower()
        ):
            if update_thread_req.temperature_override > 1:
                raise HTTPException(
                    status_code=400,
                    detail="Temperature for Anthropic models must be between 0 and 1",
                )

    chat_session.temperature_override = update_thread_req.temperature_override

    db_session.add(chat_session)
    db_session.commit()


@router.put("/update-chat-session-model")
def update_chat_session_model(
    update_thread_req: UpdateChatSessionThreadRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    chat_session = get_chat_session_by_id(
        chat_session_id=update_thread_req.chat_session_id,
        user_id=user.id,
        db_session=db_session,
    )
    chat_session.current_alternate_model = update_thread_req.new_alternate_model

    db_session.add(chat_session)
    db_session.commit()


@router.get("/get-chat-session/{session_id}", tags=PUBLIC_API_TAGS)
def get_chat_session(
    session_id: UUID,
    is_shared: bool = False,
    include_deleted: bool = False,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> ChatSessionDetailResponse:
    user_id = user.id
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=session_id,
            user_id=user_id,
            db_session=db_session,
            is_shared=is_shared,
            include_deleted=include_deleted,
        )
    except ValueError:
        try:
            # If we failed to get a chat session, try to retrieve the session with
            # less restrictive filters in order to identify what exactly mismatched
            # so we can bubble up an accurate error code andmessage.
            existing_chat_session = get_chat_session_by_id(
                chat_session_id=session_id,
                user_id=None,
                db_session=db_session,
                is_shared=False,
                include_deleted=True,
            )
        except ValueError:
            raise HTTPException(status_code=404, detail="Chat session not found")

        if not include_deleted and existing_chat_session.deleted:
            raise HTTPException(status_code=404, detail="Chat session has been deleted")

        if is_shared:
            if existing_chat_session.shared_status != ChatSessionSharedStatus.PUBLIC:
                raise HTTPException(
                    status_code=403, detail="Chat session is not shared"
                )
        elif user_id is not None and existing_chat_session.user_id not in (
            user_id,
            None,
        ):
            raise HTTPException(status_code=403, detail="Access denied")

        raise HTTPException(status_code=404, detail="Chat session not found")

    # for chat-seeding: if the session is unassigned, assign it now. This is done here
    # to avoid another back and forth between FE -> BE before starting the first
    # message generation
    if chat_session.user_id is None and user_id is not None:
        chat_session.user_id = user_id
        db_session.commit()

    session_messages = get_chat_messages_by_session(
        chat_session_id=session_id,
        user_id=user_id,
        db_session=db_session,
        # we already did a permission check above with the call to
        # `get_chat_session_by_id`, so we can skip it here
        skip_permission_check=True,
        # we need the tool call objs anyways, so just fetch them in a single call
        prefetch_top_two_level_tool_calls=True,
    )

    # Convert messages to ChatMessageDetail format
    chat_message_details = [
        translate_db_message_to_chat_message_detail(msg) for msg in session_messages
    ]

    try:
        is_processing = is_chat_session_processing(session_id, get_cache_backend())
        # Edit the last message to indicate loading (Overriding default message value)
        if is_processing and chat_message_details:
            last_msg = chat_message_details[-1]
            if last_msg.message_type == MessageType.ASSISTANT:
                last_msg.message = "Message is loading... Please refresh the page soon."
    except Exception:
        logger.exception(
            "An error occurred while checking if the chat session is processing"
        )

    # Every assistant message might have a set of tool calls associated with it, these need to be replayed back for the frontend
    # Each list is the set of tool calls for the given assistant message.
    replay_packet_lists: list[list[Packet]] = []
    for msg in session_messages:
        if msg.message_type == MessageType.ASSISTANT:
            replay_packet_lists.append(
                translate_assistant_message_to_packets(
                    chat_message=msg, db_session=db_session
                )
            )
            # msg_packet_list.append(Packet(ind=end_step_nr, obj=OverallStop()))

    return ChatSessionDetailResponse(
        chat_session_id=session_id,
        description=chat_session.description,
        persona_id=chat_session.persona_id,
        persona_name=chat_session.persona.name if chat_session.persona else None,
        personal_icon_name=chat_session.persona.icon_name,
        current_alternate_model=chat_session.current_alternate_model,
        messages=chat_message_details,
        time_created=chat_session.time_created,
        shared_status=chat_session.shared_status,
        current_temperature_override=chat_session.temperature_override,
        deleted=chat_session.deleted,
        owner_name=chat_session.user.personal_name if chat_session.user else None,
        # Packets are now directly serialized as Packet Pydantic models
        packets=replay_packet_lists,
    )


@router.post("/create-chat-session", tags=PUBLIC_API_TAGS)
def create_new_chat_session(
    chat_session_creation_request: ChatSessionCreationRequest,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> CreateChatSessionID:
    user_id = user.id

    try:
        new_chat_session = create_chat_session_from_request(
            chat_session_request=chat_session_creation_request,
            user_id=user_id,
            db_session=db_session,
        )
    except ValueError as e:
        # Project access denied
        raise HTTPException(status_code=403, detail=str(e))
    except Exception as e:
        logger.exception(e)
        raise HTTPException(status_code=400, detail="Invalid Persona provided.")

    return CreateChatSessionID(chat_session_id=new_chat_session.id)


@router.put("/rename-chat-session")
def rename_chat_session(
    rename_req: ChatRenameRequest,
    request: Request,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> RenameChatSessionResponse:
    # 3000 tokens is more than enough for a pair of messages which is enough to provide the required context for generating a
    # good name for the chat session. It's also small enough to fit on even the worst context window LLMs.
    max_tokens_for_naming = 3000

    name = rename_req.name
    chat_session_id = rename_req.chat_session_id
    user_id = user.id

    if name:
        update_chat_session(
            db_session=db_session,
            user_id=user_id,
            chat_session_id=chat_session_id,
            description=name,
        )
        return RenameChatSessionResponse(new_name=name)

    llm = get_default_llm(
        additional_headers=extract_headers(
            request.headers, LITELLM_PASS_THROUGH_HEADERS
        )
    )

    check_llm_cost_limit_for_provider(
        db_session=db_session,
        tenant_id=get_current_tenant_id(),
        llm_provider_api_key=llm.config.api_key,
    )

    full_history = create_chat_history_chain(
        chat_session_id=chat_session_id, db_session=db_session
    )

    token_counter = get_llm_token_counter(llm)

    simple_chat_history = convert_chat_history_basic(
        chat_history=full_history,
        token_counter=token_counter,
        max_individual_message_tokens=max_tokens_for_naming,
        max_total_tokens=max_tokens_for_naming,
    )

    with ensure_trace(
        "chat_session_naming",
        group_id=str(chat_session_id),
        metadata={
            "tenant_id": get_current_tenant_id(),
            "chat_session_id": str(chat_session_id),
        },
    ):
        new_name = generate_chat_session_name(chat_history=simple_chat_history, llm=llm)

    update_chat_session(
        db_session=db_session,
        user_id=user_id,
        chat_session_id=chat_session_id,
        description=new_name,
    )

    return RenameChatSessionResponse(new_name=new_name)


@router.patch("/chat-session/{session_id}")
def patch_chat_session(
    session_id: UUID,
    chat_session_update_req: ChatSessionUpdateRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_id = user.id
    update_chat_session(
        db_session=db_session,
        user_id=user_id,
        chat_session_id=session_id,
        sharing_status=chat_session_update_req.sharing_status,
    )
    return None


@router.delete("/delete-all-chat-sessions", tags=PUBLIC_API_TAGS)
def delete_all_chat_sessions(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        delete_all_chat_sessions_for_user(user=user, db_session=db_session)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@router.delete("/delete-chat-session/{session_id}", tags=PUBLIC_API_TAGS)
def delete_chat_session_by_id(
    session_id: UUID,
    hard_delete: bool | None = None,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_id = user.id
    try:
        # Use the provided hard_delete parameter if specified, otherwise use the default config
        actual_hard_delete = (
            hard_delete if hard_delete is not None else HARD_DELETE_CHATS
        )
        delete_chat_session(
            user_id, session_id, db_session, hard_delete=actual_hard_delete
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


# NOTE: This endpoint is extremely central to the application, any changes to it should be reviewed and approved by an experienced
# team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
@router.post(
    "/send-chat-message",
    response_model=ChatFullResponse,
    tags=PUBLIC_API_TAGS,
    responses={
        200: {
            "description": (
                "If `stream=true`, returns `text/event-stream`.\n"
                "If `stream=false`, returns `application/json` (ChatFullResponse)."
            ),
            "content": {
                "text/event-stream": {
                    "schema": {"type": "string"},
                    "examples": {
                        "stream": {
                            "summary": "Stream of NDJSON AnswerStreamPart's",
                            "value": "string",
                        }
                    },
                },
            },
        }
    },
)
def handle_send_chat_message(
    chat_message_req: SendMessageRequest,
    request: Request,
    user: User = Depends(current_chat_accessible_user),
    _rate_limit_check: None = Depends(check_token_rate_limits),
    _api_key_usage_check: None = Depends(check_api_key_usage),
) -> StreamingResponse | ChatFullResponse:
    """
    This endpoint is used to send a new chat message.

    Args:
        chat_message_req (SendMessageRequest): Details about the new chat message.
            - When stream=True (default): Returns StreamingResponse with SSE
            - When stream=False: Returns ChatFullResponse with complete data
        request (Request): The current HTTP request context.
        user (User): The current user, obtained via dependency injection.
        _ (None): Rate limit check is run if user/group/global rate limits are enabled.

    Returns:
        StreamingResponse | ChatFullResponse: Either streams or returns complete response.
    """
    logger.debug(f"Received new chat message: {chat_message_req.message}")

    tenant_id = get_current_tenant_id()
    mt_cloud_telemetry(
        tenant_id=tenant_id,
        distinct_id=tenant_id if user.is_anonymous else str(user.id),
        event=MilestoneRecordType.RAN_QUERY,
    )

    # Override origin to API when authenticated via API key or PAT
    # to prevent clients from polluting telemetry data
    if get_hashed_api_key_from_request(request) or get_hashed_pat_from_request(request):
        chat_message_req.origin = MessageOrigin.API

    # Multi-model streaming path: 2-3 LLMs in parallel (streaming only)
    is_multi_model = (
        chat_message_req.llm_overrides is not None
        and len(chat_message_req.llm_overrides) > 1
    )
    if is_multi_model and chat_message_req.stream:
        # Narrowed here; is_multi_model already checked llm_overrides is not None
        llm_overrides = chat_message_req.llm_overrides or []

        def multi_model_stream_generator() -> Generator[str, None, None]:
            try:
                with get_session_with_current_tenant() as db_session:
                    for obj in handle_multi_model_stream(
                        new_msg_req=chat_message_req,
                        user=user,
                        db_session=db_session,
                        llm_overrides=llm_overrides,
                        litellm_additional_headers=extract_headers(
                            request.headers, LITELLM_PASS_THROUGH_HEADERS
                        ),
                        custom_tool_additional_headers=get_custom_tool_additional_request_headers(
                            request.headers
                        ),
                        mcp_headers=chat_message_req.mcp_headers,
                    ):
                        yield get_json_line(obj.model_dump())
            except Exception as e:
                logger.exception("Error in multi-model streaming")
                yield json.dumps({"error": str(e)})

        return StreamingResponse(
            multi_model_stream_generator(), media_type="text/event-stream"
        )

    if is_multi_model and not chat_message_req.stream:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            "Multi-model mode (llm_overrides with >1 entry) requires stream=True.",
        )

    # Non-streaming path: consume all packets and return complete response
    if not chat_message_req.stream:
        with get_session_with_current_tenant() as db_session:
            # Check and track non-streaming API usage limits
            if is_usage_limits_enabled():
                check_usage_and_raise(
                    db_session=db_session,
                    usage_type=UsageType.NON_STREAMING_API_CALLS,
                    tenant_id=tenant_id,
                    pending_amount=1,
                )
                increment_usage(
                    db_session=db_session,
                    usage_type=UsageType.NON_STREAMING_API_CALLS,
                    amount=1,
                )
                db_session.commit()

            state_container = ChatStateContainer()
            packets = handle_stream_message_objects(
                new_msg_req=chat_message_req,
                user=user,
                db_session=db_session,
                litellm_additional_headers=extract_headers(
                    request.headers, LITELLM_PASS_THROUGH_HEADERS
                ),
                custom_tool_additional_headers=get_custom_tool_additional_request_headers(
                    request.headers
                ),
                mcp_headers=chat_message_req.mcp_headers,
                additional_context=chat_message_req.additional_context,
                external_state_container=state_container,
            )
            result = gather_stream_full(packets, state_container)
            # Note: LLM cost tracking is now handled in multi_llm.py
            return result

    # Streaming path, normal Onyx UI behavior
    def stream_generator() -> Generator[str, None, None]:
        state_container = ChatStateContainer()
        try:
            with get_session_with_current_tenant() as db_session:
                for obj in handle_stream_message_objects(
                    new_msg_req=chat_message_req,
                    user=user,
                    db_session=db_session,
                    litellm_additional_headers=extract_headers(
                        request.headers, LITELLM_PASS_THROUGH_HEADERS
                    ),
                    custom_tool_additional_headers=get_custom_tool_additional_request_headers(
                        request.headers
                    ),
                    mcp_headers=chat_message_req.mcp_headers,
                    additional_context=chat_message_req.additional_context,
                    external_state_container=state_container,
                ):
                    yield get_json_line(obj.model_dump())
                # Note: LLM cost tracking is now handled in multi_llm.py

        except Exception as e:
            logger.exception("Error in chat message streaming")
            yield json.dumps({"error": str(e)})

        finally:
            logger.debug("Stream generator finished")

    return StreamingResponse(stream_generator(), media_type="text/event-stream")


@router.put("/set-message-as-latest")
def set_message_as_latest(
    message_identifier: ChatMessageIdentifier,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_id = user.id

    chat_message = get_chat_message(
        chat_message_id=message_identifier.message_id,
        user_id=user_id,
        db_session=db_session,
    )

    set_as_latest_chat_message(
        chat_message=chat_message,
        user_id=user_id,
        db_session=db_session,
    )


@router.put("/set-preferred-response")
def set_preferred_response_endpoint(
    request_body: SetPreferredResponseRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Set the preferred assistant response for a multi-model turn."""
    try:
        # Ownership check: get_chat_message raises ValueError if the message
        # doesn't belong to this user, preventing cross-user mutation.
        get_chat_message(
            chat_message_id=request_body.user_message_id,
            user_id=user.id if user else None,
            db_session=db_session,
        )
        set_preferred_response(
            db_session=db_session,
            user_message_id=request_body.user_message_id,
            preferred_assistant_message_id=request_body.preferred_response_id,
        )
    except ValueError as e:
        raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))


@router.post("/create-chat-message-feedback")
def create_chat_feedback(
    feedback: ChatFeedbackRequest,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_id = user.id

    create_chat_message_feedback(
        is_positive=feedback.is_positive,
        feedback_text=feedback.feedback_text,
        predefined_feedback=feedback.predefined_feedback,
        chat_message_id=feedback.chat_message_id,
        user_id=user_id,
        db_session=db_session,
    )


@router.delete("/remove-chat-message-feedback")
def remove_chat_feedback(
    chat_message_id: int,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> None:
    user_id = user.id

    remove_chat_message_feedback(
        chat_message_id=chat_message_id,
        user_id=user_id,
        db_session=db_session,
    )


class MaxSelectedDocumentTokens(BaseModel):
    max_tokens: int


@router.get("/max-selected-document-tokens")
def get_max_document_tokens(
    persona_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MaxSelectedDocumentTokens:
    try:
        persona = get_persona_by_id(
            persona_id=persona_id,
            user=user,
            db_session=db_session,
            is_for_edit=False,
        )
    except ValueError:
        raise HTTPException(status_code=404, detail="Persona not found")

    return MaxSelectedDocumentTokens(
        max_tokens=_get_available_tokens_for_persona(
            persona=persona,
            user=user,
            db_session=db_session,
        ),
    )


class AvailableContextTokensResponse(BaseModel):
    available_tokens: int


@router.get("/available-context-tokens/{session_id}")
def get_available_context_tokens_for_session(
    session_id: UUID,
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> AvailableContextTokensResponse:
    """Return available context tokens for a chat session based on its persona."""

    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=session_id,
            user_id=user.id,
            db_session=db_session,
            is_shared=False,
            include_deleted=False,
        )
    except ValueError:
        raise HTTPException(status_code=404, detail="Chat session not found")

    if not chat_session.persona:
        raise HTTPException(status_code=400, detail="Chat session has no persona")

    available = _get_available_tokens_for_persona(
        persona=chat_session.persona,
        user=user,
        db_session=db_session,
    )

    return AvailableContextTokensResponse(available_tokens=available)


"""Endpoints for chat seeding"""


class SeedChatFromSlackRequest(BaseModel):
    chat_session_id: UUID


class SeedChatFromSlackResponse(BaseModel):
    redirect_url: str


@router.post("/seed-chat-session-from-slack")
def seed_chat_from_slack(
    chat_seed_request: SeedChatFromSlackRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> SeedChatFromSlackResponse:
    slack_chat_session_id = chat_seed_request.chat_session_id
    new_chat_session = duplicate_chat_session_for_user_from_slack(
        db_session=db_session,
        user=user,
        chat_session_id=slack_chat_session_id,
    )

    add_chats_to_session_from_slack_thread(
        db_session=db_session,
        slack_chat_session_id=slack_chat_session_id,
        new_chat_session_id=new_chat_session.id,
    )

    return SeedChatFromSlackResponse(
        redirect_url=f"{WEB_DOMAIN}/chat?chatId={new_chat_session.id}"
    )


@router.get("/file/{file_id:path}", tags=PUBLIC_API_TAGS)
def fetch_chat_file(
    file_id: str,
    request: Request,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:

    # For user files, we need to get the file id from the user file id
    file_id_from_user_file = get_file_id_by_user_file_id(file_id, db_session)
    if file_id_from_user_file:
        file_id = file_id_from_user_file

    file_store = get_default_file_store()
    file_record = file_store.read_file_record(file_id)
    if not file_record:
        raise HTTPException(status_code=404, detail="File not found")

    media_type = file_record.file_type
    file_io = file_store.read_file(file_id, mode="b")

    # Files served here are immutable (content-addressed by file_id), so allow long-lived caching.
    # Use `private` because this is behind auth / tenant scoping.
    etag = f'"{file_id}"'
    cache_headers = {
        "Cache-Control": "private, max-age=31536000, immutable",
        "ETag": etag,
        "Vary": "Cookie",
    }

    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304, headers=cache_headers)

    return StreamingResponse(file_io, media_type=media_type, headers=cache_headers)


@router.get("/search", tags=PUBLIC_API_TAGS)
async def search_chats(
    query: str | None = Query(None),
    page: int = Query(1),
    page_size: int = Query(10),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ChatSearchResponse:
    """
    Search for chat sessions based on the provided query.
    If no query is provided, returns recent chat sessions.
    """

    # Use the enhanced database function for chat search
    chat_sessions, has_more = search_chat_sessions(
        user_id=user.id,
        db_session=db_session,
        query=query,
        page=page,
        page_size=page_size,
        include_deleted=False,
        include_onyxbot_flows=False,
    )

    # Group chat sessions by time period
    today = datetime.datetime.now().date()
    yesterday = today - timedelta(days=1)
    this_week = today - timedelta(days=7)
    this_month = today - timedelta(days=30)

    today_chats: list[ChatSessionSummary] = []
    yesterday_chats: list[ChatSessionSummary] = []
    this_week_chats: list[ChatSessionSummary] = []
    this_month_chats: list[ChatSessionSummary] = []
    older_chats: list[ChatSessionSummary] = []

    for session in chat_sessions:
        session_date = session.time_created.date()

        chat_summary = ChatSessionSummary(
            id=session.id,
            name=session.description,
            persona_id=session.persona_id,
            time_created=session.time_created,
            shared_status=session.shared_status,
            current_alternate_model=session.current_alternate_model,
            current_temperature_override=session.temperature_override,
        )

        if session_date == today:
            today_chats.append(chat_summary)
        elif session_date == yesterday:
            yesterday_chats.append(chat_summary)
        elif session_date > this_week:
            this_week_chats.append(chat_summary)
        elif session_date > this_month:
            this_month_chats.append(chat_summary)
        else:
            older_chats.append(chat_summary)

    # Create groups
    groups = []
    if today_chats:
        groups.append(ChatSessionGroup(title="Today", chats=today_chats))
    if yesterday_chats:
        groups.append(ChatSessionGroup(title="Yesterday", chats=yesterday_chats))
    if this_week_chats:
        groups.append(ChatSessionGroup(title="This Week", chats=this_week_chats))
    if this_month_chats:
        groups.append(ChatSessionGroup(title="This Month", chats=this_month_chats))
    if older_chats:
        groups.append(ChatSessionGroup(title="Older", chats=older_chats))

    return ChatSearchResponse(
        groups=groups,
        has_more=has_more,
        next_page=page + 1 if has_more else None,
    )


@router.post("/stop-chat-session/{chat_session_id}", tags=PUBLIC_API_TAGS)
def stop_chat_session(
    chat_session_id: UUID,
    user: User = Depends(current_user),  # noqa: ARG001
) -> dict[str, str]:
    """
    Stop a chat session by setting a stop signal.
    This endpoint is called by the frontend when the user clicks the stop button.
    """
    set_fence(chat_session_id, get_cache_backend(), True)
    return {"message": "Chat session stopped"}


================================================
FILE: backend/onyx/server/query_and_chat/chat_utils.py
================================================
from onyx.file_processing.file_types import OnyxMimeTypes
from onyx.file_store.models import ChatFileType


def mime_type_to_chat_file_type(mime_type: str | None) -> ChatFileType:
    if mime_type is None:
        return ChatFileType.PLAIN_TEXT

    if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:
        return ChatFileType.IMAGE

    if mime_type in OnyxMimeTypes.TABULAR_MIME_TYPES:
        return ChatFileType.TABULAR

    if mime_type in OnyxMimeTypes.DOCUMENT_MIME_TYPES:
        return ChatFileType.DOC

    return ChatFileType.PLAIN_TEXT


================================================
FILE: backend/onyx/server/query_and_chat/models.py
================================================
from datetime import datetime
from enum import Enum
from typing import Any
from uuid import UUID

from pydantic import BaseModel
from pydantic import model_validator

from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import SessionType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import Tag
from onyx.db.enums import ChatSessionSharedStatus
from onyx.db.models import ChatSession
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.streaming_models import Packet


AUTO_PLACE_AFTER_LATEST_MESSAGE = -1


class MessageOrigin(str, Enum):
    """Origin of a chat message for telemetry tracking."""

    WEBAPP = "webapp"
    CHROME_EXTENSION = "chrome_extension"
    API = "api"
    SLACKBOT = "slackbot"
    WIDGET = "widget"
    DISCORDBOT = "discordbot"
    UNKNOWN = "unknown"
    UNSET = "unset"


class MessageResponseIDInfo(BaseModel):
    user_message_id: int | None
    reserved_assistant_message_id: int


class ModelResponseSlot(BaseModel):
    """Pairs a reserved assistant message ID with its model display name."""

    message_id: int
    model_name: str


class MultiModelMessageResponseIDInfo(BaseModel):
    """Sent at the start of a multi-model streaming response.
    Contains the user message ID and one slot per model being run in parallel."""

    user_message_id: int | None
    responses: list[ModelResponseSlot]


class SourceTag(Tag):
    source: DocumentSource


class TagResponse(BaseModel):
    tags: list[SourceTag]


class UpdateChatSessionThreadRequest(BaseModel):
    # If not specified, use Onyx default persona
    chat_session_id: UUID
    new_alternate_model: str


class UpdateChatSessionTemperatureRequest(BaseModel):
    chat_session_id: UUID
    temperature_override: float


class ChatSessionCreationRequest(BaseModel):
    # If not specified, use Onyx default persona
    persona_id: int = 0
    description: str | None = None
    project_id: int | None = None


class ChatFeedbackRequest(BaseModel):
    chat_message_id: int
    is_positive: bool | None = None
    feedback_text: str | None = None
    predefined_feedback: str | None = None

    @model_validator(mode="after")
    def check_is_positive_or_feedback_text(self) -> "ChatFeedbackRequest":
        if self.is_positive is None and self.feedback_text is None:
            raise ValueError("Empty feedback received.")
        return self


# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
class SendMessageRequest(BaseModel):
    message: str

    llm_override: LLMOverride | None = None
    # For multi-model mode: up to 3 LLM overrides to run in parallel.
    # When provided with >1 entry, triggers multi-model streaming.
    llm_overrides: list[LLMOverride] | None = None
    # Test-only override for deterministic LiteLLM mock responses.
    mock_llm_response: str | None = None

    allowed_tool_ids: list[int] | None = None
    forced_tool_id: int | None = None

    file_descriptors: list[FileDescriptor] = []

    internal_search_filters: BaseFilters | None = None

    deep_research: bool = False

    # Headers to forward to MCP tool calls (e.g., user JWT token, user ID)
    # Example: {"Authorization": "Bearer <user_jwt>", "X-User-ID": "user123"}
    mcp_headers: dict[str, str] | None = None

    # Origin of the message for telemetry tracking
    origin: MessageOrigin = MessageOrigin.UNSET

    # Placement information for the message in the conversation tree:
    # - -1: auto-place after latest message in chain
    # - null: regeneration from root (first message)
    # - positive int: place after that specific parent message
    # NOTE: for regeneration, this is the only case currently where there is branching on the user message.
    # If the message of parent_message_id is a user message, the message will be ignored and it will use the
    # original user message for regeneration.
    parent_message_id: int | None = AUTO_PLACE_AFTER_LATEST_MESSAGE
    chat_session_id: UUID | None = None
    chat_session_info: ChatSessionCreationRequest | None = None

    # When True (default), returns StreamingResponse with SSE
    # When False, returns ChatFullResponse with complete data
    stream: bool = True

    # When False, disables citation generation:
    # - Citation markers like [1], [2] are removed from response text
    # - No CitationInfo packets are emitted during streaming
    include_citations: bool = True

    # Additional context injected into the LLM call but NOT stored in the DB
    # (not shown in chat history). Used e.g. by the Chrome extension to pass
    # the current tab URL when "Read this tab" is enabled.
    additional_context: str | None = None

    @model_validator(mode="after")
    def check_chat_session_id_or_info(self) -> "SendMessageRequest":
        # If neither is provided, default to creating a new chat session using the
        # default ChatSessionCreationRequest values.
        if self.chat_session_id is None and self.chat_session_info is None:
            return self.model_copy(
                update={"chat_session_info": ChatSessionCreationRequest()}
            )
        if self.chat_session_id is not None and self.chat_session_info is not None:
            raise ValueError(
                "Only one of chat_session_id or chat_session_info should be provided, not both."
            )
        return self


class ChatMessageIdentifier(BaseModel):
    message_id: int


class ChatRenameRequest(BaseModel):
    chat_session_id: UUID
    name: str | None = None


class ChatSessionUpdateRequest(BaseModel):
    sharing_status: ChatSessionSharedStatus


class DeleteAllSessionsRequest(BaseModel):
    session_type: SessionType


class RenameChatSessionResponse(BaseModel):
    new_name: str  # This is only really useful if the name is generated


class ChatSessionDetails(BaseModel):
    id: UUID
    name: str | None
    persona_id: int | None = None
    time_created: str
    time_updated: str
    shared_status: ChatSessionSharedStatus
    current_alternate_model: str | None = None
    current_temperature_override: float | None = None

    @classmethod
    def from_model(cls, model: ChatSession) -> "ChatSessionDetails":
        return cls(
            id=model.id,
            name=model.description,
            persona_id=model.persona_id,
            time_created=model.time_created.isoformat(),
            time_updated=model.time_updated.isoformat(),
            shared_status=model.shared_status,
            current_alternate_model=model.current_alternate_model,
            current_temperature_override=model.temperature_override,
        )


class ChatSessionsResponse(BaseModel):
    sessions: list[ChatSessionDetails]
    has_more: bool = False


class ChatMessageDetail(BaseModel):
    chat_session_id: UUID | None = None
    message_id: int
    parent_message: int | None = None
    latest_child_message: int | None = None
    message: str
    reasoning_tokens: str | None = None
    message_type: MessageType
    context_docs: list[SavedSearchDoc] | None = None
    # Dict mapping citation number to document_id
    citations: dict[int, str] | None = None
    time_sent: datetime
    files: list[FileDescriptor]
    error: str | None = None
    current_feedback: str | None = None  # "like" | "dislike" | null
    processing_duration_seconds: float | None = None
    preferred_response_id: int | None = None
    model_display_name: str | None = None

    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
        initial_dict = super().model_dump(mode="json", *args, **kwargs)  # type: ignore
        initial_dict["time_sent"] = self.time_sent.isoformat()
        return initial_dict


class SetPreferredResponseRequest(BaseModel):
    user_message_id: int
    preferred_response_id: int


class ChatSessionDetailResponse(BaseModel):
    chat_session_id: UUID
    description: str | None
    persona_id: int | None = None
    persona_name: str | None
    personal_icon_name: str | None
    messages: list[ChatMessageDetail]
    time_created: datetime
    shared_status: ChatSessionSharedStatus
    current_alternate_model: str | None
    current_temperature_override: float | None
    deleted: bool = False
    owner_name: str | None = None
    packets: list[list[Packet]]


class AdminSearchRequest(BaseModel):
    query: str
    filters: BaseFilters


class AdminSearchResponse(BaseModel):
    documents: list[SearchDoc]


class ChatSessionSummary(BaseModel):
    id: UUID
    name: str | None = None
    persona_id: int | None = None
    time_created: datetime
    shared_status: ChatSessionSharedStatus
    current_alternate_model: str | None = None
    current_temperature_override: float | None = None


class ChatSessionGroup(BaseModel):
    title: str
    chats: list[ChatSessionSummary]


class ChatSearchResponse(BaseModel):
    groups: list[ChatSessionGroup]
    has_more: bool
    next_page: int | None = None


================================================
FILE: backend/onyx/server/query_and_chat/placement.py
================================================
from pydantic import BaseModel


class Placement(BaseModel):
    """Coordinates that identify where a streaming packet belongs in the UI.

    The frontend uses these fields to route each packet to the correct turn,
    tool tab, agent sub-turn, and (in multi-model mode) response column.

    Attributes:
        turn_index: Monotonically increasing index of the iterative reasoning block
            (e.g. tool call round) within this chat message. Lower values happened first.
        tab_index: Disambiguates parallel tool calls within the same turn so each
            tool's output can be displayed in its own tab.
        sub_turn_index: Nesting level for tools that invoke other tools. ``None`` for
            top-level packets; an integer for tool-within-tool output.
        model_index: Which model this packet belongs to. ``0`` for single-model
            responses; ``0``, ``1``, or ``2`` for multi-model comparison. ``None``
            for pre-LLM setup packets (e.g. message ID info) that are yielded
            before any Emitter runs.
    """

    turn_index: int
    tab_index: int = 0
    sub_turn_index: int | None = None
    model_index: int | None = None


================================================
FILE: backend/onyx/server/query_and_chat/query_backend.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import SearchDoc
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.context.search.utils import get_query_embedding
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tag import find_tags
from onyx.document_index.factory import get_default_document_index
from onyx.server.query_and_chat.models import AdminSearchRequest
from onyx.server.query_and_chat.models import AdminSearchResponse
from onyx.server.query_and_chat.models import SourceTag
from onyx.server.query_and_chat.models import TagResponse
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

admin_router = APIRouter(prefix="/admin")
basic_router = APIRouter(prefix="/query")


@admin_router.post("/search", dependencies=[Depends(require_vector_db)])
def admin_search(
    question: AdminSearchRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> AdminSearchResponse:
    tenant_id = get_current_tenant_id()

    query = question.query
    logger.notice(f"Received admin search query: {query}")
    user_acl_filters = build_access_filters_for_user(user, db_session)

    final_filters = IndexFilters(
        source_type=question.filters.source_type,
        document_set=question.filters.document_set,
        time_cutoff=question.filters.time_cutoff,
        tags=question.filters.tags,
        access_control_list=user_acl_filters,
        tenant_id=tenant_id,
    )
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)

    if not query or query.strip() == "":
        matching_chunks = document_index.random_retrieval(filters=final_filters)
    else:
        query_embedding = get_query_embedding(query, db_session)
        matching_chunks = document_index.admin_retrieval(
            query=query, query_embedding=query_embedding, filters=final_filters
        )

    documents = SearchDoc.from_chunks_or_sections(matching_chunks)

    # Deduplicate documents by id
    deduplicated_documents: list[SearchDoc] = []
    seen_documents: set[str] = set()
    for document in documents:
        if document.document_id not in seen_documents:
            deduplicated_documents.append(document)
            seen_documents.add(document.document_id)
    return AdminSearchResponse(documents=deduplicated_documents)


@basic_router.get("/valid-tags")
def get_tags(
    match_pattern: str | None = None,
    # If this is empty or None, then tags for all sources are considered
    sources: list[DocumentSource] | None = None,
    allow_prefix: bool = True,  # This is currently the only option
    limit: int = 50,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> TagResponse:
    if not allow_prefix:
        raise NotImplementedError("Cannot disable prefix match for now")

    key_prefix = match_pattern
    value_prefix = match_pattern
    require_both_to_match = False

    # split on = to allow the user to type in "author=bob"
    EQUAL_PAT = "="
    if match_pattern and EQUAL_PAT in match_pattern:
        split_pattern = match_pattern.split(EQUAL_PAT)
        key_prefix = split_pattern[0]
        value_prefix = EQUAL_PAT.join(split_pattern[1:])
        require_both_to_match = True

    db_tags = find_tags(
        tag_key_prefix=key_prefix,
        tag_value_prefix=value_prefix,
        sources=sources,
        limit=limit,
        db_session=db_session,
        require_both_to_match=require_both_to_match,
    )
    server_tags = [
        SourceTag(
            tag_key=db_tag.tag_key, tag_value=db_tag.tag_value, source=db_tag.source
        )
        for db_tag in db_tags
    ]
    return TagResponse(tags=server_tags)


================================================
FILE: backend/onyx/server/query_and_chat/session_loading.py
================================================
from __future__ import annotations

import json
from typing import Any
from typing import cast
from typing import Literal

from pydantic import ValidationError
from sqlalchemy.orm import Session

from onyx.chat.citation_utils import extract_citation_order_from_text
from onyx.configs.constants import MessageType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.db.chat import get_db_search_doc_by_id
from onyx.db.chat import translate_db_search_doc_to_saved_search_doc
from onyx.db.models import ChatMessage
from onyx.db.tools import get_tool_by_id
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_IN_CODE_ID
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TASK_KEY
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import CustomToolArgs
from onyx.server.query_and_chat.streaming_models import CustomToolDelta
from onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo
from onyx.server.query_and_chat.streaming_models import CustomToolStart
from onyx.server.query_and_chat.streaming_models import FileReaderResult
from onyx.server.query_and_chat.streaming_models import FileReaderStart
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import IntermediateReportDelta
from onyx.server.query_and_chat.streaming_models import IntermediateReportStart
from onyx.server.query_and_chat.streaming_models import MemoryToolDelta
from onyx.server.query_and_chat.streaming_models import MemoryToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PythonToolDelta
from onyx.server.query_and_chat.streaming_models import PythonToolStart
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.server.query_and_chat.streaming_models import ResearchAgentStart
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool
from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.utils.logger import setup_logger

logger = setup_logger()


def create_message_packets(
    message_text: str,
    final_documents: list[SearchDoc] | None,
    turn_index: int,
) -> list[Packet]:
    packets: list[Packet] = []

    final_search_docs: list[SearchDoc] | None = None
    if final_documents:
        sorted_final_documents = sorted(
            final_documents, key=lambda x: x.score or 0.0, reverse=True
        )
        final_search_docs = [
            SearchDoc(**doc.model_dump()) for doc in sorted_final_documents
        ]

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseStart(
                final_documents=final_search_docs,
            ),
        )
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseDelta(
                content=message_text,
            ),
        ),
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=SectionEnd(),
        )
    )

    return packets


def create_citation_packets(
    citation_info_list: list[CitationInfo], turn_index: int
) -> list[Packet]:
    packets: list[Packet] = []

    # Emit each citation as a separate CitationInfo packet
    for citation_info in citation_info_list:
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index),
                obj=citation_info,
            )
        )

    packets.append(Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()))

    return packets


def create_reasoning_packets(reasoning_text: str, turn_index: int) -> list[Packet]:
    packets: list[Packet] = []

    packets.append(
        Packet(placement=Placement(turn_index=turn_index), obj=ReasoningStart())
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ReasoningDelta(
                reasoning=reasoning_text,
            ),
        ),
    )

    packets.append(Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()))

    return packets


def create_image_generation_packets(
    images: list[GeneratedImage], turn_index: int, tab_index: int = 0
) -> list[Packet]:
    packets: list[Packet] = []

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=ImageGenerationToolStart(),
        )
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=ImageGenerationFinal(images=images),
        ),
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )

    return packets


def create_custom_tool_packets(
    tool_name: str,
    response_type: str,
    turn_index: int,
    tab_index: int = 0,
    data: dict | list | str | int | float | bool | None = None,
    file_ids: list[str] | None = None,
    error: CustomToolErrorInfo | None = None,
    tool_args: dict[str, Any] | None = None,
    tool_id: int | None = None,
) -> list[Packet]:
    packets: list[Packet] = []

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=CustomToolStart(tool_name=tool_name, tool_id=tool_id),
        )
    )

    if tool_args:
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
                obj=CustomToolArgs(tool_name=tool_name, tool_args=tool_args),
            )
        )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=CustomToolDelta(
                tool_name=tool_name,
                tool_id=tool_id,
                response_type=response_type,
                data=data,
                file_ids=file_ids,
                error=error,
            ),
        ),
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )

    return packets


def create_file_reader_packets(
    summary_json: str,
    turn_index: int,
    tab_index: int = 0,
) -> list[Packet]:
    """Recreate FileReaderStart + FileReaderResult + SectionEnd from the stored
    JSON summary so that the FileReaderToolRenderer can display the result on
    page reload."""
    import json

    packets: list[Packet] = []
    placement = Placement(turn_index=turn_index, tab_index=tab_index)

    packets.append(Packet(placement=placement, obj=FileReaderStart()))

    try:
        data = json.loads(summary_json)
        packets.append(
            Packet(
                placement=placement,
                obj=FileReaderResult(
                    file_name=data["file_name"],
                    file_id=data["file_id"],
                    start_char=data["start_char"],
                    end_char=data["end_char"],
                    total_chars=data["total_chars"],
                    preview_start=data.get("preview_start", ""),
                    preview_end=data.get("preview_end", ""),
                ),
            )
        )
    except (json.JSONDecodeError, KeyError):
        # Gracefully degrade for old data that wasn't saved as JSON summary
        pass

    packets.append(Packet(placement=placement, obj=SectionEnd()))
    return packets


def create_research_agent_packets(
    research_task: str,
    report_content: str | None,
    turn_index: int,
    tab_index: int = 0,
) -> list[Packet]:
    """Create packets for research agent tool calls.
    This recreates the packet structure that ResearchAgentRenderer expects:
    - ResearchAgentStart with the research task
    - IntermediateReportStart to signal report begins
    - IntermediateReportDelta with the report content (if available)
    - SectionEnd to mark completion
    """
    packets: list[Packet] = []

    # Emit research agent start
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=ResearchAgentStart(research_task=research_task),
        )
    )

    # Emit report content if available
    if report_content:
        # Emit IntermediateReportStart before delta
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
                obj=IntermediateReportStart(),
            )
        )

        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
                obj=IntermediateReportDelta(content=report_content),
            )
        )

    # Emit section end
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )

    return packets


def create_fetch_packets(
    fetch_docs: list[SavedSearchDoc],
    urls: list[str],
    turn_index: int,
    tab_index: int = 0,
) -> list[Packet]:
    packets: list[Packet] = []
    # Emit start packet
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=OpenUrlStart(),
        )
    )
    # Emit URLs packet
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=OpenUrlUrls(urls=urls),
        )
    )
    # Emit documents packet
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=OpenUrlDocuments(
                documents=[SearchDoc(**doc.model_dump()) for doc in fetch_docs]
            ),
        )
    )
    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )
    return packets


def create_memory_packets(
    memory_text: str,
    operation: Literal["add", "update"],
    memory_id: int | None,
    turn_index: int,
    tab_index: int = 0,
    index: int | None = None,
) -> list[Packet]:
    packets: list[Packet] = []

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=MemoryToolStart(),
        )
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=MemoryToolDelta(
                memory_text=memory_text,
                operation=operation,
                memory_id=memory_id,
                index=index,
            ),
        ),
    )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )

    return packets


def create_python_tool_packets(
    code: str,
    stdout: str,
    stderr: str,
    file_ids: list[str],
    turn_index: int,
    tab_index: int = 0,
) -> list[Packet]:
    """Recreate PythonToolStart + PythonToolDelta + SectionEnd from the stored
    tool call data so the frontend can display both the code and its output
    on page reload."""
    packets: list[Packet] = []
    placement = Placement(turn_index=turn_index, tab_index=tab_index)

    packets.append(Packet(placement=placement, obj=PythonToolStart(code=code)))

    packets.append(
        Packet(
            placement=placement,
            obj=PythonToolDelta(
                stdout=stdout,
                stderr=stderr,
                file_ids=file_ids,
            ),
        )
    )

    packets.append(Packet(placement=placement, obj=SectionEnd()))
    return packets


def create_search_packets(
    search_queries: list[str],
    search_docs: list[SavedSearchDoc],
    is_internet_search: bool,
    turn_index: int,
    tab_index: int = 0,
) -> list[Packet]:
    packets: list[Packet] = []

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SearchToolStart(
                is_internet_search=is_internet_search,
            ),
        )
    )

    # Emit queries if present
    if search_queries:
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
                obj=SearchToolQueriesDelta(queries=search_queries),
            ),
        )

    # Emit documents if present
    if search_docs:
        sorted_search_docs = sorted(
            search_docs, key=lambda x: x.score or 0.0, reverse=True
        )
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
                obj=SearchToolDocumentsDelta(
                    documents=[
                        SearchDoc(**doc.model_dump()) for doc in sorted_search_docs
                    ]
                ),
            ),
        )

    packets.append(
        Packet(
            placement=Placement(turn_index=turn_index, tab_index=tab_index),
            obj=SectionEnd(),
        )
    )

    return packets


def translate_assistant_message_to_packets(
    chat_message: ChatMessage,
    db_session: Session,
) -> list[Packet]:
    """
    Translates an assistant message and tool calls to packet format.
    It needs to be a list of list of packets combined into indices for "steps".
    The final answer and citations are also a "step".
    """
    packet_list: list[Packet] = []

    if chat_message.message_type != MessageType.ASSISTANT:
        raise ValueError(f"Chat message {chat_message.id} is not an assistant message")

    if chat_message.tool_calls:
        # Group tool calls by turn_number
        tool_calls_by_turn: dict[int, list] = {}
        for tool_call in chat_message.tool_calls:
            turn_num = tool_call.turn_number
            if turn_num not in tool_calls_by_turn:
                tool_calls_by_turn[turn_num] = []
            tool_calls_by_turn[turn_num].append(tool_call)

        tool_call_turns = set(tool_calls_by_turn.keys())
        # Process each turn in order
        for turn_num in sorted(tool_calls_by_turn.keys()):
            tool_calls_in_turn = tool_calls_by_turn[turn_num]

            # Insert pre-tool reasoning once per turn (if available)
            turn_reasoning = next(
                (
                    tool_call.reasoning_tokens
                    for tool_call in tool_calls_in_turn
                    if tool_call.reasoning_tokens
                ),
                None,
            )
            if turn_reasoning:
                # Use the previous turn slot when free to preserve reasoning-before-tool ordering.
                reasoning_turn_index = turn_num
                if turn_num > 0 and (turn_num - 1) not in tool_call_turns:
                    reasoning_turn_index = turn_num - 1
                packet_list.extend(
                    create_reasoning_packets(
                        reasoning_text=turn_reasoning,
                        turn_index=reasoning_turn_index,
                    )
                )

            # Process each tool call in this turn (single pass).
            # We buffer packets for the turn so we can conditionally prepend a TopLevelBranching
            # packet (which must appear before any tool output in the turn).
            research_agent_count = 0
            turn_tool_packets: list[Packet] = []
            for tool_call in tool_calls_in_turn:
                # Here we do a try because some tools may get deleted before the session is reloaded.
                try:
                    tool = get_tool_by_id(tool_call.tool_id, db_session)
                    if tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:
                        research_agent_count += 1

                    # Handle different tool types
                    if tool.in_code_tool_id in [
                        SearchTool.__name__,
                        WebSearchTool.__name__,
                    ]:
                        queries = cast(
                            list[str], tool_call.tool_call_arguments.get("queries", [])
                        )
                        search_docs: list[SavedSearchDoc] = [
                            translate_db_search_doc_to_saved_search_doc(doc)
                            for doc in tool_call.search_docs
                        ]
                        turn_tool_packets.extend(
                            create_search_packets(
                                search_queries=queries,
                                search_docs=search_docs,
                                is_internet_search=tool.in_code_tool_id
                                == WebSearchTool.__name__,
                                turn_index=turn_num,
                                tab_index=tool_call.tab_index,
                            )
                        )

                    elif tool.in_code_tool_id == OpenURLTool.__name__:
                        fetch_docs: list[SavedSearchDoc] = [
                            translate_db_search_doc_to_saved_search_doc(doc)
                            for doc in tool_call.search_docs
                        ]
                        # Get URLs from tool_call_arguments
                        urls = cast(
                            list[str], tool_call.tool_call_arguments.get("urls", [])
                        )
                        turn_tool_packets.extend(
                            create_fetch_packets(
                                fetch_docs,
                                urls,
                                turn_num,
                                tab_index=tool_call.tab_index,
                            )
                        )

                    elif tool.in_code_tool_id == ImageGenerationTool.__name__:
                        if tool_call.generated_images:
                            images = [
                                GeneratedImage(**img)
                                for img in tool_call.generated_images
                            ]
                            turn_tool_packets.extend(
                                create_image_generation_packets(
                                    images, turn_num, tab_index=tool_call.tab_index
                                )
                            )

                    elif tool.in_code_tool_id == FileReaderTool.__name__:
                        turn_tool_packets.extend(
                            create_file_reader_packets(
                                summary_json=tool_call.tool_call_response or "",
                                turn_index=turn_num,
                                tab_index=tool_call.tab_index,
                            )
                        )

                    elif tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:
                        # Not ideal but not a huge issue if the research task is lost.
                        research_task = cast(
                            str,
                            tool_call.tool_call_arguments.get(RESEARCH_AGENT_TASK_KEY)
                            or "Could not fetch saved research task.",
                        )
                        turn_tool_packets.extend(
                            create_research_agent_packets(
                                research_task=research_task,
                                report_content=tool_call.tool_call_response,
                                turn_index=turn_num,
                                tab_index=tool_call.tab_index,
                            )
                        )

                    elif tool.in_code_tool_id == MemoryTool.__name__:
                        if tool_call.tool_call_response:
                            memory_data = json.loads(tool_call.tool_call_response)
                            turn_tool_packets.extend(
                                create_memory_packets(
                                    memory_text=memory_data["memory_text"],
                                    operation=cast(
                                        Literal["add", "update"],
                                        memory_data["operation"],
                                    ),
                                    memory_id=memory_data.get("memory_id"),
                                    turn_index=turn_num,
                                    tab_index=tool_call.tab_index,
                                    index=memory_data.get("index"),
                                )
                            )

                    elif tool.in_code_tool_id == PythonTool.__name__:
                        code = cast(
                            str,
                            tool_call.tool_call_arguments.get("code", ""),
                        )
                        stdout = ""
                        stderr = ""
                        file_ids: list[str] = []
                        if tool_call.tool_call_response:
                            try:
                                response_data = json.loads(tool_call.tool_call_response)
                                stdout = response_data.get("stdout", "")
                                stderr = response_data.get("stderr", "")
                                generated_files = response_data.get(
                                    "generated_files", []
                                )
                                file_ids = [
                                    f.get("file_link", "").split("/")[-1]
                                    for f in generated_files
                                    if f.get("file_link")
                                ]
                            except (json.JSONDecodeError, KeyError):
                                # Fall back to raw response as stdout
                                stdout = tool_call.tool_call_response
                        turn_tool_packets.extend(
                            create_python_tool_packets(
                                code=code,
                                stdout=stdout,
                                stderr=stderr,
                                file_ids=file_ids,
                                turn_index=turn_num,
                                tab_index=tool_call.tab_index,
                            )
                        )

                    else:
                        # Custom tool or unknown tool
                        # Try to parse as structured CustomToolCallSummary JSON
                        custom_data: dict | list | str | int | float | bool | None = (
                            tool_call.tool_call_response
                        )
                        custom_error: CustomToolErrorInfo | None = None
                        custom_response_type = "text"

                        try:
                            parsed = json.loads(tool_call.tool_call_response)
                            if isinstance(parsed, dict) and "tool_name" in parsed:
                                custom_data = parsed.get("tool_result")
                                custom_response_type = parsed.get(
                                    "response_type", "text"
                                )
                                if parsed.get("error"):
                                    custom_error = CustomToolErrorInfo(
                                        **parsed["error"]
                                    )
                        except (
                            json.JSONDecodeError,
                            KeyError,
                            TypeError,
                            ValidationError,
                        ):
                            pass

                        custom_file_ids: list[str] | None = None
                        if custom_response_type in ("image", "csv") and isinstance(
                            custom_data, dict
                        ):
                            custom_file_ids = custom_data.get("file_ids")
                            custom_data = None

                        custom_args = {
                            k: v
                            for k, v in (tool_call.tool_call_arguments or {}).items()
                            if k != "requestBody"
                        }
                        turn_tool_packets.extend(
                            create_custom_tool_packets(
                                tool_name=tool.display_name or tool.name,
                                response_type=custom_response_type,
                                turn_index=turn_num,
                                tab_index=tool_call.tab_index,
                                data=custom_data,
                                file_ids=custom_file_ids,
                                error=custom_error,
                                tool_args=custom_args if custom_args else None,
                                tool_id=tool_call.tool_id,
                            )
                        )

                except Exception as e:
                    logger.warning(f"Error processing tool call {tool_call.id}: {e}")
                    continue

            if research_agent_count > 1:
                # Emit TopLevelBranching before processing any tool output in the turn.
                packet_list.append(
                    Packet(
                        placement=Placement(turn_index=turn_num),
                        obj=TopLevelBranching(
                            num_parallel_branches=research_agent_count
                        ),
                    )
                )
            packet_list.extend(turn_tool_packets)

    # Determine the next turn_index for the final message
    # It should come after all tool calls
    max_tool_turn = 0
    if chat_message.tool_calls:
        max_tool_turn = max(tc.turn_number for tc in chat_message.tool_calls)

    citations = chat_message.citations
    citation_info_list: list[CitationInfo] = []

    if citations:
        for citation_num, search_doc_id in citations.items():
            search_doc = get_db_search_doc_by_id(search_doc_id, db_session)
            if search_doc:
                citation_info_list.append(
                    CitationInfo(
                        citation_number=citation_num,
                        document_id=search_doc.document_id,
                    )
                )

        # Sort citations by order of appearance in message text
        citation_order = extract_citation_order_from_text(chat_message.message or "")
        order_map = {num: idx for idx, num in enumerate(citation_order)}
        citation_info_list.sort(
            key=lambda c: order_map.get(c.citation_number, float("inf"))
        )

    # Message comes after tool calls, with optional reasoning step beforehand
    message_turn_index = max_tool_turn + 1
    if chat_message.reasoning_tokens:
        packet_list.extend(
            create_reasoning_packets(
                reasoning_text=chat_message.reasoning_tokens,
                turn_index=message_turn_index,
            )
        )
        message_turn_index += 1

    if chat_message.message:
        packet_list.extend(
            create_message_packets(
                message_text=chat_message.message,
                final_documents=[
                    translate_db_search_doc_to_saved_search_doc(doc)
                    for doc in chat_message.search_docs
                ],
                turn_index=message_turn_index,
            )
        )

    # Citations come after the message
    citation_turn_index = (
        message_turn_index + 1 if citation_info_list else message_turn_index
    )

    if len(citation_info_list) > 0:
        packet_list.extend(
            create_citation_packets(citation_info_list, citation_turn_index)
        )

    # Return the highest turn_index used
    final_turn_index = 0
    if chat_message.message_type == MessageType.ASSISTANT:
        max_tool_turn = 0
        if chat_message.tool_calls:
            max_tool_turn = max(tc.turn_number for tc in chat_message.tool_calls)

        final_turn_index = max_tool_turn
        if chat_message.reasoning_tokens:
            final_turn_index = max(final_turn_index, max_tool_turn + 1)
        if chat_message.message:
            final_turn_index = max(final_turn_index, message_turn_index)
        if citation_info_list:
            final_turn_index = max(final_turn_index, citation_turn_index)

    # Determine stop reason - check if message indicates user cancelled
    stop_reason: str | None = None
    if chat_message.message:
        if "generation was stopped" in chat_message.message.lower():
            stop_reason = "user_cancelled"

    # Add overall stop packet at the end
    packet_list.append(
        Packet(
            placement=Placement(turn_index=final_turn_index),
            obj=OverallStop(stop_reason=stop_reason),
        )
    )

    return packet_list


================================================
FILE: backend/onyx/server/query_and_chat/streaming_models.py
================================================
from enum import Enum
from typing import Annotated
from typing import Any
from typing import Literal
from typing import Union

from pydantic import BaseModel
from pydantic import Field

from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement


class StreamingType(Enum):
    """Enum defining all streaming packet types. This is the single source of truth for type strings."""

    SECTION_END = "section_end"
    STOP = "stop"
    TOP_LEVEL_BRANCHING = "top_level_branching"
    ERROR = "error"

    MESSAGE_START = "message_start"
    MESSAGE_DELTA = "message_delta"
    SEARCH_TOOL_START = "search_tool_start"
    SEARCH_TOOL_QUERIES_DELTA = "search_tool_queries_delta"
    SEARCH_TOOL_DOCUMENTS_DELTA = "search_tool_documents_delta"
    OPEN_URL_START = "open_url_start"
    OPEN_URL_URLS = "open_url_urls"
    OPEN_URL_DOCUMENTS = "open_url_documents"
    IMAGE_GENERATION_START = "image_generation_start"
    IMAGE_GENERATION_HEARTBEAT = "image_generation_heartbeat"
    IMAGE_GENERATION_FINAL = "image_generation_final"
    PYTHON_TOOL_START = "python_tool_start"
    PYTHON_TOOL_DELTA = "python_tool_delta"
    CUSTOM_TOOL_START = "custom_tool_start"
    CUSTOM_TOOL_ARGS = "custom_tool_args"
    CUSTOM_TOOL_DELTA = "custom_tool_delta"
    FILE_READER_START = "file_reader_start"
    FILE_READER_RESULT = "file_reader_result"
    REASONING_START = "reasoning_start"
    REASONING_DELTA = "reasoning_delta"
    REASONING_DONE = "reasoning_done"
    CITATION_INFO = "citation_info"
    TOOL_CALL_DEBUG = "tool_call_debug"
    TOOL_CALL_ARGUMENT_DELTA = "tool_call_argument_delta"

    MEMORY_TOOL_START = "memory_tool_start"
    MEMORY_TOOL_DELTA = "memory_tool_delta"
    MEMORY_TOOL_NO_ACCESS = "memory_tool_no_access"

    DEEP_RESEARCH_PLAN_START = "deep_research_plan_start"
    DEEP_RESEARCH_PLAN_DELTA = "deep_research_plan_delta"
    RESEARCH_AGENT_START = "research_agent_start"
    INTERMEDIATE_REPORT_START = "intermediate_report_start"
    INTERMEDIATE_REPORT_DELTA = "intermediate_report_delta"
    INTERMEDIATE_REPORT_CITED_DOCS = "intermediate_report_cited_docs"


class BaseObj(BaseModel):
    type: str = ""


################################################
# Control Packets
################################################
# This one isn't strictly necessary, remove in the future
class SectionEnd(BaseObj):
    type: Literal["section_end"] = StreamingType.SECTION_END.value


class OverallStop(BaseObj):
    type: Literal["stop"] = StreamingType.STOP.value
    stop_reason: str | None = None


class TopLevelBranching(BaseObj):
    # This class is used to give advanced heads up to the frontend that the top level flow is branching
    # This is used to avoid having the frontend render the first call then rerendering the other parallel branches
    type: Literal["top_level_branching"] = StreamingType.TOP_LEVEL_BRANCHING.value

    num_parallel_branches: int


class PacketException(BaseObj):
    type: Literal["error"] = StreamingType.ERROR.value

    exception: Exception = Field(exclude=True)
    model_config = {"arbitrary_types_allowed": True}


################################################
# Reasoning Packets
################################################
# Tells the frontend to display the reasoning block
class ReasoningStart(BaseObj):
    type: Literal["reasoning_start"] = StreamingType.REASONING_START.value


# The stream of tokens for the reasoning
class ReasoningDelta(BaseObj):
    type: Literal["reasoning_delta"] = StreamingType.REASONING_DELTA.value

    reasoning: str


class ReasoningDone(BaseObj):
    type: Literal["reasoning_done"] = StreamingType.REASONING_DONE.value


################################################
# Final Agent Response Packets
################################################
# Start of the final answer
class AgentResponseStart(BaseObj):
    type: Literal["message_start"] = StreamingType.MESSAGE_START.value

    final_documents: list[SearchDoc] | None = None
    pre_answer_processing_seconds: float | None = None


# The stream of tokens for the final response
# There is no end packet for this as the stream is over and a final OverallStop packet is emitted
class AgentResponseDelta(BaseObj):
    type: Literal["message_delta"] = StreamingType.MESSAGE_DELTA.value

    content: str


# Citation info for the sidebar and inline citations
class CitationInfo(BaseObj):
    type: Literal["citation_info"] = StreamingType.CITATION_INFO.value

    # The numerical number of the citation as provided by the LLM
    citation_number: int
    # The document id of the SearchDoc (same as the field stored in the DB)
    # This is the actual document id from the connector, not the int id
    document_id: str


class ToolCallDebug(BaseObj):
    type: Literal["tool_call_debug"] = StreamingType.TOOL_CALL_DEBUG.value

    tool_call_id: str
    tool_name: str
    tool_args: dict[str, Any]


################################################
# Tool Packets
################################################
# Search tool is called and the UI block needs to start
class SearchToolStart(BaseObj):
    type: Literal["search_tool_start"] = StreamingType.SEARCH_TOOL_START.value

    is_internet_search: bool = False


# Queries coming through as the LLM determines what to search
# Mostly for query expansions and advanced search strategies
class SearchToolQueriesDelta(BaseObj):
    type: Literal["search_tool_queries_delta"] = (
        StreamingType.SEARCH_TOOL_QUERIES_DELTA.value
    )

    queries: list[str]


# Documents coming through as the system knows what to add to the context
class SearchToolDocumentsDelta(BaseObj):
    type: Literal["search_tool_documents_delta"] = (
        StreamingType.SEARCH_TOOL_DOCUMENTS_DELTA.value
    )

    # This cannot be the SavedSearchDoc as this is yielded by the SearchTool directly
    # which does not save documents to the DB.
    documents: list[SearchDoc]


# OpenURL tool packets - 3-stage sequence
class OpenUrlStart(BaseObj):
    """Signal that OpenURL tool has started."""

    type: Literal["open_url_start"] = StreamingType.OPEN_URL_START.value


class OpenUrlUrls(BaseObj):
    """URLs to be fetched (sent before crawling begins)."""

    type: Literal["open_url_urls"] = StreamingType.OPEN_URL_URLS.value

    urls: list[str]


class OpenUrlDocuments(BaseObj):
    """Final documents after crawling completes."""

    type: Literal["open_url_documents"] = StreamingType.OPEN_URL_DOCUMENTS.value

    documents: list[SearchDoc]


# Image generation starting, needs to allocate a placeholder block for it on the UI
class ImageGenerationToolStart(BaseObj):
    type: Literal["image_generation_start"] = StreamingType.IMAGE_GENERATION_START.value


# Since image generation can take a while
# we send a heartbeat to the frontend to keep the UI/connection alive
class ImageGenerationToolHeartbeat(BaseObj):
    type: Literal["image_generation_heartbeat"] = (
        StreamingType.IMAGE_GENERATION_HEARTBEAT.value
    )


# Represents an image generated by an image generation tool
class GeneratedImage(BaseModel):
    """Represents an image generated by an image generation tool."""

    file_id: str
    url: str
    revised_prompt: str
    shape: str | None = None


# The final generated images all at once at the end of image generation
class ImageGenerationFinal(BaseObj):
    type: Literal["image_generation_final"] = StreamingType.IMAGE_GENERATION_FINAL.value

    images: list[GeneratedImage]


class PythonToolStart(BaseObj):
    type: Literal["python_tool_start"] = StreamingType.PYTHON_TOOL_START.value
    code: str


class PythonToolDelta(BaseObj):
    type: Literal["python_tool_delta"] = StreamingType.PYTHON_TOOL_DELTA.value

    stdout: str = ""
    stderr: str = ""
    file_ids: list[str] = []


# Custom tool being called, first allocate a placeholder block for it on the UI
class CustomToolStart(BaseObj):
    type: Literal["custom_tool_start"] = StreamingType.CUSTOM_TOOL_START.value

    tool_name: str
    tool_id: int | None = None


class CustomToolArgs(BaseObj):
    type: Literal["custom_tool_args"] = StreamingType.CUSTOM_TOOL_ARGS.value

    tool_name: str
    tool_args: dict[str, Any]


class CustomToolErrorInfo(BaseModel):
    is_auth_error: bool = False
    status_code: int
    message: str


# The allowed streamed packets for a custom tool
class CustomToolDelta(BaseObj):
    type: Literal["custom_tool_delta"] = StreamingType.CUSTOM_TOOL_DELTA.value

    tool_name: str
    tool_id: int | None = None
    response_type: str
    # For non-file responses
    data: dict | list | str | int | float | bool | None = None
    # For file-based responses like image/csv
    file_ids: list[str] | None = None
    error: CustomToolErrorInfo | None = None


class ToolCallArgumentDelta(BaseObj):
    type: Literal["tool_call_argument_delta"] = (
        StreamingType.TOOL_CALL_ARGUMENT_DELTA.value
    )

    tool_type: str
    argument_deltas: dict[str, Any]


################################################
# File Reader Packets
################################################
class FileReaderStart(BaseObj):
    type: Literal["file_reader_start"] = StreamingType.FILE_READER_START.value


class FileReaderResult(BaseObj):
    type: Literal["file_reader_result"] = StreamingType.FILE_READER_RESULT.value

    file_name: str
    file_id: str
    start_char: int
    end_char: int
    total_chars: int
    # Short previews of the retrieved text for the collapsed/expanded UI
    preview_start: str = ""
    preview_end: str = ""


# Memory Tool Packets
################################################
class MemoryToolStart(BaseObj):
    type: Literal["memory_tool_start"] = StreamingType.MEMORY_TOOL_START.value


class MemoryToolDelta(BaseObj):
    type: Literal["memory_tool_delta"] = StreamingType.MEMORY_TOOL_DELTA.value

    memory_text: str
    operation: Literal["add", "update"]
    memory_id: int | None = None
    index: int | None = None


class MemoryToolNoAccess(BaseObj):
    type: Literal["memory_tool_no_access"] = StreamingType.MEMORY_TOOL_NO_ACCESS.value


################################################
# Deep Research Packets
################################################
class DeepResearchPlanStart(BaseObj):
    type: Literal["deep_research_plan_start"] = (
        StreamingType.DEEP_RESEARCH_PLAN_START.value
    )


class DeepResearchPlanDelta(BaseObj):
    type: Literal["deep_research_plan_delta"] = (
        StreamingType.DEEP_RESEARCH_PLAN_DELTA.value
    )

    content: str


class ResearchAgentStart(BaseObj):
    type: Literal["research_agent_start"] = StreamingType.RESEARCH_AGENT_START.value
    research_task: str


class IntermediateReportStart(BaseObj):
    type: Literal["intermediate_report_start"] = (
        StreamingType.INTERMEDIATE_REPORT_START.value
    )


class IntermediateReportDelta(BaseObj):
    type: Literal["intermediate_report_delta"] = (
        StreamingType.INTERMEDIATE_REPORT_DELTA.value
    )
    content: str


class IntermediateReportCitedDocs(BaseObj):
    type: Literal["intermediate_report_cited_docs"] = (
        StreamingType.INTERMEDIATE_REPORT_CITED_DOCS.value
    )
    cited_docs: list[SearchDoc] | None = None


################################################
# Packet Object
################################################
# Discriminated union of all possible packet object types
PacketObj = Union[
    # Control Packets
    OverallStop,
    SectionEnd,
    TopLevelBranching,
    PacketException,
    # Agent Response Packets
    AgentResponseStart,
    AgentResponseDelta,
    # Tool Packets
    SearchToolStart,
    SearchToolQueriesDelta,
    SearchToolDocumentsDelta,
    ImageGenerationToolStart,
    ImageGenerationToolHeartbeat,
    ImageGenerationFinal,
    OpenUrlStart,
    OpenUrlUrls,
    OpenUrlDocuments,
    PythonToolStart,
    PythonToolDelta,
    CustomToolStart,
    CustomToolArgs,
    CustomToolDelta,
    FileReaderStart,
    FileReaderResult,
    MemoryToolStart,
    MemoryToolDelta,
    MemoryToolNoAccess,
    # Reasoning Packets
    ReasoningStart,
    ReasoningDelta,
    ReasoningDone,
    # Citation Packets
    CitationInfo,
    ToolCallDebug,
    ToolCallArgumentDelta,
    # Deep Research Packets
    DeepResearchPlanStart,
    DeepResearchPlanDelta,
    ResearchAgentStart,
    IntermediateReportStart,
    IntermediateReportDelta,
    IntermediateReportCitedDocs,
]


class Packet(BaseModel):
    placement: Placement

    obj: Annotated[PacketObj, Field(discriminator="type")]


================================================
FILE: backend/onyx/server/query_and_chat/token_limit.py
================================================
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from functools import lru_cache

from dateutil import tz
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.auth.users import current_chat_accessible_user
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import TokenRateLimit
from onyx.db.models import User
from onyx.db.token_limit import fetch_all_global_token_rate_limits
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation


logger = setup_logger()


TOKEN_BUDGET_UNIT = 1_000


def check_token_rate_limits(
    user: User = Depends(current_chat_accessible_user),
) -> None:
    # short circuit if no rate limits are set up
    # NOTE: result of `any_rate_limit_exists` is cached, so this call is fast 99% of the time
    if not any_rate_limit_exists():
        return

    versioned_rate_limit_strategy = fetch_versioned_implementation(
        "onyx.server.query_and_chat.token_limit", _check_token_rate_limits.__name__
    )
    return versioned_rate_limit_strategy(user)


def _check_token_rate_limits(_: User) -> None:
    _user_is_rate_limited_by_global()


"""
Global rate limits
"""


def _user_is_rate_limited_by_global() -> None:
    with get_session_with_current_tenant() as db_session:
        global_rate_limits = fetch_all_global_token_rate_limits(
            db_session=db_session, enabled_only=True, ordered=False
        )

        if global_rate_limits:
            global_cutoff_time = _get_cutoff_time(global_rate_limits)
            global_usage = _fetch_global_usage(global_cutoff_time, db_session)

            if _is_rate_limited(global_rate_limits, global_usage):
                raise HTTPException(
                    status_code=429,
                    detail="Token budget exceeded for organization. Try again later.",
                )


def _fetch_global_usage(
    cutoff_time: datetime, db_session: Session
) -> Sequence[tuple[datetime, int]]:
    """
    Fetch global token usage within the cutoff time, grouped by minute
    """
    result = db_session.execute(
        select(
            func.date_trunc("minute", ChatMessage.time_sent),
            func.sum(ChatMessage.token_count),
        )
        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)
        .filter(
            ChatMessage.time_sent >= cutoff_time,
        )
        .group_by(func.date_trunc("minute", ChatMessage.time_sent))
    ).all()

    return [(row[0], row[1]) for row in result]


"""
Common functions
"""


def _get_cutoff_time(rate_limits: Sequence[TokenRateLimit]) -> datetime:
    max_period_hours = max(rate_limit.period_hours for rate_limit in rate_limits)
    return datetime.now(tz=timezone.utc) - timedelta(hours=max_period_hours)


def _is_rate_limited(
    rate_limits: Sequence[TokenRateLimit], usage: Sequence[tuple[datetime, int]]
) -> bool:
    """
    If at least one rate limit is exceeded, return True
    """
    for rate_limit in rate_limits:
        tokens_used = sum(
            u_token_count
            for u_date, u_token_count in usage
            if u_date
            >= datetime.now(tz=tz.UTC) - timedelta(hours=rate_limit.period_hours)
        )

        if tokens_used >= rate_limit.token_budget * TOKEN_BUDGET_UNIT:
            return True

    return False


@lru_cache()
def any_rate_limit_exists() -> bool:
    """Checks if any rate limit exists in the database. Is cached, so that if no rate limits
    are setup, we don't have any effect on average query latency."""
    logger.debug("Checking for any rate limits...")
    with get_session_with_current_tenant() as db_session:
        return (
            db_session.scalar(
                select(TokenRateLimit.id).where(
                    TokenRateLimit.enabled == True  # noqa: E712
                )
            )
            is not None
        )


================================================
FILE: backend/onyx/server/runtime/onyx_runtime.py
================================================
import io
from typing import cast

from PIL import Image

from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.celery.tasks.beat_schedule import (
    CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT,
)
from onyx.configs.constants import CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT
from onyx.configs.constants import ONYX_CLOUD_REDIS_RUNTIME
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import ONYX_EMAILABLE_LOGO_MAX_DIM
from onyx.file_store.file_store import get_default_file_store
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.file import FileWithMimeType
from onyx.utils.file import OnyxStaticFileManager
from onyx.utils.variable_functionality import (
    fetch_ee_implementation_or_noop,
)


class OnyxRuntime:
    """Used by the application to get the final runtime value of a setting.

    Rationale: Settings and overrides may be persisted in multiple places, including the
    DB, Redis, env vars, and default constants, etc. The logic to present a final
    setting to the application should be centralized and in one place.

    Example: To get the logo for the application, one must check the DB for an override,
    use the override if present, fall back to the filesystem if not present, and worry
    about enterprise or not enterprise.
    """

    @staticmethod
    def _get_with_static_fallback(
        db_filename: str | None, static_filename: str
    ) -> FileWithMimeType:
        onyx_file: FileWithMimeType | None = None

        if db_filename:
            file_store = get_default_file_store()
            onyx_file = file_store.get_file_with_mime_type(db_filename)

        if not onyx_file:
            onyx_file = OnyxStaticFileManager.get_static(static_filename)

        if not onyx_file:
            raise RuntimeError(
                f"Resource not found: db={db_filename} static={static_filename}"
            )

        return onyx_file

    @staticmethod
    def get_logo() -> FileWithMimeType:
        STATIC_FILENAME = "static/images/logo.png"

        db_filename: str | None = fetch_ee_implementation_or_noop(
            "onyx.server.enterprise_settings.store", "get_logo_filename", None
        )

        return OnyxRuntime._get_with_static_fallback(db_filename, STATIC_FILENAME)

    @staticmethod
    def get_emailable_logo() -> FileWithMimeType:
        onyx_file = OnyxRuntime.get_logo()

        # check dimensions and resize downwards if necessary or if not PNG
        image = Image.open(io.BytesIO(onyx_file.data))
        if (
            image.size[0] > ONYX_EMAILABLE_LOGO_MAX_DIM
            or image.size[1] > ONYX_EMAILABLE_LOGO_MAX_DIM
            or image.format != "PNG"
        ):
            image.thumbnail(
                (ONYX_EMAILABLE_LOGO_MAX_DIM, ONYX_EMAILABLE_LOGO_MAX_DIM),
                Image.LANCZOS,
            )  # maintains aspect ratio
            output_buffer = io.BytesIO()
            image.save(output_buffer, format="PNG")
            onyx_file = FileWithMimeType(
                data=output_buffer.getvalue(), mime_type="image/png"
            )

        return onyx_file

    @staticmethod
    def get_logotype() -> FileWithMimeType:
        STATIC_FILENAME = "static/images/logotype.png"

        db_filename: str | None = fetch_ee_implementation_or_noop(
            "onyx.server.enterprise_settings.store", "get_logotype_filename", None
        )

        return OnyxRuntime._get_with_static_fallback(db_filename, STATIC_FILENAME)

    @staticmethod
    def get_beat_multiplier() -> float:
        """the beat multiplier is used to scale up or down the frequency of certain beat
        tasks in the cloud. It has a significant effect on load and is useful to adjust
        in real time."""

        beat_multiplier: float = CLOUD_BEAT_MULTIPLIER_DEFAULT

        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)

        beat_multiplier_raw = r.get(f"{ONYX_CLOUD_REDIS_RUNTIME}:beat_multiplier")
        if beat_multiplier_raw is not None:
            try:
                beat_multiplier_bytes = cast(bytes, beat_multiplier_raw)
                beat_multiplier = float(beat_multiplier_bytes.decode())
            except ValueError:
                pass

        if beat_multiplier <= 0.0:
            return 1.0

        return beat_multiplier

    @staticmethod
    def get_doc_permission_sync_multiplier() -> float:
        """Permission syncs are a significant source of load / queueing in the cloud."""

        value: float = CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT

        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)

        value_raw = r.get(f"{ONYX_CLOUD_REDIS_RUNTIME}:doc_permission_sync_multiplier")
        if value_raw is not None:
            try:
                value_bytes = cast(bytes, value_raw)
                value = float(value_bytes.decode())
            except ValueError:
                pass

        if value <= 0.0:
            return 1.0

        return value

    @staticmethod
    def get_build_fence_lookup_table_interval() -> int:
        """We maintain an active fence table to make lookups of existing fences efficient.
        However, reconstructing the table is expensive, so adjusting it in realtime is useful.
        """

        interval: int = CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT

        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)

        interval_raw = r.get(
            f"{ONYX_CLOUD_REDIS_RUNTIME}:build_fence_lookup_table_interval"
        )
        if interval_raw is not None:
            try:
                interval_bytes = cast(bytes, interval_raw)
                interval = int(interval_bytes.decode())
            except ValueError:
                pass

        if interval <= 0.0:
            return CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT

        return interval


================================================
FILE: backend/onyx/server/saml.py
================================================
import contextlib
import secrets
import string
import uuid
from typing import Any
from urllib.parse import urlparse

from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi import status
from fastapi_users import exceptions
from fastapi_users.authentication import Strategy
from onelogin.saml2.auth import OneLogin_Saml2_Auth  # type: ignore
from pydantic import BaseModel

from onyx.auth.schemas import UserCreate
from onyx.auth.schemas import UserRole
from onyx.auth.users import auth_backend
from onyx.auth.users import fastapi_users
from onyx.auth.users import get_user_manager
from onyx.auth.users import UserManager
from onyx.configs.app_configs import REQUIRE_EMAIL_VERIFICATION
from onyx.configs.app_configs import SAML_CONF_DIR
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.auth import get_user_count
from onyx.db.auth import get_user_db
from onyx.db.engine.async_sql_engine import get_async_session_context_manager
from onyx.db.models import User
from onyx.utils.logger import setup_logger


logger = setup_logger()
router = APIRouter(prefix="/auth/saml")

# Azure AD / Entra ID often returns the email attribute under different keys.
# Keep a list of common variations so we can fall back gracefully if the IdP
# does not send the plain "email" attribute name.
EMAIL_ATTRIBUTE_KEYS = {
    "email",
    "emailaddress",
    "mail",
    "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress",
    "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/mail",
    "http://schemas.microsoft.com/identity/claims/emailaddress",
}
EMAIL_ATTRIBUTE_KEYS_LOWER = {key.lower() for key in EMAIL_ATTRIBUTE_KEYS}


async def upsert_saml_user(email: str) -> User:
    """
    Creates or updates a user account for SAML authentication.

    For new users or users with non-web-login roles:
    1. Generates a secure random password that meets validation criteria
    2. Creates the user with appropriate role and verified status

    SAML users never use this password directly as they authenticate via their
    Identity Provider, but we need a valid password to satisfy system requirements.
    """
    logger.debug(f"Attempting to upsert SAML user with email: {email}")
    get_user_db_context = contextlib.asynccontextmanager(get_user_db)
    get_user_manager_context = contextlib.asynccontextmanager(get_user_manager)

    async with get_async_session_context_manager() as session:
        async with get_user_db_context(session) as user_db:
            async with get_user_manager_context(user_db) as user_manager:
                try:
                    user = await user_manager.get_by_email(email)
                    # If user has a non-authenticated role, treat as non-existent
                    if not user.account_type.is_web_login():
                        raise exceptions.UserNotExists()
                    return user
                except exceptions.UserNotExists:
                    logger.info("Creating user from SAML login")

                user_count = await get_user_count()
                role = UserRole.ADMIN if user_count == 0 else UserRole.BASIC

                # Generate a secure random password meeting validation requirements
                # We use a secure random password since we never need to know what it is
                # (SAML users authenticate via their IdP)
                secure_random_password = "".join(
                    [
                        # Ensure minimum requirements are met
                        secrets.choice(
                            string.ascii_uppercase
                        ),  # at least one uppercase
                        secrets.choice(
                            string.ascii_lowercase
                        ),  # at least one lowercase
                        secrets.choice(string.digits),  # at least one digit
                        secrets.choice(
                            "!@#$%^&*()-_=+[]{}|;:,.<>?"
                        ),  # at least one special
                        # Fill remaining length with random chars (mix of all types)
                        "".join(
                            secrets.choice(
                                string.ascii_letters
                                + string.digits
                                + "!@#$%^&*()-_=+[]{}|;:,.<>?"
                            )
                            for _ in range(12)
                        ),
                    ]
                )

                # Create the user with SAML-appropriate settings
                user = await user_manager.create(
                    UserCreate(
                        email=email,
                        password=secure_random_password,  # Pass raw password, not hash
                        role=role,
                        is_verified=True,  # SAML users are pre-verified by their IdP
                    )
                )

                return user


async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
    if request.client is None:
        raise ValueError("Invalid request for SAML")

    # Derive http_host and server_port from WEB_DOMAIN (a trusted env var)
    # instead of X-Forwarded-* headers, which can be spoofed by an attacker
    # to poison SAML redirect URLs (host header poisoning).
    parsed_domain = urlparse(WEB_DOMAIN)
    http_host = parsed_domain.hostname or request.client.host
    server_port = parsed_domain.port or (443 if parsed_domain.scheme == "https" else 80)

    rv: dict[str, Any] = {
        "http_host": http_host,
        "server_port": server_port,
        "script_name": request.url.path,
        "post_data": {},
        "get_data": {},
    }

    # Handle query parameters (for GET requests)
    if request.query_params:
        rv["get_data"] = dict(request.query_params)

    # Handle form data (for POST requests)
    if request.method == "POST":
        form_data = await request.form()
        if "SAMLResponse" in form_data:
            SAMLResponse = form_data["SAMLResponse"]
            rv["post_data"]["SAMLResponse"] = SAMLResponse
        if "RelayState" in form_data:
            RelayState = form_data["RelayState"]
            rv["post_data"]["RelayState"] = RelayState
    else:
        # For GET requests, check if SAMLResponse is in query params
        if "SAMLResponse" in request.query_params:
            rv["get_data"]["SAMLResponse"] = request.query_params["SAMLResponse"]
        if "RelayState" in request.query_params:
            rv["get_data"]["RelayState"] = request.query_params["RelayState"]

    return rv


class SAMLAuthorizeResponse(BaseModel):
    authorization_url: str


def _sanitize_relay_state(candidate: str | None) -> str | None:
    """Ensure the relay state is an internal path to avoid open redirects."""
    if not candidate:
        return None

    relay_state = candidate.strip()
    if not relay_state or not relay_state.startswith("/"):
        return None

    if "\\" in relay_state:
        return None

    # Reject colon before query/fragment to match frontend validation
    path_portion = relay_state.split("?", 1)[0].split("#", 1)[0]
    if ":" in path_portion:
        return None

    parsed = urlparse(relay_state)
    if parsed.scheme or parsed.netloc:
        return None

    return relay_state


@router.get("/authorize")
async def saml_login(request: Request) -> SAMLAuthorizeResponse:
    req = await prepare_from_fastapi_request(request)
    auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)
    return_to = _sanitize_relay_state(request.query_params.get("next"))
    callback_url = auth.login(return_to=return_to)
    return SAMLAuthorizeResponse(authorization_url=callback_url)


@router.get("/callback")
async def saml_login_callback_get(
    request: Request,
    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),
    user_manager: UserManager = Depends(get_user_manager),
) -> Response:
    """Handle SAML callback via HTTP-Redirect binding (GET request)"""
    return await _process_saml_callback(request, strategy, user_manager)


@router.post("/callback")
async def saml_login_callback(
    request: Request,
    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),
    user_manager: UserManager = Depends(get_user_manager),
) -> Response:
    """Handle SAML callback via HTTP-POST binding (POST request)"""
    return await _process_saml_callback(request, strategy, user_manager)


async def _process_saml_callback(
    request: Request,
    strategy: Strategy[User, uuid.UUID],
    user_manager: UserManager,
) -> Response:
    req = await prepare_from_fastapi_request(request)
    auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)
    auth.process_response()
    errors = auth.get_errors()
    if len(errors) != 0:
        logger.error(
            "Error when processing SAML Response: %s %s"
            % (", ".join(errors), auth.get_last_error_reason())
        )
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied. Failed to parse SAML Response.",
        )

    if not auth.is_authenticated():
        detail = "Access denied. User was not authenticated"
        logger.error(detail)
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=detail,
        )

    user_email: str | None = None

    # The OneLogin toolkit normalizes attribute keys, but still performs a
    # case-sensitive lookup. Try the common keys first and then fall back to a
    # case-insensitive scan of all returned attributes.
    for attribute_key in EMAIL_ATTRIBUTE_KEYS:
        attribute_values = auth.get_attribute(attribute_key)
        if attribute_values:
            user_email = attribute_values[0]
            break

    if not user_email:
        # Fallback: perform a case-insensitive lookup across all attributes in
        # case the IdP sent the email claim with a different capitalization.
        attributes = auth.get_attributes()
        for key, values in attributes.items():
            if key.lower() in EMAIL_ATTRIBUTE_KEYS_LOWER:
                if values:
                    user_email = values[0]
                    break
        if not user_email:
            detail = "SAML is not set up correctly, email attribute must be provided."
            logger.error(detail)
            logger.debug(
                "Received SAML attributes without email: %s",
                list(attributes.keys()),
            )
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=detail,
            )

    user = await upsert_saml_user(email=user_email)

    response = await auth_backend.login(strategy, user)
    await user_manager.on_after_login(user, request, response)
    return response


@router.post("/logout")
async def saml_logout(
    user_token: tuple[User, str] = Depends(
        fastapi_users.authenticator.current_user_token(
            active=True, verified=REQUIRE_EMAIL_VERIFICATION
        )
    ),
    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),
) -> Response:
    user, token = user_token
    return await auth_backend.logout(strategy, user, token)


================================================
FILE: backend/onyx/server/settings/api.py
================================================
from typing import cast

from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from onyx import __version__ as onyx_version
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import is_user_admin
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import NotificationType
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.notification import dismiss_all_notifications
from onyx.db.notification import get_notifications
from onyx.db.notification import update_notification_last_shown
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.features.build.utils import is_onyx_craft_enabled
from onyx.server.settings.models import (
    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Notification
from onyx.server.settings.models import Settings
from onyx.server.settings.models import UserSettings
from onyx.server.settings.store import load_settings
from onyx.server.settings.store import store_settings
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

admin_router = APIRouter(prefix="/admin/settings")
basic_router = APIRouter(prefix="/settings")


@admin_router.put("")
def admin_put_settings(
    settings: Settings, _: User = Depends(current_admin_user)
) -> None:
    if (
        settings.user_file_max_upload_size_mb is not None
        and settings.user_file_max_upload_size_mb > 0
        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
    ):
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            f"File upload size limit cannot exceed {MAX_ALLOWED_UPLOAD_SIZE_MB} MB",
        )
    store_settings(settings)


def apply_license_status_to_settings(settings: Settings) -> Settings:
    """MIT version: no-op, returns settings unchanged."""
    return settings


@basic_router.get("")
def fetch_settings(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserSettings:
    """Settings and notifications are stuffed into this single endpoint to reduce number of
    Postgres calls"""
    general_settings = load_settings()
    settings_notifications = get_settings_notifications(user, db_session)

    try:
        kv_store = get_kv_store()
        needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
    except KvKeyNotFoundError:
        needs_reindexing = False

    apply_fn = fetch_versioned_implementation_with_fallback(
        "onyx.server.settings.api",
        "apply_license_status_to_settings",
        apply_license_status_to_settings,
    )
    general_settings = apply_fn(general_settings)

    # Check if Onyx Craft is enabled for this user (used for server-side redirects)
    onyx_craft_enabled_for_user = is_onyx_craft_enabled(user) if user else False

    return UserSettings(
        **general_settings.model_dump(),
        notifications=settings_notifications,
        needs_reindexing=needs_reindexing,
        onyx_craft_enabled=onyx_craft_enabled_for_user,
        vector_db_enabled=not DISABLE_VECTOR_DB,
        hooks_enabled=not MULTI_TENANT,
        version=onyx_version,
        max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,
        default_user_file_max_upload_size_mb=min(
            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
            MAX_ALLOWED_UPLOAD_SIZE_MB,
        ),
        default_file_token_count_threshold_k=(
            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
            if DISABLE_VECTOR_DB
            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
        ),
    )


def get_settings_notifications(user: User, db_session: Session) -> list[Notification]:
    """Get notifications for settings page, including product gating and reindex notifications"""
    # Check for product gating notification
    product_notif = get_notifications(
        user=None,
        notif_type=NotificationType.TRIAL_ENDS_TWO_DAYS,
        db_session=db_session,
    )
    notifications = [Notification.from_model(product_notif[0])] if product_notif else []

    # Only show reindex notifications to admins
    if not is_user_admin(user):
        return notifications

    # Check if reindexing is needed
    kv_store = get_kv_store()
    try:
        needs_index = cast(bool, kv_store.load(KV_REINDEX_KEY))
        if not needs_index:
            dismiss_all_notifications(
                notif_type=NotificationType.REINDEX, db_session=db_session
            )
            return notifications
    except KvKeyNotFoundError:
        # If something goes wrong and the flag is gone, better to not start a reindexing
        # it's a heavyweight long running job and maybe this flag is cleaned up later
        logger.warning("Could not find reindex flag")
        return notifications

    try:
        # Need a transaction in order to prevent under-counting current notifications
        reindex_notifs = get_notifications(
            user=user, notif_type=NotificationType.REINDEX, db_session=db_session
        )

        if len(reindex_notifs) > 1:
            logger.error("User has multiple reindex notifications")
        elif not reindex_notifs:
            return notifications

        reindex_notif = reindex_notifs[0]
        update_notification_last_shown(
            notification=reindex_notif, db_session=db_session
        )

        db_session.commit()
        notifications.append(Notification.from_model(reindex_notif))
        return notifications
    except SQLAlchemyError:
        logger.exception("Error while processing notifications")
        db_session.rollback()
        return notifications


================================================
FILE: backend/onyx/server/settings/models.py
================================================
from datetime import datetime
from enum import Enum

from pydantic import BaseModel
from pydantic import Field

from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.constants import NotificationType
from onyx.configs.constants import QueryHistoryType
from onyx.db.models import Notification as NotificationDBModel
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB = 200
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB = 10000


class PageType(str, Enum):
    CHAT = "chat"
    SEARCH = "search"


class ApplicationStatus(str, Enum):
    ACTIVE = "active"
    PAYMENT_REMINDER = "payment_reminder"
    GRACE_PERIOD = "grace_period"
    GATED_ACCESS = "gated_access"
    SEAT_LIMIT_EXCEEDED = "seat_limit_exceeded"


class Notification(BaseModel):
    id: int
    notif_type: NotificationType
    dismissed: bool
    last_shown: datetime
    first_shown: datetime
    title: str
    description: str | None = None
    additional_data: dict | None = None

    @classmethod
    def from_model(cls, notif: NotificationDBModel) -> "Notification":
        return cls(
            id=notif.id,
            notif_type=notif.notif_type,
            dismissed=notif.dismissed,
            last_shown=notif.last_shown,
            first_shown=notif.first_shown,
            title=notif.title,
            description=notif.description,
            additional_data=notif.additional_data,
        )


class Settings(BaseModel):
    """General settings"""

    # is float to allow for fractional days for easier automated testing
    maximum_chat_retention_days: float | None = None
    company_name: str | None = None
    company_description: str | None = None
    gpu_enabled: bool | None = None
    application_status: ApplicationStatus = ApplicationStatus.ACTIVE
    anonymous_user_enabled: bool | None = None
    invite_only_enabled: bool = False
    deep_research_enabled: bool | None = None
    search_ui_enabled: bool | None = None

    # Whether EE features are unlocked for use.
    # Depends on license status: True when the user has a valid license
    # (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER), False when there's no license
    # or the license is expired (GATED_ACCESS).
    # This controls UI visibility of EE features (user groups, analytics, RBAC, etc.).
    ee_features_enabled: bool = False

    temperature_override_enabled: bool | None = False
    auto_scroll: bool | None = False
    query_history_type: QueryHistoryType | None = None

    # Image processing settings
    image_extraction_and_analysis_enabled: bool | None = False
    search_time_image_analysis_enabled: bool | None = False
    image_analysis_max_size_mb: int | None = 20

    # User Knowledge settings
    user_knowledge_enabled: bool | None = True
    user_file_max_upload_size_mb: int | None = Field(
        default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0
    )
    file_token_count_threshold_k: int | None = Field(
        default=None, ge=0  # thousands of tokens; None = context-aware default
    )

    # Connector settings
    show_extra_connectors: bool | None = True

    # Default Assistant settings
    disable_default_assistant: bool | None = False

    # Seat usage - populated by license enforcement when seat limit is exceeded
    seat_count: int | None = None
    used_seats: int | None = None

    # OpenSearch migration
    opensearch_indexing_enabled: bool = False


class UserSettings(Settings):
    notifications: list[Notification]
    needs_reindexing: bool
    tenant_id: str = POSTGRES_DEFAULT_SCHEMA
    # Feature flag for Onyx Craft (Build Mode) - used for server-side redirects
    onyx_craft_enabled: bool = False
    # True when a vector database (Vespa/OpenSearch) is available.
    # False when DISABLE_VECTOR_DB is set — connectors, RAG search, and
    # document sets are unavailable.
    vector_db_enabled: bool = True
    # True when hooks are available: single-tenant EE deployments only.
    hooks_enabled: bool = False
    # Application version, read from the ONYX_VERSION env var at startup.
    version: str | None = None
    # Hard ceiling for user_file_max_upload_size_mb, derived from env var.
    max_allowed_upload_size_mb: int = MAX_ALLOWED_UPLOAD_SIZE_MB
    # Factory defaults so the frontend can show a "restore default" button.
    default_user_file_max_upload_size_mb: int = DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
    default_file_token_count_threshold_k: int = Field(
        default_factory=lambda: (
            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
            if DISABLE_VECTOR_DB
            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
        )
    )


================================================
FILE: backend/onyx/server/settings/store.py
================================================
from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_USER_KNOWLEDGE
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.app_configs import SHOW_EXTRA_CONNECTORS
from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.settings.models import (
    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger

logger = setup_logger()

# TTL for settings keys - 30 days
SETTINGS_TTL = 30 * 24 * 60 * 60


def load_settings() -> Settings:
    kv_store = get_kv_store()
    try:
        stored_settings = kv_store.load(KV_SETTINGS_KEY)
        settings = (
            Settings.model_validate(stored_settings) if stored_settings else Settings()
        )
    except KvKeyNotFoundError:
        # Default to empty settings if no settings have been set yet
        logger.debug(f"No settings found in KV store for key: {KV_SETTINGS_KEY}")
        settings = Settings()
    except Exception as e:
        logger.error(f"Error loading settings from KV store: {str(e)}")
        settings = Settings()

    cache = get_cache_backend()

    try:
        value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
        if value is not None:
            anonymous_user_enabled = int(value.decode("utf-8")) == 1
        else:
            anonymous_user_enabled = False
            cache.set(OnyxRedisLocks.ANONYMOUS_USER_ENABLED, "0", ex=SETTINGS_TTL)
    except Exception as e:
        logger.error(f"Error loading anonymous user setting from cache: {str(e)}")
        anonymous_user_enabled = False

    settings.anonymous_user_enabled = anonymous_user_enabled
    settings.query_history_type = ONYX_QUERY_HISTORY_TYPE

    if DISABLE_USER_KNOWLEDGE:
        settings.user_knowledge_enabled = False

    settings.show_extra_connectors = SHOW_EXTRA_CONNECTORS
    settings.opensearch_indexing_enabled = ENABLE_OPENSEARCH_INDEXING_FOR_ONYX

    # Resolve context-aware defaults for token threshold.
    # None = admin hasn't set a value yet → use context-aware default.
    # 0 = admin explicitly chose "no limit" → preserve as-is.
    if settings.file_token_count_threshold_k is None:
        settings.file_token_count_threshold_k = (
            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
            if DISABLE_VECTOR_DB
            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
        )

    # Upload size: 0 and None are treated as "unset" (not "no limit") →
    # fall back to min(configured default, hard ceiling).
    if not settings.user_file_max_upload_size_mb:
        settings.user_file_max_upload_size_mb = min(
            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
            MAX_ALLOWED_UPLOAD_SIZE_MB,
        )

    # Clamp to env ceiling so stale KV values are capped even if the
    # operator lowered MAX_ALLOWED_UPLOAD_SIZE_MB after a higher value
    # was already saved (api.py only guards new writes).
    if (
        settings.user_file_max_upload_size_mb > 0
        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
    ):
        settings.user_file_max_upload_size_mb = MAX_ALLOWED_UPLOAD_SIZE_MB

    return settings


def store_settings(settings: Settings) -> None:
    cache = get_cache_backend()

    if settings.anonymous_user_enabled is not None:
        cache.set(
            OnyxRedisLocks.ANONYMOUS_USER_ENABLED,
            "1" if settings.anonymous_user_enabled else "0",
            ex=SETTINGS_TTL,
        )

    get_kv_store().store(KV_SETTINGS_KEY, settings.model_dump())


================================================
FILE: backend/onyx/server/tenant_usage_limits.py
================================================
"""
Non-EE version of tenant usage limit overrides.

In non-EE deployments, there are no tenant-specific overrides - all tenants
use the default limits from environment variables.

The EE version (ee.onyx.server.tenant_usage_limits) fetches per-tenant
overrides from the control plane.
"""

from enum import Enum

from pydantic import BaseModel


# NOTE: this must be updated along with the BaseModel below
class TenantUsageLimitKeys(str, Enum):
    LLM_COST_CENTS_TRIAL = "llm_cost_cents_trial"
    LLM_COST_CENTS_PAID = "llm_cost_cents_paid"
    CHUNKS_INDEXED_TRIAL = "chunks_indexed_trial"
    CHUNKS_INDEXED_PAID = "chunks_indexed_paid"
    API_CALLS_TRIAL = "api_calls_trial"
    API_CALLS_PAID = "api_calls_paid"
    NON_STREAMING_CALLS_TRIAL = "non_streaming_calls_trial"
    NON_STREAMING_CALLS_PAID = "non_streaming_calls_paid"


class TenantUsageLimitOverrides(BaseModel):
    """Usage limit overrides for a specific tenant.

    Field behavior:
    - Field not present or set to null: Use the default env var value
    - Field set to -1: No limit (unlimited)
    - Field set to a positive integer: Use that specific limit
    """

    tenant_id: str | None = None

    llm_cost_cents_trial: int | None = None
    llm_cost_cents_paid: int | None = None
    chunks_indexed_trial: int | None = None
    chunks_indexed_paid: int | None = None
    api_calls_trial: int | None = None
    api_calls_paid: int | None = None
    non_streaming_calls_trial: int | None = None
    non_streaming_calls_paid: int | None = None


def get_tenant_usage_limit_overrides(
    tenant_id: str,  # noqa: ARG001
) -> TenantUsageLimitOverrides | None:
    """
    Get the usage limit overrides for a specific tenant.

    Non-EE version always returns None (no overrides available).
    The EE version fetches tenant-specific overrides from the control plane.

    Args:
        tenant_id: The tenant ID to look up

    Returns:
        None - no overrides in non-EE deployments
    """
    return None


def load_usage_limit_overrides() -> None:
    """
    Load tenant usage limit overrides from the control plane.

    Non-EE version is a no-op since there's no control plane to fetch from.
    """
    return None


================================================
FILE: backend/onyx/server/token_rate_limits/api.py
================================================
from fastapi import APIRouter
from fastapi import Depends
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.token_limit import delete_token_rate_limit
from onyx.db.token_limit import fetch_all_global_token_rate_limits
from onyx.db.token_limit import insert_global_token_rate_limit
from onyx.db.token_limit import update_token_rate_limit
from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
from onyx.server.token_rate_limits.models import TokenRateLimitDisplay

router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)


"""
Global Token Limit Settings
"""


@router.get("/global")
def get_global_token_limit_settings(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[TokenRateLimitDisplay]:
    return [
        TokenRateLimitDisplay.from_db(token_rate_limit)
        for token_rate_limit in fetch_all_global_token_rate_limits(db_session)
    ]


@router.post("/global")
def create_global_token_limit_settings(
    token_limit_settings: TokenRateLimitArgs,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
    rate_limit_display = TokenRateLimitDisplay.from_db(
        insert_global_token_rate_limit(db_session, token_limit_settings)
    )
    # clear cache in case this was the first rate limit created
    any_rate_limit_exists.cache_clear()
    return rate_limit_display


"""
General Token Limit Settings
"""


@router.put("/rate-limit/{token_rate_limit_id}")
def update_token_limit_settings(
    token_rate_limit_id: int,
    token_limit_settings: TokenRateLimitArgs,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
    return TokenRateLimitDisplay.from_db(
        update_token_rate_limit(
            db_session=db_session,
            token_rate_limit_id=token_rate_limit_id,
            token_rate_limit_settings=token_limit_settings,
        )
    )


@router.delete("/rate-limit/{token_rate_limit_id}")
def delete_token_limit_settings(
    token_rate_limit_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    return delete_token_rate_limit(
        db_session=db_session,
        token_rate_limit_id=token_rate_limit_id,
    )


================================================
FILE: backend/onyx/server/token_rate_limits/models.py
================================================
from pydantic import BaseModel

from onyx.db.models import TokenRateLimit


class TokenRateLimitArgs(BaseModel):
    enabled: bool
    token_budget: int
    period_hours: int


class TokenRateLimitDisplay(BaseModel):
    token_id: int
    enabled: bool
    token_budget: int
    period_hours: int

    @classmethod
    def from_db(cls, token_rate_limit: TokenRateLimit) -> "TokenRateLimitDisplay":
        return cls(
            token_id=token_rate_limit.id,
            enabled=token_rate_limit.enabled,
            token_budget=token_rate_limit.token_budget,
            period_hours=token_rate_limit.period_hours,
        )


================================================
FILE: backend/onyx/server/usage_limits.py
================================================
"""Usage limits enforcement for cloud deployments."""

from collections.abc import Callable

from fastapi import HTTPException
from sqlalchemy.orm import Session

from onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
from onyx.db.usage import check_usage_limit
from onyx.db.usage import UsageLimitExceededError
from onyx.db.usage import UsageType
from onyx.server.tenant_usage_limits import TenantUsageLimitKeys
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import USAGE_LIMIT_API_CALLS_PAID
from shared_configs.configs import USAGE_LIMIT_API_CALLS_TRIAL
from shared_configs.configs import USAGE_LIMIT_CHUNKS_INDEXED_PAID
from shared_configs.configs import USAGE_LIMIT_CHUNKS_INDEXED_TRIAL
from shared_configs.configs import USAGE_LIMIT_LLM_COST_CENTS_PAID
from shared_configs.configs import USAGE_LIMIT_LLM_COST_CENTS_TRIAL
from shared_configs.configs import USAGE_LIMIT_NON_STREAMING_CALLS_PAID
from shared_configs.configs import USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL
from shared_configs.configs import USAGE_LIMITS_ENABLED

logger = setup_logger()

# Collect all Onyx-managed default API keys for comparison
_ONYX_MANAGED_API_KEYS: set[str] = set()
for key in [
    OPENAI_DEFAULT_API_KEY,
    ANTHROPIC_DEFAULT_API_KEY,
    COHERE_DEFAULT_API_KEY,
    OPENROUTER_DEFAULT_API_KEY,
]:
    if key:
        _ONYX_MANAGED_API_KEYS.add(key)


def is_onyx_managed_api_key(api_key: str | None) -> bool:
    """Check if the given API key is one of Onyx's managed default keys."""
    return bool(api_key) and api_key in _ONYX_MANAGED_API_KEYS


def is_usage_limits_enabled() -> bool:
    """Check if usage limits are enabled for this deployment."""
    return USAGE_LIMITS_ENABLED


def is_tenant_on_trial(tenant_id: str) -> bool:  # noqa: ARG001
    """
    Determine if a tenant is currently on a trial subscription.

    Non-EE version always returns False. EE version fetches billing information
    from the control plane to determine if the tenant has an active trial.
    """
    return False


def is_tenant_on_trial_fn(tenant_id: str) -> bool:
    """
    Get the versioned implementation of is_tenant_on_trial and call it.

    Uses fetch_versioned_implementation to get the EE version if available,
    otherwise falls back to the non-EE version that returns False.
    """
    fn: Callable[[str], bool] = fetch_versioned_implementation(
        "onyx.server.usage_limits", "is_tenant_on_trial"
    )
    return fn(tenant_id)


def _get_tenant_override(tenant_id: str, field_name: str) -> int | None:
    """
    Get a tenant-specific usage limit override if available.

    Uses fetch_versioned_implementation to get EE version if available.

    Returns:
        - Positive int: Use this specific limit
        - -1 (NO_LIMIT): No limit (unlimited)
        - None: No override specified, use default env var value
    """
    try:
        # Try to get EE version that has tenant overrides
        get_overrides_fn = fetch_versioned_implementation(
            "onyx.server.tenant_usage_limits", "get_tenant_usage_limit_overrides"
        )
        overrides: TenantUsageLimitOverrides | None = get_overrides_fn(tenant_id)

        if overrides is not None:
            # Get the field value - None means not set, use default
            return getattr(overrides, field_name, None)
    except Exception:
        logger.exception(
            "Error getting tenant override for %s.%s falling back to defaults",
            tenant_id,
            field_name,
        )
    return None


# Special value meaning "no limit" (unlimited)
NO_LIMIT = -1
_FIELD_AND_DEFAULT = {
    UsageType.LLM_COST: {
        True: (
            TenantUsageLimitKeys.LLM_COST_CENTS_TRIAL,
            USAGE_LIMIT_LLM_COST_CENTS_TRIAL,
        ),
        False: (
            TenantUsageLimitKeys.LLM_COST_CENTS_PAID,
            USAGE_LIMIT_LLM_COST_CENTS_PAID,
        ),
    },
    UsageType.CHUNKS_INDEXED: {
        True: (
            TenantUsageLimitKeys.CHUNKS_INDEXED_TRIAL,
            USAGE_LIMIT_CHUNKS_INDEXED_TRIAL,
        ),
        False: (
            TenantUsageLimitKeys.CHUNKS_INDEXED_PAID,
            USAGE_LIMIT_CHUNKS_INDEXED_PAID,
        ),
    },
    UsageType.API_CALLS: {
        True: (TenantUsageLimitKeys.API_CALLS_TRIAL, USAGE_LIMIT_API_CALLS_TRIAL),
        False: (TenantUsageLimitKeys.API_CALLS_PAID, USAGE_LIMIT_API_CALLS_PAID),
    },
    UsageType.NON_STREAMING_API_CALLS: {
        True: (
            TenantUsageLimitKeys.NON_STREAMING_CALLS_TRIAL,
            USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL,
        ),
        False: (
            TenantUsageLimitKeys.NON_STREAMING_CALLS_PAID,
            USAGE_LIMIT_NON_STREAMING_CALLS_PAID,
        ),
    },
}


def get_limit_for_usage_type(
    usage_type: UsageType, is_trial: bool, tenant_id: str | None
) -> int:
    """
    Get the appropriate limit based on usage type, trial status, and tenant overrides.

    Returns:
        - Positive int: The usage limit
        - NO_LIMIT (-1): No limit (unlimited) for this tenant
    """

    field_name, default_value = _FIELD_AND_DEFAULT[usage_type][is_trial]
    if tenant_id:
        override = _get_tenant_override(tenant_id, field_name)
        if override is not None:
            logger.debug(
                "Using tenant override for %s.%s: %s", tenant_id, field_name, override
            )
            return override
    logger.debug(
        "Using default value for %s.%s: %s", usage_type, is_trial, default_value
    )
    return default_value


def check_llm_cost_limit_for_provider(
    db_session: Session,
    tenant_id: str,
    llm_provider_api_key: str | None,
) -> None:
    """
    Check if the LLM cost limit would be exceeded for a provider using Onyx-managed keys.

    Only enforces limits when the provider uses Onyx-managed API keys.
    Users with their own API keys are not subject to LLM cost limits.

    Args:
        db_session: Database session for the tenant
        tenant_id: The tenant ID for trial detection
        llm_provider_api_key: The API key of the LLM provider that will be used

    Raises:
        HTTPException: 429 Too Many Requests if limit exceeded
    """
    if not is_usage_limits_enabled():
        return

    # Only enforce limits for Onyx-managed API keys
    if not is_onyx_managed_api_key(llm_provider_api_key):
        return

    check_usage_and_raise(
        db_session=db_session,
        usage_type=UsageType.LLM_COST,
        tenant_id=tenant_id,
        pending_amount=0,  # We check current usage, not pending
    )


def check_usage_and_raise(
    db_session: Session,
    usage_type: UsageType,
    tenant_id: str,
    pending_amount: float | int = 0,
) -> None:
    """
    Check if usage limit would be exceeded and raise HTTPException if so.

    Args:
        db_session: Database session for the tenant
        usage_type: Type of usage to check
        tenant_id: The tenant ID for trial detection
        pending_amount: Amount about to be used

    Raises:
        HTTPException: 429 Too Many Requests if limit exceeded
    """
    if not is_usage_limits_enabled():
        return

    is_trial = is_tenant_on_trial_fn(tenant_id)
    limit = get_limit_for_usage_type(usage_type, is_trial, tenant_id)
    logger.debug("Checking usage limit for %s.%s: %s", usage_type, is_trial, limit)

    # NO_LIMIT means this tenant has unlimited usage for this type
    if limit == NO_LIMIT:
        return

    try:
        check_usage_limit(
            db_session=db_session,
            usage_type=usage_type,
            limit=limit,
            pending_amount=pending_amount,
        )
    except UsageLimitExceededError as e:
        user_type = "trial" if is_trial else "paid"
        if usage_type == UsageType.LLM_COST:
            detail = (
                f"LLM usage limit exceeded for {user_type} account. "
                f"Current cost: ${e.current / 100:.2f}, "
                f"Limit: ${e.limit / 100:.2f} per week. "
                "Please use your own LLM API key, upgrade your plan,"
                " or wait for the next billing period (1 week)."
            )
        elif usage_type == UsageType.CHUNKS_INDEXED:
            detail = (
                f"Document indexing limit exceeded for {user_type} account. "
                f"Indexed: {int(e.current)} chunks, Limit: {int(e.limit)} per week. "
                "Please upgrade your plan or wait for the next billing period."
            )
        elif usage_type == UsageType.API_CALLS:
            detail = (
                f"API call limit exceeded for {user_type} account. "
                f"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. "
                "Please upgrade your plan or wait for the next billing period."
            )
        else:
            detail = (
                f"Non-streaming API call limit exceeded for {user_type} account. "
                f"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. "
                "Please upgrade your plan or wait for the next billing period."
            )

        raise HTTPException(status_code=429, detail=detail)


================================================
FILE: backend/onyx/server/utils.py
================================================
import base64
import json
import os
from datetime import datetime
from typing import Any
from uuid import UUID

from fastapi import HTTPException
from fastapi import status


class BasicAuthenticationError(HTTPException):
    def __init__(self, detail: str):
        super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)


class OnyxJSONEncoder(json.JSONEncoder):
    """Custom JSON encoder that converts datetime and UUID objects to strings."""

    def default(self, obj: Any) -> Any:
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, UUID):
            return str(obj)
        return super().default(obj)


def get_json_line(
    json_dict: dict[str, Any], encoder: type[json.JSONEncoder] = OnyxJSONEncoder
) -> str:
    """
    Convert a dictionary to a JSON string with custom type handling, and add a newline.

    Args:
        json_dict: The dictionary to be converted to JSON.
        encoder: JSON encoder class to use, defaults to OnyxJSONEncoder.

    Returns:
        A JSON string representation of the input dictionary with a newline character.
    """
    return json.dumps(json_dict, cls=encoder) + "\n"


def make_short_id() -> str:
    """Fast way to generate a random 8 character id ... useful for tagging data
    to trace it through a flow. This is definitely not guaranteed to be unique and is
    targeted at the stated use case."""
    return base64.b32encode(os.urandom(5)).decode("utf-8")[:8]  # 5 bytes → 8 chars


================================================
FILE: backend/onyx/server/utils_vector_db.py
================================================
"""Utilities for gating endpoints that require a vector database."""

from fastapi import HTTPException
from starlette.status import HTTP_501_NOT_IMPLEMENTED

from onyx.configs.app_configs import DISABLE_VECTOR_DB


def require_vector_db() -> None:
    """FastAPI dependency — raises 501 when the vector DB is disabled."""
    if DISABLE_VECTOR_DB:
        raise HTTPException(
            status_code=HTTP_501_NOT_IMPLEMENTED,
            detail="This feature requires a vector database (DISABLE_VECTOR_DB is set).",
        )


================================================
FILE: backend/onyx/setup.py
================================================
import time

from sqlalchemy.orm import Session

from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
from onyx.context.search.models import SavedSearchSettings
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import create_initial_default_connector
from onyx.db.connector_credential_pair import associate_default_cc_pair
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.credentials import create_initial_public_credential
from onyx.db.document import check_docs_exist
from onyx.db.enums import EmbeddingPrecision
from onyx.db.index_attempt import cancel_indexing_attempts_past_model
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.llm import fetch_default_llm_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.opensearch_document_index import set_cluster_state
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.llm.constants import LlmProviderNames
from onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.settings.store import load_settings
from onyx.server.settings.store import store_settings
from onyx.utils.gpu_utils import gpu_status_request
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import MULTI_TENANT


logger = setup_logger()


def setup_onyx(
    db_session: Session,
    tenant_id: str,  # noqa: ARG001
    cohere_enabled: bool = False,  # noqa: ARG001
) -> None:
    """
    Setup Onyx for a particular tenant. In the Single Tenant case, it will set it up for the default schema
    on server startup. In the MT case, it will be called when the tenant is created.

    The Tenant Service calls the tenants/create endpoint which runs this.
    """
    check_and_perform_index_swap(db_session=db_session)

    active_search_settings = get_active_search_settings(db_session)
    search_settings = active_search_settings.primary
    secondary_search_settings = active_search_settings.secondary

    # search_settings = get_current_search_settings(db_session)
    # multipass_config_1 = get_multipass_config(search_settings)

    # secondary_large_chunks_enabled: bool | None = None
    # secondary_search_settings = get_secondary_search_settings(db_session)
    # if secondary_search_settings:
    #     multipass_config_2 = get_multipass_config(secondary_search_settings)
    #     secondary_large_chunks_enabled = multipass_config_2.enable_large_chunks

    # Break bad state for thrashing indexes
    if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP:
        expire_index_attempts(
            search_settings_id=search_settings.id, db_session=db_session
        )

        for cc_pair in get_connector_credential_pairs(db_session):
            resync_cc_pair(
                cc_pair=cc_pair,
                search_settings_id=search_settings.id,
                db_session=db_session,
            )

    # Expire all old embedding models indexing attempts, technically redundant
    cancel_indexing_attempts_past_model(db_session)

    logger.notice(f'Using Embedding model: "{search_settings.model_name}"')
    if search_settings.query_prefix or search_settings.passage_prefix:
        logger.notice(f'Query embedding prefix: "{search_settings.query_prefix}"')
        logger.notice(f'Passage embedding prefix: "{search_settings.passage_prefix}"')

    if search_settings:
        if search_settings.multilingual_expansion:
            logger.notice(
                f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}."
            )

    # setup Postgres with default credential, llm providers, etc.
    setup_postgres(db_session)

    # Does the user need to trigger a reindexing to bring the document index
    # into a good state, marked in the kv store
    if not MULTI_TENANT:
        mark_reindex_flag(db_session)

    if DISABLE_VECTOR_DB:
        logger.notice(
            "DISABLE_VECTOR_DB is set — skipping document index setup and embedding model warm-up."
        )
    else:
        # Ensure Vespa is setup correctly, this step is relatively near the end
        # because Vespa takes a bit of time to start up
        logger.notice("Verifying Document Index(s) is/are available.")
        # This flow is for setting up the document index so we get all indices here.
        document_indices = get_all_document_indices(
            search_settings,
            secondary_search_settings,
            None,
        )

        success = setup_document_indices(
            document_indices,
            IndexingSetting.from_db_model(search_settings),
            (
                IndexingSetting.from_db_model(secondary_search_settings)
                if secondary_search_settings
                else None
            ),
        )
        if not success:
            raise RuntimeError(
                "Could not connect to a document index within the specified timeout."
            )

        logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}")
        if search_settings.provider_type is None:
            # In integration tests, do not block API startup on warm-up
            warm_up_bi_encoder(
                embedding_model=EmbeddingModel.from_db_model(
                    search_settings=search_settings,
                    server_host=MODEL_SERVER_HOST,
                    server_port=MODEL_SERVER_PORT,
                ),
                non_blocking=INTEGRATION_TESTS_MODE,
            )

        # update multipass indexing setting based on GPU availability
        update_default_multipass_indexing(db_session)


def mark_reindex_flag(db_session: Session) -> None:
    kv_store = get_kv_store()
    try:
        value = kv_store.load(KV_REINDEX_KEY)
        logger.debug(f"Re-indexing flag has value {value}")
        return
    except KvKeyNotFoundError:
        # Only need to update the flag if it hasn't been set
        pass

    # If their first deployment is after the changes, it will
    # enable this when the other changes go in, need to avoid
    # this being set to False, then the user indexes things on the old version
    docs_exist = check_docs_exist(db_session)
    connectors_exist = check_connectors_exist(db_session)
    if docs_exist or connectors_exist:
        kv_store.store(KV_REINDEX_KEY, True)
    else:
        kv_store.store(KV_REINDEX_KEY, False)


def setup_document_indices(
    document_indices: list[DocumentIndex],
    index_setting: IndexingSetting,
    secondary_index_setting: IndexingSetting | None,
    num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,
) -> bool:
    """Sets up all input document indices.

    If any document index setup fails, the function will return False. Otherwise
    returns True.
    """
    for document_index in document_indices:
        # Document index startup is a bit slow, so give it a few seconds.
        WAIT_SECONDS = 5
        document_index_setup_success = False
        for x in range(num_attempts):
            try:
                logger.notice(
                    f"Setting up document index {document_index.__class__.__name__} (attempt {x + 1}/{num_attempts})..."
                )
                document_index.ensure_indices_exist(
                    primary_embedding_dim=index_setting.final_embedding_dim,
                    primary_embedding_precision=index_setting.embedding_precision,
                    secondary_index_embedding_dim=(
                        secondary_index_setting.final_embedding_dim
                        if secondary_index_setting
                        else None
                    ),
                    secondary_index_embedding_precision=(
                        secondary_index_setting.embedding_precision
                        if secondary_index_setting
                        else None
                    ),
                )

                logger.notice(
                    f"Document index {document_index.__class__.__name__} setup complete."
                )
                document_index_setup_success = True
                break
            except Exception:
                logger.exception(
                    f"Document index {document_index.__class__.__name__} setup did not succeed. "
                    "The relevant service may not be ready yet. "
                    f"Retrying in {WAIT_SECONDS} seconds."
                )
                time.sleep(WAIT_SECONDS)

        if not document_index_setup_success:
            logger.error(
                f"Document index {document_index.__class__.__name__} setup did not succeed. "
                f"Attempt limit reached. ({num_attempts})"
            )
            return False

    return True


def setup_postgres(db_session: Session) -> None:
    logger.notice("Verifying default connector/credential exist.")
    create_initial_public_credential(db_session)
    create_initial_default_connector(db_session)
    associate_default_cc_pair(db_session)

    if GEN_AI_API_KEY and fetch_default_llm_model(db_session) is None:
        # Only for dev flows
        logger.notice("Setting up default OpenAI LLM for dev.")

        llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini"
        provider_name = "DevEnvPresetOpenAI"
        existing = fetch_existing_llm_provider(
            name=provider_name, db_session=db_session
        )
        model_req = LLMProviderUpsertRequest(
            id=existing.id if existing else None,
            name=provider_name,
            provider=LlmProviderNames.OPENAI,
            api_key=GEN_AI_API_KEY,
            api_base=None,
            api_version=None,
            custom_config=None,
            is_public=True,
            groups=[],
            model_configurations=[
                ModelConfigurationUpsertRequest(name=name, is_visible=True)
                for name in get_openai_model_names()
            ],
            api_key_changed=True,
        )
        try:
            new_llm_provider = upsert_llm_provider(
                llm_provider_upsert_request=model_req, db_session=db_session
            )
        except ValueError as e:
            logger.warning("Failed to upsert LLM provider during setup: %s", e)
            return
        update_default_provider(
            provider_id=new_llm_provider.id, model_name=llm_model, db_session=db_session
        )


def update_default_multipass_indexing(db_session: Session) -> None:
    docs_exist = check_docs_exist(db_session)
    connectors_exist = check_connectors_exist(db_session)
    logger.debug(f"Docs exist: {docs_exist}, Connectors exist: {connectors_exist}")

    if not docs_exist and not connectors_exist:
        logger.info(
            "No existing docs or connectors found. Checking GPU availability for multipass indexing."
        )
        gpu_available = gpu_status_request(indexing=True)
        logger.info(f"GPU available: {gpu_available}")

        current_settings = get_current_search_settings(db_session)

        logger.notice(f"Updating multipass indexing setting to: {gpu_available}")
        updated_settings = SavedSearchSettings.from_db_model(current_settings)
        # Enable multipass indexing if GPU is available or if using a cloud provider
        updated_settings.multipass_indexing = (
            gpu_available or current_settings.cloud_provider is not None
        )
        update_current_search_settings(db_session, updated_settings)

        # Update settings with GPU availability
        settings = load_settings()
        settings.gpu_enabled = gpu_available
        store_settings(settings)
        logger.notice(f"Updated settings with GPU availability: {gpu_available}")

    else:
        logger.debug(
            "Existing docs or connectors found. Skipping multipass indexing update."
        )


def setup_multitenant_onyx() -> None:
    if DISABLE_VECTOR_DB:
        logger.notice("DISABLE_VECTOR_DB is set — skipping multitenant Vespa setup.")
        return

    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        opensearch_client = OpenSearchClient()
        if not wait_for_opensearch_with_timeout(client=opensearch_client):
            raise RuntimeError("Failed to connect to OpenSearch.")
        set_cluster_state(opensearch_client)

    # For Managed Vespa, the schema is sent over via the Vespa Console manually.
    # NOTE: Pretty sure this code is never hit in any production environment.
    if not MANAGED_VESPA:
        setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS)


def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:
    # TODO(andrei): We don't yet support OpenSearch for multi-tenant instances
    # so this function remains unchanged.
    # This is for local testing
    WAIT_SECONDS = 5
    VESPA_ATTEMPTS = 5
    for x in range(VESPA_ATTEMPTS):
        try:
            logger.notice(f"Setting up Vespa (attempt {x + 1}/{VESPA_ATTEMPTS})...")
            VespaIndex.register_multitenant_indices(
                indices=[index.index_name for index in supported_indices]
                + [
                    f"{index.index_name}{ALT_INDEX_SUFFIX}"
                    for index in supported_indices
                ],
                embedding_dims=[index.dim for index in supported_indices]
                + [index.dim for index in supported_indices],
                # on the cloud, just use float for all indices, the option to change this
                # is not exposed to the user
                embedding_precisions=[
                    EmbeddingPrecision.FLOAT for _ in range(len(supported_indices) * 2)
                ],
            )

            logger.notice("Vespa setup complete.")
            return True
        except Exception:
            logger.notice(
                f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
            )
            time.sleep(WAIT_SECONDS)

    logger.error(
        f"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})"
    )
    return False


================================================
FILE: backend/onyx/tools/built_in_tools.py
================================================
from typing import Type
from typing import Union

from onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool
from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (
    KnowledgeGraphTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import (
    WebSearchTool,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()


BUILT_IN_TOOL_TYPES = Union[
    SearchTool,
    ImageGenerationTool,
    WebSearchTool,
    KnowledgeGraphTool,
    OpenURLTool,
    PythonTool,
    FileReaderTool,
    MemoryTool,
]

BUILT_IN_TOOL_MAP: dict[str, Type[BUILT_IN_TOOL_TYPES]] = {
    SearchTool.__name__: SearchTool,
    ImageGenerationTool.__name__: ImageGenerationTool,
    WebSearchTool.__name__: WebSearchTool,
    KnowledgeGraphTool.__name__: KnowledgeGraphTool,
    OpenURLTool.__name__: OpenURLTool,
    PythonTool.__name__: PythonTool,
    FileReaderTool.__name__: FileReaderTool,
    MemoryTool.__name__: MemoryTool,
}

STOPPING_TOOLS_NAMES: list[str] = [ImageGenerationTool.NAME]
CITEABLE_TOOLS_NAMES: list[str] = [
    SearchTool.NAME,
    WebSearchTool.NAME,
    OpenURLTool.NAME,
]


def get_built_in_tool_ids() -> list[str]:
    return list(BUILT_IN_TOOL_MAP.keys())


def get_built_in_tool_by_id(in_code_tool_id: str) -> Type[BUILT_IN_TOOL_TYPES]:
    return BUILT_IN_TOOL_MAP[in_code_tool_id]


def _build_tool_name_to_class() -> dict[str, Type[BUILT_IN_TOOL_TYPES]]:
    """Build a mapping from LLM-facing tool name to tool class."""
    result: dict[str, Type[BUILT_IN_TOOL_TYPES]] = {}
    for cls in BUILT_IN_TOOL_MAP.values():
        name_attr = cls.__dict__.get("name")
        if isinstance(name_attr, property) and name_attr.fget is not None:
            tool_name = name_attr.fget(cls)
        elif isinstance(name_attr, str):
            tool_name = name_attr
        else:
            raise ValueError(
                f"Built-in tool {cls.__name__} must define a valid LLM-facing tool name"
            )
        result[tool_name] = cls
    return result


TOOL_NAME_TO_CLASS: dict[str, Type[BUILT_IN_TOOL_TYPES]] = _build_tool_name_to_class()


================================================
FILE: backend/onyx/tools/constants.py
================================================
"""Tool name and ID constants matching frontend definitions."""

# Tool names as referenced by tool results / tool calls
SEARCH_TOOL_NAME = "run_search"
INTERNET_SEARCH_TOOL_NAME = "run_internet_search"
IMAGE_GENERATION_TOOL_NAME = "run_image_generation"
PYTHON_TOOL_NAME = "run_python"
OPEN_URL_TOOL_NAME = "open_url"

# In-code tool IDs that also correspond to the tool's name when associated with a persona
SEARCH_TOOL_ID = "SearchTool"
IMAGE_GENERATION_TOOL_ID = "ImageGenerationTool"
WEB_SEARCH_TOOL_ID = "WebSearchTool"
PYTHON_TOOL_ID = "PythonTool"
OPEN_URL_TOOL_ID = "OpenURLTool"
FILE_READER_TOOL_ID = "FileReaderTool"
MEMORY_TOOL_ID = "MemoryTool"

# Tool names as referenced by tool results / tool calls (read_file)
FILE_READER_TOOL_NAME = "read_file"


================================================
FILE: backend/onyx/tools/fake_tools/__init__.py
================================================


================================================
FILE: backend/onyx/tools/fake_tools/research_agent.py
================================================
import queue
import time
from collections.abc import Callable
from typing import Any
from typing import cast

from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_utils import create_tool_call_failure_messages
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import CitationMode
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.citation_utils import collapse_citations
from onyx.chat.citation_utils import update_citation_processor_from_tool_response
from onyx.chat.emitter import Emitter
from onyx.chat.llm_loop import construct_message_history
from onyx.chat.llm_step import run_llm_step
from onyx.chat.llm_step import run_llm_step_pkt_generator
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.deep_research.dr_mock_tools import (
    get_research_agent_additional_tool_definitions,
)
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TASK_KEY
from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_MESSAGE
from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_TOKEN_COUNT
from onyx.deep_research.models import CombinedResearchAgentCallResult
from onyx.deep_research.models import ResearchAgentCallResult
from onyx.deep_research.utils import check_special_tool_calls
from onyx.deep_research.utils import create_think_tool_token_processor
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import ToolChoiceOptions
from onyx.prompts.deep_research.dr_tool_prompts import OPEN_URLS_TOOL_DESCRIPTION
from onyx.prompts.deep_research.dr_tool_prompts import (
    OPEN_URLS_TOOL_DESCRIPTION_REASONING,
)
from onyx.prompts.deep_research.dr_tool_prompts import WEB_SEARCH_TOOL_DESCRIPTION
from onyx.prompts.deep_research.research_agent import MAX_RESEARCH_CYCLES
from onyx.prompts.deep_research.research_agent import OPEN_URL_REMINDER_RESEARCH_AGENT
from onyx.prompts.deep_research.research_agent import RESEARCH_AGENT_PROMPT
from onyx.prompts.deep_research.research_agent import RESEARCH_AGENT_PROMPT_REASONING
from onyx.prompts.deep_research.research_agent import RESEARCH_REPORT_PROMPT
from onyx.prompts.deep_research.research_agent import USER_REPORT_QUERY
from onyx.prompts.prompt_utils import get_current_llm_day_time
from onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import IntermediateReportCitedDocs
from onyx.server.query_and_chat.streaming_models import IntermediateReportDelta
from onyx.server.query_and_chat.streaming_models import IntermediateReportStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import ResearchAgentStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import StreamingType
from onyx.tools.interface import Tool
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tools.utils import generate_tools_description
from onyx.tracing.framework.create import function_span
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()


# 30 minute timeout per research agent
RESEARCH_AGENT_TIMEOUT_SECONDS = 30 * 60
RESEARCH_AGENT_TIMEOUT_MESSAGE = "Research Agent timed out after 30 minutes"
# 12 minute timeout before forcing intermediate report generation
RESEARCH_AGENT_FORCE_REPORT_SECONDS = 12 * 60
# May be good to experiment with this, empirically reports of around 5,000 tokens are pretty good.
MAX_INTERMEDIATE_REPORT_LENGTH_TOKENS = 10000


def generate_intermediate_report(
    research_topic: str,
    history: list[ChatMessageSimple],
    llm: LLM,
    token_counter: Callable[[str], int],
    citation_processor: DynamicCitationProcessor,
    user_identity: LLMUserIdentity | None,
    emitter: Emitter,
    placement: Placement,
) -> str:
    # NOTE: This step outputs a lot of tokens and has been observed to run for more than 10 minutes in a nontrivial percentage of
    # research tasks. This is also model / inference provider dependent.
    with function_span("generate_intermediate_report") as span:
        span.span_data.input = (
            f"research_topic={research_topic}, history_length={len(history)}"
        )
        # Having the state container here to handle the tokens and not passed through means there is no way to
        # get partial saves of the report. Arguably this is not useful anyway so not going to implement partial saves.
        state_container = ChatStateContainer()
        system_prompt = ChatMessageSimple(
            message=RESEARCH_REPORT_PROMPT,
            token_count=token_counter(RESEARCH_REPORT_PROMPT),
            message_type=MessageType.SYSTEM,
        )

        reminder_str = USER_REPORT_QUERY.format(research_topic=research_topic)
        reminder_message = ChatMessageSimple(
            message=reminder_str,
            token_count=token_counter(reminder_str),
            message_type=MessageType.USER,
        )

        research_history = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=history,
            reminder_message=reminder_message,
            context_files=None,
            available_tokens=llm.config.max_input_tokens,
        )

        intermediate_report_generator = run_llm_step_pkt_generator(
            history=research_history,
            tool_definitions=[],
            tool_choice=ToolChoiceOptions.NONE,
            llm=llm,
            placement=placement,
            citation_processor=citation_processor,
            state_container=state_container,
            reasoning_effort=ReasoningEffort.LOW,
            final_documents=None,
            user_identity=user_identity,
            max_tokens=MAX_INTERMEDIATE_REPORT_LENGTH_TOKENS,
            use_existing_tab_index=True,
            is_deep_research=True,
            timeout_override=300,  # 5 minute read timeout for long report generation
        )

        while True:
            try:
                packet = next(intermediate_report_generator)
                # Translate AgentResponseStart/Delta packets to IntermediateReportStart/Delta
                # Use original placement consistently for all packets
                if isinstance(packet.obj, AgentResponseStart):
                    emitter.emit(
                        Packet(
                            placement=placement,
                            obj=IntermediateReportStart(),
                        )
                    )
                elif isinstance(packet.obj, AgentResponseDelta):
                    emitter.emit(
                        Packet(
                            placement=placement,
                            obj=IntermediateReportDelta(content=packet.obj.content),
                        )
                    )
                else:
                    # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)
                    # Also use original placement to keep everything in the same group
                    emitter.emit(
                        Packet(
                            placement=placement,
                            obj=packet.obj,
                        )
                    )
            except StopIteration as e:
                llm_step_result, _ = e.value
                # Use original placement for completion packets
                emitter.emit(
                    Packet(
                        placement=placement,
                        obj=IntermediateReportCitedDocs(
                            cited_docs=list(
                                citation_processor.get_seen_citations().values()
                            )
                        ),
                    )
                )
                emitter.emit(
                    Packet(
                        placement=placement,
                        obj=SectionEnd(),
                    )
                )
                break

        llm_step_result = cast(LlmStepResult, llm_step_result)

        final_report = llm_step_result.answer
        span.span_data.output = final_report if final_report else None
        if final_report is None:
            raise ValueError(
                f"LLM failed to generate a report for research task: {research_topic}"
            )

        return final_report


def run_research_agent_call(
    research_agent_call: ToolCallKickoff,
    parent_tool_call_id: str,
    tools: list[Tool],
    emitter: Emitter,
    state_container: ChatStateContainer,
    llm: LLM,
    is_reasoning_model: bool,
    token_counter: Callable[[str], int],
    user_identity: LLMUserIdentity | None,
) -> ResearchAgentCallResult | None:
    turn_index = research_agent_call.placement.turn_index
    tab_index = research_agent_call.placement.tab_index
    with function_span("research_agent") as span:
        span.span_data.input = str(research_agent_call.tool_args)
        try:
            # Track start time for timeout-based forced report generation
            start_time = time.monotonic()

            # Used to track citations while keeping original citation markers in intermediate reports.
            # KEEP_MARKERS preserves citation markers like [1], [2] in the text unchanged
            # while tracking which documents were cited via get_seen_citations().
            # This allows collapse_citations() to later renumber them in the final report.
            citation_processor = DynamicCitationProcessor(
                citation_mode=CitationMode.KEEP_MARKERS
            )

            research_cycle_count = 0
            llm_cycle_count = 0
            current_tools = tools
            reasoning_cycles = 0
            just_ran_web_search = False

            # If this fails to parse, we can't run the loop anyway, let this one fail in that case
            research_topic = research_agent_call.tool_args[RESEARCH_AGENT_TASK_KEY]

            emitter.emit(
                Packet(
                    placement=Placement(turn_index=turn_index, tab_index=tab_index),
                    obj=ResearchAgentStart(research_task=research_topic),
                )
            )

            initial_user_message = ChatMessageSimple(
                message=research_topic,
                token_count=token_counter(research_topic),
                message_type=MessageType.USER,
            )
            msg_history: list[ChatMessageSimple] = [initial_user_message]

            citation_mapping: dict[int, str] = {}
            most_recent_reasoning: str | None = None
            while research_cycle_count <= MAX_RESEARCH_CYCLES:
                # Check if we've exceeded the time limit - if so, skip LLM and generate report
                elapsed_seconds = time.monotonic() - start_time
                if elapsed_seconds > RESEARCH_AGENT_FORCE_REPORT_SECONDS:
                    logger.info(
                        f"Research agent exceeded {RESEARCH_AGENT_FORCE_REPORT_SECONDS}s "
                        f"(elapsed: {elapsed_seconds:.1f}s), forcing intermediate report generation"
                    )
                    break

                if research_cycle_count == MAX_RESEARCH_CYCLES:
                    # Auto-generate report on last cycle
                    logger.debug("Auto-generating intermediate report on last cycle.")
                    break

                tools_by_name = {tool.name: tool for tool in current_tools}

                tools_description = generate_tools_description(current_tools)

                internal_search_tip = (
                    INTERNAL_SEARCH_GUIDANCE
                    if any(isinstance(tool, SearchTool) for tool in current_tools)
                    else ""
                )
                web_search_tip = (
                    WEB_SEARCH_TOOL_DESCRIPTION
                    if any(isinstance(tool, WebSearchTool) for tool in current_tools)
                    else ""
                )
                open_urls_tip = (
                    OPEN_URLS_TOOL_DESCRIPTION
                    if any(isinstance(tool, OpenURLTool) for tool in current_tools)
                    else ""
                )
                if is_reasoning_model and open_urls_tip:
                    open_urls_tip = OPEN_URLS_TOOL_DESCRIPTION_REASONING

                system_prompt_template = (
                    RESEARCH_AGENT_PROMPT_REASONING
                    if is_reasoning_model
                    else RESEARCH_AGENT_PROMPT
                )
                system_prompt_str = system_prompt_template.format(
                    available_tools=tools_description,
                    current_datetime=get_current_llm_day_time(full_sentence=False),
                    current_cycle_count=research_cycle_count,
                    optional_internal_search_tool_description=internal_search_tip,
                    optional_web_search_tool_description=web_search_tip,
                    optional_open_url_tool_description=open_urls_tip,
                )

                system_prompt = ChatMessageSimple(
                    message=system_prompt_str,
                    token_count=token_counter(system_prompt_str),
                    message_type=MessageType.SYSTEM,
                )

                if just_ran_web_search:
                    reminder_message = ChatMessageSimple(
                        message=OPEN_URL_REMINDER_RESEARCH_AGENT,
                        token_count=100,
                        message_type=MessageType.USER,
                    )
                else:
                    reminder_message = None

                constructed_history = construct_message_history(
                    system_prompt=system_prompt,
                    custom_agent_prompt=None,
                    simple_chat_history=msg_history,
                    reminder_message=reminder_message,
                    context_files=None,
                    available_tokens=llm.config.max_input_tokens,
                )

                research_agent_tools = get_research_agent_additional_tool_definitions(
                    include_think_tool=not is_reasoning_model
                )
                # Use think tool processor for non-reasoning models to convert
                # think_tool calls to reasoning content (same as dr_loop.py)
                custom_processor = (
                    create_think_tool_token_processor()
                    if not is_reasoning_model
                    else None
                )

                llm_step_result, has_reasoned = run_llm_step(
                    emitter=emitter,
                    history=constructed_history,
                    tool_definitions=[tool.tool_definition() for tool in current_tools]
                    + research_agent_tools,
                    tool_choice=ToolChoiceOptions.REQUIRED,
                    llm=llm,
                    placement=Placement(
                        turn_index=turn_index,
                        tab_index=tab_index,
                        sub_turn_index=llm_cycle_count + reasoning_cycles,
                    ),
                    citation_processor=None,
                    state_container=None,
                    reasoning_effort=ReasoningEffort.LOW,
                    final_documents=None,
                    user_identity=user_identity,
                    custom_token_processor=custom_processor,
                    use_existing_tab_index=True,
                    is_deep_research=True,
                    # In case the model is tripped up by the long context and gets into an endless loop of
                    # things like null tokens, we set a max token limit here. The call will likely not be valid
                    # in these situations but it at least allows a chance of recovery. None of the tool calls should
                    # be this long.
                    max_tokens=1000,
                )
                if has_reasoned:
                    reasoning_cycles += 1

                tool_responses: list[ToolResponse] = []
                tool_calls = llm_step_result.tool_calls or []

                # TODO handle the restriction of only 1 tool call type per turn
                # This is a problem right now because of the Placement system not allowing for
                # differentiating sub-tool calls.
                # Filter tool calls to only include the first tool type used
                # This prevents mixing different tool types in the same batch
                if tool_calls:
                    first_tool_type = tool_calls[0].tool_name
                    tool_calls = [
                        tc for tc in tool_calls if tc.tool_name == first_tool_type
                    ]

                just_ran_web_search = False

                special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)
                if special_tool_calls.generate_report_tool_call:
                    final_report = generate_intermediate_report(
                        research_topic=research_topic,
                        history=msg_history,
                        llm=llm,
                        token_counter=token_counter,
                        citation_processor=citation_processor,
                        user_identity=user_identity,
                        emitter=emitter,
                        placement=Placement(
                            turn_index=turn_index,
                            tab_index=tab_index,
                        ),
                    )
                    span.span_data.output = final_report if final_report else None
                    return ResearchAgentCallResult(
                        intermediate_report=final_report,
                        citation_mapping=citation_processor.get_seen_citations(),
                    )
                elif special_tool_calls.think_tool_call:
                    think_tool_call = special_tool_calls.think_tool_call
                    tool_call_message = think_tool_call.to_msg_str()
                    tool_call_token_count = token_counter(tool_call_message)

                    with function_span("think_tool") as think_span:
                        think_span.span_data.input = str(think_tool_call.tool_args)

                        # Create ASSISTANT message with tool_calls (OpenAI parallel format)
                        think_tool_simple = ToolCallSimple(
                            tool_call_id=think_tool_call.tool_call_id,
                            tool_name=think_tool_call.tool_name,
                            tool_arguments=think_tool_call.tool_args,
                            token_count=tool_call_token_count,
                        )
                        think_assistant_msg = ChatMessageSimple(
                            message="",
                            token_count=tool_call_token_count,
                            message_type=MessageType.ASSISTANT,
                            tool_calls=[think_tool_simple],
                            image_files=None,
                        )
                        msg_history.append(think_assistant_msg)

                        think_tool_response_msg = ChatMessageSimple(
                            message=THINK_TOOL_RESPONSE_MESSAGE,
                            token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,
                            message_type=MessageType.TOOL_CALL_RESPONSE,
                            tool_call_id=think_tool_call.tool_call_id,
                            image_files=None,
                        )
                        msg_history.append(think_tool_response_msg)
                        think_span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE
                    reasoning_cycles += 1
                    most_recent_reasoning = llm_step_result.reasoning
                    continue
                else:
                    parallel_tool_call_results = run_tool_calls(
                        tool_calls=tool_calls,
                        tools=current_tools,
                        message_history=msg_history,
                        user_memory_context=None,
                        user_info=None,
                        citation_mapping=citation_mapping,
                        next_citation_num=citation_processor.get_next_citation_number(),
                        # Packets currently cannot differentiate between parallel calls in a nested level
                        # so we just cannot show parallel calls in the UI. This should not happen for deep research anyhow.
                        max_concurrent_tools=1,
                        # May be better to not do this step, hard to say, needs to be tested
                        skip_search_query_expansion=False,
                        url_snippet_map=extract_url_snippet_map(
                            [
                                search_doc
                                for tool_call in state_container.get_tool_calls()
                                if tool_call.search_docs
                                for search_doc in tool_call.search_docs
                            ]
                        ),
                    )
                    tool_responses = parallel_tool_call_results.tool_responses
                    citation_mapping = (
                        parallel_tool_call_results.updated_citation_mapping
                    )

                    if tool_calls and not tool_responses:
                        failure_messages = create_tool_call_failure_messages(
                            tool_calls, token_counter
                        )
                        msg_history.extend(failure_messages)

                        # If there is a failure like this, we still increment to avoid potential infinite loops
                        research_cycle_count += 1
                        llm_cycle_count += 1
                        continue

                    # Filter to only responses with valid tool_call references
                    valid_tool_responses = [
                        tr for tr in tool_responses if tr.tool_call is not None
                    ]

                    # Build ONE ASSISTANT message with all tool calls (OpenAI parallel format)
                    if valid_tool_responses:
                        tool_calls_simple: list[ToolCallSimple] = []
                        for tool_response in valid_tool_responses:
                            tc = tool_response.tool_call
                            assert tc is not None  # Already filtered above
                            tool_call_message = tc.to_msg_str()
                            tool_call_token_count = token_counter(tool_call_message)
                            tool_calls_simple.append(
                                ToolCallSimple(
                                    tool_call_id=tc.tool_call_id,
                                    tool_name=tc.tool_name,
                                    tool_arguments=tc.tool_args,
                                    token_count=tool_call_token_count,
                                )
                            )

                        total_tool_call_tokens = sum(
                            tc.token_count for tc in tool_calls_simple
                        )
                        assistant_with_tools = ChatMessageSimple(
                            message="",
                            token_count=total_tool_call_tokens,
                            message_type=MessageType.ASSISTANT,
                            tool_calls=tool_calls_simple,
                            image_files=None,
                        )
                        msg_history.append(assistant_with_tools)

                    # Now add tool call info and TOOL_CALL_RESPONSE messages for each
                    for tool_response in valid_tool_responses:
                        tc = tool_response.tool_call
                        assert tc is not None  # Already filtered above
                        tool_call_tab_index = tc.placement.tab_index

                        tool = tools_by_name.get(tc.tool_name)
                        if not tool:
                            raise ValueError(
                                f"Tool '{tc.tool_name}' not found in tools list"
                            )

                        search_docs = None
                        displayed_docs = None
                        if isinstance(tool_response.rich_response, SearchDocsResponse):
                            search_docs = tool_response.rich_response.search_docs
                            displayed_docs = tool_response.rich_response.displayed_docs

                            # Add ALL search docs to state container for DB persistence
                            if search_docs:
                                state_container.add_search_docs(search_docs)

                            # This is used for the Open URL reminder in the next cycle
                            # only do this if the web search tool yielded results
                            if search_docs and tc.tool_name == WebSearchTool.NAME:
                                just_ran_web_search = True

                        # Makes sure the citation processor is updated with all the possible docs
                        # and citation numbers so that it's populated when passed in to report generation.
                        update_citation_processor_from_tool_response(
                            tool_response=tool_response,
                            citation_processor=citation_processor,
                        )

                        # Research Agent is a top level tool call but the tools called by the research
                        # agent are sub-tool calls.
                        tool_call_info = ToolCallInfo(
                            parent_tool_call_id=parent_tool_call_id,
                            # At the DB save level, there is only a turn index, no sub-turn etc.
                            # This is implied by the parent tool call's turn index and the depth
                            # of the tree traversal.
                            turn_index=llm_cycle_count + reasoning_cycles,
                            tab_index=tool_call_tab_index,
                            tool_name=tc.tool_name,
                            tool_call_id=tc.tool_call_id,
                            tool_id=tool.id,
                            reasoning_tokens=llm_step_result.reasoning
                            or most_recent_reasoning,
                            tool_call_arguments=tc.tool_args,
                            tool_call_response=tool_response.llm_facing_response,
                            search_docs=displayed_docs or search_docs,
                            generated_images=None,
                        )
                        state_container.add_tool_call(tool_call_info)

                        tool_response_message = tool_response.llm_facing_response
                        tool_response_token_count = token_counter(tool_response_message)

                        tool_response_msg = ChatMessageSimple(
                            message=tool_response_message,
                            token_count=tool_response_token_count,
                            message_type=MessageType.TOOL_CALL_RESPONSE,
                            tool_call_id=tc.tool_call_id,
                            image_files=None,
                        )
                        msg_history.append(tool_response_msg)

                # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns
                most_recent_reasoning = None
                llm_cycle_count += 1
                research_cycle_count += 1

            # If we've run out of cycles, just try to generate a report from everything so far
            final_report = generate_intermediate_report(
                research_topic=research_topic,
                history=msg_history,
                llm=llm,
                token_counter=token_counter,
                citation_processor=citation_processor,
                user_identity=user_identity,
                emitter=emitter,
                placement=Placement(
                    turn_index=turn_index,
                    tab_index=tab_index,
                ),
            )
            span.span_data.output = final_report if final_report else None
            return ResearchAgentCallResult(
                intermediate_report=final_report,
                citation_mapping=citation_processor.get_seen_citations(),
            )

        except Exception as e:
            logger.error(f"Error running research agent call: {e}")
            emitter.emit(
                Packet(
                    placement=Placement(turn_index=turn_index, tab_index=tab_index),
                    obj=PacketException(type=StreamingType.ERROR.value, exception=e),
                )
            )
            return None


def _on_research_agent_timeout(
    index: int,  # noqa: ARG001
    func: Callable[..., Any],  # noqa: ARG001
    args: tuple[Any, ...],
) -> ResearchAgentCallResult:
    """Callback for handling research agent timeouts.

    Returns a ResearchAgentCallResult with the timeout message so the research
    can continue with other agents.
    """
    research_agent_call: ToolCallKickoff = args[0]  # First arg
    research_task = research_agent_call.tool_args.get(
        RESEARCH_AGENT_TASK_KEY, "unknown"
    )
    logger.warning(
        f"Research agent timed out after {RESEARCH_AGENT_TIMEOUT_SECONDS} seconds for task: {research_task}"
    )
    return ResearchAgentCallResult(
        intermediate_report=RESEARCH_AGENT_TIMEOUT_MESSAGE,
        citation_mapping={},
    )


def run_research_agent_calls(
    research_agent_calls: list[ToolCallKickoff],
    parent_tool_call_ids: list[str],
    tools: list[Tool],
    emitter: Emitter,
    state_container: ChatStateContainer,
    llm: LLM,
    is_reasoning_model: bool,
    token_counter: Callable[[str], int],
    citation_mapping: CitationMapping,
    user_identity: LLMUserIdentity | None = None,
) -> CombinedResearchAgentCallResult:
    # Run all research agent calls in parallel with timeout
    functions_with_args = [
        (
            run_research_agent_call,
            (
                research_agent_call,
                parent_tool_call_id,
                tools,
                emitter,
                state_container,
                llm,
                is_reasoning_model,
                token_counter,
                user_identity,
            ),
        )
        for research_agent_call, parent_tool_call_id in zip(
            research_agent_calls, parent_tool_call_ids
        )
    ]

    research_agent_call_results = run_functions_tuples_in_parallel(
        functions_with_args,
        allow_failures=False,
        # Note: This simply allows the main thread to continue with an error message
        # It does not kill the background thread which may still write to the state objects passed to it
        # This is because forcefully killing Python threads is very dangerous
        timeout=RESEARCH_AGENT_TIMEOUT_SECONDS,
        timeout_callback=_on_research_agent_timeout,
    )

    updated_citation_mapping = citation_mapping
    updated_answers: list[str | None] = []

    for result in research_agent_call_results:
        if result is None:
            updated_answers.append(None)
            continue

        # Use collapse_citations to renumber citations in the text and merge mappings.
        # Since we use KEEP_MARKERS mode, the intermediate reports have original citation
        # markers like [1], [2] which need to be renumbered for the combined report.
        updated_answer, updated_citation_mapping = collapse_citations(
            answer_text=result.intermediate_report,
            existing_citation_mapping=updated_citation_mapping,
            new_citation_mapping=result.citation_mapping,
        )
        updated_answers.append(updated_answer)

    return CombinedResearchAgentCallResult(
        intermediate_reports=updated_answers,
        citation_mapping=updated_citation_mapping,
    )


if __name__ == "__main__":
    from uuid import uuid4

    from onyx.chat.chat_state import ChatStateContainer
    from onyx.db.engine.sql_engine import get_session_with_current_tenant
    from onyx.db.engine.sql_engine import SqlEngine
    from onyx.db.models import User
    from onyx.db.persona import get_default_behavior_persona
    from onyx.llm.factory import get_default_llm
    from onyx.llm.factory import get_llm_token_counter
    from onyx.llm.utils import model_is_reasoning_model
    from onyx.server.query_and_chat.placement import Placement
    from onyx.tools.models import ToolCallKickoff
    from onyx.tools.tool_constructor import construct_tools

    # === CONFIGURE YOUR RESEARCH PROMPT HERE ===
    RESEARCH_PROMPT = "Your test research task."

    SqlEngine.set_app_name("research_agent_script")
    SqlEngine.init_engine(pool_size=5, max_overflow=5)

    with get_session_with_current_tenant() as db_session:
        llm = get_default_llm()
        token_counter = get_llm_token_counter(llm)
        is_reasoning = model_is_reasoning_model(
            llm.config.model_name, llm.config.model_provider
        )

        persona = get_default_behavior_persona(db_session, eager_load_for_tools=True)
        if persona is None:
            raise ValueError("No default persona found")

        user = db_session.query(User).first()
        if user is None:
            raise ValueError("No users found in database. Please create a user first.")

        emitter_queue: queue.Queue = queue.Queue()
        emitter = Emitter(merged_queue=emitter_queue)
        state_container = ChatStateContainer()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=emitter,
            user=user,
            llm=llm,
        )
        tools = [
            tool
            for tool_list in tool_dict.values()
            for tool in tool_list
            if tool.name != "generate_image"
        ]

        logger.info(f"Running research agent with prompt: {RESEARCH_PROMPT}")
        logger.info(f"LLM: {llm.config.model_provider}/{llm.config.model_name}")
        logger.info(f"Tools: {[t.name for t in tools]}")

        result = run_research_agent_call(
            research_agent_call=ToolCallKickoff(
                tool_name="research_agent",
                tool_args={RESEARCH_AGENT_TASK_KEY: RESEARCH_PROMPT},
                tool_call_id=str(uuid4()),
                placement=Placement(turn_index=0, tab_index=0),
            ),
            parent_tool_call_id=str(uuid4()),
            tools=tools,
            emitter=emitter,
            state_container=state_container,
            llm=llm,
            is_reasoning_model=is_reasoning,
            token_counter=token_counter,
            user_identity=None,
        )

        if result is None:
            logger.error("Research agent returned no result")
        else:
            print("\n" + "=" * 80)
            print("RESEARCH AGENT RESULT")
            print("=" * 80)
            print(result.intermediate_report)
            print("=" * 80)
            print(f"Citations: {result.citation_mapping}")
            print(f"Total packets emitted: {emitter_queue.qsize()}")


================================================
FILE: backend/onyx/tools/interface.py
================================================
from __future__ import annotations

import abc
from typing import Any
from typing import Generic
from typing import TypeVar

from sqlalchemy.orm import Session

from onyx.chat.emitter import Emitter
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolResponse


TOverride = TypeVar("TOverride")


class Tool(abc.ABC, Generic[TOverride]):
    def __init__(self, emitter: Emitter | None = None):
        """Initialize tool with optional emitter. Emitter can be set later via set_emitter()."""
        self._emitter = emitter

    @property
    def emitter(self) -> Emitter:
        """Get the emitter. Raises if not set."""
        if self._emitter is None:
            raise ValueError(
                f"Emitter not set on tool {self.name}. Call set_emitter() first."
            )
        return self._emitter

    @property
    @abc.abstractmethod
    def id(self) -> int:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Should be the name of the tool passed to the LLM as the json field"""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def description(self) -> str:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def display_name(self) -> str:
        """Should be the name of the tool displayed to the user"""
        raise NotImplementedError

    @classmethod
    def is_available(cls, db_session: "Session") -> bool:  # noqa: ARG003
        """
        Whether this tool is currently available for use given
        the state of the system. Default: available.
        Subclasses may override to perform dynamic checks.

        Args:
            db_session: Database session for tools that need DB access
        """
        return True

    @abc.abstractmethod
    def tool_definition(self) -> dict:
        """
        This is the full definition of the tool with all of the parameters, settings, etc.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def emit_start(self, placement: Placement) -> None:
        """
        Emit the start packet for this tool. Each tool implementation should
        emit its specific start packet type.

        Args:
            turn_index: The turn index for this tool execution
            tab_index: The tab index for parallel tool calls
        """
        raise NotImplementedError

    @abc.abstractmethod
    def run(
        self,
        placement: Placement,
        # Specific tool override arguments that are not provided by the LLM
        # For example when calling the internal search tool, the original user query is passed along too (but not by the LLM)
        override_kwargs: TOverride,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        raise NotImplementedError

    @classmethod
    def should_emit_argument_deltas(cls) -> bool:
        return False


================================================
FILE: backend/onyx/tools/models.py
================================================
from __future__ import annotations

import json
from enum import Enum
from typing import Any
from typing import Literal
from uuid import UUID

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import model_validator

from onyx.chat.emitter import Emitter
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import SearchDocsResponse
from onyx.db.memory import UserMemoryContext
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.tools.tool_implementations.images.models import FinalImageGenerationResponse
from onyx.tools.tool_implementations.memory.models import MemoryToolResponse


TOOL_CALL_MSG_FUNC_NAME = "function_name"
TOOL_CALL_MSG_ARGUMENTS = "arguments"


class ToolCallException(Exception):
    """Exception raised for errors during tool calls."""

    def __init__(self, message: str, llm_facing_message: str):
        # This is the full error message which is used for tracing
        super().__init__(message)
        # LLM made tool calls are acceptable and not flow terminating, this is the message
        # which will populate the tool response.
        self.llm_facing_message = llm_facing_message


class ToolExecutionException(Exception):
    """Exception raise for errors during tool execution."""

    def __init__(self, message: str, emit_error_packet: bool = False):
        super().__init__(message)

        self.emit_error_packet = emit_error_packet


class SearchToolUsage(str, Enum):
    DISABLED = "disabled"
    ENABLED = "enabled"
    AUTO = "auto"


class CustomToolUserFileSnapshot(BaseModel):
    file_ids: list[str]  # References to saved images or CSVs


class CustomToolCallSummary(BaseModel):
    tool_name: str
    response_type: str  # e.g., 'json', 'image', 'csv', 'graph'
    tool_result: Any  # The response data
    error: CustomToolErrorInfo | None = None


class ToolCallKickoff(BaseModel):
    tool_call_id: str
    tool_name: str
    tool_args: dict[str, Any]

    placement: Placement

    def to_msg_str(self) -> str:
        return json.dumps(
            {
                TOOL_CALL_MSG_FUNC_NAME: self.tool_name,
                TOOL_CALL_MSG_ARGUMENTS: self.tool_args,
            }
        )


class ToolResponse(BaseModel):
    # Rich response is for the objects that are returned but not directly used by the LLM
    # these typically need to be saved to the database to load things in the UI (usually both)
    rich_response: (
        # This comes from image generation, image needs to be saved and the packet about it's location needs to be emitted
        FinalImageGenerationResponse
        # This comes from internal search / web search, search docs need to be saved, already emitted by the tool
        | SearchDocsResponse
        # This comes from the memory tool, memory needs to be persisted to the database
        | MemoryToolResponse
        # This comes from open url, web content needs to be saved, maybe this can be consolidated too
        # | WebContentResponse
        # This comes from custom tools, tool result needs to be saved
        | CustomToolCallSummary
        # This comes from code interpreter, carries generated files
        | PythonToolRichResponse
        # If the rich response is a string, this is what's saved to the tool call in the DB
        | str
        | None  # If nothing needs to be persisted outside of the string value passed to the LLM
    )
    # This is the final string that needs to be wrapped in a tool call response message and concatenated to the history
    llm_facing_response: str
    # The original tool call that triggered this response - set by tool_runner
    # The response is first created by the tool runner, which does not need to be aware of things like the tool_call_id
    # So this is set after the response is created by the tool runner
    tool_call: ToolCallKickoff | None = None


class ParallelToolCallResponse(BaseModel):
    tool_responses: list[ToolResponse]
    updated_citation_mapping: dict[int, str]


class ToolRunnerResponse(BaseModel):
    tool_run_kickoff: ToolCallKickoff | None = None
    tool_response: ToolResponse | None = None
    tool_message_content: str | list[str | dict[str, Any]] | None = None

    @model_validator(mode="after")
    def validate_tool_runner_response(self) -> "ToolRunnerResponse":
        fields = ["tool_response", "tool_message_content", "tool_run_kickoff"]
        provided = sum(1 for field in fields if getattr(self, field) is not None)

        if provided != 1:
            raise ValueError(
                "Exactly one of 'tool_response', 'tool_message_content', or 'tool_run_kickoff' must be provided"
            )

        return self


class ToolCallFinalResult(ToolCallKickoff):
    tool_result: Any = (
        None  # we would like to use JSON_ro, but can't due to its recursive nature
    )
    # agentic additions; only need to set during agentic tool calls
    level: int | None = None
    level_question_num: int | None = None


class ChatMinimalTextMessage(BaseModel):
    message: str
    message_type: MessageType


class DynamicSchemaInfo(BaseModel):
    chat_session_id: UUID | None
    message_id: int | None


class WebSearchToolOverrideKwargs(BaseModel):
    # To know what citation number to start at for constructing the string to the LLM
    starting_citation_num: int


class OpenURLToolOverrideKwargs(BaseModel):
    # To know what citation number to start at for constructing the string to the LLM
    starting_citation_num: int
    citation_mapping: dict[str, int]
    url_snippet_map: dict[str, str]
    max_urls: int = 10


# None indicates that the default value should be used
class SearchToolOverrideKwargs(BaseModel):
    # To know what citation number to start at for constructing the string to the LLM
    starting_citation_num: int
    # This is needed because the LLM won't be able to do a really detailed semantic query well
    # without help and a specific custom prompt for this
    original_query: str | None = None
    message_history: list[ChatMinimalTextMessage] | None = None
    user_memory_context: UserMemoryContext | None = None
    user_info: str | None = None

    # Used for tool calls after the first one but in the same chat turn. The reason for this is that if the initial pass through
    # the custom flow did not yield good results, we don't want to go through it again. In that case, we defer entirely to the LLM
    skip_query_expansion: bool = False

    # Number of results to return in the richer object format so that it can be rendered in the UI
    num_hits: int | None = NUM_RETURNED_HITS
    # Number of chunks (token approx) to include in the string to the LLM
    max_llm_chunks: int | None = MAX_CHUNKS_FED_TO_CHAT

    model_config = ConfigDict(arbitrary_types_allowed=True)


class ChatFile(BaseModel):
    """File from a chat session that can be passed to tools."""

    filename: str
    content: bytes

    model_config = ConfigDict(arbitrary_types_allowed=True)


class PythonToolRichResponse(BaseModel):
    """Rich response from the Python tool carrying generated files."""

    generated_files: list[PythonExecutionFile] = []


class PythonToolOverrideKwargs(BaseModel):
    """Override kwargs for the Python/Code Interpreter tool."""

    chat_files: list[ChatFile] = []


class ImageGenerationToolOverrideKwargs(BaseModel):
    """Override kwargs for image generation tool calls."""

    recent_generated_image_file_ids: list[str] = []


class SearchToolRunContext(BaseModel):
    emitter: Emitter

    model_config = {"arbitrary_types_allowed": True}


class ImageGenerationToolRunContext(BaseModel):
    emitter: Emitter

    model_config = {"arbitrary_types_allowed": True}


class CustomToolRunContext(BaseModel):
    emitter: Emitter

    model_config = {"arbitrary_types_allowed": True}


class MemoryToolResponseSnapshot(BaseModel):
    memory_text: str
    operation: Literal["add", "update"]
    memory_id: int | None = None
    index: int | None = None


class ToolCallInfo(BaseModel):
    # The parent_tool_call_id is the actual generated tool call id
    # It is NOT the DB ID which often does not exist yet when the ToolCallInfo is created
    # None if attached to the Chat Message directly
    parent_tool_call_id: str | None
    turn_index: int
    tab_index: int
    tool_name: str
    tool_call_id: str
    tool_id: int
    reasoning_tokens: str | None
    tool_call_arguments: dict[str, Any]
    tool_call_response: str
    search_docs: list[SearchDoc] | None = None
    generated_images: list[GeneratedImage] | None = None
    generated_files: list[PythonExecutionFile] | None = None


CHAT_SESSION_ID_PLACEHOLDER = "CHAT_SESSION_ID"
MESSAGE_ID_PLACEHOLDER = "MESSAGE_ID"


class BaseCiteableToolResult(BaseModel):
    """Base class for tool results that can be cited."""

    document_citation_number: int
    unique_identifier_to_strip_away: str | None = None
    type: str


class LlmInternalSearchResult(BaseCiteableToolResult):
    """Result from an internal search query"""

    type: Literal["internal_search"] = "internal_search"
    title: str
    excerpt: str
    metadata: dict[str, Any]


class LlmWebSearchResult(BaseCiteableToolResult):
    """Result from a web search query"""

    type: Literal["web_search"] = "web_search"
    url: str
    title: str
    snippet: str


class LlmOpenUrlResult(BaseCiteableToolResult):
    """Result from opening/fetching a URL"""

    type: Literal["open_url"] = "open_url"
    content: str


class PythonExecutionFile(BaseModel):
    """File generated during Python execution"""

    filename: str
    file_link: str


class LlmPythonExecutionResult(BaseModel):
    """Result from Python code execution"""

    type: Literal["python_execution"] = "python_execution"

    stdout: str
    stderr: str
    exit_code: int | None
    timed_out: bool
    generated_files: list[PythonExecutionFile]
    error: str | None = None


================================================
FILE: backend/onyx/tools/tool_constructor.py
================================================
from typing import cast
from uuid import UUID

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.auth.oauth_token_manager import OAuthTokenManager
from onyx.chat.emitter import Emitter
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import PersonaSearchInfo
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.mcp import get_all_mcp_tools_for_server
from onyx.db.mcp import get_mcp_server_by_id
from onyx.db.mcp import get_user_connection_config
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.oauth_config import get_oauth_config
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tools import get_builtin_tool
from onyx.document_index.factory import get_default_document_index
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.onyxbot.slack.models import SlackContext
from onyx.tools.built_in_tools import get_built_in_tool_by_id
from onyx.tools.interface import Tool
from onyx.tools.models import DynamicSchemaInfo
from onyx.tools.models import SearchToolUsage
from onyx.tools.tool_implementations.custom.custom_tool import (
    build_custom_tools_from_openapi_schema_and_headers,
)
from onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool
from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from onyx.tools.tool_implementations.mcp.mcp_tool import MCPTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.open_url.open_url_tool import (
    OpenURLTool,
)
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import (
    WebSearchTool,
)
from onyx.utils.headers import header_dict_to_header_list
from onyx.utils.logger import setup_logger

logger = setup_logger()


class SearchToolConfig(BaseModel):
    user_selected_filters: BaseFilters | None = None
    # Vespa metadata filters for overflowing user files.  These are NOT the
    # IDs of the current project/persona — they are only set when the
    # project's/persona's user files didn't fit in the LLM context window and
    # must be found via vector DB search instead.
    project_id_filter: int | None = None
    persona_id_filter: int | None = None
    bypass_acl: bool = False
    additional_context: str | None = None
    slack_context: SlackContext | None = None
    enable_slack_search: bool = True


class FileReaderToolConfig(BaseModel):
    # IDs from the ``user_file`` table (project / persona-attached files).
    user_file_ids: list[UUID] = []
    # IDs from the ``file_record`` table (chat-attached files).
    chat_file_ids: list[UUID] = []


class CustomToolConfig(BaseModel):
    chat_session_id: UUID | None = None
    message_id: int | None = None
    additional_headers: dict[str, str] | None = None
    mcp_headers: dict[str, str] | None = None


def _get_image_generation_config(llm: LLM, db_session: Session) -> LLMConfig:
    """Get image generation LLM config from the default image generation configuration."""
    from onyx.db.image_generation import get_default_image_generation_config

    default_config = get_default_image_generation_config(db_session)
    if (
        not default_config
        or not default_config.model_configuration
        or not default_config.model_configuration.llm_provider
    ):
        raise ValueError("No default image generation configuration found")

    llm_provider = default_config.model_configuration.llm_provider

    return LLMConfig(
        model_provider=llm_provider.provider,
        model_name=default_config.model_configuration.name,
        temperature=GEN_AI_TEMPERATURE,
        api_key=(
            llm_provider.api_key.get_value(apply_mask=False)
            if llm_provider.api_key
            else None
        ),
        api_base=llm_provider.api_base,
        api_version=llm_provider.api_version,
        deployment_name=llm_provider.deployment_name,
        max_input_tokens=llm.config.max_input_tokens,
        custom_config=llm_provider.custom_config,
    )


def construct_tools(
    persona: Persona,
    db_session: Session,
    emitter: Emitter,
    user: User,
    llm: LLM,
    search_tool_config: SearchToolConfig | None = None,
    custom_tool_config: CustomToolConfig | None = None,
    file_reader_tool_config: FileReaderToolConfig | None = None,
    allowed_tool_ids: list[int] | None = None,
    search_usage_forcing_setting: SearchToolUsage = SearchToolUsage.AUTO,
) -> dict[int, list[Tool]]:
    """Constructs tools based on persona configuration and available APIs.

    Will simply skip tools that are not allowed/available.

    Callers must supply a persona with ``tools``, ``document_sets``,
    ``attached_documents``, and ``hierarchy_nodes`` already eager-loaded
    (e.g. via ``eager_load_persona=True`` or ``eager_load_for_tools=True``)
    to avoid lazy SQL queries after the session may have been flushed."""
    tool_dict: dict[int, list[Tool]] = {}

    # Log which tools are attached to the persona for debugging
    persona_tool_names = [t.name for t in persona.tools]
    logger.debug(
        f"Constructing tools for persona '{persona.name}' (id={persona.id}): {persona_tool_names}"
    )

    mcp_tool_cache: dict[int, dict[int, MCPTool]] = {}
    # Get user's OAuth token if available
    user_oauth_token = None
    if user.oauth_accounts:
        user_oauth_token = user.oauth_accounts[0].access_token

    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)

    def _build_search_tool(tool_id: int, config: SearchToolConfig) -> SearchTool:
        persona_search_info = PersonaSearchInfo(
            document_set_names=[ds.name for ds in persona.document_sets],
            search_start_date=persona.search_start_date,
            attached_document_ids=[doc.id for doc in persona.attached_documents],
            hierarchy_node_ids=[node.id for node in persona.hierarchy_nodes],
        )
        return SearchTool(
            tool_id=tool_id,
            emitter=emitter,
            user=user,
            persona_search_info=persona_search_info,
            llm=llm,
            document_index=document_index,
            user_selected_filters=config.user_selected_filters,
            project_id_filter=config.project_id_filter,
            persona_id_filter=config.persona_id_filter,
            bypass_acl=config.bypass_acl,
            slack_context=config.slack_context,
            enable_slack_search=config.enable_slack_search,
        )

    added_search_tool = False
    for db_tool_model in persona.tools:
        # If allowed_tool_ids is specified, skip tools not in the allowed list
        if allowed_tool_ids is not None and db_tool_model.id not in allowed_tool_ids:
            continue

        if db_tool_model.in_code_tool_id:
            tool_cls = get_built_in_tool_by_id(db_tool_model.in_code_tool_id)

            try:
                tool_is_available = tool_cls.is_available(db_session)
            except Exception:
                logger.exception(
                    "Failed checking availability for tool %s", tool_cls.__name__
                )
                tool_is_available = False

            if not tool_is_available:
                logger.debug(
                    "Skipping tool %s because it is not available",
                    tool_cls.__name__,
                )
                continue

            # Handle Internal Search Tool
            if tool_cls.__name__ == SearchTool.__name__:
                added_search_tool = True
                if search_usage_forcing_setting == SearchToolUsage.DISABLED:
                    continue

                if not search_tool_config:
                    search_tool_config = SearchToolConfig()

                tool_dict[db_tool_model.id] = [
                    _build_search_tool(db_tool_model.id, search_tool_config)
                ]

            # Handle Image Generation Tool
            elif tool_cls.__name__ == ImageGenerationTool.__name__:
                img_generation_llm_config = _get_image_generation_config(
                    llm, db_session
                )

                tool_dict[db_tool_model.id] = [
                    ImageGenerationTool(
                        image_generation_credentials=ImageGenerationProviderCredentials(
                            api_key=cast(str, img_generation_llm_config.api_key),
                            api_base=img_generation_llm_config.api_base,
                            api_version=img_generation_llm_config.api_version,
                            deployment_name=(
                                img_generation_llm_config.deployment_name
                                or img_generation_llm_config.model_name
                            ),
                            custom_config=img_generation_llm_config.custom_config,
                        ),
                        provider=img_generation_llm_config.model_provider,
                        model=img_generation_llm_config.model_name,
                        tool_id=db_tool_model.id,
                        emitter=emitter,
                    )
                ]

            # Handle Web Search Tool
            elif tool_cls.__name__ == WebSearchTool.__name__:
                try:
                    tool_dict[db_tool_model.id] = [
                        WebSearchTool(tool_id=db_tool_model.id, emitter=emitter)
                    ]
                except ValueError as e:
                    logger.error(f"Failed to initialize Internet Search Tool: {e}")
                    raise ValueError(
                        "Internet search tool requires a search provider API key, please contact your Onyx admin to get it added!"
                    )

            # Handle Open URL Tool
            elif tool_cls.__name__ == OpenURLTool.__name__:
                try:
                    tool_dict[db_tool_model.id] = [
                        OpenURLTool(
                            tool_id=db_tool_model.id,
                            emitter=emitter,
                            document_index=document_index,
                            user=user,
                        )
                    ]
                except RuntimeError as e:
                    logger.error(f"Failed to initialize Open URL Tool: {e}")
                    raise ValueError(
                        "Open URL tool requires a web content provider, please contact your Onyx admin to get it configured!"
                    )

            # Handle Python/Code Interpreter Tool
            elif tool_cls.__name__ == PythonTool.__name__:
                tool_dict[db_tool_model.id] = [
                    PythonTool(tool_id=db_tool_model.id, emitter=emitter)
                ]

            # Handle File Reader Tool
            elif tool_cls.__name__ == FileReaderTool.__name__:
                cfg = file_reader_tool_config or FileReaderToolConfig()
                tool_dict[db_tool_model.id] = [
                    FileReaderTool(
                        tool_id=db_tool_model.id,
                        emitter=emitter,
                        user_file_ids=cfg.user_file_ids,
                        chat_file_ids=cfg.chat_file_ids,
                    )
                ]

            # Handle KG Tool
            # TODO: disabling for now because it's broken in the refactor
            # elif tool_cls.__name__ == KnowledgeGraphTool.__name__:

            #     # skip the knowledge graph tool if KG is not enabled/exposed
            #     kg_config = get_kg_config_settings()
            #     if not kg_config.KG_ENABLED or not kg_config.KG_EXPOSED:
            #         logger.debug("Knowledge Graph Tool is not enabled/exposed")
            #         continue

            #     if persona.name != TMP_DRALPHA_PERSONA_NAME:
            #         # TODO: remove this after the beta period
            #         raise ValueError(
            #             f"The Knowledge Graph Tool should only be used by the '{TMP_DRALPHA_PERSONA_NAME}' Agent."
            #         )
            #     tool_dict[db_tool_model.id] = [
            #         KnowledgeGraphTool(tool_id=db_tool_model.id)
            #     ]

        # Handle custom tools
        elif db_tool_model.openapi_schema:
            if not custom_tool_config:
                custom_tool_config = CustomToolConfig()

            # Determine which OAuth token to use
            oauth_token_for_tool = None

            # Priority 1: OAuth config (per-tool OAuth)
            if db_tool_model.oauth_config_id:
                if user.is_anonymous:
                    logger.warning(
                        f"Anonymous user cannot use OAuth tool {db_tool_model.id}"
                    )
                    continue
                oauth_config = get_oauth_config(
                    db_tool_model.oauth_config_id, db_session
                )
                if oauth_config:
                    token_manager = OAuthTokenManager(oauth_config, user.id, db_session)
                    oauth_token_for_tool = token_manager.get_valid_access_token()
                    if not oauth_token_for_tool:
                        logger.warning(
                            f"No valid OAuth token found for tool {db_tool_model.id} "
                            f"with OAuth config {db_tool_model.oauth_config_id}"
                        )

            # Priority 2: Passthrough auth (user's login OAuth token)
            elif db_tool_model.passthrough_auth:
                if user.is_anonymous:
                    logger.warning(
                        f"Anonymous user cannot use passthrough auth tool {db_tool_model.id}"
                    )
                    continue
                oauth_token_for_tool = user_oauth_token

            tool_dict[db_tool_model.id] = cast(
                list[Tool],
                build_custom_tools_from_openapi_schema_and_headers(
                    tool_id=db_tool_model.id,
                    openapi_schema=db_tool_model.openapi_schema,
                    emitter=emitter,
                    dynamic_schema_info=DynamicSchemaInfo(
                        chat_session_id=custom_tool_config.chat_session_id,
                        message_id=custom_tool_config.message_id,
                    ),
                    custom_headers=(db_tool_model.custom_headers or [])
                    + (
                        header_dict_to_header_list(
                            custom_tool_config.additional_headers or {}
                        )
                    ),
                    user_oauth_token=oauth_token_for_tool,
                ),
            )

        # Handle MCP tools
        elif db_tool_model.mcp_server_id:
            if db_tool_model.mcp_server_id in mcp_tool_cache:
                tool_dict[db_tool_model.id] = [
                    mcp_tool_cache[db_tool_model.mcp_server_id][db_tool_model.id]
                ]
                continue

            mcp_server = get_mcp_server_by_id(db_tool_model.mcp_server_id, db_session)

            # Get user-specific connection config if needed
            connection_config = None
            user_email = user.email
            mcp_user_oauth_token = None

            if mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:
                # Pass-through OAuth: use the user's login OAuth token
                if user.is_anonymous:
                    logger.warning(
                        f"Anonymous user cannot use PT_OAUTH MCP server {mcp_server.id}"
                    )
                    continue
                mcp_user_oauth_token = user_oauth_token
            elif (
                mcp_server.auth_type == MCPAuthenticationType.API_TOKEN
                or mcp_server.auth_type == MCPAuthenticationType.OAUTH
            ):
                # If server has a per-user template, only use that user's config
                if mcp_server.auth_performer == MCPAuthenticationPerformer.PER_USER:
                    connection_config = get_user_connection_config(
                        mcp_server.id, user_email, db_session
                    )
                else:
                    # No per-user template: use admin config
                    connection_config = mcp_server.admin_connection_config

            # Get all saved tools for this MCP server
            saved_tools = get_all_mcp_tools_for_server(mcp_server.id, db_session)

            # Find the specific tool that this database entry represents
            expected_tool_name = db_tool_model.display_name

            # Extract additional MCP headers from config
            additional_mcp_headers = None
            if custom_tool_config and custom_tool_config.mcp_headers:
                additional_mcp_headers = custom_tool_config.mcp_headers

            mcp_tool_cache[db_tool_model.mcp_server_id] = {}
            # Find the matching tool definition
            for saved_tool in saved_tools:
                # Create MCPTool instance for this specific tool
                mcp_tool = MCPTool(
                    tool_id=saved_tool.id,
                    emitter=emitter,
                    mcp_server=mcp_server,
                    tool_name=saved_tool.name,
                    tool_description=saved_tool.description,
                    tool_definition=saved_tool.mcp_input_schema or {},
                    connection_config=connection_config,
                    user_email=user_email,
                    user_id=str(user.id),
                    user_oauth_token=mcp_user_oauth_token,
                    additional_headers=additional_mcp_headers,
                )
                mcp_tool_cache[db_tool_model.mcp_server_id][saved_tool.id] = mcp_tool

                if saved_tool.id == db_tool_model.id:
                    tool_dict[saved_tool.id] = [cast(Tool, mcp_tool)]
            if db_tool_model.id not in tool_dict:
                logger.warning(
                    f"Tool '{expected_tool_name}' not found in MCP server '{mcp_server.name}'"
                )

    if (
        not added_search_tool
        and search_usage_forcing_setting == SearchToolUsage.ENABLED
        and not DISABLE_VECTOR_DB
    ):
        # Get the database tool model for SearchTool
        search_tool_db_model = get_builtin_tool(db_session, SearchTool)

        if not search_tool_config:
            search_tool_config = SearchToolConfig()

        tool_dict[search_tool_db_model.id] = [
            _build_search_tool(search_tool_db_model.id, search_tool_config)
        ]

    # Always inject MemoryTool when the user has the memory tool enabled,
    # bypassing persona tool associations and allowed_tool_ids filtering
    if user.enable_memory_tool:
        try:
            memory_tool_db_model = get_builtin_tool(db_session, MemoryTool)
            memory_tool = MemoryTool(
                tool_id=memory_tool_db_model.id,
                emitter=emitter,
                llm=llm,
            )
            tool_dict[memory_tool_db_model.id] = [memory_tool]
        except RuntimeError:
            logger.warning(
                "MemoryTool not found in the database. Run the latest alembic migration to seed it."
            )

    tools: list[Tool] = []
    for tool_list in tool_dict.values():
        tools.extend(tool_list)

    return tool_dict


================================================
FILE: backend/onyx/tools/tool_implementations/custom/base_tool_types.py
================================================
# should really be `JSON_ro`, but this causes issues with pydantic
ToolResultType = dict | list | str | int | float | bool


================================================
FILE: backend/onyx/tools/tool_implementations/custom/custom_tool.py
================================================
import csv
import json
import queue
import uuid
from io import BytesIO
from io import StringIO
from typing import Any
from typing import Dict
from typing import List

import requests
from requests import JSONDecodeError

from onyx.chat.emitter import Emitter
from onyx.configs.constants import FileOrigin
from onyx.file_store.file_store import get_default_file_store
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import CustomToolArgs
from onyx.server.query_and_chat.streaming_models import CustomToolDelta
from onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo
from onyx.server.query_and_chat.streaming_models import CustomToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import CHAT_SESSION_ID_PLACEHOLDER
from onyx.tools.models import CustomToolCallSummary
from onyx.tools.models import CustomToolUserFileSnapshot
from onyx.tools.models import DynamicSchemaInfo
from onyx.tools.models import MESSAGE_ID_PLACEHOLDER
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.custom.openapi_parsing import MethodSpec
from onyx.tools.tool_implementations.custom.openapi_parsing import (
    openapi_to_method_specs,
)
from onyx.tools.tool_implementations.custom.openapi_parsing import openapi_to_url
from onyx.tools.tool_implementations.custom.openapi_parsing import REQUEST_BODY
from onyx.tools.tool_implementations.custom.openapi_parsing import (
    validate_openapi_schema,
)
from onyx.utils.headers import header_list_to_header_dict
from onyx.utils.headers import HeaderItemDict
from onyx.utils.logger import setup_logger

logger = setup_logger()

CUSTOM_TOOL_RESPONSE_ID = "custom_tool_response"


# override_kwargs is not supported for custom tools
class CustomTool(Tool[None]):
    def __init__(
        self,
        id: int,
        method_spec: MethodSpec,
        base_url: str,
        emitter: Emitter,
        custom_headers: list[HeaderItemDict] | None = None,
        user_oauth_token: str | None = None,
    ) -> None:
        super().__init__(emitter=emitter)

        self._base_url = base_url
        self._method_spec = method_spec
        self._tool_definition = self._method_spec.to_tool_definition()
        self._user_oauth_token = user_oauth_token
        self._id = id

        self._name = self._method_spec.name
        self._description = self._method_spec.summary
        self.headers = (
            header_list_to_header_dict(custom_headers) if custom_headers else {}
        )

        # Check for both Authorization header and OAuth token
        has_auth_header = any(
            key.lower() == "authorization" for key in self.headers.keys()
        )
        if has_auth_header and self._user_oauth_token:
            logger.warning(
                f"Tool '{self._name}' has both an Authorization "
                "header and OAuth token set. This is likely a configuration "
                "error as the OAuth token will override the custom header."
            )

        if self._user_oauth_token:
            self.headers["Authorization"] = f"Bearer {self._user_oauth_token}"

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self._name

    @property
    def description(self) -> str:
        return self._description

    @property
    def display_name(self) -> str:
        return self._name

    def tool_definition(self) -> dict:
        return self._tool_definition

    def _save_and_get_file_references(
        self, file_content: bytes | str, content_type: str
    ) -> List[str]:
        file_store = get_default_file_store()

        file_id = str(uuid.uuid4())

        # Handle both binary and text content
        if isinstance(file_content, str):
            content = BytesIO(file_content.encode())
        else:
            content = BytesIO(file_content)

        file_store.save_file(
            file_id=file_id,
            content=content,
            display_name=file_id,
            file_origin=FileOrigin.CHAT_UPLOAD,
            file_type=content_type,
            file_metadata={
                "content_type": content_type,
            },
        )

        return [file_id]

    def _parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        csv_file = StringIO(csv_text)
        reader = csv.DictReader(csv_file)
        return [row for row in reader]

    """Actual execution of the tool"""

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=CustomToolStart(tool_name=self._name, tool_id=self._id),
            )
        )

    def run(
        self,
        placement: Placement,
        override_kwargs: None = None,  # noqa: ARG002
        **llm_kwargs: Any,
    ) -> ToolResponse:
        # Build path params
        path_params = {}
        for path_param_schema in self._method_spec.get_path_param_schemas():
            param_name = path_param_schema["name"]
            if param_name not in llm_kwargs:
                raise ToolCallException(
                    message=f"Missing required path parameter '{param_name}' in {self._name} tool call",
                    llm_facing_message=(
                        f"The {self._name} tool requires the '{param_name}' path parameter. "
                        f"Please provide it in the tool call arguments."
                    ),
                )
            path_params[param_name] = llm_kwargs[param_name]

        # Build query params
        query_params = {}
        for query_param_schema in self._method_spec.get_query_param_schemas():
            if query_param_schema["name"] in llm_kwargs:
                query_params[query_param_schema["name"]] = llm_kwargs[
                    query_param_schema["name"]
                ]

        # Emit args packet (path + query params only, no request body)
        tool_args = {**path_params, **query_params}
        if tool_args:
            self.emitter.emit(
                Packet(
                    placement=placement,
                    obj=CustomToolArgs(
                        tool_name=self._name,
                        tool_args=tool_args,
                    ),
                )
            )

        request_body = llm_kwargs.get(REQUEST_BODY)
        url = self._method_spec.build_url(self._base_url, path_params, query_params)
        method = self._method_spec.method

        response = requests.request(
            method, url, json=request_body, headers=self.headers
        )
        content_type = response.headers.get("Content-Type", "")

        # Detect HTTP errors — only 401/403 are flagged as auth errors
        error_info: CustomToolErrorInfo | None = None
        if response.status_code in (401, 403):
            error_info = CustomToolErrorInfo(
                is_auth_error=True,
                status_code=response.status_code,
                message=f"{self._name} action failed because of authentication error",
            )
            logger.warning(
                f"Auth error from custom tool '{self._name}': HTTP {response.status_code}"
            )

        tool_result: Any
        response_type: str
        file_ids: List[str] | None = None
        data: dict | list | str | int | float | bool | None = None

        if "text/csv" in content_type:
            file_ids = self._save_and_get_file_references(
                response.content, content_type
            )
            tool_result = CustomToolUserFileSnapshot(file_ids=file_ids)
            response_type = "csv"

        elif "image/" in content_type:
            file_ids = self._save_and_get_file_references(
                response.content, content_type
            )
            tool_result = CustomToolUserFileSnapshot(file_ids=file_ids)
            response_type = "image"

        else:
            try:
                tool_result = response.json()
                response_type = "json"
                data = tool_result
            except JSONDecodeError:
                logger.exception(
                    f"Failed to parse response as JSON for tool '{self._name}'"
                )
                tool_result = response.text
                response_type = "text"
                data = tool_result

        logger.info(
            f"Returning tool response for {self._name} with type {response_type}"
        )

        # Emit CustomToolDelta packet
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=CustomToolDelta(
                    tool_name=self._name,
                    tool_id=self._id,
                    response_type=response_type,
                    data=data,
                    file_ids=file_ids,
                    error=error_info,
                ),
            )
        )

        llm_facing_response = json.dumps(tool_result)

        return ToolResponse(
            rich_response=CustomToolCallSummary(
                tool_name=self._name,
                response_type=response_type,
                tool_result=tool_result,
                error=error_info,
            ),
            llm_facing_response=llm_facing_response,
        )


def build_custom_tools_from_openapi_schema_and_headers(
    tool_id: int,
    openapi_schema: dict[str, Any],
    emitter: Emitter | None = None,
    custom_headers: list[HeaderItemDict] | None = None,
    dynamic_schema_info: DynamicSchemaInfo | None = None,
    user_oauth_token: str | None = None,
) -> list[CustomTool]:
    if dynamic_schema_info:
        # Process dynamic schema information
        schema_str = json.dumps(openapi_schema)
        placeholders = {
            CHAT_SESSION_ID_PLACEHOLDER: dynamic_schema_info.chat_session_id,
            MESSAGE_ID_PLACEHOLDER: dynamic_schema_info.message_id,
        }

        for placeholder, value in placeholders.items():
            if value:
                schema_str = schema_str.replace(placeholder, str(value))

        openapi_schema = json.loads(schema_str)

    url = openapi_to_url(openapi_schema)
    method_specs = openapi_to_method_specs(openapi_schema)

    # Use a discard emitter if none provided (packets go nowhere)
    if emitter is None:
        emitter = Emitter(merged_queue=queue.Queue())

    return [
        CustomTool(
            id=tool_id,
            method_spec=method_spec,
            base_url=url,
            emitter=emitter,
            custom_headers=custom_headers,
            user_oauth_token=user_oauth_token,
        )
        for method_spec in method_specs
    ]


if __name__ == "__main__":
    import openai
    from openai.types.chat.chat_completion_message_function_tool_call import (
        ChatCompletionMessageFunctionToolCall,
    )

    openapi_schema = {
        "openapi": "3.0.0",
        "info": {
            "version": "1.0.0",
            "title": "Assistants API",
            "description": "An API for managing assistants",
        },
        "servers": [
            {"url": "http://localhost:8080"},
        ],
        "paths": {
            "/assistant/{assistant_id}": {
                "get": {
                    "summary": "Get a specific Assistant",
                    "operationId": "getAssistant",
                    "parameters": [
                        {
                            "name": "assistant_id",
                            "in": "path",
                            "required": True,
                            "schema": {"type": "string"},
                        }
                    ],
                },
                "post": {
                    "summary": "Create a new Assistant",
                    "operationId": "createAssistant",
                    "parameters": [
                        {
                            "name": "assistant_id",
                            "in": "path",
                            "required": True,
                            "schema": {"type": "string"},
                        }
                    ],
                    "requestBody": {
                        "required": True,
                        "content": {"application/json": {"schema": {"type": "object"}}},
                    },
                },
            }
        },
    }
    validate_openapi_schema(openapi_schema)

    tools = build_custom_tools_from_openapi_schema_and_headers(
        tool_id=0,  # dummy tool id
        openapi_schema=openapi_schema,
        emitter=Emitter(merged_queue=queue.Queue()),
        dynamic_schema_info=None,
    )

    openai_client = openai.OpenAI()
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Can you fetch assistant with ID 10"},
        ],
        tools=[tool.tool_definition() for tool in tools],  # type: ignore
    )
    choice = response.choices[0]
    if choice.message.tool_calls:
        print(choice.message.tool_calls)
        tool_call = choice.message.tool_calls[0]
        if isinstance(tool_call, ChatCompletionMessageFunctionToolCall):
            # Note: This example code would need a proper run_context with emitter
            # For testing purposes, this would need to be updated
            print("Tool execution requires run_context with emitter")


================================================
FILE: backend/onyx/tools/tool_implementations/custom/openapi_parsing.py
================================================
from typing import Any
from typing import cast

from pydantic import BaseModel

REQUEST_BODY = "requestBody"


class PathSpec(BaseModel):
    path: str
    methods: dict[str, Any]


class MethodSpec(BaseModel):
    name: str
    summary: str
    path: str
    method: str
    spec: dict[str, Any]

    def get_request_body_schema(self) -> dict[str, Any]:
        content = self.spec.get("requestBody", {}).get("content", {})
        if "application/json" in content:
            return content["application/json"].get("schema")

        if content:
            raise ValueError(
                f"Unsupported content type: '{list(content.keys())[0]}'. Only 'application/json' is supported."
            )

        return {}

    def get_query_param_schemas(self) -> list[dict[str, Any]]:
        return [
            param
            for param in self.spec.get("parameters", [])
            if "schema" in param and "in" in param and param["in"] == "query"
        ]

    def get_path_param_schemas(self) -> list[dict[str, Any]]:
        return [
            param
            for param in self.spec.get("parameters", [])
            if "schema" in param and "in" in param and param["in"] == "path"
        ]

    def build_url(
        self, base_url: str, path_params: dict[str, str], query_params: dict[str, str]
    ) -> str:
        url = f"{base_url}{self.path}"
        try:
            url = url.format(**path_params)
        except KeyError as e:
            raise ValueError(f"Missing path parameter: {e}")
        if query_params:
            url += "?"
            for param, value in query_params.items():
                url += f"{param}={value}&"
            url = url[:-1]
        return url

    def to_tool_definition(self) -> dict[str, Any]:
        tool_definition: Any = {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.summary,
                "parameters": {"type": "object", "properties": {}},
            },
        }

        request_body_schema = self.get_request_body_schema()
        if request_body_schema:
            tool_definition["function"]["parameters"]["properties"][
                REQUEST_BODY
            ] = request_body_schema

        query_param_schemas = self.get_query_param_schemas()
        if query_param_schemas:
            tool_definition["function"]["parameters"]["properties"].update(
                {param["name"]: param["schema"] for param in query_param_schemas}
            )

        path_param_schemas = self.get_path_param_schemas()
        if path_param_schemas:
            tool_definition["function"]["parameters"]["properties"].update(
                {param["name"]: param["schema"] for param in path_param_schemas}
            )
        return tool_definition

    def validate_spec(self) -> None:
        # Validate url construction
        path_param_schemas = self.get_path_param_schemas()
        dummy_path_dict = {param["name"]: "value" for param in path_param_schemas}
        query_param_schemas = self.get_query_param_schemas()
        dummy_query_dict = {param["name"]: "value" for param in query_param_schemas}
        self.build_url("", dummy_path_dict, dummy_query_dict)

        # Make sure request body doesn't throw an exception
        self.get_request_body_schema()

        # Ensure the method is valid
        if not self.method:
            raise ValueError("HTTP method is not specified.")
        if self.method.upper() not in ["GET", "POST", "PUT", "DELETE", "PATCH"]:
            raise ValueError(f"HTTP method '{self.method}' is not supported.")


"""Path-level utils"""


def openapi_to_path_specs(openapi_spec: dict[str, Any]) -> list[PathSpec]:
    path_specs = []

    for path, methods in openapi_spec.get("paths", {}).items():
        path_specs.append(PathSpec(path=path, methods=methods))

    return path_specs


"""Method-level utils"""


def openapi_to_method_specs(openapi_spec: dict[str, Any]) -> list[MethodSpec]:
    path_specs = openapi_to_path_specs(openapi_spec)

    method_specs = []
    for path_spec in path_specs:
        for method_name, method in path_spec.methods.items():
            name = method.get("operationId")
            if not name:
                raise ValueError(
                    f"Operation ID is not specified for {method_name.upper()} {path_spec.path}"
                )

            summary = method.get("summary") or method.get("description")
            if not summary:
                raise ValueError(
                    f"Summary is not specified for {method_name.upper()} {path_spec.path}"
                )

            method_specs.append(
                MethodSpec(
                    name=name,
                    summary=summary,
                    path=path_spec.path,
                    method=method_name,
                    spec=method,
                )
            )

    if not method_specs:
        raise ValueError("No methods found in OpenAPI schema")

    return method_specs


def openapi_to_url(openapi_schema: dict[str, dict | str]) -> str:
    """
    Extract URLs from the servers section of an OpenAPI schema.

    Args:
        openapi_schema (Dict[str, Union[Dict, str, List]]): The OpenAPI schema in dictionary format.

    Returns:
        List[str]: A list of base URLs.
    """
    urls: list[str] = []

    servers = cast(list[dict[str, Any]], openapi_schema.get("servers", []))
    for server in servers:
        url = server.get("url")
        if url:
            urls.append(url)

    if len(urls) != 1:
        raise ValueError(
            f"Expected exactly one URL in OpenAPI schema, but found {urls}"
        )

    return urls[0]


def validate_openapi_schema(schema: dict[str, Any]) -> None:
    """
    Validate the given JSON schema as an OpenAPI schema.

    Parameters:
    - schema (dict): The JSON schema to validate.

    Returns:
    - bool: True if the schema is valid, False otherwise.
    """

    # check basic structure
    if "info" not in schema:
        raise ValueError("`info` section is required in OpenAPI schema")

    info = schema["info"]
    if "title" not in info:
        raise ValueError("`title` is required in `info` section of OpenAPI schema")
    if "description" not in info:
        raise ValueError(
            "`description` is required in `info` section of OpenAPI schema"
        )

    if "openapi" not in schema:
        raise ValueError(
            "`openapi` field which specifies OpenAPI schema version is required"
        )
    openapi_version = schema["openapi"]
    if not openapi_version.startswith("3."):
        raise ValueError(f"OpenAPI version '{openapi_version}' is not supported")

    if "paths" not in schema:
        raise ValueError("`paths` section is required in OpenAPI schema")

    url = openapi_to_url(schema)
    if not url:
        raise ValueError("OpenAPI schema does not contain a valid URL in `servers`")

    method_specs = openapi_to_method_specs(schema)
    for method_spec in method_specs:
        method_spec.validate_spec()


================================================
FILE: backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py
================================================
import io
import json
from typing import Any
from typing import cast
from uuid import UUID

from sqlalchemy.orm import Session
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import load_chat_file_by_id
from onyx.file_store.utils import load_user_file
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import FileReaderResult
from onyx.server.query_and_chat.streaming_models import FileReaderStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.utils.logger import setup_logger

logger = setup_logger()

FILE_ID_FIELD = "file_id"
START_CHAR_FIELD = "start_char"
NUM_CHARS_FIELD = "num_chars"

MAX_NUM_CHARS = 16000
DEFAULT_NUM_CHARS = MAX_NUM_CHARS
PREVIEW_CHARS = 500


class FileReaderToolOverrideKwargs:
    """No override kwargs needed for the file reader tool."""


class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):
    NAME = "read_file"
    DISPLAY_NAME = "File Reader"
    DESCRIPTION = (
        "Read a section of a user-uploaded file by character offset. "
        "Returns up to 16000 characters starting from the given offset."
    )

    def __init__(
        self,
        tool_id: int,
        emitter: Emitter,
        user_file_ids: list[UUID],
        chat_file_ids: list[UUID],
    ) -> None:
        super().__init__(emitter=emitter)
        self._id = tool_id
        self._user_file_ids = set(user_file_ids)
        self._chat_file_ids = set(chat_file_ids)

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003
        # TODO(evan): temporary – gate behind DISABLE_VECTOR_DB until the tool is
        # generalised for standard (vector-DB-enabled) deployments.
        return DISABLE_VECTOR_DB

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.DESCRIPTION,
                "parameters": {
                    "type": "object",
                    "properties": {
                        FILE_ID_FIELD: {
                            "type": "string",
                            "description": "The UUID of the file to read.",
                        },
                        START_CHAR_FIELD: {
                            "type": "integer",
                            "description": (
                                "Character offset to start reading from. Defaults to 0."
                            ),
                        },
                        NUM_CHARS_FIELD: {
                            "type": "integer",
                            "description": (
                                "Number of characters to return (max 16000). Defaults to 16000."
                            ),
                        },
                    },
                    "required": [FILE_ID_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=FileReaderStart(),
            )
        )

    def _validate_file_id(self, raw_file_id: str) -> UUID:
        try:
            file_id = UUID(raw_file_id)
        except ValueError:
            raise ToolCallException(
                message=f"Invalid file_id: {raw_file_id}",
                llm_facing_message=f"'{raw_file_id}' is not a valid file UUID.",
            )

        if file_id not in self._user_file_ids and file_id not in self._chat_file_ids:
            raise ToolCallException(
                message=f"File {file_id} not in available files",
                llm_facing_message=(
                    f"File '{file_id}' is not available. Please use one of the file IDs listed in the context."
                ),
            )

        return file_id

    def _load_file(self, file_id: UUID) -> InMemoryChatFile:
        if file_id in self._user_file_ids:
            with get_session_with_current_tenant() as db_session:
                return load_user_file(file_id, db_session)
        return load_chat_file_by_id(str(file_id))

    def run(
        self,
        placement: Placement,
        override_kwargs: FileReaderToolOverrideKwargs,  # noqa: ARG002
        **llm_kwargs: Any,
    ) -> ToolResponse:
        if FILE_ID_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{FILE_ID_FIELD}' parameter",
                llm_facing_message=(
                    f"The read_file tool requires a '{FILE_ID_FIELD}' parameter. "
                    f'Example: {{"file_id": "abc-123", "start_char": 0, "num_chars": 16000}}'
                ),
            )

        raw_file_id = cast(str, llm_kwargs[FILE_ID_FIELD])
        file_id = self._validate_file_id(raw_file_id)
        start_char = max(0, int(llm_kwargs.get(START_CHAR_FIELD, 0)))
        num_chars = min(
            MAX_NUM_CHARS,
            max(1, int(llm_kwargs.get(NUM_CHARS_FIELD, DEFAULT_NUM_CHARS))),
        )

        chat_file = self._load_file(file_id)

        # Only PLAIN_TEXT and TABULAR are guaranteed to contain actual text bytes.
        # DOC type in a loaded file means plaintext extraction failed and the
        # content is the original binary (e.g. raw PDF/DOCX bytes).
        if chat_file.file_type not in (
            ChatFileType.PLAIN_TEXT,
            ChatFileType.TABULAR,
        ):
            raise ToolCallException(
                message=f"File {file_id} is not a text file (type={chat_file.file_type})",
                llm_facing_message=(
                    f"File '{chat_file.filename or file_id}' is a {chat_file.file_type.value} file and cannot be read as text."
                ),
            )

        try:
            if chat_file.file_type == ChatFileType.PLAIN_TEXT:
                full_text = chat_file.content.decode("utf-8", errors="replace")
            else:
                full_text = (
                    extract_file_text(
                        file=io.BytesIO(chat_file.content),
                        file_name=chat_file.filename or "",
                        break_on_unprocessable=False,
                    )
                    or ""
                )
        except ToolCallException:
            raise
        except Exception:
            raise ToolCallException(
                message=f"Failed to decode file {file_id}",
                llm_facing_message="The file could not be read as text.",
            )

        total_chars = len(full_text)
        end_char = min(start_char + num_chars, total_chars)
        section = full_text[start_char:end_char]

        file_name = chat_file.filename or str(file_id)

        preview_start = section[:PREVIEW_CHARS]
        preview_end = section[-PREVIEW_CHARS:] if len(section) > PREVIEW_CHARS else ""

        # Emit result packet so the frontend can display what was read
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=FileReaderResult(
                    file_name=file_name,
                    file_id=str(file_id),
                    start_char=start_char,
                    end_char=end_char,
                    total_chars=total_chars,
                    preview_start=preview_start,
                    preview_end=preview_end,
                ),
            )
        )

        has_more = end_char < total_chars
        header = (
            f"File: {file_name}\nCharacters {start_char}-{end_char} of {total_chars}"
        )
        if has_more:
            header += f" (use start_char={end_char} to continue reading)"

        llm_response = f"{header}\n\n{section}"

        # Build a lightweight summary for DB storage (avoids saving full text).
        # The LLM-facing response carries the real content; the rich_response
        # is what gets persisted and re-hydrated on page reload.
        saved_summary = json.dumps(
            {
                "file_name": file_name,
                "file_id": str(file_id),
                "start_char": start_char,
                "end_char": end_char,
                "total_chars": total_chars,
                "preview_start": preview_start,
                "preview_end": preview_end,
            }
        )

        return ToolResponse(
            rich_response=saved_summary,
            llm_facing_response=llm_response,
        )


================================================
FILE: backend/onyx/tools/tool_implementations/images/image_generation_tool.py
================================================
import json
import threading
from typing import Any
from typing import cast

import requests
from sqlalchemy.orm import Session
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.configs.app_configs import IMAGE_MODEL_NAME
from onyx.configs.app_configs import IMAGE_MODEL_PROVIDER
from onyx.db.image_generation import get_default_image_generation_config
from onyx.file_store.models import ChatFileType
from onyx.file_store.utils import build_frontend_file_url
from onyx.file_store.utils import load_chat_file_by_id
from onyx.file_store.utils import save_files
from onyx.image_gen.factory import get_image_generation_provider
from onyx.image_gen.factory import validate_credentials
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
    FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.images.models import ImageGenerationResponse
from onyx.tools.tool_implementations.images.models import ImageShape
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()

# Heartbeat interval in seconds to prevent timeouts
HEARTBEAT_INTERVAL = 5.0

PROMPT_FIELD = "prompt"
REFERENCE_IMAGE_FILE_IDS_FIELD = "reference_image_file_ids"


class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
    NAME = "generate_image"
    DESCRIPTION = "Generate an image based on a prompt. Do not use unless the user specifically requests an image."
    DISPLAY_NAME = "Image Generation"

    def __init__(
        self,
        image_generation_credentials: ImageGenerationProviderCredentials,
        tool_id: int,
        emitter: Emitter,
        model: str = IMAGE_MODEL_NAME,
        provider: str = IMAGE_MODEL_PROVIDER,
        num_imgs: int = 1,
    ) -> None:
        super().__init__(emitter=emitter)
        self.model = model
        self.provider = provider
        self.num_imgs = num_imgs

        self.img_provider = get_image_generation_provider(
            provider, image_generation_credentials
        )

        self._id = tool_id

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:
        """Available if a default image generation config exists with valid credentials."""
        try:
            config = get_default_image_generation_config(db_session)
            if not config or not config.model_configuration:
                return False

            llm_provider = config.model_configuration.llm_provider
            credentials = ImageGenerationProviderCredentials(
                api_key=(
                    llm_provider.api_key.get_value(apply_mask=False)
                    if llm_provider.api_key
                    else None
                ),
                api_base=llm_provider.api_base,
                api_version=llm_provider.api_version,
                deployment_name=llm_provider.deployment_name,
                custom_config=llm_provider.custom_config,
            )
            return validate_credentials(
                provider=llm_provider.provider,
                credentials=credentials,
            )
        except Exception:
            logger.exception("Error checking if image generation is available")
            return False

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        PROMPT_FIELD: {
                            "type": "string",
                            "description": "Prompt used to generate the image",
                        },
                        "shape": {
                            "type": "string",
                            "description": (
                                "Optional - only specify if you want a specific shape."
                                " Image shape: 'square', 'portrait', or 'landscape'."
                            ),
                            "enum": [shape.value for shape in ImageShape],
                        },
                        REFERENCE_IMAGE_FILE_IDS_FIELD: {
                            "type": "array",
                            "description": (
                                "Optional image file IDs to use as reference context for edits/variations. "
                                "Use the file_id values returned by previous generate_image calls."
                            ),
                            "items": {
                                "type": "string",
                            },
                        },
                    },
                    "required": [PROMPT_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=ImageGenerationToolStart(),
            )
        )

    def _generate_image(
        self,
        prompt: str,
        shape: ImageShape,
        reference_images: list[ReferenceImage] | None = None,
    ) -> tuple[ImageGenerationResponse, Any]:
        if shape == ImageShape.LANDSCAPE:
            if "gpt-image-1" in self.model:
                size = "1536x1024"
            else:
                size = "1792x1024"
        elif shape == ImageShape.PORTRAIT:
            if "gpt-image-1" in self.model:
                size = "1024x1536"
            else:
                size = "1024x1792"
        else:
            size = "1024x1024"
        logger.debug(f"Generating image with model: {self.model}, size: {size}")
        try:
            response = self.img_provider.generate_image(
                prompt=prompt,
                model=self.model,
                size=size,
                n=1,
                reference_images=reference_images,
                # response_format parameter is not supported for gpt-image-1
                response_format=None if "gpt-image-1" in self.model else "b64_json",
            )

            if not response.data or len(response.data) == 0:
                raise RuntimeError("No image data returned from the API")

            image_item = response.data[0].model_dump()

            image_data = image_item.get("b64_json")
            if not image_data:
                raise RuntimeError("No base64 image data returned from the API")

            revised_prompt = image_item.get("revised_prompt")
            if revised_prompt is None:
                revised_prompt = prompt

            return (
                ImageGenerationResponse(
                    revised_prompt=revised_prompt,
                    image_data=image_data,
                ),
                response,
            )

        except requests.RequestException as e:
            logger.error(f"Error fetching or converting image: {e}")
            raise ToolExecutionException(
                "Failed to fetch or convert the generated image", emit_error_packet=True
            )
        except Exception as e:
            logger.debug(f"Error occurred during image generation: {e}")

            error_message = str(e)
            if "OpenAIException" in str(type(e)):
                if (
                    "Your request was rejected as a result of our safety system"
                    in error_message
                ):
                    raise ToolExecutionException(
                        (
                            "The image generation request was rejected due to OpenAI's content policy. "
                            "Please try a different prompt."
                        ),
                        emit_error_packet=True,
                    )
                elif "Invalid image URL" in error_message:
                    raise ToolExecutionException(
                        "Invalid image URL provided for image generation.",
                        emit_error_packet=True,
                    )
                elif "invalid_request_error" in error_message:
                    raise ToolExecutionException(
                        "Invalid request for image generation. Please check your input.",
                        emit_error_packet=True,
                    )

            raise ToolExecutionException(
                f"An error occurred during image generation. error={error_message}",
                emit_error_packet=True,
            )

    def _resolve_reference_image_file_ids(
        self,
        llm_kwargs: dict[str, Any],
        override_kwargs: ImageGenerationToolOverrideKwargs | None,
    ) -> list[str]:
        raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)
        if raw_reference_ids is not None:
            if not isinstance(raw_reference_ids, list) or not all(
                isinstance(file_id, str) for file_id in raw_reference_ids
            ):
                raise ToolCallException(
                    message=(
                        f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
                    ),
                    llm_facing_message=(
                        f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
                    ),
                )
            reference_image_file_ids = [
                file_id.strip() for file_id in raw_reference_ids if file_id.strip()
            ]
        elif (
            override_kwargs
            and override_kwargs.recent_generated_image_file_ids
            and self.img_provider.supports_reference_images
        ):
            # If no explicit reference was provided, default to the most recently generated image.
            reference_image_file_ids = [
                override_kwargs.recent_generated_image_file_ids[-1]
            ]
        else:
            reference_image_file_ids = []

        # Deduplicate while preserving order.
        deduped_reference_image_ids: list[str] = []
        seen_ids: set[str] = set()
        for file_id in reference_image_file_ids:
            if file_id in seen_ids:
                continue
            seen_ids.add(file_id)
            deduped_reference_image_ids.append(file_id)

        if not deduped_reference_image_ids:
            return []

        if not self.img_provider.supports_reference_images:
            raise ToolCallException(
                message=(
                    f"Reference images requested but provider '{self.provider}' does not support image-editing context."
                ),
                llm_facing_message=(
                    "This image provider does not support editing from previous image context. "
                    "Try text-only generation, or switch to a provider/model that supports image edits."
                ),
            )

        max_reference_images = self.img_provider.max_reference_images
        if max_reference_images > 0:
            return deduped_reference_image_ids[-max_reference_images:]
        return deduped_reference_image_ids

    def _load_reference_images(
        self,
        reference_image_file_ids: list[str],
    ) -> list[ReferenceImage]:
        reference_images: list[ReferenceImage] = []

        for file_id in reference_image_file_ids:
            try:
                loaded_file = load_chat_file_by_id(file_id)
            except Exception as e:
                raise ToolCallException(
                    message=f"Could not load reference image file '{file_id}': {e}",
                    llm_facing_message=(
                        f"Reference image file '{file_id}' could not be loaded. "
                        "Use file_id values returned by previous generate_image calls."
                    ),
                )

            if loaded_file.file_type != ChatFileType.IMAGE:
                raise ToolCallException(
                    message=f"Reference file '{file_id}' is not an image",
                    llm_facing_message=f"Reference file '{file_id}' is not an image.",
                )

            try:
                mime_type = get_image_type_from_bytes(loaded_file.content)
            except Exception as e:
                raise ToolCallException(
                    message=f"Unsupported reference image format for '{file_id}': {e}",
                    llm_facing_message=(
                        f"Reference image '{file_id}' has an unsupported format. Only PNG, JPEG, GIF, and WEBP are supported."
                    ),
                )

            reference_images.append(
                ReferenceImage(
                    data=loaded_file.content,
                    mime_type=mime_type,
                )
            )

        return reference_images

    def run(
        self,
        placement: Placement,
        override_kwargs: ImageGenerationToolOverrideKwargs | None = None,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        if PROMPT_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{PROMPT_FIELD}' parameter in generate_image tool call",
                llm_facing_message=(
                    f"The generate_image tool requires a '{PROMPT_FIELD}' parameter describing "
                    f'the image to generate. Please provide like: {{"prompt": "a sunset over mountains"}}'
                ),
            )
        prompt = cast(str, llm_kwargs[PROMPT_FIELD])
        shape = ImageShape(llm_kwargs.get("shape", ImageShape.SQUARE.value))
        reference_image_file_ids = self._resolve_reference_image_file_ids(
            llm_kwargs=llm_kwargs,
            override_kwargs=override_kwargs,
        )
        reference_images = self._load_reference_images(reference_image_file_ids)

        # Use threading to generate images in parallel while emitting heartbeats
        results: list[tuple[ImageGenerationResponse, Any] | None] = [
            None
        ] * self.num_imgs
        completed = threading.Event()
        error_holder: list[Exception | None] = [None]

        # TODO allow the LLM to determine number of images
        def generate_all_images() -> None:
            try:
                generated_results = cast(
                    list[tuple[ImageGenerationResponse, Any]],
                    run_functions_tuples_in_parallel(
                        [
                            (
                                self._generate_image,
                                (
                                    prompt,
                                    shape,
                                    reference_images or None,
                                ),
                            )
                            for _ in range(self.num_imgs)
                        ]
                    ),
                )
                for i, result in enumerate(generated_results):
                    results[i] = result
            except Exception as e:
                error_holder[0] = e
            finally:
                completed.set()

        # Start image generation in background thread
        generation_thread = threading.Thread(target=generate_all_images)
        generation_thread.start()

        # Emit heartbeat packets while waiting for completion
        heartbeat_count = 0
        while not completed.is_set():
            # Emit a heartbeat packet to prevent timeout
            self.emitter.emit(
                Packet(
                    placement=placement,
                    obj=ImageGenerationToolHeartbeat(),
                )
            )
            heartbeat_count += 1

            # Wait for a short time before next heartbeat
            if completed.wait(timeout=HEARTBEAT_INTERVAL):
                break

        # Ensure thread has completed
        generation_thread.join()

        # Check for errors
        if error_holder[0] is not None:
            raise error_holder[0]

        # Filter out None values (shouldn't happen, but safety check)
        valid_results = [r for r in results if r is not None]

        if not valid_results:
            raise ValueError("No images were generated")

        # Extract ImageGenerationResponse objects
        image_generation_responses = [r[0] for r in valid_results]

        # Save files and create GeneratedImage objects
        file_ids = save_files(
            urls=[],
            base64_files=[img.image_data for img in image_generation_responses],
        )
        generated_images_metadata = [
            GeneratedImage(
                file_id=file_id,
                url=build_frontend_file_url(file_id),
                revised_prompt=img.revised_prompt,
                shape=shape.value,
            )
            for img, file_id in zip(image_generation_responses, file_ids)
        ]

        # Emit final packet with generated images
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=ImageGenerationFinal(images=generated_images_metadata),
            )
        )

        final_image_generation_response = FinalImageGenerationResponse(
            generated_images=generated_images_metadata
        )

        # Create llm_facing_response
        llm_facing_response = json.dumps(
            [
                {
                    "file_id": img.file_id,
                    "revised_prompt": img.revised_prompt,
                }
                for img in generated_images_metadata
            ]
        )

        return ToolResponse(
            rich_response=final_image_generation_response,
            llm_facing_response=cast(str, llm_facing_response),
        )


================================================
FILE: backend/onyx/tools/tool_implementations/images/models.py
================================================
from enum import Enum

from pydantic import BaseModel

from onyx.server.query_and_chat.streaming_models import GeneratedImage


class ImageGenerationResponse(BaseModel):
    revised_prompt: str
    image_data: str


class ImageShape(str, Enum):
    SQUARE = "square"
    PORTRAIT = "portrait"
    LANDSCAPE = "landscape"


class FinalImageGenerationResponse(BaseModel):
    generated_images: list[GeneratedImage]


================================================
FILE: backend/onyx/tools/tool_implementations/knowledge_graph/knowledge_graph_tool.py
================================================
from typing import Any

from sqlalchemy.orm import Session

from onyx.chat.emitter import Emitter
from onyx.db.kg_config import get_kg_config_settings
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.interface import Tool
from onyx.tools.models import ToolResponse
from onyx.utils.logger import setup_logger

logger = setup_logger()

QUERY_FIELD = "query"


class KnowledgeGraphTool(Tool[None]):
    _NAME = "run_kg_search"
    _DESCRIPTION = "Search the knowledge graph for information. Never call this tool."
    _DISPLAY_NAME = "Knowledge Graph Search"

    def __init__(self, tool_id: int, emitter: Emitter) -> None:
        super().__init__(emitter=emitter)

        self._id = tool_id

        raise NotImplementedError(
            "KnowledgeGraphTool should not be getting used right now."
        )

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self._NAME

    @property
    def description(self) -> str:
        return self._DESCRIPTION

    @property
    def display_name(self) -> str:
        return self._DISPLAY_NAME

    @classmethod
    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003
        """Available only if KG is enabled and exposed."""
        kg_configs = get_kg_config_settings()
        return kg_configs.KG_ENABLED and kg_configs.KG_EXPOSED

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        QUERY_FIELD: {
                            "type": "string",
                            "description": "What to search for",
                        },
                    },
                    "required": [QUERY_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        raise NotImplementedError("KnowledgeGraphTool.emit_start is not implemented.")

    def run(
        self,
        placement: Placement,
        override_kwargs: None = None,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        raise NotImplementedError("KnowledgeGraphTool.run is not implemented.")


================================================
FILE: backend/onyx/tools/tool_implementations/mcp/mcp_client.py
================================================
"""
MCP (Model Context Protocol) Client Implementation

This module provides a proper MCP client that follows the JSON-RPC 2.0 specification
and handles connection initialization, session management, and protocol communication.
"""

from collections.abc import Awaitable
from collections.abc import Callable
from enum import Enum
from typing import Any
from typing import Dict
from typing import TypeVar

from mcp import ClientSession
from mcp.client.auth import OAuthClientProvider
from mcp.client.sse import sse_client
from mcp.client.streamable_http import streamablehttp_client  # or use stdio_client
from mcp.types import CallToolResult
from mcp.types import InitializeResult
from mcp.types import ListResourcesResult
from mcp.types import TextResourceContents
from mcp.types import Tool as MCPLibTool
from pydantic import BaseModel

from onyx.db.enums import MCPTransport
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_async_sync_no_cancel

logger = setup_logger()

T = TypeVar("T", covariant=True)

MCPClientFunction = Callable[[ClientSession], Awaitable[T]]


class MCPMessageType(str, Enum):
    """MCP message types"""

    REQUEST = "request"
    RESPONSE = "response"
    NOTIFICATION = "notification"


class ContentBlockTypes(str, Enum):
    """MCP content block types"""  # Unfortunstely these aren't exposed by the mcp library

    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    RESOURCE = "resource"
    RESOURCE_LINK = "resource_link"


class MCPMessage(BaseModel):
    """Base MCP message following JSON-RPC 2.0"""

    jsonrpc: str = "2.0"
    method: str | None = None
    params: Dict[str, Any] | None = None
    id: Any | None = None
    result: Any | None = None
    error: Dict[str, Any] | None = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to JSON-RPC message dict"""
        msg: Dict[str, Any] = {"jsonrpc": self.jsonrpc}

        if self.id is not None:
            msg["id"] = self.id

        if self.method is not None:
            msg["method"] = self.method

        if self.params is not None:
            msg["params"] = self.params

        if self.result is not None:
            msg["result"] = self.result

        if self.error is not None:
            msg["error"] = self.error

        return msg


# TODO: in the future we should do things like manage sessions and handle errors better
# using an abstraction like this. For now things are purely functional and we initialize
# a new session for each tool call.
# class MCPClient:
#     """
#     MCP Client implementation that properly handles the protocol lifecycle
#     and different transport mechanisms.
#     """

#     def __init__(
#         self,
#         server_url: str,
#         transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
#         auth_token: str | None = None,
#     ):
#         self.server_url = server_url
#         self.transport = transport
#         self.auth_token = auth_token

#         # Session management
#         self.session: Optional[aiohttp.ClientSession] = None
#         self.initialized = False
#         self.capabilities: Dict[str, Any] = {}
#         self.protocol_version = "2025-03-26"  # Current MCP protocol version
#         self.session_id: str | None = None
#         # Legacy HTTP+SSE transport support (backwards compatibility)
#         self.legacy_post_endpoint: str | None = None

#         # Message ID counter
#         self._message_id_counter = 0

#         # For stdio transport
#         self.process: Optional[subprocess.Popen] = None


def _create_mcp_client_function_runner(
    function: Callable[[ClientSession], Awaitable[T]],
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,  # TODO: maybe used this for all auth types
    **kwargs: Any,
) -> Callable[[], Awaitable[T]]:
    auth_headers = connection_headers or {}
    # WARNING: httpx.Auth with requires_response_body=True (as in the MCP OAuth
    # provider) forces httpx to fully read the response body. That is incompatible
    # with SSE (infinite stream). Avoid passing auth for SSE; rely on headers.
    auth_for_request = auth if transport == MCPTransport.STREAMABLE_HTTP else None

    # doing this here for mypy
    client_func = (
        streamablehttp_client
        if transport == MCPTransport.STREAMABLE_HTTP
        else sse_client
    )

    async def run_client_function() -> T:
        async with client_func(
            server_url, headers=auth_headers, auth=auth_for_request
        ) as client_tuple:
            if len(client_tuple) == 3:
                read, write, _ = client_tuple
            elif len(client_tuple) == 2:
                assert isinstance(client_tuple, tuple)  # mypy
                read, write = client_tuple
            else:
                raise ValueError(
                    f"Unexpected number of client tuple elements: {len(client_tuple)}"
                )
            from datetime import timedelta

            async with ClientSession(
                read, write, read_timeout_seconds=timedelta(seconds=300)
            ) as session:
                return await function(session, **kwargs)

    return run_client_function


def log_exception_group(e: ExceptionGroup) -> Exception | None:
    logger.error(e)
    saved_e = None
    for err in e.exceptions:
        if isinstance(err, ExceptionGroup):
            saved_e = log_exception_group(err) or saved_e
        else:
            logger.error(err)
            saved_e = err

    return saved_e


def _call_mcp_client_function_sync(
    function: Callable[[ClientSession], Awaitable[T]],
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,
    **kwargs: Any,
) -> T:
    run_client_function = _create_mcp_client_function_runner(
        function, server_url, connection_headers, transport, auth, **kwargs
    )
    try:
        return run_async_sync_no_cancel(run_client_function())
    except Exception as e:
        logger.error(f"Failed to call MCP client function: {e}")
        if isinstance(e, ExceptionGroup):
            original_exception = e
            saved_e = log_exception_group(e)
            if saved_e:
                raise saved_e
            raise original_exception
        raise e


async def _call_mcp_client_function_async(
    function: Callable[[ClientSession], Awaitable[T]],
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,
    **kwargs: Any,
) -> T:
    run_client_function = _create_mcp_client_function_runner(
        function, server_url, connection_headers, transport, auth, **kwargs
    )
    return await run_client_function()


def process_mcp_result(call_tool_result: CallToolResult) -> str:
    """Flatten MCP CallToolResult->text (prefers text content blocks)."""
    # TODO: use structured_content if available
    parts = []
    for content_block in call_tool_result.content:
        if content_block.type == ContentBlockTypes.TEXT.value:
            parts.append(content_block.text or "")
        if content_block.type == ContentBlockTypes.RESOURCE.value:
            if isinstance(content_block.resource, TextResourceContents):
                parts.append(content_block.resource.text or "")
            # TODO: handle blob resource content
        if content_block.type == ContentBlockTypes.RESOURCE_LINK.value:
            parts.append(
                f"link: {content_block.uri} title: {content_block.title} description: {content_block.description}"
            )
        # TODO: handle other content block types

    return "\n\n".join(p for p in parts if p) or str(call_tool_result.structuredContent)


def _call_mcp_tool(tool_name: str, arguments: dict[str, Any]) -> MCPClientFunction[str]:
    async def call_tool(session: ClientSession) -> str:
        await session.initialize()
        result = await session.call_tool(tool_name, arguments)
        return process_mcp_result(result)

    return call_tool


def call_mcp_tool(
    server_url: str,
    tool_name: str,
    arguments: dict[str, Any],
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,
) -> str:
    """Call a specific tool on the MCP server"""
    return _call_mcp_client_function_sync(
        _call_mcp_tool(tool_name, arguments),
        server_url,
        connection_headers,
        transport,
        auth,
    )


async def initialize_mcp_client(
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,
) -> InitializeResult:
    return await _call_mcp_client_function_async(
        lambda session: session.initialize(),
        server_url,
        connection_headers,
        transport,
        auth,
    )


async def _discover_mcp_tools(session: ClientSession) -> list[MCPLibTool]:
    # 1) initialize
    import time

    t1 = time.time()
    init_result = await session.initialize()  # sends JSON-RPC "initialize"
    logger.info(f"Initialized with server: {init_result.serverInfo}")
    logger.info(f"Initialized with server time: {time.time() - t1}")
    # 2) tools/list
    t2 = time.time()
    tools_response = await session.list_tools()  # sends JSON-RPC "tools/list"
    logger.info(f"Listed tools with server time: {time.time() - t2}")
    return tools_response.tools


def discover_mcp_tools(
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,
    auth: OAuthClientProvider | None = None,
) -> list[MCPLibTool]:
    """
    Synchronous wrapper for discovering MCP tools.
    """
    return _call_mcp_client_function_sync(
        _discover_mcp_tools,
        server_url,
        connection_headers,
        transport,
        auth,
    )


async def _discover_mcp_resources(session: ClientSession) -> ListResourcesResult:
    return await session.list_resources()


def discover_mcp_resources_sync(
    server_url: str,
    connection_headers: dict[str, str] | None = None,
    transport: str = "streamable-http",
    auth: OAuthClientProvider | None = None,
) -> ListResourcesResult:
    """
    Synchronous wrapper for discovering MCP resources.
    This is for compatibility with the existing codebase.
    """
    return _call_mcp_client_function_sync(
        _discover_mcp_resources,
        server_url,
        connection_headers,
        MCPTransport(transport),
        auth,
    )


================================================
FILE: backend/onyx/tools/tool_implementations/mcp/mcp_tool.py
================================================
import json
from typing import Any

from mcp.client.auth import OAuthClientProvider

from onyx.chat.emitter import Emitter
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
from onyx.db.models import MCPConnectionConfig
from onyx.db.models import MCPServer
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import CustomToolDelta
from onyx.server.query_and_chat.streaming_models import CustomToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import CustomToolCallSummary
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.mcp.mcp_client import call_mcp_tool
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Headers that cannot be overridden by user requests to prevent security issues
# Host header is particularly critical - it can be used for Host Header Injection attacks
# to route requests to unintended internal servers
DENYLISTED_MCP_HEADERS = {
    "host",  # Prevents Host Header Injection attacks
}

# TODO: for now we're fitting MCP tool responses into the CustomToolCallSummary class
# In the future we may want custom handling for MCP tool responses
# class MCPToolCallSummary(BaseModel):
#     tool_name: str
#     server_url: str
#     tool_result: Any
#     server_name: str


class MCPTool(Tool[None]):
    """Tool implementation for MCP (Model Context Protocol) servers"""

    def __init__(
        self,
        tool_id: int,
        emitter: Emitter,
        mcp_server: MCPServer,  # TODO: these should be basemodels instead of db objects
        tool_name: str,
        tool_description: str,
        tool_definition: dict[str, Any],
        connection_config: MCPConnectionConfig | None = None,
        user_email: str = "",
        user_id: str = "",
        user_oauth_token: str | None = None,
        additional_headers: dict[str, str] | None = None,
    ) -> None:
        super().__init__(emitter=emitter)

        self._id = tool_id
        self.mcp_server = mcp_server
        self.connection_config = connection_config
        self.user_email = user_email
        self._user_id = user_id
        self._user_oauth_token = user_oauth_token
        self._additional_headers = additional_headers or {}

        self._name = tool_name
        self._tool_definition = tool_definition
        self._description = tool_description
        self._display_name = tool_definition.get("displayName", tool_name)
        self._llm_name = f"mcp:{mcp_server.name}:{tool_name}"

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self._name

    @property
    def description(self) -> str:
        return self._description

    @property
    def display_name(self) -> str:
        return self._display_name

    @property
    def llm_name(self) -> str:
        return self._llm_name

    def tool_definition(self) -> dict:
        """Return the tool definition from the MCP server"""
        # Convert MCP tool definition to OpenAI function calling format
        return {
            "type": "function",
            "function": {
                "name": self._name,
                "description": self._description,
                "parameters": self._tool_definition,
            },
        }

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=CustomToolStart(tool_name=self._name),
            )
        )

    def run(
        self,
        placement: Placement,
        override_kwargs: None = None,  # noqa: ARG002
        **llm_kwargs: Any,
    ) -> ToolResponse:
        """Execute the MCP tool by calling the MCP server"""
        try:
            # Build headers with proper precedence:
            # 1. Start with additional headers from API request (filled in first, excluding denylisted)
            # 2. Override with connection config headers (from DB) - these take precedence
            # 3. Override Authorization header with OAuth token if present
            headers: dict[str, str] = {}

            # Priority 1: Additional headers from API request (filled in first)
            # Filter out denylisted headers to prevent security issues (e.g., Host Header Injection)
            if self._additional_headers:
                filtered_headers = {
                    k: v
                    for k, v in self._additional_headers.items()
                    if k.lower() not in DENYLISTED_MCP_HEADERS
                }
                if filtered_headers:
                    headers.update(filtered_headers)
                # Log if any denylisted headers were provided (for security monitoring)
                denylisted_provided = [
                    k
                    for k in self._additional_headers.keys()
                    if k.lower() in DENYLISTED_MCP_HEADERS
                ]
                if denylisted_provided:
                    logger.warning(
                        f"MCP tool '{self._name}' received denylisted headers that were filtered: {denylisted_provided}"
                    )

            # Priority 2: Base headers from connection config (DB) - overrides request
            if self.connection_config and self.connection_config.config:
                config_dict = self.connection_config.config.get_value(apply_mask=False)
                headers.update(config_dict.get("headers", {}))

            # Priority 3: For pass-through OAuth, use the user's login OAuth token
            if self._user_oauth_token:
                headers["Authorization"] = f"Bearer {self._user_oauth_token}"

            # Check if this is an authentication issue before making the call
            is_passthrough_oauth = (
                self.mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH
            )
            requires_auth = (
                self.mcp_server.auth_type != MCPAuthenticationType.NONE
                and self.mcp_server.auth_type is not None
            )
            has_auth_config = (
                (self.connection_config is not None and bool(headers))
                or bool(self._additional_headers)
            ) or (is_passthrough_oauth and self._user_oauth_token is not None)

            if requires_auth and not has_auth_config:
                # Authentication required but not configured
                auth_error_msg = (
                    f"The {self._name} tool from {self.mcp_server.name} requires authentication "
                    f"but no credentials have been provided. Tell the user to use the MCP dropdown in the "
                    f"chat bar to authenticate with the {self.mcp_server.name} server before "
                    f"using this tool."
                )
                logger.warning(
                    f"Authentication required for MCP tool '{self._name}' but no credentials found"
                )

                error_result = {"error": auth_error_msg}
                llm_facing_response = json.dumps(error_result)

                # Emit CustomToolDelta packet
                self.emitter.emit(
                    Packet(
                        placement=placement,
                        obj=CustomToolDelta(
                            tool_name=self._name,
                            response_type="json",
                            data=error_result,
                        ),
                    )
                )

                return ToolResponse(
                    rich_response=CustomToolCallSummary(
                        tool_name=self._name,
                        response_type="json",
                        tool_result=error_result,
                    ),
                    llm_facing_response=llm_facing_response,
                )

            # For OAuth servers, construct OAuthClientProvider so the MCP SDK
            # can refresh expired tokens automatically
            auth: OAuthClientProvider | None = None
            if (
                self.mcp_server.auth_type == MCPAuthenticationType.OAUTH
                and self.connection_config is not None
                and self._user_id
            ):
                if self.mcp_server.transport == MCPTransport.SSE:
                    logger.warning(
                        f"MCP tool '{self._name}': OAuth token refresh is not supported "
                        f"for SSE transport — auth provider will be ignored. "
                        f"Re-authentication may be required after token expiry."
                    )
                else:
                    from onyx.server.features.mcp.api import UNUSED_RETURN_PATH
                    from onyx.server.features.mcp.api import make_oauth_provider

                    # user_id is the requesting user's UUID; safe here because
                    # UNUSED_RETURN_PATH ensures redirect_handler raises immediately
                    # and user_id is never consulted for Redis state lookups.
                    auth = make_oauth_provider(
                        self.mcp_server,
                        self._user_id,
                        UNUSED_RETURN_PATH,
                        self.connection_config.id,
                        None,
                    )

            tool_result = call_mcp_tool(
                self.mcp_server.server_url,
                self._name,
                llm_kwargs,
                connection_headers=headers,
                transport=self.mcp_server.transport or MCPTransport.STREAMABLE_HTTP,
                auth=auth,
            )

            logger.info(f"MCP tool '{self._name}' executed successfully")

            # Format the tool result for response
            tool_result_dict = {"tool_result": tool_result}
            llm_facing_response = json.dumps(tool_result_dict)

            # Emit CustomToolDelta packet
            self.emitter.emit(
                Packet(
                    placement=placement,
                    obj=CustomToolDelta(
                        tool_name=self._name,
                        response_type="json",
                        data=tool_result_dict,
                    ),
                )
            )

            return ToolResponse(
                rich_response=CustomToolCallSummary(
                    tool_name=self._name,
                    response_type="json",
                    tool_result=tool_result_dict,
                ),
                llm_facing_response=llm_facing_response,
            )

        except Exception as e:
            error_str = str(e).lower()
            logger.error(f"Failed to execute MCP tool '{self._name}': {e}")

            # Check for authentication-related errors
            auth_error_indicators = [
                "401",
                "unauthorized",
                "authentication",
                "auth",
                "forbidden",
                "access denied",
                "invalid token",
                "invalid api key",
                "invalid credentials",
                "please reconnect to the server",
            ]

            is_auth_error = any(
                indicator in error_str for indicator in auth_error_indicators
            )

            if is_auth_error:
                auth_error_msg = (
                    f"Authentication failed for the {self._name} tool from {self.mcp_server.name}. "
                    f"Please use the MCP dropdown in the chat bar to update your credentials "
                    f"for the {self.mcp_server.name} server. Original error: {str(e)}"
                )
                error_result = {"error": auth_error_msg}
            else:
                error_result = {"error": f"Tool execution failed: {str(e)}"}

            llm_facing_response = json.dumps(error_result)

            # Emit CustomToolDelta packet
            self.emitter.emit(
                Packet(
                    placement=placement,
                    obj=CustomToolDelta(
                        tool_name=self._name,
                        response_type="json",
                        data=error_result,
                    ),
                )
            )

            return ToolResponse(
                rich_response=CustomToolCallSummary(
                    tool_name=self._name,
                    response_type="json",
                    tool_result=error_result,
                ),
                llm_facing_response=llm_facing_response,
            )


================================================
FILE: backend/onyx/tools/tool_implementations/memory/__init__.py
================================================


================================================
FILE: backend/onyx/tools/tool_implementations/memory/memory_tool.py
================================================
"""
Memory Tool for storing user-specific information.

This tool allows the LLM to save memories about the user for future conversations.
The memories are passed in via override_kwargs which contains the current list of
memories that exist for the user.
"""

from typing import Any
from typing import cast
from typing import Literal

from pydantic import BaseModel
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.llm.interfaces import LLM
from onyx.secondary_llm_flows.memory_update import process_memory_update
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import MemoryToolDelta
from onyx.server.query_and_chat.streaming_models import MemoryToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.memory.models import MemoryToolResponse
from onyx.utils.logger import setup_logger


logger = setup_logger()


MEMORY_FIELD = "memory"


class MemoryToolOverrideKwargs(BaseModel):
    # Not including the Team Information or User Preferences because these are less likely to contribute to building the memory
    # Things like the user's name is important because the LLM may create a memory like "Dave prefers light mode." instead of
    # User prefers light mode.
    user_name: str | None
    user_email: str | None
    user_role: str | None
    existing_memories: list[str]
    chat_history: list[ChatMinimalTextMessage]


class MemoryTool(Tool[MemoryToolOverrideKwargs]):
    NAME = "add_memory"
    DISPLAY_NAME = "Add Memory"
    DESCRIPTION = "Save memories about the user for future conversations."

    def __init__(
        self,
        tool_id: int,
        emitter: Emitter,
        llm: LLM,
    ) -> None:
        super().__init__(emitter=emitter)
        self._id = tool_id
        self.llm = llm

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @override
    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        MEMORY_FIELD: {
                            "type": "string",
                            "description": (
                                "The text of the memory to add or update. "
                                "Should be a concise, standalone statement that "
                                "captures the key information. For example: "
                                "'User prefers dark mode' or 'User's favorite frontend framework is React'."
                            ),
                        },
                    },
                    "required": [MEMORY_FIELD],
                },
            },
        }

    @override
    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(Packet(placement=placement, obj=MemoryToolStart()))

    @override
    def run(
        self,
        placement: Placement,
        override_kwargs: MemoryToolOverrideKwargs,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        if MEMORY_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{MEMORY_FIELD}' parameter in add_memory tool call",
                llm_facing_message=(
                    f"The add_memory tool requires a '{MEMORY_FIELD}' parameter containing "
                    f"the memory text to save. Please provide like: "
                    f'{{"memory": "User prefers dark mode"}}'
                ),
            )
        memory = cast(str, llm_kwargs[MEMORY_FIELD])

        existing_memories = override_kwargs.existing_memories
        chat_history = override_kwargs.chat_history

        # Determine if this should be an add or update operation
        memory_text, index_to_replace = process_memory_update(
            new_memory=memory,
            existing_memories=existing_memories,
            chat_history=chat_history,
            llm=self.llm,
            user_name=override_kwargs.user_name,
            user_email=override_kwargs.user_email,
            user_role=override_kwargs.user_role,
        )

        logger.info(f"New memory to be added: {memory_text}")

        operation: Literal["add", "update"] = (
            "update" if index_to_replace is not None else "add"
        )
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=MemoryToolDelta(
                    memory_text=memory_text,
                    operation=operation,
                    memory_id=None,
                    index=index_to_replace,
                ),
            )
        )

        return ToolResponse(
            rich_response=MemoryToolResponse(
                memory_text=memory_text,
                index_to_replace=index_to_replace,
            ),
            llm_facing_response=f"New memory added: {memory_text}",
        )


================================================
FILE: backend/onyx/tools/tool_implementations/memory/models.py
================================================
from pydantic import BaseModel


class MemoryToolResponse(BaseModel):
    memory_text: str
    index_to_replace: int | None  # None = add new, int = replace at 0-based index


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/__init__.py
================================================


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/firecrawl.py
================================================
from __future__ import annotations

from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from datetime import datetime
from typing import Any

import requests

from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.models import WebContentProvider
from onyx.utils.logger import setup_logger

logger = setup_logger()

FIRECRAWL_SCRAPE_URL = "https://api.firecrawl.dev/v2/scrape"
_DEFAULT_MAX_WORKERS = 5

# Timeout is tuned to stay under the 2-minute outer timeout in
_DEFAULT_TIMEOUT_SECONDS = 55  # 10 max urls, 2 max batches


@dataclass
class ExtractedContentFields:
    text: str
    title: str
    published_date: datetime | None


class FirecrawlClient(WebContentProvider):
    def __init__(
        self,
        api_key: str,
        *,
        base_url: str = FIRECRAWL_SCRAPE_URL,
        timeout_seconds: int = _DEFAULT_TIMEOUT_SECONDS,
    ) -> None:

        self._headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        self._base_url = base_url
        self._timeout_seconds = timeout_seconds
        self._last_error: str | None = None

    @property
    def last_error(self) -> str | None:
        return self._last_error

    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        if not urls:
            return []

        max_workers = min(_DEFAULT_MAX_WORKERS, len(urls))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            return list(executor.map(self._get_webpage_content_safe, urls))

    # This allows the contents call to continue even if one URL fails, and return the results for the other URLs.
    def _get_webpage_content_safe(self, url: str) -> WebContent:
        try:
            return self._get_webpage_content(url)
        except Exception as exc:
            self._last_error = str(exc)
            return WebContent(
                title="",
                link=url,
                full_content="",
                published_date=None,
                scrape_successful=False,
            )

    # Note: explicitly deciding not to retry here, Firecrawl does not seem to ever recover on failed site crawls
    # Retrying causes other issues like timing out and dropping the entire batch when it's not needed.
    def _get_webpage_content(self, url: str) -> WebContent:
        payload = {
            "url": url,
            "formats": ["markdown"],
        }

        response = requests.post(
            self._base_url,
            headers=self._headers,
            json=payload,
            timeout=self._timeout_seconds,
        )

        if response.status_code != 200:
            try:
                error_payload = response.json()
            except Exception:
                error_payload = response.text
            self._last_error = (
                error_payload if isinstance(error_payload, str) else str(error_payload)
            )

            if 400 <= response.status_code < 500:
                return WebContent(
                    title="",
                    link=url,
                    full_content="",
                    published_date=None,
                    scrape_successful=False,
                )

            raise ValueError(
                f"Firecrawl fetch failed with status {response.status_code}."
            )
        else:
            self._last_error = None

        response_json = response.json()
        extracted = self._extract_content_fields(response_json, url)

        return WebContent(
            title=extracted.title,
            link=url,
            full_content=extracted.text,
            published_date=extracted.published_date,
            scrape_successful=bool(extracted.text),
        )

    @staticmethod
    def _extract_content_fields(
        response_json: dict[str, Any], url: str
    ) -> ExtractedContentFields:
        data_section = response_json.get("data") or {}
        metadata = data_section.get("metadata") or response_json.get("metadata") or {}

        text_candidates = [
            data_section.get("markdown"),
            data_section.get("content"),
            data_section.get("text"),
            response_json.get("markdown"),
            response_json.get("content"),
            response_json.get("text"),
        ]

        text = next((candidate for candidate in text_candidates if candidate), "")
        title = metadata.get("title") or response_json.get("title") or ""
        published_date = None

        published_date_str = (
            metadata.get("publishedTime")
            or metadata.get("date")
            or response_json.get("publishedTime")
            or response_json.get("date")
        )

        if published_date_str:
            try:
                published_date = time_str_to_utc(published_date_str)
            except Exception:
                published_date = None

        if not text:
            logger.warning(f"Firecrawl returned empty content for url={url}")

        return ExtractedContentFields(
            text=text or "",
            title=title or "",
            published_date=published_date,
        )


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/models.py
================================================
from abc import ABC
from abc import abstractmethod
from collections.abc import Sequence
from datetime import datetime

from pydantic import BaseModel
from pydantic import field_validator

from onyx.utils.url import normalize_url


class WebContent(BaseModel):
    title: str
    link: str
    full_content: str
    published_date: datetime | None = None
    scrape_successful: bool = True

    @field_validator("link")
    @classmethod
    def normalize_link(cls, v: str) -> str:
        return normalize_url(v)


class WebContentProvider(ABC):
    @abstractmethod
    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        pass


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/onyx_web_crawler.py
================================================
from __future__ import annotations

from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor

from onyx.file_processing.html_utils import ParsedHTML
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.tools.tool_implementations.open_url.models import (
    WebContent,
)
from onyx.tools.tool_implementations.open_url.models import (
    WebContentProvider,
)
from onyx.utils.logger import setup_logger
from onyx.utils.url import ssrf_safe_get
from onyx.utils.url import SSRFException
from onyx.utils.web_content import decode_html_bytes
from onyx.utils.web_content import extract_pdf_text
from onyx.utils.web_content import is_pdf_resource
from onyx.utils.web_content import title_from_pdf_metadata
from onyx.utils.web_content import title_from_url

logger = setup_logger()

DEFAULT_READ_TIMEOUT_SECONDS = 15
DEFAULT_CONNECT_TIMEOUT_SECONDS = 5
DEFAULT_USER_AGENT = "OnyxWebCrawler/1.0 (+https://www.onyx.app)"
DEFAULT_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024  # 50 MB
DEFAULT_MAX_HTML_SIZE_BYTES = 20 * 1024 * 1024  # 20 MB
DEFAULT_MAX_WORKERS = 5


def _failed_result(url: str) -> WebContent:
    return WebContent(
        title="",
        link=url,
        full_content="",
        published_date=None,
        scrape_successful=False,
    )


class OnyxWebCrawler(WebContentProvider):
    """
    Lightweight built-in crawler that fetches HTML directly and extracts readable text.
    Acts as the default content provider when no external crawler (e.g. Firecrawl) is
    configured.
    """

    def __init__(
        self,
        *,
        timeout_seconds: int = DEFAULT_READ_TIMEOUT_SECONDS,
        connect_timeout_seconds: int = DEFAULT_CONNECT_TIMEOUT_SECONDS,
        user_agent: str = DEFAULT_USER_AGENT,
        max_pdf_size_bytes: int | None = None,
        max_html_size_bytes: int | None = None,
    ) -> None:
        self._read_timeout_seconds = timeout_seconds
        self._connect_timeout_seconds = connect_timeout_seconds
        self._max_pdf_size_bytes = max_pdf_size_bytes
        self._max_html_size_bytes = max_html_size_bytes
        self._headers = {
            "User-Agent": user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }

    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        if not urls:
            return []

        max_workers = min(DEFAULT_MAX_WORKERS, len(urls))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            return list(executor.map(self._fetch_url_safe, urls))

    def _fetch_url_safe(self, url: str) -> WebContent:
        """Wrapper that catches all exceptions so one bad URL doesn't kill the batch."""
        try:
            return self._fetch_url(url)
        except Exception as exc:
            logger.warning(
                "Onyx crawler unexpected error for %s (%s)",
                url,
                exc.__class__.__name__,
            )
            return _failed_result(url)

    def _fetch_url(self, url: str) -> WebContent:
        try:
            response = ssrf_safe_get(
                url,
                headers=self._headers,
                timeout=(self._connect_timeout_seconds, self._read_timeout_seconds),
            )
        except SSRFException as exc:
            logger.error(
                "SSRF protection blocked request to %s (%s)",
                url,
                exc.__class__.__name__,
            )
            return _failed_result(url)
        except Exception as exc:
            logger.warning(
                "Onyx crawler failed to fetch %s (%s)",
                url,
                exc.__class__.__name__,
            )
            return _failed_result(url)

        if response.status_code >= 400:
            logger.warning("Onyx crawler received %s for %s", response.status_code, url)
            return _failed_result(url)

        content_type = response.headers.get("Content-Type", "")
        content = response.content

        content_sniff = content[:1024] if content else None
        if is_pdf_resource(url, content_type, content_sniff):
            if (
                self._max_pdf_size_bytes is not None
                and len(content) > self._max_pdf_size_bytes
            ):
                logger.warning(
                    "PDF content too large (%d bytes) for %s, max is %d",
                    len(content),
                    url,
                    self._max_pdf_size_bytes,
                )
                return _failed_result(url)
            text_content, metadata = extract_pdf_text(content)
            title = title_from_pdf_metadata(metadata) or title_from_url(url)
            return WebContent(
                title=title,
                link=url,
                full_content=text_content,
                published_date=None,
                scrape_successful=bool(text_content.strip()),
            )

        if (
            self._max_html_size_bytes is not None
            and len(content) > self._max_html_size_bytes
        ):
            logger.warning(
                "HTML content too large (%d bytes) for %s, max is %d",
                len(content),
                url,
                self._max_html_size_bytes,
            )
            return _failed_result(url)

        try:
            decoded_html = decode_html_bytes(
                content,
                content_type=content_type,
                fallback_encoding=response.apparent_encoding or response.encoding,
            )
            parsed: ParsedHTML = web_html_cleanup(decoded_html)
            text_content = parsed.cleaned_text or ""
            title = parsed.title or ""
        except Exception as exc:
            logger.warning(
                "Onyx crawler failed to parse %s (%s)", url, exc.__class__.__name__
            )
            text_content = ""
            title = ""

        return WebContent(
            title=title,
            link=url,
            full_content=text_content,
            published_date=None,
            scrape_successful=bool(text_content.strip()),
        )


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/open_url_tool.py
================================================
import json
from collections import defaultdict
from typing import Any

from pydantic import BaseModel
from sqlalchemy.orm import Session
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDocsResponse
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.context.search.utils import convert_inference_sections_to_search_docs
from onyx.context.search.utils import inference_section_from_chunks
from onyx.db.document import fetch_document_ids_by_links
from onyx.db.document import filter_existing_document_ids
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import OpenURLToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.models import WebContentProvider
from onyx.tools.tool_implementations.open_url.url_normalization import (
    _default_url_normalizer,
)
from onyx.tools.tool_implementations.open_url.url_normalization import normalize_url
from onyx.tools.tool_implementations.open_url.utils import (
    filter_web_contents_with_no_title_or_content,
)
from onyx.tools.tool_implementations.web_search.providers import (
    get_default_content_provider,
)
from onyx.tools.tool_implementations.web_search.utils import (
    inference_section_from_internet_page_scrape,
)
from onyx.tools.tool_implementations.web_search.utils import MAX_CHARS_PER_URL
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.url import normalize_url as normalize_web_content_url
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()

URLS_FIELD = "urls"

# 2 minute timeout for parallel URL fetching to prevent indefinite hangs
OPEN_URL_TIMEOUT_SECONDS = 2 * 60

# Sometimes the LLM will ask for a lot of URLs, so we need to limit the total number of characters
# otherwise this alone will completely flood the context and degrade experience.
# Note that if a lot of the URLs contain very little content, this results in no truncation.
MAX_CHARS_ACROSS_URLS = 10 * MAX_CHARS_PER_URL

# Minimum content length to include a document (avoid tiny snippets)
# This is for truncation purposes, if a document is small (unless it goes into truncation flow),
# it still gets included normally.
MIN_CONTENT_CHARS = 200


class IndexedDocumentRequest(BaseModel):
    document_id: str
    original_url: str | None = None


class IndexedRetrievalResult(BaseModel):
    sections: list[InferenceSection]
    missing_document_ids: list[str]


def _dedupe_preserve_order(values: list[str]) -> list[str]:
    seen: set[str] = set()
    ordered: list[str] = []
    for value in values:
        if not value:
            continue
        if value in seen:
            continue
        seen.add(value)
        ordered.append(value)
    return ordered


def _normalize_string_list(value: str | list[str] | None) -> list[str]:
    """Normalize a value that may be a string, list of strings, or None into a cleaned list.

    Returns a deduplicated list of non-empty stripped strings.
    """
    if value is None:
        return []
    if isinstance(value, str):
        value = [value]
    return _dedupe_preserve_order(
        [stripped for item in value if (stripped := str(item).strip())]
    )


def _url_lookup_variants(url: str) -> set[str]:
    """Generate URL variants (with/without trailing slash) for database lookup.

    This is used after normalize_url() to create variants for fuzzy matching
    in the database, since URLs may be stored with or without trailing slashes.
    """
    # Use default normalizer to strip query/fragment, then create variants
    normalized = _default_url_normalizer(url)
    if not normalized:
        return set()
    variants = {normalized}
    if normalized.endswith("/"):
        variants.add(normalized.rstrip("/"))
    else:
        variants.add(f"{normalized}/")
    return {variant for variant in variants if variant}


def _lookup_document_ids_by_link(
    urls: list[str], db_session: Session
) -> list[IndexedDocumentRequest]:
    """Lookup document IDs by matching URLs against the Document.link column.

    This is used as a fallback when document ID resolution fails and URL scraping fails.
    Useful for connectors like Linear.
    """
    variant_to_original: dict[str, str] = {}
    for url in urls:
        if not url:
            continue
        # Generate URL variants (normalized, with/without trailing slash)
        variants = _url_lookup_variants(url)
        variants.add(url)
        # Map each variant back to the original URL
        for variant in variants:
            variant_to_original.setdefault(variant, url)

    if not variant_to_original:
        return []

    # Query database for documents matching any of the URL variants
    link_to_doc_id = fetch_document_ids_by_links(
        db_session, list(variant_to_original.keys())
    )

    requests: list[IndexedDocumentRequest] = []
    for link_value, doc_id in link_to_doc_id.items():
        original_url = variant_to_original.get(link_value)
        if original_url:
            requests.append(
                IndexedDocumentRequest(
                    document_id=doc_id,
                    original_url=original_url,
                )
            )
    return requests


def _dedupe_document_requests(
    requests: list[IndexedDocumentRequest],
) -> list[IndexedDocumentRequest]:
    """Remove duplicate document requests, preserving order."""
    seen: set[str] = set()
    deduped: list[IndexedDocumentRequest] = []
    for request in requests:
        if request.document_id in seen:
            continue
        seen.add(request.document_id)
        deduped.append(request)
    return deduped


def _resolve_urls_to_document_ids(
    urls: list[str], db_session: Session
) -> tuple[list[IndexedDocumentRequest], list[str]]:
    """Resolve URLs to document IDs using connector-owned normalization.

    Uses the url_normalization module which delegates to each connector's
    own normalization function to ensure URLs match the canonical Document.id
    format used during ingestion.
    """
    matches: list[IndexedDocumentRequest] = []
    unresolved: list[str] = []
    normalized_map: dict[str, set[str]] = {}

    for url in urls:
        # Use connector-owned normalization (reuses connector's own logic)
        normalized = normalize_url(url)

        if normalized:
            # Some connectors (e.g. Notion) normalize to a non-URL canonical document
            # identifier (e.g. a UUID) rather than a URL. In those cases, we should
            # treat the normalized value as a document_id directly.
            if normalized.startswith(("http://", "https://")):
                # Get URL variants (with/without trailing slash) for database lookup
                variants = _url_lookup_variants(normalized)
                # Defensive fallback: if variant generation fails, still try the
                # normalized URL itself.
                normalized_map[url] = variants or {normalized}
            else:
                normalized_map[url] = {normalized}
        else:
            # No normalizer found - could be a non-URL document ID (e.g., FILE_CONNECTOR__...)
            if url and not url.startswith(("http://", "https://")):
                # Likely a document ID, use it directly
                normalized_map[url] = {url}
            else:
                # Try generic normalization as fallback
                variants = _url_lookup_variants(url)
                if variants:
                    normalized_map[url] = variants
                else:
                    unresolved.append(url)

    if not normalized_map:
        return matches, unresolved

    # Query database with all normalized variants
    all_variants = {
        variant for variants in normalized_map.values() for variant in variants
    }
    existing_document_ids = filter_existing_document_ids(db_session, list(all_variants))

    # Match URLs to documents
    for url, variants in normalized_map.items():
        matched_doc_id = next(
            (variant for variant in variants if variant in existing_document_ids),
            None,
        )
        if matched_doc_id:
            matches.append(
                IndexedDocumentRequest(
                    document_id=matched_doc_id,
                    original_url=url,
                )
            )
        else:
            unresolved.append(url)

    return matches, unresolved


def _estimate_result_chars(result: dict[str, Any]) -> int:
    """Estimate character count from document fields in a result dict."""
    total = 0
    for key, value in result.items():
        if value is not None:
            total += len(str(value))
    return total


def _convert_sections_to_llm_string_with_citations(
    sections: list[InferenceSection],
    existing_citation_mapping: dict[str, int],
    citation_start: int,
    max_document_chars: int = MAX_CHARS_ACROSS_URLS,
) -> tuple[str, dict[int, str]]:
    """Convert InferenceSections to LLM string, reusing existing citations where available.

    Args:
        sections: List of InferenceSection objects to convert.
        existing_citation_mapping: Mapping of document_id -> citation_num for
            documents that have already been cited.
        citation_start: Starting citation number for new citations.
        max_document_chars: Maximum total characters from document fields.
            Content will be truncated to fit within this budget.

    Returns:
        Tuple of (JSON string for LLM, citation_mapping dict).
        The citation_mapping maps citation_id -> document_id.
    """
    # Build document_id to citation_id mapping, reusing existing citations
    document_id_to_citation_id: dict[str, int] = {}
    citation_mapping: dict[int, str] = {}
    next_citation_id = citation_start

    # First pass: assign citation_ids, reusing existing ones where available
    for section in sections:
        document_id = section.center_chunk.document_id
        if document_id in document_id_to_citation_id:
            # Already assigned in this batch
            continue

        if document_id in existing_citation_mapping:
            # Reuse existing citation number
            citation_id = existing_citation_mapping[document_id]
            document_id_to_citation_id[document_id] = citation_id
            citation_mapping[citation_id] = document_id
        else:
            # Assign new citation number
            document_id_to_citation_id[document_id] = next_citation_id
            citation_mapping[next_citation_id] = document_id
            next_citation_id += 1

    # Second pass: build results, respecting max_document_chars budget
    results = []
    total_chars = 0

    for section in sections:
        chunk = section.center_chunk
        document_id = chunk.document_id
        citation_id = document_id_to_citation_id[document_id]

        # Format updated_at as ISO string if available
        updated_at_str = None
        if chunk.updated_at:
            updated_at_str = chunk.updated_at.isoformat()

        # Build result dict without content first to calculate metadata overhead
        result: dict[str, Any] = {
            "document": citation_id,
            "title": chunk.semantic_identifier,
        }
        if updated_at_str is not None:
            result["updated_at"] = updated_at_str
        if chunk.source_links:
            link = next(iter(chunk.source_links.values()), None)
            if link:
                result["url"] = link

        if chunk.metadata:
            result["metadata"] = json.dumps(chunk.metadata, ensure_ascii=False)

        # Calculate chars used by metadata fields (everything except content)
        metadata_chars = _estimate_result_chars(result)

        # Calculate remaining budget for content
        remaining_budget = max_document_chars - total_chars - metadata_chars
        content = section.combined_content

        # Check if we have enough budget for meaningful content
        if remaining_budget < MIN_CONTENT_CHARS:
            # Not enough room for meaningful content, stop adding documents
            break

        # Truncate content if it exceeds remaining budget
        if len(content) > remaining_budget:
            content = content[:remaining_budget]

        result["content"] = content

        result_chars = _estimate_result_chars(result)
        results.append(result)
        total_chars += result_chars

    output = {"results": results}
    return json.dumps(output, indent=2, ensure_ascii=False), citation_mapping


class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
    NAME = "open_url"
    DESCRIPTION = "Open and read the content of one or more URLs."
    DISPLAY_NAME = "Open URL"

    def __init__(
        self,
        tool_id: int,
        emitter: Emitter,
        document_index: DocumentIndex,
        user: User,
        content_provider: WebContentProvider | None = None,
    ) -> None:
        """Initialize the OpenURLTool.

        Args:
            tool_id: Unique identifier for this tool instance.
            emitter: Emitter for streaming packets to the client.
            document_index: Index handle for retrieving stored documents.
            user: User context for ACL filtering, anonymous users only see public docs.
            content_provider: Optional content provider. If not provided,
                will use the default provider from the database or fall back
                to the built-in Onyx web crawler.
        """
        super().__init__(emitter=emitter)
        self._id = tool_id
        self._document_index = document_index
        self._user = user

        if content_provider is not None:
            self._provider = content_provider
        else:
            provider = get_default_content_provider()
            if provider is None:
                raise RuntimeError(
                    "No web content provider available. "
                    "Please configure a content provider or ensure the "
                    "built-in Onyx web crawler can be initialized."
                )
            self._provider = provider

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003
        """OpenURLTool is available unless the vector DB is disabled.

        The tool uses id_based_retrieval to match URLs to indexed documents,
        which requires a vector database. When DISABLE_VECTOR_DB is set, the
        tool is disabled entirely.
        """
        from onyx.configs.app_configs import DISABLE_VECTOR_DB

        if DISABLE_VECTOR_DB:
            return False

        # The tool can use either a configured provider or the built-in crawler,
        # so it's always available when the vector DB is present
        return True

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        URLS_FIELD: {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": (
                                "List of URLs to open and read, can be a single URL or multiple URLs. "
                                "This will return the text content of the page(s)."
                            ),
                        },
                    },
                    "required": [URLS_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        """Emit start packet to signal tool has started."""
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=OpenUrlStart(),
            )
        )

    def run(
        self,
        placement: Placement,
        override_kwargs: OpenURLToolOverrideKwargs,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        """Execute the open URL tool to fetch content from the specified URLs.

        Args:
            placement: The placement info (turn_index and tab_index) for this tool call.
            override_kwargs: Override arguments including starting citation number
                and existing citation_mapping to reuse citations for already-cited URLs.
            **llm_kwargs: Arguments provided by the LLM, including the 'urls' field.

        Returns:
                ToolResponse containing the fetched content and citation mapping.
        """
        urls = _normalize_string_list(llm_kwargs.get(URLS_FIELD))

        if len(urls) > override_kwargs.max_urls:
            logger.warning(
                f"OpenURL tool received {len(urls)} URLs, but the max is {override_kwargs.max_urls}."
            )
            urls = urls[: override_kwargs.max_urls]

        if not urls:
            raise ToolCallException(
                message=f"Missing required '{URLS_FIELD}' parameter in open_url tool call",
                llm_facing_message=(
                    f"The open_url tool requires a '{URLS_FIELD}' parameter "
                    f"containing an array of URLs. Please provide "
                    f'like: {{"urls": ["https://example.com"]}}'
                ),
            )

        self.emitter.emit(
            Packet(
                placement=placement,
                obj=OpenUrlUrls(urls=urls),
            )
        )

        with get_session_with_current_tenant() as db_session:
            # Resolve URLs to document IDs for indexed retrieval
            # Handles both raw URLs and already-normalized document IDs
            url_requests, unresolved_urls = _resolve_urls_to_document_ids(
                urls, db_session
            )

            all_requests = _dedupe_document_requests(url_requests)

            # Create mapping from URL to document_id for result merging
            url_to_doc_id: dict[str, str] = {}
            for request in url_requests:
                if request.original_url:
                    url_to_doc_id[request.original_url] = request.document_id

            # Build filters before parallel execution (session-safe)
            filters = self._build_index_filters(db_session)

            # Create wrapper function for parallel execution
            # Filters are already built, so we just need to pass them
            def _retrieve_indexed_with_filters(
                requests: list[IndexedDocumentRequest],
            ) -> IndexedRetrievalResult:
                """Wrapper for parallel execution with pre-built filters."""
                return self._retrieve_indexed_documents_with_filters(requests, filters)

            # Track if timeout occurred for error reporting
            timeout_occurred = [False]  # Using list for mutability in closure

            def _timeout_handler(
                index: int,  # noqa: ARG001
                func: Any,  # noqa: ARG001
                args: tuple[Any, ...],  # noqa: ARG001
            ) -> None:
                timeout_occurred[0] = True
                return None

            # Run indexed retrieval and crawling in parallel for all URLs
            # This allows us to compare results and pick the best representation
            # Note: allow_failures=True ensures we get partial results even if one
            # task times out or fails - the other task's results will still be used
            indexed_result, crawled_result = run_functions_tuples_in_parallel(
                [
                    (_retrieve_indexed_with_filters, (all_requests,)),
                    (self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),
                ],
                allow_failures=True,
                timeout=OPEN_URL_TIMEOUT_SECONDS,
                timeout_callback=_timeout_handler,
            )

            indexed_result = indexed_result or IndexedRetrievalResult(
                sections=[], missing_document_ids=[]
            )
            crawled_sections, failed_web_urls = crawled_result or ([], [])

            # If timeout occurred and we have no successful results from either path,
            # return a timeout-specific error message
            if (
                timeout_occurred[0]
                and not indexed_result.sections
                and not crawled_sections
            ):
                return ToolResponse(
                    rich_response=None,
                    llm_facing_response="The call to open_url timed out",
                )

            # Last-resort: attempt link-based lookup for URLs that failed both
            # document-ID resolution and crawling.
            failed_web_urls = self._fallback_link_lookup(
                unresolved_urls=unresolved_urls,
                failed_web_urls=failed_web_urls,
                db_session=db_session,
                indexed_result=indexed_result,
                url_to_doc_id=url_to_doc_id,
                filters=filters,
            )

            # Merge results: prefer indexed when available, fallback to crawled
            inference_sections = self._merge_indexed_and_crawled_results(
                indexed_result.sections,
                crawled_sections,
                url_to_doc_id,
                urls,
                failed_web_urls,
            )

        if not inference_sections:
            failure_descriptions = []
            if indexed_result.missing_document_ids:
                failure_descriptions.append(
                    "documents "
                    + ", ".join(sorted(set(indexed_result.missing_document_ids)))
                )
            if failed_web_urls:
                cleaned_failures = sorted({url for url in failed_web_urls if url})
                if cleaned_failures:
                    failure_descriptions.append("URLs " + ", ".join(cleaned_failures))
            failure_msg = (
                "Failed to fetch content from " + " and ".join(failure_descriptions)
                if failure_descriptions
                else "Failed to fetch content from the requested resources."
            )
            logger.warning(f"OpenURL tool failed: {failure_msg}")
            return ToolResponse(rich_response=None, llm_facing_response=failure_msg)

        for section in inference_sections:
            chunk = section.center_chunk
            if not chunk.semantic_identifier and chunk.source_links:
                chunk.semantic_identifier = chunk.source_links[0]

        # Convert sections to search docs, preserving source information
        search_docs = convert_inference_sections_to_search_docs(
            inference_sections, is_internet=False
        )

        self.emitter.emit(
            Packet(
                placement=placement,
                obj=OpenUrlDocuments(documents=search_docs),
            )
        )

        # Note that with this call, some contents may be truncated or dropped so what the LLM sees may not be the entire set
        # That said, it is still the best experience to show all the docs that were fetched, even if the LLM on rare
        # occasions only actually sees a subset.
        docs_str, citation_mapping = _convert_sections_to_llm_string_with_citations(
            sections=inference_sections,
            existing_citation_mapping=override_kwargs.citation_mapping,
            citation_start=override_kwargs.starting_citation_num,
        )

        return ToolResponse(
            rich_response=SearchDocsResponse(
                search_docs=search_docs,
                citation_mapping=citation_mapping,
            ),
            llm_facing_response=docs_str,
        )

    def _fallback_link_lookup(
        self,
        unresolved_urls: list[str],
        failed_web_urls: list[str],
        db_session: Session,
        indexed_result: IndexedRetrievalResult,
        url_to_doc_id: dict[str, str],
        filters: IndexFilters,
    ) -> list[str]:
        """Attempt link-based lookup for URLs that failed both document-ID resolution and crawling.

        Args:
            unresolved_urls: URLs that couldn't be resolved to document IDs
            failed_web_urls: URLs that failed crawling
            db_session: Database session
            indexed_result: Result object to update with found sections
            url_to_doc_id: Mapping to update with resolved URLs
            filters: Pre-built index filters for document retrieval

        Returns:
            Updated list of failed_web_urls (with resolved URLs removed)
        """
        if not unresolved_urls or not failed_web_urls:
            return failed_web_urls

        failed_set = {url for url in failed_web_urls if url}
        fallback_urls = sorted(set(unresolved_urls).intersection(failed_set))

        if not fallback_urls:
            return failed_web_urls

        fallback_requests = _lookup_document_ids_by_link(fallback_urls, db_session)

        if not fallback_requests:
            return failed_web_urls

        deduped_fallback_requests = _dedupe_document_requests(fallback_requests)
        fallback_result = self._retrieve_indexed_documents_with_filters(
            deduped_fallback_requests, filters
        )

        if fallback_result.sections:
            indexed_result.sections.extend(fallback_result.sections)
            for request in deduped_fallback_requests:
                if request.original_url:
                    url_to_doc_id[request.original_url] = request.document_id

        if fallback_result.missing_document_ids:
            indexed_result.missing_document_ids.extend(
                fallback_result.missing_document_ids
            )

        resolved_links = {request.original_url for request in deduped_fallback_requests}
        return [url for url in failed_web_urls if url not in resolved_links]

    def _retrieve_indexed_documents_with_filters(
        self,
        all_requests: list[IndexedDocumentRequest],
        filters: IndexFilters,
    ) -> IndexedRetrievalResult:
        """Retrieve indexed documents using pre-built filters (for parallel execution)."""
        if not all_requests:
            return IndexedRetrievalResult(sections=[], missing_document_ids=[])

        document_ids = [req.document_id for req in all_requests]
        chunk_requests = [
            VespaChunkRequest(document_id=request.document_id)
            for request in all_requests
        ]

        try:
            chunks = self._document_index.id_based_retrieval(
                chunk_requests=chunk_requests,
                filters=filters,
                batch_retrieval=True,
            )
        except Exception as exc:
            logger.warning(
                f"Indexed retrieval failed for document IDs {document_ids}: {exc}",
                exc_info=True,
            )
            return IndexedRetrievalResult(
                sections=[],
                missing_document_ids=[req.document_id for req in all_requests],
            )

        chunk_map: dict[str, list] = defaultdict(list)
        for chunk in chunks:
            chunk_map[chunk.document_id].append(chunk)

        sections: list[InferenceSection] = []
        missing: list[str] = []

        for request in all_requests:
            doc_chunks = chunk_map.get(request.document_id)
            if not doc_chunks:
                missing.append(request.document_id)
                continue
            doc_chunks.sort(key=lambda chunk: chunk.chunk_id)
            section = inference_section_from_chunks(
                center_chunk=doc_chunks[0],
                chunks=doc_chunks,
            )
            if section:
                sections.append(section)
            else:
                missing.append(request.document_id)

        return IndexedRetrievalResult(sections=sections, missing_document_ids=missing)

    def _build_index_filters(self, db_session: Session) -> IndexFilters:
        access_control_list = build_access_filters_for_user(self._user, db_session)
        return IndexFilters(
            source_type=None,
            document_set=None,
            time_cutoff=None,
            tags=None,
            access_control_list=access_control_list,
            tenant_id=get_current_tenant_id() if MULTI_TENANT else None,
            project_id_filter=None,
        )

    def _merge_indexed_and_crawled_results(
        self,
        indexed_sections: list[InferenceSection],
        crawled_sections: list[InferenceSection],
        url_to_doc_id: dict[str, str],
        all_urls: list[str],
        failed_web_urls: list[str],  # noqa: ARG002
    ) -> list[InferenceSection]:
        """Merge indexed and crawled results, preferring indexed when available.

        For each URL:
        - If indexed result exists and has content, use it (better/cleaner representation)
        - Otherwise, use crawled result if available
        - If both fail, the URL will be in failed_web_urls for error reporting
        """
        # Map indexed sections by document_id
        indexed_by_doc_id: dict[str, InferenceSection] = {}
        for section in indexed_sections:
            indexed_by_doc_id[section.center_chunk.document_id] = section

        # Map crawled sections by URL (from source_links)
        crawled_by_url: dict[str, InferenceSection] = {}
        for section in crawled_sections:
            # Extract URL from source_links (crawled sections store URL here)
            if section.center_chunk.source_links:
                url = next(iter(section.center_chunk.source_links.values()))
                if url:
                    crawled_by_url[url] = section

        merged_sections: list[InferenceSection] = []
        used_doc_ids: set[str] = set()

        # Process URLs: prefer indexed, fallback to crawled
        for url in all_urls:
            doc_id = url_to_doc_id.get(url)
            indexed_section = indexed_by_doc_id.get(doc_id) if doc_id else None
            # WebContent.link is normalized (query/fragment stripped). Match on the
            # same normalized form to avoid dropping successful crawl results.
            crawled_section = crawled_by_url.get(normalize_web_content_url(url))

            if indexed_section and indexed_section.combined_content:
                # Prefer indexed
                merged_sections.append(indexed_section)
                if doc_id:
                    used_doc_ids.add(doc_id)
            elif crawled_section and crawled_section.combined_content:
                # Fallback to crawled if indexed unavailable or empty
                # (e.g., auth issues, document not indexed, etc.)
                merged_sections.append(crawled_section)

        # Add any indexed sections that weren't matched to URLs
        for doc_id, section in indexed_by_doc_id.items():
            # Skip if this doc_id was already used for a URL
            if doc_id not in used_doc_ids:
                merged_sections.append(section)

        return merged_sections

    def _fetch_web_content(
        self, urls: list[str], url_snippet_map: dict[str, str]
    ) -> tuple[list[InferenceSection], list[str]]:
        if not urls:
            return [], []

        raw_web_contents = self._provider.contents(urls)
        # Treat "no title and no content" as a failure for that URL, but don't
        # include the empty entry in downstream prompting/sections.
        failed_urls: list[str] = [
            content.link
            for content in raw_web_contents
            if not content.title.strip() and not content.full_content.strip()
        ]
        web_contents = filter_web_contents_with_no_title_or_content(raw_web_contents)
        sections: list[InferenceSection] = []

        for content in web_contents:
            # Check if content is insufficient (e.g., "Loading..." or too short)
            text_stripped = content.full_content.strip()
            is_insufficient = (
                not text_stripped
                # TODO: Likely a behavior of our scraper, understand why this special pattern occurs
                or text_stripped.lower() == "loading..."
                or len(text_stripped) < 50
            )

            if (
                content.scrape_successful
                and content.full_content
                and not is_insufficient
            ):
                sections.append(
                    inference_section_from_internet_page_scrape(
                        content, url_snippet_map.get(content.link, "")
                    )
                )
            else:
                # TODO: Slight improvement - if failed URL reasons are passed back to the LLM
                # for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would
                # happen again if it tried to crawl Reddit again.
                failed_urls.append(content.link or "")

        return sections, failed_urls


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/snippet_matcher.py
================================================
import unicodedata

from pydantic import BaseModel
from rapidfuzz import fuzz
from rapidfuzz import utils

from onyx.utils.text_processing import is_zero_width_char
from onyx.utils.text_processing import normalize_char


class SnippetMatchResult(BaseModel):
    snippet_located: bool

    start_idx: int = -1
    end_idx: int = -1


NegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)


def find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:
    """
    Finds where the snippet is located in the content.

    Strategy:
    1. Normalize the snippet & attempt to find it in the content
    2. Perform a token based fuzzy search for the snippet in the content

    Notes:
     - If there are multiple matches of snippet, we choose the first normalised occurrence
    """
    if not snippet or not content:
        return NegativeSnippetMatchResult

    result = _normalize_and_match(content, snippet)
    if result.snippet_located:
        return result

    result = _token_based_match(content, snippet)
    if result.snippet_located:
        return result

    return NegativeSnippetMatchResult


def _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:
    """
    Normalizes the snippet & content, then performs a direct string match.
    """
    normalized_content, content_map = _normalize_text_with_mapping(content)
    normalized_snippet, url_snippet_map = _normalize_text_with_mapping(snippet)

    if not normalized_content or not normalized_snippet:
        return NegativeSnippetMatchResult

    pos = normalized_content.find(normalized_snippet)
    if pos != -1:
        original_start = content_map[pos]

        # Account for leading characters stripped from snippet during normalization
        # (e.g., leading punctuation like "[![]![]]" that was removed)
        if url_snippet_map:
            first_snippet_orig_pos = url_snippet_map[0]
            if first_snippet_orig_pos > 0:
                # There were leading characters stripped from snippet
                # Extend start position backwards to include them from content
                original_start = max(original_start - first_snippet_orig_pos, 0)

        # Determine end position, including any trailing characters that were
        # normalized away (e.g., punctuation)
        match_end_norm = pos + len(normalized_snippet)
        if match_end_norm >= len(content_map):
            # Match extends to end of normalized content - include all trailing chars
            original_end = len(content) - 1
        else:
            # Match is in the middle - end at character before next normalized char
            original_end = content_map[match_end_norm] - 1

        # Account for trailing characters stripped from snippet during normalization
        # (e.g., trailing punctuation like "\n[" that was removed)
        if url_snippet_map:
            last_snippet_orig_pos = url_snippet_map[-1]
            trailing_stripped = len(snippet) - last_snippet_orig_pos - 1
            if trailing_stripped > 0:
                # Extend end position to include trailing characters from content
                # that correspond to the stripped trailing snippet characters
                original_end = min(original_end + trailing_stripped, len(content) - 1)

        return SnippetMatchResult(
            snippet_located=True,
            start_idx=original_start,
            end_idx=original_end,
        )

    return NegativeSnippetMatchResult


def _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:
    """
    Text normalization that maintains position mapping.

    Returns:
        tuple: (normalized_text, position_map)
        - position_map[i] gives the original position for normalized position i
    """
    if not text:
        return "", []

    original_text = text

    # Step 1: NFC normalization with position mapping
    nfc_text = unicodedata.normalize("NFC", text)

    # Map NFD positions → original positions.
    # NFD only decomposes, so each original char produces 1+ NFD chars.
    nfd_to_orig: list[int] = []
    for orig_idx, orig_char in enumerate(original_text):
        nfd_of_char = unicodedata.normalize("NFD", orig_char)
        for _ in nfd_of_char:
            nfd_to_orig.append(orig_idx)

    # Map NFC positions → NFD positions.
    # Each NFC char, when decomposed, tells us exactly how many NFD
    # chars it was composed from.
    nfc_to_orig: list[int] = []
    nfd_idx = 0
    for nfc_char in nfc_text:
        if nfd_idx < len(nfd_to_orig):
            nfc_to_orig.append(nfd_to_orig[nfd_idx])
        else:
            nfc_to_orig.append(len(original_text) - 1)
        nfd_of_nfc = unicodedata.normalize("NFD", nfc_char)
        nfd_idx += len(nfd_of_nfc)

    # Work with NFC text from here
    text = nfc_text

    html_entities = {
        "&nbsp;": " ",
        "&#160;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&#39;": "'",
        "&#x27;": "'",
        "&ndash;": "-",
        "&mdash;": "-",
        "&hellip;": "...",
        "&#xB0;": "°",
        "&#xBA;": "°",
        "&zwj;": "",
    }

    # Sort entities by length (longest first) for greedy matching
    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)

    result_chars = []
    result_map = []
    i = 0
    last_was_space = True  # Track to avoid leading spaces

    while i < len(text):
        # Convert NFC position to original position
        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1
        char = text[i]
        output = None
        step = 1

        # Check for HTML entities first (greedy match)
        for entity in sorted_entities:
            if text[i : i + len(entity)] == entity:
                output = html_entities[entity]
                step = len(entity)
                break

        # If no entity matched, process single character
        if output is None:
            # Skip zero-width characters
            if is_zero_width_char(char):
                i += 1
                continue

            output = normalize_char(char)

        # Add output to result, normalizing each character from entity output
        if output:
            for out_char in output:
                # Normalize entity output the same way as regular chars
                normalized = normalize_char(out_char)

                # Handle whitespace collapsing
                if normalized == " ":
                    if not last_was_space:
                        result_chars.append(" ")
                        result_map.append(orig_pos)
                        last_was_space = True
                else:
                    result_chars.append(normalized)
                    result_map.append(orig_pos)
                    last_was_space = False

        i += step

    # Remove trailing space if present
    if result_chars and result_chars[-1] == " ":
        result_chars.pop()
        result_map.pop()

    return "".join(result_chars), result_map


def _token_based_match(
    content: str,
    snippet: str,
    min_threshold: float = 0.8,
) -> SnippetMatchResult:
    """
    Performs a token based fuzzy search for the snippet in the content.

    min_threshold exists in the range [0, 1]
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult

    res = fuzz.partial_ratio_alignment(
        content, snippet, processor=utils.default_process
    )

    if not res:
        return NegativeSnippetMatchResult

    score = res.score

    if score >= (min_threshold * 100):
        start_idx = res.src_start
        end_idx = res.src_end

        return SnippetMatchResult(
            snippet_located=True,
            start_idx=start_idx,
            end_idx=end_idx,
        )

    return NegativeSnippetMatchResult


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/url_normalization.py
================================================
"""URL normalization for OpenURL tool.

Each connector implements normalize_url() as a class method to normalize URLs to match
the canonical Document.id format used during ingestion. This ensures OpenURL can find
indexed documents.

Usage:
    normalized = normalize_url("https://docs.google.com/document/d/123/edit")
    # Returns: "https://docs.google.com/document/d/123"
"""

from urllib.parse import urlparse
from urllib.parse import urlunparse

from onyx.configs.constants import DocumentSource
from onyx.connectors.factory import identify_connector_class
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _default_url_normalizer(url: str) -> str | None:
    parsed = urlparse(url)
    if not parsed.netloc:
        return None

    # Strip query params and fragment, normalize trailing slash
    scheme = parsed.scheme or "https"
    netloc = parsed.netloc.lower()
    path = parsed.path.rstrip("/")
    params = ""  # URL params (rarely used)
    query = ""  # Query string (removed)
    fragment = ""  # Fragment/hash (removed)

    normalized = urlunparse((scheme, netloc, path, params, query, fragment))
    return normalized or None


def normalize_url(url: str, source_type: DocumentSource | None = None) -> str | None:
    """Normalize a URL to match the canonical Document.id format.

    Dispatches to the connector's normalize_url() method or falls back to default normalizer.
    """
    # If source_type not provided, try to detect it
    if source_type is None:
        source_type = _detect_source_type(url)

    if source_type:
        try:
            connector_class = identify_connector_class(source_type)
            result = connector_class.normalize_url(url)

            if result.use_default:
                return _default_url_normalizer(url)
            return result.normalized_url  # Could be None if failed
        except Exception as exc:
            logger.debug(
                "Failed to normalize URL for source %s: %s. Using default normalizer.",
                source_type,
                exc,
            )

    # No source_type or connector not found - fall back to default
    return _default_url_normalizer(url)


def _detect_source_type(url: str) -> DocumentSource | None:
    """Detect DocumentSource from URL patterns (simple heuristic)."""
    parsed = urlparse(url)
    netloc = parsed.netloc.lower()
    path = parsed.path.lower()

    if "docs.google.com" in netloc or "drive.google.com" in netloc:
        return DocumentSource.GOOGLE_DRIVE
    if "notion.so" in netloc or "notion.site" in netloc:
        return DocumentSource.NOTION
    if "atlassian.net" in netloc:
        # Check path for Jira indicators (more specific than netloc)
        if "/jira/" in path or "/browse/" in path or "jira" in netloc:
            return DocumentSource.JIRA
        return DocumentSource.CONFLUENCE
    if "github.com" in netloc:
        return DocumentSource.GITHUB
    if "gitlab.com" in netloc:
        return DocumentSource.GITLAB
    if "sharepoint.com" in netloc:
        return DocumentSource.SHAREPOINT
    if "slack.com" in netloc:
        return DocumentSource.SLACK
    if "linear.app" in netloc:
        return DocumentSource.LINEAR

    return None


================================================
FILE: backend/onyx/tools/tool_implementations/open_url/utils.py
================================================
from onyx.tools.tool_implementations.open_url.models import WebContent


def filter_web_contents_with_no_title_or_content(
    contents: list[WebContent],
) -> list[WebContent]:
    """Filter out content entries that have neither a title nor any extracted text.

    Some content providers can return placeholder/partial entries that only include a URL.
    Downstream uses these fields for display + prompting; drop empty ones centrally
    rather than duplicating checks across provider clients.
    """
    filtered: list[WebContent] = []
    for content in contents:
        if content.title.strip() or content.full_content.strip():
            filtered.append(content)
    return filtered


================================================
FILE: backend/onyx/tools/tool_implementations/python/__init__.py
================================================


================================================
FILE: backend/onyx/tools/tool_implementations/python/code_interpreter_client.py
================================================
from __future__ import annotations

import json
import time
from collections.abc import Generator
from typing import Literal
from typing import TypedDict
from typing import Union

import requests
from pydantic import BaseModel

from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL
from onyx.utils.logger import setup_logger

logger = setup_logger()

_HEALTH_CACHE_TTL_SECONDS = 30
_health_cache: dict[str, tuple[float, bool]] = {}


class FileInput(TypedDict):
    """Input file to be staged in execution workspace"""

    path: str
    file_id: str


class WorkspaceFile(BaseModel):
    """File in execution workspace"""

    path: str
    kind: Literal["file", "directory"]
    file_id: str | None = None


class ExecuteResponse(BaseModel):
    """Response from code execution"""

    stdout: str
    stderr: str
    exit_code: int | None
    timed_out: bool
    duration_ms: int
    files: list[WorkspaceFile]


class StreamOutputEvent(BaseModel):
    """SSE 'output' event: a chunk of stdout or stderr"""

    stream: Literal["stdout", "stderr"]
    data: str


class StreamResultEvent(BaseModel):
    """SSE 'result' event: final execution result"""

    exit_code: int | None
    timed_out: bool
    duration_ms: int
    files: list[WorkspaceFile]


class StreamErrorEvent(BaseModel):
    """SSE 'error' event: execution-level error"""

    message: str


StreamEvent = Union[StreamOutputEvent, StreamResultEvent, StreamErrorEvent]

_SSE_EVENT_MAP: dict[
    str, type[StreamOutputEvent | StreamResultEvent | StreamErrorEvent]
] = {
    "output": StreamOutputEvent,
    "result": StreamResultEvent,
    "error": StreamErrorEvent,
}


class CodeInterpreterClient:
    """Client for Code Interpreter service"""

    def __init__(self, base_url: str | None = CODE_INTERPRETER_BASE_URL):
        if not base_url:
            raise ValueError("CODE_INTERPRETER_BASE_URL not configured")
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        self._closed = False

    def __enter__(self) -> CodeInterpreterClient:
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

    def close(self) -> None:
        if self._closed:
            return
        self.session.close()
        self._closed = True

    def _build_payload(
        self,
        code: str,
        stdin: str | None,
        timeout_ms: int,
        files: list[FileInput] | None,
    ) -> dict:
        payload: dict = {
            "code": code,
            "timeout_ms": timeout_ms,
        }
        if stdin is not None:
            payload["stdin"] = stdin
        if files:
            payload["files"] = files
        return payload

    def health(self, use_cache: bool = False) -> bool:
        """Check if the Code Interpreter service is healthy

        Args:
            use_cache: When True, return a cached result if available and
                       within the TTL window. The cache is always populated
                       after a live request regardless of this flag.
        """
        if use_cache:
            cached = _health_cache.get(self.base_url)
            if cached is not None:
                cached_at, cached_result = cached
                if time.monotonic() - cached_at < _HEALTH_CACHE_TTL_SECONDS:
                    return cached_result

        url = f"{self.base_url}/health"
        try:
            response = self.session.get(url, timeout=5)
            response.raise_for_status()
            result = response.json().get("status") == "ok"
        except Exception as e:
            logger.warning(f"Exception caught when checking health, e={e}")
            result = False

        _health_cache[self.base_url] = (time.monotonic(), result)
        return result

    def execute(
        self,
        code: str,
        stdin: str | None = None,
        timeout_ms: int = 30000,
        files: list[FileInput] | None = None,
    ) -> ExecuteResponse:
        """Execute Python code (batch)"""
        url = f"{self.base_url}/v1/execute"
        payload = self._build_payload(code, stdin, timeout_ms, files)

        response = self.session.post(url, json=payload, timeout=timeout_ms / 1000 + 10)
        response.raise_for_status()

        return ExecuteResponse(**response.json())

    def execute_streaming(
        self,
        code: str,
        stdin: str | None = None,
        timeout_ms: int = 30000,
        files: list[FileInput] | None = None,
    ) -> Generator[StreamEvent, None, None]:
        """Execute Python code with streaming SSE output.

        Yields StreamEvent objects (StreamOutputEvent, StreamResultEvent,
        StreamErrorEvent) as execution progresses. Falls back to batch
        execution if the streaming endpoint is not available (older
        code-interpreter versions).
        """
        url = f"{self.base_url}/v1/execute/stream"
        payload = self._build_payload(code, stdin, timeout_ms, files)

        response = self.session.post(
            url,
            json=payload,
            stream=True,
            timeout=timeout_ms / 1000 + 10,
        )

        if response.status_code == 404:
            logger.info(
                "Streaming endpoint not available, falling back to batch execution"
            )
            response.close()
            yield from self._batch_as_stream(code, stdin, timeout_ms, files)
            return

        try:
            response.raise_for_status()
            yield from self._parse_sse(response)
        finally:
            response.close()

    def _parse_sse(
        self, response: requests.Response
    ) -> Generator[StreamEvent, None, None]:
        """Parse SSE streaming response into StreamEvent objects.

        Expected format per event:
            event: <type>
            data: <json>
            <blank line>
        """
        event_type: str | None = None
        data_lines: list[str] = []

        for line in response.iter_lines(decode_unicode=True):
            if line is None:
                continue

            if line == "":
                # Blank line marks end of an SSE event
                if event_type is not None and data_lines:
                    data = "\n".join(data_lines)
                    model_cls = _SSE_EVENT_MAP.get(event_type)
                    if model_cls is not None:
                        yield model_cls(**json.loads(data))
                    else:
                        logger.warning(f"Unknown SSE event type: {event_type}")
                event_type = None
                data_lines = []
            elif line.startswith("event:"):
                event_type = line[len("event:") :].strip()
            elif line.startswith("data:"):
                data_lines.append(line[len("data:") :].strip())

        if event_type is not None or data_lines:
            logger.warning(
                f"SSE stream ended with incomplete event: event_type={event_type}, data_lines={data_lines}"
            )

    def _batch_as_stream(
        self,
        code: str,
        stdin: str | None,
        timeout_ms: int,
        files: list[FileInput] | None,
    ) -> Generator[StreamEvent, None, None]:
        """Execute via batch endpoint and yield results as stream events."""
        result = self.execute(code, stdin, timeout_ms, files)

        if result.stdout:
            yield StreamOutputEvent(stream="stdout", data=result.stdout)
        if result.stderr:
            yield StreamOutputEvent(stream="stderr", data=result.stderr)
        yield StreamResultEvent(
            exit_code=result.exit_code,
            timed_out=result.timed_out,
            duration_ms=result.duration_ms,
            files=result.files,
        )

    def upload_file(self, file_content: bytes, filename: str) -> str:
        """Upload file to Code Interpreter and return file_id"""
        url = f"{self.base_url}/v1/files"

        files = {"file": (filename, file_content)}
        response = self.session.post(url, files=files, timeout=30)
        response.raise_for_status()

        return response.json()["file_id"]

    def download_file(self, file_id: str) -> bytes:
        """Download file from Code Interpreter"""
        url = f"{self.base_url}/v1/files/{file_id}"

        response = self.session.get(url, timeout=30)
        response.raise_for_status()

        return response.content

    def delete_file(self, file_id: str) -> None:
        """Delete file from Code Interpreter"""
        url = f"{self.base_url}/v1/files/{file_id}"

        response = self.session.delete(url, timeout=10)
        response.raise_for_status()


================================================
FILE: backend/onyx/tools/tool_implementations/python/python_tool.py
================================================
import hashlib
import mimetypes
from io import BytesIO
from typing import Any
from typing import cast

from pydantic import TypeAdapter
from sqlalchemy.orm import Session
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL
from onyx.configs.app_configs import CODE_INTERPRETER_DEFAULT_TIMEOUT_MS
from onyx.configs.app_configs import CODE_INTERPRETER_MAX_OUTPUT_LENGTH
from onyx.configs.constants import FileOrigin
from onyx.db.code_interpreter import fetch_code_interpreter_server
from onyx.file_store.utils import build_full_frontend_file_url
from onyx.file_store.utils import get_default_file_store
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PythonToolDelta
from onyx.server.query_and_chat.streaming_models import PythonToolStart
from onyx.tools.interface import Tool
from onyx.tools.models import LlmPythonExecutionResult
from onyx.tools.models import PythonExecutionFile
from onyx.tools.models import PythonToolOverrideKwargs
from onyx.tools.models import PythonToolRichResponse
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    CodeInterpreterClient,
)
from onyx.tools.tool_implementations.python.code_interpreter_client import FileInput
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamErrorEvent,
)
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamOutputEvent,
)
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamResultEvent,
)
from onyx.utils.logger import setup_logger


logger = setup_logger()

CODE_FIELD = "code"


def _truncate_output(output: str, max_length: int, label: str = "output") -> str:
    """
    Truncate output string to max_length and append truncation message if needed.

    Args:
        output: The original output string to truncate
        max_length: Maximum length before truncation
        label: Label for logging (e.g., "stdout", "stderr")

    Returns:
        Truncated string with truncation message appended if truncated
    """
    truncated = output[:max_length]
    if len(output) > max_length:
        truncated += (
            f"\n... [output truncated, {len(output) - max_length} characters omitted]"
        )
        logger.debug(f"Truncated {label}: {truncated}")
    return truncated


class PythonTool(Tool[PythonToolOverrideKwargs]):
    """
    Python code execution tool using an external Code Interpreter service.

    This tool allows executing Python code in a secure, isolated sandbox environment.
    It supports uploading files from the chat session and downloading generated files.
    """

    NAME = "python"
    DISPLAY_NAME = "Code Interpreter"
    DESCRIPTION = "Execute Python code in an isolated sandbox environment."

    def __init__(self, tool_id: int, emitter: Emitter) -> None:
        super().__init__(emitter=emitter)
        self._id = tool_id
        # Cache of (filename, content_hash) -> ci_file_id to avoid re-uploading
        # the same file on every tool call iteration within the same agent session.
        # Filename is included in the key so two files with identical bytes but
        # different names each get their own upload slot.
        # TTL assumption: code-interpreter file TTLs (typically hours) greatly
        # exceed the lifetime of a single agent session (at most MAX_LLM_CYCLES
        # iterations, typically a few minutes), so stale-ID eviction is not needed.
        self._uploaded_file_cache: dict[tuple[str, str], str] = {}

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:
        if not CODE_INTERPRETER_BASE_URL:
            return False
        server = fetch_code_interpreter_server(db_session)
        if not server.server_enabled:
            return False

        with CodeInterpreterClient() as client:
            return client.health(use_cache=True)

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        CODE_FIELD: {
                            "type": "string",
                            "description": "Python source code to execute",
                        },
                    },
                    "required": [CODE_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        """Emit start packet for this tool. Code will be emitted in run() method."""
        # Note: PythonToolStart requires code, but we don't have it in emit_start
        # The code is available in run() method via llm_kwargs
        # We'll emit the start packet in run() instead

    def run(
        self,
        placement: Placement,
        override_kwargs: PythonToolOverrideKwargs,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        """
        Execute Python code in the Code Interpreter service.

        Args:
            placement: The placement info (turn_index and tab_index) for this tool call.
            override_kwargs: Contains chat_files to stage for execution
            **llm_kwargs: Contains 'code' parameter from LLM

        Returns:
            ToolResponse with execution results
        """
        if CODE_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{CODE_FIELD}' parameter in python tool call",
                llm_facing_message=(
                    f"The python tool requires a '{CODE_FIELD}' parameter containing "
                    f"the Python code to execute. Please provide like: "
                    f'{{"code": "print(\'Hello, world!\')"}}'
                ),
            )
        code = cast(str, llm_kwargs[CODE_FIELD])
        chat_files = override_kwargs.chat_files if override_kwargs else []

        # Emit start event with the code
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=PythonToolStart(code=code),
            )
        )

        # Create Code Interpreter client — context manager ensures
        # session.close() is called on every exit path.
        with CodeInterpreterClient() as client:
            # Stage chat files for execution
            files_to_stage: list[FileInput] = []
            for ind, chat_file in enumerate(chat_files):
                file_name = chat_file.filename or f"file_{ind}"
                try:
                    content_hash = hashlib.sha256(chat_file.content).hexdigest()
                    cache_key = (file_name, content_hash)
                    ci_file_id = self._uploaded_file_cache.get(cache_key)
                    if ci_file_id is None:
                        # Upload to Code Interpreter
                        ci_file_id = client.upload_file(chat_file.content, file_name)
                        self._uploaded_file_cache[cache_key] = ci_file_id

                    # Stage for execution
                    files_to_stage.append({"path": file_name, "file_id": ci_file_id})

                    logger.info(f"Staged file for Python execution: {file_name}")

                except Exception as e:
                    logger.warning(f"Failed to stage file {file_name}: {e}")

            try:
                logger.debug(f"Executing code: {code}")

                # Execute code with streaming (falls back to batch if unavailable)
                stdout_parts: list[str] = []
                stderr_parts: list[str] = []
                result_event: StreamResultEvent | None = None

                for event in client.execute_streaming(
                    code=code,
                    timeout_ms=CODE_INTERPRETER_DEFAULT_TIMEOUT_MS,
                    files=files_to_stage or None,
                ):
                    if isinstance(event, StreamOutputEvent):
                        if event.stream == "stdout":
                            stdout_parts.append(event.data)
                        else:
                            stderr_parts.append(event.data)
                        # Emit incremental delta to frontend
                        self.emitter.emit(
                            Packet(
                                placement=placement,
                                obj=PythonToolDelta(
                                    stdout=(
                                        event.data if event.stream == "stdout" else ""
                                    ),
                                    stderr=(
                                        event.data if event.stream == "stderr" else ""
                                    ),
                                ),
                            )
                        )
                    elif isinstance(event, StreamResultEvent):
                        result_event = event
                    elif isinstance(event, StreamErrorEvent):
                        raise RuntimeError(f"Code interpreter error: {event.message}")

                if result_event is None:
                    raise RuntimeError(
                        "Code interpreter stream ended without a result event"
                    )

                full_stdout = "".join(stdout_parts)
                full_stderr = "".join(stderr_parts)

                # Truncate output for LLM consumption
                truncated_stdout = _truncate_output(
                    full_stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stdout"
                )
                truncated_stderr = _truncate_output(
                    full_stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stderr"
                )

                # Handle generated files
                generated_files: list[PythonExecutionFile] = []
                generated_file_ids: list[str] = []
                file_ids_to_cleanup: list[str] = []
                file_store = get_default_file_store()

                for workspace_file in result_event.files:
                    if workspace_file.kind != "file" or not workspace_file.file_id:
                        continue

                    try:
                        # Download file from Code Interpreter
                        file_content = client.download_file(workspace_file.file_id)

                        # Determine MIME type from file extension
                        filename = workspace_file.path.split("/")[-1]
                        mime_type, _ = mimetypes.guess_type(filename)
                        # Default to binary if we can't determine the type
                        mime_type = mime_type or "application/octet-stream"

                        # Save to Onyx file store
                        onyx_file_id = file_store.save_file(
                            content=BytesIO(file_content),
                            display_name=filename,
                            file_origin=FileOrigin.CHAT_UPLOAD,
                            file_type=mime_type,
                        )

                        generated_files.append(
                            PythonExecutionFile(
                                filename=filename,
                                file_link=build_full_frontend_file_url(onyx_file_id),
                            )
                        )
                        generated_file_ids.append(onyx_file_id)

                        # Mark for cleanup
                        file_ids_to_cleanup.append(workspace_file.file_id)

                    except Exception as e:
                        logger.error(
                            f"Failed to handle generated file {workspace_file.path}: {e}"
                        )

                # Cleanup Code Interpreter files (generated files)
                for ci_file_id in file_ids_to_cleanup:
                    try:
                        client.delete_file(ci_file_id)
                    except Exception as e:
                        logger.error(
                            f"Failed to delete Code Interpreter generated file {ci_file_id}: {e}"
                        )

                # Note: staged input files are intentionally not deleted here because
                # _uploaded_file_cache reuses their file_ids across iterations. They are
                # orphaned when the session ends, but the code interpreter cleans up
                # stale files on its own TTL.

                # Emit file_ids once files are processed
                if generated_file_ids:
                    self.emitter.emit(
                        Packet(
                            placement=placement,
                            obj=PythonToolDelta(file_ids=generated_file_ids),
                        )
                    )

                # Build result
                result = LlmPythonExecutionResult(
                    stdout=truncated_stdout,
                    stderr=truncated_stderr,
                    exit_code=result_event.exit_code,
                    timed_out=result_event.timed_out,
                    generated_files=generated_files,
                    error=(None if result_event.exit_code == 0 else truncated_stderr),
                )

                # Serialize result for LLM
                adapter = TypeAdapter(LlmPythonExecutionResult)
                llm_response = adapter.dump_json(result).decode()

                return ToolResponse(
                    rich_response=PythonToolRichResponse(
                        generated_files=generated_files,
                    ),
                    llm_facing_response=llm_response,
                )

            except Exception as e:
                logger.error(f"Python execution failed: {e}")
                error_msg = str(e)

                # Emit error delta
                self.emitter.emit(
                    Packet(
                        placement=placement,
                        obj=PythonToolDelta(
                            stdout="",
                            stderr=error_msg,
                            file_ids=[],
                        ),
                    )
                )

                # Return error result
                result = LlmPythonExecutionResult(
                    stdout="",
                    stderr=error_msg,
                    exit_code=-1,
                    timed_out=False,
                    generated_files=[],
                    error=error_msg,
                )

                adapter = TypeAdapter(LlmPythonExecutionResult)
                llm_response = adapter.dump_json(result).decode()

                return ToolResponse(
                    rich_response=None,
                    llm_facing_response=llm_response,
                )

    @classmethod
    @override
    def should_emit_argument_deltas(cls) -> bool:
        return True


================================================
FILE: backend/onyx/tools/tool_implementations/search/constants.py
================================================
"""Constants for search tool implementations."""

# Query Expansion and Fusion Weights
# Taking an opinionated stance on the weights, no chance users can do a good job customizing this.
# The dedicated rephrased/extracted semantic query is likely the best for hybrid search
LLM_SEMANTIC_QUERY_WEIGHT = 1.3
# The keyword expansions provide more breadth through a different search ranking function
# This one is likely to produce the most different results.
LLM_KEYWORD_QUERY_WEIGHT = 1.0
# This is also lower because it is the LLM generated query without the custom instructions specifically for this purpose.
LLM_NON_CUSTOM_QUERY_WEIGHT = 0.7
# This is much lower weight because it is likely pretty similar to the LLM semantic query but just worse quality.
ORIGINAL_QUERY_WEIGHT = 0.5

# Hybrid Search Configuration
# This may in the future just use an entirely keyword search. Currently it is a hybrid search with a keyword first phase.
KEYWORD_QUERY_HYBRID_ALPHA = 0.2

# Reciprocal Rank Fusion
RRF_K_VALUE = 50

# Context Expansion
FULL_DOC_NUM_CHUNKS_AROUND = 5

# If a document is quite relevant and has many returned sections, likely it's enough to use the chunks around
# the highest scoring section to detect relevance. This allows more other docs to be evaluated in the step.
# This avoids documents with good titles or generally strong matches to flood out the rest of the search results.
# If there are multiple indepedent sections from the doc, this won't truncate it, only if they're connected.
MAX_CHUNKS_FOR_RELEVANCE = 3


================================================
FILE: backend/onyx/tools/tool_implementations/search/search_tool.py
================================================
"""
An explanation of the search tool found below:

Step 1: Queries
- The LLM will generate some queries based on the chat history for what it thinks are the best things to search for.
This has a pretty generic prompt so it's not perfectly tuned for search but provides breadth and also the LLM can often break up
the query into multiple searches which the other flows do not do. Exp: Compare the sales process between company X and Y can be
broken up into "sales process company X" and "sales process company Y".
- A specifial prompt and history is used to generate another query which is best tuned for a semantic/hybrid search pipeline.
- A small set of keyword emphasized queries are also generated to cover additional breadth. This is important for cases where
the query is short, keyword heavy, or has a lot of model unseen terminology.

Step 2: Recombination
We use a weighted RRF to combine the search results from the queries above. Each query will have a list of search results with
some scores however these are downstream of a normalization step so they cannot easily be compared with one another on an
absolute scale. RRF is a good way to combine these and allows us to give some custom weightings. We also merge document chunks
that are adjacent to provide more continuous context to the LLM.

Step 3: Selection
We pass the recombined results (truncated set) to the LLM to select the most promising ones to read. This is to reduce noise and
reduce downstream chances of hallucination. The LLM at this point also has the entire set of document chunks so it has
information across documents not just per document. This also reduces the number of tokens required for the next step.

Step 4: Expansion
For the selected documents, we pass the main retrieved sections from above (this may be a single chunk or a section comprised of
several consecutive chunks) along with chunks above and below the section to the LLM. The LLM determines how much of the document
it wants to read. This is done in parallel for all selected documents. Reason being that the LLM would not be able to do a good
job of this with all of the documents in the prompt at once. Keeping every LLM decision step as simple as possible is key for
reliable performance.

Step 5: Prompt Building
We construct a response string back to the LLM as the result of the tool call. We also pass relevant richer objects back
so that the rest of the code can persist it, render it in the UI, etc. The response is a json that makes it easy for the LLM to
refer to by using matching keywords to other parts of the prompt and reminders.
"""

import time
from collections.abc import Callable
from typing import Any
from typing import cast

from sqlalchemy.orm import Session

from onyx.chat.emitter import Emitter
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import FederatedConnectorSource
from onyx.context.search.federated.slack_search import slack_retrieval
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkIndexRequest
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import PersonaSearchInfo
from onyx.context.search.models import SearchDocsResponse
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.context.search.utils import convert_inference_sections_to_search_docs
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import check_federated_connectors_exist
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.federated import (
    get_federated_connector_document_set_mappings_by_document_set_names,
)
from onyx.db.federated import list_federated_connector_oauth_tokens
from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.db.slack_bot import fetch_slack_bots
from onyx.document_index.interfaces import DocumentIndex
from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
from onyx.federated_connectors.federated_retrieval import (
    get_federated_retrieval_functions,
)
from onyx.llm.factory import get_llm_token_counter
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.onyxbot.slack.models import SlackContext
from onyx.secondary_llm_flows.document_filter import select_chunks_for_relevance
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
from onyx.secondary_llm_flows.query_expansion import keyword_query_expansion
from onyx.secondary_llm_flows.query_expansion import semantic_query_rephrase
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.tools.interface import Tool
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.search.constants import (
    KEYWORD_QUERY_HYBRID_ALPHA,
)
from onyx.tools.tool_implementations.search.constants import (
    LLM_KEYWORD_QUERY_WEIGHT,
)
from onyx.tools.tool_implementations.search.constants import (
    LLM_NON_CUSTOM_QUERY_WEIGHT,
)
from onyx.tools.tool_implementations.search.constants import (
    LLM_SEMANTIC_QUERY_WEIGHT,
)
from onyx.tools.tool_implementations.search.constants import (
    MAX_CHUNKS_FOR_RELEVANCE,
)
from onyx.tools.tool_implementations.search.constants import ORIGINAL_QUERY_WEIGHT
from onyx.tools.tool_implementations.search.search_utils import (
    expand_section_with_context,
)
from onyx.tools.tool_implementations.search.search_utils import (
    merge_overlapping_sections,
)
from onyx.tools.tool_implementations.search.search_utils import (
    weighted_reciprocal_rank_fusion,
)
from onyx.tools.tool_implementations.utils import (
    convert_inference_sections_to_llm_string,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT

logger = setup_logger()

QUERIES_FIELD = "queries"


def deduplicate_queries(
    queries_with_weights: list[tuple[str, float]],
) -> list[tuple[str, float]]:
    """Deduplicate queries by case-insensitive comparison and sum weights.

    Args:
        queries_with_weights: List of (query, weight) tuples

    Returns:
        Deduplicated list of (query, weight) tuples with summed weights
    """
    query_map: dict[str, tuple[str, float]] = {}
    for query, weight in queries_with_weights:
        query_lower = query.lower()
        if query_lower in query_map:
            # Sum weights for duplicate queries
            existing_query, existing_weight = query_map[query_lower]
            query_map[query_lower] = (existing_query, existing_weight + weight)
        else:
            # Keep the first occurrence (preserves original casing)
            query_map[query_lower] = (query, weight)
    return list(query_map.values())


def _estimate_section_tokens(
    section: InferenceSection,
    token_counter: Callable[[str], int],
    max_chunks_per_section: int | None = None,
) -> int:
    """Estimate token count for a section using the LLM tokenizer.

    Args:
        section: InferenceSection to estimate tokens for
        token_counter: Function that counts tokens in text
        max_chunks_per_section: Maximum chunks to consider per section (None for all)

    Returns:
        Token count for the section
    """
    # Estimate for metadata (title, source_type, etc.)
    METADATA_TOKEN_ESTIMATE = 75

    # If max_chunks_per_section is specified, only count tokens for selected chunks
    if max_chunks_per_section is not None:
        selected_chunks = select_chunks_for_relevance(section, max_chunks_per_section)
        # Combine content from selected chunks
        combined_content = "\n".join(chunk.content for chunk in selected_chunks)
        content_tokens = token_counter(combined_content)
    else:
        content_tokens = token_counter(section.combined_content)

    return content_tokens + METADATA_TOKEN_ESTIMATE


@log_function_time(print_only=True)
def _trim_sections_by_tokens(
    sections: list[InferenceSection],
    max_tokens: int,
    token_counter: Callable[[str], int],
    max_chunks_per_section: int | None = None,
) -> list[InferenceSection]:
    """Trim sections to fit within a token budget using the LLM tokenizer.

    Args:
        sections: List of InferenceSection objects to trim
        max_tokens: Maximum token budget
        token_counter: Function that counts tokens in text
        max_chunks_per_section: Maximum chunks to consider per section (None for all)

    Returns:
        Trimmed list of sections that fit within the token budget
    """
    if not sections or max_tokens <= 0:
        return sections

    trimmed_sections = []
    total_tokens = 0

    for section in sections:
        section_tokens = _estimate_section_tokens(
            section, token_counter, max_chunks_per_section
        )
        if total_tokens + section_tokens <= max_tokens:
            trimmed_sections.append(section)
            total_tokens += section_tokens
        else:
            break

    logger.debug(
        f"Trimmed sections from {len(sections)} to {len(trimmed_sections)} ({total_tokens} tokens, budget: {max_tokens})"
    )

    return trimmed_sections


class SearchTool(Tool[SearchToolOverrideKwargs]):
    NAME = "internal_search"
    DISPLAY_NAME = "Internal Search"
    DESCRIPTION = "Search connected applications for information."

    def __init__(
        self,
        tool_id: int,
        emitter: Emitter,
        # Used for ACLs and federated search, anonymous users only see public docs
        user: User,
        # Pre-extracted persona search configuration
        persona_search_info: PersonaSearchInfo,
        llm: LLM,
        document_index: DocumentIndex,
        # Respecting user selections
        user_selected_filters: BaseFilters | None,
        # Vespa metadata filters for overflowing user files.  NOT the raw IDs
        # of the current project/persona — only set when user files couldn't
        # fit in the LLM context and need to be searched via vector DB.
        project_id_filter: int | None,
        persona_id_filter: int | None = None,
        bypass_acl: bool = False,
        # Slack context for federated Slack search (tokens fetched internally)
        slack_context: SlackContext | None = None,
        # Whether to enable Slack federated search
        enable_slack_search: bool = True,
    ) -> None:
        super().__init__(emitter=emitter)

        self.user = user
        self.persona_search_info = persona_search_info
        self.llm = llm
        self.document_index = document_index
        self.user_selected_filters = user_selected_filters
        self.project_id_filter = project_id_filter
        self.persona_id_filter = persona_id_filter
        self.bypass_acl = bypass_acl
        self.slack_context = slack_context
        self.enable_slack_search = enable_slack_search

        self._id = tool_id

    def _prefetch_slack_data(
        self, db_session: Session
    ) -> tuple[str | None, str | None, dict[str, Any]]:
        """Pre-fetch Slack access token, bot token, and entity config from DB.

        All DB queries for Slack federated search are performed here in a
        single session, so the parallel search phase needs no DB access.

        Returns:
            (access_token, bot_token, entities) — access_token is None when
            Slack search should be skipped.
        """
        bot_token: str | None = None
        access_token: str | None = None
        entities: dict[str, Any] = {}

        # Case 1: Slack bot context — requires a Slack federated connector
        # linked via the persona's document sets
        if self.slack_context:
            document_set_names = self.persona_search_info.document_set_names
            if not document_set_names:
                logger.debug(
                    "Skipping Slack federated search: no document sets on persona"
                )
                return None, None, {}

            slack_federated_mappings = (
                get_federated_connector_document_set_mappings_by_document_set_names(
                    db_session, document_set_names
                )
            )
            found_slack_connector = False
            for mapping in slack_federated_mappings:
                if (
                    mapping.federated_connector is not None
                    and mapping.federated_connector.source
                    == FederatedConnectorSource.FEDERATED_SLACK
                ):
                    entities = mapping.federated_connector.config or {}
                    found_slack_connector = True
                    logger.debug(f"Found Slack federated connector config: {entities}")
                    break

            if not found_slack_connector:
                logger.debug(
                    f"Skipping Slack federated search: no Slack federated connector linked to document sets {document_set_names}"
                )
                return None, None, {}

            try:
                slack_bots = fetch_slack_bots(db_session)
                if not slack_bots:
                    return None, None, {}

                tenant_slack_bot = next(
                    (bot for bot in slack_bots if bot.enabled and bot.user_token),
                    None,
                )
                if not tenant_slack_bot:
                    tenant_slack_bot = next(
                        (bot for bot in slack_bots if bot.enabled), None
                    )

                if tenant_slack_bot:
                    bot_token = (
                        tenant_slack_bot.bot_token.get_value(apply_mask=False)
                        if tenant_slack_bot.bot_token
                        else None
                    )
                    user_token = (
                        tenant_slack_bot.user_token.get_value(apply_mask=False)
                        if tenant_slack_bot.user_token
                        else None
                    )
                    access_token = user_token or bot_token
            except Exception as e:
                logger.warning(f"Could not fetch Slack bot tokens: {e}")

        # Case 2: Web user with federated OAuth (if bot context didn't yield a token)
        if not access_token and self.user:
            try:
                federated_oauth_tokens = list_federated_connector_oauth_tokens(
                    db_session, self.user.id
                )
                if not federated_oauth_tokens:
                    return access_token, bot_token, entities

                slack_oauth_token = next(
                    (
                        token
                        for token in federated_oauth_tokens
                        if token.federated_connector.source
                        == FederatedConnectorSource.FEDERATED_SLACK
                    ),
                    None,
                )
                if slack_oauth_token and slack_oauth_token.token:
                    access_token = slack_oauth_token.token.get_value(apply_mask=False)
                    entities = slack_oauth_token.federated_connector.config or {}
            except Exception as e:
                logger.warning(f"Could not fetch Slack OAuth token: {e}")

        return access_token, bot_token, entities

    def _run_slack_search(
        self,
        query: str,
        access_token: str,
        bot_token: str | None,
        entities: dict[str, Any],
        search_settings: SearchSettings,
    ) -> list[InferenceChunk]:
        """Run Slack federated search using pre-fetched tokens and config.

        All DB data is pre-fetched in run() so this method needs no DB session.

        Args:
            query: The user's original search query
            access_token: Slack access token (user or bot)
            bot_token: Slack bot token (for enhanced permissions)
            entities: Federated connector entity config (channel filtering)
            search_settings: Pre-fetched SearchSettings for chunking config

        Returns:
            List of InferenceChunk results from Slack
        """
        try:
            chunk_request = ChunkIndexRequest(
                query=query,
                filters=IndexFilters(access_control_list=None),
            )

            chunks = slack_retrieval(
                query=chunk_request,
                access_token=access_token,
                connector=None,
                entities=entities,
                limit=None,
                slack_event_context=self.slack_context,
                bot_token=bot_token,
                team_id=None,
                search_settings=search_settings,
            )

            logger.info(f"Slack federated search returned {len(chunks)} chunks")
            return chunks

        except Exception as e:
            logger.error(f"Slack federated search error: {e}", exc_info=True)
            return []

    def _run_search_for_query(
        self,
        query: str,
        hybrid_alpha: float | None,
        num_hits: int,
        acl_filters: list[str] | None,
        embedding_model: EmbeddingModel,
        federated_retrieval_infos: list[FederatedRetrievalInfo],
    ) -> list[InferenceChunk]:
        """Run search pipeline for a single query using pre-fetched data.

        All DB data (ACL filters, embedding model, federated retrieval info)
        is pre-fetched in run() so this method needs no DB session.

        Args:
            query: The search query string
            hybrid_alpha: Hybrid search alpha parameter (None for default)
            num_hits: Maximum number of hits to return
            acl_filters: Pre-fetched ACL filters (None when bypass_acl)
            embedding_model: Pre-fetched embedding model
            federated_retrieval_infos: Pre-fetched federated retrieval functions

        Returns:
            List of InferenceChunk results
        """
        return search_pipeline(
            chunk_search_request=ChunkSearchRequest(
                query=query,
                hybrid_alpha=hybrid_alpha,
                # For projects, the search scope is the project and has no other limits
                user_selected_filters=(
                    self.user_selected_filters
                    if self.project_id_filter is None
                    else None
                ),
                bypass_acl=self.bypass_acl,
                limit=num_hits,
            ),
            project_id_filter=self.project_id_filter,
            persona_id_filter=self.persona_id_filter,
            document_index=self.document_index,
            user=self.user,
            persona_search_info=self.persona_search_info,
            acl_filters=acl_filters,
            embedding_model=embedding_model,
            prefetched_federated_retrieval_infos=federated_retrieval_infos,
        )

    @classmethod
    def is_available(cls, db_session: Session) -> bool:
        """Check if search tool is available.

        Returns False when the vector DB is disabled (search cannot function
        without it). Otherwise, available if ANY of the following exist:
        - Regular connectors (team knowledge)
        - Federated connectors (e.g., Slack)
        - User files (User Knowledge mode)
        """
        from onyx.configs.app_configs import DISABLE_VECTOR_DB
        from onyx.db.connector import check_user_files_exist

        if DISABLE_VECTOR_DB:
            return False

        return (
            check_connectors_exist(db_session)
            or check_federated_connectors_exist(db_session)
            or check_user_files_exist(db_session)
        )

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    """For explicit tool calling"""

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        QUERIES_FIELD: {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of search queries to execute, typically a single query.",
                        },
                    },
                    "required": [QUERIES_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolStart(),
            )
        )

    @log_function_time(print_only=True)
    def run(
        self,
        placement: Placement,
        override_kwargs: SearchToolOverrideKwargs,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        # Start overall timing
        overall_start_time = time.time()

        # Initialize timing variables (in case of early exceptions)
        query_expansion_elapsed = 0.0
        document_selection_elapsed = 0.0
        document_expansion_elapsed = 0.0

        # Pre-fetch all DB data in a single short-lived session so that
        # parallel search workers need zero DB connections.
        with get_session_with_current_tenant() as db_session:
            # ACL filters
            acl_filters: list[str] | None = (
                None
                if self.bypass_acl
                else build_access_filters_for_user(self.user, db_session)
            )

            # SearchSettings → materialise EmbeddingModel while session is
            # open (forces lazy-load of cloud_provider properties)
            search_settings = get_current_search_settings(db_session)
            if not search_settings:
                raise RuntimeError(
                    "No search settings configured — cannot run internal search"
                )

            embedding_model = EmbeddingModel.from_db_model(
                search_settings=search_settings,
                server_host=MODEL_SERVER_HOST,
                server_port=MODEL_SERVER_PORT,
            )

            # Federated retrieval functions (non-Slack; Slack is separate)
            if self.project_id_filter is not None:
                # Project mode ignores user filters → no federated sources
                prefetch_source_types = None
            else:
                prefetch_source_types = (
                    list(self.user_selected_filters.source_type)
                    if self.user_selected_filters
                    and self.user_selected_filters.source_type
                    else None
                )
            federated_retrieval_infos = (
                get_federated_retrieval_functions(
                    db_session=db_session,
                    user_id=self.user.id if self.user else None,
                    source_types=prefetch_source_types,
                    document_set_names=self.persona_search_info.document_set_names,
                )
                or []
            )

            # Slack tokens and entity config — only prefetch when Slack
            # search is enabled or we're in a Slack bot context.
            if self.enable_slack_search or self.slack_context:
                slack_access_token, slack_bot_token, slack_entities = (
                    self._prefetch_slack_data(db_session)
                )
            else:
                slack_access_token, slack_bot_token, slack_entities = (
                    None,
                    None,
                    {},
                )
        # Session is closed here — all parallel work uses plain Python objects only

        if QUERIES_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{QUERIES_FIELD}' parameter in internal_search tool call",
                llm_facing_message=(
                    f"The internal_search tool requires a '{QUERIES_FIELD}' parameter "
                    f"containing an array of search queries. Please provide the queries "
                    f'like: {{"queries": ["your search query here"]}}'
                ),
            )
        llm_queries = cast(list[str], llm_kwargs[QUERIES_FIELD])

        # Run semantic and keyword query expansion in parallel (unless skipped)
        # Use message history, memories, and user info from override_kwargs
        message_history = (
            override_kwargs.message_history if override_kwargs.message_history else []
        )
        memories = (
            override_kwargs.user_memory_context.as_formatted_list()
            if override_kwargs.user_memory_context
            else []
        )
        user_info = override_kwargs.user_info

        # Skip query expansion if this is a repeat search call
        if override_kwargs.skip_query_expansion:
            logger.debug("Search tool - Skipping query expansion (repeat search call)")
            semantic_query = None
            keyword_queries: list[str] = []
        else:
            # Start timing for query expansion/rephrase
            query_expansion_start_time = time.time()

            functions_with_args: list[tuple[Callable, tuple]] = [
                (
                    semantic_query_rephrase,
                    (message_history, self.llm, user_info, memories),
                ),
                (
                    keyword_query_expansion,
                    (message_history, self.llm, user_info, memories),
                ),
            ]

            expansion_results = run_functions_tuples_in_parallel(functions_with_args)

            # End timing for query expansion/rephrase
            query_expansion_elapsed = time.time() - query_expansion_start_time
            logger.debug(
                f"Search tool - Query expansion/rephrase took {query_expansion_elapsed:.3f} seconds"
            )
            semantic_query = expansion_results[0]  # str
            keyword_queries = (
                expansion_results[1] if expansion_results[1] is not None else []
            )  # list[str]

        # Prepare queries with their weights and hybrid_alpha settings
        # Group 1: Keyword queries (use hybrid_alpha=0.2)
        keyword_queries_with_weights = [
            (kw_query, LLM_KEYWORD_QUERY_WEIGHT) for kw_query in keyword_queries
        ]
        deduplicated_keyword_queries = deduplicate_queries(keyword_queries_with_weights)

        # Group 2: Semantic/LLM/Original queries (use hybrid_alpha=None)
        # Include all LLM-provided queries with their weight
        semantic_queries_with_weights = (
            [
                (semantic_query, LLM_SEMANTIC_QUERY_WEIGHT),
            ]
            if semantic_query
            else []
        )
        for llm_query in llm_queries:
            # In rare cases, the LLM may fail to provide real queries
            if llm_query:
                semantic_queries_with_weights.append(
                    (llm_query, LLM_NON_CUSTOM_QUERY_WEIGHT)
                )
        if override_kwargs.original_query:
            semantic_queries_with_weights.append(
                (override_kwargs.original_query, ORIGINAL_QUERY_WEIGHT)
            )
        deduplicated_semantic_queries = deduplicate_queries(
            semantic_queries_with_weights
        )

        # Build the all_queries list for UI display, sorted by weight (highest first)
        # Combine all deduplicated queries and sort by weight
        all_queries_with_weights = (
            deduplicated_semantic_queries + deduplicated_keyword_queries
        )
        all_queries_with_weights.sort(key=lambda x: x[1], reverse=True)

        # Extract queries in weight order, handling cross-duplicates
        all_queries = []
        seen_lower = set()
        for query, _ in all_queries_with_weights:
            query_lower = query.lower()
            if query_lower not in seen_lower:
                all_queries.append(query)
                seen_lower.add(query_lower)

        logger.debug(
            f"All Queries (sorted by weight): {all_queries}, Keyword queries: {[q for q, _ in deduplicated_keyword_queries]}"
        )

        # Emit the queries early so the UI can display them immediately
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolQueriesDelta(
                    queries=all_queries,
                ),
            )
        )

        # Run all searches in parallel with appropriate hybrid_alpha values
        # Keyword queries use hybrid_alpha=0.2 (favor keyword search)
        # Other queries use default hybrid_alpha (balanced semantic/keyword)
        search_functions: list[tuple[Callable, tuple]] = []
        search_weights: list[float] = []

        # Add deduplicated semantic queries (use hybrid_alpha=None)
        for query, weight in deduplicated_semantic_queries:
            search_functions.append(
                (
                    self._run_search_for_query,
                    (
                        query,
                        None,
                        override_kwargs.num_hits,
                        acl_filters,
                        embedding_model,
                        federated_retrieval_infos,
                    ),
                )
            )
            search_weights.append(weight)

        # Add deduplicated keyword queries (use hybrid_alpha=0.2)
        for query, weight in deduplicated_keyword_queries:
            search_functions.append(
                (
                    self._run_search_for_query,
                    (
                        query,
                        KEYWORD_QUERY_HYBRID_ALPHA,
                        override_kwargs.num_hits,
                        acl_filters,
                        embedding_model,
                        federated_retrieval_infos,
                    ),
                )
            )
            search_weights.append(weight)

        # Add Slack federated search (runs once in parallel with all Vespa queries)
        # This avoids the query multiplication problem where each Vespa query
        # would trigger a separate Slack search.
        # Only run if pre-fetch found a valid Slack access token.
        if slack_access_token and override_kwargs.original_query:
            search_functions.append(
                (
                    self._run_slack_search,
                    (
                        override_kwargs.original_query,
                        slack_access_token,
                        slack_bot_token,
                        slack_entities,
                        search_settings,
                    ),
                )
            )
            # Use same weight as original query for Slack results
            search_weights.append(ORIGINAL_QUERY_WEIGHT)

        # Run all searches in parallel (Vespa queries + Slack)
        all_search_results = run_functions_tuples_in_parallel(search_functions)
        if not all_search_results:
            all_search_results = []

        # Merge results using weighted Reciprocal Rank Fusion
        # This intelligently combines rankings from different queries
        top_chunks = weighted_reciprocal_rank_fusion(
            ranked_results=all_search_results,
            weights=search_weights,
            id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
        )

        # We can disregard all of the chunks that exceed the num_hits parameter since it's not valid to have
        # documents/contents from things that aren't returned to the user on the frontend
        top_sections = merge_individual_chunks(top_chunks)[: override_kwargs.num_hits]

        if not top_sections:
            logger.info("Search tool - no results found, returning empty response")
            return ToolResponse(
                rich_response=SearchDocsResponse(
                    search_docs=[],
                    citation_mapping={},
                    displayed_docs=None,
                ),
                llm_facing_response="",
            )

        # Convert InferenceSections to SearchDocs for emission
        search_docs = convert_inference_sections_to_search_docs(
            top_sections, is_internet=False
        )

        secondary_flows_user_query = (
            override_kwargs.original_query
            or semantic_query
            or (llm_queries[0] if llm_queries else "")
        )

        token_counter = get_llm_token_counter(self.llm)

        # Trim sections to fit within token budget before LLM selection
        # This is to account for very short chunks flooding the search context
        # Only consider MAX_CHUNKS_FOR_RELEVANCE chunks per section to avoid flooding from
        # documents with many matching sections
        max_tokens_for_selection = (
            override_kwargs.max_llm_chunks or MAX_CHUNKS_FED_TO_CHAT
        ) * DOC_EMBEDDING_CONTEXT_SIZE

        # This is approximate since it doesn't build the exact string of the call below
        # Some things are estimated and may be under (like the metadata tokens)
        sections_for_selection = _trim_sections_by_tokens(
            sections=top_sections,
            max_tokens=max_tokens_for_selection,
            token_counter=token_counter,
            max_chunks_per_section=MAX_CHUNKS_FOR_RELEVANCE,
        )

        # Start timing for LLM document selection
        document_selection_start_time = time.time()

        # Use LLM to select the most relevant sections for expansion
        selected_sections, best_doc_ids = select_sections_for_expansion(
            sections=sections_for_selection,
            user_query=secondary_flows_user_query,
            llm=self.llm,
            max_chunks_per_section=MAX_CHUNKS_FOR_RELEVANCE,
        )

        # End timing for LLM document selection
        document_selection_elapsed = time.time() - document_selection_start_time
        logger.debug(
            f"Search tool - LLM picking documents took {document_selection_elapsed:.3f} seconds "
            f"(selected {len(selected_sections)} sections)"
        )

        # Create a set of best document IDs for quick lookup
        best_doc_ids_set = set(best_doc_ids) if best_doc_ids else set()

        # To show the users, we only pass in the docs that are determined to be good by the LLM
        final_ui_docs = convert_inference_sections_to_search_docs(
            selected_sections, is_internet=False
        )

        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolDocumentsDelta(
                    documents=final_ui_docs,
                ),
            )
        )

        # Create wrapper function to handle errors gracefully
        def expand_section_safe(
            section: InferenceSection,
            user_query: str,
            llm: LLM,
            document_index: DocumentIndex,
            expand_override: bool,
        ) -> InferenceSection:
            """Wrapper that handles exceptions and returns original section on error."""
            try:
                expanded_section = expand_section_with_context(
                    section=section,
                    user_query=user_query,
                    llm=llm,
                    document_index=document_index,
                    expand_override=expand_override,
                )
                # Return expanded section if not None, otherwise original
                return expanded_section if expanded_section is not None else section
            except Exception as e:
                logger.warning(
                    f"Error processing section context expansion: {e}. Using original section."
                )
                return section

        # Build parallel function calls for all sections
        expansion_functions: list[tuple[Callable, tuple]] = [
            (
                expand_section_safe,
                (
                    section,
                    secondary_flows_user_query,
                    self.llm,
                    self.document_index,
                    section.center_chunk.document_id in best_doc_ids_set,
                ),
            )
            for section in selected_sections
        ]

        # Start timing for document expansion
        document_expansion_start_time = time.time()

        # Run all expansions in parallel
        expanded_sections = run_functions_tuples_in_parallel(expansion_functions)

        # End timing for document expansion
        document_expansion_elapsed = time.time() - document_expansion_start_time
        logger.debug(
            f"Search tool - Expansion of selected documents took {document_expansion_elapsed:.3f} seconds "
            f"(expanded {len(expanded_sections)} sections)"
        )

        if not expanded_sections:
            expanded_sections = selected_sections

        # Merge sections from the same document that have adjacent or overlapping chunks
        # This prevents duplicate content and reduces token usage
        merged_sections = merge_overlapping_sections(expanded_sections)

        docs_str, citation_mapping = convert_inference_sections_to_llm_string(
            top_sections=merged_sections,
            citation_start=override_kwargs.starting_citation_num,
            limit=override_kwargs.max_llm_chunks,
            include_document_id=False,
        )

        # End overall timing
        overall_elapsed = time.time() - overall_start_time
        logger.debug(
            f"Search tool - Total execution time: {overall_elapsed:.3f} seconds "
            f"(query expansion: {query_expansion_elapsed:.3f}s, "
            f"document selection: {document_selection_elapsed:.3f}s, "
            f"document expansion: {document_expansion_elapsed:.3f}s)"
        )

        return ToolResponse(
            # Typically the rich response will give more docs in case it needs to be displayed in the UI
            rich_response=SearchDocsResponse(
                search_docs=search_docs,
                citation_mapping=citation_mapping,
                displayed_docs=final_ui_docs or None,
            ),
            # The LLM facing response typically includes less docs to cut down on noise and token usage
            llm_facing_response=docs_str,
        )


================================================
FILE: backend/onyx/tools/tool_implementations/search/search_utils.py
================================================
from collections import defaultdict
from collections.abc import Callable
from typing import TypeVar

from onyx.context.search.models import ContextExpansionType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.utils import inference_section_from_chunks
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.llm.interfaces import LLM
from onyx.prompts.prompt_utils import clean_up_source
from onyx.secondary_llm_flows.document_filter import classify_section_relevance
from onyx.tools.tool_implementations.search.constants import (
    FULL_DOC_NUM_CHUNKS_AROUND,
)
from onyx.tools.tool_implementations.search.constants import RRF_K_VALUE
from onyx.utils.logger import setup_logger

logger = setup_logger()


T = TypeVar("T")


def weighted_reciprocal_rank_fusion(
    ranked_results: list[list[T]],
    weights: list[float],
    id_extractor: Callable[[T], str],
    k: int = RRF_K_VALUE,
) -> list[T]:
    """
    Merge multiple ranked result lists using weighted Reciprocal Rank Fusion (RRF).

    RRF combines rankings from different sources by computing a score for each item
    based on its rank positions across all lists. The weighted version allows different
    importance to be assigned to different result sources.

    Formula: RRF_score(item) = sum over all rankers of: weight / (k + rank(item))

    Args:
        ranked_results: List of ranked result lists, where each inner list contains
                       items ranked from best to worst (index 0 is rank 1)
        weights: List of weights corresponding to each result list. Higher weights
                give more importance to that ranking source.
        id_extractor: Function to extract a unique identifier from each item.
                     Items with the same ID across different lists are treated as
                     the same item and their scores are accumulated.
        k: Constant to prevent overemphasis on top-ranked items (default: RRF_K_VALUE).
           Typical values are 50-60. Lower values give more weight to top results.

    Returns:
        List of items sorted by their weighted RRF score in descending order.
        Each unique item appears only once, even if it was in multiple input lists.

    Example:
        >>> results1 = [doc_a, doc_b, doc_c]  # Semantic search results
        >>> results2 = [doc_c, doc_a, doc_d]  # Keyword search results
        >>> weights = [1.2, 1.0]  # Semantic query weighted higher
        >>> merged = weighted_reciprocal_rank_fusion(
        ...     [results1, results2],
        ...     weights,
        ...     lambda doc: doc.document_id
        ... )
        # doc_a and doc_c will have higher scores (appeared in both lists)
    """
    if len(ranked_results) != len(weights):
        raise ValueError(
            f"Number of ranked results ({len(ranked_results)}) must match number of weights ({len(weights)})"
        )

    # Track RRF scores for each unique item (identified by ID)
    rrf_scores: dict[str, float] = defaultdict(float)
    # Track the actual item object for each ID (use first occurrence)
    id_to_item: dict[str, T] = {}
    # Track which result list each item first appeared in (for tiebreaking)
    id_to_source_index: dict[str, int] = {}
    # Track the position within the source list (for tiebreaking)
    id_to_source_rank: dict[str, int] = {}

    # Compute weighted RRF scores
    for source_idx, (result_list, weight) in enumerate(zip(ranked_results, weights)):
        for rank, item in enumerate(result_list, start=1):
            item_id = id_extractor(item)

            # Add weighted RRF score: weight / (k + rank)
            rrf_scores[item_id] += weight / (k + rank)

            # Store the item object and source info (if not already stored)
            if item_id not in id_to_item:
                id_to_item[item_id] = item
                id_to_source_index[item_id] = source_idx
                id_to_source_rank[item_id] = rank

    # Sort items by:
    # 1. RRF score (descending - higher is better)
    # 2. Source index modulo (for round-robin across queries)
    # 3. Rank within source (ascending - lower rank is better)
    sorted_ids = sorted(
        rrf_scores.keys(),
        key=lambda id: (
            -rrf_scores[
                id
            ],  # Primary: higher RRF score first (negative for descending)
            id_to_source_rank[id],  # Secondary: lower rank within source first
            id_to_source_index[id],  # Tertiary: round-robin by cycling through sources
        ),
    )
    return [id_to_item[item_id] for item_id in sorted_ids]


def section_to_dict(section: InferenceSection, section_num: int) -> dict:
    doc_dict = {
        "document_number": section_num + 1,
        "title": section.center_chunk.semantic_identifier,
        "content": section.combined_content,
        "source": clean_up_source(section.center_chunk.source_type),
        "metadata": section.center_chunk.metadata,
    }
    if section.center_chunk.updated_at:
        doc_dict["updated_at"] = section.center_chunk.updated_at.strftime(
            "%B %d, %Y %H:%M"
        )
    return doc_dict


def _retrieve_adjacent_chunks(
    section: InferenceSection,
    document_index: DocumentIndex,
    num_chunks_above: int,
    num_chunks_below: int,
) -> tuple[list[InferenceChunk], list[InferenceChunk]]:
    """Retrieve adjacent chunks above and below a section.

    Args:
        section: The InferenceSection to get adjacent chunks for
        document_index: The document index to query
        num_chunks_above: Number of chunks to retrieve above the section
        num_chunks_below: Number of chunks to retrieve below the section

    Returns:
        Tuple of (chunks_above, chunks_below)
    """
    # Get the document_id and chunk range from the section
    document_id = section.center_chunk.document_id

    # The document fetching already enforced permissions
    # the expansion does not need to do this unless it's for performance reasons
    filters = IndexFilters(access_control_list=None)

    # Find the min and max chunk_id in the section
    chunk_ids = [chunk.chunk_id for chunk in section.chunks]
    min_chunk_id = min(chunk_ids)
    max_chunk_id = max(chunk_ids)

    chunks_above: list[InferenceChunk] = []
    chunks_below: list[InferenceChunk] = []

    # Retrieve chunks above (if any)
    if num_chunks_above > 0 and min_chunk_id > 0:
        above_min = max(0, min_chunk_id - num_chunks_above)
        above_max = min_chunk_id - 1

        above_request = VespaChunkRequest(
            document_id=replace_invalid_doc_id_characters(document_id),
            min_chunk_ind=above_min,
            max_chunk_ind=above_max,
        )

        try:
            chunks_above = document_index.id_based_retrieval(
                chunk_requests=[above_request],
                filters=filters,
                batch_retrieval=True,
            )
            # Sort by chunk_id to ensure correct order
            chunks_above.sort(key=lambda c: c.chunk_id)
        except Exception as e:
            logger.warning(f"Failed to retrieve chunks above section: {e}")

    # Retrieve chunks below (if any)
    if num_chunks_below > 0:
        below_min = max_chunk_id + 1
        below_max = max_chunk_id + num_chunks_below

        below_request = VespaChunkRequest(
            document_id=replace_invalid_doc_id_characters(document_id),
            min_chunk_ind=below_min,
            max_chunk_ind=below_max,
        )

        try:
            chunks_below = document_index.id_based_retrieval(
                chunk_requests=[below_request],
                filters=filters,
                batch_retrieval=True,
            )
            # Sort by chunk_id to ensure correct order
            chunks_below.sort(key=lambda c: c.chunk_id)
        except Exception as e:
            logger.warning(f"Failed to retrieve chunks below section: {e}")

    return chunks_above, chunks_below


def merge_overlapping_sections(
    sections: list[InferenceSection],
) -> list[InferenceSection]:
    """Merge sections from the same document that have adjacent or overlapping chunks.

    Sections are merged if they come from the same document and their chunk ranges
    are adjacent (chunk_ids differ by 1) or overlapping (share chunk_ids).
    The merged sections maintain the position of the first section in the original list.

    Args:
        sections: List of InferenceSection objects to merge

    Returns:
        List of merged InferenceSection objects
    """
    if not sections:
        return []

    # Create a mapping from section to its original index for ordering
    section_to_original_index: dict[tuple[str, int], int] = {}
    for idx, section in enumerate(sections):
        section_id = (section.center_chunk.document_id, section.center_chunk.chunk_id)
        section_to_original_index[section_id] = idx

    # Group sections by document_id
    doc_sections: dict[str, list[InferenceSection]] = defaultdict(list)
    for section in sections:
        doc_sections[section.center_chunk.document_id].append(section)

    # Track which sections have been merged into a result section
    merged_sections: dict[tuple[str, int], InferenceSection] = {}

    # Process each document's sections
    for doc_id, doc_section_list in doc_sections.items():
        if not doc_section_list:
            continue

        # Sort sections by their minimum chunk_id
        doc_section_list.sort(key=lambda s: min(c.chunk_id for c in s.chunks))

        # Track merged groups - start with first section
        current_merged_chunks = set(doc_section_list[0].chunks)
        sections_in_current_group = [doc_section_list[0]]

        for i in range(1, len(doc_section_list)):
            current_section = doc_section_list[i]
            current_section_chunks = set(current_section.chunks)

            # Get chunk_id ranges
            merged_chunk_ids = {c.chunk_id for c in current_merged_chunks}
            current_chunk_ids = {c.chunk_id for c in current_section_chunks}

            # Check if adjacent or overlapping
            min_merged = min(merged_chunk_ids)
            max_merged = max(merged_chunk_ids)
            min_current = min(current_chunk_ids)
            max_current = max(current_chunk_ids)

            is_adjacent = (min_current == max_merged + 1) or (
                min_merged == max_current + 1
            )
            is_overlapping = bool(merged_chunk_ids & current_chunk_ids)

            if is_adjacent or is_overlapping:
                # Merge into current group
                current_merged_chunks.update(current_section_chunks)
                sections_in_current_group.append(current_section)
            else:
                # Finalize current group and start new one
                # Find the section that appeared first in the original list
                first_section = min(
                    sections_in_current_group,
                    key=lambda s: section_to_original_index.get(
                        (s.center_chunk.document_id, s.center_chunk.chunk_id),
                        float("inf"),
                    ),
                )

                # Create merged section with all chunks
                all_chunks = sorted(current_merged_chunks, key=lambda c: c.chunk_id)
                merged_section = inference_section_from_chunks(
                    center_chunk=first_section.center_chunk,
                    chunks=all_chunks,
                )

                if merged_section:
                    # Store the merged section for all sections in this group
                    for section in sections_in_current_group:
                        section_id = (
                            section.center_chunk.document_id,
                            section.center_chunk.chunk_id,
                        )
                        merged_sections[section_id] = merged_section

                # Start new group
                current_merged_chunks = current_section_chunks
                sections_in_current_group = [current_section]

        # Finalize the last group
        if sections_in_current_group:
            first_section = min(
                sections_in_current_group,
                key=lambda s: section_to_original_index.get(
                    (s.center_chunk.document_id, s.center_chunk.chunk_id),
                    float("inf"),
                ),
            )

            all_chunks = sorted(current_merged_chunks, key=lambda c: c.chunk_id)
            merged_section = inference_section_from_chunks(
                center_chunk=first_section.center_chunk,
                chunks=all_chunks,
            )

            if merged_section:
                for section in sections_in_current_group:
                    section_id = (
                        section.center_chunk.document_id,
                        section.center_chunk.chunk_id,
                    )
                    merged_sections[section_id] = merged_section

    # Build result list maintaining original order
    seen_section_ids: set[tuple[str, int]] = set()
    result: list[InferenceSection] = []

    for section in sections:
        section_id = (section.center_chunk.document_id, section.center_chunk.chunk_id)
        merged_section = merged_sections.get(section_id, section)

        # Use merged section's center_chunk as identifier
        merged_section_id = (
            merged_section.center_chunk.document_id,
            merged_section.center_chunk.chunk_id,
        )

        if merged_section_id not in seen_section_ids:
            seen_section_ids.add(merged_section_id)
            result.append(merged_section)

    return result


def expand_section_with_context(
    section: InferenceSection,
    user_query: str,
    llm: LLM,
    document_index: DocumentIndex,
    expand_override: bool = False,
) -> InferenceSection | None:
    """Use LLM to classify section relevance and return expanded section with appropriate context.

    This function combines classification and expansion into a single operation:
    1. Retrieves chunks needed for classification (2 chunks for prompt)
    2. Uses LLM to classify relevance (situations 1-4) unless expand_override is True
    3. For FULL_DOCUMENT, fetches additional chunks (5 total above/below)
    4. Returns the expanded section or None if not relevant

    Args:
        section: The InferenceSection to classify and expand
        search_query: The user's search query
        llm: LLM instance to use for classification
        document_index: Document index for retrieving adjacent chunks
        expand_override: If True, skip LLM classification and use FULL_DOCUMENT expansion

    Returns:
        Expanded InferenceSection with appropriate context, or None if NOT_RELEVANT
    """
    chunks_above_for_prompt: list[InferenceChunk] = []
    chunks_below_for_prompt: list[InferenceChunk] = []

    # If expand_override is True, skip LLM classification and use FULL_DOCUMENT
    if expand_override:
        classification = ContextExpansionType.FULL_DOCUMENT
        # These are not used, but need to be defined to avoid type errors
    else:
        # Retrieve 2 chunks above and below for the LLM classification prompt
        chunks_above_for_prompt, chunks_below_for_prompt = _retrieve_adjacent_chunks(
            section=section,
            document_index=document_index,
            num_chunks_above=2,
            num_chunks_below=2,
        )

        # Format the section content for the prompt
        section_above_text = (
            " ".join([c.content for c in chunks_above_for_prompt])
            if chunks_above_for_prompt
            else None
        )
        section_below_text = (
            " ".join([c.content for c in chunks_below_for_prompt])
            if chunks_below_for_prompt
            else None
        )

        # Classify section relevance using LLM
        classification = classify_section_relevance(
            document_title=section.center_chunk.semantic_identifier,
            section_text=section.combined_content,
            user_query=user_query,
            llm=llm,
            section_above_text=section_above_text,
            section_below_text=section_below_text,
        )

    # Now build the expanded section based on classification
    if classification == ContextExpansionType.NOT_RELEVANT:
        # Filter out this section
        logger.debug(
            f"LLM classified section as NOT_RELEVANT: {section.center_chunk.semantic_identifier}"
        )
        return None

    elif classification == ContextExpansionType.MAIN_SECTION_ONLY:
        # Return original section unchanged
        logger.debug(
            f"LLM classified section as MAIN_SECTION_ONLY: {section.center_chunk.semantic_identifier}"
        )
        return section

    elif classification == ContextExpansionType.INCLUDE_ADJACENT_SECTIONS:
        # Use the 2 chunks we already retrieved for the prompt
        logger.debug(
            f"LLM classified section as INCLUDE_ADJACENT_SECTIONS: {section.center_chunk.semantic_identifier}"
        )

        all_chunks = chunks_above_for_prompt + section.chunks + chunks_below_for_prompt
        if not all_chunks:
            return section

        # Create new InferenceSection with expanded chunks
        expanded_section = inference_section_from_chunks(
            center_chunk=section.center_chunk,
            chunks=all_chunks,
        )

        return expanded_section if expanded_section else section

    elif classification == ContextExpansionType.FULL_DOCUMENT:
        # Fetch 5 chunks above and below (optimal single retrieval)
        if expand_override:
            logger.debug(
                f"Section marked for FULL_DOCUMENT expansion (override): {section.center_chunk.semantic_identifier}"
            )
        else:
            logger.debug(
                f"LLM classified section as FULL_DOCUMENT: {section.center_chunk.semantic_identifier}"
            )

        chunks_above_full, chunks_below_full = _retrieve_adjacent_chunks(
            section=section,
            document_index=document_index,
            num_chunks_above=FULL_DOC_NUM_CHUNKS_AROUND,
            num_chunks_below=FULL_DOC_NUM_CHUNKS_AROUND,
        )

        # Combine all chunks: 5 above + section + 5 below
        all_chunks = chunks_above_full + section.chunks + chunks_below_full

        if not all_chunks:
            logger.warning(
                f"No chunks found for full document context expansion: {section.center_chunk.semantic_identifier}"
            )
            return section

        # Create new InferenceSection with full context
        expanded_section = inference_section_from_chunks(
            center_chunk=section.center_chunk,
            chunks=all_chunks,
        )

        return expanded_section if expanded_section else section

    else:
        # Unknown classification - default to returning original section
        logger.warning(
            f"Unknown context classification {classification}, returning original section"
        )
        return section


================================================
FILE: backend/onyx/tools/tool_implementations/search_like_tool_utils.py
================================================
from onyx.connectors.models import Document
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section


FINAL_CONTEXT_DOCUMENTS_ID = "final_context_documents"
FINAL_SEARCH_QUERIES_ID = "final_search_queries"
SEARCH_INFERENCE_SECTIONS_ID = "search_inference_sections"


def documents_to_indexing_documents(
    documents: list[Document],
) -> list[IndexingDocument]:
    indexing_documents = []

    for document in documents:
        processed_sections = []
        for section in document.sections:
            processed_section = Section(
                text=section.text or "",
                link=section.link,
                image_file_id=None,
            )
            processed_sections.append(processed_section)

        indexed_document = IndexingDocument(
            **document.model_dump(), processed_sections=processed_sections
        )
        indexing_documents.append(indexed_document)
    return indexing_documents


================================================
FILE: backend/onyx/tools/tool_implementations/utils.py
================================================
import json

from onyx.context.search.models import InferenceSection


def convert_inference_sections_to_llm_string(
    top_sections: list[InferenceSection],
    citation_start: int = 1,
    limit: int | None = None,
    include_source_type: bool = True,
    include_link: bool = False,
    include_document_id: bool = False,
) -> tuple[str, dict[int, str]]:
    """Convert InferenceSection objects to a JSON string for LLM.

    Returns a JSON string with document results and a citation mapping.
    """
    # Apply limit if specified
    if limit is not None:
        top_sections = top_sections[:limit]

    # Group sections by document_id to assign same citation_id to sections from same document
    document_id_to_citation_id: dict[str, int] = {}
    citation_mapping: dict[int, str] = {}
    current_citation_id = citation_start

    # First pass: assign citation_ids to unique document_ids
    for section in top_sections:
        document_id = section.center_chunk.document_id
        if document_id not in document_id_to_citation_id:
            document_id_to_citation_id[document_id] = current_citation_id
            citation_mapping[current_citation_id] = document_id
            current_citation_id += 1

    # Second pass: build results with citation_ids assigned per document
    results = []

    for section in top_sections:
        chunk = section.center_chunk
        document_id = chunk.document_id
        citation_id = document_id_to_citation_id[document_id]

        # Combine primary and secondary owners for authors
        authors = None
        if chunk.primary_owners or chunk.secondary_owners:
            authors = []
            if chunk.primary_owners:
                authors.extend(chunk.primary_owners)
            if chunk.secondary_owners:
                authors.extend(chunk.secondary_owners)

        # Format updated_at as ISO string if available
        updated_at_str = None
        if chunk.updated_at:
            updated_at_str = chunk.updated_at.isoformat()

        # Build result dictionary in desired order, only including non-None/empty fields
        result = {
            "document": citation_id,
            "title": chunk.semantic_identifier,
        }
        if updated_at_str is not None:
            result["updated_at"] = updated_at_str
        if authors is not None:
            result["authors"] = authors
        if include_source_type:
            result["source_type"] = chunk.source_type.value
        if include_link:
            # Get the first link from the center chunk's source_links dict
            link = None
            if chunk.source_links:
                # source_links is dict[int, str], get the first value
                link = next(iter(chunk.source_links.values()), None)
            if link:
                result["url"] = link
        if include_document_id:
            result["document_identifier"] = chunk.document_id
        if chunk.metadata:
            result["metadata"] = json.dumps(chunk.metadata, ensure_ascii=False)
        result["content"] = section.combined_content
        results.append(result)

    return (
        json.dumps({"results": results}, indent=2, ensure_ascii=False),
        citation_mapping,
    )


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/clients/brave_client.py
================================================
from __future__ import annotations

from typing import Any

import requests
from fastapi import HTTPException

from onyx.tools.tool_implementations.web_search.models import (
    WebSearchProvider,
)
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

BRAVE_WEB_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
BRAVE_MAX_RESULTS_PER_REQUEST = 20
BRAVE_SAFESEARCH_OPTIONS = {"off", "moderate", "strict"}
BRAVE_FRESHNESS_OPTIONS = {"pd", "pw", "pm", "py"}


class RetryableBraveSearchError(Exception):
    """Error type used to trigger retry for transient Brave search failures."""


class BraveClient(WebSearchProvider):
    def __init__(
        self,
        api_key: str,
        *,
        num_results: int = 10,
        timeout_seconds: int = 10,
        country: str | None = None,
        search_lang: str | None = None,
        ui_lang: str | None = None,
        safesearch: str | None = None,
        freshness: str | None = None,
    ) -> None:
        if timeout_seconds <= 0:
            raise ValueError("Brave provider config 'timeout_seconds' must be > 0.")

        self._headers = {
            "Accept": "application/json",
            "X-Subscription-Token": api_key,
        }
        logger.debug(f"Count of results passed to BraveClient: {num_results}")
        self._num_results = max(1, min(num_results, BRAVE_MAX_RESULTS_PER_REQUEST))
        self._timeout_seconds = timeout_seconds
        self._country = _normalize_country(country)
        self._search_lang = _normalize_language_code(
            search_lang, field_name="search_lang"
        )
        self._ui_lang = _normalize_language_code(ui_lang, field_name="ui_lang")
        self._safesearch = _normalize_option(
            safesearch,
            field_name="safesearch",
            allowed_values=BRAVE_SAFESEARCH_OPTIONS,
        )
        self._freshness = _normalize_option(
            freshness,
            field_name="freshness",
            allowed_values=BRAVE_FRESHNESS_OPTIONS,
        )

    def _build_search_params(self, query: str) -> dict[str, str]:
        params = {
            "q": query,
            "count": str(self._num_results),
        }
        if self._country:
            params["country"] = self._country
        if self._search_lang:
            params["search_lang"] = self._search_lang
        if self._ui_lang:
            params["ui_lang"] = self._ui_lang
        if self._safesearch:
            params["safesearch"] = self._safesearch
        if self._freshness:
            params["freshness"] = self._freshness
        return params

    @retry_builder(
        tries=3,
        delay=1,
        backoff=2,
        exceptions=(RetryableBraveSearchError,),
    )
    def _search_with_retries(self, query: str) -> list[WebSearchResult]:
        params = self._build_search_params(query)

        try:
            response = requests.get(
                BRAVE_WEB_SEARCH_URL,
                headers=self._headers,
                params=params,
                timeout=self._timeout_seconds,
            )
        except requests.RequestException as exc:
            raise RetryableBraveSearchError(
                f"Brave search request failed: {exc}"
            ) from exc

        try:
            response.raise_for_status()
        except requests.HTTPError as exc:
            error_msg = _build_error_message(response)
            if _is_retryable_status(response.status_code):
                raise RetryableBraveSearchError(error_msg) from exc
            raise ValueError(error_msg) from exc

        data = response.json()
        web_results = (data.get("web") or {}).get("results") or []

        results: list[WebSearchResult] = []
        for result in web_results:
            if not isinstance(result, dict):
                continue

            link = _clean_string(result.get("url"))
            if not link:
                continue

            title = _clean_string(result.get("title"))
            description = _clean_string(result.get("description"))

            results.append(
                WebSearchResult(
                    title=title,
                    link=link,
                    snippet=description,
                    author=None,
                    published_date=None,
                )
            )

        return results

    def search(self, query: str) -> list[WebSearchResult]:
        try:
            return self._search_with_retries(query)
        except RetryableBraveSearchError as exc:
            raise ValueError(str(exc)) from exc

    def test_connection(self) -> dict[str, str]:
        try:
            test_results = self.search("test")
            if not test_results or not any(result.link for result in test_results):
                raise HTTPException(
                    status_code=400,
                    detail="Brave API key validation failed: search returned no results.",
                )
        except HTTPException:
            raise
        except (ValueError, requests.RequestException) as e:
            error_msg = str(e)
            lower = error_msg.lower()
            if (
                "status 401" in lower
                or "status 403" in lower
                or "api key" in lower
                or "auth" in lower
            ):
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid Brave API key: {error_msg}",
                ) from e
            if "status 429" in lower or "rate limit" in lower:
                raise HTTPException(
                    status_code=400,
                    detail=f"Brave API rate limit exceeded: {error_msg}",
                ) from e
            raise HTTPException(
                status_code=400,
                detail=f"Brave API key validation failed: {error_msg}",
            ) from e

        logger.info("Web search provider test succeeded for Brave.")
        return {"status": "ok"}


def _build_error_message(response: requests.Response) -> str:
    return f"Brave search failed (status {response.status_code}): {_extract_error_detail(response)}"


def _extract_error_detail(response: requests.Response) -> str:
    try:
        payload: Any = response.json()
    except Exception:
        text = response.text.strip()
        return text[:200] if text else "No error details"

    if isinstance(payload, dict):
        error = payload.get("error")
        if isinstance(error, dict):
            detail = error.get("detail") or error.get("message")
            if isinstance(detail, str):
                return detail
        if isinstance(error, str):
            return error

        message = payload.get("message")
        if isinstance(message, str):
            return message

    return str(payload)[:200]


def _is_retryable_status(status_code: int) -> bool:
    return status_code == 429 or status_code >= 500


def _clean_string(value: Any) -> str:
    return value.strip() if isinstance(value, str) else ""


def _normalize_country(country: str | None) -> str | None:
    if country is None:
        return None
    normalized = country.strip().upper()
    if not normalized:
        return None
    if len(normalized) != 2 or not normalized.isalpha():
        raise ValueError(
            "Brave provider config 'country' must be a 2-letter ISO country code."
        )
    return normalized


def _normalize_language_code(value: str | None, *, field_name: str) -> str | None:
    if value is None:
        return None
    normalized = value.strip()
    if not normalized:
        return None
    if len(normalized) > 20:
        raise ValueError(f"Brave provider config '{field_name}' is too long.")
    return normalized


def _normalize_option(
    value: str | None,
    *,
    field_name: str,
    allowed_values: set[str],
) -> str | None:
    if value is None:
        return None
    normalized = value.strip().lower()
    if not normalized:
        return None
    if normalized not in allowed_values:
        allowed = ", ".join(sorted(allowed_values))
        raise ValueError(
            f"Brave provider config '{field_name}' must be one of: {allowed}."
        )
    return normalized


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/clients/exa_client.py
================================================
import re
from collections.abc import Sequence
from typing import Any

import requests
from exa_py import Exa
from exa_py.api import HighlightsContentsOptions
from fastapi import HTTPException

from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.models import WebContentProvider
from onyx.tools.tool_implementations.web_search.models import (
    WebSearchProvider,
)
from onyx.tools.tool_implementations.web_search.models import (
    WebSearchResult,
)
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

# 1 minute timeout for Exa API requests to prevent indefinite hangs
EXA_REQUEST_TIMEOUT_SECONDS = 60


class ExaWithTimeout(Exa):
    """Exa client subclass that adds timeout support to HTTP requests.

    The base Exa SDK uses requests without timeout, which can cause indefinite hangs.
    This subclass overrides the request method to add a configurable timeout.
    """

    def __init__(
        self,
        api_key: str,
        timeout_seconds: int = EXA_REQUEST_TIMEOUT_SECONDS,
    ) -> None:
        super().__init__(api_key=api_key)
        self._timeout_seconds = timeout_seconds

    def request(
        self,
        endpoint: str,
        data: dict[str, Any] | str | None = None,
        method: str = "POST",
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
    ) -> dict[str, Any] | requests.Response:
        """Override request method to add timeout support."""
        url = f"{self.base_url}/{endpoint}"
        final_headers = {**self.headers, **(headers or {})}

        if method == "GET":
            response = requests.get(
                url,
                headers=final_headers,
                params=params,
                timeout=self._timeout_seconds,
            )
        elif method == "POST":
            response = requests.post(
                url,
                headers=final_headers,
                json=data,
                params=params,
                timeout=self._timeout_seconds,
            )
        elif method == "PATCH":
            response = requests.patch(
                url,
                headers=final_headers,
                json=data,
                params=params,
                timeout=self._timeout_seconds,
            )
        elif method == "DELETE":
            response = requests.delete(
                url,
                headers=final_headers,
                params=params,
                timeout=self._timeout_seconds,
            )
        else:
            raise ValueError(f"Unsupported HTTP method: {method}")

        response.raise_for_status()
        return response.json()


def _extract_site_operators(query: str) -> tuple[str, list[str]]:
    """Extract site: operators and return cleaned query + full domains.

    Returns (cleaned_query, full_domains) where full_domains contains the full
    values after site: (e.g., ["reddit.com/r/leagueoflegends"]).
    """
    full_domains = re.findall(r"site:\s*([^\s]+)", query, re.IGNORECASE)
    cleaned_query = re.sub(r"site:\s*\S+\s*", "", query, flags=re.IGNORECASE).strip()

    if not cleaned_query and full_domains:
        cleaned_query = full_domains[0]

    return cleaned_query, full_domains


class ExaClient(WebSearchProvider, WebContentProvider):
    def __init__(self, api_key: str, num_results: int = 10) -> None:
        self.exa = ExaWithTimeout(api_key=api_key)
        self._num_results = num_results

    @property
    def supports_site_filter(self) -> bool:
        return False

    def _search_exa(
        self, query: str, include_domains: list[str] | None = None
    ) -> list[WebSearchResult]:
        response = self.exa.search_and_contents(
            query,
            type="auto",
            highlights=HighlightsContentsOptions(
                num_sentences=2,
                highlights_per_url=1,
            ),
            num_results=self._num_results,
            include_domains=include_domains,
        )

        results: list[WebSearchResult] = []
        for result in response.results:
            title = (result.title or "").strip()
            # library type stub issue
            snippet = (result.highlights[0] if result.highlights else "").strip()
            results.append(
                WebSearchResult(
                    title=title,
                    link=result.url,
                    snippet=snippet,
                    author=result.author,
                    published_date=(
                        time_str_to_utc(result.published_date)
                        if result.published_date
                        else None
                    ),
                )
            )

        return results

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[WebSearchResult]:
        cleaned_query, full_domains = _extract_site_operators(query)

        if full_domains:
            # Try with include_domains using base domains (e.g., ["reddit.com"])
            base_domains = [d.split("/")[0].removeprefix("www.") for d in full_domains]
            results = self._search_exa(cleaned_query, include_domains=base_domains)
            if results:
                return results

        # Fallback: add full domains as keywords
        query_with_domains = f"{cleaned_query} {' '.join(full_domains)}".strip()
        return self._search_exa(query_with_domains)

    def test_connection(self) -> dict[str, str]:
        try:
            test_results = self.search("test")
            if not test_results or not any(result.link for result in test_results):
                raise HTTPException(
                    status_code=400,
                    detail="API key validation failed: search returned no results.",
                )
        except HTTPException:
            raise
        except Exception as e:
            error_msg = str(e)
            if (
                "api" in error_msg.lower()
                or "key" in error_msg.lower()
                or "auth" in error_msg.lower()
            ):
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid Exa API key: {error_msg}",
                ) from e
            raise HTTPException(
                status_code=400,
                detail=f"Exa API key validation failed: {error_msg}",
            ) from e

        logger.info("Web search provider test succeeded for Exa.")
        return {"status": "ok"}

    @retry_builder(tries=3, delay=1, backoff=2)
    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        response = self.exa.get_contents(
            urls=list(urls),
            text=True,
            livecrawl="preferred",
        )

        # Exa can return partial/empty content entries; skip those to avoid
        # downstream prompt + UI pollution.
        contents: list[WebContent] = []
        for result in response.results:
            title = (result.title or "").strip()
            full_content = (result.text or "").strip()
            contents.append(
                WebContent(
                    title=title,
                    link=result.url,
                    full_content=full_content,
                    published_date=(
                        time_str_to_utc(result.published_date)
                        if result.published_date
                        else None
                    ),
                    scrape_successful=bool(full_content),
                )
            )

        return contents


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/clients/google_pse_client.py
================================================
from __future__ import annotations

from datetime import datetime
from typing import Any

import requests
from fastapi import HTTPException

from onyx.tools.tool_implementations.web_search.models import (
    WebSearchProvider,
)
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

GOOGLE_CUSTOM_SEARCH_URL = "https://customsearch.googleapis.com/customsearch/v1"


class GooglePSEClient(WebSearchProvider):
    def __init__(
        self,
        api_key: str,
        search_engine_id: str,
        *,
        num_results: int = 10,
        timeout_seconds: int = 10,
    ) -> None:
        self._api_key = api_key
        self._search_engine_id = search_engine_id
        self._num_results = min(num_results, 10)  # Google API max is 10
        self._timeout_seconds = timeout_seconds

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[WebSearchResult]:
        params: dict[str, str] = {
            "key": self._api_key,
            "cx": self._search_engine_id,
            "q": query,
            "num": str(self._num_results),
        }

        response = requests.get(
            GOOGLE_CUSTOM_SEARCH_URL, params=params, timeout=self._timeout_seconds
        )

        # Check for HTTP errors first
        try:
            response.raise_for_status()
        except requests.HTTPError as exc:
            status = response.status_code
            error_detail = "Unknown error"
            try:
                error_data = response.json()
                if "error" in error_data:
                    error_info = error_data["error"]
                    error_detail = error_info.get("message", str(error_info))
            except Exception:
                error_detail = (
                    response.text[:200] if response.text else "No error details"
                )

            raise ValueError(
                f"Google PSE search failed (status {status}): {error_detail}"
            ) from exc

        data = response.json()

        # Google Custom Search API can return errors in the response body even with 200 status
        if "error" in data:
            error_info = data["error"]
            error_message = error_info.get("message", "Unknown error")
            error_code = error_info.get("code", "Unknown")
            raise ValueError(f"Google PSE API error ({error_code}): {error_message}")

        items: list[dict[str, Any]] = data.get("items", [])
        results: list[WebSearchResult] = []

        for item in items:
            link = item.get("link")
            if not link:
                continue

            snippet = item.get("snippet") or ""

            # Attempt to extract metadata if available
            pagemap = item.get("pagemap") or {}
            metatags = pagemap.get("metatags", [])
            published_date: datetime | None = None
            author: str | None = None

            if metatags:
                meta = metatags[0]
                author = meta.get("og:site_name") or meta.get("author")
                published_str = (
                    meta.get("article:published_time")
                    or meta.get("og:updated_time")
                    or meta.get("date")
                )
                if published_str:
                    try:
                        published_date = datetime.fromisoformat(
                            published_str.replace("Z", "+00:00")
                        )
                    except ValueError:
                        logger.debug(
                            f"Failed to parse published_date '{published_str}' for link {link}"
                        )
                        published_date = None

            results.append(
                WebSearchResult(
                    title=item.get("title") or "",
                    link=link,
                    snippet=snippet,
                    author=author,
                    published_date=published_date,
                )
            )

        return results

    # TODO: I'm not really satisfied with how tailored this is to the particulars of Google PSE.
    # In particular, I think this might flatten errors that are caused by the API key vs. ones caused
    # by the search engine ID, or by other factors.
    # I (David Edelstein) don't feel knowledgeable enough about the return behavior of the Google PSE API
    # to ensure that we have nicely descriptive and actionable error messages. (Like, what's up with the
    # thing where 200 status codes can have error messages in the response body?)
    def test_connection(self) -> dict[str, str]:
        try:
            test_results = self.search("test")
            if not test_results or not any(result.link for result in test_results):
                raise HTTPException(
                    status_code=400,
                    detail="Google PSE validation failed: search returned no results.",
                )
        except HTTPException:
            raise
        except Exception as e:
            error_msg = str(e)
            if (
                "api" in error_msg.lower()
                or "key" in error_msg.lower()
                or "auth" in error_msg.lower()
            ):
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid Google PSE API key: {error_msg}",
                ) from e
            raise HTTPException(
                status_code=400,
                detail=f"Google PSE validation failed: {error_msg}",
            ) from e

        logger.info("Web search provider test succeeded for Google PSE.")
        return {"status": "ok"}


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/clients/searxng_client.py
================================================
import requests
from fastapi import HTTPException

from onyx.tools.tool_implementations.web_search.models import (
    WebSearchProvider,
)
from onyx.tools.tool_implementations.web_search.models import (
    WebSearchResult,
)
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()


class SearXNGClient(WebSearchProvider):
    def __init__(
        self,
        searxng_base_url: str,
        num_results: int = 10,
    ) -> None:
        logger.debug(f"Initializing SearXNGClient with base URL: {searxng_base_url}")
        self._searxng_base_url = searxng_base_url
        self._num_results = num_results

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[WebSearchResult]:
        payload = {
            "q": query,
            "format": "json",
        }
        logger.debug(
            f"Searching with payload: {payload} to {self._searxng_base_url}/search"
        )
        response = requests.post(
            f"{self._searxng_base_url}/search",
            data=payload,
        )
        response.raise_for_status()

        results = response.json()
        result_list = results.get("results", [])
        # SearXNG doesn't support limiting results via API parameters,
        # so we limit client-side after receiving the response
        limited_results = result_list[: self._num_results]
        return [
            WebSearchResult(
                title=result["title"],
                link=result["url"],
                snippet=result["content"],
            )
            for result in limited_results
        ]

    def test_connection(self) -> dict[str, str]:
        try:
            logger.debug(f"Testing connection to {self._searxng_base_url}/config")
            response = requests.get(f"{self._searxng_base_url}/config")
            logger.debug(f"Response: {response.status_code}, text: {response.text}")
            response.raise_for_status()
        except requests.HTTPError as e:
            status_code = e.response.status_code
            logger.debug(
                f"HTTPError: status_code={status_code}, e.response={e.response.status_code if e.response else None}, error={e}"
            )
            if status_code == 429:
                raise HTTPException(
                    status_code=400,
                    detail=(
                        "This SearXNG instance does not allow API requests. "
                        "Use a private instance and configure it to allow bots."
                    ),
                ) from e
            elif status_code == 404:
                raise HTTPException(
                    status_code=400,
                    detail="This SearXNG instance was not found. Please check the URL and try again.",
                ) from e
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"SearXNG connection failed (status {status_code}): {str(e)}",
                ) from e

        # Not a sure way to check if this is a SearXNG instance as opposed to some other website that
        # happens to have a /config endpoint containing a "brand" key with a "GIT_URL" key with value
        # "https://github.com/searxng/searxng". I don't think that would happen by coincidence, so I
        # think this is a good enough check for now. I'm open for suggestions on improvements.
        config = response.json()
        if (
            config.get("brand", {}).get("GIT_URL")
            != "https://github.com/searxng/searxng"
        ):
            raise HTTPException(
                status_code=400,
                detail="This does not appear to be a SearXNG instance. Please check the URL and try again.",
            )

        # Test that JSON mode is enabled by performing a simple search
        self._test_json_mode()

        logger.info("Web search provider test succeeded for SearXNG.")
        return {"status": "ok"}

    def _test_json_mode(self) -> None:
        """Test that JSON format is enabled in SearXNG settings.

        SearXNG requires JSON format to be explicitly enabled in settings.yml.
        If it's not enabled, the search endpoint returns a 403.
        """
        try:
            payload = {
                "q": "test",
                "format": "json",
            }
            response = requests.post(
                f"{self._searxng_base_url}/search",
                data=payload,
                timeout=5,
            )
            response.raise_for_status()
        except requests.HTTPError as e:
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 403:
                raise HTTPException(
                    status_code=400,
                    detail=(
                        "Got a 403 response when trying to reach SearXNG. This likely means that "
                        "JSON format is not enabled on this SearXNG instance. "
                        "Please enable JSON format in your SearXNG settings.yml file by adding "
                        "'json' to the 'search.formats' list."
                    ),
                ) from e
            raise HTTPException(
                status_code=400,
                detail=f"Failed to test search on SearXNG instance (status {status_code}): {str(e)}",
            ) from e


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/clients/serper_client.py
================================================
import json
from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor

import requests
from fastapi import HTTPException

from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.models import WebContentProvider
from onyx.tools.tool_implementations.web_search.models import (
    WebSearchProvider,
)
from onyx.tools.tool_implementations.web_search.models import (
    WebSearchResult,
)
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder

logger = setup_logger()

SERPER_SEARCH_URL = "https://google.serper.dev/search"
SERPER_CONTENTS_URL = "https://scrape.serper.dev"

# 1 minute timeout for Serper API requests to prevent indefinite hangs
SERPER_REQUEST_TIMEOUT_SECONDS = 60


class SerperClient(WebSearchProvider, WebContentProvider):
    def __init__(self, api_key: str, num_results: int = 10) -> None:
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }
        self._num_results = num_results

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[WebSearchResult]:
        payload = {
            "q": query,
            "num": self._num_results,
        }

        response = requests.post(
            SERPER_SEARCH_URL,
            headers=self.headers,
            data=json.dumps(payload),
            timeout=SERPER_REQUEST_TIMEOUT_SECONDS,
        )

        response.raise_for_status()

        results = response.json()
        organic_results = results.get("organic") or []

        validated_results: list[WebSearchResult] = []
        for result in organic_results:
            link = (result.get("link") or "").strip()
            if not link:
                continue

            title = (result.get("title") or "").strip()
            snippet = (result.get("snippet") or "").strip()

            validated_results.append(
                WebSearchResult(
                    title=title,
                    link=link,
                    snippet=snippet,
                    author=None,
                    published_date=None,
                )
            )

        return validated_results

    def test_connection(self) -> dict[str, str]:
        try:
            test_results = self.search("test")
            if not test_results or not any(result.link for result in test_results):
                raise HTTPException(
                    status_code=400,
                    detail="API key validation failed: search returned no results.",
                )
        except HTTPException:
            raise
        except Exception as e:
            error_msg = str(e)
            if (
                "api" in error_msg.lower()
                or "key" in error_msg.lower()
                or "auth" in error_msg.lower()
            ):
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid Serper API key: {error_msg}",
                ) from e
            raise HTTPException(
                status_code=400,
                detail=f"Serper API key validation failed: {error_msg}",
            ) from e

        logger.info("Web search provider test succeeded for Serper.")
        return {"status": "ok"}

    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        if not urls:
            return []

        # Serper can responds with 500s regularly. We want to retry,
        # but in the event of failure, return an unsuccesful scrape.
        def safe_get_webpage_content(url: str) -> WebContent:
            try:
                return self._get_webpage_content(url)
            except Exception:
                return WebContent(
                    title="",
                    link=url,
                    full_content="",
                    published_date=None,
                    scrape_successful=False,
                )

        with ThreadPoolExecutor(max_workers=min(8, len(urls))) as e:
            return list(e.map(safe_get_webpage_content, urls))

    @retry_builder(tries=3, delay=1, backoff=2)
    def _get_webpage_content(self, url: str) -> WebContent:
        payload = {
            "url": url,
        }

        response = requests.post(
            SERPER_CONTENTS_URL,
            headers=self.headers,
            data=json.dumps(payload),
            timeout=SERPER_REQUEST_TIMEOUT_SECONDS,
        )

        # 400 returned when serper cannot scrape
        if response.status_code == 400:
            return WebContent(
                title="",
                link=url,
                full_content="",
                published_date=None,
                scrape_successful=False,
            )

        response.raise_for_status()

        response_json = response.json()

        # Response only guarantees text
        text = response_json["text"]

        # metadata & jsonld is not guaranteed to be present
        metadata = response_json.get("metadata", {})
        jsonld = response_json.get("jsonld", {})

        title = extract_title_from_metadata(metadata)

        # Serper does not provide a reliable mechanism to extract the url
        response_url = url
        published_date_str = extract_published_date_from_jsonld(jsonld)
        published_date = None

        if published_date_str:
            try:
                published_date = time_str_to_utc(published_date_str)
            except Exception:
                published_date = None

        return WebContent(
            title=title or "",
            link=response_url,
            full_content=text or "",
            published_date=published_date,
        )


def extract_title_from_metadata(metadata: dict[str, str]) -> str | None:
    keys = ["title", "og:title"]
    return extract_value_from_dict(metadata, keys)


def extract_published_date_from_jsonld(jsonld: dict[str, str]) -> str | None:
    keys = ["dateModified"]
    return extract_value_from_dict(jsonld, keys)


def extract_value_from_dict(data: dict[str, str], keys: list[str]) -> str | None:
    for key in keys:
        if key in data:
            return data[key]
    return None


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/models.py
================================================
from abc import abstractmethod
from collections.abc import Sequence
from datetime import datetime
from enum import Enum

from pydantic import BaseModel
from pydantic import field_validator

from onyx.utils.url import normalize_url

# Fairly loose number but assuming LLMs can easily handle this amount of context
# Approximately 2 pages of google search results
# This is the cap for both when the tool is running a single search and when running multiple queries in parallel
DEFAULT_MAX_RESULTS = 20

WEB_SEARCH_PREFIX = "WEB_SEARCH_DOC_"


class ProviderType(Enum):
    """Enum for internet search provider types"""

    GOOGLE = "google"
    EXA = "exa"


class WebSearchResult(BaseModel):
    title: str
    link: str
    snippet: str
    author: str | None = None
    published_date: datetime | None = None

    @field_validator("link")
    @classmethod
    def normalize_link(cls, v: str) -> str:
        return normalize_url(v)


class WebSearchProvider:
    @property
    def supports_site_filter(self) -> bool:
        """Whether this provider supports the site: operator in queries.
        Override in subclasses that don't support it.
        """
        return True

    @abstractmethod
    def search(self, query: str) -> Sequence[WebSearchResult]:
        pass

    @abstractmethod
    def test_connection(self) -> dict[str, str]:
        pass


class WebContentProviderConfig(BaseModel):
    timeout_seconds: int | None = None
    base_url: str | None = None


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/providers.py
================================================
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import InternetSearchProvider
from onyx.db.web_search import fetch_active_web_content_provider
from onyx.db.web_search import fetch_active_web_search_provider
from onyx.tools.tool_implementations.open_url.firecrawl import FirecrawlClient
from onyx.tools.tool_implementations.open_url.models import (
    WebContentProvider,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_MAX_HTML_SIZE_BYTES,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_MAX_PDF_SIZE_BYTES,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import OnyxWebCrawler
from onyx.tools.tool_implementations.web_search.clients.brave_client import (
    BraveClient,
)
from onyx.tools.tool_implementations.web_search.clients.exa_client import (
    ExaClient,
)
from onyx.tools.tool_implementations.web_search.clients.google_pse_client import (
    GooglePSEClient,
)
from onyx.tools.tool_implementations.web_search.clients.searxng_client import (
    SearXNGClient,
)
from onyx.tools.tool_implementations.web_search.clients.serper_client import (
    SerperClient,
)
from onyx.tools.tool_implementations.web_search.models import DEFAULT_MAX_RESULTS
from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
from onyx.tools.tool_implementations.web_search.models import WebSearchProvider
from onyx.utils.logger import setup_logger
from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType

logger = setup_logger()


def _parse_positive_int_config(
    *,
    raw_value: str | None,
    default: int,
    provider_name: str,
    config_key: str,
) -> int:
    if not raw_value:
        return default
    try:
        value = int(raw_value)
    except ValueError as exc:
        raise ValueError(
            f"{provider_name} provider config '{config_key}' must be an integer."
        ) from exc
    if value <= 0:
        raise ValueError(
            f"{provider_name} provider config '{config_key}' must be greater than 0."
        )
    return value


def provider_requires_api_key(provider_type: WebSearchProviderType) -> bool:
    """Return True if the given provider type requires an API key.
    This list is most likely just going to contain SEARXNG. The way it works is that it uses public search engines that do not
    require an API key. You can also set it up in a way which requires a key but SearXNG itself does not require a key.
    """
    return provider_type != WebSearchProviderType.SEARXNG


def build_search_provider_from_config(
    provider_type: WebSearchProviderType,
    api_key: str | None,
    config: dict[str, str] | None,  # TODO use a typed object
) -> WebSearchProvider:
    config = config or {}
    num_results = int(config.get("num_results") or DEFAULT_MAX_RESULTS)

    # SearXNG does not require an API key
    if provider_type == WebSearchProviderType.SEARXNG:
        searxng_base_url = config.get("searxng_base_url")
        if not searxng_base_url:
            raise ValueError("Please provide a URL for your private SearXNG instance.")
        return SearXNGClient(
            searxng_base_url,
            num_results=num_results,
        )

    # All other providers require an API key
    if not api_key:
        raise ValueError(f"API key is required for {provider_type.value} provider.")

    if provider_type == WebSearchProviderType.EXA:
        return ExaClient(api_key=api_key, num_results=num_results)
    if provider_type == WebSearchProviderType.BRAVE:
        return BraveClient(
            api_key=api_key,
            num_results=num_results,
            timeout_seconds=_parse_positive_int_config(
                raw_value=config.get("timeout_seconds"),
                default=10,
                provider_name="Brave",
                config_key="timeout_seconds",
            ),
            country=config.get("country"),
            search_lang=config.get("search_lang"),
            ui_lang=config.get("ui_lang"),
            safesearch=config.get("safesearch"),
            freshness=config.get("freshness"),
        )
    if provider_type == WebSearchProviderType.SERPER:
        return SerperClient(api_key=api_key, num_results=num_results)
    if provider_type == WebSearchProviderType.GOOGLE_PSE:
        search_engine_id = (
            config.get("search_engine_id")
            or config.get("cx")
            or config.get("search_engine")
        )
        if not search_engine_id:
            raise ValueError(
                "Google PSE provider requires a search engine id (cx) in addition to the API key."
            )
        return GooglePSEClient(
            api_key=api_key,
            search_engine_id=search_engine_id,
            num_results=num_results,
            timeout_seconds=int(config.get("timeout_seconds") or 10),
        )

    raise ValueError(f"Unknown provider type: {provider_type.value}")


def _build_search_provider(provider_model: InternetSearchProvider) -> WebSearchProvider:
    return build_search_provider_from_config(
        provider_type=WebSearchProviderType(provider_model.provider_type),
        api_key=(
            provider_model.api_key.get_value(apply_mask=False)
            if provider_model.api_key
            else None
        ),
        config=provider_model.config or {},
    )


def build_content_provider_from_config(
    *,
    provider_type: WebContentProviderType,
    api_key: str,
    config: WebContentProviderConfig,
) -> WebContentProvider | None:
    if provider_type == WebContentProviderType.ONYX_WEB_CRAWLER:
        if config.timeout_seconds is not None:
            return OnyxWebCrawler(
                timeout_seconds=config.timeout_seconds,
                max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,
                max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,
            )
        return OnyxWebCrawler(
            max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,
            max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,
        )

    if provider_type == WebContentProviderType.FIRECRAWL:
        if config.base_url is None:
            raise ValueError("Firecrawl content provider requires a base URL.")
        if config.timeout_seconds is None:
            return FirecrawlClient(api_key=api_key, base_url=config.base_url)
        return FirecrawlClient(
            api_key=api_key,
            base_url=config.base_url,
            timeout_seconds=config.timeout_seconds,
        )

    if provider_type == WebContentProviderType.EXA:
        return ExaClient(api_key=api_key)


def get_default_provider() -> WebSearchProvider | None:
    with get_session_with_current_tenant() as db_session:
        provider_model = fetch_active_web_search_provider(db_session)
        if provider_model is None:
            return None
        return _build_search_provider(provider_model)


def get_default_content_provider() -> WebContentProvider:
    with get_session_with_current_tenant() as db_session:
        provider_model = fetch_active_web_content_provider(db_session)
        if provider_model:
            provider = build_content_provider_from_config(
                provider_type=WebContentProviderType(provider_model.provider_type),
                api_key=(
                    provider_model.api_key.get_value(apply_mask=False)
                    if provider_model.api_key
                    else ""
                ),
                config=provider_model.config or WebContentProviderConfig(),
            )
            if provider:
                return provider

    return OnyxWebCrawler(
        max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,
        max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,
    )


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/utils.py
================================================
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.snippet_matcher import (
    find_snippet_in_content,
)
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
from onyx.tools.tool_implementations.web_search.models import WebSearchResult


TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "

MAX_CHARS_PER_URL = 15000


def filter_web_search_results_with_no_title_or_snippet(
    results: list[WebSearchResult],
) -> list[WebSearchResult]:
    """Filter out results that have neither a title nor a snippet.

    Some providers can return entries that only include a URL. Downstream uses
    titles/snippets for display and prompting, so we drop those empty entries
    centrally (rather than duplicating the check in each client).
    """
    filtered: list[WebSearchResult] = []
    for result in results:
        if result.title.strip() or result.snippet.strip():
            filtered.append(result)
    return filtered


def truncate_search_result_content(
    content: str, max_chars: int = MAX_CHARS_PER_URL
) -> str:
    """Truncate search result content to a maximum number of characters"""
    if len(content) <= max_chars:
        return content
    return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX


def _truncate_content_around_snippet(
    content: str, snippet: str, max_chars: int = MAX_CHARS_PER_URL
) -> str:
    """
    Truncates content around snippet with max_chars

    Assumes snippet exists
    """
    result = find_snippet_in_content(content, snippet)

    if not result.snippet_located:
        return ""

    start_idx = result.start_idx
    end_idx = result.end_idx

    new_start, new_end = _expand_range_centered(
        start_idx, end_idx + 1, len(content), max_chars
    )

    truncated_content = content[new_start:new_end]

    # Add the AFFIX to the start and end of truncated content
    if new_start > 0:
        truncated_content = TRUNCATED_CONTENT_PREFIX + truncated_content

    if new_end < len(content):
        truncated_content = truncated_content + TRUNCATED_CONTENT_SUFFIX

    return truncated_content


def _expand_range_centered(
    start_idx: int, end_idx: int, N: int, target_size: int
) -> tuple[int, int]:
    """
    Expands a range [start_idx, end_idx) to be centered within a list of size N

    Args:
        start_idx: Starting index (inclusive)
        end_idx: Ending index (exclusive)
        N: Size of the list
        target_size: Target size of the range

    Returns:
        Tuple of (new start index, new end index)
    """
    current_size = end_idx - start_idx

    if current_size >= target_size:
        return start_idx, end_idx

    padding_needed = target_size - current_size
    padding_top = padding_needed // 2
    padding_bottom = padding_needed - padding_top

    # Try expand symmetrically
    new_start = start_idx - padding_top
    new_end = end_idx + padding_bottom

    # Handle overflow
    if new_start < 0:
        overflow = -new_start
        new_start = 0
        new_end = min(N, new_end + overflow)

    if new_end > N:
        overflow = new_end - N
        new_end = N
        new_start = max(0, new_start - overflow)

    return new_start, new_end


def inference_section_from_internet_page_scrape(
    result: WebContent,
    snippet: str,
    rank: int = 0,
) -> InferenceSection:
    # truncate the content around snippet if snippet exists
    truncated_content = ""
    if snippet:
        truncated_content = _truncate_content_around_snippet(
            result.full_content, snippet
        )

    # Fallback if no snippet exists or we failed to find it
    if not truncated_content:
        truncated_content = truncate_search_result_content(result.full_content)

    # Calculate score using reciprocal rank to preserve ordering
    score = 1.0 / (rank + 1)

    inference_chunk = InferenceChunk(
        chunk_id=0,
        blurb=result.title,
        content=truncated_content,
        source_links={0: result.link},
        section_continuation=False,
        document_id=WEB_SEARCH_PREFIX + result.link,
        source_type=DocumentSource.WEB,
        semantic_identifier=result.title,
        title=result.title,
        boost=1,
        score=score,
        hidden=False,
        metadata={},
        match_highlights=[truncated_content],
        doc_summary="",
        chunk_context="",
        updated_at=result.published_date,
        image_file_id=None,
    )
    return InferenceSection(
        center_chunk=inference_chunk,
        chunks=[inference_chunk],
        combined_content=truncated_content,
    )


def inference_section_from_internet_search_result(
    result: WebSearchResult,
    rank: int = 0,
) -> InferenceSection:
    # Calculate score using reciprocal rank to preserve ordering
    score = 1.0 / (rank + 1)

    chunk = InferenceChunk(
        chunk_id=0,
        blurb=result.snippet,
        content=result.snippet,
        source_links={0: result.link},
        section_continuation=False,
        document_id=WEB_SEARCH_PREFIX + result.link,
        source_type=DocumentSource.WEB,
        semantic_identifier=result.title,
        title=result.title,
        boost=1,
        score=score,
        hidden=False,
        metadata={},
        match_highlights=[result.snippet],
        doc_summary="",
        chunk_context="",
        updated_at=result.published_date,
        image_file_id=None,
    )

    return InferenceSection(
        center_chunk=chunk,
        chunks=[chunk],
        combined_content=result.snippet,
    )


def extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:
    """
    Given a list of SearchDocs, this will extract the url -> summary map.
    """
    url_snippet_map: dict[str, str] = {}
    for document in documents:
        if document.source_type == DocumentSource.WEB and document.link:
            url_snippet_map[document.link] = document.blurb
    return url_snippet_map


================================================
FILE: backend/onyx/tools/tool_implementations/web_search/web_search_tool.py
================================================
import json
from typing import Any

from sqlalchemy.orm import Session
from typing_extensions import override

from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDocsResponse
from onyx.context.search.utils import convert_inference_sections_to_search_docs
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.web_search import fetch_active_web_search_provider
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.tools.interface import Tool
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.utils import (
    convert_inference_sections_to_llm_string,
)
from onyx.tools.tool_implementations.web_search.models import DEFAULT_MAX_RESULTS
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.providers import (
    provider_requires_api_key,
)
from onyx.tools.tool_implementations.web_search.utils import (
    filter_web_search_results_with_no_title_or_snippet,
)
from onyx.tools.tool_implementations.web_search.utils import (
    inference_section_from_internet_search_result,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from shared_configs.enums import WebSearchProviderType

logger = setup_logger()

QUERIES_FIELD = "queries"


def _sanitize_query(query: str) -> str:
    """Remove control characters and normalize whitespace in a query.

    LLMs sometimes produce queries with null characters or other control
    characters that need to be stripped before sending to search providers.
    """
    # Remove control characters (ASCII 0-31 and 127 DEL)
    sanitized = "".join(c for c in query if ord(c) >= 32 and ord(c) != 127)
    # Collapse multiple whitespace characters into single space and strip
    return " ".join(sanitized.split())


def _normalize_queries_input(raw: Any) -> list[str]:
    """Coerce LLM output to a list of sanitized query strings.

    Accepts a bare string or a list (possibly with non-string elements).
    Sanitizes each query (strip control chars, normalize whitespace) and
    drops empty or whitespace-only entries.
    """
    if isinstance(raw, str):
        raw = raw.strip()
        if not raw:
            return []
        raw = [raw]
    elif not isinstance(raw, list):
        return []
    result: list[str] = []
    for q in raw:
        if q is None:
            continue
        sanitized = _sanitize_query(str(q))
        if sanitized:
            result.append(sanitized)
    return result


class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
    NAME = "web_search"
    DESCRIPTION = "Search the web for information."
    DISPLAY_NAME = "Web Search"

    def __init__(self, tool_id: int, emitter: Emitter) -> None:
        super().__init__(emitter=emitter)
        self._id = tool_id

        # Get web search provider from database
        with get_session_with_current_tenant() as db_session:
            provider_model = fetch_active_web_search_provider(db_session)
            if provider_model is None:
                raise RuntimeError("No web search provider configured.")
            provider_type = WebSearchProviderType(provider_model.provider_type)
            api_key = (
                provider_model.api_key.get_value(apply_mask=False)
                if provider_model.api_key
                else None
            )
            config = provider_model.config

        if provider_requires_api_key(provider_type) and api_key is None:
            raise RuntimeError(
                f"No API key configured for {provider_type.value} web search provider."
            )

        self._provider = build_search_provider_from_config(
            provider_type=provider_type,
            api_key=api_key,
            config=config,
        )

    @property
    def id(self) -> int:
        return self._id

    @property
    def name(self) -> str:
        return self.NAME

    @property
    def description(self) -> str:
        return self.DESCRIPTION

    @property
    def display_name(self) -> str:
        return self.DISPLAY_NAME

    @property
    def supports_site_filter(self) -> bool:
        """Whether the underlying provider supports site: operator."""
        return self._provider.supports_site_filter

    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:
        """Available only if an active web search provider is configured in the database."""
        with get_session_with_current_tenant() as session:
            provider = fetch_active_web_search_provider(session)
            return provider is not None

    def tool_definition(self) -> dict:
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": (
                    "Search the web for information. Returns a list of search results with titles, metadata, and snippets."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        QUERIES_FIELD: {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "One or more queries to look up on the web. Must contain only printable characters",
                        },
                    },
                    "required": [QUERIES_FIELD],
                },
            },
        }

    def emit_start(self, placement: Placement) -> None:
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolStart(is_internet_search=True),
            )
        )

    def _safe_execute_single_search(
        self,
        query: str,
        provider: Any,
    ) -> tuple[list[WebSearchResult] | None, str | None]:
        """Execute a single search query and return results with error capture.

        Returns:
            A tuple of (results, error_message). If successful, error_message is None.
            If failed, results is None and error_message contains the error.
        """
        try:
            raw_results = list(provider.search(query))
            filtered_results = filter_web_search_results_with_no_title_or_snippet(
                raw_results
            )
            results = filtered_results[:DEFAULT_MAX_RESULTS]
            return (results, None)
        except Exception as e:
            error_msg = str(e)
            logger.warning(f"Web search query '{query}' failed: {error_msg}")
            return (None, error_msg)

    def run(
        self,
        placement: Placement,
        override_kwargs: WebSearchToolOverrideKwargs,
        **llm_kwargs: Any,
    ) -> ToolResponse:
        """Execute the web search tool with multiple queries in parallel"""
        if QUERIES_FIELD not in llm_kwargs:
            raise ToolCallException(
                message=f"Missing required '{QUERIES_FIELD}' parameter in web_search tool call",
                llm_facing_message=(
                    f"The web_search tool requires a '{QUERIES_FIELD}' parameter "
                    f"containing an array of search queries. Please provide the queries "
                    f'like: {{"queries": ["your search query here"]}}'
                ),
            )
        queries = _normalize_queries_input(llm_kwargs[QUERIES_FIELD])
        if not queries:
            raise ToolCallException(
                message=(
                    "No valid web search queries provided; all queries were empty or whitespace-only after trimming."
                ),
                llm_facing_message=(
                    "No valid web search queries were provided (they were empty or "
                    "whitespace-only). Please provide a real search query."
                ),
            )

        # Emit queries
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolQueriesDelta(queries=queries),
            )
        )

        # Perform searches in parallel with error capture
        functions_with_args = [
            (self._safe_execute_single_search, (query, self._provider))
            for query in queries
        ]
        search_results_with_errors: list[
            tuple[list[WebSearchResult] | None, str | None]
        ] = run_functions_tuples_in_parallel(
            functions_with_args,
            allow_failures=False,  # Our wrapper handles errors internally
        )

        # Separate successful results from failures
        valid_results: list[list[WebSearchResult]] = []
        failed_queries: dict[str, str] = {}

        for query, (results, error) in zip(queries, search_results_with_errors):
            if error is not None:
                failed_queries[query] = error
            elif results is not None:
                valid_results.append(results)

        # Log partial failures but continue if we have at least one success
        if failed_queries and valid_results:
            logger.warning(
                f"Web search partial failure: {len(failed_queries)}/{len(queries)} "
                f"queries failed. Failed queries: {json.dumps(failed_queries)}"
            )

        # If all queries failed, raise ToolCallException with details
        if not valid_results:
            error_details = json.dumps(failed_queries, indent=2)
            raise ToolCallException(
                message=f"All web search queries failed: {error_details}",
                llm_facing_message=(
                    f"All web search queries failed. Query failures:\n{error_details}"
                ),
            )

        # Interweave top results from each query in round-robin fashion
        all_search_results: list[WebSearchResult] = []

        if valid_results:
            # Track seen (title, url) pairs to avoid duplicates
            seen = set()
            # Track current index for each result set
            indices = [0] * len(valid_results)

            # Round-robin interweaving: cycle through result sets and increment indices
            while len(all_search_results) < DEFAULT_MAX_RESULTS:
                added_any = False
                for idx, results in enumerate(valid_results):
                    if len(all_search_results) >= DEFAULT_MAX_RESULTS:
                        break
                    if indices[idx] < len(results):
                        result = results[indices[idx]]
                        key = (result.title, result.link)
                        if key not in seen:
                            seen.add(key)
                            all_search_results.append(result)
                            added_any = True
                        indices[idx] += 1
                # Stop if no more results to add
                if not added_any:
                    break

        # This should be a very rare case and is due to not failing loudly enough in the search provider implementation.
        if not all_search_results:
            raise ToolCallException(
                message="Web search queries succeeded but returned no results",
                llm_facing_message=(
                    "Web search completed but found no results for the given queries. "
                    "Try rephrasing or using different search terms."
                ),
            )

        # Convert search results to InferenceSections with rank-based scoring
        inference_sections = [
            inference_section_from_internet_search_result(result, rank=i)
            for i, result in enumerate(all_search_results)
        ]

        # Convert to SearchDocs
        search_docs = convert_inference_sections_to_search_docs(
            inference_sections, is_internet=True
        )

        # Emit documents
        self.emitter.emit(
            Packet(
                placement=placement,
                obj=SearchToolDocumentsDelta(documents=search_docs),
            )
        )

        # Format for LLM
        if not all_search_results:
            docs_str = json.dumps(
                {
                    "results": [],
                    "message": "The web search completed but returned no results for any of the queries. Do not search again.",
                }
            )
            citation_mapping: dict[int, str] = {}
        else:
            docs_str, citation_mapping = convert_inference_sections_to_llm_string(
                top_sections=inference_sections,
                citation_start=override_kwargs.starting_citation_num,
                limit=None,  # Already truncated
                include_source_type=False,
                include_link=True,
            )

        return ToolResponse(
            rich_response=SearchDocsResponse(
                search_docs=search_docs, citation_mapping=citation_mapping
            ),
            llm_facing_response=docs_str,
        )


================================================
FILE: backend/onyx/tools/tool_runner.py
================================================
import json
import traceback
from collections import defaultdict
from typing import Any

import onyx.tracing.framework._error_tracing as _error_tracing
from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.db.memory import UserMemoryContext
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import OpenURLToolOverrideKwargs
from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import PythonToolOverrideKwargs
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tracing.framework.create import function_span
from onyx.tracing.framework.spans import SpanError
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel

logger = setup_logger()

QUERIES_FIELD = "queries"
URLS_FIELD = "urls"
GENERIC_TOOL_ERROR_MESSAGE = "Tool failed with error: {error}"

# 10 minute timeout for tool execution to prevent indefinite hangs
TOOL_EXECUTION_TIMEOUT_SECONDS = 10 * 60

# Mapping of tool name to the field that should be merged when multiple calls exist
MERGEABLE_TOOL_FIELDS: dict[str, str] = {
    SearchTool.NAME: QUERIES_FIELD,
    WebSearchTool.NAME: QUERIES_FIELD,
    OpenURLTool.NAME: URLS_FIELD,
}


def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff]:
    """Merge multiple tool calls for SearchTool, WebSearchTool, or OpenURLTool into a single call.

    For SearchTool (internal_search) and WebSearchTool (web_search), if there are
    multiple calls, their queries are merged into a single tool call.
    For OpenURLTool (open_url), multiple calls have their urls merged.
    Other tool calls are left unchanged.

    Args:
        tool_calls: List of tool calls to potentially merge

    Returns:
        List of merged tool calls
    """
    # Group tool calls by tool name
    tool_calls_by_name: dict[str, list[ToolCallKickoff]] = defaultdict(list)
    merged_calls: list[ToolCallKickoff] = []

    for tool_call in tool_calls:
        tool_calls_by_name[tool_call.tool_name].append(tool_call)

    # Process each tool name group
    for tool_name, calls in tool_calls_by_name.items():
        if tool_name in MERGEABLE_TOOL_FIELDS and len(calls) > 1:
            merge_field = MERGEABLE_TOOL_FIELDS[tool_name]

            # Merge field values from all calls
            all_values: list[str] = []
            for call in calls:
                values = call.tool_args.get(merge_field, [])
                if isinstance(values, list):
                    all_values.extend(values)
                elif values:
                    # Handle case where it might be a single string
                    all_values.append(str(values))

            # Create a merged tool call using the first call's ID and merging the field
            merged_args = calls[0].tool_args.copy()
            merged_args[merge_field] = all_values

            merged_call = ToolCallKickoff(
                tool_call_id=calls[0].tool_call_id,  # Use first call's ID
                tool_name=tool_name,
                tool_args=merged_args,
                # Use first call's placement since merged calls become a single call
                placement=calls[0].placement,
            )
            merged_calls.append(merged_call)
        else:
            # No merging needed, add all calls as-is
            merged_calls.extend(calls)

    return merged_calls


def _extract_image_file_ids_from_tool_response_message(
    message: str,
) -> list[str]:
    try:
        parsed_message = json.loads(message)
    except json.JSONDecodeError:
        return []

    parsed_items: list[Any] = (
        parsed_message if isinstance(parsed_message, list) else [parsed_message]
    )
    file_ids: list[str] = []
    for item in parsed_items:
        if not isinstance(item, dict):
            continue

        file_id = item.get("file_id")
        if isinstance(file_id, str):
            file_ids.append(file_id)

    return file_ids


def _extract_recent_generated_image_file_ids(
    message_history: list[ChatMessageSimple],
) -> list[str]:
    tool_name_by_tool_call_id: dict[str, str] = {}
    recent_image_file_ids: list[str] = []
    seen_file_ids: set[str] = set()

    for message in message_history:
        if message.message_type == MessageType.ASSISTANT and message.tool_calls:
            for tool_call in message.tool_calls:
                tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name
            continue

        if (
            message.message_type != MessageType.TOOL_CALL_RESPONSE
            or not message.tool_call_id
        ):
            continue

        tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)
        if tool_name != ImageGenerationTool.NAME:
            continue

        for file_id in _extract_image_file_ids_from_tool_response_message(
            message.message
        ):
            if file_id in seen_file_ids:
                continue
            seen_file_ids.add(file_id)
            recent_image_file_ids.append(file_id)

    return recent_image_file_ids


def _safe_run_single_tool(
    tool: Tool,
    tool_call: ToolCallKickoff,
    override_kwargs: Any,
) -> ToolResponse:
    """Execute a single tool and return its response.

    This function is designed to be run in parallel via run_functions_tuples_in_parallel.

    Exception handling:
    - ToolCallException: Expected errors from tool execution (e.g., invalid input,
      API failures). Uses the exception's llm_facing_message for LLM consumption.
    - Other exceptions: Unexpected errors. Uses a generic error message.

    In all cases (success or failure):
    - SectionEnd packet is emitted to signal tool completion
    - tool_call is set on the response for downstream processing
    """
    tool_response: ToolResponse | None = None

    with function_span(tool.name) as span_fn:
        span_fn.span_data.input = str(tool_call.tool_args)
        try:
            tool_response = tool.run(
                placement=tool_call.placement,
                override_kwargs=override_kwargs,
                **tool_call.tool_args,
            )
            span_fn.span_data.output = tool_response.llm_facing_response
        except ToolCallException as e:
            # ToolCallException is an expected error from tool execution
            # Use llm_facing_message which is specifically designed for LLM consumption
            logger.error(f"Tool call error for {tool.name}: {e}")
            tool_response = ToolResponse(
                rich_response=None,
                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(
                    error=e.llm_facing_message
                ),
            )
            _error_tracing.attach_error_to_current_span(
                SpanError(
                    message="Tool call error (expected)",
                    data={
                        "tool_name": tool.name,
                        "tool_call_id": tool_call.tool_call_id,
                        "tool_args": tool_call.tool_args,
                        "error": str(e),
                        "llm_facing_message": e.llm_facing_message,
                        "stack_trace": traceback.format_exc(),
                        "error_type": "ToolCallException",
                    },
                )
            )
        except ToolExecutionException as e:
            # Unexpected error during tool execution
            logger.error(f"Unexpected error running tool {tool.name}: {e}")
            tool_response = ToolResponse(
                rich_response=None,
                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
            )
            _error_tracing.attach_error_to_current_span(
                SpanError(
                    message="Tool execution error (unexpected)",
                    data={
                        "tool_name": tool.name,
                        "tool_call_id": tool_call.tool_call_id,
                        "tool_args": tool_call.tool_args,
                        "error": str(e),
                        "stack_trace": traceback.format_exc(),
                        "error_type": type(e).__name__,
                    },
                )
            )
            if e.emit_error_packet:
                tool.emitter.emit(
                    Packet(
                        placement=tool_call.placement,
                        obj=PacketException(exception=e),
                    )
                )
        except Exception as e:
            # Unexpected error during tool execution
            logger.error(f"Unexpected error running tool {tool.name}: {e}")
            tool_response = ToolResponse(
                rich_response=None,
                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
            )
            _error_tracing.attach_error_to_current_span(
                SpanError(
                    message="Tool execution error (unexpected)",
                    data={
                        "tool_name": tool.name,
                        "tool_call_id": tool_call.tool_call_id,
                        "tool_args": tool_call.tool_args,
                        "error": str(e),
                        "stack_trace": traceback.format_exc(),
                        "error_type": type(e).__name__,
                    },
                )
            )

    # Emit SectionEnd after tool completes (success or failure)
    tool.emitter.emit(
        Packet(
            placement=tool_call.placement,
            obj=SectionEnd(),
        )
    )

    # Set tool_call on the response for downstream processing
    tool_response.tool_call = tool_call
    return tool_response


def run_tool_calls(
    tool_calls: list[ToolCallKickoff],
    tools: list[Tool],
    # The stuff below is needed for the different individual built-in tools
    message_history: list[ChatMessageSimple],
    user_memory_context: UserMemoryContext | None,
    user_info: str | None,
    citation_mapping: dict[int, str],
    next_citation_num: int,
    # Max number of tools to run concurrently (and overall) in this batch.
    # If set, tool calls beyond this limit are dropped.
    max_concurrent_tools: int | None = None,
    # Skip query expansion for repeat search tool calls
    skip_search_query_expansion: bool = False,
    # Files from the chat session to pass to tools like PythonTool
    chat_files: list[ChatFile] | None = None,
    # A map of url -> summary for passing web results to open url tool
    url_snippet_map: dict[str, str] = {},
    # When False, don't pass memory context to search tools for query expansion
    # (but still pass it to the memory tool for persistence)
    inject_memories_in_prompt: bool = True,
) -> ParallelToolCallResponse:
    """Run (optionally merged) tool calls in parallel and update citation mappings.

    Before execution, tool calls for `SearchTool`, `WebSearchTool`, and `OpenURLTool`
    are merged so repeated calls are collapsed into a single call per tool:
    - `SearchTool` / `WebSearchTool`: merge the `queries` list
    - `OpenURLTool`: merge the `urls` list

    Tools are executed in parallel (threadpool). For tools that generate citations,
    each tool call is assigned a **distinct** `starting_citation_num` range to avoid
    citation number collisions when running concurrently (the range is advanced by
    100 per tool call).

    The provided `citation_mapping` may be mutated in-place: any new
    `SearchDocsResponse.citation_mapping` entries are merged into it.

    Args:
        tool_calls: List of tool calls to execute.
        tools: List of available tool instances.
        message_history: Chat message history (used to find the most recent user query
            for `SearchTool` override kwargs).
        user_memory_context: User memory context, if available (passed through to `SearchTool`).
        user_info: User information string, if available (passed through to `SearchTool`).
        citation_mapping: Current citation number to URL mapping. May be updated with
            new citations produced by search tools.
        next_citation_num: The next citation number to allocate from.
        max_concurrent_tools: Max number of tools to run in this batch. If set, any
            tool calls after this limit are dropped (not queued).
        skip_search_query_expansion: Whether to skip query expansion for `SearchTool`
            (intended for repeated search calls within the same chat turn).

    Returns:
        A `ParallelToolCallResponse` containing:
        - `tool_responses`: `ToolResponse` objects for successfully dispatched tool calls
          (each has `tool_call` set). If a tool execution fails at the threadpool layer,
          its entry will be omitted.
        - `updated_citation_mapping`: The updated citation mapping dictionary.
    """
    # Merge tool calls for SearchTool, WebSearchTool, and OpenURLTool
    merged_tool_calls = _merge_tool_calls(tool_calls)

    if not merged_tool_calls:
        return ParallelToolCallResponse(
            tool_responses=[],
            updated_citation_mapping=citation_mapping,
        )

    tools_by_name = {tool.name: tool for tool in tools}

    # Drop unknown tools (and don't let them count against the cap)
    filtered_tool_calls: list[ToolCallKickoff] = []
    for tool_call in merged_tool_calls:
        if tool_call.tool_name not in tools_by_name:
            logger.warning(f"Tool {tool_call.tool_name} not found in tools list")
            continue
        filtered_tool_calls.append(tool_call)

    # Apply safety cap (drop tool calls beyond the cap)
    if max_concurrent_tools is not None:
        if max_concurrent_tools <= 0:
            return ParallelToolCallResponse(
                tool_responses=[],
                updated_citation_mapping=citation_mapping,
            )
        filtered_tool_calls = filtered_tool_calls[:max_concurrent_tools]

    # Get starting citation number from citation processor to avoid conflicts with project files
    starting_citation_num = next_citation_num

    # Prepare minimal history for SearchTool (computed once, shared by all)
    minimal_history = [
        ChatMinimalTextMessage(message=msg.message, message_type=msg.message_type)
        for msg in message_history
    ]
    last_user_message = None
    for i in range(len(minimal_history) - 1, -1, -1):
        if minimal_history[i].message_type == MessageType.USER:
            last_user_message = minimal_history[i].message
            break

    # Convert citation_mapping for OpenURLTool (computed once, shared by all)
    url_to_citation: dict[str, int] = {
        url: citation_num for citation_num, url in citation_mapping.items()
    }
    recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(
        message_history
    )

    # Prepare all tool calls with their override_kwargs
    # Each tool gets a unique starting citation number to avoid conflicts when running in parallel
    tool_run_params: list[tuple[Tool, ToolCallKickoff, Any]] = []

    for tool_call in filtered_tool_calls:
        tool = tools_by_name[tool_call.tool_name]

        # Emit the tool start packet before running the tool
        tool.emit_start(placement=tool_call.placement)

        override_kwargs: (
            SearchToolOverrideKwargs
            | WebSearchToolOverrideKwargs
            | OpenURLToolOverrideKwargs
            | PythonToolOverrideKwargs
            | ImageGenerationToolOverrideKwargs
            | MemoryToolOverrideKwargs
            | None
        ) = None

        if isinstance(tool, SearchTool):
            if last_user_message is None:
                raise ValueError("No user message found in message history")

            search_memory_context = (
                user_memory_context
                if inject_memories_in_prompt
                else (
                    user_memory_context.without_memories()
                    if user_memory_context
                    else None
                )
            )
            override_kwargs = SearchToolOverrideKwargs(
                starting_citation_num=starting_citation_num,
                original_query=last_user_message,
                message_history=minimal_history,
                user_memory_context=search_memory_context,
                user_info=user_info,
                skip_query_expansion=skip_search_query_expansion,
            )
            # Increment citation number for next search tool to avoid conflicts
            # Estimate: reserve 100 citation slots per search tool
            starting_citation_num += 100

        elif isinstance(tool, WebSearchTool):
            override_kwargs = WebSearchToolOverrideKwargs(
                starting_citation_num=starting_citation_num,
            )
            # Increment citation number for next search tool to avoid conflicts
            starting_citation_num += 100

        elif isinstance(tool, OpenURLTool):
            override_kwargs = OpenURLToolOverrideKwargs(
                starting_citation_num=starting_citation_num,
                citation_mapping=url_to_citation,
                url_snippet_map=url_snippet_map,
            )
            starting_citation_num += 100

        elif isinstance(tool, PythonTool):
            override_kwargs = PythonToolOverrideKwargs(
                chat_files=chat_files or [],
            )
        elif isinstance(tool, ImageGenerationTool):
            override_kwargs = ImageGenerationToolOverrideKwargs(
                recent_generated_image_file_ids=recent_generated_image_file_ids
            )
        elif isinstance(tool, MemoryTool):
            override_kwargs = MemoryToolOverrideKwargs(
                user_name=(
                    user_memory_context.user_info.name if user_memory_context else None
                ),
                user_email=(
                    user_memory_context.user_info.email if user_memory_context else None
                ),
                user_role=(
                    user_memory_context.user_info.role if user_memory_context else None
                ),
                existing_memories=(
                    list(user_memory_context.memories) if user_memory_context else []
                ),
                chat_history=minimal_history,
            )

        tool_run_params.append((tool, tool_call, override_kwargs))

    # Run all tools in parallel
    functions_with_args = [
        (_safe_run_single_tool, (tool, tool_call, override_kwargs))
        for tool, tool_call, override_kwargs in tool_run_params
    ]

    tool_run_results: list[ToolResponse | None] = run_functions_tuples_in_parallel(
        functions_with_args,
        allow_failures=True,  # Continue even if some tools fail
        max_workers=max_concurrent_tools,
        timeout=TOOL_EXECUTION_TIMEOUT_SECONDS,
    )

    # Process results and update citation_mapping
    for result in tool_run_results:
        if result is None:
            continue

        if result and isinstance(result.rich_response, SearchDocsResponse):
            new_citations = result.rich_response.citation_mapping
            if new_citations:
                # Merge new citations into the existing mapping
                citation_mapping.update(new_citations)

    tool_responses = [result for result in tool_run_results if result is not None]
    return ParallelToolCallResponse(
        tool_responses=tool_responses,
        updated_citation_mapping=citation_mapping,
    )


================================================
FILE: backend/onyx/tools/utils.py
================================================
import json

from sqlalchemy.orm import Session

from onyx.configs.app_configs import AZURE_IMAGE_API_KEY
from onyx.db.connector import check_connectors_exist
from onyx.db.document import check_docs_exist
from onyx.db.models import LLMProvider
from onyx.llm.constants import LlmProviderNames
from onyx.llm.utils import find_model_obj
from onyx.llm.utils import get_model_map
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.tools.interface import Tool


def explicit_tool_calling_supported(model_provider: str, model_name: str) -> bool:
    model_map = get_model_map()
    model_obj = find_model_obj(
        model_map=model_map,
        provider=model_provider,
        model_name=model_name,
    )

    model_supports = (
        model_obj.get("supports_function_calling", False) if model_obj else False
    )
    return model_supports


def compute_tool_tokens(tool: Tool, llm_tokenizer: BaseTokenizer) -> int:
    return len(llm_tokenizer.encode(json.dumps(tool.tool_definition())))


def compute_all_tool_tokens(tools: list[Tool], llm_tokenizer: BaseTokenizer) -> int:
    return sum(compute_tool_tokens(tool, llm_tokenizer) for tool in tools)


def is_image_generation_available(db_session: Session) -> bool:
    providers = db_session.query(LLMProvider).all()
    for provider in providers:
        if provider.provider == LlmProviderNames.OPENAI:
            return True

    return bool(AZURE_IMAGE_API_KEY)


def is_document_search_available(db_session: Session) -> bool:
    docs_exist = check_docs_exist(db_session)
    connectors_exist = check_connectors_exist(db_session)
    return docs_exist or connectors_exist


def generate_tools_description(tools: list[Tool]) -> str:
    if not tools:
        return ""
    if len(tools) == 1:
        return tools[0].name
    if len(tools) == 2:
        return f"{tools[0].name} and {tools[1].name}"

    names = [tool.name for tool in tools[:-1]]
    return ", ".join(names) + f", and {tools[-1].name}"


================================================
FILE: backend/onyx/tracing/braintrust_tracing_processor.py
================================================
import datetime
from typing import Any
from typing import Dict
from typing import Optional

import braintrust
from braintrust import NOOP_SPAN

from .framework.processor_interface import TracingProcessor
from .framework.span_data import AgentSpanData
from .framework.span_data import FunctionSpanData
from .framework.span_data import GenerationSpanData
from .framework.span_data import SpanData
from .framework.spans import Span
from .framework.traces import Trace
from onyx.llm.cost import calculate_llm_cost_cents


def _span_type(span: Span[Any]) -> braintrust.SpanTypeAttribute:
    if span.span_data.type in ["agent"]:
        return braintrust.SpanTypeAttribute.TASK
    elif span.span_data.type in ["function"]:
        return braintrust.SpanTypeAttribute.TOOL
    elif span.span_data.type in ["generation"]:
        return braintrust.SpanTypeAttribute.LLM
    else:
        return braintrust.SpanTypeAttribute.TASK


def _span_name(span: Span[Any]) -> str:
    if isinstance(span.span_data, AgentSpanData) or isinstance(
        span.span_data, FunctionSpanData
    ):
        return span.span_data.name
    elif isinstance(span.span_data, GenerationSpanData):
        return "Generation"
    else:
        return "Unknown"


def _timestamp_from_maybe_iso(timestamp: Optional[str]) -> Optional[float]:
    if timestamp is None:
        return None
    return datetime.datetime.fromisoformat(timestamp).timestamp()


def _maybe_timestamp_elapsed(
    end: Optional[str], start: Optional[str]
) -> Optional[float]:
    if start is None or end is None:
        return None
    return (
        datetime.datetime.fromisoformat(end) - datetime.datetime.fromisoformat(start)
    ).total_seconds()


class BraintrustTracingProcessor(TracingProcessor):
    """
    `BraintrustTracingProcessor` is a `tracing.TracingProcessor` that logs traces to Braintrust.

    Args:
        logger: A `braintrust.Span` or `braintrust.Experiment` or `braintrust.Logger` to use for logging.
            If `None`, the current span, experiment, or logger will be selected exactly as in `braintrust.start_span`.
    """

    def __init__(self, logger: Optional[braintrust.Logger] = None):
        self._logger = logger
        self._spans: Dict[str, Any] = {}
        self._first_input: Dict[str, Any] = {}
        self._last_output: Dict[str, Any] = {}
        self._trace_metadata: Dict[str, Dict[str, Any]] = {}
        self._span_names: Dict[str, str] = {}

    def on_trace_start(self, trace: Trace) -> None:
        trace_meta = trace.export() or {}
        metadata = trace_meta.get("metadata") or {}
        if metadata:
            self._trace_metadata[trace.trace_id] = metadata

        current_context = braintrust.current_span()
        if current_context != NOOP_SPAN:
            self._spans[trace.trace_id] = current_context.start_span(
                name=trace.name,
                span_attributes={"type": "task", "name": trace.name},
                metadata=metadata,
            )
        elif self._logger is not None:
            self._spans[trace.trace_id] = self._logger.start_span(
                span_attributes={"type": "task", "name": trace.name},
                span_id=trace.trace_id,
                root_span_id=trace.trace_id,
                metadata=metadata,
            )
        else:
            self._spans[trace.trace_id] = braintrust.start_span(
                id=trace.trace_id,
                span_attributes={"type": "task", "name": trace.name},
                metadata=metadata,
            )
        self._span_names[trace.trace_id] = trace.name

    def on_trace_end(self, trace: Trace) -> None:
        span: Any = self._spans.pop(trace.trace_id)
        self._trace_metadata.pop(trace.trace_id, None)
        self._span_names.pop(trace.trace_id, None)
        # Get the first input and last output for this specific trace
        trace_first_input = self._first_input.pop(trace.trace_id, None)
        trace_last_output = self._last_output.pop(trace.trace_id, None)
        span.log(input=trace_first_input, output=trace_last_output)
        span.end()

    def _agent_log_data(self, span: Span[AgentSpanData]) -> Dict[str, Any]:
        return {
            "metadata": {
                "tools": span.span_data.tools,
                "handoffs": span.span_data.handoffs,
                "output_type": span.span_data.output_type,
            }
        }

    def _function_log_data(self, span: Span[FunctionSpanData]) -> Dict[str, Any]:
        return {
            "input": span.span_data.input,
            "output": span.span_data.output,
        }

    def _generation_log_data(self, span: Span[GenerationSpanData]) -> Dict[str, Any]:
        metrics = {}
        total_latency = _maybe_timestamp_elapsed(span.ended_at, span.started_at)

        if total_latency is not None:
            metrics["total_latency_seconds"] = total_latency

        if span.span_data.time_to_first_action_seconds is not None:
            metrics["time_to_first_action_seconds"] = (
                span.span_data.time_to_first_action_seconds
            )

        usage = span.span_data.usage or {}
        prompt_tokens = None
        completion_tokens = None
        prompt_tokens = usage.get("prompt_tokens")
        if prompt_tokens is None:
            prompt_tokens = usage.get("input_tokens")
        if prompt_tokens is not None:
            metrics["prompt_tokens"] = int(prompt_tokens)
        completion_tokens = usage.get("completion_tokens")
        if completion_tokens is None:
            completion_tokens = usage.get("output_tokens")
        if completion_tokens is not None:
            metrics["completion_tokens"] = int(completion_tokens)

        if "total_tokens" in usage:
            metrics["tokens"] = usage["total_tokens"]
        elif prompt_tokens is not None and completion_tokens is not None:
            metrics["tokens"] = prompt_tokens + completion_tokens

        if "cache_read_input_tokens" in usage:
            metrics["prompt_cached_tokens"] = usage["cache_read_input_tokens"]
        if "cache_creation_input_tokens" in usage:
            metrics["prompt_cache_creation_tokens"] = usage[
                "cache_creation_input_tokens"
            ]

        model_name = span.span_data.model
        if model_name and prompt_tokens is not None and completion_tokens is not None:
            cost_cents = calculate_llm_cost_cents(
                model_name=model_name,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )
            if cost_cents > 0:
                metrics["cost_cents"] = cost_cents

        metadata: Dict[str, Any] = {
            "model": span.span_data.model,
            "model_config": span.span_data.model_config,
        }

        # Include reasoning in metadata if present
        if span.span_data.reasoning:
            metadata["reasoning"] = span.span_data.reasoning

        return {
            "input": span.span_data.input,
            "output": span.span_data.output,
            "metadata": metadata,
            "metrics": metrics,
        }

    def _log_data(self, span: Span[Any]) -> Dict[str, Any]:
        if isinstance(span.span_data, AgentSpanData):
            return self._agent_log_data(span)
        elif isinstance(span.span_data, FunctionSpanData):
            return self._function_log_data(span)
        elif isinstance(span.span_data, GenerationSpanData):
            return self._generation_log_data(span)
        else:
            return {}

    def on_span_start(self, span: Span[SpanData]) -> None:
        parent: Any = (
            self._spans[span.parent_id]
            if span.parent_id is not None
            else self._spans[span.trace_id]
        )
        trace_metadata = self._trace_metadata.get(span.trace_id)
        if isinstance(span.span_data, GenerationSpanData):
            span_name = _generation_span_name(span)
        else:
            span_name = _span_name(span)
        span_kwargs: Dict[str, Any] = dict(
            id=span.span_id,
            name=span_name,
            type=_span_type(span),
            start_time=_timestamp_from_maybe_iso(span.started_at),
        )
        if trace_metadata:
            span_kwargs["metadata"] = trace_metadata
        created_span: Any = parent.start_span(**span_kwargs)
        self._spans[span.span_id] = created_span
        self._span_names[span.span_id] = span_name

        # Set the span as current so current_span() calls will return it
        created_span.set_current()

    def on_span_end(self, span: Span[SpanData]) -> None:
        s: Any = self._spans.pop(span.span_id)
        self._span_names.pop(span.span_id, None)
        event = dict(error=span.error, **self._log_data(span))
        s.log(**event)
        s.unset_current()
        s.end(_timestamp_from_maybe_iso(span.ended_at))

        input_ = event.get("input")
        output = event.get("output")
        # Store first input and last output per trace_id
        trace_id = span.trace_id
        if trace_id not in self._first_input and input_ is not None:
            self._first_input[trace_id] = input_

        if output is not None:
            self._last_output[trace_id] = output

    def shutdown(self) -> None:
        if self._logger is not None:
            self._logger.flush()
        else:
            braintrust.flush()

    def force_flush(self) -> None:
        if self._logger is not None:
            self._logger.flush()
        else:
            braintrust.flush()


def _generation_span_name(span: Span[SpanData]) -> str:
    data = span.span_data
    if isinstance(data, GenerationSpanData):
        model_config = data.model_config
        if isinstance(model_config, dict):
            flow = model_config.get("flow")
            if isinstance(flow, str) and flow.strip():
                return flow
    return _span_name(span)


================================================
FILE: backend/onyx/tracing/framework/__init__.py
================================================
from .processor_interface import TracingProcessor
from .provider import DefaultTraceProvider
from .setup import get_trace_provider
from .setup import set_trace_provider


def add_trace_processor(span_processor: TracingProcessor) -> None:
    """
    Adds a new trace processor. This processor will receive all traces/spans.
    """
    get_trace_provider().register_processor(span_processor)


def set_trace_processors(processors: list[TracingProcessor]) -> None:
    """
    Set the list of trace processors. This will replace the current list of processors.
    """
    get_trace_provider().set_processors(processors)


set_trace_provider(DefaultTraceProvider())


================================================
FILE: backend/onyx/tracing/framework/_error_tracing.py
================================================
from typing import Any

from .create import get_current_span
from .spans import Span
from .spans import SpanError
from onyx.utils.logger import setup_logger


logger = setup_logger(__name__)


def attach_error_to_span(span: Span[Any], error: SpanError) -> None:
    span.set_error(error)


def attach_error_to_current_span(error: SpanError) -> None:
    span = get_current_span()
    if span:
        attach_error_to_span(span, error)
    else:
        logger.warning(f"No span to add error {error} to")


================================================
FILE: backend/onyx/tracing/framework/create.py
================================================
from __future__ import annotations

from collections.abc import Iterator
from collections.abc import Mapping
from collections.abc import Sequence
from contextlib import contextmanager
from typing import Any
from typing import TYPE_CHECKING

from .setup import get_trace_provider
from .span_data import AgentSpanData
from .span_data import FunctionSpanData
from .span_data import GenerationSpanData
from .spans import Span
from .traces import Trace
from onyx.utils.logger import setup_logger

if TYPE_CHECKING:
    pass

logger = setup_logger(__name__)


def trace(
    workflow_name: str,
    trace_id: str | None = None,
    group_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    disabled: bool = False,
) -> Trace:
    """
    Create a new trace. The trace will not be started automatically; you should either use
    it as a context manager (`with trace(...):`) or call `trace.start()` + `trace.finish()`
    manually.

    In addition to the workflow name and optional grouping identifier, you can provide
    an arbitrary metadata dictionary to attach additional user-defined information to
    the trace.

    Args:
        workflow_name: The name of the logical app or workflow. For example, you might provide
            "code_bot" for a coding agent, or "customer_support_agent" for a customer support agent.
        trace_id: The ID of the trace. Optional. If not provided, we will generate an ID. We
            recommend using `util.gen_trace_id()` to generate a trace ID, to guarantee that IDs are
            correctly formatted.
        group_id: Optional grouping identifier to link multiple traces from the same conversation
            or process. For instance, you might use a chat thread ID.
        metadata: Optional dictionary of additional metadata to attach to the trace.
        disabled: If True, we will return a Trace but the Trace will not be recorded.

    Returns:
        The newly created trace object.
    """
    current_trace = get_trace_provider().get_current_trace()
    if current_trace:
        logger.warning(
            "Trace already exists. Creating a new trace, but this is probably a mistake."
        )

    return get_trace_provider().create_trace(
        name=workflow_name,
        trace_id=trace_id,
        group_id=group_id,
        metadata=metadata,
        disabled=disabled,
    )


@contextmanager
def ensure_trace(
    workflow_name: str,
    trace_id: str | None = None,
    group_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    disabled: bool = False,
) -> Iterator[Trace | None]:
    """
    Ensure a trace exists. If a trace is already active, reuse it.
    Otherwise, create a new trace for the duration of the context.
    """
    current_trace = get_trace_provider().get_current_trace()
    if current_trace:
        yield current_trace
        return

    with trace(
        workflow_name=workflow_name,
        trace_id=trace_id,
        group_id=group_id,
        metadata=metadata,
        disabled=disabled,
    ) as created_trace:
        yield created_trace


def get_current_trace() -> Trace | None:
    """Returns the currently active trace, if present."""
    return get_trace_provider().get_current_trace()


def get_current_span() -> Span[Any] | None:
    """Returns the currently active span, if present."""
    return get_trace_provider().get_current_span()


def agent_span(
    name: str,
    handoffs: list[str] | None = None,
    tools: list[str] | None = None,
    output_type: str | None = None,
    span_id: str | None = None,
    parent: Trace | Span[Any] | None = None,
    disabled: bool = False,
) -> Span[AgentSpanData]:
    """Create a new agent span. The span will not be started automatically, you should either do
    `with agent_span() ...` or call `span.start()` + `span.finish()` manually.

    Args:
        name: The name of the agent.
        handoffs: Optional list of agent names to which this agent could hand off control.
        tools: Optional list of tool names available to this agent.
        output_type: Optional name of the output type produced by the agent.
        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
            correctly formatted.
        parent: The parent span or trace. If not provided, we will automatically use the current
            trace/span as the parent.
        disabled: If True, we will return a Span but the Span will not be recorded.

    Returns:
        The newly created agent span.
    """
    return get_trace_provider().create_span(
        span_data=AgentSpanData(
            name=name, handoffs=handoffs, tools=tools, output_type=output_type
        ),
        span_id=span_id,
        parent=parent,
        disabled=disabled,
    )


def function_span(
    name: str,
    input: str | None = None,
    output: str | None = None,
    span_id: str | None = None,
    parent: Trace | Span[Any] | None = None,
    disabled: bool = False,
) -> Span[FunctionSpanData]:
    """Create a new function span. The span will not be started automatically, you should either do
    `with function_span() ...` or call `span.start()` + `span.finish()` manually.

    Args:
        name: The name of the function.
        input: The input to the function.
        output: The output of the function.
        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
            correctly formatted.
        parent: The parent span or trace. If not provided, we will automatically use the current
            trace/span as the parent.
        disabled: If True, we will return a Span but the Span will not be recorded.

    Returns:
        The newly created function span.
    """
    return get_trace_provider().create_span(
        span_data=FunctionSpanData(name=name, input=input, output=output),
        span_id=span_id,
        parent=parent,
        disabled=disabled,
    )


def generation_span(
    input: Sequence[Mapping[str, Any]] | None = None,
    output: Sequence[Mapping[str, Any]] | None = None,
    reasoning: str | None = None,
    model: str | None = None,
    model_config: Mapping[str, Any] | None = None,
    usage: dict[str, Any] | None = None,
    time_to_first_action_seconds: float | None = None,
    span_id: str | None = None,
    parent: Trace | Span[Any] | None = None,
    disabled: bool = False,
) -> Span[GenerationSpanData]:
    """Create a new generation span. The span will not be started automatically, you should either
    do `with generation_span() ...` or call `span.start()` + `span.finish()` manually.

    This span captures the details of a model generation, including the
    input message sequence, any generated outputs, the model name and
    configuration, and usage data. If you only need to capture a model
    response identifier, use `response_span()` instead.

    Args:
        input: The sequence of input messages sent to the model.
        output: The sequence of output messages received from the model.
        reasoning: The reasoning/thinking content from reasoning models (e.g., Claude extended thinking).
        model: The model identifier used for the generation.
        model_config: The model configuration (hyperparameters) used.
        usage: A dictionary of usage information (input tokens, output tokens, etc.).
        time_to_first_action_seconds: Time elapsed before the first model action is observed.
        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
            correctly formatted.
        parent: The parent span or trace. If not provided, we will automatically use the current
            trace/span as the parent.
        disabled: If True, we will return a Span but the Span will not be recorded.

    Returns:
        The newly created generation span.
    """
    return get_trace_provider().create_span(
        span_data=GenerationSpanData(
            input=input,
            output=output,
            reasoning=reasoning,
            model=model,
            model_config=model_config,
            usage=usage,
            time_to_first_action_seconds=time_to_first_action_seconds,
        ),
        span_id=span_id,
        parent=parent,
        disabled=disabled,
    )


================================================
FILE: backend/onyx/tracing/framework/processor_interface.py
================================================
import abc
from typing import Any
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .spans import Span
    from .traces import Trace


class TracingProcessor(abc.ABC):
    """Interface for processing and monitoring traces and spans in the OpenAI Agents system.

    This abstract class defines the interface that all tracing processors must implement.
    Processors receive notifications when traces and spans start and end, allowing them
    to collect, process, and export tracing data.

    Example:
        ```python
        class CustomProcessor(TracingProcessor):
            def __init__(self):
                self.active_traces = {}
                self.active_spans = {}

            def on_trace_start(self, trace):
                self.active_traces[trace.trace_id] = trace

            def on_trace_end(self, trace):
                # Process completed trace
                del self.active_traces[trace.trace_id]

            def on_span_start(self, span):
                self.active_spans[span.span_id] = span

            def on_span_end(self, span):
                # Process completed span
                del self.active_spans[span.span_id]

            def shutdown(self):
                # Clean up resources
                self.active_traces.clear()
                self.active_spans.clear()

            def force_flush(self):
                # Force processing of any queued items
                pass
        ```

    Notes:
        - All methods should be thread-safe
        - Methods should not block for long periods
        - Handle errors gracefully to prevent disrupting agent execution
    """

    @abc.abstractmethod
    def on_trace_start(self, trace: "Trace") -> None:
        """Called when a new trace begins execution.

        Args:
            trace: The trace that started. Contains workflow name and metadata.

        Notes:
            - Called synchronously on trace start
            - Should return quickly to avoid blocking execution
            - Any errors should be caught and handled internally
        """

    @abc.abstractmethod
    def on_trace_end(self, trace: "Trace") -> None:
        """Called when a trace completes execution.

        Args:
            trace: The completed trace containing all spans and results.

        Notes:
            - Called synchronously when trace finishes
            - Good time to export/process the complete trace
            - Should handle cleanup of any trace-specific resources
        """

    @abc.abstractmethod
    def on_span_start(self, span: "Span[Any]") -> None:
        """Called when a new span begins execution.

        Args:
            span: The span that started. Contains operation details and context.

        Notes:
            - Called synchronously on span start
            - Should return quickly to avoid blocking execution
            - Spans are automatically nested under current trace/span
        """

    @abc.abstractmethod
    def on_span_end(self, span: "Span[Any]") -> None:
        """Called when a span completes execution.

        Args:
            span: The completed span containing execution results.

        Notes:
            - Called synchronously when span finishes
            - Should not block or raise exceptions
            - Good time to export/process the individual span
        """

    @abc.abstractmethod
    def shutdown(self) -> None:
        """Called when the application stops to clean up resources.

        Should perform any necessary cleanup like:
        - Flushing queued traces/spans
        - Closing connections
        - Releasing resources
        """

    @abc.abstractmethod
    def force_flush(self) -> None:
        """Forces immediate processing of any queued traces/spans.

        Notes:
            - Should process all queued items before returning
            - Useful before shutdown or when immediate processing is needed
            - May block while processing completes
        """


class TracingExporter(abc.ABC):
    """Exports traces and spans. For example, could log them or send them to a backend."""

    @abc.abstractmethod
    def export(self, items: list["Trace | Span[Any]"]) -> None:
        """Exports a list of traces and spans.

        Args:
            items: The items to export.
        """


================================================
FILE: backend/onyx/tracing/framework/provider.py
================================================
from __future__ import annotations

import threading
import uuid
from abc import ABC
from abc import abstractmethod
from datetime import datetime
from datetime import timezone
from typing import Any

from .processor_interface import TracingProcessor
from .scope import Scope
from .spans import NoOpSpan
from .spans import Span
from .spans import SpanImpl
from .spans import TSpanData
from .traces import NoOpTrace
from .traces import Trace
from .traces import TraceImpl
from onyx.utils.logger import setup_logger

logger = setup_logger(__name__)


class SynchronousMultiTracingProcessor(TracingProcessor):
    """
    Forwards all calls to a list of TracingProcessors, in order of registration.
    """

    def __init__(self) -> None:
        # Using a tuple to avoid race conditions when iterating over processors
        self._processors: tuple[TracingProcessor, ...] = ()
        self._lock = threading.Lock()

    def add_tracing_processor(self, tracing_processor: TracingProcessor) -> None:
        """
        Add a processor to the list of processors. Each processor will receive all traces/spans.
        """
        with self._lock:
            self._processors += (tracing_processor,)

    def set_processors(self, processors: list[TracingProcessor]) -> None:
        """
        Set the list of processors. This will replace the current list of processors.
        """
        with self._lock:
            self._processors = tuple(processors)

    def on_trace_start(self, trace: Trace) -> None:
        """
        Called when a trace is started.
        """
        for processor in self._processors:
            try:
                processor.on_trace_start(trace)
            except Exception as e:
                logger.error(
                    f"Error in trace processor {processor} during on_trace_start: {e}"
                )

    def on_trace_end(self, trace: Trace) -> None:
        """
        Called when a trace is finished.
        """
        for processor in self._processors:
            try:
                processor.on_trace_end(trace)
            except Exception as e:
                logger.error(
                    f"Error in trace processor {processor} during on_trace_end: {e}"
                )

    def on_span_start(self, span: Span[Any]) -> None:
        """
        Called when a span is started.
        """
        for processor in self._processors:
            try:
                processor.on_span_start(span)
            except Exception as e:
                logger.error(
                    f"Error in trace processor {processor} during on_span_start: {e}"
                )

    def on_span_end(self, span: Span[Any]) -> None:
        """
        Called when a span is finished.
        """
        for processor in self._processors:
            try:
                processor.on_span_end(span)
            except Exception as e:
                logger.error(
                    f"Error in trace processor {processor} during on_span_end: {e}"
                )

    def shutdown(self) -> None:
        """
        Called when the application stops.
        """
        for processor in self._processors:
            logger.debug(f"Shutting down trace processor {processor}")
            try:
                processor.shutdown()
            except Exception as e:
                logger.error(f"Error shutting down trace processor {processor}: {e}")

    def force_flush(self) -> None:
        """
        Force the processors to flush their buffers.
        """
        for processor in self._processors:
            try:
                processor.force_flush()
            except Exception as e:
                logger.error(f"Error flushing trace processor {processor}: {e}")


class TraceProvider(ABC):
    """Interface for creating traces and spans."""

    @abstractmethod
    def register_processor(self, processor: TracingProcessor) -> None:
        """Add a processor that will receive all traces and spans."""

    @abstractmethod
    def set_processors(self, processors: list[TracingProcessor]) -> None:
        """Replace the list of processors with ``processors``."""

    @abstractmethod
    def get_current_trace(self) -> Trace | None:
        """Return the currently active trace, if any."""

    @abstractmethod
    def get_current_span(self) -> Span[Any] | None:
        """Return the currently active span, if any."""

    @abstractmethod
    def time_iso(self) -> str:
        """Return the current time in ISO 8601 format."""

    @abstractmethod
    def gen_trace_id(self) -> str:
        """Generate a new trace identifier."""

    @abstractmethod
    def gen_span_id(self) -> str:
        """Generate a new span identifier."""

    @abstractmethod
    def gen_group_id(self) -> str:
        """Generate a new group identifier."""

    @abstractmethod
    def create_trace(
        self,
        name: str,
        trace_id: str | None = None,
        group_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        disabled: bool = False,
    ) -> Trace:
        """Create a new trace."""

    @abstractmethod
    def create_span(
        self,
        span_data: TSpanData,
        span_id: str | None = None,
        parent: Trace | Span[Any] | None = None,
        disabled: bool = False,
    ) -> Span[TSpanData]:
        """Create a new span."""

    @abstractmethod
    def shutdown(self) -> None:
        """Clean up any resources used by the provider."""


class DefaultTraceProvider(TraceProvider):
    def __init__(self) -> None:
        self._multi_processor = SynchronousMultiTracingProcessor()

    def register_processor(self, processor: TracingProcessor) -> None:
        """
        Add a processor to the list of processors. Each processor will receive all traces/spans.
        """
        self._multi_processor.add_tracing_processor(processor)

    def set_processors(self, processors: list[TracingProcessor]) -> None:
        """
        Set the list of processors. This will replace the current list of processors.
        """
        self._multi_processor.set_processors(processors)

    def get_current_trace(self) -> Trace | None:
        """
        Returns the currently active trace, if any.
        """
        return Scope.get_current_trace()

    def get_current_span(self) -> Span[Any] | None:
        """
        Returns the currently active span, if any.
        """
        return Scope.get_current_span()

    def time_iso(self) -> str:
        """Return the current time in ISO 8601 format."""
        return datetime.now(timezone.utc).isoformat()

    def gen_trace_id(self) -> str:
        """Generate a new trace ID."""
        return f"trace_{uuid.uuid4().hex}"

    def gen_span_id(self) -> str:
        """Generate a new span ID."""
        return f"span_{uuid.uuid4().hex[:24]}"

    def gen_group_id(self) -> str:
        """Generate a new group ID."""
        return f"group_{uuid.uuid4().hex[:24]}"

    def create_trace(
        self,
        name: str,
        trace_id: str | None = None,
        group_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        disabled: bool = False,
    ) -> Trace:
        """
        Create a new trace.
        """
        if disabled:
            logger.debug(f"Tracing is disabled. Not creating trace {name}")
            return NoOpTrace()

        trace_id = trace_id or self.gen_trace_id()

        logger.debug(f"Creating trace {name} with id {trace_id}")

        return TraceImpl(
            name=name,
            trace_id=trace_id,
            group_id=group_id,
            metadata=metadata,
            processor=self._multi_processor,
        )

    def create_span(
        self,
        span_data: TSpanData,
        span_id: str | None = None,
        parent: Trace | Span[Any] | None = None,
        disabled: bool = False,
    ) -> Span[TSpanData]:
        """
        Create a new span.
        """
        if disabled:
            logger.debug(f"Tracing is disabled. Not creating span {span_data}")
            return NoOpSpan(span_data)

        trace_id: str
        parent_id: str | None

        if not parent:
            current_span = Scope.get_current_span()
            current_trace = Scope.get_current_trace()
            if current_trace is None:
                logger.error(
                    "No active trace. Make sure to start a trace with `trace()` first Returning NoOpSpan."
                )
                return NoOpSpan(span_data)
            elif isinstance(current_trace, NoOpTrace) or isinstance(
                current_span, NoOpSpan
            ):
                logger.debug(
                    f"Parent {current_span} or {current_trace} is no-op, returning NoOpSpan"
                )
                return NoOpSpan(span_data)

            parent_id = current_span.span_id if current_span else None
            trace_id = current_trace.trace_id

        elif isinstance(parent, Trace):
            if isinstance(parent, NoOpTrace):
                logger.debug(f"Parent {parent} is no-op, returning NoOpSpan")
                return NoOpSpan(span_data)
            trace_id = parent.trace_id
            parent_id = None
        elif isinstance(parent, Span):
            if isinstance(parent, NoOpSpan):
                logger.debug(f"Parent {parent} is no-op, returning NoOpSpan")
                return NoOpSpan(span_data)
            parent_id = parent.span_id
            trace_id = parent.trace_id
        else:
            # This should never happen, but mypy needs it
            raise ValueError(f"Invalid parent type: {type(parent)}")

        return SpanImpl(
            trace_id=trace_id,
            span_id=span_id or self.gen_span_id(),
            parent_id=parent_id,
            processor=self._multi_processor,
            span_data=span_data,
        )

    def shutdown(self) -> None:
        try:
            logger.debug("Shutting down trace provider")
            self._multi_processor.shutdown()
        except Exception as e:
            logger.error(f"Error shutting down trace provider: {e}")


================================================
FILE: backend/onyx/tracing/framework/scope.py
================================================
import contextvars
from typing import Any
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .spans import Span
    from .traces import Trace

_current_span: contextvars.ContextVar["Span[Any] | None"] = contextvars.ContextVar(
    "current_span", default=None
)

_current_trace: contextvars.ContextVar["Trace | None"] = contextvars.ContextVar(
    "current_trace", default=None
)


class Scope:
    """
    Manages the current span and trace in the context.
    """

    @classmethod
    def get_current_span(cls) -> "Span[Any] | None":
        return _current_span.get()

    @classmethod
    def set_current_span(
        cls, span: "Span[Any] | None"
    ) -> "contextvars.Token[Span[Any] | None]":
        return _current_span.set(span)

    @classmethod
    def reset_current_span(cls, token: "contextvars.Token[Span[Any] | None]") -> None:
        _current_span.reset(token)

    @classmethod
    def get_current_trace(cls) -> "Trace | None":
        return _current_trace.get()

    @classmethod
    def set_current_trace(
        cls, trace: "Trace | None"
    ) -> "contextvars.Token[Trace | None]":
        return _current_trace.set(trace)

    @classmethod
    def reset_current_trace(cls, token: "contextvars.Token[Trace | None]") -> None:
        _current_trace.reset(token)


================================================
FILE: backend/onyx/tracing/framework/setup.py
================================================
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .provider import TraceProvider

GLOBAL_TRACE_PROVIDER: TraceProvider | None = None


def set_trace_provider(provider: TraceProvider) -> None:
    """Set the global trace provider used by tracing utilities."""
    global GLOBAL_TRACE_PROVIDER
    GLOBAL_TRACE_PROVIDER = provider


def get_trace_provider() -> TraceProvider:
    """Get the global trace provider used by tracing utilities."""
    if GLOBAL_TRACE_PROVIDER is None:
        raise RuntimeError("Trace provider not set")
    return GLOBAL_TRACE_PROVIDER


================================================
FILE: backend/onyx/tracing/framework/span_data.py
================================================
import abc
from collections.abc import Mapping
from collections.abc import Sequence
from typing import Any


class SpanData(abc.ABC):
    """
    Represents span data in the trace.
    """

    @abc.abstractmethod
    def export(self) -> dict[str, Any]:
        """Export the span data as a dictionary."""

    @property
    @abc.abstractmethod
    def type(self) -> str:
        """Return the type of the span."""


class AgentSpanData(SpanData):
    """
    Represents an Agent Span in the trace.
    Includes name, handoffs, tools, and output type.
    """

    __slots__ = ("name", "handoffs", "tools", "output_type")

    def __init__(
        self,
        name: str,
        handoffs: list[str] | None = None,
        tools: list[str] | None = None,
        output_type: str | None = None,
    ):
        self.name = name
        self.handoffs: list[str] | None = handoffs
        self.tools: list[str] | None = tools
        self.output_type: str | None = output_type

    @property
    def type(self) -> str:
        return "agent"

    def export(self) -> dict[str, Any]:
        return {
            "type": self.type,
            "name": self.name,
            "handoffs": self.handoffs,
            "tools": self.tools,
            "output_type": self.output_type,
        }


class FunctionSpanData(SpanData):
    """
    Represents a Function Span in the trace.
    Includes input, output and MCP data (if applicable).
    """

    __slots__ = ("name", "input", "output", "mcp_data")

    def __init__(
        self,
        name: str,
        input: str | None,
        output: Any | None,
        mcp_data: dict[str, Any] | None = None,
    ):
        self.name = name
        self.input = input
        self.output = output
        self.mcp_data = mcp_data

    @property
    def type(self) -> str:
        return "function"

    def export(self) -> dict[str, Any]:
        return {
            "type": self.type,
            "name": self.name,
            "input": self.input,
            "output": str(self.output) if self.output else None,
            "mcp_data": self.mcp_data,
        }


class GenerationSpanData(SpanData):
    """
    Represents a Generation Span in the trace.
    Includes input, output, model, model configuration, and usage.
    """

    __slots__ = (
        "input",
        "output",
        "reasoning",
        "model",
        "model_config",
        "usage",
        "time_to_first_action_seconds",
    )

    def __init__(
        self,
        input: Sequence[Mapping[str, Any]] | None = None,
        output: Sequence[Mapping[str, Any]] | None = None,
        reasoning: str | None = None,
        model: str | None = None,
        model_config: Mapping[str, Any] | None = None,
        usage: dict[str, Any] | None = None,
        time_to_first_action_seconds: float | None = None,
    ):
        self.input = input
        self.output = output
        self.reasoning = reasoning
        self.model = model
        self.model_config = model_config
        self.usage = usage
        self.time_to_first_action_seconds = time_to_first_action_seconds

    @property
    def type(self) -> str:
        return "generation"

    def export(self) -> dict[str, Any]:
        return {
            "type": self.type,
            "input": self.input,
            "output": self.output,
            "reasoning": self.reasoning,
            "model": self.model,
            "model_config": self.model_config,
            "usage": self.usage,
            "time_to_first_action_seconds": self.time_to_first_action_seconds,
        }


================================================
FILE: backend/onyx/tracing/framework/spans.py
================================================
from __future__ import annotations

import abc
import contextvars
from types import TracebackType
from typing import Any
from typing import Generic
from typing import TypeVar

from typing_extensions import TypedDict

from . import util
from .processor_interface import TracingProcessor
from .scope import Scope
from .span_data import SpanData

TSpanData = TypeVar("TSpanData", bound=SpanData)


class SpanError(TypedDict):
    """Represents an error that occurred during span execution.

    Attributes:
        message: A human-readable error description
        data: Optional dictionary containing additional error context
    """

    message: str
    data: dict[str, Any] | None


class Span(abc.ABC, Generic[TSpanData]):
    """Base class for representing traceable operations with timing and context.

    A span represents a single operation within a trace (e.g., an LLM call, tool execution,
    or agent run). Spans track timing, relationships between operations, and operation-specific
    data.

    Type Args:
        TSpanData: The type of span-specific data this span contains.

    Example:
        ```python
        # Creating a custom span
        with custom_span("database_query", {
            "operation": "SELECT",
            "table": "users"
        }) as span:
            results = await db.query("SELECT * FROM users")
            span.set_output({"count": len(results)})

        # Handling errors in spans
        with custom_span("risky_operation") as span:
            try:
                result = perform_risky_operation()
            except Exception as e:
                span.set_error({
                    "message": str(e),
                    "data": {"operation": "risky_operation"}
                })
                raise
        ```

        Notes:
        - Spans automatically nest under the current trace
        - Use context managers for reliable start/finish
        - Include relevant data but avoid sensitive information
        - Handle errors properly using set_error()
    """

    @property
    @abc.abstractmethod
    def trace_id(self) -> str:
        """The ID of the trace this span belongs to.

        Returns:
            str: Unique identifier of the parent trace.
        """

    @property
    @abc.abstractmethod
    def span_id(self) -> str:
        """Unique identifier for this span.

        Returns:
            str: The span's unique ID within its trace.
        """

    @property
    @abc.abstractmethod
    def span_data(self) -> TSpanData:
        """Operation-specific data for this span.

        Returns:
            TSpanData: Data specific to this type of span (e.g., LLM generation data).
        """

    @abc.abstractmethod
    def start(self, mark_as_current: bool = False) -> None:
        """
        Start the span.

        Args:
            mark_as_current: If true, the span will be marked as the current span.
        """

    @abc.abstractmethod
    def finish(self, reset_current: bool = False) -> None:
        """
        Finish the span.

        Args:
            reset_current: If true, the span will be reset as the current span.
        """

    @abc.abstractmethod
    def __enter__(self) -> Span[TSpanData]:
        pass

    @abc.abstractmethod
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        pass

    @property
    @abc.abstractmethod
    def parent_id(self) -> str | None:
        """ID of the parent span, if any.

        Returns:
            str | None: The parent span's ID, or None if this is a root span.
        """

    @abc.abstractmethod
    def set_error(self, error: SpanError) -> None:
        pass

    @property
    @abc.abstractmethod
    def error(self) -> SpanError | None:
        """Any error that occurred during span execution.

        Returns:
            SpanError | None: Error details if an error occurred, None otherwise.
        """

    @abc.abstractmethod
    def export(self) -> dict[str, Any] | None:
        pass

    @property
    @abc.abstractmethod
    def started_at(self) -> str | None:
        """When the span started execution.

        Returns:
            str | None: ISO format timestamp of span start, None if not started.
        """

    @property
    @abc.abstractmethod
    def ended_at(self) -> str | None:
        """When the span finished execution.

        Returns:
            str | None: ISO format timestamp of span end, None if not finished.
        """


class NoOpSpan(Span[TSpanData]):
    """A no-op implementation of Span that doesn't record any data.

    Used when tracing is disabled but span operations still need to work.

    Args:
        span_data: The operation-specific data for this span.
    """

    __slots__ = ("_span_data", "_prev_span_token")

    def __init__(self, span_data: TSpanData):
        self._span_data = span_data
        self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None

    @property
    def trace_id(self) -> str:
        return "no-op"

    @property
    def span_id(self) -> str:
        return "no-op"

    @property
    def span_data(self) -> TSpanData:
        return self._span_data

    @property
    def parent_id(self) -> str | None:
        return None

    def start(self, mark_as_current: bool = False) -> None:
        if mark_as_current:
            self._prev_span_token = Scope.set_current_span(self)

    def finish(self, reset_current: bool = False) -> None:
        if reset_current and self._prev_span_token is not None:
            Scope.reset_current_span(self._prev_span_token)
            self._prev_span_token = None

    def __enter__(self) -> Span[TSpanData]:
        self.start(mark_as_current=True)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        reset_current = True
        if exc_type is GeneratorExit:
            reset_current = False

        self.finish(reset_current=reset_current)

    def set_error(self, error: SpanError) -> None:
        pass

    @property
    def error(self) -> SpanError | None:
        return None

    def export(self) -> dict[str, Any] | None:
        return None

    @property
    def started_at(self) -> str | None:
        return None

    @property
    def ended_at(self) -> str | None:
        return None


class SpanImpl(Span[TSpanData]):
    __slots__ = (
        "_trace_id",
        "_span_id",
        "_parent_id",
        "_started_at",
        "_ended_at",
        "_error",
        "_prev_span_token",
        "_processor",
        "_span_data",
    )

    def __init__(
        self,
        trace_id: str,
        span_id: str | None,
        parent_id: str | None,
        processor: TracingProcessor,
        span_data: TSpanData,
    ):
        self._trace_id = trace_id
        self._span_id = span_id or util.gen_span_id()
        self._parent_id = parent_id
        self._started_at: str | None = None
        self._ended_at: str | None = None
        self._processor = processor
        self._error: SpanError | None = None
        self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None
        self._span_data = span_data

    @property
    def trace_id(self) -> str:
        return self._trace_id

    @property
    def span_id(self) -> str:
        return self._span_id

    @property
    def span_data(self) -> TSpanData:
        return self._span_data

    @property
    def parent_id(self) -> str | None:
        return self._parent_id

    def start(self, mark_as_current: bool = False) -> None:
        if self.started_at is not None:
            return

        self._started_at = util.time_iso()
        self._processor.on_span_start(self)
        if mark_as_current:
            self._prev_span_token = Scope.set_current_span(self)

    def finish(self, reset_current: bool = False) -> None:
        if self.ended_at is not None:
            return

        self._ended_at = util.time_iso()
        self._processor.on_span_end(self)
        if reset_current and self._prev_span_token is not None:
            Scope.reset_current_span(self._prev_span_token)
            self._prev_span_token = None

    def __enter__(self) -> Span[TSpanData]:
        self.start(mark_as_current=True)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        reset_current = True
        if exc_type is GeneratorExit:
            reset_current = False

        self.finish(reset_current=reset_current)

    def set_error(self, error: SpanError) -> None:
        self._error = error

    @property
    def error(self) -> SpanError | None:
        return self._error

    @property
    def started_at(self) -> str | None:
        return self._started_at

    @property
    def ended_at(self) -> str | None:
        return self._ended_at

    def export(self) -> dict[str, Any] | None:
        return {
            "object": "trace.span",
            "id": self.span_id,
            "trace_id": self.trace_id,
            "parent_id": self._parent_id,
            "started_at": self._started_at,
            "ended_at": self._ended_at,
            "span_data": self.span_data.export(),
            "error": self._error,
        }


================================================
FILE: backend/onyx/tracing/framework/traces.py
================================================
from __future__ import annotations

import abc
import contextvars
from types import TracebackType
from typing import Any
from typing import TYPE_CHECKING

from . import util
from .scope import Scope

if TYPE_CHECKING:
    from .processor_interface import TracingProcessor


class Trace(abc.ABC):
    """A complete end-to-end workflow containing related spans and metadata.

    A trace represents a logical workflow or operation (e.g., "Customer Service Query"
    or "Code Generation") and contains all the spans (individual operations) that occur
    during that workflow.

    Example:
        ```python
        # Basic trace usage
        with trace("Order Processing") as t:
            validation_result = await Runner.run(validator, order_data)
            if validation_result.approved:
                await Runner.run(processor, order_data)

        # Trace with metadata and grouping
        with trace(
            "Customer Service",
            group_id="chat_123",
            metadata={"customer": "user_456"}
        ) as t:
            result = await Runner.run(support_agent, query)
        ```

    Notes:
        - Use descriptive workflow names
        - Group related traces with consistent group_ids
        - Add relevant metadata for filtering/analysis
        - Use context managers for reliable cleanup
        - Consider privacy when adding trace data
    """

    @abc.abstractmethod
    def __enter__(self) -> Trace:
        pass

    @abc.abstractmethod
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        pass

    @abc.abstractmethod
    def start(self, mark_as_current: bool = False) -> None:
        """Start the trace and optionally mark it as the current trace.

        Args:
            mark_as_current: If true, marks this trace as the current trace
                in the execution context.

        Notes:
            - Must be called before any spans can be added
            - Only one trace can be current at a time
            - Thread-safe when using mark_as_current
        """

    @abc.abstractmethod
    def finish(self, reset_current: bool = False) -> None:
        """Finish the trace and optionally reset the current trace.

        Args:
            reset_current: If true, resets the current trace to the previous
                trace in the execution context.

        Notes:
            - Must be called to complete the trace
            - Finalizes all open spans
            - Thread-safe when using reset_current
        """

    @property
    @abc.abstractmethod
    def trace_id(self) -> str:
        """Get the unique identifier for this trace.

        Returns:
            str: The trace's unique ID in the format 'trace_<32_alphanumeric>'

        Notes:
            - IDs are globally unique
            - Used to link spans to their parent trace
            - Can be used to look up traces in the dashboard
        """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Get the human-readable name of this workflow trace.

        Returns:
            str: The workflow name (e.g., "Customer Service", "Data Processing")

        Notes:
            - Should be descriptive and meaningful
            - Used for grouping and filtering in the dashboard
            - Helps identify the purpose of the trace
        """

    @abc.abstractmethod
    def export(self) -> dict[str, Any] | None:
        """Export the trace data as a serializable dictionary.

        Returns:
            dict | None: Dictionary containing trace data, or None if tracing is disabled.

        Notes:
            - Includes all spans and their data
            - Used for sending traces to backends
            - May include metadata and group ID
        """


class NoOpTrace(Trace):
    """A no-op implementation of Trace that doesn't record any data.

    Used when tracing is disabled but trace operations still need to work.
    Maintains proper context management but doesn't store or export any data.

    Example:
        ```python
        # When tracing is disabled, traces become NoOpTrace
        with trace("Disabled Workflow") as t:
            # Operations still work but nothing is recorded
            await Runner.run(agent, "query")
        ```
    """

    def __init__(self) -> None:
        self._started = False
        self._prev_context_token: contextvars.Token[Trace | None] | None = None

    def __enter__(self) -> Trace:
        if self._started:
            return self

        self._started = True
        self.start(mark_as_current=True)

        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        self.finish(reset_current=True)

    def start(self, mark_as_current: bool = False) -> None:
        if mark_as_current:
            self._prev_context_token = Scope.set_current_trace(self)

    def finish(self, reset_current: bool = False) -> None:
        if reset_current and self._prev_context_token is not None:
            Scope.reset_current_trace(self._prev_context_token)
            self._prev_context_token = None

    @property
    def trace_id(self) -> str:
        """The trace's unique identifier.

        Returns:
            str: A unique ID for this trace.
        """
        return "no-op"

    @property
    def name(self) -> str:
        """The workflow name for this trace.

        Returns:
            str: Human-readable name describing this workflow.
        """
        return "no-op"

    def export(self) -> dict[str, Any] | None:
        """Export the trace data as a dictionary.

        Returns:
            dict | None: Trace data in exportable format, or None if no data.
        """
        return None


NO_OP_TRACE = NoOpTrace()


class TraceImpl(Trace):
    """
    A trace that will be recorded by the tracing library.
    """

    __slots__ = (
        "_name",
        "_trace_id",
        "group_id",
        "metadata",
        "_prev_context_token",
        "_processor",
        "_started",
    )

    def __init__(
        self,
        name: str,
        trace_id: str | None,
        group_id: str | None,
        metadata: dict[str, Any] | None,
        processor: TracingProcessor,
    ):
        self._name = name
        self._trace_id = trace_id or util.gen_trace_id()
        self.group_id = group_id
        self.metadata = metadata
        self._prev_context_token: contextvars.Token[Trace | None] | None = None
        self._processor = processor
        self._started = False

    @property
    def trace_id(self) -> str:
        return self._trace_id

    @property
    def name(self) -> str:
        return self._name

    def start(self, mark_as_current: bool = False) -> None:
        if self._started:
            return

        self._started = True
        self._processor.on_trace_start(self)

        if mark_as_current:
            self._prev_context_token = Scope.set_current_trace(self)

    def finish(self, reset_current: bool = False) -> None:
        if not self._started:
            return

        self._processor.on_trace_end(self)

        if reset_current and self._prev_context_token is not None:
            Scope.reset_current_trace(self._prev_context_token)
            self._prev_context_token = None

    def __enter__(self) -> Trace:
        if self._started:
            return self

        self.start(mark_as_current=True)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        self.finish(reset_current=exc_type is not GeneratorExit)

    def export(self) -> dict[str, Any] | None:
        return {
            "object": "trace",
            "id": self.trace_id,
            "workflow_name": self.name,
            "metadata": self.metadata,
        }


================================================
FILE: backend/onyx/tracing/framework/util.py
================================================
import uuid
from datetime import datetime
from datetime import timezone


def time_iso() -> str:
    """Return the current time in ISO 8601 format."""
    return datetime.now(timezone.utc).isoformat()


def gen_trace_id() -> str:
    """Generate a new trace ID."""
    return f"trace_{uuid.uuid4().hex}"


def gen_span_id() -> str:
    """Generate a new span ID."""
    return f"span_{uuid.uuid4().hex[:24]}"


================================================
FILE: backend/onyx/tracing/langfuse_tracing_processor.py
================================================
"""Langfuse tracing processor using the native Langfuse SDK."""

from __future__ import annotations

import logging
import threading
from datetime import datetime
from typing import Any
from typing import Optional
from typing import Union

from langfuse import Langfuse
from langfuse._client.span import LangfuseObservationWrapper

from onyx.tracing.framework.processor_interface import TracingProcessor
from onyx.tracing.framework.span_data import AgentSpanData
from onyx.tracing.framework.span_data import FunctionSpanData
from onyx.tracing.framework.span_data import GenerationSpanData
from onyx.tracing.framework.span_data import SpanData
from onyx.tracing.framework.spans import Span
from onyx.tracing.framework.traces import Trace

logger = logging.getLogger(__name__)


def _timestamp_from_maybe_iso(timestamp: Optional[str]) -> Optional[datetime]:
    """Convert ISO timestamp string to datetime."""
    if timestamp is None:
        return None
    try:
        return datetime.fromisoformat(timestamp)
    except ValueError:
        return None


class LangfuseTracingProcessor(TracingProcessor):
    """TracingProcessor that logs traces to Langfuse using the native SDK.

    Args:
        client: A Langfuse client instance. If None, uses get_client().
        enable_masking: Whether to mask sensitive data before sending.
    """

    def __init__(
        self,
        client: Optional[Langfuse] = None,
        enable_masking: bool = True,
    ) -> None:
        self._client: Optional[Langfuse] = client
        self._enable_masking = enable_masking
        self._lock = threading.Lock()  # Protects all dict access
        self._spans: dict[str, LangfuseObservationWrapper] = {}
        self._trace_spans: dict[str, LangfuseObservationWrapper] = (
            {}
        )  # Root spans for traces
        self._first_input: dict[str, Any] = {}
        self._last_output: dict[str, Any] = {}
        self._trace_metadata: dict[str, dict[str, Any]] = {}
        # Langfuse IDs for thread-safe parent linking via trace_context
        self._langfuse_trace_ids: dict[str, str] = (
            {}
        )  # framework_trace_id -> langfuse_trace_id
        self._langfuse_span_ids: dict[str, str] = (
            {}
        )  # framework_span_id -> langfuse_span.id

    def _get_client(self) -> Langfuse:
        """Get or create Langfuse client."""
        if self._client is None:
            from langfuse import get_client

            self._client = get_client()
        return self._client

    def _mask_if_enabled(self, data: Any) -> Any:
        """Apply masking to data if masking is enabled."""
        if not self._enable_masking:
            return data
        try:
            from onyx.tracing.masking import mask_sensitive_data

            return mask_sensitive_data(data)
        except Exception as e:
            logger.warning(f"Failed to mask data: {e}")
            return data

    def _calculate_cost(self, data: GenerationSpanData) -> Optional[float]:
        """Calculate LLM cost for this generation span."""
        try:
            from onyx.llm.cost import calculate_llm_cost_cents

            usage = data.usage or {}
            prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens") or 0
            completion_tokens = (
                usage.get("completion_tokens") or usage.get("output_tokens") or 0
            )

            if data.model and prompt_tokens and completion_tokens:
                cost_cents = calculate_llm_cost_cents(
                    model_name=data.model,
                    prompt_tokens=int(prompt_tokens),
                    completion_tokens=int(completion_tokens),
                )
                if cost_cents > 0:
                    # Convert cents to dollars for Langfuse
                    return cost_cents / 100.0
        except Exception as e:
            logger.debug(f"Failed to calculate cost: {e}")
        return None

    def on_trace_start(self, trace: Trace) -> None:
        """Called when a trace is started."""
        try:
            client = self._get_client()
            trace_meta = trace.export() or {}
            metadata = trace_meta.get("metadata") or {}

            # Create a root span which implicitly creates a Langfuse trace
            # The span name becomes the trace name in Langfuse UI
            # In Langfuse SDK v3, use start_observation instead of start_span
            langfuse_span = client.start_observation(
                name=trace.name,
            )

            # Always update the trace-level properties to set the trace name
            # session_id is optional but name should always be set
            session_id = metadata.get("chat_session_id")
            langfuse_span.update_trace(
                name=trace.name,
                session_id=session_id if session_id else None,
                metadata=metadata if metadata else None,
            )

            with self._lock:
                if metadata:
                    self._trace_metadata[trace.trace_id] = metadata
                self._trace_spans[trace.trace_id] = langfuse_span
                # Store Langfuse IDs for thread-safe parent linking
                self._langfuse_trace_ids[trace.trace_id] = langfuse_span.trace_id
                # Use trace_id as key for root span's ID (children with no parent_id will use this)
                self._langfuse_span_ids[trace.trace_id] = langfuse_span.id
        except Exception as e:
            logger.error(f"Error starting Langfuse trace: {e}")

    def on_trace_end(self, trace: Trace) -> None:
        """Called when a trace is finished."""
        try:
            with self._lock:
                langfuse_span = self._trace_spans.pop(trace.trace_id, None)
                self._trace_metadata.pop(trace.trace_id, None)
                self._langfuse_trace_ids.pop(trace.trace_id, None)  # Clean up trace ID
                self._langfuse_span_ids.pop(
                    trace.trace_id, None
                )  # Clean up root span ID
                trace_first_input = self._first_input.pop(trace.trace_id, None)
                trace_last_output = self._last_output.pop(trace.trace_id, None)

            if langfuse_span:
                # Update the root span with input/output and end it
                langfuse_span.update(
                    input=self._mask_if_enabled(trace_first_input),
                    output=self._mask_if_enabled(trace_last_output),
                )
                langfuse_span.end()
        except Exception as e:
            logger.error(f"Error ending Langfuse trace: {e}")

    def on_span_start(self, span: Span[SpanData]) -> None:
        """Called when a span is started.

        Uses trace_context parameter for thread-safe parent linking instead of
        calling methods on parent span objects. This is necessary because research
        agents run in parallel threads, and calling methods on span objects created
        in other threads can cause OpenTelemetry context issues.
        """
        try:
            data = span.span_data
            # Declare as Any since different code paths return different observation types
            langfuse_span: Any = None

            # Get Langfuse IDs and metadata under lock for thread-safe access
            with self._lock:
                trace_metadata = self._trace_metadata.get(span.trace_id)
                langfuse_trace_id = self._langfuse_trace_ids.get(span.trace_id)
                # Get parent's Langfuse span ID
                if span.parent_id is not None:
                    parent_langfuse_id = self._langfuse_span_ids.get(span.parent_id)
                else:
                    # Parent is the root trace span (use trace_id as key)
                    parent_langfuse_id = self._langfuse_span_ids.get(span.trace_id)

            # If no trace ID found, we can't create a properly linked span
            if langfuse_trace_id is None:
                logger.warning(
                    f"No Langfuse trace ID found for span {span.span_id}, creating orphan"
                )
                # Fall back to creating an orphan span
                # In Langfuse SDK v3, use start_observation instead of start_span
                client = self._get_client()
                langfuse_span = client.start_observation(
                    name=data.type if hasattr(data, "type") else "unknown",
                )
                with self._lock:
                    self._spans[span.span_id] = langfuse_span
                    self._langfuse_span_ids[span.span_id] = langfuse_span.id
                return

            client = self._get_client()

            # Build trace_context for thread-safe parent linking
            # This uses immutable string IDs instead of mutable span objects
            # Type is Any to satisfy SDK's TraceContext type while passing a dict
            trace_context: Any = {"trace_id": langfuse_trace_id}
            if parent_langfuse_id:
                trace_context["parent_span_id"] = parent_langfuse_id

            # Create spans using trace_context (thread-safe ID-based approach)
            # In Langfuse SDK v3, use start_observation with as_type parameter
            if isinstance(data, GenerationSpanData):
                langfuse_span = client.start_observation(  # type: ignore[call-overload]
                    trace_context=trace_context,
                    name=self._get_generation_name(data),
                    as_type="generation",
                    metadata=trace_metadata,
                    model=data.model,
                    model_parameters=self._get_model_parameters(data),
                )
            elif isinstance(data, FunctionSpanData):
                langfuse_span = client.start_observation(
                    trace_context=trace_context,
                    name=data.name,
                    as_type="tool",
                    metadata=trace_metadata,
                )
            elif isinstance(data, AgentSpanData):
                langfuse_span = client.start_observation(
                    trace_context=trace_context,
                    name=data.name,
                    as_type="agent",
                    metadata={
                        **(trace_metadata or {}),
                        "tools": data.tools,
                        "handoffs": data.handoffs,
                        "output_type": data.output_type,
                    },
                )
            else:
                langfuse_span = client.start_observation(
                    trace_context=trace_context,
                    name=data.type if hasattr(data, "type") else "unknown",
                    as_type="span",
                    metadata=trace_metadata,
                )

            with self._lock:
                self._spans[span.span_id] = langfuse_span
                # Store Langfuse span ID for future children to reference
                self._langfuse_span_ids[span.span_id] = langfuse_span.id
        except Exception as e:
            logger.error(f"Error starting Langfuse span: {e}")

    def on_span_end(self, span: Span[SpanData]) -> None:
        """Called when a span is finished."""
        try:
            with self._lock:
                langfuse_span = self._spans.pop(span.span_id, None)
                self._langfuse_span_ids.pop(span.span_id, None)  # Clean up ID mapping

            if not langfuse_span:
                return

            data = span.span_data
            input_data: Optional[Any] = None
            output_data: Optional[Any] = None

            if isinstance(data, GenerationSpanData):
                input_data = data.input
                output_data = data.output
                usage = self._get_usage_details(data)
                cost = self._calculate_cost(data)

                update_kwargs: dict[str, Any] = {
                    "input": self._mask_if_enabled(input_data),
                    "output": self._mask_if_enabled(output_data),
                }
                if usage:
                    update_kwargs["usage_details"] = usage
                if cost is not None:
                    update_kwargs["cost_details"] = {"total": cost}
                if data.reasoning:
                    update_kwargs["metadata"] = {"reasoning": data.reasoning}
                if data.time_to_first_action_seconds is not None:
                    update_kwargs["completion_start_time"] = _timestamp_from_maybe_iso(
                        span.started_at
                    )

                langfuse_span.update(**update_kwargs)

            elif isinstance(data, FunctionSpanData):
                input_data = data.input
                output_data = data.output
                langfuse_span.update(
                    input=self._mask_if_enabled(input_data),
                    output=self._mask_if_enabled(output_data),
                )

            elif isinstance(data, AgentSpanData):
                # Agent spans don't have direct input/output
                pass

            # Handle errors
            if span.error:
                langfuse_span.update(
                    level="ERROR",
                    status_message=f"{span.error.get('message')}: {span.error.get('data')}",
                )

            langfuse_span.end()

            # Store first input and last output per trace_id
            trace_id = span.trace_id
            with self._lock:
                if trace_id not in self._first_input and input_data is not None:
                    self._first_input[trace_id] = input_data

                if output_data is not None:
                    self._last_output[trace_id] = output_data

        except Exception as e:
            logger.error(f"Error ending Langfuse span: {e}")

    def _get_generation_name(self, data: GenerationSpanData) -> str:
        """Get a descriptive name for a generation span."""
        if data.model:
            return f"Generation with {data.model}"
        return "Generation"

    def _get_model_parameters(
        self, data: GenerationSpanData
    ) -> Optional[dict[str, Union[str, int, bool, None]]]:
        """Extract model parameters from generation span data."""
        if not isinstance(data.model_config, dict):
            return None

        params: dict[str, Union[str, int, bool, None]] = {}
        for key in [
            "temperature",
            "max_tokens",
            "top_p",
            "frequency_penalty",
            "presence_penalty",
        ]:
            if key in data.model_config:
                params[key] = data.model_config[key]
        return params if params else None

    def _get_usage_details(self, data: GenerationSpanData) -> Optional[dict[str, int]]:
        """Extract usage details from generation span data."""
        usage = data.usage or {}
        details: dict[str, int] = {}

        prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens")
        if prompt_tokens is not None:
            details["input"] = int(prompt_tokens)

        completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens")
        if completion_tokens is not None:
            details["output"] = int(completion_tokens)

        if "total_tokens" in usage:
            details["total"] = int(usage["total_tokens"])
        elif details.get("input") and details.get("output"):
            details["total"] = details["input"] + details["output"]

        # Cache-related tokens
        if "cache_read_input_tokens" in usage:
            details["cache_read_input_tokens"] = int(usage["cache_read_input_tokens"])
        if "cache_creation_input_tokens" in usage:
            details["cache_creation_input_tokens"] = int(
                usage["cache_creation_input_tokens"]
            )

        return details if details else None

    def force_flush(self) -> None:
        """Forces an immediate flush of all queued spans/traces."""
        try:
            client = self._get_client()
            if client:
                client.flush()
        except Exception as e:
            logger.warning(f"Failed to flush Langfuse client: {e}")

    def shutdown(self) -> None:
        """Called when the application stops."""
        try:
            self.force_flush()
            client = self._get_client()
            if client:
                client.shutdown()
        except Exception as e:
            logger.warning(f"Failed to shutdown Langfuse client: {e}")


================================================
FILE: backend/onyx/tracing/llm_utils.py
================================================
from __future__ import annotations

from collections.abc import Iterator
from collections.abc import Mapping
from collections.abc import Sequence
from contextlib import contextmanager
from typing import Any
from typing import cast

from onyx.llm.interfaces import LLM
from onyx.llm.model_response import ModelResponse
from onyx.llm.models import ToolCall
from onyx.tracing.framework.create import generation_span
from onyx.tracing.framework.span_data import GenerationSpanData
from onyx.tracing.framework.spans import Span


def build_llm_model_config(llm: LLM, flow: str | None = None) -> dict[str, str]:
    model_config: dict[str, str] = {
        "base_url": str(llm.config.api_base or ""),
        "model_provider": llm.config.model_provider,
    }
    if flow:
        model_config["flow"] = flow
    return model_config


@contextmanager
def llm_generation_span(
    llm: LLM,
    flow: str | None,
    input_messages: Sequence[Any] | Any | None = None,
    parent: Any | None = None,
) -> Iterator[Span[GenerationSpanData]]:
    with generation_span(
        model=llm.config.model_name,
        model_config=build_llm_model_config(llm, flow),
        parent=parent,
    ) as span:
        if input_messages is not None:
            if isinstance(input_messages, Sequence) and not isinstance(
                input_messages, (str, bytes)
            ):
                normalized_messages = input_messages
            else:
                normalized_messages = [input_messages]
            span.span_data.input = cast(
                Sequence[Mapping[str, Any]], normalized_messages
            )
        yield span


def record_llm_response(
    span: Span[GenerationSpanData],
    response: ModelResponse,
) -> None:
    """Standard way to record a complete LLM response to a generation span.

    Extracts content, reasoning, tool_calls, and usage automatically from the
    ModelResponse object.

    Args:
        span: The generation span to record to.
        response: The ModelResponse from the LLM.
    """
    message = response.choice.message

    # Build output dict matching AssistantMessage format
    output_dict: dict[str, Any] = {"role": "assistant"}

    if message.content is not None:
        output_dict["content"] = message.content

    if message.tool_calls:
        output_dict["tool_calls"] = [tc.model_dump() for tc in message.tool_calls]

    span.span_data.output = [output_dict]

    # Record reasoning (extended thinking from reasoning models)
    if message.reasoning_content:
        span.span_data.reasoning = message.reasoning_content

    # Record usage
    if response.usage:
        usage_dict = _build_usage_dict(response.usage)
        if usage_dict:
            span.span_data.usage = usage_dict


def record_llm_span_output(
    span: Span[GenerationSpanData],
    output: str | Sequence[Mapping[str, Any]] | None,
    usage: Any | None = None,
    reasoning: str | None = None,
    tool_calls: list[ToolCall] | None = None,
) -> None:
    """Record LLM output to a generation span for streaming scenarios.

    This function is useful for streaming where content, reasoning, tool_calls,
    and usage are accumulated separately.

    Args:
        span: The generation span to record to.
        output: The text output or list of message dicts.
        usage: Optional usage information.
        reasoning: Optional reasoning/extended thinking content.
        tool_calls: Optional list of tool calls.
    """
    if output is None:
        output_dict: dict[str, Any] = {"role": "assistant", "content": None}
        if tool_calls:
            output_dict["tool_calls"] = [tc.model_dump() for tc in tool_calls]
        span.span_data.output = [output_dict]
    elif isinstance(output, str):
        output_dict = {"role": "assistant", "content": output}
        if tool_calls:
            output_dict["tool_calls"] = [tc.model_dump() for tc in tool_calls]
        span.span_data.output = [output_dict]
    else:
        span.span_data.output = cast(Sequence[Mapping[str, Any]], output)

    usage_dict = _build_usage_dict(usage)
    if usage_dict:
        span.span_data.usage = usage_dict

    if reasoning:
        span.span_data.reasoning = reasoning


def _build_usage_dict(usage: Any | None) -> dict[str, Any] | None:
    if not usage:
        return None
    if isinstance(usage, dict):
        return usage

    usage_dict: dict[str, Any] = {}
    prompt_tokens = getattr(usage, "prompt_tokens", None)
    completion_tokens = getattr(usage, "completion_tokens", None)
    input_tokens = getattr(usage, "input_tokens", None)
    output_tokens = getattr(usage, "output_tokens", None)
    total_tokens = getattr(usage, "total_tokens", None)
    cache_read_input_tokens = getattr(usage, "cache_read_input_tokens", None)
    cache_creation_input_tokens = getattr(usage, "cache_creation_input_tokens", None)

    if prompt_tokens is not None:
        usage_dict["input_tokens"] = prompt_tokens
    elif input_tokens is not None:
        usage_dict["input_tokens"] = input_tokens
    if completion_tokens is not None:
        usage_dict["output_tokens"] = completion_tokens
    elif output_tokens is not None:
        usage_dict["output_tokens"] = output_tokens
    if total_tokens is not None:
        usage_dict["total_tokens"] = total_tokens
    if cache_read_input_tokens is not None:
        usage_dict["cache_read_input_tokens"] = cache_read_input_tokens
    if cache_creation_input_tokens is not None:
        usage_dict["cache_creation_input_tokens"] = cache_creation_input_tokens

    return usage_dict or None


================================================
FILE: backend/onyx/tracing/masking.py
================================================
"""Shared data masking utilities for tracing processors."""

import os
import re
from typing import Any

# Set loosely because some tool call results may be very long.
# Ideally we don't pass those to the LLM but it's fine if we want to trace them in full.
MASKING_LENGTH = int(os.environ.get("TRACING_MASKING_LENGTH", "500000"))


def _truncate_str(s: str) -> str:
    """Truncate a string that exceeds MASKING_LENGTH."""
    tail = MASKING_LENGTH // 5
    head = MASKING_LENGTH - tail
    # Handle edge case where tail is 0 (when MASKING_LENGTH < 5)
    # s[-0:] returns the entire string, so we must check explicitly
    tail_part = s[-tail:] if tail > 0 else ""
    return f"{s[:head]}...{tail_part}[TRUNCATED {len(s)} chars to {MASKING_LENGTH}]"


def mask_sensitive_data(data: Any) -> Any:
    """Mask data if it exceeds the maximum length threshold or contains sensitive information.

    Handles:
    - Dictionaries: recursively masks values, redacts keys containing 'private_key' or 'authorization'
    - Lists: recursively masks each item
    - Strings: redacts private_key patterns, Authorization Bearer tokens, truncates long strings
    - Other types: truncates if string representation exceeds threshold
    """
    # Handle dictionaries recursively
    if isinstance(data, dict):
        masked_dict = {}
        for key, value in data.items():
            # Mask private keys and authorization headers
            if isinstance(key, str) and (
                "private_key" in key.lower() or "authorization" in key.lower()
            ):
                masked_dict[key] = "***REDACTED***"
            else:
                masked_dict[key] = mask_sensitive_data(value)
        return masked_dict

    # Handle lists recursively
    if isinstance(data, list):
        return [mask_sensitive_data(item) for item in data]

    # Handle strings
    if isinstance(data, str):
        # Mask private_key patterns
        if "private_key" in data.lower():
            return "***REDACTED***"

        # Mask Authorization: Bearer tokens
        # Pattern matches "Authorization: Bearer <token>" or "authorization: bearer <token>"
        if re.search(r"authorization:\s*bearer\s+\S+", data, re.IGNORECASE):
            data = re.sub(
                r"(authorization:\s*bearer\s+)\S+",
                r"\1***REDACTED***",
                data,
                flags=re.IGNORECASE,
            )

        if len(data) <= MASKING_LENGTH:
            return data
        return _truncate_str(data)

    # For other types, check length
    if len(str(data)) <= MASKING_LENGTH:
        return data
    return _truncate_str(str(data))


================================================
FILE: backend/onyx/tracing/setup.py
================================================
"""Unified tracing setup for all providers (Braintrust, Langfuse, etc.)."""

from onyx.configs.app_configs import BRAINTRUST_API_KEY
from onyx.configs.app_configs import BRAINTRUST_PROJECT
from onyx.configs.app_configs import LANGFUSE_HOST
from onyx.configs.app_configs import LANGFUSE_PUBLIC_KEY
from onyx.configs.app_configs import LANGFUSE_SECRET_KEY
from onyx.utils.logger import setup_logger

logger = setup_logger()

_initialized = False


def setup_tracing() -> list[str]:
    """Initialize all configured tracing providers.

    Returns a list of provider names that were successfully initialized.
    Uses add_trace_processor() to ADD processors rather than replacing them,
    allowing multiple providers to receive trace events simultaneously.

    This function is idempotent - calling it multiple times will only
    initialize providers once.
    """
    global _initialized
    if _initialized:
        logger.debug("Tracing already initialized, skipping")
        return []

    initialized_providers: list[str] = []

    # Setup Braintrust if configured
    if BRAINTRUST_API_KEY:
        try:
            _setup_braintrust()
            initialized_providers.append("braintrust")
        except Exception as e:
            logger.error(f"Failed to initialize Braintrust tracing: {e}")
    else:
        logger.info("Braintrust API key not provided, skipping Braintrust setup")

    # Setup Langfuse if configured
    if LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY:
        try:
            _setup_langfuse()
            initialized_providers.append("langfuse")
        except Exception as e:
            logger.error(f"Failed to initialize Langfuse tracing: {e}")
    else:
        logger.info("Langfuse credentials not provided, skipping Langfuse setup")

    _initialized = True

    if initialized_providers:
        logger.notice(
            f"Tracing initialized with providers: {', '.join(initialized_providers)}"
        )
    else:
        logger.info("No tracing providers configured")

    return initialized_providers


def _setup_braintrust() -> None:
    """Initialize Braintrust tracing."""
    import braintrust

    from onyx.tracing.braintrust_tracing_processor import BraintrustTracingProcessor
    from onyx.tracing.framework import add_trace_processor
    from onyx.tracing.masking import mask_sensitive_data

    braintrust_logger = braintrust.init_logger(
        project=BRAINTRUST_PROJECT,
        api_key=BRAINTRUST_API_KEY,
    )
    braintrust.set_masking_function(mask_sensitive_data)
    add_trace_processor(BraintrustTracingProcessor(braintrust_logger))


def _setup_langfuse() -> None:
    """Initialize Langfuse tracing using the native Langfuse SDK."""
    import os

    from langfuse import Langfuse

    from onyx.tracing.framework import add_trace_processor
    from onyx.tracing.langfuse_tracing_processor import LangfuseTracingProcessor

    # Set LANGFUSE_HOST env var if configured (Langfuse SDK reads this automatically)
    if LANGFUSE_HOST:
        os.environ["LANGFUSE_HOST"] = LANGFUSE_HOST

    # Initialize Langfuse client with credentials
    client = Langfuse(
        public_key=LANGFUSE_PUBLIC_KEY,
        secret_key=LANGFUSE_SECRET_KEY,
        host=LANGFUSE_HOST if LANGFUSE_HOST else None,
    )

    add_trace_processor(LangfuseTracingProcessor(client=client))


================================================
FILE: backend/onyx/utils/__init__.py
================================================


================================================
FILE: backend/onyx/utils/b64.py
================================================
import base64


def get_image_type_from_bytes(raw_b64_bytes: bytes) -> str:
    magic_number = raw_b64_bytes[:4]

    if magic_number.startswith(b"\x89PNG"):
        mime_type = "image/png"
    elif magic_number.startswith(b"\xff\xd8"):
        mime_type = "image/jpeg"
    elif magic_number.startswith(b"GIF8"):
        mime_type = "image/gif"
    elif magic_number.startswith(b"RIFF") and raw_b64_bytes[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        raise ValueError(
            "Unsupported image format - only PNG, JPEG, GIF, and WEBP are supported."
        )

    return mime_type


def get_image_type(raw_b64_string: str) -> str:
    binary_data = base64.b64decode(raw_b64_string)
    return get_image_type_from_bytes(binary_data)


================================================
FILE: backend/onyx/utils/batching.py
================================================
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterable
from itertools import islice
from typing import TypeVar

T = TypeVar("T")


def batch_generator(
    items: Iterable[T],
    batch_size: int,
    pre_batch_yield: Callable[[list[T]], None] | None = None,
) -> Generator[list[T], None, None]:
    """Yields batches of items from an iterable.

    Optionally invokes a callback before yielding each batch.
    """
    iterator = iter(items)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            return

        if pre_batch_yield:
            pre_batch_yield(batch)
        yield batch


================================================
FILE: backend/onyx/utils/callbacks.py
================================================
from typing import Generic
from typing import TypeVar

T = TypeVar("T")


class MetricsHander(Generic[T]):
    def __init__(self) -> None:
        self.metrics: T | None = None

    def record_metric(self, metrics: T) -> None:
        self.metrics = metrics


================================================
FILE: backend/onyx/utils/encryption.py
================================================
from typing import Any

from onyx.configs.app_configs import ENCRYPTION_KEY_SECRET
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation

logger = setup_logger()


# IMPORTANT DO NOT DELETE, THIS IS USED BY fetch_versioned_implementation
def _encrypt_string(input_str: str, key: str | None = None) -> bytes:
    if ENCRYPTION_KEY_SECRET:
        logger.warning("MIT version of Onyx does not support encryption of secrets.")
    elif key is not None:
        logger.debug("MIT encrypt called with explicit key — key ignored.")
    return input_str.encode()


# IMPORTANT DO NOT DELETE, THIS IS USED BY fetch_versioned_implementation
def _decrypt_bytes(input_bytes: bytes, key: str | None = None) -> str:
    if ENCRYPTION_KEY_SECRET:
        logger.warning("MIT version of Onyx does not support decryption of secrets.")
    elif key is not None:
        logger.debug("MIT decrypt called with explicit key — key ignored.")
    return input_bytes.decode()


def mask_string(sensitive_str: str) -> str:
    """Masks a sensitive string, showing first and last few characters.
    If the string is too short to safely mask, returns a fully masked placeholder.
    """
    visible_start = 4
    visible_end = 4
    min_masked_chars = 6

    if len(sensitive_str) < visible_start + visible_end + min_masked_chars:
        return "••••••••••••"

    return f"{sensitive_str[:visible_start]}...{sensitive_str[-visible_end:]}"


MASK_CREDENTIALS_WHITELIST = {
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
    "wiki_base",
    "cloud_name",
    "cloud_id",
}


def mask_credential_dict(credential_dict: dict[str, Any]) -> dict[str, Any]:
    masked_creds: dict[str, Any] = {}
    for key, val in credential_dict.items():
        if isinstance(val, str):
            # we want to pass the authentication_method field through so the frontend
            # can disambiguate credentials created by different methods
            if key in MASK_CREDENTIALS_WHITELIST:
                masked_creds[key] = val
            else:
                masked_creds[key] = mask_string(val)
        elif isinstance(val, dict):
            masked_creds[key] = mask_credential_dict(val)
        elif isinstance(val, list):
            masked_creds[key] = _mask_list(val)
        elif isinstance(val, (bool, type(None))):
            masked_creds[key] = val
        elif isinstance(val, (int, float)):
            masked_creds[key] = "*****"
        else:
            masked_creds[key] = "*****"

    return masked_creds


def _mask_list(items: list[Any]) -> list[Any]:
    masked: list[Any] = []
    for item in items:
        if isinstance(item, dict):
            masked.append(mask_credential_dict(item))
        elif isinstance(item, str):
            masked.append(mask_string(item))
        elif isinstance(item, list):
            masked.append(_mask_list(item))
        elif isinstance(item, (bool, type(None))):
            masked.append(item)
        else:
            masked.append("*****")
    return masked


def encrypt_string_to_bytes(intput_str: str, key: str | None = None) -> bytes:
    versioned_encryption_fn = fetch_versioned_implementation(
        "onyx.utils.encryption", "_encrypt_string"
    )
    return versioned_encryption_fn(intput_str, key=key)


def decrypt_bytes_to_string(intput_bytes: bytes, key: str | None = None) -> str:
    versioned_decryption_fn = fetch_versioned_implementation(
        "onyx.utils.encryption", "_decrypt_bytes"
    )
    return versioned_decryption_fn(intput_bytes, key=key)


================================================
FILE: backend/onyx/utils/error_handling.py
================================================
"""
Standardized error handling utilities.
"""

from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.utils.logger import setup_logger

logger = setup_logger()


def handle_connector_error(e: Exception, context: str) -> None:
    """
    Standard error handling for connectors.

    Args:
        e: The exception that was raised
        context: A description of where the error occurred

    Raises:
        The original exception if CONTINUE_ON_CONNECTOR_FAILURE is False
    """
    logger.error(f"Error in {context}: {e}", exc_info=e)
    if not CONTINUE_ON_CONNECTOR_FAILURE:
        raise


================================================
FILE: backend/onyx/utils/errors.py
================================================
class EERequiredError(Exception):
    """This error is thrown if an Enterprise Edition feature or API is
    requested but the Enterprise Edition flag is not set."""


================================================
FILE: backend/onyx/utils/file.py
================================================
from typing import cast

import puremagic
from pydantic import BaseModel

from onyx.utils.logger import setup_logger

logger = setup_logger()


class FileWithMimeType(BaseModel):
    data: bytes
    mime_type: str


class OnyxStaticFileManager:
    """Retrieve static resources with this class. Currently, these should all be located
    in the static directory ... e.g. static/images/logo.png"""

    @staticmethod
    def get_static(filename: str) -> FileWithMimeType | None:
        try:
            mime_type: str = "application/octet-stream"
            with open(filename, "rb") as f:
                file_content = f.read()
                matches = puremagic.magic_string(file_content)
                if matches:
                    mime_type = cast(str, matches[0].mime_type)
        except (OSError, FileNotFoundError, PermissionError) as e:
            logger.error(f"Failed to read file {filename}: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected exception reading file {filename}: {e}")
            return None

        return FileWithMimeType(data=file_content, mime_type=mime_type)


================================================
FILE: backend/onyx/utils/gpu_utils.py
================================================
import os
from functools import lru_cache

import requests
from retry import retry

from onyx.utils.logger import setup_logger
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT

logger = setup_logger()


def _get_gpu_status_from_model_server(indexing: bool) -> bool:
    if os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true":
        logger.info("DISABLE_MODEL_SERVER is set, assuming no GPU available")
        return False
    if indexing:
        model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}"
    else:
        model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}"

    if "http" not in model_server_url:
        model_server_url = f"http://{model_server_url}"

    try:
        response = requests.get(f"{model_server_url}/api/gpu-status", timeout=10)
        response.raise_for_status()
        gpu_status = response.json()
        return gpu_status["gpu_available"]
    except requests.RequestException as e:
        logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}")
        raise  # Re-raise exception to trigger a retry


@retry(tries=5, delay=5)
def gpu_status_request(indexing: bool) -> bool:
    return _get_gpu_status_from_model_server(indexing)


@lru_cache(maxsize=1)
def fast_gpu_status_request(indexing: bool) -> bool:
    """For use in sync flows, where we don't want to retry / we want to cache this."""
    return gpu_status_request(indexing=indexing)


================================================
FILE: backend/onyx/utils/headers.py
================================================
from typing import TypedDict

from fastapi.datastructures import Headers

from onyx.configs.model_configs import LITELLM_EXTRA_HEADERS
from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.configs.tool_configs import CUSTOM_TOOL_PASS_THROUGH_HEADERS
from onyx.utils.logger import setup_logger

logger = setup_logger()


class HeaderItemDict(TypedDict):
    key: str
    value: str


def clean_header_list(headers_to_clean: list[HeaderItemDict]) -> dict[str, str]:
    cleaned_headers: dict[str, str] = {}
    for item in headers_to_clean:
        key = item["key"]
        value = item["value"]
        if key in cleaned_headers:
            logger.warning(
                f"Duplicate header {key} found in custom headers, ignoring..."
            )
            continue
        cleaned_headers[key] = value
    return cleaned_headers


def header_dict_to_header_list(header_dict: dict[str, str]) -> list[HeaderItemDict]:
    return [{"key": key, "value": value} for key, value in header_dict.items()]


def header_list_to_header_dict(header_list: list[HeaderItemDict]) -> dict[str, str]:
    return {header["key"]: header["value"] for header in header_list}


def get_relevant_headers(
    headers: dict[str, str] | Headers, desired_headers: list[str] | None
) -> dict[str, str]:
    if not desired_headers:
        return {}

    pass_through_headers: dict[str, str] = {}
    for key in desired_headers:
        if key in headers:
            pass_through_headers[key] = headers[key]
        else:
            # fastapi makes all header keys lowercase, handling that here
            lowercase_key = key.lower()
            if lowercase_key in headers:
                pass_through_headers[lowercase_key] = headers[lowercase_key]

    return pass_through_headers


def get_litellm_additional_request_headers(
    headers: dict[str, str] | Headers,
) -> dict[str, str]:
    return get_relevant_headers(headers, LITELLM_PASS_THROUGH_HEADERS)


def build_llm_extra_headers(
    additional_headers: dict[str, str] | None = None,
) -> dict[str, str]:
    extra_headers: dict[str, str] = {}
    if additional_headers:
        extra_headers.update(additional_headers)
    if LITELLM_EXTRA_HEADERS:
        extra_headers.update(LITELLM_EXTRA_HEADERS)
    return extra_headers


def get_custom_tool_additional_request_headers(
    headers: dict[str, str] | Headers,
) -> dict[str, str]:
    return get_relevant_headers(headers, CUSTOM_TOOL_PASS_THROUGH_HEADERS)


================================================
FILE: backend/onyx/utils/jsonriver/__init__.py
================================================
"""
jsonriver - A streaming JSON parser for Python

Parse JSON incrementally as it streams in, e.g. from a network request or a language model.
Gives you a sequence of increasingly complete values.

Copyright (c) 2023 Google LLC (original TypeScript implementation)
Copyright (c) 2024 jsonriver-python contributors (Python port)
SPDX-License-Identifier: BSD-3-Clause
"""

from .parse import _Parser as Parser
from .parse import JsonObject
from .parse import JsonValue

__all__ = ["Parser", "JsonValue", "JsonObject"]
__version__ = "0.0.1"


================================================
FILE: backend/onyx/utils/jsonriver/parse.py
================================================
"""
JSON parser for streaming incremental parsing

Copyright (c) 2023 Google LLC (original TypeScript implementation)
Copyright (c) 2024 jsonriver-python contributors (Python port)
SPDX-License-Identifier: BSD-3-Clause
"""

from __future__ import annotations

import copy
from enum import IntEnum
from typing import cast
from typing import Union

from .tokenize import _Input
from .tokenize import json_token_type_to_string
from .tokenize import JsonTokenType
from .tokenize import Tokenizer


# Type definitions for JSON values
JsonValue = Union[None, bool, float, str, list["JsonValue"], dict[str, "JsonValue"]]
JsonObject = dict[str, JsonValue]


class _StateEnum(IntEnum):
    """Parser state machine states"""

    Initial = 0
    InString = 1
    InArray = 2
    InObjectExpectingKey = 3
    InObjectExpectingValue = 4


class _State:
    """Base class for parser states"""

    type: _StateEnum
    value: JsonValue | tuple[str, JsonObject] | None


class _InitialState(_State):
    """Initial state before any parsing"""

    def __init__(self) -> None:
        self.type = _StateEnum.Initial
        self.value = None


class _InStringState(_State):
    """State while parsing a string"""

    def __init__(self) -> None:
        self.type = _StateEnum.InString
        self.value = ""


class _InArrayState(_State):
    """State while parsing an array"""

    def __init__(self) -> None:
        self.type = _StateEnum.InArray
        self.value: list[JsonValue] = []


class _InObjectExpectingKeyState(_State):
    """State while parsing an object, expecting a key"""

    def __init__(self) -> None:
        self.type = _StateEnum.InObjectExpectingKey
        self.value: JsonObject = {}


class _InObjectExpectingValueState(_State):
    """State while parsing an object, expecting a value"""

    def __init__(self, key: str, obj: JsonObject) -> None:
        self.type = _StateEnum.InObjectExpectingValue
        self.value = (key, obj)


# Sentinel value to distinguish "not set" from "set to None/null"
class _Unset:
    pass


_UNSET = _Unset()


class _Parser:
    """
    Incremental JSON parser

    Feed chunks of JSON text via feed() and get back progressively
    more complete JSON values.
    """

    def __init__(self) -> None:
        self._state_stack: list[_State] = [_InitialState()]
        self._toplevel_value: JsonValue | _Unset = _UNSET
        self._input = _Input()
        self.tokenizer = Tokenizer(self._input, self)
        self._finished = False
        self._progressed = False
        self._prev_snapshot: JsonValue | _Unset = _UNSET

    def feed(self, chunk: str) -> list[JsonValue]:
        """
        Feed a chunk of JSON text and return deltas from the previous state.

        Each element in the returned list represents what changed since the
        last yielded value. For dicts, only changed/new keys are included,
        with string values containing only the newly appended characters.
        """
        if self._finished:
            return []

        self._input.feed(chunk)
        return self._collect_deltas()

    @staticmethod
    def _compute_delta(prev: JsonValue | None, current: JsonValue) -> JsonValue | None:
        if prev is None:
            return current

        if isinstance(current, dict) and isinstance(prev, dict):
            result: JsonObject = {}
            for key in current:
                cur_val = current[key]
                prev_val = prev.get(key)
                if key not in prev:
                    result[key] = cur_val
                elif isinstance(cur_val, str) and isinstance(prev_val, str):
                    if cur_val != prev_val:
                        result[key] = cur_val[len(prev_val) :]
                elif isinstance(cur_val, list) and isinstance(prev_val, list):
                    if cur_val != prev_val:
                        new_items = cur_val[len(prev_val) :]
                        # check if the last existing element was updated
                        if (
                            prev_val
                            and len(cur_val) >= len(prev_val)
                            and cur_val[len(prev_val) - 1] != prev_val[-1]
                        ):
                            result[key] = [cur_val[len(prev_val) - 1]] + new_items
                        elif new_items:
                            result[key] = new_items
                elif cur_val != prev_val:
                    result[key] = cur_val
            return result if result else None

        if isinstance(current, str) and isinstance(prev, str):
            delta = current[len(prev) :]
            return delta if delta else None

        if isinstance(current, list) and isinstance(prev, list):
            if current != prev:
                new_items = current[len(prev) :]
                if (
                    prev
                    and len(current) >= len(prev)
                    and current[len(prev) - 1] != prev[-1]
                ):
                    return [current[len(prev) - 1]] + new_items
                return new_items if new_items else None
            return None

        if current != prev:
            return current
        return None

    def finish(self) -> list[JsonValue]:
        """Signal that no more chunks will be fed. Validates trailing content.

        Returns any final deltas produced by flushing pending tokens (e.g.
        numbers, which have no terminator and wait for more input).
        """
        self._input.mark_complete()
        # Pump once more so the tokenizer can emit tokens that were waiting
        # for more input (e.g. numbers need buffer_complete to finalize).
        results = self._collect_deltas()
        self._input.expect_end_of_content()
        return results

    def _collect_deltas(self) -> list[JsonValue]:
        """Run one pump cycle and return any deltas produced."""
        results: list[JsonValue] = []
        while True:
            self._progressed = False
            self.tokenizer.pump()

            if self._progressed:
                if self._toplevel_value is _UNSET:
                    raise RuntimeError(
                        "Internal error: toplevel_value should not be unset after progressing"
                    )
                current = copy.deepcopy(cast(JsonValue, self._toplevel_value))
                if isinstance(self._prev_snapshot, _Unset):
                    results.append(current)
                else:
                    delta = self._compute_delta(self._prev_snapshot, current)
                    if delta is not None:
                        results.append(delta)
                self._prev_snapshot = current
            else:
                if not self._state_stack:
                    self._finished = True
                break
        return results

    # TokenHandler protocol implementation

    def handle_null(self) -> None:
        """Handle null token"""
        self._handle_value_token(JsonTokenType.Null, None)

    def handle_boolean(self, value: bool) -> None:
        """Handle boolean token"""
        self._handle_value_token(JsonTokenType.Boolean, value)

    def handle_number(self, value: float) -> None:
        """Handle number token"""
        self._handle_value_token(JsonTokenType.Number, value)

    def handle_string_start(self) -> None:
        """Handle string start token"""
        state = self._current_state()
        if not self._progressed and state.type != _StateEnum.InObjectExpectingKey:
            self._progressed = True

        if state.type == _StateEnum.Initial:
            self._state_stack.pop()
            self._toplevel_value = self._progress_value(JsonTokenType.StringStart, None)

        elif state.type == _StateEnum.InArray:
            v = self._progress_value(JsonTokenType.StringStart, None)
            arr = cast(list[JsonValue], state.value)
            arr.append(v)

        elif state.type == _StateEnum.InObjectExpectingKey:
            self._state_stack.append(_InStringState())

        elif state.type == _StateEnum.InObjectExpectingValue:
            key, obj = cast(tuple[str, JsonObject], state.value)
            sv = self._progress_value(JsonTokenType.StringStart, None)
            obj[key] = sv

        elif state.type == _StateEnum.InString:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(JsonTokenType.StringStart)} token in the middle of string"
            )

    def handle_string_middle(self, value: str) -> None:
        """Handle string middle token"""
        state = self._current_state()

        if not self._progressed:
            if len(self._state_stack) >= 2:
                prev = self._state_stack[-2]
                if prev.type != _StateEnum.InObjectExpectingKey:
                    self._progressed = True
            else:
                self._progressed = True

        if state.type != _StateEnum.InString:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(JsonTokenType.StringMiddle)} token when not in string"
            )

        assert isinstance(state.value, str)
        state.value += value

        parent_state = self._state_stack[-2] if len(self._state_stack) >= 2 else None
        self._update_string_parent(state.value, parent_state)

    def handle_string_end(self) -> None:
        """Handle string end token"""
        state = self._current_state()

        if state.type != _StateEnum.InString:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(JsonTokenType.StringEnd)} token when not in string"
            )

        self._state_stack.pop()
        parent_state = self._state_stack[-1] if self._state_stack else None
        assert isinstance(state.value, str)
        self._update_string_parent(state.value, parent_state)

    def handle_array_start(self) -> None:
        """Handle array start token"""
        self._handle_value_token(JsonTokenType.ArrayStart, None)

    def handle_array_end(self) -> None:
        """Handle array end token"""
        state = self._current_state()
        if state.type != _StateEnum.InArray:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(JsonTokenType.ArrayEnd)} token"
            )
        self._state_stack.pop()

    def handle_object_start(self) -> None:
        """Handle object start token"""
        self._handle_value_token(JsonTokenType.ObjectStart, None)

    def handle_object_end(self) -> None:
        """Handle object end token"""
        state = self._current_state()

        if state.type in (
            _StateEnum.InObjectExpectingKey,
            _StateEnum.InObjectExpectingValue,
        ):
            self._state_stack.pop()
        else:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(JsonTokenType.ObjectEnd)} token"
            )

    # Private helper methods

    def _current_state(self) -> _State:
        """Get current parser state"""
        if not self._state_stack:
            raise ValueError("Unexpected trailing input")
        return self._state_stack[-1]

    def _handle_value_token(self, token_type: JsonTokenType, value: JsonValue) -> None:
        """Handle a complete value token"""
        state = self._current_state()

        if not self._progressed:
            self._progressed = True

        if state.type == _StateEnum.Initial:
            self._state_stack.pop()
            self._toplevel_value = self._progress_value(token_type, value)

        elif state.type == _StateEnum.InArray:
            v = self._progress_value(token_type, value)
            arr = cast(list[JsonValue], state.value)
            arr.append(v)

        elif state.type == _StateEnum.InObjectExpectingValue:
            key, obj = cast(tuple[str, JsonObject], state.value)
            if token_type != JsonTokenType.StringStart:
                self._state_stack.pop()
                new_state = _InObjectExpectingKeyState()
                new_state.value = obj
                self._state_stack.append(new_state)

            v = self._progress_value(token_type, value)
            obj[key] = v

        elif state.type == _StateEnum.InString:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(token_type)} token in the middle of string"
            )

        elif state.type == _StateEnum.InObjectExpectingKey:
            raise ValueError(
                f"Unexpected {json_token_type_to_string(token_type)} token in the middle of object expecting key"
            )

    def _update_string_parent(self, updated: str, parent_state: _State | None) -> None:
        """Update parent container with updated string value"""
        if parent_state is None:
            self._toplevel_value = updated

        elif parent_state.type == _StateEnum.InArray:
            arr = cast(list[JsonValue], parent_state.value)
            arr[-1] = updated

        elif parent_state.type == _StateEnum.InObjectExpectingValue:
            key, obj = cast(tuple[str, JsonObject], parent_state.value)
            obj[key] = updated
            if self._state_stack and self._state_stack[-1] == parent_state:
                self._state_stack.pop()
                new_state = _InObjectExpectingKeyState()
                new_state.value = obj
                self._state_stack.append(new_state)

        elif parent_state.type == _StateEnum.InObjectExpectingKey:
            if self._state_stack and self._state_stack[-1] == parent_state:
                self._state_stack.pop()
                obj = cast(JsonObject, parent_state.value)
                self._state_stack.append(_InObjectExpectingValueState(updated, obj))

    def _progress_value(self, token_type: JsonTokenType, value: JsonValue) -> JsonValue:
        """Create initial value for a token and push appropriate state"""
        if token_type == JsonTokenType.Null:
            return None

        elif token_type == JsonTokenType.Boolean:
            return value

        elif token_type == JsonTokenType.Number:
            return value

        elif token_type == JsonTokenType.StringStart:
            string_state = _InStringState()
            self._state_stack.append(string_state)
            return ""

        elif token_type == JsonTokenType.ArrayStart:
            array_state = _InArrayState()
            self._state_stack.append(array_state)
            return array_state.value

        elif token_type == JsonTokenType.ObjectStart:
            object_state = _InObjectExpectingKeyState()
            self._state_stack.append(object_state)
            return object_state.value

        else:
            raise ValueError(
                f"Unexpected token type: {json_token_type_to_string(token_type)}"
            )


================================================
FILE: backend/onyx/utils/jsonriver/tokenize.py
================================================
"""
JSON tokenizer for streaming incremental parsing

Copyright (c) 2023 Google LLC (original TypeScript implementation)
Copyright (c) 2024 jsonriver-python contributors (Python port)
SPDX-License-Identifier: BSD-3-Clause
"""

from __future__ import annotations

import re
from enum import IntEnum
from typing import Protocol


class TokenHandler(Protocol):
    """Protocol for handling JSON tokens"""

    def handle_null(self) -> None: ...
    def handle_boolean(self, value: bool) -> None: ...
    def handle_number(self, value: float) -> None: ...
    def handle_string_start(self) -> None: ...
    def handle_string_middle(self, value: str) -> None: ...
    def handle_string_end(self) -> None: ...
    def handle_array_start(self) -> None: ...
    def handle_array_end(self) -> None: ...
    def handle_object_start(self) -> None: ...
    def handle_object_end(self) -> None: ...


class JsonTokenType(IntEnum):
    """Types of JSON tokens"""

    Null = 0
    Boolean = 1
    Number = 2
    StringStart = 3
    StringMiddle = 4
    StringEnd = 5
    ArrayStart = 6
    ArrayEnd = 7
    ObjectStart = 8
    ObjectEnd = 9


def json_token_type_to_string(token_type: JsonTokenType) -> str:
    """Convert token type to readable string"""
    names = {
        JsonTokenType.Null: "null",
        JsonTokenType.Boolean: "boolean",
        JsonTokenType.Number: "number",
        JsonTokenType.StringStart: "string start",
        JsonTokenType.StringMiddle: "string middle",
        JsonTokenType.StringEnd: "string end",
        JsonTokenType.ArrayStart: "array start",
        JsonTokenType.ArrayEnd: "array end",
        JsonTokenType.ObjectStart: "object start",
        JsonTokenType.ObjectEnd: "object end",
    }
    return names[token_type]


class _State(IntEnum):
    """Internal tokenizer states"""

    ExpectingValue = 0
    InString = 1
    StartArray = 2
    AfterArrayValue = 3
    StartObject = 4
    AfterObjectKey = 5
    AfterObjectValue = 6
    BeforeObjectKey = 7


# Regex for validating JSON numbers
_JSON_NUMBER_PATTERN = re.compile(r"^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$")


def _parse_json_number(s: str) -> float:
    """Parse a JSON number string, validating format"""
    if not _JSON_NUMBER_PATTERN.match(s):
        raise ValueError("Invalid number")
    return float(s)


class _Input:
    """
    Input buffer for chunk-based JSON parsing

    Manages buffering of input chunks and provides methods for
    consuming and inspecting the buffer.
    """

    def __init__(self) -> None:
        self._buffer = ""
        self._start_index = 0
        self.buffer_complete = False

    def feed(self, chunk: str) -> None:
        """Add a chunk of data to the buffer"""
        self._buffer += chunk

    def mark_complete(self) -> None:
        """Signal that no more chunks will be fed"""
        self.buffer_complete = True

    @property
    def length(self) -> int:
        """Number of characters remaining in buffer"""
        return len(self._buffer) - self._start_index

    def advance(self, length: int) -> None:
        """Advance the start position by length characters"""
        self._start_index += length

    def peek(self, offset: int) -> str | None:
        """Peek at character at offset, or None if not available"""
        idx = self._start_index + offset
        if idx < len(self._buffer):
            return self._buffer[idx]
        return None

    def peek_char_code(self, offset: int) -> int:
        """Get character code at offset"""
        return ord(self._buffer[self._start_index + offset])

    def slice(self, start: int, end: int) -> str:
        """Slice buffer from start to end (relative to current position)"""
        return self._buffer[self._start_index + start : self._start_index + end]

    def commit(self) -> None:
        """Commit consumed content, removing it from buffer"""
        if self._start_index > 0:
            self._buffer = self._buffer[self._start_index :]
            self._start_index = 0

    def remaining(self) -> str:
        """Get all remaining content in buffer"""
        return self._buffer[self._start_index :]

    def expect_end_of_content(self) -> None:
        """Verify no non-whitespace content remains"""
        self.commit()
        self.skip_past_whitespace()
        if self.length != 0:
            raise ValueError(f"Unexpected trailing content {self.remaining()!r}")

    def skip_past_whitespace(self) -> None:
        """Skip whitespace characters"""
        i = self._start_index
        while i < len(self._buffer):
            c = ord(self._buffer[i])
            if c in (32, 9, 10, 13):  # space, tab, \n, \r
                i += 1
            else:
                break
        self._start_index = i

    def try_to_take_prefix(self, prefix: str) -> bool:
        """Try to consume prefix from buffer, return True if successful"""
        if self._buffer.startswith(prefix, self._start_index):
            self._start_index += len(prefix)
            return True
        return False

    def try_to_take(self, length: int) -> str | None:
        """Try to take length characters, or None if not enough available"""
        if self.length < length:
            return None
        result = self._buffer[self._start_index : self._start_index + length]
        self._start_index += length
        return result

    def try_to_take_char_code(self) -> int | None:
        """Try to take a single character as char code, or None if buffer empty"""
        if self.length == 0:
            return None
        code = ord(self._buffer[self._start_index])
        self._start_index += 1
        return code

    def take_until_quote_or_backslash(self) -> tuple[str, bool]:
        """
        Consume input up to first quote or backslash

        Returns tuple of (consumed_content, pattern_found)
        """
        buf = self._buffer
        i = self._start_index
        while i < len(buf):
            c = ord(buf[i])
            if c <= 0x1F:
                raise ValueError("Unescaped control character in string")
            if c == 34 or c == 92:  # " or \
                result = buf[self._start_index : i]
                self._start_index = i
                return (result, True)
            i += 1

        result = buf[self._start_index :]
        self._start_index = len(buf)
        return (result, False)


class Tokenizer:
    """
    Tokenizer for chunk-based JSON parsing

    Processes chunks fed into its input buffer and calls handler methods
    as JSON tokens are recognized.
    """

    def __init__(self, input: _Input, handler: TokenHandler) -> None:
        self.input = input
        self._handler = handler
        self._stack: list[_State] = [_State.ExpectingValue]
        self._emitted_tokens = 0

    def is_done(self) -> bool:
        """Check if tokenization is complete"""
        return len(self._stack) == 0 and self.input.length == 0

    def pump(self) -> None:
        """Process all available tokens in the buffer"""
        while True:
            before = self._emitted_tokens
            self._tokenize_more()
            if self._emitted_tokens == before:
                self.input.commit()
                return

    def _tokenize_more(self) -> None:
        """Process one step of tokenization based on current state"""
        if not self._stack:
            return

        state = self._stack[-1]

        if state == _State.ExpectingValue:
            self._tokenize_value()
        elif state == _State.InString:
            self._tokenize_string()
        elif state == _State.StartArray:
            self._tokenize_array_start()
        elif state == _State.AfterArrayValue:
            self._tokenize_after_array_value()
        elif state == _State.StartObject:
            self._tokenize_object_start()
        elif state == _State.AfterObjectKey:
            self._tokenize_after_object_key()
        elif state == _State.AfterObjectValue:
            self._tokenize_after_object_value()
        elif state == _State.BeforeObjectKey:
            self._tokenize_before_object_key()

    def _tokenize_value(self) -> None:
        """Tokenize a JSON value"""
        self.input.skip_past_whitespace()

        if self.input.try_to_take_prefix("null"):
            self._handler.handle_null()
            self._emitted_tokens += 1
            self._stack.pop()
            return

        if self.input.try_to_take_prefix("true"):
            self._handler.handle_boolean(True)
            self._emitted_tokens += 1
            self._stack.pop()
            return

        if self.input.try_to_take_prefix("false"):
            self._handler.handle_boolean(False)
            self._emitted_tokens += 1
            self._stack.pop()
            return

        if self.input.length > 0:
            ch = self.input.peek_char_code(0)
            if (48 <= ch <= 57) or ch == 45:  # 0-9 or -
                # Scan for end of number
                i = 0
                while i < self.input.length:
                    c = self.input.peek_char_code(i)
                    if (48 <= c <= 57) or c in (45, 43, 46, 101, 69):  # 0-9 - + . e E
                        i += 1
                    else:
                        break

                if i == self.input.length and not self.input.buffer_complete:
                    # Need more input (numbers have no terminator)
                    return

                number_chars = self.input.slice(0, i)
                self.input.advance(i)
                number = _parse_json_number(number_chars)
                self._handler.handle_number(number)
                self._emitted_tokens += 1
                self._stack.pop()
                return

        if self.input.try_to_take_prefix('"'):
            self._stack.pop()
            self._stack.append(_State.InString)
            self._handler.handle_string_start()
            self._emitted_tokens += 1
            self._tokenize_string()
            return

        if self.input.try_to_take_prefix("["):
            self._stack.pop()
            self._stack.append(_State.StartArray)
            self._handler.handle_array_start()
            self._emitted_tokens += 1
            self._tokenize_array_start()
            return

        if self.input.try_to_take_prefix("{"):
            self._stack.pop()
            self._stack.append(_State.StartObject)
            self._handler.handle_object_start()
            self._emitted_tokens += 1
            self._tokenize_object_start()
            return

    def _tokenize_string(self) -> None:
        """Tokenize string content"""
        while True:
            chunk, interrupted = self.input.take_until_quote_or_backslash()
            if chunk:
                self._handler.handle_string_middle(chunk)
                self._emitted_tokens += 1
            elif not interrupted:
                return

            if interrupted:
                if self.input.length == 0:
                    return

                next_char = self.input.peek(0)
                if next_char == '"':
                    self.input.advance(1)
                    self._handler.handle_string_end()
                    self._emitted_tokens += 1
                    self._stack.pop()
                    return

                # Handle escape sequences
                next_char2 = self.input.peek(1)
                if next_char2 is None:
                    return

                value: str
                if next_char2 == "u":
                    # Unicode escape: need 4 hex digits
                    if self.input.length < 6:
                        return

                    code = 0
                    for j in range(2, 6):
                        c = self.input.peek_char_code(j)
                        if 48 <= c <= 57:  # 0-9
                            digit = c - 48
                        elif 65 <= c <= 70:  # A-F
                            digit = c - 55
                        elif 97 <= c <= 102:  # a-f
                            digit = c - 87
                        else:
                            raise ValueError("Bad Unicode escape in JSON")
                        code = (code << 4) | digit

                    self.input.advance(6)
                    self._handler.handle_string_middle(chr(code))
                    self._emitted_tokens += 1
                    continue

                elif next_char2 == "n":
                    value = "\n"
                elif next_char2 == "r":
                    value = "\r"
                elif next_char2 == "t":
                    value = "\t"
                elif next_char2 == "b":
                    value = "\b"
                elif next_char2 == "f":
                    value = "\f"
                elif next_char2 == "\\":
                    value = "\\"
                elif next_char2 == "/":
                    value = "/"
                elif next_char2 == '"':
                    value = '"'
                else:
                    raise ValueError("Bad escape in string")

                self.input.advance(2)
                self._handler.handle_string_middle(value)
                self._emitted_tokens += 1

    def _tokenize_array_start(self) -> None:
        """Tokenize start of array (check for empty or first element)"""
        self.input.skip_past_whitespace()
        if self.input.length == 0:
            return

        if self.input.try_to_take_prefix("]"):
            self._handler.handle_array_end()
            self._emitted_tokens += 1
            self._stack.pop()
            return

        self._stack.pop()
        self._stack.append(_State.AfterArrayValue)
        self._stack.append(_State.ExpectingValue)
        self._tokenize_value()

    def _tokenize_after_array_value(self) -> None:
        """Tokenize after an array value (expect , or ])"""
        self.input.skip_past_whitespace()
        next_char = self.input.try_to_take_char_code()

        if next_char is None:
            return
        elif next_char == 0x5D:  # ]
            self._handler.handle_array_end()
            self._emitted_tokens += 1
            self._stack.pop()
            return
        elif next_char == 0x2C:  # ,
            self._stack.append(_State.ExpectingValue)
            self._tokenize_value()
            return
        else:
            raise ValueError(f"Expected , or ], got {chr(next_char)!r}")

    def _tokenize_object_start(self) -> None:
        """Tokenize start of object (check for empty or first key)"""
        self.input.skip_past_whitespace()
        next_char = self.input.try_to_take_char_code()

        if next_char is None:
            return
        elif next_char == 0x7D:  # }
            self._handler.handle_object_end()
            self._emitted_tokens += 1
            self._stack.pop()
            return
        elif next_char == 0x22:  # "
            self._stack.pop()
            self._stack.append(_State.AfterObjectKey)
            self._stack.append(_State.InString)
            self._handler.handle_string_start()
            self._emitted_tokens += 1
            self._tokenize_string()
            return
        else:
            raise ValueError(f"Expected start of object key, got {chr(next_char)!r}")

    def _tokenize_after_object_key(self) -> None:
        """Tokenize after object key (expect :)"""
        self.input.skip_past_whitespace()
        next_char = self.input.try_to_take_char_code()

        if next_char is None:
            return
        elif next_char == 0x3A:  # :
            self._stack.pop()
            self._stack.append(_State.AfterObjectValue)
            self._stack.append(_State.ExpectingValue)
            self._tokenize_value()
            return
        else:
            raise ValueError(f"Expected colon after object key, got {chr(next_char)!r}")

    def _tokenize_after_object_value(self) -> None:
        """Tokenize after object value (expect , or })"""
        self.input.skip_past_whitespace()
        next_char = self.input.try_to_take_char_code()

        if next_char is None:
            return
        elif next_char == 0x7D:  # }
            self._handler.handle_object_end()
            self._emitted_tokens += 1
            self._stack.pop()
            return
        elif next_char == 0x2C:  # ,
            self._stack.pop()
            self._stack.append(_State.BeforeObjectKey)
            self._tokenize_before_object_key()
            return
        else:
            raise ValueError(
                f"Expected , or }} after object value, got {chr(next_char)!r}"
            )

    def _tokenize_before_object_key(self) -> None:
        """Tokenize before object key (after comma)"""
        self.input.skip_past_whitespace()
        next_char = self.input.try_to_take_char_code()

        if next_char is None:
            return
        elif next_char == 0x22:  # "
            self._stack.pop()
            self._stack.append(_State.AfterObjectKey)
            self._stack.append(_State.InString)
            self._handler.handle_string_start()
            self._emitted_tokens += 1
            self._tokenize_string()
            return
        else:
            raise ValueError(f"Expected start of object key, got {chr(next_char)!r}")


================================================
FILE: backend/onyx/utils/logger.py
================================================
import contextvars
import logging
import os
from collections.abc import MutableMapping
from logging.handlers import RotatingFileHandler
from typing import Any

from onyx.utils.tenant import get_tenant_id_short_string
from shared_configs.configs import DEV_LOGGING_ENABLED
from shared_configs.configs import LOG_FILE_NAME
from shared_configs.configs import LOG_LEVEL
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SLACK_CHANNEL_ID
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR
from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR


logging.addLevelName(logging.INFO + 5, "NOTICE")

pruning_ctx: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar(
    "pruning_ctx", default=dict()
)

doc_permission_sync_ctx: contextvars.ContextVar[dict[str, Any]] = (
    contextvars.ContextVar("doc_permission_sync_ctx", default=dict())
)


class LoggerContextVars:
    @staticmethod
    def reset() -> None:
        pruning_ctx.set(dict())
        doc_permission_sync_ctx.set(dict())


def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
    log_level_dict = {
        "CRITICAL": logging.CRITICAL,
        "ERROR": logging.ERROR,
        "WARNING": logging.WARNING,
        "NOTICE": logging.getLevelName("NOTICE"),
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "NOTSET": logging.NOTSET,
    }

    return log_level_dict.get(log_level_str.upper(), logging.INFO)


class OnyxRequestIDFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR

        record.request_id = ONYX_REQUEST_ID_CONTEXTVAR.get() or "-"
        return True


class OnyxLoggingAdapter(logging.LoggerAdapter):
    def process(
        self, msg: str, kwargs: MutableMapping[str, Any]
    ) -> tuple[str, MutableMapping[str, Any]]:
        # If this is an indexing job, add the attempt ID to the log message
        # This helps filter the logs for this specific indexing
        while True:
            pruning_ctx_dict = pruning_ctx.get()
            if len(pruning_ctx_dict) > 0:
                if "request_id" in pruning_ctx_dict:
                    msg = f"[Prune: {pruning_ctx_dict['request_id']}] {msg}"

                if "cc_pair_id" in pruning_ctx_dict:
                    msg = f"[CC Pair: {pruning_ctx_dict['cc_pair_id']}] {msg}"
                break

            doc_permission_sync_ctx_dict = doc_permission_sync_ctx.get()
            if len(doc_permission_sync_ctx_dict) > 0:
                if "request_id" in doc_permission_sync_ctx_dict:
                    msg = f"[Doc Permissions Sync: {doc_permission_sync_ctx_dict['request_id']}] {msg}"
                break

            index_attempt_info = INDEX_ATTEMPT_INFO_CONTEXTVAR.get()
            if index_attempt_info:
                cc_pair_id, index_attempt_id = index_attempt_info
                msg = (
                    f"[Index Attempt: {index_attempt_id}] [CC Pair: {cc_pair_id}] {msg}"
                )

            break

        # Add tenant information if it differs from default
        # This will always be the case for authenticated API requests
        if MULTI_TENANT:
            tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
            if tenant_id != POSTGRES_DEFAULT_SCHEMA and tenant_id is not None:
                # Get a short string representation of the tenant id for cleaner
                # logs.
                short_tenant = get_tenant_id_short_string(tenant_id)
                msg = f"[t:{short_tenant}] {msg}"

        # request id within a fastapi route
        fastapi_request_id = ONYX_REQUEST_ID_CONTEXTVAR.get()
        if fastapi_request_id:
            msg = f"[{fastapi_request_id}] {msg}"

        # For Slack Bot, logs the channel relevant to the request
        channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None
        if channel_id:
            msg = f"[Channel ID: {channel_id}] {msg}"

        return msg, kwargs

    def notice(self, msg: Any, *args: Any, **kwargs: Any) -> None:
        # Stacklevel is set to 2 to point to the actual caller of notice instead of here
        self.log(
            logging.getLevelName("NOTICE"), str(msg), *args, **kwargs, stacklevel=2
        )


class PlainFormatter(logging.Formatter):
    """Adds log levels."""

    def format(self, record: logging.LogRecord) -> str:
        levelname = record.levelname
        level_display = f"{levelname}:"
        formatted_message = super().format(record)
        return f"{level_display.ljust(9)} {formatted_message}"


class ColoredFormatter(logging.Formatter):
    """Custom formatter to add colors to log levels."""

    COLORS = {
        "CRITICAL": "\033[91m",  # Red
        "ERROR": "\033[91m",  # Red
        "WARNING": "\033[93m",  # Yellow
        "NOTICE": "\033[94m",  # Blue
        "INFO": "\033[92m",  # Green
        "DEBUG": "\033[96m",  # Light Green
        "NOTSET": "\033[91m",  # Reset
    }

    def format(self, record: logging.LogRecord) -> str:
        levelname = record.levelname
        if levelname in self.COLORS:
            prefix = self.COLORS[levelname]
            suffix = "\033[0m"
            formatted_message = super().format(record)
            # Ensure the levelname with colon is 9 characters long
            # accounts for the extra characters for coloring
            level_display = f"{prefix}{levelname}{suffix}:"
            return f"{level_display.ljust(18)} {formatted_message}"
        return super().format(record)


def get_uvicorn_standard_formatter() -> ColoredFormatter:
    """Returns a standard colored logging formatter."""
    return ColoredFormatter(
        "%(asctime)s %(filename)30s %(lineno)4s: [%(request_id)s] %(message)s",
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )


def get_standard_formatter() -> ColoredFormatter:
    """Returns a standard colored logging formatter."""
    return ColoredFormatter(
        "%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )


DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"


def is_running_in_container() -> bool:
    return os.getenv(DANSWER_DOCKER_ENV_STR) == "true"


def setup_logger(
    name: str = __name__,
    log_level: int = get_log_level_from_str(),
    extra: MutableMapping[str, Any] | None = None,
    propagate: bool = True,
) -> OnyxLoggingAdapter:
    logger = logging.getLogger(name)

    # If the logger already has handlers, assume it was already configured and return it.
    if logger.handlers:
        return OnyxLoggingAdapter(logger, extra=extra)

    logger.setLevel(log_level)

    formatter = get_standard_formatter()

    handler = logging.StreamHandler()
    handler.setLevel(log_level)
    handler.setFormatter(formatter)

    logger.addHandler(handler)

    is_containerized = is_running_in_container()
    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
        log_levels = ["debug", "info", "notice"]
        for level in log_levels:
            file_name = (
                f"/var/log/onyx/{LOG_FILE_NAME}_{level}.log"
                if is_containerized
                else f"./log/{LOG_FILE_NAME}_{level}.log"
            )
            # Ensure the log directory exists
            log_dir = os.path.dirname(file_name)
            if not os.path.exists(log_dir):
                os.makedirs(log_dir, exist_ok=True)

            # Truncate log file if DEV_LOGGING_ENABLED (for clean dev experience)
            if DEV_LOGGING_ENABLED and os.path.exists(file_name):
                try:
                    open(file_name, "w").close()  # Truncate the file
                except Exception:
                    pass  # Ignore errors, just proceed with normal logging

            file_handler = RotatingFileHandler(
                file_name,
                maxBytes=25 * 1024 * 1024,  # 25 MB
                backupCount=5,  # Keep 5 backup files
            )
            file_handler.setLevel(get_log_level_from_str(level))
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore

    # After handler configuration, disable propagation to avoid duplicate logs
    # Prevent messages from propagating to the root logger which can cause
    # duplicate log entries when the root logger is also configured with its
    # own handler (e.g. by Uvicorn / Celery).
    logger.propagate = propagate

    return OnyxLoggingAdapter(logger, extra=extra)


def setup_uvicorn_logger(
    log_level: int = get_log_level_from_str(),
    shared_file_handlers: list[logging.FileHandler] | None = None,
) -> None:
    uvicorn_logger = logging.getLogger("uvicorn.access")
    if not uvicorn_logger:
        return

    formatter = get_uvicorn_standard_formatter()

    handler = logging.StreamHandler()
    handler.setLevel(log_level)
    handler.setFormatter(formatter)

    uvicorn_logger.handlers = []
    uvicorn_logger.addHandler(handler)
    uvicorn_logger.setLevel(log_level)
    uvicorn_logger.addFilter(OnyxRequestIDFilter())

    if shared_file_handlers:
        for fh in shared_file_handlers:
            uvicorn_logger.addHandler(fh)

    return


def print_loggers() -> None:
    """Print information about all loggers. Use to debug logging issues."""
    root_logger = logging.getLogger()
    loggers: list[logging.Logger | logging.PlaceHolder] = [root_logger]
    loggers.extend(logging.Logger.manager.loggerDict.values())

    for logger in loggers:
        if isinstance(logger, logging.PlaceHolder):
            # Skip placeholders that aren't actual loggers
            continue

        print(f"Logger: '{logger.name}' (Level: {logging.getLevelName(logger.level)})")
        if logger.handlers:
            for handler in logger.handlers:
                print(f"  Handler: {handler}")
        else:
            print("  No handlers")

        print(f"  Propagate: {logger.propagate}")
        print()


def format_error_for_logging(e: Exception) -> str:
    """Clean error message by removing newlines for better logging."""
    return str(e).replace("\n", " ")


================================================
FILE: backend/onyx/utils/long_term_log.py
================================================
import json
import os
import threading
from datetime import datetime
from pathlib import Path
from typing import Any

from onyx.utils.logger import setup_logger
from onyx.utils.special_types import JSON_ro

logger = setup_logger()

_LOG_FILE_NAME_TIMESTAMP_FORMAT = "%Y-%m-%d_%H-%M-%S-%f"


# NOTE: This is no longer used but keeping it around in case it's reintroduced
class LongTermLogger:
    """NOTE: should support a LOT of data AND should be extremely fast,
    ideally done in a background thread."""

    def __init__(
        self,
        metadata: dict[str, str] | None = None,
        log_file_path: str = "/tmp/long_term_log",
        max_files_per_category: int = 1000,
    ):
        self.metadata = metadata
        self.log_file_path = Path(log_file_path)
        self.max_files_per_category = max_files_per_category
        try:
            # Create directory if it doesn't exist
            os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
        except Exception:
            # logger.error(f"Error creating directory for long-term logs: {e}")
            pass

    def _cleanup_old_files(self, category_path: Path) -> None:
        try:
            files = sorted(
                [f for f in category_path.glob("*.json")],
                key=lambda x: x.stat().st_mtime,  # Sort by modification time
                reverse=True,
            )

            # Delete oldest files that exceed the limit
            for file in files[self.max_files_per_category :]:
                if not file.is_file():
                    logger.debug(f"File already deleted: {file}")
                    continue
                try:
                    file.unlink()
                except Exception:
                    pass
                    # logger.error(f"Error deleting old log file {file
                    # }: {e}")
        except Exception:
            pass
            # logger.error(f"Error during log rotation cleanup: {e}")

    def _record(self, message: Any, category: str) -> None:
        category_path = self.log_file_path / category
        try:
            # Create directory if it doesn't exist
            os.makedirs(category_path, exist_ok=True)

            # Perform cleanup before writing new file
            self._cleanup_old_files(category_path)

            final_record = {
                "metadata": self.metadata,
                "record": message,
            }

            file_path = (
                category_path
                / f"{datetime.now().strftime(_LOG_FILE_NAME_TIMESTAMP_FORMAT)}.json"
            )
            with open(file_path, "w+") as f:
                # default allows us to "ignore" unserializable objects
                json.dump(final_record, f, default=lambda x: str(x))
        except Exception:
            # logger.error(f"Error recording log: {e}")
            pass

    def record(self, message: JSON_ro, category: str = "default") -> None:
        try:
            # Run in separate thread to have minimal overhead in main flows
            thread = threading.Thread(
                target=self._record, args=(message, category), daemon=True
            )
            thread.start()
        except Exception:
            # Should never interfere with normal functions of Onyx
            pass

    def fetch_category(
        self,
        category: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        limit: int = 100,  # noqa: ARG002
    ) -> list[JSON_ro]:
        category_path = self.log_file_path / category
        files = list(category_path.glob("*.json"))

        results: list[JSON_ro] = []
        for file in files:
            # Parse timestamp from filename (YYYY-MM-DD_HH-MM-SS.json)
            try:
                file_time = datetime.strptime(
                    file.stem, _LOG_FILE_NAME_TIMESTAMP_FORMAT
                )

                # Skip if outside time range
                if start_time and file_time < start_time:
                    continue
                if end_time and file_time > end_time:
                    continue

                results.append(json.loads(file.read_text()))
            except ValueError:
                # Skip files that don't match expected format
                continue

        return results


================================================
FILE: backend/onyx/utils/memory_logger.py
================================================
# # leaving this here for future mem debugging efforts
# import os
# from typing import Any

# import psutil
# from pympler import asizeof

# from onyx.utils.logger import setup_logger

# logger = setup_logger()

#
# def log_memory_usage(
#     label: str,
#     specific_object: Any = None,
#     object_label: str = "",
# ) -> None:
#     """Log current process memory usage and optionally the size of a specific object.

#     Args:
#         label: A descriptive label for the current location/operation in code
#         specific_object: Optional object to measure the size of
#         object_label: Optional label describing the specific object
#     """
#     try:
#         # Get current process memory info
#         process = psutil.Process(os.getpid())
#         memory_info = process.memory_info()

#         # Convert to MB for readability
#         rss_mb = memory_info.rss / (1024 * 1024)
#         vms_mb = memory_info.vms / (1024 * 1024)

#         log_parts = [f"MEMORY[{label}]", f"RSS: {rss_mb:.2f}MB", f"VMS: {vms_mb:.2f}MB"]

#         # Add object size if provided
#         if specific_object is not None:
#             try:
#                 # recursively calculate the size of the object
#                 obj_size = asizeof.asizeof(specific_object)
#                 obj_size_mb = obj_size / (1024 * 1024)
#                 obj_desc = f"[{object_label}]" if object_label else "[object]"
#                 log_parts.append(f"OBJ{obj_desc}: {obj_size_mb:.2f}MB")
#             except Exception as e:
#                 log_parts.append(f"OBJ_SIZE_ERROR: {str(e)}")

#         logger.info(" | ".join(log_parts))

#     except Exception as e:
#         logger.warning(f"Failed to log memory usage for {label}: {str(e)}")

# For example, use this like:
# log_memory_usage("my_operation", my_large_object, "my_large_object")


================================================
FILE: backend/onyx/utils/middleware.py
================================================
import base64
import hashlib
import logging
import re
import uuid
from collections.abc import Awaitable
from collections.abc import Callable
from datetime import datetime
from datetime import timezone

from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.routing import APIRoute

from shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR


def add_onyx_tenant_id_middleware(
    app: FastAPI,
    logger: logging.LoggerAdapter,  # noqa: ARG001
) -> None:
    @app.middleware("http")
    async def set_tenant_id(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        """Captures and sets the context var for the tenant."""

        onyx_tenant_id = request.headers.get("X-Onyx-Tenant-ID")
        if onyx_tenant_id:
            CURRENT_TENANT_ID_CONTEXTVAR.set(onyx_tenant_id)
        return await call_next(request)


def add_onyx_request_id_middleware(
    app: FastAPI,
    prefix: str,
    logger: logging.LoggerAdapter,  # noqa: ARG001
) -> None:
    @app.middleware("http")
    async def set_request_id(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        """Generate a request hash that can be used to track the lifecycle
        of a request.  The hash is prefixed to help indicated where the request id
        originated.

        Format is f"{PREFIX}:{ID}" where PREFIX is 3 chars and ID is 8 chars.
        Total length is 12 chars.
        """

        onyx_request_id = request.headers.get("X-Onyx-Request-ID")
        if not onyx_request_id:
            onyx_request_id = make_randomized_onyx_request_id(prefix)

        ONYX_REQUEST_ID_CONTEXTVAR.set(onyx_request_id)
        return await call_next(request)


def make_randomized_onyx_request_id(prefix: str) -> str:
    """generates a randomized request id"""

    hash_input = str(uuid.uuid4())
    return _make_onyx_request_id(prefix, hash_input)


def make_structured_onyx_request_id(prefix: str, request_url: str) -> str:
    """Not used yet, but could be in the future!"""
    hash_input = f"{request_url}:{datetime.now(timezone.utc)}"
    return _make_onyx_request_id(prefix, hash_input)


def _make_onyx_request_id(prefix: str, hash_input: str) -> str:
    """helper function to return an id given a string input"""
    hash_obj = hashlib.md5(hash_input.encode("utf-8"), usedforsecurity=False)
    hash_bytes = hash_obj.digest()[:6]  # Truncate to 6 bytes

    # 6 bytes becomes 8 bytes. we shouldn't need to strip but just in case
    # NOTE: possible we'll want more input bytes if id's aren't unique enough
    hash_str = base64.urlsafe_b64encode(hash_bytes).decode("utf-8").rstrip("=")
    onyx_request_id = f"{prefix}:{hash_str}"
    return onyx_request_id


def _build_route_map(app: FastAPI) -> list[tuple[re.Pattern[str], str]]:
    """Build a list of (compiled regex, route template) from the app's routes.

    Used by endpoint context middleware to resolve request paths to route
    templates, avoiding high-cardinality raw paths in metrics labels.
    """
    route_map: list[tuple[re.Pattern[str], str]] = []
    for route in app.routes:
        if isinstance(route, APIRoute):
            route_map.append((route.path_regex, route.path))
    return route_map


def _match_route(route_map: list[tuple[re.Pattern[str], str]], path: str) -> str | None:
    """Match a request path against the route map and return the template."""
    for pattern, template in route_map:
        if pattern.match(path):
            return template
    return None


def add_endpoint_context_middleware(app: FastAPI) -> None:
    """Set CURRENT_ENDPOINT_CONTEXTVAR so Prometheus pool metrics can
    attribute DB connections to the endpoint that checked them out.

    Used by ``onyx_db_connections_held_by_endpoint`` and
    ``onyx_db_connection_hold_seconds`` in the pool event listeners.

    Resolves request paths to route templates (e.g. /api/chat/{chat_id}
    instead of /api/chat/abc-123) to keep metric label cardinality low.

    Must be registered AFTER all routes are added to the app.
    """
    route_map = _build_route_map(app)

    @app.middleware("http")
    async def set_endpoint_context(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        handler = _match_route(route_map, request.url.path)
        token = CURRENT_ENDPOINT_CONTEXTVAR.set(handler or "unmatched")
        try:
            return await call_next(request)
        finally:
            CURRENT_ENDPOINT_CONTEXTVAR.reset(token)


================================================
FILE: backend/onyx/utils/object_size_check.py
================================================
import sys
from typing import TypeVar

T = TypeVar("T", dict, list, tuple, set, frozenset)


def deep_getsizeof(obj: T, seen: set[int] | None = None) -> int:
    """Recursively sum size of objects, handling circular references."""
    if seen is None:
        seen = set()

    obj_id = id(obj)
    if obj_id in seen:
        return 0  # Prevent infinite recursion for circular references

    seen.add(obj_id)
    size = sys.getsizeof(obj)

    if isinstance(obj, dict):
        size += sum(
            deep_getsizeof(k, seen) + deep_getsizeof(v, seen) for k, v in obj.items()
        )
    elif isinstance(obj, (list, tuple, set, frozenset)):
        size += sum(deep_getsizeof(i, seen) for i in obj)

    return size


================================================
FILE: backend/onyx/utils/postgres_sanitization.py
================================================
import re
from typing import Any

from onyx.access.models import ExternalAccess
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.utils.logger import setup_logger

logger = setup_logger()

_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")


def sanitize_string(value: str) -> str:
    """Strip characters that PostgreSQL text/JSONB columns cannot store.

    Removes:
    - NUL bytes (\\x00)
    - UTF-16 surrogates (\\ud800-\\udfff), which are invalid in UTF-8
    """
    sanitized = value.replace("\x00", "")
    sanitized = _SURROGATE_RE.sub("", sanitized)
    if value and not sanitized:
        logger.warning(
            "sanitize_string: all characters were removed from a non-empty string"
        )
    return sanitized


def sanitize_json_like(value: Any) -> Any:
    """Recursively sanitize all strings in a JSON-like structure (dict/list/tuple)."""
    if isinstance(value, str):
        return sanitize_string(value)

    if isinstance(value, list):
        return [sanitize_json_like(item) for item in value]

    if isinstance(value, tuple):
        return tuple(sanitize_json_like(item) for item in value)

    if isinstance(value, dict):
        sanitized: dict[Any, Any] = {}
        for key, nested_value in value.items():
            cleaned_key = sanitize_string(key) if isinstance(key, str) else key
            sanitized[cleaned_key] = sanitize_json_like(nested_value)
        return sanitized

    return value


def _sanitize_expert_info(expert: BasicExpertInfo) -> BasicExpertInfo:
    return expert.model_copy(
        update={
            "display_name": (
                sanitize_string(expert.display_name)
                if expert.display_name is not None
                else None
            ),
            "first_name": (
                sanitize_string(expert.first_name)
                if expert.first_name is not None
                else None
            ),
            "middle_initial": (
                sanitize_string(expert.middle_initial)
                if expert.middle_initial is not None
                else None
            ),
            "last_name": (
                sanitize_string(expert.last_name)
                if expert.last_name is not None
                else None
            ),
            "email": (
                sanitize_string(expert.email) if expert.email is not None else None
            ),
        }
    )


def _sanitize_external_access(external_access: ExternalAccess) -> ExternalAccess:
    return ExternalAccess(
        external_user_emails={
            sanitize_string(email) for email in external_access.external_user_emails
        },
        external_user_group_ids={
            sanitize_string(group_id)
            for group_id in external_access.external_user_group_ids
        },
        is_public=external_access.is_public,
    )


def sanitize_document_for_postgres(document: Document) -> Document:
    cleaned_doc = document.model_copy(deep=True)

    cleaned_doc.id = sanitize_string(cleaned_doc.id)
    cleaned_doc.semantic_identifier = sanitize_string(cleaned_doc.semantic_identifier)
    if cleaned_doc.title is not None:
        cleaned_doc.title = sanitize_string(cleaned_doc.title)
    if cleaned_doc.parent_hierarchy_raw_node_id is not None:
        cleaned_doc.parent_hierarchy_raw_node_id = sanitize_string(
            cleaned_doc.parent_hierarchy_raw_node_id
        )

    cleaned_doc.metadata = {
        sanitize_string(key): (
            [sanitize_string(item) for item in value]
            if isinstance(value, list)
            else sanitize_string(value)
        )
        for key, value in cleaned_doc.metadata.items()
    }

    if cleaned_doc.doc_metadata is not None:
        cleaned_doc.doc_metadata = sanitize_json_like(cleaned_doc.doc_metadata)

    if cleaned_doc.primary_owners is not None:
        cleaned_doc.primary_owners = [
            _sanitize_expert_info(expert) for expert in cleaned_doc.primary_owners
        ]
    if cleaned_doc.secondary_owners is not None:
        cleaned_doc.secondary_owners = [
            _sanitize_expert_info(expert) for expert in cleaned_doc.secondary_owners
        ]

    if cleaned_doc.external_access is not None:
        cleaned_doc.external_access = _sanitize_external_access(
            cleaned_doc.external_access
        )

    for section in cleaned_doc.sections:
        if section.link is not None:
            section.link = sanitize_string(section.link)
        if section.text is not None:
            section.text = sanitize_string(section.text)
        if section.image_file_id is not None:
            section.image_file_id = sanitize_string(section.image_file_id)

    return cleaned_doc


def sanitize_documents_for_postgres(documents: list[Document]) -> list[Document]:
    return [sanitize_document_for_postgres(document) for document in documents]


def sanitize_hierarchy_node_for_postgres(node: HierarchyNode) -> HierarchyNode:
    cleaned_node = node.model_copy(deep=True)

    cleaned_node.raw_node_id = sanitize_string(cleaned_node.raw_node_id)
    cleaned_node.display_name = sanitize_string(cleaned_node.display_name)
    if cleaned_node.raw_parent_id is not None:
        cleaned_node.raw_parent_id = sanitize_string(cleaned_node.raw_parent_id)
    if cleaned_node.link is not None:
        cleaned_node.link = sanitize_string(cleaned_node.link)

    if cleaned_node.external_access is not None:
        cleaned_node.external_access = _sanitize_external_access(
            cleaned_node.external_access
        )

    return cleaned_node


def sanitize_hierarchy_nodes_for_postgres(
    nodes: list[HierarchyNode],
) -> list[HierarchyNode]:
    return [sanitize_hierarchy_node_for_postgres(node) for node in nodes]


================================================
FILE: backend/onyx/utils/pydantic_util.py
================================================
from typing import Any

from pydantic import BaseModel


def shallow_model_dump(model_instance: BaseModel) -> dict[str, Any]:
    """Like model_dump(), but returns references to field values instead of
    deep copies. Use with model_construct() to avoid unnecessary memory
    duplication when building subclass instances."""
    return {
        field_name: getattr(model_instance, field_name)
        for field_name in model_instance.__class__.model_fields
    }


================================================
FILE: backend/onyx/utils/retry_wrapper.py
================================================
from collections.abc import Callable
from logging import Logger
from typing import Any
from typing import cast
from typing import TypeVar

import requests
from retry import retry

from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
from onyx.utils.logger import setup_logger

logger = setup_logger()


F = TypeVar("F", bound=Callable[..., Any])


def retry_builder(
    tries: int = 20,
    delay: float = 0.1,
    max_delay: float | None = 60,
    backoff: float = 2,
    jitter: tuple[float, float] | float = 1,
    exceptions: type[Exception] | tuple[type[Exception], ...] = (Exception,),
) -> Callable[[F], F]:
    """Builds a generic wrapper/decorator for calls to external APIs that
    may fail due to rate limiting, flakes, or other reasons. Applies exponential
    backoff with jitter to retry the call."""

    def retry_with_default(func: F) -> F:
        @retry(
            tries=tries,
            delay=delay,
            max_delay=max_delay,
            backoff=backoff,
            jitter=jitter,
            logger=cast(Logger, logger),
            exceptions=exceptions,
        )
        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
            return func(*args, **kwargs)

        return cast(F, wrapped_func)

    return retry_with_default


def request_with_retries(
    method: str,
    url: str,
    *,
    data: dict[str, Any] | None = None,
    headers: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    timeout: int = REQUEST_TIMEOUT_SECONDS,
    stream: bool = False,
    tries: int = 8,
    delay: float = 1,
    backoff: float = 2,
) -> requests.Response:
    @retry(tries=tries, delay=delay, backoff=backoff, logger=cast(Logger, logger))
    def _make_request() -> requests.Response:
        response = requests.request(
            method=method,
            url=url,
            data=data,
            headers=headers,
            params=params,
            timeout=timeout,
            stream=stream,
        )
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            logger.exception(
                "Request failed:\n%s",
                {
                    "method": method,
                    "url": url,
                    "data": data,
                    "headers": headers,
                    "params": params,
                    "timeout": timeout,
                    "stream": stream,
                },
            )
            raise
        return response

    return _make_request()


================================================
FILE: backend/onyx/utils/search_nlp_models_utils.py
================================================
def pass_aws_key(api_key: str) -> tuple[str, str, str]:
    """Parse AWS API key string into components.

    Args:
        api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'

    Returns:
        Tuple of (access_key, secret_key, region)

    Raises:
        ValueError: If key format is invalid
    """
    if not api_key.startswith("aws"):
        raise ValueError("API key must start with 'aws' prefix")
    parts = api_key.split("_")
    if len(parts) != 4:
        raise ValueError(
            f"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts. "
            "This is an onyx specific format for formatting the aws secrets for bedrock"
        )

    try:
        _, aws_access_key_id, aws_secret_access_key, aws_region = parts
        return aws_access_key_id, aws_secret_access_key, aws_region
    except Exception as e:
        raise ValueError(f"Failed to parse AWS key components: {str(e)}")


================================================
FILE: backend/onyx/utils/sensitive.py
================================================
"""
Wrapper class for sensitive values that require explicit masking decisions.

This module provides a wrapper for encrypted values that forces developers to
make an explicit decision about whether to mask the value when accessing it.
This prevents accidental exposure of sensitive data in API responses.
"""

from __future__ import annotations

import json
from collections.abc import Callable
from typing import Any
from typing import Generic
from typing import NoReturn
from typing import TypeVar
from unittest.mock import MagicMock

from onyx.utils.encryption import mask_credential_dict
from onyx.utils.encryption import mask_string


T = TypeVar("T", str, dict[str, Any])


def make_mock_sensitive_value(value: dict[str, Any] | str | None) -> MagicMock:
    """
    Create a mock SensitiveValue for use in tests.

    This helper makes it easy to create mock objects that behave like
    SensitiveValue for testing code that uses credentials.

    Args:
        value: The value to return from get_value(). Can be a dict, string, or None.

    Returns:
        A MagicMock configured to behave like a SensitiveValue.

    Example:
        >>> mock_credential = MagicMock()
        >>> mock_credential.credential_json = make_mock_sensitive_value({"api_key": "secret"})
        >>> # Now mock_credential.credential_json.get_value(apply_mask=False) returns {"api_key": "secret"}
    """
    if value is None:
        return None  # type: ignore[return-value]

    mock = MagicMock(spec=SensitiveValue)
    mock.get_value.return_value = value
    mock.__bool__ = lambda self: True  # noqa: ARG005
    return mock


class SensitiveAccessError(Exception):
    """Raised when attempting to access a SensitiveValue without explicit masking decision."""


class SensitiveValue(Generic[T]):
    """
    Wrapper requiring explicit masking decisions for sensitive data.

    This class wraps encrypted data and forces callers to make an explicit
    decision about whether to mask the value when accessing it. This prevents
    accidental exposure of sensitive data.

    Usage:
        # Get raw value (for internal use like connectors)
        raw_value = sensitive.get_value(apply_mask=False)

        # Get masked value (for API responses)
        masked_value = sensitive.get_value(apply_mask=True)

    Raises SensitiveAccessError when:
        - Attempting to convert to string via str() or repr()
        - Attempting to iterate over the value
        - Attempting to subscript the value (e.g., value["key"])
        - Attempting to serialize to JSON without explicit get_value()
    """

    def __init__(
        self,
        *,
        encrypted_bytes: bytes,
        decrypt_fn: Callable[[bytes], str],
        is_json: bool = False,
    ) -> None:
        """
        Initialize a SensitiveValue wrapper.

        Args:
            encrypted_bytes: The encrypted bytes to wrap
            decrypt_fn: Function to decrypt bytes to string
            is_json: If True, the decrypted value is JSON and will be parsed to dict
        """
        self._encrypted_bytes = encrypted_bytes
        self._decrypt_fn = decrypt_fn
        self._is_json = is_json
        # Cache for decrypted value to avoid repeated decryption
        self._decrypted_value: T | None = None

    def _decrypt(self) -> T:
        """Lazily decrypt and cache the value."""
        if self._decrypted_value is None:
            decrypted_str = self._decrypt_fn(self._encrypted_bytes)
            if self._is_json:
                self._decrypted_value = json.loads(decrypted_str)
            else:
                self._decrypted_value = decrypted_str  # type: ignore[assignment]
        # The return type should always match T based on is_json flag
        return self._decrypted_value  # type: ignore[return-value]

    def get_value(
        self,
        *,
        apply_mask: bool,
        mask_fn: Callable[[T], T] | None = None,
    ) -> T:
        """
        Get the value with explicit masking decision.

        Args:
            apply_mask: Required. True = return masked value, False = return raw value
            mask_fn: Optional custom masking function. Defaults to mask_string for
                     strings and mask_credential_dict for dicts.

        Returns:
            The value, either masked or raw depending on apply_mask.
        """
        value = self._decrypt()

        if not apply_mask:
            # Callers must not mutate the returned dict — doing so would
            # desync the cache from the encrypted bytes and the DB.
            return value

        # Apply masking
        if mask_fn is not None:
            return mask_fn(value)

        # Use default masking based on type
        # Type narrowing doesn't work well here due to the generic T,
        # but at runtime the types will match
        if isinstance(value, dict):
            return mask_credential_dict(value)
        elif isinstance(value, str):
            return mask_string(value)
        else:
            raise ValueError(f"Cannot mask value of type {type(value)}")

    def __bool__(self) -> bool:
        """Allow truthiness checks without exposing the value."""
        return True

    def __str__(self) -> NoReturn:
        """Prevent accidental string conversion."""
        raise SensitiveAccessError(
            "Cannot convert SensitiveValue to string. Use .get_value(apply_mask=True/False) to access the value."
        )

    def __repr__(self) -> str:
        """Prevent accidental repr exposure."""
        return "<SensitiveValue: use .get_value(apply_mask=True/False) to access>"

    def __iter__(self) -> NoReturn:
        """Prevent iteration over the value."""
        raise SensitiveAccessError(
            "Cannot iterate over SensitiveValue. Use .get_value(apply_mask=True/False) to access the value."
        )

    def __getitem__(self, key: Any) -> NoReturn:
        """Prevent subscript access."""
        raise SensitiveAccessError(
            "Cannot subscript SensitiveValue. Use .get_value(apply_mask=True/False) to access the value."
        )

    def __eq__(self, other: Any) -> bool:
        """Compare SensitiveValues by their decrypted content."""
        # NOTE: if you attempt to compare a string/dict to a SensitiveValue,
        # this comparison will return NotImplemented, which then evaluates to False.
        # This is the convention and required for SQLAlchemy's attribute tracking.
        if not isinstance(other, SensitiveValue):
            return NotImplemented
        return self._decrypt() == other._decrypt()

    def __hash__(self) -> int:
        """Hash based on decrypted content."""
        value = self._decrypt()
        if isinstance(value, dict):
            return hash(json.dumps(value, sort_keys=True))
        return hash(value)

    # Prevent JSON serialization
    def __json__(self) -> Any:
        """Prevent JSON serialization."""
        raise SensitiveAccessError(
            "Cannot serialize SensitiveValue to JSON. Use .get_value(apply_mask=True/False) to access the value."
        )

    # For Pydantic compatibility
    @classmethod
    def __get_pydantic_core_schema__(cls, source_type: Any, handler: Any) -> Any:
        """Prevent Pydantic from serializing without explicit get_value()."""
        raise SensitiveAccessError(
            "Cannot serialize SensitiveValue in Pydantic model. "
            "Use .get_value(apply_mask=True/False) to access the value before serialization."
        )


================================================
FILE: backend/onyx/utils/sitemap.py
================================================
import re
import xml.etree.ElementTree as ET
from typing import Set
from urllib.parse import urljoin

import requests

from onyx.utils.logger import setup_logger

logger = setup_logger()


def _get_sitemap_locations_from_robots(base_url: str) -> Set[str]:
    """Extract sitemap URLs from robots.txt"""
    sitemap_urls: set = set()
    try:
        robots_url = urljoin(base_url, "/robots.txt")
        resp = requests.get(robots_url, timeout=10)
        if resp.status_code == 200:
            for line in resp.text.splitlines():
                if line.lower().startswith("sitemap:"):
                    sitemap_url = line.split(":", 1)[1].strip()
                    sitemap_urls.add(sitemap_url)
    except Exception as e:
        logger.warning(f"Error fetching robots.txt: {e}")
    return sitemap_urls


def _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]:
    """Extract URLs from a sitemap XML file"""
    urls: set[str] = set()
    try:
        resp = requests.get(sitemap_url, timeout=10)
        if resp.status_code != 200:
            return urls

        root = ET.fromstring(resp.content)

        # Handle both regular sitemaps and sitemap indexes
        # Remove namespace for easier parsing
        namespace = re.match(r"\{.*\}", root.tag)
        ns = namespace.group(0) if namespace else ""

        if root.tag == f"{ns}sitemapindex":
            # This is a sitemap index
            for sitemap in root.findall(f".//{ns}loc"):
                if sitemap.text:
                    sub_urls = _extract_urls_from_sitemap(sitemap.text)
                    urls.update(sub_urls)
        else:
            # This is a regular sitemap
            for url in root.findall(f".//{ns}loc"):
                if url.text:
                    urls.add(url.text)

    except Exception as e:
        logger.warning(f"Error processing sitemap {sitemap_url}: {e}")

    return urls


def list_pages_for_site(site: str) -> list[str]:
    """Get list of pages from a site's sitemaps"""
    site = site.rstrip("/")
    all_urls = set()

    # Try both common sitemap locations
    sitemap_paths = ["/sitemap.xml", "/sitemap_index.xml"]
    for path in sitemap_paths:
        sitemap_url = urljoin(site, path)
        all_urls.update(_extract_urls_from_sitemap(sitemap_url))

    # Check robots.txt for additional sitemaps
    sitemap_locations = _get_sitemap_locations_from_robots(site)
    for sitemap_url in sitemap_locations:
        all_urls.update(_extract_urls_from_sitemap(sitemap_url))

    return list(all_urls)


================================================
FILE: backend/onyx/utils/special_types.py
================================================
from collections.abc import Mapping
from collections.abc import Sequence
from typing import TypeAlias

JSON_ro: TypeAlias = (
    Mapping[str, "JSON_ro"] | Sequence["JSON_ro"] | str | int | float | bool | None
)


================================================
FILE: backend/onyx/utils/subclasses.py
================================================
from __future__ import annotations

import importlib
import os
import pkgutil
import sys
from types import ModuleType
from typing import List
from typing import Type
from typing import TypeVar

T = TypeVar("T")


def import_all_modules_from_dir(dir_path: str) -> List[ModuleType]:
    """
    Imports all modules found in the given directory and its subdirectories,
    returning a list of imported module objects.
    """
    dir_path = os.path.abspath(dir_path)

    if dir_path not in sys.path:
        sys.path.insert(0, dir_path)

    imported_modules: List[ModuleType] = []

    for _, package_name, _ in pkgutil.walk_packages([dir_path]):
        try:
            module = importlib.import_module(package_name)
            imported_modules.append(module)
        except Exception as e:
            # Handle or log exceptions as needed
            print(f"Could not import {package_name}: {e}")

    return imported_modules


def import_all_submodules_from_package(package_name: str) -> List[ModuleType]:
    """
    Imports all submodules of a given package WITHOUT mutating sys.path.
    Uses the package's __path__ and imports with fully-qualified names.
    """
    imported_modules: List[ModuleType] = []

    try:
        pkg = importlib.import_module(package_name)
    except Exception as e:
        print(f"Could not import package {package_name}: {e}")
        return imported_modules

    pkg_paths = getattr(pkg, "__path__", None)
    if not pkg_paths:
        return imported_modules

    for _, module_name, _ in pkgutil.walk_packages(
        pkg_paths, prefix=pkg.__name__ + "."
    ):
        try:
            module = importlib.import_module(module_name)
            imported_modules.append(module)
        except Exception as e:
            print(f"Could not import {module_name}: {e}")

    return imported_modules


def all_subclasses(cls: Type[T]) -> List[Type[T]]:
    """
    Recursively find all subclasses of the given class.
    """
    direct_subs = cls.__subclasses__()
    result: List[Type[T]] = []
    for subclass in direct_subs:
        result.append(subclass)
        # Extend the result by recursively calling all_subclasses
        result.extend(all_subclasses(subclass))
    return result


def find_all_subclasses_in_dir(parent_class: Type[T], directory: str) -> List[Type[T]]:
    """
    Imports all modules from the given directory (and subdirectories),
    then returns all classes that are subclasses of parent_class.

    :param parent_class: The class to find subclasses of.
    :param directory: The directory to search for subclasses.
    :return: A list of all subclasses of parent_class found in the directory.
    """
    # First import all modules to ensure classes are loaded into memory
    import_all_modules_from_dir(directory)

    # Gather all subclasses of the given parent class
    subclasses = all_subclasses(parent_class)
    return subclasses


def find_all_subclasses_in_package(
    parent_class: Type[T], package_name: str
) -> List[Type[T]]:
    """
    Imports all submodules from the given package name, then returns all subclasses
    of parent_class that are loaded in memory.
    """
    import_all_submodules_from_package(package_name)
    subclasses = all_subclasses(parent_class)
    return subclasses


# Example usage:
if __name__ == "__main__":

    class Animal:
        pass

    # Suppose "mymodules" contains files that define classes inheriting from Animal
    found_subclasses = find_all_subclasses_in_dir(Animal, "mymodules")
    for sc in found_subclasses:
        print("Found subclass:", sc.__name__)


================================================
FILE: backend/onyx/utils/supervisord_watchdog.py
================================================
#!/usr/bin/env python3

import argparse
import subprocess
import time

from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger


logger = setup_logger()

MAX_AGE_SECONDS = 900  # how old the heartbeat can be
CHECK_INTERVAL = 60  # how often to check
MAX_LOOKUP_FAILURES = 5


def main(key: str, program: str, conf: str) -> None:
    """This script will restart the watchdog'd supervisord process via supervisorctl.

    This process continually looks up a specific redis key. If it is missing for a
    consecutive number of times and the last successful lookup is more
    than a threshold time, the specified program will be restarted.
    """
    logger.info(f"supervisord_watchdog starting: program={program} conf={conf}")

    r = get_redis_client()

    last_heartbeat = time.monotonic()
    num_lookup_failures = 0

    try:
        while True:
            time.sleep(CHECK_INTERVAL)

            now = time.monotonic()

            # check for the key ... handle any exception gracefully
            try:
                heartbeat = r.exists(key)
            except Exception:
                logger.exception(
                    f"Exception checking for celery beat heartbeat: key={key}."
                )
                continue

            # happy path ... just continue
            if heartbeat:
                logger.debug(f"Key lookup succeeded: key={key}")
                last_heartbeat = time.monotonic()
                num_lookup_failures = 0
                continue

            # if we haven't exceeded the max lookup failures, continue
            num_lookup_failures += 1
            if num_lookup_failures <= MAX_LOOKUP_FAILURES:
                logger.warning(
                    f"Key lookup failed: key={key} "
                    f"lookup_failures={num_lookup_failures} "
                    f"max_lookup_failures={MAX_LOOKUP_FAILURES}"
                )
                continue

            # if we haven't exceeded the max missing key timeout threshold, continue
            elapsed = now - last_heartbeat
            if elapsed <= MAX_AGE_SECONDS:
                logger.warning(
                    f"Key lookup failed: key={key} "
                    f"lookup_failures={num_lookup_failures} "
                    f"max_lookup_failures={MAX_LOOKUP_FAILURES} "
                    f"elapsed={elapsed:.2f} "
                    f"elapsed_threshold={MAX_AGE_SECONDS}"
                )
                continue

            # all conditions have been exceeded ... restart the process
            logger.warning(
                f"Key lookup failure thresholds exceeded - restarting {program}: "
                f"key={key} "
                f"lookup_failures={num_lookup_failures} "
                f"max_lookup_failures={MAX_LOOKUP_FAILURES} "
                f"elapsed={elapsed:.2f} "
                f"elapsed_threshold={MAX_AGE_SECONDS}"
            )

            subprocess.call(["supervisorctl", "-c", conf, "restart", program])

            # reset state so that we properly delay until the next restart
            # instead of continually restarting
            num_lookup_failures = 0
            last_heartbeat = time.monotonic()
    except KeyboardInterrupt:
        logger.info("Caught interrupt, exiting watchdog.")

    logger.info("supervisord_watchdog exiting.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Supervisord Watchdog")
    parser.add_argument("--key", help="The redis key to watch", required=True)
    parser.add_argument(
        "--program", help="The supervisord program to restart", required=True
    )
    parser.add_argument(
        "--conf", type=str, help="Path to supervisord config file", required=True
    )
    args = parser.parse_args()

    main(args.key, args.program, args.conf)


================================================
FILE: backend/onyx/utils/telemetry.py
================================================
import contextvars
import threading
import uuid
from enum import Enum
from typing import Any

import requests

from onyx.configs.app_configs import DISABLE_TELEMETRY
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.configs.constants import KV_CUSTOMER_UUID_KEY
from onyx.configs.constants import KV_INSTANCE_DOMAIN_KEY
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.key_value_store.interface import unwrap_str
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()


_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.onyx.app/anonymous_telemetry"
_CACHED_UUID: str | None = None
_CACHED_INSTANCE_DOMAIN: str | None = None


class RecordType(str, Enum):
    VERSION = "version"
    SIGN_UP = "sign_up"
    USAGE = "usage"
    LATENCY = "latency"
    FAILURE = "failure"
    METRIC = "metric"
    INDEXING_PROGRESS = "indexing_progress"
    INDEXING_COMPLETE = "indexing_complete"
    PERMISSION_SYNC_PROGRESS = "permission_sync_progress"
    PERMISSION_SYNC_COMPLETE = "permission_sync_complete"
    INDEX_ATTEMPT_STATUS = "index_attempt_status"


def _get_or_generate_customer_id_mt(tenant_id: str) -> str:
    return str(uuid.uuid5(uuid.NAMESPACE_X500, tenant_id))


def get_or_generate_uuid() -> str:
    # TODO: split out the whole "instance UUID" generation logic into a separate
    # utility function. Telemetry should not be aware at all of how the UUID is
    # generated/stored.
    # TODO: handle potential race condition for UUID generation. Doesn't matter for
    # the telemetry case, but if this is used generally it should be handled.
    global _CACHED_UUID

    if _CACHED_UUID is not None:
        return _CACHED_UUID

    kv_store = get_kv_store()

    try:
        _CACHED_UUID = unwrap_str(kv_store.load(KV_CUSTOMER_UUID_KEY))
    except KvKeyNotFoundError:
        _CACHED_UUID = str(uuid.uuid4())
        kv_store.store(KV_CUSTOMER_UUID_KEY, {"value": _CACHED_UUID}, encrypt=True)

    return _CACHED_UUID


def _get_or_generate_instance_domain() -> str | None:  #
    global _CACHED_INSTANCE_DOMAIN

    if _CACHED_INSTANCE_DOMAIN is not None:
        return _CACHED_INSTANCE_DOMAIN

    kv_store = get_kv_store()

    try:
        _CACHED_INSTANCE_DOMAIN = unwrap_str(kv_store.load(KV_INSTANCE_DOMAIN_KEY))
    except KvKeyNotFoundError:
        with get_session_with_current_tenant() as db_session:
            first_user = db_session.query(User).first()
            if first_user:
                _CACHED_INSTANCE_DOMAIN = first_user.email.split("@")[-1]
                kv_store.store(
                    KV_INSTANCE_DOMAIN_KEY,
                    {"value": _CACHED_INSTANCE_DOMAIN},
                    encrypt=True,
                )

    return _CACHED_INSTANCE_DOMAIN


def optional_telemetry(
    record_type: RecordType,
    data: dict,
    user_id: str | None = None,
    tenant_id: str | None = None,  # Allows for override of tenant_id
) -> None:
    if DISABLE_TELEMETRY:
        return

    tenant_id = tenant_id or get_current_tenant_id()

    try:

        def telemetry_logic() -> None:
            try:
                customer_uuid = (
                    _get_or_generate_customer_id_mt(tenant_id)
                    if MULTI_TENANT
                    else get_or_generate_uuid()
                )
                payload = {
                    "data": data,
                    "record": record_type,
                    # If None then it's a flow that doesn't include a user
                    # For cases where the User itself is None, a string is provided instead
                    "user_id": user_id,
                    "customer_uuid": customer_uuid,
                    "is_cloud": MULTI_TENANT,
                }
                if ENTERPRISE_EDITION_ENABLED:
                    payload["instance_domain"] = _get_or_generate_instance_domain()
                requests.post(
                    _DANSWER_TELEMETRY_ENDPOINT,
                    headers={"Content-Type": "application/json"},
                    json=payload,
                )

            except Exception:
                # This way it silences all thread level logging as well
                pass

        # Run in separate thread with the same context as the current thread
        # This is to ensure that the thread gets the current tenant ID
        current_context = contextvars.copy_context()
        thread = threading.Thread(
            target=lambda: current_context.run(telemetry_logic), daemon=True
        )
        thread.start()
    except Exception:
        # Should never interfere with normal functions of Onyx
        pass


def mt_cloud_telemetry(
    tenant_id: str,
    distinct_id: str,
    event: MilestoneRecordType,
    properties: dict[str, Any] | None = None,
) -> None:
    if not MULTI_TENANT:
        return

    # Automatically include tenant_id in properties
    all_properties = {**properties} if properties else {}
    if properties and "tenant_id" in properties:
        logger.warning(
            f"tenant_id already in properties: {properties}. Overwriting with new value {tenant_id}."
        )
    all_properties["tenant_id"] = tenant_id

    # MIT version should not need to include any Posthog code
    # This is only for Onyx MT Cloud, this code should also never be hit, no reason for any orgs to
    # be running the Multi Tenant version of Onyx.
    fetch_versioned_implementation_with_fallback(
        module="onyx.utils.telemetry",
        attribute="event_telemetry",
        fallback=noop_fallback,
    )(distinct_id, event, all_properties)


def mt_cloud_identify(
    distinct_id: str,
    properties: dict[str, Any] | None = None,
) -> None:
    """Create/update a PostHog person profile (Cloud only)."""
    if not MULTI_TENANT:
        return

    fetch_versioned_implementation_with_fallback(
        module="onyx.utils.telemetry",
        attribute="identify_user",
        fallback=noop_fallback,
    )(distinct_id, properties)


def mt_cloud_alias(
    distinct_id: str,
    anonymous_id: str,
) -> None:
    """Link an anonymous distinct_id to an identified user (Cloud only)."""
    if not MULTI_TENANT:
        return

    fetch_versioned_implementation_with_fallback(
        module="onyx.utils.posthog_client",
        attribute="alias_user",
        fallback=noop_fallback,
    )(distinct_id, anonymous_id)


def mt_cloud_get_anon_id(request: Any) -> str | None:
    """Extract the anonymous distinct_id from the app PostHog cookie (Cloud only)."""
    if not MULTI_TENANT or not request:
        return None

    return fetch_versioned_implementation_with_fallback(
        module="onyx.utils.posthog_client",
        attribute="get_anon_id_from_request",
        fallback=noop_fallback,
    )(request)


================================================
FILE: backend/onyx/utils/tenant.py
================================================
from shared_configs.configs import TENANT_ID_PREFIX


def get_tenant_id_short_string(tenant_id: str) -> str:
    """Gets a short string representation of a full tenant id.

    Args:
        tenant_id: The full tenant id.

    Returns:
        str: The first 8 characters of the tenant id after removing the prefix.
    """
    tenant_display = tenant_id.removeprefix(TENANT_ID_PREFIX)
    short_tenant = tenant_display[:8]
    return short_tenant


================================================
FILE: backend/onyx/utils/text_processing.py
================================================
import codecs
import json
import re
import string
from urllib.parse import quote

from onyx.utils.logger import setup_logger


logger = setup_logger(__name__)

# Mapping of curly/smart quotes to straight quotes
CURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {
    "\u2019": "'",  # Right single quotation mark
    "\u2018": "'",  # Left single quotation mark
    "\u201c": '"',  # Left double quotation mark
    "\u201d": '"',  # Right double quotation mark
}

# Zero-width characters that should typically be removed during text normalization
ZERO_WIDTH_CHARS: set[str] = {
    "\u200b",  # Zero-width space
    "\u200c",  # Zero-width non-joiner
    "\u200d",  # Zero-width joiner
    "\ufeff",  # Byte order mark / zero-width no-break space
    "\u2060",  # Word joiner
}


def normalize_curly_quotes(text: str) -> str:
    """Convert curly/smart quotes to straight quotes."""
    for curly, straight in CURLY_TO_STRAIGHT_QUOTES.items():
        text = text.replace(curly, straight)
    return text


def is_zero_width_char(c: str) -> bool:
    """Check if a character is a zero-width character."""
    return c in ZERO_WIDTH_CHARS


ESCAPE_SEQUENCE_RE = re.compile(
    r"""
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )""",
    re.UNICODE | re.VERBOSE,
)

_INITIAL_FILTER = re.compile(
    "["
    "\U0000fff0-\U0000ffff"  # Specials
    "\U0001f000-\U0001f9ff"  # Emoticons
    "\U00002000-\U0000206f"  # General Punctuation
    "\U00002190-\U000021ff"  # Arrows
    "\U00002700-\U000027bf"  # Dingbats
    "]+",
    flags=re.UNICODE,
)

# Regex to match invalid Unicode characters that cause UTF-8 encoding errors:
# - \x00-\x08: Control characters (except tab \x09)
# - \x0b-\x0c: Vertical tab and form feed
# - \x0e-\x1f: More control characters (except newline \x0a, carriage return \x0d)
# - \ud800-\udfff: Surrogate pairs (invalid when unpaired, causes "surrogates not allowed" errors)
# - \ufdd0-\ufdef: Non-characters
# - \ufffe-\uffff: Non-characters
_INVALID_UNICODE_CHARS_RE = re.compile(
    "[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufdd0-\ufdef\ufffe\uffff]"
)


def decode_escapes(s: str) -> str:
    def decode_match(match: re.Match) -> str:
        return codecs.decode(match.group(0), "unicode-escape")

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)


def make_url_compatible(s: str) -> str:
    s_with_underscores = s.replace(" ", "_")
    return quote(s_with_underscores, safe="")


def has_unescaped_quote(s: str) -> bool:
    pattern = r'(?<!\\)"'
    return bool(re.search(pattern, s))


def escape_newlines(s: str) -> str:
    return re.sub(r"(?<!\\)\n", "\\\\n", s)


def replace_whitespaces_w_space(s: str) -> str:
    return re.sub(r"\s", " ", s)


# Function to remove punctuation from a string
def remove_punctuation(s: str) -> str:
    return s.translate(str.maketrans("", "", string.punctuation))


def escape_quotes(original_json_str: str) -> str:
    result = []
    in_string = False
    for i, char in enumerate(original_json_str):
        if char == '"':
            if not in_string:
                in_string = True
                result.append(char)
            else:
                next_char = (
                    original_json_str[i + 1] if i + 1 < len(original_json_str) else None
                )
                if result and result[-1] == "\\":
                    result.append(char)
                elif next_char not in [",", ":", "}", "\n"]:
                    result.append("\\" + char)
                else:
                    result.append(char)
                    in_string = False
        else:
            result.append(char)
    return "".join(result)


def find_all_json_objects(text: str) -> list[dict]:
    """Find all JSON objects in text using balanced brace matching.

    Iterates through the text, and for each '{' found, attempts to find its
    matching '}' by counting brace depth. Each balanced substring is then
    validated as JSON. This includes nested JSON objects within other objects.

    Use case: Parsing LLM output that may contain multiple JSON objects, or when
    the LLM/serving layer outputs function calls in non-standard formats
    (e.g. OpenAI's function.open_url style).

    Args:
        text: The text to search for JSON objects.

    Returns:
        A list of all successfully parsed JSON objects (dicts only).
    """
    json_objects: list[dict] = []
    i = 0

    while i < len(text):
        if text[i] == "{":
            # Try to find a matching closing brace
            brace_count = 0
            start = i
            for j in range(i, len(text)):
                if text[j] == "{":
                    brace_count += 1
                elif text[j] == "}":
                    brace_count -= 1
                    if brace_count == 0:
                        # Found potential JSON object
                        candidate = text[start : j + 1]
                        try:
                            parsed = json.loads(candidate)
                            if isinstance(parsed, dict):
                                json_objects.append(parsed)
                        except json.JSONDecodeError:
                            pass
                        break
        i += 1

    return json_objects


def parse_llm_json_response(content: str) -> dict | None:
    """Parse a single JSON object from LLM output, handling markdown code blocks.

    Designed for LLM responses that typically contain exactly one JSON object,
    possibly wrapped in markdown formatting.

    Tries extraction in order:
    1. JSON inside markdown code block (```json ... ``` or ``` ... ```)
    2. Entire content as raw JSON
    3. First '{' to last '}' in content (greedy match)

    Args:
        content: The LLM response text to parse.

    Returns:
        The parsed JSON dict if found, None otherwise.
    """
    # Try to find JSON in markdown code block first
    # Use greedy .* (not .*?) to match nested objects correctly within code block bounds
    json_match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", content, re.DOTALL)
    if json_match:
        try:
            result = json.loads(json_match.group(1))
            if isinstance(result, dict):
                return result
        except json.JSONDecodeError:
            pass

    # Try to parse the entire content as JSON
    try:
        result = json.loads(content)
        if isinstance(result, dict):
            return result
    except json.JSONDecodeError:
        pass

    # Try to find any JSON object in the content
    json_match = re.search(r"\{.*\}", content, re.DOTALL)
    if json_match:
        try:
            result = json.loads(json_match.group(0))
            if isinstance(result, dict):
                return result
        except json.JSONDecodeError:
            pass

    return None


def clean_model_quote(quote: str, trim_length: int) -> str:
    quote_clean = quote.strip()
    if quote_clean[0] == '"':
        quote_clean = quote_clean[1:]
    if quote_clean[-1] == '"':
        quote_clean = quote_clean[:-1]
    if trim_length > 0:
        quote_clean = quote_clean[:trim_length]
    return quote_clean


def shared_precompare_cleanup(text: str) -> str:
    """LLMs models sometime restructure whitespaces or edits special characters to fit a more likely
    distribution of characters found in its training data, but this hurts exact quote matching
    """
    text = text.lower()

    # \s: matches any whitespace character (spaces, tabs, newlines, etc.)
    # |: acts as an OR.
    # \*: matches the asterisk character.
    # \\": matches the \" sequence.
    # [.,:`"#-]: matches any character inside the square brackets.
    text = re.sub(r'\s|\*|\\"|[.,:`"#-]', "", text)

    return text


def clean_text(text: str) -> str:
    # Remove specific Unicode ranges that might cause issues
    cleaned = _INITIAL_FILTER.sub("", text)

    # Remove any control characters except for newline and tab
    cleaned = "".join(ch for ch in cleaned if ch >= " " or ch in "\n\t")

    return cleaned


def is_valid_email(text: str) -> bool:
    """Can use a library instead if more detailed checks are needed"""
    regex = r"^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"

    if re.match(regex, text):
        return True
    else:
        return False


def count_punctuation(text: str) -> int:
    return sum(1 for char in text if char in string.punctuation)


def remove_markdown_image_references(text: str) -> str:
    """Remove markdown-style image references like ![alt text](url)"""
    return re.sub(r"!\[[^\]]*\]\([^\)]+\)", "", text)


def remove_invalid_unicode_chars(text: str) -> str:
    """Remove Unicode characters that are invalid in UTF-8 or cause encoding issues.

    This handles:
    - Control characters (except tab, newline, carriage return)
    - Unpaired UTF-16 surrogates (e.g. \udc00) that cause 'surrogates not allowed' errors
    - Unicode non-characters
    """
    return _INVALID_UNICODE_CHARS_RE.sub("", text)


def normalize_char(c: str) -> str:
    """Normalize a single character (curly quotes, whitespace, punctuation)."""
    if c in CURLY_TO_STRAIGHT_QUOTES:
        c = CURLY_TO_STRAIGHT_QUOTES[c]
    if c.isspace():
        return " "
    elif re.match(r"[^\w\s\']", c):
        return " "
    else:
        return c.lower()


================================================
FILE: backend/onyx/utils/threadpool_concurrency.py
================================================
import asyncio
import collections.abc
import concurrent
import contextvars
import copy
import threading
import uuid
from collections.abc import Awaitable
from collections.abc import Callable
from collections.abc import Iterator
from collections.abc import MutableMapping
from collections.abc import Sequence
from concurrent.futures import as_completed
from concurrent.futures import FIRST_COMPLETED
from concurrent.futures import Future
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import wait
from typing import Any
from typing import cast
from typing import Generic
from typing import overload
from typing import Protocol
from typing import TypeVar

from pydantic import GetCoreSchemaHandler
from pydantic.types import T
from pydantic_core import core_schema

from onyx.utils.logger import setup_logger

logger = setup_logger()

R = TypeVar("R")
KT = TypeVar("KT")  # Key type
VT = TypeVar("VT")  # Value type
_T = TypeVar("_T")  # Default type


class ThreadSafeDict(MutableMapping[KT, VT]):
    """
    A thread-safe dictionary implementation that uses a lock to ensure thread safety.
    Implements the MutableMapping interface to provide a complete dictionary-like interface.

    Example usage:
        # Create a thread-safe dictionary
        safe_dict: ThreadSafeDict[str, int] = ThreadSafeDict()

        # Basic operations (atomic)
        safe_dict["key"] = 1
        value = safe_dict["key"]
        del safe_dict["key"]

        # Bulk operations (atomic)
        safe_dict.update({"key1": 1, "key2": 2})
    """

    def __init__(self, input_dict: dict[KT, VT] | None = None) -> None:
        self._dict: dict[KT, VT] = input_dict or {}
        self.lock = threading.Lock()

    def __getitem__(self, key: KT) -> VT:
        with self.lock:
            return self._dict[key]

    def __setitem__(self, key: KT, value: VT) -> None:
        with self.lock:
            self._dict[key] = value

    def __delitem__(self, key: KT) -> None:
        with self.lock:
            del self._dict[key]

    def __iter__(self) -> Iterator[KT]:
        # Return a snapshot of keys to avoid potential modification during iteration
        with self.lock:
            return iter(list(self._dict.keys()))

    def __len__(self) -> int:
        with self.lock:
            return len(self._dict)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        return core_schema.no_info_after_validator_function(
            cls.validate, handler(dict[KT, VT])
        )

    @classmethod
    def validate(cls, v: Any) -> "ThreadSafeDict[KT, VT]":
        if isinstance(v, dict):
            return ThreadSafeDict(v)
        return v

    def __deepcopy__(self, memo: Any) -> "ThreadSafeDict[KT, VT]":
        return ThreadSafeDict(copy.deepcopy(self._dict))

    def clear(self) -> None:
        """Remove all items from the dictionary atomically."""
        with self.lock:
            self._dict.clear()

    def copy(self) -> dict[KT, VT]:
        """Return a shallow copy of the dictionary atomically."""
        with self.lock:
            return self._dict.copy()

    @overload
    def get(self, key: KT) -> VT | None: ...

    @overload
    def get(self, key: KT, default: VT | _T) -> VT | _T: ...

    def get(self, key: KT, default: Any = None) -> Any:
        """Get a value with a default, atomically."""
        with self.lock:
            return self._dict.get(key, default)

    def pop(self, key: KT, default: Any = None) -> Any:
        """Remove and return a value with optional default, atomically."""
        with self.lock:
            if default is None:
                return self._dict.pop(key)
            return self._dict.pop(key, default)

    def setdefault(self, key: KT, default: VT) -> VT:
        """Set a default value if key is missing, atomically."""
        with self.lock:
            return self._dict.setdefault(key, default)

    def update(self, *args: Any, **kwargs: VT) -> None:
        """Update the dictionary atomically from another mapping or from kwargs."""
        with self.lock:
            self._dict.update(*args, **kwargs)

    def items(self) -> collections.abc.ItemsView[KT, VT]:
        """Return a view of (key, value) pairs atomically."""
        with self.lock:
            return collections.abc.ItemsView(self)

    def keys(self) -> collections.abc.KeysView[KT]:
        """Return a view of keys atomically."""
        with self.lock:
            return collections.abc.KeysView(self)

    def values(self) -> collections.abc.ValuesView[VT]:
        """Return a view of values atomically."""
        with self.lock:
            return collections.abc.ValuesView(self)

    @overload
    def atomic_get_set(
        self, key: KT, value_callback: Callable[[VT], VT], default: VT
    ) -> tuple[VT, VT]: ...

    @overload
    def atomic_get_set(
        self, key: KT, value_callback: Callable[[VT | _T], VT], default: VT | _T
    ) -> tuple[VT | _T, VT]: ...

    def atomic_get_set(
        self, key: KT, value_callback: Callable[[Any], VT], default: Any = None
    ) -> tuple[Any, VT]:
        """Replace a value from the dict with a function applied to the previous value, atomically.

        Returns:
            A tuple of the previous value and the new value.
        """
        with self.lock:
            val = self._dict.get(key, default)
            new_val = value_callback(val)
            self._dict[key] = new_val
            return val, new_val


ST = TypeVar("ST")  # Set element type


class ThreadSafeSet(Generic[ST]):
    """
    A thread-safe set implementation that uses a lock to ensure thread safety.

    Example usage:
        # Create a thread-safe set
        safe_set: ThreadSafeSet[str] = ThreadSafeSet()

        # Basic operations (atomic)
        safe_set.add("item")
        if "item" in safe_set:
            ...
        safe_set.discard("item")

        # Bulk operations (atomic)
        safe_set.update({"item1", "item2"})

        # Atomic check-and-add (returns True if item was already present)
        was_present = safe_set.check_and_add("item")
    """

    def __init__(self, input_set: set[ST] | None = None) -> None:
        self._set: set[ST] = input_set.copy() if input_set else set()
        self.lock = threading.Lock()

    def __contains__(self, item: ST) -> bool:
        with self.lock:
            return item in self._set

    def __len__(self) -> int:
        with self.lock:
            return len(self._set)

    def __iter__(self) -> Iterator[ST]:
        # Return a snapshot to avoid modification during iteration
        with self.lock:
            return iter(list(self._set))

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        return core_schema.no_info_after_validator_function(
            cls.validate, handler(set[ST])
        )

    @classmethod
    def validate(cls, v: Any) -> "ThreadSafeSet[ST]":
        if isinstance(v, set):
            return ThreadSafeSet(v)
        return v

    def __deepcopy__(self, memo: Any) -> "ThreadSafeSet[ST]":
        with self.lock:
            return ThreadSafeSet(copy.deepcopy(self._set))

    def add(self, item: ST) -> None:
        """Add an item to the set atomically."""
        with self.lock:
            self._set.add(item)

    def discard(self, item: ST) -> None:
        """Remove an item if present, atomically."""
        with self.lock:
            self._set.discard(item)

    def remove(self, item: ST) -> None:
        """Remove an item, raise KeyError if not present, atomically."""
        with self.lock:
            self._set.remove(item)

    def clear(self) -> None:
        """Remove all items from the set atomically."""
        with self.lock:
            self._set.clear()

    def copy(self) -> set[ST]:
        """Return a shallow copy of the set atomically."""
        with self.lock:
            return self._set.copy()

    def update(self, *others: set[ST]) -> None:
        """Update the set with items from other sets atomically."""
        with self.lock:
            for other in others:
                self._set.update(other)

    def check_and_add(self, item: ST) -> bool:
        """
        Atomically check if item exists and add it if not.
        Returns True if the item was already present, False if it was added.
        This prevents race conditions in check-then-add patterns.
        """
        with self.lock:
            if item in self._set:
                return True
            self._set.add(item)
            return False


class CallableProtocol(Protocol):
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...


def run_functions_tuples_in_parallel(
    functions_with_args: Sequence[tuple[CallableProtocol, tuple[Any, ...]]],
    allow_failures: bool = False,
    max_workers: int | None = None,
    timeout: float | None = None,
    timeout_callback: (
        Callable[[int, CallableProtocol, tuple[Any, ...]], Any] | None
    ) = None,
) -> list[Any]:
    """
    Executes multiple functions in parallel and returns a list of the results for each function.
    This function preserves contextvars across threads, which is important for maintaining
    context like tenant IDs in database sessions.

    Args:
        functions_with_args: List of tuples each containing the function callable and a tuple of arguments.
        allow_failures: if set to True, then the function result will just be None
        max_workers: Max number of worker threads
        timeout: Optional wall-clock timeout in seconds. If any function hasn't completed
            within this time, it will be considered timed out. When timeout is set, threads
            that exceed the timeout will continue running in the background but their results
            will not be awaited. IMPORTANT: because the thread continues to run in the background,
            it can continue to consume resources and updated shared state objects even though the caller
            has moved on.
        timeout_callback: Optional callback for handling timeouts. Called with (index, func, args)
            for each timed-out function. If provided, its return value is used as the result.
            If not provided and allow_failures is False, TimeoutError is raised.
            If not provided and allow_failures is True, None is returned for timed-out functions.

    Returns:
        list: A list of results from each function, in the same order as the input functions.
    """
    workers = (
        min(max_workers, len(functions_with_args))
        if max_workers is not None
        else len(functions_with_args)
    )

    if workers <= 0:
        return []

    results: list[tuple[int, Any]] = []
    executor = ThreadPoolExecutor(max_workers=workers)

    try:
        # The primary reason for propagating contextvars is to allow acquiring a db session
        # that respects tenant id. Context.run is expected to be low-overhead, but if we later
        # find that it is increasing latency we can make using it optional.
        future_to_index = {
            executor.submit(contextvars.copy_context().run, func, *args): i
            for i, (func, args) in enumerate(functions_with_args)
        }

        if timeout is not None:
            # Wait for completion or timeout
            done, not_done = wait(future_to_index.keys(), timeout=timeout)

            # Process completed futures
            for future in done:
                index = future_to_index[future]
                try:
                    results.append((index, future.result()))
                except Exception as e:
                    logger.exception(f"Function at index {index} failed due to {e}")
                    results.append((index, None))
                    if not allow_failures:
                        raise

            # Process timed-out futures
            for future in not_done:
                index = future_to_index[future]
                func, args = functions_with_args[index]
                logger.warning(
                    f"Function at index {index} timed out after {timeout} seconds"
                )

                if timeout_callback:
                    timeout_result = timeout_callback(index, func, args)
                    results.append((index, timeout_result))
                else:
                    results.append((index, None))
                    if not allow_failures:
                        raise TimeoutError(
                            f"Function at index {index} timed out after {timeout} seconds"
                        )

                # Attempt to cancel (only effective if not yet started)
                future.cancel()
        else:
            for future in as_completed(future_to_index):
                index = future_to_index[future]
                try:
                    results.append((index, future.result()))
                except Exception as e:
                    logger.exception(f"Function at index {index} failed due to {e}")
                    results.append((index, None))

                    if not allow_failures:
                        raise
    finally:
        # When timeout is used, don't wait for timed-out threads to complete
        # (they will continue running in the background)
        # When no timeout, wait for all threads to complete (original behavior)
        executor.shutdown(wait=(timeout is None))

    results.sort(key=lambda x: x[0])
    return [result for index, result in results]


class FunctionCall(Generic[R]):
    """
    Container for run_functions_in_parallel, fetch the results from the output of
    run_functions_in_parallel via the FunctionCall.result_id.
    """

    def __init__(
        self, func: Callable[..., R], args: tuple = (), kwargs: dict | None = None
    ):
        self.func = func
        self.args = args
        self.kwargs = kwargs if kwargs is not None else {}
        self.result_id = str(uuid.uuid4())

    def execute(self) -> R:
        return self.func(*self.args, **self.kwargs)


def run_functions_in_parallel(
    function_calls: list[FunctionCall],
    allow_failures: bool = False,
) -> dict[str, Any]:
    """
    Executes a list of FunctionCalls in parallel and stores the results in a dictionary where the keys
    are the result_id of the FunctionCall and the values are the results of the call.
    """
    results: dict[str, Any] = {}

    if len(function_calls) == 0:
        return results

    with ThreadPoolExecutor(max_workers=len(function_calls)) as executor:
        future_to_id = {
            executor.submit(
                contextvars.copy_context().run, func_call.execute
            ): func_call.result_id
            for func_call in function_calls
        }

        for future in as_completed(future_to_id):
            result_id = future_to_id[future]
            try:
                results[result_id] = future.result()
            except Exception as e:
                logger.exception(f"Function with ID {result_id} failed due to {e}")
                results[result_id] = None

                if not allow_failures:
                    raise

    return results


def run_async_sync_no_cancel(coro: Awaitable[T]) -> T:
    """
    async-to-sync converter. Basically just executes asyncio.run in a separate thread.
    Which is probably somehow inefficient or not ideal but fine for now.
    """
    context = contextvars.copy_context()
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future: concurrent.futures.Future[T] = executor.submit(
            context.run,  # type: ignore[arg-type]
            asyncio.run,
            coro,
        )
        return future.result()


def run_multiple_in_background(
    funcs: list[Callable[[], None]],
    thread_name_prefix: str = "worker",
) -> ThreadPoolExecutor:
    """Submit multiple callables to a ``ThreadPoolExecutor`` with context propagation.

    Copies the current ``contextvars`` context once and runs every callable
    inside that copy, which is important for preserving tenant IDs and other
    context-local state across threads.

    Returns the executor so the caller can ``shutdown()`` when done.
    """
    ctx = contextvars.copy_context()
    executor = ThreadPoolExecutor(
        max_workers=len(funcs), thread_name_prefix=thread_name_prefix
    )
    for func in funcs:
        executor.submit(ctx.run, func)
    return executor


class TimeoutThread(threading.Thread, Generic[R]):
    def __init__(
        self, timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
    ):
        super().__init__()
        self.timeout = timeout
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.exception: Exception | None = None

    def run(self) -> None:
        try:
            self.result = self.func(*self.args, **self.kwargs)
        except Exception as e:
            self.exception = e

    def end(self) -> None:
        raise TimeoutError(
            f"Function {self.func.__name__} timed out after {self.timeout} seconds"
        )


def run_with_timeout(
    timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
) -> R:
    """
    Executes a function with a timeout. If the function doesn't complete within the specified
    timeout, raises TimeoutError.
    """
    context = contextvars.copy_context()
    task = TimeoutThread(timeout, context.run, func, *args, **kwargs)
    task.start()
    task.join(timeout)

    if task.exception is not None:
        raise task.exception
    if task.is_alive():
        task.end()

    return task.result


# NOTE: this function should really only be used when run_functions_tuples_in_parallel is
# difficult to use. It's up to the programmer to call wait_on_background on the thread after
# the code you want to run in parallel is finished. As with all python thread parallelism,
# this is only useful for I/O bound tasks.
def run_in_background(
    func: Callable[..., R], *args: Any, **kwargs: Any
) -> TimeoutThread[R]:
    """
    Runs a function in a background thread. Returns a TimeoutThread object that can be used
    to wait for the function to finish with wait_on_background.
    """
    context = contextvars.copy_context()
    # Timeout not used in the non-blocking case
    task = TimeoutThread(-1, context.run, func, *args, **kwargs)
    task.start()
    return cast(TimeoutThread[R], task)


def wait_on_background(task: TimeoutThread[R]) -> R:
    """
    Used in conjunction with run_in_background. blocks until the task is finished,
    then returns the result of the task.
    """
    task.join()

    if task.exception is not None:
        raise task.exception

    return task.result


def _next_or_none(ind: int, gen: Iterator[R]) -> tuple[int, R | None]:
    return ind, next(gen, None)


def parallel_yield(gens: list[Iterator[R]], max_workers: int = 10) -> Iterator[R]:
    """
    Runs the list of generators with thread-level parallelism, yielding
    results as available. The asynchronous nature of this yielding means
    that stopping the returned iterator early DOES NOT GUARANTEE THAT NO
    FURTHER ITEMS WERE PRODUCED by the input gens. Only use this function
    if you are consuming all elements from the generators OR it is acceptable
    for some extra generator code to run and not have the result(s) yielded.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index: dict[Future[tuple[int, R | None]], int] = {
            executor.submit(_next_or_none, ind, gen): ind
            for ind, gen in enumerate(gens)
        }

        next_ind = len(gens)
        while future_to_index:
            done, _ = wait(future_to_index, return_when=FIRST_COMPLETED)
            for future in done:
                ind, result = future.result()
                if result is not None:
                    yield result
                    future_to_index[executor.submit(_next_or_none, ind, gens[ind])] = (
                        next_ind
                    )
                    next_ind += 1
                del future_to_index[future]


def parallel_yield_from_funcs(
    funcs: list[Callable[..., R]],
    max_workers: int = 10,
) -> Iterator[R]:
    """
    Runs the list of functions with thread-level parallelism, yielding
    results as available. The asynchronous nature of this yielding means
    that stopping the returned iterator early DOES NOT GUARANTEE THAT NO
    FURTHER ITEMS WERE PRODUCED by the input funcs. Only use this function
    if you are consuming all elements from the functions OR it is acceptable
    for some extra function code to run and not have the result(s) yielded.
    """

    def func_wrapper(func: Callable[[], R]) -> Iterator[R]:
        yield func()

    yield from parallel_yield(
        [func_wrapper(func) for func in funcs], max_workers=max_workers
    )


================================================
FILE: backend/onyx/utils/timing.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from functools import wraps
from inspect import signature
from typing import Any
from typing import cast
from typing import TypeVar

from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType

logger = setup_logger()

F = TypeVar("F", bound=Callable)
FG = TypeVar("FG", bound=Callable[..., Generator | Iterator])


def log_function_time(
    func_name: str | None = None,
    print_only: bool = False,
    debug_only: bool = False,
    include_args: bool = False,
    include_args_subset: dict[str, Callable[[Any], Any]] | None = None,
) -> Callable[[F], F]:
    """Decorates a function to log the time it takes to execute.

    Args:
        func_name: The name of the function to log. If None uses func.__name__.
            Defaults to None.
        print_only: If False, also sends the log to telemetry. Defaults to
            False.
        debug_only: If True, logs at the debug level. If False, logs at the
            notice level. Defaults to False.
        include_args: Whether to include the full args and kwargs in the log.
            Clobbers include_args_subset if True. Defaults to False.
        include_args_subset: An optional dict mapping arg names to callables to
            apply the arg value before logging. Only args supplied in the dict
            will be logged. Clobbered by include_args if True. Defaults to None.

    Returns:
        The decorated function.
    """

    def decorator(func: F) -> F:
        @wraps(func)
        def wrapped_func(*args: Any, **kwargs: Any) -> Any:
            # Elapsed time should use monotonic.
            start_time = time.monotonic()
            result = func(*args, **kwargs)
            elapsed_time = time.monotonic() - start_time
            elapsed_time_str = f"{elapsed_time:.3f}"
            log_name = func_name or func.__name__
            args_str = ""
            if include_args:
                args_str = f" args={args} kwargs={kwargs}"
            elif include_args_subset:
                sig = signature(func)
                bind = sig.bind(*args, **kwargs)
                bind.apply_defaults()
                for arg in include_args_subset:
                    if arg in bind.arguments:
                        arg_val = include_args_subset[arg](bind.arguments[arg])
                        args_str += f" {arg}={arg_val}"
            final_log = f"{log_name}{args_str} took {elapsed_time_str} seconds."
            if debug_only:
                logger.debug(final_log)
            else:
                # These are generally more important logs so the level is a bit
                # higher.
                logger.notice(final_log)

            if not print_only:
                user = kwargs.get("user")
                optional_telemetry(
                    record_type=RecordType.LATENCY,
                    data={"function": log_name, "latency": str(elapsed_time_str)},
                    user_id=str(user.id) if user else "Unknown",
                )

            return result

        return cast(F, wrapped_func)

    return decorator


def log_generator_function_time(
    func_name: str | None = None, print_only: bool = False
) -> Callable[[FG], FG]:
    def decorator(func: FG) -> FG:
        @wraps(func)
        def wrapped_func(*args: Any, **kwargs: Any) -> Any:
            start_time = time.monotonic()
            user = kwargs.get("user")
            gen = func(*args, **kwargs)
            try:
                value = next(gen)
                while True:
                    yield value
                    value = next(gen)
            except StopIteration:
                pass
            finally:
                elapsed_time_str = f"{time.monotonic() - start_time:.3f}"
                log_name = func_name or func.__name__
                logger.info(f"{log_name} took {elapsed_time_str} seconds")
                if not print_only:
                    optional_telemetry(
                        record_type=RecordType.LATENCY,
                        data={"function": log_name, "latency": str(elapsed_time_str)},
                        user_id=str(user.id) if user else "Unknown",
                    )

        return cast(FG, wrapped_func)

    return decorator


================================================
FILE: backend/onyx/utils/url.py
================================================
import ipaddress
import socket
from typing import Any
from urllib.parse import parse_qs
from urllib.parse import urlencode
from urllib.parse import urlparse
from urllib.parse import urlunparse

import requests

from onyx.utils.logger import setup_logger

logger = setup_logger()

# Hostnames that should always be blocked
BLOCKED_HOSTNAMES = {
    # Localhost variations
    "localhost",
    # Cloud metadata endpoints (defense-in-depth, IPs also blocked via _is_ip_private_or_reserved)
    "169.254.169.254",  # AWS/Azure/GCP metadata IP
    "fd00:ec2::254",  # AWS IPv6 metadata
    "metadata.azure.com",
    "metadata.google.internal",
    "metadata.gke.internal",
    # Kubernetes internal
    "kubernetes.default",
    "kubernetes.default.svc",
    "kubernetes.default.svc.cluster.local",
}


class SSRFException(Exception):
    """Exception raised when an SSRF attempt is detected."""


def _is_ip_private_or_reserved(ip_str: str) -> bool:
    """
    Check if an IP address is private, reserved, or otherwise not suitable
    for external requests.

    Uses Python's ipaddress module which handles:
    - Private addresses (10.x.x.x, 172.16-31.x.x, 192.168.x.x)
    - Loopback addresses (127.x.x.x, ::1)
    - Link-local addresses (169.254.x.x including cloud metadata IPs, fe80::/10)
    - Reserved addresses
    - Multicast addresses
    - Unspecified addresses (0.0.0.0, ::)
    """
    try:
        ip = ipaddress.ip_address(ip_str)
        # is_global returns True only for globally routable unicast addresses
        # This excludes private, loopback, link-local, reserved, and unspecified
        # We also need to explicitly check multicast as it's not covered by is_global
        return not ip.is_global or ip.is_multicast
    except ValueError:
        # If we can't parse the IP, consider it unsafe
        return True


def _validate_and_resolve_url(url: str) -> tuple[str, str, int]:
    """
    Validate a URL for SSRF and resolve it to a safe IP address.

    Returns:
        Tuple of (validated_ip, original_hostname, port)

    Raises:
        SSRFException: If the URL could be used for SSRF attack
        ValueError: If the URL is malformed
    """
    if not url:
        raise ValueError("URL cannot be empty")

    # Parse the URL
    try:
        parsed = urlparse(url)
    except Exception as e:
        raise ValueError(f"Invalid URL format: {e}")

    # Validate scheme
    if parsed.scheme not in ("http", "https"):
        raise SSRFException(
            f"Invalid URL scheme '{parsed.scheme}'. Only http and https are allowed."
        )

    # Get hostname
    hostname = parsed.hostname
    if not hostname:
        raise ValueError("URL must contain a hostname")

    # Check for blocked hostnames
    hostname_lower = hostname.lower()
    if hostname_lower in BLOCKED_HOSTNAMES:
        raise SSRFException(f"Access to hostname '{hostname}' is not allowed.")

    # Check for common SSRF bypass attempts
    # Block URLs with credentials (user:pass@host)
    if parsed.username or parsed.password:
        raise SSRFException("URLs with embedded credentials are not allowed.")

    port = parsed.port or (443 if parsed.scheme == "https" else 80)

    # Check if the hostname is already an IP address
    try:
        ip = ipaddress.ip_address(hostname)
        if _is_ip_private_or_reserved(str(ip)):
            raise SSRFException(
                f"Access to internal/private IP address '{hostname}' is not allowed."
            )
        return str(ip), hostname, port
    except ValueError:
        # Not an IP address, proceed with DNS resolution
        pass

    # Resolve hostname to IP addresses
    try:
        addr_info = socket.getaddrinfo(hostname, port)
    except socket.gaierror as e:
        logger.warning(f"DNS resolution failed for hostname '{hostname}': {e}")
        raise SSRFException(f"Could not resolve hostname '{hostname}': {e}")

    if not addr_info:
        raise SSRFException(f"Could not resolve hostname '{hostname}'")

    # Find the first valid (non-private) IP address
    validated_ip = None
    for info in addr_info:
        ip_str = info[4][0]
        if _is_ip_private_or_reserved(str(ip_str)):
            raise SSRFException(
                f"Hostname '{hostname}' resolves to internal/private IP address "
                f"'{ip_str}'. Access to internal networks is not allowed."
            )
        if validated_ip is None:
            validated_ip = ip_str

    if validated_ip is None:
        raise SSRFException(f"Could not resolve hostname '{hostname}'")

    return validated_ip, hostname, port


def validate_outbound_http_url(
    url: str,
    *,
    allow_private_network: bool = False,
    https_only: bool = False,
) -> str:
    """
    Validate a URL that will be used by backend outbound HTTP calls.

    Args:
        url: The URL to validate.
        allow_private_network: If True, skip private/reserved IP checks.
        https_only: If True, reject http:// URLs (only https:// is allowed).

    Returns:
        A normalized URL string with surrounding whitespace removed.

    Raises:
        ValueError: If URL is malformed.
        SSRFException: If URL fails SSRF checks.
    """
    normalized_url = url.strip()
    if not normalized_url:
        raise ValueError("URL cannot be empty")

    parsed = urlparse(normalized_url)

    if https_only:
        if parsed.scheme != "https":
            raise SSRFException(
                f"Invalid URL scheme '{parsed.scheme}'. Only https is allowed."
            )
    elif parsed.scheme not in ("http", "https"):
        raise SSRFException(
            f"Invalid URL scheme '{parsed.scheme}'. Only http and https are allowed."
        )

    if not parsed.hostname:
        raise ValueError("URL must contain a hostname")

    if parsed.username or parsed.password:
        raise SSRFException("URLs with embedded credentials are not allowed.")

    hostname = parsed.hostname.lower()
    if hostname in BLOCKED_HOSTNAMES:
        raise SSRFException(f"Access to hostname '{parsed.hostname}' is not allowed.")

    if not allow_private_network:
        _validate_and_resolve_url(normalized_url)

    return normalized_url


MAX_REDIRECTS = 10


def _make_ssrf_safe_request(
    url: str,
    headers: dict[str, str] | None = None,
    timeout: float | tuple[float, float] = 15,
    **kwargs: Any,
) -> requests.Response:
    """
    Make a single GET request with SSRF protection (no redirect following).

    Returns the response which may be a redirect (3xx status).
    """
    # Validate and resolve the URL to get a safe IP
    validated_ip, original_hostname, port = _validate_and_resolve_url(url)

    # Parse the URL to rebuild it with the IP
    parsed = urlparse(url)

    # Build the new URL using the validated IP
    # For HTTPS, we need to use the original hostname for TLS verification
    if parsed.scheme == "https":
        # For HTTPS, make request to original URL but we've validated the IP
        # The TLS handshake needs the hostname for SNI
        # We rely on the short time window between validation and request
        # A more robust solution would require custom SSL context
        request_url = url
    else:
        # For HTTP, we can safely request directly to the IP
        netloc = f"{validated_ip}:{port}" if port not in (80, 443) else validated_ip
        request_url = urlunparse(
            (
                parsed.scheme,
                netloc,
                parsed.path,
                parsed.params,
                parsed.query,
                parsed.fragment,
            )
        )

    # Prepare headers
    request_headers = headers.copy() if headers else {}

    # Set Host header to original hostname (required for virtual hosting)
    if parsed.scheme == "http":
        request_headers["Host"] = (
            f"{original_hostname}:{port}" if port != 80 else original_hostname
        )

    # Disable automatic redirects to prevent SSRF bypass via redirect
    return requests.get(
        request_url,
        headers=request_headers,
        timeout=timeout,
        allow_redirects=False,
        **kwargs,
    )


def ssrf_safe_get(
    url: str,
    headers: dict[str, str] | None = None,
    timeout: float | tuple[float, float] = 15,
    follow_redirects: bool = True,
    **kwargs: Any,
) -> requests.Response:
    """
    Make a GET request with SSRF protection.

    This function resolves the hostname, validates the IP is not private/internal,
    and makes the request directly to the validated IP to prevent DNS rebinding attacks.
    Redirects are followed safely by validating each redirect URL.

    Args:
        url: The URL to fetch
        headers: Optional headers to include in the request
        timeout: Request timeout in seconds
        follow_redirects: Whether to follow redirects (each redirect URL is validated)
        **kwargs: Additional arguments passed to requests.get()

    Returns:
        requests.Response object

    Raises:
        SSRFException: If the URL could be used for SSRF attack
        ValueError: If the URL is malformed
        requests.RequestException: If the request fails
    """
    response = _make_ssrf_safe_request(url, headers, timeout, **kwargs)

    if not follow_redirects:
        return response

    # Manually follow redirects while validating each redirect URL
    redirect_count = 0
    current_url = url

    while response.is_redirect and redirect_count < MAX_REDIRECTS:
        redirect_count += 1

        # Get the redirect location
        redirect_url = response.headers.get("Location")
        if not redirect_url:
            break

        # Handle relative redirects
        if not redirect_url.startswith(("http://", "https://")):
            parsed_current = urlparse(current_url)
            if redirect_url.startswith("/"):
                redirect_url = (
                    f"{parsed_current.scheme}://{parsed_current.netloc}{redirect_url}"
                )
            else:
                # Relative path
                base_path = parsed_current.path.rsplit("/", 1)[0]
                redirect_url = f"{parsed_current.scheme}://{parsed_current.netloc}{base_path}/{redirect_url}"

        # Validate and follow the redirect (this will raise SSRFException if invalid)
        current_url = redirect_url
        response = _make_ssrf_safe_request(redirect_url, headers, timeout, **kwargs)

    if response.is_redirect and redirect_count >= MAX_REDIRECTS:
        raise SSRFException(f"Too many redirects (max {MAX_REDIRECTS})")

    return response


def normalize_url(url: str) -> str:
    """
    Normalize a URL by removing query parameters and fragments.
    This is used to create consistent cache keys for deduplication.

    Args:
        url: The original URL

    Returns:
        Normalized URL (scheme + netloc + path + params only)
    """
    parsed_url = urlparse(url)

    # Reconstruct the URL without query string and fragment
    normalized = urlunparse(
        (
            parsed_url.scheme,
            parsed_url.netloc,
            parsed_url.path,
            parsed_url.params,
            "",
            "",
        )
    )

    return normalized


def add_url_params(url: str, params: dict) -> str:
    """
    Add parameters to a URL, handling existing parameters properly.

    Args:
        url: The original URL
        params: Dictionary of parameters to add

    Returns:
        URL with added parameters
    """
    # Parse the URL
    parsed_url = urlparse(url)

    # Get existing query parameters
    query_params = parse_qs(parsed_url.query)

    # Update with new parameters
    for key, value in params.items():
        query_params[key] = [value]

    # Build the new query string
    new_query = urlencode(query_params, doseq=True)

    # Reconstruct the URL with the new query string
    new_url = urlunparse(
        (
            parsed_url.scheme,
            parsed_url.netloc,
            parsed_url.path,
            parsed_url.params,
            new_query,
            parsed_url.fragment,
        )
    )

    return new_url


================================================
FILE: backend/onyx/utils/variable_functionality.py
================================================
import functools
import importlib
import inspect
import os
from typing import Any
from typing import TypeVar

from onyx.configs.app_configs import API_SERVER_HOST
from onyx.configs.app_configs import API_SERVER_PROTOCOL
from onyx.configs.app_configs import API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS
from onyx.configs.app_configs import APP_API_PREFIX
from onyx.configs.app_configs import APP_PORT
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxVersion:
    def __init__(self) -> None:
        self._is_ee = False

    def set_ee(self) -> None:
        self._is_ee = True

    def unset_ee(self) -> None:
        self._is_ee = False

    def is_ee_version(self) -> bool:
        return self._is_ee


global_version = OnyxVersion()

# Read LICENSE_ENFORCEMENT_ENABLED directly since it's in EE configs
# This allows EE code to load when license enforcement is enabled,
# even without ENABLE_PAID_ENTERPRISE_EDITION_FEATURES being set.
# Eventually, ENABLE_PAID_ENTERPRISE_EDITION_FEATURES will be removed
# and license enforcement will be the only mechanism for EE features.
_LICENSE_ENFORCEMENT_ENABLED = (
    os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "true").lower() == "true"
)


def set_is_ee_based_on_env_variable() -> None:
    """Enable Enterprise Edition based on environment configuration.

    EE is enabled if either:
    - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true (legacy/rollout flag)
    - LICENSE_ENFORCEMENT_ENABLED=true (license-based gating)

    When LICENSE_ENFORCEMENT_ENABLED is true, EE code is loaded but access
    to EE-only features is controlled by the license enforcement middleware.
    """
    if global_version.is_ee_version():
        return

    if ENTERPRISE_EDITION_ENABLED:
        logger.notice(
            "Enterprise Edition enabled via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"
        )
        global_version.set_ee()
    elif _LICENSE_ENFORCEMENT_ENABLED:
        logger.notice("Enterprise Edition enabled via LICENSE_ENFORCEMENT_ENABLED")
        global_version.set_ee()


@functools.lru_cache(maxsize=128)
def fetch_versioned_implementation(module: str, attribute: str) -> Any:
    """
    Fetches a versioned implementation of a specified attribute from a given module.
    This function first checks if the application is running in an Enterprise Edition (EE)
    context. If so, it attempts to import the attribute from the EE-specific module.
    If the module or attribute is not found, it falls back to the default module or
    raises the appropriate exception depending on the context.

    Args:
        module (str): The name of the module from which to fetch the attribute.
        attribute (str): The name of the attribute to fetch from the module.

    Returns:
        Any: The fetched implementation of the attribute.

    Raises:
        ModuleNotFoundError: If the module cannot be found and the error is not related to
                             the Enterprise Edition fallback logic.

    Logs:
        Logs debug information about the fetching process and warnings if the versioned
        implementation cannot be found or loaded.
    """
    logger.debug("Fetching versioned implementation for %s.%s", module, attribute)
    is_ee = global_version.is_ee_version()

    module_full = f"ee.{module}" if is_ee else module
    try:
        return getattr(importlib.import_module(module_full), attribute)
    except ModuleNotFoundError as e:
        logger.warning(
            "Failed to fetch versioned implementation for %s.%s: %s",
            module_full,
            attribute,
            e,
        )

        if is_ee:
            if "ee.onyx" not in str(e):
                # If it's a non Onyx related import failure, this is likely because
                # a dependent library has not been installed. Should raise this failure
                # instead of letting the server start up
                raise e

            # Use the MIT version as a fallback, this allows us to develop MIT
            # versions independently and later add additional EE functionality
            # similar to feature flagging
            return getattr(importlib.import_module(module), attribute)

        raise


T = TypeVar("T")


def fetch_versioned_implementation_with_fallback(
    module: str, attribute: str, fallback: T
) -> T:
    """
    Attempts to fetch a versioned implementation of a specified attribute from a given module.
    If the attempt fails (e.g., due to an import error or missing attribute), the function logs
    a warning and returns the provided fallback implementation.

    Args:
        module (str): The name of the module from which to fetch the attribute.
        attribute (str): The name of the attribute to fetch from the module.
        fallback (T): The fallback implementation to return if fetching the attribute fails.

    Returns:
        T: The fetched implementation if successful, otherwise the provided fallback.
    """
    try:
        return fetch_versioned_implementation(module, attribute)
    except Exception:
        return fallback


def noop_fallback(*args: Any, **kwargs: Any) -> None:
    """
    A no-op (no operation) fallback function that accepts any arguments but does nothing.
    This is often used as a default or placeholder callback function.

    Args:
        *args (Any): Positional arguments, which are ignored.
        **kwargs (Any): Keyword arguments, which are ignored.

    Returns:
        None
    """


def fetch_ee_implementation_or_noop(
    module: str, attribute: str, noop_return_value: Any = None
) -> Any:
    """
    Fetches an EE implementation if EE is enabled, otherwise returns a no-op function.
    Raises an exception if EE is enabled but the fetch fails.

    Args:
        module (str): The name of the module from which to fetch the attribute.
        attribute (str): The name of the attribute to fetch from the module.

    Returns:
        Any: The fetched EE implementation if successful and EE is enabled, otherwise a no-op function.

    Raises:
        Exception: If EE is enabled but the fetch fails.
    """
    if not global_version.is_ee_version():
        if inspect.iscoroutinefunction(noop_return_value):

            async def async_noop(*args: Any, **kwargs: Any) -> Any:
                return await noop_return_value(*args, **kwargs)

            return async_noop

        else:

            def sync_noop(*args: Any, **kwargs: Any) -> Any:  # noqa: ARG001
                return noop_return_value

            return sync_noop
    try:
        return fetch_versioned_implementation(module, attribute)
    except Exception as e:
        logger.error(f"Failed to fetch implementation for {module}.{attribute}: {e}")
        raise


def build_api_server_url_for_http_requests(
    respect_env_override_if_set: bool = False,
) -> str:
    """
    Builds the API server URL for HTTP requests.
    """
    if DEV_MODE:
        url = f"http://127.0.0.1:{APP_PORT}"
    elif respect_env_override_if_set and API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS:
        url = API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS.rstrip("/")
    else:
        url = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{APP_PORT}"

    if APP_API_PREFIX:
        url += f"/{APP_API_PREFIX.strip('/')}"

    return url


================================================
FILE: backend/onyx/utils/web_content.py
================================================
from __future__ import annotations

import io
from urllib.parse import unquote
from urllib.parse import urlparse

from bs4.dammit import UnicodeDammit

from onyx.file_processing.extract_file_text import read_pdf_file

PDF_MIME_TYPES = (
    "application/pdf",
    "application/x-pdf",
    "application/acrobat",
    "application/vnd.pdf",
    "text/pdf",
    "text/x-pdf",
)


def _charset_from_content_type(content_type: str | None) -> str | None:
    if not content_type:
        return None
    for part in content_type.split(";"):
        part = part.strip()
        if part.lower().startswith("charset="):
            charset = part.split("=", 1)[-1].strip().strip("\"'")
            return charset or None
    return None


def decode_html_bytes(
    content: bytes,
    content_type: str | None = None,
    fallback_encoding: str | None = None,
) -> str:
    override_encodings: list[str] = []
    charset = _charset_from_content_type(content_type)
    if charset:
        override_encodings.append(charset)
    if fallback_encoding and fallback_encoding not in override_encodings:
        override_encodings.append(fallback_encoding)

    unicode_dammit = UnicodeDammit(
        content, override_encodings=override_encodings or None
    )
    if unicode_dammit.unicode_markup is not None:
        return unicode_dammit.unicode_markup

    encoding = override_encodings[0] if override_encodings else "utf-8"
    return content.decode(encoding, errors="replace")


def is_pdf_mime_type(content_type: str | None) -> bool:
    if not content_type:
        return False
    lowered = content_type.lower()
    return any(pdf_type in lowered for pdf_type in PDF_MIME_TYPES)


def is_pdf_url(url: str) -> bool:
    if not url:
        return False
    parsed = urlparse(url)
    return parsed.path.lower().endswith(".pdf")


def has_pdf_signature(content_sniff: bytes | None) -> bool:
    if not content_sniff:
        return False
    return content_sniff.lstrip().startswith(b"%PDF-")


def is_pdf_resource(
    url: str,
    content_type: str | None = None,
    content_sniff: bytes | None = None,
) -> bool:
    return (
        is_pdf_mime_type(content_type)
        or is_pdf_url(url)
        or has_pdf_signature(content_sniff)
    )


def extract_pdf_text(content: bytes) -> tuple[str, dict[str, str | list[str]]]:
    text_content, metadata, _ = read_pdf_file(io.BytesIO(content))
    return text_content or "", normalize_metadata(metadata)


def title_from_pdf_metadata(metadata: dict[str, str | list[str]]) -> str:
    if not metadata:
        return ""
    for key in ("Title", "title"):
        value = metadata.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
        if isinstance(value, list):
            items = [item.strip() for item in value if isinstance(item, str)]
            if items:
                return ", ".join(items)
    return ""


def normalize_metadata(metadata: dict[str, object]) -> dict[str, str | list[str]]:
    sanitized: dict[str, str | list[str]] = {}
    for key, value in metadata.items():
        if isinstance(value, str):
            if value.strip():
                sanitized[key] = value
            continue
        if isinstance(value, list):
            items = [item.strip() for item in value if isinstance(item, str)]
            if items:
                sanitized[key] = items
            continue
        if value is not None:
            sanitized[key] = str(value)
    return sanitized


def title_from_url(url: str) -> str:
    parsed = urlparse(url)
    filename = parsed.path.rsplit("/", 1)[-1]
    if not filename:
        return ""
    return unquote(filename)


================================================
FILE: backend/onyx/voice/__init__.py
================================================


================================================
FILE: backend/onyx/voice/factory.py
================================================
from onyx.db.models import VoiceProvider
from onyx.voice.interface import VoiceProviderInterface


def get_voice_provider(provider: VoiceProvider) -> VoiceProviderInterface:
    """
    Factory function to get the appropriate voice provider implementation.

    Args:
        provider: VoiceProvider model instance (can be from DB or constructed temporarily)

    Returns:
        VoiceProviderInterface implementation

    Raises:
        ValueError: If provider_type is not supported
    """
    provider_type = provider.provider_type.lower()

    # Handle both SensitiveValue (from DB) and plain string (from temp model)
    if provider.api_key is None:
        api_key = None
    elif hasattr(provider.api_key, "get_value"):
        # SensitiveValue from database
        api_key = provider.api_key.get_value(apply_mask=False)
    else:
        # Plain string from temporary model
        api_key = provider.api_key  # type: ignore[assignment]
    api_base = provider.api_base
    custom_config = provider.custom_config
    stt_model = provider.stt_model
    tts_model = provider.tts_model
    default_voice = provider.default_voice

    if provider_type == "openai":
        from onyx.voice.providers.openai import OpenAIVoiceProvider

        return OpenAIVoiceProvider(
            api_key=api_key,
            api_base=api_base,
            stt_model=stt_model,
            tts_model=tts_model,
            default_voice=default_voice,
        )

    elif provider_type == "azure":
        from onyx.voice.providers.azure import AzureVoiceProvider

        return AzureVoiceProvider(
            api_key=api_key,
            api_base=api_base,
            custom_config=custom_config or {},
            stt_model=stt_model,
            tts_model=tts_model,
            default_voice=default_voice,
        )

    elif provider_type == "elevenlabs":
        from onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider

        return ElevenLabsVoiceProvider(
            api_key=api_key,
            api_base=api_base,
            stt_model=stt_model,
            tts_model=tts_model,
            default_voice=default_voice,
        )

    else:
        raise ValueError(f"Unsupported voice provider type: {provider_type}")


================================================
FILE: backend/onyx/voice/interface.py
================================================
from abc import ABC
from abc import abstractmethod
from collections.abc import AsyncIterator
from typing import Protocol

from pydantic import BaseModel


class TranscriptResult(BaseModel):
    """Result from streaming transcription."""

    text: str
    """The accumulated transcript text."""

    is_vad_end: bool = False
    """True if VAD detected end of speech (silence). Use for auto-send."""


class StreamingTranscriberProtocol(Protocol):
    """Protocol for streaming transcription sessions."""

    async def send_audio(self, chunk: bytes) -> None:
        """Send an audio chunk for transcription."""
        ...

    async def receive_transcript(self) -> TranscriptResult | None:
        """
        Receive next transcript update.

        Returns:
            TranscriptResult with accumulated text and VAD status, or None when stream ends.
        """
        ...

    async def close(self) -> str:
        """Close the session and return final transcript."""
        ...

    def reset_transcript(self) -> None:
        """Reset accumulated transcript. Call after auto-send to start fresh."""
        ...


class StreamingSynthesizerProtocol(Protocol):
    """Protocol for streaming TTS sessions (real-time text-to-speech)."""

    async def connect(self) -> None:
        """Establish connection to TTS provider."""
        ...

    async def send_text(self, text: str) -> None:
        """Send text to be synthesized."""
        ...

    async def receive_audio(self) -> bytes | None:
        """
        Receive next audio chunk.

        Returns:
            Audio bytes, or None when stream ends.
        """
        ...

    async def flush(self) -> None:
        """Signal end of text input and wait for pending audio."""
        ...

    async def close(self) -> None:
        """Close the session."""
        ...


class VoiceProviderInterface(ABC):
    """Abstract base class for voice providers (STT and TTS)."""

    @abstractmethod
    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
        """
        Convert audio to text (Speech-to-Text).

        Args:
            audio_data: Raw audio bytes
            audio_format: Audio format (e.g., "webm", "wav", "mp3")

        Returns:
            Transcribed text
        """

    @abstractmethod
    def synthesize_stream(
        self, text: str, voice: str | None = None, speed: float = 1.0
    ) -> AsyncIterator[bytes]:
        """
        Convert text to audio stream (Text-to-Speech).

        Streams audio chunks progressively for lower latency playback.

        Args:
            text: Text to convert to speech
            voice: Voice identifier (e.g., "alloy", "echo"), or None for default
            speed: Playback speed multiplier (0.25 to 4.0)

        Yields:
            Audio data chunks
        """

    @abstractmethod
    async def validate_credentials(self) -> None:
        """
        Validate that the provider credentials are correct by making a
        lightweight API call. Raises on failure.
        """

    @abstractmethod
    def get_available_voices(self) -> list[dict[str, str]]:
        """
        Get list of available voices for this provider.

        Returns:
            List of voice dictionaries with 'id' and 'name' keys
        """

    @abstractmethod
    def get_available_stt_models(self) -> list[dict[str, str]]:
        """
        Get list of available STT models for this provider.

        Returns:
            List of model dictionaries with 'id' and 'name' keys
        """

    @abstractmethod
    def get_available_tts_models(self) -> list[dict[str, str]]:
        """
        Get list of available TTS models for this provider.

        Returns:
            List of model dictionaries with 'id' and 'name' keys
        """

    def supports_streaming_stt(self) -> bool:
        """Returns True if this provider supports streaming STT."""
        return False

    def supports_streaming_tts(self) -> bool:
        """Returns True if this provider supports real-time streaming TTS."""
        return False

    async def create_streaming_transcriber(
        self, audio_format: str = "webm"
    ) -> StreamingTranscriberProtocol:
        """
        Create a streaming transcription session.

        Args:
            audio_format: Audio format being sent (e.g., "webm", "pcm16")

        Returns:
            A streaming transcriber that can send audio chunks and receive transcripts

        Raises:
            NotImplementedError: If streaming STT is not supported
        """
        raise NotImplementedError("Streaming STT not supported by this provider")

    async def create_streaming_synthesizer(
        self, voice: str | None = None, speed: float = 1.0
    ) -> "StreamingSynthesizerProtocol":
        """
        Create a streaming TTS session for real-time audio synthesis.

        Args:
            voice: Voice identifier
            speed: Playback speed multiplier

        Returns:
            A streaming synthesizer that can send text and receive audio chunks

        Raises:
            NotImplementedError: If streaming TTS is not supported
        """
        raise NotImplementedError("Streaming TTS not supported by this provider")


================================================
FILE: backend/onyx/voice/providers/__init__.py
================================================


================================================
FILE: backend/onyx/voice/providers/azure.py
================================================
"""Azure Speech Services voice provider for STT and TTS.

Azure supports:
- **STT**: Batch transcription via REST API (audio/wav POST) and real-time
  streaming via the Azure Speech SDK (push audio stream with continuous
  recognition). The SDK handles VAD natively through its recognizing/recognized
  events.
- **TTS**: SSML-based synthesis via REST API (streaming response) and real-time
  synthesis via the Speech SDK. Text is escaped with ``xml.sax.saxutils.escape``
  and attributes with ``quoteattr`` to prevent SSML injection.

Both modes support Azure cloud endpoints (region-based URLs) and self-hosted
Speech containers (custom endpoint URLs). The ``speech_region`` is validated to
contain only ``[a-z0-9-]`` to prevent URL injection.

The Azure Speech SDK (``azure-cognitiveservices-speech``) is an optional C
extension dependency — it is imported lazily inside streaming methods so the
provider can still be instantiated and used for REST-based operations without it.

See https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/
for API reference.
"""

import asyncio
import io
import re
import struct
import wave
from collections.abc import AsyncIterator
from typing import Any
from urllib.parse import urlparse
from xml.sax.saxutils import escape
from xml.sax.saxutils import quoteattr

import aiohttp

from onyx.utils.logger import setup_logger
from onyx.voice.interface import StreamingSynthesizerProtocol
from onyx.voice.interface import StreamingTranscriberProtocol
from onyx.voice.interface import TranscriptResult
from onyx.voice.interface import VoiceProviderInterface

# SSML namespace — W3C standard for Speech Synthesis Markup Language.
# This is a fixed W3C specification and will not change.
SSML_NAMESPACE = "http://www.w3.org/2001/10/synthesis"

# Common Azure Neural voices
AZURE_VOICES = [
    {"id": "en-US-JennyNeural", "name": "Jenny (en-US, Female)"},
    {"id": "en-US-GuyNeural", "name": "Guy (en-US, Male)"},
    {"id": "en-US-AriaNeural", "name": "Aria (en-US, Female)"},
    {"id": "en-US-DavisNeural", "name": "Davis (en-US, Male)"},
    {"id": "en-US-AmberNeural", "name": "Amber (en-US, Female)"},
    {"id": "en-US-AnaNeural", "name": "Ana (en-US, Female)"},
    {"id": "en-US-BrandonNeural", "name": "Brandon (en-US, Male)"},
    {"id": "en-US-ChristopherNeural", "name": "Christopher (en-US, Male)"},
    {"id": "en-US-CoraNeural", "name": "Cora (en-US, Female)"},
    {"id": "en-GB-SoniaNeural", "name": "Sonia (en-GB, Female)"},
    {"id": "en-GB-RyanNeural", "name": "Ryan (en-GB, Male)"},
]


class AzureStreamingTranscriber(StreamingTranscriberProtocol):
    """Streaming transcription using Azure Speech SDK."""

    def __init__(
        self,
        api_key: str,
        region: str | None = None,
        endpoint: str | None = None,
        input_sample_rate: int = 24000,
        target_sample_rate: int = 16000,
    ):
        self.api_key = api_key
        self.region = region
        self.endpoint = endpoint
        self.input_sample_rate = input_sample_rate
        self.target_sample_rate = target_sample_rate
        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
        self._accumulated_transcript = ""
        self._recognizer: Any = None
        self._audio_stream: Any = None
        self._closed = False
        self._loop: asyncio.AbstractEventLoop | None = None

    async def connect(self) -> None:
        """Initialize Azure Speech recognizer with push stream."""
        try:
            import azure.cognitiveservices.speech as speechsdk  # type: ignore
        except ImportError as e:
            raise RuntimeError(
                "Azure Speech SDK is required for streaming STT. Install `azure-cognitiveservices-speech`."
            ) from e

        self._loop = asyncio.get_running_loop()

        # Use endpoint for self-hosted containers, region for Azure cloud
        if self.endpoint:
            speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key,
                endpoint=self.endpoint,
            )
        else:
            speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key,
                region=self.region,
            )

        audio_format = speechsdk.audio.AudioStreamFormat(
            samples_per_second=16000,
            bits_per_sample=16,
            channels=1,
        )
        self._audio_stream = speechsdk.audio.PushAudioInputStream(audio_format)
        audio_config = speechsdk.audio.AudioConfig(stream=self._audio_stream)

        self._recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            audio_config=audio_config,
        )

        transcriber = self

        def on_recognizing(evt: Any) -> None:
            if evt.result.text and transcriber._loop and not transcriber._closed:
                full_text = transcriber._accumulated_transcript
                if full_text:
                    full_text += " " + evt.result.text
                else:
                    full_text = evt.result.text
                transcriber._loop.call_soon_threadsafe(
                    transcriber._transcript_queue.put_nowait,
                    TranscriptResult(text=full_text, is_vad_end=False),
                )

        def on_recognized(evt: Any) -> None:
            if evt.result.text and transcriber._loop and not transcriber._closed:
                if transcriber._accumulated_transcript:
                    transcriber._accumulated_transcript += " " + evt.result.text
                else:
                    transcriber._accumulated_transcript = evt.result.text
                transcriber._loop.call_soon_threadsafe(
                    transcriber._transcript_queue.put_nowait,
                    TranscriptResult(
                        text=transcriber._accumulated_transcript, is_vad_end=True
                    ),
                )

        self._recognizer.recognizing.connect(on_recognizing)
        self._recognizer.recognized.connect(on_recognized)
        self._recognizer.start_continuous_recognition_async()

    async def send_audio(self, chunk: bytes) -> None:
        """Send audio chunk to Azure."""
        if self._audio_stream and not self._closed:
            self._audio_stream.write(self._resample_pcm16(chunk))

    def _resample_pcm16(self, data: bytes) -> bytes:
        """Resample PCM16 audio from input_sample_rate to target_sample_rate."""
        if self.input_sample_rate == self.target_sample_rate:
            return data

        num_samples = len(data) // 2
        if num_samples == 0:
            return b""

        samples = list(struct.unpack(f"<{num_samples}h", data))
        ratio = self.input_sample_rate / self.target_sample_rate
        new_length = int(num_samples / ratio)

        resampled: list[int] = []
        for i in range(new_length):
            src_idx = i * ratio
            idx_floor = int(src_idx)
            idx_ceil = min(idx_floor + 1, num_samples - 1)
            frac = src_idx - idx_floor
            sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)
            sample = max(-32768, min(32767, sample))
            resampled.append(sample)

        return struct.pack(f"<{len(resampled)}h", *resampled)

    async def receive_transcript(self) -> TranscriptResult | None:
        """Receive next transcript."""
        try:
            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return TranscriptResult(text="", is_vad_end=False)

    async def close(self) -> str:
        """Stop recognition and return final transcript."""
        self._closed = True
        if self._recognizer:
            self._recognizer.stop_continuous_recognition_async()
        if self._audio_stream:
            self._audio_stream.close()
        self._loop = None
        return self._accumulated_transcript

    def reset_transcript(self) -> None:
        """Reset accumulated transcript."""
        self._accumulated_transcript = ""


class AzureStreamingSynthesizer(StreamingSynthesizerProtocol):
    """Real-time streaming TTS using Azure Speech SDK."""

    def __init__(
        self,
        api_key: str,
        region: str | None = None,
        endpoint: str | None = None,
        voice: str = "en-US-JennyNeural",
        speed: float = 1.0,
    ):
        self._logger = setup_logger()
        self.api_key = api_key
        self.region = region
        self.endpoint = endpoint
        self.voice = voice
        self.speed = max(0.5, min(2.0, speed))
        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
        self._synthesizer: Any = None
        self._closed = False
        self._loop: asyncio.AbstractEventLoop | None = None

    async def connect(self) -> None:
        """Initialize Azure Speech synthesizer with push stream."""
        try:
            import azure.cognitiveservices.speech as speechsdk
        except ImportError as e:
            raise RuntimeError(
                "Azure Speech SDK is required for streaming TTS. Install `azure-cognitiveservices-speech`."
            ) from e

        self._logger.info("AzureStreamingSynthesizer: connecting")

        # Store the event loop for thread-safe queue operations
        self._loop = asyncio.get_running_loop()

        # Use endpoint for self-hosted containers, region for Azure cloud
        if self.endpoint:
            speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key,
                endpoint=self.endpoint,
            )
        else:
            speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key,
                region=self.region,
            )
        speech_config.speech_synthesis_voice_name = self.voice
        # Use MP3 format for streaming - compatible with MediaSource Extensions
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3
        )

        # Create synthesizer with pull audio output stream
        self._synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config,
            audio_config=None,  # We'll manually handle audio
        )

        # Connect to synthesis events
        self._synthesizer.synthesizing.connect(self._on_synthesizing)
        self._synthesizer.synthesis_completed.connect(self._on_completed)

        self._logger.info("AzureStreamingSynthesizer: connected")

    def _on_synthesizing(self, evt: Any) -> None:
        """Called when audio chunk is available (runs in Azure SDK thread)."""
        if evt.result.audio_data and self._loop and not self._closed:
            # Thread-safe way to put item in async queue
            self._loop.call_soon_threadsafe(
                self._audio_queue.put_nowait, evt.result.audio_data
            )

    def _on_completed(self, _evt: Any) -> None:
        """Called when synthesis is complete (runs in Azure SDK thread)."""
        if self._loop and not self._closed:
            self._loop.call_soon_threadsafe(self._audio_queue.put_nowait, None)

    async def send_text(self, text: str) -> None:
        """Send text to be synthesized using SSML for prosody control."""
        if self._synthesizer and not self._closed:
            # Build SSML with prosody for speed control
            rate = f"{int((self.speed - 1) * 100):+d}%"
            escaped_text = escape(text)
            ssml = f"""<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>
                <voice name={quoteattr(self.voice)}>
                    <prosody rate='{rate}'>{escaped_text}</prosody>
                </voice>
            </speak>"""
            # Use speak_ssml_async for SSML support (includes speed/prosody)
            self._synthesizer.speak_ssml_async(ssml)

    async def receive_audio(self) -> bytes | None:
        """Receive next audio chunk."""
        try:
            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return b""  # No audio yet, but not done

    async def flush(self) -> None:
        """Signal end of text input - wait for pending audio."""
        # Azure SDK handles flushing automatically

    async def close(self) -> None:
        """Close the session."""
        self._closed = True
        if self._synthesizer:
            self._synthesizer.synthesis_completed.disconnect_all()
            self._synthesizer.synthesizing.disconnect_all()
        self._loop = None


class AzureVoiceProvider(VoiceProviderInterface):
    """Azure Speech Services voice provider."""

    def __init__(
        self,
        api_key: str | None,
        api_base: str | None,
        custom_config: dict[str, Any],
        stt_model: str | None = None,
        tts_model: str | None = None,
        default_voice: str | None = None,
    ):
        self.api_key = api_key
        self.api_base = api_base
        self.custom_config = custom_config
        raw_speech_region = (
            custom_config.get("speech_region")
            or self._extract_speech_region_from_uri(api_base)
            or ""
        )
        self.speech_region = self._validate_speech_region(raw_speech_region)
        self.stt_model = stt_model
        self.tts_model = tts_model
        self.default_voice = default_voice or "en-US-JennyNeural"

    @staticmethod
    def _is_azure_cloud_url(uri: str | None) -> bool:
        """Check if URI is an Azure cloud endpoint (vs custom/self-hosted)."""
        if not uri:
            return False
        try:
            hostname = (urlparse(uri).hostname or "").lower()
        except ValueError:
            return False
        return hostname.endswith(
            (
                ".speech.microsoft.com",
                ".api.cognitive.microsoft.com",
                ".cognitiveservices.azure.com",
            )
        )

    @staticmethod
    def _extract_speech_region_from_uri(uri: str | None) -> str | None:
        """Extract Azure speech region from endpoint URI.

        Note: Custom domains (*.cognitiveservices.azure.com) contain the resource
        name, not the region. For custom domains, the region must be specified
        explicitly via custom_config["speech_region"].
        """
        if not uri:
            return None
        # Accepted examples:
        # - https://eastus.tts.speech.microsoft.com/cognitiveservices/v1
        # - https://eastus.stt.speech.microsoft.com/speech/recognition/...
        # - https://westus.api.cognitive.microsoft.com/
        #
        # NOT supported (requires explicit speech_region config):
        # - https://<resource>.cognitiveservices.azure.com/ (resource name != region)
        try:
            hostname = (urlparse(uri).hostname or "").lower()
        except ValueError:
            return None

        stt_tts_match = re.match(
            r"^([a-z0-9-]+)\.(?:tts|stt)\.speech\.microsoft\.com$", hostname
        )
        if stt_tts_match:
            return stt_tts_match.group(1)

        api_match = re.match(
            r"^([a-z0-9-]+)\.api\.cognitive\.microsoft\.com$", hostname
        )
        if api_match:
            return api_match.group(1)

        return None

    @staticmethod
    def _validate_speech_region(speech_region: str) -> str:
        normalized_region = speech_region.strip().lower()
        if not normalized_region:
            return ""
        if not re.fullmatch(r"[a-z0-9-]+", normalized_region):
            raise ValueError(
                "Invalid Azure speech_region. Use lowercase letters, digits, and hyphens only."
            )
        return normalized_region

    def _get_stt_url(self) -> str:
        """Get the STT endpoint URL (auto-detects cloud vs self-hosted)."""
        if self.api_base and not self._is_azure_cloud_url(self.api_base):
            # Self-hosted container endpoint
            return f"{self.api_base.rstrip('/')}/speech/recognition/conversation/cognitiveservices/v1"
        # Azure cloud endpoint
        return f"https://{self.speech_region}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1"

    def _get_tts_url(self) -> str:
        """Get the TTS endpoint URL (auto-detects cloud vs self-hosted)."""
        if self.api_base and not self._is_azure_cloud_url(self.api_base):
            # Self-hosted container endpoint
            return f"{self.api_base.rstrip('/')}/cognitiveservices/v1"
        # Azure cloud endpoint
        return f"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/v1"

    def _is_self_hosted(self) -> bool:
        """Check if using self-hosted container vs Azure cloud."""
        return bool(self.api_base and not self._is_azure_cloud_url(self.api_base))

    @staticmethod
    def _pcm16_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
        """Wrap raw PCM16 mono bytes into a WAV container."""
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(pcm_data)
        return buffer.getvalue()

    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
        if not self.api_key:
            raise ValueError("Azure API key required for STT")
        if not self._is_self_hosted() and not self.speech_region:
            raise ValueError("Azure speech region required for STT (cloud mode)")

        normalized_format = audio_format.lower()
        payload = audio_data
        content_type = f"audio/{normalized_format}"

        # WebSocket chunked fallback sends raw PCM16 bytes.
        if normalized_format in {"pcm", "pcm16", "raw"}:
            payload = self._pcm16_to_wav(audio_data, sample_rate=24000)
            content_type = "audio/wav"
        elif normalized_format in {"wav", "wave"}:
            content_type = "audio/wav"
        elif normalized_format == "webm":
            content_type = "audio/webm; codecs=opus"

        url = self._get_stt_url()
        params = {"language": "en-US", "format": "detailed"}
        headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-Type": content_type,
            "Accept": "application/json",
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(
                url, params=params, headers=headers, data=payload
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"Azure STT failed: {error_text}")
                result = await response.json()

        if result.get("RecognitionStatus") != "Success":
            return ""
        nbest = result.get("NBest") or []
        if nbest and isinstance(nbest, list):
            display = nbest[0].get("Display")
            if isinstance(display, str):
                return display
        display_text = result.get("DisplayText", "")
        return display_text if isinstance(display_text, str) else ""

    async def synthesize_stream(
        self, text: str, voice: str | None = None, speed: float = 1.0
    ) -> AsyncIterator[bytes]:
        """
        Convert text to audio using Azure TTS with streaming.

        Args:
            text: Text to convert to speech
            voice: Voice name (defaults to provider's default voice)
            speed: Playback speed multiplier (0.5 to 2.0)

        Yields:
            Audio data chunks (mp3 format)
        """
        if not self.api_key:
            raise ValueError("Azure API key required for TTS")

        if not self._is_self_hosted() and not self.speech_region:
            raise ValueError("Azure speech region required for TTS (cloud mode)")

        voice_name = voice or self.default_voice

        # Clamp speed to valid range and convert to rate format
        speed = max(0.5, min(2.0, speed))
        rate = f"{int((speed - 1) * 100):+d}%"  # e.g., 1.0 -> "+0%", 1.5 -> "+50%"

        # Build SSML with escaped text and quoted attributes to prevent injection
        escaped_text = escape(text)
        ssml = f"""<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>
            <voice name={quoteattr(voice_name)}>
                <prosody rate='{rate}'>{escaped_text}</prosody>
            </voice>
        </speak>"""

        url = self._get_tts_url()

        headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-Type": "application/ssml+xml",
            "X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
            "User-Agent": "Onyx",
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=headers, data=ssml) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"Azure TTS failed: {error_text}")

                # Use 8192 byte chunks for smoother streaming
                async for chunk in response.content.iter_chunked(8192):
                    if chunk:
                        yield chunk

    async def validate_credentials(self) -> None:
        """Validate Azure credentials by listing available voices."""
        if not self.api_key:
            raise ValueError("Azure API key required")
        if not self._is_self_hosted() and not self.speech_region:
            raise ValueError("Azure speech region required (cloud mode)")

        url = f"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
        if self._is_self_hosted():
            url = f"{(self.api_base or '').rstrip('/')}/cognitiveservices/voices/list"

        headers = {"Ocp-Apim-Subscription-Key": self.api_key}
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                if response.status in (401, 403):
                    raise RuntimeError("Invalid Azure API key.")
                if response.status != 200:
                    raise RuntimeError("Azure credential validation failed.")

    def get_available_voices(self) -> list[dict[str, str]]:
        """Return common Azure Neural voices."""
        return AZURE_VOICES.copy()

    def get_available_stt_models(self) -> list[dict[str, str]]:
        return [
            {"id": "default", "name": "Azure Speech Recognition"},
        ]

    def get_available_tts_models(self) -> list[dict[str, str]]:
        return [
            {"id": "neural", "name": "Neural TTS"},
        ]

    def supports_streaming_stt(self) -> bool:
        """Azure supports streaming STT via Speech SDK."""
        return True

    def supports_streaming_tts(self) -> bool:
        """Azure supports real-time streaming TTS via Speech SDK."""
        return True

    async def create_streaming_transcriber(
        self, _audio_format: str = "webm"
    ) -> AzureStreamingTranscriber:
        """Create a streaming transcription session."""
        if not self.api_key:
            raise ValueError("API key required for streaming transcription")
        if not self._is_self_hosted() and not self.speech_region:
            raise ValueError(
                "Speech region required for Azure streaming transcription (cloud mode)"
            )

        # Use endpoint for self-hosted, region for cloud
        transcriber = AzureStreamingTranscriber(
            api_key=self.api_key,
            region=self.speech_region if not self._is_self_hosted() else None,
            endpoint=self.api_base if self._is_self_hosted() else None,
            input_sample_rate=24000,
            target_sample_rate=16000,
        )
        await transcriber.connect()
        return transcriber

    async def create_streaming_synthesizer(
        self, voice: str | None = None, speed: float = 1.0
    ) -> AzureStreamingSynthesizer:
        """Create a streaming TTS session."""
        if not self.api_key:
            raise ValueError("API key required for streaming TTS")
        if not self._is_self_hosted() and not self.speech_region:
            raise ValueError(
                "Speech region required for Azure streaming TTS (cloud mode)"
            )

        # Use endpoint for self-hosted, region for cloud
        synthesizer = AzureStreamingSynthesizer(
            api_key=self.api_key,
            region=self.speech_region if not self._is_self_hosted() else None,
            endpoint=self.api_base if self._is_self_hosted() else None,
            voice=voice or self.default_voice or "en-US-JennyNeural",
            speed=speed,
        )
        await synthesizer.connect()
        return synthesizer


================================================
FILE: backend/onyx/voice/providers/elevenlabs.py
================================================
"""ElevenLabs voice provider for STT and TTS.

ElevenLabs supports:
- **STT**: Scribe API (batch via REST, streaming via WebSocket with Scribe v2 Realtime).
  The streaming endpoint sends base64-encoded PCM16 audio chunks and receives JSON
  transcript messages (partial_transcript, committed_transcript, utterance_end).
- **TTS**: Text-to-speech via REST streaming and WebSocket stream-input.
  The WebSocket variant accepts incremental text chunks and returns audio in order,
  enabling low-latency playback before the full text is available.

See https://elevenlabs.io/docs for API reference.
"""

import asyncio
import base64
import json
from collections.abc import AsyncIterator
from enum import StrEnum
from typing import Any

import aiohttp

from onyx.voice.interface import StreamingSynthesizerProtocol
from onyx.voice.interface import StreamingTranscriberProtocol
from onyx.voice.interface import TranscriptResult
from onyx.voice.interface import VoiceProviderInterface

# Default ElevenLabs API base URL
DEFAULT_ELEVENLABS_API_BASE = "https://api.elevenlabs.io"

# Default sample rates for STT streaming
DEFAULT_INPUT_SAMPLE_RATE = 24000  # What the browser frontend sends
DEFAULT_TARGET_SAMPLE_RATE = 16000  # What ElevenLabs Scribe expects

# Default streaming TTS output format
DEFAULT_TTS_OUTPUT_FORMAT = "mp3_44100_64"

# Default TTS voice settings
DEFAULT_VOICE_STABILITY = 0.5
DEFAULT_VOICE_SIMILARITY_BOOST = 0.75

# Chunk length schedule for streaming TTS (optimized for real-time playback)
DEFAULT_CHUNK_LENGTH_SCHEDULE = [120, 160, 250, 290]

# Default STT streaming VAD configuration
DEFAULT_VAD_SILENCE_THRESHOLD_SECS = 1.0
DEFAULT_VAD_THRESHOLD = 0.4
DEFAULT_MIN_SPEECH_DURATION_MS = 100
DEFAULT_MIN_SILENCE_DURATION_MS = 300


class ElevenLabsSTTMessageType(StrEnum):
    """Message types from ElevenLabs Scribe Realtime STT API."""

    SESSION_STARTED = "session_started"
    PARTIAL_TRANSCRIPT = "partial_transcript"
    COMMITTED_TRANSCRIPT = "committed_transcript"
    UTTERANCE_END = "utterance_end"
    SESSION_ENDED = "session_ended"
    ERROR = "error"


class ElevenLabsTTSMessageType(StrEnum):
    """Message types from ElevenLabs stream-input TTS API."""

    AUDIO = "audio"
    ERROR = "error"


def _http_to_ws_url(http_url: str) -> str:
    """Convert http(s) URL to ws(s) URL for WebSocket connections."""
    if http_url.startswith("https://"):
        return "wss://" + http_url[8:]
    elif http_url.startswith("http://"):
        return "ws://" + http_url[7:]
    return http_url


# Common ElevenLabs voices
ELEVENLABS_VOICES = [
    {"id": "21m00Tcm4TlvDq8ikWAM", "name": "Rachel"},
    {"id": "AZnzlk1XvdvUeBnXmlld", "name": "Domi"},
    {"id": "EXAVITQu4vr4xnSDxMaL", "name": "Bella"},
    {"id": "ErXwobaYiN019PkySvjV", "name": "Antoni"},
    {"id": "MF3mGyEYCl7XYWbV9V6O", "name": "Elli"},
    {"id": "TxGEqnHWrfWFTfGW9XjX", "name": "Josh"},
    {"id": "VR6AewLTigWG4xSOukaG", "name": "Arnold"},
    {"id": "pNInz6obpgDQGcFmaJgB", "name": "Adam"},
    {"id": "yoZ06aMxZJJ28mfd3POQ", "name": "Sam"},
]


class ElevenLabsStreamingTranscriber(StreamingTranscriberProtocol):
    """Streaming transcription session using ElevenLabs Scribe Realtime API."""

    def __init__(
        self,
        api_key: str,
        model: str = "scribe_v2_realtime",
        input_sample_rate: int = DEFAULT_INPUT_SAMPLE_RATE,
        target_sample_rate: int = DEFAULT_TARGET_SAMPLE_RATE,
        language_code: str = "en",
        api_base: str | None = None,
    ):
        # Import logger first
        from onyx.utils.logger import setup_logger

        self._logger = setup_logger()

        self._logger.info(
            f"ElevenLabsStreamingTranscriber: initializing with model {model}"
        )
        self.api_key = api_key
        self.model = model
        self.input_sample_rate = input_sample_rate
        self.target_sample_rate = target_sample_rate
        self.language_code = language_code
        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
        self._ws: aiohttp.ClientWebSocketResponse | None = None
        self._session: aiohttp.ClientSession | None = None
        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
        self._final_transcript = ""
        self._receive_task: asyncio.Task | None = None
        self._closed = False

    async def connect(self) -> None:
        """Establish WebSocket connection to ElevenLabs."""
        self._logger.info(
            "ElevenLabsStreamingTranscriber: connecting to ElevenLabs API"
        )
        self._session = aiohttp.ClientSession()

        # VAD is configured via query parameters.
        # commit_strategy=vad enables automatic transcript commit on silence detection.
        # These params are part of the ElevenLabs Scribe Realtime API contract:
        # https://elevenlabs.io/docs/api-reference/speech-to-text/realtime
        ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
        url = (
            f"{ws_base}/v1/speech-to-text/realtime"
            f"?model_id={self.model}"
            f"&sample_rate={self.target_sample_rate}"
            f"&language_code={self.language_code}"
            f"&commit_strategy=vad"
            f"&vad_silence_threshold_secs={DEFAULT_VAD_SILENCE_THRESHOLD_SECS}"
            f"&vad_threshold={DEFAULT_VAD_THRESHOLD}"
            f"&min_speech_duration_ms={DEFAULT_MIN_SPEECH_DURATION_MS}"
            f"&min_silence_duration_ms={DEFAULT_MIN_SILENCE_DURATION_MS}"
        )
        self._logger.info(
            f"ElevenLabsStreamingTranscriber: connecting to {url} "
            f"(input={self.input_sample_rate}Hz, target={self.target_sample_rate}Hz)"
        )

        try:
            self._ws = await self._session.ws_connect(
                url,
                headers={"xi-api-key": self.api_key},
            )
            self._logger.info(
                f"ElevenLabsStreamingTranscriber: connected successfully, "
                f"ws.closed={self._ws.closed}, close_code={self._ws.close_code}"
            )
        except Exception as e:
            self._logger.error(
                f"ElevenLabsStreamingTranscriber: failed to connect: {e}"
            )
            if self._session:
                await self._session.close()
            raise

        # Start receiving transcripts in background
        self._receive_task = asyncio.create_task(self._receive_loop())

    async def _receive_loop(self) -> None:
        """Background task to receive transcripts from WebSocket."""
        self._logger.info("ElevenLabsStreamingTranscriber: receive loop started")
        if not self._ws:
            self._logger.warning(
                "ElevenLabsStreamingTranscriber: no WebSocket connection"
            )
            return

        try:
            async for msg in self._ws:
                self._logger.debug(
                    f"ElevenLabsStreamingTranscriber: raw message type: {msg.type}"
                )
                if msg.type == aiohttp.WSMsgType.TEXT:
                    parsed_data: Any = None
                    data: dict[str, Any]
                    try:
                        parsed_data = json.loads(msg.data)
                    except json.JSONDecodeError:
                        self._logger.error(
                            f"ElevenLabsStreamingTranscriber: failed to parse JSON: {msg.data[:200]}"
                        )
                        continue
                    if not isinstance(parsed_data, dict):
                        self._logger.error(
                            "ElevenLabsStreamingTranscriber: expected object JSON payload"
                        )
                        continue
                    data = parsed_data

                    # ElevenLabs uses message_type field - fail fast if missing
                    if "message_type" not in data and "type" not in data:
                        self._logger.error(
                            f"ElevenLabsStreamingTranscriber: malformed packet missing 'message_type' field: {data}"
                        )
                        continue
                    msg_type = data.get("message_type", data.get("type", ""))
                    self._logger.info(
                        f"ElevenLabsStreamingTranscriber: received message_type: '{msg_type}', data keys: {list(data.keys())}"
                    )
                    # Check for error in various formats
                    if "error" in data or msg_type == ElevenLabsSTTMessageType.ERROR:
                        error_msg = data.get("error", data.get("message", data))
                        self._logger.error(
                            f"ElevenLabsStreamingTranscriber: API error: {error_msg}"
                        )
                        continue

                    # Handle message types from ElevenLabs Scribe Realtime API.
                    # See https://elevenlabs.io/docs/api-reference/speech-to-text/realtime
                    if msg_type == ElevenLabsSTTMessageType.SESSION_STARTED:
                        self._logger.info(
                            f"ElevenLabsStreamingTranscriber: session started, "
                            f"id={data.get('session_id')}, config={data.get('config')}"
                        )
                    elif msg_type == ElevenLabsSTTMessageType.PARTIAL_TRANSCRIPT:
                        # Interim result — updated as more audio is processed
                        text = data.get("text", "")
                        if text:
                            self._logger.info(
                                f"ElevenLabsStreamingTranscriber: partial_transcript: {text[:50]}..."
                            )
                            self._final_transcript = text
                            await self._transcript_queue.put(
                                TranscriptResult(text=text, is_vad_end=False)
                            )
                    elif msg_type == ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT:
                        # Final transcript for the current utterance (VAD detected end)
                        text = data.get("text", "")
                        if text:
                            self._logger.info(
                                f"ElevenLabsStreamingTranscriber: committed_transcript: {text[:50]}..."
                            )
                            self._final_transcript = text
                            await self._transcript_queue.put(
                                TranscriptResult(text=text, is_vad_end=True)
                            )
                    elif msg_type == ElevenLabsSTTMessageType.UTTERANCE_END:
                        # VAD detected end of speech (may carry text or be empty)
                        text = data.get("text", "") or self._final_transcript
                        if text:
                            self._logger.info(
                                f"ElevenLabsStreamingTranscriber: utterance_end: {text[:50]}..."
                            )
                            self._final_transcript = text
                            await self._transcript_queue.put(
                                TranscriptResult(text=text, is_vad_end=True)
                            )
                    elif msg_type == ElevenLabsSTTMessageType.SESSION_ENDED:
                        self._logger.info(
                            "ElevenLabsStreamingTranscriber: session ended"
                        )
                        break
                    else:
                        # Log unhandled message types with full data for debugging
                        self._logger.warning(
                            f"ElevenLabsStreamingTranscriber: unhandled message_type: {msg_type}, full data: {data}"
                        )
                elif msg.type == aiohttp.WSMsgType.BINARY:
                    self._logger.debug(
                        f"ElevenLabsStreamingTranscriber: received binary message: {len(msg.data)} bytes"
                    )
                elif msg.type == aiohttp.WSMsgType.CLOSED:
                    close_code = self._ws.close_code if self._ws else "N/A"
                    self._logger.info(
                        f"ElevenLabsStreamingTranscriber: WebSocket closed by server, close_code={close_code}"
                    )
                    break
                elif msg.type == aiohttp.WSMsgType.ERROR:
                    self._logger.error(
                        f"ElevenLabsStreamingTranscriber: WebSocket error: {self._ws.exception() if self._ws else 'N/A'}"
                    )
                    break
                elif msg.type == aiohttp.WSMsgType.CLOSE:
                    self._logger.info(
                        f"ElevenLabsStreamingTranscriber: WebSocket CLOSE frame received, data={msg.data}, extra={msg.extra}"
                    )
                    break
        except Exception as e:
            self._logger.error(
                f"ElevenLabsStreamingTranscriber: error in receive loop: {e}",
                exc_info=True,
            )
        finally:
            close_code = self._ws.close_code if self._ws else "N/A"
            self._logger.info(
                f"ElevenLabsStreamingTranscriber: receive loop ended, close_code={close_code}"
            )
            await self._transcript_queue.put(None)  # Signal end

    def _resample_pcm16(self, data: bytes) -> bytes:
        """Resample PCM16 audio from input_sample_rate to target_sample_rate."""
        import struct

        if self.input_sample_rate == self.target_sample_rate:
            return data

        # Parse int16 samples
        num_samples = len(data) // 2
        samples = list(struct.unpack(f"<{num_samples}h", data))

        # Calculate resampling ratio
        ratio = self.input_sample_rate / self.target_sample_rate
        new_length = int(num_samples / ratio)

        # Linear interpolation resampling
        resampled = []
        for i in range(new_length):
            src_idx = i * ratio
            idx_floor = int(src_idx)
            idx_ceil = min(idx_floor + 1, num_samples - 1)
            frac = src_idx - idx_floor
            sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)
            # Clamp to int16 range
            sample = max(-32768, min(32767, sample))
            resampled.append(sample)

        return struct.pack(f"<{len(resampled)}h", *resampled)

    async def send_audio(self, chunk: bytes) -> None:
        """Send an audio chunk for transcription."""
        if not self._ws:
            self._logger.warning("send_audio: no WebSocket connection")
            return
        if self._closed:
            self._logger.warning("send_audio: transcriber is closed")
            return
        if self._ws.closed:
            self._logger.warning(
                f"send_audio: WebSocket is closed, close_code={self._ws.close_code}"
            )
            return

        try:
            # Resample from input rate (24kHz) to target rate (16kHz)
            resampled = self._resample_pcm16(chunk)
            # ElevenLabs expects input_audio_chunk message format with audio_base_64
            audio_b64 = base64.b64encode(resampled).decode("utf-8")
            message = {
                "message_type": "input_audio_chunk",
                "audio_base_64": audio_b64,
                "sample_rate": self.target_sample_rate,
            }
            self._logger.info(
                f"send_audio: {len(chunk)} bytes -> {len(resampled)} bytes (resampled) -> {len(audio_b64)} chars base64"
            )
            await self._ws.send_str(json.dumps(message))
            self._logger.info("send_audio: message sent successfully")
        except Exception as e:
            self._logger.error(f"send_audio: failed to send: {e}", exc_info=True)
            raise

    async def receive_transcript(self) -> TranscriptResult | None:
        """Receive next transcript. Returns None when done."""
        try:
            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return TranscriptResult(
                text="", is_vad_end=False
            )  # No transcript yet, but not done

    async def close(self) -> str:
        """Close the session and return final transcript."""
        self._logger.info("ElevenLabsStreamingTranscriber: closing session")
        self._closed = True
        if self._ws and not self._ws.closed:
            try:
                # Just close the WebSocket - ElevenLabs Scribe doesn't need a special end message
                self._logger.info(
                    "ElevenLabsStreamingTranscriber: closing WebSocket connection"
                )
                await self._ws.close()
            except Exception as e:
                self._logger.debug(f"Error closing WebSocket: {e}")
        if self._receive_task and not self._receive_task.done():
            self._receive_task.cancel()
            try:
                await self._receive_task
            except asyncio.CancelledError:
                pass
        if self._session and not self._session.closed:
            await self._session.close()
        return self._final_transcript

    def reset_transcript(self) -> None:
        """Reset accumulated transcript. Call after auto-send to start fresh."""
        self._final_transcript = ""


class ElevenLabsStreamingSynthesizer(StreamingSynthesizerProtocol):
    """Real-time streaming TTS using ElevenLabs WebSocket API.

    Uses ElevenLabs' stream-input WebSocket which processes text as one
    continuous stream and returns audio in order.
    """

    def __init__(
        self,
        api_key: str,
        voice_id: str,
        model_id: str = "eleven_multilingual_v2",
        output_format: str = "mp3_44100_64",
        api_base: str | None = None,
        speed: float = 1.0,
    ):
        from onyx.utils.logger import setup_logger

        self._logger = setup_logger()
        self.api_key = api_key
        self.voice_id = voice_id
        self.model_id = model_id
        self.output_format = output_format
        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
        self.speed = speed
        self._ws: aiohttp.ClientWebSocketResponse | None = None
        self._session: aiohttp.ClientSession | None = None
        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
        self._receive_task: asyncio.Task | None = None
        self._closed = False

    async def connect(self) -> None:
        """Establish WebSocket connection to ElevenLabs TTS."""
        self._logger.info("ElevenLabsStreamingSynthesizer: connecting")
        self._session = aiohttp.ClientSession()

        # WebSocket URL for streaming input TTS with output format for streaming compatibility
        # Using mp3_44100_64 for good quality with smaller chunks for real-time playback
        ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
        url = (
            f"{ws_base}/v1/text-to-speech/{self.voice_id}/stream-input"
            f"?model_id={self.model_id}&output_format={self.output_format}"
        )

        self._ws = await self._session.ws_connect(
            url,
            headers={"xi-api-key": self.api_key},
        )

        # Send initial configuration with generation settings optimized for streaming.
        # Note: API key is sent via header only (not in body to avoid log exposure).
        # See https://elevenlabs.io/docs/api-reference/text-to-speech/stream-input
        await self._ws.send_str(
            json.dumps(
                {
                    "text": " ",  # Initial space to start the stream
                    "voice_settings": {
                        "stability": DEFAULT_VOICE_STABILITY,
                        "similarity_boost": DEFAULT_VOICE_SIMILARITY_BOOST,
                        "speed": self.speed,
                    },
                    "generation_config": {
                        "chunk_length_schedule": DEFAULT_CHUNK_LENGTH_SCHEDULE,
                    },
                }
            )
        )

        # Start receiving audio in background
        self._receive_task = asyncio.create_task(self._receive_loop())
        self._logger.info("ElevenLabsStreamingSynthesizer: connected")

    async def _receive_loop(self) -> None:
        """Background task to receive audio chunks from WebSocket.

        Audio is returned in order as one continuous stream.
        """
        if not self._ws:
            return

        chunk_count = 0
        total_bytes = 0
        try:
            async for msg in self._ws:
                if self._closed:
                    self._logger.info(
                        "ElevenLabsStreamingSynthesizer: closed flag set, stopping receive loop"
                    )
                    break
                if msg.type == aiohttp.WSMsgType.TEXT:
                    data = json.loads(msg.data)
                    # Process audio if present
                    if "audio" in data and data["audio"]:
                        audio_bytes = base64.b64decode(data["audio"])
                        chunk_count += 1
                        total_bytes += len(audio_bytes)
                        await self._audio_queue.put(audio_bytes)

                    # Check isFinal separately - a message can have both audio AND isFinal
                    if "isFinal" in data:
                        self._logger.info(
                            f"ElevenLabsStreamingSynthesizer: received isFinal={data['isFinal']}, "
                            f"chunks so far: {chunk_count}, bytes: {total_bytes}"
                        )
                        if data.get("isFinal"):
                            self._logger.info(
                                "ElevenLabsStreamingSynthesizer: isFinal=true, signaling end of audio"
                            )
                            await self._audio_queue.put(None)

                    # Check for errors
                    if "error" in data or data.get("type") == "error":
                        self._logger.error(
                            f"ElevenLabsStreamingSynthesizer: received error: {data}"
                        )
                elif msg.type == aiohttp.WSMsgType.BINARY:
                    chunk_count += 1
                    total_bytes += len(msg.data)
                    await self._audio_queue.put(msg.data)
                elif msg.type in (
                    aiohttp.WSMsgType.CLOSE,
                    aiohttp.WSMsgType.ERROR,
                ):
                    self._logger.info(
                        f"ElevenLabsStreamingSynthesizer: WebSocket closed/error, type={msg.type}"
                    )
                    break
        except Exception as e:
            self._logger.error(f"ElevenLabsStreamingSynthesizer receive error: {e}")
        finally:
            self._logger.info(
                f"ElevenLabsStreamingSynthesizer: receive loop ended, {chunk_count} chunks, {total_bytes} bytes"
            )
            await self._audio_queue.put(None)  # Signal end of stream

    async def send_text(self, text: str) -> None:
        """Send text to be synthesized.

        ElevenLabs processes text as a continuous stream and returns
        audio in order. We let ElevenLabs handle buffering via chunk_length_schedule
        and only force generation when flush() is called at the end.

        Args:
            text: Text to synthesize
        """
        if self._ws and not self._closed and text.strip():
            self._logger.info(
                f"ElevenLabsStreamingSynthesizer: sending text ({len(text)} chars): '{text}'"
            )
            # Let ElevenLabs buffer and auto-generate based on chunk_length_schedule
            # Don't trigger generation here - wait for flush() at the end
            await self._ws.send_str(
                json.dumps(
                    {
                        "text": text + " ",  # Space for natural speech flow
                    }
                )
            )
            self._logger.info("ElevenLabsStreamingSynthesizer: text sent successfully")
        else:
            self._logger.warning(
                f"ElevenLabsStreamingSynthesizer: skipping send_text - "
                f"ws={self._ws is not None}, closed={self._closed}, text='{text[:30] if text else ''}'"
            )

    async def receive_audio(self) -> bytes | None:
        """Receive next audio chunk."""
        try:
            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return b""  # No audio yet, but not done

    async def flush(self) -> None:
        """Signal end of text input. ElevenLabs will generate remaining audio and close."""
        if self._ws and not self._closed:
            # Send empty string to signal end of input
            # ElevenLabs will generate any remaining buffered text,
            # send all audio chunks, send isFinal, then close the connection
            self._logger.info(
                "ElevenLabsStreamingSynthesizer: sending end-of-input (empty string)"
            )
            await self._ws.send_str(json.dumps({"text": ""}))
            self._logger.info("ElevenLabsStreamingSynthesizer: end-of-input sent")
        else:
            self._logger.warning(
                f"ElevenLabsStreamingSynthesizer: skipping flush - ws={self._ws is not None}, closed={self._closed}"
            )

    async def close(self) -> None:
        """Close the session."""
        self._closed = True
        if self._ws:
            await self._ws.close()
        if self._receive_task:
            self._receive_task.cancel()
            try:
                await self._receive_task
            except asyncio.CancelledError:
                pass
        if self._session:
            await self._session.close()


# Valid ElevenLabs model IDs
ELEVENLABS_STT_MODELS = {"scribe_v1", "scribe_v2_realtime"}
ELEVENLABS_TTS_MODELS = {
    "eleven_multilingual_v2",
    "eleven_turbo_v2_5",
    "eleven_monolingual_v1",
    "eleven_flash_v2_5",
    "eleven_flash_v2",
}


class ElevenLabsVoiceProvider(VoiceProviderInterface):
    """ElevenLabs voice provider."""

    def __init__(
        self,
        api_key: str | None,
        api_base: str | None = None,
        stt_model: str | None = None,
        tts_model: str | None = None,
        default_voice: str | None = None,
    ):
        self.api_key = api_key
        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
        # Validate and default models - use valid ElevenLabs model IDs
        self.stt_model = (
            stt_model if stt_model in ELEVENLABS_STT_MODELS else "scribe_v1"
        )
        self.tts_model = (
            tts_model
            if tts_model in ELEVENLABS_TTS_MODELS
            else "eleven_multilingual_v2"
        )
        self.default_voice = default_voice

    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
        """
        Transcribe audio using ElevenLabs Speech-to-Text API.

        Args:
            audio_data: Raw audio bytes
            audio_format: Format of the audio (e.g., 'webm', 'mp3', 'wav')

        Returns:
            Transcribed text
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key required for transcription")

        from onyx.utils.logger import setup_logger

        logger = setup_logger()

        url = f"{self.api_base}/v1/speech-to-text"

        # Map common formats to MIME types
        mime_types = {
            "webm": "audio/webm",
            "mp3": "audio/mpeg",
            "wav": "audio/wav",
            "ogg": "audio/ogg",
            "flac": "audio/flac",
            "m4a": "audio/mp4",
        }
        mime_type = mime_types.get(audio_format.lower(), f"audio/{audio_format}")

        headers = {
            "xi-api-key": self.api_key,
        }

        # ElevenLabs expects multipart form data
        form_data = aiohttp.FormData()
        form_data.add_field(
            "audio",
            audio_data,
            filename=f"audio.{audio_format}",
            content_type=mime_type,
        )
        # For batch STT, use scribe_v1 (not the realtime model)
        batch_model = (
            self.stt_model if self.stt_model in ("scribe_v1",) else "scribe_v1"
        )
        form_data.add_field("model_id", batch_model)

        logger.info(
            f"ElevenLabs transcribe: sending {len(audio_data)} bytes, format={audio_format}"
        )

        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=headers, data=form_data) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"ElevenLabs transcribe failed: {error_text}")
                    raise RuntimeError(f"ElevenLabs transcription failed: {error_text}")

                result = await response.json()
                text = result.get("text", "")
                logger.info(f"ElevenLabs transcribe: got result: {text[:50]}...")
                return text

    async def synthesize_stream(
        self, text: str, voice: str | None = None, speed: float = 1.0
    ) -> AsyncIterator[bytes]:
        """
        Convert text to audio using ElevenLabs TTS with streaming.

        Args:
            text: Text to convert to speech
            voice: Voice ID (defaults to provider's default voice or Rachel)
            speed: Playback speed multiplier

        Yields:
            Audio data chunks (mp3 format)
        """
        from onyx.utils.logger import setup_logger

        logger = setup_logger()

        if not self.api_key:
            raise ValueError("ElevenLabs API key required for TTS")

        voice_id = voice or self.default_voice or "21m00Tcm4TlvDq8ikWAM"  # Rachel

        url = f"{self.api_base}/v1/text-to-speech/{voice_id}/stream"

        logger.info(
            f"ElevenLabs TTS: starting synthesis, text='{text[:50]}...', voice={voice_id}, model={self.tts_model}, speed={speed}"
        )

        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
            "Accept": "audio/mpeg",
        }

        payload = {
            "text": text,
            "model_id": self.tts_model,
            "voice_settings": {
                "stability": DEFAULT_VOICE_STABILITY,
                "similarity_boost": DEFAULT_VOICE_SIMILARITY_BOOST,
                "speed": speed,
            },
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=headers, json=payload) as response:
                logger.info(
                    f"ElevenLabs TTS: got response status={response.status}, content-type={response.headers.get('content-type')}"
                )
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"ElevenLabs TTS failed: {error_text}")
                    raise RuntimeError(f"ElevenLabs TTS failed: {error_text}")

                # Use 8192 byte chunks for smoother streaming
                chunk_count = 0
                total_bytes = 0
                async for chunk in response.content.iter_chunked(8192):
                    if chunk:
                        chunk_count += 1
                        total_bytes += len(chunk)
                        yield chunk
                logger.info(
                    f"ElevenLabs TTS: streaming complete, {chunk_count} chunks, {total_bytes} total bytes"
                )

    async def validate_credentials(self) -> None:
        """Validate ElevenLabs API key.

        Calls /v1/models as a lightweight check. ElevenLabs returns 401 for
        both truly invalid keys and valid keys with restricted scopes, so we
        inspect the response body: a "missing_permissions" status means the
        key authenticated successfully but lacks a specific scope.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key required")

        headers = {"xi-api-key": self.api_key}
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.api_base}/v1/models", headers=headers
            ) as response:
                if response.status == 200:
                    return
                if response.status in (401, 403):
                    try:
                        body = await response.json()
                        detail = body.get("detail", {})
                        status = (
                            detail.get("status", "") if isinstance(detail, dict) else ""
                        )
                    except Exception:
                        status = ""
                    # "missing_permissions" means the key is valid but
                    # lacks this specific scope — that's fine.
                    if status == "missing_permissions":
                        return
                    raise RuntimeError("Invalid ElevenLabs API key.")
                raise RuntimeError("ElevenLabs credential validation failed.")

    def get_available_voices(self) -> list[dict[str, str]]:
        """Return common ElevenLabs voices."""
        return ELEVENLABS_VOICES.copy()

    def get_available_stt_models(self) -> list[dict[str, str]]:
        return [
            {"id": "scribe_v2_realtime", "name": "Scribe v2 Realtime (Streaming)"},
            {"id": "scribe_v1", "name": "Scribe v1 (Batch)"},
        ]

    def get_available_tts_models(self) -> list[dict[str, str]]:
        return [
            {"id": "eleven_multilingual_v2", "name": "Multilingual v2"},
            {"id": "eleven_turbo_v2_5", "name": "Turbo v2.5"},
            {"id": "eleven_monolingual_v1", "name": "Monolingual v1"},
        ]

    def supports_streaming_stt(self) -> bool:
        """ElevenLabs supports streaming via Scribe Realtime API."""
        return True

    def supports_streaming_tts(self) -> bool:
        """ElevenLabs supports real-time streaming TTS via WebSocket."""
        return True

    async def create_streaming_transcriber(
        self, _audio_format: str = "webm"
    ) -> ElevenLabsStreamingTranscriber:
        """Create a streaming transcription session."""
        if not self.api_key:
            raise ValueError("API key required for streaming transcription")
        # ElevenLabs realtime STT requires scribe_v2_realtime model.
        # Frontend sends PCM16 at DEFAULT_INPUT_SAMPLE_RATE (24kHz),
        # but ElevenLabs expects DEFAULT_TARGET_SAMPLE_RATE (16kHz).
        # The transcriber resamples automatically.
        transcriber = ElevenLabsStreamingTranscriber(
            api_key=self.api_key,
            model="scribe_v2_realtime",
            input_sample_rate=DEFAULT_INPUT_SAMPLE_RATE,
            target_sample_rate=DEFAULT_TARGET_SAMPLE_RATE,
            language_code="en",
            api_base=self.api_base,
        )
        await transcriber.connect()
        return transcriber

    async def create_streaming_synthesizer(
        self, voice: str | None = None, speed: float = 1.0
    ) -> ElevenLabsStreamingSynthesizer:
        """Create a streaming TTS session."""
        if not self.api_key:
            raise ValueError("API key required for streaming TTS")
        voice_id = voice or self.default_voice or "21m00Tcm4TlvDq8ikWAM"
        synthesizer = ElevenLabsStreamingSynthesizer(
            api_key=self.api_key,
            voice_id=voice_id,
            model_id=self.tts_model,
            output_format=DEFAULT_TTS_OUTPUT_FORMAT,
            api_base=self.api_base,
            speed=speed,
        )
        await synthesizer.connect()
        return synthesizer


================================================
FILE: backend/onyx/voice/providers/openai.py
================================================
"""OpenAI voice provider for STT and TTS.

OpenAI supports:
- **STT**: Whisper (batch transcription via REST) and Realtime API (streaming
  transcription via WebSocket with server-side VAD). Audio is sent as base64-encoded
  PCM16 at 24kHz mono. The Realtime API returns transcript deltas and completed
  transcription events per VAD-detected utterance.
- **TTS**: HTTP streaming endpoint that returns audio chunks progressively.
  Supported models: tts-1 (standard) and tts-1-hd (high quality).

See https://platform.openai.com/docs for API reference.
"""

import asyncio
import base64
import io
import json
from collections.abc import AsyncIterator
from enum import StrEnum
from typing import TYPE_CHECKING

import aiohttp

from onyx.voice.interface import StreamingSynthesizerProtocol
from onyx.voice.interface import StreamingTranscriberProtocol
from onyx.voice.interface import TranscriptResult
from onyx.voice.interface import VoiceProviderInterface

if TYPE_CHECKING:
    from openai import AsyncOpenAI

# Default OpenAI API base URL
DEFAULT_OPENAI_API_BASE = "https://api.openai.com"


class OpenAIRealtimeMessageType(StrEnum):
    """Message types from OpenAI Realtime transcription API."""

    ERROR = "error"
    SPEECH_STARTED = "input_audio_buffer.speech_started"
    SPEECH_STOPPED = "input_audio_buffer.speech_stopped"
    BUFFER_COMMITTED = "input_audio_buffer.committed"
    TRANSCRIPTION_DELTA = "conversation.item.input_audio_transcription.delta"
    TRANSCRIPTION_COMPLETED = "conversation.item.input_audio_transcription.completed"
    SESSION_CREATED = "transcription_session.created"
    SESSION_UPDATED = "transcription_session.updated"
    ITEM_CREATED = "conversation.item.created"


def _http_to_ws_url(http_url: str) -> str:
    """Convert http(s) URL to ws(s) URL for WebSocket connections."""
    if http_url.startswith("https://"):
        return "wss://" + http_url[8:]
    elif http_url.startswith("http://"):
        return "ws://" + http_url[7:]
    return http_url


class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
    """Streaming transcription using OpenAI Realtime API."""

    def __init__(
        self,
        api_key: str,
        model: str = "whisper-1",
        api_base: str | None = None,
    ):
        # Import logger first
        from onyx.utils.logger import setup_logger

        self._logger = setup_logger()

        self._logger.info(
            f"OpenAIStreamingTranscriber: initializing with model {model}"
        )
        self.api_key = api_key
        self.model = model
        self.api_base = api_base or DEFAULT_OPENAI_API_BASE
        self._ws: aiohttp.ClientWebSocketResponse | None = None
        self._session: aiohttp.ClientSession | None = None
        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
        self._current_turn_transcript = ""  # Transcript for current VAD turn
        self._accumulated_transcript = ""  # Accumulated across all turns
        self._receive_task: asyncio.Task | None = None
        self._closed = False

    async def connect(self) -> None:
        """Establish WebSocket connection to OpenAI Realtime API."""
        self._session = aiohttp.ClientSession()

        # OpenAI Realtime transcription endpoint
        ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
        url = f"{ws_base}/v1/realtime?intent=transcription"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "OpenAI-Beta": "realtime=v1",
        }

        try:
            self._ws = await self._session.ws_connect(url, headers=headers)
            self._logger.info("Connected to OpenAI Realtime API")
        except Exception as e:
            self._logger.error(f"Failed to connect to OpenAI Realtime API: {e}")
            raise

        # Configure the session for transcription
        # Enable server-side VAD (Voice Activity Detection) for automatic speech detection
        config_message = {
            "type": "transcription_session.update",
            "session": {
                "input_audio_format": "pcm16",  # 16-bit PCM at 24kHz mono
                "input_audio_transcription": {
                    "model": self.model,
                },
                "turn_detection": {
                    "type": "server_vad",
                    "threshold": 0.5,
                    "prefix_padding_ms": 300,
                    "silence_duration_ms": 500,
                },
            },
        }
        await self._ws.send_str(json.dumps(config_message))
        self._logger.info(f"Sent config for model: {self.model} with server VAD")

        # Start receiving transcripts
        self._receive_task = asyncio.create_task(self._receive_loop())

    async def _receive_loop(self) -> None:
        """Background task to receive transcripts."""
        if not self._ws:
            return

        try:
            async for msg in self._ws:
                if msg.type == aiohttp.WSMsgType.TEXT:
                    data = json.loads(msg.data)
                    msg_type = data.get("type", "")
                    self._logger.debug(f"Received message type: {msg_type}")

                    # Handle errors
                    if msg_type == OpenAIRealtimeMessageType.ERROR:
                        error = data.get("error", {})
                        self._logger.error(f"OpenAI error: {error}")
                        continue

                    # Handle VAD events
                    if msg_type == OpenAIRealtimeMessageType.SPEECH_STARTED:
                        self._logger.info("OpenAI: Speech started")
                        # Reset current turn transcript for new speech
                        self._current_turn_transcript = ""
                        continue
                    elif msg_type == OpenAIRealtimeMessageType.SPEECH_STOPPED:
                        self._logger.info(
                            "OpenAI: Speech stopped (VAD detected silence)"
                        )
                        continue
                    elif msg_type == OpenAIRealtimeMessageType.BUFFER_COMMITTED:
                        self._logger.info("OpenAI: Audio buffer committed")
                        continue

                    # Handle transcription events
                    if msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA:
                        delta = data.get("delta", "")
                        if delta:
                            self._logger.info(f"OpenAI: Transcription delta: {delta}")
                            self._current_turn_transcript += delta
                            # Show accumulated + current turn transcript
                            full_transcript = self._accumulated_transcript
                            if full_transcript and self._current_turn_transcript:
                                full_transcript += " "
                            full_transcript += self._current_turn_transcript
                            await self._transcript_queue.put(
                                TranscriptResult(text=full_transcript, is_vad_end=False)
                            )
                    elif msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_COMPLETED:
                        transcript = data.get("transcript", "")
                        if transcript:
                            self._logger.info(
                                f"OpenAI: Transcription completed (VAD turn end): {transcript[:50]}..."
                            )
                            # This is the final transcript for this VAD turn
                            self._current_turn_transcript = transcript
                            # Accumulate this turn's transcript
                            if self._accumulated_transcript:
                                self._accumulated_transcript += " " + transcript
                            else:
                                self._accumulated_transcript = transcript
                            # Send with is_vad_end=True to trigger auto-send
                            await self._transcript_queue.put(
                                TranscriptResult(
                                    text=self._accumulated_transcript,
                                    is_vad_end=True,
                                )
                            )
                    elif msg_type not in (
                        OpenAIRealtimeMessageType.SESSION_CREATED,
                        OpenAIRealtimeMessageType.SESSION_UPDATED,
                        OpenAIRealtimeMessageType.ITEM_CREATED,
                    ):
                        # Log any other message types we might be missing
                        self._logger.info(
                            f"OpenAI: Unhandled message type '{msg_type}': {data}"
                        )

                elif msg.type == aiohttp.WSMsgType.ERROR:
                    self._logger.error(f"WebSocket error: {self._ws.exception()}")
                    break
                elif msg.type == aiohttp.WSMsgType.CLOSED:
                    self._logger.info("WebSocket closed by server")
                    break
        except Exception as e:
            self._logger.error(f"Error in receive loop: {e}")
        finally:
            await self._transcript_queue.put(None)

    async def send_audio(self, chunk: bytes) -> None:
        """Send audio chunk to OpenAI."""
        if self._ws and not self._closed:
            # OpenAI expects base64-encoded PCM16 audio at 24kHz mono
            # PCM16 at 24kHz: 24000 samples/sec * 2 bytes/sample = 48000 bytes/sec
            # So chunk_bytes / 48000 = duration in seconds
            duration_ms = (len(chunk) / 48000) * 1000
            self._logger.debug(
                f"Sending {len(chunk)} bytes ({duration_ms:.1f}ms) of audio to OpenAI. "
                f"First 10 bytes: {chunk[:10].hex() if len(chunk) >= 10 else chunk.hex()}"
            )
            message = {
                "type": "input_audio_buffer.append",
                "audio": base64.b64encode(chunk).decode("utf-8"),
            }
            await self._ws.send_str(json.dumps(message))

    def reset_transcript(self) -> None:
        """Reset accumulated transcript. Call after auto-send to start fresh."""
        self._logger.info("OpenAI: Resetting accumulated transcript")
        self._accumulated_transcript = ""
        self._current_turn_transcript = ""

    async def receive_transcript(self) -> TranscriptResult | None:
        """Receive next transcript."""
        try:
            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return TranscriptResult(text="", is_vad_end=False)

    async def close(self) -> str:
        """Close session and return final transcript."""
        self._closed = True
        if self._ws:
            # With server VAD, the buffer is auto-committed when speech stops.
            # But we should still commit any remaining audio and wait for transcription.
            try:
                await self._ws.send_str(
                    json.dumps({"type": "input_audio_buffer.commit"})
                )
            except Exception as e:
                self._logger.debug(f"Error sending commit (may be expected): {e}")

            # Wait for *new* transcription to arrive (up to 5 seconds)
            self._logger.info("Waiting for transcription to complete...")
            transcript_before_commit = self._accumulated_transcript
            for _ in range(50):  # 50 * 100ms = 5 seconds max
                await asyncio.sleep(0.1)
                if self._accumulated_transcript != transcript_before_commit:
                    self._logger.info(
                        f"Got final transcript: {self._accumulated_transcript[:50]}..."
                    )
                    break
            else:
                self._logger.warning("Timed out waiting for transcription")

            await self._ws.close()
        if self._receive_task:
            self._receive_task.cancel()
            try:
                await self._receive_task
            except asyncio.CancelledError:
                pass
        if self._session:
            await self._session.close()
        return self._accumulated_transcript


# OpenAI available voices for TTS
OPENAI_VOICES = [
    {"id": "alloy", "name": "Alloy"},
    {"id": "echo", "name": "Echo"},
    {"id": "fable", "name": "Fable"},
    {"id": "onyx", "name": "Onyx"},
    {"id": "nova", "name": "Nova"},
    {"id": "shimmer", "name": "Shimmer"},
]

# OpenAI available STT models (all support streaming via Realtime API)
OPENAI_STT_MODELS = [
    {"id": "whisper-1", "name": "Whisper v1"},
    {"id": "gpt-4o-transcribe", "name": "GPT-4o Transcribe"},
    {"id": "gpt-4o-mini-transcribe", "name": "GPT-4o Mini Transcribe"},
]

# OpenAI available TTS models
OPENAI_TTS_MODELS = [
    {"id": "tts-1", "name": "TTS-1 (Standard)"},
    {"id": "tts-1-hd", "name": "TTS-1 HD (High Quality)"},
]


def _create_wav_header(
    data_length: int,
    sample_rate: int = 24000,
    channels: int = 1,
    bits_per_sample: int = 16,
) -> bytes:
    """Create a WAV file header for PCM audio data."""
    import struct

    byte_rate = sample_rate * channels * bits_per_sample // 8
    block_align = channels * bits_per_sample // 8

    # WAV header is 44 bytes
    header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF",  # ChunkID
        36 + data_length,  # ChunkSize
        b"WAVE",  # Format
        b"fmt ",  # Subchunk1ID
        16,  # Subchunk1Size (PCM)
        1,  # AudioFormat (1 = PCM)
        channels,  # NumChannels
        sample_rate,  # SampleRate
        byte_rate,  # ByteRate
        block_align,  # BlockAlign
        bits_per_sample,  # BitsPerSample
        b"data",  # Subchunk2ID
        data_length,  # Subchunk2Size
    )
    return header


class OpenAIStreamingSynthesizer(StreamingSynthesizerProtocol):
    """Streaming TTS using OpenAI HTTP TTS API with streaming responses."""

    def __init__(
        self,
        api_key: str,
        voice: str = "alloy",
        model: str = "tts-1",
        speed: float = 1.0,
        api_base: str | None = None,
    ):
        from onyx.utils.logger import setup_logger

        self._logger = setup_logger()
        self.api_key = api_key
        self.voice = voice
        self.model = model
        self.speed = max(0.25, min(4.0, speed))
        self.api_base = api_base or DEFAULT_OPENAI_API_BASE
        self._session: aiohttp.ClientSession | None = None
        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
        self._text_queue: asyncio.Queue[str | None] = asyncio.Queue()
        self._synthesis_task: asyncio.Task | None = None
        self._closed = False
        self._flushed = False

    async def connect(self) -> None:
        """Initialize HTTP session for TTS requests."""
        self._logger.info("OpenAIStreamingSynthesizer: connecting")
        self._session = aiohttp.ClientSession()
        # Start background task to process text queue
        self._synthesis_task = asyncio.create_task(self._process_text_queue())
        self._logger.info("OpenAIStreamingSynthesizer: connected")

    async def _process_text_queue(self) -> None:
        """Background task to process queued text for synthesis."""
        while not self._closed:
            try:
                text = await asyncio.wait_for(self._text_queue.get(), timeout=0.1)
                if text is None:
                    break
                await self._synthesize_text(text)
            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
            except Exception as e:
                self._logger.error(f"Error processing text queue: {e}")

    async def _synthesize_text(self, text: str) -> None:
        """Make HTTP TTS request and stream audio to queue."""
        if not self._session or self._closed:
            return

        url = f"{self.api_base.rstrip('/')}/v1/audio/speech"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "voice": self.voice,
            "input": text,
            "speed": self.speed,
            "response_format": "mp3",
        }

        try:
            async with self._session.post(
                url, headers=headers, json=payload
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    self._logger.error(f"OpenAI TTS error: {error_text}")
                    return

                # Use 8192 byte chunks for smoother streaming
                # (larger chunks = more complete MP3 frames, better playback)
                async for chunk in response.content.iter_chunked(8192):
                    if self._closed:
                        break
                    if chunk:
                        await self._audio_queue.put(chunk)
        except Exception as e:
            self._logger.error(f"OpenAIStreamingSynthesizer synthesis error: {e}")

    async def send_text(self, text: str) -> None:
        """Queue text to be synthesized via HTTP streaming."""
        if not text.strip() or self._closed:
            return
        await self._text_queue.put(text)

    async def receive_audio(self) -> bytes | None:
        """Receive next audio chunk (MP3 format)."""
        try:
            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
        except asyncio.TimeoutError:
            return b""  # No audio yet, but not done

    async def flush(self) -> None:
        """Signal end of text input - wait for synthesis to complete."""
        if self._flushed:
            return
        self._flushed = True

        # Signal end of text input
        await self._text_queue.put(None)

        # Wait for synthesis task to complete processing all text
        if self._synthesis_task and not self._synthesis_task.done():
            try:
                await asyncio.wait_for(self._synthesis_task, timeout=60.0)
            except asyncio.TimeoutError:
                self._logger.warning("OpenAIStreamingSynthesizer: flush timeout")
                self._synthesis_task.cancel()
                try:
                    await self._synthesis_task
                except asyncio.CancelledError:
                    pass
            except asyncio.CancelledError:
                pass

        # Signal end of audio stream
        await self._audio_queue.put(None)

    async def close(self) -> None:
        """Close the session."""
        if self._closed:
            return
        self._closed = True

        # Signal end of queues only if flush wasn't already called
        if not self._flushed:
            await self._text_queue.put(None)
            await self._audio_queue.put(None)

        if self._synthesis_task and not self._synthesis_task.done():
            self._synthesis_task.cancel()
            try:
                await self._synthesis_task
            except asyncio.CancelledError:
                pass

        if self._session:
            await self._session.close()


class OpenAIVoiceProvider(VoiceProviderInterface):
    """OpenAI voice provider using Whisper for STT and TTS API for speech synthesis."""

    def __init__(
        self,
        api_key: str | None,
        api_base: str | None = None,
        stt_model: str | None = None,
        tts_model: str | None = None,
        default_voice: str | None = None,
    ):
        self.api_key = api_key
        self.api_base = api_base
        self.stt_model = stt_model or "whisper-1"
        self.tts_model = tts_model or "tts-1"
        self.default_voice = default_voice or "alloy"

        self._client: "AsyncOpenAI | None" = None

    def _get_client(self) -> "AsyncOpenAI":
        if self._client is None:
            from openai import AsyncOpenAI

            self._client = AsyncOpenAI(
                api_key=self.api_key,
                base_url=self.api_base,
            )
        return self._client

    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
        """
        Transcribe audio using OpenAI Whisper.

        Args:
            audio_data: Raw audio bytes
            audio_format: Audio format (e.g., "webm", "wav", "mp3")

        Returns:
            Transcribed text
        """
        client = self._get_client()

        # Create a file-like object from the audio bytes
        audio_file = io.BytesIO(audio_data)
        audio_file.name = f"audio.{audio_format}"

        response = await client.audio.transcriptions.create(
            model=self.stt_model,
            file=audio_file,
        )

        return response.text

    async def synthesize_stream(
        self, text: str, voice: str | None = None, speed: float = 1.0
    ) -> AsyncIterator[bytes]:
        """
        Convert text to audio using OpenAI TTS with streaming.

        Args:
            text: Text to convert to speech
            voice: Voice identifier (defaults to provider's default voice)
            speed: Playback speed multiplier (0.25 to 4.0)

        Yields:
            Audio data chunks (mp3 format)
        """
        client = self._get_client()

        # Clamp speed to valid range
        speed = max(0.25, min(4.0, speed))

        # Use with_streaming_response for proper async streaming
        # Using 8192 byte chunks for better streaming performance
        # (larger chunks = fewer round-trips, more complete MP3 frames)
        async with client.audio.speech.with_streaming_response.create(
            model=self.tts_model,
            voice=voice or self.default_voice,
            input=text,
            speed=speed,
            response_format="mp3",
        ) as response:
            async for chunk in response.iter_bytes(chunk_size=8192):
                yield chunk

    async def validate_credentials(self) -> None:
        """Validate OpenAI API key by listing models."""
        from openai import AuthenticationError, PermissionDeniedError

        client = self._get_client()
        try:
            await client.models.list()
        except AuthenticationError:
            raise RuntimeError("Invalid OpenAI API key.")
        except PermissionDeniedError:
            raise RuntimeError("OpenAI API key does not have sufficient permissions.")

    def get_available_voices(self) -> list[dict[str, str]]:
        """Get available OpenAI TTS voices."""
        return OPENAI_VOICES.copy()

    def get_available_stt_models(self) -> list[dict[str, str]]:
        """Get available OpenAI STT models."""
        return OPENAI_STT_MODELS.copy()

    def get_available_tts_models(self) -> list[dict[str, str]]:
        """Get available OpenAI TTS models."""
        return OPENAI_TTS_MODELS.copy()

    def supports_streaming_stt(self) -> bool:
        """OpenAI supports streaming via Realtime API for all STT models."""
        return True

    def supports_streaming_tts(self) -> bool:
        """OpenAI supports real-time streaming TTS via Realtime API."""
        return True

    async def create_streaming_transcriber(
        self, _audio_format: str = "webm"
    ) -> OpenAIStreamingTranscriber:
        """Create a streaming transcription session using Realtime API."""
        if not self.api_key:
            raise ValueError("API key required for streaming transcription")
        transcriber = OpenAIStreamingTranscriber(
            api_key=self.api_key,
            model=self.stt_model,
            api_base=self.api_base,
        )
        await transcriber.connect()
        return transcriber

    async def create_streaming_synthesizer(
        self, voice: str | None = None, speed: float = 1.0
    ) -> OpenAIStreamingSynthesizer:
        """Create a streaming TTS session using HTTP streaming API."""
        if not self.api_key:
            raise ValueError("API key required for streaming TTS")
        synthesizer = OpenAIStreamingSynthesizer(
            api_key=self.api_key,
            voice=voice or self.default_voice or "alloy",
            model=self.tts_model or "tts-1",
            speed=speed,
            api_base=self.api_base,
        )
        await synthesizer.connect()
        return synthesizer


================================================
FILE: backend/pyproject.toml
================================================
[project]
name = "onyx-backend"
version = "0.0.0"
requires-python = ">=3.11"
dependencies = [
    "onyx[backend,dev,ee]",
]

[tool.uv.sources]
onyx = { workspace = true }


================================================
FILE: backend/pytest.ini
================================================
[pytest]
pythonpath = 
    .
    generated/onyx_openapi_client
asyncio_default_fixture_loop_scope = function
markers =
    slow: marks tests as slow
    alembic: marks tests for alembic migration testing
filterwarnings =
    ignore::DeprecationWarning
    ignore::cryptography.utils.CryptographyDeprecationWarning
    ignore::PendingDeprecationWarning:ddtrace.internal.module
# .test.env is gitignored.
# After installing pytest-dotenv,
# you can use it to test credentials locally.
env_files =
    .test.env


================================================
FILE: backend/requirements/README.md
================================================
# Requirements Management with uv

This directory is kept for backwards compatibility with existing Docker builds.

## Overview

We use **`pyproject.toml`** as the single source of truth for all dependencies, with a unified **`uv.lock`** file for resolved versions.

### Why this approach?

- ✅ **Single source of truth**: All dependencies defined in `pyproject.toml`
- ✅ **No duplication**: Dependencies shared across environments are only listed once
- ✅ **Unified lock file**: All versions resolved together - guaranteed compatible
- ✅ **Fast**: `uv` is 10-100x faster than pip-tools
- ✅ **Reproducible builds**: Lock file pins all transitive dependencies
- ✅ **Easy updates**: Change `pyproject.toml`, commit, done!

## File Structure

```
pyproject.toml                      # SOURCE OF TRUTH - edit this!
uv.lock                             # Unified lock file (all versions)
backend/
└── requirements/                   # Legacy .txt files (for Docker compat)
    ├── default.txt
    ├── dev.txt
    ├── ee.txt
    ├── model_server.txt
    └── combined.txt
```

## Workflow

### 1. Installing uv

If you don't have `uv` installed:

```bash
# On macOS/Linux
curl -LsSf https://astral.py/uv/install.sh | sh
```

### 2. Adding/Updating Dependencies

**DO NOT** edit the `.txt` files directly! Instead:

1. Edit `pyproject.toml`
2. Add/update/remove dependencies in the appropriate section:
   - `[dependency-groups]` for dev tools
   - `[project.dependencies]` for **shared** dependencies (used by both backend and model_server)
   - `[project.optional-dependencies.backend]` for backend-only dependencies
   - `[project.optional-dependencies.model_server]` for model_server-only dependencies (ML packages)
   - `[project.optional-dependencies.ee]` for EE features
3. Commit your changes - pre-commit hooks will automatically regenerate the lock file and requirements

### 3. Generating Lock File and Requirements

The lock file (`uv.lock`) and requirements files are automatically generated by pre-commit hooks when you commit changes to `pyproject.toml`:

- **`uv-lock`**: Runs `uv lock` to resolve dependencies into `uv.lock`
- **`uv-export`**: Exports requirements to the `.txt` files in this directory

To manually regenerate:

```bash
uv lock
uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
uv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt
uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
```

### 4. Installing Dependencies

If enabled, all packages are installed automatically by the `uv-sync` pre-commit hook when changing
branches or pulling new changes.

```bash
# For everything (most common)
uv sync --all-extras

# For backend production (shared + backend dependencies)
uv sync --extra backend

# For backend development (shared + backend + dev tools)
uv sync --extra backend --extra dev

# For backend with EE (shared + backend + ee)
uv sync --extra backend --extra ee

# For model server (shared + model_server, NO backend deps!)
uv sync --extra model_server
```

`uv` aggressively [ignores active virtual environments](https://docs.astral.sh/uv/concepts/projects/config/#project-environment-path) and prefers the root virtual environment.
When working in workspace packages, be sure to pass `--active` when syncing the virtual environment:

```bash
cd backend/
source .venv/bin/activate
uv sync --active
uv run --active ...
```

### 5. Upgrading Dependencies

Upgrade specific packages:

1. Edit version in pyproject.toml, then commit
2. Pre-commit hooks will automatically regenerate lock and requirements files

**Review changes carefully before committing!**


================================================
FILE: backend/requirements/combined.txt
================================================
# combines all the other requirements files
# Primarily for testing.
# It's generally better to install just the requirements for what you are trying to run

-r default.txt
-r ee.txt
-r model_server.txt
-r dev.txt


================================================
FILE: backend/requirements/default.txt
================================================
# This file was autogenerated by uv via the following command:
#    uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
agent-client-protocol==0.7.1
    # via onyx
aioboto3==15.1.0
    # via onyx
aiobotocore==2.24.0
    # via aioboto3
aiofile==3.9.0
    # via py-key-value-aio
aiofiles==25.1.0
    # via
    #   aioboto3
    #   unstructured-client
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.13.4
    # via
    #   aiobotocore
    #   discord-py
    #   litellm
    #   onyx
    #   voyageai
aioitertools==0.13.0
    # via aiobotocore
aiolimiter==1.2.1
    # via voyageai
aiosignal==1.4.0
    # via aiohttp
alembic==1.10.4
    # via onyx
amqp==5.3.1
    # via kombu
annotated-doc==0.0.4
    # via fastapi
annotated-types==0.7.0
    # via pydantic
anyio==4.11.0
    # via
    #   claude-agent-sdk
    #   google-genai
    #   httpx
    #   mcp
    #   openai
    #   py-key-value-aio
    #   sse-starlette
    #   starlette
    #   watchfiles
argon2-cffi==23.1.0
    # via pwdlib
argon2-cffi-bindings==25.1.0
    # via argon2-cffi
asana==5.0.8
    # via onyx
async-timeout==5.0.1 ; python_full_version < '3.11.3'
    # via redis
asyncpg==0.30.0
    # via onyx
atlassian-python-api==3.41.16
    # via onyx
attrs==25.4.0
    # via
    #   aiohttp
    #   cyclopts
    #   jsonschema
    #   referencing
    #   zeep
authlib==1.6.9
    # via fastmcp
azure-cognitiveservices-speech==1.38.0
    # via onyx
babel==2.17.0
    # via courlan
backoff==2.2.1
    # via
    #   langfuse
    #   unstructured
backports-tarfile==1.2.0 ; python_full_version < '3.12'
    # via jaraco-context
bcrypt==4.3.0
    # via pwdlib
beartype==0.22.6
    # via py-key-value-aio
beautifulsoup4==4.12.3
    # via
    #   atlassian-python-api
    #   markdownify
    #   markitdown
    #   onyx
    #   unstructured
billiard==4.2.3
    # via celery
boto3==1.39.11
    # via
    #   aiobotocore
    #   cohere
    #   onyx
boto3-stubs==1.39.11
    # via onyx
botocore==1.39.11
    # via
    #   aiobotocore
    #   boto3
    #   s3transfer
botocore-stubs==1.40.74
    # via boto3-stubs
braintrust==0.3.9
    # via onyx
brotli==1.2.0
    # via onyx
bytecode==0.17.0
    # via ddtrace
cachetools==6.2.2
    # via py-key-value-aio
caio==0.9.25
    # via aiofile
celery==5.5.1
    # via onyx
certifi==2025.11.12
    # via
    #   asana
    #   httpcore
    #   httpx
    #   hubspot-api-client
    #   kubernetes
    #   opensearch-py
    #   requests
    #   sentry-sdk
    #   trafilatura
cffi==2.0.0
    # via
    #   argon2-cffi-bindings
    #   cryptography
    #   pynacl
    #   zstandard
chardet==5.2.0
    # via onyx
charset-normalizer==3.4.4
    # via
    #   htmldate
    #   markitdown
    #   pdfminer-six
    #   requests
    #   trafilatura
    #   unstructured
chevron==0.14.0
    # via braintrust
chonkie==1.0.10
    # via onyx
claude-agent-sdk==0.1.19
    # via onyx
click==8.3.1
    # via
    #   celery
    #   click-didyoumean
    #   click-plugins
    #   click-repl
    #   dask
    #   distributed
    #   litellm
    #   magika
    #   nltk
    #   python-oxmsg
    #   typer
    #   uvicorn
    #   zulip
click-didyoumean==0.3.1
    # via celery
click-plugins==1.1.1.2
    # via celery
click-repl==0.3.0
    # via celery
cloudpickle==3.1.2
    # via
    #   dask
    #   distributed
cobble==0.1.4
    # via mammoth
cohere==5.6.1
    # via onyx
colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   pytest
    #   tqdm
coloredlogs==15.0.1
    # via onnxruntime
courlan==1.3.2
    # via trafilatura
cryptography==46.0.6
    # via
    #   authlib
    #   google-auth
    #   msal
    #   msoffcrypto-tool
    #   pdfminer-six
    #   pyjwt
    #   secretstorage
    #   sendgrid
    #   unstructured-client
cyclopts==4.2.4
    # via fastmcp
dask==2026.1.1
    # via
    #   distributed
    #   onyx
dataclasses-json==0.6.7
    # via unstructured
dateparser==1.2.2
    # via htmldate
ddtrace==3.10.0
    # via onyx
decorator==5.2.1
    # via retry
defusedxml==0.7.1
    # via
    #   jira
    #   markitdown
deprecated==1.3.1
    # via
    #   atlassian-python-api
    #   pygithub
discord-py==2.4.0
    # via onyx
distributed==2026.1.1
    # via onyx
distro==1.9.0
    # via
    #   openai
    #   zulip
dnspython==2.8.0
    # via email-validator
docstring-parser==0.17.0
    # via cyclopts
docutils==0.22.3
    # via rich-rst
dropbox==12.0.2
    # via onyx
durationpy==0.10
    # via kubernetes
email-validator==2.2.0
    # via
    #   fastapi-users
    #   pydantic
emoji==2.15.0
    # via unstructured
envier==0.6.1
    # via ddtrace
et-xmlfile==2.0.0
    # via openpyxl
events==0.5
    # via opensearch-py
exa-py==1.15.4
    # via onyx
exceptiongroup==1.3.0
    # via
    #   braintrust
    #   fastmcp
fastapi==0.133.1
    # via
    #   fastapi-limiter
    #   fastapi-users
    #   onyx
fastapi-limiter==0.1.6
    # via onyx
fastapi-users==15.0.4
    # via
    #   fastapi-users-db-sqlalchemy
    #   onyx
fastapi-users-db-sqlalchemy==7.0.0
    # via onyx
fastavro==1.12.1
    # via cohere
fastmcp==3.2.0
    # via onyx
fastuuid==0.14.0
    # via litellm
filelock==3.20.3
    # via
    #   huggingface-hub
    #   onyx
filetype==1.2.0
    # via unstructured
flatbuffers==25.9.23
    # via onnxruntime
frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2025.10.0
    # via
    #   dask
    #   huggingface-hub
gitdb==4.0.12
    # via gitpython
gitpython==3.1.45
    # via braintrust
google-api-core==2.28.1
    # via google-api-python-client
google-api-python-client==2.86.0
    # via onyx
google-auth==2.48.0
    # via
    #   google-api-core
    #   google-api-python-client
    #   google-auth-httplib2
    #   google-auth-oauthlib
    #   google-genai
    #   kubernetes
google-auth-httplib2==0.1.0
    # via
    #   google-api-python-client
    #   onyx
google-auth-oauthlib==1.0.0
    # via onyx
google-genai==1.52.0
    # via onyx
googleapis-common-protos==1.72.0
    # via
    #   google-api-core
    #   opentelemetry-exporter-otlp-proto-http
greenlet==3.2.4
    # via
    #   playwright
    #   sqlalchemy
h11==0.16.0
    # via
    #   httpcore
    #   uvicorn
h2==4.3.0
    # via httpx
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
hpack==4.1.0
    # via h2
html5lib==1.1
    # via unstructured
htmldate==1.9.1
    # via trafilatura
httpcore==1.0.9
    # via
    #   httpx
    #   onyx
    #   unstructured-client
httplib2==0.31.0
    # via
    #   google-api-python-client
    #   google-auth-httplib2
httpx==0.28.1
    # via
    #   cohere
    #   exa-py
    #   fastmcp
    #   google-genai
    #   httpx-oauth
    #   langfuse
    #   langsmith
    #   litellm
    #   mcp
    #   onyx
    #   openai
    #   unstructured-client
httpx-oauth==0.15.1
    # via onyx
httpx-sse==0.4.3
    # via
    #   cohere
    #   mcp
hubspot-api-client==11.1.0
    # via onyx
huggingface-hub==0.35.3
    # via
    #   onyx
    #   tokenizers
humanfriendly==10.0
    # via coloredlogs
hyperframe==6.1.0
    # via h2
idna==3.11
    # via
    #   anyio
    #   email-validator
    #   httpx
    #   requests
    #   yarl
importlib-metadata==8.7.0
    # via
    #   dask
    #   keyring
    #   litellm
    #   opentelemetry-api
inflection==0.5.1
    # via
    #   onyx
    #   pyairtable
iniconfig==2.3.0
    # via pytest
isodate==0.7.2
    # via
    #   python3-saml
    #   zeep
jaraco-classes==3.4.0
    # via keyring
jaraco-context==6.0.2
    # via keyring
jaraco-functools==4.4.0
    # via keyring
jeepney==0.9.0 ; sys_platform == 'linux'
    # via
    #   keyring
    #   secretstorage
jinja2==3.1.6
    # via
    #   distributed
    #   litellm
jira==3.10.5
    # via onyx
jiter==0.12.0
    # via openai
jmespath==1.0.1
    # via
    #   aiobotocore
    #   atlassian-python-api
    #   boto3
    #   botocore
joblib==1.5.2
    # via nltk
jsonpatch==1.33
    # via langchain-core
jsonpointer==3.0.0
    # via jsonpatch
jsonref==1.1.0
    # via
    #   fastmcp
    #   onyx
jsonschema==4.25.1
    # via
    #   litellm
    #   mcp
jsonschema-path==0.3.4
    # via fastmcp
jsonschema-specifications==2025.9.1
    # via jsonschema
justext==3.0.2
    # via trafilatura
keyring==25.7.0
    # via py-key-value-aio
kombu==5.5.4
    # via celery
kubernetes==31.0.0
    # via onyx
langchain-core==1.2.22
    # via onyx
langdetect==1.0.9
    # via unstructured
langfuse==3.10.0
    # via onyx
langsmith==0.3.45
    # via langchain-core
lazy-imports==1.0.1
    # via onyx
legacy-cgi==2.6.4 ; python_full_version >= '3.13'
    # via ddtrace
litellm==1.81.6
    # via onyx
locket==1.0.0
    # via
    #   distributed
    #   partd
lxml==5.3.0
    # via
    #   htmldate
    #   justext
    #   lxml-html-clean
    #   markitdown
    #   onyx
    #   python-docx
    #   python-pptx
    #   python3-saml
    #   trafilatura
    #   unstructured
    #   xmlsec
    #   zeep
lxml-html-clean==0.4.4
    # via lxml
magika==0.6.3
    # via markitdown
makefun==1.16.0
    # via fastapi-users
mako==1.2.4
    # via
    #   alembic
    #   onyx
mammoth==1.11.0
    # via markitdown
markdown-it-py==4.0.0
    # via rich
markdownify==1.2.2
    # via markitdown
markitdown==0.1.2
    # via onyx
markupsafe==3.0.3
    # via
    #   jinja2
    #   mako
    #   werkzeug
marshmallow==3.26.2
    # via dataclasses-json
matrix-client==0.3.2
    # via zulip
mcp==1.26.0
    # via
    #   claude-agent-sdk
    #   fastmcp
    #   onyx
mdurl==0.1.2
    # via markdown-it-py
mistune==3.2.0
    # via onyx
more-itertools==10.8.0
    # via
    #   jaraco-classes
    #   jaraco-functools
    #   simple-salesforce
mpmath==1.3.0
    # via sympy
msal==1.34.0
    # via
    #   office365-rest-python-client
    #   onyx
msgpack==1.1.2
    # via distributed
msoffcrypto-tool==5.4.2
    # via onyx
multidict==6.7.0
    # via
    #   aiobotocore
    #   aiohttp
    #   yarl
mwparserfromhell==0.7.2
    # via pywikibot
mypy==1.13.0
    # via sqlalchemy
mypy-boto3-s3==1.39.5
    # via boto3-stubs
mypy-extensions==1.0.0
    # via
    #   mypy
    #   typing-inspect
nest-asyncio==1.6.0
    # via onyx
nltk==3.9.4
    # via unstructured
numpy==2.4.1
    # via
    #   magika
    #   onnxruntime
    #   pandas
    #   shapely
    #   unstructured
    #   voyageai
oauthlib==3.2.2
    # via
    #   atlassian-python-api
    #   kubernetes
    #   onyx
    #   requests-oauthlib
office365-rest-python-client==2.6.2
    # via onyx
olefile==0.47
    # via
    #   msoffcrypto-tool
    #   python-oxmsg
onnxruntime==1.20.1
    # via magika
openai==2.14.0
    # via
    #   exa-py
    #   langfuse
    #   litellm
    #   onyx
openapi-pydantic==0.5.1
    # via fastmcp
openinference-instrumentation==0.1.42
    # via onyx
openinference-semantic-conventions==0.1.25
    # via openinference-instrumentation
openpyxl==3.0.10
    # via
    #   markitdown
    #   onyx
opensearch-py==3.0.0
    # via onyx
opentelemetry-api==1.39.1
    # via
    #   ddtrace
    #   fastmcp
    #   langfuse
    #   openinference-instrumentation
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.39.1
    # via opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-http==1.39.1
    # via langfuse
opentelemetry-proto==1.39.1
    # via
    #   onyx
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.39.1
    # via
    #   langfuse
    #   openinference-instrumentation
    #   opentelemetry-exporter-otlp-proto-http
opentelemetry-semantic-conventions==0.60b1
    # via opentelemetry-sdk
orjson==3.11.6 ; platform_python_implementation != 'PyPy'
    # via langsmith
packaging==24.2
    # via
    #   dask
    #   distributed
    #   fastmcp
    #   huggingface-hub
    #   jira
    #   kombu
    #   langchain-core
    #   langfuse
    #   langsmith
    #   marshmallow
    #   onnxruntime
    #   pytest
    #   pywikibot
pandas==2.3.3
    # via markitdown
parameterized==0.9.0
    # via cohere
partd==1.4.2
    # via dask
passlib==1.7.4
    # via onyx
pathable==0.4.4
    # via jsonschema-path
pdfminer-six==20251107
    # via markitdown
pillow==12.1.1
    # via python-pptx
platformdirs==4.5.0
    # via
    #   fastmcp
    #   zeep
playwright==1.55.0
    # via
    #   onyx
    #   pytest-playwright
pluggy==1.6.0
    # via pytest
ply==3.11
    # via stone
prometheus-client==0.23.1
    # via
    #   onyx
    #   prometheus-fastapi-instrumentator
prometheus-fastapi-instrumentator==7.1.0
    # via onyx
prompt-toolkit==3.0.52
    # via click-repl
propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
proto-plus==1.26.1
    # via google-api-core
protobuf==6.33.5
    # via
    #   ddtrace
    #   google-api-core
    #   googleapis-common-protos
    #   onnxruntime
    #   opentelemetry-proto
    #   proto-plus
psutil==7.1.3
    # via
    #   distributed
    #   onyx
    #   unstructured
psycopg2-binary==2.9.9
    # via onyx
puremagic==1.28
    # via onyx
pwdlib==0.3.0
    # via fastapi-users
py==1.11.0
    # via retry
py-key-value-aio==0.4.4
    # via fastmcp
pyairtable==3.0.1
    # via onyx
pyasn1==0.6.3
    # via
    #   pyasn1-modules
    #   rsa
pyasn1-modules==0.4.2
    # via google-auth
pycparser==2.23 ; implementation_name != 'PyPy'
    # via cffi
pycryptodome==3.19.1
    # via onyx
pydantic==2.11.7
    # via
    #   agent-client-protocol
    #   cohere
    #   exa-py
    #   fastapi
    #   fastmcp
    #   google-genai
    #   langchain-core
    #   langfuse
    #   langsmith
    #   litellm
    #   mcp
    #   onyx
    #   openai
    #   openapi-pydantic
    #   pyairtable
    #   pydantic-settings
    #   unstructured-client
pydantic-core==2.33.2
    # via pydantic
pydantic-settings==2.12.0
    # via mcp
pyee==13.0.0
    # via playwright
pygithub==2.5.0
    # via onyx
pygments==2.20.0
    # via rich
pyjwt==2.12.0
    # via
    #   fastapi-users
    #   mcp
    #   msal
    #   pygithub
    #   simple-salesforce
pympler==1.1
    # via onyx
pynacl==1.6.2
    # via pygithub
pypandoc-binary==1.16.2
    # via onyx
pyparsing==3.2.5
    # via httplib2
pypdf==6.9.2
    # via
    #   onyx
    #   unstructured-client
pyperclip==1.11.0
    # via fastmcp
pyreadline3==3.5.4 ; sys_platform == 'win32'
    # via humanfriendly
pytest==8.3.5
    # via
    #   pytest-base-url
    #   pytest-mock
    #   pytest-playwright
pytest-base-url==2.1.0
    # via pytest-playwright
pytest-mock==3.12.0
    # via onyx
pytest-playwright==0.7.0
    # via onyx
python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   asana
    #   botocore
    #   celery
    #   dateparser
    #   htmldate
    #   hubspot-api-client
    #   kubernetes
    #   onyx
    #   opensearch-py
    #   pandas
python-docx==1.1.2
    # via onyx
python-dotenv==1.1.1
    # via
    #   braintrust
    #   fastmcp
    #   litellm
    #   magika
    #   mcp
    #   onyx
    #   pydantic-settings
python-gitlab==5.6.0
    # via onyx
python-http-client==3.3.7
    # via sendgrid
python-iso639==2025.11.16
    # via unstructured
python-magic==0.4.27
    # via unstructured
python-multipart==0.0.22
    # via
    #   fastapi-users
    #   mcp
    #   onyx
python-oxmsg==0.0.2
    # via unstructured
python-pptx==0.6.23
    # via
    #   markitdown
    #   onyx
python-slugify==8.0.4
    # via
    #   braintrust
    #   pytest-playwright
python3-saml==1.15.0
    # via onyx
pytz==2025.2
    # via
    #   dateparser
    #   office365-rest-python-client
    #   pandas
    #   zeep
pywikibot==9.0.0
    # via onyx
pywin32==311 ; sys_platform == 'win32'
    # via
    #   mcp
    #   pympler
pywin32-ctypes==0.2.3 ; sys_platform == 'win32'
    # via keyring
pyyaml==6.0.3
    # via
    #   dask
    #   distributed
    #   fastmcp
    #   huggingface-hub
    #   jsonschema-path
    #   kubernetes
    #   langchain-core
rapidfuzz==3.13.0
    # via
    #   onyx
    #   unstructured
redis==5.0.8
    # via
    #   fastapi-limiter
    #   onyx
referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-path
    #   jsonschema-specifications
regex==2025.11.3
    # via
    #   dateparser
    #   nltk
    #   tiktoken
requests==2.33.0
    # via
    #   atlassian-python-api
    #   braintrust
    #   cohere
    #   dropbox
    #   exa-py
    #   google-api-core
    #   google-genai
    #   hubspot-api-client
    #   huggingface-hub
    #   jira
    #   jsonschema-path
    #   kubernetes
    #   langfuse
    #   langsmith
    #   markitdown
    #   matrix-client
    #   msal
    #   office365-rest-python-client
    #   onyx
    #   opensearch-py
    #   opentelemetry-exporter-otlp-proto-http
    #   pyairtable
    #   pygithub
    #   pytest-base-url
    #   python-gitlab
    #   pywikibot
    #   requests-file
    #   requests-oauthlib
    #   requests-toolbelt
    #   simple-salesforce
    #   stripe
    #   tiktoken
    #   unstructured
    #   voyageai
    #   zeep
    #   zulip
requests-file==3.0.1
    # via zeep
requests-oauthlib==1.3.1
    # via
    #   atlassian-python-api
    #   google-auth-oauthlib
    #   jira
    #   kubernetes
    #   onyx
requests-toolbelt==1.0.0
    # via
    #   jira
    #   langsmith
    #   python-gitlab
    #   unstructured-client
    #   zeep
retry==0.9.2
    # via onyx
rfc3986==1.5.0
    # via onyx
rich==14.2.0
    # via
    #   cyclopts
    #   fastmcp
    #   rich-rst
    #   typer
rich-rst==1.3.2
    # via cyclopts
rpds-py==0.29.0
    # via
    #   jsonschema
    #   referencing
rsa==4.9.1
    # via google-auth
s3transfer==0.13.1
    # via boto3
secretstorage==3.5.0 ; sys_platform == 'linux'
    # via keyring
sendgrid==6.12.5
    # via onyx
sentry-sdk==2.14.0
    # via onyx
shapely==2.0.6
    # via onyx
shellingham==1.5.4
    # via typer
simple-salesforce==1.12.6
    # via onyx
six==1.17.0
    # via
    #   asana
    #   atlassian-python-api
    #   dropbox
    #   google-auth-httplib2
    #   html5lib
    #   hubspot-api-client
    #   kubernetes
    #   langdetect
    #   markdownify
    #   python-dateutil
    #   stone
slack-sdk==3.20.2
    # via onyx
smmap==5.0.2
    # via gitdb
sniffio==1.3.1
    # via
    #   anyio
    #   openai
sortedcontainers==2.4.0
    # via distributed
soupsieve==2.8
    # via beautifulsoup4
sqlalchemy==2.0.15
    # via
    #   alembic
    #   fastapi-users-db-sqlalchemy
    #   onyx
sse-starlette==3.0.3
    # via mcp
sseclient-py==1.8.0
    # via braintrust
starlette==0.49.3
    # via
    #   fastapi
    #   mcp
    #   onyx
    #   prometheus-fastapi-instrumentator
stone==3.3.1
    # via dropbox
stripe==10.12.0
    # via onyx
supervisor==4.3.0
    # via onyx
sympy==1.14.0
    # via onnxruntime
tblib==3.2.2
    # via distributed
tenacity==9.1.2
    # via
    #   google-genai
    #   langchain-core
    #   voyageai
text-unidecode==1.3
    # via python-slugify
tiktoken==0.7.0
    # via
    #   litellm
    #   onyx
timeago==1.0.16
    # via onyx
tld==0.13.1
    # via courlan
tokenizers==0.21.4
    # via
    #   chonkie
    #   cohere
    #   litellm
toolz==1.1.0
    # via
    #   dask
    #   distributed
    #   partd
tornado==6.5.5
    # via distributed
tqdm==4.67.1
    # via
    #   braintrust
    #   chonkie
    #   huggingface-hub
    #   nltk
    #   openai
    #   unstructured
trafilatura==1.12.2
    # via onyx
typer==0.20.0
    # via mcp
types-awscrt==0.28.4
    # via botocore-stubs
types-openpyxl==3.0.4.7
    # via onyx
types-requests==2.32.0.20250328
    # via cohere
types-s3transfer==0.14.0
    # via boto3-stubs
typing-extensions==4.15.0
    # via
    #   aiosignal
    #   alembic
    #   anyio
    #   boto3-stubs
    #   braintrust
    #   cohere
    #   ddtrace
    #   exa-py
    #   exceptiongroup
    #   fastapi
    #   google-genai
    #   huggingface-hub
    #   jira
    #   langchain-core
    #   mcp
    #   mypy
    #   mypy-boto3-s3
    #   office365-rest-python-client
    #   openai
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
    #   py-key-value-aio
    #   pyairtable
    #   pydantic
    #   pydantic-core
    #   pyee
    #   pygithub
    #   python-docx
    #   python-oxmsg
    #   referencing
    #   simple-salesforce
    #   sqlalchemy
    #   starlette
    #   stripe
    #   typer
    #   typing-inspect
    #   typing-inspection
    #   unstructured
    #   zulip
typing-inspect==0.9.0
    # via dataclasses-json
typing-inspection==0.4.2
    # via
    #   fastapi
    #   mcp
    #   pydantic
    #   pydantic-settings
tzdata==2025.2
    # via
    #   kombu
    #   pandas
    #   tzlocal
tzlocal==5.3.1
    # via dateparser
uncalled-for==0.2.0
    # via fastmcp
unstructured==0.18.27
    # via onyx
unstructured-client==0.42.6
    # via
    #   onyx
    #   unstructured
uritemplate==4.2.0
    # via google-api-python-client
urllib3==2.6.3
    # via
    #   asana
    #   botocore
    #   courlan
    #   distributed
    #   htmldate
    #   hubspot-api-client
    #   kubernetes
    #   onyx
    #   opensearch-py
    #   pyairtable
    #   pygithub
    #   requests
    #   sentry-sdk
    #   trafilatura
    #   types-requests
uuid-utils==0.14.0
    # via langchain-core
uvicorn==0.35.0
    # via
    #   fastmcp
    #   mcp
    #   onyx
vine==5.1.0
    # via
    #   amqp
    #   celery
    #   kombu
voyageai==0.2.3
    # via onyx
watchfiles==1.1.1
    # via fastmcp
wcwidth==0.2.14
    # via prompt-toolkit
webencodings==0.5.1
    # via html5lib
websocket-client==1.9.0
    # via kubernetes
websockets==15.0.1
    # via
    #   fastmcp
    #   google-genai
werkzeug==3.1.6
    # via sendgrid
wrapt==1.17.3
    # via
    #   aiobotocore
    #   braintrust
    #   ddtrace
    #   deprecated
    #   langfuse
    #   openinference-instrumentation
    #   unstructured
xlrd==2.0.2
    # via markitdown
xlsxwriter==3.2.9
    # via python-pptx
xmlsec==1.3.14
    # via
    #   onyx
    #   python3-saml
xmltodict==1.0.2
    # via ddtrace
yarl==1.22.0
    # via aiohttp
zeep==4.3.2
    # via simple-salesforce
zict==3.0.0
    # via distributed
zipp==3.23.0
    # via importlib-metadata
zstandard==0.23.0
    # via langsmith
zulip==0.8.2
    # via onyx


================================================
FILE: backend/requirements/dev.txt
================================================
# This file was autogenerated by uv via the following command:
#    uv export --no-emit-project --no-default-groups --no-hashes --extra dev -o backend/requirements/dev.txt
agent-client-protocol==0.7.1
    # via onyx
aioboto3==15.1.0
    # via onyx
aiobotocore==2.24.0
    # via aioboto3
aiofiles==25.1.0
    # via aioboto3
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.13.4
    # via
    #   aiobotocore
    #   discord-py
    #   litellm
    #   voyageai
aioitertools==0.13.0
    # via aiobotocore
aiolimiter==1.2.1
    # via voyageai
aiosignal==1.4.0
    # via aiohttp
alembic==1.10.4
    # via pytest-alembic
annotated-doc==0.0.4
    # via fastapi
annotated-types==0.7.0
    # via pydantic
anyio==4.11.0
    # via
    #   claude-agent-sdk
    #   google-genai
    #   httpx
    #   mcp
    #   openai
    #   sse-starlette
    #   starlette
appnope==0.1.4 ; sys_platform == 'darwin'
    # via ipykernel
asttokens==3.0.1
    # via stack-data
attrs==25.4.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
black==25.1.0
    # via onyx
boto3==1.39.11
    # via
    #   aiobotocore
    #   cohere
botocore==1.39.11
    # via
    #   aiobotocore
    #   boto3
    #   s3transfer
brotli==1.2.0
    # via onyx
celery-types==0.19.0
    # via onyx
certifi==2025.11.12
    # via
    #   httpcore
    #   httpx
    #   kubernetes
    #   requests
    #   sentry-sdk
cffi==2.0.0 ; implementation_name == 'pypy' or platform_python_implementation != 'PyPy'
    # via
    #   cryptography
    #   pyzmq
cfgv==3.4.0
    # via pre-commit
charset-normalizer==3.4.4
    # via requests
classify-imports==4.2.0
    # via reorder-python-imports-black
claude-agent-sdk==0.1.19
    # via onyx
click==8.3.1
    # via
    #   black
    #   litellm
    #   uvicorn
cohere==5.6.1
    # via onyx
colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   ipython
    #   pytest
    #   tqdm
comm==0.2.3
    # via ipykernel
contourpy==1.3.3
    # via matplotlib
cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
cycler==0.12.1
    # via matplotlib
debugpy==1.8.17
    # via ipykernel
decorator==5.2.1
    # via
    #   ipython
    #   retry
discord-py==2.4.0
    # via onyx
distlib==0.4.0
    # via virtualenv
distro==1.9.0
    # via openai
durationpy==0.10
    # via kubernetes
execnet==2.1.2
    # via pytest-xdist
executing==2.2.1
    # via stack-data
faker==40.1.2
    # via onyx
fastapi==0.133.1
    # via
    #   onyx
    #   onyx-devtools
fastavro==1.12.1
    # via cohere
fastuuid==0.14.0
    # via litellm
filelock==3.20.3
    # via
    #   huggingface-hub
    #   virtualenv
fonttools==4.61.1
    # via matplotlib
frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2025.10.0
    # via huggingface-hub
google-auth==2.48.0
    # via
    #   google-genai
    #   kubernetes
google-genai==1.52.0
    # via onyx
greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
    # via sqlalchemy
h11==0.16.0
    # via
    #   httpcore
    #   uvicorn
hatchling==1.28.0
    # via onyx
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
httpcore==1.0.9
    # via httpx
httpx==0.28.1
    # via
    #   cohere
    #   google-genai
    #   litellm
    #   mcp
    #   openai
httpx-sse==0.4.3
    # via
    #   cohere
    #   mcp
huggingface-hub==0.35.3
    # via tokenizers
identify==2.6.15
    # via pre-commit
idna==3.11
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
importlib-metadata==8.7.0
    # via litellm
iniconfig==2.3.0
    # via pytest
ipykernel==6.29.5
    # via onyx
ipython==9.7.0
    # via ipykernel
ipython-pygments-lexers==1.1.1
    # via ipython
jedi==0.19.2
    # via ipython
jinja2==3.1.6
    # via litellm
jiter==0.12.0
    # via openai
jmespath==1.0.1
    # via
    #   aiobotocore
    #   boto3
    #   botocore
jsonschema==4.25.1
    # via
    #   litellm
    #   mcp
jsonschema-specifications==2025.9.1
    # via jsonschema
jupyter-client==8.6.3
    # via ipykernel
jupyter-core==5.9.1
    # via
    #   ipykernel
    #   jupyter-client
kiwisolver==1.4.9
    # via matplotlib
kubernetes==31.0.0
    # via onyx
litellm==1.81.6
    # via onyx
mako==1.2.4
    # via alembic
manygo==0.2.0
    # via onyx
markupsafe==3.0.3
    # via
    #   jinja2
    #   mako
matplotlib==3.10.8
    # via onyx
matplotlib-inline==0.2.1
    # via
    #   ipykernel
    #   ipython
mcp==1.26.0
    # via claude-agent-sdk
multidict==6.7.0
    # via
    #   aiobotocore
    #   aiohttp
    #   yarl
mypy==1.13.0
    # via onyx
mypy-extensions==1.0.0
    # via
    #   black
    #   mypy
    #   onyx
nest-asyncio==1.6.0
    # via ipykernel
nodeenv==1.9.1
    # via pre-commit
numpy==2.4.1
    # via
    #   contourpy
    #   matplotlib
    #   pandas-stubs
    #   voyageai
oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
onyx-devtools==0.7.2
    # via onyx
openai==2.14.0
    # via
    #   litellm
    #   onyx
openapi-generator-cli==7.17.0
    # via
    #   onyx
    #   onyx-devtools
packaging==24.2
    # via
    #   black
    #   hatchling
    #   huggingface-hub
    #   ipykernel
    #   matplotlib
    #   pytest
pandas-stubs==2.3.3.251201
    # via onyx
parameterized==0.9.0
    # via cohere
parso==0.8.5
    # via jedi
pathspec==0.12.1
    # via
    #   black
    #   hatchling
pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
    # via ipython
pillow==12.1.1
    # via matplotlib
platformdirs==4.5.0
    # via
    #   black
    #   jupyter-core
    #   virtualenv
pluggy==1.6.0
    # via
    #   hatchling
    #   pytest
pre-commit==3.2.2
    # via onyx
prometheus-client==0.23.1
    # via
    #   onyx
    #   prometheus-fastapi-instrumentator
prometheus-fastapi-instrumentator==7.1.0
    # via onyx
prompt-toolkit==3.0.52
    # via ipython
propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
psutil==7.1.3
    # via ipykernel
ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
    # via pexpect
pure-eval==0.2.3
    # via stack-data
py==1.11.0
    # via retry
pyasn1==0.6.3
    # via
    #   pyasn1-modules
    #   rsa
pyasn1-modules==0.4.2
    # via google-auth
pycparser==2.23 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy')
    # via cffi
pydantic==2.11.7
    # via
    #   agent-client-protocol
    #   cohere
    #   fastapi
    #   google-genai
    #   litellm
    #   mcp
    #   onyx
    #   openai
    #   pydantic-settings
pydantic-core==2.33.2
    # via pydantic
pydantic-settings==2.12.0
    # via mcp
pygments==2.20.0
    # via
    #   ipython
    #   ipython-pygments-lexers
pyjwt==2.12.0
    # via mcp
pyparsing==3.2.5
    # via matplotlib
pytest==8.3.5
    # via
    #   onyx
    #   pytest-alembic
    #   pytest-asyncio
    #   pytest-dotenv
    #   pytest-repeat
    #   pytest-xdist
pytest-alembic==0.12.1
    # via onyx
pytest-asyncio==1.3.0
    # via onyx
pytest-dotenv==0.5.2
    # via onyx
pytest-repeat==0.9.4
    # via onyx
pytest-xdist==3.8.0
    # via onyx
python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   botocore
    #   jupyter-client
    #   kubernetes
    #   matplotlib
python-dotenv==1.1.1
    # via
    #   litellm
    #   pydantic-settings
    #   pytest-dotenv
python-multipart==0.0.22
    # via mcp
pywin32==311 ; sys_platform == 'win32'
    # via mcp
pyyaml==6.0.3
    # via
    #   huggingface-hub
    #   kubernetes
    #   pre-commit
pyzmq==27.1.0
    # via
    #   ipykernel
    #   jupyter-client
referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-specifications
regex==2025.11.3
    # via tiktoken
release-tag==0.5.2
    # via onyx
reorder-python-imports-black==3.14.0
    # via onyx
requests==2.33.0
    # via
    #   cohere
    #   google-genai
    #   huggingface-hub
    #   kubernetes
    #   requests-oauthlib
    #   tiktoken
    #   voyageai
requests-oauthlib==1.3.1
    # via kubernetes
retry==0.9.2
    # via onyx
rpds-py==0.29.0
    # via
    #   jsonschema
    #   referencing
rsa==4.9.1
    # via google-auth
ruff==0.12.0
    # via onyx
s3transfer==0.13.1
    # via boto3
sentry-sdk==2.14.0
    # via onyx
six==1.17.0
    # via
    #   kubernetes
    #   python-dateutil
sniffio==1.3.1
    # via
    #   anyio
    #   openai
sqlalchemy==2.0.15
    # via
    #   alembic
    #   pytest-alembic
sse-starlette==3.0.3
    # via mcp
stack-data==0.6.3
    # via ipython
starlette==0.49.3
    # via
    #   fastapi
    #   mcp
    #   prometheus-fastapi-instrumentator
tenacity==9.1.2
    # via
    #   google-genai
    #   voyageai
tiktoken==0.7.0
    # via litellm
tokenizers==0.21.4
    # via
    #   cohere
    #   litellm
tornado==6.5.5
    # via
    #   ipykernel
    #   jupyter-client
tqdm==4.67.1
    # via
    #   huggingface-hub
    #   openai
traitlets==5.14.3
    # via
    #   ipykernel
    #   ipython
    #   jupyter-client
    #   jupyter-core
    #   matplotlib-inline
trove-classifiers==2025.12.1.14
    # via hatchling
types-beautifulsoup4==4.12.0.3
    # via onyx
types-html5lib==1.1.11.13
    # via
    #   onyx
    #   types-beautifulsoup4
types-oauthlib==3.2.0.9
    # via onyx
types-passlib==1.7.7.20240106
    # via onyx
types-pillow==10.2.0.20240822
    # via onyx
types-psutil==7.1.3.20251125
    # via onyx
types-psycopg2==2.9.21.10
    # via onyx
types-python-dateutil==2.8.19.13
    # via onyx
types-pytz==2023.3.1.1
    # via
    #   onyx
    #   pandas-stubs
types-pyyaml==6.0.12.11
    # via onyx
types-regex==2023.3.23.1
    # via onyx
types-requests==2.32.0.20250328
    # via
    #   cohere
    #   onyx
types-retry==0.9.9.3
    # via onyx
types-setuptools==68.0.0.3
    # via onyx
typing-extensions==4.15.0
    # via
    #   aiosignal
    #   alembic
    #   anyio
    #   celery-types
    #   cohere
    #   fastapi
    #   google-genai
    #   huggingface-hub
    #   ipython
    #   mcp
    #   mypy
    #   openai
    #   pydantic
    #   pydantic-core
    #   pytest-asyncio
    #   referencing
    #   sqlalchemy
    #   starlette
    #   typing-inspection
typing-inspection==0.4.2
    # via
    #   fastapi
    #   mcp
    #   pydantic
    #   pydantic-settings
tzdata==2025.2 ; sys_platform == 'win32'
    # via faker
urllib3==2.6.3
    # via
    #   botocore
    #   kubernetes
    #   requests
    #   sentry-sdk
    #   types-requests
uvicorn==0.35.0
    # via
    #   mcp
    #   onyx
virtualenv==20.36.1
    # via pre-commit
voyageai==0.2.3
    # via onyx
wcwidth==0.2.14
    # via prompt-toolkit
websocket-client==1.9.0
    # via kubernetes
websockets==15.0.1
    # via google-genai
wrapt==1.17.3
    # via aiobotocore
yarl==1.22.0
    # via aiohttp
zipp==3.23.0
    # via importlib-metadata
zizmor==1.18.0
    # via onyx


================================================
FILE: backend/requirements/ee.txt
================================================
# This file was autogenerated by uv via the following command:
#    uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
agent-client-protocol==0.7.1
    # via onyx
aioboto3==15.1.0
    # via onyx
aiobotocore==2.24.0
    # via aioboto3
aiofiles==25.1.0
    # via aioboto3
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.13.4
    # via
    #   aiobotocore
    #   discord-py
    #   litellm
    #   voyageai
aioitertools==0.13.0
    # via aiobotocore
aiolimiter==1.2.1
    # via voyageai
aiosignal==1.4.0
    # via aiohttp
annotated-doc==0.0.4
    # via fastapi
annotated-types==0.7.0
    # via pydantic
anyio==4.11.0
    # via
    #   claude-agent-sdk
    #   google-genai
    #   httpx
    #   mcp
    #   openai
    #   sse-starlette
    #   starlette
attrs==25.4.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
backoff==2.2.1
    # via posthog
boto3==1.39.11
    # via
    #   aiobotocore
    #   cohere
botocore==1.39.11
    # via
    #   aiobotocore
    #   boto3
    #   s3transfer
brotli==1.2.0
    # via onyx
certifi==2025.11.12
    # via
    #   httpcore
    #   httpx
    #   kubernetes
    #   requests
    #   sentry-sdk
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
    # via cryptography
charset-normalizer==3.4.4
    # via requests
claude-agent-sdk==0.1.19
    # via onyx
click==8.3.1
    # via
    #   litellm
    #   uvicorn
cohere==5.6.1
    # via onyx
colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
decorator==5.2.1
    # via retry
discord-py==2.4.0
    # via onyx
distro==1.9.0
    # via openai
durationpy==0.10
    # via kubernetes
fastapi==0.133.1
    # via onyx
fastavro==1.12.1
    # via cohere
fastuuid==0.14.0
    # via litellm
filelock==3.20.3
    # via huggingface-hub
frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2025.10.0
    # via huggingface-hub
google-auth==2.48.0
    # via
    #   google-genai
    #   kubernetes
google-genai==1.52.0
    # via onyx
h11==0.16.0
    # via
    #   httpcore
    #   uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
httpcore==1.0.9
    # via httpx
httpx==0.28.1
    # via
    #   cohere
    #   google-genai
    #   litellm
    #   mcp
    #   openai
httpx-sse==0.4.3
    # via
    #   cohere
    #   mcp
huggingface-hub==0.35.3
    # via tokenizers
idna==3.11
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
importlib-metadata==8.7.0
    # via litellm
jinja2==3.1.6
    # via litellm
jiter==0.12.0
    # via openai
jmespath==1.0.1
    # via
    #   aiobotocore
    #   boto3
    #   botocore
jsonschema==4.25.1
    # via
    #   litellm
    #   mcp
jsonschema-specifications==2025.9.1
    # via jsonschema
kubernetes==31.0.0
    # via onyx
litellm==1.81.6
    # via onyx
markupsafe==3.0.3
    # via jinja2
mcp==1.26.0
    # via claude-agent-sdk
monotonic==1.6
    # via posthog
multidict==6.7.0
    # via
    #   aiobotocore
    #   aiohttp
    #   yarl
numpy==2.4.1
    # via voyageai
oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
openai==2.14.0
    # via
    #   litellm
    #   onyx
packaging==24.2
    # via huggingface-hub
parameterized==0.9.0
    # via cohere
posthog==3.7.4
    # via onyx
prometheus-client==0.23.1
    # via
    #   onyx
    #   prometheus-fastapi-instrumentator
prometheus-fastapi-instrumentator==7.1.0
    # via onyx
propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
py==1.11.0
    # via retry
pyasn1==0.6.3
    # via
    #   pyasn1-modules
    #   rsa
pyasn1-modules==0.4.2
    # via google-auth
pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
    # via cffi
pydantic==2.11.7
    # via
    #   agent-client-protocol
    #   cohere
    #   fastapi
    #   google-genai
    #   litellm
    #   mcp
    #   onyx
    #   openai
    #   pydantic-settings
pydantic-core==2.33.2
    # via pydantic
pydantic-settings==2.12.0
    # via mcp
pyjwt==2.12.0
    # via mcp
python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   botocore
    #   kubernetes
    #   posthog
python-dotenv==1.1.1
    # via
    #   litellm
    #   pydantic-settings
python-multipart==0.0.22
    # via mcp
pywin32==311 ; sys_platform == 'win32'
    # via mcp
pyyaml==6.0.3
    # via
    #   huggingface-hub
    #   kubernetes
referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-specifications
regex==2025.11.3
    # via tiktoken
requests==2.33.0
    # via
    #   cohere
    #   google-genai
    #   huggingface-hub
    #   kubernetes
    #   posthog
    #   requests-oauthlib
    #   tiktoken
    #   voyageai
requests-oauthlib==1.3.1
    # via kubernetes
retry==0.9.2
    # via onyx
rpds-py==0.29.0
    # via
    #   jsonschema
    #   referencing
rsa==4.9.1
    # via google-auth
s3transfer==0.13.1
    # via boto3
sentry-sdk==2.14.0
    # via onyx
six==1.17.0
    # via
    #   kubernetes
    #   posthog
    #   python-dateutil
sniffio==1.3.1
    # via
    #   anyio
    #   openai
sse-starlette==3.0.3
    # via mcp
starlette==0.49.3
    # via
    #   fastapi
    #   mcp
    #   prometheus-fastapi-instrumentator
tenacity==9.1.2
    # via
    #   google-genai
    #   voyageai
tiktoken==0.7.0
    # via litellm
tokenizers==0.21.4
    # via
    #   cohere
    #   litellm
tqdm==4.67.1
    # via
    #   huggingface-hub
    #   openai
types-requests==2.32.0.20250328
    # via cohere
typing-extensions==4.15.0
    # via
    #   aiosignal
    #   anyio
    #   cohere
    #   fastapi
    #   google-genai
    #   huggingface-hub
    #   mcp
    #   openai
    #   pydantic
    #   pydantic-core
    #   referencing
    #   starlette
    #   typing-inspection
typing-inspection==0.4.2
    # via
    #   fastapi
    #   mcp
    #   pydantic
    #   pydantic-settings
urllib3==2.6.3
    # via
    #   botocore
    #   kubernetes
    #   requests
    #   sentry-sdk
    #   types-requests
uvicorn==0.35.0
    # via
    #   mcp
    #   onyx
voyageai==0.2.3
    # via onyx
websocket-client==1.9.0
    # via kubernetes
websockets==15.0.1
    # via google-genai
wrapt==1.17.3
    # via aiobotocore
yarl==1.22.0
    # via aiohttp
zipp==3.23.0
    # via importlib-metadata


================================================
FILE: backend/requirements/model_server.txt
================================================
# This file was autogenerated by uv via the following command:
#    uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
accelerate==1.6.0
    # via onyx
agent-client-protocol==0.7.1
    # via onyx
aioboto3==15.1.0
    # via onyx
aiobotocore==2.24.0
    # via aioboto3
aiofiles==25.1.0
    # via aioboto3
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.13.4
    # via
    #   aiobotocore
    #   discord-py
    #   litellm
    #   voyageai
aioitertools==0.13.0
    # via aiobotocore
aiolimiter==1.2.1
    # via voyageai
aiosignal==1.4.0
    # via aiohttp
amqp==5.3.1
    # via kombu
annotated-doc==0.0.4
    # via fastapi
annotated-types==0.7.0
    # via pydantic
anyio==4.11.0
    # via
    #   claude-agent-sdk
    #   google-genai
    #   httpx
    #   mcp
    #   openai
    #   sse-starlette
    #   starlette
attrs==25.4.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
billiard==4.2.3
    # via celery
boto3==1.39.11
    # via
    #   aiobotocore
    #   cohere
botocore==1.39.11
    # via
    #   aiobotocore
    #   boto3
    #   s3transfer
brotli==1.2.0
    # via onyx
celery==5.5.1
    # via sentry-sdk
certifi==2025.11.12
    # via
    #   httpcore
    #   httpx
    #   kubernetes
    #   requests
    #   sentry-sdk
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
    # via cryptography
charset-normalizer==3.4.4
    # via requests
claude-agent-sdk==0.1.19
    # via onyx
click==8.3.1
    # via
    #   celery
    #   click-didyoumean
    #   click-plugins
    #   click-repl
    #   litellm
    #   uvicorn
click-didyoumean==0.3.1
    # via celery
click-plugins==1.1.1.2
    # via celery
click-repl==0.3.0
    # via celery
cohere==5.6.1
    # via onyx
colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
decorator==5.2.1
    # via retry
discord-py==2.4.0
    # via onyx
distro==1.9.0
    # via openai
durationpy==0.10
    # via kubernetes
einops==0.8.1
    # via onyx
fastapi==0.133.1
    # via
    #   onyx
    #   sentry-sdk
fastavro==1.12.1
    # via cohere
fastuuid==0.14.0
    # via litellm
filelock==3.20.3
    # via
    #   huggingface-hub
    #   torch
    #   transformers
frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2025.10.0
    # via
    #   huggingface-hub
    #   torch
google-auth==2.48.0
    # via
    #   google-genai
    #   kubernetes
google-genai==1.52.0
    # via onyx
h11==0.16.0
    # via
    #   httpcore
    #   uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
httpcore==1.0.9
    # via httpx
httpx==0.28.1
    # via
    #   cohere
    #   google-genai
    #   litellm
    #   mcp
    #   openai
httpx-sse==0.4.3
    # via
    #   cohere
    #   mcp
huggingface-hub==0.35.3
    # via
    #   accelerate
    #   sentence-transformers
    #   tokenizers
    #   transformers
idna==3.11
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
importlib-metadata==8.7.0
    # via litellm
jinja2==3.1.6
    # via
    #   litellm
    #   torch
jiter==0.12.0
    # via openai
jmespath==1.0.1
    # via
    #   aiobotocore
    #   boto3
    #   botocore
joblib==1.5.2
    # via scikit-learn
jsonschema==4.25.1
    # via
    #   litellm
    #   mcp
jsonschema-specifications==2025.9.1
    # via jsonschema
kombu==5.5.4
    # via celery
kubernetes==31.0.0
    # via onyx
litellm==1.81.6
    # via onyx
markupsafe==3.0.3
    # via jinja2
mcp==1.26.0
    # via claude-agent-sdk
mpmath==1.3.0
    # via sympy
multidict==6.7.0
    # via
    #   aiobotocore
    #   aiohttp
    #   yarl
networkx==3.5
    # via torch
numpy==2.4.1
    # via
    #   accelerate
    #   onyx
    #   scikit-learn
    #   scipy
    #   transformers
    #   voyageai
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
openai==2.14.0
    # via
    #   litellm
    #   onyx
packaging==24.2
    # via
    #   accelerate
    #   huggingface-hub
    #   kombu
    #   transformers
parameterized==0.9.0
    # via cohere
pillow==12.1.1
    # via sentence-transformers
prometheus-client==0.23.1
    # via
    #   onyx
    #   prometheus-fastapi-instrumentator
prometheus-fastapi-instrumentator==7.1.0
    # via onyx
prompt-toolkit==3.0.52
    # via click-repl
propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
psutil==7.1.3
    # via accelerate
py==1.11.0
    # via retry
pyasn1==0.6.3
    # via
    #   pyasn1-modules
    #   rsa
pyasn1-modules==0.4.2
    # via google-auth
pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
    # via cffi
pydantic==2.11.7
    # via
    #   agent-client-protocol
    #   cohere
    #   fastapi
    #   google-genai
    #   litellm
    #   mcp
    #   onyx
    #   openai
    #   pydantic-settings
pydantic-core==2.33.2
    # via pydantic
pydantic-settings==2.12.0
    # via mcp
pyjwt==2.12.0
    # via mcp
python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   botocore
    #   celery
    #   kubernetes
python-dotenv==1.1.1
    # via
    #   litellm
    #   pydantic-settings
python-multipart==0.0.22
    # via mcp
pywin32==311 ; sys_platform == 'win32'
    # via mcp
pyyaml==6.0.3
    # via
    #   accelerate
    #   huggingface-hub
    #   kubernetes
    #   transformers
referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-specifications
regex==2025.11.3
    # via
    #   tiktoken
    #   transformers
requests==2.33.0
    # via
    #   cohere
    #   google-genai
    #   huggingface-hub
    #   kubernetes
    #   requests-oauthlib
    #   tiktoken
    #   transformers
    #   voyageai
requests-oauthlib==1.3.1
    # via kubernetes
retry==0.9.2
    # via onyx
rpds-py==0.29.0
    # via
    #   jsonschema
    #   referencing
rsa==4.9.1
    # via google-auth
s3transfer==0.13.1
    # via boto3
safetensors==0.5.3
    # via
    #   accelerate
    #   onyx
    #   transformers
scikit-learn==1.7.2
    # via sentence-transformers
scipy==1.16.3
    # via
    #   scikit-learn
    #   sentence-transformers
sentence-transformers==4.0.2
    # via onyx
sentry-sdk==2.14.0
    # via onyx
setuptools==80.9.0 ; python_full_version >= '3.12'
    # via torch
six==1.17.0
    # via
    #   kubernetes
    #   python-dateutil
sniffio==1.3.1
    # via
    #   anyio
    #   openai
sse-starlette==3.0.3
    # via mcp
starlette==0.49.3
    # via
    #   fastapi
    #   mcp
    #   prometheus-fastapi-instrumentator
    #   sentry-sdk
sympy==1.14.0
    # via torch
tenacity==9.1.2
    # via
    #   google-genai
    #   voyageai
threadpoolctl==3.6.0
    # via scikit-learn
tiktoken==0.7.0
    # via litellm
tokenizers==0.21.4
    # via
    #   cohere
    #   litellm
    #   transformers
torch==2.9.1
    # via
    #   accelerate
    #   onyx
    #   sentence-transformers
tqdm==4.67.1
    # via
    #   huggingface-hub
    #   openai
    #   sentence-transformers
    #   transformers
transformers==4.53.0
    # via
    #   onyx
    #   sentence-transformers
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
types-requests==2.32.0.20250328
    # via cohere
typing-extensions==4.15.0
    # via
    #   aiosignal
    #   anyio
    #   cohere
    #   fastapi
    #   google-genai
    #   huggingface-hub
    #   mcp
    #   openai
    #   pydantic
    #   pydantic-core
    #   referencing
    #   sentence-transformers
    #   starlette
    #   torch
    #   typing-inspection
typing-inspection==0.4.2
    # via
    #   fastapi
    #   mcp
    #   pydantic
    #   pydantic-settings
tzdata==2025.2
    # via kombu
urllib3==2.6.3
    # via
    #   botocore
    #   kubernetes
    #   requests
    #   sentry-sdk
    #   types-requests
uvicorn==0.35.0
    # via
    #   mcp
    #   onyx
vine==5.1.0
    # via
    #   amqp
    #   celery
    #   kombu
voyageai==0.2.3
    # via onyx
wcwidth==0.2.14
    # via prompt-toolkit
websocket-client==1.9.0
    # via kubernetes
websockets==15.0.1
    # via google-genai
wrapt==1.17.3
    # via aiobotocore
yarl==1.22.0
    # via aiohttp
zipp==3.23.0
    # via importlib-metadata


================================================
FILE: backend/scripts/__init__.py
================================================


================================================
FILE: backend/scripts/add_connector_creation_script.py
================================================
from typing import Any
from typing import Dict

import requests

API_SERVER_URL = "http://localhost:3000"  # Adjust this to your Onyx server URL
HEADERS = {"Content-Type": "application/json"}
API_KEY = "onyx-api-key"  # API key here, if auth is enabled


def create_connector(
    name: str,
    source: str,
    input_type: str,
    connector_specific_config: Dict[str, Any],
    is_public: bool = True,
    groups: list[int] | None = None,
) -> Dict[str, Any]:
    connector_update_request = {
        "name": name,
        "source": source,
        "input_type": input_type,
        "connector_specific_config": connector_specific_config,
        "is_public": is_public,
        "groups": groups or [],
    }

    response = requests.post(
        url=f"{API_SERVER_URL}/api/manage/admin/connector",
        json=connector_update_request,
        headers=HEADERS,
    )
    response.raise_for_status()
    return response.json()


def create_credential(
    name: str,
    source: str,
    credential_json: Dict[str, Any],
    is_public: bool = True,
    groups: list[int] | None = None,
) -> Dict[str, Any]:
    credential_request = {
        "name": name,
        "source": source,
        "credential_json": credential_json,
        "admin_public": is_public,
        "groups": groups or [],
    }

    response = requests.post(
        url=f"{API_SERVER_URL}/api/manage/credential",
        json=credential_request,
        headers=HEADERS,
    )
    response.raise_for_status()
    return response.json()


def create_cc_pair(
    connector_id: int,
    credential_id: int,
    name: str,
    access_type: str = "public",
    groups: list[int] | None = None,
) -> Dict[str, Any]:
    cc_pair_request = {
        "name": name,
        "access_type": access_type,
        "groups": groups or [],
    }

    response = requests.put(
        url=f"{API_SERVER_URL}/api/manage/connector/{connector_id}/credential/{credential_id}",
        json=cc_pair_request,
        headers=HEADERS,
    )
    response.raise_for_status()
    return response.json()


def main() -> None:
    # Create a Web connector
    web_connector = create_connector(
        name="Example Web Connector",
        source="web",
        input_type="load_state",
        connector_specific_config={
            "base_url": "https://example.com",
            "web_connector_type": "recursive",
        },
    )
    print(f"Created Web Connector: {web_connector}")

    # Create a credential for the Web connector
    web_credential = create_credential(
        name="Example Web Credential",
        source="web",
        credential_json={},  # Web connectors typically don't need credentials
        is_public=True,
    )
    print(f"Created Web Credential: {web_credential}")

    # Create CC pair for Web connector
    web_cc_pair = create_cc_pair(
        connector_id=web_connector["id"],
        credential_id=web_credential["id"],
        name="Example Web CC Pair",
        access_type="public",
    )
    print(f"Created Web CC Pair: {web_cc_pair}")

    # Create a GitHub connector
    github_connector = create_connector(
        name="Example GitHub Connector",
        source="github",
        input_type="poll",
        connector_specific_config={
            "repo_owner": "example-owner",
            "repo_name": "example-repo",
            "include_prs": True,
            "include_issues": True,
        },
    )
    print(f"Created GitHub Connector: {github_connector}")

    # Create a credential for the GitHub connector
    github_credential = create_credential(
        name="Example GitHub Credential",
        source="github",
        credential_json={"github_access_token": "your_github_access_token_here"},
        is_public=True,
    )
    print(f"Created GitHub Credential: {github_credential}")

    # Create CC pair for GitHub connector
    github_cc_pair = create_cc_pair(
        connector_id=github_connector["id"],
        credential_id=github_credential["id"],
        name="Example GitHub CC Pair",
        access_type="public",
    )
    print(f"Created GitHub CC Pair: {github_cc_pair}")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/api_inference_sample.py
================================================
# This file is used to demonstrate how to use the backend APIs directly
# In this case, the equivalent of asking a question in Onyx Chat in a new chat session
import argparse
import json
import os

import requests


def create_new_chat_session(onyx_url: str, api_key: str | None) -> int:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
    session_endpoint = onyx_url + "/api/chat/create-chat-session"

    response = requests.post(
        session_endpoint,
        headers=headers,
        json={"persona_id": 0},  # Global default Persona/Assistant ID
    )
    response.raise_for_status()

    new_session_id = response.json()["chat_session_id"]
    return new_session_id


def process_question(onyx_url: str, question: str, api_key: str | None) -> None:
    message_endpoint = onyx_url + "/api/chat/send-chat-message"

    chat_session_id = create_new_chat_session(onyx_url, api_key)

    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None

    data = {
        "message": question,
        "chat_session_id": chat_session_id,
        "parent_message_id": None,
        "file_descriptors": [],
        # Default Question Answer prompt
        "prompt_id": 0,
        # Not specifying any specific docs to chat to, we want to run a search
        "search_doc_ids": None,
        "retrieval_options": {
            "run_search": "always",
            "real_time": True,
            "enable_auto_detect_filters": False,
            # No filters applied, check all sources, document-sets, time ranges, etc.
            "filters": {},
        },
    }

    with requests.post(message_endpoint, headers=headers, json=data) as response:
        response.raise_for_status()

        for packet in response.iter_lines():
            response_text = json.loads(packet.decode())
            # Can also check "top_documents" to capture the streamed search results
            # that include the highest matching documents to the query
            # or check "message_id" to get the message_id used as parent_message_id
            # to create follow-up messages
            new_token = response_text.get("answer_piece")

            if new_token:
                print(new_token, end="", flush=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Sample API Usage")
    parser.add_argument(
        "--onyx-url",
        type=str,
        default="http://localhost:80",
        help="Onyx URL, should point to Onyx nginx.",
    )
    parser.add_argument(
        "--test-question",
        type=str,
        default="What is Onyx?",
        help="Test question for new Chat Session.",
    )

    # Not needed if Auth is disabled
    # Or for Onyx MIT API key must be replaced with session cookie
    api_key = os.environ.get("DANSWER_API_KEY")

    args = parser.parse_args()
    process_question(
        onyx_url=args.onyx_url, question=args.test_question, api_key=api_key
    )


================================================
FILE: backend/scripts/celery_purge_queue.py
================================================
# Tool to run operations on Celery/Redis in production
# this is a work in progress and isn't completely put together yet
# but can serve as a stub for future operations
import argparse
import logging
from logging import getLogger

from redis import Redis

from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY
from onyx.redis.redis_pool import RedisPool

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)

REDIS_PASSWORD = ""


def celery_purge_queue(queue: str, tenant_id: str) -> None:  # noqa: ARG001
    """Purging a celery queue is extremely difficult because the queue is a list
    and the only way an item can be removed from a list is by VALUE, which is
    a linear scan.  Therefore, to purge the list of many values is roughly
    n^2.

    The other alternative is to pop values and push them back, but that raises
    questions about behavior while operating on a live queue.
    """

    pool = RedisPool.create_pool(
        host="127.0.0.1",
        port=6380,
        db=REDIS_DB_NUMBER_CELERY,
        password=REDIS_PASSWORD,
        ssl=True,
        ssl_cert_reqs="optional",
        ssl_ca_certs=None,
    )

    r = Redis(connection_pool=pool)

    length = celery_get_queue_length(queue, r)

    logger.info(f"queue={queue} length={length}")

    # processed = 0
    # deleted = 0
    # for i in range(len(OnyxCeleryPriority)):
    #     queue_name = queue
    #     if i > 0:
    #         queue_name += CELERY_SEPARATOR
    #         queue_name += str(i)

    #     length = r.llen(queue_name)
    #     for i in range(length):
    #         task_raw: bytes | None = r.lindex(queue_name, i)
    #         if not task_raw:
    #             break

    #         processed += 1
    #         task_str = task_raw.decode("utf-8")
    #         task = json.loads(task_str)
    #         task_kwargs_str = task["headers"]["kwargsrepr"]
    #         task_kwargs = json.loads(task_kwargs_str)
    #         task_tenant_id = task_kwargs["tenant_id"]
    #         if task_tenant_id and task_tenant_id == "tenant_id":
    #             print("Delete tenant_id={tenant_id}")
    #             if
    #             deleted += 1

    #         logger.info(f"processed={processed} deleted={deleted}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Purge celery queue by tenant id")
    parser.add_argument("--queue", type=str, help="Queue to purge", required=True)

    parser.add_argument("--tenant", type=str, help="Tenant ID to purge", required=True)

    args = parser.parse_args()
    celery_purge_queue(queue=args.queue, tenant_id=args.tenant)


================================================
FILE: backend/scripts/chat_feedback_dump.py
================================================
# This file is used to demonstrate how to use the backend APIs directly
# to query out feedback for all messages
import argparse
import logging
from logging import getLogger
from typing import Any
from uuid import UUID

import requests

from ee.onyx.server.query_history.api import ChatSessionSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.query_and_chat.models import ChatSessionsResponse

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)

# uncomment the following pydantic models if you need the script to be independent
# from pydantic import BaseModel
# from datetime import datetime
# from enum import Enum

# class UserRole(str, Enum):
#     """
#     User roles
#     - Basic can't perform any admin actions
#     - Admin can perform all admin actions
#     - Curator can perform admin actions for
#         groups they are curators of
#     - Global Curator can perform admin actions
#         for all groups they are a member of
#     """

#     BASIC = "basic"
#     ADMIN = "admin"
#     CURATOR = "curator"
#     GLOBAL_CURATOR = "global_curator"


# class FullUserSnapshot(BaseModel):
#     id: UUID
#     email: str
#     role: UserRole
#     is_active: bool


# class InvitedUserSnapshot(BaseModel):
#     email: str


# class AllUsersResponse(BaseModel):
#     accepted: list[FullUserSnapshot]
#     invited: list[InvitedUserSnapshot]
#     accepted_pages: int
#     invited_pages: int


# class ChatSessionSharedStatus(str, Enum):
#     PUBLIC = "public"
#     PRIVATE = "private"


# class ChatSessionDetails(BaseModel):
#     id: UUID
#     name: str
#     persona_id: int | None = None
#     time_created: str
#     shared_status: ChatSessionSharedStatus
#     folder_id: int | None = None
#     current_alternate_model: str | None = None


# class ChatSessionsResponse(BaseModel):
#     sessions: list[ChatSessionDetails]


# class SessionType(str, Enum):
#     CHAT = "Chat"
#     SEARCH = "Search"
#     SLACK = "Slack"


# class AbridgedSearchDoc(BaseModel):
#     """A subset of the info present in `SearchDoc`"""

#     document_id: str
#     semantic_identifier: str
#     link: str | None


# class QAFeedbackType(str, Enum):
#     LIKE = "like"  # User likes the answer, used for metrics
#     DISLIKE = "dislike"  # User dislikes the answer, used for metrics


# class MessageType(str, Enum):
#     # Using OpenAI standards, Langchain equivalent shown in comment
#     # System message is always constructed on the fly, not saved
#     SYSTEM = "system"  # SystemMessage
#     USER = "user"  # HumanMessage
#     ASSISTANT = "assistant"  # AIMessage


# class MessageSnapshot(BaseModel):
#     id: int
#     message: str
#     message_type: MessageType
#     documents: list[AbridgedSearchDoc]
#     feedback_type: QAFeedbackType | None
#     feedback_text: str | None
#     time_created: datetime


# class ChatSessionSnapshot(BaseModel):
#     id: UUID
#     user_email: str
#     name: str | None
#     messages: list[MessageSnapshot]
#     persona_name: str | None
#     time_created: datetime
#     flow_type: SessionType


def create_new_chat_session(onyx_url: str, api_key: str | None) -> int:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
    session_endpoint = onyx_url + "/api/chat/create-chat-session"

    response = requests.get(session_endpoint, headers=headers)
    response.raise_for_status()

    new_session_id = response.json()["chat_session_id"]
    return new_session_id


def manage_users(onyx_url: str, headers: dict[str, str] | None) -> AllUsersResponse:
    endpoint = onyx_url + "/manage/users"

    response = requests.get(
        endpoint,
        headers=headers,
    )
    response.raise_for_status()

    all_users = AllUsersResponse(**response.json())
    return all_users


def get_chat_sessions(
    onyx_url: str, headers: dict[str, str] | None, user_id: UUID
) -> ChatSessionsResponse:
    endpoint = onyx_url + "/admin/chat-sessions"

    params: dict[str, Any] = {"user_id": user_id}
    response = requests.get(
        endpoint,
        params=params,
        headers=headers,
    )
    response.raise_for_status()

    sessions = ChatSessionsResponse(**response.json())
    return sessions


def get_session_history(
    onyx_url: str, headers: dict[str, str] | None, session_id: UUID
) -> ChatSessionSnapshot:
    endpoint = onyx_url + f"/admin/chat-session-history/{session_id}"

    response = requests.get(
        endpoint,
        headers=headers,
    )
    response.raise_for_status()

    sessions = ChatSessionSnapshot(**response.json())
    return sessions


def process_all_chat_feedback(onyx_url: str, api_key: str | None) -> None:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None

    all_users = manage_users(onyx_url, headers)
    if not all_users:
        raise RuntimeError("manage_users returned None")

    logger.info(f"Accepted users: {len(all_users.accepted)}")

    user_ids: list[UUID] = [user.id for user in all_users.accepted]

    for user_id in user_ids:
        r_sessions = get_chat_sessions(onyx_url, headers, user_id)
        logger.info(f"user={user_id} num_sessions={len(r_sessions.sessions)}")
        for session in r_sessions.sessions:
            s: ChatSessionSnapshot
            try:
                s = get_session_history(onyx_url, headers, session.id)
            except requests.exceptions.HTTPError:
                logger.exception("get_session_history failed.")

            for m in s.messages:
                logger.info(
                    f"user={user_id} "
                    f"session={session.id} "
                    f"message={m.message} "
                    f"feedback_type={m.feedback_type} "
                    f"feedback_text={m.feedback_text}"
                )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Sample API Usage - Chat Feedback")
    parser.add_argument(
        "--url",
        type=str,
        default="http://localhost:8080",
        help="Onyx URL, should point to Onyx nginx.",
    )

    # Not needed if Auth is disabled?
    # Or for Onyx MIT Edition API key must be replaced with session cookie
    parser.add_argument(
        "--api-key",
        type=str,
        help="Onyx Admin Level API key",
    )

    args = parser.parse_args()
    process_all_chat_feedback(onyx_url=args.url, api_key=args.api_key)


================================================
FILE: backend/scripts/chat_history_seeding.py
================================================
import argparse
import logging
from logging import getLogger

from onyx.db.seeding.chat_history_seeding import seed_chat_history

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)


def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
    seed_chat_history(num_sessions, num_messages, num_days)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Seed chat history")
    parser.add_argument(
        "--sessions",
        type=int,
        default=2048,
        help="Number of chat sessions to seed",
    )

    parser.add_argument(
        "--messages",
        type=int,
        default=4,
        help="Number of chat messages to seed per session",
    )

    parser.add_argument(
        "--days",
        type=int,
        default=90,
        help="Number of days looking backwards over which to seed the timestamps with",
    )

    args = parser.parse_args()
    go_main(args.sessions, args.messages, args.days)


================================================
FILE: backend/scripts/chat_loadtest.py
================================================
"""Basic Usage:

python scripts/chat_loadtest.py --api-key <api-key> --url <onyx-url>/api

to run from the container itself, copy this file in and run:

python chat_loadtest.py --api-key <api-key> --url localhost:8080

For more options, checkout the bottom of the file.
"""

import argparse
import asyncio
import logging
import statistics
import time
from collections.abc import AsyncGenerator
from dataclasses import dataclass
from logging import getLogger
from uuid import UUID

import aiohttp

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler()],
)

logger = getLogger(__name__)


@dataclass
class ChatMetrics:
    session_id: UUID
    total_time: float
    first_doc_time: float
    first_answer_time: float
    tokens_per_second: float
    total_tokens: int


class ChatLoadTester:
    def __init__(
        self,
        base_url: str,
        api_key: str | None,
        num_concurrent: int,
        messages_per_session: int,
    ):
        self.base_url = base_url
        self.headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
        self.num_concurrent = num_concurrent
        self.messages_per_session = messages_per_session
        self.metrics: list[ChatMetrics] = []

    async def create_chat_session(self, session: aiohttp.ClientSession) -> str:
        """Create a new chat session"""
        async with session.post(
            f"{self.base_url}/chat/create-chat-session",
            headers=self.headers,
            json={"persona_id": 0, "description": "Load Test"},
        ) as response:
            response.raise_for_status()
            data = await response.json()
            return data["chat_session_id"]

    async def process_stream(
        self, response: aiohttp.ClientResponse
    ) -> AsyncGenerator[str, None]:
        """Process the SSE stream from the chat response"""
        async for chunk in response.content:
            chunk_str = chunk.decode()
            yield chunk_str

    async def send_message(
        self,
        session: aiohttp.ClientSession,
        chat_session_id: str,
        message: str,
        parent_message_id: int | None = None,
    ) -> ChatMetrics:
        """Send a message and measure performance metrics"""
        start_time = time.time()
        first_doc_time = None
        first_answer_time = None
        token_count = 0

        async with session.post(
            f"{self.base_url}/chat/send-chat-message",
            headers=self.headers,
            json={
                "chat_session_id": chat_session_id,
                "message": message,
                "parent_message_id": parent_message_id,
                "prompt_id": None,
                "retrieval_options": {
                    "run_search": "always",
                    "real_time": True,
                },
                "file_descriptors": [],
                "search_doc_ids": [],
            },
        ) as response:
            response.raise_for_status()

            async for chunk in self.process_stream(response):
                if "tool_name" in chunk and "run_search" in chunk:
                    if first_doc_time is None:
                        first_doc_time = time.time() - start_time

                if "answer_piece" in chunk:
                    if first_answer_time is None:
                        first_answer_time = time.time() - start_time
                    token_count += 1

            total_time = time.time() - start_time
            tokens_per_second = token_count / total_time if total_time > 0 else 0

            return ChatMetrics(
                session_id=UUID(chat_session_id),
                total_time=total_time,
                first_doc_time=first_doc_time or 0,
                first_answer_time=first_answer_time or 0,
                tokens_per_second=tokens_per_second,
                total_tokens=token_count,
            )

    async def run_chat_session(self) -> None:
        """Run a complete chat session with multiple messages"""
        async with aiohttp.ClientSession() as session:
            try:
                chat_session_id = await self.create_chat_session(session)
                messages = [
                    "Tell me about the key features of the product",
                    "How does the search functionality work?",
                    "What are the deployment options?",
                    "Can you explain the security features?",
                    "What integrations are available?",
                ]

                parent_message_id = None
                for i in range(self.messages_per_session):
                    message = messages[i % len(messages)]
                    metrics = await self.send_message(
                        session, chat_session_id, message, parent_message_id
                    )
                    self.metrics.append(metrics)
                    parent_message_id = metrics.total_tokens  # Simplified for example

            except Exception as e:
                logger.error(f"Error in chat session: {e}")

    async def run_load_test(self) -> None:
        """Run multiple concurrent chat sessions"""
        start_time = time.time()
        tasks = [self.run_chat_session() for _ in range(self.num_concurrent)]
        await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        self.print_results(total_time)

    def print_results(self, total_time: float) -> None:
        """Print load test results and metrics"""
        logger.info("\n=== Load Test Results ===")
        logger.info(f"Total Time: {total_time:.2f} seconds")
        logger.info(f"Concurrent Sessions: {self.num_concurrent}")
        logger.info(f"Messages per Session: {self.messages_per_session}")
        logger.info(f"Total Messages: {len(self.metrics)}")

        if self.metrics:
            avg_response_time = statistics.mean(m.total_time for m in self.metrics)
            avg_first_doc = statistics.mean(m.first_doc_time for m in self.metrics)
            avg_first_answer = statistics.mean(
                m.first_answer_time for m in self.metrics
            )
            avg_tokens_per_sec = statistics.mean(
                m.tokens_per_second for m in self.metrics
            )

            logger.info(f"\nAverage Response Time: {avg_response_time:.2f} seconds")
            logger.info(f"Average Time to Documents: {avg_first_doc:.2f} seconds")
            logger.info(f"Average Time to First Answer: {avg_first_answer:.2f} seconds")
            logger.info(f"Average Tokens/Second: {avg_tokens_per_sec:.2f}")


def main() -> None:
    parser = argparse.ArgumentParser(description="Chat Load Testing Tool")
    parser.add_argument(
        "--url",
        type=str,
        default="http://localhost:3000/api",
        help="Onyx URL",
    )
    parser.add_argument(
        "--api-key",
        type=str,
        help="Onyx Basic/Admin Level API key",
    )
    parser.add_argument(
        "--concurrent",
        type=int,
        default=10,
        help="Number of concurrent chat sessions",
    )
    parser.add_argument(
        "--messages",
        type=int,
        default=1,
        help="Number of messages per chat session",
    )

    args = parser.parse_args()

    load_tester = ChatLoadTester(
        base_url=args.url,
        api_key=args.api_key,
        num_concurrent=args.concurrent,
        messages_per_session=args.messages,
    )

    asyncio.run(load_tester.run_load_test())


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/debug_usage_limits.py
================================================
#!/usr/bin/env python3
"""
Debug script to fetch usage limit overrides from the control plane.
Run this from within a data plane pod to diagnose usage limits issues.

Usage:
    python debug_usage_limits.py

Environment variables required:
    - DATA_PLANE_SECRET: Secret for generating JWT tokens
    - CONTROL_PLANE_API_BASE_URL: Base URL for the control plane API
"""

import json
import os
import sys
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import jwt
import requests


def generate_data_plane_token(secret: str) -> str:
    """Generate a JWT token for data plane authentication."""
    payload = {
        "iss": "data_plane",
        "exp": datetime.now(timezone.utc) + timedelta(minutes=5),
        "iat": datetime.now(timezone.utc),
        "scope": "api_access",
    }
    return jwt.encode(payload, secret, algorithm="HS256")


def main() -> None:
    # Get required environment variables
    data_plane_secret = os.environ.get("DATA_PLANE_SECRET", "")
    control_plane_url = os.environ.get(
        "CONTROL_PLANE_API_BASE_URL", "http://localhost:8082"
    )

    print("=" * 60)
    print("Usage Limits Debug Script")
    print("=" * 60)
    print(f"CONTROL_PLANE_API_BASE_URL: {control_plane_url}")
    print(f"DATA_PLANE_SECRET set: {bool(data_plane_secret)}")
    print()

    if not data_plane_secret:
        print("ERROR: DATA_PLANE_SECRET is not set!")
        sys.exit(1)

    # Generate token
    try:
        token = generate_data_plane_token(data_plane_secret)
        print(f"Generated JWT token (first 50 chars): {token[:50]}...")
    except Exception as e:
        print(f"ERROR generating token: {e}")
        sys.exit(1)

    # Make request to usage-limit-overrides endpoint
    url = f"{control_plane_url}/usage-limit-overrides"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    print(f"\nMaking request to: {url}")
    print(
        f"Headers: {json.dumps({k: v[:50] + '...' if k == 'Authorization' else v for k, v in headers.items()}, indent=2)}"
    )
    print()

    try:
        response = requests.get(url, headers=headers, timeout=30)
        print(f"Status Code: {response.status_code}")
        print(f"Response Headers: {dict(response.headers)}")
        print()

        print("Response Body:")
        print("-" * 40)
        data = []
        try:
            data = response.json()
            print(json.dumps(data, indent=2))
        except json.JSONDecodeError:
            print(response.text)
        print("-" * 40)
        print("all tenant ids overridden:")
        for tenant_dct in data:  # should be a list of json
            print(tenant_dct["tenant_id"])

        if response.status_code != 200:
            print("\nWARNING: Non-200 status code received!")

    except requests.exceptions.ConnectionError as e:
        print(f"ERROR: Connection failed - {e}")
        sys.exit(1)
    except requests.exceptions.Timeout:
        print("ERROR: Request timed out after 30 seconds")
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: Request failed - {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/litellm/README
================================================
Resources in this directory are used to debug LiteLLM and other AI providers.
Note that the requests are meant to be identical to what the Onyx application sends to litellm.
Double check that this is the case before using these scripts.

## Files

- **payload.json**: Contains a typical request from Onyx

- **test_litellm.py**: Imports Onyx's LiteLLM instance (with monkey patches) and outputs the raw stream events received back from LiteLLM as JSON. Does not use payload.json, but has a similar request body. Consider directly importing litellm to skip monkey patching.

- **directly_hit_azure_api.py**: Directly hits Azure OpenAI endpoints using payload.json. Bypasses LiteLLM for debugging purposes.

================================================
FILE: backend/scripts/debugging/litellm/call_litellm.py
================================================
#!/usr/bin/env python3
"""
Test LiteLLM integration and output raw stream events.

This script uses Onyx's LiteLLM instance (with monkey patches) to make a completion
request and outputs the raw stream events as JSON, one per line.

Usage:
    # Set environment variables if needed:
    export LITELLM_DEBUG=1  # Optional: enable LiteLLM debug logs

    # Update the configuration below, then run:
    python test_litellm.py
"""

import os
from typing import Any

from onyx.llm.litellm_singleton import litellm

# Optional: enable LiteLLM debug logs (set `LITELLM_DEBUG=1`)
if os.getenv("LITELLM_DEBUG") == "1":
    getattr(litellm, "_turn_on_debug", lambda: None)()

# Configuration: Update these values before running
MODEL = "azure/responses/YOUR_MODEL_NAME_HERE"
API_KEY = "YOUR_API_KEY_HERE"
BASE_URL = "https://YOUR_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com"
API_VERSION = "2025-03-01-preview"  # For Azure, must be 2025-03-01-preview

# Example messages - customize as needed
MESSAGES = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "Hello! How can I help you today?"},
    {"role": "user", "content": "what is onyx? search internally and the web"},
]

stream = litellm.completion(
    mock_response=None,
    # Insert /responses/ between provider and model to use the litellm completions ->responses bridge
    model=MODEL,
    api_key=API_KEY,
    base_url=BASE_URL,
    api_version=API_VERSION,
    custom_llm_provider=None,
    messages=MESSAGES,
    tools=[
        {
            "type": "function",
            "function": {
                "name": "internal_search",
                "description": "Search connected applications for information.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of search queries to execute, typically a single query.",
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "generate_image",
                "description": "Generate an image based on a prompt. Do not use unless the user specifically requests an image.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {
                            "type": "string",
                            "description": "Prompt used to generate the image",
                        },
                        "shape": {
                            "type": "string",
                            "description": "Optional - only specify if you want a specific shape. "
                            "Image shape: 'square', 'portrait', or 'landscape'.",
                            "enum": ["square", "portrait", "landscape"],
                        },
                    },
                    "required": ["prompt"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "web_search",
                "description": "Search the web for information. "
                "Returns a list of search results with titles, metadata, and snippets.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "One or more queries to look up on the web.",
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "open_url",
                "description": "Open and read the content of one or more URLs. Returns the text content of the pages.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "urls": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of URLs to open and read. Can be a single URL or multiple URLs.",
                        }
                    },
                    "required": ["urls"],
                },
            },
        },
    ],
    tool_choice="auto",
    stream=True,
    temperature=1,
    timeout=600,
    max_tokens=None,
    stream_options={"include_usage": True},
    reasoning={"effort": "low", "summary": "auto"},
    parallel_tool_calls=True,
    allowed_openai_params=["tool_choice"],
)


def _to_jsonable(x: Any) -> Any:
    """Convert an object to a JSON-serializable format.

    Handles Pydantic models, dataclasses, and other common types.
    """
    if isinstance(x, (str, int, float, bool)) or x is None:
        return x
    if isinstance(x, dict):
        return {k: _to_jsonable(v) for k, v in x.items()}
    if isinstance(x, list):
        return [_to_jsonable(v) for v in x]
    if hasattr(x, "model_dump"):
        return _to_jsonable(x.model_dump())
    if hasattr(x, "dict"):
        try:
            return _to_jsonable(x.dict())
        except Exception:
            pass
    return str(x)


def _filter_null_fields(obj: Any) -> Any:
    """Recursively filter out None/null values from a data structure."""
    if isinstance(obj, dict):
        return {
            k: _filter_null_fields(v)
            for k, v in obj.items()
            if v is not None
            and (not isinstance(v, (dict, list)) or _filter_null_fields(v))
        }
    if isinstance(obj, list):
        filtered = [_filter_null_fields(item) for item in obj]
        return [item for item in filtered if item is not None]
    return obj


def _pretty_print_event(event: Any) -> str:
    """Pretty print an event, showing only non-null fields with newlines."""
    jsonable = _to_jsonable(event)
    filtered = _filter_null_fields(jsonable)

    lines = []

    def _format_value(key: str, value: Any, indent: int = 0) -> None:
        """Recursively format key-value pairs."""
        prefix = "  " * indent
        if isinstance(value, dict):
            if indent == 0:
                # Top-level: print each key-value pair on separate lines
                for k, v in value.items():
                    _format_value(k, v, indent)
            else:
                # Nested dict: print key and then nested items
                lines.append(f"{prefix}{key}:")
                for k, v in value.items():
                    _format_value(k, v, indent + 1)
        elif isinstance(value, list):
            if not value:
                return  # Skip empty lists
            lines.append(f"{prefix}{key}:")
            for i, item in enumerate(value):
                if isinstance(item, dict):
                    lines.append(f"{prefix}  [{i}]:")
                    for k, v in item.items():
                        _format_value(k, v, indent + 2)
                else:
                    lines.append(f"{prefix}  [{i}]: {item}")
        else:
            lines.append(f"{prefix}{key}: {value}")

    if isinstance(filtered, dict):
        for k, v in filtered.items():
            _format_value(k, v, 0)
    else:
        lines.append(str(filtered))

    return "\n".join(lines)


if __name__ == "__main__":
    # Output raw stream events in a pretty format
    for event in stream:
        print("=" * 80, flush=True)
        print(_pretty_print_event(event), flush=True)
        print(flush=True)


================================================
FILE: backend/scripts/debugging/litellm/directly_hit_azure_api.py
================================================
#!/usr/bin/env python3
"""
Directly hit Azure OpenAI endpoints for debugging.

This script bypasses LiteLLM and directly calls Azure OpenAI APIs.
Uses URL and API key constants plus a payload.json in the same directory.

Usage:
    python directly_hit_azure_api.py
"""

from __future__ import annotations

import json
from pathlib import Path

import httpx


# Configuration: Update these values before running
URL = "https://YOUR_AZURE_OPENAI_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com/"
API_KEY = "YOUR_API_KEY_HERE"

PAYLOAD_PATH = Path(__file__).resolve().with_name("payload.json")


def _load_payload_json() -> dict:
    """Load and parse payload.json file."""
    if not PAYLOAD_PATH.exists():
        raise FileNotFoundError(
            f"payload.json not found at {PAYLOAD_PATH!r}. Create payload.json next to this script."
        )
    return json.loads(PAYLOAD_PATH.read_text())


def _print_response(resp: httpx.Response) -> None:
    """Print HTTP response in a readable format."""
    print(f"HTTP {resp.status_code}")

    content_type = resp.headers.get("content-type", "")
    raw = resp.content
    if not raw:
        return

    if "json" in content_type.lower():
        try:
            obj = resp.json()
            print(json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=False))
            return
        except Exception:
            pass

    # Fallback: print as text (replace errors to avoid crashes).
    print(raw.decode("utf-8", errors="replace"))


def main() -> int:
    """Main entry point."""
    if (
        URL
        == "https://YOUR_AZURE_OPENAI_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com/"
    ):
        raise SystemExit(
            "Please set the URL constant at the top of this file to your Azure OpenAI deployment URL."
        )
    if API_KEY == "YOUR_API_KEY_HERE":
        raise SystemExit(
            "Please set the API_KEY constant at the top of this file to your Azure OpenAI API key."
        )

    payload = _load_payload_json()

    headers = {
        "api-key": API_KEY,
        "content-type": "application/json",
    }

    with httpx.Client(timeout=60.0) as client:
        resp = client.post(
            url=URL,
            headers=headers,
            json=payload,
        )

    _print_response(resp)
    return 0 if resp.is_success else 1


if __name__ == "__main__":
    raise SystemExit(main())


================================================
FILE: backend/scripts/debugging/litellm/payload.json
================================================
{
    "model": "[YOUR MODEL HERE]",
    "input": [
      {
        "type": "message",
        "role": "user",
        "content": [
          {
            "type": "input_text",
            "text": "hi"
          }
        ]
      },
      {
        "type": "message",
        "role": "assistant",
        "content": [
          {
            "type": "output_text",
            "text": "Hey! 👋\n\nHow can I help today?  \n- Questions about Onyx (setup, search, auth, plugins)?\n- Debugging or architecture advice?\n- Writing docs, emails, or code snippets?\n- Anything else on your mind?"
          }
        ]
      },
      {
        "type": "message",
        "role": "user",
        "content": [
          {
            "type": "input_text",
            "text": "[YOUR QUERY HERE]"
          }
        ]
      }
    ],
    "instructions": "Formatting re-enabled. You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.\n\nThe current date is Sunday December 28, 2025.\n\n# Response Style\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\n# User Information\n\nThe user is at an organization called `Onyx`.\n\nOrganization description: AI chat and enterprise search. Open source, self host or managed cloud.\n\nGithub: https://github.com/onyx-dot-app/onyx\nWebsite: https://onyx.app\n\n- User's name: USER_A\n- User's email: user_a@onyx.app\n\n# Tools\n\nFor questions that can be fully answered from existing knowledge which is unlikely to change, answer the user directly without using any tools. When there is ambiguity, default to searching to get more context.\n\nWhen using any search type tool, do not make any assumptions and stay as faithful to the user's query as possible. Between internal and web search, think about if the user's query is likely better answered by team internal sources or online web pages. For queries that are short phrases, ambiguous/unclear, or keyword heavy, prioritize internal search. If ambiguous, prioritize internal search.\nWhen searching for information, if the initial results cannot fully answer the user's query, try again with different tools or arguments. Do not repeat the same or very similar queries if it already has been run in the chat history.\n\n## internal_search\nUse the `internal_search` tool to search connected applications for information.\n\n## web_search\nUse the `web_search` tool to access up-to-date information from the web.\n\n## open_url\nUse the `open_url` tool to read the content of one or more URLs.\n\n## generate_image\nNEVER use generate_image unless the user specifically requests an image.\n",
    "parallel_tool_calls": true,
    "reasoning": {
      "effort": "low",
      "summary": "auto"
    },
    "stream": true,
    "temperature": 1,
    "tool_choice": "auto",
    "tools": [
      {
        "name": "internal_search",
        "type": "function",
        "description": "Search connected applications for information.",
        "parameters": {
          "type": "object",
          "properties": {
            "queries": {
              "type": "array",
              "items": { "type": "string" },
              "description": "List of search queries to execute, typically a single query."
            }
          },
          "required": ["queries"]
        }
      },
      {
        "name": "generate_image",
        "type": "function",
        "description": "Generate an image based on a prompt. Do not use unless the user specifically requests an image.",
        "parameters": {
          "type": "object",
          "properties": {
            "prompt": { "type": "string" },
            "shape": {
              "type": "string",
              "enum": ["square", "portrait", "landscape"]
            }
          },
          "required": ["prompt"]
        }
      },
      {
        "name": "web_search",
        "type": "function",
        "description": "Search the web for information.",
        "parameters": {
          "type": "object",
          "properties": {
            "queries": {
              "type": "array",
              "items": { "type": "string" }
            }
          },
          "required": ["queries"]
        }
      },
      {
        "name": "open_url",
        "type": "function",
        "description": "Open and read the content of one or more URLs.",
        "parameters": {
          "type": "object",
          "properties": {
            "urls": {
              "type": "array",
              "items": { "type": "string" }
            }
          },
          "required": ["urls"]
        }
      }
    ]
  }
  

================================================
FILE: backend/scripts/debugging/onyx_db.py
================================================
"""Onyx Database tool"""

import os

# hack to work around excessive use of globals in other functions
os.environ["MULTI_TENANT"] = "True"

if True:  # noqa: E402
    import csv
    import argparse

    from pydantic import BaseModel
    from sqlalchemy import func

    from onyx.db.engine.sql_engine import (
        SYNC_DB_API,
        USE_IAM_AUTH,
        build_connection_string,
    )
    from onyx.db.engine.tenant_utils import get_all_tenant_ids
    from onyx.db.engine.sql_engine import get_session_with_tenant
    from onyx.db.engine.sql_engine import SqlEngine
    from onyx.db.models import Document
    from onyx.db.models import User
    from onyx.utils.logger import setup_logger
    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

    import heapq

    logger = setup_logger()


class TenantMetadata(BaseModel):
    first_email: str | None
    user_count: int
    num_docs: int
    num_chunks: int


class SQLAlchemyDebugging:
    # Class for managing DB debugging actions.
    def __init__(self) -> None:
        pass

    def top_chunks(self, filename: str, k: int = 10) -> None:
        tenants_to_total_chunks: dict[str, TenantMetadata] = {}

        logger.info("Fetching all tenant id's.")
        tenant_ids = get_all_tenant_ids()
        num_tenant_ids = len(tenant_ids)

        logger.info(f"Found {num_tenant_ids} tenant id's.")

        num_processed = 0
        for tenant_id in tenant_ids:
            num_processed += 1

            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

            try:
                with get_session_with_tenant(tenant_id=tenant_id) as db_session:
                    first_email = None

                    first_user = db_session.query(User).first()
                    if first_user:
                        first_email = first_user.email

                    user_count = db_session.query(User).count()

                    # Calculate the total number of document rows for the current tenant
                    total_documents = db_session.query(Document).count()
                    # marginally useful to skip some tenants ... maybe we can improve on this
                    # if total_documents < 100:
                    #     logger.info(f"{num_processed} of {num_tenant_ids}: Tenant '{tenant_id}': "
                    #                 f"docs={total_documents} skip=True")
                    #     continue

                    # Calculate the sum of chunk_count for the current tenant
                    # If there are no documents or all chunk_counts are NULL, sum will be None
                    total_chunks = db_session.query(
                        func.sum(Document.chunk_count)
                    ).scalar()

                    total_chunks = total_chunks or 0

                    logger.info(
                        f"{num_processed} of {num_tenant_ids}: Tenant '{tenant_id}': "
                        f"first_email={first_email} user_count={user_count} "
                        f"docs={total_documents} chunks={total_chunks}"
                    )

                tenants_to_total_chunks[tenant_id] = TenantMetadata(
                    first_email=first_email,
                    user_count=user_count,
                    num_docs=total_documents,
                    num_chunks=total_chunks,
                )
            except Exception as e:
                logger.error(f"Error processing tenant '{tenant_id}': {e}")
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        # sort all by docs and dump to csv
        sorted_tenants = sorted(
            tenants_to_total_chunks.items(),
            key=lambda x: (x[1].num_chunks, x[1].num_docs),
            reverse=True,
        )

        with open(filename, "w") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(
                ["tenant_id", "first_user_email", "num_user", "num_docs", "num_chunks"]
            )  # Write header
            # Write data rows (using the sorted list)
            for tenant_id, metadata in sorted_tenants:
                writer.writerow(
                    [
                        tenant_id,
                        metadata.first_email,
                        metadata.user_count,
                        metadata.num_docs,
                        metadata.num_chunks,
                    ]
                )
            logger.info(f"Successfully wrote statistics to {filename}")

        # output top k by chunks
        top_k_tenants = heapq.nlargest(
            k, tenants_to_total_chunks.items(), key=lambda x: x[1].num_docs
        )

        logger.info(f"Top {k} tenants by total chunks: {top_k_tenants}")


def main() -> None:
    parser = argparse.ArgumentParser(description="Database/SQL debugging tool")
    parser.add_argument("--username", help="Database username", default="postgres")
    parser.add_argument("--password", help="Database password", required=True)
    parser.add_argument("--host", help="Database host", default="localhost")
    parser.add_argument("--port", help="Database port", default=5432)
    parser.add_argument("--db", help="Database default db name", default="danswer")

    parser.add_argument("--report", help="Generate the given report")

    parser.add_argument(
        "--filename",
        type=str,
        default="tenants_by_num_docs.csv",
        help="Generate the given report",
        required=False,
    )

    args = parser.parse_args()

    logger.info(f"{args}")

    connection_string = build_connection_string(
        db_api=SYNC_DB_API,
        app_name="onyx_db_sync",
        use_iam_auth=USE_IAM_AUTH,
        user=args.username,
        password=args.password,
        host=args.host,
        port=args.port,
        db=args.db,
    )

    SqlEngine.init_engine(
        pool_size=20, max_overflow=5, connection_string=connection_string
    )

    debugger = SQLAlchemyDebugging()

    if args.report == "top-chunks":
        debugger.top_chunks(args.filename, 10)
    else:
        logger.info("No action.")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/onyx_list_tenants.py
================================================
#!/usr/bin/env python3

"""
Tenant List Script
Simple script to list the tenant IDs in the database.
Used by the parallel migration script to determine how to split work.

Usage:

```
# List one tenant per line (default)
PYTHONPATH=. python scripts/debugging/onyx_list_tenants.py

# Output as CSV (all on one line)
PYTHONPATH=. python scripts/debugging/onyx_list_tenants.py --csv

# Output as CSV batched into groups of 5
PYTHONPATH=. python scripts/debugging/onyx_list_tenants.py --csv -n 5
```

"""

import argparse
import sys

from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from shared_configs.configs import TENANT_ID_PREFIX


def batch_list(items: list[str], batch_size: int) -> list[list[str]]:
    """Split a list into batches of specified size."""
    return [items[i : i + batch_size] for i in range(0, len(items), batch_size)]


def main() -> None:
    parser = argparse.ArgumentParser(
        description="List tenant IDs from the database.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        help="Output as comma-separated values instead of one per line",
    )
    parser.add_argument(
        "-n",
        "--max-args",
        type=int,
        default=None,
        metavar="N",
        help="Batch CSV output into groups of N items (requires --csv)",
    )
    args = parser.parse_args()

    if args.max_args is not None and not args.csv:
        parser.error("--max-args/-n requires --csv flag")

    try:
        # Initialize the database engine with conservative settings
        SqlEngine.init_engine(pool_size=5, max_overflow=2)

        # Get all tenant IDs
        tenant_ids = get_all_tenant_ids()

        # Filter to only tenant schemas (not public or other system schemas)
        tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]

        if args.csv:
            if args.max_args:
                # Output batched CSV lines
                for batch in batch_list(tenant_schemas, args.max_args):
                    print(",".join(batch))
            else:
                # Output all on one line
                print(",".join(tenant_schemas))
        else:
            # Print all tenant IDs, one per line
            for tenant_id in tenant_schemas:
                print(tenant_id)

    except Exception as e:
        print(f"Error getting tenant IDs: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/onyx_redis.py
================================================
import argparse
import json
import logging
import sys
import time
from enum import Enum
from logging import getLogger
from typing import cast
from uuid import UUID

from redis import Redis

from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import write_invited_users
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PASSWORD
from onyx.configs.app_configs import REDIS_PORT
from onyx.configs.app_configs import REDIS_SSL
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.users import get_user_by_email
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import RedisPool
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

# Tool to run helpful operations on Redis in production
# This is targeted for internal usage and may not have all the necessary parameters
# for general usage across custom deployments

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)

SCAN_ITER_COUNT = 10000
BATCH_DEFAULT = 1000


class OnyxRedisCommand(Enum):
    purge_connectorsync_taskset = "purge_connectorsync_taskset"
    purge_documentset_taskset = "purge_documentset_taskset"
    purge_usergroup_taskset = "purge_usergroup_taskset"
    purge_locks_blocking_deletion = "purge_locks_blocking_deletion"
    purge_vespa_syncing = "purge_vespa_syncing"
    purge_pidbox = "purge_pidbox"
    get_user_token = "get_user_token"
    delete_user_token = "delete_user_token"
    add_invited_user = "add_invited_user"
    get_list_element = "get_list_element"

    def __str__(self) -> str:
        return self.value


def get_user_id(user_email: str) -> tuple[UUID, str]:
    tenant_id = (
        get_tenant_id_for_email(user_email) if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA
    )

    with get_session_with_tenant(tenant_id=tenant_id) as session:
        user = get_user_by_email(user_email, session)
        if user is None:
            raise ValueError(f"User not found for email: {user_email}")
        return user.id, tenant_id


def onyx_redis(
    command: OnyxRedisCommand,
    batch: int,
    dry_run: bool,
    ssl: bool,
    host: str,
    port: int,
    db: int,
    password: str | None,
    user_email: str | None = None,
    cc_pair_id: int | None = None,
) -> int:
    # this is global and not tenant aware
    pool = RedisPool.create_pool(
        host=host,
        port=port,
        db=db,
        password=password if password else "",
        ssl=ssl,
        ssl_cert_reqs="optional",
        ssl_ca_certs=None,
    )

    r = Redis(connection_pool=pool)

    logger.info("Redis ping starting. This may hang if your settings are incorrect.")

    try:
        r.ping()
    except:
        logger.exception("Redis ping exceptioned")
        raise

    logger.info("Redis ping succeeded.")

    if command == OnyxRedisCommand.purge_connectorsync_taskset:
        """Purge connector tasksets. Used when the tasks represented in the tasksets
        have been purged."""
        return purge_by_match_and_type(
            "*connectorsync_taskset*", "set", batch, dry_run, r
        )
    elif command == OnyxRedisCommand.purge_documentset_taskset:
        return purge_by_match_and_type(
            "*documentset_taskset*", "set", batch, dry_run, r
        )
    elif command == OnyxRedisCommand.purge_usergroup_taskset:
        return purge_by_match_and_type("*usergroup_taskset*", "set", batch, dry_run, r)
    elif command == OnyxRedisCommand.purge_locks_blocking_deletion:
        if cc_pair_id is None:
            logger.error("You must specify --cc-pair with purge_deletion_locks")
            return 1

        tenant_id = get_current_tenant_id()
        logger.info(f"Purging locks associated with deleting cc_pair={cc_pair_id}.")
        redis_connector = RedisConnector(tenant_id, cc_pair_id)

        redis_delete_if_exists_helper(
            f"{tenant_id}:{redis_connector.prune.fence_key}", dry_run, r
        )
        redis_delete_if_exists_helper(
            f"{tenant_id}:{redis_connector.permissions.fence_key}", dry_run, r
        )
        redis_delete_if_exists_helper(
            f"{tenant_id}:{redis_connector.external_group_sync.fence_key}", dry_run, r
        )
        return 0
    elif command == OnyxRedisCommand.purge_vespa_syncing:
        return purge_by_match_and_type(
            "*connectorsync:vespa_syncing*", "string", batch, dry_run, r
        )
    elif command == OnyxRedisCommand.purge_pidbox:
        return purge_by_match_and_type(
            "*reply.celery.pidbox", "list", batch, dry_run, r
        )
    elif command == OnyxRedisCommand.get_list_element:
        # just hardcoded for now
        result = r.lrange(
            "0097a564-d343-3c1f-9fd1-af8cce038115.reply.celery.pidbox", 0, 0
        )
        print(f"{result}")
        return 0
    elif command == OnyxRedisCommand.get_user_token:
        if not user_email:
            logger.error("You must specify --user-email with get_user_token")
            return 1
        token_key = get_user_token_from_redis(r, user_email)
        if token_key:
            print(f"Token key for user {user_email}: {token_key}")
            return 0
        else:
            print(f"No token found for user {user_email}")
            return 2
    elif command == OnyxRedisCommand.delete_user_token:
        if not user_email:
            logger.error("You must specify --user-email with delete_user_token")
            return 1
        if delete_user_token_from_redis(r, user_email, dry_run):
            return 0
        else:
            return 2
    elif command == OnyxRedisCommand.add_invited_user:
        if not user_email:
            logger.error("You must specify --user-email with add_invited_user")
            return 1
        current_invited_users = get_invited_users()
        if user_email not in current_invited_users:
            current_invited_users.append(user_email)
            if dry_run:
                logger.info(f"(DRY-RUN) Would add {user_email} to invited users")
            else:
                write_invited_users(current_invited_users)
                logger.info(f"Added {user_email} to invited users")
        else:
            logger.info(f"{user_email} is already in the invited users list")
        return 0
    else:
        pass

    return 255


def flush_batch_delete(batch_keys: list[bytes], r: Redis) -> None:
    logger.info(f"Flushing {len(batch_keys)} operations to Redis.")
    with r.pipeline() as pipe:
        for batch_key in batch_keys:
            pipe.delete(batch_key)
        pipe.execute()


def redis_delete_if_exists_helper(key: str, dry_run: bool, r: Redis) -> bool:
    """Returns True if the key was found, False if not.
    This function exists for logging purposes as the delete operation itself
    doesn't really need to check the existence of the key.
    """

    if not r.exists(key):
        logger.info(f"Did not find {key}.")
        return False

    if dry_run:
        logger.info(f"(DRY-RUN) Deleting {key}.")
    else:
        logger.info(f"Deleting {key}.")
        r.delete(key)

    return True


def purge_by_match_and_type(
    match_pattern: str, match_type: str, batch_size: int, dry_run: bool, r: Redis
) -> int:
    """match_pattern: glob style expression
    match_type: https://redis.io/docs/latest/commands/type/
    """

    logger.info(
        f"purge_by_match_and_type start: match_pattern={match_pattern} match_type={match_type}"
    )

    # cursor = "0"
    # while cursor != 0:
    #     cursor, data = self.scan(
    #         cursor=cursor, match=match, count=count, _type=_type, **kwargs
    #     )

    start = time.monotonic()

    count = 0
    batch_keys: list[bytes] = []
    for key in r.scan_iter(match_pattern, count=SCAN_ITER_COUNT, _type=match_type):
        # key_type = r.type(key)
        # if key_type != match_type.encode("utf-8"):
        #     continue

        key = cast(bytes, key)
        key_str = key.decode("utf-8")

        count += 1
        if dry_run:
            logger.info(f"(DRY-RUN) Deleting item {count}: {key_str}")
            continue

        logger.info(f"Deleting item {count}: {key_str}")

        batch_keys.append(key)

        # flush if batch size has been reached
        if len(batch_keys) >= batch_size:
            flush_batch_delete(batch_keys, r)
            batch_keys.clear()

    # final flush
    flush_batch_delete(batch_keys, r)
    batch_keys.clear()

    logger.info(f"Deleted {count} matches.")

    elapsed = time.monotonic() - start
    logger.info(f"Time elapsed: {elapsed:.2f}s")
    return 0


def get_user_token_from_redis(r: Redis, user_email: str) -> str | None:
    """
    Scans Redis keys for a user token that matches user_email or user_id fields.
    Returns the token key if found, else None.
    """
    user_id, tenant_id = get_user_id(user_email)

    # Scan for keys matching the auth key prefix
    auth_keys = r.scan_iter(f"{REDIS_AUTH_KEY_PREFIX}*", count=SCAN_ITER_COUNT)

    matching_key = None

    for key in auth_keys:
        key_str = key.decode("utf-8")
        jwt_token = r.get(key_str)

        if not jwt_token:
            continue

        try:
            jwt_token_str = (
                jwt_token.decode("utf-8")
                if isinstance(jwt_token, bytes)
                else str(jwt_token)
            )

            if jwt_token_str.startswith("b'") and jwt_token_str.endswith("'"):
                jwt_token_str = jwt_token_str[2:-1]  # Remove b'' wrapper

            jwt_data = json.loads(jwt_token_str)
            if jwt_data.get("tenant_id") == tenant_id and str(
                jwt_data.get("sub")
            ) == str(user_id):
                matching_key = key_str
                break
        except json.JSONDecodeError:
            logger.error(f"Failed to decode JSON for key: {key_str}")
        except Exception as e:
            logger.error(f"Error processing JWT for key: {key_str}. Error: {str(e)}")

    if matching_key:
        return matching_key[len(REDIS_AUTH_KEY_PREFIX) :]
    return None


def delete_user_token_from_redis(
    r: Redis, user_email: str, dry_run: bool = False
) -> bool:
    """
    Scans Redis keys for a user token matching user_email and deletes it if found.
    Returns True if something was deleted, otherwise False.
    """
    user_id, tenant_id = get_user_id(user_email)

    # Scan for keys matching the auth key prefix
    auth_keys = r.scan_iter(f"{REDIS_AUTH_KEY_PREFIX}*", count=SCAN_ITER_COUNT)
    matching_key = None

    for key in auth_keys:
        key_str = key.decode("utf-8")
        jwt_token = r.get(key_str)

        if not jwt_token:
            continue

        try:
            jwt_token_str = (
                jwt_token.decode("utf-8")
                if isinstance(jwt_token, bytes)
                else str(jwt_token)
            )

            if jwt_token_str.startswith("b'") and jwt_token_str.endswith("'"):
                jwt_token_str = jwt_token_str[2:-1]  # Remove b'' wrapper

            jwt_data = json.loads(jwt_token_str)
            if jwt_data.get("tenant_id") == tenant_id and str(
                jwt_data.get("sub")
            ) == str(user_id):
                matching_key = key_str
                break
        except json.JSONDecodeError:
            logger.error(f"Failed to decode JSON for key: {key_str}")
        except Exception as e:
            logger.error(f"Error processing JWT for key: {key_str}. Error: {str(e)}")

    if matching_key:
        if dry_run:
            logger.info(f"(DRY-RUN) Would delete token key: {matching_key}")
        else:
            r.delete(matching_key)
            logger.info(f"Deleted token for user: {user_email}")
        return True
    else:
        logger.info(f"No token found for user: {user_email}")
        return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Onyx Redis Manager")
    parser.add_argument(
        "--command",
        type=OnyxRedisCommand,
        help="The command to run",
        choices=list(OnyxRedisCommand),
        required=True,
    )

    parser.add_argument(
        "--ssl",
        type=bool,
        default=REDIS_SSL,
        help="Use SSL when connecting to Redis. Usually True for prod and False for local testing",
        required=False,
    )

    parser.add_argument(
        "--host",
        type=str,
        default=REDIS_HOST,
        help="The redis host",
        required=False,
    )

    parser.add_argument(
        "--port",
        type=int,
        default=REDIS_PORT,
        help="The redis port",
        required=False,
    )

    parser.add_argument(
        "--db",
        type=int,
        default=REDIS_DB_NUMBER,
        help="The redis db",
        required=False,
    )

    parser.add_argument(
        "--password",
        type=str,
        default=REDIS_PASSWORD,
        help="The redis password",
        required=False,
    )

    parser.add_argument(
        "--tenant-id",
        type=str,
        help="Tenant ID for get, delete user token, or add to invited users",
        required=False,
    )

    parser.add_argument(
        "--batch",
        type=int,
        default=BATCH_DEFAULT,
        help="Size of operation batches to send to Redis",
        required=False,
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Perform a dry run without actually executing modifications",
        required=False,
    )

    parser.add_argument(
        "--user-email",
        type=str,
        help="User email for get, delete user token, or add to invited users",
        required=False,
    )

    parser.add_argument(
        "--cc-pair",
        type=int,
        help="A connector credential pair id. Used with the purge_deletion_locks command.",
        required=False,
    )

    args = parser.parse_args()

    if args.tenant_id:
        CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)

    exitcode = onyx_redis(
        command=args.command,
        batch=args.batch,
        dry_run=args.dry_run,
        ssl=args.ssl,
        host=args.host,
        port=args.port,
        db=args.db,
        password=args.password,
        user_email=args.user_email,
        cc_pair_id=args.cc_pair,
    )
    sys.exit(exitcode)


================================================
FILE: backend/scripts/debugging/onyx_vespa_schemas.py
================================================
"""Tool to generate all supported schema variations for Onyx Cloud's Vespa database.

Usage:

```
PYTHONPATH=. python scripts/debugging/onyx_vespa_schemas.py
```

Then, paste them into the existing vespa schema downloaded from the Vespa console,
and then re-zip.
"""

import argparse
import os
from pathlib import Path

import jinja2

from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
from onyx.db.enums import EmbeddingPrecision
from onyx.utils.logger import setup_logger

logger = setup_logger()


def write_schema(
    index_name: str,
    dim: int,
    embedding_precision: EmbeddingPrecision,
    template: jinja2.Template,
    output_path: Path,
) -> None:
    # Create schemas directory if it doesn't exist
    schemas_dir = output_path / "schemas"
    schemas_dir.mkdir(parents=True, exist_ok=True)

    index_filename = schemas_dir / (index_name + ".sd")

    schema = template.render(
        multi_tenant=True,
        schema_name=index_name,
        dim=dim,
        embedding_precision=embedding_precision.value,
    )

    with open(index_filename, "w", encoding="utf-8") as f:
        f.write(schema)

    logger.info(f"Wrote {index_filename}")


def generate_document_entries() -> str:
    """Generate document entries for all supported embedding models."""
    document_entries = []

    for model in SUPPORTED_EMBEDDING_MODELS:
        # Add regular index
        document_entries.append(
            f'            <document type="{model.index_name}" mode="index" />'
        )
        # Add alt index
        document_entries.append(
            f'            <document type="{model.index_name}__danswer_alt_index" mode="index" />'
        )

    return "\n".join(document_entries)


def write_cloud_services(cloud_services_template_path: str, output_path: Path) -> None:
    """Generate and write the cloud-services.xml file."""
    # Create output directory if it doesn't exist
    output_path.mkdir(parents=True, exist_ok=True)

    jinja_env = jinja2.Environment()

    with open(cloud_services_template_path, "r", encoding="utf-8") as f:
        template_str = f.read()

    template = jinja_env.from_string(template_str)
    document_entries = generate_document_entries()

    services_xml = template.render(document_elements=document_entries)

    services_file = output_path / "services.xml"
    with open(services_file, "w", encoding="utf-8") as f:
        f.write(services_xml)

    logger.info(f"Wrote {services_file}")


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Generate multi tenant Vespa schemas and services configuration"
    )
    parser.add_argument(
        "--template",
        help="The Jinja template to use for schemas",
        default="onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja",
    )
    parser.add_argument(
        "--cloud-services-template",
        help="The cloud-services.xml.jinja template path",
        default="ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja",
    )
    parser.add_argument(
        "--output-path",
        help="Output directory path (defaults to current directory)",
        default=".",
    )
    args = parser.parse_args()

    # Convert output path to Path object
    output_path = Path(args.output_path)

    jinja_env = jinja2.Environment()

    # Generate schema files
    with open(args.template, "r", encoding="utf-8") as f:
        template_str = f.read()

    template = jinja_env.from_string(template_str)

    num_indexes = 0
    for model in SUPPORTED_EMBEDDING_MODELS:
        write_schema(
            model.index_name,
            model.dim,
            model.embedding_precision,
            template,
            output_path,
        )
        write_schema(
            model.index_name + "__danswer_alt_index",
            model.dim,
            model.embedding_precision,
            template,
            output_path,
        )
        num_indexes += 2

    logger.info(f"Wrote {num_indexes} indexes.")

    # Generate cloud services configuration if template is provided
    if args.cloud_services_template:
        if os.path.exists(args.cloud_services_template):
            write_cloud_services(args.cloud_services_template, output_path)
        else:
            logger.error(
                f"Cloud services template not found: {args.cloud_services_template}"
            )


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/opensearch/benchmark_retrieval.py
================================================
#!/usr/bin/env python3
"""Benchmarks OpenSearchDocumentIndex latency.

Requires Onyx to be running as it reads search settings from the database.

Usage:
    source .venv/bin/activate
    python backend/scripts/debugging/opensearch/benchmark_retrieval.py --help
"""

import argparse
import statistics
import time

from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
)
from onyx.indexing.models import IndexingSetting
from scripts.debugging.opensearch.constants import DEV_TENANT_ID
from scripts.debugging.opensearch.embedding_io import load_query_embedding_from_file
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id

DEFAULT_N = 50


def main() -> None:
    def add_query_embedding_argument(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "-e",
            "--embedding-file-path",
            type=str,
            required=True,
            help="Path to the query embedding file.",
        )

    def add_query_string_argument(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "-q",
            "--query",
            type=str,
            required=True,
            help="Query string.",
        )

    parser = argparse.ArgumentParser(
        description="A benchmarking tool to measure OpenSearch retrieval latency."
    )
    parser.add_argument(
        "-n",
        type=int,
        default=DEFAULT_N,
        help=f"Number of samples to take (default: {DEFAULT_N}).",
    )
    subparsers = parser.add_subparsers(
        dest="query_type",
        help="Query type to benchmark.",
        required=True,
    )

    hybrid_parser = subparsers.add_parser(
        "hybrid", help="Benchmark hybrid retrieval latency."
    )
    add_query_embedding_argument(hybrid_parser)
    add_query_string_argument(hybrid_parser)

    keyword_parser = subparsers.add_parser(
        "keyword", help="Benchmark keyword retrieval latency."
    )
    add_query_string_argument(keyword_parser)

    semantic_parser = subparsers.add_parser(
        "semantic", help="Benchmark semantic retrieval latency."
    )
    add_query_embedding_argument(semantic_parser)

    args = parser.parse_args()

    if args.n < 1:
        parser.error("Number of samples (-n) must be at least 1.")

    if MULTI_TENANT:
        CURRENT_TENANT_ID_CONTEXTVAR.set(DEV_TENANT_ID)

    SqlEngine.init_engine(pool_size=1, max_overflow=0)
    with get_session_with_current_tenant() as session:
        search_settings = get_current_search_settings(session)
        indexing_setting = IndexingSetting.from_db_model(search_settings)

    tenant_state = TenantState(
        tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
    )
    index = OpenSearchDocumentIndex(
        tenant_state=tenant_state,
        index_name=search_settings.index_name,
        embedding_dim=indexing_setting.final_embedding_dim,
        embedding_precision=indexing_setting.embedding_precision,
    )
    filters = IndexFilters(
        access_control_list=[],
        tenant_id=get_current_tenant_id(),
    )

    if args.query_type == "hybrid":
        embedding = load_query_embedding_from_file(args.embedding_file_path)
        search_callable = lambda: index.hybrid_retrieval(  # noqa: E731
            query=args.query,
            query_embedding=embedding,
            final_keywords=None,
            # This arg doesn't do anything right now.
            query_type=QueryType.KEYWORD,
            filters=filters,
            num_to_retrieve=NUM_RETURNED_HITS,
        )
    elif args.query_type == "keyword":
        search_callable = lambda: index.keyword_retrieval(  # noqa: E731
            query=args.query,
            filters=filters,
            num_to_retrieve=NUM_RETURNED_HITS,
        )
    elif args.query_type == "semantic":
        embedding = load_query_embedding_from_file(args.embedding_file_path)
        search_callable = lambda: index.semantic_retrieval(  # noqa: E731
            query_embedding=embedding,
            filters=filters,
            num_to_retrieve=NUM_RETURNED_HITS,
        )
    else:
        raise ValueError(f"Invalid query type: {args.query_type}")

    print(f"Running {args.n} invocations of {args.query_type} retrieval...")

    latencies: list[float] = []
    for i in range(args.n):
        start = time.perf_counter()
        results = search_callable()
        elapsed_ms = (time.perf_counter() - start) * 1000
        latencies.append(elapsed_ms)
        # Print the current iteration and its elapsed time on the same line.
        print(
            f"  [{i:>{len(str(args.n))}}] {elapsed_ms:7.1f} ms  ({len(results)} results) (top result doc ID, chunk idx: {results[0].document_id if results else 'N/A'}, {results[0].chunk_id if results else 'N/A'})",
            end="\r",
            flush=True,
        )

    print()
    print(f"Results over {args.n} invocations:")
    print(f"   mean: {statistics.mean(latencies):7.1f} ms")
    print(
        f"  stdev: {statistics.stdev(latencies):7.1f} ms"
        if args.n > 1
        else "  stdev: N/A (only 1 sample)"
    )
    print(f"    max: {max(latencies):7.1f} ms (i: {latencies.index(max(latencies))})")
    print(f"    min: {min(latencies):7.1f} ms (i: {latencies.index(min(latencies))})")
    if args.n >= 20:
        print(f"    p50: {statistics.median(latencies):7.1f} ms")
        print(f"    p95: {statistics.quantiles(latencies, n=20)[-1]:7.1f} ms")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/opensearch/constants.py
================================================
DEV_TENANT_ID = "tenant_dev"


================================================
FILE: backend/scripts/debugging/opensearch/embed_and_save.py
================================================
#!/usr/bin/env python3
"""Embeds a query and saves the embedding to a file.

Requires Onyx to be running as it reads search settings from the database.

Usage:
    source .venv/bin/activate
    python backend/scripts/debugging/opensearch/embed_and_save.py --help
"""

import argparse
import time

from onyx.context.search.utils import get_query_embedding
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from scripts.debugging.opensearch.constants import DEV_TENANT_ID
from scripts.debugging.opensearch.embedding_io import save_query_embedding_to_file
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


def main() -> None:
    parser = argparse.ArgumentParser(
        description="A tool to embed a query and save the embedding to a file."
    )
    parser.add_argument(
        "-q",
        "--query",
        type=str,
        required=True,
        help="Query string to embed.",
    )
    parser.add_argument(
        "-f",
        "--file-path",
        type=str,
        required=True,
        help="Path to the output file to save the embedding to.",
    )

    args = parser.parse_args()

    if MULTI_TENANT:
        CURRENT_TENANT_ID_CONTEXTVAR.set(DEV_TENANT_ID)

    SqlEngine.init_engine(pool_size=1, max_overflow=0)
    with get_session_with_current_tenant() as session:
        start = time.perf_counter()
        query_embedding = get_query_embedding(
            query=args.query,
            db_session=session,
            embedding_model=None,
        )
        elapsed_ms = (time.perf_counter() - start) * 1000

    save_query_embedding_to_file(query_embedding, args.file_path)
    print(
        f"Query embedding of dimension {len(query_embedding)} generated in {elapsed_ms:.1f} ms and saved to {args.file_path}."
    )


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/opensearch/embedding_io.py
================================================
from shared_configs.model_server_models import Embedding


def load_query_embedding_from_file(file_path: str) -> Embedding:
    """Returns an embedding vector read from a file.

    The file should be formatted as follows:
    - The first line should contain an integer representing the embedding
      dimension.
    - Every subsequent line should contain a float value representing a
      component of the embedding vector.
    - The size and embedding content should all be delimited by a newline.

    Args:
        file_path: Path to the file containing the embedding vector.

    Returns:
        Embedding: The embedding vector.
    """
    with open(file_path, "r") as f:
        dimension = int(f.readline().strip())
        embedding = [float(line.strip()) for line in f.readlines()]
        assert len(embedding) == dimension, "Embedding dimension mismatch."
        return embedding


def save_query_embedding_to_file(embedding: Embedding, file_path: str) -> None:
    """Saves an embedding vector to a file.

    The file will be formatted as follows:
    - The first line will contain the embedding dimension.
    - Every subsequent line will contain a float value representing a
      component of the embedding vector.
    - The size and embedding content will all be delimited by a newline.

    Args:
        embedding: The embedding vector to save.
        file_path: Path to the file to save the embedding vector to.
    """
    with open(file_path, "w") as f:
        f.write(f"{len(embedding)}\n")
        for component in embedding:
            f.write(f"{component}\n")


================================================
FILE: backend/scripts/debugging/opensearch/opensearch_debug.py
================================================
#!/usr/bin/env python3
"""A utility to interact with OpenSearch.

Usage:
    source .venv/bin/activate
    python backend/scripts/debugging/opensearch/opensearch_debug.py --help
    python backend/scripts/debugging/opensearch/opensearch_debug.py list
    python backend/scripts/debugging/opensearch/opensearch_debug.py delete <index_name>

Environment Variables:
    OPENSEARCH_HOST: OpenSearch host
    OPENSEARCH_REST_API_PORT: OpenSearch port
    OPENSEARCH_ADMIN_USERNAME: Admin username
    OPENSEARCH_ADMIN_PASSWORD: Admin password

Dependencies:
    backend/shared_configs/configs.py
    backend/onyx/document_index/opensearch/client.py
"""

import argparse
import os
import sys

from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from shared_configs.configs import MULTI_TENANT


def list_indices(client: OpenSearchClient) -> None:
    indices = client.list_indices_with_info()
    print(f"Found {len(indices)} indices.")
    print("-" * 80)
    for index in sorted(indices, key=lambda x: x.name):
        print(f"Index: {index.name}")
        print(f"Health: {index.health}")
        print(f"Status: {index.status}")
        print(f"Num Primary Shards: {index.num_primary_shards}")
        print(f"Num Replica Shards: {index.num_replica_shards}")
        print(f"Docs Count: {index.docs_count}")
        print(f"Docs Deleted: {index.docs_deleted}")
        print(f"Created At: {index.created_at}")
        print(f"Total Size: {index.total_size}")
        print(f"Primary Shards Size: {index.primary_shards_size}")
        print("-" * 80)


def delete_index(client: OpenSearchIndexClient) -> None:
    if not client.index_exists():
        print(f"Index '{client._index_name}' does not exist.")
        return

    confirm = input(f"Delete index '{client._index_name}'? (yes/no): ")
    if confirm.lower() != "yes":
        print("Aborted.")
        return

    if client.delete_index():
        print(f"Deleted index '{client._index_name}'.")
    else:
        print(f"Failed to delete index '{client._index_name}' for an unknown reason.")


def main() -> None:
    def add_standard_arguments(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "--host",
            help="OpenSearch host. If not provided, will fall back to OPENSEARCH_HOST, then prompt for input.",
            type=str,
            default=os.environ.get("OPENSEARCH_HOST", ""),
        )
        parser.add_argument(
            "--port",
            help="OpenSearch port. If not provided, will fall back to OPENSEARCH_REST_API_PORT, then prompt for input.",
            type=int,
            default=int(os.environ.get("OPENSEARCH_REST_API_PORT", 0)),
        )
        parser.add_argument(
            "--username",
            help="OpenSearch username. If not provided, will fall back to OPENSEARCH_ADMIN_USERNAME, then prompt for input.",
            type=str,
            default=os.environ.get("OPENSEARCH_ADMIN_USERNAME", ""),
        )
        parser.add_argument(
            "--password",
            help="OpenSearch password. If not provided, will fall back to OPENSEARCH_ADMIN_PASSWORD, then prompt for input.",
            type=str,
            default=os.environ.get("OPENSEARCH_ADMIN_PASSWORD", ""),
        )
        parser.add_argument(
            "--no-ssl", help="Disable SSL.", action="store_true", default=False
        )
        parser.add_argument(
            "--no-verify-certs",
            help="Disable certificate verification (for self-signed certs).",
            action="store_true",
            default=False,
        )
        parser.add_argument(
            "--use-aws-managed-opensearch",
            help="Whether to use AWS-managed OpenSearch. If not provided, will fall back to checking "
            "USING_AWS_MANAGED_OPENSEARCH=='true', then default to False.",
            action=argparse.BooleanOptionalAction,
            default=os.environ.get("USING_AWS_MANAGED_OPENSEARCH", "").lower()
            == "true",
        )

    parser = argparse.ArgumentParser(
        description="A utility to interact with OpenSearch."
    )
    add_standard_arguments(parser)
    subparsers = parser.add_subparsers(
        dest="command", help="Command to execute.", required=True
    )

    subparsers.add_parser("list", help="List all indices with info.")

    delete_parser = subparsers.add_parser("delete", help="Delete an index.")
    delete_parser.add_argument("index", help="Index name.", type=str)

    args = parser.parse_args()

    if not (host := args.host or input("Enter the OpenSearch host: ")):
        print("Error: OpenSearch host is required.")
        sys.exit(1)
    if not (port := args.port or int(input("Enter the OpenSearch port: "))):
        print("Error: OpenSearch port is required.")
        sys.exit(1)
    if not (username := args.username or input("Enter the OpenSearch username: ")):
        print("Error: OpenSearch username is required.")
        sys.exit(1)
    if not (password := args.password or input("Enter the OpenSearch password: ")):
        print("Error: OpenSearch password is required.")
        sys.exit(1)
    print("Using AWS-managed OpenSearch: ", args.use_aws_managed_opensearch)
    print(f"MULTI_TENANT: {MULTI_TENANT}")

    with (
        OpenSearchIndexClient(
            index_name=args.index,
            host=host,
            port=port,
            auth=(username, password),
            use_ssl=not args.no_ssl,
            verify_certs=not args.no_verify_certs,
        )
        if args.command == "delete"
        else OpenSearchClient(
            host=host,
            port=port,
            auth=(username, password),
            use_ssl=not args.no_ssl,
            verify_certs=not args.no_verify_certs,
        )
    ) as client:
        if not client.ping():
            print("Error: Could not connect to OpenSearch.")
            sys.exit(1)

        if args.command == "list":
            list_indices(client)
        elif args.command == "delete":
            delete_index(client)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/debugging/opensearch/query_hierarchy_debug.py
================================================
#!/usr/bin/env python3
"""
Debug utility for querying and inspecting hierarchy data in OpenSearch.

This script connects to OpenSearch and allows you to:
- Query documents by ID and view their hierarchy ancestor node IDs
- List documents that have hierarchy data

Usage:
    python query_hierarchy_debug.py --document-id <doc_id>
    python query_hierarchy_debug.py --list-with-hierarchy

Environment Variables:
    OPENSEARCH_HOST: OpenSearch host (default: localhost)
    OPENSEARCH_PORT: OpenSearch port (default: 9200)

Dependencies:
    pip install opensearch-py
"""

import argparse
import os
import sys

try:
    from opensearchpy import OpenSearch
except ImportError as e:
    print("Error: Missing dependency. Run: pip install opensearch-py")
    print(f"Details: {e}")
    sys.exit(1)


def get_client() -> OpenSearch:
    """Create OpenSearch client from environment variables."""
    host = os.environ.get("OPENSEARCH_HOST", "localhost")
    port = int(os.environ.get("OPENSEARCH_PORT", "9200"))
    return OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=None,  # Add auth if needed
        use_ssl=False,
    )


def query_document(client: OpenSearch, index: str, doc_id: str) -> None:
    """Query a specific document and view its hierarchy ancestor node IDs."""
    query = {"query": {"term": {"document_id": doc_id}}, "size": 10}

    result = client.search(index=index, body=query)
    hits = result.get("hits", {}).get("hits", [])

    if not hits:
        print(f"No document found with ID: {doc_id}")
        return

    print(f"Found {len(hits)} chunk(s) for document ID: {doc_id}\n")

    for hit in hits:
        source = hit.get("_source", {})
        ancestor_ids = source.get("ancestor_hierarchy_node_ids", [])

        print(f"  Chunk Index: {source.get('chunk_index')}")
        print(f"  Semantic ID: {source.get('semantic_identifier', 'N/A')}")

        if ancestor_ids:
            print(f"  Ancestor Node IDs: {ancestor_ids}")
        else:
            print("  Ancestor Node IDs: (none)")
        print()


def list_with_hierarchy(client: OpenSearch, index: str, limit: int = 10) -> None:
    """List documents that have hierarchy data."""
    query = {
        "query": {"exists": {"field": "ancestor_hierarchy_node_ids"}},
        "size": limit,
        "_source": [
            "document_id",
            "chunk_index",
            "ancestor_hierarchy_node_ids",
            "semantic_identifier",
        ],
    }

    result = client.search(index=index, body=query)
    hits = result.get("hits", {}).get("hits", [])

    print(f"Found {len(hits)} document chunks with hierarchy data (limit: {limit}):\n")

    for hit in hits:
        source = hit.get("_source", {})
        ancestor_ids = source.get("ancestor_hierarchy_node_ids", [])

        print(f"  {source.get('document_id')} (chunk {source.get('chunk_index')})")
        print(f"    Semantic ID: {source.get('semantic_identifier', 'N/A')}")
        print(f"    Ancestors: {ancestor_ids}\n")


def list_indices(client: OpenSearch) -> None:
    """List available indices."""
    indices = client.indices.get_alias(index="*")
    print("Available indices:")
    for index_name in sorted(indices.keys()):
        if not index_name.startswith("."):  # Skip system indices
            print(f"  - {index_name}")


def main() -> None:
    parser = argparse.ArgumentParser(description="Debug hierarchy data in OpenSearch")
    parser.add_argument("--document-id", help="Query a specific document by ID")
    parser.add_argument(
        "--list-with-hierarchy",
        action="store_true",
        help="List documents with hierarchy data",
    )
    parser.add_argument("--list-indices", action="store_true", help="List all indices")
    parser.add_argument("--index", default="onyx_index", help="OpenSearch index name")
    parser.add_argument("--limit", type=int, default=10, help="Limit for list queries")

    args = parser.parse_args()

    client = get_client()

    if args.list_indices:
        list_indices(client)
    elif args.document_id:
        query_document(client, args.index, args.document_id)
    elif args.list_with_hierarchy:
        list_with_hierarchy(client, args.index, args.limit)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/decrypt.py
================================================
"""Decrypt a raw hex-encoded credential value.

Usage:
    python -m scripts.decrypt <hex_value>
    python -m scripts.decrypt <hex_value> --key "my-encryption-key"
    python -m scripts.decrypt <hex_value> --key ""

Pass --key "" to skip decryption and just decode the raw bytes as UTF-8.
Omit --key to use the current ENCRYPTION_KEY_SECRET from the environment.
"""

import argparse
import binascii
import json
import os
import sys

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.utils.encryption import decrypt_bytes_to_string  # noqa: E402
from onyx.utils.variable_functionality import global_version  # noqa: E402


def decrypt_raw_credential(encrypted_value: str, key: str | None = None) -> None:
    """Decrypt and display a raw encrypted credential value.

    Args:
        encrypted_value: The hex-encoded encrypted credential value.
        key: Encryption key to use. None means use ENCRYPTION_KEY_SECRET,
             empty string means just decode as UTF-8.
    """
    # Strip common hex prefixes
    if encrypted_value.startswith("\\x"):
        encrypted_value = encrypted_value[2:]
    elif encrypted_value.startswith("x"):
        encrypted_value = encrypted_value[1:]
    print(encrypted_value)

    try:
        raw_bytes = binascii.unhexlify(encrypted_value)
    except binascii.Error:
        print("Error: Invalid hex-encoded string")
        sys.exit(1)

    if key == "":
        # Empty key → just decode as UTF-8, no decryption
        try:
            decrypted_str = raw_bytes.decode("utf-8")
        except UnicodeDecodeError as e:
            print(f"Error decoding bytes as UTF-8: {e}")
            sys.exit(1)
    else:
        print(key)
        try:
            decrypted_str = decrypt_bytes_to_string(raw_bytes, key=key)
        except Exception as e:
            print(f"Error decrypting value: {e}")
            sys.exit(1)

    # Try to pretty-print as JSON, otherwise print raw
    try:
        parsed = json.loads(decrypted_str)
        print(json.dumps(parsed, indent=2))
    except json.JSONDecodeError:
        print(decrypted_str)


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Decrypt a hex-encoded credential value."
    )
    parser.add_argument(
        "value",
        help="Hex-encoded encrypted value to decrypt.",
    )
    parser.add_argument(
        "--key",
        default=None,
        help=(
            "Encryption key. Omit to use ENCRYPTION_KEY_SECRET from env. "
            'Pass "" (empty) to just decode as UTF-8 without decryption.'
        ),
    )
    args = parser.parse_args()

    global_version.set_ee()
    decrypt_raw_credential(args.value, key=args.key)
    global_version.unset_ee()


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/dev_run_background_jobs.py
================================================
import subprocess
import threading


def monitor_process(process_name: str, process: subprocess.Popen) -> None:
    assert process.stdout is not None

    while True:
        output = process.stdout.readline()

        if output:
            print(f"{process_name}: {output.strip()}")

        if process.poll() is not None:
            break


def run_jobs() -> None:
    cmd_worker_primary = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.primary",
        "worker",
        "--pool=threads",
        "--concurrency=6",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=primary@%n",
        "-Q",
        "celery",
    ]

    cmd_worker_light = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.light",
        "worker",
        "--pool=threads",
        "--concurrency=16",
        "--prefetch-multiplier=8",
        "--loglevel=INFO",
        "--hostname=light@%n",
        "-Q",
        "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration",
    ]

    cmd_worker_docprocessing = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.docprocessing",
        "worker",
        "--pool=threads",
        "--concurrency=6",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=docprocessing@%n",
        "--queues=docprocessing",
    ]

    cmd_worker_docfetching = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.docfetching",
        "worker",
        "--pool=threads",
        "--concurrency=1",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=docfetching@%n",
        "--queues=connector_doc_fetching",
    ]

    cmd_worker_heavy = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.heavy",
        "worker",
        "--pool=threads",
        "--concurrency=4",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=heavy@%n",
        "-Q",
        "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox",
    ]

    cmd_worker_monitoring = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.monitoring",
        "worker",
        "--pool=threads",
        "--concurrency=1",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=monitoring@%n",
        "-Q",
        "monitoring",
    ]

    cmd_worker_user_file_processing = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.user_file_processing",
        "worker",
        "--pool=threads",
        "--concurrency=2",
        "--prefetch-multiplier=1",
        "--loglevel=INFO",
        "--hostname=user_file_processing@%n",
        "-Q",
        "user_file_processing,user_file_project_sync,user_file_delete",
    ]

    cmd_beat = [
        "celery",
        "-A",
        "onyx.background.celery.versioned_apps.beat",
        "beat",
        "--loglevel=INFO",
    ]

    all_workers = [
        ("PRIMARY", cmd_worker_primary),
        ("LIGHT", cmd_worker_light),
        ("DOCPROCESSING", cmd_worker_docprocessing),
        ("DOCFETCHING", cmd_worker_docfetching),
        ("HEAVY", cmd_worker_heavy),
        ("MONITORING", cmd_worker_monitoring),
        ("USER_FILE_PROCESSING", cmd_worker_user_file_processing),
        ("BEAT", cmd_beat),
    ]

    processes = []
    for name, cmd in all_workers:
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
        )
        processes.append((name, process))

    threads = []
    for name, process in processes:
        thread = threading.Thread(target=monitor_process, args=(name, process))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()


if __name__ == "__main__":
    run_jobs()


================================================
FILE: backend/scripts/docker_memory_tracking.sh
================================================
#!/bin/bash

# USAGE: nohup ./docker_memory_tracking.sh &

# Set default output file or use the provided argument
OUTPUT_FILE="./docker_stats.log"
if [ $# -ge 1 ]; then
    OUTPUT_FILE="$1"
fi

INTERVAL_SECONDS=600  # 10 minutes

# Create the output file if it doesn't exist, or append to it if it does
touch "$OUTPUT_FILE"

echo "Docker stats will be collected every 10 minutes and saved to $OUTPUT_FILE"
echo "Press Ctrl+C to stop the script"

# Function to handle script termination
cleanup() {
    echo -e "\nStopping docker stats collection"
    exit 0
}

# Set up trap for clean exit
trap cleanup SIGINT SIGTERM

# Main loop
while true; do
    # Add timestamp
    echo -e "\n--- Docker Stats: $(date) ---" >> "$OUTPUT_FILE"
    
    # Run docker stats for a single snapshot (--no-stream ensures it runs once)
    docker stats --no-stream --all >> "$OUTPUT_FILE"
    
    # Wait for the next interval
    echo "Stats collected at $(date). Next collection in 10 minutes."
    sleep $INTERVAL_SECONDS
done


================================================
FILE: backend/scripts/force_delete_connector_by_id.py
================================================
import argparse
import os
import sys

from sqlalchemy import delete
from sqlalchemy.orm import Session

from onyx.db.document import delete_documents_complete__no_commit
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.search_settings import get_active_search_settings
from onyx.db.tag import delete_orphan_tags__no_commit
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

# Modify sys.path
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

# pylint: disable=E402
# flake8: noqa: E402

# Now import Onyx modules
from onyx.db.models import (
    DocumentSet__ConnectorCredentialPair,
    UserGroup__ConnectorCredentialPair,
)
from onyx.db.connector import fetch_connector_by_id
from onyx.db.document import get_documents_for_connector_credential_pair
from onyx.db.index_attempt import (
    delete_index_attempts,
    cancel_indexing_attempts_for_ccpair,
)
from onyx.db.permission_sync_attempt import (
    delete_doc_permission_sync_attempts__no_commit,
)
from onyx.db.permission_sync_attempt import (
    delete_external_group_permission_sync_attempts__no_commit,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.document_index.interfaces import DocumentIndex
from onyx.utils.logger import setup_logger
from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import (
    get_connector_credential_pair_from_id,
    get_connector_credential_pair,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.factory import (
    get_all_document_indices,
)
from onyx.file_store.file_store import get_default_file_store

# pylint: enable=E402
# flake8: noqa: E402


logger = setup_logger()

_DELETION_BATCH_SIZE = 1000


def _unsafe_deletion(
    db_session: Session,
    document_indices: list[DocumentIndex],
    cc_pair: ConnectorCredentialPair,
    pair_id: int,
) -> int:
    connector_id = cc_pair.connector_id
    credential_id = cc_pair.credential_id

    num_docs_deleted = 0

    # Gather and delete documents
    while True:
        documents = get_documents_for_connector_credential_pair(
            db_session=db_session,
            connector_id=connector_id,
            credential_id=credential_id,
            limit=_DELETION_BATCH_SIZE,
        )
        if not documents:
            break

        for document in documents:
            for document_index in document_indices:
                document_index.delete_single(
                    doc_id=document.id,
                    tenant_id=POSTGRES_DEFAULT_SCHEMA,
                    chunk_count=document.chunk_count,
                )

        delete_documents_complete__no_commit(
            db_session=db_session,
            document_ids=[document.id for document in documents],
        )
        delete_orphan_tags__no_commit(db_session=db_session)

        num_docs_deleted += len(documents)

    # Delete index attempts
    delete_index_attempts(
        db_session=db_session,
        cc_pair_id=cc_pair.id,
    )

    # Delete permission sync attempts
    delete_doc_permission_sync_attempts__no_commit(
        db_session=db_session,
        cc_pair_id=cc_pair.id,
    )
    delete_external_group_permission_sync_attempts__no_commit(
        db_session=db_session,
        cc_pair_id=cc_pair.id,
    )

    # Delete document sets
    stmt = delete(DocumentSet__ConnectorCredentialPair).where(
        DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == pair_id
    )
    db_session.execute(stmt)

    # delete user group associations
    stmt = delete(UserGroup__ConnectorCredentialPair).where(
        UserGroup__ConnectorCredentialPair.cc_pair_id == pair_id
    )
    db_session.execute(stmt)

    # need to flush to avoid foreign key violations
    db_session.flush()

    # delete the actual connector credential pair
    stmt = delete(ConnectorCredentialPair).where(
        ConnectorCredentialPair.connector_id == connector_id,
        ConnectorCredentialPair.credential_id == credential_id,
    )
    db_session.execute(stmt)

    # Delete Connector
    connector = fetch_connector_by_id(
        db_session=db_session,
        connector_id=connector_id,
    )
    if not connector or not len(connector.credentials):
        logger.debug("Found no credentials left for connector, deleting connector")
        db_session.delete(connector)
    db_session.commit()

    logger.notice(
        "Successfully deleted connector_credential_pair with connector_id:"
        f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs."
    )
    return num_docs_deleted


def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
    user_input = input(
        "DO NOT USE THIS UNLESS YOU KNOW WHAT YOU ARE DOING. \
        IT MAY CAUSE ISSUES with your Onyx instance! \
        Are you SURE you want to continue? (enter 'Y' to continue): "
    )
    if user_input != "Y":
        logger.notice(f"You entered {user_input}. Exiting!")
        return

    logger.notice("Getting connector credential pair")
    cc_pair = get_connector_credential_pair_from_id(
        db_session=db_session,
        cc_pair_id=cc_pair_id,
    )

    if not cc_pair:
        logger.error(f"Connector credential pair with ID {cc_pair_id} not found")
        return

    if cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:
        logger.error(
            f"Connector {cc_pair.connector.name} is active, cannot continue. \
            Please navigate to the connector and pause before attempting again"
        )
        return

    connector_id = cc_pair.connector_id
    credential_id = cc_pair.credential_id

    if cc_pair is None:
        logger.error(
            f"Connector with ID '{connector_id}' and credential ID "
            f"'{credential_id}' does not exist. Has it already been deleted?",
        )
        return

    logger.notice("Cancelling indexing attempt for the connector")
    cancel_indexing_attempts_for_ccpair(
        cc_pair_id=cc_pair_id, db_session=db_session, include_secondary_index=True
    )

    validated_cc_pair = get_connector_credential_pair(
        db_session=db_session,
        connector_id=connector_id,
        credential_id=credential_id,
    )

    if not validated_cc_pair:
        logger.error(
            f"Cannot run deletion attempt - connector_credential_pair with Connector ID: "
            f"{connector_id} and Credential ID: {credential_id} does not exist."
        )

    file_ids: list[str] = (
        cc_pair.connector.connector_specific_config["file_locations"]
        if cc_pair.connector.source == DocumentSource.FILE
        else []
    )
    try:
        logger.notice("Deleting information from Vespa and Postgres")
        active_search_settings = get_active_search_settings(db_session)
        # This flow is for deletion so we get all indices.
        document_indices = get_all_document_indices(
            active_search_settings.primary,
            active_search_settings.secondary,
            None,
        )

        files_deleted_count = _unsafe_deletion(
            db_session=db_session,
            document_indices=document_indices,
            cc_pair=cc_pair,
            pair_id=cc_pair_id,
        )
        logger.notice(f"Deleted {files_deleted_count} files!")

    except Exception as e:
        logger.error(f"Failed to delete connector due to {e}")

    if file_ids:
        logger.notice("Deleting stored files!")
        file_store = get_default_file_store()
        for file_id in file_ids:
            logger.notice(f"Deleting file {file_id}")
            file_store.delete_file(file_id)

    db_session.commit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Delete a connector by its ID")
    parser.add_argument(
        "connector_id", type=int, help="The ID of the connector to delete"
    )

    args = parser.parse_args()
    with get_session_with_current_tenant() as db_session:
        _delete_connector(args.connector_id, db_session)


================================================
FILE: backend/scripts/get_wikidocs.py
================================================
#!/usr/bin/env python3
"""
Script to pull Wikipedia documents from Hugging Face and organize them into zip files.

Usage:
    python get_wikidocs.py --total 1000 --per-zip 100 --output ./wikidata_zips
"""

import argparse
import os
import re
import zipfile
from pathlib import Path

from datasets import load_dataset  # type: ignore
from tqdm import tqdm  # type: ignore


def sanitize_filename(title: str) -> str:
    """
    Sanitize a title for use as a filename.

    - Remove special characters
    - Replace whitespaces with underscores
    - Limit length to avoid filesystem issues

    Args:
        title: The Wikipedia page title

    Returns:
        Sanitized filename string
    """
    # Replace whitespace with underscores
    sanitized = re.sub(r"\s+", "_", title)

    # Remove special characters, keep alphanumeric, underscores, and hyphens
    sanitized = re.sub(r"[^a-zA-Z0-9_\-]", "", sanitized)

    # Limit length to 200 characters to avoid filesystem issues
    if len(sanitized) > 200:
        sanitized = sanitized[:200]

    # Ensure it's not empty after sanitization
    if not sanitized:
        sanitized = "untitled"

    return sanitized


def stream_wikipedia_to_zips(
    total_pages: int,
    pages_per_zip: int,
    output_dir: str = ".",
    dataset_name: str = "wikipedia",
    dataset_config: str = "20220301.en",
) -> None:
    """
    Stream Wikipedia pages from Hugging Face and write them to zip files.

    Args:
        total_pages: Total number of Wikipedia pages to download
        pages_per_zip: Number of pages to include in each zip file
        output_dir: Directory where zip files will be saved
        dataset_name: Name of the dataset on Hugging Face
        dataset_config: Configuration/version of the dataset
    """
    # Create output directory if it doesn't exist
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print("Loading Wikipedia dataset from Hugging Face (streaming mode)...")
    print(f"Dataset: {dataset_name}, Config: {dataset_config}")

    # Load dataset in streaming mode
    dataset = load_dataset(
        dataset_name,
        dataset_config,
        split="train",
        streaming=True,
        trust_remote_code=True,
    )

    # Initialize counters
    current_zip_index = 0
    pages_in_current_zip = 0
    current_zip = None
    zip_path = None

    # Process pages with progress bar
    with tqdm(total=total_pages, desc="Processing Wikipedia pages") as pbar:
        for idx, page in enumerate(dataset):
            if idx >= total_pages:
                break

            # Create new zip file if needed
            if pages_in_current_zip == 0 or pages_in_current_zip >= pages_per_zip:
                # Close previous zip if exists
                if current_zip is not None:
                    current_zip.close()
                    print(f"\nCompleted: {zip_path} ({pages_in_current_zip} pages)")

                # Create new zip
                zip_path = output_path / f"wiki_data_{current_zip_index}.zip"
                current_zip = zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED)
                current_zip_index += 1
                pages_in_current_zip = 0

            # Extract page data
            title = page.get("title", f"page_{idx}")
            text = page.get("text", "")

            # Create sanitized filename
            filename = f"{sanitize_filename(title)}.txt"

            # Ensure current_zip is not None (should always be created in the if block above)
            if current_zip is None:
                raise RuntimeError("Zip file was not properly initialized")

            # Handle potential duplicate filenames within the same zip
            base_filename = filename
            counter = 1
            while filename in current_zip.namelist():
                name, ext = os.path.splitext(base_filename)
                filename = f"{name}_{counter}{ext}"
                counter += 1

            # Write page content to zip
            page_content = f"Title: {title}\n\n{text}"
            current_zip.writestr(filename, page_content)

            pages_in_current_zip += 1
            pbar.update(1)

    # Close final zip file
    if current_zip is not None:
        current_zip.close()
        print(f"\nCompleted: {zip_path} ({pages_in_current_zip} pages)")

    print(f"\nSuccessfully created {current_zip_index} zip file(s) in {output_dir}")
    print(f"Total pages processed: {min(total_pages, idx + 1)}")


def main() -> int:
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(
        description="Pull Wikipedia documents from Hugging Face and organize into zip files",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "--total",
        type=int,
        required=True,
        help="Total number of Wikipedia pages to download",
    )

    parser.add_argument(
        "--per-zip",
        type=int,
        required=True,
        help="Number of pages to include in each zip file",
    )

    parser.add_argument(
        "--output", type=str, default=".", help="Output directory for zip files"
    )

    parser.add_argument(
        "--dataset",
        type=str,
        default="wikipedia",
        help="Name of the Wikipedia dataset on Hugging Face",
    )

    parser.add_argument(
        "--config",
        type=str,
        default="20220301.en",
        help="Dataset configuration (e.g., '20220301.en' for English Wikipedia from March 2022)",
    )

    args = parser.parse_args()

    # Validate arguments
    if args.total <= 0:
        parser.error("--total must be a positive integer")

    if args.per_zip <= 0:
        parser.error("--per-zip must be a positive integer")

    print("=" * 70)
    print("Wikipedia Data Extractor")
    print("=" * 70)
    print(f"Total pages: {args.total}")
    print(f"Pages per zip: {args.per_zip}")
    print(f"Output directory: {args.output}")
    print(f"Expected zip files: {(args.total + args.per_zip - 1) // args.per_zip}")
    print("=" * 70)
    print()

    try:
        stream_wikipedia_to_zips(
            total_pages=args.total,
            pages_per_zip=args.per_zip,
            output_dir=args.output,
            dataset_name=args.dataset,
            dataset_config=args.config,
        )
    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user")
    except Exception as e:
        print(f"\nError: {e}")
        import traceback

        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    exit(main())


================================================
FILE: backend/scripts/hard_delete_chats.py
================================================
import os
import sys


# Ensure PYTHONPATH is set up for direct script execution
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print(parent_dir)
sys.path.append(parent_dir)

from onyx.db.engine.sql_engine import get_session_with_current_tenant  # noqa: E402
from onyx.db.engine.sql_engine import SqlEngine  # noqa: E402
from onyx.db.models import ChatSession  # noqa: E402
from onyx.db.chat import delete_chat_session  # noqa: E402


def main() -> None:
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    with get_session_with_current_tenant() as db_session:
        deleted_sessions = (
            db_session.query(ChatSession).filter(ChatSession.deleted.is_(True)).all()
        )
        if not deleted_sessions:
            print("No deleted chat sessions found.")
            return
        print(f"Found {len(deleted_sessions)} deleted chat sessions:")
        for session in deleted_sessions:
            print(f"  - ID: {session.id} | deleted: {session.deleted}")
        confirm = input(
            "\nAre you sure you want to hard delete these sessions? Type 'yes' to confirm: "
        )
        if confirm.strip().lower() != "yes":
            print("Aborted by user.")
            return
        total = 0
        for session in deleted_sessions:
            print(f"Deleting {session.id}")
            try:
                delete_chat_session(
                    user_id=None,
                    chat_session_id=session.id,
                    db_session=db_session,
                    include_deleted=True,
                    hard_delete=True,
                )
                total += 1
            except Exception as e:
                print(f"Error deleting session {session.id}: {e}")
        print(f"Deleted {total}")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/lib/logger.py
================================================
from __future__ import annotations

import logging
import os
import sys

# Detect CI environment
IS_CI = os.getenv("CI", "").lower() == "true"
IS_DEBUG = os.getenv("DEBUG", "").lower() == "true"

# ANSI color codes for local terminal
GRAY = "\033[90m"
RED = "\033[91m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
RESET = "\033[0m"


class CIFormatter(logging.Formatter):
    """
    Formatter that emits GitHub Actions workflow commands in CI,
    or colored output locally.
    """

    def format(self, record: logging.LogRecord) -> str:
        msg = record.getMessage()
        metadata = getattr(record, "extra", {})

        # Use standard extra fields as GitHub Actions metadata
        meta_fields = ["file", "line", "col", "endLine", "endColumn"]
        metadata = {k: getattr(record, k) for k in meta_fields if hasattr(record, k)}

        if IS_CI and record.levelno >= logging.WARNING:
            command = "error" if record.levelno >= logging.ERROR else "warning"
            meta_str = ",".join(f"{k}={v}" for k, v in metadata.items())
            if meta_str:
                return f"::{command} {meta_str}::{msg}"
            else:
                return f"::{command}::{msg}"

        # Local colored output
        if record.levelno >= logging.ERROR:
            return f"{RED}Error:{RESET} {msg}"
        elif record.levelno >= logging.WARNING:
            return f"{YELLOW}Warning:{RESET} {msg}"
        elif record.levelno >= logging.INFO:
            return f"{CYAN}Info:{RESET} {msg}"
        elif record.levelno >= logging.DEBUG:
            return f"{GRAY}Debug:{RESET} {msg}"
        return msg


def getLogger(name: str | None = None, level: int | None = None) -> logging.Logger:
    """
    Get a CI-aware logger.
    """
    logger = logging.getLogger(name)
    if level is None:
        level = logging.DEBUG if IS_DEBUG else logging.INFO
    logger.setLevel(level)

    if not logger.hasHandlers():
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(CIFormatter())
        logger.addHandler(handler)

    return logger


================================================
FILE: backend/scripts/make_foss_repo.sh
================================================
#!/usr/bin/env bash
set -euo pipefail

echo "=== Building FOSS mirror ==="
rm -rf /tmp/foss_repo && mkdir -p /tmp/foss_repo
git clone . /tmp/foss_repo
cd /tmp/foss_repo

echo "=== Creating MIT license file ==="
cat > /tmp/mit_license.txt << 'EOF'
Copyright (c) 2023-present DanswerAI, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
EOF

# NOTE: intentionally keeping the web/src/app/ee directory
# for now since there's no clean way to remove it
echo "=== Removing enterprise directory and licenses from history ==="
git filter-repo \
  --path backend/ee --invert-paths \
  --path backend/ee/LICENSE --invert-paths \
  --path web/src/app/ee/LICENSE --invert-paths \
  --force

# NOTE: not ideal, since this means every day folks with the repo
# locally will need to hard reset if they want to pull in more stuff.
echo "=== Recreating empty enterprise directory ==="
mkdir -p backend/ee
touch backend/ee/__init__.py
git add backend/ee

echo "=== Updating README ==="

cat > /tmp/foss_notice.txt << 'EOF'

> [!NOTE]
> **This is the FOSS (Free and Open Source Software) version of Onyx**
> 
> This repository is 100% MIT-licensed and automatically synced with the [main Onyx repository](https://github.com/onyx-dot-app/onyx). The [main repository](https://github.com/onyx-dot-app/onyx) is recommended for most users. This FOSS version is maintained for users with strict open-source licensing requirements.
> 
> ---

EOF

sed -i '/<a name="readme-top"><\/a>/r /tmp/foss_notice.txt' README.md
sed -i 's/utm_source=onyx_repo/utm_source=foss_repo/g' README.md

git add README.md
git commit -m "README"

echo "=== Creating blob callback script ==="
cat > /tmp/license_replacer.py << 'PYEOF'
#!/usr/bin/env python3
import sys

# Read MIT license from file
with open('/tmp/mit_license.txt', 'rb') as f:
    MIT_LICENSE = f.read()

import git_filter_repo as fr

replaced_count = 0

def replace_license_blob_content(blob, metadata):
    """Replace LICENSE blob content with MIT license based on content detection"""
    global replaced_count

    # Check if this blob looks like a license file
    # We'll replace any blob that contains the old Apache/custom license text
    if blob.data and len(blob.data) > 100:
        # Check for license-like content
        # Unfortunately, we don't have access to the path, so we can't just check that the path
        # is `LICENSE`.
        data_lower = blob.data.lower()
        if (
            b'portions of this software are licensed as follows' in data_lower and
            b'all third party components incorporated into the' in data_lower
        ):
            # Additional check: make sure it's actually a license file, not source code
            # License files typically don't have common code patterns
            if b'def ' not in blob.data and b'class ' not in blob.data and b'import ' not in blob.data[:200]:
                blob.data = MIT_LICENSE
                replaced_count += 1

args = fr.FilteringOptions.parse_args(['--force'], error_on_empty=False)
filter_obj = fr.RepoFilter(args, blob_callback=replace_license_blob_content)
filter_obj.run()

print(f"Replaced {replaced_count} LICENSE blob(s)", file=sys.stderr)
PYEOF

echo "=== Replacing LICENSE file in all commits ==="
chmod +x /tmp/license_replacer.py
/tmp/license_replacer.py

echo "=== Done building FOSS repo ==="


================================================
FILE: backend/scripts/onyx_openapi_schema.py
================================================
# export openapi schema without having to start the actual web server

# helpful tips: https://github.com/fastapi/fastapi/issues/1173

import argparse
import json
import os
import subprocess
import sys

from fastapi import FastAPI
from fastapi.openapi.utils import get_openapi

from onyx.main import app as app_fn

OPENAPI_VERSION = "3.1.0"


def go(filename: str, tagged_for_docs: str | None = None) -> None:
    """Generate OpenAPI schema.

    By default outputs tag-stripped schema (for client generation).
    If tagged_for_docs is provided, also outputs the original tagged version for docs.
    """
    app: FastAPI = app_fn()
    app.openapi_version = OPENAPI_VERSION
    schema = get_openapi(
        title=app.title,
        version=app.version,
        openapi_version=app.openapi_version,
        description=app.description,
        routes=app.routes,
    )

    # Output tagged version for docs if requested
    if tagged_for_docs:
        with open(tagged_for_docs, "w") as f:
            json.dump(schema, f)
        print(f"Wrote tagged OpenAPI schema to {tagged_for_docs}")

    # Output stripped version (default) for client generation
    stripped = strip_tags_from_schema(schema)
    with open(filename, "w") as f:
        json.dump(stripped, f)
    print(f"Wrote OpenAPI schema to {filename}.")


def strip_tags_from_schema(schema: dict) -> dict:
    """Strip tags from OpenAPI schema so openapi-generator puts all endpoints in DefaultApi."""
    import copy

    schema = copy.deepcopy(schema)

    # Remove tags from all operations
    if "paths" in schema:
        for path_item in schema["paths"].values():
            for operation in path_item.values():
                if isinstance(operation, dict) and "tags" in operation:
                    del operation["tags"]

    # Remove top-level tags definition
    if "tags" in schema:
        del schema["tags"]

    return schema


def generate_client(openapi_json_path: str, strip_tags: bool = True) -> None:
    """Generate Python client from OpenAPI schema using openapi-generator."""
    import tempfile

    output_dir = os.path.join(os.path.dirname(openapi_json_path), "onyx_openapi_client")

    # Optionally strip tags so all endpoints go under DefaultApi
    schema_path = openapi_json_path
    if strip_tags:
        with open(openapi_json_path) as f:
            schema = json.load(f)
        stripped = strip_tags_from_schema(schema)
        fd, schema_path = tempfile.mkstemp(suffix=".json")
        with os.fdopen(fd, "w") as f:
            json.dump(stripped, f)
        print(f"Stripped tags from schema, using temp file: {schema_path}")

    cmd = [
        "openapi-generator",
        "generate",
        "-i",
        schema_path,
        "-g",
        "python",
        "-o",
        output_dir,
        "--package-name",
        "onyx_openapi_client",
        "--skip-validate-spec",
        "--openapi-normalizer",
        "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true",
    ]

    print("Running openapi-generator...")
    try:
        result = subprocess.run(cmd)
        if result.returncode == 0:
            print(f"Generated Python client at {output_dir}")
        else:
            print(
                "Failed to generate Python client. See backend/tests/integration/README.md for setup instructions.",
                file=sys.stderr,
            )
    finally:
        # Clean up temp file if we created one
        if strip_tags and schema_path != openapi_json_path:
            os.unlink(schema_path)


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Export OpenAPI schema for Onyx API (does not require starting API server)"
    )
    parser.add_argument(
        "--filename", "-f", help="Filename to write to", default="openapi.json"
    )
    parser.add_argument(
        "--generate-python-client",
        action="store_true",
        help="Generate Python client schemas (needed for integration tests)",
    )
    parser.add_argument(
        "--tagged-for-docs",
        help="Also output a tagged version for API docs (specify output path)",
    )

    args = parser.parse_args()
    go(args.filename, tagged_for_docs=args.tagged_for_docs)

    if args.generate_python_client:
        # Schema is already stripped by go(), no need to strip again
        generate_client(args.filename, strip_tags=False)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/orphan_doc_cleanup_script.py
================================================
import concurrent.futures
import os
import sys

from sqlalchemy import text
from sqlalchemy.orm import Session

from onyx.document_index.document_index_utils import get_multipass_config
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

# makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.context.search.models import IndexFilters  # noqa: E402
from onyx.document_index.interfaces import VespaChunkRequest  # noqa: E402
from onyx.db.engine.sql_engine import get_session_with_current_tenant  # noqa: E402
from onyx.db.document import delete_documents_complete__no_commit  # noqa: E402
from onyx.db.tag import delete_orphan_tags__no_commit  # noqa: E402
from onyx.db.search_settings import get_current_search_settings  # noqa: E402
from onyx.document_index.vespa.index import VespaIndex  # noqa: E402
from onyx.db.document import get_document  # noqa: E402

BATCH_SIZE = 100


def _get_orphaned_document_ids(db_session: Session, limit: int) -> list[str]:
    """Get document IDs that don't have any entries in document_by_connector_credential_pair"""
    query = text(
        """
        SELECT d.id
        FROM document d
        LEFT JOIN document_by_connector_credential_pair dbcc ON d.id = dbcc.id
        WHERE dbcc.id IS NULL
        LIMIT :limit
    """
    )
    orphaned_ids = [doc_id[0] for doc_id in db_session.execute(query, {"limit": limit})]
    print(f"Found {len(orphaned_ids)} orphaned documents in this batch")
    return orphaned_ids


def main() -> None:
    with get_session_with_current_tenant() as db_session:
        total_processed = 0
        while True:
            # Get orphaned document IDs in batches
            orphaned_ids = _get_orphaned_document_ids(db_session, BATCH_SIZE)
            if not orphaned_ids:
                if total_processed == 0:
                    print("No orphaned documents found")
                else:
                    print(
                        f"Finished processing all batches. Total documents processed: {total_processed}"
                    )
                return

            # Setup Vespa index
            search_settings = get_current_search_settings(db_session)
            multipass_config = get_multipass_config(search_settings)
            index_name = search_settings.index_name
            vespa_index = VespaIndex(
                index_name=index_name,
                secondary_index_name=None,
                large_chunks_enabled=multipass_config.enable_large_chunks,
                secondary_large_chunks_enabled=None,
            )

            # Delete chunks from Vespa first
            print("Deleting orphaned document chunks from Vespa")
            successfully_vespa_deleted_doc_ids: list[str] = []
            # Process documents in parallel using ThreadPoolExecutor
            with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:

                def process_doc(doc_id: str) -> str | None:
                    document = get_document(doc_id, db_session)
                    if not document:
                        return None
                    # Check if document exists in Vespa first
                    try:
                        chunks = vespa_index.id_based_retrieval(
                            chunk_requests=[
                                VespaChunkRequest(document_id=doc_id, max_chunk_ind=2)
                            ],
                            filters=IndexFilters(access_control_list=None),
                            batch_retrieval=True,
                        )
                        if not chunks:
                            print(f"Document {doc_id} not found in Vespa")
                            return doc_id
                    except Exception as e:
                        print(
                            f"Error checking if document {doc_id} exists in Vespa: {e}"
                        )
                        return None

                    try:
                        print(f"Deleting document {doc_id} in Vespa")
                        chunks_deleted = vespa_index.delete_single(
                            doc_id,
                            tenant_id=POSTGRES_DEFAULT_SCHEMA,
                            chunk_count=document.chunk_count,
                        )
                        if chunks_deleted > 0:
                            print(
                                f"Deleted {chunks_deleted} chunks for document {doc_id}"
                            )
                        return doc_id
                    except Exception as e:
                        print(
                            f"Error deleting document {doc_id} in Vespa and will not delete from Postgres: {e}"
                        )
                        return None

                # Submit all tasks and gather results
                futures = [
                    executor.submit(process_doc, doc_id) for doc_id in orphaned_ids
                ]
                for future in concurrent.futures.as_completed(futures):
                    doc_id = future.result()
                    if doc_id:
                        successfully_vespa_deleted_doc_ids.append(doc_id)

            # Delete documents from Postgres
            print("Deleting orphaned documents from Postgres")
            try:
                delete_documents_complete__no_commit(
                    db_session, successfully_vespa_deleted_doc_ids
                )
                delete_orphan_tags__no_commit(db_session)
                db_session.commit()
            except Exception as e:
                print(f"Error deleting documents from Postgres: {e}")
                break

            total_processed += len(successfully_vespa_deleted_doc_ids)
            print(
                f"Successfully cleaned up {len(successfully_vespa_deleted_doc_ids)} orphaned documents in this batch"
            )
            print(f"Total documents processed so far: {total_processed}")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/query_time_check/seed_dummy_docs.py
================================================
"""
launch:
- api server
- postgres
- vespa
- model server (this is only needed so the api server can startup, no embedding is done)

Run this script to seed the database with dummy documents.
Then run test_query_times.py to test query times.
"""

import random
from datetime import datetime

from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.indexing_pipeline import IndexBatchParams
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.utils.timing import log_function_time
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.model_server_models import Embedding

TOTAL_DOC_SETS = 8
TOTAL_ACL_ENTRIES_PER_CATEGORY = 80


def generate_random_embedding(dim: int) -> Embedding:
    return [random.uniform(-1, 1) for _ in range(dim)]


def generate_random_identifier() -> str:
    return f"dummy_doc_{random.randint(1, 1000)}"


def generate_dummy_chunk(
    doc_id: str,
    chunk_id: int,
    embedding_dim: int,
    number_of_acl_entries: int,
    number_of_document_sets: int,
) -> DocMetadataAwareIndexChunk:
    document = Document(
        id=doc_id,
        source=DocumentSource.GOOGLE_DRIVE,
        sections=[],
        metadata={},
        semantic_identifier=generate_random_identifier(),
    )

    chunk = IndexChunk(
        chunk_id=chunk_id,
        blurb=f"Blurb for chunk {chunk_id} of document {doc_id}.",
        content=f"Content for chunk {chunk_id} of document {doc_id}. This is dummy text for testing purposes.",
        source_links={},
        section_continuation=False,
        source_document=document,
        title_prefix=f"Title prefix for doc {doc_id}",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        contextual_rag_reserved_tokens=0,
        embeddings=ChunkEmbedding(
            full_embedding=generate_random_embedding(embedding_dim),
            mini_chunk_embeddings=[],
        ),
        title_embedding=generate_random_embedding(embedding_dim),
        large_chunk_id=None,
        large_chunk_reference_ids=[],
        image_file_id=None,
    )

    document_set_names = []
    for i in range(number_of_document_sets):
        document_set_names.append(f"Document Set {i}")

    user_emails: list[str | None] = []
    user_groups: list[str] = []
    external_user_emails: list[str] = []
    external_user_group_ids: list[str] = []
    for i in range(number_of_acl_entries):
        user_emails.append(f"user_{i}@example.com")
        user_groups.append(f"group_{i}")
        external_user_emails.append(f"external_user_{i}@example.com")
        external_user_group_ids.append(f"external_group_{i}")

    return DocMetadataAwareIndexChunk.from_index_chunk(
        index_chunk=chunk,
        user_project=[],
        personas=[],
        access=DocumentAccess.build(
            user_emails=user_emails,
            user_groups=user_groups,
            external_user_emails=external_user_emails,
            external_user_group_ids=external_user_group_ids,
            is_public=random.choice([True, False]),
        ),
        document_sets={document_set for document_set in document_set_names},
        boost=random.randint(-1, 1),
        aggregated_chunk_boost_factor=random.random(),
        tenant_id=POSTGRES_DEFAULT_SCHEMA,
    )


@log_function_time()
def do_insertion(
    vespa_index: VespaIndex, all_chunks: list[DocMetadataAwareIndexChunk]
) -> None:
    insertion_records = vespa_index.index(
        chunks=all_chunks,
        index_batch_params=IndexBatchParams(
            doc_id_to_previous_chunk_cnt={},
            doc_id_to_new_chunk_cnt={},
            tenant_id=POSTGRES_DEFAULT_SCHEMA,
            large_chunks_enabled=False,
        ),
    )
    print(f"Indexed {len(insertion_records)} documents.")
    print(
        f"New documents: {sum(1 for record in insertion_records if not record.already_existed)}"
    )
    print(
        f"Existing documents updated: {sum(1 for record in insertion_records if record.already_existed)}"
    )


@log_function_time()
def seed_dummy_docs(
    number_of_document_sets: int,
    number_of_acl_entries: int,
    num_docs: int = 1000,
    chunks_per_doc: int = 5,
    batch_size: int = 100,
) -> None:
    with get_session_with_current_tenant() as db_session:
        search_settings = get_current_search_settings(db_session)
        multipass_config = get_multipass_config(search_settings)
        index_name = search_settings.index_name
        embedding_dim = search_settings.final_embedding_dim

    vespa_index = VespaIndex(
        index_name=index_name,
        secondary_index_name=None,
        large_chunks_enabled=multipass_config.enable_large_chunks,
        secondary_large_chunks_enabled=None,
    )
    print(index_name)

    all_chunks = []
    chunk_count = 0
    for doc_num in range(num_docs):
        doc_id = f"dummy_doc_{doc_num}_{datetime.now().isoformat()}"
        for chunk_num in range(chunks_per_doc):
            chunk = generate_dummy_chunk(
                doc_id=doc_id,
                chunk_id=chunk_num,
                embedding_dim=embedding_dim,
                number_of_acl_entries=number_of_acl_entries,
                number_of_document_sets=number_of_document_sets,
            )
            all_chunks.append(chunk)
            chunk_count += 1

            if len(all_chunks) >= chunks_per_doc * batch_size:
                do_insertion(vespa_index, all_chunks)
                print(
                    f"Indexed {chunk_count} chunks out of {num_docs * chunks_per_doc}."
                )
                print(
                    f"percentage: {chunk_count / (num_docs * chunks_per_doc) * 100:.2f}% \n"
                )
                all_chunks = []

    if all_chunks:
        do_insertion(vespa_index, all_chunks)


if __name__ == "__main__":
    seed_dummy_docs(
        number_of_document_sets=TOTAL_DOC_SETS,
        number_of_acl_entries=TOTAL_ACL_ENTRIES_PER_CATEGORY,
        num_docs=100000,
        chunks_per_doc=5,
        batch_size=1000,
    )


================================================
FILE: backend/scripts/query_time_check/test_query_times.py
================================================
# """
# RUN THIS AFTER SEED_DUMMY_DOCS.PY
# """

# import random
# import time

# from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
# from onyx.configs.constants import DocumentSource
# from onyx.configs.model_configs import DOC_EMBEDDING_DIM
# from onyx.context.search.models import IndexFilters
# from onyx.db.engine.sql_engine import get_session_with_current_tenant
# from onyx.db.search_settings import get_current_search_settings
# from onyx.document_index.document_index_utils import get_multipass_config
# from onyx.document_index.vespa.index import VespaIndex
# from scripts.query_time_check.seed_dummy_docs import TOTAL_ACL_ENTRIES_PER_CATEGORY
# from scripts.query_time_check.seed_dummy_docs import TOTAL_DOC_SETS
# from shared_configs.model_server_models import Embedding

# # make sure these are smaller than TOTAL_ACL_ENTRIES_PER_CATEGORY and TOTAL_DOC_SETS, respectively
# NUMBER_OF_ACL_ENTRIES_PER_QUERY = 6
# NUMBER_OF_DOC_SETS_PER_QUERY = 2


# def get_slowest_99th_percentile(results: list[float]) -> float:
#     return sorted(results)[int(0.99 * len(results))]


# # Generate random filters
# def _random_filters() -> IndexFilters:
#     """
#     Generate random filters for the query containing:
#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY user emails
#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY groups
#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY external groups
#     - NUMBER_OF_DOC_SETS_PER_QUERY document sets
#     """
#     access_control_list = [
#         f"user_email:user_{random.randint(0, TOTAL_ACL_ENTRIES_PER_CATEGORY - 1)}@example.com",
#     ]
#     acl_indices = random.sample(
#         range(TOTAL_ACL_ENTRIES_PER_CATEGORY), NUMBER_OF_ACL_ENTRIES_PER_QUERY
#     )
#     for i in acl_indices:
#         access_control_list.append(f"group:group_{acl_indices[i]}")
#         access_control_list.append(f"external_group:external_group_{acl_indices[i]}")

#     doc_sets = []
#     doc_set_indices = random.sample(
#         range(TOTAL_DOC_SETS), NUMBER_OF_ACL_ENTRIES_PER_QUERY
#     )
#     for i in doc_set_indices:
#         doc_sets.append(f"document_set:Document Set {doc_set_indices[i]}")

#     return IndexFilters(
#         source_type=[DocumentSource.GOOGLE_DRIVE],
#         document_set=doc_sets,
#         tags=[],
#         access_control_list=access_control_list,
#     )


# def test_hybrid_retrieval_times(
#     number_of_queries: int,
# ) -> None:
#     with get_session_with_current_tenant() as db_session:
#         search_settings = get_current_search_settings(db_session)
#         multipass_config = get_multipass_config(search_settings)
#         index_name = search_settings.index_name

#     vespa_index = VespaIndex(
#         index_name=index_name,
#         secondary_index_name=None,
#         large_chunks_enabled=multipass_config.enable_large_chunks,
#         secondary_large_chunks_enabled=None,
#     )

#     # Generate random queries
#     queries = [f"Random Query {i}" for i in range(number_of_queries)]

#     # Generate random embeddings
#     embeddings = [
#         Embedding([random.random() for _ in range(DOC_EMBEDDING_DIM)])
#         for _ in range(number_of_queries)
#     ]

#     total_time = 0.0
#     results = []
#     for i in range(number_of_queries):
#         start_time = time.time()

#         vespa_index.hybrid_retrieval(
#             query=queries[i],
#             query_embedding=embeddings[i],
#             final_keywords=None,
#             filters=_random_filters(),
#             hybrid_alpha=0.5,
#             time_decay_multiplier=1.0,
#             num_to_retrieve=50,
#             ranking_profile_type=QueryExpansionType.SEMANTIC,
#             offset=0,
#             title_content_ratio=0.5,
#         )

#         end_time = time.time()
#         query_time = end_time - start_time
#         total_time += query_time
#         results.append(query_time)

#         print(f"Query {i+1}: {query_time:.4f} seconds")

#     avg_time = total_time / number_of_queries
#     fast_time = min(results)
#     slow_time = max(results)
#     ninety_ninth_percentile = get_slowest_99th_percentile(results)
#     # Write results to a file
#     _OUTPUT_PATH = "query_times_results_large_more.txt"
#     with open(_OUTPUT_PATH, "w") as f:
#         f.write(f"Average query time: {avg_time:.4f} seconds\n")
#         f.write(f"Fastest query: {fast_time:.4f} seconds\n")
#         f.write(f"Slowest query: {slow_time:.4f} seconds\n")
#         f.write(f"99th percentile: {ninety_ninth_percentile:.4f} seconds\n")
#     print(f"Results written to {_OUTPUT_PATH}")

#     print(f"\nAverage query time: {avg_time:.4f} seconds")
#     print(f"Fastest query: {fast_time:.4f} seconds")
#     print(f"Slowest query: {max(results):.4f} seconds")
#     print(f"99th percentile: {get_slowest_99th_percentile(results):.4f} seconds")


# if __name__ == "__main__":
#     test_hybrid_retrieval_times(number_of_queries=1000)


================================================
FILE: backend/scripts/reencrypt_secrets.py
================================================
"""Re-encrypt secrets under the current ENCRYPTION_KEY_SECRET.

Decrypts all encrypted columns using the old key (or raw decode if the old key
is empty), then re-encrypts them with the current ENCRYPTION_KEY_SECRET.

Usage (docker):
    docker exec -it onyx-api_server-1 \
        python -m scripts.reencrypt_secrets --old-key "previous-key"

Usage (kubernetes):
    kubectl exec -it <pod> -- \
        python -m scripts.reencrypt_secrets --old-key "previous-key"

Omit --old-key (or pass "") if secrets were not previously encrypted.

For multi-tenant deployments, pass --tenant-id to target a specific tenant,
or --all-tenants to iterate every tenant.
"""

import argparse
import os
import sys

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.db.rotate_encryption_key import rotate_encryption_key  # noqa: E402
from onyx.db.engine.sql_engine import get_session_with_tenant  # noqa: E402
from onyx.db.engine.sql_engine import SqlEngine  # noqa: E402
from onyx.db.engine.tenant_utils import get_all_tenant_ids  # noqa: E402
from onyx.utils.variable_functionality import global_version  # noqa: E402
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA  # noqa: E402


def _run_for_tenant(tenant_id: str, old_key: str | None, dry_run: bool = False) -> None:
    print(f"Re-encrypting secrets for tenant: {tenant_id}")
    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        results = rotate_encryption_key(db_session, old_key=old_key, dry_run=dry_run)

    if results:
        for col, count in results.items():
            print(
                f"  {col}: {count} row(s) {'would be ' if dry_run else ''}re-encrypted"
            )
    else:
        print("No rows needed re-encryption.")


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Re-encrypt secrets under the current encryption key."
    )
    parser.add_argument(
        "--old-key",
        default=None,
        help="Previous encryption key. Omit or pass empty string if not applicable.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be re-encrypted without making changes.",
    )

    tenant_group = parser.add_mutually_exclusive_group()
    tenant_group.add_argument(
        "--tenant-id",
        default=None,
        help="Target a specific tenant schema.",
    )
    tenant_group.add_argument(
        "--all-tenants",
        action="store_true",
        help="Iterate all tenants.",
    )

    args = parser.parse_args()

    old_key = args.old_key if args.old_key else None

    global_version.set_ee()
    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    if args.dry_run:
        print("DRY RUN — no changes will be made")

    if args.all_tenants:
        tenant_ids = get_all_tenant_ids()
        print(f"Found {len(tenant_ids)} tenant(s)")
        failed_tenants: list[str] = []
        for tid in tenant_ids:
            try:
                _run_for_tenant(tid, old_key, dry_run=args.dry_run)
            except Exception as e:
                print(f"  ERROR for tenant {tid}: {e}")
                failed_tenants.append(tid)
        if failed_tenants:
            print(f"FAILED tenants ({len(failed_tenants)}): {failed_tenants}")
            sys.exit(1)
    else:
        tenant_id = args.tenant_id or POSTGRES_DEFAULT_SCHEMA
        _run_for_tenant(tenant_id, old_key, dry_run=args.dry_run)

    print("Done.")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/reset_indexes.py
================================================
# This file is purely for development use, not included in any builds
import os
import sys
from time import sleep

import requests
from requests.exceptions import RequestException

# makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.configs.app_configs import DOCUMENT_INDEX_NAME  # noqa: E402
from onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT  # noqa: E402
from onyx.utils.logger import setup_logger  # noqa: E402

logger = setup_logger()


def wipe_vespa_index() -> bool:
    """
    Wipes the Vespa index by deleting all documents.
    """
    continuation = None
    should_continue = True
    RETRIES = 3

    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params["continuation"] = continuation

        for attempt in range(RETRIES):
            try:
                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
                response.raise_for_status()

                response_json = response.json()
                logger.info(f"Response: {response_json}")

                continuation = response_json.get("continuation")
                should_continue = bool(continuation)
                break  # Exit the retry loop if the request is successful

            except RequestException:
                logger.exception("Request failed")
                sleep(2**attempt)  # Exponential backoff
        else:
            logger.error(f"Max retries ({RETRIES}) exceeded. Exiting.")
            return False

    return True


def main() -> int:
    """
    Main function to execute the script.
    """
    try:
        succeeded = wipe_vespa_index()
    except Exception:
        logger.exception("wipe_vespa_index exceptioned.")
        return 1

    if not succeeded:
        logger.info("Vespa index wipe failed.")
        return 0

    logger.info("Vespa index wiped successfully.")
    return 1


if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: backend/scripts/reset_postgres.py
================================================
import os
import sys

import psycopg2
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_sqlalchemy_engine

# makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.configs.app_configs import POSTGRES_DB  # noqa: E402
from onyx.configs.app_configs import POSTGRES_HOST  # noqa: E402
from onyx.configs.app_configs import POSTGRES_PASSWORD  # noqa: E402
from onyx.configs.app_configs import POSTGRES_PORT  # noqa: E402
from onyx.configs.app_configs import POSTGRES_USER  # noqa: E402
from onyx.db.credentials import create_initial_public_credential  # noqa: E402


def wipe_all_rows(database: str) -> None:
    conn = psycopg2.connect(
        dbname=database,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
    )
    cur = conn.cursor()

    # Disable triggers to prevent foreign key constraints from being checked
    cur.execute("SET session_replication_role = 'replica';")

    # Fetch all table names in the current database
    cur.execute(
        """
        SELECT tablename
        FROM pg_tables
        WHERE schemaname = 'public'
    """
    )

    tables = cur.fetchall()

    for table in tables:
        table_name = table[0]

        # Don't touch migration history
        if table_name == "alembic_version":
            continue

        print(f"Deleting all rows from {table_name}...")
        cur.execute(f'DELETE FROM "{table_name}"')

    # Re-enable triggers
    cur.execute("SET session_replication_role = 'origin';")

    conn.commit()
    cur.close()
    conn.close()
    print("Finished wiping all rows.")


if __name__ == "__main__":
    print("Cleaning up all Onyx tables")
    wipe_all_rows(POSTGRES_DB)
    with Session(get_sqlalchemy_engine(), expire_on_commit=False) as db_session:
        create_initial_public_credential(db_session)
    print("To keep data consistent, it's best to wipe the document index as well.")
    print(
        "To be safe, it's best to restart the Onyx services (API Server and Background Tasks"
    )


================================================
FILE: backend/scripts/restart_containers.sh
================================================
#!/bin/bash
set -e

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
COMPOSE_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.yml"
COMPOSE_DEV_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.dev.yml"

stop_and_remove_containers() {
  docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
  docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
  docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled stop opensearch 2>/dev/null || true
  docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled rm -f opensearch 2>/dev/null || true
}

cleanup() {
  echo "Error occurred. Cleaning up..."
  stop_and_remove_containers
}

# Trap errors and output a message, then cleanup
trap 'echo "Error occurred on line $LINENO. Exiting script." >&2; cleanup' ERR

# Usage of the script with optional volume arguments
# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]
# [minio_volume] [--keep-opensearch-data]

KEEP_OPENSEARCH_DATA=false
POSITIONAL_ARGS=()
for arg in "$@"; do
    if [[ "$arg" == "--keep-opensearch-data" ]]; then
        KEEP_OPENSEARCH_DATA=true
    else
        POSITIONAL_ARGS+=("$arg")
    fi
done

VESPA_VOLUME=${POSITIONAL_ARGS[0]:-""}
POSTGRES_VOLUME=${POSITIONAL_ARGS[1]:-""}
REDIS_VOLUME=${POSITIONAL_ARGS[2]:-""}
MINIO_VOLUME=${POSITIONAL_ARGS[3]:-""}

# Stop and remove the existing containers
echo "Stopping and removing existing containers..."
stop_and_remove_containers

# Start the PostgreSQL container with optional volume
echo "Starting PostgreSQL container..."
if [[ -n "$POSTGRES_VOLUME" ]]; then
    docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d -v $POSTGRES_VOLUME:/var/lib/postgresql/data postgres -c max_connections=250
else
    docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d postgres -c max_connections=250
fi

# Start the Vespa container with optional volume
echo "Starting Vespa container..."
if [[ -n "$VESPA_VOLUME" ]]; then
    docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 -v $VESPA_VOLUME:/opt/vespa/var vespaengine/vespa:8
else
    docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
fi

# If OPENSEARCH_ADMIN_PASSWORD is not already set, try loading it from
# .vscode/.env so existing dev setups that stored it there aren't silently
# broken.
VSCODE_ENV="$SCRIPT_DIR/../../.vscode/.env"
if [[ -z "${OPENSEARCH_ADMIN_PASSWORD:-}" && -f "$VSCODE_ENV" ]]; then
    set -a
    # shellcheck source=/dev/null
    source "$VSCODE_ENV"
    set +a
fi

# Start the OpenSearch container using the same service from docker-compose that
# our users use, setting OPENSEARCH_INITIAL_ADMIN_PASSWORD from the env's
# OPENSEARCH_ADMIN_PASSWORD if it exists, else defaulting to StrongPassword123!.
# Pass --keep-opensearch-data to preserve the opensearch-data volume across
# restarts, else the volume is deleted so the container starts fresh.
if [[ "$KEEP_OPENSEARCH_DATA" == "false" ]]; then
    echo "Deleting opensearch-data volume..."
    docker volume rm onyx_opensearch-data 2>/dev/null || true
fi
echo "Starting OpenSearch container..."
docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled up --force-recreate -d opensearch

# Start the Redis container with optional volume
echo "Starting Redis container..."
if [[ -n "$REDIS_VOLUME" ]]; then
    docker run --detach --name onyx_redis --publish 6379:6379 -v $REDIS_VOLUME:/data redis
else
    docker run --detach --name onyx_redis --publish 6379:6379 redis
fi

# Start the MinIO container with optional volume
echo "Starting MinIO container..."
if [[ -n "$MINIO_VOLUME" ]]; then
    docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin -v $MINIO_VOLUME:/data minio/minio server /data --console-address ":9001"
else
    docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin minio/minio server /data --console-address ":9001"
fi

# Start the Code Interpreter container
echo "Starting Code Interpreter container..."
docker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api

# Ensure alembic runs in the correct directory (backend/)
PARENT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PARENT_DIR"

# Give Postgres a second to start
sleep 1

# Alembic should be configured in the virtualenv for this repo
if [[ -f "../.venv/bin/activate" ]]; then
    source ../.venv/bin/activate
else
    echo "Warning: Python virtual environment not found at .venv/bin/activate; alembic may not work."
fi

# Run Alembic upgrade
echo "Running Alembic migration..."
alembic upgrade head

# Run the following instead of the above if using MT cloud
# alembic -n schema_private upgrade head

echo "Containers restarted and migration completed."


================================================
FILE: backend/scripts/resume_paused_connectors.py
================================================
import argparse

import requests

API_SERVER_URL = "http://localhost:3000"
API_KEY = "onyx-api-key"  # API key here, if auth is enabled


def resume_paused_connectors(
    api_server_url: str,
    api_key: str | None,
    specific_connector_sources: list[str] | None = None,
) -> None:
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Get all paused connectors
    response = requests.post(
        f"{api_server_url}/api/manage/admin/connector/indexing-status",
        headers=headers,
        json={"get_all_connectors": True},
    )
    response.raise_for_status()

    indexing_status_response = response.json()

    # Iterate over all connectors and resume paused ones
    for connectors_by_source in indexing_status_response:
        if (
            specific_connector_sources
            and connectors_by_source["source"] not in specific_connector_sources
        ):
            print(f"Skipping connector source: {connectors_by_source['source']}")
            continue
        connectors = connectors_by_source["indexing_statuses"]
        for connector in connectors:
            if connector.get("cc_pair_status"):
                if connector["cc_pair_status"] == "PAUSED":
                    print(f"Resuming connector: {connector['name']}")
                    response = requests.put(
                        f"{api_server_url}/api/manage/admin/cc-pair/{connector['cc_pair_id']}/status",
                        json={"status": "ACTIVE"},
                        headers=headers,
                    )
                    response.raise_for_status()
                    print(f"Resumed connector: {connector['name']}")
                else:
                    print(f"Connector {connector['name']} is not paused")
            else:
                print(f"Connector {connector['name']} is a Federated Connector")


def main() -> None:
    parser = argparse.ArgumentParser(description="Resume paused connectors")
    parser.add_argument(
        "--api_server_url",
        type=str,
        default=API_SERVER_URL,
        help="The URL of the API server to use. If not provided, will use the default.",
    )
    parser.add_argument(
        "--api_key",
        type=str,
        default=None,
        help="The API key to use for authentication. If not provided, no authentication will be used.",
    )
    parser.add_argument(
        "--connector_sources",
        type=str.lower,
        nargs="+",
        help="The sources of the connectors to resume. If not provided, will resume all paused connectors.",
    )
    args = parser.parse_args()

    resume_paused_connectors(args.api_server_url, args.api_key, args.connector_sources)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/run_industryrag_bench_questions.py
================================================
from __future__ import annotations

import argparse
import asyncio
import json
import logging
import sys
import time
from dataclasses import asdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import TypedDict
from typing import TypeGuard

import aiohttp


logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
logger = logging.getLogger(__name__)


DEFAULT_API_BASE = "http://localhost:3000"
INTERNAL_SEARCH_TOOL_NAME = "internal_search"
INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
MAX_REQUEST_ATTEMPTS = 5
RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
QUESTION_TIMEOUT_SECONDS = 300
QUESTION_RETRY_PAUSE_SECONDS = 30
MAX_QUESTION_ATTEMPTS = 3


@dataclass(frozen=True)
class QuestionRecord:
    question_id: str
    question: str


@dataclass(frozen=True)
class AnswerRecord:
    question_id: str
    answer: str
    document_ids: list[str]


@dataclass(frozen=True)
class FailedQuestionRecord:
    question_id: str
    error: str


class Citation(TypedDict, total=False):
    citation_number: int
    document_id: str


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description=(
            "Submit questions to Onyx chat with internal search forced and write "
            "answers to a JSONL file."
        )
    )
    parser.add_argument(
        "--questions-file",
        type=Path,
        required=True,
        help="Path to the input questions JSONL file.",
    )
    parser.add_argument(
        "--output-file",
        type=Path,
        required=True,
        help="Path to the output answers JSONL file.",
    )
    parser.add_argument(
        "--api-key",
        type=str,
        required=True,
        help="API key used to authenticate against Onyx.",
    )
    parser.add_argument(
        "--api-base",
        type=str,
        default=DEFAULT_API_BASE,
        help=(
            "Frontend base URL for Onyx. If `/api` is omitted, it will be added "
            f"automatically. Default: {DEFAULT_API_BASE}"
        ),
    )
    parser.add_argument(
        "--parallelism",
        type=int,
        default=1,
        help="Number of questions to process in parallel. Default: 1.",
    )
    parser.add_argument(
        "--max-questions",
        type=int,
        default=None,
        help="Optional cap on how many questions to process. Defaults to all.",
    )
    return parser.parse_args()


def normalize_api_base(api_base: str) -> str:
    normalized = api_base.rstrip("/")
    if normalized.endswith("/api"):
        return normalized
    return f"{normalized}/api"


def load_completed_question_ids(output_file: Path) -> set[str]:
    if not output_file.exists():
        return set()

    completed_ids: set[str] = set()
    with output_file.open("r", encoding="utf-8") as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            try:
                record = json.loads(stripped)
            except json.JSONDecodeError:
                continue
            question_id = record.get("question_id")
            if isinstance(question_id, str) and question_id:
                completed_ids.add(question_id)

    return completed_ids


def load_questions(questions_file: Path) -> list[QuestionRecord]:
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")

    questions: list[QuestionRecord] = []
    with questions_file.open("r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            stripped_line = line.strip()
            if not stripped_line:
                continue

            try:
                payload = json.loads(stripped_line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON on line {line_number} of {questions_file}"
                ) from exc

            question_id = payload.get("question_id")
            question = payload.get("question")

            if not isinstance(question_id, str) or not question_id:
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question_id`."
                )
            if not isinstance(question, str) or not question:
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question`."
                )

            questions.append(QuestionRecord(question_id=question_id, question=question))

    return questions


async def read_json_response(
    response: aiohttp.ClientResponse,
) -> dict[str, Any] | list[dict[str, Any]]:
    response_text = await response.text()
    if response.status >= 400:
        raise RuntimeError(
            f"Request to {response.url} failed with {response.status}: {response_text}"
        )

    try:
        payload = json.loads(response_text)
    except json.JSONDecodeError as exc:
        raise RuntimeError(
            f"Request to {response.url} returned non-JSON content: {response_text}"
        ) from exc

    if not isinstance(payload, (dict, list)):
        raise RuntimeError(
            f"Unexpected response payload type from {response.url}: {type(payload)}"
        )

    return payload


async def request_json_with_retries(
    session: aiohttp.ClientSession,
    method: str,
    url: str,
    headers: dict[str, str],
    json_payload: dict[str, Any] | None = None,
) -> dict[str, Any] | list[dict[str, Any]]:
    backoff_seconds = 1.0

    for attempt in range(1, MAX_REQUEST_ATTEMPTS + 1):
        try:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                json=json_payload,
            ) as response:
                if (
                    response.status in RETRIABLE_STATUS_CODES
                    and attempt < MAX_REQUEST_ATTEMPTS
                ):
                    response_text = await response.text()
                    logger.warning(
                        "Retryable response from %s on attempt %s/%s: %s %s",
                        url,
                        attempt,
                        MAX_REQUEST_ATTEMPTS,
                        response.status,
                        response_text,
                    )
                    await asyncio.sleep(backoff_seconds)
                    backoff_seconds *= 2
                    continue

                return await read_json_response(response)
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            if attempt == MAX_REQUEST_ATTEMPTS:
                raise RuntimeError(
                    f"Request to {url} failed after {MAX_REQUEST_ATTEMPTS} attempts."
                ) from exc

            logger.warning(
                "Request to %s failed on attempt %s/%s: %s",
                url,
                attempt,
                MAX_REQUEST_ATTEMPTS,
                exc,
            )
            await asyncio.sleep(backoff_seconds)
            backoff_seconds *= 2

    raise RuntimeError(f"Request to {url} failed unexpectedly.")


def extract_document_ids(citation_info: object) -> list[str]:
    if not isinstance(citation_info, list):
        return []

    sorted_citations = sorted(
        (citation for citation in citation_info if _is_valid_citation(citation)),
        key=_citation_sort_key,
    )

    document_ids: list[str] = []
    seen_document_ids: set[str] = set()
    for citation in sorted_citations:
        document_id = citation["document_id"]
        if document_id not in seen_document_ids:
            seen_document_ids.add(document_id)
            document_ids.append(document_id)

    return document_ids


def _is_valid_citation(citation: object) -> TypeGuard[Citation]:
    return (
        isinstance(citation, dict)
        and isinstance(citation.get("document_id"), str)
        and bool(citation["document_id"])
    )


def _citation_sort_key(citation: Citation) -> int:
    citation_number = citation.get("citation_number")
    if isinstance(citation_number, int):
        return citation_number
    return sys.maxsize


async def fetch_internal_search_tool_id(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
) -> int:
    payload = await request_json_with_retries(
        session=session,
        method="GET",
        url=f"{api_base}/tool",
        headers=headers,
    )

    if not isinstance(payload, list):
        raise RuntimeError("Expected `/tool` to return a list.")

    for tool in payload:
        if not isinstance(tool, dict):
            continue

        if tool.get("in_code_tool_id") == INTERNAL_SEARCH_IN_CODE_TOOL_ID:
            tool_id = tool.get("id")
            if isinstance(tool_id, int):
                return tool_id

    for tool in payload:
        if not isinstance(tool, dict):
            continue

        if tool.get("name") == INTERNAL_SEARCH_TOOL_NAME:
            tool_id = tool.get("id")
            if isinstance(tool_id, int):
                return tool_id

    raise RuntimeError(
        "Could not find the internal search tool in `/tool`. "
        "Make sure SearchTool is available for this environment."
    )


async def submit_question(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
    internal_search_tool_id: int,
    question_record: QuestionRecord,
) -> AnswerRecord:
    payload = {
        "message": question_record.question,
        "chat_session_info": {"persona_id": 0},
        "parent_message_id": None,
        "file_descriptors": [],
        "allowed_tool_ids": [internal_search_tool_id],
        "forced_tool_id": internal_search_tool_id,
        "stream": False,
    }

    response_payload = await request_json_with_retries(
        session=session,
        method="POST",
        url=f"{api_base}/chat/send-chat-message",
        headers=headers,
        json_payload=payload,
    )

    if not isinstance(response_payload, dict):
        raise RuntimeError(
            "Expected `/chat/send-chat-message` to return an object when `stream=false`."
        )

    answer = response_payload.get("answer_citationless")
    if not isinstance(answer, str):
        answer = response_payload.get("answer")

    if not isinstance(answer, str):
        raise RuntimeError(
            f"Response for question {question_record.question_id} is missing `answer`."
        )

    return AnswerRecord(
        question_id=question_record.question_id,
        answer=answer,
        document_ids=extract_document_ids(response_payload.get("citation_info")),
    )


async def generate_answers(
    questions: list[QuestionRecord],
    output_file: Path,
    api_base: str,
    api_key: str,
    parallelism: int,
    skipped: int,
) -> None:
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    timeout = aiohttp.ClientTimeout(
        total=None,
        connect=30,
        sock_connect=30,
        sock_read=600,
    )
    connector = aiohttp.TCPConnector(limit=parallelism)

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with output_file.open("a", encoding="utf-8") as file:
        async with aiohttp.ClientSession(
            timeout=timeout, connector=connector
        ) as session:
            internal_search_tool_id = await fetch_internal_search_tool_id(
                session=session,
                api_base=api_base,
                headers=headers,
            )
            logger.info("Using internal search tool id %s", internal_search_tool_id)

            semaphore = asyncio.Semaphore(parallelism)
            progress_lock = asyncio.Lock()
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
            stuck_count = 0
            failed_questions: list[FailedQuestionRecord] = []
            remaining_count = len(questions)
            overall_total = remaining_count + skipped
            question_durations: list[float] = []
            run_start_time = time.monotonic()

            def print_progress() -> None:
                avg_time = (
                    sum(question_durations) / len(question_durations)
                    if question_durations
                    else 0.0
                )
                elapsed = time.monotonic() - run_start_time
                eta = avg_time * (remaining_count - completed) / max(parallelism, 1)

                done = skipped + completed
                bar_width = 30
                filled = (
                    int(bar_width * done / overall_total)
                    if overall_total
                    else bar_width
                )
                bar = "█" * filled + "░" * (bar_width - filled)
                pct = (done / overall_total * 100) if overall_total else 100.0

                parts = (
                    f"\r{bar} {pct:5.1f}% "
                    f"[{done}/{overall_total}] "
                    f"avg {avg_time:.1f}s/q "
                    f"elapsed {elapsed:.0f}s "
                    f"ETA {eta:.0f}s "
                    f"(ok:{successful} fail:{len(failed_questions)}"
                )
                if stuck_count:
                    parts += f" stuck:{stuck_count}"
                if skipped:
                    parts += f" skip:{skipped}"
                parts += ")"

                sys.stderr.write(parts)
                sys.stderr.flush()

            print_progress()

            async def process_question(question_record: QuestionRecord) -> None:
                nonlocal completed
                nonlocal successful
                nonlocal stuck_count

                last_error: Exception | None = None
                for attempt in range(1, MAX_QUESTION_ATTEMPTS + 1):
                    q_start = time.monotonic()
                    try:
                        async with semaphore:
                            result = await asyncio.wait_for(
                                submit_question(
                                    session=session,
                                    api_base=api_base,
                                    headers=headers,
                                    internal_search_tool_id=internal_search_tool_id,
                                    question_record=question_record,
                                ),
                                timeout=QUESTION_TIMEOUT_SECONDS,
                            )
                    except asyncio.TimeoutError:
                        async with progress_lock:
                            stuck_count += 1
                            logger.warning(
                                "Question %s timed out after %ss (attempt %s/%s, "
                                "total stuck: %s) — retrying in %ss",
                                question_record.question_id,
                                QUESTION_TIMEOUT_SECONDS,
                                attempt,
                                MAX_QUESTION_ATTEMPTS,
                                stuck_count,
                                QUESTION_RETRY_PAUSE_SECONDS,
                            )
                            print_progress()
                        last_error = TimeoutError(
                            f"Timed out after {QUESTION_TIMEOUT_SECONDS}s "
                            f"on attempt {attempt}/{MAX_QUESTION_ATTEMPTS}"
                        )
                        await asyncio.sleep(QUESTION_RETRY_PAUSE_SECONDS)
                        continue
                    except Exception as exc:
                        duration = time.monotonic() - q_start
                        async with progress_lock:
                            completed += 1
                            question_durations.append(duration)
                            failed_questions.append(
                                FailedQuestionRecord(
                                    question_id=question_record.question_id,
                                    error=str(exc),
                                )
                            )
                            logger.exception(
                                "Failed question %s (%s/%s)",
                                question_record.question_id,
                                completed,
                                remaining_count,
                            )
                            print_progress()
                        return

                    duration = time.monotonic() - q_start

                    async with write_lock:
                        file.write(json.dumps(asdict(result), ensure_ascii=False))
                        file.write("\n")
                        file.flush()

                    async with progress_lock:
                        completed += 1
                        successful += 1
                        question_durations.append(duration)
                        print_progress()
                    return

                # All attempts exhausted due to timeouts
                async with progress_lock:
                    completed += 1
                    failed_questions.append(
                        FailedQuestionRecord(
                            question_id=question_record.question_id,
                            error=str(last_error),
                        )
                    )
                    logger.error(
                        "Question %s failed after %s timeout attempts (%s/%s)",
                        question_record.question_id,
                        MAX_QUESTION_ATTEMPTS,
                        completed,
                        remaining_count,
                    )
                    print_progress()

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )

            # Final newline after progress bar
            sys.stderr.write("\n")
            sys.stderr.flush()

            total_elapsed = time.monotonic() - run_start_time
            avg_time = (
                sum(question_durations) / len(question_durations)
                if question_durations
                else 0.0
            )
            stuck_suffix = f", {stuck_count} stuck timeouts" if stuck_count else ""
            resume_suffix = (
                f" — {skipped} previously completed, "
                f"{skipped + successful}/{overall_total} overall"
                if skipped
                else ""
            )
            logger.info(
                "Done: %s/%s successful in %.1fs (avg %.1fs/question%s)%s",
                successful,
                remaining_count,
                total_elapsed,
                avg_time,
                stuck_suffix,
                resume_suffix,
            )

            if failed_questions:
                logger.warning(
                    "%s questions failed:",
                    len(failed_questions),
                )
                for failed_question in failed_questions:
                    logger.warning(
                        "Failed question %s: %s",
                        failed_question.question_id,
                        failed_question.error,
                    )


def main() -> None:
    args = parse_args()
    questions = load_questions(args.questions_file)
    api_base = normalize_api_base(args.api_base)

    if args.max_questions is not None:
        if args.max_questions < 1:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[: args.max_questions]

    completed_ids = load_completed_question_ids(args.output_file)
    logger.info(
        "Found %s already-answered question IDs in %s",
        len(completed_ids),
        args.output_file,
    )
    total_before_filter = len(questions)
    questions = [q for q in questions if q.question_id not in completed_ids]
    skipped = total_before_filter - len(questions)

    if skipped:
        logger.info(
            "Resuming: %s/%s already answered, %s remaining",
            skipped,
            total_before_filter,
            len(questions),
        )
    else:
        logger.info("Loaded %s questions from %s", len(questions), args.questions_file)

    if not questions:
        logger.info("All questions already answered. Nothing to do.")
        return

    logger.info("Writing answers to %s", args.output_file)

    asyncio.run(
        generate_answers(
            questions=questions,
            output_file=args.output_file,
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
            skipped=skipped,
        )
    )


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/save_load_state.py
================================================
# This file is purely for development use, not included in any builds
# Remember to first to send over the schema information (run API Server)
import argparse
import json
import os
import subprocess

import requests

from alembic import command
from alembic.config import Config
from onyx.configs.app_configs import POSTGRES_DB
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PASSWORD
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT
from onyx.utils.logger import setup_logger

logger = setup_logger()


def save_postgres(filename: str, container_name: str) -> None:
    logger.notice("Attempting to take Postgres snapshot")
    cmd = f"docker exec {container_name} pg_dump -U {POSTGRES_USER} -h {POSTGRES_HOST} -p {POSTGRES_PORT} -W -F t {POSTGRES_DB}"
    with open(filename, "w") as file:
        subprocess.run(
            cmd,
            shell=True,
            check=True,
            stdout=file,
            text=True,
            input=f"{POSTGRES_PASSWORD}\n",
        )


def load_postgres(filename: str, container_name: str) -> None:
    logger.notice("Attempting to load Postgres snapshot")
    try:
        alembic_cfg = Config("alembic.ini")
        command.upgrade(alembic_cfg, "head")
    except Exception as e:
        logger.error(f"Alembic upgrade failed: {e}")

    host_file_path = os.path.abspath(filename)

    copy_cmd = f"docker cp {host_file_path} {container_name}:/tmp/"
    subprocess.run(copy_cmd, shell=True, check=True)

    container_file_path = f"/tmp/{os.path.basename(filename)}"

    restore_cmd = (
        f"docker exec {container_name} pg_restore --clean -U {POSTGRES_USER} "
        f"-h localhost -p {POSTGRES_PORT} -d {POSTGRES_DB} -1 -F t {container_file_path}"
    )
    subprocess.run(restore_cmd, shell=True, check=True)


def save_vespa(filename: str) -> None:
    logger.notice("Attempting to take Vespa snapshot")
    continuation = ""
    params = {}
    doc_jsons: list[dict] = []
    while continuation is not None:
        if continuation:
            params = {"continuation": continuation}
        response = requests.get(DOCUMENT_ID_ENDPOINT, params=params)
        response.raise_for_status()
        found = response.json()
        continuation = found.get("continuation")
        docs = found["documents"]
        for doc in docs:
            doc_json = {"update": doc["id"], "create": True, "fields": doc["fields"]}
            doc_jsons.append(doc_json)

    with open(filename, "w") as jsonl_file:
        for doc in doc_jsons:
            json_str = json.dumps(doc)
            jsonl_file.write(json_str + "\n")


def load_vespa(filename: str) -> None:
    headers = {"Content-Type": "application/json"}
    with open(filename, "r") as f:
        for line in f:
            new_doc = json.loads(line.strip())
            doc_id = new_doc["update"].split("::")[-1]
            response = requests.post(
                DOCUMENT_ID_ENDPOINT + "/" + doc_id,
                headers=headers,
                json=new_doc,
            )
            response.raise_for_status()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Onyx checkpoint saving and loading.")
    parser.add_argument(
        "--save", action="store_true", help="Save Onyx state to directory."
    )
    parser.add_argument(
        "--load", action="store_true", help="Load Onyx state from save directory."
    )
    parser.add_argument(
        "--postgres_container_name",
        type=str,
        default="onyx-relational_db-1",
        help="Name of the postgres container to dump",
    )
    parser.add_argument(
        "--checkpoint_dir",
        type=str,
        default=os.path.join("..", "onyx_checkpoint"),
        help="A directory to store temporary files to.",
    )

    args = parser.parse_args()
    checkpoint_dir = args.checkpoint_dir
    postgres_container = args.postgres_container_name

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    if not args.save and not args.load:
        raise ValueError("Must specify --save or --load")

    if args.load:
        load_postgres(
            os.path.join(checkpoint_dir, "postgres_snapshot.tar"), postgres_container
        )
        load_vespa(os.path.join(checkpoint_dir, "vespa_snapshot.jsonl"))
    else:
        save_postgres(
            os.path.join(checkpoint_dir, "postgres_snapshot.tar"), postgres_container
        )
        save_vespa(os.path.join(checkpoint_dir, "vespa_snapshot.jsonl"))


================================================
FILE: backend/scripts/setup_craft_templates.sh
================================================
#!/bin/sh
# Setup Onyx Craft templates
# This script is called on container startup to ensure Craft templates are ready
# Set ENABLE_CRAFT=false to skip setup

# Check if Craft is disabled
if [ "$ENABLE_CRAFT" = "false" ] || [ "$ENABLE_CRAFT" = "False" ]; then
    echo "Onyx Craft is disabled (ENABLE_CRAFT=false), skipping template setup"
    exit 0
fi

set -e

# Verify opencode CLI is available (installed in Dockerfile)
if ! command -v opencode >/dev/null 2>&1; then
    echo "ERROR: opencode CLI is not available but ENABLE_CRAFT is enabled." >&2
    echo "opencode is required for Craft agent functionality. Ensure you are using Dockerfile" >&2
    echo "which includes the opencode CLI, or set ENABLE_CRAFT=false to disable Craft." >&2
    exit 1
fi

CRAFT_BASE="/app/onyx/server/features/build/sandbox/kubernetes/docker"
DEMO_DATA_ZIP="${CRAFT_BASE}/demo_data.zip"
DEMO_DATA_DIR="${CRAFT_BASE}/demo_data"
# Use environment variables if set, otherwise use defaults
OUTPUTS_TEMPLATE_PATH="${OUTPUTS_TEMPLATE_PATH:-${CRAFT_BASE}/templates/outputs}"
VENV_TEMPLATE_PATH="${VENV_TEMPLATE_PATH:-${CRAFT_BASE}/templates/venv}"
WEB_TEMPLATE_PATH="${WEB_TEMPLATE_PATH:-${OUTPUTS_TEMPLATE_PATH}/web}"
REQUIREMENTS_PATH="${CRAFT_BASE}/initial-requirements.txt"

echo "Setting up Onyx Craft templates..."

# 1. Unzip demo_data.zip if demo_data directory doesn't exist
if [ ! -d "$DEMO_DATA_DIR" ] && [ -f "$DEMO_DATA_ZIP" ]; then
    echo "  Extracting demo data..."
    cd "$CRAFT_BASE" && unzip -q demo_data.zip || { echo "ERROR: Failed to extract demo data" >&2; exit 1; }
    echo "  Demo data extracted"
fi

# 2. Create Python venv template if it doesn't exist
if [ ! -d "$VENV_TEMPLATE_PATH" ] && [ -f "$REQUIREMENTS_PATH" ]; then
    echo "  Creating Python venv template (this may take 30-60 seconds)..."
    python -m venv "$VENV_TEMPLATE_PATH"
    "$VENV_TEMPLATE_PATH/bin/pip" install --upgrade pip -q
    "$VENV_TEMPLATE_PATH/bin/pip" install -q -r "$REQUIREMENTS_PATH"
    echo "  Python venv template created"
fi

# 3. Run npm install in web template
if [ -d "$WEB_TEMPLATE_PATH" ]; then
    if ! command -v npm >/dev/null 2>&1; then
        echo "ERROR: npm is not available but ENABLE_CRAFT is enabled." >&2
        echo "npm is required for Craft web features. Ensure you are using Dockerfile" >&2
        echo "which includes Node.js, or set ENABLE_CRAFT=false to disable Craft." >&2
        exit 1
    fi
    # Always remove and reinstall to ensure correct architecture binaries
    if [ -d "${WEB_TEMPLATE_PATH}/node_modules" ]; then
        echo "  Removing existing node_modules..."
        rm -rf "${WEB_TEMPLATE_PATH}/node_modules"
    fi
    echo "  Installing npm packages (this may take 1-2 minutes)..."
    cd "$WEB_TEMPLATE_PATH" && npm install 2>&1 || { echo "ERROR: npm install failed" >&2; exit 1; }
    echo "  Web template dependencies installed"
fi

echo "Craft template setup complete"


================================================
FILE: backend/scripts/sources_selection_analysis.py
================================================
import argparse
import json
import os
import sys
import time
from datetime import datetime
from os import listdir
from os.path import isfile
from os.path import join
from typing import Optional

import requests

from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from onyx.configs.app_configs import DOCUMENT_INDEX_NAME  # noqa: E402
from onyx.configs.constants import SOURCE_TYPE  # noqa: E402

ANALYSIS_FOLDER = f"{parent_dir}/scripts/.analysisfiles/"


def color_output(
    text: str,
    model: Optional[str] = None,
    text_color: str = "white",
    bg_color: str = "black",
    text_style: str = "normal",
    text_prefix: str = "",
) -> None:
    """Color and print a text

    Args:
        text (str): The text to display
        model (str, optional): A pre-defined output model. Defaults to None.
        text_color (str, optional): Define the text color. Defaults to "white".
        bg_color (str, optional): Define the background color. Defaults to "black".
        text_style (str, optional): Define the text style. Defaults to "normal".
        text_prefix (str, optional): Set a text prefix. Defaults to "".
    """
    if model:
        if model == "alert":
            text_color = "black"
            bg_color = "red"
            text_style = "bold"
        elif model == "critical":
            text_prefix = "CRITICAL: "
            text_color = "white"
            bg_color = "red"
            text_style = "bold"
        elif model == "note":
            text_color = "yellow"
            bg_color = "transparent"
            text_style = "normal"
        elif model == "info":
            text_prefix = "INFO:     "
            text_color = "black"
            bg_color = "yellow"
            text_style = "bold"
        elif model == "info2":
            text_prefix = "INFO:     "
            text_color = "black"
            bg_color = "white"
            text_style = "bold"
        elif model == "valid":
            text_prefix = "INFO:     "
            text_color = "white"
            bg_color = "green"
            text_style = "bold"
        elif model == "debug":
            text_prefix = "DEBUG:    "
            text_color = "blue"
            bg_color = "transparent"
            text_style = "bold"

    text_colors = {
        "black": 30,
        "red": 31,
        "green": 32,
        "yellow": 33,
        "blue": 34,
        "purple": 35,
        "cian": 36,
        "white": 37,
    }
    bg_colors = {
        "black": 40,
        "red": 41,
        "green": 42,
        "yellow": 43,
        "blue": 44,
        "purple": 45,
        "cian": 46,
        "white": 47,
        "transparent": 49,
    }
    text_styles = {
        "normal": 0,
        "bold": 1,
        "light": 2,
        "italicized": 3,
        "underlined": 4,
        "blink": 5,
    }
    print(
        f"\033[{text_styles[text_style]};{text_colors[text_color]};{bg_colors[bg_color]}m {text_prefix} {text} \033[0;0m"
    )


class CompareAnalysis:
    def __init__(
        self, query: str, previous_content: dict, new_content: dict, threshold: float
    ) -> None:
        """Make the comparison between 2 analysis for a specific query

        Args:
            query (str): The analysed query
            previous_content (dict): The previous analysis content for the selected query
            new_content (dict): The new analysis content for the selected query
            threshold (float): The minimum difference (percentage) between scores to raise an anomaly
        """
        self._query = query
        self._previous_content = previous_content
        self._new_content = new_content
        self._threshold = threshold

    def _identify_diff(self, content_key: str) -> list[dict]:
        """Try to identify differences between the two analysis based
            on the selected analysis key.

        Args:
            content_key (str): The analysis item's key to compare the versions.
                                Examples: score / document_id

        Returns:
            list[dict]: List of dict representing the information regarding the difference
                        Format: {
                                    "previous_rank": XX,
                                    "new_rank": XX,
                                    "document_id": XXXX,
                                    "previous_score": XX,
                                    "new_score": XX,
                                    "score_change_pct": XX
                                }
        """
        changes = []

        previous_content = {
            k: v[content_key] for k, v in self._previous_content.items()
        }
        new_content = {k: v[content_key] for k, v in self._new_content.items()}

        if previous_content != new_content:
            for pos, data in previous_content.items():
                if data != new_content[pos]:
                    try:
                        score_change_pct = round(
                            (
                                abs(
                                    self._new_content[pos]["score"]
                                    - self._previous_content[pos]["score"]
                                )
                                / self._new_content[pos]["score"]
                            )
                            * 100.0,
                            2,
                        )
                    except ZeroDivisionError:
                        score_change_pct = 0

                    changes.append(
                        {
                            "previous_rank": pos,
                            "new_rank": (
                                pos
                                if content_key == "score"
                                else {
                                    "x": k for k, v in new_content.items() if v == data
                                }.get("x", "not_ranked")
                            ),
                            "document_id": self._previous_content[pos]["document_id"],
                            "previous_score": self._previous_content[pos]["score"],
                            "new_score": self._new_content[pos]["score"],
                            "score_change_pct": score_change_pct,
                        }
                    )
        return changes

    def check_config_changes(
        self, previous_doc_rank: int | str, new_doc_rank: int
    ) -> None:
        """Try to identify possible reasons why a change has been detected by
            checking the latest document update date or the boost value.

        Args:
            previous_doc_rank (int): The document rank for the previous analysis
            new_doc_rank (int): The document rank for the new analysis
        """
        if isinstance(new_doc_rank, str) and new_doc_rank == "not_ranked":
            color_output(
                (
                    "NOTE: The document is missing in the 'current' analysis file. "
                    "Unable to identify more details about the reason for the change."
                ),
                model="note",
            )
            return None

        if (
            self._previous_content[previous_doc_rank]["boost"]
            != self._new_content[new_doc_rank]["boost"]
        ):
            color_output(
                "NOTE: The 'boost' value has been changed which (maybe) explains the change.",
                model="note",
            )
            color_output(
                (
                    f"Previously it was '{self._previous_content[previous_doc_rank]['boost']}' "
                    f"and now is set to '{self._new_content[new_doc_rank]['boost']}'"
                ),
                model="note",
            )
        if (
            self._previous_content[previous_doc_rank]["updated_at"]
            != self._new_content[new_doc_rank]["updated_at"]
        ):
            color_output("NOTE: The document seems to have been updated.", model="note")
            color_output(
                (
                    f"Previously the updated date was '{self._previous_content[previous_doc_rank]['updated_at']}' "
                    f"and now is '{self._new_content[new_doc_rank]['updated_at']}'"
                ),
                model="note",
            )

    def check_documents_score(self) -> bool:
        """Check if the scores have changed between analysis.

        Returns:
            bool: True if at least one change has been detected. False otherwise.
        """
        color_output("Checking documents Score....", model="info")
        color_output(
            f"Differences under '{self._threshold}%' are ignored (based on the '--threshold' argument)",
            model="info",
        )

        if diff := [
            x
            for x in self._identify_diff("score")
            if x["score_change_pct"] > self._threshold
        ]:
            color_output("<<<<< Changes detected >>>>>", model="alert")
            for change in diff:
                color_output("-" * 100)
                color_output(
                    (
                        f"The document '{change['document_id']}' (rank: {change['previous_rank']}) "
                        f"score has a changed of {change['score_change_pct']}%"
                    )
                )
                color_output(f"previous score: {change['previous_score']}")
                color_output(f"current score:  {change['new_score']}")
                self.check_config_changes(change["previous_rank"], change["new_rank"])

            color_output("<<<<< End of changes >>>>>", model="alert")
            color_output(f"Number of changes detected {len(diff)}", model="info")
        else:
            color_output("No change detected", model="valid")
        color_output("Documents Score check completed.", model="info")

        return False if diff else True

    def check_documents_order(self) -> bool:
        """Check if the selected documents are the same and in the same order.

        Returns:
            bool: True if at least one change has been detected. False otherwise.
        """
        color_output("Checking documents Order....", model="info")

        if diff := self._identify_diff("document_id"):
            color_output("<<<<< Changes detected >>>>>", model="alert")
            for change in diff:
                color_output("-" * 100)
                color_output(
                    (
                        f"The document '{change['document_id']}' was at a rank "
                        f"'{change['previous_rank']}' but now is at rank '{change['new_rank']}'"
                    )
                )
                color_output(f"previous score: {change['previous_score']}")
                color_output(f"current score:  {change['new_score']}")
                self.check_config_changes(change["previous_rank"], change["new_rank"])
            color_output("<<<<< End of changes >>>>>", model="alert")
            color_output(f"Number of changes detected {len(diff)}", model="info")

        else:
            color_output("No change detected", model="valid")
        color_output("Documents order check completed.", model="info")

        return False if diff else True

    def __call__(self) -> None:
        """Manage the analysis process"""
        if not self.check_documents_order():
            color_output(
                "Skipping other checks as the documents order has changed", model="info"
            )
            return None

        self.check_documents_score()


class SelectionAnalysis:
    def __init__(
        self,
        exectype: str,
        analysisfiles: list = [],
        queries: list = [],
        threshold: float = 0.0,
        web_port: int = 3000,
        auth_cookie: str = "",
        wait: int = 10,
    ) -> None:
        """

        Args:
            exectype (str): The execution mode (new or compare)
            analysisfiles (list, optional): List of analysis files to compare or if only one, to use as the base. Defaults to [].
                                        Requiered only by the 'compare' mode
            queries (list, optional): The queries to analysed. Defaults to [].
                                        Required only by the 'new' mode
            threshold (float, optional): The minimum difference (percentage) between scores to raise an anomaly
            web_port (int, optional): The port of the UI. Defaults to 3000 (local exec port)
            auth_cookie (str, optional): The Auth cookie value (fastapiusersauth). Defaults to None.
            wait (int, optional): The waiting time (in seconds) to respect between queries.
                                    It is helpful to avoid hitting the Generative AI rate limiting.
        """
        self._exectype = exectype
        self._analysisfiles = analysisfiles
        self._queries = queries
        self._threshold = threshold
        self._web_port = web_port
        self._auth_cookie = auth_cookie
        self._wait = wait

    def _wait_between_queries(self, query: str) -> None:
        """If there are remaining queries, waits for the defined time.

        Args:
            query (str): The latest executed query
        """
        if query != self._queries[-1]:
            color_output(f"Next query in {self._wait} seconds", model="debug")
            time.sleep(self._wait)

    def prepare(self) -> bool:
        """Create the requirements to execute this script

        Returns:
            bool: True if all the requirements are setup. False otherwise
        """
        try:
            os.makedirs(ANALYSIS_FOLDER, exist_ok=True)
            return True
        except Exception as e:
            color_output(f"Unable to setup the requirements: {e}", model="critical")
            return False

    def do_request(self, query: str) -> dict:
        """Request the Onyx API

        Args:
            query (str): A query

        Returns:
            dict: The Onyx API response content
        """
        cookies = (
            {FASTAPI_USERS_AUTH_COOKIE_NAME: self._auth_cookie}
            if self._auth_cookie
            else {}
        )

        endpoint = f"http://127.0.0.1:{self._web_port}/api/direct-qa"
        query_json = {
            "query": query,
            "collection": DOCUMENT_INDEX_NAME,
            "filters": {SOURCE_TYPE: None},
            "enable_auto_detect_filters": True,
            "search_type": "hybrid",
            "offset": 0,
            "favor_recent": True,
        }
        try:
            response = requests.post(endpoint, json=query_json, cookies=cookies)
            if response.status_code != 200:
                color_output(
                    (
                        f"something goes wrong while requesting the Onyx API for the query '{query}': {response.text}"
                    ),
                    model="critical",
                )
                sys.exit(1)
        except Exception as e:
            color_output(
                f"Unable to request the Onyx API for the query '{query}': {e}",
                model="critical",
            )
            sys.exit(1)

        return json.loads(response.content)

    def get_analysis_files(self) -> list[str]:
        """Returns the list of existing analysis files.

        Returns:
            list[str]: List of filename
        """
        return [f for f in listdir(ANALYSIS_FOLDER) if isfile(join(ANALYSIS_FOLDER, f))]

    def get_analysis_file_content(self, filename: str) -> list[dict]:
        """Returns the content of an analysis file

        Args:
            filename (str): The analysis filename

        Returns:
            list[dict]: Content of the selected file
        """
        with open(f"{ANALYSIS_FOLDER}{filename}", "r") as f:
            return json.load(f)

    def extract_content(self, contents: dict) -> dict:
        """Extract the content returns by the Onyx API

        Args:
            contents (dict): The onyx response content

        Returns:
            dict: Data regarding the selected sources document
        """
        return {
            pos: doc
            for pos, doc in enumerate(
                sorted(
                    contents["top_ranked_docs"], key=lambda d: d["score"], reverse=True
                )[:5]
            )
        }

    def save_analysisfile(self, content: list[dict]) -> Optional[str]:
        """Save the extracted content

        Args:
            content (list[dict]): The content to save

        Returns:
            str: The filname
        """
        filename = datetime.now().strftime("%Y_%m_%d-%I_%M_%S")
        analysis_file = f"{ANALYSIS_FOLDER}{filename}.json"

        try:
            with open(analysis_file, "w") as f:
                json.dump(content, f, indent=4)
        except Exception as e:
            color_output(f"Unable to create the analysis file: {e}", model="critical")
            return None

        color_output(f"Analysis file created: {analysis_file}", model="debug")
        return analysis_file

    def new(self) -> Optional[str]:
        """Manage the process to create a new analysis file
            based on the submitted queries

        Returns:
            str: The new filename with the analysis content
        """
        if not self._queries:
            color_output("Missing queries", model="critical")
            sys.exit(1)

        color_output("Generating a new analysis file...", model="debug")
        analysisfile = []

        for query in self._queries:
            color_output(f"Gathering data of the query: '{query}'", model="info2")
            contents = self.do_request(query)

            analysisfile.append(
                {"query": query, "selected_documents": self.extract_content(contents)}
            )
            color_output("Data gathered", model="info2")
            self._wait_between_queries(query)

        return self.save_analysisfile(analysisfile)

    def compare(
        self,
        previous_analysisfile_content: list[dict],
        new_analysisfile_content: list[dict],
    ) -> None:
        """Manage the process to compare two analysis

        Args:
            previous_analysisfile_content (list): Previous content analysis
            new_analysisfile_content (list): New content analysis
        """
        for query in self._queries:
            # Extract data regarding the selected source documents
            prev_querie_content = [
                x for x in previous_analysisfile_content if x["query"] == query
            ][0]["selected_documents"]
            new_querie_content = [
                x for x in new_analysisfile_content if x["query"] == query
            ][0]["selected_documents"]

            color_output(f"Analysing the query: '{query}'", model="info2")
            CompareAnalysis(
                query, prev_querie_content, new_querie_content, self._threshold
            )()
            color_output(f"Analyse completed for the query: '{query}'", model="info2")
            self._wait_between_queries(query)

        color_output("All the defined queries have been evaluated.", model="info2")

    def validate_analysisfiles(self) -> bool:
        """Validate that the selected analysis files exist

        Returns:
            bool: True if all of them exist. False otherwise
        """
        existing_analysisfiles = self.get_analysis_files()

        if missing_analysisfiles := [
            x for x in self._analysisfiles if x not in existing_analysisfiles
        ]:
            color_output(
                f"Missing analysis file(s) '{', '.join(missing_analysisfiles)}' - NOT FOUND",
                model="critical",
            )
            analysisfiles = "\n ".join(existing_analysisfiles)
            color_output("Available analysis files:", model="info2")
            color_output(analysisfiles)
            return False

        return True

    def __call__(self) -> None:
        if not self.prepare():
            sys.exit(1)

        if self._exectype == "new":
            self.new()

        elif self._exectype == "compare":
            self._analysisfiles = [
                x.replace(".json", "") + ".json" for x in self._analysisfiles
            ]

            if not self.validate_analysisfiles():
                sys.exit(1)

            color_output(
                "Extracting queries from the existing analysis file...", model="debug"
            )
            previous_analysisfile_content = self.get_analysis_file_content(
                self._analysisfiles[0]
            )

            # Extract the queries
            self._queries = sorted([x["query"] for x in previous_analysisfile_content])
            color_output(
                f"Extracted queries: {', '.join(self._queries)}", model="debug"
            )

            if len(self._analysisfiles) == 1:
                if new_file := self.new():
                    new_analysisfile_content = self.get_analysis_file_content(
                        new_file.split("/")[-1:][0]
                    )
                    return self.compare(
                        previous_analysisfile_content, new_analysisfile_content
                    )
                else:
                    color_output(
                        "Unable to generate a new analysis file", model="critical"
                    )
                    sys.exit(1)
            else:
                color_output(
                    (
                        f"For the rest of this execution, the analysis file '{self._analysisfiles[0]}' "
                        f"is identified as 'previous' and '{self._analysisfiles[1]}' as 'current'"
                    ),
                    model="info2",
                )
                new_analysisfile_content = self.get_analysis_file_content(
                    self._analysisfiles[1]
                )
                new_queries = sorted([x["query"] for x in new_analysisfile_content])
                if new_queries != self._queries:
                    color_output(
                        "Unable to compare analysis files as the queries are differents",
                        model="critical",
                    )
                    sys.exit(1)
                self.compare(previous_analysisfile_content, new_analysisfile_content)


def validate_cmd_args(args: argparse.Namespace) -> bool:
    """Validate the CMD arguments

    Args:
        args (argparse.Namespace): The argparse data input

    Returns:
        bool: True if the CMD arguments are valid. False otherwise
    """
    if not args.execution:
        color_output(
            "Missing argument. The execution mode ('--execution') must be defined ('new' or 'compare')",
            model="critical",
        )
        return False
    if args.execution == "new" and not args.q__queries:
        color_output(
            "Missing argument. When the execution type is set to 'new' the '--queries' argument must be defined",
            model="critical",
        )
        return False
    elif args.execution == "compare":
        if not args.files:
            color_output(
                "Missing argument. When the execution type is set to 'compare' the '--files' argument must be defined",
                model="critical",
            )
            return False
        elif len(args.files) > 2:
            color_output(
                "Too many arguments. The '--files' argument cannot be repeated more than 2 times.",
                model="critical",
            )
            return False
    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--auth",
        type=str,
        default=None,
        help=(
            "Currently, to get this script working when the Onyx Auth is "
            "enabled, you must extract from the UI your cookie 'fastapiusersauth' "
            "and then set it using this argument"
        ),
    )
    parser.add_argument(
        "-e",
        "--execution",
        type=str,
        choices=["new", "compare"],
        default=None,
        help=(
            "The execution type. Must be 'new' to generate a new analysis file "
            "or 'compare' to compare a previous execution with a new one based on the same queries"
        ),
    )
    parser.add_argument(
        "-f",
        "--files",
        action="extend",
        default=[],
        nargs=1,
        help=(
            "Analysis file(s) to use for the comparison. Required if the execution arg is set "
            "to 'compare'. NOTE: By repeating this argument, you can make a comparison between "
            "two specific executions. If not repeated, a new execution will be performed and "
            "compared with the selected one."
        ),
    )
    parser.add_argument(
        "-p",
        "--port",
        type=int,
        default=3000,
        help=(
            "The Onyx Web (not the API) port. We use the UI to forward the requests to the API. "
            "It should be '3000' for local dev and '80' if Onyx runs using docker compose."
        ),
    )
    parser.add_argument(
        "-q--queries",
        type=str,
        action="extend",
        default=[],
        nargs=1,
        help=(
            "The query to evaluate. Required if the execution arg is set to 'new'. "
            "NOTE: This argument can be repeated multiple times"
        ),
    )
    parser.add_argument(
        "-t",
        "--threshold",
        type=float,
        default=0.0,
        help="The minimum score change (percentage) to detect an issue.",
    )
    parser.add_argument(
        "-w",
        "--wait",
        type=int,
        default=10,
        help=(
            "The waiting time (in seconds) to respect between queries. "
            "It is helpful to avoid hitting the Generative AI rate limiting."
        ),
    )

    args = parser.parse_args()
    if not validate_cmd_args(args):
        sys.exit(1)

    SelectionAnalysis(
        args.execution,
        args.files,
        args.q__queries,
        args.threshold,
        args.port,
        args.auth,
        args.wait,
    )()


================================================
FILE: backend/scripts/supervisord_entrypoint.sh
================================================
#!/bin/sh
# Entrypoint script for supervisord

# Launch supervisord with environment variables available
exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf


================================================
FILE: backend/scripts/tenant_cleanup/QUICK_START_NO_BASTION.md
================================================
# Quick Start: Tenant Cleanup Without Bastion

## TL;DR - The Commands You Need

```bash
# Navigate to backend directory
cd onyx/backend

# Step 1: Generate CSV of tenants to clean (5-10 min)
PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py

# Step 2: Mark connectors for deletion (1-2 min)
PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py \
  --csv gated_tenants_no_query_3mo_*.csv \
  --force \
  --concurrency 16

# ⏰ WAIT 6+ hours for background deletion to complete

# Step 3: Final cleanup (1-2 min)
PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py \
  --csv gated_tenants_no_query_3mo_*.csv \
  --force
```

## What Changed?

Instead of the original scripts that require bastion access:
- `analyze_current_tenants.py` → `no_bastion_analyze_tenants.py`
- `mark_connectors_for_deletion.py` → `no_bastion_mark_connectors.py`
- `cleanup_tenants.py` → `no_bastion_cleanup_tenants.py`

**No environment variables needed!** All queries run directly from pods.

## What You Need

✅ `kubectl` access to your cluster
✅ Running `celery-worker-user-file-processing` pods
✅ Permission to exec into pods

❌ No bastion host required
❌ No SSH keys required
❌ No environment variables required

## Test Your Setup

```bash
# Check if you can find worker pods
kubectl get po | grep celery-worker-user-file-processing | grep Running

# If you see pods, you're ready to go!
```

## Important Notes

1. **Step 2 triggers background deletion** - the actual document deletion happens asynchronously via Celery workers
2. **You MUST wait** between Step 2 and Step 3 for deletion to complete (can take 6+ hours)
3. **Monitor deletion progress** with: `kubectl logs -f <celery-worker-pod>`
4. **All scripts verify tenant status** - they'll refuse to process active (non-GATED_ACCESS) tenants

## Files Generated

- `gated_tenants_no_query_3mo_YYYYMMDD_HHMMSS.csv` - List of tenants to clean
- `cleaned_tenants.csv` - Successfully cleaned tenants with timestamps

## Safety First

The scripts include multiple safety checks:
- ✅ Verifies tenant status before any operation
- ✅ Checks documents are deleted before dropping schemas
- ✅ Prompts for confirmation on dangerous operations (unless `--force`)
- ✅ Records all successful operations in real-time

## Need More Details?

See [NO_BASTION_README.md](./NO_BASTION_README.md) for:
- Detailed explanations of each step
- Troubleshooting guide
- How it works under the hood
- Performance characteristics


================================================
FILE: backend/scripts/tenant_cleanup/README.md
================================================
## How to Tenant Cleanup

Three main steps.

### Build a list of tenants to cleanup

Use the `analyze_current_tenants.py` script:

```
PYTHONPATH=. \
CONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \
CONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \
BASTION_HOST=<BASTION_IP_ADDRESS> \
PEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \
python scripts/tenant_cleanup/analyze_current_tenants.py
```

This will create a `.csv` called something like `gated_tenants_no_query_3mo_20251012_161102.csv` in the `backend` dir.


### Delete all documents within these tenants

Use the `mark_connectors_for_deletion.py` script:

```
PYTHONPATH=. \
CONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \
CONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \
BASTION_HOST=<BASTION_IP_ADDRESS> \
PEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \
python scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo_<your_datetime>.csv --force
```

Replace `gated_tenants_no_query_3mo_<your_datetime>.csv` with the CSV name from step (1).

This will update the data plane database to 1/ cancel all index attempts 2/ mark all connectors as up for deletion.
We now need to wait for the deletion to run.

It's done this way to re-use as much of the existing code + take advantage of existing infra for parallelized, long running jobs. These 
deletion jobs can take a LONG time (>6hrs), so having it performed syncronously by a script is not really tenable.


### Cleanup the tenants

Use the `cleanup_tenants.py` script:

```
PYTHONPATH=. \
CONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \
CONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \
BASTION_HOST=<BASTION_IP_ADDRESS> \
PEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \
python scripts/tenant_cleanup/cleanup_tenants.py --csv gated_tenants_no_query_3mo_<your_datetime>.csv --force
```

This will drop the tenant schema from the data plane DB, cleanup the `user_tenant_mapping` table, and 
clean up any control plane DB tables associated with each tenant.

NOTE: if the previous step has not completed, tenants with documents will throw an exception.


================================================
FILE: backend/scripts/tenant_cleanup/analyze_current_tenants.py
================================================
#!/usr/bin/env python3
"""
Full tenant analysis script that:
1. Finds a heavy worker pod
2. Runs the tenant data collection script on the pod
3. Analyzes the collected data
"""

import argparse
import csv
import json
import os
import subprocess
import sys
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from pathlib import Path
from typing import Any

from scripts.tenant_cleanup.cleanup_utils import find_worker_pod


def collect_tenant_data(pod_name: str) -> list[dict[str, Any]]:
    """Run the understand_tenants script on the pod and return the data."""
    print(f"\nCollecting tenant data from pod {pod_name}...")

    # Get the path to the understand_tenants script
    script_dir = Path(__file__).parent
    understand_tenants_script = script_dir / "on_pod_scripts" / "understand_tenants.py"

    if not understand_tenants_script.exists():
        raise FileNotFoundError(
            f"understand_tenants.py not found at {understand_tenants_script}"
        )

    # Copy script to pod
    print("Copying script to pod...")
    subprocess.run(
        [
            "kubectl",
            "cp",
            str(understand_tenants_script),
            f"{pod_name}:/tmp/understand_tenants.py",
        ],
        check=True,
        capture_output=True,
    )

    # Execute script on pod
    print("Executing script on pod (this may take a while)...")
    result = subprocess.run(
        ["kubectl", "exec", pod_name, "--", "python", "/tmp/understand_tenants.py"],
        capture_output=True,
        text=True,
        check=True,
    )

    # Show progress messages from stderr
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    # Parse JSON from stdout
    try:
        tenant_data = json.loads(result.stdout)
        print(f"Successfully collected data for {len(tenant_data)} tenants")
        return tenant_data
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON output: {e}", file=sys.stderr)
        print(f"stdout: {result.stdout[:500]}", file=sys.stderr)
        raise


def collect_control_plane_data() -> list[dict[str, Any]]:
    """Collect control plane data from the control plane database."""
    print("\nCollecting control plane data...")

    rds_host = os.environ.get("CONTROL_PLANE_RDS_HOST")
    if not rds_host:
        raise ValueError("CONTROL_PLANE_RDS_HOST is not set")

    rds_password = os.environ.get("CONTROL_PLANE_RDS_PASSWORD")
    if not rds_password:
        raise ValueError("CONTROL_PLANE_RDS_PASSWORD is not set")

    db_url = f"postgresql://postgres:{rds_password}@{rds_host}:5432/control"

    bastion_host = os.environ.get("BASTION_HOST")
    if not bastion_host:
        raise ValueError("BASTION_HOST is not set")

    pem_file_location = os.environ.get("PEM_FILE_LOCATION")
    if not pem_file_location:
        raise ValueError("PEM_FILE_LOCATION is not set")

    full_cmd = (
        f"ssh -i {pem_file_location} ec2-user@{bastion_host} "
        f"\"psql {db_url} -c '\\copy (SELECT * FROM tenant) "
        f"to '/tmp/control_plane_data.csv' with (format csv);'\""
    )

    result = subprocess.run(
        full_cmd,
        shell=True,
        check=True,
        capture_output=True,
        text=True,
    )

    # Copy the CSV file from the bastion to local machine
    copy_cmd = f"scp -i {pem_file_location} ec2-user@{bastion_host}:/tmp/control_plane_data.csv ."

    copy_result = subprocess.run(
        copy_cmd, shell=True, check=True, capture_output=True, text=True
    )

    if copy_result.stderr:
        print(f"Copy warnings: {copy_result.stderr}", file=sys.stderr)
        raise RuntimeError(
            "Failed to copy control plane data from bastion to local machine"
        )

    print("Control plane data copied to local machine as control_plane_data.csv")

    print(result.stdout)

    # Read the CSV file and convert to list of dictionaries
    control_plane_data = []
    with open("control_plane_data.csv", "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            control_plane_data.append(row)

    return control_plane_data


def analyze_tenants(
    tenants: list[dict[str, Any]], control_plane_data: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Analyze tenant activity data and return gated tenants with no query in last 3 months."""

    print(f"\n{'=' * 80}")
    print(f"TENANT ANALYSIS REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'=' * 80}")
    print(f"Total tenants analyzed: {len(tenants)}\n")

    # Create a lookup dict for control plane data by tenant_id
    control_plane_lookup = {}
    for row in control_plane_data:
        # CSV has no header, columns are: tenant_id, stripe_customer_id, created_at,
        # stripe_subscription_quantity, contact_email, registration_origin, tenant_status
        if len(row) >= 7:
            tenant_id = list(row.values())[0]  # First column is tenant_id
            tenant_status = list(row.values())[6]  # 7th column is tenant_status
            control_plane_lookup[tenant_id] = tenant_status

    # Calculate cutoff dates
    one_month_cutoff = datetime.now(timezone.utc) - timedelta(days=30)
    three_month_cutoff = datetime.now(timezone.utc) - timedelta(days=90)

    # Categorize tenants into 4 groups
    gated_no_query_3_months = []  # GATED_ACCESS + no query in last 3 months
    gated_query_1_3_months = []  # GATED_ACCESS + query between 1-3 months
    gated_query_1_month = []  # GATED_ACCESS + query in last 1 month
    everyone_else = []  # All other tenants

    for tenant in tenants:
        tenant_id = tenant.get("tenant_id")
        last_query_time = tenant.get("last_query_time")
        tenant_status = control_plane_lookup.get(tenant_id, "UNKNOWN")

        is_gated = tenant_status == "GATED_ACCESS"

        # Parse last query time
        if last_query_time:
            query_time = datetime.fromisoformat(last_query_time.replace("Z", "+00:00"))
        else:
            query_time = None

        # Categorize
        if is_gated:
            if query_time is None or query_time <= three_month_cutoff:
                gated_no_query_3_months.append(tenant)
            elif query_time <= one_month_cutoff:
                gated_query_1_3_months.append(tenant)
            else:  # query_time > one_month_cutoff
                gated_query_1_month.append(tenant)
        else:
            everyone_else.append(tenant)

    # Calculate document counts for each group
    gated_no_query_docs = sum(
        t.get("num_documents", 0) for t in gated_no_query_3_months
    )
    gated_1_3_month_docs = sum(
        t.get("num_documents", 0) for t in gated_query_1_3_months
    )
    gated_1_month_docs = sum(t.get("num_documents", 0) for t in gated_query_1_month)
    everyone_else_docs = sum(t.get("num_documents", 0) for t in everyone_else)

    print("=" * 80)
    print("TENANT CATEGORIZATION BY GATED ACCESS STATUS AND ACTIVITY")
    print("=" * 80)

    print("\n1. GATED_ACCESS + No query in last 3 months:")
    print(f"   Count: {len(gated_no_query_3_months):,}")
    print(f"   Total documents: {gated_no_query_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_no_query_docs / len(gated_no_query_3_months) if gated_no_query_3_months else 0:.2f}"
    )

    print("\n2. GATED_ACCESS + Query between 1-3 months ago:")
    print(f"   Count: {len(gated_query_1_3_months):,}")
    print(f"   Total documents: {gated_1_3_month_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_1_3_month_docs / len(gated_query_1_3_months) if gated_query_1_3_months else 0:.2f}"
    )

    print("\n3. GATED_ACCESS + Query in last 1 month:")
    print(f"   Count: {len(gated_query_1_month):,}")
    print(f"   Total documents: {gated_1_month_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_1_month_docs / len(gated_query_1_month) if gated_query_1_month else 0:.2f}"
    )

    print("\n4. Everyone else (non-GATED_ACCESS):")
    print(f"   Count: {len(everyone_else):,}")
    print(f"   Total documents: {everyone_else_docs:,}")
    print(
        f"   Avg documents per tenant: {everyone_else_docs / len(everyone_else) if everyone_else else 0:.2f}"
    )

    total_docs = (
        gated_no_query_docs
        + gated_1_3_month_docs
        + gated_1_month_docs
        + everyone_else_docs
    )
    print(f"\nTotal documents across all tenants: {total_docs:,}")

    # Top 100 tenants by document count
    print("\n" + "=" * 80)
    print("TOP 100 TENANTS BY DOCUMENT COUNT")
    print("=" * 80)

    # Sort all tenants by document count
    sorted_tenants = sorted(
        tenants, key=lambda t: t.get("num_documents", 0), reverse=True
    )

    top_100 = sorted_tenants[:100]

    print(
        f"\n{'Rank':<6} {'Tenant ID':<45} {'Documents':>12} {'Users':>8} {'Last Query':<12} {'Group'}"
    )
    print("-" * 130)

    for idx, tenant in enumerate(top_100, 1):
        tenant_id = tenant.get("tenant_id", "Unknown")
        num_docs = tenant.get("num_documents", 0)
        num_users = tenant.get("num_users", 0)
        last_query = tenant.get("last_query_time", "Never")
        tenant_status = control_plane_lookup.get(tenant_id, "UNKNOWN")

        # Format the last query time
        if last_query and last_query != "Never":
            try:
                query_dt = datetime.fromisoformat(last_query.replace("Z", "+00:00"))
                last_query_str = query_dt.strftime("%Y-%m-%d")
            except Exception:
                last_query_str = last_query[:10] if len(last_query) > 10 else last_query
        else:
            last_query_str = "Never"

        # Determine group
        if tenant_status == "GATED_ACCESS":
            if last_query and last_query != "Never":
                query_time = datetime.fromisoformat(last_query.replace("Z", "+00:00"))
                if query_time <= three_month_cutoff:
                    group = "Gated - No query (3mo)"
                elif query_time <= one_month_cutoff:
                    group = "Gated - Query (1-3mo)"
                else:
                    group = "Gated - Query (1mo)"
            else:
                group = "Gated - No query (3mo)"
        else:
            group = f"Other ({tenant_status})"

        print(
            f"{idx:<6} {tenant_id:<45} {num_docs:>12,} {num_users:>8} {last_query_str:<12} {group}"
        )

    # Summary stats for top 100
    top_100_docs = sum(t.get("num_documents", 0) for t in top_100)

    print("\n" + "-" * 110)
    print(f"Top 100 total documents: {top_100_docs:,}")
    print(
        f"Percentage of all documents: {(top_100_docs / total_docs * 100) if total_docs > 0 else 0:.2f}%"
    )

    # Additional insights
    print("\n" + "=" * 80)
    print("ADDITIONAL INSIGHTS")
    print("=" * 80)

    # Tenants with no documents
    no_docs = [t for t in tenants if t.get("num_documents", 0) == 0]
    print(
        f"\nTenants with 0 documents: {len(no_docs):,} ({len(no_docs) / len(tenants) * 100:.2f}%)"
    )

    # Tenants with no users
    no_users = [t for t in tenants if t.get("num_users", 0) == 0]
    print(
        f"Tenants with 0 users: {len(no_users):,} ({len(no_users) / len(tenants) * 100:.2f}%)"
    )

    # Document distribution quartiles
    doc_counts = sorted([t.get("num_documents", 0) for t in tenants])
    if doc_counts:
        print("\nDocument count distribution:")
        print(f"  Median: {doc_counts[len(doc_counts) // 2]:,}")
        print(f"  75th percentile: {doc_counts[int(len(doc_counts) * 0.75)]:,}")
        print(f"  90th percentile: {doc_counts[int(len(doc_counts) * 0.90)]:,}")
        print(f"  95th percentile: {doc_counts[int(len(doc_counts) * 0.95)]:,}")
        print(f"  99th percentile: {doc_counts[int(len(doc_counts) * 0.99)]:,}")
        print(f"  Max: {doc_counts[-1]:,}")

    return gated_no_query_3_months


def find_recent_tenant_data() -> tuple[list[dict[str, Any]] | None, str | None]:
    """Find the most recent tenant data file if it's less than 7 days old."""
    current_dir = Path.cwd()
    tenant_data_files = list(current_dir.glob("tenant_data_*.json"))

    if not tenant_data_files:
        return None, None

    # Sort by modification time, most recent first
    tenant_data_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    most_recent = tenant_data_files[0]

    # Check if file is less than 7 days old
    file_age = datetime.now().timestamp() - most_recent.stat().st_mtime
    seven_days_in_seconds = 7 * 24 * 60 * 60

    if file_age < seven_days_in_seconds:
        file_age_days = file_age / (24 * 60 * 60)
        print(
            f"\n✓ Found recent tenant data: {most_recent.name} (age: {file_age_days:.1f} days)"
        )

        with open(most_recent, "r") as f:
            tenant_data = json.load(f)

        return tenant_data, str(most_recent)

    return None, None


def main() -> None:
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Analyze tenant data and identify gated tenants with no recent queries"
    )
    parser.add_argument(
        "--skip-cache",
        action="store_true",
        help="Skip cached tenant data and collect fresh data from pod",
    )
    args = parser.parse_args()

    try:
        # Step 0: Collect control plane data
        control_plane_data = collect_control_plane_data()

        # Step 1: Check for recent tenant data (< 7 days old) unless --skip-cache is set
        tenant_data = None
        cached_file = None

        if not args.skip_cache:
            tenant_data, cached_file = find_recent_tenant_data()

        if tenant_data:
            print(f"Using cached tenant data from: {cached_file}")
            print(f"Total tenants in cache: {len(tenant_data)}")
        else:
            if args.skip_cache:
                print("\n⚠ Skipping cache (--skip-cache flag set)")

            # Step 2a: Find the heavy worker pod
            pod_name = find_worker_pod()

            # Step 2b: Collect tenant data
            tenant_data = collect_tenant_data(pod_name)

            # Step 2c: Save raw data to file with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"tenant_data_{timestamp}.json"
            with open(output_file, "w") as f:
                json.dump(tenant_data, f, indent=2, default=str)
            print(f"\n✓ Raw data saved to: {output_file}")

        # Step 3: Analyze the data and get gated tenants without recent queries
        gated_no_query_3_months = analyze_tenants(tenant_data, control_plane_data)

        # Step 4: Export to CSV (sorted by num_documents descending)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        csv_file = f"gated_tenants_no_query_3mo_{timestamp}.csv"

        # Sort by num_documents in descending order
        sorted_tenants = sorted(
            gated_no_query_3_months,
            key=lambda t: t.get("num_documents", 0),
            reverse=True,
        )

        with open(csv_file, "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = [
                "tenant_id",
                "num_documents",
                "num_users",
                "last_query_time",
                "days_since_last_query",
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            now = datetime.now(timezone.utc)
            for tenant in sorted_tenants:
                # Calculate days since last query
                last_query_time = tenant.get("last_query_time")
                if last_query_time:
                    try:
                        query_dt = datetime.fromisoformat(
                            last_query_time.replace("Z", "+00:00")
                        )
                        days_since = str((now - query_dt).days)
                    except Exception:
                        days_since = "N/A"
                else:
                    days_since = "Never"

                writer.writerow(
                    {
                        "tenant_id": tenant.get("tenant_id", ""),
                        "num_documents": tenant.get("num_documents", 0),
                        "num_users": tenant.get("num_users", 0),
                        "last_query_time": last_query_time or "Never",
                        "days_since_last_query": days_since,
                    }
                )

        print(f"\n✓ CSV exported to: {csv_file}")
        print(
            f"  Total gated tenants with no query in last 3 months: {len(gated_no_query_3_months)}"
        )

    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}", file=sys.stderr)
        if e.stderr:
            print(f"stderr: {e.stderr}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/check_no_bastion_setup.py
================================================
#!/usr/bin/env python3
"""
Verification script to check if your environment is ready for no-bastion tenant cleanup.

Usage:
    python scripts/tenant_cleanup/check_no_bastion_setup.py
"""

import subprocess
import sys


def print_header(text: str) -> None:
    """Print a formatted header."""
    print(f"\n{'=' * 80}")
    print(f"  {text}")
    print(f"{'=' * 80}\n")


def check_kubectl_access() -> bool:
    """Check if kubectl is installed and can access the cluster."""
    print("Checking kubectl access...")

    try:
        result = subprocess.run(
            ["kubectl", "version", "--client", "--short"],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if result.returncode == 0:
            print(f"✅ kubectl is installed: {result.stdout.strip()}")

            # Try to access cluster
            result = subprocess.run(
                ["kubectl", "get", "ns"],
                capture_output=True,
                text=True,
                timeout=10,
            )

            if result.returncode == 0:
                print("✅ kubectl can access the cluster")
                return True
            else:
                print("❌ kubectl cannot access the cluster")
                print(f"   Error: {result.stderr}")
                return False
        else:
            print("❌ kubectl is not installed or not in PATH")
            return False

    except FileNotFoundError:
        print("❌ kubectl is not installed")
        return False
    except subprocess.TimeoutExpired:
        print("❌ kubectl command timed out")
        return False
    except Exception as e:
        print(f"❌ Error checking kubectl: {e}")
        return False


def check_worker_pods() -> tuple[bool, list[str]]:
    """Check if worker pods are running."""
    print("\nChecking for worker pods...")

    try:
        result = subprocess.run(
            ["kubectl", "get", "po"],
            capture_output=True,
            text=True,
            timeout=10,
            check=True,
        )

        lines = result.stdout.strip().split("\n")
        worker_pods = []

        for line in lines[1:]:  # Skip header
            if "celery-worker-user-file-processing" in line and "Running" in line:
                pod_name = line.split()[0]
                worker_pods.append(pod_name)

        if worker_pods:
            print(f"✅ Found {len(worker_pods)} running worker pod(s):")
            for pod in worker_pods[:3]:  # Show first 3
                print(f"   - {pod}")
            if len(worker_pods) > 3:
                print(f"   ... and {len(worker_pods) - 3} more")
            return True, worker_pods
        else:
            print("❌ No running celery-worker-user-file-processing pods found")
            print("   Available pods:")
            for line in lines[1:6]:  # Show first 5 pods
                print(f"   {line}")
            return False, []

    except subprocess.CalledProcessError as e:
        print(f"❌ Error getting pods: {e}")
        return False, []
    except Exception as e:
        print(f"❌ Error checking worker pods: {e}")
        return False, []


def check_pod_exec_permission(pod_name: str) -> bool:
    """Check if we can exec into a pod."""
    print("\nChecking pod exec permissions...")

    try:
        result = subprocess.run(
            ["kubectl", "exec", pod_name, "--", "echo", "test"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if result.returncode == 0 and "test" in result.stdout:
            print(f"✅ Can exec into pod: {pod_name}")
            return True
        else:
            print(f"❌ Cannot exec into pod: {pod_name}")
            print(f"   Error: {result.stderr}")
            return False

    except subprocess.TimeoutExpired:
        print(f"❌ Exec command timed out for pod: {pod_name}")
        return False
    except Exception as e:
        print(f"❌ Error checking exec permission: {e}")
        return False


def check_pod_db_access(pod_name: str) -> dict:
    """Check if pod has database environment variables."""
    print("\nChecking database access from pod...")

    checks = {
        "control_plane": False,
        "data_plane": False,
    }

    try:
        # Check for control plane DB env vars
        result = subprocess.run(
            ["kubectl", "exec", pod_name, "--", "env"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if result.returncode == 0:
            env_output = result.stdout

            # Check control plane access
            if any(
                var in env_output
                for var in [
                    "POSTGRES_CONTROL_URI",
                    "POSTGRES_CONTROL_HOST",
                ]
            ):
                print("✅ Pod has control plane database environment variables")
                checks["control_plane"] = True
            else:
                print(
                    "⚠️  Pod may not have control plane database environment variables"
                )
                print("   (This might be okay if they're dynamically loaded)")

            # Check data plane access
            if any(
                var in env_output
                for var in ["POSTGRES_URI", "POSTGRES_HOST", "DATABASE_URL"]
            ):
                print("✅ Pod has data plane database environment variables")
                checks["data_plane"] = True
            else:
                print("❌ Pod does not have data plane database environment variables")

        return checks

    except Exception as e:
        print(f"❌ Error checking database access: {e}")
        return checks


def check_required_scripts() -> bool:
    """Check if the required on_pod_scripts exist."""
    print("\nChecking for required scripts...")

    from pathlib import Path

    script_dir = Path(__file__).parent
    required_scripts = [
        "on_pod_scripts/understand_tenants.py",
        "on_pod_scripts/execute_connector_deletion.py",
        "on_pod_scripts/check_documents_deleted.py",
        "on_pod_scripts/cleanup_tenant_schema.py",
        "on_pod_scripts/get_tenant_index_name.py",
        "on_pod_scripts/get_tenant_users.py",
    ]

    all_exist = True
    for script in required_scripts:
        script_path = script_dir / script
        if script_path.exists():
            print(f"✅ {script}")
        else:
            print(f"❌ {script} - NOT FOUND")
            all_exist = False

    return all_exist


def main() -> None:
    print_header("No-Bastion Tenant Cleanup - Setup Verification")

    all_checks_passed = True

    # 1. Check kubectl access
    if not check_kubectl_access():
        all_checks_passed = False

    # 2. Check for worker pods
    has_pods, worker_pods = check_worker_pods()
    if not has_pods:
        all_checks_passed = False
        print("\n⚠️  Cannot proceed without running worker pods")
        print_header("SETUP VERIFICATION FAILED")
        sys.exit(1)

    # Use first worker pod for remaining checks
    test_pod = worker_pods[0]

    # 3. Check exec permissions
    if not check_pod_exec_permission(test_pod):
        all_checks_passed = False

    # 4. Check database access
    db_checks = check_pod_db_access(test_pod)
    if not db_checks["data_plane"]:
        all_checks_passed = False

    # 5. Check required scripts
    if not check_required_scripts():
        all_checks_passed = False

    # Summary
    print_header("VERIFICATION SUMMARY")

    if all_checks_passed and db_checks["control_plane"]:
        print("✅ ALL CHECKS PASSED!")
        print("\nYou're ready to run tenant cleanup without bastion access.")
        print("\nNext steps:")
        print("1. Read QUICK_START_NO_BASTION.md for commands")
        print(
            "2. Run: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py"
        )
        sys.exit(0)
    elif all_checks_passed:
        print("⚠️  MOSTLY READY (with warnings)")
        print("\nYou can proceed, but control plane access may need verification.")
        print("Try running Step 1 and see if it works.")
        print("\nNext steps:")
        print(
            "1. Run: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py"
        )
        print("2. If it fails with DB errors, check pod environment variables")
        sys.exit(0)
    else:
        print("❌ SETUP VERIFICATION FAILED")
        print("\nPlease fix the issues above before proceeding.")
        print("\nCommon fixes:")
        print("- Install kubectl: https://kubernetes.io/docs/tasks/tools/")
        print("- Configure cluster access: kubectl config use-context <context>")
        print("- Check pod status: kubectl get po")
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/cleanup_tenants.py
================================================
#!/usr/bin/env python3
"""
Tenant cleanup script that:
1. Deletes all documents from Vespa
2. Drops the data plane PostgreSQL schema
3. Clean up control plane (tenants, subscription table)

Usage:
    python backend/scripts/cleanup_tenant.py <tenant_id> [--force]
    python backend/scripts/cleanup_tenant.py --csv <csv_file_path> [--force]

Arguments:
    tenant_id        The tenant ID to clean up (required if not using --csv)
    --csv PATH       Path to CSV file containing tenant IDs to clean up
    --force          Skip all confirmation prompts (optional)

Examples:
    python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789
    python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789 --force
    python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv
    python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv --force
"""

import csv
import json
import signal
import subprocess
import sys
from datetime import datetime
from pathlib import Path

from scripts.tenant_cleanup.cleanup_utils import confirm_step
from scripts.tenant_cleanup.cleanup_utils import execute_control_plane_query
from scripts.tenant_cleanup.cleanup_utils import find_worker_pod
from scripts.tenant_cleanup.cleanup_utils import get_tenant_status
from scripts.tenant_cleanup.cleanup_utils import read_tenant_ids_from_csv
from scripts.tenant_cleanup.cleanup_utils import TenantNotFoundInControlPlaneError


def signal_handler(signum: int, frame: object) -> None:  # noqa: ARG001
    """Handle termination signals by killing active subprocess."""
    sys.exit(1)


def get_tenant_index_name(pod_name: str, tenant_id: str) -> str:
    """Get the default index name for the given tenant by running script on pod."""
    print(f"Getting default index name for tenant: {tenant_id}")

    # Get the path to the script
    script_dir = Path(__file__).parent
    index_name_script = script_dir / "on_pod_scripts" / "get_tenant_index_name.py"

    if not index_name_script.exists():
        raise FileNotFoundError(
            f"get_tenant_index_name.py not found at {index_name_script}"
        )

    try:
        # Copy script to pod
        print("  Copying script to pod...")
        subprocess.run(
            [
                "kubectl",
                "cp",
                str(index_name_script),
                f"{pod_name}:/tmp/get_tenant_index_name.py",
            ],
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        print("  Executing script on pod...")
        result = subprocess.run(
            [
                "kubectl",
                "exec",
                pod_name,
                "--",
                "python",
                "/tmp/get_tenant_index_name.py",
                tenant_id,
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            index_name = result_data.get("index_name")
            print(f"✓ Found index name: {index_name}")
            return index_name
        else:
            message = result_data.get("message", "Unknown error")
            raise RuntimeError(f"Failed to get index name: {message}")

    except subprocess.CalledProcessError as e:
        print(
            f"✗ Failed to get index name for tenant {tenant_id}: {e}", file=sys.stderr
        )
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(
            f"✗ Failed to get index name for tenant {tenant_id}: {e}", file=sys.stderr
        )
        raise


def get_tenant_users(pod_name: str, tenant_id: str) -> list[str]:
    """Get list of user emails from the tenant's data plane schema.

    Args:
        pod_name: The Kubernetes pod name to execute on
        tenant_id: The tenant ID to query

    Returns:
        List of user email addresses, or empty list if query fails
    """
    print(f"Fetching user emails for tenant: {tenant_id}")

    # Get the path to the script
    script_dir = Path(__file__).parent
    get_users_script = script_dir / "on_pod_scripts" / "get_tenant_users.py"

    if not get_users_script.exists():
        raise FileNotFoundError(f"get_tenant_users.py not found at {get_users_script}")

    try:
        # Copy script to pod
        print("  Copying script to pod...")
        subprocess.run(
            [
                "kubectl",
                "cp",
                str(get_users_script),
                f"{pod_name}:/tmp/get_tenant_users.py",
            ],
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        print("  Executing script on pod...")
        result = subprocess.run(
            [
                "kubectl",
                "exec",
                pod_name,
                "--",
                "python",
                "/tmp/get_tenant_users.py",
                tenant_id,
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            users = result_data.get("users", [])
            if users:
                print(f"✓ Found {len(users)} user(s):")
                for email in users:
                    print(f"    - {email}")
            else:
                print("  No users found in tenant")
            return users
        else:
            message = result_data.get("message", "Unknown error")
            print(f"⚠ Could not fetch users: {message}")
            return []

    except subprocess.CalledProcessError as e:
        print(f"⚠ Failed to get users for tenant {tenant_id}: {e}")
        if e.stderr:
            print(f"  Error details: {e.stderr}")
        return []
    except Exception as e:
        print(f"⚠ Failed to get users for tenant {tenant_id}: {e}")
        return []


def check_documents_deleted(pod_name: str, tenant_id: str) -> None:
    """Check if all documents and connector credential pairs have been deleted.

    Raises RuntimeError if any ConnectorCredentialPairs or Documents remain.
    """
    print(f"Checking for remaining documents in tenant: {tenant_id}")

    # Get the path to the script
    script_dir = Path(__file__).parent
    check_script = script_dir / "on_pod_scripts" / "check_documents_deleted.py"

    if not check_script.exists():
        raise FileNotFoundError(
            f"check_documents_deleted.py not found at {check_script}"
        )

    try:
        # Copy script to pod
        print("  Copying script to pod...")
        subprocess.run(
            [
                "kubectl",
                "cp",
                str(check_script),
                f"{pod_name}:/tmp/check_documents_deleted.py",
            ],
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        print("  Executing check on pod...")
        result = subprocess.run(
            [
                "kubectl",
                "exec",
                pod_name,
                "--",
                "python",
                "/tmp/check_documents_deleted.py",
                tenant_id,
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            message = result_data.get("message")
            print(f"✓ {message}")
        elif status == "not_found":
            message = result_data.get("message", "Schema not found")
            print(f"⚠ {message}")
        else:
            message = result_data.get("message", "Unknown error")
            cc_count = result_data.get("connector_credential_pair_count", 0)
            doc_count = result_data.get("document_count", 0)
            error_details = f"{message}"
            if cc_count > 0 or doc_count > 0:
                error_details += f"\n  ConnectorCredentialPairs: {cc_count}\n  Documents: {doc_count}"
            raise RuntimeError(error_details)

    except subprocess.CalledProcessError as e:
        print(
            f"✗ Failed to check documents for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(
            f"✗ Failed to check documents for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        raise


def drop_data_plane_schema(pod_name: str, tenant_id: str) -> None:
    """Drop the PostgreSQL schema for the given tenant by running script on pod."""
    print(f"Dropping data plane schema for tenant: {tenant_id}")

    # Get the path to the cleanup script
    script_dir = Path(__file__).parent
    schema_cleanup_script = script_dir / "on_pod_scripts" / "cleanup_tenant_schema.py"

    if not schema_cleanup_script.exists():
        raise FileNotFoundError(
            f"cleanup_tenant_schema.py not found at {schema_cleanup_script}"
        )

    try:
        # Copy script to pod
        print("  Copying script to pod...")
        subprocess.run(
            [
                "kubectl",
                "cp",
                str(schema_cleanup_script),
                f"{pod_name}:/tmp/cleanup_tenant_schema.py",
            ],
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        print("  Executing schema cleanup on pod...")
        result = subprocess.run(
            [
                "kubectl",
                "exec",
                pod_name,
                "--",
                "python",
                "/tmp/cleanup_tenant_schema.py",
                tenant_id,
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")
        message = result_data.get("message")

        if status == "success":
            print(f"✓ {message}")
        elif status == "not_found":
            print(f"⚠ {message}")
        else:
            print(f"✗ {message}", file=sys.stderr)
            raise RuntimeError(message)

    except subprocess.CalledProcessError as e:
        print(f"✗ Failed to drop schema for tenant {tenant_id}: {e}", file=sys.stderr)
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(f"✗ Failed to drop schema for tenant {tenant_id}: {e}", file=sys.stderr)
        raise


def cleanup_control_plane(tenant_id: str, force: bool = False) -> None:
    """
    Clean up control plane data (tenants table, subscription table, etc.)

    Deletes from tables in this order:
    1. tenant_notification (foreign key to tenant)
    2. tenant_config (foreign key to tenant)
    3. subscription (foreign key to tenant)
    4. tenant (primary table)
    """
    print(f"Cleaning up control plane data for tenant: {tenant_id}")

    # Delete in order respecting foreign key constraints
    delete_queries = [
        (
            "tenant_notification",
            "DELETE FROM tenant_notification WHERE tenant_id = '{tenant_id}';",
        ),
        ("tenant_config", "DELETE FROM tenant_config WHERE tenant_id = '{tenant_id}';"),
        ("subscription", "DELETE FROM subscription WHERE tenant_id = '{tenant_id}';"),
        ("tenant", "DELETE FROM tenant WHERE tenant_id = '{tenant_id}';"),
    ]

    try:
        for table_name, query in delete_queries:
            formatted_query = query.format(tenant_id=tenant_id)
            print(f"  Deleting from {table_name}...")

            if not confirm_step(f"Delete from {table_name}?", force):
                print(f"  Skipping deletion from {table_name}")
                continue

            result = execute_control_plane_query(formatted_query)

            if result.stdout:
                # Extract row count from output (e.g., "DELETE 5")
                print(f"    {result.stdout.strip()}")

        print(f"✓ Successfully cleaned up control plane data for tenant: {tenant_id}")

    except subprocess.CalledProcessError as e:
        print(
            f"✗ Failed to clean up control plane for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise


def cleanup_tenant(tenant_id: str, pod_name: str, force: bool = False) -> bool:
    """
    Main cleanup function that orchestrates all cleanup steps.

    Args:
        tenant_id: The tenant ID to clean up
        pod_name: The Kubernetes pod name to execute operations on
        force: If True, skip all confirmation prompts

    Returns:
        True if cleanup was performed, False if skipped
    """
    print(f"Starting cleanup for tenant: {tenant_id}")

    # Track if tenant was not found in control plane (for force mode)
    tenant_not_found_in_control_plane = False

    # Check tenant status first
    print(f"\n{'=' * 80}")
    try:
        tenant_status = get_tenant_status(tenant_id)

        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode
        if tenant_status and tenant_status != "GATED_ACCESS":
            print(
                f"\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!"
            )
            print(
                "This tenant may be active and should not be deleted without careful review."
            )
            print(f"{'=' * 80}\n")

            if force:
                print(f"Skipping cleanup for tenant {tenant_id} in force mode")
                return False

            # Always ask for confirmation if not gated, even in force mode
            response = input(
                "Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: "
            )
            if response.lower() != "yes":
                print("Cleanup aborted - tenant is not GATED_ACCESS")
                return False
        elif tenant_status == "GATED_ACCESS":
            print("✓ Tenant status is GATED_ACCESS - safe to proceed with cleanup")
        elif tenant_status is None:
            print("⚠️  WARNING: Could not determine tenant status!")

            if force:
                print(f"Skipping cleanup for tenant {tenant_id} in force mode")
                return False

            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                print("Cleanup aborted - could not verify tenant status")
                return False
    except TenantNotFoundInControlPlaneError as e:
        # Tenant/table not found in control plane
        error_str = str(e)
        print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")
        tenant_not_found_in_control_plane = True

        if force:
            print(
                "[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only"
            )
        else:
            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                print("Cleanup aborted - tenant not found in control plane")
                return False
    except Exception as e:
        # Other errors (not "not found")
        error_str = str(e)
        print(f"⚠️  WARNING: Failed to check tenant status: {error_str}")

        if force:
            print(f"Skipping cleanup for tenant {tenant_id} in force mode")
            return False

        response = input("Continue anyway? Type 'yes' to confirm: ")
        if response.lower() != "yes":
            print("Cleanup aborted - could not verify tenant status")
            return False
    print(f"{'=' * 80}\n")

    # Fetch tenant users for informational purposes (non-blocking)
    # Skip in force mode as it's only informational
    if not force:
        print(f"\n{'=' * 80}")
        try:
            get_tenant_users(pod_name, tenant_id)
        except Exception as e:
            print(f"⚠ Could not fetch tenant users: {e}")
        print(f"{'=' * 80}\n")

    # Step 1: Make sure all documents are deleted
    print(f"\n{'=' * 80}")
    print("Step 1/3: Checking for remaining ConnectorCredentialPairs and Documents")
    print(f"{'=' * 80}")
    try:
        check_documents_deleted(pod_name, tenant_id)
    except Exception as e:
        print(f"✗ Document check failed: {e}", file=sys.stderr)
        print(
            "\nPlease ensure all ConnectorCredentialPairs and Documents are deleted before running cleanup."
        )
        print(
            "You may need to mark connectors for deletion and wait for cleanup to complete."
        )
        return False
    print(f"{'=' * 80}\n")

    # Step 2: Drop data plane schema
    if confirm_step(
        f"Step 2/3: Drop data plane schema '{tenant_id}' (CASCADE - will delete all tables, functions, etc.)",
        force,
    ):
        try:
            drop_data_plane_schema(pod_name, tenant_id)
        except Exception as e:
            print(f"✗ Failed at schema cleanup step: {e}", file=sys.stderr)
            if not force:
                response = input("Continue with control plane cleanup? (y/n): ")
                if response.lower() != "y":
                    print("Cleanup aborted by user")
                    return False
            else:
                print("[FORCE MODE] Continuing despite schema cleanup failure")
    else:
        print("Step 2 skipped by user")

    # Step 3: Clean up control plane (skip if tenant not found in control plane with --force)
    if tenant_not_found_in_control_plane:
        print(f"\n{'=' * 80}")
        print(
            "Step 3/3: Skipping control plane cleanup (tenant not found in control plane)"
        )
        print(f"{'=' * 80}\n")
    elif confirm_step(
        "Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)",
        force,
    ):
        try:
            cleanup_control_plane(tenant_id, force)
        except Exception as e:
            print(f"✗ Failed at control plane cleanup step: {e}", file=sys.stderr)
            if not force:
                print("Control plane cleanup failed")
            else:
                print("[FORCE MODE] Control plane cleanup failed but continuing")
    else:
        print("Step 3 skipped by user")
        return False

    print(f"\n{'=' * 80}")
    print(f"✓ Cleanup completed for tenant: {tenant_id}")
    print(f"{'=' * 80}")
    return True


def main() -> None:
    # Register signal handlers for graceful shutdown
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    if len(sys.argv) < 2:
        print("Usage: python backend/scripts/cleanup_tenant.py <tenant_id> [--force]")
        print(
            "       python backend/scripts/cleanup_tenant.py --csv <csv_file_path> [--force]"
        )
        print("\nArguments:")
        print(
            "  tenant_id        The tenant ID to clean up (required if not using --csv)"
        )
        print("  --csv PATH       Path to CSV file containing tenant IDs to clean up")
        print("  --force          Skip all confirmation prompts (optional)")
        print("\nExamples:")
        print("  python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789")
        print(
            "  python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789 --force"
        )
        print(
            "  python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv"
        )
        print(
            "  python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv --force"
        )
        sys.exit(1)

    # Parse arguments
    force = "--force" in sys.argv
    tenant_ids = []

    # Check for CSV mode
    if "--csv" in sys.argv:
        try:
            csv_index = sys.argv.index("--csv")
            if csv_index + 1 >= len(sys.argv):
                print("Error: --csv flag requires a file path", file=sys.stderr)
                sys.exit(1)

            csv_path = sys.argv[csv_index + 1]
            tenant_ids = read_tenant_ids_from_csv(csv_path)

            if not tenant_ids:
                print("Error: No tenant IDs found in CSV file", file=sys.stderr)
                sys.exit(1)

            print(f"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}")

        except Exception as e:
            print(f"Error reading CSV file: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Single tenant mode
        tenant_ids = [sys.argv[1]]

    # Initial confirmation (unless --force is used)
    if not force:
        print(f"\n{'=' * 80}")
        print("TENANT CLEANUP - CONFIRMATION REQUIRED")
        print(f"{'=' * 80}")
        if len(tenant_ids) == 1:
            print(f"Tenant ID: {tenant_ids[0]}")
        else:
            print(f"Number of tenants: {len(tenant_ids)}")
            print(f"Tenant IDs: {', '.join(tenant_ids[:5])}")
            if len(tenant_ids) > 5:
                print(f"            ... and {len(tenant_ids) - 5} more")

        print("Index Name: Will be fetched automatically when deleting Vespa documents")
        print(
            f"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}"
        )
        print("\nThis will:")
        print("  1. Delete ALL Vespa documents for this tenant")
        print("  2. Drop the data plane PostgreSQL schema (CASCADE)")
        print("  3. Clean up control plane data:")
        print("     - Delete from tenant_notification table")
        print("     - Delete from tenant_config table")
        print("     - Delete from subscription table")
        print("     - Delete from tenant table")
        print(f"\n{'=' * 80}")
        print("WARNING: This operation is IRREVERSIBLE!")
        print(f"{'=' * 80}\n")

        response = input("Are you sure you want to proceed? Type 'yes' to confirm: ")

        if response.lower() != "yes":
            print("Cleanup aborted by user")
            sys.exit(0)
    else:
        if len(tenant_ids) == 1:
            print(
                f"⚠ FORCE MODE: Running cleanup for {tenant_ids[0]} without confirmations"
            )
        else:
            print(
                f"⚠ FORCE MODE: Running cleanup for {len(tenant_ids)} tenants without confirmations"
            )

    # Find heavy worker pod once for all tenants
    try:
        pod_name = find_worker_pod()
        print(f"✓ Found worker pod: {pod_name}\n")
    except Exception as e:
        print(f"✗ Failed to find heavy worker pod: {e}", file=sys.stderr)
        print("Cannot proceed with cleanup")
        sys.exit(1)

    # Run cleanup for each tenant
    failed_tenants = []
    successful_tenants = []
    skipped_tenants = []

    # Open CSV file for writing successful cleanups in real-time
    csv_output_path = "cleaned_tenants.csv"
    with open(csv_output_path, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["tenant_id", "cleaned_at"])
        csv_file.flush()  # Ensure header is written immediately

        print(f"Writing successful cleanups to: {csv_output_path}\n")

        for idx, tenant_id in enumerate(tenant_ids, 1):
            if len(tenant_ids) > 1:
                print(f"\n{'=' * 80}")
                print(f"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}")
                print(f"{'=' * 80}")

            try:
                was_cleaned = cleanup_tenant(tenant_id, pod_name, force)

                if was_cleaned:
                    # Only record if actually cleaned up (not skipped)
                    successful_tenants.append(tenant_id)

                    # Write to CSV immediately after successful cleanup
                    timestamp = datetime.utcnow().isoformat()
                    csv_writer.writerow([tenant_id, timestamp])
                    csv_file.flush()  # Ensure real-time write
                    print(f"✓ Recorded cleanup in {csv_output_path}")
                else:
                    skipped_tenants.append(tenant_id)
                    print(f"⚠ Tenant {tenant_id} was skipped (not recorded in CSV)")

            except Exception as e:
                print(f"✗ Cleanup failed for tenant {tenant_id}: {e}", file=sys.stderr)
                failed_tenants.append((tenant_id, str(e)))

                # If not in force mode and there are more tenants, ask if we should continue
                if not force and idx < len(tenant_ids):
                    response = input(
                        f"\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): "
                    )
                    if response.lower() != "y":
                        print("Cleanup aborted by user")
                        break

    # Print summary
    if len(tenant_ids) == 1:
        if successful_tenants:
            print(f"\n✓ Successfully cleaned tenant written to: {csv_output_path}")
        elif skipped_tenants:
            print("\n⚠ Tenant was skipped")
    elif len(tenant_ids) > 1:
        print(f"\n{'=' * 80}")
        print("CLEANUP SUMMARY")
        print(f"{'=' * 80}")
        print(f"Total tenants: {len(tenant_ids)}")
        print(f"Successful: {len(successful_tenants)}")
        print(f"Skipped: {len(skipped_tenants)}")
        print(f"Failed: {len(failed_tenants)}")
        print(f"\nSuccessfully cleaned tenants written to: {csv_output_path}")

        if skipped_tenants:
            print(f"\nSkipped tenants ({len(skipped_tenants)}):")
            for tenant_id in skipped_tenants:
                print(f"  - {tenant_id}")

        if failed_tenants:
            print(f"\nFailed tenants ({len(failed_tenants)}):")
            for tenant_id, error in failed_tenants:
                print(f"  - {tenant_id}: {error}")

        print(f"{'=' * 80}")

        if failed_tenants:
            sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/cleanup_utils.py
================================================
import csv
import os
import random
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path


class TenantNotFoundInControlPlaneError(Exception):
    """Exception raised when tenant/table is not found in control plane."""


@dataclass
class ControlPlaneConfig:
    """Configuration for connecting to the control plane database."""

    db_url: str
    bastion_host: str
    pem_file_location: str


def find_worker_pod() -> str:
    """Find a user file processing worker pod using kubectl."""
    print("Finding user file processing worker pod...")

    result = subprocess.run(
        ["kubectl", "get", "po"], capture_output=True, text=True, check=True
    )

    # Parse output and find user file processing worker pod
    lines = result.stdout.strip().split("\n")
    lines = lines[1:]  # Skip header
    random.shuffle(lines)
    for line in lines:
        if "celery-worker-user-file-processing" in line and "Running" in line:
            pod_name = line.split()[0]
            print(f"Found pod: {pod_name}")
            return pod_name

    raise RuntimeError("No running user file processing worker pod found")


def confirm_step(message: str, force: bool = False) -> bool:
    """Ask for confirmation before executing a step.

    Args:
        message: The confirmation message to display
        force: If True, skip confirmation and return True

    Returns:
        True if user confirms or force is True, False otherwise
    """
    if force:
        print(f"[FORCE MODE] Skipping confirmation: {message}")
        return True

    print(f"\n{message}")
    response = input("Proceed? (y/n): ")
    return response.lower() == "y"


def get_control_plane_config() -> ControlPlaneConfig:
    """Get control plane database configuration from environment variables.

    Returns:
        ControlPlaneConfig with db_url, bastion_host, and pem_file_location

    Raises:
        ValueError: If any required environment variable is not set
    """
    rds_host = os.environ.get("CONTROL_PLANE_RDS_HOST")
    if not rds_host:
        raise ValueError("CONTROL_PLANE_RDS_HOST is not set")

    rds_password = os.environ.get("CONTROL_PLANE_RDS_PASSWORD")
    if not rds_password:
        raise ValueError("CONTROL_PLANE_RDS_PASSWORD is not set")

    bastion_host = os.environ.get("BASTION_HOST")
    if not bastion_host:
        raise ValueError("BASTION_HOST is not set")

    pem_file_location = os.environ.get("PEM_FILE_LOCATION")
    if not pem_file_location:
        raise ValueError("PEM_FILE_LOCATION is not set")

    db_url = f"postgresql://postgres:{rds_password}@{rds_host}:5432/control"

    return ControlPlaneConfig(
        db_url=db_url,
        bastion_host=bastion_host,
        pem_file_location=pem_file_location,
    )


def execute_control_plane_query(
    query: str, tuple_only: bool = False
) -> subprocess.CompletedProcess:
    """Execute a SQL query against the control plane database via SSH.

    Args:
        query: The SQL query to execute
        tuple_only: If True, use psql's tuple-only mode (-t flag) for cleaner output

    Returns:
        subprocess.CompletedProcess with the result

    Raises:
        subprocess.CalledProcessError: If the command fails
    """
    config = get_control_plane_config()
    db_url = config.db_url
    bastion_host = config.bastion_host
    pem_file_location = config.pem_file_location

    # Build psql flags
    psql_flags = "-t" if tuple_only else ""

    # Build the SSH command with proper escaping
    full_cmd = f'ssh -i {pem_file_location} ec2-user@{bastion_host} "psql {db_url} {psql_flags} -c \\"{query}\\""'

    result = subprocess.run(
        full_cmd,
        shell=True,
        check=True,
        capture_output=True,
        text=True,
    )

    return result


def get_tenant_status(tenant_id: str) -> str | None:
    """
    Get tenant status from control plane database.

    Returns:
        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found

    Raises:
        TenantNotFoundInControlPlaneError: If the tenant table/relation does not exist
    """
    print(f"Fetching tenant status for tenant: {tenant_id}")

    query = f"SELECT application_status FROM tenant WHERE tenant_id = '{tenant_id}';"

    try:
        result = execute_control_plane_query(query, tuple_only=True)

        # Parse the output - psql returns the value with whitespace
        status = result.stdout.strip()

        if status:
            print(f"✓ Tenant status: {status}")
            return status
        else:
            print("⚠ Tenant not found in control plane")
            raise TenantNotFoundInControlPlaneError(
                f"Tenant {tenant_id} not found in control plane database"
            )
    except TenantNotFoundInControlPlaneError:
        # Re-raise without wrapping
        raise
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr if e.stderr else str(e)
        print(
            f"✗ Failed to get tenant status for {tenant_id}: {error_msg}",
            file=sys.stderr,
        )
        return None


def read_tenant_ids_from_csv(csv_path: str) -> list[str]:
    """Read tenant IDs from CSV file.

    Args:
        csv_path: Path to CSV file

    Returns:
        List of tenant IDs
    """
    if not Path(csv_path).exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    tenant_ids = []
    with open(csv_path, "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)

        # Check if tenant_id column exists
        if not reader.fieldnames or "tenant_id" not in reader.fieldnames:
            raise ValueError(
                f"CSV file must have a 'tenant_id' column. Found columns: {reader.fieldnames}"
            )

        for row in reader:
            tenant_id = row.get("tenant_id", "").strip()
            if tenant_id:
                tenant_ids.append(tenant_id)

    return tenant_ids


================================================
FILE: backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py
================================================
#!/usr/bin/env python3
"""
Mark connectors for deletion script that:
1. Finds all connectors for the specified tenant(s)
2. Cancels any scheduled indexing attempts
3. Marks each connector credential pair as DELETING
4. Triggers the cleanup task

Usage:
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py <tenant_id> [--force] [--concurrency N]
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv <csv_file_path> [--force] [--concurrency N]

Arguments:
    tenant_id        The tenant ID to process (required if not using --csv)
    --csv PATH       Path to CSV file containing tenant IDs to process
    --force          Skip all confirmation prompts (optional)
    --concurrency N  Process N tenants concurrently (default: 1)

Examples:
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789 --force
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv --force
    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py \
        --csv gated_tenants_no_query_3mo.csv --force --concurrency 16
"""

import subprocess
import sys
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from threading import Lock
from typing import Any

from scripts.tenant_cleanup.cleanup_utils import confirm_step
from scripts.tenant_cleanup.cleanup_utils import find_worker_pod
from scripts.tenant_cleanup.cleanup_utils import get_tenant_status
from scripts.tenant_cleanup.cleanup_utils import read_tenant_ids_from_csv

# Global lock for thread-safe printing
_print_lock: Lock = Lock()


def safe_print(*args: Any, **kwargs: Any) -> None:
    """Thread-safe print function."""
    with _print_lock:
        print(*args, **kwargs)


def run_connector_deletion(pod_name: str, tenant_id: str) -> None:
    """Mark all connector credential pairs for deletion.

    Args:
        pod_name: The Kubernetes pod name to execute on
        tenant_id: The tenant ID
    """
    safe_print("  Marking all connector credential pairs for deletion...")

    # Get the path to the script
    script_dir = Path(__file__).parent
    mark_deletion_script = (
        script_dir / "on_pod_scripts" / "execute_connector_deletion.py"
    )

    if not mark_deletion_script.exists():
        raise FileNotFoundError(
            f"execute_connector_deletion.py not found at {mark_deletion_script}"
        )

    try:
        # Copy script to pod
        subprocess.run(
            [
                "kubectl",
                "cp",
                str(mark_deletion_script),
                f"{pod_name}:/tmp/execute_connector_deletion.py",
            ],
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        result = subprocess.run(
            [
                "kubectl",
                "exec",
                pod_name,
                "--",
                "python",
                "/tmp/execute_connector_deletion.py",
                tenant_id,
                "--all",
            ],
        )

        if result.returncode != 0:
            raise RuntimeError(result.stderr)

    except subprocess.CalledProcessError as e:
        safe_print(
            f"  ✗ Failed to mark all connector credential pairs for deletion: {e}",
            file=sys.stderr,
        )
        if e.stderr:
            safe_print(f"    Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        safe_print(
            f"  ✗ Failed to mark all connector credential pairs for deletion: {e}",
            file=sys.stderr,
        )
        raise


def mark_tenant_connectors_for_deletion(
    tenant_id: str, pod_name: str, force: bool = False
) -> None:
    """
    Main function to mark all connectors for a tenant for deletion.

    Args:
        tenant_id: The tenant ID to process
        pod_name: The Kubernetes pod name to execute on
        force: If True, skip all confirmation prompts
    """
    safe_print(f"Processing connectors for tenant: {tenant_id}")

    # Check tenant status first
    safe_print(f"\n{'=' * 80}")
    try:
        tenant_status = get_tenant_status(tenant_id)

        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode
        if tenant_status and tenant_status != "GATED_ACCESS":
            safe_print(
                f"\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!"
            )
            safe_print(
                "This tenant may be active and should not have connectors deleted without careful review."
            )
            safe_print(f"{'=' * 80}\n")

            # Always ask for confirmation if not gated, even in force mode
            # Note: In parallel mode with force, this will still block
            if not force:
                response = input(
                    "Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: "
                )
                if response.lower() != "yes":
                    safe_print("Operation aborted - tenant is not GATED_ACCESS")
                    raise RuntimeError(f"Tenant {tenant_id} is not GATED_ACCESS")
            else:
                raise RuntimeError(f"Tenant {tenant_id} is not GATED_ACCESS")
        elif tenant_status == "GATED_ACCESS":
            safe_print("✓ Tenant status is GATED_ACCESS - safe to proceed")
        elif tenant_status is None:
            safe_print("⚠️  WARNING: Could not determine tenant status!")
            if not force:
                response = input("Continue anyway? Type 'yes' to confirm: ")
                if response.lower() != "yes":
                    safe_print("Operation aborted - could not verify tenant status")
                    raise RuntimeError(
                        f"Could not verify tenant status for {tenant_id}"
                    )
            else:
                raise RuntimeError(f"Could not verify tenant status for {tenant_id}")
    except Exception as e:
        safe_print(f"⚠️  WARNING: Failed to check tenant status: {e}")
        if not force:
            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                safe_print("Operation aborted - could not verify tenant status")
                raise
        else:
            raise RuntimeError(f"Failed to check tenant status for {tenant_id}")
    safe_print(f"{'=' * 80}\n")

    # Confirm before proceeding (only in non-force mode)
    if not confirm_step(
        f"Mark all connector credential pairs for deletion for tenant {tenant_id}?",
        force,
    ):
        safe_print("Operation cancelled by user")
        raise ValueError("Operation cancelled by user")

    run_connector_deletion(pod_name, tenant_id)

    # Print summary
    safe_print(
        f"✓ Marked all connector credential pairs for deletion for tenant {tenant_id}"
    )


def main() -> None:
    if len(sys.argv) < 2:
        print(
            "Usage: python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py <tenant_id> [--force] [--concurrency N]"
        )
        print(
            "       python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv <csv_file_path> [--force]"
            " [--concurrency N]"
        )
        print("\nArguments:")
        print(
            "  tenant_id        The tenant ID to process (required if not using --csv)"
        )
        print("  --csv PATH       Path to CSV file containing tenant IDs to process")
        print("  --force          Skip all confirmation prompts (optional)")
        print("  --concurrency N  Process N tenants concurrently (default: 1)")
        print("\nExamples:")
        print(
            "  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789"
        )
        print(
            "  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789 --force"
        )
        print(
            "  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv"
        )
        print(
            "  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv "
            "--force --concurrency 16"
        )
        sys.exit(1)

    # Parse arguments
    force = "--force" in sys.argv
    tenant_ids: list[str] = []

    # Parse concurrency
    concurrency: int = 1
    if "--concurrency" in sys.argv:
        try:
            concurrency_index = sys.argv.index("--concurrency")
            if concurrency_index + 1 >= len(sys.argv):
                print("Error: --concurrency flag requires a number", file=sys.stderr)
                sys.exit(1)
            concurrency = int(sys.argv[concurrency_index + 1])
            if concurrency < 1:
                print("Error: concurrency must be at least 1", file=sys.stderr)
                sys.exit(1)
        except ValueError:
            print("Error: --concurrency value must be an integer", file=sys.stderr)
            sys.exit(1)

    # Validate: concurrency > 1 requires --force
    if concurrency > 1 and not force:
        print(
            "Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)",
            file=sys.stderr,
        )
        sys.exit(1)

    # Check for CSV mode
    if "--csv" in sys.argv:
        try:
            csv_index: int = sys.argv.index("--csv")
            if csv_index + 1 >= len(sys.argv):
                print("Error: --csv flag requires a file path", file=sys.stderr)
                sys.exit(1)

            csv_path: str = sys.argv[csv_index + 1]
            tenant_ids = read_tenant_ids_from_csv(csv_path)

            if not tenant_ids:
                print("Error: No tenant IDs found in CSV file", file=sys.stderr)
                sys.exit(1)

            print(f"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}")

        except Exception as e:
            print(f"Error reading CSV file: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Single tenant mode
        tenant_ids = [sys.argv[1]]

    # Find heavy worker pod once before processing
    try:
        print("Finding worker pod...")
        pod_name: str = find_worker_pod()
        print(f"✓ Using worker pod: {pod_name}")
    except Exception as e:
        print(f"✗ Failed to find heavy worker pod: {e}", file=sys.stderr)
        print("Cannot proceed with marking connectors for deletion")
        sys.exit(1)

    # Initial confirmation (unless --force is used)
    if not force:
        print(f"\n{'=' * 80}")
        print("MARK CONNECTORS FOR DELETION - CONFIRMATION REQUIRED")
        print(f"{'=' * 80}")
        if len(tenant_ids) == 1:
            print(f"Tenant ID: {tenant_ids[0]}")
        else:
            print(f"Number of tenants: {len(tenant_ids)}")
            print(f"Tenant IDs: {', '.join(tenant_ids[:5])}")
            if len(tenant_ids) > 5:
                print(f"            ... and {len(tenant_ids) - 5} more")

        print(
            f"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}"
        )
        print(f"Concurrency: {concurrency} tenant(s) at a time")
        print("\nThis will:")
        print("  1. Fetch all connector credential pairs for each tenant")
        print("  2. Cancel any scheduled indexing attempts for each connector")
        print("  3. Mark each connector credential pair status as DELETING")
        print("  4. Trigger the connector deletion task")
        print(f"\n{'=' * 80}")
        print("WARNING: This will mark connectors for deletion!")
        print("The actual deletion will be performed by the background celery worker.")
        print(f"{'=' * 80}\n")

        response = input("Are you sure you want to proceed? Type 'yes' to confirm: ")

        if response.lower() != "yes":
            print("Operation aborted by user")
            sys.exit(0)
    else:
        if len(tenant_ids) == 1:
            print(
                f"⚠ FORCE MODE: Marking connectors for deletion for {tenant_ids[0]} without confirmations"
            )
        else:
            print(
                f"⚠ FORCE MODE: Marking connectors for deletion for {len(tenant_ids)} tenants "
                f"(concurrency: {concurrency}) without confirmations"
            )

    # Process tenants (in parallel if concurrency > 1)
    failed_tenants: list[tuple[str, str]] = []
    successful_tenants: list[str] = []

    if concurrency == 1:
        # Sequential processing
        for idx, tenant_id in enumerate(tenant_ids, 1):
            if len(tenant_ids) > 1:
                print(f"\n{'=' * 80}")
                print(f"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}")
                print(f"{'=' * 80}")

            try:
                mark_tenant_connectors_for_deletion(tenant_id, pod_name, force)
                successful_tenants.append(tenant_id)
            except Exception as e:
                print(
                    f"✗ Failed to process tenant {tenant_id}: {e}",
                    file=sys.stderr,
                )
                failed_tenants.append((tenant_id, str(e)))

                # If not in force mode and there are more tenants, ask if we should continue
                if not force and idx < len(tenant_ids):
                    response = input(
                        f"\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): "
                    )
                    if response.lower() != "y":
                        print("Operation aborted by user")
                        break
    else:
        # Parallel processing
        print(
            f"\nProcessing {len(tenant_ids)} tenant(s) with concurrency={concurrency}"
        )

        def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:
            """Process a single tenant. Returns (tenant_id, success, error_message)."""
            try:
                mark_tenant_connectors_for_deletion(tenant_id, pod_name, force)
                return (tenant_id, True, None)
            except Exception as e:
                return (tenant_id, False, str(e))

        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            # Submit all tasks
            future_to_tenant = {
                executor.submit(process_tenant, tenant_id): tenant_id
                for tenant_id in tenant_ids
            }

            # Process results as they complete
            completed: int = 0
            for future in as_completed(future_to_tenant):
                completed += 1
                tenant_id, success, error = future.result()

                if success:
                    successful_tenants.append(tenant_id)
                    safe_print(
                        f"[{completed}/{len(tenant_ids)}] ✓ Successfully processed {tenant_id}"
                    )
                else:
                    failed_tenants.append((tenant_id, error or "Unknown error"))
                    safe_print(
                        f"[{completed}/{len(tenant_ids)}] ✗ Failed to process {tenant_id}: {error}",
                        file=sys.stderr,
                    )

    # Print summary if multiple tenants
    if len(tenant_ids) > 1:
        print(f"\n{'=' * 80}")
        print("OPERATION SUMMARY")
        print(f"{'=' * 80}")
        print(f"Total tenants: {len(tenant_ids)}")
        print(f"Successful: {len(successful_tenants)}")
        print(f"Failed: {len(failed_tenants)}")

        if failed_tenants:
            print("\nFailed tenants:")
            for tenant_id, error in failed_tenants:
                print(f"  - {tenant_id}: {error}")

        print(f"{'=' * 80}")

        if failed_tenants:
            sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/no_bastion_analyze_tenants.py
================================================
#!/usr/bin/env python3
"""
Tenant analysis script that works WITHOUT bastion access.
Control plane and data plane are in SEPARATE clusters.

Usage:
    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py \
        [--skip-cache] \
        [--data-plane-context <context>] \
        [--control-plane-context <context>]
"""

import argparse
import csv
import json
import subprocess
import sys
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from pathlib import Path
from typing import Any

from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod
from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod


def collect_tenant_data(
    pod_name: str, context: str | None = None
) -> list[dict[str, Any]]:
    """Run the understand_tenants script on the data plane pod."""
    print(f"\nCollecting tenant data from data plane pod {pod_name}...")

    # Get the path to the understand_tenants script
    script_dir = Path(__file__).parent
    understand_tenants_script = script_dir / "on_pod_scripts" / "understand_tenants.py"

    if not understand_tenants_script.exists():
        raise FileNotFoundError(
            f"understand_tenants.py not found at {understand_tenants_script}"
        )

    # Copy script to pod
    print("Copying script to pod...")
    cmd_cp = [
        "kubectl",
        "cp",
        str(understand_tenants_script),
        f"{pod_name}:/tmp/understand_tenants.py",
    ]
    if context:
        cmd_cp.extend(["--context", context])

    subprocess.run(cmd_cp, check=True, capture_output=True)

    # Execute script on pod
    print("Executing script on pod (this may take a while)...")
    cmd_exec = ["kubectl", "exec", pod_name]
    if context:
        cmd_exec.extend(["--context", context])
    cmd_exec.extend(["--", "python", "/tmp/understand_tenants.py"])

    result = subprocess.run(cmd_exec, capture_output=True, text=True, check=True)

    # Show progress messages from stderr
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    # Parse JSON from stdout
    try:
        tenant_data = json.loads(result.stdout)
        print(f"Successfully collected data for {len(tenant_data)} tenants")
        return tenant_data
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON output: {e}", file=sys.stderr)
        print(f"stdout: {result.stdout[:500]}", file=sys.stderr)
        raise


def collect_control_plane_data_from_pod(
    pod_name: str, context: str | None = None
) -> list[dict[str, Any]]:
    """Collect control plane data by running a query on a control plane pod."""
    print(f"\nCollecting control plane data from pod {pod_name}...")

    # Create a script to query the control plane database
    query_script = """
import json
import os
from sqlalchemy import create_engine, text

# Try to get database URL from various environment patterns
control_db_url = None

# Pattern 1: POSTGRES_CONTROL_* variables
if os.environ.get("POSTGRES_CONTROL_HOST"):
    host = os.environ.get("POSTGRES_CONTROL_HOST")
    port = os.environ.get("POSTGRES_CONTROL_PORT", "5432")
    db = os.environ.get("POSTGRES_CONTROL_DB", "control")
    user = os.environ.get("POSTGRES_CONTROL_USER", "postgres")
    password = os.environ.get("POSTGRES_CONTROL_PASSWORD", "")
    if password:
        control_db_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"

# Pattern 2: Standard POSTGRES_* variables (in control plane cluster)
if not control_db_url and os.environ.get("POSTGRES_HOST"):
    host = os.environ.get("POSTGRES_HOST")
    port = os.environ.get("POSTGRES_PORT", "5432")
    db = os.environ.get("POSTGRES_DB", "danswer")
    user = os.environ.get("POSTGRES_USER", "postgres")
    password = os.environ.get("POSTGRES_PASSWORD", "")
    if password:
        control_db_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"

if not control_db_url:
    raise ValueError("Cannot determine control plane database connection")

engine = create_engine(control_db_url)

with engine.connect() as conn:
    result = conn.execute(
        text(
            "SELECT tenant_id, stripe_customer_id, created_at, active_seats, "
            "creator_email, referral_source, application_status FROM tenant"
        )
    )
    rows = [dict(row._mapping) for row in result]
    print(json.dumps(rows, default=str))
"""

    # Write the script to a temp file
    script_path = "/tmp/query_control_plane.py"

    print("  Creating control plane query script on pod...")
    cmd_write = ["kubectl", "exec", pod_name]
    if context:
        cmd_write.extend(["--context", context])
    cmd_write.extend(
        ["--", "bash", "-c", f"cat > {script_path} << 'EOF'\n{query_script}\nEOF"]
    )

    subprocess.run(cmd_write, check=True, capture_output=True)

    # Execute the script on the pod
    print("  Executing control plane query on pod...")
    cmd_exec = ["kubectl", "exec", pod_name]
    if context:
        cmd_exec.extend(["--context", context])
    cmd_exec.extend(["--", "python", script_path])

    result = subprocess.run(cmd_exec, capture_output=True, text=True, check=True)

    # Parse JSON output
    try:
        control_plane_data = json.loads(result.stdout)
        print(
            f"✓ Successfully collected {len(control_plane_data)} tenant records from control plane"
        )
        return control_plane_data
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON output: {e}", file=sys.stderr)
        print(f"stdout: {result.stdout[:500]}", file=sys.stderr)
        raise


def analyze_tenants(
    tenants: list[dict[str, Any]], control_plane_data: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Analyze tenant activity data and return gated tenants with no query in last 3 months."""

    print(f"\n{'=' * 80}")
    print(f"TENANT ANALYSIS REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'=' * 80}")
    print(f"Total tenants analyzed: {len(tenants)}\n")

    # Create a lookup dict for control plane data by tenant_id
    control_plane_lookup = {}
    for row in control_plane_data:
        tenant_id = row.get("tenant_id")
        tenant_status = row.get("application_status")
        if tenant_id:
            control_plane_lookup[tenant_id] = tenant_status

    # Calculate cutoff dates
    one_month_cutoff = datetime.now(timezone.utc) - timedelta(days=30)
    three_month_cutoff = datetime.now(timezone.utc) - timedelta(days=90)

    # Categorize tenants into 4 groups
    gated_no_query_3_months = []  # GATED_ACCESS + no query in last 3 months
    gated_query_1_3_months = []  # GATED_ACCESS + query between 1-3 months
    gated_query_1_month = []  # GATED_ACCESS + query in last 1 month
    everyone_else = []  # All other tenants

    for tenant in tenants:
        tenant_id = tenant.get("tenant_id")
        last_query_time = tenant.get("last_query_time")
        tenant_status = control_plane_lookup.get(tenant_id, "UNKNOWN")

        is_gated = tenant_status == "GATED_ACCESS"

        # Parse last query time
        if last_query_time:
            query_time = datetime.fromisoformat(last_query_time.replace("Z", "+00:00"))
        else:
            query_time = None

        # Categorize
        if is_gated:
            if query_time is None or query_time <= three_month_cutoff:
                gated_no_query_3_months.append(tenant)
            elif query_time <= one_month_cutoff:
                gated_query_1_3_months.append(tenant)
            else:  # query_time > one_month_cutoff
                gated_query_1_month.append(tenant)
        else:
            everyone_else.append(tenant)

    # Calculate document counts for each group
    gated_no_query_docs = sum(
        t.get("num_documents", 0) for t in gated_no_query_3_months
    )
    gated_1_3_month_docs = sum(
        t.get("num_documents", 0) for t in gated_query_1_3_months
    )
    gated_1_month_docs = sum(t.get("num_documents", 0) for t in gated_query_1_month)
    everyone_else_docs = sum(t.get("num_documents", 0) for t in everyone_else)

    print("=" * 80)
    print("TENANT CATEGORIZATION BY GATED ACCESS STATUS AND ACTIVITY")
    print("=" * 80)

    print("\n1. GATED_ACCESS + No query in last 3 months:")
    print(f"   Count: {len(gated_no_query_3_months):,}")
    print(f"   Total documents: {gated_no_query_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_no_query_docs / len(gated_no_query_3_months) if gated_no_query_3_months else 0:.2f}"
    )

    print("\n2. GATED_ACCESS + Query between 1-3 months ago:")
    print(f"   Count: {len(gated_query_1_3_months):,}")
    print(f"   Total documents: {gated_1_3_month_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_1_3_month_docs / len(gated_query_1_3_months) if gated_query_1_3_months else 0:.2f}"
    )

    print("\n3. GATED_ACCESS + Query in last 1 month:")
    print(f"   Count: {len(gated_query_1_month):,}")
    print(f"   Total documents: {gated_1_month_docs:,}")
    print(
        f"   Avg documents per tenant: {gated_1_month_docs / len(gated_query_1_month) if gated_query_1_month else 0:.2f}"
    )

    print("\n4. Everyone else (non-GATED_ACCESS):")
    print(f"   Count: {len(everyone_else):,}")
    print(f"   Total documents: {everyone_else_docs:,}")
    print(
        f"   Avg documents per tenant: {everyone_else_docs / len(everyone_else) if everyone_else else 0:.2f}"
    )

    total_docs = (
        gated_no_query_docs
        + gated_1_3_month_docs
        + gated_1_month_docs
        + everyone_else_docs
    )
    print(f"\nTotal documents across all tenants: {total_docs:,}")

    # Top 100 tenants by document count
    print("\n" + "=" * 80)
    print("TOP 100 TENANTS BY DOCUMENT COUNT")
    print("=" * 80)

    # Sort all tenants by document count
    sorted_tenants = sorted(
        tenants, key=lambda t: t.get("num_documents", 0), reverse=True
    )

    top_100 = sorted_tenants[:100]

    print(
        f"\n{'Rank':<6} {'Tenant ID':<45} {'Documents':>12} {'Users':>8} {'Last Query':<12} {'Group'}"
    )
    print("-" * 130)

    for idx, tenant in enumerate(top_100, 1):
        tenant_id = tenant.get("tenant_id", "Unknown")
        num_docs = tenant.get("num_documents", 0)
        num_users = tenant.get("num_users", 0)
        last_query = tenant.get("last_query_time", "Never")
        tenant_status = control_plane_lookup.get(tenant_id, "UNKNOWN")

        # Format the last query time
        if last_query and last_query != "Never":
            try:
                query_dt = datetime.fromisoformat(last_query.replace("Z", "+00:00"))
                last_query_str = query_dt.strftime("%Y-%m-%d")
            except Exception:
                last_query_str = last_query[:10] if len(last_query) > 10 else last_query
        else:
            last_query_str = "Never"

        # Determine group
        if tenant_status == "GATED_ACCESS":
            if last_query and last_query != "Never":
                query_time = datetime.fromisoformat(last_query.replace("Z", "+00:00"))
                if query_time <= three_month_cutoff:
                    group = "Gated - No query (3mo)"
                elif query_time <= one_month_cutoff:
                    group = "Gated - Query (1-3mo)"
                else:
                    group = "Gated - Query (1mo)"
            else:
                group = "Gated - No query (3mo)"
        else:
            group = f"Other ({tenant_status})"

        print(
            f"{idx:<6} {tenant_id:<45} {num_docs:>12,} {num_users:>8} {last_query_str:<12} {group}"
        )

    # Summary stats for top 100
    top_100_docs = sum(t.get("num_documents", 0) for t in top_100)

    print("\n" + "-" * 110)
    print(f"Top 100 total documents: {top_100_docs:,}")
    print(
        f"Percentage of all documents: {(top_100_docs / total_docs * 100) if total_docs > 0 else 0:.2f}%"
    )

    # Additional insights
    print("\n" + "=" * 80)
    print("ADDITIONAL INSIGHTS")
    print("=" * 80)

    # Tenants with no documents
    no_docs = [t for t in tenants if t.get("num_documents", 0) == 0]
    print(
        f"\nTenants with 0 documents: {len(no_docs):,} ({len(no_docs) / len(tenants) * 100:.2f}%)"
    )

    # Tenants with no users
    no_users = [t for t in tenants if t.get("num_users", 0) == 0]
    print(
        f"Tenants with 0 users: {len(no_users):,} ({len(no_users) / len(tenants) * 100:.2f}%)"
    )

    # Document distribution quartiles
    doc_counts = sorted([t.get("num_documents", 0) for t in tenants])
    if doc_counts:
        print("\nDocument count distribution:")
        print(f"  Median: {doc_counts[len(doc_counts) // 2]:,}")
        print(f"  75th percentile: {doc_counts[int(len(doc_counts) * 0.75)]:,}")
        print(f"  90th percentile: {doc_counts[int(len(doc_counts) * 0.90)]:,}")
        print(f"  95th percentile: {doc_counts[int(len(doc_counts) * 0.95)]:,}")
        print(f"  99th percentile: {doc_counts[int(len(doc_counts) * 0.99)]:,}")
        print(f"  Max: {doc_counts[-1]:,}")

    return gated_no_query_3_months


def find_recent_tenant_data() -> tuple[list[dict[str, Any]] | None, str | None]:
    """Find the most recent tenant data file if it's less than 7 days old."""
    current_dir = Path.cwd()
    tenant_data_files = list(current_dir.glob("tenant_data_*.json"))

    if not tenant_data_files:
        return None, None

    # Sort by modification time, most recent first
    tenant_data_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    most_recent = tenant_data_files[0]

    # Check if file is less than 7 days old
    file_age = datetime.now().timestamp() - most_recent.stat().st_mtime
    seven_days_in_seconds = 7 * 24 * 60 * 60

    if file_age < seven_days_in_seconds:
        file_age_days = file_age / (24 * 60 * 60)
        print(
            f"\n✓ Found recent tenant data: {most_recent.name} (age: {file_age_days:.1f} days)"
        )

        with open(most_recent, "r") as f:
            tenant_data = json.load(f)

        return tenant_data, str(most_recent)

    return None, None


def main() -> None:
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Analyze tenant data WITHOUT bastion access - control plane and data plane are separate clusters"
    )
    parser.add_argument(
        "--skip-cache",
        action="store_true",
        help="Skip cached tenant data and collect fresh data from pod",
    )
    parser.add_argument(
        "--data-plane-context",
        type=str,
        help="Kubectl context for data plane cluster (optional)",
    )
    parser.add_argument(
        "--control-plane-context",
        type=str,
        help="Kubectl context for control plane cluster (optional)",
    )
    args = parser.parse_args()

    try:
        # Step 1: Check for recent tenant data (< 7 days old) unless --skip-cache is set
        tenant_data = None
        cached_file = None

        if not args.skip_cache:
            tenant_data, cached_file = find_recent_tenant_data()

        if tenant_data:
            print(f"Using cached tenant data from: {cached_file}")
            print(f"Total tenants in cache: {len(tenant_data)}")
        else:
            if args.skip_cache:
                print("\n⚠ Skipping cache (--skip-cache flag set)")

            # Find data plane worker pod
            print("\n" + "=" * 80)
            print("CONNECTING TO DATA PLANE CLUSTER")
            print("=" * 80)
            data_plane_pod = find_worker_pod(args.data_plane_context)

            # Collect tenant data from data plane
            tenant_data = collect_tenant_data(data_plane_pod, args.data_plane_context)

            # Save raw data to file with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"tenant_data_{timestamp}.json"
            with open(output_file, "w") as f:
                json.dump(tenant_data, f, indent=2, default=str)
            print(f"\n✓ Raw data saved to: {output_file}")

        # Step 2: Collect control plane data from control plane cluster
        print("\n" + "=" * 80)
        print("CONNECTING TO CONTROL PLANE CLUSTER")
        print("=" * 80)
        control_plane_pod = find_background_pod(args.control_plane_context)
        control_plane_data = collect_control_plane_data_from_pod(
            control_plane_pod, args.control_plane_context
        )

        # Step 3: Analyze the data and get gated tenants without recent queries
        gated_no_query_3_months = analyze_tenants(tenant_data, control_plane_data)

        # Step 4: Export to CSV (sorted by num_documents descending)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        csv_file = f"gated_tenants_no_query_3mo_{timestamp}.csv"

        # Sort by num_documents in descending order
        sorted_tenants = sorted(
            gated_no_query_3_months,
            key=lambda t: t.get("num_documents", 0),
            reverse=True,
        )

        with open(csv_file, "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = [
                "tenant_id",
                "num_documents",
                "num_users",
                "last_query_time",
                "days_since_last_query",
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            now = datetime.now(timezone.utc)
            for tenant in sorted_tenants:
                # Calculate days since last query
                last_query_time = tenant.get("last_query_time")
                if last_query_time:
                    try:
                        query_dt = datetime.fromisoformat(
                            last_query_time.replace("Z", "+00:00")
                        )
                        days_since = str((now - query_dt).days)
                    except Exception:
                        days_since = "N/A"
                else:
                    days_since = "Never"

                writer.writerow(
                    {
                        "tenant_id": tenant.get("tenant_id", ""),
                        "num_documents": tenant.get("num_documents", 0),
                        "num_users": tenant.get("num_users", 0),
                        "last_query_time": last_query_time or "Never",
                        "days_since_last_query": days_since,
                    }
                )

        print(f"\n✓ CSV exported to: {csv_file}")
        print(
            f"  Total gated tenants with no query in last 3 months: {len(gated_no_query_3_months)}"
        )

    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}", file=sys.stderr)
        if e.stderr:
            print(f"stderr: {e.stderr}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/no_bastion_cleanup_tenants.py
================================================
#!/usr/bin/env python3
"""
Tenant cleanup script that works WITHOUT bastion access.
All queries run directly from pods.
Supports two-cluster architecture (data plane and control plane in separate clusters).

Usage:
    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \
        --data-plane-context <context> --control-plane-context <context> [--force]

    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \
        --data-plane-context <context> --control-plane-context <context> [--force]
"""

import csv
import json
import signal
import subprocess
import sys
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from threading import Lock

from scripts.tenant_cleanup.no_bastion_cleanup_utils import confirm_step
from scripts.tenant_cleanup.no_bastion_cleanup_utils import execute_control_plane_delete
from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod
from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod
from scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status
from scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv
from scripts.tenant_cleanup.no_bastion_cleanup_utils import (
    TenantNotFoundInControlPlaneError,
)


# Global lock for thread-safe operations
_print_lock: Lock = Lock()
_csv_lock: Lock = Lock()


def signal_handler(signum: int, frame: object) -> None:  # noqa: ARG001
    """Handle termination signals by killing active subprocess."""
    sys.exit(1)


def setup_scripts_on_pod(pod_name: str, context: str) -> None:
    """Copy all required scripts to the pod once at the beginning.

    Args:
        pod_name: Pod to copy scripts to
        context: kubectl context for the cluster
    """
    print("Setting up scripts on pod (one-time operation)...")

    script_dir = Path(__file__).parent
    scripts_to_copy = [
        (
            "on_pod_scripts/check_documents_deleted.py",
            "/tmp/check_documents_deleted.py",
        ),
        ("on_pod_scripts/cleanup_tenant_schema.py", "/tmp/cleanup_tenant_schema.py"),
        ("on_pod_scripts/get_tenant_users.py", "/tmp/get_tenant_users.py"),
        ("on_pod_scripts/get_tenant_index_name.py", "/tmp/get_tenant_index_name.py"),
    ]

    for local_path, remote_path in scripts_to_copy:
        local_file = script_dir / local_path
        if not local_file.exists():
            raise FileNotFoundError(f"Script not found: {local_file}")

        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend([str(local_file), f"{pod_name}:{remote_path}"])

        subprocess.run(cmd_cp, check=True, capture_output=True)

    print("✓ All scripts copied to pod")


def get_tenant_index_name(pod_name: str, tenant_id: str, context: str) -> str:
    """Get the default index name for the given tenant by running script on pod.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
        context: kubectl context for data plane cluster
    """
    print(f"Getting default index name for tenant: {tenant_id}")

    # Get the path to the script
    script_dir = Path(__file__).parent
    index_name_script = script_dir / "on_pod_scripts" / "get_tenant_index_name.py"

    if not index_name_script.exists():
        raise FileNotFoundError(
            f"get_tenant_index_name.py not found at {index_name_script}"
        )

    try:
        # Copy script to pod
        print("  Copying script to pod...")
        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend(
            [
                str(index_name_script),
                f"{pod_name}:/tmp/get_tenant_index_name.py",
            ]
        )

        subprocess.run(
            cmd_cp,
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        print("  Executing script on pod...")
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
                "--",
                "python",
                "/tmp/get_tenant_index_name.py",
                tenant_id,
            ]
        )

        result = subprocess.run(
            cmd_exec,
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            index_name = result_data.get("index_name")
            print(f"✓ Found index name: {index_name}")
            return index_name
        else:
            message = result_data.get("message", "Unknown error")
            raise RuntimeError(f"Failed to get index name: {message}")

    except subprocess.CalledProcessError as e:
        print(
            f"✗ Failed to get index name for tenant {tenant_id}: {e}", file=sys.stderr
        )
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(
            f"✗ Failed to get index name for tenant {tenant_id}: {e}", file=sys.stderr
        )
        raise


def get_tenant_users(pod_name: str, tenant_id: str, context: str) -> list[str]:
    """Get list of user emails from the tenant's data plane schema.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
                "--",
                "python",
                "/tmp/get_tenant_users.py",
                tenant_id,
            ]
        )

        result = subprocess.run(
            cmd_exec,
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            users = result_data.get("users", [])
            if users:
                print(f"✓ Found {len(users)} user(s):")
                for email in users:
                    print(f"    - {email}")
            else:
                print("  No users found in tenant")
            return users
        else:
            message = result_data.get("message", "Unknown error")
            print(f"⚠ Could not fetch users: {message}")
            return []

    except subprocess.CalledProcessError as e:
        print(f"⚠ Failed to get users for tenant {tenant_id}: {e}")
        if e.stderr:
            print(f"  Error details: {e.stderr}")
        return []
    except Exception as e:
        print(f"⚠ Failed to get users for tenant {tenant_id}: {e}")
        return []


def check_documents_deleted(pod_name: str, tenant_id: str, context: str) -> None:
    """Check if all documents and connector credential pairs have been deleted.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
                "--",
                "python",
                "/tmp/check_documents_deleted.py",
                tenant_id,
            ]
        )

        result = subprocess.run(
            cmd_exec,
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")

        if status == "success":
            message = result_data.get("message")
            print(f"✓ {message}")
        elif status == "not_found":
            message = result_data.get("message", "Schema not found")
            print(f"⚠ {message}")
        else:
            message = result_data.get("message", "Unknown error")
            cc_count = result_data.get("connector_credential_pair_count", 0)
            doc_count = result_data.get("document_count", 0)
            error_details = f"{message}"
            if cc_count > 0 or doc_count > 0:
                error_details += f"\n  ConnectorCredentialPairs: {cc_count}\n  Documents: {doc_count}"
            raise RuntimeError(error_details)

    except subprocess.CalledProcessError as e:
        print(
            f"✗ Failed to check documents for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(
            f"✗ Failed to check documents for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        raise


def drop_data_plane_schema(pod_name: str, tenant_id: str, context: str) -> None:
    """Drop the PostgreSQL schema for the given tenant by running script on pod.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
                "--",
                "python",
                "/tmp/cleanup_tenant_schema.py",
                tenant_id,
            ]
        )

        result = subprocess.run(
            cmd_exec,
            capture_output=True,
            text=True,
            check=True,
        )

        # Show progress messages from stderr
        if result.stderr:
            print(f"  {result.stderr}", end="")

        # Parse JSON result from stdout
        result_data = json.loads(result.stdout)
        status = result_data.get("status")
        message = result_data.get("message")

        if status == "success":
            print(f"✓ {message}")
        elif status == "not_found":
            print(f"⚠ {message}")
        else:
            print(f"✗ {message}", file=sys.stderr)
            raise RuntimeError(message)

    except subprocess.CalledProcessError as e:
        print(f"✗ Failed to drop schema for tenant {tenant_id}: {e}", file=sys.stderr)
        if e.stderr:
            print(f"  Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        print(f"✗ Failed to drop schema for tenant {tenant_id}: {e}", file=sys.stderr)
        raise


def cleanup_control_plane(
    pod_name: str, tenant_id: str, context: str, force: bool = False
) -> None:
    """Clean up control plane data via pod queries.

    Args:
        pod_name: Control plane pod to execute on
        tenant_id: Tenant ID to process
        context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    print(f"Cleaning up control plane data for tenant: {tenant_id}")

    # Delete in order respecting foreign key constraints
    delete_queries = [
        (
            "tenant_notification",
            f"DELETE FROM tenant_notification WHERE tenant_id = '{tenant_id}'",
        ),
        ("tenant_config", f"DELETE FROM tenant_config WHERE tenant_id = '{tenant_id}'"),
        ("subscription", f"DELETE FROM subscription WHERE tenant_id = '{tenant_id}'"),
        ("tenant", f"DELETE FROM tenant WHERE tenant_id = '{tenant_id}'"),
    ]

    try:
        for table_name, query in delete_queries:
            print(f"  Deleting from {table_name}...")

            if not confirm_step(f"Delete from {table_name}?", force):
                print(f"  Skipping deletion from {table_name}")
                continue

            execute_control_plane_delete(pod_name, query, context)

        print(f"✓ Successfully cleaned up control plane data for tenant: {tenant_id}")

    except Exception as e:
        print(
            f"✗ Failed to clean up control plane for tenant {tenant_id}: {e}",
            file=sys.stderr,
        )
        raise


def cleanup_tenant(
    tenant_id: str,
    data_plane_pod: str,
    control_plane_pod: str,
    data_plane_context: str,
    control_plane_context: str,
    force: bool = False,
) -> bool:
    """Main cleanup function that orchestrates all cleanup steps.

    Args:
        tenant_id: Tenant ID to process
        data_plane_pod: Data plane pod for schema operations
        control_plane_pod: Control plane pod for tenant record operations
        data_plane_context: kubectl context for data plane cluster
        control_plane_context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    print(f"Starting cleanup for tenant: {tenant_id}")

    # Track if tenant was not found in control plane (for force mode)
    tenant_not_found_in_control_plane = False

    # Check tenant status first (from control plane)
    print(f"\n{'=' * 80}")
    try:
        tenant_status = get_tenant_status(
            control_plane_pod, tenant_id, control_plane_context
        )

        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode
        if tenant_status and tenant_status != "GATED_ACCESS":
            print(
                f"\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!"
            )
            print(
                "This tenant may be active and should not be deleted without careful review."
            )
            print(f"{'=' * 80}\n")

            if force:
                print(f"Skipping cleanup for tenant {tenant_id} in force mode")
                return False

            # Always ask for confirmation if not gated
            response = input(
                "Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: "
            )
            if response.lower() != "yes":
                print("Cleanup aborted - tenant is not GATED_ACCESS")
                return False
        elif tenant_status == "GATED_ACCESS":
            print("✓ Tenant status is GATED_ACCESS - safe to proceed with cleanup")
        elif tenant_status is None:
            print("⚠️  WARNING: Could not determine tenant status!")

            if force:
                print(f"Skipping cleanup for tenant {tenant_id} in force mode")
                return False

            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                print("Cleanup aborted - could not verify tenant status")
                return False
    except TenantNotFoundInControlPlaneError as e:
        # Tenant/table not found in control plane
        error_str = str(e)
        print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")
        tenant_not_found_in_control_plane = True

        if force:
            print(
                "[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only"
            )
        else:
            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                print("Cleanup aborted - tenant not found in control plane")
                return False
    except Exception as e:
        # Other errors (not "not found")
        error_str = str(e)
        print(f"⚠️  WARNING: Failed to check tenant status: {error_str}")

        if force:
            print(f"Skipping cleanup for tenant {tenant_id} in force mode")
            return False

        response = input("Continue anyway? Type 'yes' to confirm: ")
        if response.lower() != "yes":
            print("Cleanup aborted - could not verify tenant status")
            return False
    print(f"{'=' * 80}\n")

    # Fetch tenant users for informational purposes (non-blocking) from data plane
    if not force:
        print(f"\n{'=' * 80}")
        try:
            get_tenant_users(data_plane_pod, tenant_id, data_plane_context)
        except Exception as e:
            print(f"⚠ Could not fetch tenant users: {e}")
        print(f"{'=' * 80}\n")

    # Step 1: Make sure all documents are deleted (data plane)
    print(f"\n{'=' * 80}")
    print("Step 1/3: Checking for remaining ConnectorCredentialPairs and Documents")
    print(f"{'=' * 80}")
    try:
        check_documents_deleted(data_plane_pod, tenant_id, data_plane_context)
    except Exception as e:
        print(f"✗ Document check failed: {e}", file=sys.stderr)
        print(
            "\nPlease ensure all ConnectorCredentialPairs and Documents are deleted before running cleanup."
        )
        print(
            "You may need to mark connectors for deletion and wait for cleanup to complete."
        )
        return False
    print(f"{'=' * 80}\n")

    # Step 2: Drop data plane schema
    if confirm_step(
        f"Step 2/3: Drop data plane schema '{tenant_id}' (CASCADE - will delete all tables, functions, etc.)",
        force,
    ):
        try:
            drop_data_plane_schema(data_plane_pod, tenant_id, data_plane_context)
        except Exception as e:
            print(f"✗ Failed at schema cleanup step: {e}", file=sys.stderr)
            if not force:
                response = input("Continue with control plane cleanup? (y/n): ")
                if response.lower() != "y":
                    print("Cleanup aborted by user")
                    return False
            else:
                print("[FORCE MODE] Continuing despite schema cleanup failure")
    else:
        print("Step 2 skipped by user")

    # Step 3: Clean up control plane (skip if tenant not found in control plane with --force)
    if tenant_not_found_in_control_plane:
        print(f"\n{'=' * 80}")
        print(
            "Step 3/3: Skipping control plane cleanup (tenant not found in control plane)"
        )
        print(f"{'=' * 80}\n")
    elif confirm_step(
        "Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)",
        force,
    ):
        try:
            cleanup_control_plane(
                control_plane_pod, tenant_id, control_plane_context, force
            )
        except Exception as e:
            print(f"✗ Failed at control plane cleanup step: {e}", file=sys.stderr)
            if not force:
                print("Control plane cleanup failed")
            else:
                print("[FORCE MODE] Control plane cleanup failed but continuing")
    else:
        print("Step 3 skipped by user")
        return False

    print(f"\n{'=' * 80}")
    print(f"✓ Cleanup completed for tenant: {tenant_id}")
    print(f"{'=' * 80}")
    return True


def main() -> None:
    # Register signal handlers for graceful shutdown
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    if len(sys.argv) < 2:
        print(
            "Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \\"
        )
        print(
            "           --data-plane-context <context> --control-plane-context <context> [--force]"
        )
        print(
            "       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \\"
        )
        print(
            "           --data-plane-context <context> --control-plane-context <context> [--force]"
        )
        print("\nThis version runs ALL operations from pods (no bastion required)")
        print("\nArguments:")
        print(
            "  tenant_id                   The tenant ID to clean up (required if not using --csv)"
        )
        print(
            "  --csv PATH                  Path to CSV file containing tenant IDs to clean up"
        )
        print("  --force                     Skip all confirmation prompts (optional)")
        print(
            "  --concurrency N             Process N tenants concurrently (default: 1)"
        )
        print(
            "  --data-plane-context CTX    Kubectl context for data plane cluster (required)"
        )
        print(
            "  --control-plane-context CTX Kubectl context for control plane cluster (required)"
        )
        sys.exit(1)

    # Parse arguments
    force = "--force" in sys.argv
    tenant_ids = []

    # Parse concurrency
    concurrency: int = 1
    if "--concurrency" in sys.argv:
        try:
            concurrency_index = sys.argv.index("--concurrency")
            if concurrency_index + 1 >= len(sys.argv):
                print("Error: --concurrency flag requires a number", file=sys.stderr)
                sys.exit(1)
            concurrency = int(sys.argv[concurrency_index + 1])
            if concurrency < 1:
                print("Error: concurrency must be at least 1", file=sys.stderr)
                sys.exit(1)
        except ValueError:
            print("Error: --concurrency value must be an integer", file=sys.stderr)
            sys.exit(1)

    # Validate: concurrency > 1 requires --force
    if concurrency > 1 and not force:
        print(
            "Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)",
            file=sys.stderr,
        )
        sys.exit(1)

    # Parse contexts (required)
    data_plane_context: str | None = None
    control_plane_context: str | None = None

    if "--data-plane-context" in sys.argv:
        try:
            idx = sys.argv.index("--data-plane-context")
            if idx + 1 >= len(sys.argv):
                print(
                    "Error: --data-plane-context requires a context name",
                    file=sys.stderr,
                )
                sys.exit(1)
            data_plane_context = sys.argv[idx + 1]
        except ValueError:
            pass

    if "--control-plane-context" in sys.argv:
        try:
            idx = sys.argv.index("--control-plane-context")
            if idx + 1 >= len(sys.argv):
                print(
                    "Error: --control-plane-context requires a context name",
                    file=sys.stderr,
                )
                sys.exit(1)
            control_plane_context = sys.argv[idx + 1]
        except ValueError:
            pass

    # Validate required contexts
    if not data_plane_context:
        print(
            "Error: --data-plane-context is required",
            file=sys.stderr,
        )
        sys.exit(1)

    if not control_plane_context:
        print(
            "Error: --control-plane-context is required",
            file=sys.stderr,
        )
        sys.exit(1)

    # Check for CSV mode
    if "--csv" in sys.argv:
        try:
            csv_index = sys.argv.index("--csv")
            if csv_index + 1 >= len(sys.argv):
                print("Error: --csv flag requires a file path", file=sys.stderr)
                sys.exit(1)

            csv_path = sys.argv[csv_index + 1]
            tenant_ids = read_tenant_ids_from_csv(csv_path)

            if not tenant_ids:
                print("Error: No tenant IDs found in CSV file", file=sys.stderr)
                sys.exit(1)

            print(f"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}")

        except Exception as e:
            print(f"Error reading CSV file: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Single tenant mode
        tenant_ids = [sys.argv[1]]

    # Initial confirmation (unless --force is used)
    if not force:
        print(f"\n{'=' * 80}")
        print("TENANT CLEANUP - NO BASTION VERSION")
        print(f"{'=' * 80}")
        if len(tenant_ids) == 1:
            print(f"Tenant ID: {tenant_ids[0]}")
        else:
            print(f"Number of tenants: {len(tenant_ids)}")
            print(f"Tenant IDs: {', '.join(tenant_ids[:5])}")
            if len(tenant_ids) > 5:
                print(f"            ... and {len(tenant_ids) - 5} more")

        print("\nThis will:")
        print("  1. Check for remaining documents and connector credential pairs")
        print("  2. Drop the data plane PostgreSQL schema (CASCADE)")
        print("  3. Clean up control plane data (all via pod queries)")
        print(f"\n{'=' * 80}")
        print("WARNING: This operation is IRREVERSIBLE!")
        print(f"{'=' * 80}\n")

        response = input("Are you sure you want to proceed? Type 'yes' to confirm: ")

        if response.lower() != "yes":
            print("Cleanup aborted by user")
            sys.exit(0)
    else:
        print(
            f"⚠ FORCE MODE: Running cleanup for {len(tenant_ids)} tenant(s) without confirmations"
        )

    # Find pods in both clusters before processing
    try:
        print("Finding data plane worker pod...")
        data_plane_pod = find_worker_pod(data_plane_context)
        print(f"✓ Using data plane worker pod: {data_plane_pod}")

        print("Finding control plane pod...")
        control_plane_pod = find_background_pod(control_plane_context)
        print(f"✓ Using control plane pod: {control_plane_pod}\n")

        # Copy all scripts to data plane pod once
        setup_scripts_on_pod(data_plane_pod, data_plane_context)
        print()
    except Exception as e:
        print(f"✗ Failed to find required pods or setup scripts: {e}", file=sys.stderr)
        print("Cannot proceed with cleanup")
        sys.exit(1)

    # Run cleanup for each tenant
    failed_tenants = []
    successful_tenants = []
    skipped_tenants = []

    # Open CSV file for writing successful cleanups in real-time
    csv_output_path = "cleaned_tenants.csv"
    with open(csv_output_path, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["tenant_id", "cleaned_at"])
        csv_file.flush()

        print(f"Writing successful cleanups to: {csv_output_path}\n")

        if concurrency == 1:
            # Sequential processing
            for idx, tenant_id in enumerate(tenant_ids, 1):
                if len(tenant_ids) > 1:
                    print(f"\n{'=' * 80}")
                    print(f"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}")
                    print(f"{'=' * 80}")

                try:
                    was_cleaned = cleanup_tenant(
                        tenant_id,
                        data_plane_pod,
                        control_plane_pod,
                        data_plane_context,
                        control_plane_context,
                        force,
                    )

                    if was_cleaned:
                        successful_tenants.append(tenant_id)

                        # Write to CSV immediately after successful cleanup
                        timestamp = datetime.utcnow().isoformat()
                        csv_writer.writerow([tenant_id, timestamp])
                        csv_file.flush()
                        print(f"✓ Recorded cleanup in {csv_output_path}")
                    else:
                        skipped_tenants.append(tenant_id)
                        print(f"⚠ Tenant {tenant_id} was skipped (not recorded in CSV)")

                except Exception as e:
                    print(
                        f"✗ Cleanup failed for tenant {tenant_id}: {e}", file=sys.stderr
                    )
                    failed_tenants.append((tenant_id, str(e)))

                    # If not in force mode and there are more tenants, ask if we should continue
                    if not force and idx < len(tenant_ids):
                        response = input(
                            f"\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): "
                        )
                        if response.lower() != "y":
                            print("Cleanup aborted by user")
                            break
        else:
            # Parallel processing
            print(
                f"Processing {len(tenant_ids)} tenant(s) with concurrency={concurrency}\n"
            )

            def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:
                """Process a single tenant. Returns (tenant_id, was_cleaned, error_message)."""
                try:
                    was_cleaned = cleanup_tenant(
                        tenant_id,
                        data_plane_pod,
                        control_plane_pod,
                        data_plane_context,
                        control_plane_context,
                        force,
                    )
                    return (tenant_id, was_cleaned, None)
                except Exception as e:
                    return (tenant_id, False, str(e))

            with ThreadPoolExecutor(max_workers=concurrency) as executor:
                # Submit all tasks
                future_to_tenant = {
                    executor.submit(process_tenant, tenant_id): tenant_id
                    for tenant_id in tenant_ids
                }

                # Process results as they complete
                completed = 0
                for future in as_completed(future_to_tenant):
                    completed += 1
                    tenant_id, was_cleaned, error = future.result()

                    if error:
                        with _print_lock:
                            print(
                                f"[{completed}/{len(tenant_ids)}] ✗ Failed: {tenant_id}: {error}",
                                file=sys.stderr,
                            )
                        failed_tenants.append((tenant_id, error))
                    elif was_cleaned:
                        with _csv_lock:
                            timestamp = datetime.utcnow().isoformat()
                            csv_writer.writerow([tenant_id, timestamp])
                            csv_file.flush()
                        successful_tenants.append(tenant_id)
                        with _print_lock:
                            print(
                                f"[{completed}/{len(tenant_ids)}] ✓ Cleaned: {tenant_id}"
                            )
                    else:
                        skipped_tenants.append(tenant_id)
                        with _print_lock:
                            print(
                                f"[{completed}/{len(tenant_ids)}] ⊘ Skipped: {tenant_id}"
                            )

    # Print summary
    if len(tenant_ids) > 1:
        print(f"\n{'=' * 80}")
        print("CLEANUP SUMMARY")
        print(f"{'=' * 80}")
        print(f"Total tenants: {len(tenant_ids)}")
        print(f"Successful: {len(successful_tenants)}")
        print(f"Skipped: {len(skipped_tenants)}")
        print(f"Failed: {len(failed_tenants)}")
        print(f"\nSuccessfully cleaned tenants written to: {csv_output_path}")

        if skipped_tenants:
            print(f"\nSkipped tenants ({len(skipped_tenants)}):")
            for tenant_id in skipped_tenants:
                print(f"  - {tenant_id}")

        if failed_tenants:
            print(f"\nFailed tenants ({len(failed_tenants)}):")
            for tenant_id, error in failed_tenants:
                print(f"  - {tenant_id}: {error}")

        print(f"{'=' * 80}")

        if failed_tenants:
            sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/no_bastion_cleanup_utils.py
================================================
"""
Cleanup utilities that work WITHOUT bastion access.
Control plane and data plane are in SEPARATE clusters.
"""

import csv
import json
import subprocess
import sys
from pathlib import Path


class TenantNotFoundInControlPlaneError(Exception):
    """Exception raised when tenant/table is not found in control plane."""


def find_worker_pod(context: str) -> str:
    """Find a user file processing worker pod using kubectl.

    Args:
        context: kubectl context to use
    """
    print(f"Finding user file processing worker pod in context {context}...")

    cmd = ["kubectl", "get", "po", "--context", context]

    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

    # Parse output and find user file processing worker pod
    lines = result.stdout.strip().split("\n")
    lines = lines[1:]  # Skip header

    import random

    random.shuffle(lines)

    for line in lines:
        if "celery-worker-user-file-processing" in line and "Running" in line:
            pod_name = line.split()[0]
            print(f"Found pod: {pod_name}")
            return pod_name

    raise RuntimeError("No running user file processing worker pod found")


def find_background_pod(context: str) -> str:
    """Find a pod for control plane operations.

    Args:
        context: kubectl context to use
    """
    print(f"Finding control plane pod in context {context}...")

    cmd = ["kubectl", "get", "po", "--context", context]

    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

    # Parse output and find suitable pod
    lines = result.stdout.strip().split("\n")
    lines = lines[1:]  # Skip header

    import random

    random.shuffle(lines)

    # Try to find control plane pods
    for line in lines:
        if (
            any(
                name in line
                for name in [
                    "background-processing-deployment",
                    "subscription-deployment",
                    "tenants-deployment",
                ]
            )
            and "Running" in line
        ):
            pod_name = line.split()[0]
            print(f"Found pod: {pod_name}")
            return pod_name

    raise RuntimeError("No suitable background pod found for control plane operations")


def confirm_step(message: str, force: bool = False) -> bool:
    """Ask for confirmation before executing a step.

    Args:
        message: The confirmation message to display
        force: If True, skip confirmation and return True

    Returns:
        True if user confirms or force is True, False otherwise
    """
    if force:
        print(f"[FORCE MODE] Skipping confirmation: {message}")
        return True

    print(f"\n{message}")
    response = input("Proceed? (y/n): ")
    return response.lower() == "y"


def execute_control_plane_query_from_pod(
    pod_name: str, query: str, context: str
) -> dict:
    """Execute a SQL query against control plane database from within a pod.

    Args:
        pod_name: The Kubernetes pod name to execute from
        query: The SQL query to execute
        context: kubectl context for control plane cluster

    Returns:
        Dict with 'success' bool, 'stdout' str, and optional 'error' str
    """
    # Create a Python script to run the query
    # This script tries multiple environment variable patterns

    # NOTE: whuang 01/08/2026: POSTGRES_CONTROL_* don't exist. This uses pattern 2 currently.

    query_script = f'''
import os
from sqlalchemy import create_engine, text

# Try to get control plane database URL from various environment patterns
control_db_url = None

# Pattern 1: POSTGRES_CONTROL_* variables
if os.environ.get("POSTGRES_CONTROL_HOST"):
    host = os.environ.get("POSTGRES_CONTROL_HOST")
    port = os.environ.get("POSTGRES_CONTROL_PORT", "5432")
    db = os.environ.get("POSTGRES_CONTROL_DB", "control")
    user = os.environ.get("POSTGRES_CONTROL_USER", "postgres")
    password = os.environ.get("POSTGRES_CONTROL_PASSWORD", "")
    if password:
        control_db_url = f"postgresql://{{user}}:{{password}}@{{host}}:{{port}}/{{db}}"

# Pattern 2: Standard POSTGRES_* variables (might point to control plane in this cluster)
if not control_db_url and os.environ.get("POSTGRES_HOST"):
    host = os.environ.get("POSTGRES_HOST")
    port = os.environ.get("POSTGRES_PORT", "5432")
    db = os.environ.get("POSTGRES_DB", "danswer")
    user = os.environ.get("POSTGRES_USER", "postgres")
    password = os.environ.get("POSTGRES_PASSWORD", "")
    if password:
        control_db_url = f"postgresql://{{user}}:{{password}}@{{host}}:{{port}}/{{db}}"

# Pattern 3: Direct URI
if not control_db_url:
    control_db_url = os.environ.get("DATABASE_URL") or os.environ.get("POSTGRES_URI")

if not control_db_url:
    raise ValueError("Cannot determine control plane database connection. No suitable environment variables found.")

engine = create_engine(control_db_url)

with engine.connect() as conn:
    result = conn.execute(text("""{query}"""))

    # Check if this is a SELECT query
    if result.returns_rows:
        rows = [dict(row._mapping) for row in result]
        import json
        print(json.dumps(rows, default=str))
    else:
        # For INSERT/UPDATE/DELETE, print rowcount
        print(f"{{result.rowcount}} rows affected")

    conn.commit()
'''

    # Write the script to a temp file on the pod
    script_path = "/tmp/control_plane_query.py"

    try:
        cmd_write = ["kubectl", "exec", "--context", context, pod_name]
        cmd_write.extend(
            [
                "--",
                "bash",
                "-c",
                f"cat > {script_path} << 'EOFQUERY'\n{query_script}\nEOFQUERY",
            ]
        )

        subprocess.run(
            cmd_write,
            check=True,
            capture_output=True,
        )

        # Execute the script
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(["--", "python", script_path])

        result = subprocess.run(
            cmd_exec,
            capture_output=True,
            text=True,
            check=True,
        )

        return {
            "success": True,
            "stdout": result.stdout.strip(),
            "stderr": result.stderr.strip() if result.stderr else "",
        }

    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "stdout": e.stdout if e.stdout else "",
            "error": e.stderr if e.stderr else str(e),
        }


def get_tenant_status(pod_name: str, tenant_id: str, context: str) -> str | None:
    """
    Get tenant status from control plane database via pod.

    Args:
        pod_name: The pod to execute the query from
        tenant_id: The tenant ID to look up
        context: kubectl context for control plane cluster

    Returns:
        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found

    Raises:
        TenantNotFoundInControlPlaneError: If the tenant record is not found in the table
    """
    print(f"Fetching tenant status for tenant: {tenant_id}")

    query = f"SELECT application_status FROM tenant WHERE tenant_id = '{tenant_id}'"

    result = execute_control_plane_query_from_pod(pod_name, query, context)

    if not result["success"]:
        error_msg = result.get("error", "Unknown error")
        print(
            f"✗ Failed to get tenant status for {tenant_id}: {error_msg}",
            file=sys.stderr,
        )
        return None

    try:
        # Parse JSON output
        rows = json.loads(result["stdout"])

        if rows and len(rows) > 0:
            status = rows[0].get("application_status")
            if status:
                print(f"✓ Tenant status: {status}")
                return status

        # Tenant record not found in control plane table
        print("⚠ Tenant not found in control plane")
        raise TenantNotFoundInControlPlaneError(
            f"Tenant {tenant_id} not found in control plane database"
        )

    except TenantNotFoundInControlPlaneError:
        # Re-raise without wrapping
        raise
    except (json.JSONDecodeError, KeyError, IndexError) as e:
        print(f"✗ Failed to parse tenant status: {e}", file=sys.stderr)
        return None


def execute_control_plane_delete(pod_name: str, query: str, context: str) -> bool:
    """Execute a DELETE query against control plane database from pod.

    Args:
        pod_name: The pod to execute the query from
        query: The DELETE query to execute
        context: kubectl context for control plane cluster

    Returns:
        True if successful, False otherwise
    """
    result = execute_control_plane_query_from_pod(pod_name, query, context)

    if result["success"]:
        print(f"    {result['stdout']}")
        return True
    else:
        print(f"    Error: {result.get('error', 'Unknown error')}", file=sys.stderr)
        return False


def read_tenant_ids_from_csv(csv_path: str) -> list[str]:
    """Read tenant IDs from CSV file.

    Args:
        csv_path: Path to CSV file

    Returns:
        List of tenant IDs
    """
    if not Path(csv_path).exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    tenant_ids = []
    with open(csv_path, "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)

        # Check if tenant_id column exists
        if not reader.fieldnames or "tenant_id" not in reader.fieldnames:
            raise ValueError(
                f"CSV file must have a 'tenant_id' column. Found columns: {reader.fieldnames}"
            )

        for row in reader:
            tenant_id = row.get("tenant_id", "").strip()
            if tenant_id:
                tenant_ids.append(tenant_id)

    return tenant_ids


================================================
FILE: backend/scripts/tenant_cleanup/no_bastion_mark_connectors.py
================================================
#!/usr/bin/env python3
"""
Mark connectors for deletion script that works WITHOUT bastion access.
All queries run directly from pods.
Supports two-cluster architecture (data plane and control plane in separate clusters).

Usage:
    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> \
        --data-plane-context <context> --control-plane-context <context> [--force]

    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \
        --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]
"""

import subprocess
import sys
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from threading import Lock
from typing import Any

from scripts.tenant_cleanup.no_bastion_cleanup_utils import confirm_step
from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod
from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod
from scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status
from scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv
from scripts.tenant_cleanup.no_bastion_cleanup_utils import (
    TenantNotFoundInControlPlaneError,
)

# Global lock for thread-safe printing
_print_lock: Lock = Lock()


def safe_print(*args: Any, **kwargs: Any) -> None:
    """Thread-safe print function."""
    with _print_lock:
        print(*args, **kwargs)


def run_connector_deletion(pod_name: str, tenant_id: str, context: str) -> None:
    """Mark all connector credential pairs for deletion.

    Args:
        pod_name: Data plane pod to execute deletion on
        tenant_id: Tenant ID to process
        context: kubectl context for data plane cluster
    """
    safe_print("  Marking all connector credential pairs for deletion...")

    # Get the path to the script
    script_dir = Path(__file__).parent
    mark_deletion_script = (
        script_dir / "on_pod_scripts" / "execute_connector_deletion.py"
    )

    if not mark_deletion_script.exists():
        raise FileNotFoundError(
            f"execute_connector_deletion.py not found at {mark_deletion_script}"
        )

    try:
        # Copy script to pod
        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend(
            [
                str(mark_deletion_script),
                f"{pod_name}:/tmp/execute_connector_deletion.py",
            ]
        )

        subprocess.run(
            cmd_cp,
            check=True,
            capture_output=True,
        )

        # Execute script on pod
        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
                "--",
                "python",
                "/tmp/execute_connector_deletion.py",
                tenant_id,
                "--all",
            ]
        )

        result = subprocess.run(cmd_exec)

        if result.returncode != 0:
            raise RuntimeError(result.stderr)

    except subprocess.CalledProcessError as e:
        safe_print(
            f"  ✗ Failed to mark all connector credential pairs for deletion: {e}",
            file=sys.stderr,
        )
        if e.stderr:
            safe_print(f"    Error details: {e.stderr}", file=sys.stderr)
        raise
    except Exception as e:
        safe_print(
            f"  ✗ Failed to mark all connector credential pairs for deletion: {e}",
            file=sys.stderr,
        )
        raise


def mark_tenant_connectors_for_deletion(
    tenant_id: str,
    data_plane_pod: str,
    control_plane_pod: str,
    data_plane_context: str,
    control_plane_context: str,
    force: bool = False,
) -> None:
    """Main function to mark all connectors for a tenant for deletion.

    Args:
        tenant_id: Tenant ID to process
        data_plane_pod: Data plane pod for connector operations
        control_plane_pod: Control plane pod for status checks
        data_plane_context: kubectl context for data plane cluster
        control_plane_context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    safe_print(f"Processing connectors for tenant: {tenant_id}")

    # Check tenant status first (from control plane)
    safe_print(f"\n{'=' * 80}")
    try:
        tenant_status = get_tenant_status(
            control_plane_pod, tenant_id, control_plane_context
        )

        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode
        if tenant_status and tenant_status != "GATED_ACCESS":
            safe_print(
                f"\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!"
            )
            safe_print(
                "This tenant may be active and should not have connectors deleted without careful review."
            )
            safe_print(f"{'=' * 80}\n")

            # Always ask for confirmation if not gated, even in force mode
            if not force:
                response = input(
                    "Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: "
                )
                if response.lower() != "yes":
                    safe_print("Operation aborted - tenant is not GATED_ACCESS")
                    raise RuntimeError(f"Tenant {tenant_id} is not GATED_ACCESS")
            else:
                raise RuntimeError(f"Tenant {tenant_id} is not GATED_ACCESS")
        elif tenant_status == "GATED_ACCESS":
            safe_print("✓ Tenant status is GATED_ACCESS - safe to proceed")
        elif tenant_status is None:
            safe_print("⚠️  WARNING: Could not determine tenant status!")
            if not force:
                response = input("Continue anyway? Type 'yes' to confirm: ")
                if response.lower() != "yes":
                    safe_print("Operation aborted - could not verify tenant status")
                    raise RuntimeError(
                        f"Could not verify tenant status for {tenant_id}"
                    )
            else:
                raise RuntimeError(f"Could not verify tenant status for {tenant_id}")
    except TenantNotFoundInControlPlaneError as e:
        # Tenant/table not found in control plane
        error_str = str(e)
        safe_print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")

        if force:
            safe_print(
                "[FORCE MODE] Tenant not found in control plane - continuing with connector deletion anyway"
            )
        else:
            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                safe_print("Operation aborted - tenant not found in control plane")
                raise RuntimeError(f"Tenant {tenant_id} not found in control plane")
    except RuntimeError:
        # Re-raise RuntimeError (from status checks above) without wrapping
        raise
    except Exception as e:
        safe_print(f"⚠️  WARNING: Failed to check tenant status: {e}")
        if not force:
            response = input("Continue anyway? Type 'yes' to confirm: ")
            if response.lower() != "yes":
                safe_print("Operation aborted - could not verify tenant status")
                raise
        else:
            raise RuntimeError(f"Failed to check tenant status for {tenant_id}")
    safe_print(f"{'=' * 80}\n")

    # Confirm before proceeding (only in non-force mode)
    if not confirm_step(
        f"Mark all connector credential pairs for deletion for tenant {tenant_id}?",
        force,
    ):
        safe_print("Operation cancelled by user")
        raise ValueError("Operation cancelled by user")

    run_connector_deletion(data_plane_pod, tenant_id, data_plane_context)

    # Print summary
    safe_print(
        f"✓ Marked all connector credential pairs for deletion for tenant {tenant_id}"
    )


def main() -> None:
    if len(sys.argv) < 2:
        print(
            "Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> \\"
        )
        print(
            "           --data-plane-context <context> --control-plane-context <context> [--force]"
        )
        print(
            "       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \\"
        )
        print(
            "           --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]"
        )
        print("\nThis version runs ALL operations from pods (no bastion required)")
        print("\nArguments:")
        print(
            "  tenant_id                   The tenant ID to process (required if not using --csv)"
        )
        print(
            "  --csv PATH                  Path to CSV file containing tenant IDs to process"
        )
        print("  --force                     Skip all confirmation prompts (optional)")
        print(
            "  --concurrency N             Process N tenants concurrently (default: 1)"
        )
        print(
            "  --data-plane-context CTX    Kubectl context for data plane cluster (required)"
        )
        print(
            "  --control-plane-context CTX Kubectl context for control plane cluster (required)"
        )
        sys.exit(1)

    # Parse arguments
    force = "--force" in sys.argv
    tenant_ids: list[str] = []

    # Parse contexts (required)
    data_plane_context: str | None = None
    control_plane_context: str | None = None

    if "--data-plane-context" in sys.argv:
        try:
            idx = sys.argv.index("--data-plane-context")
            if idx + 1 >= len(sys.argv):
                print(
                    "Error: --data-plane-context requires a context name",
                    file=sys.stderr,
                )
                sys.exit(1)
            data_plane_context = sys.argv[idx + 1]
        except ValueError:
            pass

    if "--control-plane-context" in sys.argv:
        try:
            idx = sys.argv.index("--control-plane-context")
            if idx + 1 >= len(sys.argv):
                print(
                    "Error: --control-plane-context requires a context name",
                    file=sys.stderr,
                )
                sys.exit(1)
            control_plane_context = sys.argv[idx + 1]
        except ValueError:
            pass

    # Validate required contexts
    if not data_plane_context:
        print(
            "Error: --data-plane-context is required",
            file=sys.stderr,
        )
        sys.exit(1)

    if not control_plane_context:
        print(
            "Error: --control-plane-context is required",
            file=sys.stderr,
        )
        sys.exit(1)

    # Parse concurrency
    concurrency: int = 1
    if "--concurrency" in sys.argv:
        try:
            concurrency_index = sys.argv.index("--concurrency")
            if concurrency_index + 1 >= len(sys.argv):
                print("Error: --concurrency flag requires a number", file=sys.stderr)
                sys.exit(1)
            concurrency = int(sys.argv[concurrency_index + 1])
            if concurrency < 1:
                print("Error: concurrency must be at least 1", file=sys.stderr)
                sys.exit(1)
        except ValueError:
            print("Error: --concurrency value must be an integer", file=sys.stderr)
            sys.exit(1)

    # Validate: concurrency > 1 requires --force
    if concurrency > 1 and not force:
        print(
            "Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)",
            file=sys.stderr,
        )
        sys.exit(1)

    # Check for CSV mode
    if "--csv" in sys.argv:
        try:
            csv_index: int = sys.argv.index("--csv")
            if csv_index + 1 >= len(sys.argv):
                print("Error: --csv flag requires a file path", file=sys.stderr)
                sys.exit(1)

            csv_path: str = sys.argv[csv_index + 1]
            tenant_ids = read_tenant_ids_from_csv(csv_path)

            if not tenant_ids:
                print("Error: No tenant IDs found in CSV file", file=sys.stderr)
                sys.exit(1)

            print(f"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}")

        except Exception as e:
            print(f"Error reading CSV file: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Single tenant mode
        tenant_ids = [sys.argv[1]]

    # Find pods in both clusters before processing
    try:
        print("Finding data plane worker pod...")
        data_plane_pod: str = find_worker_pod(data_plane_context)
        print(f"✓ Using data plane worker pod: {data_plane_pod}")

        print("Finding control plane pod...")
        control_plane_pod: str = find_background_pod(control_plane_context)
        print(f"✓ Using control plane pod: {control_plane_pod}")
    except Exception as e:
        print(f"✗ Failed to find required pods: {e}", file=sys.stderr)
        print("Cannot proceed with marking connectors for deletion")
        sys.exit(1)

    # Initial confirmation (unless --force is used)
    if not force:
        print(f"\n{'=' * 80}")
        print("MARK CONNECTORS FOR DELETION - NO BASTION VERSION")
        print(f"{'=' * 80}")
        if len(tenant_ids) == 1:
            print(f"Tenant ID: {tenant_ids[0]}")
        else:
            print(f"Number of tenants: {len(tenant_ids)}")
            print(f"Tenant IDs: {', '.join(tenant_ids[:5])}")
            if len(tenant_ids) > 5:
                print(f"            ... and {len(tenant_ids) - 5} more")

        print(
            f"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}"
        )
        print(f"Concurrency: {concurrency} tenant(s) at a time")
        print("\nThis will:")
        print("  1. Fetch all connector credential pairs for each tenant")
        print("  2. Cancel any scheduled indexing attempts for each connector")
        print("  3. Mark each connector credential pair status as DELETING")
        print("  4. Trigger the connector deletion task")
        print(f"\n{'=' * 80}")
        print("WARNING: This will mark connectors for deletion!")
        print("The actual deletion will be performed by the background celery worker.")
        print(f"{'=' * 80}\n")

        response = input("Are you sure you want to proceed? Type 'yes' to confirm: ")

        if response.lower() != "yes":
            print("Operation aborted by user")
            sys.exit(0)
    else:
        if len(tenant_ids) == 1:
            print(
                f"⚠ FORCE MODE: Marking connectors for deletion for {tenant_ids[0]} without confirmations"
            )
        else:
            print(
                f"⚠ FORCE MODE: Marking connectors for deletion for {len(tenant_ids)} tenants "
                f"(concurrency: {concurrency}) without confirmations"
            )

    # Process tenants (in parallel if concurrency > 1)
    failed_tenants: list[tuple[str, str]] = []
    successful_tenants: list[str] = []

    if concurrency == 1:
        # Sequential processing
        for idx, tenant_id in enumerate(tenant_ids, 1):
            if len(tenant_ids) > 1:
                print(f"\n{'=' * 80}")
                print(f"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}")
                print(f"{'=' * 80}")

            try:
                mark_tenant_connectors_for_deletion(
                    tenant_id,
                    data_plane_pod,
                    control_plane_pod,
                    data_plane_context,
                    control_plane_context,
                    force,
                )
                successful_tenants.append(tenant_id)
            except Exception as e:
                print(
                    f"✗ Failed to process tenant {tenant_id}: {e}",
                    file=sys.stderr,
                )
                failed_tenants.append((tenant_id, str(e)))

                # If not in force mode and there are more tenants, ask if we should continue
                if not force and idx < len(tenant_ids):
                    response = input(
                        f"\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): "
                    )
                    if response.lower() != "y":
                        print("Operation aborted by user")
                        break
    else:
        # Parallel processing
        print(
            f"\nProcessing {len(tenant_ids)} tenant(s) with concurrency={concurrency}"
        )

        def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:
            """Process a single tenant. Returns (tenant_id, success, error_message)."""
            try:
                mark_tenant_connectors_for_deletion(
                    tenant_id,
                    data_plane_pod,
                    control_plane_pod,
                    data_plane_context,
                    control_plane_context,
                    force,
                )
                return (tenant_id, True, None)
            except Exception as e:
                return (tenant_id, False, str(e))

        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            # Submit all tasks
            future_to_tenant = {
                executor.submit(process_tenant, tenant_id): tenant_id
                for tenant_id in tenant_ids
            }

            # Process results as they complete
            completed: int = 0
            for future in as_completed(future_to_tenant):
                completed += 1
                tenant_id, success, error = future.result()

                if success:
                    successful_tenants.append(tenant_id)
                    safe_print(
                        f"[{completed}/{len(tenant_ids)}] ✓ Successfully processed {tenant_id}"
                    )
                else:
                    failed_tenants.append((tenant_id, error or "Unknown error"))
                    safe_print(
                        f"[{completed}/{len(tenant_ids)}] ✗ Failed to process {tenant_id}: {error}",
                        file=sys.stderr,
                    )

    # Print summary if multiple tenants
    if len(tenant_ids) > 1:
        print(f"\n{'=' * 80}")
        print("OPERATION SUMMARY")
        print(f"{'=' * 80}")
        print(f"Total tenants: {len(tenant_ids)}")
        print(f"Successful: {len(successful_tenants)}")
        print(f"Failed: {len(failed_tenants)}")

        if failed_tenants:
            print("\nFailed tenants:")
            for tenant_id, error in failed_tenants:
                print(f"  - {tenant_id}: {error}")

        print(f"{'=' * 80}")

        if failed_tenants:
            sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/check_documents_deleted.py
================================================
#!/usr/bin/env python3
"""
Script to check for remaining ConnectorCredentialPairs and Documents in a tenant's schema.
Must be run on a pod with access to the data plane PostgreSQL database.

Usage:
    python check_documents_deleted.py <tenant_id>

Output:
    JSON object with status, message, and counts of remaining records
"""

import json
import sys

from sqlalchemy import func
from sqlalchemy import select

from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document


def check_documents_deleted(tenant_id: str) -> dict:
    """
    Check for remaining ConnectorCredentialPairs and Documents in tenant schema.

    Args:
        tenant_id: The tenant ID to query

    Returns:
        Dictionary with status and counts of remaining records
    """
    try:
        print(
            f"Checking for remaining documents in tenant: {tenant_id}",
            file=sys.stderr,
        )

        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Count ConnectorCredentialPairs
            cc_count = db_session.scalar(
                select(func.count()).select_from(ConnectorCredentialPair)
            )

            # Count Documents
            doc_count = db_session.scalar(select(func.count()).select_from(Document))

        # Handle None values from scalar (should not happen but mypy needs it)
        cc_count = cc_count or 0
        doc_count = doc_count or 0

        # If any records remain beyond acceptable thresholds, return error status
        is_deletable = cc_count == 0 or doc_count <= 5
        if not is_deletable:
            return {
                "status": "error",
                "message": (
                    f"Found {cc_count} ConnectorCredentialPair(s) and {doc_count} Document(s) "
                    "still remaining. Must have 0 ConnectorCredentialPairs and no more than "
                    "5 Documents before cleanup."
                ),
                "connector_credential_pair_count": cc_count,
                "document_count": doc_count,
            }

        # All clear
        return {
            "status": "success",
            "message": "No ConnectorCredentialPairs or Documents found - safe to proceed",
            "connector_credential_pair_count": 0,
            "document_count": 0,
        }

    except Exception as e:
        error_msg = str(e)
        print(f"Error checking documents: {error_msg}", file=sys.stderr)
        # Check if it's a schema not found error
        if "does not exist" in error_msg:
            return {
                "status": "not_found",
                "message": f"Schema '{tenant_id}' does not exist",
            }
        return {"status": "error", "message": f"Error checking documents: {error_msg}"}


def main() -> None:
    if len(sys.argv) != 2:
        print(
            json.dumps(
                {
                    "status": "error",
                    "message": "Usage: python check_documents_deleted.py <tenant_id>",
                }
            )
        )
        sys.exit(1)

    tenant_id = sys.argv[1]

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    result = check_documents_deleted(tenant_id)
    print(json.dumps(result))


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/cleanup_tenant_schema.py
================================================
#!/usr/bin/env python3
"""
Script to drop a tenant's PostgreSQL schema.
Designed to be run on a heavy worker pod.

Usage:
    python cleanup_tenant_schema.py <tenant_id>
"""

import json
import sys

from sqlalchemy import text

from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import SqlEngine


def drop_data_plane_schema(tenant_id: str) -> dict[str, str]:
    """Drop the PostgreSQL schema for the given tenant."""
    print(f"Dropping data plane schema for tenant: {tenant_id}", file=sys.stderr)

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    try:
        with get_session_with_shared_schema() as session:
            # First, verify the schema exists
            check_schema_query = text(
                """
                SELECT nspname
                FROM pg_namespace
                WHERE nspname = :schema_name
            """
            )

            result = session.execute(
                check_schema_query, {"schema_name": tenant_id}
            ).fetchone()

            if not result:
                print(f"Schema {tenant_id} does not exist", file=sys.stderr)
                return {
                    "status": "not_found",
                    "message": f"Schema {tenant_id} does not exist",
                }

            # Drop the schema with CASCADE to remove all objects within it
            drop_schema_query = text(f'DROP SCHEMA IF EXISTS "{tenant_id}" CASCADE')
            session.execute(drop_schema_query)
            session.commit()

            print(f"Successfully dropped schema: {tenant_id}", file=sys.stderr)

            # Delete the tenant mapping from user_tenant_mapping table
            delete_mapping_query = text(
                """
                DELETE FROM user_tenant_mapping
                WHERE tenant_id = :tenant_id
                """
            )
            session.execute(delete_mapping_query, {"tenant_id": tenant_id})
            session.commit()

            print(
                f"Successfully deleted tenant mapping for: {tenant_id}", file=sys.stderr
            )
            return {
                "status": "success",
                "message": f"Successfully dropped schema: {tenant_id}",
            }

    except Exception as e:
        print(f"Failed to drop schema for tenant {tenant_id}: {e}", file=sys.stderr)
        return {"status": "error", "message": str(e)}


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python cleanup_tenant_schema.py <tenant_id>", file=sys.stderr)
        sys.exit(1)

    tenant_id = sys.argv[1]

    result = drop_data_plane_schema(tenant_id)

    # Output result as JSON to stdout for easy parsing
    print(json.dumps(result))

    # Exit with error code if failed
    if result["status"] == "error":
        sys.exit(1)


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/execute_connector_deletion.py
================================================
#!/usr/bin/env python3
"""
Script to mark connector credential pairs for deletion.
Runs on a Kubernetes pod with access to the data plane database.

Usage:
    # Mark a specific connector for deletion
    python mark_connector_for_deletion.py <tenant_id> <cc_pair_id>

    # Mark all connectors for deletion
    python mark_connector_for_deletion.py <tenant_id> --all

Output:
    JSON to stdout with structure:
    {
        "status": "success" | "error",
        "message": str,
        "deleted_count": int (when using --all),
        "timing": {
            "total_seconds": float,
            "per_connector": [...]
        }
    }
"""

import json
import sys
import time
from typing import Any

from sqlalchemy.orm import Session

from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import update_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair


def mark_connector_for_deletion(
    tenant_id: str, cc_pair_id: int, db_session: Session | None = None
) -> dict[str, Any]:
    """Mark a connector credential pair for deletion.

    Args:
        tenant_id: The tenant ID
        cc_pair_id: The connector credential pair ID
        db_session: Optional database session (if None, creates a new one)

    Returns:
        Dict with status, message, and timing
    """
    timing: dict[str, float] = {}
    start_time: float = time.time()

    try:
        print(
            f"Marking connector credential pair {cc_pair_id} for deletion",
            file=sys.stderr,
        )

        def _mark_deletion(db_sess: Session) -> dict[str, Any]:
            # Get the connector credential pair
            fetch_start: float = time.time()
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_sess,
                cc_pair_id=cc_pair_id,
            )
            timing["fetch_cc_pair_seconds"] = time.time() - fetch_start

            if not cc_pair:
                return {
                    "status": "error",
                    "message": f"Connector credential pair {cc_pair_id} not found",
                    "timing": timing,
                }

            # Cancel any scheduled indexing attempts
            print(
                f"Canceling indexing attempts for CC pair {cc_pair_id}",
                file=sys.stderr,
            )
            cancel_start: float = time.time()
            cancel_indexing_attempts_for_ccpair(
                cc_pair_id=cc_pair.id,
                db_session=db_sess,
                include_secondary_index=True,
            )
            timing["cancel_indexing_seconds"] = time.time() - cancel_start

            # Mark as deleting
            print(
                f"Updating CC pair {cc_pair_id} status to DELETING",
                file=sys.stderr,
            )
            update_start: float = time.time()
            update_connector_credential_pair_from_id(
                db_session=db_sess,
                cc_pair_id=cc_pair.id,
                status=ConnectorCredentialPairStatus.DELETING,
            )
            timing["update_status_seconds"] = time.time() - update_start

            commit_start: float = time.time()
            db_sess.commit()
            timing["commit_seconds"] = time.time() - commit_start

            return {
                "status": "success",
                "message": f"Marked connector credential pair {cc_pair_id} for deletion",
                "timing": timing,
            }

        result: dict[str, Any]
        if db_session:
            result = _mark_deletion(db_session)
        else:
            with get_session_with_tenant(tenant_id=tenant_id) as db_sess:
                result = _mark_deletion(db_sess)

        # Trigger the deletion check task
        print(
            "Triggering connector deletion check task",
            file=sys.stderr,
        )
        task_start: float = time.time()
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )
        timing["send_task_seconds"] = time.time() - task_start
        timing["total_seconds"] = time.time() - start_time

        result["timing"] = timing
        return result

    except Exception as e:
        print(
            f"Error marking connector for deletion: {e}",
            file=sys.stderr,
        )
        timing["total_seconds"] = time.time() - start_time
        return {
            "status": "error",
            "message": str(e),
            "timing": timing,
        }


def mark_all_connectors_for_deletion(tenant_id: str) -> dict[str, Any]:
    """Mark all connector credential pairs for a tenant for deletion.

    Args:
        tenant_id: The tenant ID

    Returns:
        Dict with status, message, deleted_count, and timing
    """
    overall_start: float = time.time()
    per_connector_timing: list[dict[str, Any]] = []

    try:
        print(
            f"Marking all connector credential pairs for tenant {tenant_id} for deletion",
            file=sys.stderr,
        )

        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Get all connector credential pairs
            fetch_all_start: float = time.time()
            cc_pairs = get_connector_credential_pairs(db_session=db_session)
            fetch_all_time: float = time.time() - fetch_all_start

            print(
                f"Found {len(cc_pairs)} connector credential pairs to delete",
                file=sys.stderr,
            )

            if not cc_pairs:
                return {
                    "status": "success",
                    "message": "No connector credential pairs found for tenant",
                    "deleted_count": 0,
                    "timing": {
                        "fetch_all_seconds": fetch_all_time,
                        "total_seconds": time.time() - overall_start,
                    },
                }

            deleted_count: int = 0
            errors: list[str] = []

            for cc_pair in cc_pairs:
                connector_start: float = time.time()
                print(
                    f"Processing CC pair {cc_pair.id} ({deleted_count + 1}/{len(cc_pairs)})",
                    file=sys.stderr,
                )

                # Cancel any scheduled indexing attempts
                cancel_start: float = time.time()
                cancel_indexing_attempts_for_ccpair(
                    cc_pair_id=cc_pair.id,
                    db_session=db_session,
                    include_secondary_index=True,
                )
                cancel_time: float = time.time() - cancel_start

                # Mark as deleting
                update_start: float = time.time()
                try:
                    update_connector_credential_pair_from_id(
                        db_session=db_session,
                        cc_pair_id=cc_pair.id,
                        status=ConnectorCredentialPairStatus.DELETING,
                    )
                    deleted_count += 1
                except Exception as e:
                    errors.append(f"CC pair {cc_pair.id}: {str(e)}")
                    print(
                        f"Error updating CC pair {cc_pair.id}: {e}",
                        file=sys.stderr,
                    )

                update_time: float = time.time() - update_start
                connector_total_time: float = time.time() - connector_start

                per_connector_timing.append(
                    {
                        "cc_pair_id": cc_pair.id,
                        "cancel_indexing_seconds": cancel_time,
                        "update_status_seconds": update_time,
                        "total_seconds": connector_total_time,
                    }
                )

            # Commit all changes
            commit_start: float = time.time()
            db_session.commit()
            commit_time: float = time.time() - commit_start

        # Trigger the deletion check task
        print(
            "Triggering connector deletion check task",
            file=sys.stderr,
        )
        task_start: float = time.time()
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )
        task_time: float = time.time() - task_start

        total_time: float = time.time() - overall_start

        result: dict[str, Any] = {
            "status": "success",
            "message": f"Marked {deleted_count} connector credential pairs for deletion",
            "deleted_count": deleted_count,
            "timing": {
                "fetch_all_seconds": fetch_all_time,
                "commit_seconds": commit_time,
                "send_task_seconds": task_time,
                "total_seconds": total_time,
                "per_connector": per_connector_timing,
            },
        }

        if errors:
            result["errors"] = errors

        return result

    except Exception as e:
        print(
            f"Error marking all connectors for deletion: {e}",
            file=sys.stderr,
        )
        return {
            "status": "error",
            "message": str(e),
            "timing": {
                "total_seconds": time.time() - overall_start,
                "per_connector": per_connector_timing,
            },
        }


def main() -> None:
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            json.dumps(
                {
                    "status": "error",
                    "message": "Usage: python mark_connector_for_deletion.py <tenant_id> [<cc_pair_id>|--all]",
                }
            )
        )
        sys.exit(1)

    tenant_id: str = sys.argv[1]

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    result: dict[str, Any]
    # Check if we should mark all connectors or just one
    if len(sys.argv) == 3:
        second_arg: str = sys.argv[2]
        if second_arg == "--all":
            result = mark_all_connectors_for_deletion(tenant_id)
        else:
            try:
                cc_pair_id: int = int(second_arg)
                result = mark_connector_for_deletion(tenant_id, cc_pair_id)
            except ValueError:
                print(
                    json.dumps(
                        {
                            "status": "error",
                            "message": "cc_pair_id must be an integer or use --all",
                        }
                    )
                )
                sys.exit(1)
    else:
        # If only tenant_id is provided, show error
        print(
            json.dumps(
                {
                    "status": "error",
                    "message": "Usage: python mark_connector_for_deletion.py <tenant_id> [<cc_pair_id>|--all]",
                }
            )
        )
        sys.exit(1)

    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_connectors.py
================================================
#!/usr/bin/env python3
"""
Script to fetch connector credential pairs for a tenant.
Runs on a Kubernetes pod with access to the data plane database.

Usage:
    python get_tenant_connectors.py <tenant_id>

Output:
    JSON to stdout with structure:
    {
        "status": "success" | "error",
        "connectors": [
            {
                "id": int,
                "connector_id": int,
                "credential_id": int,
                "name": str,
                "status": str
            },
            ...
        ] (if success),
        "message": str (if error)
    }
"""

import json
import sys

from sqlalchemy import select

from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.models import ConnectorCredentialPair


def get_tenant_connectors(tenant_id: str) -> dict:
    """Get all connector credential pairs for a tenant.

    Args:
        tenant_id: The tenant ID to query

    Returns:
        Dict with status and list of connectors or error message
    """
    try:
        print(
            f"Fetching connector credential pairs for tenant: {tenant_id}",
            file=sys.stderr,
        )

        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Get all connector credential pairs
            stmt = select(ConnectorCredentialPair)
            cc_pairs = db_session.execute(stmt).scalars().all()

            connectors = [
                {
                    "id": cc.id,
                    "connector_id": cc.connector_id,
                    "credential_id": cc.credential_id,
                    "name": cc.name,
                    "status": cc.status.value,
                }
                for cc in cc_pairs
            ]

            print(
                f"Found {len(connectors)} connector credential pair(s)",
                file=sys.stderr,
            )

            return {
                "status": "success",
                "connectors": connectors,
            }

    except Exception as e:
        print(f"Error fetching connectors: {e}", file=sys.stderr)
        return {
            "status": "error",
            "message": str(e),
        }


def main() -> None:
    if len(sys.argv) != 2:
        print(
            json.dumps(
                {
                    "status": "error",
                    "message": "Usage: python get_tenant_connectors.py <tenant_id>",
                }
            )
        )
        sys.exit(1)

    tenant_id = sys.argv[1]

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    result = get_tenant_connectors(tenant_id)
    print(json.dumps(result))


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_index_name.py
================================================
#!/usr/bin/env python3
"""
Script to get the default index name for a tenant.
Designed to be run on a heavy worker pod.

Usage:
    python get_tenant_index_name.py <tenant_id>
"""

import json
import sys

from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_current_search_settings


def get_tenant_index_name(tenant_id: str) -> dict[str, str]:
    """Get the default index name for the given tenant."""
    print(f"Getting default index name for tenant: {tenant_id}", file=sys.stderr)

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    try:
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            search_settings = get_current_search_settings(db_session)
            index_name = search_settings.index_name
            print(f"Found index name: {index_name}", file=sys.stderr)
            return {"status": "success", "index_name": index_name}

    except Exception as e:
        print(f"Failed to get index name for tenant {tenant_id}: {e}", file=sys.stderr)
        return {"status": "error", "message": str(e)}


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python get_tenant_index_name.py <tenant_id>", file=sys.stderr)
        sys.exit(1)

    tenant_id = sys.argv[1]

    result = get_tenant_index_name(tenant_id)

    # Output result as JSON to stdout for easy parsing
    print(json.dumps(result))

    # Exit with error code if failed
    if result["status"] == "error":
        sys.exit(1)


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_users.py
================================================
#!/usr/bin/env python3
"""
Script to fetch user emails from a tenant's data plane schema.
Must be run on a pod with access to the data plane PostgreSQL database.

Usage:
    python get_tenant_users.py <tenant_id>

Output:
    JSON object with status and users list
"""

import json
import sys

from sqlalchemy import select

from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.models import User


def get_tenant_users(tenant_id: str) -> dict:
    """
    Fetch user emails from the tenant's data plane schema.

    Args:
        tenant_id: The tenant ID to query

    Returns:
        Dictionary with status and users list
    """
    try:
        print(f"Querying users for tenant: {tenant_id}", file=sys.stderr)

        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # Query users from the tenant schema
            # Select only the email column
            user_email_column = User.__table__.c.email
            stmt = select(user_email_column).order_by(user_email_column)
            result = db_session.execute(stmt)
            users = [row[0] for row in result]

        return {"status": "success", "users": users}

    except Exception as e:
        error_msg = str(e)
        print(f"Error fetching users: {error_msg}", file=sys.stderr)
        # Check if it's a schema not found error
        if "does not exist" in error_msg:
            return {
                "status": "not_found",
                "message": f"Schema '{tenant_id}' does not exist",
                "users": [],
            }
        return {"status": "error", "message": error_msg, "users": []}


def main() -> None:
    if len(sys.argv) != 2:
        print(
            json.dumps(
                {
                    "status": "error",
                    "message": "Usage: python get_tenant_users.py <tenant_id>",
                }
            )
        )
        sys.exit(1)

    tenant_id = sys.argv[1]

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    result = get_tenant_users(tenant_id)
    print(json.dumps(result))


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/tenant_cleanup/on_pod_scripts/understand_tenants.py
================================================
import json
import sys
from typing import Any

from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import SqlEngine


def get_tenant_activity_summary(session: Session) -> list[dict[str, Any]]:
    """Return a list of dicts, one per tenant, with last query info, doc count, and user count."""

    # Step 1: fetch all tenant schemas
    tenant_schemas = [
        row[0]
        for row in session.execute(
            text(
                """
            SELECT nspname
            FROM pg_namespace
            WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'public')
                AND nspname NOT LIKE 'pg_toast%%'
                AND nspname NOT LIKE 'pg_temp%%'
            ORDER BY nspname
        """
            )
        )
    ]

    print(f"Found {len(tenant_schemas)} tenant schemas", file=sys.stderr)

    summaries = []

    # Step 2: loop through each tenant schema
    for idx, schema in enumerate(tenant_schemas):
        if idx % 100 == 0:
            print(f"Processing tenant {idx}/{len(tenant_schemas)}", file=sys.stderr)

        try:
            # Use a single query to get all data at once
            query = text(
                f"""
                SELECT
                    :tenant_id AS tenant_id,
                    (
                        SELECT time_sent
                        FROM "{schema}".chat_message
                        WHERE message_type = 'USER'
                        ORDER BY time_sent DESC
                        LIMIT 1
                    ) AS last_query_time,
                    (
                        SELECT message
                        FROM "{schema}".chat_message
                        WHERE message_type = 'USER'
                        ORDER BY time_sent DESC
                        LIMIT 1
                    ) AS last_query_text,
                    (SELECT COUNT(*) FROM "{schema}".document) AS num_documents,
                    (SELECT COUNT(*) FROM "{schema}".user) AS num_users
            """
            )

            result = session.execute(query, {"tenant_id": schema}).mappings().first()

            if result:
                summaries.append(dict(result))

        except ProgrammingError as e:
            # schema may be missing a table
            print(f"Error processing schema {schema}: {e}", file=sys.stderr)
            session.rollback()
            continue
        except Exception as e:
            print(f"Unexpected error processing schema {schema}: {e}", file=sys.stderr)
            session.rollback()
            continue

    return summaries


def main() -> None:

    SqlEngine.init_engine(pool_size=5, max_overflow=2)

    with get_session_with_shared_schema() as session:
        summaries = get_tenant_activity_summary(session)

    print(json.dumps(summaries, indent=2, default=str))


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/test-openapi-key.py
================================================
VALID_MODEL_LIST = [
    "gpt-4o-mini",
    "gpt-4o",
    "gpt-4-1106-preview",
    "gpt-4-vision-preview",
    "gpt-4",
    "gpt-4-0314",
    "gpt-4-0613",
    "gpt-4-32k",
    "gpt-4-32k-0314",
    "gpt-4-32k-0613",
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k-0613",
]


if __name__ == "__main__":
    from openai import OpenAI

    model_version = None
    while model_version not in VALID_MODEL_LIST:
        model_version = input("Please provide an OpenAI model version to test: ")
        if model_version not in VALID_MODEL_LIST:
            print(f"Model must be from valid list: {', '.join(VALID_MODEL_LIST)}")
    assert model_version

    api_key = input("Please provide an OpenAI API Key to test: ")
    client = OpenAI(
        api_key=api_key,
    )

    prompt = "The boy went to the "
    print(f"Asking OpenAI to finish the sentence using {model_version}")
    print(prompt)
    try:
        messages = [
            {"role": "system", "content": "Finish the sentence"},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=model_version,
            messages=messages,  # type:ignore
            max_tokens=5,
            temperature=2,
        )
        print(response.choices[0].message.content)
        print("Success! Feel free to use this API key for Onyx.")
    except Exception:
        print(
            "Failed, provided API key is invalid for Onyx, please address the error from OpenAI."
        )
        raise


================================================
FILE: backend/scripts/transform_openapi_for_docs.py
================================================
"""
Transform OpenAPI schema for public documentation.

Filters endpoints tagged with "public", converts auth to Bearer token,
and removes internal parameters (tenant_id, db_session).

Usage:
    python scripts/transform_openapi_for_docs.py -i generated/openapi.json -o openapi_docs.json
"""

import argparse
import copy
import json
from typing import Any

PUBLIC_TAG = "public"
DOCS_SERVER_URL = "https://cloud.onyx.app/api"
INTERNAL_PARAMETERS = {"tenant_id", "db_session"}


def collect_schema_refs(obj: Any, refs: set[str]) -> None:
    """Recursively collect all $ref references from an object."""
    if isinstance(obj, dict):
        if "$ref" in obj:
            ref = obj["$ref"]
            if ref.startswith("#/components/schemas/"):
                refs.add(ref.split("/")[-1])
        for value in obj.values():
            collect_schema_refs(value, refs)
    elif isinstance(obj, list):
        for item in obj:
            collect_schema_refs(item, refs)


def get_all_referenced_schemas(
    schemas: dict[str, Any], initial_refs: set[str]
) -> set[str]:
    """Get all schemas referenced by initial_refs, including nested references."""
    all_refs = set(initial_refs)
    to_process = list(initial_refs)

    while to_process:
        schema_name = to_process.pop()
        if schema_name not in schemas:
            continue

        new_refs: set[str] = set()
        collect_schema_refs(schemas[schema_name], new_refs)

        for ref in new_refs:
            if ref not in all_refs:
                all_refs.add(ref)
                to_process.append(ref)

    return all_refs


def remove_internal_properties_from_schema(schema: dict[str, Any]) -> None:
    """Recursively remove internal properties from a schema."""
    if not isinstance(schema, dict):
        return

    if "properties" in schema and isinstance(schema["properties"], dict):
        for prop_name in list(schema["properties"].keys()):
            if prop_name in INTERNAL_PARAMETERS:
                del schema["properties"][prop_name]

        if "required" in schema and isinstance(schema["required"], list):
            schema["required"] = [
                r for r in schema["required"] if r not in INTERNAL_PARAMETERS
            ]
            if not schema["required"]:
                del schema["required"]

    for key in ["allOf", "oneOf", "anyOf"]:
        if key in schema and isinstance(schema[key], list):
            for item in schema[key]:
                remove_internal_properties_from_schema(item)

    if "items" in schema:
        remove_internal_properties_from_schema(schema["items"])

    if "additionalProperties" in schema and isinstance(
        schema["additionalProperties"], dict
    ):
        remove_internal_properties_from_schema(schema["additionalProperties"])


def remove_internal_parameters(spec: dict[str, Any]) -> None:
    """Remove internal parameters from all endpoints and schemas."""
    for path_data in spec.get("paths", {}).values():
        for method_data in path_data.values():
            if isinstance(method_data, dict) and "parameters" in method_data:
                method_data["parameters"] = [
                    p
                    for p in method_data["parameters"]
                    if not (
                        isinstance(p, dict) and p.get("name") in INTERNAL_PARAMETERS
                    )
                ]
                if not method_data["parameters"]:
                    del method_data["parameters"]

    for schema in spec.get("components", {}).get("schemas", {}).values():
        remove_internal_properties_from_schema(schema)


def transform_openapi(input_spec: dict[str, Any]) -> dict[str, Any]:
    """Transform the OpenAPI spec for public documentation."""
    output_spec: dict[str, Any] = {
        "openapi": input_spec.get("openapi", "3.1.0"),
        "info": {
            "title": "Onyx API",
            "description": "Onyx API for AI-powered enterprise search and chat",
            "version": input_spec.get("info", {}).get("version", "1.0.0"),
        },
        "servers": [{"url": DOCS_SERVER_URL}],
        "paths": {},
        "components": {
            "schemas": {},
            "securitySchemes": {
                "BearerAuth": {
                    "type": "http",
                    "scheme": "bearer",
                    "description": "Authorization header with Bearer token",
                }
            },
        },
    }

    input_paths = input_spec.get("paths", {})
    initial_refs: set[str] = set()

    for path, path_data in input_paths.items():
        for method, method_data in path_data.items():
            if not isinstance(method_data, dict):
                continue

            if PUBLIC_TAG in method_data.get("tags", []):
                if path not in output_spec["paths"]:
                    output_spec["paths"][path] = {}

                endpoint = copy.deepcopy(method_data)
                if "security" in endpoint:
                    endpoint["security"] = [{"BearerAuth": []}]
                output_spec["paths"][path][method] = endpoint
                collect_schema_refs(method_data, initial_refs)

    input_schemas = input_spec.get("components", {}).get("schemas", {})
    all_refs = get_all_referenced_schemas(input_schemas, initial_refs)

    for schema_name in all_refs:
        if schema_name in input_schemas:
            output_spec["components"]["schemas"][schema_name] = copy.deepcopy(
                input_schemas[schema_name]
            )

    remove_internal_parameters(output_spec)

    return output_spec


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Transform OpenAPI schema for public documentation"
    )
    parser.add_argument(
        "--input", "-i", default="openapi.json", help="Input OpenAPI JSON file"
    )
    parser.add_argument(
        "--output", "-o", default="openapi_docs.json", help="Output OpenAPI JSON file"
    )
    args = parser.parse_args()

    with open(args.input) as f:
        input_spec = json.load(f)

    output_spec = transform_openapi(input_spec)

    with open(args.output, "w") as f:
        json.dump(output_spec, f, indent=2)

    endpoint_count = sum(len(m) for m in output_spec["paths"].values())
    schema_count = len(output_spec["components"]["schemas"])
    print(f"Wrote {args.output}: {endpoint_count} endpoints, {schema_count} schemas")


if __name__ == "__main__":
    main()


================================================
FILE: backend/scripts/upload_files_as_connectors.py
================================================
"""
Script to upload files from a directory as individual file connectors in Onyx.
Each file gets its own connector named after the file.

Usage:
    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY
    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --api-base http://onyxserver:3000
    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --file-glob '*.zip'

Requires:
    pip install requests
"""

import argparse
import fnmatch
import os
import sys
import threading
import time

import requests

REQUEST_TIMEOUT = 900  # 15 minutes


def _elapsed_printer(label: str, stop_event: threading.Event) -> None:
    """Print a live elapsed-time counter until stop_event is set."""
    start = time.monotonic()
    while not stop_event.wait(timeout=1):
        elapsed = int(time.monotonic() - start)
        m, s = divmod(elapsed, 60)
        print(f"\r  {label} ... {m:02d}:{s:02d}", end="", flush=True)
    elapsed = int(time.monotonic() - start)
    m, s = divmod(elapsed, 60)
    print(f"\r  {label} ... {m:02d}:{s:02d} done")


def _timed_request(label: str, fn: object) -> requests.Response:
    """Run a request function while displaying a live elapsed timer."""
    stop = threading.Event()
    t = threading.Thread(target=_elapsed_printer, args=(label, stop), daemon=True)
    t.start()
    try:
        resp = fn()  # type: ignore[operator]
    finally:
        stop.set()
        t.join()
    return resp


def upload_file(
    session: requests.Session, base_url: str, file_path: str
) -> dict | None:
    """Upload a single file and return the response with file_paths and file_names."""
    with open(file_path, "rb") as f:
        resp = _timed_request(
            "Uploading",
            lambda: session.post(
                f"{base_url}/api/manage/admin/connector/file/upload",
                files={"files": (os.path.basename(file_path), f)},
                timeout=REQUEST_TIMEOUT,
            ),
        )
    if not resp.ok:
        print(f"  ERROR uploading: {resp.text}")
        return None
    return resp.json()


def create_connector(
    session: requests.Session,
    base_url: str,
    name: str,
    file_paths: list[str],
    file_names: list[str],
    zip_metadata_file_id: str | None,
) -> int | None:
    """Create a file connector and return its ID."""
    resp = _timed_request(
        "Creating connector",
        lambda: session.post(
            f"{base_url}/api/manage/admin/connector",
            json={
                "name": name,
                "source": "file",
                "input_type": "load_state",
                "connector_specific_config": {
                    "file_locations": file_paths,
                    "file_names": file_names,
                    "zip_metadata_file_id": zip_metadata_file_id,
                },
                "refresh_freq": None,
                "prune_freq": None,
                "indexing_start": None,
                "access_type": "public",
                "groups": [],
            },
            timeout=REQUEST_TIMEOUT,
        ),
    )
    if not resp.ok:
        print(f"  ERROR creating connector: {resp.text}")
        return None
    return resp.json()["id"]


def create_credential(
    session: requests.Session, base_url: str, name: str
) -> int | None:
    """Create a dummy credential for the file connector."""
    resp = session.post(
        f"{base_url}/api/manage/credential",
        json={
            "credential_json": {},
            "admin_public": True,
            "source": "file",
            "curator_public": True,
            "groups": [],
            "name": name,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f"  ERROR creating credential: {resp.text}")
        return None
    return resp.json()["id"]


def link_credential(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
    name: str,
) -> bool:
    """Link the connector to the credential (create CC pair)."""
    resp = session.put(
        f"{base_url}/api/manage/connector/{connector_id}/credential/{credential_id}",
        json={
            "name": name,
            "access_type": "public",
            "groups": [],
            "auto_sync_options": None,
            "processing_mode": "REGULAR",
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f"  ERROR linking credential: {resp.text}")
        return False
    return True


def run_connector(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
) -> bool:
    """Trigger the connector to start indexing."""
    resp = session.post(
        f"{base_url}/api/manage/admin/connector/run-once",
        json={
            "connector_id": connector_id,
            "credentialIds": [credential_id],
            "from_beginning": False,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f"  ERROR running connector: {resp.text}")
        return False
    return True


def process_file(session: requests.Session, base_url: str, file_path: str) -> bool:
    """Process a single file through the full connector creation flow."""
    file_name = os.path.basename(file_path)
    connector_name = file_name
    print(f"Processing: {file_name}")

    # Step 1: Upload
    upload_resp = upload_file(session, base_url, file_path)
    if not upload_resp:
        return False

    # Step 2: Create connector
    connector_id = create_connector(
        session,
        base_url,
        name=f"FileConnector-{connector_name}",
        file_paths=upload_resp["file_paths"],
        file_names=upload_resp["file_names"],
        zip_metadata_file_id=upload_resp.get("zip_metadata_file_id"),
    )
    if connector_id is None:
        return False

    # Step 3: Create credential
    credential_id = create_credential(session, base_url, name=connector_name)
    if credential_id is None:
        return False

    # Step 4: Link connector to credential
    if not link_credential(
        session, base_url, connector_id, credential_id, connector_name
    ):
        return False

    # Step 5: Trigger indexing
    if not run_connector(session, base_url, connector_id, credential_id):
        return False

    print(f"  OK (connector_id={connector_id})")
    return True


def get_authenticated_session(api_key: str) -> requests.Session:
    """Create a session authenticated with an API key."""
    session = requests.Session()
    session.headers.update({"Authorization": f"Bearer {api_key}"})
    return session


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Upload files as individual Onyx file connectors."
    )
    parser.add_argument(
        "--data-dir",
        required=True,
        help="Directory containing files to upload.",
    )
    parser.add_argument(
        "--api-base",
        default="http://localhost:3000",
        help="Base URL for the Onyx API (default: http://localhost:3000).",
    )
    parser.add_argument(
        "--api-key",
        required=True,
        help="API key for authentication.",
    )
    parser.add_argument(
        "--file-glob",
        default=None,
        help="Glob pattern to filter files (e.g. '*.json', '*.zip').",
    )
    args = parser.parse_args()

    data_dir = args.data_dir
    base_url = args.api_base.rstrip("/")
    api_key = args.api_key
    file_glob = args.file_glob

    if not os.path.isdir(data_dir):
        print(f"Error: {data_dir} is not a directory")
        sys.exit(1)

    script_path = os.path.realpath(__file__)
    files = sorted(
        os.path.join(data_dir, f)
        for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f))
        and os.path.realpath(os.path.join(data_dir, f)) != script_path
        and (file_glob is None or fnmatch.fnmatch(f, file_glob))
    )

    if not files:
        print(f"No files found in {data_dir}")
        sys.exit(1)

    print(f"Found {len(files)} file(s) in {data_dir}\n")

    session = get_authenticated_session(api_key)

    success = 0
    failed = 0
    for file_path in files:
        if process_file(session, base_url, file_path):
            success += 1
        else:
            failed += 1
        # Small delay to avoid overwhelming the server
        time.sleep(0.5)

    print(f"\nDone: {success} succeeded, {failed} failed out of {len(files)} files.")


if __name__ == "__main__":
    main()


================================================
FILE: backend/shared_configs/__init__.py
================================================


================================================
FILE: backend/shared_configs/configs.py
================================================
import os
from typing import Any
from typing import List
from urllib.parse import urlparse

# Used for logging
SLACK_CHANNEL_ID = "channel_id"

# Skip model warmup at startup
# Default to True (skip warmup) if not set, otherwise respect the value
SKIP_WARM_UP = os.environ.get("SKIP_WARM_UP", "true").lower() == "true"

# Check if model server is disabled
DISABLE_MODEL_SERVER = os.environ.get("DISABLE_MODEL_SERVER", "").lower() == "true"

# If model server is disabled, use "disabled" as host to trigger proper handling
if DISABLE_MODEL_SERVER:
    MODEL_SERVER_HOST = "disabled"
    MODEL_SERVER_ALLOWED_HOST = "disabled"
    INDEXING_MODEL_SERVER_HOST = "disabled"
else:
    MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
    MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
    INDEXING_MODEL_SERVER_HOST = (
        os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
    )

MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
# Model server for indexing should use a separate one to not allow indexing to introduce delay
# for inference
INDEXING_MODEL_SERVER_PORT = int(
    os.environ.get("INDEXING_MODEL_SERVER_PORT") or MODEL_SERVER_PORT
)

# Onyx custom Deep Learning Models
CONNECTOR_CLASSIFIER_MODEL_REPO = "Danswer/filter-extraction-model"
CONNECTOR_CLASSIFIER_MODEL_TAG = "1.0.0"
INTENT_MODEL_VERSION = "onyx-dot-app/hybrid-intent-token-classifier"
# INTENT_MODEL_TAG = "v1.0.3"
INTENT_MODEL_TAG: str | None = None
# Bi-Encoder, other details
DOC_EMBEDDING_CONTEXT_SIZE = 512

# Used to distinguish alternative indices
ALT_INDEX_SUFFIX = "__danswer_alt_index"

# Used for loading defaults for automatic deployments and dev flows
# For local, use: mixedbread-ai/mxbai-rerank-xsmall-v1
DEFAULT_CROSS_ENCODER_MODEL_NAME = (
    os.environ.get("DEFAULT_CROSS_ENCODER_MODEL_NAME") or None
)
DEFAULT_CROSS_ENCODER_API_KEY = os.environ.get("DEFAULT_CROSS_ENCODER_API_KEY") or None
DEFAULT_CROSS_ENCODER_PROVIDER_TYPE = (
    os.environ.get("DEFAULT_CROSS_ENCODER_PROVIDER_TYPE") or None
)
DISABLE_RERANK_FOR_STREAMING = (
    os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
)

# This controls the minimum number of pytorch "threads" to allocate to the embedding
# model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)

# Model server that has indexing only set will throw exception if used for reranking
# or intent classification
INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"

# The process needs to have this for the log file to write to
# otherwise, it will not create additional log files
# This should just be the filename base without extension or path.
LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "onyx"

# Enable generating persistent log files for local dev environments
DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
# notset, debug, info, notice, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL") or "info"

# Timeout for API-based embedding models
# NOTE: does not apply for Google VertexAI, since the python client doesn't
# allow us to specify a custom timeout
API_BASED_EMBEDDING_TIMEOUT = int(os.environ.get("API_BASED_EMBEDDING_TIMEOUT", "600"))

# Local batch size for VertexAI embedding models currently calibrated for item size of 512 tokens
# NOTE: increasing this value may lead to API errors due to token limit exhaustion per call.
VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = int(
    os.environ.get("VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE", "50")
)

# Only used for OpenAI
OPENAI_EMBEDDING_TIMEOUT = int(
    os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT)
)

# Whether or not to strictly enforce token limit for chunking.
STRICT_CHUNK_TOKEN_LIMIT = (
    os.environ.get("STRICT_CHUNK_TOKEN_LIMIT", "").lower() == "true"
)

# Set up Sentry integration (for error logging)
SENTRY_DSN = os.environ.get("SENTRY_DSN")


# Fields which should only be set on new search setting
PRESERVED_SEARCH_FIELDS = [
    "id",
    "provider_type",
    "api_key",
    "model_name",
    "api_url",
    "index_name",
    "multipass_indexing",
    "enable_contextual_rag",
    "model_dim",
    "normalize",
    "passage_prefix",
    "query_prefix",
]


def validate_cors_origin(origin: str) -> None:
    parsed = urlparse(origin)
    if parsed.scheme not in ["http", "https"] or not parsed.netloc:
        raise ValueError(f"Invalid CORS origin: '{origin}'")


# Examples of valid values for the environment variable:
# - "" (allow all origins)
# - "http://example.com" (single origin)
# - "http://example.com,https://example.org" (multiple origins)
# - "*" (allow all origins)
CORS_ALLOWED_ORIGIN_ENV = os.environ.get("CORS_ALLOWED_ORIGIN", "")

# Explicitly declare the type of CORS_ALLOWED_ORIGIN
CORS_ALLOWED_ORIGIN: List[str]

if CORS_ALLOWED_ORIGIN_ENV:
    # Split the environment variable into a list of origins
    CORS_ALLOWED_ORIGIN = [
        origin.strip()
        for origin in CORS_ALLOWED_ORIGIN_ENV.split(",")
        if origin.strip()
    ]
    # Validate each origin in the list
    for origin in CORS_ALLOWED_ORIGIN:
        validate_cors_origin(origin)
else:
    # If the environment variable is empty, allow all origins
    CORS_ALLOWED_ORIGIN = ["*"]


# Multi-tenancy configuration
MULTI_TENANT = os.environ.get("MULTI_TENANT", "").lower() == "true"

# Outside this file, should almost always use `POSTGRES_DEFAULT_SCHEMA` unless you
# have a very good reason
POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE = "public"
POSTGRES_DEFAULT_SCHEMA = (
    os.environ.get("POSTGRES_DEFAULT_SCHEMA") or POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
)
DEFAULT_REDIS_PREFIX = os.environ.get("DEFAULT_REDIS_PREFIX") or "default"


async def async_return_default_schema(
    *args: Any, **kwargs: Any  # noqa: ARG001
) -> str:  # noqa: ARG001
    return POSTGRES_DEFAULT_SCHEMA


# Prefix used for all tenant ids
TENANT_ID_PREFIX = "tenant_"

DISALLOWED_SLACK_BOT_TENANT_IDS = os.environ.get("DISALLOWED_SLACK_BOT_TENANT_IDS")
DISALLOWED_SLACK_BOT_TENANT_LIST = (
    [
        tenant.strip()
        for tenant in DISALLOWED_SLACK_BOT_TENANT_IDS.split(",")
        if tenant.strip()
    ]
    if DISALLOWED_SLACK_BOT_TENANT_IDS
    else None
)

IGNORED_SYNCING_TENANT_IDS = os.environ.get("IGNORED_SYNCING_TENANT_IDS")
IGNORED_SYNCING_TENANT_LIST = (
    [
        tenant.strip()
        for tenant in IGNORED_SYNCING_TENANT_IDS.split(",")
        if tenant.strip()
    ]
    if IGNORED_SYNCING_TENANT_IDS
    else None
)

ENVIRONMENT = os.environ.get("ENVIRONMENT") or "not_explicitly_set"


#####
# Usage Limits Configuration (meant for cloud, off by default for self-hosted)
#####
# Whether usage limits are enforced (defaults to MULTI_TENANT value)
_USAGE_LIMITS_ENABLED_RAW = os.environ.get("USAGE_LIMITS_ENABLED")
if _USAGE_LIMITS_ENABLED_RAW is not None:
    USAGE_LIMITS_ENABLED = _USAGE_LIMITS_ENABLED_RAW.lower() == "true"
else:
    # Default: enabled on cloud (MULTI_TENANT), disabled for self-hosted
    USAGE_LIMITS_ENABLED = MULTI_TENANT

# Usage limit window in seconds (default: 1 week = 604800 seconds)
USAGE_LIMIT_WINDOW_SECONDS = int(os.environ.get("USAGE_LIMIT_WINDOW_SECONDS", "604800"))

# Per-week LLM usage cost limits in cents (e.g., 1000 = $10.00)
# Trial users get lower limits than paid users
USAGE_LIMIT_LLM_COST_CENTS_TRIAL = int(
    os.environ.get("USAGE_LIMIT_LLM_COST_CENTS_TRIAL", "3200")  # $32.00 default
)
USAGE_LIMIT_LLM_COST_CENTS_PAID = int(
    os.environ.get("USAGE_LIMIT_LLM_COST_CENTS_PAID", "6400")  # $64.00 default
)

# Per-week chunks indexed limits
USAGE_LIMIT_CHUNKS_INDEXED_TRIAL = int(
    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_TRIAL", 400_000)
)
USAGE_LIMIT_CHUNKS_INDEXED_PAID = int(
    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_PAID", 4_000_000)
)

# Per-week API calls using API keys or Personal Access Tokens
USAGE_LIMIT_API_CALLS_TRIAL = int(os.environ.get("USAGE_LIMIT_API_CALLS_TRIAL", "0"))
USAGE_LIMIT_API_CALLS_PAID = int(os.environ.get("USAGE_LIMIT_API_CALLS_PAID", "40000"))

# Per-week non-streaming API calls (more expensive, so lower limits)
USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL = int(
    os.environ.get("USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL", "0")
)
USAGE_LIMIT_NON_STREAMING_CALLS_PAID = int(
    os.environ.get("USAGE_LIMIT_NON_STREAMING_CALLS_PAID", "160")
)


================================================
FILE: backend/shared_configs/contextvars.py
================================================
import contextvars

from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA


# Context variable for the current tenant id
CURRENT_TENANT_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = (
    contextvars.ContextVar(
        "current_tenant_id", default=None if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA
    )
)

# set by every route in the API server
INDEXING_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = (
    contextvars.ContextVar("indexing_request_id", default=None)
)

# set by every route in the API server
ONYX_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "onyx_request_id", default=None
)

# Used to store cc pair id and index attempt id in multithreaded environments
INDEX_ATTEMPT_INFO_CONTEXTVAR: contextvars.ContextVar[tuple[int, int] | None] = (
    contextvars.ContextVar("index_attempt_info", default=None)
)

# Set by endpoint context middleware — used for per-endpoint DB pool attribution
CURRENT_ENDPOINT_CONTEXTVAR: contextvars.ContextVar[str | None] = (
    contextvars.ContextVar("current_endpoint", default=None)
)


"""Utils related to contextvars"""


def get_current_tenant_id() -> str:
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
    if tenant_id is None:
        import traceback

        if not MULTI_TENANT:
            return POSTGRES_DEFAULT_SCHEMA

        stack_trace = traceback.format_stack()
        error_message = (
            "Tenant ID is not set. This should never happen.\nStack trace:\n"
            + "".join(stack_trace)
        )
        raise RuntimeError(error_message)
    return tenant_id


================================================
FILE: backend/shared_configs/enums.py
================================================
from enum import Enum


class EmbeddingProvider(str, Enum):
    OPENAI = "openai"
    COHERE = "cohere"
    VOYAGE = "voyage"
    GOOGLE = "google"
    LITELLM = "litellm"
    AZURE = "azure"


class RerankerProvider(str, Enum):
    COHERE = "cohere"
    LITELLM = "litellm"
    BEDROCK = "bedrock"


class EmbedTextType(str, Enum):
    QUERY = "query"
    PASSAGE = "passage"


class WebSearchProviderType(str, Enum):
    GOOGLE_PSE = "google_pse"
    SERPER = "serper"
    EXA = "exa"
    SEARXNG = "searxng"
    BRAVE = "brave"


class WebContentProviderType(str, Enum):
    ONYX_WEB_CRAWLER = "onyx_web_crawler"
    FIRECRAWL = "firecrawl"
    EXA = "exa"


================================================
FILE: backend/shared_configs/model_server_models.py
================================================
from pydantic import BaseModel

from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType
from shared_configs.enums import RerankerProvider


Embedding = list[float]


class EmbedRequest(BaseModel):
    texts: list[str]
    # Can be none for cloud embedding model requests, error handling logic exists for other cases
    model_name: str | None = None
    deployment_name: str | None = None
    max_context_length: int
    normalize_embeddings: bool
    api_key: str | None = None
    provider_type: EmbeddingProvider | None = None
    text_type: EmbedTextType
    manual_query_prefix: str | None = None
    manual_passage_prefix: str | None = None
    api_url: str | None = None
    api_version: str | None = None

    # allows for the truncation of the vector to a lower dimension
    # to reduce memory usage. Currently only supported for OpenAI models.
    # will be ignored for other providers.
    reduced_dimension: int | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}


class EmbedResponse(BaseModel):
    embeddings: list[Embedding]


class RerankRequest(BaseModel):
    query: str
    documents: list[str]
    model_name: str
    provider_type: RerankerProvider | None = None
    api_key: str | None = None
    api_url: str | None = None

    # This disables the "model_" protected namespace for pydantic
    model_config = {"protected_namespaces": ()}


class RerankResponse(BaseModel):
    scores: list[float]


class IntentRequest(BaseModel):
    query: str
    # Sequence classification threshold
    semantic_percent_threshold: float
    # Token classification threshold
    keyword_percent_threshold: float


class IntentResponse(BaseModel):
    is_keyword: bool
    keywords: list[str]


================================================
FILE: backend/shared_configs/utils.py
================================================
from typing import TypeVar


T = TypeVar("T")


def batch_list(
    lst: list[T],
    batch_size: int,
) -> list[list[T]]:
    return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]


================================================
FILE: backend/slackbot_images/README.md
================================================
This folder contains images needed by the Onyx Slack Bot. When possible, we use the images
within `web/public`, but sometimes those images do not work for the Slack Bot.


================================================
FILE: backend/supervisord.conf
================================================
[supervisord]
nodaemon=true
user=root
logfile=/var/log/supervisord.log
environment=PYTHONPATH="/app"

# region enable supervisorctl usage
[supervisorctl]
serverurl=unix:///tmp/supervisor.sock

[unix_http_server]
file=/tmp/supervisor.sock
chmod=0700

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
# endregion enable supervisorctl usage

# Background jobs that must be run async due to long time to completion
# NOTE: due to an issue with Celery + SQLAlchemy
# (https://github.com/celery/celery/issues/7007#issuecomment-1740139367)
# we must use the threads pool instead of the default prefork pool for now
# in order to avoid intermittent errors like:
# `billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)`.
#
# This means workers will not be able take advantage of multiple CPU cores
# on a system, but this should be okay for now since all our celery tasks are
# relatively compute-light (e.g. they tend to just make a bunch of requests to
# Vespa / Postgres)
[program:celery_worker_primary]
command=celery -A onyx.background.celery.versioned_apps.primary worker
    --loglevel=INFO
    --hostname=primary@%%n
    -Q celery
stdout_logfile=/var/log/celery_worker_primary.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

# NOTE: only allowing configuration here and not in the other celery workers,
# since this is often the bottleneck for "sync" jobs (e.g. document set syncing,
# user group syncing, deletion, etc.)
[program:celery_worker_light]
command=celery -A onyx.background.celery.versioned_apps.light worker
    --loglevel=INFO
    --hostname=light@%%n
    -Q vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration
stdout_logfile=/var/log/celery_worker_light.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

[program:celery_worker_heavy]
command=celery -A onyx.background.celery.versioned_apps.heavy worker
    --loglevel=INFO
    --hostname=heavy@%%n
    -Q connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox
stdout_logfile=/var/log/celery_worker_heavy.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

[program:celery_worker_docprocessing]
command=celery -A onyx.background.celery.versioned_apps.docprocessing worker
    --loglevel=INFO
    --hostname=docprocessing@%%n
    -Q docprocessing
stdout_logfile=/var/log/celery_worker_docprocessing.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

[program:celery_worker_user_file_processing]
command=celery -A onyx.background.celery.versioned_apps.user_file_processing worker
    --loglevel=INFO
    --hostname=user_file_processing@%%n
    -Q user_file_processing,user_file_project_sync,user_file_delete
stdout_logfile=/var/log/celery_worker_user_file_processing.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

[program:celery_worker_docfetching]
command=celery -A onyx.background.celery.versioned_apps.docfetching worker
    --loglevel=INFO
    --hostname=docfetching@%%n
    -Q connector_doc_fetching
stdout_logfile=/var/log/celery_worker_docfetching.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true

[program:celery_worker_monitoring]
command=celery -A onyx.background.celery.versioned_apps.monitoring worker
    --loglevel=INFO
    --hostname=monitoring@%%n
    -Q monitoring
stdout_logfile=/var/log/celery_worker_monitoring.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true


# Job scheduler for periodic tasks
[program:celery_beat]
command=celery -A onyx.background.celery.versioned_apps.beat beat
stdout_logfile=/var/log/celery_beat.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
startsecs=10
stopasgroup=true

# watchdog to detect and restart the beat in case of inactivity
# supervisord only restarts the process if it's dead
# make sure this key matches ONYX_CELERY_BEAT_HEARTBEAT_KEY
[program:supervisord_watchdog_celery_beat]
command=python onyx/utils/supervisord_watchdog.py
    --conf /etc/supervisor/conf.d/supervisord.conf
    --key "onyx:celery:beat:heartbeat"
    --program celery_beat
stdout_logfile=/var/log/supervisord_watchdog_celery_beat.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
startsecs=10
stopasgroup=true

# Listens for Slack messages and responds with answers
# for all channels that the OnyxBot has been added to.
# If not setup, this will just fail 5 times and then stop.
# More details on setup here: https://docs.onyx.app/admins/getting_started/slack_bot_setup
[program:slack_bot]
command=python onyx/onyxbot/slack/listener.py
stdout_logfile=/var/log/slack_bot.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startretries=5
startsecs=60

# Listens for Discord messages and responds with answers
# for all guilds/channels that the OnyxBot has been added to.
# If not configured, will continue to probe every 3 minutes for a Discord bot token.
[program:discord_bot]
command=python onyx/onyxbot/discord/client.py
stdout_logfile=/var/log/discord_bot.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startretries=5
startsecs=60

# Pushes all logs from the above programs to stdout
# No log rotation here, since it's stdout it's handled by the Docker container logging
[program:log-redirect-handler]
command=tail -qF
    /var/log/celery_beat.log
    /var/log/celery_worker_primary.log
    /var/log/celery_worker_light.log
    /var/log/celery_worker_heavy.log
    /var/log/celery_worker_docprocessing.log
    /var/log/celery_worker_monitoring.log
    /var/log/celery_worker_user_file_processing.log
    /var/log/celery_worker_docfetching.log
    /var/log/slack_bot.log
    /var/log/discord_bot.log
    /var/log/supervisord_watchdog_celery_beat.log
    /var/log/mcp_server.log
    /var/log/mcp_server.err.log
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes = 0  # must be set to 0 when stdout_logfile=/dev/stdout
autorestart=true


================================================
FILE: backend/tests/README.md
================================================
# Backend Tests

## Test Types

There are four test categories, ordered by increasing scope:

### Unit Tests (`tests/unit/`)

No external services. Mock all I/O with `unittest.mock`. Use for complex, isolated
logic (e.g. citation processing, encryption).

```bash
pytest -xv backend/tests/unit
```

### External Dependency Unit Tests (`tests/external_dependency_unit/`)

External services (Postgres, Redis, Vespa, OpenAI, etc.) are running, but Onyx
application containers are not. Tests call functions directly and can mock selectively.

Use when you need a real database or real API calls but want control over setup.

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```

### Integration Tests (`tests/integration/`)

Full Onyx deployment running. No mocking. Prefer this over other test types when possible.

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```

### Playwright / E2E Tests (`web/tests/e2e/`)

Full stack including web server. Use for frontend-backend coordination.

```bash
npx playwright test <TEST_NAME>
```

## Shared Fixtures

Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.

## Running Tests Repeatedly (`pytest-repeat`)

Use `pytest-repeat` to catch flaky tests by running them multiple times:

```bash
# Run a specific test 50 times
pytest --count=50 backend/tests/unit/path/to/test.py::test_name

# Stop on first failure with -x
pytest --count=50 -x backend/tests/unit/path/to/test.py::test_name

# Repeat an entire test file
pytest --count=10 backend/tests/unit/path/to/test_file.py
```

## Best Practices

### Use `enable_ee` fixture instead of inlining

Enables EE mode for a test, with proper teardown and cache clearing.

```python
# Whole file (in a test module, NOT in conftest.py)
pytestmark = pytest.mark.usefixtures("enable_ee")

# Whole directory — add an autouse wrapper to the directory's conftest.py
@pytest.fixture(autouse=True)
def _enable_ee_for_directory(enable_ee: None) -> None:  
    """Wraps the shared enable_ee fixture with autouse for this directory."""

# Single test
def test_something(enable_ee: None) -> None: ...
```

**Note:** `pytestmark` in a `conftest.py` does NOT apply markers to tests in that
directory — it only affects tests defined in the conftest itself (which is none).
Use the autouse fixture wrapper pattern shown above instead.

Do NOT inline `global_version.set_ee()` — always use the fixture.


================================================
FILE: backend/tests/__init__.py
================================================


================================================
FILE: backend/tests/api/test_api.py
================================================
import os
from collections.abc import Generator
from typing import Any

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.main import fetch_versioned_implementation
from onyx.utils.logger import setup_logger

logger = setup_logger()


@pytest.fixture(scope="function")
def client() -> Generator[TestClient, Any, None]:
    # Set environment variables
    os.environ["ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"] = "True"

    # Initialize TestClient with the FastAPI app
    app: FastAPI = fetch_versioned_implementation(
        module="onyx.main", attribute="get_application"
    )()
    client = TestClient(app)
    yield client


@pytest.mark.skip(
    reason="enable when we have a testing environment with preloaded data"
)
def test_handle_simplified_chat_message(client: TestClient) -> None:
    req: dict[str, Any] = {}

    req["persona_id"] = 0
    req["description"] = "pytest"
    response = client.post("/chat/create-chat-session", json=req)
    chat_session_id = response.json()["chat_session_id"]

    req = {}
    req["chat_session_id"] = chat_session_id
    req["message"] = "hello"

    response = client.post("/chat/send-message-simple-api", json=req)
    assert response.status_code == 200


@pytest.mark.skip(
    reason="enable when we have a testing environment with preloaded data"
)
def test_handle_send_message_simple_with_history(client: TestClient) -> None:
    req: dict[str, Any] = {}
    messages = []
    messages.append({"message": "What sorts of questions can you answer for me?"})
    # messages.append({"message":
    #                  "I'd be happy to assist you with a wide range of questions related to Ramp's expense management platform. "
    #                  "I can help with topics such as:\n\n"
    #                  "1. Setting up and managing your Ramp account\n"
    #                  "2. Using Ramp cards and making purchases\n"
    #                  "3. Submitting and reviewing expenses\n"
    #                  "4. Understanding Ramp's features and benefits\n"
    #                  "5. Navigating the Ramp dashboard and mobile app\n"
    #                  "6. Managing team spending and budgets\n"
    #                  "7. Integrating Ramp with accounting software\n"
    #                  "8. Troubleshooting common issues\n\n"
    #                  "Feel free to ask any specific questions you have about using Ramp, "
    #                  "and I'll do my best to provide clear and helpful answers. "
    #                  "Is there a particular area you'd like to know more about?",
    #                  "role": "assistant"})
    # req["prompt_id"] = 9
    # req["persona_id"] = 6

    # Yoda
    req["persona_id"] = 1
    messages.append(
        {
            "message": "Answer questions for you, I can. "
            "About many topics, knowledge I have. "
            "But specific to documents provided, limited my responses are. "
            "Ask you may about:\n\n"
            "- User interviews and building trust with participants\n"
            "- Designing effective surveys and survey questions  \n"
            "- Product analysis approaches\n"
            "- Recruiting participants for research\n"
            "- Discussion guides for user interviews\n"
            "- Types of survey questions\n\n"
            "More there may be, but focus on these areas, the given context does. "
            "Specific questions you have, ask you should. Guide you I will, as best I can.",
            "role": "assistant",
        }
    )
    # messages.append({"message": "Where can I pilot a survey?"})

    # messages.append({"message": "How many data points should I collect to validate my solution?"})
    messages.append({"message": "What is solution validation research used for?"})

    req["messages"] = messages

    response = client.post("/chat/send-message-simple-with-history", json=req)
    assert response.status_code == 200

    resp_json = response.json()

    # persona must have LLM relevance enabled for this to pass
    assert len(resp_json["llm_selected_doc_indices"]) > 0


def test_versions_endpoint(client: TestClient) -> None:
    """Test that /api/versions endpoint returns valid stable, dev, and migration configurations"""
    response = client.get("/versions")
    assert response.status_code == 200

    data = response.json()

    # Verify the top-level structure
    assert "stable" in data
    assert "dev" in data
    assert "migration" in data

    # Verify stable configuration
    stable = data["stable"]
    assert "onyx" in stable
    assert "relational_db" in stable
    assert "index" in stable
    assert "nginx" in stable

    # Verify stable version follows correct pattern (v1.2.3)
    # If this fails, revise latest Github release for typo or incorrect version name
    assert STABLE_VERSION_PATTERN.match(
        stable["onyx"]
    ), f"Stable version {stable['onyx']} doesn't match pattern v(number).(number).(number)"

    # Verify dev configuration
    dev = data["dev"]
    assert "onyx" in dev
    assert "relational_db" in dev
    assert "index" in dev
    assert "nginx" in dev

    # Verify dev version follows correct pattern (v1.2.3-beta.4)
    assert DEV_VERSION_PATTERN.match(
        dev["onyx"]
    ), f"Dev version {dev['onyx']} doesn't match pattern v(number).(number).(number)-beta.(number)"

    # Verify migration configuration
    migration = data["migration"]
    assert "onyx" in migration
    assert "relational_db" in migration
    assert "index" in migration
    assert "nginx" in migration

    # Verify migration has expected values
    assert migration["onyx"] == "airgapped-intfloat-nomic-migration"
    assert migration["relational_db"] == "postgres:15.2-alpine"
    assert migration["index"] == "vespaengine/vespa:8.277.17"
    assert migration["nginx"] == "nginx:1.25.5-alpine"

    # Verify versions are different between stable and dev
    assert stable["onyx"] != dev["onyx"], "Stable and dev versions should be different"

    # Additional validation: ensure all required fields are strings
    for config_name, config in [
        ("stable", stable),
        ("dev", dev),
        ("migration", migration),
    ]:
        for field_name, field_value in config.items():
            assert isinstance(
                field_value, str
            ), f"{config_name}.{field_name} should be a string, got {type(field_value)}"
            assert (
                field_value.strip() != ""
            ), f"{config_name}.{field_name} should not be empty"


================================================
FILE: backend/tests/conftest.py
================================================
"""Root conftest — shared fixtures available to all test directories."""

from collections.abc import Generator

import pytest

from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version


@pytest.fixture()
def enable_ee() -> Generator[None, None, None]:
    """Temporarily enable EE mode for a single test.

    Restores the previous EE state and clears the versioned-implementation
    cache on teardown so state doesn't leak between tests.
    """
    was_ee = global_version.is_ee_version()
    global_version.set_ee()
    fetch_versioned_implementation.cache_clear()
    yield
    if not was_ee:
        global_version.unset_ee()
    fetch_versioned_implementation.cache_clear()


================================================
FILE: backend/tests/daily/conftest.py
================================================
import os

# Set environment variables BEFORE any other imports to ensure they're picked up
# by module-level code that reads env vars at import time
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
os.environ["LICENSE_ENFORCEMENT_ENABLED"] = "false"

from collections.abc import AsyncGenerator
from collections.abc import Generator
from contextlib import asynccontextmanager
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.testclient import TestClient

from onyx.auth.users import current_admin_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import UserRole
from onyx.main import get_application
from onyx.utils.logger import setup_logger

logger = setup_logger()

load_dotenv()


@asynccontextmanager
async def test_lifespan(
    app: FastAPI,  # noqa: ARG001
) -> AsyncGenerator[None, None]:  # noqa: ARG001
    """No-op lifespan for tests that don't need database or other services."""
    yield


def mock_get_session() -> Generator[MagicMock, None, None]:
    """Mock database session for tests that don't actually need DB access."""
    yield MagicMock()


def mock_current_admin_user() -> MagicMock:
    """Mock admin user for endpoints protected by current_admin_user."""
    mock_admin = MagicMock()
    mock_admin.role = UserRole.ADMIN
    return mock_admin


@pytest.fixture(scope="function")
def client() -> Generator[TestClient, None, None]:
    # Initialize TestClient with the FastAPI app using a no-op test lifespan.
    # Patch out prometheus metrics setup to avoid "Duplicated timeseries in
    # CollectorRegistry" errors when multiple tests each create a new app
    # (prometheus registers metrics globally and rejects duplicate names).
    with patch("onyx.main.setup_prometheus_metrics"):
        app: FastAPI = get_application(lifespan_override=test_lifespan)

    # Override the database session dependency with a mock
    # (these tests don't actually need DB access)
    app.dependency_overrides[get_session] = mock_get_session
    app.dependency_overrides[current_admin_user] = mock_current_admin_user

    # Use TestClient as a context manager to properly trigger lifespan
    with TestClient(app) as client:
        yield client

    # Clean up dependency overrides
    app.dependency_overrides.clear()


================================================
FILE: backend/tests/daily/connectors/airtable/test_airtable_basic.py
================================================
import os
from typing import cast
from unittest.mock import MagicMock

import pytest
from pydantic import BaseModel

from onyx.configs.constants import DocumentSource
from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection

BASE_VIEW_ID = "viwVUEJjWPd8XYjh8"


class AirtableConfig(BaseModel):
    base_id: str
    table_identifier: str
    access_token: str


@pytest.fixture(params=[True, False])
def airtable_config(request: pytest.FixtureRequest) -> AirtableConfig:
    table_identifier = (
        os.environ["AIRTABLE_TEST_TABLE_NAME"]
        if request.param
        else os.environ["AIRTABLE_TEST_TABLE_ID"]
    )
    return AirtableConfig(
        base_id=os.environ["AIRTABLE_TEST_BASE_ID"],
        table_identifier=table_identifier,
        access_token=os.environ["AIRTABLE_ACCESS_TOKEN"],
    )


def create_test_document(
    id: str,
    title: str,
    description: str,
    priority: str,
    status: str,
    # Link to another record is skipped for now
    # category: str,
    ticket_id: str,
    created_time: str,
    status_last_changed: str,
    submitted_by: str,
    assignee: str,
    days_since_status_change: int | None,
    attachments: list[tuple[str, str]] | None = None,
    all_fields_as_metadata: bool = False,
    share_id: str | None = None,
    view_id: str | None = None,
) -> Document:
    base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
    table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
    missing_vars = []
    if not base_id:
        missing_vars.append("AIRTABLE_TEST_BASE_ID")
    if not table_id:
        missing_vars.append("AIRTABLE_TEST_TABLE_ID")

    if missing_vars:
        raise RuntimeError(
            f"Required environment variables not set: {', '.join(missing_vars)}. "
            "These variables are required to run Airtable connector tests."
        )
    link_base = f"https://airtable.com/{base_id}"
    if share_id:
        link_base = f"{link_base}/{share_id}"
    link_base = f"{link_base}/{table_id}"
    if view_id:
        link_base = f"{link_base}/{view_id}"

    sections = []

    if not all_fields_as_metadata:
        sections.extend(
            [
                TextSection(
                    text=f"Title:\n------------------------\n{title}\n------------------------",
                    link=f"{link_base}/{id}",
                ),
                TextSection(
                    text=f"Description:\n------------------------\n{description}\n------------------------",
                    link=f"{link_base}/{id}",
                ),
            ]
        )

    if attachments:
        for attachment_text, attachment_link in attachments:
            sections.append(
                TextSection(
                    text=f"Attachment:\n------------------------\n{attachment_text}\n------------------------",
                    link=attachment_link,
                ),
            )

    metadata: dict[str, str | list[str]] = {
        # "Category": category,
        "Assignee": assignee,
        "Submitted by": submitted_by,
        "Priority": priority,
        "Status": status,
        "Created time": created_time,
        "ID": ticket_id,
        "Status last changed": status_last_changed,
        **(
            {"Days since status change": str(days_since_status_change)}
            if days_since_status_change is not None
            else {}
        ),
    }

    if all_fields_as_metadata:
        metadata.update(
            {
                "Title": title,
                "Description": description,
            }
        )

    return Document(
        id=f"airtable__{id}",
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.AIRTABLE,
        semantic_identifier=f"{os.environ.get('AIRTABLE_TEST_TABLE_NAME', '')}: {title}",
        metadata=metadata,
        doc_updated_at=None,
        primary_owners=None,
        secondary_owners=None,
        title=None,
        from_ingestion_api=False,
        additional_info=None,
    )


def compare_documents(
    actual_docs: list[Document], expected_docs: list[Document]
) -> None:
    """Utility function to compare actual and expected documents, ignoring order."""
    actual_docs_dict = {doc.id: doc for doc in actual_docs}
    expected_docs_dict = {doc.id: doc for doc in expected_docs}

    assert actual_docs_dict.keys() == expected_docs_dict.keys(), "Document ID mismatch"

    for doc_id in actual_docs_dict:
        actual = actual_docs_dict[doc_id]
        expected = expected_docs_dict[doc_id]

        assert (
            actual.source == expected.source
        ), f"Source mismatch for document {doc_id}"
        assert (
            actual.semantic_identifier == expected.semantic_identifier
        ), f"Semantic identifier mismatch for document {doc_id}"
        assert (
            actual.metadata == expected.metadata
        ), f"Metadata mismatch for document {doc_id}"
        assert (
            actual.doc_updated_at == expected.doc_updated_at
        ), f"Updated at mismatch for document {doc_id}"
        assert (
            actual.primary_owners == expected.primary_owners
        ), f"Primary owners mismatch for document {doc_id}"
        assert (
            actual.secondary_owners == expected.secondary_owners
        ), f"Secondary owners mismatch for document {doc_id}"
        assert actual.title == expected.title, f"Title mismatch for document {doc_id}"
        assert (
            actual.from_ingestion_api == expected.from_ingestion_api
        ), f"Ingestion API flag mismatch for document {doc_id}"
        assert (
            actual.additional_info == expected.additional_info
        ), f"Additional info mismatch for document {doc_id}"

        # Compare sections
        assert len(actual.sections) == len(
            expected.sections
        ), f"Number of sections mismatch for document {doc_id}"
        for i, (actual_section, expected_section) in enumerate(
            zip(actual.sections, expected.sections)
        ):
            assert (
                actual_section.text == expected_section.text
            ), f"Section {i} text mismatch for document {doc_id}"
            assert (
                actual_section.link == expected_section.link
            ), f"Section {i} link mismatch for document {doc_id}"


def test_airtable_connector_basic(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    airtable_config: AirtableConfig,
) -> None:
    """Test behavior when all non-attachment fields are treated as metadata."""
    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=False,
    )
    connector.load_credentials(
        {
            "airtable_access_token": airtable_config.access_token,
        }
    )
    doc_batch_generator = connector.load_from_state()
    doc_batch = [
        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)
    ]
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 2

    expected_docs = [
        create_test_document(
            id="rec8BnxDLyWeegOuO",
            title="Slow Internet",
            description="The internet connection is very slow.",
            priority="Medium",
            status="In Progress",
            ticket_id="2",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
        create_test_document(
            id="reccSlIA4pZEFxPBg",
            title="Printer Issue",
            description="The office printer is not working.",
            priority="High",
            status="Open",
            ticket_id="1",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            attachments=[
                (
                    "Test.pdf:\ntesting!!!",
                    "https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
                )
            ],
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
    ]

    # Compare documents using the utility function
    compare_documents(doc_batch, expected_docs)


def test_airtable_connector_url(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    airtable_config: AirtableConfig,
) -> None:
    """Test that passing an Airtable URL produces the same results as base_id + table_id."""
    if not airtable_config.table_identifier.startswith("tbl"):
        pytest.skip("URL test requires table ID, not table name")

    url = f"https://airtable.com/{airtable_config.base_id}/{airtable_config.table_identifier}/{BASE_VIEW_ID}"
    connector = AirtableConnector(
        airtable_url=url,
        treat_all_non_attachment_fields_as_metadata=False,
    )
    connector.load_credentials({"airtable_access_token": airtable_config.access_token})

    doc_batch_generator = connector.load_from_state()
    doc_batch = [
        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)
    ]
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 2

    expected_docs = [
        create_test_document(
            id="rec8BnxDLyWeegOuO",
            title="Slow Internet",
            description="The internet connection is very slow.",
            priority="Medium",
            status="In Progress",
            ticket_id="2",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
        create_test_document(
            id="reccSlIA4pZEFxPBg",
            title="Printer Issue",
            description="The office printer is not working.",
            priority="High",
            status="Open",
            ticket_id="1",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            attachments=[
                (
                    "Test.pdf:\ntesting!!!",
                    f"https://airtable.com/{airtable_config.base_id}/{airtable_config.table_identifier}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
                )
            ],
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
    ]

    compare_documents(doc_batch, expected_docs)


def test_airtable_connector_index_all(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    airtable_config: AirtableConfig,
) -> None:
    """Test index_all mode discovers all bases/tables and returns documents.

    The test token has access to one base ("Onyx") with three tables:
      - Tickets: 3 records, 2 with content (1 empty record is skipped)
      - Support Categories: 4 records, all with Category Name field
      - Table 3: 3 records, 1 with content (2 empty records are skipped)
    Total expected: 7 documents
    """
    connector = AirtableConnector()
    connector.load_credentials({"airtable_access_token": airtable_config.access_token})

    all_docs: list[Document] = []
    for batch in connector.load_from_state():
        for item in batch:
            if isinstance(item, Document):
                all_docs.append(item)

    # 2 from Tickets + 4 from Support Categories + 1 from Table 3 = 7
    assert len(all_docs) == 7

    docs_by_id = {d.id: d for d in all_docs}

    # Verify all expected document IDs are present
    expected_ids = {
        # Tickets
        "airtable__rec8BnxDLyWeegOuO",
        "airtable__reccSlIA4pZEFxPBg",
        # Support Categories
        "airtable__rec5SgUDcHXcBc8kS",
        "airtable__recD3DQHc0BQkDaqX",
        "airtable__recPHdnWu1Q9ZxyTg",
        "airtable__recWbIElUDz9HjgMd",
        # Table 3
        "airtable__recNalBz02QU1LhbM",
    }
    assert docs_by_id.keys() == expected_ids

    # In index_all mode, semantic identifiers include "Base Name > Table Name: Primary Field"
    assert (
        docs_by_id["airtable__rec8BnxDLyWeegOuO"].semantic_identifier
        == "Onyx > Tickets: Slow Internet"
    )
    assert (
        docs_by_id["airtable__rec5SgUDcHXcBc8kS"].semantic_identifier
        == "Onyx > Support Categories: Software Development"
    )
    assert (
        docs_by_id["airtable__recNalBz02QU1LhbM"].semantic_identifier
        == "Onyx > Table 3: A"
    )

    # Verify hierarchy metadata on a Tickets doc
    tickets_doc = docs_by_id["airtable__rec8BnxDLyWeegOuO"]
    assert tickets_doc.doc_metadata is not None
    hierarchy = tickets_doc.doc_metadata["hierarchy"]
    assert hierarchy["source_path"] == ["Onyx", "Tickets"]
    assert hierarchy["base_id"] == airtable_config.base_id
    assert hierarchy["base_name"] == "Onyx"
    assert hierarchy["table_name"] == "Tickets"

    # Verify hierarchy on a Support Categories doc
    cat_doc = docs_by_id["airtable__rec5SgUDcHXcBc8kS"]
    assert cat_doc.doc_metadata is not None
    assert cat_doc.doc_metadata["hierarchy"]["source_path"] == [
        "Onyx",
        "Support Categories",
    ]


def test_airtable_connector_all_metadata(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    airtable_config: AirtableConfig,
) -> None:
    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=True,
    )
    connector.load_credentials(
        {
            "airtable_access_token": airtable_config.access_token,
        }
    )
    doc_batch_generator = connector.load_from_state()
    doc_batch = [
        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)
    ]
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    # NOTE: one of the rows has no attachments -> no content -> no document
    assert len(doc_batch) == 1

    expected_docs = [
        create_test_document(
            id="reccSlIA4pZEFxPBg",
            title="Printer Issue",
            description="The office printer is not working.",
            priority="High",
            status="Open",
            # Link to another record is skipped for now
            # category="Software Development",
            ticket_id="1",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            attachments=[
                (
                    "Test.pdf:\ntesting!!!",
                    # hard code link for now
                    "https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
                )
            ],
            all_fields_as_metadata=True,
            view_id=BASE_VIEW_ID,
        ),
    ]

    # Compare documents using the utility function
    compare_documents(doc_batch, expected_docs)


def test_airtable_connector_with_share_and_view(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    airtable_config: AirtableConfig,
) -> None:
    """Test behavior when using share_id and view_id for URL generation."""
    SHARE_ID = "shrkfjEzDmLaDtK83"

    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=False,
        share_id=SHARE_ID,
        view_id=BASE_VIEW_ID,
    )
    connector.load_credentials(
        {
            "airtable_access_token": airtable_config.access_token,
        }
    )
    doc_batch_generator = connector.load_from_state()
    doc_batch = [
        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)
    ]
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 2

    expected_docs = [
        create_test_document(
            id="rec8BnxDLyWeegOuO",
            title="Slow Internet",
            description="The internet connection is very slow.",
            priority="Medium",
            status="In Progress",
            ticket_id="2",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            all_fields_as_metadata=False,
            share_id=SHARE_ID,
            view_id=BASE_VIEW_ID,
        ),
        create_test_document(
            id="reccSlIA4pZEFxPBg",
            title="Printer Issue",
            description="The office printer is not working.",
            priority="High",
            status="Open",
            ticket_id="1",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            attachments=[
                (
                    "Test.pdf:\ntesting!!!",
                    (
                        f"https://airtable.com/{airtable_config.base_id}/{SHARE_ID}/"
                        f"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/"
                        "fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide"
                    ),
                )
            ],
            all_fields_as_metadata=False,
            share_id=SHARE_ID,
            view_id=BASE_VIEW_ID,
        ),
    ]

    # Compare documents using the utility function
    compare_documents(doc_batch, expected_docs)


================================================
FILE: backend/tests/daily/connectors/bitbucket/conftest.py
================================================
from tests.load_env_vars import load_env_vars


# Load environment variables at the module level
load_env_vars()


================================================
FILE: backend/tests/daily/connectors/bitbucket/test_bitbucket_checkpointed.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.bitbucket.connector import BitbucketConnector
from tests.daily.connectors.utils import load_all_from_connector


@pytest.fixture
def bitbucket_connector_for_checkpoint() -> BitbucketConnector:
    """Daily fixture for Bitbucket checkpointed indexing.

    Env vars:
    - BITBUCKET_EMAIL: Bitbucket account email
    - BITBUCKET_API_TOKEN: Bitbucket app password/token
    - BITBUCKET_WORKSPACE: workspace id
    - BITBUCKET_REPOSITORIES: comma-separated slugs
    - BITBUCKET_PROJECTS: optional comma-separated project keys
    """
    workspace = os.environ["BITBUCKET_WORKSPACE"]
    repositories = os.environ.get("BITBUCKET_REPOSITORIES")
    projects = os.environ.get("BITBUCKET_PROJECTS")

    connector = BitbucketConnector(
        workspace=workspace,
        repositories=repositories,
        projects=projects,
        batch_size=10,
    )

    email = os.environ.get("BITBUCKET_EMAIL")
    token = os.environ.get("BITBUCKET_API_TOKEN")
    if not email or not token:
        pytest.skip("BITBUCKET_EMAIL or BITBUCKET_API_TOKEN not set in environment")

    connector.load_credentials({"bitbucket_email": email, "bitbucket_api_token": token})
    return connector


def test_bitbucket_checkpointed_load(
    bitbucket_connector_for_checkpoint: BitbucketConnector,
) -> None:
    # Use a broad window; results may be empty depending on repository state
    start = 1755004439  # Tue Aug 12 2025 13:13:59 UTC
    end = time.time()

    docs = load_all_from_connector(
        connector=bitbucket_connector_for_checkpoint,
        start=start,
        end=end,
    ).documents

    assert isinstance(docs, list)

    for doc in docs:
        assert doc.source == DocumentSource.BITBUCKET
        assert doc.metadata is not None
        assert doc.metadata.get("object_type") == "PullRequest"
        assert "id" in doc.metadata
        assert "state" in doc.metadata
        assert "title" in doc.metadata
        assert "updated_on" in doc.metadata

        # Basic section checks
        assert len(doc.sections) >= 1
        section = doc.sections[0]
        assert isinstance(section.link, str)
        assert isinstance(section.text, str)


================================================
FILE: backend/tests/daily/connectors/bitbucket/test_bitbucket_slim_connector.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.bitbucket.connector import BitbucketConnector
from onyx.connectors.models import HierarchyNode
from tests.daily.connectors.utils import load_all_from_connector


@pytest.fixture
def bitbucket_connector_for_slim() -> BitbucketConnector:
    workspace = os.environ["BITBUCKET_WORKSPACE"]
    repositories = os.environ.get("BITBUCKET_REPOSITORIES")
    projects = os.environ.get("BITBUCKET_PROJECTS")

    connector = BitbucketConnector(
        workspace=workspace,
        repositories=repositories,
        projects=projects,
        batch_size=10,
    )

    email = os.environ.get("BITBUCKET_EMAIL")
    token = os.environ.get("BITBUCKET_API_TOKEN")
    if not email or not token:
        pytest.skip("BITBUCKET_EMAIL or BITBUCKET_API_TOKEN not set in environment")

    connector.load_credentials({"bitbucket_email": email, "bitbucket_api_token": token})
    return connector


def test_bitbucket_full_ids_subset_of_slim_ids(
    bitbucket_connector_for_slim: BitbucketConnector,
) -> None:
    # Get all full doc IDs from load_from_state
    docs = load_all_from_connector(
        connector=bitbucket_connector_for_slim,
        start=0,
        end=time.time(),
    ).documents
    all_full_doc_ids: set[str] = set([doc.id for doc in docs])

    # Get all doc IDs from the slim connector
    all_slim_doc_ids: set[str] = set()
    for (
        slim_doc_batch
    ) in bitbucket_connector_for_slim.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # The set of full doc IDs should always be a subset of slim doc IDs
    assert all_full_doc_ids.issubset(all_slim_doc_ids)
    # Make sure we actually got some documents
    assert len(all_slim_doc_ids) > 0

    # Basic sanity checks if any docs exist
    if all_slim_doc_ids:
        example_id = next(iter(all_slim_doc_ids))
        assert example_id.startswith(f"{DocumentSource.BITBUCKET.value}:")


================================================
FILE: backend/tests/daily/connectors/blob/test_blob_connector.py
================================================
import os
from unittest.mock import MagicMock
from unittest.mock import patch
from urllib.parse import parse_qs
from urllib.parse import unquote
from urllib.parse import urlparse

import pytest

from onyx.configs.constants import BlobType
from onyx.connectors.blob.connector import BlobStorageConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions


@pytest.fixture
def blob_connector(request: pytest.FixtureRequest) -> BlobStorageConnector:
    """Fixture requires (BlobType, bucket_name) and optional init kwargs.

    Param format: (BlobType, bucket_name, {optional init kwargs})
    - The 3rd element is optional and, if provided, must be a dict.
    - Extra kwargs are passed to BlobStorageConnector.__init__.

    Example:
      @pytest.mark.parametrize(
          "blob_connector",
          [(BlobType.S3, "my-bucket"), (BlobType.S3, "my-bucket", {"prefix": "foo/"})],
          indirect=True,
      )
    """
    try:
        bucket_type, bucket_name, *rest = request.param
    except Exception as e:
        raise AssertionError(
            "blob_connector requires (BlobType, bucket_name, [init_kwargs])"
        ) from e

    init_kwargs = rest[0] if rest else {}
    if rest and not isinstance(init_kwargs, dict):
        raise AssertionError("init_kwargs must be a dict if provided")

    if not isinstance(bucket_type, BlobType):
        bucket_type = BlobType(bucket_type)

    connector = BlobStorageConnector(
        bucket_type=bucket_type, bucket_name=bucket_name, **init_kwargs
    )

    if bucket_type == BlobType.S3:
        creds = {
            "aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS"],
            "aws_secret_access_key": os.environ[
                "AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS"
            ],
        }
    elif bucket_type == BlobType.R2:
        creds = {
            "account_id": os.environ["R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS"],
            "r2_access_key_id": os.environ["R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS"],
            "r2_secret_access_key": os.environ[
                "R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS"
            ],
        }
    elif bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:
        creds = {
            "access_key_id": os.environ["GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS"],
            "secret_access_key": os.environ[
                "GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS"
            ],
        }
    else:
        # Until we figure out the Oracle log in, this fixture only supports S3, R2, and GCS.
        raise AssertionError(f"Unsupported bucket type: {bucket_type}")

    connector.load_credentials(creds)
    return connector


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@pytest.mark.parametrize(
    "blob_connector", [(BlobType.S3, "onyx-connector-tests")], indirect=True
)
def test_blob_s3_connector(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    blob_connector: BlobStorageConnector,
) -> None:
    """
    Plain and document file types should be fully indexed.

    Multimedia and unknown file types will be indexed be skipped unless `set_allow_images`
    is called with `True`.

    This is intentional in order to allow searching by just the title even if we can't
    index the file content.
    """
    all_docs: list[Document] = []
    document_batches = blob_connector.load_from_state()
    for doc_batch in document_batches:
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)

    assert len(all_docs) == 15

    for doc in all_docs:
        section = doc.sections[0]
        assert isinstance(section, TextSection)

        file_extension = get_file_ext(doc.semantic_identifier)
        if file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:
            assert len(section.text) > 0
            continue

        # unknown extension
        assert len(section.text) == 0


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@pytest.mark.parametrize(
    "blob_connector", [(BlobType.S3, "s3-role-connector-test")], indirect=True
)
def test_blob_s3_cross_region_and_citation_link(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    blob_connector: BlobStorageConnector,
) -> None:
    """Buckets in a different region should be accessible and links should reflect the correct region.

    Validates that using the same credentials we can access a bucket in a
    different AWS region and that the generated object URL includes the bucket's
    region and is a valid S3 dashboard URL.
    """

    assert blob_connector.bucket_region == "ap-south-1"

    # Load documents and validate the single object + its link
    all_docs: list[Document] = []
    for doc_batch in blob_connector.load_from_state():
        all_docs.extend(
            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # The test bucket contains exactly one object named "Chapter 6.pdf"
    assert len(all_docs) == 1
    doc = all_docs[0]
    assert doc.semantic_identifier == "Chapter 6.pdf"

    # Validate link
    assert len(doc.sections) >= 1
    link = doc.sections[0].link
    assert link is not None and isinstance(link, str) and len(link) > 0

    parsed = urlparse(link)
    # Expect the link to be the AWS S3 console object URL
    assert parsed.netloc == "s3.console.aws.amazon.com"
    assert parsed.path == "/s3/object/s3-role-connector-test"

    # Query should include region and prefix
    query = parse_qs(parsed.query)
    assert query.get("region") == ["ap-south-1"]
    assert "prefix" in query and len(query["prefix"]) == 1
    prefix_val = query["prefix"][0]
    # The prefix (object key) should decode to the filename
    decoded_prefix = unquote(prefix_val)
    assert decoded_prefix == "Chapter 6.pdf" or decoded_prefix.endswith(
        "/Chapter 6.pdf"
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@pytest.mark.parametrize(
    "blob_connector", [(BlobType.R2, "asia-pacific-bucket")], indirect=True
)
def test_blob_r2_connector(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    blob_connector: BlobStorageConnector,
) -> None:
    """Validate basic R2 connector creation and document loading"""

    all_docs: list[Document] = []
    for doc_batch in blob_connector.load_from_state():
        all_docs.extend(
            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    assert len(all_docs) >= 1
    doc = all_docs[0]
    assert len(doc.sections) >= 1


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@pytest.mark.parametrize(
    "blob_connector",
    [(BlobType.R2, "onyx-daily-connector-tests", {"european_residency": True})],
    indirect=True,
)
def test_blob_r2_eu_residency_connector(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    blob_connector: BlobStorageConnector,
) -> None:
    """Validate R2 connector with European residency setting"""

    all_docs: list[Document] = []
    for doc_batch in blob_connector.load_from_state():
        all_docs.extend(
            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    assert len(all_docs) >= 1
    doc = all_docs[0]
    assert len(doc.sections) >= 1


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@pytest.mark.parametrize(
    "blob_connector", [(BlobType.GOOGLE_CLOUD_STORAGE, "onyx-test-1")], indirect=True
)
def test_blob_gcs_connector(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    blob_connector: BlobStorageConnector,
) -> None:
    all_docs: list[Document] = []
    for doc_batch in blob_connector.load_from_state():
        all_docs.extend(
            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # At least one object from the test bucket
    assert len(all_docs) >= 1
    doc = all_docs[0]
    assert len(doc.sections) >= 1


================================================
FILE: backend/tests/daily/connectors/coda/README.md
================================================
# Coda Connector Test Suite

## Overview

The `test_coda_connector.py` file contains comprehensive integration tests for the Coda connector. These tests validate that the connector properly:
- Authenticates with the Coda API
- Retrieves documents, pages, and tables
- Generates properly structured Onyx `Document` objects
- Handles batch processing correctly
- Supports workspace scoping
- Polls for recent updates
- Handles error cases gracefully

## Prerequisites

1. **Coda API Access**: You need a valid Coda account with at least one workspace containing documents, pages, or tables
2. **Coda Bearer Token**: Generate an API token from your Coda account settings
3. **Python Environment**: Backend dependencies installed (see backend/requirements)
4. **Test Data**: Ideally, your Coda workspace should have:
   - At least one document
   - At least one page within a document
   - At least one table within a document

## Environment Variables

The test suite requires the following environment variables:

### Required
- **`CODA_BEARER_TOKEN`**: Your Coda API bearer token
  - Get this from: Coda Account Settings → API Settings → Generate API Token
  - Without this, tests will be skipped

### Optional
- **`CODA_BASE_URL`**: The Coda API base URL
  - Default: `https://coda.io/apis/v1`
  - Only override if using a different API endpoint

- **`CODA_WORKSPACE_ID`**: A specific workspace ID to test workspace scoping
  - If not provided, workspace-scoped tests will be skipped
  - Find this by inspecting the Coda API response or your workspace URL

## Running the Tests

### Method 1: Run All Tests in the File

From the `backend/` directory:

```bash
# Set environment variables and run all tests
export CODA_BEARER_TOKEN="your_token_here"
pytest -v -s tests/daily/connectors/coda/test_coda_connector.py
```

### Method 2: Run a Specific Test Class

```bash
# Run only validation tests
export CODA_BEARER_TOKEN="your_token_here"
pytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestCodaConnectorValidation

# Run only load_from_state tests
pytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestLoadFromState
```

### Method 3: Run a Single Test

```bash
# Run a specific test function
export CODA_BEARER_TOKEN="your_token_here"
pytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestLoadFromState::test_document_count_matches_expected
```

### Method 4: Using an Environment File

Create a `.env` file in `backend/tests/daily/connectors/coda/`:

```bash
# .env
CODA_BEARER_TOKEN=your_token_here
CODA_WORKSPACE_ID=your_workspace_id  # Optional
```

Then run with dotenv:

```bash
cd backend
python -m dotenv -f tests/daily/connectors/coda/.env run -- pytest -v -s tests/daily/connectors/coda/test_coda_connector.py
```

### Method 5: Direct Execution

The test file can be run directly:

```bash
export CODA_BEARER_TOKEN="your_token_here"
cd backend/tests/daily/connectors/coda
python test_coda_connector.py
```

## Test Structure

### Test Classes

1. **`TestCodaConnectorValidation`**
   - Validates connector settings and credentials
   - Tests authentication success and failure cases
   - Tests workspace-scoped connector validation

2. **`TestLoadFromState`**
   - Tests full document retrieval via `load_from_state()`
   - Validates batch sizes, document counts, and structure
   - Checks document fields, metadata, and content
   - Verifies both page and table document generation
   - Tests the `index_page_content` configuration flag

3. **`TestPollSource`**
   - Tests incremental updates via `poll_source()`
   - Validates time-range filtering
   - Checks that only updated documents are returned

4. **`TestWorkspaceScoping`**
   - Tests the `workspace_id` filtering functionality
   - Validates that scoped connectors only retrieve documents from the specified workspace

5. **`TestErrorHandling`**
   - Tests graceful handling of edge cases
   - Validates behavior with inaccessible content or empty tables

## Common Test Patterns

### Fixtures

The test suite uses pytest fixtures for setup:

- **`coda_credentials`**: Loads and validates credentials from environment variables
- **`connector`**: Creates a standard CodaConnector instance
- **`workspace_scoped_connector`**: Creates a workspace-scoped connector (if `CODA_WORKSPACE_ID` is set)
- **`reference_data`**: Fetches ground truth data from the Coda API for validation

### Skipped Tests

Tests are automatically skipped when:
- `CODA_BEARER_TOKEN` is not set
- `CODA_WORKSPACE_ID` is not set (for workspace-scoped tests)
- No documents, pages, or tables are found in the workspace

## Troubleshooting

### Tests are Skipped

**Issue**: Tests show as "SKIPPED" instead of running

**Solutions**:
- Ensure `CODA_BEARER_TOKEN` is set and valid
- Verify your Coda workspace has at least one document with pages or tables
- For workspace tests, ensure `CODA_WORKSPACE_ID` is set

### Authentication Errors

**Issue**: Tests fail with authentication errors

**Solutions**:
- Verify your bearer token is valid and hasn't expired
- Check that the token has appropriate API permissions
- Ensure you're not hitting API rate limits

### Document Count Mismatches

**Issue**: Tests fail with "Expected X documents but got Y"

**Possible Causes**:
- API rate limiting causing partial data retrieval
- Network issues during test execution
- Changes to workspace data during test execution
- Permission issues preventing access to some documents

### Empty Content Errors

**Issue**: Tests fail due to empty document content

**Possible Causes**:
- Pages without accessible content (permission issues)
- Empty tables or pages in your workspace
- The `index_page_content` flag set incorrectly

## Test Execution Tips

1. **Run tests during low-traffic times**: API rate limits may affect test reliability
2. **Use a dedicated test workspace**: Avoid running tests on production workspaces with changing data
3. **Check test output verbosity**: Use `-v` for verbose test names, `-s` to see print statements
4. **Isolate failing tests**: Run specific test classes or functions to debug issues
5. **Review fixture output**: The `reference_data` fixture prints warnings about API access issues

## CI/CD Integration

When integrating these tests into CI/CD pipelines:

```yaml
# Example GitHub Actions configuration
- name: Run Coda Connector Tests
  env:
    CODA_BEARER_TOKEN: ${{ secrets.CODA_BEARER_TOKEN }}
    CODA_WORKSPACE_ID: ${{ secrets.CODA_WORKSPACE_ID }}
  run: |
    cd backend
    pytest -v tests/daily/connectors/coda/test_coda_connector.py
```

Store credentials as encrypted secrets in your CI/CD platform.

## Expected Test Duration

- Full test suite: ~30-60 seconds (depending on workspace size and API latency)
- Individual test classes: ~5-15 seconds
- Validation tests: <5 seconds

## Additional Resources

- [Coda API Documentation](https://coda.io/developers/apis/v1)
- [Onyx Connector Documentation](../../../../onyx/connectors/README.md)
- [pytest Documentation](https://docs.pytest.org/)


================================================
FILE: backend/tests/daily/connectors/coda/test_coda_connector.py
================================================
import os
import time
from collections.abc import Generator
from typing import Any

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.coda.connector import CodaConnector
from onyx.connectors.exceptions import CredentialInvalidError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode


def connector_doc_generator(
    connector: CodaConnector,
) -> Generator[Document, None, None]:
    for batch in connector.load_from_state():
        for doc in batch:
            if isinstance(doc, HierarchyNode):
                continue
            yield doc


@pytest.fixture
def coda_credentials() -> dict[str, str]:
    """Fixture to get and validate Coda credentials."""
    bearer_token = os.environ.get("CODA_BEARER_TOKEN")

    if not bearer_token:
        pytest.skip("CODA_BEARER_TOKEN not set")

    return {
        "coda_bearer_token": bearer_token,
    }


@pytest.fixture
def connector(coda_credentials: dict[str, str]) -> CodaConnector:
    """Fixture to create and authenticate connector."""
    conn = CodaConnector(batch_size=5, index_page_content=True)
    conn.load_credentials(coda_credentials)
    return conn


@pytest.fixture
def workspace_scoped_connector(coda_credentials: dict[str, str]) -> CodaConnector:
    """Fixture to create connector scoped to a specific workspace (if CODA_WORKSPACE_ID is set)."""
    workspace_id = os.environ.get("CODA_WORKSPACE_ID")
    if not workspace_id:
        pytest.skip("CODA_WORKSPACE_ID not set - skipping workspace-scoped tests")

    conn = CodaConnector(
        batch_size=5, index_page_content=True, workspace_id=workspace_id
    )
    conn.load_credentials(coda_credentials)
    return conn


@pytest.fixture
def reference_data(connector: CodaConnector) -> dict[str, Any]:
    """Fixture to fetch reference data from API for validation."""
    all_docs = connector._list_all_docs()

    if not all_docs:
        pytest.skip("No docs found in Coda workspace")

    expected_page_count = 0
    expected_table_count = 0
    pages_by_doc = {}
    tables_by_doc = {}

    for doc in all_docs:
        doc_id = doc.id

        try:
            pages = connector._list_pages_in_doc(doc_id)
            pages_by_doc[doc_id] = pages
            expected_page_count += len(pages)
        except Exception as e:
            print(f"Warning: Could not fetch pages for doc {doc_id}: {e}")
            pages_by_doc[doc_id] = []

        try:
            tables = connector._list_tables(doc_id)
            tables_by_doc[doc_id] = tables
            expected_table_count += len(tables)
        except Exception as e:
            print(f"Warning: Could not fetch tables for doc {doc_id}: {e}")
            tables_by_doc[doc_id] = []

    total_expected_documents = expected_page_count + expected_table_count

    if total_expected_documents == 0:
        pytest.skip("No pages or tables found in Coda workspace")

    return {
        "docs": all_docs,
        "total_pages": expected_page_count,
        "total_tables": expected_table_count,
        "total_documents": total_expected_documents,
        "pages_by_doc": pages_by_doc,
        "tables_by_doc": tables_by_doc,
    }


class TestCodaConnectorValidation:
    """Test suite for connector validation and credential handling."""

    def test_validate_connector_settings_success(
        self, connector: CodaConnector
    ) -> None:
        """Test that validate_connector_settings succeeds with valid credentials."""
        # Should not raise any exceptions
        connector.validate_connector_settings()

    def test_validate_workspace_scoped_connector(
        self, workspace_scoped_connector: CodaConnector
    ) -> None:
        """Test that workspace-scoped connector validates successfully."""
        workspace_scoped_connector.validate_connector_settings()

    def test_load_credentials_invalid_token(self) -> None:
        """Test that invalid credentials are rejected."""
        conn = CodaConnector()

        with pytest.raises(CredentialInvalidError):
            conn.load_credentials(
                {
                    "coda_bearer_token": "invalid_token_12345",
                }
            )


class TestLoadFromState:
    """Test suite for load_from_state functionality."""

    def test_returns_generator(self, connector: CodaConnector) -> None:
        """Test that load_from_state returns a generator."""
        gen = connector.load_from_state()
        assert isinstance(gen, Generator), "load_from_state should return a Generator"

    def test_batch_sizes_respect_config(
        self,
        connector: CodaConnector,
        reference_data: dict[str, Any],  # noqa: ARG002
    ) -> None:
        """Test that batches respect the configured batch_size."""
        batch_size = connector.batch_size
        gen = connector.load_from_state()

        batch_sizes = []
        for batch in gen:
            batch_sizes.append(len(batch))
            assert (
                len(batch) <= batch_size
            ), f"Batch size {len(batch)} exceeds configured {batch_size}"

        for i, size in enumerate(batch_sizes[:-1]):
            assert (
                size == batch_size
            ), f"Non-final batch {i} has size {size}, expected {batch_size}"

        # Last batch may be smaller or equal
        if batch_sizes:
            assert batch_sizes[-1] <= batch_size

    def test_document_count_matches_expected(
        self, connector: CodaConnector, reference_data: dict[str, Any]
    ) -> None:
        """Test that total documents match expected pages + tables."""
        gen = connector.load_from_state()

        total_documents = sum(len(batch) for batch in gen)
        expected_count = reference_data["total_documents"]

        assert total_documents == expected_count, (
            f"Expected {expected_count} documents "
            f"({reference_data['total_pages']} pages + "
            f"{reference_data['total_tables']} tables) "
            f"but got {total_documents}"
        )

    def test_document_required_fields(
        self,
        connector: CodaConnector,
        reference_data: dict[str, Any],  # noqa: ARG002
    ) -> None:
        """Test that all documents have required fields with valid values."""
        gen = connector.load_from_state()

        for batch in gen:
            for doc in batch:
                assert isinstance(doc, Document)

                assert doc.id is not None, "Document ID should not be None"
                assert doc.id.startswith(
                    "coda-"
                ), "Document ID should start with 'coda-'"
                assert (
                    doc.source == DocumentSource.CODA
                ), "Document source should be CODA"
                assert (
                    doc.semantic_identifier is not None
                ), "Semantic identifier should not be None"
                assert (
                    doc.doc_updated_at is not None
                ), "doc_updated_at should not be None"

                assert (
                    len(doc.sections) > 0
                ), "Document should have at least one section"
                for section in doc.sections:
                    assert section.text is not None, "Section text should not be None"
                    assert len(section.text) > 0, "Section text should not be empty"
                    assert section.link is not None, "Section link should not be None"
                    assert section.link.startswith(
                        "https://"
                    ), "Section link should be a valid URL"

                assert "doc_id" in doc.metadata, "Metadata should contain doc_id"
                assert (
                    "browser_link" in doc.metadata
                ), "Metadata should contain browser_link"

    def test_document_types(
        self, connector: CodaConnector, reference_data: dict[str, Any]
    ) -> None:
        """Test that both page and table documents are generated correctly."""
        page_docs = []
        table_docs = []

        for doc in connector_doc_generator(connector):
            if "coda-page-" in doc.id:
                page_docs.append(doc)
                assert "content_type" in doc.metadata
            elif "coda-table-" in doc.id:
                table_docs.append(doc)
                assert "row_count" in doc.metadata

        # Verify we found both types (if both exist in the workspace)
        if reference_data["total_pages"] > 0:
            assert len(page_docs) > 0, "Should have found page documents"

        if reference_data["total_tables"] > 0:
            assert len(table_docs) > 0, "Should have found table documents"

        # Verify counts match
        assert (
            len(page_docs) == reference_data["total_pages"]
        ), f"Expected {reference_data['total_pages']} page documents, got {len(page_docs)}"
        assert (
            len(table_docs) == reference_data["total_tables"]
        ), f"Expected {reference_data['total_tables']} table documents, got {len(table_docs)}"

    def test_no_duplicate_documents(
        self,
        connector: CodaConnector,
        reference_data: dict[str, Any],  # noqa: ARG002
    ) -> None:
        """Test that no documents are yielded twice."""
        document_ids = []
        for doc in connector_doc_generator(connector):
            document_ids.append(doc.id)

        unique_ids = set(document_ids)
        assert len(document_ids) == len(
            unique_ids
        ), f"Found {len(document_ids) - len(unique_ids)} duplicate documents"

    def test_all_docs_processed(
        self, connector: CodaConnector, reference_data: dict[str, Any]
    ) -> None:
        """Test that content from all docs are included."""
        processed_doc_ids = set()
        for doc in connector_doc_generator(connector):
            doc_id = doc.metadata.get("doc_id")
            processed_doc_ids.add(doc_id)

        expected_doc_ids = {doc.id for doc in reference_data["docs"]}

        expected_doc_ids_with_content = {
            doc_id
            for doc_id in expected_doc_ids
            if len(reference_data["pages_by_doc"].get(doc_id, [])) > 0
            or len(reference_data["tables_by_doc"].get(doc_id, [])) > 0
        }

        assert (
            processed_doc_ids == expected_doc_ids_with_content
        ), f"Not all docs with content were processed. Expected {expected_doc_ids_with_content}, got {processed_doc_ids}"

    def test_document_content_not_empty(
        self,
        connector: CodaConnector,
        reference_data: dict[str, Any],  # noqa: ARG002
    ) -> None:
        """Test that all documents have meaningful content."""
        for doc in connector_doc_generator(connector):
            assert doc.semantic_identifier, "Semantic identifier should not be empty"
            assert (
                len(doc.semantic_identifier) > 0
            ), "Semantic identifier should have content"

            total_text_length = sum(len(section.text or "") for section in doc.sections)
            assert total_text_length > 0, f"Document {doc.id} has no content"

    def test_page_content_indexing(self, coda_credentials: dict[str, str]) -> None:
        """Test that index_page_content flag works correctly."""
        # page indexing disabled
        conn_no_content = CodaConnector(batch_size=5, index_page_content=False)
        conn_no_content.load_credentials(coda_credentials)

        # page indexing enabled
        conn_with_content = CodaConnector(batch_size=5, index_page_content=True)
        conn_with_content.load_credentials(coda_credentials)

        docs_no_content = []
        for batch in conn_no_content.load_from_state():
            for doc in batch:
                if isinstance(doc, HierarchyNode):
                    continue
                if "coda-page-" in doc.id:
                    docs_no_content.append(doc)
                    break
            if docs_no_content:
                break

        docs_with_content = []
        for batch in conn_with_content.load_from_state():
            for doc in batch:
                if isinstance(doc, HierarchyNode):
                    continue
                if "coda-page-" in doc.id:
                    docs_with_content.append(doc)
                    break
            if docs_with_content:
                break

        if docs_no_content and docs_with_content:
            no_content_length = sum(
                len(s.text or "") for s in docs_no_content[0].sections
            )
            with_content_length = sum(
                len(s.text or "") for s in docs_with_content[0].sections
            )

            assert (
                with_content_length >= no_content_length
            ), "Content-indexed page should have at least as much text as non-indexed"


class TestPollSource:
    """Test suite for poll_source functionality."""

    def test_poll_source_returns_generator(self, connector: CodaConnector) -> None:
        """Test that poll_source returns a generator."""
        current_time = time.time()
        start_time = current_time - 86400  # 24 hours

        gen = connector.poll_source(start_time, current_time)
        assert isinstance(gen, Generator), "poll_source should return a Generator"

    def test_poll_source_recent_updates(self, connector: CodaConnector) -> None:
        """Test polling for recently updated documents."""
        current_time = time.time()
        start_time = current_time - (86400 * 30)

        gen = connector.poll_source(start_time, current_time)

        documents = []
        for batch in gen:
            documents.extend(batch)

        # All returned documents should be updated within the time range
        for doc in documents:
            if isinstance(doc, HierarchyNode):
                continue
            assert doc.doc_updated_at is not None, "doc_updated_at should not be None"
            doc_timestamp = doc.doc_updated_at.timestamp()
            assert (
                start_time < doc_timestamp <= current_time
            ), f"Document {doc.id} updated at {doc_timestamp} is outside range [{start_time}, {current_time}]"

    def test_poll_source_no_updates_in_range(self, connector: CodaConnector) -> None:
        """Test polling with a time range that has no updates."""
        end_time = time.time() - (86400 * 365)  # 1 year ago
        start_time = end_time - 86400  # 1 day before that

        gen = connector.poll_source(start_time, end_time)

        documents = []
        for batch in gen:
            documents.extend(batch)

        # Should return no documents (unless workspace is very old)
        print(f"Found {len(documents)} documents updated over a year ago")
        assert len(documents) == 0

    def test_poll_source_batch_sizes(self, connector: CodaConnector) -> None:
        """Test that poll_source respects batch sizes."""
        current_time = time.time()
        start_time = current_time - (86400 * 30)

        batch_size = connector.batch_size
        gen = connector.poll_source(start_time, current_time)

        for batch in gen:
            assert (
                len(batch) <= batch_size
            ), f"Batch size {len(batch)} exceeds configured {batch_size}"


class TestWorkspaceScoping:
    """Test suite for workspace_id scoping functionality."""

    def test_workspace_scoped_loads_subset(
        self,
        connector: CodaConnector,
        workspace_scoped_connector: CodaConnector,
        reference_data: dict[str, Any],  # noqa: ARG002
    ) -> None:
        """Test that workspace-scoped connector loads a subset of documents."""
        all_docs = []
        for batch in connector.load_from_state():
            all_docs.extend(batch)

        scoped_docs = []
        for batch in workspace_scoped_connector.load_from_state():
            scoped_docs.extend(batch)

        # Scoped should be <= all docs
        assert len(scoped_docs) <= len(
            all_docs
        ), "Workspace-scoped connector should return same or fewer documents"

        workspace_id = workspace_scoped_connector.workspace_id
        for doc in scoped_docs:
            if isinstance(doc, HierarchyNode):
                continue
            doc_id = doc.metadata.get("doc_id")
            assert isinstance(doc_id, str), "doc_id should be a string"
            coda_doc = workspace_scoped_connector._get_doc(doc_id)
            assert (
                coda_doc.workspace_id == workspace_id
            ), f"Document {doc_id} has workspace {coda_doc.workspace_id}, expected {workspace_id}"


class TestErrorHandling:
    """Test suite for error handling and edge cases."""

    def test_handles_missing_page_content_gracefully(
        self, connector: CodaConnector
    ) -> None:
        """Test that connector handles pages without accessible content."""
        gen = connector.load_from_state()

        documents = []
        for batch in gen:
            documents.extend(batch)

        assert (
            len(documents) > 0
        ), "Should yield documents even if some content is inaccessible"

    def test_handles_empty_tables_gracefully(self, connector: CodaConnector) -> None:
        """Test that connector handles tables with no rows."""
        for doc in connector_doc_generator(connector):
            if "coda-table-" in doc.id:
                assert len(doc.sections) > 0, "Empty table should still have a section"
                if doc.metadata.get("row_count") == "0":
                    assert (
                        len(doc.sections) == 1
                    ), "Empty table should have exactly one section"


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])


================================================
FILE: backend/tests/daily/connectors/confluence/models.py
================================================
from pydantic import BaseModel

from ee.onyx.db.external_perm import ExternalUserGroup


class ExternalUserGroupSet(BaseModel):
    """A version of ExternalUserGroup that uses a set for user_emails to avoid order-dependent comparisons."""

    id: str
    user_emails: set[str]
    gives_anyone_access: bool

    @classmethod
    def from_model(
        cls, external_user_group: ExternalUserGroup
    ) -> "ExternalUserGroupSet":
        """Convert from ExternalUserGroup to ExternalUserGroupSet."""
        return cls(
            id=external_user_group.id,
            user_emails=set(external_user_group.user_emails),
            gives_anyone_access=external_user_group.gives_anyone_access,
        )


================================================
FILE: backend/tests/daily/connectors/confluence/test_confluence_basic.py
================================================
import os
import time
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.confluence.utils import AttachmentProcessingResult
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.connectors.models import Document
from tests.daily.connectors.utils import load_all_from_connector


def _make_connector(
    space: str, access_token: str, scoped_token: bool = False
) -> ConfluenceConnector:
    connector = ConfluenceConnector(
        wiki_base=os.environ["CONFLUENCE_TEST_SPACE_URL"],
        space=space,
        is_cloud=os.environ.get("CONFLUENCE_IS_CLOUD", "true").lower() == "true",
        page_id=os.environ.get("CONFLUENCE_TEST_PAGE_ID", ""),
        scoped_token=scoped_token,
    )

    credentials_provider = OnyxStaticCredentialsProvider(
        None,
        DocumentSource.CONFLUENCE,
        {
            "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
            "confluence_access_token": access_token,
        },
    )
    connector.set_credentials_provider(credentials_provider)
    return connector


@pytest.fixture
def confluence_connector(space: str) -> ConfluenceConnector:
    return _make_connector(space, os.environ["CONFLUENCE_ACCESS_TOKEN"].strip())


@pytest.fixture
def confluence_connector_scoped(space: str) -> ConfluenceConnector:
    return _make_connector(
        space, os.environ["CONFLUENCE_ACCESS_TOKEN_SCOPED"].strip(), scoped_token=True
    )


@pytest.mark.parametrize("space", [os.getenv("CONFLUENCE_TEST_SPACE") or "DailyConne"])
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_confluence_connector_basic(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    confluence_connector: ConfluenceConnector,
) -> None:
    _test_confluence_connector_basic(confluence_connector)


@pytest.mark.parametrize("space", [os.getenv("CONFLUENCE_TEST_SPACE") or "DailyConne"])
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_confluence_connector_basic_scoped(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    confluence_connector_scoped: ConfluenceConnector,
) -> None:
    _test_confluence_connector_basic(
        confluence_connector_scoped, expect_attachments=True
    )


def _test_confluence_connector_basic(
    confluence_connector: ConfluenceConnector, expect_attachments: bool = True
) -> None:
    confluence_connector.set_allow_images(False)
    result = load_all_from_connector(confluence_connector, 0, time.time())
    doc_batch = result.documents
    hierarchy_nodes = result.hierarchy_nodes

    assert len(doc_batch) == (3 if expect_attachments else 2)

    # Hierarchy structure:
    # - Space "DailyConne" (root)
    #   - Page "DailyConnectorTestSpace Home" (has attachments, so becomes hierarchy node)
    #     - Attachment "small-file.txt"
    #   - Page "Page Within A Page" (no children/attachments, not a hierarchy node)
    expected_hierarchy_count = 2 if expect_attachments else 1
    assert len(hierarchy_nodes) == expected_hierarchy_count, (
        f"Expected {expected_hierarchy_count} hierarchy nodes but got {len(hierarchy_nodes)}. "
        f"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}"
    )

    # Verify hierarchy node structure
    space_node = next(
        (n for n in hierarchy_nodes if n.node_type.value == "space"), None
    )
    assert space_node is not None, "Space hierarchy node not found"
    assert space_node.raw_node_id == "DailyConne"
    assert space_node.display_name == "DailyConnectorTestSpace"
    assert space_node.raw_parent_id is None  # Space is root

    if expect_attachments:
        home_page_node = next(
            (n for n in hierarchy_nodes if n.node_type.value == "page"), None
        )
        assert home_page_node is not None, "Home page hierarchy node not found"
        assert home_page_node.display_name == "DailyConnectorTestSpace Home"
        assert home_page_node.raw_parent_id == "DailyConne"  # Parent is the space

    page_within_a_page_doc: Document | None = None
    page_doc: Document | None = None
    small_file_doc: Document | None = None

    for doc in doc_batch:
        if doc.semantic_identifier == "DailyConnectorTestSpace Home":
            page_doc = doc
        elif doc.semantic_identifier == "Page Within A Page":
            page_within_a_page_doc = doc
        elif doc.semantic_identifier == "small-file.txt":
            small_file_doc = doc
        else:
            print(f"Unexpected doc: {doc.semantic_identifier}")

    assert page_within_a_page_doc is not None
    assert page_within_a_page_doc.semantic_identifier == "Page Within A Page"
    assert page_within_a_page_doc.primary_owners
    assert page_within_a_page_doc.primary_owners[0].email == "hagen@danswer.ai"
    assert (
        page_within_a_page_doc.id
        == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page"
    )
    assert len(page_within_a_page_doc.sections) == 1

    page_within_a_page_section = page_within_a_page_doc.sections[0]
    page_within_a_page_text = "@Chris Weaver loves cherry pie"
    assert page_within_a_page_section.text == page_within_a_page_text
    assert (
        page_within_a_page_section.link
        == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page"
    )

    assert page_doc is not None
    assert page_doc.semantic_identifier == "DailyConnectorTestSpace Home"
    assert (
        page_doc.id == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview"
    )
    assert page_doc.metadata["labels"] == ["testlabel"]
    assert page_doc.primary_owners
    assert page_doc.primary_owners[0].email == "hagen@danswer.ai"
    assert (
        len(page_doc.sections) == 1
    )  # just page text, attachment text is separate doc

    page_section = page_doc.sections[0]
    assert (
        page_section.text
        == "test123 "
        + page_within_a_page_text
        + "\n<attachment>small-file.txt</attachment>\n<attachment>big-file.txt</attachment>"
    )
    assert (
        page_section.link
        == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview"
    )

    if expect_attachments:
        assert small_file_doc is not None
        text_attachment_section = small_file_doc.sections[0]
        assert text_attachment_section.text == "small"
        assert text_attachment_section.link
        assert text_attachment_section.link.split("?")[0].endswith("small-file.txt")


@pytest.mark.parametrize("space", ["MI"])
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_confluence_connector_skip_images(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    confluence_connector: ConfluenceConnector,
) -> None:
    confluence_connector.set_allow_images(False)
    result = load_all_from_connector(confluence_connector, 0, time.time())
    doc_batch = result.documents
    hierarchy_nodes = result.hierarchy_nodes

    assert len(doc_batch) == 8
    assert sum(len(doc.sections) for doc in doc_batch) == 8

    # Hierarchy structure for MI space (when images are skipped):
    # - Space "MI" (Many Images)
    #   - Page "Many Images" (home page, has children)
    #     - Page "Image formats" (has children - the image pages)
    # Note: Image pages themselves don't become hierarchy nodes since images are skipped
    assert len(hierarchy_nodes) == 3, (
        f"Expected 3 hierarchy nodes but got {len(hierarchy_nodes)}. "
        f"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}"
    )


def mock_process_image_attachment(
    *args: Any,  # noqa: ARG001
    **kwargs: Any,  # noqa: ARG001
) -> AttachmentProcessingResult:
    """We need this mock to bypass DB access happening in the connector. Which shouldn't
    be done as a rule to begin with, but life is not perfect. Fix it later"""

    return AttachmentProcessingResult(
        text="Hi_text",
        file_name="Hi_filename",
        error=None,
    )


@pytest.mark.parametrize("space", ["MI"])
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
@patch(
    "onyx.connectors.confluence.utils._process_image_attachment",
    side_effect=mock_process_image_attachment,
)
def test_confluence_connector_allow_images(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    mock_process_image_attachment: MagicMock,  # noqa: ARG001
    confluence_connector: ConfluenceConnector,
) -> None:
    confluence_connector.set_allow_images(True)

    result = load_all_from_connector(confluence_connector, 0, time.time())
    doc_batch = result.documents
    hierarchy_nodes = result.hierarchy_nodes

    assert len(doc_batch) == 12
    assert sum(len(doc.sections) for doc in doc_batch) == 12

    # Hierarchy structure for MI space (when images are allowed):
    # - Space "MI" (Many Images)
    #   - Page "Many Images" (home page)
    #     - Page "Image formats" (has children)
    #     - Page "Dunder Mifflin Org Chart" (has image attachments)
    #     - Page "List of Joey's Favorite Objects" (has image attachments)
    #     - Page "Content" (has image attachments)
    # Pages with image attachments become hierarchy nodes because attachments reference them
    assert len(hierarchy_nodes) == 6, (
        f"Expected 6 hierarchy nodes but got {len(hierarchy_nodes)}. "
        f"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}"
    )


================================================
FILE: backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py
================================================
import os
import time
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder
from tests.daily.connectors.utils import load_all_from_connector


@pytest.fixture
def confluence_connector() -> ConfluenceConnector:
    connector = ConfluenceConnector(
        wiki_base="https://danswerai.atlassian.net",
        is_cloud=True,
    )

    credentials_provider = OnyxStaticCredentialsProvider(
        None,
        DocumentSource.CONFLUENCE,
        {
            "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
            "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
        },
    )
    connector.set_credentials_provider(credentials_provider)
    return connector


# This should never fail because even if the docs in the cloud change,
# the full doc ids retrieved should always be a subset of the slim doc ids
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_confluence_connector_permissions(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    confluence_connector: ConfluenceConnector,
    enable_ee: None,  # noqa: ARG001
) -> None:
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
    result = load_all_from_connector(confluence_connector, 0, time.time())
    doc_batch = result.documents
    hierarchy_nodes = result.hierarchy_nodes
    all_full_doc_ids.update([doc.id for doc in doc_batch])

    # Verify hierarchy nodes are returned and have valid structure
    # Note: The exact count depends on the current state of the Confluence instance
    assert len(hierarchy_nodes) > 0, "Expected at least some hierarchy nodes"

    # Verify all space nodes have no parent and all page nodes have a parent
    for node in hierarchy_nodes:
        if node.node_type.value == "space":
            assert (
                node.raw_parent_id is None
            ), f"Space node {node.raw_node_id} should have no parent"
        elif node.node_type.value == "page":
            assert (
                node.raw_parent_id is not None
            ), f"Page node {node.raw_node_id} should have a parent"

    # Get all doc IDs from the slim connector
    all_slim_doc_ids = set()
    for slim_doc_batch in confluence_connector.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # Find IDs that are in full but not in slim
    difference = all_full_doc_ids - all_slim_doc_ids

    # The set of full doc IDs should be always be a subset of the slim doc IDs
    assert all_full_doc_ids.issubset(
        all_slim_doc_ids
    ), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."


@patch("ee.onyx.external_permissions.confluence.doc_sync.OnyxDBCredentialsProvider")
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_confluence_connector_restriction_handling(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    mock_db_provider_class: MagicMock,
    enable_ee: None,  # noqa: ARG001
) -> None:
    # Test space key
    test_space_key = "DailyPermS"

    # Configure the mock provider instance that will be returned
    mock_provider_instance = MagicMock()
    mock_provider_instance.get_credentials.return_value = {
        "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
        "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
    }
    # this prevents redis calls inside of OnyxConfluence
    mock_provider_instance.is_dynamic.return_value = False
    # Make the class return our configured instance when called
    mock_db_provider_class.return_value = mock_provider_instance

    # Mock the cc_pair to pass to the function
    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
    # Mock the nested connector attribute and its config
    mock_cc_pair.connector = MagicMock()
    mock_cc_pair.connector.connector_specific_config = {
        "wiki_base": "https://danswerai.atlassian.net",
        "is_cloud": True,
        "space": test_space_key,
    }
    # Set a mock credential ID
    mock_cc_pair.credential_id = 1

    # Call the confluence_doc_sync function directly with the mock cc_pair
    def mock_fetch_all_docs_fn(
        sort_order: SortOrder | None = None,  # noqa: ARG001
    ) -> list[DocumentRow]:
        return []

    def mock_fetch_all_docs_ids_fn() -> list[str]:
        return []

    doc_access_generator = confluence_doc_sync(
        mock_cc_pair, mock_fetch_all_docs_fn, mock_fetch_all_docs_ids_fn, None
    )
    doc_access_list = list(doc_access_generator)
    assert len(doc_access_list) == 7
    assert all(
        not doc_access.external_access.is_public for doc_access in doc_access_list
    )

    # if no restriction is applied, the groups should give access, so no need
    # for more emails outside of the owner
    non_restricted_emails = {"chris@onyx.app"}
    non_restricted_user_groups = {
        "confluence-admins-danswerai",
        "org-admins",
        "atlassian-addons-admin",
        "confluence-users-danswerai",
    }

    # if restriction is applied, only should be visible to shared users / groups
    restricted_emails = {"chris@onyx.app", "hagen@danswer.ai", "oauth@onyx.app"}
    restricted_user_groups = {"confluence-admins-danswerai"}

    extra_restricted_emails = {"chris@onyx.app", "oauth@onyx.app"}
    extra_restricted_user_groups: set[str] = set()

    # note that this is only allowed since yuhong@onyx.app is a member of the
    # confluence-admins-danswerai group
    special_restricted_emails = {"chris@onyx.app", "yuhong@onyx.app", "oauth@onyx.app"}
    special_restricted_user_groups: set[str] = set()

    # Check Root+Page+2 is public
    root_page_2 = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Root+Page+2")
    )
    assert root_page_2.external_access.external_user_emails == non_restricted_emails
    assert (
        root_page_2.external_access.external_user_group_ids
        == non_restricted_user_groups
    )

    # Check Overview page is public
    overview_page = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.lower().endswith("overview")
    )
    assert (
        overview_page.external_access.external_user_emails == non_restricted_emails
    ), "Overview page emails do not match expected values"
    assert (
        overview_page.external_access.external_user_group_ids
        == non_restricted_user_groups
    ), "Overview page groups do not match expected values"

    # check root page is restricted
    root_page = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Root+Page")
    )
    assert (
        root_page.external_access.external_user_emails == restricted_emails
    ), "Root page emails do not match expected values"
    assert (
        root_page.external_access.external_user_group_ids == restricted_user_groups
    ), "Root page groups do not match expected values"

    # check child page has restriction propagated
    child_page = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Child+Page")
    )
    assert (
        child_page.external_access.external_user_emails == restricted_emails
    ), "Child page emails do not match expected values"
    assert (
        child_page.external_access.external_user_group_ids == restricted_user_groups
    ), "Child page groups do not match expected values"

    # check doubly nested child page has restriction propagated
    child_page_2 = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Child+Page+2")
    )
    assert (
        child_page_2.external_access.external_user_emails == restricted_emails
    ), "Child page 2 emails do not match expected values"
    assert (
        child_page_2.external_access.external_user_group_ids == restricted_user_groups
    ), "Child page 2 groups do not match expected values"

    # check child page w/ specific restrictions have those applied
    child_page_3 = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Child+Page+3")
    )
    assert (
        child_page_3.external_access.external_user_emails == extra_restricted_emails
    ), "Child page 3 emails do not match expected values"
    assert (
        child_page_3.external_access.external_user_group_ids
        == extra_restricted_user_groups
    ), "Child page 3 groups do not match expected values"

    # check child page w/ specific restrictions have those applied
    child_page_4 = next(
        d
        for d in doc_access_list
        if isinstance(d, DocExternalAccess) and d.doc_id.endswith("Child+Page+4")
    )
    assert (
        child_page_4.external_access.external_user_emails == special_restricted_emails
    ), "Child page 4 emails do not match expected values"
    assert (
        child_page_4.external_access.external_user_group_ids
        == special_restricted_user_groups
    ), "Child page 4 groups do not match expected values"


================================================
FILE: backend/tests/daily/connectors/confluence/test_confluence_user_email_overrides.py
================================================
import types
from unittest.mock import patch

from onyx.connectors.confluence.onyx_confluence import ConfluenceUser
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.interfaces import CredentialsProviderInterface


class MockCredentialsProvider(CredentialsProviderInterface):
    def get_tenant_id(self) -> str:
        return "test_tenant"

    def get_provider_key(self) -> str:
        return "test_provider"

    def is_dynamic(self) -> bool:
        return False

    def get_credentials(self) -> dict[str, str]:
        return {"confluence_access_token": "test_token"}

    def set_credentials(self, credentials: dict[str, str]) -> None:
        pass

    def __enter__(self) -> "MockCredentialsProvider":
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        pass


def test_paginated_cql_user_retrieval_with_overrides() -> None:
    """
    Tests that paginated_cql_user_retrieval yields users from the overrides
    when provided and is_cloud is False.
    """
    mock_provider = MockCredentialsProvider()
    overrides = [
        {
            "user_id": "override_user_1",
            "username": "override1",
            "display_name": "Override User One",
            "email": "override1@example.com",
            "type": "override",
        },
        {
            "user_id": "override_user_2",
            "username": "override2",
            "display_name": "Override User Two",
            "email": "override2@example.com",
            "type": "override",
        },
    ]
    expected_users = [ConfluenceUser(**user_data) for user_data in overrides]

    confluence_client = OnyxConfluence(
        is_cloud=False,  # Overrides are primarily for Server/DC
        url="http://dummy-confluence.com",
        credentials_provider=mock_provider,
        confluence_user_profiles_override=overrides,
    )

    retrieved_users = list(confluence_client.paginated_cql_user_retrieval())

    assert len(retrieved_users) == len(expected_users)
    # Sort lists by user_id for order-independent comparison
    retrieved_users.sort(key=lambda u: u.user_id)
    expected_users.sort(key=lambda u: u.user_id)
    assert retrieved_users == expected_users


def test_paginated_cql_user_retrieval_no_overrides_server() -> None:
    """
    Tests that paginated_cql_user_retrieval attempts to call the actual
    API pagination when no overrides are provided for Server/DC.
    """
    mock_provider = MockCredentialsProvider()
    confluence_client = OnyxConfluence(
        is_cloud=False,
        url="http://dummy-confluence.com",
        credentials_provider=mock_provider,
        confluence_user_profiles_override=None,
    )

    # Mock the internal pagination method to check if it's called
    with patch.object(confluence_client, "_paginate_url") as mock_paginate:
        mock_paginate.return_value = iter([])  # Return an empty iterator

        list(confluence_client.paginated_cql_user_retrieval())

        mock_paginate.assert_called_once_with("rest/api/user/list", None)


def test_paginated_cql_user_retrieval_no_overrides_cloud() -> None:
    """
    Tests that paginated_cql_user_retrieval attempts to call the actual
    API pagination when no overrides are provided for Cloud.
    """
    mock_provider = MockCredentialsProvider()
    confluence_client = OnyxConfluence(
        is_cloud=True,
        url="http://dummy-confluence.com",  # URL doesn't matter much here due to mocking
        credentials_provider=mock_provider,
        confluence_user_profiles_override=None,
    )

    # Mock the internal pagination method to check if it's called
    with patch.object(confluence_client, "_paginate_url") as mock_paginate:
        mock_paginate.return_value = iter([])  # Return an empty iterator

        list(confluence_client.paginated_cql_user_retrieval())

        # Check that the cloud-specific user search URL is called
        mock_paginate.assert_called_once_with(
            "rest/api/search/user?cql=type=user",
            None,
            force_offset_pagination=True,
        )


================================================
FILE: backend/tests/daily/connectors/conftest.py
================================================
from collections.abc import Generator
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest


@pytest.fixture
def mock_get_unstructured_api_key() -> Generator[MagicMock, None, None]:
    with patch(
        "onyx.file_processing.extract_file_text.get_unstructured_api_key",
        return_value=None,
    ) as mock:
        yield mock


================================================
FILE: backend/tests/daily/connectors/discord/test_discord_connector.py
================================================
import os
import time

import pytest

from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import HierarchyNode


@pytest.fixture
def discord_connector() -> DiscordConnector:
    connector = DiscordConnector()
    connector.load_credentials(
        {"discord_bot_token": os.environ["DISCORD_CONNECTOR_BOT_TOKEN"]}
    )
    return connector


def test_discord_connector_basic(discord_connector: DiscordConnector) -> None:
    # If there are no Discord messages in the last 7 days, something has gone horribly wrong
    end_time = time.time()
    start_time = end_time - (7 * 24 * 60 * 60)
    doc_batch_generator = discord_connector.poll_source(start_time, end_time)

    doc_batch = next(doc_batch_generator)

    docs: list[Document] = []
    for doc in doc_batch:
        if not isinstance(doc, HierarchyNode):
            docs.append(doc)

    assert len(docs) > 0, "No documents were retrieved from the connector"

    # Check basic document structure
    doc = docs[0]
    assert doc.source == DocumentSource.DISCORD
    assert doc.id is not None
    assert doc.semantic_identifier is not None
    assert len(doc.sections) > 0
    assert doc.sections[0].text is not None
    assert doc.sections[0].link is not None


================================================
FILE: backend/tests/daily/connectors/file/test_file_connector.py
================================================
import io
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest

from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.models import HierarchyNode


@pytest.fixture
def mock_db_session() -> MagicMock:
    return MagicMock()


@pytest.fixture
def mock_file_store() -> MagicMock:
    store = MagicMock()
    return store


@pytest.fixture
def mock_filestore_record() -> MagicMock:
    record = MagicMock()
    record.file_id = uuid4()
    record.display_name = "test.txt"
    return record


@patch("onyx.connectors.file.connector.get_default_file_store")
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key", return_value=None
)
def test_single_text_file_with_metadata(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_get_session: MagicMock,
    mock_db_session: MagicMock,
    mock_file_store: MagicMock,
    mock_filestore_record: MagicMock,
) -> None:
    file_content = io.BytesIO(
        b'#ONYX_METADATA={"link": "https://onyx.app", "file_display_name":"my display name", "tag_of_your_choice": "test-tag", \
          "primary_owners": ["wenxi@onyx.app"], "secondary_owners": ["founders@onyx.app"], \
          "doc_updated_at": "2001-01-01T00:00:00Z"}\n'
        b"Test answer is 12345"
    )
    mock_get_filestore = MagicMock()
    mock_get_filestore.return_value = mock_file_store
    mock_file_store.read_file_record.return_value = mock_filestore_record
    mock_get_session.return_value.__enter__.return_value = mock_db_session
    mock_file_store.read_file.return_value = file_content

    with patch(
        "onyx.connectors.file.connector.get_default_file_store",
        return_value=mock_file_store,
    ):
        connector = LocalFileConnector(
            file_locations=["test.txt"], file_names=["test.txt"], zip_metadata={}
        )
        batches = list(connector.load_from_state())

    assert len(batches) == 1
    docs = batches[0]
    assert len(docs) == 1
    doc = docs[0]
    assert not isinstance(doc, HierarchyNode)

    assert doc.sections[0].text == "Test answer is 12345"
    assert doc.sections[0].link == "https://onyx.app"
    assert doc.semantic_identifier == "my display name"
    assert doc.primary_owners[0].display_name == "wenxi@onyx.app"  # type: ignore
    assert doc.secondary_owners[0].display_name == "founders@onyx.app"  # type: ignore
    assert doc.doc_updated_at == datetime(2001, 1, 1, 0, 0, 0, tzinfo=timezone.utc)


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key", return_value=None
)
def test_two_text_files_with_zip_metadata(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_db_session: MagicMock,  # noqa: ARG001
    mock_file_store: MagicMock,
) -> None:
    file1_content = io.BytesIO(b"File 1 content")
    file2_content = io.BytesIO(b"File 2 content")
    mock_get_filestore = MagicMock()
    mock_get_filestore.return_value = mock_file_store
    mock_file_store.read_file_record.side_effect = [
        MagicMock(file_id=str(uuid4()), display_name="file1.txt"),
        MagicMock(file_id=str(uuid4()), display_name="file2.txt"),
    ]
    mock_file_store.read_file.side_effect = [file1_content, file2_content]
    zip_metadata = {
        "file1.txt": {
            "filename": "file1.txt",
            "file_display_name": "display 1",
            "link": "https://onyx.app/1",
            "primary_owners": ["alice@onyx.app"],
            "secondary_owners": ["bob@onyx.app"],
            "doc_updated_at": "2022-02-02T00:00:00Z",
        },
        "file2.txt": {
            "filename": "file2.txt",
            "file_display_name": "display 2",
            "link": "https://onyx.app/2",
            "primary_owners": ["carol@onyx.app"],
            "secondary_owners": ["dave@onyx.app"],
            "doc_updated_at": "2023-03-03T00:00:00Z",
        },
    }

    with patch(
        "onyx.connectors.file.connector.get_default_file_store",
        return_value=mock_file_store,
    ):
        connector = LocalFileConnector(
            file_locations=["file1.txt", "file2.txt"],
            file_names=["file1.txt", "file2.txt"],
            zip_metadata=zip_metadata,
        )
        batches = list(connector.load_from_state())

    assert len(batches) == 1
    docs = batches[0]
    assert len(docs) == 2
    doc1, doc2 = docs
    assert not isinstance(doc1, HierarchyNode)
    assert not isinstance(doc2, HierarchyNode)

    assert doc1.sections[0].text == "File 1 content"
    assert doc1.sections[0].link == "https://onyx.app/1"
    assert doc1.semantic_identifier == "display 1"
    assert doc1.primary_owners[0].display_name == "alice@onyx.app"  # type: ignore
    assert doc1.secondary_owners[0].display_name == "bob@onyx.app"  # type: ignore
    assert doc1.doc_updated_at == datetime(2022, 2, 2, 0, 0, 0, tzinfo=timezone.utc)
    assert doc2.sections[0].text == "File 2 content"
    assert doc2.sections[0].link == "https://onyx.app/2"
    assert doc2.semantic_identifier == "display 2"
    assert doc2.primary_owners[0].display_name == "carol@onyx.app"  # type: ignore
    assert doc2.secondary_owners[0].display_name == "dave@onyx.app"  # type: ignore
    assert doc2.doc_updated_at == datetime(2023, 3, 3, 0, 0, 0, tzinfo=timezone.utc)


================================================
FILE: backend/tests/daily/connectors/fireflies/test_fireflies_connector.py
================================================
import json
import os
import time
from pathlib import Path
from typing import Any

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.fireflies.connector import FirefliesConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode


def load_test_data(file_name: str = "test_fireflies_data.json") -> dict[str, Any]:
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def fireflies_connector() -> FirefliesConnector:
    connector = FirefliesConnector()
    connector.load_credentials(
        {"fireflies_api_key": os.environ["FIREFLIES_API_KEY"]},
    )
    return connector


@pytest.mark.xfail(
    reason="We don't have the key that is stored in GitHub Secrets and the returned data is different than expected",
)
def test_fireflies_connector_basic(fireflies_connector: FirefliesConnector) -> None:
    test_data = load_test_data()

    connector_return_data: list[Document | HierarchyNode] = next(
        fireflies_connector.poll_source(0, time.time())
    )
    target_doc: Document | HierarchyNode = connector_return_data[0]
    if isinstance(target_doc, HierarchyNode):
        raise ValueError("Hierarchy node returned from connector")

    assert target_doc is not None, "No documents were retrieved from the connector"
    assert (
        target_doc.primary_owners is not None
    ), "No primary owners were retrieved from the connector"

    assert target_doc.id == test_data["id"]
    assert target_doc.semantic_identifier == test_data["semantic_identifier"]
    assert target_doc.primary_owners[0].email == test_data["primary_owners"]
    assert target_doc.secondary_owners == test_data["secondary_owners"]
    assert str(target_doc.doc_updated_at) == test_data["doc_updated_at"]

    assert (
        target_doc.source == DocumentSource.FIREFLIES
    ), "Document source is not fireflies"
    assert target_doc.metadata == test_data["metadata"]

    # Check that the test data and the connector data contain the same section data
    assert {section.text for section in target_doc.sections} == {
        section["text"] for section in test_data["sections"]
    }
    assert {section.link for section in target_doc.sections} == {
        section["link"] for section in test_data["sections"]
    }


================================================
FILE: backend/tests/daily/connectors/fireflies/test_fireflies_data.json
================================================
{
  "id": "FIREFLIES_VcBdZpuV82rImQCA",
  "semantic_identifier": "Lead Generation Efforts",
  "primary_owners": "admin@onyx-test.com",
  "secondary_owners": [],
  "doc_updated_at": "2025-01-10 19:10:00+00:00",
  "metadata": {
    "meeting_date": "2025-01-10 19:10:00+00:00",
    "duration_min": "10"
  },
  "sections": [
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=153.1",
      "text": "test_user_1 1:  Hey, David, thanks for taking the time today."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=158.14",
      "text": "Test Admin Admin: Of course Sarah, It's nice to see you. Whenever you're ready."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=165.1",
      "text": "test_user_1 1: All right then, David, let's jump right in. How are the lead generation efforts for the new product launch looking?"
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=171.084",
      "text": "Test Admin Admin: So far we've seen a good initial response, but we're facing a slight challenge with qualifying leads. The sales team is getting inquiries. Some aren't quite aligned with our ideal customer profile."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=191.86",
      "text": "test_user_1 1: That makes sense. Do you think we need to adjust our marketing messaging to better target the right audience?"
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=202.26",
      "text": "Test Admin Admin: Absolutely. Maybe we could emphasize the key features that are most relevant to our target market in the marketing materials. What are your thoughts on refining the lead capture to gather more specific information?"
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=225.99",
      "text": "test_user_1 1: I think that's a great idea. We could add additional qualifying questions to ensure we're capturing leads with the right needs."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=238.56",
      "text": "Test Admin Admin: On another note, how are the social media campaigns performing? Are we seeing good engagement with the new product launch post?"
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=257.2",
      "text": "test_user_1 1: The engagement is positive, but we could potentially increase increase reach further with targeted ad campaigns and key platforms."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=268.91",
      "text": "Test Admin Admin: Agreed. Let's discuss a strategy to develop targeted ads that focus on the pain points our ideal customers are facing and how our product solves them."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=270.27",
      "text": "test_user_1 1: We can collaborate on creating specific ad copy that highlights these benefits."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=289.06",
      "text": "Test Admin Admin: All right, so to summarize, let's prioritize refining the lead capture form, develop targeted social media ads, and make sure our marketing method clearly aligns with our ideal customer profile."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=303.38",
      "text": "test_user_1 1: Yep. And let's schedule a follow up meeting in a week, review progress and discuss any adjustments."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=310.9",
      "text": "Test Admin Admin: Sounds good. I'll send you address updated lead form by the end of the day. Thanks, Sarah."
    },
    {
      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=319.19",
      "text": "test_user_1 1: Thank you David."
    }
  ]
}


================================================
FILE: backend/tests/daily/connectors/gitbook/test_gitbook_connector.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.gitbook.connector import GitbookConnector
from onyx.connectors.models import HierarchyNode


@pytest.fixture
def gitbook_connector() -> GitbookConnector:
    connector = GitbookConnector(
        space_id=os.environ["GITBOOK_SPACE_ID"],
    )
    connector.load_credentials(
        {
            "gitbook_api_key": os.environ["GITBOOK_API_KEY"],
        }
    )
    return connector


NUM_PAGES = 3


def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
    doc_batch_generator = gitbook_connector.load_from_state()

    # Get first batch of documents
    doc_batch = next(doc_batch_generator)
    assert len(doc_batch) == NUM_PAGES

    # Verify first document structure
    main_doc = doc_batch[0]
    assert not isinstance(main_doc, HierarchyNode)

    # Basic document properties
    assert main_doc.id.startswith("gitbook-")
    assert main_doc.semantic_identifier == "Acme Corp Internal Handbook"
    assert main_doc.source == DocumentSource.GITBOOK

    # Metadata checks
    assert "path" in main_doc.metadata
    assert "type" in main_doc.metadata
    assert "kind" in main_doc.metadata

    # Section checks
    assert len(main_doc.sections) == 1
    section = main_doc.sections[0]

    # Content specific checks
    content = section.text
    assert content is not None, "Section text should not be None"

    # Check for specific content elements
    assert "* Fruit Shopping List:" in content
    assert "> test quote it doesn't mean anything" in content

    # Check headings
    assert "# Heading 1" in content
    assert "## Heading 2" in content
    assert "### Heading 3" in content

    # Check task list
    assert "- [ ] Uncompleted Task" in content
    assert "- [x] Completed Task" in content

    # Check table content
    assert "| ethereum | 10 | 3000 |" in content
    assert "| bitcoin | 2 | 98000 |" in content

    # Check paragraph content
    assert "New York City comprises 5 boroughs" in content
    assert "Empire State Building" in content

    # Check code block (just verify presence of some unique code elements)
    assert "function fizzBuzz(n)" in content
    assert 'res.push("FizzBuzz")' in content

    assert section.link  # Should have a URL

    nested1 = doc_batch[1]
    assert not isinstance(nested1, HierarchyNode)
    assert nested1.id.startswith("gitbook-")
    assert nested1.semantic_identifier == "Nested1"
    assert len(nested1.sections) == 1
    # extra newlines at the end, remove them to make test easier
    assert nested1.sections[0].text is not None
    assert nested1.sections[0].text.strip() == "nested1"
    assert nested1.source == DocumentSource.GITBOOK

    nested2 = doc_batch[2]
    assert not isinstance(nested2, HierarchyNode)
    assert nested2.id.startswith("gitbook-")
    assert nested2.semantic_identifier == "Nested2"
    assert len(nested2.sections) == 1
    assert nested2.sections[0].text is not None
    assert nested2.sections[0].text.strip() == "nested2"
    assert nested2.source == DocumentSource.GITBOOK

    # Time-based polling test
    current_time = time.time()
    poll_docs = gitbook_connector.poll_source(0, current_time)
    poll_batch = next(poll_docs)
    assert len(poll_batch) == NUM_PAGES


================================================
FILE: backend/tests/daily/connectors/github/test_github_basic.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.github.connector import GithubConnector
from tests.daily.connectors.utils import load_all_from_connector


@pytest.fixture
def github_connector() -> GithubConnector:
    connector = GithubConnector(
        repo_owner="onyx-dot-app",
        repositories="documentation",
        include_prs=True,
        include_issues=True,
    )
    connector.load_credentials(
        {
            "github_access_token": os.environ["ACCESS_TOKEN_GITHUB"],
        }
    )
    return connector


def test_github_connector_basic(github_connector: GithubConnector) -> None:
    docs = load_all_from_connector(
        connector=github_connector,
        start=0,
        end=time.time(),
    ).documents
    assert len(docs) > 1  # We expect at least one PR and one Issue to exist

    # Test the first document's structure
    pr_doc = docs[0]
    issue_doc = docs[-1]

    # Verify basic document properties
    assert pr_doc.source == DocumentSource.GITHUB
    assert pr_doc.secondary_owners is None
    assert pr_doc.from_ingestion_api is False
    assert pr_doc.additional_info is None

    # Verify GitHub-specific properties
    assert "github.com" in pr_doc.id  # Should be a GitHub URL

    # Verify PR-specific properties
    assert pr_doc.metadata is not None
    assert pr_doc.metadata.get("object_type") == "PullRequest"
    assert "id" in pr_doc.metadata
    assert "merged" in pr_doc.metadata
    assert "state" in pr_doc.metadata
    assert "user" in pr_doc.metadata
    assert "assignees" in pr_doc.metadata
    assert pr_doc.metadata.get("repo") == "onyx-dot-app/documentation"
    assert "num_commits" in pr_doc.metadata
    assert "num_files_changed" in pr_doc.metadata
    assert "labels" in pr_doc.metadata
    assert "created_at" in pr_doc.metadata

    # Verify Issue-specific properties
    assert issue_doc.metadata is not None
    assert issue_doc.metadata.get("object_type") == "Issue"
    assert "id" in issue_doc.metadata
    assert "state" in issue_doc.metadata
    assert "user" in issue_doc.metadata
    assert "assignees" in issue_doc.metadata
    assert issue_doc.metadata.get("repo") == "onyx-dot-app/documentation"
    assert "labels" in issue_doc.metadata
    assert "created_at" in issue_doc.metadata

    # Verify sections
    assert len(pr_doc.sections) == 1
    section = pr_doc.sections[0]
    assert section.link == pr_doc.id  # Section link should match document ID
    assert isinstance(section.text, str)  # Should have some text content


================================================
FILE: backend/tests/daily/connectors/gitlab/test_gitlab_basic.py
================================================
import itertools
import os

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.gitlab.connector import GitlabConnector
from onyx.connectors.models import HierarchyNode


@pytest.fixture
def gitlab_connector() -> GitlabConnector:
    connector = GitlabConnector(
        project_owner="onyx2895818",
        project_name="onyx",
        include_mrs=True,
        include_issues=True,
        include_code_files=True,  # Include code files in the test
    )
    # Ensure GITLAB_ACCESS_TOKEN and optionally GITLAB_URL are set in the environment
    gitlab_url = os.environ.get("GITLAB_URL", "https://gitlab.com")
    gitlab_token = os.environ.get("GITLAB_ACCESS_TOKEN")

    if not gitlab_token:
        pytest.skip("GITLAB_ACCESS_TOKEN environment variable not set.")

    connector.load_credentials(
        {
            "gitlab_access_token": gitlab_token,
            "gitlab_url": gitlab_url,
        }
    )
    return connector


def test_gitlab_connector_basic(gitlab_connector: GitlabConnector) -> None:
    doc_batches = gitlab_connector.load_from_state()
    docs = list(itertools.chain(*doc_batches))
    # Assert right number of docs - Adjust if necessary based on test repo state
    assert len(docs) == 79

    # Find one of each type to validate
    validated_mr = False
    validated_issue = False
    validated_code_file = False
    gitlab_base_url = os.environ.get("GITLAB_URL", "https://gitlab.com").split("//")[-1]
    project_path = f"{gitlab_connector.project_owner}/{gitlab_connector.project_name}"

    # --- Specific Document Details to Validate ---
    target_mr_id = f"https://{gitlab_base_url}/{project_path}/-/merge_requests/1"
    target_issue_id = f"https://{gitlab_base_url}/{project_path}/-/work_items/2"
    target_code_file_semantic_id = "README.md"
    # ---

    for doc in docs:
        if isinstance(doc, HierarchyNode):
            continue
        # Verify basic document properties (common to all types)
        assert doc.source == DocumentSource.GITLAB
        assert doc.secondary_owners is None
        assert doc.from_ingestion_api is False
        assert doc.additional_info is None
        assert isinstance(doc.id, str)
        assert doc.metadata is not None
        assert "type" in doc.metadata
        doc_type = doc.metadata["type"]

        # Verify sections (common structure)
        assert len(doc.sections) >= 1
        section = doc.sections[0]
        assert isinstance(section.link, str)
        assert gitlab_base_url in section.link
        assert isinstance(section.text, str)

        # --- Type-specific and Content Validation ---
        if doc.id == target_mr_id and doc_type == "MergeRequest":
            assert doc.metadata["state"] == "opened"
            assert doc.semantic_identifier == "Add awesome feature"
            assert section.text == "This MR implements the awesome feature"
            assert doc.primary_owners is not None
            assert len(doc.primary_owners) == 1
            assert (
                doc.primary_owners[0].display_name == "Test"
            )  # Adjust if author changes
            assert doc.id == section.link
            validated_mr = True
        elif doc.id == target_issue_id and doc_type == "ISSUE":
            assert doc.metadata["state"] == "opened"
            assert doc.semantic_identifier == "Investigate performance issue"
            assert (
                section.text
                == "Investigate and resolve the performance degradation on endpoint X"
            )
            assert doc.primary_owners is not None
            assert len(doc.primary_owners) == 1
            assert (
                doc.primary_owners[0].display_name == "Test"
            )  # Adjust if author changes
            assert doc.id == section.link
            validated_issue = True
        elif (
            doc.semantic_identifier == target_code_file_semantic_id
            and doc_type == "CodeFile"
        ):
            # ID is a git hash (e.g., 'd177...'), Link is the blob URL
            assert doc.id != section.link
            assert section.link.endswith("/README.md")
            assert "# onyx" in section.text  # Check for a known part of the content
            # Code files might not have primary owners assigned this way
            # assert len(doc.primary_owners) == 0
            validated_code_file = True

        # Generic validation for *any* document of the type if specific one not found yet
        elif doc_type == "MergeRequest" and not validated_mr:
            assert "state" in doc.metadata
            assert gitlab_base_url in doc.id  # MR ID should be a URL
            assert doc.id == section.link  # Link and ID are the same URL
        elif doc_type == "ISSUE" and not validated_issue:
            assert "state" in doc.metadata
            assert gitlab_base_url in doc.id  # Issue ID should be a URL
            assert doc.id == section.link  # Link and ID are the same URL
        elif doc_type == "CodeFile" and not validated_code_file:
            assert doc.id != section.link  # ID is GID/hash, link is blob URL

        # Early exit optimization (optional)
        # if validated_mr and validated_issue and validated_code_file:
        #     break

    # Assert that we found and validated the specific documents
    assert (
        validated_mr
    ), f"Failed to find and validate the specific MergeRequest ({target_mr_id})."
    assert (
        validated_issue
    ), f"Failed to find and validate the specific Issue ({target_issue_id})."
    assert (
        validated_code_file
    ), f"Failed to find and validate the specific CodeFile ({target_code_file_semantic_id})."


================================================
FILE: backend/tests/daily/connectors/gmail/conftest.py
================================================
import json
import os
from collections.abc import Callable

import pytest

from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from tests.load_env_vars import load_env_vars


# Load environment variables at the module level
load_env_vars()


def parse_credentials(env_str: str) -> dict:
    """
    Parse a double-escaped JSON string from environment variables into a Python dictionary.

    Args:
        env_str (str): The double-escaped JSON string from environment variables

    Returns:
        dict: Parsed OAuth credentials
    """
    # first try normally
    try:
        return json.loads(env_str)
    except Exception:
        # First, try remove extra escaping backslashes
        unescaped = env_str.replace('\\"', '"')

        # remove leading / trailing quotes
        unescaped = unescaped.strip('"')

        # Now parse the JSON
        return json.loads(unescaped)


@pytest.fixture
def google_gmail_oauth_connector_factory() -> Callable[..., GmailConnector]:
    def _connector_factory(
        primary_admin_email: str = "admin@onyx-test.com",
    ) -> GmailConnector:
        print("Creating GmailConnector with OAuth credentials")
        connector = GmailConnector()

        json_string = os.environ["GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR"]
        refried_json_string = json.dumps(parse_credentials(json_string))

        credentials_json = {
            DB_CREDENTIALS_DICT_TOKEN_KEY: refried_json_string,
            DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,
            DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
        }
        connector.load_credentials(credentials_json)
        return connector

    return _connector_factory


@pytest.fixture
def google_gmail_service_acct_connector_factory() -> Callable[..., GmailConnector]:
    def _connector_factory(
        primary_admin_email: str = "admin@onyx-test.com",
    ) -> GmailConnector:
        print("Creating GmailConnector with service account credentials")
        connector = GmailConnector()

        json_string = os.environ["GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR"]
        refried_json_string = json.dumps(parse_credentials(json_string))

        # Load Service Account Credentials
        connector.load_credentials(
            {
                DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: refried_json_string,
                DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,
                DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
            }
        )
        return connector

    return _connector_factory


================================================
FILE: backend/tests/daily/connectors/gmail/test_gmail_connector.py
================================================
from collections.abc import Callable
from typing import Any
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector


_THREAD_1_START_TIME = 1730568700
_THREAD_1_END_TIME = 1730569000

"""
This thread was 4 emails long:
    admin@onyx-test.com -> test-group-1@onyx-test.com (conaining test_user_1 and test_user_2)
    test_user_1@onyx-test.com -> admin@onyx-test.com
    admin@onyx-test.com -> test_user_2@onyx-test.com + BCC: test_user_3@onyx-test.com
    test_user_3@onyx-test.com -> admin@onyx-test.com
"""
_THREAD_1_BY_ID: dict[str, dict[str, Any]] = {
    "192edefb315737c3": {
        "email": "admin@onyx-test.com",
        "sections_count": 4,
        "primary_owners": set(
            [
                "admin@onyx-test.com",
                "test_user_1@onyx-test.com",
                "test_user_3@onyx-test.com",
            ]
        ),
        "secondary_owners": set(
            [
                "test-group-1@onyx-test.com",
                "admin@onyx-test.com",
                "test_user_2@onyx-test.com",
                "test_user_3@onyx-test.com",
            ]
        ),
    },
    "192edf020d2f5def": {
        "email": "test_user_1@onyx-test.com",
        "sections_count": 2,
        "primary_owners": set(["admin@onyx-test.com", "test_user_1@onyx-test.com"]),
        "secondary_owners": set(["test-group-1@onyx-test.com", "admin@onyx-test.com"]),
    },
    "192edf020ae90aab": {
        "email": "test_user_2@onyx-test.com",
        "sections_count": 2,
        "primary_owners": set(["admin@onyx-test.com"]),
        "secondary_owners": set(
            ["test-group-1@onyx-test.com", "test_user_2@onyx-test.com"]
        ),
    },
    "192edf18316015fa": {
        "email": "test_user_3@onyx-test.com",
        "sections_count": 2,
        "primary_owners": set(["admin@onyx-test.com", "test_user_3@onyx-test.com"]),
        "secondary_owners": set(
            [
                "admin@onyx-test.com",
                "test_user_2@onyx-test.com",
                "test_user_3@onyx-test.com",
            ]
        ),
    },
}


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_slim_docs_retrieval(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_gmail_service_acct_connector_factory: Callable[..., GmailConnector],
) -> None:
    print("\n\nRunning test_slim_docs_retrieval")
    connector = google_gmail_service_acct_connector_factory()
    retrieved_slim_docs: list[SlimDocument] = []
    for doc_batch in connector.retrieve_all_slim_docs_perm_sync(
        _THREAD_1_START_TIME, _THREAD_1_END_TIME
    ):
        retrieved_slim_docs.extend(
            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    assert len(retrieved_slim_docs) == 4

    for doc in retrieved_slim_docs:
        assert doc.external_access is not None
        assert len(doc.external_access.external_user_emails) == 1
        user_email = next(iter(doc.external_access.external_user_emails))
        assert _THREAD_1_BY_ID[doc.id]["email"] == user_email


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_docs_retrieval(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_gmail_service_acct_connector_factory: Callable[..., GmailConnector],
) -> None:
    print("\n\nRunning test_docs_retrieval")
    connector = google_gmail_service_acct_connector_factory()
    retrieved_docs: list[Document] = []
    for doc_batch in load_everything_from_checkpoint_connector(
        connector, _THREAD_1_START_TIME, _THREAD_1_END_TIME
    ):
        assert all(isinstance(item, Document) for item in doc_batch.items)
        retrieved_docs.extend(cast(list[Document], doc_batch.items))

    assert len(retrieved_docs) == 4

    for doc in retrieved_docs:
        id = doc.id
        retrieved_primary_owner_emails: set[str | None] = set()
        retrieved_secondary_owner_emails: set[str | None] = set()
        if doc.primary_owners:
            retrieved_primary_owner_emails = set(
                [owner.email for owner in doc.primary_owners]
            )
        if doc.secondary_owners:
            retrieved_secondary_owner_emails = set(
                [owner.email for owner in doc.secondary_owners]
            )
        assert _THREAD_1_BY_ID[id]["sections_count"] == len(doc.sections)
        assert _THREAD_1_BY_ID[id]["primary_owners"] == retrieved_primary_owner_emails
        assert (
            _THREAD_1_BY_ID[id]["secondary_owners"] == retrieved_secondary_owner_emails
        )


================================================
FILE: backend/tests/daily/connectors/gong/test_gong.py
================================================
import os
import time
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode


@pytest.fixture
def gong_connector() -> GongConnector:
    connector = GongConnector()

    connector.load_credentials(
        {
            "gong_access_key": os.environ["GONG_ACCESS_KEY"],
            "gong_access_key_secret": os.environ["GONG_ACCESS_KEY_SECRET"],
        }
    )

    return connector


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_gong_basic(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    gong_connector: GongConnector,
) -> None:
    doc_batch_generator = gong_connector.poll_source(0, time.time())

    doc_batch = next(doc_batch_generator)
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 2

    docs: list[Document] = []
    for doc in doc_batch:
        if not isinstance(doc, HierarchyNode):
            docs.append(doc)

    assert docs[0].semantic_identifier == "test with chris"
    assert docs[1].semantic_identifier == "Testing Gong"


================================================
FILE: backend/tests/daily/connectors/google_drive/conftest.py
================================================
import json
import os
import resource
from collections.abc import Callable

import pytest

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_AUTHENTICATION_METHOD,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
)
from tests.load_env_vars import load_env_vars


# Load environment variables at the module level
load_env_vars()


_USER_TO_OAUTH_CREDENTIALS_MAP = {
    "admin@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR",
    "test_user_1@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1",
}

_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP = {
    "admin@onyx-test.com": "GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR",
}


def parse_credentials(env_str: str) -> dict:
    """
    Parse a double-escaped JSON string from environment variables into a Python dictionary.

    Args:
        env_str (str): The double-escaped JSON string from environment variables

    Returns:
        dict: Parsed OAuth credentials
    """
    # first try normally
    try:
        return json.loads(env_str)
    except Exception:
        # First, try remove extra escaping backslashes
        unescaped = env_str.replace('\\"', '"')

        # remove leading / trailing quotes
        unescaped = unescaped.strip('"')

        # Now parse the JSON
        return json.loads(unescaped)


def get_credentials_from_env(email: str, oauth: bool) -> dict:
    if oauth:
        raw_credential_string = os.environ[_USER_TO_OAUTH_CREDENTIALS_MAP[email]]
    else:
        raw_credential_string = os.environ[
            _USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[email]
        ]

    refried_credential_string = json.dumps(parse_credentials(raw_credential_string))

    cred_key = (
        DB_CREDENTIALS_DICT_TOKEN_KEY
        if oauth
        else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
    )
    return {
        cred_key: refried_credential_string,
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
    }


@pytest.fixture
def google_drive_oauth_uploaded_connector_factory() -> (
    Callable[..., GoogleDriveConnector]
):
    def _connector_factory(
        primary_admin_email: str,
        include_shared_drives: bool,
        shared_drive_urls: str | None,
        include_my_drives: bool,
        my_drive_emails: str | None,
        shared_folder_urls: str | None,
        include_files_shared_with_me: bool,
    ) -> GoogleDriveConnector:
        print("Creating GoogleDriveConnector with OAuth credentials")
        connector = GoogleDriveConnector(
            include_shared_drives=include_shared_drives,
            shared_drive_urls=shared_drive_urls,
            include_my_drives=include_my_drives,
            include_files_shared_with_me=include_files_shared_with_me,
            my_drive_emails=my_drive_emails,
            shared_folder_urls=shared_folder_urls,
        )

        credentials_json = get_credentials_from_env(primary_admin_email, oauth=True)
        connector.load_credentials(credentials_json)
        return connector

    return _connector_factory


@pytest.fixture
def google_drive_service_acct_connector_factory() -> (
    Callable[..., GoogleDriveConnector]
):
    def _connector_factory(
        primary_admin_email: str,
        include_shared_drives: bool,
        shared_drive_urls: str | None,
        include_my_drives: bool,
        my_drive_emails: str | None,
        shared_folder_urls: str | None,
        include_files_shared_with_me: bool,
        specific_user_emails: str | None = None,
    ) -> GoogleDriveConnector:
        print("Creating GoogleDriveConnector with service account credentials")
        connector = GoogleDriveConnector(
            include_shared_drives=include_shared_drives,
            shared_drive_urls=shared_drive_urls,
            include_my_drives=include_my_drives,
            my_drive_emails=my_drive_emails,
            shared_folder_urls=shared_folder_urls,
            include_files_shared_with_me=include_files_shared_with_me,
            specific_user_emails=specific_user_emails,
        )

        # Load Service Account Credentials
        credentials_json = get_credentials_from_env(
            email=primary_admin_email, oauth=False
        )
        connector.load_credentials(credentials_json)
        return connector

    return _connector_factory


@pytest.fixture(scope="session", autouse=True)
def set_resource_limits() -> None:
    # the google sdk is aggressive about using up file descriptors and
    # macos is stingy ... these tests will fail randomly unless the descriptor limit is raised
    RLIMIT_MINIMUM = 2048
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    desired_soft = min(RLIMIT_MINIMUM, hard)  # Pick your target here

    print(f"Open file limit: soft={soft} hard={hard} soft_required={RLIMIT_MINIMUM}")

    if soft < desired_soft:
        print(f"Raising open file limit: {soft} -> {desired_soft}")
        resource.setrlimit(resource.RLIMIT_NOFILE, (desired_soft, hard))

    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    print(f"New open file limit: soft={soft} hard={hard}")
    return


================================================
FILE: backend/tests/daily/connectors/google_drive/consts_and_utils.py
================================================
import time
from collections.abc import Sequence
from dataclasses import dataclass
from dataclasses import field
from dataclasses import replace
from urllib.parse import urlparse

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import TextSection
from onyx.db.enums import HierarchyNodeType
from tests.daily.connectors.utils import ConnectorOutput
from tests.daily.connectors.utils import load_all_from_connector

ALL_FILES = list(range(0, 60))
SHARED_DRIVE_FILES = list(range(20, 25))


ADMIN_FILE_IDS = list(range(0, 5))
ADMIN_FOLDER_3_FILE_IDS = list(range(65, 70))  # This folder is shared with test_user_1
TEST_USER_1_FILE_IDS = list(range(5, 10))
TEST_USER_2_FILE_IDS = list(range(10, 15))
TEST_USER_3_FILE_IDS = list(range(15, 20))
SHARED_DRIVE_1_FILE_IDS = list(range(20, 25))
FOLDER_1_FILE_IDS = list(range(25, 30))
FOLDER_1_1_FILE_IDS = list(range(30, 35))
FOLDER_1_2_FILE_IDS = list(range(35, 40))  # This folder is public
SHARED_DRIVE_2_FILE_IDS = list(range(40, 45))
FOLDER_2_FILE_IDS = list(range(45, 50))
FOLDER_2_1_FILE_IDS = list(range(50, 55))
FOLDER_2_2_FILE_IDS = list(range(55, 60))
SECTIONS_FILE_IDS = [61]
FOLDER_3_FILE_IDS = list(range(62, 65))

DONWLOAD_REVOKED_FILE_ID = 21

PUBLIC_FOLDER_RANGE = FOLDER_1_2_FILE_IDS
PUBLIC_FILE_IDS = list(range(55, 57))
PUBLIC_RANGE = PUBLIC_FOLDER_RANGE + PUBLIC_FILE_IDS

SHARED_DRIVE_1_URL = "https://drive.google.com/drive/folders/0AC_OJ4BkMd4kUk9PVA"
# Group 1 is given access to this folder
FOLDER_1_URL = (
    "https://drive.google.com/drive/folders/1d3I7U3vUZMDziF1OQqYRkB8Jp2s_GWUn"
)
FOLDER_1_1_URL = (
    "https://drive.google.com/drive/folders/1aR33-zwzl_mnRAwH55GgtWTE-4A4yWWI"
)
FOLDER_1_2_URL = (
    "https://drive.google.com/drive/folders/1IO0X55VhvLXf4mdxzHxuKf4wxrDBB6jq"
)
SHARED_DRIVE_2_URL = "https://drive.google.com/drive/folders/0ABKspIh7P4f4Uk9PVA"
FOLDER_2_URL = (
    "https://drive.google.com/drive/folders/1lNpCJ1teu8Se0louwL0oOHK9nEalskof"
)
FOLDER_2_1_URL = (
    "https://drive.google.com/drive/folders/1XeDOMWwxTDiVr9Ig2gKum3Zq_Wivv6zY"
)
FOLDER_2_2_URL = (
    "https://drive.google.com/drive/folders/1RKlsexA8h7NHvBAWRbU27MJotic7KXe3"
)
FOLDER_3_URL = (
    "https://drive.google.com/drive/folders/1LHibIEXfpUmqZ-XjBea44SocA91Nkveu"
)
SECTIONS_FOLDER_URL = (
    "https://drive.google.com/drive/u/5/folders/1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33"
)


def extract_folder_id_from_url(url: str) -> str:
    """Extract the folder ID from a Google Drive URL."""
    parsed = urlparse(url)
    # URL format: /drive/folders/{id} or /drive/u/{num}/folders/{id}
    parts = parsed.path.split("/")
    # Find 'folders' and take the next segment
    for i, part in enumerate(parts):
        if part == "folders" and i + 1 < len(parts):
            return parts[i + 1]
    raise ValueError(f"Could not extract folder ID from URL: {url}")


# Folder IDs extracted from URLs
SHARED_DRIVE_1_ID = extract_folder_id_from_url(SHARED_DRIVE_1_URL)
SHARED_DRIVE_2_ID = extract_folder_id_from_url(SHARED_DRIVE_2_URL)
FOLDER_1_ID = extract_folder_id_from_url(FOLDER_1_URL)
FOLDER_1_1_ID = extract_folder_id_from_url(FOLDER_1_1_URL)
FOLDER_1_2_ID = extract_folder_id_from_url(FOLDER_1_2_URL)
FOLDER_2_ID = extract_folder_id_from_url(FOLDER_2_URL)
FOLDER_2_1_ID = extract_folder_id_from_url(FOLDER_2_1_URL)
FOLDER_2_2_ID = extract_folder_id_from_url(FOLDER_2_2_URL)
FOLDER_3_ID = extract_folder_id_from_url(FOLDER_3_URL)
SECTIONS_FOLDER_ID = extract_folder_id_from_url(SECTIONS_FOLDER_URL)
RESTRICTED_ACCESS_FOLDER_ID = "1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN"


# ============================================================================
# FOLDER HIERARCHY DEFINITION
# ============================================================================
# This defines the expected folder hierarchy for our test Google Drive setup.
#
# Folder Hierarchy:
# shared_drive_1 (0AC_OJ4BkMd4kUk9PVA)
#   ├── restricted_access_folder (1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN)
#   └── folder_1 (1d3I7U3vUZMDziF1OQqYRkB8Jp2s_GWUn)
#       ├── folder_1_1 (1aR33-zwzl_mnRAwH55GgtWTE-4A4yWWI)
#       └── folder_1_2 (1IO0X55VhvLXf4mdxzHxuKf4wxrDBB6jq)
#
# shared_drive_2 (0ABKspIh7P4f4Uk9PVA)
#   ├── sections_folder (1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33)
#   └── folder_2 (1lNpCJ1teu8Se0louwL0oOHK9nEalskof)
#       ├── folder_2_1 (1XeDOMWwxTDiVr9Ig2gKum3Zq_Wivv6zY)
#       └── folder_2_2 (1RKlsexA8h7NHvBAWRbU27MJotic7KXe3)
# ============================================================================


@dataclass
class ExpectedHierarchyNode:
    """Expected hierarchy node for test verification."""

    raw_node_id: str
    display_name: str
    node_type: HierarchyNodeType
    # None means parent is the source root (shared drive or my drive)
    raw_parent_id: str | None = None
    children: list["ExpectedHierarchyNode"] = field(default_factory=list)


# Expected hierarchy for shared_drive_1
EXPECTED_SHARED_DRIVE_1_HIERARCHY = ExpectedHierarchyNode(
    raw_node_id=SHARED_DRIVE_1_ID,
    display_name="Shared Drive 1",
    node_type=HierarchyNodeType.SHARED_DRIVE,
    raw_parent_id=None,
    children=[
        ExpectedHierarchyNode(
            raw_node_id=RESTRICTED_ACCESS_FOLDER_ID,
            display_name="restricted_access",
            node_type=HierarchyNodeType.FOLDER,
            raw_parent_id=SHARED_DRIVE_1_ID,
        ),
        ExpectedHierarchyNode(
            raw_node_id=FOLDER_1_ID,
            display_name="folder 1",
            node_type=HierarchyNodeType.FOLDER,
            raw_parent_id=SHARED_DRIVE_1_ID,
            children=[
                ExpectedHierarchyNode(
                    raw_node_id=FOLDER_1_1_ID,
                    display_name="folder 1-1",
                    node_type=HierarchyNodeType.FOLDER,
                    raw_parent_id=FOLDER_1_ID,
                ),
                ExpectedHierarchyNode(
                    raw_node_id=FOLDER_1_2_ID,
                    display_name="folder 1-2",
                    node_type=HierarchyNodeType.FOLDER,
                    raw_parent_id=FOLDER_1_ID,
                ),
            ],
        ),
    ],
)

# Expected hierarchy for shared_drive_2
EXPECTED_SHARED_DRIVE_2_HIERARCHY = ExpectedHierarchyNode(
    raw_node_id=SHARED_DRIVE_2_ID,
    display_name="Shared Drive 2",
    node_type=HierarchyNodeType.SHARED_DRIVE,
    raw_parent_id=None,
    children=[
        ExpectedHierarchyNode(
            raw_node_id=SECTIONS_FOLDER_ID,
            display_name="sections",
            node_type=HierarchyNodeType.FOLDER,
            raw_parent_id=SHARED_DRIVE_2_ID,
        ),
        ExpectedHierarchyNode(
            raw_node_id=FOLDER_2_ID,
            display_name="folder 2",
            node_type=HierarchyNodeType.FOLDER,
            raw_parent_id=SHARED_DRIVE_2_ID,
            children=[
                ExpectedHierarchyNode(
                    raw_node_id=FOLDER_2_1_ID,
                    display_name="folder 2-1",
                    node_type=HierarchyNodeType.FOLDER,
                    raw_parent_id=FOLDER_2_ID,
                ),
                ExpectedHierarchyNode(
                    raw_node_id=FOLDER_2_2_ID,
                    display_name="folder 2-2",
                    node_type=HierarchyNodeType.FOLDER,
                    raw_parent_id=FOLDER_2_ID,
                ),
            ],
        ),
    ],
)


def flatten_hierarchy(
    expected: ExpectedHierarchyNode,
) -> dict[str, ExpectedHierarchyNode]:
    """Flatten an expected hierarchy tree into a dict keyed by raw_node_id."""
    result = {expected.raw_node_id: expected}
    for child in expected.children:
        result.update(flatten_hierarchy(child))
    return result


def _node(
    raw_node_id: str,
    display_name: str,
    node_type: HierarchyNodeType,
    raw_parent_id: str | None = None,
) -> ExpectedHierarchyNode:
    return ExpectedHierarchyNode(
        raw_node_id=raw_node_id,
        display_name=display_name,
        node_type=node_type,
        raw_parent_id=raw_parent_id,
    )


# Flattened maps for easy lookup
EXPECTED_SHARED_DRIVE_1_NODES = flatten_hierarchy(EXPECTED_SHARED_DRIVE_1_HIERARCHY)
EXPECTED_SHARED_DRIVE_2_NODES = flatten_hierarchy(EXPECTED_SHARED_DRIVE_2_HIERARCHY)

EXTERNAL_SHARED_FOLDER_URL = (
    "https://drive.google.com/drive/folders/1sWC7Oi0aQGgifLiMnhTjvkhRWVeDa-XS"
)
EXTERNAL_SHARED_FOLDER_ID = "1sWC7Oi0aQGgifLiMnhTjvkhRWVeDa-XS"
EXTERNAL_SHARED_DOCS_IN_FOLDER = [
    "https://docs.google.com/document/d/1Sywmv1-H6ENk2GcgieKou3kQHR_0te1mhIUcq8XlcdY"
]
EXTERNAL_SHARED_DOC_SINGLETON = (
    "https://docs.google.com/document/d/11kmisDfdvNcw5LYZbkdPVjTOdj-Uc5ma6Jep68xzeeA"
)

SHARED_DRIVE_3_URL = "https://drive.google.com/drive/folders/0AJYm2K_I_vtNUk9PVA"

RESTRICTED_ACCESS_FOLDER_URL = (
    "https://drive.google.com/drive/folders/1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN"
)

# ============================================================================
# PERMISSION SYNC TEST DRIVES
# ============================================================================
# These are separate shared drives used specifically for testing permission sync.
# Each drive has different access levels:
#
# PERM_SYNC_DRIVE_ADMIN_ONLY: Only shared with admin
# PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A: Shared with admin and test_user_1
# PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B: Shared with admin and test_user_1
# ============================================================================

PERM_SYNC_DRIVE_ADMIN_ONLY_URL = (
    "https://drive.google.com/drive/folders/0ACOrCU1EMD1hUk9PVA"
)
PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_URL = (
    "https://drive.google.com/drive/folders/0ABec4pV29sMuUk9PVA"
)
PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_URL = (
    "https://drive.google.com/drive/folders/0ANpbToRgjHD4Uk9PVA"
)

PERM_SYNC_DRIVE_ADMIN_ONLY_ID = "0ACOrCU1EMD1hUk9PVA"
PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID = "0ABec4pV29sMuUk9PVA"
PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID = "0ANpbToRgjHD4Uk9PVA"

# ============================================================================
# ADDITIONAL DRIVES/FOLDERS ACCESSIBLE TO TEST_USER_1
# ============================================================================
# These are additional shared drives and folders that test_user_1 has access to.
# They are returned as hierarchy nodes when running the connector as test_user_1.
# ============================================================================

# Additional shared drives accessible to test_user_1
TEST_USER_1_MY_DRIVE_ID = "0AFpeuWG1VyABUk9PVA"  # My Drive indicator for test_user_1
TEST_USER_1_MY_DRIVE_FOLDER_ID = (
    "1tF10nDFND-GE_IT0f6PjEn2Du6m2k-DE"  # Child folder (partial sharing)
)

TEST_USER_1_DRIVE_B_ID = (
    "0AFskk4zfZm86Uk9PVA"  # My_super_special_shared_drive_suuuper_private
)
TEST_USER_1_DRIVE_B_FOLDER_ID = (
    "1oIj7nigzvP5xI2F8BmibUA8R_J3AbBA-"  # Child folder (silliness)
)

# Other drives test_user_1 has access to
TEST_USER_1_EXTRA_DRIVE_1_ID = "0AL67XRMq9reYUk9PVA"  # Okay_fine_admin_I_will_share
TEST_USER_1_EXTRA_DRIVE_2_ID = "0ACeKoHrGKxCbUk9PVA"  # reee test
TEST_USER_1_EXTRA_FOLDER_ID = (
    "1i2Q1TNvUfZkH-A7RGyAqRuEI-3mHANku"  # read only no download test
)

# Additional shared drives in the organization that appear when running include_all tests
ADMIN_MY_DRIVE_ID = "0ABTZwt798K7MUk9PVA"  # Admin's My Drive
TEST_USER_2_MY_DRIVE = "0ADjBZv2nEvJNUk9PVA"  # Test user 2's My Drive
TEST_USER_3_MY_DRIVE_ID = "0AKl0e4Wr5NW7Uk9PVA"  # Test user 3's My Drive
PILL_FOLDER_ID = "1FWzfA369tx9VT8scJ3LCOPBBuTBgt0OH"  # contains file with date pills

PADDING_DRIVE_URLS = [
    "0AOorXE6AfJRAUk9PVA",
    "0ANn2MSqGi74JUk9PVA",
    "0ANI_NFCPzaRwUk9PVA",
    "0ABu8fYjvA21dUk9PVA",
]

ADMIN_EMAIL = "admin@onyx-test.com"
TEST_USER_1_EMAIL = "test_user_1@onyx-test.com"
TEST_USER_2_EMAIL = "test_user_2@onyx-test.com"
TEST_USER_3_EMAIL = "test_user_3@onyx-test.com"

# Expected permissions for perm sync drives
# Maps drive ID -> set of user emails with access
PERM_SYNC_DRIVE_ACCESS_MAPPING: dict[str, set[str]] = {
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID: {ADMIN_EMAIL},
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID: {ADMIN_EMAIL, TEST_USER_1_EMAIL},
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID: {ADMIN_EMAIL, TEST_USER_1_EMAIL},
}

# ============================================================================
# NON-SHARED-DRIVE HIERARCHY NODES
# ============================================================================
# These cover My Drive roots, perm sync drives, extra shared drives,
# and standalone folders that appear in various tests.
# Display names must match what the Google Drive API actually returns.
# ============================================================================

EXPECTED_FOLDER_3 = _node(
    FOLDER_3_ID, "Folder 3", HierarchyNodeType.FOLDER, ADMIN_MY_DRIVE_ID
)

EXPECTED_ADMIN_MY_DRIVE = _node(ADMIN_MY_DRIVE_ID, "My Drive", HierarchyNodeType.FOLDER)
EXPECTED_TEST_USER_1_MY_DRIVE = _node(
    TEST_USER_1_MY_DRIVE_ID, "My Drive", HierarchyNodeType.FOLDER
)
EXPECTED_TEST_USER_1_MY_DRIVE_FOLDER = _node(
    TEST_USER_1_MY_DRIVE_FOLDER_ID,
    "partial_sharing",
    HierarchyNodeType.FOLDER,
    TEST_USER_1_MY_DRIVE_ID,
)
EXPECTED_TEST_USER_2_MY_DRIVE = _node(
    TEST_USER_2_MY_DRIVE, "My Drive", HierarchyNodeType.FOLDER
)
EXPECTED_TEST_USER_3_MY_DRIVE = _node(
    TEST_USER_3_MY_DRIVE_ID, "My Drive", HierarchyNodeType.FOLDER
)

EXPECTED_PERM_SYNC_DRIVE_ADMIN_ONLY = _node(
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
    "perm_sync_drive_0dc9d8b5-e243-4c2f-8678-2235958f7d7c",
    HierarchyNodeType.SHARED_DRIVE,
)
EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A = _node(
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
    "perm_sync_drive_785db121-0823-4ebe-8689-ad7f52405e32",
    HierarchyNodeType.SHARED_DRIVE,
)
EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B = _node(
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
    "perm_sync_drive_d8dc3649-3f65-4392-b87f-4b20e0389673",
    HierarchyNodeType.SHARED_DRIVE,
)

EXPECTED_TEST_USER_1_DRIVE_B = _node(
    TEST_USER_1_DRIVE_B_ID,
    "My_super_special_shared_drive_suuuper_private",
    HierarchyNodeType.SHARED_DRIVE,
)
EXPECTED_TEST_USER_1_DRIVE_B_FOLDER = _node(
    TEST_USER_1_DRIVE_B_FOLDER_ID,
    "silliness",
    HierarchyNodeType.FOLDER,
    TEST_USER_1_DRIVE_B_ID,
)
EXPECTED_TEST_USER_1_EXTRA_DRIVE_1 = _node(
    TEST_USER_1_EXTRA_DRIVE_1_ID,
    "Okay_Admin_fine_I_will_share",
    HierarchyNodeType.SHARED_DRIVE,
)
EXPECTED_TEST_USER_1_EXTRA_DRIVE_2 = _node(
    TEST_USER_1_EXTRA_DRIVE_2_ID, "reee test", HierarchyNodeType.SHARED_DRIVE
)
EXPECTED_TEST_USER_1_EXTRA_FOLDER = _node(
    TEST_USER_1_EXTRA_FOLDER_ID,
    "read only no download test",
    HierarchyNodeType.FOLDER,
)

EXPECTED_PILL_FOLDER = _node(
    PILL_FOLDER_ID, "pill_folder", HierarchyNodeType.FOLDER, ADMIN_MY_DRIVE_ID
)
EXPECTED_EXTERNAL_SHARED_FOLDER = _node(
    EXTERNAL_SHARED_FOLDER_ID, "Onyx-test", HierarchyNodeType.FOLDER
)

# Comprehensive mapping of ALL known hierarchy nodes.
# Every retrieved node is checked against this for display_name and node_type.
ALL_EXPECTED_HIERARCHY_NODES: dict[str, ExpectedHierarchyNode] = {
    **EXPECTED_SHARED_DRIVE_1_NODES,
    **EXPECTED_SHARED_DRIVE_2_NODES,
    FOLDER_3_ID: EXPECTED_FOLDER_3,
    ADMIN_MY_DRIVE_ID: EXPECTED_ADMIN_MY_DRIVE,
    TEST_USER_1_MY_DRIVE_ID: EXPECTED_TEST_USER_1_MY_DRIVE,
    TEST_USER_1_MY_DRIVE_FOLDER_ID: EXPECTED_TEST_USER_1_MY_DRIVE_FOLDER,
    TEST_USER_2_MY_DRIVE: EXPECTED_TEST_USER_2_MY_DRIVE,
    TEST_USER_3_MY_DRIVE_ID: EXPECTED_TEST_USER_3_MY_DRIVE,
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_ONLY,
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A,
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B,
    TEST_USER_1_DRIVE_B_ID: EXPECTED_TEST_USER_1_DRIVE_B,
    TEST_USER_1_DRIVE_B_FOLDER_ID: EXPECTED_TEST_USER_1_DRIVE_B_FOLDER,
    TEST_USER_1_EXTRA_DRIVE_1_ID: EXPECTED_TEST_USER_1_EXTRA_DRIVE_1,
    TEST_USER_1_EXTRA_DRIVE_2_ID: EXPECTED_TEST_USER_1_EXTRA_DRIVE_2,
    TEST_USER_1_EXTRA_FOLDER_ID: EXPECTED_TEST_USER_1_EXTRA_FOLDER,
    PILL_FOLDER_ID: EXPECTED_PILL_FOLDER,
    EXTERNAL_SHARED_FOLDER_ID: EXPECTED_EXTERNAL_SHARED_FOLDER,
}

# Dictionary for access permissions
# All users have access to their own My Drive as well as public files
ACCESS_MAPPING: dict[str, list[int]] = {
    # Admin has access to everything in shared
    ADMIN_EMAIL: (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    ),
    TEST_USER_1_EMAIL: (
        TEST_USER_1_FILE_IDS
        # This user has access to drive 1
        + SHARED_DRIVE_1_FILE_IDS
        # This user has redundant access to folder 1 because of group access
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        # This user has been given shared access to folder 3 in Admin's My Drive
        + ADMIN_FOLDER_3_FILE_IDS
        # This user has been given shared access to files 0 and 1 in Admin's My Drive
        + list(range(0, 2))
    ),
    TEST_USER_2_EMAIL: (
        TEST_USER_2_FILE_IDS
        # Group 1 includes this user, giving access to folder 1
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        # This folder is public
        + FOLDER_1_2_FILE_IDS
        # Folder 2-1 is shared with this user
        + FOLDER_2_1_FILE_IDS
        # This user has been given shared access to files 45 and 46 in folder 2
        + list(range(45, 47))
    ),
    # This user can only see his own files and public files
    TEST_USER_3_EMAIL: TEST_USER_3_FILE_IDS,
}

SPECIAL_FILE_ID_TO_CONTENT_MAP: dict[int, str] = {
    61: (
        "Title\n"
        "This is a Google Doc with sections - "
        "Section 1\n"
        "Section 1 content - "
        "Sub-Section 1-1\n"
        "Sub-Section 1-1 content - "
        "Sub-Section 1-2\n"
        "Sub-Section 1-2 content - "
        "Section 2\n"
        "Section 2 content"
    ),
}

MISC_SHARED_DRIVE_FNAMES = [
    "asdfasdfsfad",
    "perm_sync_doc_0ABec4pV29sMuUk9PVA_a5ea8ec4-0440-4926-a43d-3aeef1c10bdd",
    "perm_sync_doc_0ACOrCU1EMD1hUk9PVA_651821cb-8140-42fe-a876-1a92012375c9",
    "perm_sync_doc_0ACOrCU1EMD1hUk9PVA_ab63b976-effb-49af-84e7-423d17a17dd7",
    "super secret thing that test user 1 can't see",
    "perm_sync_doc_0ABec4pV29sMuUk9PVA_419f2ef0-9815-4c69-8435-98b163c9c156",
    "Untitled documentfsdfsdfsdf",
    "bingle_bongle.txt",
    "bb4.txt",
    "bb3.txt",
    "bb2.txt",
]

file_name_template = "file_{}.txt"
file_text_template = "This is file {}"

# This is done to prevent different tests from interfering with each other
# So each test type should have its own valid prefix
_VALID_PREFIX = "file_"


def filter_invalid_prefixes(names: set[str]) -> set[str]:
    return {name for name in names if name.startswith(_VALID_PREFIX)}


def print_discrepancies(
    expected: set[str],
    retrieved: set[str],
) -> None:
    if expected != retrieved:
        expected_list = sorted(expected)
        retrieved_list = sorted(retrieved)
        print(expected_list)
        print(retrieved_list)
        print("Extra:")
        print(sorted(retrieved - expected))
        print("Missing:")
        print(sorted(expected - retrieved))


def _get_expected_file_content(file_id: int) -> str:
    if file_id in SPECIAL_FILE_ID_TO_CONTENT_MAP:
        return SPECIAL_FILE_ID_TO_CONTENT_MAP[file_id]

    return file_text_template.format(file_id)


def id_to_name(file_id: int) -> str:
    return file_name_template.format(file_id)


def assert_expected_docs_in_retrieved_docs(
    retrieved_docs: list[Document],
    expected_file_ids: Sequence[int],
) -> None:
    """NOTE: as far as i can tell this does NOT assert for an exact match.
    it only checks to see if that the expected file id's are IN the retrieved doc list
    """

    expected_file_names = {id_to_name(file_id) for file_id in expected_file_ids}
    expected_file_texts = {
        _get_expected_file_content(file_id) for file_id in expected_file_ids
    }

    retrieved_docs.sort(key=lambda x: x.semantic_identifier)

    for doc in retrieved_docs:
        print(f"retrieved doc: doc.semantic_identifier={doc.semantic_identifier}")

    # Filter out invalid prefixes to prevent different tests from interfering with each other
    valid_retrieved_docs = [
        doc
        for doc in retrieved_docs
        if doc.semantic_identifier.startswith(_VALID_PREFIX)
    ]
    valid_retrieved_file_names = set(
        [doc.semantic_identifier for doc in valid_retrieved_docs]
    )
    valid_retrieved_texts = set(
        [
            " - ".join(
                [
                    section.text
                    for section in doc.sections
                    if isinstance(section, TextSection) and section.text is not None
                ]
            )
            for doc in valid_retrieved_docs
        ]
    )

    # Check file names
    print_discrepancies(
        expected=expected_file_names,
        retrieved=valid_retrieved_file_names,
    )
    assert expected_file_names == valid_retrieved_file_names

    # Check file texts
    print_discrepancies(
        expected=expected_file_texts,
        retrieved=valid_retrieved_texts,
    )
    assert expected_file_texts == valid_retrieved_texts


def load_connector_outputs(
    connector: GoogleDriveConnector,
    include_permissions: bool = False,
) -> ConnectorOutput:
    """Load all documents, failures, and hierarchy nodes from the connector."""
    return load_all_from_connector(
        connector,
        0,
        time.time(),
        include_permissions=include_permissions,
    )


def assert_hierarchy_nodes_match_expected(
    retrieved_nodes: list[HierarchyNode],
    expected_nodes: dict[str, ExpectedHierarchyNode],
    ignorable_node_ids: set[str] | None = None,
) -> None:
    """
    Assert that retrieved hierarchy nodes match expected structure.

    Checks node IDs, display names, node types, and parent relationships
    for EVERY retrieved node (global checks).

    Args:
        retrieved_nodes: List of HierarchyNode objects from the connector
        expected_nodes: Dict mapping raw_node_id -> ExpectedHierarchyNode with
            expected display_name, node_type, and raw_parent_id
        ignorable_node_ids: Optional set of node IDs that can be missing or extra
            without failing. Useful for non-deterministically returned nodes.
    """
    expected_node_ids = set(expected_nodes.keys())
    retrieved_node_ids = {node.raw_node_id for node in retrieved_nodes}
    ignorable = ignorable_node_ids or set()

    missing = expected_node_ids - retrieved_node_ids - ignorable
    extra = retrieved_node_ids - expected_node_ids - ignorable

    if missing or extra:
        print("Expected hierarchy node IDs:")
        print(sorted(expected_node_ids))
        print("Retrieved hierarchy node IDs:")
        print(sorted(retrieved_node_ids))
        print("Extra (retrieved but not expected):")
        print(sorted(retrieved_node_ids - expected_node_ids))
        print("Missing (expected but not retrieved):")
        print(sorted(expected_node_ids - retrieved_node_ids))
        if ignorable:
            print("Ignorable node IDs:")
            print(sorted(ignorable))

    assert (
        not missing and not extra
    ), f"Hierarchy node mismatch. Missing: {missing}, Extra: {extra}"

    for node in retrieved_nodes:
        if node.raw_node_id in ignorable and node.raw_node_id not in expected_nodes:
            continue

        assert (
            node.raw_node_id in expected_nodes
        ), f"Node {node.raw_node_id} ({node.display_name}) not found in expected_nodes"
        expected = expected_nodes[node.raw_node_id]

        assert (
            node.display_name == expected.display_name
        ), f"Display name mismatch for node {node.raw_node_id}: expected '{expected.display_name}', got '{node.display_name}'"
        assert (
            node.node_type == expected.node_type
        ), f"Node type mismatch for node {node.raw_node_id}: expected '{expected.node_type}', got '{node.node_type}'"
        if expected.raw_parent_id is not None:
            assert node.raw_parent_id == expected.raw_parent_id, (
                f"Parent mismatch for node {node.raw_node_id} ({node.display_name}): "
                f"expected parent={expected.raw_parent_id}, got parent={node.raw_parent_id}"
            )


def _pick(
    *node_ids: str,
) -> dict[str, ExpectedHierarchyNode]:
    """Pick nodes from ALL_EXPECTED_HIERARCHY_NODES by their IDs."""
    return {nid: ALL_EXPECTED_HIERARCHY_NODES[nid] for nid in node_ids}


def _clear_parents(
    nodes: dict[str, ExpectedHierarchyNode],
    *node_ids: str,
) -> dict[str, ExpectedHierarchyNode]:
    """Return a shallow copy of nodes with the specified nodes' parents set to None.
    Useful for OAuth tests where the user can't resolve certain parents
    (e.g. a folder in another user's My Drive)."""
    result = dict(nodes)
    for nid in node_ids:
        result[nid] = replace(result[nid], raw_parent_id=None)
    return result


def get_expected_hierarchy_for_shared_drives(
    include_drive_1: bool = True,
    include_drive_2: bool = True,
    include_restricted_folder: bool = True,
) -> dict[str, ExpectedHierarchyNode]:
    """Get expected hierarchy nodes for shared drives."""
    result: dict[str, ExpectedHierarchyNode] = {}

    if include_drive_1:
        result.update(EXPECTED_SHARED_DRIVE_1_NODES)
        if not include_restricted_folder:
            result.pop(RESTRICTED_ACCESS_FOLDER_ID, None)

    if include_drive_2:
        result.update(EXPECTED_SHARED_DRIVE_2_NODES)

    return result


def get_expected_hierarchy_for_folder_1() -> dict[str, ExpectedHierarchyNode]:
    """Get expected hierarchy for folder_1 and its children only."""
    return _pick(FOLDER_1_ID, FOLDER_1_1_ID, FOLDER_1_2_ID)


def get_expected_hierarchy_for_folder_2() -> dict[str, ExpectedHierarchyNode]:
    """Get expected hierarchy for folder_2 and its children only."""
    return _pick(FOLDER_2_ID, FOLDER_2_1_ID, FOLDER_2_2_ID)


def get_expected_hierarchy_for_test_user_1() -> dict[str, ExpectedHierarchyNode]:
    """
    Get expected hierarchy for test_user_1's full access (OAuth).

    test_user_1 has access to:
    - shared_drive_1 and its contents (folder_1, folder_1_1, folder_1_2)
    - folder_3 (shared from admin's My Drive)
    - PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A and PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B
    - Additional drives/folders the user has access to

    NOTE: Folder 3 lives in the admin's My Drive. When running as an OAuth
    connector for test_user_1, the Google Drive API won't return the parent
    for Folder 3 because the user can't access the admin's My Drive root.
    """
    result = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=False,
        include_restricted_folder=False,
    )
    result.update(
        _pick(
            FOLDER_3_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_MY_DRIVE_ID,
            TEST_USER_1_MY_DRIVE_FOLDER_ID,
            TEST_USER_1_DRIVE_B_ID,
            TEST_USER_1_DRIVE_B_FOLDER_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            TEST_USER_1_EXTRA_FOLDER_ID,
        )
    )
    return _clear_parents(result, FOLDER_3_ID)


def get_expected_hierarchy_for_test_user_1_shared_drives_only() -> (
    dict[str, ExpectedHierarchyNode]
):
    """Expected hierarchy nodes when test_user_1 runs with include_shared_drives=True only."""
    result = get_expected_hierarchy_for_test_user_1()
    for nid in (
        TEST_USER_1_MY_DRIVE_ID,
        TEST_USER_1_MY_DRIVE_FOLDER_ID,
        FOLDER_3_ID,
        TEST_USER_1_EXTRA_FOLDER_ID,
    ):
        result.pop(nid, None)
    return result


def get_expected_hierarchy_for_test_user_1_shared_with_me_only() -> (
    dict[str, ExpectedHierarchyNode]
):
    """Expected hierarchy nodes when test_user_1 runs with include_files_shared_with_me=True only."""
    return _clear_parents(
        _pick(FOLDER_3_ID, TEST_USER_1_EXTRA_FOLDER_ID),
        FOLDER_3_ID,
    )


def get_expected_hierarchy_for_test_user_1_my_drive_only() -> (
    dict[str, ExpectedHierarchyNode]
):
    """Expected hierarchy nodes when test_user_1 runs with include_my_drives=True only."""
    return _pick(TEST_USER_1_MY_DRIVE_ID, TEST_USER_1_MY_DRIVE_FOLDER_ID)


================================================
FILE: backend/tests/daily/connectors/google_drive/drive_id_mapping.json
================================================
{
  "12": "https://drive.google.com/file/d/1u7nynrG4WuFZeuZs8yyhqJF_lbo-op-m",
  "10": "https://drive.google.com/file/d/1LFcVuXuXIdNJ7hkL0C40eYn_cQtryUVQ",
  "13": "https://drive.google.com/file/d/1muQMyYAJe0_F-HiDFIfFMt-4qsgMlREM",
  "11": "https://drive.google.com/file/d/1oHNtlsdJJtk7dE10NgH83Kn5_f2L-Su1",
  "14": "https://drive.google.com/file/d/1sAw-DrsqpnqLF5A8P59BZwIpt9-LrlaL",
  "18": "https://drive.google.com/file/d/1qqKH3esasdqV6ryEhdoSQezDPlKj11At",
  "17": "https://drive.google.com/file/d/1z08VsrCUTozpc5Quzb7mEDUwNkXU3foT",
  "15": "https://drive.google.com/file/d/1QQ6ZGyYP49IJNeGKNmqZISyVLzTOtK4v",
  "19": "https://drive.google.com/file/d/172as_pb7E15bXUd63mIIBRotk_tT7h56",
  "16": "https://drive.google.com/file/d/1552S6HEjJ81q8JXr46BtixQiVq9xlW_I",
  "5": "https://drive.google.com/file/d/1sv9epxLcNlgM6C-oPDeD_heFw7AIZMgp",
  "7": "https://drive.google.com/file/d/1S_S0LpQW90EUPPPjJX4jfu5p9gOQjiQF",
  "9": "https://drive.google.com/file/d/1wH2dBrWzmiGJ88ySHWu6srb7Jsj7qYbA",
  "8": "https://drive.google.com/file/d/14URUm6RKSZziH1lUtT6gs-xnCTWkXpSn",
  "6": "https://drive.google.com/file/d/1LBKBuTMRSss-kVw8ut3rMk51wSbTM95j",
  "3": "https://drive.google.com/file/d/1nNazkPrkuRXHFOl8gdA68pU2g8cy-h6n",
  "2": "https://drive.google.com/file/d/1miG_QpqXe2QIMApcrlNzaB6fsXW5WMFX",
  "4": "https://drive.google.com/file/d/1o-i8can6ciL1XXzy2pVUPHZEXEjBJi6C",
  "0": "https://drive.google.com/file/d/1d3Y59Sns8I0FIW9CtOAjVVLE2MEe_3nP",
  "1": "https://drive.google.com/file/d/1ipSqxJajs_NkfSKFxgltIMNc0ffdt-NX",
  "68": "https://drive.google.com/file/d/1rCBZsbhQ-ULWGztiKB0JYhFth9EChiSZ",
  "66": "https://drive.google.com/file/d/1WVAlbWcu9-Braa0aG6w3cShrY5dbIYcY",
  "67": "https://drive.google.com/file/d/1p44poOCdNLnVYMxTL9b3h-BXsOQ2RDgM",
  "69": "https://drive.google.com/file/d/1HFYsaqC14aE-EaobQdwkw0FOlAYMYqkV",
  "65": "https://drive.google.com/file/d/1RyE07CpTIDYMO3b-atwjWH6ZHFDjyoCl",
  "32": "https://drive.google.com/file/d/17egJ5W-0bvS2akLBqvxylTIViN0d9nG7",
  "28": "https://drive.google.com/file/d/1HNqSM2XGqgHnyNYT5wp8hyski18HMcfO",
  "37": "https://drive.google.com/file/d/16Tdu3gveWkFL0VBUzYSzKxFO4ffv-8h7",
  "30": "https://drive.google.com/file/d/1uj69jGyYnNOXXqKmLNIp-4KKrVC1qaPy",
  "25": "https://drive.google.com/file/d/1bw6NFlR4ZxOV6reQK1Oqeq_UaYFVpNV6",
  "33": "https://drive.google.com/file/d/1FkmXBkt__lOFXg_uhxLI0QIuxWbIGySL",
  "20": "https://drive.google.com/file/d/1r77uBVOHkuiDQFa9iz9FU8QbfjImOAjF",
  "24": "https://drive.google.com/file/d/1kwLrdhTgCdjNrOcSwRI14K3gXnS48xne",
  "39": "https://drive.google.com/file/d/1V3av9F47t44Nf3jcO12U6OIsjsX-B7L1",
  "29": "https://drive.google.com/file/d/172dCAUNaaoZX0RHqEi7Ev12eV930LtTa",
  "31": "https://drive.google.com/file/d/17zzfgMSWBVebWGnpSHKd6g1LFN4vn-YP",
  "38": "https://drive.google.com/file/d/1xOQvIBlBJ2swTGp78WkCZJUQ-d1F8pVu",
  "23": "https://drive.google.com/file/d/1X89y_CoTWWjh3BWq0ZgeGydCvg3gMZeJ",
  "34": "https://drive.google.com/file/d/1VNDhcbA_-Ckjp084hKyl9bwP4E3l9K_2",
  "47": "https://drive.google.com/file/d/1O8E7haA8WcJIma0iKcvebd4_dlC5Zr7S",
  "52": "https://drive.google.com/file/d/1o-ateliXHj4TyugOxb9zYYXwrkhFl4FX",
  "27": "https://drive.google.com/file/d/1aZ1CwNVWJt_OtIBVO-9zv1UUqXTDlM1F",
  "26": "https://drive.google.com/file/d/1qegrc27hYeECs0KexnEuuG0WQm-8Y9oZ",
  "59": "https://drive.google.com/file/d/1L9oWKHMTjQreGW_k8rNy7kBQ7c0FuXFm",
  "35": "https://drive.google.com/file/d/1NewjF092B9KKDBs-dpnZ9dzVl2GAs2LW",
  "49": "https://drive.google.com/file/d/1TsUrBlr2nxJtH122nKQ_GzdMc0DFFERB",
  "41": "https://drive.google.com/file/d/1gc2Vo3HZF-Bm_WhZ0zyFedWNfVL2BEol",
  "22": "https://drive.google.com/file/d/1iPfQeganYriuqHO2e5npUPeuX5VIbhG3",
  "36": "https://drive.google.com/file/d/1KyNoHRTfGMNR15dCRpcVW74l2z-wVm0V",
  "44": "https://drive.google.com/file/d/1PDuxwmrD20s54FHQIhXn3ucdFmXSX5kS",
  "21": "https://drive.google.com/file/d/1ZwO5cCfBJgGpZTIpoi8p2js8zuHT_qxe",
  "53": "https://drive.google.com/file/d/140NZAuAOoiqrNVqWmF4TPNv6njd_guwE",
  "50": "https://drive.google.com/file/d/1MBmy7nQi7pMwwIPZHJjB_iuQeO07QWsN",
  "54": "https://drive.google.com/file/d/1TtIJ-ULYWyv0yUvUVdfTPuBNlBt_j1Yd",
  "57": "https://drive.google.com/file/d/19V5d3NcR029AhGiRibk2nlTmFNCVGBgO",
  "43": "https://drive.google.com/file/d/1kLChcxIWZS_kHLEHThLcm7ekcgwYP0jF",
  "42": "https://drive.google.com/file/d/1HKW3C1B5vFYUuXmFieMKYAfq4CwtnEZ_",
  "48": "https://drive.google.com/file/d/1EJGd47XpWZDXJKWU0CGp84Hm7K47GNVt",
  "40": "https://drive.google.com/file/d/1Fr4dVKdOvth_O-Td8PTwgNGzZz8ridAl",
  "58": "https://drive.google.com/file/d/1lUFpiwE7ISzLbowHvCtEUj4sfG4w0Gst",
  "51": "https://drive.google.com/file/d/1V6fOoKgA8QSTJYWPP5GVHz8WFAQIRLNB",
  "45": "https://drive.google.com/file/d/1hSrPOwyxFEth4GWWN1e4BjBftmnKa8px",
  "46": "https://drive.google.com/file/d/1jCynzDt1r0EISpwcrFuk3RlKWHM9u7Mj",
  "55": "https://drive.google.com/file/d/1Db01f4I_Xn8Bs9piQgZU59ZWAeC2MaQm",
  "56": "https://drive.google.com/file/d/1NxVfwIxm6FVVR1XnxQNMWWbQEVX66cQm",
  "61": "https://docs.google.com/document/d/1eAaZJAqjXMZ2VvG_r04EGtn6EGcYycofdNUkDHEA8vY"
}

================================================
FILE: backend/tests/daily/connectors/google_drive/test_admin_oauth.py
================================================
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_expected_docs_in_retrieved_docs,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_hierarchy_nodes_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_shared_drives,
)
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import PILL_FOLDER_ID
from tests.daily.connectors.google_drive.consts_and_utils import (
    RESTRICTED_ACCESS_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_ID
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_1_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_2_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_FOLDER_ID,
)


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_all(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_all")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.update(
        _pick(
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            ADMIN_MY_DRIVE_ID,
            PILL_FOLDER_ID,
            RESTRICTED_ACCESS_FOLDER_ID,
            TEST_USER_1_EXTRA_FOLDER_ID,
            FOLDER_3_ID,
        )
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_shared_drives_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_shared_drives_only")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.update(
        _pick(
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            RESTRICTED_ACCESS_FOLDER_ID,
        )
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_my_drives_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_my_drives_only")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = ADMIN_FILE_IDS + ADMIN_FOLDER_3_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = _pick(
        FOLDER_3_ID,
        ADMIN_MY_DRIVE_ID,
        PILL_FOLDER_ID,
        TEST_USER_1_EXTRA_FOLDER_ID,
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_drive_one_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_drive_one_only")
    drive_urls = [SHARED_DRIVE_1_URL]
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=False,
        include_restricted_folder=False,
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_folder_and_shared_drive(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_folder_and_shared_drive")
    drive_urls = [SHARED_DRIVE_1_URL]
    folder_urls = [FOLDER_2_URL]
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.pop(SECTIONS_FOLDER_ID, None)
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_folders_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_folders_only")
    folder_urls = [
        FOLDER_1_2_URL,
        FOLDER_2_1_URL,
        FOLDER_2_2_URL,
        FOLDER_3_URL,
    ]
    shared_drive_urls = [
        FOLDER_1_1_URL,
    ]
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.pop(SECTIONS_FOLDER_ID, None)
    expected_nodes.update(_pick(ADMIN_MY_DRIVE_ID, FOLDER_3_ID))
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_personal_folders_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_personal_folders_only")
    folder_urls = [
        FOLDER_3_URL,
    ]
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = _pick(FOLDER_3_ID, ADMIN_MY_DRIVE_ID)
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


================================================
FILE: backend/tests/daily/connectors/google_drive/test_drive_perm_sync.py
================================================
import copy
import json
import os
from collections import defaultdict
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
from onyx.access.models import DocExternalAccess
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ACCESS_MAPPING
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_hierarchy_nodes_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    EXTERNAL_SHARED_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    FOLDER_3_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_shared_drives,
)
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ACCESS_MAPPING,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PILL_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE
from tests.daily.connectors.google_drive.consts_and_utils import (
    RESTRICTED_ACCESS_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_DRIVE_B_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_DRIVE_B_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_1_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_2_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_MY_DRIVE_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_MY_DRIVE_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_2_MY_DRIVE,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_3_MY_DRIVE_ID,
)


def _build_connector(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> GoogleDriveConnector:
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    # don't need this anymore, it's been called in the factory
    connector.load_credentials = MagicMock()  # type: ignore
    return connector


def test_gdrive_perm_sync_with_real_data(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
    enable_ee: None,  # noqa: ARG001
) -> None:
    """
    Test gdrive_doc_sync and gdrive_group_sync with real data from the test drive.

    This test uses the real connector to make actual API calls to Google Drive
    and verifies the permission structure returned.
    """
    # Create a mock cc_pair that will use our real connector
    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
    mock_cc_pair.connector = MagicMock()
    mock_cc_pair.connector.connector_specific_config = {}
    mock_cc_pair.credential_id = 1
    # Import and use the mock helper
    from onyx.utils.sensitive import make_mock_sensitive_value

    mock_cc_pair.credential.credential_json = make_mock_sensitive_value({})
    mock_cc_pair.last_time_perm_sync = None
    mock_cc_pair.last_time_external_group_sync = None

    # Create a mock heartbeat
    mock_heartbeat = MagicMock(spec=IndexingHeartbeatInterface)
    mock_heartbeat.should_stop.return_value = False

    # Load drive_id_mapping.json
    with open(
        os.path.join(os.path.dirname(__file__), "drive_id_mapping.json"), "r"
    ) as f:
        drive_id_mapping = json.load(f)

    # Invert the mapping to get URL -> ID
    url_to_id_mapping = {url: int(id) for id, url in drive_id_mapping.items()}

    # Use the connector directly without mocking Google Drive API calls
    with patch(
        "ee.onyx.external_permissions.google_drive.doc_sync.GoogleDriveConnector",
        return_value=_build_connector(google_drive_service_acct_connector_factory),
    ):
        # Call the function under test
        def mock_fetch_all_docs_fn(
            sort_order: SortOrder | None = None,  # noqa: ARG001
        ) -> list[DocumentRow]:
            return []

        def mock_fetch_all_docs_ids_fn() -> list[str]:
            return []

        doc_access_generator = gdrive_doc_sync(
            mock_cc_pair,
            mock_fetch_all_docs_fn,
            mock_fetch_all_docs_ids_fn,
            mock_heartbeat,
        )
        doc_access_list = list(doc_access_generator)

    # Verify we got some results
    assert len(doc_access_list) > 0
    print(f"Found {len(doc_access_list)} documents with permissions")

    # create new connector
    with patch(
        "ee.onyx.external_permissions.google_drive.group_sync.GoogleDriveConnector",
        return_value=_build_connector(google_drive_service_acct_connector_factory),
    ):
        external_user_group_generator = gdrive_group_sync("test_tenant", mock_cc_pair)
        external_user_groups = list(external_user_group_generator)

    # map group ids to emails
    group_id_to_email_mapping: dict[str, set[str]] = defaultdict(set)
    groups_with_anyone_access: set[str] = set()
    for group in external_user_groups:
        for email in group.user_emails:
            group_id_to_email_mapping[group.id].add(email)

        if group.gives_anyone_access:
            groups_with_anyone_access.add(group.id)

    # Map documents to their permissions (flattening groups)
    doc_to_email_mapping: dict[str, set[str]] = {}
    doc_to_raw_result_mapping: dict[str, set[str]] = {}
    public_doc_ids: set[str] = set()

    for doc_access in doc_access_list:
        if not isinstance(doc_access, DocExternalAccess):
            continue
        doc_id = doc_access.doc_id
        # make sure they are new sets to avoid mutating the original
        doc_to_email_mapping[doc_id] = copy.deepcopy(
            doc_access.external_access.external_user_emails
        )
        doc_to_raw_result_mapping[doc_id] = copy.deepcopy(
            doc_access.external_access.external_user_emails
        )

        for group_id in doc_access.external_access.external_user_group_ids:
            doc_to_email_mapping[doc_id].update(group_id_to_email_mapping[group_id])
            doc_to_raw_result_mapping[doc_id].add(group_id)

        if doc_access.external_access.is_public:
            public_doc_ids.add(doc_id)

        if any(
            group_id in groups_with_anyone_access
            for group_id in doc_access.external_access.external_user_group_ids
        ):
            public_doc_ids.add(doc_id)

    # Check permissions based on drive_id_mapping.json and ACCESS_MAPPING
    # For each document URL that exists in our mapping
    checked_files = 0
    for doc_id, emails_with_access in doc_to_email_mapping.items():
        # Skip URLs that aren't in our mapping, we don't want new stuff to interfere
        # with the test.
        if doc_id not in url_to_id_mapping:
            continue

        file_numeric_id = url_to_id_mapping.get(doc_id)
        if file_numeric_id is None:
            raise ValueError(f"File {doc_id} not found in drive_id_mapping.json")

        checked_files += 1

        # Check which users should have access to this file according to ACCESS_MAPPING
        expected_users = set()
        for user_email, file_ids in ACCESS_MAPPING.items():
            if file_numeric_id in file_ids:
                expected_users.add(user_email)

        # Verify the permissions match
        if file_numeric_id in PUBLIC_RANGE:
            assert (
                doc_id in public_doc_ids
            ), f"File {doc_id} (ID: {file_numeric_id}) should be public but is not in the public_doc_ids set"
        else:
            assert expected_users == emails_with_access, (
                f"File {doc_id} (ID: {file_numeric_id}) should be accessible to users {expected_users} "
                f"but is accessible to {emails_with_access}. Raw result: {doc_to_raw_result_mapping[doc_id]} "
            )

    # Verify that we checked every file in ACCESS_MAPPING
    all_expected_files = set()
    for file_ids in ACCESS_MAPPING.values():
        all_expected_files.update(file_ids)

    checked_file_ids = {
        url_to_id_mapping[doc_id]
        for doc_id in doc_to_email_mapping
        if doc_id in url_to_id_mapping
    }

    assert all_expected_files == checked_file_ids, (
        f"Not all expected files were checked. "
        f"Missing files: {all_expected_files - checked_file_ids}, "
        f"Extra files checked: {checked_file_ids - all_expected_files}"
    )

    print(f"Checked permissions for {checked_files} files from drive_id_mapping.json")

    # Verify hierarchy nodes are returned with correct structure
    # Use include_permissions=True to populate external_access on hierarchy nodes
    hierarchy_connector = _build_connector(google_drive_service_acct_connector_factory)
    output = load_connector_outputs(hierarchy_connector, include_permissions=True)

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.update(
        _pick(
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_MY_DRIVE_ID,
            TEST_USER_1_MY_DRIVE_FOLDER_ID,
            TEST_USER_1_DRIVE_B_ID,
            TEST_USER_1_DRIVE_B_FOLDER_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            ADMIN_MY_DRIVE_ID,
            TEST_USER_2_MY_DRIVE,
            TEST_USER_3_MY_DRIVE_ID,
            PILL_FOLDER_ID,
            RESTRICTED_ACCESS_FOLDER_ID,
            TEST_USER_1_EXTRA_FOLDER_ID,
            EXTERNAL_SHARED_FOLDER_ID,
            FOLDER_3_ID,
        )
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )

    # Verify the perm sync drives are included in the hierarchy
    # These drives should have external_access set on their hierarchy nodes
    perm_sync_drive_nodes = [
        node
        for node in output.hierarchy_nodes
        if node.raw_node_id
        in {
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
        }
    ]

    # Verify permissions on perm sync drive hierarchy nodes
    for node in perm_sync_drive_nodes:
        assert (
            node.external_access is not None
        ), f"Hierarchy node {node.raw_node_id} has no external access"
        expected_emails = PERM_SYNC_DRIVE_ACCESS_MAPPING.get(node.raw_node_id, set())
        actual_emails = node.external_access.external_user_emails
        assert actual_emails == expected_emails, (
            f"Permission mismatch for perm sync drive {node.raw_node_id} ({node.display_name}): "
            f"expected {expected_emails}, got {actual_emails}"
        )

    print(f"Verified {len(output.hierarchy_nodes)} hierarchy nodes")


================================================
FILE: backend/tests/daily/connectors/google_drive/test_link_visibility_filter.py
================================================
from collections.abc import Iterable
from typing import Any
from unittest.mock import patch

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.file_retrieval import has_link_only_permission
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import RetrievedDriveFile


def _stub_run_functions(
    func_with_args: Iterable[tuple],
    max_workers: int = 8,  # noqa: ARG001
) -> list[Any]:
    return [func(*args) for func, args in func_with_args]


def _build_retrieved_file(
    permissions: list[dict[str, Any]],
) -> RetrievedDriveFile:
    return RetrievedDriveFile(
        completion_stage=DriveRetrievalStage.OAUTH_FILES,
        drive_file={
            "id": "file-id",
            "name": "Test File",
            "permissions": permissions,
        },
        user_email="user@example.com",
    )


def _prepare_connector(exclude: bool) -> GoogleDriveConnector:
    connector = GoogleDriveConnector(
        include_shared_drives=True,
        exclude_domain_link_only=exclude,
    )
    connector._creds = object()  # type: ignore[assignment]
    connector._primary_admin_email = "admin@example.com"
    return connector


def test_has_link_only_permission_detects_domain_link() -> None:
    file = {
        "permissions": [
            {"type": "domain", "allowFileDiscovery": False},
            {"type": "user", "emailAddress": "user@example.com"},
        ]
    }
    assert has_link_only_permission(file) is True


def test_has_link_only_permission_detects_anyone_link() -> None:
    file = {
        "permissions": [
            {"type": "anyone", "allowFileDiscovery": False},
        ]
    }
    assert has_link_only_permission(file) is True


def test_has_link_only_permission_ignores_other_permissions() -> None:
    file = {
        "permissions": [
            {"type": "domain", "allowFileDiscovery": True},
            {"type": "user", "emailAddress": "user@example.com"},
        ]
    }
    assert has_link_only_permission(file) is False


def test_connector_skips_link_only_files_when_enabled() -> None:
    connector = _prepare_connector(exclude=True)
    retrieved_file = _build_retrieved_file(
        [{"type": "domain", "allowFileDiscovery": False}]
    )

    with (
        patch(
            "onyx.connectors.google_drive.connector.run_functions_tuples_in_parallel",
            side_effect=_stub_run_functions,
        ),
        patch(
            "onyx.connectors.google_drive.connector.convert_drive_item_to_document"
        ) as convert_mock,
        patch(
            "onyx.connectors.google_drive.connector.GoogleDriveConnector._get_new_ancestors_for_files"
        ) as get_new_ancestors_mock,
    ):
        convert_mock.return_value = "doc"
        checkpoint = connector.build_dummy_checkpoint()
        results = list(
            connector._convert_retrieved_files_to_documents(
                drive_files_iter=iter([retrieved_file]),
                checkpoint=checkpoint,
                include_permissions=False,
            )
        )

    assert results == []
    convert_mock.assert_not_called()
    get_new_ancestors_mock.assert_called_once()


def test_connector_processes_files_when_option_disabled() -> None:
    connector = _prepare_connector(exclude=False)
    retrieved_file = _build_retrieved_file(
        [{"type": "domain", "allowFileDiscovery": False}]
    )

    with (
        patch(
            "onyx.connectors.google_drive.connector.run_functions_tuples_in_parallel",
            side_effect=_stub_run_functions,
        ),
        patch(
            "onyx.connectors.google_drive.connector.convert_drive_item_to_document"
        ) as convert_mock,
        patch(
            "onyx.connectors.google_drive.connector.GoogleDriveConnector._get_new_ancestors_for_files"
        ) as get_new_ancestors_mock,
    ):
        convert_mock.return_value = "doc"
        checkpoint = connector.build_dummy_checkpoint()
        results = list(
            connector._convert_retrieved_files_to_documents(
                drive_files_iter=iter([retrieved_file]),
                checkpoint=checkpoint,
                include_permissions=False,
            )
        )

    assert len(results) == 1
    convert_mock.assert_called_once()
    get_new_ancestors_mock.assert_called_once()


================================================
FILE: backend/tests/daily/connectors/google_drive/test_map_test_ids.py
================================================
#!/usr/bin/env python

import json
import os

import pytest

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from tests.daily.connectors.google_drive.conftest import get_credentials_from_env
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import file_name_template
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS


def generate_test_id_to_drive_id_mapping() -> dict[int, str]:
    """
    Generate a mapping from test file IDs to actual Google Drive file IDs.

    This is useful for writing tests that need to verify specific files
    are accessible to specific users.

    Returns:
        dict: Mapping from test file ID (int) to Google Drive file ID (str)
    """
    # Set up the connector with real credentials
    connector = GoogleDriveConnector(
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
    )

    # Load credentials
    connector.load_credentials(get_credentials_from_env(email=ADMIN_EMAIL, oauth=False))

    # Get all documents from the connector
    docs = load_connector_outputs(connector).documents

    # Create a mapping from test file ID to actual Drive file ID
    test_id_to_drive_id = {}

    # Process all documents retrieved from Drive
    for doc in docs:
        # Check if this document's name matches our test file naming pattern (file_X.txt)
        if not doc.semantic_identifier.startswith(
            file_name_template.format("").split("_")[0]
        ):
            continue

        try:
            # Extract the test file ID from the filename (file_X.txt -> X)
            file_id_str = doc.semantic_identifier.split("_")[1].split(".")[0]
            test_file_id = int(file_id_str)

            # Store the mapping from test ID to actual Drive ID
            # Extract Drive ID from document URL
            test_id_to_drive_id[test_file_id] = doc.id
        except (ValueError, IndexError):
            # Skip files that don't follow our naming convention
            continue

    # Print the mapping for all defined test file ID ranges
    all_test_ranges = {
        "ADMIN_FILE_IDS": ADMIN_FILE_IDS,
        "TEST_USER_1_FILE_IDS": TEST_USER_1_FILE_IDS,
        "TEST_USER_2_FILE_IDS": TEST_USER_2_FILE_IDS,
        "TEST_USER_3_FILE_IDS": TEST_USER_3_FILE_IDS,
        "SHARED_DRIVE_1_FILE_IDS": SHARED_DRIVE_1_FILE_IDS,
        "SHARED_DRIVE_2_FILE_IDS": SHARED_DRIVE_2_FILE_IDS,
        "FOLDER_1_FILE_IDS": FOLDER_1_FILE_IDS,
        "FOLDER_1_1_FILE_IDS": FOLDER_1_1_FILE_IDS,
        "FOLDER_1_2_FILE_IDS": FOLDER_1_2_FILE_IDS,
        "FOLDER_2_FILE_IDS": FOLDER_2_FILE_IDS,
        "FOLDER_2_1_FILE_IDS": FOLDER_2_1_FILE_IDS,
        "FOLDER_2_2_FILE_IDS": FOLDER_2_2_FILE_IDS,
        "FOLDER_3_FILE_IDS": FOLDER_3_FILE_IDS,
    }

    # Print the mapping for each test range
    for range_name, file_ids in all_test_ranges.items():
        print(f"\n{range_name}:")
        for test_id in file_ids:
            drive_id = test_id_to_drive_id.get(test_id, "NOT_FOUND")
            print(f"  {test_id} -> {drive_id}")

    return test_id_to_drive_id


@pytest.mark.skipif(
    not os.getenv("RUN_MANUAL_TESTS"),
    reason="This test maps test IDs to actual Google Drive IDs. Set RUN_MANUAL_TESTS=1 to run.",
)
def test_generate_drive_id_mapping() -> None:
    """Test to generate mapping from test IDs to actual Google Drive IDs.

    This test is skipped by default as it requires real Google Drive credentials
    and is primarily used to generate mappings for other tests.

    Run with:

    RUN_MANUAL_TESTS=true pytest -xvs tests/daily/connectors/google_drive/test_map_test_ids.py::test_generate_drive_id_mapping
    """
    mapping = generate_test_id_to_drive_id_mapping()
    assert mapping, "Failed to generate any test ID to drive ID mappings"

    # Write the mapping to a JSON file
    output_dir = os.path.dirname(os.path.abspath(__file__))
    mapping_file = os.path.join(output_dir, "drive_id_mapping.json")

    # Convert int keys to strings for JSON compatibility
    json_mapping = {str(k): v for k, v in mapping.items()}

    # Write the mapping to a JSON file
    with open(mapping_file, "w") as f:
        json.dump(json_mapping, f, indent=2)

    print(f"\nMapping written to: {mapping_file}")
    raise RuntimeError("Mapping written to file, test complete")


================================================
FILE: backend/tests/daily/connectors/google_drive/test_sections.py
================================================
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_URL


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_google_drive_sections(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    oauth_connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=SECTIONS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    service_acct_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=SECTIONS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    for connector in [oauth_connector, service_acct_connector]:
        output = load_connector_outputs(connector)
        retrieved_docs = output.documents

        # Verify we got the 1 doc with sections
        assert len(retrieved_docs) == 1

        # Verify each section has the expected structure
        doc = retrieved_docs[0]
        assert len(doc.sections) == 5

        header_section = doc.sections[0]
        assert header_section.text == "Title\nThis is a Google Doc with sections"
        assert header_section.link is not None
        assert header_section.link.endswith(
            "?tab=t.0#heading=h.hfjc17k6qwzt"
        ) or header_section.link.endswith("?tab=t.0#heading=h.hfjc17k6qwzt")

        section_1 = doc.sections[1]
        assert section_1.text == "Section 1\nSection 1 content"
        assert section_1.link is not None
        assert section_1.link.endswith("?tab=t.0#heading=h.8slfx752a3g5")

        section_2 = doc.sections[2]
        assert section_2.text == "Sub-Section 1-1\nSub-Section 1-1 content"
        assert section_2.link is not None
        assert section_2.link.endswith("?tab=t.0#heading=h.4kj3ayade1bp")

        section_3 = doc.sections[3]
        assert section_3.text == "Sub-Section 1-2\nSub-Section 1-2 content"
        assert section_3.link is not None
        assert section_3.link.endswith("?tab=t.0#heading=h.pm6wrpzgk69l")

        section_4 = doc.sections[4]
        assert section_4.text == "Section 2\nSection 2 content"
        assert section_4.link is not None
        assert section_4.link.endswith("?tab=t.0#heading=h.2m0s9youe2k9")


================================================
FILE: backend/tests/daily/connectors/google_drive/test_service_acct.py
================================================
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch
from urllib.parse import urlparse

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_expected_docs_in_retrieved_docs,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_hierarchy_nodes_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    EXTERNAL_SHARED_DOC_SINGLETON,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    EXTERNAL_SHARED_DOCS_IN_FOLDER,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    EXTERNAL_SHARED_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    EXTERNAL_SHARED_FOLDER_URL,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_shared_drives,
)
from tests.daily.connectors.google_drive.consts_and_utils import id_to_name
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import (
    MISC_SHARED_DRIVE_FNAMES,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    PILL_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    RESTRICTED_ACCESS_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    RESTRICTED_ACCESS_FOLDER_URL,
)
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_ID
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_DRIVE_B_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_DRIVE_B_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_1_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_DRIVE_2_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_EXTRA_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_MY_DRIVE_FOLDER_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_1_MY_DRIVE_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_2_MY_DRIVE,
)
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    TEST_USER_3_MY_DRIVE_ID,
)


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_all(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_all")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    # Should get everything
    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + TEST_USER_1_FILE_IDS
        + TEST_USER_2_FILE_IDS
        + TEST_USER_3_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.update(
        _pick(
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_MY_DRIVE_ID,
            TEST_USER_1_MY_DRIVE_FOLDER_ID,
            TEST_USER_1_DRIVE_B_ID,
            TEST_USER_1_DRIVE_B_FOLDER_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            ADMIN_MY_DRIVE_ID,
            TEST_USER_2_MY_DRIVE,
            TEST_USER_3_MY_DRIVE_ID,
            PILL_FOLDER_ID,
            RESTRICTED_ACCESS_FOLDER_ID,
            TEST_USER_1_EXTRA_FOLDER_ID,
            EXTERNAL_SHARED_FOLDER_ID,
            FOLDER_3_ID,
        )
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_shared_drives_only_with_size_threshold(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_shared_drives_only_with_size_threshold")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )

    # this threshold will skip one file
    connector.size_threshold = 16384

    output = load_connector_outputs(connector)

    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    )

    expected_file_names = {id_to_name(file_id) for file_id in expected_file_ids}
    expected_file_names.update(MISC_SHARED_DRIVE_FNAMES)
    retrieved_file_names = {doc.semantic_identifier for doc in output.documents}
    for name in expected_file_names - retrieved_file_names:
        print(f"expected but did not retrieve: {name}")
    for name in retrieved_file_names - expected_file_names:
        print(f"retrieved but did not expect: {name}")

    # 2 extra files from shared drive owned by non-admin and not shared with admin
    # TODO: added a file in a "restricted" folder, which the connector sometimes succeeds at finding
    # and adding. Specifically, our shared drive retrieval logic currently assumes that
    # "having access to a shared drive" means that the connector has access to all files in the shared drive.
    # therefore when a user successfully retrieves a shared drive, we mark it as "done". If that user's
    # access is restricted for a folder in the shared drive, the connector will not retrieve that folder.
    # If instead someone with FULL access to the shared drive retrieves it, the connector will retrieve
    # the folder and all its files. There is currently no consistency to the order of assignment of users
    # to shared drives, so this is a heisenbug. When we guarantee that restricted folders are retrieved,
    # we can change this to 52
    assert len(output.documents) == 50 or len(output.documents) == 51


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_shared_drives_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_shared_drives_only")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )

    output = load_connector_outputs(connector)

    # Should only get shared drives
    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + SHARED_DRIVE_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    )

    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    # 2 extra files from shared drive owned by non-admin and not shared with admin
    # another one flaky for unknown reasons
    # TODO: switch to 54 when restricted access issue is resolved
    assert len(output.documents) == 51 or len(output.documents) == 52

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.update(
        _pick(
            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,
            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,
            TEST_USER_1_DRIVE_B_ID,
            TEST_USER_1_DRIVE_B_FOLDER_ID,
            TEST_USER_1_EXTRA_DRIVE_1_ID,
            TEST_USER_1_EXTRA_DRIVE_2_ID,
            RESTRICTED_ACCESS_FOLDER_ID,
        )
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_include_my_drives_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_include_my_drives_only")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    # Should only get everyone's My Drives
    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + TEST_USER_1_FILE_IDS
        + TEST_USER_2_FILE_IDS
        + TEST_USER_3_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = _pick(
        FOLDER_3_ID,
        ADMIN_MY_DRIVE_ID,
        TEST_USER_1_MY_DRIVE_ID,
        TEST_USER_1_MY_DRIVE_FOLDER_ID,
        TEST_USER_2_MY_DRIVE,
        TEST_USER_3_MY_DRIVE_ID,
        PILL_FOLDER_ID,
        TEST_USER_1_EXTRA_FOLDER_ID,
        EXTERNAL_SHARED_FOLDER_ID,
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_drive_one_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_drive_one_only")
    urls = [SHARED_DRIVE_1_URL]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=",".join([str(url) for url in urls]),
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    # We ignore shared_drive_urls if include_shared_drives is False
    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=False,
        include_restricted_folder=False,
    )
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_folder_and_shared_drive(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_folder_and_shared_drive")
    drive_urls = [SHARED_DRIVE_1_URL]
    folder_urls = [FOLDER_2_URL]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    # Should get everything except for the top level files in drive 2
    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + FOLDER_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.pop(SECTIONS_FOLDER_ID, None)
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_folders_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_folders_only")
    folder_urls = [
        FOLDER_1_2_URL,
        FOLDER_2_1_URL,
        FOLDER_2_2_URL,
        FOLDER_3_URL,
    ]
    # This should get converted to a drive request and spit out a warning in the logs
    shared_drive_urls = [
        FOLDER_1_1_URL,
    ]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + FOLDER_2_1_FILE_IDS
        + FOLDER_2_2_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    expected_nodes = get_expected_hierarchy_for_shared_drives(
        include_drive_1=True,
        include_drive_2=True,
        include_restricted_folder=False,
    )
    expected_nodes.pop(SECTIONS_FOLDER_ID, None)
    expected_nodes.update(_pick(ADMIN_MY_DRIVE_ID, FOLDER_3_ID))
    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=expected_nodes,
    )


def test_shared_folder_owned_by_external_user(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_folder_owned_by_external_user")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=None,
        shared_folder_urls=EXTERNAL_SHARED_FOLDER_URL,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_docs = EXTERNAL_SHARED_DOCS_IN_FOLDER

    assert len(output.documents) == len(expected_docs)  # 1 for now
    assert expected_docs[0] in output.documents[0].id


def test_shared_with_me(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_with_me")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=True,
        shared_drive_urls=None,
        shared_folder_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    print(output.documents)

    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + TEST_USER_1_FILE_IDS
        + TEST_USER_2_FILE_IDS
        + TEST_USER_3_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    retrieved_ids = {urlparse(doc.id).path.split("/")[-1] for doc in output.documents}
    for id in retrieved_ids:
        print(id)

    assert EXTERNAL_SHARED_DOC_SINGLETON.split("/")[-1] in retrieved_ids
    assert EXTERNAL_SHARED_DOCS_IN_FOLDER[0].split("/")[-1] in retrieved_ids


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_specific_emails(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_specific_emails")
    my_drive_emails = [
        TEST_USER_1_EMAIL,
        TEST_USER_3_EMAIL,
    ]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=",".join([str(email) for email in my_drive_emails]),
    )
    output = load_connector_outputs(connector)

    expected_file_ids = TEST_USER_1_FILE_IDS + TEST_USER_3_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def get_specific_folders_in_my_drive(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning get_specific_folders_in_my_drive")
    folder_urls = [
        FOLDER_3_URL,
    ]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_specific_user_emails_restricted_folder(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_specific_user_emails_restricted_folder")

    # Test with admin email - should get 1 doc
    admin_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
        specific_user_emails=ADMIN_EMAIL,
    )
    admin_output = load_connector_outputs(admin_connector)
    assert len(admin_output.documents) == 1

    # Test with test users - should get 0 docs
    test_users = [TEST_USER_1_EMAIL, TEST_USER_2_EMAIL, TEST_USER_3_EMAIL]
    test_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
        specific_user_emails=",".join(test_users),
    )
    test_output = load_connector_outputs(test_connector)
    assert len(test_output.documents) == 0


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_specific_user_email_shared_with_me(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_specific_user_email_shared_with_me")

    # Test with admin email - should get 1 doc
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=False,  # This is what is set in the UI unfortunately
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
        specific_user_emails=TEST_USER_1_EMAIL,
    )
    output = load_connector_outputs(connector)
    expected = [id_to_name(file_id) for file_id in TEST_USER_1_FILE_IDS]
    expected += ["private_file", "shared_file"]  # in My Drive
    expected += ["read only users can't download"]  # Shared with me

    expected += [id_to_name(file_id) for file_id in [0, 1] + ADMIN_FOLDER_3_FILE_IDS]

    # these are in shared drives
    # expected += ['perm_sync_doc_0ACOrCU1EMD1hUk9PVA_ab63b976-effb-49af-84e7-423d17a17dd7']
    # expected += ['file_22.txt'] # Shared drive

    doc_titles = set(doc.semantic_identifier for doc in output.documents)
    assert doc_titles == set(expected)


================================================
FILE: backend/tests/daily/connectors/google_drive/test_user_1_oauth.py
================================================
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import _clear_parents
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_expected_docs_in_retrieved_docs,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_hierarchy_nodes_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    DONWLOAD_REVOKED_FILE_ID,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_test_user_1,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_test_user_1_my_drive_only,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_test_user_1_shared_drives_only,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
    get_expected_hierarchy_for_test_user_1_shared_with_me_only,
)
from tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_ID
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
from tests.daily.connectors.utils import ConnectorOutput


def _check_for_error(
    output: ConnectorOutput,
    expected_file_ids: list[int],
) -> list[Document]:
    retrieved_docs = output.documents
    retrieved_failures = output.failures
    assert len(retrieved_failures) <= 1

    if len(retrieved_failures) == 1:
        fail_msg = retrieved_failures[0].failure_message
        assert "HttpError 403" in fail_msg
        assert f"file_{DONWLOAD_REVOKED_FILE_ID}.txt" in fail_msg

    expected_file_ids.remove(DONWLOAD_REVOKED_FILE_ID)
    return retrieved_docs


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_all(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_all")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=True,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        TEST_USER_1_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + list(range(0, 2))
    )

    retrieved_docs = _check_for_error(output, expected_file_ids)

    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=get_expected_hierarchy_for_test_user_1(),
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drives_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_drives_only")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=True,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
    )

    retrieved_docs = _check_for_error(output, expected_file_ids)
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=get_expected_hierarchy_for_test_user_1_shared_drives_only(),
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_with_me_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_with_me_only")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=False,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS + list(range(0, 2))
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=get_expected_hierarchy_for_test_user_1_shared_with_me_only(),
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_my_drive_only(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_my_drive_only")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = TEST_USER_1_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=get_expected_hierarchy_for_test_user_1_my_drive_only(),
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_my_drive_folder(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_my_drive_folder")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_3_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=_clear_parents(_pick(FOLDER_3_ID), FOLDER_3_ID),
    )


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drive_folder(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_drive_folder")
    connector = google_drive_oauth_uploaded_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_1_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    output = load_connector_outputs(connector)

    expected_file_ids = FOLDER_1_FILE_IDS + FOLDER_1_1_FILE_IDS + FOLDER_1_2_FILE_IDS
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=output.documents,
        expected_file_ids=expected_file_ids,
    )

    assert_hierarchy_nodes_match_expected(
        retrieved_nodes=output.hierarchy_nodes,
        expected_nodes=_pick(
            SHARED_DRIVE_1_ID, FOLDER_1_ID, FOLDER_1_1_ID, FOLDER_1_2_ID
        ),
    )


================================================
FILE: backend/tests/daily/connectors/highspot/test_highspot_connector.py
================================================
import json
import os
import time
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode


def load_test_data(file_name: str = "test_highspot_data.json") -> dict:
    """Load test data from JSON file."""
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def highspot_connector() -> HighspotConnector:
    """Create a Highspot connector with credentials from environment variables."""
    # Check if required environment variables are set
    if not os.environ.get("HIGHSPOT_KEY") or not os.environ.get("HIGHSPOT_SECRET"):
        pytest.fail("HIGHSPOT_KEY or HIGHSPOT_SECRET environment variables not set")

    connector = HighspotConnector(
        spot_names=["Test content"],  # Use specific spot name instead of empty list
        batch_size=10,  # Smaller batch size for testing
    )
    connector.load_credentials(
        {
            "highspot_key": os.environ["HIGHSPOT_KEY"],
            "highspot_secret": os.environ["HIGHSPOT_SECRET"],
            "highspot_url": os.environ.get(
                "HIGHSPOT_URL", "https://api-su2.highspot.com/v1.0/"
            ),
        }
    )
    return connector


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_highspot_connector_basic(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    highspot_connector: HighspotConnector,
) -> None:
    """Test basic functionality of the Highspot connector."""
    all_docs: list[Document] = []
    test_data = load_test_data()
    target_test_doc_id = test_data.get("target_doc_id")
    target_test_doc: Document | None = None

    # Test loading documents
    for doc_batch in highspot_connector.poll_source(0, time.time()):
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)
            if doc.id == f"HIGHSPOT_{target_test_doc_id}":
                target_test_doc = doc

    # Verify documents were loaded
    assert len(all_docs) > 0

    # If we have a specific test document ID, validate it
    if target_test_doc_id and target_test_doc is not None:
        assert target_test_doc.semantic_identifier == test_data.get(
            "semantic_identifier"
        )
        assert target_test_doc.source == DocumentSource.HIGHSPOT
        assert target_test_doc.metadata is not None

        assert len(target_test_doc.sections) == 1
        section = target_test_doc.sections[0]
        assert section.link is not None
        # Only check if content exists, as exact content might change
        assert section.text is not None
        assert len(section.text) > 0


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_highspot_connector_slim(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    highspot_connector: HighspotConnector,
) -> None:
    """Test slim document retrieval."""
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
    for doc_batch in highspot_connector.load_from_state():
        all_full_doc_ids.update(
            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # Get all doc IDs from the slim connector
    all_slim_doc_ids = set()
    for slim_doc_batch in highspot_connector.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # The set of full doc IDs should be a subset of the slim doc IDs
    assert all_full_doc_ids.issubset(all_slim_doc_ids)
    # Make sure we actually got some documents
    assert len(all_slim_doc_ids) > 0


"""This test might fail because of how Highspot handles changes to the document's
"updated at" property. It is marked as expected to fail until we can confirm the behavior."""


@pytest.mark.xfail(reason="Highspot is not returning updated documents as expected.")
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_highspot_connector_poll_source(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    highspot_connector: HighspotConnector,
) -> None:
    """Test poll_source functionality with date range filtering."""
    # Define date range: April 3, 2025 to April 4, 2025
    start_date = datetime(2025, 4, 3, 0, 0, 0)
    end_date = datetime(2025, 4, 4, 23, 59, 59)

    # Convert to seconds since Unix epoch
    start_time = int(time.mktime(start_date.timetuple()))
    end_time = int(time.mktime(end_date.timetuple()))

    # Load test data for assertions
    test_data = load_test_data()
    poll_source_data = test_data.get("poll_source", {})
    target_doc_id = poll_source_data.get("target_doc_id")

    # Call poll_source with date range
    all_docs: list[Document] = []
    target_doc: Document | None = None

    for doc_batch in highspot_connector.poll_source(start_time, end_time):
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)
            if doc.id == f"HIGHSPOT_{target_doc_id}":
                target_doc = doc

    # Verify documents were loaded
    assert len(all_docs) > 0

    # Verify the specific test document was found and has correct properties
    assert target_doc is not None
    assert target_doc.semantic_identifier == poll_source_data.get("semantic_identifier")
    assert target_doc.source == DocumentSource.HIGHSPOT
    assert target_doc.metadata is not None

    # Verify sections
    assert len(target_doc.sections) == 1
    section = target_doc.sections[0]
    assert section.link == poll_source_data.get("link")
    assert section.text is not None
    assert len(section.text) > 0


def test_highspot_connector_validate_credentials(
    highspot_connector: HighspotConnector,
) -> None:
    """Test credential validation."""
    assert highspot_connector.validate_credentials() is True


================================================
FILE: backend/tests/daily/connectors/highspot/test_highspot_data.json
================================================
{
    "target_doc_id": "67cd8eb35d3ee0487de2e704",
    "semantic_identifier": "Highspot in Action _ Salesforce Integration",
    "link": "https://www.highspot.com/items/67cd8eb35d3ee0487de2e704",
    "poll_source": {
        "target_doc_id":"67efb452c3f40bcca2b48ca5",
        "semantic_identifier":"Introduction to Intelligent Agents",
        "link":"https://www.highspot.com/items/67efb452c3f40bcca2b48ca5"
    }
}


================================================
FILE: backend/tests/daily/connectors/hubspot/test_hubspot_connector.py
================================================
import os
from datetime import datetime
from datetime import timezone
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.connectors.hubspot.connector import AVAILABLE_OBJECT_TYPES
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode


class TestHubSpotConnector:
    """Test HubSpot connector functionality using real API calls."""

    @pytest.fixture
    def connector(self) -> HubSpotConnector:
        """Create a HubSpot connector instance."""
        return HubSpotConnector(batch_size=10)

    @pytest.fixture
    def credentials(self) -> dict[str, Any]:
        """Provide test credentials."""
        return {"hubspot_access_token": os.environ["HUBSPOT_ACCESS_TOKEN"]}

    def test_credentials_properties_raise_exception_when_none(self) -> None:
        """Test that access_token and portal_id properties raise exceptions when not set."""
        connector = HubSpotConnector()

        # access_token should raise exception when not set
        with pytest.raises(ConnectorMissingCredentialError) as exc_info:
            _ = connector.access_token
        assert "HubSpot access token not set" in str(exc_info.value)

        # portal_id should raise exception when not set
        with pytest.raises(ConnectorMissingCredentialError) as exc_info:
            _ = connector.portal_id
        assert "HubSpot portal ID not set" in str(exc_info.value)

    def test_load_credentials(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Test that credentials are loaded correctly."""
        result = connector.load_credentials(credentials)

        assert result is None  # Should return None on success
        assert connector.access_token == credentials["hubspot_access_token"]
        assert connector.portal_id is not None
        assert isinstance(connector.portal_id, str)

    def test_load_from_state_basic_functionality(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Test basic load_from_state functionality."""
        connector.load_credentials(credentials)

        # Get first batch of documents
        document_batches = connector.load_from_state()
        first_batch = next(document_batches, None)

        # Should have at least some documents
        assert first_batch is not None
        assert isinstance(first_batch, list)
        assert len(first_batch) > 0

        # Check document structure
        doc = first_batch[0]
        assert isinstance(doc, Document)
        assert doc.id.startswith("hubspot_")
        assert doc.source.value == "hubspot"
        assert doc.semantic_identifier is not None
        assert doc.doc_updated_at is not None
        assert isinstance(doc.metadata, dict)
        assert "object_type" in doc.metadata
        assert doc.metadata["object_type"] in ["ticket", "company", "deal", "contact"]

        # Check sections
        assert len(doc.sections) > 0
        assert doc.sections[0].text is not None
        assert doc.sections[0].link is not None

    def test_document_metadata_structure(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Test that document metadata contains expected fields."""
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()
        all_docs: list[Document] = []

        # Collect a few batches to test different object types
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            batch_count += 1
            if (
                batch_count >= 3 or len(all_docs) >= 20
            ):  # Limit to avoid too many API calls
                break

        # Group documents by object type
        docs_by_type: dict[str, list[Document]] = {}
        for doc in all_docs:
            obj_type_value = doc.metadata["object_type"]
            # Handle the case where metadata value could be a list
            obj_type = (
                obj_type_value if isinstance(obj_type_value, str) else obj_type_value[0]
            )
            if obj_type not in docs_by_type:
                docs_by_type[obj_type] = []
            docs_by_type[obj_type].append(doc)

        # Test each object type has expected metadata
        for obj_type, docs in docs_by_type.items():
            doc = docs[0]  # Test first document of each type

            if obj_type == "ticket":
                assert "ticket_id" in doc.metadata
                assert doc.id.startswith("hubspot_ticket_")
            elif obj_type == "company":
                assert "company_id" in doc.metadata
                assert doc.id.startswith("hubspot_company_")

            elif obj_type == "deal":
                assert "deal_id" in doc.metadata
                assert doc.id.startswith("hubspot_deal_")

            elif obj_type == "contact":
                assert "contact_id" in doc.metadata
                assert doc.id.startswith("hubspot_contact_")

            # Check for associated object IDs in metadata (if they exist)
            potential_association_keys = [
                "associated_contact_ids",
                "associated_company_ids",
                "associated_deal_ids",
                "associated_ticket_ids",
                "associated_note_ids",
            ]

            for key in potential_association_keys:
                if key in doc.metadata:
                    assert isinstance(doc.metadata[key], list)
                    assert len(doc.metadata[key]) > 0
                    assert all(isinstance(id_val, str) for id_val in doc.metadata[key])

    def test_associated_objects_as_sections(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Test that associated objects are included as sections."""
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()

        # Find a document with multiple sections (indicating associated objects)
        found_multi_section_doc = False
        batch_count = 0

        for batch in document_batches:
            for doc in batch:
                if isinstance(doc, HierarchyNode):
                    continue
                if len(doc.sections) > 1:
                    found_multi_section_doc = True

                    # First section should be the main object
                    main_section = doc.sections[0]
                    assert main_section.text is not None
                    assert main_section.link is not None

                    # Additional sections should be associated objects
                    for section in doc.sections[1:]:
                        assert section.text is not None
                        assert section.link is not None
                        # Should contain object type information
                        assert any(
                            obj_type in section.text.lower()
                            for obj_type in [
                                "contact:",
                                "company:",
                                "deal:",
                                "ticket:",
                                "note:",
                            ]
                        )

                    break

            if found_multi_section_doc:
                break

            batch_count += 1
            if batch_count >= 5:  # Limit API calls
                break

        # Note: This test might not always pass if there are no associated objects
        # in the test HubSpot instance, but it validates the structure when they exist
        if found_multi_section_doc:
            print("✓ Found document with associated objects as sections")
        else:
            print("⚠ No documents with associated objects found in test data")

    def test_poll_source_functionality(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Test poll_source with time filtering."""
        connector.load_credentials(credentials)

        # Test with a recent time range (last 30 days)
        end_time = datetime.now(timezone.utc)
        start_time = datetime.now(timezone.utc).replace(day=1)  # Start of current month

        start_timestamp = int(start_time.timestamp())
        end_timestamp = int(end_time.timestamp())

        document_batches = connector.poll_source(start_timestamp, end_timestamp)

        # Should be able to get at least one batch
        first_batch = next(document_batches, None)

        if first_batch is not None:
            assert isinstance(first_batch, list)
            assert len(first_batch) > 0

            # Check that documents have proper timestamps
            for doc in first_batch:
                if isinstance(doc, HierarchyNode):
                    continue
                assert doc.doc_updated_at is not None
                # Note: We don't strictly enforce the time range here since
                # the test data might not have recent updates
        else:
            print("⚠ No documents found in the specified time range")

    def test_all_object_types_processed(
        self, connector: HubSpotConnector, credentials: dict[str, Any]
    ) -> None:
        """Integration test to verify all object types are processed correctly."""
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()
        all_docs: list[Document] = []
        object_types_found = set()

        # Collect several batches to ensure we see all object types
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            for doc in batch:
                if isinstance(doc, HierarchyNode):
                    continue
                object_types_found.add(doc.metadata["object_type"])

            batch_count += 1
            # Stop after we've seen all expected types or after reasonable number of batches
            if len(object_types_found) >= 4 or batch_count >= 10:
                break

        print(f"Found {len(all_docs)} total documents")
        print(f"Object types found: {sorted(object_types_found)}")

        # Should have at least some documents
        assert len(all_docs) > 0

        # Verify we can process multiple object types
        # Note: We don't require all 4 types since the test instance might not have all types
        assert len(object_types_found) >= 1

        # Verify document structure for each type found
        for obj_type in object_types_found:
            type_docs = [
                doc for doc in all_docs if doc.metadata["object_type"] == obj_type
            ]
            assert len(type_docs) > 0

            # Check first document of this type
            doc = type_docs[0]
            assert doc.id.startswith(f"hubspot_{obj_type}_")
            assert doc.semantic_identifier is not None
            assert len(doc.sections) > 0
            assert doc.sections[0].text is not None
            assert doc.sections[0].link is not None

            # Check object-specific metadata
            if obj_type == "company":
                assert "company_id" in doc.metadata
            elif obj_type == "deal":
                assert "deal_id" in doc.metadata
            elif obj_type == "contact":
                assert "contact_id" in doc.metadata
            elif obj_type == "ticket":
                assert "ticket_id" in doc.metadata

    def test_init_default_object_types(self) -> None:
        """Test that connector initializes with all object types by default."""
        connector = HubSpotConnector()
        assert connector.object_types == AVAILABLE_OBJECT_TYPES
        assert "tickets" in connector.object_types
        assert "companies" in connector.object_types
        assert "deals" in connector.object_types
        assert "contacts" in connector.object_types

    def test_init_custom_object_types(self) -> None:
        """Test that connector can be initialized with custom object types."""
        custom_types = ["tickets", "companies"]
        connector = HubSpotConnector(object_types=custom_types)
        expected_set = {"tickets", "companies"}
        assert connector.object_types == expected_set
        assert "tickets" in connector.object_types
        assert "companies" in connector.object_types
        assert "deals" not in connector.object_types
        assert "contacts" not in connector.object_types

    def test_init_custom_object_types_from_list(self) -> None:
        """Test that connector can be initialized with custom object types from a list (frontend format)."""
        custom_types_list = ["tickets", "companies"]
        connector = HubSpotConnector(object_types=custom_types_list)
        expected_set = {"tickets", "companies"}
        assert connector.object_types == expected_set
        assert "tickets" in connector.object_types
        assert "companies" in connector.object_types
        assert "deals" not in connector.object_types
        assert "contacts" not in connector.object_types

    def test_init_single_object_type(self) -> None:
        """Test that connector can be initialized with a single object type."""
        single_type = ["deals"]
        connector = HubSpotConnector(object_types=single_type)
        expected_set = {"deals"}
        assert connector.object_types == expected_set
        assert len(connector.object_types) == 1
        assert "deals" in connector.object_types

    def test_init_invalid_object_types(self) -> None:
        """Test that connector raises error for invalid object types."""
        invalid_types = ["tickets", "invalid_type", "another_invalid"]

        with pytest.raises(ValueError) as exc_info:
            HubSpotConnector(object_types=invalid_types)

        error_message = str(exc_info.value)
        assert "Invalid object types" in error_message
        assert "invalid_type" in error_message
        assert "another_invalid" in error_message
        assert "Available types" in error_message

    def test_init_empty_object_types(self) -> None:
        """Test that connector can be initialized with empty object types set."""
        empty_types: list[str] = []
        connector = HubSpotConnector(object_types=empty_types)
        expected_set: set[str] = set()
        assert connector.object_types == expected_set
        assert len(connector.object_types) == 0

    def test_selective_object_fetching_tickets_only(
        self, credentials: dict[str, Any]
    ) -> None:
        """Test that only tickets are fetched when configured."""
        connector = HubSpotConnector(object_types=["tickets"], batch_size=5)
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()
        all_docs: list[Document] = []

        # Collect a few batches
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            batch_count += 1
            if batch_count >= 3 or len(all_docs) >= 10:
                break

        # Should have documents
        if all_docs:
            # All documents should be tickets
            for doc in all_docs:
                assert doc.metadata["object_type"] == "ticket"
                assert doc.id.startswith("hubspot_ticket_")

            print(f"✓ Successfully fetched {len(all_docs)} ticket documents only")
        else:
            print("⚠ No ticket documents found in test data")

    def test_selective_object_fetching_companies_and_deals(
        self, credentials: dict[str, Any]
    ) -> None:
        """Test that only companies and deals are fetched when configured."""
        connector = HubSpotConnector(object_types=["companies", "deals"], batch_size=5)
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()
        all_docs: list[Document] = []
        object_types_found = set()

        # Collect a few batches
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            for doc in batch:
                if isinstance(doc, HierarchyNode):
                    continue
                object_types_found.add(doc.metadata["object_type"])
            batch_count += 1
            if batch_count >= 3 or len(all_docs) >= 10:
                break

        if all_docs:
            # Should only have companies and deals
            assert object_types_found.issubset({"company", "deal"})
            assert "ticket" not in object_types_found
            assert "contact" not in object_types_found

            # Verify document structure
            for doc in all_docs:
                obj_type = doc.metadata["object_type"]
                assert obj_type in ["company", "deal"]
                if obj_type == "company":
                    assert doc.id.startswith("hubspot_company_")
                elif obj_type == "deal":
                    assert doc.id.startswith("hubspot_deal_")

            print(
                f"✓ Successfully fetched {len(all_docs)} documents of types: {object_types_found}"
            )
        else:
            print("⚠ No company/deal documents found in test data")

    def test_empty_object_types_fetches_nothing(
        self, credentials: dict[str, Any]
    ) -> None:
        """Test that no documents are fetched when object_types is empty."""
        connector = HubSpotConnector(object_types=[], batch_size=5)
        connector.load_credentials(credentials)

        document_batches = connector.load_from_state()
        all_docs: list[Document] = []

        # Try to collect batches
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            batch_count += 1
            if batch_count >= 2:  # Don't wait too long
                break

        # Should have no documents
        assert len(all_docs) == 0
        print("✓ No documents fetched with empty object_types as expected")

    def test_poll_source_respects_object_types(
        self, credentials: dict[str, Any]
    ) -> None:
        """Test that poll_source respects the object_types configuration."""
        connector = HubSpotConnector(object_types=["contacts"], batch_size=5)
        connector.load_credentials(credentials)

        # Test with a recent time range
        end_time = datetime.now(timezone.utc)
        start_time = datetime.now(timezone.utc).replace(day=1)

        start_timestamp = int(start_time.timestamp())
        end_timestamp = int(end_time.timestamp())

        document_batches = connector.poll_source(start_timestamp, end_timestamp)
        all_docs: list[Document] = []

        # Collect a few batches
        batch_count = 0
        for batch in document_batches:
            all_docs.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )
            batch_count += 1
            if batch_count >= 2 or len(all_docs) >= 5:
                break

        if all_docs:
            # All documents should be contacts
            for doc in all_docs:
                assert doc.metadata["object_type"] == "contact"
                assert doc.id.startswith("hubspot_contact_")

            print(
                f"✓ Poll source successfully fetched {len(all_docs)} contact documents only"
            )
        else:
            print("⚠ No contact documents found in specified time range")

    def test_object_types_immutability(self) -> None:
        """Test that object_types set cannot be modified externally."""
        original_types = ["tickets", "companies"]
        connector = HubSpotConnector(object_types=original_types)

        # Modifying the original list should not affect the connector
        original_types.append("deals")
        assert "deals" not in connector.object_types
        assert connector.object_types == {"tickets", "companies"}

        # Trying to modify the connector's object_types should not affect the original
        connector_types = connector.object_types
        connector_types.add("contacts")
        # The connector should still have the original types since we made a copy
        # Note: This test verifies our implementation makes a copy in __init__

    def test_url_generation(self) -> None:
        """Test that URLs are generated correctly for different object types."""
        connector = HubSpotConnector()
        connector.portal_id = "12345"  # Mock portal ID

        # Test URL generation for each object type
        ticket_url = connector._get_object_url("tickets", "67890")
        expected_ticket_url = "https://app.hubspot.com/contacts/12345/record/0-5/67890"
        assert ticket_url == expected_ticket_url

        company_url = connector._get_object_url("companies", "11111")
        expected_company_url = "https://app.hubspot.com/contacts/12345/record/0-2/11111"
        assert company_url == expected_company_url

        deal_url = connector._get_object_url("deals", "22222")
        expected_deal_url = "https://app.hubspot.com/contacts/12345/record/0-3/22222"
        assert deal_url == expected_deal_url

        contact_url = connector._get_object_url("contacts", "33333")
        expected_contact_url = "https://app.hubspot.com/contacts/12345/record/0-1/33333"
        assert contact_url == expected_contact_url

        note_url = connector._get_object_url("notes", "44444")
        expected_note_url = "https://app.hubspot.com/contacts/12345/objects/0-4/44444"
        assert note_url == expected_note_url

    def test_ticket_with_none_content(self) -> None:
        """Test that tickets with None content are handled gracefully."""
        connector = HubSpotConnector(object_types=["tickets"], batch_size=10)
        connector._access_token = "mock_token"
        connector._portal_id = "mock_portal_id"

        # Create a mock ticket with None content
        mock_ticket = MagicMock()
        mock_ticket.id = "12345"
        mock_ticket.properties = {
            "subject": "Test Ticket",
            "content": None,  # This is the key test case
            "hs_ticket_priority": "HIGH",
        }
        mock_ticket.updated_at = datetime.now(timezone.utc)

        # Mock the HubSpot API client
        mock_api_client = MagicMock()

        # Mock the API calls and associated object methods
        with (
            patch("onyx.connectors.hubspot.connector.HubSpot") as MockHubSpot,
            patch.object(connector, "_paginated_results") as mock_paginated,
            patch.object(connector, "_get_associated_objects", return_value=[]),
            patch.object(connector, "_get_associated_notes", return_value=[]),
        ):
            MockHubSpot.return_value = mock_api_client
            mock_paginated.return_value = iter([mock_ticket])

            # This should not raise a validation error
            document_batches = connector._process_tickets()
            first_batch = next(document_batches, None)

            # Verify the document was created successfully
            assert first_batch is not None
            assert len(first_batch) == 1

            doc = first_batch[0]
            assert not isinstance(doc, HierarchyNode)
            assert doc.id == "hubspot_ticket_12345"
            assert doc.semantic_identifier == "Test Ticket"

            # Verify the first section has an empty string, not None
            assert len(doc.sections) > 0
            assert doc.sections[0].text == ""  # Should be empty string, not None
            assert doc.sections[0].link is not None


================================================
FILE: backend/tests/daily/connectors/imap/models.py
================================================
from pydantic import BaseModel

from onyx.connectors.models import Document
from tests.daily.connectors.utils import to_text_sections


class EmailDoc(BaseModel):
    subject: str
    recipients: set[str]
    body: str

    @classmethod
    def from_doc(cls, document: Document) -> "EmailDoc":
        # Acceptable to perform assertions since this class is only used in tests.
        assert document.title
        assert document.external_access

        body = " ".join(to_text_sections(sections=iter(document.sections)))

        return cls(
            subject=document.title,
            recipients=document.external_access.external_user_emails,
            body=body,
        )


================================================
FILE: backend/tests/daily/connectors/imap/test_imap_connector.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.connectors.imap.connector import ImapConnector
from tests.daily.connectors.imap.models import EmailDoc
from tests.daily.connectors.utils import (
    load_all_from_connector,
)


@pytest.fixture
def imap_connector() -> ImapConnector:
    host = os.environ.get("IMAP_HOST")
    mailboxes_str = os.environ.get("IMAP_MAILBOXES")
    username = os.environ.get("IMAP_USERNAME")
    password = os.environ.get("IMAP_PASSWORD")

    assert host
    mailboxes = (
        [mailbox.strip() for mailbox in mailboxes_str.split(",") if mailbox]
        if mailboxes_str
        else []
    )

    imap_connector = ImapConnector(
        host=host,
        mailboxes=mailboxes,
    )
    imap_connector.set_credentials_provider(
        OnyxStaticCredentialsProvider(
            tenant_id=None,
            connector_name=DocumentSource.IMAP,
            credential_json={
                "imap_username": username,
                "imap_password": password,
            },
        )
    )

    return imap_connector


@pytest.mark.parametrize(
    "expected_email_docs",
    [
        [
            EmailDoc(
                subject="Testing",
                recipients=set(["admin@onyx-test.com", "raunak@onyx.app"]),
                body="Hello, testing.",
            ),
            EmailDoc(
                subject="Hello world",
                recipients=set(["admin@onyx-test.com", "r@rabh.io", "raunak@onyx.app"]),
                body='Hello world, this is an email that contains multiple "To" recipients.',
            ),
        ]
    ],
)
def test_imap_connector(
    imap_connector: ImapConnector,
    expected_email_docs: list[EmailDoc],
) -> None:
    actual_email_docs = [
        EmailDoc.from_doc(document=document)
        for document in load_all_from_connector(
            connector=imap_connector,
            start=0,
            end=time.time(),
            include_permissions=True,
        ).documents
    ]

    assert actual_email_docs == expected_email_docs


================================================
FILE: backend/tests/daily/connectors/jira/test_jira_basic.py
================================================
import os
import time
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.jira.connector import JiraConnector
from onyx.connectors.models import Document
from tests.daily.connectors.utils import load_all_from_connector


def _make_connector(scoped_token: bool = False) -> JiraConnector:
    connector = JiraConnector(
        jira_base_url="https://danswerai.atlassian.net",
        project_key="AS",
        comment_email_blacklist=[],
        scoped_token=scoped_token,
    )
    connector.load_credentials(
        {
            "jira_user_email": os.environ["JIRA_USER_EMAIL"],
            "jira_api_token": (
                os.environ["JIRA_API_TOKEN_SCOPED"]
                if scoped_token
                else os.environ["JIRA_API_TOKEN"]
            ),
        }
    )
    return connector


@pytest.fixture
def jira_connector() -> JiraConnector:
    return _make_connector()


@pytest.fixture
def jira_connector_scoped() -> JiraConnector:
    return _make_connector(scoped_token=True)


@pytest.fixture
def jira_connector_with_jql() -> JiraConnector:
    connector = JiraConnector(
        jira_base_url="https://danswerai.atlassian.net",
        jql_query="project = 'AS' AND issuetype = Story",
        comment_email_blacklist=[],
    )
    connector.load_credentials(
        {
            "jira_user_email": os.environ["JIRA_USER_EMAIL"],
            "jira_api_token": os.environ["JIRA_API_TOKEN"],
        }
    )
    connector.validate_connector_settings()

    return connector


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_jira_connector_basic(
    reset: None,  # noqa: ARG001
    jira_connector: JiraConnector,
) -> None:
    _test_jira_connector_basic(jira_connector)


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_jira_connector_basic_scoped(
    reset: None,  # noqa: ARG001
    jira_connector_scoped: JiraConnector,
) -> None:
    _test_jira_connector_basic(jira_connector_scoped)


def _test_jira_connector_basic(jira_connector: JiraConnector) -> None:
    docs = load_all_from_connector(
        connector=jira_connector,
        start=0,
        end=time.time(),
    ).documents
    assert len(docs) == 2

    # Find story and epic
    story: Document | None = None
    epic: Document | None = None
    for doc in docs:
        if doc.metadata["issuetype"] == "Story":
            story = doc
        elif doc.metadata["issuetype"] == "Epic":
            epic = doc

    assert story is not None
    assert epic is not None

    # Check task
    assert story.id == "https://danswerai.atlassian.net/browse/AS-3"
    assert story.semantic_identifier == "AS-3: Magic Answers"
    assert story.source == DocumentSource.JIRA
    assert story.metadata == {
        "priority": "Medium",
        "status": "Done",
        "resolution": "Done",
        "resolution_date": "2025-05-29T15:33:31.031-0700",
        "reporter": "Chris Weaver",
        "assignee": "Chris Weaver",
        "issuetype": "Story",
        "created": "2025-04-16T16:44:06.716-0700",
        "reporter_email": "chris@onyx.app",
        "assignee_email": "chris@onyx.app",
        "project_name": "DailyConnectorTestProject",
        "project": "AS",
        "parent": "AS-4",
        "key": "AS-3",
        "updated": "2025-06-17T12:13:00.070-0700",
    }
    assert story.secondary_owners is None
    assert story.title == "AS-3 Magic Answers"
    assert story.from_ingestion_api is False
    assert story.additional_info is None

    assert len(story.sections) == 1
    section = story.sections[0]
    assert (
        section.text
        == "This is a critical request for super-human answer quality in Onyx! We need magic!\n"
    )
    assert section.link == "https://danswerai.atlassian.net/browse/AS-3"

    # Check epic
    assert epic.id == "https://danswerai.atlassian.net/browse/AS-4"
    assert epic.semantic_identifier == "AS-4: EPIC"
    assert epic.source == DocumentSource.JIRA
    assert epic.metadata == {
        "priority": "Medium",
        "status": "Backlog",
        "reporter": "Founder Onyx",
        "assignee": "Chris Weaver",
        "issuetype": "Epic",
        "created": "2025-04-16T16:55:53.068-0700",
        "reporter_email": "founders@onyx.app",
        "assignee_email": "chris@onyx.app",
        "project_name": "DailyConnectorTestProject",
        "project": "AS",
        "key": "AS-4",
        "updated": "2025-05-29T14:43:05.312-0700",
    }
    assert epic.secondary_owners is None
    assert epic.title == "AS-4 EPIC"
    assert epic.from_ingestion_api is False
    assert epic.additional_info is None

    assert len(epic.sections) == 1
    section = epic.sections[0]
    assert section.text == "example_text\n"
    assert section.link == "https://danswerai.atlassian.net/browse/AS-4"


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_jira_connector_with_jql(
    reset: None,  # noqa: ARG001
    jira_connector_with_jql: JiraConnector,
) -> None:
    """Test that JQL query functionality works correctly.

    This test verifies that when a JQL query is provided, only issues matching the query are returned.
    The JQL query used is "project = \'AS\' AND issuetype = Story", which should only return Story-type issues.
    """
    docs = load_all_from_connector(
        connector=jira_connector_with_jql,
        start=0,
        end=time.time(),
    ).documents

    # Should only return Story-type issues
    assert len(docs) == 1

    # All documents should be Story-type
    for doc in docs:
        assert doc.metadata["issuetype"] == "Story"

    # Verify it's the expected Story
    story = docs[0]
    assert story.id == "https://danswerai.atlassian.net/browse/AS-3"
    assert story.semantic_identifier == "AS-3: Magic Answers"
    assert story.metadata["issuetype"] == "Story"


================================================
FILE: backend/tests/daily/connectors/notion/test_notion_connector.py
================================================
import os
import time

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.notion.connector import NotionConnector


def compare_hierarchy_nodes(
    yielded_nodes: list[HierarchyNode],
    expected_nodes: list[HierarchyNode],
) -> None:
    """Compare yielded HierarchyNodes against expected ground truth.

    Compares nodes by their essential fields (raw_node_id, raw_parent_id, display_name, link).
    Order does not matter.
    """
    if not expected_nodes:
        # Empty ground truth - skip comparison for now
        return

    yielded_set = {
        (n.raw_node_id, n.raw_parent_id, n.display_name, n.link) for n in yielded_nodes
    }
    expected_set = {
        (n.raw_node_id, n.raw_parent_id, n.display_name, n.link) for n in expected_nodes
    }

    missing = expected_set - yielded_set
    extra = yielded_set - expected_set

    assert not missing, f"Missing expected HierarchyNodes: {missing}"
    assert not extra, f"Unexpected HierarchyNodes: {extra}"


@pytest.fixture
def notion_connector() -> NotionConnector:
    """Create a NotionConnector with credentials from environment variables"""
    connector = NotionConnector()
    connector.load_credentials(
        {
            "notion_integration_token": os.environ["NOTION_INTEGRATION_TOKEN"],
        }
    )
    return connector


def test_notion_connector_basic(notion_connector: NotionConnector) -> None:
    """Test the NotionConnector with a real Notion page.

    Uses a Notion workspace under the onyx-test.com domain.
    """
    doc_batch_generator = notion_connector.poll_source(0, time.time())

    # Collect all documents and hierarchy nodes from all batches
    documents: list[Document] = []
    hierarchy_nodes: list[HierarchyNode] = []
    for doc_batch in doc_batch_generator:
        for item in doc_batch:
            if isinstance(item, HierarchyNode):
                hierarchy_nodes.append(item)
            else:
                documents.append(item)

    # Verify document count
    assert (
        len(documents) == 5
    ), "Expected exactly 5 documents (root, two children, table entry, and table entry child)"

    # Verify HierarchyNodes against ground truth (empty for now)
    expected_hierarchy_nodes: list[HierarchyNode] = []
    compare_hierarchy_nodes(hierarchy_nodes, expected_hierarchy_nodes)

    # Find root and child documents by semantic identifier
    root_doc = None
    child1_doc = None
    child2_doc = None
    table_entry_doc = None
    table_entry_child_doc = None
    for doc in documents:
        if doc.semantic_identifier == "Root":
            root_doc = doc
        elif doc.semantic_identifier == "Child1":
            child1_doc = doc
        elif doc.semantic_identifier == "Child2":
            child2_doc = doc
        elif doc.semantic_identifier == "table-entry01":
            table_entry_doc = doc
        elif doc.semantic_identifier == "Child-table-entry01":
            table_entry_child_doc = doc

    assert root_doc is not None, "Root document not found"
    assert child1_doc is not None, "Child1 document not found"
    assert child2_doc is not None, "Child2 document not found"
    assert table_entry_doc is not None, "Table entry document not found"
    assert table_entry_child_doc is not None, "Table entry child document not found"

    # Verify root document structure
    assert root_doc.id is not None
    assert root_doc.source == DocumentSource.NOTION

    # Section checks for root
    assert len(root_doc.sections) == 1
    root_section = root_doc.sections[0]

    # Content specific checks for root
    assert root_section.text == "\nroot"
    assert root_section.link is not None
    assert root_section.link.startswith("https://www.notion.so/")

    # Verify child1 document structure
    assert child1_doc.id is not None
    assert child1_doc.source == DocumentSource.NOTION

    # Section checks for child1
    assert len(child1_doc.sections) == 1
    child1_section = child1_doc.sections[0]

    # Content specific checks for child1
    assert child1_section.text == "\nchild1"
    assert child1_section.link is not None
    assert child1_section.link.startswith("https://www.notion.so/")

    # Verify child2 document structure (includes database)
    assert child2_doc.id is not None
    assert child2_doc.source == DocumentSource.NOTION

    # Section checks for child2
    assert len(child2_doc.sections) == 2  # One for content, one for database
    child2_section = child2_doc.sections[0]
    child2_db_section = child2_doc.sections[1]

    # Content specific checks for child2
    assert child2_section.text == "\nchild2"
    assert child2_section.link is not None
    assert child2_section.link.startswith("https://www.notion.so/")

    # Database section checks for child2
    assert child2_db_section.text is not None
    assert child2_db_section.text.strip() != ""  # Should contain some database content
    assert child2_db_section.link is not None
    assert child2_db_section.link.startswith("https://www.notion.so/")

    # Verify table entry document structure
    assert table_entry_doc.id is not None
    assert table_entry_doc.source == DocumentSource.NOTION

    # Section checks for table entry
    assert len(table_entry_doc.sections) == 1
    table_entry_section = table_entry_doc.sections[0]

    # Content specific checks for table entry
    assert table_entry_section.text == "\ntable-entry01"
    assert table_entry_section.link is not None
    assert table_entry_section.link.startswith("https://www.notion.so/")

    # Verify table entry child document structure
    assert table_entry_child_doc.id is not None
    assert table_entry_child_doc.source == DocumentSource.NOTION

    # Section checks for table entry child
    assert len(table_entry_child_doc.sections) == 1
    table_entry_child_section = table_entry_child_doc.sections[0]

    # Content specific checks for table entry child
    assert table_entry_child_section.text == "\nchild-table-entry01"
    assert table_entry_child_section.link is not None
    assert table_entry_child_section.link.startswith("https://www.notion.so/")


================================================
FILE: backend/tests/daily/connectors/outline/test_outline_connector.py
================================================
import os
import time
from typing import Any

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.outline.connector import OutlineConnector


class TestOutlineConnector:
    """Comprehensive test suite for the OutlineConnector."""

    @pytest.fixture
    def connector(self) -> OutlineConnector:
        """Create an Outline connector instance."""
        return OutlineConnector(batch_size=10)

    @pytest.fixture
    def credentials(self) -> dict[str, Any]:
        """Provide test credentials from environment variables."""
        outline_base_url = os.environ.get("OUTLINE_BASE_URL")
        outline_api_token = os.environ.get("OUTLINE_API_TOKEN")

        if not outline_base_url or not outline_api_token:
            pytest.skip(
                "OUTLINE_BASE_URL and OUTLINE_API_TOKEN environment variables must be set"
            )

        return {
            "outline_api_token": outline_api_token,
            "outline_base_url": outline_base_url,
        }

    def test_credentials_missing_raises_exception(self) -> None:
        """Should raise if credentials are missing."""
        connector = OutlineConnector()

        with pytest.raises(ConnectorMissingCredentialError) as exc_info:
            list(connector.load_from_state())
        assert "Outline" in str(exc_info.value)

    def test_load_credentials(
        self, connector: OutlineConnector, credentials: dict[str, Any]
    ) -> None:
        """Credentials should load correctly."""
        result = connector.load_credentials(credentials)

        assert result is None
        assert connector.outline_client is not None
        assert connector.outline_client.api_token == credentials["outline_api_token"]
        assert connector.outline_client.base_url == credentials[
            "outline_base_url"
        ].rstrip("/")

    def test_outline_connector_basic(
        self, connector: OutlineConnector, credentials: dict[str, Any]
    ) -> None:
        """Validate that connector fetches and structures documents properly."""
        connector.load_credentials(credentials)

        documents: list[Document] = []
        for batch in connector.load_from_state():
            documents.extend(
                [doc for doc in batch if not isinstance(doc, HierarchyNode)]
            )

        assert len(documents) > 0, "Expected at least one document/collection"

        collections = [d for d in documents if d.metadata.get("type") == "collection"]
        docs = [d for d in documents if d.metadata.get("type") == "document"]

        assert len(collections) > 0, "Should find at least one collection"

        collection = collections[0]
        assert collection.id.startswith("outline_collection__")
        assert collection.source == DocumentSource.OUTLINE
        assert collection.title is not None
        assert len(collection.sections) == 1
        assert collection.sections[0].text is not None
        assert collection.metadata["type"] == "collection"

        if docs:
            document = docs[0]
            assert document.id.startswith("outline_document__")
            assert document.source == DocumentSource.OUTLINE
            assert document.title is not None
            assert len(document.sections) == 1
            assert document.sections[0].text is not None
            assert document.metadata["type"] == "document"

            section_link = document.sections[0].link
            assert section_link is not None
            assert "/doc/" in section_link

    def test_outline_connector_time_filtering(
        self, connector: OutlineConnector, credentials: dict[str, Any]
    ) -> None:
        """Validate poll_source with time range filtering."""
        connector.load_credentials(credentials)

        end_time = time.time()
        start_time = end_time - 30 * 24 * 60 * 60

        docs: list[Document] = []
        for batch in connector.poll_source(start_time, end_time):
            docs.extend([doc for doc in batch if not isinstance(doc, HierarchyNode)])

        for doc in docs:
            assert isinstance(doc, Document)
            assert doc.source == DocumentSource.OUTLINE
            if doc.doc_updated_at:
                assert start_time <= doc.doc_updated_at.timestamp() <= end_time

    def test_outline_connector_load_from_state(
        self, connector: OutlineConnector, credentials: dict[str, Any]
    ) -> None:
        """load_from_state should fetch documents."""
        connector.load_credentials(credentials)

        gen = connector.load_from_state()
        batch = next(gen)
        assert isinstance(batch, list)

        for doc in batch:
            assert isinstance(doc, Document)
            assert doc.source == DocumentSource.OUTLINE

    def test_outline_connector_batch_processing(
        self, credentials: dict[str, Any]
    ) -> None:
        """Connector should respect batch size."""
        small_batch_connector = OutlineConnector(batch_size=2)
        small_batch_connector.load_credentials(credentials)

        for batch in small_batch_connector.poll_source(0, time.time()):
            assert len(batch) <= 2
            break

    def test_outline_connector_document_types(
        self, connector: OutlineConnector, credentials: dict[str, Any]
    ) -> None:
        """Validate metadata for collections and documents."""
        connector.load_credentials(credentials)

        docs: list[Document] = []
        for batch in connector.poll_source(0, time.time()):
            docs.extend([doc for doc in batch if not isinstance(doc, HierarchyNode)])

        if docs:
            doc_types = {d.metadata["type"] for d in docs}
            assert doc_types.issubset({"document", "collection"})

            for doc in docs:
                if doc.metadata["type"] == "document":
                    assert any(
                        (s.text.strip() if s.text else None) for s in doc.sections
                    )
                elif doc.metadata["type"] == "collection":
                    assert len(doc.sections) >= 1

    def test_outline_connector_invalid_credentials(self) -> None:
        """Should raise with invalid/missing credentials."""
        connector = OutlineConnector()

        # Missing everything
        with pytest.raises(ConnectorMissingCredentialError):
            connector.load_credentials({})

        # Missing base URL
        with pytest.raises(ConnectorMissingCredentialError):
            connector.load_credentials({"outline_api_token": "token"})

        # Missing token
        with pytest.raises(ConnectorMissingCredentialError):
            connector.load_credentials({"outline_base_url": "https://example.com"})

        # Invalid credentials will be caught during validation, not credential loading
        connector.load_credentials(
            {
                "outline_base_url": "https://invalid.invalid",
                "outline_api_token": "invalid",
            }
        )
        # Validation should catch invalid credentials
        with pytest.raises((CredentialExpiredError, ConnectorValidationError)):
            connector.validate_connector_settings()

    def test_outline_connector_invalid_url(self) -> None:
        """Invalid URL should raise validation error during validation."""
        connector = OutlineConnector()

        # Load credentials with invalid URL
        connector.load_credentials(
            {
                "outline_base_url": "https://not-a-valid-url.invalid",
                "outline_api_token": "token",
            }
        )

        # Validation should catch invalid URL
        with pytest.raises(ConnectorValidationError):
            connector.validate_connector_settings()


================================================
FILE: backend/tests/daily/connectors/salesforce/test_salesforce_connector.py
================================================
import json
import os
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Any
from typing import cast

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.salesforce.connector import SalesforceConnector
from onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE


def extract_key_value_pairs_to_set(
    list_of_unparsed_key_value_strings: list[str],
) -> set[str]:
    set_of_key_value_pairs = set()
    for string_key_value_pairs in list_of_unparsed_key_value_strings:
        list_of_parsed_key_values = string_key_value_pairs.split("\n")
        for key_value_pair in list_of_parsed_key_values:
            set_of_key_value_pairs.add(key_value_pair.strip())
    return set_of_key_value_pairs


def _load_reference_data(
    file_name: str = "test_salesforce_data.json",
) -> dict[str, str | list[str] | dict[str, Any] | list[dict[str, Any]]]:
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def salesforce_connector() -> SalesforceConnector:
    connector = SalesforceConnector(
        requested_objects=[ACCOUNT_OBJECT_TYPE, "Contact", "Opportunity"],
    )

    username = os.environ["SF_USERNAME"]
    password = os.environ["SF_PASSWORD"]
    security_token = os.environ["SF_SECURITY_TOKEN"]

    connector.load_credentials(
        {
            "sf_username": username,
            "sf_password": password,
            "sf_security_token": security_token,
        }
    )
    return connector


# TODO: make the credentials not expire
@pytest.mark.skip(
    reason=(
        "Credentials change over time, so this test will fail if run when the credentials expire."
    )
)
def test_salesforce_connector_basic(salesforce_connector: SalesforceConnector) -> None:
    test_data = _load_reference_data()
    target_test_doc: Document | None = None
    all_docs: list[Document] = []
    for doc_batch in salesforce_connector.load_from_state():
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)
            if doc.id == test_data["id"]:
                target_test_doc = doc
                break

    # The number of docs here seems to change actively so do a very loose check
    # as of 2025-03-28 it was around 32472
    assert len(all_docs) > 32000
    assert len(all_docs) < 40000

    assert target_test_doc is not None

    # Set of received links
    received_links: set[str] = set()
    # List of received text fields, which contain key-value pairs seperated by newlines
    received_text: list[str] = []

    # Iterate over the sections of the target test doc to extract the links and text
    for section in target_test_doc.sections:
        assert section.link
        assert section.text
        received_links.add(section.link)
        received_text.append(section.text)

    # Check that the received links match the expected links from the test data json
    expected_links = set(test_data["expected_links"])
    assert received_links == expected_links

    # Check that the received key-value pairs from the text fields match the expected key-value pairs from the test data json
    expected_text = test_data["expected_text"]
    if not isinstance(expected_text, list):
        raise ValueError("Expected text is not a list")

    unparsed_expected_key_value_pairs: list[str] = cast(list[str], expected_text)
    received_key_value_pairs = extract_key_value_pairs_to_set(received_text)
    expected_key_value_pairs = extract_key_value_pairs_to_set(
        unparsed_expected_key_value_pairs
    )
    assert received_key_value_pairs == expected_key_value_pairs

    # Check that the rest of the fields match the expected fields from the test data json
    assert target_test_doc.source == DocumentSource.SALESFORCE
    assert target_test_doc.semantic_identifier == test_data["semantic_identifier"]
    assert target_test_doc.metadata == test_data["metadata"]

    assert target_test_doc.primary_owners is not None
    primary_owner = target_test_doc.primary_owners[0]
    expected_primary_owner = test_data["primary_owners"]
    assert isinstance(expected_primary_owner, dict)
    assert primary_owner.email == expected_primary_owner["email"]
    assert primary_owner.first_name == expected_primary_owner["first_name"]
    assert primary_owner.last_name == expected_primary_owner["last_name"]

    secondary_owners = (
        [owner.model_dump() for owner in target_test_doc.secondary_owners]
        if target_test_doc.secondary_owners
        else None
    )
    assert secondary_owners == test_data["secondary_owners"]
    assert target_test_doc.title == test_data["title"]


@pytest.mark.skip(
    reason=(
        "All Salesforce tests need to be re-thought + made less flakey. "
        "We need to handle credential resets + the rate limits (move to a smaller dataset)"
    )
)
def test_salesforce_connector_poll_source(
    salesforce_connector: SalesforceConnector,
) -> None:

    intermediate_time = datetime(
        2024, 6, 3, 0, 0, 0, tzinfo=timezone.utc
    )  # roughly 92 docs

    # intermediate_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)  # roughly 1100 to 1200 docs

    all_docs_1: list[Document] = []
    for doc_batch in salesforce_connector.poll_source(0, intermediate_time.timestamp()):
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs_1.append(doc)

    len_1 = len(all_docs_1)

    # NOTE: this is the correct document count.
    # If you were to inspect the underlying db, however, the partial download results in
    #  an incomplete set of object relationships. This is expected.

    assert len_1 > 85 and len_1 < 100
    print(f"all_docs_1 length: {len(all_docs_1)}")

    # assert len_1 > 1100 and len_1 < 1200
    # print(f"all_docs_1 length: {len(all_docs_1)}")

    # leave this out for the moment because it's slow to process 30k docs
    # all_docs_2: list[Document] = []
    # for doc_batch in salesforce_connector.poll_source(
    #     intermediate_time.timestamp(), time.time()
    # ):
    #     for doc in doc_batch:
    #         all_docs_2.append(doc)

    # len_2 = len(all_docs_2)
    # assert len_2 > 31000

    # print(f"all_docs_2 length: {len(all_docs_2)}")


# TODO: make the credentials not expire
@pytest.mark.skip(
    reason=(
        "Credentials change over time, so this test will fail if run when the credentials expire."
    )
)
def test_salesforce_connector_slim(salesforce_connector: SalesforceConnector) -> None:
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
    for doc_batch in salesforce_connector.load_from_state():
        all_full_doc_ids.update(
            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # Get all doc IDs from the slim connector
    all_slim_doc_ids = set()
    for slim_doc_batch in salesforce_connector.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # The set of full doc IDs should be always be a subset of the slim doc IDs
    assert all_full_doc_ids.issubset(all_slim_doc_ids)


================================================
FILE: backend/tests/daily/connectors/salesforce/test_salesforce_data.json
================================================
{
  "id": "SALESFORCE_001bm00000eu6n5AAA",
  "expected_links": [
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpEeAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqd3AAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoKiAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvDSAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrmHAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrl2AAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvejAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStlvAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpPfAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrP9AAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvlMAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESt3JAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoBkAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStw2AAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrkMAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESojKAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuLEAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoSIAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu2YAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvgSAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESurnAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrnqAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoB5AAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJuAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrfyAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/001bm00000eu6n5AAA",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpUHAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsgGAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESr7UAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu1BAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpqzAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESplZAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvJ3AAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESurKAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStSiAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJFAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu8xAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqfzAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqsrAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStoZAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsIUAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsAGAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESv8GAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrOKAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoUmAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESudKAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJ8AAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvf2AAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESw3qAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESugRAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESr18AAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqV1AAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuLVAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpjoAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqULAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuCAAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrfpAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESp5YAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrMNAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStaUAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESt5LAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrtcAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESomaAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrtIAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoToAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuWLAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrWvAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsJEAA1",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsxwAAD",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvUgAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvWjAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStBuAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpZiAAL",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuhYAAT",
    "https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuWAAA1"
  ],
  "expected_text": [
    "IsDeleted: false\nBillingCity: Shaykh al \u00e1\u00b8\u00a8ad\u00c4\u00abd\nName: Voonder\nCleanStatus: Pending\nBillingStreet: 12 Cambridge Parkway",
    "Email: eslayqzs@icio.us\nIsDeleted: false\nLastName: Slay\nIsEmailBounced: false\nFirstName: Ebeneser\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ptweedgdh@umich.edu\nIsDeleted: false\nLastName: Tweed\nIsEmailBounced: false\nFirstName: Paulita\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ehurnellnlx@facebook.com\nIsDeleted: false\nLastName: Hurnell\nIsEmailBounced: false\nFirstName: Eliot\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ccarik4q4@google.it\nIsDeleted: false\nLastName: Carik\nIsEmailBounced: false\nFirstName: Chadwick\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cvannozziina6@moonfruit.com\nIsDeleted: false\nLastName: Vannozzii\nIsEmailBounced: false\nFirstName: Christophorus\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: mikringill2kz@hugedomains.com\nIsDeleted: false\nLastName: Ikringill\nIsEmailBounced: false\nFirstName: Meghann\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: bgrinvalray@fda.gov\nIsDeleted: false\nLastName: Grinval\nIsEmailBounced: false\nFirstName: Berti\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: aollanderhr7@cam.ac.uk\nIsDeleted: false\nLastName: Ollander\nIsEmailBounced: false\nFirstName: Annemarie\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: rwhitesideq38@gravatar.com\nIsDeleted: false\nLastName: Whiteside\nIsEmailBounced: false\nFirstName: Rolando\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: vkrafthmz@techcrunch.com\nIsDeleted: false\nLastName: Kraft\nIsEmailBounced: false\nFirstName: Vidovik\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: jhillaut@4shared.com\nIsDeleted: false\nLastName: Hill\nIsEmailBounced: false\nFirstName: Janel\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: lralstonycs@discovery.com\nIsDeleted: false\nLastName: Ralston\nIsEmailBounced: false\nFirstName: Lorrayne\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: blyttlewba@networkadvertising.org\nIsDeleted: false\nLastName: Lyttle\nIsEmailBounced: false\nFirstName: Ban\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: pplummernvf@technorati.com\nIsDeleted: false\nLastName: Plummer\nIsEmailBounced: false\nFirstName: Pete\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: babrahamoffxpb@theatlantic.com\nIsDeleted: false\nLastName: Abrahamoff\nIsEmailBounced: false\nFirstName: Brander\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ahargieym0@homestead.com\nIsDeleted: false\nLastName: Hargie\nIsEmailBounced: false\nFirstName: Aili\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: hstotthp2@yelp.com\nIsDeleted: false\nLastName: Stott\nIsEmailBounced: false\nFirstName: Hartley\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: jganniclifftuvj@blinklist.com\nIsDeleted: false\nLastName: Ganniclifft\nIsEmailBounced: false\nFirstName: Jamima\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ldodelly8q@ed.gov\nIsDeleted: false\nLastName: Dodell\nIsEmailBounced: false\nFirstName: Lynde\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: rmilner3cp@smh.com.au\nIsDeleted: false\nLastName: Milner\nIsEmailBounced: false\nFirstName: Ralph\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: gghiriardellic19@state.tx.us\nIsDeleted: false\nLastName: Ghiriardelli\nIsEmailBounced: false\nFirstName: Garv\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: rhubatschfpu@nature.com\nIsDeleted: false\nLastName: Hubatsch\nIsEmailBounced: false\nFirstName: Rose\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: mtrenholme1ws@quantcast.com\nIsDeleted: false\nLastName: Trenholme\nIsEmailBounced: false\nFirstName: Mariejeanne\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: jmussettpbd@over-blog.com\nIsDeleted: false\nLastName: Mussett\nIsEmailBounced: false\nFirstName: Juliann\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: bgoroni145@illinois.edu\nIsDeleted: false\nLastName: Goroni\nIsEmailBounced: false\nFirstName: Bernarr\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: afalls3ph@theguardian.com\nIsDeleted: false\nLastName: Falls\nIsEmailBounced: false\nFirstName: Angelia\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: lswettjoi@go.com\nIsDeleted: false\nLastName: Swett\nIsEmailBounced: false\nFirstName: Levon\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: emullinsz38@dailymotion.com\nIsDeleted: false\nLastName: Mullins\nIsEmailBounced: false\nFirstName: Elsa\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ibernettehco@ebay.co.uk\nIsDeleted: false\nLastName: Bernette\nIsEmailBounced: false\nFirstName: Ingrid\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: trisleybtt@simplemachines.org\nIsDeleted: false\nLastName: Risley\nIsEmailBounced: false\nFirstName: Toma\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: rgypsonqx1@goodreads.com\nIsDeleted: false\nLastName: Gypson\nIsEmailBounced: false\nFirstName: Reed\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cposvneri28@jiathis.com\nIsDeleted: false\nLastName: Posvner\nIsEmailBounced: false\nFirstName: Culley\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: awilmut2rz@geocities.jp\nIsDeleted: false\nLastName: Wilmut\nIsEmailBounced: false\nFirstName: Andy\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: aluckwellra5@exblog.jp\nIsDeleted: false\nLastName: Luckwell\nIsEmailBounced: false\nFirstName: Andreana\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: irollings26j@timesonline.co.uk\nIsDeleted: false\nLastName: Rollings\nIsEmailBounced: false\nFirstName: Ibrahim\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: gspireqpd@g.co\nIsDeleted: false\nLastName: Spire\nIsEmailBounced: false\nFirstName: Gaelan\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: sbezleyk2y@acquirethisname.com\nIsDeleted: false\nLastName: Bezley\nIsEmailBounced: false\nFirstName: Sindee\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: icollerrr@flickr.com\nIsDeleted: false\nLastName: Coller\nIsEmailBounced: false\nFirstName: Inesita\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: kfolliott1bo@nature.com\nIsDeleted: false\nLastName: Folliott\nIsEmailBounced: false\nFirstName: Kennan\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: kroofjfo@gnu.org\nIsDeleted: false\nLastName: Roof\nIsEmailBounced: false\nFirstName: Karlik\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: lcovotti8s4@rediff.com\nIsDeleted: false\nLastName: Covotti\nIsEmailBounced: false\nFirstName: Lucho\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: gpatriskson1rs@census.gov\nIsDeleted: false\nLastName: Patriskson\nIsEmailBounced: false\nFirstName: Gardener\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: spidgleyqvw@usgs.gov\nIsDeleted: false\nLastName: Pidgley\nIsEmailBounced: false\nFirstName: Simona\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cbecarrak0i@over-blog.com\nIsDeleted: false\nLastName: Becarra\nIsEmailBounced: false\nFirstName: Cally\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: aparkman9td@bbc.co.uk\nIsDeleted: false\nLastName: Parkman\nIsEmailBounced: false\nFirstName: Agneta\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: bboddingtonhn@quantcast.com\nIsDeleted: false\nLastName: Boddington\nIsEmailBounced: false\nFirstName: Betta\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: dcasementx0p@cafepress.com\nIsDeleted: false\nLastName: Casement\nIsEmailBounced: false\nFirstName: Dannie\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: hzornbhe@latimes.com\nIsDeleted: false\nLastName: Zorn\nIsEmailBounced: false\nFirstName: Haleigh\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cfifieldbjb@blogspot.com\nIsDeleted: false\nLastName: Fifield\nIsEmailBounced: false\nFirstName: Christalle\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ddewerson4t3@skype.com\nIsDeleted: false\nLastName: Dewerson\nIsEmailBounced: false\nFirstName: Dyann\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: khullock52p@sohu.com\nIsDeleted: false\nLastName: Hullock\nIsEmailBounced: false\nFirstName: Kellina\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: tfremantle32n@bandcamp.com\nIsDeleted: false\nLastName: Fremantle\nIsEmailBounced: false\nFirstName: Turner\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: sbernardtylp@nps.gov\nIsDeleted: false\nLastName: Bernardt\nIsEmailBounced: false\nFirstName: Selina\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: smcgettigan8kk@slideshare.net\nIsDeleted: false\nLastName: McGettigan\nIsEmailBounced: false\nFirstName: Sada\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: wdelafontvgn@businesswire.com\nIsDeleted: false\nLastName: Delafont\nIsEmailBounced: false\nFirstName: West\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: lbelsher9ne@indiatimes.com\nIsDeleted: false\nLastName: Belsher\nIsEmailBounced: false\nFirstName: Lou\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cgoody27y@blogtalkradio.com\nIsDeleted: false\nLastName: Goody\nIsEmailBounced: false\nFirstName: Colene\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: cstodejzz@ucoz.ru\nIsDeleted: false\nLastName: Stode\nIsEmailBounced: false\nFirstName: Curcio\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: abromidgejb@china.com.cn\nIsDeleted: false\nLastName: Bromidge\nIsEmailBounced: false\nFirstName: Ariela\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ldelgardilloqvp@xrea.com\nIsDeleted: false\nLastName: Delgardillo\nIsEmailBounced: false\nFirstName: Lauralee\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: dcroal9t4@businessinsider.com\nIsDeleted: false\nLastName: Croal\nIsEmailBounced: false\nFirstName: Devlin\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: dclarageqzb@wordpress.com\nIsDeleted: false\nLastName: Clarage\nIsEmailBounced: false\nFirstName: Dre\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: dthirlwall3jf@taobao.com\nIsDeleted: false\nLastName: Thirlwall\nIsEmailBounced: false\nFirstName: Dareen\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: tkeddie2lj@wiley.com\nIsDeleted: false\nLastName: Keddie\nIsEmailBounced: false\nFirstName: Tandi\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: jrimingtoni3i@istockphoto.com\nIsDeleted: false\nLastName: Rimington\nIsEmailBounced: false\nFirstName: Judy\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: gtroynet@slashdot.org\nIsDeleted: false\nLastName: Troy\nIsEmailBounced: false\nFirstName: Gail\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: ebunneyh0n@meetup.com\nIsDeleted: false\nLastName: Bunney\nIsEmailBounced: false\nFirstName: Efren\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: yhaken8p3@slate.com\nIsDeleted: false\nLastName: Haken\nIsEmailBounced: false\nFirstName: Yard\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: nolliffeq6q@biblegateway.com\nIsDeleted: false\nLastName: Olliffe\nIsEmailBounced: false\nFirstName: Nani\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: bgalia9jz@odnoklassniki.ru\nIsDeleted: false\nLastName: Galia\nIsEmailBounced: false\nFirstName: Berrie\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: djedrzej3v1@google.com\nIsDeleted: false\nLastName: Jedrzej\nIsEmailBounced: false\nFirstName: Deanne\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: mcamiesh1t@fc2.com\nIsDeleted: false\nLastName: Camies\nIsEmailBounced: false\nFirstName: Mikaela\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: csunshineqni@state.tx.us\nIsDeleted: false\nLastName: Sunshine\nIsEmailBounced: false\nFirstName: Curtis\nIsPriorityRecord: false\nCleanStatus: Pending",
    "Email: fiannellib46@marriott.com\nIsDeleted: false\nLastName: Iannelli\nIsEmailBounced: false\nFirstName: Felicio\nIsPriorityRecord: false\nCleanStatus: Pending"
  ],
  "semantic_identifier": "Voonder",
  "metadata": {"object_type": "Account"},
  "primary_owners": {"email": "hagen@danswer.ai", "first_name": "Hagen", "last_name": "oneill"},
  "secondary_owners": null,
  "title": null
}


================================================
FILE: backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py
================================================
import os
import time
from dataclasses import dataclass
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.sharepoint.connector import SharepointAuthMethod
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.enums import HierarchyNodeType
from tests.daily.connectors.utils import load_all_from_connector

# NOTE: Sharepoint site for tests is "sharepoint-tests"


@dataclass
class ExpectedDocument:
    semantic_identifier: str
    content: str
    folder_path: str | None = None
    library: str = "Shared Documents"  # Default to main library
    expected_link_substrings: list[str] | None = None


EXPECTED_DOCUMENTS = [
    ExpectedDocument(
        semantic_identifier="test1.docx",
        content="test1",
        folder_path="test",
        expected_link_substrings=["_layouts/15/Doc.aspx", "file=test1.docx"],
    ),
    ExpectedDocument(
        semantic_identifier="test2.docx",
        content="test2",
        folder_path="test/nested with spaces",
        expected_link_substrings=["_layouts/15/Doc.aspx", "file=test2.docx"],
    ),
    ExpectedDocument(
        semantic_identifier="should-not-index-on-specific-folder.docx",
        content="should-not-index-on-specific-folder",
        folder_path=None,  # root folder
        expected_link_substrings=[
            "_layouts/15/Doc.aspx",
            "file=should-not-index-on-specific-folder.docx",
        ],
    ),
    ExpectedDocument(
        semantic_identifier="other.docx",
        content="other",
        folder_path=None,
        library="Other Library",
        expected_link_substrings=["_layouts/15/Doc.aspx", "file=other.docx"],
    ),
]

EXPECTED_PAGES = [
    ExpectedDocument(
        semantic_identifier="CollabHome",
        content=(
            "# Home\n\nDisplay recent news.\n\n## News\n\nShow recent activities from your site\n\n"
            "## Site activity\n\n## Quick links\n\nLearn about a team site\n\nLearn how to add a page\n\n"
            "Add links to important documents and pages.\n\n## Quick links\n\nDocuments\n\n"
            "Add a document library\n\n## Document library"
        ),
        folder_path=None,
        expected_link_substrings=["SitePages/CollabHome.aspx"],
    ),
    ExpectedDocument(
        semantic_identifier="Home",
        content="# Home",
        folder_path=None,
        expected_link_substrings=["SitePages/Home.aspx"],
    ),
]


def verify_document_metadata(doc: Document) -> None:
    """Verify common metadata that should be present on all documents."""
    assert isinstance(doc.doc_updated_at, datetime)
    assert doc.doc_updated_at.tzinfo == timezone.utc
    assert doc.source == DocumentSource.SHAREPOINT
    assert doc.primary_owners is not None
    assert len(doc.primary_owners) == 1
    owner = doc.primary_owners[0]
    assert owner.display_name is not None
    assert owner.email is not None


def verify_document_content(doc: Document, expected: ExpectedDocument) -> None:
    """Verify a document matches its expected content."""
    assert doc.semantic_identifier == expected.semantic_identifier
    assert len(doc.sections) == 1
    assert doc.sections[0].text is not None
    assert expected.content == doc.sections[0].text

    if expected.expected_link_substrings is not None:
        actual_link = doc.sections[0].link
        assert actual_link is not None, (
            f"Expected section link containing {expected.expected_link_substrings} "
            f"for '{expected.semantic_identifier}', but link was None"
        )
        for substr in expected.expected_link_substrings:
            assert substr in actual_link, (
                f"Section link for '{expected.semantic_identifier}' "
                f"missing expected substring '{substr}', "
                f"actual link: '{actual_link}'"
            )

    verify_document_metadata(doc)


def find_document(documents: list[Document], semantic_identifier: str) -> Document:
    """Find a document by its semantic identifier."""
    matching_docs = [
        d for d in documents if d.semantic_identifier == semantic_identifier
    ]
    assert (
        len(matching_docs) == 1
    ), f"Expected exactly one document with identifier {semantic_identifier}"
    return matching_docs[0]


@pytest.fixture
def mock_store_image() -> MagicMock:
    """Mock store_image_and_create_section to return a predefined ImageSection."""
    mock = MagicMock()
    mock.return_value = (
        ImageSection(image_file_id="mocked-file-id", link="https://example.com/image"),
        "mocked-file-id",
    )
    return mock


@pytest.fixture
def sharepoint_credentials() -> dict[str, str]:
    return {
        "sp_client_id": os.environ["SHAREPOINT_CLIENT_ID"],
        "sp_client_secret": os.environ["SHAREPOINT_CLIENT_SECRET"],
        "sp_directory_id": os.environ["SHAREPOINT_CLIENT_DIRECTORY_ID"],
    }


def test_sharepoint_connector_all_sites__docs_only(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with no sites
        connector = SharepointConnector(
            include_site_pages=False, include_site_documents=True
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Not asserting expected sites because that can change in test tenant at any time
        # Finding any docs is good enough to verify that the connector is working
        document_batches = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        )
        assert document_batches, "Should find documents from all sites"


def test_sharepoint_connector_all_sites__pages_only(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with no docs
        connector = SharepointConnector(
            include_site_pages=True, include_site_documents=False
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Not asserting expected sites because that can change in test tenant at any time
        # Finding any docs is good enough to verify that the connector is working
        document_batches = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        )
        assert document_batches, "Should find site pages from all sites"


def test_sharepoint_connector_specific_folder(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with the test site URL and specific folder
        connector = SharepointConnector(
            sites=[os.environ["SHAREPOINT_SITE"] + "/Shared Documents/test"],
            include_site_pages=False,
            include_site_documents=True,
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Get all documents
        found_documents: list[Document] = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        ).documents

        # Should only find documents in the test folder
        test_folder_docs = [
            doc
            for doc in EXPECTED_DOCUMENTS
            if doc.folder_path and doc.folder_path.startswith("test")
        ]
        assert len(found_documents) == len(
            test_folder_docs
        ), "Should only find documents in test folder"

        # Verify each expected document
        for expected in test_folder_docs:
            doc = find_document(found_documents, expected.semantic_identifier)
            verify_document_content(doc, expected)


def test_sharepoint_connector_root_folder__docs_only(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with the base site URL
        connector = SharepointConnector(
            sites=[os.environ["SHAREPOINT_SITE"]],
            include_site_pages=False,
            include_site_documents=True,
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Get all documents
        found_documents: list[Document] = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        ).documents

        assert len(found_documents) == len(
            EXPECTED_DOCUMENTS
        ), "Should find all documents in main library"

        # Verify each expected document
        for expected in EXPECTED_DOCUMENTS:
            doc = find_document(found_documents, expected.semantic_identifier)
            verify_document_content(doc, expected)


def test_sharepoint_connector_other_library(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with the other library
        connector = SharepointConnector(
            sites=[
                os.environ["SHAREPOINT_SITE"] + "/Other Library",
            ],
            include_site_pages=False,
            include_site_documents=True,
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Get all documents
        found_documents: list[Document] = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        ).documents
        expected_documents: list[ExpectedDocument] = [
            doc for doc in EXPECTED_DOCUMENTS if doc.library == "Other Library"
        ]

        # Should find all documents in `Other Library`
        assert len(found_documents) == len(
            expected_documents
        ), "Should find all documents in `Other Library`"

        # Verify each expected document
        for expected in expected_documents:
            doc = find_document(found_documents, expected.semantic_identifier)
            verify_document_content(doc, expected)


def test_sharepoint_connector_poll(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        # Initialize connector with the base site URL
        connector = SharepointConnector(sites=[os.environ["SHAREPOINT_SITE"]])

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Set time window to only capture test1.docx (modified at 2025-01-28 20:51:42+00:00)
        start = datetime(
            2025, 1, 28, 20, 51, 30, tzinfo=timezone.utc
        )  # 12 seconds before
        end = datetime(2025, 1, 28, 20, 51, 50, tzinfo=timezone.utc)  # 8 seconds after

        # Get documents within the time window
        found_documents: list[Document] = load_all_from_connector(
            connector=connector,
            start=start.timestamp(),
            end=end.timestamp(),
        ).documents

        # Should only find test1.docx
        assert (
            len(found_documents) == 1
        ), "Should only find one document in the time window"
        doc = found_documents[0]
        assert doc.semantic_identifier == "test1.docx"
        verify_document_content(
            doc,
            next(
                d for d in EXPECTED_DOCUMENTS if d.semantic_identifier == "test1.docx"
            ),
        )


def test_sharepoint_connector_pages(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        connector = SharepointConnector(
            sites=[os.environ["SHAREPOINT_SITE"]],
            include_site_pages=True,
            include_site_documents=False,
        )

        connector.load_credentials(sharepoint_credentials)

        found_documents = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        ).documents

        assert len(found_documents) == len(
            EXPECTED_PAGES
        ), "Should find all pages in test site"

        for expected in EXPECTED_PAGES:
            doc = find_document(found_documents, expected.semantic_identifier)
            verify_document_content(doc, expected)


def verify_hierarchy_nodes(
    hierarchy_nodes: list[HierarchyNode],
    documents: list[Document],
    expected_site_url: str,
) -> None:
    """Verify hierarchy nodes have correct structure and relationships."""
    # Build a set of all raw_node_ids for parent validation
    all_node_ids = {node.raw_node_id for node in hierarchy_nodes}

    # Track nodes by type
    site_nodes = [n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.SITE]
    drive_nodes = [n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.DRIVE]
    folder_nodes = [
        n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.FOLDER
    ]

    # Verify we have at least one site node
    assert len(site_nodes) >= 1, "Should have at least one SITE hierarchy node"
    assert len(drive_nodes) >= 1, "Should have at least one DRIVE hierarchy node"
    assert len(folder_nodes) >= 1, "Should have at least one FOLDER hierarchy node"

    # Verify expected site is in hierarchy
    site_node_ids = {n.raw_node_id for n in site_nodes}
    assert (
        expected_site_url in site_node_ids
    ), f"Expected site {expected_site_url} not found in hierarchy nodes. Found sites: {site_node_ids}"

    # Verify no duplicate raw_node_ids
    assert len(all_node_ids) == len(
        hierarchy_nodes
    ), "Should not have duplicate hierarchy nodes"

    # Verify all hierarchy nodes have required fields
    for node in hierarchy_nodes:
        assert node.raw_node_id, "All nodes should have raw_node_id"
        assert node.display_name, "All nodes should have display_name"
        assert node.link, "All nodes should have link"
        assert node.node_type in [
            HierarchyNodeType.SITE,
            HierarchyNodeType.DRIVE,
            HierarchyNodeType.FOLDER,
        ], f"Unexpected node type: {node.node_type}"

    # Verify parent relationships
    for node in hierarchy_nodes:
        if node.node_type == HierarchyNodeType.SITE:
            # Sites should have no parent (direct child of SOURCE)
            assert node.raw_parent_id is None, "SITE nodes should have no parent"
        elif node.node_type == HierarchyNodeType.DRIVE:
            # Drives should have a site as parent
            assert node.raw_parent_id is not None, "DRIVE nodes should have a parent"
            assert (
                node.raw_parent_id in site_node_ids
            ), f"DRIVE parent {node.raw_parent_id} should be a SITE node"
        elif node.node_type == HierarchyNodeType.FOLDER:
            # Folders should have either a drive or another folder as parent
            assert node.raw_parent_id is not None, "FOLDER nodes should have a parent"
            assert (
                node.raw_parent_id in all_node_ids
            ), f"FOLDER parent {node.raw_parent_id} should exist in hierarchy"

    # Verify documents have parent_hierarchy_raw_node_id set
    for doc in documents:
        if doc.parent_hierarchy_raw_node_id:
            assert (
                doc.parent_hierarchy_raw_node_id in all_node_ids
            ), f"Document {doc.semantic_identifier} parent {doc.parent_hierarchy_raw_node_id} should exist in hierarchy"


def test_sharepoint_connector_hierarchy_nodes(
    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001
    mock_store_image: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    """Test that the SharePoint connector yields proper hierarchy nodes."""
    with patch(
        "onyx.connectors.sharepoint.connector.store_image_and_create_section",
        mock_store_image,
    ):
        site_url = os.environ["SHAREPOINT_SITE"]

        # Initialize connector with the test site
        connector = SharepointConnector(
            sites=[site_url],
            include_site_pages=True,
            include_site_documents=True,
        )

        # Load credentials
        connector.load_credentials(sharepoint_credentials)

        # Get all documents and hierarchy nodes
        result = load_all_from_connector(
            connector=connector,
            start=0,
            end=time.time(),
        )

        found_documents = result.documents
        hierarchy_nodes = result.hierarchy_nodes

        # Should have hierarchy nodes
        assert len(hierarchy_nodes) > 0, "Should have hierarchy nodes"

        # Verify hierarchy structure
        verify_hierarchy_nodes(hierarchy_nodes, found_documents, site_url)

        # Verify we have the expected node types
        node_types = {n.node_type for n in hierarchy_nodes}
        assert HierarchyNodeType.SITE in node_types, "Should have SITE nodes"
        assert HierarchyNodeType.DRIVE in node_types, "Should have DRIVE nodes"

        # Should have folder nodes if documents are in folders
        docs_in_folders = [d for d in EXPECTED_DOCUMENTS if d.folder_path]
        if docs_in_folders:
            assert (
                HierarchyNodeType.FOLDER in node_types
            ), "Should have FOLDER nodes since documents are in folders"

        # Verify all documents have parent_hierarchy_raw_node_id set
        for doc in found_documents:
            assert (
                doc.parent_hierarchy_raw_node_id is not None
            ), f"Document {doc.semantic_identifier} should have parent_hierarchy_raw_node_id set"


@pytest.fixture
def sharepoint_cert_credentials() -> dict[str, str]:
    return {
        "authentication_method": SharepointAuthMethod.CERTIFICATE.value,
        "sp_client_id": os.environ["PERM_SYNC_SHAREPOINT_CLIENT_ID"],
        "sp_private_key": os.environ["PERM_SYNC_SHAREPOINT_PRIVATE_KEY"],
        "sp_certificate_password": os.environ[
            "PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD"
        ],
        "sp_directory_id": os.environ["PERM_SYNC_SHAREPOINT_DIRECTORY_ID"],
    }


def test_resolve_tenant_domain_from_site_urls(
    sharepoint_cert_credentials: dict[str, str],
) -> None:
    """Verify that certificate auth resolves the tenant domain from site URLs
    without calling the /organization endpoint."""
    site_url = os.environ["SHAREPOINT_SITE"]
    connector = SharepointConnector(sites=[site_url])
    connector.load_credentials(sharepoint_cert_credentials)

    assert connector.sp_tenant_domain is not None
    assert len(connector.sp_tenant_domain) > 0
    # The tenant domain should match the first label of the site URL hostname
    from urllib.parse import urlsplit

    expected = urlsplit(site_url).hostname.split(".")[0]  # type: ignore
    assert connector.sp_tenant_domain == expected


def test_resolve_tenant_domain_from_root_site(
    sharepoint_cert_credentials: dict[str, str],
) -> None:
    """Verify that certificate auth resolves the tenant domain via the root
    site endpoint when no site URLs are configured."""
    connector = SharepointConnector(sites=[])
    connector.load_credentials(sharepoint_cert_credentials)

    assert connector.sp_tenant_domain is not None
    assert len(connector.sp_tenant_domain) > 0


================================================
FILE: backend/tests/daily/connectors/slab/test_slab_connector.py
================================================
import json
import os
import time
from pathlib import Path

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.slab.connector import SlabConnector


def load_test_data(file_name: str = "test_slab_data.json") -> dict[str, str]:
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def slab_connector() -> SlabConnector:
    connector = SlabConnector(
        base_url="https://onyx-test.slab.com/",
    )
    connector.load_credentials(
        {
            "slab_bot_token": os.environ["SLAB_BOT_TOKEN"],
        }
    )
    return connector


@pytest.mark.xfail(
    reason=(
        "Need a test account with a slab subscription to run this test.Trial only lasts 14 days."
    )
)
def test_slab_connector_basic(slab_connector: SlabConnector) -> None:
    all_docs: list[Document] = []
    target_test_doc_id = "jcp6cohu"
    target_test_doc: Document | None = None
    for doc_batch in slab_connector.poll_source(0, time.time()):
        for doc in doc_batch:
            if not isinstance(doc, Document):
                continue
            all_docs.append(doc)
            if doc.id == target_test_doc_id:
                target_test_doc = doc

    assert len(all_docs) == 6
    assert target_test_doc is not None

    desired_test_data = load_test_data()
    assert (
        target_test_doc.semantic_identifier == desired_test_data["semantic_identifier"]
    )
    assert target_test_doc.source == DocumentSource.SLAB
    assert target_test_doc.metadata == {}
    assert target_test_doc.primary_owners is None
    assert target_test_doc.secondary_owners is None
    assert target_test_doc.title is None
    assert target_test_doc.from_ingestion_api is False
    assert target_test_doc.additional_info is None

    assert len(target_test_doc.sections) == 1
    section = target_test_doc.sections[0]
    # Need to replace the weird apostrophe with a normal one
    assert section.text is not None
    assert section.text.replace("\u2019", "'") == desired_test_data["section_text"]
    assert section.link == desired_test_data["link"]


@pytest.mark.xfail(
    reason=(
        "Need a test account with a slab subscription to run this test.Trial only lasts 14 days."
    )
)
def test_slab_connector_slim(slab_connector: SlabConnector) -> None:
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
    for doc_batch in slab_connector.load_from_state():
        all_full_doc_ids.update(
            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # Get all doc IDs from the slim connector
    all_slim_doc_ids = set()
    for slim_doc_batch in slab_connector.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # The set of full doc IDs should be always be a subset of the slim doc IDs
    assert all_full_doc_ids.issubset(all_slim_doc_ids)


================================================
FILE: backend/tests/daily/connectors/slab/test_slab_data.json
================================================
{
    "section_text": "Learn about Posts\nWelcome\nThis is a post, where you can edit, share, and collaborate in real time with your team. We'd love to show you how it works!\nReading and editing\nClick the mode button to toggle between read and edit modes. You can only make changes to a post when editing.\nOrganize your posts\nWhen in edit mode, you can add topics to a post, which will keep it organized for the right 👀 to see.\nSmart mentions\nMentions are references to users, posts, topics and third party tools that show details on hover. Paste in a link for automatic conversion.\nLook back in time\nYou are ready to begin writing. You can always bring back this tour in the help menu.\nGreat job!\nYou are ready to begin writing. You can always bring back this tour in the help menu.\n\n",
    "link": "https://onyx-test.slab.com/posts/learn-about-posts-jcp6cohu",
    "semantic_identifier": "Learn about Posts"
} 

================================================
FILE: backend/tests/daily/connectors/slack/conftest.py
================================================
import os
from collections.abc import Generator
from unittest.mock import MagicMock

import pytest
from pytest import FixtureRequest
from slack_sdk import WebClient

from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.connectors.slack.connector import SlackConnector
from shared_configs.contextvars import get_current_tenant_id


@pytest.fixture
def mock_slack_client() -> MagicMock:
    mock = MagicMock(spec=WebClient)
    return mock


@pytest.fixture
def slack_connector(
    request: FixtureRequest,
    mock_slack_client: MagicMock,
    slack_credentials_provider: OnyxStaticCredentialsProvider,
) -> Generator[SlackConnector]:
    channel: str | None = request.param if hasattr(request, "param") else None
    connector = SlackConnector(
        channels=[channel] if channel else None,
        channel_regex_enabled=False,
        use_redis=False,
    )
    connector.client = mock_slack_client
    connector.set_credentials_provider(credentials_provider=slack_credentials_provider)
    yield connector


@pytest.fixture
def slack_credentials_provider() -> OnyxStaticCredentialsProvider:
    CI_ENV_VAR = "SLACK_BOT_TOKEN"
    LOCAL_ENV_VAR = "ONYX_BOT_SLACK_BOT_TOKEN"

    slack_bot_token = os.environ.get(CI_ENV_VAR, os.environ.get(LOCAL_ENV_VAR))
    if not slack_bot_token:
        raise RuntimeError(
            f"No slack credentials found; either set the {CI_ENV_VAR} env-var or the {LOCAL_ENV_VAR} env-var"
        )

    return OnyxStaticCredentialsProvider(
        tenant_id=get_current_tenant_id(),
        connector_name="slack",
        credential_json={
            "slack_bot_token": slack_bot_token,
        },
    )


================================================
FILE: backend/tests/daily/connectors/slack/test_slack_connector.py
================================================
import time

import pytest

from onyx.connectors.slack.connector import SlackConnector
from onyx.db.enums import HierarchyNodeType
from tests.daily.connectors.utils import load_all_from_connector
from tests.daily.connectors.utils import to_sections
from tests.daily.connectors.utils import to_text_sections


def test_validate_slack_connector_settings(
    slack_connector: SlackConnector,
) -> None:
    slack_connector.validate_connector_settings()


@pytest.mark.parametrize(
    "slack_connector,expected_messages,expected_channel_name",
    [
        ["general", set(), "general"],
        ["#general", set(), "general"],
        [
            "daily-connector-test-channel",
            set(
                [
                    "Hello, world!",
                    "",
                    "Reply!",
                    "Testing again...",
                ]
            ),
            "daily-connector-test-channel",
        ],
        [
            "#daily-connector-test-channel",
            set(
                [
                    "Hello, world!",
                    "",
                    "Reply!",
                    "Testing again...",
                ]
            ),
            "daily-connector-test-channel",
        ],
    ],
    indirect=["slack_connector"],
)
def test_indexing_channels_with_message_count(
    slack_connector: SlackConnector,
    expected_messages: set[str],
    expected_channel_name: str,
) -> None:
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    result = load_all_from_connector(
        connector=slack_connector,
        start=0.0,
        end=time.time(),
    )
    docs = result.documents
    hierarchy_nodes = result.hierarchy_nodes

    # Verify messages
    actual_messages = set(to_text_sections(to_sections(docs)))
    assert expected_messages == actual_messages

    # Verify hierarchy nodes exist
    assert len(hierarchy_nodes) > 0, "Expected at least one hierarchy node (channel)"

    # Verify all hierarchy nodes are channels with correct structure
    for node in hierarchy_nodes:
        assert node.node_type == HierarchyNodeType.CHANNEL
        assert node.raw_parent_id is None  # Direct child of SOURCE
        assert node.raw_node_id  # Channel ID must be present
        assert node.display_name.startswith("#")  # e.g. "#general"

    # Verify the expected channel appears in the hierarchy nodes
    channel_display_names = {node.display_name for node in hierarchy_nodes}
    assert (
        f"#{expected_channel_name}" in channel_display_names
    ), f"Expected channel '#{expected_channel_name}' not found in hierarchy nodes. Found: {channel_display_names}"

    # Verify documents reference their parent channel
    channel_ids = {node.raw_node_id for node in hierarchy_nodes}
    for doc in docs:
        assert (
            doc.parent_hierarchy_raw_node_id is not None
        ), f"Document '{doc.id}' has no parent_hierarchy_raw_node_id"
        assert doc.parent_hierarchy_raw_node_id in channel_ids, (
            f"Document '{doc.id}' has parent_hierarchy_raw_node_id="
            f"'{doc.parent_hierarchy_raw_node_id}' which is not in "
            f"hierarchy nodes: {channel_ids}"
        )


@pytest.mark.parametrize(
    "slack_connector",
    [
        # w/o hashtag
        "doesnt-exist",
        # w/ hashtag
        "#doesnt-exist",
    ],
    indirect=True,
)
def test_indexing_channels_that_dont_exist(
    slack_connector: SlackConnector,
) -> None:
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    with pytest.raises(
        ValueError,
        match=r"Channel '.*' not found in workspace.*",
    ):
        load_all_from_connector(
            connector=slack_connector,
            start=0.0,
            end=time.time(),
        ).documents


================================================
FILE: backend/tests/daily/connectors/slack/test_slack_perm_sync.py
================================================
import time
from datetime import datetime
from datetime import timezone

import pytest

from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from onyx.connectors.slack.connector import SlackConnector
from tests.daily.connectors.utils import load_all_from_connector


PUBLIC_CHANNEL_NAME = "#daily-connector-test-channel"
PRIVATE_CHANNEL_NAME = "#private-channel"
PRIVATE_CHANNEL_USERS = [
    "admin@onyx-test.com",
    "test_user_1@onyx-test.com",
    # user 2 added via a group
    "test_user_2@onyx-test.com",
]

# Predates any test workspace messages, so the result set should match
# the "no start time" case while exercising the oldest= parameter.
OLDEST_TS_2016 = datetime(2016, 1, 1, tzinfo=timezone.utc).timestamp()

pytestmark = pytest.mark.usefixtures("enable_ee")


@pytest.mark.parametrize(
    "slack_connector",
    [
        PUBLIC_CHANNEL_NAME,
    ],
    indirect=True,
)
def test_load_from_checkpoint_access__public_channel(
    slack_connector: SlackConnector,
) -> None:
    """Test that load_from_checkpoint returns correct access information for documents."""
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    docs = load_all_from_connector(
        connector=slack_connector,
        start=0.0,
        end=time.time(),
        include_permissions=True,
    ).documents

    # We should have at least some documents
    assert len(docs) > 0, "Expected to find at least one document"

    for doc in docs:
        assert (
            doc.external_access is not None
        ), f"Document {doc.id} should have external_access when using perm sync"
        assert (
            doc.external_access.is_public is True
        ), f"Document {doc.id} should have public access when using perm sync"
        assert (
            doc.external_access.external_user_emails == set()
        ), f"Document {doc.id} should have no external user emails when using perm sync"
        assert (
            doc.external_access.external_user_group_ids == set()
        ), f"Document {doc.id} should have no external user group ids when using perm sync"


@pytest.mark.parametrize(
    "slack_connector",
    [
        PRIVATE_CHANNEL_NAME,
    ],
    indirect=True,
)
def test_load_from_checkpoint_access__private_channel(
    slack_connector: SlackConnector,
) -> None:
    """Test that load_from_checkpoint returns correct access information for documents."""
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    docs = load_all_from_connector(
        connector=slack_connector,
        start=0.0,
        end=time.time(),
        include_permissions=True,
    ).documents

    # We should have at least some documents
    assert len(docs) > 0, "Expected to find at least one document"

    for doc in docs:
        assert (
            doc.external_access is not None
        ), f"Document {doc.id} should have external_access when using perm sync"
        assert (
            doc.external_access.is_public is False
        ), f"Document {doc.id} should have private access when using perm sync"
        assert doc.external_access.external_user_emails == set(
            PRIVATE_CHANNEL_USERS
        ), f"Document {doc.id} should have private channel users when using perm sync"
        assert (
            doc.external_access.external_user_group_ids == set()
        ), f"Document {doc.id} should have no external user group ids when using perm sync"


@pytest.mark.parametrize(
    "slack_connector",
    [
        PUBLIC_CHANNEL_NAME,
    ],
    indirect=True,
)
@pytest.mark.parametrize("start_ts", [None, OLDEST_TS_2016])
def test_slim_documents_access__public_channel(
    slack_connector: SlackConnector,
    start_ts: float | None,
) -> None:
    """Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents."""
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
        start=start_ts,
        end=time.time(),
    )

    # Collect all slim documents from the generator
    all_slim_docs: list[SlimDocument] = []
    for slim_doc_batch in slim_docs_generator:
        all_slim_docs.extend(
            [doc for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # We should have at least some slim documents
    assert len(all_slim_docs) > 0, "Expected to find at least one slim document"

    for slim_doc in all_slim_docs:
        assert slim_doc.external_access is not None
        assert slim_doc.external_access.is_public is True
        assert slim_doc.external_access.external_user_emails == set()
        assert slim_doc.external_access.external_user_group_ids == set()


@pytest.mark.parametrize(
    "slack_connector",
    [
        PRIVATE_CHANNEL_NAME,
    ],
    indirect=True,
)
def test_slim_documents_access__private_channel(
    slack_connector: SlackConnector,
) -> None:
    """Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents."""
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
        start=None,
        end=time.time(),
    )

    # Collect all slim documents from the generator
    all_slim_docs: list[SlimDocument] = []
    for slim_doc_batch in slim_docs_generator:
        all_slim_docs.extend(
            [doc for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # We should have at least some slim documents
    assert len(all_slim_docs) > 0, "Expected to find at least one slim document"

    for slim_doc in all_slim_docs:
        assert slim_doc.external_access is not None
        assert slim_doc.external_access.is_public is False
        assert slim_doc.external_access.external_user_emails == set(
            PRIVATE_CHANNEL_USERS
        )
        assert slim_doc.external_access.external_user_group_ids == set()


================================================
FILE: backend/tests/daily/connectors/teams/models.py
================================================
from pydantic import BaseModel

from onyx.access.models import ExternalAccess
from onyx.connectors.models import Document


class TeamsThread(BaseModel):
    thread: str
    external_access: ExternalAccess

    @classmethod
    def from_doc(cls, document: Document) -> "TeamsThread":
        assert (
            document.external_access
        ), f"ExternalAccess should always be available, instead got {document=}"

        return cls(
            thread=document.get_text_content(),
            external_access=document.external_access,
        )


================================================
FILE: backend/tests/daily/connectors/teams/test_teams_connector.py
================================================
import os
import time

import pytest

from onyx.access.models import ExternalAccess
from onyx.connectors.models import HierarchyNode
from onyx.connectors.teams.connector import TeamsConnector
from tests.daily.connectors.teams.models import TeamsThread
from tests.daily.connectors.utils import load_all_from_connector


TEAMS_THREAD = [
    # Posted in "Public Channel"
    TeamsThread(
        thread="This is the first message in Onyx-Testing ...This is a reply!This is a second reply.Third.4th.5",
        external_access=ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        ),
    ),
    TeamsThread(
        thread="Testing body.",
        external_access=ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        ),
    ),
    TeamsThread(
        thread="Hello, world! Nice to meet you all.",
        external_access=ExternalAccess(
            external_user_emails=set(),
            external_user_group_ids=set(),
            is_public=True,
        ),
    ),
    # Posted in "Private Channel (Raunak is excluded)"
    TeamsThread(
        thread="This is a test post. Raunak should not be able to see this!",
        external_access=ExternalAccess(
            external_user_emails=set(["test@danswerai.onmicrosoft.com"]),
            external_user_group_ids=set(),
            is_public=False,
        ),
    ),
    # Posted in "Private Channel (Raunak is a member)"
    TeamsThread(
        thread="This is a test post in a private channel that Raunak does have access to! Hello, Raunak!"
        "Hello, world! I am just a member in this chat, but not an owner.",
        external_access=ExternalAccess(
            external_user_emails=set(
                ["test@danswerai.onmicrosoft.com", "raunak@onyx.app"]
            ),
            external_user_group_ids=set(),
            is_public=False,
        ),
    ),
    # Posted in "Private Channel (Raunak owns)"
    TeamsThread(
        thread="This is a test post in a private channel that Raunak is an owner of! Whoa!"
        "Hello, world! I am an owner of this chat. The power!",
        external_access=ExternalAccess(
            external_user_emails=set(
                ["test@danswerai.onmicrosoft.com", "raunak@onyx.app"]
            ),
            external_user_group_ids=set(),
            is_public=False,
        ),
    ),
]


@pytest.fixture
def teams_credentials() -> dict[str, str]:
    app_id = os.environ["TEAMS_APPLICATION_ID"]
    dir_id = os.environ["TEAMS_DIRECTORY_ID"]
    secret = os.environ["TEAMS_SECRET"]

    return {
        "teams_client_id": app_id,
        "teams_directory_id": dir_id,
        "teams_client_secret": secret,
    }


@pytest.fixture
def teams_connector(
    teams_credentials: dict[str, str],
) -> TeamsConnector:
    teams_connector = TeamsConnector(teams=["Onyx-Testing"])
    teams_connector.load_credentials(teams_credentials)
    return teams_connector


def _build_map(threads: list[TeamsThread]) -> dict[str, TeamsThread]:
    map: dict[str, TeamsThread] = {}

    for thread in threads:
        assert thread.thread not in map, f"Duplicate thread found in map; {thread=}"
        map[thread.thread] = thread

    return map


def _assert_is_valid_external_access(
    external_access: ExternalAccess,
) -> None:
    assert (
        not external_access.external_user_group_ids
    ), f"{external_access.external_user_group_ids=} should be empty for MS Teams"

    if external_access.is_public:
        assert (
            not external_access.external_user_emails
        ), f"{external_access.external_user_emails=} should be empty for public channels"
    else:
        assert (
            external_access.external_user_emails
        ), f"{external_access.external_user_emails=} should contains at least one user for private channels"


@pytest.mark.parametrize(
    "expected_teams_threads",
    [TEAMS_THREAD],
)
def test_loading_all_docs_from_teams_connector(
    teams_connector: TeamsConnector,
    expected_teams_threads: list[TeamsThread],
) -> None:
    docs = list(
        load_all_from_connector(
            connector=teams_connector,
            start=0.0,
            end=time.time(),
        ).documents
    )
    actual_teams_threads = [TeamsThread.from_doc(doc) for doc in docs]
    actual_teams_threads_map = _build_map(threads=actual_teams_threads)
    expected_teams_threads_map = _build_map(threads=expected_teams_threads)

    # Assert that each thread document matches what we expect.
    assert actual_teams_threads_map == expected_teams_threads_map

    # Assert that all the `ExternalAccess` instances are well-formed.
    for thread in actual_teams_threads:
        _assert_is_valid_external_access(external_access=thread.external_access)


def test_slim_docs_retrieval_from_teams_connector(
    teams_connector: TeamsConnector,
) -> None:
    slim_docs = [
        slim_doc
        for slim_doc_batch in teams_connector.retrieve_all_slim_docs_perm_sync()
        for slim_doc in slim_doc_batch
    ]

    for slim_doc in slim_docs:
        if isinstance(slim_doc, HierarchyNode):
            continue
        assert (
            slim_doc.external_access
        ), f"ExternalAccess should always be available, instead got {slim_doc=}"
        _assert_is_valid_external_access(external_access=slim_doc.external_access)


def test_load_from_checkpoint_with_perm_sync(
    teams_connector: TeamsConnector,
    enable_ee: None,  # noqa: ARG001
) -> None:
    """Test that load_from_checkpoint_with_perm_sync returns documents with external_access.

    This verifies the CheckpointedConnectorWithPermSync interface is properly implemented.
    """
    docs = load_all_from_connector(
        connector=teams_connector,
        start=0.0,
        end=time.time(),
        include_permissions=True,  # Uses load_from_checkpoint_with_perm_sync
    ).documents

    # We should have at least some documents
    assert len(docs) > 0, "Expected to find at least one document"

    for doc in docs:
        assert (
            doc.external_access is not None
        ), f"Document {doc.id} should have external_access when using perm sync"
        _assert_is_valid_external_access(external_access=doc.external_access)


================================================
FILE: backend/tests/daily/connectors/utils.py
================================================
from collections.abc import Iterator
from typing import TypeVar

from pydantic import BaseModel

from onyx.connectors.connector_runner import CheckpointOutputWrapper
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection

_ITERATION_LIMIT = 100_000

CT = TypeVar("CT", bound=ConnectorCheckpoint)


class ConnectorOutput(BaseModel):
    """Structured output from loading a connector."""

    documents: list[Document]
    failures: list[ConnectorFailure]
    hierarchy_nodes: list[HierarchyNode]

    model_config = {"arbitrary_types_allowed": True}


def load_all_from_connector(
    connector: CheckpointedConnector[CT],
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    include_permissions: bool = False,
    raise_on_failures: bool = True,
) -> ConnectorOutput:
    """
    Load all documents, hierarchy nodes, and failures from a connector.

    Returns a ConnectorOutput with documents, failures, and hierarchy_nodes separated.

    Also validates that parent hierarchy nodes are always yielded before their children:
    - For documents: parent must have been yielded before the document
    - For hierarchy nodes: after each batch, validates that all parents in the batch
      have been seen (either in the current batch or a previous batch)
    """
    num_iterations = 0

    if include_permissions and not isinstance(
        connector, CheckpointedConnectorWithPermSync
    ):
        raise ValueError("Connector does not support permission syncing")

    checkpoint = connector.build_dummy_checkpoint()
    documents: list[Document] = []
    failures: list[ConnectorFailure] = []
    hierarchy_nodes: list[HierarchyNode] = []

    # Track all seen hierarchy node raw_ids for parent validation
    seen_hierarchy_raw_ids: set[str] = set()

    while checkpoint.has_more:
        load_from_checkpoint_generator = (
            connector.load_from_checkpoint_with_perm_sync
            if include_permissions
            and isinstance(connector, CheckpointedConnectorWithPermSync)
            else connector.load_from_checkpoint
        )
        doc_batch_generator = CheckpointOutputWrapper[CT]()(
            load_from_checkpoint_generator(start, end, checkpoint)
        )

        # Collect hierarchy nodes from this batch (for end-of-batch validation)
        batch_hierarchy_nodes: list[HierarchyNode] = []

        for document, hierarchy_node, failure, next_checkpoint in doc_batch_generator:
            if hierarchy_node is not None:
                hierarchy_nodes.append(hierarchy_node)
                batch_hierarchy_nodes.append(hierarchy_node)
                # Add to seen set immediately so subsequent documents can reference it
                seen_hierarchy_raw_ids.add(hierarchy_node.raw_node_id)

            if failure is not None:
                failures.append(failure)

            if document is not None and isinstance(document, Document):
                documents.append(document)
                # Validate: document's parent must have been yielded before this document
                if document.parent_hierarchy_raw_node_id is not None:
                    if (
                        document.parent_hierarchy_raw_node_id
                        not in seen_hierarchy_raw_ids
                    ):
                        raise AssertionError(
                            f"Document '{document.id}' "
                            f"(semantic_identifier='{document.semantic_identifier}') "
                            f"has parent_hierarchy_raw_node_id="
                            f"'{document.parent_hierarchy_raw_node_id}' "
                            f"which was not yielded before this document. "
                            f"Seen hierarchy IDs: {seen_hierarchy_raw_ids}"
                        )

            if next_checkpoint is not None:
                checkpoint = next_checkpoint

        # End-of-batch validation for hierarchy nodes:
        # Each node's parent must be in the current batch or a previous batch
        batch_hierarchy_raw_ids = {node.raw_node_id for node in batch_hierarchy_nodes}
        for node in batch_hierarchy_nodes:
            if node.raw_parent_id is None:
                continue  # Root nodes have no parent

            parent_in_current_batch = node.raw_parent_id in batch_hierarchy_raw_ids
            parent_in_previous_batch = node.raw_parent_id in seen_hierarchy_raw_ids

            if not parent_in_current_batch and not parent_in_previous_batch:
                raise AssertionError(
                    f"HierarchyNode '{node.raw_node_id}' "
                    f"(display_name='{node.display_name}') "
                    f"has raw_parent_id='{node.raw_parent_id}' which was not yielded "
                    f"in the current batch or any previous batch. "
                    f"Seen hierarchy IDs: {seen_hierarchy_raw_ids}, "
                    f"Current batch IDs: {batch_hierarchy_raw_ids}"
                )

        num_iterations += 1
        if num_iterations > _ITERATION_LIMIT:
            raise RuntimeError("Too many iterations. Infinite loop?")

    if raise_on_failures and failures:
        raise RuntimeError(f"Failed to load documents: {failures}")

    return ConnectorOutput(
        documents=documents,
        failures=failures,
        hierarchy_nodes=hierarchy_nodes,
    )


def to_sections(
    documents: list[Document],
) -> Iterator[TextSection | ImageSection]:
    for doc in documents:
        for section in doc.sections:
            yield section


def to_text_sections(sections: Iterator[TextSection | ImageSection]) -> Iterator[str]:
    for section in sections:
        if isinstance(section, TextSection):
            yield section.text


================================================
FILE: backend/tests/daily/connectors/web/test_web_connector.py
================================================
from concurrent.futures import ThreadPoolExecutor

import pytest

from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.web.connector import WEB_CONNECTOR_VALID_SETTINGS
from onyx.connectors.web.connector import WebConnector

EXPECTED_QUOTE = (
    "If you can't explain it to a six year old, you don't understand it yourself."
)


# NOTE(rkuo): we will probably need to adjust this test to point at our own test site
# to avoid depending on a third party site
@pytest.fixture
def quotes_to_scroll_web_connector(request: pytest.FixtureRequest) -> WebConnector:
    scroll_before_scraping = request.param
    connector = WebConnector(
        base_url="https://quotes.toscrape.com/scroll",
        web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
        scroll_before_scraping=scroll_before_scraping,
    )
    return connector


@pytest.mark.parametrize("quotes_to_scroll_web_connector", [True], indirect=True)
def test_web_connector_scroll(quotes_to_scroll_web_connector: WebConnector) -> None:
    all_docs: list[Document] = []
    document_batches = quotes_to_scroll_web_connector.load_from_state()
    for doc_batch in document_batches:
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)

    assert len(all_docs) == 1
    doc = all_docs[0]
    assert doc.sections[0].text is not None
    assert EXPECTED_QUOTE in doc.sections[0].text


@pytest.mark.parametrize("quotes_to_scroll_web_connector", [False], indirect=True)
def test_web_connector_no_scroll(quotes_to_scroll_web_connector: WebConnector) -> None:
    all_docs: list[Document] = []
    document_batches = quotes_to_scroll_web_connector.load_from_state()
    for doc_batch in document_batches:
        for doc in doc_batch:
            if isinstance(doc, HierarchyNode):
                continue
            all_docs.append(doc)

    assert len(all_docs) == 1
    doc = all_docs[0]
    assert doc.sections[0].text is not None
    assert EXPECTED_QUOTE not in doc.sections[0].text


MERCURY_EXPECTED_QUOTE = "How can we help?"


@pytest.mark.xfail(
    reason=(
        "flaky. maybe we can improve how we avoid triggering bot protection ormaybe this is just how it has to be."
    ),
)
def test_web_connector_bot_protection() -> None:
    connector = WebConnector(
        base_url="https://support.mercury.com/hc",
        web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
    )
    document_batches = list(connector.load_from_state())
    assert len(document_batches) == 1
    doc_batch = document_batches[0]
    assert len(doc_batch) == 1
    doc = doc_batch[0]
    assert not isinstance(doc, HierarchyNode)
    assert doc.sections[0].text is not None
    assert MERCURY_EXPECTED_QUOTE in doc.sections[0].text


def test_web_connector_recursive_www_redirect() -> None:
    # Check that https://onyx.app can be recursed if re-directed to www.onyx.app
    # Run in thread pool to avoid conflict with pytest-asyncio's event loop
    def _run_connector() -> list[Document]:
        connector = WebConnector(
            base_url="https://onyx.app",
            web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
        )
        return [
            doc
            for batch in connector.load_from_state()
            for doc in batch
            if not isinstance(doc, HierarchyNode)
        ]

    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(_run_connector)
        documents = future.result()

    assert len(documents) > 1


================================================
FILE: backend/tests/daily/connectors/zendesk/test_zendesk_connector.py
================================================
import json
import os
import time
from pathlib import Path
from typing import cast

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.zendesk.connector import ZendeskConnector
from tests.daily.connectors.utils import load_all_from_connector


def load_test_data(file_name: str = "test_zendesk_data.json") -> dict[str, dict]:
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def zendesk_article_connector() -> ZendeskConnector:
    connector = ZendeskConnector(content_type="articles")
    connector.load_credentials(get_credentials())
    return connector


@pytest.fixture
def zendesk_ticket_connector() -> ZendeskConnector:
    connector = ZendeskConnector(content_type="tickets")
    connector.load_credentials(get_credentials())
    return connector


def get_credentials() -> dict[str, str]:
    return {
        "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"],
        "zendesk_email": os.environ["ZENDESK_EMAIL"],
        "zendesk_token": os.environ["ZENDESK_TOKEN"],
    }


@pytest.mark.xfail(
    reason=(
        "Cannot get Zendesk developer account to ensure zendesk account does not expire after 2 weeks"
    )
)
@pytest.mark.parametrize(
    "connector_fixture", ["zendesk_article_connector", "zendesk_ticket_connector"]
)
def test_zendesk_connector_basic(
    request: pytest.FixtureRequest, connector_fixture: str
) -> None:
    connector = cast(ZendeskConnector, request.getfixturevalue(connector_fixture))
    test_data = load_test_data()
    all_docs: list[Document] = []
    target_test_doc_id: str
    if connector.content_type == "articles":
        target_test_doc_id = f"article:{test_data['article']['id']}"
    else:
        target_test_doc_id = f"zendesk_ticket_{test_data['ticket']['id']}"

    target_doc: Document | None = None

    for doc in load_all_from_connector(connector, 0, time.time()).documents:
        all_docs.append(doc)
        if doc.id == target_test_doc_id:
            target_doc = doc
            print(f"target_doc {target_doc}")

    assert len(all_docs) > 0, "No documents were retrieved from the connector"
    assert (
        target_doc is not None
    ), "Target document was not found in the retrieved documents"
    assert target_doc.source == DocumentSource.ZENDESK, "Document source is not ZENDESK"

    if connector.content_type == "articles":
        test_article = test_data["article"]
        assert target_doc.semantic_identifier == test_article["semantic_identifier"]
        assert target_doc.sections[0].link == test_article["sections"][0]["link"]
        assert target_doc.source == test_article["source"]
        assert target_doc.primary_owners is not None
        assert len(target_doc.primary_owners) == 1
        assert (
            target_doc.primary_owners[0].display_name
            == test_article["primary_owners"][0]["display_name"]
        )
        assert (
            target_doc.primary_owners[0].email
            == test_article["primary_owners"][0]["email"]
        )
    else:
        test_ticket = test_data["ticket"]
        assert target_doc.semantic_identifier == test_ticket["semantic_identifier"]
        assert target_doc.sections[0].link == test_ticket["sections"][0]["link"]
        assert target_doc.source == test_ticket["source"]
        assert target_doc.metadata["status"] == test_ticket["metadata"]["status"]
        assert target_doc.metadata["priority"] == test_ticket["metadata"]["priority"]
        assert target_doc.metadata["tags"] == test_ticket["metadata"]["tags"]
        assert (
            target_doc.metadata["ticket_type"] == test_ticket["metadata"]["ticket_type"]
        )


@pytest.mark.xfail(
    reason=(
        "Cannot get Zendesk developer account to ensure zendesk account does not expire after 2 weeks"
    )
)
def test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:
    # Get full doc IDs
    all_full_doc_ids = set()
    for doc in load_all_from_connector(
        zendesk_article_connector, 0, time.time()
    ).documents:
        all_full_doc_ids.add(doc.id)

    # Get slim doc IDs
    all_slim_doc_ids = set()
    for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_docs_perm_sync():
        all_slim_doc_ids.update(
            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]
        )

    # Full docs should be subset of slim docs
    assert all_full_doc_ids.issubset(
        all_slim_doc_ids
    ), f"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}"


================================================
FILE: backend/tests/daily/connectors/zendesk/test_zendesk_data.json
================================================
{
  "article": {
      "id": "32502691728155",
      "sections": [
        {
          "link": "https://d3v-onyx.zendesk.com/hc/en-us/articles/32502691728155-How-can-agents-leverage-knowledge-to-help-customers"
        }
      ],
      "source": "zendesk",
      "semantic_identifier": "How can agents leverage knowledge to help customers?",
      "primary_owners": [
        {
          "display_name": "Dan Swer",
          "email": "admin@onyx-test.com"
        }
      ]
  },
  "ticket": {
    "id": "1",
    "sections": [
      {
        "link": "https://d3v-onyx.zendesk.com/agent/tickets/1"
      }
    ],
    "source": "zendesk",
    "semantic_identifier": "Ticket #1: SAMPLE TICKET: Meet the ticket",
    "metadata": {
      "status": "open",
      "priority": "normal",
      "tags": ["sample", "support", "zendesk"],
      "ticket_type": "incident"
    }
  }
}

================================================
FILE: backend/tests/daily/embedding/test_embeddings.py
================================================
import os

import pytest
from tenacity import retry
from tenacity import retry_if_exception_type
from tenacity import stop_after_attempt
from tenacity import wait_exponential

from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import EmbeddingProvider

VALID_SAMPLE = ["hi", "hello my name is bob", "woah there!!!. 😃"]
VALID_LONG_SAMPLE = ["hi " * 999]
# openai limit is 2048, cohere is supposed to be 96 but in practice that doesn't
# seem to be true
TOO_LONG_SAMPLE = ["a"] * 2500


def _run_embeddings(
    texts: list[str], embedding_model: EmbeddingModel, expected_dim: int
) -> None:
    for text_type in [EmbedTextType.QUERY, EmbedTextType.PASSAGE]:
        embeddings = embedding_model.encode(texts, text_type)
        assert len(embeddings) == len(texts)
        assert len(embeddings[0]) == expected_dim


@pytest.fixture
def openai_embedding_model() -> EmbeddingModel:
    return EmbeddingModel(
        server_host="localhost",
        server_port=9000,
        model_name="text-embedding-3-small",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
        api_key=os.environ["OPENAI_API_KEY"],
        provider_type=EmbeddingProvider.OPENAI,
        api_url=None,
    )


def test_openai_embedding(openai_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, openai_embedding_model, 1536)
    _run_embeddings(TOO_LONG_SAMPLE, openai_embedding_model, 1536)


@pytest.fixture
def cohere_embedding_model() -> EmbeddingModel:
    return EmbeddingModel(
        server_host="localhost",
        server_port=9000,
        model_name="embed-english-light-v3.0",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
        api_key=os.environ["COHERE_API_KEY"],
        provider_type=EmbeddingProvider.COHERE,
        api_url=None,
    )


def test_cohere_embedding(cohere_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, cohere_embedding_model, 384)
    _run_embeddings(TOO_LONG_SAMPLE, cohere_embedding_model, 384)


@pytest.fixture
def litellm_embedding_model() -> EmbeddingModel:
    return EmbeddingModel(
        server_host="localhost",
        server_port=9000,
        model_name="text-embedding-3-small",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
        api_key=os.environ["LITELLM_API_KEY"],
        provider_type=EmbeddingProvider.LITELLM,
        api_url=os.environ["LITELLM_API_URL"],
    )


@pytest.mark.skip(reason="re-enable when we can get the correct litellm key and url")
def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
    _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)


@pytest.fixture
def local_nomic_embedding_model() -> EmbeddingModel:
    return EmbeddingModel(
        server_host="localhost",
        server_port=9000,
        model_name="nomic-ai/nomic-embed-text-v1",
        normalize=True,
        query_prefix="search_query: ",
        passage_prefix="search_document: ",
        api_key=None,
        provider_type=None,
        api_url=None,
    )


def test_local_nomic_embedding(local_nomic_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, local_nomic_embedding_model, 768)
    _run_embeddings(TOO_LONG_SAMPLE, local_nomic_embedding_model, 768)


@pytest.fixture
def azure_embedding_model() -> EmbeddingModel:
    return EmbeddingModel(
        server_host="localhost",
        server_port=9000,
        model_name="text-embedding-3-small",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
        api_key=os.environ["AZURE_API_KEY"],
        provider_type=EmbeddingProvider.AZURE,
        api_url=os.environ["AZURE_API_URL"],
    )


# Azure has strict rate limits on their embedding API, so we retry with exponential
# backoff to handle transient RateLimitError responses
@retry(
    retry=retry_if_exception_type(RuntimeError),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
def test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)
    _run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)


# NOTE (chris): this test doesn't work, and I do not know why
# def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
#     """NOTE: this test relies on a very low rate limit for the Azure API +
#     this test only being run once in a 1 minute window"""
#     # VALID_LONG_SAMPLE is 999 tokens, so the second call should run into rate
#     # limits assuming the limit is 1000 tokens per minute
#     result = azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)
#     assert len(result) == 1
#     assert len(result[0]) == 1536

#     # this should fail
#     with pytest.raises(ModelServerRateLimitError):
#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)
#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)
#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)

#     # this should succeed, since passage requests retry up to 10 times
#     start = time.time()
#     result = azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.PASSAGE)
#     assert len(result) == 1
#     assert len(result[0]) == 1536
#     assert time.time() - start > 30  # make sure we waited, even though we hit rate limits


================================================
FILE: backend/tests/daily/llm/test_bedrock.py
================================================
import os
from typing import Any

import pytest
from fastapi.testclient import TestClient

from onyx.llm.constants import LlmProviderNames


_DEFAULT_BEDROCK_MODEL = "anthropic.claude-3-5-sonnet-20241022-v2:0"


@pytest.mark.xfail(
    reason="Credentials not yet available due to compliance work needed",
)
def test_bedrock_llm_configuration(client: TestClient) -> None:
    # Prepare the test request payload
    test_request: dict[str, Any] = {
        "provider": LlmProviderNames.BEDROCK,
        "model": _DEFAULT_BEDROCK_MODEL,
        "api_key": None,
        "api_base": None,
        "api_version": None,
        "custom_config": {
            "AWS_REGION_NAME": os.environ.get("AWS_REGION_NAME", "us-east-1"),
            "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID"),
            "AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY"),
        },
        "model_configurations": [{"name": _DEFAULT_BEDROCK_MODEL, "is_visible": True}],
        "api_key_changed": True,
        "custom_config_changed": True,
    }

    # Send the test request
    response = client.post("/admin/llm/test", json=test_request)

    # Assert the response
    assert (
        response.status_code == 200
    ), f"Expected status code 200, but got {response.status_code}. Response: {response.text}"


def test_bedrock_llm_configuration_invalid_key(client: TestClient) -> None:
    # Prepare the test request payload with invalid credentials
    test_request: dict[str, Any] = {
        "provider": LlmProviderNames.BEDROCK,
        "model": _DEFAULT_BEDROCK_MODEL,
        "api_key": None,
        "api_base": None,
        "api_version": None,
        "custom_config": {
            "AWS_REGION_NAME": "us-east-1",
            "AWS_ACCESS_KEY_ID": "invalid_access_key_id",
            "AWS_SECRET_ACCESS_KEY": "invalid_secret_access_key",
        },
        "model_configurations": [{"name": _DEFAULT_BEDROCK_MODEL, "is_visible": True}],
        "api_key_changed": True,
        "custom_config_changed": True,
    }

    # Send the test request
    response = client.post("/admin/llm/test", json=test_request)

    # Assert the response
    assert (
        response.status_code == 400
    ), f"Expected status code 400, but got {response.status_code}. Response: {response.text}"
    assert (
        "Invalid credentials" in response.text
        or "Invalid Authentication" in response.text
    ), f"Expected error message about invalid credentials, but got: {response.text}"


================================================
FILE: backend/tests/external_dependency_unit/answer/conftest.py
================================================
import os
from collections.abc import Iterator
from collections.abc import Mapping
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from sqlalchemy.orm import Session

from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest


# Counter for generating unique file IDs in mock file store
_mock_file_id_counter = 0


def ensure_default_llm_provider(db_session: Session) -> None:
    """Ensure a default LLM provider exists for tests that exercise chat flows."""

    try:
        llm_provider_request = LLMProviderUpsertRequest(
            name="test-provider",
            provider=LlmProviderNames.OPENAI,
            api_key=os.environ.get("OPENAI_API_KEY", "test"),
            is_public=True,
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name="gpt-4o-mini",
                    is_visible=True,
                )
            ],
            groups=[],
        )
        provider = upsert_llm_provider(
            llm_provider_upsert_request=llm_provider_request,
            db_session=db_session,
        )
        update_default_provider(provider.id, "gpt-4o-mini", db_session)
    except Exception as exc:  # pragma: no cover - only hits on duplicate setup issues
        # Rollback to clear the pending transaction state
        db_session.rollback()
        print(f"Note: Could not create LLM provider: {exc}")


@pytest.fixture
def mock_nlp_embeddings_post() -> Iterator[None]:
    """Patch model-server embedding HTTP calls used by NLP components."""

    def _mock_post(
        url: str,
        json: Mapping[str, Any] | None = None,
        headers: Mapping[str, str] | None = None,  # noqa: ARG001
        **kwargs: Any,  # noqa: ARG001
    ) -> MagicMock:
        resp = MagicMock()
        if "encoder/bi-encoder-embed" in url:
            num_texts = len(json.get("texts", [])) if json else 1
            resp.status_code = 200
            resp.json.return_value = {"embeddings": [[0.0] * 768] * num_texts}
            resp.raise_for_status = MagicMock()
            return resp
        resp.status_code = 200
        resp.json.return_value = {}
        resp.raise_for_status = MagicMock()
        return resp

    with patch(
        "onyx.natural_language_processing.search_nlp_models.requests.post",
        side_effect=_mock_post,
    ):
        yield


@pytest.fixture
def mock_gpu_status() -> Iterator[None]:
    """Avoid hitting model server for GPU status checks."""
    with patch(
        "onyx.utils.gpu_utils._get_gpu_status_from_model_server", return_value=False
    ):
        yield


@pytest.fixture
def mock_vespa_query() -> Iterator[None]:
    """Stub Vespa query to a safe empty response to avoid CI flakiness."""
    with patch("onyx.document_index.vespa.index.query_vespa", return_value=[]):
        yield


@pytest.fixture
def mock_file_store() -> Iterator[None]:
    """Mock the file store to avoid S3/storage dependencies in tests."""
    global _mock_file_id_counter

    def _mock_save_file(*args: Any, **kwargs: Any) -> str:  # noqa: ARG001
        global _mock_file_id_counter
        _mock_file_id_counter += 1
        # Return a predictable file ID for tests
        return "123"

    mock_store = MagicMock()
    mock_store.save_file.side_effect = _mock_save_file
    mock_store.initialize.return_value = None

    with patch(
        "onyx.file_store.utils.get_default_file_store",
        return_value=mock_store,
    ):
        yield


@pytest.fixture
def mock_external_deps(
    mock_nlp_embeddings_post: None,  # noqa: ARG001
    mock_gpu_status: None,  # noqa: ARG001
    mock_vespa_query: None,  # noqa: ARG001
    mock_file_store: None,  # noqa: ARG001
) -> Iterator[None]:
    """Convenience fixture to enable all common external dependency mocks."""
    yield


================================================
FILE: backend/tests/external_dependency_unit/answer/stream_test_assertions.py
================================================
from __future__ import annotations

from typing import cast

from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import CreateChatSessionID
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta


def assert_answer_stream_part_correct(
    received: AnswerStreamPart, expected: AnswerStreamPart
) -> None:
    assert isinstance(received, type(expected))

    if isinstance(received, Packet):
        r_packet = cast(Packet, received)
        e_packet = cast(Packet, expected)

        assert r_packet.placement == e_packet.placement

        if isinstance(r_packet.obj, SearchToolDocumentsDelta):
            assert isinstance(e_packet.obj, SearchToolDocumentsDelta)
            assert is_search_tool_document_delta_equal(r_packet.obj, e_packet.obj)
            return
        elif isinstance(r_packet.obj, OpenUrlDocuments):
            assert isinstance(e_packet.obj, OpenUrlDocuments)
            assert is_open_url_documents_equal(r_packet.obj, e_packet.obj)
            return
        elif isinstance(r_packet.obj, AgentResponseStart):
            assert isinstance(e_packet.obj, AgentResponseStart)
            assert is_agent_response_start_equal(r_packet.obj, e_packet.obj)
            return
        elif isinstance(r_packet.obj, ImageGenerationFinal):
            assert isinstance(e_packet.obj, ImageGenerationFinal)
            assert is_image_generation_final_equal(r_packet.obj, e_packet.obj)
            return

        assert r_packet.obj == e_packet.obj
    elif isinstance(received, MessageResponseIDInfo):
        # We're not going to make assumptions about what the user id / assistant id should be
        # So just return
        return
    elif isinstance(received, CreateChatSessionID):
        # Don't worry about same session ids
        return
    else:
        raise NotImplementedError("Not implemented")


def _are_search_docs_equal(
    received: list[SearchDoc],
    expected: list[SearchDoc],
) -> bool:
    """
    What we care about:
     - All documents are present (order does not)
     - Expected document_id, link, blurb, source_type and hidden
    """
    if len(received) != len(expected):
        return False

    received.sort(key=lambda x: x.document_id)
    expected.sort(key=lambda x: x.document_id)

    for received_document, expected_document in zip(received, expected):
        if received_document.document_id != expected_document.document_id:
            return False
        if received_document.link != expected_document.link:
            return False
        if received_document.blurb != expected_document.blurb:
            return False
        if received_document.source_type != expected_document.source_type:
            return False
        if received_document.hidden != expected_document.hidden:
            return False
    return True


def is_search_tool_document_delta_equal(
    received: SearchToolDocumentsDelta,
    expected: SearchToolDocumentsDelta,
) -> bool:
    """
    What we care about:
     - All documents are present (order does not)
     - Expected document_id, link, blurb, source_type and hidden
    """
    received_documents = received.documents
    expected_documents = expected.documents

    return _are_search_docs_equal(received_documents, expected_documents)


def is_open_url_documents_equal(
    received: OpenUrlDocuments,
    expected: OpenUrlDocuments,
) -> bool:
    """
    What we care about:
     - All documents are present (order does not)
     - Expected document_id, link, blurb, source_type and hidden
    """
    received_documents = received.documents
    expected_documents = expected.documents

    return _are_search_docs_equal(received_documents, expected_documents)


def is_agent_response_start_equal(
    received: AgentResponseStart,
    expected: AgentResponseStart,
) -> bool:
    """
    What we care about:
     - All documents are present (order does not)
     - Expected document_id, link, blurb, source_type and hidden
    """
    received_documents = received.final_documents
    expected_documents = expected.final_documents

    if received_documents is None and expected_documents is None:
        return True
    if not received_documents or not expected_documents:
        return False

    return _are_search_docs_equal(received_documents, expected_documents)


def is_image_generation_final_equal(
    received: ImageGenerationFinal,
    expected: ImageGenerationFinal,
) -> bool:
    """
    What we care about:
     - Number of images are the same
     - On each image, url and file_id are aligned such that url=/api/chat/file/{file_id}
     - Revised prompt is expected
     - Shape is expected
    """
    if len(received.images) != len(expected.images):
        return False

    for received_image, expected_image in zip(received.images, expected.images):
        if received_image.url != f"/api/chat/file/{received_image.file_id}":
            return False
        if received_image.revised_prompt != expected_image.revised_prompt:
            return False
        if received_image.shape != expected_image.shape:
            return False
    return True


================================================
FILE: backend/tests/external_dependency_unit/answer/stream_test_builder.py
================================================
from __future__ import annotations

from collections.abc import Iterator

from onyx.chat.models import AnswerStreamPart
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from tests.external_dependency_unit.answer.stream_test_assertions import (
    assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
    create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
    create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.mock_llm import LLMResponse
from tests.external_dependency_unit.mock_llm import MockLLMController


class StreamTestBuilder:
    def __init__(self, llm_controller: MockLLMController) -> None:
        self._llm_controller = llm_controller

        # List of (expected_packet, forward_count) tuples
        self._expected_packets_queue: list[tuple[Packet, int]] = []

    def add_response(self, response: LLMResponse) -> StreamTestBuilder:
        self._llm_controller.add_response(response)

        return self

    def add_responses_together(self, *responses: LLMResponse) -> StreamTestBuilder:
        """Add multiple responses that should be emitted together in the same tick."""
        self._llm_controller.add_responses_together(*responses)

        return self

    def expect(
        self, expected_pkt: Packet, forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add an expected packet to the queue.

        Args:
            expected_pkt: The packet to expect
            forward: Number of tokens to forward before expecting this packet.
                     True = 1 token, False = 0 tokens, int = that many tokens.
        """
        forward_count = 1 if forward is True else (0 if forward is False else forward)
        self._expected_packets_queue.append((expected_pkt, forward_count))

        return self

    def expect_packets(
        self, packets: list[Packet], forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add multiple expected packets to the queue.

        Args:
            packets: List of packets to expect
            forward: Number of tokens to forward before expecting EACH packet.
                     True = 1 token per packet, False = 0 tokens, int = that many tokens per packet.
        """
        forward_count = 1 if forward is True else (0 if forward is False else forward)
        for pkt in packets:
            self._expected_packets_queue.append((pkt, forward_count))

        return self

    def expect_reasoning(
        self,
        reasoning_tokens: list[str],
        turn_index: int,
    ) -> StreamTestBuilder:
        return (
            self.expect(
                Packet(
                    placement=create_placement(turn_index),
                    obj=ReasoningStart(),
                )
            )
            .expect_packets(
                [
                    create_packet_with_reasoning_delta(token, turn_index)
                    for token in reasoning_tokens
                ]
            )
            .expect(
                Packet(
                    placement=create_placement(turn_index),
                    obj=ReasoningDone(),
                )
            )
        )

    def expect_agent_response(
        self,
        answer_tokens: list[str],
        turn_index: int,
        final_documents: list[SearchDoc] | None = None,
    ) -> StreamTestBuilder:
        return (
            self.expect(
                Packet(
                    placement=create_placement(turn_index),
                    obj=AgentResponseStart(
                        final_documents=final_documents,
                    ),
                )
            )
            .expect_packets(
                [
                    create_packet_with_agent_response_delta(token, turn_index)
                    for token in answer_tokens
                ]
            )
            .expect(
                Packet(
                    placement=create_placement(turn_index),
                    obj=OverallStop(),
                )
            )
        )

    def run_and_validate(self, stream: Iterator[AnswerStreamPart]) -> None:
        while self._expected_packets_queue:
            expected_pkt, forward_count = self._expected_packets_queue.pop(0)
            if forward_count > 0:
                self._llm_controller.forward(forward_count)
            received_pkt = next(stream)

            assert_answer_stream_part_correct(received_pkt, expected_pkt)


================================================
FILE: backend/tests/external_dependency_unit/answer/stream_test_utils.py
================================================
from __future__ import annotations

from collections.abc import Iterator
from uuid import UUID

from sqlalchemy.orm import Session

from onyx.chat.chat_utils import create_chat_session_from_request
from onyx.chat.models import AnswerStreamPart
from onyx.chat.process_message import handle_stream_message_objects
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import User
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult


def create_placement(
    turn_index: int,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
    model_index: int | None = 0,
) -> Placement:
    return Placement(
        turn_index=turn_index,
        tab_index=tab_index,
        sub_turn_index=sub_turn_index,
        model_index=model_index,
    )


def submit_query(
    query: str,
    chat_session_id: UUID | None,
    db_session: Session,
    user: User,
    llm_override: LLMOverride | None = None,
) -> Iterator[AnswerStreamPart]:
    request = SendMessageRequest(
        message=query,
        chat_session_id=chat_session_id,
        stream=True,
        chat_session_info=(
            ChatSessionCreationRequest() if chat_session_id is None else None
        ),
        llm_override=llm_override,
    )

    return handle_stream_message_objects(
        new_msg_req=request,
        user=user,
        db_session=db_session,
    )


def create_chat_session(
    db_session: Session,
    user: User,
) -> ChatSession:
    return create_chat_session_from_request(
        chat_session_request=ChatSessionCreationRequest(),
        user_id=user.id,
        db_session=db_session,
    )


def create_packet_with_agent_response_delta(token: str, turn_index: int) -> Packet:
    return Packet(
        placement=create_placement(turn_index),
        obj=AgentResponseDelta(
            content=token,
        ),
    )


def create_packet_with_reasoning_delta(token: str, turn_index: int) -> Packet:
    return Packet(
        placement=create_placement(turn_index),
        obj=ReasoningDelta(
            reasoning=token,
        ),
    )


def create_web_search_doc(
    semantic_identifier: str,
    link: str,
    blurb: str,
) -> SearchDoc:
    return SearchDoc(
        document_id=f"WEB_SEARCH_DOC_{link}",
        chunk_ind=0,
        semantic_identifier=semantic_identifier,
        link=link,
        blurb=blurb,
        source_type=DocumentSource.WEB,
        boost=1,
        hidden=False,
        metadata={},
        match_highlights=[],
    )


def mock_web_search_result_to_search_doc(result: MockWebSearchResult) -> SearchDoc:
    return create_web_search_doc(
        semantic_identifier=result.title,
        link=result.link,
        blurb=result.snippet,
    )


def mock_web_content_to_search_doc(content: MockWebContent) -> SearchDoc:
    return create_web_search_doc(
        semantic_identifier=content.title,
        link=content.url,
        blurb=content.title,
    )


def tokenise(text: str) -> list[str]:
    return [(token + " ") for token in text.split(" ")]


================================================
FILE: backend/tests/external_dependency_unit/answer/test_answer_without_openai.py
================================================
from __future__ import annotations

import os
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import StreamingError
from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import fetch_existing_llm_providers
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import Packet
from tests.external_dependency_unit.conftest import create_test_user


def test_answer_with_only_anthropic_provider(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    """Ensure chat still streams answers when only an Anthropic provider is configured."""

    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
    assert anthropic_api_key, "ANTHROPIC_API_KEY environment variable must be set"

    # Drop any existing providers so that only Anthropic is available.
    for provider in fetch_existing_llm_providers(db_session, [LLMModelFlowType.CHAT]):
        remove_llm_provider(db_session, provider.id)

    anthropic_model = "claude-haiku-4-5-20251001"
    provider_name = f"anthropic-test-{uuid4().hex}"

    anthropic_provider = upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=provider_name,
            provider=LlmProviderNames.ANTHROPIC,
            api_key=anthropic_api_key,
            is_public=True,
            groups=[],
            model_configurations=[
                ModelConfigurationUpsertRequest(name=anthropic_model, is_visible=True)
            ],
            api_key_changed=True,
        ),
        db_session=db_session,
    )

    try:
        update_default_provider(anthropic_provider.id, anthropic_model, db_session)

        test_user = create_test_user(db_session, email_prefix="anthropic_only")
        chat_session = create_chat_session(
            db_session=db_session,
            description="Anthropic only chat",
            user_id=test_user.id,
            persona_id=0,
        )

        chat_request = SendMessageRequest(
            message="hello",
            chat_session_id=chat_session.id,
        )

        response_stream: list[AnswerStreamPart] = []
        for packet in handle_stream_message_objects(
            new_msg_req=chat_request,
            user=test_user,
            db_session=db_session,
        ):
            response_stream.append(packet)

        assert response_stream, "Should receive streamed packets"
        assert not any(
            isinstance(packet, StreamingError) for packet in response_stream
        ), "No streaming errors expected with Anthropic provider"

        has_message_id = any(
            isinstance(packet, MessageResponseIDInfo) for packet in response_stream
        )
        assert has_message_id, "Should include reserved assistant message ID"

        has_message_start = any(
            isinstance(packet, Packet) and isinstance(packet.obj, AgentResponseStart)
            for packet in response_stream
        )
        assert has_message_start, "Stream should have a MessageStart packet"

        has_message_delta = any(
            isinstance(packet, Packet) and isinstance(packet.obj, AgentResponseDelta)
            for packet in response_stream
        )
        assert has_message_delta, "Stream should have a MessageDelta packet"

    finally:
        remove_llm_provider(db_session, anthropic_provider.id)


================================================
FILE: backend/tests/external_dependency_unit/answer/test_current_datetime_replacement.py
================================================
import re
from datetime import datetime

from sqlalchemy.orm import Session

from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import StreamingError
from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.conftest import create_test_user


def test_stream_chat_current_date_response(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    """Smoke test that asking for current date yields a streamed response.

    This exercises the full chat path using the default persona, ensuring
    the system prompt makes it to the LLM and a response is returned.
    """
    # Ensure LLM provider exists
    ensure_default_llm_provider(db_session)

    # Create user, persona, session
    test_user: User = create_test_user(db_session, email_prefix="test_current_date")
    default_persona = get_persona_by_id(
        persona_id=0, user=test_user, db_session=db_session, is_for_edit=False
    )
    chat_session = create_chat_session(
        db_session=db_session,
        description="Test current date question",
        user_id=test_user.id if test_user else None,
        persona_id=default_persona.id,
    )

    chat_request = SendMessageRequest(
        message="Please respond only with the current date in the format 'Weekday Month DD, YYYY'.",
        chat_session_id=chat_session.id,
    )

    gen = handle_stream_message_objects(
        new_msg_req=chat_request,
        user=test_user,
        db_session=db_session,
    )

    raw: list[AnswerStreamPart] = []
    content = ""
    had_error = False

    for pkt in gen:
        raw.append(pkt)
        if hasattr(pkt, "obj") and isinstance(pkt.obj, AgentResponseDelta):
            if pkt.obj.content:
                content += pkt.obj.content
        if hasattr(pkt, "obj") and isinstance(pkt.obj, StreamingError):
            had_error = True
            break

    assert not had_error, "Should not error when answering current date"
    assert any(
        isinstance(p, MessageResponseIDInfo) for p in raw
    ), "Should yield a message ID"
    assert len(content) > 0, "Should stream some assistant content"

    # Validate the response contains a properly formatted current date string
    match = re.search(r"[A-Za-z]+ [A-Za-z]+ \d{1,2}, \d{4}", content)
    assert match, f"Expected a date in content, got: {content[:200]}..."

    timestamp_str = match.group(0)
    timestamp_dt = datetime.strptime(timestamp_str, "%A %B %d, %Y")
    now = datetime.now()

    assert timestamp_dt.strftime("%A") == now.strftime(
        "%A"
    ), f"Expected weekday {now.strftime('%A')}, got {timestamp_dt.strftime('%A')}"
    assert timestamp_dt.strftime("%B") == now.strftime(
        "%B"
    ), f"Expected month {now.strftime('%B')}, got {timestamp_dt.strftime('%B')}"
    assert timestamp_dt.day == now.day and timestamp_dt.year == now.year, (
        f"Expected day {now.strftime('%d')} and year {now.strftime('%Y')}, "
        f"got {timestamp_dt.strftime('%d')} {timestamp_dt.strftime('%Y')}"
    )


================================================
FILE: backend/tests/external_dependency_unit/answer/test_stream_chat_message.py
================================================
from __future__ import annotations

import json
from uuid import UUID

import pytest
from sqlalchemy.orm import Session

from onyx.chat.models import CreateChatSessionID
from onyx.configs.constants import DocumentSource
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.answer.stream_test_assertions import (
    assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import create_chat_session
from tests.external_dependency_unit.answer.stream_test_utils import (
    create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
    create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.answer.stream_test_utils import (
    mock_web_content_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
    mock_web_search_result_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import submit_query
from tests.external_dependency_unit.answer.stream_test_utils import tokenise
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_content_provider import (
    use_mock_content_provider,
)
from tests.external_dependency_unit.mock_image_provider import (
    use_mock_image_generation_provider,
)
from tests.external_dependency_unit.mock_llm import LLMAnswerResponse
from tests.external_dependency_unit.mock_llm import LLMReasoningResponse
from tests.external_dependency_unit.mock_llm import LLMToolCallResponse
from tests.external_dependency_unit.mock_llm import use_mock_llm
from tests.external_dependency_unit.mock_search_pipeline import MockInternalSearchResult
from tests.external_dependency_unit.mock_search_pipeline import use_mock_search_pipeline
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult
from tests.external_dependency_unit.mock_search_provider import use_mock_web_provider


def test_stream_chat_with_answer(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    """Test that the stream chat with answer endpoint returns a valid answer."""
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer"
    )

    query = "What is the capital of France?"
    answer = "The capital of France is Paris."

    answer_tokens = tokenise(answer)

    with use_mock_llm() as mock_llm:
        handler = StreamTestBuilder(llm_controller=mock_llm)

        handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens))

        chat_session = create_chat_session(db_session=db_session, user=test_user)

        answer_stream = submit_query(
            query=query,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        handler.expect_agent_response(
            answer_tokens=answer_tokens,
            turn_index=0,
        ).run_and_validate(stream=answer_stream)

        with pytest.raises(StopIteration):
            next(answer_stream)


def test_stream_chat_with_answer_create_chat(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer_create_chat"
    )

    query = "Hi there friends"
    answer = "Hello friend"

    tokens = [answer]

    with use_mock_llm() as mock_llm:
        handler = StreamTestBuilder(llm_controller=mock_llm)

        handler.add_response(LLMAnswerResponse(answer_tokens=tokens))

        answer_stream = submit_query(
            query=query,
            chat_session_id=None,
            db_session=db_session,
            user=test_user,
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=CreateChatSessionID(
                chat_session_id=UUID("123e4567-e89b-12d3-a456-426614174000")
            ),
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=2,
            ),
        )

        handler.expect_agent_response(
            answer_tokens=tokens,
            turn_index=0,
        ).run_and_validate(stream=answer_stream)

        with pytest.raises(StopIteration):
            next(answer_stream)


def test_stream_chat_with_search_and_openurl_tools(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_search_tool"
    )

    QUERY = "What is the weather in Sydney?"

    REASONING_RESPONSE_1 = (
        "I need to perform a web search to get current weather details. "
        "I can use the search tool to do this."
    )

    WEB_QUERY_1 = "weather in sydney"
    WEB_QUERY_2 = "current weather in sydney"

    RESULTS1 = [
        MockWebSearchResult(
            title="Official Weather",
            link="www.weather.com.au",
            snippet="The current weather in Sydney is 20 degrees Celsius.",
        ),
        MockWebSearchResult(
            title="Weather CHannel",
            link="www.wc.com.au",
            snippet="Morning is 10 degree Celsius, afternoon is 25 degrees Celsius.",
        ),
    ]

    RESULTS2 = [
        MockWebSearchResult(
            title="Weather Now!",
            link="www.weathernow.com.au",
            snippet="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        )
    ]

    REASONING_RESPONSE_2 = "I like weathernow and the official weather site"

    QUERY_URLS_1 = ["www.weathernow.com.au", "www.weather.com.au"]

    CONTENT1 = [
        MockWebContent(
            title="Weather Now!",
            url="www.weathernow.com.au",
            content="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        ),
        MockWebContent(
            title="Weather Official",
            url="www.weather.com.au",
            content="The current weather in Sydney is 20 degrees Celsius.",
        ),
    ]

    REASONING_RESPONSE_3 = (
        "I now know everything that I need to know. " "I can now answer the question."
    )

    ANSWER_RESPONSE_1 = (
        "The weather in Sydney is sunny with a temperature of 22 degrees celsius."
    )

    with (
        use_mock_llm() as mock_llm,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )

        chat_session = create_chat_session(db_session=db_session, user=test_user)

        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        # LLM Stream Response 1
        mock_web.add_results(WEB_QUERY_1, RESULTS1)
        mock_web.add_results(WEB_QUERY_2, RESULTS2)

        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_1))
        ).add_response(
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="123",
                tool_call_argument_tokens=[
                    json.dumps({"queries": [WEB_QUERY_1, WEB_QUERY_2]})
                ],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 0)
                for token in tokenise(REASONING_RESPONSE_1)
            ]
        ).expect(
            Packet(placement=create_placement(0), obj=ReasoningDone())
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolQueriesDelta(
                    queries=[WEB_QUERY_1, WEB_QUERY_2],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolDocumentsDelta(
                    documents=[
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS1
                    ]
                    + [
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS2
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 2
        for content in CONTENT1:
            mock_content.add_content(content)

        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_2))
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"urls": QUERY_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 2)
                for token in tokenise(REASONING_RESPONSE_2)
            ]
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningDone(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlUrls(urls=[content.url for content in CONTENT1]),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content) for content in CONTENT1
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 3
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_3))
        ).add_response(
            LLMAnswerResponse(answer_tokens=tokenise(ANSWER_RESPONSE_1))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 4)
                for token in tokenise(REASONING_RESPONSE_3)
            ]
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningDone(),
            )
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE_1),
            turn_index=5,
            final_documents=[
                mock_web_search_result_to_search_doc(result) for result in RESULTS1
            ]
            + [mock_web_search_result_to_search_doc(result) for result in RESULTS2]
            + [mock_web_content_to_search_doc(content) for content in CONTENT1],
        ).run_and_validate(
            stream=answer_stream
        )

        with pytest.raises(StopIteration):
            next(answer_stream)


def test_image_generation_tool_no_reasoning(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(db_session, email_prefix="test_image_generation_tool")

    QUERY = "Create me an image of a dog on a rocketship"

    IMAGE_DATA = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfF"
        "cSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    # Heartbeat interval is 5 seconds. A delay of 8 seconds ensures exactly 2 heartbeats:
    IMAGE_DELAY = 8.0

    ANSWER_RESPONSE = "Here is a dog on a rocketship"

    with (
        use_mock_llm() as mock_llm,
        use_mock_image_generation_provider() as mock_image_gen,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )

        chat_session = create_chat_session(db_session=db_session, user=test_user)

        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        # LLM Stream Response 1
        mock_image_gen.add_image(IMAGE_DATA, IMAGE_DELAY)
        mock_llm.set_max_timeout(
            IMAGE_DELAY + 5.0
        )  # Give enough buffer for image generation

        # The LLMToolCallResponse has 2 tokens (1 for tool name/id + 1 for arguments).
        # We need to forward all 2 tokens before the tool starts executing and emitting packets.
        # The tool then emits: start, heartbeats (during image generation), final, and section end.
        handler.add_response(
            LLMToolCallResponse(
                tool_name="generate_image",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"prompt": QUERY})],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationToolStart(),
            ),
            forward=2,  # Forward both tool call tokens before expecting first packet
        ).expect_packets(
            [
                Packet(
                    placement=create_placement(0),
                    obj=ImageGenerationToolHeartbeat(),
                )
            ]
            * 2,
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationFinal(
                    images=[
                        GeneratedImage(
                            file_id="123",
                            url="/api/chat/file/123",
                            revised_prompt=QUERY,
                            shape="square",
                        )
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 2 - the answer comes after the tool call, so turn_index=1
        handler.add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=AgentResponseStart(final_documents=None),
            )
        ).expect_packets(
            [
                create_packet_with_agent_response_delta(token, 1)
                for token in tokenise(ANSWER_RESPONSE)
            ]
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=OverallStop(),
            )
        ).run_and_validate(
            stream=answer_stream
        )

        with pytest.raises(StopIteration):
            next(answer_stream)


def test_parallel_internal_and_web_search_tool_calls(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    """
    User asks a question
    LLM does some thinking
    LLM runs parallel tool calls for internal & web search

    -> Interal Search Branch performs seach + read ~10 documents
    -> Web Search: Searches the web for information

    LLM reads web documents
    LLM does thinking across all results
    LLM reads one more website
    LLM does more thinking
    LLM generates answer
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_parallel_internal_and_web_search_tool_calls"
    )

    AVALIABLE_CONNECTORS = [
        DocumentSource.GOOGLE_DRIVE,
        DocumentSource.CONFLUENCE,
        DocumentSource.LINEAR,
        DocumentSource.FIREFLIES,
    ]

    QUERY = "How will forecasts against 2026 global GDP growth affect our Q2 strategy?"

    THINKING_RESPONSE_1 = (
        "I need to build more context around the user's query to answer it. "
        "I should look at GDP growth projections for 2026. "
        "I should also look at what the Q2 strategy is and what projects are included. "
        "I should perform both web and internal searches in parallel to get information efficiently."
    )

    WEB_QUERIES_1 = [
        "2026 global GDP growth projections",
        "GDP growth 2026",
        "GDP forecast 2026",
    ]

    WEB_RESULTS_1 = {
        WEB_QUERIES_1[0]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/weo/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="IMF sees steady global growth in 2026 as AI boom offsets ...",
                link="https://www.reuters.com/article/us-world-economy-imf-idUSKBN2JU23E",
                snippet="IMF forecasts 2026 global GDP growth at 3.3% even with stronger 2025 performance",
            ),
            MockWebSearchResult(
                title="The Global Economy Is Forecast to Post...",
                link="https://www.goldmansachs.com/insights/articles/123",
                snippet="Global GDP is projected by Goldman Sachs Research to increase 2.8% in 2026",
            ),
        ],
        WEB_QUERIES_1[1]: [
            MockWebSearchResult(
                title="US third-quarter economic growth revised  slightly higher",
                link="https://www.reuters.com/word/us-third-quarter-eco",
                snippet="Gross domestic product increased at an upwardly revised 4.4% annualized rate, the ...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Gross Domestic Product",
                link="https://www.bea.gov/data/gdp/gross-domestic-product",
                snippet="Real gross domestic product (GDP) increased at an annual rate of 4.4 percent in the third quarter",
            ),
        ],
        WEB_QUERIES_1[2]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/web/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Our economic outlook for the United States - Vanguard",
                link="https://corporate.vanguard.com/content/corp/vemo",
                snippet="We expect strong capital investment to remain a principal strength in the year ahead",
            ),
        ],
    }

    INTERNAL_QUERIES_1 = ["Q2 strategy 2026", "GDP growth 2026 projects", "Q2 projects"]

    INTERNAL_RESULTS_1 = {
        INTERNAL_QUERIES_1[0]: [
            MockInternalSearchResult(
                document_id="123456789",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Q2 strategy 2026",
                chunk_ind=11,
            ),
            MockInternalSearchResult(
                document_id="732190732173",
                source_type=DocumentSource.FIREFLIES,
                semantic_identifier="What we think is going to happen in Q2",
                chunk_ind=5,
            ),
            MockInternalSearchResult(
                document_id="12389123219",
                source_type=DocumentSource.CONFLUENCE,
                semantic_identifier="Strategy roadmap for Q2 2026",
                chunk_ind=7,
            ),
        ],
        INTERNAL_QUERIES_1[1]: [
            MockInternalSearchResult(
                document_id="123123",
                source_type=DocumentSource.LINEAR,
                semantic_identifier="GDP growth 2026 projects",
                chunk_ind=13,
            )
        ],
        INTERNAL_QUERIES_1[2]: [
            MockInternalSearchResult(
                document_id="98823643243",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Full list of Q2 projects",
                chunk_ind=1,
            )
        ],
    }

    OPEN_URL_URLS_1 = [
        WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
        WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
        WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
    ]

    OPEN_URL_DOCUMENTS_1 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
    ]

    THINKING_RESPONSE_2 = (
        "I now have a clear picture of the 2026 global GDP projections and the Q2 strategy. "
        "I would like to now about the outperform expections though..."
    )

    OPEN_URL_URLS_2 = [WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link]
    OPEN_URL_DOCUMENTS_2 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link,
            content="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
        )
    ]

    REASONING_RESPONSE_3 = (
        "I now have all the information I need to answer the user's question."
    )

    ANSWER_RESPONSE = (
        "We will have to change around some of our projects to accomodate the outperform expections. "
        "We should focus on aggresive expansion projects and prioritize them over cost-cutting initiatives."
    )

    expected_web_docs = []
    seen_web_results = set()
    for web_results in WEB_RESULTS_1.values():
        for web_result in web_results:
            key = (web_result.title, web_result.link)
            if key in seen_web_results:
                continue
            seen_web_results.add(key)
            expected_web_docs.append(mock_web_search_result_to_search_doc(web_result))

    expected_internal_docs = []
    seen_internal_results = set()
    for internal_results in INTERNAL_RESULTS_1.values():
        for internal_result in internal_results:
            key = (internal_result.semantic_identifier, internal_result.document_id)
            if key in seen_internal_results:
                continue
            seen_internal_results.add(key)
            expected_internal_docs.append(internal_result.to_search_doc())

    with (
        use_mock_llm() as mock_llm,
        use_mock_search_pipeline(
            connectors=AVALIABLE_CONNECTORS
        ) as mock_search_pipeline,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        for query, web_results in WEB_RESULTS_1.items():
            mock_web.add_results(query, web_results)

        for query, internal_results in INTERNAL_RESULTS_1.items():
            mock_search_pipeline.add_search_results(query, internal_results)

        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )

        chat_session = create_chat_session(db_session=db_session, user=test_user)

        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )

        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        # LLM Stream Response 1
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            )
        ).add_responses_together(
            LLMToolCallResponse(
                tool_name="internal_search",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"queries": INTERNAL_QUERIES_1})],
            ),
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="321",
                tool_call_argument_tokens=[json.dumps({"queries": WEB_QUERIES_1})],
            ),
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            turn_index=0,
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=TopLevelBranching(
                    num_parallel_branches=2,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolStart(
                    is_internet_search=False,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolQueriesDelta(
                    queries=INTERNAL_QUERIES_1 + [QUERY],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolDocumentsDelta(
                    documents=expected_internal_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SectionEnd(),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolQueriesDelta(
                    queries=WEB_QUERIES_1,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolDocumentsDelta(
                    documents=expected_web_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 2
        for content in OPEN_URL_DOCUMENTS_1:
            mock_content.add_content(content)

        handler.add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="456",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlStart(),
            ),
            forward=2,  # Need both header + argument tokens for the tool call
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlUrls(urls=OPEN_URL_URLS_1),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_1
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 3
        for content in OPEN_URL_DOCUMENTS_2:
            mock_content.add_content(content)

        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            )
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="789",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_2})],
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            turn_index=3,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(placement=create_placement(4), obj=OpenUrlUrls(urls=OPEN_URL_URLS_2))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_2
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )

        # LLM Stream Response 4
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            )
        ).add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            turn_index=5,
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE),
            turn_index=6,
            final_documents=expected_internal_docs
            + expected_web_docs
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_1
            ]
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_2
            ],
        ).run_and_validate(
            stream=answer_stream
        )

        # End stream
        with pytest.raises(StopIteration):
            next(answer_stream)


================================================
FILE: backend/tests/external_dependency_unit/answer/test_stream_chat_message_objects.py
================================================
import uuid
from typing import Any
from unittest.mock import MagicMock

import pytest
from sqlalchemy.orm import Session

from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import StreamingError
from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.models import User
from onyx.db.persona import upsert_persona
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import Packet
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.conftest import create_test_user


@pytest.mark.skip(reason="Temporarily disabled")
def test_stream_chat_message_objects_without_web_search(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
    mock_external_deps: None,  # noqa: ARG001
) -> None:
    """
    Test that when web search is requested but the persona has no web search tool,
    the system handles it gracefully and returns a message explaining that web
    search is not available.
    """

    # Mock the model server HTTP calls for embeddings
    def mock_post(
        url: str,
        json: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,  # noqa: ARG001
        **kwargs: Any,  # noqa: ARG001
    ) -> MagicMock:
        """Mock requests.post for model server embedding calls"""
        mock_response = MagicMock()

        # Check if this is a call to the embedding endpoint
        if "encoder/bi-encoder-embed" in url:
            # Return a mock embedding response
            # The embedding dimension doesn't matter for this test,
            # just needs to be a valid response structure
            num_texts = len(json.get("texts", [])) if json else 1
            mock_response.status_code = 200
            mock_response.json.return_value = {
                "embeddings": [[0.1] * 768]
                * num_texts  # 768 is a common embedding dimension
            }
            mock_response.raise_for_status = MagicMock()
            return mock_response

        # For other URLs, return a generic success response
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_response.raise_for_status = MagicMock()
        return mock_response

    # First, ensure we have an LLM provider set up
    ensure_default_llm_provider(db_session)

    # Create a test user
    test_user: User = create_test_user(db_session, email_prefix="test_web_search")

    # Create a test persona explicitly WITHOUT any tools (including web search)
    # This ensures the test doesn't rely on the state of the default persona
    test_persona = upsert_persona(
        user=None,  # System persona
        name=f"Test Persona {uuid.uuid4()}",
        description="Test persona with no tools for web search test",
        llm_model_provider_override=None,
        llm_model_version_override=None,
        starter_messages=None,
        system_prompt=None,
        task_prompt=None,
        datetime_aware=None,
        is_public=True,
        db_session=db_session,
        tool_ids=[],  # Explicitly no tools
        document_set_ids=None,
        is_listed=True,
    )

    # Create a chat session with our test persona
    chat_session = create_chat_session(
        db_session=db_session,
        description="Test web search without tool",
        user_id=test_user.id if test_user else None,
        persona_id=test_persona.id,
    )
    # Create the chat message request with a query that attempts to force web search
    chat_request = SendMessageRequest(
        message="run a web search for 'Onyx'",
        chat_session_id=chat_session.id,
    )
    # Call handle_stream_message_objects
    response_generator = handle_stream_message_objects(
        new_msg_req=chat_request,
        user=test_user,
        db_session=db_session,
    )
    # Collect all packets from the response
    raw_answer_stream: list[AnswerStreamPart] = []
    message_content = ""
    error_occurred = False

    for packet in response_generator:
        raw_answer_stream.append(packet)
        if isinstance(packet, Packet):
            if isinstance(packet.obj, AgentResponseDelta):
                # Direct MessageDelta (if not wrapped)
                if packet.obj.content:
                    message_content += packet.obj.content
            elif isinstance(packet.obj, StreamingError):
                error_occurred = True
                break

    assert not error_occurred, "Should not have received a streaming error"

    # Verify that we got a response
    assert len(raw_answer_stream) > 0, "Should have received at least some packets"

    # Check if we got MessageResponseIDInfo packet (indicating message was created)
    has_message_id = any(
        isinstance(packet, MessageResponseIDInfo) for packet in raw_answer_stream
    )
    assert has_message_id, "Should have received a message ID packet"

    assert len(message_content) > 0, "Should have received some message content"


def test_nothing() -> None:
    assert True, "This test is just to ensure the test suite is running"


================================================
FILE: backend/tests/external_dependency_unit/background/test_periodic_task_claim.py
================================================
"""External dependency unit tests for periodic task claiming.

Tests ``_try_claim_task`` and ``_try_run_periodic_task`` against real
PostgreSQL, verifying happy-path behavior and concurrent-access safety.

The claim mechanism uses a transaction-scoped advisory lock + a KVStore
timestamp for cross-instance dedup.  The DB session is released before
the task runs, so long-running tasks don't hold connections.
"""

import time
from collections.abc import Generator
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from uuid import uuid4

import pytest

from onyx.background.periodic_poller import _PeriodicTaskDef
from onyx.background.periodic_poller import _try_claim_task
from onyx.background.periodic_poller import _try_run_periodic_task
from onyx.background.periodic_poller import PERIODIC_TASK_KV_PREFIX
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.models import KVStore
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID

_TEST_LOCK_BASE = 90_000


@pytest.fixture(scope="module", autouse=True)
def _init_engine() -> None:
    SqlEngine.init_engine(pool_size=10, max_overflow=5)


def _make_task(
    *,
    name: str | None = None,
    interval: float = 3600,
    lock_id: int | None = None,
    run_fn: MagicMock | None = None,
) -> _PeriodicTaskDef:
    return _PeriodicTaskDef(
        name=name if name is not None else f"test-{uuid4().hex[:8]}",
        interval_seconds=interval,
        lock_id=lock_id if lock_id is not None else _TEST_LOCK_BASE,
        run_fn=run_fn if run_fn is not None else MagicMock(),
    )


@pytest.fixture(autouse=True)
def _cleanup_kv(
    tenant_context: None,  # noqa: ARG001
) -> Generator[None, None, None]:
    yield
    with get_session_with_current_tenant() as db_session:
        db_session.query(KVStore).filter(
            KVStore.key.like(f"{PERIODIC_TASK_KV_PREFIX}test-%")
        ).delete(synchronize_session=False)
        db_session.commit()


# ------------------------------------------------------------------
# Happy-path: _try_claim_task
# ------------------------------------------------------------------


class TestClaimHappyPath:
    def test_first_claim_succeeds(self) -> None:
        assert _try_claim_task(_make_task()) is True

    def test_first_claim_creates_kv_row(self) -> None:
        task = _make_task()
        _try_claim_task(task)

        with get_session_with_current_tenant() as db_session:
            row = (
                db_session.query(KVStore)
                .filter_by(key=PERIODIC_TASK_KV_PREFIX + task.name)
                .first()
            )
        assert row is not None
        assert row.value is not None

    def test_second_claim_within_interval_fails(self) -> None:
        task = _make_task(interval=3600)
        assert _try_claim_task(task) is True
        assert _try_claim_task(task) is False

    def test_claim_after_interval_succeeds(self) -> None:
        task = _make_task(interval=1)
        assert _try_claim_task(task) is True

        kv_key = PERIODIC_TASK_KV_PREFIX + task.name
        with get_session_with_current_tenant() as db_session:
            row = db_session.query(KVStore).filter_by(key=kv_key).first()
            assert row is not None
            row.value = (datetime.now(timezone.utc) - timedelta(seconds=10)).isoformat()
            db_session.commit()

        assert _try_claim_task(task) is True


# ------------------------------------------------------------------
# Happy-path: _try_run_periodic_task
# ------------------------------------------------------------------


class TestRunHappyPath:
    def test_runs_task_and_updates_last_run_at(self) -> None:
        mock_fn = MagicMock()
        task = _make_task(run_fn=mock_fn)

        _try_run_periodic_task(task)

        mock_fn.assert_called_once()
        assert task.last_run_at > 0

    def test_skips_when_in_memory_interval_not_elapsed(self) -> None:
        mock_fn = MagicMock()
        task = _make_task(run_fn=mock_fn, interval=3600)
        task.last_run_at = time.monotonic()

        _try_run_periodic_task(task)

        mock_fn.assert_not_called()

    def test_skips_when_db_claim_blocked(self) -> None:
        name = f"test-{uuid4().hex[:8]}"
        lock_id = _TEST_LOCK_BASE + 10

        _try_claim_task(_make_task(name=name, lock_id=lock_id, interval=3600))

        mock_fn = MagicMock()
        task = _make_task(name=name, lock_id=lock_id, interval=3600, run_fn=mock_fn)
        _try_run_periodic_task(task)

        mock_fn.assert_not_called()

    def test_task_exception_does_not_propagate(self) -> None:
        task = _make_task(run_fn=MagicMock(side_effect=RuntimeError("boom")))
        _try_run_periodic_task(task)

    def test_claim_committed_before_task_runs(self) -> None:
        """The KV claim must be visible in the DB when run_fn executes."""
        task_name = f"test-order-{uuid4().hex[:8]}"
        kv_key = PERIODIC_TASK_KV_PREFIX + task_name
        claim_visible: list[bool] = []

        def check_claim() -> None:
            with get_session_with_current_tenant() as db_session:
                row = db_session.query(KVStore).filter_by(key=kv_key).first()
                claim_visible.append(row is not None and row.value is not None)

        task = _PeriodicTaskDef(
            name=task_name,
            interval_seconds=3600,
            lock_id=_TEST_LOCK_BASE + 11,
            run_fn=check_claim,
        )

        _try_run_periodic_task(task)

        assert claim_visible == [True]


# ------------------------------------------------------------------
# Concurrency: only one claimer should win
# ------------------------------------------------------------------


class TestClaimConcurrency:
    def test_concurrent_claims_single_winner(self) -> None:
        """Many threads claim the same task — exactly one should succeed."""
        num_threads = 20
        task_name = f"test-race-{uuid4().hex[:8]}"
        lock_id = _TEST_LOCK_BASE + 20

        def claim() -> bool:
            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
            return _try_claim_task(
                _PeriodicTaskDef(
                    name=task_name,
                    interval_seconds=3600,
                    lock_id=lock_id,
                    run_fn=lambda: None,
                )
            )

        results: list[bool] = []
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [executor.submit(claim) for _ in range(num_threads)]
            for future in as_completed(futures):
                results.append(future.result())

        winners = sum(1 for r in results if r)
        assert winners == 1, f"Expected 1 winner, got {winners}"

    def test_concurrent_run_single_execution(self) -> None:
        """Many threads run the same task — run_fn fires exactly once."""
        num_threads = 20
        task_name = f"test-run-race-{uuid4().hex[:8]}"
        lock_id = _TEST_LOCK_BASE + 21
        counter = MagicMock()

        def run() -> None:
            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
            _try_run_periodic_task(
                _PeriodicTaskDef(
                    name=task_name,
                    interval_seconds=3600,
                    lock_id=lock_id,
                    run_fn=counter,
                )
            )

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [executor.submit(run) for _ in range(num_threads)]
            for future in as_completed(futures):
                future.result()

        assert (
            counter.call_count == 1
        ), f"Expected run_fn called once, got {counter.call_count}"

    def test_no_errors_under_contention(self) -> None:
        """All threads complete without exceptions under high contention."""
        num_threads = 30
        task_name = f"test-err-{uuid4().hex[:8]}"
        lock_id = _TEST_LOCK_BASE + 22
        errors: list[Exception] = []

        def claim() -> bool:
            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
            return _try_claim_task(
                _PeriodicTaskDef(
                    name=task_name,
                    interval_seconds=3600,
                    lock_id=lock_id,
                    run_fn=lambda: None,
                )
            )

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [executor.submit(claim) for _ in range(num_threads)]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    errors.append(e)

        assert errors == [], f"Got {len(errors)} errors: {errors}"


================================================
FILE: backend/tests/external_dependency_unit/background/test_startup_recovery.py
================================================
"""External dependency unit tests for startup recovery (Step 10g).

Seeds ``UserFile`` records in stuck states (PROCESSING, DELETING,
needs_project_sync) then calls ``recover_stuck_user_files`` and verifies
the drain loops pick them up via ``FOR UPDATE SKIP LOCKED``.

Uses real PostgreSQL (via ``db_session`` / ``tenant_context`` fixtures).
The per-file ``*_impl`` functions are mocked so no real file store or
connector is needed — we only verify that recovery finds and dispatches
the correct files.
"""

from collections.abc import Generator
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import UUID
from uuid import uuid4

import pytest
import sqlalchemy as sa
from sqlalchemy.orm import Session

from onyx.background.periodic_poller import recover_stuck_user_files
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_IMPL_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"


def _create_user_file(
    db_session: Session,
    user_id: object,
    *,
    status: UserFileStatus = UserFileStatus.PROCESSING,
    needs_project_sync: bool = False,
    needs_persona_sync: bool = False,
) -> UserFile:
    uf = UserFile(
        id=uuid4(),
        user_id=user_id,
        file_id=f"test_file_{uuid4().hex[:8]}",
        name=f"test_{uuid4().hex[:8]}.txt",
        file_type="text/plain",
        status=status,
        needs_project_sync=needs_project_sync,
        needs_persona_sync=needs_persona_sync,
    )
    db_session.add(uf)
    db_session.commit()
    db_session.refresh(uf)
    return uf


def _fake_delete_impl(
    user_file_id: str,
    tenant_id: str,  # noqa: ARG001
    redis_locking: bool,  # noqa: ARG001
) -> None:
    """Mock side-effect: delete the row so the drain loop terminates."""
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    with get_session_with_current_tenant() as session:
        session.execute(sa.delete(UserFile).where(UserFile.id == UUID(user_file_id)))
        session.commit()


def _fake_sync_impl(
    user_file_id: str,
    tenant_id: str,  # noqa: ARG001
    redis_locking: bool,  # noqa: ARG001
) -> None:
    """Mock side-effect: clear sync flags so the drain loop terminates."""
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    with get_session_with_current_tenant() as session:
        session.execute(
            sa.update(UserFile)
            .where(UserFile.id == UUID(user_file_id))
            .values(needs_project_sync=False, needs_persona_sync=False)
        )
        session.commit()


@pytest.fixture()
def _cleanup_user_files(db_session: Session) -> Generator[list[UserFile], None, None]:
    """Track created UserFile rows and delete them after each test."""
    created: list[UserFile] = []
    yield created
    for uf in created:
        existing = db_session.get(UserFile, uf.id)
        if existing:
            db_session.delete(existing)
    db_session.commit()


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestRecoverProcessingFiles:
    """Files in PROCESSING status are re-processed via the processing drain loop."""

    def test_processing_files_recovered(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_proc")
        uf = _create_user_file(db_session, user.id, status=UserFileStatus.PROCESSING)
        _cleanup_user_files.append(uf)

        mock_impl = MagicMock()
        with patch(f"{_IMPL_MODULE}.process_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert (
            str(uf.id) in called_ids
        ), f"Expected file {uf.id} to be recovered but got: {called_ids}"

    def test_completed_files_not_recovered(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_comp")
        uf = _create_user_file(db_session, user.id, status=UserFileStatus.COMPLETED)
        _cleanup_user_files.append(uf)

        mock_impl = MagicMock()
        with patch(f"{_IMPL_MODULE}.process_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert (
            str(uf.id) not in called_ids
        ), f"COMPLETED file {uf.id} should not have been recovered"


class TestRecoverDeletingFiles:
    """Files in DELETING status are recovered via the delete drain loop."""

    def test_deleting_files_recovered(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_del")
        uf = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)
        # Row is deleted by _fake_delete_impl, so no cleanup needed.

        mock_impl = MagicMock(side_effect=_fake_delete_impl)
        with patch(f"{_IMPL_MODULE}.delete_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert (
            str(uf.id) in called_ids
        ), f"Expected file {uf.id} to be recovered for deletion but got: {called_ids}"


class TestRecoverSyncFiles:
    """Files needing project/persona sync are recovered via the sync drain loop."""

    def test_needs_project_sync_recovered(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_sync")
        uf = _create_user_file(
            db_session,
            user.id,
            status=UserFileStatus.COMPLETED,
            needs_project_sync=True,
        )
        _cleanup_user_files.append(uf)

        mock_impl = MagicMock(side_effect=_fake_sync_impl)
        with patch(f"{_IMPL_MODULE}.project_sync_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert (
            str(uf.id) in called_ids
        ), f"Expected file {uf.id} to be recovered for sync but got: {called_ids}"

    def test_needs_persona_sync_recovered(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_psync")
        uf = _create_user_file(
            db_session,
            user.id,
            status=UserFileStatus.COMPLETED,
            needs_persona_sync=True,
        )
        _cleanup_user_files.append(uf)

        mock_impl = MagicMock(side_effect=_fake_sync_impl)
        with patch(f"{_IMPL_MODULE}.project_sync_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert (
            str(uf.id) in called_ids
        ), f"Expected file {uf.id} to be recovered for persona sync but got: {called_ids}"


class TestRecoveryMultipleFiles:
    """Recovery processes all stuck files in one pass, not just the first."""

    def test_multiple_processing_files(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "recovery_multi")
        files = []
        for _ in range(3):
            uf = _create_user_file(
                db_session, user.id, status=UserFileStatus.PROCESSING
            )
            _cleanup_user_files.append(uf)
            files.append(uf)

        mock_impl = MagicMock()
        with patch(f"{_IMPL_MODULE}.process_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = {call.kwargs["user_file_id"] for call in mock_impl.call_args_list}
        expected_ids = {str(uf.id) for uf in files}
        assert expected_ids.issubset(
            called_ids
        ), f"Expected all {len(files)} files to be recovered. Missing: {expected_ids - called_ids}"


class TestTransientFailures:
    """Drain loops skip failed files, process the rest, and terminate."""

    def test_processing_failure_skips_and_continues(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "fail_proc")
        uf_fail = _create_user_file(
            db_session, user.id, status=UserFileStatus.PROCESSING
        )
        uf_ok = _create_user_file(db_session, user.id, status=UserFileStatus.PROCESSING)
        _cleanup_user_files.extend([uf_fail, uf_ok])

        fail_id = str(uf_fail.id)

        def side_effect(
            *,
            user_file_id: str,
            tenant_id: str,  # noqa: ARG001
            redis_locking: bool,  # noqa: ARG001
        ) -> None:
            if user_file_id == fail_id:
                raise RuntimeError("transient failure")

        mock_impl = MagicMock(side_effect=side_effect)
        with patch(f"{_IMPL_MODULE}.process_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert fail_id in called_ids, "Failed file should have been attempted"
        assert str(uf_ok.id) in called_ids, "Healthy file should have been processed"
        assert called_ids.count(fail_id) == 1, "Failed file retried — infinite loop"
        assert called_ids.count(str(uf_ok.id)) == 1

    def test_delete_failure_skips_and_continues(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "fail_del")
        uf_fail = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)
        uf_ok = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)
        _cleanup_user_files.append(uf_fail)

        fail_id = str(uf_fail.id)

        def side_effect(
            *, user_file_id: str, tenant_id: str, redis_locking: bool
        ) -> None:
            if user_file_id == fail_id:
                raise RuntimeError("transient failure")
            _fake_delete_impl(user_file_id, tenant_id, redis_locking)

        mock_impl = MagicMock(side_effect=side_effect)
        with patch(f"{_IMPL_MODULE}.delete_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert fail_id in called_ids, "Failed file should have been attempted"
        assert str(uf_ok.id) in called_ids, "Healthy file should have been deleted"
        assert called_ids.count(fail_id) == 1, "Failed file retried — infinite loop"
        assert called_ids.count(str(uf_ok.id)) == 1

    def test_sync_failure_skips_and_continues(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
        _cleanup_user_files: list[UserFile],
    ) -> None:
        user = create_test_user(db_session, "fail_sync")
        uf_fail = _create_user_file(
            db_session,
            user.id,
            status=UserFileStatus.COMPLETED,
            needs_project_sync=True,
        )
        uf_ok = _create_user_file(
            db_session,
            user.id,
            status=UserFileStatus.COMPLETED,
            needs_persona_sync=True,
        )
        _cleanup_user_files.extend([uf_fail, uf_ok])

        fail_id = str(uf_fail.id)

        def side_effect(
            *, user_file_id: str, tenant_id: str, redis_locking: bool
        ) -> None:
            if user_file_id == fail_id:
                raise RuntimeError("transient failure")
            _fake_sync_impl(user_file_id, tenant_id, redis_locking)

        mock_impl = MagicMock(side_effect=side_effect)
        with patch(f"{_IMPL_MODULE}.project_sync_user_file_impl", mock_impl):
            recover_stuck_user_files(TEST_TENANT_ID)

        called_ids = [call.kwargs["user_file_id"] for call in mock_impl.call_args_list]
        assert fail_id in called_ids, "Failed file should have been attempted"
        assert str(uf_ok.id) in called_ids, "Healthy file should have been synced"
        assert called_ids.count(fail_id) == 1, "Failed file retried — infinite loop"
        assert called_ids.count(str(uf_ok.id)) == 1


================================================
FILE: backend/tests/external_dependency_unit/cache/conftest.py
================================================
"""Fixtures for cache backend tests.

Requires a running PostgreSQL instance (and Redis for parity tests).
Run with::

    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/cache/
"""

from collections.abc import Generator

import pytest

from onyx.cache.interface import CacheBackend
from onyx.cache.postgres_backend import PostgresCacheBackend
from onyx.cache.redis_backend import RedisCacheBackend
from onyx.db.engine.sql_engine import SqlEngine
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID


@pytest.fixture(scope="session", autouse=True)
def _init_db() -> Generator[None, None, None]:
    """Initialize DB engine. Assumes Postgres has migrations applied (e.g. via docker compose)."""
    SqlEngine.init_engine(pool_size=5, max_overflow=2)
    yield


@pytest.fixture(autouse=True)
def _tenant_context() -> Generator[None, None, None]:
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@pytest.fixture
def pg_cache() -> PostgresCacheBackend:
    return PostgresCacheBackend(TEST_TENANT_ID)


@pytest.fixture
def redis_cache() -> RedisCacheBackend:
    from onyx.redis.redis_pool import redis_pool

    return RedisCacheBackend(redis_pool.get_client(TEST_TENANT_ID))


@pytest.fixture(params=["postgres", "redis"], ids=["postgres", "redis"])
def cache(
    request: pytest.FixtureRequest,
    pg_cache: PostgresCacheBackend,
    redis_cache: RedisCacheBackend,
) -> CacheBackend:
    if request.param == "postgres":
        return pg_cache
    return redis_cache


================================================
FILE: backend/tests/external_dependency_unit/cache/test_cache_backend_parity.py
================================================
"""Parameterized tests that run the same CacheBackend operations against
both Redis and PostgreSQL, asserting identical return values.

Each test runs twice (once per backend) via the ``cache`` fixture defined
in conftest.py.
"""

import time
from uuid import uuid4

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import TTL_KEY_NOT_FOUND
from onyx.cache.interface import TTL_NO_EXPIRY


def _key() -> str:
    return f"parity_{uuid4().hex[:12]}"


class TestKVParity:
    def test_get_missing(self, cache: CacheBackend) -> None:
        assert cache.get(_key()) is None

    def test_get_set(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"value")
        assert cache.get(k) == b"value"

    def test_overwrite(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"a")
        cache.set(k, b"b")
        assert cache.get(k) == b"b"

    def test_set_string(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, "hello")
        assert cache.get(k) == b"hello"

    def test_set_int(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, 42)
        assert cache.get(k) == b"42"

    def test_delete(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"x")
        cache.delete(k)
        assert cache.get(k) is None

    def test_exists(self, cache: CacheBackend) -> None:
        k = _key()
        assert not cache.exists(k)
        cache.set(k, b"x")
        assert cache.exists(k)


class TestTTLParity:
    def test_ttl_missing(self, cache: CacheBackend) -> None:
        assert cache.ttl(_key()) == TTL_KEY_NOT_FOUND

    def test_ttl_no_expiry(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"x")
        assert cache.ttl(k) == TTL_NO_EXPIRY

    def test_ttl_remaining(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"x", ex=10)
        remaining = cache.ttl(k)
        assert 8 <= remaining <= 10

    def test_set_with_ttl_expires(self, cache: CacheBackend) -> None:
        k = _key()
        cache.set(k, b"x", ex=1)
        assert cache.get(k) == b"x"
        time.sleep(1.5)
        assert cache.get(k) is None


class TestLockParity:
    def test_acquire_release(self, cache: CacheBackend) -> None:
        lock = cache.lock(f"parity_lock_{uuid4().hex[:8]}")
        assert lock.acquire(blocking=False)
        assert lock.owned()
        lock.release()
        assert not lock.owned()


class TestListParity:
    def test_rpush_blpop(self, cache: CacheBackend) -> None:
        k = f"parity_list_{uuid4().hex[:8]}"
        cache.rpush(k, b"item")
        result = cache.blpop([k], timeout=1)
        assert result is not None
        assert result[1] == b"item"

    def test_blpop_timeout(self, cache: CacheBackend) -> None:
        result = cache.blpop([f"parity_empty_{uuid4().hex[:8]}"], timeout=1)
        assert result is None


================================================
FILE: backend/tests/external_dependency_unit/cache/test_kv_store_cache_layer.py
================================================
"""Tests for PgRedisKVStore's cache layer integration with CacheBackend.

Verifies that the KV store correctly uses the CacheBackend for caching
in front of PostgreSQL: cache hits, cache misses falling through to PG,
cache population after PG reads, cache invalidation on delete, and
graceful degradation when the cache backend raises.

Requires running PostgreSQL.
"""

import json
from collections.abc import Generator
from unittest.mock import MagicMock

import pytest
from sqlalchemy import delete

from onyx.cache.interface import CacheBackend
from onyx.cache.postgres_backend import PostgresCacheBackend
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import CacheStore
from onyx.db.models import KVStore
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.key_value_store.store import PgRedisKVStore
from onyx.key_value_store.store import REDIS_KEY_PREFIX
from tests.external_dependency_unit.constants import TEST_TENANT_ID


@pytest.fixture(autouse=True)
def _clean_kv() -> Generator[None, None, None]:
    yield
    with get_session_with_tenant(tenant_id=TEST_TENANT_ID) as session:
        session.execute(delete(KVStore))
        session.execute(delete(CacheStore))
        session.commit()


@pytest.fixture
def kv_store(pg_cache: PostgresCacheBackend) -> PgRedisKVStore:
    return PgRedisKVStore(cache=pg_cache)


class TestStoreAndLoad:
    def test_store_populates_cache_and_pg(
        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend
    ) -> None:
        kv_store.store("k1", {"hello": "world"})

        cached = pg_cache.get(REDIS_KEY_PREFIX + "k1")
        assert cached is not None
        assert json.loads(cached) == {"hello": "world"}

        loaded = kv_store.load("k1")
        assert loaded == {"hello": "world"}

    def test_load_returns_cached_value_without_pg_hit(
        self, pg_cache: PostgresCacheBackend
    ) -> None:
        """If the cache already has the value, PG should not be queried."""
        pg_cache.set(REDIS_KEY_PREFIX + "cached_only", json.dumps({"from": "cache"}))
        kv = PgRedisKVStore(cache=pg_cache)
        assert kv.load("cached_only") == {"from": "cache"}

    def test_load_falls_through_to_pg_on_cache_miss(
        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend
    ) -> None:
        kv_store.store("k2", [1, 2, 3])

        pg_cache.delete(REDIS_KEY_PREFIX + "k2")
        assert pg_cache.get(REDIS_KEY_PREFIX + "k2") is None

        loaded = kv_store.load("k2")
        assert loaded == [1, 2, 3]

        repopulated = pg_cache.get(REDIS_KEY_PREFIX + "k2")
        assert repopulated is not None
        assert json.loads(repopulated) == [1, 2, 3]

    def test_load_with_refresh_cache_skips_cache(
        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend
    ) -> None:
        kv_store.store("k3", "original")

        pg_cache.set(REDIS_KEY_PREFIX + "k3", json.dumps("stale"))

        loaded = kv_store.load("k3", refresh_cache=True)
        assert loaded == "original"


class TestDelete:
    def test_delete_removes_from_cache_and_pg(
        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend
    ) -> None:
        kv_store.store("del_me", "bye")
        kv_store.delete("del_me")

        assert pg_cache.get(REDIS_KEY_PREFIX + "del_me") is None

        with pytest.raises(KvKeyNotFoundError):
            kv_store.load("del_me")

    def test_delete_missing_key_raises(self, kv_store: PgRedisKVStore) -> None:
        with pytest.raises(KvKeyNotFoundError):
            kv_store.delete("nonexistent")


class TestCacheFailureGracefulDegradation:
    def test_store_succeeds_when_cache_set_raises(self) -> None:
        failing_cache = MagicMock(spec=CacheBackend)
        failing_cache.set.side_effect = ConnectionError("cache down")

        kv = PgRedisKVStore(cache=failing_cache)
        kv.store("resilient", {"data": True})

        working_cache = MagicMock(spec=CacheBackend)
        working_cache.get.return_value = None
        kv_reader = PgRedisKVStore(cache=working_cache)
        loaded = kv_reader.load("resilient")
        assert loaded == {"data": True}

    def test_load_falls_through_when_cache_get_raises(self) -> None:
        failing_cache = MagicMock(spec=CacheBackend)
        failing_cache.get.side_effect = ConnectionError("cache down")
        failing_cache.set.side_effect = ConnectionError("cache down")

        kv = PgRedisKVStore(cache=failing_cache)
        kv.store("survive", 42)
        loaded = kv.load("survive")
        assert loaded == 42


================================================
FILE: backend/tests/external_dependency_unit/cache/test_postgres_cache_backend.py
================================================
"""Tests for PostgresCacheBackend against real PostgreSQL.

Covers every method on the backend: KV CRUD, TTL behaviour, advisory
locks (acquire / release / contention), list operations (rpush / blpop),
and the periodic cleanup function.
"""

import time
from uuid import uuid4

from sqlalchemy import select

from onyx.cache.interface import TTL_KEY_NOT_FOUND
from onyx.cache.interface import TTL_NO_EXPIRY
from onyx.cache.postgres_backend import cleanup_expired_cache_entries
from onyx.cache.postgres_backend import PostgresCacheBackend
from onyx.db.models import CacheStore


def _key() -> str:
    return f"test_{uuid4().hex[:12]}"


# ------------------------------------------------------------------
# Basic KV
# ------------------------------------------------------------------


class TestKV:
    def test_get_set(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"hello")
        assert pg_cache.get(k) == b"hello"

    def test_get_missing(self, pg_cache: PostgresCacheBackend) -> None:
        assert pg_cache.get(_key()) is None

    def test_set_overwrite(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"first")
        pg_cache.set(k, b"second")
        assert pg_cache.get(k) == b"second"

    def test_set_string_value(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, "string_val")
        assert pg_cache.get(k) == b"string_val"

    def test_set_int_value(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, 42)
        assert pg_cache.get(k) == b"42"

    def test_delete(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"to_delete")
        pg_cache.delete(k)
        assert pg_cache.get(k) is None

    def test_delete_missing_is_noop(self, pg_cache: PostgresCacheBackend) -> None:
        pg_cache.delete(_key())

    def test_exists(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        assert not pg_cache.exists(k)
        pg_cache.set(k, b"x")
        assert pg_cache.exists(k)


# ------------------------------------------------------------------
# TTL
# ------------------------------------------------------------------


class TestTTL:
    def test_set_with_ttl_expires(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"ephemeral", ex=1)
        assert pg_cache.get(k) == b"ephemeral"
        time.sleep(1.5)
        assert pg_cache.get(k) is None

    def test_ttl_no_expiry(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"forever")
        assert pg_cache.ttl(k) == TTL_NO_EXPIRY

    def test_ttl_missing_key(self, pg_cache: PostgresCacheBackend) -> None:
        assert pg_cache.ttl(_key()) == TTL_KEY_NOT_FOUND

    def test_ttl_remaining(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"x", ex=10)
        remaining = pg_cache.ttl(k)
        assert 8 <= remaining <= 10

    def test_ttl_expired_key(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"x", ex=1)
        time.sleep(1.5)
        assert pg_cache.ttl(k) == TTL_KEY_NOT_FOUND

    def test_expire_adds_ttl(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"x")
        assert pg_cache.ttl(k) == TTL_NO_EXPIRY
        pg_cache.expire(k, 10)
        assert 8 <= pg_cache.ttl(k) <= 10

    def test_exists_respects_ttl(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"x", ex=1)
        assert pg_cache.exists(k)
        time.sleep(1.5)
        assert not pg_cache.exists(k)


# ------------------------------------------------------------------
# Locks
# ------------------------------------------------------------------


class TestLock:
    def test_acquire_release(self, pg_cache: PostgresCacheBackend) -> None:
        lock = pg_cache.lock(f"lock_{uuid4().hex[:8]}")
        assert lock.acquire(blocking=False)
        assert lock.owned()
        lock.release()
        assert not lock.owned()

    def test_contention(self, pg_cache: PostgresCacheBackend) -> None:
        name = f"contention_{uuid4().hex[:8]}"
        lock1 = pg_cache.lock(name)
        lock2 = pg_cache.lock(name)

        assert lock1.acquire(blocking=False)
        assert not lock2.acquire(blocking=False)

        lock1.release()
        assert lock2.acquire(blocking=False)
        lock2.release()

    def test_context_manager(self, pg_cache: PostgresCacheBackend) -> None:
        with pg_cache.lock(f"ctx_{uuid4().hex[:8]}") as lock:
            assert lock.owned()
        assert not lock.owned()

    def test_blocking_timeout(self, pg_cache: PostgresCacheBackend) -> None:
        name = f"timeout_{uuid4().hex[:8]}"
        holder = pg_cache.lock(name)
        holder.acquire(blocking=False)

        waiter = pg_cache.lock(name, timeout=0.3)
        start = time.monotonic()
        assert not waiter.acquire(blocking=True, blocking_timeout=0.3)
        elapsed = time.monotonic() - start
        assert elapsed >= 0.25

        holder.release()


# ------------------------------------------------------------------
# List (rpush / blpop)
# ------------------------------------------------------------------


class TestList:
    def test_rpush_blpop(self, pg_cache: PostgresCacheBackend) -> None:
        k = f"list_{uuid4().hex[:8]}"
        pg_cache.rpush(k, b"item1")
        result = pg_cache.blpop([k], timeout=1)
        assert result is not None
        assert result == (k.encode(), b"item1")

    def test_blpop_timeout(self, pg_cache: PostgresCacheBackend) -> None:
        result = pg_cache.blpop([f"empty_{uuid4().hex[:8]}"], timeout=1)
        assert result is None

    def test_fifo_order(self, pg_cache: PostgresCacheBackend) -> None:
        k = f"fifo_{uuid4().hex[:8]}"
        pg_cache.rpush(k, b"first")
        time.sleep(0.01)
        pg_cache.rpush(k, b"second")

        r1 = pg_cache.blpop([k], timeout=1)
        r2 = pg_cache.blpop([k], timeout=1)
        assert r1 is not None and r1[1] == b"first"
        assert r2 is not None and r2[1] == b"second"

    def test_multiple_keys(self, pg_cache: PostgresCacheBackend) -> None:
        k1 = f"mk1_{uuid4().hex[:8]}"
        k2 = f"mk2_{uuid4().hex[:8]}"
        pg_cache.rpush(k2, b"from_k2")

        result = pg_cache.blpop([k1, k2], timeout=1)
        assert result is not None
        assert result == (k2.encode(), b"from_k2")


# ------------------------------------------------------------------
# Cleanup
# ------------------------------------------------------------------


class TestCleanup:
    def test_removes_expired_rows(self, pg_cache: PostgresCacheBackend) -> None:
        from onyx.db.engine.sql_engine import get_session_with_current_tenant

        k = _key()
        pg_cache.set(k, b"stale", ex=1)
        time.sleep(1.5)
        cleanup_expired_cache_entries()

        stmt = select(CacheStore.key).where(CacheStore.key == k)
        with get_session_with_current_tenant() as session:
            row = session.execute(stmt).first()
        assert row is None, "expired row should be physically deleted"

    def test_preserves_unexpired_rows(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"fresh", ex=300)
        cleanup_expired_cache_entries()
        assert pg_cache.get(k) == b"fresh"

    def test_preserves_no_ttl_rows(self, pg_cache: PostgresCacheBackend) -> None:
        k = _key()
        pg_cache.set(k, b"permanent")
        cleanup_expired_cache_entries()
        assert pg_cache.get(k) == b"permanent"


================================================
FILE: backend/tests/external_dependency_unit/celery/test_docfetching_priority.py
================================================
"""
External dependency unit tests for document processing job priority.

Tests that first-time indexing connectors (no last_successful_index_time)
get higher priority than re-indexing jobs from connectors that have
previously completed indexing.

Uses real Redis for locking and real database objects for CC pairs and search settings.
"""

from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.background.celery.tasks.docfetching.task_creation_utils import (
    try_creating_docfetching_task,
)
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import EmbeddingPrecision
from onyx.db.enums import IndexModelStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import SearchSettings
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.constants import TEST_TENANT_ID


def _create_test_connector(db_session: Session, name: str) -> Connector:
    """Create a test connector with all required fields."""
    connector = Connector(
        name=name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
        refresh_freq=3600,
    )
    db_session.add(connector)
    db_session.commit()
    db_session.refresh(connector)
    return connector


def _create_test_credential(db_session: Session) -> Credential:
    """Create a test credential with all required fields."""
    credential = Credential(
        name=f"test_credential_{uuid4().hex[:8]}",
        source=DocumentSource.FILE,
        credential_json={},
        admin_public=True,
    )
    db_session.add(credential)
    db_session.commit()
    db_session.refresh(credential)
    return credential


def _create_test_cc_pair(
    db_session: Session,
    connector: Connector,
    credential: Credential,
    status: ConnectorCredentialPairStatus,
    name: str,
    last_successful_index_time: datetime | None = None,
) -> ConnectorCredentialPair:
    """Create a connector credential pair with the specified status."""
    cc_pair = ConnectorCredentialPair(
        name=name,
        connector_id=connector.id,
        credential_id=credential.id,
        status=status,
        access_type=AccessType.PUBLIC,
        last_successful_index_time=last_successful_index_time,
    )
    db_session.add(cc_pair)
    db_session.commit()
    db_session.refresh(cc_pair)
    return cc_pair


def _create_test_search_settings(
    db_session: Session, index_name: str
) -> SearchSettings:
    """Create test search settings with all required fields."""
    search_settings = SearchSettings(
        model_name="test-model",
        model_dim=768,
        normalize=True,
        query_prefix="",
        passage_prefix="",
        status=IndexModelStatus.PRESENT,
        index_name=index_name,
        embedding_precision=EmbeddingPrecision.FLOAT,
    )
    db_session.add(search_settings)
    db_session.commit()
    db_session.refresh(search_settings)
    return search_settings


class TestDocfetchingTaskPriorityWithRealObjects:
    """
    Tests for document fetching task priority based on last_successful_index_time.

    Uses real Redis for locking and real database objects for CC pairs
    and search settings.
    """

    @pytest.mark.parametrize(
        "has_successful_index,expected_priority",
        [
            # First-time indexing (no last_successful_index_time) should get HIGH priority
            (False, OnyxCeleryPriority.HIGH),
            # Re-indexing (has last_successful_index_time) should get MEDIUM priority
            (True, OnyxCeleryPriority.MEDIUM),
        ],
    )
    @patch(
        "onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt"
    )
    def test_priority_based_on_last_successful_index_time(
        self,
        mock_try_create_index_attempt: MagicMock,
        db_session: Session,
        has_successful_index: bool,
        expected_priority: OnyxCeleryPriority,
    ) -> None:
        """
        Test that first-time indexing connectors get higher priority than re-indexing.

        Priority is determined by last_successful_index_time:
        - None (never indexed): HIGH priority
        - Has timestamp (previously indexed): MEDIUM priority

        Uses real Redis for locking and real database objects.
        """
        # Create unique names to avoid conflicts between test runs
        unique_suffix = uuid4().hex[:8]

        # Determine last_successful_index_time based on the test case
        last_successful_index_time = (
            datetime.now(timezone.utc) if has_successful_index else None
        )

        # Create real database objects
        connector = _create_test_connector(
            db_session, f"test_connector_{has_successful_index}_{unique_suffix}"
        )
        credential = _create_test_credential(db_session)
        cc_pair = _create_test_cc_pair(
            db_session,
            connector,
            credential,
            ConnectorCredentialPairStatus.ACTIVE,
            name=f"test_cc_pair_{has_successful_index}_{unique_suffix}",
            last_successful_index_time=last_successful_index_time,
        )
        search_settings = _create_test_search_settings(
            db_session, f"test_index_{unique_suffix}"
        )

        # Mock the index attempt creation to return a valid ID
        mock_try_create_index_attempt.return_value = 12345

        # Mock celery app to capture task submission
        mock_celery_app = MagicMock()
        mock_celery_app.send_task.return_value = MagicMock()

        # Use real Redis client
        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)

        # Call the function with real objects
        result = try_creating_docfetching_task(
            celery_app=mock_celery_app,
            cc_pair=cc_pair,
            search_settings=search_settings,
            reindex=False,
            db_session=db_session,
            r=redis_client,
            tenant_id=TEST_TENANT_ID,
        )

        # Verify task was created
        assert result == 12345

        # Verify send_task was called with the expected priority
        mock_celery_app.send_task.assert_called_once()
        call_kwargs = mock_celery_app.send_task.call_args
        actual_priority = call_kwargs.kwargs["priority"]
        assert (
            actual_priority == expected_priority
        ), f"Expected priority {expected_priority} for has_successful_index={has_successful_index}, but got {actual_priority}"

    @patch(
        "onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt"
    )
    def test_no_task_created_when_deleting(
        self,
        mock_try_create_index_attempt: MagicMock,
        db_session: Session,
    ) -> None:
        """Test that no task is created when connector is in DELETING status."""
        unique_suffix = uuid4().hex[:8]

        connector = _create_test_connector(
            db_session, f"test_connector_deleting_{unique_suffix}"
        )
        credential = _create_test_credential(db_session)
        cc_pair = _create_test_cc_pair(
            db_session,
            connector,
            credential,
            ConnectorCredentialPairStatus.DELETING,
            name=f"test_cc_pair_deleting_{unique_suffix}",
        )
        search_settings = _create_test_search_settings(
            db_session, f"test_index_deleting_{unique_suffix}"
        )

        mock_celery_app = MagicMock()
        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)

        result = try_creating_docfetching_task(
            celery_app=mock_celery_app,
            cc_pair=cc_pair,
            search_settings=search_settings,
            reindex=False,
            db_session=db_session,
            r=redis_client,
            tenant_id=TEST_TENANT_ID,
        )

        # Verify no task was created
        assert result is None
        mock_celery_app.send_task.assert_not_called()
        mock_try_create_index_attempt.assert_not_called()

    @patch(
        "onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt"
    )
    def test_redis_lock_prevents_concurrent_task_creation(
        self,
        mock_try_create_index_attempt: MagicMock,
        db_session: Session,
    ) -> None:
        """
        Test that the Redis lock prevents concurrent task creation attempts.

        This test uses real Redis to verify the locking mechanism works correctly.
        When the lock is already held, the function should return None without
        attempting to create a task.
        """
        unique_suffix = uuid4().hex[:8]

        connector = _create_test_connector(
            db_session, f"test_connector_lock_{unique_suffix}"
        )
        credential = _create_test_credential(db_session)
        cc_pair = _create_test_cc_pair(
            db_session,
            connector,
            credential,
            ConnectorCredentialPairStatus.INITIAL_INDEXING,
            name=f"test_cc_pair_lock_{unique_suffix}",
        )
        search_settings = _create_test_search_settings(
            db_session, f"test_index_lock_{unique_suffix}"
        )

        mock_try_create_index_attempt.return_value = 12345
        mock_celery_app = MagicMock()
        mock_celery_app.send_task.return_value = MagicMock()

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)

        # Acquire the lock before calling the function
        from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX

        lock = redis_client.lock(
            DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_indexing_task",
            timeout=30,
        )

        try:
            acquired = lock.acquire(blocking=False)
            assert acquired, "Failed to acquire lock for test"

            # Now try to create a task - should fail because lock is held
            result = try_creating_docfetching_task(
                celery_app=mock_celery_app,
                cc_pair=cc_pair,
                search_settings=search_settings,
                reindex=False,
                db_session=db_session,
                r=redis_client,
                tenant_id=TEST_TENANT_ID,
            )

            # Should return None because lock couldn't be acquired
            assert result is None
            mock_celery_app.send_task.assert_not_called()

        finally:
            # Always release the lock
            if lock.owned():
                lock.release()

    @patch(
        "onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt"
    )
    def test_lock_released_after_successful_task_creation(
        self,
        mock_try_create_index_attempt: MagicMock,
        db_session: Session,
    ) -> None:
        """
        Test that the Redis lock is released after successful task creation.

        This verifies that subsequent calls can acquire the lock and create tasks.
        """
        unique_suffix = uuid4().hex[:8]

        connector = _create_test_connector(
            db_session, f"test_connector_release_{unique_suffix}"
        )
        credential = _create_test_credential(db_session)
        cc_pair = _create_test_cc_pair(
            db_session,
            connector,
            credential,
            ConnectorCredentialPairStatus.INITIAL_INDEXING,
            name=f"test_cc_pair_release_{unique_suffix}",
        )
        search_settings = _create_test_search_settings(
            db_session, f"test_index_release_{unique_suffix}"
        )

        mock_try_create_index_attempt.return_value = 12345
        mock_celery_app = MagicMock()
        mock_celery_app.send_task.return_value = MagicMock()

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)

        # First call should succeed
        result1 = try_creating_docfetching_task(
            celery_app=mock_celery_app,
            cc_pair=cc_pair,
            search_settings=search_settings,
            reindex=False,
            db_session=db_session,
            r=redis_client,
            tenant_id=TEST_TENANT_ID,
        )
        assert result1 == 12345

        # Reset mocks for second call
        mock_celery_app.reset_mock()
        mock_try_create_index_attempt.reset_mock()
        mock_try_create_index_attempt.return_value = 67890

        # Second call should also succeed (lock was released)
        result2 = try_creating_docfetching_task(
            celery_app=mock_celery_app,
            cc_pair=cc_pair,
            search_settings=search_settings,
            reindex=False,
            db_session=db_session,
            r=redis_client,
            tenant_id=TEST_TENANT_ID,
        )
        assert result2 == 67890

        # Both calls should have submitted tasks
        mock_celery_app.send_task.assert_called_once()


================================================
FILE: backend/tests/external_dependency_unit/celery/test_docprocessing_priority.py
================================================
"""
External dependency unit tests for docprocessing task priority.

Tests that docprocessing tasks spawned by connector_document_extraction
get the correct priority based on last_successful_index_time.

Uses real database objects for CC pairs, search settings, and index attempts.
"""

from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.background.indexing.run_docfetching import connector_document_extraction
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import EmbeddingPrecision
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import IndexAttempt
from onyx.db.models import SearchSettings
from tests.external_dependency_unit.constants import TEST_TENANT_ID


def _create_test_connector(db_session: Session, name: str) -> Connector:
    """Create a test connector with all required fields."""
    connector = Connector(
        name=name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
        refresh_freq=3600,
    )
    db_session.add(connector)
    db_session.commit()
    db_session.refresh(connector)
    return connector


def _create_test_credential(db_session: Session) -> Credential:
    """Create a test credential with all required fields."""
    credential = Credential(
        name=f"test_credential_{uuid4().hex[:8]}",
        source=DocumentSource.FILE,
        credential_json={},
        admin_public=True,
    )
    db_session.add(credential)
    db_session.commit()
    db_session.refresh(credential)
    return credential


def _create_test_cc_pair(
    db_session: Session,
    connector: Connector,
    credential: Credential,
    status: ConnectorCredentialPairStatus,
    name: str,
    last_successful_index_time: datetime | None = None,
) -> ConnectorCredentialPair:
    """Create a connector credential pair with the specified status."""
    cc_pair = ConnectorCredentialPair(
        name=name,
        connector_id=connector.id,
        credential_id=credential.id,
        status=status,
        access_type=AccessType.PUBLIC,
        last_successful_index_time=last_successful_index_time,
    )
    db_session.add(cc_pair)
    db_session.commit()
    db_session.refresh(cc_pair)
    return cc_pair


def _create_test_search_settings(
    db_session: Session, index_name: str
) -> SearchSettings:
    """Create test search settings with all required fields."""
    search_settings = SearchSettings(
        model_name="test-model",
        model_dim=768,
        normalize=True,
        query_prefix="",
        passage_prefix="",
        status=IndexModelStatus.PRESENT,
        index_name=index_name,
        embedding_precision=EmbeddingPrecision.FLOAT,
    )
    db_session.add(search_settings)
    db_session.commit()
    db_session.refresh(search_settings)
    return search_settings


def _create_test_index_attempt(
    db_session: Session,
    cc_pair: ConnectorCredentialPair,
    search_settings: SearchSettings,
    from_beginning: bool = False,
) -> IndexAttempt:
    """Create a test index attempt with the specified cc_pair and search_settings."""
    index_attempt = IndexAttempt(
        connector_credential_pair_id=cc_pair.id,
        search_settings_id=search_settings.id,
        from_beginning=from_beginning,
        status=IndexingStatus.IN_PROGRESS,
        celery_task_id=f"test_celery_task_{uuid4().hex[:8]}",
    )
    db_session.add(index_attempt)
    db_session.commit()
    db_session.refresh(index_attempt)
    return index_attempt


class TestDocprocessingPriorityInDocumentExtraction:
    """
    Tests for docprocessing task priority within connector_document_extraction.

    Verifies that the priority passed to docprocessing tasks is determined
    by last_successful_index_time on the cc_pair.
    """

    @pytest.mark.parametrize(
        "has_successful_index,expected_priority",
        [
            # First-time indexing (no last_successful_index_time) should get HIGH priority
            (False, OnyxCeleryPriority.HIGH),
            # Re-indexing (has last_successful_index_time) should get MEDIUM priority
            (True, OnyxCeleryPriority.MEDIUM),
        ],
    )
    @patch("onyx.background.indexing.run_docfetching.get_document_batch_storage")
    @patch("onyx.background.indexing.run_docfetching.MemoryTracer")
    @patch("onyx.background.indexing.run_docfetching._get_connector_runner")
    @patch(
        "onyx.background.indexing.run_docfetching.strip_null_characters",
        side_effect=lambda batch: batch,
    )
    @patch(
        "onyx.background.indexing.run_docfetching.get_recent_completed_attempts_for_cc_pair"
    )
    @patch(
        "onyx.background.indexing.run_docfetching.get_last_successful_attempt_poll_range_end"
    )
    @patch("onyx.background.indexing.run_docfetching.save_checkpoint")
    @patch("onyx.background.indexing.run_docfetching.get_latest_valid_checkpoint")
    @patch("onyx.background.indexing.run_docfetching.get_redis_client")
    @patch("onyx.background.indexing.run_docfetching.ensure_source_node_exists")
    @patch("onyx.background.indexing.run_docfetching.get_source_node_id_from_cache")
    @patch("onyx.background.indexing.run_docfetching.get_node_id_from_raw_id")
    @patch("onyx.background.indexing.run_docfetching.cache_hierarchy_nodes_batch")
    def test_docprocessing_priority_based_on_last_successful_index_time(
        self,
        mock_cache_hierarchy_nodes_batch: MagicMock,  # noqa: ARG002
        mock_get_node_id_from_raw_id: MagicMock,
        mock_get_source_node_id_from_cache: MagicMock,
        mock_ensure_source_node_exists: MagicMock,
        mock_get_redis_client: MagicMock,
        mock_get_latest_valid_checkpoint: MagicMock,
        mock_save_checkpoint: MagicMock,  # noqa: ARG002
        mock_get_last_successful_attempt_poll_range_end: MagicMock,
        mock_get_recent_completed_attempts: MagicMock,
        mock_strip_null_characters: MagicMock,  # noqa: ARG002
        mock_get_connector_runner: MagicMock,
        mock_memory_tracer_class: MagicMock,
        mock_get_batch_storage: MagicMock,
        db_session: Session,
        has_successful_index: bool,
        expected_priority: OnyxCeleryPriority,
    ) -> None:
        """
        Test that docprocessing tasks get the correct priority based on
        last_successful_index_time.

        Priority is determined by last_successful_index_time:
        - None (never indexed): HIGH priority
        - Has timestamp (previously indexed): MEDIUM priority

        Uses real database objects for CC pairs and search settings.
        """
        unique_suffix = uuid4().hex[:8]

        # Determine last_successful_index_time based on the test case
        last_successful_index_time = (
            datetime.now(timezone.utc) if has_successful_index else None
        )

        # Create real database objects
        connector = _create_test_connector(
            db_session, f"test_connector_docproc_{has_successful_index}_{unique_suffix}"
        )
        credential = _create_test_credential(db_session)
        cc_pair = _create_test_cc_pair(
            db_session,
            connector,
            credential,
            ConnectorCredentialPairStatus.ACTIVE,
            name=f"test_cc_pair_docproc_{has_successful_index}_{unique_suffix}",
            last_successful_index_time=last_successful_index_time,
        )
        search_settings = _create_test_search_settings(
            db_session, f"test_index_docproc_{unique_suffix}"
        )
        index_attempt = _create_test_index_attempt(
            db_session, cc_pair, search_settings, from_beginning=False
        )

        # Setup mocks
        mock_batch_storage = MagicMock()
        mock_get_batch_storage.return_value = mock_batch_storage

        mock_memory_tracer = MagicMock()
        mock_memory_tracer_class.return_value = mock_memory_tracer

        # Mock Redis-related functions (not the focus of this test)
        # Configure mock Redis client to return None for common operations
        # as a safety net in case any patches don't work as expected
        mock_redis_client = MagicMock()
        mock_redis_client.get.return_value = None
        mock_redis_client.hget.return_value = None
        mock_redis_client.hset.return_value = None
        mock_redis_client.exists.return_value = 0
        mock_redis_client.expire.return_value = True
        mock_get_redis_client.return_value = mock_redis_client

        # Mock hierarchy/cache functions
        mock_ensure_source_node_exists.return_value = 1  # Return a valid node ID
        mock_get_source_node_id_from_cache.return_value = (
            1  # Return a valid source node ID
        )
        mock_get_node_id_from_raw_id.return_value = (None, False)  # (node_id, found)
        # cache_hierarchy_nodes_batch doesn't need a return value (returns None)

        # Create checkpoint mocks - initial checkpoint has_more=True, final has_more=False
        mock_initial_checkpoint = MagicMock(has_more=True)
        mock_final_checkpoint = MagicMock(has_more=False)

        # get_latest_valid_checkpoint returns (checkpoint, resuming_from_checkpoint)
        mock_get_latest_valid_checkpoint.return_value = (mock_initial_checkpoint, False)

        # Create a mock connector runner that yields one document batch
        mock_connector = MagicMock()
        mock_connector_runner = MagicMock()
        mock_connector_runner.connector = mock_connector
        # The connector runner yields (document_batch, hierarchy_nodes, failure, next_checkpoint)
        # We provide one batch of documents to trigger a send_task call
        mock_doc = MagicMock()
        mock_doc.to_short_descriptor.return_value = "test_doc"
        mock_doc.sections = []
        # Set to None to avoid Redis operations trying to resolve hierarchy
        mock_doc.parent_hierarchy_raw_node_id = None
        mock_doc.parent_hierarchy_node_id = None
        mock_connector_runner.run.return_value = iter(
            [([mock_doc], None, None, mock_final_checkpoint)]
        )
        mock_get_connector_runner.return_value = mock_connector_runner

        mock_get_recent_completed_attempts.return_value = iter([])
        mock_get_last_successful_attempt_poll_range_end.return_value = 0

        # Mock celery app to capture task submission
        mock_celery_app = MagicMock()
        mock_celery_app.send_task.return_value = MagicMock()

        # Call the function
        connector_document_extraction(
            app=mock_celery_app,
            index_attempt_id=index_attempt.id,
            cc_pair_id=cc_pair.id,
            search_settings_id=search_settings.id,
            tenant_id=TEST_TENANT_ID,
            callback=None,
        )

        # Verify send_task was called with the expected priority for docprocessing
        assert mock_celery_app.send_task.called, "send_task should have been called"
        call_kwargs = mock_celery_app.send_task.call_args
        actual_priority = call_kwargs.kwargs["priority"]
        assert (
            actual_priority == expected_priority
        ), f"Expected priority {expected_priority} for has_successful_index={has_successful_index}, but got {actual_priority}"


================================================
FILE: backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
================================================
"""
External dependency unit tests for persona file sync.

Validates that:

1. The check_for_user_file_project_sync beat task picks up UserFiles with
   needs_persona_sync=True (not just needs_project_sync).

2. The process_single_user_file_project_sync worker task reads persona
   associations from the DB, passes persona_ids to the document index via
   VespaDocumentUserFields, and clears needs_persona_sync afterwards.

3. upsert_persona correctly marks affected UserFiles with
   needs_persona_sync=True when file associations change.

Uses real Redis and PostgreSQL.  Document index (Vespa) calls are mocked
since we only need to verify the arguments passed to update_single.
"""

from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from unittest.mock import PropertyMock
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.background.celery.tasks.user_file_processing.tasks import (
    check_for_user_file_project_sync,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_single_user_file_project_sync,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    user_file_project_sync_lock_key,
)
from onyx.db.enums import UserFileStatus
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.persona import upsert_persona
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _create_completed_user_file(
    db_session: Session,
    user: User,
    needs_persona_sync: bool = False,
    needs_project_sync: bool = False,
) -> UserFile:
    """Insert a UserFile in COMPLETED status."""
    uf = UserFile(
        id=uuid4(),
        user_id=user.id,
        file_id=f"test_file_{uuid4().hex[:8]}",
        name=f"test_{uuid4().hex[:8]}.txt",
        file_type="text/plain",
        status=UserFileStatus.COMPLETED,
        needs_persona_sync=needs_persona_sync,
        needs_project_sync=needs_project_sync,
        chunk_count=5,
    )
    db_session.add(uf)
    db_session.commit()
    db_session.refresh(uf)
    return uf


def _create_test_persona(
    db_session: Session,
    user: User,
    user_files: list[UserFile] | None = None,
) -> Persona:
    """Create a minimal Persona via direct model insert."""
    persona = Persona(
        name=f"Test Persona {uuid4().hex[:8]}",
        description="Test persona",
        system_prompt="You are a test assistant",
        task_prompt="Answer the question",
        tools=[],
        document_sets=[],
        users=[user],
        groups=[],
        is_listed=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
        user_files=user_files or [],
        user_id=user.id,
    )
    db_session.add(persona)
    db_session.commit()
    db_session.refresh(persona)
    return persona


def _link_file_to_persona(
    db_session: Session, persona: Persona, user_file: UserFile
) -> None:
    """Create the join table row between a persona and a user file."""
    link = Persona__UserFile(persona_id=persona.id, user_file_id=user_file.id)
    db_session.add(link)
    db_session.commit()


_PATCH_QUEUE_DEPTH = "onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth"


@contextmanager
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
    """Patch the ``app`` property on a bound Celery task."""
    task_instance = task.run.__self__
    with (
        patch.object(
            type(task_instance),
            "app",
            new_callable=PropertyMock,
            return_value=mock_app,
        ),
        patch(_PATCH_QUEUE_DEPTH, return_value=0),
        patch(
            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
            return_value=MagicMock(),
        ),
    ):
        yield


# ---------------------------------------------------------------------------
# Test: check_for_user_file_project_sync picks up persona sync
# ---------------------------------------------------------------------------


class TestCheckSweepIncludesPersonaSync:
    """The beat task must pick up files needing persona sync, not just project sync."""

    def test_persona_sync_flag_enqueues_task(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file with needs_persona_sync=True (and COMPLETED) gets enqueued."""
        user = create_test_user(db_session, "persona_sweep")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)

        mock_app = MagicMock()

        with _patch_task_app(check_for_user_file_project_sync, mock_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        enqueued_ids = {
            call.kwargs["kwargs"]["user_file_id"]
            for call in mock_app.send_task.call_args_list
        }
        assert str(uf.id) in enqueued_ids

    def test_neither_flag_does_not_enqueue(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file with both flags False is not enqueued."""
        user = create_test_user(db_session, "no_sync")
        uf = _create_completed_user_file(db_session, user)

        mock_app = MagicMock()

        with _patch_task_app(check_for_user_file_project_sync, mock_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        enqueued_ids = {
            call.kwargs["kwargs"]["user_file_id"]
            for call in mock_app.send_task.call_args_list
        }
        assert str(uf.id) not in enqueued_ids

    def test_both_flags_enqueues_once(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file with BOTH flags True is enqueued exactly once."""
        user = create_test_user(db_session, "both_flags")
        uf = _create_completed_user_file(
            db_session, user, needs_persona_sync=True, needs_project_sync=True
        )

        mock_app = MagicMock()

        with _patch_task_app(check_for_user_file_project_sync, mock_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        matching_calls = [
            call
            for call in mock_app.send_task.call_args_list
            if call.kwargs["kwargs"]["user_file_id"] == str(uf.id)
        ]
        assert len(matching_calls) == 1


# ---------------------------------------------------------------------------
# Test: process_single_user_file_project_sync passes persona_ids to index
# ---------------------------------------------------------------------------

_PATCH_GET_SETTINGS = (
    "onyx.background.celery.tasks.user_file_processing.tasks.get_active_search_settings"
)
_PATCH_GET_INDICES = (
    "onyx.background.celery.tasks.user_file_processing.tasks.get_all_document_indices"
)
_PATCH_HTTPX_INIT = (
    "onyx.background.celery.tasks.user_file_processing.tasks.httpx_init_vespa_pool"
)
_PATCH_DISABLE_VDB = (
    "onyx.background.celery.tasks.user_file_processing.tasks.DISABLE_VECTOR_DB"
)


class TestSyncTaskWritesPersonaIds:
    """The sync task reads persona associations and sends them to the index."""

    def test_passes_persona_ids_to_update_single(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After linking a file to a persona, sync sends the persona ID."""
        user = create_test_user(db_session, "sync_persona")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        mock_doc_index = MagicMock()
        mock_search_settings = MagicMock()
        mock_search_settings.primary = MagicMock()
        mock_search_settings.secondary = None

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        lock_key = user_file_project_sync_lock_key(str(uf.id))
        redis_client.delete(lock_key)

        with (
            patch(_PATCH_DISABLE_VDB, False),
            patch(_PATCH_HTTPX_INIT),
            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
        ):
            process_single_user_file_project_sync.run(
                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
            )

        mock_doc_index.update_single.assert_called_once()
        call_args = mock_doc_index.update_single.call_args
        user_fields: VespaDocumentUserFields = call_args.kwargs["user_fields"]
        assert user_fields.personas is not None
        assert persona.id in user_fields.personas
        assert call_args.args[0] == str(uf.id)

    def test_clears_persona_sync_flag(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After a successful sync the needs_persona_sync flag is cleared."""
        user = create_test_user(db_session, "sync_clear")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        lock_key = user_file_project_sync_lock_key(str(uf.id))
        redis_client.delete(lock_key)

        with patch(_PATCH_DISABLE_VDB, True):
            process_single_user_file_project_sync.run(
                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
            )

        db_session.refresh(uf)
        assert uf.needs_persona_sync is False

    def test_passes_both_project_and_persona_ids(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file linked to both a project and a persona gets both IDs."""
        from onyx.db.models import Project__UserFile
        from onyx.db.models import UserProject

        user = create_test_user(db_session, "sync_both")
        uf = _create_completed_user_file(
            db_session, user, needs_persona_sync=True, needs_project_sync=True
        )
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        project = UserProject(user_id=user.id, name="test-project", instructions="")
        db_session.add(project)
        db_session.commit()
        db_session.refresh(project)

        link = Project__UserFile(project_id=project.id, user_file_id=uf.id)
        db_session.add(link)
        db_session.commit()

        mock_doc_index = MagicMock()
        mock_search_settings = MagicMock()
        mock_search_settings.primary = MagicMock()
        mock_search_settings.secondary = None

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        lock_key = user_file_project_sync_lock_key(str(uf.id))
        redis_client.delete(lock_key)

        with (
            patch(_PATCH_DISABLE_VDB, False),
            patch(_PATCH_HTTPX_INIT),
            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
        ):
            process_single_user_file_project_sync.run(
                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
            )

        call_kwargs = mock_doc_index.update_single.call_args.kwargs
        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
        assert user_fields.personas is not None
        assert user_fields.user_projects is not None
        assert persona.id in user_fields.personas
        assert project.id in user_fields.user_projects

        # Both flags should be cleared
        db_session.refresh(uf)
        assert uf.needs_persona_sync is False
        assert uf.needs_project_sync is False

    def test_deleted_persona_excluded_from_ids(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A soft-deleted persona should NOT appear in the persona_ids sent to Vespa."""
        user = create_test_user(db_session, "sync_deleted")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        persona.deleted = True
        db_session.commit()

        mock_doc_index = MagicMock()
        mock_search_settings = MagicMock()
        mock_search_settings.primary = MagicMock()
        mock_search_settings.secondary = None

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        lock_key = user_file_project_sync_lock_key(str(uf.id))
        redis_client.delete(lock_key)

        with (
            patch(_PATCH_DISABLE_VDB, False),
            patch(_PATCH_HTTPX_INIT),
            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
        ):
            process_single_user_file_project_sync.run(
                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
            )

        call_kwargs = mock_doc_index.update_single.call_args.kwargs
        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
        assert user_fields.personas is not None
        assert persona.id not in user_fields.personas


# ---------------------------------------------------------------------------
# Test: upsert_persona marks files for persona sync
# ---------------------------------------------------------------------------


class TestUpsertPersonaMarksSyncFlag:
    """upsert_persona must set needs_persona_sync on affected UserFiles."""

    def test_creating_persona_with_files_marks_sync(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        user = create_test_user(db_session, "upsert_create")
        uf = _create_completed_user_file(db_session, user)
        assert uf.needs_persona_sync is False

        upsert_persona(
            user=user,
            name=f"persona-{uuid4().hex[:8]}",
            description="test",
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt="test",
            task_prompt="test",
            datetime_aware=None,
            is_public=True,
            db_session=db_session,
            user_file_ids=[uf.id],
        )

        db_session.refresh(uf)
        assert uf.needs_persona_sync is True

    def test_updating_persona_files_marks_both_old_and_new(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """When file associations change, both the removed and added files are flagged."""
        user = create_test_user(db_session, "upsert_update")
        uf_old = _create_completed_user_file(db_session, user)
        uf_new = _create_completed_user_file(db_session, user)

        persona = upsert_persona(
            user=user,
            name=f"persona-{uuid4().hex[:8]}",
            description="test",
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt="test",
            task_prompt="test",
            datetime_aware=None,
            is_public=True,
            db_session=db_session,
            user_file_ids=[uf_old.id],
        )

        # Clear the flag from creation so we can observe the update
        uf_old.needs_persona_sync = False
        db_session.commit()

        # Now update the persona to swap files
        upsert_persona(
            user=user,
            name=persona.name,
            description=persona.description,
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt=persona.system_prompt,
            task_prompt=persona.task_prompt,
            datetime_aware=None,
            is_public=persona.is_public,
            db_session=db_session,
            persona_id=persona.id,
            user_file_ids=[uf_new.id],
        )

        db_session.refresh(uf_old)
        db_session.refresh(uf_new)
        assert uf_old.needs_persona_sync is True, "Removed file should be flagged"
        assert uf_new.needs_persona_sync is True, "Added file should be flagged"

    def test_removing_all_files_marks_old_files(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Removing all files from a persona flags the previously associated files."""
        user = create_test_user(db_session, "upsert_remove")
        uf = _create_completed_user_file(db_session, user)

        persona = upsert_persona(
            user=user,
            name=f"persona-{uuid4().hex[:8]}",
            description="test",
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt="test",
            task_prompt="test",
            datetime_aware=None,
            is_public=True,
            db_session=db_session,
            user_file_ids=[uf.id],
        )

        uf.needs_persona_sync = False
        db_session.commit()

        upsert_persona(
            user=user,
            name=persona.name,
            description=persona.description,
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt=persona.system_prompt,
            task_prompt=persona.task_prompt,
            datetime_aware=None,
            is_public=persona.is_public,
            db_session=db_session,
            persona_id=persona.id,
            user_file_ids=[],
        )

        db_session.refresh(uf)
        assert uf.needs_persona_sync is True


================================================
FILE: backend/tests/external_dependency_unit/celery/test_pruning_hierarchy_nodes.py
================================================
"""
External dependency unit tests for pruning hierarchy node extraction and DB persistence.

Verifies that:
1. extract_ids_from_runnable_connector correctly separates hierarchy nodes from doc IDs
2. Extracted hierarchy nodes are correctly upserted to Postgres via upsert_hierarchy_nodes_batch
3. Upserting is idempotent (running twice doesn't duplicate nodes)
4. Document-to-hierarchy-node linkage is updated during pruning
5. link_hierarchy_nodes_to_documents links nodes that are also documents
6. HierarchyNodeByConnectorCredentialPair join table population and pruning
7. Orphaned hierarchy node deletion and re-parenting

Uses a mock SlimConnectorWithPermSync that yields known hierarchy nodes and slim documents,
combined with a real PostgreSQL database for verifying persistence.
"""

from collections.abc import Iterator
from typing import Any

from sqlalchemy.orm import Session

from onyx.access.models import ExternalAccess
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
from onyx.connectors.models import InputType
from onyx.connectors.models import SlimDocument
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import HierarchyNodeType
from onyx.db.hierarchy import delete_orphaned_hierarchy_nodes
from onyx.db.hierarchy import ensure_source_node_exists
from onyx.db.hierarchy import get_all_hierarchy_nodes_for_source
from onyx.db.hierarchy import get_hierarchy_node_by_raw_id
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import remove_stale_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import reparent_orphaned_hierarchy_nodes
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import Document as DbDocument
from onyx.db.models import HierarchyNode as DBHierarchyNode
from onyx.db.models import HierarchyNodeByConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.kg.models import KGStage

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

TEST_SOURCE = DocumentSource.SLACK

CHANNEL_A_ID = "C_GENERAL"
CHANNEL_A_NAME = "#general"
CHANNEL_B_ID = "C_RANDOM"
CHANNEL_B_NAME = "#random"
CHANNEL_C_ID = "C_ENGINEERING"
CHANNEL_C_NAME = "#engineering"

SLIM_DOC_IDS = ["msg-001", "msg-002", "msg-003"]


# ---------------------------------------------------------------------------
# Mock connector
# ---------------------------------------------------------------------------


def _make_hierarchy_nodes() -> list[PydanticHierarchyNode]:
    """Build a known set of hierarchy nodes resembling Slack channels."""
    return [
        PydanticHierarchyNode(
            raw_node_id=CHANNEL_A_ID,
            raw_parent_id=None,
            display_name=CHANNEL_A_NAME,
            link="https://slack.example.com/channels/general",
            node_type=HierarchyNodeType.CHANNEL,
            external_access=ExternalAccess(
                external_user_emails={"alice@example.com", "bob@example.com"},
                external_user_group_ids=set(),
                is_public=False,
            ),
        ),
        PydanticHierarchyNode(
            raw_node_id=CHANNEL_B_ID,
            raw_parent_id=None,
            display_name=CHANNEL_B_NAME,
            link="https://slack.example.com/channels/random",
            node_type=HierarchyNodeType.CHANNEL,
        ),
        PydanticHierarchyNode(
            raw_node_id=CHANNEL_C_ID,
            raw_parent_id=None,
            display_name=CHANNEL_C_NAME,
            link="https://slack.example.com/channels/engineering",
            node_type=HierarchyNodeType.CHANNEL,
            external_access=ExternalAccess(
                external_user_emails=set(),
                external_user_group_ids={"eng-team"},
                is_public=True,
            ),
        ),
    ]


DOC_PARENT_MAP = {
    "msg-001": CHANNEL_A_ID,
    "msg-002": CHANNEL_A_ID,
    "msg-003": CHANNEL_B_ID,
}


def _make_slim_docs() -> list[SlimDocument | PydanticHierarchyNode]:
    return [
        SlimDocument(id=doc_id, parent_hierarchy_raw_node_id=DOC_PARENT_MAP.get(doc_id))
        for doc_id in SLIM_DOC_IDS
    ]


class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
    """Yields a batch containing interleaved hierarchy nodes and slim docs."""

    def load_credentials(
        self,
        credentials: dict[str, Any],  # noqa: ARG002
    ) -> dict[str, Any] | None:  # noqa: ARG002
        return None

    def retrieve_all_slim_docs_perm_sync(
        self,
        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
        return self._generate()

    def _generate(self) -> Iterator[list[SlimDocument | PydanticHierarchyNode]]:
        # First batch: hierarchy nodes + first slim doc
        batch_1: list[SlimDocument | PydanticHierarchyNode] = [
            *_make_hierarchy_nodes(),
            _make_slim_docs()[0],
        ]
        yield batch_1

        # Second batch: remaining slim docs only (no hierarchy nodes)
        yield _make_slim_docs()[1:]


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _create_cc_pair(
    db_session: Session,
    source: DocumentSource = TEST_SOURCE,
) -> ConnectorCredentialPair:
    """Create a real Connector + Credential + ConnectorCredentialPair for testing."""
    connector = Connector(
        name=f"Test {source.value} Connector",
        source=source,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
    )
    db_session.add(connector)
    db_session.flush()

    credential = Credential(
        source=source,
        credential_json={},
        admin_public=True,
    )
    db_session.add(credential)
    db_session.flush()
    db_session.expire(credential)

    cc_pair = ConnectorCredentialPair(
        connector_id=connector.id,
        credential_id=credential.id,
        name=f"Test {source.value} CC Pair",
        status=ConnectorCredentialPairStatus.ACTIVE,
        access_type=AccessType.PUBLIC,
    )
    db_session.add(cc_pair)
    db_session.commit()
    db_session.refresh(cc_pair)
    return cc_pair


def _cleanup_test_data(db_session: Session) -> None:
    """Remove all test hierarchy nodes and documents to isolate tests."""
    for doc_id in SLIM_DOC_IDS:
        db_session.query(DbDocument).filter(DbDocument.id == doc_id).delete()

    test_connector_ids_q = db_session.query(Connector.id).filter(
        Connector.source == TEST_SOURCE,
        Connector.name.like("Test %"),
    )

    db_session.query(HierarchyNodeByConnectorCredentialPair).filter(
        HierarchyNodeByConnectorCredentialPair.connector_id.in_(test_connector_ids_q)
    ).delete(synchronize_session="fetch")
    db_session.query(DBHierarchyNode).filter(
        DBHierarchyNode.source == TEST_SOURCE
    ).delete()
    db_session.flush()

    # Collect credential IDs before deleting cc_pairs (bulk query.delete()
    # bypasses ORM-level cascade, so credentials won't be auto-removed).
    credential_ids = [
        row[0]
        for row in db_session.query(ConnectorCredentialPair.credential_id)
        .filter(ConnectorCredentialPair.connector_id.in_(test_connector_ids_q))
        .all()
    ]

    db_session.query(ConnectorCredentialPair).filter(
        ConnectorCredentialPair.connector_id.in_(test_connector_ids_q)
    ).delete(synchronize_session="fetch")
    db_session.query(Connector).filter(
        Connector.source == TEST_SOURCE,
        Connector.name.like("Test %"),
    ).delete(synchronize_session="fetch")
    if credential_ids:
        db_session.query(Credential).filter(Credential.id.in_(credential_ids)).delete(
            synchronize_session="fetch"
        )
    db_session.commit()


def _create_test_documents(db_session: Session) -> list[DbDocument]:
    """Insert minimal Document rows for our test doc IDs."""
    docs = []
    for doc_id in SLIM_DOC_IDS:
        doc = DbDocument(
            id=doc_id,
            semantic_id=doc_id,
            kg_stage=KGStage.NOT_STARTED,
        )
        db_session.add(doc)
        docs.append(doc)
    db_session.commit()
    return docs


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


def test_pruning_extracts_hierarchy_nodes(
    db_session: Session,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """extract_ids_from_runnable_connector must separate hierarchy node IDs and
    document IDs into the correct buckets of the SlimConnectorExtractionResult."""
    connector = MockSlimConnectorWithPermSync()

    result = extract_ids_from_runnable_connector(connector, callback=None)

    # raw_id_to_parent should contain ONLY document IDs, not hierarchy node IDs
    assert result.raw_id_to_parent.keys() == set(SLIM_DOC_IDS)

    # Hierarchy nodes should be the 3 channels
    assert len(result.hierarchy_nodes) == 3
    extracted_raw_ids = {n.raw_node_id for n in result.hierarchy_nodes}
    assert extracted_raw_ids == {CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID}


def test_pruning_upserts_hierarchy_nodes_to_db(db_session: Session) -> None:
    """Full flow: extract hierarchy nodes from mock connector, upsert to Postgres,
    then verify the DB state (node count, parent relationships, permissions)."""
    _cleanup_test_data(db_session)

    # Step 1: ensure the SOURCE node exists (mirrors what the pruning task does)
    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

    # Step 2: extract from mock connector
    connector = MockSlimConnectorWithPermSync()
    result = extract_ids_from_runnable_connector(connector, callback=None)
    assert len(result.hierarchy_nodes) == 3

    # Step 3: upsert hierarchy nodes (public connector = False)
    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=result.hierarchy_nodes,
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    assert len(upserted) == 3

    # Step 4: verify DB state
    all_nodes = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)
    # 3 channel nodes + 1 SOURCE node
    assert len(all_nodes) == 4

    # Verify each channel node
    channel_a = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)
    assert channel_a is not None
    assert channel_a.display_name == CHANNEL_A_NAME
    assert channel_a.node_type == HierarchyNodeType.CHANNEL
    assert channel_a.link == "https://slack.example.com/channels/general"
    # Parent should be the SOURCE node (raw_parent_id was None)
    assert channel_a.parent_id == source_node.id
    # Permission fields for channel A (private, has user emails)
    assert channel_a.is_public is False
    assert channel_a.external_user_emails is not None
    assert set(channel_a.external_user_emails) == {
        "alice@example.com",
        "bob@example.com",
    }

    channel_b = get_hierarchy_node_by_raw_id(db_session, CHANNEL_B_ID, TEST_SOURCE)
    assert channel_b is not None
    assert channel_b.display_name == CHANNEL_B_NAME
    assert channel_b.parent_id == source_node.id
    # Channel B has no external_access -> defaults to not public, no emails/groups
    assert channel_b.is_public is False
    assert channel_b.external_user_emails is None
    assert channel_b.external_user_group_ids is None

    channel_c = get_hierarchy_node_by_raw_id(db_session, CHANNEL_C_ID, TEST_SOURCE)
    assert channel_c is not None
    assert channel_c.display_name == CHANNEL_C_NAME
    assert channel_c.parent_id == source_node.id
    # Channel C is public and has a group
    assert channel_c.is_public is True
    assert channel_c.external_user_group_ids is not None
    assert set(channel_c.external_user_group_ids) == {"eng-team"}


def test_pruning_upserts_hierarchy_nodes_public_connector(
    db_session: Session,
) -> None:
    """When the connector's access type is PUBLIC, all hierarchy nodes must be
    marked is_public=True regardless of their external_access settings."""
    _cleanup_test_data(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

    connector = MockSlimConnectorWithPermSync()
    result = extract_ids_from_runnable_connector(connector, callback=None)

    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=result.hierarchy_nodes,
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=True,
    )
    assert len(upserted) == 3

    # Every node should be public
    for node in upserted:
        assert node.is_public is True
        # Public connector forces emails/groups to None
        assert node.external_user_emails is None
        assert node.external_user_group_ids is None


def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
    """Upserting the same hierarchy nodes twice must not create duplicates.
    The second call should update existing rows in place."""
    _cleanup_test_data(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

    nodes = _make_hierarchy_nodes()

    # First upsert
    first_result = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=nodes,
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    first_ids = {n.id for n in first_result}
    all_after_first = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)
    count_after_first = len(all_after_first)

    # Second upsert with the same nodes
    second_result = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=nodes,
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    second_ids = {n.id for n in second_result}
    all_after_second = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)
    count_after_second = len(all_after_second)

    # No new rows should have been created
    assert count_after_first == count_after_second
    # Same DB primary keys should have been returned
    assert first_ids == second_ids


def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> None:
    """Upserting a hierarchy node with changed fields should update the existing row."""
    _cleanup_test_data(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

    original_node = PydanticHierarchyNode(
        raw_node_id=CHANNEL_A_ID,
        raw_parent_id=None,
        display_name=CHANNEL_A_NAME,
        link="https://slack.example.com/channels/general",
        node_type=HierarchyNodeType.CHANNEL,
    )
    upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=[original_node],
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )

    # Now upsert again with updated display_name and permissions
    updated_node = PydanticHierarchyNode(
        raw_node_id=CHANNEL_A_ID,
        raw_parent_id=None,
        display_name="#general-renamed",
        link="https://slack.example.com/channels/general-renamed",
        node_type=HierarchyNodeType.CHANNEL,
        external_access=ExternalAccess(
            external_user_emails={"new_user@example.com"},
            external_user_group_ids=set(),
            is_public=True,
        ),
    )
    upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=[updated_node],
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )

    db_node = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)
    assert db_node is not None
    assert db_node.display_name == "#general-renamed"
    assert db_node.link == "https://slack.example.com/channels/general-renamed"
    assert db_node.is_public is True
    assert db_node.external_user_emails is not None
    assert set(db_node.external_user_emails) == {"new_user@example.com"}


# ---------------------------------------------------------------------------
# Document-to-hierarchy-node linkage tests
# ---------------------------------------------------------------------------


def test_extraction_preserves_parent_hierarchy_raw_node_id(
    db_session: Session,  # noqa: ARG001
) -> None:
    """extract_ids_from_runnable_connector should carry the
    parent_hierarchy_raw_node_id from SlimDocument into the raw_id_to_parent dict."""
    connector = MockSlimConnectorWithPermSync()
    result = extract_ids_from_runnable_connector(connector, callback=None)

    for doc_id, expected_parent in DOC_PARENT_MAP.items():
        assert (
            result.raw_id_to_parent[doc_id] == expected_parent
        ), f"raw_id_to_parent[{doc_id}] should be {expected_parent}"

    # Hierarchy node IDs should NOT be in raw_id_to_parent
    for channel_id in [CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID]:
        assert channel_id not in result.raw_id_to_parent


def test_update_document_parent_hierarchy_nodes(db_session: Session) -> None:
    """update_document_parent_hierarchy_nodes should set
    Document.parent_hierarchy_node_id for each document in the mapping."""
    _cleanup_test_data(db_session)

    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}

    # Create documents with no parent set
    docs = _create_test_documents(db_session)
    for doc in docs:
        assert doc.parent_hierarchy_node_id is None

    # Build resolved map (same logic as _resolve_and_update_document_parents)
    resolved: dict[str, int | None] = {}
    for doc_id, raw_parent in DOC_PARENT_MAP.items():
        resolved[doc_id] = node_id_by_raw.get(raw_parent, source_node.id)

    updated = update_document_parent_hierarchy_nodes(
        db_session=db_session,
        doc_parent_map=resolved,
        commit=True,
    )
    assert updated == len(SLIM_DOC_IDS)

    # Verify each document now points to the correct hierarchy node
    db_session.expire_all()
    for doc_id, raw_parent in DOC_PARENT_MAP.items():
        tmp_doc = db_session.get(DbDocument, doc_id)
        assert tmp_doc is not None
        doc = tmp_doc
        expected_node_id = node_id_by_raw[raw_parent]
        assert (
            doc.parent_hierarchy_node_id == expected_node_id
        ), f"Document {doc_id} should point to node for {raw_parent}"


def test_update_document_parent_is_idempotent(db_session: Session) -> None:
    """Running update_document_parent_hierarchy_nodes a second time with the
    same mapping should update zero rows."""
    _cleanup_test_data(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}
    _create_test_documents(db_session)

    resolved: dict[str, int | None] = {
        doc_id: node_id_by_raw[raw_parent]
        for doc_id, raw_parent in DOC_PARENT_MAP.items()
    }

    first_updated = update_document_parent_hierarchy_nodes(
        db_session=db_session,
        doc_parent_map=resolved,
        commit=True,
    )
    assert first_updated == len(SLIM_DOC_IDS)

    second_updated = update_document_parent_hierarchy_nodes(
        db_session=db_session,
        doc_parent_map=resolved,
        commit=True,
    )
    assert second_updated == 0


def test_link_hierarchy_nodes_to_documents_for_confluence(
    db_session: Session,
) -> None:
    """For sources in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS (e.g. Confluence),
    link_hierarchy_nodes_to_documents should set HierarchyNode.document_id
    when a hierarchy node's raw_node_id matches a document ID."""
    _cleanup_test_data(db_session)
    confluence_source = DocumentSource.CONFLUENCE

    # Clean up any existing Confluence hierarchy nodes
    db_session.query(DBHierarchyNode).filter(
        DBHierarchyNode.source == confluence_source
    ).delete()
    db_session.commit()

    ensure_source_node_exists(db_session, confluence_source, commit=True)

    # Create a hierarchy node whose raw_node_id matches a document ID
    page_node_id = "confluence-page-123"
    nodes = [
        PydanticHierarchyNode(
            raw_node_id=page_node_id,
            raw_parent_id=None,
            display_name="Test Page",
            link="https://wiki.example.com/page/123",
            node_type=HierarchyNodeType.PAGE,
        ),
    ]
    upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=nodes,
        source=confluence_source,
        commit=True,
        is_connector_public=False,
    )

    # Verify the node exists but has no document_id yet
    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
    assert db_node is not None
    assert db_node.document_id is None

    # Create a document with the same ID as the hierarchy node
    doc = DbDocument(
        id=page_node_id,
        semantic_id="Test Page",
        kg_stage=KGStage.NOT_STARTED,
    )
    db_session.add(doc)
    db_session.commit()

    # Link nodes to documents
    linked = link_hierarchy_nodes_to_documents(
        db_session=db_session,
        document_ids=[page_node_id],
        source=confluence_source,
        commit=True,
    )
    assert linked == 1

    # Verify the hierarchy node now has document_id set
    db_session.expire_all()
    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
    assert db_node is not None
    assert db_node.document_id == page_node_id

    # Cleanup
    db_session.query(DbDocument).filter(DbDocument.id == page_node_id).delete()
    db_session.query(DBHierarchyNode).filter(
        DBHierarchyNode.source == confluence_source
    ).delete()
    db_session.commit()


def test_link_hierarchy_nodes_skips_non_hierarchy_sources(
    db_session: Session,
) -> None:
    """link_hierarchy_nodes_to_documents should return 0 for sources that
    don't support hierarchy-node-as-document (e.g. Slack, Google Drive)."""
    linked = link_hierarchy_nodes_to_documents(
        db_session=db_session,
        document_ids=SLIM_DOC_IDS,
        source=TEST_SOURCE,  # Slack — not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS
        commit=False,
    )
    assert linked == 0


# ---------------------------------------------------------------------------
# Join table + pruning tests
# ---------------------------------------------------------------------------


def test_upsert_hierarchy_node_cc_pair_entries(db_session: Session) -> None:
    """upsert_hierarchy_node_cc_pair_entries should insert rows and be idempotent."""
    _cleanup_test_data(db_session)
    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    cc_pair = _create_cc_pair(db_session)

    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    node_ids = [n.id for n in upserted]

    # First call — should insert rows
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=node_ids,
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        commit=True,
    )

    rows = (
        db_session.query(HierarchyNodeByConnectorCredentialPair)
        .filter(
            HierarchyNodeByConnectorCredentialPair.connector_id == cc_pair.connector_id,
            HierarchyNodeByConnectorCredentialPair.credential_id
            == cc_pair.credential_id,
        )
        .all()
    )
    assert len(rows) == 3

    # Second call — idempotent, same count
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=node_ids,
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        commit=True,
    )
    rows_after = (
        db_session.query(HierarchyNodeByConnectorCredentialPair)
        .filter(
            HierarchyNodeByConnectorCredentialPair.connector_id == cc_pair.connector_id,
            HierarchyNodeByConnectorCredentialPair.credential_id
            == cc_pair.credential_id,
        )
        .all()
    )
    assert len(rows_after) == 3


def test_remove_stale_entries_and_delete_orphans(db_session: Session) -> None:
    """After removing stale join-table entries, orphaned hierarchy nodes should
    be deleted and the SOURCE node should survive."""
    _cleanup_test_data(db_session)
    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    cc_pair = _create_cc_pair(db_session)

    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    all_ids = [n.id for n in upserted]
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=all_ids,
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        commit=True,
    )

    # Now simulate a pruning run where only channel A survived
    channel_a = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)
    assert channel_a is not None
    live_ids = {channel_a.id}

    stale_removed = remove_stale_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        live_hierarchy_node_ids=live_ids,
        commit=True,
    )
    assert stale_removed == 2

    # Delete orphaned nodes
    deleted_raw_ids = delete_orphaned_hierarchy_nodes(
        db_session=db_session,
        source=TEST_SOURCE,
        commit=True,
    )
    assert set(deleted_raw_ids) == {CHANNEL_B_ID, CHANNEL_C_ID}

    # Verify only channel A + SOURCE remain
    remaining = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)
    remaining_raw = {n.raw_node_id for n in remaining}
    assert remaining_raw == {CHANNEL_A_ID, source_node.raw_node_id}


def test_multi_cc_pair_prevents_premature_deletion(db_session: Session) -> None:
    """A hierarchy node shared by two cc_pairs should NOT be deleted when only
    one cc_pair removes its association."""
    _cleanup_test_data(db_session)
    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    cc_pair_1 = _create_cc_pair(db_session)
    cc_pair_2 = _create_cc_pair(db_session)

    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    all_ids = [n.id for n in upserted]

    # cc_pair 1 owns all 3
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=all_ids,
        connector_id=cc_pair_1.connector_id,
        credential_id=cc_pair_1.credential_id,
        commit=True,
    )
    # cc_pair 2 also owns all 3
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=all_ids,
        connector_id=cc_pair_2.connector_id,
        credential_id=cc_pair_2.credential_id,
        commit=True,
    )

    # cc_pair 1 prunes — keeps none
    remove_stale_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        connector_id=cc_pair_1.connector_id,
        credential_id=cc_pair_1.credential_id,
        live_hierarchy_node_ids=set(),
        commit=True,
    )

    # Orphan deletion should find nothing because cc_pair 2 still references them
    deleted = delete_orphaned_hierarchy_nodes(
        db_session=db_session,
        source=TEST_SOURCE,
        commit=True,
    )
    assert deleted == []

    # All 3 nodes + SOURCE should still exist
    remaining = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)
    assert len(remaining) == 4


def test_reparent_orphaned_children(db_session: Session) -> None:
    """After deleting a parent hierarchy node, its children should be
    re-parented to the SOURCE node."""
    _cleanup_test_data(db_session)
    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    cc_pair = _create_cc_pair(db_session)

    # Create a parent node and a child node
    parent_node = PydanticHierarchyNode(
        raw_node_id="PARENT",
        raw_parent_id=None,
        display_name="Parent",
        node_type=HierarchyNodeType.CHANNEL,
    )
    child_node = PydanticHierarchyNode(
        raw_node_id="CHILD",
        raw_parent_id="PARENT",
        display_name="Child",
        node_type=HierarchyNodeType.CHANNEL,
    )
    upserted = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=[parent_node, child_node],
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    assert len(upserted) == 2

    parent_db = get_hierarchy_node_by_raw_id(db_session, "PARENT", TEST_SOURCE)
    child_db = get_hierarchy_node_by_raw_id(db_session, "CHILD", TEST_SOURCE)
    assert parent_db is not None and child_db is not None
    assert child_db.parent_id == parent_db.id

    # Associate only the child with a cc_pair (parent is orphaned)
    upsert_hierarchy_node_cc_pair_entries(
        db_session=db_session,
        hierarchy_node_ids=[child_db.id],
        connector_id=cc_pair.connector_id,
        credential_id=cc_pair.credential_id,
        commit=True,
    )

    # Delete orphaned nodes (parent has no cc_pair entry)
    deleted = delete_orphaned_hierarchy_nodes(
        db_session=db_session,
        source=TEST_SOURCE,
        commit=True,
    )
    assert "PARENT" in deleted

    # Child should now have parent_id=NULL (SET NULL cascade)
    db_session.expire_all()
    child_db = get_hierarchy_node_by_raw_id(db_session, "CHILD", TEST_SOURCE)
    assert child_db is not None
    assert child_db.parent_id is None

    # Re-parent orphans to SOURCE
    reparented = reparent_orphaned_hierarchy_nodes(
        db_session=db_session,
        source=TEST_SOURCE,
        commit=True,
    )
    assert len(reparented) == 1

    db_session.expire_all()
    child_db = get_hierarchy_node_by_raw_id(db_session, "CHILD", TEST_SOURCE)
    assert child_db is not None
    assert child_db.parent_id == source_node.id


================================================
FILE: backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py
================================================
"""
External dependency unit tests for user file delete queue protections.

Verifies that the three mechanisms added to check_for_user_file_delete work
correctly:

1. Queue depth backpressure – when the broker queue exceeds
   USER_FILE_DELETE_MAX_QUEUE_DEPTH, no new tasks are enqueued.

2. Per-file Redis guard key – if the guard key for a file already exists in
   Redis, that file is skipped even though it is still in DELETING status.

3. Task expiry – every send_task call carries expires=
   CELERY_USER_FILE_DELETE_TASK_EXPIRES so that stale queued tasks are
   discarded by workers automatically.

Also verifies that delete_user_file_impl clears the guard key the moment
it is picked up by a worker.

Uses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile
rows.  The Celery app is provided as a MagicMock injected via a PropertyMock
on the task class so no real broker is needed.
"""

from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from unittest.mock import PropertyMock
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.background.celery.tasks.user_file_processing.tasks import (
    _user_file_delete_lock_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    _user_file_delete_queued_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    check_for_user_file_delete,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_single_user_file_delete,
)
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_PATCH_QUEUE_LEN = (
    "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length"
)


def _create_deleting_user_file(db_session: Session, user_id: object) -> UserFile:
    """Insert a UserFile in DELETING status and return it."""
    uf = UserFile(
        id=uuid4(),
        user_id=user_id,
        file_id=f"test_file_{uuid4().hex[:8]}",
        name=f"test_{uuid4().hex[:8]}.txt",
        file_type="text/plain",
        status=UserFileStatus.DELETING,
    )
    db_session.add(uf)
    db_session.commit()
    db_session.refresh(uf)
    return uf


@contextmanager
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
    """Patch the ``app`` property on *task*'s class so that ``self.app``
    inside the task function returns *mock_app*.

    With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is
    the actual task instance.  We patch ``app`` on that instance's class
    (a unique Celery-generated Task subclass) so the mock is scoped to this
    task only.

    Also patches ``celery_get_broker_client`` so the mock app doesn't need
    a real broker URL.
    """
    task_instance = task.run.__self__
    with (
        patch.object(
            type(task_instance),
            "app",
            new_callable=PropertyMock,
            return_value=mock_app,
        ),
        patch(
            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
            return_value=MagicMock(),
        ),
    ):
        yield


# ---------------------------------------------------------------------------
# Test classes
# ---------------------------------------------------------------------------


class TestDeleteQueueDepthBackpressure:
    """Protection 1: skip all enqueuing when the broker queue is too deep."""

    def test_no_tasks_enqueued_when_queue_over_limit(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """When the queue depth exceeds the limit the beat cycle is skipped."""
        user = create_test_user(db_session, "del_bp_user")
        _create_deleting_user_file(db_session, user.id)

        mock_app = MagicMock()

        with (
            _patch_task_app(check_for_user_file_delete, mock_app),
            patch(_PATCH_QUEUE_LEN, return_value=USER_FILE_DELETE_MAX_QUEUE_DEPTH + 1),
        ):
            check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)

        mock_app.send_task.assert_not_called()


class TestDeletePerFileGuardKey:
    """Protection 2: per-file Redis guard key prevents duplicate enqueue."""

    def test_guarded_file_not_re_enqueued(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file whose guard key is already set in Redis is skipped."""
        user = create_test_user(db_session, "del_guard_user")
        uf = _create_deleting_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_delete_queued_key(uf.id)
        redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_for_user_file_delete, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)

            # send_task must not have been called with this specific file's ID
            for call in mock_app.send_task.call_args_list:
                kwargs = call.kwargs.get("kwargs", {})
                assert kwargs.get("user_file_id") != str(
                    uf.id
                ), f"File {uf.id} should have been skipped because its guard key exists"
        finally:
            redis_client.delete(guard_key)

    def test_guard_key_exists_in_redis_after_enqueue(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After a file is enqueued its guard key is present in Redis with a TTL."""
        user = create_test_user(db_session, "del_guard_set_user")
        uf = _create_deleting_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_delete_queued_key(uf.id)
        redis_client.delete(guard_key)  # clean slate

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_for_user_file_delete, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)

            assert redis_client.exists(
                guard_key
            ), "Guard key should be set in Redis after enqueue"
            ttl = int(redis_client.ttl(guard_key))  # type: ignore[arg-type]
            assert (
                0 < ttl <= CELERY_USER_FILE_DELETE_TASK_EXPIRES
            ), f"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_DELETE_TASK_EXPIRES}]"
        finally:
            redis_client.delete(guard_key)


class TestDeleteTaskExpiry:
    """Protection 3: every send_task call includes an expires value."""

    def test_send_task_called_with_expires(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """send_task is called with the correct queue, task name, and expires."""
        user = create_test_user(db_session, "del_expires_user")
        uf = _create_deleting_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_delete_queued_key(uf.id)
        redis_client.delete(guard_key)

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_for_user_file_delete, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)

            # At least one task should have been submitted (for our file)
            assert (
                mock_app.send_task.call_count >= 1
            ), "Expected at least one task to be submitted"

            # Every submitted task must carry expires
            for call in mock_app.send_task.call_args_list:
                assert call.args[0] == OnyxCeleryTask.DELETE_SINGLE_USER_FILE
                assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_DELETE
                assert (
                    call.kwargs.get("expires") == CELERY_USER_FILE_DELETE_TASK_EXPIRES
                ), "Task must be submitted with the correct expires value to prevent stale task accumulation"
        finally:
            redis_client.delete(guard_key)


class TestDeleteWorkerClearsGuardKey:
    """process_single_user_file_delete removes the guard key when it picks up a task."""

    def test_guard_key_deleted_on_pickup(
        self,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """The guard key is deleted before the worker does any real work.

        We simulate an already-locked file so delete_user_file_impl returns
        early – but crucially, after the guard key deletion.
        """
        user_file_id = str(uuid4())

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_delete_queued_key(user_file_id)

        # Simulate the guard key set when the beat enqueued the task
        redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)
        assert redis_client.exists(guard_key), "Guard key must exist before pickup"

        # Hold the per-file delete lock so the worker exits early without
        # touching the database or file store.
        lock_key = _user_file_delete_lock_key(user_file_id)
        delete_lock = redis_client.lock(lock_key, timeout=10)
        acquired = delete_lock.acquire(blocking=False)
        assert acquired, "Should be able to acquire the delete lock for this test"

        try:
            process_single_user_file_delete.run(
                user_file_id=user_file_id,
                tenant_id=TEST_TENANT_ID,
            )
        finally:
            if delete_lock.owned():
                delete_lock.release()

        assert not redis_client.exists(
            guard_key
        ), "Guard key should be deleted when the worker picks up the task"


================================================
FILE: backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
================================================
"""
External dependency unit tests for UserFileIndexingAdapter metadata writing.

Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
objects with both `user_project` and `personas` fields populated correctly
based on actual DB associations.

Uses real PostgreSQL for UserFile/Persona/UserProject rows.
Mocks the LLM tokenizer and file store since they are not relevant here.
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.db.enums import UserFileStatus
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import Project__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import IndexChunk
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _create_user_file(db_session: Session, user: User) -> UserFile:
    uf = UserFile(
        id=uuid4(),
        user_id=user.id,
        file_id=f"test_file_{uuid4().hex[:8]}",
        name=f"test_{uuid4().hex[:8]}.txt",
        file_type="text/plain",
        status=UserFileStatus.COMPLETED,
        chunk_count=1,
    )
    db_session.add(uf)
    db_session.commit()
    db_session.refresh(uf)
    return uf


def _create_persona(db_session: Session, user: User) -> Persona:
    persona = Persona(
        name=f"Test Persona {uuid4().hex[:8]}",
        description="Test persona",
        system_prompt="test",
        task_prompt="test",
        tools=[],
        document_sets=[],
        users=[user],
        groups=[],
        is_listed=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
        user_id=user.id,
    )
    db_session.add(persona)
    db_session.commit()
    db_session.refresh(persona)
    return persona


def _create_project(db_session: Session, user: User) -> UserProject:
    project = UserProject(
        user_id=user.id,
        name=f"project-{uuid4().hex[:8]}",
        instructions="",
    )
    db_session.add(project)
    db_session.commit()
    db_session.refresh(project)
    return project


def _make_index_chunk(user_file: UserFile) -> IndexChunk:
    """Build a minimal IndexChunk whose source document ID matches the UserFile."""
    doc = Document(
        id=str(user_file.id),
        source=DocumentSource.USER_FILE,
        semantic_identifier=user_file.name,
        sections=[TextSection(text="test chunk content", link=None)],
        metadata={},
    )
    return IndexChunk(
        source_document=doc,
        chunk_id=0,
        blurb="test chunk",
        content="test chunk content",
        source_links={0: ""},
        image_file_id=None,
        section_continuation=False,
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        contextual_rag_reserved_tokens=0,
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        embeddings=ChunkEmbedding(
            full_embedding=[0.0] * 768,
            mini_chunk_embeddings=[],
        ),
        title_embedding=None,
    )


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestAdapterWritesBothMetadataFields:
    """prepare_enrichment must populate user_project AND personas."""

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
        side_effect=Exception("no LLM in test"),
    )
    def test_file_linked_to_persona_gets_persona_id(
        self,
        _mock_llm: MagicMock,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        user = create_test_user(db_session, "adapter_persona")
        uf = _create_user_file(db_session, user)
        persona = _create_persona(db_session, user)

        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))
        db_session.commit()

        adapter = UserFileIndexingAdapter(
            tenant_id=TEST_TENANT_ID, db_session=db_session
        )
        chunk = _make_index_chunk(uf)
        doc = chunk.source_document
        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})

        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id=TEST_TENANT_ID,
            chunks=[chunk],
        )
        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

        assert persona.id in aware_chunk.personas
        assert aware_chunk.user_project == []

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
        side_effect=Exception("no LLM in test"),
    )
    def test_file_linked_to_project_gets_project_id(
        self,
        _mock_llm: MagicMock,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        user = create_test_user(db_session, "adapter_project")
        uf = _create_user_file(db_session, user)
        project = _create_project(db_session, user)

        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))
        db_session.commit()

        adapter = UserFileIndexingAdapter(
            tenant_id=TEST_TENANT_ID, db_session=db_session
        )
        chunk = _make_index_chunk(uf)
        context = DocumentBatchPrepareContext(
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id=TEST_TENANT_ID,
            chunks=[chunk],
        )
        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

        assert project.id in aware_chunk.user_project
        assert aware_chunk.personas == []

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
        side_effect=Exception("no LLM in test"),
    )
    def test_file_linked_to_both_gets_both_ids(
        self,
        _mock_llm: MagicMock,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        user = create_test_user(db_session, "adapter_both")
        uf = _create_user_file(db_session, user)
        persona = _create_persona(db_session, user)
        project = _create_project(db_session, user)

        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))
        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))
        db_session.commit()

        adapter = UserFileIndexingAdapter(
            tenant_id=TEST_TENANT_ID, db_session=db_session
        )
        chunk = _make_index_chunk(uf)
        context = DocumentBatchPrepareContext(
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id=TEST_TENANT_ID,
            chunks=[chunk],
        )
        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

        assert persona.id in aware_chunk.personas
        assert project.id in aware_chunk.user_project

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
        side_effect=Exception("no LLM in test"),
    )
    def test_file_with_no_associations_gets_empty_lists(
        self,
        _mock_llm: MagicMock,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        user = create_test_user(db_session, "adapter_empty")
        uf = _create_user_file(db_session, user)

        adapter = UserFileIndexingAdapter(
            tenant_id=TEST_TENANT_ID, db_session=db_session
        )
        chunk = _make_index_chunk(uf)
        context = DocumentBatchPrepareContext(
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id=TEST_TENANT_ID,
            chunks=[chunk],
        )
        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

        assert aware_chunk.personas == []
        assert aware_chunk.user_project == []

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
        side_effect=Exception("no LLM in test"),
    )
    def test_multiple_personas_all_appear(
        self,
        _mock_llm: MagicMock,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file linked to multiple personas should have all their IDs."""
        user = create_test_user(db_session, "adapter_multi")
        uf = _create_user_file(db_session, user)
        persona_a = _create_persona(db_session, user)
        persona_b = _create_persona(db_session, user)

        db_session.add(Persona__UserFile(persona_id=persona_a.id, user_file_id=uf.id))
        db_session.add(Persona__UserFile(persona_id=persona_b.id, user_file_id=uf.id))
        db_session.commit()

        adapter = UserFileIndexingAdapter(
            tenant_id=TEST_TENANT_ID, db_session=db_session
        )
        chunk = _make_index_chunk(uf)
        context = DocumentBatchPrepareContext(
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id=TEST_TENANT_ID,
            chunks=[chunk],
        )
        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}


================================================
FILE: backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py
================================================
"""
External dependency unit tests for user file processing queue protections.

Verifies that the three mechanisms added to check_user_file_processing work
correctly:

1. Queue depth backpressure – when the broker queue exceeds
   USER_FILE_PROCESSING_MAX_QUEUE_DEPTH, no new tasks are enqueued.

2. Per-file Redis guard key – if the guard key for a file already exists in
   Redis, that file is skipped even though it is still in PROCESSING status.

3. Task expiry – every send_task call carries expires=
   CELERY_USER_FILE_PROCESSING_TASK_EXPIRES so that stale queued tasks are
   discarded by workers automatically.

Also verifies that process_single_user_file clears the guard key the moment
it is picked up by a worker.

Uses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile
rows.  The Celery app is provided as a MagicMock injected via a PropertyMock
on the task class so no real broker is needed.
"""

from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from unittest.mock import PropertyMock
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.background.celery.tasks.user_file_processing.tasks import (
    _user_file_lock_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    _user_file_queued_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    check_user_file_processing,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_single_user_file,
)
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_PATCH_QUEUE_LEN = (
    "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length"
)


def _create_processing_user_file(db_session: Session, user_id: object) -> UserFile:
    """Insert a UserFile in PROCESSING status and return it."""
    uf = UserFile(
        id=uuid4(),
        user_id=user_id,
        file_id=f"test_file_{uuid4().hex[:8]}",
        name=f"test_{uuid4().hex[:8]}.txt",
        file_type="text/plain",
        status=UserFileStatus.PROCESSING,
    )
    db_session.add(uf)
    db_session.commit()
    db_session.refresh(uf)
    return uf


@contextmanager
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
    """Patch the ``app`` property on *task*'s class so that ``self.app``
    inside the task function returns *mock_app*.

    With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is
    the actual task instance.  We patch ``app`` on that instance's class
    (a unique Celery-generated Task subclass) so the mock is scoped to this
    task only.
    """
    task_instance = task.run.__self__
    with (
        patch.object(
            type(task_instance),
            "app",
            new_callable=PropertyMock,
            return_value=mock_app,
        ),
        patch(
            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
            return_value=MagicMock(),
        ),
    ):
        yield


# ---------------------------------------------------------------------------
# Test classes
# ---------------------------------------------------------------------------


class TestQueueDepthBackpressure:
    """Protection 1: skip all enqueuing when the broker queue is too deep."""

    def test_no_tasks_enqueued_when_queue_over_limit(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """When the queue depth exceeds the limit the beat cycle is skipped."""
        user = create_test_user(db_session, "bp_user")
        _create_processing_user_file(db_session, user.id)

        mock_app = MagicMock()

        with (
            _patch_task_app(check_user_file_processing, mock_app),
            patch(
                _PATCH_QUEUE_LEN, return_value=USER_FILE_PROCESSING_MAX_QUEUE_DEPTH + 1
            ),
        ):
            check_user_file_processing.run(tenant_id=TEST_TENANT_ID)

        mock_app.send_task.assert_not_called()


class TestPerFileGuardKey:
    """Protection 2: per-file Redis guard key prevents duplicate enqueue."""

    def test_guarded_file_not_re_enqueued(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file whose guard key is already set in Redis is skipped."""
        user = create_test_user(db_session, "guard_user")
        uf = _create_processing_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_queued_key(uf.id)
        redis_client.setex(guard_key, CELERY_USER_FILE_PROCESSING_TASK_EXPIRES, 1)

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_user_file_processing, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)

            # send_task must not have been called with this specific file's ID
            for call in mock_app.send_task.call_args_list:
                kwargs = call.kwargs.get("kwargs", {})
                assert kwargs.get("user_file_id") != str(
                    uf.id
                ), f"File {uf.id} should have been skipped because its guard key exists"
        finally:
            redis_client.delete(guard_key)

    def test_guard_key_exists_in_redis_after_enqueue(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After a file is enqueued its guard key is present in Redis with a TTL."""
        user = create_test_user(db_session, "guard_set_user")
        uf = _create_processing_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_queued_key(uf.id)
        redis_client.delete(guard_key)  # clean slate

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_user_file_processing, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)

            assert redis_client.exists(
                guard_key
            ), "Guard key should be set in Redis after enqueue"
            ttl = int(redis_client.ttl(guard_key))  # type: ignore[arg-type]
            assert (
                0 < ttl <= CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
            ), f"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_PROCESSING_TASK_EXPIRES}]"
        finally:
            redis_client.delete(guard_key)


class TestTaskExpiry:
    """Protection 3: every send_task call includes an expires value."""

    def test_send_task_called_with_expires(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """send_task is called with the correct queue, task name, and expires."""
        user = create_test_user(db_session, "expires_user")
        uf = _create_processing_user_file(db_session, user.id)

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_queued_key(uf.id)
        redis_client.delete(guard_key)

        mock_app = MagicMock()

        try:
            with (
                _patch_task_app(check_user_file_processing, mock_app),
                patch(_PATCH_QUEUE_LEN, return_value=0),
            ):
                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)

            # At least one task should have been submitted (for our file)
            assert (
                mock_app.send_task.call_count >= 1
            ), "Expected at least one task to be submitted"

            # Every submitted task must carry expires
            for call in mock_app.send_task.call_args_list:
                assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE
                assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_PROCESSING
                assert (
                    call.kwargs.get("expires")
                    == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
                ), "Task must be submitted with the correct expires value to prevent stale task accumulation"
        finally:
            redis_client.delete(guard_key)


class TestWorkerClearsGuardKey:
    """process_single_user_file removes the guard key when it picks up a task."""

    def test_guard_key_deleted_on_pickup(
        self,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """The guard key is deleted before the worker does any real work.

        We simulate an already-locked file so process_single_user_file returns
        early – but crucially, after the guard key deletion.
        """
        user_file_id = str(uuid4())

        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        guard_key = _user_file_queued_key(user_file_id)

        # Simulate the guard key set when the beat enqueued the task
        redis_client.setex(guard_key, CELERY_USER_FILE_PROCESSING_TASK_EXPIRES, 1)
        assert redis_client.exists(guard_key), "Guard key must exist before pickup"

        # Hold the per-file processing lock so the worker exits early without
        # touching the database or file store.
        lock_key = _user_file_lock_key(user_file_id)
        processing_lock = redis_client.lock(lock_key, timeout=10)
        acquired = processing_lock.acquire(blocking=False)
        assert acquired, "Should be able to acquire the processing lock for this test"

        try:
            process_single_user_file.run(
                user_file_id=user_file_id,
                tenant_id=TEST_TENANT_ID,
            )
        finally:
            if processing_lock.owned():
                processing_lock.release()

        assert not redis_client.exists(
            guard_key
        ), "Guard key should be deleted when the worker picks up the task"


================================================
FILE: backend/tests/external_dependency_unit/chat/test_user_reminder_message_type.py
================================================
"""
Tests for the USER_REMINDER message type handling in translate_history_to_llm_format.

These tests verify that:
1. USER_REMINDER messages are wrapped with <system-reminder> tags
2. The wrapped messages are converted to UserMessage type for the LLM
3. The tags are properly applied around the message content
4. CODE_BLOCK_MARKDOWN is prepended to system messages for models that need it
"""

import pytest

from onyx.chat.llm_step import translate_history_to_llm_format
from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.llm.interfaces import LLMConfig
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN
from onyx.prompts.constants import SYSTEM_REMINDER_TAG_CLOSE
from onyx.prompts.constants import SYSTEM_REMINDER_TAG_OPEN


def _ensure_list(
    result: list[ChatCompletionMessage] | ChatCompletionMessage,
) -> list[ChatCompletionMessage]:
    """Convert LanguageModelInput to a list for easier testing."""
    if isinstance(result, list):
        return result
    return [result]


@pytest.fixture
def mock_llm_config() -> LLMConfig:
    """Create a minimal LLMConfig for testing."""
    return LLMConfig(
        model_provider="openai",
        model_name="gpt-4o-mini",
        temperature=0.7,
        api_key="test-key",
        api_base=None,
        api_version=None,
        max_input_tokens=128000,
    )


class TestUserReminderMessageType:
    """Tests for USER_REMINDER message handling in translate_history_to_llm_format."""

    def test_user_reminder_wrapped_with_tags(self, mock_llm_config: LLMConfig) -> None:
        """Test that USER_REMINDER messages are wrapped with system-reminder tags."""
        reminder_text = "Remember to cite your sources."
        history = [
            ChatMessageSimple(
                message=reminder_text,
                token_count=10,
                message_type=MessageType.USER_REMINDER,
            )
        ]

        raw_result = translate_history_to_llm_format(history, mock_llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, UserMessage)
        assert msg.role == "user"
        # Verify the content starts and ends with the proper tags
        assert isinstance(msg.content, str)
        assert msg.content.startswith(SYSTEM_REMINDER_TAG_OPEN)
        assert msg.content.endswith(SYSTEM_REMINDER_TAG_CLOSE)
        # Verify the original message is inside the tags
        assert reminder_text in msg.content

    def test_user_reminder_tag_format(self, mock_llm_config: LLMConfig) -> None:
        """Test the exact format of the system-reminder tag wrapping."""
        reminder_text = "This is a test reminder."
        history = [
            ChatMessageSimple(
                message=reminder_text,
                token_count=10,
                message_type=MessageType.USER_REMINDER,
            )
        ]

        raw_result = translate_history_to_llm_format(history, mock_llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, UserMessage)
        expected_content = (
            f"{SYSTEM_REMINDER_TAG_OPEN}\n{reminder_text}\n{SYSTEM_REMINDER_TAG_CLOSE}"
        )
        assert msg.content == expected_content

    def test_user_reminder_converted_to_user_message(
        self, mock_llm_config: LLMConfig
    ) -> None:
        """Test that USER_REMINDER is converted to UserMessage (not a different type)."""
        history = [
            ChatMessageSimple(
                message="Test reminder",
                token_count=5,
                message_type=MessageType.USER_REMINDER,
            )
        ]

        raw_result = translate_history_to_llm_format(history, mock_llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        # Should be a UserMessage since LLM APIs don't have a native reminder type
        assert isinstance(result[0], UserMessage)
        assert result[0].role == "user"

    def test_user_reminder_in_mixed_history(self, mock_llm_config: LLMConfig) -> None:
        """Test USER_REMINDER handling when mixed with other message types."""
        history = [
            ChatMessageSimple(
                message="You are a helpful assistant.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            ),
            ChatMessageSimple(
                message="Hello!",
                token_count=5,
                message_type=MessageType.USER,
            ),
            ChatMessageSimple(
                message="Hi there! How can I help?",
                token_count=10,
                message_type=MessageType.ASSISTANT,
            ),
            ChatMessageSimple(
                message="Remember to be concise.",
                token_count=8,
                message_type=MessageType.USER_REMINDER,
            ),
        ]

        raw_result = translate_history_to_llm_format(history, mock_llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 4
        # Check the reminder message (last one)
        reminder_msg = result[3]
        assert isinstance(reminder_msg, UserMessage)
        assert isinstance(reminder_msg.content, str)
        assert reminder_msg.content.startswith(SYSTEM_REMINDER_TAG_OPEN)
        assert reminder_msg.content.endswith(SYSTEM_REMINDER_TAG_CLOSE)
        assert "Remember to be concise." in reminder_msg.content

        # Check that regular USER message is NOT wrapped
        user_msg = result[1]
        assert isinstance(user_msg, UserMessage)
        assert user_msg.content == "Hello!"  # No tags

    def test_regular_user_message_not_wrapped(self, mock_llm_config: LLMConfig) -> None:
        """Test that regular USER messages are NOT wrapped with system-reminder tags."""
        history = [
            ChatMessageSimple(
                message="This is a normal user message.",
                token_count=10,
                message_type=MessageType.USER,
            )
        ]

        raw_result = translate_history_to_llm_format(history, mock_llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, UserMessage)
        # Regular user message should NOT have the tags
        assert isinstance(msg.content, str)
        assert SYSTEM_REMINDER_TAG_OPEN not in msg.content
        assert SYSTEM_REMINDER_TAG_CLOSE not in msg.content
        assert msg.content == "This is a normal user message."


def _create_llm_config(model_name: str) -> LLMConfig:
    """Create a LLMConfig with the specified model name."""
    return LLMConfig(
        model_provider="openai",
        model_name=model_name,
        temperature=0.7,
        api_key="test-key",
        api_base=None,
        api_version=None,
        max_input_tokens=128000,
    )


class TestCodeBlockMarkdownFormatting:
    """Tests for CODE_BLOCK_MARKDOWN prefix handling in translate_history_to_llm_format.

    OpenAI reasoning models (o1, o3, gpt-5) need a "Formatting re-enabled. " prefix
    in their system messages for correct markdown generation.
    """

    def test_o1_model_prepends_markdown_to_string(self) -> None:
        """Test that o1 model prepends CODE_BLOCK_MARKDOWN to string system message."""
        llm_config = _create_llm_config("o1")
        history = [
            ChatMessageSimple(
                message="You are a helpful assistant.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            )
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, SystemMessage)
        assert isinstance(msg.content, str)
        assert msg.content == CODE_BLOCK_MARKDOWN + "You are a helpful assistant."

    def test_o3_model_prepends_markdown(self) -> None:
        """Test that o3 model prepends CODE_BLOCK_MARKDOWN to system message."""
        llm_config = _create_llm_config("o3-mini")
        history = [
            ChatMessageSimple(
                message="System prompt here.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            )
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, SystemMessage)
        assert isinstance(msg.content, str)
        assert msg.content.startswith(CODE_BLOCK_MARKDOWN)

    def test_gpt5_model_prepends_markdown(self) -> None:
        """Test that gpt-5 model prepends CODE_BLOCK_MARKDOWN to system message."""
        llm_config = _create_llm_config("gpt-5")
        history = [
            ChatMessageSimple(
                message="System prompt here.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            )
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, SystemMessage)
        assert isinstance(msg.content, str)
        assert msg.content.startswith(CODE_BLOCK_MARKDOWN)

    def test_gpt4o_does_not_prepend(self) -> None:
        """Test that gpt-4o model does NOT prepend CODE_BLOCK_MARKDOWN."""
        llm_config = _create_llm_config("gpt-4o")
        history = [
            ChatMessageSimple(
                message="You are a helpful assistant.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            )
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, SystemMessage)
        assert isinstance(msg.content, str)
        # Should NOT have the prefix
        assert msg.content == "You are a helpful assistant."
        assert not msg.content.startswith(CODE_BLOCK_MARKDOWN)

    def test_no_system_message_no_crash(self) -> None:
        """Test that history without system message doesn't crash."""
        llm_config = _create_llm_config("o1")
        history = [
            ChatMessageSimple(
                message="Hello!",
                token_count=5,
                message_type=MessageType.USER,
            )
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 1
        msg = result[0]
        assert isinstance(msg, UserMessage)
        assert msg.content == "Hello!"

    def test_only_first_system_message_modified(self) -> None:
        """Test that only the first system message gets the prefix."""
        llm_config = _create_llm_config("o1")
        history = [
            ChatMessageSimple(
                message="First system prompt.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            ),
            ChatMessageSimple(
                message="Hello!",
                token_count=5,
                message_type=MessageType.USER,
            ),
            ChatMessageSimple(
                message="Second system prompt.",
                token_count=10,
                message_type=MessageType.SYSTEM,
            ),
        ]

        raw_result = translate_history_to_llm_format(history, llm_config)
        result = _ensure_list(raw_result)

        assert len(result) == 3
        # First system message should have prefix
        first_sys = result[0]
        assert isinstance(first_sys, SystemMessage)
        assert isinstance(first_sys.content, str)
        assert first_sys.content.startswith(CODE_BLOCK_MARKDOWN)
        # Second system message should NOT have prefix (only first one is modified)
        second_sys = result[2]
        assert isinstance(second_sys, SystemMessage)
        assert isinstance(second_sys.content, str)
        assert not second_sys.content.startswith(CODE_BLOCK_MARKDOWN)


================================================
FILE: backend/tests/external_dependency_unit/conftest.py
================================================
from collections.abc import Generator
from uuid import uuid4

import pytest
from fastapi_users.password import PasswordHelper
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import AccountType
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.file_store.file_store import get_default_file_store
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID
from tests.external_dependency_unit.full_setup import (
    ensure_full_deployment_setup,
)


@pytest.fixture(scope="function")
def db_session() -> Generator[Session, None, None]:
    """Create a database session for testing using the actual PostgreSQL database"""
    # Make sure that the db engine is initialized before any tests are run
    SqlEngine.init_engine(
        pool_size=10,
        max_overflow=5,
    )
    with get_session_with_current_tenant() as session:
        yield session


@pytest.fixture(scope="session")
def full_deployment_setup() -> Generator[None, None, None]:
    """Optional fixture to perform full deployment-like setup on demand.

    Import and call tests.external_dependency_unit.startup.full_setup.ensure_full_deployment_setup
    to initialize Postgres defaults, Vespa indices, and seed initial docs.
    """
    ensure_full_deployment_setup()
    yield


@pytest.fixture(scope="function")
def tenant_context() -> Generator[None, None, None]:
    """Set up tenant context for testing"""
    # Set the tenant context for the test
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        # Reset the tenant context after the test
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def create_test_user(
    db_session: Session,
    email_prefix: str,
    role: UserRole = UserRole.BASIC,
    account_type: AccountType = AccountType.STANDARD,
) -> User:
    """Helper to create a test user with a unique email"""
    # Use UUID to ensure unique email addresses
    unique_email = f"{email_prefix}_{uuid4().hex[:8]}@example.com"

    password_helper = PasswordHelper()
    password = password_helper.generate()
    hashed_password = password_helper.hash(password)

    user = User(
        id=uuid4(),
        email=unique_email,
        hashed_password=hashed_password,
        is_active=True,
        is_superuser=False,
        is_verified=True,
        role=role,
        account_type=account_type,
    )
    db_session.add(user)
    db_session.commit()
    db_session.refresh(user)
    return user


@pytest.fixture(scope="module")
def initialize_file_store() -> Generator[None, None, None]:
    """Initialize the file store for testing.

    Scoped to module level since file store initialization is idempotent
    and doesn't need to be reset between tests.
    """
    get_default_file_store().initialize()
    yield


================================================
FILE: backend/tests/external_dependency_unit/connectors/confluence/conftest.py
================================================
import os
from typing import Any

import pytest


@pytest.fixture
def confluence_connector_config() -> dict[str, Any]:
    url_base = os.environ.get("CONFLUENCE_TEST_SPACE_URL")
    space_key = os.environ.get("CONFLUENCE_SPACE_KEY")
    page_id = os.environ.get("CONFLUENCE_PAGE_ID")
    is_cloud = os.environ.get("CONFLUENCE_IS_CLOUD", "true").lower() == "true"

    assert url_base, "CONFLUENCE_URL environment variable is required"

    return {
        "wiki_base": url_base,
        "is_cloud": is_cloud,
        "space": space_key or "",
        "page_id": page_id or "",
    }


@pytest.fixture
def confluence_credential_json() -> dict[str, Any]:
    username = os.environ.get("CONFLUENCE_USER_NAME")
    access_token = os.environ.get("CONFLUENCE_ACCESS_TOKEN")

    assert username, "CONFLUENCE_USERNAME environment variable is required"
    assert access_token, "CONFLUENCE_ACCESS_TOKEN environment variable is required"

    return {
        "confluence_username": username,
        "confluence_access_token": access_token,
    }


================================================
FILE: backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py
================================================
from typing import Any

from sqlalchemy.orm import Session

from ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from shared_configs.contextvars import get_current_tenant_id
from tests.daily.connectors.confluence.models import ExternalUserGroupSet


# In order to get these tests to run, use the credentials from Bitwarden.
# Search up "ENV vars for local and Github tests", and find the Confluence relevant key-value pairs.

_EXPECTED_CONFLUENCE_GROUPS = [
    ExternalUserGroupSet(
        id="confluence-admins-danswerai",
        user_emails={"chris@onyx.app", "yuhong@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="org-admins",
        user_emails={
            "founders@onyx.app",
            "chris@onyx.app",
            "yuhong@onyx.app",
            "oauth@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="confluence-users-danswerai",
        user_emails={
            "chris@onyx.app",
            "hagen@danswer.ai",
            "founders@onyx.app",
            "pablo@onyx.app",
            "yuhong@onyx.app",
            "oauth@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-users-danswerai",
        user_emails={
            "hagen@danswer.ai",
            "founders@onyx.app",
            "pablo@onyx.app",
            "chris@onyx.app",
            "oauth@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-admins-danswerai",
        user_emails={"hagen@danswer.ai", "founders@onyx.app", "pablo@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="confluence-user-access-admins-danswerai",
        user_emails={"hagen@danswer.ai"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-user-access-admins-danswerai",
        user_emails={"hagen@danswer.ai"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="Yuhong Only No Chris Allowed",
        user_emails={"yuhong@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="All_Confluence_Users_Found_By_Onyx",
        user_emails={
            "chris@onyx.app",
            "founders@onyx.app",
            "hagen@danswer.ai",
            "pablo@onyx.app",
            "yuhong@onyx.app",
            "oauth@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="bitbucket-users-onyxai",
        user_emails={"founders@onyx.app", "oauth@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="bitbucket-admins-onyxai",
        user_emails={"founders@onyx.app", "oauth@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-servicemanagement-users-danswerai",
        user_emails={"oauth@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="no yuhong allowed",
        user_emails={"hagen@danswer.ai", "pablo@onyx.app", "chris@onyx.app"},
        gives_anyone_access=False,
    ),
]


def test_confluence_group_sync(
    db_session: Session,
    confluence_connector_config: dict[str, Any],
    confluence_credential_json: dict[str, Any],
) -> None:
    connector = Connector(
        name="Test Connector",
        source=DocumentSource.CONFLUENCE,
        input_type=InputType.POLL,
        connector_specific_config=confluence_connector_config,
        refresh_freq=None,
        prune_freq=None,
        indexing_start=None,
    )
    db_session.add(connector)
    db_session.flush()

    credential = Credential(
        source=DocumentSource.CONFLUENCE,
        credential_json=confluence_credential_json,
    )
    db_session.add(credential)
    db_session.flush()
    # Expire the credential so it reloads from DB with SensitiveValue wrapper
    db_session.expire(credential)

    cc_pair = ConnectorCredentialPair(
        connector_id=connector.id,
        credential_id=credential.id,
        name="Test CC Pair",
        status=ConnectorCredentialPairStatus.ACTIVE,
        access_type=AccessType.SYNC,
        auto_sync_options=None,
    )
    db_session.add(cc_pair)
    db_session.commit()
    db_session.refresh(cc_pair)

    tenant_id = get_current_tenant_id()
    group_sync_iter = confluence_group_sync(
        tenant_id=tenant_id,
        cc_pair=cc_pair,
    )

    expected_groups = {group.id: group for group in _EXPECTED_CONFLUENCE_GROUPS}
    actual_groups = {
        group.id: ExternalUserGroupSet.from_model(external_user_group=group)
        for group in group_sync_iter
    }
    assert expected_groups == actual_groups


================================================
FILE: backend/tests/external_dependency_unit/connectors/google_drive/test_google_drive_group_sync.py
================================================
from collections.abc import Generator
from unittest.mock import Mock
from unittest.mock import patch

from sqlalchemy import select
from sqlalchemy.orm import Session

from ee.onyx.background.celery.tasks.external_group_syncing.tasks import (
    _perform_external_group_sync,
)
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import AccountType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import PublicExternalUserGroup
from onyx.db.models import User
from onyx.db.models import User__ExternalUserGroupId
from onyx.db.models import UserRole
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID


def _create_ext_perm_user(db_session: Session, name: str) -> User:
    """Create an external-permission user for group sync tests."""
    return create_test_user(
        db_session,
        name,
        role=UserRole.EXT_PERM_USER,
        account_type=AccountType.EXT_PERM_USER,
    )


def _create_test_connector_credential_pair(
    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE
) -> ConnectorCredentialPair:
    """Helper to create a test connector credential pair"""
    # For Google Drive, we need to include required config parameters
    connector_config = {}
    if source == DocumentSource.GOOGLE_DRIVE:
        connector_config = {
            "include_shared_drives": True,  # At least one of these is required
        }

    connector = Connector(
        name="Test Connector",
        source=source,
        input_type=InputType.POLL,
        connector_specific_config=connector_config,
        refresh_freq=None,
        prune_freq=None,
        indexing_start=None,
    )
    db_session.add(connector)
    db_session.flush()  # To get the connector ID

    credential = Credential(
        source=source,
        credential_json={},
        user_id=None,
    )
    db_session.add(credential)
    db_session.flush()  # To get the credential ID
    # Expire the credential so it reloads from DB with SensitiveValue wrapper
    db_session.expire(credential)

    cc_pair = ConnectorCredentialPair(
        connector_id=connector.id,
        credential_id=credential.id,
        name="Test CC Pair",
        status=ConnectorCredentialPairStatus.ACTIVE,
        access_type=AccessType.SYNC,
        auto_sync_options=None,
    )
    db_session.add(cc_pair)
    db_session.commit()
    db_session.refresh(cc_pair)
    return cc_pair


def _get_user_external_groups(
    db_session: Session, cc_pair_id: int, include_stale: bool = False
) -> list[User__ExternalUserGroupId]:
    """Helper to get user external groups from database"""
    query = select(User__ExternalUserGroupId).where(
        User__ExternalUserGroupId.cc_pair_id == cc_pair_id
    )
    if not include_stale:
        query = query.where(User__ExternalUserGroupId.stale.is_(False))

    return list(db_session.scalars(query).all())


def _get_public_external_groups(
    db_session: Session, cc_pair_id: int, include_stale: bool = False
) -> list[PublicExternalUserGroup]:
    """Helper to get public external groups from database"""
    query = select(PublicExternalUserGroup).where(
        PublicExternalUserGroup.cc_pair_id == cc_pair_id
    )
    if not include_stale:
        query = query.where(PublicExternalUserGroup.stale.is_(False))

    return list(db_session.scalars(query).all())


class TestPerformExternalGroupSync:
    def test_initial_group_sync(self, db_session: Session) -> None:
        """Test syncing external groups for the first time (initial sync)"""
        # Create test data
        user1 = _create_ext_perm_user(db_session, "user1")
        user2 = _create_ext_perm_user(db_session, "user2")
        user3 = _create_ext_perm_user(db_session, "user3")
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Mock external groups data as a generator that yields the expected groups
        mock_groups = [
            ExternalUserGroup(id="group1", user_emails=[user1.email, user2.email]),
            ExternalUserGroup(id="group2", user_emails=[user2.email, user3.email]),
            ExternalUserGroup(
                id="public_group", user_emails=[user1.email], gives_anyone_access=True
            ),
        ]

        def mock_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            for group in mock_groups:
                yield group

        # Verify no groups exist initially
        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0
        assert len(_get_public_external_groups(db_session, cc_pair.id)) == 0

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = mock_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run the sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify user groups were created
            user_groups = _get_user_external_groups(db_session, cc_pair.id)
            assert (
                len(user_groups) == 5
            )  # user1+2 in group1, user2+3 in group2, user1 in public_group

            # Verify group names are properly prefixed
            expected_group1_id = build_ext_group_name_for_onyx(
                "group1", DocumentSource.GOOGLE_DRIVE
            )
            expected_group2_id = build_ext_group_name_for_onyx(
                "group2", DocumentSource.GOOGLE_DRIVE
            )
            expected_public_group_id = build_ext_group_name_for_onyx(
                "public_group", DocumentSource.GOOGLE_DRIVE
            )

            group_ids = {ug.external_user_group_id for ug in user_groups}
            assert expected_group1_id in group_ids
            assert expected_group2_id in group_ids
            assert expected_public_group_id in group_ids

            # Verify public group was created
            public_groups = _get_public_external_groups(db_session, cc_pair.id)
            assert len(public_groups) == 1
            assert public_groups[0].external_user_group_id == expected_public_group_id
            assert public_groups[0].stale is False

            # Verify all groups are not stale
            for ug in user_groups:
                assert ug.stale is False

    def test_update_existing_groups(self, db_session: Session) -> None:
        """Test updating existing groups (adding/removing users)"""
        # Create test data
        user1 = _create_ext_perm_user(db_session, "user1")
        user2 = _create_ext_perm_user(db_session, "user2")
        user3 = _create_ext_perm_user(db_session, "user3")
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Initial sync with original groups
        def initial_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            yield ExternalUserGroup(id="group1", user_emails=[user1.email, user2.email])
            yield ExternalUserGroup(id="group2", user_emails=[user2.email])

        # For now, verify test setup is working
        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = initial_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run initial sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify initial state
            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            assert (
                len(initial_user_groups) == 3
            )  # user1+user2 in group1, user2 in group2

            # Updated sync with modified groups
            def updated_group_sync_func(
                tenant_id: str,  # noqa: ARG001
                cc_pair: ConnectorCredentialPair,  # noqa: ARG001
            ) -> Generator[ExternalUserGroup, None, None]:
                # group1 now has user1 and user3 (user2 removed, user3 added)
                yield ExternalUserGroup(
                    id="group1", user_emails=[user1.email, user3.email]
                )
                # group2 now has all three users (user1 and user3 added)
                yield ExternalUserGroup(
                    id="group2", user_emails=[user1.email, user2.email, user3.email]
                )

            # Update the mock function
            mock_group_config.group_sync_func = updated_group_sync_func

            # Run updated sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify updated state
            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            assert (
                len(updated_user_groups) == 5
            )  # user1+user3 in group1, user1+user2+user3 in group2

            # Verify specific user-group mappings
            expected_group1_id = build_ext_group_name_for_onyx(
                "group1", DocumentSource.GOOGLE_DRIVE
            )
            expected_group2_id = build_ext_group_name_for_onyx(
                "group2", DocumentSource.GOOGLE_DRIVE
            )

            group1_users = {
                ug.user_id
                for ug in updated_user_groups
                if ug.external_user_group_id == expected_group1_id
            }
            group2_users = {
                ug.user_id
                for ug in updated_user_groups
                if ug.external_user_group_id == expected_group2_id
            }

            assert user1.id in group1_users and user3.id in group1_users
            assert user2.id not in group1_users  # user2 was removed from group1
            assert (
                user1.id in group2_users
                and user2.id in group2_users
                and user3.id in group2_users
            )

            # Verify no stale groups remain
            for ug in updated_user_groups:
                assert ug.stale is False

    def test_remove_groups(self, db_session: Session) -> None:
        """Test removing groups (groups that no longer exist in external system)"""
        # Create test data
        user1 = _create_ext_perm_user(db_session, "user1")
        user2 = _create_ext_perm_user(db_session, "user2")
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Initial sync with multiple groups
        def initial_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            yield ExternalUserGroup(id="group1", user_emails=[user1.email, user2.email])
            yield ExternalUserGroup(id="group2", user_emails=[user1.email])
            yield ExternalUserGroup(
                id="public_group", user_emails=[user1.email], gives_anyone_access=True
            )

        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0
        assert len(_get_public_external_groups(db_session, cc_pair.id)) == 0

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = initial_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run initial sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify initial state
            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            initial_public_groups = _get_public_external_groups(db_session, cc_pair.id)
            assert (
                len(initial_user_groups) == 4
            )  # 2 in group1, 1 in group2, 1 in public_group
            assert len(initial_public_groups) == 1

            # Updated sync with only one group remaining
            def updated_group_sync_func(
                tenant_id: str,  # noqa: ARG001
                cc_pair: ConnectorCredentialPair,  # noqa: ARG001
            ) -> Generator[ExternalUserGroup, None, None]:
                # Only group1 remains, group2 and public_group are removed
                yield ExternalUserGroup(
                    id="group1", user_emails=[user1.email, user2.email]
                )

            # Update the mock function
            mock_group_config.group_sync_func = updated_group_sync_func

            # Run updated sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify updated state
            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            updated_public_groups = _get_public_external_groups(db_session, cc_pair.id)

            assert len(updated_user_groups) == 2  # Only group1 mappings remain
            assert len(updated_public_groups) == 0  # Public group was removed

            # Verify only group1 exists
            expected_group1_id = build_ext_group_name_for_onyx(
                "group1", DocumentSource.GOOGLE_DRIVE
            )
            group_ids = {ug.external_user_group_id for ug in updated_user_groups}
            assert group_ids == {expected_group1_id}

            # Verify stale groups were actually deleted from database
            all_user_groups_including_stale = _get_user_external_groups(
                db_session, cc_pair.id, include_stale=True
            )
            all_public_groups_including_stale = _get_public_external_groups(
                db_session, cc_pair.id, include_stale=True
            )

            assert len(all_user_groups_including_stale) == 2  # Only group1 mappings
            assert len(all_public_groups_including_stale) == 0  # Public group deleted

    def test_empty_group_sync(self, db_session: Session) -> None:
        """Test syncing when no groups are returned (all groups removed)"""
        # Create test data
        user1 = _create_ext_perm_user(db_session, "user1")
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Initial sync with groups
        def initial_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            yield ExternalUserGroup(id="group1", user_emails=[user1.email])

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = initial_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run initial sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify initial state
            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            assert len(initial_user_groups) == 1

            # Updated sync with no groups
            def empty_group_sync_func(
                tenant_id: str,  # noqa: ARG001
                cc_pair: ConnectorCredentialPair,  # noqa: ARG001
            ) -> Generator[ExternalUserGroup, None, None]:
                # No groups yielded
                return
                yield  # This line is never reached but satisfies the generator type

            # Update the mock function
            mock_group_config.group_sync_func = empty_group_sync_func

            # Run updated sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify all groups were removed
            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)
            updated_public_groups = _get_public_external_groups(db_session, cc_pair.id)

            assert len(updated_user_groups) == 0
            assert len(updated_public_groups) == 0

    def test_batch_processing(self, db_session: Session) -> None:
        """Test that large numbers of groups are processed in batches"""
        # Create many test users
        users = []
        for i in range(150):  # More than the batch size of 100
            users.append(_create_ext_perm_user(db_session, f"user{i}"))

        cc_pair = _create_test_connector_credential_pair(db_session)

        # Create a large group with many users
        def large_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            yield ExternalUserGroup(
                id="large_group", user_emails=[user.email for user in users]
            )

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = large_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run the sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify all users were added to the group
            user_groups = _get_user_external_groups(db_session, cc_pair.id)
            assert len(user_groups) == 150

            # Verify all groups are not stale
            for ug in user_groups:
                assert ug.stale is False

    def test_mixed_regular_and_public_groups(self, db_session: Session) -> None:
        """Test syncing a mix of regular and public groups"""
        # Create test data
        user1 = _create_ext_perm_user(db_session, "user1")
        user2 = _create_ext_perm_user(db_session, "user2")
        cc_pair = _create_test_connector_credential_pair(db_session)

        def mixed_group_sync_func(
            tenant_id: str,  # noqa: ARG001
            cc_pair: ConnectorCredentialPair,  # noqa: ARG001
        ) -> Generator[ExternalUserGroup, None, None]:
            yield ExternalUserGroup(
                id="regular_group", user_emails=[user1.email, user2.email]
            )
            yield ExternalUserGroup(
                id="public_group1", user_emails=[user1.email], gives_anyone_access=True
            )
            yield ExternalUserGroup(
                id="public_group2",
                user_emails=[],  # Empty user list for public group
                gives_anyone_access=True,
            )

        with patch(
            "ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config"
        ) as mock_config:
            # Mock sync config
            mock_group_config = Mock()
            mock_group_config.group_sync_func = mixed_group_sync_func

            mock_sync_config = Mock()
            mock_sync_config.group_sync_config = mock_group_config

            mock_config.return_value = mock_sync_config

            # Run the sync
            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)

            # Verify user groups
            user_groups = _get_user_external_groups(db_session, cc_pair.id)
            expected_regular_group_id = build_ext_group_name_for_onyx(
                "regular_group", DocumentSource.GOOGLE_DRIVE
            )
            expected_public_group1_id = build_ext_group_name_for_onyx(
                "public_group1", DocumentSource.GOOGLE_DRIVE
            )

            # Should have 2 users in regular_group + 1 user in public_group1 = 3 total
            assert len(user_groups) == 3

            regular_group_users = [
                ug
                for ug in user_groups
                if ug.external_user_group_id == expected_regular_group_id
            ]
            public_group1_users = [
                ug
                for ug in user_groups
                if ug.external_user_group_id == expected_public_group1_id
            ]

            assert len(regular_group_users) == 2
            assert len(public_group1_users) == 1

            # Verify public groups
            public_groups = _get_public_external_groups(db_session, cc_pair.id)
            assert len(public_groups) == 2  # public_group1 and public_group2

            public_group_ids = {pg.external_user_group_id for pg in public_groups}
            expected_public_group2_id = build_ext_group_name_for_onyx(
                "public_group2", DocumentSource.GOOGLE_DRIVE
            )
            assert expected_public_group1_id in public_group_ids
            assert expected_public_group2_id in public_group_ids


================================================
FILE: backend/tests/external_dependency_unit/connectors/jira/conftest.py
================================================
import os
from typing import Any

import pytest


@pytest.fixture
def jira_connector_config() -> dict[str, Any]:
    jira_base_url = os.environ.get("JIRA_BASE_URL", "https://danswerai.atlassian.net")

    return {
        "jira_base_url": jira_base_url,
        "project_key": "",  # Empty to sync all projects
        "scoped_token": False,
    }


@pytest.fixture
def jira_credential_json() -> dict[str, Any]:
    user_email = os.environ.get("JIRA_ADMIN_USER_EMAIL", "chris@onyx.app")
    api_token = os.environ.get("JIRA_ADMIN_API_TOKEN")

    assert user_email, "JIRA_ADMIN_USER_EMAIL environment variable is required"
    assert api_token, "JIRA_ADMIN_API_TOKEN environment variable is required"

    return {
        "jira_user_email": user_email,
        "jira_api_token": api_token,
    }


================================================
FILE: backend/tests/external_dependency_unit/connectors/jira/test_jira_doc_sync.py
================================================
from typing import Any

import pytest
from pydantic import BaseModel
from sqlalchemy.orm import Session

from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.utils import DocumentRow
from onyx.db.utils import SortOrder


# In order to get these tests to run, use the credentials from Bitwarden.
# Search up "ENV vars for local and Github tests", and find the Jira relevant key-value pairs.
# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN

pytestmark = pytest.mark.usefixtures("enable_ee")


class DocExternalAccessSet(BaseModel):
    """A version of DocExternalAccess that uses sets for comparison."""

    doc_id: str
    external_user_emails: set[str]
    external_user_group_ids: set[str]
    is_public: bool

    @classmethod
    def from_doc_external_access(
        cls, doc_external_access: DocExternalAccess
    ) -> "DocExternalAccessSet":
        return cls(
            doc_id=doc_external_access.doc_id,
            external_user_emails=doc_external_access.external_access.external_user_emails,
            external_user_group_ids=doc_external_access.external_access.external_user_group_ids,
            is_public=doc_external_access.external_access.is_public,
        )


def test_jira_doc_sync(
    db_session: Session,
    jira_connector_config: dict[str, Any],
    jira_credential_json: dict[str, Any],
) -> None:
    """Test that Jira doc sync returns documents with correct permissions.

    This test uses the AS project which has applicationRole permission,
    meaning all documents should be marked as public.
    """
    try:
        # Use AS project specifically for this test
        connector_config = {
            **jira_connector_config,
            "project_key": "AS",  # DailyConnectorTestProject
        }

        connector = Connector(
            name="Test Jira Doc Sync Connector",
            source=DocumentSource.JIRA,
            input_type=InputType.POLL,
            connector_specific_config=connector_config,
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        )
        db_session.add(connector)
        db_session.flush()

        credential = Credential(
            source=DocumentSource.JIRA,
            credential_json=jira_credential_json,
        )
        db_session.add(credential)
        db_session.flush()
        # Expire the credential so it reloads from DB with SensitiveValue wrapper
        db_session.expire(credential)

        cc_pair = ConnectorCredentialPair(
            connector_id=connector.id,
            credential_id=credential.id,
            name="Test Jira Doc Sync CC Pair",
            status=ConnectorCredentialPairStatus.ACTIVE,
            access_type=AccessType.SYNC,
            auto_sync_options=None,
        )
        db_session.add(cc_pair)
        db_session.flush()
        db_session.refresh(cc_pair)

        # Mock functions - we don't have existing docs in the test DB
        def fetch_all_existing_docs_fn(
            sort_order: SortOrder | None = None,  # noqa: ARG001
        ) -> list[DocumentRow]:
            return []

        def fetch_all_existing_docs_ids_fn() -> list[str]:
            return []

        doc_sync_iter = jira_doc_sync(
            cc_pair=cc_pair,
            fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,
            fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        )

        # Expected documents from the danswerai.atlassian.net Jira instance
        # The AS project has applicationRole permission, so all docs should be public
        _EXPECTED_JIRA_DOCS = [
            DocExternalAccessSet(
                doc_id="https://danswerai.atlassian.net/browse/AS-3",
                external_user_emails=set(),
                external_user_group_ids=set(),
                is_public=True,
            ),
            DocExternalAccessSet(
                doc_id="https://danswerai.atlassian.net/browse/AS-4",
                external_user_emails=set(),
                external_user_group_ids=set(),
                is_public=True,
            ),
        ]

        expected_docs = {doc.doc_id: doc for doc in _EXPECTED_JIRA_DOCS}
        actual_docs = {
            doc.doc_id: DocExternalAccessSet.from_doc_external_access(doc)
            for doc in doc_sync_iter
            if isinstance(doc, DocExternalAccess)
        }
        assert (
            expected_docs == actual_docs
        ), f"Expected docs: {expected_docs}\nActual docs: {actual_docs}"
    finally:
        db_session.rollback()


def test_jira_doc_sync_with_specific_permissions(
    db_session: Session,
    jira_connector_config: dict[str, Any],
    jira_credential_json: dict[str, Any],
) -> None:
    """Test that Jira doc sync returns documents with specific permissions.

    This test uses a project that has specific user permissions to verify
    that specific users are correctly extracted.
    """
    try:
        # Use SUP project which has specific user permissions
        connector_config = {
            **jira_connector_config,
            "project_key": "SUP",
        }

        connector = Connector(
            name="Test Jira Doc Sync with Groups Connector",
            source=DocumentSource.JIRA,
            input_type=InputType.POLL,
            connector_specific_config=connector_config,
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        )
        db_session.add(connector)
        db_session.flush()

        credential = Credential(
            source=DocumentSource.JIRA,
            credential_json=jira_credential_json,
        )
        db_session.add(credential)
        db_session.flush()
        # Expire the credential so it reloads from DB with SensitiveValue wrapper
        db_session.expire(credential)

        cc_pair = ConnectorCredentialPair(
            connector_id=connector.id,
            credential_id=credential.id,
            name="Test Jira Doc Sync with Groups CC Pair",
            status=ConnectorCredentialPairStatus.ACTIVE,
            access_type=AccessType.SYNC,
            auto_sync_options=None,
        )
        db_session.add(cc_pair)
        db_session.flush()
        db_session.refresh(cc_pair)

        # Mock functions
        def fetch_all_existing_docs_fn(
            sort_order: SortOrder | None = None,  # noqa: ARG001
        ) -> list[DocumentRow]:
            return []

        def fetch_all_existing_docs_ids_fn() -> list[str]:
            return []

        doc_sync_iter = jira_doc_sync(
            cc_pair=cc_pair,
            fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,
            fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        )

        docs = list(doc_sync_iter)

        # SUP project should have user-specific permissions (not public)
        assert len(docs) > 0, "Expected at least one document from SUP project"

        _EXPECTED_USER_EMAILS = set(
            ["yuhong@onyx.app", "chris@onyx.app", "founders@onyx.app"]
        )
        _EXPECTED_USER_GROUP_IDS = set(["jira-users-danswerai"])

        for doc in docs:
            if not isinstance(doc, DocExternalAccess):
                continue
            assert doc.doc_id.startswith("https://danswerai.atlassian.net/browse/SUP-")
            # SUP project has specific users assigned, not applicationRole
            assert (
                not doc.external_access.is_public
            ), f"Document {doc.doc_id} should not be public"
            # Should have user emails
            assert doc.external_access.external_user_emails == _EXPECTED_USER_EMAILS
            assert (
                doc.external_access.external_user_group_ids == _EXPECTED_USER_GROUP_IDS
            )
    finally:
        db_session.rollback()


================================================
FILE: backend/tests/external_dependency_unit/connectors/jira/test_jira_group_sync.py
================================================
from typing import Any

import pytest
from sqlalchemy.orm import Session

from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from shared_configs.contextvars import get_current_tenant_id
from tests.daily.connectors.confluence.models import ExternalUserGroupSet


# In order to get these tests to run, use the credentials from Bitwarden.
# Search up "ENV vars for local and Github tests", and find the Jira relevant key-value pairs.
# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN

pytestmark = pytest.mark.usefixtures("enable_ee")

# Expected groups from the danswerai.atlassian.net Jira instance
# Note: These groups are shared with Confluence since they're both Atlassian products
# App accounts (bots, integrations) are filtered out
_EXPECTED_JIRA_GROUPS = [
    ExternalUserGroupSet(
        id="Yuhong Only No Chris Allowed",
        user_emails={"yuhong@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="confluence-admins-danswerai",
        user_emails={"chris@onyx.app", "yuhong@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="confluence-user-access-admins-danswerai",
        user_emails={"hagen@danswer.ai"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="confluence-users-danswerai",
        user_emails={
            "chris@onyx.app",
            "founders@onyx.app",
            "hagen@danswer.ai",
            "pablo@onyx.app",
            "yuhong@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-admins-danswerai",
        user_emails={"founders@onyx.app", "hagen@danswer.ai", "pablo@onyx.app"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-user-access-admins-danswerai",
        user_emails={"hagen@danswer.ai"},
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="jira-users-danswerai",
        user_emails={
            "chris@onyx.app",
            "founders@onyx.app",
            "hagen@danswer.ai",
            "pablo@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="org-admins",
        user_emails={
            "chris@onyx.app",
            "founders@onyx.app",
            "yuhong@onyx.app",
        },
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="bitbucket-admins-onyxai",
        user_emails={"founders@onyx.app"},  # no Oauth, we skip "app" account in jira
        gives_anyone_access=False,
    ),
    ExternalUserGroupSet(
        id="bitbucket-users-onyxai",
        user_emails={"founders@onyx.app"},  # no Oauth, we skip "app" account in jira
        gives_anyone_access=False,
    ),
]


def test_jira_group_sync(
    db_session: Session,
    jira_connector_config: dict[str, Any],
    jira_credential_json: dict[str, Any],
) -> None:
    try:
        connector = Connector(
            name="Test Jira Connector",
            source=DocumentSource.JIRA,
            input_type=InputType.POLL,
            connector_specific_config=jira_connector_config,
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        )
        db_session.add(connector)
        db_session.flush()

        credential = Credential(
            source=DocumentSource.JIRA,
            credential_json=jira_credential_json,
        )
        db_session.add(credential)
        db_session.flush()
        # Expire the credential so it reloads from DB with SensitiveValue wrapper
        db_session.expire(credential)

        cc_pair = ConnectorCredentialPair(
            connector_id=connector.id,
            credential_id=credential.id,
            name="Test Jira CC Pair",
            status=ConnectorCredentialPairStatus.ACTIVE,
            access_type=AccessType.SYNC,
            auto_sync_options=None,
        )
        db_session.add(cc_pair)
        db_session.flush()
        db_session.refresh(cc_pair)

        tenant_id = get_current_tenant_id()
        group_sync_iter = jira_group_sync(
            tenant_id=tenant_id,
            cc_pair=cc_pair,
        )

        expected_groups = {group.id: group for group in _EXPECTED_JIRA_GROUPS}
        actual_groups = {
            group.id: ExternalUserGroupSet.from_model(external_user_group=group)
            for group in group_sync_iter
        }
        assert expected_groups == actual_groups
    finally:
        db_session.rollback()


================================================
FILE: backend/tests/external_dependency_unit/constants.py
================================================
TEST_TENANT_ID: str = "public"


================================================
FILE: backend/tests/external_dependency_unit/craft/conftest.py
================================================
"""Fixtures for build mode tests."""

from collections.abc import Generator
from uuid import uuid4

import pytest
from fastapi_users.password import PasswordHelper
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import AccountType
from onyx.db.enums import BuildSessionStatus
from onyx.db.models import BuildSession
from onyx.db.models import User
from onyx.db.models import UserRole
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID


@pytest.fixture(scope="function")
def db_session() -> Generator[Session, None, None]:
    """Create a database session for testing using the actual PostgreSQL database."""
    SqlEngine.init_engine(pool_size=10, max_overflow=5)
    with get_session_with_current_tenant() as session:
        yield session


@pytest.fixture(scope="function")
def tenant_context() -> Generator[None, None, None]:
    """Set up tenant context for testing."""
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@pytest.fixture(scope="function")
def test_user(db_session: Session, tenant_context: None) -> User:  # noqa: ARG001
    """Create a test user for build session tests."""
    unique_email = f"build_test_{uuid4().hex[:8]}@example.com"

    password_helper = PasswordHelper()
    password = password_helper.generate()
    hashed_password = password_helper.hash(password)

    user = User(
        id=uuid4(),
        email=unique_email,
        hashed_password=hashed_password,
        is_active=True,
        is_superuser=False,
        is_verified=True,
        role=UserRole.EXT_PERM_USER,
        account_type=AccountType.EXT_PERM_USER,
    )
    db_session.add(user)
    db_session.commit()
    db_session.refresh(user)
    return user


@pytest.fixture(scope="function")
def build_session(
    db_session: Session,
    test_user: User,
    tenant_context: None,  # noqa: ARG001
) -> BuildSession:
    """Create a test build session."""
    session = BuildSession(
        id=uuid4(),
        user_id=test_user.id,
        name="Test Build Session",
        status=BuildSessionStatus.ACTIVE,
    )
    db_session.add(session)
    db_session.commit()
    db_session.refresh(session)
    return session


================================================
FILE: backend/tests/external_dependency_unit/craft/test_build_packet_storage.py
================================================
"""
Test suite for build mode packet storage.

Tests the new packet storage behavior:
- All data stored in message_metadata as JSON (no content column)
- turn_index tracks which user message each assistant message belongs to
- Tool calls: Only save when status="completed"
- Message/thought chunks: Accumulated and saved as synthetic packets
- Agent plan updates: Upserted (only latest kept per turn)
"""

from sqlalchemy.orm import Session

from onyx.configs.constants import MessageType
from onyx.db.models import BuildSession
from onyx.server.features.build.db.build_session import create_message
from onyx.server.features.build.db.build_session import get_session_messages
from onyx.server.features.build.db.build_session import upsert_agent_plan
from onyx.server.features.build.session.manager import BuildStreamingState


class TestBuildMessageStorage:
    """Tests for build message storage in the database."""

    def test_create_message_with_metadata(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test creating a message with JSON metadata and turn_index."""
        user_message_metadata = {
            "type": "user_message",
            "content": {"type": "text", "text": "Hello, world!"},
        }

        message = create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata=user_message_metadata,
            db_session=db_session,
        )

        assert message.id is not None
        assert message.session_id == build_session.id
        assert message.type == MessageType.USER
        assert message.turn_index == 0
        assert message.message_metadata == user_message_metadata
        assert message.message_metadata["type"] == "user_message"
        assert message.message_metadata["content"]["text"] == "Hello, world!"

    def test_create_multiple_messages_with_turn_index(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test creating multiple messages with correct turn_index values."""
        # First user message (turn 0)
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "First question"},
            },
            db_session=db_session,
        )

        # Assistant response (turn 0)
        create_message(
            session_id=build_session.id,
            message_type=MessageType.ASSISTANT,
            turn_index=0,
            message_metadata={
                "type": "agent_message",
                "content": {"type": "text", "text": "First answer"},
            },
            db_session=db_session,
        )

        # Second user message (turn 1)
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=1,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "Second question"},
            },
            db_session=db_session,
        )

        # Assistant response (turn 1)
        create_message(
            session_id=build_session.id,
            message_type=MessageType.ASSISTANT,
            turn_index=1,
            message_metadata={
                "type": "agent_message",
                "content": {"type": "text", "text": "Second answer"},
            },
            db_session=db_session,
        )

        # Verify messages
        messages = get_session_messages(build_session.id, db_session)
        assert len(messages) == 4

        # Check turn indices
        turn_0_messages = [m for m in messages if m.turn_index == 0]
        turn_1_messages = [m for m in messages if m.turn_index == 1]

        assert len(turn_0_messages) == 2
        assert len(turn_1_messages) == 2

    def test_tool_call_completed_storage(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test storing only completed tool calls."""
        # Create a user message first
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "Run a tool"},
            },
            db_session=db_session,
        )

        # Create a completed tool call
        tool_call_packet = {
            "type": "tool_call_progress",
            "toolCallId": "tool-123",
            "status": "completed",
            "kind": "bash",
            "title": "Running command",
            "rawOutput": "Command completed successfully",
            "timestamp": "2025-01-01T00:00:00Z",
        }

        message = create_message(
            session_id=build_session.id,
            message_type=MessageType.ASSISTANT,
            turn_index=0,
            message_metadata=tool_call_packet,
            db_session=db_session,
        )

        assert message.message_metadata["type"] == "tool_call_progress"
        assert message.message_metadata["status"] == "completed"
        assert message.message_metadata["toolCallId"] == "tool-123"

    def test_upsert_agent_plan(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test upserting agent plan - only latest should be kept."""
        # Create a user message first
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "Create a plan"},
            },
            db_session=db_session,
        )

        # First plan
        plan1 = {
            "type": "agent_plan_update",
            "entries": [
                {"id": "1", "status": "pending", "content": "Step 1"},
            ],
            "timestamp": "2025-01-01T00:00:00Z",
        }

        plan_msg1 = upsert_agent_plan(
            session_id=build_session.id,
            turn_index=0,
            plan_metadata=plan1,
            db_session=db_session,
        )

        assert plan_msg1.message_metadata["entries"][0]["status"] == "pending"

        # Update plan with new status
        plan2 = {
            "type": "agent_plan_update",
            "entries": [
                {"id": "1", "status": "completed", "content": "Step 1"},
                {"id": "2", "status": "in_progress", "content": "Step 2"},
            ],
            "timestamp": "2025-01-01T00:01:00Z",
        }

        plan_msg2 = upsert_agent_plan(
            session_id=build_session.id,
            turn_index=0,
            plan_metadata=plan2,
            db_session=db_session,
            existing_plan_id=plan_msg1.id,
        )

        # Should be the same message, updated
        assert plan_msg2.id == plan_msg1.id
        assert len(plan_msg2.message_metadata["entries"]) == 2
        assert plan_msg2.message_metadata["entries"][0]["status"] == "completed"

        # Verify only one plan message exists for this turn
        messages = get_session_messages(build_session.id, db_session)
        plan_messages = [
            m for m in messages if m.message_metadata.get("type") == "agent_plan_update"
        ]
        assert len(plan_messages) == 1

    def test_upsert_agent_plan_without_existing_id(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test upserting agent plan when we don't know the existing ID."""
        # Create a user message first
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "Create a plan"},
            },
            db_session=db_session,
        )

        # First plan - no existing ID
        plan1 = {
            "type": "agent_plan_update",
            "entries": [{"id": "1", "status": "pending", "content": "Step 1"}],
        }

        plan_msg1 = upsert_agent_plan(
            session_id=build_session.id,
            turn_index=0,
            plan_metadata=plan1,
            db_session=db_session,
        )

        # Second plan - still no existing ID, should find and update
        plan2 = {
            "type": "agent_plan_update",
            "entries": [{"id": "1", "status": "completed", "content": "Step 1"}],
        }

        plan_msg2 = upsert_agent_plan(
            session_id=build_session.id,
            turn_index=0,
            plan_metadata=plan2,
            db_session=db_session,
        )

        # Should be the same message
        assert plan_msg2.id == plan_msg1.id

    def test_streaming_flow_db_calls(
        self,
        db_session: Session,
        build_session: BuildSession,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test that streaming flow creates correct number of DB messages.

        Simulates:
        1. Agent message chunks -> 1 message
        2. Tool call -> 1 message
        3. Agent message chunks -> 1 message

        This verifies that we save parts of the turn as they finish, rather than
        buffering everything into one giant message or losing granularity.
        """
        # 0. Initial user message
        create_message(
            session_id=build_session.id,
            message_type=MessageType.USER,
            turn_index=0,
            message_metadata={
                "type": "user_message",
                "content": {"type": "text", "text": "Do something"},
            },
            db_session=db_session,
        )

        state = BuildStreamingState(turn_index=0)

        # 1. Stream agent message chunks
        state.add_message_chunk("Thinking")
        state.add_message_chunk(" about it...")

        # Simulate switch to tool call (e.g. ToolCallStart event) -> finalize message
        # In SessionManager, this happens via state.should_finalize_chunks()
        if state.should_finalize_chunks("tool_call_start"):
            msg_packet = state.finalize_message_chunks()
            if msg_packet:
                create_message(
                    session_id=build_session.id,
                    message_type=MessageType.ASSISTANT,
                    turn_index=0,
                    message_metadata=msg_packet,
                    db_session=db_session,
                )
        state.clear_last_chunk_type()

        # 2. Handle completed tool call (immediate save)
        tool_packet = {
            "type": "tool_call_progress",
            "toolCallId": "call_1",
            "status": "completed",
            "timestamp": "2025-01-01T00:00:00Z",
        }
        create_message(
            session_id=build_session.id,
            message_type=MessageType.ASSISTANT,
            turn_index=0,
            message_metadata=tool_packet,
            db_session=db_session,
        )

        # 3. Stream more agent message chunks
        state.add_message_chunk("Done")
        state.add_message_chunk(" with tool.")

        # End of stream -> finalize
        msg_packet = state.finalize_message_chunks()
        if msg_packet:
            create_message(
                session_id=build_session.id,
                message_type=MessageType.ASSISTANT,
                turn_index=0,
                message_metadata=msg_packet,
                db_session=db_session,
            )

        # Verify DB state
        messages = get_session_messages(build_session.id, db_session)
        # 1 user + 3 assistant = 4 total
        assert len(messages) == 4

        # Verify types/order
        assert messages[0].type == MessageType.USER

        assert messages[1].type == MessageType.ASSISTANT
        assert messages[1].message_metadata["content"]["text"] == "Thinking about it..."

        assert messages[2].type == MessageType.ASSISTANT
        assert messages[2].message_metadata["type"] == "tool_call_progress"

        assert messages[3].type == MessageType.ASSISTANT
        assert messages[3].message_metadata["content"]["text"] == "Done with tool."


class TestBuildStreamingState:
    """Tests for BuildStreamingState class."""

    def test_message_chunk_accumulation(self) -> None:
        """Test accumulating message chunks."""
        state = BuildStreamingState(turn_index=0)

        state.add_message_chunk("Hello, ")
        state.add_message_chunk("world!")

        packet = state.finalize_message_chunks()

        assert packet is not None
        assert packet["type"] == "agent_message"
        assert packet["content"]["text"] == "Hello, world!"

        # After finalize, chunks should be cleared
        assert len(state.message_chunks) == 0

    def test_thought_chunk_accumulation(self) -> None:
        """Test accumulating thought chunks."""
        state = BuildStreamingState(turn_index=0)

        state.add_thought_chunk("Thinking about ")
        state.add_thought_chunk("the problem...")

        packet = state.finalize_thought_chunks()

        assert packet is not None
        assert packet["type"] == "agent_thought"
        assert packet["content"]["text"] == "Thinking about the problem..."

    def test_should_finalize_chunks_on_type_change(self) -> None:
        """Test detection of when to finalize chunks."""
        state = BuildStreamingState(turn_index=0)

        # Add message chunk
        state.add_message_chunk("Hello")

        # Should finalize when receiving non-message packet
        assert state.should_finalize_chunks("tool_call_start") is True
        assert state.should_finalize_chunks("agent_plan_update") is True
        assert state.should_finalize_chunks("agent_thought_chunk") is True

        # Should NOT finalize for same type
        assert state.should_finalize_chunks("agent_message_chunk") is False

    def test_finalize_returns_none_when_empty(self) -> None:
        """Test that finalize returns None when no chunks accumulated."""
        state = BuildStreamingState(turn_index=0)

        assert state.finalize_message_chunks() is None
        assert state.finalize_thought_chunks() is None


================================================
FILE: backend/tests/external_dependency_unit/craft/test_file_upload.py
================================================
"""Tests for file upload functionality in build sessions.

Tests the file upload and delete operations for pre-provisioned sessions,
including limit enforcement and SandboxManager delegation.
"""

from __future__ import annotations

from collections.abc import Generator
from typing import TYPE_CHECKING
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.enums import BuildSessionStatus
from onyx.db.enums import SandboxStatus
from onyx.db.models import BuildSession
from onyx.db.models import Sandbox
from onyx.db.models import User
from onyx.server.features.build.configs import ATTACHMENTS_DIRECTORY
from onyx.server.features.build.configs import MAX_TOTAL_UPLOAD_SIZE_BYTES
from onyx.server.features.build.configs import MAX_UPLOAD_FILES_PER_SESSION
from onyx.server.features.build.session.manager import UploadLimitExceededError

if TYPE_CHECKING:
    from onyx.server.features.build.session.manager import SessionManager


@pytest.fixture(scope="function")
def sandbox(
    db_session: Session,
    test_user: User,
    tenant_context: None,  # noqa: ARG001
) -> Sandbox:
    """Create a test sandbox for the user (sandboxes are per-user, not per-session)."""
    sandbox = Sandbox(
        id=uuid4(),
        user_id=test_user.id,
        status=SandboxStatus.RUNNING,
    )
    db_session.add(sandbox)
    db_session.commit()
    db_session.refresh(sandbox)
    return sandbox


@pytest.fixture(scope="function")
def build_session_with_user(
    db_session: Session,
    test_user: User,
    sandbox: Sandbox,  # noqa: ARG001
    tenant_context: None,  # noqa: ARG001
) -> BuildSession:
    """Create a test build session for a user who has a sandbox."""
    session = BuildSession(
        id=uuid4(),
        user_id=test_user.id,
        name="Test Build Session",
        status=BuildSessionStatus.ACTIVE,
    )
    db_session.add(session)
    db_session.commit()
    db_session.refresh(session)
    return session


@pytest.fixture(scope="function")
def mock_sandbox_manager() -> MagicMock:
    """Create a mock sandbox manager."""
    return MagicMock()


@pytest.fixture(scope="function")
def session_manager_with_mock(
    db_session: Session, mock_sandbox_manager: MagicMock
) -> Generator["SessionManager", None, None]:
    """Create a SessionManager with mocked sandbox manager."""
    # Import here to avoid module-level initialization issues
    with patch(
        "onyx.server.features.build.session.manager.get_sandbox_manager",
        return_value=mock_sandbox_manager,
    ):
        from onyx.server.features.build.session.manager import SessionManager

        manager = SessionManager(db_session)
        yield manager


class TestFileUpload:
    """Tests for file upload functionality."""

    def test_upload_file_delegates_to_sandbox_manager(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that uploading a file delegates to the sandbox manager."""
        # Configure mocks
        mock_sandbox_manager.get_upload_stats.return_value = (0, 0)
        mock_sandbox_manager.upload_file.return_value = (
            f"{ATTACHMENTS_DIRECTORY}/test.txt"
        )

        # Upload a file
        content = b"Hello, World!"
        relative_path, size = session_manager_with_mock.upload_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            filename="test.txt",
            content=content,
        )

        # Verify the sandbox manager was called correctly
        mock_sandbox_manager.upload_file.assert_called_once_with(
            sandbox_id=sandbox.id,
            session_id=build_session_with_user.id,
            filename="test.txt",
            content=content,
        )
        assert relative_path == f"{ATTACHMENTS_DIRECTORY}/test.txt"
        assert size == len(content)

    def test_upload_file_returns_correct_path(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that upload returns the correct relative path."""
        mock_sandbox_manager.get_upload_stats.return_value = (0, 0)
        mock_sandbox_manager.upload_file.return_value = (
            f"{ATTACHMENTS_DIRECTORY}/document.pdf"
        )

        relative_path, size = session_manager_with_mock.upload_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            filename="document.pdf",
            content=b"PDF content",
        )

        assert relative_path == f"{ATTACHMENTS_DIRECTORY}/document.pdf"
        assert size == 11  # len("PDF content")

    def test_upload_file_session_not_found(
        self,
        test_user: User,
        sandbox: Sandbox,  # noqa: ARG002
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that uploading to a non-existent session raises ValueError."""
        with pytest.raises(ValueError, match="Session not found"):
            session_manager_with_mock.upload_file(
                session_id=uuid4(),  # Non-existent session
                user_id=test_user.id,
                filename="test.txt",
                content=b"content",
            )


class TestFileUploadLimits:
    """Tests for file upload limit enforcement."""

    def test_upload_file_count_limit_enforced(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that exceeding the file count limit raises an error."""
        # Mock get_upload_stats to return max files already uploaded
        mock_sandbox_manager.get_upload_stats.return_value = (
            MAX_UPLOAD_FILES_PER_SESSION,
            1000,
        )

        # Try to upload one more file
        with pytest.raises(UploadLimitExceededError, match="Maximum number of files"):
            session_manager_with_mock.upload_file(
                session_id=build_session_with_user.id,
                user_id=test_user.id,
                filename="one_too_many.txt",
                content=b"content",
            )

        # Verify upload_file was NOT called (limit check happens before)
        mock_sandbox_manager.upload_file.assert_not_called()

    def test_upload_total_size_limit_enforced(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that exceeding the total size limit raises an error."""
        # Mock get_upload_stats to return almost at the limit
        existing_size = MAX_TOTAL_UPLOAD_SIZE_BYTES - 100  # 100 bytes under limit
        mock_sandbox_manager.get_upload_stats.return_value = (1, existing_size)

        # Try to upload a file that would exceed the limit
        with pytest.raises(UploadLimitExceededError, match="Total upload size limit"):
            session_manager_with_mock.upload_file(
                session_id=build_session_with_user.id,
                user_id=test_user.id,
                filename="over_limit.txt",
                content=b"x" * 200,  # 200 bytes, would exceed by 100
            )

        # Verify upload_file was NOT called (limit check happens before)
        mock_sandbox_manager.upload_file.assert_not_called()

    def test_upload_succeeds_when_under_limits(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that upload succeeds when under limits."""
        # Mock get_upload_stats to return well under limits
        mock_sandbox_manager.get_upload_stats.return_value = (5, 1000)
        mock_sandbox_manager.upload_file.return_value = (
            f"{ATTACHMENTS_DIRECTORY}/test.txt"
        )

        relative_path, size = session_manager_with_mock.upload_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            filename="test.txt",
            content=b"content",
        )

        # Verify upload_file was called
        mock_sandbox_manager.upload_file.assert_called_once()
        assert relative_path == f"{ATTACHMENTS_DIRECTORY}/test.txt"


class TestFileDelete:
    """Tests for file delete functionality."""

    def test_delete_file_delegates_to_sandbox_manager(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that delete file delegates to the sandbox manager."""
        mock_sandbox_manager.delete_file.return_value = True

        result = session_manager_with_mock.delete_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            path=f"{ATTACHMENTS_DIRECTORY}/test.txt",
        )

        assert result is True
        mock_sandbox_manager.delete_file.assert_called_once_with(
            sandbox_id=sandbox.id,
            session_id=build_session_with_user.id,
            path=f"{ATTACHMENTS_DIRECTORY}/test.txt",
        )

    def test_delete_file_returns_false_when_not_found(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that delete returns False when file doesn't exist."""
        mock_sandbox_manager.delete_file.return_value = False

        result = session_manager_with_mock.delete_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            path=f"{ATTACHMENTS_DIRECTORY}/nonexistent.txt",
        )

        assert result is False

    def test_delete_file_session_not_found(
        self,
        test_user: User,
        sandbox: Sandbox,  # noqa: ARG002
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that deleting from a non-existent session raises ValueError."""
        with pytest.raises(ValueError, match="Session not found"):
            session_manager_with_mock.delete_file(
                session_id=uuid4(),  # Non-existent session
                user_id=test_user.id,
                path=f"{ATTACHMENTS_DIRECTORY}/test.txt",
            )


class TestPathSanitization:
    """Tests for path sanitization in delete operations."""

    def test_delete_file_rejects_path_traversal(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that paths with .. are rejected."""
        # Configure mock to raise ValueError (simulating sandbox manager behavior)
        mock_sandbox_manager.delete_file.side_effect = ValueError(
            "Invalid path: potential path traversal detected"
        )

        with pytest.raises(ValueError, match="path traversal"):
            session_manager_with_mock.delete_file(
                session_id=build_session_with_user.id,
                user_id=test_user.id,
                path="../../../etc/passwd",
            )

    def test_delete_file_rejects_url_encoded_traversal(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that URL-encoded paths are rejected."""
        mock_sandbox_manager.delete_file.side_effect = ValueError(
            "Invalid path: potential path traversal detected"
        )

        with pytest.raises(ValueError, match="path traversal"):
            session_manager_with_mock.delete_file(
                session_id=build_session_with_user.id,
                user_id=test_user.id,
                path="attachments/%2e%2e/secret.txt",
            )

    def test_delete_file_rejects_shell_metacharacters(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that shell metacharacters are rejected."""
        mock_sandbox_manager.delete_file.side_effect = ValueError(
            "Invalid path: contains disallowed characters"
        )

        dangerous_paths = [
            "attachments/file;rm -rf /",
            "attachments/file|cat /etc/passwd",
            "attachments/file`whoami`",
            "attachments/file$(id)",
            "attachments/file'test",
        ]

        for dangerous_path in dangerous_paths:
            with pytest.raises(ValueError, match="disallowed characters"):
                session_manager_with_mock.delete_file(
                    session_id=build_session_with_user.id,
                    user_id=test_user.id,
                    path=dangerous_path,
                )
            # Reset mock for next iteration
            mock_sandbox_manager.delete_file.reset_mock()

    def test_delete_file_rejects_null_bytes(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that null bytes in paths are rejected."""
        mock_sandbox_manager.delete_file.side_effect = ValueError(
            "Invalid path: potential path traversal detected"
        )

        with pytest.raises(ValueError, match="path traversal"):
            session_manager_with_mock.delete_file(
                session_id=build_session_with_user.id,
                user_id=test_user.id,
                path="attachments/file.txt\x00.jpg",
            )


class TestFilenameCollision:
    """Tests for filename collision handling."""

    def test_upload_returns_collision_handled_path(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,  # noqa: ARG002
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that sandbox manager can return a renamed path for collisions."""
        # Simulate sandbox manager handling collision by returning renamed path
        mock_sandbox_manager.get_upload_stats.return_value = (1, 100)  # 1 existing file
        mock_sandbox_manager.upload_file.return_value = (
            f"{ATTACHMENTS_DIRECTORY}/document_1.pdf"
        )

        relative_path, size = session_manager_with_mock.upload_file(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
            filename="document.pdf",
            content=b"PDF content",
        )

        # Verify the collision-handled path is returned
        assert relative_path == f"{ATTACHMENTS_DIRECTORY}/document_1.pdf"


class TestGetUploadStats:
    """Tests for get_upload_stats functionality."""

    def test_get_upload_stats_delegates_to_sandbox_manager(
        self,
        test_user: User,
        build_session_with_user: BuildSession,
        sandbox: Sandbox,
        mock_sandbox_manager: MagicMock,
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that get_upload_stats delegates to the sandbox manager."""
        mock_sandbox_manager.get_upload_stats.return_value = (3, 1500)

        file_count, total_size = session_manager_with_mock.get_upload_stats(
            session_id=build_session_with_user.id,
            user_id=test_user.id,
        )

        # Verify the sandbox manager was called correctly
        mock_sandbox_manager.get_upload_stats.assert_called_once_with(
            sandbox_id=sandbox.id,
            session_id=build_session_with_user.id,
        )
        assert file_count == 3
        assert total_size == 1500

    def test_get_upload_stats_session_not_found(
        self,
        test_user: User,
        sandbox: Sandbox,  # noqa: ARG002
        session_manager_with_mock: "SessionManager",
    ) -> None:
        """Test that getting stats for non-existent session raises ValueError."""
        with pytest.raises(ValueError, match="Session not found"):
            session_manager_with_mock.get_upload_stats(
                session_id=uuid4(),  # Non-existent session
                user_id=test_user.id,
            )


================================================
FILE: backend/tests/external_dependency_unit/craft/test_kubernetes_sandbox.py
================================================
"""Integration test for KubernetesSandboxManager.provision().

This test requires:
- A running Kubernetes cluster (kind, minikube, or real cluster)
- The SANDBOX_BACKEND=kubernetes environment variable
- The sandbox namespace to exist (default: onyx-sandboxes)
- Service accounts for sandbox (sandbox-runner, sandbox-file-sync)

Run with:
    SANDBOX_BACKEND=kubernetes python -m dotenv -f .vscode/.env run -- \
        pytest backend/tests/integration/tests/build/test_kubernetes_sandbox_provision.py -v
"""

import time
from uuid import UUID
from uuid import uuid4

import pytest
from kubernetes import client  # type: ignore[import-untyped]
from kubernetes import config
from kubernetes.client.rest import ApiException  # type: ignore[import-untyped]
from kubernetes.stream import stream as k8s_stream  # type: ignore[import-untyped]

from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.enums import SandboxStatus
from onyx.server.features.build.configs import SANDBOX_BACKEND
from onyx.server.features.build.configs import SANDBOX_NAMESPACE
from onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START
from onyx.server.features.build.configs import SandboxBackend
from onyx.server.features.build.sandbox.base import ACPEvent
from onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (
    KubernetesSandboxManager,
)
from onyx.server.features.build.sandbox.models import LLMProviderConfig
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

logger = setup_logger()

# Test constants
TEST_TENANT_ID = "test-tenant"
TEST_USER_ID = UUID("ee0dd46a-23dc-4128-abab-6712b3f4464c")


def _is_kubernetes_available() -> None:
    """Check if Kubernetes is available and configured."""
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config()

    v1 = client.CoreV1Api()
    # List pods in sandbox namespace instead of namespaces (avoids cluster-scope permissions)
    v1.list_namespaced_pod(SANDBOX_NAMESPACE, limit=1)


def _get_kubernetes_client() -> client.CoreV1Api:
    """Get a configured Kubernetes CoreV1Api client."""
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config()
    return client.CoreV1Api()


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_kubernetes_sandbox_provision() -> None:
    """Test that provision() creates a sandbox pod and DB record successfully.

    This is a happy path test that:
    1. Creates a BuildSession in the database
    2. Calls provision() to create a Kubernetes pod
    3. Verifies the sandbox is created with RUNNING status
    4. Cleans up by terminating the sandbox
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context (required for multi-tenant operations)
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    # Get the manager instance
    manager = KubernetesSandboxManager()

    sandbox_id = uuid4()

    # Create a test LLM config (values don't matter for this test)
    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    try:
        # Call provision
        sandbox_info = manager.provision(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
            llm_config=llm_config,
        )

        # Verify the return value
        assert sandbox_info.sandbox_id == sandbox_id
        assert sandbox_info.status == SandboxStatus.RUNNING
        assert sandbox_info.directory_path.startswith("k8s://")

        # Verify Kubernetes resources exist
        k8s_client = _get_kubernetes_client()
        pod_name = f"sandbox-{str(sandbox_id)[:8]}"
        service_name = pod_name

        # Verify pod exists and is running
        pod = k8s_client.read_namespaced_pod(
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
        )
        assert pod is not None
        assert pod.status.phase == "Running"

        # Verify service exists
        service = k8s_client.read_namespaced_service(
            name=service_name,
            namespace=SANDBOX_NAMESPACE,
        )
        assert service is not None
        assert service.spec.type == "ClusterIP"

        # Verify /workspace/templates/outputs directory exists and contains expected files
        exec_command = ["/bin/sh", "-c", "ls -la /workspace/templates/outputs"]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        print(f"DEBUG: Contents of /workspace/templates/outputs:\n{resp}")
        assert (
            "web" in resp
        ), f"/workspace/templates/outputs should contain web directory. Actual contents:\n{resp}"

        # Verify /workspace/templates/outputs/web/AGENTS.md file exists
        exec_command = [
            "/bin/sh",
            "-c",
            "cat /workspace/templates/outputs/web/AGENTS.md",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        assert (
            len(resp) > 0
        ), "/workspace/templates/outputs/web/AGENTS.md file should not be empty"
        # Verify it contains expected content
        assert (
            "Agent" in resp or "Instructions" in resp or "#" in resp
        ), "/workspace/templates/outputs/web/AGENTS.md should contain agent instructions"

        # Verify /workspace/files directory exists and contains expected files
        exec_command = ["/bin/sh", "-c", "find /workspace/files -type f | wc -l"]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        file_count = int(resp.strip())
        assert (
            file_count == 1099
        ), f"/workspace/files should contain 1099 files, but found {file_count}"

        # start session
        session_id = uuid4()
        manager.setup_session_workspace(
            sandbox_id=sandbox_id,
            session_id=session_id,
            llm_config=llm_config,
            nextjs_port=SANDBOX_NEXTJS_PORT_START,
            file_system_path=None,
            snapshot_path=None,
            user_name="Test User",
            user_role="Test Role",
        )

        # Verify AGENTS.md file exists for the session
        exec_command = [
            "/bin/sh",
            "-c",
            f"cat /workspace/sessions/{session_id}/AGENTS.md",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        assert len(resp) > 0, "AGENTS.md file should not be empty"
        # Verify it contains expected content (from template or default)
        assert "Agent" in resp or "Instructions" in resp or "#" in resp
        assert "Test User" in resp
        assert "Test Role" in resp

        # Verify opencode.json file exists for the session
        exec_command = [
            "/bin/sh",
            "-c",
            f"cat /workspace/sessions/{session_id}/opencode.json",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        assert len(resp) > 0, "opencode.json file should not be empty"

        # verify that the outputs directory is copied over
        exec_command = [
            "/bin/sh",
            "-c",
            f"ls -la /workspace/sessions/{session_id}/outputs",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None
        assert len(resp) > 0, "outputs directory should not be empty"
        assert "web" in resp, "outputs directory should contain web directory"

    finally:
        # Clean up: terminate the sandbox (no longer needs db_session)
        if sandbox_id:
            manager.terminate(sandbox_id)

            # Verify Kubernetes resources are cleaned up
            k8s_client = _get_kubernetes_client()
            pod_name = f"sandbox-{str(sandbox_id)[:8]}"

            # Give K8s a moment to delete resources
            time.sleep(2)

            # Verify pod is deleted (or being deleted)
            try:
                pod = k8s_client.read_namespaced_pod(
                    name=pod_name,
                    namespace=SANDBOX_NAMESPACE,
                )
                # Pod might still exist but be terminating
                assert pod.metadata.deletion_timestamp is not None
            except ApiException as e:
                # 404 means pod was successfully deleted
                assert e.status == 404


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_kubernetes_sandbox_send_message() -> None:
    """Test that send_message() communicates with the sandbox agent successfully.

    This test:
    1. Creates a sandbox pod
    2. Sends a simple message via send_message()
    3. Verifies we receive ACP events back (agent responses)
    4. Cleans up by terminating the sandbox
    """
    from acp.schema import AgentMessageChunk
    from acp.schema import Error
    from acp.schema import PromptResponse

    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context (required for multi-tenant operations)
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    # Get the manager instance
    manager = KubernetesSandboxManager()

    sandbox_id = uuid4()
    session_id = uuid4()

    # Create a test LLM config (values don't matter for this test)
    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    try:
        # Provision the sandbox
        sandbox_info = manager.provision(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
            llm_config=llm_config,
        )

        assert sandbox_info.status == SandboxStatus.RUNNING

        # Verify health check passes before sending message
        is_healthy = False
        for _ in range(10):
            is_healthy = manager.health_check(sandbox_id)
            if is_healthy:
                break
            time.sleep(10)

        assert is_healthy, "Sandbox agent should be healthy before sending messages"
        print("DEBUG: Sandbox agent is healthy")

        manager.setup_session_workspace(
            sandbox_id, session_id, llm_config, nextjs_port=SANDBOX_NEXTJS_PORT_START
        )

        # Send a simple message
        events: list[ACPEvent] = []
        for event in manager.send_message(sandbox_id, session_id, "What is 2 + 2?"):
            events.append(event)

        # Verify we received events
        assert len(events) > 0, "Should receive at least one event from send_message"

        for event in events:
            print(f"Recieved event: {event}")

        # Check for errors
        errors = [e for e in events if isinstance(e, Error)]
        assert len(errors) == 0, f"Should not receive errors: {errors}"

        # Verify we received some agent message content or a final response
        message_chunks = [e for e in events if isinstance(e, AgentMessageChunk)]
        prompt_responses = [e for e in events if isinstance(e, PromptResponse)]

        assert (
            len(message_chunks) > 0 or len(prompt_responses) > 0
        ), "Should receive either AgentMessageChunk or PromptResponse events"

        # If we got a PromptResponse, verify it completed successfully
        if prompt_responses:
            final_response = prompt_responses[-1]
            assert (
                final_response.stop_reason is not None
            ), "PromptResponse should have a stop_reason"

    finally:
        # Clean up: terminate the sandbox
        if sandbox_id:
            manager.terminate(sandbox_id)

            # Verify Kubernetes resources are cleaned up
            k8s_client = _get_kubernetes_client()
            pod_name = f"sandbox-{str(sandbox_id)[:8]}"

            # Give K8s a moment to delete resources
            time.sleep(2)

            # Verify pod is deleted (or being deleted)
            try:
                pod = k8s_client.read_namespaced_pod(
                    name=pod_name,
                    namespace=SANDBOX_NAMESPACE,
                )
                # Pod might still exist but be terminating
                assert pod.metadata.deletion_timestamp is not None
            except ApiException as e:
                # 404 means pod was successfully deleted
                assert e.status == 404


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_kubernetes_sandbox_webapp_passthrough() -> None:
    """Test that the webapp passthrough (Next.js server) is accessible in the sandbox.

    This test:
    1. Creates a sandbox pod
    2. Sets up a session workspace
    3. Verifies the Next.js server is running and accessible within the pod
    4. Verifies get_nextjs_url returns the correct cluster URL format
    5. Cleans up by terminating the sandbox
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context (required for multi-tenant operations)
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    # Get the manager instance
    manager = KubernetesSandboxManager()

    sandbox_id = uuid4()
    session_id = uuid4()

    # Create a test LLM config
    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    try:
        # Provision the sandbox
        sandbox_info = manager.provision(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
            llm_config=llm_config,
        )

        assert sandbox_info.status == SandboxStatus.RUNNING

        # Verify health check passes before testing webapp
        is_healthy = False
        for _ in range(10):
            is_healthy = manager.health_check(sandbox_id)
            if is_healthy:
                break
            time.sleep(10)

        assert is_healthy, "Sandbox should be healthy before testing webapp passthrough"
        print("DEBUG: Sandbox is healthy")

        # Set up session workspace
        manager.setup_session_workspace(
            sandbox_id=sandbox_id,
            session_id=session_id,
            llm_config=llm_config,
            nextjs_port=SANDBOX_NEXTJS_PORT_START,
            file_system_path=None,
            snapshot_path=None,
            user_name="Test User",
            user_role="Test Role",
        )

        # Get Kubernetes client for exec operations
        k8s_client = _get_kubernetes_client()
        pod_name = f"sandbox-{str(sandbox_id)[:8]}"

        # Wait for Next.js server to be ready (it may take a few seconds to start)
        # The session uses the first port in the configured range
        test_nextjs_port = SANDBOX_NEXTJS_PORT_START
        nextjs_ready = False
        for attempt in range(30):
            exec_command = [
                "/bin/sh",
                "-c",
                (
                    f"curl -s -o /dev/null -w '%{{http_code}}' http://localhost:{test_nextjs_port}/ 2>/dev/null || echo 'failed'"
                ),
            ]
            resp = k8s_stream(
                k8s_client.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=SANDBOX_NAMESPACE,
                container="sandbox",
                command=exec_command,
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )
            print(f"DEBUG: Next.js health check attempt {attempt + 1}: {resp}")
            if resp and resp.strip() in ("200", "304"):
                nextjs_ready = True
                break
            time.sleep(2)

        assert (
            nextjs_ready
        ), f"Next.js server should be accessible at localhost:{SANDBOX_NEXTJS_PORT_START}"
        print("DEBUG: Next.js server is ready")

        # Verify we can fetch actual content from the Next.js server
        exec_command = [
            "/bin/sh",
            "-c",
            f"curl -s http://localhost:{SANDBOX_NEXTJS_PORT_START}/ | head -c 500",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None, "Should receive content from Next.js server"
        assert len(resp) > 0, "Next.js server response should not be empty"
        # Basic check that it looks like HTML
        assert (
            "<" in resp or "html" in resp.lower() or "<!doctype" in resp.lower()
        ), f"Response should be HTML content. Got: {resp[:200]}"
        print(f"DEBUG: Next.js server returned content (first 200 chars): {resp[:200]}")

        # Verify get_nextjs_url returns correctly formatted cluster URL
        nextjs_url = manager.get_webapp_url(sandbox_id, test_nextjs_port)
        expected_service_name = f"sandbox-{str(sandbox_id)[:8]}"
        expected_url_pattern = (
            f"http://{expected_service_name}.{SANDBOX_NAMESPACE}.svc.cluster.local:"
        )
        assert nextjs_url.startswith(
            expected_url_pattern
        ), f"Next.js URL should follow cluster service format. Expected to start with: {expected_url_pattern}, Got: {nextjs_url}"
        assert (
            str(SANDBOX_NEXTJS_PORT_START) in nextjs_url
        ), f"Next.js URL should contain port {SANDBOX_NEXTJS_PORT_START}. Got: {nextjs_url}"
        print(f"DEBUG: get_nextjs_url returned: {nextjs_url}")

        # Verify the service is accessible via the cluster URL from within the pod
        exec_command = [
            "/bin/sh",
            "-c",
            f"curl -s -o /dev/null -w '%{{http_code}}' {nextjs_url}/ 2>/dev/null || echo 'failed'",
        ]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        print(f"DEBUG: Cluster URL health check response: {resp}")
        assert resp and resp.strip() in (
            "200",
            "304",
        ), f"Next.js server should be accessible via cluster URL {nextjs_url}. Got response: {resp}"

    finally:
        # Clean up: terminate the sandbox
        if sandbox_id:
            manager.terminate(sandbox_id)

            # Verify Kubernetes resources are cleaned up
            k8s_client = _get_kubernetes_client()
            pod_name = f"sandbox-{str(sandbox_id)[:8]}"

            # Give K8s a moment to delete resources
            time.sleep(2)

            # Verify pod is deleted (or being deleted)
            try:
                pod = k8s_client.read_namespaced_pod(
                    name=pod_name,
                    namespace=SANDBOX_NAMESPACE,
                )
                # Pod might still exist but be terminating
                assert pod.metadata.deletion_timestamp is not None
            except ApiException as e:
                # 404 means pod was successfully deleted
                assert e.status == 404


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_kubernetes_sandbox_file_sync() -> None:
    """Test that sync_files() triggers S3 sync in the file-sync sidecar.

    This test:
    1. Creates a sandbox pod (which now has file-sync as sidecar)
    2. Verifies the file-sync sidecar is running
    3. Calls sync_files() to trigger S3 sync
    4. Verifies the sync command executes successfully
    5. Cleans up by terminating the sandbox
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context (required for multi-tenant operations)
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    # Get the manager instance
    manager = KubernetesSandboxManager()

    sandbox_id = uuid4()

    # Create a test LLM config
    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    try:
        # Provision the sandbox
        sandbox_info = manager.provision(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
            llm_config=llm_config,
        )

        assert sandbox_info.status == SandboxStatus.RUNNING

        # Verify the pod is running
        k8s_client = _get_kubernetes_client()
        pod_name = f"sandbox-{str(sandbox_id)[:8]}"
        pod = k8s_client.read_namespaced_pod(
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
        )
        assert pod is not None
        assert pod.status.phase == "Running"

        # Verify file-sync sidecar container is running
        # With sidecar model, file-sync should be a regular container (not init)
        container_statuses = pod.status.container_statuses or []
        file_sync_status = next(
            (c for c in container_statuses if c.name == "file-sync"),
            None,
        )
        assert file_sync_status is not None, "file-sync sidecar container should exist"
        assert file_sync_status.ready, "file-sync sidecar container should be ready"
        print(f"DEBUG: file-sync container status: {file_sync_status}")

        # Wipe the /workspace/files directory to ensure files we find are from the sync
        exec_command = ["/bin/sh", "-c", "rm -rf /workspace/files/*"]
        k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="file-sync",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        print("DEBUG: Wiped /workspace/files directory")

        # Verify the directory is empty
        exec_command = ["/bin/sh", "-c", "find /workspace/files -type f | wc -l"]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        file_count = int(resp.strip()) if resp else 0
        assert (
            file_count == 0
        ), f"/workspace/files should be empty before sync, found {file_count} files"
        print("DEBUG: Verified /workspace/files is empty")

        # Call sync_files() to trigger S3 sync
        result = manager.sync_files(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
        )
        assert result is True, "sync_files() should return True on success"
        print("DEBUG: sync_files() completed successfully")

        # Verify /workspace/files exists and has files synced from S3
        # (verifies the shared volume is working and sync actually transferred files)
        exec_command = ["/bin/sh", "-c", "find /workspace/files -type f | wc -l"]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="sandbox",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None, "/workspace/files should be accessible from sandbox"
        file_count = int(resp.strip()) if resp else 0
        assert (
            file_count > 0
        ), f"sync_files() should have synced files, but found {file_count} files"
        print(f"DEBUG: sync_files() synced {file_count} files to /workspace/files")

        # Also verify we can exec into file-sync sidecar directly
        exec_command = ["/bin/sh", "-c", "ls -la /workspace/files"]
        resp = k8s_stream(
            k8s_client.connect_get_namespaced_pod_exec,
            name=pod_name,
            namespace=SANDBOX_NAMESPACE,
            container="file-sync",
            command=exec_command,
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
        assert resp is not None, "/workspace/files should be accessible from file-sync"
        print(f"DEBUG: Contents of /workspace/files (from file-sync sidecar):\n{resp}")

    finally:
        # Clean up: terminate the sandbox
        if sandbox_id:
            manager.terminate(sandbox_id)

            # Verify Kubernetes resources are cleaned up
            k8s_client = _get_kubernetes_client()
            pod_name = f"sandbox-{str(sandbox_id)[:8]}"

            # Give K8s a moment to delete resources
            time.sleep(2)

            # Verify pod is deleted (or being deleted)
            try:
                pod = k8s_client.read_namespaced_pod(
                    name=pod_name,
                    namespace=SANDBOX_NAMESPACE,
                )
                # Pod might still exist but be terminating
                assert pod.metadata.deletion_timestamp is not None
            except ApiException as e:
                # 404 means pod was successfully deleted
                assert e.status == 404


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_health_check_returns_true_for_running_pod() -> None:
    """Test that health_check() returns True for a healthy, running pod.

    This test:
    1. Creates a sandbox pod
    2. Calls health_check() and verifies it returns True
    3. Cleans up by terminating the sandbox
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    manager = KubernetesSandboxManager()
    sandbox_id = uuid4()

    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    try:
        # Provision the sandbox
        sandbox_info = manager.provision(
            sandbox_id=sandbox_id,
            user_id=TEST_USER_ID,
            tenant_id=TEST_TENANT_ID,
            llm_config=llm_config,
        )

        assert sandbox_info.status == SandboxStatus.RUNNING

        # Wait for pod to be fully healthy (it may take a few seconds)
        is_healthy = False
        for _ in range(10):
            is_healthy = manager.health_check(sandbox_id, timeout=5.0)
            if is_healthy:
                break
            time.sleep(2)

        assert (
            is_healthy
        ), "health_check() should return True for a running, healthy pod"

    finally:
        if sandbox_id:
            manager.terminate(sandbox_id)


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_health_check_returns_false_for_missing_pod() -> None:
    """Test that health_check() returns False when the pod doesn't exist.

    This test:
    1. Uses a random UUID that has no corresponding pod
    2. Calls health_check() and verifies it returns False
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    manager = KubernetesSandboxManager()

    # Use a random UUID that definitely has no pod
    nonexistent_sandbox_id = uuid4()

    # health_check should return False for non-existent pod
    is_healthy = manager.health_check(nonexistent_sandbox_id, timeout=5.0)

    assert not is_healthy, "health_check() should return False for a non-existent pod"


@pytest.mark.skipif(
    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,
    reason="SANDBOX_BACKEND must be 'kubernetes' to run this test",
)
def test_health_check_returns_false_after_termination() -> None:
    """Test that health_check() returns False after a pod has been terminated.

    This test:
    1. Creates a sandbox pod
    2. Verifies health_check() returns True
    3. Terminates the sandbox
    4. Verifies health_check() returns False
    """
    _is_kubernetes_available()

    # Initialize the database engine
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Set up tenant context
    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)

    manager = KubernetesSandboxManager()
    sandbox_id = uuid4()

    llm_config = LLMProviderConfig(
        provider="openai",
        model_name="gpt-4",
        api_key="test-key",
        api_base=None,
    )

    # Provision the sandbox
    sandbox_info = manager.provision(
        sandbox_id=sandbox_id,
        user_id=TEST_USER_ID,
        tenant_id=TEST_TENANT_ID,
        llm_config=llm_config,
    )

    assert sandbox_info.status == SandboxStatus.RUNNING

    # Wait for pod to be fully healthy
    is_healthy = False
    for _ in range(10):
        is_healthy = manager.health_check(sandbox_id, timeout=5.0)
        if is_healthy:
            break
        time.sleep(2)

    assert is_healthy, "Pod should be healthy before termination"

    # Terminate the sandbox
    manager.terminate(sandbox_id)

    # Wait for pod to be deleted
    time.sleep(3)

    # health_check should now return False
    is_healthy_after = manager.health_check(sandbox_id, timeout=5.0)

    assert (
        not is_healthy_after
    ), "health_check() should return False after pod has been terminated"


================================================
FILE: backend/tests/external_dependency_unit/craft/test_persistent_document_writer.py
================================================
"""
Tests for PersistentDocumentWriter (local) and S3PersistentDocumentWriter.

Run with:
    python -m dotenv -f .vscode/.env run -- \
        pytest backend/tests/external_dependency_unit/craft/test_persistent_document_writer.py -v
"""

import json
import os
import tempfile
from datetime import datetime
from datetime import timezone
from uuid import uuid4

import boto3
import pytest
from botocore.exceptions import ClientError

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.server.features.build.configs import SANDBOX_S3_BUCKET
from onyx.server.features.build.indexing.persistent_document_writer import (
    PersistentDocumentWriter,
)
from onyx.server.features.build.indexing.persistent_document_writer import (
    S3PersistentDocumentWriter,
)
from tests.external_dependency_unit.constants import TEST_TENANT_ID


def _create_test_document(doc_id: str, name: str) -> Document:
    """Helper to create a test document."""
    return Document(
        id=doc_id,
        semantic_identifier=name,
        title=name,
        source=DocumentSource.WEB,
        sections=[TextSection(text="Test content", link="https://example.com")],
        metadata={},
        doc_metadata={"hierarchy": {"source_path": ["Folder"]}},
        doc_updated_at=datetime.now(timezone.utc),
        primary_owners=[],
        secondary_owners=[],
    )


def test_local_persistent_document_writer() -> None:
    """Test writing documents to local filesystem."""
    with tempfile.TemporaryDirectory() as temp_dir:
        tenant_id = TEST_TENANT_ID
        user_id = str(uuid4())
        writer = PersistentDocumentWriter(
            base_path=temp_dir, tenant_id=tenant_id, user_id=user_id
        )

        doc = _create_test_document("doc-001", "Test Document")
        written_paths = writer.write_documents([doc])

        assert len(written_paths) == 1
        assert written_paths[0] == os.path.join(
            temp_dir,
            tenant_id,
            "knowledge",
            user_id,
            "web",
            "Folder",
            "Test_Document.json",
        )
        assert os.path.exists(written_paths[0])

        with open(written_paths[0]) as f:
            content = json.load(f)
        assert content["id"] == "doc-001"
        assert content["semantic_identifier"] == "Test Document"


def _is_s3_available() -> bool:
    """Check if S3 is available for testing."""
    try:
        s3_client = boto3.client("s3")
        s3_client.head_bucket(Bucket=SANDBOX_S3_BUCKET)
        return True
    except (ClientError, Exception):
        return False


@pytest.mark.skipif(
    not _is_s3_available(),
    reason=f"S3 bucket '{SANDBOX_S3_BUCKET}' not available",
)
def test_s3_persistent_document_writer() -> None:
    """Test writing documents to S3."""
    user_id = str(uuid4())
    writer = S3PersistentDocumentWriter(tenant_id=TEST_TENANT_ID, user_id=user_id)

    doc = _create_test_document("s3-doc-001", "S3 Test Doc")
    written_keys = writer.write_documents([doc])

    try:
        assert len(written_keys) == 1
        assert f"{TEST_TENANT_ID}/knowledge/{user_id}" in written_keys[0]

        # Verify the object exists in S3
        s3_client = boto3.client("s3")
        response = s3_client.get_object(Bucket=SANDBOX_S3_BUCKET, Key=written_keys[0])
        content = json.loads(response["Body"].read().decode("utf-8"))

        assert content["id"] == "s3-doc-001"
        assert content["semantic_identifier"] == "S3 Test Doc"
    finally:
        # Cleanup
        s3_client = boto3.client("s3")
        try:
            s3_client.delete_object(Bucket=SANDBOX_S3_BUCKET, Key=written_keys[0])
        except Exception:
            pass


================================================
FILE: backend/tests/external_dependency_unit/db/__init__.py
================================================


================================================
FILE: backend/tests/external_dependency_unit/db/conftest.py
================================================
"""Fixtures for testing DAL classes against a real PostgreSQL database.

These fixtures build on the db_session and tenant_context fixtures from
the parent conftest (tests/external_dependency_unit/conftest.py).

Requires a running Postgres instance. Run with::

    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/db/
"""

from collections.abc import Callable
from collections.abc import Generator
from uuid import UUID
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL
from onyx.db.models import ScimToken
from onyx.db.models import UserGroup


@pytest.fixture
def scim_dal(db_session: Session) -> ScimDAL:
    """A ScimDAL backed by the real test database session."""
    return ScimDAL(db_session)


@pytest.fixture
def scim_token_factory(
    db_session: Session,
) -> Generator[Callable[..., ScimToken], None, None]:
    """Factory that creates ScimToken rows and cleans them up after the test."""
    created_ids: list[int] = []

    def _create(
        name: str = "test-token",
        hashed_token: str | None = None,
        token_display: str = "onyx_scim_****test",
        created_by_id: UUID | None = None,
    ) -> ScimToken:
        token = ScimToken(
            name=name,
            hashed_token=hashed_token or uuid4().hex,
            token_display=token_display,
            created_by_id=created_by_id or uuid4(),
        )
        db_session.add(token)
        db_session.flush()
        created_ids.append(token.id)
        return token

    yield _create

    for token_id in created_ids:
        obj = db_session.get(ScimToken, token_id)
        if obj:
            db_session.delete(obj)
    db_session.commit()


@pytest.fixture
def user_group_factory(
    db_session: Session,
) -> Generator[Callable[..., UserGroup], None, None]:
    """Factory that creates UserGroup rows for testing group mappings."""
    created_ids: list[int] = []

    def _create(name: str | None = None) -> UserGroup:
        group = UserGroup(name=name or f"test-group-{uuid4().hex[:8]}")
        db_session.add(group)
        db_session.flush()
        created_ids.append(group.id)
        return group

    yield _create

    for group_id in created_ids:
        obj = db_session.get(UserGroup, group_id)
        if obj:
            db_session.delete(obj)
    db_session.commit()


================================================
FILE: backend/tests/external_dependency_unit/db/test_chat_session_eager_load.py
================================================
from sqlalchemy import inspect
from sqlalchemy.orm import Session

from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.models import Persona
from onyx.db.models import UserProject
from tests.external_dependency_unit.conftest import create_test_user


def test_eager_load_persona_loads_relationships(db_session: Session) -> None:
    """Verify that eager_load_persona pre-loads persona, its collections, and project."""
    user = create_test_user(db_session, "eager-load")
    persona = Persona(name="eager-load-test", description="test")
    project = UserProject(name="eager-load-project", user_id=user.id)
    db_session.add_all([persona, project])
    db_session.flush()

    chat_session = create_chat_session(
        db_session=db_session,
        description="test",
        user_id=None,
        persona_id=persona.id,
        project_id=project.id,
    )

    loaded = get_chat_session_by_id(
        chat_session_id=chat_session.id,
        user_id=None,
        db_session=db_session,
        eager_load_persona=True,
    )

    try:
        tmp = inspect(loaded)
        assert tmp is not None
        unloaded = tmp.unloaded
        assert "persona" not in unloaded
        assert "project" not in unloaded

        tmp = inspect(loaded.persona)
        assert tmp is not None
        persona_unloaded = tmp.unloaded
        assert "tools" not in persona_unloaded
        assert "user_files" not in persona_unloaded
        assert "document_sets" not in persona_unloaded
        assert "attached_documents" not in persona_unloaded
        assert "hierarchy_nodes" not in persona_unloaded
    finally:
        db_session.rollback()


================================================
FILE: backend/tests/external_dependency_unit/db/test_credential_sensitive_value.py
================================================
"""Test that Credential with nested JSON round-trips through SensitiveValue correctly.

Exercises the full encrypt → store → read → decrypt → SensitiveValue path
with realistic nested OAuth credential data, and verifies SQLAlchemy dirty
tracking works with nested dict comparison.

Requires a running Postgres instance.
"""

from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.models import Credential
from onyx.utils.sensitive import SensitiveValue

# NOTE: this is not the real shape of a Drive credential,
# but it is intended to test nested JSON credential handling

_NESTED_CRED_JSON = {
    "oauth_tokens": {
        "access_token": "ya29.abc123",
        "refresh_token": "1//xEg-def456",
    },
    "scopes": ["read", "write", "admin"],
    "client_config": {
        "client_id": "123.apps.googleusercontent.com",
        "client_secret": "GOCSPX-secret",
    },
}


def test_nested_credential_json_round_trip(db_session: Session) -> None:
    """Nested OAuth credential survives encrypt → store → read → decrypt."""
    credential = Credential(
        source=DocumentSource.GOOGLE_DRIVE,
        credential_json=_NESTED_CRED_JSON,
    )
    db_session.add(credential)
    db_session.flush()

    # Immediate read (no DB round-trip) — tests the set event wrapping
    assert isinstance(credential.credential_json, SensitiveValue)
    assert credential.credential_json.get_value(apply_mask=False) == _NESTED_CRED_JSON

    # DB round-trip — tests process_result_value
    db_session.expire(credential)
    reloaded = credential.credential_json
    assert isinstance(reloaded, SensitiveValue)
    assert reloaded.get_value(apply_mask=False) == _NESTED_CRED_JSON

    db_session.rollback()


def test_reassign_same_nested_json_not_dirty(db_session: Session) -> None:
    """Re-assigning the same nested dict should not mark the session dirty."""
    credential = Credential(
        source=DocumentSource.GOOGLE_DRIVE,
        credential_json=_NESTED_CRED_JSON,
    )
    db_session.add(credential)
    db_session.flush()

    # Clear dirty state from the insert
    db_session.expire(credential)
    _ = credential.credential_json  # force reload

    # Re-assign identical value
    credential.credential_json = _NESTED_CRED_JSON  # type: ignore[assignment]
    assert not db_session.is_modified(credential)

    db_session.rollback()


def test_assign_different_nested_json_is_dirty(db_session: Session) -> None:
    """Assigning a different nested dict should mark the session dirty."""
    credential = Credential(
        source=DocumentSource.GOOGLE_DRIVE,
        credential_json=_NESTED_CRED_JSON,
    )
    db_session.add(credential)
    db_session.flush()

    db_session.expire(credential)
    _ = credential.credential_json  # force reload

    modified_cred = {**_NESTED_CRED_JSON, "scopes": ["read"]}
    credential.credential_json = modified_cred  # type: ignore[assignment]
    assert db_session.is_modified(credential)

    db_session.rollback()


================================================
FILE: backend/tests/external_dependency_unit/db/test_rotate_encryption_key.py
================================================
"""Tests for rotate_encryption_key against real Postgres.

Uses real ORM models (Credential, InternetSearchProvider) and the actual
Postgres database. Discovery is mocked in rotation tests to scope mutations
to only the test rows — the real _discover_encrypted_columns walk is tested
separately in TestDiscoverEncryptedColumns.

Requires a running Postgres instance. Run with::

    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/db/test_rotate_encryption_key.py
"""

import json
from collections.abc import Generator
from unittest.mock import patch

import pytest
from sqlalchemy import LargeBinary
from sqlalchemy import select
from sqlalchemy import text
from sqlalchemy.orm import Session

from ee.onyx.utils.encryption import _decrypt_bytes
from ee.onyx.utils.encryption import _encrypt_string
from ee.onyx.utils.encryption import _get_trimmed_key
from onyx.configs.constants import DocumentSource
from onyx.db.models import Credential
from onyx.db.models import EncryptedJson
from onyx.db.models import EncryptedString
from onyx.db.models import InternetSearchProvider
from onyx.db.rotate_encryption_key import _discover_encrypted_columns
from onyx.db.rotate_encryption_key import rotate_encryption_key
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version

EE_MODULE = "ee.onyx.utils.encryption"
ROTATE_MODULE = "onyx.db.rotate_encryption_key"

OLD_KEY = "o" * 16
NEW_KEY = "n" * 16


@pytest.fixture(autouse=True)
def _enable_ee() -> Generator[None, None, None]:
    prev = global_version._is_ee
    global_version.set_ee()
    fetch_versioned_implementation.cache_clear()
    yield
    global_version._is_ee = prev
    fetch_versioned_implementation.cache_clear()


@pytest.fixture(autouse=True)
def _clear_key_cache() -> None:
    _get_trimmed_key.cache_clear()


def _raw_credential_bytes(db_session: Session, credential_id: int) -> bytes | None:
    """Read raw bytes from credential_json, bypassing the TypeDecorator."""
    col = Credential.__table__.c.credential_json
    stmt = select(col.cast(LargeBinary)).where(
        Credential.__table__.c.id == credential_id
    )
    return db_session.execute(stmt).scalar()


def _raw_isp_bytes(db_session: Session, isp_id: int) -> bytes | None:
    """Read raw bytes from InternetSearchProvider.api_key."""
    col = InternetSearchProvider.__table__.c.api_key
    stmt = select(col.cast(LargeBinary)).where(
        InternetSearchProvider.__table__.c.id == isp_id
    )
    return db_session.execute(stmt).scalar()


class TestDiscoverEncryptedColumns:
    """Verify _discover_encrypted_columns finds real production models."""

    def test_discovers_credential_json(self) -> None:
        results = _discover_encrypted_columns()
        found = {
            (model_cls.__tablename__, col_name, is_json)  # type: ignore[attr-defined]
            for model_cls, col_name, _, is_json in results
        }
        assert ("credential", "credential_json", True) in found

    def test_discovers_internet_search_provider_api_key(self) -> None:
        results = _discover_encrypted_columns()
        found = {
            (model_cls.__tablename__, col_name, is_json)  # type: ignore[attr-defined]
            for model_cls, col_name, _, is_json in results
        }
        assert ("internet_search_provider", "api_key", False) in found

    def test_all_encrypted_string_columns_are_not_json(self) -> None:
        results = _discover_encrypted_columns()
        for model_cls, col_name, _, is_json in results:
            col = getattr(model_cls, col_name).property.columns[0]
            if isinstance(col.type, EncryptedString):
                assert not is_json, (
                    f"{model_cls.__tablename__}.{col_name} is EncryptedString "  # type: ignore[attr-defined]
                    f"but is_json={is_json}"
                )

    def test_all_encrypted_json_columns_are_json(self) -> None:
        results = _discover_encrypted_columns()
        for model_cls, col_name, _, is_json in results:
            col = getattr(model_cls, col_name).property.columns[0]
            if isinstance(col.type, EncryptedJson):
                assert is_json, (
                    f"{model_cls.__tablename__}.{col_name} is EncryptedJson "  # type: ignore[attr-defined]
                    f"but is_json={is_json}"
                )


class TestRotateCredential:
    """Test rotation against the real Credential table (EncryptedJson).

    Discovery is scoped to only the Credential model to avoid mutating
    other tables in the test database.
    """

    @pytest.fixture(autouse=True)
    def _limit_discovery(self) -> Generator[None, None, None]:
        with patch(
            f"{ROTATE_MODULE}._discover_encrypted_columns",
            return_value=[(Credential, "credential_json", ["id"], True)],
        ):
            yield

    @pytest.fixture()
    def credential_id(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> Generator[int, None, None]:
        """Insert a Credential row with raw encrypted bytes, clean up after."""
        config = {"api_key": "sk-test-1234", "endpoint": "https://example.com"}
        encrypted = _encrypt_string(json.dumps(config), key=OLD_KEY)

        result = db_session.execute(
            text(
                "INSERT INTO credential "
                "(source, credential_json, admin_public, curator_public) "
                "VALUES (:source, :cred_json, true, false) "
                "RETURNING id"
            ),
            {"source": DocumentSource.INGESTION_API.value, "cred_json": encrypted},
        )
        cred_id = result.scalar_one()
        db_session.commit()

        yield cred_id

        db_session.execute(
            text("DELETE FROM credential WHERE id = :id"), {"id": cred_id}
        )
        db_session.commit()

    def test_rotates_credential_json(
        self, db_session: Session, credential_id: int
    ) -> None:
        with (
            patch(f"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
            patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
        ):
            totals = rotate_encryption_key(db_session, old_key=OLD_KEY)

        assert totals.get("credential.credential_json", 0) >= 1

        raw = _raw_credential_bytes(db_session, credential_id)
        assert raw is not None
        decrypted = json.loads(_decrypt_bytes(raw, key=NEW_KEY))
        assert decrypted["api_key"] == "sk-test-1234"
        assert decrypted["endpoint"] == "https://example.com"

    def test_skips_already_rotated(
        self, db_session: Session, credential_id: int
    ) -> None:
        with (
            patch(f"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
            patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
        ):
            rotate_encryption_key(db_session, old_key=OLD_KEY)
            _ = rotate_encryption_key(db_session, old_key=OLD_KEY)

        raw = _raw_credential_bytes(db_session, credential_id)
        assert raw is not None
        decrypted = json.loads(_decrypt_bytes(raw, key=NEW_KEY))
        assert decrypted["api_key"] == "sk-test-1234"

    def test_dry_run_does_not_modify(
        self, db_session: Session, credential_id: int
    ) -> None:
        original = _raw_credential_bytes(db_session, credential_id)

        with (
            patch(f"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
            patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
        ):
            totals = rotate_encryption_key(db_session, old_key=OLD_KEY, dry_run=True)

        assert totals.get("credential.credential_json", 0) >= 1

        raw_after = _raw_credential_bytes(db_session, credential_id)
        assert raw_after == original


class TestRotateInternetSearchProvider:
    """Test rotation against the real InternetSearchProvider table (EncryptedString).

    Discovery is scoped to only the InternetSearchProvider model to avoid
    mutating other tables in the test database.
    """

    @pytest.fixture(autouse=True)
    def _limit_discovery(self) -> Generator[None, None, None]:
        with patch(
            f"{ROTATE_MODULE}._discover_encrypted_columns",
            return_value=[
                (InternetSearchProvider, "api_key", ["id"], False),
            ],
        ):
            yield

    @pytest.fixture()
    def isp_id(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> Generator[int, None, None]:
        """Insert an InternetSearchProvider row with raw encrypted bytes."""
        encrypted = _encrypt_string("sk-secret-api-key", key=OLD_KEY)

        result = db_session.execute(
            text(
                "INSERT INTO internet_search_provider "
                "(name, provider_type, api_key, is_active) "
                "VALUES (:name, :ptype, :api_key, false) "
                "RETURNING id"
            ),
            {
                "name": f"test-rotation-{id(self)}",
                "ptype": "test",
                "api_key": encrypted,
            },
        )
        isp_id = result.scalar_one()
        db_session.commit()

        yield isp_id

        db_session.execute(
            text("DELETE FROM internet_search_provider WHERE id = :id"),
            {"id": isp_id},
        )
        db_session.commit()

    def test_rotates_api_key(self, db_session: Session, isp_id: int) -> None:
        with (
            patch(f"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
            patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
        ):
            totals = rotate_encryption_key(db_session, old_key=OLD_KEY)

        assert totals.get("internet_search_provider.api_key", 0) >= 1

        raw = _raw_isp_bytes(db_session, isp_id)
        assert raw is not None
        assert _decrypt_bytes(raw, key=NEW_KEY) == "sk-secret-api-key"

    def test_rotates_from_unencrypted(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Test rotating data that was stored without any encryption key."""
        result = db_session.execute(
            text(
                "INSERT INTO internet_search_provider "
                "(name, provider_type, api_key, is_active) "
                "VALUES (:name, :ptype, :api_key, false) "
                "RETURNING id"
            ),
            {
                "name": f"test-raw-{id(self)}",
                "ptype": "test",
                "api_key": b"raw-api-key",
            },
        )
        isp_id = result.scalar_one()
        db_session.commit()

        try:
            with (
                patch(f"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
                patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", NEW_KEY),
            ):
                totals = rotate_encryption_key(db_session, old_key=None)

            assert totals.get("internet_search_provider.api_key", 0) >= 1

            raw = _raw_isp_bytes(db_session, isp_id)
            assert raw is not None
            assert _decrypt_bytes(raw, key=NEW_KEY) == "raw-api-key"
        finally:
            db_session.execute(
                text("DELETE FROM internet_search_provider WHERE id = :id"),
                {"id": isp_id},
            )
            db_session.commit()


================================================
FILE: backend/tests/external_dependency_unit/db/test_tag_race_condition.py
================================================
"""
Test suite for tag creation race condition handling.

Tests that concurrent tag creation operations don't fail due to
UniqueViolation errors, which would occur if the upsert logic
isn't properly implemented.
"""

from concurrent.futures import as_completed
from concurrent.futures import Future
from concurrent.futures import ThreadPoolExecutor
from typing import Union
from uuid import uuid4

from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from onyx.db.models import Tag
from onyx.db.tag import create_or_add_document_tag
from onyx.db.tag import create_or_add_document_tag_list


def _create_test_document(db_session: Session, doc_id: str) -> Document:
    """Create a minimal test document."""
    document = Document(
        id=doc_id,
        semantic_id=f"semantic_{doc_id}",
        boost=0,
        hidden=False,
        from_ingestion_api=False,
    )
    db_session.add(document)
    db_session.commit()
    return document


class TestTagRaceCondition:
    """Tests for tag creation race condition handling."""

    def test_concurrent_tag_creation_single_tag(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """
        Test that multiple concurrent calls to create_or_add_document_tag
        with the same tag key/value all succeed without UniqueViolation errors.

        This simulates the race condition that occurs when multiple workers
        try to create the same tag simultaneously during document indexing.
        """
        # Create multiple test documents that will all get the same tag
        num_documents = 20
        doc_ids = [f"test_doc_race_{uuid4().hex[:8]}" for _ in range(num_documents)]

        for doc_id in doc_ids:
            _create_test_document(db_session, doc_id)

        # Use a unique tag key/value for this test run to avoid interference
        test_tag_key = f"test_key_{uuid4().hex[:8]}"
        test_tag_value = f"test_value_{uuid4().hex[:8]}"
        test_source = DocumentSource.FILE

        errors: list[Exception] = []
        results: list[Tag | None] = []

        def create_tag_for_document(doc_id: str) -> Tag | None:
            """Worker function that creates a tag for a document using its own session."""
            with get_session_with_current_tenant() as session:
                return create_or_add_document_tag(
                    tag_key=test_tag_key,
                    tag_value=test_tag_value,
                    source=test_source,
                    document_id=doc_id,
                    db_session=session,
                )

        # Run all tag creations concurrently with high parallelism
        with ThreadPoolExecutor(max_workers=num_documents) as executor:
            futures = {
                executor.submit(create_tag_for_document, doc_id): doc_id
                for doc_id in doc_ids
            }

            for future in as_completed(futures):
                doc_id = futures[future]
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    errors.append(e)

        # All operations should succeed without errors
        assert len(errors) == 0, f"Got {len(errors)} errors: {errors}"
        assert len(results) == num_documents

        # All results should be valid Tag objects
        for result in results:
            assert result is not None
            assert result.tag_key == test_tag_key
            assert result.tag_value == test_tag_value
            assert result.source == test_source

        # Verify only ONE tag was created in the database (not num_documents tags)
        with get_session_with_current_tenant() as session:
            tag_count = (
                session.execute(
                    select(Tag).where(
                        Tag.tag_key == test_tag_key,
                        Tag.tag_value == test_tag_value,
                        Tag.source == test_source,
                    )
                )
                .scalars()
                .all()
            )

        assert len(tag_count) == 1, f"Expected 1 tag, found {len(tag_count)}"

    def test_concurrent_tag_list_creation(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """
        Test that multiple concurrent calls to create_or_add_document_tag_list
        with the same tag values all succeed without UniqueViolation errors.
        """
        # Create multiple test documents
        num_documents = 20
        doc_ids = [
            f"test_doc_list_race_{uuid4().hex[:8]}" for _ in range(num_documents)
        ]

        for doc_id in doc_ids:
            _create_test_document(db_session, doc_id)

        # Use unique tag key/values for this test run
        test_tag_key = f"test_list_key_{uuid4().hex[:8]}"
        test_tag_values = [f"value_{i}_{uuid4().hex[:4]}" for i in range(5)]
        test_source = DocumentSource.FILE

        errors: list[Exception] = []
        results: list[list[Tag]] = []

        def create_tag_list_for_document(doc_id: str) -> list[Tag]:
            """Worker function that creates tag list for a document using its own session."""
            with get_session_with_current_tenant() as session:
                return create_or_add_document_tag_list(
                    tag_key=test_tag_key,
                    tag_values=test_tag_values,
                    source=test_source,
                    document_id=doc_id,
                    db_session=session,
                )

        # Run all tag creations concurrently
        with ThreadPoolExecutor(max_workers=num_documents) as executor:
            futures = {
                executor.submit(create_tag_list_for_document, doc_id): doc_id
                for doc_id in doc_ids
            }

            for future in as_completed(futures):
                doc_id = futures[future]
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    errors.append(e)

        # All operations should succeed without errors
        assert len(errors) == 0, f"Got {len(errors)} errors: {errors}"
        assert len(results) == num_documents

        # Each result should have all the expected tags
        for result in results:
            assert len(result) == len(test_tag_values)
            result_values = {tag.tag_value for tag in result}
            assert result_values == set(test_tag_values)

        # Verify exactly len(test_tag_values) tags were created (one per value)
        with get_session_with_current_tenant() as session:
            tags = (
                session.execute(
                    select(Tag).where(
                        Tag.tag_key == test_tag_key,
                        Tag.tag_value.in_(test_tag_values),
                        Tag.source == test_source,
                    )
                )
                .scalars()
                .all()
            )

        assert len(tags) == len(
            test_tag_values
        ), f"Expected {len(test_tag_values)} tags, found {len(tags)}"

    def test_concurrent_mixed_tag_operations(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """
        Test that concurrent single tag and tag list operations on the same
        tag key/value don't interfere with each other.

        This is a more realistic scenario where different documents might
        have the same metadata key but different value types (single vs list).
        """
        num_documents = 10
        doc_ids_single = [
            f"test_doc_single_{uuid4().hex[:8]}" for _ in range(num_documents)
        ]
        doc_ids_list = [
            f"test_doc_list_{uuid4().hex[:8]}" for _ in range(num_documents)
        ]

        for doc_id in doc_ids_single + doc_ids_list:
            _create_test_document(db_session, doc_id)

        # Same key but used as both single value and list value
        test_tag_key = f"mixed_key_{uuid4().hex[:8]}"
        test_single_value = f"single_value_{uuid4().hex[:8]}"
        test_list_values = [test_single_value]  # Same value but as list
        test_source = DocumentSource.FILE

        errors: list[Exception] = []

        def create_single_tag(doc_id: str) -> Tag | None:
            with get_session_with_current_tenant() as session:
                return create_or_add_document_tag(
                    tag_key=test_tag_key,
                    tag_value=test_single_value,
                    source=test_source,
                    document_id=doc_id,
                    db_session=session,
                )

        def create_list_tag(doc_id: str) -> list[Tag]:
            with get_session_with_current_tenant() as session:
                return create_or_add_document_tag_list(
                    tag_key=test_tag_key,
                    tag_values=test_list_values,
                    source=test_source,
                    document_id=doc_id,
                    db_session=session,
                )

        # Run both types of operations concurrently
        with ThreadPoolExecutor(max_workers=num_documents * 2) as executor:
            futures: list[Future[Union[Tag | None] | list[Tag]]] = []
            for doc_id in doc_ids_single:
                futures.append(executor.submit(create_single_tag, doc_id))
            for doc_id in doc_ids_list:
                futures.append(executor.submit(create_list_tag, doc_id))

            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    errors.append(e)

        # All operations should succeed
        assert len(errors) == 0, f"Got {len(errors)} errors: {errors}"

        # Should have exactly 2 tags: one with is_list=False, one with is_list=True
        with get_session_with_current_tenant() as session:
            tags = (
                session.execute(
                    select(Tag).where(
                        Tag.tag_key == test_tag_key,
                        Tag.tag_value == test_single_value,
                        Tag.source == test_source,
                    )
                )
                .scalars()
                .all()
            )

        assert (
            len(tags) == 2
        ), f"Expected 2 tags (is_list=True and False), found {len(tags)}"
        is_list_values = {tag.is_list for tag in tags}
        assert is_list_values == {True, False}


================================================
FILE: backend/tests/external_dependency_unit/db/test_user_account_type.py
================================================
"""
Tests that account_type is correctly set when creating users through
the internal DB functions: add_slack_user_if_not_exists and
batch_add_ext_perm_user_if_not_exists.

These functions are called by background workers (Slack bot, permission sync)
and are not exposed via API endpoints, so they must be tested directly.
"""

from sqlalchemy.orm import Session

from onyx.db.enums import AccountType
from onyx.db.models import UserRole
from onyx.db.users import add_slack_user_if_not_exists
from onyx.db.users import batch_add_ext_perm_user_if_not_exists


def test_slack_user_creation_sets_account_type_bot(db_session: Session) -> None:
    """add_slack_user_if_not_exists sets account_type=BOT and role=SLACK_USER."""
    user = add_slack_user_if_not_exists(db_session, "slack_acct_type@test.com")

    assert user.role == UserRole.SLACK_USER
    assert user.account_type == AccountType.BOT


def test_ext_perm_user_creation_sets_account_type(db_session: Session) -> None:
    """batch_add_ext_perm_user_if_not_exists sets account_type=EXT_PERM_USER."""
    users = batch_add_ext_perm_user_if_not_exists(
        db_session, ["extperm_acct_type@test.com"]
    )

    assert len(users) == 1
    user = users[0]
    assert user.role == UserRole.EXT_PERM_USER
    assert user.account_type == AccountType.EXT_PERM_USER


def test_ext_perm_to_slack_upgrade_updates_role_and_account_type(
    db_session: Session,
) -> None:
    """When an EXT_PERM_USER is upgraded to slack, both role and account_type update."""
    email = "ext_to_slack_acct_type@test.com"

    # Create as ext_perm user first
    batch_add_ext_perm_user_if_not_exists(db_session, [email])

    # Now "upgrade" via slack path
    user = add_slack_user_if_not_exists(db_session, email)

    assert user.role == UserRole.SLACK_USER
    assert user.account_type == AccountType.BOT


================================================
FILE: backend/tests/external_dependency_unit/discord_bot/conftest.py
================================================
"""Fixtures for Discord bot external dependency tests."""

from collections.abc import Generator
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import discord
import pytest
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


TEST_TENANT_ID: str = "public"


@pytest.fixture(scope="function")
def db_session() -> Generator[Session, None, None]:
    """Create a database session for testing."""
    SqlEngine.init_engine(pool_size=10, max_overflow=5)
    with get_session_with_current_tenant() as session:
        yield session


@pytest.fixture(scope="function")
def tenant_context() -> Generator[None, None, None]:
    """Set up tenant context for testing."""
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@pytest.fixture
def mock_cache_manager() -> MagicMock:
    """Mock DiscordCacheManager."""
    cache = MagicMock()
    cache.get_tenant.return_value = TEST_TENANT_ID
    cache.get_api_key.return_value = "test_api_key"
    cache.refresh_all = AsyncMock()
    cache.refresh_guild = AsyncMock()
    cache.is_initialized = True
    return cache


@pytest.fixture
def mock_api_client() -> MagicMock:
    """Mock OnyxAPIClient."""
    client = MagicMock()
    client.initialize = AsyncMock()
    client.close = AsyncMock()
    client.is_initialized = True

    # Mock successful response
    mock_response = MagicMock()
    mock_response.answer = "Test response from bot"
    mock_response.citation_info = None
    mock_response.top_documents = None
    mock_response.error_msg = None

    client.send_chat_message = AsyncMock(return_value=mock_response)
    client.health_check = AsyncMock(return_value=True)
    return client


@pytest.fixture
def mock_discord_guild() -> MagicMock:
    """Mock Discord guild with channels."""
    guild = MagicMock(spec=discord.Guild)
    guild.id = 123456789
    guild.name = "Test Server"
    guild.default_role = MagicMock()

    # Create some mock channels
    text_channel = MagicMock(spec=discord.TextChannel)
    text_channel.id = 111111111
    text_channel.name = "general"
    text_channel.type = discord.ChannelType.text
    perms = MagicMock()
    perms.view_channel = True
    text_channel.permissions_for.return_value = perms

    forum_channel = MagicMock(spec=discord.ForumChannel)
    forum_channel.id = 222222222
    forum_channel.name = "forum"
    forum_channel.type = discord.ChannelType.forum
    forum_channel.permissions_for.return_value = perms

    private_channel = MagicMock(spec=discord.TextChannel)
    private_channel.id = 333333333
    private_channel.name = "private"
    private_channel.type = discord.ChannelType.text
    private_perms = MagicMock()
    private_perms.view_channel = False
    private_channel.permissions_for.return_value = private_perms

    guild.channels = [text_channel, forum_channel, private_channel]
    guild.text_channels = [text_channel, private_channel]
    guild.forum_channels = [forum_channel]

    return guild


@pytest.fixture
def mock_discord_message(mock_discord_guild: MagicMock) -> MagicMock:
    """Mock Discord message for testing."""
    msg = MagicMock(spec=discord.Message)
    msg.id = 555555555
    msg.author = MagicMock(spec=discord.Member)
    msg.author.id = 444444444
    msg.author.bot = False
    msg.author.display_name = "TestUser"
    msg.author.guild_permissions = MagicMock()
    msg.author.guild_permissions.administrator = True
    msg.author.guild_permissions.manage_guild = True
    msg.content = "Hello bot"
    msg.guild = mock_discord_guild
    msg.channel = MagicMock()
    msg.channel.id = 111111111
    msg.channel.name = "general"
    msg.channel.send = AsyncMock()
    msg.type = discord.MessageType.default
    msg.mentions = []
    msg.role_mentions = []
    msg.channel_mentions = []
    msg.reference = None
    msg.add_reaction = AsyncMock()
    msg.remove_reaction = AsyncMock()
    msg.reply = AsyncMock()
    msg.create_thread = AsyncMock()
    return msg


@pytest.fixture
def mock_bot_user() -> MagicMock:
    """Mock Discord bot user."""
    user = MagicMock(spec=discord.ClientUser)
    user.id = 987654321
    user.display_name = "OnyxBot"
    user.bot = True
    return user


@pytest.fixture
def mock_discord_bot(
    mock_cache_manager: MagicMock,
    mock_api_client: MagicMock,
    mock_bot_user: MagicMock,
) -> MagicMock:
    """Mock OnyxDiscordClient."""
    bot = MagicMock()
    bot.user = mock_bot_user
    bot.cache = mock_cache_manager
    bot.api_client = mock_api_client
    bot.ready = True
    bot.loop = MagicMock()
    bot.is_closed.return_value = False
    bot.guilds = []
    return bot


================================================
FILE: backend/tests/external_dependency_unit/discord_bot/test_discord_events.py
================================================
"""Tests for Discord bot event handling with mocked Discord API.

These tests mock the Discord API to test event handling logic.
"""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import discord
import pytest

from onyx.onyxbot.discord.handle_commands import get_text_channels
from onyx.onyxbot.discord.handle_commands import handle_dm
from onyx.onyxbot.discord.handle_commands import handle_registration_command
from onyx.onyxbot.discord.handle_commands import handle_sync_channels_command
from onyx.onyxbot.discord.handle_message import process_chat_message
from onyx.onyxbot.discord.handle_message import send_error_response
from onyx.onyxbot.discord.handle_message import send_response


class TestGuildRegistrationCommand:
    """Tests for !register command handling."""

    @pytest.mark.asyncio
    async def test_register_guild_success(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Valid registration key with admin perms succeeds."""
        mock_discord_message.content = "!register discord_public.valid_token"

        with (
            patch(
                "onyx.onyxbot.discord.handle_commands.parse_discord_registration_key",
                return_value="public",
            ),
            patch(
                "onyx.onyxbot.discord.handle_commands.get_session_with_tenant"
            ) as mock_session,
            patch(
                "onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key"
            ) as mock_get_config,
            patch("onyx.onyxbot.discord.handle_commands.bulk_create_channel_configs"),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            mock_config = MagicMock()
            mock_config.id = 1
            mock_config.guild_id = None  # Not yet registered
            mock_get_config.return_value = mock_config

            mock_cache_manager.get_tenant.return_value = None  # Not in cache yet

            result = await handle_registration_command(
                mock_discord_message, mock_cache_manager
            )

        assert result is True
        mock_discord_message.reply.assert_called()
        # Check that success message was sent
        call_args = mock_discord_message.reply.call_args
        assert "Successfully registered" in str(call_args)

    @pytest.mark.asyncio
    async def test_register_invalid_key_format(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Malformed key DMs user and deletes message."""
        mock_discord_message.content = "!register abc"  # Malformed

        with patch(
            "onyx.onyxbot.discord.handle_commands.parse_discord_registration_key",
            return_value=None,  # Invalid format
        ):
            result = await handle_registration_command(
                mock_discord_message, mock_cache_manager
            )

        assert result is True
        # On failure: DM the author and delete the message
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "Invalid" in str(call_args)
        mock_discord_message.delete.assert_called()

    @pytest.mark.asyncio
    async def test_register_key_not_found(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Key not in database DMs user and deletes message."""
        mock_discord_message.content = "!register discord_public.notexist"

        with (
            patch(
                "onyx.onyxbot.discord.handle_commands.parse_discord_registration_key",
                return_value="public",
            ),
            patch(
                "onyx.onyxbot.discord.handle_commands.get_session_with_tenant"
            ) as mock_session,
            patch(
                "onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key",
                return_value=None,  # Not found
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            # Must return False so exceptions are not suppressed
            mock_session.return_value.__exit__ = MagicMock(return_value=False)
            mock_cache_manager.get_tenant.return_value = None

            result = await handle_registration_command(
                mock_discord_message, mock_cache_manager
            )

        assert result is True
        # On failure: DM the author and delete the message
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "not found" in str(call_args).lower()
        mock_discord_message.delete.assert_called()

    @pytest.mark.asyncio
    async def test_register_key_already_used(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Previously used key DMs user and deletes message."""
        mock_discord_message.content = "!register discord_public.used_key"

        with (
            patch(
                "onyx.onyxbot.discord.handle_commands.parse_discord_registration_key",
                return_value="public",
            ),
            patch(
                "onyx.onyxbot.discord.handle_commands.get_session_with_tenant"
            ) as mock_session,
            patch(
                "onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key"
            ) as mock_get_config,
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            # Must return False so exceptions are not suppressed
            mock_session.return_value.__exit__ = MagicMock(return_value=False)

            mock_config = MagicMock()
            mock_config.guild_id = 999999  # Already registered!
            mock_get_config.return_value = mock_config

            mock_cache_manager.get_tenant.return_value = None

            result = await handle_registration_command(
                mock_discord_message, mock_cache_manager
            )

        assert result is True
        # On failure: DM the author and delete the message
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "already" in str(call_args).lower()
        mock_discord_message.delete.assert_called()

    @pytest.mark.asyncio
    async def test_register_guild_already_registered(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Guild already in cache DMs user and deletes message."""
        mock_discord_message.content = "!register discord_public.valid_token"

        with patch(
            "onyx.onyxbot.discord.handle_commands.parse_discord_registration_key",
            return_value="public",
        ):
            # Guild already in cache
            mock_cache_manager.get_tenant.return_value = "existing_tenant"

            result = await handle_registration_command(
                mock_discord_message, mock_cache_manager
            )

        assert result is True
        # On failure: DM the author and delete the message
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "already registered" in str(call_args).lower()
        mock_discord_message.delete.assert_called()

    @pytest.mark.asyncio
    async def test_register_no_permission(
        self,
        mock_discord_message: MagicMock,
        mock_cache_manager: MagicMock,
    ) -> None:
        """User without admin perms gets DM and message deleted."""
        mock_discord_message.content = "!register discord_public.valid_token"
        mock_discord_message.author.guild_permissions.administrator = False
        mock_discord_message.author.guild_permissions.manage_guild = False

        result = await handle_registration_command(
            mock_discord_message, mock_cache_manager
        )

        assert result is True
        # On failure: DM the author and delete the message
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "permission" in str(call_args).lower()
        mock_discord_message.delete.assert_called()

    @pytest.mark.asyncio
    async def test_register_in_dm(
        self,
        mock_cache_manager: MagicMock,
    ) -> None:
        """Registration in DM sends DM and returns True."""
        msg = MagicMock(spec=discord.Message)
        msg.guild = None  # DM
        msg.content = "!register discord_public.token"
        msg.author = MagicMock()
        msg.author.send = AsyncMock()

        result = await handle_registration_command(msg, mock_cache_manager)

        assert result is True
        msg.author.send.assert_called()
        call_args = msg.author.send.call_args
        assert "server" in str(call_args).lower()

    @pytest.mark.asyncio
    async def test_register_syncs_forum_channels(
        self,
        mock_discord_message: MagicMock,  # noqa: ARG002
        mock_discord_guild: MagicMock,
    ) -> None:
        """Forum channels are included in sync."""
        channels = get_text_channels(mock_discord_guild)

        channel_types = [c.channel_type for c in channels]
        assert "forum" in channel_types

    @pytest.mark.asyncio
    async def test_register_private_channel_detection(
        self,
        mock_discord_message: MagicMock,  # noqa: ARG002
        mock_discord_guild: MagicMock,
    ) -> None:
        """Private channels are marked correctly."""
        channels = get_text_channels(mock_discord_guild)

        private_channels = [c for c in channels if c.is_private]
        assert len(private_channels) >= 1


class TestSyncChannelsCommand:
    """Tests for !sync-channels command handling."""

    @pytest.mark.asyncio
    async def test_sync_channels_adds_new(
        self,
        mock_discord_message: MagicMock,
        mock_discord_bot: MagicMock,
    ) -> None:
        """New channel in Discord creates channel config."""
        mock_discord_message.content = "!sync-channels"

        with (
            patch(
                "onyx.onyxbot.discord.handle_commands.get_session_with_tenant"
            ) as mock_session,
            patch(
                "onyx.onyxbot.discord.handle_commands.get_guild_config_by_discord_id"
            ) as mock_get_guild,
            patch(
                "onyx.onyxbot.discord.handle_commands.get_guild_config_by_internal_id"
            ) as mock_get_guild_internal,
            patch(
                "onyx.onyxbot.discord.handle_commands.sync_channel_configs"
            ) as mock_sync,
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            mock_config = MagicMock()
            mock_config.id = 1
            mock_config.guild_id = 123456789
            mock_get_guild.return_value = mock_config
            mock_get_guild_internal.return_value = mock_config

            mock_sync.return_value = (1, 0, 0)  # 1 added, 0 removed, 0 updated

            mock_discord_bot.get_guild.return_value = mock_discord_message.guild

            result = await handle_sync_channels_command(
                mock_discord_message, "public", mock_discord_bot
            )

        assert result is True
        mock_discord_message.reply.assert_called()

    @pytest.mark.asyncio
    async def test_sync_channels_no_permission(
        self,
        mock_discord_message: MagicMock,
        mock_discord_bot: MagicMock,
    ) -> None:
        """User without admin perms gets DM and reaction."""
        mock_discord_message.content = "!sync-channels"
        mock_discord_message.author.guild_permissions.administrator = False
        mock_discord_message.author.guild_permissions.manage_guild = False

        result = await handle_sync_channels_command(
            mock_discord_message, "public", mock_discord_bot
        )

        assert result is True
        # On failure: DM the author and react with ❌
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "permission" in str(call_args).lower()
        mock_discord_message.add_reaction.assert_called_with("❌")

    @pytest.mark.asyncio
    async def test_sync_channels_unregistered_guild(
        self,
        mock_discord_message: MagicMock,
        mock_discord_bot: MagicMock,
    ) -> None:
        """Sync in unregistered guild gets DM and reaction."""
        mock_discord_message.content = "!sync-channels"

        # tenant_id is None = not registered
        result = await handle_sync_channels_command(
            mock_discord_message, None, mock_discord_bot
        )

        assert result is True
        # On failure: DM the author and react with ❌
        mock_discord_message.author.send.assert_called()
        call_args = mock_discord_message.author.send.call_args
        assert "not registered" in str(call_args).lower()
        mock_discord_message.add_reaction.assert_called_with("❌")


class TestMessageHandling:
    """Tests for message handling behavior."""

    @pytest.mark.asyncio
    async def test_message_adds_thinking_emoji(
        self,
        mock_discord_message: MagicMock,
        mock_api_client: MagicMock,
        mock_bot_user: MagicMock,
    ) -> None:
        """Thinking emoji is added during processing."""
        await process_chat_message(
            message=mock_discord_message,
            api_key="test_key",
            persona_id=None,
            thread_only_mode=False,
            api_client=mock_api_client,
            bot_user=mock_bot_user,
        )

        mock_discord_message.add_reaction.assert_called()

    @pytest.mark.asyncio
    async def test_message_removes_thinking_emoji(
        self,
        mock_discord_message: MagicMock,
        mock_api_client: MagicMock,
        mock_bot_user: MagicMock,
    ) -> None:
        """Thinking emoji is removed after response."""
        await process_chat_message(
            message=mock_discord_message,
            api_key="test_key",
            persona_id=None,
            thread_only_mode=False,
            api_client=mock_api_client,
            bot_user=mock_bot_user,
        )

        mock_discord_message.remove_reaction.assert_called()

    @pytest.mark.asyncio
    async def test_message_reaction_failure_non_blocking(
        self,
        mock_discord_message: MagicMock,
        mock_api_client: MagicMock,
        mock_bot_user: MagicMock,
    ) -> None:
        """add_reaction failure doesn't block processing."""
        mock_discord_message.add_reaction = AsyncMock(
            side_effect=discord.DiscordException("Cannot add reaction")
        )

        # Should not raise - just log warning and continue
        await process_chat_message(
            message=mock_discord_message,
            api_key="test_key",
            persona_id=None,
            thread_only_mode=False,
            api_client=mock_api_client,
            bot_user=mock_bot_user,
        )

        # Should still complete and send reply
        mock_discord_message.reply.assert_called()

    @pytest.mark.asyncio
    async def test_dm_response(self) -> None:
        """DM to bot sends redirect message."""
        msg = MagicMock(spec=discord.Message)
        msg.channel = MagicMock(spec=discord.DMChannel)
        msg.channel.send = AsyncMock()

        await handle_dm(msg)

        msg.channel.send.assert_called_once()
        call_args = msg.channel.send.call_args
        assert "DM" in str(call_args) or "server" in str(call_args).lower()


class TestThreadCreationAndResponseRouting:
    """Tests for thread creation and response routing."""

    @pytest.mark.asyncio
    async def test_response_in_existing_thread(
        self,
        mock_bot_user: MagicMock,  # noqa: ARG002
    ) -> None:
        """Message in thread - response appended to thread."""
        thread = MagicMock(spec=discord.Thread)
        thread.send = AsyncMock()

        msg = MagicMock(spec=discord.Message)
        msg.channel = thread
        msg.reply = AsyncMock()
        msg.create_thread = AsyncMock()

        await send_response(msg, "Test response", thread_only_mode=False)

        # Should send to thread, not create new thread
        thread.send.assert_called()
        msg.create_thread.assert_not_called()

    @pytest.mark.asyncio
    async def test_response_creates_thread_thread_only_mode(
        self,
        mock_discord_message: MagicMock,
        mock_bot_user: MagicMock,  # noqa: ARG002
    ) -> None:
        """thread_only_mode=true creates new thread for response."""
        mock_thread = MagicMock()
        mock_thread.send = AsyncMock()
        mock_discord_message.create_thread = AsyncMock(return_value=mock_thread)

        # Make sure it's not a thread
        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)

        await send_response(
            mock_discord_message, "Test response", thread_only_mode=True
        )

        mock_discord_message.create_thread.assert_called()
        mock_thread.send.assert_called()

    @pytest.mark.asyncio
    async def test_response_replies_inline(
        self,
        mock_discord_message: MagicMock,
        mock_bot_user: MagicMock,  # noqa: ARG002
    ) -> None:
        """thread_only_mode=false uses message.reply()."""
        # Make sure it's not a thread
        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)

        await send_response(
            mock_discord_message, "Test response", thread_only_mode=False
        )

        mock_discord_message.reply.assert_called()

    @pytest.mark.asyncio
    async def test_thread_name_truncation(
        self,
        mock_bot_user: MagicMock,  # noqa: ARG002
    ) -> None:
        """Thread name is truncated to 100 chars."""
        msg = MagicMock(spec=discord.Message)
        msg.channel = MagicMock(spec=discord.TextChannel)
        msg.author = MagicMock()
        msg.author.display_name = "A" * 200  # Very long name

        mock_thread = MagicMock()
        mock_thread.send = AsyncMock()
        msg.create_thread = AsyncMock(return_value=mock_thread)

        await send_response(msg, "Test", thread_only_mode=True)

        call_args = msg.create_thread.call_args
        thread_name = call_args.kwargs.get("name") or call_args[1].get("name")
        assert len(thread_name) <= 100

    @pytest.mark.asyncio
    async def test_error_response_creates_thread(
        self,
        mock_discord_message: MagicMock,
        mock_bot_user: MagicMock,
    ) -> None:
        """Error response in channel creates thread."""
        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)
        mock_thread = MagicMock()
        mock_thread.send = AsyncMock()
        mock_discord_message.create_thread = AsyncMock(return_value=mock_thread)

        await send_error_response(mock_discord_message, mock_bot_user)

        mock_discord_message.create_thread.assert_called()


class TestBotLifecycle:
    """Tests for bot lifecycle management."""

    @pytest.mark.asyncio
    async def test_setup_hook_initializes_cache(
        self,
        mock_cache_manager: MagicMock,
        mock_api_client: MagicMock,
    ) -> None:
        """setup_hook calls cache.refresh_all()."""
        from onyx.onyxbot.discord.client import OnyxDiscordClient

        with (
            patch.object(
                OnyxDiscordClient,
                "__init__",
                lambda self: None,  # noqa: ARG005
            ),
            patch(
                "onyx.onyxbot.discord.client.DiscordCacheManager",
                return_value=mock_cache_manager,
            ),
            patch(
                "onyx.onyxbot.discord.client.OnyxAPIClient",
                return_value=mock_api_client,
            ),
        ):
            bot = OnyxDiscordClient()
            bot.cache = mock_cache_manager
            bot.api_client = mock_api_client
            bot.loop = MagicMock()
            bot.loop.create_task = MagicMock()

            await bot.setup_hook()

        mock_cache_manager.refresh_all.assert_called()

    @pytest.mark.asyncio
    async def test_setup_hook_initializes_api_client(
        self,
        mock_cache_manager: MagicMock,
        mock_api_client: MagicMock,
    ) -> None:
        """setup_hook calls api_client.initialize()."""
        from onyx.onyxbot.discord.client import OnyxDiscordClient

        with (
            patch.object(
                OnyxDiscordClient,
                "__init__",
                lambda self: None,  # noqa: ARG005
            ),
        ):
            bot = OnyxDiscordClient()
            bot.cache = mock_cache_manager
            bot.api_client = mock_api_client
            bot.loop = MagicMock()
            bot.loop.create_task = MagicMock()

            await bot.setup_hook()

        mock_api_client.initialize.assert_called()

    @pytest.mark.asyncio
    async def test_close_closes_api_client(
        self,
        mock_cache_manager: MagicMock,
        mock_api_client: MagicMock,
    ) -> None:
        """close() calls api_client.close()."""
        from onyx.onyxbot.discord.client import OnyxDiscordClient

        with (
            patch.object(
                OnyxDiscordClient,
                "__init__",
                lambda self: None,  # noqa: ARG005
            ),
            patch.object(OnyxDiscordClient, "is_closed", return_value=True),
        ):
            bot = OnyxDiscordClient()
            bot.cache = mock_cache_manager
            bot.api_client = mock_api_client
            bot._cache_refresh_task = None
            bot.ready = True

            # Mock parent close
            async def mock_super_close() -> None:
                pass

            with patch("discord.ext.commands.Bot.close", mock_super_close):
                await bot.close()

        mock_api_client.close.assert_called()
        mock_cache_manager.clear.assert_called()


================================================
FILE: backend/tests/external_dependency_unit/document_index/conftest.py
================================================
"""Shared fixtures for document_index external dependency tests.

Provides Vespa and OpenSearch index setup, tenant context, and chunk helpers.
"""

import os
import time
import uuid
from collections.abc import Generator
from unittest.mock import patch

import httpx
import pytest

from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
from tests.external_dependency_unit.constants import TEST_TENANT_ID

EMBEDDING_DIM = 128


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def make_chunk(
    doc_id: str,
    chunk_id: int = 0,
    content: str = "test content",
) -> DocMetadataAwareIndexChunk:
    """Create a chunk suitable for external dependency testing (128-dim embeddings)."""
    tenant_id = get_current_tenant_id()
    access = DocumentAccess.build(
        user_emails=[],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=True,
    )
    embeddings = ChunkEmbedding(
        full_embedding=[1.0] + [0.0] * (EMBEDDING_DIM - 1),
        mini_chunk_embeddings=[],
    )
    source_document = Document(
        id=doc_id,
        semantic_identifier="test_doc",
        source=DocumentSource.FILE,
        sections=[],
        metadata={},
        title="test title",
    )
    return DocMetadataAwareIndexChunk(
        tenant_id=tenant_id,
        access=access,
        document_sets=set(),
        user_project=[],
        personas=[],
        boost=0,
        aggregated_chunk_boost_factor=0,
        ancestor_hierarchy_node_ids=[],
        embeddings=embeddings,
        title_embedding=[1.0] + [0.0] * (EMBEDDING_DIM - 1),
        source_document=source_document,
        title_prefix="",
        metadata_suffix_keyword="",
        metadata_suffix_semantic="",
        contextual_rag_reserved_tokens=0,
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        chunk_id=chunk_id,
        blurb=content[:50],
        content=content,
        source_links={0: ""},
        image_file_id=None,
        section_continuation=False,
    )


def make_indexing_metadata(
    doc_ids: list[str],
    old_counts: list[int],
    new_counts: list[int],
) -> IndexingMetadata:
    return IndexingMetadata(
        doc_id_to_chunk_cnt_diff={
            doc_id: IndexingMetadata.ChunkCounts(
                old_chunk_cnt=old,
                new_chunk_cnt=new,
            )
            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)
        }
    )


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def tenant_context() -> Generator[None, None, None]:
    """Sets up tenant context for testing."""
    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
    try:
        yield
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@pytest.fixture(scope="module")
def test_index_name() -> Generator[str, None, None]:
    yield f"test_index_{uuid.uuid4().hex[:8]}"


@pytest.fixture(scope="module")
def httpx_client() -> Generator[httpx.Client, None, None]:
    client = get_vespa_http_client()
    try:
        yield client
    finally:
        client.close()


@pytest.fixture(scope="module")
def vespa_index(
    httpx_client: httpx.Client,
    tenant_context: None,  # noqa: ARG001
    test_index_name: str,
) -> Generator[VespaIndex, None, None]:
    """Create a Vespa index, wait for schema readiness, and yield it."""
    vespa_idx = VespaIndex(
        index_name=test_index_name,
        secondary_index_name=None,
        large_chunks_enabled=False,
        secondary_large_chunks_enabled=None,
        multitenant=MULTI_TENANT,
        httpx_client=httpx_client,
    )
    backend_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "..")
    )
    with patch("os.getcwd", return_value=backend_dir):
        vespa_idx.ensure_indices_exist(
            primary_embedding_dim=EMBEDDING_DIM,
            primary_embedding_precision=EmbeddingPrecision.FLOAT,
            secondary_index_embedding_dim=None,
            secondary_index_embedding_precision=None,
        )
    if not wait_for_vespa_with_timeout(wait_limit=90):
        pytest.fail("Vespa is not available.")

    # Wait until the schema is actually ready for writes on content nodes. We
    # probe by attempting a PUT; 200 means the schema is live, 400 means not
    # yet. This is only temporary until we entirely move off of Vespa.
    probe_doc = {
        "fields": {
            "document_id": "__probe__",
            "chunk_id": 0,
            "blurb": "",
            "title": "",
            "skip_title": True,
            "content": "",
            "content_summary": "",
            "source_type": "file",
            "source_links": "null",
            "semantic_identifier": "",
            "section_continuation": False,
            "large_chunk_reference_ids": [],
            "metadata": "{}",
            "metadata_list": [],
            "metadata_suffix": "",
            "chunk_context": "",
            "doc_summary": "",
            "embeddings": {"full_chunk": [1.0] + [0.0] * (EMBEDDING_DIM - 1)},
            "access_control_list": {},
            "document_sets": {},
            "image_file_name": None,
            "user_project": [],
            "personas": [],
            "boost": 0.0,
            "aggregated_chunk_boost_factor": 0.0,
            "primary_owners": [],
            "secondary_owners": [],
        }
    }
    probe_url = (
        f"http://localhost:8081/document/v1/default/{test_index_name}/docid/__probe__"
    )
    schema_ready = False
    for _ in range(60):
        resp = httpx_client.post(probe_url, json=probe_doc)
        if resp.status_code == 200:
            schema_ready = True
            httpx_client.delete(probe_url)
            break
        time.sleep(1)
    if not schema_ready:
        pytest.fail(f"Vespa schema '{test_index_name}' did not become ready in time.")

    yield vespa_idx


@pytest.fixture(scope="module")
def opensearch_old_index(
    tenant_context: None,  # noqa: ARG001
    test_index_name: str,
) -> Generator[OpenSearchOldDocumentIndex, None, None]:
    """Create an OpenSearch index via the old adapter and yield it."""
    if not wait_for_opensearch_with_timeout():
        pytest.fail("OpenSearch is not available.")

    opensearch_idx = OpenSearchOldDocumentIndex(
        index_name=test_index_name,
        embedding_dim=EMBEDDING_DIM,
        embedding_precision=EmbeddingPrecision.FLOAT,
        secondary_index_name=None,
        secondary_embedding_dim=None,
        secondary_embedding_precision=None,
        large_chunks_enabled=False,
        secondary_large_chunks_enabled=None,
        multitenant=MULTI_TENANT,
    )
    opensearch_idx.ensure_indices_exist(
        primary_embedding_dim=EMBEDDING_DIM,
        primary_embedding_precision=EmbeddingPrecision.FLOAT,
        secondary_index_embedding_dim=None,
        secondary_index_embedding_precision=None,
    )

    yield opensearch_idx


================================================
FILE: backend/tests/external_dependency_unit/document_index/test_document_index.py
================================================
"""External dependency tests for the new DocumentIndex interface.

These tests assume Vespa and OpenSearch are running.
"""

import time
import uuid
from collections.abc import Generator
from collections.abc import Iterator

import httpx
import pytest

from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.interfaces_new import DocumentIndex as DocumentIndexNew
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
)
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import DocMetadataAwareIndexChunk
from tests.external_dependency_unit.constants import TEST_TENANT_ID
from tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM
from tests.external_dependency_unit.document_index.conftest import make_chunk
from tests.external_dependency_unit.document_index.conftest import (
    make_indexing_metadata,
)


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def vespa_document_index(
    vespa_index: VespaIndex,  # noqa: ARG001 — ensures schema exists
    httpx_client: httpx.Client,
    test_index_name: str,
) -> Generator[VespaDocumentIndex, None, None]:
    yield VespaDocumentIndex(
        index_name=test_index_name,
        tenant_state=TenantState(tenant_id=TEST_TENANT_ID, multitenant=False),
        large_chunks_enabled=False,
        httpx_client=httpx_client,
    )


@pytest.fixture(scope="module")
def opensearch_document_index(
    opensearch_old_index: OpenSearchOldDocumentIndex,  # noqa: ARG001 — ensures index exists
    test_index_name: str,
) -> Generator[OpenSearchDocumentIndex, None, None]:
    yield OpenSearchDocumentIndex(
        tenant_state=TenantState(tenant_id=TEST_TENANT_ID, multitenant=False),
        index_name=test_index_name,
        embedding_dim=EMBEDDING_DIM,
        embedding_precision=EmbeddingPrecision.FLOAT,
    )


@pytest.fixture(scope="module")
def document_indices(
    vespa_document_index: VespaDocumentIndex,
    opensearch_document_index: OpenSearchDocumentIndex,
) -> Generator[list[DocumentIndexNew], None, None]:
    yield [opensearch_document_index, vespa_document_index]


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestDocumentIndexNew:
    """Tests the new DocumentIndex interface against real Vespa and OpenSearch."""

    def test_index_single_new_doc(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Indexing a single new document returns one record with already_existed=False."""
        for document_index in document_indices:
            doc_id = f"test_single_new_{uuid.uuid4().hex[:8]}"
            chunk = make_chunk(doc_id)
            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[1])

            results = document_index.index(chunks=[chunk], indexing_metadata=metadata)

            assert len(results) == 1
            assert results[0].document_id == doc_id
            assert results[0].already_existed is False

    def test_index_existing_doc_already_existed_true(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Re-indexing a doc with previous chunks returns already_existed=True."""
        for document_index in document_indices:
            doc_id = f"test_existing_{uuid.uuid4().hex[:8]}"
            chunk = make_chunk(doc_id)

            # First index — brand new document.
            metadata_first = make_indexing_metadata(
                [doc_id], old_counts=[0], new_counts=[1]
            )
            document_index.index(chunks=[chunk], indexing_metadata=metadata_first)

            # Allow near-real-time indexing to settle (needed for Vespa).
            time.sleep(1)

            # Re-index — old_chunk_cnt=1 signals the document already existed.
            metadata_second = make_indexing_metadata(
                [doc_id], old_counts=[1], new_counts=[1]
            )
            results = document_index.index(
                chunks=[chunk], indexing_metadata=metadata_second
            )

            assert len(results) == 1
            assert results[0].already_existed is True

    def test_index_multiple_docs(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Indexing multiple documents returns one record per unique document."""
        for document_index in document_indices:
            doc1 = f"test_multi_1_{uuid.uuid4().hex[:8]}"
            doc2 = f"test_multi_2_{uuid.uuid4().hex[:8]}"
            chunks = [
                make_chunk(doc1, chunk_id=0),
                make_chunk(doc1, chunk_id=1),
                make_chunk(doc2, chunk_id=0),
            ]
            metadata = make_indexing_metadata(
                [doc1, doc2], old_counts=[0, 0], new_counts=[2, 1]
            )

            results = document_index.index(chunks=chunks, indexing_metadata=metadata)

            result_map = {r.document_id: r.already_existed for r in results}
            assert len(result_map) == 2
            assert result_map[doc1] is False
            assert result_map[doc2] is False

    def test_index_deduplicates_doc_ids_in_results(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Multiple chunks from the same document produce only one
        DocumentInsertionRecord."""
        for document_index in document_indices:
            doc_id = f"test_dedup_{uuid.uuid4().hex[:8]}"
            chunks = [make_chunk(doc_id, chunk_id=i) for i in range(5)]
            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[5])

            results = document_index.index(chunks=chunks, indexing_metadata=metadata)

            assert len(results) == 1
            assert results[0].document_id == doc_id

    def test_index_mixed_new_and_existing_docs(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A batch with both new and existing documents returns the correct
        already_existed flag for each."""
        for document_index in document_indices:
            existing_doc = f"test_mixed_exist_{uuid.uuid4().hex[:8]}"
            new_doc = f"test_mixed_new_{uuid.uuid4().hex[:8]}"

            # Pre-index the existing document.
            pre_chunk = make_chunk(existing_doc)
            pre_metadata = make_indexing_metadata(
                [existing_doc], old_counts=[0], new_counts=[1]
            )
            document_index.index(chunks=[pre_chunk], indexing_metadata=pre_metadata)

            time.sleep(1)

            # Now index a batch with the existing doc and a new doc.
            chunks = [
                make_chunk(existing_doc, chunk_id=0),
                make_chunk(new_doc, chunk_id=0),
            ]
            metadata = make_indexing_metadata(
                [existing_doc, new_doc], old_counts=[1, 0], new_counts=[1, 1]
            )

            results = document_index.index(chunks=chunks, indexing_metadata=metadata)

            result_map = {r.document_id: r.already_existed for r in results}
            assert len(result_map) == 2
            assert result_map[existing_doc] is True
            assert result_map[new_doc] is False

    def test_index_accepts_generator(
        self,
        document_indices: list[DocumentIndexNew],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """index() accepts a generator (any iterable), not just a list."""
        for document_index in document_indices:
            doc_id = f"test_gen_{uuid.uuid4().hex[:8]}"
            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[3])

            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
                for i in range(3):
                    yield make_chunk(doc_id, chunk_id=i)

            results = document_index.index(
                chunks=chunk_gen(), indexing_metadata=metadata
            )

            assert len(results) == 1
            assert results[0].document_id == doc_id
            assert results[0].already_existed is False


================================================
FILE: backend/tests/external_dependency_unit/document_index/test_document_index_old.py
================================================
"""External dependency tests for the old DocumentIndex interface.

These tests assume Vespa and OpenSearch are running.
"""

import time
from collections.abc import Generator
from collections.abc import Iterator

import pytest

from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import IndexBatchParams
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.contextvars import get_current_tenant_id
from tests.external_dependency_unit.document_index.conftest import make_chunk


@pytest.fixture(scope="module")
def document_indices(
    vespa_index: VespaIndex,
    opensearch_old_index: OpenSearchOldDocumentIndex,
) -> Generator[list[DocumentIndex], None, None]:
    # Ideally these are parametrized; doing so with pytest fixtures is tricky.
    yield [opensearch_old_index, vespa_index]


@pytest.fixture(scope="function")
def chunks(
    tenant_context: None,  # noqa: ARG001
) -> Generator[list[DocMetadataAwareIndexChunk], None, None]:
    yield [make_chunk("test_doc", chunk_id=i) for i in range(5)]


@pytest.fixture(scope="function")
def index_batch_params(
    tenant_context: None,  # noqa: ARG001
) -> Generator[IndexBatchParams, None, None]:
    # WARNING: doc_id_to_previous_chunk_cnt={"test_doc": 0} is hardcoded to 0,
    # which is only correct on the very first index call. The document_indices
    # fixture is scope="module", meaning the same OpenSearch and Vespa backends
    # persist across all test functions in this module. When a second test
    # function uses this fixture and calls document_index.index(...), the
    # backend already has 5 chunks for "test_doc" from the previous test run,
    # but the batch params still claim 0 prior chunks exist. This can lead to
    # orphaned/duplicate chunks that make subsequent assertions incorrect.
    # TODO: Whenever adding a second test, either change this or cleanup the
    # index between test cases.
    yield IndexBatchParams(
        doc_id_to_previous_chunk_cnt={"test_doc": 0},
        doc_id_to_new_chunk_cnt={"test_doc": 5},
        tenant_id=get_current_tenant_id(),
        large_chunks_enabled=False,
    )


class TestDocumentIndexOld:
    """Tests the old DocumentIndex interface."""

    # TODO(ENG-3864)(andrei): Re-enable this test.
    @pytest.mark.xfail(
        reason="Flaky test: Retrieved chunks vary non-deterministically before and after changing user projects and personas. Likely a timing issue with the index being updated."
    )
    def test_update_single_can_clear_user_projects_and_personas(
        self,
        document_indices: list[DocumentIndex],
        # This test case assumes all these chunks correspond to one document.
        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> None:
        """
        Tests that update_single can clear user_projects and personas.
        """
        for document_index in document_indices:
            # Precondition.
            # Ensure there is some non-empty value for user project and
            # personas.
            for chunk in chunks:
                chunk.user_project = [1]
                chunk.personas = [2]
            document_index.index(chunks, index_batch_params)

            # Ensure that we can get chunks as expected with filters.
            doc_id = chunks[0].source_document.id
            chunk_count = len(chunks)
            tenant_id = get_current_tenant_id()
            # We need to specify the chunk index range and specify
            # batch_retrieval=True below to trigger the codepath for Vespa's
            # search API, which uses the expected additive filtering for
            # project_id and persona_id. Otherwise we would use the codepath for
            # the visit API, which does not have this kind of filtering
            # implemented.
            chunk_request = VespaChunkRequest(
                document_id=doc_id, min_chunk_ind=0, max_chunk_ind=chunk_count - 1
            )
            project_persona_filters = IndexFilters(
                access_control_list=None,
                tenant_id=tenant_id,
                project_id_filter=1,
                persona_id_filter=2,
                # We need this even though none of the chunks belong to a
                # document set because project_id and persona_id are only
                # additive filters in the event the agent has knowledge scope;
                # if the agent does not, it is implied that it can see
                # everything it is allowed to.
                document_set=["1"],
            )
            # Not best practice here but the API for refreshing the index to
            # ensure that the latest data is present is not exposed in this
            # class and is not the same for Vespa and OpenSearch, so we just
            # tolerate a sleep for now. As a consequence the number of tests in
            # this suite should be small. We only need to tolerate this for as
            # long as we continue to use Vespa, we can consider exposing
            # something for OpenSearch later.
            time.sleep(1)
            inference_chunks = document_index.id_based_retrieval(
                chunk_requests=[chunk_request],
                filters=project_persona_filters,
                batch_retrieval=True,
            )
            assert len(inference_chunks) == chunk_count
            # Sort by chunk id to easily test if we have all chunks.
            for i, inference_chunk in enumerate(
                sorted(inference_chunks, key=lambda x: x.chunk_id)
            ):
                assert inference_chunk.chunk_id == i
                assert inference_chunk.document_id == doc_id

            # Under test.
            # Explicitly set empty fields here.
            user_fields = VespaDocumentUserFields(user_projects=[], personas=[])
            document_index.update_single(
                doc_id=doc_id,
                chunk_count=chunk_count,
                tenant_id=tenant_id,
                fields=None,
                user_fields=user_fields,
            )

            # Postcondition.
            filters = IndexFilters(access_control_list=None, tenant_id=tenant_id)
            # We should expect to get back all expected chunks with no filters.
            # Again, not best practice here.
            time.sleep(1)
            inference_chunks = document_index.id_based_retrieval(
                chunk_requests=[chunk_request], filters=filters, batch_retrieval=True
            )
            assert len(inference_chunks) == chunk_count
            # Sort by chunk id to easily test if we have all chunks.
            for i, inference_chunk in enumerate(
                sorted(inference_chunks, key=lambda x: x.chunk_id)
            ):
                assert inference_chunk.chunk_id == i
                assert inference_chunk.document_id == doc_id
            # Now, we should expect to not get any chunks if we specify the user
            # project and personas filters.
            inference_chunks = document_index.id_based_retrieval(
                chunk_requests=[chunk_request],
                filters=project_persona_filters,
                batch_retrieval=True,
            )
            assert len(inference_chunks) == 0

    def test_index_accepts_generator(
        self,
        document_indices: list[DocumentIndex],
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """index() accepts a generator (any iterable), not just a list."""
        for document_index in document_indices:

            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
                for i in range(3):
                    yield make_chunk("test_doc_gen", chunk_id=i)

            index_batch_params = IndexBatchParams(
                doc_id_to_previous_chunk_cnt={"test_doc_gen": 0},
                doc_id_to_new_chunk_cnt={"test_doc_gen": 3},
                tenant_id=get_current_tenant_id(),
                large_chunks_enabled=False,
            )

            results = document_index.index(chunk_gen(), index_batch_params)

            assert len(results) == 1
            record = results.pop()
            assert record.document_id == "test_doc_gen"
            assert record.already_existed is False


================================================
FILE: backend/tests/external_dependency_unit/feature_flags/__init__.py
================================================
# External dependency unit tests for feature flag service


================================================
FILE: backend/tests/external_dependency_unit/feature_flags/test_feature_flag_provider_factory.py
================================================
"""
External dependency unit tests for the feature flag service.

These tests verify the feature flag service implementation with real
PostHog integration when available, and fallback behavior otherwise.
"""

from uuid import UUID

from ee.onyx.feature_flags.posthog_provider import PostHogFeatureFlagProvider
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.feature_flags.interface import FeatureFlagProvider
from onyx.feature_flags.interface import NoOpFeatureFlagProvider


class TestNoOpFeatureFlagProvider:
    """Tests for the no-op feature flag provider."""

    def test_always_returns_false(self) -> None:
        """No-op provider should always return False."""
        provider = NoOpFeatureFlagProvider()

        my_uuid = UUID("79a75f76-6b63-43ee-b04c-a0c6806900bd")
        assert provider.feature_enabled("another-flag", my_uuid) is False


class TestFeatureFlagFactory:
    """Tests for the feature flag factory function."""

    def test_factory_returns_provider(self) -> None:
        """Factory should return a FeatureFlagProvider instance."""
        provider = get_default_feature_flag_provider()
        assert isinstance(provider, FeatureFlagProvider)

    def test_posthog_provider(self) -> None:
        """Posthog provider should return True if the feature is enabled."""
        provider = PostHogFeatureFlagProvider()
        assert isinstance(provider, FeatureFlagProvider)


================================================
FILE: backend/tests/external_dependency_unit/file_store/test_file_store_non_mocked.py
================================================
import os
import time
import uuid
from collections.abc import Generator
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Tuple
from typing import TypedDict
from unittest.mock import patch

import pytest
from botocore.exceptions import ClientError
from sqlalchemy.orm import Session

from onyx.configs.constants import FileOrigin
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.file_store.file_store import S3BackedFileStore
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID

logger = setup_logger()


TEST_BUCKET_NAME: str = "onyx-file-store-tests"
TEST_FILE_PREFIX: str = "test-files"


# Type definitions for test data
class BackendConfig(TypedDict):
    endpoint_url: str | None
    access_key: str
    secret_key: str
    region: str
    verify_ssl: bool
    backend_name: str


class FileTestData(TypedDict):
    name: str
    display_name: str
    content: str
    type: str
    origin: FileOrigin


class WorkerResult(TypedDict):
    worker_id: int
    file_name: str
    content: str


def _get_all_backend_configs() -> List[BackendConfig]:
    """Get configurations for all available backends"""
    from onyx.configs.app_configs import (
        S3_ENDPOINT_URL,
        AWS_REGION_NAME,
    )

    s3_aws_access_key_id = os.environ.get("S3_AWS_ACCESS_KEY_ID_FOR_TEST")
    s3_aws_secret_access_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY_FOR_TEST")

    configs: List[BackendConfig] = []

    # MinIO configuration (if endpoint is configured)
    if S3_ENDPOINT_URL:
        minio_access_key = "minioadmin"
        minio_secret_key = "minioadmin"
        configs.append(
            {
                "endpoint_url": S3_ENDPOINT_URL,
                "access_key": minio_access_key,
                "secret_key": minio_secret_key,
                "region": "us-east-1",
                "verify_ssl": False,
                "backend_name": "MinIO",
            }
        )

    # AWS S3 configuration (if credentials are available)
    if s3_aws_access_key_id and s3_aws_secret_access_key:
        configs.append(
            {
                "endpoint_url": None,
                "access_key": s3_aws_access_key_id,
                "secret_key": s3_aws_secret_access_key,
                "region": AWS_REGION_NAME or "us-east-2",
                "verify_ssl": True,
                "backend_name": "AWS S3",
            }
        )

    if not configs:
        pytest.skip(
            "No backend configurations available - set MinIO or AWS S3 credentials"
        )

    return configs


@pytest.fixture(
    scope="function",
    params=_get_all_backend_configs(),
    ids=lambda config: config["backend_name"],
)
def file_store(
    request: pytest.FixtureRequest,
    db_session: Session,  # noqa: ARG001
    tenant_context: None,  # noqa: ARG001
) -> Generator[S3BackedFileStore, None, None]:
    """Create an S3BackedFileStore instance for testing with parametrized backend"""
    backend_config: BackendConfig = request.param

    # Create S3BackedFileStore with backend-specific configuration
    store = S3BackedFileStore(
        bucket_name=TEST_BUCKET_NAME,
        aws_access_key_id=backend_config["access_key"],
        aws_secret_access_key=backend_config["secret_key"],
        aws_region_name=backend_config["region"],
        s3_endpoint_url=backend_config["endpoint_url"],
        s3_prefix=f"{TEST_FILE_PREFIX}-{uuid.uuid4()}",
        s3_verify_ssl=backend_config["verify_ssl"],
    )

    # Initialize the store and ensure bucket exists
    store.initialize()
    logger.info(
        f"Successfully initialized {backend_config['backend_name']} file store with bucket {TEST_BUCKET_NAME}"
    )

    yield store

    # Cleanup: Remove all test files from the bucket (including tenant-prefixed files)
    try:
        s3_client = store._get_s3_client()
        actual_bucket_name = store._get_bucket_name()

        # List and delete all objects in the test prefix (including tenant subdirectories)
        response = s3_client.list_objects_v2(
            Bucket=actual_bucket_name, Prefix=f"{store._s3_prefix}/"
        )

        if "Contents" in response:
            objects_to_delete = [{"Key": obj["Key"]} for obj in response["Contents"]]
            s3_client.delete_objects(
                Bucket=actual_bucket_name,
                Delete={"Objects": objects_to_delete},  # type: ignore[typeddict-item]
            )
            logger.info(
                f"Cleaned up {len(objects_to_delete)} test objects from {backend_config['backend_name']}"
            )
    except Exception as e:
        logger.warning(f"Failed to cleanup test objects: {e}")


class TestS3BackedFileStore:
    """Test suite for S3BackedFileStore using real S3-compatible storage (MinIO or AWS S3)"""

    def test_store_initialization(self, file_store: S3BackedFileStore) -> None:
        """Test that the file store initializes properly"""
        # The fixture already calls initialize(), so we just verify it worked
        bucket_name = file_store._get_bucket_name()
        assert bucket_name.startswith(TEST_BUCKET_NAME)  # Should be backend-specific

        # Verify bucket exists by trying to list objects
        s3_client = file_store._get_s3_client()

        # This should not raise an exception
        s3_client.list_objects_v2(Bucket=bucket_name, MaxKeys=1)

    def test_save_and_read_text_file(self, file_store: S3BackedFileStore) -> None:
        """Test saving and reading a text file"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Text File"
        content = "This is a test text file content.\nWith multiple lines."
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Read the file back
        read_content_io = file_store.read_file(file_id)
        read_content = read_content_io.read().decode("utf-8")

        assert read_content == content

        # Verify file record in database
        file_record = file_store.read_file_record(file_id)
        assert file_record.file_id == file_id
        assert file_record.display_name == display_name
        assert file_record.file_origin == file_origin
        assert file_record.file_type == file_type
        assert (
            file_record.bucket_name == file_store._get_bucket_name()
        )  # Use actual bucket name
        # The object key should include the tenant ID
        expected_object_key = f"{file_store._s3_prefix}/{TEST_TENANT_ID}/{file_id}"
        assert file_record.object_key == expected_object_key

    def test_save_and_read_binary_file(self, file_store: S3BackedFileStore) -> None:
        """Test saving and reading a binary file"""
        file_id = f"{uuid.uuid4()}.bin"
        display_name = "Test Binary File"
        # Create some binary content
        content = bytes(range(256))  # 0-255 bytes
        file_type = "application/octet-stream"
        file_origin = FileOrigin.CONNECTOR

        # Save the file
        content_io = BytesIO(content)
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Read the file back
        read_content_io = file_store.read_file(file_id)
        read_content = read_content_io.read()

        assert read_content == content

    def test_save_with_metadata(self, file_store: S3BackedFileStore) -> None:
        """Test saving a file with metadata"""
        file_id = f"{uuid.uuid4()}.json"
        display_name = "Test Metadata File"
        content = '{"key": "value", "number": 42}'
        file_type = "application/json"
        file_origin = FileOrigin.CHAT_UPLOAD
        metadata: Dict[str, Any] = {
            "source": "test_suite",
            "version": "1.0",
            "tags": ["test", "json"],
            "size": len(content),
        }

        # Save the file with metadata
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_metadata=metadata,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Verify metadata is stored in database
        file_record = file_store.read_file_record(file_id)
        assert file_record.file_metadata == metadata

    def test_has_file(self, file_store: S3BackedFileStore) -> None:
        """Test the has_file method"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Has File"
        content = "Content for has_file test"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Initially, file should not exist
        assert not file_store.has_file(
            file_id=file_id,
            file_origin=file_origin,
            file_type=file_type,
        )

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Now file should exist
        assert file_store.has_file(
            file_id=file_id,
            file_origin=file_origin,
            file_type=file_type,
        )

        # Test with wrong parameters
        assert not file_store.has_file(
            file_id=file_id,
            file_origin=FileOrigin.CONNECTOR,  # Wrong origin
            file_type=file_type,
        )

        assert not file_store.has_file(
            file_id=file_id,
            file_origin=file_origin,
            file_type="application/pdf",  # Wrong type
        )

    def test_read_file_with_tempfile(self, file_store: S3BackedFileStore) -> None:
        """Test reading a file using temporary file"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Temp File"
        content = "Content for temporary file test"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Read using temporary file
        temp_file = file_store.read_file(file_id, use_tempfile=True)

        # Read content from temp file
        temp_file.seek(0)
        read_content_bytes = temp_file.read()
        if isinstance(read_content_bytes, bytes):
            read_content_str = read_content_bytes.decode("utf-8")
        else:
            read_content_str = str(read_content_bytes)

        assert read_content_str == content

        # Clean up the temp file
        temp_file.close()
        if hasattr(temp_file, "name"):
            try:
                os.unlink(temp_file.name)
            except (OSError, AttributeError):
                pass

    def test_delete_file(self, file_store: S3BackedFileStore) -> None:
        """Test deleting a file"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Delete File"
        content = "Content for delete test"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Verify file exists
        assert file_store.has_file(
            file_id=file_id,
            file_origin=file_origin,
            file_type=file_type,
        )

        # Delete the file
        file_store.delete_file(file_id)

        # Verify file no longer exists
        assert not file_store.has_file(
            file_id=file_id,
            file_origin=file_origin,
            file_type=file_type,
        )

        # Verify trying to read deleted file raises exception
        with pytest.raises(RuntimeError, match="does not exist or was deleted"):
            file_store.read_file(file_id)

    def test_get_file_with_mime_type(self, file_store: S3BackedFileStore) -> None:
        """Test getting file with mime type detection"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test MIME Type"
        content = "This is a plain text file"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Get file with mime type
        file_with_mime = file_store.get_file_with_mime_type(file_id)

        assert file_with_mime is not None
        assert file_with_mime.data.decode("utf-8") == content
        # The detected mime type might be different from what we stored
        assert file_with_mime.mime_type is not None

    def test_file_overwrite(self, file_store: S3BackedFileStore) -> None:
        """Test overwriting an existing file"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Overwrite"
        original_content = "Original content"
        new_content = "New content after overwrite"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save original file
        content_io = BytesIO(original_content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Verify original content
        read_content_io = file_store.read_file(file_id)
        assert read_content_io.read().decode("utf-8") == original_content

        # Overwrite with new content
        new_content_io = BytesIO(new_content.encode("utf-8"))
        returned_file_id_2 = file_store.save_file(
            content=new_content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id_2 == file_id

        # Verify new content
        read_content_io = file_store.read_file(file_id)
        assert read_content_io.read().decode("utf-8") == new_content

    def test_large_file_handling(self, file_store: S3BackedFileStore) -> None:
        """Test handling of larger files"""
        file_id = f"{uuid.uuid4()}.bin"
        display_name = "Test Large File"
        # Create a 1MB file
        content_size = 1024 * 1024  # 1MB
        content = b"A" * content_size
        file_type = "application/octet-stream"
        file_origin = FileOrigin.CONNECTOR

        # Save the large file
        content_io = BytesIO(content)
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Read the file back
        read_content_io = file_store.read_file(file_id)
        read_content = read_content_io.read()

        assert len(read_content) == content_size
        assert read_content == content

    def test_error_handling_nonexistent_file(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test error handling when trying to read a non-existent file"""
        nonexistent_file_id = f"{uuid.uuid4()}.txt"

        with pytest.raises(RuntimeError, match="does not exist or was deleted"):
            file_store.read_file(nonexistent_file_id)

        with pytest.raises(RuntimeError, match="does not exist or was deleted"):
            file_store.read_file_record(nonexistent_file_id)

        # get_file_with_mime_type should return None for non-existent files
        result = file_store.get_file_with_mime_type(nonexistent_file_id)
        assert result is None

    def test_error_handling_delete_nonexistent_file(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test error handling when trying to delete a non-existent file"""
        nonexistent_file_id = f"{uuid.uuid4()}.txt"

        # Should raise an exception when trying to delete non-existent file
        with pytest.raises(RuntimeError, match="does not exist or was deleted"):
            file_store.delete_file(nonexistent_file_id)

    def test_multiple_files_different_origins(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test storing multiple files with different origins and types"""
        files_data: List[FileTestData] = [
            {
                "name": f"{uuid.uuid4()}.txt",
                "display_name": "Chat Upload File",
                "content": "Content from chat upload",
                "type": "text/plain",
                "origin": FileOrigin.CHAT_UPLOAD,
            },
            {
                "name": f"{uuid.uuid4()}.json",
                "display_name": "Connector File",
                "content": '{"from": "connector"}',
                "type": "application/json",
                "origin": FileOrigin.CONNECTOR,
            },
            {
                "name": f"{uuid.uuid4()}.csv",
                "display_name": "Generated Report",
                "content": "col1,col2\nval1,val2",
                "type": "text/csv",
                "origin": FileOrigin.GENERATED_REPORT,
            },
        ]

        # Save all files
        for file_data in files_data:
            content_io = BytesIO(file_data["content"].encode("utf-8"))
            returned_file_id = file_store.save_file(
                content=content_io,
                display_name=file_data["display_name"],
                file_origin=file_data["origin"],
                file_type=file_data["type"],
                file_id=file_data["name"],
            )
            assert returned_file_id == file_data["name"]

        # Verify all files exist and have correct properties
        for file_data in files_data:
            assert file_store.has_file(
                file_id=file_data["name"],
                file_origin=file_data["origin"],
                file_type=file_data["type"],
            )

            # Read and verify content
            read_content_io = file_store.read_file(file_data["name"])
            read_content = read_content_io.read().decode("utf-8")
            assert read_content == file_data["content"]

            # Verify record
            file_record = file_store.read_file_record(file_data["name"])
            assert file_record.file_origin == file_data["origin"]
            assert file_record.file_type == file_data["type"]

    def test_special_characters_in_filenames(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test handling of special characters in filenames"""
        # Note: S3 keys have some restrictions, so we test reasonable special characters
        special_files: List[str] = [
            f"{uuid.uuid4()} with spaces.txt",
            f"{uuid.uuid4()}-with-dashes.txt",
            f"{uuid.uuid4()}_with_underscores.txt",
            f"{uuid.uuid4()}.with.dots.txt",
            f"{uuid.uuid4()}(with)parentheses.txt",
        ]

        for file_id in special_files:
            content = f"Content for {file_id}"
            content_io = BytesIO(content.encode("utf-8"))

            # Save the file
            returned_file_id = file_store.save_file(
                content=content_io,
                display_name=f"Display: {file_id}",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                file_id=file_id,
            )

            assert returned_file_id == file_id

            # Read and verify
            read_content_io = file_store.read_file(file_id)
            read_content = read_content_io.read().decode("utf-8")
            assert read_content == content

    @pytest.mark.skipif(
        not os.environ.get("TEST_S3_NETWORK_ERRORS"),
        reason="Network error tests require TEST_S3_NETWORK_ERRORS environment variable",
    )
    def test_network_error_handling(self, file_store: S3BackedFileStore) -> None:
        """Test handling of network errors (requires special setup)"""
        # This test requires specific network configuration to simulate failures
        # It's marked as skip by default and only runs when explicitly enabled

        # Mock a network error during file operations
        with patch.object(file_store, "_get_s3_client") as mock_client:
            mock_s3 = mock_client.return_value
            mock_s3.put_object.side_effect = ClientError(
                error_response={
                    "Error": {
                        "Code": "NetworkingError",
                        "Message": "Connection timeout",
                    }
                },
                operation_name="PutObject",
            )

            content_io = BytesIO(b"test content")

            with pytest.raises(ClientError):
                file_store.save_file(
                    content=content_io,
                    display_name="Network Error Test",
                    file_origin=FileOrigin.OTHER,
                    file_type="text/plain",
                    file_id=f"{uuid.uuid4()}.txt",
                )

    def test_database_transaction_rollback(self, file_store: S3BackedFileStore) -> None:
        """Test database transaction rollback behavior with PostgreSQL"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Rollback"
        content = "Content for rollback test"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Mock S3 to fail after database write but before commit
        with patch.object(file_store, "_get_s3_client") as mock_client:
            mock_s3 = mock_client.return_value
            mock_s3.put_object.side_effect = ClientError(
                error_response={
                    "Error": {"Code": "InternalError", "Message": "S3 internal error"}
                },
                operation_name="PutObject",
            )

            content_io = BytesIO(content.encode("utf-8"))

            # This should fail and rollback the database transaction
            with pytest.raises(ClientError):
                file_store.save_file(
                    content=content_io,
                    display_name=display_name,
                    file_origin=file_origin,
                    file_type=file_type,
                    file_id=file_id,
                )

        # Verify that the database record was not created due to rollback
        with pytest.raises(RuntimeError, match="does not exist or was deleted"):
            file_store.read_file_record(file_id)

    def test_complex_jsonb_metadata(self, file_store: S3BackedFileStore) -> None:
        """Test PostgreSQL JSONB metadata handling with complex data structures"""
        file_id = f"{uuid.uuid4()}.json"
        display_name = "Test Complex Metadata"
        content = '{"data": "test"}'
        file_type = "application/json"
        file_origin = FileOrigin.CONNECTOR

        # Complex metadata that tests PostgreSQL JSONB capabilities
        complex_metadata: Dict[str, Any] = {
            "nested": {
                "array": [1, 2, 3, {"inner": "value"}],
                "boolean": True,
                "null_value": None,
                "number": 42.5,
            },
            "unicode": "测试数据 🚀",
            "special_chars": "Line 1\nLine 2\t\r\nSpecial: !@#$%^&*()",
            "large_text": "x" * 1000,  # Test large text in JSONB
            "timestamps": {
                "created": "2024-01-01T00:00:00Z",
                "updated": "2024-01-02T12:30:45Z",
            },
        }

        # Save file with complex metadata
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_metadata=complex_metadata,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Retrieve and verify the metadata was stored correctly
        file_record = file_store.read_file_record(file_id)
        stored_metadata = file_record.file_metadata

        # Verify all metadata fields were preserved
        assert stored_metadata == complex_metadata

        # Type casting for complex metadata access
        stored_metadata_dict = cast(Dict[str, Any], stored_metadata)
        nested_data = cast(Dict[str, Any], stored_metadata_dict["nested"])
        array_data = cast(List[Any], nested_data["array"])
        inner_obj = cast(Dict[str, Any], array_data[3])

        assert inner_obj["inner"] == "value"
        assert stored_metadata_dict["unicode"] == "测试数据 🚀"
        assert nested_data["boolean"] is True
        assert nested_data["null_value"] is None
        assert len(cast(str, stored_metadata_dict["large_text"])) == 1000

    def test_database_consistency_after_s3_failure(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test that database stays consistent when S3 operations fail"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test Consistency"
        content = "Initial content"
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # First, save a file successfully
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Verify initial state
        assert file_store.has_file(file_id, file_origin, file_type)
        initial_record = file_store.read_file_record(file_id)

        # Now try to update but fail on S3 side
        with patch.object(file_store, "_get_s3_client") as mock_client:
            mock_s3 = mock_client.return_value
            # Let the first call (for reading/checking) succeed, but fail on put_object
            mock_s3.put_object.side_effect = ClientError(
                error_response={
                    "Error": {
                        "Code": "ServiceUnavailable",
                        "Message": "Service temporarily unavailable",
                    }
                },
                operation_name="PutObject",
            )

            new_content = "Updated content that should fail"
            new_content_io = BytesIO(new_content.encode("utf-8"))

            # This should fail and rollback
            with pytest.raises(ClientError):
                file_store.save_file(
                    content=new_content_io,
                    display_name=display_name,
                    file_origin=file_origin,
                    file_type=file_type,
                    file_id=file_id,
                )

        # Verify the database record is unchanged (not updated)
        current_record = file_store.read_file_record(file_id)
        assert current_record.file_id == initial_record.file_id
        assert current_record.display_name == initial_record.display_name
        assert current_record.bucket_name == initial_record.bucket_name
        assert current_record.object_key == initial_record.object_key

        # Verify we can still read the original file content
        read_content_io = file_store.read_file(file_id)
        read_content = read_content_io.read().decode("utf-8")
        assert read_content == content  # Original content, not the failed update

    def test_concurrent_file_operations(self, file_store: S3BackedFileStore) -> None:
        """Test handling of concurrent file operations on the same file"""
        base_file_name: str = str(uuid.uuid4())
        file_type: str = "text/plain"
        file_origin: FileOrigin = FileOrigin.OTHER

        # Get current file store configuration to replicate in workers
        current_bucket_name = file_store._get_bucket_name()
        current_access_key = file_store._aws_access_key_id
        current_secret_key = file_store._aws_secret_access_key
        current_region = file_store._aws_region_name
        current_endpoint_url = file_store._s3_endpoint_url
        current_verify_ssl = file_store._s3_verify_ssl

        results: List[Tuple[str, str]] = []
        errors: List[Tuple[int, str]] = []

        def save_file_worker(worker_id: int) -> bool:
            """Worker function to save a file with its own database session"""
            try:
                # Set up tenant context for this worker
                token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)
                try:
                    # Create a new database session for each worker to avoid conflicts
                    with get_session_with_current_tenant() as worker_session:
                        worker_file_store = S3BackedFileStore(
                            bucket_name=current_bucket_name,
                            aws_access_key_id=current_access_key,
                            aws_secret_access_key=current_secret_key,
                            aws_region_name=current_region,
                            s3_endpoint_url=current_endpoint_url,
                            s3_prefix=TEST_FILE_PREFIX,
                            s3_verify_ssl=current_verify_ssl,
                        )

                        file_name: str = f"{base_file_name}_{worker_id}.txt"
                        content: str = (
                            f"Content from worker {worker_id} at {time.time()}"
                        )
                        content_io: BytesIO = BytesIO(content.encode("utf-8"))

                        worker_file_store.save_file(
                            file_id=file_name,
                            content=content_io,
                            display_name=f"Worker {worker_id} File",
                            file_origin=file_origin,
                            file_type=file_type,
                            db_session=worker_session,
                        )
                        results.append((file_name, content))
                        return True
                finally:
                    # Reset the tenant context after the worker completes
                    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
            except Exception as e:
                errors.append((worker_id, str(e)))
                return False

        # Run multiple concurrent file save operations
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(save_file_worker, i) for i in range(10)]

            for future in as_completed(futures):
                future.result()  # Wait for completion

        # Verify all operations completed successfully
        assert len(errors) == 0, f"Concurrent operations had errors: {errors}"
        assert (
            len(results) == 10
        ), f"Expected 10 successful operations, got {len(results)}"

        # Verify all files were saved correctly
        for file_id, expected_content in results:
            # Check file exists
            assert file_store.has_file(
                file_id=file_id,
                file_origin=file_origin,
                file_type=file_type,
            )

            # Check content is correct
            read_content_io = file_store.read_file(file_id)
            actual_content: str = read_content_io.read().decode("utf-8")
            assert actual_content == expected_content

    def test_list_files_by_prefix(self, file_store: S3BackedFileStore) -> None:
        """Test listing files by prefix returns only correctly prefixed files"""
        test_prefix = "documents-batch-"

        # Files that should be returned (start with the prefix)
        prefixed_files: List[str] = [
            f"{test_prefix}001.txt",
            f"{test_prefix}002.json",
            f"{test_prefix}abc.pdf",
            f"{test_prefix}xyz-final.docx",
        ]

        # Files that should NOT be returned (don't start with prefix, even if they contain it)
        non_prefixed_files: List[str] = [
            f"other-{test_prefix}001.txt",  # Contains prefix but doesn't start with it
            f"backup-{test_prefix}data.txt",  # Contains prefix but doesn't start with it
            f"{uuid.uuid4()}.txt",  # Random file without prefix
            "reports-001.pdf",  # Different prefix
            f"my-{test_prefix[:-1]}.txt",  # Similar but not exact prefix
        ]

        all_files = prefixed_files + non_prefixed_files
        saved_file_ids: List[str] = []

        # Save all test files
        for file_name in all_files:
            content = f"Content for {file_name}"
            content_io = BytesIO(content.encode("utf-8"))

            returned_file_id = file_store.save_file(
                content=content_io,
                display_name=f"Display: {file_name}",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                file_id=file_name,
            )
            saved_file_ids.append(returned_file_id)

            # Verify file was saved
            assert returned_file_id == file_name

        # Test the list_files_by_prefix functionality
        prefix_results = file_store.list_files_by_prefix(test_prefix)

        # Extract file IDs from results
        returned_file_ids = [record.file_id for record in prefix_results]

        # Verify correct number of files returned
        assert len(returned_file_ids) == len(prefixed_files), (
            f"Expected {len(prefixed_files)} files with prefix '{test_prefix}', "
            f"but got {len(returned_file_ids)}: {returned_file_ids}"
        )

        # Verify all prefixed files are returned
        for expected_file_id in prefixed_files:
            assert (
                expected_file_id in returned_file_ids
            ), f"File '{expected_file_id}' should be in results but was not found. Returned files: {returned_file_ids}"

        # Verify no non-prefixed files are returned
        for unexpected_file_id in non_prefixed_files:
            assert (
                unexpected_file_id not in returned_file_ids
            ), f"File '{unexpected_file_id}' should NOT be in results but was found. Returned files: {returned_file_ids}"

        # Verify the returned records have correct properties
        for record in prefix_results:
            assert record.file_id.startswith(test_prefix)
            assert record.display_name == f"Display: {record.file_id}"
            assert record.file_origin == FileOrigin.OTHER
            assert record.file_type == "text/plain"
            assert record.bucket_name == file_store._get_bucket_name()

        # Test with empty prefix (should return all files we created)
        all_results = file_store.list_files_by_prefix("")
        all_returned_ids = [record.file_id for record in all_results]

        # Should include all our test files
        for file_id in saved_file_ids:
            assert (
                file_id in all_returned_ids
            ), f"File '{file_id}' should be in results for empty prefix"

        # Test with non-existent prefix
        nonexistent_results = file_store.list_files_by_prefix("nonexistent-prefix-")
        assert (
            len(nonexistent_results) == 0
        ), "Should return empty list for non-existent prefix"

    def test_get_file_size(self, file_store: S3BackedFileStore) -> None:
        """Test getting file size from S3"""
        file_id = f"{uuid.uuid4()}.txt"
        display_name = "Test File Size"
        content = "This is test content for file size check."
        expected_size = len(content.encode("utf-8"))
        file_type = "text/plain"
        file_origin = FileOrigin.OTHER

        # Save the file
        content_io = BytesIO(content.encode("utf-8"))
        returned_file_id = file_store.save_file(
            content=content_io,
            display_name=display_name,
            file_origin=file_origin,
            file_type=file_type,
            file_id=file_id,
        )

        assert returned_file_id == file_id

        # Get file size
        file_size = file_store.get_file_size(file_id)

        assert file_size is not None
        assert file_size == expected_size

    def test_get_file_size_nonexistent_file(
        self, file_store: S3BackedFileStore
    ) -> None:
        """Test getting file size for a non-existent file returns None"""
        nonexistent_file_id = f"{uuid.uuid4()}.txt"

        file_size = file_store.get_file_size(nonexistent_file_id)

        assert file_size is None


================================================
FILE: backend/tests/external_dependency_unit/file_store/test_postgres_file_store_non_mocked.py
================================================
"""External dependency tests for PostgresBackedFileStore.

These tests interact with a real PostgreSQL database — no mocking.
They exercise Large Object creation, reading, streaming, deletion,
and verify consistency between the file_record / file_content tables
and the underlying pg_largeobject storage.
"""

import uuid
from collections.abc import Generator
from io import BytesIO
from io import StringIO
from typing import Any
from typing import Dict
from typing import List

import pytest
from sqlalchemy.orm import Session

from onyx.configs.constants import FileOrigin
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.file_content import get_file_content_by_file_id
from onyx.db.file_content import get_file_content_by_file_id_optional
from onyx.file_store.postgres_file_store import _get_raw_connection
from onyx.file_store.postgres_file_store import _read_large_object
from onyx.file_store.postgres_file_store import POSTGRES_BUCKET_SENTINEL
from onyx.file_store.postgres_file_store import PostgresBackedFileStore
from onyx.utils.logger import setup_logger

logger = setup_logger()


# ------------------------------------------------------------------ fixtures --


@pytest.fixture(scope="function")
def pg_file_store(
    db_session: Session,  # noqa: ARG001 — ensures engine is ready
    tenant_context: None,  # noqa: ARG001
) -> Generator[PostgresBackedFileStore, None, None]:
    """Provide a PostgresBackedFileStore wired to the real test database."""
    store = PostgresBackedFileStore()
    store.initialize()

    # Track file IDs so we can clean up after each test
    created_ids: list[str] = []
    original_save = store.save_file

    def _tracking_save(*args: Any, **kwargs: Any) -> str:
        file_id = original_save(*args, **kwargs)
        created_ids.append(file_id)
        return file_id

    store.save_file = _tracking_save  # type: ignore[method-assign]

    yield store

    # Cleanup: delete every file we created (including Large Objects)
    for fid in created_ids:
        try:
            store.delete_file(fid)
        except Exception:
            pass


# -------------------------------------------------------------------- tests --


class TestPostgresBackedFileStore:
    """Full integration tests against a real PostgreSQL instance."""

    # ── basic save / read ──────────────────────────────────────────

    def test_save_and_read_text_file(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        file_id = f"{uuid.uuid4()}.txt"
        content = "Hello, Postgres Large Objects!"

        returned_id = pg_file_store.save_file(
            content=BytesIO(content.encode()),
            display_name="greeting.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert returned_id == file_id

        result = pg_file_store.read_file(file_id)
        assert result.read().decode() == content

    def test_save_and_read_binary_file(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        file_id = f"{uuid.uuid4()}.bin"
        content = bytes(range(256))

        pg_file_store.save_file(
            content=BytesIO(content),
            display_name="binary.bin",
            file_origin=FileOrigin.CONNECTOR,
            file_type="application/octet-stream",
            file_id=file_id,
        )

        assert pg_file_store.read_file(file_id).read() == content

    def test_save_string_io(self, pg_file_store: PostgresBackedFileStore) -> None:
        """StringIO content should be transparently UTF-8 encoded."""
        file_id = f"{uuid.uuid4()}.txt"
        text = "StringIO content — including unicode: 测试 🚀"

        pg_file_store.save_file(
            content=StringIO(text),
            display_name="stringio.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert pg_file_store.read_file(file_id).read().decode() == text

    def test_auto_generated_file_id(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        """When no file_id is supplied, a UUID should be generated."""
        returned_id = pg_file_store.save_file(
            content=BytesIO(b"auto-id"),
            display_name="auto.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
        )

        # Should be a valid UUID
        uuid.UUID(returned_id)
        assert pg_file_store.read_file(returned_id).read() == b"auto-id"

    # ── read with tempfile (streaming) ─────────────────────────────

    def test_read_file_with_tempfile(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        file_id = f"{uuid.uuid4()}.txt"
        content = "Streamed via tempfile"

        pg_file_store.save_file(
            content=BytesIO(content.encode()),
            display_name="streamed.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        tmp = pg_file_store.read_file(file_id, use_tempfile=True)
        try:
            tmp.seek(0)
            assert tmp.read().decode() == content
        finally:
            tmp.close()

    # ── file record metadata ───────────────────────────────────────

    def test_file_record_fields(self, pg_file_store: PostgresBackedFileStore) -> None:
        file_id = f"{uuid.uuid4()}.json"
        metadata: Dict[str, Any] = {"source": "test", "version": 1}

        pg_file_store.save_file(
            content=BytesIO(b'{"k":"v"}'),
            display_name="meta.json",
            file_origin=FileOrigin.CHAT_UPLOAD,
            file_type="application/json",
            file_metadata=metadata,
            file_id=file_id,
        )

        record = pg_file_store.read_file_record(file_id)
        assert record.file_id == file_id
        assert record.display_name == "meta.json"
        assert record.file_origin == FileOrigin.CHAT_UPLOAD
        assert record.file_type == "application/json"
        assert record.file_metadata == metadata
        assert record.bucket_name == POSTGRES_BUCKET_SENTINEL

        # object_key should be the stringified Large Object OID
        oid = int(record.object_key)
        assert oid > 0

    def test_file_content_record(self, pg_file_store: PostgresBackedFileStore) -> None:
        """file_content row should track the OID and byte-size."""
        file_id = f"{uuid.uuid4()}.txt"
        payload = b"measure my size"

        pg_file_store.save_file(
            content=BytesIO(payload),
            display_name="sized.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        with get_session_with_current_tenant() as session:
            fc = get_file_content_by_file_id(file_id, session)
            assert fc.file_size == len(payload)
            assert fc.lobj_oid > 0

    # ── has_file ───────────────────────────────────────────────────

    def test_has_file(self, pg_file_store: PostgresBackedFileStore) -> None:
        file_id = f"{uuid.uuid4()}.txt"

        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, "text/plain")

        pg_file_store.save_file(
            content=BytesIO(b"exists"),
            display_name="exists.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert pg_file_store.has_file(file_id, FileOrigin.OTHER, "text/plain")
        # Wrong origin / type → False
        assert not pg_file_store.has_file(file_id, FileOrigin.CONNECTOR, "text/plain")
        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, "image/png")

    # ── get_file_size ──────────────────────────────────────────────

    def test_get_file_size(self, pg_file_store: PostgresBackedFileStore) -> None:
        file_id = f"{uuid.uuid4()}.txt"
        payload = b"exactly 24 bytes long!?!"

        pg_file_store.save_file(
            content=BytesIO(payload),
            display_name="sized.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert pg_file_store.get_file_size(file_id) == len(payload)

    def test_get_file_size_nonexistent(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        assert pg_file_store.get_file_size(f"{uuid.uuid4()}") is None

    # ── delete ─────────────────────────────────────────────────────

    def test_delete_file(self, pg_file_store: PostgresBackedFileStore) -> None:
        file_id = f"{uuid.uuid4()}.txt"

        pg_file_store.save_file(
            content=BytesIO(b"delete me"),
            display_name="doomed.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        pg_file_store.delete_file(file_id)

        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, "text/plain")

        with pytest.raises(RuntimeError, match="does not exist"):
            pg_file_store.read_file(file_id)

        # file_content row should also be gone
        with get_session_with_current_tenant() as session:
            assert get_file_content_by_file_id_optional(file_id, session) is None

    def test_delete_nonexistent_raises(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        with pytest.raises(RuntimeError, match="does not exist"):
            pg_file_store.delete_file(f"{uuid.uuid4()}")

    # ── overwrite (upsert) ─────────────────────────────────────────

    def test_overwrite_file(self, pg_file_store: PostgresBackedFileStore) -> None:
        file_id = f"{uuid.uuid4()}.txt"

        pg_file_store.save_file(
            content=BytesIO(b"original"),
            display_name="v1.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert pg_file_store.read_file(file_id).read() == b"original"

        # Capture the OID of the original Large Object
        with get_session_with_current_tenant() as session:
            old_oid = get_file_content_by_file_id(file_id, session).lobj_oid

        pg_file_store.save_file(
            content=BytesIO(b"overwritten"),
            display_name="v2.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        assert pg_file_store.read_file(file_id).read() == b"overwritten"

        # The old Large Object should have been unlinked
        with get_session_with_current_tenant() as session:
            new_oid = get_file_content_by_file_id(file_id, session).lobj_oid
            assert new_oid != old_oid

            raw_conn = _get_raw_connection(session)
            with pytest.raises(Exception):
                _read_large_object(raw_conn, old_oid)

    # ── change_file_id ─────────────────────────────────────────────

    def test_change_file_id(self, pg_file_store: PostgresBackedFileStore) -> None:
        old_id = f"{uuid.uuid4()}.txt"
        new_id = f"{uuid.uuid4()}.txt"
        content = b"portable content"

        pg_file_store.save_file(
            content=BytesIO(content),
            display_name="rename.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=old_id,
        )

        pg_file_store.change_file_id(old_id, new_id)

        # Old ID should be gone
        assert not pg_file_store.has_file(old_id, FileOrigin.OTHER, "text/plain")

        # New ID should serve the same content
        assert pg_file_store.read_file(new_id).read() == content
        assert pg_file_store.get_file_size(new_id) == len(content)

        # Clean up the renamed file (fixture only tracks save_file calls)
        pg_file_store.delete_file(new_id)

    # ── list_files_by_prefix ───────────────────────────────────────

    def test_list_files_by_prefix(self, pg_file_store: PostgresBackedFileStore) -> None:
        prefix = f"batch-{uuid.uuid4().hex[:8]}-"

        # Create files with and without the prefix
        for i in range(3):
            pg_file_store.save_file(
                content=BytesIO(f"prefixed-{i}".encode()),
                display_name=f"p{i}.txt",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                file_id=f"{prefix}{i}.txt",
            )

        pg_file_store.save_file(
            content=BytesIO(b"unrelated"),
            display_name="other.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=f"other-{uuid.uuid4()}.txt",
        )

        results = pg_file_store.list_files_by_prefix(prefix)
        returned_ids = [r.file_id for r in results]

        assert len(returned_ids) == 3
        for i in range(3):
            assert f"{prefix}{i}.txt" in returned_ids

    # ── get_file_with_mime_type ────────────────────────────────────

    def test_get_file_with_mime_type(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        file_id = f"{uuid.uuid4()}.txt"

        pg_file_store.save_file(
            content=BytesIO(b"plain text"),
            display_name="mime.txt",
            file_origin=FileOrigin.OTHER,
            file_type="text/plain",
            file_id=file_id,
        )

        result = pg_file_store.get_file_with_mime_type(file_id)
        assert result is not None
        assert result.data == b"plain text"
        assert result.mime_type is not None

    def test_get_file_with_mime_type_nonexistent(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        assert pg_file_store.get_file_with_mime_type(f"{uuid.uuid4()}") is None

    # ── error handling ─────────────────────────────────────────────

    def test_read_nonexistent_raises(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        with pytest.raises(RuntimeError, match="does not exist"):
            pg_file_store.read_file(f"{uuid.uuid4()}")

    def test_read_file_record_nonexistent_raises(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        with pytest.raises(RuntimeError, match="does not exist"):
            pg_file_store.read_file_record(f"{uuid.uuid4()}")

    # ── large file ─────────────────────────────────────────────────

    def test_large_file_roundtrip(self, pg_file_store: PostgresBackedFileStore) -> None:
        """Verify a 1 MB payload survives a full save / read cycle."""
        file_id = f"{uuid.uuid4()}.bin"
        content = b"X" * (1024 * 1024)

        pg_file_store.save_file(
            content=BytesIO(content),
            display_name="big.bin",
            file_origin=FileOrigin.CONNECTOR,
            file_type="application/octet-stream",
            file_id=file_id,
        )

        assert pg_file_store.read_file(file_id).read() == content
        assert pg_file_store.get_file_size(file_id) == len(content)

    # ── multiple files with different origins ──────────────────────

    def test_multiple_files_different_origins(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        files: List[Dict[str, Any]] = [
            {
                "id": f"{uuid.uuid4()}.txt",
                "content": b"chat upload",
                "origin": FileOrigin.CHAT_UPLOAD,
                "type": "text/plain",
            },
            {
                "id": f"{uuid.uuid4()}.json",
                "content": b'{"from":"connector"}',
                "origin": FileOrigin.CONNECTOR,
                "type": "application/json",
            },
            {
                "id": f"{uuid.uuid4()}.csv",
                "content": b"a,b\n1,2",
                "origin": FileOrigin.GENERATED_REPORT,
                "type": "text/csv",
            },
        ]

        for f in files:
            pg_file_store.save_file(
                content=BytesIO(f["content"]),
                display_name=f["id"],
                file_origin=f["origin"],
                file_type=f["type"],
                file_id=f["id"],
            )

        for f in files:
            assert pg_file_store.has_file(f["id"], f["origin"], f["type"])
            assert pg_file_store.read_file(f["id"]).read() == f["content"]

    # ── complex JSONB metadata ─────────────────────────────────────

    def test_complex_jsonb_metadata(
        self, pg_file_store: PostgresBackedFileStore
    ) -> None:
        file_id = f"{uuid.uuid4()}.json"
        metadata: Dict[str, Any] = {
            "nested": {"array": [1, 2, {"inner": True}], "null_val": None},
            "unicode": "测试 🚀",
            "large_text": "z" * 1000,
        }

        pg_file_store.save_file(
            content=BytesIO(b"{}"),
            display_name="meta.json",
            file_origin=FileOrigin.OTHER,
            file_type="application/json",
            file_metadata=metadata,
            file_id=file_id,
        )

        record = pg_file_store.read_file_record(file_id)
        assert record.file_metadata == metadata


================================================
FILE: backend/tests/external_dependency_unit/full_setup.py
================================================
from __future__ import annotations

import os
from pathlib import Path
from typing import Optional

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.file_store import get_default_file_store
from onyx.indexing.models import IndexingSetting
from onyx.setup import setup_document_indices
from onyx.setup import setup_postgres
from shared_configs import configs as shared_configs_module
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID


_SETUP_COMPLETE: bool = False


def ensure_full_deployment_setup(
    tenant_id: Optional[str] = None,
    opensearch_available: bool = False,
) -> None:
    """Initialize test environment to mirror a real deployment, on demand.

    - Initializes DB engine and sets tenant context
    - Skips model warm-ups during setup
    - Runs setup_onyx (Postgres defaults, Vespa indices)
    - Initializes file store (best-effort)
    - Ensures Vespa indices exist
    """
    global _SETUP_COMPLETE
    if _SETUP_COMPLETE:
        return

    if os.environ.get("SKIP_EXTERNAL_DEPENDENCY_UNIT_SETUP", "").lower() == "true":
        return

    tenant = tenant_id or TEST_TENANT_ID

    # Initialize engine (noop if already initialized)
    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    # Avoid warm-up network calls during setup
    shared_configs_module.SKIP_WARM_UP = True

    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant)
    original_cwd = os.getcwd()
    backend_dir = Path(__file__).resolve().parents[2]  # points to 'backend'
    os.chdir(str(backend_dir))

    try:
        with get_session_with_current_tenant() as db_session:
            setup_postgres(db_session)

            # Initialize file store; ignore if not configured
            try:
                get_default_file_store().initialize()
            except Exception:
                pass

        # Also ensure indices exist explicitly (no-op if already created)
        with get_session_with_current_tenant() as db_session:
            active = get_active_search_settings(db_session)
            if opensearch_available:
                # We use this special bool here instead of just relying on
                # ENABLE_OPENSEARCH_INDEXING_FOR_ONYX because not all testing
                # infra is configured for OpenSearch.
                document_indices = get_all_document_indices(
                    active.primary, active.secondary
                )
            else:
                document_indices = [
                    get_default_document_index(
                        active.primary, active.secondary, db_session
                    )
                ]
            ok = setup_document_indices(
                document_indices=document_indices,
                index_setting=IndexingSetting.from_db_model(active.primary),
                secondary_index_setting=(
                    IndexingSetting.from_db_model(active.secondary)
                    if active.secondary
                    else None
                ),
            )
            if not ok:
                raise RuntimeError(
                    "Vespa did not initialize within the specified timeout."
                )

        _SETUP_COMPLETE = True
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
        os.chdir(original_cwd)


================================================
FILE: backend/tests/external_dependency_unit/hierarchy/__init__.py
================================================


================================================
FILE: backend/tests/external_dependency_unit/hierarchy/test_hierarchy_access_filter.py
================================================
"""Tests for hierarchy node access filtering.

Validates that the overlap operator on external_user_group_ids works correctly
with PostgreSQL's VARCHAR[] column type. This specifically tests the fix for
the `character varying[] && text[]` type mismatch error.
"""

from collections.abc import Generator
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from ee.onyx.db.hierarchy import _get_accessible_hierarchy_nodes_for_source
from onyx.configs.constants import DocumentSource
from onyx.db.enums import HierarchyNodeType
from onyx.db.models import HierarchyNode


def _make_node(
    raw_node_id: str,
    display_name: str,
    *,
    is_public: bool = False,
    external_user_emails: list[str] | None = None,
    external_user_group_ids: list[str] | None = None,
) -> HierarchyNode:
    return HierarchyNode(
        raw_node_id=raw_node_id,
        display_name=display_name,
        source=DocumentSource.GOOGLE_DRIVE,
        node_type=HierarchyNodeType.FOLDER,
        is_public=is_public,
        external_user_emails=external_user_emails,
        external_user_group_ids=external_user_group_ids,
    )


@pytest.fixture()
def seeded_nodes(db_session: Session) -> Generator[list[HierarchyNode], None, None]:
    """Seed hierarchy nodes with various permission configurations."""
    tag = uuid4().hex[:8]
    nodes = [
        _make_node(
            f"public_{tag}",
            f"Public Folder {tag}",
            is_public=True,
        ),
        _make_node(
            f"email_only_{tag}",
            f"Email-Only Folder {tag}",
            external_user_emails=["alice@example.com"],
        ),
        _make_node(
            f"group_only_{tag}",
            f"Group-Only Folder {tag}",
            external_user_group_ids=["group_engineering", "group_design"],
        ),
        _make_node(
            f"private_{tag}",
            f"Private Folder {tag}",
        ),
    ]
    for node in nodes:
        db_session.add(node)
    db_session.flush()

    yield nodes

    # Cleanup
    for node in nodes:
        db_session.delete(node)
    db_session.commit()


def test_group_overlap_filter(
    db_session: Session,
    seeded_nodes: list[HierarchyNode],
) -> None:
    """The overlap (&&) operator must work on the VARCHAR[] column.

    This is the core regression test: before the cast fix, PostgreSQL raised
    `operator does not exist: character varying[] && text[]`.
    """
    results = _get_accessible_hierarchy_nodes_for_source(
        db_session,
        source=DocumentSource.GOOGLE_DRIVE,
        user_email="",
        external_group_ids=["group_engineering"],
    )
    result_ids = {n.raw_node_id for n in results}

    public_node, _, group_node, private_node = seeded_nodes
    assert public_node.raw_node_id in result_ids
    assert group_node.raw_node_id in result_ids
    assert private_node.raw_node_id not in result_ids


def test_email_filter(
    db_session: Session,
    seeded_nodes: list[HierarchyNode],
) -> None:
    """User email matching should return the email-permissioned node."""
    results = _get_accessible_hierarchy_nodes_for_source(
        db_session,
        source=DocumentSource.GOOGLE_DRIVE,
        user_email="alice@example.com",
        external_group_ids=[],
    )
    result_ids = {n.raw_node_id for n in results}

    public_node, email_node, group_node, private_node = seeded_nodes
    assert public_node.raw_node_id in result_ids
    assert email_node.raw_node_id in result_ids
    assert group_node.raw_node_id not in result_ids
    assert private_node.raw_node_id not in result_ids


def test_no_credentials_returns_only_public(
    db_session: Session,
    seeded_nodes: list[HierarchyNode],
) -> None:
    """With no email and no groups, only public nodes should be returned."""
    results = _get_accessible_hierarchy_nodes_for_source(
        db_session,
        source=DocumentSource.GOOGLE_DRIVE,
        user_email="",
        external_group_ids=[],
    )
    result_ids = {n.raw_node_id for n in results}

    public_node, email_node, group_node, private_node = seeded_nodes
    assert public_node.raw_node_id in result_ids
    assert email_node.raw_node_id not in result_ids
    assert group_node.raw_node_id not in result_ids
    assert private_node.raw_node_id not in result_ids


def test_combined_email_and_group(
    db_session: Session,
    seeded_nodes: list[HierarchyNode],
) -> None:
    """Both email and group filters should apply together via OR."""
    results = _get_accessible_hierarchy_nodes_for_source(
        db_session,
        source=DocumentSource.GOOGLE_DRIVE,
        user_email="alice@example.com",
        external_group_ids=["group_design"],
    )
    result_ids = {n.raw_node_id for n in results}

    public_node, email_node, group_node, private_node = seeded_nodes
    assert public_node.raw_node_id in result_ids
    assert email_node.raw_node_id in result_ids
    assert group_node.raw_node_id in result_ids
    assert private_node.raw_node_id not in result_ids


================================================
FILE: backend/tests/external_dependency_unit/llm/test_llm_provider.py
================================================
"""
Tests for the test_llm_configuration endpoint (/admin/llm/test).

This tests the LLM configuration testing functionality which verifies
that LLM credentials are valid before saving them.
"""

from collections.abc import Generator
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import UserRole
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.server.manage.llm.api import (
    test_default_provider as run_test_default_provider,
)
from onyx.server.manage.llm.api import (
    test_llm_configuration as run_test_llm_configuration,
)
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.llm.models import TestLLMRequest as LLMTestRequest


def _create_mock_admin() -> MagicMock:
    """Create a mock admin user for testing."""
    mock_admin = MagicMock()
    mock_admin.role = UserRole.ADMIN
    return mock_admin


def _create_test_provider(
    db_session: Session,
    name: str,
    api_key: str = "sk-test-key-00000000000000000000000000000000000",
) -> LLMProviderView:
    """Helper to create a test LLM provider in the database."""
    return upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=name,
            provider=LlmProviderNames.OPENAI,
            api_key=api_key,
            api_key_changed=True,
            model_configurations=[
                ModelConfigurationUpsertRequest(name="gpt-4o-mini", is_visible=True)
            ],
        ),
        db_session=db_session,
    )


def _cleanup_provider(db_session: Session, name: str) -> None:
    """Helper to clean up a test provider by name."""
    provider = fetch_existing_llm_provider(name=name, db_session=db_session)
    if provider:
        remove_llm_provider(db_session, provider.id)


@pytest.fixture
def provider_name() -> Generator[str, None, None]:
    """Generate a unique provider name for each test."""
    yield f"test-provider-{uuid4().hex[:8]}"


class TestLLMConfigurationEndpoint:
    """Tests for the test_llm_configuration endpoint."""

    def test_successful_llm_test_with_new_provider(
        self,
        db_session: Session,
        provider_name: str,  # noqa: ARG002
    ) -> None:
        """
        Test that a successful LLM test returns normally (no exception).

        When test_llm returns None (success), the endpoint should complete
        without raising an exception.
        """
        captured_llms: list[LLM] = []

        def mock_test_llm_success(llm: LLM) -> str | None:
            """Mock test_llm that always succeeds."""
            captured_llms.append(llm)
            return None  # Success

        try:
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_success
            ):
                # This should complete without exception
                run_test_llm_configuration(
                    test_llm_request=LLMTestRequest(
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-new-test-key-0000000000000000000000000000",
                        api_key_changed=True,
                        custom_config_changed=False,
                        model="gpt-4o-mini",
                    ),
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Verify test_llm was called
            assert len(captured_llms) == 1, "test_llm should have been called once"

            # Verify the LLM was configured with the correct model
            assert captured_llms[0].config.model_name == "gpt-4o-mini"
            assert captured_llms[0].config.model_provider == LlmProviderNames.OPENAI

        finally:
            db_session.rollback()

    def test_failed_llm_test_raises_onyx_error(
        self,
        db_session: Session,
        provider_name: str,  # noqa: ARG002
    ) -> None:
        """
        Test that a failed LLM test raises an OnyxError with VALIDATION_ERROR.

        When test_llm returns an error message, the endpoint should raise
        an OnyxError with the error details.
        """
        error_message = "Invalid API key: Authentication failed"

        def mock_test_llm_failure(llm: LLM) -> str | None:  # noqa: ARG001
            """Mock test_llm that always fails."""
            return error_message

        try:
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_failure
            ):
                with pytest.raises(OnyxError) as exc_info:
                    run_test_llm_configuration(
                        test_llm_request=LLMTestRequest(
                            provider=LlmProviderNames.OPENAI,
                            api_key="sk-invalid-key-00000000000000000000000000",
                            api_key_changed=True,
                            custom_config_changed=False,
                            model="gpt-4o-mini",
                        ),
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert exc_info.value.detail == error_message

        finally:
            db_session.rollback()

    def test_uses_existing_provider_api_key_when_not_changed(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when testing an existing provider without changing the API key,
        the stored API key from the database is used.
        """
        original_api_key = "sk-original-stored-key-00000000000000000000"
        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            """Mock test_llm that captures the LLM for inspection."""
            captured_llms.append(llm)
            return None

        try:
            # First, create the provider in the database
            provider = _create_test_provider(
                db_session, provider_name, api_key=original_api_key
            )

            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                # Test with api_key_changed=False - should use stored key
                run_test_llm_configuration(
                    test_llm_request=LLMTestRequest(
                        id=provider.id,
                        provider=LlmProviderNames.OPENAI,
                        api_key=None,  # Not providing a new key
                        api_key_changed=False,  # Using existing key
                        custom_config_changed=False,
                        model="gpt-4o-mini",
                    ),
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Verify test_llm was called with the original API key
            assert len(captured_llms) == 1
            assert captured_llms[0].config.api_key == original_api_key

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_uses_new_api_key_when_changed(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when testing an existing provider with a new API key,
        the new API key is used instead of the stored one.
        """
        original_api_key = "sk-original-stored-key-00000000000000000000"
        new_api_key = "sk-new-updated-key-000000000000000000000000"
        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            """Mock test_llm that captures the LLM for inspection."""
            captured_llms.append(llm)
            return None

        try:
            # First, create the provider in the database
            provider = _create_test_provider(
                db_session, provider_name, api_key=original_api_key
            )

            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                # Test with api_key_changed=True - should use new key
                run_test_llm_configuration(
                    test_llm_request=LLMTestRequest(
                        id=provider.id,
                        provider=LlmProviderNames.OPENAI,
                        api_key=new_api_key,  # Providing a new key
                        api_key_changed=True,  # Key is being changed
                        custom_config_changed=False,
                        model="gpt-4o-mini",
                    ),
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Verify test_llm was called with the new API key
            assert len(captured_llms) == 1
            assert captured_llms[0].config.api_key == new_api_key

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_uses_existing_custom_config_when_not_changed(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when testing an existing provider without changing custom_config,
        the stored custom_config from the database is used.
        """
        original_custom_config = {"custom_key": "original_value"}
        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            """Mock test_llm that captures the LLM for inspection."""
            captured_llms.append(llm)
            return None

        try:
            # First, create the provider in the database with custom_config
            provider = upsert_llm_provider(
                LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    custom_config=original_custom_config,
                    custom_config_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        )
                    ],
                ),
                db_session=db_session,
            )

            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                # Test with custom_config_changed=False - should use stored config
                run_test_llm_configuration(
                    test_llm_request=LLMTestRequest(
                        id=provider.id,
                        provider=LlmProviderNames.OPENAI,
                        api_key=None,
                        api_key_changed=False,
                        custom_config=None,  # Not providing new config
                        custom_config_changed=False,  # Using existing config
                        model="gpt-4o-mini",
                    ),
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Verify test_llm was called with the original custom_config
            assert len(captured_llms) == 1
            assert captured_llms[0].config.custom_config == original_custom_config

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_different_model_names(
        self,
        db_session: Session,
    ) -> None:
        """
        Test that the endpoint correctly passes different model names to the LLM.
        """
        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            captured_llms.append(llm)
            return None

        test_models = ["gpt-4", "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo"]

        try:
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                for model_name in test_models:
                    run_test_llm_configuration(
                        test_llm_request=LLMTestRequest(
                            provider=LlmProviderNames.OPENAI,
                            api_key="sk-test-key-00000000000000000000000000000000000",
                            api_key_changed=True,
                            custom_config_changed=False,
                            model=model_name,
                        ),
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

            # Verify all models were tested
            assert len(captured_llms) == len(test_models)

            for i, llm in enumerate(captured_llms):
                assert (
                    llm.config.model_name == test_models[i]
                ), f"Expected model {test_models[i]}, got {llm.config.model_name}"

        finally:
            db_session.rollback()


class TestDefaultProviderEndpoint:
    """Tests for the test_default_provider endpoint (/admin/llm/test/default)."""

    def test_default_provider_switching(
        self,
        db_session: Session,
    ) -> None:
        """
        Test that run_test_default_provider correctly uses the default provider
        and responds to changes in default model and default provider.

        Steps:
        1. Upload provider 1 with models, set as default
        2. Call run_test_default_provider - should use provider 1's default model
        3. Upload provider 2 with models (not default)
        4. Call run_test_default_provider - should still use provider 1
        5. Change the default model on provider 1
        6. Call run_test_default_provider - should use new model on provider 1
        7. Change the default provider to provider 2
        8. Call run_test_default_provider - should use provider 2
        """
        provider_1_name = f"test-provider-1-{uuid4().hex[:8]}"
        provider_2_name = f"test-provider-2-{uuid4().hex[:8]}"

        provider_1_api_key = "sk-provider1-key-000000000000000000000000000"
        provider_2_api_key = "sk-provider2-key-000000000000000000000000000"

        provider_1_initial_model = "gpt-4"
        provider_1_updated_model = "gpt-4o"
        provider_2_default_model = "gpt-4o-mini"

        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            """Mock test_llm that captures the LLM for inspection."""
            captured_llms.append(llm)
            return None

        try:
            # Step 1: Create provider 1 with models, it becomes default (first provider)
            provider_1 = upsert_llm_provider(
                LLMProviderUpsertRequest(
                    name=provider_1_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key=provider_1_api_key,
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                        ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
                    ],
                ),
                db_session=db_session,
            )

            # Set provider 1 as the default provider explicitly
            update_default_provider(provider_1.id, provider_1_initial_model, db_session)

            # Step 2: Call run_test_default_provider - should use provider 1's default model
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                run_test_default_provider(_=_create_mock_admin())

            assert len(captured_llms) == 1
            assert captured_llms[0].config.model_name == provider_1_initial_model
            assert captured_llms[0].config.api_key == provider_1_api_key
            captured_llms.clear()

            # Step 3: Create provider 2 (not default)
            provider_2 = upsert_llm_provider(
                LLMProviderUpsertRequest(
                    name=provider_2_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key=provider_2_api_key,
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                        ModelConfigurationUpsertRequest(
                            name="gpt-3.5-turbo", is_visible=True
                        ),
                    ],
                ),
                db_session=db_session,
            )

            # Step 4: Call run_test_default_provider - should still use provider 1
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                run_test_default_provider(_=_create_mock_admin())

            assert len(captured_llms) == 1
            assert captured_llms[0].config.model_name == provider_1_initial_model
            assert captured_llms[0].config.api_key == provider_1_api_key
            captured_llms.clear()

            # Step 5: Update provider 1's default model
            upsert_llm_provider(
                LLMProviderUpsertRequest(
                    id=provider_1.id,
                    name=provider_1_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key=provider_1_api_key,
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                        ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
                    ],
                ),
                db_session=db_session,
            )

            # Set provider 1's default model to the updated model
            update_default_provider(provider_1.id, provider_1_updated_model, db_session)

            # Step 6: Call run_test_default_provider - should use new model on provider 1
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                run_test_default_provider(_=_create_mock_admin())

            assert len(captured_llms) == 1
            assert captured_llms[0].config.model_name == provider_1_updated_model
            assert captured_llms[0].config.api_key == provider_1_api_key
            captured_llms.clear()

            # Step 7: Change the default provider to provider 2
            update_default_provider(provider_2.id, provider_2_default_model, db_session)

            # Step 8: Call run_test_default_provider - should use provider 2
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                run_test_default_provider(_=_create_mock_admin())

            assert len(captured_llms) == 1
            assert captured_llms[0].config.model_name == provider_2_default_model
            assert captured_llms[0].config.api_key == provider_2_api_key

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_1_name)
            _cleanup_provider(db_session, provider_2_name)

    def test_no_default_provider_raises_exception(
        self,
        db_session: Session,
    ) -> None:
        """
        Test that when no default provider exists, the endpoint raises an exception.
        """
        # Clear any existing providers to ensure no default exists
        from onyx.db.llm import fetch_existing_llm_providers

        try:
            existing_providers = fetch_existing_llm_providers(
                db_session, flow_type_filter=[LLMModelFlowType.CHAT]
            )
            provider_names_to_restore: list[str] = []

            for provider in existing_providers:
                provider_names_to_restore.append(provider.name)

            # Remove all providers temporarily
            for provider in existing_providers:
                remove_llm_provider(db_session, provider.id)

            # Now run_test_default_provider should fail
            with pytest.raises(OnyxError) as exc_info:
                run_test_default_provider(_=_create_mock_admin())

            assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
            assert "No LLM Provider setup" in exc_info.value.detail

        finally:
            db_session.rollback()

    def test_default_provider_test_failure(
        self,
        db_session: Session,
    ) -> None:
        """
        Test that when the default provider's LLM test fails, an exception is raised.
        """
        provider_name = f"test-provider-{uuid4().hex[:8]}"
        error_message = "Connection to LLM provider failed"

        def mock_test_llm_failure(llm: LLM) -> str | None:  # noqa: ARG001
            """Mock test_llm that always fails."""
            return error_message

        try:
            # Create a provider and set it as default
            provider = upsert_llm_provider(
                LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                ),
                db_session=db_session,
            )
            update_default_provider(provider.id, "gpt-4o-mini", db_session)

            # Test should fail
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_failure
            ):
                with pytest.raises(OnyxError) as exc_info:
                    run_test_default_provider(_=_create_mock_admin())

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert exc_info.value.detail == error_message

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)


================================================
FILE: backend/tests/external_dependency_unit/llm/test_llm_provider_api_base.py
================================================
"""
Tests for LLM provider api_base and custom_config change restrictions.

This ensures we don't have a vulnerability where an admin could change the api_base
or custom_config of an LLM provider without changing the API key, allowing them to
redirect API requests (containing the real API key in headers) to an attacker-controlled
server.

These are external dependency unit tests because they need a real database but
also need to control the MULTI_TENANT setting via patching.
"""

from collections.abc import Generator
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import UserRole
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.api import _mask_string
from onyx.server.manage.llm.api import put_llm_provider
from onyx.server.manage.llm.api import test_llm_configuration as run_llm_config_test
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.llm.models import TestLLMRequest as LLMTestRequest
from tests.external_dependency_unit.mock_llm import LLM


def _create_test_provider(
    db_session: Session,
    name: str,
    api_base: str | None = None,
    custom_config: dict[str, str] | None = None,
) -> LLMProviderView:
    """Helper to create a test LLM provider."""
    return upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=name,
            provider=LlmProviderNames.OPENAI,
            api_key="sk-test-key-00000000000000000000000000000000000",
            api_key_changed=True,
            api_base=api_base,
            custom_config=custom_config,
            model_configurations=[
                ModelConfigurationUpsertRequest(name="gpt-4o-mini", is_visible=True)
            ],
        ),
        db_session=db_session,
    )


def _cleanup_provider(db_session: Session, name: str) -> None:
    """Helper to clean up a test provider by name."""
    provider = fetch_existing_llm_provider(name=name, db_session=db_session)
    if provider:
        remove_llm_provider(db_session, provider.id)


def _create_mock_admin() -> MagicMock:
    """Create a mock admin user for testing."""
    mock_admin = MagicMock()
    mock_admin.role = UserRole.ADMIN
    return mock_admin


@pytest.fixture
def provider_name() -> Generator[str, None, None]:
    """Generate a unique provider name for each test."""
    yield f"test-provider-{uuid4().hex[:8]}"


class TestLLMProviderChanges:
    """Tests for api_base change restrictions when updating LLM providers."""

    def test_blocks_api_base_change_without_key_change__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        In multi-tenant mode, changing api_base without also changing
        the API key should be blocked.
        """
        try:
            provider = _create_test_provider(db_session, provider_name)

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_base="https://attacker.example.com",
                )

                with pytest.raises(OnyxError) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert "cannot be changed without changing the API key" in str(
                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_api_base_change_with_key_change__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Changing api_base IS allowed when the API key is also being changed.
        """
        try:
            provider = _create_test_provider(db_session, provider_name)

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-new-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    api_base="https://custom-endpoint.example.com/v1",
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.api_base == "https://custom-endpoint.example.com/v1"
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_same_api_base__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Keeping the same api_base (no change) is allowed without changing the API key.
        """
        original_api_base = "https://original.example.com/v1"

        try:
            provider = _create_test_provider(
                db_session, provider_name, api_base=original_api_base
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_base=original_api_base,
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.api_base == original_api_base
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_empty_string_api_base_when_existing_is_none__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Treat empty-string api_base from clients as unset when comparing provider
        changes. This allows model-only updates when provider has no custom base URL.
        """
        try:
            view = _create_test_provider(db_session, provider_name, api_base=None)

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=view.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_base="",
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.api_base is None
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_blocks_clearing_api_base__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Clearing api_base (setting to None when it was previously set)
        is also blocked without changing the API key.
        """
        original_api_base = "https://original.example.com/v1"

        try:
            provider = _create_test_provider(
                db_session, provider_name, api_base=original_api_base
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_base=None,
                )

                with pytest.raises(OnyxError) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert "cannot be changed without changing the API key" in str(
                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_api_base_change__single_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        In single-tenant mode (MULTI_TENANT=False), changing api_base without
        changing the API key IS allowed. This is by design since single-tenant
        users have full control over their deployment.
        """
        try:
            provider = _create_test_provider(db_session, provider_name)

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", False):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_base="https://custom.example.com/v1",
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.api_base == "https://custom.example.com/v1"
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_new_provider_creation_not_affected__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Creating a new provider with an api_base should work regardless of
        api_key_changed (since there's no existing key to protect).
        """
        try:
            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                create_request = LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-new-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    api_base="https://custom.example.com/v1",
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=create_request,
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.api_base == "https://custom.example.com/v1"
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_blocks_custom_config_change_without_key_change__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        In multi-tenant mode, changing custom_config without also changing
        the API key should be blocked (custom_config can set env vars that
        redirect LLM API requests).
        """
        try:
            provider = _create_test_provider(
                db_session,
                provider_name,
                custom_config={"SOME_CONFIG": "original_value"},
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    custom_config={"OPENAI_API_BASE": "https://attacker.example.com"},
                    custom_config_changed=True,
                )

                with pytest.raises(OnyxError) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert "cannot be changed without changing the API key" in str(
                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_blocks_adding_custom_config_without_key_change__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Adding custom_config when none existed should also be blocked
        without changing the API key.
        """
        try:
            provider = _create_test_provider(db_session, provider_name)

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    custom_config={"OPENAI_API_BASE": "https://attacker.example.com"},
                    custom_config_changed=True,
                )

                with pytest.raises(OnyxError) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
                        _=_create_mock_admin(),
                        db_session=db_session,
                    )

                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
                assert "cannot be changed without changing the API key" in str(
                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_custom_config_change_with_key_change__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Changing custom_config IS allowed when the API key is also being changed.
        """
        new_config = {"AWS_REGION_NAME": "us-west-2"}

        try:
            provider = _create_test_provider(
                db_session,
                provider_name,
                custom_config={"AWS_REGION_NAME": "us-east-1"},
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-new-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    custom_config_changed=True,
                    custom_config=new_config,
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.custom_config == new_config
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_same_custom_config__multi_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Keeping the same custom_config (no change) is allowed without changing the API key.
        """
        original_config = {"AWS_REGION_NAME": "us-east-1"}

        try:
            provider = _create_test_provider(
                db_session, provider_name, custom_config=original_config
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", True):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    custom_config=original_config,
                    custom_config_changed=True,
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.custom_config == original_config
        finally:
            _cleanup_provider(db_session, provider_name)

    def test_allows_custom_config_change__single_tenant(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        In single-tenant mode, changing custom_config without changing
        the API key IS allowed.
        """
        new_config = {"AWS_REGION_NAME": "eu-west-1"}

        try:
            provider = _create_test_provider(
                db_session,
                provider_name,
                custom_config={"AWS_REGION_NAME": "us-east-1"},
            )

            with patch("onyx.server.manage.llm.api.MULTI_TENANT", False):
                update_request = LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    custom_config=new_config,
                    custom_config_changed=True,
                )

                result = put_llm_provider(
                    llm_provider_upsert_request=update_request,
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

                assert result.custom_config == new_config
        finally:
            _cleanup_provider(db_session, provider_name)


def test_upload_with_custom_config_then_change(
    db_session: Session,
) -> None:
    """
    Run test + upload with a custom config (vertex).
    Edit attributes of provider that are not custom config or api key.
    Check that the test and update maintain the same values.
    """
    custom_config = {
        "vertex_credentials": "1234",
        "vertex_location": "us-east-1",
    }
    name = "test-provider-vertex-ai"
    provider_name = LlmProviderNames.VERTEX_AI.value
    default_model_name = "gemini-2.5-pro"

    # List to capture LLM inputs passed to test_llm
    captured_llms: list = []

    def capture_test_llm(llm: LLM) -> str:
        """Captures the LLM input and returns None (success)."""
        captured_llms.append(llm)
        return ""

    try:
        # Patch the test_llm method
        with patch("onyx.server.manage.llm.api.test_llm", side_effect=capture_test_llm):
            run_llm_config_test(
                LLMTestRequest(
                    provider=provider_name,
                    model=default_model_name,
                    api_key_changed=False,
                    custom_config_changed=True,
                    custom_config=custom_config,
                ),
                _=_create_mock_admin(),
                db_session=db_session,
            )

            provider = put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    name=name,
                    provider=provider_name,
                    custom_config=custom_config,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name=default_model_name, is_visible=True
                        )
                    ],
                    api_key_changed=False,
                    custom_config_changed=True,
                    is_auto_mode=False,
                ),
                is_creation=True,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            # Turn auto mode off
            run_llm_config_test(
                LLMTestRequest(
                    id=provider.id,
                    provider=provider_name,
                    model=default_model_name,
                    api_key_changed=False,
                    custom_config_changed=False,
                ),
                _=_create_mock_admin(),
                db_session=db_session,
            )

            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    id=provider.id,
                    name=name,
                    provider=provider_name,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name=default_model_name, is_visible=True
                        ),
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                    api_key_changed=False,
                    custom_config_changed=False,
                    is_auto_mode=False,
                ),
                is_creation=False,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            # Verify that test_llm was called and custom_config matches
            assert len(captured_llms) == 2, "test_llm should have been called 2 times"

            for llm in captured_llms:
                assert (
                    llm.config.custom_config == custom_config
                ), f"Expected custom_config {custom_config}, but got {llm.config.custom_config}"

            # Check inside the database and check that custom_config is the same as the original
            db_provider = fetch_existing_llm_provider(name=name, db_session=db_session)
            if not db_provider:
                assert False, "Provider not found in the database"

            assert (
                db_provider.custom_config == custom_config
            ), f"Expected custom_config {custom_config}, but got {db_provider.custom_config}"
    finally:
        db_session.rollback()
        _cleanup_provider(db_session, name)


def test_preserves_masked_sensitive_custom_config_on_provider_update(
    db_session: Session,
) -> None:
    """Masked sensitive values from the UI should not overwrite stored secrets."""
    name = f"test-provider-vertex-update-{uuid4().hex[:8]}"
    provider = LlmProviderNames.VERTEX_AI.value
    default_model_name = "gemini-2.5-pro"
    original_custom_config = {
        "vertex_credentials": '{"type":"service_account","private_key":"REAL_PRIVATE_KEY"}',
        "vertex_location": "global",
    }

    try:
        view = put_llm_provider(
            llm_provider_upsert_request=LLMProviderUpsertRequest(
                name=name,
                provider=provider,
                custom_config=original_custom_config,
                model_configurations=[
                    ModelConfigurationUpsertRequest(
                        name=default_model_name, is_visible=True
                    )
                ],
                api_key_changed=False,
                custom_config_changed=True,
                is_auto_mode=False,
            ),
            is_creation=True,
            _=_create_mock_admin(),
            db_session=db_session,
        )

        with patch("onyx.server.manage.llm.api.MULTI_TENANT", False):
            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    id=view.id,
                    name=name,
                    provider=provider,
                    custom_config={
                        "vertex_credentials": _mask_string(
                            original_custom_config["vertex_credentials"]
                        ),
                        "vertex_location": "us-central1",
                    },
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name=default_model_name, is_visible=True
                        )
                    ],
                    api_key_changed=False,
                    custom_config_changed=True,
                    is_auto_mode=False,
                ),
                is_creation=False,
                _=_create_mock_admin(),
                db_session=db_session,
            )

        updated_provider = fetch_existing_llm_provider(name=name, db_session=db_session)
        assert updated_provider is not None
        assert updated_provider.custom_config is not None
        assert (
            updated_provider.custom_config["vertex_credentials"]
            == original_custom_config["vertex_credentials"]
        )
        assert updated_provider.custom_config["vertex_location"] == "us-central1"
    finally:
        db_session.rollback()
        _cleanup_provider(db_session, name)


def test_preserves_masked_sensitive_custom_config_on_test_request(
    db_session: Session,
) -> None:
    """LLM test should restore masked sensitive custom config values before invocation."""
    name = f"test-provider-vertex-test-{uuid4().hex[:8]}"
    provider_name = LlmProviderNames.VERTEX_AI.value
    default_model_name = "gemini-2.5-pro"
    original_custom_config = {
        "vertex_credentials": '{"type":"service_account","private_key":"REAL_PRIVATE_KEY"}',
        "vertex_location": "global",
    }
    captured_llms: list[LLM] = []

    def capture_test_llm(llm: LLM) -> str:
        captured_llms.append(llm)
        return ""

    try:
        provider = put_llm_provider(
            llm_provider_upsert_request=LLMProviderUpsertRequest(
                name=name,
                provider=provider_name,
                custom_config=original_custom_config,
                model_configurations=[
                    ModelConfigurationUpsertRequest(
                        name=default_model_name, is_visible=True
                    )
                ],
                api_key_changed=False,
                custom_config_changed=True,
                is_auto_mode=False,
            ),
            is_creation=True,
            _=_create_mock_admin(),
            db_session=db_session,
        )

        with patch("onyx.server.manage.llm.api.test_llm", side_effect=capture_test_llm):
            run_llm_config_test(
                LLMTestRequest(
                    id=provider.id,
                    provider=provider_name,
                    model=default_model_name,
                    api_key_changed=False,
                    custom_config_changed=True,
                    custom_config={
                        "vertex_credentials": _mask_string(
                            original_custom_config["vertex_credentials"]
                        ),
                        "vertex_location": "us-central1",
                    },
                ),
                _=_create_mock_admin(),
                db_session=db_session,
            )

        assert len(captured_llms) == 1
        assert captured_llms[0].config.custom_config is not None
        assert (
            captured_llms[0].config.custom_config["vertex_credentials"]
            == original_custom_config["vertex_credentials"]
        )
        assert captured_llms[0].config.custom_config["vertex_location"] == "us-central1"
    finally:
        db_session.rollback()
        _cleanup_provider(db_session, name)


================================================
FILE: backend/tests/external_dependency_unit/llm/test_llm_provider_auto_mode.py
================================================
"""
Tests for the LLM Provider Auto Mode feature.

This tests the automatic model syncing from GitHub config when a provider
is uploaded with is_auto_mode=True.
"""

from collections.abc import Generator
from datetime import datetime
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import fetch_auto_mode_providers
from onyx.db.llm import fetch_default_llm_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import fetch_existing_llm_providers
from onyx.db.llm import fetch_llm_provider_view
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import sync_auto_mode_models
from onyx.db.llm import update_default_provider
from onyx.db.models import UserRole
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.well_known_providers.auto_update_models import LLMProviderRecommendation
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.llm.well_known_providers.models import SimpleKnownModel
from onyx.server.manage.llm.api import put_llm_provider
from onyx.server.manage.llm.api import (
    test_default_provider as run_test_default_provider,
)
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest


def _create_mock_admin() -> MagicMock:
    """Create a mock admin user for testing."""
    mock_admin = MagicMock()
    mock_admin.role = UserRole.ADMIN
    return mock_admin


def _cleanup_provider(db_session: Session, name: str) -> None:
    """Helper to clean up a test provider by name."""
    provider = fetch_existing_llm_provider(name=name, db_session=db_session)
    if provider:
        remove_llm_provider(db_session, provider.id)


def _create_mock_llm_recommendations(
    provider: str,
    default_model_name: str,
    additional_models: list[str],
) -> LLMRecommendations:
    """Create a mock LLMRecommendations object for testing.

    Args:
        provider: The provider name (e.g., "openai")
        default_model_name: The name of the default model
        additional_models: List of additional visible model names

    Returns:
        LLMRecommendations object with the specified configuration
    """
    return LLMRecommendations(
        version="1.0.0",
        updated_at=datetime.now(),
        providers={
            provider: LLMProviderRecommendation(
                default_model=SimpleKnownModel(
                    name=default_model_name,
                    display_name=default_model_name.upper(),
                ),
                additional_visible_models=[
                    SimpleKnownModel(name=model, display_name=model.upper())
                    for model in additional_models
                ],
            )
        },
    )


@pytest.fixture
def provider_name() -> Generator[str, None, None]:
    """Generate a unique provider name for each test."""
    yield f"test-auto-provider-{uuid4().hex[:8]}"


class TestAutoModeSyncFeature:
    """Tests for the Auto Mode model syncing feature."""

    def test_auto_mode_syncs_models_from_github_config(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when a provider is uploaded with auto mode enabled and no model
        configurations, the models from fetch_llm_recommendations_from_github()
        are synced to the provider.

        Steps:
        1. Mock fetch_llm_recommendations_from_github to return a known config
        2. Upload provider with is_auto_mode=True and no model_configurations
        3. Fetch the provider and verify all recommended models are present
        4. Set the provider as default
        5. Fetch the default provider and verify the default model matches the config
        """
        # Define the expected models from the mock GitHub config
        expected_default_model = "gpt-4o"
        expected_additional_models = ["gpt-4o-mini", "gpt-4-turbo"]
        all_expected_models = [expected_default_model] + expected_additional_models

        # Create the mock LLMRecommendations
        mock_recommendations = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name=expected_default_model,
            additional_models=expected_additional_models,
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                # Step 1-2: Upload provider with auto mode on and no model configs
                # NOTE: We need to provide a default_model_name for the initial upsert,
                # but auto mode will override it with the GitHub config's default
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],  # No model configs provided
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 3: Verify all models from the GitHub config are present
            # Fetch the provider fresh from the database
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None, "Provider should exist"
            assert provider.is_auto_mode is True, "Provider should be in auto mode"

            # Check that all expected models are present and visible
            model_names = {mc.name for mc in provider.model_configurations}
            for expected_model in all_expected_models:
                assert (
                    expected_model in model_names
                ), f"Expected model '{expected_model}' not found in provider models"

            # Verify visibility of all synced models
            for mc in provider.model_configurations:
                if mc.name in all_expected_models:
                    assert mc.is_visible is True, f"Model '{mc.name}' should be visible"

            # Step 4: Set the provider as default
            update_default_provider(provider.id, expected_default_model, db_session)

            # Step 5: Fetch the default provider and verify
            default_model = fetch_default_llm_model(db_session)
            assert default_model is not None, "Default provider should exist"
            assert (
                default_model.llm_provider.name == provider_name
            ), "Default provider should be our test provider"
            assert (
                default_model.name == expected_default_model
            ), f"Default provider's default model should be '{expected_default_model}'"
            assert (
                default_model.llm_provider.is_auto_mode is True
            ), "Default provider should be in auto mode"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_auto_mode_with_multiple_providers_in_config(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that auto mode only syncs models for the matching provider type,
        ignoring models from other providers in the config.
        """
        # Create recommendations with multiple providers
        mock_recommendations = LLMRecommendations(
            version="1.0.0",
            updated_at=datetime.now(),
            providers={
                LlmProviderNames.OPENAI: LLMProviderRecommendation(
                    default_model=SimpleKnownModel(
                        name="gpt-4o", display_name="GPT-4o"
                    ),
                    additional_visible_models=[
                        SimpleKnownModel(name="gpt-4o-mini", display_name="GPT-4o Mini")
                    ],
                ),
                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(
                    default_model=SimpleKnownModel(
                        name="claude-3-5-sonnet-latest",
                        display_name="Claude 3.5 Sonnet",
                    ),
                    additional_visible_models=[
                        SimpleKnownModel(
                            name="claude-haiku-4-5",
                            display_name="Claude Haiku 4.5",
                        )
                    ],
                ),
            },
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                # Upload an OpenAI provider with auto mode
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Verify only OpenAI models are synced, not Anthropic models
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            model_names = {mc.name for mc in provider.model_configurations}

            # OpenAI models should be present
            assert "gpt-4o" in model_names
            assert "gpt-4o-mini" in model_names

            # Anthropic models should NOT be present
            assert "claude-3-5-sonnet-latest" not in model_names
            assert "claude-haiku-4-5" not in model_names

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_existing_provider_transition_to_auto_mode(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when an existing provider with visible models transitions to auto mode,
        models from the auto mode config become visible, and models not in the config
        become not visible.

        Steps:
        1. Upload a provider with some visible model configurations (not in auto mode)
        2. Update the provider to enable auto mode
        3. Verify:
           - Models in the auto mode config are now visible
           - Models NOT in the auto mode config are now NOT visible
        """
        # Initial models on the provider (all visible initially)
        initial_models = [
            ModelConfigurationUpsertRequest(
                name="gpt-4", is_visible=True
            ),  # Will NOT be in auto config
            ModelConfigurationUpsertRequest(
                name="gpt-4o", is_visible=True
            ),  # Will be in auto config
            ModelConfigurationUpsertRequest(
                name="gpt-3.5-turbo", is_visible=True
            ),  # Will NOT be in auto config
        ]

        # Auto mode config: gpt-4o (default) + gpt-4o-mini (additional)
        # Note: gpt-4 and gpt-3.5-turbo are NOT in this config
        auto_mode_default = "gpt-4o"
        auto_mode_additional = ["gpt-4o-mini"]
        all_auto_mode_models = [auto_mode_default] + auto_mode_additional

        mock_recommendations = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name=auto_mode_default,
            additional_models=auto_mode_additional,
        )

        try:
            # Step 1: Upload provider WITHOUT auto mode, with initial models
            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    is_auto_mode=False,  # Not in auto mode initially
                    model_configurations=initial_models,
                ),
                is_creation=True,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            # Verify initial state: all models are visible
            initial_provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert initial_provider is not None
            assert initial_provider.is_auto_mode is False

            for mc in initial_provider.model_configurations:
                assert (
                    mc.is_visible is True
                ), f"Initial model '{mc.name}' should be visible"

            # Step 2: Update provider to enable auto mode
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        id=initial_provider.id,
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key=None,  # Not changing API key
                        api_key_changed=False,
                        is_auto_mode=True,  # Now enabling auto mode
                        model_configurations=[],  # Auto mode will sync from config
                    ),
                    is_creation=False,  # This is an update
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 3: Verify model visibility after auto mode transition
            # Expire session cache to force fresh fetch after sync_auto_mode_models committed
            db_session.expire_all()
            provider_view = fetch_llm_provider_view(
                provider_name=provider_name, db_session=db_session
            )
            assert provider_view is not None
            assert provider_view.is_auto_mode is True

            # Build a map of model name -> visibility
            model_visibility = {
                mc.name: mc.is_visible for mc in provider_view.model_configurations
            }

            # Models in auto mode config should be visible
            for model_name in all_auto_mode_models:
                assert (
                    model_name in model_visibility
                ), f"Auto mode model '{model_name}' should exist"
                assert (
                    model_visibility[model_name] is True
                ), f"Auto mode model '{model_name}' should be visible"

            # Models NOT in auto mode config should NOT be visible
            models_not_in_config = ["gpt-4", "gpt-3.5-turbo"]
            for model_name in models_not_in_config:
                if model_name in model_visibility:
                    assert (
                        model_visibility[model_name] is False
                    ), f"Model '{model_name}' not in auto config should NOT be visible"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_auto_mode_provider_not_in_config(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Test that when the provider type is not in the GitHub config,
        no model syncing occurs.
        """
        # Create recommendations that don't include OpenAI
        mock_recommendations = LLMRecommendations(
            version="1.0.0",
            updated_at=datetime.now(),
            providers={
                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(
                    default_model=SimpleKnownModel(
                        name="claude-3-5-sonnet-latest",
                        display_name="Claude 3.5 Sonnet",
                    ),
                    additional_visible_models=[],
                ),
            },
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                # Upload an OpenAI provider (not in config)
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[
                            ModelConfigurationUpsertRequest(
                                name="gpt-4o",
                                is_visible=True,
                            )
                        ],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Provider should be created but without synced models from config
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            assert provider.is_auto_mode is True

            # Only the default model provided in the request should exist
            model_names = {mc.name for mc in provider.model_configurations}
            assert "gpt-4o" in model_names
            # Anthropic models should NOT be synced
            assert "claude-3-5-sonnet-latest" not in model_names

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_switching_default_between_auto_mode_providers(
        self,
        db_session: Session,
    ) -> None:
        """
        Test switching the default provider between two auto mode providers
        and verifying test_default_provider uses the correct default model.

        Steps:
        1. Create provider 1 (OpenAI) with auto mode, set as default
        2. Create provider 2 (Anthropic) with auto mode
        3. Verify provider 1 is the default
        4. Change default to provider 2
        5. Verify provider 2 is the default
        6. Run test_default_provider and verify it uses provider 2's default model
        """
        provider_1_name = f"test-auto-openai-{uuid4().hex[:8]}"
        provider_2_name = f"test-auto-anthropic-{uuid4().hex[:8]}"

        provider_1_api_key = "sk-provider1-key-000000000000000000000000000"
        provider_2_api_key = "sk-ant-provider2-key-0000000000000000000000"

        # Provider 1 (OpenAI) config
        provider_1_default_model = "gpt-4o"
        provider_1_additional_models = ["gpt-4o-mini"]

        # Provider 2 (Anthropic) config
        provider_2_default_model = "claude-3-5-sonnet-latest"
        provider_2_additional_models = ["claude-haiku-4-5"]

        # Create mock recommendations with both providers
        mock_recommendations = LLMRecommendations(
            version="1.0.0",
            updated_at=datetime.now(),
            providers={
                LlmProviderNames.OPENAI: LLMProviderRecommendation(
                    default_model=SimpleKnownModel(
                        name=provider_1_default_model,
                        display_name="GPT-4o",
                    ),
                    additional_visible_models=[
                        SimpleKnownModel(name=m, display_name=m.upper())
                        for m in provider_1_additional_models
                    ],
                ),
                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(
                    default_model=SimpleKnownModel(
                        name=provider_2_default_model,
                        display_name="Claude 3.5 Sonnet",
                    ),
                    additional_visible_models=[
                        SimpleKnownModel(name=m, display_name=m.upper())
                        for m in provider_2_additional_models
                    ],
                ),
            },
        )

        captured_llms: list[LLM] = []

        def mock_test_llm_capture(llm: LLM) -> str | None:
            """Mock test_llm that captures the LLM for inspection."""
            captured_llms.append(llm)
            return None  # Success

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                # Step 1: Create provider 1 (OpenAI) with auto mode
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_1_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key=provider_1_api_key,
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Set provider 1 as the default
            db_session.expire_all()
            provider_1 = fetch_existing_llm_provider(
                name=provider_1_name, db_session=db_session
            )
            assert provider_1 is not None
            update_default_provider(provider_1.id, provider_1_default_model, db_session)

            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=mock_recommendations,
            ):
                # Step 2: Create provider 2 (Anthropic) with auto mode
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_2_name,
                        provider=LlmProviderNames.ANTHROPIC,
                        api_key=provider_2_api_key,
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 3: Verify provider 1 is still the default
            db_session.expire_all()
            default_model = fetch_default_llm_model(db_session)
            assert default_model is not None
            assert default_model.llm_provider.name == provider_1_name
            assert default_model.name == provider_1_default_model
            assert default_model.llm_provider.is_auto_mode is True

            # Step 4: Change the default to provider 2
            provider_2 = fetch_existing_llm_provider(
                name=provider_2_name, db_session=db_session
            )
            assert provider_2 is not None
            update_default_provider(provider_2.id, provider_2_default_model, db_session)

            # Step 5: Verify provider 2 is now the default
            db_session.expire_all()
            default_model = fetch_default_llm_model(db_session)
            assert default_model is not None
            assert default_model.llm_provider.name == provider_2_name
            assert default_model.name == provider_2_default_model
            assert default_model.llm_provider.is_auto_mode is True

            # Step 6: Run test_default_provider and verify it uses provider 2's model
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_capture
            ):
                run_test_default_provider(_=_create_mock_admin())

            # Verify test_llm was called with provider 2's default model
            assert len(captured_llms) == 1
            assert captured_llms[0].config.model_name == provider_2_default_model
            assert captured_llms[0].config.api_key == provider_2_api_key

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_1_name)
            _cleanup_provider(db_session, provider_2_name)


class TestAutoModeMissingFlows:
    """Regression test: sync_auto_mode_models must create LLMModelFlow rows
    for every ModelConfiguration it inserts, otherwise the provider vanishes
    from listing queries that join through LLMModelFlow."""

    def test_sync_auto_mode_creates_flow_rows(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """
        Steps:
        1. Create a provider with no model configs (empty shell).
        2. Call sync_auto_mode_models to add models from a mock config.
        3. Assert every new ModelConfiguration has at least one LLMModelFlow.
        4. Assert fetch_existing_llm_providers (which joins through
           LLMModelFlow) returns the provider.
        """
        mock_recommendations = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )

        try:
            # Step 1: Create provider with no model configs
            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    is_auto_mode=True,
                    model_configurations=[],
                ),
                is_creation=True,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            # Step 2: Run sync_auto_mode_models (simulating the periodic sync)
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=mock_recommendations,
            )

            # Step 3: Every ModelConfiguration must have at least one LLMModelFlow
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            synced_model_names = {mc.name for mc in provider.model_configurations}
            assert "gpt-4o" in synced_model_names
            assert "gpt-4o-mini" in synced_model_names

            for mc in provider.model_configurations:
                assert len(mc.llm_model_flows) > 0, (
                    f"ModelConfiguration '{mc.name}' (id={mc.id}) has no "
                    f"LLMModelFlow rows — it will be invisible to listing queries"
                )

                flow_types = {f.llm_model_flow_type for f in mc.llm_model_flows}
                assert (
                    LLMModelFlowType.CHAT in flow_types
                ), f"ModelConfiguration '{mc.name}' is missing a CHAT flow"

            # Step 4: The provider must appear in fetch_existing_llm_providers
            listed_providers = fetch_existing_llm_providers(
                db_session=db_session,
                flow_type_filter=[LLMModelFlowType.CHAT],
            )
            listed_provider_names = {p.name for p in listed_providers}
            assert (
                provider_name in listed_provider_names
            ), f"Provider '{provider_name}' not returned by fetch_existing_llm_providers — models are missing flow rows"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)


class TestAutoModeTransitionsAndResync:
    """Tests for auto/manual transitions, config evolution, and sync idempotency."""

    def test_transition_to_auto_mode_preserves_default(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """When the default provider transitions from manual to auto mode,
        the global default should be preserved (set to the recommended model).

        Steps:
        1. Create a manual-mode provider with models, set it as global default.
        2. Transition to auto mode (model_configurations=[] triggers cascade
           delete of old ModelConfigurations and their LLMModelFlow rows).
        3. Verify the provider is still the global default, now using the
           recommended default model from the GitHub config.
        """
        initial_models = [
            ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
            ModelConfigurationUpsertRequest(name="gpt-4o-mini", is_visible=True),
        ]

        auto_config = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o-mini",
            additional_models=["gpt-4o"],
        )

        try:
            # Step 1: Create manual-mode provider and set as default
            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    is_auto_mode=False,
                    model_configurations=initial_models,
                ),
                is_creation=True,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            update_default_provider(provider.id, "gpt-4o", db_session)

            default_before = fetch_default_llm_model(db_session)
            assert default_before is not None
            assert default_before.name == "gpt-4o"
            assert default_before.llm_provider_id == provider.id

            # Step 2: Transition to auto mode
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=auto_config,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        id=provider.id,
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key=None,
                        api_key_changed=False,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=False,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 3: Default should be preserved on this provider
            db_session.expire_all()
            default_after = fetch_default_llm_model(db_session)
            assert default_after is not None, (
                "Default model should not be None after transitioning to auto mode — "
                "the provider was the default before and should remain so"
            )
            assert (
                default_after.llm_provider_id == provider.id
            ), "Default should still belong to the same provider after transition"
            assert (
                default_after.name == "gpt-4o-mini"
            ), f"Default should be updated to the recommended model 'gpt-4o-mini', got '{default_after.name}'"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_auto_to_manual_mode_preserves_models_and_stops_syncing(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Disabling auto mode should preserve the current model list and
        prevent future syncs from altering visibility.

        Steps:
        1. Create provider in auto mode — models synced from config.
        2. Update provider to manual mode (is_auto_mode=False).
        3. Verify all models remain with unchanged visibility.
        4. Call sync_auto_mode_models with a *different* config.
        5. Verify fetch_auto_mode_providers excludes this provider, so the
           periodic task would never call sync on it.
        """
        initial_config = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )

        try:
            # Step 1: Create in auto mode
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=initial_config,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            visibility_before = {
                mc.name: mc.is_visible for mc in provider.model_configurations
            }
            assert visibility_before == {"gpt-4o": True, "gpt-4o-mini": True}

            # Step 2: Switch to manual mode
            put_llm_provider(
                llm_provider_upsert_request=LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key=None,
                    api_key_changed=False,
                    is_auto_mode=False,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                ),
                is_creation=False,
                _=_create_mock_admin(),
                db_session=db_session,
            )

            # Step 3: Models unchanged
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            assert provider.is_auto_mode is False
            visibility_after = {
                mc.name: mc.is_visible for mc in provider.model_configurations
            }
            assert visibility_after == visibility_before

            # Step 4-5: Provider excluded from auto mode queries
            auto_providers = fetch_auto_mode_providers(db_session)
            auto_provider_ids = {p.id for p in auto_providers}
            assert provider.id not in auto_provider_ids

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_resync_adds_new_and_hides_removed_models(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """When the GitHub config changes between syncs, a subsequent sync
        should add newly listed models and hide models that were removed.

        Steps:
        1. Create provider in auto mode with config v1: [gpt-4o, gpt-4o-mini].
        2. Sync with config v2: [gpt-4o, gpt-4-turbo] (gpt-4o-mini removed,
           gpt-4-turbo added).
        3. Verify gpt-4o still visible, gpt-4o-mini hidden, gpt-4-turbo added
           and visible.
        """
        config_v1 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )
        config_v2 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4-turbo"],
        )

        try:
            # Step 1: Create with config v1
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=config_v1,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 2: Re-sync with config v2
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            changes = sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config_v2,
            )
            assert changes > 0

            # Step 3: Verify
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            visibility = {
                mc.name: mc.is_visible for mc in provider.model_configurations
            }

            # gpt-4o: still in config -> visible
            assert visibility["gpt-4o"] is True
            # gpt-4o-mini: removed from config -> hidden (not deleted)
            assert "gpt-4o-mini" in visibility, "Removed model should still exist in DB"
            assert visibility["gpt-4o-mini"] is False
            # gpt-4-turbo: newly added -> visible
            assert visibility["gpt-4-turbo"] is True

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_sync_is_idempotent(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Running sync twice with the same config should produce zero
        changes on the second call."""
        config = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini", "gpt-4-turbo"],
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=config,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            # First explicit sync (may report changes if creation already synced)
            sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config,
            )

            # Snapshot state after first sync
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            snapshot = {
                mc.name: (mc.is_visible, mc.display_name)
                for mc in provider.model_configurations
            }

            # Second sync — should be a no-op
            changes = sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config,
            )
            assert (
                changes == 0
            ), f"Expected 0 changes on idempotent re-sync, got {changes}"

            # State should be identical
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            current = {
                mc.name: (mc.is_visible, mc.display_name)
                for mc in provider.model_configurations
            }
            assert current == snapshot

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_default_model_hidden_when_removed_from_config(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """When the current default model is removed from the config, sync
        should hide it. The default model flow row should still exist (it
        points at the ModelConfiguration), but the model is no longer visible.

        Steps:
        1. Create provider with config: default=gpt-4o, additional=[gpt-4o-mini].
        2. Set gpt-4o as the global default.
        3. Re-sync with config: default=gpt-4o-mini (gpt-4o removed entirely).
        4. Verify gpt-4o is hidden, gpt-4o-mini is visible, and
           fetch_default_llm_model still returns a result (the flow row persists).
        """
        config_v1 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )
        config_v2 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o-mini",
            additional_models=[],
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=config_v1,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Step 2: Set gpt-4o as global default
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            update_default_provider(provider.id, "gpt-4o", db_session)

            default_before = fetch_default_llm_model(db_session)
            assert default_before is not None
            assert default_before.name == "gpt-4o"

            # Step 3: Re-sync with config v2 (gpt-4o removed)
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            changes = sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config_v2,
            )
            assert changes > 0

            # Step 4: Verify visibility
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            visibility = {
                mc.name: mc.is_visible for mc in provider.model_configurations
            }
            assert visibility["gpt-4o"] is False, "Removed default should be hidden"
            assert visibility["gpt-4o-mini"] is True, "New default should be visible"

            # The old default (gpt-4o) is now hidden. sync_auto_mode_models
            # should update the global default to the new recommended default
            # (gpt-4o-mini) so that it is not silently lost.
            db_session.expire_all()
            default_after = fetch_default_llm_model(db_session)
            assert (
                default_after is not None
            ), "Default model should not be None — sync should set the new recommended default when the old one is hidden"
            assert (
                default_after.name == "gpt-4o-mini"
            ), f"Default should be updated to the new recommended model 'gpt-4o-mini', but got '{default_after.name}'"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_sync_updates_default_when_recommended_default_changes(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """When the provider owns the CHAT default and a sync arrives with a
        different recommended default model (both models still in config),
        the global default should be updated to the new recommendation.

        Steps:
        1. Create auto-mode provider with config v1: default=gpt-4o.
        2. Set gpt-4o as the global CHAT default.
        3. Re-sync with config v2: default=gpt-4o-mini (gpt-4o still present).
        4. Verify the CHAT default switched to gpt-4o-mini and both models
           remain visible.
        """
        config_v1 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )
        config_v2 = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o-mini",
            additional_models=["gpt-4o"],
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=config_v1,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Set gpt-4o as the global CHAT default
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            update_default_provider(provider.id, "gpt-4o", db_session)

            default_before = fetch_default_llm_model(db_session)
            assert default_before is not None
            assert default_before.name == "gpt-4o"

            # Re-sync with config v2 (recommended default changed)
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None

            changes = sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config_v2,
            )
            assert changes > 0, "Sync should report changes when default switches"

            # Both models should remain visible
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            visibility = {
                mc.name: mc.is_visible for mc in provider.model_configurations
            }
            assert visibility["gpt-4o"] is True
            assert visibility["gpt-4o-mini"] is True

            # The CHAT default should now be gpt-4o-mini
            default_after = fetch_default_llm_model(db_session)
            assert default_after is not None
            assert (
                default_after.name == "gpt-4o-mini"
            ), f"Default should be updated to 'gpt-4o-mini', got '{default_after.name}'"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)

    def test_sync_idempotent_when_default_already_matches(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """When the provider owns the CHAT default and it already matches the
        recommended default, re-syncing should report zero changes.

        This is a regression test for the bug where changes was unconditionally
        incremented even when the default was already correct.
        """
        config = _create_mock_llm_recommendations(
            provider=LlmProviderNames.OPENAI,
            default_model_name="gpt-4o",
            additional_models=["gpt-4o-mini"],
        )

        try:
            with patch(
                "onyx.server.manage.llm.api.fetch_llm_recommendations_from_github",
                return_value=config,
            ):
                put_llm_provider(
                    llm_provider_upsert_request=LLMProviderUpsertRequest(
                        name=provider_name,
                        provider=LlmProviderNames.OPENAI,
                        api_key="sk-test-key-00000000000000000000000000000000000",
                        api_key_changed=True,
                        is_auto_mode=True,
                        model_configurations=[],
                    ),
                    is_creation=True,
                    _=_create_mock_admin(),
                    db_session=db_session,
                )

            # Set gpt-4o (the recommended default) as global CHAT default
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            update_default_provider(provider.id, "gpt-4o", db_session)

            # First sync to stabilize state
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config,
            )

            # Second sync — default already matches, should be a no-op
            db_session.expire_all()
            provider = fetch_existing_llm_provider(
                name=provider_name, db_session=db_session
            )
            assert provider is not None
            changes = sync_auto_mode_models(
                db_session=db_session,
                provider=provider,
                llm_recommendations=config,
            )
            assert (
                changes == 0
            ), f"Expected 0 changes when default already matches recommended, got {changes}"

            # Default should still be gpt-4o
            default_model = fetch_default_llm_model(db_session)
            assert default_model is not None
            assert default_model.name == "gpt-4o"

        finally:
            db_session.rollback()
            _cleanup_provider(db_session, provider_name)


================================================
FILE: backend/tests/external_dependency_unit/llm/test_llm_provider_called.py
================================================
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import patch
from uuid import uuid4

import pytest
from fastapi_users.password import PasswordHelper
from sqlalchemy.orm import Session

from onyx.db.enums import AccountType
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.llm.constants import LlmProviderNames
from onyx.llm.override_models import LLMOverride
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.query_and_chat.chat_backend import create_new_chat_session
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from tests.external_dependency_unit.answer.stream_test_assertions import (
    assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import submit_query
from tests.external_dependency_unit.answer.stream_test_utils import tokenise
from tests.external_dependency_unit.mock_llm import LLMAnswerResponse
from tests.external_dependency_unit.mock_llm import MockLLM


def _create_admin(db_session: Session) -> User:
    """Create a mock admin user for testing."""
    unique_email = f"admin_{uuid4().hex[:8]}@example.com"
    password_helper = PasswordHelper()
    password = password_helper.generate()
    hashed_password = password_helper.hash(password)

    user = User(
        id=uuid4(),
        email=unique_email,
        hashed_password=hashed_password,
        is_active=True,
        is_superuser=True,
        is_verified=True,
        role=UserRole.ADMIN,
        account_type=AccountType.STANDARD,
    )
    db_session.add(user)
    db_session.commit()
    db_session.refresh(user)
    return user


def _create_provider(
    db_session: Session,
    provider: LlmProviderNames,
    name: str,
    is_public: bool,
) -> int:
    result = upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=name,
            provider=provider,
            api_key="sk-ant-api03-...",
            is_public=is_public,
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name="claude-3-5-sonnet-20240620",
                    is_visible=True,
                ),
            ],
        ),
        db_session=db_session,
    )
    return result.id


@contextmanager
def use_mock_llm() -> (
    Generator[tuple[MockLLM, dict[str, bool | str | None]], None, None]
):
    """Context manager that patches LLM factory functions and tracks which ones are called."""
    mock_llm = MockLLM()

    call_tracker: dict[str, bool | str | None] = {
        "get_default_llm_called": False,
        "get_llm_called": False,
        "provider": None,
    }

    def mock_get_default_llm(*_args: Any, **_kwargs: Any) -> MockLLM:
        call_tracker["get_default_llm_called"] = True
        return mock_llm

    def mock_get_llm(provider: str, *_args: Any, **_kwargs: Any) -> MockLLM:
        call_tracker["get_llm_called"] = True
        call_tracker["provider"] = provider
        return mock_llm

    with (
        patch(
            "onyx.llm.factory.get_default_llm",
            side_effect=mock_get_default_llm,
        ),
        patch(
            "onyx.llm.factory.get_llm",
            side_effect=mock_get_llm,
        ),
    ):
        yield mock_llm, call_tracker


def _cleanup_provider(db_session: Session, name: str) -> None:
    """Helper to clean up a test provider by name."""
    provider = fetch_existing_llm_provider(name=name, db_session=db_session)
    if provider:
        remove_llm_provider(db_session, provider.id)


def _assert_llm_calls(
    call_tracker: dict[str, bool | str | None], expected_provider: str
) -> None:
    """Assert that get_llm was called with expected provider and get_default_llm was not called."""
    assert not call_tracker[
        "get_default_llm_called"
    ], "get_default_llm should not be called when using private provider"
    assert call_tracker[
        "get_llm_called"
    ], "get_llm should be called when using private provider"
    assert (
        call_tracker["provider"] == expected_provider
    ), f"Expected provider '{expected_provider}', got '{call_tracker['provider']}'"


def _reset_call_tracker(call_tracker: dict[str, bool | str | None]) -> None:
    """Reset the call tracker for the next test iteration."""
    call_tracker["get_default_llm_called"] = False
    call_tracker["get_llm_called"] = False
    call_tracker["provider"] = None


def test_user_sends_message_to_private_provider(
    db_session: Session,
) -> None:
    """Test that messages sent to a private provider use get_llm instead of get_default_llm."""
    admin_user = _create_admin(db_session)

    # Create providers
    public_provider_id = _create_provider(
        db_session, LlmProviderNames.ANTHROPIC, "public-provider", True
    )
    _create_provider(db_session, LlmProviderNames.GOOGLE, "private-provider", False)

    update_default_provider(
        public_provider_id, "claude-3-5-sonnet-20240620", db_session
    )

    try:
        # Create chat session
        chat_session = create_new_chat_session(
            ChatSessionCreationRequest(),
            user=admin_user,
            db_session=db_session,
        )

        chat_session_id = chat_session.chat_session_id
        answer_tokens_1 = tokenise("Hello, how are you?")
        answer_tokens_2 = tokenise("I'm good, thank you!")

        with use_mock_llm() as (mock_llm, call_tracker):
            handler = StreamTestBuilder(llm_controller=mock_llm)

            # First message
            handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens_1))
            answer_stream = submit_query(
                query="Hello, how are you?",
                chat_session_id=chat_session_id,
                db_session=db_session,
                user=admin_user,
                llm_override=LLMOverride(
                    model_provider="private-provider",
                    model_version="claude-3-5-sonnet-20240620",
                ),
            )

            assert_answer_stream_part_correct(
                received=next(answer_stream),
                expected=MessageResponseIDInfo(
                    user_message_id=1,
                    reserved_assistant_message_id=1,
                ),
            )

            handler.expect_agent_response(
                answer_tokens=answer_tokens_1,
                turn_index=0,
            ).run_and_validate(stream=answer_stream)

            with pytest.raises(StopIteration):
                next(answer_stream)

            _assert_llm_calls(call_tracker, "google")
            _reset_call_tracker(call_tracker)

            # Second message
            handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens_2))
            answer_stream = submit_query(
                query="I'm good, thank you!",
                chat_session_id=chat_session_id,
                db_session=db_session,
                user=admin_user,
                llm_override=LLMOverride(
                    model_provider="private-provider",
                    model_version="claude-3-5-sonnet-20240620",
                ),
            )

            assert_answer_stream_part_correct(
                received=next(answer_stream),
                expected=MessageResponseIDInfo(
                    user_message_id=2,
                    reserved_assistant_message_id=2,
                ),
            )

            handler.expect_agent_response(
                answer_tokens=answer_tokens_2,
                turn_index=0,
            ).run_and_validate(stream=answer_stream)

            with pytest.raises(StopIteration):
                next(answer_stream)

            _assert_llm_calls(call_tracker, "google")

    finally:
        _cleanup_provider(db_session, "public-provider")
        _cleanup_provider(db_session, "private-provider")


================================================
FILE: backend/tests/external_dependency_unit/llm/test_llm_provider_default_model_protection.py
================================================
"""
This should act as the main point of reference for testing that default model
logic is consisten.

 -
"""

from collections.abc import Generator
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
from onyx.db.llm import update_default_vision_provider
from onyx.db.llm import upsert_llm_provider
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest


def _create_test_provider(
    db_session: Session,
    name: str,
    models: list[ModelConfigurationUpsertRequest] | None = None,
) -> LLMProviderView:
    """Helper to create a test LLM provider with multiple models."""
    if models is None:
        models = [
            ModelConfigurationUpsertRequest(
                name="gpt-4o", is_visible=True, supports_image_input=True
            ),
            ModelConfigurationUpsertRequest(
                name="gpt-4o-mini", is_visible=True, supports_image_input=False
            ),
        ]
    return upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=name,
            provider=LlmProviderNames.OPENAI,
            api_key="sk-test-key-00000000000000000000000000000000000",
            api_key_changed=True,
            model_configurations=models,
        ),
        db_session=db_session,
    )


def _cleanup_provider(db_session: Session, name: str) -> None:
    """Helper to clean up a test provider by name."""
    provider = fetch_existing_llm_provider(name=name, db_session=db_session)
    if provider:
        remove_llm_provider(db_session, provider.id)


@pytest.fixture
def provider_name(db_session: Session) -> Generator[str, None, None]:
    """Generate a unique provider name for each test, with automatic cleanup."""
    name = f"test-provider-{uuid4().hex[:8]}"
    yield name
    db_session.rollback()
    _cleanup_provider(db_session, name)


class TestDefaultModelProtection:
    """Tests that the default model cannot be removed or hidden."""

    def test_cannot_remove_default_text_model(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Removing the default text model from a provider should raise ValueError."""
        provider = _create_test_provider(db_session, provider_name)
        update_default_provider(provider.id, "gpt-4o", db_session)

        # Try to update the provider without the default model
        with pytest.raises(ValueError, match="Cannot remove the default model"):
            upsert_llm_provider(
                LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                ),
                db_session=db_session,
            )

    def test_cannot_hide_default_text_model(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Setting is_visible=False on the default text model should raise ValueError."""
        provider = _create_test_provider(db_session, provider_name)
        update_default_provider(provider.id, "gpt-4o", db_session)

        # Try to hide the default model
        with pytest.raises(ValueError, match="Cannot hide the default model"):
            upsert_llm_provider(
                LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o", is_visible=False
                        ),
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                ),
                db_session=db_session,
            )

    def test_cannot_remove_default_vision_model(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Removing the default vision model from a provider should raise ValueError."""
        provider = _create_test_provider(db_session, provider_name)
        # Set gpt-4o as both the text and vision default
        update_default_provider(provider.id, "gpt-4o", db_session)
        update_default_vision_provider(provider.id, "gpt-4o", db_session)

        # Try to remove the default vision model
        with pytest.raises(ValueError, match="Cannot remove the default model"):
            upsert_llm_provider(
                LLMProviderUpsertRequest(
                    id=provider.id,
                    name=provider_name,
                    provider=LlmProviderNames.OPENAI,
                    api_key="sk-test-key-00000000000000000000000000000000000",
                    api_key_changed=True,
                    model_configurations=[
                        ModelConfigurationUpsertRequest(
                            name="gpt-4o-mini", is_visible=True
                        ),
                    ],
                ),
                db_session=db_session,
            )

    def test_can_remove_non_default_model(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Removing a non-default model should succeed."""
        provider = _create_test_provider(db_session, provider_name)
        update_default_provider(provider.id, "gpt-4o", db_session)

        # Remove gpt-4o-mini (not default) — should succeed
        updated = upsert_llm_provider(
            LLMProviderUpsertRequest(
                id=provider.id,
                name=provider_name,
                provider=LlmProviderNames.OPENAI,
                api_key="sk-test-key-00000000000000000000000000000000000",
                api_key_changed=True,
                model_configurations=[
                    ModelConfigurationUpsertRequest(
                        name="gpt-4o", is_visible=True, supports_image_input=True
                    ),
                ],
            ),
            db_session=db_session,
        )

        model_names = {mc.name for mc in updated.model_configurations}
        assert "gpt-4o" in model_names
        assert "gpt-4o-mini" not in model_names

    def test_can_hide_non_default_model(
        self,
        db_session: Session,
        provider_name: str,
    ) -> None:
        """Hiding a non-default model should succeed."""
        provider = _create_test_provider(db_session, provider_name)
        update_default_provider(provider.id, "gpt-4o", db_session)

        # Hide gpt-4o-mini (not default) — should succeed
        updated = upsert_llm_provider(
            LLMProviderUpsertRequest(
                id=provider.id,
                name=provider_name,
                provider=LlmProviderNames.OPENAI,
                api_key="sk-test-key-00000000000000000000000000000000000",
                api_key_changed=True,
                model_configurations=[
                    ModelConfigurationUpsertRequest(
                        name="gpt-4o", is_visible=True, supports_image_input=True
                    ),
                    ModelConfigurationUpsertRequest(
                        name="gpt-4o-mini", is_visible=False
                    ),
                ],
            ),
            db_session=db_session,
        )

        model_visibility = {
            mc.name: mc.is_visible for mc in updated.model_configurations
        }
        assert model_visibility["gpt-4o"] is True
        assert model_visibility["gpt-4o-mini"] is False


================================================
FILE: backend/tests/external_dependency_unit/llm/test_prompt_caching.py
================================================
"""External dependency unit tests for prompt caching functionality.

These tests call LLM providers directly and use litellm's completion_cost() to verify
that prompt caching reduces costs.
"""

import json
import os
import tempfile
import time
from pathlib import Path
from typing import Any

import pytest
from litellm import completion_cost
from sqlalchemy.orm import Session

from onyx.llm.model_response import Usage
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.llm.multi_llm import LitellmLLM
from onyx.llm.prompt_cache.processor import process_with_prompt_cache


VERTEX_CREDENTIALS_ENV = "VERTEX_CREDENTIALS"
VERTEX_LOCATION_ENV = "VERTEX_LOCATION"
VERTEX_MODEL_ENV = "VERTEX_MODEL_NAME"
DEFAULT_VERTEX_MODEL = "gemini-2.5-flash"


def _extract_cached_tokens(usage: Usage | None) -> int:
    """Helper to extract cached_tokens from usage (dict or object)."""
    if not usage:
        print("Usage is None")
        return 0

    cached_tokens = usage.cache_creation_input_tokens

    return cached_tokens


def _extract_prompt_tokens(usage: Usage | None) -> int:
    """Helper to extract prompt_tokens from usage (dict or object)."""
    if not usage:
        print("Usage is None")
        return 0

    return usage.prompt_tokens


def _extract_cache_read_tokens(usage: Usage | None) -> int:
    """Extract cache read metrics from usage (dict or object)."""
    print(f"usage: {usage}")
    if not usage:
        print("Usage is None")
        return 0

    return usage.cache_read_input_tokens


def _get_usage_value(usage: Any, key: str) -> int:
    """Retrieve a numeric field from usage objects or dictionaries."""
    if isinstance(usage, dict):
        value = usage.get(key)
    else:
        value = getattr(usage, key, None)
    return int(value or 0)


def _resolve_vertex_credentials() -> tuple[Path, bool]:
    """Return a path to credentials; support inline JSON or filesystem path."""
    raw_value = os.environ.get(VERTEX_CREDENTIALS_ENV)
    if not raw_value:
        raise FileNotFoundError("Vertex credentials environment variable not set.")

    raw_value = raw_value.strip()
    candidate_path = Path(raw_value)
    if len(raw_value) < 100 and candidate_path.exists():
        return candidate_path, False

    try:
        json.loads(raw_value)
    except json.JSONDecodeError as exc:
        raise ValueError(
            "Vertex credentials must be a valid JSON string or file path."
        ) from exc

    temp_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    )
    try:
        temp_file.write(raw_value)
        temp_file.flush()
    finally:
        temp_file.close()
    return Path(temp_file.name), True


def _validate_vertex_credentials_file(credentials_path: Path) -> None:
    """Validate that the credentials file contains a usable service account."""
    try:
        content = credentials_path.read_text(encoding="utf-8")
    except OSError as exc:
        raise ValueError(f"Failed to read credentials file: {exc}") from exc

    try:
        data = json.loads(content)
    except json.JSONDecodeError as exc:
        raise ValueError("Credentials file does not contain valid JSON.") from exc

    if not isinstance(data, dict):
        raise ValueError("Credentials JSON must be an object.")

    cred_type = data.get("type")
    if cred_type != "service_account":
        raise ValueError(
            f"Unsupported credential type '{cred_type}'. Provide a service_account JSON blob."
        )

    missing_fields = [
        field
        for field in ("project_id", "client_email", "private_key")
        if not data.get(field)
    ]
    if missing_fields:
        raise ValueError(
            "Missing required service account fields: "
            + ", ".join(sorted(missing_fields))
        )

    try:
        from google.oauth2 import service_account

        service_account.Credentials.from_service_account_info(
            data,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
    except (
        Exception
    ) as exc:  # pragma: no cover - depends on google SDK validation paths
        raise ValueError(
            f"Failed to construct service account credentials: {exc}"
        ) from exc


@pytest.mark.skip(reason="OpenAI prompt caching is unreliable")
@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OpenAI API key not available",
)
def test_openai_prompt_caching_reduces_costs(
    db_session: Session,  # noqa: ARG001
) -> None:
    """Test that OpenAI prompt caching reduces costs on subsequent calls.

    OpenAI uses implicit caching for prompts >1024 tokens.
    """
    attempts = 8
    successes = 0
    for _ in range(attempts):
        # Create OpenAI LLM
        llm = LitellmLLM(
            api_key=os.environ["OPENAI_API_KEY"],
            model_provider="openai",
            model_name="gpt-4o",
            max_input_tokens=128000,
        )
        import random
        import string

        # Insert 32 random lowercase characters at the start of long_context
        # to prevent holdover cache from previous tests
        random_prefix = "".join(random.choices(string.ascii_lowercase, k=32))
        # Create a long context message to ensure caching threshold is met (>1024 tokens)
        long_context = (
            random_prefix
            + "This is a comprehensive document about artificial intelligence and machine learning. "
            + " ".join(
                [
                    f"Section {i}: This section discusses various aspects of AI technology, "
                    f"including neural networks, deep learning, natural language processing, "
                    f"computer vision, and reinforcement learning. These technologies are "
                    f"revolutionizing how we interact with computers and process information."
                    for i in range(50)
                ]
            )
        )

        # Split into cacheable prefix (the long context) and suffix (the question)
        cacheable_prefix: list[ChatCompletionMessage] = [
            UserMessage(role="user", content=long_context)
        ]

        # First call - creates cache
        print("\n=== First call (cache creation) ===")
        question1: list[ChatCompletionMessage] = [
            UserMessage(role="user", content="What are the main topics discussed?")
        ]

        # Apply prompt caching (for OpenAI, this is mostly a no-op but should still work)
        processed_messages1, _ = process_with_prompt_cache(
            llm_config=llm.config,
            cacheable_prefix=cacheable_prefix,
            suffix=question1,
            continuation=False,
        )
        # print(f"Processed messages 1: {processed_messages1}")
        # print(f"Metadata 1: {metadata1}")
        # print(f"Cache key 1: {metadata1.cache_key if metadata1 else None}")

        # Call litellm directly so we can get the raw response
        response1 = llm.invoke(prompt=processed_messages1)
        cost1 = completion_cost(
            completion_response=response1.model_dump(),
            model=f"{llm._model_provider}/{llm._model_version}",
        )

        usage1 = response1.usage
        cached_tokens_1 = _extract_cached_tokens(usage1)
        prompt_tokens_1 = _extract_prompt_tokens(usage1)
        # print(f"Response 1 usage: {usage1}")
        # print(f"Cost 1: ${cost1:.10f}")

        # Wait to ensure cache is available
        time.sleep(5)

        # Second call with same context - should use cache
        print("\n=== Second call (cache read) ===")
        question2: list[ChatCompletionMessage] = [
            UserMessage(role="user", content="Can you elaborate on neural networks?")
        ]

        # Apply prompt caching (same cacheable prefix)
        processed_messages2, _ = process_with_prompt_cache(
            llm_config=llm.config,
            cacheable_prefix=cacheable_prefix,
            suffix=question2,
            continuation=False,
        )
        # print(f"Processed messages 2: {processed_messages2}")
        response2 = llm.invoke(prompt=processed_messages2)
        cost2 = completion_cost(
            completion_response=response2.model_dump(),
            model=f"{llm._model_provider}/{llm._model_version}",
        )

        usage2 = response2.usage
        cached_tokens_2 = _extract_cache_read_tokens(usage2)
        prompt_tokens_2 = _extract_prompt_tokens(usage2)
        # print(f"Response 2 usage: {usage2}")
        # print(f"Cost 2: ${cost2:.10f}")

        # Verify caching occurred – OpenAI reports cached work via prompt_tokens_details.cached_tokens
        print(f"\nCached tokens call 1: {cached_tokens_1}, call 2: {cached_tokens_2}")
        print(f"Prompt tokens call 1: {prompt_tokens_1}, call 2: {prompt_tokens_2}")
        print(f"Cost delta (1 -> 2): ${cost1 - cost2:.10f}")

        # The first call is expected to *create* cache (cached_tokens may be 0).
        # The second call should show cached tokens being used.
        if cached_tokens_2 > 0:
            successes += 1
            break

    # empirically there's a 60% chance of success per attempt, so we expect at least one success in 8 attempts
    # (99.94% probability). we can bump this number if the test is too flaky.
    assert (
        successes > 0
    ), f"Expected at least one success. 0 of {attempts} attempts used prompt caching."


@pytest.mark.skipif(
    not os.environ.get("ANTHROPIC_API_KEY"),
    reason="Anthropic API key not available",
)
def test_anthropic_prompt_caching_reduces_costs(
    db_session: Session,  # noqa: ARG001
) -> None:
    """Test that Anthropic prompt caching reduces costs on subsequent calls.

    Anthropic requires explicit cache_control parameters.
    """
    # Prompt caching support is model/account specific.
    # Allow override via env var and otherwise try a few non-retired candidates.
    anthropic_prompt_cache_models_env = os.environ.get("ANTHROPIC_PROMPT_CACHE_MODELS")
    if anthropic_prompt_cache_models_env:
        candidate_models = [
            model.strip()
            for model in anthropic_prompt_cache_models_env.split(",")
            if model.strip()
        ]
    else:
        candidate_models = [
            "claude-haiku-4-5-20251001",
            "claude-sonnet-4-5-20250929",
            "claude-3-5-sonnet-20241022",
            "claude-3-5-sonnet-latest",
        ]

    import random
    import string

    # Create a long context message.
    # Add a random prefix to avoid reusing an existing ephemeral cache from prior test runs.
    random_prefix = "".join(random.choices(string.ascii_lowercase, k=32))
    long_context = (
        random_prefix + " "
        "This is a comprehensive document about artificial intelligence and machine learning. "
        + " ".join(
            [
                f"Section {i}: This section discusses various aspects of AI technology, "
                f"including neural networks, deep learning, natural language processing, "
                f"computer vision, and reinforcement learning. These technologies are "
                f"revolutionizing how we interact with computers and process information."
                for i in range(50)
            ]
        )
    )

    base_messages: list[ChatCompletionMessage] = [
        UserMessage(role="user", content=long_context)
    ]

    unavailable_models: list[str] = []
    non_caching_models: list[str] = []

    for model_name in candidate_models:
        llm = LitellmLLM(
            api_key=os.environ["ANTHROPIC_API_KEY"],
            model_provider="anthropic",
            model_name=model_name,
            max_input_tokens=200000,
        )

        # First call - creates cache
        print(f"\n=== First call (cache creation) model={model_name} ===")
        question1: list[ChatCompletionMessage] = [
            UserMessage(
                role="user",
                content="Reply with exactly one lowercase word: topics",
            )
        ]

        processed_messages1, _ = process_with_prompt_cache(
            llm_config=llm.config,
            cacheable_prefix=base_messages,
            suffix=question1,
            continuation=False,
        )

        try:
            response1 = llm.invoke(prompt=processed_messages1, max_tokens=8)
        except Exception as e:
            error_str = str(e).lower()
            if (
                "not_found_error" in error_str
                or "model_not_found" in error_str
                or ('"type":"not_found_error"' in error_str and "model:" in error_str)
            ):
                unavailable_models.append(model_name)
                continue
            raise

        cost1 = completion_cost(
            completion_response=response1.model_dump(),
            model=f"{llm._model_provider}/{llm._model_version}",
        )

        usage1 = response1.usage
        print(f"Response 1 usage: {usage1}")
        print(f"Cost 1: ${cost1:.10f}")

        # Wait to ensure cache is available
        time.sleep(2)

        # Second call with same context - should use cache
        print(f"\n=== Second call (cache read) model={model_name} ===")
        question2: list[ChatCompletionMessage] = [
            UserMessage(
                role="user",
                content="Reply with exactly one lowercase word: neural",
            )
        ]

        processed_messages2, _ = process_with_prompt_cache(
            llm_config=llm.config,
            cacheable_prefix=base_messages,
            suffix=question2,
            continuation=False,
        )

        response2 = llm.invoke(prompt=processed_messages2, max_tokens=8)
        cost2 = completion_cost(
            completion_response=response2.model_dump(),
            model=f"{llm._model_provider}/{llm._model_version}",
        )

        usage2 = response2.usage
        print(f"Response 2 usage: {usage2}")
        print(f"Cost 2: ${cost2:.10f}")

        cache_creation_tokens = _get_usage_value(usage1, "cache_creation_input_tokens")
        cache_read_tokens = _get_usage_value(usage2, "cache_read_input_tokens")

        print(f"\nCache creation tokens (call 1): {cache_creation_tokens}")
        print(f"Cache read tokens (call 2): {cache_read_tokens}")
        print(f"Cost reduction: ${cost1 - cost2:.10f}")

        # Model is available but does not expose Anthropic cache usage metrics
        if cache_creation_tokens <= 0 or cache_read_tokens <= 0:
            non_caching_models.append(model_name)
            continue

        # Cost should be lower on second call
        assert (
            cost2 < cost1
        ), f"Expected lower cost on cached call. Cost 1: ${cost1:.10f}, Cost 2: ${cost2:.10f}"
        return

    pytest.skip(
        "No Anthropic model available with observable prompt-cache metrics. "
        f"Tried models={candidate_models}, unavailable={unavailable_models}, non_caching={non_caching_models}"
    )


@pytest.mark.skipif(
    not os.environ.get(VERTEX_CREDENTIALS_ENV),
    reason="Vertex AI credentials file not available",
)
@pytest.mark.skipif(
    not os.environ.get(VERTEX_LOCATION_ENV),
    reason="VERTEX_LOCATION required for Vertex AI context caching (e.g., 'us-central1')",
)
@pytest.mark.skip(reason="Vertex AI prompt caching is disabled for now")
def test_google_genai_prompt_caching_reduces_costs(
    db_session: Session,  # noqa: ARG001
) -> None:
    """Test that Litellm Gemini prompt caching reduces costs on subsequent calls.

    Vertex AI requires explicit context caching via the Context Caching API,
    which needs both credentials and a valid location (e.g., us-central1).
    """
    import random
    import string
    from litellm import exceptions as litellm_exceptions

    try:
        credentials_path, should_cleanup = _resolve_vertex_credentials()
    except FileNotFoundError:
        pytest.skip("Vertex credentials not available for test.")
    except ValueError as exc:
        pytest.skip(str(exc))

    vertex_location = os.environ.get(VERTEX_LOCATION_ENV)
    if not vertex_location:
        pytest.skip("VERTEX_LOCATION required for Vertex AI context caching")
    model_name = os.environ.get(VERTEX_MODEL_ENV, DEFAULT_VERTEX_MODEL)

    try:
        _validate_vertex_credentials_file(credentials_path)
        os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", str(credentials_path))

        custom_config: dict[str, str] = {"vertex_credentials": str(credentials_path)}
        if vertex_location:
            custom_config["vertex_location"] = vertex_location

        llm = LitellmLLM(
            api_key=None,
            model_provider="vertex_ai",
            model_name=model_name,
            max_input_tokens=1_000_000,
            custom_config=custom_config,
        )

        attempts = 4
        success = False
        last_metrics: dict[str, Any] = {}

        for attempt in range(attempts):
            random_prefix = "".join(random.choices(string.ascii_lowercase, k=32))
            long_context = (
                random_prefix
                + "This is a comprehensive document about artificial intelligence and machine learning. "
                + " ".join(
                    [
                        f"Section {i}: This section discusses various aspects of AI technology, "
                        f"including neural networks, deep learning, natural language processing, "
                        f"computer vision, and reinforcement learning. These technologies are "
                        f"revolutionizing how we interact with computers and process information."
                        for i in range(50)
                    ]
                )
            )

            cacheable_prefix: list[ChatCompletionMessage] = [
                SystemMessage(role="system", content=long_context)
            ]

            print(f"\n=== Vertex attempt {attempt + 1} (cache creation) ===")
            question1: list[ChatCompletionMessage] = [
                UserMessage(role="user", content="What are the main topics discussed?")
            ]

            processed_messages1, _ = process_with_prompt_cache(
                llm_config=llm.config,
                cacheable_prefix=cacheable_prefix,
                suffix=question1,
                continuation=False,
            )
            # Debug: print processed messages structure
            first_msg = (
                processed_messages1[0]
                if isinstance(processed_messages1, list) and processed_messages1
                else processed_messages1
            )
            print(f"Processed messages structure (first msg): {first_msg}")

            response1 = llm.invoke(prompt=processed_messages1)
            cost1 = completion_cost(
                completion_response=response1.model_dump(),
                model=f"{llm._model_provider}/{llm._model_version}",
            )
            usage1 = response1.usage
            cache_creation_tokens = _get_usage_value(
                usage1, "cache_creation_input_tokens"
            )
            cached_tokens_1 = _extract_cached_tokens(usage1)
            cache_read_tokens_1 = _extract_cache_read_tokens(usage1)

            print(f"Vertex response 1 usage: {usage1}")
            print(f"Vertex cost 1: ${cost1:.10f}")

            time.sleep(5)

            print(f"\n=== Vertex attempt {attempt + 1} (cache read) ===")
            question2: list[ChatCompletionMessage] = [
                UserMessage(
                    role="user", content="Can you elaborate on neural networks?"
                )
            ]

            processed_messages2, _ = process_with_prompt_cache(
                llm_config=llm.config,
                cacheable_prefix=cacheable_prefix,
                suffix=question2,
                continuation=False,
            )

            response2 = llm.invoke(prompt=processed_messages2)
            cost2 = completion_cost(
                completion_response=response2.model_dump(),
                model=f"{llm._model_provider}/{llm._model_version}",
            )
            usage2 = response2.usage
            cache_read_tokens_2 = _extract_cache_read_tokens(usage2)
            cached_tokens_2 = _extract_cached_tokens(usage2)

            print(f"Vertex response 2 usage: {usage2}")
            print(f"Vertex cost 2: ${cost2:.10f}")
            print(
                f"Vertex cache metrics - creation: {cache_creation_tokens}, "
                f"call1 cached tokens: {cached_tokens_1}, "
                f"call1 cache read tokens: {cache_read_tokens_1}, "
                f"call2 cached tokens: {cached_tokens_2}, "
                f"call2 cache read tokens: {cache_read_tokens_2}"
            )
            print(f"Vertex cost delta (1 -> 2): ${cost1 - cost2:.10f}")

            last_metrics = {
                "cache_creation_tokens": cache_creation_tokens,
                "cached_tokens_1": cached_tokens_1,
                "cache_read_tokens_1": cache_read_tokens_1,
                "cached_tokens_2": cached_tokens_2,
                "cache_read_tokens_2": cache_read_tokens_2,
                "cost_delta": cost1 - cost2,
            }

            if cache_read_tokens_2 > 0 or cached_tokens_2 > 0 or (cost1 - cost2) > 0:
                success = True
                break
    except ValueError as exc:
        pytest.fail(f"Invalid Vertex credentials: {exc}")
    except litellm_exceptions.APIConnectionError as exc:
        creds_details = json.loads(credentials_path.read_text(encoding="utf-8"))
        pytest.fail(
            "Vertex credentials appeared well-formed but failed to mint an access token. "
            "This typically means the service account lacks the required Vertex AI permissions "
            "or the key was revoked.\n"
            f"project_id={creds_details.get('project_id')!r}, "
            f"client_email={creds_details.get('client_email')!r}\n"
            f"Original error: {exc}"
        )
    finally:
        if should_cleanup:
            try:
                credentials_path.unlink(missing_ok=True)
            except OSError:
                pass

    assert (
        success
    ), f"Expected Gemini prompt caching evidence across attempts. Last observed metrics: {last_metrics}"


@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OpenAI API key not available",
)
def test_prompt_caching_with_conversation_history(
    db_session: Session,  # noqa: ARG001
) -> None:
    """Test that prompt caching works with multi-turn conversations.

    System message and history should be cached, only new user message is uncached.
    """
    # Create OpenAI LLM
    llm = LitellmLLM(
        api_key=os.environ["OPENAI_API_KEY"],
        model_provider="openai",
        model_name="gpt-4o-mini",
        max_input_tokens=128000,
    )

    # Create a long system message and context
    system_message: SystemMessage = SystemMessage(
        role="system",
        content=(
            "You are an AI assistant specialized in technology. "
            + " ".join(
                [
                    f"You have knowledge about topic {i} including detailed information. "
                    for i in range(50)
                ]
            )
        ),
    )

    long_context = "This is a comprehensive document. " + " ".join(
        [f"Section {i}: Details about topic {i}. " * 20 for i in range(30)]
    )

    # Turn 1
    print("\n=== Turn 1 ===")
    messages_turn1: list[ChatCompletionMessage] = [
        system_message,
        UserMessage(role="user", content=long_context + "\n\nWhat is this about?"),
    ]

    response1 = llm.invoke(prompt=messages_turn1)
    cost1 = completion_cost(
        completion_response=response1.model_dump(),
        model=f"{llm._model_provider}/{llm._model_version}",
    )

    usage1 = response1.usage
    print(f"Turn 1 usage: {usage1}")
    print(f"Turn 1 cost: ${cost1:.10f}")

    # Wait for cache
    time.sleep(2)

    # Turn 2 - add assistant response and new user message
    print("\n=== Turn 2 (with cached history) ===")
    messages_turn2: list[ChatCompletionMessage] = messages_turn1 + [
        AssistantMessage(
            role="assistant", content="This document discusses various topics."
        ),
        UserMessage(role="user", content="Tell me about the first topic."),
    ]

    response2 = llm.invoke(prompt=messages_turn2)
    cost2 = completion_cost(
        completion_response=response2.model_dump(),
        model=f"{llm._model_provider}/{llm._model_version}",
    )

    usage2 = response2.usage
    print(f"Turn 2 usage: {usage2}")
    print(f"Turn 2 cost: ${cost2:.10f}")

    # Turn 3 - continue conversation
    print("\n=== Turn 3 (with even more cached history) ===")
    messages_turn3: list[ChatCompletionMessage] = messages_turn2 + [
        AssistantMessage(role="assistant", content="The first topic covers..."),
        UserMessage(role="user", content="What about the second topic?"),
    ]

    response3 = llm.invoke(prompt=messages_turn3)
    cost3 = completion_cost(
        completion_response=response3.model_dump(),
        model=f"{llm._model_provider}/{llm._model_version}",
    )

    usage3 = response3.usage
    print(f"Turn 3 usage: {usage3}")
    print(f"Turn 3 cost: ${cost3:.10f}")

    # Verify caching in subsequent turns
    cache_tokens_2 = _get_usage_value(usage2, "cache_read_input_tokens")
    cache_tokens_3 = _get_usage_value(usage3, "cache_read_input_tokens")

    prompt_tokens_1 = _get_usage_value(usage1, "prompt_tokens")
    prompt_tokens_2 = _get_usage_value(usage2, "prompt_tokens")
    prompt_tokens_3 = _get_usage_value(usage3, "prompt_tokens")

    print(f"\nCache tokens - Turn 2: {cache_tokens_2}, Turn 3: {cache_tokens_3}")
    print(
        f"Prompt tokens - Turn 1: {prompt_tokens_1}, Turn 2: {prompt_tokens_2}, Turn 3: {prompt_tokens_3}"
    )

    # Either cache tokens should increase or prompt tokens should be relatively stable
    # (not growing linearly with conversation length)
    assert (
        cache_tokens_2 > 0
        or cache_tokens_3 > 0
        or prompt_tokens_2 < prompt_tokens_1 * 1.5
    ), "Expected caching benefits in multi-turn conversation"


@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OpenAI API key not available",
)
def test_no_caching_without_process_with_prompt_cache(
    db_session: Session,  # noqa: ARG001
) -> None:
    """Test baseline: without using process_with_prompt_cache, no special caching occurs.

    This establishes a baseline to compare against the caching tests.
    """
    # Create OpenAI LLM
    llm = LitellmLLM(
        api_key=os.environ["OPENAI_API_KEY"],
        model_provider="openai",
        model_name="gpt-4o-mini",
        max_input_tokens=128000,
    )

    # Create a long context
    long_context = "This is a comprehensive document. " + " ".join(
        [f"Section {i}: Details about technology topic {i}. " * 10 for i in range(50)]
    )

    # First call - no explicit caching
    print("\n=== First call (no explicit caching) ===")
    messages1: list[ChatCompletionMessage] = [
        UserMessage(role="user", content=long_context + "\n\nSummarize this.")
    ]

    response1 = llm.invoke(prompt=messages1)
    cost1 = completion_cost(
        completion_response=response1.model_dump(),
        model=f"{llm._model_provider}/{llm._model_version}",
    )

    usage1 = response1.usage
    print(f"Response 1 usage: {usage1}")
    print(f"Cost 1: ${cost1:.10f}")

    # This test just verifies the LLM works and we can calculate costs
    # It serves as a baseline comparison for the caching tests
    assert cost1 > 0, "Should have non-zero cost"
    assert usage1, "Should have usage data"

    print("\nBaseline test passed - ready to compare with caching tests")


================================================
FILE: backend/tests/external_dependency_unit/mock_content_provider.py
================================================
import abc
from collections.abc import Generator
from collections.abc import Sequence
from contextlib import contextmanager
from unittest.mock import patch

from pydantic import BaseModel

from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.models import WebContentProvider


class MockWebContent(BaseModel):
    title: str
    url: str
    content: str

    def to_web_content(self) -> WebContent:
        return WebContent(
            title=self.title,
            link=self.url,
            full_content=self.content,
            published_date=None,
            scrape_successful=True,
        )


class ContentProviderController(abc.ABC):
    @abc.abstractmethod
    def add_content(self, content: MockWebContent) -> None:
        raise NotImplementedError


class MockContentProvider(WebContentProvider, ContentProviderController):
    def __init__(self) -> None:
        self._contents: list[MockWebContent] = []

    def add_content(self, web_content: MockWebContent) -> None:
        self._contents.append(web_content)

    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        filtered_contents = list(
            filter(lambda web_content: web_content.url in urls, self._contents)
        )

        return list(
            map(lambda web_content: web_content.to_web_content(), filtered_contents)
        )


@contextmanager
def use_mock_content_provider() -> Generator[ContentProviderController, None, None]:
    content_provider = MockContentProvider()

    with patch(
        "onyx.tools.tool_implementations.open_url.open_url_tool.get_default_content_provider",
        return_value=content_provider,
    ):
        yield content_provider


================================================
FILE: backend/tests/external_dependency_unit/mock_image_provider.py
================================================
import abc
import asyncio
import concurrent.futures
import time
from collections.abc import Generator
from contextlib import contextmanager
from datetime import datetime
from typing import Any
from unittest.mock import patch

from litellm.types.utils import ImageObject
from litellm.types.utils import ImageResponse

from onyx.image_gen.interfaces import ImageGenerationProvider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage
from onyx.llm.interfaces import LLMConfig


class ImageGenerationProviderController(abc.ABC):
    @abc.abstractmethod
    def add_image(
        self,
        data: str,
        delay: float = 0.0,
    ) -> None:
        raise NotImplementedError


class MockImageGenerationProvider(
    ImageGenerationProvider, ImageGenerationProviderController
):
    def __init__(self) -> None:
        self._images: list[str] = []
        self._delays: list[float] = []

    def add_image(
        self,
        data: str,
        delay: float = 0.0,
    ) -> None:
        self._images.append(data)
        self._delays.append(delay)

    @classmethod
    def validate_credentials(
        cls,
        credentials: ImageGenerationProviderCredentials,  # noqa: ARG003
    ) -> bool:
        return True

    @classmethod
    def _build_from_credentials(
        cls,
        _: ImageGenerationProviderCredentials,
    ) -> ImageGenerationProvider:
        return cls()

    def generate_image(
        self,
        prompt: str,
        model: str,  # noqa: ARG002
        size: str,  # noqa: ARG002
        n: int,  # noqa: ARG002
        quality: str | None = None,  # noqa: ARG002
        reference_images: list[ReferenceImage] | None = None,  # noqa: ARG002
        **kwargs: Any,  # noqa: ARG002
    ) -> ImageResponse:
        image_data = self._images.pop(0)
        delay = self._delays.pop(0)

        if delay > 0.0:
            try:
                asyncio.get_running_loop()
                # Event loop is running - run sleep in executor to avoid blocking the event loop
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                    future = executor.submit(time.sleep, delay)
                    future.result()
            except RuntimeError:
                # No running event loop, use regular thread sleep
                time.sleep(delay)

        return ImageResponse(
            created=int(datetime.now().timestamp()),
            data=[
                ImageObject(
                    b64_json=image_data,
                    revised_prompt=prompt,
                )
            ],
        )


def _create_mock_image_generation_llm_config() -> LLMConfig:
    """Create a mock LLMConfig for image generation."""
    return LLMConfig(
        model_provider="openai",
        model_name="gpt-image-1",
        temperature=0.0,
        api_key="mock-api-key",
        api_base=None,
        api_version=None,
        deployment_name=None,
        max_input_tokens=100000,
        custom_config=None,
    )


@contextmanager
def use_mock_image_generation_provider() -> (
    Generator[ImageGenerationProviderController, None, None]
):
    image_gen_provider = MockImageGenerationProvider()

    with (
        # Mock the image generation provider factory
        patch(
            "onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider",
            return_value=image_gen_provider,
        ),
        # Mock is_available to return True so the tool is registered
        patch(
            "onyx.tools.tool_implementations.images.image_generation_tool.ImageGenerationTool.is_available",
            return_value=True,
        ),
        # Mock the config lookup in tool_constructor to return a valid LLMConfig
        patch(
            "onyx.tools.tool_constructor._get_image_generation_config",
            return_value=_create_mock_image_generation_llm_config(),
        ),
    ):
        yield image_gen_provider


================================================
FILE: backend/tests/external_dependency_unit/mock_llm.py
================================================
from __future__ import annotations

import abc
import threading
import time
from collections.abc import Generator
from collections.abc import Iterator
from contextlib import contextmanager
from enum import Enum
from typing import Any
from typing import cast
from typing import Generic
from typing import Literal
from typing import TypeVar
from unittest.mock import patch

from pydantic import BaseModel

from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ReasoningEffort
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.model_response import ChatCompletionDeltaToolCall
from onyx.llm.model_response import Delta
from onyx.llm.model_response import FunctionCall
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.model_response import StreamingChoice

T = TypeVar("T")


class LLMResponseType(str, Enum):
    REASONING = "reasoning"
    ANSWER = "answer"
    TOOL_CALL = "tool_call"


class LLMResponse(abc.ABC, BaseModel):
    type: str = ""

    @abc.abstractmethod
    def num_tokens(self) -> int:
        raise NotImplementedError


class LLMReasoningResponse(LLMResponse):
    type: Literal["reasoning"] = LLMResponseType.REASONING.value
    reasoning_tokens: list[str]

    def num_tokens(self) -> int:
        return len(self.reasoning_tokens)


class LLMAnswerResponse(LLMResponse):
    type: Literal["answer"] = LLMResponseType.ANSWER.value
    answer_tokens: list[str]

    def num_tokens(self) -> int:
        return len(self.answer_tokens)


class LLMToolCallResponse(LLMResponse):
    type: Literal["tool_call"] = LLMResponseType.TOOL_CALL.value
    tool_name: str
    tool_call_id: str
    tool_call_argument_tokens: list[str]

    def num_tokens(self) -> int:
        return (
            len(self.tool_call_argument_tokens) + 1
        )  # +1 for the tool_call_id and tool_name


class StreamItem(BaseModel):
    """Represents a single item in the mock LLM stream with its type."""

    response_type: LLMResponseType
    data: Any


def _response_to_stream_items(response: LLMResponse) -> list[StreamItem]:
    match LLMResponseType(response.type):
        case LLMResponseType.REASONING:
            response = cast(LLMReasoningResponse, response)
            return [
                StreamItem(
                    response_type=LLMResponseType.REASONING,
                    data=token,
                )
                for token in response.reasoning_tokens
            ]
        case LLMResponseType.ANSWER:
            response = cast(LLMAnswerResponse, response)
            return [
                StreamItem(
                    response_type=LLMResponseType.ANSWER,
                    data=token,
                )
                for token in response.answer_tokens
            ]
        case LLMResponseType.TOOL_CALL:
            response = cast(LLMToolCallResponse, response)
            return [
                StreamItem(
                    response_type=LLMResponseType.TOOL_CALL,
                    data={
                        "tool_call_id": response.tool_call_id,
                        "tool_name": response.tool_name,
                        "arguments": None,
                    },
                )
            ] + [
                StreamItem(
                    response_type=LLMResponseType.TOOL_CALL,
                    data={
                        "tool_call_id": None,
                        "tool_name": None,
                        "arguments": token,
                    },
                )
                for token in response.tool_call_argument_tokens
            ]
        case _:
            raise ValueError(f"Unknown response type: {response.type}")


def create_delta_from_stream_item(item: StreamItem) -> Delta:
    response_type = item.response_type
    data = item.data
    if response_type == LLMResponseType.REASONING:
        return Delta(reasoning_content=data)
    elif response_type == LLMResponseType.ANSWER:
        return Delta(content=data)
    elif response_type == LLMResponseType.TOOL_CALL:
        # Handle grouped tool calls (list) vs single tool call (dict)
        if isinstance(data, list):
            # Multiple tool calls emitted together in the same tick
            tool_calls = []
            for tc_data in data:
                if tc_data["tool_call_id"] is not None:
                    tool_calls.append(
                        ChatCompletionDeltaToolCall(
                            id=tc_data["tool_call_id"],
                            index=tc_data["index"],
                            function=FunctionCall(
                                arguments="",
                                name=tc_data["tool_name"],
                            ),
                        )
                    )
                else:
                    tool_calls.append(
                        ChatCompletionDeltaToolCall(
                            index=tc_data["index"],
                            id=None,
                            function=FunctionCall(
                                arguments=tc_data["arguments"],
                                name=None,
                            ),
                        )
                    )
            return Delta(tool_calls=tool_calls)
        else:
            # Single tool call (original behavior)
            # First tick has tool_call_id and tool_name, subsequent ticks have arguments
            if data["tool_call_id"] is not None:
                return Delta(
                    tool_calls=[
                        ChatCompletionDeltaToolCall(
                            id=data["tool_call_id"],
                            function=FunctionCall(
                                name=data["tool_name"],
                                arguments="",
                            ),
                        )
                    ]
                )
            else:
                return Delta(
                    tool_calls=[
                        ChatCompletionDeltaToolCall(
                            id=None,
                            function=FunctionCall(
                                name=None,
                                arguments=data["arguments"],
                            ),
                        )
                    ]
                )
    else:
        raise ValueError(f"Unknown response type: {response_type}")


class MockLLMController(abc.ABC):
    @abc.abstractmethod
    def add_response(self, response: LLMResponse) -> None:
        """Add a response to the current stream."""
        raise NotImplementedError

    @abc.abstractmethod
    def add_responses_together(self, *responses: LLMResponse) -> None:
        """Add multiple responses that should be emitted together in the same tick."""
        raise NotImplementedError

    @abc.abstractmethod
    def forward(self, n: int) -> None:
        """Forward the stream by n tokens."""
        raise NotImplementedError

    @abc.abstractmethod
    def forward_till_end(self) -> None:
        """Forward the stream until the end."""
        raise NotImplementedError

    @abc.abstractmethod
    def set_max_timeout(self, timeout: float = 5.0) -> None:
        raise NotImplementedError


class MockLLM(LLM, MockLLMController):
    def __init__(self) -> None:
        self.stream_controller = SyncStreamController[StreamItem]()

    def add_response(self, response: LLMResponse) -> None:
        items = _response_to_stream_items(response)
        self.stream_controller.queue_items(items)

    def add_responses_together(self, *responses: LLMResponse) -> None:
        """Add multiple responses that should be emitted together in the same tick.

        Currently only supports multiple tool call responses being grouped together.
        The initial tool call info (id, name) for all tool calls will be emitted
        in a single delta, followed by argument tokens for each tool call.
        """
        tool_calls = [r for r in responses if r.type == LLMResponseType.TOOL_CALL]

        if len(tool_calls) != len(responses):
            raise ValueError(
                "add_responses_together currently only supports multiple tool call responses"
            )

        # Create combined first item with all tool call initial info
        combined_data = [
            {
                "index": idx,
                "tool_call_id": cast(LLMToolCallResponse, tc).tool_call_id,
                "tool_name": cast(LLMToolCallResponse, tc).tool_name,
                "arguments": None,
            }
            for idx, tc in enumerate(tool_calls)
        ]
        combined_item = StreamItem(
            response_type=LLMResponseType.TOOL_CALL,
            data=combined_data,
        )
        self.stream_controller.queue_items([combined_item])

        # Add argument tokens for each tool call with their index
        for idx, tc in enumerate(tool_calls):
            tc = cast(LLMToolCallResponse, tc)
            for token in tc.tool_call_argument_tokens:
                item = StreamItem(
                    response_type=LLMResponseType.TOOL_CALL,
                    data=[
                        {
                            "index": idx,
                            "tool_call_id": None,
                            "tool_name": None,
                            "arguments": token,
                        }
                    ],
                )
                self.stream_controller.queue_items([item])

    def forward(self, n: int) -> None:
        if self.stream_controller:
            self.stream_controller.forward(n)
        else:
            raise ValueError("No response set")

    def forward_till_end(self) -> None:
        if self.stream_controller:
            self.stream_controller.forward_till_end()
        else:
            raise ValueError("No response set")

    def set_max_timeout(self, timeout: float = 5.0) -> None:
        self.stream_controller.timeout = timeout

    @property
    def config(self) -> LLMConfig:
        return LLMConfig(
            model_provider="mock",
            model_name="mock",
            temperature=1.0,
            max_input_tokens=1000000000,
        )

    def invoke(
        self,
        prompt: LanguageModelInput,
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
        max_tokens: int | None = None,
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,
        user_identity: LLMUserIdentity | None = None,
    ) -> ModelResponse:
        raise NotImplementedError("We only care about streaming atm")

    def stream(
        self,
        prompt: LanguageModelInput,  # noqa: ARG002
        tools: list[dict] | None = None,  # noqa: ARG002
        tool_choice: ToolChoiceOptions | None = None,  # noqa: ARG002
        structured_response_format: dict | None = None,  # noqa: ARG002
        timeout_override: int | None = None,  # noqa: ARG002
        max_tokens: int | None = None,  # noqa: ARG002
        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,  # noqa: ARG002
        user_identity: LLMUserIdentity | None = None,  # noqa: ARG002
    ) -> Iterator[ModelResponseStream]:
        if not self.stream_controller:
            return

        for idx, item in enumerate(self.stream_controller):
            yield ModelResponseStream(
                id="chatcmp-123",
                created="1",
                choice=StreamingChoice(
                    finish_reason=None,
                    index=0,  # Choice index should stay at 0 for all items in the same stream
                    delta=create_delta_from_stream_item(item),
                ),
                usage=None,
            )


class StreamTimeoutError(Exception):
    """Raised when the stream controller times out waiting for tokens."""


class SyncStreamController(Generic[T]):
    def __init__(self, items: list[T] | None = None, timeout: float = 5.0) -> None:
        self.items = items if items is not None else []
        self.position = 0
        self.pending: list[int] = []  # The indices of the tokens that are pending
        self.timeout = timeout  # Maximum time to wait for tokens before failing

        self._has_pending = threading.Event()

    def queue_items(self, new_items: list[T]) -> None:
        """Queue additional tokens to the stream (for chaining responses like reasoning + tool calls)."""
        self.items.extend(new_items)

    def forward(self, n: int) -> None:
        """Queue the next n tokens to be yielded"""
        end = min(self.position + n, len(self.items))
        self.pending.extend(range(self.position, end))
        self.position = end

        if self.pending:
            self._has_pending.set()

    def forward_till_end(self) -> None:
        self.forward(len(self.items) - self.position)

    @property
    def is_done(self) -> bool:
        return self.position >= len(self.items) and not self.pending

    def __iter__(self) -> SyncStreamController[T]:
        return self

    def __next__(self) -> T:
        start_time = time.monotonic()
        while not self.is_done:
            if self.pending:
                item_idx = self.pending.pop(0)
                if not self.pending:
                    self._has_pending.clear()
                return self.items[item_idx]

            elapsed = time.monotonic() - start_time
            if elapsed >= self.timeout:
                raise StreamTimeoutError(
                    f"Stream controller timed out after {self.timeout}s waiting for tokens. "
                    f"Position: {self.position}/{len(self.items)}, Pending: {len(self.pending)}"
                )

            self._has_pending.wait(timeout=0.1)

        raise StopIteration


@contextmanager
def use_mock_llm() -> Generator[MockLLMController, None, None]:
    mock_llm = MockLLM()

    with patch("onyx.chat.process_message.get_llm_for_persona", return_value=mock_llm):
        yield mock_llm


================================================
FILE: backend/tests/external_dependency_unit/mock_search_pipeline.py
================================================
from collections.abc import Callable
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import PersonaSearchInfo
from onyx.context.search.models import SearchDoc
from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.tools.tool_implementations.search.search_tool import SearchTool


def run_functions_tuples_sequential(
    functions_with_args: list[tuple[Callable, tuple]],
    allow_failures: bool = False,
    max_workers: int | None = None,  # noqa: ARG001
    timeout: float | None = None,  # noqa: ARG001
    timeout_callback: Callable | None = None,  # noqa: ARG001
) -> list[Any]:
    """
    A sequential replacement for run_functions_tuples_in_parallel.
    Useful in tests to make parallel tool calls deterministic.
    """
    results = []
    for func, args in functions_with_args:
        try:
            results.append(func(*args))
        except Exception:
            if allow_failures:
                results.append(None)
            else:
                raise
    return results


class MockInternalSearchResult(BaseModel):
    document_id: str
    source_type: DocumentSource
    semantic_identifier: str
    chunk_ind: int

    def to_inference_chunk(self) -> InferenceChunk:
        return InferenceChunk(
            document_id=f"{self.source_type.value.upper()}_{self.document_id}",
            source_type=self.source_type,
            semantic_identifier=self.semantic_identifier,
            title=self.semantic_identifier,
            chunk_id=self.chunk_ind,
            blurb="",
            content="",
            source_links=None,
            image_file_id=None,
            section_continuation=False,
            boost=0,
            score=1.0,
            hidden=False,
            metadata={},
            match_highlights=[],
            doc_summary="",
            chunk_context="",
            updated_at=None,
        )

    def to_search_doc(self) -> SearchDoc:
        return SearchDoc(
            document_id=f"{self.source_type.value.upper()}_{self.document_id}",
            chunk_ind=self.chunk_ind,
            semantic_identifier=self.semantic_identifier,
            link=None,
            blurb="",
            source_type=self.source_type,
            boost=0,
            hidden=False,
            metadata={},
            score=1.0,
            match_highlights=[],
            updated_at=None,
        )


class SearchPipelineController:
    def __init__(self) -> None:
        self.search_results: dict[str, list[MockInternalSearchResult]] = {}

    def add_search_results(
        self, query: str, results: list[MockInternalSearchResult]
    ) -> None:
        self.search_results[query] = results

    def get_search_results(self, query: str) -> list[InferenceChunk]:
        return [
            result.to_inference_chunk() for result in self.search_results.get(query, [])
        ]


@contextmanager
def use_mock_search_pipeline(
    connectors: list[DocumentSource],
) -> Generator[SearchPipelineController, None, None]:
    """Mock the search pipeline and connector availability.

    Args:
        connectors: List of DocumentSource types to pretend are available.
                   Pass an empty list to simulate no connectors.
    """
    controller = SearchPipelineController()

    def mock_check_connectors_exist(db_session: Session) -> bool:  # noqa: ARG001
        return len(connectors) > 0

    def mock_check_federated_connectors_exist(
        db_session: Session,  # noqa: ARG001
    ) -> bool:
        # For now, federated connectors are not mocked as available
        return False

    def mock_check_user_files_exist(db_session: Session) -> bool:  # noqa: ARG001
        # For now, user files are not mocked as available
        return False

    def mock_fetch_unique_document_sources(
        db_session: Session,  # noqa: ARG001
    ) -> list[DocumentSource]:
        return connectors

    def override_search_pipeline(
        chunk_search_request: ChunkSearchRequest,
        document_index: DocumentIndex,  # noqa: ARG001
        user: User | None,  # noqa: ARG001
        persona_search_info: PersonaSearchInfo | None,  # noqa: ARG001
        db_session: Session | None = None,  # noqa: ARG001
        auto_detect_filters: bool = False,  # noqa: ARG001
        llm: LLM | None = None,  # noqa: ARG001
        project_id_filter: int | None = None,  # noqa: ARG001
        persona_id_filter: int | None = None,  # noqa: ARG001
        # Pre-fetched data (used by SearchTool to avoid DB access in parallel calls)
        acl_filters: list[str] | None = None,  # noqa: ARG001
        embedding_model: EmbeddingModel | None = None,  # noqa: ARG001
        prefetched_federated_retrieval_infos: (  # noqa: ARG001
            list[FederatedRetrievalInfo] | None
        ) = None,
    ) -> list[InferenceChunk]:
        return controller.get_search_results(chunk_search_request.query)

    # Mock the pre-fetch session and DB queries in SearchTool.run() so
    # tests don't need a fully initialised DB with search settings.
    @contextmanager
    def mock_get_session() -> Generator[MagicMock, None, None]:
        yield MagicMock(spec=Session)

    with (
        patch(
            "onyx.tools.tool_implementations.search.search_tool.search_pipeline",
            new=override_search_pipeline,
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.check_connectors_exist",
            new=mock_check_connectors_exist,
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.check_federated_connectors_exist",
            new=mock_check_federated_connectors_exist,
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.semantic_query_rephrase",
            return_value="",
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.keyword_query_expansion",
            return_value=[],
        ),
        patch(
            "onyx.tools.tool_runner.run_functions_tuples_in_parallel",
            new=run_functions_tuples_sequential,
        ),
        patch(
            "onyx.db.connector.check_connectors_exist",
            new=mock_check_connectors_exist,
        ),
        patch(
            "onyx.db.connector.check_federated_connectors_exist",
            new=mock_check_federated_connectors_exist,
        ),
        patch(
            "onyx.db.connector.check_user_files_exist",
            new=mock_check_user_files_exist,
        ),
        patch(
            "onyx.db.connector.fetch_unique_document_sources",
            new=mock_fetch_unique_document_sources,
        ),
        # Mock the pre-fetch phase of SearchTool.run()
        patch(
            "onyx.tools.tool_implementations.search.search_tool.get_session_with_current_tenant",
            new=mock_get_session,
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.build_access_filters_for_user",
            return_value=[],
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.get_current_search_settings",
            return_value=MagicMock(spec=SearchSettings),
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.EmbeddingModel.from_db_model",
            return_value=MagicMock(spec=EmbeddingModel),
        ),
        patch(
            "onyx.tools.tool_implementations.search.search_tool.get_federated_retrieval_functions",
            return_value=[],
        ),
        patch.object(
            SearchTool,
            "_prefetch_slack_data",
            return_value=(None, None, {}),
        ),
    ):
        yield controller


================================================
FILE: backend/tests/external_dependency_unit/mock_search_provider.py
================================================
import abc
from collections import defaultdict
from collections.abc import Generator
from collections.abc import Sequence
from contextlib import contextmanager
from unittest.mock import patch

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.db.models import InternetSearchProvider
from onyx.db.web_search import fetch_web_search_provider_by_name
from onyx.tools.tool_implementations.web_search.models import WebSearchProvider
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
from shared_configs.enums import WebSearchProviderType


class MockWebSearchResult(BaseModel):
    title: str
    link: str
    snippet: str

    def to_web_search_result(self) -> WebSearchResult:
        return WebSearchResult(
            title=self.title,
            link=self.link,
            snippet=self.snippet,
            author=None,
            published_date=None,
        )


class WebProviderController(abc.ABC):
    @abc.abstractmethod
    def add_results(self, query: str, results: list[MockWebSearchResult]) -> None:
        raise NotImplementedError


class MockWebProvider(WebSearchProvider, WebProviderController):
    def __init__(self) -> None:
        self._results: dict[str, list[MockWebSearchResult]] = defaultdict(list)

    def add_results(self, query: str, results: list[MockWebSearchResult]) -> None:
        self._results[query] = results

    def search(self, query: str) -> Sequence[WebSearchResult]:
        return list(
            map(lambda result: result.to_web_search_result(), self._results[query])
        )

    def test_connection(self) -> dict[str, str]:
        return {}


def add_web_provider_to_db(db_session: Session) -> None:
    # Write a provider to the database
    if fetch_web_search_provider_by_name(name="Test Provider 2", db_session=db_session):
        return

    provider = InternetSearchProvider(
        name="Test Provider 2",
        provider_type=WebSearchProviderType.EXA.value,
        api_key="test-api-key",
        config={},
        is_active=True,
    )

    db_session.add(provider)
    db_session.commit()


def delete_web_provider_from_db(db_session: Session) -> None:
    provider = fetch_web_search_provider_by_name(
        name="Test Provider 2", db_session=db_session
    )
    if provider is not None:
        db_session.delete(provider)
        db_session.commit()


@contextmanager
def use_mock_web_provider(
    db_session: Session,
) -> Generator[WebProviderController, None, None]:
    web_provider = MockWebProvider()

    # Write the tool to the database
    add_web_provider_to_db(db_session)

    # override the build function
    with patch(
        "onyx.tools.tool_implementations.web_search.web_search_tool.build_search_provider_from_config",
        return_value=web_provider,
    ):
        yield web_provider

    delete_web_provider_from_db(db_session)


================================================
FILE: backend/tests/external_dependency_unit/opensearch/test_assistant_knowledge_filter.py
================================================
"""Tests for OpenSearch assistant knowledge filter construction.

These tests verify that when an assistant (persona) has knowledge attached,
the search filter includes the appropriate scope filters with OR logic (not AND),
ensuring documents are discoverable across knowledge types like attached documents,
hierarchy nodes, document sets, and persona/project user files.
"""

from typing import Any

from onyx.configs.constants import DocumentSource
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_ID_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
from onyx.document_index.opensearch.search import DocumentQuery
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

ATTACHED_DOCUMENT_ID = "https://docs.google.com/document/d/test-doc-id"
HIERARCHY_NODE_ID = 42
PERSONA_ID = 7
KNOWLEDGE_FILTER_SCHEMA_FIELDS = {
    DOCUMENT_ID_FIELD_NAME,
    ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME,
    DOCUMENT_SETS_FIELD_NAME,
    PERSONAS_FIELD_NAME,
}


def _get_search_filters(
    source_types: list[DocumentSource],
    attached_document_ids: list[str] | None,
    hierarchy_node_ids: list[int] | None,
    persona_id_filter: int | None = None,
    document_sets: list[str] | None = None,
) -> list[dict[str, Any]]:
    return DocumentQuery._get_search_filters(
        tenant_state=TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False),
        include_hidden=False,
        access_control_list=["user_email:test@example.com"],
        source_types=source_types,
        tags=[],
        document_sets=document_sets or [],
        project_id_filter=None,
        persona_id_filter=persona_id_filter,
        time_cutoff=None,
        min_chunk_index=None,
        max_chunk_index=None,
        max_chunk_size=None,
        document_id=None,
        attached_document_ids=attached_document_ids,
        hierarchy_node_ids=hierarchy_node_ids,
    )


class TestAssistantKnowledgeFilter:
    """Tests for assistant knowledge filter construction in OpenSearch queries."""

    def test_persona_id_filter_added_when_knowledge_scope_exists(self) -> None:
        """persona_id_filter should be OR'd into the knowledge scope filter
        when explicit knowledge attachments (attached_document_ids,
        hierarchy_node_ids, document_sets) are present."""
        filter_clauses = _get_search_filters(
            source_types=[DocumentSource.FILE],
            attached_document_ids=[ATTACHED_DOCUMENT_ID],
            hierarchy_node_ids=[HIERARCHY_NODE_ID],
            persona_id_filter=PERSONA_ID,
        )

        knowledge_filter = None
        for clause in filter_clauses:
            if "bool" in clause and "should" in clause["bool"]:
                if (
                    clause["bool"].get("minimum_should_match") == 1
                    and len(clause["bool"]["should"]) > 0
                    and (
                        (
                            clause["bool"]["should"][0].get("term", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("term", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                        or (
                            clause["bool"]["should"][0].get("terms", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("terms", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                    )
                ):
                    knowledge_filter = clause
                    break

        assert knowledge_filter is not None, (
            "Expected to find an assistant knowledge filter with "
            "'minimum_should_match: 1'"
        )

        should_clauses = knowledge_filter["bool"]["should"]
        persona_found = any(
            clause.get("term", {}).get(PERSONAS_FIELD_NAME, {}).get("value")
            == PERSONA_ID
            for clause in should_clauses
        )
        assert persona_found, (
            f"Expected persona_id={PERSONA_ID} filter on {PERSONAS_FIELD_NAME} "
            f"in should clauses. Got: {should_clauses}"
        )

    def test_persona_id_filter_alone_creates_knowledge_scope(self) -> None:
        """persona_id_filter IS a primary knowledge scope trigger — a persona
        with user files is explicit knowledge, so it should restrict
        search on its own."""
        filter_clauses = _get_search_filters(
            source_types=[],
            attached_document_ids=None,
            hierarchy_node_ids=None,
            persona_id_filter=PERSONA_ID,
        )

        knowledge_filter = None
        for clause in filter_clauses:
            if "bool" in clause and "should" in clause["bool"]:
                if (
                    clause["bool"].get("minimum_should_match") == 1
                    and len(clause["bool"]["should"]) > 0
                    and (
                        (
                            clause["bool"]["should"][0].get("term", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("term", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                        or (
                            clause["bool"]["should"][0].get("terms", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("terms", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                    )
                ):
                    knowledge_filter = clause
                    break

        assert (
            knowledge_filter is not None
        ), "Expected persona_id_filter alone to create a knowledge scope filter"
        persona_found = any(
            clause.get("term", {}).get(PERSONAS_FIELD_NAME, {}).get("value")
            == PERSONA_ID
            for clause in knowledge_filter["bool"]["should"]
        )
        assert persona_found, (
            f"Expected persona_id={PERSONA_ID} filter in knowledge scope. "
            f"Got: {knowledge_filter}"
        )

    def test_knowledge_filter_with_document_sets_and_persona_filter(self) -> None:
        """document_sets and persona_id_filter should be OR'd together in
        the knowledge scope filter."""
        filter_clauses = _get_search_filters(
            source_types=[],
            attached_document_ids=None,
            hierarchy_node_ids=None,
            persona_id_filter=PERSONA_ID,
            document_sets=["engineering"],
        )

        knowledge_filter = None
        for clause in filter_clauses:
            if "bool" in clause and "should" in clause["bool"]:
                if (
                    clause["bool"].get("minimum_should_match") == 1
                    and len(clause["bool"]["should"]) > 0
                    and (
                        (
                            clause["bool"]["should"][0].get("term", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("term", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                        or (
                            clause["bool"]["should"][0].get("terms", {}).keys()
                            and list(
                                clause["bool"]["should"][0].get("terms", {}).keys()
                            )[0]
                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS
                        )
                    )
                ):
                    knowledge_filter = clause
                    break

        assert (
            knowledge_filter is not None
        ), "Expected knowledge filter when document_sets is provided"

        filter_str = str(knowledge_filter)
        assert (
            "engineering" in filter_str
        ), "Expected document_set 'engineering' in knowledge filter"
        assert (
            str(PERSONA_ID) in filter_str
        ), f"Expected persona_id_filter {PERSONA_ID} in knowledge filter"


================================================
FILE: backend/tests/external_dependency_unit/opensearch/test_opensearch_client.py
================================================
"""External dependency unit tests for OpenSearchIndexClient.

These tests assume OpenSearch is running and test all implemented methods
using real schemas, pipelines, and search queries from the codebase.
"""

import re
import uuid
from collections.abc import Generator
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import pytest
from opensearchpy import NotFoundError

from onyx.access.models import DocumentAccess
from onyx.access.utils import prefix_user_email
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline
from onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration
from onyx.document_index.opensearch.opensearch_document_index import (
    generate_opensearch_filtered_access_control_list,
)
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
from onyx.document_index.opensearch.search import (
    get_min_max_normalization_pipeline_name_and_config,
)
from onyx.document_index.opensearch.search import (
    get_normalization_pipeline_name_and_config,
)
from onyx.document_index.opensearch.search import (
    get_zscore_normalization_pipeline_name_and_config,
)
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA


def _patch_global_tenant_state(monkeypatch: pytest.MonkeyPatch, state: bool) -> None:
    """Patches MULTI_TENANT wherever necessary for this test file.

    Args:
        monkeypatch: The test instance's monkeypatch instance, used for
            patching.
        state: The intended state of MULTI_TENANT.
    """
    monkeypatch.setattr("shared_configs.configs.MULTI_TENANT", state)
    monkeypatch.setattr("onyx.document_index.opensearch.schema.MULTI_TENANT", state)


def _patch_hybrid_search_subquery_configuration(
    monkeypatch: pytest.MonkeyPatch, configuration: HybridSearchSubqueryConfiguration
) -> None:
    """
    Patches HYBRID_SEARCH_SUBQUERY_CONFIGURATION wherever necessary for this
    test file.

    Args:
        monkeypatch: The test instance's monkeypatch instance, used for
            patching.
        configuration: The intended state of
            HYBRID_SEARCH_SUBQUERY_CONFIGURATION.
    """
    monkeypatch.setattr(
        "onyx.document_index.opensearch.constants.HYBRID_SEARCH_SUBQUERY_CONFIGURATION",
        configuration,
    )
    monkeypatch.setattr(
        "onyx.document_index.opensearch.search.HYBRID_SEARCH_SUBQUERY_CONFIGURATION",
        configuration,
    )


def _patch_hybrid_search_normalization_pipeline(
    monkeypatch: pytest.MonkeyPatch, pipeline: HybridSearchNormalizationPipeline
) -> None:
    """
    Patches HYBRID_SEARCH_NORMALIZATION_PIPELINE wherever necessary for this
    test file.
    """
    monkeypatch.setattr(
        "onyx.document_index.opensearch.constants.HYBRID_SEARCH_NORMALIZATION_PIPELINE",
        pipeline,
    )
    monkeypatch.setattr(
        "onyx.document_index.opensearch.search.HYBRID_SEARCH_NORMALIZATION_PIPELINE",
        pipeline,
    )


def _patch_opensearch_match_highlights_disabled(
    monkeypatch: pytest.MonkeyPatch, disabled: bool
) -> None:
    """
    Patches OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED wherever necessary for this
    test file.
    """
    monkeypatch.setattr(
        "onyx.configs.app_configs.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED",
        disabled,
    )
    monkeypatch.setattr(
        "onyx.document_index.opensearch.search.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED",
        disabled,
    )


def _create_test_document_chunk(
    document_id: str,
    content: str,
    tenant_state: TenantState,
    chunk_index: int = 0,
    content_vector: list[float] | None = None,
    title: str | None = None,
    title_vector: list[float] | None = None,
    hidden: bool = False,
    document_access: DocumentAccess = DocumentAccess.build(
        user_emails=[],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=True,
    ),
    source_type: DocumentSource = DocumentSource.FILE,
    last_updated: datetime | None = None,
) -> DocumentChunk:
    if content_vector is None:
        # Generate dummy vector - 128 dimensions for fast testing.
        content_vector = [0.1] * 128

    # If title is provided but no vector, generate one.
    if title is not None and title_vector is None:
        title_vector = [0.2] * 128

    return DocumentChunk(
        document_id=document_id,
        chunk_index=chunk_index,
        title=title,
        title_vector=title_vector,
        content=content,
        content_vector=content_vector,
        source_type=source_type.value,
        metadata_list=None,
        last_updated=last_updated,
        public=document_access.is_public,
        access_control_list=generate_opensearch_filtered_access_control_list(
            document_access
        ),
        hidden=hidden,
        global_boost=0,
        semantic_identifier="Test semantic identifier",
        image_file_id=None,
        source_links=None,
        blurb="Test blurb",
        doc_summary="Test doc summary",
        chunk_context="Test chunk context",
        document_sets=None,
        user_projects=None,
        primary_owners=None,
        secondary_owners=None,
        tenant_id=tenant_state,
    )


def _generate_test_vector(base_value: float = 0.1, dimension: int = 128) -> list[float]:
    """Generates a test vector with slight variations.

    We round to eliminate floating point precision errors when comparing chunks
    for equality.
    """
    return [round(base_value + (i * 0.001), 5) for i in range(dimension)]


@pytest.fixture(scope="module")
def opensearch_available() -> None:
    """Verifies OpenSearch is running, skips all tests if not."""
    if not wait_for_opensearch_with_timeout():
        pytest.fail("OpenSearch is not available.")


@pytest.fixture(scope="function")
def test_client(
    opensearch_available: None,  # noqa: ARG001
) -> Generator[OpenSearchIndexClient, None, None]:
    """Creates an OpenSearch client for testing with automatic cleanup."""
    test_index_name = f"test_index_{uuid.uuid4().hex[:8]}"
    client = OpenSearchIndexClient(index_name=test_index_name)

    yield client  # Test runs here.

    # Cleanup after test completes.
    try:
        client.delete_index()
    except Exception:
        pass
    finally:
        client.close()


@pytest.fixture(scope="function")
def search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None, None]:
    """Creates a search pipeline for testing with automatic cleanup."""
    min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (
        get_min_max_normalization_pipeline_name_and_config()
    )
    zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (
        get_zscore_normalization_pipeline_name_and_config()
    )
    test_client.create_search_pipeline(
        pipeline_id=min_max_normalization_pipeline_name,
        pipeline_body=min_max_normalization_pipeline_config,
    )
    test_client.create_search_pipeline(
        pipeline_id=zscore_normalization_pipeline_name,
        pipeline_body=zscore_normalization_pipeline_config,
    )
    yield  # Test runs here.
    try:
        test_client.delete_search_pipeline(
            pipeline_id=min_max_normalization_pipeline_name,
        )
        test_client.delete_search_pipeline(
            pipeline_id=zscore_normalization_pipeline_name,
        )
    except Exception:
        pass


class TestOpenSearchClient:
    """Tests for OpenSearchIndexClient."""

    def test_create_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests creating an index with a real schema."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()

        # Under test.
        # Should not raise.
        test_client.create_index(mappings=mappings, settings=settings)

        # Postcondition.
        # Verify index exists.
        assert test_client.validate_index(expected_mappings=mappings) is True

    def test_delete_existing_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests deleting an existing index returns True."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test.
        # Delete should return True.
        result = test_client.delete_index()

        # Postcondition.
        assert result is True
        assert test_client.validate_index(expected_mappings=mappings) is False

    def test_delete_nonexistent_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests deleting a nonexistent index returns False."""
        # Under test.
        # Don't create index, just try to delete.
        result = test_client.delete_index()

        # Postcondition.
        assert result is False

    def test_index_exists(self, test_client: OpenSearchIndexClient) -> None:
        """Tests checking if an index exists."""
        # Precondition.
        # Index should not exist before creation.
        assert test_client.index_exists() is False

        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()

        test_client.create_index(mappings=mappings, settings=settings)

        # Under test and postcondition.
        # Index should exist after creation.
        assert test_client.index_exists() is True

    def test_validate_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests validating an index."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()

        # Under test and postcondition.
        # Should return False before creation.
        assert test_client.validate_index(expected_mappings=mappings) is False

        # Precondition.
        # Create index.
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test and postcondition.
        # Should return True after creation.
        assert test_client.validate_index(expected_mappings=mappings) is True

    def test_put_mapping_idempotent(self, test_client: OpenSearchIndexClient) -> None:
        """Tests put_mapping with same schema is idempotent."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test.
        # Applying the same mappings again should succeed.
        test_client.put_mapping(mappings)

        # Postcondition.
        # Index should still be valid.
        assert test_client.validate_index(expected_mappings=mappings)

    def test_put_mapping_adds_new_field(
        self, test_client: OpenSearchIndexClient
    ) -> None:
        """Tests put_mapping successfully adds new fields to existing index."""
        # Precondition.
        # Create index with minimal schema (just required fields).
        initial_mappings = {
            "dynamic": "strict",
            "properties": {
                "document_id": {"type": "keyword"},
                "chunk_index": {"type": "integer"},
                "content": {"type": "text"},
                "content_vector": {
                    "type": "knn_vector",
                    "dimension": 128,
                    "method": {
                        "name": "hnsw",
                        "space_type": "cosinesimil",
                        "engine": "lucene",
                        "parameters": {"ef_construction": 512, "m": 16},
                    },
                },
            },
        }
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=initial_mappings, settings=settings)

        # Under test.
        # Add a new field using put_mapping.
        updated_mappings = {
            "properties": {
                "document_id": {"type": "keyword"},
                "chunk_index": {"type": "integer"},
                "content": {"type": "text"},
                "content_vector": {
                    "type": "knn_vector",
                    "dimension": 128,
                    "method": {
                        "name": "hnsw",
                        "space_type": "cosinesimil",
                        "engine": "lucene",
                        "parameters": {"ef_construction": 512, "m": 16},
                    },
                },
                # New field
                "new_test_field": {"type": "keyword"},
            },
        }
        # Should not raise.
        test_client.put_mapping(updated_mappings)

        # Postcondition.
        # Validate the new schema includes the new field.
        assert test_client.validate_index(expected_mappings=updated_mappings)

    def test_put_mapping_fails_on_type_change(
        self, test_client: OpenSearchIndexClient
    ) -> None:
        """Tests put_mapping fails when trying to change existing field type."""
        # Precondition.
        initial_mappings = {
            "dynamic": "strict",
            "properties": {
                "document_id": {"type": "keyword"},
                "test_field": {"type": "keyword"},
            },
        }
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=initial_mappings, settings=settings)

        # Under test and postcondition.
        # Try to change test_field type from keyword to text.
        conflicting_mappings = {
            "properties": {
                "document_id": {"type": "keyword"},
                "test_field": {"type": "text"},  # Changed from keyword to text
            },
        }
        # Should raise because field type cannot be changed.
        with pytest.raises(Exception, match="mapper|illegal_argument_exception"):
            test_client.put_mapping(conflicting_mappings)

    def test_put_mapping_on_nonexistent_index(
        self, test_client: OpenSearchIndexClient
    ) -> None:
        """Tests put_mapping on non-existent index raises an error."""
        # Precondition.
        # Index does not exist yet.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )

        # Under test and postcondition.
        with pytest.raises(Exception, match="index_not_found_exception|404"):
            test_client.put_mapping(mappings)

    def test_create_duplicate_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests creating an index twice raises an error."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=True
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        # Create once - should succeed.
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test and postcondition.
        # Create again - should raise.
        with pytest.raises(Exception, match="already exists"):
            test_client.create_index(mappings=mappings, settings=settings)

    def test_update_settings(self, test_client: OpenSearchIndexClient) -> None:
        """Tests that update_settings raises NotImplementedError."""
        # Under test and postcondition.
        with pytest.raises(NotImplementedError):
            test_client.update_settings(settings={})

    def test_create_and_delete_search_pipeline(
        self, test_client: OpenSearchIndexClient
    ) -> None:
        """Tests creating and deleting a search pipeline."""
        # Precondition.
        pipeline_name, pipeline_config = get_normalization_pipeline_name_and_config()

        # Under test and postcondition.
        # Should not raise.
        test_client.create_search_pipeline(
            pipeline_id=pipeline_name,
            pipeline_body=pipeline_config,
        )

        # Under test and postcondition.
        # Should not raise.
        test_client.delete_search_pipeline(pipeline_id=pipeline_name)

    def test_index_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests indexing a document."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        doc = _create_test_document_chunk(
            document_id="test-doc-1",
            chunk_index=0,
            content="Test content for indexing",
            tenant_state=tenant_state,
        )

        # Under test and postcondition.
        # Should not raise.
        test_client.index_document(document=doc, tenant_state=tenant_state)
        # Should not raise if we supply update_if_exists.
        test_client.index_document(
            document=doc, tenant_state=tenant_state, update_if_exists=True
        )

    def test_bulk_index_documents(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests bulk indexing documents."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        docs = [
            _create_test_document_chunk(
                document_id=f"test-doc-{i}",
                chunk_index=i,
                content=f"Test content for indexing {i}",
                tenant_state=tenant_state,
            )
            for i in range(500)
        ]

        # Under test and postcondition.
        # Should not raise.
        test_client.bulk_index_documents(documents=docs, tenant_state=tenant_state)
        # Should not raise if we supply update_if_exists.
        test_client.bulk_index_documents(
            documents=docs, tenant_state=tenant_state, update_if_exists=True
        )

    def test_index_duplicate_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests indexing a duplicate document raises an error."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        doc = _create_test_document_chunk(
            document_id="test-doc-duplicate",
            chunk_index=0,
            content="Duplicate test",
            tenant_state=tenant_state,
        )

        # Index once - should succeed.
        test_client.index_document(document=doc, tenant_state=tenant_state)

        # Under test and postcondition.
        # Index again - should raise.
        with pytest.raises(Exception, match="already exists"):
            test_client.index_document(document=doc, tenant_state=tenant_state)

    def test_get_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests getting a document."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        original_doc = _create_test_document_chunk(
            document_id="test-doc-get",
            chunk_index=0,
            content="Content to retrieve",
            tenant_state=tenant_state,
            # We only store second precision, so to make sure asserts work in
            # this test we'll deliberately lose some precision.
            last_updated=datetime.now(timezone.utc).replace(microsecond=0),
        )
        test_client.index_document(document=original_doc, tenant_state=tenant_state)

        # Under test.
        doc_chunk_id = get_opensearch_doc_chunk_id(
            tenant_state=tenant_state,
            document_id=original_doc.document_id,
            chunk_index=original_doc.chunk_index,
            max_chunk_size=original_doc.max_chunk_size,
        )
        retrieved_doc = test_client.get_document(document_chunk_id=doc_chunk_id)

        # Postcondition.
        assert retrieved_doc == original_doc

    def test_get_nonexistent_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests getting a nonexistent document raises an error."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=False
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test and postcondition.
        with pytest.raises(Exception, match="404"):
            test_client.get_document(
                document_chunk_id="test_source__nonexistent__512__0"
            )

    def test_delete_existing_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting an existing document returns True."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        doc = _create_test_document_chunk(
            document_id="test-doc-delete",
            chunk_index=0,
            content="Content to delete",
            tenant_state=tenant_state,
        )
        test_client.index_document(document=doc, tenant_state=tenant_state)

        # Under test.
        doc_chunk_id = get_opensearch_doc_chunk_id(
            tenant_state=tenant_state,
            document_id=doc.document_id,
            chunk_index=doc.chunk_index,
            max_chunk_size=doc.max_chunk_size,
        )
        result = test_client.delete_document(document_chunk_id=doc_chunk_id)

        # Postcondition.
        assert result is True
        # Verify the document is gone.
        with pytest.raises(NotFoundError, match="404"):
            test_client.get_document(document_chunk_id=doc_chunk_id)

    def test_delete_nonexistent_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting a nonexistent document returns False."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test.
        result = test_client.delete_document(
            document_chunk_id="test_source__nonexistent__512__0"
        )

        # Postcondition.
        assert result is False

    def test_delete_by_query(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting documents by query."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index multiple documents.
        docs_to_delete = [
            _create_test_document_chunk(
                document_id="delete-me",
                chunk_index=i,
                content=f"Delete this {i}",
                tenant_state=tenant_state,
            )
            for i in range(3)
        ]
        docs_to_keep = [
            _create_test_document_chunk(
                document_id="keep-me",
                chunk_index=0,
                content="Keep this",
                tenant_state=tenant_state,
            )
        ]

        for doc in docs_to_delete + docs_to_keep:
            test_client.index_document(document=doc, tenant_state=tenant_state)
        test_client.refresh_index()

        query_body = DocumentQuery.delete_from_document_id_query(
            document_id="delete-me",
            tenant_state=tenant_state,
        )

        # Under test.
        num_deleted = test_client.delete_by_query(query_body=query_body)

        # Postcondition.
        assert num_deleted == 3

        # Verify deletion - the deleted documents should no longer exist.
        test_client.refresh_index()
        search_query = DocumentQuery.get_from_document_id_query(
            document_id="delete-me",
            tenant_state=tenant_state,
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )
        remaining_ids = test_client.search_for_document_ids(body=search_query)
        assert len(remaining_ids) == 0

        # Verify other documents still exist.
        keep_query = DocumentQuery.get_from_document_id_query(
            document_id="keep-me",
            tenant_state=tenant_state,
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )
        keep_ids = test_client.search_for_document_ids(body=keep_query)
        assert len(keep_ids) == 1

    def test_update_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests updating a document's properties."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Create a document to update.
        doc = _create_test_document_chunk(
            document_id="test-doc-update",
            chunk_index=0,
            content="Original content",
            tenant_state=tenant_state,
            hidden=False,
        )
        test_client.index_document(document=doc, tenant_state=tenant_state)

        # Under test.
        doc_chunk_id = get_opensearch_doc_chunk_id(
            tenant_state=tenant_state,
            document_id=doc.document_id,
            chunk_index=doc.chunk_index,
            max_chunk_size=doc.max_chunk_size,
        )
        properties_to_update = {
            "hidden": True,
            "global_boost": 5,
        }
        test_client.update_document(
            document_chunk_id=doc_chunk_id,
            properties_to_update=properties_to_update,
        )

        # Postcondition.
        # Retrieve the document and verify updates were applied.
        updated_doc = test_client.get_document(document_chunk_id=doc_chunk_id)
        assert updated_doc.hidden is True
        assert updated_doc.global_boost == 5
        # Other properties should remain unchanged.
        assert updated_doc.document_id == doc.document_id
        assert updated_doc.content == doc.content
        assert updated_doc.public == doc.public

    def test_update_nonexistent_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests updating a nonexistent document raises an error."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Under test and postcondition.
        # Try to update a document that doesn't exist.
        with pytest.raises(NotFoundError, match="404"):
            test_client.update_document(
                document_chunk_id="test_source__nonexistent__512__0",
                properties_to_update={"hidden": True},
            )

    def test_hybrid_search_configurations_and_pipelines(
        self,
        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Tests all hybrid search configurations and pipelines."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        _patch_opensearch_match_highlights_disabled(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)
        # Index documents.
        docs = {
            "doc-1": _create_test_document_chunk(
                document_id="doc-1",
                chunk_index=0,
                content="Python programming language tutorial",
                content_vector=_generate_test_vector(0.1),
                tenant_state=tenant_state,
            ),
            "doc-2": _create_test_document_chunk(
                document_id="doc-2",
                chunk_index=0,
                content="How to make cheese",
                content_vector=_generate_test_vector(0.2),
                tenant_state=tenant_state,
            ),
            "doc-3": _create_test_document_chunk(
                document_id="doc-3",
                chunk_index=0,
                content="C++ for newborns",
                content_vector=_generate_test_vector(0.15),
                tenant_state=tenant_state,
            ),
        }
        for doc in docs.values():
            test_client.index_document(document=doc, tenant_state=tenant_state)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        for configuration in HybridSearchSubqueryConfiguration:
            _patch_hybrid_search_subquery_configuration(monkeypatch, configuration)
            for pipeline in HybridSearchNormalizationPipeline:
                _patch_hybrid_search_normalization_pipeline(monkeypatch, pipeline)
                pipeline_name, pipeline_config = (
                    get_normalization_pipeline_name_and_config()
                )
                test_client.create_search_pipeline(
                    pipeline_id=pipeline_name,
                    pipeline_body=pipeline_config,
                )

                # Search query.
                query_text = "Python programming"
                query_vector = _generate_test_vector(0.12)
                search_body = DocumentQuery.get_hybrid_search_query(
                    query_text=query_text,
                    query_vector=query_vector,
                    num_hits=5,
                    tenant_state=tenant_state,
                    # We're not worried about filtering here. tenant_id in this object
                    # is not relevant.
                    index_filters=IndexFilters(
                        access_control_list=None, tenant_id=None
                    ),
                    include_hidden=False,
                )

                # Under test.
                results = test_client.search(
                    body=search_body, search_pipeline_id=pipeline_name
                )

                # Postcondition.
                assert len(results) == len(docs)
                # Assert that all the chunks above are present.
                assert all(
                    chunk.document_chunk.document_id in docs.keys() for chunk in results
                )
                # Make sure the chunk contents are preserved.
                for i, chunk in enumerate(results):
                    expected = docs[chunk.document_chunk.document_id]
                    assert chunk.document_chunk == DocumentChunkWithoutVectors(
                        **{
                            k: getattr(expected, k)
                            for k in DocumentChunkWithoutVectors.model_fields
                        }
                    )
                    # Make sure score reporting seems reasonable (it should not be None
                    # or 0).
                    assert chunk.score
                    # Make sure there is some kind of match highlight only for the first
                    # result. The other results are so bad they're not expected to have
                    # match highlights.
                    if i == 0:
                        assert chunk.match_highlights.get(CONTENT_FIELD_NAME, [])

    def test_search_empty_index(
        self,
        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Tests search on an empty index returns an empty list."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)
        # Note no documents were indexed.

        # Search query.
        query_text = "test query"
        query_vector = _generate_test_vector(0.5)
        search_body = DocumentQuery.get_hybrid_search_query(
            query_text=query_text,
            query_vector=query_vector,
            num_hits=5,
            tenant_state=tenant_state,
            # We're not worried about filtering here. tenant_id in this object
            # is not relevant.
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
        )
        pipeline_name, _ = get_normalization_pipeline_name_and_config()

        # Under test.
        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)

        # Postcondition.
        assert len(results) == 0

    def test_hybrid_search_with_pipeline_and_filters(
        self,
        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        Tests search filters for ACL, hidden documents, and tenant isolation.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, True)
        _patch_opensearch_match_highlights_disabled(monkeypatch, False)
        tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
        tenant_y = TenantState(tenant_id="tenant-y", multitenant=True)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_x.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index documents with different public/hidden, ACL, and tenant states.
        docs = {
            "public-doc": _create_test_document_chunk(
                document_id="public-doc",
                chunk_index=0,
                content="Public document content",
                hidden=False,
                tenant_state=tenant_x,
            ),
            "hidden-doc": _create_test_document_chunk(
                document_id="hidden-doc",
                chunk_index=0,
                content="Hidden document content, spooky",
                hidden=True,
                tenant_state=tenant_x,
            ),
            "private-doc-user-a": _create_test_document_chunk(
                document_id="private-doc-user-a",
                chunk_index=0,
                content="Private document content, btw my SSN is 123-45-6789",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-a@example.com", "user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            "private-doc-user-b": _create_test_document_chunk(
                document_id="private-doc-user-b",
                chunk_index=0,
                content="Private document content, btw my SSN is 987-65-4321",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            "should-not-exist-from-tenant-x-pov": _create_test_document_chunk(
                document_id="should-not-exist-from-tenant-x-pov",
                chunk_index=0,
                content="This is an entirely different tenant, x should never see this",
                # Make this as permissive as possible to exercise tenant
                # isolation.
                hidden=False,
                tenant_state=tenant_y,
            ),
        }
        for doc in docs.values():
            test_client.index_document(document=doc, tenant_state=doc.tenant_id)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        query_text = "document content"
        query_vector = _generate_test_vector(0.6)
        search_body = DocumentQuery.get_hybrid_search_query(
            query_text=query_text,
            query_vector=query_vector,
            num_hits=5,
            tenant_state=tenant_x,
            # The user should only be able to see their private docs. tenant_id
            # in this object is not relevant.
            index_filters=IndexFilters(
                access_control_list=[
                    prefix_user_email("user-a@example.com"),
                    prefix_user_email("user-c@example.com"),
                ],
                tenant_id=None,
            ),
            include_hidden=False,
        )
        pipeline_name, _ = get_normalization_pipeline_name_and_config()

        # Under test.
        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)

        # Postcondition.
        # Should only get the public, non-hidden document, and the private
        # document for which the user has access.
        assert len(results) == 2
        # NOTE: This test is not explicitly testing for how well results are
        # ordered; we're just assuming which doc will be the first result here.
        assert results[0].document_chunk.document_id == "public-doc"
        # Make sure the chunk contents are preserved.
        assert results[0].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["public-doc"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        # Make sure score reporting seems reasonable (it should not be None
        # or 0).
        assert results[0].score
        # Make sure there is some kind of match highlight.
        assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])
        # Same for the second result.
        assert results[1].document_chunk.document_id == "private-doc-user-a"
        assert results[1].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["private-doc-user-a"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        assert results[1].score
        assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])

    def test_hybrid_search_with_pipeline_and_filters_returns_chunks_with_related_content_first(
        self,
        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        Tests search with a normalization pipeline and filters returns chunks
        with related content first.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, True)
        _patch_opensearch_match_highlights_disabled(monkeypatch, False)
        tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_x.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index documents with varying relevance to the query.
        # Vectors closer to query_vector (0.1) should rank higher.
        docs = [
            _create_test_document_chunk(
                document_id="highly-relevant",
                chunk_index=0,
                content="Artificial intelligence and machine learning transform technology",
                content_vector=_generate_test_vector(
                    0.1
                ),  # Very close to query vector.
                hidden=False,
                tenant_state=tenant_x,
            ),
            _create_test_document_chunk(
                document_id="somewhat-relevant",
                chunk_index=0,
                content="Computer programming with various languages",
                content_vector=_generate_test_vector(0.5),  # Far from query vector.
                hidden=False,
                tenant_state=tenant_x,
            ),
            _create_test_document_chunk(
                document_id="not-very-relevant",
                chunk_index=0,
                content="Cooking recipes for delicious meals",
                content_vector=_generate_test_vector(
                    0.9
                ),  # Very far from query vector.
                hidden=False,
                tenant_state=tenant_x,
            ),
            # These should be filtered out by public/hidden filters.
            _create_test_document_chunk(
                document_id="hidden-but-relevant",
                chunk_index=0,
                content="Artificial intelligence research papers",
                content_vector=_generate_test_vector(0.05),  # Very close but hidden.
                hidden=True,
                tenant_state=tenant_x,
            ),
            _create_test_document_chunk(
                document_id="private-but-relevant",
                chunk_index=0,
                content="Artificial intelligence industry analysis",
                content_vector=_generate_test_vector(0.08),  # Very close but private.
                document_access=DocumentAccess.build(
                    user_emails=[],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
                hidden=False,
                tenant_state=tenant_x,
            ),
        ]
        for doc in docs:
            test_client.index_document(document=doc, tenant_state=tenant_x)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        # Search query matching "highly-relevant" most closely.
        query_text = "artificial intelligence"
        query_vector = _generate_test_vector(0.1)
        search_body = DocumentQuery.get_hybrid_search_query(
            query_text=query_text,
            query_vector=query_vector,
            num_hits=5,
            tenant_state=tenant_x,
            # Explicitly pass in an empty list to enforce private doc filtering.
            index_filters=IndexFilters(access_control_list=[], tenant_id=None),
            include_hidden=False,
        )
        pipeline_name, _ = get_normalization_pipeline_name_and_config()

        # Under test.
        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)

        # Postcondition.
        # Should only get public, non-hidden documents (3 out of 5).
        assert len(results) == 3
        result_ids = [chunk.document_chunk.document_id for chunk in results]
        assert "highly-relevant" in result_ids
        assert "somewhat-relevant" in result_ids
        assert "not-very-relevant" in result_ids
        # Filtered out by public/hidden constraints.
        assert "hidden-but-relevant" not in result_ids
        assert "private-but-relevant" not in result_ids

        # Most relevant document should be first.
        assert results[0].document_chunk.document_id == "highly-relevant"

        # Make sure there is some kind of match highlight for the most relevant
        # result.
        match_highlights = results[0].match_highlights.get(CONTENT_FIELD_NAME, [])
        assert len(match_highlights) == 1
        # We expect the terms "Artificial" and "intelligence" to be matched.
        highlight_split = re.findall(r"<hi>(.*?)</hi>", match_highlights[0])
        assert len(highlight_split) == 2
        assert highlight_split[0] == "Artificial"
        assert highlight_split[1] == "intelligence"

        # Returned documents should be ordered by descending score.
        previous_score = float("inf")
        for result in results:
            current_score = result.score
            assert current_score
            assert current_score < previous_score
            previous_score = current_score

    def test_delete_by_query_multitenant_isolation(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests delete_by_query respects tenant boundaries in multi-tenant mode.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, True)
        tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
        tenant_y = TenantState(tenant_id="tenant-y", multitenant=True)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_x.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Although very unlikely in practice, let's use the same doc ID just to
        # make sure that doesn't break the index.
        tenant_x_chunks = [
            _create_test_document_chunk(
                document_id="doc",
                chunk_index=i,
                content=f"Tenant A Chunk {i}",
                tenant_state=tenant_x,
            )
            for i in range(3)
        ]

        tenant_y_chunks = [
            _create_test_document_chunk(
                document_id="doc",
                chunk_index=i,
                content=f"Tenant B Chunk {i}",
                tenant_state=tenant_y,
            )
            for i in range(2)
        ]

        for chunk in tenant_x_chunks + tenant_y_chunks:
            test_client.index_document(document=chunk, tenant_state=chunk.tenant_id)
        test_client.refresh_index()

        # Build deletion query for tenant-x only.
        query_body = DocumentQuery.delete_from_document_id_query(
            document_id="doc",
            tenant_state=tenant_x,
        )

        # Under test.
        # Delete tenant-x chunks using delete_by_query.
        num_deleted = test_client.delete_by_query(query_body=query_body)

        # Postcondition.
        assert num_deleted == 3

        # Verify tenant-x chunks are deleted.
        test_client.refresh_index()
        verify_query_x = DocumentQuery.get_from_document_id_query(
            document_id="doc",
            tenant_state=tenant_x,
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )
        remaining_a_ids = test_client.search_for_document_ids(body=verify_query_x)
        assert len(remaining_a_ids) == 0

        # Verify tenant-y chunks still exist.
        verify_query_y = DocumentQuery.get_from_document_id_query(
            document_id="doc",
            tenant_state=tenant_y,
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )
        remaining_y_ids = test_client.search_for_document_ids(body=verify_query_y)
        assert len(remaining_y_ids) == 2
        expected_y_ids = {
            get_opensearch_doc_chunk_id(
                tenant_state=tenant_y,
                document_id=chunk.document_id,
                chunk_index=chunk.chunk_index,
                max_chunk_size=chunk.max_chunk_size,
            )
            for chunk in tenant_y_chunks
        }
        assert set(remaining_y_ids) == expected_y_ids

    def test_delete_by_query_nonexistent_document(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests delete_by_query for non-existent document returns 0 deleted.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Don't index any documents.

        # Build deletion query.
        query_body = DocumentQuery.delete_from_document_id_query(
            document_id="nonexistent-doc",
            tenant_state=tenant_state,
        )

        # Under test.
        num_deleted = test_client.delete_by_query(query_body=query_body)

        # Postcondition.
        assert num_deleted == 0

    def test_search_for_document_ids(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests search_for_document_ids method returns correct chunk IDs."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index chunks for two different documents.
        doc1_chunks = [
            _create_test_document_chunk(
                document_id="doc-1",
                chunk_index=i,
                content=f"Doc 1 Chunk {i}",
                tenant_state=tenant_state,
            )
            for i in range(3)
        ]
        doc2_chunks = [
            _create_test_document_chunk(
                document_id="doc-2",
                chunk_index=i,
                content=f"Doc 2 Chunk {i}",
                tenant_state=tenant_state,
            )
            for i in range(2)
        ]

        for chunk in doc1_chunks + doc2_chunks:
            test_client.index_document(document=chunk, tenant_state=tenant_state)
        test_client.refresh_index()

        # Build query for doc-1.
        query_body = DocumentQuery.get_from_document_id_query(
            document_id="doc-1",
            tenant_state=tenant_state,
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )

        # Under test.
        chunk_ids = test_client.search_for_document_ids(body=query_body)

        # Postcondition.
        assert len(chunk_ids) == 3
        expected_ids = {
            get_opensearch_doc_chunk_id(
                tenant_state=tenant_state,
                document_id=chunk.document_id,
                chunk_index=chunk.chunk_index,
                max_chunk_size=chunk.max_chunk_size,
            )
            for chunk in doc1_chunks
        }
        assert set(chunk_ids) == expected_ids

    def test_search_with_no_document_access_can_retrieve_all_documents(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests search with no document access can retrieve all documents, even
        private ones.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index documents with different public/hidden and tenant states.
        docs = {
            "public-doc": _create_test_document_chunk(
                document_id="public-doc",
                chunk_index=0,
                content="Public document content",
                hidden=False,
                tenant_state=tenant_state,
            ),
            "hidden-doc": _create_test_document_chunk(
                document_id="hidden-doc",
                chunk_index=0,
                content="Hidden document content, spooky",
                hidden=True,
                tenant_state=tenant_state,
            ),
            "private-doc-user-a": _create_test_document_chunk(
                document_id="private-doc-user-a",
                chunk_index=0,
                content="Private document content, btw my SSN is 123-45-6789",
                hidden=False,
                tenant_state=tenant_state,
                document_access=DocumentAccess.build(
                    user_emails=["user-a@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
        }
        for doc in docs.values():
            test_client.index_document(document=doc, tenant_state=tenant_state)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        # Build query for all documents.
        query_body = DocumentQuery.get_from_document_id_query(
            document_id="private-doc-user-a",
            tenant_state=tenant_state,
            # This is the input under test, notice None for acl.
            index_filters=IndexFilters(access_control_list=None, tenant_id=None),
            include_hidden=False,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
            min_chunk_index=None,
            max_chunk_index=None,
            get_full_document=False,
        )

        # Under test.
        chunk_ids = test_client.search_for_document_ids(body=query_body)

        # Postcondition.
        # Even though this doc is private, because we supplied None for acl we
        # were able to retrieve it.
        assert len(chunk_ids) == 1
        # Since this is a chunk ID, it will have the doc ID in it plus other
        # stuff we don't care about in this test.
        assert chunk_ids[0].startswith("private-doc-user-a")

    def test_time_cutoff_filter(
        self,
        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Tests the time cutoff filter works."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index docs with various ages.
        one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
        one_week_ago = datetime.now(timezone.utc) - timedelta(days=7)
        six_months_ago = datetime.now(timezone.utc) - timedelta(days=180)
        one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
        docs = [
            _create_test_document_chunk(
                document_id="one-day-ago",
                content="Good match",
                last_updated=one_day_ago,
                tenant_state=tenant_state,
            ),
            _create_test_document_chunk(
                document_id="one-year-ago",
                content="Good match",
                last_updated=one_year_ago,
                tenant_state=tenant_state,
            ),
            _create_test_document_chunk(
                document_id="no-last-updated",
                # Since we test for result ordering in the postconditions, let's
                # just make this content slightly less of a match with the query
                # so this test is not flaky from the ordering of the results.
                content="Still an ok match",
                last_updated=None,
                tenant_state=tenant_state,
            ),
        ]
        for doc in docs:
            test_client.index_document(document=doc, tenant_state=tenant_state)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        # Build query for documents updated in the last week.
        last_week_search_body = DocumentQuery.get_hybrid_search_query(
            query_text="Good match",
            query_vector=_generate_test_vector(0.1),
            num_hits=5,
            tenant_state=tenant_state,
            index_filters=IndexFilters(
                access_control_list=None, tenant_id=None, time_cutoff=one_week_ago
            ),
            include_hidden=False,
        )
        last_six_months_search_body = DocumentQuery.get_hybrid_search_query(
            query_text="Good match",
            query_vector=_generate_test_vector(0.1),
            num_hits=5,
            tenant_state=tenant_state,
            index_filters=IndexFilters(
                access_control_list=None, tenant_id=None, time_cutoff=six_months_ago
            ),
            include_hidden=False,
        )
        pipeline_name, _ = get_normalization_pipeline_name_and_config()

        # Under test.
        last_week_results = test_client.search(
            body=last_week_search_body,
            search_pipeline_id=pipeline_name,
        )
        last_six_months_results = test_client.search(
            body=last_six_months_search_body,
            search_pipeline_id=pipeline_name,
        )

        # Postcondition.
        # We expect to only get one-day-ago.
        assert len(last_week_results) == 1
        assert last_week_results[0].document_chunk.document_id == "one-day-ago"
        # We expect to get one-day-ago and no-last-updated since six months >
        # ASSUMED_DOCUMENT_AGE_DAYS.
        assert len(last_six_months_results) == 2
        assert last_six_months_results[0].document_chunk.document_id == "one-day-ago"
        assert (
            last_six_months_results[1].document_chunk.document_id == "no-last-updated"
        )

    def test_random_search(
        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests the random search query works."""
        # Precondition.
        _patch_global_tenant_state(monkeypatch, False)
        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_state.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index chunks for two different documents, one hidden one not.
        doc1_chunks = [
            _create_test_document_chunk(
                document_id="doc-1",
                chunk_index=i,
                content=f"Doc 1 Chunk {i}",
                tenant_state=tenant_state,
                hidden=False,
            )
            for i in range(3)
        ]
        doc2_chunks = [
            _create_test_document_chunk(
                document_id="doc-2",
                chunk_index=i,
                content=f"Doc 2 Chunk {i}",
                tenant_state=tenant_state,
                hidden=True,
            )
            for i in range(2)
        ]

        for chunk in doc1_chunks + doc2_chunks:
            test_client.index_document(document=chunk, tenant_state=tenant_state)
        test_client.refresh_index()

        # Build query.
        query_body = DocumentQuery.get_random_search_query(
            tenant_state=tenant_state,
            index_filters=IndexFilters(
                access_control_list=None, tenant_id=tenant_state.tenant_id
            ),
            num_to_retrieve=3,
        )

        # Under test.
        results = test_client.search(body=query_body, search_pipeline_id=None)

        # Postcondition.
        assert len(results) == 3
        assert set(result.document_chunk.chunk_index for result in results) == set(
            [0, 1, 2]
        )
        for result in results:
            # Note each result must be from doc 1, which is not hidden.
            expected_result = doc1_chunks[result.document_chunk.chunk_index]
            assert result.document_chunk == DocumentChunkWithoutVectors(
                **{
                    k: getattr(expected_result, k)
                    for k in DocumentChunkWithoutVectors.model_fields
                }
            )

    def test_keyword_search(
        self,
        test_client: OpenSearchIndexClient,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        Tests keyword search with filters for ACL, hidden documents, and tenant
        isolation.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, True)
        _patch_opensearch_match_highlights_disabled(monkeypatch, False)
        tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
        tenant_y = TenantState(tenant_id="tenant-y", multitenant=True)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_x.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index documents with different public/hidden, ACL, and tenant states.
        docs = {
            "public-doc": _create_test_document_chunk(
                document_id="public-doc",
                chunk_index=0,
                content="Public document content",
                hidden=False,
                tenant_state=tenant_x,
            ),
            "hidden-doc": _create_test_document_chunk(
                document_id="hidden-doc",
                chunk_index=0,
                content="Hidden document content, spooky",
                hidden=True,
                tenant_state=tenant_x,
            ),
            "private-doc-user-a": _create_test_document_chunk(
                document_id="private-doc-user-a",
                chunk_index=0,
                content="Private document content, btw my SSN is 123-45-6789",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-a@example.com", "user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            # Tests that we don't return documents that don't match keywords at
            # all, even if they match filters.
            "private-but-not-relevant-doc-user-a": _create_test_document_chunk(
                document_id="private-but-not-relevant-doc-user-a",
                chunk_index=0,
                content="This text should not match the query at all",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-a@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            "private-doc-user-b": _create_test_document_chunk(
                document_id="private-doc-user-b",
                chunk_index=0,
                content="Private document content, btw my SSN is 987-65-4321",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            "should-not-exist-from-tenant-x-pov": _create_test_document_chunk(
                document_id="should-not-exist-from-tenant-x-pov",
                chunk_index=0,
                content="This is an entirely different tenant, x should never see this",
                # Make this as permissive as possible to exercise tenant
                # isolation.
                hidden=False,
                tenant_state=tenant_y,
            ),
        }
        for doc in docs.values():
            test_client.index_document(document=doc, tenant_state=doc.tenant_id)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        # Should not match private-but-not-relevant-doc-user-a.
        query_text = "document content"
        search_body = DocumentQuery.get_keyword_search_query(
            query_text=query_text,
            num_hits=5,
            tenant_state=tenant_x,
            # The user should only be able to see their private docs. tenant_id
            # in this object is not relevant.
            index_filters=IndexFilters(
                access_control_list=[
                    prefix_user_email("user-a@example.com"),
                    prefix_user_email("user-c@example.com"),
                ],
                tenant_id=None,
            ),
            include_hidden=False,
        )

        # Under test.
        results = test_client.search(body=search_body, search_pipeline_id=None)

        # Postcondition.
        # Should only get the public, non-hidden document, and the private
        # document for which the user has access.
        assert len(results) == 2
        # This should be the highest-ranked result, as a higher percentage of
        # the content matches the query.
        assert results[0].document_chunk.document_id == "public-doc"
        # Make sure the chunk contents are preserved.
        assert results[0].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["public-doc"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        # Make sure score reporting seems reasonable (it should not be None
        # or 0).
        assert results[0].score
        # Make sure there is some kind of match highlight.
        assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])
        # Same for the second result.
        assert results[1].document_chunk.document_id == "private-doc-user-a"
        assert results[1].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["private-doc-user-a"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        assert results[1].score
        assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])
        assert results[1].score < results[0].score

    def test_semantic_search(
        self,
        test_client: OpenSearchIndexClient,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        Tests semantic search with filters for ACL, hidden documents, and tenant
        isolation.
        """
        # Precondition.
        _patch_global_tenant_state(monkeypatch, True)
        tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
        tenant_y = TenantState(tenant_id="tenant-y", multitenant=True)
        mappings = DocumentSchema.get_document_schema(
            vector_dimension=128, multitenant=tenant_x.multitenant
        )
        settings = DocumentSchema.get_index_settings_based_on_environment()
        test_client.create_index(mappings=mappings, settings=settings)

        # Index documents with different public/hidden, ACL, and tenant states.
        docs = {
            "public-doc": _create_test_document_chunk(
                document_id="public-doc",
                chunk_index=0,
                content="Public document content",
                hidden=False,
                tenant_state=tenant_x,
                # Make this identical to the query vector to test that this
                # result is returned first.
                content_vector=_generate_test_vector(0.6),
            ),
            "hidden-doc": _create_test_document_chunk(
                document_id="hidden-doc",
                chunk_index=0,
                content="Hidden document content, spooky",
                hidden=True,
                tenant_state=tenant_x,
            ),
            "private-doc-user-a": _create_test_document_chunk(
                document_id="private-doc-user-a",
                chunk_index=0,
                content="Private document content, btw my SSN is 123-45-6789",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-a@example.com", "user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
                # Make this different from the query vector to test that this
                # result is returned second.
                content_vector=_generate_test_vector(0.5),
            ),
            "private-doc-user-b": _create_test_document_chunk(
                document_id="private-doc-user-b",
                chunk_index=0,
                content="Private document content, btw my SSN is 987-65-4321",
                hidden=False,
                tenant_state=tenant_x,
                document_access=DocumentAccess.build(
                    user_emails=["user-b@example.com"],
                    user_groups=[],
                    external_user_emails=[],
                    external_user_group_ids=[],
                    is_public=False,
                ),
            ),
            "should-not-exist-from-tenant-x-pov": _create_test_document_chunk(
                document_id="should-not-exist-from-tenant-x-pov",
                chunk_index=0,
                content="This is an entirely different tenant, x should never see this",
                # Make this as permissive as possible to exercise tenant
                # isolation.
                hidden=False,
                tenant_state=tenant_y,
            ),
        }
        for doc in docs.values():
            test_client.index_document(document=doc, tenant_state=doc.tenant_id)

        # Refresh index to make documents searchable.
        test_client.refresh_index()

        query_vector = _generate_test_vector(0.6)
        search_body = DocumentQuery.get_semantic_search_query(
            query_embedding=query_vector,
            num_hits=5,
            tenant_state=tenant_x,
            # The user should only be able to see their private docs. tenant_id
            # in this object is not relevant.
            index_filters=IndexFilters(
                access_control_list=[
                    prefix_user_email("user-a@example.com"),
                    prefix_user_email("user-c@example.com"),
                ],
                tenant_id=None,
            ),
            include_hidden=False,
        )

        # Under test.
        results = test_client.search(body=search_body, search_pipeline_id=None)

        # Postcondition.
        # Should only get the public, non-hidden document, and the private
        # document for which the user has access.
        assert len(results) == 2
        # We explicitly expect this to be the highest-ranked result.
        assert results[0].document_chunk.document_id == "public-doc"
        # Make sure the chunk contents are preserved.
        assert results[0].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["public-doc"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        assert results[0].score == 1.0
        # Same for the second result.
        assert results[1].document_chunk.document_id == "private-doc-user-a"
        assert results[1].document_chunk == DocumentChunkWithoutVectors(
            **{
                k: getattr(docs["private-doc-user-a"], k)
                for k in DocumentChunkWithoutVectors.model_fields
            }
        )
        assert results[1].score
        assert 0.0 < results[1].score < 1.0


================================================
FILE: backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py
================================================
"""External dependency tests for OpenSearch migration celery tasks.

These tests require Postgres, Redis, Vespa, and OpenSearch to be running.

WARNING: As with all external dependency tests, do not run them against a
database with data you care about. Your data will be destroyed.
"""

import json
from collections.abc import Generator
from copy import deepcopy
from datetime import datetime
from typing import Any
from unittest.mock import Mock
from unittest.mock import patch

import pytest
from sqlalchemy.orm import Session

from onyx.background.celery.tasks.opensearch_migration.constants import (
    GET_VESPA_CHUNKS_SLICE_COUNT,
)
from onyx.background.celery.tasks.opensearch_migration.tasks import (
    is_continuation_token_done_for_all_slices,
)
from onyx.background.celery.tasks.opensearch_migration.tasks import (
    migrate_chunks_from_vespa_to_opensearch_task,
)
from onyx.background.celery.tasks.opensearch_migration.transformer import (
    transform_vespa_chunks_to_opensearch_chunks,
)
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.configs.constants import SOURCE_TYPE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from onyx.db.models import OpenSearchDocumentMigrationRecord
from onyx.db.models import OpenSearchTenantMigrationRecord
from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
from tests.external_dependency_unit.full_setup import ensure_full_deployment_setup


CHUNK_COUNT = 5


def _get_document_chunks_from_opensearch(
    opensearch_client: OpenSearchIndexClient,
    document_id: str,
    tenant_state: TenantState,
) -> list[DocumentChunk]:
    opensearch_client.refresh_index()
    results: list[DocumentChunk] = []
    for i in range(CHUNK_COUNT):
        document_chunk_id: str = get_opensearch_doc_chunk_id(
            tenant_state=tenant_state,
            document_id=document_id,
            chunk_index=i,
            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
        )
        result = opensearch_client.get_document(document_chunk_id)
        results.append(result)
    return results


def _delete_document_chunks_from_opensearch(
    opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
) -> None:
    opensearch_client.refresh_index()
    query_body = DocumentQuery.delete_from_document_id_query(
        document_id=document_id,
        tenant_state=TenantState(tenant_id=current_tenant_id, multitenant=False),
    )
    opensearch_client.delete_by_query(query_body)


def _generate_test_vector(dim: int) -> list[float]:
    """Generate a deterministic test embedding vector."""
    return [0.1 + (i * 0.001) for i in range(dim)]


def _insert_test_documents_with_commit(
    db_session: Session,
    document_ids: list[str],
) -> list[Document]:
    """Creates test Document records in Postgres."""
    documents = [
        Document(
            id=document_id,
            semantic_id=document_id,
            chunk_count=CHUNK_COUNT,
        )
        for document_id in document_ids
    ]
    db_session.add_all(documents)
    db_session.commit()
    return documents


def _delete_test_documents_with_commit(
    db_session: Session,
    documents: list[Document],
) -> None:
    """Deletes test Document records from Postgres."""
    for document in documents:
        db_session.delete(document)
    db_session.commit()


def _insert_test_migration_records_with_commit(
    db_session: Session,
    migration_records: list[OpenSearchDocumentMigrationRecord],
) -> None:
    db_session.add_all(migration_records)
    db_session.commit()


def _create_raw_document_chunk(
    document_id: str,
    chunk_index: int,
    content: str,
    embedding: list[float],
    now: datetime,
    title: str | None = None,
    title_embedding: list[float] | None = None,
) -> dict[str, Any]:
    return {
        DOCUMENT_ID: document_id,
        CHUNK_ID: chunk_index,
        CONTENT: content,
        EMBEDDINGS: {FULL_CHUNK_EMBEDDING_KEY: embedding},
        TITLE: title,
        TITLE_EMBEDDING: title_embedding,
        SOURCE_TYPE: "test source type",
        METADATA_LIST: ["stuff=things"],
        DOC_UPDATED_AT: int(now.timestamp()),
        HIDDEN: False,
        BOOST: 1,
        SEMANTIC_IDENTIFIER: "test semantic identifier",
        IMAGE_FILE_NAME: "test.png",
        SOURCE_LINKS: "https://test.com",
        BLURB: "test blurb",
        DOC_SUMMARY: "test doc summary",
        CHUNK_CONTEXT: "test chunk context",
        METADATA_SUFFIX: "test metadata suffix",
        DOCUMENT_SETS: {"test document set": 1},
        USER_PROJECT: [1],
        PRIMARY_OWNERS: ["test primary owner"],
        SECONDARY_OWNERS: ["test secondary owner"],
        ACCESS_CONTROL_LIST: {PUBLIC_DOC_PAT: 1, "test user": 1},
    }


def _assert_chunk_matches_vespa_chunk(
    opensearch_chunk: DocumentChunk,
    vespa_chunk: dict[str, Any],
) -> None:
    assert opensearch_chunk.document_id == vespa_chunk[DOCUMENT_ID]
    assert opensearch_chunk.chunk_index == vespa_chunk[CHUNK_ID]
    assert opensearch_chunk.content == vespa_chunk[CONTENT]
    assert opensearch_chunk.content_vector == pytest.approx(
        vespa_chunk[EMBEDDINGS][FULL_CHUNK_EMBEDDING_KEY]
    )
    assert opensearch_chunk.title == vespa_chunk[TITLE]
    assert opensearch_chunk.title_vector == pytest.approx(vespa_chunk[TITLE_EMBEDDING])
    assert opensearch_chunk.source_type == vespa_chunk[SOURCE_TYPE]
    assert opensearch_chunk.metadata_list == vespa_chunk[METADATA_LIST]
    assert (
        opensearch_chunk.last_updated is not None
        and int(opensearch_chunk.last_updated.timestamp())
        == vespa_chunk[DOC_UPDATED_AT]
    )
    assert opensearch_chunk.public == vespa_chunk[ACCESS_CONTROL_LIST][PUBLIC_DOC_PAT]
    assert opensearch_chunk.access_control_list == [
        access_control
        for access_control in vespa_chunk[ACCESS_CONTROL_LIST]
        if access_control != PUBLIC_DOC_PAT
    ]
    assert opensearch_chunk.hidden == vespa_chunk[HIDDEN]
    assert opensearch_chunk.global_boost == vespa_chunk[BOOST]
    assert opensearch_chunk.semantic_identifier == vespa_chunk[SEMANTIC_IDENTIFIER]
    assert opensearch_chunk.image_file_id == vespa_chunk[IMAGE_FILE_NAME]
    assert opensearch_chunk.source_links == vespa_chunk[SOURCE_LINKS]
    assert opensearch_chunk.blurb == vespa_chunk[BLURB]
    assert opensearch_chunk.doc_summary == vespa_chunk[DOC_SUMMARY]
    assert opensearch_chunk.chunk_context == vespa_chunk[CHUNK_CONTEXT]
    assert opensearch_chunk.metadata_suffix == vespa_chunk[METADATA_SUFFIX]
    assert opensearch_chunk.document_sets == [
        doc_set for doc_set in vespa_chunk[DOCUMENT_SETS]
    ]
    assert opensearch_chunk.user_projects == vespa_chunk[USER_PROJECT]
    assert opensearch_chunk.primary_owners == vespa_chunk[PRIMARY_OWNERS]
    assert opensearch_chunk.secondary_owners == vespa_chunk[SECONDARY_OWNERS]


@pytest.fixture(scope="module")
def full_deployment_setup() -> Generator[None, None, None]:
    """Optional fixture to perform full deployment-like setup on demand.

    Imports and calls
    tests.external_dependency_unit.startup.full_setup.ensure_full_deployment_setup
    to initialize Postgres defaults, Vespa indices, and seed initial docs.

    NOTE: We deliberately duplicate this logic from
    backend/tests/external_dependency_unit/conftest.py because we need to set
    opensearch_available just for this module, not the entire test session.

    TODO(ENG-3764)(andrei): Consolidate some of these test fixtures.
    """
    # Patch ENABLE_OPENSEARCH_INDEXING_FOR_ONYX just for this test because we
    # don't yet want that enabled for all tests.
    # TODO(andrei): Remove this once CI enables OpenSearch for all tests.
    with (
        patch(
            "onyx.configs.app_configs.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX",
            True,
        ),
        patch("onyx.document_index.factory.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", True),
    ):
        ensure_full_deployment_setup(opensearch_available=True)
        yield  # Test runs here.


@pytest.fixture(scope="module")
def db_session(
    full_deployment_setup: None,  # noqa: ARG001
) -> Generator[Session, None, None]:
    """
    NOTE: We deliberately duplicate this logic from
    backend/tests/external_dependency_unit/conftest.py because we need a
    module-level fixture whereas the fixture in that file is function-level. I
    don't want to change it in this change to not risk inadvertently breaking
    things.
    """
    with get_session_with_current_tenant() as session:
        yield session  # Test runs here.


@pytest.fixture(scope="module")
def vespa_document_index(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
) -> Generator[VespaDocumentIndex, None, None]:
    """Creates a Vespa document index for the test tenant."""
    active = get_active_search_settings(db_session)
    yield VespaDocumentIndex(
        index_name=active.primary.index_name,
        tenant_state=TenantState(tenant_id=get_current_tenant_id(), multitenant=False),
        large_chunks_enabled=False,
    )  # Test runs here.


@pytest.fixture(scope="module")
def opensearch_client(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
) -> Generator[OpenSearchIndexClient, None, None]:
    """Creates an OpenSearch client for the test tenant."""
    active = get_active_search_settings(db_session)
    yield OpenSearchIndexClient(index_name=active.primary.index_name)  # Test runs here.


@pytest.fixture(scope="module")
def opensearch_available(
    opensearch_client: OpenSearchClient,
) -> Generator[None, None, None]:
    """Verifies OpenSearch is running, fails the test if not."""
    if not wait_for_opensearch_with_timeout(client=opensearch_client):
        pytest.fail("OpenSearch is not available.")
    yield  # Test runs here.


@pytest.fixture(scope="module")
def vespa_available(
    full_deployment_setup: None,  # noqa: ARG001
) -> Generator[None, None, None]:
    """Verifies Vespa is running, fails the test if not."""
    # Try 90 seconds for testing in CI.
    if not wait_for_vespa_with_timeout(wait_limit=90):
        pytest.fail("Vespa is not available.")
    yield  # Test runs here.


@pytest.fixture(scope="module")
def test_embedding_dimension(db_session: Session) -> Generator[int, None, None]:
    active = get_active_search_settings(db_session)
    yield active.primary.model_dim  # Test runs here.


@pytest.fixture(scope="function")
def patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:
    test_page_size = 5
    with (
        patch(
            "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
            test_page_size,
        ),
        patch(
            "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
            test_page_size,
        ),
    ):
        yield test_page_size  # Test runs here.


@pytest.fixture(scope="function")
def test_documents(
    db_session: Session,
    vespa_document_index: VespaDocumentIndex,
    opensearch_client: OpenSearchIndexClient,
    patch_get_vespa_chunks_page_size: int,
) -> Generator[list[Document], None, None]:
    """
    Creates and cleans test Document records in Postgres and the document
    indices.
    """
    # We use a large number of documents >
    # get_all_raw_document_chunks_paginated's page_size argument in the task.
    documents_to_create = patch_get_vespa_chunks_page_size * 2
    doc_ids = [f"test_doc_{i}" for i in range(documents_to_create)]
    documents = _insert_test_documents_with_commit(db_session, doc_ids)

    # NOTE: chunk_count must be passed because index_raw_chunks uses the "new"
    # chunk ID system (get_uuid_from_chunk_info). Without chunk_count, delete()
    # falls back to the "old" system (get_uuid_from_chunk_info_old) and won't
    # find/delete the chunks.
    for document in documents:
        vespa_document_index.delete(document.id, chunk_count=CHUNK_COUNT)

    for document in documents:
        _delete_document_chunks_from_opensearch(
            opensearch_client, document.id, get_current_tenant_id()
        )

    yield documents  # Test runs here.

    # Cleanup.
    for document in documents:
        _delete_document_chunks_from_opensearch(
            opensearch_client, document.id, get_current_tenant_id()
        )

    for document in documents:
        vespa_document_index.delete(document.id, chunk_count=CHUNK_COUNT)

    _delete_test_documents_with_commit(db_session, documents)


@pytest.fixture(scope="function")
def clean_migration_tables(db_session: Session) -> Generator[None, None, None]:
    """Cleans up migration-related tables before and after each test."""
    # Clean before test.
    db_session.query(OpenSearchDocumentMigrationRecord).delete()
    db_session.query(OpenSearchTenantMigrationRecord).delete()
    db_session.commit()

    yield  # Test runs here.

    # Clean after test.
    db_session.query(OpenSearchDocumentMigrationRecord).delete()
    db_session.query(OpenSearchTenantMigrationRecord).delete()
    db_session.commit()


@pytest.fixture(scope="function")
def enable_opensearch_indexing_for_onyx() -> Generator[None, None, None]:
    with patch(
        "onyx.background.celery.tasks.opensearch_migration.tasks.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX",
        True,
    ):
        yield  # Test runs here.


@pytest.fixture(scope="function")
def disable_opensearch_indexing_for_onyx() -> Generator[None, None, None]:
    with patch(
        "onyx.background.celery.tasks.opensearch_migration.tasks.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX",
        False,
    ):
        yield  # Test runs here.


class TestMigrateChunksFromVespaToOpenSearchTask:
    """Tests migrate_chunks_from_vespa_to_opensearch_task."""

    def test_chunk_migration_completes_successfully(
        self,
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """
        Tests that all chunks are migrated from Vespa to OpenSearch.
        """
        # Precondition.
        # Index chunks into Vespa.
        document_chunks: dict[str, list[dict[str, Any]]] = {
            document.id: [
                _create_raw_document_chunk(
                    document_id=document.id,
                    chunk_index=i,
                    content=f"Test content {i} for {document.id}",
                    embedding=_generate_test_vector(test_embedding_dimension),
                    now=datetime.now(),
                    title=f"Test title {document.id}",
                    title_embedding=_generate_test_vector(test_embedding_dimension),
                )
                for i in range(CHUNK_COUNT)
            ]
            for document in test_documents
        }
        all_chunks: list[dict[str, Any]] = []
        for chunks in document_chunks.values():
            all_chunks.extend(chunks)
        vespa_document_index.index_raw_chunks(all_chunks)
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )

        # Under test.
        result = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=tenant_state.tenant_id
        )

        # Postcondition.
        assert result is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()
        # Verify tenant migration record was updated.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

    def test_chunk_migration_resumes_from_continuation_token(
        self,
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """Tests that chunk migration resumes from a saved continuation token.

        Simulates task time running out my mocking the locking behavior.
        """
        # Precondition.
        # Index chunks into Vespa.
        document_chunks: dict[str, list[dict[str, Any]]] = {
            document.id: [
                _create_raw_document_chunk(
                    document_id=document.id,
                    chunk_index=i,
                    content=f"Test content {i} for {document.id}",
                    embedding=_generate_test_vector(test_embedding_dimension),
                    now=datetime.now(),
                    title=f"Test title {document.id}",
                    title_embedding=_generate_test_vector(test_embedding_dimension),
                )
                for i in range(CHUNK_COUNT)
            ]
            for document in test_documents
        }
        all_chunks: list[dict[str, Any]] = []
        for chunks in document_chunks.values():
            all_chunks.extend(chunks)
        vespa_document_index.index_raw_chunks(all_chunks)
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )

        # Run the initial batch. To simulate partial progress we will mock the
        # redis lock to return True for the first invocation of .owned() and
        # False subsequently.
        mock_redis_client = Mock()
        mock_lock = Mock()
        mock_lock.owned.side_effect = [True, False, False]
        mock_lock.acquire.return_value = True
        mock_redis_client.lock.return_value = mock_lock
        with patch(
            "onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
            return_value=mock_redis_client,
        ):
            result_1 = migrate_chunks_from_vespa_to_opensearch_task(
                tenant_id=tenant_state.tenant_id
            )

        assert result_1 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()

        # Verify partial progress was saved.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        partial_chunks_migrated = tenant_record.total_chunks_migrated
        assert partial_chunks_migrated > 0
        assert tenant_record.vespa_visit_continuation_token is not None
        # Slices are not necessarily evenly distributed across all document
        # chunks so we can't test that every token is non-None, but certainly at
        # least one must be.
        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
        assert tenant_record.migration_completed_at is None
        assert tenant_record.approx_chunk_count_in_vespa is not None

        # Under test.
        # Run the remainder of the migration.
        result_2 = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=tenant_state.tenant_id
        )

        # Postcondition.
        assert result_2 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()

        # Verify completion.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated > partial_chunks_migrated
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

    def test_chunk_migration_visits_all_chunks_even_when_batch_size_varies(
        self,
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """
        Tests that chunk migration works correctly even when the batch size
        changes halfway through a migration.

        Simulates task time running out my mocking the locking behavior.
        """
        # Precondition.
        # Index chunks into Vespa.
        document_chunks: dict[str, list[dict[str, Any]]] = {
            document.id: [
                _create_raw_document_chunk(
                    document_id=document.id,
                    chunk_index=i,
                    content=f"Test content {i} for {document.id}",
                    embedding=_generate_test_vector(test_embedding_dimension),
                    now=datetime.now(),
                    title=f"Test title {document.id}",
                    title_embedding=_generate_test_vector(test_embedding_dimension),
                )
                for i in range(CHUNK_COUNT)
            ]
            for document in test_documents
        }
        all_chunks: list[dict[str, Any]] = []
        for chunks in document_chunks.values():
            all_chunks.extend(chunks)
        vespa_document_index.index_raw_chunks(all_chunks)
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )

        # Run the initial batch. To simulate partial progress we will mock the
        # redis lock to return True for the first invocation of .owned() and
        # False subsequently.
        # NOTE: The batch size is currently set to 5 in
        # patch_get_vespa_chunks_page_size.
        mock_redis_client = Mock()
        mock_lock = Mock()
        mock_lock.owned.side_effect = [True, False, False]
        mock_lock.acquire.return_value = True
        mock_redis_client.lock.return_value = mock_lock
        with patch(
            "onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
            return_value=mock_redis_client,
        ):
            result_1 = migrate_chunks_from_vespa_to_opensearch_task(
                tenant_id=tenant_state.tenant_id
            )

        assert result_1 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()

        # Verify partial progress was saved.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        partial_chunks_migrated = tenant_record.total_chunks_migrated
        assert partial_chunks_migrated > 0
        # page_size applies per slice, so one iteration can fetch up to
        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
        assert partial_chunks_migrated <= 5 * GET_VESPA_CHUNKS_SLICE_COUNT
        assert tenant_record.vespa_visit_continuation_token is not None
        # Slices are not necessarily evenly distributed across all document
        # chunks so we can't test that every token is non-None, but certainly at
        # least one must be.
        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
        assert tenant_record.migration_completed_at is None
        assert tenant_record.approx_chunk_count_in_vespa is not None

        # Under test.
        # Now patch the batch size to be some other number, like 2.
        mock_redis_client = Mock()
        mock_lock = Mock()
        mock_lock.owned.side_effect = [True, False, False]
        mock_lock.acquire.return_value = True
        mock_redis_client.lock.return_value = mock_lock
        with (
            patch(
                "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
                2,
            ),
            patch(
                "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
                2,
            ),
            patch(
                "onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
                return_value=mock_redis_client,
            ),
        ):
            result_2 = migrate_chunks_from_vespa_to_opensearch_task(
                tenant_id=tenant_state.tenant_id
            )

        # Postcondition.
        assert result_2 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()

        # Verify next partial progress was saved.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        new_partial_chunks_migrated = tenant_record.total_chunks_migrated
        assert new_partial_chunks_migrated > partial_chunks_migrated
        # page_size applies per slice, so one iteration can fetch up to
        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
        assert new_partial_chunks_migrated <= (5 + 2) * GET_VESPA_CHUNKS_SLICE_COUNT
        assert tenant_record.vespa_visit_continuation_token is not None
        # Slices are not necessarily evenly distributed across all document
        # chunks so we can't test that every token is non-None, but certainly at
        # least one must be.
        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
        assert tenant_record.migration_completed_at is None
        assert tenant_record.approx_chunk_count_in_vespa is not None

        # Under test.
        # Run the remainder of the migration.
        with (
            patch(
                "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
                2,
            ),
            patch(
                "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
                2,
            ),
        ):
            result_3 = migrate_chunks_from_vespa_to_opensearch_task(
                tenant_id=tenant_state.tenant_id
            )

        # Postcondition.
        assert result_3 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()

        # Verify completion.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated > new_partial_chunks_migrated
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

    def test_chunk_migration_empty_vespa(
        self,
        db_session: Session,
        # Get this just to ensure Vespa is clean from previous test runs.
        test_documents: list[Document],  # noqa: ARG002
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """
        Tests that chunk migration completes without error when Vespa is empty.
        """
        # Under test.
        # No chunks in Vespa.
        result = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=get_current_tenant_id()
        )

        # Postcondition.
        assert result is True
        db_session.expire_all()
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated == 0
        # Visit is complete so continuation token should be marked as done for all slices.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        # Mark migration as completed even for empty Vespa.
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == 0

    def test_chunk_migration_updates_existing_chunks(
        self,
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """
        Tests that the migration task updates existing chunks in OpenSearch if
        they already exist.

        Chunks existing in the index is not a failure mode as the document may
        have been dual indexed. Since dual indexing indexes into Vespa first, we
        can assume that the state of the chunk we want to migrate is the most
        up-to-date.
        """
        # Precondition.
        # Index chunks into Vespa.
        document_chunks: dict[str, list[dict[str, Any]]] = {
            document.id: [
                _create_raw_document_chunk(
                    document_id=document.id,
                    chunk_index=i,
                    content=f"Test content {i} for {document.id}",
                    embedding=_generate_test_vector(test_embedding_dimension),
                    now=datetime.now(),
                    title=f"Test title {document.id}",
                    title_embedding=_generate_test_vector(test_embedding_dimension),
                )
                for i in range(CHUNK_COUNT)
            ]
            for document in test_documents
        }
        all_chunks: list[dict[str, Any]] = []
        for chunks in document_chunks.values():
            all_chunks.extend(chunks)
        vespa_document_index.index_raw_chunks(all_chunks)
        # Index the first document into OpenSearch with some different content.
        document_in_opensearch = deepcopy(document_chunks[test_documents[0].id])
        for chunk in document_in_opensearch:
            chunk["content"] = (
                f"Different content {chunk[CHUNK_ID]} for {test_documents[0].id}"
            )
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )
        chunks_for_document_in_opensearch, _ = (
            transform_vespa_chunks_to_opensearch_chunks(
                document_in_opensearch,
                tenant_state,
                {},
            )
        )
        opensearch_client.bulk_index_documents(
            documents=chunks_for_document_in_opensearch,
            tenant_state=tenant_state,
            update_if_exists=True,
        )

        # Under test.
        result = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=tenant_state.tenant_id
        )

        # Postcondition.
        assert result is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

    def test_chunk_migration_noops_when_migration_is_complete(
        self,
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """
        Tests that the migration task no-ops when the migration is complete.
        """
        # Precondition.
        # Index chunks into Vespa.
        document_chunks: dict[str, list[dict[str, Any]]] = {
            document.id: [
                _create_raw_document_chunk(
                    document_id=document.id,
                    chunk_index=i,
                    content=f"Test content {i} for {document.id}",
                    embedding=_generate_test_vector(test_embedding_dimension),
                    now=datetime.now(),
                    title=f"Test title {document.id}",
                    title_embedding=_generate_test_vector(test_embedding_dimension),
                )
                for i in range(CHUNK_COUNT)
            ]
            for document in test_documents
        }
        all_chunks: list[dict[str, Any]] = []
        for chunks in document_chunks.values():
            all_chunks.extend(chunks)
        vespa_document_index.index_raw_chunks(all_chunks)
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
        )

        # Under test.
        # First run.
        result_1 = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=tenant_state.tenant_id
        )

        # Postcondition.
        assert result_1 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

        # Under test.
        # Second run.
        result_2 = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=tenant_state.tenant_id
        )

        # Postcondition.
        assert result_2 is True
        # Expire the session cache to see the committed changes from the task.
        db_session.expire_all()
        # This all should be unchanged.
        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
        assert tenant_record is not None
        assert tenant_record.total_chunks_migrated == len(all_chunks)
        # Visit is complete so continuation token should be None.
        assert tenant_record.vespa_visit_continuation_token is not None
        assert is_continuation_token_done_for_all_slices(
            json.loads(tenant_record.vespa_visit_continuation_token)
        )
        assert tenant_record.migration_completed_at is not None
        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)

        # Verify chunks were indexed in OpenSearch.
        for document in test_documents:
            opensearch_chunks = _get_document_chunks_from_opensearch(
                opensearch_client, document.id, tenant_state
            )
            assert len(opensearch_chunks) == CHUNK_COUNT
            opensearch_chunks.sort(key=lambda x: x.chunk_index)
            for opensearch_chunk in opensearch_chunks:
                _assert_chunk_matches_vespa_chunk(
                    opensearch_chunk,
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

    def test_returns_none_when_feature_disabled(
        self,
        disable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
    ) -> None:
        """Tests that task returns None when feature is disabled."""
        # Under test.
        result = migrate_chunks_from_vespa_to_opensearch_task(
            tenant_id=get_current_tenant_id()
        )

        # Postcondition.
        assert result is None

    def test_vespa_get_chunk_count(
        self,
        vespa_document_index: VespaDocumentIndex,
        test_embedding_dimension: int,
    ) -> None:
        """
        Tests that the VespaDocumentIndex.get_chunk_count() method returns the
        correct number of chunks.
        """
        # Precondition.
        # Index chunks into Vespa.
        all_chunks = [
            _create_raw_document_chunk(
                document_id="test_doc_1",
                chunk_index=i,
                content=f"Test content {i} for test_doc_1",
                embedding=_generate_test_vector(test_embedding_dimension),
                now=datetime.now(),
                title=f"Test title {i}",
                title_embedding=_generate_test_vector(test_embedding_dimension),
            )
            for i in range(500)
        ]
        vespa_document_index.index_raw_chunks(all_chunks)

        # Under test.
        chunk_count = vespa_document_index.get_chunk_count()

        # Postcondition.
        assert chunk_count == len(all_chunks)


class TestSanitizedDocIdResolution:
    """Tests document ID resolution functions."""

    def test_resolve_sanitized_document_ids_batch_normal(
        self,
        db_session: Session,
        test_documents: list[Document],  # noqa: ARG002
    ) -> None:
        """
        Tests batch resolution for normal document IDs (no sanitization needed).
        """
        # Under test.
        result = build_sanitized_to_original_doc_id_mapping(db_session)

        # Postcondition.
        # Since we expect no IDs in test_documents to need sanitization, the
        # result should be empty.
        assert not result

    def test_resolve_sanitized_document_ids_batch_with_quotes(
        self,
        db_session: Session,
    ) -> None:
        """Tests batch resolution for a document ID containing single quotes."""
        # Precondition.
        # Create a document with a single quote in its ID.
        original_id = "test_doc_with'quote"
        sanitized_id = "test_doc_with_quote"
        document = Document(
            id=original_id,
            semantic_id=original_id,
            chunk_count=1,
        )
        try:
            db_session.add(document)
            db_session.commit()

            # Under test.
            result = build_sanitized_to_original_doc_id_mapping(db_session)

            # Postcondition.
            assert len(result) == 1
            # The sanitized version should map to the original.
            assert sanitized_id in result
            assert result[sanitized_id] == original_id

        finally:
            _delete_test_documents_with_commit(db_session, [document])

    def test_raises_when_sanitized_id_matches_another_document(
        self,
        db_session: Session,
    ) -> None:
        """
        Tests that the function raises when a sanitized ID matches another
        document's original ID.
        """
        # Precondition.
        # Create a document with a single quote in its ID, and another document
        # with that string as its ID.
        original_id = "test_doc_with'quote"
        sanitized_id = "test_doc_with_quote"
        document_bad = Document(
            id=original_id,
            semantic_id=original_id,
            chunk_count=1,
        )
        document_fine = Document(
            id=sanitized_id,
            semantic_id=sanitized_id,
            chunk_count=1,
        )
        try:
            db_session.add(document_bad)
            db_session.add(document_fine)
            db_session.commit()

            # Under test.
            with pytest.raises(RuntimeError):
                build_sanitized_to_original_doc_id_mapping(db_session)

        finally:
            _delete_test_documents_with_commit(
                db_session, [document_bad, document_fine]
            )


================================================
FILE: backend/tests/external_dependency_unit/permission_sync/test_doc_permission_sync_attempt.py
================================================
"""
Test suite for DocPermissionSyncAttempt CRUD operations.

Tests the basic CRUD operations for document permission sync attempts,
including creation, status updates, progress tracking, and querying.
"""

from datetime import datetime
from datetime import timezone

import pytest
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import get_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import (
    get_recent_doc_permission_sync_attempts_for_cc_pair,
)
from onyx.db.permission_sync_attempt import mark_doc_permission_sync_attempt_failed
from onyx.db.permission_sync_attempt import (
    mark_doc_permission_sync_attempt_in_progress,
)
from tests.external_dependency_unit.conftest import create_test_user


def _create_test_connector_credential_pair(
    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE
) -> ConnectorCredentialPair:
    """Create a test connector credential pair for testing."""
    user = create_test_user(db_session, "test_user")

    connector = Connector(
        name=f"Test {source.value} Connector",
        source=source,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
        refresh_freq=None,
        prune_freq=None,
        indexing_start=datetime.now(timezone.utc),
    )
    db_session.add(connector)
    db_session.flush()

    credential = Credential(
        credential_json={},
        user_id=user.id,
        admin_public=True,
    )
    db_session.add(credential)
    db_session.flush()
    # Expire the credential so it reloads from DB with SensitiveValue wrapper
    db_session.expire(credential)

    cc_pair = ConnectorCredentialPair(
        connector_id=connector.id,
        credential_id=credential.id,
        name="Test CC Pair",
        status=ConnectorCredentialPairStatus.ACTIVE,
        access_type=AccessType.PUBLIC,
    )
    db_session.add(cc_pair)
    db_session.commit()

    return cc_pair


class TestDocPermissionSyncAttempt:
    def test_create_doc_permission_sync_attempt(self, db_session: Session) -> None:
        """Test creating a new doc permission sync attempt."""
        cc_pair = _create_test_connector_credential_pair(db_session)

        attempt_id = create_doc_permission_sync_attempt(
            connector_credential_pair_id=cc_pair.id,
            db_session=db_session,
        )

        assert attempt_id is not None
        assert isinstance(attempt_id, int)

        # Verify the attempt was created with correct defaults
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.connector_credential_pair_id == cc_pair.id
        assert attempt.status == PermissionSyncStatus.NOT_STARTED
        assert attempt.total_docs_synced == 0
        assert attempt.docs_with_permission_errors == 0
        assert attempt.time_started is None
        assert attempt.time_finished is None
        assert attempt.time_created is not None

    def test_get_doc_permission_sync_attempt(self, db_session: Session) -> None:
        """Test retrieving a doc permission sync attempt by ID."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Test basic retrieval
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.id == attempt_id

        # Test with eager loading
        attempt_with_connector = get_doc_permission_sync_attempt(
            db_session, attempt_id, eager_load_connector=True
        )
        assert attempt_with_connector is not None
        assert attempt_with_connector.connector_credential_pair is not None
        assert attempt_with_connector.connector_credential_pair.id == cc_pair.id

        # Test non-existent ID
        non_existent_attempt = get_doc_permission_sync_attempt(db_session, 99999)
        assert non_existent_attempt is None

    def test_mark_doc_permission_sync_attempt_in_progress(
        self, db_session: Session
    ) -> None:
        """Test marking a doc permission sync attempt as in progress."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress
        updated_attempt = mark_doc_permission_sync_attempt_in_progress(
            attempt_id, db_session
        )

        assert updated_attempt.status == PermissionSyncStatus.IN_PROGRESS
        assert updated_attempt.time_started is not None
        assert updated_attempt.time_finished is None

        # Verify it fails if already in progress
        with pytest.raises(RuntimeError, match="not in NOT_STARTED status"):
            mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)

    def test_mark_doc_permission_sync_attempt_failed(self, db_session: Session) -> None:
        """Test marking a doc permission sync attempt as failed."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Mark as failed with error message (should work even without starting)
        error_msg = "Sync process crashed unexpectedly"
        mark_doc_permission_sync_attempt_failed(
            attempt_id, db_session, error_message=error_msg
        )

        # Verify the status and timestamps
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status == PermissionSyncStatus.FAILED
        assert attempt.time_started is not None
        assert attempt.time_finished is not None
        assert attempt.error_message == error_msg

    def test_get_recent_doc_permission_sync_attempts_for_cc_pair(
        self, db_session: Session
    ) -> None:
        """Test retrieving recent doc permission sync attempts for a connector credential pair."""
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Create multiple attempts
        attempt_ids = []
        for i in range(5):
            attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)
            attempt_ids.append(attempt_id)

        # Get recent attempts
        recent_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(
            cc_pair_id=cc_pair.id,
            limit=3,
            db_session=db_session,
        )

        assert len(recent_attempts) == 3

        # Verify they are ordered by time_created descending (most recent first)
        for i in range(len(recent_attempts) - 1):
            assert (
                recent_attempts[i].time_created >= recent_attempts[i + 1].time_created
            )

        # Verify they all belong to the correct cc_pair
        for attempt in recent_attempts:
            assert attempt.connector_credential_pair_id == cc_pair.id

        # Test with different cc_pair (should return empty)
        other_cc_pair = _create_test_connector_credential_pair(
            db_session, source=DocumentSource.SLACK
        )
        other_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(
            cc_pair_id=other_cc_pair.id,
            limit=10,
            db_session=db_session,
        )
        assert len(other_attempts) == 0

    def test_status_enum_methods(self, db_session: Session) -> None:
        """Test the status enum helper methods."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Test NOT_STARTED status
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert not attempt.status.is_terminal()
        assert not attempt.status.is_successful()

        # Test IN_PROGRESS status
        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert not attempt.status.is_terminal()
        assert not attempt.status.is_successful()

        # Test SUCCESS status via complete function
        complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_docs_synced=100,
            docs_with_permission_errors=0,
        )
        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status.is_terminal()
        assert attempt.status.is_successful()

        # Test FAILED status (create new attempt)
        failed_attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)
        mark_doc_permission_sync_attempt_failed(
            failed_attempt_id, db_session, error_message="Test failure"
        )
        failed_attempt = get_doc_permission_sync_attempt(db_session, failed_attempt_id)
        assert failed_attempt is not None
        assert failed_attempt.status.is_terminal()
        assert not failed_attempt.status.is_successful()

        # Test COMPLETED_WITH_ERRORS status via complete function (create new attempt)
        error_attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)
        mark_doc_permission_sync_attempt_in_progress(error_attempt_id, db_session)
        complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=error_attempt_id,
            total_docs_synced=100,
            docs_with_permission_errors=10,
        )
        error_attempt = get_doc_permission_sync_attempt(db_session, error_attempt_id)
        assert error_attempt is not None
        assert error_attempt.status.is_terminal()
        assert (
            error_attempt.status.is_successful()
        )  # Completed with errors is still "successful"

    def test_complete_doc_permission_sync_attempt_success(
        self, db_session: Session
    ) -> None:
        """Test completing a doc permission sync attempt without errors."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress first
        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)

        # Complete without errors
        completed_attempt = complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_docs_synced=100,
            docs_with_permission_errors=0,
        )

        assert completed_attempt.status == PermissionSyncStatus.SUCCESS
        assert completed_attempt.total_docs_synced == 100
        assert completed_attempt.docs_with_permission_errors == 0
        assert completed_attempt.time_finished is not None

    def test_complete_doc_permission_sync_attempt_with_errors(
        self, db_session: Session
    ) -> None:
        """Test completing a doc permission sync attempt with errors."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress first
        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)

        # Complete with errors
        completed_attempt = complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_docs_synced=100,
            docs_with_permission_errors=15,
        )

        assert completed_attempt.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        assert completed_attempt.total_docs_synced == 100
        assert completed_attempt.docs_with_permission_errors == 15
        assert completed_attempt.time_finished is not None

    def test_complete_doc_permission_sync_attempt_can_be_called_multiple_times(
        self, db_session: Session
    ) -> None:
        """Test that complete can be called multiple times if needed (accumulates correctly)."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress
        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)

        # Complete once
        first_complete = complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_docs_synced=50,
            docs_with_permission_errors=5,
        )

        # Verify first completion
        assert first_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        assert first_complete.total_docs_synced == 50
        assert first_complete.docs_with_permission_errors == 5
        assert first_complete.time_finished is not None

        # Call complete again (simulating additional batch processing)
        second_complete = complete_doc_permission_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_docs_synced=50,
            docs_with_permission_errors=10,
        )

        # Should accumulate progress from both calls
        assert second_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        assert second_complete.total_docs_synced == 100
        assert second_complete.docs_with_permission_errors == 15
        assert second_complete.time_finished is not None


================================================
FILE: backend/tests/external_dependency_unit/permission_sync/test_external_group_permission_sync_attempt.py
================================================
"""
Test suite for ExternalGroupPermissionSyncAttempt CRUD operations.

Tests the basic CRUD operations for external group permission sync attempts,
including creation, status updates, progress tracking, and querying.
Supports both connector-specific and global group sync attempts.
"""

from datetime import datetime
from datetime import timezone

import pytest
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import ExternalGroupPermissionSyncAttempt
from onyx.db.permission_sync_attempt import (
    complete_external_group_sync_attempt,
)
from onyx.db.permission_sync_attempt import (
    create_external_group_sync_attempt,
)
from onyx.db.permission_sync_attempt import (
    get_external_group_sync_attempt,
)
from onyx.db.permission_sync_attempt import (
    get_recent_external_group_sync_attempts_for_cc_pair,
)
from onyx.db.permission_sync_attempt import (
    mark_external_group_sync_attempt_failed,
)
from onyx.db.permission_sync_attempt import (
    mark_external_group_sync_attempt_in_progress,
)
from tests.external_dependency_unit.conftest import create_test_user


def _create_test_connector_credential_pair(
    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE
) -> ConnectorCredentialPair:
    """Create a test connector credential pair for testing."""
    user = create_test_user(db_session, "test_user")

    connector = Connector(
        name=f"Test {source.value} Connector",
        source=source,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={},
        refresh_freq=None,
        prune_freq=None,
        indexing_start=datetime.now(timezone.utc),
    )
    db_session.add(connector)
    db_session.flush()

    credential = Credential(
        credential_json={},
        user_id=user.id,
        admin_public=True,
    )
    db_session.add(credential)
    db_session.flush()
    # Expire the credential so it reloads from DB with SensitiveValue wrapper
    db_session.expire(credential)

    cc_pair = ConnectorCredentialPair(
        connector_id=connector.id,
        credential_id=credential.id,
        name="Test CC Pair",
        status=ConnectorCredentialPairStatus.ACTIVE,
        access_type=AccessType.PUBLIC,
    )
    db_session.add(cc_pair)
    db_session.commit()

    return cc_pair


def _cleanup_global_external_group_sync_attempts(db_session: Session) -> None:
    """Clean up any existing global external group sync attempts from previous test runs."""
    # Delete all global attempts (where connector_credential_pair_id is None)
    db_session.query(ExternalGroupPermissionSyncAttempt).filter(
        ExternalGroupPermissionSyncAttempt.connector_credential_pair_id.is_(None)
    ).delete()
    db_session.commit()


class TestExternalGroupPermissionSyncAttempt:
    def test_create_external_group_sync_attempt_with_cc_pair(
        self, db_session: Session
    ) -> None:
        """Test creating a new external group sync attempt for a specific connector."""
        cc_pair = _create_test_connector_credential_pair(db_session)

        attempt_id = create_external_group_sync_attempt(
            connector_credential_pair_id=cc_pair.id,
            db_session=db_session,
        )

        assert attempt_id is not None
        assert isinstance(attempt_id, int)

        # Verify the attempt was created with correct defaults
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.connector_credential_pair_id == cc_pair.id
        assert attempt.status == PermissionSyncStatus.NOT_STARTED
        assert attempt.total_users_processed == 0
        assert attempt.total_groups_processed == 0
        assert attempt.total_group_memberships_synced == 0
        assert attempt.time_started is None
        assert attempt.time_finished is None
        assert attempt.time_created is not None

    def test_create_global_external_group_sync_attempt(
        self, db_session: Session
    ) -> None:
        """Test creating a new global external group sync attempt."""
        attempt_id = create_external_group_sync_attempt(
            connector_credential_pair_id=None,  # Global sync
            db_session=db_session,
        )

        assert attempt_id is not None
        assert isinstance(attempt_id, int)

        # Verify the attempt was created as global
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.connector_credential_pair_id is None
        assert attempt.status == PermissionSyncStatus.NOT_STARTED

    def test_get_external_group_sync_attempt(self, db_session: Session) -> None:
        """Test retrieving an external group sync attempt by ID."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Test basic retrieval
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.id == attempt_id

        # Test with eager loading
        attempt_with_connector = get_external_group_sync_attempt(
            db_session, attempt_id, eager_load_connector=True
        )
        assert attempt_with_connector is not None
        assert attempt_with_connector.connector_credential_pair is not None
        assert attempt_with_connector.connector_credential_pair.id == cc_pair.id

        # Test non-existent ID
        non_existent_attempt = get_external_group_sync_attempt(db_session, 99999)
        assert non_existent_attempt is None

    def test_mark_external_group_sync_attempt_in_progress(
        self, db_session: Session
    ) -> None:
        """Test marking an external group sync attempt as in progress."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress
        updated_attempt = mark_external_group_sync_attempt_in_progress(
            attempt_id, db_session
        )

        assert updated_attempt.status == PermissionSyncStatus.IN_PROGRESS
        assert updated_attempt.time_started is not None
        assert updated_attempt.time_finished is None

        # Verify it fails if already in progress
        with pytest.raises(RuntimeError, match="not in NOT_STARTED status"):
            mark_external_group_sync_attempt_in_progress(attempt_id, db_session)

    def test_mark_external_group_sync_attempt_failed(self, db_session: Session) -> None:
        """Test marking an external group sync attempt as failed."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Mark as failed with error message (should work even without starting)
        error_msg_1 = "External group sync service unavailable"
        mark_external_group_sync_attempt_failed(
            attempt_id, db_session, error_message=error_msg_1
        )

        # Verify the status and timestamps
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status == PermissionSyncStatus.FAILED
        assert attempt.time_started is not None  # Should be set if not already set
        assert attempt.time_finished is not None
        assert attempt.error_message == error_msg_1

        # Test with error message
        attempt_id_2 = create_external_group_sync_attempt(cc_pair.id, db_session)
        error_msg = "Connection timeout to external service"
        mark_external_group_sync_attempt_failed(
            attempt_id_2, db_session, error_message=error_msg
        )

        # Verify the error message was stored
        attempt_2 = get_external_group_sync_attempt(db_session, attempt_id_2)
        assert attempt_2 is not None
        assert attempt_2.status == PermissionSyncStatus.FAILED
        assert attempt_2.error_message == error_msg

    def test_get_recent_external_group_sync_attempts_for_cc_pair(
        self, db_session: Session
    ) -> None:
        """Test retrieving recent external group sync attempts for a connector credential pair."""
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Create multiple attempts for the cc_pair
        attempt_ids = []
        for i in range(5):
            attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)
            attempt_ids.append(attempt_id)

        # Get recent attempts
        recent_attempts = get_recent_external_group_sync_attempts_for_cc_pair(
            cc_pair_id=cc_pair.id,
            limit=3,
            db_session=db_session,
        )

        assert len(recent_attempts) == 3

        # Verify they are ordered by time_created descending (most recent first)
        for i in range(len(recent_attempts) - 1):
            assert (
                recent_attempts[i].time_created >= recent_attempts[i + 1].time_created
            )

        # Verify they all belong to the correct cc_pair
        for attempt in recent_attempts:
            assert attempt.connector_credential_pair_id == cc_pair.id

        # Test with different cc_pair (should return empty)
        other_cc_pair = _create_test_connector_credential_pair(
            db_session, source=DocumentSource.SLACK
        )
        other_attempts = get_recent_external_group_sync_attempts_for_cc_pair(
            cc_pair_id=other_cc_pair.id,
            limit=10,
            db_session=db_session,
        )
        assert len(other_attempts) == 0

    def test_get_recent_global_external_group_sync_attempts(
        self, db_session: Session
    ) -> None:
        """Test retrieving recent global external group sync attempts."""
        # Clean up any existing global attempts from previous test runs
        _cleanup_global_external_group_sync_attempts(db_session)

        # Create a cc_pair specific attempt
        cc_pair = _create_test_connector_credential_pair(db_session)
        create_external_group_sync_attempt(cc_pair.id, db_session)

        # Create multiple global attempts
        global_attempt_ids = []
        for i in range(3):
            attempt_id = create_external_group_sync_attempt(None, db_session)  # Global
            global_attempt_ids.append(attempt_id)

        # Get recent global attempts
        recent_global_attempts = get_recent_external_group_sync_attempts_for_cc_pair(
            cc_pair_id=None,  # Global
            limit=5,
            db_session=db_session,
        )

        assert len(recent_global_attempts) == 3

        # Verify they are all global (cc_pair_id is None)
        for attempt in recent_global_attempts:
            assert attempt.connector_credential_pair_id is None

        # Verify they are ordered by time_created descending
        for i in range(len(recent_global_attempts) - 1):
            assert (
                recent_global_attempts[i].time_created
                >= recent_global_attempts[i + 1].time_created
            )

    def test_status_enum_methods(self, db_session: Session) -> None:
        """Test the status enum helper methods."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Test NOT_STARTED status
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert not attempt.status.is_terminal()
        assert not attempt.status.is_successful()

        # Test IN_PROGRESS status
        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert not attempt.status.is_terminal()
        assert not attempt.status.is_successful()

        # Test SUCCESS status via complete function
        complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=100,
            total_groups_processed=10,
            total_group_memberships_synced=500,
            errors_encountered=0,
        )
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status.is_terminal()
        assert attempt.status.is_successful()

        # Test FAILED status (create new attempt)
        failed_attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)
        mark_external_group_sync_attempt_failed(
            failed_attempt_id, db_session, error_message="Test failure"
        )
        failed_attempt = get_external_group_sync_attempt(db_session, failed_attempt_id)
        assert failed_attempt is not None
        assert failed_attempt.status.is_terminal()
        assert not failed_attempt.status.is_successful()

        # Test COMPLETED_WITH_ERRORS status via complete function (create new attempt)
        error_attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)
        mark_external_group_sync_attempt_in_progress(error_attempt_id, db_session)
        complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=error_attempt_id,
            total_users_processed=100,
            total_groups_processed=10,
            total_group_memberships_synced=500,
            errors_encountered=5,
        )
        error_attempt = get_external_group_sync_attempt(db_session, error_attempt_id)
        assert error_attempt is not None
        assert error_attempt.status.is_terminal()
        assert (
            error_attempt.status.is_successful()
        )  # Completed with errors is still "successful"

    def test_complete_external_group_sync_attempt_success(
        self, db_session: Session
    ) -> None:
        """Test completing an external group sync attempt without errors."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress first
        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)

        # Complete without errors
        completed_attempt = complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=500,
            total_groups_processed=25,
            total_group_memberships_synced=1200,
            errors_encountered=0,
        )

        assert completed_attempt.status == PermissionSyncStatus.SUCCESS
        assert completed_attempt.total_users_processed == 500
        assert completed_attempt.total_groups_processed == 25
        assert completed_attempt.total_group_memberships_synced == 1200
        assert completed_attempt.time_finished is not None

    def test_complete_external_group_sync_attempt_with_errors(
        self, db_session: Session
    ) -> None:
        """Test completing an external group sync attempt with errors."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress first
        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)

        # Complete with errors
        completed_attempt = complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=500,
            total_groups_processed=25,
            total_group_memberships_synced=1200,
            errors_encountered=10,
        )

        assert completed_attempt.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        assert completed_attempt.total_users_processed == 500
        assert completed_attempt.total_groups_processed == 25
        assert completed_attempt.total_group_memberships_synced == 1200
        assert completed_attempt.time_finished is not None

    def test_complete_external_group_sync_attempt_can_be_called_multiple_times(
        self, db_session: Session
    ) -> None:
        """Test that complete can be called multiple times if needed (accumulates correctly)."""
        cc_pair = _create_test_connector_credential_pair(db_session)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Mark as in progress
        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)

        # Complete once
        first_complete = complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=200,
            total_groups_processed=10,
            total_group_memberships_synced=600,
            errors_encountered=0,
        )

        # Verify first completion
        assert first_complete.status == PermissionSyncStatus.SUCCESS
        assert first_complete.total_users_processed == 200
        assert first_complete.total_groups_processed == 10
        assert first_complete.total_group_memberships_synced == 600
        assert first_complete.time_finished is not None

        # Call complete again (simulating additional batch processing)
        second_complete = complete_external_group_sync_attempt(
            db_session=db_session,
            attempt_id=attempt_id,
            total_users_processed=300,
            total_groups_processed=15,
            total_group_memberships_synced=600,
            errors_encountered=5,
        )

        # Should accumulate progress from both calls and update status
        assert second_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS
        assert second_complete.total_users_processed == 500
        assert second_complete.total_groups_processed == 25
        assert second_complete.total_group_memberships_synced == 1200
        assert second_complete.time_finished is not None

    def test_global_vs_connector_specific_attempts(self, db_session: Session) -> None:
        """Test that global and connector-specific attempts are properly separated."""
        # Clean up any existing global attempts from previous test runs
        _cleanup_global_external_group_sync_attempts(db_session)

        cc_pair = _create_test_connector_credential_pair(db_session)

        # Create connector-specific attempts
        cc_attempt_1 = create_external_group_sync_attempt(cc_pair.id, db_session)
        cc_attempt_2 = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Create global attempts
        global_attempt_1 = create_external_group_sync_attempt(None, db_session)
        global_attempt_2 = create_external_group_sync_attempt(None, db_session)

        # Verify connector-specific attempts
        cc_attempts = get_recent_external_group_sync_attempts_for_cc_pair(
            cc_pair_id=cc_pair.id, limit=10, db_session=db_session
        )
        assert len(cc_attempts) == 2
        cc_attempt_ids = {attempt.id for attempt in cc_attempts}
        assert cc_attempt_ids == {cc_attempt_1, cc_attempt_2}

        # Verify global attempts
        global_attempts = get_recent_external_group_sync_attempts_for_cc_pair(
            cc_pair_id=None, limit=10, db_session=db_session
        )
        assert len(global_attempts) == 2
        global_attempt_ids = {attempt.id for attempt in global_attempts}
        assert global_attempt_ids == {global_attempt_1, global_attempt_2}

    def test_external_group_sync_attempt_not_stuck_on_early_failure(
        self, db_session: Session
    ) -> None:
        """Test that attempts transition to FAILED on early validation failures.

        This tests the bug fix where attempts could get stuck in NOT_STARTED status
        if validation checks failed after the attempt was created but before it was
        marked as IN_PROGRESS.
        """
        cc_pair = _create_test_connector_credential_pair(db_session)

        # Create an attempt (simulating the start of a sync task)
        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)

        # Verify it starts in NOT_STARTED
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status == PermissionSyncStatus.NOT_STARTED
        assert attempt.error_message is None

        # Simulate an early validation failure (e.g., missing sync config)
        # In the actual code, this would be called by _fail_external_group_sync_attempt()
        error_msg = "No group sync config found for source"
        mark_external_group_sync_attempt_failed(
            attempt_id, db_session, error_message=error_msg
        )

        # Verify the attempt transitions to FAILED (not stuck in NOT_STARTED)
        attempt = get_external_group_sync_attempt(db_session, attempt_id)
        assert attempt is not None
        assert attempt.status == PermissionSyncStatus.FAILED
        assert attempt.error_message == error_msg
        assert attempt.time_started is not None  # Should be set even on early failure
        assert attempt.time_finished is not None
        assert attempt.status.is_terminal()
        assert not attempt.status.is_successful()


================================================
FILE: backend/tests/external_dependency_unit/search_settings/test_search_settings.py
================================================
"""Tests that search settings with contextual RAG are properly propagated
to the indexing pipeline's LLM configuration."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from sqlalchemy.orm import Session

from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.enums import EmbeddingPrecision
from onyx.db.llm import fetch_default_contextual_rag_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import IndexModelStatus
from onyx.db.search_settings import create_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.indexing.indexing_pipeline import IndexingPipelineResult
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.search_settings import set_new_search_settings
from onyx.server.manage.search_settings import update_saved_search_settings


TEST_CONTEXTUAL_RAG_LLM_NAME = "test-contextual-model"
TEST_CONTEXTUAL_RAG_LLM_PROVIDER = "test-contextual-provider"

UPDATED_CONTEXTUAL_RAG_LLM_NAME = "updated-contextual-model"
UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER = "updated-contextual-provider"


def _create_llm_provider_and_model(
    db_session: Session,
    provider_name: str,
    model_name: str,
) -> None:
    """Insert an LLM provider with a single visible model configuration."""
    if fetch_existing_llm_provider(name=provider_name, db_session=db_session):
        return
    upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=provider_name,
            provider="openai",
            api_key="test-api-key",
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=model_name,
                    is_visible=True,
                    max_input_tokens=4096,
                )
            ],
        ),
        db_session=db_session,
    )


def _make_creation_request(
    llm_name: str = TEST_CONTEXTUAL_RAG_LLM_NAME,
    llm_provider: str = TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
    enable_contextual_rag: bool = True,
) -> SearchSettingsCreationRequest:
    return SearchSettingsCreationRequest(
        model_name="test-embedding-model",
        model_dim=768,
        normalize=True,
        query_prefix="",
        passage_prefix="",
        provider_type=None,
        index_name=None,
        multipass_indexing=False,
        embedding_precision=EmbeddingPrecision.FLOAT,
        reduced_dimension=None,
        enable_contextual_rag=enable_contextual_rag,
        contextual_rag_llm_name=llm_name,
        contextual_rag_llm_provider=llm_provider,
    )


def _make_saved_search_settings(
    llm_name: str = TEST_CONTEXTUAL_RAG_LLM_NAME,
    llm_provider: str = TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
    enable_contextual_rag: bool = True,
) -> SavedSearchSettings:
    return SavedSearchSettings(
        model_name="test-embedding-model",
        model_dim=768,
        normalize=True,
        query_prefix="",
        passage_prefix="",
        provider_type=None,
        index_name="test_index",
        multipass_indexing=False,
        embedding_precision=EmbeddingPrecision.FLOAT,
        reduced_dimension=None,
        enable_contextual_rag=enable_contextual_rag,
        contextual_rag_llm_name=llm_name,
        contextual_rag_llm_provider=llm_provider,
    )


def _run_indexing_pipeline_with_mocks(
    mock_get_llm: MagicMock,
    mock_index_handler: MagicMock,
    db_session: Session,
) -> None:
    """Call run_indexing_pipeline with all heavy dependencies mocked out."""
    mock_get_llm.return_value = MagicMock()
    mock_index_handler.return_value = IndexingPipelineResult(
        new_docs=0,
        total_docs=0,
        total_chunks=0,
        failures=[],
    )

    run_indexing_pipeline(
        document_batch=[],
        request_id=None,
        embedder=MagicMock(),
        document_indices=[],
        db_session=db_session,
        tenant_id="public",
        adapter=MagicMock(),
        chunker=MagicMock(chunk_token_limit=512),
    )


@pytest.fixture()
def baseline_search_settings(
    tenant_context: None,  # noqa: ARG001
    db_session: Session,
) -> None:
    """Ensure a baseline PRESENT search settings row exists in the DB,
    which is required before set_new_search_settings can be called."""
    baseline = _make_saved_search_settings(enable_contextual_rag=False)
    create_search_settings(
        search_settings=baseline,
        db_session=db_session,
        status=IndexModelStatus.PRESENT,
    )
    # Sync default contextual model to match PRESENT (clears any leftover state)
    update_default_contextual_model(
        db_session=db_session,
        enable_contextual_rag=baseline.enable_contextual_rag,
        contextual_rag_llm_provider=baseline.contextual_rag_llm_provider,
        contextual_rag_llm_name=baseline.contextual_rag_llm_name,
    )


@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices: MagicMock,
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
) -> None:
    """After creating FUTURE settings and swapping to PRESENT,
    fetch_default_contextual_rag_model should match the PRESENT settings
    and run_indexing_pipeline should call get_llm_for_contextual_rag."""
    _create_llm_provider_and_model(
        db_session=db_session,
        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,
    )

    set_new_search_settings(
        search_settings_new=_make_creation_request(),
        _=MagicMock(),
        db_session=db_session,
    )

    # PRESENT still has contextual RAG disabled, so default should be None
    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is None

    # Swap FUTURE → PRESENT (with 0 cc-pairs, REINDEX swaps immediately)
    mock_get_all_doc_indices.return_value = []
    old_settings = check_and_perform_index_swap(db_session)
    assert old_settings is not None, "Swap should have occurred"

    # Now PRESENT has contextual RAG enabled, default should match
    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is not None
    assert default_model.name == TEST_CONTEXTUAL_RAG_LLM_NAME

    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)

    mock_get_llm.assert_called_once_with(
        TEST_CONTEXTUAL_RAG_LLM_NAME,
        TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
    )


@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
def test_indexing_pipeline_uses_updated_contextual_rag_settings(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices: MagicMock,
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
) -> None:
    """After creating FUTURE settings, swapping to PRESENT, then updating
    via update_saved_search_settings, run_indexing_pipeline should use
    the updated LLM names."""
    _create_llm_provider_and_model(
        db_session=db_session,
        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,
    )
    _create_llm_provider_and_model(
        db_session=db_session,
        provider_name=UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,
        model_name=UPDATED_CONTEXTUAL_RAG_LLM_NAME,
    )

    # Create FUTURE settings with contextual RAG enabled
    set_new_search_settings(
        search_settings_new=_make_creation_request(),
        _=MagicMock(),
        db_session=db_session,
    )

    # PRESENT still has contextual RAG disabled, so default should be None
    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is None

    # Swap FUTURE → PRESENT (with 0 cc-pairs, REINDEX swaps immediately)
    mock_get_all_doc_indices.return_value = []
    old_settings = check_and_perform_index_swap(db_session)
    assert old_settings is not None, "Swap should have occurred"

    # Now PRESENT has contextual RAG enabled, default should match
    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is not None
    assert default_model.name == TEST_CONTEXTUAL_RAG_LLM_NAME

    # Update the PRESENT LLM names
    update_saved_search_settings(
        search_settings=_make_saved_search_settings(
            llm_name=UPDATED_CONTEXTUAL_RAG_LLM_NAME,
            llm_provider=UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,
        ),
        _=MagicMock(),
        db_session=db_session,
    )

    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is not None
    assert default_model.name == UPDATED_CONTEXTUAL_RAG_LLM_NAME

    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)

    mock_get_llm.assert_called_once_with(
        UPDATED_CONTEXTUAL_RAG_LLM_NAME,
        UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,
    )


@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
def test_indexing_pipeline_skips_llm_when_contextual_rag_disabled(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
) -> None:
    """When contextual RAG is disabled in search settings,
    get_llm_for_contextual_rag should not be called."""
    _create_llm_provider_and_model(
        db_session=db_session,
        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,
        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,
    )

    set_new_search_settings(
        search_settings_new=_make_creation_request(enable_contextual_rag=False),
        _=MagicMock(),
        db_session=db_session,
    )

    # PRESENT has contextual RAG disabled, so default should be None
    default_model = fetch_default_contextual_rag_model(db_session)
    assert default_model is None

    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)

    mock_get_llm.assert_not_called()


================================================
FILE: backend/tests/external_dependency_unit/slack_bot/__init__.py
================================================


================================================
FILE: backend/tests/external_dependency_unit/slack_bot/test_slack_bot_crud.py
================================================
"""Tests that SlackBot CRUD operations return properly typed SensitiveValue fields.

Regression test for the bug where insert_slack_bot/update_slack_bot returned
objects with raw string tokens instead of SensitiveValue wrappers, causing
'str object has no attribute get_value' errors in SlackBot.from_model().
"""

from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.db.slack_bot import insert_slack_bot
from onyx.db.slack_bot import update_slack_bot
from onyx.server.manage.models import SlackBot
from onyx.utils.sensitive import SensitiveValue


def _unique(prefix: str) -> str:
    return f"{prefix}-{uuid4().hex[:8]}"


def test_insert_slack_bot_returns_sensitive_values(db_session: Session) -> None:
    bot_token = _unique("xoxb-insert")
    app_token = _unique("xapp-insert")
    user_token = _unique("xoxp-insert")

    slack_bot = insert_slack_bot(
        db_session=db_session,
        name=_unique("test-bot-insert"),
        enabled=True,
        bot_token=bot_token,
        app_token=app_token,
        user_token=user_token,
    )

    assert isinstance(slack_bot.bot_token, SensitiveValue)
    assert isinstance(slack_bot.app_token, SensitiveValue)
    assert isinstance(slack_bot.user_token, SensitiveValue)

    assert slack_bot.bot_token.get_value(apply_mask=False) == bot_token
    assert slack_bot.app_token.get_value(apply_mask=False) == app_token
    assert slack_bot.user_token.get_value(apply_mask=False) == user_token

    # Verify from_model works without error
    pydantic_bot = SlackBot.from_model(slack_bot)
    assert pydantic_bot.bot_token  # masked, but not empty
    assert pydantic_bot.app_token


def test_update_slack_bot_returns_sensitive_values(db_session: Session) -> None:
    slack_bot = insert_slack_bot(
        db_session=db_session,
        name=_unique("test-bot-update"),
        enabled=True,
        bot_token=_unique("xoxb-update"),
        app_token=_unique("xapp-update"),
    )

    new_bot_token = _unique("xoxb-update-new")
    new_app_token = _unique("xapp-update-new")
    new_user_token = _unique("xoxp-update-new")

    updated = update_slack_bot(
        db_session=db_session,
        slack_bot_id=slack_bot.id,
        name=_unique("test-bot-updated"),
        enabled=False,
        bot_token=new_bot_token,
        app_token=new_app_token,
        user_token=new_user_token,
    )

    assert isinstance(updated.bot_token, SensitiveValue)
    assert isinstance(updated.app_token, SensitiveValue)
    assert isinstance(updated.user_token, SensitiveValue)

    assert updated.bot_token.get_value(apply_mask=False) == new_bot_token
    assert updated.app_token.get_value(apply_mask=False) == new_app_token
    assert updated.user_token.get_value(apply_mask=False) == new_user_token

    # Verify from_model works without error
    pydantic_bot = SlackBot.from_model(updated)
    assert pydantic_bot.bot_token
    assert pydantic_bot.app_token
    assert pydantic_bot.user_token is not None


================================================
FILE: backend/tests/external_dependency_unit/slack_bot/test_slack_bot_federated_search.py
================================================
# NOTE: ruff and black disagree after applying this noqa, so we just set file-level.
# ruff: noqa: ARG005
import os
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import Mock
from unittest.mock import patch
from uuid import uuid4

from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest

# Set environment variables to disable model server for testing
os.environ["DISABLE_MODEL_SERVER"] = "true"
os.environ["MODEL_SERVER_HOST"] = "disabled"
os.environ["MODEL_SERVER_PORT"] = "9000"

from sqlalchemy import inspect
from sqlalchemy.orm import Session
from slack_sdk.errors import SlackApiError

from onyx.configs.constants import FederatedConnectorSource
from onyx.context.search.federated.slack_search import fetch_and_cache_channel_metadata
from onyx.db.models import DocumentSet
from onyx.db.models import FederatedConnector
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.models import LLMProvider
from onyx.db.models import Persona
from onyx.db.models import Persona__DocumentSet
from onyx.db.models import Persona__Tool
from onyx.db.models import SlackBot
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.onyxbot.slack.listener import process_message
from onyx.onyxbot.slack.models import ChannelType
from onyx.db.tools import get_builtin_tool
from onyx.tools.built_in_tools import SearchTool
from tests.external_dependency_unit.conftest import create_test_user
from onyx.llm.constants import LlmProviderNames


def _create_test_persona_with_slack_config(db_session: Session) -> Persona | None:
    """Helper to create a test persona configured for Slack federated search"""
    unique_id = str(uuid4())[:8]
    document_set = DocumentSet(
        name=f"test_slack_docs_{unique_id}",
        description="Test document set for Slack federated search",
    )
    db_session.add(document_set)
    db_session.flush()

    persona = Persona(
        name=f"test_slack_persona_{unique_id}",
        description="Test persona for Slack federated search",
        system_prompt="You are a helpful assistant.",
        task_prompt="Answer the user's question based on the provided context.",
    )
    db_session.add(persona)
    db_session.flush()

    persona_doc_set = Persona__DocumentSet(
        persona_id=persona.id,
        document_set_id=document_set.id,
    )
    db_session.add(persona_doc_set)
    db_session.commit()

    # Built-in tools are automatically seeded by migrations

    try:
        search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)
        if search_tool:
            persona_tool = Persona__Tool(persona_id=persona.id, tool_id=search_tool.id)
            db_session.add(persona_tool)
    except RuntimeError:
        # SearchTool not found, skip adding it
        pass

    db_session.commit()

    # Prompts are now directly on the persona table, no need for joinedload
    return persona


def _create_mock_slack_request(
    text: str, channel_id: str = "C1234567890", slack_bot_id: int = 12345
) -> Mock:
    """Create a mock Slack request"""
    mock_req = Mock()
    mock_req.type = "events_api"
    mock_req.envelope_id = "test_envelope_id"
    mock_req.payload = {
        "event": {
            "type": "app_mention",
            "text": f"<@U1234567890> {text}",
            "channel": channel_id,
            "user": "U9876543210",
            "ts": "1234567890.123456",
        }
    }
    mock_req.slack_bot_id = slack_bot_id
    return mock_req


def _create_mock_slack_client(
    channel_id: str = "C1234567890",  # noqa: ARG001
    slack_bot_id: int = 12345,
) -> Mock:
    """Create a mock Slack client"""
    mock_client = Mock()
    mock_client.slack_bot_id = slack_bot_id
    mock_client.web_client = Mock()

    mock_post_message_response = {"ok": True, "message_ts": "1234567890.123456"}
    mock_client.web_client.chat_postMessage = Mock(
        return_value=mock_post_message_response
    )

    mock_users_info_response = Mock()
    mock_users_info_response.__getitem__ = Mock(
        side_effect=lambda key: {"ok": True}[key]
    )
    mock_users_info_response.data = {
        "user": {
            "id": "U9876543210",
            "name": "testuser",
            "real_name": "Test User",
            "profile": {
                "display_name": "Test User",
                "first_name": "Test",
                "last_name": "User",
                "email": "test@example.com",
            },
        }
    }
    mock_client.web_client.users_info = Mock(return_value=mock_users_info_response)

    mock_auth_test_response = {
        "ok": True,
        "user_id": "U1234567890",
        "bot_id": "B1234567890",
    }
    mock_client.web_client.auth_test = Mock(return_value=mock_auth_test_response)

    def mock_conversations_info_response(channel: str) -> Mock:
        channel_id = channel
        if channel_id == "C1234567890":  # general - public
            mock_response = Mock()
            mock_response.validate.return_value = None
            mock_response.data = {
                "channel": {
                    "id": "C1234567890",
                    "name": "general",
                    "is_channel": True,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": False,
                }
            }
            mock_response.__getitem__ = lambda self, key: mock_response.data[key]
            return mock_response
        elif channel_id == "C1111111111":  # support - public
            mock_response = Mock()
            mock_response.validate.return_value = None
            mock_response.data = {
                "channel": {
                    "id": "C1111111111",
                    "name": "support",
                    "is_channel": True,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": False,
                }
            }
            mock_response.__getitem__ = lambda self, key: mock_response.data[key]
            return mock_response
        elif channel_id == "C9999999999":  # dev-team - private
            mock_response = Mock()
            mock_response.validate.return_value = None
            mock_response.data = {
                "channel": {
                    "id": "C9999999999",
                    "name": "dev-team",
                    "is_channel": True,
                    "is_private": True,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": False,
                }
            }
            mock_response.__getitem__ = lambda self, key: mock_response.data[key]
            return mock_response
        elif channel_id == "D1234567890":  # DM
            mock_response = Mock()
            mock_response.validate.return_value = None
            mock_response.data = {
                "channel": {
                    "id": "D1234567890",
                    "name": "directmessage",
                    "is_channel": False,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": True,
                }
            }
            mock_response.__getitem__ = lambda self, key: mock_response.data[key]
            return mock_response
        else:
            mock_response = Mock()
            mock_response.validate.side_effect = Exception("channel_not_found")
            return mock_response

    mock_client.web_client.conversations_info = Mock(
        side_effect=mock_conversations_info_response
    )

    mock_client.web_client.conversations_members = Mock(
        return_value={"ok": True, "members": ["U9876543210", "U1234567890"]}
    )

    mock_client.web_client.conversations_replies = Mock(
        return_value={"ok": True, "messages": []}
    )

    return mock_client


class TestSlackBotFederatedSearch:
    """Test Slack bot federated search functionality"""

    def _setup_test_environment(
        self, db_session: Session
    ) -> tuple[User, Persona, FederatedConnector, SlackBot, SlackChannelConfig]:
        """Setup test environment with user, persona, and federated connector"""
        user = create_test_user(db_session, "slack_bot_test")

        persona = _create_test_persona_with_slack_config(db_session)
        if persona is None:
            raise ValueError("Failed to create test persona")

        federated_connector = FederatedConnector(
            source=FederatedConnectorSource.FEDERATED_SLACK,
            credentials={"workspace_url": "https://test.slack.com"},
        )
        db_session.add(federated_connector)
        db_session.flush()
        # Expire to ensure credentials is reloaded as SensitiveValue from DB
        db_session.expire(federated_connector)

        # Associate the federated connector with the persona's document sets
        # This is required for Slack federated search to be enabled
        for doc_set in persona.document_sets:
            federated_doc_set_mapping = FederatedConnector__DocumentSet(
                federated_connector_id=federated_connector.id,
                document_set_id=doc_set.id,
                entities={},  # Empty entities for test
            )
            db_session.add(federated_doc_set_mapping)
        db_session.flush()

        unique_id = str(uuid4())[:8]
        slack_bot = SlackBot(
            name=f"Test Slack Bot {unique_id}",
            bot_token=f"xoxb-test-token-{unique_id}",
            app_token=f"xapp-test-token-{unique_id}",
            user_token=f"xoxp-test-user-token-{unique_id}",
            enabled=True,
        )
        db_session.add(slack_bot)
        db_session.flush()
        # Expire to ensure tokens are reloaded as SensitiveValue from DB
        db_session.expire(slack_bot)

        slack_channel_config = SlackChannelConfig(
            slack_bot_id=slack_bot.id,
            persona_id=persona.id,
            channel_config={"channel_name": "general", "disabled": False},
            enable_auto_filters=True,
            is_default=True,
        )
        db_session.add(slack_channel_config)
        db_session.commit()

        return user, persona, federated_connector, slack_bot, slack_channel_config

    def _setup_slack_mocks(self, channel_name: str) -> tuple[list, list]:
        """Setup only Slack API mocks - everything else runs live"""
        patches = [
            patch("slack_sdk.WebClient.search_messages"),
            patch("onyx.context.search.federated.slack_search.query_slack"),
            patch("onyx.onyxbot.slack.listener.get_channel_type_from_id"),
            patch("onyx.context.search.utils.get_query_embeddings"),
        ]

        started_patches = [p.start() for p in patches]

        self._setup_slack_api_mocks(started_patches[0], started_patches[0])

        self._setup_query_slack_mock(started_patches[1], channel_name)

        self._setup_channel_type_mock(started_patches[2], channel_name)

        self._setup_embedding_mock(started_patches[3])

        return patches, started_patches

    def _setup_embedding_mock(self, mock_get_query_embeddings: Mock) -> None:
        """Mock embedding calls to avoid model server dependency"""
        # Return a dummy embedding vector for any query
        mock_get_query_embeddings.return_value = [[0.1] * 768]  # 768-dimensional vector

    def _setup_slack_api_mocks(
        self,
        mock_search_messages: Mock,
        mock_conversations_info: Mock,  # noqa: ARG002
    ) -> None:
        """Setup Slack API mocks to return controlled data for testing filtering"""
        mock_search_response = Mock()
        mock_search_response.validate.return_value = None
        mock_search_response.get.return_value = {
            "matches": [
                {
                    "text": "Performance issue in API",
                    "permalink": "https://test.slack.com/archives/C1234567890/p1234567890",
                    "ts": "1234567890.123456",
                    "channel": {"id": "C1234567890", "name": "general"},
                    "username": "user1",
                    "score": 0.9,
                },
                {
                    "text": "Performance issue in dashboard",
                    "permalink": "https://test.slack.com/archives/C1111111111/p1234567891",
                    "ts": "1234567891.123456",
                    "channel": {"id": "C1111111111", "name": "support"},
                    "username": "user2",
                    "score": 0.8,
                },
                {
                    "text": "Performance issue in private channel",
                    "permalink": "https://test.slack.com/archives/C9999999999/p1234567892",
                    "ts": "1234567892.123456",
                    "channel": {"id": "C9999999999", "name": "dev-team"},
                    "username": "user3",
                    "score": 0.7,
                },
                {
                    "text": "Performance issue in DM",
                    "permalink": "https://test.slack.com/archives/D1234567890/p1234567893",
                    "ts": "1234567893.123456",
                    "channel": {"id": "D1234567890", "name": "directmessage"},
                    "username": "user4",
                    "score": 0.6,
                },
            ]
        }
        mock_search_messages.return_value = mock_search_response

    def _setup_query_slack_mock(
        self, mock_query_slack: Mock, channel_name: str
    ) -> None:
        """Setup query_slack mock to capture filtering parameters"""
        from onyx.context.search.federated.slack_search import SlackQueryResult

        def mock_query_slack_capture_params(
            query_string: str,  # noqa: ARG001
            access_token: str,  # noqa: ARG001
            limit: int | None = None,  # noqa: ARG001
            allowed_private_channel: str | None = None,
            bot_token: str | None = None,  # noqa: ARG001
            include_dm: bool = False,
            entities: dict | None = None,  # noqa: ARG001
            available_channels: list | None = None,  # noqa: ARG001
            channel_metadata_dict: dict | None = None,  # noqa: ARG001
        ) -> SlackQueryResult:
            self._captured_filtering_params = {
                "allowed_private_channel": allowed_private_channel,
                "include_dm": include_dm,
                "channel_name": channel_name,
            }

            return SlackQueryResult(messages=[], filtered_channels=[])

        mock_query_slack.side_effect = mock_query_slack_capture_params

    def _setup_channel_type_mock(
        self,
        mock_get_channel_type_from_id: Mock,
        channel_name: str,  # noqa: ARG002
    ) -> None:
        """Setup get_channel_type_from_id mock to return correct channel types"""

        def mock_channel_type_response(
            web_client: Mock,  # noqa: ARG001
            channel_id: str,
        ) -> ChannelType:
            if channel_id == "C1234567890":  # general - public
                return ChannelType.PUBLIC_CHANNEL
            elif channel_id == "C1111111111":  # support - public
                return ChannelType.PUBLIC_CHANNEL
            elif channel_id == "C9999999999":  # dev-team - private
                return ChannelType.PRIVATE_CHANNEL
            elif channel_id == "D1234567890":  # DM
                return ChannelType.IM
            else:
                return ChannelType.PUBLIC_CHANNEL  # default

        mock_get_channel_type_from_id.side_effect = mock_channel_type_response

    def _setup_llm_provider(self, db_session: Session) -> None:
        """Create a default LLM provider in the database for testing with real API key"""
        # Delete any existing default LLM provider to ensure clean state
        # Use SQL-level delete to properly trigger ON DELETE CASCADE
        # (ORM-level delete tries to set foreign keys to NULL instead)
        from sqlalchemy import delete

        existing_providers = db_session.query(LLMProvider).all()
        for provider in existing_providers:
            db_session.execute(delete(LLMProvider).where(LLMProvider.id == provider.id))
        db_session.commit()

        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY environment variable not set - test requires real API key"
            )

        provider_view = upsert_llm_provider(
            LLMProviderUpsertRequest(
                name=f"test-llm-provider-{uuid4().hex[:8]}",
                provider=LlmProviderNames.OPENAI,
                api_key=api_key,
                is_public=True,
                model_configurations=[
                    ModelConfigurationUpsertRequest(
                        name="gpt-4o",
                        is_visible=True,
                        max_input_tokens=None,
                        display_name="gpt-4o",
                    ),
                ],
            ),
            db_session=db_session,
        )

        update_default_provider(provider_view.id, "gpt-4o", db_session)

    def _teardown_common_mocks(self, patches: list) -> None:
        """Stop all patches"""
        for p in patches:
            p.stop()

    @patch("onyx.utils.gpu_utils.fast_gpu_status_request", return_value=False)
    @patch(
        "onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval", return_value=[]
    )
    def test_slack_bot_public_channel_filtering(
        self,
        mock_vespa: Mock,  # noqa: ARG002
        mock_gpu_status: Mock,  # noqa: ARG002
        db_session: Session,
    ) -> None:
        """Test that slack bot in public channel sees only public channel messages"""
        self._setup_llm_provider(db_session)

        user, persona, federated_connector, slack_bot, slack_channel_config = (
            self._setup_test_environment(db_session)
        )

        channel_id = "C1234567890"  # #general (public)
        channel_name = "general"

        patches, started_patches = self._setup_slack_mocks(channel_name)

        try:
            mock_req = _create_mock_slack_request(
                "search for performance issues", channel_id, slack_bot.id
            )
            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)

            process_message(mock_req, mock_client)

            mock_client.web_client.chat_postMessage.assert_called()
            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list
            last_call = post_message_calls[-1]
            assert (
                last_call[1]["channel"] == channel_id
            ), f"Response should be sent to {channel_id}"

            response_text = last_call[1].get("text", "")
            assert len(response_text) > 0, "Bot should have sent a non-empty response"

            assert hasattr(
                self, "_captured_filtering_params"
            ), "query_slack should have been called"
            params = self._captured_filtering_params

            assert (
                params["allowed_private_channel"] is None
            ), "Public channels should not have private channel access"
            assert (
                params["include_dm"] is False
            ), "Public channels should not include DMs"
            assert (
                params["channel_name"] == "general"
            ), "Should be testing general channel"

        finally:
            self._teardown_common_mocks(patches)

    @patch("onyx.utils.gpu_utils.fast_gpu_status_request", return_value=False)
    @patch(
        "onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval", return_value=[]
    )
    def test_slack_bot_private_channel_filtering(
        self,
        mock_vespa: Mock,  # noqa: ARG002
        mock_gpu_status: Mock,  # noqa: ARG002
        db_session: Session,
    ) -> None:
        """Test that slack bot in private channel sees private + public channel messages"""
        self._setup_llm_provider(db_session)

        user, persona, federated_connector, slack_bot, slack_channel_config = (
            self._setup_test_environment(db_session)
        )

        channel_id = "C9999999999"  # #dev-team (private)
        channel_name = "dev-team"

        patches, started_patches = self._setup_slack_mocks(channel_name)

        try:
            mock_req = _create_mock_slack_request(
                "search for performance issues", channel_id, slack_bot.id
            )
            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)

            process_message(mock_req, mock_client)

            mock_client.web_client.chat_postMessage.assert_called()
            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list
            last_call = post_message_calls[-1]
            assert (
                last_call[1]["channel"] == channel_id
            ), f"Response should be sent to {channel_id}"

            response_text = last_call[1].get("text", "")
            assert len(response_text) > 0, "Bot should have sent a non-empty response"

            assert hasattr(
                self, "_captured_filtering_params"
            ), "query_slack should have been called"
            params = self._captured_filtering_params

            assert (
                params["allowed_private_channel"] == "C9999999999"
            ), "Private channels should have access to their specific private channel"
            assert (
                params["include_dm"] is False
            ), "Private channels should not include DMs"
            assert (
                params["channel_name"] == "dev-team"
            ), "Should be testing dev-team channel"

        finally:
            self._teardown_common_mocks(patches)

    @patch("onyx.utils.gpu_utils.fast_gpu_status_request", return_value=False)
    @patch(
        "onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval", return_value=[]
    )
    def test_slack_bot_dm_filtering(
        self,
        mock_vespa: Mock,  # noqa: ARG002
        mock_gpu_status: Mock,  # noqa: ARG002
        db_session: Session,
    ) -> None:
        """Test that slack bot in DM sees all messages (no filtering)"""
        self._setup_llm_provider(db_session)

        user, persona, federated_connector, slack_bot, slack_channel_config = (
            self._setup_test_environment(db_session)
        )

        channel_id = "D1234567890"  # DM
        channel_name = "directmessage"

        patches, started_patches = self._setup_slack_mocks(channel_name)

        try:
            mock_req = _create_mock_slack_request(
                "search for performance issues", channel_id, slack_bot.id
            )
            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)

            process_message(mock_req, mock_client)

            mock_client.web_client.chat_postMessage.assert_called()
            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list
            last_call = post_message_calls[-1]
            assert (
                last_call[1]["channel"] == channel_id
            ), f"Response should be sent to {channel_id}"

            response_text = last_call[1].get("text", "")
            assert len(response_text) > 0, "Bot should have sent a non-empty response"

            assert hasattr(
                self, "_captured_filtering_params"
            ), "query_slack should have been called"
            params = self._captured_filtering_params

            assert (
                params["allowed_private_channel"] is None
            ), "DMs should not have private channel access"
            assert params["include_dm"] is True, "DMs should include DM messages"
            assert (
                params["channel_name"] == "directmessage"
            ), "Should be testing directmessage channel"

        finally:
            self._teardown_common_mocks(patches)


@patch("onyx.context.search.federated.slack_search.get_redis_client")
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_missing_scope_resilience(
    mock_web_client: Mock, mock_redis_client: Mock
) -> None:
    """Test that missing scopes are handled gracefully"""
    # Setup mock Redis client
    mock_redis = MagicMock()
    mock_redis.get.return_value = None  # Cache miss
    mock_redis_client.return_value = mock_redis

    # Setup mock Slack client that simulates missing_scope error
    mock_client_instance = MagicMock()
    mock_web_client.return_value = mock_client_instance

    # Track which channel types were attempted
    attempted_types: list[str] = []

    def mock_conversations_list(
        types: str | None = None,
        **kwargs: Any,  # noqa: ARG001
    ) -> MagicMock:
        if types:
            attempted_types.append(types)

        # First call: all types including mpim -> missing_scope error
        if types and "mpim" in types:
            error_response = {
                "ok": False,
                "error": "missing_scope",
                "needed": "mpim:read",
                "provided": "identify,channels:history,channels:read,groups:read,im:read,search:read",
            }
            raise SlackApiError("missing_scope", error_response)

        # Second call: without mpim -> success
        mock_response = MagicMock()
        mock_response.validate.return_value = None
        mock_response.data = {
            "channels": [
                {
                    "id": "C1234567890",
                    "name": "general",
                    "is_channel": True,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": False,
                    "is_member": True,
                },
                {
                    "id": "D9876543210",
                    "name": "",
                    "is_channel": False,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": True,
                    "is_member": True,
                },
            ],
            "response_metadata": {},
        }
        return mock_response

    mock_client_instance.conversations_list.side_effect = mock_conversations_list

    # Call the function
    result = fetch_and_cache_channel_metadata(
        access_token="xoxp-test-token",
        team_id="T1234567890",
        include_private=True,
    )

    # Assertions
    # Should have attempted twice: once with mpim, once without
    assert len(attempted_types) == 2, f"Expected 2 attempts, got {len(attempted_types)}"
    assert "mpim" in attempted_types[0], "First attempt should include mpim"
    assert "mpim" not in attempted_types[1], "Second attempt should not include mpim"

    # Should have successfully returned channels despite missing scope
    assert len(result) == 2, f"Expected 2 channels, got {len(result)}"
    assert "C1234567890" in result, "Should have public channel"
    assert "D9876543210" in result, "Should have DM channel"

    # Verify channel metadata structure
    assert result["C1234567890"]["name"] == "general"
    assert result["C1234567890"]["type"] == "public_channel"
    assert result["D9876543210"]["type"] == "im"


@patch("onyx.context.search.federated.slack_search.get_redis_client")
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_multiple_missing_scopes_resilience(
    mock_web_client: Mock, mock_redis_client: Mock
) -> None:
    """Test handling multiple missing scopes gracefully"""
    # Setup mock Redis client
    mock_redis = MagicMock()
    mock_redis.get.return_value = None  # Cache miss
    mock_redis_client.return_value = mock_redis

    # Setup mock Slack client
    mock_client_instance = MagicMock()
    mock_web_client.return_value = mock_client_instance

    # Track attempts
    attempted_types: list[str] = []

    def mock_conversations_list(
        types: str | None = None,
        **kwargs: Any,  # noqa: ARG001
    ) -> MagicMock:
        if types:
            attempted_types.append(types)

        # First: mpim missing
        if types and "mpim" in types:
            error_response = {
                "ok": False,
                "error": "missing_scope",
                "needed": "mpim:read",
                "provided": "identify,channels:history,channels:read,groups:read",
            }
            raise SlackApiError("missing_scope", error_response)

        # Second: im missing
        if types and "im" in types:
            error_response = {
                "ok": False,
                "error": "missing_scope",
                "needed": "im:read",
                "provided": "identify,channels:history,channels:read,groups:read",
            }
            raise SlackApiError("missing_scope", error_response)

        # Third: success with only public and private channels
        mock_response = MagicMock()
        mock_response.validate.return_value = None
        mock_response.data = {
            "channels": [
                {
                    "id": "C1234567890",
                    "name": "general",
                    "is_channel": True,
                    "is_private": False,
                    "is_group": False,
                    "is_mpim": False,
                    "is_im": False,
                    "is_member": True,
                }
            ],
            "response_metadata": {},
        }
        return mock_response

    mock_client_instance.conversations_list.side_effect = mock_conversations_list

    # Call the function
    result = fetch_and_cache_channel_metadata(
        access_token="xoxp-test-token",
        team_id="T1234567890",
        include_private=True,
    )

    # Should gracefully handle multiple missing scopes
    assert len(attempted_types) == 3, f"Expected 3 attempts, got {len(attempted_types)}"
    assert "mpim" in attempted_types[0], "First attempt should include mpim"
    assert "mpim" not in attempted_types[1], "Second attempt should not include mpim"
    assert "im" in attempted_types[1], "Second attempt should include im"
    assert "im" not in attempted_types[2], "Third attempt should not include im"

    # Should still return available channels
    assert len(result) == 1, f"Expected 1 channel, got {len(result)}"
    assert result["C1234567890"]["name"] == "general"


def test_slack_channel_config_eager_loads_persona(db_session: Session) -> None:
    """Test that fetch_slack_channel_config_for_channel_or_default eagerly loads persona.

    This prevents lazy loading failures when the session context changes later
    in the request handling flow (e.g., in handle_regular_answer).
    """
    from onyx.db.slack_channel_config import (
        fetch_slack_channel_config_for_channel_or_default,
    )

    unique_id = str(uuid4())[:8]

    # Create a persona (using same fields as _create_test_persona_with_slack_config)
    persona = Persona(
        name=f"test_eager_load_persona_{unique_id}",
        description="Test persona for eager loading test",
        system_prompt="You are a helpful assistant.",
        task_prompt="Answer the user's question.",
    )
    db_session.add(persona)
    db_session.flush()

    # Create a slack bot
    slack_bot = SlackBot(
        name=f"Test Bot {unique_id}",
        bot_token=f"xoxb-test-{unique_id}",
        app_token=f"xapp-test-{unique_id}",
        enabled=True,
    )
    db_session.add(slack_bot)
    db_session.flush()

    # Create slack channel config with persona
    channel_name = f"test-channel-{unique_id}"
    slack_channel_config = SlackChannelConfig(
        slack_bot_id=slack_bot.id,
        persona_id=persona.id,
        channel_config={"channel_name": channel_name, "disabled": False},
        enable_auto_filters=False,
        is_default=False,
    )
    db_session.add(slack_channel_config)
    db_session.commit()

    # Fetch the config using the function under test
    fetched_config = fetch_slack_channel_config_for_channel_or_default(
        db_session=db_session,
        slack_bot_id=slack_bot.id,
        channel_name=channel_name,
    )

    assert fetched_config is not None, "Should find the channel config"

    # Check that persona relationship is already loaded (not pending lazy load)
    insp = inspect(fetched_config)
    assert insp is not None, "Should be able to inspect the config"
    assert "persona" not in insp.unloaded, (
        "Persona should be eagerly loaded, not pending lazy load. "
        "This is required to prevent fallback to default persona when "
        "session context changes in handle_regular_answer."
    )

    # Verify the persona is correct
    assert fetched_config.persona is not None, "Persona should not be None"
    assert fetched_config.persona.id == persona.id, "Should load the correct persona"
    assert fetched_config.persona.name == persona.name


================================================
FILE: backend/tests/external_dependency_unit/tools/test_image_generation_tool.py
================================================
# TODO re-enable this test
# import os
# import time
# from typing import Any
# from unittest.mock import patch

# import pytest

# from onyx.tools.models import ToolResponse
# from onyx.tools.tool_implementations.images.image_generation_tool import (
#     IMAGE_GENERATION_HEARTBEAT_ID,
# )
# from onyx.tools.tool_implementations.images.image_generation_tool import (
#     IMAGE_GENERATION_RESPONSE_ID,
# )
# from onyx.tools.tool_implementations.images.image_generation_tool import (
#     ImageGenerationResponse,
# )
# from onyx.tools.tool_implementations.images.image_generation_tool import (
#     ImageGenerationTool,
# )
# from onyx.tools.tool_implementations.images.image_generation_tool import ImageShape


# @pytest.fixture
# def dalle3_tool() -> ImageGenerationTool:
#     """Fixture for DALL-E 3 tool with API key from environment."""
#     api_key = os.environ["OPENAI_API_KEY"]
#     return ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )


# def test_image_generation_with_heartbeats(dalle3_tool: ImageGenerationTool) -> None:
#     """Test that heartbeat packets are yielded during image generation."""
#     responses = []
#     heartbeat_count = 0
#     image_response_count = 0

#     # Collect all responses
#     for response in dalle3_tool.run(prompt="A simple red circle on white background"):
#         responses.append(response)
#         if response.id == IMAGE_GENERATION_HEARTBEAT_ID:
#             heartbeat_count += 1
#         elif response.id == IMAGE_GENERATION_RESPONSE_ID:
#             image_response_count += 1

#     # Should have at least one heartbeat (depending on generation speed)
#     # and exactly one image response
#     assert image_response_count == 1
#     # May have 0 or more heartbeats depending on API speed
#     print(f"Received {heartbeat_count} heartbeat packets")

#     # Verify the final image response
#     final_response = responses[-1]
#     assert final_response.id == IMAGE_GENERATION_RESPONSE_ID
#     assert isinstance(final_response.response, list)
#     assert len(final_response.response) == 1

#     image = final_response.response[0]
#     assert isinstance(image, ImageGenerationResponse)
#     assert image.image_data is not None
#     assert len(image.image_data) > 100  # Base64 data should be substantial
#     assert image.revised_prompt is not None


# def test_heartbeat_timing_with_mock() -> None:
#     """Test that heartbeats are sent at correct intervals using mocked generation."""
#     api_key = os.getenv("OPENAI_API_KEY", "mock-key-for-testing")

#     tool = ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )

#     # Mock the _generate_image method to simulate slow generation
#     def slow_generate(*args: Any, **kwargs: Any) -> ImageGenerationResponse:
#         time.sleep(5)  # Simulate 5 second generation time
#         return ImageGenerationResponse(
#             revised_prompt="Test prompt",
#             image_data="base64encodedimagedata",
#         )

#     with patch.object(tool, "_generate_image", side_effect=slow_generate):
#         start_time = time.time()
#         responses = list(tool.run(prompt="Test prompt"))
#         time.time() - start_time

#         # Count heartbeats
#         heartbeat_count = sum(
#             1 for r in responses if r.id == IMAGE_GENERATION_HEARTBEAT_ID
#         )

#         # With 5 second generation and 2 second intervals,
#         # we should get approximately 2 heartbeats
#         assert heartbeat_count >= 1
#         assert heartbeat_count <= 3  # Allow some timing variance

#         # Verify we still get the final result
#         image_responses = [r for r in responses if r.id == IMAGE_GENERATION_RESPONSE_ID]
#         assert len(image_responses) == 1
#         assert image_responses[0].response[0].image_data == "base64encodedimagedata"


# def test_error_handling_with_heartbeats() -> None:
#     """Test that errors are properly propagated even with heartbeat mechanism."""
#     api_key = os.getenv("OPENAI_API_KEY", "mock-key-for-testing")

#     tool = ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )

#     # Mock the _generate_image method to raise an error after delay
#     def error_generate(*args: Any, **kwargs: Any) -> None:
#         time.sleep(1)  # Small delay to ensure at least one heartbeat
#         raise ValueError("Test error during generation")

#     with patch.object(tool, "_generate_image", side_effect=error_generate):
#         with pytest.raises(ValueError, match="Test error during generation"):
#             # Consume the generator to trigger the error
#             list(tool.run(prompt="Test prompt"))


# def test_tool_message_content_filters_heartbeats() -> None:
#     """Test that get_llm_tool_response correctly filters heartbeats."""
#     api_key = os.getenv("OPENAI_API_KEY", "mock-key-for-testing")

#     tool = ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )

#     # Create mock responses
#     heartbeat1 = ToolResponse(
#         id=IMAGE_GENERATION_HEARTBEAT_ID,
#         response={"status": "generating", "heartbeat": 0},
#     )
#     heartbeat2 = ToolResponse(
#         id=IMAGE_GENERATION_HEARTBEAT_ID,
#         response={"status": "generating", "heartbeat": 1},
#     )
#     image_response = ToolResponse(
#         id=IMAGE_GENERATION_RESPONSE_ID,
#         response=[
#             ImageGenerationResponse(
#                 revised_prompt="Test",
#                 image_data="base64encodedimagedata",
#             )
#         ],
#     )

#     # Test that heartbeats are filtered out
#     result = tool.get_llm_tool_response(heartbeat1, heartbeat2, image_response)

#     # Should return JSON with image info, not heartbeats
#     assert isinstance(result, str)
#     assert "Test" in result
#     assert "heartbeat" not in result


# def test_final_result_filters_heartbeats() -> None:
#     """Test that final_result correctly filters heartbeats."""
#     api_key = os.getenv("OPENAI_API_KEY", "mock-key-for-testing")

#     tool = ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )

#     # Create mock responses
#     heartbeat = ToolResponse(
#         id=IMAGE_GENERATION_HEARTBEAT_ID,
#         response={"status": "generating", "heartbeat": 0},
#     )
#     image_response = ToolResponse(
#         id=IMAGE_GENERATION_RESPONSE_ID,
#         response=[
#             ImageGenerationResponse(
#                 revised_prompt="Test prompt",
#                 image_data="base64encodedimagedata",
#             )
#         ],
#     )

#     # Test that final_result returns only image data
#     result = tool.get_final_result(heartbeat, image_response)

#     assert isinstance(result, list)
#     assert len(result) == 1
#     assert result[0]["revised_prompt"] == "Test prompt"
#     assert result[0]["image_data"] == "base64encodedimagedata"


# def test_different_image_shapes(dalle3_tool: ImageGenerationTool) -> None:
#     """Test image generation with different shape parameters."""
#     shapes_to_test = [
#         (ImageShape.SQUARE, "A red square"),
#         (ImageShape.PORTRAIT, "A tall building"),
#         (ImageShape.LANDSCAPE, "A wide landscape"),
#     ]

#     for shape, prompt in shapes_to_test:
#         responses = list(dalle3_tool.run(prompt=prompt, shape=shape.value))

#         # Find the image response
#         image_response = None
#         for response in responses:
#             if response.id == IMAGE_GENERATION_RESPONSE_ID:
#                 image_response = response
#                 break

#         assert image_response is not None
#         assert len(image_response.response) == 1
#         image = image_response.response[0]
#         assert image.image_data is not None
#         assert len(image.image_data) > 100  # Base64 data should be substantial
#         print(f"Generated {shape.value} image (base64, {len(image.image_data)} chars)")


# def test_image_generation_response_format() -> None:
#     """Test that image generation returns data in at least one format (URL or base64)."""
#     api_key = os.getenv("OPENAI_API_KEY")
#     if not api_key:
#         pytest.skip("OPENAI_API_KEY environment variable not set")

#     tool = ImageGenerationTool(
#         tool_id=0,
#         api_key=api_key,
#         api_base=None,
#         api_version=None,
#         model="dall-e-3",
#         num_imgs=1,
#     )

#     responses = list(tool.run(prompt="A simple blue circle"))

#     # Find the image response
#     image_response = None
#     for response in responses:
#         if response.id == IMAGE_GENERATION_RESPONSE_ID:
#             image_response = response
#             break

#     assert image_response is not None
#     assert len(image_response.response) == 1
#     image = image_response.response[0]
#     # Should always have base64 data
#     assert image.image_data is not None
#     assert len(image.image_data) > 100  # Base64 data should be substantial


# if __name__ == "__main__":
#     # Run with: python -m pytest tests/external_dependency_unit/tools/test_image_generation_tool.py -v
#     pytest.main([__file__, "-v"])


================================================
FILE: backend/tests/external_dependency_unit/tools/test_mcp_passthrough_oauth.py
================================================
"""
Test suite for MCP Pass-Through OAuth (PT_OAUTH) integration.

Tests the pass-through OAuth flow where Onyx forwards the user's login OAuth token
to an MCP server for authentication.

This test:
1. Creates a test user with an OAuthAccount (simulating Google OAuth login)
2. Creates an MCP server with PT_OAUTH auth type
3. Creates MCP tools for that server
4. Verifies the user's OAuth token is correctly passed to MCPTool

All external HTTP calls are mocked, but Postgres and Redis are running.
"""

import queue
from typing import Any
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.chat.emitter import Emitter
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
from onyx.db.mcp import create_mcp_server__no_commit
from onyx.db.models import OAuthAccount
from onyx.db.models import Persona
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.llm.factory import get_default_llm
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import CustomToolCallSummary
from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.tools.tool_implementations.mcp.mcp_tool import MCPTool
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.conftest import create_test_user


def _create_test_persona_with_mcp_tool(
    db_session: Session, user: User, tools: list[Tool]
) -> Persona:
    """Helper to create a test persona with MCP tools"""
    persona = Persona(
        name=f"Test MCP Persona {uuid4().hex[:8]}",
        description="Test persona with MCP tools",
        system_prompt="You are a helpful assistant",
        task_prompt="Answer the user's question",
        tools=tools,
        document_sets=[],
        users=[user],
        groups=[],
        is_listed=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
    )
    db_session.add(persona)
    db_session.commit()
    db_session.refresh(persona)
    return persona


class TestMCPPassThroughOAuth:
    """Tests for MCP Pass-Through OAuth (PT_OAUTH) flow"""

    @pytest.fixture(autouse=True)
    def setup_llm_provider(self, db_session: Session) -> None:
        """Ensure default LLM provider is set up for each test."""
        ensure_default_llm_provider(db_session)

    def test_pt_oauth_passes_user_login_token(self, db_session: Session) -> None:
        """
        Test that PT_OAUTH correctly passes the user's login OAuth token to MCPTool.

        This simulates a user who logged into Onyx with Google OAuth and is using
        an MCP server that requires their Google token for authentication.
        """
        # Create user with login OAuth token (simulating Google OAuth login)
        user = create_test_user(db_session, "pt_oauth_user")
        user_oauth_token = "google_oauth_token_abc123"

        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="google",
            account_id="google_user_12345",
            account_email=user.email,
            access_token=user_oauth_token,
            refresh_token="google_refresh_token",
        )
        db_session.add(oauth_account)
        db_session.commit()
        # Refresh user to load oauth_accounts relationship
        db_session.refresh(user)

        # Create MCP server with PT_OAUTH auth type
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"PT_OAUTH Test Server {uuid4().hex[:8]}",
            description="MCP server for pass-through OAuth testing",
            server_url="http://test-mcp-server.example.com/mcp",
            auth_type=MCPAuthenticationType.PT_OAUTH,
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,  # Not used for PT_OAUTH
            db_session=db_session,
        )
        db_session.commit()

        # Create MCP tool associated with this server
        mcp_tool_db = Tool(
            name="test_mcp_tool",
            display_name="Test MCP Tool",
            description="Test MCP tool for PT_OAUTH",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={
                "type": "object",
                "properties": {
                    "message": {"type": "string", "description": "Test message"}
                },
            },
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        # Create persona with the MCP tool
        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        # Construct tools
        search_tool_config = SearchToolConfig()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=search_tool_config,
        )

        # Verify MCP tool was constructed
        assert mcp_tool_db.id in tool_dict
        constructed_tools = tool_dict[mcp_tool_db.id]
        assert len(constructed_tools) == 1
        mcp_tool = constructed_tools[0]
        assert isinstance(mcp_tool, MCPTool)

        # Verify the user's OAuth token was passed to the MCPTool
        assert mcp_tool._user_oauth_token == user_oauth_token

    def test_pt_oauth_without_user_oauth_account(self, db_session: Session) -> None:
        """
        Test PT_OAUTH behavior when user doesn't have an OAuth account.

        The user logged in with basic auth (no OAuth token), so the MCP tool
        should have no OAuth token to pass through.
        """
        # Create user WITHOUT OAuth account (basic auth login)
        user = create_test_user(db_session, "basic_auth_user")
        # No OAuthAccount created

        # Create MCP server with PT_OAUTH auth type
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"PT_OAUTH No Token Server {uuid4().hex[:8]}",
            description="MCP server for testing missing OAuth token",
            server_url="http://test-mcp-server.example.com/mcp",
            auth_type=MCPAuthenticationType.PT_OAUTH,
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,
            db_session=db_session,
        )
        db_session.commit()

        # Create MCP tool
        mcp_tool_db = Tool(
            name="test_mcp_tool_no_token",
            display_name="Test MCP Tool No Token",
            description="Test MCP tool without OAuth token",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={
                "type": "object",
                "properties": {"query": {"type": "string"}},
            },
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        # Create persona
        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
        )

        # Verify MCP tool was constructed
        assert mcp_tool_db.id in tool_dict
        constructed_tools = tool_dict[mcp_tool_db.id]
        assert len(constructed_tools) == 1
        mcp_tool = constructed_tools[0]
        assert isinstance(mcp_tool, MCPTool)

        # Verify NO OAuth token was passed (user has no OAuth account)
        assert mcp_tool._user_oauth_token is None

    def test_pt_oauth_vs_api_token_auth(self, db_session: Session) -> None:
        """
        Test that PT_OAUTH and API_TOKEN auth types behave differently.

        PT_OAUTH should use the user's login token, while API_TOKEN should
        NOT use the user's login token (it uses the connection config instead).
        """
        # Create user with OAuth account
        user = create_test_user(db_session, "auth_type_test_user")
        user_oauth_token = "user_login_token_xyz789"

        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="google",
            account_id="google_user_xyz",
            account_email=user.email,
            access_token=user_oauth_token,
            refresh_token="",
        )
        db_session.add(oauth_account)
        db_session.commit()
        db_session.refresh(user)

        # Create MCP server with API_TOKEN auth type (not PT_OAUTH)
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"API Token Server {uuid4().hex[:8]}",
            description="MCP server with API token auth",
            server_url="http://api-token-server.example.com/mcp",
            auth_type=MCPAuthenticationType.API_TOKEN,  # Not PT_OAUTH
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,
            db_session=db_session,
        )
        db_session.commit()

        # Create MCP tool
        mcp_tool_db = Tool(
            name="api_token_tool",
            display_name="API Token Tool",
            description="Tool with API token auth",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={
                "type": "object",
                "properties": {"data": {"type": "string"}},
            },
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        # Create persona
        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
        )
        # Verify MCP tool was constructed
        assert mcp_tool_db.id in tool_dict
        constructed_tools = tool_dict[mcp_tool_db.id]
        assert len(constructed_tools) == 1
        mcp_tool = constructed_tools[0]
        assert isinstance(mcp_tool, MCPTool)

        # Verify the user's OAuth token was NOT passed (API_TOKEN auth type)
        # API_TOKEN auth should use connection config, not user's login token
        assert mcp_tool._user_oauth_token is None

    def test_mcp_tool_run_sets_authorization_header_for_pt_oauth(
        self, db_session: Session
    ) -> None:
        """
        Test that MCPTool.run() correctly sets the Authorization header
        when PT_OAUTH is configured.
        """
        # Create user with OAuth token
        user = create_test_user(db_session, "pt_oauth_header_user")
        user_oauth_token = "bearer_token_for_mcp_server"

        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="google",
            account_id="google_header_user",
            account_email=user.email,
            access_token=user_oauth_token,
            refresh_token="",
        )
        db_session.add(oauth_account)
        db_session.commit()
        db_session.refresh(user)

        # Create MCP server with PT_OAUTH
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"Header Test Server {uuid4().hex[:8]}",
            description="Server for testing Authorization header",
            server_url="http://header-test-server.example.com/mcp",
            auth_type=MCPAuthenticationType.PT_OAUTH,
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,
            db_session=db_session,
        )
        db_session.commit()

        # Create MCP tool
        mcp_tool_db = Tool(
            name="header_test_tool",
            display_name="Header Test Tool",
            description="Tool to test Authorization header",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={
                "type": "object",
                "properties": {"input": {"type": "string"}},
            },
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        # Create persona
        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
        )

        # Get the constructed MCPTool
        mcp_tool = tool_dict[mcp_tool_db.id][0]
        assert isinstance(mcp_tool, MCPTool)

        # Mock the call_mcp_tool function to capture the headers
        captured_headers: dict[str, str] = {}

        mocked_response = {"result": "mocked_response"}

        def mock_call_mcp_tool(
            server_url: str,  # noqa: ARG001
            tool_name: str,  # noqa: ARG001
            arguments: dict[str, Any],  # noqa: ARG001
            connection_headers: dict[str, str],
            transport: MCPTransport,  # noqa: ARG001
            auth: Any = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            captured_headers.update(connection_headers)
            return mocked_response

        with patch(
            "onyx.tools.tool_implementations.mcp.mcp_tool.call_mcp_tool",
            side_effect=mock_call_mcp_tool,
        ):
            # Run the tool
            response = mcp_tool.run(
                placement=Placement(turn_index=0, tab_index=0),
                override_kwargs=None,
                input="test",
            )
            print(response.rich_response)
            assert isinstance(response.rich_response, CustomToolCallSummary)
            print(response.rich_response.tool_result)
            assert response.rich_response.tool_result["tool_result"] == mocked_response

        # Verify Authorization header was set with the user's OAuth token
        assert "Authorization" in captured_headers
        assert captured_headers["Authorization"] == f"Bearer {user_oauth_token}"

    def test_pt_oauth_works_with_oidc_provider(self, db_session: Session) -> None:
        """
        Test that PT_OAUTH works correctly when user logged in via OIDC (not Google).

        This is important because OIDC providers (Okta, Auth0, Keycloak, etc.)
        use oauth_name='openid' while Google uses oauth_name='google'.
        The PT_OAUTH code should work with any OAuth provider.
        """
        # Create user with OIDC OAuth token (simulating Okta/Auth0/Keycloak login)
        user = create_test_user(db_session, "oidc_user")
        # Use a random test token (not a real JWT to avoid pre-commit false positives)
        oidc_access_token = "oidc_test_token_abc123_not_a_real_jwt_xyz789"

        # OIDC providers use oauth_name='openid' by default
        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="openid",  # This is the key difference from Google OAuth
            account_id="oidc_user_sub_12345",
            account_email=user.email,
            access_token=oidc_access_token,
            refresh_token="oidc_refresh_token",
        )
        db_session.add(oauth_account)
        db_session.commit()
        db_session.refresh(user)

        # Create MCP server with PT_OAUTH auth type
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"PT_OAUTH OIDC Server {uuid4().hex[:8]}",
            description="MCP server for OIDC pass-through OAuth testing",
            server_url="http://oidc-mcp-server.example.com/mcp",
            auth_type=MCPAuthenticationType.PT_OAUTH,
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,
            db_session=db_session,
        )
        db_session.commit()

        # Create MCP tool
        mcp_tool_db = Tool(
            name="oidc_mcp_tool",
            display_name="OIDC MCP Tool",
            description="Test MCP tool for OIDC PT_OAUTH",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={
                "type": "object",
                "properties": {"query": {"type": "string"}},
            },
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        # Create persona
        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        # Construct tools
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
        )
        # Verify MCP tool was constructed
        assert mcp_tool_db.id in tool_dict
        constructed_tools = tool_dict[mcp_tool_db.id]
        assert len(constructed_tools) == 1
        mcp_tool = constructed_tools[0]
        assert isinstance(mcp_tool, MCPTool)

        # Verify the OIDC token was passed to the MCPTool
        # (code should work identically for Google OAuth and OIDC)
        assert mcp_tool._user_oauth_token == oidc_access_token

    def test_pt_oauth_uses_first_oauth_account(self, db_session: Session) -> None:
        """
        Test that PT_OAUTH uses the first OAuth account when user has multiple.

        Users might have OAuth accounts from multiple providers (unlikely but possible).
        The code should consistently use the first one.
        """
        user = create_test_user(db_session, "multi_oauth_user")
        first_token = "first_oauth_token_123"
        second_token = "second_oauth_token_456"

        # Add first OAuth account (Google)
        oauth_account_1 = OAuthAccount(
            user_id=user.id,
            oauth_name="google",
            account_id="google_user_123",
            account_email=user.email,
            access_token=first_token,
            refresh_token="",
        )
        db_session.add(oauth_account_1)
        db_session.commit()

        # Add second OAuth account (OIDC)
        oauth_account_2 = OAuthAccount(
            user_id=user.id,
            oauth_name="openid",
            account_id="oidc_user_456",
            account_email=user.email,
            access_token=second_token,
            refresh_token="",
        )
        db_session.add(oauth_account_2)
        db_session.commit()
        db_session.refresh(user)

        # Create MCP server and tool
        mcp_server = create_mcp_server__no_commit(
            owner_email=user.email,
            name=f"Multi OAuth Server {uuid4().hex[:8]}",
            description="MCP server for multi-OAuth testing",
            server_url="http://multi-oauth-server.example.com/mcp",
            auth_type=MCPAuthenticationType.PT_OAUTH,
            transport=MCPTransport.STREAMABLE_HTTP,
            auth_performer=MCPAuthenticationPerformer.ADMIN,
            db_session=db_session,
        )
        db_session.commit()

        mcp_tool_db = Tool(
            name="multi_oauth_tool",
            display_name="Multi OAuth Tool",
            description="Test tool",
            mcp_server_id=mcp_server.id,
            mcp_input_schema={"type": "object", "properties": {}},
            user_id=user.id,
        )
        db_session.add(mcp_tool_db)
        db_session.commit()
        db_session.refresh(mcp_tool_db)

        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])
        llm = get_default_llm()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
        )

        mcp_tool = tool_dict[mcp_tool_db.id][0]
        assert isinstance(mcp_tool, MCPTool)

        # Should use the first OAuth account's token
        assert mcp_tool._user_oauth_token == first_token


================================================
FILE: backend/tests/external_dependency_unit/tools/test_memory_tool_integration.py
================================================
"""Tests for MemoryTool integration: registration, construction, and DB persistence."""

import pytest
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.db.memory import add_memory
from onyx.db.memory import get_memories
from onyx.db.memory import MAX_MEMORIES_PER_USER
from onyx.db.memory import update_memory_at_index
from onyx.db.models import Memory
from onyx.db.models import User
from onyx.tools.tool_implementations.memory.models import MemoryToolResponse
from tests.external_dependency_unit.conftest import create_test_user


@pytest.fixture()
def test_user(db_session: Session):  # type: ignore
    """Create a test user with use_memories enabled."""
    user = create_test_user(db_session, "memory_test")
    user.use_memories = True
    db_session.commit()
    db_session.refresh(user)
    return user


@pytest.fixture()
def test_user_no_memories(db_session: Session):  # type: ignore
    """Create a test user with use_memories disabled."""
    user = create_test_user(db_session, "memory_test_off")
    user.use_memories = False
    db_session.commit()
    db_session.refresh(user)
    return user


class TestAddMemory:
    def test_add_memory_creates_row(self, db_session: Session, test_user: User) -> None:
        """Verify that add_memory inserts a new Memory row."""
        user_id = test_user.id
        memory = add_memory(
            user_id=user_id,
            memory_text="User prefers dark mode",
            db_session=db_session,
        )

        assert memory.id is not None
        assert memory.user_id == user_id
        assert memory.memory_text == "User prefers dark mode"

        # Verify it persists
        fetched = db_session.get(Memory, memory.id)
        assert fetched is not None
        assert fetched.memory_text == "User prefers dark mode"

    def test_add_multiple_memories(self, db_session: Session, test_user: User) -> None:
        """Verify that multiple memories can be added for the same user."""
        user_id = test_user.id
        m1 = add_memory(
            user_id=user_id,
            memory_text="Favorite color is blue",
            db_session=db_session,
        )
        m2 = add_memory(
            user_id=user_id,
            memory_text="Works in engineering",
            db_session=db_session,
        )

        assert m1.id != m2.id
        assert m1.memory_text == "Favorite color is blue"
        assert m2.memory_text == "Works in engineering"


class TestUpdateMemoryAtIndex:
    def test_update_memory_at_valid_index(
        self, db_session: Session, test_user: User
    ) -> None:
        """Verify that update_memory_at_index updates the correct row."""
        user_id = test_user.id
        add_memory(user_id=user_id, memory_text="Memory 0", db_session=db_session)
        add_memory(user_id=user_id, memory_text="Memory 1", db_session=db_session)
        add_memory(user_id=user_id, memory_text="Memory 2", db_session=db_session)

        updated = update_memory_at_index(
            user_id=user_id,
            index=1,
            new_text="Updated Memory 1",
            db_session=db_session,
        )

        assert updated is not None
        assert updated.memory_text == "Updated Memory 1"

    def test_update_memory_at_out_of_range_index(
        self, db_session: Session, test_user: User
    ) -> None:
        """Verify that out-of-range index returns None."""
        user_id = test_user.id
        add_memory(user_id=user_id, memory_text="Only memory", db_session=db_session)

        result = update_memory_at_index(
            user_id=user_id,
            index=5,
            new_text="Should not update",
            db_session=db_session,
        )

        assert result is None

    def test_update_memory_at_negative_index(
        self, db_session: Session, test_user: User
    ) -> None:
        """Verify that negative index returns None."""
        user_id = test_user.id
        add_memory(user_id=user_id, memory_text="Only memory", db_session=db_session)

        result = update_memory_at_index(
            user_id=user_id,
            index=-1,
            new_text="Should not update",
            db_session=db_session,
        )

        assert result is None


class TestMemoryToolResponse:
    def test_response_with_add(self) -> None:
        """Verify MemoryToolResponse correctly carries add (index_to_replace=None)."""
        response = MemoryToolResponse(
            memory_text="User likes Python",
            index_to_replace=None,
        )
        assert response.memory_text == "User likes Python"
        assert response.index_to_replace is None

    def test_response_with_update(self) -> None:
        """Verify MemoryToolResponse correctly carries update (index_to_replace=int)."""
        response = MemoryToolResponse(
            memory_text="User likes TypeScript",
            index_to_replace=2,
        )
        assert response.memory_text == "User likes TypeScript"
        assert response.index_to_replace == 2


class TestMemoryCap:
    def test_add_memory_evicts_oldest_when_at_cap(
        self, db_session: Session, test_user: User
    ) -> None:
        """When the user has MAX_MEMORIES_PER_USER memories, adding a new one
        should delete the oldest (lowest id) and keep the total at the cap."""
        user_id = test_user.id

        # Fill up to the cap
        for i in range(MAX_MEMORIES_PER_USER):
            add_memory(
                user_id=user_id,
                memory_text=f"Memory {i}",
                db_session=db_session,
            )

        rows_before = db_session.scalars(
            Memory.__table__.select().where(Memory.user_id == user_id)
        ).all()
        assert len(rows_before) == MAX_MEMORIES_PER_USER

        # Add one more — should evict the oldest
        new_memory = add_memory(
            user_id=user_id,
            memory_text="New memory after cap",
            db_session=db_session,
        )

        rows_after = db_session.scalars(
            select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())
        ).all()

        assert len(rows_after) == MAX_MEMORIES_PER_USER
        # Oldest ("Memory 0") should be gone; "Memory 1" is now the oldest
        assert rows_after[0].memory_text == "Memory 1"
        # Newest should be the one we just added
        assert rows_after[-1].id == new_memory.id
        assert rows_after[-1].memory_text == "New memory after cap"


class TestGetMemoriesWithUserId:
    def test_get_memories_populates_user_id(
        self, db_session: Session, test_user: User
    ) -> None:
        """Verify that get_memories populates user_id on the returned context."""
        context = get_memories(test_user, db_session)
        assert context.user_id == test_user.id

    def test_get_memories_disabled_still_populates_user_id(
        self, db_session: Session, test_user_no_memories: User
    ) -> None:
        """Verify that get_memories with use_memories=False still returns a
        fully populated context (user_id, user_info, memories). The
        use_memories flag only controls whether memories are injected into
        the system prompt, not whether the context is fetched."""
        # Add a memory for this user so we can verify it's fetched
        add_memory(
            user_id=test_user_no_memories.id,
            memory_text="Should still be fetched",
            db_session=db_session,
        )

        context = get_memories(test_user_no_memories, db_session)
        assert context.user_id == test_user_no_memories.id
        assert context.user_info.email == test_user_no_memories.email
        assert len(context.memories) == 1
        assert context.memories[0] == "Should still be fetched"

    def test_get_memories_disabled_persistence_works(
        self, db_session: Session, test_user_no_memories: User
    ) -> None:
        """Verify that add_memory and update_memory_at_index work correctly
        when use_memories=False, since the memory tool should still persist."""
        user_id = test_user_no_memories.id

        # Add a memory
        memory = add_memory(
            user_id=user_id,
            memory_text="Memory with use_memories off",
            db_session=db_session,
        )
        assert memory.memory_text == "Memory with use_memories off"

        # Update that memory
        updated = update_memory_at_index(
            user_id=user_id,
            index=0,
            new_text="Updated memory with use_memories off",
            db_session=db_session,
        )
        assert updated is not None
        assert updated.memory_text == "Updated memory with use_memories off"

        # Verify get_memories returns the updated memory
        context = get_memories(test_user_no_memories, db_session)
        assert len(context.memories) == 1
        assert context.memories[0] == "Updated memory with use_memories off"


================================================
FILE: backend/tests/external_dependency_unit/tools/test_oauth_config_crud.py
================================================
"""
Test suite for OAuth Config CRUD operations.

Tests the basic CRUD operations for OAuth configurations and user tokens,
including creation, retrieval, updates, deletion, and token management.
"""

from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.models import OAuthConfig
from onyx.db.models import Tool
from onyx.db.oauth_config import create_oauth_config
from onyx.db.oauth_config import delete_oauth_config
from onyx.db.oauth_config import delete_user_oauth_token
from onyx.db.oauth_config import get_oauth_config
from onyx.db.oauth_config import get_oauth_configs
from onyx.db.oauth_config import get_tools_by_oauth_config
from onyx.db.oauth_config import get_user_oauth_token
from onyx.db.oauth_config import update_oauth_config
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.db.tools import delete_tool__no_commit
from onyx.db.tools import update_tool
from tests.external_dependency_unit.conftest import create_test_user


def _create_test_oauth_config(
    db_session: Session,
    name: str | None = None,
) -> OAuthConfig:
    """Helper to create a test OAuth config with unique name"""
    unique_name = name or f"Test OAuth Config {uuid4().hex[:8]}"
    return create_oauth_config(
        name=unique_name,
        authorization_url="https://github.com/login/oauth/authorize",
        token_url="https://github.com/login/oauth/access_token",
        client_id="test_client_id",
        client_secret="test_client_secret",
        scopes=["repo", "user"],
        additional_params={"test_param": "test_value"},
        db_session=db_session,
    )


def _create_test_tool_with_oauth(
    db_session: Session, oauth_config: OAuthConfig
) -> Tool:
    """Helper to create a test tool with OAuth config"""
    user = create_test_user(db_session, "tool_owner")
    tool = Tool(
        name="Test Tool",
        description="Test tool with OAuth",
        openapi_schema={"openapi": "3.0.0"},
        user_id=user.id,
        oauth_config_id=oauth_config.id,
    )
    db_session.add(tool)
    db_session.commit()
    db_session.refresh(tool)
    return tool


class TestOAuthConfigCRUD:
    """Tests for OAuth configuration CRUD operations"""

    def test_create_oauth_config(self, db_session: Session) -> None:
        """Test creating a new OAuth configuration"""
        oauth_config = _create_test_oauth_config(db_session)

        assert oauth_config.id is not None
        assert oauth_config.name.startswith("Test OAuth Config")
        assert (
            oauth_config.authorization_url == "https://github.com/login/oauth/authorize"
        )
        assert oauth_config.token_url == "https://github.com/login/oauth/access_token"
        assert oauth_config.scopes == ["repo", "user"]
        assert oauth_config.additional_params == {"test_param": "test_value"}
        assert oauth_config.created_at is not None
        assert oauth_config.updated_at is not None

        # Verify encrypted fields are stored (we can't decrypt in tests, but we can check they exist)
        assert oauth_config.client_id is not None
        assert oauth_config.client_secret is not None

    def test_get_oauth_config(self, db_session: Session) -> None:
        """Test retrieving an OAuth config by ID"""
        created_config = _create_test_oauth_config(db_session)

        retrieved_config = get_oauth_config(created_config.id, db_session)

        assert retrieved_config is not None
        assert retrieved_config.id == created_config.id
        assert retrieved_config.name == created_config.name

    def test_get_oauth_config_not_found(self, db_session: Session) -> None:
        """Test retrieving a non-existent OAuth config returns None"""
        config = get_oauth_config(99999, db_session)
        assert config is None

    def test_get_oauth_configs(self, db_session: Session) -> None:
        """Test retrieving all OAuth configurations"""
        # Create multiple configs with unique names
        config1 = _create_test_oauth_config(db_session)
        config2 = _create_test_oauth_config(db_session)

        configs = get_oauth_configs(db_session)

        assert len(configs) >= 2
        config_ids = [c.id for c in configs]
        assert config1.id in config_ids
        assert config2.id in config_ids

    def test_update_oauth_config(self, db_session: Session) -> None:
        """Test updating an OAuth configuration"""
        oauth_config = _create_test_oauth_config(db_session)
        original_name = oauth_config.name

        # Update the config with unique name
        new_name = f"Updated GitHub OAuth {uuid4().hex[:8]}"
        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            name=new_name,
            scopes=["repo", "user", "admin"],
        )

        assert updated_config.id == oauth_config.id
        assert updated_config.name == new_name
        assert updated_config.name != original_name
        assert updated_config.scopes == ["repo", "user", "admin"]

    def test_update_oauth_config_preserves_secrets(self, db_session: Session) -> None:
        """Test that updating config without providing secrets preserves existing values"""
        oauth_config = _create_test_oauth_config(db_session)
        original_client_id = oauth_config.client_id
        original_client_secret = oauth_config.client_secret

        # Update config without providing client_id or client_secret
        new_name = f"Updated Name {uuid4().hex[:8]}"
        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            name=new_name,
            client_id=None,
            client_secret=None,
        )

        # Secrets should be preserved
        assert updated_config.client_id is not None
        assert original_client_id is not None
        assert updated_config.client_id.get_value(
            apply_mask=False
        ) == original_client_id.get_value(apply_mask=False)
        assert updated_config.client_secret is not None
        assert original_client_secret is not None
        assert updated_config.client_secret.get_value(
            apply_mask=False
        ) == original_client_secret.get_value(apply_mask=False)
        # But name should be updated
        assert updated_config.name == new_name

    def test_update_oauth_config_not_found(self, db_session: Session) -> None:
        """Test updating a non-existent OAuth config raises error"""
        with pytest.raises(
            ValueError, match="OAuth config with id 99999 does not exist"
        ):
            update_oauth_config(99999, db_session, name="New Name")

    def test_update_oauth_config_clear_client_id(self, db_session: Session) -> None:
        """Test clearing client_id while preserving client_secret"""
        oauth_config = _create_test_oauth_config(db_session)
        original_client_secret = oauth_config.client_secret

        # Clear client_id
        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            clear_client_id=True,
        )

        # client_id should be cleared (empty string)
        assert updated_config.client_id is not None
        assert updated_config.client_id.get_value(apply_mask=False) == ""
        # client_secret should be preserved
        assert updated_config.client_secret is not None
        assert original_client_secret is not None
        assert updated_config.client_secret.get_value(
            apply_mask=False
        ) == original_client_secret.get_value(apply_mask=False)

    def test_update_oauth_config_clear_client_secret(self, db_session: Session) -> None:
        """Test clearing client_secret while preserving client_id"""
        oauth_config = _create_test_oauth_config(db_session)
        original_client_id = oauth_config.client_id

        # Clear client_secret
        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            clear_client_secret=True,
        )

        # client_secret should be cleared (empty string)
        assert updated_config.client_secret is not None
        assert updated_config.client_secret.get_value(apply_mask=False) == ""
        # client_id should be preserved
        assert updated_config.client_id is not None
        assert original_client_id is not None
        assert updated_config.client_id.get_value(
            apply_mask=False
        ) == original_client_id.get_value(apply_mask=False)

    def test_update_oauth_config_clear_both_secrets(self, db_session: Session) -> None:
        """Test clearing both client_id and client_secret"""
        oauth_config = _create_test_oauth_config(db_session)

        # Clear both secrets
        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            clear_client_id=True,
            clear_client_secret=True,
        )

        # Both should be cleared (empty strings)
        assert updated_config.client_id is not None
        assert updated_config.client_id.get_value(apply_mask=False) == ""
        assert updated_config.client_secret is not None
        assert updated_config.client_secret.get_value(apply_mask=False) == ""

    def test_update_oauth_config_authorization_url(self, db_session: Session) -> None:
        """Test updating authorization_url"""
        oauth_config = _create_test_oauth_config(db_session)
        new_auth_url = "https://example.com/oauth/authorize"

        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            authorization_url=new_auth_url,
        )

        assert updated_config.authorization_url == new_auth_url

    def test_update_oauth_config_token_url(self, db_session: Session) -> None:
        """Test updating token_url"""
        oauth_config = _create_test_oauth_config(db_session)
        new_token_url = "https://example.com/oauth/token"

        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            token_url=new_token_url,
        )

        assert updated_config.token_url == new_token_url

    def test_update_oauth_config_additional_params(self, db_session: Session) -> None:
        """Test updating additional_params"""
        oauth_config = _create_test_oauth_config(db_session)
        new_params = {"access_type": "offline", "prompt": "consent"}

        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            additional_params=new_params,
        )

        assert updated_config.additional_params == new_params

    def test_update_oauth_config_multiple_fields(self, db_session: Session) -> None:
        """Test updating multiple fields at once"""
        oauth_config = _create_test_oauth_config(db_session)
        new_name = f"Updated Config {uuid4().hex[:8]}"
        new_auth_url = "https://example.com/oauth/authorize"
        new_token_url = "https://example.com/oauth/token"
        new_scopes = ["read", "write", "admin"]
        new_params = {"access_type": "offline"}
        new_client_id = "new_client_id"

        updated_config = update_oauth_config(
            oauth_config.id,
            db_session,
            name=new_name,
            authorization_url=new_auth_url,
            token_url=new_token_url,
            scopes=new_scopes,
            additional_params=new_params,
            client_id=new_client_id,
        )

        assert updated_config.name == new_name
        assert updated_config.authorization_url == new_auth_url
        assert updated_config.token_url == new_token_url
        assert updated_config.scopes == new_scopes
        assert updated_config.additional_params == new_params
        assert updated_config.client_id is not None
        assert updated_config.client_id.get_value(apply_mask=False) == new_client_id

    def test_delete_oauth_config(self, db_session: Session) -> None:
        """Test deleting an OAuth configuration"""
        oauth_config = _create_test_oauth_config(db_session)
        config_id = oauth_config.id

        # Delete the config
        delete_oauth_config(config_id, db_session)

        # Verify it's deleted
        deleted_config = get_oauth_config(config_id, db_session)
        assert deleted_config is None

    def test_delete_oauth_config_not_found(self, db_session: Session) -> None:
        """Test deleting a non-existent OAuth config raises error"""
        with pytest.raises(
            ValueError, match="OAuth config with id 99999 does not exist"
        ):
            delete_oauth_config(99999, db_session)

    def test_delete_oauth_config_sets_tool_reference_to_null(
        self, db_session: Session
    ) -> None:
        """Test that deleting OAuth config sets tool's oauth_config_id to NULL"""
        oauth_config = _create_test_oauth_config(db_session)
        tool = _create_test_tool_with_oauth(db_session, oauth_config)

        assert tool.oauth_config_id == oauth_config.id

        # Delete the OAuth config
        delete_oauth_config(oauth_config.id, db_session)

        # Refresh tool from database
        db_session.refresh(tool)

        # Tool should still exist but oauth_config_id should be NULL
        assert tool.oauth_config_id is None

    def test_update_tool_cleans_up_orphaned_oauth_config(
        self, db_session: Session
    ) -> None:
        """Test that changing a tool's oauth_config_id deletes the old config if no other tool uses it."""
        old_config = _create_test_oauth_config(db_session)
        new_config = _create_test_oauth_config(db_session)
        tool = _create_test_tool_with_oauth(db_session, old_config)
        old_config_id = old_config.id

        update_tool(
            tool_id=tool.id,
            name=None,
            description=None,
            openapi_schema=None,
            custom_headers=None,
            user_id=None,
            db_session=db_session,
            passthrough_auth=None,
            oauth_config_id=new_config.id,
        )

        assert tool.oauth_config_id == new_config.id
        assert get_oauth_config(old_config_id, db_session) is None

    def test_delete_tool_cleans_up_orphaned_oauth_config(
        self, db_session: Session
    ) -> None:
        """Test that deleting the last tool referencing an OAuthConfig also deletes the config."""
        config = _create_test_oauth_config(db_session)
        tool = _create_test_tool_with_oauth(db_session, config)
        config_id = config.id

        delete_tool__no_commit(tool.id, db_session)
        db_session.commit()

        assert get_oauth_config(config_id, db_session) is None

    def test_update_tool_preserves_shared_oauth_config(
        self, db_session: Session
    ) -> None:
        """Test that updating one tool's oauth_config_id preserves the config when another tool still uses it."""
        shared_config = _create_test_oauth_config(db_session)
        new_config = _create_test_oauth_config(db_session)
        tool_a = _create_test_tool_with_oauth(db_session, shared_config)
        tool_b = _create_test_tool_with_oauth(db_session, shared_config)
        shared_config_id = shared_config.id

        # Move tool_a to a new config; tool_b still references shared_config
        update_tool(
            tool_id=tool_a.id,
            name=None,
            description=None,
            openapi_schema=None,
            custom_headers=None,
            user_id=None,
            db_session=db_session,
            passthrough_auth=None,
            oauth_config_id=new_config.id,
        )

        assert tool_a.oauth_config_id == new_config.id
        assert tool_b.oauth_config_id == shared_config_id
        assert get_oauth_config(shared_config_id, db_session) is not None

    def test_delete_tool_preserves_shared_oauth_config(
        self, db_session: Session
    ) -> None:
        """Test that deleting one tool preserves the config when another tool still uses it."""
        shared_config = _create_test_oauth_config(db_session)
        tool_a = _create_test_tool_with_oauth(db_session, shared_config)
        tool_b = _create_test_tool_with_oauth(db_session, shared_config)
        shared_config_id = shared_config.id

        delete_tool__no_commit(tool_a.id, db_session)
        db_session.commit()

        assert tool_b.oauth_config_id == shared_config_id
        assert get_oauth_config(shared_config_id, db_session) is not None


class TestOAuthUserTokenCRUD:
    """Tests for OAuth user token CRUD operations"""

    def test_upsert_user_oauth_token_create(self, db_session: Session) -> None:
        """Test creating a new user OAuth token"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        token_data = {
            "access_token": "test_access_token",
            "refresh_token": "test_refresh_token",
            "token_type": "Bearer",
            "expires_at": 1234567890,
        }

        user_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data, db_session
        )

        assert user_token.id is not None
        assert user_token.oauth_config_id == oauth_config.id
        assert user_token.user_id == user.id
        assert user_token.token_data is not None
        assert user_token.token_data.get_value(apply_mask=False) == token_data
        assert user_token.created_at is not None
        assert user_token.updated_at is not None

    def test_upsert_user_oauth_token_update(self, db_session: Session) -> None:
        """Test updating an existing user OAuth token"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create initial token
        initial_token_data = {
            "access_token": "initial_token",
            "expires_at": 1234567890,
        }
        initial_token = upsert_user_oauth_token(
            oauth_config.id, user.id, initial_token_data, db_session
        )
        initial_token_id = initial_token.id

        # Update with new token data
        updated_token_data = {
            "access_token": "updated_token",
            "expires_at": 9876543210,
        }
        updated_token = upsert_user_oauth_token(
            oauth_config.id, user.id, updated_token_data, db_session
        )

        # Should be the same token record (updated, not inserted)
        assert updated_token.id == initial_token_id
        assert updated_token.token_data is not None
        assert (
            updated_token.token_data.get_value(apply_mask=False) == updated_token_data
        )
        assert (
            updated_token.token_data.get_value(apply_mask=False) != initial_token_data
        )

    def test_get_user_oauth_token(self, db_session: Session) -> None:
        """Test retrieving a user's OAuth token"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        token_data = {"access_token": "test_token"}
        created_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data, db_session
        )

        retrieved_token = get_user_oauth_token(oauth_config.id, user.id, db_session)

        assert retrieved_token is not None
        assert retrieved_token.id == created_token.id
        assert retrieved_token.token_data is not None
        assert retrieved_token.token_data.get_value(apply_mask=False) == token_data

    def test_get_user_oauth_token_not_found(self, db_session: Session) -> None:
        """Test retrieving a non-existent user token returns None"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        token = get_user_oauth_token(oauth_config.id, user.id, db_session)
        assert token is None

    def test_delete_user_oauth_token(self, db_session: Session) -> None:
        """Test deleting a user's OAuth token"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        token_data = {"access_token": "test_token"}
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Delete the token
        delete_user_oauth_token(oauth_config.id, user.id, db_session)

        # Verify it's deleted
        deleted_token = get_user_oauth_token(oauth_config.id, user.id, db_session)
        assert deleted_token is None

    def test_delete_user_oauth_token_not_found(self, db_session: Session) -> None:
        """Test deleting a non-existent user token raises error"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        with pytest.raises(
            ValueError,
            match=f"OAuth token for user {user.id} and config {oauth_config.id} does not exist",
        ):
            delete_user_oauth_token(oauth_config.id, user.id, db_session)

    def test_unique_constraint_on_user_config(self, db_session: Session) -> None:
        """Test that unique constraint prevents duplicate tokens per user per config"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create first token
        token_data1 = {"access_token": "token1"}
        upsert_user_oauth_token(oauth_config.id, user.id, token_data1, db_session)

        # Try to manually insert a duplicate (should fail at DB level)
        # But upsert should work fine (updates instead of inserting)
        token_data2 = {"access_token": "token2"}
        updated_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data2, db_session
        )

        # Should only be one token
        retrieved_token = get_user_oauth_token(oauth_config.id, user.id, db_session)
        assert retrieved_token is not None
        assert retrieved_token.id == updated_token.id
        assert retrieved_token.token_data is not None
        assert retrieved_token.token_data.get_value(apply_mask=False) == token_data2

    def test_cascade_delete_user_tokens_on_config_deletion(
        self, db_session: Session
    ) -> None:
        """Test that deleting OAuth config cascades to user tokens"""
        oauth_config = _create_test_oauth_config(db_session)
        user1 = create_test_user(db_session, "user1")
        user2 = create_test_user(db_session, "user2")

        # Create tokens for both users
        upsert_user_oauth_token(
            oauth_config.id, user1.id, {"access_token": "token1"}, db_session
        )
        upsert_user_oauth_token(
            oauth_config.id, user2.id, {"access_token": "token2"}, db_session
        )

        # Delete the OAuth config
        delete_oauth_config(oauth_config.id, db_session)

        # User tokens should be deleted
        token1 = get_user_oauth_token(oauth_config.id, user1.id, db_session)
        token2 = get_user_oauth_token(oauth_config.id, user2.id, db_session)
        assert token1 is None
        assert token2 is None


class TestOAuthHelperOperations:
    """Tests for OAuth helper operations"""

    def test_get_tools_by_oauth_config(self, db_session: Session) -> None:
        """Test retrieving tools that use a specific OAuth config"""
        oauth_config = _create_test_oauth_config(db_session)

        # Create multiple tools using this config
        tool1 = _create_test_tool_with_oauth(db_session, oauth_config)
        tool2 = _create_test_tool_with_oauth(db_session, oauth_config)

        # Create another tool without OAuth
        user = create_test_user(db_session, "other_user")
        tool3 = Tool(
            name="Tool without OAuth",
            description="No OAuth config",
            openapi_schema={"openapi": "3.0.0"},
            user_id=user.id,
        )
        db_session.add(tool3)
        db_session.commit()

        # Get tools by OAuth config
        tools = get_tools_by_oauth_config(oauth_config.id, db_session)

        assert len(tools) == 2
        tool_ids = [t.id for t in tools]
        assert tool1.id in tool_ids
        assert tool2.id in tool_ids
        assert tool3.id not in tool_ids

    def test_get_tools_by_oauth_config_empty(self, db_session: Session) -> None:
        """Test retrieving tools for config with no associated tools"""
        oauth_config = _create_test_oauth_config(db_session)

        tools = get_tools_by_oauth_config(oauth_config.id, db_session)

        assert len(tools) == 0


================================================
FILE: backend/tests/external_dependency_unit/tools/test_oauth_token_manager.py
================================================
"""
Test suite for OAuthTokenManager.

Tests the OAuth token management functionality including token validation,
refresh, expiration checking, and authorization URL building.
All HTTP requests to external OAuth providers are mocked.
"""

import time
from unittest.mock import Mock
from unittest.mock import patch
from uuid import uuid4

import pytest
from requests import HTTPError
from requests import Response
from sqlalchemy.orm import Session

from onyx.auth.oauth_token_manager import OAuthTokenManager
from onyx.db.models import OAuthConfig
from onyx.db.oauth_config import create_oauth_config
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.utils.sensitive import SensitiveValue
from tests.external_dependency_unit.conftest import create_test_user


def _create_test_oauth_config(db_session: Session) -> OAuthConfig:
    """Helper to create a test OAuth config"""
    return create_oauth_config(
        name=f"Test OAuth Config {uuid4().hex[:8]}",
        authorization_url="https://github.com/login/oauth/authorize",
        token_url="https://github.com/login/oauth/access_token",
        client_id="test_client_id",
        client_secret="test_client_secret",
        scopes=["repo", "user"],
        additional_params=None,
        db_session=db_session,
    )


class TestOAuthTokenManagerValidation:
    """Tests for token validation and retrieval"""

    def test_get_valid_access_token_with_valid_token(self, db_session: Session) -> None:
        """Test getting a valid access token that hasn't expired"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create a non-expired token
        future_timestamp = int(time.time()) + 3600  # Expires in 1 hour
        token_data = {
            "access_token": "valid_token",
            "refresh_token": "refresh_token",
            "expires_at": future_timestamp,
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Get the token
        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token == "valid_token"

    def test_get_valid_access_token_no_token_exists(self, db_session: Session) -> None:
        """Test getting access token when no token exists returns None"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token is None

    def test_get_valid_access_token_no_expiration(self, db_session: Session) -> None:
        """Test getting access token without expiration data (assumes valid)"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create token without expiration
        token_data = {
            "access_token": "token_without_expiry",
            "token_type": "Bearer",
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token == "token_without_expiry"

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_get_valid_access_token_with_expired_token_refreshes(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test that expired token triggers automatic refresh"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create an expired token
        past_timestamp = int(time.time()) - 100  # Expired 100 seconds ago
        token_data = {
            "access_token": "expired_token",
            "refresh_token": "refresh_token",
            "expires_at": past_timestamp,
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Mock the refresh token response
        mock_response = Mock(spec=Response)
        mock_response.json.return_value = {
            "access_token": "new_access_token",
            "refresh_token": "new_refresh_token",
            "expires_in": 3600,
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response

        # Get the token (should trigger refresh)
        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token == "new_access_token"
        # Verify refresh endpoint was called
        mock_post.assert_called_once()
        call_args = mock_post.call_args
        assert call_args[0][0] == oauth_config.token_url
        assert call_args[1]["data"]["grant_type"] == "refresh_token"
        assert call_args[1]["data"]["refresh_token"] == "refresh_token"

    def test_get_valid_access_token_expired_no_refresh_token(
        self, db_session: Session
    ) -> None:
        """Test that expired token without refresh_token returns None"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create an expired token without refresh_token
        past_timestamp = int(time.time()) - 100
        token_data = {
            "access_token": "expired_token",
            "expires_at": past_timestamp,
            # No refresh_token
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token is None

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_get_valid_access_token_refresh_fails(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test that failed refresh returns None"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create an expired token
        past_timestamp = int(time.time()) - 100
        token_data = {
            "access_token": "expired_token",
            "refresh_token": "refresh_token",
            "expires_at": past_timestamp,
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Mock the refresh to fail
        mock_post.side_effect = HTTPError("Token refresh failed")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        access_token = manager.get_valid_access_token()

        assert access_token is None


class TestOAuthTokenManagerRefresh:
    """Tests for token refresh functionality"""

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_refresh_token_success(self, mock_post: Mock, db_session: Session) -> None:
        """Test successful token refresh"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create initial token
        token_data = {
            "access_token": "old_token",
            "refresh_token": "old_refresh",
            "expires_at": int(time.time()) - 100,
        }
        user_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data, db_session
        )

        # Mock successful refresh
        new_expires_in = 3600
        mock_response = Mock(spec=Response)
        mock_response.json.return_value = {
            "access_token": "new_token",
            "refresh_token": "new_refresh",
            "expires_in": new_expires_in,
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response

        # Refresh the token
        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        new_access_token = manager.refresh_token(user_token)

        assert new_access_token == "new_token"

        # Verify token was updated in DB
        db_session.refresh(user_token)
        assert user_token.token_data is not None
        token_data = user_token.token_data.get_value(apply_mask=False)
        assert token_data["access_token"] == "new_token"
        assert token_data["refresh_token"] == "new_refresh"
        assert "expires_at" in token_data

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_refresh_token_preserves_refresh_token(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test that refresh preserves old refresh_token if provider doesn't return new one"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Create initial token
        token_data = {
            "access_token": "old_token",
            "refresh_token": "old_refresh",
            "expires_at": int(time.time()) - 100,
        }
        user_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data, db_session
        )

        # Mock refresh response WITHOUT refresh_token
        mock_response = Mock(spec=Response)
        mock_response.json.return_value = {
            "access_token": "new_token",
            "expires_in": 3600,
            # No refresh_token returned
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response

        # Refresh the token
        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        manager.refresh_token(user_token)

        # Verify old refresh_token was preserved
        db_session.refresh(user_token)
        assert user_token.token_data is not None
        token_data = user_token.token_data.get_value(apply_mask=False)
        assert token_data["refresh_token"] == "old_refresh"

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_refresh_token_http_error(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test that HTTP error during refresh is raised"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        token_data = {
            "access_token": "old_token",
            "refresh_token": "old_refresh",
            "expires_at": int(time.time()) - 100,
        }
        user_token = upsert_user_oauth_token(
            oauth_config.id, user.id, token_data, db_session
        )

        # Mock HTTP error
        mock_response = Mock(spec=Response)
        mock_response.raise_for_status.side_effect = HTTPError("Invalid refresh token")
        mock_post.return_value = mock_response

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        with pytest.raises(HTTPError):
            manager.refresh_token(user_token)


class TestOAuthTokenManagerExpiration:
    """Tests for token expiration checking"""

    def test_is_token_expired_with_valid_token(self, db_session: Session) -> None:
        """Test that non-expired token is detected as valid"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        # Token expires in 2 hours (well beyond 60 second buffer)
        token_data = {"expires_at": int(time.time()) + 7200}

        assert manager.is_token_expired(token_data) is False

    def test_is_token_expired_with_expired_token(self, db_session: Session) -> None:
        """Test that expired token is detected"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        # Token expired 1 hour ago
        token_data = {"expires_at": int(time.time()) - 3600}

        assert manager.is_token_expired(token_data) is True

    def test_is_token_expired_with_buffer_zone(self, db_session: Session) -> None:
        """Test that token within 60 second buffer is considered expired"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        # Token expires in 30 seconds (within 60 second buffer)
        token_data = {"expires_at": int(time.time()) + 30}

        assert manager.is_token_expired(token_data) is True

    def test_is_token_expired_no_expiration_data(self, db_session: Session) -> None:
        """Test that token without expiration is considered valid"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        # Token without expires_at
        token_data = {"access_token": "some_token"}

        assert manager.is_token_expired(token_data) is False


class TestOAuthTokenManagerCodeExchange:
    """Tests for authorization code exchange"""

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_exchange_code_for_token_success(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test successful code exchange"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Mock successful token exchange
        mock_response = Mock(spec=Response)
        mock_response.json.return_value = {
            "access_token": "new_access_token",
            "refresh_token": "new_refresh_token",
            "token_type": "Bearer",
            "expires_in": 3600,
            "scope": "repo user",
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response

        manager = OAuthTokenManager(oauth_config, user.id, db_session)
        token_data = manager.exchange_code_for_token(
            code="auth_code_123", redirect_uri="https://example.com/callback"
        )

        assert token_data["access_token"] == "new_access_token"
        assert token_data["refresh_token"] == "new_refresh_token"
        assert "expires_at" in token_data

        # Verify correct parameters were sent
        mock_post.assert_called_once()
        call_args = mock_post.call_args
        assert call_args[0][0] == oauth_config.token_url
        assert call_args[1]["data"]["grant_type"] == "authorization_code"
        assert call_args[1]["data"]["code"] == "auth_code_123"
        assert oauth_config.client_id is not None
        assert oauth_config.client_secret is not None
        assert call_args[1]["data"]["client_id"] == oauth_config.client_id.get_value(
            apply_mask=False
        )
        assert call_args[1]["data"][
            "client_secret"
        ] == oauth_config.client_secret.get_value(apply_mask=False)
        assert call_args[1]["data"]["redirect_uri"] == "https://example.com/callback"

    @patch("onyx.auth.oauth_token_manager.requests.post")
    def test_exchange_code_for_token_http_error(
        self, mock_post: Mock, db_session: Session
    ) -> None:
        """Test that HTTP error during code exchange is raised"""
        oauth_config = _create_test_oauth_config(db_session)
        user = create_test_user(db_session, "oauth_user")

        # Mock HTTP error
        mock_response = Mock(spec=Response)
        mock_response.raise_for_status.side_effect = HTTPError("Invalid code")
        mock_post.return_value = mock_response

        manager = OAuthTokenManager(oauth_config, user.id, db_session)

        with pytest.raises(HTTPError):
            manager.exchange_code_for_token(
                code="invalid_code", redirect_uri="https://example.com/callback"
            )


class TestOAuthTokenManagerURLBuilding:
    """Tests for authorization URL building"""

    def test_build_authorization_url_basic(self, db_session: Session) -> None:
        """Test building basic authorization URL"""
        oauth_config = _create_test_oauth_config(db_session)

        url = OAuthTokenManager.build_authorization_url(
            oauth_config=oauth_config,
            redirect_uri="https://example.com/callback",
            state="random_state_123",
        )

        assert url.startswith(oauth_config.authorization_url)
        assert "client_id=test_client_id" in url
        assert "redirect_uri=https%3A%2F%2Fexample.com%2Fcallback" in url
        assert "response_type=code" in url
        assert "state=random_state_123" in url
        # Check scopes are included
        assert "scope=repo+user" in url

    def test_build_authorization_url_with_additional_params(
        self, db_session: Session
    ) -> None:
        """Test building URL with additional provider-specific parameters"""
        oauth_config = create_oauth_config(
            name=f"Test OAuth {uuid4().hex[:8]}",
            authorization_url="https://accounts.google.com/o/oauth2/v2/auth",
            token_url="https://oauth2.googleapis.com/token",
            client_id="google_client_id",
            client_secret="google_client_secret",
            scopes=["email", "profile"],
            additional_params={"access_type": "offline", "prompt": "consent"},
            db_session=db_session,
        )

        url = OAuthTokenManager.build_authorization_url(
            oauth_config=oauth_config,
            redirect_uri="https://example.com/callback",
            state="state_456",
        )

        assert "access_type=offline" in url
        assert "prompt=consent" in url
        assert "scope=email+profile" in url

    def test_build_authorization_url_no_scopes(self, db_session: Session) -> None:
        """Test building URL when no scopes are configured"""
        oauth_config = create_oauth_config(
            name=f"Test OAuth {uuid4().hex[:8]}",
            authorization_url="https://oauth.example.com/authorize",
            token_url="https://oauth.example.com/token",
            client_id="simple_client_id",
            client_secret="simple_client_secret",
            scopes=None,  # No scopes
            additional_params=None,
            db_session=db_session,
        )

        url = OAuthTokenManager.build_authorization_url(
            oauth_config=oauth_config,
            redirect_uri="https://example.com/callback",
            state="state_789",
        )

        # Should not include scope parameter
        assert "scope=" not in url
        assert "client_id=simple_client_id" in url

    def test_build_authorization_url_with_existing_query_params(
        self, db_session: Session
    ) -> None:
        """Test building URL when authorization_url already has query parameters"""
        oauth_config = create_oauth_config(
            name=f"Test OAuth {uuid4().hex[:8]}",
            authorization_url="https://oauth.example.com/authorize?foo=bar",
            token_url="https://oauth.example.com/token",
            client_id="custom_client_id",
            client_secret="custom_client_secret",
            scopes=["read"],
            additional_params=None,
            db_session=db_session,
        )

        url = OAuthTokenManager.build_authorization_url(
            oauth_config=oauth_config,
            redirect_uri="https://example.com/callback",
            state="state_xyz",
        )

        # Should use & instead of ? since URL already has query params
        assert "foo=bar&" in url or "?foo=bar" in url
        assert "client_id=custom_client_id" in url


class TestUnwrapSensitiveStr:
    """Tests for _unwrap_sensitive_str static method"""

    def test_unwrap_sensitive_str(self) -> None:
        """Test that both SensitiveValue and plain str inputs are handled"""
        # SensitiveValue input
        sensitive = SensitiveValue[str](
            encrypted_bytes=b"test_client_id",
            decrypt_fn=lambda b: b.decode(),
        )
        assert OAuthTokenManager._unwrap_sensitive_str(sensitive) == "test_client_id"

        # Plain str input
        assert OAuthTokenManager._unwrap_sensitive_str("plain_string") == "plain_string"


================================================
FILE: backend/tests/external_dependency_unit/tools/test_oauth_tool_integration.py
================================================
"""
Test suite for OAuth integration in tool_constructor.

Tests the priority logic for OAuth tokens when constructing custom tools:
1. Priority 1: OAuth config (per-tool OAuth)
2. Priority 2: Passthrough auth (user's login OAuth token)

All external HTTP calls are mocked, but Postgres and Redis are running.
"""

import queue
from typing import Any
from unittest.mock import Mock
from unittest.mock import patch
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.chat.emitter import Emitter
from onyx.db.models import OAuthAccount
from onyx.db.models import OAuthConfig
from onyx.db.models import Persona
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.oauth_config import create_oauth_config
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.llm.factory import get_default_llm
from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.tools.tool_implementations.custom.custom_tool import CustomTool
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.conftest import create_test_user


# Simple OpenAPI schema for testing
SIMPLE_OPENAPI_SCHEMA: dict[str, Any] = {
    "openapi": "3.0.0",
    "info": {"title": "Test API", "version": "1.0.0"},
    "servers": [{"url": "https://api.example.com"}],
    "paths": {
        "/test": {
            "get": {
                "operationId": "test_operation",
                "summary": "Test operation",
                "description": "A test operation",
                "responses": {"200": {"description": "Success"}},
            }
        }
    },
}


def _create_test_persona(db_session: Session, user: User, tools: list[Tool]) -> Persona:
    """Helper to create a test persona with the given tools"""
    # Create persona with prompts directly on it
    persona = Persona(
        name=f"Test Persona {uuid4().hex[:8]}",
        description="Test persona",
        system_prompt="You are a helpful assistant",
        task_prompt="Answer the user's question",
        tools=tools,
        document_sets=[],
        users=[user],
        groups=[],
        is_listed=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
    )
    db_session.add(persona)
    db_session.commit()
    db_session.refresh(persona)
    return persona


def _create_test_oauth_config(
    db_session: Session, name: str | None = None
) -> OAuthConfig:
    """Helper to create a test OAuth config"""
    return create_oauth_config(
        name=name or f"Test OAuth Config {uuid4().hex[:8]}",
        authorization_url="https://github.com/login/oauth/authorize",
        token_url="https://github.com/login/oauth/access_token",
        client_id="test_client_id",
        client_secret="test_client_secret",
        scopes=["repo", "user"],
        additional_params=None,
        db_session=db_session,
    )


def _get_authorization_header(headers: dict[str, str]) -> str | None:
    """
    Helper to extract authorization header from headers dict.
    Checks both 'authorization' and 'Authorization' keys.

    Returns:
        The authorization header value, or None if not present.
    """
    return headers.get("authorization") or headers.get("Authorization")


def _assert_has_authorization_header(headers: dict[str, str]) -> None:
    """Assert that headers contain an authorization header (any case)."""
    assert (
        "authorization" in headers or "Authorization" in headers
    ), "Expected authorization header to be present"


def _assert_no_authorization_header(headers: dict[str, str]) -> None:
    """Assert that headers do NOT contain an authorization header."""
    assert (
        "authorization" not in headers and "Authorization" not in headers
    ), "Expected no authorization header"


class TestOAuthToolIntegrationPriority:
    """Tests for OAuth token priority logic in tool_constructor"""

    @pytest.fixture(autouse=True)
    def setup_llm_provider(self, db_session: Session) -> None:
        """Ensure default LLM provider is set up for each test."""
        ensure_default_llm_provider(db_session)

    def test_oauth_config_priority_over_passthrough(self, db_session: Session) -> None:
        """
        Test that oauth_config_id takes priority over passthrough_auth.
        When both are set, the tool should use the OAuth config token.
        """
        # Create user with login OAuth token
        user = create_test_user(db_session, "oauth_user")
        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="github",
            account_id="github_user_123",
            account_email=user.email,
            access_token="user_login_token_12345",
            refresh_token="",
        )
        db_session.add(oauth_account)
        db_session.commit()
        # Refresh user to load oauth_accounts relationship
        db_session.refresh(user)

        # Create OAuth config with a valid token
        oauth_config = _create_test_oauth_config(db_session)
        token_data = {
            "access_token": "oauth_config_token_67890",
            "token_type": "Bearer",
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Create tool with BOTH oauth_config_id and passthrough_auth set
        tool = Tool(
            name="test_tool",
            description="Test tool",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=oauth_config.id,  # Priority 1
            passthrough_auth=True,  # Priority 2 - should be ignored
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona and chat session
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        search_tool_config = SearchToolConfig()

        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
            search_tool_config=search_tool_config,
        )

        # Verify tool was constructed
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify the OAuth config token is used (Priority 1), NOT passthrough token
        _assert_has_authorization_header(custom_tool.headers)
        auth_header = _get_authorization_header(custom_tool.headers)
        assert auth_header == "Bearer oauth_config_token_67890"

    def test_passthrough_auth_when_no_oauth_config(self, db_session: Session) -> None:
        """
        Test that passthrough_auth works when oauth_config_id is not set.
        """
        # Create user with login OAuth token
        user = create_test_user(db_session, "oauth_user")
        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="google",
            account_id="google_user_456",
            account_email=user.email,
            access_token="user_passthrough_token_99999",
            refresh_token="",
        )
        db_session.add(oauth_account)
        db_session.commit()
        # Refresh user to load oauth_accounts relationship
        db_session.refresh(user)

        # Create tool with only passthrough_auth set (no oauth_config_id)
        tool = Tool(
            name="test_tool_passthrough",
            description="Test tool with passthrough",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=None,  # No OAuth config
            passthrough_auth=True,  # Should use user's login token
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
        )

        # Verify tool was constructed
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify the passthrough token is used
        _assert_has_authorization_header(custom_tool.headers)
        auth_header = _get_authorization_header(custom_tool.headers)
        assert auth_header == "Bearer user_passthrough_token_99999"

    def test_oauth_config_without_valid_token_logs_warning(
        self, db_session: Session, caplog: pytest.LogCaptureFixture
    ) -> None:
        """
        Test that when oauth_config_id is set but no valid token exists,
        a warning is logged and the tool has no auth header.
        """
        # Create user (no OAuth account)
        user = create_test_user(db_session, "oauth_user")

        # Create OAuth config but DO NOT create a token for the user
        oauth_config = _create_test_oauth_config(db_session)

        # Create tool with oauth_config_id but user has no token
        tool = Tool(
            name="test_tool_no_token",
            description="Test tool without token",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=oauth_config.id,
            passthrough_auth=False,
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        with caplog.at_level("WARNING"):
            tool_dict = construct_tools(
                persona=persona,
                db_session=db_session,
                emitter=Emitter(merged_queue=queue.Queue()),
                user=user,
                llm=llm,
            )

        # Verify warning was logged
        assert any(
            "No valid OAuth token found for tool" in record.message
            for record in caplog.records
        )
        assert any(str(oauth_config.id) in record.message for record in caplog.records)

        # Verify tool was constructed but has no authorization header
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify NO authorization header is present
        _assert_no_authorization_header(custom_tool.headers)

    def test_no_auth_when_both_disabled(self, db_session: Session) -> None:
        """
        Test that when neither oauth_config_id nor passthrough_auth is set,
        the tool has no authorization header.
        """
        # Create user with OAuth account (but tool won't use it)
        user = create_test_user(db_session, "oauth_user")
        oauth_account = OAuthAccount(
            user_id=user.id,
            oauth_name="github",
            account_id="github_user_789",
            account_email=user.email,
            access_token="unused_token",
            refresh_token="",
        )
        db_session.add(oauth_account)
        db_session.commit()

        # Create tool with neither oauth_config_id nor passthrough_auth
        tool = Tool(
            name="test_tool_no_auth",
            description="Test tool without auth",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=None,
            passthrough_auth=False,
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
        )

        # Verify tool was constructed
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify NO authorization header
        _assert_no_authorization_header(custom_tool.headers)

    def test_oauth_config_with_expired_token_refreshes(
        self, db_session: Session
    ) -> None:
        """
        Test that expired OAuth config tokens are automatically refreshed.
        """
        import time

        # Create user
        user = create_test_user(db_session, "oauth_user")

        # Create OAuth config with expired token
        oauth_config = _create_test_oauth_config(db_session)
        expired_token_data = {
            "access_token": "expired_token",
            "refresh_token": "refresh_token_12345",
            "expires_at": int(time.time()) - 100,  # Expired 100 seconds ago
        }
        upsert_user_oauth_token(
            oauth_config.id, user.id, expired_token_data, db_session
        )

        # Create tool with oauth_config_id
        tool = Tool(
            name="test_tool_refresh",
            description="Test tool with token refresh",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=oauth_config.id,
            passthrough_auth=False,
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Mock the token refresh response
        mock_response = Mock()
        mock_response.json.return_value = {
            "access_token": "refreshed_token_67890",
            "refresh_token": "refresh_token_12345",
            "expires_in": 3600,
            "token_type": "Bearer",
        }
        mock_response.raise_for_status = Mock()

        with patch("onyx.auth.oauth_token_manager.requests.post") as mock_post:
            mock_post.return_value = mock_response

            # Construct tools
            tool_dict = construct_tools(
                persona=persona,
                db_session=db_session,
                emitter=Emitter(merged_queue=queue.Queue()),
                user=user,
                llm=llm,
            )

            # Verify token refresh was called
            mock_post.assert_called_once()
            call_args = mock_post.call_args
            assert call_args[0][0] == oauth_config.token_url
            assert call_args[1]["data"]["grant_type"] == "refresh_token"
            assert call_args[1]["data"]["refresh_token"] == "refresh_token_12345"

        # Verify tool was constructed with refreshed token
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify the refreshed token is used
        _assert_has_authorization_header(custom_tool.headers)
        auth_header = _get_authorization_header(custom_tool.headers)
        assert auth_header == "Bearer refreshed_token_67890"

    def test_custom_headers_combined_with_oauth_token(
        self, db_session: Session
    ) -> None:
        """
        Test that custom headers are properly combined with OAuth token.
        The OAuth Authorization header should be added to existing custom headers.
        """
        # Create user
        user = create_test_user(db_session, "oauth_user")

        # Create OAuth config with token
        oauth_config = _create_test_oauth_config(db_session)
        token_data = {
            "access_token": "oauth_token_abc123",
            "token_type": "Bearer",
        }
        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)

        # Create tool with oauth_config_id AND custom headers
        tool = Tool(
            name="test_tool_combined",
            description="Test tool with custom headers and OAuth",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=oauth_config.id,
            custom_headers=[
                {"key": "X-Custom-Header", "value": "custom-value"},
                {"key": "X-API-Key", "value": "api-key-123"},
            ],
            passthrough_auth=False,
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
        )

        # Verify tool was constructed
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify both OAuth token AND custom headers are present
        _assert_has_authorization_header(custom_tool.headers)
        auth_header = _get_authorization_header(custom_tool.headers)
        assert auth_header == "Bearer oauth_token_abc123"

        # Headers are capitalized by the tool
        assert "X-Custom-Header" in custom_tool.headers
        assert custom_tool.headers["X-Custom-Header"] == "custom-value"
        assert "X-API-Key" in custom_tool.headers
        assert custom_tool.headers["X-API-Key"] == "api-key-123"

    def test_passthrough_auth_without_user_oauth_account(
        self, db_session: Session
    ) -> None:
        """
        Test that passthrough_auth handles gracefully when user has no OAuth account.
        """
        # Create user WITHOUT OAuth account
        user = create_test_user(db_session, "no_oauth_user")

        # Create tool with passthrough_auth
        tool = Tool(
            name="test_tool_no_account",
            description="Test tool passthrough without account",
            openapi_schema=SIMPLE_OPENAPI_SCHEMA,
            oauth_config_id=None,
            passthrough_auth=True,
            user_id=user.id,
        )
        db_session.add(tool)
        db_session.commit()
        db_session.refresh(tool)

        # Create persona
        persona = _create_test_persona(db_session, user, [tool])
        llm = get_default_llm()

        # Construct tools
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
            emitter=Emitter(merged_queue=queue.Queue()),
            user=user,
            llm=llm,
        )

        # Verify tool was constructed
        assert tool.id in tool_dict
        custom_tools = tool_dict[tool.id]
        assert len(custom_tools) == 1
        custom_tool = custom_tools[0]
        assert isinstance(custom_tool, CustomTool)

        # Verify NO authorization header (user has no OAuth account)
        _assert_no_authorization_header(custom_tool.headers)


================================================
FILE: backend/tests/external_dependency_unit/tools/test_python_tool.py
================================================
# """
# External dependency unit tests for Python tool.

# These tests run against a real Code Interpreter service (no mocking of the service).
# They verify code execution, error handling, timeout behavior, and file generation.

# Requirements:
# - CODE_INTERPRETER_BASE_URL must be configured and point to a running service
# - Tests use minimal mocking - only mock run_context infrastructure and db lookups
# - File store operations execute for real (files are saved and read back)
# """

# import asyncio
# import io
# import json
# from unittest.mock import Mock
# from unittest.mock import patch

# import pytest
# from agents import RunContextWrapper
# from openpyxl import load_workbook
# from pydantic import TypeAdapter
# from sqlalchemy.orm import Session

# from onyx.chat.turn.models import ChatTurnContext
# from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL
# from onyx.file_store.models import ChatFileType
# from onyx.file_store.models import InMemoryChatFile
# from onyx.file_store.utils import get_default_file_store
# from onyx.server.query_and_chat.streaming_models import Packet
# from onyx.server.query_and_chat.streaming_models import PythonToolDelta
# from onyx.server.query_and_chat.streaming_models import PythonToolStart
# from onyx.tools.tool_implementations.python.python_tool import PythonTool
# from onyx.tools.tool_implementations_v2.code_interpreter_client import (
#     CodeInterpreterClient,
# )
# from onyx.tools.tool_implementations_v2.python import _python_execution_core
# from onyx.tools.tool_implementations_v2.python import python
# from onyx.tools.tool_implementations_v2.tool_result_models import (
#     LlmPythonExecutionResult,
# )


# # Apply initialize_file_store fixture to all tests in this module
# pytestmark = pytest.mark.usefixtures("initialize_file_store")


# @pytest.fixture
# def mock_run_context() -> RunContextWrapper[ChatTurnContext]:
#     """Create a mock run context for testing."""
#     # Create mock emitter
#     mock_emitter = Mock()
#     mock_emitter.emit = Mock()

#     # Create mock run dependencies
#     mock_dependencies = Mock()
#     mock_dependencies.emitter = mock_emitter
#     mock_dependencies.db_session = Mock()

#     # Create mock context
#     mock_context = Mock(spec=ChatTurnContext)
#     mock_context.current_run_step = 0
#     mock_context.run_dependencies = mock_dependencies
#     mock_context.iteration_instructions = []
#     mock_context.global_iteration_responses = []
#     mock_context.chat_files = []

#     # Create run context wrapper
#     run_context = Mock(spec=RunContextWrapper)
#     run_context.context = mock_context

#     return run_context


# @pytest.fixture
# def code_interpreter_client() -> CodeInterpreterClient:
#     """Create a real Code Interpreter client for testing."""
#     if not CODE_INTERPRETER_BASE_URL:
#         pytest.skip("CODE_INTERPRETER_BASE_URL not configured")
#     return CodeInterpreterClient()


# def test_python_execution_basic(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test basic Python execution with simple code."""
#     code = 'print("Hello, World!")'

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert "Hello, World!" in result.stdout
#     assert result.stderr == ""
#     assert result.exit_code == 0
#     assert not result.timed_out
#     assert len(result.generated_files) == 0

#     # Verify context was updated
#     # Note: @tool_accounting increments current_run_step from 0 to 1 before execution
#     assert len(mock_run_context.context.iteration_instructions) == 1
#     instruction = mock_run_context.context.iteration_instructions[0]
#     assert instruction.iteration_nr == 1
#     assert instruction.plan and "Python" in instruction.plan

#     assert len(mock_run_context.context.global_iteration_responses) == 1
#     answer = mock_run_context.context.global_iteration_responses[0]
#     assert answer.tool == "PythonTool"
#     assert "Hello, World!" in answer.answer

#     # Verify streaming packets were emitted
#     mock_emitter = mock_run_context.context.run_dependencies.emitter
#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore
#     assert len(emitter_calls) >= 2  # At least start and delta

#     # Check for PythonToolStart packet
#     start_packets = [
#         call[0][0]
#         for call in emitter_calls
#         if isinstance(call[0][0].obj, PythonToolStart)
#     ]
#     assert len(start_packets) == 1

#     # Check for PythonToolDelta packet
#     delta_packets = [
#         call[0][0]
#         for call in emitter_calls
#         if isinstance(call[0][0].obj, PythonToolDelta)
#     ]
#     assert len(delta_packets) >= 1
#     assert "Hello, World!" in delta_packets[0].obj.stdout


# def test_python_execution_with_syntax_error(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test Python execution with syntax error."""
#     code = "print('missing closing quote"

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify error result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.stdout == ""
#     assert len(result.stderr) > 0
#     assert "SyntaxError" in result.stderr or "unterminated" in result.stderr.lower()
#     assert result.exit_code != 0
#     assert not result.timed_out
#     assert result.error is not None or len(result.stderr) > 0
#     assert len(result.generated_files) == 0


# def test_python_execution_with_runtime_error(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test Python execution with runtime error."""
#     code = """
# x = 10
# y = 0
# result = x / y  # Division by zero
# print(result)
# """

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify error result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code != 0
#     assert "ZeroDivisionError" in result.stderr or "division" in result.stderr.lower()
#     assert result.error is not None or len(result.stderr) > 0


# def test_python_execution_timeout(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
# ) -> None:
#     """Test execution timeout handling."""
#     # Code that will run longer than the timeout
#     code = """
# import time
# time.sleep(10)
# print("Should not reach here")
# """

#     # Create client with short timeout (override via execute method)
#     if not CODE_INTERPRETER_BASE_URL:
#         pytest.skip("CODE_INTERPRETER_BASE_URL not configured")

#     client = CodeInterpreterClient()

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Mock the config to use a short timeout
#         with patch(
#             "onyx.tools.tool_implementations_v2.python.CODE_INTERPRETER_DEFAULT_TIMEOUT_MS",
#             1000,
#         ):
#             # Execute code
#             result = _python_execution_core(mock_run_context, code, client)

#     # Verify timeout result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.timed_out


# def test_python_execution_file_generation(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
#     db_session: Session,  # Needed to initialize DB engine for file_store
# ) -> None:
#     """Test file generation and retrieval."""
#     code = """
# import csv

# # Create a CSV file
# with open('test_output.csv', 'w', newline='') as f:
#     writer = csv.writer(f)
#     writer.writerow(['Name', 'Age', 'City'])
#     writer.writerow(['Alice', '30', 'New York'])
#     writer.writerow(['Bob', '25', 'San Francisco'])

# print("CSV file created successfully")
# """

#     # Mock only get_tool_by_name (database lookup)
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code - file store operations happen for real
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == 0
#     assert "CSV file created successfully" in result.stdout
#     assert len(result.generated_files) == 1

#     # Verify file metadata
#     generated_file = result.generated_files[0]
#     assert generated_file.filename == "test_output.csv"
#     assert generated_file.file_link  # File link exists
#     assert generated_file.file_link.startswith("http://localhost:3000/api/chat/file/")

#     # Extract file_id from file_link
#     file_id = generated_file.file_link.split("/")[-1]

#     # Verify we can read the file back from the file store
#     file_store = get_default_file_store()
#     file_io = file_store.read_file(file_id)
#     file_content = file_io.read()

#     # Verify file content
#     assert b"Name,Age,City" in file_content
#     assert b"Alice,30,New York" in file_content
#     assert b"Bob,25,San Francisco" in file_content

#     # Verify iteration answer includes file_ids
#     assert len(mock_run_context.context.global_iteration_responses) == 1
#     answer = mock_run_context.context.global_iteration_responses[0]
#     assert answer.file_ids == [file_id]


# def test_python_execution_with_matplotlib(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
#     db_session: Session,  # Needed to initialize DB engine for file_store
# ) -> None:
#     """Test matplotlib plot generation."""
#     code = """
# import matplotlib
# matplotlib.use('Agg')  # Use non-interactive backend
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate data
# x = np.linspace(0, 10, 100)
# y = np.sin(x)

# # Create plot
# plt.figure(figsize=(10, 6))
# plt.plot(x, y)
# plt.title('Sine Wave')
# plt.xlabel('x')
# plt.ylabel('sin(x)')
# plt.grid(True)

# # Save plot
# plt.savefig('sine_wave.png')
# print("Plot saved successfully")
# """

#     # Mock only get_tool_by_name (database lookup)
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code - file store operations happen for real
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == 0
#     assert "Plot saved successfully" in result.stdout
#     assert len(result.generated_files) == 1

#     # Verify file metadata
#     generated_file = result.generated_files[0]
#     assert generated_file.filename == "sine_wave.png"
#     assert ".png" in generated_file.filename

#     # Extract file_id from file_link
#     file_id = generated_file.file_link.split("/")[-1]

#     # Verify we can read the file back from the file store
#     file_store = get_default_file_store()
#     file_io = file_store.read_file(file_id)
#     file_content = file_io.read()

#     # Verify the file is a valid PNG (check PNG magic bytes)
#     # PNG magic bytes: 89 50 4E 47 0D 0A 1A 0A
#     assert file_content[:8] == b"\x89PNG\r\n\x1a\n"
#     assert len(file_content) > 1000  # PNG should be substantial


# def test_python_execution_context_updates(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test that run_context is properly updated."""
#     code = 'print("Context update test")'

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 42
#         mock_get_tool.return_value = mock_tool

#         # Set specific run step - will be incremented to 6 by @tool_accounting
#         mock_run_context.context.current_run_step = 5

#         # Execute code
#         _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify iteration_instructions was updated
#     # Note: @tool_accounting increments from 5 to 6
#     assert len(mock_run_context.context.iteration_instructions) == 1
#     instruction = mock_run_context.context.iteration_instructions[0]
#     assert instruction.iteration_nr == 6
#     assert instruction.plan == "Executing Python code"
#     assert instruction.purpose == "Running Python code"
#     assert "secure environment" in instruction.reasoning

#     # Verify global_iteration_responses was updated
#     assert len(mock_run_context.context.global_iteration_responses) == 1
#     answer = mock_run_context.context.global_iteration_responses[0]
#     assert answer.tool == "PythonTool"
#     assert answer.tool_id == 42
#     assert answer.iteration_nr == 6
#     assert answer.parallelization_nr == 0
#     assert answer.question == "Execute Python code"
#     assert answer.reasoning and "secure environment" in answer.reasoning
#     assert "Context update test" in answer.answer
#     assert answer.cited_documents == {}

#     # Verify packets were emitted with correct index
#     mock_emitter = mock_run_context.context.run_dependencies.emitter
#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore
#     for call in emitter_calls:
#         packet = call[0][0]
#         assert isinstance(packet, Packet)
#         assert packet.ind == 6


# def test_python_tool_availability_with_url_set(db_session: Session) -> None:
#     """Test PythonTool.is_available() returns True when URL is configured."""
#     with patch(
#         "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
#         "http://localhost:8000",
#     ):
#         assert PythonTool.is_available(db_session) is True


# def test_python_tool_availability_without_url(db_session: Session) -> None:
#     """Test PythonTool.is_available() returns False when URL is not configured."""
#     with patch(
#         "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
#         None,
#     ):
#         assert PythonTool.is_available(db_session) is False

#     with patch(
#         "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
#         "",
#     ):
#         assert PythonTool.is_available(db_session) is False


# def test_python_function_tool_wrapper(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test the @function_tool decorated python() wrapper function."""
#     code = 'print("Testing function tool wrapper")'

#     # Mock get_tool_by_name and patch CodeInterpreterClient to use our fixture
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         with patch(
#             "onyx.tools.tool_implementations_v2.python.CodeInterpreterClient"
#         ) as mock_client_class:
#             mock_tool = Mock()
#             mock_tool.id = 1
#             mock_get_tool.return_value = mock_tool
#             mock_client_class.return_value = code_interpreter_client

#             # Call the function tool wrapper
#             result_coro = python.on_invoke_tool(mock_run_context, json.dumps({"code": code}))  # type: ignore
#             result_json: str = asyncio.run(result_coro)  # type: ignore

#     # Verify result is JSON string
#     assert isinstance(result_json, str)

#     # Parse and verify result
#     adapter = TypeAdapter(LlmPythonExecutionResult)
#     result = adapter.validate_json(result_json)

#     assert isinstance(result, LlmPythonExecutionResult)
#     assert "Testing function tool wrapper" in result.stdout
#     assert result.exit_code == 0


# def test_python_execution_output_truncation(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
# ) -> None:
#     """Test that large outputs are properly truncated."""
#     # Generate code that produces output larger than truncation limit
#     code = """
# for i in range(10000):
#     print(f"Line {i}: " + "x" * 100)
# """

#     # Mock get_tool_by_name
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         # Set a small truncation limit for testing
#         with patch(
#             "onyx.tools.tool_implementations_v2.python.CODE_INTERPRETER_MAX_OUTPUT_LENGTH",
#             5000,
#         ):
#             mock_tool = Mock()
#             mock_tool.id = 1
#             mock_get_tool.return_value = mock_tool

#             # Execute code
#             result = _python_execution_core(
#                 mock_run_context, code, code_interpreter_client
#             )

#     # Verify output was truncated
#     assert len(result.stdout) <= 5000 + 200  # Allow for truncation message
#     assert "output truncated" in result.stdout
#     assert "characters omitted" in result.stdout


# def test_python_execution_multiple_files(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
#     db_session: Session,  # Needed to initialize DB engine for file_store
# ) -> None:
#     """Test generation of multiple files."""
#     code = """
# # Create multiple files
# with open('file1.txt', 'w') as f:
#     f.write('Content of file 1')

# with open('file2.txt', 'w') as f:
#     f.write('Content of file 2')

# with open('file3.txt', 'w') as f:
#     f.write('Content of file 3')

# print("Created 3 files")
# """

#     # Mock only get_tool_by_name (database lookup)
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code - file store operations happen for real
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == 0
#     assert "Created 3 files" in result.stdout
#     assert len(result.generated_files) == 3

#     # Verify all files have unique IDs and proper metadata
#     file_ids_result = [f.file_link.split("/")[-1] for f in result.generated_files]
#     assert len(set(file_ids_result)) == 3  # All unique

#     # Verify filenames
#     filenames = [f.filename for f in result.generated_files]
#     assert "file1.txt" in filenames
#     assert "file2.txt" in filenames
#     assert "file3.txt" in filenames

#     # Verify we can read all files back from the file store
#     file_store = get_default_file_store()

#     # Create a mapping of filename to generated file for easier verification
#     files_by_name = {f.filename: f for f in result.generated_files}

#     # Verify each expected file
#     for i in range(1, 4):
#         filename = f"file{i}.txt"
#         assert filename in files_by_name, f"Expected file {filename} not found"

#         generated_file = files_by_name[filename]
#         file_id = generated_file.file_link.split("/")[-1]
#         file_io = file_store.read_file(file_id)
#         file_content = file_io.read()
#         expected_content = f"Content of file {i}".encode()
#         assert (
#             expected_content in file_content
#         ), f"Expected content not found in {filename}"


# def test_python_execution_client_error_handling(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
# ) -> None:
#     """Test error handling when Code Interpreter service fails."""
#     code = 'print("Test")'

#     # Create a client that will fail
#     if not CODE_INTERPRETER_BASE_URL:
#         pytest.skip("CODE_INTERPRETER_BASE_URL not configured")

#     client = CodeInterpreterClient()

#     # Mock the execute method to raise an exception
#     with patch.object(client, "execute", side_effect=Exception("Service unavailable")):
#         # Execute code
#         result = _python_execution_core(mock_run_context, code, client)

#     # Verify error result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == -1
#     error_msg = result.error or ""
#     assert "Service unavailable" in result.stderr or "Service unavailable" in error_msg
#     assert not result.timed_out
#     assert len(result.generated_files) == 0

#     # Verify error delta was emitted
#     mock_emitter = mock_run_context.context.run_dependencies.emitter
#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore
#     delta_packets = [
#         call[0][0]
#         for call in emitter_calls
#         if isinstance(call[0][0].obj, PythonToolDelta)
#     ]
#     assert len(delta_packets) >= 1
#     assert "Service unavailable" in delta_packets[-1].obj.stderr


# def test_python_execution_with_excel_file(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
#     db_session: Session,  # Needed to initialize DB engine for file_store
# ) -> None:
#     """Test Excel file generation with financial data."""
#     code = """
# import pandas as pd

# # Create financial sample data
# data = {
#     'Segment': ['Government', 'Government', 'Midmarket', 'Midmarket', 'Enterprise'],
#     'Country': ['Canada', 'Germany', 'France', 'Germany', 'Canada'],
#     'Product': ['Carretera', 'Carretera', 'Carretera', 'Carretera', 'Amarilla'],
#     'Units Sold': [1618.5, 1321, 2178, 888, 2470],
#     'Manufacturing Price': [3, 3, 3, 3, 260],
#     'Sale Price': [20, 20, 20, 20, 300],
#     'Gross Sales': [32370, 26420, 43560, 17760, 741000],
#     'Discounts': [0, 0, 0, 0, 0],
#     'Sales': [32370, 26420, 43560, 17760, 741000],
#     'COGS': [16850, 13940, 22800, 9390, 642000],
#     'Profit': [15520, 12480, 20760, 8370, 99000],
#     'Month': ['January', 'January', 'June', 'April', 'September']
# }

# # Create DataFrame
# df = pd.DataFrame(data)

# # Write to Excel
# df.to_excel('financial_report.xlsx', index=False, sheet_name='Financial Data')

# print(f"Excel file created with {len(df)} rows")
# """

#     # Mock only get_tool_by_name (database lookup)
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code - file store operations happen for real
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == 0
#     assert "Excel file created with 5 rows" in result.stdout
#     assert len(result.generated_files) == 1

#     # Verify file metadata
#     generated_file = result.generated_files[0]
#     assert generated_file.filename == "financial_report.xlsx"
#     assert ".xlsx" in generated_file.filename

#     # Extract file_id from file_link
#     file_id = generated_file.file_link.split("/")[-1]

#     # Verify we can read the file back from the file store
#     file_store = get_default_file_store()
#     file_io = file_store.read_file(file_id)
#     file_content = file_io.read()

#     # Verify the file is a valid Excel file (check ZIP magic bytes - xlsx is a ZIP archive)
#     # ZIP magic bytes: 50 4B 03 04
#     assert file_content[:4] == b"PK\x03\x04"
#     assert len(file_content) > 1000  # Excel file should be substantial

#     # Verify we can parse the Excel file with openpyxl directly
#     file_io = io.BytesIO(file_content)
#     workbook = load_workbook(file_io)
#     sheet = workbook["Financial Data"]

#     # Verify data structure - get headers from first row
#     first_row = list(sheet.iter_rows(min_row=1, max_row=1, values_only=True))[0]
#     headers = list(first_row) if first_row else []
#     expected_columns = [
#         "Segment",
#         "Country",
#         "Product",
#         "Units Sold",
#         "Manufacturing Price",
#         "Sale Price",
#         "Gross Sales",
#         "Discounts",
#         "Sales",
#         "COGS",
#         "Profit",
#         "Month",
#     ]
#     assert headers == expected_columns

#     # Verify row count (excluding header)
#     assert sheet.max_row == 6  # 1 header + 5 data rows

#     # Read data rows
#     rows = []
#     for row in sheet.iter_rows(min_row=2, values_only=True):
#         rows.append(row)

#     assert len(rows) == 5

#     # Verify some sample data
#     segments = [row[0] for row in rows]
#     countries = [row[1] for row in rows]
#     units_sold = [float(row[3]) if row[3] is not None else 0.0 for row in rows]  # type: ignore
#     profits = [float(row[10]) if row[10] is not None else 0.0 for row in rows]  # type: ignore

#     assert "Government" in segments
#     assert "Canada" in countries
#     assert sum(units_sold) > 8000  # Total units sold
#     assert sum(profits) > 155000  # Total profit


# def test_python_execution_with_excel_file_input(
#     mock_run_context: RunContextWrapper[ChatTurnContext],
#     code_interpreter_client: CodeInterpreterClient,
#     db_session: Session,  # Needed to initialize DB engine for file_store
# ) -> None:
#     """Test processing an uploaded Excel file - reading and analyzing it."""
#     # Load the sample Excel file
#     import os

#     test_file_path = os.path.join(
#         os.path.dirname(__file__), "data", "financial-sample.xlsx"
#     )

#     with open(test_file_path, "rb") as f:
#         file_content = f.read()

#     # Create InMemoryChatFile with the Excel file
#     chat_file = InMemoryChatFile(
#         file_id="test-financial-sample",
#         content=file_content,
#         file_type=ChatFileType.DOC,
#         filename="financial-sample.xlsx",
#     )

#     # Add the file to the mock context's chat_files
#     mock_run_context.context.chat_files = [chat_file]

#     # Code to analyze the uploaded Excel file
#     code = """
# import pandas as pd
# import matplotlib
# matplotlib.use('Agg')
# import matplotlib.pyplot as plt
# from openpyxl import load_workbook

# # Read the uploaded Excel file using openpyxl directly
# workbook = load_workbook('financial-sample.xlsx')
# sheet = workbook.active

# # Convert to pandas DataFrame
# data = []
# headers = [cell.value for cell in sheet[1]]
# for row in sheet.iter_rows(min_row=2, values_only=True):
#     data.append(row)

# df = pd.DataFrame(data, columns=headers)

# print(f"Loaded Excel file with {len(df)} rows and {len(df.columns)} columns")
# print(f"\\nColumns: {', '.join(df.columns.tolist())}")

# # Perform analysis
# print(f"\\n=== Analysis ===")

# # Group by segment and calculate total sales and profit
# segment_summary = df.groupby('Segment').agg({
#     ' Sales': 'sum',
#     'Profit': 'sum',
#     'Units Sold': 'sum'
# }).round(2)

# print(f"\\nSales by Segment:")
# print(segment_summary)

# # Find top 5 products by profit
# top_products = df.groupby('Product')['Profit'].sum().sort_values(ascending=False).head(5)
# print(f"\\nTop 5 Products by Profit:")
# print(top_products)

# # Calculate profit margin
# total_sales = df[' Sales'].sum()
# total_profit = df['Profit'].sum()
# profit_margin = (total_profit / total_sales * 100) if total_sales > 0 else 0
# print(f"\\nOverall Profit Margin: {profit_margin:.2f}%")

# # Create a visualization
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# # Sales by Segment
# segment_summary[' Sales'].plot(kind='bar', ax=ax1, color='steelblue')
# ax1.set_title('Total Sales by Segment')
# ax1.set_xlabel('Segment')
# ax1.set_ylabel('Sales ($)')
# ax1.tick_params(axis='x', rotation=45)

# # Top 5 Products by Profit
# top_products.plot(kind='barh', ax=ax2, color='seagreen')
# ax2.set_title('Top 5 Products by Profit')
# ax2.set_xlabel('Profit ($)')
# ax2.set_ylabel('Product')

# plt.tight_layout()
# plt.savefig('financial_analysis.png', dpi=100, bbox_inches='tight')
# print(f"\\nVisualization saved as financial_analysis.png")

# # Create summary report Excel file
# summary_data = {
#     'Metric': ['Total Sales', 'Total Profit', 'Profit Margin %', 'Total Units Sold', 'Number of Records'],
#     'Value': [
#         f"${total_sales:,.2f}",
#         f"${total_profit:,.2f}",
#         f"{profit_margin:.2f}%",
#         f"{df['Units Sold'].sum():,.0f}",
#         len(df)
#     ]
# }
# summary_df = pd.DataFrame(summary_data)

# with pd.ExcelWriter('financial_summary.xlsx') as writer:
#     summary_df.to_excel(writer, sheet_name='Summary', index=False)
#     segment_summary.to_excel(writer, sheet_name='By Segment')

# print(f"Summary report saved as financial_summary.xlsx")
# """

#     # Mock only get_tool_by_name (database lookup)
#     with patch(
#         "onyx.tools.tool_implementations_v2.python.get_tool_by_name"
#     ) as mock_get_tool:
#         mock_tool = Mock()
#         mock_tool.id = 1
#         mock_get_tool.return_value = mock_tool

#         # Execute code - file store operations happen for real
#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)

#     # Verify result
#     assert isinstance(result, LlmPythonExecutionResult)
#     assert result.exit_code == 0
#     assert "Loaded Excel file" in result.stdout
#     assert "Analysis" in result.stdout
#     assert "Sales by Segment" in result.stdout
#     assert "Top 5 Products by Profit" in result.stdout
#     assert "Profit Margin" in result.stdout

#     # Should generate 2 files: PNG visualization and Excel summary
#     assert len(result.generated_files) == 2

#     # Verify generated files
#     filenames = [f.filename for f in result.generated_files]
#     assert "financial_analysis.png" in filenames
#     assert "financial_summary.xlsx" in filenames

#     # Verify we can read and validate the generated files
#     file_store = get_default_file_store()

#     # Check the PNG file
#     png_file = next(
#         f for f in result.generated_files if f.filename == "financial_analysis.png"
#     )
#     png_file_id = png_file.file_link.split("/")[-1]
#     png_io = file_store.read_file(png_file_id)
#     png_content = png_io.read()
#     assert png_content[:8] == b"\x89PNG\r\n\x1a\n"  # PNG magic bytes
#     assert len(png_content) > 5000  # Should be substantial

#     # Check the Excel summary file
#     xlsx_file = next(
#         f for f in result.generated_files if f.filename == "financial_summary.xlsx"
#     )
#     xlsx_file_id = xlsx_file.file_link.split("/")[-1]
#     xlsx_io = file_store.read_file(xlsx_file_id)
#     xlsx_content = xlsx_io.read()
#     assert xlsx_content[:4] == b"PK\x03\x04"  # ZIP/Excel magic bytes

#     # Parse and verify the summary Excel file using openpyxl directly
#     xlsx_io_obj = io.BytesIO(xlsx_content)
#     workbook = load_workbook(xlsx_io_obj)
#     sheet = workbook["Summary"]

#     # Read headers from first row
#     first_row = list(sheet.iter_rows(min_row=1, max_row=1, values_only=True))[0]
#     headers = list(first_row) if first_row else []
#     assert "Metric" in headers
#     assert "Value" in headers

#     # Read all rows and extract metrics
#     metrics = []
#     for row in sheet.iter_rows(min_row=2, values_only=True):
#         if row[0]:  # Metric column
#             metrics.append(row[0])

#     assert "Total Sales" in metrics
#     assert "Total Profit" in metrics
#     assert "Profit Margin %" in metrics


# if __name__ == "__main__":
#     # Run with: python -m pytest tests/external_dependency_unit/tools/test_python_tool.py -v
#     pytest.main([__file__, "-v"])


from __future__ import annotations

import io
import json
import threading
from collections.abc import Generator
from http.server import BaseHTTPRequestHandler
from http.server import HTTPServer
from typing import Any
from unittest.mock import patch

import pytest
from fastapi import UploadFile
from fastapi.background import BackgroundTasks
from sqlalchemy.orm import Session
from starlette.datastructures import Headers

import onyx.tools.tool_implementations.python.code_interpreter_client as ci_mod
from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.models import Persona
from onyx.db.tools import get_builtin_tool
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.server.features.projects.api import upload_user_files
from onyx.server.query_and_chat.chat_backend import get_chat_session
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PythonToolDelta
from onyx.server.query_and_chat.streaming_models import PythonToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import create_chat_session
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.mock_llm import LLMAnswerResponse
from tests.external_dependency_unit.mock_llm import LLMToolCallResponse
from tests.external_dependency_unit.mock_llm import use_mock_llm


# ---------------------------------------------------------------------------
# Mock Code Interpreter Server
# ---------------------------------------------------------------------------


class CapturedRequest:
    """A single HTTP request captured by the mock server."""

    def __init__(self, method: str, path: str, body: bytes) -> None:
        self.method = method
        self.path = path
        self.body = body

    def json_body(self) -> dict[str, Any]:
        return json.loads(self.body)


class _MockCIHandler(BaseHTTPRequestHandler):
    """HTTP handler that records every request and returns canned responses."""

    server: MockCodeInterpreterServer

    def do_POST(self) -> None:
        body = self._read_body()
        self._capture("POST", body)

        if self.path == "/v1/files":
            self.server._file_counter += 1
            self._respond_json(
                200, {"file_id": f"mock-ci-file-{self.server._file_counter}"}
            )
        elif self.path == "/v1/execute/stream":
            if self.server.streaming_enabled:
                self._respond_sse(
                    [
                        (
                            "output",
                            {"stream": "stdout", "data": "mock output\n"},
                        ),
                        (
                            "result",
                            {
                                "exit_code": 0,
                                "timed_out": False,
                                "duration_ms": 50,
                                "files": [],
                            },
                        ),
                    ]
                )
            else:
                self._respond_json(404, {"error": "not found"})
        elif self.path == "/v1/execute":
            self._respond_json(
                200,
                {
                    "stdout": "mock output\n",
                    "stderr": "",
                    "exit_code": 0,
                    "timed_out": False,
                    "duration_ms": 50,
                    "files": [],
                },
            )
        else:
            self._respond_json(404, {"error": "not found"})

    def do_GET(self) -> None:
        self._capture("GET", b"")
        if self.path == "/health":
            self._respond_json(200, {"status": "ok"})
        else:
            self._respond_json(404, {"error": "not found"})

    def do_DELETE(self) -> None:
        self._capture("DELETE", b"")
        self.send_response(200)
        self.end_headers()

    def _read_body(self) -> bytes:
        length = int(self.headers.get("Content-Length", 0))
        return self.rfile.read(length) if length else b""

    def _capture(self, method: str, body: bytes) -> None:
        self.server.captured_requests.append(
            CapturedRequest(method=method, path=self.path, body=body)
        )

    def _respond_json(self, status: int, data: dict[str, Any]) -> None:
        payload = json.dumps(data).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _respond_sse(self, events: list[tuple[str, dict[str, Any]]]) -> None:
        frames = []
        for event_type, data in events:
            frames.append(f"event: {event_type}\ndata: {json.dumps(data)}\n\n")
        payload = "".join(frames).encode()
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, format: str, *args: Any) -> None:  # noqa: A002
        pass


class MockCodeInterpreterServer(HTTPServer):
    """HTTPServer wrapper that records requests for assertions."""

    def __init__(self) -> None:
        super().__init__(("localhost", 0), _MockCIHandler)
        self.captured_requests: list[CapturedRequest] = []
        self._file_counter = 0
        self.streaming_enabled: bool = True

    @property
    def url(self) -> str:
        host, port = self.server_address
        return f"http://{host!s}:{port}"

    def start(self) -> None:
        threading.Thread(target=self.serve_forever, daemon=True).start()

    def get_requests(
        self,
        method: str | None = None,
        path: str | None = None,
    ) -> list[CapturedRequest]:
        results = self.captured_requests
        if method:
            results = [r for r in results if r.method == method]
        if path:
            results = [r for r in results if r.path == path]
        return results


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def mock_ci_server() -> Generator[MockCodeInterpreterServer, None, None]:
    server = MockCodeInterpreterServer()
    server.start()
    yield server
    server.shutdown()


@pytest.fixture(autouse=True)
def _clear_health_cache() -> None:
    """Reset the health check cache before every test."""
    import onyx.tools.tool_implementations.python.code_interpreter_client as mod

    mod._health_cache = {}


@pytest.fixture()
def _attach_python_tool_to_default_persona(db_session: Session) -> None:
    """Ensure the default persona (id=0) has the PythonTool attached."""
    python_tool_db = get_builtin_tool(db_session, PythonTool)
    persona = db_session.get(Persona, 0)
    assert persona is not None, "Default persona (id=0) not found"

    if python_tool_db not in persona.tools:
        persona.tools.append(python_tool_db)
        db_session.commit()


# ---------------------------------------------------------------------------
# Test
# ---------------------------------------------------------------------------


def test_code_interpreter_receives_chat_files(
    db_session: Session,
    mock_ci_server: MockCodeInterpreterServer,
    _attach_python_tool_to_default_persona: None,
    initialize_file_store: None,  # noqa: ARG001
) -> None:
    mock_ci_server.captured_requests.clear()
    mock_ci_server._file_counter = 0
    mock_url = mock_ci_server.url

    user = create_test_user(db_session, "ci_test_admin")
    chat_session = create_chat_session(db_session=db_session, user=user)

    # Upload a test CSV
    csv_content = b"name,age,city\nAlice,30,NYC\nBob,25,SF\n"
    result = upload_user_files(
        bg_tasks=BackgroundTasks(),
        files=[
            UploadFile(
                file=io.BytesIO(csv_content),
                filename="data.csv",
                size=len(csv_content),
                headers=Headers({"content-type": "text/csv"}),
            )
        ],
        project_id=None,
        temp_id_map=json.dumps({"0|data.csv": "data.csv"}),
        user=user,
        db_session=db_session,
    )
    assert len(result.user_files) == 1
    user_file = result.user_files[0]

    file_descriptor: FileDescriptor = {
        "id": user_file.file_id,
        "type": ChatFileType.TABULAR,
        "name": "data.csv",
        "user_file_id": str(user_file.id),
    }

    code = "import pandas as pd\ndf = pd.read_csv('data.csv')\nprint(df)"
    msg_req = SendMessageRequest(
        message="Read the CSV and print it.",
        chat_session_id=chat_session.id,
        file_descriptors=[file_descriptor],
        stream=True,
    )

    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__
    with (
        use_mock_llm() as mock_llm,
        patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
        patch(
            "onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
    ):
        mock_llm.add_response(
            LLMToolCallResponse(
                tool_name="python",
                tool_call_id="call_test_1",
                tool_call_argument_tokens=[json.dumps({"code": code})],
            )
        )
        mock_llm.forward_till_end()

        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)
        try:
            list(
                handle_stream_message_objects(
                    new_msg_req=msg_req, user=user, db_session=db_session
                )
            )
        finally:
            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults

    # Verify: file uploaded and code executed via streaming.
    assert len(mock_ci_server.get_requests(method="POST", path="/v1/files")) == 1
    assert (
        len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
    )

    # Staged input files are intentionally NOT deleted — PythonTool caches their
    # file IDs across agent-loop iterations to avoid re-uploading on every call.
    # The code interpreter cleans them up via its own TTL.
    assert len(mock_ci_server.get_requests(method="DELETE")) == 0

    execute_body = mock_ci_server.get_requests(
        method="POST", path="/v1/execute/stream"
    )[0].json_body()
    assert execute_body["code"] == code
    assert len(execute_body["files"]) == 1
    assert execute_body["files"][0]["path"] == "data.csv"


def test_code_interpreter_replay_packets_include_code_and_output(
    db_session: Session,
    mock_ci_server: MockCodeInterpreterServer,
    _attach_python_tool_to_default_persona: None,
    initialize_file_store: None,  # noqa: ARG001
) -> None:
    """After a code interpreter message completes, retrieving the message
    via translate_assistant_message_to_packets should emit PythonToolStart
    (containing the executed code) and PythonToolDelta (containing
    stdout/stderr), not generic CustomTool packets."""
    mock_ci_server.captured_requests.clear()
    mock_ci_server._file_counter = 0
    mock_url = mock_ci_server.url

    user = create_test_user(db_session, "ci_replay_test")
    chat_session = create_chat_session(db_session=db_session, user=user)

    code = 'x = 2 + 2\nprint(f"Result: {x}")'
    msg_req = SendMessageRequest(
        message="Calculate 2 + 2",
        chat_session_id=chat_session.id,
        stream=True,
    )

    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__
    with (
        use_mock_llm() as mock_llm,
        patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
        patch(
            "onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
    ):
        answer_tokens = ["The ", "result ", "is ", "4."]

        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)
        try:
            handler = StreamTestBuilder(llm_controller=mock_llm)

            stream = handle_stream_message_objects(
                new_msg_req=msg_req, user=user, db_session=db_session
            )
            # First packet is always MessageResponseIDInfo
            next(stream)

            # Phase 1: LLM requests python tool execution.
            handler.add_response(
                LLMToolCallResponse(
                    tool_name="python",
                    tool_call_id="call_replay_test",
                    tool_call_argument_tokens=[json.dumps({"code": code})],
                )
            ).expect(
                Packet(
                    placement=create_placement(0),
                    obj=ToolCallArgumentDelta(
                        tool_type="python",
                        argument_deltas={"code": code},
                    ),
                ),
                forward=2,
            ).expect(
                Packet(
                    placement=create_placement(0),
                    obj=PythonToolStart(code=code),
                ),
                forward=False,
            ).expect(
                Packet(
                    placement=create_placement(0),
                    obj=PythonToolDelta(stdout="mock output\n", stderr="", file_ids=[]),
                ),
                forward=False,
            ).expect(
                Packet(
                    placement=create_placement(0),
                    obj=SectionEnd(),
                ),
                forward=False,
            ).run_and_validate(
                stream=stream
            )

            # Phase 2: LLM produces a final answer after tool execution.
            handler.add_response(
                LLMAnswerResponse(answer_tokens=answer_tokens)
            ).expect_agent_response(
                answer_tokens=answer_tokens,
                turn_index=1,
            ).run_and_validate(
                stream=stream
            )

            with pytest.raises(StopIteration):
                next(stream)

        finally:
            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults

    # Retrieve the chat session through the same endpoint the frontend uses
    chat_detail = get_chat_session(
        session_id=chat_session.id,
        user=user,
        db_session=db_session,
    )

    assert (
        len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
    )

    # The response contains `packets` — a list of packet-lists, one per
    # assistant message. We should have exactly one assistant message.
    assert (
        len(chat_detail.packets) == 1
    ), f"Expected 1 assistant packet list, got {len(chat_detail.packets)}"
    packets = chat_detail.packets[0]

    # Extract PythonToolStart packets – these must contain the code
    start_packets = [p for p in packets if isinstance(p.obj, PythonToolStart)]
    assert (
        len(start_packets) == 1
    ), f"Expected 1 PythonToolStart packet, got {len(start_packets)}. Packet types: {[type(p.obj).__name__ for p in packets]}"
    start_obj = start_packets[0].obj
    assert isinstance(start_obj, PythonToolStart)
    assert start_obj.code == code

    # Extract PythonToolDelta packets – these must contain stdout/stderr
    delta_packets = [p for p in packets if isinstance(p.obj, PythonToolDelta)]
    assert len(delta_packets) >= 1, (
        f"Expected at least 1 PythonToolDelta packet, got {len(delta_packets)}. "
        f"Packet types: {[type(p.obj).__name__ for p in packets]}"
    )
    # The mock CI server returns "mock output\n" as stdout
    delta_obj = delta_packets[0].obj
    assert isinstance(delta_obj, PythonToolDelta)
    assert "mock output" in delta_obj.stdout


def test_code_interpreter_streaming_fallback_to_batch(
    db_session: Session,
    mock_ci_server: MockCodeInterpreterServer,
    _attach_python_tool_to_default_persona: None,
    initialize_file_store: None,  # noqa: ARG001
) -> None:
    """When the streaming endpoint is not available (older code-interpreter),
    execute_streaming should fall back to the batch /v1/execute endpoint."""
    mock_ci_server.captured_requests.clear()
    mock_ci_server._file_counter = 0
    mock_ci_server.streaming_enabled = False
    mock_url = mock_ci_server.url

    user = create_test_user(db_session, "ci_fallback_test")
    chat_session = create_chat_session(db_session=db_session, user=user)

    code = 'print("fallback test")'
    msg_req = SendMessageRequest(
        message="Print fallback test",
        chat_session_id=chat_session.id,
        stream=True,
    )

    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__
    with (
        use_mock_llm() as mock_llm,
        patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
        patch(
            "onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
    ):
        mock_llm.add_response(
            LLMToolCallResponse(
                tool_name="python",
                tool_call_id="call_fallback",
                tool_call_argument_tokens=[json.dumps({"code": code})],
            )
        )
        mock_llm.forward_till_end()

        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)
        try:
            packets = list(
                handle_stream_message_objects(
                    new_msg_req=msg_req, user=user, db_session=db_session
                )
            )
        finally:
            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults
            mock_ci_server.streaming_enabled = True

    # Streaming was attempted first (returned 404), then fell back to batch
    assert (
        len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
    )
    assert len(mock_ci_server.get_requests(method="POST", path="/v1/execute")) == 1

    # Verify output still made it through
    delta_packets = [
        p
        for p in packets
        if isinstance(p, Packet) and isinstance(p.obj, PythonToolDelta)
    ]
    assert len(delta_packets) >= 1
    first_delta = delta_packets[0].obj
    assert isinstance(first_delta, PythonToolDelta)
    assert "mock output" in first_delta.stdout


================================================
FILE: backend/tests/external_dependency_unit/tools/test_python_tool_server_enabled.py
================================================
"""Tests that PythonTool.is_available() respects the server_enabled DB flag.

Uses a real DB session with CODE_INTERPRETER_BASE_URL mocked so the
environment-variable check passes and the DB flag is the deciding factor.
"""

from unittest.mock import patch

from sqlalchemy.orm import Session

from onyx.db.code_interpreter import fetch_code_interpreter_server
from onyx.db.code_interpreter import update_code_interpreter_server_enabled
from onyx.tools.tool_implementations.python.python_tool import PythonTool


def test_python_tool_unavailable_when_server_disabled(
    db_session: Session,
) -> None:
    """With a valid base URL, the tool should be unavailable when
    server_enabled is False in the DB."""
    server = fetch_code_interpreter_server(db_session)
    initial_enabled = server.server_enabled

    try:
        update_code_interpreter_server_enabled(db_session, enabled=False)

        with patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            "http://fake:8888",
        ):
            assert PythonTool.is_available(db_session) is False
    finally:
        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)


def test_python_tool_available_when_server_enabled(
    db_session: Session,
) -> None:
    """With a valid base URL, the tool should be available when
    server_enabled is True in the DB."""
    server = fetch_code_interpreter_server(db_session)
    initial_enabled = server.server_enabled

    try:
        update_code_interpreter_server_enabled(db_session, enabled=True)

        with patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            "http://fake:8888",
        ):
            assert PythonTool.is_available(db_session) is True
    finally:
        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)


================================================
FILE: backend/tests/external_dependency_unit/tracing/__init__.py
================================================


================================================
FILE: backend/tests/external_dependency_unit/tracing/test_llm_span_recording.py
================================================
"""Tests for LLM span recording utilities."""

from typing import Any
from unittest.mock import MagicMock

import pytest

from onyx.llm.model_response import ChatCompletionMessageToolCall
from onyx.llm.model_response import Choice
from onyx.llm.model_response import FunctionCall as ModelResponseFunctionCall
from onyx.llm.model_response import Message
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import Usage
from onyx.llm.models import FunctionCall
from onyx.llm.models import ToolCall
from onyx.tracing.framework.span_data import GenerationSpanData
from onyx.tracing.llm_utils import record_llm_response
from onyx.tracing.llm_utils import record_llm_span_output


@pytest.fixture
def mock_span() -> MagicMock:
    """Create a mock span with GenerationSpanData."""
    span = MagicMock()
    span.span_data = GenerationSpanData()
    return span


class TestRecordLlmResponse:
    """Tests for record_llm_response function."""

    def test_records_content_from_response(self, mock_span: MagicMock) -> None:
        """Test that content is correctly extracted and recorded."""
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(content="Hello, world!", role="assistant"),
            ),
        )

        record_llm_response(mock_span, response)

        assert mock_span.span_data.output == [
            {"role": "assistant", "content": "Hello, world!"}
        ]

    def test_records_reasoning_from_response(self, mock_span: MagicMock) -> None:
        """Test that reasoning/extended thinking is recorded."""
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(
                    content="The answer is 42.",
                    role="assistant",
                    reasoning_content="Let me think step by step...",
                ),
            ),
        )

        record_llm_response(mock_span, response)

        assert mock_span.span_data.output == [
            {"role": "assistant", "content": "The answer is 42."}
        ]
        assert mock_span.span_data.reasoning == "Let me think step by step..."

    def test_records_tool_calls_from_response(self, mock_span: MagicMock) -> None:
        """Test that tool calls are correctly extracted and recorded."""
        tool_call = ChatCompletionMessageToolCall(
            id="call-123",
            type="function",
            function=ModelResponseFunctionCall(
                name="search_documents",
                arguments='{"query": "test query"}',
            ),
        )
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(
                    content=None,
                    role="assistant",
                    tool_calls=[tool_call],
                ),
            ),
        )

        record_llm_response(mock_span, response)

        output = mock_span.span_data.output
        assert len(output) == 1
        assert output[0]["role"] == "assistant"
        assert "tool_calls" in output[0]
        assert len(output[0]["tool_calls"]) == 1
        assert output[0]["tool_calls"][0]["id"] == "call-123"
        assert output[0]["tool_calls"][0]["function"]["name"] == "search_documents"

    def test_records_usage_from_response(self, mock_span: MagicMock) -> None:
        """Test that usage metrics are correctly recorded."""
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(content="Test", role="assistant"),
            ),
            usage=Usage(
                prompt_tokens=100,
                completion_tokens=50,
                total_tokens=150,
                cache_creation_input_tokens=10,
                cache_read_input_tokens=20,
            ),
        )

        record_llm_response(mock_span, response)

        assert mock_span.span_data.usage is not None
        assert mock_span.span_data.usage["input_tokens"] == 100
        assert mock_span.span_data.usage["output_tokens"] == 50
        assert mock_span.span_data.usage["total_tokens"] == 150
        assert mock_span.span_data.usage["cache_read_input_tokens"] == 20
        assert mock_span.span_data.usage["cache_creation_input_tokens"] == 10

    def test_handles_none_content(self, mock_span: MagicMock) -> None:
        """Test that None content is handled (e.g., tool-only response)."""
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(content=None, role="assistant"),
            ),
        )

        record_llm_response(mock_span, response)

        # Content should not be in output dict when None
        assert mock_span.span_data.output == [{"role": "assistant"}]

    def test_handles_no_usage(self, mock_span: MagicMock) -> None:
        """Test that missing usage is handled gracefully."""
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(content="Test", role="assistant"),
            ),
            usage=None,
        )

        record_llm_response(mock_span, response)

        # Usage should remain None/unset
        assert mock_span.span_data.usage is None

    def test_records_all_fields_together(self, mock_span: MagicMock) -> None:
        """Test recording a response with all fields present."""
        tool_call = ChatCompletionMessageToolCall(
            id="call-456",
            type="function",
            function=ModelResponseFunctionCall(
                name="analyze",
                arguments='{"text": "sample"}',
            ),
        )
        response = ModelResponse(
            id="test-id",
            created="2024-01-01",
            choice=Choice(
                message=Message(
                    content="Here's my analysis:",
                    role="assistant",
                    reasoning_content="I need to think about this carefully...",
                    tool_calls=[tool_call],
                ),
            ),
            usage=Usage(
                prompt_tokens=200,
                completion_tokens=100,
                total_tokens=300,
                cache_creation_input_tokens=0,
                cache_read_input_tokens=50,
            ),
        )

        record_llm_response(mock_span, response)

        # Check output
        output = mock_span.span_data.output
        assert len(output) == 1
        assert output[0]["role"] == "assistant"
        assert output[0]["content"] == "Here's my analysis:"
        assert len(output[0]["tool_calls"]) == 1

        # Check reasoning
        assert (
            mock_span.span_data.reasoning == "I need to think about this carefully..."
        )

        # Check usage
        assert mock_span.span_data.usage["input_tokens"] == 200
        assert mock_span.span_data.usage["output_tokens"] == 100


class TestRecordLlmSpanOutput:
    """Tests for record_llm_span_output function (streaming scenarios)."""

    def test_records_string_output(self, mock_span: MagicMock) -> None:
        """Test recording a simple string output."""
        record_llm_span_output(mock_span, "Hello, world!")

        assert mock_span.span_data.output == [
            {"role": "assistant", "content": "Hello, world!"}
        ]

    def test_records_none_output(self, mock_span: MagicMock) -> None:
        """Test recording None output."""
        record_llm_span_output(mock_span, None)

        assert mock_span.span_data.output == [{"role": "assistant", "content": None}]

    def test_records_sequence_output(self, mock_span: MagicMock) -> None:
        """Test recording a sequence of message dicts."""
        messages: list[dict[str, Any]] = [
            {"role": "assistant", "content": "Part 1"},
            {"role": "assistant", "content": "Part 2"},
        ]

        record_llm_span_output(mock_span, messages)

        assert mock_span.span_data.output == messages

    def test_records_usage(self, mock_span: MagicMock) -> None:
        """Test recording usage information."""
        usage = MagicMock()
        usage.prompt_tokens = 50
        usage.completion_tokens = 25
        usage.total_tokens = 75
        usage.cache_read_input_tokens = 10
        usage.cache_creation_input_tokens = 5

        record_llm_span_output(mock_span, "Test output", usage=usage)

        assert mock_span.span_data.usage is not None
        assert mock_span.span_data.usage["input_tokens"] == 50
        assert mock_span.span_data.usage["output_tokens"] == 25

    def test_records_reasoning(self, mock_span: MagicMock) -> None:
        """Test recording reasoning content."""
        record_llm_span_output(
            mock_span, "Final answer", reasoning="Step by step thinking..."
        )

        assert mock_span.span_data.reasoning == "Step by step thinking..."

    def test_records_tool_calls(self, mock_span: MagicMock) -> None:
        """Test recording tool calls in streaming scenario."""
        tool_calls = [
            ToolCall(
                id="call-789",
                type="function",
                function=FunctionCall(
                    name="get_weather",
                    arguments='{"location": "NYC"}',
                ),
            )
        ]

        record_llm_span_output(mock_span, "Checking weather...", tool_calls=tool_calls)

        output = mock_span.span_data.output
        assert len(output) == 1
        assert output[0]["content"] == "Checking weather..."
        assert "tool_calls" in output[0]
        assert len(output[0]["tool_calls"]) == 1
        assert output[0]["tool_calls"][0]["id"] == "call-789"

    def test_records_tool_calls_with_none_output(self, mock_span: MagicMock) -> None:
        """Test recording tool calls when output is None."""
        tool_calls = [
            ToolCall(
                id="call-abc",
                type="function",
                function=FunctionCall(
                    name="search",
                    arguments='{"q": "test"}',
                ),
            )
        ]

        record_llm_span_output(mock_span, None, tool_calls=tool_calls)

        output = mock_span.span_data.output
        assert len(output) == 1
        assert output[0]["content"] is None
        assert len(output[0]["tool_calls"]) == 1

    def test_records_all_streaming_fields(self, mock_span: MagicMock) -> None:
        """Test recording all fields in streaming scenario."""
        usage = MagicMock()
        usage.prompt_tokens = 100
        usage.completion_tokens = 50
        usage.total_tokens = 150
        usage.cache_read_input_tokens = 0
        usage.cache_creation_input_tokens = 0

        tool_calls = [
            ToolCall(
                id="call-xyz",
                type="function",
                function=FunctionCall(
                    name="calculator",
                    arguments='{"expr": "2+2"}',
                ),
            )
        ]

        record_llm_span_output(
            mock_span,
            output="Computing...",
            usage=usage,
            reasoning="Let me calculate this.",
            tool_calls=tool_calls,
        )

        # Check all fields
        output = mock_span.span_data.output
        assert output[0]["content"] == "Computing..."
        assert len(output[0]["tool_calls"]) == 1
        assert mock_span.span_data.reasoning == "Let me calculate this."
        assert mock_span.span_data.usage["input_tokens"] == 100


================================================
FILE: backend/tests/integration/Dockerfile
================================================
# syntax=docker/dockerfile:1.6
# This image is only for running integration tests. It layers test-specific
# files and dependencies on top of the backend image.
FROM base AS integration-base

WORKDIR /app

# Integration test stuff
COPY ./requirements/dev.txt /tmp/dev-requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade -r /tmp/dev-requirements.txt && \
    rm -rf ~/.cache/uv /tmp/*.txt

COPY ./pytest.ini /app/pytest.ini
COPY ./tests/integration /app/tests/integration
# copies all files, but not folders, in the tests directory
COPY ./tests/* /app/tests/

FROM base AS openapi-schema
COPY ./scripts/onyx_openapi_schema.py /app/scripts/onyx_openapi_schema.py
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
RUN LICENSE_ENFORCEMENT_ENABLED=false python scripts/onyx_openapi_schema.py --filename openapi.json

FROM openapitools/openapi-generator-cli:latest AS openapi-client
WORKDIR /local
COPY --from=openapi-schema /app/openapi.json /local/openapi.json
RUN openapi-generator-cli generate \
    -i /local/openapi.json \
    -g python \
    -o /local/onyx_openapi_client \
    --package-name onyx_openapi_client \
    --skip-validate-spec \
    --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"

FROM integration-base AS integration
COPY --from=openapi-schema /app/openapi.json /app/generated/openapi.json
COPY --from=openapi-client /local/onyx_openapi_client /app/generated/onyx_openapi_client


ENV PYTHONPATH=/app

ENTRYPOINT ["pytest", "-s", "-rs"]
CMD ["/app/tests/integration", "--ignore=/app/tests/integration/multitenant_tests"]


================================================
FILE: backend/tests/integration/README.md
================================================
# Integration Tests

## General Testing Overview

The integration tests are designed with a "manager" class and a "test" class for each type of object being manipulated (e.g., user, persona, credential):

- **Manager Class**: Contains methods for each type of API call. Responsible for creating, deleting, and verifying the existence of an entity.
- **Test Class**: Stores data for each entity being tested. This is our "expected state" of the object.

The idea is that each test can use the manager class to create (.create()) a "test*" object. It can then perform an operation on the object (e.g., send a request to the API) and then check if the "test*" object is in the expected state by using the manager class (.verify()) function.

## Instructions for Running Integration Tests Locally
0. Generate dependencies
First install openap-generator
```sh
brew install openapi-generator
```

Then, using the VSCode/Cursor debugger, run the `Onyx OpenAPI Schema Generator` task (see `CONTRIBUTING_VSCODE.md` for `launch.json` setup instructions).
The task automatically generates the Python client needed for integration tests.

If the client generation fails, try running this command manually:
```sh
openapi-generator generate -i backend/generated/openapi.json -g python -o backend/generated/onyx_openapi_client --package-name onyx_openapi_client --skip-validate-spec --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
```

1. Launch onyx (using Docker or running with a debugger), ensuring the API server is running on port 8080.
   - If you'd like to set environment variables, you can do so by creating a `.env` file in the onyx/backend/tests/integration/ directory.
   - Onyx MUST be launched with AUTH_TYPE=basic and ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
   - Tests that use `mock_llm_response` (e.g. llm workflow tool call tests) also require `INTEGRATION_TESTS_MODE=true` on the API server process.
2. Navigate to `onyx/backend`.
3. Run the following command in the terminal:
   ```sh
   python -m dotenv -f .env run -- pytest -s tests/integration/tests/
   ```
   or to run all tests in a file:
   ```sh
   python -m dotenv -f .env run -- pytest -s tests/integration/tests/path_to/test_file.py
   ```
   or to run a single test:
   ```sh
   python -m dotenv -f .env run -- pytest -s tests/integration/tests/path_to/test_file.py::test_function_name
   ```

Running some single tests require the `mock_connector_server` container to be running. If the above doesn't work, 
navigate to `backend/tests/integration/mock_services` and run
```sh
docker compose -f docker-compose.mock-it-services.yml -p mock-it-services-stack up -d
```
You will have to modify the networks section of the docker-compose file to `<your stack name>_default` if you brought up the standard
onyx services with a name different from the default `onyx`.

## Guidelines for Writing Integration Tests

- As authentication is currently required for all tests, each test should start by creating a user.
- Each test should ideally focus on a single API flow.
- The test writer should try to consider failure cases and edge cases for the flow and write the tests to check for these cases.
- Every step of the test should be commented describing what is being done and what the expected behavior is.
- A summary of the test should be given at the top of the test function as well!
- When writing new tests, manager classes, manager functions, and test classes, try to copy the style of the other ones that have already been written.
- Be careful for scope creep!
  - No need to overcomplicate every test by verifying after every single API call so long as the case you would be verifying is covered elsewhere (ideally in a test focused on covering that case).
  - An example of this is: Creating an admin user is done at the beginning of nearly every test, but we only need to verify that the user is actually an admin in the test focused on checking admin permissions. For every other test, we can just create the admin user and assume that the permissions are working as expected.

## Current Testing Limitations

### Test coverage

- All tests are probably not as high coverage as they could be.
- The "connector" tests in particular are super bare bones because we will be reworking connector/cc_pair sometime soon.
- Global Curator role is not thoroughly tested.
- No auth is not tested at all.

### Failure checking

- While we test expected auth failures, we only check that it failed at all.
- We dont check that the return codes are what we expect.
- This means that a test could be failing for a different reason than expected.
- We should ensure that the proper codes are being returned for each failure case.
- We should also query the db after each failure to ensure that the db is in the expected state.

### Scope/focus

- The tests may be scoped sub-optimally.
- The scoping of each test may be overlapping.

## Current Testing Coverage

The current testing coverage should be checked by reading the comments at the top of each test file.

## TODO: Testing Coverage

- Persona permissions testing
- Read only (and/or basic) user permissions
  - Ensuring proper permission enforcement using the chat/doc_search endpoints
- No auth

## Ideas for integration testing design

### Combine the "test" and "manager" classes

This could make test writing a bit cleaner by preventing test writers from having to pass around objects into functions that the objects have a 1:1 relationship with.

### Rework VespaClient

Right now, its used a fixture and has to be passed around between manager classes.
Could just be built where its used


================================================
FILE: backend/tests/integration/__init__.py
================================================


================================================
FILE: backend/tests/integration/common_utils/chat.py
================================================
import requests

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User


def test_create_chat_session_and_send_messages() -> None:
    # Create a test user
    with get_session_with_current_tenant() as db_session:
        test_user = User(email="test@example.com", hashed_password="dummy_hash")
        db_session.add(test_user)
        db_session.commit()

    base_url = "http://localhost:8080"  # Adjust this to your API's base URL
    headers = {"Authorization": f"Bearer {test_user.id}"}

    # Create a new chat session
    create_session_response = requests.post(
        f"{base_url}/chat/create-chat-session",
        json={
            "description": "Test Chat",
            "persona_id": 1,
        },  # Assuming persona_id 1 exists
        headers=headers,
    )
    assert create_session_response.status_code == 200
    chat_session_id = create_session_response.json()["chat_session_id"]

    # Send first message
    first_message = "Hello, this is a test message."
    send_message_response = requests.post(
        f"{base_url}/chat/send-chat-message",
        json={
            "chat_session_id": chat_session_id,
            "message": first_message,
            "retrieval_options": {"top_k": 3},
            "stream_response": False,
        },
        headers=headers,
    )
    assert send_message_response.status_code == 200

    # Send second message
    second_message = "Can you provide more information?"
    send_message_response = requests.post(
        f"{base_url}/chat/send-chat-message",
        json={
            "chat_session_id": chat_session_id,
            "message": second_message,
            "retrieval_options": {"top_k": 3},
            "stream_response": False,
        },
        headers=headers,
    )
    assert send_message_response.status_code == 200

    # Verify chat session details
    get_session_response = requests.get(
        f"{base_url}/chat/get-chat-session/{chat_session_id}", headers=headers
    )
    assert get_session_response.status_code == 200
    session_details = get_session_response.json()
    assert session_details["chat_session_id"] == chat_session_id
    assert session_details["description"] == "Test Chat"
    assert len(session_details["messages"]) == 4  # 2 user messages + 2 AI responses


================================================
FILE: backend/tests/integration/common_utils/config.py
================================================
import generated.onyx_openapi_client.onyx_openapi_client as onyx_api  # type: ignore[import-untyped,unused-ignore]
from tests.integration.common_utils.constants import API_SERVER_URL

api_config = onyx_api.Configuration(host=API_SERVER_URL)


================================================
FILE: backend/tests/integration/common_utils/constants.py
================================================
import os

ADMIN_USER_NAME = "admin_user"

API_SERVER_PROTOCOL = os.getenv("API_SERVER_PROTOCOL") or "http"
API_SERVER_HOST = os.getenv("API_SERVER_HOST") or "127.0.0.1"
API_SERVER_PORT = os.getenv("API_SERVER_PORT") or "8080"
API_SERVER_URL = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}"
MAX_DELAY = 300

MCP_SERVER_HOST = os.getenv("MCP_SERVER_HOST") or "127.0.0.1"
MCP_SERVER_PORT = os.getenv("MCP_SERVER_PORT") or "8090"
MCP_SERVER_URL = f"{API_SERVER_PROTOCOL}://{MCP_SERVER_HOST}:{MCP_SERVER_PORT}"

GENERAL_HEADERS = {"Content-Type": "application/json"}

NUM_DOCS = 5

MOCK_CONNECTOR_SERVER_HOST = os.getenv("MOCK_CONNECTOR_SERVER_HOST") or "localhost"
MOCK_CONNECTOR_SERVER_PORT = os.getenv("MOCK_CONNECTOR_SERVER_PORT") or 8001


================================================
FILE: backend/tests/integration/common_utils/document_acl.py
================================================
"""
Utilities for testing document access control lists (ACLs) and permissions.
"""

from typing import List
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.orm import Session

from ee.onyx.access.access import _get_access_for_documents
from ee.onyx.db.external_perm import fetch_external_groups_for_user
from onyx.access.utils import prefix_external_group
from onyx.access.utils import prefix_user_email
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import User
from onyx.db.users import fetch_user_by_id
from onyx.utils.logger import setup_logger
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser

logger = setup_logger()


def get_user_acl(user: User, db_session: Session) -> set[str]:
    """
    Get the ACL entries for a user, including their external groups, email, and public doc pattern.

    Args:
        user: The user object
        db_session: Database session

    Returns:
        Set of ACL entries for the user
    """
    db_external_groups = (
        fetch_external_groups_for_user(db_session, user.id) if user else []
    )
    prefixed_external_groups = [
        prefix_external_group(db_external_group.external_user_group_id)
        for db_external_group in db_external_groups
    ]

    user_acl = set(prefixed_external_groups)
    user_acl.update({prefix_user_email(user.email), PUBLIC_DOC_PAT})
    return user_acl


def get_user_document_access_via_acl(
    test_user: DATestUser, document_ids: List[str], db_session: Session
) -> List[str]:
    """
    Determine which documents a user can access by comparing user ACL with document ACLs.

    This is a more reliable method than search-based verification as it directly checks
    permission logic without depending on search relevance or ranking.

    Args:
        test_user: The test user to check access for
        document_ids: List of document IDs to check
        db_session: Database session

    Returns:
        List of document IDs that the user can access
    """
    # Get the actual User object from the database
    user = fetch_user_by_id(db_session, UUID(test_user.id))
    if not user:
        logger.error(f"Could not find user with ID {test_user.id}")
        return []

    user_acl = get_user_acl(user, db_session)
    logger.info(f"User {user.email} ACL entries: {user_acl}")

    # Get document access information
    doc_access_map = _get_access_for_documents(document_ids, db_session)
    logger.info(f"Found access info for {len(doc_access_map)} documents")

    accessible_docs = []
    for doc_id, doc_access in doc_access_map.items():
        doc_acl = doc_access.to_acl()
        logger.info(f"Document {doc_id} ACL: {doc_acl}")

        # Check if user has any matching ACL entry
        if user_acl.intersection(doc_acl):
            accessible_docs.append(doc_id)
            logger.info(f"User {user.email} has access to document {doc_id}")
        else:
            logger.info(f"User {user.email} does NOT have access to document {doc_id}")

    return accessible_docs


def get_all_connector_documents(
    cc_pair: DATestCCPair, db_session: Session
) -> List[str]:
    """
    Get all document IDs for a given connector/credential pair.

    Args:
        cc_pair: The connector-credential pair
        db_session: Database session

    Returns:
        List of document IDs
    """
    stmt = select(DocumentByConnectorCredentialPair.id).where(
        DocumentByConnectorCredentialPair.connector_id == cc_pair.connector_id,
        DocumentByConnectorCredentialPair.credential_id == cc_pair.credential_id,
    )

    result = db_session.execute(stmt)
    document_ids = [row[0] for row in result.fetchall()]
    logger.info(
        f"Found {len(document_ids)} documents for connector {cc_pair.connector_id}"
    )

    return document_ids


def get_documents_by_permission_type(
    document_ids: List[str], db_session: Session
) -> List[str]:
    """
    Categorize documents by their permission types and return public documents.

    Args:
        document_ids: List of document IDs to check
        db_session: Database session

    Returns:
        List of document IDs that are public
    """
    doc_access_map = _get_access_for_documents(document_ids, db_session)

    public_docs = []

    for doc_id, doc_access in doc_access_map.items():
        if doc_access.is_public:
            public_docs.append(doc_id)

    return public_docs


================================================
FILE: backend/tests/integration/common_utils/managers/api_key.py
================================================
from uuid import uuid4

import requests

from onyx.db.models import UserRole
from onyx.server.api_key.models import APIKeyArgs
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestUser


class APIKeyManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        api_key_role: UserRole = UserRole.ADMIN,
    ) -> DATestAPIKey:
        name = f"{name}-api-key" if name else f"test-api-key-{uuid4()}"
        api_key_request = APIKeyArgs(
            name=name,
            role=api_key_role,
        )
        api_key_response = requests.post(
            f"{API_SERVER_URL}/admin/api-key",
            json=api_key_request.model_dump(),
            headers=user_performing_action.headers,
        )
        api_key_response.raise_for_status()
        api_key = api_key_response.json()
        result_api_key = DATestAPIKey(
            api_key_id=api_key["api_key_id"],
            api_key_display=api_key["api_key_display"],
            api_key=api_key["api_key"],
            api_key_name=name,
            api_key_role=api_key_role,
            user_id=api_key["user_id"],
            headers=GENERAL_HEADERS,
        )
        result_api_key.headers["Authorization"] = f"Bearer {result_api_key.api_key}"
        return result_api_key

    @staticmethod
    def delete(
        api_key: DATestAPIKey,
        user_performing_action: DATestUser,
    ) -> None:
        api_key_response = requests.delete(
            f"{API_SERVER_URL}/admin/api-key/{api_key.api_key_id}",
            headers=user_performing_action.headers,
        )
        api_key_response.raise_for_status()

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[DATestAPIKey]:
        api_key_response = requests.get(
            f"{API_SERVER_URL}/admin/api-key",
            headers=user_performing_action.headers,
        )
        api_key_response.raise_for_status()
        return [DATestAPIKey(**api_key) for api_key in api_key_response.json()]

    @staticmethod
    def verify(
        api_key: DATestAPIKey,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        retrieved_keys = APIKeyManager.get_all(
            user_performing_action=user_performing_action
        )
        for key in retrieved_keys:
            if key.api_key_id == api_key.api_key_id:
                if verify_deleted:
                    raise ValueError("API Key found when it should have been deleted")
                if (
                    key.api_key_name == api_key.api_key_name
                    and key.api_key_role == api_key.api_key_role
                ):
                    return

        if not verify_deleted:
            raise Exception("API Key not found")


================================================
FILE: backend/tests/integration/common_utils/managers/cc_pair.py
================================================
import time
from datetime import datetime
from typing import Any
from uuid import uuid4

import requests

import generated.onyx_openapi_client.onyx_openapi_client as api  # type: ignore[import-untyped,unused-ignore]
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import ConnectorIndexingStatusLite
from onyx.server.documents.models import ConnectorStatus
from onyx.server.documents.models import DocumentSource
from onyx.server.documents.models import DocumentSyncStatus
from tests.integration.common_utils.config import api_config
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser


def _cc_pair_creator(
    connector_id: int,
    credential_id: int,
    user_performing_action: DATestUser,
    name: str | None = None,
    access_type: AccessType = AccessType.PUBLIC,
    groups: list[int] | None = None,
) -> DATestCCPair:
    name = f"{name}-cc-pair" if name else f"test-cc-pair-{uuid4()}"

    with api.ApiClient(api_config) as api_client:
        api_instance = api.DefaultApi(api_client)
        connector_credential_pair_metadata = api.ConnectorCredentialPairMetadata(
            name=name, access_type=access_type, groups=groups or []
        )
        api_response: api.StatusResponseInt = (
            api_instance.associate_credential_to_connector(
                connector_id,
                credential_id,
                connector_credential_pair_metadata,
                _headers=user_performing_action.headers,
            )
        )

    return DATestCCPair(
        id=int(api_response.data),
        name=name,
        connector_id=connector_id,
        credential_id=credential_id,
        access_type=access_type,
        groups=groups or [],
    )


class CCPairManager:
    @staticmethod
    def create_from_scratch(
        user_performing_action: DATestUser,
        name: str | None = None,
        access_type: AccessType = AccessType.PUBLIC,
        groups: list[int] | None = None,
        source: DocumentSource = DocumentSource.FILE,
        input_type: InputType = InputType.LOAD_STATE,
        connector_specific_config: dict[str, Any] | None = None,
        credential_json: dict[str, Any] | None = None,
        refresh_freq: int | None = None,
    ) -> DATestCCPair:
        connector = ConnectorManager.create(
            user_performing_action=user_performing_action,
            name=name,
            source=source,
            input_type=input_type,
            connector_specific_config=connector_specific_config,
            access_type=access_type,
            groups=groups,
            refresh_freq=refresh_freq,
        )
        credential = CredentialManager.create(
            user_performing_action=user_performing_action,
            credential_json=credential_json,
            name=name,
            source=source,
            curator_public=(access_type == AccessType.PUBLIC),
            groups=groups,
        )
        cc_pair = _cc_pair_creator(
            connector_id=connector.id,
            credential_id=credential.id,
            name=name,
            access_type=access_type,
            groups=groups,
            user_performing_action=user_performing_action,
        )
        return cc_pair

    @staticmethod
    def create(
        connector_id: int,
        credential_id: int,
        user_performing_action: DATestUser,
        name: str | None = None,
        access_type: AccessType = AccessType.PUBLIC,
        groups: list[int] | None = None,
    ) -> DATestCCPair:
        cc_pair = _cc_pair_creator(
            connector_id=connector_id,
            credential_id=credential_id,
            name=name,
            access_type=access_type,
            groups=groups,
            user_performing_action=user_performing_action,
        )
        return cc_pair

    @staticmethod
    def pause_cc_pair(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> None:
        result = requests.put(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/status",
            json={"status": "PAUSED"},
            headers=user_performing_action.headers,
        )
        result.raise_for_status()

    @staticmethod
    def unpause_cc_pair(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> None:
        result = requests.put(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/status",
            json={"status": "ACTIVE"},
            headers=user_performing_action.headers,
        )
        result.raise_for_status()

    @staticmethod
    def delete(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> None:
        cc_pair_identifier = ConnectorCredentialPairIdentifier(
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
        )
        result = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/deletion-attempt",
            json=cc_pair_identifier.model_dump(),
            headers=user_performing_action.headers,
        )
        result.raise_for_status()

    @staticmethod
    def get_single(
        cc_pair_id: int,
        user_performing_action: DATestUser,
    ) -> CCPairFullInfo | None:
        response = requests.get(
            f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        cc_pair_json = response.json()
        return CCPairFullInfo(**cc_pair_json)

    @staticmethod
    def get_indexing_status_by_id(
        cc_pair_id: int,
        user_performing_action: DATestUser,
    ) -> ConnectorIndexingStatusLite | None:
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/connector/indexing-status",
            headers=user_performing_action.headers,
            json={"get_all_connectors": True},
        )
        response.raise_for_status()
        indexing_status_response = response.json()
        for connectors_by_source in indexing_status_response:
            connectors = connectors_by_source["indexing_statuses"]
            for connector in connectors:
                if connector["cc_pair_id"] == cc_pair_id:
                    return ConnectorIndexingStatusLite(**connector)

        return None

    @staticmethod
    def get_indexing_statuses(
        user_performing_action: DATestUser,
    ) -> list[ConnectorIndexingStatusLite]:
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/connector/indexing-status",
            headers=user_performing_action.headers,
            json={"get_all_connectors": True},
        )
        response.raise_for_status()
        indexing_status_response = response.json()
        indexing_statuses = []
        for connectors_by_source in indexing_status_response:
            connectors = connectors_by_source["indexing_statuses"]
            for connector in connectors:
                indexing_statuses.append(ConnectorIndexingStatusLite(**connector))
        return indexing_statuses

    @staticmethod
    def get_connector_statuses(
        user_performing_action: DATestUser,
    ) -> list[ConnectorStatus]:
        response = requests.get(
            f"{API_SERVER_URL}/manage/admin/connector/status",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [ConnectorStatus(**status) for status in response.json()]

    @staticmethod
    def verify(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        all_cc_pairs = CCPairManager.get_connector_statuses(user_performing_action)
        for retrieved_cc_pair in all_cc_pairs:
            if retrieved_cc_pair.cc_pair_id == cc_pair.id:
                if verify_deleted:
                    # We assume that this check will be performed after the deletion is
                    # already waited for
                    raise ValueError(
                        f"CC pair {cc_pair.id} found but should be deleted"
                    )
                if (
                    retrieved_cc_pair.name == cc_pair.name
                    and retrieved_cc_pair.connector.id == cc_pair.connector_id
                    and retrieved_cc_pair.credential.id == cc_pair.credential_id
                    and retrieved_cc_pair.access_type == cc_pair.access_type
                    and set(retrieved_cc_pair.groups) == set(cc_pair.groups)
                ):
                    return

        if not verify_deleted:
            raise ValueError(f"CC pair {cc_pair.id} not found")

    @staticmethod
    def run_once(
        cc_pair: DATestCCPair,
        from_beginning: bool,
        user_performing_action: DATestUser,
    ) -> None:
        body = {
            "connector_id": cc_pair.connector_id,
            "credential_ids": [cc_pair.credential_id],
            "from_beginning": from_beginning,
        }
        result = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/connector/run-once",
            json=body,
            headers=user_performing_action.headers,
        )
        result.raise_for_status()

    @staticmethod
    def wait_for_indexing_inactive(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
    ) -> None:
        """wait for the number of docs to be indexed on the connector.
        This is used to test pausing a connector in the middle of indexing and
        terminating that indexing."""
        print(f"Indexing wait for inactive starting: cc_pair={cc_pair.id}")
        start = time.monotonic()
        while True:
            fetched_cc_pairs = CCPairManager.get_indexing_statuses(
                user_performing_action
            )
            for fetched_cc_pair in fetched_cc_pairs:
                if fetched_cc_pair.cc_pair_id != cc_pair.id:
                    continue

                if fetched_cc_pair.in_progress:
                    continue

                print(f"Indexing is inactive: cc_pair={cc_pair.id}")
                return

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"Indexing wait for inactive timed out: cc_pair={cc_pair.id} timeout={timeout}s"
                )

            print(
                f"Indexing wait for inactive still waiting: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s"
            )
            time.sleep(5)

    @staticmethod
    def wait_for_indexing_in_progress(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
        num_docs: int = 16,
    ) -> None:
        """wait for the number of docs to be indexed on the connector.
        This is used to test pausing a connector in the middle of indexing and
        terminating that indexing."""
        start = time.monotonic()
        while True:
            fetched_cc_pairs = CCPairManager.get_indexing_statuses(
                user_performing_action
            )
            for fetched_cc_pair in fetched_cc_pairs:
                if fetched_cc_pair.cc_pair_id != cc_pair.id:
                    continue

                if not fetched_cc_pair.in_progress:
                    continue

                if fetched_cc_pair.docs_indexed < num_docs:
                    print(
                        f"Indexing in progress: cc_pair={cc_pair.id} "
                        f"docs_indexed={fetched_cc_pair.docs_indexed} num_docs={num_docs}"
                    )
                    continue

                if fetched_cc_pair.docs_indexed >= num_docs:
                    print(
                        "Indexed at least the requested number of docs: "
                        f"cc_pair={cc_pair.id} "
                        f"docs_indexed={fetched_cc_pair.docs_indexed} "
                        f"num_docs={num_docs}"
                    )
                    return

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"Indexing in progress wait timed out: cc_pair={cc_pair.id} timeout={timeout}s"
                )

            print(
                f"Indexing in progress waiting: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s"
            )
            time.sleep(5)

    @staticmethod
    def wait_for_indexing_completion(
        cc_pair: DATestCCPair,
        after: datetime,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
    ) -> None:
        """after: Wait for an indexing success time after this time"""
        start = time.monotonic()
        while True:
            fetched_cc_pairs = CCPairManager.get_indexing_statuses(
                user_performing_action
            )
            for fetched_cc_pair in fetched_cc_pairs:
                if fetched_cc_pair.cc_pair_id != cc_pair.id:
                    continue

                if fetched_cc_pair.in_progress:
                    continue

                if (
                    fetched_cc_pair.last_success
                    and fetched_cc_pair.last_success > after
                ):
                    print(f"Indexing complete: cc_pair={cc_pair.id}")
                    return

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"Indexing wait timed out: cc_pair={cc_pair.id} timeout={timeout}s"
                )

            print(
                f"Indexing wait for completion: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s"
            )
            time.sleep(5)

    @staticmethod
    def prune(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> None:
        result = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune",
            headers=user_performing_action.headers,
        )
        result.raise_for_status()

    @staticmethod
    def last_pruned(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> datetime | None:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/last_pruned",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        response_str = response.json()

        # If the response itself is a datetime string, parse it
        if not isinstance(response_str, str):
            return None

        try:
            return datetime.fromisoformat(response_str)
        except ValueError:
            return None

    @staticmethod
    def wait_for_prune(
        cc_pair: DATestCCPair,
        after: datetime,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
    ) -> None:
        """after: The task register time must be after this time."""
        start = time.monotonic()
        while True:
            last_pruned = CCPairManager.last_pruned(cc_pair, user_performing_action)
            if last_pruned and last_pruned > after:
                print(f"Pruning complete: cc_pair={cc_pair.id}")
                break

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"CC pair pruning was not completed within {timeout} seconds"
                )

            print(
                f"Waiting for CC pruning to complete. elapsed={elapsed:.2f} timeout={timeout}"
            )
            time.sleep(5)

    @staticmethod
    def sync(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> None:
        """This function triggers a permission sync.
        Naming / intent of this function probably could use improvement, but currently it's letting
        409 Conflict pass through since if it's running that's what we were trying to do anyway.
        """
        result = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-permissions",
            headers=user_performing_action.headers,
        )
        if result.status_code != 409:
            result.raise_for_status()

        group_sync_result = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups",
            headers=user_performing_action.headers,
        )
        if group_sync_result.status_code != 409:
            group_sync_result.raise_for_status()
        time.sleep(2)

    @staticmethod
    def get_doc_sync_task(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> datetime | None:
        doc_sync_response = requests.get(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-permissions",
            headers=user_performing_action.headers,
        )
        doc_sync_response.raise_for_status()
        doc_sync_response_str = doc_sync_response.json()

        # If the response itself is a datetime string, parse it
        if not isinstance(doc_sync_response_str, str):
            return None

        try:
            return datetime.fromisoformat(doc_sync_response_str)
        except ValueError:
            return None

    @staticmethod
    def get_group_sync_task(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> datetime | None:
        group_sync_response = requests.get(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups",
            headers=user_performing_action.headers,
        )
        group_sync_response.raise_for_status()
        group_sync_response_str = group_sync_response.json()

        # If the response itself is a datetime string, parse it
        if not isinstance(group_sync_response_str, str):
            return None

        try:
            return datetime.fromisoformat(group_sync_response_str)
        except ValueError:
            return None

    @staticmethod
    def get_doc_sync_statuses(
        cc_pair: DATestCCPair,
        user_performing_action: DATestUser,
    ) -> list[DocumentSyncStatus]:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/get-docs-sync-status",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        doc_sync_statuses: list[DocumentSyncStatus] = []
        for doc_sync_status in response.json():
            last_synced = doc_sync_status.get("last_synced")
            if last_synced:
                last_synced = datetime.fromisoformat(last_synced)

            last_modified = doc_sync_status.get("last_modified")
            if last_modified:
                last_modified = datetime.fromisoformat(last_modified)

            doc_sync_statuses.append(
                DocumentSyncStatus(
                    doc_id=doc_sync_status["doc_id"],
                    last_synced=last_synced,
                    last_modified=last_modified,
                )
            )

        return doc_sync_statuses

    @staticmethod
    def wait_for_sync(
        cc_pair: DATestCCPair,
        after: datetime,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
        number_of_updated_docs: int = 0,
        # Sometimes waiting for a group sync is not necessary
        should_wait_for_group_sync: bool = True,
        # Sometimes waiting for a vespa sync is not necessary
        should_wait_for_vespa_sync: bool = True,
    ) -> None:
        """after: The task register time must be after this time."""
        doc_synced = False
        group_synced = False
        start = time.monotonic()
        while True:
            # We are treating both syncs as part of one larger permission sync job
            doc_last_synced = CCPairManager.get_doc_sync_task(
                cc_pair, user_performing_action
            )
            group_last_synced = CCPairManager.get_group_sync_task(
                cc_pair, user_performing_action
            )

            if not doc_synced and doc_last_synced and doc_last_synced > after:
                print(f"doc_last_synced: {doc_last_synced}")
                print(f"sync command start time: {after}")
                print(f"permission sync complete: cc_pair={cc_pair.id}")
                doc_synced = True

            if not group_synced and group_last_synced and group_last_synced > after:
                print(f"group_last_synced: {group_last_synced}")
                print(f"sync command start time: {after}")
                print(f"group sync complete: cc_pair={cc_pair.id}")
                group_synced = True

            if doc_synced and (group_synced or not should_wait_for_group_sync):
                break

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"Permission sync was not completed within {timeout} seconds"
                )

            print(
                f"Waiting for CC sync to complete. elapsed={elapsed:.2f} timeout={timeout}"
            )
            time.sleep(5)

        # TODO: remove this sleep,
        # this shouldnt be necessary but something is off with the timing for the sync jobs
        time.sleep(5)

        if not should_wait_for_vespa_sync:
            return

        print("waiting for vespa sync")
        # wait for the vespa sync to complete once the permission sync is complete
        start = time.monotonic()
        while True:
            doc_sync_statuses = CCPairManager.get_doc_sync_statuses(
                cc_pair=cc_pair,
                user_performing_action=user_performing_action,
            )
            synced_docs = 0
            for doc_sync_status in doc_sync_statuses:
                if (
                    doc_sync_status.last_synced is not None
                    and doc_sync_status.last_modified is not None
                    and doc_sync_status.last_synced >= doc_sync_status.last_modified
                    and doc_sync_status.last_synced >= after
                    and doc_sync_status.last_modified >= after
                ):
                    synced_docs += 1

            if synced_docs >= number_of_updated_docs:
                print(f"all docs synced: cc_pair={cc_pair.id}")
                break

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"Vespa sync was not completed within {timeout} seconds"
                )

            print(
                f"Waiting for vespa sync to complete. elapsed={elapsed:.2f} timeout={timeout}"
            )
            time.sleep(5)

    @staticmethod
    def wait_for_deletion_completion(
        user_performing_action: DATestUser,
        cc_pair_id: int | None = None,
    ) -> None:
        """if cc_pair_id is not specified, just waits until no connectors are in the deleting state.
        if cc_pair_id is specified, checks to ensure the specific cc_pair_id is gone.
        We had a bug where the connector was paused in the middle of deleting, so specifying the
        cc_pair_id is good to do."""
        start = time.monotonic()
        while True:
            cc_pairs = CCPairManager.get_indexing_statuses(user_performing_action)
            if cc_pair_id:
                found = False
                for cc_pair in cc_pairs:
                    if cc_pair.cc_pair_id == cc_pair_id:
                        found = True
                        break

                if not found:
                    return
            else:
                if all(
                    cc_pair.cc_pair_status != ConnectorCredentialPairStatus.DELETING
                    for cc_pair in cc_pairs
                ):
                    return

            if time.monotonic() - start > MAX_DELAY:
                raise TimeoutError(
                    f"CC pairs deletion was not completed within the {MAX_DELAY} seconds"
                )
            else:
                print("Some CC pairs are still being deleted, waiting...")
            time.sleep(2)


================================================
FILE: backend/tests/integration/common_utils/managers/chat.py
================================================
import json
from typing import Any
from typing import cast
from typing import Literal
from typing import TypedDict
from uuid import UUID

import requests
from requests.models import Response

from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import StreamingType
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestChatMessage
from tests.integration.common_utils.test_models import DATestChatSession
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ErrorResponse
from tests.integration.common_utils.test_models import StreamedResponse
from tests.integration.common_utils.test_models import ToolCallDebug
from tests.integration.common_utils.test_models import ToolName
from tests.integration.common_utils.test_models import ToolResult


class StreamPacketObj(TypedDict, total=False):
    """Base structure for streaming packet objects."""

    type: Literal[
        "message_start",
        "message_delta",
        "search_tool_start",
        "search_tool_queries_delta",
        "search_tool_documents_delta",
        "image_generation_start",
        "image_generation_heartbeat",
        "image_generation_final",
        "tool_call_debug",
    ]
    content: str
    final_documents: list[dict[str, Any]]
    is_internet_search: bool
    images: list[dict[str, Any]]
    queries: list[str]
    documents: list[dict[str, Any]]
    tool_call_id: str
    tool_name: str
    tool_args: dict[str, Any]


class PlacementData(TypedDict, total=False):
    """Structure for packet placement information."""

    turn_index: int
    tab_index: int
    sub_turn_index: int | None


class StreamPacketData(TypedDict, total=False):
    """Structure for streaming response packets."""

    reserved_assistant_message_id: int
    error: str
    stack_trace: str
    obj: StreamPacketObj
    placement: PlacementData


class ChatSessionManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        persona_id: int = 0,
        description: str = "Test chat session",
        project_id: int | None = None,
    ) -> DATestChatSession:
        chat_session_creation_req = ChatSessionCreationRequest(
            persona_id=persona_id,
            description=description,
            project_id=project_id,
        )
        response = requests.post(
            f"{API_SERVER_URL}/chat/create-chat-session",
            json=chat_session_creation_req.model_dump(),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        chat_session_id = response.json()["chat_session_id"]
        return DATestChatSession(
            id=chat_session_id, persona_id=persona_id, description=description
        )

    @staticmethod
    def send_message(
        chat_session_id: UUID,
        message: str,
        user_performing_action: DATestUser,
        parent_message_id: int | None = None,
        file_descriptors: list[FileDescriptor] | None = None,
        allowed_tool_ids: list[int] | None = None,
        forced_tool_ids: list[int] | None = None,
        chat_session: DATestChatSession | None = None,
        mock_llm_response: str | None = None,
        deep_research: bool = False,
        llm_override: LLMOverride | None = None,
    ) -> StreamedResponse:
        chat_message_req = SendMessageRequest(
            message=message,
            chat_session_id=chat_session_id,
            parent_message_id=(
                parent_message_id
                if parent_message_id is not None
                else AUTO_PLACE_AFTER_LATEST_MESSAGE
            ),
            file_descriptors=file_descriptors or [],
            allowed_tool_ids=allowed_tool_ids,
            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,
            mock_llm_response=mock_llm_response,
            deep_research=deep_research,
            llm_override=llm_override,
        )

        response = requests.post(
            f"{API_SERVER_URL}/chat/send-chat-message",
            json=chat_message_req.model_dump(mode="json"),
            headers=user_performing_action.headers,
            stream=True,
            cookies=user_performing_action.cookies,
        )

        streamed_response = ChatSessionManager.analyze_response(response)

        if not chat_session:
            return streamed_response

        # TODO: ideally we would get the research answer purpose from the chat history
        # but atm the field needed would not be used outside of testing, so we're not adding it.
        # chat_history = ChatSessionManager.get_chat_history(
        #     chat_session=chat_session,
        #     user_performing_action=user_performing_action,
        # )

        # for message_obj in chat_history:
        #     if message_obj.message_type == MessageType.ASSISTANT:
        #         streamed_response.research_answer_purpose = (
        #             message_obj.research_answer_purpose
        #         )
        #         streamed_response.assistant_message_id = message_obj.id
        #         break

        return streamed_response

    @staticmethod
    def send_message_with_disconnect(
        chat_session_id: UUID,
        message: str,
        user_performing_action: DATestUser,
        disconnect_after_packets: int = 0,
        parent_message_id: int | None = None,
        file_descriptors: list[FileDescriptor] | None = None,
        allowed_tool_ids: list[int] | None = None,
        forced_tool_ids: list[int] | None = None,
        mock_llm_response: str | None = None,
        deep_research: bool = False,
        llm_override: LLMOverride | None = None,
    ) -> None:
        """
        Send a message and simulate client disconnect before stream completes.

        This is useful for testing how the server handles client disconnections
        during streaming responses.

        Args:
            chat_session_id: The chat session ID
            message: The message to send
            disconnect_after_packets: Disconnect after receiving this many packets.
            ... (other standard message parameters)

        Returns:
            None. Caller can verify server-side cleanup via get_chat_history etc.
        """
        chat_message_req = SendMessageRequest(
            message=message,
            chat_session_id=chat_session_id,
            parent_message_id=(
                parent_message_id
                if parent_message_id is not None
                else AUTO_PLACE_AFTER_LATEST_MESSAGE
            ),
            file_descriptors=file_descriptors or [],
            allowed_tool_ids=allowed_tool_ids,
            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,
            mock_llm_response=mock_llm_response,
            deep_research=deep_research,
            llm_override=llm_override,
        )

        packets_received = 0

        with requests.post(
            f"{API_SERVER_URL}/chat/send-chat-message",
            json=chat_message_req.model_dump(mode="json"),
            headers=user_performing_action.headers,
            stream=True,
            cookies=user_performing_action.cookies,
        ) as response:
            for line in response.iter_lines():
                if not line:
                    continue

                packets_received += 1
                if packets_received > disconnect_after_packets:
                    break

        return None

    @staticmethod
    def analyze_response(response: Response) -> StreamedResponse:
        response_data = cast(
            list[StreamPacketData],
            [
                json.loads(line.decode("utf-8"))
                for line in response.iter_lines()
                if line
            ],
        )
        ind_to_tool_use: dict[int, ToolResult] = {}
        tool_call_debug: list[ToolCallDebug] = []
        top_documents: list[SearchDoc] = []
        heartbeat_packets: list[StreamPacketData] = []
        full_message = ""
        assistant_message_id: int | None = None
        error = None
        ind: int
        for data in response_data:
            if reserved_id := data.get("reserved_assistant_message_id"):
                assistant_message_id = reserved_id
            elif data.get("error"):
                error = ErrorResponse(
                    error=str(data["error"]),
                    stack_trace=str(data.get("stack_trace") or ""),
                )
            elif (error_obj := cast(dict[str, Any], data.get("obj") or {})) and (
                error_obj.get("error")
                or error_obj.get("type") == StreamingType.ERROR.value
            ):
                error = ErrorResponse(
                    error=str(error_obj.get("error") or "Streaming error"),
                    stack_trace=str(
                        error_obj.get("stack_trace") or data.get("stack_trace") or ""
                    ),
                )
            elif (
                (data_obj := data.get("obj"))
                and (packet_type := data_obj.get("type"))
                and (
                    ind := cast(
                        int,
                        (
                            data.get("ind")
                            if data.get("ind") is not None
                            else data.get("placement", {}).get("turn_index")
                        ),
                    )
                )
                is not None
            ):
                packet_type_str = str(packet_type)
                if packet_type_str == StreamingType.MESSAGE_START.value:
                    final_docs = data_obj.get("final_documents")
                    if isinstance(final_docs, list):
                        top_documents = [SearchDoc(**doc) for doc in final_docs]
                    full_message += data_obj.get("content", "")
                elif packet_type_str == StreamingType.MESSAGE_DELTA.value:
                    full_message += data_obj["content"]
                elif packet_type_str == StreamingType.SEARCH_TOOL_START.value:
                    tool_name = (
                        ToolName.INTERNET_SEARCH
                        if data_obj.get("is_internet_search", False)
                        else ToolName.INTERNAL_SEARCH
                    )
                    ind_to_tool_use[ind] = ToolResult(
                        tool_name=tool_name,
                    )
                elif packet_type_str == StreamingType.IMAGE_GENERATION_START.value:
                    ind_to_tool_use[ind] = ToolResult(
                        tool_name=ToolName.IMAGE_GENERATION,
                    )
                elif packet_type_str == StreamingType.IMAGE_GENERATION_HEARTBEAT.value:
                    # Track heartbeat packets for debugging/testing
                    heartbeat_packets.append(data)
                elif packet_type_str == StreamingType.IMAGE_GENERATION_FINAL.value:
                    from tests.integration.common_utils.test_models import (
                        GeneratedImage,
                    )

                    images = data_obj.get("images", [])
                    ind_to_tool_use[ind].images.extend(
                        [GeneratedImage(**img) for img in images]
                    )
                elif packet_type_str == StreamingType.SEARCH_TOOL_QUERIES_DELTA.value:
                    ind_to_tool_use[ind].queries.extend(data_obj.get("queries", []))
                elif packet_type_str == StreamingType.SEARCH_TOOL_DOCUMENTS_DELTA.value:
                    docs = []
                    for doc in data_obj.get("documents", []):
                        if "db_doc_id" in doc:
                            # Already a SavedSearchDoc format
                            docs.append(SavedSearchDoc(**doc))
                        else:
                            # SearchDoc format - Convert to SavedSearchDoc
                            search_doc = SearchDoc(**doc)
                            docs.append(
                                SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
                            )
                    ind_to_tool_use[ind].documents.extend(docs)
                elif packet_type_str == StreamingType.TOOL_CALL_DEBUG.value:
                    tool_call_debug.append(
                        ToolCallDebug(
                            tool_call_id=str(data_obj.get("tool_call_id", "")),
                            tool_name=str(data_obj.get("tool_name", "")),
                            tool_args=cast(
                                dict[str, Any], data_obj.get("tool_args") or {}
                            ),
                        )
                    )
        # If there's an error, assistant_message_id might not be present
        if not assistant_message_id and not error:
            raise ValueError("Assistant message id not found")
        return StreamedResponse(
            full_message=full_message,
            assistant_message_id=assistant_message_id or -1,  # Use -1 for error cases
            top_documents=top_documents,
            used_tools=list(ind_to_tool_use.values()),
            tool_call_debug=tool_call_debug,
            heartbeat_packets=[dict(packet) for packet in heartbeat_packets],
            error=error,
        )

    @staticmethod
    def get_chat_history(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> list[DATestChatMessage]:
        response = requests.get(
            f"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        return [
            DATestChatMessage(
                id=msg["message_id"],
                chat_session_id=chat_session.id,
                parent_message_id=msg.get("parent_message"),
                message=msg["message"],
                message_type=msg.get("message_type"),
                files=msg.get("files"),
            )
            for msg in response.json()["messages"]
        ]

    @staticmethod
    def create_chat_message_feedback(
        message_id: int,
        is_positive: bool,
        user_performing_action: DATestUser,
        feedback_text: str | None = None,
        predefined_feedback: str | None = None,
    ) -> None:
        response = requests.post(
            url=f"{API_SERVER_URL}/chat/create-chat-message-feedback",
            json={
                "chat_message_id": message_id,
                "is_positive": is_positive,
                "feedback_text": feedback_text,
                "predefined_feedback": predefined_feedback,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def delete(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Delete a chat session and all its related records (messages, agent data, etc.)
        Uses the default deletion method configured on the server.

        Returns True if deletion was successful, False otherwise.
        """
        response = requests.delete(
            f"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}",
            headers=user_performing_action.headers,
        )
        return response.ok

    @staticmethod
    def soft_delete(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Soft delete a chat session (marks as deleted but keeps in database).

        Returns True if deletion was successful, False otherwise.
        """
        # Since there's no direct API for soft delete, we'll use a query parameter approach
        # or make a direct call with hard_delete=False parameter via a new endpoint
        response = requests.delete(
            f"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}?hard_delete=false",
            headers=user_performing_action.headers,
        )
        return response.ok

    @staticmethod
    def hard_delete(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Hard delete a chat session (completely removes from database).

        Returns True if deletion was successful, False otherwise.
        """
        response = requests.delete(
            f"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}?hard_delete=true",
            headers=user_performing_action.headers,
        )
        return response.ok

    @staticmethod
    def verify_deleted(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Verify that a chat session has been deleted by attempting to retrieve it.

        Returns True if the chat session is confirmed deleted, False if it still exists.
        """
        response = requests.get(
            f"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}",
            headers=user_performing_action.headers,
        )
        # Chat session should return 404 if it doesn't exist or is deleted
        return response.status_code == 404

    @staticmethod
    def verify_soft_deleted(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Verify that a chat session has been soft deleted (marked as deleted but still in DB).

        Returns True if the chat session is soft deleted, False otherwise.
        """
        # Try to get the chat session with include_deleted=true
        response = requests.get(
            f"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}?include_deleted=true",
            headers=user_performing_action.headers,
        )

        if response.status_code == 200:
            # Chat exists, check if it's marked as deleted
            chat_data = response.json()
            return chat_data.get("deleted", False) is True
        return False

    @staticmethod
    def verify_hard_deleted(
        chat_session: DATestChatSession,
        user_performing_action: DATestUser,
    ) -> bool:
        """
        Verify that a chat session has been hard deleted (completely removed from DB).

        Returns True if the chat session is hard deleted, False otherwise.
        """
        # Try to get the chat session with include_deleted=true
        response = requests.get(
            f"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}?include_deleted=true",
            headers=user_performing_action.headers,
        )

        # For hard delete, even with include_deleted=true, the record should not exist
        return response.status_code != 200


================================================
FILE: backend/tests/integration/common_utils/managers/connector.py
================================================
from typing import Any
from uuid import uuid4

import requests

from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from onyx.server.documents.models import ConnectorUpdateRequest
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestUser


class ConnectorManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        source: DocumentSource = DocumentSource.FILE,
        input_type: InputType = InputType.LOAD_STATE,
        connector_specific_config: dict[str, Any] | None = None,
        access_type: AccessType = AccessType.PUBLIC,
        groups: list[int] | None = None,
        refresh_freq: int | None = None,
    ) -> DATestConnector:
        name = f"{name}-connector" if name else f"test-connector-{uuid4()}"

        connector_update_request = ConnectorUpdateRequest(
            name=name,
            source=source,
            input_type=input_type,
            connector_specific_config=(
                connector_specific_config
                or (
                    {
                        "file_locations": [],
                        "file_names": [],
                        "zip_metadata_file_id": None,
                    }
                    if source == DocumentSource.FILE
                    else {}
                )
            ),
            access_type=access_type,
            groups=groups or [],
            refresh_freq=refresh_freq,
        )

        response = requests.post(
            url=f"{API_SERVER_URL}/manage/admin/connector",
            json=connector_update_request.model_dump(),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        response_data = response.json()
        return DATestConnector(
            id=response_data.get("id"),
            name=name,
            source=source,
            input_type=input_type,
            connector_specific_config=connector_specific_config or {},
            groups=groups,
            access_type=access_type,
        )

    @staticmethod
    def edit(
        connector: DATestConnector,
        user_performing_action: DATestUser,
    ) -> None:
        response = requests.patch(
            url=f"{API_SERVER_URL}/manage/admin/connector/{connector.id}",
            json=connector.model_dump(exclude={"id"}),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def delete(
        connector: DATestConnector,
        user_performing_action: DATestUser,
    ) -> None:
        response = requests.delete(
            url=f"{API_SERVER_URL}/manage/admin/connector/{connector.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[DATestConnector]:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/connector",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [
            DATestConnector(
                id=conn.get("id"),
                name=conn.get("name", ""),
                source=conn.get("source", DocumentSource.FILE),
                input_type=conn.get("input_type", InputType.LOAD_STATE),
                connector_specific_config=conn.get("connector_specific_config", {}),
            )
            for conn in response.json()
        ]

    @staticmethod
    def get(
        connector_id: int,
        user_performing_action: DATestUser,
    ) -> DATestConnector:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/connector/{connector_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        conn = response.json()
        return DATestConnector(
            id=conn.get("id"),
            name=conn.get("name", ""),
            source=conn.get("source", DocumentSource.FILE),
            input_type=conn.get("input_type", InputType.LOAD_STATE),
            connector_specific_config=conn.get("connector_specific_config", {}),
        )


================================================
FILE: backend/tests/integration/common_utils/managers/credential.py
================================================
from typing import Any
from uuid import uuid4

import requests

from onyx.server.documents.models import CredentialSnapshot
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser


class CredentialManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        credential_json: dict[str, Any] | None = None,
        admin_public: bool = True,
        name: str | None = None,
        source: DocumentSource = DocumentSource.FILE,
        curator_public: bool = True,
        groups: list[int] | None = None,
    ) -> DATestCredential:
        name = f"{name}-credential" if name else f"test-credential-{uuid4()}"

        credential_request = {
            "name": name,
            "credential_json": credential_json or {},
            "admin_public": admin_public,
            "source": source,
            "curator_public": curator_public,
            "groups": groups or [],
        }

        response = requests.post(
            url=f"{API_SERVER_URL}/manage/credential",
            json=credential_request,
            headers=user_performing_action.headers,
        )

        response.raise_for_status()
        return DATestCredential(
            id=response.json()["id"],
            name=name,
            credential_json=credential_json or {},
            admin_public=admin_public,
            source=source,
            curator_public=curator_public,
            groups=groups or [],
        )

    @staticmethod
    def edit(
        credential: DATestCredential,
        user_performing_action: DATestUser,
    ) -> None:
        request = credential.model_dump(include={"name", "credential_json"})
        response = requests.put(
            url=f"{API_SERVER_URL}/manage/admin/credential/{credential.id}",
            json=request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def delete(
        credential: DATestCredential,
        user_performing_action: DATestUser,
    ) -> None:
        response = requests.delete(
            url=f"{API_SERVER_URL}/manage/credential/{credential.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def get(
        credential_id: int,
        user_performing_action: DATestUser,
    ) -> CredentialSnapshot:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/credential/{credential_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return CredentialSnapshot(**response.json())

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[CredentialSnapshot]:
        response = requests.get(
            f"{API_SERVER_URL}/manage/credential",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [CredentialSnapshot(**cred) for cred in response.json()]

    @staticmethod
    def verify(
        credential: DATestCredential,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        all_credentials = CredentialManager.get_all(user_performing_action)
        for fetched_credential in all_credentials:
            if credential.id == fetched_credential.id:
                if verify_deleted:
                    raise ValueError(
                        f"Credential {credential.id} found but should be deleted"
                    )
                if (
                    credential.name == fetched_credential.name
                    and credential.admin_public == fetched_credential.admin_public
                    and credential.source == fetched_credential.source
                    and credential.curator_public == fetched_credential.curator_public
                ):
                    return
        if not verify_deleted:
            raise ValueError(f"Credential {credential.id} not found")


================================================
FILE: backend/tests/integration/common_utils/managers/discord_bot.py
================================================
"""Manager for Discord bot API integration tests."""

import requests

from onyx.db.discord_bot import create_channel_config
from onyx.db.discord_bot import create_guild_config
from onyx.db.discord_bot import register_guild
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.utils import DiscordChannelView
from onyx.server.manage.discord_bot.utils import generate_discord_registration_key
from shared_configs.contextvars import get_current_tenant_id
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestDiscordChannelConfig
from tests.integration.common_utils.test_models import DATestDiscordGuildConfig
from tests.integration.common_utils.test_models import DATestUser

DISCORD_BOT_API_URL = f"{API_SERVER_URL}/manage/admin/discord-bot"


class DiscordBotManager:
    """Manager for Discord bot API operations."""

    # === Bot Config ===

    @staticmethod
    def get_bot_config(
        user_performing_action: DATestUser,
    ) -> dict:
        """Get Discord bot config."""
        response = requests.get(
            url=f"{DISCORD_BOT_API_URL}/config",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def create_bot_config(
        bot_token: str,
        user_performing_action: DATestUser,
    ) -> dict:
        """Create Discord bot config."""
        response = requests.post(
            url=f"{DISCORD_BOT_API_URL}/config",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            json={"bot_token": bot_token},
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def delete_bot_config(
        user_performing_action: DATestUser,
    ) -> dict:
        """Delete Discord bot config."""
        response = requests.delete(
            url=f"{DISCORD_BOT_API_URL}/config",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return response.json()

    # === Guild Config ===

    @staticmethod
    def list_guilds(
        user_performing_action: DATestUser,
    ) -> list[dict]:
        """List all guild configs."""
        response = requests.get(
            url=f"{DISCORD_BOT_API_URL}/guilds",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def create_guild(
        user_performing_action: DATestUser,
    ) -> DATestDiscordGuildConfig:
        """Create a new guild config with registration key."""
        response = requests.post(
            url=f"{DISCORD_BOT_API_URL}/guilds",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        data = response.json()
        return DATestDiscordGuildConfig(
            id=data["id"],
            registration_key=data["registration_key"],
        )

    @staticmethod
    def get_guild(
        config_id: int,
        user_performing_action: DATestUser,
    ) -> dict:
        """Get a specific guild config."""
        response = requests.get(
            url=f"{DISCORD_BOT_API_URL}/guilds/{config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def update_guild(
        config_id: int,
        user_performing_action: DATestUser,
        enabled: bool | None = None,
        default_persona_id: int | None = None,
    ) -> dict:
        """Update a guild config."""
        # Fetch current guild config to get existing values
        current_guild = DiscordBotManager.get_guild(config_id, user_performing_action)

        # Build request body with required fields
        body: dict = {
            "enabled": enabled if enabled is not None else current_guild["enabled"],
            "default_persona_id": (
                default_persona_id
                if default_persona_id is not None
                else current_guild.get("default_persona_id")
            ),
        }

        response = requests.patch(
            url=f"{DISCORD_BOT_API_URL}/guilds/{config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            json=body,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def delete_guild(
        config_id: int,
        user_performing_action: DATestUser,
    ) -> dict:
        """Delete a guild config."""
        response = requests.delete(
            url=f"{DISCORD_BOT_API_URL}/guilds/{config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return response.json()

    # === Channel Config ===

    @staticmethod
    def list_channels(
        guild_config_id: int,
        user_performing_action: DATestUser,
    ) -> list[DATestDiscordChannelConfig]:
        """List all channel configs for a guild."""
        response = requests.get(
            url=f"{DISCORD_BOT_API_URL}/guilds/{guild_config_id}/channels",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        response.raise_for_status()
        return [DATestDiscordChannelConfig(**c) for c in response.json()]

    @staticmethod
    def update_channel(
        guild_config_id: int,
        channel_config_id: int,
        user_performing_action: DATestUser,
        enabled: bool = False,
        thread_only_mode: bool = False,
        require_bot_invocation: bool = True,
        persona_override_id: int | None = None,
    ) -> DATestDiscordChannelConfig:
        """Update a channel config.

        All fields are required by the API. Default values match the channel
        config defaults from create_channel_config.
        """
        body: dict = {
            "enabled": enabled,
            "thread_only_mode": thread_only_mode,
            "require_bot_invocation": require_bot_invocation,
            "persona_override_id": persona_override_id,
        }

        response = requests.patch(
            url=f"{DISCORD_BOT_API_URL}/guilds/{guild_config_id}/channels/{channel_config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            json=body,
        )
        response.raise_for_status()
        return DATestDiscordChannelConfig(**response.json())

    # === Utility methods for testing ===

    @staticmethod
    def create_registered_guild_in_db(
        guild_id: int,
        guild_name: str,
    ) -> DATestDiscordGuildConfig:
        """Create a registered guild config directly in the database.

        This creates a guild that has already completed registration,
        with guild_id and guild_name set. Use this for testing channel
        endpoints which require a registered guild.
        """
        with get_session_with_current_tenant() as db_session:
            tenant_id = get_current_tenant_id()
            registration_key = generate_discord_registration_key(tenant_id)
            config = create_guild_config(db_session, registration_key)
            config = register_guild(db_session, config, guild_id, guild_name)
            db_session.commit()

            return DATestDiscordGuildConfig(
                id=config.id,
                registration_key=registration_key,
            )

    @staticmethod
    def get_guild_or_none(
        config_id: int,
        user_performing_action: DATestUser,
    ) -> dict | None:
        """Get a guild config, returning None if not found."""
        response = requests.get(
            url=f"{DISCORD_BOT_API_URL}/guilds/{config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        if response.status_code == 404:
            return None
        response.raise_for_status()
        return response.json()

    @staticmethod
    def delete_guild_if_exists(
        config_id: int,
        user_performing_action: DATestUser,
    ) -> bool:
        """Delete a guild config if it exists. Returns True if deleted."""
        response = requests.delete(
            url=f"{DISCORD_BOT_API_URL}/guilds/{config_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        if response.status_code == 404:
            return False
        response.raise_for_status()
        return True

    @staticmethod
    def delete_bot_config_if_exists(
        user_performing_action: DATestUser,
    ) -> bool:
        """Delete bot config if it exists. Returns True if deleted."""
        response = requests.delete(
            url=f"{DISCORD_BOT_API_URL}/config",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
        )
        if response.status_code == 404:
            return False
        response.raise_for_status()
        return True

    @staticmethod
    def create_test_channel_in_db(
        guild_config_id: int,
        channel_id: int,
        channel_name: str,
        channel_type: str = "text",
        is_private: bool = False,
    ) -> DATestDiscordChannelConfig:
        """Create a test channel config directly in the database.

        This is needed because channels are normally synced from Discord,
        not created via API. For testing the channel API endpoints,
        we need to populate test data directly.
        """
        with get_session_with_current_tenant() as db_session:
            channel_view = DiscordChannelView(
                channel_id=channel_id,
                channel_name=channel_name,
                channel_type=channel_type,
                is_private=is_private,
            )
            config = create_channel_config(db_session, guild_config_id, channel_view)
            db_session.commit()

            return DATestDiscordChannelConfig(
                id=config.id,
                guild_config_id=config.guild_config_id,
                channel_id=config.channel_id,
                channel_name=config.channel_name,
                channel_type=config.channel_type,
                is_private=config.is_private,
                enabled=config.enabled,
                thread_only_mode=config.thread_only_mode,
                require_bot_invocation=config.require_bot_invocation,
                persona_override_id=config.persona_override_id,
            )


================================================
FILE: backend/tests/integration/common_utils/managers/document.py
================================================
from uuid import uuid4

import requests
from sqlalchemy import and_
from sqlalchemy import select
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.enums import AccessType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import DocumentByConnectorCredentialPair
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import DATestAPIKey
from tests.integration.common_utils.managers.cc_pair import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import SimpleTestDocument
from tests.integration.common_utils.vespa import vespa_fixture


def _verify_document_permissions(
    retrieved_doc: dict,
    cc_pair: DATestCCPair,
    doc_creating_user: DATestUser,
    doc_set_names: list[str] | None = None,
    group_names: list[str] | None = None,
) -> None:
    acl_keys = set(retrieved_doc.get("access_control_list", {}).keys())
    print(f"ACL keys: {acl_keys}")

    if cc_pair.access_type == AccessType.PUBLIC:
        if "PUBLIC" not in acl_keys:
            raise ValueError(
                f"Document {retrieved_doc['document_id']} is public but does not have the PUBLIC ACL key"
            )

    if f"user_email:{doc_creating_user.email}" not in acl_keys:
        raise ValueError(
            f"Document {retrieved_doc['document_id']} was created by user"
            f" {doc_creating_user.email} but does not have the user_email:{doc_creating_user.email} ACL key"
        )

    if group_names is not None:
        expected_group_keys = {f"group:{group_name}" for group_name in group_names}
        found_group_keys = {key for key in acl_keys if key.startswith("group:")}
        if found_group_keys != expected_group_keys:
            raise ValueError(
                f"Document {retrieved_doc['document_id']} has incorrect group ACL keys. "
                f"Expected: {expected_group_keys}  Found: {found_group_keys}\n"
                f"All ACL keys: {acl_keys}"
            )

    if doc_set_names is not None:
        found_doc_set_names = set(retrieved_doc.get("document_sets", {}).keys())
        if found_doc_set_names != set(doc_set_names):
            raise ValueError(
                f"Document set names mismatch. \nFound: {found_doc_set_names}, \nExpected: {set(doc_set_names)}"
            )


def _generate_dummy_document(
    document_id: str,
    cc_pair_id: int,
    content: str | None = None,
    extra_metadata: dict | None = None,
) -> dict:
    text = content if content else f"This is test document {document_id}"

    metadata: dict = {"document_id": document_id}
    if extra_metadata:
        metadata.update(extra_metadata)

    return {
        "document": {
            "id": document_id,
            "sections": [
                {
                    "text": text,
                    "link": f"{document_id}",
                }
            ],
            "source": DocumentSource.NOT_APPLICABLE,
            "metadata": metadata,
            "semantic_identifier": f"Test Document {document_id}",
            "from_ingestion_api": True,
        },
        "cc_pair_id": cc_pair_id,
    }


class DocumentManager:
    """
    Manager for seeding documents via the ingestion API.
    Used to test various connector features.
    """

    @staticmethod
    def seed_dummy_docs(
        cc_pair: DATestCCPair,
        api_key: DATestAPIKey,
        num_docs: int = NUM_DOCS,
        document_ids: list[str] | None = None,
    ) -> list[SimpleTestDocument]:
        # Use provided document_ids if available, otherwise generate random UUIDs
        if document_ids is None:
            document_ids = [f"test-doc-{uuid4()}" for _ in range(num_docs)]
        else:
            num_docs = len(document_ids)
        # Create and ingest some documents
        documents: list[dict] = []
        for document_id in document_ids:
            document = _generate_dummy_document(document_id, cc_pair.id)
            documents.append(document)
            response = requests.post(
                f"{API_SERVER_URL}/onyx-api/ingestion",
                json=document,
                headers=api_key.headers,
            )
            response.raise_for_status()

        print(
            f"Seeding docs for api_key_id={api_key.api_key_id} completed successfully."
        )
        return [
            SimpleTestDocument(
                id=document["document"]["id"],
                content=document["document"]["sections"][0]["text"],
            )
            for document in documents
        ]

    @staticmethod
    def seed_doc_with_content(
        cc_pair: DATestCCPair,
        content: str,
        api_key: DATestAPIKey,
        document_id: str | None = None,
        metadata: dict | None = None,
    ) -> SimpleTestDocument:
        # Use provided document_ids if available, otherwise generate random UUIDs
        if document_id is None:
            document_id = f"test-doc-{uuid4()}"
        # Create and ingest some documents
        document: dict = _generate_dummy_document(
            document_id,
            cc_pair.id,
            content,
            extra_metadata=metadata,
        )
        response = requests.post(
            f"{API_SERVER_URL}/onyx-api/ingestion",
            json=document,
            headers=api_key.headers,
        )
        response.raise_for_status()

        print(
            f"Seeding doc for api_key_id={api_key.api_key_id} completed successfully."
        )

        return SimpleTestDocument(
            id=document["document"]["id"],
            content=document["document"]["sections"][0]["text"],
        )

    @staticmethod
    def verify(
        vespa_client: vespa_fixture,
        cc_pair: DATestCCPair,
        doc_creating_user: DATestUser,
        # If None, will not check doc sets or groups
        # If empty list, will check for empty doc sets or groups
        doc_set_names: list[str] | None = None,
        group_names: list[str] | None = None,
        verify_deleted: bool = False,
    ) -> None:
        doc_ids = [document.id for document in cc_pair.documents]
        retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"]

        retrieved_docs = {
            doc["fields"]["document_id"]: doc["fields"] for doc in retrieved_docs_dict
        }

        # NOTE(rkuo): too much log spam
        # Left this here for debugging purposes.
        # import json

        # print("DEBUGGING DOCUMENTS")
        # print(retrieved_docs)
        # for doc in retrieved_docs.values():
        #     printable_doc = doc.copy()
        #     print(printable_doc.keys())
        #     printable_doc.pop("embeddings")
        #     printable_doc.pop("title_embedding")
        #     print(json.dumps(printable_doc, indent=2))

        for document in cc_pair.documents:
            retrieved_doc = retrieved_docs.get(document.id)
            if not retrieved_doc:
                if not verify_deleted:
                    print(f"Document not found: {document.id}")
                    print(retrieved_docs.keys())
                    print(retrieved_docs.values())
                    raise ValueError(f"Document not found: {document.id}")
                continue
            if verify_deleted:
                raise ValueError(
                    f"Document found when it should be deleted: {document.id}"
                )
            _verify_document_permissions(
                retrieved_doc,
                cc_pair,
                doc_creating_user,
                doc_set_names,
                group_names,
            )

    @staticmethod
    def fetch_documents_for_cc_pair(
        cc_pair_id: int,
        db_session: Session,
        vespa_client: vespa_fixture,
    ) -> list[SimpleTestDocument]:
        stmt = (
            select(DocumentByConnectorCredentialPair)
            .join(
                ConnectorCredentialPair,
                and_(
                    DocumentByConnectorCredentialPair.connector_id
                    == ConnectorCredentialPair.connector_id,
                    DocumentByConnectorCredentialPair.credential_id
                    == ConnectorCredentialPair.credential_id,
                ),
            )
            .where(ConnectorCredentialPair.id == cc_pair_id)
        )
        documents = db_session.execute(stmt).scalars().all()
        if not documents:
            return []

        doc_ids = [document.id for document in documents]
        retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"]

        final_docs: list[SimpleTestDocument] = []
        # NOTE: they are really chunks, but we're assuming that for these tests
        # we only have one chunk per document for now
        for doc_dict in retrieved_docs_dict:
            doc_id = doc_dict["fields"]["document_id"]
            doc_content = doc_dict["fields"]["content"]
            # still called `image_file_name` in Vespa for backwards compatibility
            image_file_id = doc_dict["fields"].get("image_file_name", None)
            final_docs.append(
                SimpleTestDocument(
                    id=doc_id, content=doc_content, image_file_id=image_file_id
                )
            )

        return final_docs


class IngestionManager(DocumentManager):
    """
    Manager for additional ingestion API endpoints not covered by DocumentManager.
    Used specifically to test the ingestion API.
    """

    @staticmethod
    def list_all_ingestion_docs(
        api_key: DATestAPIKey,
    ) -> list[dict]:
        response = requests.get(
            f"{API_SERVER_URL}/onyx-api/ingestion",
            headers=api_key.headers,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def delete(
        document_id: str,
        api_key: DATestAPIKey,
    ) -> None:
        response = requests.delete(
            f"{API_SERVER_URL}/onyx-api/ingestion/{document_id}",
            headers=api_key.headers,
        )
        response.raise_for_status()
        print(f"Deleted document {document_id} successfully.")


================================================
FILE: backend/tests/integration/common_utils/managers/document_search.py
================================================
import requests

from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


class DocumentSearchManager:
    @staticmethod
    def search_documents(
        query: str,
        user_performing_action: DATestUser,
    ) -> list[str]:
        """
        Search for documents using the EE search API.

        Args:
            query: The search query string
            user_performing_action: The user performing the search (for auth)

        Returns:
            A list of document content strings (blurbs) from the search results
        """
        search_request = SendSearchQueryRequest(
            search_query=query,
            filters=None,
            stream=False,
        )
        result = requests.post(
            url=f"{API_SERVER_URL}/search/send-search-message",
            json=search_request.model_dump(),
            headers=user_performing_action.headers,
        )
        result.raise_for_status()
        result_json = result.json()
        search_response = SearchFullResponse(**result_json)

        # Return the blurbs as the document content
        # For small documents (like test docs), the blurb should contain the full content
        document_content_list: list[str] = [
            doc.blurb for doc in search_response.search_docs
        ]
        return document_content_list


================================================
FILE: backend/tests/integration/common_utils/managers/document_set.py
================================================
import time
from typing import Any
from uuid import UUID
from uuid import uuid4

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.test_models import DATestDocumentSet
from tests.integration.common_utils.test_models import DATestUser


class DocumentSetManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        description: str | None = None,
        cc_pair_ids: list[int] | None = None,
        is_public: bool = True,
        users: list[str] | None = None,
        groups: list[int] | None = None,
        federated_connectors: list[dict[str, Any]] | None = None,
    ) -> DATestDocumentSet:
        if name is None:
            name = f"test_doc_set_{str(uuid4())}"

        doc_set_creation_request = {
            "name": name,
            "description": description or name,
            "cc_pair_ids": cc_pair_ids or [],
            "is_public": is_public,
            "users": [str(UUID(user_id)) for user_id in (users or [])],
            "groups": groups or [],
            "federated_connectors": federated_connectors or [],
        }

        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/document-set",
            json=doc_set_creation_request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        return DATestDocumentSet(
            id=int(response.json()),
            name=name,
            description=description or name,
            cc_pair_ids=cc_pair_ids or [],
            is_public=is_public,
            is_up_to_date=True,
            users=users or [],
            groups=groups or [],
            federated_connectors=federated_connectors or [],
        )

    @staticmethod
    def edit(
        document_set: DATestDocumentSet,
        user_performing_action: DATestUser,
    ) -> bool:
        doc_set_update_request = {
            "id": document_set.id,
            "description": document_set.description,
            "cc_pair_ids": document_set.cc_pair_ids,
            "is_public": document_set.is_public,
            "users": [str(UUID(user_id)) for user_id in document_set.users],
            "groups": document_set.groups,
            "federated_connectors": document_set.federated_connectors,
        }
        response = requests.patch(
            f"{API_SERVER_URL}/manage/admin/document-set",
            json=doc_set_update_request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return True

    @staticmethod
    def delete(
        document_set: DATestDocumentSet,
        user_performing_action: DATestUser,
    ) -> bool:
        response = requests.delete(
            f"{API_SERVER_URL}/manage/admin/document-set/{document_set.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return True

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[DATestDocumentSet]:
        response = requests.get(
            f"{API_SERVER_URL}/manage/document-set",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [
            DATestDocumentSet(
                id=doc_set["id"],
                name=doc_set["name"],
                description=doc_set["description"],
                cc_pair_ids=[cc_pair["id"] for cc_pair in doc_set["cc_pair_summaries"]],
                is_public=doc_set["is_public"],
                is_up_to_date=doc_set["is_up_to_date"],
                users=[str(user_id) for user_id in doc_set["users"]],
                groups=doc_set["groups"],
                federated_connectors=doc_set["federated_connector_summaries"],
            )
            for doc_set in response.json()
        ]

    @staticmethod
    def wait_for_sync(
        user_performing_action: DATestUser,
        document_sets_to_check: list[DATestDocumentSet] | None = None,
    ) -> None:
        # wait for document sets to be synced
        start = time.time()
        while True:
            doc_sets = DocumentSetManager.get_all(user_performing_action)
            if document_sets_to_check:
                check_ids = {doc_set.id for doc_set in document_sets_to_check}
                doc_set_ids = {doc_set.id for doc_set in doc_sets}
                if not check_ids.issubset(doc_set_ids):
                    raise RuntimeError("Document set not found")
                doc_sets = [doc_set for doc_set in doc_sets if doc_set.id in check_ids]
            all_up_to_date = all(doc_set.is_up_to_date for doc_set in doc_sets)

            if all_up_to_date:
                print("Document sets synced successfully.")
                break

            if time.time() - start > MAX_DELAY:
                not_synced_doc_sets = [
                    doc_set for doc_set in doc_sets if not doc_set.is_up_to_date
                ]
                raise TimeoutError(
                    f"Document sets were not synced within the {MAX_DELAY} seconds. "
                    f"Remaining unsynced document sets: {len(not_synced_doc_sets)}. "
                    f"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}"
                )
            else:
                not_synced_doc_sets = [
                    doc_set for doc_set in doc_sets if not doc_set.is_up_to_date
                ]
                print(
                    f"Document sets were not synced yet, waiting... "
                    f"{len(not_synced_doc_sets)}/{len(doc_sets)} document sets still syncing. "
                    f"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}"
                )

            time.sleep(2)

    @staticmethod
    def verify(
        document_set: DATestDocumentSet,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        doc_sets = DocumentSetManager.get_all(user_performing_action)
        for doc_set in doc_sets:
            if doc_set.id == document_set.id:
                if verify_deleted:
                    raise ValueError(
                        f"Document set {document_set.id} found but should have been deleted"
                    )
                if (
                    doc_set.name == document_set.name
                    and set(doc_set.cc_pair_ids) == set(document_set.cc_pair_ids)
                    and doc_set.is_public == document_set.is_public
                    and set(doc_set.users) == set(document_set.users)
                    and set(doc_set.groups) == set(document_set.groups)
                    and doc_set.federated_connectors
                    == document_set.federated_connectors
                ):
                    return
        if not verify_deleted:
            raise ValueError(f"Document set {document_set.id} not found")


================================================
FILE: backend/tests/integration/common_utils/managers/file.py
================================================
import io
import mimetypes
from typing import cast
from typing import IO
from typing import List
from typing import Tuple

import requests

from onyx.file_store.models import FileDescriptor
from onyx.server.documents.models import FileUploadResponse
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


class FileManager:
    @staticmethod
    def upload_files(
        files: List[Tuple[str, IO]],
        user_performing_action: DATestUser,
    ) -> Tuple[List[FileDescriptor], str]:
        headers = user_performing_action.headers
        headers.pop("Content-Type", None)

        files_param = []
        for filename, file_obj in files:
            mime_type, _ = mimetypes.guess_type(filename)
            if mime_type is None:
                mime_type = "application/octet-stream"
            files_param.append(("files", (filename, file_obj, mime_type)))

        response = requests.post(
            f"{API_SERVER_URL}/user/projects/file/upload",
            files=files_param,
            headers=headers,
        )

        if not response.ok:
            try:
                detail = response.json().get("detail", response.text)
            except Exception:
                detail = response.text
            return (
                cast(List[FileDescriptor], []),
                f"Failed to upload files - {detail}",
            )

        response_json = response.json()
        # Convert UserFileSnapshot to FileDescriptor format
        file_descriptors: List[FileDescriptor] = []
        for user_file in response_json.get("user_files", []):
            file_descriptors.append(
                {
                    "id": user_file["file_id"],
                    "type": user_file["chat_file_type"],
                    "name": user_file["name"],
                    "user_file_id": str(user_file["id"]),
                }
            )
        return file_descriptors, ""

    @staticmethod
    def fetch_uploaded_file(
        file_id: str,
        user_performing_action: DATestUser,
    ) -> bytes:
        response = requests.get(
            f"{API_SERVER_URL}/chat/file/{file_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return response.content

    @staticmethod
    def upload_file_for_connector(
        file_path: str,
        file_name: str,
        user_performing_action: DATestUser,
        content_type: str = "application/octet-stream",
    ) -> FileUploadResponse:
        # Read the file content
        with open(file_path, "rb") as f:
            file_content = f.read()

        # Create a file-like object
        file_obj = io.BytesIO(file_content)

        # The 'files' form field expects a list of files
        files = [("files", (file_name, file_obj, content_type))]

        # Use the user's headers but without Content-Type
        # as requests will set the correct multipart/form-data Content-Type for us
        headers = user_performing_action.headers.copy()
        if "Content-Type" in headers:
            del headers["Content-Type"]

        # Make the request
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/connector/file/upload",
            files=files,
            headers=headers,
        )

        if not response.ok:
            try:
                error_detail = response.json().get("detail", "Unknown error")
            except Exception:
                error_detail = response.text

            raise Exception(
                f"Unable to upload files - {error_detail} (Status code: {response.status_code})"
            )

        response_json = response.json()
        return FileUploadResponse(**response_json)


================================================
FILE: backend/tests/integration/common_utils/managers/image_generation.py
================================================
import json
import os
from typing import Any
from uuid import uuid4

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestImageGenerationConfig
from tests.integration.common_utils.test_models import DATestUser


def _serialize_custom_config(
    custom_config: dict[str, Any] | None,
) -> dict[str, str] | None:
    """Convert custom_config values to strings (API expects dict[str, str])."""
    if custom_config is None:
        return None
    return {
        key: json.dumps(value) if not isinstance(value, str) else value
        for key, value in custom_config.items()
    }


class ImageGenerationConfigManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        image_provider_id: str | None = None,
        model_name: str = "gpt-image-1",
        provider: str = "openai",
        api_key: str | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
        custom_config: dict[str, Any] | None = None,
        is_default: bool = False,
    ) -> DATestImageGenerationConfig:
        """Create a new image generation config with new credentials."""
        image_provider_id = image_provider_id or f"test-provider-{uuid4()}"

        response = requests.post(
            f"{API_SERVER_URL}/admin/image-generation/config",
            json={
                "image_provider_id": image_provider_id,
                "model_name": model_name,
                "provider": provider,
                "api_key": api_key or os.environ["OPENAI_API_KEY"],
                "api_base": api_base,
                "api_version": api_version,
                "deployment_name": deployment_name,
                "custom_config": _serialize_custom_config(custom_config),
                "is_default": is_default,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        data = response.json()

        return DATestImageGenerationConfig(
            image_provider_id=data["image_provider_id"],
            model_configuration_id=data["model_configuration_id"],
            model_name=data["model_name"],
            llm_provider_id=data["llm_provider_id"],
            llm_provider_name=data["llm_provider_name"],
            is_default=data["is_default"],
        )

    @staticmethod
    def create_from_provider(
        source_llm_provider_id: int,
        user_performing_action: DATestUser,
        image_provider_id: str | None = None,
        model_name: str = "gpt-image-1",
        api_base: str | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
        is_default: bool = False,
    ) -> DATestImageGenerationConfig:
        """Create a new image generation config by cloning from an existing LLM provider."""
        image_provider_id = image_provider_id or f"test-provider-{uuid4()}"

        response = requests.post(
            f"{API_SERVER_URL}/admin/image-generation/config",
            json={
                "image_provider_id": image_provider_id,
                "model_name": model_name,
                "source_llm_provider_id": source_llm_provider_id,
                "api_base": api_base,
                "api_version": api_version,
                "deployment_name": deployment_name,
                "is_default": is_default,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        data = response.json()

        return DATestImageGenerationConfig(
            image_provider_id=data["image_provider_id"],
            model_configuration_id=data["model_configuration_id"],
            model_name=data["model_name"],
            llm_provider_id=data["llm_provider_id"],
            llm_provider_name=data["llm_provider_name"],
            is_default=data["is_default"],
        )

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[DATestImageGenerationConfig]:
        """Get all image generation configs."""
        response = requests.get(
            f"{API_SERVER_URL}/admin/image-generation/config",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [DATestImageGenerationConfig(**config) for config in response.json()]

    @staticmethod
    def get_credentials(
        image_provider_id: str,
        user_performing_action: DATestUser,
    ) -> dict:
        """Get credentials for an image generation config."""
        response = requests.get(
            f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/credentials",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def update(
        image_provider_id: str,
        model_name: str,
        user_performing_action: DATestUser,
        provider: str | None = None,
        api_key: str | None = None,
        source_llm_provider_id: int | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        deployment_name: str | None = None,
    ) -> DATestImageGenerationConfig:
        """Update an existing image generation config."""
        payload: dict = {
            "model_name": model_name,
            "api_base": api_base,
            "api_version": api_version,
            "deployment_name": deployment_name,
        }

        if source_llm_provider_id is not None:
            payload["source_llm_provider_id"] = source_llm_provider_id
        elif api_key is not None and provider is not None:
            payload["provider"] = provider
            payload["api_key"] = api_key
        else:
            raise ValueError(
                f"Either source_llm_provider_id or (api_key + provider) must be provided. "
                f"Got: source_llm_provider_id={source_llm_provider_id}, provider={provider}, api_key={'***' if api_key else None}"
            )

        response = requests.put(
            f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}",
            json=payload,
            headers=user_performing_action.headers,
        )
        if not response.ok:
            print(f"Update failed with status {response.status_code}: {response.text}")
        response.raise_for_status()
        data = response.json()

        return DATestImageGenerationConfig(
            image_provider_id=data["image_provider_id"],
            model_configuration_id=data["model_configuration_id"],
            model_name=data["model_name"],
            llm_provider_id=data["llm_provider_id"],
            llm_provider_name=data["llm_provider_name"],
            is_default=data["is_default"],
        )

    @staticmethod
    def delete(
        image_provider_id: str,
        user_performing_action: DATestUser,
    ) -> None:
        """Delete an image generation config."""
        response = requests.delete(
            f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def set_default(
        image_provider_id: str,
        user_performing_action: DATestUser,
    ) -> None:
        """Set an image generation config as the default."""
        response = requests.post(
            f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/default",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def verify(
        config: DATestImageGenerationConfig,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        """Verify that a config exists (or doesn't exist if verify_deleted=True)."""
        all_configs = ImageGenerationConfigManager.get_all(user_performing_action)

        for fetched_config in all_configs:
            if fetched_config.image_provider_id == config.image_provider_id:
                if verify_deleted:
                    raise ValueError(
                        f"ImageGenerationConfig {config.image_provider_id} found but should be deleted"
                    )
                # Verify the config matches
                if (
                    fetched_config.model_name == config.model_name
                    and fetched_config.is_default == config.is_default
                ):
                    return

        if not verify_deleted:
            raise ValueError(
                f"ImageGenerationConfig {config.image_provider_id} not found"
            )


================================================
FILE: backend/tests/integration/common_utils/managers/index_attempt.py
================================================
import time
from datetime import datetime
from datetime import timedelta
from urllib.parse import urlencode

import requests

from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import IndexModelStatus
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.db.search_settings import get_current_search_settings
from onyx.server.documents.models import IndexAttemptSnapshot
from onyx.server.documents.models import PaginatedReturn
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.test_models import DATestIndexAttempt
from tests.integration.common_utils.test_models import DATestUser


class IndexAttemptManager:
    @staticmethod
    def create_test_index_attempts(
        num_attempts: int,
        cc_pair_id: int,
        from_beginning: bool = False,
        status: IndexingStatus = IndexingStatus.SUCCESS,
        new_docs_indexed: int = 10,
        total_docs_indexed: int = 10,
        docs_removed_from_index: int = 0,
        error_msg: str | None = None,
        base_time: datetime | None = None,
    ) -> list[DATestIndexAttempt]:
        if base_time is None:
            base_time = datetime.now()

        attempts = []
        with get_session_with_current_tenant() as db_session:
            # Get the current search settings
            search_settings = get_current_search_settings(db_session)
            if (
                not search_settings
                or search_settings.status != IndexModelStatus.PRESENT
            ):
                raise ValueError("No current search settings found with PRESENT status")

            for i in range(num_attempts):
                time_created = base_time - timedelta(hours=i)

                index_attempt = IndexAttempt(
                    connector_credential_pair_id=cc_pair_id,
                    from_beginning=from_beginning,
                    status=status,
                    new_docs_indexed=new_docs_indexed,
                    total_docs_indexed=total_docs_indexed,
                    docs_removed_from_index=docs_removed_from_index,
                    error_msg=error_msg,
                    time_created=time_created,
                    time_started=time_created,
                    time_updated=time_created,
                    search_settings_id=search_settings.id,
                )

                db_session.add(index_attempt)
                db_session.flush()  # To get the ID

                attempts.append(
                    DATestIndexAttempt(
                        id=index_attempt.id,
                        status=index_attempt.status,
                        new_docs_indexed=index_attempt.new_docs_indexed,
                        total_docs_indexed=index_attempt.total_docs_indexed,
                        docs_removed_from_index=index_attempt.docs_removed_from_index,
                        error_msg=index_attempt.error_msg,
                        time_started=index_attempt.time_started,
                        time_updated=index_attempt.time_updated,
                    )
                )

            db_session.commit()

        return attempts

    @staticmethod
    def get_index_attempt_page(
        cc_pair_id: int,
        user_performing_action: DATestUser,
        page: int = 0,
        page_size: int = 10,
    ) -> PaginatedReturn[IndexAttemptSnapshot]:
        query_params: dict[str, str | int] = {
            "page_num": page,
            "page_size": page_size,
        }

        url = f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/index-attempts?{urlencode(query_params, doseq=True)}"
        response = requests.get(
            url=url,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        data = response.json()
        return PaginatedReturn(
            items=[IndexAttemptSnapshot(**item) for item in data["items"]],
            total_items=data["total_items"],
        )

    @staticmethod
    def get_latest_index_attempt_for_cc_pair(
        cc_pair_id: int,
        user_performing_action: DATestUser,
    ) -> IndexAttemptSnapshot | None:
        """Get an IndexAttempt by ID"""
        index_attempts = IndexAttemptManager.get_index_attempt_page(
            cc_pair_id, user_performing_action=user_performing_action
        ).items
        if not index_attempts:
            return None

        index_attempts = sorted(
            index_attempts, key=lambda x: x.time_started or "0", reverse=True
        )
        return index_attempts[0]

    @staticmethod
    def wait_for_index_attempt_start(
        cc_pair_id: int,
        user_performing_action: DATestUser,
        index_attempts_to_ignore: list[int] | None = None,
        timeout: float = MAX_DELAY,
    ) -> IndexAttemptSnapshot:
        """Wait for an IndexAttempt to start"""
        start = datetime.now()
        index_attempts_to_ignore = index_attempts_to_ignore or []

        while True:
            index_attempt = IndexAttemptManager.get_latest_index_attempt_for_cc_pair(
                cc_pair_id=cc_pair_id,
                user_performing_action=user_performing_action,
            )
            if (
                index_attempt
                and index_attempt.time_started
                and index_attempt.id not in index_attempts_to_ignore
            ):
                return index_attempt

            elapsed = (datetime.now() - start).total_seconds()
            if elapsed > timeout:
                raise TimeoutError(
                    f"IndexAttempt for CC Pair {cc_pair_id} did not start within {timeout} seconds"
                )

    @staticmethod
    def get_index_attempt_by_id(
        index_attempt_id: int,
        cc_pair_id: int,
        user_performing_action: DATestUser,
    ) -> IndexAttemptSnapshot:
        page_num = 0
        page_size = 10
        while True:
            page = IndexAttemptManager.get_index_attempt_page(
                cc_pair_id=cc_pair_id,
                page=page_num,
                page_size=page_size,
                user_performing_action=user_performing_action,
            )
            for attempt in page.items:
                if attempt.id == index_attempt_id:
                    return attempt

            if len(page.items) < page_size:
                break

            page_num += 1

        raise ValueError(f"IndexAttempt {index_attempt_id} not found")

    @staticmethod
    def wait_for_index_attempt_completion(
        index_attempt_id: int,
        cc_pair_id: int,
        user_performing_action: DATestUser,
        timeout: float = MAX_DELAY,
    ) -> None:
        """Wait for an IndexAttempt to complete"""
        start = time.monotonic()
        while True:
            index_attempt = IndexAttemptManager.get_index_attempt_by_id(
                index_attempt_id=index_attempt_id,
                cc_pair_id=cc_pair_id,
                user_performing_action=user_performing_action,
            )

            if index_attempt.status and index_attempt.status.is_terminal():
                print(
                    f"IndexAttempt {index_attempt_id} completed with status {index_attempt.status}"
                )
                return

            elapsed = time.monotonic() - start
            if elapsed > timeout:
                raise TimeoutError(
                    f"IndexAttempt {index_attempt_id} did not complete within {timeout} seconds"
                )

            print(
                f"Waiting for IndexAttempt {index_attempt_id} to complete. elapsed={elapsed:.2f} timeout={timeout}"
            )
            time.sleep(5)

    @staticmethod
    def get_index_attempt_errors_for_cc_pair(
        cc_pair_id: int,
        user_performing_action: DATestUser,
        include_resolved: bool = True,
    ) -> list[IndexAttemptErrorPydantic]:
        url = f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/errors?page_size=100"
        if include_resolved:
            url += "&include_resolved=true"
        response = requests.get(
            url=url,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        data = response.json()
        return [IndexAttemptErrorPydantic(**item) for item in data["items"]]


================================================
FILE: backend/tests/integration/common_utils/managers/llm_provider.py
================================================
import os
from uuid import uuid4

import requests

from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import DefaultModel
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


class LLMProviderManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        provider: str | None = None,
        api_key: str | None = None,
        default_model_name: str | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        groups: list[int] | None = None,
        personas: list[int] | None = None,
        is_public: bool | None = None,
        set_as_default: bool = True,
    ) -> DATestLLMProvider:
        print(f"Seeding LLM Providers for {user_performing_action.email}...")

        llm_provider = LLMProviderUpsertRequest(
            name=name or f"test-provider-{uuid4()}",
            provider=provider or LlmProviderNames.OPENAI,
            api_key=api_key or os.environ["OPENAI_API_KEY"],
            api_base=api_base,
            api_version=api_version,
            custom_config=None,
            is_public=True if is_public is None else is_public,
            groups=groups or [],
            personas=personas or [],
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=default_model_name or "gpt-4o-mini",
                    is_visible=True,
                    max_input_tokens=None,
                    display_name=default_model_name or "gpt-4o-mini",
                    supports_image_input=True,
                )
            ],
            api_key_changed=True,
        )

        llm_response = requests.put(
            f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
            json=llm_provider.model_dump(),
            headers=user_performing_action.headers,
        )
        llm_response.raise_for_status()
        response_data = llm_response.json()

        result_llm = DATestLLMProvider(
            id=response_data["id"],
            name=response_data["name"],
            provider=response_data["provider"],
            api_key=response_data["api_key"],
            default_model_name=default_model_name or "gpt-4o-mini",
            is_public=response_data["is_public"],
            is_auto_mode=response_data.get("is_auto_mode", False),
            groups=response_data["groups"],
            personas=response_data.get("personas", []),
            api_base=response_data["api_base"],
            api_version=response_data["api_version"],
        )

        if set_as_default:
            if default_model_name is None:
                default_model_name = "gpt-4o-mini"
            set_default_response = requests.post(
                f"{API_SERVER_URL}/admin/llm/default",
                json={
                    "provider_id": response_data["id"],
                    "model_name": default_model_name,
                },
                headers=(
                    user_performing_action.headers
                    if user_performing_action
                    else GENERAL_HEADERS
                ),
            )
            set_default_response.raise_for_status()

        return result_llm

    @staticmethod
    def delete(
        llm_provider: DATestLLMProvider,
        user_performing_action: DATestUser,
    ) -> bool:
        response = requests.delete(
            f"{API_SERVER_URL}/admin/llm/provider/{llm_provider.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return True

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[LLMProviderView]:
        response = requests.get(
            f"{API_SERVER_URL}/admin/llm/provider",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [LLMProviderView(**p) for p in response.json()["providers"]]

    @staticmethod
    def verify(
        llm_provider: DATestLLMProvider,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        all_llm_providers = LLMProviderManager.get_all(user_performing_action)
        default_model = LLMProviderManager.get_default_model(user_performing_action)
        for fetched_llm_provider in all_llm_providers:
            model_names = [
                model.name for model in fetched_llm_provider.model_configurations
            ]
            if llm_provider.id == fetched_llm_provider.id:
                if verify_deleted:
                    raise ValueError(
                        f"LLM Provider {llm_provider.id} found but should be deleted"
                    )
                fetched_llm_groups = set(fetched_llm_provider.groups)
                llm_provider_groups = set(llm_provider.groups)

                # NOTE: returned api keys are sanitized and should not match
                if (
                    fetched_llm_groups == llm_provider_groups
                    and llm_provider.provider == fetched_llm_provider.provider
                    and (
                        default_model is None or default_model.model_name in model_names
                    )
                    and llm_provider.is_public == fetched_llm_provider.is_public
                    and set(fetched_llm_provider.personas) == set(llm_provider.personas)
                ):
                    return
        if not verify_deleted:
            raise ValueError(f"LLM Provider {llm_provider.id} not found")

    @staticmethod
    def get_default_model(
        user_performing_action: DATestUser | None = None,
    ) -> DefaultModel | None:
        response = requests.get(
            f"{API_SERVER_URL}/admin/llm/provider",
            headers=(
                user_performing_action.headers
                if user_performing_action
                else GENERAL_HEADERS
            ),
        )
        response.raise_for_status()
        default_text = response.json().get("default_text")
        if default_text is None:
            return None
        return DefaultModel(**default_text)


================================================
FILE: backend/tests/integration/common_utils/managers/pat.py
================================================
"""Helper for managing Personal Access Tokens in integration tests."""

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestPAT
from tests.integration.common_utils.test_models import DATestUser


class PATManager:
    """Manager for creating and managing Personal Access Tokens in tests."""

    @staticmethod
    def create(
        name: str,
        expiration_days: int | None,
        user_performing_action: DATestUser,
    ) -> DATestPAT:
        """Create a Personal Access Token for a user.

        Args:
            name: Name of the token
            expiration_days: Number of days until expiration (None for never)
            user_performing_action: User creating the token

        Returns:
            DATestPAT with PAT data including the raw token
        """
        response = requests.post(
            f"{API_SERVER_URL}/user/pats",
            json={"name": name, "expiration_days": expiration_days},
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            timeout=60,
        )
        response.raise_for_status()
        return DATestPAT(**response.json())

    @staticmethod
    def list(user_performing_action: DATestUser) -> list[DATestPAT]:
        """List all PATs for a user.

        Args:
            user_performing_action: User listing their tokens

        Returns:
            List of DATestPAT (without raw tokens)
        """
        response = requests.get(
            f"{API_SERVER_URL}/user/pats",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            timeout=60,
        )
        response.raise_for_status()
        return [DATestPAT(**pat_data) for pat_data in response.json()]

    @staticmethod
    def revoke(token_id: int, user_performing_action: DATestUser) -> None:
        """Revoke a Personal Access Token.

        Args:
            token_id: ID of the token to revoke
            user_performing_action: User revoking the token
        """
        response = requests.delete(
            f"{API_SERVER_URL}/user/pats/{token_id}",
            headers=user_performing_action.headers,
            cookies=user_performing_action.cookies,
            timeout=60,
        )
        response.raise_for_status()

    @staticmethod
    def authenticate(token: str) -> requests.Response:
        """Authenticate using a PAT token and get user info.

        Args:
            token: The raw PAT token

        Returns:
            Response from /me endpoint
        """
        return requests.get(
            f"{API_SERVER_URL}/me",
            headers={"Authorization": f"Bearer {token}"},
            timeout=60,
        )

    @staticmethod
    def get_auth_headers(token: str) -> dict[str, str]:
        """Get authorization headers for a PAT token.

        Args:
            token: The raw PAT token

        Returns:
            Headers dict with Authorization bearer token
        """
        return {"Authorization": f"Bearer {token}"}


================================================
FILE: backend/tests/integration/common_utils/managers/persona.py
================================================
from uuid import UUID
from uuid import uuid4

import requests

from onyx.server.features.persona.models import FullPersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestPersona
from tests.integration.common_utils.test_models import DATestPersonaLabel
from tests.integration.common_utils.test_models import DATestUser


class PersonaManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        description: str | None = None,
        system_prompt: str | None = None,
        task_prompt: str | None = None,
        is_public: bool = True,
        datetime_aware: bool = False,
        document_set_ids: list[int] | None = None,
        tool_ids: list[int] | None = None,
        llm_model_provider_override: str | None = None,
        llm_model_version_override: str | None = None,
        users: list[str] | None = None,
        groups: list[int] | None = None,
        label_ids: list[int] | None = None,
        user_file_ids: list[str] | None = None,
        display_priority: int | None = None,
        featured: bool = False,
    ) -> DATestPersona:
        name = name or f"test-persona-{uuid4()}"
        description = description or f"Description for {name}"
        system_prompt = system_prompt or f"System prompt for {name}"
        task_prompt = task_prompt or f"Task prompt for {name}"

        persona_creation_request = PersonaUpsertRequest(
            name=name,
            description=description,
            system_prompt=system_prompt,
            task_prompt=task_prompt,
            datetime_aware=datetime_aware,
            is_public=is_public,
            document_set_ids=document_set_ids or [],
            tool_ids=tool_ids or [],
            llm_model_provider_override=llm_model_provider_override,
            llm_model_version_override=llm_model_version_override,
            users=[UUID(user) for user in (users or [])],
            groups=groups or [],
            label_ids=label_ids or [],
            user_file_ids=user_file_ids or [],
            display_priority=display_priority,
            is_featured=featured,
        )

        response = requests.post(
            f"{API_SERVER_URL}/persona",
            json=persona_creation_request.model_dump(mode="json"),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        persona_data = response.json()

        return DATestPersona(
            id=persona_data["id"],
            name=name,
            description=description,
            is_public=is_public,
            system_prompt=system_prompt,
            task_prompt=task_prompt,
            datetime_aware=datetime_aware,
            document_set_ids=document_set_ids or [],
            tool_ids=tool_ids or [],
            llm_model_provider_override=llm_model_provider_override,
            llm_model_version_override=llm_model_version_override,
            users=users or [],
            groups=groups or [],
            label_ids=label_ids or [],
            is_featured=featured,
        )

    @staticmethod
    def edit(
        persona: DATestPersona,
        user_performing_action: DATestUser,
        name: str | None = None,
        description: str | None = None,
        system_prompt: str | None = None,
        task_prompt: str | None = None,
        is_public: bool | None = None,
        datetime_aware: bool = False,
        document_set_ids: list[int] | None = None,
        tool_ids: list[int] | None = None,
        llm_model_provider_override: str | None = None,
        llm_model_version_override: str | None = None,
        users: list[str] | None = None,
        groups: list[int] | None = None,
        label_ids: list[int] | None = None,
        featured: bool | None = None,
    ) -> DATestPersona:
        system_prompt = system_prompt or f"System prompt for {persona.name}"
        task_prompt = task_prompt or f"Task prompt for {persona.name}"

        persona_update_request = PersonaUpsertRequest(
            name=name or persona.name,
            description=description or persona.description,
            system_prompt=system_prompt,
            task_prompt=task_prompt,
            datetime_aware=datetime_aware,
            is_public=persona.is_public if is_public is None else is_public,
            document_set_ids=document_set_ids or persona.document_set_ids,
            tool_ids=tool_ids or persona.tool_ids,
            llm_model_provider_override=(
                llm_model_provider_override or persona.llm_model_provider_override
            ),
            llm_model_version_override=(
                llm_model_version_override or persona.llm_model_version_override
            ),
            users=[UUID(user) for user in (users or persona.users)],
            groups=groups or persona.groups,
            label_ids=label_ids or persona.label_ids,
            is_featured=featured if featured is not None else persona.is_featured,
        )

        response = requests.patch(
            f"{API_SERVER_URL}/persona/{persona.id}",
            json=persona_update_request.model_dump(mode="json"),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        updated_persona_data = response.json()

        return DATestPersona(
            id=updated_persona_data["id"],
            name=updated_persona_data["name"],
            description=updated_persona_data["description"],
            is_public=updated_persona_data["is_public"],
            system_prompt=system_prompt,
            task_prompt=task_prompt,
            datetime_aware=datetime_aware,
            document_set_ids=[ds["id"] for ds in updated_persona_data["document_sets"]],
            tool_ids=[t["id"] for t in updated_persona_data["tools"]],
            llm_model_provider_override=updated_persona_data[
                "llm_model_provider_override"
            ],
            llm_model_version_override=updated_persona_data[
                "llm_model_version_override"
            ],
            users=[user["email"] for user in updated_persona_data["users"]],
            groups=updated_persona_data["groups"],
            label_ids=[label["id"] for label in updated_persona_data["labels"]],
            is_featured=updated_persona_data["is_featured"],
        )

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[FullPersonaSnapshot]:
        response = requests.get(
            f"{API_SERVER_URL}/admin/persona",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [FullPersonaSnapshot(**persona) for persona in response.json()]

    @staticmethod
    def get_one(
        persona_id: int,
        user_performing_action: DATestUser,
    ) -> list[FullPersonaSnapshot]:
        response = requests.get(
            f"{API_SERVER_URL}/persona/{persona_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [FullPersonaSnapshot(**response.json())]

    @staticmethod
    def verify(
        persona: DATestPersona,
        user_performing_action: DATestUser,
    ) -> bool:
        all_personas = PersonaManager.get_one(
            persona_id=persona.id,
            user_performing_action=user_performing_action,
        )
        for fetched_persona in all_personas:
            if fetched_persona.id == persona.id:
                mismatches: list[tuple[str, object, object]] = []

                if fetched_persona.name != persona.name:
                    mismatches.append(("name", persona.name, fetched_persona.name))
                if fetched_persona.description != persona.description:
                    mismatches.append(
                        (
                            "description",
                            persona.description,
                            fetched_persona.description,
                        )
                    )
                if fetched_persona.is_public != persona.is_public:
                    mismatches.append(
                        ("is_public", persona.is_public, fetched_persona.is_public)
                    )
                if fetched_persona.is_featured != persona.is_featured:
                    mismatches.append(
                        (
                            "is_featured",
                            persona.is_featured,
                            fetched_persona.is_featured,
                        )
                    )
                if (
                    fetched_persona.llm_model_provider_override
                    != persona.llm_model_provider_override
                ):
                    mismatches.append(
                        (
                            "llm_model_provider_override",
                            persona.llm_model_provider_override,
                            fetched_persona.llm_model_provider_override,
                        )
                    )
                if (
                    fetched_persona.llm_model_version_override
                    != persona.llm_model_version_override
                ):
                    mismatches.append(
                        (
                            "llm_model_version_override",
                            persona.llm_model_version_override,
                            fetched_persona.llm_model_version_override,
                        )
                    )
                if fetched_persona.system_prompt != persona.system_prompt:
                    mismatches.append(
                        (
                            "system_prompt",
                            persona.system_prompt,
                            fetched_persona.system_prompt,
                        )
                    )
                if fetched_persona.task_prompt != persona.task_prompt:
                    mismatches.append(
                        (
                            "task_prompt",
                            persona.task_prompt,
                            fetched_persona.task_prompt,
                        )
                    )
                if fetched_persona.datetime_aware != persona.datetime_aware:
                    mismatches.append(
                        (
                            "datetime_aware",
                            persona.datetime_aware,
                            fetched_persona.datetime_aware,
                        )
                    )

                fetched_document_set_ids = {
                    document_set.id for document_set in fetched_persona.document_sets
                }
                expected_document_set_ids = set(persona.document_set_ids)
                if fetched_document_set_ids != expected_document_set_ids:
                    mismatches.append(
                        (
                            "document_set_ids",
                            sorted(expected_document_set_ids),
                            sorted(fetched_document_set_ids),
                        )
                    )

                fetched_tool_ids = {tool.id for tool in fetched_persona.tools}
                expected_tool_ids = set(persona.tool_ids)
                if fetched_tool_ids != expected_tool_ids:
                    mismatches.append(
                        (
                            "tool_ids",
                            sorted(expected_tool_ids),
                            sorted(fetched_tool_ids),
                        )
                    )

                fetched_user_emails = {user.email for user in fetched_persona.users}
                expected_user_emails = set(persona.users)
                if fetched_user_emails != expected_user_emails:
                    mismatches.append(
                        (
                            "users",
                            sorted(expected_user_emails),
                            sorted(fetched_user_emails),
                        )
                    )

                fetched_group_ids = set(fetched_persona.groups)
                expected_group_ids = set(persona.groups)
                if fetched_group_ids != expected_group_ids:
                    mismatches.append(
                        (
                            "groups",
                            sorted(expected_group_ids),
                            sorted(fetched_group_ids),
                        )
                    )

                fetched_label_ids = {label.id for label in fetched_persona.labels}
                expected_label_ids = set(persona.label_ids)
                if fetched_label_ids != expected_label_ids:
                    mismatches.append(
                        (
                            "label_ids",
                            sorted(expected_label_ids),
                            sorted(fetched_label_ids),
                        )
                    )

                if mismatches:
                    print(
                        f"Persona verification failed for id={persona.id}. Fields mismatched:"
                    )
                    for field_name, expected_value, actual_value in mismatches:
                        print(
                            f" - {field_name}: expected {expected_value!r}, got {actual_value!r}"
                        )
                    return False
                return True
        print(
            f"Persona verification failed: persona with id={persona.id} not found in fetched results."
        )
        return False

    @staticmethod
    def delete(
        persona: DATestPersona,
        user_performing_action: DATestUser,
    ) -> bool:
        response = requests.delete(
            f"{API_SERVER_URL}/persona/{persona.id}",
            headers=user_performing_action.headers,
        )
        return response.ok


class PersonaLabelManager:
    @staticmethod
    def create(
        label: DATestPersonaLabel,
        user_performing_action: DATestUser,
    ) -> DATestPersonaLabel:
        response = requests.post(
            f"{API_SERVER_URL}/persona/labels",
            json={
                "name": label.name,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        response_data = response.json()
        label.id = response_data["id"]
        return label

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> list[DATestPersonaLabel]:
        response = requests.get(
            f"{API_SERVER_URL}/persona/labels",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [DATestPersonaLabel(**label) for label in response.json()]

    @staticmethod
    def update(
        label: DATestPersonaLabel,
        user_performing_action: DATestUser,
    ) -> DATestPersonaLabel:
        response = requests.patch(
            f"{API_SERVER_URL}/admin/persona/label/{label.id}",
            json={
                "label_name": label.name,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return label

    @staticmethod
    def delete(
        label: DATestPersonaLabel,
        user_performing_action: DATestUser,
    ) -> bool:
        response = requests.delete(
            f"{API_SERVER_URL}/admin/persona/label/{label.id}",
            headers=user_performing_action.headers,
        )
        return response.ok

    @staticmethod
    def verify(
        label: DATestPersonaLabel,
        user_performing_action: DATestUser,
    ) -> bool:
        all_labels = PersonaLabelManager.get_all(user_performing_action)
        for fetched_label in all_labels:
            if fetched_label.id == label.id:
                return fetched_label.name == label.name
        return False


================================================
FILE: backend/tests/integration/common_utils/managers/project.py
================================================
from typing import List

import requests

from onyx.server.features.projects.models import CategorizedFilesSnapshot
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.features.projects.models import UserProjectSnapshot
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


class ProjectManager:
    @staticmethod
    def create(
        name: str,
        user_performing_action: DATestUser,
    ) -> UserProjectSnapshot:
        """Create a new project via API."""
        response = requests.post(
            f"{API_SERVER_URL}/user/projects/create",
            params={"name": name},
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return UserProjectSnapshot.model_validate(response.json())

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
    ) -> List[UserProjectSnapshot]:
        """Get all projects for a user via API."""
        response = requests.get(
            f"{API_SERVER_URL}/user/projects",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [UserProjectSnapshot.model_validate(obj) for obj in response.json()]

    @staticmethod
    def delete(
        project_id: int,
        user_performing_action: DATestUser,
    ) -> bool:
        """Delete a project via API."""
        response = requests.delete(
            f"{API_SERVER_URL}/user/projects/{project_id}",
            headers=user_performing_action.headers,
        )
        return response.status_code == 204

    @staticmethod
    def verify_deleted(
        project_id: int,
        user_performing_action: DATestUser,
    ) -> bool:
        """Verify that a project has been deleted by ensuring it's not in list."""
        response = requests.get(
            f"{API_SERVER_URL}/user/projects",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        projects = [UserProjectSnapshot.model_validate(obj) for obj in response.json()]
        return all(p.id != project_id for p in projects)

    @staticmethod
    def verify_files_unlinked(
        project_id: int,
        user_performing_action: DATestUser,
    ) -> bool:
        """Verify that all files have been unlinked from the project via API."""
        response = requests.get(
            f"{API_SERVER_URL}/user/projects/files/{project_id}",
            headers=user_performing_action.headers,
        )
        if response.status_code == 404:
            return True
        if not response.ok:
            return False
        files = [UserFileSnapshot.model_validate(obj) for obj in response.json()]
        return len(files) == 0

    @staticmethod
    def verify_chat_sessions_unlinked(
        project_id: int,
        user_performing_action: DATestUser,
    ) -> bool:
        """Verify that all chat sessions have been unlinked from the project via API."""
        response = requests.get(
            f"{API_SERVER_URL}/user/projects/{project_id}",
            headers=user_performing_action.headers,
        )
        if response.status_code == 404:
            return True
        if not response.ok:
            return False
        try:
            project = UserProjectSnapshot.model_validate(response.json())
            chat_sessions = getattr(project, "chat_sessions", [])
            return len(chat_sessions or []) == 0
        except Exception:
            # If response doesn't include chat_sessions, assume unlinked
            return True

    @staticmethod
    def upload_files(
        project_id: int,
        files: List[tuple[str, bytes]],  # List of (filename, content) tuples
        user_performing_action: DATestUser,
    ) -> CategorizedFilesSnapshot:
        """Upload files to a project via API."""
        # Build multipart form-data
        files_payload = [
            (
                "files",
                (filename, content, "text/plain"),
            )
            for filename, content in files
        ]

        data = {"project_id": str(project_id)} if project_id is not None else {}

        # Let requests set Content-Type boundary by not overriding header
        headers = dict(user_performing_action.headers or {})
        headers.pop("Content-Type", None)

        response = requests.post(
            f"{API_SERVER_URL}/user/projects/file/upload",
            data=data,
            files=files_payload,
            headers=headers,
        )
        response.raise_for_status()
        return CategorizedFilesSnapshot.model_validate(response.json())

    @staticmethod
    def get_project_files(
        project_id: int,
        user_performing_action: DATestUser,
    ) -> List[UserFileSnapshot]:
        """Get all files associated with a project via API."""
        response = requests.get(
            f"{API_SERVER_URL}/user/projects/files/{project_id}",
            headers=user_performing_action.headers,
        )
        if response.status_code == 404:
            return []
        response.raise_for_status()
        return [UserFileSnapshot.model_validate(obj) for obj in response.json()]

    @staticmethod
    def set_instructions(
        project_id: int,
        instructions: str,
        user_performing_action: DATestUser,
    ) -> str:
        """Set project instructions via API."""
        response = requests.post(
            f"{API_SERVER_URL}/user/projects/{project_id}/instructions",
            json={"instructions": instructions},
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return (response.json() or {}).get("instructions") or ""


================================================
FILE: backend/tests/integration/common_utils/managers/query_history.py
================================================
import time
from datetime import datetime
from urllib.parse import urlencode
from uuid import UUID

import requests
from requests.models import CaseInsensitiveDict

from ee.onyx.server.query_history.models import ChatSessionMinimal
from ee.onyx.server.query_history.models import ChatSessionSnapshot
from onyx.configs.constants import QAFeedbackType
from onyx.db.enums import TaskStatus
from onyx.server.documents.models import PaginatedReturn
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.test_models import DATestUser


class QueryHistoryManager:
    @staticmethod
    def get_query_history_page(
        user_performing_action: DATestUser,
        page_num: int = 0,
        page_size: int = 10,
        feedback_type: QAFeedbackType | None = None,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> PaginatedReturn[ChatSessionMinimal]:
        query_params: dict[str, str | int] = {
            "page_num": page_num,
            "page_size": page_size,
        }
        if feedback_type:
            query_params["feedback_type"] = feedback_type.value
        if start_time:
            query_params["start_time"] = start_time.isoformat()
        if end_time:
            query_params["end_time"] = end_time.isoformat()

        response = requests.get(
            url=f"{API_SERVER_URL}/admin/chat-session-history?{urlencode(query_params, doseq=True)}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        data = response.json()
        return PaginatedReturn(
            items=[ChatSessionMinimal(**item) for item in data["items"]],
            total_items=data["total_items"],
        )

    @staticmethod
    def get_chat_session_admin(
        chat_session_id: UUID | str,
        user_performing_action: DATestUser,
    ) -> ChatSessionSnapshot:
        response = requests.get(
            url=f"{API_SERVER_URL}/admin/chat-session-history/{chat_session_id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return ChatSessionSnapshot(**response.json())

    @staticmethod
    def get_query_history_as_csv(
        user_performing_action: DATestUser,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> tuple[CaseInsensitiveDict[str], str]:
        query_params: dict[str, str | int] = {}
        if start_time:
            query_params["start"] = start_time.isoformat()
        if end_time:
            query_params["end"] = end_time.isoformat()

        start_response = requests.post(
            url=f"{API_SERVER_URL}/admin/query-history/start-export?{urlencode(query_params, doseq=True)}",
            headers=user_performing_action.headers,
        )
        start_response.raise_for_status()
        request_id = start_response.json()["request_id"]

        deadline = time.time() + MAX_DELAY
        while time.time() < deadline:
            status_response = requests.get(
                url=f"{API_SERVER_URL}/admin/query-history/export-status",
                params={"request_id": request_id},
                headers=user_performing_action.headers,
            )
            status_response.raise_for_status()
            status = status_response.json()["status"]
            if status == TaskStatus.SUCCESS:
                break
            if status == TaskStatus.FAILURE:
                raise RuntimeError("Query history export task failed")
            time.sleep(2)
        else:
            raise TimeoutError(
                f"Query history export not completed within {MAX_DELAY} seconds"
            )

        download_response = requests.get(
            url=f"{API_SERVER_URL}/admin/query-history/download",
            params={"request_id": request_id},
            headers=user_performing_action.headers,
        )
        download_response.raise_for_status()

        if not download_response.content:
            raise RuntimeError(
                "Query history CSV download returned zero-length content"
            )

        return download_response.headers, download_response.content.decode()


================================================
FILE: backend/tests/integration/common_utils/managers/scim_client.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS


class ScimClient:
    """HTTP client for making authenticated SCIM v2 requests."""

    @staticmethod
    def _headers(raw_token: str) -> dict[str, str]:
        return {
            **GENERAL_HEADERS,
            "Authorization": f"Bearer {raw_token}",
        }

    @staticmethod
    def get(path: str, raw_token: str) -> requests.Response:
        return requests.get(
            f"{API_SERVER_URL}/scim/v2{path}",
            headers=ScimClient._headers(raw_token),
            timeout=60,
        )

    @staticmethod
    def post(path: str, raw_token: str, json: dict) -> requests.Response:
        return requests.post(
            f"{API_SERVER_URL}/scim/v2{path}",
            json=json,
            headers=ScimClient._headers(raw_token),
            timeout=60,
        )

    @staticmethod
    def put(path: str, raw_token: str, json: dict) -> requests.Response:
        return requests.put(
            f"{API_SERVER_URL}/scim/v2{path}",
            json=json,
            headers=ScimClient._headers(raw_token),
            timeout=60,
        )

    @staticmethod
    def patch(path: str, raw_token: str, json: dict) -> requests.Response:
        return requests.patch(
            f"{API_SERVER_URL}/scim/v2{path}",
            json=json,
            headers=ScimClient._headers(raw_token),
            timeout=60,
        )

    @staticmethod
    def delete(path: str, raw_token: str) -> requests.Response:
        return requests.delete(
            f"{API_SERVER_URL}/scim/v2{path}",
            headers=ScimClient._headers(raw_token),
            timeout=60,
        )

    @staticmethod
    def get_no_auth(path: str) -> requests.Response:
        return requests.get(
            f"{API_SERVER_URL}/scim/v2{path}",
            headers=GENERAL_HEADERS,
            timeout=60,
        )


================================================
FILE: backend/tests/integration/common_utils/managers/scim_token.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestScimToken
from tests.integration.common_utils.test_models import DATestUser


class ScimTokenManager:
    @staticmethod
    def create(
        name: str,
        user_performing_action: DATestUser,
    ) -> DATestScimToken:
        response = requests.post(
            f"{API_SERVER_URL}/admin/enterprise-settings/scim/token",
            json={"name": name},
            headers=user_performing_action.headers,
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
        return DATestScimToken(
            id=data["id"],
            name=data["name"],
            token_display=data["token_display"],
            is_active=data["is_active"],
            created_at=data["created_at"],
            last_used_at=data.get("last_used_at"),
            raw_token=data["raw_token"],
        )

    @staticmethod
    def get_active(
        user_performing_action: DATestUser,
    ) -> DATestScimToken | None:
        response = requests.get(
            f"{API_SERVER_URL}/admin/enterprise-settings/scim/token",
            headers=user_performing_action.headers,
            timeout=60,
        )
        if response.status_code == 404:
            return None
        response.raise_for_status()
        data = response.json()
        return DATestScimToken(
            id=data["id"],
            name=data["name"],
            token_display=data["token_display"],
            is_active=data["is_active"],
            created_at=data["created_at"],
            last_used_at=data.get("last_used_at"),
        )


================================================
FILE: backend/tests/integration/common_utils/managers/settings.py
================================================
from typing import Any
from typing import Dict
from typing import Optional

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestSettings
from tests.integration.common_utils.test_models import DATestUser


class SettingsManager:
    @staticmethod
    def get_settings(
        user_performing_action: DATestUser,
    ) -> tuple[Dict[str, Any], str]:
        headers = user_performing_action.headers
        headers.pop("Content-Type", None)

        response = requests.get(
            f"{API_SERVER_URL}/admin/settings",
            headers=headers,
        )

        if not response.ok:
            return (
                {},
                f"Failed to get settings - {response.json().get('detail', 'Unknown error')}",
            )

        return response.json(), ""

    @staticmethod
    def update_settings(
        settings: DATestSettings,
        user_performing_action: DATestUser,
    ) -> tuple[Dict[str, Any], str]:
        headers = user_performing_action.headers
        headers.pop("Content-Type", None)

        payload = settings.model_dump()
        response = requests.put(
            f"{API_SERVER_URL}/admin/settings",
            json=payload,
            headers=headers,
        )

        if not response.ok:
            return (
                {},
                f"Failed to update settings - {response.json().get('detail', 'Unknown error')}",
            )

        return response.json(), ""

    @staticmethod
    def get_setting(
        key: str,
        user_performing_action: DATestUser,
    ) -> Optional[Any]:
        settings, error = SettingsManager.get_settings(user_performing_action)
        if error:
            return None
        return settings.get(key)


================================================
FILE: backend/tests/integration/common_utils/managers/tenant.py
================================================
from datetime import datetime
from datetime import timedelta

import jwt
import requests

from onyx.server.manage.models import AllUsersResponse
from onyx.server.models import FullUserSnapshot
from onyx.server.models import InvitedUserSnapshot
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


def generate_auth_token() -> str:
    payload = {
        "iss": "control_plane",
        "exp": datetime.utcnow() + timedelta(minutes=5),
        "iat": datetime.utcnow(),
        "scope": "tenant:create",
    }
    token = jwt.encode(payload, "", algorithm="HS256")
    return token


class TenantManager:
    @staticmethod
    def get_all_users(
        user_performing_action: DATestUser,
    ) -> AllUsersResponse:
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/users",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        data = response.json()
        return AllUsersResponse(
            accepted=[FullUserSnapshot(**user) for user in data["accepted"]],
            invited=[InvitedUserSnapshot(**user) for user in data["invited"]],
            slack_users=[FullUserSnapshot(**user) for user in data["slack_users"]],
            accepted_pages=data["accepted_pages"],
            invited_pages=data["invited_pages"],
            slack_users_pages=data["slack_users_pages"],
        )

    @staticmethod
    def verify_user_in_tenant(
        user: DATestUser,
        user_performing_action: DATestUser,
    ) -> None:
        all_users = TenantManager.get_all_users(user_performing_action)
        for accepted_user in all_users.accepted:
            if accepted_user.email == user.email and accepted_user.id == user.id:
                return
        raise ValueError(f"User {user.email} not found in tenant")


================================================
FILE: backend/tests/integration/common_utils/managers/tool.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestTool
from tests.integration.common_utils.test_models import DATestUser


class ToolManager:
    @staticmethod
    def list_tools(
        user_performing_action: DATestUser,
    ) -> list[DATestTool]:
        response = requests.get(
            url=f"{API_SERVER_URL}/tool",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return [
            DATestTool(
                id=tool.get("id"),
                name=tool.get("name"),
                description=tool.get("description"),
                display_name=tool.get("display_name"),
                in_code_tool_id=tool.get("in_code_tool_id"),
            )
            for tool in response.json()
        ]


================================================
FILE: backend/tests/integration/common_utils/managers/user.py
================================================
from copy import deepcopy
from urllib.parse import urlencode
from uuid import uuid4

import pytest
import requests
from requests import HTTPError

from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import ANONYMOUS_USER_UUID
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.server.documents.models import PaginatedReturn
from onyx.server.manage.models import UserInfo
from onyx.server.models import FullUserSnapshot
from onyx.server.models import InvitedUserSnapshot
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestUser

DOMAIN = "example.com"
DEFAULT_PASSWORD = "TestPassword123!"


def build_email(name: str) -> str:
    return f"{name}@example.com"


class UserManager:
    @staticmethod
    def get_anonymous_user() -> DATestUser:
        """Get a DATestUser representing the anonymous user.

        Anonymous users are real users in the database with LIMITED role.
        They don't have login cookies - requests are made with GENERAL_HEADERS.
        The anonymous_user_enabled setting must be True for these requests to work.
        """
        return DATestUser(
            id=ANONYMOUS_USER_UUID,
            email=ANONYMOUS_USER_EMAIL,
            password="",
            headers=GENERAL_HEADERS,
            role=UserRole.LIMITED,
            is_active=True,
        )

    @staticmethod
    def create(
        name: str | None = None,
        email: str | None = None,
    ) -> DATestUser:
        if name is None:
            name = f"test{str(uuid4())}"

        if email is None:
            email = build_email(name)

        password = DEFAULT_PASSWORD

        body = {
            "email": email,
            "username": email,
            "password": password,
        }
        response = requests.post(
            url=f"{API_SERVER_URL}/auth/register",
            json=body,
            headers=GENERAL_HEADERS,
        )
        response.raise_for_status()

        test_user = DATestUser(
            id=response.json()["id"],
            email=email,
            password=password,
            headers=deepcopy(GENERAL_HEADERS),
            # fill as basic for now, the `login_as_user` call will
            # fill it in correctly
            role=UserRole.BASIC,
            is_active=True,
        )
        print(f"Created user {test_user.email}")

        return UserManager.login_as_user(test_user)

    @staticmethod
    def login_as_user(test_user: DATestUser) -> DATestUser:
        data = urlencode(
            {
                "username": test_user.email,
                "password": test_user.password,
            }
        )
        headers = test_user.headers.copy()
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        response = requests.post(
            url=f"{API_SERVER_URL}/auth/login",
            data=data,
            headers=headers,
        )

        response.raise_for_status()

        cookies = response.cookies.get_dict()
        session_cookie = cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)

        if not session_cookie:
            raise Exception("Failed to login")

        # Set cookies in the headers
        test_user.headers["Cookie"] = f"fastapiusersauth={session_cookie}; "
        test_user.cookies = {"fastapiusersauth": session_cookie}

        # Get user role from /me endpoint
        me_response = requests.get(
            url=f"{API_SERVER_URL}/me",
            headers=test_user.headers,
            cookies=test_user.cookies,
        )
        me_response.raise_for_status()
        me_response_json = me_response.json()
        test_user.id = me_response_json["id"]
        role = UserRole(me_response_json["role"])
        test_user.role = role

        return test_user

    @staticmethod
    def get_permissions(user: DATestUser) -> list[str]:
        response = requests.get(
            url=f"{API_SERVER_URL}/me/permissions",
            headers=user.headers,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def is_role(
        user_to_verify: DATestUser,
        target_role: UserRole,
    ) -> bool:
        response = requests.get(
            url=f"{API_SERVER_URL}/me",
            headers=user_to_verify.headers,
            cookies=user_to_verify.cookies,
        )

        if user_to_verify.is_active is False:
            with pytest.raises(HTTPError):
                response.raise_for_status()
            return user_to_verify.role == target_role
        else:
            response.raise_for_status()

        role_from_response = response.json().get("role", None)

        if role_from_response is None:
            return user_to_verify.role == target_role

        return target_role == UserRole(role_from_response)

    @staticmethod
    def set_role(
        user_to_set: DATestUser,
        target_role: UserRole,
        user_performing_action: DATestUser,
        explicit_override: bool = False,
    ) -> DATestUser:
        response = requests.patch(
            url=f"{API_SERVER_URL}/manage/set-user-role",
            json={
                "user_email": user_to_set.email,
                "new_role": target_role.value,
                "explicit_override": explicit_override,
            },
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        new_user_updated_role = DATestUser(
            id=user_to_set.id,
            email=user_to_set.email,
            password=user_to_set.password,
            headers=user_to_set.headers,
            role=target_role,
            is_active=user_to_set.is_active,
        )
        return new_user_updated_role

    # TODO: Add a way to check invited status
    @staticmethod
    def is_status(user_to_verify: DATestUser, target_status: bool) -> bool:
        response = requests.get(
            url=f"{API_SERVER_URL}/me",
            headers=user_to_verify.headers,
        )

        if target_status is False:
            with pytest.raises(HTTPError):
                response.raise_for_status()
        else:
            response.raise_for_status()

        is_active = response.json().get("is_active", None)
        if is_active is None:
            return user_to_verify.is_active == target_status
        return target_status == is_active

    @staticmethod
    def set_status(
        user_to_set: DATestUser,
        target_status: bool,
        user_performing_action: DATestUser,
    ) -> DATestUser:
        url_substring: str
        if target_status is True:
            url_substring = "activate"
        elif target_status is False:
            url_substring = "deactivate"
        response = requests.patch(
            url=f"{API_SERVER_URL}/manage/admin/{url_substring}-user",
            json={"user_email": user_to_set.email},
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        new_user_updated_status = DATestUser(
            id=user_to_set.id,
            email=user_to_set.email,
            password=user_to_set.password,
            headers=user_to_set.headers,
            role=user_to_set.role,
            is_active=target_status,
        )
        return new_user_updated_status

    @staticmethod
    def create_test_users(
        user_performing_action: DATestUser,
        user_name_prefix: str,
        count: int,
        role: UserRole = UserRole.BASIC,
        is_active: bool | None = None,
    ) -> list[DATestUser]:
        users_list = []
        for i in range(1, count + 1):
            user = UserManager.create(name=f"{user_name_prefix}_{i}")
            if role != UserRole.BASIC:
                user = UserManager.set_role(user, role, user_performing_action)
            if is_active is not None:
                user = UserManager.set_status(user, is_active, user_performing_action)
            users_list.append(user)
        return users_list

    @staticmethod
    def get_user_page(
        user_performing_action: DATestUser,
        page_num: int = 0,
        page_size: int = 10,
        search_query: str | None = None,
        role_filter: list[UserRole] | None = None,
        is_active_filter: bool | None = None,
    ) -> PaginatedReturn[FullUserSnapshot]:
        query_params: dict[str, str | list[str] | int] = {
            "page_num": page_num,
            "page_size": page_size,
        }
        if search_query:
            query_params["q"] = search_query
        if role_filter:
            query_params["roles"] = [role.value for role in role_filter]
        if is_active_filter is not None:
            query_params["is_active"] = is_active_filter

        response = requests.get(
            url=f"{API_SERVER_URL}/manage/users/accepted?{urlencode(query_params, doseq=True)}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        data = response.json()
        paginated_result = PaginatedReturn(
            items=[FullUserSnapshot(**user) for user in data["items"]],
            total_items=data["total_items"],
        )
        return paginated_result

    @staticmethod
    def invite_user(
        user_to_invite_email: str, user_performing_action: DATestUser
    ) -> None:
        """Invite a user by email to join the organization.

        Args:
            user_to_invite_email: Email of the user to invite
            user_performing_action: User with admin permissions performing the invitation
        """
        response = requests.put(
            url=f"{API_SERVER_URL}/manage/admin/users",
            headers=user_performing_action.headers,
            json={"emails": [user_to_invite_email]},
        )
        response.raise_for_status()

    @staticmethod
    def accept_invitation(tenant_id: str, user_performing_action: DATestUser) -> None:
        """Accept an invitation to join the organization.

        Args:
            tenant_id: ID of the tenant/organization to accept invitation for
            user_performing_action: User accepting the invitation
        """
        response = requests.post(
            url=f"{API_SERVER_URL}/tenants/users/invite/accept",
            headers=user_performing_action.headers,
            json={"tenant_id": tenant_id},
        )
        response.raise_for_status()

    @staticmethod
    def get_invited_users(
        user_performing_action: DATestUser,
    ) -> list[InvitedUserSnapshot]:
        """Get a list of all invited users.

        Args:
            user_performing_action: User with admin permissions performing the action

        Returns:
            List of invited user snapshots
        """
        response = requests.get(
            url=f"{API_SERVER_URL}/manage/users/invited",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        return [InvitedUserSnapshot(**user) for user in response.json()]

    @staticmethod
    def get_user_info(user_performing_action: DATestUser) -> UserInfo:
        """Get user info for the current user.

        Args:
            user_performing_action: User performing the action
        """
        response = requests.get(
            url=f"{API_SERVER_URL}/me",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return UserInfo(**response.json())


================================================
FILE: backend/tests/integration/common_utils/managers/user_group.py
================================================
import time
from uuid import uuid4

import requests

from ee.onyx.server.user_group.models import UserGroup
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import DATestUserGroup


class UserGroupManager:
    @staticmethod
    def create(
        user_performing_action: DATestUser,
        name: str | None = None,
        user_ids: list[str] | None = None,
        cc_pair_ids: list[int] | None = None,
    ) -> DATestUserGroup:
        name = f"{name}-user-group" if name else f"test-user-group-{uuid4()}"

        request = {
            "name": name,
            "user_ids": user_ids or [],
            "cc_pair_ids": cc_pair_ids or [],
        }
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/user-group",
            json=request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        test_user_group = DATestUserGroup(
            id=response.json()["id"],
            name=response.json()["name"],
            user_ids=[user["id"] for user in response.json()["users"]],
            cc_pair_ids=[cc_pair["id"] for cc_pair in response.json()["cc_pairs"]],
        )
        return test_user_group

    @staticmethod
    def edit(
        user_group: DATestUserGroup,
        user_performing_action: DATestUser,
    ) -> None:
        response = requests.patch(
            f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}",
            json=user_group.model_dump(),
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def delete(
        user_group: DATestUserGroup,
        user_performing_action: DATestUser,
    ) -> None:
        response = requests.delete(
            f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def add_users(
        user_group: DATestUserGroup,
        user_ids: list[str],
        user_performing_action: DATestUser,
    ) -> DATestUserGroup:
        request = {
            "user_ids": user_ids,
        }

        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/add-users",
            json=request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

        user_group.user_ids = [user["id"] for user in response.json()["users"]]
        user_group.cc_pair_ids = [
            cc_pair["id"] for cc_pair in response.json()["cc_pairs"]
        ]
        user_group.name = response.json()["name"]
        return user_group

    @staticmethod
    def set_curator_status(
        test_user_group: DATestUserGroup,
        user_to_set_as_curator: DATestUser,
        user_performing_action: DATestUser,
        is_curator: bool = True,
    ) -> None:
        set_curator_request = {
            "user_id": user_to_set_as_curator.id,
            "is_curator": is_curator,
        }
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/user-group/{test_user_group.id}/set-curator",
            json=set_curator_request,
            headers=user_performing_action.headers,
        )
        response.raise_for_status()

    @staticmethod
    def get_permissions(
        user_group: DATestUserGroup,
        user_performing_action: DATestUser,
    ) -> list[str]:
        response = requests.get(
            f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/permissions",
            headers=user_performing_action.headers,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def get_all(
        user_performing_action: DATestUser,
        include_default: bool = False,
    ) -> list[UserGroup]:
        params: dict[str, str] = {}
        if include_default:
            params["include_default"] = "true"
        response = requests.get(
            f"{API_SERVER_URL}/manage/admin/user-group",
            headers=user_performing_action.headers,
            params=params,
        )
        response.raise_for_status()
        return [UserGroup(**ug) for ug in response.json()]

    @staticmethod
    def verify(
        user_group: DATestUserGroup,
        user_performing_action: DATestUser,
        verify_deleted: bool = False,
    ) -> None:
        all_user_groups = UserGroupManager.get_all(user_performing_action)
        for fetched_user_group in all_user_groups:
            if user_group.id == fetched_user_group.id:
                if verify_deleted:
                    raise ValueError(
                        f"User group {user_group.id} found but should be deleted"
                    )
                fetched_cc_ids = {cc_pair.id for cc_pair in fetched_user_group.cc_pairs}
                fetched_user_ids = {user.id for user in fetched_user_group.users}
                user_group_cc_ids = set(user_group.cc_pair_ids)
                user_group_user_ids = set(user_group.user_ids)
                if (
                    fetched_cc_ids == user_group_cc_ids
                    and fetched_user_ids == user_group_user_ids
                ):
                    return
        if not verify_deleted:
            raise ValueError(f"User group {user_group.id} not found")

    @staticmethod
    def wait_for_sync(
        user_performing_action: DATestUser,
        user_groups_to_check: list[DATestUserGroup] | None = None,
    ) -> None:
        start = time.time()
        while True:
            user_groups = UserGroupManager.get_all(user_performing_action)
            if user_groups_to_check:
                check_ids = {user_group.id for user_group in user_groups_to_check}
                user_group_ids = {user_group.id for user_group in user_groups}
                if not check_ids.issubset(user_group_ids):
                    raise RuntimeError("User group not found")
                user_groups = [
                    user_group
                    for user_group in user_groups
                    if user_group.id in check_ids
                ]
            if all(ug.is_up_to_date for ug in user_groups):
                print("User groups synced successfully.")
                return

            if time.time() - start > MAX_DELAY:
                raise TimeoutError(
                    f"User groups were not synced within the {MAX_DELAY} seconds"
                )
            else:
                print("User groups were not synced yet, waiting...")
            time.sleep(2)

    @staticmethod
    def wait_for_deletion_completion(
        user_groups_to_check: list[DATestUserGroup],
        user_performing_action: DATestUser,
    ) -> None:
        start = time.time()
        user_group_ids_to_check = {user_group.id for user_group in user_groups_to_check}
        while True:
            fetched_user_groups = UserGroupManager.get_all(user_performing_action)
            fetched_user_group_ids = {
                user_group.id for user_group in fetched_user_groups
            }
            if not user_group_ids_to_check.intersection(fetched_user_group_ids):
                return

            if time.time() - start > MAX_DELAY:
                raise TimeoutError(
                    f"User groups deletion was not completed within the {MAX_DELAY} seconds"
                )
            else:
                print("Some user groups are still being deleted, waiting...")
            time.sleep(2)


================================================
FILE: backend/tests/integration/common_utils/reset.py
================================================
import logging
import os
import time
from types import SimpleNamespace

import psycopg2
import requests

from alembic import command
from alembic.config import Config
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PASSWORD
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SYNC_DB_API
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.search_settings import get_current_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa.index import VespaIndex
from onyx.file_store.file_store import get_default_file_store
from onyx.indexing.models import IndexingSetting
from onyx.setup import setup_document_indices
from onyx.setup import setup_postgres
from onyx.utils.logger import setup_logger
from tests.integration.common_utils.timeout import run_with_timeout_multiproc

logger = setup_logger()


def _run_migrations(
    database_url: str,
    config_name: str,
    direction: str = "upgrade",
    revision: str = "head",
    schema: str = "public",
) -> None:
    # hide info logs emitted during migration
    logging.getLogger("alembic").setLevel(logging.CRITICAL)

    # Create an Alembic configuration object
    alembic_cfg = Config("alembic.ini")
    alembic_cfg.set_section_option("logger_alembic", "level", "WARN")
    alembic_cfg.attributes["configure_logger"] = False
    alembic_cfg.config_ini_section = config_name

    alembic_cfg.cmd_opts = SimpleNamespace()  # type: ignore
    alembic_cfg.cmd_opts.x = [f"schema={schema}"]  # type: ignore

    # Set the SQLAlchemy URL in the Alembic configuration
    alembic_cfg.set_main_option("sqlalchemy.url", database_url)

    # Run the migration
    if direction == "upgrade":
        command.upgrade(alembic_cfg, revision)
    elif direction == "downgrade":
        command.downgrade(alembic_cfg, revision)
    else:
        raise ValueError(
            f"Invalid direction: {direction}. Must be 'upgrade' or 'downgrade'."
        )

    logging.getLogger("alembic").setLevel(logging.INFO)


def downgrade_postgres(
    database: str = "postgres",
    schema: str = "public",
    config_name: str = "alembic",
    revision: str = "base",
    clear_data: bool = False,
) -> None:
    """Downgrade Postgres database to base state."""
    if clear_data:
        if revision != "base":
            raise ValueError("Clearing data without rolling back to base state")

        conn = psycopg2.connect(
            dbname=database,
            user=POSTGRES_USER,
            password=POSTGRES_PASSWORD,
            host=POSTGRES_HOST,
            port=POSTGRES_PORT,
            application_name="downgrade_postgres",
        )
        conn.autocommit = True  # Need autocommit for dropping schema
        cur = conn.cursor()

        # Close any existing connections to the schema before dropping
        cur.execute(
            f"""
            SELECT pg_terminate_backend(pg_stat_activity.pid)
            FROM pg_stat_activity
            WHERE pg_stat_activity.datname = '{database}'
            AND pg_stat_activity.state = 'idle in transaction'
            AND pid <> pg_backend_pid();
        """
        )

        # Drop and recreate the public schema - this removes ALL objects
        cur.execute(f"DROP SCHEMA {schema} CASCADE;")
        cur.execute(f"CREATE SCHEMA {schema};")

        # Restore default privileges
        cur.execute(f"GRANT ALL ON SCHEMA {schema} TO postgres;")
        cur.execute(f"GRANT ALL ON SCHEMA {schema} TO public;")

        cur.close()
        conn.close()

        return

    # Downgrade to base
    conn_str = build_connection_string(
        db=database,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        db_api=SYNC_DB_API,
    )
    _run_migrations(
        conn_str,
        config_name,
        direction="downgrade",
        revision=revision,
    )


def upgrade_postgres(
    database: str = "postgres", config_name: str = "alembic", revision: str = "head"
) -> None:
    """Upgrade Postgres database to latest version."""
    conn_str = build_connection_string(
        db=database,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        db_api=SYNC_DB_API,
        app_name="upgrade_postgres",
    )
    _run_migrations(
        conn_str,
        config_name,
        direction="upgrade",
        revision=revision,
    )


def drop_multitenant_postgres(
    database: str = "postgres",
) -> None:
    """Reset the Postgres database."""
    # this seems to hang due to locking issues, so run with a timeout with a few retries
    NUM_TRIES = 10
    TIMEOUT = 40
    success = False
    for _ in range(NUM_TRIES):
        logger.info(f"drop_multitenant_postgres_task starting... ({_ + 1}/{NUM_TRIES})")
        try:
            run_with_timeout_multiproc(
                drop_multitenant_postgres_task,
                TIMEOUT,
                kwargs={
                    "dbname": database,
                },
            )
            success = True
            break
        except TimeoutError:
            logger.warning(
                f"drop_multitenant_postgres_task timed out, retrying... ({_ + 1}/{NUM_TRIES})"
            )
        except RuntimeError:
            logger.warning(
                f"drop_multitenant_postgres_task exceptioned, retrying... ({_ + 1}/{NUM_TRIES})"
            )

    if not success:
        raise RuntimeError("drop_multitenant_postgres_task failed after 10 timeouts.")


def drop_multitenant_postgres_task(dbname: str) -> None:
    conn = psycopg2.connect(
        dbname=dbname,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        connect_timeout=10,
        application_name="drop_multitenant_postgres_task",
    )

    conn.autocommit = True
    cur = conn.cursor()

    logger.info("Selecting tenant schemas.")
    # Get all tenant schemas
    cur.execute(
        """
        SELECT schema_name
        FROM information_schema.schemata
        WHERE schema_name LIKE 'tenant_%'
        """
    )
    tenant_schemas = cur.fetchall()

    # Drop all tenant schemas
    logger.info("Dropping all tenant schemas.")
    for schema in tenant_schemas:
        # Close any existing connections to the schema before dropping
        cur.execute(
            """
            SELECT pg_terminate_backend(pg_stat_activity.pid)
            FROM pg_stat_activity
            WHERE pg_stat_activity.datname = 'postgres'
            AND pg_stat_activity.state = 'idle in transaction'
            AND pid <> pg_backend_pid();
        """
        )

        schema_name = schema[0]
        cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE')

    # Drop tables in the public schema
    logger.info("Selecting public schema tables.")
    cur.execute(
        """
        SELECT tablename FROM pg_tables
        WHERE schemaname = 'public'
        """
    )
    public_tables = cur.fetchall()

    logger.info("Dropping public schema tables.")
    for table in public_tables:
        table_name = table[0]
        cur.execute(f'DROP TABLE IF EXISTS public."{table_name}" CASCADE')

    cur.close()
    conn.close()


def reset_postgres(
    database: str = "postgres",
    config_name: str = "alembic",
    setup_onyx: bool = True,
) -> None:
    """Reset the Postgres database."""
    # this seems to hang due to locking issues, so run with a timeout with a few retries
    NUM_TRIES = 10
    TIMEOUT = 40
    success = False
    for _ in range(NUM_TRIES):
        logger.info(f"Downgrading Postgres... ({_ + 1}/{NUM_TRIES})")
        try:
            run_with_timeout_multiproc(
                downgrade_postgres,
                TIMEOUT,
                kwargs={
                    "database": database,
                    "config_name": config_name,
                    "revision": "base",
                    "clear_data": True,
                },
            )
            success = True
            break
        except TimeoutError:
            logger.warning(
                f"Postgres downgrade timed out, retrying... ({_ + 1}/{NUM_TRIES})"
            )
        except RuntimeError:
            logger.warning(
                f"Postgres downgrade exceptioned, retrying... ({_ + 1}/{NUM_TRIES})"
            )

    if not success:
        raise RuntimeError("Postgres downgrade failed after 10 timeouts.")

    logger.info("Upgrading Postgres...")
    upgrade_postgres(database=database, config_name=config_name, revision="head")
    if setup_onyx:
        logger.info("Setting up Postgres...")
        with get_session_with_current_tenant() as db_session:
            setup_postgres(db_session)


def reset_vespa() -> None:
    """Wipe all data from the Vespa index."""

    with get_session_with_current_tenant() as db_session:
        # swap to the correct default model
        check_and_perform_index_swap(db_session)

        search_settings = get_current_search_settings(db_session)
        multipass_config = get_multipass_config(search_settings)
        index_name = search_settings.index_name

    success = setup_document_indices(
        document_indices=[
            VespaIndex(
                index_name=index_name,
                secondary_index_name=None,
                large_chunks_enabled=multipass_config.enable_large_chunks,
                secondary_large_chunks_enabled=None,
            )
        ],
        index_setting=IndexingSetting.from_db_model(search_settings),
        secondary_index_setting=None,
    )
    if not success:
        raise RuntimeError("Could not connect to Vespa within the specified timeout.")

    for _ in range(5):
        try:
            continuation = None
            should_continue = True
            while should_continue:
                params = {"selection": "true", "cluster": "danswer_index"}
                if continuation:
                    params = {**params, "continuation": continuation}
                response = requests.delete(
                    DOCUMENT_ID_ENDPOINT.format(index_name=index_name), params=params
                )
                response.raise_for_status()

                response_json = response.json()

                continuation = response_json.get("continuation")
                should_continue = bool(continuation)

            break
        except Exception as e:
            print(f"Error deleting documents: {e}")
            time.sleep(5)


def reset_postgres_multitenant() -> None:
    """Reset the Postgres database for all tenants in a multitenant setup."""

    drop_multitenant_postgres()
    reset_postgres(config_name="schema_private", setup_onyx=False)


def reset_vespa_multitenant() -> None:
    """Wipe all data from the Vespa index for all tenants."""

    for tenant_id in get_all_tenant_ids():
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            # swap to the correct default model for each tenant
            check_and_perform_index_swap(db_session)

            search_settings = get_current_search_settings(db_session)
            multipass_config = get_multipass_config(search_settings)
            index_name = search_settings.index_name

        success = setup_document_indices(
            document_indices=[
                VespaIndex(
                    index_name=index_name,
                    secondary_index_name=None,
                    large_chunks_enabled=multipass_config.enable_large_chunks,
                    secondary_large_chunks_enabled=None,
                )
            ],
            index_setting=IndexingSetting.from_db_model(search_settings),
            secondary_index_setting=None,
        )

        if not success:
            raise RuntimeError(
                f"Could not connect to Vespa for tenant {tenant_id} within the specified timeout."
            )

        for _ in range(5):
            try:
                continuation = None
                should_continue = True
                while should_continue:
                    params = {"selection": "true", "cluster": "danswer_index"}
                    if continuation:
                        params = {**params, "continuation": continuation}
                    response = requests.delete(
                        DOCUMENT_ID_ENDPOINT.format(index_name=index_name),
                        params=params,
                    )
                    response.raise_for_status()

                    response_json = response.json()

                    continuation = response_json.get("continuation")
                    should_continue = bool(continuation)

                break
            except Exception as e:
                print(f"Error deleting documents for tenant {tenant_id}: {e}")
                time.sleep(5)


def reset_file_store() -> None:
    """Reset the FileStore."""
    filestore = get_default_file_store()
    for file_record in filestore.list_files_by_prefix(""):
        filestore.delete_file(file_record.file_id)


def reset_all() -> None:
    if os.environ.get("SKIP_RESET", "").lower() == "true":
        logger.info("Skipping reset.")
        return

    logger.info("Resetting Postgres...")
    reset_postgres()
    logger.info("Resetting Vespa...")
    reset_vespa()
    logger.info("Resetting FileStore...")
    reset_file_store()


def reset_all_multitenant() -> None:
    """Reset both Postgres and Vespa for all tenants.

    Honors SKIP_RESET env var to allow callers (e.g., CI) to disable
    heavy resets entirely for faster end-to-end runs.
    """
    if os.environ.get("SKIP_RESET", "").lower() == "true":
        logger.info("SKIPPING multitenant reset due to SKIP_RESET=true")
        return

    logger.info("Resetting Postgres for all tenants...")
    reset_postgres_multitenant()
    logger.info("Resetting Vespa for all tenants...")
    reset_vespa_multitenant()
    logger.info("Finished resetting all.")


================================================
FILE: backend/tests/integration/common_utils/test_document_utils.py
================================================
import uuid
from datetime import datetime
from datetime import timezone

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import TextSection


def create_test_document(
    doc_id: str | None = None,
    text: str = "Test content",
    link: str = "http://example.com",
    source: DocumentSource = DocumentSource.MOCK_CONNECTOR,
    metadata: dict | None = None,
) -> Document:
    """Create a test document with the given parameters.

    Args:
        doc_id: Optional document ID. If not provided, a random UUID will be generated.
        text: The text content of the document. Defaults to "Test content".
        link: The link for the document section. Defaults to "http://example.com".
        source: The document source. Defaults to MOCK_CONNECTOR.
        metadata: Optional metadata dictionary. Defaults to empty dict.
    """
    doc_id = doc_id or f"test-doc-{uuid.uuid4()}"
    return Document(
        id=doc_id,
        sections=[TextSection(text=text, link=link)],
        source=source,
        semantic_identifier=doc_id,
        doc_updated_at=datetime.now(timezone.utc),
        metadata=metadata or {},
    )


def create_test_document_failure(
    doc_id: str,
    failure_message: str = "Simulated failure",
    document_link: str | None = None,
) -> ConnectorFailure:
    """Create a test document failure with the given parameters.

    Args:
        doc_id: The ID of the document that failed.
        failure_message: The failure message. Defaults to "Simulated failure".
        document_link: Optional link to the failed document.
    """
    return ConnectorFailure(
        failed_document=DocumentFailure(
            document_id=doc_id,
            document_link=document_link,
        ),
        failure_message=failure_message,
    )


================================================
FILE: backend/tests/integration/common_utils/test_file_utils.py
================================================
import io

from PIL import Image


def create_test_image(
    width: int = 1,
    height: int = 1,
    color: str = "white",
    format: str = "PNG",
) -> io.BytesIO:
    """Create a test image file in memory for file attachment testing.

    Args:
        width: Width of the image in pixels. Defaults to 1.
        height: Height of the image in pixels. Defaults to 1.
        color: Color of the image. Defaults to "white".
        format: Image format (PNG, JPEG, etc.). Defaults to "PNG".

    Returns:
        A BytesIO object containing the image data, positioned at the start.
    """
    image = Image.new("RGB", (width, height), color=color)
    image_file = io.BytesIO()
    image.save(image_file, format=format)
    image_file.seek(0)
    return image_file


def create_test_text_file(content: str | bytes) -> io.BytesIO:
    """Create a test text file in memory for file attachment testing.

    Args:
        content: The text content of the file. Can be string or bytes.

    Returns:
        A BytesIO object containing the text data, positioned at the start.
    """
    if isinstance(content, str):
        content = content.encode("utf-8")
    text_file = io.BytesIO(content)
    text_file.seek(0)
    return text_file


================================================
FILE: backend/tests/integration/common_utils/test_models.py
================================================
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any
from uuid import UUID

from pydantic import BaseModel
from pydantic import Field

from onyx.auth.schemas import UserRole
from onyx.configs.constants import MessageType
from onyx.configs.constants import QAFeedbackType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from onyx.server.documents.models import IndexAttemptSnapshot
from onyx.server.documents.models import IndexingStatus
from onyx.server.documents.models import InputType
from onyx.server.query_and_chat.streaming_models import GeneratedImage

"""
These data models are used to represent the data on the testing side of things.
This means the flow is:
1. Make request that changes data in db
2. Make a change to the testing model
3. Retrieve data from db
4. Compare db data with testing model to verify
"""


class DATestPAT(BaseModel):
    """Personal Access Token model for testing."""

    id: int
    name: str
    token: str | None = None  # Raw token - only present on initial creation
    token_display: str
    created_at: str
    expires_at: str | None = None
    last_used_at: str | None = None


class DATestScimToken(BaseModel):
    """SCIM bearer token model for testing."""

    id: int
    name: str
    raw_token: str | None = None  # Only present on initial creation
    token_display: str
    is_active: bool
    created_at: str
    last_used_at: str | None = None


class DATestAPIKey(BaseModel):
    api_key_id: int
    api_key_display: str
    api_key: str | None = None  # only present on initial creation
    api_key_name: str | None = None
    api_key_role: UserRole

    user_id: UUID
    headers: dict


class DATestUser(BaseModel):
    id: str
    email: str
    password: str
    headers: dict
    role: UserRole
    is_active: bool
    cookies: dict = {}


class DATestPersonaLabel(BaseModel):
    id: int | None = None
    name: str


class DATestCredential(BaseModel):
    id: int
    name: str
    credential_json: dict[str, Any]
    admin_public: bool
    source: DocumentSource
    curator_public: bool
    groups: list[int]


class DATestConnector(BaseModel):
    id: int
    name: str
    source: DocumentSource
    input_type: InputType
    connector_specific_config: dict[str, Any]
    groups: list[int] | None = None
    access_type: AccessType | None = None


class SimpleTestDocument(BaseModel):
    id: str
    content: str
    image_file_id: str | None = None


class DATestCCPair(BaseModel):
    id: int
    name: str
    connector_id: int
    credential_id: int
    access_type: AccessType
    groups: list[int]
    documents: list[SimpleTestDocument] = Field(default_factory=list)


class DATestUserGroup(BaseModel):
    id: int
    name: str
    user_ids: list[str]
    cc_pair_ids: list[int]


class DATestLLMProvider(BaseModel):
    id: int
    name: str
    provider: str
    api_key: str
    default_model_name: str | None = None
    is_public: bool
    is_auto_mode: bool = False
    groups: list[int]
    personas: list[int]
    api_base: str | None = None
    api_version: str | None = None


class DATestImageGenerationConfig(BaseModel):
    image_provider_id: str
    model_configuration_id: int
    model_name: str
    llm_provider_id: int
    llm_provider_name: str
    is_default: bool


class DATestDocumentSet(BaseModel):
    id: int
    name: str
    description: str
    cc_pair_ids: list[int] = Field(default_factory=list)
    is_public: bool
    is_up_to_date: bool
    users: list[str] = Field(default_factory=list)
    groups: list[int] = Field(default_factory=list)
    federated_connectors: list[dict[str, Any]] = Field(default_factory=list)


class DATestPersona(BaseModel):
    id: int
    name: str
    description: str
    is_public: bool
    document_set_ids: list[int]
    tool_ids: list[int]
    llm_model_provider_override: str | None
    llm_model_version_override: str | None
    users: list[str]
    groups: list[int]
    label_ids: list[int]
    is_featured: bool = False

    # Embedded prompt fields (no longer separate prompt_ids)
    system_prompt: str | None = None
    task_prompt: str | None = None
    datetime_aware: bool = True


class DATestChatMessage(BaseModel):
    id: int
    chat_session_id: UUID
    parent_message_id: int | None
    message: str
    message_type: MessageType | None = None
    files: list | None = None


class DATestChatSession(BaseModel):
    id: UUID
    persona_id: int
    description: str


class DAQueryHistoryEntry(DATestChatSession):
    feedback_type: QAFeedbackType | None


class ToolName(str, Enum):
    INTERNET_SEARCH = "internet_search"
    INTERNAL_SEARCH = "run_search"
    IMAGE_GENERATION = "generate_image"


class ToolResult(BaseModel):
    tool_name: ToolName

    queries: list[str] = Field(default_factory=list)
    documents: list[SavedSearchDoc] = Field(default_factory=list)
    images: list[GeneratedImage] = Field(default_factory=list)


class ToolCallDebug(BaseModel):
    tool_call_id: str
    tool_name: str
    tool_args: dict[str, Any]


class ErrorResponse(BaseModel):
    error: str
    stack_trace: str


class StreamedResponse(BaseModel):
    full_message: str
    assistant_message_id: int
    top_documents: list[SearchDoc]
    used_tools: list[ToolResult]
    tool_call_debug: list[ToolCallDebug] = Field(default_factory=list)
    error: ErrorResponse | None = None

    # Track heartbeat packets for image generation and other tools
    heartbeat_packets: list[dict[str, Any]]


class DATestGatingType(str, Enum):
    FULL = "full"
    PARTIAL = "partial"
    NONE = "none"


class DATestSettings(BaseModel):
    """General settings"""

    # is float to allow for fractional days for easier automated testing
    maximum_chat_retention_days: float | None = None
    gpu_enabled: bool | None = None
    product_gating: DATestGatingType = DATestGatingType.NONE
    anonymous_user_enabled: bool | None = None
    image_extraction_and_analysis_enabled: bool | None = False
    search_time_image_analysis_enabled: bool | None = False


@dataclass
class DATestIndexAttempt:
    id: int
    status: IndexingStatus | None
    new_docs_indexed: int | None
    total_docs_indexed: int | None
    docs_removed_from_index: int | None
    error_msg: str | None
    time_started: datetime | None
    time_updated: datetime | None

    @classmethod
    def from_index_attempt_snapshot(
        cls, index_attempt: IndexAttemptSnapshot
    ) -> "DATestIndexAttempt":
        return cls(
            id=index_attempt.id,
            status=index_attempt.status,
            new_docs_indexed=index_attempt.new_docs_indexed,
            total_docs_indexed=index_attempt.total_docs_indexed,
            docs_removed_from_index=index_attempt.docs_removed_from_index,
            error_msg=index_attempt.error_msg,
            time_started=(
                datetime.fromisoformat(index_attempt.time_started)
                if index_attempt.time_started
                else None
            ),
            time_updated=datetime.fromisoformat(index_attempt.time_updated),
        )


class DATestTool(BaseModel):
    id: int
    name: str
    description: str
    display_name: str
    in_code_tool_id: str | None


# Discord Bot Models
class DATestDiscordGuildConfig(BaseModel):
    """Discord guild config model for testing."""

    id: int
    registration_key: str | None = None  # Only present on creation
    guild_id: int | None = None
    guild_name: str | None = None
    enabled: bool = True
    default_persona_id: int | None = None


class DATestDiscordChannelConfig(BaseModel):
    """Discord channel config model for testing."""

    id: int
    guild_config_id: int
    channel_id: int
    channel_name: str
    channel_type: str
    is_private: bool
    enabled: bool = False
    thread_only_mode: bool = False
    require_bot_invocation: bool = True
    persona_override_id: int | None = None


================================================
FILE: backend/tests/integration/common_utils/timeout.py
================================================
# import multiprocessing
# from collections.abc import Callable
# from typing import Any
# from typing import TypeVar

# T = TypeVar("T")


# def run_with_timeout_multiproc(
#     task: Callable[..., T], timeout: int, kwargs: dict[str, Any]
# ) -> T:
#     # Use multiprocessing to prevent a thread from blocking the main thread
#     with multiprocessing.Pool(processes=1) as pool:
#         async_result = pool.apply_async(task, kwds=kwargs)
#         try:
#             # Wait at most timeout seconds for the function to complete
#             result = async_result.get(timeout=timeout)
#             return result
#         except multiprocessing.TimeoutError:
#             raise TimeoutError(f"Function timed out after {timeout} seconds")


import multiprocessing
import traceback
from collections.abc import Callable
from multiprocessing import Queue
from typing import Any
from typing import TypeVar

T = TypeVar("T")


def _multiproc_wrapper(
    task: Callable[..., T], kwargs: dict[str, Any], q: Queue
) -> None:
    try:
        result = task(**kwargs)
        q.put(("success", result))
    except Exception:
        q.put(("error", traceback.format_exc()))


def run_with_timeout_multiproc(
    task: Callable[..., T], timeout: int, kwargs: dict[str, Any]
) -> T:
    ctx = multiprocessing.get_context("spawn")
    q: Queue = ctx.Queue()
    p = ctx.Process(
        target=_multiproc_wrapper,
        args=(
            task,
            kwargs,
            q,
        ),
    )
    p.start()
    p.join(timeout)

    if p.is_alive():
        p.terminate()
        raise TimeoutError(f"{task.__name__} timed out after {timeout} seconds")

    if not q.empty():
        status, result = q.get()
        if status == "success":
            return result
        else:
            raise RuntimeError(f"{task.__name__} failed:\n{result}")
    else:
        raise RuntimeError(f"{task.__name__} returned no result")


================================================
FILE: backend/tests/integration/common_utils/vespa.py
================================================
import requests

from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT


class vespa_fixture:
    def __init__(self, index_name: str):
        self.index_name = index_name
        self.vespa_document_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)

    def get_documents_by_id(
        self, document_ids: list[str], wanted_doc_count: int = 1_000
    ) -> dict:
        selection = " or ".join(
            f"{self.index_name}.document_id=='{document_id}'"
            for document_id in document_ids
        )
        params = {
            "selection": selection,
            "wantedDocumentCount": wanted_doc_count,
        }
        response = requests.get(
            self.vespa_document_url,
            params=params,  # type: ignore
        )
        response.raise_for_status()
        return response.json()


================================================
FILE: backend/tests/integration/conftest.py
================================================
import os
from collections.abc import Callable

import pytest

# Integration tests rely on this mode to enable mock_llm_response paths.
os.environ["INTEGRATION_TESTS_MODE"] = "true"

from onyx.auth.schemas import UserRole
from onyx.configs.constants import DocumentSource
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_current_search_settings
from tests.integration.common_utils.constants import ADMIN_USER_NAME
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.image_generation import (
    ImageGenerationConfigManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import build_email
from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.reset import reset_all_multitenant
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestImageGenerationConfig
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import SimpleTestDocument
from tests.integration.common_utils.vespa import vespa_fixture

BASIC_USER_NAME = "basic_user"

DocumentBuilderType = Callable[[list[str]], list[SimpleTestDocument]]


@pytest.fixture(scope="session", autouse=True)
def initialize_db() -> None:
    # Make sure that the db engine is initialized before any tests are run
    SqlEngine.init_engine(
        pool_size=10,
        max_overflow=5,
    )


def load_env_vars(env_file: str = ".env") -> None:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    env_path = os.path.join(current_dir, env_file)
    try:
        with open(env_path, "r") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    key, value = line.split("=", 1)
                    # Preserve explicitly pre-set vars (e.g. INTEGRATION_TESTS_MODE).
                    os.environ.setdefault(key, value.strip())
        print("Successfully loaded environment variables")
    except FileNotFoundError:
        print(f"File {env_file} not found")


# Load environment variables at the module level
load_env_vars()


"""NOTE: for some reason using this seems to lead to misc
`sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly`
errors.

Commenting out till we can get to the bottom of it. For now, just using
instantiate the session directly within the test.
"""


@pytest.fixture
def vespa_client() -> vespa_fixture:
    with get_session_with_current_tenant() as db_session:
        search_settings = get_current_search_settings(db_session)
        return vespa_fixture(index_name=search_settings.index_name)


@pytest.fixture
def reset() -> None:
    reset_all()


@pytest.fixture
def new_admin_user(reset: None) -> DATestUser:  # noqa: ARG001
    return UserManager.create(name=ADMIN_USER_NAME)


@pytest.fixture
def admin_user() -> DATestUser:
    try:
        user = UserManager.create(name=ADMIN_USER_NAME)

        # if there are other users for some reason, reset and try again
        if not UserManager.is_role(user, UserRole.ADMIN):
            print("Trying to reset")
            reset_all()
            user = UserManager.create(name=ADMIN_USER_NAME)
        return user
    except Exception as e:
        print(f"Failed to create admin user: {e}")

    try:
        user = UserManager.login_as_user(
            DATestUser(
                id="",
                email=build_email("admin_user"),
                password=DEFAULT_PASSWORD,
                headers=GENERAL_HEADERS,
                role=UserRole.ADMIN,
                is_active=True,
            )
        )
        if not UserManager.is_role(user, UserRole.ADMIN):
            reset_all()
            user = UserManager.create(name=ADMIN_USER_NAME)
            return user

        return user
    except Exception as e:
        print(f"Failed to create or login as admin user: {e}")

    raise RuntimeError("Failed to create or login as admin user")


@pytest.fixture
def basic_user(
    # make sure the admin user exists first to ensure this new user
    # gets the BASIC role
    admin_user: DATestUser,  # noqa: ARG001
) -> DATestUser:
    try:
        user = UserManager.create(name=BASIC_USER_NAME)

        # Validate that the user has the BASIC role
        if user.role != UserRole.BASIC:
            raise RuntimeError(
                f"Created user {BASIC_USER_NAME} does not have BASIC role"
            )

        return user
    except Exception as e:
        print(f"Failed to create basic user, trying to login as existing user: {e}")

        # Try to login as existing basic user
        user = UserManager.login_as_user(
            DATestUser(
                id="",
                email=build_email(BASIC_USER_NAME),
                password=DEFAULT_PASSWORD,
                headers=GENERAL_HEADERS,
                role=UserRole.BASIC,
                is_active=True,
            )
        )

        # Validate that the logged-in user has the BASIC role
        if not UserManager.is_role(user, UserRole.BASIC):
            raise RuntimeError(f"User {BASIC_USER_NAME} does not have BASIC role")

        return user


@pytest.fixture(scope="session")
def reset_multitenant() -> None:
    """Initialize multi-tenant state once per test session.

    Intentionally avoid per-test resets to speed up the multitenant suite.
    The underlying reset function honors SKIP_RESET to allow CI to disable
    heavy resets entirely.
    """
    reset_all_multitenant()


@pytest.fixture
def llm_provider(admin_user: DATestUser) -> DATestLLMProvider:
    return LLMProviderManager.create(user_performing_action=admin_user)


@pytest.fixture
def image_generation_config(
    admin_user: DATestUser,
) -> DATestImageGenerationConfig:
    """Create a default image generation config for tests."""
    return ImageGenerationConfigManager.create(
        user_performing_action=admin_user,
        is_default=True,
    )


@pytest.fixture
def document_builder(admin_user: DATestUser) -> DocumentBuilderType:
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connector
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    def _document_builder(contents: list[str]) -> list[SimpleTestDocument]:
        # seed documents
        docs: list[SimpleTestDocument] = [
            DocumentManager.seed_doc_with_content(
                cc_pair=cc_pair_1,
                content=content,
                api_key=api_key,
            )
            for content in contents
        ]

        return docs

    return _document_builder


def pytest_runtest_logstart(
    nodeid: str,
    location: tuple[str, int | None, str],  # noqa: ARG001
) -> None:
    print(f"\nTest start: {nodeid}")


def pytest_runtest_logfinish(
    nodeid: str,
    location: tuple[str, int | None, str],  # noqa: ARG001
) -> None:
    print(f"\nTest end: {nodeid}")


================================================
FILE: backend/tests/integration/connector_job_tests/github/conftest.py
================================================
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser


GitHubTestEnvSetupTuple = tuple[
    DATestUser,  # admin_user
    DATestUser,  # test_user_1
    DATestUser,  # test_user_2
    DATestCredential,  # github_credential
    DATestConnector,  # github_connector
    DATestCCPair,  # github_cc_pair
]


def _get_github_test_tokens() -> list[str]:
    """
    Returns a list of GitHub tokens to run the GitHub connector suite against.

    Minimal setup:
    - Set ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN (token1)
    Optional:
    - Set ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC (token2 / classic)

    If the classic token is provided, the GitHub suite will run twice (once per token).
    """
    token_1 = os.environ.get("ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN")
    # Prefer the new "classic" name, but keep backward compatibility.
    token_2 = os.environ.get("ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC")

    tokens: list[str] = []
    if token_1:
        tokens.append(token_1)
    if token_2:
        tokens.append(token_2)
    return tokens


@pytest.fixture(scope="module", params=_get_github_test_tokens())
def github_access_token(request: pytest.FixtureRequest) -> str:
    tokens = _get_github_test_tokens()
    if not tokens:
        pytest.skip(
            "Skipping GitHub tests due to missing env vars "
            "ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN and "
            "ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC"
        )
    return request.param


@pytest.fixture(scope="module")
def github_test_env_setup(
    github_access_token: str,
) -> Generator[GitHubTestEnvSetupTuple]:
    """
    Create a complete GitHub test environment with:
    - 3 users with email IDs from environment variables
    - GitHub credentials using ACCESS_TOKEN_GITHUB from environment
    - GitHub connector configured for testing
    - Connector-Credential pair linking them together

    Returns:
        Tuple containing: (admin_user, test_user_1, test_user_2, github_credential, github_connector, github_cc_pair)
    """
    # Reset all resources before setting up the test environment
    reset_all()

    # Get user emails from environment (with fallbacks)
    admin_email = os.environ.get("ONYX_GITHUB_ADMIN_EMAIL", "admin@example.com")
    test_user_1_email = os.environ.get(
        "ONYX_GITHUB_TEST_USER_1_EMAIL", "subash@onyx.app"
    )
    test_user_2_email = os.environ.get(
        "ONYX_GITHUB_TEST_USER_2_EMAIL", "msubash203@gmail.com"
    )

    if not admin_email or not test_user_1_email or not test_user_2_email:
        pytest.skip(
            "Skipping GitHub test environment setup due to missing environment variables"
        )

    # Create users
    admin_user: DATestUser = UserManager.create(email=admin_email)
    test_user_1: DATestUser = UserManager.create(email=test_user_1_email)
    test_user_2: DATestUser = UserManager.create(email=test_user_2_email)

    # Create LLM provider - required for document search to work
    LLMProviderManager.create(user_performing_action=admin_user)

    # Create GitHub credentials
    github_credentials = {
        "github_access_token": github_access_token,
    }

    github_credential: DATestCredential = CredentialManager.create(
        source=DocumentSource.GITHUB,
        credential_json=github_credentials,
        user_performing_action=admin_user,
    )

    # Create GitHub connector
    github_connector: DATestConnector = ConnectorManager.create(
        name="GitHub Test Connector",
        input_type=InputType.POLL,
        source=DocumentSource.GITHUB,
        connector_specific_config={
            "repo_owner": "permission-sync-test",
            "include_prs": True,
            "repositories": "perm-sync-test-minimal",
            "include_issues": True,
        },
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )

    # Create CC pair linking connector and credential
    github_cc_pair: DATestCCPair = CCPairManager.create(
        credential_id=github_credential.id,
        connector_id=github_connector.id,
        name="GitHub Test CC Pair",
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )

    # Wait for initial indexing to complete
    # GitHub API operations can be slow due to rate limiting and network latency
    # Use a longer timeout for initial indexing to avoid flaky test failures
    before = datetime.now(tz=timezone.utc)
    CCPairManager.wait_for_indexing_completion(
        cc_pair=github_cc_pair,
        after=before,
        user_performing_action=admin_user,
        timeout=900,
    )

    yield admin_user, test_user_1, test_user_2, github_credential, github_connector, github_cc_pair


================================================
FILE: backend/tests/integration/connector_job_tests/github/test_github_permission_sync.py
================================================
import os
from datetime import datetime
from datetime import timezone

import pytest
from github import Github

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.utils.logger import setup_logger
from tests.integration.common_utils.document_acl import (
    get_all_connector_documents,
)
from tests.integration.common_utils.document_acl import (
    get_user_document_access_via_acl,
)
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.connector_job_tests.github.conftest import (
    GitHubTestEnvSetupTuple,
)
from tests.integration.connector_job_tests.github.utils import GitHubManager

logger = setup_logger()


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_github_private_repo_permission_sync(
    github_test_env_setup: GitHubTestEnvSetupTuple,
) -> None:

    (
        admin_user,
        test_user_1,
        test_user_2,
        github_credential,
        github_connector,
        github_cc_pair,
    ) = github_test_env_setup

    # Create GitHub client from credential
    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model
    # so credential_json is already a plain dict
    github_access_token = github_credential.credential_json["github_access_token"]
    github_client = Github(github_access_token)
    github_manager = GitHubManager(github_client)

    # Get repository configuration from connector
    repo_owner = github_connector.connector_specific_config["repo_owner"]
    repo_name = github_connector.connector_specific_config["repositories"]

    success = github_manager.change_repository_visibility(
        repo_owner=repo_owner, repo_name=repo_name, visibility="private"
    )

    if not success:
        pytest.fail(f"Failed to change repository {repo_owner}/{repo_name} to private")

    # Add test-team to repository at the start
    logger.info(f"Adding test-team to repository {repo_owner}/{repo_name}")
    team_added = github_manager.add_team_to_repository(
        repo_owner=repo_owner,
        repo_name=repo_name,
        team_slug="test-team",
        permission="pull",
    )

    if not team_added:
        logger.warning(
            f"Failed to add test-team to repository {repo_owner}/{repo_name}"
        )

    try:
        after = datetime.now(timezone.utc)
        CCPairManager.sync(
            cc_pair=github_cc_pair,
            user_performing_action=admin_user,
        )

        # Use a longer timeout for GitHub permission sync operations
        # GitHub API operations can be slow, especially with rate limiting
        # This accounts for document sync, group sync, and vespa sync operations
        CCPairManager.wait_for_sync(
            cc_pair=github_cc_pair,
            user_performing_action=admin_user,
            after=after,
            should_wait_for_group_sync=True,
            timeout=900,
        )

        # ACL-based verification
        with get_session_with_current_tenant() as db_session:
            # Get all documents for this connector
            all_document_ids = get_all_connector_documents(github_cc_pair, db_session)

            # Test access for both users using ACL verification
            accessible_docs_user1 = get_user_document_access_via_acl(
                test_user=test_user_1,
                document_ids=all_document_ids,
                db_session=db_session,
            )

            accessible_docs_user2 = get_user_document_access_via_acl(
                test_user=test_user_2,
                document_ids=all_document_ids,
                db_session=db_session,
            )

            logger.info(
                f"test_user_1 has access to {len(accessible_docs_user1)} documents"
            )
            logger.info(
                f"test_user_2 has access to {len(accessible_docs_user2)} documents"
            )

            # test_user_1 (part of test-team) should have access
            # test_user_2 (not part of test-team) should NOT have access
            assert len(accessible_docs_user1) > 0, (
                f"test_user_1 should have access to private repository documents. "
                f"Found {len(accessible_docs_user1)} accessible docs out of "
                f"{len(all_document_ids)} total"
            )
            assert len(accessible_docs_user2) == 0, (
                f"test_user_2 should NOT have access to private repository documents. "
                f"Found {len(accessible_docs_user2)} accessible docs out of "
                f"{len(all_document_ids)} total"
            )

    finally:
        # Remove test-team from repository at the end
        logger.info(f"Removing test-team from repository {repo_owner}/{repo_name}")
        team_removed = github_manager.remove_team_from_repository(
            repo_owner=repo_owner, repo_name=repo_name, team_slug="test-team"
        )

        if not team_removed:
            logger.warning(
                f"Failed to remove test-team from repository {repo_owner}/{repo_name}"
            )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_github_public_repo_permission_sync(
    github_test_env_setup: GitHubTestEnvSetupTuple,
) -> None:
    """
    Test that when a repository is changed to public, both users can access the documents.
    """
    (
        admin_user,
        test_user_1,
        test_user_2,
        github_credential,
        github_connector,
        github_cc_pair,
    ) = github_test_env_setup

    # Create GitHub client from credential
    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model
    # so credential_json is already a plain dict
    github_access_token = github_credential.credential_json["github_access_token"]
    github_client = Github(github_access_token)
    github_manager = GitHubManager(github_client)

    # Get repository configuration from connector
    repo_owner = github_connector.connector_specific_config["repo_owner"]
    repo_name = github_connector.connector_specific_config["repositories"]

    # Change repository to public
    logger.info(f"Changing repository {repo_owner}/{repo_name} to public")
    success = github_manager.change_repository_visibility(
        repo_owner=repo_owner, repo_name=repo_name, visibility="public"
    )

    if not success:
        pytest.fail(f"Failed to change repository {repo_owner}/{repo_name} to public")

    # Verify repository is now public
    current_visibility = github_manager.get_repository_visibility(
        repo_owner=repo_owner, repo_name=repo_name
    )
    logger.info(f"Repository {repo_owner}/{repo_name} visibility: {current_visibility}")
    assert (
        current_visibility == "public"
    ), f"Repository should be public, but is {current_visibility}"

    # Trigger sync to update permissions
    after = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=github_cc_pair,
        user_performing_action=admin_user,
    )

    # Wait for sync to complete with group sync
    # Public repositories should be accessible to all users
    CCPairManager.wait_for_sync(
        cc_pair=github_cc_pair,
        user_performing_action=admin_user,
        after=after,
        should_wait_for_group_sync=True,
        timeout=900,
    )

    # ACL-based verification
    with get_session_with_current_tenant() as db_session:
        # Get all documents for this connector
        all_document_ids = get_all_connector_documents(github_cc_pair, db_session)

        # Test access for both users using ACL verification
        accessible_docs_user1 = get_user_document_access_via_acl(
            test_user=test_user_1,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        accessible_docs_user2 = get_user_document_access_via_acl(
            test_user=test_user_2,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        logger.info(f"test_user_1 has access to {len(accessible_docs_user1)} documents")
        logger.info(f"test_user_2 has access to {len(accessible_docs_user2)} documents")

        # Both users should have access to the public repository documents
        assert len(accessible_docs_user1) > 0, (
            f"test_user_1 should have access to public repository documents. "
            f"Found {len(accessible_docs_user1)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )
        assert len(accessible_docs_user2) > 0, (
            f"test_user_2 should have access to public repository documents. "
            f"Found {len(accessible_docs_user2)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )

        # Verify that both users get the same results (since repo is public)
        assert len(accessible_docs_user1) == len(accessible_docs_user2), (
            f"Both users should see the same documents from public repository. "
            f"User1: {len(accessible_docs_user1)}, User2: {len(accessible_docs_user2)}"
        )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_github_internal_repo_permission_sync(
    github_test_env_setup: GitHubTestEnvSetupTuple,
) -> None:
    """
    Test that when a repository is changed to internal, test_user_1 has access but test_user_2 doesn't.
    Internal repositories are accessible only to organization members.
    """
    (
        admin_user,
        test_user_1,
        test_user_2,
        github_credential,
        github_connector,
        github_cc_pair,
    ) = github_test_env_setup

    # Create GitHub client from credential
    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model
    # so credential_json is already a plain dict
    github_access_token = github_credential.credential_json["github_access_token"]
    github_client = Github(github_access_token)
    github_manager = GitHubManager(github_client)

    # Get repository configuration from connector
    repo_owner = github_connector.connector_specific_config["repo_owner"]
    repo_name = github_connector.connector_specific_config["repositories"]

    # Change repository to internal
    logger.info(f"Changing repository {repo_owner}/{repo_name} to internal")
    success = github_manager.change_repository_visibility(
        repo_owner=repo_owner, repo_name=repo_name, visibility="internal"
    )

    if not success:
        pytest.fail(f"Failed to change repository {repo_owner}/{repo_name} to internal")

    # Verify repository is now internal
    current_visibility = github_manager.get_repository_visibility(
        repo_owner=repo_owner, repo_name=repo_name
    )
    logger.info(f"Repository {repo_owner}/{repo_name} visibility: {current_visibility}")
    assert (
        current_visibility == "internal"
    ), f"Repository should be internal, but is {current_visibility}"

    # Trigger sync to update permissions
    after = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=github_cc_pair,
        user_performing_action=admin_user,
    )

    # Wait for sync to complete with group sync
    # Internal repositories should be accessible only to organization members
    CCPairManager.wait_for_sync(
        cc_pair=github_cc_pair,
        user_performing_action=admin_user,
        after=after,
        should_wait_for_group_sync=True,
        timeout=900,
    )

    #  ACL-based verification
    with get_session_with_current_tenant() as db_session:
        # Get all documents for this connector
        all_document_ids = get_all_connector_documents(github_cc_pair, db_session)

        # Test access for both users using ACL verification
        accessible_docs_user1 = get_user_document_access_via_acl(
            test_user=test_user_1,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        accessible_docs_user2 = get_user_document_access_via_acl(
            test_user=test_user_2,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        logger.info(f"test_user_1 has access to {len(accessible_docs_user1)} documents")
        logger.info(f"test_user_2 has access to {len(accessible_docs_user2)} documents")

        # For internal repositories:
        # - test_user_1 should have access (assuming they're part of the organization)
        # - test_user_2 should NOT have access (assuming they're not part of the organization)
        assert len(accessible_docs_user1) > 0, (
            f"test_user_1 should have access to internal repository documents (organization member). "
            f"Found {len(accessible_docs_user1)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )
        assert len(accessible_docs_user2) == 0, (
            f"test_user_2 should NOT have access to internal repository documents (not organization member). "
            f"Found {len(accessible_docs_user2)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )


================================================
FILE: backend/tests/integration/connector_job_tests/github/utils.py
================================================
from typing import Optional

from github import Github
from github.GithubException import GithubException

from onyx.utils.logger import setup_logger

logger = setup_logger()


class GitHubManager:
    """
    Manager class for GitHub operations used in testing.
    Provides methods to change repository visibility, check repository visibility, and manage teams.
    """

    def __init__(self, github_client: Github):
        """
        Initialize the GitHub manager with a GitHub client.

        Args:
            github_client: Authenticated GitHub client instance
        """
        self.github_client = github_client

    def change_repository_visibility(
        self, repo_owner: str, repo_name: str, visibility: str
    ) -> bool:
        """
        Change the visibility of a repository.

        Args:
            repo_owner: Repository owner (organization or username)
            repo_name: Repository name
            visibility: New visibility ('public', 'private', or 'internal')

        Returns:
            bool: True if successful, False otherwise

        Raises:
            ValueError: If visibility is not valid
            GithubException: If GitHub API call fails
        """
        if visibility not in ["public", "private", "internal"]:
            raise ValueError(
                f"Invalid visibility: {visibility}. Must be 'public', 'private', or 'internal'"
            )

        try:
            repo = self.github_client.get_repo(f"{repo_owner}/{repo_name}")

            # Check if we have admin permissions
            if not repo.permissions.admin:
                logger.error(
                    f"No admin permissions for repository {repo_owner}/{repo_name}"
                )
                return False

            # Note: Internal repositories are only available for GitHub Enterprise
            try:
                repo.edit(visibility=visibility)
            except GithubException as e:
                logger.warning(f"Could not set repository to {visibility}: {e}")
                return False

            logger.info(
                f"Successfully changed {repo_owner}/{repo_name} visibility to {visibility}"
            )
            return True

        except GithubException as e:
            logger.error(f"Failed to change repository visibility: {e}")
            return False

    def add_team_to_repository(
        self, repo_owner: str, repo_name: str, team_slug: str, permission: str = "push"
    ) -> bool:
        """
        Add a team to a repository with specified permissions.

        Args:
            repo_owner: Repository owner (organization)
            repo_name: Repository name
            team_slug: Team slug (not team name)
            permission: Permission level ('pull', 'push', 'admin', 'maintain', 'triage')

        Returns:
            bool: True if successful, False otherwise

        Raises:
            GithubException: If GitHub API call fails
        """
        valid_permissions = ["pull", "push", "admin", "maintain", "triage"]
        if permission not in valid_permissions:
            raise ValueError(
                f"Invalid permission: {permission}. Must be one of {valid_permissions}"
            )

        try:
            repo = self.github_client.get_repo(f"{repo_owner}/{repo_name}")
            org = self.github_client.get_organization(repo_owner)
            team = org.get_team_by_slug(team_slug)

            # Add team to repository
            team.add_to_repos(repo)

            # Set team permissions on the repository
            team.set_repo_permission(repo, permission)

            logger.info(
                f"Successfully added team {team_slug} to {repo_owner}/{repo_name} with {permission} permissions"
            )
            return True

        except GithubException as e:
            logger.error(f"Failed to add team to repository: {e}")
            return False

    def remove_team_from_repository(
        self, repo_owner: str, repo_name: str, team_slug: str
    ) -> bool:
        """
        Remove a team from a repository.

        Args:
            repo_owner: Repository owner (organization)
            repo_name: Repository name
            team_slug: Team slug (not team name)

        Returns:
            bool: True if successful, False otherwise

        Raises:
            GithubException: If GitHub API call fails
        """
        try:
            repo = self.github_client.get_repo(f"{repo_owner}/{repo_name}")
            org = self.github_client.get_organization(repo_owner)
            team = org.get_team_by_slug(team_slug)

            # Remove team from repository
            team.remove_from_repos(repo)

            logger.info(
                f"Successfully removed team {team_slug} from {repo_owner}/{repo_name}"
            )
            return True

        except GithubException as e:
            logger.error(f"Failed to remove team from repository: {e}")
            return False

    def get_repository_visibility(
        self, repo_owner: str, repo_name: str
    ) -> Optional[str]:
        """
        Get the current visibility of a repository.

        Args:
            repo_owner: Repository owner
            repo_name: Repository name

        Returns:
            Optional[str]: Repository visibility ('public', 'private', 'internal') or None if failed
        """
        try:
            repo = self.github_client.get_repo(f"{repo_owner}/{repo_name}")

            if hasattr(repo, "visibility"):
                return repo.visibility
            else:
                # Fallback for older GitHub API versions
                return "private" if repo.private else "public"

        except GithubException as e:
            logger.error(f"Failed to get repository visibility: {e}")
            return None


================================================
FILE: backend/tests/integration/connector_job_tests/google/google_drive_api_utils.py
================================================
from typing import Any
from uuid import uuid4

from google.oauth2.service_account import Credentials

from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.google_utils.resources import get_google_docs_service
from onyx.connectors.google_utils.resources import GoogleDocsService
from onyx.connectors.google_utils.resources import GoogleDriveService


GOOGLE_SCOPES = {
    "google_drive": [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/admin.directory.group",
        "https://www.googleapis.com/auth/admin.directory.user",
    ],
}


def _create_doc_service(drive_service: GoogleDriveService) -> GoogleDocsService:
    docs_service = get_google_docs_service(
        creds=drive_service._http.credentials,
        user_email=drive_service._http.credentials._subject,
    )
    return docs_service


class GoogleDriveManager:
    @staticmethod
    def create_impersonated_drive_service(
        service_account_key: dict, impersonated_user_email: str
    ) -> GoogleDriveService:
        """Gets a drive service that impersonates a specific user"""
        credentials = Credentials.from_service_account_info(
            service_account_key,
            scopes=GOOGLE_SCOPES["google_drive"],
            subject=impersonated_user_email,
        )

        service = get_drive_service(credentials, impersonated_user_email)

        # Verify impersonation
        about = service.about().get(fields="user").execute()
        if about.get("user", {}).get("emailAddress") != impersonated_user_email:
            raise ValueError(
                f"Failed to impersonate {impersonated_user_email}. Instead got {about.get('user', {}).get('emailAddress')}"
            )
        return service

    @staticmethod
    def create_shared_drive(
        drive_service: GoogleDriveService, admin_email: str, test_id: str
    ) -> str:
        """
        Creates a shared drive and returns the drive's ID
        """
        try:
            about = drive_service.about().get(fields="user").execute()
            creating_user = about["user"]["emailAddress"]

            # Verify we're still impersonating the admin
            if creating_user != admin_email:
                raise ValueError(
                    f"Expected to create drive as {admin_email}, but instead created drive as {creating_user}"
                )

            drive_metadata = {"name": f"perm_sync_drive_{test_id}"}

            request_id = str(uuid4())
            drive = (
                drive_service.drives()
                .create(
                    body=drive_metadata,
                    requestId=request_id,
                    fields="id,name,capabilities",
                )
                .execute()
            )

            return drive["id"]
        except Exception as e:
            print(f"Error creating shared drive: {str(e)}")
            raise

    @staticmethod
    def create_empty_doc(
        drive_service: Any,
        drive_id: str,
    ) -> str:
        """
        Creates an empty document in the given drive and returns the document's ID
        """
        file_metadata = {
            "name": f"perm_sync_doc_{drive_id}_{str(uuid4())}",
            "mimeType": "application/vnd.google-apps.document",
            "parents": [drive_id],
        }
        file = (
            drive_service.files()
            .create(body=file_metadata, supportsAllDrives=True)
            .execute()
        )

        return file["id"]

    @staticmethod
    def append_text_to_doc(
        drive_service: GoogleDriveService, doc_id: str, text: str
    ) -> None:
        docs_service = _create_doc_service(drive_service)

        docs_service.documents().batchUpdate(
            documentId=doc_id,
            body={
                "requests": [{"insertText": {"location": {"index": 1}, "text": text}}]
            },
        ).execute()

    @staticmethod
    def update_file_permissions(
        drive_service: Any, file_id: str, email: str, role: str = "reader"
    ) -> None:
        permission = {"type": "user", "role": role, "emailAddress": email}
        drive_service.permissions().create(
            fileId=file_id,
            body=permission,
            supportsAllDrives=True,
            sendNotificationEmail=False,
        ).execute()

    @staticmethod
    def remove_file_permissions(
        drive_service: Any,
        file_id: str,
        email: str,  # noqa: ARG004
    ) -> None:
        permissions = (
            drive_service.permissions()
            .list(fileId=file_id, supportsAllDrives=True)
            .execute()
        )
        # TODO: This is a hacky way to remove permissions. Removes anyone with reader role.
        # Need to find a way to map a user's email to a permission id.
        # The permissions.get returns a permissionID but email field is None,
        # something to do with it being a group or domain wide delegation.
        for permission in permissions.get("permissions", []):
            if permission.get("role") == "reader":
                drive_service.permissions().delete(
                    fileId=file_id,
                    permissionId=permission["id"],
                    supportsAllDrives=True,
                ).execute()
                break

    @staticmethod
    def make_file_public(drive_service: Any, file_id: str) -> None:
        permission = {"type": "anyone", "role": "reader"}
        drive_service.permissions().create(
            fileId=file_id, body=permission, supportsAllDrives=True
        ).execute()

    @staticmethod
    def cleanup_drive(drive_service: Any, drive_id: str) -> None:
        try:
            # Delete up to 2 files that match our pattern
            file_name_prefix = f"perm_sync_doc_{drive_id}"
            files = (
                drive_service.files()
                .list(
                    q=f"name contains '{file_name_prefix}'",
                    driveId=drive_id,
                    includeItemsFromAllDrives=True,
                    supportsAllDrives=True,
                    corpora="drive",
                    fields="files(id)",
                )
                .execute()
            )

            for file in files.get("files", []):
                drive_service.files().delete(
                    fileId=file["id"], supportsAllDrives=True
                ).execute()

            # Then delete the drive
            drive_service.drives().delete(driveId=drive_id).execute()
        except Exception as e:
            print(f"Error cleaning up drive {drive_id}: {e}")


================================================
FILE: backend/tests/integration/connector_job_tests/google/test_google_drive_permission_sync.py
================================================
import json
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from uuid import uuid4

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.document_search import (
    DocumentSearchManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture
from tests.integration.connector_job_tests.google.google_drive_api_utils import (
    GoogleDriveManager,
)


@pytest.fixture()
def google_drive_test_env_setup() -> Generator[
    tuple[GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser],
    None,
    None,
]:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(email="admin@example.com")
    # Creating a non-admin user
    test_user_1: DATestUser = UserManager.create(email="test_user_1@example.com")
    # Creating a non-admin user
    test_user_2: DATestUser = UserManager.create(email="test_user_2@example.com")

    service_account_key = os.environ["FULL_CONTROL_DRIVE_SERVICE_ACCOUNT"]
    drive_id: str | None = None
    drive_service: GoogleDriveService | None = None

    try:
        credentials = {
            DB_CREDENTIALS_PRIMARY_ADMIN_KEY: admin_user.email,
            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key,
        }

        # Setup Google Drive
        drive_service = GoogleDriveManager.create_impersonated_drive_service(
            json.loads(service_account_key), admin_user.email
        )
        test_id = str(uuid4())
        drive_id = GoogleDriveManager.create_shared_drive(
            drive_service, admin_user.email, test_id
        )

        # Setup Onyx infrastructure
        LLMProviderManager.create(user_performing_action=admin_user)

        before = datetime.now(timezone.utc)
        credential: DATestCredential = CredentialManager.create(
            source=DocumentSource.GOOGLE_DRIVE,
            credential_json=credentials,
            user_performing_action=admin_user,
        )
        connector: DATestConnector = ConnectorManager.create(
            name="Google Drive Test",
            input_type=InputType.POLL,
            source=DocumentSource.GOOGLE_DRIVE,
            connector_specific_config={
                "shared_drive_urls": f"https://drive.google.com/drive/folders/{drive_id}"
            },
            access_type=AccessType.SYNC,
            user_performing_action=admin_user,
        )
        cc_pair: DATestCCPair = CCPairManager.create(
            credential_id=credential.id,
            connector_id=connector.id,
            access_type=AccessType.SYNC,
            user_performing_action=admin_user,
        )
        CCPairManager.wait_for_indexing_completion(
            cc_pair=cc_pair, after=before, user_performing_action=admin_user
        )

        yield drive_service, drive_id, cc_pair, admin_user, test_user_1, test_user_2

    except json.JSONDecodeError:
        pytest.skip("FULL_CONTROL_DRIVE_SERVICE_ACCOUNT is not valid JSON")
    finally:
        # Cleanup drive and file
        if drive_id is not None:
            GoogleDriveManager.cleanup_drive(drive_service, drive_id)


@pytest.mark.xfail(reason="Needs to be tested for flakiness")
def test_google_permission_sync(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
    google_drive_test_env_setup: tuple[
        GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser
    ],
) -> None:
    (
        drive_service,
        drive_id,
        cc_pair,
        admin_user,
        test_user_1,
        test_user_2,
    ) = google_drive_test_env_setup

    # ----------------------BASELINE TEST----------------------
    before = datetime.now(timezone.utc)

    # Create empty test doc in drive
    doc_id_1 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)

    # Append text to doc
    doc_text_1 = "The secret number is 12345"
    GoogleDriveManager.append_text_to_doc(drive_service, doc_id_1, doc_text_1)

    # run indexing
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair, after=before, user_performing_action=admin_user
    )

    # run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
    )

    # Verify admin has access to document
    admin_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=admin_user
    )
    assert doc_text_1 in [result.strip("\ufeff") for result in admin_results]

    # Verify test_user_1 cannot access document
    user1_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=test_user_1
    )
    assert doc_text_1 not in [result.strip("\ufeff") for result in user1_results]

    # ----------------------GRANT USER 1 DOC PERMISSIONS TEST--------------------------
    before = datetime.now(timezone.utc)

    # Grant user 1 access to document 1
    GoogleDriveManager.update_file_permissions(
        drive_service=drive_service,
        file_id=doc_id_1,
        email=test_user_1.email,
        role="reader",
    )

    # Create a second doc in the drive which user 1 should not have access to
    doc_id_2 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)
    doc_text_2 = "The secret number is 67890"
    GoogleDriveManager.append_text_to_doc(drive_service, doc_id_2, doc_text_2)

    # Run indexing
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # Run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
    )

    # Verify admin can access both documents
    admin_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=admin_user
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in admin_results
    }

    # Verify user 1 can access document 1
    user1_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=test_user_1
    )
    assert doc_text_1 in [result.strip("\ufeff") for result in user1_results]

    # Verify user 1 cannot access document 2
    user1_results_2 = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=test_user_1
    )
    assert doc_text_2 not in [result.strip("\ufeff") for result in user1_results_2]

    # ----------------------REMOVE USER 1 DOC PERMISSIONS TEST--------------------------
    before = datetime.now(timezone.utc)

    # Remove user 1 access to document 1
    GoogleDriveManager.remove_file_permissions(
        drive_service=drive_service, file_id=doc_id_1, email=test_user_1.email
    )
    # Run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
    )

    # Verify admin can access both documents
    admin_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=admin_user
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in admin_results
    }

    # Verify user 1 cannot access either document
    user1_results = DocumentSearchManager.search_documents(
        query="secret numbers", user_performing_action=test_user_1
    )
    assert {result.strip("\ufeff") for result in user1_results} == set()

    # ----------------------GRANT USER 1 DRIVE PERMISSIONS TEST--------------------------
    before = datetime.now(timezone.utc)

    # Grant user 1 access to drive
    GoogleDriveManager.update_file_permissions(
        drive_service=drive_service,
        file_id=drive_id,
        email=test_user_1.email,
        role="reader",
    )

    # Run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )

    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=2,
        user_performing_action=admin_user,
        # if we are only updating the group definition for this test we use this varaiable,
        # since it doesn't result in a vespa sync so we don't want to wait for it
        should_wait_for_vespa_sync=False,
    )

    # Verify user 1 can access both documents
    user1_results = DocumentSearchManager.search_documents(
        query="secret numbers", user_performing_action=test_user_1
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in user1_results
    }

    # ----------------------MAKE DRIVE PUBLIC TEST--------------------------
    before = datetime.now(timezone.utc)

    # Unable to make drive itself public as Google's security policies prevent this, so we make the documents public instead
    GoogleDriveManager.make_file_public(drive_service, doc_id_1)
    GoogleDriveManager.make_file_public(drive_service, doc_id_2)

    # Run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=2,
        user_performing_action=admin_user,
    )

    # Verify all users can access both documents
    admin_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=admin_user
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in admin_results
    }

    user1_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=test_user_1
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in user1_results
    }

    user2_results = DocumentSearchManager.search_documents(
        query="secret number", user_performing_action=test_user_2
    )
    assert {doc_text_1, doc_text_2} == {
        result.strip("\ufeff") for result in user2_results
    }


================================================
FILE: backend/tests/integration/connector_job_tests/jira/conftest.py
================================================
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import AccessType
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser


JiraTestEnvSetupTuple = tuple[
    DATestUser,
    DATestCredential,
    DATestConnector,
    DATestCCPair,
]


@pytest.fixture()
def jira_test_env_setup() -> Generator[JiraTestEnvSetupTuple]:
    jira_base_url = os.environ["JIRA_BASE_URL"]
    jira_user_email = os.environ["JIRA_USER_EMAIL"]
    jira_api_token = os.environ["JIRA_API_TOKEN"]

    credentials = {
        "jira_user_email": jira_user_email,
        "jira_api_token": jira_api_token,
    }

    admin_user: DATestUser = UserManager.create(email=jira_user_email)
    credential: DATestCredential = CredentialManager.create(
        source=DocumentSource.JIRA,
        credential_json=credentials,
        user_performing_action=admin_user,
    )
    connector: DATestConnector = ConnectorManager.create(
        name="Jira Test",
        input_type=InputType.POLL,
        source=DocumentSource.JIRA,
        connector_specific_config={
            "jira_base_url": jira_base_url,
        },
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )
    cc_pair: DATestCCPair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )
    before = datetime.now(tz=timezone.utc)
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair, after=before, user_performing_action=admin_user
    )

    yield admin_user, credential, connector, cc_pair


================================================
FILE: backend/tests/integration/connector_job_tests/jira/test_jira_permission_sync_full.py
================================================
import os
from datetime import datetime
from datetime import timezone

import pytest

from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.connector_job_tests.jira.conftest import JiraTestEnvSetupTuple


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Jira permission sync is enterprise only",
)
@pytest.mark.xfail(reason="Needs to be tested for flakiness")
def test_jira_permission_sync_full(
    reset: None,  # noqa: ARG001
    jira_test_env_setup: JiraTestEnvSetupTuple,
) -> None:
    (
        admin_user,
        credential,
        connector,
        cc_pair,
    ) = jira_test_env_setup

    before = datetime.now(tz=timezone.utc)

    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        timeout=float("inf"),
    )


================================================
FILE: backend/tests/integration/connector_job_tests/sharepoint/conftest.py
================================================
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.connectors.sharepoint.connector import SharepointAuthMethod
from onyx.db.enums import AccessType
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser

SharepointTestEnvSetupTuple = tuple[
    DATestUser,  # admin_user
    DATestUser,  # regular_user_1
    DATestUser,  # regular_user_2
    DATestCredential,
    DATestConnector,
    DATestCCPair,
]


@pytest.fixture(scope="module")
def sharepoint_test_env_setup() -> Generator[SharepointTestEnvSetupTuple]:
    # Reset all data before running the test
    reset_all()
    # Required environment variables for SharePoint certificate authentication
    sp_client_id = os.environ.get("PERM_SYNC_SHAREPOINT_CLIENT_ID")
    sp_private_key = os.environ.get("PERM_SYNC_SHAREPOINT_PRIVATE_KEY")
    sp_certificate_password = os.environ.get(
        "PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD"
    )
    sp_directory_id = os.environ.get("PERM_SYNC_SHAREPOINT_DIRECTORY_ID")
    sharepoint_sites = "https://danswerai.sharepoint.com/sites/Permisisonsync"
    admin_email = "admin@onyx.app"
    user1_email = "subash@onyx.app"
    user2_email = "raunak@onyx.app"

    if not sp_private_key or not sp_certificate_password or not sp_directory_id:
        pytest.skip("Skipping test because required environment variables are not set")

    # Certificate-based credentials
    credentials = {
        "authentication_method": SharepointAuthMethod.CERTIFICATE.value,
        "sp_client_id": sp_client_id,
        "sp_private_key": sp_private_key,
        "sp_certificate_password": sp_certificate_password,
        "sp_directory_id": sp_directory_id,
    }

    # Create users
    admin_user: DATestUser = UserManager.create(email=admin_email)
    regular_user_1: DATestUser = UserManager.create(email=user1_email)
    regular_user_2: DATestUser = UserManager.create(email=user2_email)

    # Create LLM provider for search functionality
    LLMProviderManager.create(user_performing_action=admin_user)

    # Create credential
    credential: DATestCredential = CredentialManager.create(
        source=DocumentSource.SHAREPOINT,
        credential_json=credentials,
        user_performing_action=admin_user,
    )

    # Create connector with SharePoint-specific configuration
    connector: DATestConnector = ConnectorManager.create(
        name="SharePoint Test",
        input_type=InputType.POLL,
        source=DocumentSource.SHAREPOINT,
        connector_specific_config={
            "sites": sharepoint_sites.split(","),
            "treat_sharing_link_as_public": True,
        },
        access_type=AccessType.SYNC,  # Enable permission sync
        user_performing_action=admin_user,
    )

    # Create CC pair with permission sync enabled
    cc_pair: DATestCCPair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.SYNC,  # Enable permission sync
        user_performing_action=admin_user,
    )

    # Wait for both indexing and permission sync to complete
    before = datetime.now(tz=timezone.utc)
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
        timeout=float("inf"),
    )

    # Wait for permission sync completion specifically
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
        timeout=float("inf"),
    )

    yield admin_user, regular_user_1, regular_user_2, credential, connector, cc_pair


================================================
FILE: backend/tests/integration/connector_job_tests/sharepoint/test_sharepoint_permissions.py
================================================
import os

import pytest

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.utils.logger import setup_logger
from tests.integration.common_utils.document_acl import (
    get_all_connector_documents,
)
from tests.integration.common_utils.document_acl import (
    get_documents_by_permission_type,
)
from tests.integration.common_utils.document_acl import (
    get_user_document_access_via_acl,
)
from tests.integration.connector_job_tests.sharepoint.conftest import (
    SharepointTestEnvSetupTuple,
)

logger = setup_logger()


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_public_documents_accessible_by_all_users(
    sharepoint_test_env_setup: SharepointTestEnvSetupTuple,
) -> None:
    """Test that public documents are accessible by both test users using ACL verification"""
    (
        admin_user,
        regular_user_1,
        regular_user_2,
        credential,
        connector,
        cc_pair,
    ) = sharepoint_test_env_setup

    with get_session_with_current_tenant() as db_session:
        # Get all documents for this connector
        all_document_ids = get_all_connector_documents(cc_pair, db_session)

        # Test that regular_user_1 can access documents
        accessible_docs_user1 = get_user_document_access_via_acl(
            test_user=regular_user_1,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        # Test that regular_user_2 can access documents
        accessible_docs_user2 = get_user_document_access_via_acl(
            test_user=regular_user_2,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        logger.info(f"User 1 has access to {len(accessible_docs_user1)} documents")
        logger.info(f"User 2 has access to {len(accessible_docs_user2)} documents")

        # For public documents, both users should have access to at least some docs
        assert len(accessible_docs_user1) == 8, (
            f"User 1 should have access to documents. Found "
            f"{len(accessible_docs_user1)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )
        assert len(accessible_docs_user2) == 1, (
            f"User 2 should have access to documents. Found "
            f"{len(accessible_docs_user2)} accessible docs out of "
            f"{len(all_document_ids)} total"
        )

        logger.info(
            "Successfully verified public documents are accessible by users via ACL"
        )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_group_based_permissions(
    sharepoint_test_env_setup: SharepointTestEnvSetupTuple,
) -> None:
    """Test that documents with group permissions are accessible only by users in that group using ACL verification"""
    (
        admin_user,
        regular_user_1,
        regular_user_2,
        credential,
        connector,
        cc_pair,
    ) = sharepoint_test_env_setup

    with get_session_with_current_tenant() as db_session:
        # Get all documents for this connector
        all_document_ids = get_all_connector_documents(cc_pair, db_session)

        if not all_document_ids:
            pytest.skip("No documents found for connector - skipping test")

        # Test access for both users
        accessible_docs_user1 = get_user_document_access_via_acl(
            test_user=regular_user_1,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        accessible_docs_user2 = get_user_document_access_via_acl(
            test_user=regular_user_2,
            document_ids=all_document_ids,
            db_session=db_session,
        )

        logger.info(f"User 1 has access to {len(accessible_docs_user1)} documents")
        logger.info(f"User 2 has access to {len(accessible_docs_user2)} documents")

        public_docs = get_documents_by_permission_type(all_document_ids, db_session)

        # Check if user 2 has access to any non-public documents
        non_public_access_user2 = [
            doc for doc in accessible_docs_user2 if doc not in public_docs
        ]

        assert (
            len(non_public_access_user2) == 0
        ), f"User 2 should only have access to public documents. Found access to non-public docs: {non_public_access_user2}"


================================================
FILE: backend/tests/integration/connector_job_tests/slack/conftest.py
================================================
import os
from collections.abc import Generator

import pytest

from onyx.connectors.slack.models import ChannelType
from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager

SLACK_ADMIN_EMAIL = os.environ.get("SLACK_ADMIN_EMAIL", "evan@onyx.app")
SLACK_TEST_USER_1_EMAIL = os.environ.get("SLACK_TEST_USER_1_EMAIL", "evan+1@onyx.app")
SLACK_TEST_USER_2_EMAIL = os.environ.get("SLACK_TEST_USER_2_EMAIL", "justin@onyx.app")


def _provision_slack_channels(
    bot_token: str,
) -> Generator[tuple[ChannelType, ChannelType], None, None]:
    slack_client = SlackManager.get_slack_client(bot_token)

    auth_info = slack_client.auth_test()
    print(f"\nSlack workspace: {auth_info.get('team')} ({auth_info.get('url')})")

    user_map = SlackManager.build_slack_user_email_id_map(slack_client)
    if SLACK_ADMIN_EMAIL not in user_map:
        raise KeyError(
            f"'{SLACK_ADMIN_EMAIL}' not found in Slack workspace. Available emails: {sorted(user_map.keys())}"
        )
    admin_user_id = user_map[SLACK_ADMIN_EMAIL]

    (
        public_channel,
        private_channel,
        run_id,
    ) = SlackManager.get_and_provision_available_slack_channels(
        slack_client=slack_client, admin_user_id=admin_user_id
    )

    yield public_channel, private_channel

    SlackManager.cleanup_after_test(slack_client=slack_client, test_id=run_id)


@pytest.fixture()
def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:
    yield from _provision_slack_channels(os.environ["SLACK_BOT_TOKEN"])


@pytest.fixture()
def slack_perm_sync_test_setup() -> (
    Generator[tuple[ChannelType, ChannelType], None, None]
):
    yield from _provision_slack_channels(os.environ["SLACK_BOT_TOKEN_TEST_SPACE"])


================================================
FILE: backend/tests/integration/connector_job_tests/slack/slack_api_utils.py
================================================
"""
Assumptions:
- The test users have already been created
- General is empty of messages
- In addition to the normal slack oauth permissions, the following scopes are needed:
    - channels:manage
    - groups:write
    - chat:write
    - chat:write.public
"""

from typing import Any
from typing import cast
from uuid import uuid4

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

from onyx.connectors.slack.connector import get_channel_messages
from onyx.connectors.slack.models import ChannelType
from onyx.connectors.slack.utils import make_paginated_slack_api_call


def _get_slack_channel_id(channel: ChannelType) -> str:
    if not (channel_id := channel.get("id")):
        raise ValueError("Channel ID is missing")
    return channel_id


def _get_non_general_channels(
    slack_client: WebClient,
    get_private: bool,
    get_public: bool,
    only_get_done: bool = False,
) -> list[ChannelType]:
    channel_types = []
    if get_private:
        channel_types.append("private_channel")
    if get_public:
        channel_types.append("public_channel")

    conversations: list[dict[str, Any]] = []
    for result in make_paginated_slack_api_call(
        slack_client.conversations_list,
        exclude_archived=False,
        types=channel_types,
    ):
        conversations.extend(result["channels"])

    filtered_conversations = []
    for conversation in conversations:
        if conversation.get("is_general", False):
            continue
        if only_get_done and "done" not in conversation.get("name", ""):
            continue
        filtered_conversations.append(conversation)
    return cast(list[ChannelType], filtered_conversations)


def _clear_slack_conversation_members(
    slack_client: WebClient,
    admin_user_id: str,
    channel: ChannelType,
) -> None:
    channel_id = _get_slack_channel_id(channel)
    member_ids: list[str] = []
    for result in make_paginated_slack_api_call(
        slack_client.conversations_members,
        channel=channel_id,
    ):
        member_ids.extend(result["members"])

    for member_id in member_ids:
        if member_id == admin_user_id:
            continue
        try:
            slack_client.conversations_kick(channel=channel_id, user=member_id)
            print(f"Kicked member: {member_id}")
        except Exception as e:
            if "cant_kick_self" in str(e):
                continue
            print(f"Error kicking member: {e}")
            print(member_id)
    try:
        slack_client.conversations_unarchive(channel=channel_id)
        channel["is_archived"] = False
    except Exception:
        # Channel is already unarchived
        pass


def _add_slack_conversation_members(
    slack_client: WebClient, channel: ChannelType, member_ids: list[str]
) -> None:
    channel_id = _get_slack_channel_id(channel)
    for user_id in member_ids:
        try:
            slack_client.conversations_invite(channel=channel_id, users=user_id)
        except Exception as e:
            if "already_in_channel" in str(e):
                continue
            print(f"Error inviting member: {e}")
            print(user_id)


def _delete_slack_conversation_messages(
    slack_client: WebClient,
    channel: ChannelType,
    message_to_delete: str | None = None,
) -> None:
    """deletes all messages from a channel if message_to_delete is None"""
    channel_id = _get_slack_channel_id(channel)
    for message_batch in get_channel_messages(slack_client, channel):
        for message in message_batch:
            if message_to_delete and message.get("text") != message_to_delete:
                continue
            print(" removing message: ", message.get("text"))

            try:
                if not (ts := message.get("ts")):
                    raise ValueError("Message timestamp is missing")
                slack_client.chat_delete(channel=channel_id, ts=ts)
            except Exception as e:
                print(f"Error deleting message: {e}")
                print(message)


def _build_slack_channel_from_name(
    slack_client: WebClient,
    admin_user_id: str,
    suffix: str,
    is_private: bool,
    channel: ChannelType | None,
) -> ChannelType:
    base = "public_channel" if not is_private else "private_channel"
    channel_name = f"{base}-{suffix}"
    if channel:
        # If channel is provided, we rename it
        channel_id = _get_slack_channel_id(channel)
        channel_response = slack_client.conversations_rename(
            channel=channel_id,
            name=channel_name,
        )
    else:
        # Otherwise, we create a new channel
        channel_response = slack_client.conversations_create(
            name=channel_name,
            is_private=is_private,
        )

    try:
        slack_client.conversations_unarchive(channel=channel_response["channel"]["id"])
    except Exception:
        # Channel is already unarchived
        pass
    try:
        slack_client.conversations_invite(
            channel=channel_response["channel"]["id"],
            users=[admin_user_id],
        )
    except Exception:
        pass

    final_channel = channel_response["channel"] if channel_response else {}
    return cast(ChannelType, final_channel)


class SlackManager:
    @staticmethod
    def get_slack_client(token: str) -> WebClient:
        return WebClient(token=token)

    @staticmethod
    def get_and_provision_available_slack_channels(
        slack_client: WebClient, admin_user_id: str
    ) -> tuple[ChannelType, ChannelType, str]:
        run_id = str(uuid4())
        public_channels = _get_non_general_channels(
            slack_client, get_private=False, get_public=True, only_get_done=True
        )

        first_available_channel = (
            None if len(public_channels) < 1 else public_channels[0]
        )
        public_channel = _build_slack_channel_from_name(
            slack_client=slack_client,
            admin_user_id=admin_user_id,
            suffix=run_id,
            is_private=False,
            channel=first_available_channel,
        )
        _delete_slack_conversation_messages(
            slack_client=slack_client, channel=public_channel
        )

        private_channels = _get_non_general_channels(
            slack_client, get_private=True, get_public=False, only_get_done=True
        )
        second_available_channel = (
            None if len(private_channels) < 1 else private_channels[0]
        )
        private_channel = _build_slack_channel_from_name(
            slack_client=slack_client,
            admin_user_id=admin_user_id,
            suffix=run_id,
            is_private=True,
            channel=second_available_channel,
        )
        _delete_slack_conversation_messages(
            slack_client=slack_client, channel=private_channel
        )

        return public_channel, private_channel, run_id

    @staticmethod
    def build_slack_user_email_id_map(slack_client: WebClient) -> dict[str, str]:
        users: list[dict[str, Any]] = []

        for users_results in make_paginated_slack_api_call(
            slack_client.users_list,
        ):
            users.extend(users_results.get("members", []))

        user_email_id_map = {}
        for user in users:
            if not (email := user.get("profile", {}).get("email")):
                continue
            if not (user_id := user.get("id")):
                raise ValueError("User ID is missing")
            user_email_id_map[email] = user_id
        return user_email_id_map

    @staticmethod
    def set_channel_members(
        slack_client: WebClient,
        admin_user_id: str,
        channel: ChannelType,
        user_ids: list[str],
    ) -> None:
        _clear_slack_conversation_members(
            slack_client=slack_client,
            channel=channel,
            admin_user_id=admin_user_id,
        )
        _add_slack_conversation_members(
            slack_client=slack_client, channel=channel, member_ids=user_ids
        )

    @staticmethod
    def add_message_to_channel(
        slack_client: WebClient, channel: ChannelType, message: str
    ) -> None:
        channel_id = _get_slack_channel_id(channel)
        slack_client.chat_postMessage(
            channel=channel_id,
            text=message,
        )

    @staticmethod
    def remove_message_from_channel(
        slack_client: WebClient, channel: ChannelType, message: str
    ) -> None:
        _delete_slack_conversation_messages(
            slack_client=slack_client, channel=channel, message_to_delete=message
        )

    @staticmethod
    def cleanup_after_test(
        slack_client: WebClient,
        test_id: str,
    ) -> None:
        channel_types = ["private_channel", "public_channel"]
        channels: list[ChannelType] = []
        for result in make_paginated_slack_api_call(
            slack_client.conversations_list,
            exclude_archived=False,
            types=channel_types,
        ):
            channels.extend(result["channels"])

        for channel in channels:
            if test_id not in channel.get("name", ""):
                continue
            # "done" in the channel name indicates that this channel is free to be used for a new test
            new_name = f"done_{str(uuid4())}"
            try:
                slack_client.conversations_rename(channel=channel["id"], name=new_name)
            except SlackApiError as e:
                print(f"Error renaming channel {channel['id']}: {e}")


================================================
FILE: backend/tests/integration/connector_job_tests/slack/test_permission_sync.py
================================================
import os
from datetime import datetime
from datetime import timezone

import pytest

from onyx.connectors.models import InputType
from onyx.connectors.slack.models import ChannelType
from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.document_search import (
    DocumentSearchManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture
from tests.integration.connector_job_tests.slack.conftest import SLACK_ADMIN_EMAIL
from tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_1_EMAIL
from tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_2_EMAIL
from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager


# NOTE(rkuo): it isn't yet clear if the reason these were previously xfail'd
# still exists. May need to xfail again if flaky (DAN-789)
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_slack_permission_sync(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
    slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],
) -> None:
    public_channel, private_channel = slack_perm_sync_test_setup

    admin_user: DATestUser = UserManager.create(
        email=SLACK_ADMIN_EMAIL,
    )

    test_user_1: DATestUser = UserManager.create(
        email=SLACK_TEST_USER_1_EMAIL,
    )

    test_user_2: DATestUser = UserManager.create(
        email=SLACK_TEST_USER_2_EMAIL,
    )

    bot_token = os.environ["SLACK_BOT_TOKEN_TEST_SPACE"]
    slack_client = SlackManager.get_slack_client(bot_token)
    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)
    admin_user_id = email_id_map[admin_user.email]

    LLMProviderManager.create(user_performing_action=admin_user)

    before = datetime.now(timezone.utc)
    credential: DATestCredential = CredentialManager.create(
        source=DocumentSource.SLACK,
        credential_json={
            "slack_bot_token": bot_token,
        },
        user_performing_action=admin_user,
    )
    connector: DATestConnector = ConnectorManager.create(
        name="Slack",
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "channels": [public_channel["name"], private_channel["name"]],
            "include_bot_messages": True,
        },
        access_type=AccessType.SYNC,
        groups=[],
        user_performing_action=admin_user,
    )
    cc_pair: DATestCCPair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # Add test_user_1 and admin_user to the private channel
    desired_channel_members = [admin_user, test_user_1]
    SlackManager.set_channel_members(
        slack_client=slack_client,
        admin_user_id=admin_user_id,
        channel=private_channel,
        user_ids=[email_id_map[user.email] for user in desired_channel_members],
    )

    public_message = "Steve's favorite number is 809752"
    private_message = "Sara's favorite number is 346794"

    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=public_channel,
        message=public_message,
    )
    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=private_channel,
        message=private_message,
    )

    # Run indexing
    before = datetime.now(timezone.utc)
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # Run permission sync. Since initial_index_should_sync=True for Slack,
    # permissions were already set during indexing above — the explicit sync
    # should find no changes to apply.
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=0,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    # Verify admin can see messages from both channels
    admin_docs = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=admin_user,
    )
    assert public_message in admin_docs
    assert private_message in admin_docs

    # Verify test_user_2 can only see public channel messages
    user_2_docs = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=test_user_2,
    )
    assert public_message in user_2_docs
    assert private_message not in user_2_docs

    # Verify test_user_1 can see both channels (member of private channel)
    user_1_docs = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=test_user_1,
    )
    assert public_message in user_1_docs
    assert private_message in user_1_docs

    # Remove test_user_1 from the private channel
    before = datetime.now(timezone.utc)
    desired_channel_members = [admin_user]
    SlackManager.set_channel_members(
        slack_client=slack_client,
        admin_user_id=admin_user_id,
        channel=private_channel,
        user_ids=[email_id_map[user.email] for user in desired_channel_members],
    )

    # Run permission sync
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
    )

    # Verify test_user_1 can no longer see private channel after removal
    user_1_docs = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=test_user_1,
    )
    assert public_message in user_1_docs
    assert private_message not in user_1_docs


# NOTE(rkuo): it isn't yet clear if the reason these were previously xfail'd
# still exists. May need to xfail again if flaky (DAN-789)
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission tests are enterprise only",
)
def test_slack_group_permission_sync(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
    slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],
) -> None:
    """
    This test ensures that permission sync overrides onyx group access.
    """
    public_channel, private_channel = slack_perm_sync_test_setup

    admin_user: DATestUser = UserManager.create(
        email=SLACK_ADMIN_EMAIL,
    )

    test_user_1: DATestUser = UserManager.create(
        email=SLACK_TEST_USER_1_EMAIL,
    )

    # Create a user group and adding the non-admin user to it
    user_group = UserGroupManager.create(
        name="test_group",
        user_ids=[test_user_1.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group],
        user_performing_action=admin_user,
    )

    bot_token = os.environ["SLACK_BOT_TOKEN_TEST_SPACE"]
    slack_client = SlackManager.get_slack_client(bot_token)
    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)
    admin_user_id = email_id_map[admin_user.email]

    LLMProviderManager.create(user_performing_action=admin_user)

    # Add only admin to the private channel
    SlackManager.set_channel_members(
        slack_client=slack_client,
        admin_user_id=admin_user_id,
        channel=private_channel,
        user_ids=[admin_user_id],
    )

    before = datetime.now(timezone.utc)
    credential = CredentialManager.create(
        source=DocumentSource.SLACK,
        credential_json={
            "slack_bot_token": bot_token,
        },
        user_performing_action=admin_user,
    )

    # Create connector with sync access and assign it to the user group
    connector = ConnectorManager.create(
        name="Slack",
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "channels": [private_channel["name"]],
            "include_bot_messages": True,
        },
        access_type=AccessType.SYNC,
        groups=[user_group.id],
        user_performing_action=admin_user,
    )

    cc_pair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
        groups=[user_group.id],
    )

    # Add a test message to the private channel
    private_message = "This is a secret message: 987654"
    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=private_channel,
        message=private_message,
    )

    # Run indexing
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # Run permission sync. Since initial_index_should_sync=True for Slack,
    # permissions were already set during indexing — no changes expected.
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=0,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    # Verify admin can see the message
    admin_docs = DocumentSearchManager.search_documents(
        query="secret message",
        user_performing_action=admin_user,
    )
    assert private_message in admin_docs

    # Verify test_user_1 cannot see the message despite being in the group
    # (Slack permissions should take precedence)
    user_1_docs = DocumentSearchManager.search_documents(
        query="secret message",
        user_performing_action=test_user_1,
    )
    assert private_message not in user_1_docs


================================================
FILE: backend/tests/integration/connector_job_tests/slack/test_prune.py
================================================
import os
from datetime import datetime
from datetime import timezone

import pytest

from onyx.connectors.models import InputType
from onyx.connectors.slack.models import ChannelType
from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.document_search import (
    DocumentSearchManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestConnector
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture
from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager


@pytest.mark.xfail(reason="flaky - see DAN-986 for details", strict=False)
def test_slack_prune(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
    slack_test_setup: tuple[ChannelType, ChannelType],
) -> None:
    public_channel, private_channel = slack_test_setup

    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(
        email="admin@example.com",
    )

    # Creating a non-admin user
    test_user_1: DATestUser = UserManager.create(
        email="test_user_1@example.com",
    )

    slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"])
    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)
    admin_user_id = email_id_map[admin_user.email]

    LLMProviderManager.create(user_performing_action=admin_user)

    before = datetime.now(timezone.utc)
    credential: DATestCredential = CredentialManager.create(
        source=DocumentSource.SLACK,
        credential_json={
            "slack_bot_token": os.environ["SLACK_BOT_TOKEN"],
        },
        user_performing_action=admin_user,
    )
    connector: DATestConnector = ConnectorManager.create(
        name="Slack",
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "channels": [public_channel["name"], private_channel["name"]],
        },
        access_type=AccessType.PUBLIC,
        groups=[],
        user_performing_action=admin_user,
    )
    cc_pair: DATestCCPair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # ----------------------SETUP INITIAL SLACK STATE--------------------------
    # Add test_user_1 and admin_user to the private channel
    desired_channel_members = [admin_user, test_user_1]
    SlackManager.set_channel_members(
        slack_client=slack_client,
        admin_user_id=admin_user_id,
        channel=private_channel,
        user_ids=[email_id_map[user.email] for user in desired_channel_members],
    )

    public_message = "Steve's favorite number is 809752"
    private_message = "Sara's favorite number is 346794"
    message_to_delete = "Rebecca's favorite number is 753468"

    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=public_channel,
        message=public_message,
    )
    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=private_channel,
        message=private_message,
    )
    SlackManager.add_message_to_channel(
        slack_client=slack_client,
        channel=private_channel,
        message=message_to_delete,
    )

    # Run indexing
    before = datetime.now(timezone.utc)
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # Run permission sync
    before = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=admin_user,
    )

    # ----------------------TEST THE SETUP--------------------------
    # Search as admin with access to both channels
    onyx_doc_message_strings = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=admin_user,
    )
    print(
        "\ntop_documents content before deleting for admin: ",
        onyx_doc_message_strings,
    )

    # Ensure admin user can see all messages
    assert public_message in onyx_doc_message_strings
    assert private_message in onyx_doc_message_strings
    assert message_to_delete in onyx_doc_message_strings

    # Search as test_user_1 with access to both channels
    onyx_doc_message_strings = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=test_user_1,
    )
    print(
        "\ntop_documents content before deleting for test_user_1: ",
        onyx_doc_message_strings,
    )

    # Ensure test_user_1 can see all messages
    assert public_message in onyx_doc_message_strings
    assert private_message in onyx_doc_message_strings
    assert message_to_delete in onyx_doc_message_strings

    # ----------------------MAKE THE CHANGES--------------------------
    # Delete messages
    print("\nDeleting message: ", message_to_delete)
    SlackManager.remove_message_from_channel(
        slack_client=slack_client,
        channel=private_channel,
        message=message_to_delete,
    )

    # Prune the cc_pair
    now = datetime.now(timezone.utc)
    CCPairManager.prune(cc_pair, user_performing_action=admin_user)
    CCPairManager.wait_for_prune(cc_pair, now, user_performing_action=admin_user)

    # ----------------------------VERIFY THE CHANGES---------------------------
    # Ensure admin user can't see deleted messages
    # Search as admin user with access to only the public channel
    onyx_doc_message_strings = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=admin_user,
    )
    print(
        "\ntop_documents content after deleting for admin: ",
        onyx_doc_message_strings,
    )

    # Ensure admin can't see deleted messages
    assert public_message in onyx_doc_message_strings
    assert private_message in onyx_doc_message_strings
    assert message_to_delete not in onyx_doc_message_strings

    # Ensure test_user_1 can't see deleted messages
    # Search as test_user_1 with access to only the public channel
    onyx_doc_message_strings = DocumentSearchManager.search_documents(
        query="favorite number",
        user_performing_action=test_user_1,
    )
    print(
        "\ntop_documents content after prune for test_user_1: ",
        onyx_doc_message_strings,
    )

    # Ensure test_user_1 can't see deleted messages
    assert public_message in onyx_doc_message_strings
    assert private_message in onyx_doc_message_strings
    assert message_to_delete not in onyx_doc_message_strings


================================================
FILE: backend/tests/integration/mock_services/docker-compose.mock-it-services.yml
================================================
version: '3.8'

services:
  mock_connector_server:
    build:
      context: ./mock_connector_server
      dockerfile: Dockerfile
    ports:
      - "8001:8001"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - onyx_default
networks:
  onyx_default:
    name: onyx_default
    external: true


================================================
FILE: backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py
================================================
import sys

import uvicorn
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from fastmcp import FastMCP
from fastmcp.server.auth import StaticTokenVerifier


def make_many_tools(mcp: FastMCP) -> None:
    def make_tool(i: int) -> None:
        @mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
        def tool_name(name: str) -> str:  # noqa: ARG001
            """Get secret value."""
            return f"Secret value {200 - i}!"

    for i in range(100):
        make_tool(i)


if __name__ == "__main__":
    # Accept only these tokens (treat them like API keys) and require a scope
    if len(sys.argv) > 1:
        api_key = sys.argv[1]
    else:
        api_key = "dev-api-key-123"

    if len(sys.argv) > 2:
        port = int(sys.argv[2])
    else:
        port = 8001

    auth = StaticTokenVerifier(
        tokens={
            api_key: {"client_id": "evan", "scopes": ["mcp:use"]},
        },
        required_scopes=["mcp:use"],
    )

    # Create FastMCP instance - it will handle /mcp path internally
    mcp = FastMCP("My HTTP MCP", auth=auth)
    make_many_tools(mcp)

    # Get the MCP HTTP app (configured to serve at /mcp)
    mcp_app = mcp.http_app()

    # Create wrapper FastAPI app with the MCP app's lifespan
    app = FastAPI(title="MCP API Key Test Server", lifespan=mcp_app.lifespan)

    # Health check (unprotected)
    @app.get("/healthz")
    def health() -> PlainTextResponse:
        return PlainTextResponse("ok")

    # Mount MCP app at root - it handles /mcp internally
    app.mount("/", mcp_app)

    # Run the server
    uvicorn.run(app, host="0.0.0.0", port=port)


================================================
FILE: backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_google_oauth.py
================================================
"""
MCP Test Server for Google OAuth Pass-Through Authentication

This server validates Google OAuth access tokens that are passed through from
Onyx. When users log into Onyx with Google OAuth, their access token is stored
and can be passed to MCP servers that require authentication.

This server validates those tokens by calling Google's tokeninfo endpoint.

Usage:
    python run_mcp_server_google_oauth.py [port]

Environment Variables:
    MCP_SERVER_HOST: Host to bind to (default: 127.0.0.1)
    MCP_SERVER_PUBLIC_HOST: Public hostname for the server
    MCP_SERVER_PUBLIC_URL: Public URL for the server (e.g., for proxied setups)
"""

import os
import sys
from typing import Any

import httpx
import uvicorn
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from fastmcp import FastMCP
from fastmcp.server.auth import AccessToken
from fastmcp.server.auth import TokenVerifier
from fastmcp.server.dependencies import get_access_token

# Google's tokeninfo endpoint for validating access tokens
GOOGLE_TOKENINFO_URL = "https://oauth2.googleapis.com/tokeninfo"


class GoogleOAuthTokenVerifier(TokenVerifier):
    """
    Token verifier that validates Google OAuth access tokens.

    Google access tokens are opaque tokens (not JWTs), so they need to be
    validated by calling Google's tokeninfo endpoint. This verifier makes
    an HTTP request to Google to validate the token and extract user info.

    This is useful for testing pass-through OAuth scenarios where Onyx
    forwards the user's Google OAuth token to an MCP server.
    """

    def __init__(
        self,
        required_scopes: list[str] | None = None,
        base_url: str | None = None,
    ):
        """
        Initialize the Google OAuth token verifier.

        Args:
            required_scopes: Optional list of scopes that must be present in the token.
                            Google tokens have scopes like 'openid', 'email', 'profile'.
            base_url: URL of this resource server (for RFC 8707)
        """
        super().__init__(
            base_url=base_url,
            required_scopes=required_scopes,
        )
        self._http_client: httpx.AsyncClient | None = None

    async def _get_http_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client for token validation."""
        if self._http_client is None or self._http_client.is_closed:
            self._http_client = httpx.AsyncClient(timeout=10.0)
        return self._http_client

    async def verify_token(self, token: str) -> AccessToken | None:
        """
        Verify a Google OAuth access token by calling Google's tokeninfo endpoint.

        Args:
            token: The Google OAuth access token to validate

        Returns:
            AccessToken object if valid, None if invalid or expired
        """
        try:
            client = await self._get_http_client()

            # Call Google's tokeninfo endpoint
            response = await client.get(
                GOOGLE_TOKENINFO_URL,
                params={"access_token": token},
            )

            if response.status_code != 200:
                # Token is invalid or expired
                return None

            token_info = response.json()

            # Check if token has an error (Google returns 200 with error field sometimes)
            if "error" in token_info:
                return None

            # Extract scopes from the token
            scopes_str = token_info.get("scope", "")
            scopes = scopes_str.split() if scopes_str else []

            # Check required scopes if configured
            if self.required_scopes:
                token_scopes = set(scopes)
                required = set(self.required_scopes)
                if not required.issubset(token_scopes):
                    return None

            # Extract client/user ID - prefer email for user identification
            client_id = (
                token_info.get("email")
                or token_info.get("sub")
                or token_info.get("user_id")
                or "unknown"
            )

            # Extract expiration time
            expires_in = token_info.get("expires_in")
            expires_at = None
            if expires_in:
                import time

                expires_at = int(time.time()) + int(expires_in)

            return AccessToken(
                token=token,
                client_id=client_id,
                scopes=scopes,
                expires_at=expires_at,
                claims=token_info,
            )

        except httpx.HTTPError:
            # Network error or timeout
            return None
        except Exception:
            # Any other error during validation
            return None

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._http_client and not self._http_client.is_closed:
            await self._http_client.aclose()


def make_tools(mcp: FastMCP) -> None:
    """Create test tools for the MCP server."""

    @mcp.tool(name="echo", description="Echo back the input message")
    def echo(message: str) -> str:
        """Echo the message back to the caller."""
        return f"You said: {message}"

    @mcp.tool(name="get_secret", description="Get a secret value (requires auth)")
    def get_secret(secret_name: str) -> str:
        """Get a secret value. This proves the token was validated."""
        return f"Secret value for '{secret_name}': super-secret-value-12345"

    @mcp.tool(name="whoami", description="Get information about the authenticated user")
    async def whoami() -> dict[str, Any]:
        """Get information about the authenticated user from their Google token."""
        tok = get_access_token()
        if not tok:
            return {"error": "Not authenticated"}

        return {
            "client_id": tok.client_id,
            "scopes": tok.scopes,
            "email": tok.claims.get("email"),
            "email_verified": tok.claims.get("email_verified"),
            "expires_in": tok.claims.get("expires_in"),
            "access_type": tok.claims.get("access_type"),
        }

    for i in range(5):

        @mcp.tool(name=f"oauth_tool_{i}", description=f"Test tool number {i}")
        def numbered_tool(name: str, _i: int = i) -> str:
            """A numbered test tool."""
            return f"Tool {_i} says hello to {name}!"


if __name__ == "__main__":
    port = int(sys.argv[1] if len(sys.argv) > 1 else "8006")

    # Get configuration from environment
    bind_host = os.getenv("MCP_SERVER_HOST", "127.0.0.1")
    public_host = os.getenv("MCP_SERVER_PUBLIC_HOST", bind_host)
    public_url = os.getenv("MCP_SERVER_PUBLIC_URL")

    # Optional: require specific scopes (Google tokens have scopes like 'email', 'profile')
    # Leave empty to accept any valid Google token
    required_scopes_str = os.getenv("MCP_GOOGLE_REQUIRED_SCOPES", "")
    required_scopes = (
        required_scopes_str.split(",") if required_scopes_str.strip() else None
    )

    print(f"Starting Google OAuth MCP Test Server on port {port}")
    print(f"Bind host: {bind_host}")
    print(f"Public host: {public_host}")
    if public_url:
        print(f"Public URL: {public_url}")
    if required_scopes:
        print(f"Required scopes: {required_scopes}")
    else:
        print("No specific scopes required - any valid Google token accepted")

    # Create the auth verifier
    auth = GoogleOAuthTokenVerifier(required_scopes=required_scopes)

    # Create FastMCP instance with auth
    mcp = FastMCP("Google OAuth Test MCP Server", auth=auth)
    make_tools(mcp)

    # Get the MCP HTTP app
    mcp_app = mcp.http_app()

    # Create wrapper FastAPI app
    app = FastAPI(
        title="MCP Google OAuth Test Server",
        description="MCP server that authenticates using Google OAuth tokens passed through from Onyx",
        lifespan=mcp_app.lifespan,
    )

    # Health check (unprotected)
    @app.get("/healthz")
    def health() -> PlainTextResponse:
        return PlainTextResponse("ok")

    # Info endpoint (unprotected) - useful for debugging
    @app.get("/info")
    def info() -> dict[str, Any]:
        return {
            "server": "Google OAuth MCP Test Server",
            "auth_type": "google_oauth_pass_through",
            "description": "Validates Google OAuth tokens passed from Onyx",
            "tokeninfo_endpoint": GOOGLE_TOKENINFO_URL,
            "required_scopes": required_scopes,
        }

    # Mount MCP app at root
    app.mount("/", mcp_app)

    # Run the server
    uvicorn.run(app, host=bind_host, port=port)


================================================
FILE: backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_no_auth.py
================================================
import os
import sys

from fastmcp import FastMCP

mcp = FastMCP("My HTTP MCP")


@mcp.tool
def hello(name: str) -> str:
    """Say hi."""
    return f"Hello, {name}!"


def make_many_tools() -> None:
    def make_tool(i: int) -> None:
        @mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
        def tool_name(name: str) -> str:  # noqa: ARG001
            """Get secret value."""
            return f"Secret value {100 - i}!"

    for i in range(100):
        make_tool(i)


if __name__ == "__main__":
    # Get port from command-line argument first (passed by test)
    port_from_arg = int(sys.argv[1]) if len(sys.argv) > 1 else None
    # Streamable HTTP transport (recommended)
    make_many_tools()
    host = os.getenv("MCP_SERVER_BIND_HOST", "0.0.0.0")
    # Use MOCK_MCP_SERVER_PORT to avoid conflicts with the real Onyx MCP server port (8090)
    # Priority: command-line arg > MOCK_MCP_SERVER_PORT > MCP_SERVER_PORT > default 8000
    if port_from_arg is not None:
        port = port_from_arg
    else:
        port = int(
            os.getenv("MOCK_MCP_SERVER_PORT") or os.getenv("MCP_SERVER_PORT") or "8000"
        )
    path = os.getenv("MCP_SERVER_PATH", "/mcp")
    mcp.run(transport="http", host=host, port=port, path=path)


================================================
FILE: backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py
================================================
import os
from collections.abc import Awaitable
from collections.abc import Callable
from collections.abc import Iterable
from typing import Any
from urllib.parse import urlsplit
from urllib.parse import urlunsplit

import uvicorn
from fastapi import FastAPI
from fastapi import Request
from fastapi.responses import JSONResponse
from fastapi.responses import PlainTextResponse
from fastapi.responses import Response
from fastmcp import FastMCP
from fastmcp.server.auth.providers.jwt import JWTVerifier
from fastmcp.server.dependencies import get_access_token
from starlette.middleware.base import BaseHTTPMiddleware

# uncomment for debug logs
# logging.basicConfig(level=logging.DEBUG)

"""
Setup Okta:
1. Create an authorization Server (Admin Console → Security →
API → Authorization Servers), and get the Issuer, JWKS uri,
audience (i.e. api://mcp). Add the mcp:use scope.
Grant types should be Authorization Code and Refresh Token.
policy should allow your client (or All Clients) to grant oidc default scopes + mcp:use
WARNING: due to the order of discovery urls, you actually need to use the default authorization server
until Okta updates where their discovery urls are or the client library stops trying
to go to <base_url>/.well-known/oauth-authorization-server before trying the fallback

2. Create a client (Admin Console → Applications → Create App Integration)
Enable authorization code and store the client id and secret.
"""


def make_many_tools(mcp: FastMCP) -> None:
    def make_tool(i: int) -> None:
        @mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
        def tool_name(name: str) -> str:  # noqa: ARG001
            """Get secret value."""
            return f"Secret value {500 - i}!"

    for i in range(100):
        make_tool(i)

    @mcp.tool
    async def whoami() -> dict[str, Any]:
        tok = get_access_token()  # None if unauthenticated
        return {
            "client_id": tok.client_id if tok else None,
            "scopes": tok.scopes if tok else [],
            "claims": tok.claims if tok else {},
        }


# ---------- FASTAPI APP ----------


def init_app(
    app: FastAPI,
    mcp_resource_url: str,
    authorization_servers: list[str],
    scopes_supported: list[str],
) -> None:
    # 1) Protected Resource Metadata (RFC 9728) at well-known URL.
    #    We accept both with and without the trailing resource suffix to be lenient in dev.
    @app.get("/.well-known/oauth-protected-resource")
    @app.get("/.well-known/oauth-protected-resource/{_suffix:path}")
    def oauth_protected_resource(_suffix: str = "") -> JSONResponse:
        """
        Return PRM document. The 'resource' MUST equal the MCP resource identifier (the URL clients use),
        and should be validated by clients per RFC 9728 §3.3.
        """
        return JSONResponse(
            {
                "resource": mcp_resource_url,
                "authorization_servers": authorization_servers,
                "bearer_methods_supported": ["header"],
                "scopes_supported": scopes_supported,
                # (Optional extras: jwks_uri, resource_signing_alg_values_supported, etc.)
            }
        )

    # Health check (unprotected)
    @app.get("/healthz")
    def health() -> PlainTextResponse:
        return PlainTextResponse("ok")


def metadata_url_for_resource(resource_url: str) -> str:
    """
    RFC 9728: insert '/.well-known/oauth-protected-resource' between host and path.
    If the resource has a path (e.g., '/mcp'), append it after the well-known suffix.
    """
    u = urlsplit(resource_url)
    path = u.path.lstrip("/")
    suffix = "/.well-known/oauth-protected-resource"
    if path:
        suffix += f"/{path}"
    return urlunsplit((u.scheme, u.netloc, suffix, "", ""))


PRM_URL = "replace me"


# 2) Middleware that ensures 401s include a proper WWW-Authenticate challenge
#    pointing clients to our PRM URL (RFC 9728 §5.1), and includes RFC 6750 error info.
class WWWAuthenticateMiddleware(BaseHTTPMiddleware):
    def __init__(self, app: FastAPI, protected_prefixes: Iterable[str]) -> None:
        super().__init__(app)
        self.protected_prefixes = tuple(protected_prefixes)

    async def dispatch(
        self, request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        # Only guard MCP endpoints (both Streamable HTTP and SSE)
        if not request.url.path.startswith(self.protected_prefixes):
            return await call_next(request)

        # Let FastMCP/verifier run first
        response = await call_next(request)

        # If unauthenticated or invalid token, attach RFC-compliant challenge header
        if response.status_code == 401:
            # RFC 9728: include resource_metadata param pointing to PRM URL.
            # RFC 6750: include error + error_description when appropriate.
            challenge = f'Bearer resource_metadata="{PRM_URL}", error="invalid_token", error_description="Authentication required"'
            # Don't clobber if already present; append or set.
            if "www-authenticate" in response.headers:
                response.headers["www-authenticate"] += ", " + challenge
            else:
                response.headers["www-authenticate"] = challenge
            # Helpful cache headers
            response.headers.setdefault("cache-control", "no-store")
            response.headers.setdefault("pragma", "no-cache")
        return response


if __name__ == "__main__":
    import sys

    port = int(sys.argv[1] if len(sys.argv) > 1 else "8004")

    audience = os.getenv("MCP_OAUTH_AUDIENCE", "api://mcp")
    issuer = os.getenv(
        "MCP_OAUTH_ISSUER",
        "https://test-domain.okta.com/oauth2/default?well_known_override=https://test-domain.okta.com/oauth2/<as_id>/.well-known/oauth-authorization-server",
    )  # NOTE: the mcp client library currently tries the root discovery url before
    # falling back to the one actually used by Okta. Our client code lets you specify this well_known_override
    # for Okta and other Idps that use these discovery urls.

    # issuer = os.getenv("MCP_OAUTH_ISSUER", "https://test-domain.okta.com/.well-known/oauth-authorization-server?issuer=https://test-domain.okta.com/oauth2/<auth_server_id>")
    jwks_uri = os.getenv(
        "MCP_OAUTH_JWKS_URI", "https://test-domain.okta.com/oauth2/default/v1/keys"
    )
    required_scopes = os.getenv("MCP_OAUTH_REQUIRED_SCOPES", "mcp:use")
    print(f"Required scopes: {required_scopes}")
    print(f"Audience: {audience}")
    print(f"Issuer: {issuer}")
    print(f"JWKS URI: {jwks_uri}")

    verifier = JWTVerifier(
        issuer=issuer.split("?")[0],  # ignore auth url override if present
        audience=audience,  # exactly what you set on the AS
        jwks_uri=jwks_uri,
        required_scopes=required_scopes.split(
            ","
        ),  # must be present in the token's `scp`
    )

    bind_host = os.getenv("MCP_SERVER_HOST", "127.0.0.1")
    public_host = os.getenv("MCP_SERVER_PUBLIC_HOST", bind_host)
    public_url = os.getenv("MCP_SERVER_PUBLIC_URL")

    mcp = FastMCP("My HTTP MCP", auth=verifier)
    make_many_tools(mcp)
    mcp_app = mcp.http_app()

    app = FastAPI(title="MCP over HTTP/SSE with OAuth", lifespan=mcp_app.lifespan)

    if public_url:
        normalized_public_url = public_url.rstrip("/")
        if not normalized_public_url.endswith("/mcp"):
            normalized_public_url = f"{normalized_public_url}/mcp"
        mcp_resource_url = f"{normalized_public_url}/"
    else:
        mcp_resource_url = f"http://{public_host}:{port}/mcp/"
    authorization_servers = [issuer]
    scopes_supported = ["mcp:use"]

    init_app(app, mcp_resource_url, authorization_servers, scopes_supported)
    PRM_URL = metadata_url_for_resource(mcp_resource_url)
    print(f"PRM URL: {PRM_URL}")
    print(f"MCP Resource URL: {mcp_resource_url}")
    print(f"Authorization Servers: {authorization_servers}")
    print(f"Scopes Supported: {scopes_supported}")

    # Apply middleware at the parent app so it wraps mounted sub-apps too
    app.add_middleware(WWWAuthenticateMiddleware, protected_prefixes=["/mcp", "/sse"])

    # 3) Mount MCP apps
    # Streamable HTTP transport (recommended for modern MCP clients)
    app.mount("/", mcp_app)
    # SSE transport (some clients still use this)
    # app.mount("/sse", mcp.sse_app()) # TODO: v2

    uvicorn.run(app, host=bind_host, port=port)


================================================
FILE: backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_per_user_key.py
================================================
import sys
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Dict
from typing import Optional

import bcrypt
from fastmcp import FastMCP
from fastmcp.server.auth.auth import AccessToken
from fastmcp.server.auth.auth import TokenVerifier
from fastmcp.server.dependencies import get_access_token

# pip install fastmcp bcrypt


# ---- pretend database --------------------------------------------------------
# Keys look like: "mcp_live_<key_id>_<secret>"
def _hash(secret: str) -> bytes:
    return bcrypt.hashpw(secret.encode(), bcrypt.gensalt(rounds=12))


API_KEY_RECORDS: Dict[str, Dict[str, Any]] = {
    # key_id -> record
    "kid_alice_001": {
        "user_id": "alice",
        "hashed_secret": _hash("S3cr3tAlice"),
        "scopes": ["mcp:use"],
        "revoked": False,
        "expires_at": None,  # or datetime(...)
        "metadata": {"plan": "pro"},
    },
    "kid_bob_001": {
        "user_id": "bob",
        "hashed_secret": _hash("S3cr3tBob"),
        "scopes": ["mcp:use"],
        "revoked": False,
        "expires_at": None,
        "metadata": {"plan": "free"},
    },
}

# These are inferrable from the file anyways, no need to obfuscate.
# use them to test your auth with this server
#
# mcp_live-kid_alice_001-S3cr3tAlice
# mcp_live-kid_bob_001-S3cr3tBob


# ---- verifier ---------------------------------------------------------------
class ApiKeyVerifier(TokenVerifier):
    """
    Accepts API keys in Authorization: Bearer mcp_live_<key_id>_<secret>
    Looks up <key_id> in storage, bcrypt-verifies <secret>, returns AccessToken.
    """

    def __init__(self, api_key_dict: dict[str, Any]):
        super().__init__()
        self.api_key_dict = api_key_dict

    async def verify_token(self, token: str) -> Optional[AccessToken]:
        # print(f"Verifying token: {token}")
        try:
            prefix, key_id, secret = token.split("-")
            # print(f"Prefix: {prefix}, Key ID: {key_id}, Secret: {secret}")
            if prefix not in ("mcp_live", "mcp_test"):
                return None
        except ValueError:
            return None

        rec = self.api_key_dict.get(key_id)
        if not rec or rec.get("revoked"):
            return None
        if rec.get("expires_at") and rec["expires_at"] < datetime.now(timezone.utc):
            return None

        # constant-time bcrypt verification
        if not bcrypt.checkpw(secret.encode(), rec["hashed_secret"]):
            return None

        # Build an AccessToken with claims FastMCP can pass to your tools
        return AccessToken(
            token=token,
            client_id=rec["user_id"],
            scopes=rec.get("scopes", []),
            expires_at=rec.get("expires_at"),
            resource=None,
            claims={"key_id": key_id, **rec.get("metadata", {})},
        )


# ---- server -----------------------------------------------------------------


def make_many_tools(mcp: FastMCP) -> None:
    def make_tool(i: int) -> None:
        @mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
        def tool_name(name: str) -> str:  # noqa: ARG001
            """Get secret value."""
            return f"Secret value {400 - i}!"

    for i in range(100):
        make_tool(i)


if __name__ == "__main__":
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
    else:
        port = 8003

    mcp = FastMCP("My HTTP MCP", auth=ApiKeyVerifier(API_KEY_RECORDS))

    @mcp.tool
    def whoami() -> dict:
        """Return authenticated identity info (for demo)."""
        # FastMCP exposes the verified AccessToken to tools; see docs for helpers
        tok = get_access_token()
        return {
            "user": tok.client_id if tok else None,
            "scopes": tok.scopes if tok else [],
        }

    make_many_tools(mcp)
    mcp.run(transport="http", host="127.0.0.1", port=port, path="/mcp")


================================================
FILE: backend/tests/integration/mock_services/mock_connector_server/Dockerfile
================================================
FROM python:3.11.7-slim-bookworm

WORKDIR /app

RUN pip install --no-cache-dir "pydantic-core>=2.28.0" fastapi uvicorn

COPY ./main.py /app/main.py

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001"]


================================================
FILE: backend/tests/integration/mock_services/mock_connector_server/main.py
================================================
from fastapi import FastAPI
from fastapi import HTTPException
from pydantic import BaseModel
from pydantic import Field

# We would like to import these, but it makes building this so much harder/slower
# from onyx.connectors.mock_connector.connector import SingleConnectorYield
# from onyx.connectors.models import ConnectorCheckpoint

app = FastAPI()


# Global state to store connector behavior configuration
class ConnectorBehavior(BaseModel):
    connector_yields: list[dict] = Field(
        default_factory=list
    )  # really list[SingleConnectorYield]
    called_with_checkpoints: list[dict] = Field(
        default_factory=list
    )  # really list[ConnectorCheckpoint]


current_behavior: ConnectorBehavior = ConnectorBehavior()


@app.post("/set-behavior")
async def set_behavior(behavior: list[dict]) -> None:
    """Set the behavior for the next connector run"""
    global current_behavior
    current_behavior = ConnectorBehavior(connector_yields=behavior)


@app.get("/get-documents")
async def get_documents() -> list[dict]:
    """Get the next batch of documents and update the checkpoint"""
    global current_behavior

    if not current_behavior.connector_yields:
        raise HTTPException(
            status_code=400, detail="No documents or failures configured"
        )

    connector_yields = current_behavior.connector_yields

    # Clear the current behavior after returning it
    current_behavior = ConnectorBehavior()

    return connector_yields


@app.post("/add-checkpoint")
async def add_checkpoint(checkpoint: dict) -> None:
    """Add a checkpoint to the list of checkpoints. Called by the MockConnector."""
    global current_behavior
    current_behavior.called_with_checkpoints.append(checkpoint)


@app.get("/get-checkpoints")
async def get_checkpoints() -> list[dict]:
    """Get the list of checkpoints. Used by the test to verify the
    proper checkpoint ordering."""
    global current_behavior
    return current_behavior.called_with_checkpoints


@app.post("/reset")
async def reset() -> None:
    """Reset the connector behavior to default"""
    global current_behavior
    current_behavior = ConnectorBehavior()


@app.get("/health")
async def health_check() -> dict[str, str]:
    """Health check endpoint"""
    return {"status": "healthy"}


================================================
FILE: backend/tests/integration/multitenant_tests/discord_bot/test_discord_bot_multitenant.py
================================================
"""Multi-tenant isolation tests for Discord bot.

These tests ensure tenant isolation and prevent data leakage between tenants.
Tests follow the multi-tenant integration test pattern using API requests.
"""

from unittest.mock import patch
from uuid import uuid4

import pytest
import requests

from onyx.configs.constants import AuthType
from onyx.db.discord_bot import get_guild_config_by_registration_key
from onyx.db.discord_bot import register_guild
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import UserRole
from onyx.onyxbot.discord.cache import DiscordCacheManager
from onyx.server.manage.discord_bot.utils import generate_discord_registration_key
from onyx.server.manage.discord_bot.utils import parse_discord_registration_key
from onyx.server.manage.discord_bot.utils import REGISTRATION_KEY_PREFIX
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


class TestBotConfigIsolationCloudMode:
    """Tests for bot config isolation in cloud mode."""

    def test_cannot_create_bot_config_in_cloud_mode(self) -> None:
        """Bot config creation is blocked in cloud mode."""
        with patch("onyx.configs.app_configs.AUTH_TYPE", AuthType.CLOUD):
            from fastapi import HTTPException

            from onyx.server.manage.discord_bot.api import _check_bot_config_api_access

            with pytest.raises(HTTPException) as exc_info:
                _check_bot_config_api_access()

            assert exc_info.value.status_code == 403
            assert "Cloud" in str(exc_info.value.detail)

    def test_bot_token_from_env_only_in_cloud(self) -> None:
        """Bot token comes from env var in cloud mode, ignores DB."""
        from onyx.onyxbot.discord.utils import get_bot_token

        with (
            patch("onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN", "env_token"),
            patch("onyx.onyxbot.discord.utils.AUTH_TYPE", AuthType.CLOUD),
        ):
            result = get_bot_token()

        assert result == "env_token"


class TestGuildRegistrationIsolation:
    """Tests for guild registration isolation between tenants."""

    def test_guild_can_only_register_to_one_tenant(self) -> None:
        """Guild registered to tenant 1 cannot be registered to tenant 2."""
        cache = DiscordCacheManager()

        # Register guild to tenant 1
        cache._guild_tenants[123456789] = "tenant1"

        # Check if guild is already registered
        existing = cache.get_tenant(123456789)

        assert existing is not None
        assert existing == "tenant1"

    def test_registration_key_tenant_mismatch(self) -> None:
        """Key created in tenant 1 cannot be used in tenant 2 context."""
        key = generate_discord_registration_key("tenant1")

        # Parse the key to get tenant
        parsed_tenant = parse_discord_registration_key(key)

        assert parsed_tenant == "tenant1"
        assert parsed_tenant != "tenant2"

    def test_registration_key_encodes_correct_tenant(self) -> None:
        """Key format discord_<tenant_id>.<token> encodes correct tenant."""
        tenant_id = "my_tenant_123"
        key = generate_discord_registration_key(tenant_id)

        assert key.startswith(REGISTRATION_KEY_PREFIX)
        assert "my_tenant_123" in key or "my%5Ftenant%5F123" in key

        parsed = parse_discord_registration_key(key)
        assert parsed == tenant_id


class TestGuildDataIsolation:
    """Tests for guild data isolation between tenants via API."""

    def test_tenant_cannot_see_other_tenant_guilds(
        self,
        reset_multitenant: None,  # noqa: ARG002
    ) -> None:
        """Guilds created in tenant 1 are not visible from tenant 2.

        Creates guilds via API in tenant 1, then queries from tenant 2
        context to verify the guilds are not visible.
        """
        unique = uuid4().hex

        # Create admin user for tenant 1
        admin_user1: DATestUser = UserManager.create(
            email=f"discord_admin1_{unique}@example.com",
        )
        assert UserManager.is_role(admin_user1, UserRole.ADMIN)

        # Create admin user for tenant 2
        admin_user2: DATestUser = UserManager.create(
            email=f"discord_admin2_{unique}@example.com",
        )
        assert UserManager.is_role(admin_user2, UserRole.ADMIN)

        # Create a guild registration key in tenant 1
        response1 = requests.post(
            f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
            headers=admin_user1.headers,
        )

        # If Discord bot feature is not enabled, skip the test
        if response1.status_code == 404:
            pytest.skip("Discord bot feature not enabled")

        assert response1.ok, f"Failed to create guild in tenant 1: {response1.text}"
        guild1_data = response1.json()
        guild1_id = guild1_data["id"]

        try:
            # List guilds from tenant 1 - should see the guild
            list_response1 = requests.get(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
                headers=admin_user1.headers,
            )
            assert list_response1.ok
            tenant1_guilds = list_response1.json()
            tenant1_guild_ids = [g["id"] for g in tenant1_guilds]
            assert guild1_id in tenant1_guild_ids

            # List guilds from tenant 2 - should NOT see tenant 1's guild
            list_response2 = requests.get(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
                headers=admin_user2.headers,
            )
            assert list_response2.ok
            tenant2_guilds = list_response2.json()
            tenant2_guild_ids = [g["id"] for g in tenant2_guilds]
            assert guild1_id not in tenant2_guild_ids

        finally:
            # Cleanup - delete guild from tenant 1
            requests.delete(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}",
                headers=admin_user1.headers,
            )

    def test_guild_list_returns_only_own_tenant(
        self,
        reset_multitenant: None,  # noqa: ARG002
    ) -> None:
        """List guilds returns exactly the guilds for that tenant.

        Creates 1 guild in each tenant, registers them with different data,
        and verifies each tenant only sees their own guild.
        """
        unique = uuid4().hex

        # Create admin users for two tenants
        admin_user1: DATestUser = UserManager.create(
            email=f"discord_list1_{unique}@example.com",
        )
        admin_user2: DATestUser = UserManager.create(
            email=f"discord_list2_{unique}@example.com",
        )

        # Create 1 guild in tenant 1
        response1 = requests.post(
            f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
            headers=admin_user1.headers,
        )
        if response1.status_code == 404:
            pytest.skip("Discord bot feature not enabled")
        assert response1.ok, f"Failed to create guild in tenant 1: {response1.text}"
        guild1_data = response1.json()
        guild1_id = guild1_data["id"]
        registration_key1 = guild1_data["registration_key"]
        tenant1_id = parse_discord_registration_key(registration_key1)
        assert (
            tenant1_id is not None
        ), "Failed to parse tenant ID from registration key 1"

        # Create 1 guild in tenant 2
        response2 = requests.post(
            f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
            headers=admin_user2.headers,
        )
        assert response2.ok, f"Failed to create guild in tenant 2: {response2.text}"
        guild2_data = response2.json()
        guild2_id = guild2_data["id"]
        registration_key2 = guild2_data["registration_key"]
        tenant2_id = parse_discord_registration_key(registration_key2)
        assert (
            tenant2_id is not None
        ), "Failed to parse tenant ID from registration key 2"

        # Verify tenant IDs are different
        assert (
            tenant1_id != tenant2_id
        ), "Tenant 1 and tenant 2 should have different tenant IDs"

        # Register guild 1 with tenant 1's context - populate with different data
        with get_session_with_tenant(tenant_id=tenant1_id) as db_session:
            config1 = get_guild_config_by_registration_key(
                db_session, registration_key1
            )
            assert config1 is not None, "Guild config 1 should exist"
            register_guild(
                db_session=db_session,
                config=config1,
                guild_id=111111111111111111,  # Different Discord guild ID
                guild_name="Tenant 1 Server",  # Different guild name
            )
            db_session.commit()

        # Register guild 2 with tenant 2's context - populate with different data
        with get_session_with_tenant(tenant_id=tenant2_id) as db_session:
            config2 = get_guild_config_by_registration_key(
                db_session, registration_key2
            )
            assert config2 is not None, "Guild config 2 should exist"
            register_guild(
                db_session=db_session,
                config=config2,
                guild_id=222222222222222222,  # Different Discord guild ID
                guild_name="Tenant 2 Server",  # Different guild name
            )
            db_session.commit()

        try:
            # Verify tenant 1 sees only their guild
            list_response1 = requests.get(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
                headers=admin_user1.headers,
            )
            assert list_response1.ok
            tenant1_guilds = list_response1.json()

            # Tenant 1 should see exactly 1 guild
            assert (
                len(tenant1_guilds) == 1
            ), f"Tenant 1 should see 1 guild, got {len(tenant1_guilds)}"

            # Verify tenant 1's guild has the correct data
            tenant1_guild = tenant1_guilds[0]
            assert (
                tenant1_guild["id"] == guild1_id
            ), "Tenant 1 should see their own guild"
            assert (
                tenant1_guild["guild_id"] == 111111111111111111
            ), f"Tenant 1's guild should have guild_id 111111111111111111, got {tenant1_guild['guild_id']}"
            assert (
                tenant1_guild["guild_name"] == "Tenant 1 Server"
            ), f"Tenant 1's guild should have name 'Tenant 1 Server', got {tenant1_guild['guild_name']}"
            assert (
                tenant1_guild["registered_at"] is not None
            ), "Tenant 1's guild should be registered"

            # Tenant 1 should NOT see tenant 2's guild
            assert (
                tenant1_guild["guild_id"] != 222222222222222222
            ), "Tenant 1 should not see tenant 2's guild_id"
            assert (
                tenant1_guild["guild_name"] != "Tenant 2 Server"
            ), "Tenant 1 should not see tenant 2's guild_name"

            # Verify tenant 2 sees only their guild
            list_response2 = requests.get(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
                headers=admin_user2.headers,
            )
            assert list_response2.ok
            tenant2_guilds = list_response2.json()

            # Tenant 2 should see exactly 1 guild
            assert (
                len(tenant2_guilds) == 1
            ), f"Tenant 2 should see 1 guild, got {len(tenant2_guilds)}"

            # Verify tenant 2's guild has the correct data
            tenant2_guild = tenant2_guilds[0]
            assert (
                tenant2_guild["id"] == guild2_id
            ), "Tenant 2 should see their own guild"
            assert (
                tenant2_guild["guild_id"] == 222222222222222222
            ), f"Tenant 2's guild should have guild_id 222222222222222222, got {tenant2_guild['guild_id']}"
            assert (
                tenant2_guild["guild_name"] == "Tenant 2 Server"
            ), f"Tenant 2's guild should have name 'Tenant 2 Server', got {tenant2_guild['guild_name']}"
            assert (
                tenant2_guild["registered_at"] is not None
            ), "Tenant 2's guild should be registered"

            # Tenant 2 should NOT see tenant 1's guild
            assert (
                tenant2_guild["guild_id"] != 111111111111111111
            ), "Tenant 2 should not see tenant 1's guild_id"
            assert (
                tenant2_guild["guild_name"] != "Tenant 1 Server"
            ), "Tenant 2 should not see tenant 1's guild_name"

            # Verify the guilds are different (different data)
            assert (
                tenant1_guild["guild_id"] != tenant2_guild["guild_id"]
            ), "Guilds should have different Discord guild IDs"
            assert (
                tenant1_guild["guild_name"] != tenant2_guild["guild_name"]
            ), "Guilds should have different names"

        finally:
            # Cleanup
            requests.delete(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}",
                headers=admin_user1.headers,
            )
            requests.delete(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild2_id}",
                headers=admin_user2.headers,
            )


class TestGuildAccessIsolation:
    """Tests for guild access isolation between tenants."""

    def test_tenant_cannot_access_other_tenant_guild(
        self,
        reset_multitenant: None,  # noqa: ARG002
    ) -> None:
        """Tenant 2 cannot access or modify tenant 1's guild by ID.

        Creates a guild in tenant 1, then attempts to access it from tenant 2.
        """
        unique = uuid4().hex

        # Create admin users for two tenants
        admin_user1: DATestUser = UserManager.create(
            email=f"discord_access1_{unique}@example.com",
        )
        admin_user2: DATestUser = UserManager.create(
            email=f"discord_access2_{unique}@example.com",
        )

        # Create a guild in tenant 1
        response = requests.post(
            f"{API_SERVER_URL}/manage/admin/discord-bot/guilds",
            headers=admin_user1.headers,
        )
        if response.status_code == 404:
            pytest.skip("Discord bot feature not enabled")
        assert response.ok
        guild1_id = response.json()["id"]

        try:
            # Tenant 2 tries to get the guild - should fail (404 or 403)
            get_response = requests.get(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}",
                headers=admin_user2.headers,
            )
            # Should either return 404 (not found) or 403 (forbidden)
            assert get_response.status_code in [
                403,
                404,
            ], f"Expected 403 or 404, got {get_response.status_code}"

            # Tenant 2 tries to delete the guild - should fail
            delete_response = requests.delete(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}",
                headers=admin_user2.headers,
            )
            assert delete_response.status_code in [403, 404]

        finally:
            # Cleanup - delete from tenant 1
            requests.delete(
                f"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}",
                headers=admin_user1.headers,
            )


class TestCacheManagerIsolation:
    """Tests for cache manager tenant isolation."""

    def test_cache_maps_guild_to_correct_tenant(self) -> None:
        """Cache correctly maps guild_id to tenant_id."""
        cache = DiscordCacheManager()

        # Set up mappings
        cache._guild_tenants[111] = "tenant1"
        cache._guild_tenants[222] = "tenant2"
        cache._guild_tenants[333] = "tenant1"

        assert cache.get_tenant(111) == "tenant1"
        assert cache.get_tenant(222) == "tenant2"
        assert cache.get_tenant(333) == "tenant1"
        assert cache.get_tenant(444) is None

    def test_api_key_per_tenant_isolation(self) -> None:
        """Each tenant has unique API key."""
        cache = DiscordCacheManager()

        cache._api_keys["tenant1"] = "key_for_tenant1"
        cache._api_keys["tenant2"] = "key_for_tenant2"

        assert cache.get_api_key("tenant1") == "key_for_tenant1"
        assert cache.get_api_key("tenant2") == "key_for_tenant2"
        assert cache.get_api_key("tenant1") != cache.get_api_key("tenant2")


class TestAPIRequestIsolation:
    """Tests for API request isolation between tenants."""

    @pytest.mark.asyncio
    async def test_discord_bot_uses_tenant_specific_api_key(self) -> None:
        """Message from guild in tenant 1 uses tenant 1's API key."""
        cache = DiscordCacheManager()
        cache._guild_tenants[123456] = "tenant1"
        cache._api_keys["tenant1"] = "tenant1_api_key"
        cache._api_keys["tenant2"] = "tenant2_api_key"

        # When processing message from guild 123456
        tenant = cache.get_tenant(123456)
        assert tenant is not None
        api_key = cache.get_api_key(tenant)

        assert tenant == "tenant1"
        assert api_key == "tenant1_api_key"
        assert api_key != "tenant2_api_key"

    @pytest.mark.asyncio
    async def test_guild_message_routes_to_correct_tenant(self) -> None:
        """Message from registered guild routes to correct tenant context."""
        cache = DiscordCacheManager()
        cache._guild_tenants[999] = "target_tenant"
        cache._api_keys["target_tenant"] = "target_key"

        # Simulate message routing
        guild_id = 999
        tenant = cache.get_tenant(guild_id)
        api_key = cache.get_api_key(tenant) if tenant else None

        assert tenant == "target_tenant"
        assert api_key == "target_key"


================================================
FILE: backend/tests/integration/multitenant_tests/invitation/test_user_invitation.py
================================================
from uuid import uuid4

from onyx.db.models import UserRole
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser

INVITED_BASIC_USER = "basic_user"
INVITED_BASIC_USER_EMAIL = "basic_user@example.com"


def test_admin_can_invite_users(reset_multitenant: None) -> None:  # noqa: ARG001
    """Test that an admin can invite both registered and non-registered users."""
    # Create first user (admin)
    unique = uuid4().hex
    admin_user: DATestUser = UserManager.create(name=f"admin_{unique}")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Create second user
    invited_user: DATestUser = UserManager.create(name=f"admin_invited_{unique}")
    assert UserManager.is_role(invited_user, UserRole.ADMIN)

    # Admin user invites the previously registered and non-registered user
    UserManager.invite_user(invited_user.email, admin_user)
    UserManager.invite_user(f"{INVITED_BASIC_USER}_{unique}@example.com", admin_user)

    # Verify users are in the invited users list
    invited_users = UserManager.get_invited_users(admin_user)
    assert invited_user.email in [
        user.email for user in invited_users
    ], f"User {invited_user.email} not found in invited users list"


def test_non_registered_user_gets_basic_role(
    reset_multitenant: None,  # noqa: ARG001
) -> None:
    """Test that a non-registered user gets a BASIC role when they register after being invited."""
    # Create admin user
    unique = uuid4().hex
    admin_user: DATestUser = UserManager.create(name=f"admin_{unique}")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Admin user invites a non-registered user
    invited_email = f"{INVITED_BASIC_USER}_{unique}@example.com"
    UserManager.invite_user(invited_email, admin_user)

    # Non-registered user registers
    invited_basic_user: DATestUser = UserManager.create(
        name=f"{INVITED_BASIC_USER}_{unique}", email=invited_email
    )
    assert UserManager.is_role(invited_basic_user, UserRole.BASIC)


def test_user_can_accept_invitation(
    reset_multitenant: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Test that a user can accept an invitation and join the organization with BASIC role."""
    # Create admin user
    unique = uuid4().hex
    admin_user: DATestUser = UserManager.create(name=f"admin_{unique}")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Create a user to be invited
    invited_user_email = f"invited_user_{unique}@example.com"

    # User registers with the same email as the invitation
    invited_user: DATestUser = UserManager.create(
        name=f"invited_user_{unique}", email=invited_user_email
    )
    # Admin user invites the user
    UserManager.invite_user(invited_user_email, admin_user)

    # Get user info to check tenant information
    user_info = UserManager.get_user_info(invited_user)

    # Extract the tenant_id from the invitation
    invited_tenant_id = (
        user_info.tenant_info.invitation.tenant_id
        if user_info.tenant_info and user_info.tenant_info.invitation
        else None
    )
    assert invited_tenant_id is not None, "Expected to find an invitation tenant_id"

    # User accepts invitation
    UserManager.accept_invitation(invited_tenant_id, invited_user)

    # User needs to reauthenticate after accepting invitation
    # Simulate this by creating a new user instance with the same credentials
    authenticated_user: DATestUser = UserManager.create(
        name="invited_user", email=invited_user_email
    )

    # Get updated user info after accepting invitation and reauthenticating
    updated_user_info = UserManager.get_user_info(authenticated_user)

    # Verify the user has BASIC role in the organization
    assert (
        updated_user_info.role == UserRole.BASIC
    ), f"Expected user to have BASIC role, but got {updated_user_info.role}"

    # Verify user is in the organization
    user_page = UserManager.get_user_page(
        user_performing_action=admin_user, role_filter=[UserRole.BASIC]
    )

    # Check if the invited user is in the list of users with BASIC role
    invited_user_emails = [user.email for user in user_page.items]
    assert invited_user_email in invited_user_emails, (
        f"User {invited_user_email} not found in the list of basic users "
        f"in the organization. Available users: {invited_user_emails}"
    )

    invited_users = UserManager.get_invited_users(admin_user)
    assert invited_user.email not in [
        user.email for user in invited_users
    ], f"User {invited_user.email} should not be found in invited users list after accepting invitation"


================================================
FILE: backend/tests/integration/multitenant_tests/migrations/test_run_multitenant_migrations.py
================================================
"""
Black-box integration tests for the parallel alembic migration runner
(backend/alembic/run_multitenant_migrations.py).

The script is invoked as a subprocess — the same way it would be used in
production.  Tests verify exit codes and stdout messages.

Usage:
    pytest tests/integration/tests/migrations/test_run_multitenant_migrations.py -v
"""

from __future__ import annotations

import os
import subprocess
import sys
import time
import uuid
from collections.abc import Generator

import pytest
from sqlalchemy import text
from sqlalchemy.engine import Engine

from onyx.db.engine.sql_engine import SqlEngine

# Resolve the backend/ directory once so every helper can use it as cwd.
_BACKEND_DIR = os.path.normpath(
    os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
)

_DROP_SCHEMA_MAX_RETRIES = 3
_DROP_SCHEMA_RETRY_DELAY_SEC = 2


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _run_script(
    *extra_args: str,
    env_override: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run ``python alembic/run_multitenant_migrations.py`` from the backend/ directory."""
    env = {**os.environ, **(env_override or {})}
    return subprocess.run(
        [sys.executable, "alembic/run_multitenant_migrations.py", *extra_args],
        cwd=_BACKEND_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )


def _force_drop_schema(engine: Engine, schema: str) -> None:
    """Terminate backends using *schema* then drop it, retrying on deadlock.

    Background Celery workers may discover test schemas (they match the
    ``tenant_`` prefix) and hold locks on tables inside them.  A bare
    ``DROP SCHEMA … CASCADE`` can deadlock with those workers, so we
    first kill their connections and retry if we still hit a deadlock.
    """
    for attempt in range(_DROP_SCHEMA_MAX_RETRIES):
        try:
            with engine.connect() as conn:
                conn.execute(
                    text(
                        """
                        SELECT pg_terminate_backend(l.pid)
                        FROM pg_locks l
                        JOIN pg_class c ON c.oid = l.relation
                        JOIN pg_namespace n ON n.oid = c.relnamespace
                        WHERE n.nspname = :schema
                          AND l.pid != pg_backend_pid()
                        """
                    ),
                    {"schema": schema},
                )
                conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
                conn.commit()
            return
        except Exception:
            if attempt == _DROP_SCHEMA_MAX_RETRIES - 1:
                raise
            time.sleep(_DROP_SCHEMA_RETRY_DELAY_SEC)


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def engine() -> Engine:
    return SqlEngine.get_engine()


@pytest.fixture
def current_head_rev() -> str:
    """Get the head revision from the alembic script directory.

    Runs ``alembic heads`` as a subprocess — the same source of truth that
    ``run_multitenant_migrations.py`` uses internally.
    """
    result = subprocess.run(
        ["alembic", "heads", "--resolve-dependencies"],
        cwd=_BACKEND_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    assert (
        result.returncode == 0
    ), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}"
    # Output looks like "d5c86e2c6dc6 (head)\n"
    rev = result.stdout.strip().split()[0]
    assert len(rev) > 0
    return rev


@pytest.fixture
def tenant_schema_at_head(
    engine: Engine, current_head_rev: str
) -> Generator[str, None, None]:
    """Create a temporary tenant schema whose alembic_version is at head."""
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.execute(
            text(
                f'CREATE TABLE "{schema}".alembic_version (version_num VARCHAR(32) NOT NULL)'
            )
        )
        conn.execute(
            text(f'INSERT INTO "{schema}".alembic_version (version_num) VALUES (:rev)'),
            {"rev": current_head_rev},
        )
        conn.commit()

    yield schema

    _force_drop_schema(engine, schema)


@pytest.fixture
def tenant_schema_empty(engine: Engine) -> Generator[str, None, None]:
    """Create a temporary tenant schema with no tables at all.

    Alembic will treat it as a fresh schema and run every migration from base
    to head.
    """
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.commit()

    yield schema

    _force_drop_schema(engine, schema)


@pytest.fixture
def tenant_schema_bad_rev(engine: Engine) -> Generator[str, None, None]:
    """Create a tenant schema whose alembic_version points to a non-existent
    revision.  Alembic cannot find a migration path from this revision, so
    it will fail."""
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.execute(
            text(
                f'CREATE TABLE "{schema}".alembic_version (version_num VARCHAR(32) NOT NULL)'
            )
        )
        conn.execute(
            text(
                f"INSERT INTO \"{schema}\".alembic_version (version_num) VALUES ('00000bad0000')"
            )
        )
        conn.commit()

    yield schema

    _force_drop_schema(engine, schema)


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


def test_no_tenant_schemas_exits_nonzero() -> None:
    """In non-multi-tenant mode there are no tenant_ schemas, so the script
    should print a hint and exit 1."""
    result = _run_script(env_override={"MULTI_TENANT": "false"})
    assert result.returncode == 1
    assert "No tenant schemas found" in result.stdout
    assert "MULTI_TENANT" in result.stdout


def test_at_head_schema_is_skipped(tenant_schema_at_head: str) -> None:
    """A tenant schema already at head should not be targeted for migration."""
    result = _run_script(
        "--jobs",
        "1",
        "--batch-size",
        "50",
        env_override={"MULTI_TENANT": "true"},
    )
    assert result.returncode == 0
    # Our at-head schema should not appear in any batch "started" lines.
    batch_start_lines = [
        line
        for line in result.stdout.splitlines()
        if "Batch" in line and "started" in line
    ]
    for line in batch_start_lines:
        assert tenant_schema_at_head not in line


def test_detects_schemas_needing_migration(
    tenant_schema_at_head: str,
    tenant_schema_empty: str,
) -> None:
    """When some schemas are behind, the script should report how many need
    migration, upgrade them, and succeed."""
    result = _run_script(
        "--jobs",
        "1",
        "--batch-size",
        "50",
        env_override={"MULTI_TENANT": "true"},
    )
    assert result.returncode == 0, f"Script failed:\n{result.stdout}"
    assert "tenants need migration" in result.stdout
    assert "All migrations successful" in result.stdout

    # The empty schema should appear in the batch that was started.
    assert tenant_schema_empty in result.stdout

    # The at-head schema should NOT appear in any batch "started" lines
    # (it was filtered out by get_schemas_needing_migration).
    batch_start_lines = [
        line
        for line in result.stdout.splitlines()
        if "Batch" in line and "started" in line
    ]
    for line in batch_start_lines:
        assert tenant_schema_at_head not in line


def test_failed_migration(
    tenant_schema_at_head: str,
    tenant_schema_empty: str,
    tenant_schema_bad_rev: str,
) -> None:
    """A schema with a bogus alembic revision causes alembic to fail.

    The script should:
    - Exit non-zero (some migrations failed).
    - Still skip the at-head schema.
    - Still attempt the other schemas via the ``continue=true`` retry.
    """
    result = _run_script(
        "--jobs",
        "1",
        "--batch-size",
        "50",
        env_override={"MULTI_TENANT": "true"},
    )
    assert result.returncode == 1, f"Expected failure but got:\n{result.stdout}"
    assert "Some migrations failed" in result.stdout

    # The bad-rev schema should appear in the batch (it needs migration).
    assert tenant_schema_bad_rev in result.stdout

    # The empty schema should also appear (it was attempted via continue=true retry).
    assert tenant_schema_empty in result.stdout

    # The at-head schema should still be skipped.
    batch_start_lines = [
        line
        for line in result.stdout.splitlines()
        if "Batch" in line and "started" in line
    ]
    for line in batch_start_lines:
        assert tenant_schema_at_head not in line


================================================
FILE: backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py
================================================
from typing import Any
from uuid import uuid4

from onyx.db.models import UserRole
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestChatSession
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ToolName


def setup_test_tenants(reset_multitenant: None) -> dict[str, Any]:  # noqa: ARG001
    """Helper function to set up test tenants with documents and users."""
    unique = uuid4().hex
    # Creating an admin user for Tenant 1
    admin_user1: DATestUser = UserManager.create(
        email=f"admin_{unique}@example.com",
    )
    assert UserManager.is_role(admin_user1, UserRole.ADMIN)

    # Create Tenant 2 and its Admin User
    admin_user2: DATestUser = UserManager.create(
        email=f"admin2_{unique}@example.com",
    )
    assert UserManager.is_role(admin_user2, UserRole.ADMIN)

    # Create connectors for Tenant 1
    cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
        user_performing_action=admin_user1,
    )
    api_key_1: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user1,
    )
    api_key_1.headers.update(admin_user1.headers)
    LLMProviderManager.create(user_performing_action=admin_user1)

    # Create connectors for Tenant 2
    cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch(
        user_performing_action=admin_user2,
    )
    api_key_2: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user2,
    )
    api_key_2.headers.update(admin_user2.headers)
    LLMProviderManager.create(user_performing_action=admin_user2)

    # Seed documents for Tenant 1
    cc_pair_1.documents = []
    doc1_tenant1 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair_1,
        content="Tenant 1 Document Content",
        api_key=api_key_1,
    )
    doc2_tenant1 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair_1,
        content="Tenant 1 Document Content",
        api_key=api_key_1,
    )
    cc_pair_1.documents.extend([doc1_tenant1, doc2_tenant1])

    # Seed documents for Tenant 2
    cc_pair_2.documents = []
    doc1_tenant2 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair_2,
        content="Tenant 2 Document Content",
        api_key=api_key_2,
    )
    doc2_tenant2 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair_2,
        content="Tenant 2 Document Content",
        api_key=api_key_2,
    )
    cc_pair_2.documents.extend([doc1_tenant2, doc2_tenant2])

    tenant1_doc_ids = {doc1_tenant1.id, doc2_tenant1.id}
    tenant2_doc_ids = {doc1_tenant2.id, doc2_tenant2.id}

    # Create chat sessions for each user
    chat_session1: DATestChatSession = ChatSessionManager.create(
        user_performing_action=admin_user1
    )
    chat_session2: DATestChatSession = ChatSessionManager.create(
        user_performing_action=admin_user2
    )

    return {
        "admin_user1": admin_user1,
        "admin_user2": admin_user2,
        "chat_session1": chat_session1,
        "chat_session2": chat_session2,
        "tenant1_doc_ids": tenant1_doc_ids,
        "tenant2_doc_ids": tenant2_doc_ids,
    }


def test_tenant1_can_access_own_documents(reset_multitenant: None) -> None:
    """Test that Tenant 1 can access its own documents but not Tenant 2's."""
    test_data = setup_test_tenants(reset_multitenant)

    # User 1 sends a message and gets a response
    response1 = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session1"].id,
        message="What is in Tenant 1's documents? Run an internal search.",
        user_performing_action=test_data["admin_user1"],
    )

    assert response1.error is None, "Chat response should not have an error"

    # Assert that only the internal search tool was used
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response1.used_tools
    )

    response_doc_ids = {doc.document_id for doc in response1.used_tools[0].documents}
    assert test_data["tenant1_doc_ids"].issubset(
        response_doc_ids
    ), "Not all Tenant 1 document IDs are in the response"
    assert not response_doc_ids.intersection(
        test_data["tenant2_doc_ids"]
    ), "Tenant 2 document IDs should not be in the response"

    # Assert that the contents are correct
    assert any(
        doc.blurb == "Tenant 1 Document Content"
        for doc in response1.used_tools[0].documents
    ), "Tenant 1 Document Content not found in any document"


def test_tenant2_can_access_own_documents(reset_multitenant: None) -> None:
    """Test that Tenant 2 can access its own documents but not Tenant 1's."""
    test_data = setup_test_tenants(reset_multitenant)

    # User 2 sends a message and gets a response
    response2 = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session2"].id,
        message="What is in Tenant 2's documents? Run an internal search.",
        user_performing_action=test_data["admin_user2"],
    )

    assert response2.error is None, "Chat response should not have an error"

    # Assert that the search tool was used
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response2.used_tools
    )

    # Assert that the tool_result contains Tenant 2's documents
    response_doc_ids = {doc.document_id for doc in response2.used_tools[0].documents}
    assert test_data["tenant2_doc_ids"].issubset(
        response_doc_ids
    ), "Not all Tenant 2 document IDs are in the response"
    assert not response_doc_ids.intersection(
        test_data["tenant1_doc_ids"]
    ), "Tenant 1 document IDs should not be in the response"

    # Assert that the contents are correct
    assert any(
        doc.blurb == "Tenant 2 Document Content"
        for doc in response2.used_tools[0].documents
    ), "Tenant 2 Document Content not found in any document"


def test_tenant1_cannot_access_tenant2_documents(reset_multitenant: None) -> None:
    """Test that Tenant 1 cannot access Tenant 2's documents."""
    test_data = setup_test_tenants(reset_multitenant)

    # User 1 tries to access Tenant 2's documents
    response_cross = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session1"].id,
        message="What is in Tenant 2's documents? Run an internal search.",
        user_performing_action=test_data["admin_user1"],
    )

    assert response_cross.error is None, "Chat response should not have an error"

    # Assert that the search tool was used
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response_cross.used_tools
    )

    # Assert that the tool_result is empty or does not contain Tenant 2's documents
    response_doc_ids = {
        doc.document_id for doc in response_cross.used_tools[0].documents
    }

    # Ensure none of Tenant 2's document IDs are in the response
    assert not response_doc_ids.intersection(test_data["tenant2_doc_ids"])


def test_tenant2_cannot_access_tenant1_documents(reset_multitenant: None) -> None:
    """Test that Tenant 2 cannot access Tenant 1's documents."""
    test_data = setup_test_tenants(reset_multitenant)

    # User 2 tries to access Tenant 1's documents
    response_cross2 = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session2"].id,
        message="What is in Tenant 1's documents? Run an internal search.",
        user_performing_action=test_data["admin_user2"],
    )

    assert response_cross2.error is None, "Chat response should not have an error"

    # Assert that the search tool was used
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH
        for tool in response_cross2.used_tools
    )

    # Assert that the tool_result is empty or does not contain Tenant 1's documents
    response_doc_ids = {
        doc.document_id for doc in response_cross2.used_tools[0].documents
    }

    # Ensure none of Tenant 1's document IDs are in the response
    assert not response_doc_ids.intersection(test_data["tenant1_doc_ids"])


def test_multi_tenant_access_control(reset_multitenant: None) -> None:
    """Legacy test for multi-tenant access control."""
    test_data = setup_test_tenants(reset_multitenant)

    # User 1 sends a message and gets a response with only Tenant 1's documents
    response1 = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session1"].id,
        message="What is in Tenant 1's documents? Run an internal search.",
        user_performing_action=test_data["admin_user1"],
    )
    assert response1.error is None, "Chat response should not have an error"
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response1.used_tools
    )
    response_doc_ids = {doc.document_id for doc in response1.used_tools[0].documents}
    assert test_data["tenant1_doc_ids"].issubset(response_doc_ids)
    assert not response_doc_ids.intersection(test_data["tenant2_doc_ids"])

    # User 2 sends a message and gets a response with only Tenant 2's documents
    response2 = ChatSessionManager.send_message(
        chat_session_id=test_data["chat_session2"].id,
        message="What is in Tenant 2's documents? Run an internal search.",
        user_performing_action=test_data["admin_user2"],
    )
    assert response2.error is None, "Chat response should not have an error"
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response2.used_tools
    )
    response_doc_ids = {doc.document_id for doc in response2.used_tools[0].documents}
    assert test_data["tenant2_doc_ids"].issubset(response_doc_ids)
    assert not response_doc_ids.intersection(test_data["tenant1_doc_ids"])

    # User 1 tries to access Tenant 2's documents and fails
    user1_second_chat_session = ChatSessionManager.create(
        user_performing_action=test_data["admin_user1"]
    )
    response_cross = ChatSessionManager.send_message(
        chat_session_id=user1_second_chat_session.id,
        message="What is in Tenant 2's documents? Run an internal search.",
        user_performing_action=test_data["admin_user1"],
    )
    assert response_cross.error is None, "Chat response should not have an error"
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response_cross.used_tools
    )
    response_doc_ids = {
        doc.document_id for doc in response_cross.used_tools[0].documents
    }
    assert not response_doc_ids.intersection(test_data["tenant2_doc_ids"])

    # User 2 tries to access Tenant 1's documents and fails
    user2_second_chat_session = ChatSessionManager.create(
        user_performing_action=test_data["admin_user2"]
    )
    response_cross2 = ChatSessionManager.send_message(
        chat_session_id=user2_second_chat_session.id,
        message="What is in Tenant 1's documents? Run an internal search.",
        user_performing_action=test_data["admin_user2"],
    )
    assert response_cross2.error is None, "Chat response should not have an error"
    assert all(
        tool.tool_name == ToolName.INTERNAL_SEARCH
        for tool in response_cross2.used_tools
    )
    response_doc_ids = {
        doc.document_id for doc in response_cross2.used_tools[0].documents
    }
    assert not response_doc_ids.intersection(test_data["tenant1_doc_ids"])


================================================
FILE: backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py
================================================
from http import HTTPStatus
from uuid import uuid4

import requests

from onyx.configs.constants import DocumentSource
from onyx.db.enums import AccessType
from onyx.db.models import UserRole
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.image_generation import (
    ImageGenerationConfigManager,
)
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def test_first_user_is_admin(reset_multitenant: None) -> None:  # noqa: ARG001
    """Test that the first user of a tenant is automatically assigned ADMIN role."""
    unique = uuid4().hex
    test_user: DATestUser = UserManager.create(
        name=f"test_{unique}", email=f"test_{unique}@example.com"
    )
    assert UserManager.is_role(test_user, UserRole.ADMIN)


def test_admin_can_create_credential(
    reset_multitenant: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Test that an admin user can create a credential in their tenant."""
    # Create admin user
    unique = uuid4().hex
    test_user: DATestUser = UserManager.create(
        name=f"test_{unique}", email=f"test_{unique}@example.com"
    )
    assert UserManager.is_role(test_user, UserRole.ADMIN)

    # Create credential
    test_credential = CredentialManager.create(
        name="admin_test_credential",
        source=DocumentSource.FILE,
        curator_public=False,
        user_performing_action=test_user,
    )
    assert test_credential is not None


def test_admin_can_create_connector(
    reset_multitenant: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Test that an admin user can create a connector in their tenant."""
    # Create admin user
    unique = uuid4().hex
    test_user: DATestUser = UserManager.create(
        name=f"test_{unique}", email=f"test_{unique}@example.com"
    )
    assert UserManager.is_role(test_user, UserRole.ADMIN)

    # Create connector
    test_connector = ConnectorManager.create(
        name="admin_test_connector",
        source=DocumentSource.FILE,
        access_type=AccessType.PRIVATE,
        user_performing_action=test_user,
    )
    assert test_connector is not None


def test_admin_can_create_and_verify_cc_pair(
    reset_multitenant: None,  # noqa: ARG001
) -> None:
    """Test that an admin user can create and verify a connector-credential pair in their tenant."""
    # Create admin user
    unique = uuid4().hex
    test_user: DATestUser = UserManager.create(
        name=f"test_{unique}", email=f"test_{unique}@example.com"
    )
    assert UserManager.is_role(test_user, UserRole.ADMIN)

    # Create credential
    test_credential = CredentialManager.create(
        name="admin_test_credential",
        source=DocumentSource.FILE,
        curator_public=False,
        user_performing_action=test_user,
    )

    # Create connector
    test_connector = ConnectorManager.create(
        name="admin_test_connector",
        source=DocumentSource.FILE,
        access_type=AccessType.PRIVATE,
        user_performing_action=test_user,
    )

    # Create cc_pair
    test_cc_pair = CCPairManager.create(
        connector_id=test_connector.id,
        credential_id=test_credential.id,
        name="admin_test_cc_pair",
        access_type=AccessType.PRIVATE,
        user_performing_action=test_user,
    )
    assert test_cc_pair is not None

    # Verify cc_pair
    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=test_user)


def test_settings_access() -> None:
    """Calls to the enterprise settings endpoint without authentication should fail with
    403 (and not 500, which will lock the web UI into a "maintenance mode" page)"""

    response = requests.get(url=f"{API_SERVER_URL}/enterprise-settings")
    assert response.status_code == HTTPStatus.FORBIDDEN


def test_image_gen_config_created_on_tenant_provision(
    reset_multitenant: None,  # noqa: ARG001
) -> None:
    """Test that image generation config is automatically created when a tenant is provisioned."""
    unique = uuid4().hex
    test_user: DATestUser = UserManager.create(
        name=f"test_{unique}", email=f"test_{unique}@example.com"
    )
    assert UserManager.is_role(test_user, UserRole.ADMIN)

    # Check if image gen config was created during tenant provisioning
    all_configs = ImageGenerationConfigManager.get_all(user_performing_action=test_user)

    # Should have at least one config created during provisioning
    assert (
        len(all_configs) > 0
    ), "Image generation config should be created during tenant provisioning"

    # Verify a default config exists
    default_configs = [c for c in all_configs if c.is_default]
    assert (
        len(default_configs) == 1
    ), "Exactly one default image generation config should exist"

    # Verify expected properties
    default_config = default_configs[0]
    assert default_config.image_provider_id == "openai_gpt_image_1"
    assert default_config.model_name == "gpt-image-1"


================================================
FILE: backend/tests/integration/multitenant_tests/tenants/test_tenant_provisioning_rollback.py
================================================
"""
Integration tests for tenant provisioning rollback behavior.

Tests the fix for the drop_schema bug where:
1. isidentifier() rejected valid UUID tenant IDs (with hyphens)
2. SQL syntax was broken (%(schema_name)s instead of proper identifier handling)

This test verifies the full flow: provisioning failure → rollback → schema cleanup.
"""

import uuid
from unittest.mock import patch

from sqlalchemy import text

from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
from ee.onyx.server.tenants.schema_management import drop_schema
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from shared_configs.configs import TENANT_ID_PREFIX


def _schema_exists(schema_name: str) -> bool:
    """Check if a schema exists in the database."""
    with get_session_with_shared_schema() as session:
        result = session.execute(
            text(
                "SELECT 1 FROM information_schema.schemata WHERE schema_name = :schema"
            ),
            {"schema": schema_name},
        ).fetchone()
        return result is not None


class TestTenantProvisioningRollback:
    """Integration tests for provisioning failure and rollback."""

    def test_failed_provisioning_cleans_up_schema(self) -> None:
        """
        When setup_tenant fails after schema creation, rollback should
        clean up the orphaned schema.

        This is the actual bug scenario: pre_provision_tenant creates a schema,
        setup_tenant fails, rollback is called, but drop_schema was broken
        (isidentifier rejected UUIDs with hyphens), leaving orphaned schemas.
        """
        from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (
            pre_provision_tenant,
        )

        # Track which tenant_id gets created
        created_tenant_id = None

        def track_schema_creation(tenant_id: str) -> bool:
            nonlocal created_tenant_id
            created_tenant_id = tenant_id
            return create_schema_if_not_exists(tenant_id)

        # Mock setup_tenant to fail after schema creation
        with patch(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.setup_tenant"
        ) as mock_setup:
            mock_setup.side_effect = Exception("Simulated provisioning failure")

            with patch(
                "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.create_schema_if_not_exists",
                side_effect=track_schema_creation,
            ):
                # Run pre-provisioning - it should fail and trigger rollback
                pre_provision_tenant()

        # Verify that the schema was created and then cleaned up
        assert created_tenant_id is not None, "Schema should have been created"
        assert created_tenant_id.startswith(
            TENANT_ID_PREFIX
        ), f"Should have tenant prefix: {created_tenant_id}"
        assert not _schema_exists(
            created_tenant_id
        ), f"Schema {created_tenant_id} should have been rolled back"

    def test_drop_schema_works_with_uuid_tenant_id(self) -> None:
        """
        drop_schema should work with UUID-format tenant IDs.

        This directly tests the fix: UUID tenant IDs contain hyphens,
        which isidentifier() rejected. The new regex validation accepts them.
        """
        tenant_id = f"{TENANT_ID_PREFIX}{uuid.uuid4()}"

        # Create schema
        create_schema_if_not_exists(tenant_id)
        assert _schema_exists(tenant_id), "Schema should exist after creation"

        # Drop schema
        drop_schema(tenant_id)
        assert not _schema_exists(tenant_id), "Schema should be dropped"


================================================
FILE: backend/tests/integration/multitenant_tests/test_get_schemas_needing_migration.py
================================================
"""
Integration tests for onyx.db.engine.tenant_utils.get_schemas_needing_migration.

These tests require a live database and exercise the function directly,
independent of the alembic migration runner script.

Usage:
    pytest tests/integration/multitenant_tests/test_get_schemas_needing_migration.py -v
"""

from __future__ import annotations

import subprocess
import uuid
from collections.abc import Generator

import pytest
from sqlalchemy import text
from sqlalchemy.engine import Engine

from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_schemas_needing_migration

_BACKEND_DIR = __file__[: __file__.index("/tests/")]


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def engine() -> Engine:
    return SqlEngine.get_engine()


@pytest.fixture
def current_head_rev() -> str:
    result = subprocess.run(
        ["alembic", "heads", "--resolve-dependencies"],
        cwd=_BACKEND_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    assert (
        result.returncode == 0
    ), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}"
    rev = result.stdout.strip().split()[0]
    assert len(rev) > 0
    return rev


@pytest.fixture
def tenant_schema_at_head(
    engine: Engine, current_head_rev: str
) -> Generator[str, None, None]:
    """Tenant schema with alembic_version already at head — should be excluded."""
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.execute(
            text(
                f'CREATE TABLE "{schema}".alembic_version (version_num VARCHAR(32) NOT NULL)'
            )
        )
        conn.execute(
            text(f'INSERT INTO "{schema}".alembic_version (version_num) VALUES (:rev)'),
            {"rev": current_head_rev},
        )
        conn.commit()

    yield schema

    with engine.connect() as conn:
        conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
        conn.commit()


@pytest.fixture
def tenant_schema_empty(engine: Engine) -> Generator[str, None, None]:
    """Tenant schema with no tables — should be included (needs migration)."""
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.commit()

    yield schema

    with engine.connect() as conn:
        conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
        conn.commit()


@pytest.fixture
def tenant_schema_stale_rev(engine: Engine) -> Generator[str, None, None]:
    """Tenant schema with a non-head revision — should be included (needs migration)."""
    schema = f"tenant_test_{uuid.uuid4().hex[:12]}"
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA "{schema}"'))
        conn.execute(
            text(
                f'CREATE TABLE "{schema}".alembic_version (version_num VARCHAR(32) NOT NULL)'
            )
        )
        conn.execute(
            text(
                f"INSERT INTO \"{schema}\".alembic_version (version_num) VALUES ('stalerev000000000000')"
            )
        )
        conn.commit()

    yield schema

    with engine.connect() as conn:
        conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
        conn.commit()


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


def test_classifies_all_cases(
    current_head_rev: str,
    tenant_schema_at_head: str,
    tenant_schema_empty: str,
    tenant_schema_stale_rev: str,
) -> None:
    """Correctly classifies all three schema states:
    - at head      → excluded
    - no table     → included (needs migration)
    - stale rev    → included (needs migration)
    """
    all_schemas = [tenant_schema_at_head, tenant_schema_empty, tenant_schema_stale_rev]
    result = get_schemas_needing_migration(all_schemas, current_head_rev)

    assert tenant_schema_at_head not in result
    assert tenant_schema_empty in result
    assert tenant_schema_stale_rev in result


def test_idempotent(
    current_head_rev: str,
    tenant_schema_at_head: str,
    tenant_schema_empty: str,
) -> None:
    """Calling the function twice returns the same result.

    Verifies that the DROP TABLE IF EXISTS guards correctly clean up temp
    tables so a second call succeeds even if the first left state behind.
    """
    schemas = [tenant_schema_at_head, tenant_schema_empty]

    first = get_schemas_needing_migration(schemas, current_head_rev)
    second = get_schemas_needing_migration(schemas, current_head_rev)

    assert first == second


def test_empty_input(current_head_rev: str) -> None:
    """An empty input list returns immediately without touching the DB."""
    assert get_schemas_needing_migration([], current_head_rev) == []


================================================
FILE: backend/tests/integration/tests/anonymous_user/test_anonymous_user.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.settings import SettingsManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestSettings
from tests.integration.common_utils.test_models import DATestUser


def test_me_endpoint_returns_anonymous_user_when_enabled(
    reset: None,  # noqa: ARG001
) -> None:
    """Unauthenticated /me returns anonymous user info when anonymous access is enabled."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    SettingsManager.update_settings(
        DATestSettings(anonymous_user_enabled=True),
        user_performing_action=admin_user,
    )

    response = requests.get(f"{API_SERVER_URL}/me")

    assert response.status_code == 200
    data = response.json()
    assert data["is_anonymous_user"] is True
    assert data["email"] == "anonymous@onyx.app"
    assert data["role"] == "limited"


def test_me_endpoint_returns_403_when_anonymous_disabled(
    reset: None,  # noqa: ARG001
) -> None:
    """Unauthenticated /me returns 403 when anonymous access is disabled."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    SettingsManager.update_settings(
        DATestSettings(anonymous_user_enabled=False),
        user_performing_action=admin_user,
    )

    response = requests.get(f"{API_SERVER_URL}/me")

    # 403 is returned when user is not authenticated
    assert response.status_code == 403


def test_me_endpoint_returns_authenticated_user_info(
    reset: None,  # noqa: ARG001
) -> None:
    """Authenticated /me returns the actual user's info."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    response = requests.get(
        f"{API_SERVER_URL}/me",
        headers=admin_user.headers,
    )

    assert response.status_code == 200
    data = response.json()
    assert data.get("is_anonymous_user") is not True
    assert data["email"] == admin_user.email
    assert data["role"] == "admin"


def test_anonymous_user_can_access_persona_when_enabled(
    reset: None,  # noqa: ARG001
) -> None:
    """Verify that anonymous users can access limited endpoints when enabled."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    SettingsManager.update_settings(
        DATestSettings(anonymous_user_enabled=True),
        user_performing_action=admin_user,
    )

    anon_user = UserManager.get_anonymous_user()

    response = requests.get(
        f"{API_SERVER_URL}/persona",
        headers=anon_user.headers,
    )
    assert response.status_code == 200


def test_anonymous_user_denied_persona_when_disabled(
    reset: None,  # noqa: ARG001
) -> None:
    """Verify that anonymous users cannot access endpoints when disabled."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    SettingsManager.update_settings(
        DATestSettings(anonymous_user_enabled=False),
        user_performing_action=admin_user,
    )

    anon_user = UserManager.get_anonymous_user()

    response = requests.get(
        f"{API_SERVER_URL}/persona",
        headers=anon_user.headers,
    )
    # 403 is returned - BasicAuthenticationError uses HTTP 403 for all auth failures
    assert response.status_code == 403


================================================
FILE: backend/tests/integration/tests/api_key/test_api_key.py
================================================
from uuid import UUID

import requests

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestUser


def test_limited(reset: None) -> None:  # noqa: ARG001
    """Verify that with a limited role key, limited endpoints are accessible and
    others are not."""

    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    api_key: DATestAPIKey = APIKeyManager.create(
        api_key_role=UserRole.LIMITED,
        user_performing_action=admin_user,
    )

    # test limited endpoint
    response = requests.get(
        f"{API_SERVER_URL}/persona/0",
        headers=api_key.headers,
    )
    assert response.status_code == 200

    # test admin endpoints
    response = requests.get(
        f"{API_SERVER_URL}/admin/api-key",
        headers=api_key.headers,
    )
    assert response.status_code == 403


def _get_service_account_account_type(
    admin_user: DATestUser,
    api_key_user_id: UUID,
) -> AccountType:
    """Fetch the account_type of a service account user via the user listing API."""
    response = requests.get(
        f"{API_SERVER_URL}/manage/users",
        headers=admin_user.headers,
        params={"include_api_keys": "true"},
    )
    response.raise_for_status()
    data = response.json()
    user_id_str = str(api_key_user_id)
    for user in data["accepted"]:
        if user["id"] == user_id_str:
            return AccountType(user["account_type"])
    raise AssertionError(
        f"Service account user {user_id_str} not found in user listing"
    )


def _get_default_group_user_ids(
    admin_user: DATestUser,
) -> tuple[set[str], set[str]]:
    """Return (admin_group_user_ids, basic_group_user_ids) from default groups."""
    all_groups = UserGroupManager.get_all(
        user_performing_action=admin_user,
        include_default=True,
    )
    admin_group = next(
        (g for g in all_groups if g.name == "Admin" and g.is_default), None
    )
    basic_group = next(
        (g for g in all_groups if g.name == "Basic" and g.is_default), None
    )
    assert admin_group is not None, "Admin default group not found"
    assert basic_group is not None, "Basic default group not found"

    admin_ids = {str(u.id) for u in admin_group.users}
    basic_ids = {str(u.id) for u in basic_group.users}
    return admin_ids, basic_ids


def test_api_key_limited_service_account(reset: None) -> None:  # noqa: ARG001
    """LIMITED role API key: account_type is SERVICE_ACCOUNT, no group membership."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    api_key: DATestAPIKey = APIKeyManager.create(
        api_key_role=UserRole.LIMITED,
        user_performing_action=admin_user,
    )

    # Verify account_type
    account_type = _get_service_account_account_type(admin_user, api_key.user_id)
    assert (
        account_type == AccountType.SERVICE_ACCOUNT
    ), f"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}"

    # Verify no group membership
    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)
    user_id_str = str(api_key.user_id)
    assert (
        user_id_str not in admin_ids
    ), "LIMITED API key should NOT be in Admin default group"
    assert (
        user_id_str not in basic_ids
    ), "LIMITED API key should NOT be in Basic default group"


def test_api_key_basic_service_account(reset: None) -> None:  # noqa: ARG001
    """BASIC role API key: account_type is SERVICE_ACCOUNT, in Basic group only."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    api_key: DATestAPIKey = APIKeyManager.create(
        api_key_role=UserRole.BASIC,
        user_performing_action=admin_user,
    )

    # Verify account_type
    account_type = _get_service_account_account_type(admin_user, api_key.user_id)
    assert (
        account_type == AccountType.SERVICE_ACCOUNT
    ), f"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}"

    # Verify Basic group membership
    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)
    user_id_str = str(api_key.user_id)
    assert user_id_str in basic_ids, "BASIC API key should be in Basic default group"
    assert (
        user_id_str not in admin_ids
    ), "BASIC API key should NOT be in Admin default group"


def test_api_key_admin_service_account(reset: None) -> None:  # noqa: ARG001
    """ADMIN role API key: account_type is SERVICE_ACCOUNT, in Admin group only."""
    admin_user: DATestUser = UserManager.create(name="admin_user")

    api_key: DATestAPIKey = APIKeyManager.create(
        api_key_role=UserRole.ADMIN,
        user_performing_action=admin_user,
    )

    # Verify account_type
    account_type = _get_service_account_account_type(admin_user, api_key.user_id)
    assert (
        account_type == AccountType.SERVICE_ACCOUNT
    ), f"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}"

    # Verify Admin group membership
    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)
    user_id_str = str(api_key.user_id)
    assert user_id_str in admin_ids, "ADMIN API key should be in Admin default group"
    assert (
        user_id_str not in basic_ids
    ), "ADMIN API key should NOT be in Basic default group"


================================================
FILE: backend/tests/integration/tests/auth/test_saml_user_conversion.py
================================================
import os

import pytest
import requests

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


def _simulate_saml_login(email: str, admin_user: DATestUser) -> dict:
    """Simulate a SAML login by calling the test upsert endpoint."""
    response = requests.post(
        f"{API_SERVER_URL}/manage/users/test-upsert-user",
        json={"email": email},
        headers=admin_user.headers,
    )
    response.raise_for_status()
    return response.json()


def _get_basic_group_member_emails(admin_user: DATestUser) -> set[str]:
    """Get the set of emails of all members in the Basic default group."""
    all_groups = UserGroupManager.get_all(admin_user, include_default=True)
    basic_default = [g for g in all_groups if g.is_default and g.name == "Basic"]
    assert basic_default, "Basic default group not found"
    return {u.email for u in basic_default[0].users}


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_user_conversion(reset: None) -> None:  # noqa: ARG001
    """
    Test that SAML login correctly converts users with non-authenticated roles
    (SLACK_USER or EXT_PERM_USER) to authenticated roles (BASIC).

    This test:
    1. Creates an admin and a regular user
    2. Changes the regular user's role to EXT_PERM_USER
    3. Simulates a SAML login by calling the test endpoint
    4. Verifies the user's role is converted to BASIC

    This tests the fix that ensures users with non-authenticated roles (SLACK_USER or EXT_PERM_USER)
    are properly converted to authenticated roles during SAML login.
    """
    # Create an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # Create a regular user that we'll convert to EXT_PERM_USER
    test_user_email = "ext_perm_user@example.com"
    test_user = UserManager.create(email=test_user_email)

    # Verify the user was created with BASIC role initially
    assert UserManager.is_role(test_user, UserRole.BASIC)

    # Change the user's role to EXT_PERM_USER using the UserManager
    UserManager.set_role(
        user_to_set=test_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    # Verify the user has EXT_PERM_USER role now
    assert UserManager.is_role(test_user, UserRole.EXT_PERM_USER)

    # Simulate SAML login by calling the test endpoint
    user_data = _simulate_saml_login(test_user_email, admin_user)

    # Verify the response indicates the role changed to BASIC
    assert user_data["role"] == UserRole.BASIC.value

    # Verify user role was changed in the database
    assert UserManager.is_role(test_user, UserRole.BASIC)

    # Do the same test with SLACK_USER
    slack_user_email = "slack_user@example.com"
    slack_user = UserManager.create(email=slack_user_email)

    # Verify the user was created with BASIC role initially
    assert UserManager.is_role(slack_user, UserRole.BASIC)

    # Change the user's role to SLACK_USER
    UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    # Verify the user has SLACK_USER role
    assert UserManager.is_role(slack_user, UserRole.SLACK_USER)

    # Simulate SAML login again
    user_data = _simulate_saml_login(slack_user_email, admin_user)

    # Verify the response indicates the role changed to BASIC
    assert user_data["role"] == UserRole.BASIC.value

    # Verify the user's role was changed in the database
    assert UserManager.is_role(slack_user, UserRole.BASIC)


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_user_conversion_sets_account_type_and_group(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test that SAML login sets account_type to STANDARD when converting a
    non-web user (EXT_PERM_USER) and that the user receives the correct role
    (BASIC) after conversion.

    This validates the permissions-migration-phase2 changes which ensure that:
    1. account_type is updated to 'standard' on SAML conversion
    2. The converted user is assigned to the Basic default group
    """
    # Create an admin user (first user is automatically admin)
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # Create a user and set them as EXT_PERM_USER
    test_email = "ext_convert@example.com"
    test_user = UserManager.create(email=test_email)
    UserManager.set_role(
        user_to_set=test_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert UserManager.is_role(test_user, UserRole.EXT_PERM_USER)

    # Simulate SAML login
    user_data = _simulate_saml_login(test_email, admin_user)

    # Verify account_type is set to standard after conversion
    assert (
        user_data["account_type"] == AccountType.STANDARD.value
    ), f"Expected account_type='{AccountType.STANDARD.value}', got '{user_data['account_type']}'"

    # Verify role is BASIC after conversion
    assert user_data["role"] == UserRole.BASIC.value

    # Verify the user was assigned to the Basic default group
    assert test_email in _get_basic_group_member_emails(
        admin_user
    ), f"Converted user '{test_email}' not found in Basic default group"


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_normal_signin_assigns_group(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test that a brand-new user signing in via SAML for the first time
    is created with the correct role, account_type, and group membership.

    This validates that normal SAML sign-in (not an upgrade from
    SLACK_USER/EXT_PERM_USER) correctly:
    1. Creates the user with role=BASIC and account_type=STANDARD
    2. Assigns the user to the Basic default group
    """
    # First user becomes admin
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # New user signs in via SAML (no prior account)
    new_email = "new_saml_user@example.com"
    user_data = _simulate_saml_login(new_email, admin_user)

    # Verify role and account_type
    assert user_data["role"] == UserRole.BASIC.value
    assert user_data["account_type"] == AccountType.STANDARD.value

    # Verify user is in the Basic default group
    assert new_email in _get_basic_group_member_emails(
        admin_user
    ), f"New SAML user '{new_email}' not found in Basic default group"


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_user_conversion_restores_group_membership(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test that SAML login restores Basic group membership when converting
    a non-authenticated user (EXT_PERM_USER or SLACK_USER) to BASIC.

    Group membership implies 'basic' permission (verified by
    test_new_group_gets_basic_permission).
    """
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # --- EXT_PERM_USER path ---
    ext_email = "ext_perm_perms@example.com"
    ext_user = UserManager.create(email=ext_email)
    assert ext_email in _get_basic_group_member_emails(admin_user)

    UserManager.set_role(
        user_to_set=ext_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert ext_email not in _get_basic_group_member_emails(admin_user)

    user_data = _simulate_saml_login(ext_email, admin_user)
    assert user_data["role"] == UserRole.BASIC.value
    assert ext_email in _get_basic_group_member_emails(
        admin_user
    ), "EXT_PERM_USER should be back in Basic group after SAML conversion"

    # --- SLACK_USER path ---
    slack_email = "slack_perms@example.com"
    slack_user = UserManager.create(email=slack_email)

    UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert slack_email not in _get_basic_group_member_emails(admin_user)

    user_data = _simulate_saml_login(slack_email, admin_user)
    assert user_data["role"] == UserRole.BASIC.value
    assert slack_email in _get_basic_group_member_emails(
        admin_user
    ), "SLACK_USER should be back in Basic group after SAML conversion"


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_round_trip_group_lifecycle(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test the full round-trip: BASIC -> EXT_PERM -> SAML(BASIC) -> EXT_PERM -> SAML(BASIC).

    Verifies group membership is correctly removed and restored at each transition.
    """
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    test_email = "roundtrip@example.com"
    test_user = UserManager.create(email=test_email)

    # Step 1: BASIC user is in Basic group
    assert test_email in _get_basic_group_member_emails(admin_user)

    # Step 2: Downgrade to EXT_PERM_USER — loses Basic group
    UserManager.set_role(
        user_to_set=test_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert test_email not in _get_basic_group_member_emails(admin_user)

    # Step 3: SAML login — converts back to BASIC, regains Basic group
    _simulate_saml_login(test_email, admin_user)
    assert test_email in _get_basic_group_member_emails(
        admin_user
    ), "Should be in Basic group after first SAML conversion"

    # Step 4: Downgrade again
    UserManager.set_role(
        user_to_set=test_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert test_email not in _get_basic_group_member_emails(admin_user)

    # Step 5: SAML login again — should still restore correctly
    _simulate_saml_login(test_email, admin_user)
    assert test_email in _get_basic_group_member_emails(
        admin_user
    ), "Should be in Basic group after second SAML conversion"


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="SAML tests are enterprise only",
)
def test_saml_slack_user_conversion_sets_account_type_and_group(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test that SAML login sets account_type to STANDARD and assigns Basic group
    when converting a SLACK_USER (BOT account_type).

    Mirrors test_saml_user_conversion_sets_account_type_and_group but for
    SLACK_USER instead of EXT_PERM_USER, and additionally verifies permissions.
    """
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    test_email = "slack_convert@example.com"
    test_user = UserManager.create(email=test_email)

    UserManager.set_role(
        user_to_set=test_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert UserManager.is_role(test_user, UserRole.SLACK_USER)

    # SAML login
    user_data = _simulate_saml_login(test_email, admin_user)

    # Verify account_type and role
    assert (
        user_data["account_type"] == AccountType.STANDARD.value
    ), f"Expected STANDARD, got {user_data['account_type']}"
    assert user_data["role"] == UserRole.BASIC.value

    # Verify Basic group membership (implies 'basic' permission)
    assert test_email in _get_basic_group_member_emails(
        admin_user
    ), f"Converted SLACK_USER '{test_email}' not found in Basic default group"


================================================
FILE: backend/tests/integration/tests/chat/test_chat_deletion.py
================================================
import pytest

from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


MESSAGE = "Hi"


@pytest.fixture(scope="module", autouse=True)
def reset_for_module() -> None:
    """Reset all data once before running any tests in this module."""
    reset_all()


@pytest.fixture
def llm_provider(admin_user: DATestUser) -> DATestLLMProvider:
    return LLMProviderManager.create(user_performing_action=admin_user)


def test_soft_delete_chat_session(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """
    Test soft deletion of a chat session.
    Soft delete should mark the chat as deleted but keep it in the database.
    """
    # Create a chat session
    test_chat_session = ChatSessionManager.create(
        persona_id=0,  # Use default persona
        description="Test chat session for soft deletion",
        user_performing_action=basic_user,
    )

    # Send a message to create some data
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message=MESSAGE,
        user_performing_action=basic_user,
    )

    # Verify that the message was processed successfully
    assert response.error is None, "Chat response should not have an error"
    assert len(response.full_message) > 0, "Chat response should not be empty"

    # Verify that the chat session can be retrieved before deletion
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )
    assert len(chat_history) > 0, "Chat session should have messages"

    # Test soft deletion of the chat session
    deletion_success = ChatSessionManager.soft_delete(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )

    # Verify that the deletion was successful
    assert deletion_success, "Chat session soft deletion should succeed"

    # Verify that the chat session is soft deleted (marked as deleted but still in DB)
    assert ChatSessionManager.verify_soft_deleted(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    ), "Chat session should be soft deleted"

    # Verify that normal access is blocked
    assert ChatSessionManager.verify_deleted(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    ), "Chat session should not be accessible normally after soft delete"


def test_hard_delete_chat_session(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """
    Test hard deletion of a chat session.
    Hard delete should completely remove the chat from the database.
    """
    # Create a chat session
    test_chat_session = ChatSessionManager.create(
        persona_id=0,  # Use default persona
        description="Test chat session for hard deletion",
        user_performing_action=basic_user,
    )

    # Send a message to create some data
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message=MESSAGE,
        user_performing_action=basic_user,
    )

    # Verify that the message was processed successfully
    assert response.error is None, "Chat response should not have an error"
    assert len(response.full_message) > 0, "Chat response should not be empty"

    # Verify that the chat session can be retrieved before deletion
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )
    assert len(chat_history) > 0, "Chat session should have messages"

    # Test hard deletion of the chat session
    deletion_success = ChatSessionManager.hard_delete(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )

    # Verify that the deletion was successful
    assert deletion_success, "Chat session hard deletion should succeed"

    # Verify that the chat session is hard deleted (completely removed from DB)
    assert ChatSessionManager.verify_hard_deleted(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    ), "Chat session should be hard deleted"

    # Verify that the chat session is not accessible at all
    assert ChatSessionManager.verify_deleted(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    ), "Chat session should not be accessible after hard delete"

    # Verify it's not soft deleted (since it doesn't exist at all)
    assert not ChatSessionManager.verify_soft_deleted(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    ), "Hard deleted chat should not be found as soft deleted"


def test_multiple_soft_deletions(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """
    Test multiple chat session soft deletions to ensure proper handling
    when there are multiple related records.
    """
    chat_sessions = []

    # Create multiple chat sessions with potential agent behavior
    for i in range(3):
        chat_session = ChatSessionManager.create(
            persona_id=0,
            description=f"Test chat session {i} for multi-soft-deletion",
            user_performing_action=basic_user,
        )

        # Send a message to create some data
        ChatSessionManager.send_message(
            chat_session_id=chat_session.id,
            message=f"Tell me about topic {i} with detailed analysis",
            user_performing_action=basic_user,
        )

        chat_sessions.append(chat_session)

    # Soft delete all chat sessions
    for chat_session in chat_sessions:
        deletion_success = ChatSessionManager.soft_delete(
            chat_session=chat_session,
            user_performing_action=basic_user,
        )
        assert deletion_success, f"Failed to soft delete chat {chat_session.id}"

    # Verify all chat sessions are soft deleted
    for chat_session in chat_sessions:
        assert ChatSessionManager.verify_soft_deleted(
            chat_session=chat_session,
            user_performing_action=basic_user,
        ), f"Chat {chat_session.id} should be soft deleted"

        assert ChatSessionManager.verify_deleted(
            chat_session=chat_session,
            user_performing_action=basic_user,
        ), f"Chat {chat_session.id} should not be accessible normally"


def test_multiple_hard_deletions_with_agent_data(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """
    Test multiple chat session hard deletions to ensure CASCADE deletes work correctly
    when there are multiple related records.
    """
    chat_sessions = []

    # Create multiple chat sessions with potential agent behavior
    for i in range(3):
        chat_session = ChatSessionManager.create(
            persona_id=0,
            description=f"Test chat session {i} for multi-hard-deletion",
            user_performing_action=basic_user,
        )

        # Send a message to create some data
        ChatSessionManager.send_message(
            chat_session_id=chat_session.id,
            message=f"Tell me about topic {i} with detailed analysis",
            user_performing_action=basic_user,
        )

        chat_sessions.append(chat_session)

    # Hard delete all chat sessions
    for chat_session in chat_sessions:
        deletion_success = ChatSessionManager.hard_delete(
            chat_session=chat_session,
            user_performing_action=basic_user,
        )
        assert deletion_success, f"Failed to hard delete chat {chat_session.id}"

    # Verify all chat sessions are hard deleted
    for chat_session in chat_sessions:
        assert ChatSessionManager.verify_hard_deleted(
            chat_session=chat_session,
            user_performing_action=basic_user,
        ), f"Chat {chat_session.id} should be hard deleted"

        assert ChatSessionManager.verify_deleted(
            chat_session=chat_session,
            user_performing_action=basic_user,
        ), f"Chat {chat_session.id} should not be accessible"


def test_soft_vs_hard_delete_edge_cases(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """
    Test edge cases for both soft and hard deletion to ensure robustness.
    """
    # Test 1: Soft delete a chat session with no messages
    empty_chat_session_soft = ChatSessionManager.create(
        persona_id=0,
        description="Empty chat session for soft delete",
        user_performing_action=basic_user,
    )

    # Soft delete without sending any messages
    deletion_success = ChatSessionManager.soft_delete(
        chat_session=empty_chat_session_soft,
        user_performing_action=basic_user,
    )
    assert deletion_success, "Empty chat session should be soft deletable"
    assert ChatSessionManager.verify_soft_deleted(
        chat_session=empty_chat_session_soft,
        user_performing_action=basic_user,
    ), "Empty chat session should be confirmed as soft deleted"

    # Test 2: Hard delete a chat session with no messages
    empty_chat_session_hard = ChatSessionManager.create(
        persona_id=0,
        description="Empty chat session for hard delete",
        user_performing_action=basic_user,
    )

    # Hard delete without sending any messages
    deletion_success = ChatSessionManager.hard_delete(
        chat_session=empty_chat_session_hard,
        user_performing_action=basic_user,
    )
    assert deletion_success, "Empty chat session should be hard deletable"
    assert ChatSessionManager.verify_hard_deleted(
        chat_session=empty_chat_session_hard,
        user_performing_action=basic_user,
    ), "Empty chat session should be confirmed as hard deleted"

    # Test 3: Soft delete a chat session with multiple messages
    multi_message_chat_soft = ChatSessionManager.create(
        persona_id=0,
        description="Multi-message chat session for soft delete",
        user_performing_action=basic_user,
    )

    # Send multiple messages to create more complex data
    for i in range(3):
        ChatSessionManager.send_message(
            chat_session_id=multi_message_chat_soft.id,
            message=f"Message {i}: Tell me about different aspects of this topic",
            user_performing_action=basic_user,
        )

    # Verify messages exist
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=multi_message_chat_soft,
        user_performing_action=basic_user,
    )
    assert len(chat_history) >= 3, "Chat should have multiple messages"

    # Soft delete the chat with multiple messages
    deletion_success = ChatSessionManager.soft_delete(
        chat_session=multi_message_chat_soft,
        user_performing_action=basic_user,
    )
    assert deletion_success, "Multi-message chat session should be soft deletable"
    assert ChatSessionManager.verify_soft_deleted(
        chat_session=multi_message_chat_soft,
        user_performing_action=basic_user,
    ), "Multi-message chat session should be confirmed as soft deleted"

    # Test 4: Hard delete a chat session with multiple messages
    multi_message_chat_hard = ChatSessionManager.create(
        persona_id=0,
        description="Multi-message chat session for hard delete",
        user_performing_action=basic_user,
    )

    # Send multiple messages to create more complex data
    for i in range(3):
        ChatSessionManager.send_message(
            chat_session_id=multi_message_chat_hard.id,
            message=f"Message {i}: Tell me about different aspects of this topic",
            user_performing_action=basic_user,
        )

    # Verify messages exist
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=multi_message_chat_hard,
        user_performing_action=basic_user,
    )
    assert len(chat_history) >= 3, "Chat should have multiple messages"

    # Hard delete the chat with multiple messages
    deletion_success = ChatSessionManager.hard_delete(
        chat_session=multi_message_chat_hard,
        user_performing_action=basic_user,
    )
    assert deletion_success, "Multi-message chat session should be hard deletable"
    assert ChatSessionManager.verify_hard_deleted(
        chat_session=multi_message_chat_hard,
        user_performing_action=basic_user,
    ), "Multi-message chat session should be confirmed as hard deleted"


================================================
FILE: backend/tests/integration/tests/chat/test_chat_session_access.py
================================================
from uuid import uuid4

import pytest
import requests
from requests import HTTPError

from onyx.auth.schemas import UserRole
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.user import build_email
from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestUser


@pytest.fixture(scope="module", autouse=True)
def reset_for_module() -> None:
    """Reset all data once before running any tests in this module."""
    reset_all()


@pytest.fixture
def second_user(admin_user: DATestUser) -> DATestUser:  # noqa: ARG001
    # Ensure admin exists so this new user is created with BASIC role.
    try:
        return UserManager.create(name="second_basic_user")
    except HTTPError as e:
        response = e.response
        if response is None:
            raise
        if response.status_code not in (400, 409):
            raise
        try:
            payload = response.json()
        except ValueError:
            raise
        detail = payload.get("detail")
        if not _is_user_already_exists_detail(detail):
            raise
        print("Second basic user already exists; logging in instead.")
        return UserManager.login_as_user(
            DATestUser(
                id="",
                email=build_email("second_basic_user"),
                password=DEFAULT_PASSWORD,
                headers=GENERAL_HEADERS,
                role=UserRole.BASIC,
                is_active=True,
            )
        )


def _is_user_already_exists_detail(detail: object) -> bool:
    if isinstance(detail, str):
        normalized = detail.lower()
        return (
            "already exists" in normalized
            or "register_user_already_exists" in normalized
        )
    if isinstance(detail, dict):
        code = detail.get("code")
        if isinstance(code, str) and code.lower() == "register_user_already_exists":
            return True
        message = detail.get("message")
        if isinstance(message, str) and "already exists" in message.lower():
            return True
    return False


def _get_chat_session(
    chat_session_id: str,
    user: DATestUser,
    is_shared: bool | None = None,
    include_deleted: bool | None = None,
) -> requests.Response:
    params: dict[str, str] = {}
    if is_shared is not None:
        params["is_shared"] = str(is_shared).lower()
    if include_deleted is not None:
        params["include_deleted"] = str(include_deleted).lower()

    return requests.get(
        f"{API_SERVER_URL}/chat/get-chat-session/{chat_session_id}",
        params=params,
        headers=user.headers,
        cookies=user.cookies,
    )


def _set_sharing_status(
    chat_session_id: str, sharing_status: str, user: DATestUser
) -> requests.Response:
    return requests.patch(
        f"{API_SERVER_URL}/chat/chat-session/{chat_session_id}",
        json={"sharing_status": sharing_status},
        headers=user.headers,
        cookies=user.cookies,
    )


def test_private_chat_session_access(
    basic_user: DATestUser, second_user: DATestUser
) -> None:
    """Verify private sessions are only accessible by the owner and never via share link."""
    # Create a private chat session owned by basic_user.
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)

    # Owner can access the private session normally.
    response = _get_chat_session(str(chat_session.id), basic_user)
    assert response.status_code == 200

    # Share link should be forbidden when the session is private.
    response = _get_chat_session(str(chat_session.id), basic_user, is_shared=True)
    assert response.status_code == 403

    # Other users cannot access private sessions directly.
    response = _get_chat_session(str(chat_session.id), second_user)
    assert response.status_code == 403

    # Other users also cannot access private sessions via share link.
    response = _get_chat_session(str(chat_session.id), second_user, is_shared=True)
    assert response.status_code == 403


def test_public_shared_chat_session_access(
    basic_user: DATestUser, second_user: DATestUser
) -> None:
    """Verify shared sessions are accessible only via share link for non-owners."""
    # Create a private session, then mark it public.
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)

    response = _set_sharing_status(str(chat_session.id), "public", basic_user)
    assert response.status_code == 200

    # Owner can access normally.
    response = _get_chat_session(str(chat_session.id), basic_user)
    assert response.status_code == 200

    # Owner can also access via share link.
    response = _get_chat_session(str(chat_session.id), basic_user, is_shared=True)
    assert response.status_code == 200

    # Non-owner cannot access without share link.
    response = _get_chat_session(str(chat_session.id), second_user)
    assert response.status_code == 403

    # Non-owner can access with share link for public sessions.
    response = _get_chat_session(str(chat_session.id), second_user, is_shared=True)
    assert response.status_code == 200


def test_deleted_chat_session_access(
    basic_user: DATestUser, second_user: DATestUser
) -> None:
    """Verify deleted sessions return 404, with include_deleted gated by access checks."""
    # Create and soft-delete a session.
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)

    deletion_success = ChatSessionManager.soft_delete(
        chat_session=chat_session, user_performing_action=basic_user
    )
    assert deletion_success is True

    # Deleted sessions are not accessible normally.
    response = _get_chat_session(str(chat_session.id), basic_user)
    assert response.status_code == 404

    # Owner can fetch deleted session only with include_deleted.
    response = _get_chat_session(str(chat_session.id), basic_user, include_deleted=True)
    assert response.status_code == 200
    assert response.json().get("deleted") is True

    # Non-owner should be blocked even with include_deleted.
    response = _get_chat_session(
        str(chat_session.id), second_user, include_deleted=True
    )
    assert response.status_code == 403


def test_chat_session_not_found_returns_404(basic_user: DATestUser) -> None:
    """Verify unknown IDs return 404."""
    response = _get_chat_session(str(uuid4()), basic_user)
    assert response.status_code == 404


================================================
FILE: backend/tests/integration/tests/chat_retention/test_chat_retention.py
================================================
import os
import time

import pytest
import requests

from onyx.db.chat import delete_chat_session
from onyx.db.chat import get_chat_sessions_older_than
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.settings import SettingsManager
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestSettings
from tests.integration.common_utils.test_models import DATestUser

RETENTION_SECONDS = 10


def _run_ttl_cleanup(retention_days: int) -> None:
    """Directly execute TTL cleanup logic, bypassing Celery task infrastructure."""
    with get_session_with_current_tenant() as db_session:
        old_chat_sessions = get_chat_sessions_older_than(retention_days, db_session)

    for user_id, session_id in old_chat_sessions:
        with get_session_with_current_tenant() as db_session:
            delete_chat_session(
                user_id,
                session_id,
                db_session,
                include_deleted=True,
                hard_delete=True,
            )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Chat retention tests are enterprise only",
)
def test_chat_retention(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Test that chat sessions are deleted after the retention period expires."""

    retention_days = RETENTION_SECONDS // 86400
    settings = DATestSettings(maximum_chat_retention_days=retention_days)
    SettingsManager.update_settings(settings, user_performing_action=admin_user)

    chat_session = ChatSessionManager.create(
        persona_id=0,
        description="Test chat retention",
        user_performing_action=admin_user,
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="This message should be deleted soon",
        user_performing_action=admin_user,
    )
    assert (
        response.error is None
    ), f"Chat response should not have an error: {response.error}"

    chat_history = ChatSessionManager.get_chat_history(
        chat_session=chat_session,
        user_performing_action=admin_user,
    )
    assert len(chat_history) > 0, "Chat session should have messages"

    # Wait for the retention period to elapse, then directly run TTL cleanup
    time.sleep(RETENTION_SECONDS + 2)
    _run_ttl_cleanup(retention_days)

    # Verify the chat session was deleted
    session_deleted = False
    try:
        chat_history = ChatSessionManager.get_chat_history(
            chat_session=chat_session,
            user_performing_action=admin_user,
        )
        session_deleted = len(chat_history) == 0
    except requests.exceptions.HTTPError as e:
        if e.response.status_code in (404, 400):
            session_deleted = True
        else:
            raise

    assert session_deleted, "Chat session was not deleted after retention period"


================================================
FILE: backend/tests/integration/tests/code_interpreter/conftest.py
================================================
from collections.abc import Generator

import pytest
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser

CODE_INTERPRETER_URL = f"{API_SERVER_URL}/admin/code-interpreter"


@pytest.fixture
def preserve_code_interpreter_state(
    admin_user: DATestUser,
) -> Generator[None, None, None]:
    """Capture the code interpreter enabled state before a test and restore it
    afterwards, so that tests that toggle the setting cannot leak state."""
    response = requests.get(
        CODE_INTERPRETER_URL,
        headers=admin_user.headers,
    )
    response.raise_for_status()
    initial_enabled = response.json()["enabled"]

    yield

    restore = requests.put(
        CODE_INTERPRETER_URL,
        json={"enabled": initial_enabled},
        headers=admin_user.headers,
    )
    restore.raise_for_status()


================================================
FILE: backend/tests/integration/tests/code_interpreter/test_code_interpreter_api.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser

CODE_INTERPRETER_URL = f"{API_SERVER_URL}/admin/code-interpreter"
CODE_INTERPRETER_HEALTH_URL = f"{CODE_INTERPRETER_URL}/health"


def test_get_code_interpreter_health_as_admin(
    admin_user: DATestUser,
) -> None:
    """Health endpoint should return a JSON object with a 'healthy' boolean."""
    response = requests.get(
        CODE_INTERPRETER_HEALTH_URL,
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    data = response.json()
    assert "healthy" in data
    assert isinstance(data["healthy"], bool)


def test_get_code_interpreter_status_as_admin(
    admin_user: DATestUser,
) -> None:
    """GET endpoint should return a JSON object with an 'enabled' boolean."""
    response = requests.get(
        CODE_INTERPRETER_URL,
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    data = response.json()
    assert "enabled" in data
    assert isinstance(data["enabled"], bool)


def test_update_code_interpreter_disable_and_enable(
    admin_user: DATestUser,
    preserve_code_interpreter_state: None,  # noqa: ARG001
) -> None:
    """PUT endpoint should update the enabled flag and persist across reads."""
    # Disable
    response = requests.put(
        CODE_INTERPRETER_URL,
        json={"enabled": False},
        headers=admin_user.headers,
    )
    assert response.status_code == 200

    # Verify disabled
    response = requests.get(
        CODE_INTERPRETER_URL,
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    assert response.json()["enabled"] is False

    # Re-enable
    response = requests.put(
        CODE_INTERPRETER_URL,
        json={"enabled": True},
        headers=admin_user.headers,
    )
    assert response.status_code == 200

    # Verify enabled
    response = requests.get(
        CODE_INTERPRETER_URL,
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    assert response.json()["enabled"] is True


def test_code_interpreter_endpoints_require_admin(
    basic_user: DATestUser,
) -> None:
    """All code interpreter endpoints should reject non-admin users."""
    health_response = requests.get(
        CODE_INTERPRETER_HEALTH_URL,
        headers=basic_user.headers,
    )
    assert health_response.status_code == 403

    get_response = requests.get(
        CODE_INTERPRETER_URL,
        headers=basic_user.headers,
    )
    assert get_response.status_code == 403

    put_response = requests.put(
        CODE_INTERPRETER_URL,
        json={"enabled": True},
        headers=basic_user.headers,
    )
    assert put_response.status_code == 403


================================================
FILE: backend/tests/integration/tests/connector/test_connector_creation.py
================================================
import os
from datetime import datetime
from datetime import timezone

from onyx.connectors.models import InputType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def test_connector_creation(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create connectors
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    cc_pair_info = CCPairManager.get_single(
        cc_pair_1.id, user_performing_action=admin_user
    )
    assert cc_pair_info
    assert cc_pair_info.creator
    assert str(cc_pair_info.creator) == admin_user.id
    assert cc_pair_info.creator_email == admin_user.email


def test_overlapping_connector_creation(reset: None) -> None:  # noqa: ARG001
    """Tests that connectors indexing the same documents don't interfere with each other.
    A previous bug involved document by cc pair entries not being added for new connectors
    when the docs existed already via another connector and were up to date relative to the source.
    """
    admin_user: DATestUser = UserManager.create(name="admin_user")

    config = {
        "wiki_base": os.environ["CONFLUENCE_TEST_SPACE_URL"],
        "space": "DailyConne",
        "is_cloud": True,
    }

    credential = {
        "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
        "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
    }

    # store the time before we create the connector so that we know after
    # when the indexing should have started
    now = datetime.now(timezone.utc)

    # create connector
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.CONFLUENCE,
        connector_specific_config=config,
        credential_json=credential,
        user_performing_action=admin_user,
        input_type=InputType.POLL,
    )

    CCPairManager.wait_for_indexing_completion(
        cc_pair_1, now, timeout=300, user_performing_action=admin_user
    )

    now = datetime.now(timezone.utc)

    cc_pair_2 = CCPairManager.create_from_scratch(
        source=DocumentSource.CONFLUENCE,
        connector_specific_config=config,
        credential_json=credential,
        user_performing_action=admin_user,
        input_type=InputType.POLL,
    )

    CCPairManager.wait_for_indexing_completion(
        cc_pair_2, now, timeout=300, user_performing_action=admin_user
    )

    info_1 = CCPairManager.get_single(cc_pair_1.id, user_performing_action=admin_user)
    assert info_1

    info_2 = CCPairManager.get_single(cc_pair_2.id, user_performing_action=admin_user)
    assert info_2

    assert info_1.num_docs_indexed == info_2.num_docs_indexed


def test_connector_pause_while_indexing(reset: None) -> None:  # noqa: ARG001
    """Tests that we can pause a connector while indexing is in progress and that
    tasks end early or abort as a result.

    TODO: This does not specifically test for soft or hard termination code paths.
    Design specific tests for those use cases.
    """
    admin_user: DATestUser = UserManager.create(name="admin_user")

    config = {
        "wiki_base": os.environ["CONFLUENCE_TEST_SPACE_URL"],
        "is_cloud": True,
    }

    credential = {
        "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
        "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
    }

    # store the time before we create the connector so that we know after
    # when the indexing should have started
    datetime.now(timezone.utc)

    # create connector
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.CONFLUENCE,
        connector_specific_config=config,
        credential_json=credential,
        user_performing_action=admin_user,
        input_type=InputType.POLL,
    )

    # NOTE: A bit flaky in our CI due to varying indexing times. Empirically
    # 120s was not always enough to index 16 docs from Confluence so trying to
    # bump down the indexing progress to wait for to 4 docs from 16.
    CCPairManager.wait_for_indexing_in_progress(
        cc_pair_1, timeout=120, num_docs=4, user_performing_action=admin_user
    )

    CCPairManager.pause_cc_pair(cc_pair_1, user_performing_action=admin_user)

    CCPairManager.wait_for_indexing_inactive(
        cc_pair_1, timeout=60, user_performing_action=admin_user
    )
    return


================================================
FILE: backend/tests/integration/tests/connector/test_connector_deletion.py
================================================
"""
This file contains tests for the following:
- Ensuring deletion of a connector also:
    - deletes the documents in vespa for that connector
    - updates the document sets and user groups to remove the connector
- Ensure that deleting a connector that is part of an overlapping document set and/or user group works as expected
"""

import os
from uuid import uuid4

from sqlalchemy.orm import Session

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import DocumentFailure
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.db.enums import IndexingStatus
from onyx.db.index_attempt import create_index_attempt
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.models import IndexAttempt
from onyx.db.search_settings import get_current_search_settings
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.document_set import DocumentSetManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import DATestUserGroup
from tests.integration.common_utils.vespa import vespa_fixture


def test_connector_deletion(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    user_group_1: DATestUserGroup
    user_group_2: DATestUserGroup

    is_ee = (
        os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
    )

    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
    # create api key
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connectors
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )
    cc_pair_2 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # seed documents
    cc_pair_1.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_1,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )
    cc_pair_2.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_2,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    # create document sets
    doc_set_1 = DocumentSetManager.create(
        name="Test Document Set 1",
        cc_pair_ids=[cc_pair_1.id],
        user_performing_action=admin_user,
    )
    doc_set_2 = DocumentSetManager.create(
        name="Test Document Set 2",
        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],
        user_performing_action=admin_user,
    )

    # wait for document sets to be synced
    DocumentSetManager.wait_for_sync(user_performing_action=admin_user)

    print("Document sets created and synced")

    if is_ee:
        # create user groups
        user_group_1 = UserGroupManager.create(
            cc_pair_ids=[cc_pair_1.id],
            user_performing_action=admin_user,
        )
        user_group_2 = UserGroupManager.create(
            cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],
            user_performing_action=admin_user,
        )
        UserGroupManager.wait_for_sync(user_performing_action=admin_user)

    # inject a finished index attempt and index attempt error (exercises foreign key errors)
    with Session(get_sqlalchemy_engine()) as db_session:
        primary_search_settings = get_current_search_settings(db_session)
        new_attempt = IndexAttempt(
            connector_credential_pair_id=cc_pair_1.id,
            search_settings_id=primary_search_settings.id,
            from_beginning=False,
            status=IndexingStatus.COMPLETED_WITH_ERRORS,
        )
        db_session.add(new_attempt)
        db_session.commit()

        create_index_attempt_error(
            index_attempt_id=new_attempt.id,
            connector_credential_pair_id=cc_pair_1.id,
            failure=ConnectorFailure(
                failure_message="Test error",
                failed_document=DocumentFailure(
                    document_id=cc_pair_1.documents[0].id,
                    document_link=None,
                ),
                failed_entity=None,
            ),
            db_session=db_session,
        )

    # delete connector 1
    CCPairManager.pause_cc_pair(
        cc_pair=cc_pair_1,
        user_performing_action=admin_user,
    )
    CCPairManager.delete(
        cc_pair=cc_pair_1,
        user_performing_action=admin_user,
    )

    # inject an index attempt and index attempt error (exercises foreign key errors)
    with Session(get_sqlalchemy_engine()) as db_session:
        attempt_id = create_index_attempt(
            connector_credential_pair_id=cc_pair_1.id,
            search_settings_id=1,
            db_session=db_session,
        )
        create_index_attempt_error(
            index_attempt_id=attempt_id,
            connector_credential_pair_id=cc_pair_1.id,
            failure=ConnectorFailure(
                failure_message="Test error",
                failed_document=DocumentFailure(
                    document_id=cc_pair_1.documents[0].id,
                    document_link=None,
                ),
                failed_entity=None,
            ),
            db_session=db_session,
        )

    # Update local records to match the database for later comparison
    doc_set_1.cc_pair_ids = []
    doc_set_2.cc_pair_ids = [cc_pair_2.id]
    cc_pair_1.groups = []
    if is_ee:
        cc_pair_2.groups = [user_group_2.id]
    else:
        cc_pair_2.groups = []

    CCPairManager.wait_for_deletion_completion(
        cc_pair_id=cc_pair_1.id, user_performing_action=admin_user
    )

    # validate vespa documents
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[],
        group_names=[],
        doc_creating_user=admin_user,
        verify_deleted=True,
    )

    cc_pair_2_group_name_expected = []
    if is_ee:
        cc_pair_2_group_name_expected = [user_group_2.name]

    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_set_names=[doc_set_2.name],
        group_names=cc_pair_2_group_name_expected,
        doc_creating_user=admin_user,
        verify_deleted=False,
    )

    # check that only connector 1 is deleted
    CCPairManager.verify(
        cc_pair=cc_pair_2,
        user_performing_action=admin_user,
    )

    # validate document sets
    DocumentSetManager.verify(
        document_set=doc_set_1,
        user_performing_action=admin_user,
    )
    DocumentSetManager.verify(
        document_set=doc_set_2,
        user_performing_action=admin_user,
    )

    if is_ee:
        user_group_1.cc_pair_ids = []
        user_group_2.cc_pair_ids = [cc_pair_2.id]

        # validate user groups
        UserGroupManager.verify(
            user_group=user_group_1,
            user_performing_action=admin_user,
        )
        UserGroupManager.verify(
            user_group=user_group_2,
            user_performing_action=admin_user,
        )


def test_connector_deletion_for_overlapping_connectors(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    """Checks to make sure that connectors with overlapping documents work properly. Specifically, that the overlapping
    document (1) still exists and (2) has the right document set / group post-deletion of one of the connectors.
    """
    user_group_1: DATestUserGroup
    user_group_2: DATestUserGroup

    is_ee = (
        os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
    )

    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
    # create api key
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connectors
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )
    cc_pair_2 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    doc_ids = [str(uuid4())]
    cc_pair_1.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_1,
        document_ids=doc_ids,
        api_key=api_key,
    )
    cc_pair_2.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_2,
        document_ids=doc_ids,
        api_key=api_key,
    )

    # verify vespa document exists and that it is not in any document sets or groups
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[],
        group_names=[],
        doc_creating_user=admin_user,
    )
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_set_names=[],
        group_names=[],
        doc_creating_user=admin_user,
    )

    # create document set
    doc_set_1 = DocumentSetManager.create(
        name="Test Document Set 1",
        cc_pair_ids=[cc_pair_1.id],
        user_performing_action=admin_user,
    )
    DocumentSetManager.wait_for_sync(
        document_sets_to_check=[doc_set_1],
        user_performing_action=admin_user,
    )

    print("Document set 1 created and synced")

    # verify vespa document is in the document set
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[doc_set_1.name],
        doc_creating_user=admin_user,
    )
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_creating_user=admin_user,
    )

    if is_ee:
        # create a user group and attach it to connector 1
        user_group_1 = UserGroupManager.create(
            name="Test User Group 1",
            cc_pair_ids=[cc_pair_1.id],
            user_performing_action=admin_user,
        )
        UserGroupManager.wait_for_sync(
            user_groups_to_check=[user_group_1],
            user_performing_action=admin_user,
        )
        cc_pair_1.groups = [user_group_1.id]

        print("User group 1 created and synced")

        # create a user group and attach it to connector 2
        user_group_2 = UserGroupManager.create(
            name="Test User Group 2",
            cc_pair_ids=[cc_pair_2.id],
            user_performing_action=admin_user,
        )
        UserGroupManager.wait_for_sync(
            user_groups_to_check=[user_group_2],
            user_performing_action=admin_user,
        )
        cc_pair_2.groups = [user_group_2.id]

        print("User group 2 created and synced")

        # verify vespa document is in the user group
        DocumentManager.verify(
            vespa_client=vespa_client,
            cc_pair=cc_pair_1,
            group_names=[user_group_1.name, user_group_2.name],
            doc_creating_user=admin_user,
        )
        DocumentManager.verify(
            vespa_client=vespa_client,
            cc_pair=cc_pair_2,
            group_names=[user_group_1.name, user_group_2.name],
            doc_creating_user=admin_user,
        )

    # delete connector 1
    CCPairManager.pause_cc_pair(
        cc_pair=cc_pair_1,
        user_performing_action=admin_user,
    )
    CCPairManager.delete(
        cc_pair=cc_pair_1,
        user_performing_action=admin_user,
    )

    # wait for deletion to finish
    CCPairManager.wait_for_deletion_completion(
        cc_pair_id=cc_pair_1.id, user_performing_action=admin_user
    )

    print("Connector 1 deleted")

    # check that only connector 1 is deleted
    # TODO: check for the CC pair rather than the connector once the refactor is done
    CCPairManager.verify(
        cc_pair=cc_pair_1,
        verify_deleted=True,
        user_performing_action=admin_user,
    )
    CCPairManager.verify(
        cc_pair=cc_pair_2,
        user_performing_action=admin_user,
    )

    # verify the document is not in any document sets
    # verify the document is only in user group 2
    group_names_expected = []
    if is_ee:
        group_names_expected = [user_group_2.name]

    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_set_names=[],
        group_names=group_names_expected,
        doc_creating_user=admin_user,
        verify_deleted=False,
    )


================================================
FILE: backend/tests/integration/tests/connector/test_last_indexed_time.py
================================================
"""
Integration tests for the "Last Indexed" time displayed on both the
per-connector detail page and the all-connectors listing page.

Expected behavior: "Last Indexed" = time_started of the most recent
successful index attempt for the cc pair, regardless of pagination.

Edge cases:
1. First page of index attempts is entirely errors — last_indexed should
   still reflect the older successful attempt beyond page 1.
2. Credential swap — successful attempts, then failures after a
   "credential change"; last_indexed should reflect the most recent
   successful attempt.
3. Mix of statuses — only the most recent successful attempt matters.
4. COMPLETED_WITH_ERRORS counts as a success for last_indexed purposes.
"""

from datetime import datetime
from datetime import timedelta
from datetime import timezone

from onyx.db.models import IndexingStatus
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import ConnectorIndexingStatusLite
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser


def _wait_for_real_success(
    cc_pair: DATestCCPair,
    admin: DATestUser,
) -> None:
    """Wait for the initial index attempt to complete successfully."""
    CCPairManager.wait_for_indexing_completion(
        cc_pair,
        after=datetime(2000, 1, 1, tzinfo=timezone.utc),
        user_performing_action=admin,
        timeout=120,
    )


def _get_detail(cc_pair_id: int, admin: DATestUser) -> CCPairFullInfo:
    result = CCPairManager.get_single(cc_pair_id, admin)
    assert result is not None
    return result


def _get_listing(cc_pair_id: int, admin: DATestUser) -> ConnectorIndexingStatusLite:
    result = CCPairManager.get_indexing_status_by_id(cc_pair_id, admin)
    assert result is not None
    return result


def test_last_indexed_first_page_all_errors(reset: None) -> None:  # noqa: ARG001
    """When the first page of index attempts is entirely errors but an
    older successful attempt exists, both the detail page and the listing
    page should still show the time of that successful attempt.

    The detail page UI uses page size 8. We insert 10 failed attempts
    more recent than the initial success to push the success off page 1.
    """
    admin = UserManager.create(name="admin_first_page_errors")
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
    _wait_for_real_success(cc_pair, admin)

    # Baseline: last_success should be set from the initial successful run
    listing_before = _get_listing(cc_pair.id, admin)
    assert listing_before.last_success is not None

    # 10 recent failures push the success off page 1
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.FAILED,
        error_msg="simulated failure",
        base_time=datetime.now(tz=timezone.utc),
    )

    detail = _get_detail(cc_pair.id, admin)
    listing = _get_listing(cc_pair.id, admin)

    assert (
        detail.last_indexed is not None
    ), "Detail page last_indexed is None even though a successful attempt exists"
    assert (
        listing.last_success is not None
    ), "Listing page last_success is None even though a successful attempt exists"

    # Both surfaces must agree
    assert detail.last_indexed == listing.last_success, (
        f"Detail last_indexed={detail.last_indexed} != "
        f"listing last_success={listing.last_success}"
    )


def test_last_indexed_credential_swap_scenario(reset: None) -> None:  # noqa: ARG001
    """Perform an actual credential swap: create connector + cred1 (cc_pair_1),
    wait for success, then associate a new cred2 with the same connector
    (cc_pair_2), wait for that to succeed, and inject failures on cc_pair_2.

    cc_pair_2's last_indexed must reflect cc_pair_2's own success, not
    cc_pair_1's older one. Both the detail page and listing page must agree.
    """
    admin = UserManager.create(name="admin_cred_swap")

    connector = ConnectorManager.create(user_performing_action=admin)
    cred1 = CredentialManager.create(user_performing_action=admin)
    cc_pair_1 = CCPairManager.create(
        connector_id=connector.id,
        credential_id=cred1.id,
        user_performing_action=admin,
    )
    _wait_for_real_success(cc_pair_1, admin)

    cred2 = CredentialManager.create(user_performing_action=admin, name="swapped-cred")
    cc_pair_2 = CCPairManager.create(
        connector_id=connector.id,
        credential_id=cred2.id,
        user_performing_action=admin,
    )
    _wait_for_real_success(cc_pair_2, admin)

    listing_after_swap = _get_listing(cc_pair_2.id, admin)
    assert listing_after_swap.last_success is not None

    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=cc_pair_2.id,
        status=IndexingStatus.FAILED,
        error_msg="credential expired",
        base_time=datetime.now(tz=timezone.utc),
    )

    detail = _get_detail(cc_pair_2.id, admin)
    listing = _get_listing(cc_pair_2.id, admin)

    assert detail.last_indexed is not None
    assert listing.last_success is not None

    assert detail.last_indexed == listing.last_success, (
        f"Detail last_indexed={detail.last_indexed} != "
        f"listing last_success={listing.last_success}"
    )


def test_last_indexed_mixed_statuses(reset: None) -> None:  # noqa: ARG001
    """Mix of in_progress, failed, and successful attempts. Only the most
    recent successful attempt's time matters."""
    admin = UserManager.create(name="admin_mixed")
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
    _wait_for_real_success(cc_pair, admin)

    now = datetime.now(tz=timezone.utc)

    # Success 5 hours ago
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=1,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.SUCCESS,
        base_time=now - timedelta(hours=5),
    )

    # Failures 3 hours ago
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=3,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.FAILED,
        error_msg="transient failure",
        base_time=now - timedelta(hours=3),
    )

    # In-progress 1 hour ago
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=1,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.IN_PROGRESS,
        base_time=now - timedelta(hours=1),
    )

    detail = _get_detail(cc_pair.id, admin)
    listing = _get_listing(cc_pair.id, admin)

    assert detail.last_indexed is not None
    assert listing.last_success is not None

    assert detail.last_indexed == listing.last_success, (
        f"Detail last_indexed={detail.last_indexed} != "
        f"listing last_success={listing.last_success}"
    )


def test_last_indexed_completed_with_errors(reset: None) -> None:  # noqa: ARG001
    """COMPLETED_WITH_ERRORS is treated as a successful attempt (matching
    IndexingStatus.is_successful()). When it is the most recent "success"
    and later attempts all failed, both surfaces should reflect its time."""
    admin = UserManager.create(name="admin_completed_errors")
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
    _wait_for_real_success(cc_pair, admin)

    now = datetime.now(tz=timezone.utc)

    # COMPLETED_WITH_ERRORS 2 hours ago
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=1,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.COMPLETED_WITH_ERRORS,
        base_time=now - timedelta(hours=2),
    )

    # 10 failures after — push everything else off page 1
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.FAILED,
        error_msg="post-partial failure",
        base_time=now,
    )

    detail = _get_detail(cc_pair.id, admin)
    listing = _get_listing(cc_pair.id, admin)

    assert (
        detail.last_indexed is not None
    ), "COMPLETED_WITH_ERRORS should count as a success for last_indexed"
    assert (
        listing.last_success is not None
    ), "COMPLETED_WITH_ERRORS should count as a success for last_success"

    assert detail.last_indexed == listing.last_success, (
        f"Detail last_indexed={detail.last_indexed} != "
        f"listing last_success={listing.last_success}"
    )


================================================
FILE: backend/tests/integration/tests/discord_bot/test_discord_bot_api.py
================================================
"""Integration tests for Discord bot API endpoints.

These tests hit actual API endpoints via HTTP requests.
"""

import pytest
import requests

from onyx.db.discord_bot import get_discord_service_api_key
from onyx.db.discord_bot import get_or_create_discord_service_api_key
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from tests.integration.common_utils.managers.discord_bot import DiscordBotManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


class TestBotConfigEndpoints:
    """Tests for /manage/admin/discord-bot/config endpoints."""

    def test_get_bot_config_not_configured(self, reset: None) -> None:  # noqa: ARG002
        """GET /config returns configured=False when no config exists."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Ensure no config exists
        DiscordBotManager.delete_bot_config_if_exists(admin_user)

        config = DiscordBotManager.get_bot_config(admin_user)

        assert config["configured"] is False
        assert "created_at" not in config or config.get("created_at") is None

    def test_create_bot_config(self, reset: None) -> None:  # noqa: ARG002
        """POST /config creates a new bot config."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Ensure no config exists
        DiscordBotManager.delete_bot_config_if_exists(admin_user)

        config = DiscordBotManager.create_bot_config(
            bot_token="test_token_123",
            user_performing_action=admin_user,
        )

        assert config["configured"] is True
        assert "created_at" in config

        # Cleanup
        DiscordBotManager.delete_bot_config_if_exists(admin_user)

    def test_create_bot_config_already_exists(
        self,
        reset: None,  # noqa: ARG002
    ) -> None:
        """POST /config returns 409 if config already exists."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Ensure no config exists, then create one
        DiscordBotManager.delete_bot_config_if_exists(admin_user)
        DiscordBotManager.create_bot_config(
            bot_token="token1",
            user_performing_action=admin_user,
        )

        # Try to create another - should fail
        with pytest.raises(requests.HTTPError) as exc_info:
            DiscordBotManager.create_bot_config(
                bot_token="token2",
                user_performing_action=admin_user,
            )

        assert exc_info.value.response.status_code == 409

        # Cleanup
        DiscordBotManager.delete_bot_config_if_exists(admin_user)

    def test_delete_bot_config(self, reset: None) -> None:  # noqa: ARG002
        """DELETE /config removes the bot config."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Ensure no config exists, then create one
        DiscordBotManager.delete_bot_config_if_exists(admin_user)
        DiscordBotManager.create_bot_config(
            bot_token="test_token",
            user_performing_action=admin_user,
        )

        # Delete it
        result = DiscordBotManager.delete_bot_config(admin_user)
        assert result["deleted"] is True

        # Verify it's gone
        config = DiscordBotManager.get_bot_config(admin_user)
        assert config["configured"] is False

    def test_delete_bot_config_not_found(self, reset: None) -> None:  # noqa: ARG002
        """DELETE /config returns 404 if no config exists."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Ensure no config exists
        DiscordBotManager.delete_bot_config_if_exists(admin_user)

        # Try to delete - should fail
        with pytest.raises(requests.HTTPError) as exc_info:
            DiscordBotManager.delete_bot_config(admin_user)

        assert exc_info.value.response.status_code == 404


class TestGuildConfigEndpoints:
    """Tests for /manage/admin/discord-bot/guilds endpoints."""

    def test_create_guild_config(self, reset: None) -> None:  # noqa: ARG002
        """POST /guilds creates a new guild config with registration key."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guild = DiscordBotManager.create_guild(admin_user)

        assert guild.id is not None
        assert guild.registration_key is not None
        assert guild.registration_key.startswith("discord_")

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_list_guilds(self, reset: None) -> None:  # noqa: ARG002
        """GET /guilds returns all guild configs."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create some guilds
        guild1 = DiscordBotManager.create_guild(admin_user)
        guild2 = DiscordBotManager.create_guild(admin_user)

        guilds = DiscordBotManager.list_guilds(admin_user)

        guild_ids = [g["id"] for g in guilds]
        assert guild1.id in guild_ids
        assert guild2.id in guild_ids

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild1.id, admin_user)
        DiscordBotManager.delete_guild_if_exists(guild2.id, admin_user)

    def test_get_guild_config(self, reset: None) -> None:  # noqa: ARG002
        """GET /guilds/{config_id} returns the specific guild config."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guild = DiscordBotManager.create_guild(admin_user)

        fetched = DiscordBotManager.get_guild(guild.id, admin_user)

        assert fetched["id"] == guild.id
        assert fetched["enabled"] is True  # Default
        assert fetched["guild_id"] is None  # Not registered yet
        assert fetched["guild_name"] is None

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_get_guild_config_not_found(self, reset: None) -> None:  # noqa: ARG002
        """GET /guilds/{config_id} returns 404 for non-existent guild."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        result = DiscordBotManager.get_guild_or_none(999999, admin_user)
        assert result is None

    def test_update_guild_config(self, reset: None) -> None:  # noqa: ARG002
        """PATCH /guilds/{config_id} updates the guild config."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guild = DiscordBotManager.create_guild(admin_user)

        # Update enabled status
        updated = DiscordBotManager.update_guild(
            guild.id,
            admin_user,
            enabled=False,
        )

        assert updated["enabled"] is False

        # Verify persistence
        fetched = DiscordBotManager.get_guild(guild.id, admin_user)
        assert fetched["enabled"] is False

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_delete_guild_config(self, reset: None) -> None:  # noqa: ARG002
        """DELETE /guilds/{config_id} removes the guild config."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guild = DiscordBotManager.create_guild(admin_user)

        # Delete it
        result = DiscordBotManager.delete_guild(guild.id, admin_user)
        assert result["deleted"] is True

        # Verify it's gone
        assert DiscordBotManager.get_guild_or_none(guild.id, admin_user) is None

    def test_delete_guild_config_not_found(self, reset: None) -> None:  # noqa: ARG002
        """DELETE /guilds/{config_id} returns 404 for non-existent guild."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        with pytest.raises(requests.HTTPError) as exc_info:
            DiscordBotManager.delete_guild(999999, admin_user)

        assert exc_info.value.response.status_code == 404

    def test_registration_key_format(self, reset: None) -> None:  # noqa: ARG002
        """Registration key has proper format with tenant encoded."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guild = DiscordBotManager.create_guild(admin_user)

        # Key should be: discord_{encoded_tenant}.{random}
        key = guild.registration_key
        assert key is not None
        assert key.startswith("discord_")

        # Should have two parts separated by dot
        key_body = key.removeprefix("discord_")
        parts = key_body.split(".", 1)
        assert len(parts) == 2

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_each_registration_key_is_unique(self, reset: None) -> None:  # noqa: ARG002
        """Each created guild gets a unique registration key."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        guilds = [DiscordBotManager.create_guild(admin_user) for _ in range(5)]
        keys = [g.registration_key for g in guilds]

        assert len(set(keys)) == 5  # All unique

        # Cleanup
        for guild in guilds:
            DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)


class TestChannelConfigEndpoints:
    """Tests for /manage/admin/discord-bot/guilds/{id}/channels endpoints."""

    def test_list_channels_empty(self, reset: None) -> None:  # noqa: ARG002
        """GET /guilds/{id}/channels returns empty list when no channels exist."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=111111111,
            guild_name="Test Guild",
        )

        channels = DiscordBotManager.list_channels(guild.id, admin_user)

        assert channels == []

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_list_channels_with_data(self, reset: None) -> None:  # noqa: ARG002
        """GET /guilds/{id}/channels returns channel configs."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=222222222,
            guild_name="Test Guild",
        )

        # Create test channels directly in DB
        channel1 = DiscordBotManager.create_test_channel_in_db(
            guild_config_id=guild.id,
            channel_id=123456789,
            channel_name="general",
        )
        channel2 = DiscordBotManager.create_test_channel_in_db(
            guild_config_id=guild.id,
            channel_id=987654321,
            channel_name="help",
            channel_type="forum",
        )

        channels = DiscordBotManager.list_channels(guild.id, admin_user)

        assert len(channels) == 2
        channel_ids = [c.id for c in channels]
        assert channel1.id in channel_ids
        assert channel2.id in channel_ids

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_update_channel_enabled(self, reset: None) -> None:  # noqa: ARG002
        """PATCH /guilds/{id}/channels/{id} updates enabled status."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=333333333,
            guild_name="Test Guild",
        )
        channel = DiscordBotManager.create_test_channel_in_db(
            guild_config_id=guild.id,
            channel_id=123456789,
            channel_name="general",
        )

        # Default is disabled
        assert channel.enabled is False

        # Enable the channel
        updated = DiscordBotManager.update_channel(
            guild.id,
            channel.id,
            admin_user,
            enabled=True,
        )

        assert updated.enabled is True

        # Verify persistence
        channels = DiscordBotManager.list_channels(guild.id, admin_user)
        found = next(c for c in channels if c.id == channel.id)
        assert found.enabled is True

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_update_channel_thread_only_mode(self, reset: None) -> None:  # noqa: ARG002
        """PATCH /guilds/{id}/channels/{id} updates thread_only_mode."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=444444444,
            guild_name="Test Guild",
        )
        channel = DiscordBotManager.create_test_channel_in_db(
            guild_config_id=guild.id,
            channel_id=123456789,
            channel_name="general",
        )

        # Default is False
        assert channel.thread_only_mode is False

        # Enable thread_only_mode
        updated = DiscordBotManager.update_channel(
            guild.id,
            channel.id,
            admin_user,
            thread_only_mode=True,
        )

        assert updated.thread_only_mode is True

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_update_channel_require_bot_invocation(
        self,
        reset: None,  # noqa: ARG002
    ) -> None:
        """PATCH /guilds/{id}/channels/{id} updates require_bot_invocation."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=555555555,
            guild_name="Test Guild",
        )
        channel = DiscordBotManager.create_test_channel_in_db(
            guild_config_id=guild.id,
            channel_id=123456789,
            channel_name="general",
        )

        # Default is True
        assert channel.require_bot_invocation is True

        # Disable require_bot_invocation
        updated = DiscordBotManager.update_channel(
            guild.id,
            channel.id,
            admin_user,
            require_bot_invocation=False,
        )

        assert updated.require_bot_invocation is False

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)

    def test_update_channel_not_found(self, reset: None) -> None:  # noqa: ARG002
        """PATCH /guilds/{id}/channels/{id} returns 404 for non-existent channel."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Create a registered guild (has guild_id set)
        guild = DiscordBotManager.create_registered_guild_in_db(
            guild_id=666666666,
            guild_name="Test Guild",
        )

        with pytest.raises(requests.HTTPError) as exc_info:
            DiscordBotManager.update_channel(
                guild.id,
                999999,
                admin_user,
                enabled=True,
            )

        assert exc_info.value.response.status_code == 404

        # Cleanup
        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)


class TestServiceApiKeyCleanup:
    """Tests for service API key cleanup when bot/guild configs are deleted."""

    def test_delete_bot_config_also_deletes_service_api_key(
        self,
        reset: None,  # noqa: ARG002
    ) -> None:
        """DELETE /config also deletes the service API key (self-hosted flow)."""
        admin_user: DATestUser = UserManager.create(name="admin_user")

        # Setup: create bot config via API
        DiscordBotManager.delete_bot_config_if_exists(admin_user)
        DiscordBotManager.create_bot_config(
            bot_token="test_token",
            user_performing_action=admin_user,
        )

        # Create service API key directly in DB (simulating bot registration)
        with get_session_with_current_tenant() as db_session:
            get_or_create_discord_service_api_key(db_session, "public")
            db_session.commit()

            # Verify it exists
            assert get_discord_service_api_key(db_session) is not None

        # Delete bot config via API
        result = DiscordBotManager.delete_bot_config(admin_user)
        assert result["deleted"] is True

        # Verify service API key was also deleted
        with get_session_with_current_tenant() as db_session:
            assert get_discord_service_api_key(db_session) is None


================================================
FILE: backend/tests/integration/tests/discord_bot/test_discord_bot_db.py
================================================
"""Integration tests for Discord bot database operations.

These tests verify CRUD operations for Discord bot models.
"""

from collections.abc import Generator

import pytest
from sqlalchemy.orm import Session

from onyx.db.discord_bot import bulk_create_channel_configs
from onyx.db.discord_bot import create_discord_bot_config
from onyx.db.discord_bot import create_guild_config
from onyx.db.discord_bot import delete_discord_bot_config
from onyx.db.discord_bot import delete_discord_service_api_key
from onyx.db.discord_bot import delete_guild_config
from onyx.db.discord_bot import get_channel_configs
from onyx.db.discord_bot import get_discord_bot_config
from onyx.db.discord_bot import get_discord_service_api_key
from onyx.db.discord_bot import get_guild_config_by_internal_id
from onyx.db.discord_bot import get_guild_config_by_registration_key
from onyx.db.discord_bot import get_guild_configs
from onyx.db.discord_bot import get_or_create_discord_service_api_key
from onyx.db.discord_bot import sync_channel_configs
from onyx.db.discord_bot import update_discord_channel_config
from onyx.db.discord_bot import update_guild_config
from onyx.db.models import Persona
from onyx.db.utils import DiscordChannelView
from onyx.server.manage.discord_bot.utils import generate_discord_registration_key


def _create_test_persona(db_session: Session, persona_id: int, name: str) -> Persona:
    """Create a minimal test persona."""
    persona = Persona(
        id=persona_id,
        name=name,
        description="Test persona for Discord bot tests",
        is_listed=True,
        is_featured=False,
        deleted=False,
        builtin_persona=False,
    )
    db_session.add(persona)
    db_session.flush()
    return persona


def _delete_test_persona(db_session: Session, persona_id: int) -> None:
    """Delete a test persona."""
    db_session.query(Persona).filter(Persona.id == persona_id).delete()
    db_session.flush()


class TestBotConfigAPI:
    """Tests for bot config API operations."""

    def test_create_bot_config(self, db_session: Session) -> None:
        """Create bot config succeeds with valid token."""
        # Clean up any existing config first
        delete_discord_bot_config(db_session)
        db_session.commit()

        config = create_discord_bot_config(db_session, bot_token="test_token_123")
        db_session.commit()

        assert config is not None
        assert config.bot_token is not None
        assert config.bot_token.get_value(apply_mask=False) == "test_token_123"

        # Cleanup
        delete_discord_bot_config(db_session)
        db_session.commit()

    def test_create_bot_config_already_exists(self, db_session: Session) -> None:
        """Creating config twice raises ValueError."""
        # Clean up first
        delete_discord_bot_config(db_session)
        db_session.commit()

        create_discord_bot_config(db_session, bot_token="token1")
        db_session.commit()

        with pytest.raises(ValueError):
            create_discord_bot_config(db_session, bot_token="token2")

        # Cleanup
        delete_discord_bot_config(db_session)
        db_session.commit()

    def test_get_bot_config(self, db_session: Session) -> None:
        """Get bot config returns config with masked token."""
        # Clean up first
        delete_discord_bot_config(db_session)
        db_session.commit()

        create_discord_bot_config(db_session, bot_token="my_secret_token")
        db_session.commit()

        config = get_discord_bot_config(db_session)

        assert config is not None
        # Token should be stored (we don't mask in DB, only API response)
        assert config.bot_token is not None

        # Cleanup
        delete_discord_bot_config(db_session)
        db_session.commit()

    def test_delete_bot_config(self, db_session: Session) -> None:
        """Delete bot config removes it from DB."""
        # Clean up first
        delete_discord_bot_config(db_session)
        db_session.commit()

        create_discord_bot_config(db_session, bot_token="token")
        db_session.commit()

        deleted = delete_discord_bot_config(db_session)
        db_session.commit()

        assert deleted is True
        assert get_discord_bot_config(db_session) is None

    def test_delete_bot_config_not_found(self, db_session: Session) -> None:
        """Delete when no config exists returns False."""
        # Ensure no config exists
        delete_discord_bot_config(db_session)
        db_session.commit()

        deleted = delete_discord_bot_config(db_session)
        assert deleted is False


class TestRegistrationKeyAPI:
    """Tests for registration key API operations."""

    def test_create_registration_key(self, db_session: Session) -> None:
        """Create registration key with proper format."""
        key = generate_discord_registration_key("test_tenant")

        config = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        assert config is not None
        assert config.registration_key == key
        assert key.startswith("discord_")
        assert "test_tenant" in key or "test%5Ftenant" in key

        # Cleanup
        delete_guild_config(db_session, config.id)
        db_session.commit()

    def test_registration_key_is_unique(
        self,
        db_session: Session,  # noqa: ARG002
    ) -> None:
        """Each generated key is unique."""
        keys = [generate_discord_registration_key("tenant") for _ in range(5)]
        assert len(set(keys)) == 5

    def test_delete_registration_key(self, db_session: Session) -> None:
        """Deleted key can no longer be used."""
        key = generate_discord_registration_key("tenant")
        config = create_guild_config(db_session, registration_key=key)
        db_session.commit()
        config_id = config.id

        # Delete
        deleted = delete_guild_config(db_session, config_id)
        db_session.commit()

        assert deleted is True

        # Should not find it anymore
        found = get_guild_config_by_registration_key(db_session, key)
        assert found is None


class TestGuildConfigAPI:
    """Tests for guild config API operations."""

    def test_list_guilds(self, db_session: Session) -> None:
        """List guilds returns all guild configs."""
        # Create some guild configs
        key1 = generate_discord_registration_key("t1")
        key2 = generate_discord_registration_key("t2")

        config1 = create_guild_config(db_session, registration_key=key1)
        config2 = create_guild_config(db_session, registration_key=key2)
        db_session.commit()

        configs = get_guild_configs(db_session)

        assert len(configs) >= 2

        # Cleanup
        delete_guild_config(db_session, config1.id)
        delete_guild_config(db_session, config2.id)
        db_session.commit()

    def test_get_guild_config(self, db_session: Session) -> None:
        """Get specific guild config by ID."""
        key = generate_discord_registration_key("tenant")
        config = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        found = get_guild_config_by_internal_id(db_session, config.id)

        assert found is not None
        assert found.id == config.id
        assert found.registration_key == key

        # Cleanup
        delete_guild_config(db_session, config.id)
        db_session.commit()

    def test_update_guild_enabled(self, db_session: Session) -> None:
        """Update guild enabled status."""
        key = generate_discord_registration_key("tenant")
        config = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Initially enabled is True by default
        assert config.enabled is True

        # Disable
        updated = update_guild_config(
            db_session, config, enabled=False, default_persona_id=None
        )
        db_session.commit()

        assert updated.enabled is False

        # Cleanup
        delete_guild_config(db_session, config.id)
        db_session.commit()

    def test_update_guild_persona(self, db_session: Session) -> None:
        """Update guild default persona."""
        # Create test persona first to satisfy foreign key constraint
        _create_test_persona(db_session, 5, "Test Persona 5")
        db_session.commit()

        key = generate_discord_registration_key("tenant")
        config = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Set persona
        updated = update_guild_config(
            db_session, config, enabled=True, default_persona_id=5
        )
        db_session.commit()

        assert updated.default_persona_id == 5

        # Cleanup
        delete_guild_config(db_session, config.id)
        _delete_test_persona(db_session, 5)
        db_session.commit()


class TestChannelConfigAPI:
    """Tests for channel config API operations."""

    def test_list_channels_for_guild(self, db_session: Session) -> None:
        """List channels returns all channel configs for guild."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Create some channels
        channels = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
            DiscordChannelView(
                channel_id=222,
                channel_name="help",
                channel_type="text",
                is_private=False,
            ),
        ]
        bulk_create_channel_configs(db_session, guild.id, channels)
        db_session.commit()

        channel_configs = get_channel_configs(db_session, guild.id)

        assert len(channel_configs) == 2

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()

    def test_update_channel_enabled(self, db_session: Session) -> None:
        """Update channel enabled status."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        channels = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
        ]
        created = bulk_create_channel_configs(db_session, guild.id, channels)
        db_session.commit()

        # Channels are disabled by default
        assert created[0].enabled is False

        # Enable
        updated = update_discord_channel_config(
            db_session,
            created[0],
            channel_name="general",
            thread_only_mode=False,
            require_bot_invocation=True,
            enabled=True,
        )
        db_session.commit()

        assert updated.enabled is True

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()

    def test_update_channel_thread_only_mode(self, db_session: Session) -> None:
        """Update channel thread_only_mode setting."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        channels = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
        ]
        created = bulk_create_channel_configs(db_session, guild.id, channels)
        db_session.commit()

        # Update thread_only_mode
        updated = update_discord_channel_config(
            db_session,
            created[0],
            channel_name="general",
            thread_only_mode=True,
            require_bot_invocation=True,
            enabled=True,
        )
        db_session.commit()

        assert updated.thread_only_mode is True

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()

    def test_sync_channels_adds_new(self, db_session: Session) -> None:
        """Sync channels adds new channels."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Initial channels
        initial = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
        ]
        bulk_create_channel_configs(db_session, guild.id, initial)
        db_session.commit()

        # Sync with new channel
        current = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
            DiscordChannelView(
                channel_id=222,
                channel_name="new-channel",
                channel_type="text",
                is_private=False,
            ),
        ]
        added, removed, updated = sync_channel_configs(db_session, guild.id, current)
        db_session.commit()

        assert added == 1
        assert removed == 0

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()

    def test_sync_channels_removes_deleted(self, db_session: Session) -> None:
        """Sync channels removes deleted channels."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Initial channels
        initial = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
            DiscordChannelView(
                channel_id=222,
                channel_name="old-channel",
                channel_type="text",
                is_private=False,
            ),
        ]
        bulk_create_channel_configs(db_session, guild.id, initial)
        db_session.commit()

        # Sync with one channel removed
        current = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
        ]
        added, removed, updated = sync_channel_configs(db_session, guild.id, current)
        db_session.commit()

        assert added == 0
        assert removed == 1

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()

    def test_sync_channels_updates_renamed(self, db_session: Session) -> None:
        """Sync channels updates renamed channels."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        db_session.commit()

        # Initial channels
        initial = [
            DiscordChannelView(
                channel_id=111,
                channel_name="old-name",
                channel_type="text",
                is_private=False,
            ),
        ]
        bulk_create_channel_configs(db_session, guild.id, initial)
        db_session.commit()

        # Sync with renamed channel
        current = [
            DiscordChannelView(
                channel_id=111,
                channel_name="new-name",
                channel_type="text",
                is_private=False,
            ),
        ]
        added, removed, updated = sync_channel_configs(db_session, guild.id, current)
        db_session.commit()

        assert added == 0
        assert removed == 0
        assert updated == 1

        # Verify name was updated
        configs = get_channel_configs(db_session, guild.id)
        assert configs[0].channel_name == "new-name"

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()


class TestPersonaConfigurationAPI:
    """Tests for persona configuration in API."""

    def test_guild_persona_used_in_api_call(self, db_session: Session) -> None:
        """Guild default_persona_id is used when no channel override."""
        # Create test persona first
        _create_test_persona(db_session, 42, "Test Persona 42")
        db_session.commit()

        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        update_guild_config(db_session, guild, enabled=True, default_persona_id=42)
        db_session.commit()

        # Verify persona is set
        config = get_guild_config_by_internal_id(db_session, guild.id)
        assert config is not None
        assert config.default_persona_id == 42

        # Cleanup
        delete_guild_config(db_session, guild.id)
        _delete_test_persona(db_session, 42)
        db_session.commit()

    def test_channel_persona_override_in_api_call(self, db_session: Session) -> None:
        """Channel persona_override_id takes precedence over guild default."""
        # Create test personas first
        _create_test_persona(db_session, 42, "Test Persona 42")
        _create_test_persona(db_session, 99, "Test Persona 99")
        db_session.commit()

        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        update_guild_config(db_session, guild, enabled=True, default_persona_id=42)
        db_session.commit()

        channels = [
            DiscordChannelView(
                channel_id=111,
                channel_name="general",
                channel_type="text",
                is_private=False,
            ),
        ]
        created = bulk_create_channel_configs(db_session, guild.id, channels)
        db_session.commit()

        # Set channel persona override
        updated = update_discord_channel_config(
            db_session,
            created[0],
            channel_name="general",
            thread_only_mode=False,
            require_bot_invocation=True,
            enabled=True,
            persona_override_id=99,  # Override!
        )
        db_session.commit()

        assert updated.persona_override_id == 99

        # Cleanup
        delete_guild_config(db_session, guild.id)
        _delete_test_persona(db_session, 42)
        _delete_test_persona(db_session, 99)
        db_session.commit()

    def test_no_persona_uses_default(self, db_session: Session) -> None:
        """Neither guild nor channel has persona - uses API default."""
        key = generate_discord_registration_key("tenant")
        guild = create_guild_config(db_session, registration_key=key)
        # No persona set
        db_session.commit()

        config = get_guild_config_by_internal_id(db_session, guild.id)
        assert config is not None
        assert config.default_persona_id is None

        # Cleanup
        delete_guild_config(db_session, guild.id)
        db_session.commit()


class TestServiceApiKeyAPI:
    """Tests for Discord service API key operations."""

    def test_create_service_api_key(self, db_session: Session) -> None:
        """Create service API key returns valid key."""
        # Clean up any existing key first
        delete_discord_service_api_key(db_session)
        db_session.commit()

        api_key = get_or_create_discord_service_api_key(db_session, "public")
        db_session.commit()

        assert api_key is not None
        assert len(api_key) > 0

        # Verify key was stored in database
        stored_key = get_discord_service_api_key(db_session)
        assert stored_key is not None

        # Cleanup
        delete_discord_service_api_key(db_session)
        db_session.commit()

    def test_get_or_create_returns_existing(self, db_session: Session) -> None:
        """get_or_create_discord_service_api_key regenerates key if exists."""
        # Clean up any existing key first
        delete_discord_service_api_key(db_session)
        db_session.commit()

        # Create first key
        key1 = get_or_create_discord_service_api_key(db_session, "public")
        db_session.commit()

        # Call again - should regenerate (per implementation, it regenerates to update cache)
        key2 = get_or_create_discord_service_api_key(db_session, "public")
        db_session.commit()

        # Keys should be different since it regenerates
        assert key1 != key2

        # But there should still be only one key in the database
        stored_key = get_discord_service_api_key(db_session)
        assert stored_key is not None

        # Cleanup
        delete_discord_service_api_key(db_session)
        db_session.commit()

    def test_delete_service_api_key(self, db_session: Session) -> None:
        """Delete service API key removes it from DB."""
        # Clean up any existing key first
        delete_discord_service_api_key(db_session)
        db_session.commit()

        # Create a key
        get_or_create_discord_service_api_key(db_session, "public")
        db_session.commit()

        # Delete it
        deleted = delete_discord_service_api_key(db_session)
        db_session.commit()

        assert deleted is True
        assert get_discord_service_api_key(db_session) is None

    def test_delete_service_api_key_not_found(self, db_session: Session) -> None:
        """Delete when no key exists returns False."""
        # Ensure no key exists
        delete_discord_service_api_key(db_session)
        db_session.commit()

        deleted = delete_discord_service_api_key(db_session)
        assert deleted is False


# Pytest fixture for db_session
@pytest.fixture
def db_session() -> Generator[Session, None, None]:
    """Create database session for tests."""
    from onyx.db.engine.sql_engine import get_session_with_current_tenant
    from onyx.db.engine.sql_engine import SqlEngine
    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

    SqlEngine.init_engine(pool_size=10, max_overflow=5)

    token = CURRENT_TENANT_ID_CONTEXTVAR.set("public")
    try:
        with get_session_with_current_tenant() as session:
            yield session
    finally:
        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


================================================
FILE: backend/tests/integration/tests/document_set/test_syncing.py
================================================
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.document_set import DocumentSetManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


def test_multiple_document_sets_syncing_same_connnector(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create api key
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connector
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # seed documents
    cc_pair_1.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_1,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    # Create document sets
    doc_set_1 = DocumentSetManager.create(
        cc_pair_ids=[cc_pair_1.id],
        user_performing_action=admin_user,
    )
    doc_set_2 = DocumentSetManager.create(
        cc_pair_ids=[cc_pair_1.id],
        user_performing_action=admin_user,
    )

    DocumentSetManager.wait_for_sync(
        user_performing_action=admin_user,
    )

    DocumentSetManager.verify(
        document_set=doc_set_1,
        user_performing_action=admin_user,
    )
    DocumentSetManager.verify(
        document_set=doc_set_2,
        user_performing_action=admin_user,
    )

    # make sure documents are as expected
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[doc_set_1.name, doc_set_2.name],
        doc_creating_user=admin_user,
    )


def test_removing_connector(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create api key
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connectors
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )
    cc_pair_2 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # seed documents
    cc_pair_1.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_1,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    cc_pair_2.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_2,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    # Create document sets
    doc_set_1 = DocumentSetManager.create(
        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],
        user_performing_action=admin_user,
    )

    DocumentSetManager.wait_for_sync(
        user_performing_action=admin_user,
    )

    DocumentSetManager.verify(
        document_set=doc_set_1,
        user_performing_action=admin_user,
    )

    # make sure cc_pair_1 docs are doc_set_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[doc_set_1.name],
        doc_creating_user=admin_user,
    )

    # make sure cc_pair_2 docs are doc_set_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_set_names=[doc_set_1.name],
        doc_creating_user=admin_user,
    )

    # remove cc_pair_2 from document set
    doc_set_1.cc_pair_ids = [cc_pair_1.id]
    DocumentSetManager.edit(
        doc_set_1,
        user_performing_action=admin_user,
    )

    DocumentSetManager.wait_for_sync(
        user_performing_action=admin_user,
    )

    # make sure cc_pair_1 docs are doc_set_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        doc_set_names=[doc_set_1.name],
        doc_creating_user=admin_user,
    )

    # make sure cc_pair_2 docs have no doc set
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        doc_set_names=[],
        doc_creating_user=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/image_generation/test_image_generation_config.py
================================================
"""Integration tests for image generation config endpoints.

Tests cover CRUD operations for /admin/image-generation/config endpoints.
The /admin/image-generation/test endpoint is not tested as it makes real API calls.

Uses module-scoped fixtures to reset DB and create users once per module for faster execution.
"""

import pytest
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.image_generation import (
    ImageGenerationConfigManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


@pytest.fixture(scope="module")
def setup_image_generation_tests() -> tuple[DATestUser, DATestLLMProvider]:
    """Module-scoped fixture that runs once for all tests in this module.

    - Resets DB once at the start of the module
    - Creates admin user once
    - Creates LLM provider once (for clone-mode test)
    - Returns (admin_user, llm_provider) tuple for all tests to use
    """
    reset_all()
    admin_user = UserManager.create(name="admin_user")
    llm_provider = LLMProviderManager.create(user_performing_action=admin_user)
    return admin_user, llm_provider


def test_create_image_generation_config(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test creating an image generation config with new credentials."""
    admin_user, _ = setup_image_generation_tests

    config = ImageGenerationConfigManager.create(
        image_provider_id="test-openai-dalle",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-test-key-12345",
        is_default=False,
        user_performing_action=admin_user,
    )

    assert config.image_provider_id == "test-openai-dalle"
    assert config.model_name == "dall-e-3"
    assert config.is_default is False

    # Verify it exists in the list
    ImageGenerationConfigManager.verify(
        config=config,
        user_performing_action=admin_user,
    )


def test_create_image_generation_config_from_provider(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test creating an image generation config by cloning from an existing LLM provider."""
    admin_user, llm_provider = setup_image_generation_tests

    # Create image generation config from the provider
    config = ImageGenerationConfigManager.create_from_provider(
        source_llm_provider_id=llm_provider.id,
        image_provider_id="test-from-provider",
        model_name="gpt-image-1",
        is_default=True,
        user_performing_action=admin_user,
    )

    assert config.image_provider_id == "test-from-provider"
    assert config.model_name == "gpt-image-1"
    assert config.is_default is True

    # Verify it exists
    ImageGenerationConfigManager.verify(
        config=config,
        user_performing_action=admin_user,
    )


def test_create_duplicate_config_fails(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test that creating a config with an existing image_provider_id fails."""
    admin_user, _ = setup_image_generation_tests

    # Create first config
    ImageGenerationConfigManager.create(
        image_provider_id="duplicate-test-id",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-test-key-1",
        user_performing_action=admin_user,
    )

    # Try to create another with the same image_provider_id
    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config",
        json={
            "image_provider_id": "duplicate-test-id",
            "model_name": "gpt-image-1",
            "provider": "openai",
            "api_key": "sk-test-key-2",
        },
        headers=admin_user.headers,
    )

    assert response.status_code == 400
    assert "already exists" in response.json()["detail"]


def test_get_all_configs(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test getting all image generation configs."""
    admin_user, _ = setup_image_generation_tests

    # Create multiple configs
    config1 = ImageGenerationConfigManager.create(
        image_provider_id="config-1",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-key-1",
        user_performing_action=admin_user,
    )
    config2 = ImageGenerationConfigManager.create(
        image_provider_id="config-2",
        model_name="gpt-image-1",
        provider="openai",
        api_key="sk-key-2",
        user_performing_action=admin_user,
    )

    # Get all configs
    all_configs = ImageGenerationConfigManager.get_all(
        user_performing_action=admin_user
    )

    assert len(all_configs) >= 2
    config_ids = [c.image_provider_id for c in all_configs]
    assert config1.image_provider_id in config_ids
    assert config2.image_provider_id in config_ids


def test_get_config_credentials(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test getting credentials for an image generation config."""
    admin_user, _ = setup_image_generation_tests

    test_api_key = "sk-test-credentials-key-12345"
    config = ImageGenerationConfigManager.create(
        image_provider_id="credentials-test",
        model_name="dall-e-3",
        provider="openai",
        api_key=test_api_key,
        user_performing_action=admin_user,
    )

    # Get credentials
    credentials = ImageGenerationConfigManager.get_credentials(
        image_provider_id=config.image_provider_id,
        user_performing_action=admin_user,
    )

    # Credentials should contain the masked API key (first 4 + **** + last 4)
    assert credentials["api_key"] == "sk-t****2345"
    assert "api_base" in credentials
    assert "api_version" in credentials
    assert "deployment_name" in credentials


def test_get_credentials_not_found(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test getting credentials for a non-existent config returns 404."""
    admin_user, _ = setup_image_generation_tests

    response = requests.get(
        f"{API_SERVER_URL}/admin/image-generation/config/non-existent-id/credentials",
        headers=admin_user.headers,
    )

    assert response.status_code == 404


def test_update_config_direct_key_entry(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test updating an image generation config with new direct credentials."""
    admin_user, _ = setup_image_generation_tests

    # Create initial config
    config = ImageGenerationConfigManager.create(
        image_provider_id="update-direct-test",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-initial-key",
        user_performing_action=admin_user,
    )

    assert config.model_name == "dall-e-3"

    # Update with new credentials and model
    new_api_key = "sk-updated-key-12345"
    updated_config = ImageGenerationConfigManager.update(
        image_provider_id=config.image_provider_id,
        model_name="dall-e-3",
        provider="openai",
        api_key=new_api_key,
        user_performing_action=admin_user,
    )

    assert updated_config.image_provider_id == config.image_provider_id
    assert updated_config.model_name == "dall-e-3"

    # Verify credentials were updated (masked: first 4 + **** + last 4)
    credentials = ImageGenerationConfigManager.get_credentials(
        image_provider_id=config.image_provider_id,
        user_performing_action=admin_user,
    )
    assert credentials["api_key"] == "sk-u****2345"


def test_update_config_clone_mode(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test updating an image generation config by cloning from an LLM provider."""
    admin_user, llm_provider = setup_image_generation_tests

    # Create initial config with direct credentials
    config = ImageGenerationConfigManager.create(
        image_provider_id="update-clone-test",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-initial-direct-key",
        user_performing_action=admin_user,
    )

    assert config.model_name == "dall-e-3"

    # Update by cloning from LLM provider
    updated_config = ImageGenerationConfigManager.update(
        image_provider_id=config.image_provider_id,
        model_name="gpt-image-1",
        source_llm_provider_id=llm_provider.id,
        user_performing_action=admin_user,
    )

    assert updated_config.image_provider_id == config.image_provider_id
    assert updated_config.model_name == "gpt-image-1"

    # Verify config still exists and is accessible
    ImageGenerationConfigManager.verify(
        config=updated_config,
        user_performing_action=admin_user,
    )


def test_update_config_source_provider_not_found(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test that updating with non-existent source_llm_provider_id fails."""
    admin_user, _ = setup_image_generation_tests

    # Create initial config
    config = ImageGenerationConfigManager.create(
        image_provider_id="update-bad-source-test",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-initial-key",
        user_performing_action=admin_user,
    )

    # Try to update with non-existent source provider
    response = requests.put(
        f"{API_SERVER_URL}/admin/image-generation/config/{config.image_provider_id}",
        json={
            "model_name": "gpt-image-1",
            "source_llm_provider_id": 999999,
        },
        headers=admin_user.headers,
    )

    assert response.status_code == 404
    assert "not found" in response.json()["detail"]


def test_delete_config(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test deleting an image generation config."""
    admin_user, _ = setup_image_generation_tests

    # Create a config
    config = ImageGenerationConfigManager.create(
        image_provider_id="delete-test",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-delete-key",
        user_performing_action=admin_user,
    )

    # Verify it exists
    ImageGenerationConfigManager.verify(
        config=config,
        user_performing_action=admin_user,
    )

    # Delete it
    ImageGenerationConfigManager.delete(
        image_provider_id=config.image_provider_id,
        user_performing_action=admin_user,
    )

    # Verify it's deleted
    ImageGenerationConfigManager.verify(
        config=config,
        verify_deleted=True,
        user_performing_action=admin_user,
    )


def test_delete_config_not_found(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test deleting a non-existent config returns 404."""
    admin_user, _ = setup_image_generation_tests

    response = requests.delete(
        f"{API_SERVER_URL}/admin/image-generation/config/non-existent-id",
        headers=admin_user.headers,
    )

    assert response.status_code == 404


def test_set_default_config(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test setting a config as the default."""
    admin_user, _ = setup_image_generation_tests

    # Create a config that is not default
    config = ImageGenerationConfigManager.create(
        image_provider_id="default-test",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-test-key",
        is_default=False,
        user_performing_action=admin_user,
    )

    assert config.is_default is False

    # Set it as default
    ImageGenerationConfigManager.set_default(
        image_provider_id=config.image_provider_id,
        user_performing_action=admin_user,
    )

    # Verify it's now default
    all_configs = ImageGenerationConfigManager.get_all(
        user_performing_action=admin_user
    )
    updated_config = next(
        c for c in all_configs if c.image_provider_id == config.image_provider_id
    )
    assert updated_config.is_default is True


def test_set_default_clears_previous(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test that setting a new default clears the previous default."""
    admin_user, _ = setup_image_generation_tests

    # Create first config as default
    config1 = ImageGenerationConfigManager.create(
        image_provider_id="first-default",
        model_name="dall-e-3",
        provider="openai",
        api_key="sk-key-1",
        is_default=True,
        user_performing_action=admin_user,
    )

    # Create second config not as default
    config2 = ImageGenerationConfigManager.create(
        image_provider_id="second-default",
        model_name="gpt-image-1",
        provider="openai",
        api_key="sk-key-2",
        is_default=False,
        user_performing_action=admin_user,
    )

    # Verify first is default
    all_configs = ImageGenerationConfigManager.get_all(
        user_performing_action=admin_user
    )
    first = next(
        c for c in all_configs if c.image_provider_id == config1.image_provider_id
    )
    second = next(
        c for c in all_configs if c.image_provider_id == config2.image_provider_id
    )
    assert first.is_default is True
    assert second.is_default is False

    # Set second as default
    ImageGenerationConfigManager.set_default(
        image_provider_id=config2.image_provider_id,
        user_performing_action=admin_user,
    )

    # Verify second is now default and first is not
    all_configs = ImageGenerationConfigManager.get_all(
        user_performing_action=admin_user
    )
    first = next(
        c for c in all_configs if c.image_provider_id == config1.image_provider_id
    )
    second = next(
        c for c in all_configs if c.image_provider_id == config2.image_provider_id
    )
    assert first.is_default is False
    assert second.is_default is True


def test_set_default_not_found(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test setting a non-existent config as default returns 404."""
    admin_user, _ = setup_image_generation_tests

    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config/non-existent-id/default",
        headers=admin_user.headers,
    )

    assert response.status_code == 404


def test_create_config_missing_credentials(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test that creating a config without credentials fails."""
    admin_user, _ = setup_image_generation_tests

    # Try to create without api_key/provider or source_llm_provider_id
    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config",
        json={
            "image_provider_id": "no-creds-test",
            "model_name": "dall-e-3",
        },
        headers=admin_user.headers,
    )

    assert response.status_code == 400
    assert "No provider or source llm provided" in response.json()["detail"]


def test_create_config_source_provider_not_found(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """Test creating a config with non-existent source_llm_provider_id fails."""
    admin_user, _ = setup_image_generation_tests

    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config",
        json={
            "image_provider_id": "bad-source-test",
            "model_name": "dall-e-3",
            "source_llm_provider_id": 999999,  # Non-existent ID
        },
        headers=admin_user.headers,
    )

    assert response.status_code == 404
    assert "not found" in response.json()["detail"]


================================================
FILE: backend/tests/integration/tests/image_generation/test_image_generation_tool_visibility.py
================================================
"""Integration tests to check broader image generation config flow endpoints."""

import pytest

from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
)
from tests.integration.common_utils.managers.image_generation import (
    ImageGenerationConfigManager,
)
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.tool import ToolManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser

IMAGE_GENERATION_TOOL_NAME = ImageGenerationTool.NAME


@pytest.fixture(scope="module")
def setup_image_generation_tests() -> tuple[DATestUser, DATestLLMProvider]:
    """Module-scoped fixture that runs once for all tests in this module.

    - Resets DB once at the start of the module
    - Creates admin user once
    - Creates LLM provider once (for clone-mode test)
    - Returns (admin_user, llm_provider) tuple for all tests to use
    """
    reset_all()
    admin_user = UserManager.create(name="admin_user")
    llm_provider = LLMProviderManager.create(user_performing_action=admin_user)
    return admin_user, llm_provider


def test_vertex_creds_upload_image_tool_visibility(
    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],
) -> None:
    """
    Tests the following scenario:
    1. No image model added so tool not visible
    2. Vertex AI creds uploaded
    3. Image model added so tool visible
    """
    admin_user, _ = setup_image_generation_tests

    # 1. Check the tools and check that image generation tool is not visible yet
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    assert not any(tool.name == IMAGE_GENERATION_TOOL_NAME for tool in tools)

    # 2. Upload vertex ai credentials
    config = ImageGenerationConfigManager.create(
        image_provider_id="gemini-2.5-flash-image",
        model_name="gemini-2.5-flash-image",
        provider="vertex_ai",
        custom_config={
            "vertex_credentials": {
                "type": "service_account",
                "project_id": "test-project-id",
                "private_key_id": "test-private-key-id",
                "private_key": "test-private-key",
                # ... Other random fields that we dont care about
            },
            "vertex_location": "test-location",
        },
        user_performing_action=admin_user,
        is_default=True,
    )

    assert config.image_provider_id == "gemini-2.5-flash-image"
    assert config.model_name == "gemini-2.5-flash-image"

    # 3. Check that the tool is visible
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    assert any(tool.name == IMAGE_GENERATION_TOOL_NAME for tool in tools)


================================================
FILE: backend/tests/integration/tests/image_indexing/test_indexing_images.py
================================================
import os
from datetime import datetime
from datetime import timezone

import pytest

from onyx.connectors.models import InputType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.settings import SettingsManager
from tests.integration.common_utils.test_models import DATestSettings
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture

FILE_NAME = "Sample.pdf"
FILE_PATH = "tests/integration/common_utils/test_files"
DOCX_FILE_NAME = "three_images.docx"


def test_image_indexing(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    vespa_client: vespa_fixture,
) -> None:
    os.makedirs(FILE_PATH, exist_ok=True)
    test_file_path = os.path.join(FILE_PATH, FILE_NAME)

    # Use FileManager to upload the test file
    upload_response = FileManager.upload_file_for_connector(
        file_path=test_file_path, file_name=FILE_NAME, user_performing_action=admin_user
    )

    LLMProviderManager.create(
        name="test_llm",
        user_performing_action=admin_user,
    )

    SettingsManager.update_settings(
        DATestSettings(
            search_time_image_analysis_enabled=True,
            image_extraction_and_analysis_enabled=True,
        ),
        user_performing_action=admin_user,
    )

    file_paths = upload_response.file_paths

    if not file_paths:
        pytest.fail("File upload failed - no file paths returned")

    # Create a dummy credential for the file connector
    credential = CredentialManager.create(
        source=DocumentSource.FILE,
        credential_json={},
        user_performing_action=admin_user,
    )

    # Create the connector
    connector_name = f"FileConnector-{int(datetime.now().timestamp())}"
    connector = ConnectorManager.create(
        name=connector_name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": file_paths,
            "file_names": [FILE_NAME],
            "zip_metadata_file_id": None,
        },
        access_type=AccessType.PUBLIC,
        groups=[],
        user_performing_action=admin_user,
    )

    # Link the credential to the connector
    cc_pair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.PUBLIC,
        user_performing_action=admin_user,
    )

    # Explicitly run the connector to start indexing
    CCPairManager.run_once(
        cc_pair=cc_pair,
        from_beginning=True,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=datetime.now(timezone.utc),
        timeout=300,
        user_performing_action=admin_user,
    )

    with get_session_with_current_tenant() as db_session:
        # really gets the chunks from Vespa, which is why there are two;
        # one for the raw text and one for the summarized image.
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )

        assert len(documents) == 2
        for document in documents:
            if "These  are  Johns  dogs" in document.content:
                assert document.image_file_id is None
            else:
                assert document.image_file_id is not None
                assert file_paths[0] in document.image_file_id


def test_docx_image_indexing(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    vespa_client: vespa_fixture,
) -> None:
    """Test that images from docx files are correctly extracted and indexed."""
    os.makedirs(FILE_PATH, exist_ok=True)
    test_file_path = os.path.join(FILE_PATH, DOCX_FILE_NAME)

    # Use FileManager to upload the test file
    upload_response = FileManager.upload_file_for_connector(
        file_path=test_file_path,
        file_name=DOCX_FILE_NAME,
        user_performing_action=admin_user,
    )

    LLMProviderManager.create(
        name="test_llm_docx",
        user_performing_action=admin_user,
    )

    SettingsManager.update_settings(
        DATestSettings(
            search_time_image_analysis_enabled=True,
            image_extraction_and_analysis_enabled=True,
        ),
        user_performing_action=admin_user,
    )

    file_paths = upload_response.file_paths

    if not file_paths:
        pytest.fail("File upload failed - no file paths returned")

    # Create a dummy credential for the file connector
    credential = CredentialManager.create(
        source=DocumentSource.FILE,
        credential_json={},
        user_performing_action=admin_user,
    )

    # Create the connector
    connector_name = f"DocxFileConnector-{int(datetime.now().timestamp())}"
    connector = ConnectorManager.create(
        name=connector_name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": file_paths,
            "file_names": [DOCX_FILE_NAME],
            "zip_metadata_file_id": None,
        },
        access_type=AccessType.PUBLIC,
        groups=[],
        user_performing_action=admin_user,
    )

    # Link the credential to the connector
    cc_pair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.PUBLIC,
        user_performing_action=admin_user,
    )

    # Explicitly run the connector to start indexing
    CCPairManager.run_once(
        cc_pair=cc_pair,
        from_beginning=True,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=datetime.now(timezone.utc),
        timeout=300,
        user_performing_action=admin_user,
    )

    with get_session_with_current_tenant() as db_session:
        # Fetch documents from Vespa - expect text content plus 3 images
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )

        # Should have documents for text content plus 3 images
        assert (
            len(documents) >= 3
        ), f"Expected at least 3 documents (3 images), got {len(documents)}"

        # Count documents with images
        image_documents = [doc for doc in documents if doc.image_file_id is not None]
        text_documents = [doc for doc in documents if doc.image_file_id is None]

        assert (
            len(image_documents) == 3
        ), f"Expected exactly 3 image documents, got {len(image_documents)}"
        assert (
            len(text_documents) >= 1
        ), f"Expected at least 1 text document, got {len(text_documents)}"

        # Verify each image document has a valid image_file_id pointing to our uploaded file
        for image_doc in image_documents:
            assert file_paths[0] in (
                image_doc.image_file_id or ""
            ), f"Image document should reference uploaded file: {image_doc.image_file_id}"


================================================
FILE: backend/tests/integration/tests/index_attempt/test_index_attempt_pagination.py
================================================
import time
from datetime import datetime

from onyx.db.models import IndexingStatus
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def _verify_index_attempt_pagination(
    cc_pair_id: int,
    index_attempt_ids: list[int],
    user_performing_action: DATestUser,
    page_size: int = 5,
) -> None:
    retrieved_attempts: list[int] = []
    last_time_started = None  # Track the last time_started seen

    for i in range(0, len(index_attempt_ids), page_size):
        paginated_result = IndexAttemptManager.get_index_attempt_page(
            cc_pair_id=cc_pair_id,
            page=(i // page_size),
            page_size=page_size,
            user_performing_action=user_performing_action,
        )

        # Verify that the total items is equal to the length of the index attempts list
        assert paginated_result.total_items == len(index_attempt_ids)
        # Verify that the number of items in the page is equal to the page size
        assert len(paginated_result.items) == min(page_size, len(index_attempt_ids) - i)

        # Verify time ordering within the page (descending order)
        for attempt in paginated_result.items:
            if last_time_started is not None:
                assert attempt.time_started is not None
                assert (
                    attempt.time_started <= last_time_started
                ), "Index attempts not in descending time order"
            last_time_started = attempt.time_started

        # Add the retrieved index attempts to the list of retrieved attempts
        retrieved_attempts.extend([attempt.id for attempt in paginated_result.items])

    # Create a set of all the expected index attempt IDs
    all_expected_attempts = set(index_attempt_ids)
    # Create a set of all the retrieved index attempt IDs
    all_retrieved_attempts = set(retrieved_attempts)

    # Verify that the set of retrieved attempts is equal to the set of expected attempts
    assert all_expected_attempts == all_retrieved_attempts


def test_index_attempt_pagination(reset: None) -> None:  # noqa: ARG001
    MAX_WAIT = 60
    all_attempt_ids: list[int] = []

    # Create an admin user to perform actions
    user_performing_action: DATestUser = UserManager.create(
        name="admin_performing_action",
    )

    # Create a CC pair to attach index attempts to
    cc_pair = CCPairManager.create_from_scratch(
        user_performing_action=user_performing_action,
    )

    # Creating a CC pair will create an index attempt as well. wait for it.
    start = time.monotonic()
    while True:
        paginated_result = IndexAttemptManager.get_index_attempt_page(
            cc_pair_id=cc_pair.id,
            page=0,
            page_size=5,
            user_performing_action=user_performing_action,
        )

        if paginated_result.total_items == 1:
            all_attempt_ids.append(paginated_result.items[0].id)
            print("Initial index attempt from cc_pair creation detected. Continuing...")
            break

        elapsed = time.monotonic() - start
        if elapsed > MAX_WAIT:
            raise TimeoutError(
                f"Initial index attempt: Not detected within {MAX_WAIT} seconds."
            )

        print(
            f"Waiting for initial index attempt: elapsed={elapsed:.2f} timeout={MAX_WAIT}"
        )
        time.sleep(1)

    # Create 299 successful index attempts (for 300 total)
    base_time = datetime.now()
    generated_attempts = IndexAttemptManager.create_test_index_attempts(
        num_attempts=299,
        cc_pair_id=cc_pair.id,
        status=IndexingStatus.SUCCESS,
        base_time=base_time,
    )

    for attempt in generated_attempts:
        all_attempt_ids.append(attempt.id)

    # Verify basic pagination with different page sizes
    print("Verifying basic pagination with page size 5")
    _verify_index_attempt_pagination(
        cc_pair_id=cc_pair.id,
        index_attempt_ids=all_attempt_ids,
        page_size=5,
        user_performing_action=user_performing_action,
    )

    # Test with a larger page size
    print("Verifying pagination with page size 100")
    _verify_index_attempt_pagination(
        cc_pair_id=cc_pair.id,
        index_attempt_ids=all_attempt_ids,
        page_size=100,
        user_performing_action=user_performing_action,
    )


================================================
FILE: backend/tests/integration/tests/indexing/conftest.py
================================================
import httpx
import pytest

from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT


@pytest.fixture
def mock_server_client() -> httpx.Client:
    print(
        f"Initializing mock server client with host: {MOCK_CONNECTOR_SERVER_HOST} and port: {MOCK_CONNECTOR_SERVER_PORT}"
    )
    return httpx.Client(
        base_url=f"http://{MOCK_CONNECTOR_SERVER_HOST}:{MOCK_CONNECTOR_SERVER_PORT}",
        timeout=5.0,
    )


================================================
FILE: backend/tests/integration/tests/indexing/file_connector/test_file_connector_zip_metadata.py
================================================
import json
import os
from datetime import datetime
from datetime import timezone

import pytest

from onyx.connectors.models import InputType
from onyx.db.document import get_documents_for_cc_pair
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


# This is a placeholder - you'll need to create this zip file with actual test files
TEST_FILES_BASE = "tests/integration/tests/indexing/file_connector/test_files"
TEST_META_ZIP_PATH = f"{TEST_FILES_BASE}/with_meta.zip"
TEST_NO_META_ZIP_PATH = f"{TEST_FILES_BASE}/without_meta.zip"
TEST_METADATA_FILE = f"{TEST_FILES_BASE}/.onyx_metadata.json"


@pytest.mark.parametrize(
    "zip_path, has_metadata",
    [
        (TEST_META_ZIP_PATH, True),
        (TEST_NO_META_ZIP_PATH, False),
    ],
)
def test_zip_metadata_handling(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
    zip_path: str,
    has_metadata: bool,
) -> None:
    before = datetime.now(timezone.utc)
    # Create an admin user
    admin_user: DATestUser = UserManager.create(
        email="admin@example.com",
    )

    # Upload the test zip file (simulate this happening from frontend)
    upload_response = FileManager.upload_file_for_connector(
        file_path=zip_path,
        file_name=os.path.basename(zip_path),
        user_performing_action=admin_user,
        content_type="application/zip",
    )

    file_paths = upload_response.file_paths
    assert file_paths, "File upload failed - no file paths returned"
    if has_metadata:
        zip_metadata_file_id = upload_response.zip_metadata_file_id
        assert zip_metadata_file_id, "Metadata file ID should be present"
    else:
        zip_metadata_file_id = None

    # Create a dummy credential for the file connector
    credential = CredentialManager.create(
        source=DocumentSource.FILE,
        credential_json={},
        user_performing_action=admin_user,
    )

    # Create the connector
    connector_name = f"FileConnector-{int(datetime.now().timestamp())}"
    connector = ConnectorManager.create(
        name=connector_name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": file_paths,
            "file_names": [os.path.basename(file_path) for file_path in file_paths],
            "zip_metadata_file_id": zip_metadata_file_id,
        },
        access_type=AccessType.PUBLIC,
        groups=[],
        user_performing_action=admin_user,
    )

    # Link the credential to the connector
    cc_pair = CCPairManager.create(
        credential_id=credential.id,
        connector_id=connector.id,
        access_type=AccessType.PUBLIC,
        user_performing_action=admin_user,
    )

    # Run the connector to index the files
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair, after=before, user_performing_action=admin_user
    )

    # Get the indexed documents
    with get_session_with_current_tenant() as db_session:
        documents = get_documents_for_cc_pair(db_session, cc_pair.id)

    # Expected metadata from the .onyx_metadata.json file
    with open(TEST_METADATA_FILE, "r") as f:
        expected_metadata = json.load(f)

    # Verify each document has the correct metadata
    for doc in documents:
        filename = doc.semantic_id
        if filename in expected_metadata:
            expected = expected_metadata[filename]
            assert (
                doc.semantic_id == expected["display_name"]
            ), f"Display name mismatch for {filename}"
            assert doc.link == expected["link"], f"Link mismatch for {filename}"


================================================
FILE: backend/tests/integration/tests/indexing/file_connector/test_files/.onyx_metadata.json
================================================
[
    {
        "filename": "sample1.txt",
        "link": "https://www.google.com",
        "file_display_name": "Basically Google",
        "primary_owners": ["evan@onyx.app"],
        "status": "bingle bongle"
    },
    {
        "filename": "sample2.txt",
        "link": "https://www.youtube.com",
        "file_display_name": "Pretty much youtube",
        "primary_owners": ["chris@onyx.app"],
        "status": "not bingle bongle"
    }
]

================================================
FILE: backend/tests/integration/tests/indexing/file_connector/test_files/sample1.txt
================================================
The following contains some excerpts from our docs.

The File Connector indexes user uploaded files. Currently supports .txt, .pdf, .docx, .pptx, .xlsx, .csv, .md, .mdx, .conf, .log, .json, .tsv, .xml, .yml, .yaml, .eml, and .epub files. 
You can also upload a .zip containing these files - If there are other file types in the zip, the other file types are ignored. 
There is also an optional metadata line that supports links, document owners, and time updated as metadata for Onyx’s retrieval and AI Answer.

The metadata line should be placed at the very top of the file and can take one of two formats:

#ONYX_METADATA={"link": "<LINK>"}
<!-- ONYX_METADATA={"link": "<LINK>"} -->
Where ONYX_METADATA= is followed by a json. The valid json keys are:

link
primary_owners
secondary_owners
doc_updated_at
file_display_name
You can also include arbitrary key/value pairs which will be understood as “tags”. 
These tags can then be used in the UI as a filter if you want to constrain your search / conversation to only documents with certain tag(s) attached

================================================
FILE: backend/tests/integration/tests/indexing/file_connector/test_files/sample2.txt
================================================
Hello, I hope you're having a wonderful day!

================================================
FILE: backend/tests/integration/tests/indexing/test_checkpointing.py
================================================
import uuid
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import httpx

from onyx.configs.constants import DocumentSource
from onyx.connectors.mock_connector.connector import MockConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import InputType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import IndexingStatus
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.test_document_utils import create_test_document
from tests.integration.common_utils.test_document_utils import (
    create_test_document_failure,
)
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


def test_mock_connector_basic_flow(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that the mock connector can successfully process documents and failures"""
    # Set up mock server behavior
    doc_uuid = uuid.uuid4()
    test_doc = create_test_document(doc_id=f"test-doc-{doc_uuid}")

    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [test_doc.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
                    mode="json"
                ),
                "failures": [],
            }
        ],
    )
    assert response.status_code == 200

    # create CC Pair + index attempt
    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
    )

    # wait for index attempt to start
    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # wait for index attempt to finish
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # validate status
    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_index_attempt.status == IndexingStatus.SUCCESS

    # Verify results
    with get_session_with_current_tenant() as db_session:
        chunks = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(chunks) == 1
    assert chunks[0].id == test_doc.id

    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(errors) == 0


def test_mock_connector_with_failures(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that the mock connector processes both successes and failures properly."""
    doc1 = create_test_document()
    doc2 = create_test_document()
    doc2_failure = create_test_document_failure(doc_id=doc2.id)

    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [doc1.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
                    mode="json"
                ),
                "failures": [doc2_failure.model_dump(mode="json")],
            }
        ],
    )
    assert response.status_code == 200

    # Create a CC Pair for the mock connector
    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-failure-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
    )

    # Wait for the index attempt to start and then complete
    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # validate status
    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_index_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS

    # Verify results: doc1 should be indexed and doc2 should have an error entry
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 1
    assert documents[0].id == doc1.id

    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(errors) == 1
    error = errors[0]
    assert error.failure_message == doc2_failure.failure_message
    assert error.document_id == doc2.id


def test_mock_connector_failure_recovery(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that a failed document can be successfully indexed in a subsequent attempt
    while maintaining previously successful documents."""
    # Create test documents and failure
    doc1 = create_test_document()
    doc2 = create_test_document()
    doc2_failure = create_test_document_failure(doc_id=doc2.id)
    entity_id = "test-entity-id"
    entity_failure_msg = "Simulated unhandled error"

    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [doc1.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
                    mode="json"
                ),
                "failures": [
                    doc2_failure.model_dump(mode="json"),
                    ConnectorFailure(
                        failed_entity=EntityFailure(
                            entity_id=entity_id,
                            missed_time_range=(
                                datetime.now(timezone.utc) - timedelta(days=1),
                                datetime.now(timezone.utc),
                            ),
                        ),
                        failure_message=entity_failure_msg,
                    ).model_dump(mode="json"),
                ],
            }
        ],
    )
    assert response.status_code == 200

    # Create CC Pair and run initial indexing attempt
    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
    )

    # Wait for first index attempt to complete
    initial_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=initial_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # validate status
    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=initial_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_index_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS

    # Verify initial state: doc1 indexed, doc2 failed
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 1
    assert documents[0].id == doc1.id

    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(errors) == 2
    error_doc2 = next(error for error in errors if error.document_id == doc2.id)
    assert error_doc2.failure_message == doc2_failure.failure_message
    assert not error_doc2.is_resolved

    error_entity = next(error for error in errors if error.entity_id == entity_id)
    assert error_entity.failure_message == entity_failure_msg
    assert not error_entity.is_resolved

    # Update mock server to return success for both documents
    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [
                    doc1.model_dump(mode="json"),
                    doc2.model_dump(mode="json"),
                ],
                "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
                    mode="json"
                ),
                "failures": [],
            }
        ],
    )
    assert response.status_code == 200

    # Trigger another indexing attempt
    # NOTE: must be from beginning to handle the entity failure
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        index_attempts_to_ignore=[initial_index_attempt.id],
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    finished_second_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_second_index_attempt.status == IndexingStatus.SUCCESS

    # Verify both documents are now indexed
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 2
    document_ids = {doc.id for doc in documents}
    assert doc2.id in document_ids
    assert doc1.id in document_ids

    # Verify original failures were marked as resolved
    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(errors) == 2
    error_doc2 = next(error for error in errors if error.document_id == doc2.id)
    error_entity = next(error for error in errors if error.entity_id == entity_id)

    assert error_doc2.is_resolved
    assert error_entity.is_resolved


def test_mock_connector_checkpoint_recovery(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that checkpointing works correctly when an unhandled exception occurs
    and that subsequent runs pick up from the last successful checkpoint."""
    # Create test documents
    docs_batch_1 = [create_test_document() for _ in range(100)]
    doc2 = create_test_document()
    doc3 = create_test_document()

    # Set up mock server behavior for initial run:
    # - First yield: 100 docs with checkpoint1
    # - Second yield: doc2 with checkpoint2
    # - Third yield: unhandled exception
    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [doc.model_dump(mode="json") for doc in docs_batch_1],
                "checkpoint": MockConnectorCheckpoint(
                    has_more=True, last_document_id=docs_batch_1[-1].id
                ).model_dump(mode="json"),
                "failures": [],
            },
            {
                "documents": [doc2.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(
                    has_more=True, last_document_id=doc2.id
                ).model_dump(mode="json"),
                "failures": [],
            },
            {
                "documents": [],
                # should never hit this, unhandled exception happens first
                "checkpoint": MockConnectorCheckpoint(
                    has_more=False, last_document_id=doc2.id
                ).model_dump(mode="json"),
                "failures": [],
                "unhandled_exception": "Simulated unhandled error",
            },
        ],
    )
    assert response.status_code == 200

    # Create CC Pair and run initial indexing attempt
    # Note: Setting refresh_freq to allow manual retrigger after failure
    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-checkpoint-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
        refresh_freq=60 * 60,  # 1 hour
    )

    # Wait for first index attempt to complete
    initial_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=initial_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # validate status
    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=initial_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_index_attempt.status == IndexingStatus.FAILED

    # Pause the connector immediately to prevent check_for_indexing from
    # creating automatic retry attempts while we reset the mock server.
    # Without this, the INITIAL_INDEXING status causes immediate retries
    # that would consume (or fail against) the mock server before we can
    # set up the recovery behavior.
    CCPairManager.pause_cc_pair(cc_pair, user_performing_action=admin_user)

    # Collect all index attempt IDs created so far (the initial one plus
    # any automatic retries that may have started before the pause took effect).
    all_prior_attempt_ids: list[int] = []
    index_attempts_page = IndexAttemptManager.get_index_attempt_page(
        cc_pair_id=cc_pair.id,
        page=0,
        page_size=100,
        user_performing_action=admin_user,
    )
    all_prior_attempt_ids = [ia.id for ia in index_attempts_page.items]

    # Verify initial state: both docs should be indexed
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    # This is no longer guaranteed because docfetching and docprocessing are decoupled!
    # Some batches may not be processed when docfetching fails, but they should still stick around
    # in the filestore and be ready for the next run.
    # assert len(documents) == 101  # 100 docs from first batch + doc2
    # document_ids = {doc.id for doc in documents}
    # assert doc2.id in document_ids
    # assert all(doc.id in document_ids for doc in docs_batch_1)

    # Get the checkpoints that were sent to the mock server
    response = mock_server_client.get("/get-checkpoints")
    assert response.status_code == 200
    initial_checkpoints = response.json()

    # Verify we got the expected checkpoints in order
    assert len(initial_checkpoints) == 3
    assert initial_checkpoints[0] == {
        "has_more": True,
        "last_document_id": None,
    }  # Initial empty checkpoint
    assert initial_checkpoints[1] == {
        "has_more": True,
        "last_document_id": docs_batch_1[-1].id,
    }
    assert initial_checkpoints[2] == {"has_more": True, "last_document_id": doc2.id}

    # Reset the mock server for the next run
    response = mock_server_client.post("/reset")
    assert response.status_code == 200

    # Set up mock server behavior for recovery run - should succeed fully this time
    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [doc3.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(
                    has_more=False, last_document_id=doc3.id
                ).model_dump(mode="json"),
                "failures": [],
            }
        ],
    )
    assert response.status_code == 200

    # Set the manual indexing trigger, then unpause to allow the recovery run.
    CCPairManager.run_once(
        cc_pair, from_beginning=False, user_performing_action=admin_user
    )
    CCPairManager.unpause_cc_pair(cc_pair, user_performing_action=admin_user)
    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        index_attempts_to_ignore=all_prior_attempt_ids,
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # validate status
    finished_recovery_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_recovery_attempt.status == IndexingStatus.SUCCESS

    # Verify results
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 102  # 100 docs from first batch + doc2 + doc3
    document_ids = {doc.id for doc in documents}
    assert doc3.id in document_ids
    assert doc2.id in document_ids
    assert all(doc.id in document_ids for doc in docs_batch_1)

    # Get the checkpoints from the recovery run
    response = mock_server_client.get("/get-checkpoints")
    assert response.status_code == 200
    recovery_checkpoints = response.json()

    # Verify the recovery run started from the last successful checkpoint
    assert len(recovery_checkpoints) == 1
    assert recovery_checkpoints[0] == {"has_more": True, "last_document_id": doc2.id}


================================================
FILE: backend/tests/integration/tests/indexing/test_initial_permission_sync.py
================================================
import os
import uuid
from datetime import datetime
from datetime import timezone

import httpx
import pytest
from sqlalchemy import select

from onyx.configs.constants import DocumentSource
from onyx.connectors.mock_connector.connector import EXTERNAL_USER_EMAILS
from onyx.connectors.mock_connector.connector import EXTERNAL_USER_GROUP_IDS
from onyx.connectors.mock_connector.connector import MockConnectorCheckpoint
from onyx.connectors.models import Document
from onyx.connectors.models import InputType
from onyx.db.document import get_documents_by_ids
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import IndexingStatus
from onyx.db.enums import PermissionSyncStatus
from onyx.db.models import DocPermissionSyncAttempt
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.test_document_utils import create_test_document
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


def _setup_mock_connector(
    mock_server_client: httpx.Client,
    admin_user: DATestUser,
) -> tuple[DATestCCPair, Document]:
    """Common setup: create a test doc, configure mock server, create cc_pair, wait for indexing."""
    doc_uuid = uuid.uuid4()
    test_doc = create_test_document(doc_id=f"test-doc-{doc_uuid}")

    response = mock_server_client.post(
        "/set-behavior",
        json=[
            {
                "documents": [test_doc.model_dump(mode="json")],
                "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
                    mode="json"
                ),
                "failures": [],
            }
        ],
    )
    assert response.status_code == 200

    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )

    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    finished = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished.status == IndexingStatus.SUCCESS
    return cc_pair, test_doc


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission sync is enterprise only",
)
def test_mock_connector_initial_permission_sync(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that the MockConnector fetches and sets permissions during initial indexing
    when AccessType.SYNC is used."""

    cc_pair, test_doc = _setup_mock_connector(mock_server_client, admin_user)

    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 1
    assert documents[0].id == test_doc.id

    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(errors) == 0

    with get_session_with_current_tenant() as db_session:
        db_docs = get_documents_by_ids(
            db_session=db_session,
            document_ids=[test_doc.id],
        )
        assert len(db_docs) == 1
        db_doc = db_docs[0]

        assert db_doc.external_user_emails is not None
        assert db_doc.external_user_group_ids is not None
        assert set(db_doc.external_user_emails) == EXTERNAL_USER_EMAILS
        assert set(db_doc.external_user_group_ids) == EXTERNAL_USER_GROUP_IDS
        assert db_doc.is_public is False

    # After initial indexing, the beat task detects last_time_perm_sync is None
    # and triggers a doc permission sync. Explicitly trigger it to avoid
    # waiting for the 30s beat interval.
    before = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    updated_cc_pair_info = CCPairManager.get_single(
        cc_pair.id, user_performing_action=admin_user
    )
    assert updated_cc_pair_info is not None
    assert updated_cc_pair_info.last_full_permission_sync is not None


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission sync attempt tracking is enterprise only",
)
def test_permission_sync_attempt_tracking_integration(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test that permission sync attempts are properly tracked during real sync workflows."""

    cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)

    before = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )

    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    with get_session_with_current_tenant() as db_session:
        attempt = db_session.execute(
            select(DocPermissionSyncAttempt).where(
                DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair.id
            )
        ).scalar_one()

        assert attempt.status in [
            PermissionSyncStatus.SUCCESS,
            PermissionSyncStatus.COMPLETED_WITH_ERRORS,
            PermissionSyncStatus.FAILED,
        ]
        assert attempt.total_docs_synced is not None and attempt.total_docs_synced >= 0
        assert (
            attempt.docs_with_permission_errors is not None
            and attempt.docs_with_permission_errors >= 0
        )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission sync attempt tracking is enterprise only",
)
def test_permission_sync_attempt_status_success(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test that permission sync attempts are marked as SUCCESS when sync completes without errors."""

    cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)

    before = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )

    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=before,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    with get_session_with_current_tenant() as db_session:
        attempt = db_session.execute(
            select(DocPermissionSyncAttempt).where(
                DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair.id
            )
        ).scalar_one()

        assert attempt.status == PermissionSyncStatus.SUCCESS
        assert attempt.total_docs_synced is not None and attempt.total_docs_synced >= 0
        assert (
            attempt.docs_with_permission_errors is not None
            and attempt.docs_with_permission_errors == 0
        )


================================================
FILE: backend/tests/integration/tests/indexing/test_polling.py
================================================
import uuid
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import httpx

from onyx.configs.app_configs import POLL_CONNECTOR_OFFSET
from onyx.configs.constants import DocumentSource
from onyx.connectors.mock_connector.connector import MockConnectorCheckpoint
from onyx.connectors.models import InputType
from onyx.db.enums import IndexingStatus
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.test_document_utils import create_test_document
from tests.integration.common_utils.test_models import DATestUser


def _setup_mock_connector(
    mock_server_client: httpx.Client,
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    test_doc = create_test_document()
    successful_response = {
        "documents": [test_doc.model_dump(mode="json")],
        "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(mode="json"),
        "failures": [],
    }
    response = mock_server_client.post(
        "/set-behavior",
        json=[successful_response, successful_response],  # For two attempts
    )
    assert response.status_code == 200


def test_poll_connector_time_ranges(
    mock_server_client: httpx.Client,
    admin_user: DATestUser,
) -> None:
    """
    Tests that poll connectors correctly set their poll_range_start and poll_range_end
    across multiple indexing attempts.
    """
    # Set up mock server behavior - a simple successful response
    _setup_mock_connector(mock_server_client, admin_user)

    # Create a CC Pair for the mock connector with POLL input type
    cc_pair_name = f"mock-poll-time-range-{uuid.uuid4()}"
    cc_pair = CCPairManager.create_from_scratch(
        name=cc_pair_name,
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
        refresh_freq=3,  # refresh often to ensure the second attempt actually runs
    )

    # --- First Indexing Attempt ---
    time_before_first_attempt = datetime.now(timezone.utc)
    first_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=first_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    time_after_first_attempt = datetime.now(timezone.utc)

    # Fetch and validate the first attempt
    completed_first_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=first_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert completed_first_attempt.status == IndexingStatus.SUCCESS
    assert completed_first_attempt.poll_range_start is not None
    assert completed_first_attempt.poll_range_end is not None

    # For the first run (no prior successful attempts), poll_range_start should be epoch (0)
    expected_first_start = datetime.fromtimestamp(0, tz=timezone.utc)
    assert completed_first_attempt.poll_range_start == expected_first_start

    # `poll_range_end` should be sometime in between the time the attempt
    # started and the time it finished.
    # no way to have a more precise assertion here since the `poll_range_end`
    # can really be set anytime in that range and be "correct"
    assert (
        time_before_first_attempt
        <= completed_first_attempt.poll_range_end
        <= time_after_first_attempt
    )

    first_attempt_poll_end = completed_first_attempt.poll_range_end

    # --- Second Indexing Attempt ---
    # Trigger another run manually (since automatic refresh might be too slow for test)
    # Ensure there's a slight delay so the poll window moves
    # In a real scenario, the scheduler would wait for the refresh frequency.
    # Here we manually trigger a new run.
    _setup_mock_connector(mock_server_client, admin_user)
    CCPairManager.run_once(
        cc_pair, from_beginning=False, user_performing_action=admin_user
    )

    time_before_second_attempt = datetime.now(timezone.utc)
    second_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        index_attempts_to_ignore=[first_index_attempt.id],
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=second_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    time_after_second_attempt = datetime.now(timezone.utc)

    # Fetch and validate the second attempt
    completed_second_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=second_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert completed_second_attempt.status == IndexingStatus.SUCCESS
    assert completed_second_attempt.poll_range_start is not None
    assert completed_second_attempt.poll_range_end is not None

    # For the second run, poll_range_start should be the previous successful attempt's
    # poll_range_end minus the POLL_CONNECTOR_OFFSET
    expected_second_start = first_attempt_poll_end - timedelta(
        minutes=POLL_CONNECTOR_OFFSET
    )
    assert completed_second_attempt.poll_range_start == expected_second_start

    # `poll_range_end` should be sometime in between the time the attempt
    # started and the time it finished.
    # again, no way to have a more precise assertion here since the `poll_range_end`
    # can really be set anytime in that range and be "correct"
    assert (
        time_before_second_attempt
        <= completed_second_attempt.poll_range_end
        <= time_after_second_attempt
    )


================================================
FILE: backend/tests/integration/tests/indexing/test_repeated_error_state.py
================================================
import time
import uuid

import httpx

from onyx.background.celery.tasks.docprocessing.utils import (
    NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE,
)
from onyx.configs.constants import DocumentSource
from onyx.connectors.mock_connector.connector import MockConnectorCheckpoint
from onyx.connectors.models import InputType
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import IndexingStatus
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST
from tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.test_document_utils import create_test_document
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


def test_repeated_error_state_detection_and_recovery(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Test that a connector is marked as in a repeated error state after
    NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE consecutive failures, and
    that it recovers after a successful indexing.

    This test ensures we properly wait for the required number of indexing attempts
    to fail before checking that the connector is in a repeated error state."""

    # Create test document for successful response
    test_doc = create_test_document()

    # First, set up the mock server to consistently fail
    error_response = {
        "documents": [],
        "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(mode="json"),
        "failures": [],
        "unhandled_exception": "Simulated unhandled error for testing repeated errors",
    }

    # Create a list of failure responses with at least the same length
    # as NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE
    failure_behaviors = [error_response] * (
        5 * NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE
    )

    response = mock_server_client.post(
        "/set-behavior",
        json=failure_behaviors,
    )
    assert response.status_code == 200

    # Create a new CC pair for testing
    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-repeated-error-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        user_performing_action=admin_user,
        refresh_freq=60 * 60,  # a very long time
    )

    # Wait for the required number of failed indexing attempts
    # This shouldn't take long, since we keep retrying while we haven't
    # succeeded yet
    start_time = time.monotonic()
    while True:
        index_attempts_page = IndexAttemptManager.get_index_attempt_page(
            cc_pair_id=cc_pair.id,
            page=0,
            page_size=100,
            user_performing_action=admin_user,
        )
        index_attempts = [
            ia
            for ia in index_attempts_page.items
            if ia.status and ia.status.is_terminal()
        ]
        if len(index_attempts) >= NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE:
            break

        if time.monotonic() - start_time > 180:
            raise TimeoutError(
                "Did not get required number of failed attempts within 180 seconds"
            )

        # make sure that we don't mark the connector as in repeated error state
        # before we have the required number of failed attempts
        with get_session_with_current_tenant() as db_session:
            cc_pair_obj = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair.id,
            )
            assert cc_pair_obj is not None
        assert not cc_pair_obj.in_repeated_error_state

        time.sleep(2)

    # Verify we have the correct number of failed attempts
    assert len(index_attempts) == NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE
    for attempt in index_attempts:
        assert attempt.status == IndexingStatus.FAILED

    # Check if the connector is in a repeated error state
    start_time = time.monotonic()
    while True:
        with get_session_with_current_tenant() as db_session:
            cc_pair_obj = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair.id,
            )
            assert cc_pair_obj is not None
            if cc_pair_obj.in_repeated_error_state:
                # Pausing only happens for cloud deployments and the IT don't run with
                # that auth type :(
                # if AUTH_TYPE == AuthType.CLOUD:
                #     assert cc_pair_obj.status == ConnectorCredentialPairStatus.PAUSED, (
                #         f"Expected status to be PAUSED when in repeated error state, "
                #         f"but got {cc_pair_obj.status}"
                #     )
                break

        if time.monotonic() - start_time > 90:
            assert False, "CC pair did not enter repeated error state within 90 seconds"

        time.sleep(2)

    # Reset the mock server state
    response = mock_server_client.post("/reset")
    assert response.status_code == 200

    # Now set up the mock server to succeed
    success_response = {
        "documents": [test_doc.model_dump(mode="json")],
        "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(mode="json"),
        "failures": [],
    }

    response = mock_server_client.post(
        "/set-behavior",
        json=[success_response],
    )
    assert response.status_code == 200

    # Set the manual indexing trigger first (while paused), then unpause.
    # This ensures the trigger is set before CHECK_FOR_INDEXING runs, which will
    # prevent the connector from being re-paused when repeated error state is detected.
    CCPairManager.run_once(
        cc_pair, from_beginning=True, user_performing_action=admin_user
    )
    CCPairManager.unpause_cc_pair(cc_pair, user_performing_action=admin_user)

    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        index_attempts_to_ignore=[index_attempt.id for index_attempt in index_attempts],
        user_performing_action=admin_user,
    )

    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # Validate the indexing succeeded
    finished_recovery_attempt = IndexAttemptManager.get_index_attempt_by_id(
        index_attempt_id=recovery_index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert finished_recovery_attempt.status == IndexingStatus.SUCCESS

    # Verify the document was indexed
    with get_session_with_current_tenant() as db_session:
        documents = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
    assert len(documents) == 1
    assert documents[0].id == test_doc.id

    # Verify the CC pair is no longer in a repeated error state
    start = time.monotonic()
    while True:
        with get_session_with_current_tenant() as db_session:
            cc_pair_obj = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair.id,
            )
            assert cc_pair_obj is not None
            if not cc_pair_obj.in_repeated_error_state:
                break

        elapsed = time.monotonic() - start
        if elapsed > 30:
            raise TimeoutError(
                "CC pair did not exit repeated error state within 30 seconds"
            )

        print(
            f"Waiting for CC pair to exit repeated error state. elapsed={elapsed:.2f}"
        )
        time.sleep(1)


================================================
FILE: backend/tests/integration/tests/ingestion/test_ingestion_api.py
================================================
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import IngestionManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture


def test_ingestion_api_crud(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    """Test create, list, and delete via the ingestion API."""
    admin_user: DATestUser = UserManager.create(email="admin@onyx.app")
    cc_pair = CCPairManager.create_from_scratch(
        name="Ingestion-API-Test",
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": [],
            "file_names": [],
            "zip_metadata_file_id": None,
        },
        user_performing_action=admin_user,
    )
    api_key = APIKeyManager.create(user_performing_action=admin_user)
    api_key.headers.update(admin_user.headers)

    # CREATE
    doc = IngestionManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content="Test document",
        document_id="test-doc-1",
        api_key=api_key,
    )

    with get_session_with_current_tenant() as db_session:
        doc_db = db_session.query(Document).filter(Document.id == doc.id).first()
        assert doc_db is not None
        assert doc_db.from_ingestion_api is True

    vespa_docs = vespa_client.get_documents_by_id([doc.id])["documents"]
    assert len(vespa_docs) == 1

    # LIST
    docs_list = IngestionManager.list_all_ingestion_docs(api_key=api_key)
    assert any(d["document_id"] == doc.id for d in docs_list)

    # DELETE
    IngestionManager.delete(document_id=doc.id, api_key=api_key)

    with get_session_with_current_tenant() as db_session:
        doc_db = db_session.query(Document).filter(Document.id == doc.id).first()
        assert doc_db is None

    vespa_docs = vespa_client.get_documents_by_id([doc.id])["documents"]
    assert len(vespa_docs) == 0


================================================
FILE: backend/tests/integration/tests/kg/test_kg_api.py
================================================
import json
from datetime import datetime
from http import HTTPStatus

import pytest
import requests

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import set_kg_config_settings
from onyx.db.models import Connector
from onyx.server.documents.models import ConnectorBase
from onyx.server.kg.models import DisableKGConfigRequest
from onyx.server.kg.models import EnableKGConfigRequest
from onyx.server.kg.models import EntityType
from onyx.server.kg.models import KGConfig as KGConfigAPIModel
from onyx.server.kg.models import SourceAndEntityTypeView
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.reset import reset_all


@pytest.fixture(autouse=True)
def reset_for_test() -> None:
    """Reset all data before each test."""
    reset_all()

    kg_config_settings = get_kg_config_settings()
    kg_config_settings.KG_EXPOSED = True
    set_kg_config_settings(kg_config_settings)


@pytest.fixture()
def connectors() -> None:
    """Set up connectors for tests."""
    with get_session_with_current_tenant() as db_session:
        # Create Salesforce connector
        connector_data = ConnectorBase(
            name="Salesforce Test",
            source=DocumentSource.SALESFORCE,
            input_type=InputType.POLL,
            connector_specific_config={},
            refresh_freq=None,
            indexing_start=None,
            prune_freq=None,
        )
        create_connector(db_session, connector_data)


def test_kg_enable_and_disable(connectors: None) -> None:  # noqa: ARG001
    admin_user = UserManager.create(name="admin_user")

    # Enable KG
    # Need to `.model_dump_json()` and then `json.loads`.
    # Seems redundant, but this is because simply calling `json=data.model_dump()`
    # returns in a "datetime cannot be JSON serialized error".
    req1 = json.loads(
        EnableKGConfigRequest(
            vendor="Test",
            vendor_domains=["test.app", "tester.ai"],
            ignore_domains=[],
            coverage_start=datetime(1970, 1, 1, 0, 0),
        ).model_dump_json()
    )
    res1 = requests.put(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
        json=req1,
    )
    assert (
        res1.status_code == HTTPStatus.OK
    ), f"Error response: {res1.status_code} - {res1.text}"

    # Check KG
    res2 = requests.get(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
    )
    assert (
        res2.status_code == HTTPStatus.OK
    ), f"Error response: {res2.status_code} - {res2.text}"

    actual_config = KGConfigAPIModel.model_validate_json(res2.text)
    assert actual_config == KGConfigAPIModel(
        enabled=True,
        vendor="Test",
        vendor_domains=["test.app", "tester.ai"],
        ignore_domains=[],
        coverage_start=datetime(1970, 1, 1, 0, 0),
    )

    # Disable KG
    req3 = DisableKGConfigRequest().model_dump()
    res3 = requests.put(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
        json=req3,
    )
    assert (
        res3.status_code == HTTPStatus.OK
    ), f"Error response: {res3.status_code} - {res3.text}"

    # Check KG
    res4 = requests.get(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
    )
    assert (
        res4.status_code == HTTPStatus.OK
    ), f"Error response: {res4.status_code} - {res4.text}"

    actual_config = KGConfigAPIModel.model_validate_json(res4.text)
    assert actual_config == KGConfigAPIModel(
        enabled=False,
        vendor="Test",
        vendor_domains=["test.app", "tester.ai"],
        ignore_domains=[],
        coverage_start=datetime(1970, 1, 1, 0, 0),
    )


def test_kg_enable_with_missing_fields_should_fail() -> None:
    admin_user = UserManager.create(name="admin_user")

    req = json.loads(
        EnableKGConfigRequest(
            vendor="Test",
            vendor_domains=[],
            ignore_domains=[],
            coverage_start=datetime(1970, 1, 1, 0, 0),
        ).model_dump_json()
    )
    res = requests.put(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
        json=req,
    )
    assert res.status_code == HTTPStatus.BAD_REQUEST


def test_update_kg_entity_types(connectors: None) -> None:  # noqa: ARG001
    admin_user = UserManager.create(name="admin_user")

    # Enable kg and populate default entity types
    req1 = json.loads(
        EnableKGConfigRequest(
            vendor="Test",
            vendor_domains=["test.app", "tester.ai"],
            ignore_domains=[],
            coverage_start=datetime(1970, 1, 1, 0, 0),
        ).model_dump_json()
    )
    res1 = requests.put(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
        json=req1,
    )
    assert (
        res1.status_code == HTTPStatus.OK
    ), f"Error response: {res1.status_code} - {res1.text}"

    # Get old entity types
    res2 = requests.get(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
    )
    assert (
        res2.status_code == HTTPStatus.OK
    ), f"Error response: {res2.status_code} - {res2.text}"
    res2_parsed = SourceAndEntityTypeView.model_validate(res2.json())

    # Update entity types
    req3 = [
        EntityType(
            name="ACCOUNT",
            description="Test.",
            active=True,
            grounded_source_name="salesforce",
        ).model_dump(),
        EntityType(
            name="OPPORTUNITY",
            description="Test 2.",
            active=False,
        ).model_dump(),
    ]
    res3 = requests.put(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
        json=req3,
    )
    assert (
        res3.status_code == HTTPStatus.OK
    ), f"Error response: {res3.status_code} - {res3.text}"

    # Check connector kg_processing is enabled
    with get_session_with_current_tenant() as db_session:
        connector = (
            db_session.query(Connector)
            .filter(Connector.name == "Salesforce Test")
            .scalar()
        )
        assert connector.kg_processing_enabled

    # Check entity types looks correct
    res4 = requests.get(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
    )
    assert (
        res4.status_code == HTTPStatus.OK
    ), f"Error response: {res4.status_code} - {res4.text}"
    res4_parsed = SourceAndEntityTypeView.model_validate(res4.json())

    def to_entity_type_map(map: dict[str, list[EntityType]]) -> dict[str, EntityType]:
        return {
            entity_type.name: entity_type
            for entity_types in map.values()
            for entity_type in entity_types
        }

    expected_entity_types = to_entity_type_map(map=res2_parsed.entity_types)
    new_entity_types = to_entity_type_map(map=res4_parsed.entity_types)

    # These are the updates.
    # We're just manually updating them.
    expected_entity_types["ACCOUNT"].active = True
    expected_entity_types["ACCOUNT"].description = "Test."
    expected_entity_types["OPPORTUNITY"].active = False
    expected_entity_types["OPPORTUNITY"].description = "Test 2."

    assert new_entity_types == expected_entity_types


def test_update_invalid_kg_entity_type_should_do_nothing(
    connectors: None,  # noqa: ARG001
) -> None:
    admin_user = UserManager.create(name="admin_user")

    # Enable kg and populate default entity types
    req1 = json.loads(
        EnableKGConfigRequest(
            vendor="Test",
            vendor_domains=["test.app", "tester.ai"],
            ignore_domains=[],
            coverage_start=datetime(1970, 1, 1, 0, 0),
        ).model_dump_json()
    )
    res1 = requests.put(
        f"{API_SERVER_URL}/admin/kg/config",
        headers=admin_user.headers,
        json=req1,
    )
    assert (
        res1.status_code == HTTPStatus.OK
    ), f"Error response: {res1.status_code} - {res1.text}"

    # Get old entity types
    res2 = requests.get(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
    )
    assert (
        res2.status_code == HTTPStatus.OK
    ), f"Error response: {res2.status_code} - {res2.text}"

    # Update entity types with non-existent entity type
    req3 = [
        EntityType(name="NON-EXISTENT", description="Test.", active=False).model_dump(),
    ]
    res3 = requests.put(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
        json=req3,
    )
    assert (
        res3.status_code == HTTPStatus.OK
    ), f"Error response: {res3.status_code} - {res3.text}"

    # Get entity types after the update attempt
    res4 = requests.get(
        f"{API_SERVER_URL}/admin/kg/entity-types",
        headers=admin_user.headers,
    )
    assert (
        res4.status_code == HTTPStatus.OK
    ), f"Error response: {res4.status_code} - {res4.text}"

    # Should be the same as before since non-existent entity type should be ignored
    assert res2.json() == res4.json()


================================================
FILE: backend/tests/integration/tests/llm_auto_update/test_auto_llm_update.py
================================================
"""
Integration tests for Auto LLM model update feature.

These tests verify that LLM providers in Auto mode get their models
automatically synced from the GitHub config via the celery background task.

Environment variables for testing:
- AUTO_LLM_UPDATE_INTERVAL_SECONDS: Set to a low value (e.g., 5) for faster tests
- AUTO_LLM_CONFIG_URL: Points to the config file to sync from

The celery beat scheduler will run the check_for_auto_llm_updates task
at the configured interval.
"""

import time

import pytest
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


# How long to wait for the celery task to run and sync models
# This should be longer than AUTO_LLM_UPDATE_INTERVAL_SECONDS
MAX_WAIT_TIME_SECONDS = 120
POLL_INTERVAL_SECONDS = 5


def _create_provider_with_api(
    admin_user: DATestUser,
    name: str,
    provider_type: str,
    default_model: str,
    is_auto_mode: bool,
    model_configurations: list[dict] | None = None,
) -> dict:
    """Create an LLM provider via the API."""
    if model_configurations is None:
        model_configurations = [{"name": default_model, "is_visible": True}]

    llm_provider_data = {
        "name": name,
        "provider": provider_type,
        "api_key": "test-api-key-for-auto-mode-testing",
        "api_base": None,
        "api_version": None,
        "custom_config": None,
        "is_public": True,
        "is_auto_mode": is_auto_mode,
        "groups": [],
        "personas": [],
        "model_configurations": model_configurations,
        "api_key_changed": True,
    }

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        json=llm_provider_data,
        headers=admin_user.headers,
    )
    response.raise_for_status()
    return response.json()


def _get_provider_by_id(admin_user: DATestUser, provider_id: int) -> dict:
    """Get a provider by ID via the API."""
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    response.raise_for_status()
    for provider in response.json()["providers"]:
        if provider["id"] == provider_id:
            return provider
    raise ValueError(f"Provider with id {provider_id} not found")


def get_auto_config(admin_user: DATestUser) -> dict | None:
    """Get the current auto config from the API."""
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/auto-config",
        headers=admin_user.headers,
    )
    if response.status_code == 502:
        return None
    response.raise_for_status()
    return response.json()


def wait_for_model_sync(
    admin_user: DATestUser,
    provider_id: int,
    expected_model_names: set[str],
    max_wait_seconds: int = MAX_WAIT_TIME_SECONDS,
) -> dict:
    """
    Wait for the provider's models to match the expected set.

    Returns the provider data once models match, or raises an assertion error.
    """
    start_time = time.time()
    last_provider: dict | None = None

    while time.time() - start_time < max_wait_seconds:
        provider = _get_provider_by_id(admin_user, provider_id)
        last_provider = provider
        current_models = {m["name"] for m in provider["model_configurations"]}

        # Check if we have all expected models
        if expected_model_names.issubset(current_models):
            return provider

        print(
            f"Waiting for model sync... Current: {current_models}, Expected: {expected_model_names}"
        )
        time.sleep(POLL_INTERVAL_SECONDS)

    # Timeout - return last state for debugging
    current_models = (
        {m["name"] for m in last_provider["model_configurations"]}
        if last_provider
        else set()
    )
    raise AssertionError(
        f"Model sync timed out after {max_wait_seconds}s. Current models: {current_models}, Expected: {expected_model_names}"
    )


def test_auto_mode_provider_gets_synced_from_github_config(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """
    Test that a provider in Auto mode gets its models synced from GitHub config.

    This test:
    1. Fetches the current GitHub config to know what models to expect
    2. Creates an OpenAI provider in Auto mode with outdated/minimal models
    3. Waits for the celery task to sync models from GitHub
    4. Verifies the models match the GitHub config
    """
    # First, get the GitHub config to know what models we should expect
    github_config = get_auto_config(admin_user)
    if github_config is None:
        pytest.fail("GitHub config not found")

    # Get expected models for OpenAI from the config
    if "openai" not in github_config.get("providers", {}):
        pytest.fail("OpenAI not in GitHub config")

    openai_config = github_config["providers"]["openai"]

    # Build expected model names from default_model + additional_visible_models
    expected_models: set[str] = set()

    # Add default model
    default_model = openai_config.get("default_model", {})
    if isinstance(default_model, dict):
        expected_models.add(default_model["name"])
    elif isinstance(default_model, str):
        expected_models.add(default_model)

    # Add additional visible models
    for model in openai_config.get("additional_visible_models", []):
        if isinstance(model, dict):
            expected_models.add(model["name"])
        elif isinstance(model, str):
            expected_models.add(model)

    print(f"Expected models from GitHub config: {expected_models}")

    # Create an OpenAI provider in Auto mode with a single outdated model
    provider = _create_provider_with_api(
        admin_user=admin_user,
        name="test-auto-sync-openai",
        provider_type="openai",
        default_model="outdated-model-name",
        is_auto_mode=True,
        model_configurations=[
            {"name": "outdated-model-name", "is_visible": True},
        ],
    )

    assert provider["is_auto_mode"] is True
    print(f"Created provider {provider['id']} in Auto mode")

    # Wait for the celery task to sync models
    # The task runs at AUTO_LLM_UPDATE_INTERVAL_SECONDS interval
    synced_provider = wait_for_model_sync(
        admin_user=admin_user,
        provider_id=provider["id"],
        expected_model_names=expected_models,
    )

    # Verify the models were synced
    synced_model_configs = synced_provider["model_configurations"]
    synced_model_names = {m["name"] for m in synced_model_configs}
    print(f"Synced models: {synced_model_names}")

    assert expected_models.issubset(
        synced_model_names
    ), f"Expected models {expected_models} not found in synced models {synced_model_names}"

    # Verify the outdated model still exists but is not visible
    # (Auto mode marks removed models as not visible, it doesn't delete them)
    outdated_model = next(
        (m for m in synced_model_configs if m["name"] == "outdated-model-name"),
        None,
    )
    assert (
        outdated_model is not None
    ), "Outdated model should still exist after sync (marked invisible, not deleted)"
    assert not outdated_model[
        "is_visible"
    ], "Outdated model should not be visible after sync"


def test_manual_mode_provider_not_affected_by_auto_sync(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """
    Test that a provider in Manual mode is NOT affected by auto sync.

    This test:
    1. Creates an OpenAI provider in Manual mode with custom models
    2. Waits for a period longer than the sync interval
    3. Verifies the models remain unchanged
    """
    custom_model = "my-custom-finetuned-model"

    # Create a provider in Manual mode
    provider = _create_provider_with_api(
        admin_user=admin_user,
        name="test-manual-mode-unchanged",
        provider_type="openai",
        default_model=custom_model,
        is_auto_mode=False,  # Manual mode
        model_configurations=[
            {"name": custom_model, "is_visible": True},
            {"name": "another-custom-model", "is_visible": True},
        ],
    )

    assert provider["is_auto_mode"] is False
    initial_models = {m["name"] for m in provider["model_configurations"]}
    print(f"Created manual mode provider with models: {initial_models}")

    # Wait for longer than the sync interval
    wait_time = 15  # Should be longer than AUTO_LLM_UPDATE_INTERVAL_SECONDS
    print(f"Waiting {wait_time}s to ensure sync task runs...")
    time.sleep(wait_time)

    # Verify models are unchanged
    updated_provider = _get_provider_by_id(admin_user, provider["id"])
    current_models = {m["name"] for m in updated_provider["model_configurations"]}

    assert (
        current_models == initial_models
    ), f"Manual mode provider models should not change. Initial: {initial_models}, Current: {current_models}"


================================================
FILE: backend/tests/integration/tests/llm_provider/test_llm_provider.py
================================================
import uuid
from typing import Any

import pytest
import requests
from requests.models import Response

from onyx.llm.constants import LlmProviderNames
from onyx.llm.model_name_parser import parse_litellm_model_name
from onyx.llm.utils import get_max_input_tokens
from onyx.llm.utils import litellm_thinks_model_supports_image_input
from onyx.llm.utils import model_is_reasoning_model
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def _get_provider_by_id(admin_user: DATestUser, provider_id: str) -> dict | None:
    """Utility function to fetch an LLM provider by ID"""
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    providers = response.json()["providers"]
    return next((p for p in providers if p["id"] == provider_id), None)


def assert_response_is_equivalent(
    admin_user: DATestUser,
    response: Response,
    model_configurations: list[ModelConfigurationUpsertRequest],
    api_key: str | None = None,
) -> None:
    assert response.status_code == 200
    created_provider = response.json()

    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None

    assert provider_data["personas"] == []

    def fill_max_input_tokens_and_supports_image_input(
        req: ModelConfigurationUpsertRequest,
    ) -> dict[str, Any]:
        provider_name = created_provider["provider"]
        # Match how ModelConfigurationView.from_model builds the key for parsing
        model_key = req.name
        if provider_name and not model_key.startswith(f"{provider_name}/"):
            model_key = f"{provider_name}/{model_key}"
        parsed = parse_litellm_model_name(model_key)

        # Include region in display name for Bedrock cross-region models (matches from_model)
        display_name = (
            f"{parsed.display_name} ({parsed.region})"
            if parsed.region
            else parsed.display_name
        )

        filled_with_max_input_tokens = ModelConfigurationUpsertRequest(
            name=req.name,
            is_visible=req.is_visible,
            max_input_tokens=req.max_input_tokens
            or get_max_input_tokens(model_name=req.name, model_provider=provider_name),
        )
        return {
            **filled_with_max_input_tokens.model_dump(),
            "supports_image_input": litellm_thinks_model_supports_image_input(
                req.name, provider_name
            ),
            "supports_reasoning": model_is_reasoning_model(req.name, provider_name),
            "display_name": display_name,
            "provider_display_name": parsed.provider_display_name,
            "vendor": parsed.vendor,
            "region": parsed.region,
            "version": parsed.version,
        }

    # Compare model configurations by name (order-independent)
    actual_by_name = {
        config["name"]: config for config in provider_data["model_configurations"]
    }
    expected_by_name = {
        config.name: fill_max_input_tokens_and_supports_image_input(config)
        for config in model_configurations
    }

    assert set(actual_by_name.keys()) == set(
        expected_by_name.keys()
    ), f"Model names don't match. Actual: {set(actual_by_name.keys())}, Expected: {set(expected_by_name.keys())}"

    for name in actual_by_name:
        actual_config = actual_by_name[name]
        expected_config = expected_by_name[name]
        assert (
            actual_config == expected_config
        ), f"Config mismatch for {name}:\nActual: {actual_config}\nExpected: {expected_config}"

    # test that returned key is sanitized
    if api_key:
        assert provider_data["api_key"] == api_key


# Test creating an LLM Provider with some various model-configurations.
@pytest.mark.parametrize(
    "model_configurations, expected",
    [
        # Test the case in which a basic model-configuration is passed.
        (
            [
                ModelConfigurationUpsertRequest(
                    name="gpt-4", is_visible=True, max_input_tokens=4096
                )
            ],
            [
                ModelConfigurationUpsertRequest(
                    name="gpt-4", is_visible=True, max_input_tokens=4096
                )
            ],
        ),
        # Test the case in which multiple model-configuration are passed.
        (
            [
                ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
            ],
            [
                ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
            ],
        ),
        # Test the case in which duplicate model-configuration are passed.
        (
            [ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True)] * 4,
            [ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True)],
        ),
    ],
)
def test_create_llm_provider(
    reset: None,  # noqa: ARG001
    model_configurations: list[ModelConfigurationUpsertRequest],
    expected: list[ModelConfigurationUpsertRequest],
) -> None:
    admin_user = UserManager.create(name="admin_user")

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": str(uuid.uuid4()),
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                model_configuration.model_dump()
                for model_configuration in model_configurations
            ],
            "is_public": True,
            "groups": [],
        },
    )

    assert_response_is_equivalent(
        admin_user,
        response,
        expected,
        "sk-0****0000",
    )


# Test creating a new LLM Provider with some given model-configurations, then performing some arbitrary update on it.
@pytest.mark.parametrize(
    "initial, initial_expected, updated, updated_expected",
    [
        # Test the case in which a basic model-configuration is passed, but then it's updated to have *NO* max-input-tokens.
        (
            (
                "gpt-4",
                [
                    ModelConfigurationUpsertRequest(
                        name="gpt-4", is_visible=True, max_input_tokens=4096
                    )
                ],
            ),
            [
                ModelConfigurationUpsertRequest(
                    name="gpt-4", is_visible=True, max_input_tokens=4096
                )
            ],
            (
                "gpt-4",
                [ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True)],
            ),
            [ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True)],
        ),
        # Test the case where we insert 2 model-configurations, and then in the update the first,
        # we update one and delete the second.
        (
            (
                "gpt-4",
                [
                    ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                    ModelConfigurationUpsertRequest(
                        name="gpt-4o", is_visible=True, max_input_tokens=4096
                    ),
                ],
            ),
            [
                ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
                ModelConfigurationUpsertRequest(
                    name="gpt-4o", is_visible=True, max_input_tokens=4096
                ),
            ],
            (
                "gpt-4",
                [
                    ModelConfigurationUpsertRequest(
                        name="gpt-4", is_visible=True, max_input_tokens=4096
                    )
                ],
            ),
            [
                ModelConfigurationUpsertRequest(
                    name="gpt-4", is_visible=True, max_input_tokens=4096
                )
            ],
        ),
    ],
)
def test_update_model_configurations(
    reset: None,  # noqa: ARG001
    initial: tuple[str, list[ModelConfigurationUpsertRequest]],
    initial_expected: list[ModelConfigurationUpsertRequest],
    updated: tuple[str, list[ModelConfigurationUpsertRequest]],
    updated_expected: list[ModelConfigurationUpsertRequest],
) -> None:
    admin_user = UserManager.create(name="admin_user")

    default_model_name, model_configurations = initial
    updated_default_model_name, updated_model_configurations = updated

    name = str(uuid.uuid4())

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                model_configuration.dict()
                for model_configuration in model_configurations
            ],
            "is_public": True,
            "groups": [],
            "api_key_changed": True,
        },
    )
    created_provider = response.json()
    assert_response_is_equivalent(
        admin_user,
        response,
        initial_expected,
    )

    response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        headers=admin_user.headers,
        json={
            "provider_id": created_provider["id"],
            "model_name": updated_default_model_name,
        },
    )
    assert response.status_code == 200

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
        json={
            "id": created_provider["id"],
            "name": name,
            "provider": created_provider["provider"],
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [
                model_configuration.dict()
                for model_configuration in updated_model_configurations
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert_response_is_equivalent(
        admin_user,
        response,
        updated_expected,
        "sk-0****0000",
    )

    response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        headers=admin_user.headers,
        json={
            "provider_id": created_provider["id"],
            "model_name": updated_default_model_name,
        },
    )
    assert response.status_code == 200

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
        json={
            "id": created_provider["id"],
            "name": name,
            "provider": created_provider["provider"],
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [
                model_configuration.dict()
                for model_configuration in updated_model_configurations
            ],
            "is_public": True,
            "groups": [],
            "api_key_changed": True,
        },
    )
    assert_response_is_equivalent(
        admin_user,
        response,
        updated_expected,
        "sk-0****0001",
    )


@pytest.mark.parametrize(
    "model_configurations",
    [
        [
            ModelConfigurationUpsertRequest(
                name="gpt-4", is_visible=True, max_input_tokens=4096
            )
        ],
        [
            ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
            ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
        ],
    ],
)
def test_delete_llm_provider(
    reset: None,  # noqa: ARG001
    model_configurations: list[ModelConfigurationUpsertRequest],
) -> None:
    admin_user = UserManager.create(name="admin_user")

    # Create a provider
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "test-provider-delete",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                model_configuration.dict()
                for model_configuration in model_configurations
            ],
            "is_public": True,
            "groups": [],
        },
    )
    created_provider = response.json()
    assert response.status_code == 200

    # Delete the provider
    response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}",
        headers=admin_user.headers,
    )
    assert response.status_code == 200

    # Verify provider is deleted by checking it's not in the list
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is None


def test_delete_default_llm_provider_rejected(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Deleting the default LLM provider should return 400."""
    admin_user = UserManager.create(name="admin_user")

    # Create a provider
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "test-provider-default-delete",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o-mini", is_visible=True
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert response.status_code == 200
    created_provider = response.json()

    # Set this provider as the default
    set_default_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        headers=admin_user.headers,
        json={
            "provider_id": created_provider["id"],
            "model_name": "gpt-4o-mini",
        },
    )
    assert set_default_response.status_code == 200

    # Attempt to delete the default provider — should be rejected
    delete_response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}",
        headers=admin_user.headers,
    )
    assert delete_response.status_code == 400
    assert "Cannot delete the default LLM provider" in delete_response.json()["detail"]

    # Verify provider still exists
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None


def test_delete_non_default_llm_provider_with_default_set(
    reset: None,  # noqa: ARG001
) -> None:
    """Deleting a non-default provider should succeed even when a default is set."""
    admin_user = UserManager.create(name="admin_user")

    # Create two providers
    response_default = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "default-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o-mini", is_visible=True
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert response_default.status_code == 200
    default_provider = response_default.json()

    response_other = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "other-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o", is_visible=True
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert response_other.status_code == 200
    other_provider = response_other.json()

    # Set the first provider as default
    set_default_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        headers=admin_user.headers,
        json={
            "provider_id": default_provider["id"],
            "model_name": "gpt-4o-mini",
        },
    )
    assert set_default_response.status_code == 200

    # Delete the non-default provider — should succeed
    delete_response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{other_provider['id']}",
        headers=admin_user.headers,
    )
    assert delete_response.status_code == 200

    # Verify the non-default provider is gone
    provider_data = _get_provider_by_id(admin_user, other_provider["id"])
    assert provider_data is None

    # Verify the default provider still exists
    default_data = _get_provider_by_id(admin_user, default_provider["id"])
    assert default_data is not None


def test_force_delete_default_llm_provider(
    reset: None,  # noqa: ARG001
) -> None:
    """Force-deleting the default LLM provider should succeed."""
    admin_user = UserManager.create(name="admin_user")

    # Create a provider
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "test-provider-force-delete",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o-mini", is_visible=True
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert response.status_code == 200
    created_provider = response.json()

    # Set this provider as the default
    set_default_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        headers=admin_user.headers,
        json={
            "provider_id": created_provider["id"],
            "model_name": "gpt-4o-mini",
        },
    )
    assert set_default_response.status_code == 200

    # Attempt to delete without force — should be rejected
    delete_response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}",
        headers=admin_user.headers,
    )
    assert delete_response.status_code == 400

    # Force delete — should succeed
    force_delete_response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}?force=true",
        headers=admin_user.headers,
    )
    assert force_delete_response.status_code == 200

    # Verify provider is gone
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is None


def test_delete_default_vision_provider_clears_vision_default(
    reset: None,  # noqa: ARG001
) -> None:
    """Deleting the default vision provider should succeed and clear the vision default."""
    admin_user = UserManager.create(name="admin_user")

    # Create a text provider and set it as default (so we have a default text provider)
    text_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "text-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o-mini", is_visible=True
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert text_response.status_code == 200
    text_provider = text_response.json()
    _set_default_provider(admin_user, text_provider["id"], "gpt-4o-mini")

    # Create a vision provider and set it as default vision
    vision_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "vision-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "model_configurations": [
                ModelConfigurationUpsertRequest(
                    name="gpt-4o",
                    is_visible=True,
                    supports_image_input=True,
                ).model_dump()
            ],
            "is_public": True,
            "groups": [],
        },
    )
    assert vision_response.status_code == 200
    vision_provider = vision_response.json()
    _set_default_vision_provider(admin_user, vision_provider["id"], "gpt-4o")

    # Verify vision default is set
    data = _get_providers_admin(admin_user)
    assert data is not None
    _, _, vision_default = _unpack_data(data)
    assert vision_default is not None
    assert vision_default["provider_id"] == vision_provider["id"]

    # Delete the vision provider — should succeed (only text default is protected)
    delete_response = requests.delete(
        f"{API_SERVER_URL}/admin/llm/provider/{vision_provider['id']}",
        headers=admin_user.headers,
    )
    assert delete_response.status_code == 200

    # Verify the vision provider is gone
    provider_data = _get_provider_by_id(admin_user, vision_provider["id"])
    assert provider_data is None

    # Verify there is no default vision provider
    data = _get_providers_admin(admin_user)
    assert data is not None
    _, text_default, vision_default = _unpack_data(data)
    assert vision_default is None

    # Verify the text default is still intact
    assert text_default is not None
    assert text_default["provider_id"] == text_provider["id"]


def test_duplicate_provider_name_rejected(reset: None) -> None:  # noqa: ARG001
    """Creating a provider with a name that already exists should return 400."""
    admin_user = UserManager.create(name="admin_user")
    provider_name = f"unique-provider-{uuid.uuid4()}"

    base_payload = {
        "name": provider_name,
        "provider": LlmProviderNames.OPENAI,
        "api_key": "sk-000000000000000000000000000000000000000000000000",
        "model_configurations": [
            ModelConfigurationUpsertRequest(
                name="gpt-4o-mini", is_visible=True
            ).model_dump()
        ],
        "is_public": True,
        "groups": [],
    }

    # First creation succeeds
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json=base_payload,
    )
    assert response.status_code == 200

    # Second creation with the same name is rejected
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json=base_payload,
    )
    assert response.status_code == 409
    assert "already exists" in response.json()["detail"]


def test_rename_provider_rejected(reset: None) -> None:  # noqa: ARG001
    """Renaming a provider is not currently supported and should return 400."""
    admin_user = UserManager.create(name="admin_user")

    create_payload = {
        "name": f"original-name-{uuid.uuid4()}",
        "provider": LlmProviderNames.OPENAI,
        "api_key": "sk-000000000000000000000000000000000000000000000000",
        "model_configurations": [
            ModelConfigurationUpsertRequest(
                name="gpt-4o-mini", is_visible=True
            ).model_dump()
        ],
        "is_public": True,
        "groups": [],
    }

    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json=create_payload,
    )
    assert response.status_code == 200
    provider_id = response.json()["id"]

    # Attempt to rename — should be rejected
    new_name = f"renamed-provider-{uuid.uuid4()}"
    update_payload = {**create_payload, "id": provider_id, "name": new_name}
    response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json=update_payload,
    )
    assert response.status_code == 400
    assert "not currently supported" in response.json()["detail"]

    # Verify no duplicate was created — only the original provider should exist
    provider = _get_provider_by_id(admin_user, provider_id)
    assert provider is not None
    assert provider["name"] == create_payload["name"]

    all_response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    assert all_response.status_code == 200
    all_names = [p["name"] for p in all_response.json()["providers"]]
    assert new_name not in all_names


def test_model_visibility_preserved_on_edit(reset: None) -> None:  # noqa: ARG001
    """
    Test that model visibility flags are correctly preserved when editing an LLM provider.

    This test verifies the fix for the bug where editing a provider with specific visible models
    would incorrectly map visibility flags when the provider's model list differs from the
    descriptor's default model list.

    Scenario:
    1. Create a provider with 3 models, 2 visible
    2. Edit the provider to change visibility (make all 3 visible)
    3. Verify all 3 models are now visible
    4. Edit again to make only 1 visible
    5. Verify only 1 is visible
    """
    admin_user = UserManager.create(name="admin_user")

    # Initial model configurations: 2 visible, 1 hidden
    model_configs = [
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=True,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o-mini",
            is_visible=True,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4-turbo",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
    ]

    # Create the provider
    create_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": "test-visibility-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [config.dict() for config in model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response.status_code == 200
    created_provider = create_response.json()

    # Verify initial state: 2 visible models
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None
    visible_models = [
        model for model in provider_data["model_configurations"] if model["is_visible"]
    ]
    assert len(visible_models) == 2
    assert any(m["name"] == "gpt-4o" for m in visible_models)
    assert any(m["name"] == "gpt-4o-mini" for m in visible_models)

    # Edit 1: Make all 3 models visible
    edit_configs_all_visible = [
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=True,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o-mini",
            is_visible=True,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4-turbo",
            is_visible=True,  # Now visible
            max_input_tokens=None,
            supports_image_input=None,
        ),
    ]

    edit_response_1 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": created_provider["id"],
            "name": "test-visibility-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                config.dict() for config in edit_configs_all_visible
            ],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert edit_response_1.status_code == 200

    # Verify all 3 models are now visible
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None
    visible_models = [
        model for model in provider_data["model_configurations"] if model["is_visible"]
    ]
    assert len(visible_models) == 3

    # Edit 2: Make only 1 model visible
    edit_configs_one_visible = [
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=True,  # Only this one visible
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o-mini",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4-turbo",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
    ]

    edit_response_2 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": created_provider["id"],
            "name": "test-visibility-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                config.dict() for config in edit_configs_one_visible
            ],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert edit_response_2.status_code == 200

    # Verify only 1 model is visible
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None
    visible_models = [
        model for model in provider_data["model_configurations"] if model["is_visible"]
    ]
    assert len(visible_models) == 1
    assert visible_models[0]["name"] == "gpt-4o"

    # Make none visible
    edit_configs_none_visible = [
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o-mini",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4-turbo",
            is_visible=False,
            max_input_tokens=None,
            supports_image_input=None,
        ),
    ]
    edit_response_3 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": created_provider["id"],
            "name": "test-visibility-provider",
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [
                config.dict() for config in edit_configs_none_visible
            ],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert edit_response_3.status_code == 200

    # Verify no models are visible
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None
    visible_models = [
        model for model in provider_data["model_configurations"] if model["is_visible"]
    ]
    assert len(visible_models) == 0

    # Make gpt-4o the default
    _set_default_provider(admin_user, created_provider["id"], "gpt-4o")

    # Verify gpt-4o is the default
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
    assert provider_data is not None
    visible_models = [
        model for model in provider_data["model_configurations"] if model["is_visible"]
    ]
    assert len(visible_models) == 1
    assert visible_models[0]["name"] == "gpt-4o"


def _get_provider_by_name(providers: list[dict], provider_name: str) -> dict | None:
    return next((p for p in providers if p["name"] == provider_name), None)


def _get_providers_admin(
    admin_user: DATestUser,
) -> dict | None:
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    resp_json = response.json()

    return resp_json


def _unpack_data(data: dict) -> tuple[list[dict], dict | None, dict | None]:
    providers = data["providers"]
    text_default = data.get("default_text")
    vision_default = data.get("default_vision")

    return providers, text_default, vision_default


def _get_providers_basic(
    user: DATestUser,
) -> dict | None:
    response = requests.get(
        f"{API_SERVER_URL}/llm/provider",
        headers=user.headers,
    )
    assert response.status_code == 200
    resp_json = response.json()

    return resp_json


def _validate_default_model(
    default: dict | None,
    provider_id: int | None = None,
    model_name: str | None = None,
) -> None:
    if default is None:
        assert provider_id is None and model_name is None
        return

    assert default["provider_id"] == provider_id
    assert default["model_name"] == model_name


def _get_provider_by_name_admin(
    admin_user: DATestUser, provider_name: str
) -> dict | None:
    """Utility function to fetch an LLM provider by name via admin endpoint."""
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    providers = response.json()
    return next((p for p in providers if p["name"] == provider_name), None)


def _get_provider_by_name_basic(user: DATestUser, provider_name: str) -> dict | None:
    """Utility function to fetch an LLM provider by name via basic (non-admin) endpoint."""
    response = requests.get(
        f"{API_SERVER_URL}/llm/provider",
        headers=user.headers,
    )
    assert response.status_code == 200
    providers = response.json()["providers"]
    return next((p for p in providers if p["name"] == provider_name), None)


def _validate_model_configurations(
    actual_configs: list[dict],
    expected_model_names: list[str],
    expected_visible: dict[str, bool] | None = None,
    expected_image_support: dict[str, bool] | None = None,
) -> None:
    """
    Validate that model configurations match expectations.

    Args:
        actual_configs: List of model configuration dicts from the API response
        expected_model_names: List of expected model names
        expected_visible: Optional dict mapping model name to expected visibility
        expected_image_support: Optional dict mapping model name to expected supports_image_input
    """
    actual_names = {config["name"] for config in actual_configs}
    expected_names = set(expected_model_names)

    assert (
        actual_names == expected_names
    ), f"Model names mismatch. Expected: {expected_names}, Actual: {actual_names}"

    if expected_visible:
        for config in actual_configs:
            if config["name"] in expected_visible:
                assert config["is_visible"] == expected_visible[config["name"]], (
                    f"Visibility mismatch for {config['name']}. "
                    f"Expected: {expected_visible[config['name']]}, Actual: {config['is_visible']}"
                )

    if expected_image_support:
        for config in actual_configs:
            if config["name"] in expected_image_support:
                assert (
                    config["supports_image_input"]
                    == expected_image_support[config["name"]]
                ), (
                    f"supports_image_input mismatch for {config['name']}. "
                    f"Expected: {expected_image_support[config['name']]}, "
                    f"Actual: {config['supports_image_input']}"
                )


def _validate_provider_data(
    provider_data: dict,
    expected_name: str,
    expected_provider: str,
    expected_model_names: list[str],
    expected_visible: dict[str, bool] | None = None,
    expected_is_public: bool | None = None,
    expected_image_support: dict[str, bool] | None = None,
) -> None:
    """
    Validate that provider data matches expectations.

    Args:
        provider_data: Provider dict from the API response
        expected_name: Expected provider name
        expected_provider: Expected provider type (e.g., 'openai')
        expected_model_names: List of expected model names in configurations
        expected_visible: Optional dict mapping model name to expected visibility
        expected_is_public: Optional expected is_public value (admin endpoint only)
        expected_image_support: Optional dict mapping model name to expected supports_image_input
    """
    assert (
        provider_data["name"] == expected_name
    ), f"Provider name mismatch. Expected: {expected_name}, Actual: {provider_data['name']}"
    assert (
        provider_data["provider"] == expected_provider
    ), f"Provider type mismatch. Expected: {expected_provider}, Actual: {provider_data['provider']}"

    # Validate is_public if provided (only available in admin endpoint response)
    if expected_is_public is not None and "is_public" in provider_data:
        assert (
            provider_data["is_public"] == expected_is_public
        ), f"is_public mismatch. Expected: {expected_is_public}, Actual: {provider_data['is_public']}"

    # Validate model configurations
    _validate_model_configurations(
        provider_data["model_configurations"],
        expected_model_names,
        expected_visible,
        expected_image_support,
    )


def test_default_model_persistence_and_update(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """
    Test that the default model is correctly set, persisted, and can be updated.

    This test verifies:
    1. Admin creates a provider with a specific default model
    2. Admin endpoint (/admin/llm/provider) shows correct default model
    3. Basic endpoint (/llm/provider) shows correct default model for admin user
    4. Non-admin user can see the same default model via basic endpoint
    5. Admin updates the default model
    6. Both admin and basic endpoints reflect the new default model
    7. Non-admin user sees the updated default model
    """
    from onyx.auth.schemas import UserRole

    admin_user = UserManager.create(name="admin_user")

    # Create a non-admin user
    basic_user = UserManager.create(name="basic_user")
    # The first user is admin, subsequent users are basic by default
    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN

    provider_name = f"test-default-model-{uuid.uuid4()}"
    updated_default_model = "gpt-4o"

    # Model configurations including all models we'll use
    model_configs = [
        ModelConfigurationUpsertRequest(
            name="gpt-4",
            is_visible=True,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=True,
        ),
    ]

    # Expected model names and visibility
    expected_model_names = ["gpt-4", "gpt-4o"]
    expected_visible = {"gpt-4": True, "gpt-4o": True}

    # Step 1: Admin creates the provider with initial default model
    create_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [config.model_dump() for config in model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response.status_code == 200

    # Capture initial defaults (setup_postgres may have created a DevEnvPresetOpenAI default)
    initial_data = _get_providers_admin(admin_user)
    assert initial_data is not None
    _, initial_text_default, initial_vision_default = _unpack_data(initial_data)

    # Step 2: Verify via admin endpoint that all provider data is correct
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    # Defaults should be unchanged from initial state (new provider not set as default)
    assert text_default == initial_text_default
    assert vision_default == initial_vision_default

    admin_provider_data = _get_provider_by_name(providers, provider_name)
    assert admin_provider_data is not None

    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
        expected_is_public=True,
    )

    # Step 3: Verify via basic endpoint (admin user) that all provider data is correct
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    assert text_default == initial_text_default
    assert vision_default == initial_vision_default

    admin_basic_provider_data = _get_provider_by_name(providers, provider_name)
    assert admin_basic_provider_data is not None
    _validate_provider_data(
        admin_basic_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
    )

    # Step 4: Verify non-admin user sees the same provider data via basic endpoint
    basic_user_data = _get_providers_basic(basic_user)
    assert basic_user_data is not None
    providers, text_default, vision_default = _unpack_data(basic_user_data)
    assert text_default == initial_text_default
    assert vision_default == initial_vision_default

    basic_user_provider_data = _get_provider_by_name(providers, provider_name)
    assert basic_user_provider_data is not None
    _validate_provider_data(
        basic_user_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
    )

    # Step 5: Admin updates the provider to change the default model
    update_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": create_response.json()["id"],
            "name": provider_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [config.model_dump() for config in model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert update_response.status_code == 200

    default_provider_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        json={
            "provider_id": update_response.json()["id"],
            "model_name": updated_default_model,
        },
        headers=admin_user.headers,
    )
    assert default_provider_response.status_code == 200

    # Step 6a: Verify the updated provider data via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default,
        provider_id=update_response.json()["id"],
        model_name=updated_default_model,
    )
    _validate_default_model(vision_default)  # None

    admin_provider_data = _get_provider_by_name(providers, provider_name)
    assert admin_provider_data is not None
    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
        expected_is_public=True,
    )

    # Step 6b: Verify the updated provider data via basic endpoint (admin user)
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    _validate_default_model(
        text_default,
        provider_id=update_response.json()["id"],
        model_name=updated_default_model,
    )
    _validate_default_model(vision_default)  # None

    admin_basic_provider_data = _get_provider_by_name(providers, provider_name)
    assert admin_basic_provider_data is not None
    _validate_provider_data(
        admin_basic_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
    )

    # Step 7: Verify non-admin user sees the updated provider data
    basic_user_data = _get_providers_basic(basic_user)
    assert basic_user_data is not None
    providers, text_default, vision_default = _unpack_data(basic_user_data)
    _validate_default_model(
        text_default,
        provider_id=update_response.json()["id"],
        model_name=updated_default_model,
    )
    _validate_default_model(vision_default)  # None

    basic_user_provider_data = _get_provider_by_name(providers, provider_name)
    assert basic_user_provider_data is not None
    _validate_provider_data(
        basic_user_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
    )


def _get_all_providers_basic(user: DATestUser) -> list[dict]:
    """Utility function to fetch all LLM providers via basic endpoint."""
    response = requests.get(
        f"{API_SERVER_URL}/llm/provider",
        headers=user.headers,
    )
    assert response.status_code == 200
    return response.json()["providers"]


def _get_all_providers_admin(admin_user: DATestUser) -> list[dict]:
    """Utility function to fetch all LLM providers via admin endpoint."""
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    return response.json()["providers"]


def _set_default_provider(
    admin_user: DATestUser, provider_id: int, model_name: str
) -> None:
    """Utility function to set a provider as the default."""
    response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default",
        json={
            "provider_id": provider_id,
            "model_name": model_name,
        },
        headers=admin_user.headers,
    )
    assert response.status_code == 200


def _set_default_vision_provider(
    admin_user: DATestUser, provider_id: int, vision_model: str | None = None
) -> None:
    """Utility function to set a provider as the default vision provider."""
    response = requests.post(
        f"{API_SERVER_URL}/admin/llm/default-vision",
        json={
            "provider_id": provider_id,
            "model_name": vision_model,
        },
        headers=admin_user.headers,
    )
    assert response.status_code == 200


def test_multiple_providers_default_switching(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """
    Test switching default providers and models across multiple LLM providers.

    This test verifies:
    1. Admin creates multiple LLM providers
    2. Admin sets one as the default provider with a specific default model
    3. Both admin and basic_user query /provider and see the same default provider/model
    4. Admin changes the default provider and model to something different
    5. Both admin and basic_user verify they see the same updated default
    6. Admin switches to a different provider that has a model with the same name
    7. Both users should see the new provider as default with the same model name
    """
    from onyx.auth.schemas import UserRole

    admin_user = UserManager.create(name="admin_user")

    # Create a non-admin user
    basic_user = UserManager.create(name="basic_user")
    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN

    # We'll create two providers, both with a model named "gpt-4" to test the
    # scenario where different providers have models with the same name
    provider_1_name = f"test-provider-1-{uuid.uuid4()}"
    provider_2_name = f"test-provider-2-{uuid.uuid4()}"

    # Both providers will have "gpt-4" as a model
    shared_model_name = "gpt-4"
    provider_1_unique_model = "gpt-4o"
    provider_2_unique_model = "gpt-4-turbo"

    # Model configurations for provider 1
    provider_1_configs = [
        ModelConfigurationUpsertRequest(
            name=shared_model_name,
            is_visible=True,
        ),
        ModelConfigurationUpsertRequest(
            name=provider_1_unique_model,
            is_visible=True,
        ),
    ]

    # Model configurations for provider 2
    provider_2_configs = [
        ModelConfigurationUpsertRequest(
            name=shared_model_name,
            is_visible=True,
        ),
        ModelConfigurationUpsertRequest(
            name=provider_2_unique_model,
            is_visible=True,
        ),
    ]

    # Expected model names and visibility for each provider
    provider_1_model_names = [shared_model_name, provider_1_unique_model]
    provider_1_visible = {shared_model_name: True, provider_1_unique_model: True}
    provider_2_model_names = [shared_model_name, provider_2_unique_model]
    provider_2_visible = {shared_model_name: True, provider_2_unique_model: True}

    # Step 1: Create provider 1 with shared_model_name as default
    create_response_1 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_1_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [c.model_dump() for c in provider_1_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response_1.status_code == 200
    provider_1 = create_response_1.json()

    _set_default_provider(admin_user, provider_1["id"], shared_model_name)

    # Create provider 2 with provider_2_unique_model as default initially
    create_response_2 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_2_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "model_configurations": [c.model_dump() for c in provider_2_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response_2.status_code == 200
    provider_2 = create_response_2.json()

    # Step 2: Set provider 1 as the default provider
    _set_default_provider(admin_user, provider_1["id"], shared_model_name)

    # Step 3: Both admin and basic_user query and verify they see the same default
    # Validate via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default, provider_id=provider_1["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    admin_provider_data = _get_provider_by_name(providers, provider_1_name)
    assert admin_provider_data is not None
    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
        expected_is_public=True,
    )

    # Validate provider 2 via admin endpoint (should not be default)
    admin_provider_2 = _get_provider_by_name(providers, provider_2_name)
    assert admin_provider_2 is not None
    _validate_provider_data(
        admin_provider_2,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
        expected_is_public=True,
    )

    # Validate via basic endpoint (basic_user)
    basic_data = _get_providers_basic(basic_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default, provider_id=provider_1["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    basic_provider_data = _get_provider_by_name(providers, provider_1_name)
    assert basic_provider_data is not None
    _validate_provider_data(
        basic_provider_data,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
    )

    # Also verify admin sees the same via basic endpoint
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    _validate_default_model(
        text_default, provider_id=provider_1["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    admin_basic_provider_data = _get_provider_by_name(providers, provider_1_name)
    assert admin_basic_provider_data is not None
    _validate_provider_data(
        admin_basic_provider_data,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
    )

    # Step 4: Admin changes the default provider to provider 2 and updates its default model
    # First update provider 2's default model to the unique model (it already is, but reconfirm)
    update_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": provider_2["id"],
            "name": provider_2_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "model_configurations": [c.model_dump() for c in provider_2_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert update_response.status_code == 200

    # Now set provider 2 as the default
    _set_default_provider(admin_user, provider_2["id"], provider_2_unique_model)

    # Step 5: Both admin and basic_user verify they see the updated default
    # Validate via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=provider_2_unique_model
    )
    _validate_default_model(vision_default)  # None
    admin_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert admin_provider_data is not None
    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
        expected_is_public=True,
    )

    # Validate provider 1 via admin endpoint (should no longer be default)
    admin_provider_1 = _get_provider_by_name(providers, provider_1_name)
    assert admin_provider_1 is not None
    _validate_provider_data(
        admin_provider_1,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
        expected_is_public=True,
    )

    # Validate via basic endpoint (basic_user)
    basic_data = _get_providers_basic(basic_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=provider_2_unique_model
    )
    _validate_default_model(vision_default)  # None
    basic_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert basic_provider_data is not None
    _validate_provider_data(
        basic_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Validate via basic endpoint (admin_user)
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=provider_2_unique_model
    )
    _validate_default_model(vision_default)  # None
    admin_basic_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert admin_basic_provider_data is not None
    _validate_provider_data(
        admin_basic_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Step 6: Admin changes provider 2's default model to the shared model name
    # (same model name as provider 1 had)
    update_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=false",
        headers=admin_user.headers,
        json={
            "id": provider_2["id"],
            "name": provider_2_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "model_configurations": [c.model_dump() for c in provider_2_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert update_response.status_code == 200

    _set_default_provider(
        admin_user, provider_2["id"], shared_model_name
    )  # Same name as provider 1's model

    # Step 7: Both users verify they see provider 2 as default with the shared model name
    # Validate via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    admin_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert admin_provider_data is not None
    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
        expected_is_public=True,
    )

    # Validate via basic endpoint (basic_user)
    basic_data = _get_providers_basic(basic_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    basic_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert basic_provider_data is not None
    _validate_provider_data(
        basic_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Validate via basic endpoint (admin_user)
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    _validate_default_model(
        text_default, provider_id=provider_2["id"], model_name=shared_model_name
    )
    _validate_default_model(vision_default)  # None
    admin_basic_provider_data = _get_provider_by_name(providers, provider_2_name)
    assert admin_basic_provider_data is not None
    _validate_provider_data(
        admin_basic_provider_data,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Verify provider 1 is no longer the default and has correct data
    admin_provider_1 = _get_provider_by_name(providers, provider_1_name)
    assert admin_provider_1 is not None
    _validate_provider_data(
        admin_provider_1,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
        expected_is_public=True,
    )

    basic_provider_1 = _get_provider_by_name(providers, provider_1_name)
    assert basic_provider_1 is not None
    _validate_provider_data(
        basic_provider_1,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
    )


def test_default_provider_and_vision_provider_selection(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test setting separate default providers for regular LLM and vision capabilities.

    This test verifies:
    1. Create provider 1 with mixed models (some with vision, some without)
    2. Create provider 2 with only vision-capable models
    3. Set a non-vision model from provider 1 as the general default
    4. Set a vision model from provider 2 as the default vision model
    5. Verify both admin and basic users see correct default provider and vision provider
    6. Verify model configurations show correct image support capabilities
    """
    from onyx.auth.schemas import UserRole

    admin_user = UserManager.create(name="admin_user")

    # Create a non-admin user
    basic_user = UserManager.create(name="basic_user")
    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN

    provider_1_name = f"test-mixed-models-{uuid.uuid4()}"
    provider_2_name = f"test-vision-only-{uuid.uuid4()}"

    # Provider 1: Mixed models - some with vision support, some without
    # Using real model names that litellm recognizes for vision support
    provider_1_non_vision_model = "gpt-4"  # No vision support
    provider_1_vision_model = "gpt-4o"  # Has vision support

    # Provider 2: Only vision-capable models
    provider_2_vision_model_1 = "gpt-4-vision-preview"  # Vision model
    provider_2_vision_model_2 = "gpt-4o-mini"  # Also has vision support

    # Model configurations for provider 1 (mixed)
    provider_1_configs = [
        ModelConfigurationUpsertRequest(
            name=provider_1_non_vision_model,
            is_visible=True,
        ),
        ModelConfigurationUpsertRequest(
            name=provider_1_vision_model,
            is_visible=True,
        ),
    ]

    # Model configurations for provider 2 (vision only)
    provider_2_configs = [
        ModelConfigurationUpsertRequest(
            name=provider_2_vision_model_1,
            is_visible=True,
            supports_image_input=True,
        ),
        ModelConfigurationUpsertRequest(
            name=provider_2_vision_model_2,
            is_visible=True,
            supports_image_input=True,
        ),
    ]

    # Expected model names
    provider_1_model_names = [provider_1_non_vision_model, provider_1_vision_model]
    provider_1_visible = {
        provider_1_non_vision_model: True,
        provider_1_vision_model: True,
    }

    provider_2_model_names = [provider_2_vision_model_1, provider_2_vision_model_2]
    provider_2_visible = {
        provider_2_vision_model_1: True,
        provider_2_vision_model_2: True,
    }

    # Step 1: Create provider 1 with mixed models, set non-vision model as default
    create_response_1 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_1_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [c.model_dump() for c in provider_1_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response_1.status_code == 200
    provider_1 = create_response_1.json()

    # Step 2: Create provider 2 with vision-only models
    create_response_2 = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_2_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "model_configurations": [c.model_dump() for c in provider_2_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response_2.status_code == 200
    provider_2 = create_response_2.json()

    # Step 3: Set provider 1 as the general default provider
    _set_default_provider(admin_user, provider_1["id"], provider_1_non_vision_model)

    # Step 4: Set provider 2 with a specific vision model as the default vision provider
    _set_default_vision_provider(
        admin_user, provider_2["id"], provider_2_vision_model_1
    )

    # Step 5: Verify via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None

    # Find and validate the default provider (provider 1)
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default,
        provider_id=provider_1["id"],
        model_name=provider_1_non_vision_model,
    )
    _validate_default_model(
        vision_default,
        provider_id=provider_2["id"],
        model_name=provider_2_vision_model_1,
    )
    admin_default = _get_provider_by_name(providers, provider_1_name)
    assert admin_default is not None
    _validate_provider_data(
        admin_default,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
        expected_is_public=True,
    )

    # Find and validate the default vision provider (provider 2)
    admin_vision_default = _get_provider_by_name(providers, provider_2_name)
    assert admin_vision_default is not None
    _validate_provider_data(
        admin_vision_default,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
        expected_is_public=True,
    )

    # Step 6: Verify via basic endpoint (basic_user)
    # Find and validate the default provider (provider 1)
    basic_data = _get_providers_basic(basic_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default,
        provider_id=provider_1["id"],
        model_name=provider_1_non_vision_model,
    )
    _validate_default_model(
        vision_default,
        provider_id=provider_2["id"],
        model_name=provider_2_vision_model_1,
    )
    basic_default = _get_provider_by_name(providers, provider_1_name)
    assert basic_default is not None
    _validate_provider_data(
        basic_default,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
    )

    # Find and validate the default vision provider (provider 2)
    basic_vision_default = _get_provider_by_name(providers, provider_2_name)
    assert basic_vision_default is not None
    _validate_provider_data(
        basic_vision_default,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Step 7: Verify via basic endpoint (admin_user sees same as basic_user)
    admin_basic_data = _get_providers_basic(admin_user)
    assert admin_basic_data is not None
    providers, text_default, vision_default = _unpack_data(admin_basic_data)
    _validate_default_model(
        text_default,
        provider_id=provider_1["id"],
        model_name=provider_1_non_vision_model,
    )
    _validate_default_model(
        vision_default,
        provider_id=provider_2["id"],
        model_name=provider_2_vision_model_1,
    )
    admin_basic_default = _get_provider_by_name(providers, provider_1_name)
    assert admin_basic_default is not None
    _validate_provider_data(
        admin_basic_default,
        expected_name=provider_1_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_1_model_names,
        expected_visible=provider_1_visible,
    )

    admin_basic_vision_default = _get_provider_by_name(providers, provider_2_name)
    assert admin_basic_vision_default is not None
    _validate_provider_data(
        admin_basic_vision_default,
        expected_name=provider_2_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=provider_2_model_names,
        expected_visible=provider_2_visible,
    )

    # Verify that the providers are distinct (different providers for regular vs vision)
    assert (
        admin_default["name"] != admin_vision_default["name"]
    ), "Default provider and vision provider should be different providers"
    assert (
        basic_default["name"] != basic_vision_default["name"]
    ), "Default provider and vision provider should be different providers (basic endpoint)"


def test_default_provider_is_not_default_vision_provider(
    reset: None,  # noqa: ARG001
) -> None:
    """
    Test that setting a provider as the default provider does NOT make it
    the default vision provider.

    This test verifies:
    1. Create a provider with some models
    2. Set it as the default provider
    3. Verify it is the default provider (is_default_provider=True)
    4. Verify it is NOT the default vision provider (is_default_vision_provider should be None/False)
    """
    admin_user = UserManager.create(name="admin_user")

    provider_name = f"test-default-not-vision-{uuid.uuid4()}"

    # Model configurations
    model_configs = [
        ModelConfigurationUpsertRequest(
            name="gpt-4",
            is_visible=True,
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o",
            is_visible=True,
        ),
    ]

    expected_model_names = ["gpt-4", "gpt-4o"]
    expected_visible = {"gpt-4": True, "gpt-4o": True}

    # Step 1: Create the provider
    create_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": provider_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000000",
            "model_configurations": [c.model_dump() for c in model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_response.status_code == 200
    created_provider = create_response.json()

    # Step 2: Set it as the default provider
    _set_default_provider(admin_user, created_provider["id"], "gpt-4")

    # Step 3 & 4: Verify via admin endpoint
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default, provider_id=created_provider["id"], model_name="gpt-4"
    )
    _validate_default_model(vision_default)  # None
    admin_provider_data = _get_provider_by_name(providers, provider_name)
    assert admin_provider_data is not None

    # Full validation of provider data
    _validate_provider_data(
        admin_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
        expected_is_public=True,
    )

    # Also verify via basic endpoint
    basic_data = _get_providers_basic(admin_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default, provider_id=created_provider["id"], model_name="gpt-4"
    )
    _validate_default_model(vision_default)  # None
    basic_provider_data = _get_provider_by_name(providers, provider_name)
    assert basic_provider_data is not None

    _validate_provider_data(
        basic_provider_data,
        expected_name=provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=expected_model_names,
        expected_visible=expected_visible,
    )


def _get_all_image_gen_configs(admin_user: DATestUser) -> list[dict]:
    """Utility function to fetch all image generation configs."""
    response = requests.get(
        f"{API_SERVER_URL}/admin/image-generation/config",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    return response.json()


def _create_image_gen_config(
    admin_user: DATestUser,
    image_provider_id: str,
    model_name: str,
    source_llm_provider_id: int,
    is_default: bool = False,
) -> dict:
    """Utility function to create an image generation config using clone mode."""
    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config",
        headers=admin_user.headers,
        json={
            "image_provider_id": image_provider_id,
            "model_name": model_name,
            "source_llm_provider_id": source_llm_provider_id,
            "is_default": is_default,
        },
    )
    assert (
        response.status_code == 200
    ), f"Failed to create image gen config: {response.text}"
    return response.json()


def _set_image_gen_config_default(
    admin_user: DATestUser, image_provider_id: str
) -> None:
    """Utility function to set an image generation config as default."""
    response = requests.post(
        f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/default",
        headers=admin_user.headers,
    )
    assert response.status_code == 200


def _delete_image_gen_config(admin_user: DATestUser, image_provider_id: str) -> None:
    """Utility function to delete an image generation config."""
    response = requests.delete(
        f"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}",
        headers=admin_user.headers,
    )
    assert response.status_code == 200


def test_all_three_provider_types_no_mixup(reset: None) -> None:  # noqa: ARG001
    """
    Test that regular LLM providers, vision providers, and image generation providers
    are all tracked separately with no mixup.

    This test verifies:
    1. Create a regular LLM provider and set as default
    2. Create a vision LLM provider and set as default vision
    3. Create an image generation config (using clone mode from regular provider)
    4. Set the image gen config as default
    5. Verify all three are correctly identified:
       - Regular provider: is_default_provider=True, is_default_vision_provider=None
       - Vision provider: is_default_provider=None, is_default_vision_provider=True
       - Image gen config: is_default=True (separate from LLM provider defaults)
    6. Verify image gen config doesn't appear in LLM provider lists
    7. Verify LLM providers don't appear in image gen config list
    """
    from onyx.auth.schemas import UserRole

    admin_user = UserManager.create(name="admin_user")

    # Create a non-admin user
    basic_user = UserManager.create(name="basic_user")
    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN

    # Provider names
    regular_provider_name = f"test-regular-provider-{uuid.uuid4()}"
    vision_provider_name = f"test-vision-provider-{uuid.uuid4()}"
    image_gen_provider_id = f"test-image-gen-{uuid.uuid4()}"

    # Model configurations
    regular_model_configs = [
        ModelConfigurationUpsertRequest(name="gpt-4", is_visible=True),
        ModelConfigurationUpsertRequest(name="gpt-4o", is_visible=True),
    ]

    vision_model_configs = [
        ModelConfigurationUpsertRequest(
            name="gpt-4-vision-preview", is_visible=True, supports_image_input=True
        ),
        ModelConfigurationUpsertRequest(
            name="gpt-4o", is_visible=True, supports_image_input=True
        ),
    ]

    # Step 1: Create regular LLM provider
    create_regular_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": regular_provider_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000001",
            "model_configurations": [c.model_dump() for c in regular_model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_regular_response.status_code == 200
    regular_provider = create_regular_response.json()

    # Set as default provider
    _set_default_provider(admin_user, regular_provider["id"], "gpt-4")

    # Step 2: Create vision LLM provider
    create_vision_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json={
            "name": vision_provider_name,
            "provider": LlmProviderNames.OPENAI,
            "api_key": "sk-000000000000000000000000000000000000000000000002",
            "default_model_name": "gpt-4-vision-preview",
            "model_configurations": [c.model_dump() for c in vision_model_configs],
            "is_public": True,
            "groups": [],
            "personas": [],
        },
    )
    assert create_vision_response.status_code == 200
    vision_provider = create_vision_response.json()

    # Set as default vision provider
    _set_default_vision_provider(
        admin_user, vision_provider["id"], "gpt-4-vision-preview"
    )

    # Step 3: Create image generation config using clone mode from regular provider
    _create_image_gen_config(
        admin_user=admin_user,
        image_provider_id=image_gen_provider_id,
        model_name="dall-e-3",
        source_llm_provider_id=regular_provider["id"],
        is_default=True,
    )

    # Step 4: Verify all three types are correctly tracked

    # Get all LLM providers (via admin endpoint)
    admin_data = _get_providers_admin(admin_user)
    assert admin_data is not None
    providers, text_default, vision_default = _unpack_data(admin_data)
    _validate_default_model(
        text_default, provider_id=regular_provider["id"], model_name="gpt-4"
    )
    _validate_default_model(
        vision_default,
        provider_id=vision_provider["id"],
        model_name="gpt-4-vision-preview",
    )
    _validate_default_model(
        vision_default, vision_provider["id"], "gpt-4-vision-preview"
    )
    _get_provider_by_name(providers, regular_provider_name)

    # Get all image generation configs
    image_gen_configs = _get_all_image_gen_configs(admin_user)

    # Verify the regular provider is the default provider
    admin_regular_provider_data = _get_provider_by_name(
        providers, regular_provider_name
    )
    assert admin_regular_provider_data is not None
    _validate_provider_data(
        admin_regular_provider_data,
        expected_name=regular_provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=[c.name for c in regular_model_configs],
        expected_visible={c.name: True for c in regular_model_configs},
    )
    admin_vision_provider_data = _get_provider_by_name(providers, vision_provider_name)
    assert admin_vision_provider_data is not None
    _validate_provider_data(
        admin_vision_provider_data,
        expected_name=vision_provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=[c.name for c in vision_model_configs],
        expected_visible={c.name: True for c in vision_model_configs},
    )

    # Verify the image gen config is the default image generation config
    image_gen_config_data = next(
        (
            c
            for c in image_gen_configs
            if c["image_provider_id"] == image_gen_provider_id
        ),
        None,
    )
    assert image_gen_config_data is not None, "Image gen config not found"
    assert (
        image_gen_config_data["is_default"] is True
    ), "Image gen config should be the default"
    assert (
        image_gen_config_data["model_name"] == "dall-e-3"
    ), "Image gen config should have correct model name"

    # Step 5: Verify no mixup - image gen providers don't appear in LLM provider lists
    # Image gen provider should not appear in the list
    assert image_gen_provider_id not in [p["name"] for p in providers]

    # Step 6: Verify via basic endpoint (non-admin user)
    basic_data = _get_providers_basic(basic_user)
    assert basic_data is not None
    providers, text_default, vision_default = _unpack_data(basic_data)
    _validate_default_model(
        text_default, provider_id=regular_provider["id"], model_name="gpt-4"
    )
    _validate_default_model(
        vision_default,
        provider_id=vision_provider["id"],
        model_name="gpt-4-vision-preview",
    )
    _validate_default_model(
        vision_default, vision_provider["id"], "gpt-4-vision-preview"
    )
    basic_provider_data = _get_provider_by_name(providers, regular_provider_name)
    assert basic_provider_data is not None
    _validate_provider_data(
        basic_provider_data,
        expected_name=regular_provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=[c.name for c in regular_model_configs],
        expected_visible={c.name: True for c in regular_model_configs},
    )
    basic_vision_provider_data = _get_provider_by_name(providers, vision_provider_name)
    assert basic_vision_provider_data is not None
    _validate_provider_data(
        basic_vision_provider_data,
        expected_name=vision_provider_name,
        expected_provider=LlmProviderNames.OPENAI,
        expected_model_names=[c.name for c in vision_model_configs],
        expected_visible={c.name: True for c in vision_model_configs},
    )

    # Step 7: Verify the counts are as expected
    # We should have at least 2 user-created providers (setup_postgres may add more)
    assert len(providers) >= 2
    assert len(image_gen_configs) == 1

    # Clean up: Delete the image gen config (to clean up the internal LLM provider)
    _delete_image_gen_config(admin_user, image_gen_provider_id)


================================================
FILE: backend/tests/integration/tests/llm_provider/test_llm_provider_access_control.py
================================================
import os

import pytest
import requests
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import can_user_access_llm_provider
from onyx.db.llm import fetch_user_group_ids
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import LLMProvider__Persona
from onyx.db.models import LLMProvider__UserGroup
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import get_llm_for_persona
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


pytestmark = pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="LLM provider access control is enterprise only",
)


def _create_llm_provider(
    db_session: Session,
    *,
    name: str,
    default_model_name: str,
    is_public: bool,
    is_default: bool,
) -> LLMProviderModel:
    _provider = upsert_llm_provider(
        llm_provider_upsert_request=LLMProviderUpsertRequest(
            name=name,
            provider=LlmProviderNames.OPENAI,
            api_key=None,
            api_base=None,
            api_version=None,
            custom_config=None,
            is_public=is_public,
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=default_model_name,
                    is_visible=True,
                )
            ],
        ),
        db_session=db_session,
    )
    if is_default:
        update_default_provider(_provider.id, default_model_name, db_session)

    provider = db_session.get(LLMProviderModel, _provider.id)
    if not provider:
        raise ValueError(f"Provider {name} not found")
    return provider


def _create_persona(
    db_session: Session,
    *,
    name: str,
    provider_name: str,
) -> Persona:
    persona = Persona(
        name=name,
        description=f"{name} description",
        llm_model_provider_override=provider_name,
        llm_model_version_override="gpt-4o-mini",
        system_prompt="System prompt",
        task_prompt="Task prompt",
        datetime_aware=True,
        is_public=True,
    )
    db_session.add(persona)
    db_session.flush()
    return persona


@pytest.fixture()
def users(reset: None) -> tuple[DATestUser, DATestUser]:  # noqa: ARG001
    admin_user = UserManager.create(name="admin_user")
    basic_user = UserManager.create(name="basic_user")
    return admin_user, basic_user


def test_can_user_access_llm_provider_or_logic(
    users: tuple[DATestUser, DATestUser],
) -> None:
    """Test LLM provider access control with is_public flag and AND logic.

    Tests the new access control logic:
    - is_public=True providers are accessible to everyone
    - is_public=False with no restrictions locks the provider
    - When both groups AND personas are set, AND logic applies (must satisfy both)
    """
    admin_user, basic_user = users

    with get_session_with_current_tenant() as db_session:
        # Public provider - accessible to everyone
        default_provider = _create_llm_provider(
            db_session,
            name="default-provider",
            default_model_name="gpt-4o",
            is_public=True,
            is_default=True,
        )
        # Locked provider - is_public=False with no restrictions
        locked_provider = _create_llm_provider(
            db_session,
            name="locked-provider",
            default_model_name="gpt-4o",
            is_public=False,
            is_default=False,
        )
        # Restricted provider - has both group AND persona restrictions (AND logic)
        restricted_provider = _create_llm_provider(
            db_session,
            name="restricted-provider",
            default_model_name="gpt-4o-mini",
            is_public=False,
            is_default=False,
        )

        allowed_persona = _create_persona(
            db_session,
            name="allowed-persona",
            provider_name=restricted_provider.name,
        )
        blocked_persona = _create_persona(
            db_session,
            name="blocked-persona",
            provider_name=restricted_provider.name,
        )

        access_group = UserGroup(name="access-group")
        db_session.add(access_group)
        db_session.flush()

        # Add both group and persona restrictions to restricted_provider
        db_session.add(
            LLMProvider__UserGroup(
                llm_provider_id=restricted_provider.id,
                user_group_id=access_group.id,
            )
        )
        db_session.add(
            LLMProvider__Persona(
                llm_provider_id=restricted_provider.id,
                persona_id=allowed_persona.id,
            )
        )
        # Only admin_user is in the access_group
        db_session.add(
            User__UserGroup(
                user_group_id=access_group.id,
                user_id=admin_user.id,
            )
        )
        db_session.flush()

        db_session.refresh(restricted_provider)
        db_session.refresh(locked_provider)

        admin_model = db_session.get(User, admin_user.id)
        basic_model = db_session.get(User, basic_user.id)

        assert admin_model is not None
        assert basic_model is not None

        # Fetch user group IDs for both users
        admin_group_ids = fetch_user_group_ids(db_session, admin_model)
        basic_group_ids = fetch_user_group_ids(db_session, basic_model)

        # Test is_public flag
        assert default_provider.is_public
        assert not locked_provider.is_public
        assert not restricted_provider.is_public

        # Public provider - everyone can access
        assert can_user_access_llm_provider(
            default_provider,
            admin_group_ids,
            allowed_persona,
        )
        assert can_user_access_llm_provider(
            default_provider,
            basic_group_ids,
            blocked_persona,
        )

        # Locked provider (is_public=False, no restrictions) - nobody can access
        assert not can_user_access_llm_provider(
            locked_provider,
            admin_group_ids,
            allowed_persona,
        )
        assert not can_user_access_llm_provider(
            locked_provider,
            basic_group_ids,
            allowed_persona,
        )

        # Restricted provider with AND logic (both groups AND personas set)
        # admin_user in group + allowed_persona whitelisted → SUCCESS (both conditions met)
        assert can_user_access_llm_provider(
            restricted_provider,
            admin_group_ids,
            allowed_persona,
        )

        # admin_user in group + blocked_persona not whitelisted → FAIL (persona not allowed)
        assert not can_user_access_llm_provider(
            restricted_provider,
            admin_group_ids,
            blocked_persona,
        )

        # basic_user not in group + allowed_persona whitelisted → FAIL (user not in group)
        assert not can_user_access_llm_provider(
            restricted_provider,
            basic_group_ids,
            allowed_persona,
        )

        # basic_user not in group + blocked_persona not whitelisted → FAIL (neither condition met)
        assert not can_user_access_llm_provider(
            restricted_provider,
            basic_group_ids,
            blocked_persona,
        )


def test_public_provider_with_persona_restrictions(
    users: tuple[DATestUser, DATestUser],
) -> None:
    """Public providers should still enforce persona restrictions.

    Regression test for the bug where is_public=True caused
    can_user_access_llm_provider() to return True immediately,
    bypassing persona whitelist checks entirely.
    """
    admin_user, _basic_user = users

    with get_session_with_current_tenant() as db_session:
        # Public provider with persona restrictions
        public_restricted = _create_llm_provider(
            db_session,
            name="public-persona-restricted",
            default_model_name="gpt-4o",
            is_public=True,
            is_default=True,
        )

        whitelisted_persona = _create_persona(
            db_session,
            name="whitelisted-persona",
            provider_name=public_restricted.name,
        )
        non_whitelisted_persona = _create_persona(
            db_session,
            name="non-whitelisted-persona",
            provider_name=public_restricted.name,
        )

        # Only whitelist one persona
        db_session.add(
            LLMProvider__Persona(
                llm_provider_id=public_restricted.id,
                persona_id=whitelisted_persona.id,
            )
        )
        db_session.flush()
        db_session.refresh(public_restricted)

        admin_model = db_session.get(User, admin_user.id)
        assert admin_model is not None
        admin_group_ids = fetch_user_group_ids(db_session, admin_model)

        # Whitelisted persona — should be allowed
        assert can_user_access_llm_provider(
            public_restricted,
            admin_group_ids,
            whitelisted_persona,
        )

        # Non-whitelisted persona — should be denied despite is_public=True
        assert not can_user_access_llm_provider(
            public_restricted,
            admin_group_ids,
            non_whitelisted_persona,
        )

        # No persona context (e.g. global provider list) — should be denied
        # because provider has persona restrictions set
        assert not can_user_access_llm_provider(
            public_restricted,
            admin_group_ids,
            persona=None,
        )


def test_public_provider_without_persona_restrictions(
    users: tuple[DATestUser, DATestUser],
) -> None:
    """Public providers with no persona restrictions remain accessible to all."""
    admin_user, basic_user = users

    with get_session_with_current_tenant() as db_session:
        public_unrestricted = _create_llm_provider(
            db_session,
            name="public-unrestricted",
            default_model_name="gpt-4o",
            is_public=True,
            is_default=True,
        )

        any_persona = _create_persona(
            db_session,
            name="any-persona",
            provider_name=public_unrestricted.name,
        )

        admin_model = db_session.get(User, admin_user.id)
        basic_model = db_session.get(User, basic_user.id)
        assert admin_model is not None
        assert basic_model is not None

        admin_group_ids = fetch_user_group_ids(db_session, admin_model)
        basic_group_ids = fetch_user_group_ids(db_session, basic_model)

        # Any user, any persona — all allowed
        assert can_user_access_llm_provider(
            public_unrestricted, admin_group_ids, any_persona
        )
        assert can_user_access_llm_provider(
            public_unrestricted, basic_group_ids, any_persona
        )
        assert can_user_access_llm_provider(
            public_unrestricted, admin_group_ids, persona=None
        )


def test_get_llm_for_persona_falls_back_when_access_denied(
    users: tuple[DATestUser, DATestUser],
) -> None:
    admin_user, basic_user = users

    with get_session_with_current_tenant() as db_session:
        default_provider = _create_llm_provider(
            db_session,
            name="default-provider",
            default_model_name="gpt-4o",
            is_public=True,
            is_default=True,
        )
        restricted_provider = _create_llm_provider(
            db_session,
            name="restricted-provider",
            default_model_name="gpt-4o-mini",
            is_public=False,
            is_default=False,
        )

        persona = _create_persona(
            db_session,
            name="fallback-persona",
            provider_name=restricted_provider.name,
        )

        access_group = UserGroup(name="persona-group")
        db_session.add(access_group)
        db_session.flush()

        db_session.add(
            LLMProvider__UserGroup(
                llm_provider_id=restricted_provider.id,
                user_group_id=access_group.id,
            )
        )
        db_session.add(
            User__UserGroup(
                user_group_id=access_group.id,
                user_id=admin_user.id,
            )
        )
        db_session.flush()
        db_session.commit()

        db_session.refresh(default_provider)
        db_session.refresh(restricted_provider)
        db_session.refresh(persona)

        admin_model = db_session.get(User, admin_user.id)
        basic_model = db_session.get(User, basic_user.id)

        assert admin_model is not None
        assert basic_model is not None

        allowed_llm = get_llm_for_persona(
            persona=persona,
            user=admin_model,
        )
        assert (
            allowed_llm.config.model_name
            == restricted_provider.model_configurations[0].name
        )

        fallback_llm = get_llm_for_persona(
            persona=persona,
            user=basic_model,
        )
        assert (
            fallback_llm.config.model_name
            == default_provider.model_configurations[0].name
        )


def test_list_llm_provider_basics_excludes_non_public_unrestricted(
    users: tuple[DATestUser, DATestUser],
) -> None:
    """Test that the /llm/provider endpoint correctly excludes non-public providers
    with no group/persona restrictions.

    This tests the fix for the bug where non-public providers with no restrictions
    were incorrectly shown to all users instead of being admin-only.
    """
    admin_user, basic_user = users

    # Create a public provider (should be visible to all)
    public_provider = LLMProviderManager.create(
        name="public-provider",
        is_public=True,
        set_as_default=True,
        default_model_name="gpt-4o",
        user_performing_action=admin_user,
    )

    # Create a non-public provider with no restrictions (should be admin-only)
    non_public_provider = LLMProviderManager.create(
        name="non-public-unrestricted",
        is_public=False,
        groups=[],
        personas=[],
        set_as_default=False,
        user_performing_action=admin_user,
    )

    # Non-admin user calls the /llm/provider endpoint
    response = requests.get(
        f"{API_SERVER_URL}/llm/provider",
        headers=basic_user.headers,
    )
    assert response.status_code == 200
    providers = response.json()["providers"]
    provider_names = [p["name"] for p in providers]

    # Public provider should be visible
    assert public_provider.name in provider_names

    # Non-public provider with no restrictions should NOT be visible to non-admin
    assert non_public_provider.name not in provider_names

    # Admin user should see both providers
    admin_response = requests.get(
        f"{API_SERVER_URL}/llm/provider",
        headers=admin_user.headers,
    )
    assert admin_response.status_code == 200
    admin_providers = admin_response.json()["providers"]
    admin_provider_names = [p["name"] for p in admin_providers]

    assert public_provider.name in admin_provider_names
    assert non_public_provider.name in admin_provider_names


def test_provider_delete_clears_persona_references(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Test that deleting a provider automatically clears persona references."""
    admin_user = UserManager.create(name="admin_user")

    # Create a default provider first so personas have something to fall back to
    LLMProviderManager.create(
        name="default-provider",
        is_public=True,
        set_as_default=True,
        default_model_name="gpt-4o",
        user_performing_action=admin_user,
    )

    provider = LLMProviderManager.create(
        is_public=False,
        set_as_default=False,
        user_performing_action=admin_user,
    )
    persona = PersonaManager.create(
        llm_model_provider_override=provider.name,
        user_performing_action=admin_user,
    )

    # Delete the provider - should succeed and automatically clear persona references
    assert LLMProviderManager.delete(
        provider,
        user_performing_action=admin_user,
    )

    # Verify the persona now falls back to default (llm_model_provider_override cleared)
    persona_response = requests.get(
        f"{API_SERVER_URL}/persona/{persona.id}",
        headers=admin_user.headers,
    )
    assert persona_response.status_code == 200
    updated_persona = persona_response.json()
    assert updated_persona["llm_model_provider_override"] is None


================================================
FILE: backend/tests/integration/tests/llm_provider/test_llm_provider_persona_access.py
================================================
"""
Integration tests for LLM Provider persona access authorization.
"""

import os

import pytest
import requests

from onyx.llm.constants import LlmProviderNames
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


pytestmark = pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="LLM provider persona access is enterprise only",
)


@pytest.fixture()
def users_and_groups(
    reset: None,  # noqa: ARG001
) -> tuple[DATestUser, DATestUser, int, int]:
    """Create admin, basic user, and two user groups."""
    admin_user = UserManager.create(name="admin_user")
    basic_user = UserManager.create(name="basic_user")

    # Create two user groups
    group1 = UserGroupManager.create(
        user_performing_action=admin_user,
        name="test_group_1",
        user_ids=[basic_user.id],
    )

    group2 = UserGroupManager.create(
        user_performing_action=admin_user,
        name="test_group_2",
        user_ids=[],  # basic_user is NOT in this group
    )

    return admin_user, basic_user, group1.id, group2.id


def test_unauthorized_persona_access_returns_403(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that users cannot query providers for personas they don't have access to."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Create a persona restricted to group2 (which basic_user is NOT in)
    restricted_persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Restricted Persona",
        description="Only accessible to group2",
        is_public=False,
        groups=[group2_id],
    )

    # Try to query providers for the restricted persona as basic_user
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/{restricted_persona.id}/providers",
        headers=basic_user.headers,
    )

    # Should return 403 Forbidden
    assert response.status_code == 403
    assert "don't have access to this assistant" in response.json()["detail"]


def test_authorized_persona_access_returns_filtered_providers(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that users can query providers for personas they have access to."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Create a persona accessible to group1 (which basic_user IS in)
    accessible_persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Accessible Persona",
        description="Accessible to group1",
        is_public=False,
        groups=[group1_id],
    )

    # Create a restricted provider accessible only to the persona
    restricted_provider = LLMProviderManager.create(
        user_performing_action=admin_user,
        name="Restricted Provider",
        provider=LlmProviderNames.OPENAI,
        api_key="test-key",
        default_model_name="gpt-4o",
        is_public=False,
        groups=[],
        personas=[accessible_persona.id],
    )

    # Query providers for the accessible persona as basic_user
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/{accessible_persona.id}/providers",
        headers=basic_user.headers,
    )

    # Should succeed
    assert response.status_code == 200
    providers = response.json()["providers"]

    # Should include the restricted provider since basic_user can access the persona
    provider_names = [p["name"] for p in providers]
    assert restricted_provider.name in provider_names


def test_persona_id_zero_applies_rbac(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that persona_id=0 (default persona) properly applies RBAC."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Create a restricted provider accessible only to group2
    restricted_provider = LLMProviderManager.create(
        user_performing_action=admin_user,
        name="Group2 Only Provider",
        provider=LlmProviderNames.OPENAI,
        api_key="test-key",
        default_model_name="gpt-4o",
        is_public=False,
        groups=[group2_id],
        personas=[],
    )

    # Query providers with persona_id=0 as basic_user
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/0/providers",
        headers=basic_user.headers,
    )

    # Should succeed (persona_id=0 refers to default persona, which is public)
    assert response.status_code == 200
    providers = response.json()["providers"]

    # Should NOT include the restricted provider since basic_user is not in group2
    provider_names = [p["name"] for p in providers]
    assert restricted_provider.name not in provider_names


def test_admin_can_query_any_persona(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that admin users can query any persona's providers."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Create a persona restricted to group2 (admin is not explicitly in this group)
    restricted_persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Admin Test Persona",
        description="Only accessible to group2",
        is_public=False,
        groups=[group2_id],
    )

    # Create a restricted provider accessible only to the persona
    restricted_provider = LLMProviderManager.create(
        user_performing_action=admin_user,
        name="Admin Test Provider",
        provider=LlmProviderNames.OPENAI,
        api_key="test-key",
        default_model_name="gpt-4o",
        is_public=False,
        groups=[],
        personas=[restricted_persona.id],
    )

    # Query providers for the restricted persona as admin_user
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/{restricted_persona.id}/providers",
        headers=admin_user.headers,
    )

    # Should succeed - admins can access any persona
    assert response.status_code == 200
    providers = response.json()["providers"]

    # Should include the restricted provider
    provider_names = [p["name"] for p in providers]
    assert restricted_provider.name in provider_names


def test_public_persona_accessible_to_all(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that public personas are accessible to all users."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Create a public LLM provider so there's something to return
    public_provider = LLMProviderManager.create(
        user_performing_action=admin_user,
        name="Public Provider",
        provider=LlmProviderNames.OPENAI,
        api_key="test-key",
        default_model_name="gpt-4o",
        is_public=True,
        set_as_default=True,
    )

    # Create a public persona
    public_persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Public Persona",
        description="Accessible to everyone",
        is_public=True,
        groups=[],
    )

    # Query providers for the public persona as basic_user
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/{public_persona.id}/providers",
        headers=basic_user.headers,
    )

    # Should succeed
    assert response.status_code == 200
    providers = response.json()["providers"]

    # Should return the public provider
    assert len(providers) > 0
    provider_names = [p["name"] for p in providers]
    assert public_provider.name in provider_names


def test_nonexistent_persona_returns_404(
    users_and_groups: tuple[DATestUser, DATestUser, int, int],
) -> None:
    """Test that querying a nonexistent persona returns 404."""
    admin_user, basic_user, group1_id, group2_id = users_and_groups

    # Query providers for a nonexistent persona
    response = requests.get(
        f"{API_SERVER_URL}/llm/persona/99999/providers",
        headers=basic_user.headers,
    )

    # Should return 404
    assert response.status_code == 404
    assert "Persona not found" in response.json()["detail"]


================================================
FILE: backend/tests/integration/tests/llm_workflows/test_mock_llm_tool_calls.py
================================================
from onyx.configs import app_configs
from onyx.configs.constants import DocumentSource
from onyx.tools.constants import SEARCH_TOOL_ID
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.tool import ToolManager
from tests.integration.common_utils.test_models import DATestUser


_DUMMY_OPENAI_API_KEY = "sk-mock-llm-workflow-tests"


def _get_internal_search_tool_id(admin_user: DATestUser) -> int:
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    for tool in tools:
        if tool.in_code_tool_id == SEARCH_TOOL_ID:
            return tool.id
    raise AssertionError("SearchTool must exist for this test")


def _assert_integration_mode_enabled() -> None:
    assert (
        app_configs.INTEGRATION_TESTS_MODE is True
    ), "Integration tests require INTEGRATION_TESTS_MODE=true."


def _seed_connector_for_search_tool(admin_user: DATestUser) -> None:
    # SearchTool is only exposed when at least one non-default connector exists.
    CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )


def test_mock_llm_response_single_tool_call_debug(admin_user: DATestUser) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)

    LLMProviderManager.create(
        user_performing_action=admin_user,
        api_key=_DUMMY_OPENAI_API_KEY,
    )
    chat_session = ChatSessionManager.create(user_performing_action=admin_user)
    search_tool_id = _get_internal_search_tool_id(admin_user)

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="run the search tool",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert len(response.tool_call_debug) == 1
    assert response.tool_call_debug[0].tool_name == "internal_search"
    assert response.tool_call_debug[0].tool_args == {"queries": ["alpha"]}


def test_mock_llm_response_parallel_tool_call_debug(admin_user: DATestUser) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)

    LLMProviderManager.create(
        user_performing_action=admin_user,
        api_key=_DUMMY_OPENAI_API_KEY,
    )
    chat_session = ChatSessionManager.create(user_performing_action=admin_user)
    search_tool_id = _get_internal_search_tool_id(admin_user)

    mock_response = "\n".join(
        [
            '{"name":"internal_search","arguments":{"queries":["alpha"]}}',
            '{"name":"internal_search","arguments":{"queries":["beta"]}}',
        ]
    )
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="run the search tool twice",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
        mock_llm_response=mock_response,
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert len(response.tool_call_debug) == 2
    assert [entry.tool_name for entry in response.tool_call_debug] == [
        "internal_search",
        "internal_search",
    ]
    assert [entry.tool_args for entry in response.tool_call_debug] == [
        {"queries": ["alpha"]},
        {"queries": ["beta"]},
    ]


def test_mock_llm_response_embedded_json_fallback_tool_call_debug(
    admin_user: DATestUser,
) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)

    LLMProviderManager.create(
        user_performing_action=admin_user,
        api_key=_DUMMY_OPENAI_API_KEY,
    )
    chat_session = ChatSessionManager.create(user_performing_action=admin_user)
    search_tool_id = _get_internal_search_tool_id(admin_user)

    # Validate fallback extraction when the model returns tool-call JSON embedded in
    # normal assistant text instead of structured tool_call objects.
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="use the search tool",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
        mock_llm_response=(
            'I will call a tool now. {"name":"internal_search","arguments":{"queries":["gamma"]}}'
        ),
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert len(response.tool_call_debug) == 1
    assert response.tool_call_debug[0].tool_name == "internal_search"
    assert response.tool_call_debug[0].tool_args == {"queries": ["gamma"]}


================================================
FILE: backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
================================================
import json
import os
import time
from uuid import uuid4

import pytest
import requests
from pydantic import BaseModel
from pydantic import ConfigDict

from onyx.configs import app_configs
from onyx.configs.constants import DocumentSource
from onyx.tools.constants import SEARCH_TOOL_ID
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.tool import ToolManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ToolName


_ENV_PROVIDER = "NIGHTLY_LLM_PROVIDER"
_ENV_MODELS = "NIGHTLY_LLM_MODELS"
_ENV_API_KEY = "NIGHTLY_LLM_API_KEY"
_ENV_API_BASE = "NIGHTLY_LLM_API_BASE"
_ENV_API_VERSION = "NIGHTLY_LLM_API_VERSION"
_ENV_DEPLOYMENT_NAME = "NIGHTLY_LLM_DEPLOYMENT_NAME"
_ENV_CUSTOM_CONFIG_JSON = "NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
_ENV_STRICT = "NIGHTLY_LLM_STRICT"


class NightlyProviderConfig(BaseModel):
    model_config = ConfigDict(frozen=True)

    provider: str
    model_names: list[str]
    api_key: str | None
    api_base: str | None
    api_version: str | None
    deployment_name: str | None
    custom_config: dict[str, str] | None
    strict: bool


def _stringify_custom_config_value(value: object) -> str:
    if isinstance(value, str):
        return value
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return str(value)


def _looks_like_vertex_credentials_payload(
    raw_custom_config: dict[object, object],
) -> bool:
    normalized_keys = {str(key).strip().lower() for key in raw_custom_config}
    provider_specific_keys = {
        "vertex_credentials",
        "credentials_file",
        "vertex_credentials_file",
        "google_application_credentials",
        "vertex_location",
        "location",
        "vertex_region",
        "region",
    }
    if normalized_keys & provider_specific_keys:
        return False

    normalized_type = str(raw_custom_config.get("type", "")).strip().lower()
    if normalized_type not in {"service_account", "external_account"}:
        return False

    # Service account JSON usually includes private_key/client_email, while external
    # account JSON includes credential_source. Either shape should be accepted.
    has_service_account_markers = any(
        key in normalized_keys for key in {"private_key", "client_email"}
    )
    has_external_account_markers = "credential_source" in normalized_keys
    return has_service_account_markers or has_external_account_markers


def _normalize_custom_config(
    provider: str, raw_custom_config: dict[object, object]
) -> dict[str, str]:
    if provider == "vertex_ai" and _looks_like_vertex_credentials_payload(
        raw_custom_config
    ):
        return {"vertex_credentials": json.dumps(raw_custom_config)}

    normalized: dict[str, str] = {}
    for raw_key, raw_value in raw_custom_config.items():
        key = str(raw_key).strip()
        key_lower = key.lower()

        if provider == "vertex_ai":
            if key_lower in {
                "vertex_credentials",
                "credentials_file",
                "vertex_credentials_file",
                "google_application_credentials",
            }:
                key = "vertex_credentials"
            elif key_lower in {
                "vertex_location",
                "location",
                "vertex_region",
                "region",
            }:
                key = "vertex_location"

        normalized[key] = _stringify_custom_config_value(raw_value)

    return normalized


def _env_true(env_var: str, default: bool = False) -> bool:
    value = os.environ.get(env_var)
    if value is None:
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}


def _parse_models_env(env_var: str) -> list[str]:
    raw_value = os.environ.get(env_var, "").strip()
    if not raw_value:
        return []

    try:
        parsed_json = json.loads(raw_value)
    except json.JSONDecodeError:
        parsed_json = None

    if isinstance(parsed_json, list):
        return [str(model).strip() for model in parsed_json if str(model).strip()]

    return [part.strip() for part in raw_value.split(",") if part.strip()]


def _load_provider_config() -> NightlyProviderConfig:
    provider = os.environ.get(_ENV_PROVIDER, "").strip().lower()
    model_names = _parse_models_env(_ENV_MODELS)
    api_key = os.environ.get(_ENV_API_KEY) or None
    api_base = os.environ.get(_ENV_API_BASE) or None
    api_version = os.environ.get(_ENV_API_VERSION) or None
    deployment_name = os.environ.get(_ENV_DEPLOYMENT_NAME) or None
    strict = _env_true(_ENV_STRICT, default=False)

    custom_config: dict[str, str] | None = None
    custom_config_json = os.environ.get(_ENV_CUSTOM_CONFIG_JSON, "").strip()
    if custom_config_json:
        parsed = json.loads(custom_config_json)
        if not isinstance(parsed, dict):
            raise ValueError(f"{_ENV_CUSTOM_CONFIG_JSON} must be a JSON object")
        custom_config = _normalize_custom_config(
            provider=provider, raw_custom_config=parsed
        )

    if provider == "ollama_chat" and api_key and not custom_config:
        custom_config = {"OLLAMA_API_KEY": api_key}

    return NightlyProviderConfig(
        provider=provider,
        model_names=model_names,
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        deployment_name=deployment_name,
        custom_config=custom_config,
        strict=strict,
    )


def _skip_or_fail(strict: bool, message: str) -> None:
    if strict:
        pytest.fail(message)
    pytest.skip(message)


def _validate_provider_config(config: NightlyProviderConfig) -> None:
    if not config.provider:
        _skip_or_fail(strict=config.strict, message=f"{_ENV_PROVIDER} must be set")

    if not config.model_names:
        _skip_or_fail(
            strict=config.strict,
            message=f"{_ENV_MODELS} must include at least one model",
        )

    if config.provider != "ollama_chat" and not (
        config.api_key or config.custom_config
    ):
        _skip_or_fail(
            strict=config.strict,
            message=(
                f"{_ENV_API_KEY} or {_ENV_CUSTOM_CONFIG_JSON} is required for provider '{config.provider}'"
            ),
        )

    if config.provider == "ollama_chat" and not (
        config.api_base or _default_api_base_for_provider(config.provider)
    ):
        _skip_or_fail(
            strict=config.strict,
            message=(f"{_ENV_API_BASE} is required for provider '{config.provider}'"),
        )

    if config.provider == "azure":
        if not config.api_base:
            _skip_or_fail(
                strict=config.strict,
                message=(
                    f"{_ENV_API_BASE} is required for provider '{config.provider}'"
                ),
            )
        if not config.api_version:
            _skip_or_fail(
                strict=config.strict,
                message=(
                    f"{_ENV_API_VERSION} is required for provider '{config.provider}'"
                ),
            )

    if config.provider == "vertex_ai":
        has_vertex_credentials = bool(
            config.custom_config and config.custom_config.get("vertex_credentials")
        )
        if not has_vertex_credentials:
            configured_keys = (
                sorted(config.custom_config.keys()) if config.custom_config else []
            )
            _skip_or_fail(
                strict=config.strict,
                message=(
                    f"{_ENV_CUSTOM_CONFIG_JSON} must include 'vertex_credentials' "
                    f"for provider '{config.provider}'. "
                    f"Found keys: {configured_keys}"
                ),
            )


def _assert_integration_mode_enabled() -> None:
    assert (
        app_configs.INTEGRATION_TESTS_MODE is True
    ), "Integration tests require INTEGRATION_TESTS_MODE=true."


def _seed_connector_for_search_tool(admin_user: DATestUser) -> None:
    # SearchTool is only exposed when at least one non-default connector exists.
    CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )


def _get_internal_search_tool_id(admin_user: DATestUser) -> int:
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    for tool in tools:
        if tool.in_code_tool_id == SEARCH_TOOL_ID:
            return tool.id
    raise AssertionError("SearchTool must exist for this test")


def _default_api_base_for_provider(provider: str) -> str | None:
    if provider == "openrouter":
        return "https://openrouter.ai/api/v1"
    if provider == "ollama_chat":
        # host.docker.internal works when tests are running inside the integration test container.
        return "http://host.docker.internal:11434"
    return None


def _create_provider_payload(
    provider: str,
    provider_name: str,
    model_name: str,
    api_key: str | None,
    api_base: str | None,
    api_version: str | None,
    deployment_name: str | None,
    custom_config: dict[str, str] | None,
) -> dict:
    return {
        "name": provider_name,
        "provider": provider,
        "model": model_name,
        "api_key": api_key,
        "api_base": api_base,
        "api_version": api_version,
        "deployment_name": deployment_name,
        "custom_config": custom_config,
        "default_model_name": model_name,
        "is_public": True,
        "groups": [],
        "personas": [],
        "model_configurations": [{"name": model_name, "is_visible": True}],
        "api_key_changed": bool(api_key),
        "custom_config_changed": bool(custom_config),
    }


def _ensure_provider_is_default(
    provider_id: int, model_name: str, admin_user: DATestUser
) -> None:
    list_response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    list_response.raise_for_status()
    default_text = list_response.json().get("default_text")
    assert default_text is not None, "Expected a default provider after setting default"
    assert (
        default_text.get("provider_id") == provider_id
    ), f"Expected provider {provider_id} to be default, found {default_text.get('provider_id')}"
    assert (
        default_text.get("model_name") == model_name
    ), f"Expected default model {model_name}, found {default_text.get('model_name')}"


def _run_chat_assertions(
    admin_user: DATestUser,
    search_tool_id: int,
    provider: str,
    model_name: str,
) -> None:
    last_error: str | None = None
    # Retry once to reduce transient nightly flakes due provider-side blips.
    for attempt in range(1, 3):
        chat_session = ChatSessionManager.create(user_performing_action=admin_user)

        response = ChatSessionManager.send_message(
            chat_session_id=chat_session.id,
            message=(
                "Use internal_search to search for 'nightly-provider-regression-sentinel', "
                "then summarize the result in one short sentence."
            ),
            user_performing_action=admin_user,
            forced_tool_ids=[search_tool_id],
        )

        if response.error is None:
            used_internal_search = any(
                used_tool.tool_name == ToolName.INTERNAL_SEARCH
                for used_tool in response.used_tools
            )
            debug_has_internal_search = any(
                debug_tool_call.tool_name == "internal_search"
                for debug_tool_call in response.tool_call_debug
            )
            has_answer = bool(response.full_message.strip())

            if used_internal_search and debug_has_internal_search and has_answer:
                return

            last_error = (
                f"attempt={attempt} provider={provider} model={model_name} "
                f"used_internal_search={used_internal_search} "
                f"debug_internal_search={debug_has_internal_search} "
                f"has_answer={has_answer} "
                f"tool_call_debug={response.tool_call_debug}"
            )
        else:
            last_error = f"attempt={attempt} provider={provider} model={model_name} stream_error={response.error.error}"

        time.sleep(attempt)

    pytest.fail(f"Chat/tool-call assertions failed: {last_error}")


def _create_and_test_provider_for_model(
    admin_user: DATestUser,
    config: NightlyProviderConfig,
    model_name: str,
    search_tool_id: int,
) -> None:
    provider_name = f"nightly-{config.provider}-{uuid4().hex[:12]}"
    resolved_api_base = config.api_base or _default_api_base_for_provider(
        config.provider
    )

    provider_payload = _create_provider_payload(
        provider=config.provider,
        provider_name=provider_name,
        model_name=model_name,
        api_key=config.api_key,
        api_base=resolved_api_base,
        api_version=config.api_version,
        deployment_name=config.deployment_name,
        custom_config=config.custom_config,
    )

    test_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/test",
        headers=admin_user.headers,
        json=provider_payload,
    )
    assert test_response.status_code == 200, (
        f"Provider test endpoint failed for provider={config.provider} "
        f"model={model_name}: {test_response.status_code} {test_response.text}"
    )

    create_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=admin_user.headers,
        json=provider_payload,
    )
    assert create_response.status_code == 200, (
        f"Provider creation failed for provider={config.provider} "
        f"model={model_name}: {create_response.status_code} {create_response.text}"
    )
    provider_id = create_response.json()["id"]

    try:
        set_default_response = requests.post(
            f"{API_SERVER_URL}/admin/llm/default",
            headers=admin_user.headers,
            json={"provider_id": provider_id, "model_name": model_name},
        )
        assert set_default_response.status_code == 200, (
            f"Setting default provider failed for provider={config.provider} "
            f"model={model_name}: {set_default_response.status_code} "
            f"{set_default_response.text}"
        )

        _ensure_provider_is_default(
            provider_id=provider_id, model_name=model_name, admin_user=admin_user
        )
        _run_chat_assertions(
            admin_user=admin_user,
            search_tool_id=search_tool_id,
            provider=config.provider,
            model_name=model_name,
        )
    finally:
        requests.delete(
            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}",
            headers=admin_user.headers,
        )


def test_nightly_provider_chat_workflow(admin_user: DATestUser) -> None:
    """Nightly regression test for provider setup + default selection + chat tool calls."""
    _assert_integration_mode_enabled()
    config = _load_provider_config()
    _validate_provider_config(config)

    _seed_connector_for_search_tool(admin_user)
    search_tool_id = _get_internal_search_tool_id(admin_user)

    failures: list[str] = []
    for model_name in config.model_names:
        try:
            _create_and_test_provider_for_model(
                admin_user=admin_user,
                config=config,
                model_name=model_name,
                search_tool_id=search_tool_id,
            )
        except BaseException as exc:
            if isinstance(exc, (KeyboardInterrupt, SystemExit)):
                raise
            failures.append(
                f"provider={config.provider} model={model_name} error={type(exc).__name__}: {exc}"
            )

    if failures:
        pytest.fail("Nightly provider chat failures:\n" + "\n".join(failures))


================================================
FILE: backend/tests/integration/tests/llm_workflows/test_tool_policy_enforcement.py
================================================
from onyx.configs import app_configs
from onyx.configs.constants import DocumentSource
from onyx.tools.constants import SEARCH_TOOL_ID
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.tool import ToolManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ToolName


_DUMMY_OPENAI_API_KEY = "sk-mock-tool-policy-tests"


def _assert_integration_mode_enabled() -> None:
    assert (
        app_configs.INTEGRATION_TESTS_MODE is True
    ), "Integration tests require INTEGRATION_TESTS_MODE=true."


def _seed_connector_for_search_tool(admin_user: DATestUser) -> None:
    # SearchTool is only exposed when at least one non-default connector exists.
    CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )


def _get_internal_search_tool_id(admin_user: DATestUser) -> int:
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    for tool in tools:
        if tool.in_code_tool_id == SEARCH_TOOL_ID:
            return tool.id
    raise AssertionError("SearchTool must exist for this test")


def _ensure_llm_provider(admin_user: DATestUser) -> None:
    LLMProviderManager.create(
        user_performing_action=admin_user,
        api_key=_DUMMY_OPENAI_API_KEY,
    )


def test_forced_tool_executes_when_available(admin_user: DATestUser) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)
    _ensure_llm_provider(admin_user)

    search_tool_id = _get_internal_search_tool_id(admin_user)
    persona = PersonaManager.create(
        tool_ids=[search_tool_id], user_performing_action=admin_user
    )
    chat_session = ChatSessionManager.create(
        persona_id=persona.id, user_performing_action=admin_user
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="force the search tool",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert any(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response.used_tools
    )
    assert len(response.tool_call_debug) == 1
    assert response.tool_call_debug[0].tool_name == "internal_search"
    assert response.tool_call_debug[0].tool_args == {"queries": ["alpha"]}


def test_forced_tool_rejected_when_not_in_persona_tools(
    admin_user: DATestUser,
) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)
    _ensure_llm_provider(admin_user)

    search_tool_id = _get_internal_search_tool_id(admin_user)
    persona = PersonaManager.create(tool_ids=[], user_performing_action=admin_user)
    chat_session = ChatSessionManager.create(
        persona_id=persona.id, user_performing_action=admin_user
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="try forcing a missing tool",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
    )

    assert response.error is not None
    assert response.error.error == f"Forced tool {search_tool_id} not found in tools"
    assert response.used_tools == []


def test_allowed_tool_ids_excludes_tools_outside_allowlist(
    admin_user: DATestUser,
) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)
    _ensure_llm_provider(admin_user)

    search_tool_id = _get_internal_search_tool_id(admin_user)
    persona = PersonaManager.create(
        tool_ids=[search_tool_id], user_performing_action=admin_user
    )
    chat_session = ChatSessionManager.create(
        persona_id=persona.id, user_performing_action=admin_user
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="attempt tool use with empty allowlist",
        user_performing_action=admin_user,
        allowed_tool_ids=[],
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["beta"]}}',
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert response.used_tools == []
    assert response.tool_call_debug == []


def test_forced_and_allowlist_conflict_returns_validation_error(
    admin_user: DATestUser,
) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)
    _ensure_llm_provider(admin_user)

    search_tool_id = _get_internal_search_tool_id(admin_user)
    persona = PersonaManager.create(
        tool_ids=[search_tool_id], user_performing_action=admin_user
    )
    chat_session = ChatSessionManager.create(
        persona_id=persona.id, user_performing_action=admin_user
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="force a tool blocked by allowlist",
        user_performing_action=admin_user,
        allowed_tool_ids=[],
        forced_tool_ids=[search_tool_id],
    )

    assert response.error is not None
    assert response.error.error == f"Forced tool {search_tool_id} not found in tools"
    assert response.used_tools == []


def test_run_search_always_maps_to_forced_search_tool(admin_user: DATestUser) -> None:
    _assert_integration_mode_enabled()
    _seed_connector_for_search_tool(admin_user)
    _ensure_llm_provider(admin_user)

    search_tool_id = _get_internal_search_tool_id(admin_user)
    persona = PersonaManager.create(
        tool_ids=[search_tool_id], user_performing_action=admin_user
    )
    chat_session = ChatSessionManager.create(
        persona_id=persona.id, user_performing_action=admin_user
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="always run search",
        user_performing_action=admin_user,
        forced_tool_ids=[search_tool_id],
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["gamma"]}}',
    )

    assert response.error is None, f"Unexpected stream error: {response.error}"
    assert any(
        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response.used_tools
    )
    assert len(response.tool_call_debug) == 1
    assert response.tool_call_debug[0].tool_name == "internal_search"
    assert response.tool_call_debug[0].tool_args == {"queries": ["gamma"]}


================================================
FILE: backend/tests/integration/tests/mcp/test_mcp_client_no_auth_flow.py
================================================
import os
import socket
import subprocess
import sys
import time
from collections.abc import Generator
from pathlib import Path

import pytest
import requests

from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser

# TODO: update mcp client tests to use constants in common_utils/constants.py
# NOTE: the tests for client should be independent of the Onyx MCP server
# This means the port should probably stay to be 8010/not 8090 the Onyx MCP server port
# Use MOCK_MCP_SERVER_PORT to avoid conflicts with the real Onyx MCP server port (8090)
MCP_SERVER_HOST = os.getenv("TEST_WEB_HOSTNAME", "127.0.0.1")
MCP_SERVER_PORT = int(os.getenv("MOCK_MCP_SERVER_PORT", "8010"))
MCP_SERVER_URL = f"http://{MCP_SERVER_HOST}:{MCP_SERVER_PORT}/mcp"
MCP_HELLO_TOOL = "hello"

MCP_SERVER_SCRIPT = (
    Path(__file__).resolve().parents[2]
    / "mock_services"
    / "mcp_test_server"
    / "run_mcp_server_no_auth.py"
)


def _wait_for_port(
    host: str,
    port: int,
    process: subprocess.Popen[bytes],
    timeout_seconds: float = 10.0,
) -> None:
    start = time.monotonic()
    while time.monotonic() - start < timeout_seconds:
        if process.poll() is not None:
            raise RuntimeError("MCP server process exited unexpectedly during startup")

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(0.5)
            try:
                sock.connect((host, port))
                return
            except OSError:
                time.sleep(0.1)

    raise TimeoutError("Timed out waiting for MCP server to accept connections")


@pytest.fixture(scope="module")
def mcp_no_auth_server() -> Generator[None, None, None]:
    process = subprocess.Popen(
        [sys.executable, str(MCP_SERVER_SCRIPT), str(MCP_SERVER_PORT)],
        cwd=MCP_SERVER_SCRIPT.parent,
    )

    try:
        _wait_for_port(MCP_SERVER_HOST, MCP_SERVER_PORT, process)
        yield
    finally:
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            process.kill()


@pytest.fixture(scope="module", autouse=True)
def ensure_mcp_server_exists() -> None:
    if not MCP_SERVER_SCRIPT.exists():
        raise FileNotFoundError(
            f"Expected MCP server script at {MCP_SERVER_SCRIPT}, but it was not found"
        )


def test_mcp_client_no_auth_flow(
    mcp_no_auth_server: None,  # noqa: ARG001
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    # Step a) Create a no-auth MCP server via the admin API
    create_response = requests.post(
        f"{API_SERVER_URL}/admin/mcp/servers/create",
        json={
            "name": "integration-mcp-no-auth",
            "description": "Integration test MCP server",
            "server_url": MCP_SERVER_URL,
            "transport": MCPTransport.STREAMABLE_HTTP.value,
            "auth_type": MCPAuthenticationType.NONE.value,
            "auth_performer": MCPAuthenticationPerformer.ADMIN.value,
        },
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    create_response.raise_for_status()
    server_id = create_response.json()["server_id"]

    # Step b) list the server's tools
    tools_response = requests.get(
        f"{API_SERVER_URL}/admin/mcp/server/{server_id}/tools",
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    tools_response.raise_for_status()
    tool_entries = tools_response.json()["tools"]
    assert len(tool_entries) == 101

    # Update server status to CONNECTED
    status_response = requests.patch(
        f"{API_SERVER_URL}/admin/mcp/server/{server_id}/status",
        params={"status": "CONNECTED"},
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    status_response.raise_for_status()

    tools_response = requests.get(
        f"{API_SERVER_URL}/admin/mcp/server/{server_id}/db-tools",
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    tools_response.raise_for_status()
    tool_entries = tools_response.json()["tools"]
    hello_tool_entry = next(
        tool for tool in tool_entries if tool["name"] == MCP_HELLO_TOOL
    )
    tool_id = hello_tool_entry["id"]

    # Step c) Create an assistant (persona) with the MCP tool attached
    persona = PersonaManager.create(
        name="integration-mcp-persona",
        description="Persona for MCP integration test",
        tool_ids=[tool_id],
        user_performing_action=admin_user,
    )
    persona_tools_response = requests.get(
        f"{API_SERVER_URL}/persona",
        headers=basic_user.headers,
        cookies=basic_user.cookies,
    )
    persona_tools_response.raise_for_status()
    persona_entries = persona_tools_response.json()
    persona_entry = next(
        entry for entry in persona_entries if entry["id"] == persona.id
    )
    persona_tool_ids = {tool["id"] for tool in persona_entry["tools"]}
    assert tool_id in persona_tool_ids


================================================
FILE: backend/tests/integration/tests/mcp/test_mcp_server_auth.py
================================================
"""Integration tests for MCP Server auth delegated to API /me."""

import requests

from tests.integration.common_utils.constants import MCP_SERVER_URL
from tests.integration.common_utils.managers.pat import PATManager
from tests.integration.common_utils.test_models import DATestUser


STREAMABLE_HTTP_URL = f"{MCP_SERVER_URL.rstrip('/')}/?transportType=streamable-http"


def test_mcp_server_health_check(reset: None) -> None:  # noqa: ARG001
    """Test MCP server health check endpoint."""
    response = requests.get(f"{MCP_SERVER_URL}/health", timeout=10)
    assert response.status_code == 200
    assert response.json()["status"] == "healthy"
    assert response.json()["service"] == "mcp_server"


def test_mcp_server_auth_missing_token(reset: None) -> None:  # noqa: ARG001
    """Test MCP server rejects requests without credentials."""
    response = requests.post(STREAMABLE_HTTP_URL)
    assert response.status_code == 401


def test_mcp_server_auth_invalid_token(reset: None) -> None:  # noqa: ARG001
    """Test MCP server rejects requests with an invalid bearer token."""
    response = requests.post(
        STREAMABLE_HTTP_URL,
        headers={"Authorization": "Bearer invalid-token"},
        json={"jsonrpc": "2.0", "method": "initialize", "id": 1},
    )
    assert response.status_code == 401


def test_mcp_server_auth_valid_token(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test MCP server accepts requests with a valid bearer token."""
    pat = PATManager.create(
        name="Test MCP Token",
        expiration_days=7,
        user_performing_action=admin_user,
    )
    access_token = pat.token

    # Test connection with MCP protocol request
    response = requests.post(
        STREAMABLE_HTTP_URL,
        headers={
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "MCP-Protocol-Version": "2025-03-26",
        },
        json={"jsonrpc": "2.0", "method": "initialize", "id": 1},
    )

    # Should be authenticated (may return MCP protocol response or error)
    # 200 = valid MCP protocol response
    # 400 = valid protocol error (authenticated but bad request)
    assert response.status_code in [200, 400]


================================================
FILE: backend/tests/integration/tests/mcp/test_mcp_server_search.py
================================================
"""Integration tests covering MCP document search flows."""

from __future__ import annotations

import asyncio
import json
import os
from collections.abc import Awaitable
from collections.abc import Callable
from datetime import datetime
from datetime import timezone
from typing import Any

import pytest
from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client
from mcp.types import CallToolResult
from mcp.types import TextContent

from onyx.db.enums import AccessType
from tests.integration.common_utils.constants import MCP_SERVER_URL
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.pat import PATManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser


# Constants
MCP_SEARCH_TOOL = "search_indexed_documents"
INDEXED_SOURCES_RESOURCE_URI = "resource://indexed_sources"
DEFAULT_SEARCH_LIMIT = 5
STREAMABLE_HTTP_URL = f"{MCP_SERVER_URL.rstrip('/')}/?transportType=streamable-http"


def _run_with_mcp_session(
    headers: dict[str, str],
    action: Callable[[ClientSession], Awaitable[Any]],
) -> Any:
    """Run an async action with an MCP client session."""

    async def _runner() -> Any:
        async with streamablehttp_client(STREAMABLE_HTTP_URL, headers=headers) as (
            read,
            write,
            _,
        ):
            async with ClientSession(read, write) as session:
                return await action(session)

    return asyncio.run(_runner())


def _extract_tool_payload(result: CallToolResult) -> dict[str, Any]:
    """Extract JSON payload from MCP tool result."""
    if result.isError:
        raise AssertionError(f"MCP tool returned error: {result}")

    text_blocks = [
        block.text
        for block in result.content
        if isinstance(block, TextContent) and block.text
    ]
    if not text_blocks:
        raise AssertionError("Expected textual content from MCP tool result")

    return json.loads(text_blocks[-1])


def _call_search_tool(
    headers: dict[str, str], query: str, limit: int = DEFAULT_SEARCH_LIMIT
) -> CallToolResult:
    """Call the search_indexed_documents tool via MCP."""

    async def _action(session: ClientSession) -> CallToolResult:
        await session.initialize()
        return await session.call_tool(
            MCP_SEARCH_TOOL,
            {
                "query": query,
                "limit": limit,
            },
        )

    return _run_with_mcp_session(headers, _action)


def _auth_headers(user: DATestUser, name: str) -> dict[str, str]:
    """Create authorization headers with a PAT token."""
    pat = PATManager.create(
        name=name,
        expiration_days=7,
        user_performing_action=user,
    )
    return {"Authorization": f"Bearer {pat.token}"}


def _seed_document_and_wait_for_indexing(
    cc_pair: DATestCCPair,
    content: str,
    api_key: DATestAPIKey,
    user_performing_action: DATestUser,
) -> None:
    """Seed a document and wait for indexing to complete."""
    before = datetime.now(timezone.utc)
    DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content=content,
        api_key=api_key,
    )
    CCPairManager.wait_for_indexing_completion(
        cc_pair=cc_pair,
        after=before,
        user_performing_action=user_performing_action,
    )


def test_mcp_document_search_flow(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test the complete MCP search flow: initialization, resources, tools, and search."""
    # LLM provider is required for the document-search endpoint
    LLMProviderManager.create(user_performing_action=admin_user)

    api_key = APIKeyManager.create(user_performing_action=admin_user)
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)

    doc_text = "MCP happy path search document"
    _seed_document_and_wait_for_indexing(
        cc_pair=cc_pair,
        content=doc_text,
        api_key=api_key,
        user_performing_action=admin_user,
    )

    headers = _auth_headers(admin_user, name="mcp-search-flow")

    async def _full_flow(session: ClientSession) -> Any:
        await session.initialize()
        resources = await session.list_resources()
        tools = await session.list_tools()
        search_result = await session.call_tool(
            MCP_SEARCH_TOOL,
            {
                "query": doc_text,
                "limit": DEFAULT_SEARCH_LIMIT,
            },
        )
        return resources, tools, search_result

    resources_result, tools_result, search_result = _run_with_mcp_session(
        headers, _full_flow
    )

    # Verify resources are available
    resource_uris = {str(resource.uri) for resource in resources_result.resources}
    assert INDEXED_SOURCES_RESOURCE_URI in resource_uris

    # Verify tools are available
    tool_names = {tool.name for tool in tools_result.tools}
    assert MCP_SEARCH_TOOL in tool_names

    # Verify search results
    payload = _extract_tool_payload(search_result)
    assert payload["query"] == doc_text
    assert payload["total_results"] >= 1
    assert isinstance(payload["documents"], list)
    assert len(payload["documents"]) > 0
    assert any(doc_text in (doc.get("content") or "") for doc in payload["documents"])

    # Verify document structure
    for doc in payload["documents"]:
        assert isinstance(doc, dict)
        # Verify expected fields exist (may be None)
        assert "content" in doc
        assert "semantic_identifier" in doc
        assert "source_type" in doc


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group permissions are Enterprise-only",
)
def test_mcp_search_respects_acl_filters(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test that search respects ACL filters - privileged users can access, others cannot."""
    # LLM provider is required for the document-search endpoint
    LLMProviderManager.create(user_performing_action=admin_user)

    user_without_access = UserManager.create(name="mcp-acl-user-a")
    privileged_user = UserManager.create(name="mcp-acl-user-b")

    api_key = APIKeyManager.create(user_performing_action=admin_user)
    restricted_cc_pair = CCPairManager.create_from_scratch(
        access_type=AccessType.PRIVATE,
        user_performing_action=admin_user,
    )

    user_group = UserGroupManager.create(
        user_ids=[privileged_user.id],
        cc_pair_ids=[restricted_cc_pair.id],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_performing_action=admin_user, user_groups_to_check=[user_group]
    )

    restricted_doc_content = "MCP restricted knowledge base document"
    _seed_document_and_wait_for_indexing(
        cc_pair=restricted_cc_pair,
        content=restricted_doc_content,
        api_key=api_key,
        user_performing_action=admin_user,
    )

    privileged_headers = _auth_headers(privileged_user, "mcp-acl-allowed")
    restricted_headers = _auth_headers(user_without_access, "mcp-acl-blocked")

    # Privileged user should find the document
    allowed_result = _call_search_tool(privileged_headers, restricted_doc_content)
    allowed_payload = _extract_tool_payload(allowed_result)
    assert allowed_payload["total_results"] >= 1
    assert any(
        restricted_doc_content in (doc.get("content") or "")
        for doc in allowed_payload["documents"]
    )

    # User without access should not find the document
    blocked_result = _call_search_tool(restricted_headers, restricted_doc_content)
    blocked_payload = _extract_tool_payload(blocked_result)
    assert blocked_payload["total_results"] == 0
    assert blocked_payload["documents"] == []


================================================
FILE: backend/tests/integration/tests/migrations/conftest.py
================================================
"""
pytest-alembic configuration for testing Alembic migrations.

This module provides fixtures required by pytest-alembic to test the main
schema migrations (alembic). For alembic_tenants, see test_alembic_tenants.py.

Usage:
    Run all built-in pytest-alembic tests:
        pytest tests/integration/tests/migrations/test_alembic_main.py -v

See: https://pytest-alembic.readthedocs.io/en/latest/
"""

from collections.abc import Generator
from typing import Any

import pytest
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import Engine

from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PASSWORD
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import SYNC_DB_API
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA


def _create_sync_engine() -> Engine:
    """Create a synchronous SQLAlchemy engine for pytest-alembic."""
    conn_str = build_connection_string(
        db="postgres",
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        db_api=SYNC_DB_API,
    )
    return create_engine(conn_str)


@pytest.fixture
def alembic_config() -> dict[str, Any]:
    """
    Configure pytest-alembic for the main schema migrations.

    Returns pytest-alembic configuration options.
    See: https://pytest-alembic.readthedocs.io/en/latest/setup.html
    """
    return {
        "file": "alembic.ini",
        "script_location": "alembic",
        # Pass additional attributes to the alembic config
        # These will be available in env.py via context.config.attributes
        "attributes": {
            "schema_name": POSTGRES_DEFAULT_SCHEMA,
        },
    }


@pytest.fixture
def alembic_engine() -> Generator[Engine, None, None]:
    """
    Provide a synchronous SQLAlchemy engine for pytest-alembic.

    pytest-alembic requires a synchronous engine to run migrations.
    The engine is configured to connect to the test database.

    Note: pytest-alembic will internally perform commits, so ensure
    the database is in an appropriate state before running tests.
    """
    engine = _create_sync_engine()

    # Ensure the default schema exists
    with engine.connect() as conn:
        conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{POSTGRES_DEFAULT_SCHEMA}"'))
        conn.commit()

    yield engine

    engine.dispose()


================================================
FILE: backend/tests/integration/tests/migrations/test_alembic_main.py
================================================
"""
pytest-alembic tests for the main schema migrations.

These tests use pytest-alembic to verify that alembic migrations are correct.
The tests cover:
- Single head revision (no diverged migration history)
- Upgrade path from base to head
- Up/down consistency (all downgrades succeed)

Usage:
    pytest tests/integration/tests/migrations/test_alembic_main.py -v

See: https://github.com/schireson/pytest-alembic
"""

from pytest_alembic.tests import test_single_head_revision  # type: ignore[import-not-found,unused-ignore]
from pytest_alembic.tests import test_up_down_consistency  # type: ignore[import-not-found,unused-ignore]
from pytest_alembic.tests import test_upgrade  # type: ignore[import-not-found,unused-ignore]

__all__ = [
    "test_single_head_revision",
    "test_up_down_consistency",
    "test_upgrade",
]


================================================
FILE: backend/tests/integration/tests/migrations/test_alembic_tenants.py
================================================
"""
pytest-alembic tests for the tenants/public schema migrations.

These tests use pytest-alembic to verify that alembic_tenants migrations
are correct. The alembic_tenants configuration handles migrations for
the public schema tables that are shared across tenants.

Usage:
    pytest tests/integration/tests/migrations/test_alembic_tenants.py -v

See: https://github.com/schireson/pytest-alembic
"""

from collections.abc import Generator
from typing import Any

import pytest
from pytest_alembic import create_alembic_fixture  # type: ignore[import-not-found,unused-ignore]
from pytest_alembic.tests import test_single_head_revision  # type: ignore[import-not-found,unused-ignore]
from pytest_alembic.tests import test_up_down_consistency  # type: ignore[import-not-found,unused-ignore]
from pytest_alembic.tests import test_upgrade  # type: ignore[import-not-found,unused-ignore]
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PASSWORD
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import SYNC_DB_API


@pytest.fixture
def alembic_config() -> dict[str, Any]:
    """Override alembic_config for tenants configuration."""
    return {
        "file": "alembic.ini",
        "config_ini_section": "schema_private",
        "script_location": "alembic_tenants",
    }


@pytest.fixture
def alembic_engine() -> Generator[Engine, None, None]:
    """Override alembic_engine for tenants configuration."""
    conn_str = build_connection_string(
        db="postgres",
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        db_api=SYNC_DB_API,
    )
    engine = create_engine(conn_str)
    yield engine
    engine.dispose()


# Create a custom alembic fixture for the tenants configuration
alembic_runner = create_alembic_fixture()

__all__ = [
    "test_single_head_revision",
    "test_up_down_consistency",
    "test_upgrade",
]


================================================
FILE: backend/tests/integration/tests/migrations/test_assistant_consolidation_migration.py
================================================
"""
Integration tests for the assistant consolidation migration.

Tests the migration from multiple default assistants (Search, General, Art, etc.)
to a single default Assistant (ID 0) and the associated tool seeding.
"""

from sqlalchemy import text

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from tests.integration.common_utils.reset import downgrade_postgres
from tests.integration.common_utils.reset import upgrade_postgres


def test_cold_startup_default_assistant() -> None:
    """Test that cold startup creates only the default assistant."""
    # Start fresh at the head revision
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(database="postgres", config_name="alembic", revision="head")

    with get_session_with_current_tenant() as db_session:
        # Check only default assistant exists
        result = db_session.execute(
            text(
                """
                SELECT id, name, builtin_persona, is_featured, deleted
                FROM persona
                WHERE builtin_persona = true
                ORDER BY id
                """
            )
        )
        assistants = result.fetchall()

        # Should have exactly one builtin assistant
        assert len(assistants) == 1, "Should have exactly one builtin assistant"
        default = assistants[0]
        assert default[0] == 0, "Default assistant should have ID 0"
        assert default[1] == "Assistant", "Should be named 'Assistant'"
        assert default[2] is True, "Should be builtin"
        assert default[3] is True, "Should be is_featured"
        assert default[4] is False, "Should not be deleted"

        # Check tools are properly associated
        result = db_session.execute(
            text(
                """
                SELECT t.name, t.display_name
                FROM tool t
                JOIN persona__tool pt ON t.id = pt.tool_id
                WHERE pt.persona_id = 0
                ORDER BY t.name
                """
            )
        )
        tool_associations = result.fetchall()
        tool_names = [row[0] for row in tool_associations]
        tool_display_names = [row[1] for row in tool_associations]

        # Verify all three main tools are attached
        assert (
            "internal_search" in tool_names
        ), "Default assistant should have SearchTool attached"
        assert (
            "generate_image" in tool_names
        ), "Default assistant should have ImageGenerationTool attached"
        assert (
            "web_search" in tool_names
        ), "Default assistant should have WebSearchTool attached"
        assert (
            "read_file" in tool_names
        ), "Default assistant should have FileReaderTool attached"
        assert (
            "python" in tool_names
        ), "Default assistant should have PythonTool attached"

        # Also verify by display names for clarity
        assert (
            "Internal Search" in tool_display_names
        ), "Default assistant should have Internal Search tool"
        assert (
            "Image Generation" in tool_display_names
        ), "Default assistant should have Image Generation tool"
        assert (
            "Web Search" in tool_display_names
        ), "Default assistant should have Web Search tool"
        assert (
            "File Reader" in tool_display_names
        ), "Default assistant should have File Reader tool"
        assert (
            "Code Interpreter" in tool_display_names
        ), "Default assistant should have Code Interpreter tool"

        # Should have exactly 6 tools
        assert (
            len(tool_associations) == 6
        ), f"Default assistant should have exactly 6 tools attached, got {len(tool_associations)}"


================================================
FILE: backend/tests/integration/tests/migrations/test_migrations.py
================================================
# TODO(rkuo): All of the downgrade_postgres and upgrade_postgres operations here
# are vulnerable to deadlocks. We could deal with them similar to reset_postgres
# where we retry out of process

import json

import pytest
from sqlalchemy import text

from onyx.configs.constants import ANONYMOUS_USER_UUID
from onyx.configs.constants import DEFAULT_BOOST
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from tests.integration.common_utils.reset import downgrade_postgres
from tests.integration.common_utils.reset import upgrade_postgres


@pytest.mark.skip(
    reason="Migration test no longer needed - migration has been applied to production"
)
def test_fix_capitalization_migration() -> None:
    """Test that the be2ab2aa50ee migration correctly lowercases external_user_group_ids"""
    # Reset the database and run migrations up to the second to last migration
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        # Upgrade it to the migration before the fix
        revision="369644546676",
    )

    # Insert test data with mixed case group IDs
    test_data = [
        {
            "id": "test_doc_1",
            "external_user_group_ids": ["Group1", "GROUP2", "group3"],
            "semantic_id": "test_doc_1",
            "boost": DEFAULT_BOOST,
            "hidden": False,
            "from_ingestion_api": False,
            "last_modified": "NOW()",
        },
        {
            "id": "test_doc_2",
            "external_user_group_ids": ["UPPER1", "upper2", "UPPER3"],
            "semantic_id": "test_doc_2",
            "boost": DEFAULT_BOOST,
            "hidden": False,
            "from_ingestion_api": False,
            "last_modified": "NOW()",
        },
    ]

    # Insert the test data
    with get_session_with_current_tenant() as db_session:
        for doc in test_data:
            db_session.execute(
                text(
                    """
                    INSERT INTO document (
                        id,
                        external_user_group_ids,
                        semantic_id,
                        boost,
                        hidden,
                        from_ingestion_api,
                        last_modified
                    )
                    VALUES (
                        :id,
                        :group_ids,
                        :semantic_id,
                        :boost,
                        :hidden,
                        :from_ingestion_api,
                        :last_modified
                    )
                    """
                ),
                {
                    "id": doc["id"],
                    "group_ids": doc["external_user_group_ids"],
                    "semantic_id": doc["semantic_id"],
                    "boost": doc["boost"],
                    "hidden": doc["hidden"],
                    "from_ingestion_api": doc["from_ingestion_api"],
                    "last_modified": doc["last_modified"],
                },
            )
        db_session.commit()

    # Verify the data was inserted correctly
    with get_session_with_current_tenant() as db_session:
        results = db_session.execute(
            text(
                """
                SELECT id, external_user_group_ids
                FROM document
                WHERE id IN ('test_doc_1', 'test_doc_2')
                ORDER BY id
                """
            )
        ).fetchall()

        # Verify initial state
        assert len(results) == 2
        assert results[0].external_user_group_ids == ["Group1", "GROUP2", "group3"]
        assert results[1].external_user_group_ids == ["UPPER1", "upper2", "UPPER3"]

    # Run migrations again to apply the fix
    upgrade_postgres(
        database="postgres", config_name="alembic", revision="be2ab2aa50ee"
    )

    # Verify the fix was applied
    with get_session_with_current_tenant() as db_session:
        results = db_session.execute(
            text(
                """
                SELECT id, external_user_group_ids
                FROM document
                WHERE id IN ('test_doc_1', 'test_doc_2')
                ORDER BY id
                """
            )
        ).fetchall()

        # Verify all group IDs are lowercase
        assert len(results) == 2
        assert results[0].external_user_group_ids == ["group1", "group2", "group3"]
        assert results[1].external_user_group_ids == ["upper1", "upper2", "upper3"]


def test_jira_connector_migration() -> None:
    """Test that the da42808081e3 migration correctly updates Jira connector configurations"""
    # Reset the database and run migrations up to the migration before the Jira connector change
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        # Upgrade it to the migration before the Jira connector change
        revision="f13db29f3101",
    )

    # Insert test data with various Jira connector configurations
    test_data = [
        {
            "id": 1,
            "name": "jira_connector_1",
            "source": "JIRA",
            "connector_specific_config": {
                "jira_project_url": "https://example.atlassian.net/projects/PROJ",
                "comment_email_blacklist": ["test@example.com"],
                "batch_size": 100,
                "labels_to_skip": ["skip-me"],
            },
        },
        {
            "id": 2,
            "name": "jira_connector_2",
            "source": "JIRA",
            "connector_specific_config": {
                "jira_project_url": "https://other.atlassian.net/projects/OTHER"
            },
        },
        {
            "id": 3,
            "name": "jira_connector_3",
            "source": "JIRA",
            "connector_specific_config": {
                "jira_project_url": "https://example.atlassian.net/projects/TEST",
                "batch_size": 50,
            },
        },
    ]

    # Insert the test data
    with get_session_with_current_tenant() as db_session:
        for connector in test_data:
            db_session.execute(
                text(
                    """
                    INSERT INTO connector (
                        id,
                        name,
                        source,
                        connector_specific_config
                    )
                    VALUES (
                        :id,
                        :name,
                        :source,
                        :config
                    )
                    """
                ),
                {
                    "id": connector["id"],
                    "name": connector["name"],
                    "source": connector["source"],
                    "config": json.dumps(connector["connector_specific_config"]),
                },
            )
        db_session.commit()

    # Verify the data was inserted correctly
    with get_session_with_current_tenant() as db_session:
        results = db_session.execute(
            text(
                """
                SELECT id, connector_specific_config
                FROM connector
                WHERE source = 'JIRA'
                ORDER BY id
                """
            )
        ).fetchall()

        # Verify initial state
        assert len(results) == 3
        assert (
            results[0].connector_specific_config
            == test_data[0]["connector_specific_config"]
        )
        assert (
            results[1].connector_specific_config
            == test_data[1]["connector_specific_config"]
        )
        assert (
            results[2].connector_specific_config
            == test_data[2]["connector_specific_config"]
        )

    # Run migrations again to apply the Jira connector change
    upgrade_postgres(
        database="postgres", config_name="alembic", revision="da42808081e3"
    )
    # Verify the upgrade was applied correctly
    with get_session_with_current_tenant() as db_session:
        results = db_session.execute(
            text(
                """
                SELECT id, connector_specific_config
                FROM connector
                WHERE source = 'JIRA'
                ORDER BY id
                """
            )
        ).fetchall()

        # Verify new format
        assert len(results) == 3

        # First connector - full config
        config_0 = results[0].connector_specific_config
        assert config_0["jira_base_url"] == "https://example.atlassian.net"
        assert config_0["project_key"] == "PROJ"
        assert config_0["comment_email_blacklist"] == ["test@example.com"]
        assert config_0["batch_size"] == 100
        assert config_0["labels_to_skip"] == ["skip-me"]

        # Second connector - minimal config
        config_1 = results[1].connector_specific_config
        assert config_1["jira_base_url"] == "https://other.atlassian.net"
        assert config_1["project_key"] == "OTHER"
        assert "comment_email_blacklist" not in config_1
        assert "batch_size" not in config_1
        assert "labels_to_skip" not in config_1

        # Third connector - partial config
        config_2 = results[2].connector_specific_config
        assert config_2["jira_base_url"] == "https://example.atlassian.net"
        assert config_2["project_key"] == "TEST"
        assert config_2["batch_size"] == 50
        assert "comment_email_blacklist" not in config_2
        assert "labels_to_skip" not in config_2

    # Test downgrade path
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="f13db29f3101"
    )

    # Verify the downgrade was applied correctly
    with get_session_with_current_tenant() as db_session:
        results = db_session.execute(
            text(
                """
                SELECT id, connector_specific_config
                FROM connector
                WHERE source = 'JIRA'
                ORDER BY id
                """
            )
        ).fetchall()

        # Verify reverted to old format
        assert len(results) == 3

        # First connector - full config
        config_0 = results[0].connector_specific_config
        assert (
            config_0["jira_project_url"]
            == "https://example.atlassian.net/projects/PROJ"
        )
        assert config_0["comment_email_blacklist"] == ["test@example.com"]
        assert config_0["batch_size"] == 100
        assert config_0["labels_to_skip"] == ["skip-me"]

        # Second connector - minimal config
        config_1 = results[1].connector_specific_config
        assert (
            config_1["jira_project_url"] == "https://other.atlassian.net/projects/OTHER"
        )

        # Third connector - partial config
        config_2 = results[2].connector_specific_config
        assert (
            config_2["jira_project_url"]
            == "https://example.atlassian.net/projects/TEST"
        )
        assert config_2["batch_size"] == 50


def test_anonymous_user_migration_dedupes_null_notifications() -> None:
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        revision="f7ca3e2f45d9",
    )

    with get_session_with_current_tenant() as db_session:
        db_session.execute(
            text(
                """
                INSERT INTO notification (
                    id,
                    notif_type,
                    user_id,
                    dismissed,
                    last_shown,
                    first_shown,
                    title,
                    description,
                    additional_data
                )
                VALUES
                    (
                        1,
                        'RELEASE_NOTES',
                        NULL,
                        FALSE,
                        NOW(),
                        NOW(),
                        'Onyx v2.10.0 is available!',
                        'Check out what''s new in v2.10.0',
                        '{"version":"v2.10.0","link":"https://docs.onyx.app/changelog#v2-10-0"}'::jsonb
                    ),
                    (
                        2,
                        'RELEASE_NOTES',
                        NULL,
                        FALSE,
                        NOW(),
                        NOW(),
                        'Onyx v2.10.0 is available!',
                        'Check out what''s new in v2.10.0',
                        '{"version":"v2.10.0","link":"https://docs.onyx.app/changelog#v2-10-0"}'::jsonb
                    )
                """
            )
        )
        db_session.commit()

    upgrade_postgres(
        database="postgres", config_name="alembic", revision="e7f8a9b0c1d2"
    )

    with get_session_with_current_tenant() as db_session:
        notifications = db_session.execute(
            text(
                """
                SELECT id, user_id
                FROM notification
                ORDER BY id
                """
            )
        ).fetchall()

        anonymous_user = db_session.execute(
            text(
                """
                SELECT id, email, role
                FROM "user"
                WHERE id = :user_id
                """
            ),
            {"user_id": ANONYMOUS_USER_UUID},
        ).fetchone()

    assert len(notifications) == 1
    assert notifications[0].id == 2  # Higher id wins when timestamps are equal
    assert str(notifications[0].user_id) == ANONYMOUS_USER_UUID
    assert anonymous_user is not None
    assert anonymous_user.email == "anonymous@onyx.app"
    assert anonymous_user.role == "LIMITED"


def test_anonymous_user_migration_collision_with_existing_anonymous_notification() -> (
    None
):
    """Test that a NULL-owned notification that collides with an already-existing
    anonymous-owned notification is removed during migration."""
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        revision="f7ca3e2f45d9",
    )

    with get_session_with_current_tenant() as db_session:
        # Create the anonymous user early so we can insert a notification owned by it
        db_session.execute(
            text(
                """
                INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
                VALUES (:id, 'anonymous@onyx.app', '', TRUE, FALSE, TRUE, 'LIMITED')
                ON CONFLICT (id) DO NOTHING
                """
            ),
            {"id": ANONYMOUS_USER_UUID},
        )
        # Insert an anonymous-owned notification (already migrated in a prior partial run)
        db_session.execute(
            text(
                """
                INSERT INTO notification (
                    id, notif_type, user_id, dismissed, last_shown, first_shown,
                    title, description, additional_data
                )
                VALUES
                    (
                        1, 'RELEASE_NOTES', :user_id, FALSE, NOW(), NOW(),
                        'Onyx v2.10.0 is available!',
                        'Check out what''s new in v2.10.0',
                        '{"version":"v2.10.0","link":"https://docs.onyx.app/changelog#v2-10-0"}'::jsonb
                    ),
                    (
                        2, 'RELEASE_NOTES', NULL, FALSE, NOW(), NOW(),
                        'Onyx v2.10.0 is available!',
                        'Check out what''s new in v2.10.0',
                        '{"version":"v2.10.0","link":"https://docs.onyx.app/changelog#v2-10-0"}'::jsonb
                    )
                """
            ),
            {"user_id": ANONYMOUS_USER_UUID},
        )
        db_session.commit()

    upgrade_postgres(
        database="postgres", config_name="alembic", revision="e7f8a9b0c1d2"
    )

    with get_session_with_current_tenant() as db_session:
        notifications = db_session.execute(
            text(
                """
                SELECT id, user_id
                FROM notification
                ORDER BY id
                """
            )
        ).fetchall()

    # Only the original anonymous-owned notification should remain;
    # the NULL-owned duplicate should have been deleted
    assert len(notifications) == 1
    assert notifications[0].id == 1
    assert str(notifications[0].user_id) == ANONYMOUS_USER_UUID


================================================
FILE: backend/tests/integration/tests/migrations/test_tool_seeding.py
================================================
from pydantic import BaseModel
from sqlalchemy import text

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from tests.integration.common_utils.reset import downgrade_postgres
from tests.integration.common_utils.reset import upgrade_postgres


class ToolSeedingExpectedResult(BaseModel):
    name: str
    display_name: str
    in_code_tool_id: str
    user_id: str | None


EXPECTED_TOOLS = {
    "SearchTool": ToolSeedingExpectedResult(
        name="internal_search",
        display_name="Internal Search",
        in_code_tool_id="SearchTool",
        user_id=None,
    ),
    "ImageGenerationTool": ToolSeedingExpectedResult(
        name="generate_image",
        display_name="Image Generation",
        in_code_tool_id="ImageGenerationTool",
        user_id=None,
    ),
    "WebSearchTool": ToolSeedingExpectedResult(
        name="web_search",
        display_name="Web Search",
        in_code_tool_id="WebSearchTool",
        user_id=None,
    ),
    "KnowledgeGraphTool": ToolSeedingExpectedResult(
        name="run_kg_search",
        display_name="Knowledge Graph Search",
        in_code_tool_id="KnowledgeGraphTool",
        user_id=None,
    ),
    "PythonTool": ToolSeedingExpectedResult(
        name="python",
        display_name="Code Interpreter",
        in_code_tool_id="PythonTool",
        user_id=None,
    ),
    "ResearchAgent": ToolSeedingExpectedResult(
        name="research_agent",
        display_name="Research Agent",
        in_code_tool_id="ResearchAgent",
        user_id=None,
    ),
    "FileReaderTool": ToolSeedingExpectedResult(
        name="read_file",
        display_name="File Reader",
        in_code_tool_id="FileReaderTool",
        user_id=None,
    ),
    "MemoryTool": ToolSeedingExpectedResult(
        name="MemoryTool",
        display_name="Add Memory",
        in_code_tool_id="MemoryTool",
        user_id=None,
    ),
}


def test_tool_seeding_migration() -> None:
    """Test that migration from base to head correctly seeds builtin tools."""
    # Start from base and upgrade to just before tool seeding
    downgrade_postgres(
        database="postgres", config_name="alembic", revision="base", clear_data=True
    )
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        revision="b7ec9b5b505f",  # Revision before tool seeding
    )

    # Verify no tools exist yet
    with get_session_with_current_tenant() as db_session:
        result = db_session.execute(text("SELECT COUNT(*) FROM tool"))
        count = result.scalar()
        assert count == 0, "No tools should exist before migration"

    # Upgrade to head
    upgrade_postgres(
        database="postgres",
        config_name="alembic",
        revision="head",
    )

    # Verify tools were created
    with get_session_with_current_tenant() as db_session:
        result = db_session.execute(
            text(
                """
                SELECT id, name, display_name, description, in_code_tool_id,
                       user_id
                FROM tool
                ORDER BY id
                """
            )
        )
        tools = result.fetchall()

        # Should have all 9 builtin tools
        assert (
            len(tools) == 10
        ), f"Should have created exactly 9 builtin tools, got {len(tools)}"

        def validate_tool(expected: ToolSeedingExpectedResult) -> None:
            tool = next((t for t in tools if t[1] == expected.name), None)
            assert tool is not None, f"{expected.name} should exist"
            assert (
                tool[2] == expected.display_name
            ), f"{expected.name} display name should be '{expected.display_name}'"
            assert (
                tool[4] == expected.in_code_tool_id
            ), f"{expected.name} in_code_tool_id should be '{expected.in_code_tool_id}'"
            assert (
                tool[5] is None
            ), f"{expected.name} should not have a user_id (builtin)"

        # Check SearchTool
        validate_tool(EXPECTED_TOOLS["SearchTool"])

        # Check ImageGenerationTool
        validate_tool(EXPECTED_TOOLS["ImageGenerationTool"])

        # Check WebSearchTool
        validate_tool(EXPECTED_TOOLS["WebSearchTool"])

        # Check KnowledgeGraphTool
        validate_tool(EXPECTED_TOOLS["KnowledgeGraphTool"])

        # Check PythonTool
        validate_tool(EXPECTED_TOOLS["PythonTool"])

        # Check ResearchAgent (Deep Research as a tool)
        validate_tool(EXPECTED_TOOLS["ResearchAgent"])

        # Check FileReaderTool
        validate_tool(EXPECTED_TOOLS["FileReaderTool"])

        # Check MemoryTool
        validate_tool(EXPECTED_TOOLS["MemoryTool"])


================================================
FILE: backend/tests/integration/tests/no_vectordb/conftest.py
================================================
"""Fixtures for no-vector-DB integration tests.

These tests are intended to run against an Onyx deployment started with
DISABLE_VECTOR_DB=true.  They are automatically **skipped** when the
server reports vector_db_enabled=true (i.e. when Vespa is available).
"""

import pytest
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.reset import reset_file_store
from tests.integration.common_utils.reset import reset_postgres
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


def _server_has_vector_db_disabled() -> bool:
    """Query the running server to check whether DISABLE_VECTOR_DB is set."""
    try:
        resp = requests.get(
            f"{API_SERVER_URL}/settings",
            headers=GENERAL_HEADERS,
        )
        if resp.ok:
            return resp.json().get("vector_db_enabled") is False
    except Exception:
        pass
    return False


# Skip the entire module when the server has vector DB enabled —
# these tests only make sense in no-vector-DB deployments.
pytestmark = pytest.mark.skipif(
    not _server_has_vector_db_disabled(),
    reason="Server is running with vector DB enabled; skipping no-vectordb tests",
)


@pytest.fixture()
def reset() -> None:
    """Reset Postgres and the file store, but skip Vespa (not running)."""
    reset_postgres()
    reset_file_store()


@pytest.fixture()
def llm_provider(admin_user: DATestUser) -> DATestLLMProvider:
    """Ensure an LLM provider exists for the test session."""
    return LLMProviderManager.create(user_performing_action=admin_user)


================================================
FILE: backend/tests/integration/tests/no_vectordb/test_no_vectordb_chat.py
================================================
"""Integration tests for chat in no-vector-DB mode.

Covers:
- Uploading a file to a project, sending a chat message, and verifying the LLM
  receives the file content (small project — fits in context window).
- Creating a persona with user_files and verifying chat works.
- Verifying that persona creation with document_sets / hierarchy_nodes /
  document_ids is rejected with a 400.
"""

import io
import time

import requests

from onyx.db.enums import UserFileStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.project import ProjectManager
from tests.integration.common_utils.managers.tool import ToolManager
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


FILE_READER_TOOL_ID = "FileReaderTool"


def _wait_for_file_processed(
    project_id: int,
    user: DATestUser,
    timeout: int = 30,
) -> None:
    """Poll until all files in the project reach COMPLETED status."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        files = ProjectManager.get_project_files(project_id, user)
        if files and all(f.status == UserFileStatus.COMPLETED for f in files):
            return
        time.sleep(1)
    raise TimeoutError(
        f"Files in project {project_id} did not reach COMPLETED within {timeout}s"
    )


# ------------------------------------------------------------------
# Small-project chat — file content loaded directly into context
# ------------------------------------------------------------------


def test_chat_with_small_project_file(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Upload a small text file to a project and send a chat message.

    The file is small enough to fit in the LLM context window, so the LLM
    should see the file content directly and be able to answer questions
    about it.
    """
    project = ProjectManager.create(
        name="test-no-vectordb-small", user_performing_action=admin_user
    )

    file_content = b"The secret code is PINEAPPLE-42."
    ProjectManager.upload_files(
        project_id=project.id,
        files=[("secret.txt", file_content)],
        user_performing_action=admin_user,
    )

    _wait_for_file_processed(project.id, admin_user)

    # Create a chat session associated with the project's default persona
    chat_session = ChatSessionManager.create(
        persona_id=0,
        description="no-vectordb small project test",
        user_performing_action=admin_user,
    )

    # Link the chat session to the project
    resp = requests.post(
        f"{API_SERVER_URL}/user/projects/{project.id}/move_chat_session",
        json={"chat_session_id": str(chat_session.id)},
        headers=admin_user.headers,
    )
    resp.raise_for_status()

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the secret code in the file?",
        user_performing_action=admin_user,
    )

    assert response.error is None, f"Chat returned an error: {response.error}"
    assert (
        "PINEAPPLE-42" in response.full_message
    ), f"Expected the LLM to reference the file content. Got: {response.full_message}"


# ------------------------------------------------------------------
# Persona with user_files — should work in no-vector-DB mode
# ------------------------------------------------------------------


def test_persona_with_user_files_chat(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Create a persona with attached user files and verify chat works."""
    # Upload a file first
    file_content = b"Quarterly revenue was $42 million."
    file_obj = io.BytesIO(file_content)
    file_descriptors, error = FileManager.upload_files(
        files=[("revenue.txt", file_obj)],
        user_performing_action=admin_user,
    )
    assert not error, f"File upload failed: {error}"
    assert len(file_descriptors) > 0

    user_file_id = file_descriptors[0].get("user_file_id")
    assert user_file_id, "Expected user_file_id in upload response"

    # Wait for the file to be processed
    deadline = time.time() + 30
    while time.time() < deadline:
        time.sleep(1)
        # Check via file fetch — if it succeeds, the file is ready
        try:
            FileManager.fetch_uploaded_file(
                file_descriptors[0]["id"],
                admin_user,
            )
            break
        except Exception:
            continue

    # Find the FileReaderTool ID from available tools
    tools = ToolManager.list_tools(user_performing_action=admin_user)
    file_reader_tool = next(
        (t for t in tools if t.in_code_tool_id == FILE_READER_TOOL_ID), None
    )
    assert (
        file_reader_tool is not None
    ), "FileReaderTool should be registered as a built-in tool"

    # Create a persona with the user file attached
    persona = PersonaManager.create(
        name="no-vectordb-persona-test",
        description="Test persona for no-vectordb mode",
        system_prompt="You are a helpful assistant. Answer questions using the available tools and files.",
        task_prompt="",
        user_file_ids=[user_file_id],
        tool_ids=[file_reader_tool.id],
        user_performing_action=admin_user,
    )

    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        description="no-vectordb persona test",
        user_performing_action=admin_user,
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What was the quarterly revenue?",
        user_performing_action=admin_user,
    )

    assert response.error is None, f"Chat returned an error: {response.error}"
    # The LLM should be able to answer about the revenue (either from direct
    # context injection or via the FileReaderTool)
    assert (
        "$42 million" in response.full_message or "42" in response.full_message
    ), f"Expected the LLM to reference the file content. Got: {response.full_message}"


# ------------------------------------------------------------------
# Persona validation — vector-DB knowledge types rejected
# ------------------------------------------------------------------


def _base_persona_body(**overrides: object) -> dict:
    """Build a valid PersonaUpsertRequest body with sensible defaults.

    Callers override only the fields under test so that Pydantic validation
    passes and the vector-DB guard (``_validate_vector_db_knowledge``) is
    the one that rejects the request.
    """
    body: dict = {
        "name": "should-fail",
        "description": "test",
        "system_prompt": "test",
        "task_prompt": "",
        "is_public": True,
        "datetime_aware": False,
        "document_set_ids": [],
        "tool_ids": [],
        "users": [],
        "groups": [],
    }
    body.update(overrides)
    return body


def test_persona_rejects_document_sets_without_vector_db(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Creating a persona with document_set_ids should fail with 400."""
    resp = requests.post(
        f"{API_SERVER_URL}/persona",
        json=_base_persona_body(document_set_ids=[1]),
        headers=admin_user.headers,
    )
    assert (
        resp.status_code == 400
    ), f"Expected 400 for document_set_ids, got {resp.status_code}: {resp.text}"


def test_persona_rejects_document_ids_without_vector_db(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Creating a persona with document_ids should fail with 400."""
    resp = requests.post(
        f"{API_SERVER_URL}/persona",
        json=_base_persona_body(document_ids=["fake-doc-id"]),
        headers=admin_user.headers,
    )
    assert (
        resp.status_code == 400
    ), f"Expected 400 for document_ids, got {resp.status_code}: {resp.text}"


================================================
FILE: backend/tests/integration/tests/no_vectordb/test_no_vectordb_endpoints.py
================================================
"""Integration tests for endpoint gating when DISABLE_VECTOR_DB is set.

Vector-DB-dependent endpoints should return HTTP 501.
Non-dependent endpoints (settings, document sets, chat, etc.) should work
normally.
"""

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


# ------------------------------------------------------------------
# Helper
# ------------------------------------------------------------------


def _headers(user: DATestUser) -> dict[str, str]:
    return user.headers if user else {"Content-Type": "application/json"}


# ------------------------------------------------------------------
# Gated endpoints — should return 501
# ------------------------------------------------------------------


def test_admin_search_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.post(
        f"{API_SERVER_URL}/admin/search",
        json={"query": "test", "filters": {}},
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501, f"Expected 501, got {resp.status_code}"


def test_document_size_info_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.get(
        f"{API_SERVER_URL}/document/document-size-info",
        params={"document_id": "fake-doc"},
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_document_chunk_info_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.get(
        f"{API_SERVER_URL}/document/chunk-info",
        params={"document_id": "fake-doc"},
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_set_new_search_settings_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.post(
        f"{API_SERVER_URL}/search-settings/set-new-search-settings",
        json={},
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_cancel_new_embedding_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.post(
        f"{API_SERVER_URL}/search-settings/cancel-new-embedding",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_connector_router_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """The entire /manage router is gated — any connector endpoint should 501."""
    resp = requests.get(
        f"{API_SERVER_URL}/manage/connector",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_ingestion_post_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.post(
        f"{API_SERVER_URL}/onyx-api/ingestion",
        json={"document": {}},
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


def test_ingestion_delete_returns_501(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.delete(
        f"{API_SERVER_URL}/onyx-api/ingestion/fake-doc-id",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 501


# ------------------------------------------------------------------
# Non-gated endpoints — should work (2xx)
# ------------------------------------------------------------------


def test_settings_endpoint_works(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.get(
        f"{API_SERVER_URL}/settings",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 200
    data = resp.json()
    assert data["vector_db_enabled"] is False


def test_document_set_list_works(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.get(
        f"{API_SERVER_URL}/manage/document-set",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 200


def test_persona_list_works(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    resp = requests.get(
        f"{API_SERVER_URL}/admin/persona",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 200


def test_tool_list_works(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:  # noqa: ARG001
    resp = requests.get(
        f"{API_SERVER_URL}/tool",
        headers=_headers(admin_user),
    )
    assert resp.status_code == 200
    tools = resp.json()
    tool_ids = {t["in_code_tool_id"] for t in tools if t.get("in_code_tool_id")}
    assert (
        "FileReaderTool" in tool_ids
    ), "FileReaderTool should be registered as a built-in tool"


================================================
FILE: backend/tests/integration/tests/no_vectordb/test_no_vectordb_file_lifecycle.py
================================================
"""Integration test for the full user-file lifecycle in no-vector-DB mode.

Covers: upload → COMPLETED → unlink from project → delete → gone.

The entire lifecycle is handled by FastAPI BackgroundTasks (no Celery workers
needed).  The conftest-level ``pytestmark`` ensures these tests are skipped
when the server is running with vector DB enabled.
"""

import time
from uuid import UUID

import requests

from onyx.db.enums import UserFileStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.project import ProjectManager
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser

POLL_INTERVAL_SECONDS = 1
POLL_TIMEOUT_SECONDS = 30


def _poll_file_status(
    file_id: UUID,
    user: DATestUser,
    target_status: UserFileStatus,
    timeout: int = POLL_TIMEOUT_SECONDS,
) -> None:
    """Poll GET /user/projects/file/{file_id} until the file reaches *target_status*."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        resp = requests.get(
            f"{API_SERVER_URL}/user/projects/file/{file_id}",
            headers=user.headers,
        )
        if resp.ok:
            status = resp.json().get("status")
            if status == target_status.value:
                return
        time.sleep(POLL_INTERVAL_SECONDS)
    raise TimeoutError(
        f"File {file_id} did not reach {target_status.value} within {timeout}s"
    )


def _file_is_gone(file_id: UUID, user: DATestUser, timeout: int = 15) -> None:
    """Poll until GET /user/projects/file/{file_id} returns 404."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        resp = requests.get(
            f"{API_SERVER_URL}/user/projects/file/{file_id}",
            headers=user.headers,
        )
        if resp.status_code == 404:
            return
        time.sleep(POLL_INTERVAL_SECONDS)
    raise TimeoutError(
        f"File {file_id} still accessible after {timeout}s (expected 404)"
    )


def test_file_upload_process_delete_lifecycle(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Full lifecycle: upload → COMPLETED → unlink → delete → 404.

    Validates that the API server handles all background processing
    (via FastAPI BackgroundTasks) without any Celery workers running.
    """
    project = ProjectManager.create(
        name="lifecycle-test", user_performing_action=admin_user
    )

    file_content = b"Integration test file content for lifecycle verification."
    upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=[("lifecycle.txt", file_content)],
        user_performing_action=admin_user,
    )
    assert upload_result.user_files, "Expected at least one file in upload response"

    user_file = upload_result.user_files[0]
    file_id = user_file.id

    _poll_file_status(file_id, admin_user, UserFileStatus.COMPLETED)

    project_files = ProjectManager.get_project_files(project.id, admin_user)
    assert any(
        f.id == file_id for f in project_files
    ), "File should be listed in project files after processing"

    # Unlink the file from the project so the delete endpoint will proceed
    unlink_resp = requests.delete(
        f"{API_SERVER_URL}/user/projects/{project.id}/files/{file_id}",
        headers=admin_user.headers,
    )
    assert (
        unlink_resp.status_code == 204
    ), f"Expected 204 on unlink, got {unlink_resp.status_code}: {unlink_resp.text}"

    delete_resp = requests.delete(
        f"{API_SERVER_URL}/user/projects/file/{file_id}",
        headers=admin_user.headers,
    )
    assert (
        delete_resp.ok
    ), f"Delete request failed: {delete_resp.status_code} {delete_resp.text}"
    body = delete_resp.json()
    assert (
        body["has_associations"] is False
    ), f"File still has associations after unlink: {body}"

    _file_is_gone(file_id, admin_user)

    project_files_after = ProjectManager.get_project_files(project.id, admin_user)
    assert not any(
        f.id == file_id for f in project_files_after
    ), "Deleted file should not appear in project files"


def test_delete_blocked_while_associated(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Deleting a file that still belongs to a project should return
    has_associations=True without actually deleting the file."""
    project = ProjectManager.create(
        name="assoc-test", user_performing_action=admin_user
    )

    upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=[("assoc.txt", b"associated file content")],
        user_performing_action=admin_user,
    )
    file_id = upload_result.user_files[0].id

    _poll_file_status(file_id, admin_user, UserFileStatus.COMPLETED)

    # Attempt to delete while still linked
    delete_resp = requests.delete(
        f"{API_SERVER_URL}/user/projects/file/{file_id}",
        headers=admin_user.headers,
    )
    assert delete_resp.ok
    body = delete_resp.json()
    assert body["has_associations"] is True, "Should report existing associations"
    assert project.name in body["project_names"]

    # File should still be accessible
    get_resp = requests.get(
        f"{API_SERVER_URL}/user/projects/file/{file_id}",
        headers=admin_user.headers,
    )
    assert get_resp.status_code == 200, "File should still exist after blocked delete"


================================================
FILE: backend/tests/integration/tests/opensearch_migration/test_opensearch_migration_api.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


def test_migration_status_returns_defaults_when_no_record(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """When no migration record exists, status should return zeros/nulls."""
    # Under test.
    response = requests.get(
        f"{API_SERVER_URL}/admin/opensearch-migration/status",
        headers=admin_user.headers,
    )

    # Postcondition.
    assert response.status_code == 200
    data = response.json()
    assert data["total_chunks_migrated"] == 0
    assert data["created_at"] is None
    assert data["migration_completed_at"] is None
    assert data["approx_chunk_count_in_vespa"] is None


def test_retrieval_status_returns_false_when_no_record(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """When no migration record exists, retrieval should default to disabled."""
    # Under test.
    response = requests.get(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        headers=admin_user.headers,
    )

    # Postcondition.
    assert response.status_code == 200
    data = response.json()
    assert data["enable_opensearch_retrieval"] is False


def test_set_and_get_retrieval_status(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Setting retrieval to True should persist and be readable."""
    # Under test.
    # Enable retrieval.
    response = requests.put(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        json={"enable_opensearch_retrieval": True},
        headers=admin_user.headers,
    )

    # Postcondition.
    assert response.status_code == 200
    assert response.json()["enable_opensearch_retrieval"] is True
    # Verify it persisted.
    response = requests.get(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    assert response.json()["enable_opensearch_retrieval"] is True

    # Under test.
    # Disable retrieval.
    response = requests.put(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        json={"enable_opensearch_retrieval": False},
        headers=admin_user.headers,
    )

    # Postcondition.
    assert response.status_code == 200
    assert response.json()["enable_opensearch_retrieval"] is False
    # Verify it persisted.
    response = requests.get(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        headers=admin_user.headers,
    )
    assert response.status_code == 200
    assert response.json()["enable_opensearch_retrieval"] is False


def test_migration_status_after_record_created(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """After toggling retrieval (which creates the record), status should
    return a valid created_at timestamp."""
    # Precondition.
    # Create the record by setting retrieval.
    requests.put(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        json={"enable_opensearch_retrieval": False},
        headers=admin_user.headers,
    )

    # Under test.
    response = requests.get(
        f"{API_SERVER_URL}/admin/opensearch-migration/status",
        headers=admin_user.headers,
    )

    # Postcondition.
    assert response.status_code == 200
    data = response.json()
    assert data["total_chunks_migrated"] == 0
    assert data["created_at"] is not None
    assert data["migration_completed_at"] is None
    assert data["approx_chunk_count_in_vespa"] is None


def test_endpoints_require_admin(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Endpoints should reject unauthenticated requests."""
    for url in [
        f"{API_SERVER_URL}/admin/opensearch-migration/status",
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
    ]:
        response = requests.get(url)
        assert response.status_code == 403

    response = requests.put(
        f"{API_SERVER_URL}/admin/opensearch-migration/retrieval",
        json={"enable_opensearch_retrieval": True},
    )
    assert response.status_code == 403


================================================
FILE: backend/tests/integration/tests/pat/test_pat_api.py
================================================
"""
Integration tests for Personal Access Token (PAT) API.

Test Suite:
1. test_pat_lifecycle_happy_path - Complete PAT lifecycle (create, auth, revoke)
2. test_pat_user_isolation_and_authentication - User authentication and multi-user isolation
3. test_pat_expiration_flow - Expiration logic (end-of-day UTC, never-expiring)
4. test_pat_validation_errors - Input validation and error handling
5. test_pat_sorting_and_last_used - Sorting and last_used_at tracking
6. test_pat_role_based_access_control - Admin vs Basic vs Curator permissions
"""

import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import requests

from onyx.auth.schemas import UserRole
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.pat import PATManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def test_pat_lifecycle_happy_path(reset: None) -> None:  # noqa: ARG001
    """Complete PAT lifecycle: create, authenticate, revoke."""
    user: DATestUser = UserManager.create(name="pat_user")

    # Create PAT
    pat = PATManager.create(
        name="My Integration Token",
        expiration_days=30,
        user_performing_action=user,
    )

    assert pat.id is not None
    assert pat.name == "My Integration Token"
    assert pat.token is not None  # Raw token only returned on creation
    assert pat.token_display is not None
    assert pat.created_at is not None
    assert pat.expires_at is not None

    assert pat.token.startswith("onyx_pat_")
    assert len(pat.token) > 20

    assert "****" in pat.token_display
    assert pat.token_display.startswith("onyx_pat_")

    # List PATs
    tokens = PATManager.list(user)
    assert len(tokens) == 1
    assert tokens[0].id == pat.id
    assert tokens[0].name == "My Integration Token"
    assert tokens[0].token_display == pat.token_display
    assert tokens[0].token is None

    # Authenticate with PAT
    auth_response = PATManager.authenticate(pat.token)
    assert auth_response.status_code == 200
    me_data = auth_response.json()
    assert me_data["email"] == user.email
    assert me_data["id"] == user.id

    # Revoke PAT
    PATManager.revoke(pat.id, user)

    # Verify revoked token fails authentication
    revoked_auth_response = PATManager.authenticate(pat.token)
    assert revoked_auth_response.status_code == 403  # Revoked token returns 403

    # Verify token is no longer listed
    tokens_after_revoke = PATManager.list(user)
    assert len(tokens_after_revoke) == 0


def test_pat_user_isolation_and_authentication(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """
    PATs authenticate as real users, and users can only see/manage their own tokens.
    """
    user_a: DATestUser = UserManager.create(name="user_a")
    user_b: DATestUser = UserManager.create(name="user_b")

    # Create tokens for both users
    user_a_pats = []
    for i in range(2):
        pat = PATManager.create(
            name=f"User A Token {i + 1}",
            expiration_days=30,
            user_performing_action=user_a,
        )
        user_a_pats.append(pat)

    user_b_pats = []
    for i in range(2):
        pat = PATManager.create(
            name=f"User B Token {i + 1}",
            expiration_days=30,
            user_performing_action=user_b,
        )
        user_b_pats.append(pat)

    # Verify PATs authenticate as the correct users
    for user, pat in [(user_a, user_a_pats[0]), (user_b, user_b_pats[0])]:
        assert pat.token is not None
        me_response = PATManager.authenticate(pat.token)
        assert me_response.status_code == 200
        me_data = me_response.json()
        assert me_data["email"] == user.email
        assert me_data["id"] == user.id

    # Verify each user only sees their own tokens
    user_a_list = PATManager.list(user_a)
    assert len(user_a_list) == 2

    user_b_list = PATManager.list(user_b)
    assert len(user_b_list) == 2

    # Verify user A cannot delete user B's token using their PAT
    assert user_a_pats[0].token is not None
    delete_response = requests.delete(
        f"{API_SERVER_URL}/user/pats/{user_b_pats[0].id}",
        headers=PATManager.get_auth_headers(user_a_pats[0].token),
        timeout=60,
    )
    assert delete_response.status_code == 404

    # Verify user B's token still exists
    user_b_list_after = PATManager.list(user_b)
    assert len(user_b_list_after) == 2

    # Verify deleting non-existent token returns 404
    delete_fake = requests.delete(
        f"{API_SERVER_URL}/user/pats/999999",
        headers=user_a.headers,
        timeout=60,
    )
    assert delete_fake.status_code == 404


def test_pat_expiration_flow(reset: None) -> None:  # noqa: ARG001
    """Expiration timestamp is end-of-day (23:59:59 UTC); never-expiring tokens work; revoked tokens fail."""
    user: DATestUser = UserManager.create(name="expiration_user")

    # Create expiring token
    pat = PATManager.create(
        name="Expiring Token",
        expiration_days=7,
        user_performing_action=user,
    )

    assert pat.expires_at is not None
    expires_at = datetime.fromisoformat(pat.expires_at.replace("Z", "+00:00"))

    # Verify end-of-day expiration
    assert expires_at.hour == 23
    assert expires_at.minute == 59
    assert expires_at.second == 59

    # Calculate expected end-of-day 7 days from now
    now = datetime.now(timezone.utc)
    expected_date = (now + timedelta(days=7)).date()
    expected_expiry = datetime.combine(expected_date, datetime.max.time()).replace(
        tzinfo=timezone.utc
    )
    # Allow for small timing differences (within a day)
    assert abs((expires_at - expected_expiry).total_seconds()) < 86400  # 1 day

    # Create never-expiring token
    never_expiring_pat = PATManager.create(
        name="Never Expiring Token",
        expiration_days=None,
        user_performing_action=user,
    )
    assert never_expiring_pat.expires_at is None

    # Verify never-expiring token works
    assert never_expiring_pat.token is not None
    auth_response = PATManager.authenticate(never_expiring_pat.token)
    assert auth_response.status_code == 200

    # Revoke the never-expiring token
    PATManager.revoke(never_expiring_pat.id, user)

    # Verify revoked token fails (token var still holds the revoked value)
    revoked_auth_response = PATManager.authenticate(never_expiring_pat.token)
    assert revoked_auth_response.status_code == 403


def test_pat_validation_errors(reset: None) -> None:  # noqa: ARG001
    """Validate input errors: empty name, name too long, negative/zero expiration."""
    user: DATestUser = UserManager.create(name="validation_user")

    # Empty name should fail
    empty_name_response = requests.post(
        f"{API_SERVER_URL}/user/pats",
        json={"name": "", "expiration_days": 30},
        headers=user.headers,
        timeout=60,
    )
    assert empty_name_response.status_code == 422

    # Name too long should fail
    long_name = "a" * 101
    long_name_response = requests.post(
        f"{API_SERVER_URL}/user/pats",
        json={"name": long_name, "expiration_days": 30},
        headers=user.headers,
        timeout=60,
    )
    assert long_name_response.status_code == 422

    # Negative expiration should fail
    negative_exp_response = requests.post(
        f"{API_SERVER_URL}/user/pats",
        json={"name": "Test Token", "expiration_days": -1},
        headers=user.headers,
        timeout=60,
    )
    assert negative_exp_response.status_code == 422

    # Zero expiration should fail
    zero_exp_response = requests.post(
        f"{API_SERVER_URL}/user/pats",
        json={"name": "Test Token", "expiration_days": 0},
        headers=user.headers,
        timeout=60,
    )
    assert zero_exp_response.status_code == 422

    # Max length name (100 chars) should succeed
    valid_name = "a" * 100
    valid_pat = PATManager.create(
        name=valid_name,
        expiration_days=7,
        user_performing_action=user,
    )
    assert valid_pat.id is not None

    # Missing name should fail
    missing_name_response = requests.post(
        f"{API_SERVER_URL}/user/pats",
        json={"expiration_days": 30},
        headers=user.headers,
        timeout=60,
    )
    assert missing_name_response.status_code == 422


def test_pat_sorting_and_last_used(reset: None) -> None:  # noqa: ARG001
    """PATs are sorted by created_at DESC; last_used_at updates after authentication."""
    user: DATestUser = UserManager.create(name="sorting_user")

    # Create tokens with small delays to ensure different timestamps
    token1 = PATManager.create(
        name="First Token",
        expiration_days=30,
        user_performing_action=user,
    )

    time.sleep(0.1)

    PATManager.create(
        name="Second Token",
        expiration_days=30,
        user_performing_action=user,
    )

    time.sleep(0.1)

    PATManager.create(
        name="Third Token",
        expiration_days=30,
        user_performing_action=user,
    )

    # Verify sorted by created_at DESC (newest first)
    tokens = PATManager.list(user)
    assert len(tokens) == 3

    assert tokens[0].name == "Third Token"
    assert tokens[1].name == "Second Token"
    assert tokens[2].name == "First Token"

    # Verify all tokens have no last_used_at initially
    for token in tokens:
        assert token.last_used_at is None

    # Use the first token (oldest)
    assert token1.token is not None
    auth_response = PATManager.authenticate(token1.token)
    assert auth_response.status_code == 200

    time.sleep(0.5)

    # Verify last_used_at is updated for the used token only
    tokens_after_use = PATManager.list(user)

    token1_after_use = next(t for t in tokens_after_use if t.name == "First Token")
    assert token1_after_use.last_used_at is not None

    token2_after_use = next(t for t in tokens_after_use if t.name == "Second Token")
    token3_after_use = next(t for t in tokens_after_use if t.name == "Third Token")
    assert token2_after_use.last_used_at is None
    assert token3_after_use.last_used_at is None


def test_pat_role_based_access_control(reset: None) -> None:  # noqa: ARG001
    """
    PATs inherit user roles and permissions:
    - Admin PAT: Full access to admin-only endpoints
    - Curator/Global Curator PATs: Access to management endpoints
    - Basic PAT: Denied access to admin and management endpoints
    """
    # Create users with different roles
    admin_user: DATestUser = UserManager.create(name="admin_user")
    assert admin_user.role == UserRole.ADMIN

    basic_user: DATestUser = UserManager.create(name="basic_user")
    assert basic_user.role == UserRole.BASIC

    curator_user: DATestUser = UserManager.create(name="curator_user")
    curator_user = UserManager.set_role(
        user_to_set=curator_user,
        target_role=UserRole.CURATOR,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert curator_user.role == UserRole.CURATOR

    global_curator_user: DATestUser = UserManager.create(name="global_curator_user")
    global_curator_user = UserManager.set_role(
        user_to_set=global_curator_user,
        target_role=UserRole.GLOBAL_CURATOR,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert global_curator_user.role == UserRole.GLOBAL_CURATOR

    # Create PATs for each user
    admin_pat = PATManager.create(
        name="Admin Token",
        expiration_days=7,
        user_performing_action=admin_user,
    )

    basic_pat = PATManager.create(
        name="Basic Token",
        expiration_days=7,
        user_performing_action=basic_user,
    )

    curator_pat = PATManager.create(
        name="Curator Token",
        expiration_days=7,
        user_performing_action=curator_user,
    )

    global_curator_pat = PATManager.create(
        name="Global Curator Token",
        expiration_days=7,
        user_performing_action=global_curator_user,
    )

    # Verify all tokens are present (type narrowing for mypy)
    assert admin_pat.token is not None
    assert basic_pat.token is not None
    assert curator_pat.token is not None
    assert global_curator_pat.token is not None

    # Test admin-only endpoint access
    print("\n[Test] Admin PAT accessing admin-only endpoint...")
    admin_endpoint_response = requests.get(
        f"{API_SERVER_URL}/admin/api-key",
        headers=PATManager.get_auth_headers(admin_pat.token),
        timeout=60,
    )
    assert admin_endpoint_response.status_code == 200
    print("[✓] Admin PAT successfully accessed /admin/api-key")

    print("\n[Test] Basic PAT accessing admin endpoint...")
    basic_admin_response = requests.get(
        f"{API_SERVER_URL}/admin/api-key",
        headers=PATManager.get_auth_headers(basic_pat.token),
        timeout=60,
    )
    assert basic_admin_response.status_code == 403
    print("[✓] Basic PAT correctly denied access (403) to /admin/api-key")

    print("\n[Test] Curator PAT accessing admin-only endpoint...")
    curator_admin_response = requests.get(
        f"{API_SERVER_URL}/admin/api-key",
        headers=PATManager.get_auth_headers(curator_pat.token),
        timeout=60,
    )
    assert curator_admin_response.status_code == 403
    print("[✓] Curator PAT correctly denied access (403) to /admin/api-key")

    print("\n[Test] Global Curator PAT accessing admin-only endpoint...")
    global_curator_admin_response = requests.get(
        f"{API_SERVER_URL}/admin/api-key",
        headers=PATManager.get_auth_headers(global_curator_pat.token),
        timeout=60,
    )
    assert global_curator_admin_response.status_code == 403
    print("[✓] Global Curator PAT correctly denied access (403) to /admin/api-key")

    # Test management endpoint access
    print("\n[Test] Testing management endpoint access for curators...")

    admin_manage_response = requests.get(
        f"{API_SERVER_URL}/manage/admin/connector",
        headers=PATManager.get_auth_headers(admin_pat.token),
        timeout=60,
    )
    assert admin_manage_response.status_code == 200
    print("[✓] Admin PAT can access /manage/admin/connector")

    curator_manage_response = requests.get(
        f"{API_SERVER_URL}/manage/admin/connector",
        headers=PATManager.get_auth_headers(curator_pat.token),
        timeout=60,
    )
    assert curator_manage_response.status_code == 200
    print("[✓] Curator PAT can access /manage/admin/connector")

    global_curator_manage_response = requests.get(
        f"{API_SERVER_URL}/manage/admin/connector",
        headers=PATManager.get_auth_headers(global_curator_pat.token),
        timeout=60,
    )
    assert global_curator_manage_response.status_code == 200
    print("[✓] Global Curator PAT can access /manage/admin/connector")

    basic_manage_response = requests.get(
        f"{API_SERVER_URL}/manage/admin/connector",
        headers=PATManager.get_auth_headers(basic_pat.token),
        timeout=60,
    )
    assert basic_manage_response.status_code in [403, 401]
    print(
        f"[✓] Basic PAT correctly denied access ({basic_manage_response.status_code}) to /manage/admin/connector"
    )

    # Verify PATs authenticate with correct identity and role
    print("\n[Test] Verifying PATs authenticate as correct users with correct roles...")

    admin_me = PATManager.authenticate(admin_pat.token)
    assert admin_me.status_code == 200
    assert admin_me.json()["email"] == admin_user.email
    assert admin_me.json()["role"] == UserRole.ADMIN.value

    basic_me = PATManager.authenticate(basic_pat.token)
    assert basic_me.status_code == 200
    assert basic_me.json()["email"] == basic_user.email
    assert basic_me.json()["role"] == UserRole.BASIC.value

    curator_me = PATManager.authenticate(curator_pat.token)
    assert curator_me.status_code == 200
    assert curator_me.json()["email"] == curator_user.email
    assert curator_me.json()["role"] == UserRole.CURATOR.value

    global_curator_me = PATManager.authenticate(global_curator_pat.token)
    assert global_curator_me.status_code == 200
    assert global_curator_me.json()["email"] == global_curator_user.email
    assert global_curator_me.json()["role"] == UserRole.GLOBAL_CURATOR.value

    print("[✓] All PATs authenticate with correct user identity and role")

    # Verify all PATs can access basic endpoints
    print("\n[Test] All PATs can access basic endpoints...")
    for pat, user_name in [
        (admin_pat, "Admin"),
        (basic_pat, "Basic"),
        (curator_pat, "Curator"),
        (global_curator_pat, "Global Curator"),
    ]:
        assert pat.token is not None
        persona_response = requests.get(
            f"{API_SERVER_URL}/persona",
            headers=PATManager.get_auth_headers(pat.token),
            timeout=60,
        )
        assert persona_response.status_code == 200
        print(f"[✓] {user_name} PAT can access /persona endpoint")

    print("\n[✓] All role-based access control tests passed!")
    print("Summary:")
    print(
        "  - Admin PAT: Full access to admin-only endpoints (/admin/*, /manage/admin/*)"
    )
    print(
        "  - Curator PAT: Access to management endpoints (/manage/admin/*), denied on admin-only (/admin/*)"
    )
    print(
        "  - Global Curator PAT: Access to management endpoints (/manage/admin/*), denied on admin-only (/admin/*)"
    )
    print("  - Basic PAT: Denied access to admin and management endpoints")
    print("  - All PATs: Can access basic endpoints (/persona, /me, etc.)")
    print("  - All PATs: Authenticate with correct user identity and role")


================================================
FILE: backend/tests/integration/tests/permissions/test_auth_permission_propagation.py
================================================
"""Integration tests for permission propagation across auth-triggered group changes.

These tests verify that effective permissions (via /me/permissions) actually
propagate when users are added/removed from default groups through role changes.
Custom permission grant tests will be added once the permission grant API is built.
"""

import os

import pytest

from onyx.auth.schemas import UserRole
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


def _get_basic_group_member_emails(admin_user: DATestUser) -> set[str]:
    all_groups = UserGroupManager.get_all(admin_user, include_default=True)
    basic_group = next(
        (g for g in all_groups if g.is_default and g.name == "Basic"), None
    )
    assert basic_group is not None, "Basic default group not found"
    return {u.email for u in basic_group.users}


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission propagation tests require enterprise features",
)
def test_basic_permission_granted_on_registration(
    reset: None,  # noqa: ARG001
) -> None:
    """New users should get 'basic' permission through default group assignment."""
    admin_user: DATestUser = UserManager.create(email="admin@example.com")
    basic_user: DATestUser = UserManager.create(email="basic@example.com")

    # Admin should have permissions from Admin group
    admin_perms = UserManager.get_permissions(admin_user)
    assert "basic" in admin_perms

    # Basic user should have 'basic' from Basic default group
    basic_perms = UserManager.get_permissions(basic_user)
    assert "basic" in basic_perms

    # Verify group membership matches
    assert basic_user.email in _get_basic_group_member_emails(admin_user)


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission propagation tests require enterprise features",
)
def test_role_downgrade_removes_basic_group_and_permission(
    reset: None,  # noqa: ARG001
) -> None:
    """Downgrading to EXT_PERM_USER or SLACK_USER should remove from Basic group."""
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # --- EXT_PERM_USER ---
    ext_user: DATestUser = UserManager.create(email="ext@example.com")
    assert ext_user.email in _get_basic_group_member_emails(admin_user)

    UserManager.set_role(
        user_to_set=ext_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert ext_user.email not in _get_basic_group_member_emails(admin_user)

    # --- SLACK_USER ---
    slack_user: DATestUser = UserManager.create(email="slack@example.com")
    assert slack_user.email in _get_basic_group_member_emails(admin_user)

    UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )
    assert slack_user.email not in _get_basic_group_member_emails(admin_user)


================================================
FILE: backend/tests/integration/tests/permissions/test_cc_pair_permissions.py
================================================
"""
This file takes the happy path to adding a curator to a user group and then tests
the permissions of the curator manipulating connector-credential pairs.
"""

import os

import pytest
from onyx_openapi_client.exceptions import ApiException  # type: ignore[import-untyped,unused-ignore,import-not-found]

from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and User Group tests are enterprise only",
)
def test_cc_pair_permissions(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Creating a curator
    curator: DATestUser = UserManager.create(name="curator")

    # Creating a user group
    user_group_1 = UserGroupManager.create(
        name="curated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    # setting the user as a curator for the user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )

    # Creating another user group that the user is not a curator of
    user_group_2 = UserGroupManager.create(
        name="uncurated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    connector_1 = ConnectorManager.create(
        name="admin_owned_connector",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        access_type=AccessType.PRIVATE,
        user_performing_action=admin_user,
    )
    # currently we dont enforce permissions at the connector level
    # pending cc_pair -> connector rework
    # connector_2 = ConnectorManager.create(
    #     name="curator_visible_connector",
    #     source=DocumentSource.CONFLUENCE,
    #     groups=[user_group_2.id],
    #     is_public=False,
    #     user_performing_action=admin_user,
    # )
    # Create a credentials that the curator is and is not curator of
    credential_1 = CredentialManager.create(
        name="curator_owned_credential",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        curator_public=False,
        user_performing_action=admin_user,
    )
    credential_2 = CredentialManager.create(
        name="curator_visible_credential",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_2.id],
        curator_public=False,
        user_performing_action=admin_user,
    )

    # END OF HAPPY PATH

    """Tests for things Curators should not be able to do"""

    # Curators should not be able to create a cc
    # pair for a user group they are not a curator of
    with pytest.raises(ApiException):
        CCPairManager.create(
            connector_id=connector_1.id,
            credential_id=credential_1.id,
            name="invalid_cc_pair_2",
            access_type=AccessType.PRIVATE,
            groups=[user_group_1.id, user_group_2.id],
            user_performing_action=curator,
        )

    # Curators should not be able to create a cc
    # pair without an attached user group
    with pytest.raises(ApiException):
        CCPairManager.create(
            connector_id=connector_1.id,
            credential_id=credential_1.id,
            name="invalid_cc_pair_2",
            access_type=AccessType.PRIVATE,
            groups=[],
            user_performing_action=curator,
        )

    # # This test is currently disabled because permissions are
    # # not enforced at the connector level
    # # Curators should not be able to create a cc pair
    # # for a user group that the connector does not belong to (NOT WORKING)
    # with pytest.raises(HTTPError):
    #     CCPairManager.create(
    #         connector_id=connector_2.id,
    #         credential_id=credential_1.id,
    #         name="invalid_cc_pair_3",
    #         access_type=AccessType.PRIVATE,
    #         groups=[user_group_1.id],
    #         user_performing_action=curator,
    #     )

    # Curators should not be able to create a cc
    # pair for a user group that the credential does not belong to
    with pytest.raises(ApiException):
        CCPairManager.create(
            connector_id=connector_1.id,
            credential_id=credential_2.id,
            name="invalid_cc_pair_4",
            access_type=AccessType.PRIVATE,
            groups=[user_group_1.id],
            user_performing_action=curator,
        )

    """Tests for things Curators should be able to do"""

    # Re-create connector since the credential_2 validation error above
    # triggers connector deletion in the exception handler
    connector_1 = ConnectorManager.create(
        name="admin_owned_connector_2",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        access_type=AccessType.PRIVATE,
        user_performing_action=admin_user,
    )

    # Curators should be able to create a private
    # cc pair for a user group they are a curator of
    valid_cc_pair = CCPairManager.create(
        name="valid_cc_pair",
        connector_id=connector_1.id,
        credential_id=credential_1.id,
        access_type=AccessType.PRIVATE,
        groups=[user_group_1.id],
        user_performing_action=curator,
    )

    # Verify the created cc pair
    CCPairManager.verify(
        cc_pair=valid_cc_pair,
        user_performing_action=curator,
    )

    # Test pausing the cc pair
    CCPairManager.pause_cc_pair(valid_cc_pair, user_performing_action=curator)

    # Test deleting the cc pair
    CCPairManager.delete(valid_cc_pair, user_performing_action=curator)
    CCPairManager.wait_for_deletion_completion(
        cc_pair_id=valid_cc_pair.id, user_performing_action=curator
    )

    CCPairManager.verify(
        cc_pair=valid_cc_pair,
        verify_deleted=True,
        user_performing_action=curator,
    )


================================================
FILE: backend/tests/integration/tests/permissions/test_connector_permissions.py
================================================
"""
This file takes the happy path to adding a curator to a user group and then tests
the permissions of the curator manipulating connectors.
"""

import os

import pytest
from requests.exceptions import HTTPError

from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
def test_connector_permissions(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Creating a curator
    curator: DATestUser = UserManager.create(name="curator")

    # Creating a user group
    user_group_1 = UserGroupManager.create(
        name="user_group_1",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    # setting the user as a curator for the user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )

    # Creating another user group that the user is not a curator of
    user_group_2 = UserGroupManager.create(
        name="user_group_2",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # END OF HAPPY PATH

    """Tests for things Curators should not be able to do"""

    # Curators should not be able to create a connector for a
    # user group they are not a curator of
    with pytest.raises(HTTPError):
        ConnectorManager.create(
            name="invalid_connector_2",
            source=DocumentSource.CONFLUENCE,
            groups=[user_group_1.id, user_group_2.id],
            access_type=AccessType.PRIVATE,
            user_performing_action=curator,
        )

    """Tests for things Curators should be able to do"""

    # Curators should be able to create a private
    # connector for a user group they are a curator of
    valid_connector = ConnectorManager.create(
        name="valid_connector",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        access_type=AccessType.PRIVATE,
        user_performing_action=curator,
    )
    assert valid_connector.id is not None

    # Verify the created connector
    created_connector = ConnectorManager.get(
        valid_connector.id, user_performing_action=curator
    )
    assert created_connector.name == valid_connector.name
    assert created_connector.source == valid_connector.source

    # Verify that the connector can be found in the list of all connectors
    all_connectors = ConnectorManager.get_all(user_performing_action=curator)
    assert any(conn.id == valid_connector.id for conn in all_connectors)

    # Test editing the connector
    valid_connector.name = "updated_valid_connector"
    ConnectorManager.edit(valid_connector, user_performing_action=curator)

    # Verify the edit
    updated_connector = ConnectorManager.get(
        valid_connector.id, user_performing_action=curator
    )
    assert updated_connector.name == "updated_valid_connector"

    # Test deleting the connector
    ConnectorManager.delete(connector=valid_connector, user_performing_action=curator)

    # Verify the deletion
    all_connectors_after_delete = ConnectorManager.get_all(
        user_performing_action=curator
    )
    assert all(conn.id != valid_connector.id for conn in all_connectors_after_delete)

    # Test that curator cannot create a connector for a group they are not a curator of
    with pytest.raises(HTTPError):
        ConnectorManager.create(
            name="invalid_connector_3",
            source=DocumentSource.CONFLUENCE,
            groups=[user_group_2.id],
            access_type=AccessType.PRIVATE,
            user_performing_action=curator,
        )

    # Curators should be able to create a public connector
    public_connector = ConnectorManager.create(
        name="curator_public_connector",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        access_type=AccessType.PUBLIC,
        user_performing_action=curator,
    )
    assert public_connector.id is not None


================================================
FILE: backend/tests/integration/tests/permissions/test_credential_permissions.py
================================================
"""
This file takes the happy path to adding a curator to a user group and then tests
the permissions of the curator manipulating credentials.
"""

import os

import pytest
from requests.exceptions import HTTPError

from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
def test_credential_permissions(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Creating a curator
    curator: DATestUser = UserManager.create(name="curator")

    # Creating a user group
    user_group_1 = UserGroupManager.create(
        name="user_group_1",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    # setting the user as a curator for the user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )

    # Creating another user group that the user is not a curator of
    user_group_2 = UserGroupManager.create(
        name="user_group_2",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # END OF HAPPY PATH

    """Tests for things Curators should not be able to do"""

    # Curators should not be able to create a credential for a user group they are not a curator of
    with pytest.raises(HTTPError):
        CredentialManager.create(
            name="invalid_credential_2",
            source=DocumentSource.CONFLUENCE,
            groups=[user_group_1.id, user_group_2.id],
            curator_public=False,
            user_performing_action=curator,
        )

    """Tests for things Curators should be able to do"""
    # Curators should be able to create a private credential for a user group they are a curator of
    valid_credential = CredentialManager.create(
        name="valid_credential",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        curator_public=False,
        user_performing_action=curator,
    )

    # Verify the created credential
    CredentialManager.verify(
        credential=valid_credential,
        user_performing_action=curator,
    )

    # Test editing the credential
    valid_credential.name = "updated_valid_credential"
    CredentialManager.edit(valid_credential, user_performing_action=curator)

    # Verify the edit
    CredentialManager.verify(
        credential=valid_credential,
        user_performing_action=curator,
    )

    # Test deleting the credential
    CredentialManager.delete(valid_credential, user_performing_action=curator)

    # Verify the deletion
    CredentialManager.verify(
        credential=valid_credential,
        verify_deleted=True,
        user_performing_action=curator,
    )

    # Curators should be able to create a public credential
    public_credential = CredentialManager.create(
        name="curator_public_credential",
        source=DocumentSource.CONFLUENCE,
        groups=[user_group_1.id],
        curator_public=True,
        user_performing_action=curator,
    )
    CredentialManager.verify(
        credential=public_credential,
        user_performing_action=curator,
    )


================================================
FILE: backend/tests/integration/tests/permissions/test_doc_set_permissions.py
================================================
import os

import pytest
from requests.exceptions import HTTPError

from onyx.db.enums import AccessType
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document_set import DocumentSetManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
def test_doc_set_permissions_setup(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Creating a second user (curator)
    curator: DATestUser = UserManager.create(name="curator")

    # Creating the first user group
    user_group_1 = UserGroupManager.create(
        name="curated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # Setting the curator as a curator for the first user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )

    # Creating a second user group
    user_group_2 = UserGroupManager.create(
        name="uncurated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # Admin creates a cc_pair
    private_cc_pair = CCPairManager.create_from_scratch(
        access_type=AccessType.PRIVATE,
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # Admin creates a public cc_pair
    public_cc_pair = CCPairManager.create_from_scratch(
        access_type=AccessType.PUBLIC,
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # END OF HAPPY PATH

    """Tests for things Curators/Admins should not be able to do"""

    # Test that curator cannot create a non-public document set for the group they don't curate
    with pytest.raises(HTTPError):
        DocumentSetManager.create(
            name="Invalid Document Set 1",
            is_public=False,
            groups=[user_group_2.id],
            cc_pair_ids=[public_cc_pair.id],
            user_performing_action=curator,
        )

    # Test that curator cannot create a document set attached to both groups
    with pytest.raises(HTTPError):
        DocumentSetManager.create(
            name="Invalid Document Set 2",
            is_public=False,
            cc_pair_ids=[public_cc_pair.id],
            groups=[user_group_1.id, user_group_2.id],
            user_performing_action=curator,
        )

    # Test that curator cannot create a document set with no groups
    with pytest.raises(HTTPError):
        DocumentSetManager.create(
            name="Invalid Document Set 3",
            is_public=False,
            cc_pair_ids=[public_cc_pair.id],
            groups=[],
            user_performing_action=curator,
        )

    # Test that curator cannot create a document set with no cc_pairs
    with pytest.raises(HTTPError):
        DocumentSetManager.create(
            name="Invalid Document Set 4",
            is_public=False,
            cc_pair_ids=[],
            groups=[user_group_1.id],
            user_performing_action=curator,
        )

    # Test that admin cannot create a document set with no cc_pairs
    with pytest.raises(HTTPError):
        DocumentSetManager.create(
            name="Invalid Document Set 4",
            is_public=False,
            cc_pair_ids=[],
            groups=[user_group_1.id],
            user_performing_action=admin_user,
        )

    """Tests for things Curators should be able to do"""
    # Test that curator can create a document set for the group they curate
    valid_doc_set = DocumentSetManager.create(
        name="Valid Document Set",
        is_public=False,
        cc_pair_ids=[public_cc_pair.id],
        groups=[user_group_1.id],
        user_performing_action=curator,
    )

    DocumentSetManager.wait_for_sync(
        document_sets_to_check=[valid_doc_set], user_performing_action=admin_user
    )

    # Verify that the valid document set was created
    DocumentSetManager.verify(
        document_set=valid_doc_set,
        user_performing_action=admin_user,
    )

    # Verify that only one document set exists
    all_doc_sets = DocumentSetManager.get_all(user_performing_action=admin_user)
    assert len(all_doc_sets) == 1

    # Add the private_cc_pair to the doc set on our end for later comparison
    valid_doc_set.cc_pair_ids.append(private_cc_pair.id)

    # Confirm the curator can't add the private_cc_pair to the doc set
    with pytest.raises(HTTPError):
        DocumentSetManager.edit(
            document_set=valid_doc_set,
            user_performing_action=curator,
        )
    # Confirm the admin can't add the private_cc_pair to the doc set
    with pytest.raises(HTTPError):
        DocumentSetManager.edit(
            document_set=valid_doc_set,
            user_performing_action=admin_user,
        )

    # Verify the document set has not been updated in the db
    with pytest.raises(ValueError):
        DocumentSetManager.verify(
            document_set=valid_doc_set,
            user_performing_action=admin_user,
        )

    # Add the private_cc_pair to the user group on our end for later comparison
    user_group_1.cc_pair_ids.append(private_cc_pair.id)

    # Admin adds the cc_pair to the group the curator curates
    UserGroupManager.edit(
        user_group=user_group_1,
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    UserGroupManager.verify(
        user_group=user_group_1,
        user_performing_action=admin_user,
    )

    # Confirm the curator can now add the cc_pair to the doc set
    DocumentSetManager.edit(
        document_set=valid_doc_set,
        user_performing_action=curator,
    )
    DocumentSetManager.wait_for_sync(
        document_sets_to_check=[valid_doc_set], user_performing_action=admin_user
    )
    # Verify the updated document set
    DocumentSetManager.verify(
        document_set=valid_doc_set,
        user_performing_action=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/permissions/test_file_connector_permissions.py
================================================
import io
import json
import os

import pytest
import requests

from onyx.db.enums import AccessType
from onyx.db.models import UserRole
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


def _upload_connector_file(
    *,
    user_performing_action: DATestUser,
    file_name: str,
    content: bytes,
) -> tuple[str, str]:
    headers = user_performing_action.headers.copy()
    headers.pop("Content-Type", None)

    response = requests.post(
        f"{API_SERVER_URL}/manage/admin/connector/file/upload",
        files=[("files", (file_name, io.BytesIO(content), "text/plain"))],
        headers=headers,
    )
    response.raise_for_status()
    payload = response.json()
    return payload["file_paths"][0], payload["file_names"][0]


def _update_connector_files(
    *,
    connector_id: int,
    user_performing_action: DATestUser,
    file_ids_to_remove: list[str],
    new_file_name: str,
    new_file_content: bytes,
) -> requests.Response:
    headers = user_performing_action.headers.copy()
    headers.pop("Content-Type", None)

    return requests.post(
        f"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files/update",
        data={"file_ids_to_remove": json.dumps(file_ids_to_remove)},
        files=[("files", (new_file_name, io.BytesIO(new_file_content), "text/plain"))],
        headers=headers,
    )


def _list_connector_files(
    *,
    connector_id: int,
    user_performing_action: DATestUser,
) -> requests.Response:
    return requests.get(
        f"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files",
        headers=user_performing_action.headers,
    )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
@pytest.mark.usefixtures("reset")
def test_only_global_curator_can_update_public_file_connector_files() -> None:
    admin_user = UserManager.create(name="admin_user")

    global_curator_creator = UserManager.create(name="global_curator_creator")
    global_curator_creator = UserManager.set_role(
        user_to_set=global_curator_creator,
        target_role=UserRole.GLOBAL_CURATOR,
        user_performing_action=admin_user,
    )

    global_curator_editor = UserManager.create(name="global_curator_editor")
    global_curator_editor = UserManager.set_role(
        user_to_set=global_curator_editor,
        target_role=UserRole.GLOBAL_CURATOR,
        user_performing_action=admin_user,
    )

    curator_user = UserManager.create(name="curator_user")
    curator_group = UserGroupManager.create(
        name="curator_group",
        user_ids=[curator_user.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[curator_group],
        user_performing_action=admin_user,
    )
    UserGroupManager.set_curator_status(
        test_user_group=curator_group,
        user_to_set_as_curator=curator_user,
        user_performing_action=admin_user,
    )

    initial_file_id, initial_file_name = _upload_connector_file(
        user_performing_action=global_curator_creator,
        file_name="initial-file.txt",
        content=b"initial file content",
    )

    connector = ConnectorManager.create(
        user_performing_action=global_curator_creator,
        name="public_file_connector",
        source=DocumentSource.FILE,
        connector_specific_config={
            "file_locations": [initial_file_id],
            "file_names": [initial_file_name],
            "zip_metadata_file_id": None,
        },
        access_type=AccessType.PUBLIC,
        groups=[],
    )
    credential = CredentialManager.create(
        user_performing_action=global_curator_creator,
        source=DocumentSource.FILE,
        curator_public=True,
        groups=[],
        name="public_file_connector_credential",
    )
    CCPairManager.create(
        connector_id=connector.id,
        credential_id=credential.id,
        user_performing_action=global_curator_creator,
        access_type=AccessType.PUBLIC,
        groups=[],
        name="public_file_connector_cc_pair",
    )

    curator_list_response = _list_connector_files(
        connector_id=connector.id,
        user_performing_action=curator_user,
    )
    curator_list_response.raise_for_status()
    curator_list_payload = curator_list_response.json()
    assert any(f["file_id"] == initial_file_id for f in curator_list_payload["files"])

    global_curator_list_response = _list_connector_files(
        connector_id=connector.id,
        user_performing_action=global_curator_editor,
    )
    global_curator_list_response.raise_for_status()
    global_curator_list_payload = global_curator_list_response.json()
    assert any(
        f["file_id"] == initial_file_id for f in global_curator_list_payload["files"]
    )

    denied_response = _update_connector_files(
        connector_id=connector.id,
        user_performing_action=curator_user,
        file_ids_to_remove=[initial_file_id],
        new_file_name="curator-file.txt",
        new_file_content=b"curator updated file",
    )
    assert denied_response.status_code == 403

    allowed_response = _update_connector_files(
        connector_id=connector.id,
        user_performing_action=global_curator_editor,
        file_ids_to_remove=[initial_file_id],
        new_file_name="global-curator-file.txt",
        new_file_content=b"global curator updated file",
    )
    allowed_response.raise_for_status()

    payload = allowed_response.json()
    assert initial_file_id not in payload["file_paths"]
    assert "global-curator-file.txt" in payload["file_names"]

    creator_group = UserGroupManager.create(
        name="creator_group",
        user_ids=[global_curator_creator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[creator_group],
        user_performing_action=admin_user,
    )

    private_file_id, private_file_name = _upload_connector_file(
        user_performing_action=global_curator_creator,
        file_name="private-initial-file.txt",
        content=b"private initial file content",
    )

    private_connector = ConnectorManager.create(
        user_performing_action=global_curator_creator,
        name="private_file_connector",
        source=DocumentSource.FILE,
        connector_specific_config={
            "file_locations": [private_file_id],
            "file_names": [private_file_name],
            "zip_metadata_file_id": None,
        },
        access_type=AccessType.PRIVATE,
        groups=[creator_group.id],
    )
    private_credential = CredentialManager.create(
        user_performing_action=global_curator_creator,
        source=DocumentSource.FILE,
        curator_public=False,
        groups=[creator_group.id],
        name="private_file_connector_credential",
    )
    CCPairManager.create(
        connector_id=private_connector.id,
        credential_id=private_credential.id,
        user_performing_action=global_curator_creator,
        access_type=AccessType.PRIVATE,
        groups=[creator_group.id],
        name="private_file_connector_cc_pair",
    )

    private_denied_response = _update_connector_files(
        connector_id=private_connector.id,
        user_performing_action=global_curator_editor,
        file_ids_to_remove=[private_file_id],
        new_file_name="global-curator-private-file.txt",
        new_file_content=b"global curator private update",
    )
    assert private_denied_response.status_code == 403


================================================
FILE: backend/tests/integration/tests/permissions/test_persona_permissions.py
================================================
"""
This file tests the permissions for creating and editing personas for different user roles:
- Basic users can create personas and edit their own
- Curators can edit personas that belong exclusively to groups they curate
- Admins can edit all personas
"""

import os

import pytest
from requests.exceptions import HTTPError

from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
def test_persona_permissions(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Creating a curator user
    curator: DATestUser = UserManager.create(name="curator")

    # Creating a basic user
    basic_user: DATestUser = UserManager.create(name="basic_user")

    # Creating user groups
    user_group_1 = UserGroupManager.create(
        name="curated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    # Setting the user as a curator for the user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )

    # Creating another user group that the user is not a curator of
    user_group_2 = UserGroupManager.create(
        name="uncurated_user_group",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_2], user_performing_action=admin_user
    )

    """Test that any user can create a persona"""
    # Basic user creates a persona
    basic_user_persona = PersonaManager.create(
        name="basic_user_persona",
        description="A persona created by basic user",
        is_public=False,
        groups=[],
        users=[admin_user.id],
        user_performing_action=basic_user,
    )
    PersonaManager.verify(basic_user_persona, user_performing_action=basic_user)

    # Curator creates a persona
    curator_persona = PersonaManager.create(
        name="curator_persona",
        description="A persona created by curator",
        is_public=False,
        groups=[],
        user_performing_action=curator,
    )
    PersonaManager.verify(curator_persona, user_performing_action=curator)

    # Admin creates personas for different groups
    admin_persona_group_1 = PersonaManager.create(
        name="admin_persona_group_1",
        description="A persona for group 1",
        is_public=False,
        groups=[user_group_1.id],
        user_performing_action=admin_user,
    )
    admin_persona_group_2 = PersonaManager.create(
        name="admin_persona_group_2",
        description="A persona for group 2",
        is_public=False,
        groups=[user_group_2.id],
        user_performing_action=admin_user,
    )
    admin_persona_both_groups = PersonaManager.create(
        name="admin_persona_both_groups",
        description="A persona for both groups",
        is_public=False,
        groups=[user_group_1.id, user_group_2.id],
        user_performing_action=admin_user,
    )

    """Test that users can edit their own personas"""
    # Basic user can edit their own persona
    PersonaManager.edit(
        persona=basic_user_persona,
        description="Updated description by basic user",
        user_performing_action=basic_user,
    )
    PersonaManager.verify(basic_user_persona, user_performing_action=basic_user)

    # Basic user cannot edit other's personas
    with pytest.raises(HTTPError):
        PersonaManager.edit(
            persona=curator_persona,
            description="Invalid edit by basic user",
            user_performing_action=basic_user,
        )

    """Test curator permissions"""
    # Curator can edit personas that belong exclusively to groups they curate
    PersonaManager.edit(
        persona=admin_persona_group_1,
        description="Updated by curator",
        user_performing_action=curator,
    )
    PersonaManager.verify(admin_persona_group_1, user_performing_action=curator)

    # Curator cannot edit personas in groups they don't curate
    with pytest.raises(HTTPError):
        PersonaManager.edit(
            persona=admin_persona_group_2,
            description="Invalid edit by curator",
            user_performing_action=curator,
        )

    # Curator cannot edit personas that belong to multiple groups, even if they curate one
    with pytest.raises(HTTPError):
        PersonaManager.edit(
            persona=admin_persona_both_groups,
            description="Invalid edit by curator",
            user_performing_action=curator,
        )

    """Test admin permissions"""
    # Admin can edit any persona

    # the persona was shared with the admin user on creation
    # this edit call will simulate having the same user in the list twice.
    # The server side should dedupe and handle this correctly (prior bug)
    PersonaManager.edit(
        persona=basic_user_persona,
        description="Updated by admin 2",
        users=[admin_user.id, admin_user.id],
        user_performing_action=admin_user,
    )
    PersonaManager.verify(basic_user_persona, user_performing_action=admin_user)

    PersonaManager.edit(
        persona=curator_persona,
        description="Updated by admin",
        user_performing_action=admin_user,
    )
    PersonaManager.verify(curator_persona, user_performing_action=admin_user)

    PersonaManager.edit(
        persona=admin_persona_group_1,
        description="Updated by admin",
        user_performing_action=admin_user,
    )
    PersonaManager.verify(admin_persona_group_1, user_performing_action=admin_user)

    PersonaManager.edit(
        persona=admin_persona_group_2,
        description="Updated by admin",
        user_performing_action=admin_user,
    )
    PersonaManager.verify(admin_persona_group_2, user_performing_action=admin_user)

    PersonaManager.edit(
        persona=admin_persona_both_groups,
        description="Updated by admin",
        user_performing_action=admin_user,
    )
    PersonaManager.verify(admin_persona_both_groups, user_performing_action=admin_user)


================================================
FILE: backend/tests/integration/tests/permissions/test_user_file_permissions.py
================================================
"""
This file tests user file permissions in different scenarios:
1. Public assistant with user files - files should be accessible to all users
2. Direct file access - user files should NOT be accessible by users who don't own them
"""

import io
from typing import NamedTuple

import pytest

from onyx.file_store.models import FileDescriptor
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestPersona
from tests.integration.common_utils.test_models import DATestUser


class UserFileTestSetup(NamedTuple):
    admin_user: DATestUser
    user1_file_owner: DATestUser
    user2_non_owner: DATestUser
    user1_file_descriptor: FileDescriptor
    user1_file_id: str
    public_assistant: DATestPersona


@pytest.fixture
def user_file_setup(reset: None) -> UserFileTestSetup:  # noqa: ARG001
    """
    Common setup for user file permission tests.
    Creates users, files, and a public assistant with files.
    """
    # Create an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # Create LLM provider for chat functionality
    LLMProviderManager.create(user_performing_action=admin_user)

    # Create user1 who will own the file
    user1: DATestUser = UserManager.create(name="user1_file_owner")

    # Create user2 who will use the assistant but doesn't own the file
    user2: DATestUser = UserManager.create(name="user2_non_owner")

    # Create a test file and upload as user1
    test_file_content = b"This is test content for user file permission checking."
    test_file = ("test_file.txt", io.BytesIO(test_file_content))

    file_descriptors, error = FileManager.upload_files(
        files=[test_file],
        user_performing_action=user1,
    )

    assert not error, f"Failed to upload file: {error}"
    assert len(file_descriptors) == 1, "Expected 1 file to be uploaded"

    # Get the file descriptor and user_file_id
    user1_file_descriptor = file_descriptors[0]
    user_file_id = user1_file_descriptor.get("user_file_id")

    assert user_file_id is not None, "user_file_id should not be None"

    # Create a public assistant with the user file attached
    public_assistant = PersonaManager.create(
        name="Public Assistant with Files",
        description="A public assistant with user files for testing permissions",
        is_public=True,
        user_file_ids=[user_file_id],
        user_performing_action=admin_user,
    )

    return UserFileTestSetup(
        admin_user=admin_user,
        user1_file_owner=user1,
        user2_non_owner=user2,
        user1_file_descriptor=user1_file_descriptor,
        user1_file_id=user_file_id,
        public_assistant=public_assistant,
    )


def test_public_assistant_with_user_files(
    user_file_setup: UserFileTestSetup,
) -> None:
    """
    Test that a public assistant with user files attached can be used by users
    who don't own those files without permission errors.
    """
    # Create a chat session with the public assistant as user2
    chat_session = ChatSessionManager.create(
        persona_id=user_file_setup.public_assistant.id,
        description="Test chat session for user file permissions",
        user_performing_action=user_file_setup.user2_non_owner,
    )

    # Send a message as user2 - this should not throw a permission error
    # even though user2 doesn't own the file attached to the assistant
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="Hello, can you help me?",
        user_performing_action=user_file_setup.user2_non_owner,
    )

    # Verify the message was processed without errors
    assert (
        response.error is None
    ), f"Expected no error when user2 uses public assistant with user1's files, but got error: {response.error}"
    assert len(response.full_message) > 0, "Expected a response from the assistant"

    # Verify chat history is accessible
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=chat_session,
        user_performing_action=user_file_setup.user2_non_owner,
    )
    assert (
        len(chat_history) >= 2
    ), "Expected at least 2 messages (user message and assistant response)"


================================================
FILE: backend/tests/integration/tests/permissions/test_user_role_permissions.py
================================================
"""
This file tests the ability of different user types to set the role of other users.
"""

import os

import pytest
from requests.exceptions import HTTPError

from onyx.db.models import UserRole
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator and user group tests are enterprise only",
)
def test_user_role_setting_permissions(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Creating a basic user
    basic_user: DATestUser = UserManager.create(name="basic_user")
    assert UserManager.is_role(basic_user, UserRole.BASIC)

    # Creating a curator
    curator: DATestUser = UserManager.create(name="curator")
    assert UserManager.is_role(curator, UserRole.BASIC)

    # Creating a curator without adding to a group should not work
    with pytest.raises(HTTPError):
        UserManager.set_role(
            user_to_set=curator,
            target_role=UserRole.CURATOR,
            user_performing_action=admin_user,
        )

    global_curator: DATestUser = UserManager.create(name="global_curator")
    assert UserManager.is_role(global_curator, UserRole.BASIC)

    # Setting the role of a global curator should not work for a basic user
    with pytest.raises(HTTPError):
        UserManager.set_role(
            user_to_set=global_curator,
            target_role=UserRole.GLOBAL_CURATOR,
            user_performing_action=basic_user,
        )

    # Setting the role of a global curator should work for an admin user
    UserManager.set_role(
        user_to_set=global_curator,
        target_role=UserRole.GLOBAL_CURATOR,
        user_performing_action=admin_user,
    )
    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)

    # Setting the role of a global curator should not work for an invalid curator
    with pytest.raises(HTTPError):
        UserManager.set_role(
            user_to_set=global_curator,
            target_role=UserRole.BASIC,
            user_performing_action=global_curator,
        )
    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)

    # Creating a user group
    user_group_1 = UserGroupManager.create(
        name="user_group_1",
        user_ids=[],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # This should fail because the curator is not in the user group
    with pytest.raises(HTTPError):
        UserGroupManager.set_curator_status(
            test_user_group=user_group_1,
            user_to_set_as_curator=curator,
            user_performing_action=admin_user,
        )

    # Adding the curator to the user group
    user_group_1.user_ids = [curator.id]
    UserGroupManager.edit(user_group=user_group_1, user_performing_action=admin_user)
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # This should work because the curator is in the user group
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/permissions/test_whole_curator_flow.py
================================================
"""
This test tests the happy path for curator permissions
"""

import os

import pytest

from onyx.db.enums import AccessType
from onyx.db.models import UserRole
from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.user import DATestUser
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator tests are enterprise only",
)
def test_whole_curator_flow(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Creating a curator
    curator: DATestUser = UserManager.create(name="curator")

    # Creating a user group
    user_group_1 = UserGroupManager.create(
        name="user_group_1",
        user_ids=[curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )
    # Making curator a curator of user_group_1
    UserGroupManager.set_curator_status(
        test_user_group=user_group_1,
        user_to_set_as_curator=curator,
        user_performing_action=admin_user,
    )
    assert UserManager.is_role(curator, UserRole.CURATOR)

    # Creating a credential as curator
    test_credential = CredentialManager.create(
        name="curator_test_credential",
        source=DocumentSource.FILE,
        curator_public=False,
        groups=[user_group_1.id],
        user_performing_action=curator,
    )

    # Creating a connector as curator
    test_connector = ConnectorManager.create(
        name="curator_test_connector",
        source=DocumentSource.FILE,
        access_type=AccessType.PRIVATE,
        groups=[user_group_1.id],
        user_performing_action=curator,
    )

    # Test editing the connector
    test_connector.name = "updated_test_connector"
    ConnectorManager.edit(connector=test_connector, user_performing_action=curator)

    # Creating a CC pair as curator
    test_cc_pair = CCPairManager.create(
        connector_id=test_connector.id,
        credential_id=test_credential.id,
        name="curator_test_cc_pair",
        access_type=AccessType.PRIVATE,
        groups=[user_group_1.id],
        user_performing_action=curator,
    )

    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user)

    # Verify that the curator can pause and unpause the CC pair
    CCPairManager.pause_cc_pair(cc_pair=test_cc_pair, user_performing_action=curator)

    # Verify that the curator can delete the CC pair
    CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=curator)
    CCPairManager.wait_for_deletion_completion(
        cc_pair_id=test_cc_pair.id, user_performing_action=curator
    )

    # Verify that the CC pair has been deleted
    CCPairManager.verify(
        cc_pair=test_cc_pair,
        verify_deleted=True,
        user_performing_action=admin_user,
    )


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Curator tests are enterprise only",
)
def test_global_curator_flow(reset: None) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
    assert UserManager.is_role(admin_user, UserRole.ADMIN)

    # Creating a user
    global_curator: DATestUser = UserManager.create(name="global_curator")
    assert UserManager.is_role(global_curator, UserRole.BASIC)

    # Set the user to a global curator
    UserManager.set_role(
        user_to_set=global_curator,
        target_role=UserRole.GLOBAL_CURATOR,
        user_performing_action=admin_user,
    )
    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)

    # Creating a user group containing the global curator
    user_group_1 = UserGroupManager.create(
        name="user_group_1",
        user_ids=[global_curator.id],
        cc_pair_ids=[],
        user_performing_action=admin_user,
    )
    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    # Creating a credential as global curator
    test_credential = CredentialManager.create(
        name="curator_test_credential",
        source=DocumentSource.FILE,
        curator_public=False,
        groups=[user_group_1.id],
        user_performing_action=global_curator,
    )

    # Creating a connector as global curator
    test_connector = ConnectorManager.create(
        name="curator_test_connector",
        source=DocumentSource.FILE,
        access_type=AccessType.PRIVATE,
        groups=[user_group_1.id],
        user_performing_action=global_curator,
    )

    # Test editing the connector
    test_connector.name = "updated_test_connector"
    ConnectorManager.edit(
        connector=test_connector, user_performing_action=global_curator
    )

    # Creating a CC pair as global curator
    test_cc_pair = CCPairManager.create(
        connector_id=test_connector.id,
        credential_id=test_credential.id,
        name="curator_test_cc_pair",
        access_type=AccessType.PRIVATE,
        groups=[user_group_1.id],
        user_performing_action=global_curator,
    )

    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user)

    # Verify that the curator can pause and unpause the CC pair
    CCPairManager.pause_cc_pair(
        cc_pair=test_cc_pair, user_performing_action=global_curator
    )

    # Verify that the curator can delete the CC pair
    CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=global_curator)
    CCPairManager.wait_for_deletion_completion(
        cc_pair_id=test_cc_pair.id, user_performing_action=global_curator
    )

    # Verify that the CC pair has been deleted
    CCPairManager.verify(
        cc_pair=test_cc_pair,
        verify_deleted=True,
        user_performing_action=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/personalization/test_personalization_flow.py
================================================
import requests

from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def _get_auth_headers(user: DATestUser) -> tuple[dict, dict]:
    return user.headers, {
        FASTAPI_USERS_AUTH_COOKIE_NAME: user.cookies[FASTAPI_USERS_AUTH_COOKIE_NAME]
    }


def _get_me(headers: dict, cookies: dict) -> dict:
    response = requests.get(f"{API_SERVER_URL}/me", headers=headers, cookies=cookies)
    response.raise_for_status()
    return response.json()


def _patch_personalization(headers: dict, cookies: dict, payload: dict) -> None:
    response = requests.patch(
        f"{API_SERVER_URL}/user/personalization",
        json=payload,
        headers=headers,
        cookies=cookies,
    )
    response.raise_for_status()


def test_personalization_round_trip(reset: None) -> None:  # noqa: ARG001
    user = UserManager.create()
    headers, cookies = _get_auth_headers(user)

    # baseline should have empty personalization
    me_initial = _get_me(headers, cookies)
    assert me_initial["personalization"]["name"] == ""
    assert me_initial["personalization"]["role"] == ""
    assert me_initial["personalization"]["use_memories"] is True
    assert me_initial["personalization"]["enable_memory_tool"] is True
    assert me_initial["personalization"]["memories"] == []

    payload = {
        "name": "Jane Doe",
        "role": "Developer advocate",
        "use_memories": True,
        "memories": [
            {"content": "Loves peanut butter"},
            {"content": "Prefers API docs"},
        ],
    }

    _patch_personalization(headers, cookies, payload)

    me_after = _get_me(headers, cookies)
    personalization = me_after["personalization"]

    assert personalization["name"] == payload["name"]
    assert personalization["role"] == payload["role"]
    assert personalization["use_memories"] is True
    returned_memories = personalization["memories"]
    assert len(returned_memories) == 2
    for mem in returned_memories:
        assert isinstance(mem["id"], int)
        assert isinstance(mem["content"], str)
    assert [m["content"] for m in returned_memories] == [
        "Prefers API docs",
        "Loves peanut butter",
    ]

    # update memories to empty
    payload["memories"] = []
    _patch_personalization(headers, cookies, payload)
    me_final = _get_me(headers, cookies)
    assert me_final["personalization"]["memories"] == []


def test_enable_memory_tool_round_trip(reset: None) -> None:  # noqa: ARG001
    user = UserManager.create()
    headers, cookies = _get_auth_headers(user)

    # default should be True
    me_initial = _get_me(headers, cookies)
    assert me_initial["personalization"]["enable_memory_tool"] is True

    # disable enable_memory_tool
    _patch_personalization(headers, cookies, {"enable_memory_tool": False})
    me_after = _get_me(headers, cookies)
    assert me_after["personalization"]["enable_memory_tool"] is False

    # re-enable
    _patch_personalization(headers, cookies, {"enable_memory_tool": True})
    me_reenabled = _get_me(headers, cookies)
    assert me_reenabled["personalization"]["enable_memory_tool"] is True


def test_enable_memory_tool_independent_of_use_memories(
    reset: None,  # noqa: ARG001
) -> None:
    user = UserManager.create()
    headers, cookies = _get_auth_headers(user)

    # set use_memories=False and enable_memory_tool=True simultaneously
    _patch_personalization(
        headers, cookies, {"use_memories": False, "enable_memory_tool": True}
    )
    me = _get_me(headers, cookies)
    assert me["personalization"]["use_memories"] is False
    assert me["personalization"]["enable_memory_tool"] is True

    # reverse: use_memories=True and enable_memory_tool=False
    _patch_personalization(
        headers, cookies, {"use_memories": True, "enable_memory_tool": False}
    )
    me = _get_me(headers, cookies)
    assert me["personalization"]["use_memories"] is True
    assert me["personalization"]["enable_memory_tool"] is False


================================================
FILE: backend/tests/integration/tests/personas/test_persona_categories.py
================================================
from uuid import uuid4

import pytest
from requests.exceptions import HTTPError

from tests.integration.common_utils.managers.persona import (
    PersonaLabelManager,
)
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestPersonaLabel
from tests.integration.common_utils.test_models import DATestUser


def test_persona_label_management(reset: None) -> None:  # noqa: ARG001
    admin_user: DATestUser = UserManager.create(name="admin_user")

    persona_label = DATestPersonaLabel(
        id=None,
        name=f"Test label {uuid4()}",
    )
    persona_label = PersonaLabelManager.create(
        label=persona_label,
        user_performing_action=admin_user,
    )
    print(f"Created persona label {persona_label.name} with id {persona_label.id}")

    assert PersonaLabelManager.verify(
        label=persona_label,
        user_performing_action=admin_user,
    ), "Persona label was not found after creation"

    regular_user: DATestUser = UserManager.create(name="regular_user")

    updated_persona_label = DATestPersonaLabel(
        id=persona_label.id,
        name=f"Updated {persona_label.name}",
    )
    with pytest.raises(HTTPError) as exc_info:
        PersonaLabelManager.update(
            label=updated_persona_label,
            user_performing_action=regular_user,
        )
    assert exc_info.value.response is not None
    assert exc_info.value.response.status_code == 403

    assert PersonaLabelManager.verify(
        label=persona_label,
        user_performing_action=admin_user,
    ), "Persona label should not have been updated by non-admin user"

    result = PersonaLabelManager.delete(
        label=persona_label,
        user_performing_action=regular_user,
    )
    assert (
        result is False
    ), "Regular user should not be able to delete the persona label"

    assert PersonaLabelManager.verify(
        label=persona_label,
        user_performing_action=admin_user,
    ), "Persona label should not have been deleted by non-admin user"

    updated_persona_label.name = f"Updated {persona_label.name}"
    updated_persona_label = PersonaLabelManager.update(
        label=updated_persona_label,
        user_performing_action=admin_user,
    )
    print(f"Updated persona label to {updated_persona_label.name}")

    assert PersonaLabelManager.verify(
        label=updated_persona_label,
        user_performing_action=admin_user,
    ), "Persona label was not updated by admin"

    success = PersonaLabelManager.delete(
        label=persona_label,
        user_performing_action=admin_user,
    )
    assert success, "Admin user should be able to delete the persona label"
    print(f"Deleted persona label {persona_label.name} with id {persona_label.id}")

    assert not PersonaLabelManager.verify(
        label=persona_label,
        user_performing_action=admin_user,
    ), "Persona label should not exist after deletion by admin"


================================================
FILE: backend/tests/integration/tests/personas/test_persona_creation.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_models import DATestUser


def _list_minimal_personas(user: DATestUser) -> list[dict]:
    response = requests.get(
        f"{API_SERVER_URL}/persona",
        headers=user.headers,
        cookies=user.cookies,
    )
    response.raise_for_status()
    return response.json()


def _share_persona(
    persona_id: int, user_ids: list[str], acting_user: DATestUser
) -> None:
    response = requests.patch(
        f"{API_SERVER_URL}/persona/{persona_id}/share",
        json={"user_ids": user_ids},
        headers=acting_user.headers,
        cookies=acting_user.cookies,
    )
    response.raise_for_status()


def test_persona_create_update_share_delete(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    basic_user: DATestUser,
) -> None:
    # TODO: refactor `PersonaManager.verify`, not a good pattern
    # Create a persona as admin and verify it can be fetched
    expected_persona = PersonaManager.create(user_performing_action=admin_user)
    PersonaManager.verify(expected_persona, user_performing_action=admin_user)

    # Update the persona and verify changes
    updated_persona = PersonaManager.edit(
        expected_persona,
        name=f"updated-{expected_persona.name}",
        description=f"updated-{expected_persona.description}",
        is_public=False,
        user_performing_action=admin_user,
    )
    assert PersonaManager.verify(updated_persona, user_performing_action=admin_user)

    # Creator should see the persona in their minimal list
    creator_minimals = _list_minimal_personas(admin_user)
    assert any(p["id"] == updated_persona.id for p in creator_minimals)

    # Regular user should not see a non-public, non-shared persona
    other_minimals_before = _list_minimal_personas(basic_user)
    assert all(p["id"] != updated_persona.id for p in other_minimals_before)

    # Share persona with the regular user and verify visibility
    _share_persona(updated_persona.id, [basic_user.id], admin_user)
    other_minimals_after = _list_minimal_personas(basic_user)
    assert any(p["id"] == updated_persona.id for p in other_minimals_after)

    # Delete persona and verify it no longer appears in lists
    assert PersonaManager.delete(updated_persona, user_performing_action=admin_user)

    # After deletion, list should not include it for either user
    creator_minimals_after_delete = _list_minimal_personas(admin_user)
    assert all(p["id"] != updated_persona.id for p in creator_minimals_after_delete)

    regular_minimals_after_delete = _list_minimal_personas(basic_user)
    assert all(p["id"] != updated_persona.id for p in regular_minimals_after_delete)


================================================
FILE: backend/tests/integration/tests/personas/test_persona_file_context.py
================================================
"""
Integration tests for the unified persona file context flow.

End-to-end tests that verify:
1. Files can be uploaded and attached to a persona via API.
2. The persona correctly reports its attached files.
3. A chat session with a file-bearing persona processes without error.
4. Precedence: custom persona files take priority over project files when
   the chat session is inside a project.

These tests run against a real Onyx deployment (all services running).
File processing is asynchronous, so we poll the file status endpoint
until files reach COMPLETED before chatting.
"""

import time

import requests

from onyx.db.enums import UserFileStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.project import ProjectManager
from tests.integration.common_utils.test_file_utils import create_test_text_file
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

FILE_PROCESSING_POLL_INTERVAL = 2


def _poll_file_statuses(
    user_file_ids: list[str],
    user: DATestUser,
    target_status: UserFileStatus = UserFileStatus.COMPLETED,
    timeout: int = MAX_DELAY,
) -> None:
    """Block until all files reach the target status or timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        response = requests.post(
            f"{API_SERVER_URL}/user/projects/file/statuses",
            json={"file_ids": user_file_ids},
            headers=user.headers,
        )
        response.raise_for_status()
        statuses = response.json()
        if all(f["status"] == target_status.value for f in statuses):
            return
        time.sleep(FILE_PROCESSING_POLL_INTERVAL)
    raise TimeoutError(
        f"Files {user_file_ids} did not reach {target_status.value} within {timeout}s"
    )


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


def test_persona_with_files_chat_no_error(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Upload files, attach them to a persona, wait for processing,
    then send a chat message.  Verify no error is returned."""

    # Upload files (creates UserFile records)
    text_file = create_test_text_file(
        "The secret project codename is NIGHTINGALE. It was started in 2024 by the Advanced Research division."
    )
    file_descriptors, error = FileManager.upload_files(
        files=[("nightingale_brief.txt", text_file)],
        user_performing_action=admin_user,
    )
    assert not error, f"File upload failed: {error}"
    assert len(file_descriptors) == 1

    user_file_id = file_descriptors[0]["user_file_id"]
    assert user_file_id is not None

    # Wait for file processing
    _poll_file_statuses([user_file_id], admin_user, timeout=120)

    # Create persona with the file attached
    persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Nightingale Agent",
        description="Agent with secret file",
        system_prompt="You are a helpful assistant with access to uploaded files.",
        user_file_ids=[user_file_id],
    )

    # Verify persona has the file
    persona_snapshots = PersonaManager.get_one(persona.id, admin_user)
    assert len(persona_snapshots) == 1
    assert user_file_id in persona_snapshots[0].user_file_ids

    # Chat with the persona
    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        description="Test persona file context",
        user_performing_action=admin_user,
    )
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the secret project codename?",
        user_performing_action=admin_user,
    )

    assert response.error is None, f"Chat should succeed, got error: {response.error}"
    assert len(response.full_message) > 0, "Response should not be empty"


def test_persona_without_files_still_works(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """A persona with no attached files should still chat normally."""
    persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Blank Agent",
        description="No files attached",
        system_prompt="You are a helpful assistant.",
    )

    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        description="Test blank persona",
        user_performing_action=admin_user,
    )
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="Hello, how are you?",
        user_performing_action=admin_user,
    )

    assert response.error is None
    assert len(response.full_message) > 0


def test_persona_files_override_project_files(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """When a custom persona (with its own files) is used inside a project,
    the persona's files take precedence — the project's files are invisible.

    We verify this by putting different content in project vs persona files
    and checking which content the model responds with."""

    # Upload persona file
    persona_file = create_test_text_file("The persona's secret word is ALBATROSS.")
    persona_fds, err1 = FileManager.upload_files(
        files=[("persona_secret.txt", persona_file)],
        user_performing_action=admin_user,
    )
    assert not err1
    persona_user_file_id = persona_fds[0]["user_file_id"]
    assert persona_user_file_id is not None
    # Create a project and upload project files
    project = ProjectManager.create(
        name="Precedence Test Project",
        user_performing_action=admin_user,
    )
    project_files = [
        ("project_secret.txt", b"The project's secret word is FLAMINGO."),
    ]
    project_upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=project_files,
        user_performing_action=admin_user,
    )
    assert len(project_upload_result.user_files) == 1
    project_user_file_id = str(project_upload_result.user_files[0].id)

    # Wait for both persona and project file processing
    _poll_file_statuses([persona_user_file_id], admin_user, timeout=120)
    _poll_file_statuses([project_user_file_id], admin_user, timeout=120)

    # Create persona with persona file
    persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Override Agent",
        description="Persona with its own files",
        system_prompt="You are a helpful assistant. Answer using the files.",
        user_file_ids=[persona_user_file_id],
    )

    # Create chat session inside the project but using the custom persona
    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        project_id=project.id,
        user_performing_action=admin_user,
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the secret word?",
        user_performing_action=admin_user,
    )

    assert response.error is None, f"Chat should succeed, got error: {response.error}"
    # The persona's file should be what the model sees, not the project's
    message_lower = response.full_message.lower()
    assert (
        "albatross" in message_lower
    ), f"Response should reference the persona file's secret word (ALBATROSS), but got: {response.full_message}"


def test_default_persona_in_project_uses_project_files(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """When the default persona (id=0) is used inside a project,
    the project's files should be used for context."""
    project = ProjectManager.create(
        name="Default Persona Project",
        user_performing_action=admin_user,
    )
    project_files = [
        ("project_info.txt", b"The project mascot is a PANGOLIN."),
    ]
    upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=project_files,
        user_performing_action=admin_user,
    )
    assert len(upload_result.user_files) == 1

    # Wait for project file processing
    project_file_id = str(upload_result.user_files[0].id)
    _poll_file_statuses([project_file_id], admin_user, timeout=120)

    # Create chat session inside project using default persona (id=0)
    chat_session = ChatSessionManager.create(
        persona_id=0,
        project_id=project.id,
        user_performing_action=admin_user,
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the project mascot?",
        user_performing_action=admin_user,
    )

    assert response.error is None
    assert (
        "pangolin" in response.full_message.lower()
    ), f"Response should reference the project file content (PANGOLIN), but got: {response.full_message}"


def test_custom_persona_no_files_in_project_ignores_project(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """A custom persona with NO files, used inside a project with files,
    should NOT see the project's files.  The project is purely organizational.

    We verify by asking about content only in the project file and checking
    the model does NOT reference it."""

    project = ProjectManager.create(
        name="Ignored Project",
        user_performing_action=admin_user,
    )
    project_upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=[("project_only.txt", b"The project secret is CAPYBARA.")],
        user_performing_action=admin_user,
    )
    assert len(project_upload_result.user_files) == 1
    project_user_file_id = str(project_upload_result.user_files[0].id)

    # Wait for project file processing
    _poll_file_statuses([project_user_file_id], admin_user, timeout=120)

    # Custom persona with no files
    persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="No Files Agent",
        description="No files, project is irrelevant",
        system_prompt=(
            "You are a helpful assistant. If you do not have information "
            "to answer a question, say 'I do not have that information.'"
        ),
    )

    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        project_id=project.id,
        user_performing_action=admin_user,
    )

    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the project secret?",
        user_performing_action=admin_user,
    )

    assert response.error is None
    assert len(response.full_message) > 0
    assert "capybara" not in response.full_message.lower(), (
        "Response should NOT reference the project file content (CAPYBARA) "
        "because the custom persona has no files and should not inherit "
        f"project files, but got: {response.full_message}"
    )


================================================
FILE: backend/tests/integration/tests/personas/test_persona_label_updates.py
================================================
from uuid import uuid4

import requests

from onyx.server.features.persona.models import PersonaUpsertRequest
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.persona import PersonaLabelManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_models import DATestPersonaLabel
from tests.integration.common_utils.test_models import DATestUser


def test_update_persona_with_null_label_ids_preserves_labels(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    persona_label = PersonaLabelManager.create(
        label=DATestPersonaLabel(name=f"Test label {uuid4()}"),
        user_performing_action=admin_user,
    )
    assert persona_label.id is not None
    persona = PersonaManager.create(
        label_ids=[persona_label.id],
        user_performing_action=admin_user,
    )

    updated_description = f"{persona.description}-updated"
    update_request = PersonaUpsertRequest(
        name=persona.name,
        description=updated_description,
        system_prompt=persona.system_prompt or "",
        task_prompt=persona.task_prompt or "",
        datetime_aware=persona.datetime_aware,
        document_set_ids=persona.document_set_ids,
        is_public=persona.is_public,
        llm_model_provider_override=persona.llm_model_provider_override,
        llm_model_version_override=persona.llm_model_version_override,
        tool_ids=persona.tool_ids,
        users=[],
        groups=[],
        label_ids=None,
    )

    response = requests.patch(
        f"{API_SERVER_URL}/persona/{persona.id}",
        json=update_request.model_dump(mode="json", exclude_none=False),
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    response.raise_for_status()

    fetched = requests.get(
        f"{API_SERVER_URL}/persona/{persona.id}",
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    fetched.raise_for_status()
    fetched_persona = fetched.json()

    assert fetched_persona["description"] == updated_description
    fetched_label_ids = {label["id"] for label in fetched_persona["labels"]}
    assert persona_label.id in fetched_label_ids


================================================
FILE: backend/tests/integration/tests/personas/test_persona_pagination.py
================================================
import requests

from onyx.server.features.persona.constants import ADMIN_AGENTS_RESOURCE
from onyx.server.features.persona.constants import AGENTS_RESOURCE
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_models import DATestUser


def _get_agents_paginated(
    user: DATestUser,
    page_num: int,
    page_size: int,
    include_deleted: bool = False,
    get_editable: bool = False,
    include_default: bool = True,
) -> tuple[dict, int]:
    """Fetches a paginated page of agents, with status code."""
    response = requests.get(
        f"{API_SERVER_URL}{AGENTS_RESOURCE}",
        params={
            "page_num": page_num,
            "page_size": page_size,
            "include_deleted": include_deleted,
            "get_editable": get_editable,
            "include_default": include_default,
        },
        headers=user.headers,
        cookies=user.cookies,
    )
    return response.json(), response.status_code


def _get_agents_admin_paginated(
    user: DATestUser,
    page_num: int,
    page_size: int,
    include_deleted: bool = False,
    get_editable: bool = False,
    include_default: bool = True,
) -> tuple[dict, int]:
    """Fetches a paginated page of agents (admin endpoint) with status code."""
    response = requests.get(
        f"{API_SERVER_URL}{ADMIN_AGENTS_RESOURCE}",
        params={
            "page_num": page_num,
            "page_size": page_size,
            "include_deleted": include_deleted,
            "get_editable": get_editable,
            "include_default": include_default,
        },
        headers=user.headers,
        cookies=user.cookies,
    )
    response.raise_for_status()
    return response.json(), response.status_code


def test_persona_pagination_basic(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test basic pagination - verify correct items and total count."""
    # Preconditions
    personas_to_create = 25
    personas = []
    for i in range(personas_to_create):
        persona = PersonaManager.create(
            name=f"Test Persona {i}",
            user_performing_action=admin_user,
        )
        personas.append(persona)

    # Under test and postconditions
    # Test page 0 with size 10.
    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=10)
    assert "items" in page_0
    assert "total_items" in page_0
    assert len(page_0["items"]) == 10
    assert (
        page_0["total_items"] >= personas_to_create
    )  # At least personas_to_create (may have default personas)

    # Test page 2 with size 10 (should have 5+ items if only our test personas
    # exist).
    page_2, _ = _get_agents_paginated(admin_user, page_num=2, page_size=10)
    assert len(page_2["items"]) >= 5
    assert page_2["total_items"] >= personas_to_create

    # Test page beyond end (page 10 with size 10, offset 100).
    page_beyond, _ = _get_agents_paginated(admin_user, page_num=10, page_size=10)
    assert len(page_beyond["items"]) == 0
    assert page_beyond["total_items"] >= personas_to_create  # Total doesn't change.


def test_persona_pagination_ordering(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test ordering - display_priority ASC nulls last, then ID ASC."""
    # Preconditions
    # Create personas with specific display_priority values.
    persona_a = PersonaManager.create(
        name="Persona A",
        description="This should be second",
        user_performing_action=admin_user,
        display_priority=2,
    )
    persona_b = PersonaManager.create(
        name="Persona B",
        description="This should be first",
        user_performing_action=admin_user,
        display_priority=1,
    )
    persona_c = PersonaManager.create(
        name="Persona C",
        description="This should be third",
        user_performing_action=admin_user,
        display_priority=3,
    )
    persona_d = PersonaManager.create(
        name="Persona D",
        description="This should be fourth",
        user_performing_action=admin_user,
        display_priority=3,  # Note the same prio as above, should sort by id
    )

    # Under test
    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=100)

    # Postconditions
    # Find our personas in the results.
    our_expected_ordered_persona_ids = [
        persona_b.id,
        persona_a.id,
        persona_c.id,
        persona_d.id,
    ]
    our_personas_in_results = [
        p for p in page_0["items"] if p["id"] in our_expected_ordered_persona_ids
    ]
    assert len(our_personas_in_results) == 4
    # Verify ordering.
    for i in range(len(our_expected_ordered_persona_ids)):
        assert our_expected_ordered_persona_ids[i] == our_personas_in_results[i]["id"]


def test_persona_pagination_admin_endpoint(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test admin paginated endpoint returns PersonaSnapshot format."""
    # Preconditions
    personas_to_create = 5
    for i in range(personas_to_create):
        PersonaManager.create(
            name=f"Admin Test Persona {i}",
            user_performing_action=admin_user,
        )

    # Under test
    page_0, _ = _get_agents_admin_paginated(admin_user, page_num=0, page_size=10)

    # Postconditions
    assert "items" in page_0
    assert "total_items" in page_0
    assert len(page_0["items"]) >= personas_to_create
    assert page_0["total_items"] >= personas_to_create
    # Verify admin-specific fields are present (PersonaSnapshot has more
    # fields).
    first_persona = page_0["items"][0]
    # PersonaSnapshot should have these fields that MinimalPersonaSnapshot
    # doesn't.
    assert "users" in first_persona
    assert "groups" in first_persona
    assert "user_file_ids" in first_persona


def test_persona_pagination_with_deleted(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test pagination with include_deleted parameter."""
    # Preconditions
    # Create and delete a persona.
    persona = PersonaManager.create(
        name="To Be Deleted",
        user_performing_action=admin_user,
    )
    PersonaManager.delete(persona, user_performing_action=admin_user)

    # Under test and postconditions
    # Without include_deleted, should not appear.
    page_without_deleted, _ = _get_agents_paginated(
        admin_user, page_num=0, page_size=100, include_deleted=False
    )
    persona_ids_without_deleted = [p["id"] for p in page_without_deleted["items"]]
    assert persona.id not in persona_ids_without_deleted

    # With include_deleted, should appear.
    page_with_deleted, _ = _get_agents_paginated(
        admin_user, page_num=0, page_size=100, include_deleted=True
    )
    persona_ids_with_deleted = [p["id"] for p in page_with_deleted["items"]]
    assert persona.id in persona_ids_with_deleted

    # Total counts should differ.
    assert page_with_deleted["total_items"] > page_without_deleted["total_items"]


def test_persona_pagination_page_size_limits(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test page_size parameter validation (max 1000)."""
    # Preconditions
    # Create a few personas.
    for i in range(5):
        PersonaManager.create(
            name=f"Size Limit Test {i}",
            user_performing_action=admin_user,
        )

    # Under test and postconditions
    # Valid page_size of 1
    data, _ = _get_agents_paginated(admin_user, page_num=0, page_size=1)
    assert len(data["items"]) <= 1

    # Valid page_size of 1000
    data, _ = _get_agents_paginated(admin_user, page_num=0, page_size=1000)
    # We assume not that many default personas are made.
    assert len(data["items"]) == data["total_items"]

    # Invalid page_size of 1001 (exceeds max)
    _, status_code = _get_agents_paginated(admin_user, page_num=0, page_size=1001)
    assert status_code == 422  # Validation error

    # Invalid page_size of 0
    _, status_code = _get_agents_paginated(admin_user, page_num=0, page_size=0)
    assert status_code == 422  # Validation error


def test_persona_pagination_count_accuracy(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Test that total_items count is consistent across pages."""
    # Preconditions
    # Create 15 personas.
    created_personas = []
    for i in range(15):
        persona = PersonaManager.create(
            name=f"Count Test {i}",
            user_performing_action=admin_user,
        )
        created_personas.append(persona)

    # Under test and postconditions
    # Fetch first page to get total count.
    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=5)
    total_items = page_0["total_items"]
    assert total_items >= 15

    # Fetch all pages to cover all personas.
    all_ids_from_pages: set[int] = set()
    num_pages_needed = (total_items + 4) // 5  # Ceiling division
    for page_num in range(num_pages_needed):
        page, _ = _get_agents_paginated(admin_user, page_num=page_num, page_size=5)
        # All pages should report the same total.
        assert (
            page["total_items"] == total_items
        ), f"Page {page_num} has inconsistent total_items"
        all_ids_from_pages.update(p["id"] for p in page["items"])

    # Our created personas should all appear.
    our_ids = {p.id for p in created_personas}
    assert our_ids.issubset(
        all_ids_from_pages
    ), "All created personas should appear in paginated results"


def test_persona_pagination_user_permissions(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    basic_user: DATestUser,
) -> None:
    """Test that pagination respects user permissions."""
    # Preconditions
    # Admin creates a private persona (not shared).
    private_persona = PersonaManager.create(
        name="Private Persona",
        description="Not shared",
        is_public=False,
        user_performing_action=admin_user,
    )
    # Admin creates a public persona.
    public_persona = PersonaManager.create(
        name="Public Persona",
        description="Shared with all",
        is_public=True,
        user_performing_action=admin_user,
    )

    # Under test and postconditions
    # Admin should see both in paginated results.
    admin_page, _ = _get_agents_paginated(admin_user, page_num=0, page_size=100)
    admin_ids = {p["id"] for p in admin_page["items"]}
    assert private_persona.id in admin_ids
    assert public_persona.id in admin_ids

    # Basic user should only see public persona.
    user_page, _ = _get_agents_paginated(basic_user, page_num=0, page_size=100)
    user_ids = {p["id"] for p in user_page["items"]}
    assert private_persona.id not in user_ids
    assert public_persona.id in user_ids

    # Totals should differ.
    assert admin_page["total_items"] > user_page["total_items"]


================================================
FILE: backend/tests/integration/tests/personas/test_unified_assistant.py
================================================
"""Integration tests for the unified assistant."""

from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_models import DATestUser


def test_unified_assistant(
    reset: None, admin_user: DATestUser  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Combined test verifying unified assistant existence, tools, and starter messages."""
    # Fetch all personas
    personas = PersonaManager.get_all(admin_user)

    # Find the unified assistant (ID 0)
    unified_assistant = None
    for persona in personas:
        if persona.id == 0:
            unified_assistant = persona
            break

    # Assert that there are no other assistants (personas) besides the unified assistant
    # (ID 0)
    assert (
        len(personas) == 1
    ), f"Expected only the unified assistant, found {len(personas)} personas"

    # Verify the unified assistant exists
    assert unified_assistant is not None, "Unified assistant (ID 0) not found"

    # Verify basic properties
    assert unified_assistant.name == "Assistant"
    assert (
        "search, web browsing, and image generation"
        in unified_assistant.description.lower()
    )
    assert unified_assistant.is_featured is True
    assert unified_assistant.is_listed is True

    # Verify tools
    tools = unified_assistant.tools
    tool_names = [tool.name for tool in tools]
    assert "internal_search" in tool_names, "SearchTool not found in unified assistant"
    assert (
        "generate_image" in tool_names
    ), "ImageGenerationTool not found in unified assistant"
    assert "web_search" in tool_names, "WebSearchTool not found in unified assistant"

    # Verify no starter messages
    starter_messages = unified_assistant.starter_messages or []
    assert len(starter_messages) == 0, "Starter messages found"


================================================
FILE: backend/tests/integration/tests/projects/test_projects.py
================================================
from typing import List

import pytest

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import UserFile
from onyx.server.features.projects.models import UserProjectSnapshot
from tests.integration.common_utils.managers.project import ProjectManager
from tests.integration.common_utils.reset import reset_all
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


@pytest.fixture(scope="module", autouse=True)
def reset_for_module() -> None:
    """Reset all data once before running any tests in this module."""
    reset_all()


def test_projects_flow(
    reset_for_module: None,  # noqa: ARG001
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """End-to-end project flow covering creation, listing, files, instructions, deletion, and edge cases."""
    # Case 1: Project creation and listing
    ProjectManager.create(
        name="Test Project 1",
        user_performing_action=basic_user,
    )
    ProjectManager.create(
        name="Test Project 2",
        user_performing_action=basic_user,
    )

    projects = ProjectManager.get_all(user_performing_action=basic_user)
    assert len(projects) >= 2
    project_names = {p.name for p in projects}
    assert "Test Project 1" in project_names
    assert "Test Project 2" in project_names
    assert all(str(p.user_id) == basic_user.id for p in projects)

    # Case 2: File upload and management
    file_project = ProjectManager.create(
        name="File Test Project",
        user_performing_action=basic_user,
    )
    test_files = [
        ("test1.txt", b"This is test file 1 content"),
        ("test2.txt", b"This is test file 2 content"),
    ]
    upload_result = ProjectManager.upload_files(
        project_id=file_project.id,
        files=test_files,
        user_performing_action=basic_user,
    )
    assert len(upload_result.user_files) == 2
    assert len(upload_result.rejected_files) == 0
    project_files = ProjectManager.get_project_files(
        project_id=file_project.id,
        user_performing_action=basic_user,
    )
    assert len(project_files) == 2
    file_names = {f.name for f in project_files}
    assert "test1.txt" in file_names
    assert "test2.txt" in file_names

    # Case 3: Instructions set and update
    instructions_project = ProjectManager.create(
        name="Instructions Test Project",
        user_performing_action=basic_user,
    )
    instructions = "These are test project instructions"
    result = ProjectManager.set_instructions(
        project_id=instructions_project.id,
        instructions=instructions,
        user_performing_action=basic_user,
    )
    assert result == instructions
    new_instructions = "These are updated test project instructions"
    result = ProjectManager.set_instructions(
        project_id=instructions_project.id,
        instructions=new_instructions,
        user_performing_action=basic_user,
    )
    assert result == new_instructions

    # Case 4: Deletion with files (unlink but do not delete files)
    delete_file_project = ProjectManager.create(
        name="Deletion Test Project",
        user_performing_action=basic_user,
    )
    del_test_files = [
        ("delete_test1.txt", b"This is test file 1 content"),
        ("delete_test2.txt", b"This is test file 2 content"),
    ]
    ProjectManager.upload_files(
        project_id=delete_file_project.id,
        files=del_test_files,
        user_performing_action=basic_user,
    )
    del_project_files = ProjectManager.get_project_files(
        project_id=delete_file_project.id,
        user_performing_action=basic_user,
    )
    assert len(del_project_files) == 2
    deletion_success = ProjectManager.delete(
        project_id=delete_file_project.id,
        user_performing_action=basic_user,
    )
    assert deletion_success
    assert ProjectManager.verify_deleted(
        project_id=delete_file_project.id,
        user_performing_action=basic_user,
    )
    assert ProjectManager.verify_files_unlinked(
        project_id=delete_file_project.id,
        user_performing_action=basic_user,
    )
    with get_session_with_current_tenant() as db_session:
        file_ids = [f.id for f in del_project_files]
        remaining_files = (
            db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()
        )
        assert len(remaining_files) == 2

    # Case 5: Deletion with chat sessions unlinked
    chat_project = ProjectManager.create(
        name="Chat Session Test Project",
        user_performing_action=basic_user,
    )
    deletion_success = ProjectManager.delete(
        project_id=chat_project.id,
        user_performing_action=basic_user,
    )
    assert deletion_success
    assert ProjectManager.verify_chat_sessions_unlinked(
        project_id=chat_project.id,
        user_performing_action=basic_user,
    )

    # Case 6: Multiple project operations
    projects_group: List[UserProjectSnapshot] = []
    for i in range(3):
        proj = ProjectManager.create(
            name=f"Multi-op Project {i}",
            user_performing_action=basic_user,
        )
        projects_group.append(proj)

    for i, proj in enumerate(projects_group):
        tfiles = [
            (f"multi_test{i}_1.txt", b"This is test file 1 content"),
            (f"multi_test{i}_2.txt", b"This is test file 2 content"),
        ]
        ProjectManager.upload_files(
            project_id=proj.id,
            files=tfiles,
            user_performing_action=basic_user,
        )

    for i, proj in enumerate(projects_group):
        instr = f"Instructions for project {i}"
        res = ProjectManager.set_instructions(
            project_id=proj.id,
            instructions=instr,
            user_performing_action=basic_user,
        )
        assert res == instr

    for proj in projects_group:
        proj_files = ProjectManager.get_project_files(
            project_id=proj.id,
            user_performing_action=basic_user,
        )
        assert len(proj_files) == 2
        deletion_success = ProjectManager.delete(
            project_id=proj.id,
            user_performing_action=basic_user,
        )
        assert deletion_success
        assert ProjectManager.verify_deleted(
            project_id=proj.id,
            user_performing_action=basic_user,
        )
        assert ProjectManager.verify_files_unlinked(
            project_id=proj.id,
            user_performing_action=basic_user,
        )
        with get_session_with_current_tenant() as db_session:
            file_ids = [f.id for f in proj_files]
            remaining_files = (
                db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()
            )
            assert len(remaining_files) == 2

    # Case 7: Edge cases
    with pytest.raises(Exception):
        ProjectManager.create(
            name="",
            user_performing_action=basic_user,
        )

    non_existent_id = 99999
    deletion_success = ProjectManager.delete(
        project_id=non_existent_id,
        user_performing_action=basic_user,
    )
    assert not deletion_success

    with pytest.raises(Exception):
        ProjectManager.set_instructions(
            project_id=non_existent_id,
            instructions="Test instructions",
            user_performing_action=basic_user,
        )

    with pytest.raises(Exception):
        ProjectManager.upload_files(
            project_id=non_existent_id,
            files=[("test.txt", b"content")],
            user_performing_action=basic_user,
        )

    long_name = "a" * 1000
    with pytest.raises(Exception):
        ProjectManager.create(
            name=long_name,
            user_performing_action=basic_user,
        )

    long_instr_project = ProjectManager.create(
        name="Long Instructions Test",
        user_performing_action=basic_user,
    )
    long_instructions = "a" * 10000
    result = ProjectManager.set_instructions(
        project_id=long_instr_project.id,
        instructions=long_instructions,
        user_performing_action=basic_user,
    )
    assert result == long_instructions


================================================
FILE: backend/tests/integration/tests/pruning/test_pruning.py
================================================
import http.server
import os
import shutil
import tempfile
import threading
from collections.abc import Generator
from contextlib import contextmanager
from datetime import datetime
from datetime import timezone
from time import sleep
from typing import Any

import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

from onyx.server.documents.models import DocumentSource
from onyx.utils.logger import setup_logger
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.vespa import vespa_fixture

logger = setup_logger()


# FastAPI server for serving files
def create_fastapi_app(directory: str) -> FastAPI:
    app = FastAPI()

    # Mount the directory to serve static files
    app.mount("/", StaticFiles(directory=directory, html=True), name="static")

    return app


# as far as we know, this doesn't hang when crawled. This is good.
@contextmanager
def fastapi_server_context(
    directory: str, port: int = 8000
) -> Generator[None, None, None]:
    app = create_fastapi_app(directory)

    config = uvicorn.Config(app=app, host="0.0.0.0", port=port, log_level="info")
    server = uvicorn.Server(config)

    # Create a thread to run the FastAPI server
    server_thread = threading.Thread(target=server.run)
    server_thread.daemon = (
        True  # Ensures the thread will exit when the main program exits
    )

    try:
        # Start the server in the background
        server_thread.start()
        sleep(5)  # Give it a few seconds to start
        yield  # Yield control back to the calling function (context manager in use)
    finally:
        # Shutdown the server
        server.should_exit = True
        server_thread.join()


# Leaving this here for posterity and experimentation, but the reason we're
# not using this is python's web servers hang frequently when crawled
# this is obviously not good for a unit test
@contextmanager
def http_server_context(
    directory: str, port: int = 8000
) -> Generator[http.server.ThreadingHTTPServer, None, None]:
    # Create a handler that serves files from the specified directory
    def handler_class(
        *args: Any, **kwargs: Any
    ) -> http.server.SimpleHTTPRequestHandler:
        return http.server.SimpleHTTPRequestHandler(
            *args, directory=directory, **kwargs
        )

    # Create an HTTPServer instance
    httpd = http.server.ThreadingHTTPServer(("0.0.0.0", port), handler_class)

    # Define a thread that runs the server in the background
    server_thread = threading.Thread(target=httpd.serve_forever)
    server_thread.daemon = (
        True  # Ensures the thread will exit when the main program exits
    )

    try:
        # Start the server in the background
        server_thread.start()
        sleep(5)  # give it a few seconds to start
        yield httpd
    finally:
        # Shutdown the server and wait for the thread to finish
        httpd.shutdown()
        httpd.server_close()
        server_thread.join()


def test_web_pruning(
    reset: None, vespa_client: vespa_fixture  # noqa: ARG001
) -> None:  # noqa: ARG001
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # add api key to user
    APIKeyManager.create(
        user_performing_action=admin_user,
    )

    test_filename = os.path.realpath(__file__)
    test_directory = os.path.dirname(test_filename)
    with tempfile.TemporaryDirectory() as temp_dir:
        port = 8889

        website_src = os.path.join(test_directory, "website")
        website_tgt = os.path.join(temp_dir, "website")
        shutil.copytree(website_src, website_tgt)
        with fastapi_server_context(os.path.join(temp_dir, "website"), port):
            sleep(1)  # sleep a tiny bit before starting everything

            hostname = os.getenv("TEST_WEB_HOSTNAME", "localhost")
            config = {
                "base_url": f"http://{hostname}:{port}/",
                "web_connector_type": "recursive",
            }

            # store the time before we create the connector so that we know after
            # when the indexing should have started
            now = datetime.now(timezone.utc)

            # create connector
            cc_pair_1 = CCPairManager.create_from_scratch(
                source=DocumentSource.WEB,
                connector_specific_config=config,
                user_performing_action=admin_user,
            )

            CCPairManager.wait_for_indexing_completion(
                cc_pair_1, now, timeout=300, user_performing_action=admin_user
            )

            selected_cc_pair = CCPairManager.get_indexing_status_by_id(
                cc_pair_1.id, user_performing_action=admin_user
            )

            assert selected_cc_pair is not None, "cc_pair not found after indexing!"

            # used to be 15, but now
            # localhost:8889/ and localhost:8889/index.html are deduped
            assert selected_cc_pair.docs_indexed == 14

            logger.info("Removing about.html.")
            os.remove(os.path.join(website_tgt, "about.html"))
            logger.info("Removing courses.html.")
            os.remove(os.path.join(website_tgt, "courses.html"))

            now = datetime.now(timezone.utc)
            CCPairManager.prune(cc_pair_1, user_performing_action=admin_user)
            CCPairManager.wait_for_prune(
                cc_pair_1, now, timeout=300, user_performing_action=admin_user
            )

            selected_cc_pair = CCPairManager.get_indexing_status_by_id(
                cc_pair_1.id, user_performing_action=admin_user
            )
            assert selected_cc_pair is not None, "cc_pair not found after pruning!"
            assert selected_cc_pair.docs_indexed == 12

            # check vespa
            root_id = f"http://{hostname}:{port}/"
            index_id = f"http://{hostname}:{port}/index.html"
            about_id = f"http://{hostname}:{port}/about.html"
            courses_id = f"http://{hostname}:{port}/courses.html"

            doc_ids = [root_id, index_id, about_id, courses_id]
            retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"]
            retrieved_docs = {
                doc["fields"]["document_id"]: doc["fields"]
                for doc in retrieved_docs_dict
            }

            # verify root exists in Vespa
            retrieved_doc = retrieved_docs.get(root_id)
            assert retrieved_doc

            # verify index.html does not exist in Vespa since it is a duplicate of root
            retrieved_doc = retrieved_docs.get(index_id)
            assert not retrieved_doc

            # verify about and courses do not exist
            retrieved_doc = retrieved_docs.get(about_id)
            assert not retrieved_doc

            retrieved_doc = retrieved_docs.get(courses_id)
            assert not retrieved_doc


================================================
FILE: backend/tests/integration/tests/pruning/website/about.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="description" content="" />
    <meta name="author" content="http://webthemez.com" />
    <!-- css -->
    <link href="css/bootstrap.min.css" rel="stylesheet" />
    <link href="css/fancybox/jquery.fancybox.css" rel="stylesheet" />
    <link href="css/jcarousel.css" rel="stylesheet" />
    <link href="css/flexslider.css" rel="stylesheet" />
    <link href="css/style.css" rel="stylesheet" />

    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
    <!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
  </head>
  <body>
    <div id="wrapper">
      <!-- start header -->
      <header>
        <div class="navbar navbar-default navbar-static-top">
          <div class="container">
            <div class="navbar-header">
              <button
                type="button"
                class="navbar-toggle"
                data-toggle="collapse"
                data-target=".navbar-collapse"
              >
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
              </button>
              <a class="navbar-brand" href="index.html"
                ><img src="img/logo.png" alt="logo"
              /></a>
            </div>
            <div class="navbar-collapse collapse">
              <ul class="nav navbar-nav">
                <li><a href="index.html">Home</a></li>
                <li class="active"><a href="about.html">About Us</a></li>
                <li><a href="courses.html">Courses</a></li>
                <li><a href="portfolio.html">Portfolio</a></li>
                <li><a href="pricing.html">Pricing</a></li>
                <li><a href="contact.html">Contact</a></li>
              </ul>
            </div>
          </div>
        </div>
      </header>
      <!-- end header -->
      <section id="inner-headline">
        <div class="container">
          <div class="row">
            <div class="col-lg-12">
              <h2 class="pageTitle">About Us</h2>
            </div>
          </div>
        </div>
      </section>
      <section id="content">
        <div class="container">
          <div class="about">
            <div class="row">
              <div class="col-md-12">
                <div class="about-logo">
                  <h3>We are awesome <span class="color">TEAM</span></h3>
                  <p>
                    Sed ut perspiciaatis unde omnis iste natus error sit
                    voluptatem accusantium doloremque laudantium, totam rem
                    aperiam, eaque ipsa quae ab illo inventore veritatis et
                    quasi architecto beatae vitae dicta sunt explicabo. Nemo
                    enim ipsam voluptatem quia voluptas
                  </p>
                  <p>
                    Sed ut perspiciaatis unde omnis iste natus error sit
                    voluptatem accusantium doloremque laudantium, totam rem
                    aperiam, eaque ipsa quae ab illo inventore veritatis et
                    quasi architecto beatae vitae dicta sunt explicabo. Nemo
                    enim ipsam voluptatem quia voluptas
                  </p>
                </div>
                <a href="#" class="btn btn-color">Read more</a>
              </div>
            </div>
            <br />

            <div class="row">
              <div class="col-md-6">
                <img src="img/section-image-1.png" alt="" />
                <div class="space"></div>
              </div>
              <div class="col-md-6">
                <p>
                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur
                  adipisicing elit. Atque sed, quidem quis praesentium, ut unde
                  fuga error commodi architecto, laudantium culpa tenetur at id,
                  beatae pet.
                </p>
                <p>
                  Lorem ipsum dolor sit amet, consectetur adipisicing elit.
                  adipisicing sit amet, consectetur adipisicing elit. Atque sed,
                  quidem quis praesentium,m deserunt.
                </p>
                <ul class="list-unstyled">
                  <li>
                    <i class="fa fa-arrow-circle-right pr-10 colored"></i> Lorem
                    ipsum enimdolor sit amet
                  </li>
                  <li>
                    <i class="fa fa-arrow-circle-right pr-10 colored"></i>
                    Explicabo deleniti neque aliquid
                  </li>
                  <li>
                    <i class="fa fa-arrow-circle-right pr-10 colored"></i>
                    Consectetur adipisicing elit
                  </li>
                  <li>
                    <i class="fa fa-arrow-circle-right pr-10 colored"></i> Lorem
                    ipsum dolor sit amet
                  </li>
                  <li>
                    <i class="fa fa-arrow-circle-right pr-10 colored"></i> Quo
                    issimos molest quibusdam temporibus
                  </li>
                </ul>
              </div>
            </div>
            <br />
            <hr />
            <br />
            <div class="row">
              <div class="col-md-4">
                <!-- Heading and para -->
                <div class="block-heading-two">
                  <h3><span>Why Choose Us?</span></h3>
                </div>
                <p>
                  Sed ut perspiciaatis unde omnis iste natus error sit
                  voluptatem accusantium doloremque laudantium, totam rem
                  aperiam, eaque ipsa quae ab illo inventore veritatis et quasi
                  architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam
                  voluptatem quia voluptas sit aspernatur. <br /><br />Sed ut
                  perspiciaatis iste natus error sit voluptatem probably haven't
                  heard of them accusamus.
                </p>
              </div>
              <div class="col-md-4">
                <div class="block-heading-two">
                  <h3><span>Our Solution</span></h3>
                </div>
                <!-- Accordion starts -->
                <div class="panel-group" id="accordion-alt3">
                  <!-- Panel. Use "panel-XXX" class for different colors. Replace "XXX" with color. -->
                  <div class="panel">
                    <!-- Panel heading -->
                    <div class="panel-heading">
                      <h4 class="panel-title">
                        <a
                          data-toggle="collapse"
                          data-parent="#accordion-alt3"
                          href="#collapseOne-alt3"
                        >
                          <i class="fa fa-angle-right"></i> Accordion Heading
                          Text Item # 1
                        </a>
                      </h4>
                    </div>
                    <div id="collapseOne-alt3" class="panel-collapse collapse">
                      <!-- Panel body -->
                      <div class="panel-body">
                        Sed ut perspiciaatis unde omnis iste natus error sit
                        voluptatem accusantium doloremque laudantium, totam rem
                        aperiam, eaque ipsa quae ab illo inventore veritatis et
                        quasi architecto beatae vitae dicta sunt explicabo. Nemo
                        enim ipsam voluptatem quia voluptas
                      </div>
                    </div>
                  </div>
                  <div class="panel">
                    <div class="panel-heading">
                      <h4 class="panel-title">
                        <a
                          data-toggle="collapse"
                          data-parent="#accordion-alt3"
                          href="#collapseTwo-alt3"
                        >
                          <i class="fa fa-angle-right"></i> Accordion Heading
                          Text Item # 2
                        </a>
                      </h4>
                    </div>
                    <div id="collapseTwo-alt3" class="panel-collapse collapse">
                      <div class="panel-body">
                        Sed ut perspiciaatis unde omnis iste natus error sit
                        voluptatem accusantium doloremque laudantium, totam rem
                        aperiam, eaque ipsa quae ab illo inventore veritatis et
                        quasi architecto beatae vitae dicta sunt explicabo. Nemo
                        enim ipsam voluptatem quia voluptas
                      </div>
                    </div>
                  </div>
                  <div class="panel">
                    <div class="panel-heading">
                      <h4 class="panel-title">
                        <a
                          data-toggle="collapse"
                          data-parent="#accordion-alt3"
                          href="#collapseThree-alt3"
                        >
                          <i class="fa fa-angle-right"></i> Accordion Heading
                          Text Item # 3
                        </a>
                      </h4>
                    </div>
                    <div
                      id="collapseThree-alt3"
                      class="panel-collapse collapse"
                    >
                      <div class="panel-body">
                        Sed ut perspiciaatis unde omnis iste natus error sit
                        voluptatem accusantium doloremque laudantium, totam rem
                        aperiam, eaque ipsa quae ab illo inventore veritatis et
                        quasi architecto beatae vitae dicta sunt explicabo. Nemo
                        enim ipsam voluptatem quia voluptas
                      </div>
                    </div>
                  </div>
                  <div class="panel">
                    <div class="panel-heading">
                      <h4 class="panel-title">
                        <a
                          data-toggle="collapse"
                          data-parent="#accordion-alt3"
                          href="#collapseFour-alt3"
                        >
                          <i class="fa fa-angle-right"></i> Accordion Heading
                          Text Item # 4
                        </a>
                      </h4>
                    </div>
                    <div id="collapseFour-alt3" class="panel-collapse collapse">
                      <div class="panel-body">
                        Sed ut perspiciaatis unde omnis iste natus error sit
                        voluptatem accusantium doloremque laudantium, totam rem
                        aperiam, eaque ipsa quae ab illo inventore veritatis et
                        quasi architecto beatae vitae dicta sunt explicabo. Nemo
                        enim ipsam voluptatem quia voluptas
                      </div>
                    </div>
                  </div>
                </div>
                <!-- Accordion ends -->
              </div>

              <div class="col-md-4">
                <div class="block-heading-two">
                  <h3><span>Our Expertise</span></h3>
                </div>
                <h6>Web Development</h6>
                <div class="progress pb-sm">
                  <!-- White color (progress-bar-white) -->
                  <div
                    class="progress-bar progress-bar-red"
                    role="progressbar"
                    aria-valuenow="40"
                    aria-valuemin="0"
                    aria-valuemax="100"
                    style="width: 40%"
                  >
                    <span class="sr-only">40% Complete (success)</span>
                  </div>
                </div>
                <h6>Designing</h6>
                <div class="progress pb-sm">
                  <div
                    class="progress-bar progress-bar-green"
                    role="progressbar"
                    aria-valuenow="60"
                    aria-valuemin="0"
                    aria-valuemax="100"
                    style="width: 60%"
                  >
                    <span class="sr-only">40% Complete (success)</span>
                  </div>
                </div>
                <h6>User Experience</h6>
                <div class="progress pb-sm">
                  <div
                    class="progress-bar progress-bar-lblue"
                    role="progressbar"
                    aria-valuenow="80"
                    aria-valuemin="0"
                    aria-valuemax="100"
                    style="width: 80%"
                  >
                    <span class="sr-only">40% Complete (success)</span>
                  </div>
                </div>
                <h6>Development</h6>
                <div class="progress pb-sm">
                  <div
                    class="progress-bar progress-bar-yellow"
                    role="progressbar"
                    aria-valuenow="30"
                    aria-valuemin="0"
                    aria-valuemax="100"
                    style="width: 30%"
                  >
                    <span class="sr-only">40% Complete (success)</span>
                  </div>
                </div>
              </div>
            </div>

            <hr />
            <br />
            <!-- Our Team starts -->

            <!-- Heading -->
            <div class="block-heading-six">
              <h3 class="bg-color">Our Team</h3>
            </div>
            <br />

            <!-- Our team starts -->

            <div class="team-six">
              <div class="row">
                <div class="col-md-3 col-sm-6">
                  <!-- Team Member -->
                  <div class="team-member">
                    <!-- Image -->
                    <img class="img-responsive" src="img/team1.jpg" alt="" />
                    <!-- Name -->
                    <h4>Johne Doe</h4>
                    <span class="deg">Creative</span>
                  </div>
                </div>
                <div class="col-md-3 col-sm-6">
                  <!-- Team Member -->
                  <div class="team-member">
                    <!-- Image -->
                    <img class="img-responsive" src="img/team2.jpg" alt="" />
                    <!-- Name -->
                    <h4>Jennifer</h4>
                    <span class="deg">Programmer</span>
                  </div>
                </div>
                <div class="col-md-3 col-sm-6">
                  <!-- Team Member -->
                  <div class="team-member">
                    <!-- Image -->
                    <img class="img-responsive" src="img/team3.jpg" alt="" />
                    <!-- Name -->
                    <h4>Christean</h4>
                    <span class="deg">CEO</span>
                  </div>
                </div>
                <div class="col-md-3 col-sm-6">
                  <!-- Team Member -->
                  <div class="team-member">
                    <!-- Image -->
                    <img class="img-responsive" src="img/team4.jpg" alt="" />
                    <!-- Name -->
                    <h4>Kerinele rase</h4>
                    <span class="deg">Manager</span>
                  </div>
                </div>
              </div>
            </div>

            <!-- Our team ends -->
          </div>
        </div>
      </section>
      <footer>
        <div class="container">
          <div class="row">
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Our Contact</h5>
                <address>
                  <strong>Abovecompany Inc</strong><br />
                  JC Main Road, Near Silnile tower<br />
                  Pin-21542 NewYork US.
                </address>
                <p>
                  <i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br />
                  <i class="icon-envelope-alt"></i> email@domainname.com
                </p>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Quick Links</h5>
                <ul class="link-list">
                  <li><a href="#">Latest Events</a></li>
                  <li><a href="#">Terms and conditions</a></li>
                  <li><a href="#">Privacy policy</a></li>
                  <li><a href="#">Career</a></li>
                  <li><a href="#">Contact us</a></li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Latest posts</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Recent News</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
        <div id="sub-footer">
          <div class="container">
            <div class="row">
              <div class="col-lg-6">
                <div class="copyright">
                  <p>
                    <span
                      >&copy; Above Site All right reserved. Template By </span
                    ><a href="http://webthemez.com" target="_blank"
                      >WebThemez</a
                    >
                  </p>
                </div>
              </div>
              <div class="col-lg-6">
                <ul class="social-network">
                  <li>
                    <a href="#" data-placement="top" title="Facebook"
                      ><i class="fa fa-facebook"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Twitter"
                      ><i class="fa fa-twitter"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Linkedin"
                      ><i class="fa fa-linkedin"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Pinterest"
                      ><i class="fa fa-pinterest"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Google plus"
                      ><i class="fa fa-google-plus"></i
                    ></a>
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
      </footer>
    </div>
    <a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
    <!-- javascript
    ================================================== -->
    <!-- Placed at the end of the document so the pages load faster -->
    <script src="js/jquery.js"></script>
    <script src="js/jquery.easing.1.3.js"></script>
    <script src="js/bootstrap.min.js"></script>
    <script src="js/jquery.fancybox.pack.js"></script>
    <script src="js/jquery.fancybox-media.js"></script>
    <script src="js/portfolio/jquery.quicksand.js"></script>
    <script src="js/portfolio/setting.js"></script>
    <script src="js/jquery.flexslider.js"></script>
    <script src="js/animate.js"></script>
    <script src="js/custom.js"></script>
  </body>
</html>


================================================
FILE: backend/tests/integration/tests/pruning/website/contact.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="description" content="" />
    <meta name="author" content="http://webthemez.com" />
    <!-- css -->
    <link href="css/bootstrap.min.css" rel="stylesheet" />
    <link href="css/fancybox/jquery.fancybox.css" rel="stylesheet" />
    <link href="css/jcarousel.css" rel="stylesheet" />
    <link href="css/flexslider.css" rel="stylesheet" />
    <link href="css/style.css" rel="stylesheet" />

    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
    <!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
  </head>
  <body>
    <div id="wrapper">
      <!-- start header -->
      <header>
        <div class="navbar navbar-default navbar-static-top">
          <div class="container">
            <div class="navbar-header">
              <button
                type="button"
                class="navbar-toggle"
                data-toggle="collapse"
                data-target=".navbar-collapse"
              >
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
              </button>
              <a class="navbar-brand" href="index.html"
                ><img src="img/logo.png" alt="logo"
              /></a>
            </div>
            <div class="navbar-collapse collapse">
              <ul class="nav navbar-nav">
                <li><a href="index.html">Home</a></li>
                <li><a href="about.html">About Us</a></li>
                <li><a href="courses.html">Courses</a></li>
                <li><a href="portfolio.html">Portfolio</a></li>
                <li><a href="pricing.html">Pricing</a></li>
                <li class="active"><a href="contact.html">Contact</a></li>
              </ul>
            </div>
          </div>
        </div>
      </header>
      <!-- end header -->
      <section id="inner-headline">
        <div class="container">
          <div class="row">
            <div class="col-lg-12">
              <h2 class="pageTitle">Contact Us</h2>
            </div>
          </div>
        </div>
      </section>
      <section id="content">
        <div class="container">
          <div class="row">
            <div class="col-md-12">
              <script
                type="text/javascript"
                src="http://maps.google.com/maps/api/js?sensor=false"
              ></script>
              <div style="overflow: hidden; height: 300px; width: 100%">
                <div id="gmap_canvas" style="height: 300px; width: 100%"></div>
                <style>
                  #gmap_canvas img {
                    max-width: none !important;
                    background: none !important;
                  }</style
                ><a
                  class="google-map-code"
                  href="http://www.trivoo.net"
                  id="get-map-data"
                  >trivoo</a
                >
              </div>
              <script type="text/javascript">
                function init_map() {
                  var myOptions = {
                    zoom: 14,
                    center: new google.maps.LatLng(
                      40.805478,
                      -73.96522499999998,
                    ),
                    mapTypeId: google.maps.MapTypeId.ROADMAP,
                  };
                  map = new google.maps.Map(
                    document.getElementById("gmap_canvas"),
                    myOptions,
                  );
                  marker = new google.maps.Marker({
                    map: map,
                    position: new google.maps.LatLng(
                      40.805478,
                      -73.96522499999998,
                    ),
                  });
                  infowindow = new google.maps.InfoWindow({
                    content:
                      "<b>The Breslin</b><br/>2880 Broadway<br/> New York",
                  });
                  google.maps.event.addListener(marker, "click", function () {
                    infowindow.open(map, marker);
                  });
                  infowindow.open(map, marker);
                }
                google.maps.event.addDomListener(window, "load", init_map);
              </script>
            </div>
          </div>

          <div class="row">
            <div class="col-md-6">
              <br />
              <div class="alert alert-success hidden" id="contactSuccess">
                <strong>Success!</strong> Your message has been sent to us.
              </div>
              <div class="alert alert-error hidden" id="contactError">
                <strong>Error!</strong> There was an error sending your message.
              </div>
              <div class="contact-form">
                <form id="contact-form" role="form" novalidate="novalidate">
                  <div class="form-group has-feedback">
                    <label for="name">Name*</label>
                    <input
                      type="text"
                      class="form-control"
                      id="name"
                      name="name"
                      placeholder=""
                    />
                    <i class="fa fa-user form-control-feedback"></i>
                  </div>
                  <div class="form-group has-feedback">
                    <label for="email">Email*</label>
                    <input
                      type="email"
                      class="form-control"
                      id="email"
                      name="email"
                      placeholder=""
                    />
                    <i class="fa fa-envelope form-control-feedback"></i>
                  </div>
                  <div class="form-group has-feedback">
                    <label for="subject">Subject*</label>
                    <input
                      type="text"
                      class="form-control"
                      id="subject"
                      name="subject"
                      placeholder=""
                    />
                    <i class="fa fa-navicon form-control-feedback"></i>
                  </div>
                  <div class="form-group has-feedback">
                    <label for="message">Message*</label>
                    <textarea
                      class="form-control"
                      rows="6"
                      id="message"
                      name="message"
                      placeholder=""
                    ></textarea>
                    <i class="fa fa-pencil form-control-feedback"></i>
                  </div>
                  <input type="submit" value="Submit" class="btn btn-default" />
                </form>
              </div>
            </div>
            <div class="col-md-6">
              <div class="span4">
                <div class="title-box clearfix">
                  <h3 class="title-box_primary">Contact info</h3>
                </div>
                <h5>
                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur
                  adipisicing elit. Atque sed, quidem quis praesentium.
                </h5>
                <p>
                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur
                  adipisicing elit. Lorem ipsum dolor sit amet, cadipisicing sit
                  amet, consectetur adipisicing elit. Atque sed, quidem quis
                  praesentium Atque sed, quidem quis praesentium, ut unde fuga
                  error commodi architecto, laudantium culpa tenetur at id,
                  beatae pet.<br />
                </p>
                <address>
                  <strong
                    >The Company Name.<br />
                    12345 St John Point,<br />
                    Brisbean, ABC 12 St 11.</strong
                  ><br />
                  Telephone: +1 234 567 890<br />
                  FAX: +1 234 567 890<br />
                  E-mail:
                  <a href="mailto:info@sitename.org">mail@sitename.org</a><br />
                </address>
              </div>
            </div>
          </div>
        </div>
      </section>
      <footer>
        <div class="container">
          <div class="row">
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Our Contact</h5>
                <address>
                  <strong>Abovecompany Inc</strong><br />
                  JC Main Road, Near Silnile tower<br />
                  Pin-21542 NewYork US.
                </address>
                <p>
                  <i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br />
                  <i class="icon-envelope-alt"></i> email@domainname.com
                </p>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Quick Links</h5>
                <ul class="link-list">
                  <li><a href="#">Latest Events</a></li>
                  <li><a href="#">Terms and conditions</a></li>
                  <li><a href="#">Privacy policy</a></li>
                  <li><a href="#">Career</a></li>
                  <li><a href="#">Contact us</a></li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Latest posts</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Recent News</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
        <div id="sub-footer">
          <div class="container">
            <div class="row">
              <div class="col-lg-6">
                <div class="copyright">
                  <p>
                    <span
                      >&copy; Above Site All right reserved. Template By </span
                    ><a href="http://webthemez.com" target="_blank"
                      >WebThemez</a
                    >
                  </p>
                </div>
              </div>
              <div class="col-lg-6">
                <ul class="social-network">
                  <li>
                    <a href="#" data-placement="top" title="Facebook"
                      ><i class="fa fa-facebook"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Twitter"
                      ><i class="fa fa-twitter"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Linkedin"
                      ><i class="fa fa-linkedin"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Pinterest"
                      ><i class="fa fa-pinterest"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Google plus"
                      ><i class="fa fa-google-plus"></i
                    ></a>
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
      </footer>
    </div>
    <a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
    <!-- javascript
    ================================================== -->
    <!-- Placed at the end of the document so the pages load faster -->
    <script src="js/jquery.js"></script>
    <script src="js/jquery.easing.1.3.js"></script>
    <script src="js/bootstrap.min.js"></script>
    <script src="js/jquery.fancybox.pack.js"></script>
    <script src="js/jquery.fancybox-media.js"></script>
    <script src="js/portfolio/jquery.quicksand.js"></script>
    <script src="js/portfolio/setting.js"></script>
    <script src="js/jquery.flexslider.js"></script>
    <script src="js/animate.js"></script>
    <script src="js/custom.js"></script>
    <script src="js/validate.js"></script>
  </body>
</html>


================================================
FILE: backend/tests/integration/tests/pruning/website/courses.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Above Multi-purpose Free Bootstrap Responsive Template</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="" />
<meta name="author" content="http://webthemez.com" />
 
<link href="css/bootstrap.min.css" rel="stylesheet" />
<link href="css/fancybox/jquery.fancybox.css" rel="stylesheet">
<link href="css/jcarousel.css" rel="stylesheet" />
<link href="css/flexslider.css" rel="stylesheet" />
<link href="css/style.css" rel="stylesheet" />
 
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->

</head>
<body>
<div id="wrapper">

	<!-- start header -->
		<header>
        <div class="navbar navbar-default navbar-static-top">
            <div class="container">
                <div class="navbar-header">
                    <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                    </button>
                    <a class="navbar-brand" href="index.html"><img src="img/logo.png" alt="logo"/></a>
                </div>
                <div class="navbar-collapse collapse ">
                    <ul class="nav navbar-nav">
                        <li><a href="index.html">Home</a></li> 
						<li><a href="about.html">About Us</a></li>
						<li class="active"><a href="courses.html">Courses</a></li>
                        <li><a href="portfolio.html">Portfolio</a></li>
                        <li><a href="pricing.html">Pricing</a></li>
                        <li><a href="contact.html">Contact</a></li>
                    </ul>
                </div>
            </div>
        </div>
	</header><!-- end header -->
	<section id="inner-headline">
	<div class="container">
		<div class="row">
			<div class="col-lg-12">
				<h2 class="pageTitle">Courses</h2>
			</div>
		</div>
	</div>
	</section>
	 
	<section id="content">
		<div class="container">		 
				
<section class="course"> 

		<div class="row">
			<div class="col-lg-12">
				<div class=""><h3>Courses We Offer</h3><span class="clear spacer_responsive_hide_mobile " style="height:13px;display:block;"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>
			</div>
		</div>
<div class="row">
            <div class="col-md-4">
			<div class="textbox">
                <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
            <div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
			<div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
        </div>
		<div class="row">
            <div class="col-md-4">
			<div class="textbox">
                <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
            <div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
			<div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
        </div> 
</section>	<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p> </br>
        <div class="row service-v1 margin-bottom-40">
            <div class="col-md-4 md-margin-bottom-40">
               <img class="img-responsive" src="img/service1.jpg" alt="">   
                <h3>Web Development</h3>
                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        
            </div>
            <div class="col-md-4">
                <img class="img-responsive" src="img/service2.jpg" alt="">            
                <h3>Mobile Development</h3>
                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        
            </div>
            <div class="col-md-4 md-margin-bottom-40">
              <img class="img-responsive" src="img/service3.jpg" alt="">  
                <h3>Responsive Design</h3>
                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        
            </div>
        </div> 
 

    </div>
    </section>
	<footer>
	<div class="container">
		<div class="row">
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Our Contact</h5>
					<address>
					<strong>Abovecompany Inc</strong><br>
					JC Main Road, Near Silnile tower<br>
					 Pin-21542 NewYork US.</address>
					<p>
						<i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br>
						<i class="icon-envelope-alt"></i> email@domainname.com
					</p>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Quick Links</h5>
					<ul class="link-list">
						<li><a href="#">Latest Events</a></li>
						<li><a href="#">Terms and conditions</a></li>
						<li><a href="#">Privacy policy</a></li>
						<li><a href="#">Career</a></li>
						<li><a href="#">Contact us</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Latest posts</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
					<div class="widget">
					<h5 class="widgetheading">Recent News</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	<div id="sub-footer">
		<div class="container">
			<div class="row">
				<div class="col-lg-6">
					<div class="copyright">
						<p>
							<span>&copy; Above Site All right reserved. Template By </span><a href="http://webthemez.com" target="_blank">WebThemez</a>
						</p>
					</div>
				</div>
				<div class="col-lg-6">
					<ul class="social-network">
						<li><a href="#" data-placement="top" title="Facebook"><i class="fa fa-facebook"></i></a></li>
						<li><a href="#" data-placement="top" title="Twitter"><i class="fa fa-twitter"></i></a></li>
						<li><a href="#" data-placement="top" title="Linkedin"><i class="fa fa-linkedin"></i></a></li>
						<li><a href="#" data-placement="top" title="Pinterest"><i class="fa fa-pinterest"></i></a></li>
						<li><a href="#" data-placement="top" title="Google plus"><i class="fa fa-google-plus"></i></a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	</footer>
</div>
<a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
<!-- javascript
    ================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="js/jquery.js"></script>
<script src="js/jquery.easing.1.3.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/jquery.fancybox.pack.js"></script>
<script src="js/jquery.fancybox-media.js"></script> 
<script src="js/portfolio/jquery.quicksand.js"></script>
<script src="js/portfolio/setting.js"></script>
<script src="js/jquery.flexslider.js"></script>
<script src="js/animate.js"></script>
<script src="js/custom.js"></script>
</body>
</html>

================================================
FILE: backend/tests/integration/tests/pruning/website/css/animate.css
================================================
@charset "UTF-8";
/*
Animate.css - http://daneden.me/animate
Licensed under the MIT license

Copyright (c) 2013 Daniel Eden

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
body {
  /* Addresses a small issue in webkit: http://bit.ly/NEdoDq */
  -webkit-backface-visibility: hidden;
}
.animated {
  -webkit-animation-duration: 1s;
  -moz-animation-duration: 1s;
  -o-animation-duration: 1s;
  animation-duration: 1s;
  -webkit-animation-fill-mode: both;
  -moz-animation-fill-mode: both;
  -o-animation-fill-mode: both;
  animation-fill-mode: both;
}

.animated.hinge {
  -webkit-animation-duration: 2s;
  -moz-animation-duration: 2s;
  -o-animation-duration: 2s;
  animation-duration: 2s;
}

@-webkit-keyframes flash {
  0%,
  50%,
  100% {
    opacity: 1;
  }
  25%,
  75% {
    opacity: 0;
  }
}

@-moz-keyframes flash {
  0%,
  50%,
  100% {
    opacity: 1;
  }
  25%,
  75% {
    opacity: 0;
  }
}

@-o-keyframes flash {
  0%,
  50%,
  100% {
    opacity: 1;
  }
  25%,
  75% {
    opacity: 0;
  }
}

@keyframes flash {
  0%,
  50%,
  100% {
    opacity: 1;
  }
  25%,
  75% {
    opacity: 0;
  }
}

.flash {
  -webkit-animation-name: flash;
  -moz-animation-name: flash;
  -o-animation-name: flash;
  animation-name: flash;
}
@-webkit-keyframes shake {
  0%,
  100% {
    -webkit-transform: translateX(0);
  }
  10%,
  30%,
  50%,
  70%,
  90% {
    -webkit-transform: translateX(-10px);
  }
  20%,
  40%,
  60%,
  80% {
    -webkit-transform: translateX(10px);
  }
}

@-moz-keyframes shake {
  0%,
  100% {
    -moz-transform: translateX(0);
  }
  10%,
  30%,
  50%,
  70%,
  90% {
    -moz-transform: translateX(-10px);
  }
  20%,
  40%,
  60%,
  80% {
    -moz-transform: translateX(10px);
  }
}

@-o-keyframes shake {
  0%,
  100% {
    -o-transform: translateX(0);
  }
  10%,
  30%,
  50%,
  70%,
  90% {
    -o-transform: translateX(-10px);
  }
  20%,
  40%,
  60%,
  80% {
    -o-transform: translateX(10px);
  }
}

@keyframes shake {
  0%,
  100% {
    transform: translateX(0);
  }
  10%,
  30%,
  50%,
  70%,
  90% {
    transform: translateX(-10px);
  }
  20%,
  40%,
  60%,
  80% {
    transform: translateX(10px);
  }
}

.shake {
  -webkit-animation-name: shake;
  -moz-animation-name: shake;
  -o-animation-name: shake;
  animation-name: shake;
}
@-webkit-keyframes bounce {
  0%,
  20%,
  50%,
  80%,
  100% {
    -webkit-transform: translateY(0);
  }
  40% {
    -webkit-transform: translateY(-30px);
  }
  60% {
    -webkit-transform: translateY(-15px);
  }
}

@-moz-keyframes bounce {
  0%,
  20%,
  50%,
  80%,
  100% {
    -moz-transform: translateY(0);
  }
  40% {
    -moz-transform: translateY(-30px);
  }
  60% {
    -moz-transform: translateY(-15px);
  }
}

@-o-keyframes bounce {
  0%,
  20%,
  50%,
  80%,
  100% {
    -o-transform: translateY(0);
  }
  40% {
    -o-transform: translateY(-30px);
  }
  60% {
    -o-transform: translateY(-15px);
  }
}
@keyframes bounce {
  0%,
  20%,
  50%,
  80%,
  100% {
    transform: translateY(0);
  }
  40% {
    transform: translateY(-30px);
  }
  60% {
    transform: translateY(-15px);
  }
}

.bounce {
  -webkit-animation-name: bounce;
  -moz-animation-name: bounce;
  -o-animation-name: bounce;
  animation-name: bounce;
}
@-webkit-keyframes tada {
  0% {
    -webkit-transform: scale(1);
  }
  10%,
  20% {
    -webkit-transform: scale(0.9) rotate(-3deg);
  }
  30%,
  50%,
  70%,
  90% {
    -webkit-transform: scale(1.1) rotate(3deg);
  }
  40%,
  60%,
  80% {
    -webkit-transform: scale(1.1) rotate(-3deg);
  }
  100% {
    -webkit-transform: scale(1) rotate(0);
  }
}

@-moz-keyframes tada {
  0% {
    -moz-transform: scale(1);
  }
  10%,
  20% {
    -moz-transform: scale(0.9) rotate(-3deg);
  }
  30%,
  50%,
  70%,
  90% {
    -moz-transform: scale(1.1) rotate(3deg);
  }
  40%,
  60%,
  80% {
    -moz-transform: scale(1.1) rotate(-3deg);
  }
  100% {
    -moz-transform: scale(1) rotate(0);
  }
}

@-o-keyframes tada {
  0% {
    -o-transform: scale(1);
  }
  10%,
  20% {
    -o-transform: scale(0.9) rotate(-3deg);
  }
  30%,
  50%,
  70%,
  90% {
    -o-transform: scale(1.1) rotate(3deg);
  }
  40%,
  60%,
  80% {
    -o-transform: scale(1.1) rotate(-3deg);
  }
  100% {
    -o-transform: scale(1) rotate(0);
  }
}

@keyframes tada {
  0% {
    transform: scale(1);
  }
  10%,
  20% {
    transform: scale(0.9) rotate(-3deg);
  }
  30%,
  50%,
  70%,
  90% {
    transform: scale(1.1) rotate(3deg);
  }
  40%,
  60%,
  80% {
    transform: scale(1.1) rotate(-3deg);
  }
  100% {
    transform: scale(1) rotate(0);
  }
}

.tada {
  -webkit-animation-name: tada;
  -moz-animation-name: tada;
  -o-animation-name: tada;
  animation-name: tada;
}
@-webkit-keyframes swing {
  20%,
  40%,
  60%,
  80%,
  100% {
    -webkit-transform-origin: top center;
  }
  20% {
    -webkit-transform: rotate(15deg);
  }
  40% {
    -webkit-transform: rotate(-10deg);
  }
  60% {
    -webkit-transform: rotate(5deg);
  }
  80% {
    -webkit-transform: rotate(-5deg);
  }
  100% {
    -webkit-transform: rotate(0deg);
  }
}

@-moz-keyframes swing {
  20% {
    -moz-transform: rotate(15deg);
  }
  40% {
    -moz-transform: rotate(-10deg);
  }
  60% {
    -moz-transform: rotate(5deg);
  }
  80% {
    -moz-transform: rotate(-5deg);
  }
  100% {
    -moz-transform: rotate(0deg);
  }
}

@-o-keyframes swing {
  20% {
    -o-transform: rotate(15deg);
  }
  40% {
    -o-transform: rotate(-10deg);
  }
  60% {
    -o-transform: rotate(5deg);
  }
  80% {
    -o-transform: rotate(-5deg);
  }
  100% {
    -o-transform: rotate(0deg);
  }
}

@keyframes swing {
  20% {
    transform: rotate(15deg);
  }
  40% {
    transform: rotate(-10deg);
  }
  60% {
    transform: rotate(5deg);
  }
  80% {
    transform: rotate(-5deg);
  }
  100% {
    transform: rotate(0deg);
  }
}

.swing {
  -webkit-transform-origin: top center;
  -moz-transform-origin: top center;
  -o-transform-origin: top center;
  transform-origin: top center;
  -webkit-animation-name: swing;
  -moz-animation-name: swing;
  -o-animation-name: swing;
  animation-name: swing;
}
/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */

@-webkit-keyframes wobble {
  0% {
    -webkit-transform: translateX(0%);
  }
  15% {
    -webkit-transform: translateX(-25%) rotate(-5deg);
  }
  30% {
    -webkit-transform: translateX(20%) rotate(3deg);
  }
  45% {
    -webkit-transform: translateX(-15%) rotate(-3deg);
  }
  60% {
    -webkit-transform: translateX(10%) rotate(2deg);
  }
  75% {
    -webkit-transform: translateX(-5%) rotate(-1deg);
  }
  100% {
    -webkit-transform: translateX(0%);
  }
}

@-moz-keyframes wobble {
  0% {
    -moz-transform: translateX(0%);
  }
  15% {
    -moz-transform: translateX(-25%) rotate(-5deg);
  }
  30% {
    -moz-transform: translateX(20%) rotate(3deg);
  }
  45% {
    -moz-transform: translateX(-15%) rotate(-3deg);
  }
  60% {
    -moz-transform: translateX(10%) rotate(2deg);
  }
  75% {
    -moz-transform: translateX(-5%) rotate(-1deg);
  }
  100% {
    -moz-transform: translateX(0%);
  }
}

@-o-keyframes wobble {
  0% {
    -o-transform: translateX(0%);
  }
  15% {
    -o-transform: translateX(-25%) rotate(-5deg);
  }
  30% {
    -o-transform: translateX(20%) rotate(3deg);
  }
  45% {
    -o-transform: translateX(-15%) rotate(-3deg);
  }
  60% {
    -o-transform: translateX(10%) rotate(2deg);
  }
  75% {
    -o-transform: translateX(-5%) rotate(-1deg);
  }
  100% {
    -o-transform: translateX(0%);
  }
}

@keyframes wobble {
  0% {
    transform: translateX(0%);
  }
  15% {
    transform: translateX(-25%) rotate(-5deg);
  }
  30% {
    transform: translateX(20%) rotate(3deg);
  }
  45% {
    transform: translateX(-15%) rotate(-3deg);
  }
  60% {
    transform: translateX(10%) rotate(2deg);
  }
  75% {
    transform: translateX(-5%) rotate(-1deg);
  }
  100% {
    transform: translateX(0%);
  }
}

.wobble {
  -webkit-animation-name: wobble;
  -moz-animation-name: wobble;
  -o-animation-name: wobble;
  animation-name: wobble;
}
/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */

@-webkit-keyframes pulse {
  0% {
    -webkit-transform: scale(1);
  }
  50% {
    -webkit-transform: scale(1.1);
  }
  100% {
    -webkit-transform: scale(1);
  }
}
@-moz-keyframes pulse {
  0% {
    -moz-transform: scale(1);
  }
  50% {
    -moz-transform: scale(1.1);
  }
  100% {
    -moz-transform: scale(1);
  }
}
@-o-keyframes pulse {
  0% {
    -o-transform: scale(1);
  }
  50% {
    -o-transform: scale(1.1);
  }
  100% {
    -o-transform: scale(1);
  }
}
@keyframes pulse {
  0% {
    transform: scale(1);
  }
  50% {
    transform: scale(1.1);
  }
  100% {
    transform: scale(1);
  }
}

.pulse {
  -webkit-animation-name: pulse;
  -moz-animation-name: pulse;
  -o-animation-name: pulse;
  animation-name: pulse;
}
@-webkit-keyframes flip {
  0% {
    -webkit-transform: perspective(400px) rotateY(0);
    -webkit-animation-timing-function: ease-out;
  }
  40% {
    -webkit-transform: perspective(400px) translateZ(150px) rotateY(170deg);
    -webkit-animation-timing-function: ease-out;
  }
  50% {
    -webkit-transform: perspective(400px) translateZ(150px) rotateY(190deg)
      scale(1);
    -webkit-animation-timing-function: ease-in;
  }
  80% {
    -webkit-transform: perspective(400px) rotateY(360deg) scale(0.95);
    -webkit-animation-timing-function: ease-in;
  }
  100% {
    -webkit-transform: perspective(400px) scale(1);
    -webkit-animation-timing-function: ease-in;
  }
}
@-moz-keyframes flip {
  0% {
    -moz-transform: perspective(400px) rotateY(0);
    -moz-animation-timing-function: ease-out;
  }
  40% {
    -moz-transform: perspective(400px) translateZ(150px) rotateY(170deg);
    -moz-animation-timing-function: ease-out;
  }
  50% {
    -moz-transform: perspective(400px) translateZ(150px) rotateY(190deg)
      scale(1);
    -moz-animation-timing-function: ease-in;
  }
  80% {
    -moz-transform: perspective(400px) rotateY(360deg) scale(0.95);
    -moz-animation-timing-function: ease-in;
  }
  100% {
    -moz-transform: perspective(400px) scale(1);
    -moz-animation-timing-function: ease-in;
  }
}
@-o-keyframes flip {
  0% {
    -o-transform: perspective(400px) rotateY(0);
    -o-animation-timing-function: ease-out;
  }
  40% {
    -o-transform: perspective(400px) translateZ(150px) rotateY(170deg);
    -o-animation-timing-function: ease-out;
  }
  50% {
    -o-transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1);
    -o-animation-timing-function: ease-in;
  }
  80% {
    -o-transform: perspective(400px) rotateY(360deg) scale(0.95);
    -o-animation-timing-function: ease-in;
  }
  100% {
    -o-transform: perspective(400px) scale(1);
    -o-animation-timing-function: ease-in;
  }
}
@keyframes flip {
  0% {
    transform: perspective(400px) rotateY(0);
    animation-timing-function: ease-out;
  }
  40% {
    transform: perspective(400px) translateZ(150px) rotateY(170deg);
    animation-timing-function: ease-out;
  }
  50% {
    transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1);
    animation-timing-function: ease-in;
  }
  80% {
    transform: perspective(400px) rotateY(360deg) scale(0.95);
    animation-timing-function: ease-in;
  }
  100% {
    transform: perspective(400px) scale(1);
    animation-timing-function: ease-in;
  }
}

.flip {
  -webkit-backface-visibility: visible !important;
  -webkit-animation-name: flip;
  -moz-backface-visibility: visible !important;
  -moz-animation-name: flip;
  -o-backface-visibility: visible !important;
  -o-animation-name: flip;
  backface-visibility: visible !important;
  animation-name: flip;
}
@-webkit-keyframes flipInX {
  0% {
    -webkit-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }

  40% {
    -webkit-transform: perspective(400px) rotateX(-10deg);
  }

  70% {
    -webkit-transform: perspective(400px) rotateX(10deg);
  }

  100% {
    -webkit-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
}
@-moz-keyframes flipInX {
  0% {
    -moz-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }

  40% {
    -moz-transform: perspective(400px) rotateX(-10deg);
  }

  70% {
    -moz-transform: perspective(400px) rotateX(10deg);
  }

  100% {
    -moz-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
}
@-o-keyframes flipInX {
  0% {
    -o-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }

  40% {
    -o-transform: perspective(400px) rotateX(-10deg);
  }

  70% {
    -o-transform: perspective(400px) rotateX(10deg);
  }

  100% {
    -o-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
}
@keyframes flipInX {
  0% {
    transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }

  40% {
    transform: perspective(400px) rotateX(-10deg);
  }

  70% {
    transform: perspective(400px) rotateX(10deg);
  }

  100% {
    transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
}

.flipInX {
  -webkit-backface-visibility: visible !important;
  -webkit-animation-name: flipInX;
  -moz-backface-visibility: visible !important;
  -moz-animation-name: flipInX;
  -o-backface-visibility: visible !important;
  -o-animation-name: flipInX;
  backface-visibility: visible !important;
  animation-name: flipInX;
}
@-webkit-keyframes flipOutX {
  0% {
    -webkit-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
  100% {
    -webkit-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }
}

@-moz-keyframes flipOutX {
  0% {
    -moz-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
  100% {
    -moz-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }
}

@-o-keyframes flipOutX {
  0% {
    -o-transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
  100% {
    -o-transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }
}

@keyframes flipOutX {
  0% {
    transform: perspective(400px) rotateX(0deg);
    opacity: 1;
  }
  100% {
    transform: perspective(400px) rotateX(90deg);
    opacity: 0;
  }
}

.flipOutX {
  -webkit-animation-name: flipOutX;
  -webkit-backface-visibility: visible !important;
  -moz-animation-name: flipOutX;
  -moz-backface-visibility: visible !important;
  -o-animation-name: flipOutX;
  -o-backface-visibility: visible !important;
  animation-name: flipOutX;
  backface-visibility: visible !important;
}
@-webkit-keyframes flipInY {
  0% {
    -webkit-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }

  40% {
    -webkit-transform: perspective(400px) rotateY(-10deg);
  }

  70% {
    -webkit-transform: perspective(400px) rotateY(10deg);
  }

  100% {
    -webkit-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
}
@-moz-keyframes flipInY {
  0% {
    -moz-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }

  40% {
    -moz-transform: perspective(400px) rotateY(-10deg);
  }

  70% {
    -moz-transform: perspective(400px) rotateY(10deg);
  }

  100% {
    -moz-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
}
@-o-keyframes flipInY {
  0% {
    -o-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }

  40% {
    -o-transform: perspective(400px) rotateY(-10deg);
  }

  70% {
    -o-transform: perspective(400px) rotateY(10deg);
  }

  100% {
    -o-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
}
@keyframes flipInY {
  0% {
    transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }

  40% {
    transform: perspective(400px) rotateY(-10deg);
  }

  70% {
    transform: perspective(400px) rotateY(10deg);
  }

  100% {
    transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
}

.flipInY {
  -webkit-backface-visibility: visible !important;
  -webkit-animation-name: flipInY;
  -moz-backface-visibility: visible !important;
  -moz-animation-name: flipInY;
  -o-backface-visibility: visible !important;
  -o-animation-name: flipInY;
  backface-visibility: visible !important;
  animation-name: flipInY;
}
@-webkit-keyframes flipOutY {
  0% {
    -webkit-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
  100% {
    -webkit-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }
}
@-moz-keyframes flipOutY {
  0% {
    -moz-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
  100% {
    -moz-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }
}
@-o-keyframes flipOutY {
  0% {
    -o-transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
  100% {
    -o-transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }
}
@keyframes flipOutY {
  0% {
    transform: perspective(400px) rotateY(0deg);
    opacity: 1;
  }
  100% {
    transform: perspective(400px) rotateY(90deg);
    opacity: 0;
  }
}

.flipOutY {
  -webkit-backface-visibility: visible !important;
  -webkit-animation-name: flipOutY;
  -moz-backface-visibility: visible !important;
  -moz-animation-name: flipOutY;
  -o-backface-visibility: visible !important;
  -o-animation-name: flipOutY;
  backface-visibility: visible !important;
  animation-name: flipOutY;
}
@-webkit-keyframes fadeIn {
  0% {
    opacity: 0;
  }
  100% {
    opacity: 1;
  }
}

@-moz-keyframes fadeIn {
  0% {
    opacity: 0;
  }
  100% {
    opacity: 1;
  }
}

@-o-keyframes fadeIn {
  0% {
    opacity: 0;
  }
  100% {
    opacity: 1;
  }
}

@keyframes fadeIn {
  0% {
    opacity: 0;
  }
  100% {
    opacity: 1;
  }
}

.fadeIn {
  -webkit-animation-name: fadeIn;
  -moz-animation-name: fadeIn;
  -o-animation-name: fadeIn;
  animation-name: fadeIn;
}
@-webkit-keyframes fadeInUp {
  0% {
    opacity: 0;
    -webkit-transform: translateY(20px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }
}

@-moz-keyframes fadeInUp {
  0% {
    opacity: 0;
    -moz-transform: translateY(20px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateY(0);
  }
}

@-o-keyframes fadeInUp {
  0% {
    opacity: 0;
    -o-transform: translateY(20px);
  }

  100% {
    opacity: 1;
    -o-transform: translateY(0);
  }
}

@keyframes fadeInUp {
  0% {
    opacity: 0;
    transform: translateY(20px);
  }

  100% {
    opacity: 1;
    transform: translateY(0);
  }
}

.fadeInUp {
  -webkit-animation-name: fadeInUp;
  -moz-animation-name: fadeInUp;
  -o-animation-name: fadeInUp;
  animation-name: fadeInUp;
}
@-webkit-keyframes fadeInDown {
  0% {
    opacity: 0;
    -webkit-transform: translateY(-20px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }
}

@-moz-keyframes fadeInDown {
  0% {
    opacity: 0;
    -moz-transform: translateY(-20px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateY(0);
  }
}

@-o-keyframes fadeInDown {
  0% {
    opacity: 0;
    -o-transform: translateY(-20px);
  }

  100% {
    opacity: 1;
    -o-transform: translateY(0);
  }
}

@keyframes fadeInDown {
  0% {
    opacity: 0;
    transform: translateY(-20px);
  }

  100% {
    opacity: 1;
    transform: translateY(0);
  }
}

.fadeInDown {
  -webkit-animation-name: fadeInDown;
  -moz-animation-name: fadeInDown;
  -o-animation-name: fadeInDown;
  animation-name: fadeInDown;
}
@-webkit-keyframes fadeInLeft {
  0% {
    opacity: 0;
    -webkit-transform: translateX(-20px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }
}

@-moz-keyframes fadeInLeft {
  0% {
    opacity: 0;
    -moz-transform: translateX(-20px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateX(0);
  }
}

@-o-keyframes fadeInLeft {
  0% {
    opacity: 0;
    -o-transform: translateX(-20px);
  }

  100% {
    opacity: 1;
    -o-transform: translateX(0);
  }
}

@keyframes fadeInLeft {
  0% {
    opacity: 0;
    transform: translateX(-20px);
  }

  100% {
    opacity: 1;
    transform: translateX(0);
  }
}

.fadeInLeft {
  -webkit-animation-name: fadeInLeft;
  -moz-animation-name: fadeInLeft;
  -o-animation-name: fadeInLeft;
  animation-name: fadeInLeft;
}
@-webkit-keyframes fadeInRight {
  0% {
    opacity: 0;
    -webkit-transform: translateX(20px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }
}

@-moz-keyframes fadeInRight {
  0% {
    opacity: 0;
    -moz-transform: translateX(20px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateX(0);
  }
}

@-o-keyframes fadeInRight {
  0% {
    opacity: 0;
    -o-transform: translateX(20px);
  }

  100% {
    opacity: 1;
    -o-transform: translateX(0);
  }
}

@keyframes fadeInRight {
  0% {
    opacity: 0;
    transform: translateX(20px);
  }

  100% {
    opacity: 1;
    transform: translateX(0);
  }
}

.fadeInRight {
  -webkit-animation-name: fadeInRight;
  -moz-animation-name: fadeInRight;
  -o-animation-name: fadeInRight;
  animation-name: fadeInRight;
}
@-webkit-keyframes fadeInUpBig {
  0% {
    opacity: 0;
    -webkit-transform: translateY(2000px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }
}

@-moz-keyframes fadeInUpBig {
  0% {
    opacity: 0;
    -moz-transform: translateY(2000px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateY(0);
  }
}

@-o-keyframes fadeInUpBig {
  0% {
    opacity: 0;
    -o-transform: translateY(2000px);
  }

  100% {
    opacity: 1;
    -o-transform: translateY(0);
  }
}

@keyframes fadeInUpBig {
  0% {
    opacity: 0;
    transform: translateY(2000px);
  }

  100% {
    opacity: 1;
    transform: translateY(0);
  }
}

.fadeInUpBig {
  -webkit-animation-name: fadeInUpBig;
  -moz-animation-name: fadeInUpBig;
  -o-animation-name: fadeInUpBig;
  animation-name: fadeInUpBig;
}
@-webkit-keyframes fadeInDownBig {
  0% {
    opacity: 0;
    -webkit-transform: translateY(-2000px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }
}

@-moz-keyframes fadeInDownBig {
  0% {
    opacity: 0;
    -moz-transform: translateY(-2000px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateY(0);
  }
}

@-o-keyframes fadeInDownBig {
  0% {
    opacity: 0;
    -o-transform: translateY(-2000px);
  }

  100% {
    opacity: 1;
    -o-transform: translateY(0);
  }
}

@keyframes fadeInDownBig {
  0% {
    opacity: 0;
    transform: translateY(-2000px);
  }

  100% {
    opacity: 1;
    transform: translateY(0);
  }
}

.fadeInDownBig {
  -webkit-animation-name: fadeInDownBig;
  -moz-animation-name: fadeInDownBig;
  -o-animation-name: fadeInDownBig;
  animation-name: fadeInDownBig;
}
@-webkit-keyframes fadeInLeftBig {
  0% {
    opacity: 0;
    -webkit-transform: translateX(-2000px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }
}
@-moz-keyframes fadeInLeftBig {
  0% {
    opacity: 0;
    -moz-transform: translateX(-2000px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateX(0);
  }
}
@-o-keyframes fadeInLeftBig {
  0% {
    opacity: 0;
    -o-transform: translateX(-2000px);
  }

  100% {
    opacity: 1;
    -o-transform: translateX(0);
  }
}
@keyframes fadeInLeftBig {
  0% {
    opacity: 0;
    transform: translateX(-2000px);
  }

  100% {
    opacity: 1;
    transform: translateX(0);
  }
}

.fadeInLeftBig {
  -webkit-animation-name: fadeInLeftBig;
  -moz-animation-name: fadeInLeftBig;
  -o-animation-name: fadeInLeftBig;
  animation-name: fadeInLeftBig;
}
@-webkit-keyframes fadeInRightBig {
  0% {
    opacity: 0;
    -webkit-transform: translateX(2000px);
  }

  100% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }
}

@-moz-keyframes fadeInRightBig {
  0% {
    opacity: 0;
    -moz-transform: translateX(2000px);
  }

  100% {
    opacity: 1;
    -moz-transform: translateX(0);
  }
}

@-o-keyframes fadeInRightBig {
  0% {
    opacity: 0;
    -o-transform: translateX(2000px);
  }

  100% {
    opacity: 1;
    -o-transform: translateX(0);
  }
}

@keyframes fadeInRightBig {
  0% {
    opacity: 0;
    transform: translateX(2000px);
  }

  100% {
    opacity: 1;
    transform: translateX(0);
  }
}

.fadeInRightBig {
  -webkit-animation-name: fadeInRightBig;
  -moz-animation-name: fadeInRightBig;
  -o-animation-name: fadeInRightBig;
  animation-name: fadeInRightBig;
}
@-webkit-keyframes fadeOut {
  0% {
    opacity: 1;
  }
  100% {
    opacity: 0;
  }
}

@-moz-keyframes fadeOut {
  0% {
    opacity: 1;
  }
  100% {
    opacity: 0;
  }
}

@-o-keyframes fadeOut {
  0% {
    opacity: 1;
  }
  100% {
    opacity: 0;
  }
}

@keyframes fadeOut {
  0% {
    opacity: 1;
  }
  100% {
    opacity: 0;
  }
}

.fadeOut {
  -webkit-animation-name: fadeOut;
  -moz-animation-name: fadeOut;
  -o-animation-name: fadeOut;
  animation-name: fadeOut;
}
@-webkit-keyframes fadeOutUp {
  0% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(-20px);
  }
}
@-moz-keyframes fadeOutUp {
  0% {
    opacity: 1;
    -moz-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(-20px);
  }
}
@-o-keyframes fadeOutUp {
  0% {
    opacity: 1;
    -o-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(-20px);
  }
}
@keyframes fadeOutUp {
  0% {
    opacity: 1;
    transform: translateY(0);
  }

  100% {
    opacity: 0;
    transform: translateY(-20px);
  }
}

.fadeOutUp {
  -webkit-animation-name: fadeOutUp;
  -moz-animation-name: fadeOutUp;
  -o-animation-name: fadeOutUp;
  animation-name: fadeOutUp;
}
@-webkit-keyframes fadeOutDown {
  0% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(20px);
  }
}

@-moz-keyframes fadeOutDown {
  0% {
    opacity: 1;
    -moz-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(20px);
  }
}

@-o-keyframes fadeOutDown {
  0% {
    opacity: 1;
    -o-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(20px);
  }
}

@keyframes fadeOutDown {
  0% {
    opacity: 1;
    transform: translateY(0);
  }

  100% {
    opacity: 0;
    transform: translateY(20px);
  }
}

.fadeOutDown {
  -webkit-animation-name: fadeOutDown;
  -moz-animation-name: fadeOutDown;
  -o-animation-name: fadeOutDown;
  animation-name: fadeOutDown;
}
@-webkit-keyframes fadeOutLeft {
  0% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(-20px);
  }
}

@-moz-keyframes fadeOutLeft {
  0% {
    opacity: 1;
    -moz-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(-20px);
  }
}

@-o-keyframes fadeOutLeft {
  0% {
    opacity: 1;
    -o-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(-20px);
  }
}

@keyframes fadeOutLeft {
  0% {
    opacity: 1;
    transform: translateX(0);
  }

  100% {
    opacity: 0;
    transform: translateX(-20px);
  }
}

.fadeOutLeft {
  -webkit-animation-name: fadeOutLeft;
  -moz-animation-name: fadeOutLeft;
  -o-animation-name: fadeOutLeft;
  animation-name: fadeOutLeft;
}
@-webkit-keyframes fadeOutRight {
  0% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(20px);
  }
}

@-moz-keyframes fadeOutRight {
  0% {
    opacity: 1;
    -moz-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(20px);
  }
}

@-o-keyframes fadeOutRight {
  0% {
    opacity: 1;
    -o-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(20px);
  }
}

@keyframes fadeOutRight {
  0% {
    opacity: 1;
    transform: translateX(0);
  }

  100% {
    opacity: 0;
    transform: translateX(20px);
  }
}

.fadeOutRight {
  -webkit-animation-name: fadeOutRight;
  -moz-animation-name: fadeOutRight;
  -o-animation-name: fadeOutRight;
  animation-name: fadeOutRight;
}
@-webkit-keyframes fadeOutUpBig {
  0% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(-2000px);
  }
}

@-moz-keyframes fadeOutUpBig {
  0% {
    opacity: 1;
    -moz-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(-2000px);
  }
}

@-o-keyframes fadeOutUpBig {
  0% {
    opacity: 1;
    -o-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(-2000px);
  }
}

@keyframes fadeOutUpBig {
  0% {
    opacity: 1;
    transform: translateY(0);
  }

  100% {
    opacity: 0;
    transform: translateY(-2000px);
  }
}

.fadeOutUpBig {
  -webkit-animation-name: fadeOutUpBig;
  -moz-animation-name: fadeOutUpBig;
  -o-animation-name: fadeOutUpBig;
  animation-name: fadeOutUpBig;
}
@-webkit-keyframes fadeOutDownBig {
  0% {
    opacity: 1;
    -webkit-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(2000px);
  }
}

@-moz-keyframes fadeOutDownBig {
  0% {
    opacity: 1;
    -moz-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(2000px);
  }
}

@-o-keyframes fadeOutDownBig {
  0% {
    opacity: 1;
    -o-transform: translateY(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(2000px);
  }
}

@keyframes fadeOutDownBig {
  0% {
    opacity: 1;
    transform: translateY(0);
  }

  100% {
    opacity: 0;
    transform: translateY(2000px);
  }
}

.fadeOutDownBig {
  -webkit-animation-name: fadeOutDownBig;
  -moz-animation-name: fadeOutDownBig;
  -o-animation-name: fadeOutDownBig;
  animation-name: fadeOutDownBig;
}
@-webkit-keyframes fadeOutLeftBig {
  0% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(-2000px);
  }
}

@-moz-keyframes fadeOutLeftBig {
  0% {
    opacity: 1;
    -moz-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(-2000px);
  }
}

@-o-keyframes fadeOutLeftBig {
  0% {
    opacity: 1;
    -o-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(-2000px);
  }
}

@keyframes fadeOutLeftBig {
  0% {
    opacity: 1;
    transform: translateX(0);
  }

  100% {
    opacity: 0;
    transform: translateX(-2000px);
  }
}

.fadeOutLeftBig {
  -webkit-animation-name: fadeOutLeftBig;
  -moz-animation-name: fadeOutLeftBig;
  -o-animation-name: fadeOutLeftBig;
  animation-name: fadeOutLeftBig;
}
@-webkit-keyframes fadeOutRightBig {
  0% {
    opacity: 1;
    -webkit-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(2000px);
  }
}
@-moz-keyframes fadeOutRightBig {
  0% {
    opacity: 1;
    -moz-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(2000px);
  }
}
@-o-keyframes fadeOutRightBig {
  0% {
    opacity: 1;
    -o-transform: translateX(0);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(2000px);
  }
}
@keyframes fadeOutRightBig {
  0% {
    opacity: 1;
    transform: translateX(0);
  }

  100% {
    opacity: 0;
    transform: translateX(2000px);
  }
}

.fadeOutRightBig {
  -webkit-animation-name: fadeOutRightBig;
  -moz-animation-name: fadeOutRightBig;
  -o-animation-name: fadeOutRightBig;
  animation-name: fadeOutRightBig;
}
@-webkit-keyframes bounceIn {
  0% {
    opacity: 0;
    -webkit-transform: scale(0.3);
  }

  50% {
    opacity: 1;
    -webkit-transform: scale(1.05);
  }

  70% {
    -webkit-transform: scale(0.9);
  }

  100% {
    -webkit-transform: scale(1);
  }
}

@-moz-keyframes bounceIn {
  0% {
    opacity: 0;
    -moz-transform: scale(0.3);
  }

  50% {
    opacity: 1;
    -moz-transform: scale(1.05);
  }

  70% {
    -moz-transform: scale(0.9);
  }

  100% {
    -moz-transform: scale(1);
  }
}

@-o-keyframes bounceIn {
  0% {
    opacity: 0;
    -o-transform: scale(0.3);
  }

  50% {
    opacity: 1;
    -o-transform: scale(1.05);
  }

  70% {
    -o-transform: scale(0.9);
  }

  100% {
    -o-transform: scale(1);
  }
}

@keyframes bounceIn {
  0% {
    opacity: 0;
    transform: scale(0.3);
  }

  50% {
    opacity: 1;
    transform: scale(1.05);
  }

  70% {
    transform: scale(0.9);
  }

  100% {
    transform: scale(1);
  }
}

.bounceIn {
  -webkit-animation-name: bounceIn;
  -moz-animation-name: bounceIn;
  -o-animation-name: bounceIn;
  animation-name: bounceIn;
}
@-webkit-keyframes bounceInUp {
  0% {
    opacity: 0;
    -webkit-transform: translateY(2000px);
  }

  60% {
    opacity: 1;
    -webkit-transform: translateY(-30px);
  }

  80% {
    -webkit-transform: translateY(10px);
  }

  100% {
    -webkit-transform: translateY(0);
  }
}
@-moz-keyframes bounceInUp {
  0% {
    opacity: 0;
    -moz-transform: translateY(2000px);
  }

  60% {
    opacity: 1;
    -moz-transform: translateY(-30px);
  }

  80% {
    -moz-transform: translateY(10px);
  }

  100% {
    -moz-transform: translateY(0);
  }
}

@-o-keyframes bounceInUp {
  0% {
    opacity: 0;
    -o-transform: translateY(2000px);
  }

  60% {
    opacity: 1;
    -o-transform: translateY(-30px);
  }

  80% {
    -o-transform: translateY(10px);
  }

  100% {
    -o-transform: translateY(0);
  }
}

@keyframes bounceInUp {
  0% {
    opacity: 0;
    transform: translateY(2000px);
  }

  60% {
    opacity: 1;
    transform: translateY(-30px);
  }

  80% {
    transform: translateY(10px);
  }

  100% {
    transform: translateY(0);
  }
}

.bounceInUp {
  -webkit-animation-name: bounceInUp;
  -moz-animation-name: bounceInUp;
  -o-animation-name: bounceInUp;
  animation-name: bounceInUp;
}
@-webkit-keyframes bounceInDown {
  0% {
    opacity: 0;
    -webkit-transform: translateY(-2000px);
  }

  60% {
    opacity: 1;
    -webkit-transform: translateY(30px);
  }

  80% {
    -webkit-transform: translateY(-10px);
  }

  100% {
    -webkit-transform: translateY(0);
  }
}

@-moz-keyframes bounceInDown {
  0% {
    opacity: 0;
    -moz-transform: translateY(-2000px);
  }

  60% {
    opacity: 1;
    -moz-transform: translateY(30px);
  }

  80% {
    -moz-transform: translateY(-10px);
  }

  100% {
    -moz-transform: translateY(0);
  }
}

@-o-keyframes bounceInDown {
  0% {
    opacity: 0;
    -o-transform: translateY(-2000px);
  }

  60% {
    opacity: 1;
    -o-transform: translateY(30px);
  }

  80% {
    -o-transform: translateY(-10px);
  }

  100% {
    -o-transform: translateY(0);
  }
}

@keyframes bounceInDown {
  0% {
    opacity: 0;
    transform: translateY(-2000px);
  }

  60% {
    opacity: 1;
    transform: translateY(30px);
  }

  80% {
    transform: translateY(-10px);
  }

  100% {
    transform: translateY(0);
  }
}

.bounceInDown {
  -webkit-animation-name: bounceInDown;
  -moz-animation-name: bounceInDown;
  -o-animation-name: bounceInDown;
  animation-name: bounceInDown;
}
@-webkit-keyframes bounceInLeft {
  0% {
    opacity: 0;
    -webkit-transform: translateX(-2000px);
  }

  60% {
    opacity: 1;
    -webkit-transform: translateX(30px);
  }

  80% {
    -webkit-transform: translateX(-10px);
  }

  100% {
    -webkit-transform: translateX(0);
  }
}

@-moz-keyframes bounceInLeft {
  0% {
    opacity: 0;
    -moz-transform: translateX(-2000px);
  }

  60% {
    opacity: 1;
    -moz-transform: translateX(30px);
  }

  80% {
    -moz-transform: translateX(-10px);
  }

  100% {
    -moz-transform: translateX(0);
  }
}

@-o-keyframes bounceInLeft {
  0% {
    opacity: 0;
    -o-transform: translateX(-2000px);
  }

  60% {
    opacity: 1;
    -o-transform: translateX(30px);
  }

  80% {
    -o-transform: translateX(-10px);
  }

  100% {
    -o-transform: translateX(0);
  }
}

@keyframes bounceInLeft {
  0% {
    opacity: 0;
    transform: translateX(-2000px);
  }

  60% {
    opacity: 1;
    transform: translateX(30px);
  }

  80% {
    transform: translateX(-10px);
  }

  100% {
    transform: translateX(0);
  }
}

.bounceInLeft {
  -webkit-animation-name: bounceInLeft;
  -moz-animation-name: bounceInLeft;
  -o-animation-name: bounceInLeft;
  animation-name: bounceInLeft;
}
@-webkit-keyframes bounceInRight {
  0% {
    opacity: 0;
    -webkit-transform: translateX(2000px);
  }

  60% {
    opacity: 1;
    -webkit-transform: translateX(-30px);
  }

  80% {
    -webkit-transform: translateX(10px);
  }

  100% {
    -webkit-transform: translateX(0);
  }
}

@-moz-keyframes bounceInRight {
  0% {
    opacity: 0;
    -moz-transform: translateX(2000px);
  }

  60% {
    opacity: 1;
    -moz-transform: translateX(-30px);
  }

  80% {
    -moz-transform: translateX(10px);
  }

  100% {
    -moz-transform: translateX(0);
  }
}

@-o-keyframes bounceInRight {
  0% {
    opacity: 0;
    -o-transform: translateX(2000px);
  }

  60% {
    opacity: 1;
    -o-transform: translateX(-30px);
  }

  80% {
    -o-transform: translateX(10px);
  }

  100% {
    -o-transform: translateX(0);
  }
}

@keyframes bounceInRight {
  0% {
    opacity: 0;
    transform: translateX(2000px);
  }

  60% {
    opacity: 1;
    transform: translateX(-30px);
  }

  80% {
    transform: translateX(10px);
  }

  100% {
    transform: translateX(0);
  }
}

.bounceInRight {
  -webkit-animation-name: bounceInRight;
  -moz-animation-name: bounceInRight;
  -o-animation-name: bounceInRight;
  animation-name: bounceInRight;
}
@-webkit-keyframes bounceOut {
  0% {
    -webkit-transform: scale(1);
  }

  25% {
    -webkit-transform: scale(0.95);
  }

  50% {
    opacity: 1;
    -webkit-transform: scale(1.1);
  }

  100% {
    opacity: 0;
    -webkit-transform: scale(0.3);
  }
}

@-moz-keyframes bounceOut {
  0% {
    -moz-transform: scale(1);
  }

  25% {
    -moz-transform: scale(0.95);
  }

  50% {
    opacity: 1;
    -moz-transform: scale(1.1);
  }

  100% {
    opacity: 0;
    -moz-transform: scale(0.3);
  }
}

@-o-keyframes bounceOut {
  0% {
    -o-transform: scale(1);
  }

  25% {
    -o-transform: scale(0.95);
  }

  50% {
    opacity: 1;
    -o-transform: scale(1.1);
  }

  100% {
    opacity: 0;
    -o-transform: scale(0.3);
  }
}

@keyframes bounceOut {
  0% {
    transform: scale(1);
  }

  25% {
    transform: scale(0.95);
  }

  50% {
    opacity: 1;
    transform: scale(1.1);
  }

  100% {
    opacity: 0;
    transform: scale(0.3);
  }
}

.bounceOut {
  -webkit-animation-name: bounceOut;
  -moz-animation-name: bounceOut;
  -o-animation-name: bounceOut;
  animation-name: bounceOut;
}
@-webkit-keyframes bounceOutUp {
  0% {
    -webkit-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -webkit-transform: translateY(20px);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(-2000px);
  }
}

@-moz-keyframes bounceOutUp {
  0% {
    -moz-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -moz-transform: translateY(20px);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(-2000px);
  }
}

@-o-keyframes bounceOutUp {
  0% {
    -o-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -o-transform: translateY(20px);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(-2000px);
  }
}

@keyframes bounceOutUp {
  0% {
    transform: translateY(0);
  }

  20% {
    opacity: 1;
    transform: translateY(20px);
  }

  100% {
    opacity: 0;
    transform: translateY(-2000px);
  }
}

.bounceOutUp {
  -webkit-animation-name: bounceOutUp;
  -moz-animation-name: bounceOutUp;
  -o-animation-name: bounceOutUp;
  animation-name: bounceOutUp;
}
@-webkit-keyframes bounceOutDown {
  0% {
    -webkit-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -webkit-transform: translateY(-20px);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateY(2000px);
  }
}

@-moz-keyframes bounceOutDown {
  0% {
    -moz-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -moz-transform: translateY(-20px);
  }

  100% {
    opacity: 0;
    -moz-transform: translateY(2000px);
  }
}

@-o-keyframes bounceOutDown {
  0% {
    -o-transform: translateY(0);
  }

  20% {
    opacity: 1;
    -o-transform: translateY(-20px);
  }

  100% {
    opacity: 0;
    -o-transform: translateY(2000px);
  }
}

@keyframes bounceOutDown {
  0% {
    transform: translateY(0);
  }

  20% {
    opacity: 1;
    transform: translateY(-20px);
  }

  100% {
    opacity: 0;
    transform: translateY(2000px);
  }
}

.bounceOutDown {
  -webkit-animation-name: bounceOutDown;
  -moz-animation-name: bounceOutDown;
  -o-animation-name: bounceOutDown;
  animation-name: bounceOutDown;
}
@-webkit-keyframes bounceOutLeft {
  0% {
    -webkit-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -webkit-transform: translateX(20px);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(-2000px);
  }
}

@-moz-keyframes bounceOutLeft {
  0% {
    -moz-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -moz-transform: translateX(20px);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(-2000px);
  }
}

@-o-keyframes bounceOutLeft {
  0% {
    -o-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -o-transform: translateX(20px);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(-2000px);
  }
}

@keyframes bounceOutLeft {
  0% {
    transform: translateX(0);
  }

  20% {
    opacity: 1;
    transform: translateX(20px);
  }

  100% {
    opacity: 0;
    transform: translateX(-2000px);
  }
}

.bounceOutLeft {
  -webkit-animation-name: bounceOutLeft;
  -moz-animation-name: bounceOutLeft;
  -o-animation-name: bounceOutLeft;
  animation-name: bounceOutLeft;
}
@-webkit-keyframes bounceOutRight {
  0% {
    -webkit-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -webkit-transform: translateX(-20px);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(2000px);
  }
}

@-moz-keyframes bounceOutRight {
  0% {
    -moz-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -moz-transform: translateX(-20px);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(2000px);
  }
}

@-o-keyframes bounceOutRight {
  0% {
    -o-transform: translateX(0);
  }

  20% {
    opacity: 1;
    -o-transform: translateX(-20px);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(2000px);
  }
}

@keyframes bounceOutRight {
  0% {
    transform: translateX(0);
  }

  20% {
    opacity: 1;
    transform: translateX(-20px);
  }

  100% {
    opacity: 0;
    transform: translateX(2000px);
  }
}

.bounceOutRight {
  -webkit-animation-name: bounceOutRight;
  -moz-animation-name: bounceOutRight;
  -o-animation-name: bounceOutRight;
  animation-name: bounceOutRight;
}
@-webkit-keyframes rotateIn {
  0% {
    -webkit-transform-origin: center center;
    -webkit-transform: rotate(-200deg);
    opacity: 0;
  }

  100% {
    -webkit-transform-origin: center center;
    -webkit-transform: rotate(0);
    opacity: 1;
  }
}
@-moz-keyframes rotateIn {
  0% {
    -moz-transform-origin: center center;
    -moz-transform: rotate(-200deg);
    opacity: 0;
  }

  100% {
    -moz-transform-origin: center center;
    -moz-transform: rotate(0);
    opacity: 1;
  }
}
@-o-keyframes rotateIn {
  0% {
    -o-transform-origin: center center;
    -o-transform: rotate(-200deg);
    opacity: 0;
  }

  100% {
    -o-transform-origin: center center;
    -o-transform: rotate(0);
    opacity: 1;
  }
}
@keyframes rotateIn {
  0% {
    transform-origin: center center;
    transform: rotate(-200deg);
    opacity: 0;
  }

  100% {
    transform-origin: center center;
    transform: rotate(0);
    opacity: 1;
  }
}

.rotateIn {
  -webkit-animation-name: rotateIn;
  -moz-animation-name: rotateIn;
  -o-animation-name: rotateIn;
  animation-name: rotateIn;
}
@-webkit-keyframes rotateInUpLeft {
  0% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }
}

@-moz-keyframes rotateInUpLeft {
  0% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }
}

@-o-keyframes rotateInUpLeft {
  0% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }
}

@keyframes rotateInUpLeft {
  0% {
    transform-origin: left bottom;
    transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    transform-origin: left bottom;
    transform: rotate(0);
    opacity: 1;
  }
}

.rotateInUpLeft {
  -webkit-animation-name: rotateInUpLeft;
  -moz-animation-name: rotateInUpLeft;
  -o-animation-name: rotateInUpLeft;
  animation-name: rotateInUpLeft;
}
@-webkit-keyframes rotateInDownLeft {
  0% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }
}

@-moz-keyframes rotateInDownLeft {
  0% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }
}

@-o-keyframes rotateInDownLeft {
  0% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }
}

@keyframes rotateInDownLeft {
  0% {
    transform-origin: left bottom;
    transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    transform-origin: left bottom;
    transform: rotate(0);
    opacity: 1;
  }
}

.rotateInDownLeft {
  -webkit-animation-name: rotateInDownLeft;
  -moz-animation-name: rotateInDownLeft;
  -o-animation-name: rotateInDownLeft;
  animation-name: rotateInDownLeft;
}
@-webkit-keyframes rotateInUpRight {
  0% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }
}

@-moz-keyframes rotateInUpRight {
  0% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }
}

@-o-keyframes rotateInUpRight {
  0% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }
}

@keyframes rotateInUpRight {
  0% {
    transform-origin: right bottom;
    transform: rotate(-90deg);
    opacity: 0;
  }

  100% {
    transform-origin: right bottom;
    transform: rotate(0);
    opacity: 1;
  }
}

.rotateInUpRight {
  -webkit-animation-name: rotateInUpRight;
  -moz-animation-name: rotateInUpRight;
  -o-animation-name: rotateInUpRight;
  animation-name: rotateInUpRight;
}
@-webkit-keyframes rotateInDownRight {
  0% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }
}

@-moz-keyframes rotateInDownRight {
  0% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }
}

@-o-keyframes rotateInDownRight {
  0% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }
}

@keyframes rotateInDownRight {
  0% {
    transform-origin: right bottom;
    transform: rotate(90deg);
    opacity: 0;
  }

  100% {
    transform-origin: right bottom;
    transform: rotate(0);
    opacity: 1;
  }
}

.rotateInDownRight {
  -webkit-animation-name: rotateInDownRight;
  -moz-animation-name: rotateInDownRight;
  -o-animation-name: rotateInDownRight;
  animation-name: rotateInDownRight;
}
@-webkit-keyframes rotateOut {
  0% {
    -webkit-transform-origin: center center;
    -webkit-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -webkit-transform-origin: center center;
    -webkit-transform: rotate(200deg);
    opacity: 0;
  }
}

@-moz-keyframes rotateOut {
  0% {
    -moz-transform-origin: center center;
    -moz-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -moz-transform-origin: center center;
    -moz-transform: rotate(200deg);
    opacity: 0;
  }
}

@-o-keyframes rotateOut {
  0% {
    -o-transform-origin: center center;
    -o-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -o-transform-origin: center center;
    -o-transform: rotate(200deg);
    opacity: 0;
  }
}

@keyframes rotateOut {
  0% {
    transform-origin: center center;
    transform: rotate(0);
    opacity: 1;
  }

  100% {
    transform-origin: center center;
    transform: rotate(200deg);
    opacity: 0;
  }
}

.rotateOut {
  -webkit-animation-name: rotateOut;
  -moz-animation-name: rotateOut;
  -o-animation-name: rotateOut;
  animation-name: rotateOut;
}
@-webkit-keyframes rotateOutUpLeft {
  0% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(-90deg);
    opacity: 0;
  }
}

@-moz-keyframes rotateOutUpLeft {
  0% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(-90deg);
    opacity: 0;
  }
}

@-o-keyframes rotateOutUpLeft {
  0% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(-90deg);
    opacity: 0;
  }
}

@keyframes rotateOutUpLeft {
  0% {
    transform-origin: left bottom;
    transform: rotate(0);
    opacity: 1;
  }

  100% {
    transform-origin: left bottom;
    transform: rotate(-90deg);
    opacity: 0;
  }
}

.rotateOutUpLeft {
  -webkit-animation-name: rotateOutUpLeft;
  -moz-animation-name: rotateOutUpLeft;
  -o-animation-name: rotateOutUpLeft;
  animation-name: rotateOutUpLeft;
}
@-webkit-keyframes rotateOutDownLeft {
  0% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -webkit-transform-origin: left bottom;
    -webkit-transform: rotate(90deg);
    opacity: 0;
  }
}

@-moz-keyframes rotateOutDownLeft {
  0% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -moz-transform-origin: left bottom;
    -moz-transform: rotate(90deg);
    opacity: 0;
  }
}

@-o-keyframes rotateOutDownLeft {
  0% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -o-transform-origin: left bottom;
    -o-transform: rotate(90deg);
    opacity: 0;
  }
}

@keyframes rotateOutDownLeft {
  0% {
    transform-origin: left bottom;
    transform: rotate(0);
    opacity: 1;
  }

  100% {
    transform-origin: left bottom;
    transform: rotate(90deg);
    opacity: 0;
  }
}

.rotateOutDownLeft {
  -webkit-animation-name: rotateOutDownLeft;
  -moz-animation-name: rotateOutDownLeft;
  -o-animation-name: rotateOutDownLeft;
  animation-name: rotateOutDownLeft;
}
@-webkit-keyframes rotateOutUpRight {
  0% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(90deg);
    opacity: 0;
  }
}

@-moz-keyframes rotateOutUpRight {
  0% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(90deg);
    opacity: 0;
  }
}

@-o-keyframes rotateOutUpRight {
  0% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(90deg);
    opacity: 0;
  }
}

@keyframes rotateOutUpRight {
  0% {
    transform-origin: right bottom;
    transform: rotate(0);
    opacity: 1;
  }

  100% {
    transform-origin: right bottom;
    transform: rotate(90deg);
    opacity: 0;
  }
}

.rotateOutUpRight {
  -webkit-animation-name: rotateOutUpRight;
  -moz-animation-name: rotateOutUpRight;
  -o-animation-name: rotateOutUpRight;
  animation-name: rotateOutUpRight;
}
@-webkit-keyframes rotateOutDownRight {
  0% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -webkit-transform-origin: right bottom;
    -webkit-transform: rotate(-90deg);
    opacity: 0;
  }
}

@-moz-keyframes rotateOutDownRight {
  0% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -moz-transform-origin: right bottom;
    -moz-transform: rotate(-90deg);
    opacity: 0;
  }
}

@-o-keyframes rotateOutDownRight {
  0% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(0);
    opacity: 1;
  }

  100% {
    -o-transform-origin: right bottom;
    -o-transform: rotate(-90deg);
    opacity: 0;
  }
}

@keyframes rotateOutDownRight {
  0% {
    transform-origin: right bottom;
    transform: rotate(0);
    opacity: 1;
  }

  100% {
    transform-origin: right bottom;
    transform: rotate(-90deg);
    opacity: 0;
  }
}

.rotateOutDownRight {
  -webkit-animation-name: rotateOutDownRight;
  -moz-animation-name: rotateOutDownRight;
  -o-animation-name: rotateOutDownRight;
  animation-name: rotateOutDownRight;
}
@-webkit-keyframes hinge {
  0% {
    -webkit-transform: rotate(0);
    -webkit-transform-origin: top left;
    -webkit-animation-timing-function: ease-in-out;
  }
  20%,
  60% {
    -webkit-transform: rotate(80deg);
    -webkit-transform-origin: top left;
    -webkit-animation-timing-function: ease-in-out;
  }
  40% {
    -webkit-transform: rotate(60deg);
    -webkit-transform-origin: top left;
    -webkit-animation-timing-function: ease-in-out;
  }
  80% {
    -webkit-transform: rotate(60deg) translateY(0);
    opacity: 1;
    -webkit-transform-origin: top left;
    -webkit-animation-timing-function: ease-in-out;
  }
  100% {
    -webkit-transform: translateY(700px);
    opacity: 0;
  }
}

@-moz-keyframes hinge {
  0% {
    -moz-transform: rotate(0);
    -moz-transform-origin: top left;
    -moz-animation-timing-function: ease-in-out;
  }
  20%,
  60% {
    -moz-transform: rotate(80deg);
    -moz-transform-origin: top left;
    -moz-animation-timing-function: ease-in-out;
  }
  40% {
    -moz-transform: rotate(60deg);
    -moz-transform-origin: top left;
    -moz-animation-timing-function: ease-in-out;
  }
  80% {
    -moz-transform: rotate(60deg) translateY(0);
    opacity: 1;
    -moz-transform-origin: top left;
    -moz-animation-timing-function: ease-in-out;
  }
  100% {
    -moz-transform: translateY(700px);
    opacity: 0;
  }
}

@-o-keyframes hinge {
  0% {
    -o-transform: rotate(0);
    -o-transform-origin: top left;
    -o-animation-timing-function: ease-in-out;
  }
  20%,
  60% {
    -o-transform: rotate(80deg);
    -o-transform-origin: top left;
    -o-animation-timing-function: ease-in-out;
  }
  40% {
    -o-transform: rotate(60deg);
    -o-transform-origin: top left;
    -o-animation-timing-function: ease-in-out;
  }
  80% {
    -o-transform: rotate(60deg) translateY(0);
    opacity: 1;
    -o-transform-origin: top left;
    -o-animation-timing-function: ease-in-out;
  }
  100% {
    -o-transform: translateY(700px);
    opacity: 0;
  }
}

@keyframes hinge {
  0% {
    transform: rotate(0);
    transform-origin: top left;
    animation-timing-function: ease-in-out;
  }
  20%,
  60% {
    transform: rotate(80deg);
    transform-origin: top left;
    animation-timing-function: ease-in-out;
  }
  40% {
    transform: rotate(60deg);
    transform-origin: top left;
    animation-timing-function: ease-in-out;
  }
  80% {
    transform: rotate(60deg) translateY(0);
    opacity: 1;
    transform-origin: top left;
    animation-timing-function: ease-in-out;
  }
  100% {
    transform: translateY(700px);
    opacity: 0;
  }
}

.hinge {
  -webkit-animation-name: hinge;
  -moz-animation-name: hinge;
  -o-animation-name: hinge;
  animation-name: hinge;
}
/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */

@-webkit-keyframes rollIn {
  0% {
    opacity: 0;
    -webkit-transform: translateX(-100%) rotate(-120deg);
  }
  100% {
    opacity: 1;
    -webkit-transform: translateX(0px) rotate(0deg);
  }
}

@-moz-keyframes rollIn {
  0% {
    opacity: 0;
    -moz-transform: translateX(-100%) rotate(-120deg);
  }
  100% {
    opacity: 1;
    -moz-transform: translateX(0px) rotate(0deg);
  }
}

@-o-keyframes rollIn {
  0% {
    opacity: 0;
    -o-transform: translateX(-100%) rotate(-120deg);
  }
  100% {
    opacity: 1;
    -o-transform: translateX(0px) rotate(0deg);
  }
}

@keyframes rollIn {
  0% {
    opacity: 0;
    transform: translateX(-100%) rotate(-120deg);
  }
  100% {
    opacity: 1;
    transform: translateX(0px) rotate(0deg);
  }
}

.rollIn {
  -webkit-animation-name: rollIn;
  -moz-animation-name: rollIn;
  -o-animation-name: rollIn;
  animation-name: rollIn;
}
/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */

@-webkit-keyframes rollOut {
  0% {
    opacity: 1;
    -webkit-transform: translateX(0px) rotate(0deg);
  }

  100% {
    opacity: 0;
    -webkit-transform: translateX(100%) rotate(120deg);
  }
}

@-moz-keyframes rollOut {
  0% {
    opacity: 1;
    -moz-transform: translateX(0px) rotate(0deg);
  }

  100% {
    opacity: 0;
    -moz-transform: translateX(100%) rotate(120deg);
  }
}

@-o-keyframes rollOut {
  0% {
    opacity: 1;
    -o-transform: translateX(0px) rotate(0deg);
  }

  100% {
    opacity: 0;
    -o-transform: translateX(100%) rotate(120deg);
  }
}

@keyframes rollOut {
  0% {
    opacity: 1;
    transform: translateX(0px) rotate(0deg);
  }

  100% {
    opacity: 0;
    transform: translateX(100%) rotate(120deg);
  }
}

.rollOut {
  -webkit-animation-name: rollOut;
  -moz-animation-name: rollOut;
  -o-animation-name: rollOut;
  animation-name: rollOut;
}

/* originally authored by Angelo Rohit - https://github.com/angelorohit */

@-webkit-keyframes lightSpeedIn {
  0% {
    -webkit-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
  60% {
    -webkit-transform: translateX(-20%) skewX(30deg);
    opacity: 1;
  }
  80% {
    -webkit-transform: translateX(0%) skewX(-15deg);
    opacity: 1;
  }
  100% {
    -webkit-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
}

@-moz-keyframes lightSpeedIn {
  0% {
    -moz-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
  60% {
    -moz-transform: translateX(-20%) skewX(30deg);
    opacity: 1;
  }
  80% {
    -moz-transform: translateX(0%) skewX(-15deg);
    opacity: 1;
  }
  100% {
    -moz-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
}

@-o-keyframes lightSpeedIn {
  0% {
    -o-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
  60% {
    -o-transform: translateX(-20%) skewX(30deg);
    opacity: 1;
  }
  80% {
    -o-transform: translateX(0%) skewX(-15deg);
    opacity: 1;
  }
  100% {
    -o-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
}

@keyframes lightSpeedIn {
  0% {
    transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
  60% {
    transform: translateX(-20%) skewX(30deg);
    opacity: 1;
  }
  80% {
    transform: translateX(0%) skewX(-15deg);
    opacity: 1;
  }
  100% {
    transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
}

.lightSpeedIn {
  -webkit-animation-name: lightSpeedIn;
  -moz-animation-name: lightSpeedIn;
  -o-animation-name: lightSpeedIn;
  animation-name: lightSpeedIn;

  -webkit-animation-timing-function: ease-out;
  -moz-animation-timing-function: ease-out;
  -o-animation-timing-function: ease-out;
  animation-timing-function: ease-out;
}

.animated.lightSpeedIn {
  -webkit-animation-duration: 0.5s;
  -moz-animation-duration: 0.5s;
  -o-animation-duration: 0.5s;
  animation-duration: 0.5s;
}

/* originally authored by Angelo Rohit - https://github.com/angelorohit */

@-webkit-keyframes lightSpeedOut {
  0% {
    -webkit-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
  100% {
    -webkit-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
}

@-moz-keyframes lightSpeedOut {
  0% {
    -moz-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
  100% {
    -moz-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
}

@-o-keyframes lightSpeedOut {
  0% {
    -o-transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
  100% {
    -o-transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
}

@keyframes lightSpeedOut {
  0% {
    transform: translateX(0%) skewX(0deg);
    opacity: 1;
  }
  100% {
    transform: translateX(100%) skewX(-30deg);
    opacity: 0;
  }
}

.lightSpeedOut {
  -webkit-animation-name: lightSpeedOut;
  -moz-animation-name: lightSpeedOut;
  -o-animation-name: lightSpeedOut;
  animation-name: lightSpeedOut;

  -webkit-animation-timing-function: ease-in;
  -moz-animation-timing-function: ease-in;
  -o-animation-timing-function: ease-in;
  animation-timing-function: ease-in;
}

.animated.lightSpeedOut {
  -webkit-animation-duration: 0.25s;
  -moz-animation-duration: 0.25s;
  -o-animation-duration: 0.25s;
  animation-duration: 0.25s;
}

/* originally authored by Angelo Rohit - https://github.com/angelorohit */

@-webkit-keyframes wiggle {
  0% {
    -webkit-transform: skewX(9deg);
  }
  10% {
    -webkit-transform: skewX(-8deg);
  }
  20% {
    -webkit-transform: skewX(7deg);
  }
  30% {
    -webkit-transform: skewX(-6deg);
  }
  40% {
    -webkit-transform: skewX(5deg);
  }
  50% {
    -webkit-transform: skewX(-4deg);
  }
  60% {
    -webkit-transform: skewX(3deg);
  }
  70% {
    -webkit-transform: skewX(-2deg);
  }
  80% {
    -webkit-transform: skewX(1deg);
  }
  90% {
    -webkit-transform: skewX(0deg);
  }
  100% {
    -webkit-transform: skewX(0deg);
  }
}

@-moz-keyframes wiggle {
  0% {
    -moz-transform: skewX(9deg);
  }
  10% {
    -moz-transform: skewX(-8deg);
  }
  20% {
    -moz-transform: skewX(7deg);
  }
  30% {
    -moz-transform: skewX(-6deg);
  }
  40% {
    -moz-transform: skewX(5deg);
  }
  50% {
    -moz-transform: skewX(-4deg);
  }
  60% {
    -moz-transform: skewX(3deg);
  }
  70% {
    -moz-transform: skewX(-2deg);
  }
  80% {
    -moz-transform: skewX(1deg);
  }
  90% {
    -moz-transform: skewX(0deg);
  }
  100% {
    -moz-transform: skewX(0deg);
  }
}

@-o-keyframes wiggle {
  0% {
    -o-transform: skewX(9deg);
  }
  10% {
    -o-transform: skewX(-8deg);
  }
  20% {
    -o-transform: skewX(7deg);
  }
  30% {
    -o-transform: skewX(-6deg);
  }
  40% {
    -o-transform: skewX(5deg);
  }
  50% {
    -o-transform: skewX(-4deg);
  }
  60% {
    -o-transform: skewX(3deg);
  }
  70% {
    -o-transform: skewX(-2deg);
  }
  80% {
    -o-transform: skewX(1deg);
  }
  90% {
    -o-transform: skewX(0deg);
  }
  100% {
    -o-transform: skewX(0deg);
  }
}

@keyframes wiggle {
  0% {
    transform: skewX(9deg);
  }
  10% {
    transform: skewX(-8deg);
  }
  20% {
    transform: skewX(7deg);
  }
  30% {
    transform: skewX(-6deg);
  }
  40% {
    transform: skewX(5deg);
  }
  50% {
    transform: skewX(-4deg);
  }
  60% {
    transform: skewX(3deg);
  }
  70% {
    transform: skewX(-2deg);
  }
  80% {
    transform: skewX(1deg);
  }
  90% {
    transform: skewX(0deg);
  }
  100% {
    transform: skewX(0deg);
  }
}

.wiggle {
  -webkit-animation-name: wiggle;
  -moz-animation-name: wiggle;
  -o-animation-name: wiggle;
  animation-name: wiggle;

  -webkit-animation-timing-function: ease-in;
  -moz-animation-timing-function: ease-in;
  -o-animation-timing-function: ease-in;
  animation-timing-function: ease-in;
}

.animated.wiggle {
  -webkit-animation-duration: 0.75s;
  -moz-animation-duration: 0.75s;
  -o-animation-duration: 0.75s;
  animation-duration: 0.75s;
}


================================================
FILE: backend/tests/integration/tests/pruning/website/css/custom-fonts.css
================================================
/* ==================================================
Font-Face Icons
================================================== */

@font-face {
  font-family: "Icons";
  src: url("../fonts/customicon/Icons.eot");
  src:
    url("../fonts/customicon/Icons.eot?#iefix") format("embedded-opentype"),
    url("../fonts/customicon/Icons.woff") format("woff"),
    url("../fonts/customicon/Icons.ttf") format("truetype"),
    url("../fonts/customicon/Icons.svg#Icons") format("svg");
  font-weight: normal;
  font-style: normal;
}

/* Use the following CSS code if you want to use data attributes for inserting your icons */
[data-icon]:before {
  font-family: "Icons";
  content: attr(data-icon);
  speak: none;
  font-weight: normal;
  font-variant: normal;
  text-transform: none;
  line-height: 1;
  -webkit-font-smoothing: antialiased;
}

[class^="font-"]:before,
[class*=" font-"]:before {
  font-family: "Icons";
  speak: none;
  font-style: normal;
  font-weight: normal;
  font-variant: normal;
  text-transform: none;
  -webkit-font-smoothing: antialiased;
}

[class^="font-"],
[class*=" font-"] {
  display: inline-block;
  line-height: 1em;
}

/* Use the following CSS code if you want to have a class per icon */
/*
Instead of a list of all class selectors,
you can use the generic selector below, but it's slower:
[class*="font-icon-"] {
*/
.font-icon-zoom-out,
.font-icon-zoom-in,
.font-icon-wrench,
.font-icon-waves,
.font-icon-warning,
.font-icon-volume-up,
.font-icon-volume-off,
.font-icon-volume-down,
.font-icon-viewport,
.font-icon-user,
.font-icon-user-border,
.font-icon-upload,
.font-icon-upload-2,
.font-icon-unlock,
.font-icon-underline,
.font-icon-tint,
.font-icon-time,
.font-icon-text,
.font-icon-text-width,
.font-icon-text-height,
.font-icon-tags,
.font-icon-tag,
.font-icon-table,
.font-icon-strikethrough,
.font-icon-stop,
.font-icon-step-forward,
.font-icon-step-backward,
.font-icon-stars,
.font-icon-star,
.font-icon-star-line,
.font-icon-star-half,
.font-icon-sort,
.font-icon-sort-up,
.font-icon-sort-down,
.font-icon-social-zerply,
.font-icon-social-youtube,
.font-icon-social-yelp,
.font-icon-social-yahoo,
.font-icon-social-wordpress,
.font-icon-social-virb,
.font-icon-social-vimeo,
.font-icon-social-viddler,
.font-icon-social-twitter,
.font-icon-social-tumblr,
.font-icon-social-stumbleupon,
.font-icon-social-soundcloud,
.font-icon-social-skype,
.font-icon-social-share-this,
.font-icon-social-quora,
.font-icon-social-pinterest,
.font-icon-social-photobucket,
.font-icon-social-paypal,
.font-icon-social-myspace,
.font-icon-social-linkedin,
.font-icon-social-last-fm,
.font-icon-social-grooveshark,
.font-icon-social-google-plus,
.font-icon-social-github,
.font-icon-social-forrst,
.font-icon-social-flickr,
.font-icon-social-facebook,
.font-icon-social-evernote,
.font-icon-social-envato,
.font-icon-social-email,
.font-icon-social-dribbble,
.font-icon-social-digg,
.font-icon-social-deviant-art,
.font-icon-social-blogger,
.font-icon-social-behance,
.font-icon-social-bebo,
.font-icon-social-addthis,
.font-icon-social-500px,
.font-icon-sitemap,
.font-icon-signout,
.font-icon-signin,
.font-icon-signal,
.font-icon-shopping-cart,
.font-icon-search,
.font-icon-rss,
.font-icon-road,
.font-icon-retweet,
.font-icon-resize-vertical,
.font-icon-resize-vertical-2,
.font-icon-resize-small,
.font-icon-resize-horizontal,
.font-icon-resize-horizontal-2,
.font-icon-resize-fullscreen,
.font-icon-resize-full,
.font-icon-repeat,
.font-icon-reorder,
.font-icon-remove,
.font-icon-remove-sign,
.font-icon-remove-circle,
.font-icon-read-more,
.font-icon-random,
.font-icon-question-sign,
.font-icon-pushpin,
.font-icon-pushpin-2,
.font-icon-print,
.font-icon-plus,
.font-icon-plus-sign,
.font-icon-play,
.font-icon-picture,
.font-icon-phone,
.font-icon-phone-sign,
.font-icon-phone-boxed,
.font-icon-pause,
.font-icon-paste,
.font-icon-paper-clip,
.font-icon-ok,
.font-icon-ok-sign,
.font-icon-ok-circle,
.font-icon-music,
.font-icon-move,
.font-icon-money,
.font-icon-minus,
.font-icon-minus-sign,
.font-icon-map,
.font-icon-map-marker,
.font-icon-map-marker-2,
.font-icon-magnet,
.font-icon-magic,
.font-icon-lock,
.font-icon-list,
.font-icon-list-3,
.font-icon-list-2,
.font-icon-link,
.font-icon-layer,
.font-icon-key,
.font-icon-italic,
.font-icon-info,
.font-icon-indent-right,
.font-icon-indent-left,
.font-icon-inbox,
.font-icon-inbox-empty,
.font-icon-home,
.font-icon-heart,
.font-icon-heart-line,
.font-icon-headphones,
.font-icon-headphones-line,
.font-icon-headphones-line-2,
.font-icon-headphones-2,
.font-icon-hdd,
.font-icon-group,
.font-icon-grid,
.font-icon-grid-large,
.font-icon-globe_line,
.font-icon-glass,
.font-icon-glass_2,
.font-icon-gift,
.font-icon-forward,
.font-icon-font,
.font-icon-folder-open,
.font-icon-folder-close,
.font-icon-flag,
.font-icon-fire,
.font-icon-film,
.font-icon-file,
.font-icon-file-empty,
.font-icon-fast-forward,
.font-icon-fast-backward,
.font-icon-facetime,
.font-icon-eye,
.font-icon-eye_disable,
.font-icon-expand-view,
.font-icon-expand-view-3,
.font-icon-expand-view-2,
.font-icon-expand-vertical,
.font-icon-expand-horizontal,
.font-icon-exclamation,
.font-icon-email,
.font-icon-email_2,
.font-icon-eject,
.font-icon-edit,
.font-icon-edit-check,
.font-icon-download,
.font-icon-download_2,
.font-icon-dashboard,
.font-icon-credit-card,
.font-icon-copy,
.font-icon-comments,
.font-icon-comments-line,
.font-icon-comment,
.font-icon-comment-line,
.font-icon-columns,
.font-icon-columns-2,
.font-icon-cogs,
.font-icon-cog,
.font-icon-cloud,
.font-icon-check,
.font-icon-check-empty,
.font-icon-certificate,
.font-icon-camera,
.font-icon-calendar,
.font-icon-bullhorn,
.font-icon-briefcase,
.font-icon-bookmark,
.font-icon-book,
.font-icon-bolt,
.font-icon-bold,
.font-icon-blockquote,
.font-icon-bell,
.font-icon-beaker,
.font-icon-barcode,
.font-icon-ban-circle,
.font-icon-ban-chart,
.font-icon-ban-chart-2,
.font-icon-backward,
.font-icon-asterisk,
.font-icon-arrow-simple-up,
.font-icon-arrow-simple-up-circle,
.font-icon-arrow-simple-right,
.font-icon-arrow-simple-right-circle,
.font-icon-arrow-simple-left,
.font-icon-arrow-simple-left-circle,
.font-icon-arrow-simple-down,
.font-icon-arrow-simple-down-circle,
.font-icon-arrow-round-up,
.font-icon-arrow-round-up-circle,
.font-icon-arrow-round-right,
.font-icon-arrow-round-right-circle,
.font-icon-arrow-round-left,
.font-icon-arrow-round-left-circle,
.font-icon-arrow-round-down,
.font-icon-arrow-round-down-circle,
.font-icon-arrow-light-up,
.font-icon-arrow-light-round-up,
.font-icon-arrow-light-round-up-circle,
.font-icon-arrow-light-round-right,
.font-icon-arrow-light-round-right-circle,
.font-icon-arrow-light-round-left,
.font-icon-arrow-light-round-left-circle,
.font-icon-arrow-light-round-down,
.font-icon-arrow-light-round-down-circle,
.font-icon-arrow-light-right,
.font-icon-arrow-light-left,
.font-icon-arrow-light-down,
.font-icon-align-right,
.font-icon-align-left,
.font-icon-align-justify,
.font-icon-align-center,
.font-icon-adjust {
  font-family: "Icons";
  speak: none;
  font-style: normal;
  font-weight: normal;
  font-variant: normal;
  text-transform: none;
  line-height: 1;
  -webkit-font-smoothing: antialiased;
}
.font-icon-zoom-out:before {
  content: "\e000";
}
.font-icon-zoom-in:before {
  content: "\e001";
}
.font-icon-wrench:before {
  content: "\e002";
}
.font-icon-waves:before {
  content: "\e003";
}
.font-icon-warning:before {
  content: "\e004";
}
.font-icon-volume-up:before {
  content: "\e005";
}
.font-icon-volume-off:before {
  content: "\e006";
}
.font-icon-volume-down:before {
  content: "\e007";
}
.font-icon-viewport:before {
  content: "\e008";
}
.font-icon-user:before {
  content: "\e009";
}
.font-icon-user-border:before {
  content: "\e00a";
}
.font-icon-upload:before {
  content: "\e00b";
}
.font-icon-upload-2:before {
  content: "\e00c";
}
.font-icon-unlock:before {
  content: "\e00d";
}
.font-icon-underline:before {
  content: "\e00e";
}
.font-icon-tint:before {
  content: "\e00f";
}
.font-icon-time:before {
  content: "\e010";
}
.font-icon-text:before {
  content: "\e011";
}
.font-icon-text-width:before {
  content: "\e012";
}
.font-icon-text-height:before {
  content: "\e013";
}
.font-icon-tags:before {
  content: "\e014";
}
.font-icon-tag:before {
  content: "\e015";
}
.font-icon-table:before {
  content: "\e016";
}
.font-icon-strikethrough:before {
  content: "\e017";
}
.font-icon-stop:before {
  content: "\e018";
}
.font-icon-step-forward:before {
  content: "\e019";
}
.font-icon-step-backward:before {
  content: "\e01a";
}
.font-icon-stars:before {
  content: "\e01b";
}
.font-icon-star:before {
  content: "\e01c";
}
.font-icon-star-line:before {
  content: "\e01d";
}
.font-icon-star-half:before {
  content: "\e01e";
}
.font-icon-sort:before {
  content: "\e01f";
}
.font-icon-sort-up:before {
  content: "\e020";
}
.font-icon-sort-down:before {
  content: "\e021";
}
.font-icon-social-zerply:before {
  content: "\e022";
}
.font-icon-social-youtube:before {
  content: "\e023";
}
.font-icon-social-yelp:before {
  content: "\e024";
}
.font-icon-social-yahoo:before {
  content: "\e025";
}
.font-icon-social-wordpress:before {
  content: "\e026";
}
.font-icon-social-virb:before {
  content: "\e027";
}
.font-icon-social-vimeo:before {
  content: "\e028";
}
.font-icon-social-viddler:before {
  content: "\e029";
}
.font-icon-social-twitter:before {
  content: "\e02a";
}
.font-icon-social-tumblr:before {
  content: "\e02b";
}
.font-icon-social-stumbleupon:before {
  content: "\e02c";
}
.font-icon-social-soundcloud:before {
  content: "\e02d";
}
.font-icon-social-skype:before {
  content: "\e02e";
}
.font-icon-social-share-this:before {
  content: "\e02f";
}
.font-icon-social-quora:before {
  content: "\e030";
}
.font-icon-social-pinterest:before {
  content: "\e031";
}
.font-icon-social-photobucket:before {
  content: "\e032";
}
.font-icon-social-paypal:before {
  content: "\e033";
}
.font-icon-social-myspace:before {
  content: "\e034";
}
.font-icon-social-linkedin:before {
  content: "\e035";
}
.font-icon-social-last-fm:before {
  content: "\e036";
}
.font-icon-social-grooveshark:before {
  content: "\e037";
}
.font-icon-social-google-plus:before {
  content: "\e038";
}
.font-icon-social-github:before {
  content: "\e039";
}
.font-icon-social-forrst:before {
  content: "\e03a";
}
.font-icon-social-flickr:before {
  content: "\e03b";
}
.font-icon-social-facebook:before {
  content: "\e03c";
}
.font-icon-social-evernote:before {
  content: "\e03d";
}
.font-icon-social-envato:before {
  content: "\e03e";
}
.font-icon-social-email:before {
  content: "\e03f";
}
.font-icon-social-dribbble:before {
  content: "\e040";
}
.font-icon-social-digg:before {
  content: "\e041";
}
.font-icon-social-deviant-art:before {
  content: "\e042";
}
.font-icon-social-blogger:before {
  content: "\e043";
}
.font-icon-social-behance:before {
  content: "\e044";
}
.font-icon-social-bebo:before {
  content: "\e045";
}
.font-icon-social-addthis:before {
  content: "\e046";
}
.font-icon-social-500px:before {
  content: "\e047";
}
.font-icon-sitemap:before {
  content: "\e048";
}
.font-icon-signout:before {
  content: "\e049";
}
.font-icon-signin:before {
  content: "\e04a";
}
.font-icon-signal:before {
  content: "\e04b";
}
.font-icon-shopping-cart:before {
  content: "\e04c";
}
.font-icon-search:before {
  content: "\e04d";
}
.font-icon-rss:before {
  content: "\e04e";
}
.font-icon-road:before {
  content: "\e04f";
}
.font-icon-retweet:before {
  content: "\e050";
}
.font-icon-resize-vertical:before {
  content: "\e051";
}
.font-icon-resize-vertical-2:before {
  content: "\e052";
}
.font-icon-resize-small:before {
  content: "\e053";
}
.font-icon-resize-horizontal:before {
  content: "\e054";
}
.font-icon-resize-horizontal-2:before {
  content: "\e055";
}
.font-icon-resize-fullscreen:before {
  content: "\e056";
}
.font-icon-resize-full:before {
  content: "\e057";
}
.font-icon-repeat:before {
  content: "\e058";
}
.font-icon-reorder:before {
  content: "\e059";
}
.font-icon-remove:before {
  content: "\e05a";
}
.font-icon-remove-sign:before {
  content: "\e05b";
}
.font-icon-remove-circle:before {
  content: "\e05c";
}
.font-icon-read-more:before {
  content: "\e05d";
}
.font-icon-random:before {
  content: "\e05e";
}
.font-icon-question-sign:before {
  content: "\e05f";
}
.font-icon-pushpin:before {
  content: "\e060";
}
.font-icon-pushpin-2:before {
  content: "\e061";
}
.font-icon-print:before {
  content: "\e062";
}
.font-icon-plus:before {
  content: "\e063";
}
.font-icon-plus-sign:before {
  content: "\e064";
}
.font-icon-play:before {
  content: "\e065";
}
.font-icon-picture:before {
  content: "\e066";
}
.font-icon-phone:before {
  content: "\e067";
}
.font-icon-phone-sign:before {
  content: "\e068";
}
.font-icon-phone-boxed:before {
  content: "\e069";
}
.font-icon-pause:before {
  content: "\e06a";
}
.font-icon-paste:before {
  content: "\e06b";
}
.font-icon-paper-clip:before {
  content: "\e06c";
}
.font-icon-ok:before {
  content: "\e06d";
}
.font-icon-ok-sign:before {
  content: "\e06e";
}
.font-icon-ok-circle:before {
  content: "\e06f";
}
.font-icon-music:before {
  content: "\e070";
}
.font-icon-move:before {
  content: "\e071";
}
.font-icon-money:before {
  content: "\e072";
}
.font-icon-minus:before {
  content: "\e073";
}
.font-icon-minus-sign:before {
  content: "\e074";
}
.font-icon-map:before {
  content: "\e075";
}
.font-icon-map-marker:before {
  content: "\e076";
}
.font-icon-map-marker-2:before {
  content: "\e077";
}
.font-icon-magnet:before {
  content: "\e078";
}
.font-icon-magic:before {
  content: "\e079";
}
.font-icon-lock:before {
  content: "\e07a";
}
.font-icon-list:before {
  content: "\e07b";
}
.font-icon-list-3:before {
  content: "\e07c";
}
.font-icon-list-2:before {
  content: "\e07d";
}
.font-icon-link:before {
  content: "\e07e";
}
.font-icon-layer:before {
  content: "\e07f";
}
.font-icon-key:before {
  content: "\e080";
}
.font-icon-italic:before {
  content: "\e081";
}
.font-icon-info:before {
  content: "\e082";
}
.font-icon-indent-right:before {
  content: "\e083";
}
.font-icon-indent-left:before {
  content: "\e084";
}
.font-icon-inbox:before {
  content: "\e085";
}
.font-icon-inbox-empty:before {
  content: "\e086";
}
.font-icon-home:before {
  content: "\e087";
}
.font-icon-heart:before {
  content: "\e088";
}
.font-icon-heart-line:before {
  content: "\e089";
}
.font-icon-headphones:before {
  content: "\e08a";
}
.font-icon-headphones-line:before {
  content: "\e08b";
}
.font-icon-headphones-line-2:before {
  content: "\e08c";
}
.font-icon-headphones-2:before {
  content: "\e08d";
}
.font-icon-hdd:before {
  content: "\e08e";
}
.font-icon-group:before {
  content: "\e08f";
}
.font-icon-grid:before {
  content: "\e090";
}
.font-icon-grid-large:before {
  content: "\e091";
}
.font-icon-globe_line:before {
  content: "\e092";
}
.font-icon-glass:before {
  content: "\e093";
}
.font-icon-glass_2:before {
  content: "\e094";
}
.font-icon-gift:before {
  content: "\e095";
}
.font-icon-forward:before {
  content: "\e096";
}
.font-icon-font:before {
  content: "\e097";
}
.font-icon-folder-open:before {
  content: "\e098";
}
.font-icon-folder-close:before {
  content: "\e099";
}
.font-icon-flag:before {
  content: "\e09a";
}
.font-icon-fire:before {
  content: "\e09b";
}
.font-icon-film:before {
  content: "\e09c";
}
.font-icon-file:before {
  content: "\e09d";
}
.font-icon-file-empty:before {
  content: "\e09e";
}
.font-icon-fast-forward:before {
  content: "\e09f";
}
.font-icon-fast-backward:before {
  content: "\e0a0";
}
.font-icon-facetime:before {
  content: "\e0a1";
}
.font-icon-eye:before {
  content: "\e0a2";
}
.font-icon-eye_disable:before {
  content: "\e0a3";
}
.font-icon-expand-view:before {
  content: "\e0a4";
}
.font-icon-expand-view-3:before {
  content: "\e0a5";
}
.font-icon-expand-view-2:before {
  content: "\e0a6";
}
.font-icon-expand-vertical:before {
  content: "\e0a7";
}
.font-icon-expand-horizontal:before {
  content: "\e0a8";
}
.font-icon-exclamation:before {
  content: "\e0a9";
}
.font-icon-email:before {
  content: "\e0aa";
}
.font-icon-email_2:before {
  content: "\e0ab";
}
.font-icon-eject:before {
  content: "\e0ac";
}
.font-icon-edit:before {
  content: "\e0ad";
}
.font-icon-edit-check:before {
  content: "\e0ae";
}
.font-icon-download:before {
  content: "\e0af";
}
.font-icon-download_2:before {
  content: "\e0b0";
}
.font-icon-dashboard:before {
  content: "\e0b1";
}
.font-icon-credit-card:before {
  content: "\e0b2";
}
.font-icon-copy:before {
  content: "\e0b3";
}
.font-icon-comments:before {
  content: "\e0b4";
}
.font-icon-comments-line:before {
  content: "\e0b5";
}
.font-icon-comment:before {
  content: "\e0b6";
}
.font-icon-comment-line:before {
  content: "\e0b7";
}
.font-icon-columns:before {
  content: "\e0b8";
}
.font-icon-columns-2:before {
  content: "\e0b9";
}
.font-icon-cogs:before {
  content: "\e0ba";
}
.font-icon-cog:before {
  content: "\e0bb";
}
.font-icon-cloud:before {
  content: "\e0bc";
}
.font-icon-check:before {
  content: "\e0bd";
}
.font-icon-check-empty:before {
  content: "\e0be";
}
.font-icon-certificate:before {
  content: "\e0bf";
}
.font-icon-camera:before {
  content: "\e0c0";
}
.font-icon-calendar:before {
  content: "\e0c1";
}
.font-icon-bullhorn:before {
  content: "\e0c2";
}
.font-icon-briefcase:before {
  content: "\e0c3";
}
.font-icon-bookmark:before {
  content: "\e0c4";
}
.font-icon-book:before {
  content: "\e0c5";
}
.font-icon-bolt:before {
  content: "\e0c6";
}
.font-icon-bold:before {
  content: "\e0c7";
}
.font-icon-blockquote:before {
  content: "\e0c8";
}
.font-icon-bell:before {
  content: "\e0c9";
}
.font-icon-beaker:before {
  content: "\e0ca";
}
.font-icon-barcode:before {
  content: "\e0cb";
}
.font-icon-ban-circle:before {
  content: "\e0cc";
}
.font-icon-ban-chart:before {
  content: "\e0cd";
}
.font-icon-ban-chart-2:before {
  content: "\e0ce";
}
.font-icon-backward:before {
  content: "\e0cf";
}
.font-icon-asterisk:before {
  content: "\e0d0";
}
.font-icon-arrow-simple-up:before {
  content: "\e0d1";
}
.font-icon-arrow-simple-up-circle:before {
  content: "\e0d2";
}
.font-icon-arrow-simple-right:before {
  content: "\e0d3";
}
.font-icon-arrow-simple-right-circle:before {
  content: "\e0d4";
}
.font-icon-arrow-simple-left:before {
  content: "\e0d5";
}
.font-icon-arrow-simple-left-circle:before {
  content: "\e0d6";
}
.font-icon-arrow-simple-down:before {
  content: "\e0d7";
}
.font-icon-arrow-simple-down-circle:before {
  content: "\e0d8";
}
.font-icon-arrow-round-up:before {
  content: "\e0d9";
}
.font-icon-arrow-round-up-circle:before {
  content: "\e0da";
}
.font-icon-arrow-round-right:before {
  content: "\e0db";
}
.font-icon-arrow-round-right-circle:before {
  content: "\e0dc";
}
.font-icon-arrow-round-left:before {
  content: "\e0dd";
}
.font-icon-arrow-round-left-circle:before {
  content: "\e0de";
}
.font-icon-arrow-round-down:before {
  content: "\e0df";
}
.font-icon-arrow-round-down-circle:before {
  content: "\e0e0";
}
.font-icon-arrow-light-up:before {
  content: "\e0e1";
}
.font-icon-arrow-light-round-up:before {
  content: "\e0e2";
}
.font-icon-arrow-light-round-up-circle:before {
  content: "\e0e3";
}
.font-icon-arrow-light-round-right:before {
  content: "\e0e4";
}
.font-icon-arrow-light-round-right-circle:before {
  content: "\e0e5";
}
.font-icon-arrow-light-round-left:before {
  content: "\e0e6";
}
.font-icon-arrow-light-round-left-circle:before {
  content: "\e0e7";
}
.font-icon-arrow-light-round-down:before {
  content: "\e0e8";
}
.font-icon-arrow-light-round-down-circle:before {
  content: "\e0e9";
}
.font-icon-arrow-light-right:before {
  content: "\e0ea";
}
.font-icon-arrow-light-left:before {
  content: "\e0eb";
}
.font-icon-arrow-light-down:before {
  content: "\e0ec";
}
.font-icon-align-right:before {
  content: "\e0ed";
}
.font-icon-align-left:before {
  content: "\e0ee";
}
.font-icon-align-justify:before {
  content: "\e0ef";
}
.font-icon-align-center:before {
  content: "\e0f0";
}
.font-icon-adjust:before {
  content: "\e0f1";
}


================================================
FILE: backend/tests/integration/tests/pruning/website/css/fancybox/jquery.fancybox.css
================================================
/*! fancyBox v2.1.4 fancyapps.com | fancyapps.com/fancybox/#license */
.fancybox-wrap,
.fancybox-skin,
.fancybox-outer,
.fancybox-inner,
.fancybox-image,
.fancybox-wrap iframe,
.fancybox-wrap object,
.fancybox-nav,
.fancybox-nav span,
.fancybox-tmp {
  padding: 0;
  margin: 0;
  border: 0;
  outline: none;
  vertical-align: top;
}

.fancybox-wrap {
  position: absolute;
  top: 0;
  left: 0;
  z-index: 8020;
}

.fancybox-skin {
  position: relative;
  background: #2f3238;
  color: #565656;
  text-shadow: none;
  -webkit-border-radius: 0;
  -moz-border-radius: 0;
  border-radius: 0;
}

.fancybox-opened {
  z-index: 8030;
}

.fancybox-opened .fancybox-skin {
  -webkit-box-shadow: none;
  -moz-box-shadow: none;
  box-shadow: none;
}

.fancybox-outer,
.fancybox-inner {
  position: relative;
}

.fancybox-inner {
  overflow: hidden;
}

.fancybox-type-iframe .fancybox-inner {
  -webkit-overflow-scrolling: touch;
}

.fancybox-error {
  color: #444;
  font-size: 14px;
  line-height: 20px;
  margin: 0;
  padding: 15px;
  white-space: nowrap;
}

.fancybox-image,
.fancybox-iframe {
  display: block;
  width: 100%;
  height: 100%;
}

.fancybox-image {
  max-width: 100%;
  max-height: 100%;
}

#fancybox-loading,
.fancybox-close,
.fancybox-prev span,
.fancybox-next span {
  background-image: url("fancybox_sprite.png") !important;
}

#fancybox-loading {
  position: fixed;
  top: 50%;
  left: 50%;
  margin-top: -22px;
  margin-left: -22px;
  background-position: 0 -108px;
  opacity: 0.8;
  cursor: pointer;
  z-index: 8060;
}

#fancybox-loading div {
  width: 44px;
  height: 44px;
  background: url("fancybox_loading.gif") center center no-repeat;
}

.fancybox-close {
  position: absolute;
  right: 0;
  top: 0;
  width: 40px;
  height: 38px;
  cursor: pointer;
  z-index: 9000;
  background-image: none;

  opacity: 0.5;

  -webkit-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  -moz-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  -o-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
}

.fancybox-close i {
  left: 50%;
  top: 50%;
  margin: -11px 0 0 -11px;
  font-size: 22px;
  line-height: 1em;
  position: absolute;
  color: #ffffff;
}

.fancybox-close:hover {
  opacity: 1;
}

.fancybox-nav {
  position: absolute;
  top: 0;
  height: 100%;
  cursor: pointer;
  text-decoration: none;
  background: transparent url("blank.gif"); /* helps IE */
  -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
  z-index: 8040;
}

.fancybox-prev,
.fancybox-prev span {
  left: 0;
}

.fancybox-next,
.fancybox-next span {
  right: 0;
}

.fancybox-nav span {
  position: absolute;
  top: 50%;
  width: 44px;
  height: 32px;
  margin-top: -25px;
  cursor: pointer;
  z-index: 8040;
  background-image: none;
  background-color: #26292e;
  background-position-y: -38px;
  opacity: 0.5;

  -webkit-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  -moz-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  -o-transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
  transition:
    background 0.1s linear 0s,
    opacity 0.1s linear 0s;
}
.fancybox-next span {
  background-position-y: -72px;
}
.fancybox-prev span i {
  left: 50%;
  top: 50%;
  margin: -15px 0 0 -17px;
  font-size: 30px;
  line-height: 1em;
  position: absolute;
  color: #ffffff;
}

.fancybox-next span i {
  left: 50%;
  top: 50%;
  margin: -15px 0 0 -15px;
  font-size: 30px;
  line-height: 1em;
  position: absolute;
  color: #ffffff;
}

.fancybox-nav:hover span {
  opacity: 1;
}

.fancybox-tmp {
  position: absolute;
  top: -99999px;
  left: -99999px;
  visibility: hidden;
  max-width: 99999px;
  max-height: 99999px;
  overflow: visible !important;
}

/* Overlay helper */

.fancybox-lock {
  margin: 0 !important;
}

.fancybox-overlay {
  position: absolute;
  top: 0;
  left: 0;
  overflow: hidden !important;
  display: none;
  z-index: 8010;
  background: url("fancybox_overlay.png");
}

.fancybox-overlay-fixed {
  position: fixed;
  bottom: 0;
  right: 0;
}

.fancybox-lock .fancybox-overlay {
  overflow: auto;
  overflow-y: scroll;
}

/* Title helper */

.fancybox-title {
  visibility: hidden;
  position: relative;
  text-shadow: none;
  z-index: 8050;
}

.fancybox-opened .fancybox-title {
  visibility: visible;
}

.fancybox-opened .fancybox-title h4 {
  font-size: 24px;
  color: #fff;
  font-weight: 300;
  margin-bottom: 10px;
}

.fancybox-opened .fancybox-title p {
  font-size: 16px;
  font-weight: 300;
  color: #bbb;
  line-height: 1.6em;
  margin-bottom: 0;
}

.fancybox-title-float-wrap {
  position: absolute;
  bottom: 0;
  right: 50%;
  margin-bottom: -35px;
  z-index: 8050;
  text-align: center;
}

.fancybox-title-float-wrap .child {
  display: inline-block;
  margin-right: -100%;
  padding: 2px 20px;
  background: transparent; /* Fallback for web browsers that doesn't support RGBa */
  background: rgba(0, 0, 0, 0.8);
  -webkit-border-radius: 15px;
  -moz-border-radius: 15px;
  border-radius: 15px;
  text-shadow: 0 1px 2px #222;
  color: #fff;
  font-weight: bold;
  line-height: 24px;
  white-space: nowrap;
}

.fancybox-title-outside-wrap {
  position: relative;
  margin-top: 10px;
  color: #fff;
}

.fancybox-title-inside-wrap {
  padding: 3px 30px 6px;
  background: #61b331;
}

.fancybox-title-over-wrap {
  position: absolute;
  bottom: 0;
  left: 0;
  color: #fff;
  padding: 10px;
  background: #000;
  background: rgba(0, 0, 0, 0.8);
}

@media (max-width: 480px) {
  .fancybox-nav span,
  .fancybox-nav:hover span,
  .fancybox-close,
  .fancybox-close:hover {
    background: transparent;
  }

  .fancybox-close i {
    left: 70px;
    top: 10px;
  }
}

@media (max-width: 320px) {
  .fancybox-close i {
    left: 30px;
    top: 20px;
  }
}


================================================
FILE: backend/tests/integration/tests/pruning/website/css/font-awesome.css
================================================
/*!
 *  Font Awesome 4.0.3 by @davegandy - http://fontawesome.io - @fontawesome
 *  License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License)
 */
/* FONT PATH
 * -------------------------- */
@font-face {
  font-family: "FontAwesome";
  src: url("../fonts/fontawesome-webfont.eot?v=4.0.3");
  src:
    url("../fonts/fontawesome-webfont.eot?#iefix&v=4.0.3")
      format("embedded-opentype"),
    url("../fonts/fontawesome-webfont.woff?v=4.0.3") format("woff"),
    url("../fonts/fontawesome-webfont.ttf?v=4.0.3") format("truetype"),
    url("../fonts/fontawesome-webfont.svg?v=4.0.3#fontawesomeregular")
      format("svg");
  font-weight: normal;
  font-style: normal;
}
.fa {
  display: inline-block;
  font-family: FontAwesome;
  font-style: normal;
  font-weight: normal;
  line-height: 1;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}
/* makes the font 33% larger relative to the icon container */
.fa-lg {
  font-size: 1.3333333333333333em;
  line-height: 0.75em;
  vertical-align: -15%;
}
.fa-2x {
  font-size: 2em;
}
.fa-3x {
  font-size: 3em;
}
.fa-4x {
  font-size: 4em;
}
.fa-5x {
  font-size: 5em;
}
.fa-fw {
  width: 1.2857142857142858em;
  text-align: center;
}
.fa-ul {
  padding-left: 0;
  margin-left: 2.142857142857143em;
  list-style-type: none;
}
.fa-ul > li {
  position: relative;
}
.fa-li {
  position: absolute;
  left: -2.142857142857143em;
  width: 2.142857142857143em;
  top: 0.14285714285714285em;
  text-align: center;
}
.fa-li.fa-lg {
  left: -1.8571428571428572em;
}
.fa-border {
  padding: 0.2em 0.25em 0.15em;
  border: solid 0.08em #eeeeee;
  border-radius: 0.1em;
}
.pull-right {
  float: right;
}
.pull-left {
  float: left;
}
.fa.pull-left {
  margin-right: 0.3em;
}
.fa.pull-right {
  margin-left: 0.3em;
}
.fa-spin {
  -webkit-animation: spin 2s infinite linear;
  -moz-animation: spin 2s infinite linear;
  -o-animation: spin 2s infinite linear;
  animation: spin 2s infinite linear;
}
@-moz-keyframes spin {
  0% {
    -moz-transform: rotate(0deg);
  }
  100% {
    -moz-transform: rotate(359deg);
  }
}
@-webkit-keyframes spin {
  0% {
    -webkit-transform: rotate(0deg);
  }
  100% {
    -webkit-transform: rotate(359deg);
  }
}
@-o-keyframes spin {
  0% {
    -o-transform: rotate(0deg);
  }
  100% {
    -o-transform: rotate(359deg);
  }
}
@-ms-keyframes spin {
  0% {
    -ms-transform: rotate(0deg);
  }
  100% {
    -ms-transform: rotate(359deg);
  }
}
@keyframes spin {
  0% {
    transform: rotate(0deg);
  }
  100% {
    transform: rotate(359deg);
  }
}
.fa-rotate-90 {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=1);
  -webkit-transform: rotate(90deg);
  -moz-transform: rotate(90deg);
  -ms-transform: rotate(90deg);
  -o-transform: rotate(90deg);
  transform: rotate(90deg);
}
.fa-rotate-180 {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2);
  -webkit-transform: rotate(180deg);
  -moz-transform: rotate(180deg);
  -ms-transform: rotate(180deg);
  -o-transform: rotate(180deg);
  transform: rotate(180deg);
}
.fa-rotate-270 {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=3);
  -webkit-transform: rotate(270deg);
  -moz-transform: rotate(270deg);
  -ms-transform: rotate(270deg);
  -o-transform: rotate(270deg);
  transform: rotate(270deg);
}
.fa-flip-horizontal {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1);
  -webkit-transform: scale(-1, 1);
  -moz-transform: scale(-1, 1);
  -ms-transform: scale(-1, 1);
  -o-transform: scale(-1, 1);
  transform: scale(-1, 1);
}
.fa-flip-vertical {
  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1);
  -webkit-transform: scale(1, -1);
  -moz-transform: scale(1, -1);
  -ms-transform: scale(1, -1);
  -o-transform: scale(1, -1);
  transform: scale(1, -1);
}
.fa-stack {
  position: relative;
  display: inline-block;
  width: 2em;
  height: 2em;
  line-height: 2em;
  vertical-align: middle;
}
.fa-stack-1x,
.fa-stack-2x {
  position: absolute;
  left: 0;
  width: 100%;
  text-align: center;
}
.fa-stack-1x {
  line-height: inherit;
}
.fa-stack-2x {
  font-size: 2em;
}
.fa-inverse {
  color: #ffffff;
}
/* Font Awesome uses the Unicode Private Use Area (PUA) to ensure screen
   readers do not read off random characters that represent icons */
.fa-glass:before {
  content: "\f000";
}
.fa-music:before {
  content: "\f001";
}
.fa-search:before {
  content: "\f002";
}
.fa-envelope-o:before {
  content: "\f003";
}
.fa-heart:before {
  content: "\f004";
}
.fa-star:before {
  content: "\f005";
}
.fa-star-o:before {
  content: "\f006";
}
.fa-user:before {
  content: "\f007";
}
.fa-film:before {
  content: "\f008";
}
.fa-th-large:before {
  content: "\f009";
}
.fa-th:before {
  content: "\f00a";
}
.fa-th-list:before {
  content: "\f00b";
}
.fa-check:before {
  content: "\f00c";
}
.fa-times:before {
  content: "\f00d";
}
.fa-search-plus:before {
  content: "\f00e";
}
.fa-search-minus:before {
  content: "\f010";
}
.fa-power-off:before {
  content: "\f011";
}
.fa-signal:before {
  content: "\f012";
}
.fa-gear:before,
.fa-cog:before {
  content: "\f013";
}
.fa-trash-o:before {
  content: "\f014";
}
.fa-home:before {
  content: "\f015";
}
.fa-file-o:before {
  content: "\f016";
}
.fa-clock-o:before {
  content: "\f017";
}
.fa-road:before {
  content: "\f018";
}
.fa-download:before {
  content: "\f019";
}
.fa-arrow-circle-o-down:before {
  content: "\f01a";
}
.fa-arrow-circle-o-up:before {
  content: "\f01b";
}
.fa-inbox:before {
  content: "\f01c";
}
.fa-play-circle-o:before {
  content: "\f01d";
}
.fa-rotate-right:before,
.fa-repeat:before {
  content: "\f01e";
}
.fa-refresh:before {
  content: "\f021";
}
.fa-list-alt:before {
  content: "\f022";
}
.fa-lock:before {
  content: "\f023";
}
.fa-flag:before {
  content: "\f024";
}
.fa-headphones:before {
  content: "\f025";
}
.fa-volume-off:before {
  content: "\f026";
}
.fa-volume-down:before {
  content: "\f027";
}
.fa-volume-up:before {
  content: "\f028";
}
.fa-qrcode:before {
  content: "\f029";
}
.fa-barcode:before {
  content: "\f02a";
}
.fa-tag:before {
  content: "\f02b";
}
.fa-tags:before {
  content: "\f02c";
}
.fa-book:before {
  content: "\f02d";
}
.fa-bookmark:before {
  content: "\f02e";
}
.fa-print:before {
  content: "\f02f";
}
.fa-camera:before {
  content: "\f030";
}
.fa-font:before {
  content: "\f031";
}
.fa-bold:before {
  content: "\f032";
}
.fa-italic:before {
  content: "\f033";
}
.fa-text-height:before {
  content: "\f034";
}
.fa-text-width:before {
  content: "\f035";
}
.fa-align-left:before {
  content: "\f036";
}
.fa-align-center:before {
  content: "\f037";
}
.fa-align-right:before {
  content: "\f038";
}
.fa-align-justify:before {
  content: "\f039";
}
.fa-list:before {
  content: "\f03a";
}
.fa-dedent:before,
.fa-outdent:before {
  content: "\f03b";
}
.fa-indent:before {
  content: "\f03c";
}
.fa-video-camera:before {
  content: "\f03d";
}
.fa-picture-o:before {
  content: "\f03e";
}
.fa-pencil:before {
  content: "\f040";
}
.fa-map-marker:before {
  content: "\f041";
}
.fa-adjust:before {
  content: "\f042";
}
.fa-tint:before {
  content: "\f043";
}
.fa-edit:before,
.fa-pencil-square-o:before {
  content: "\f044";
}
.fa-share-square-o:before {
  content: "\f045";
}
.fa-check-square-o:before {
  content: "\f046";
}
.fa-arrows:before {
  content: "\f047";
}
.fa-step-backward:before {
  content: "\f048";
}
.fa-fast-backward:before {
  content: "\f049";
}
.fa-backward:before {
  content: "\f04a";
}
.fa-play:before {
  content: "\f04b";
}
.fa-pause:before {
  content: "\f04c";
}
.fa-stop:before {
  content: "\f04d";
}
.fa-forward:before {
  content: "\f04e";
}
.fa-fast-forward:before {
  content: "\f050";
}
.fa-step-forward:before {
  content: "\f051";
}
.fa-eject:before {
  content: "\f052";
}
.fa-chevron-left:before {
  content: "\f053";
}
.fa-chevron-right:before {
  content: "\f054";
}
.fa-plus-circle:before {
  content: "\f055";
}
.fa-minus-circle:before {
  content: "\f056";
}
.fa-times-circle:before {
  content: "\f057";
}
.fa-check-circle:before {
  content: "\f058";
}
.fa-question-circle:before {
  content: "\f059";
}
.fa-info-circle:before {
  content: "\f05a";
}
.fa-crosshairs:before {
  content: "\f05b";
}
.fa-times-circle-o:before {
  content: "\f05c";
}
.fa-check-circle-o:before {
  content: "\f05d";
}
.fa-ban:before {
  content: "\f05e";
}
.fa-arrow-left:before {
  content: "\f060";
}
.fa-arrow-right:before {
  content: "\f061";
}
.fa-arrow-up:before {
  content: "\f062";
}
.fa-arrow-down:before {
  content: "\f063";
}
.fa-mail-forward:before,
.fa-share:before {
  content: "\f064";
}
.fa-expand:before {
  content: "\f065";
}
.fa-compress:before {
  content: "\f066";
}
.fa-plus:before {
  content: "\f067";
}
.fa-minus:before {
  content: "\f068";
}
.fa-asterisk:before {
  content: "\f069";
}
.fa-exclamation-circle:before {
  content: "\f06a";
}
.fa-gift:before {
  content: "\f06b";
}
.fa-leaf:before {
  content: "\f06c";
}
.fa-fire:before {
  content: "\f06d";
}
.fa-eye:before {
  content: "\f06e";
}
.fa-eye-slash:before {
  content: "\f070";
}
.fa-warning:before,
.fa-exclamation-triangle:before {
  content: "\f071";
}
.fa-plane:before {
  content: "\f072";
}
.fa-calendar:before {
  content: "\f073";
}
.fa-random:before {
  content: "\f074";
}
.fa-comment:before {
  content: "\f075";
}
.fa-magnet:before {
  content: "\f076";
}
.fa-chevron-up:before {
  content: "\f077";
}
.fa-chevron-down:before {
  content: "\f078";
}
.fa-retweet:before {
  content: "\f079";
}
.fa-shopping-cart:before {
  content: "\f07a";
}
.fa-folder:before {
  content: "\f07b";
}
.fa-folder-open:before {
  content: "\f07c";
}
.fa-arrows-v:before {
  content: "\f07d";
}
.fa-arrows-h:before {
  content: "\f07e";
}
.fa-bar-chart-o:before {
  content: "\f080";
}
.fa-twitter-square:before {
  content: "\f081";
}
.fa-facebook-square:before {
  content: "\f082";
}
.fa-camera-retro:before {
  content: "\f083";
}
.fa-key:before {
  content: "\f084";
}
.fa-gears:before,
.fa-cogs:before {
  content: "\f085";
}
.fa-comments:before {
  content: "\f086";
}
.fa-thumbs-o-up:before {
  content: "\f087";
}
.fa-thumbs-o-down:before {
  content: "\f088";
}
.fa-star-half:before {
  content: "\f089";
}
.fa-heart-o:before {
  content: "\f08a";
}
.fa-sign-out:before {
  content: "\f08b";
}
.fa-linkedin-square:before {
  content: "\f08c";
}
.fa-thumb-tack:before {
  content: "\f08d";
}
.fa-external-link:before {
  content: "\f08e";
}
.fa-sign-in:before {
  content: "\f090";
}
.fa-trophy:before {
  content: "\f091";
}
.fa-github-square:before {
  content: "\f092";
}
.fa-upload:before {
  content: "\f093";
}
.fa-lemon-o:before {
  content: "\f094";
}
.fa-phone:before {
  content: "\f095";
}
.fa-square-o:before {
  content: "\f096";
}
.fa-bookmark-o:before {
  content: "\f097";
}
.fa-phone-square:before {
  content: "\f098";
}
.fa-twitter:before {
  content: "\f099";
}
.fa-facebook:before {
  content: "\f09a";
}
.fa-github:before {
  content: "\f09b";
}
.fa-unlock:before {
  content: "\f09c";
}
.fa-credit-card:before {
  content: "\f09d";
}
.fa-rss:before {
  content: "\f09e";
}
.fa-hdd-o:before {
  content: "\f0a0";
}
.fa-bullhorn:before {
  content: "\f0a1";
}
.fa-bell:before {
  content: "\f0f3";
}
.fa-certificate:before {
  content: "\f0a3";
}
.fa-hand-o-right:before {
  content: "\f0a4";
}
.fa-hand-o-left:before {
  content: "\f0a5";
}
.fa-hand-o-up:before {
  content: "\f0a6";
}
.fa-hand-o-down:before {
  content: "\f0a7";
}
.fa-arrow-circle-left:before {
  content: "\f0a8";
}
.fa-arrow-circle-right:before {
  content: "\f0a9";
}
.fa-arrow-circle-up:before {
  content: "\f0aa";
}
.fa-arrow-circle-down:before {
  content: "\f0ab";
}
.fa-globe:before {
  content: "\f0ac";
}
.fa-wrench:before {
  content: "\f0ad";
}
.fa-tasks:before {
  content: "\f0ae";
}
.fa-filter:before {
  content: "\f0b0";
}
.fa-briefcase:before {
  content: "\f0b1";
}
.fa-arrows-alt:before {
  content: "\f0b2";
}
.fa-group:before,
.fa-users:before {
  content: "\f0c0";
}
.fa-chain:before,
.fa-link:before {
  content: "\f0c1";
}
.fa-cloud:before {
  content: "\f0c2";
}
.fa-flask:before {
  content: "\f0c3";
}
.fa-cut:before,
.fa-scissors:before {
  content: "\f0c4";
}
.fa-copy:before,
.fa-files-o:before {
  content: "\f0c5";
}
.fa-paperclip:before {
  content: "\f0c6";
}
.fa-save:before,
.fa-floppy-o:before {
  content: "\f0c7";
}
.fa-square:before {
  content: "\f0c8";
}
.fa-bars:before {
  content: "\f0c9";
}
.fa-list-ul:before {
  content: "\f0ca";
}
.fa-list-ol:before {
  content: "\f0cb";
}
.fa-strikethrough:before {
  content: "\f0cc";
}
.fa-underline:before {
  content: "\f0cd";
}
.fa-table:before {
  content: "\f0ce";
}
.fa-magic:before {
  content: "\f0d0";
}
.fa-truck:before {
  content: "\f0d1";
}
.fa-pinterest:before {
  content: "\f0d2";
}
.fa-pinterest-square:before {
  content: "\f0d3";
}
.fa-google-plus-square:before {
  content: "\f0d4";
}
.fa-google-plus:before {
  content: "\f0d5";
}
.fa-money:before {
  content: "\f0d6";
}
.fa-caret-down:before {
  content: "\f0d7";
}
.fa-caret-up:before {
  content: "\f0d8";
}
.fa-caret-left:before {
  content: "\f0d9";
}
.fa-caret-right:before {
  content: "\f0da";
}
.fa-columns:before {
  content: "\f0db";
}
.fa-unsorted:before,
.fa-sort:before {
  content: "\f0dc";
}
.fa-sort-down:before,
.fa-sort-asc:before {
  content: "\f0dd";
}
.fa-sort-up:before,
.fa-sort-desc:before {
  content: "\f0de";
}
.fa-envelope:before {
  content: "\f0e0";
}
.fa-linkedin:before {
  content: "\f0e1";
}
.fa-rotate-left:before,
.fa-undo:before {
  content: "\f0e2";
}
.fa-legal:before,
.fa-gavel:before {
  content: "\f0e3";
}
.fa-dashboard:before,
.fa-tachometer:before {
  content: "\f0e4";
}
.fa-comment-o:before {
  content: "\f0e5";
}
.fa-comments-o:before {
  content: "\f0e6";
}
.fa-flash:before,
.fa-bolt:before {
  content: "\f0e7";
}
.fa-sitemap:before {
  content: "\f0e8";
}
.fa-umbrella:before {
  content: "\f0e9";
}
.fa-paste:before,
.fa-clipboard:before {
  content: "\f0ea";
}
.fa-lightbulb-o:before {
  content: "\f0eb";
}
.fa-exchange:before {
  content: "\f0ec";
}
.fa-cloud-download:before {
  content: "\f0ed";
}
.fa-cloud-upload:before {
  content: "\f0ee";
}
.fa-user-md:before {
  content: "\f0f0";
}
.fa-stethoscope:before {
  content: "\f0f1";
}
.fa-suitcase:before {
  content: "\f0f2";
}
.fa-bell-o:before {
  content: "\f0a2";
}
.fa-coffee:before {
  content: "\f0f4";
}
.fa-cutlery:before {
  content: "\f0f5";
}
.fa-file-text-o:before {
  content: "\f0f6";
}
.fa-building-o:before {
  content: "\f0f7";
}
.fa-hospital-o:before {
  content: "\f0f8";
}
.fa-ambulance:before {
  content: "\f0f9";
}
.fa-medkit:before {
  content: "\f0fa";
}
.fa-fighter-jet:before {
  content: "\f0fb";
}
.fa-beer:before {
  content: "\f0fc";
}
.fa-h-square:before {
  content: "\f0fd";
}
.fa-plus-square:before {
  content: "\f0fe";
}
.fa-angle-double-left:before {
  content: "\f100";
}
.fa-angle-double-right:before {
  content: "\f101";
}
.fa-angle-double-up:before {
  content: "\f102";
}
.fa-angle-double-down:before {
  content: "\f103";
}
.fa-angle-left:before {
  content: "\f104";
}
.fa-angle-right:before {
  content: "\f105";
}
.fa-angle-up:before {
  content: "\f106";
}
.fa-angle-down:before {
  content: "\f107";
}
.fa-desktop:before {
  content: "\f108";
}
.fa-laptop:before {
  content: "\f109";
}
.fa-tablet:before {
  content: "\f10a";
}
.fa-mobile-phone:before,
.fa-mobile:before {
  content: "\f10b";
}
.fa-circle-o:before {
  content: "\f10c";
}
.fa-quote-left:before {
  content: "\f10d";
}
.fa-quote-right:before {
  content: "\f10e";
}
.fa-spinner:before {
  content: "\f110";
}
.fa-circle:before {
  content: "\f111";
}
.fa-mail-reply:before,
.fa-reply:before {
  content: "\f112";
}
.fa-github-alt:before {
  content: "\f113";
}
.fa-folder-o:before {
  content: "\f114";
}
.fa-folder-open-o:before {
  content: "\f115";
}
.fa-smile-o:before {
  content: "\f118";
}
.fa-frown-o:before {
  content: "\f119";
}
.fa-meh-o:before {
  content: "\f11a";
}
.fa-gamepad:before {
  content: "\f11b";
}
.fa-keyboard-o:before {
  content: "\f11c";
}
.fa-flag-o:before {
  content: "\f11d";
}
.fa-flag-checkered:before {
  content: "\f11e";
}
.fa-terminal:before {
  content: "\f120";
}
.fa-code:before {
  content: "\f121";
}
.fa-reply-all:before {
  content: "\f122";
}
.fa-mail-reply-all:before {
  content: "\f122";
}
.fa-star-half-empty:before,
.fa-star-half-full:before,
.fa-star-half-o:before {
  content: "\f123";
}
.fa-location-arrow:before {
  content: "\f124";
}
.fa-crop:before {
  content: "\f125";
}
.fa-code-fork:before {
  content: "\f126";
}
.fa-unlink:before,
.fa-chain-broken:before {
  content: "\f127";
}
.fa-question:before {
  content: "\f128";
}
.fa-info:before {
  content: "\f129";
}
.fa-exclamation:before {
  content: "\f12a";
}
.fa-superscript:before {
  content: "\f12b";
}
.fa-subscript:before {
  content: "\f12c";
}
.fa-eraser:before {
  content: "\f12d";
}
.fa-puzzle-piece:before {
  content: "\f12e";
}
.fa-microphone:before {
  content: "\f130";
}
.fa-microphone-slash:before {
  content: "\f131";
}
.fa-shield:before {
  content: "\f132";
}
.fa-calendar-o:before {
  content: "\f133";
}
.fa-fire-extinguisher:before {
  content: "\f134";
}
.fa-rocket:before {
  content: "\f135";
}
.fa-maxcdn:before {
  content: "\f136";
}
.fa-chevron-circle-left:before {
  content: "\f137";
}
.fa-chevron-circle-right:before {
  content: "\f138";
}
.fa-chevron-circle-up:before {
  content: "\f139";
}
.fa-chevron-circle-down:before {
  content: "\f13a";
}
.fa-html5:before {
  content: "\f13b";
}
.fa-css3:before {
  content: "\f13c";
}
.fa-anchor:before {
  content: "\f13d";
}
.fa-unlock-alt:before {
  content: "\f13e";
}
.fa-bullseye:before {
  content: "\f140";
}
.fa-ellipsis-h:before {
  content: "\f141";
}
.fa-ellipsis-v:before {
  content: "\f142";
}
.fa-rss-square:before {
  content: "\f143";
}
.fa-play-circle:before {
  content: "\f144";
}
.fa-ticket:before {
  content: "\f145";
}
.fa-minus-square:before {
  content: "\f146";
}
.fa-minus-square-o:before {
  content: "\f147";
}
.fa-level-up:before {
  content: "\f148";
}
.fa-level-down:before {
  content: "\f149";
}
.fa-check-square:before {
  content: "\f14a";
}
.fa-pencil-square:before {
  content: "\f14b";
}
.fa-external-link-square:before {
  content: "\f14c";
}
.fa-share-square:before {
  content: "\f14d";
}
.fa-compass:before {
  content: "\f14e";
}
.fa-toggle-down:before,
.fa-caret-square-o-down:before {
  content: "\f150";
}
.fa-toggle-up:before,
.fa-caret-square-o-up:before {
  content: "\f151";
}
.fa-toggle-right:before,
.fa-caret-square-o-right:before {
  content: "\f152";
}
.fa-euro:before,
.fa-eur:before {
  content: "\f153";
}
.fa-gbp:before {
  content: "\f154";
}
.fa-dollar:before,
.fa-usd:before {
  content: "\f155";
}
.fa-rupee:before,
.fa-inr:before {
  content: "\f156";
}
.fa-cny:before,
.fa-rmb:before,
.fa-yen:before,
.fa-jpy:before {
  content: "\f157";
}
.fa-ruble:before,
.fa-rouble:before,
.fa-rub:before {
  content: "\f158";
}
.fa-won:before,
.fa-krw:before {
  content: "\f159";
}
.fa-bitcoin:before,
.fa-btc:before {
  content: "\f15a";
}
.fa-file:before {
  content: "\f15b";
}
.fa-file-text:before {
  content: "\f15c";
}
.fa-sort-alpha-asc:before {
  content: "\f15d";
}
.fa-sort-alpha-desc:before {
  content: "\f15e";
}
.fa-sort-amount-asc:before {
  content: "\f160";
}
.fa-sort-amount-desc:before {
  content: "\f161";
}
.fa-sort-numeric-asc:before {
  content: "\f162";
}
.fa-sort-numeric-desc:before {
  content: "\f163";
}
.fa-thumbs-up:before {
  content: "\f164";
}
.fa-thumbs-down:before {
  content: "\f165";
}
.fa-youtube-square:before {
  content: "\f166";
}
.fa-youtube:before {
  content: "\f167";
}
.fa-xing:before {
  content: "\f168";
}
.fa-xing-square:before {
  content: "\f169";
}
.fa-youtube-play:before {
  content: "\f16a";
}
.fa-dropbox:before {
  content: "\f16b";
}
.fa-stack-overflow:before {
  content: "\f16c";
}
.fa-instagram:before {
  content: "\f16d";
}
.fa-flickr:before {
  content: "\f16e";
}
.fa-adn:before {
  content: "\f170";
}
.fa-bitbucket:before {
  content: "\f171";
}
.fa-bitbucket-square:before {
  content: "\f172";
}
.fa-tumblr:before {
  content: "\f173";
}
.fa-tumblr-square:before {
  content: "\f174";
}
.fa-long-arrow-down:before {
  content: "\f175";
}
.fa-long-arrow-up:before {
  content: "\f176";
}
.fa-long-arrow-left:before {
  content: "\f177";
}
.fa-long-arrow-right:before {
  content: "\f178";
}
.fa-apple:before {
  content: "\f179";
}
.fa-windows:before {
  content: "\f17a";
}
.fa-android:before {
  content: "\f17b";
}
.fa-linux:before {
  content: "\f17c";
}
.fa-dribbble:before {
  content: "\f17d";
}
.fa-skype:before {
  content: "\f17e";
}
.fa-foursquare:before {
  content: "\f180";
}
.fa-trello:before {
  content: "\f181";
}
.fa-female:before {
  content: "\f182";
}
.fa-male:before {
  content: "\f183";
}
.fa-gittip:before {
  content: "\f184";
}
.fa-sun-o:before {
  content: "\f185";
}
.fa-moon-o:before {
  content: "\f186";
}
.fa-archive:before {
  content: "\f187";
}
.fa-bug:before {
  content: "\f188";
}
.fa-vk:before {
  content: "\f189";
}
.fa-weibo:before {
  content: "\f18a";
}
.fa-renren:before {
  content: "\f18b";
}
.fa-pagelines:before {
  content: "\f18c";
}
.fa-stack-exchange:before {
  content: "\f18d";
}
.fa-arrow-circle-o-right:before {
  content: "\f18e";
}
.fa-arrow-circle-o-left:before {
  content: "\f190";
}
.fa-toggle-left:before,
.fa-caret-square-o-left:before {
  content: "\f191";
}
.fa-dot-circle-o:before {
  content: "\f192";
}
.fa-wheelchair:before {
  content: "\f193";
}
.fa-vimeo-square:before {
  content: "\f194";
}
.fa-turkish-lira:before,
.fa-try:before {
  content: "\f195";
}
.fa-plus-square-o:before {
  content: "\f196";
}


================================================
FILE: backend/tests/integration/tests/pruning/website/css/style.css
================================================
/*
Author URI: http://webthemez.com/
Note: 
Licence under Creative Commons Attribution 3.0 
Do not remove the back-link in this web template 
-------------------------------------------------------*/

@import url("http://fonts.googleapis.com/css?family=Noto+Serif:400,400italic,700|Open+Sans:400,600,700");
@import url("font-awesome.css");
@import url("animate.css");

body {
  font-family: "Open Sans", Arial, sans-serif;
  font-size: 14px;
  font-weight: 300;
  line-height: 1.6em;
  color: #656565;
}

a:active {
  outline: 0;
}

.clear {
  clear: both;
}

h1,
h2,
h3,
h4,
h5,
h6 {
  font-family: "Open Sans", Arial, sans-serif;
  font-weight: 700;
  line-height: 1.1em;
  color: #333;
  margin-bottom: 20px;
}

.container {
  padding: 0 20px 0 20px;
  position: relative;
}

#wrapper {
  width: 100%;
  margin: 0;
  padding: 0;
}

.row,
.row-fluid {
  margin-bottom: 30px;
}

.row .row,
.row-fluid .row-fluid {
  margin-bottom: 30px;
}

.row.nomargin,
.row-fluid.nomargin {
  margin-bottom: 0;
}

img.img-polaroid {
  margin: 0 0 20px 0;
}
.img-box {
  max-width: 100%;
}
/*  Header
==================================== */

header .navbar {
  margin-bottom: 0;
}

.navbar-default {
  border: none;
}

.navbar-brand {
  color: #222;
  text-transform: uppercase;
  font-size: 24px;
  font-weight: 700;
  line-height: 1em;
  letter-spacing: -1px;
  margin-top: 13px;
  padding: 0 0 0 15px;
}
.navbar-default .navbar-brand {
  color: #61b331;
}

header .navbar-collapse ul.navbar-nav {
  float: right;
  margin-right: 0;
}

header .navbar-default {
  background-color: #ffffff;
}

header .nav li a:hover,
header .nav li a:focus,
header .nav li.active a,
header .nav li.active a:hover,
header .nav li a.dropdown-toggle:hover,
header .nav li a.dropdown-toggle:focus,
header .nav li.active ul.dropdown-menu li a:hover,
header .nav li.active ul.dropdown-menu li.active a {
  -webkit-transition: all 0.3s ease;
  -moz-transition: all 0.3s ease;
  -ms-transition: all 0.3s ease;
  -o-transition: all 0.3s ease;
  transition: all 0.3s ease;
}

header .navbar-default .navbar-nav > .open > a,
header .navbar-default .navbar-nav > .open > a:hover,
header .navbar-default .navbar-nav > .open > a:focus {
  -webkit-transition: all 0.3s ease;
  -moz-transition: all 0.3s ease;
  -ms-transition: all 0.3s ease;
  -o-transition: all 0.3s ease;
  transition: all 0.3s ease;
}

header .navbar {
  min-height: 70px;
  padding: 18px 0;
}

header .navbar-nav > li {
  padding-bottom: 12px;
  padding-top: 12px;
}

header .navbar-nav > li > a {
  padding-bottom: 6px;
  padding-top: 5px;
  margin-left: 2px;
  line-height: 30px;
  font-weight: 700;
  -webkit-transition: all 0.3s ease;
  -moz-transition: all 0.3s ease;
  -ms-transition: all 0.3s ease;
  -o-transition: all 0.3s ease;
  transition: all 0.3s ease;
}

.dropdown-menu li a:hover {
  color: #fff !important;
}

header .nav .caret {
  border-bottom-color: #f5f5f5;
  border-top-color: #f5f5f5;
}
.navbar-default .navbar-nav > .active > a,
.navbar-default .navbar-nav > .active > a:hover,
.navbar-default .navbar-nav > .active > a:focus {
  background-color: #fff;
}
.navbar-default .navbar-nav > .open > a,
.navbar-default .navbar-nav > .open > a:hover,
.navbar-default .navbar-nav > .open > a:focus {
  background-color: #fff;
}

.dropdown-menu {
  box-shadow: none;
  border-radius: 0;
  border: none;
}

.dropdown-menu li:last-child {
  padding-bottom: 0 !important;
  margin-bottom: 0;
}

header .nav li .dropdown-menu {
  padding: 0;
}

header .nav li .dropdown-menu li a {
  line-height: 28px;
  padding: 3px 12px;
}
.item-thumbs img {
  margin-bottom: 15px;
}
.flex-control-paging li a.flex-active {
  background: #000;
  background: rgb(255, 255, 255);
  cursor: default;
}
.flex-control-paging li a {
  width: 30px;
  height: 11px;
  display: block;
  background: #666;
  background: rgba(0, 0, 0, 0.5);
  cursor: pointer;
  text-indent: -9999px;
  -webkit-border-radius: 20px;
  -moz-border-radius: 20px;
  -o-border-radius: 20px;
  border-radius: 20px;
  box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3);
}
.panel-title > a {
  color: inherit;
  color: #fff;
}
.panel-group .panel-heading + .panel-collapse .panel-body {
  border-top: 1px solid #ddd;
  color: #fff;
  background-color: #9c9c9c;
}
/* --- menu --- */

header .navigation {
  float: right;
}

header ul.nav li {
  border: none;
  margin: 0;
}

header ul.nav li a {
  font-size: 12px;
  border: none;
  font-weight: 700;
  text-transform: uppercase;
}

header ul.nav li ul li a {
  font-size: 12px;
  border: none;
  font-weight: 300;
  text-transform: uppercase;
}

.navbar .nav > li > a {
  color: #848484;
  text-shadow: none;
  border: 1px solid rgba(255, 255, 255, 0) !important;
}

.navbar .nav a:hover {
  background: none;
  color: #14a085 !important;
}

.navbar .nav > .active > a,
.navbar .nav > .active > a:hover {
  background: none;
  font-weight: 700;
}

.navbar .nav > .active > a:active,
.navbar .nav > .active > a:focus {
  background: none;
  outline: 0;
  font-weight: 700;
}

.navbar .nav li .dropdown-menu {
  z-index: 2000;
}

header ul.nav li ul {
  margin-top: 1px;
}
header ul.nav li ul li ul {
  margin: 1px 0 0 1px;
}
.dropdown-menu .dropdown i {
  position: absolute;
  right: 0;
  margin-top: 3px;
  padding-left: 20px;
}

.navbar .nav > li > .dropdown-menu:before {
  display: inline-block;
  border-right: none;
  border-bottom: none;
  border-left: none;
  border-bottom-color: none;
  content: none;
}
.navbar-default .navbar-nav > .active > a,
.navbar-default .navbar-nav > .active > a:hover,
.navbar-default .navbar-nav > .active > a:focus {
  color: #14a085;
}

ul.nav li.dropdown a {
  z-index: 1000;
  display: block;
}

select.selectmenu {
  display: none;
}
.pageTitle {
  color: #fff;
  margin: 30px 0 3px;
  display: inline-block;
}

#featured {
  width: 100%;
  background: #000;
  position: relative;
  margin: 0;
  padding: 0;
}

/*  Sliders
==================================== */
/* --- flexslider --- */

#featured .flexslider {
  padding: 0;
  background: #fff;
  position: relative;
  zoom: 1;
}
.flex-direction-nav .flex-prev {
  left: 0px;
}
.flex-direction-nav .flex-next {
  right: 0px;
}
.flex-caption {
  zoom: 0;
  color: #1c1d21;
  margin: 0 auto;
  padding: 1px;
  position: absolute;
  vertical-align: bottom;
  text-align: center;
  background-color: rgba(255, 255, 255, 0.26);
  bottom: 5%;
  display: block;
  left: 0;
  right: 0;
}
.flex-caption h3 {
  color: #fff;
  letter-spacing: 1px;
  margin-bottom: 8px;
  text-transform: uppercase;
}
.flex-caption p {
  margin: 0 0 15px;
}
.skill-home {
  margin-bottom: 50px;
}
.c1 {
  border: #ed5441 1px solid;
  background: #ed5441;
}
.c2 {
  border: #d867b2 1px solid;
  background: #d867b2;
}
.c3 {
  border: #61b331 1px solid;
  background: #4bc567;
}
.c4 {
  border: #609cec 1px solid;
  background: #26aff0;
}
.skill-home .icons {
  padding: 33px 0 0 0;
  width: 100%;
  height: 178px;
  color: rgb(255, 255, 255);
  font-size: 42px;
  font-size: 76px;
  text-align: center;
  -ms-border-radius: 50%;
  -moz-border-radius: 50%;
  -webkit-border-radius: 50%;
  border-radius: 0;
  display: inline-table;
}
.skill-home h2 {
  padding-top: 20px;
  font-size: 36px;
  font-weight: 700;
}
.testimonial-solid {
  padding: 50px 0 60px 0;
  margin: 0 0 0 0;
  background: #efefef;
  text-align: center;
}
.testi-icon-area {
  text-align: center;
  position: absolute;
  top: -84px;
  margin: 0 auto;
  width: 100%;
  color: #000;
}
.testi-icon-area .quote {
  padding: 15px 0 0 0;
  margin: 0 0 0 0;
  background: #ffffff;
  text-align: center;
  color: #26aff0;
  display: inline-table;
  width: 70px;
  height: 70px;
  -ms-border-radius: 50%;
  -moz-border-radius: 50%;
  -webkit-border-radius: 50%;
  border-radius: 0;
  font-size: 42px;
  border: 1px solid #26aff0;
  display: none;
}

.testi-icon-area .carousel-inner {
  margin: 20px 0;
}
.carousel-indicators {
  bottom: -30px;
}
.team-member {
  text-align: center;
  background-color: #f9f9f9;
  padding-bottom: 15px;
}
.fancybox-title-inside-wrap {
  padding: 3px 30px 6px;
  background: #292929;
}

.item_introtext {
  background-color: rgba(254, 254, 255, 0.66);
  margin: 0 auto;
  display: inline-block;
  padding: 25px;
}
.item_introtext span {
  font-size: 20px;
  display: block;
  font-weight: bold;
}
.item_introtext strong {
  font-size: 50px;
  display: block;
  padding: 14px 0 30px;
}
.item_introtext p {
  font-size: 20px !important;
  color: #1c1d21;
  font-weight: bold;
}

.form-control {
  border-radius: 0;
}

/* Testimonial
----------------------------------*/
.testimonial-area {
  padding: 0 0 0 0;
  margin: 0;
  background: url(../img/low-poly01.jpg) fixed center center;
  background-size: cover;
  -webkit-background-size: cover;
  -moz-background-size: cover;
  -ms-background-size: cover;
  color: red;
}
.testimonial-solid p {
  color: #1f1f1f;
  font-size: 16px;
  line-height: 30px;
  font-style: italic;
}
section.callaction {
  background: #fff;
  padding: 50px 0 0 0;
}

/* Content
==================================== */

#content {
  position: relative;
  background: #fff;
  padding: 50px 0 0px 0;
}

#content img {
  max-width: 100%;
  height: auto;
}

.cta-text {
  text-align: center;
  margin-top: 10px;
}

.big-cta .cta {
  margin-top: 10px;
}

.box {
  width: 100%;
}
.box-gray {
  background: #f8f8f8;
  padding: 20px 20px 30px;
}
.box-gray h4,
.box-gray i {
  margin-bottom: 20px;
}
.box-bottom {
  padding: 20px 0;
  text-align: center;
}
.box-bottom a {
  color: #fff;
  font-weight: 700;
}
.box-bottom a:hover {
  color: #eee;
  text-decoration: none;
}

/* Bottom
==================================== */

#bottom {
  background: #fcfcfc;
  padding: 50px 0 0;
}
/* twitter */
#twitter-wrapper {
  text-align: center;
  width: 70%;
  margin: 0 auto;
}
#twitter em {
  font-style: normal;
  font-size: 13px;
}

#twitter em.twitterTime a {
  font-weight: 600;
}

#twitter ul {
  padding: 0;
  list-style: none;
}
#twitter ul li {
  font-size: 20px;
  line-height: 1.6em;
  font-weight: 300;
  margin-bottom: 20px;
  position: relative;
  word-break: break-word;
}

/* page headline
==================================== */

#inner-headline {
  background: #14a085;
  position: relative;
  margin: 0;
  padding: 0;
  color: #fefefe;
  /* margin: 15px; */
  border-top: 10px solid #11967c;
}

#inner-headline .inner-heading h2 {
  color: #fff;
  margin: 20px 0 0 0;
}

/* --- breadcrumbs --- */
#inner-headline ul.breadcrumb {
  margin: 30px 0 0;
  float: left;
}

#inner-headline ul.breadcrumb li {
  margin-bottom: 0;
  padding-bottom: 0;
}
#inner-headline ul.breadcrumb li {
  font-size: 13px;
  color: #fff;
}

#inner-headline ul.breadcrumb li i {
  color: #dedede;
}

#inner-headline ul.breadcrumb li a {
  color: #fff;
}

ul.breadcrumb li a:hover {
  text-decoration: none;
}

/* Forms
============================= */

/* --- contact form  ---- */
form#contactform input[type="text"] {
  width: 100%;
  border: 1px solid #f5f5f5;
  min-height: 40px;
  padding-left: 20px;
  font-size: 13px;
  padding-right: 20px;
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
}

form#contactform textarea {
  border: 1px solid #f5f5f5;
  width: 100%;
  padding-left: 20px;
  padding-top: 10px;
  font-size: 13px;
  padding-right: 20px;
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
}

form#contactform .validation {
  font-size: 11px;
}

#sendmessage {
  border: 1px solid #e6e6e6;
  background: #f6f6f6;
  display: none;
  text-align: center;
  padding: 15px 12px 15px 65px;
  margin: 10px 0;
  font-weight: 600;
  margin-bottom: 30px;
}

#sendmessage.show,
.show {
  display: block;
}

form#commentform input[type="text"] {
  width: 100%;
  min-height: 40px;
  padding-left: 20px;
  font-size: 13px;
  padding-right: 20px;
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
  -webkit-border-radius: 2px 2px 2px 2px;
  -moz-border-radius: 2px 2px 2px 2px;
  border-radius: 2px 2px 2px 2px;
}

form#commentform textarea {
  width: 100%;
  padding-left: 20px;
  padding-top: 10px;
  font-size: 13px;
  padding-right: 20px;
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
  -webkit-border-radius: 2px 2px 2px 2px;
  -moz-border-radius: 2px 2px 2px 2px;
  border-radius: 2px 2px 2px 2px;
}

/* --- search form --- */
.search {
  float: right;
  margin: 35px 0 0;
  padding-bottom: 0;
}

#inner-headline form.input-append {
  margin: 0;
  padding: 0;
}

/*  Portfolio
================================ */

.work-nav #filters {
  margin: 0;
  padding: 0;
  list-style: none;
}

.work-nav #filters li {
  margin: 0 10px 30px 0;
  padding: 0;
  float: left;
}

.work-nav #filters li a {
  color: #7f8289;
  font-size: 16px;
  display: block;
}

.work-nav #filters li a:hover {
}

.work-nav #filters li a.selected {
  color: #de5e60;
}

#thumbs {
  margin: 0;
  padding: 0;
}

#thumbs li {
  list-style-type: none;
}

.item-thumbs {
  position: relative;
  overflow: hidden;
  margin-bottom: 30px;
  cursor: pointer;
}

.item-thumbs a + img {
  width: 100%;
}

.item-thumbs .hover-wrap {
  position: absolute;
  display: block;
  width: 100%;
  height: 100%;

  opacity: 0;
  filter: alpha(opacity=0);

  -webkit-transition: all 450ms ease-out 0s;
  -moz-transition: all 450ms ease-out 0s;
  -o-transition: all 450ms ease-out 0s;
  transition: all 450ms ease-out 0s;

  -webkit-transform: rotateY(180deg) scale(0.5, 0.5);
  -moz-transform: rotateY(180deg) scale(0.5, 0.5);
  -ms-transform: rotateY(180deg) scale(0.5, 0.5);
  -o-transform: rotateY(180deg) scale(0.5, 0.5);
  transform: rotateY(180deg) scale(0.5, 0.5);
}

.item-thumbs:hover .hover-wrap,
.item-thumbs.active .hover-wrap {
  opacity: 1;
  filter: alpha(opacity=100);

  -webkit-transform: rotateY(0deg) scale(1, 1);
  -moz-transform: rotateY(0deg) scale(1, 1);
  -ms-transform: rotateY(0deg) scale(1, 1);
  -o-transform: rotateY(0deg) scale(1, 1);
  transform: rotateY(0deg) scale(1, 1);
}

.item-thumbs .hover-wrap .overlay-img {
  position: absolute;
  width: 90%;
  height: 91%;
  opacity: 0.5;
  filter: alpha(opacity=80);
  background: #14a085;
}

.item-thumbs .hover-wrap .overlay-img-thumb {
  position: absolute;
  border-radius: 60px;
  top: 50%;
  left: 45%;
  margin: -16px 0 0 -16px;
  color: #fff;
  font-size: 32px;
  line-height: 1em;
  opacity: 1;
  filter: alpha(opacity=100);
}

ul.portfolio-categ {
  margin: 10px 0 30px 0;
  padding: 0;
  float: left;
  list-style: none;
}

ul.portfolio-categ li {
  margin: 0;
  float: left;
  list-style: none;
  font-size: 13px;
  font-weight: 600;
  border: 1px solid #d5d5d5;
  margin-right: 15px;
}

ul.portfolio-categ li a {
  display: block;
  padding: 8px 20px;
  color: #14a085;
}
ul.portfolio-categ li.active {
  border: 1px solid #d7d8d6;

  background-color: #eaeaea;
}
ul.portfolio-categ li.active a:hover,
ul.portfolio-categ li a:hover,
ul.portfolio-categ li a:focus,
ul.portfolio-categ li a:active {
  text-decoration: none;
  outline: 0;
}
#accordion-alt3 .panel-heading h4 {
  font-size: 13px;
  line-height: 28px;
  color: #6b6b6b;
}
.panel .panel-heading h4 {
  font-weight: 400;
}
.panel-title {
  margin-top: 0;
  margin-bottom: 0;
  font-size: 15px;
  color: inherit;
}
.panel-group .panel {
  margin-bottom: 0;
  border-radius: 2px;
}
.panel {
  margin-bottom: 18px;
  background-color: #b9b9b9;
  border: 1px solid transparent;
  border-radius: 2px;
  -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);
  box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);
}
#accordion-alt3 .panel-heading h4 a i {
  font-size: 13px;
  line-height: 18px;
  width: 18px;
  height: 18px;
  margin-right: 5px;
  color: #fff;
  text-align: center;
  border-radius: 50%;
  margin-left: 6px;
}
.progress.pb-sm {
  height: 6px !important;
}
.progress {
  box-shadow: inset 0 0 2px rgba(0, 0, 0, 0.1);
}
.progress {
  overflow: hidden;
  height: 18px;
  margin-bottom: 18px;
  background-color: #f5f5f5;
  border-radius: 2px;
  -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);
  box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);
}
.progress .progress-bar.progress-bar-red {
  background: #ed5441;
}
.progress .progress-bar.progress-bar-green {
  background: #51d466;
}
.progress .progress-bar.progress-bar-lblue {
  background: #32c8de;
}
/* --- portfolio detail --- */
.top-wrapper {
  margin-bottom: 20px;
}
.info-blocks {
  margin-bottom: 15px;
}
.info-blocks i.icon-info-blocks {
  float: left;
  color: #318fcf;
  font-size: 30px;
  min-width: 50px;
  margin-top: 6px;
  text-align: center;
  background-color: #efefef;
  padding: 15px;
}
.info-blocks .info-blocks-in {
  padding: 0 10px;
  overflow: hidden;
}
.info-blocks .info-blocks-in h3 {
  color: #555;
  font-size: 20px;
  line-height: 28px;
  margin: 0px;
}
.info-blocks .info-blocks-in p {
  font-size: 12px;
}

blockquote {
  font-size: 16px;
  font-weight: 400;
  font-family: "Noto Serif", serif;
  font-style: italic;
  padding-left: 0;
  color: #a2a2a2;
  line-height: 1.6em;
  border: none;
}

blockquote cite {
  display: block;
  font-size: 12px;
  color: #666;
  margin-top: 10px;
}
blockquote cite:before {
  content: "\2014 \0020";
}
blockquote cite a,
blockquote cite a:visited,
blockquote cite a:visited {
  color: #555;
}

/* --- pullquotes --- */

.pullquote-left {
  display: block;
  color: #a2a2a2;
  font-family: "Noto Serif", serif;
  font-size: 14px;
  line-height: 1.6em;
  padding-left: 20px;
}

.pullquote-right {
  display: block;
  color: #a2a2a2;
  font-family: "Noto Serif", serif;
  font-size: 14px;
  line-height: 1.6em;
  padding-right: 20px;
}

/* --- button --- */
.btn {
  text-align: center;
  background: #318cca;
  color: #fff;
  border-radius: 0;
  padding: 10px 30px;
}
.btn-theme {
  color: #fff;
}
.btn-theme:hover {
  color: #eee;
}

/* --- list style --- */

ul.general {
  list-style: none;
  margin-left: 0;
}

ul.link-list {
  margin: 0;
  padding: 0;
  list-style: none;
}

ul.link-list li {
  margin: 0;
  padding: 2px 0 2px 0;
  list-style: none;
}
footer {
  background: #14a085;
}
footer ul.link-list li a {
  color: #ffffff;
}
footer ul.link-list li a:hover {
  color: #e2e2e2;
}
/* --- Heading style --- */

h4.heading {
  font-weight: 700;
}

.heading {
  margin-bottom: 30px;
}

.heading {
  position: relative;
}

.widgetheading {
  width: 100%;

  padding: 0;
}

#bottom .widgetheading {
  position: relative;
  border-bottom: #e6e6e6 1px solid;
  padding-bottom: 9px;
}

aside .widgetheading {
  position: relative;
  border-bottom: #e9e9e9 1px solid;
  padding-bottom: 9px;
}

footer .widgetheading {
  position: relative;
}

footer .widget .social-network {
  position: relative;
}

#bottom .widget .widgetheading span,
aside .widget .widgetheading span,
footer .widget .widgetheading span {
  position: absolute;
  width: 60px;
  height: 1px;
  bottom: -1px;
  right: 0;
}
.box-area {
  border: 1px solid #f3f3f3;
  padding: 0 15px 12px;
  padding-top: 41px;
  margin-top: -42px;
  text-align: left;
  background-color: #f9f9f9;
  position: relative;
}
/* --- Map --- */
.map {
  position: relative;
  margin-top: -50px;
  margin-bottom: 40px;
}

.map iframe {
  width: 100%;
  height: 450px;
  border: none;
}

.map-grid iframe {
  width: 100%;
  height: 350px;
  border: none;
  margin: 0 0 -5px 0;
  padding: 0;
}

ul.team-detail {
  margin: -10px 0 0 0;
  padding: 0;
  list-style: none;
}

ul.team-detail li {
  border-bottom: 1px dotted #e9e9e9;
  margin: 0 0 15px 0;
  padding: 0 0 15px 0;
  list-style: none;
}

ul.team-detail li label {
  font-size: 13px;
}

ul.team-detail li h4,
ul.team-detail li label {
  margin-bottom: 0;
}

ul.team-detail li ul.social-network {
  border: none;
  margin: 0;
  padding: 0;
}

ul.team-detail li ul.social-network li {
  border: none;
  margin: 0;
}
ul.team-detail li ul.social-network li i {
  margin: 0;
}

.pricing-title {
  background: #fff;
  text-align: center;
  padding: 10px 0 10px 0;
}

.pricing-title h3 {
  font-weight: 600;
  margin-bottom: 0;
}

.pricing-offer {
  background: #fcfcfc;
  text-align: center;
  padding: 40px 0 40px 0;
  font-size: 18px;
  border-top: 1px solid #e6e6e6;
  border-bottom: 1px solid #e6e6e6;
}

.pricing-box.activeItem .pricing-offer {
  color: #fff;
}

.pricing-offer strong {
  font-size: 78px;
  line-height: 89px;
}

.pricing-offer sup {
  font-size: 28px;
}

.pricing-container {
  background: #fff;
  text-align: center;
  font-size: 14px;
}

.pricing-container strong {
  color: #353535;
}

.pricing-container ul {
  list-style: none;
  padding: 0;
  margin: 0;
}

.pricing-container ul li {
  border-bottom: 1px solid #f5f5f5;
  list-style: none;
  padding: 15px 0 15px 0;
  margin: 0 0 0 0;
  color: #222;
}

.pricing-action {
  margin: 0;
  background: #fcfcfc;
  text-align: center;
  padding: 20px 0 30px 0;
}

.pricing-wrapp {
  margin: 0 auto;
  width: 100%;
  background: #fd0000;
}
.pricing-box-item {
  border: 1px solid #f5f5f5;

  background: #f9f9f9;
  position: relative;
  margin: 0 0 20px 0;
  padding: 0;
  -webkit-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);
  -moz-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);
  box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
}

.pricing-box-item .pricing-heading {
  text-align: center;
  padding: 0px 0 0px 0;
  display: block;
}
.pricing-box-item.activeItem .pricing-heading {
  text-align: center;
  padding: 0px 0 1px 0;
  border-bottom: none;
  display: block;
  color: #fff;
}
.pricing-box-item.activeItem .pricing-heading h3 {
}

.pricing-box-item .pricing-heading h3 strong {
  font-size: 20px;
  font-weight: 700;
  letter-spacing: -1px;
}
.pricing-box-item .pricing-heading h3 {
  font-size: 35px;
  font-weight: 300;
  letter-spacing: -1px;
}

.pricing-box-item .pricing-terms {
  text-align: center;
  display: block;
  overflow: hidden;
  padding: 11px 0 5px;
}

.pricing-box-item .pricing-terms h6 {
  font-style: italic;
  margin-top: 10px;
  color: #14a085;
  font-size: 22px;
  font-family: "Noto Serif", serif;
}

.pricing-box-item .icon .price-circled {
  margin: 10px 10px 10px 0;
  display: inline-block !important;
  text-align: center !important;
  color: #fff;
  width: 68px;
  height: 68px;
  padding: 12px;
  font-size: 16px;
  font-weight: 700;
  line-height: 68px;
  text-shadow: none;
  cursor: pointer;
  background-color: #888;
  border-radius: 64px;
  -moz-border-radius: 64px;
  -webkit-border-radius: 64px;
}

.pricing-box-item .pricing-action {
  margin: 0;
  text-align: center;
  padding: 30px 0 30px 0;
}

/* ===== Widgets ===== */

/* --- flickr --- */
.widget .flickr_badge {
  width: 100%;
}
.widget .flickr_badge img {
  margin: 0 9px 20px 0;
}

footer .widget .flickr_badge {
  width: 100%;
}
footer .widget .flickr_badge img {
  margin: 0 9px 20px 0;
}

.flickr_badge img {
  width: 50px;
  height: 50px;
  float: left;
  margin: 0 9px 20px 0;
}

/* --- Recent post widget --- */

.recent-post {
  margin: 20px 0 0 0;
  padding: 0;
  line-height: 18px;
}

.recent-post h5 a:hover {
  text-decoration: none;
}

.recent-post .text h5 a {
  color: #353535;
}

footer {
  padding: 50px 0 0 0;
  color: #f8f8f8;
}

footer a {
  color: #fff;
}

footer a:hover {
  color: #eee;
}

footer h1,
footer h2,
footer h3,
footer h4,
footer h5,
footer h6 {
  color: #fff;
}

footer address {
  line-height: 1.6em;
  color: #ffffff;
}

footer h5 a:hover,
footer a:hover {
  text-decoration: none;
}

ul.social-network {
  list-style: none;
  margin: 0;
}

ul.social-network li {
  display: inline;
  margin: 0 5px;
}

#sub-footer {
  text-shadow: none;
  color: #f5f5f5;
  padding: 0;
  padding-top: 30px;
  margin: 20px 0 0 0;
  background: #14a085;
}

#sub-footer p {
  margin: 0;
  padding: 0;
}

#sub-footer span {
  color: #f5f5f5;
}

.copyright {
  text-align: left;
  font-size: 12px;
}

#sub-footer ul.social-network {
  float: right;
}

/* scroll to top */
.scrollup {
  position: fixed;
  width: 32px;
  height: 32px;
  bottom: 0px;
  right: 20px;
  background: #222;
}

a.scrollup {
  outline: 0;
  text-align: center;
}

a.scrollup:hover,
a.scrollup:active,
a.scrollup:focus {
  opacity: 1;
  text-decoration: none;
}
a.scrollup i {
  margin-top: 10px;
  color: #fff;
}
a.scrollup i:hover {
  text-decoration: none;
}

.absolute {
  position: absolute;
}

.relative {
  position: relative;
}

.aligncenter {
  text-align: center;
}

.aligncenter span {
  margin-left: 0;
}

.floatright {
  float: right;
}

.floatleft {
  float: left;
}

.floatnone {
  float: none;
}

.aligncenter {
  text-align: center;
}

img.pull-left,
.align-left {
  float: left;
  margin: 0 15px 15px 0;
}

.widget img.pull-left {
  float: left;
  margin: 0 15px 15px 0;
}

img.pull-right,
.align-right {
  float: right;
  margin: 0 0 15px 15px;
}

article img.pull-left,
article .align-left {
  float: left;
  margin: 5px 15px 15px 0;
}

article img.pull-right,
article .align-right {
  float: right;
  margin: 5px 0 15px 15px;
}
============================= */ .clear-marginbot {
  margin-bottom: 0;
}

.marginbot10 {
  margin-bottom: 10px;
}
.marginbot20 {
  margin-bottom: 20px;
}
.marginbot30 {
  margin-bottom: 30px;
}
.marginbot40 {
  margin-bottom: 40px;
}

.clear-margintop {
  margin-top: 0;
}

.margintop10 {
  margin-top: 10px;
}

.margintop20 {
  margin-top: 20px;
}

.margintop30 {
  margin-top: 30px;
}

.margintop40 {
  margin-top: 40px;
}

/*  Media queries 
============================= */

@media (min-width: 768px) and (max-width: 979px) {
  a.detail {
    background: none;
    width: 100%;
  }

  footer .widget form input#appendedInputButton {
    display: block;
    width: 91%;
    -webkit-border-radius: 4px 4px 4px 4px;
    -moz-border-radius: 4px 4px 4px 4px;
    border-radius: 4px 4px 4px 4px;
  }

  footer .widget form .input-append .btn {
    display: block;
    width: 100%;
    padding-right: 0;
    padding-left: 0;
    -webkit-box-sizing: border-box;
    -moz-box-sizing: border-box;
    box-sizing: border-box;
    margin-top: 10px;
  }

  ul.related-folio li {
    width: 156px;
    margin: 0 20px 0 0;
  }
}

@media (max-width: 767px) {
  body {
    padding-right: 0;
    padding-left: 0;
  }
  .navbar-brand {
    margin-top: 10px;
    border-bottom: none;
  }
  .navbar-header {
    margin-top: 20px;
    border-bottom: none;
  }

  .navbar-nav {
    border-top: none;
    float: none;
    width: 100%;
  }
  .navbar .nav > .active > a,
  .navbar .nav > .active > a:hover {
    background: none;
    font-weight: 700;
    color: #26aff0;
  }
  header .navbar-nav > li {
    padding-bottom: 0px;
    padding-top: 2px;
  }
  header .nav li .dropdown-menu {
    margin-top: 0;
  }

  .dropdown-menu {
    position: absolute;
    top: 0;
    left: 40px;
    z-index: 1000;
    display: none;
    float: left;
    min-width: 160px;
    padding: 5px 0;
    margin: 2px 0 0;
    font-size: 13px;
    list-style: none;
    background-color: #fff;
    background-clip: padding-box;
    border: 1px solid #f5f5f5;
    border: 1px solid rgba(0, 0, 0, 0.15);
    border-radius: 0;
    -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);
    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);
  }

  .navbar-collapse.collapse {
    border: none;
    overflow: hidden;
  }

  .box {
    border-bottom: 1px solid #e9e9e9;
    padding-bottom: 20px;
  }

  #featured .flexslider .slide-caption {
    width: 90%;
    padding: 2%;
    position: absolute;
    left: 0;
    bottom: -40px;
  }

  #inner-headline .breadcrumb {
    float: left;
    clear: both;
    width: 100%;
  }

  .breadcrumb > li {
    font-size: 13px;
  }

  ul.portfolio li article a i.icon-48 {
    width: 20px;
    height: 20px;
    font-size: 16px;
    line-height: 20px;
  }

  .left-sidebar {
    border-right: none;
    padding: 0 0 0 0;
    border-bottom: 1px dotted #e6e6e6;
    padding-bottom: 10px;
    margin-bottom: 40px;
  }

  .right-sidebar {
    margin-top: 30px;
    border-left: none;
    padding: 0 0 0 0;
  }

  footer .col-lg-1,
  footer .col-lg-2,
  footer .col-lg-3,
  footer .col-lg-4,
  footer .col-lg-5,
  footer .col-lg-6,
  footer .col-lg-7,
  footer .col-lg-8,
  footer .col-lg-9,
  footer .col-lg-10,
  footer .col-lg-11,
  footer .col-lg-12 {
    margin-bottom: 20px;
  }

  #sub-footer ul.social-network {
    float: left;
  }

  [class*="span"] {
    margin-bottom: 20px;
  }
}

@media (max-width: 480px) {
  .bottom-article a.pull-right {
    float: left;
    margin-top: 20px;
  }

  .search {
    float: left;
  }

  .flexslider .flex-caption {
    display: none;
  }

  .cta-text {
    margin: 0 auto;
    text-align: center;
  }

  ul.portfolio li article a i {
    width: 20px;
    height: 20px;
    font-size: 14px;
  }
}

.box-area:before {
  position: absolute;
  width: 100%;
  height: 100%;
  z-index: 0;
  background-color: red;
  content: "";
  position: absolute;
  top: 7px;
  left: -1px;
  width: 100%;
  height: 23px;
  background: #f9f9f9;
  -moz-transform: skewY(-3deg);
  -o-transform: skewY(-3deg);
  -ms-transform: skewY(-3deg);
  -webkit-transform: skewY(-3deg);
  transform: skewY(11deg);
  background-size: cover;
}
.box-area:after {
  position: absolute;
  width: 100%;
  height: 100%;
  z-index: 0;
  background-color: red;
  content: "";
  position: absolute;
  top: 7px;
  left: 1px;
  width: 100%;
  height: 22px;
  background: #f9f9f9;
  -moz-transform: skewY(-3deg);
  -o-transform: skewY(-3deg);
  -ms-transform: skewY(-3deg);
  -webkit-transform: skewY(-3deg);
  transform: skewY(-11deg);
  background-size: cover;
}
.box-area h3 {
  margin-top: -16px;
  z-index: 12;
  position: relative;
}
.courses {
  padding: 50px 0;
}
.carousel-indicators li {
  display: inline-block;
  border: 1px solid #929292;
}
.textbox {
  background-color: #efefef;
  padding: 4px 25px;
}
.textbox h3 {
  margin: 0;
  padding: 22px 0 14px;
  font-size: 18px;
}


================================================
FILE: backend/tests/integration/tests/pruning/website/index.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Above Multi-purpose Free Bootstrap Responsive Template</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="" />
<meta name="author" content="http://webthemez.com" />
<!-- css -->
<link href="css/bootstrap.min.css" rel="stylesheet" />
<link href="css/fancybox/jquery.fancybox.css" rel="stylesheet">
<link href="css/jcarousel.css" rel="stylesheet" />
<link href="css/flexslider.css" rel="stylesheet" />
<link href="js/owl-carousel/owl.carousel.css" rel="stylesheet">
<link href="css/style.css" rel="stylesheet" />
 
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->

</head>
<body>
<div id="wrapper">
	<!-- start header -->
	<header>
        <div class="navbar navbar-default navbar-static-top">
            <div class="container">
                <div class="navbar-header">
                    <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                    </button>
                    <a class="navbar-brand" href="index.html"><img src="img/logo.png" alt="logo"/></a>
                </div>
                <div class="navbar-collapse collapse ">
                    <ul class="nav navbar-nav">
                        <li class="active"><a href="index.html">Home</a></li> 
						<li><a href="about.html">About Us</a></li>
						<li><a href="courses.html">Courses</a></li>
                        <li><a href="portfolio.html">Portfolio</a></li>
                        <li><a href="pricing.html">Pricing</a></li>
                        <li><a href="contact.html">Contact</a></li>
                    </ul>
                </div>
            </div>
        </div>
	</header>
	<!-- end header -->
	<section id="featured">
	 
	<!-- Slider -->
        <div id="main-slider" class="flexslider">
            <ul class="slides">
              <li>
                <img src="img/slides/1.jpg" alt="" />
                <div class="flex-caption">
                   <div class="item_introtext"> 
					<strong>Online Education</strong>
					<p>The best educational template</p> </div>
                </div>
              </li>
              <li>
                <img src="img/slides/2.jpg" alt="" />
                <div class="flex-caption">
                     <div class="item_introtext"> 
					<strong>School Education</strong>
					<p>Get all courses with on-line content</p> </div>
                </div>
              </li>
              <li>
                <img src="img/slides/3.jpg" alt="" />
                <div class="flex-caption">
                     <div class="item_introtext"> 
					<strong>Collage Education</strong>
					<p>Awesome Template get it know</p> </div>
                </div>
              </li>
            </ul>
        </div>
	<!-- end slider -->
 
	</section>
	<section class="callaction">
	<div class="container">
		<div class="row">
			<div class="col-lg-12">
				<div class="aligncenter"><h1 class="aligncenter">Our Featured Courses</h1><span class="clear spacer_responsive_hide_mobile " style="height:13px;display:block;"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>
			</div>
		</div>
	</div>
	</section>
	<section id="content">
	
	
	<div class="container">
			<div class="row">
		<div class="skill-home"> <div class="skill-home-solid clearfix"> 
		<div class="col-md-3 text-center">
		<span class="icons c1"><i class="fa fa-trophy"></i></span> <div class="box-area">
		<h3>Web Development</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>
		</div>
		<div class="col-md-3 text-center"> 
		<span class="icons c2"><i class="fa fa-picture-o"></i></span> <div class="box-area">
		<h3>UI Design</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>
		</div>
		<div class="col-md-3 text-center"> 
		<span class="icons c3"><i class="fa fa-desktop"></i></span> <div class="box-area">
		<h3>Interaction</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>
		</div>
		<div class="col-md-3 text-center"> 
		<span class="icons c4"><i class="fa fa-globe"></i></span> <div class="box-area">
		<h3>User Experiance</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p>
		</div></div>
		</div></div>
		</div> 
		 

	</div>
	</section>
	<div class="testimonial-area">
    <div class="testimonial-solid">
        <div class="container">
            <div class="testi-icon-area">
                <div class="quote">
                    <i class="fa fa-microphone"></i>
                </div>
            </div>
            <div id="carousel-example-generic" class="carousel slide" data-ride="carousel">
                <ol class="carousel-indicators">
                    <li data-target="#carousel-example-generic" data-slide-to="0" class="">
                        <a href="#"></a>
                    </li>
                    <li data-target="#carousel-example-generic" data-slide-to="1" class="">
                        <a href="#"></a>
                    </li>
                    <li data-target="#carousel-example-generic" data-slide-to="2" class="active">
                        <a href="#"></a>
                    </li>
                    <li data-target="#carousel-example-generic" data-slide-to="3" class="">
                        <a href="#"></a>
                    </li>
                </ol>
                <div class="carousel-inner">
                    <div class="item">
                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>
                        <p>
                            <b>- Mark John -</b>
                        </p>
                    </div>
                    <div class="item">
                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>
                        <p>
                            <b>- Jaison Warner -</b>
                        </p>
                    </div>
                    <div class="item active">
                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>
                        <p>
                            <b>- Tony Antonio -</b>
                        </p>
                    </div>
                    <div class="item">
                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>
                        <p>
                            <b>- Leena Doe -</b>
                        </p>
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>
<section class="courses">
<div class="container">

		<div class="row">
			<div class="col-lg-12">
				<div class="aligncenter"><h2 class="aligncenter">Courses We Offer</h1><span class="clear spacer_responsive_hide_mobile " style="height:13px;display:block;"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>
			</div>
		</div>
<div class="row">
            <div class="col-md-4">
			<div class="textbox">
                <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
            <div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
			<div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
        </div>
		<div class="row">
            <div class="col-md-4">
			<div class="textbox">
                <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
            <div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
			<div class="col-md-4">
			<div class="textbox">
                  <h3>Heading Course</h3>
				<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>
            </div> </div>
        </div>
</div>
</section>
	<footer>
	<div class="container">
		<div class="row">
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Our Contact</h5>
					<address>
					<strong>Abovecompany Inc</strong><br>
					JC Main Road, Near Silnile tower<br>
					 Pin-21542 NewYork US.</address>
					<p>
						<i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br>
						<i class="icon-envelope-alt"></i> email@domainname.com
					</p>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Quick Links</h5>
					<ul class="link-list">
						<li><a href="#">Latest Events</a></li>
						<li><a href="#">Terms and conditions</a></li>
						<li><a href="#">Privacy policy</a></li>
						<li><a href="#">Career</a></li>
						<li><a href="#">Contact us</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Latest posts</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Recent News</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	<div id="sub-footer">
		<div class="container">
			<div class="row">
				<div class="col-lg-6">
					<div class="copyright">
						<p>
							<span>&copy; Above Site All right reserved. Template By </span><a href="http://webthemez.com" target="_blank">WebThemez</a>
						</p>
					</div>
				</div>
				<div class="col-lg-6">
					<ul class="social-network">
						<li><a href="#" data-placement="top" title="Facebook"><i class="fa fa-facebook"></i></a></li>
						<li><a href="#" data-placement="top" title="Twitter"><i class="fa fa-twitter"></i></a></li>
						<li><a href="#" data-placement="top" title="Linkedin"><i class="fa fa-linkedin"></i></a></li>
						<li><a href="#" data-placement="top" title="Pinterest"><i class="fa fa-pinterest"></i></a></li>
						<li><a href="#" data-placement="top" title="Google plus"><i class="fa fa-google-plus"></i></a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	</footer>
</div>
<a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
<!-- javascript
    ================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="js/jquery.js"></script>
<script src="js/jquery.easing.1.3.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/jquery.fancybox.pack.js"></script>
<script src="js/jquery.fancybox-media.js"></script> 
<script src="js/portfolio/jquery.quicksand.js"></script>
<script src="js/portfolio/setting.js"></script>
<script src="js/jquery.flexslider.js"></script>
<script src="js/animate.js"></script>
<script src="js/custom.js"></script>
<script src="js/owl-carousel/owl.carousel.js"></script>
</body>
</html>

================================================
FILE: backend/tests/integration/tests/pruning/website/js/animate.js
================================================
jQuery(document).ready(function ($) {
  //animate effect
  $(".e_flash").hover(
    function () {
      $(this).addClass("animated flash");
    },
    function () {
      $(this).removeClass("animated flash");
    },
  );
  $(".e_bounce").hover(
    function () {
      $(this).addClass("animated bounce");
    },
    function () {
      $(this).removeClass("animated bounce");
    },
  );

  $(".e_shake").hover(
    function () {
      $(this).addClass("animated shake");
    },
    function () {
      $(this).removeClass("animated shake");
    },
  );
  $(".e_tada").hover(
    function () {
      $(this).addClass("animated tada");
    },
    function () {
      $(this).removeClass("animated tada");
    },
  );
  $(".e_swing").hover(
    function () {
      $(this).addClass("animated swing");
    },
    function () {
      $(this).removeClass("animated swing");
    },
  );
  $(".e_wobble").hover(
    function () {
      $(this).addClass("animated wobble");
    },
    function () {
      $(this).removeClass("animated wobble");
    },
  );
  $(".e_wiggle").hover(
    function () {
      $(this).addClass("animated wiggle");
    },
    function () {
      $(this).removeClass("animated wiggle");
    },
  );
  $(".e_pulse").hover(
    function () {
      $(this).addClass("animated pulse");
    },
    function () {
      $(this).removeClass("animated pulse");
    },
  );

  $(".e_flip").hover(
    function () {
      $(this).addClass("animated flip");
    },
    function () {
      $(this).removeClass("animated flip");
    },
  );
  $(".e_flipInX").hover(
    function () {
      $(this).addClass("animated flipInX");
    },
    function () {
      $(this).removeClass("animated flipInX");
    },
  );
  $(".e_flipOutX").hover(
    function () {
      $(this).addClass("animated flipOutX");
    },
    function () {
      $(this).removeClass("animated flipOutX");
    },
  );
  $(".e_flipInY").hover(
    function () {
      $(this).addClass("animated flipInY");
    },
    function () {
      $(this).removeClass("animated flipInY");
    },
  );
  $(".e_flipOutY").hover(
    function () {
      $(this).addClass("animated flipOutY");
    },
    function () {
      $(this).removeClass("animated flipOutY");
    },
  );

  //Fading entrances
  $(".e_fadeIn").hover(
    function () {
      $(this).addClass("animated fadeIn");
    },
    function () {
      $(this).removeClass("animated fadeIn");
    },
  );
  $(".e_fadeInUp").hover(
    function () {
      $(this).addClass("animated fadeInUp");
    },
    function () {
      $(this).removeClass("animated fadeInUp");
    },
  );
  $(".e_fadeInDown").hover(
    function () {
      $(this).addClass("animated fadeInDown");
    },
    function () {
      $(this).removeClass("animated fadeInDown");
    },
  );
  $(".e_fadeInLeft").hover(
    function () {
      $(this).addClass("animated fadeInLeft");
    },
    function () {
      $(this).removeClass("animated fadeInLeft");
    },
  );
  $(".e_fadeInRight").hover(
    function () {
      $(this).addClass("animated fadeInRight");
    },
    function () {
      $(this).removeClass("animated fadeInRight");
    },
  );
  $(".e_fadeInUpBig").hover(
    function () {
      $(this).addClass("animated fadeInUpBig");
    },
    function () {
      $(this).removeClass("animated fadeInUpBig");
    },
  );
  $(".e_fadeInUpBig").hover(
    function () {
      $(this).addClass("animated fadeInUpBig");
    },
    function () {
      $(this).removeClass("animated fadeInUpBig");
    },
  );
  $(".e_fadeInDownBig").hover(
    function () {
      $(this).addClass("animated fadeInDownBig");
    },
    function () {
      $(this).removeClass("animated fadeInDownBig");
    },
  );
  $(".e_fadeInLeftBig").hover(
    function () {
      $(this).addClass("animated fadeInLeftBig");
    },
    function () {
      $(this).removeClass("animated fadeInLeftBig");
    },
  );
  $(".e_fadeInRightBig").hover(
    function () {
      $(this).addClass("animated fadeInRightBig");
    },
    function () {
      $(this).removeClass("animated fadeInRightBig");
    },
  );

  //Fading exits
  $(".e_fadeOut").hover(
    function () {
      $(this).addClass("animated fadeOut");
    },
    function () {
      $(this).removeClass("animated fadeOut");
    },
  );
  $(".e_fadeOutUp").hover(
    function () {
      $(this).addClass("animated fadeOutUp");
    },
    function () {
      $(this).removeClass("animated fadeOutUp");
    },
  );
  $(".e_fadeOutDown").hover(
    function () {
      $(this).addClass("animated fadeOutDown");
    },
    function () {
      $(this).removeClass("animated fadeOutDown");
    },
  );
  $(".e_fadeOutLeft").hover(
    function () {
      $(this).addClass("animated fadeOutLeft");
    },
    function () {
      $(this).removeClass("animated fadeOutLeft");
    },
  );
  $(".e_fadeOutRight").hover(
    function () {
      $(this).addClass("animated fadeOutRight");
    },
    function () {
      $(this).removeClass("animated fadeOutRight");
    },
  );
  $(".e_fadeOutUpBig").hover(
    function () {
      $(this).addClass("animated fadeOutUpBig");
    },
    function () {
      $(this).removeClass("animated fadeOutUpBig");
    },
  );
  $(".e_fadeOutDownBig").hover(
    function () {
      $(this).addClass("animated fadeOutDownBig");
    },
    function () {
      $(this).removeClass("animated fadeOutDownBig");
    },
  );
  $(".e_fadeOutLeftBig").hover(
    function () {
      $(this).addClass("animated fadeOutLeftBig");
    },
    function () {
      $(this).removeClass("animated fadeOutLeftBig");
    },
  );
  $(".e_fadeOutRightBig").hover(
    function () {
      $(this).addClass("animated fadeOutRightBig");
    },
    function () {
      $(this).removeClass("animated fadeOutRightBig");
    },
  );

  //Bouncing entrances
  $(".e_bounceIn").hover(
    function () {
      $(this).addClass("animated bounceIn");
    },
    function () {
      $(this).removeClass("animated bounceIn");
    },
  );
  $(".e_bounceInDown").hover(
    function () {
      $(this).addClass("animated bounceInDown");
    },
    function () {
      $(this).removeClass("animated bounceInDown");
    },
  );
  $(".e_bounceInUp").hover(
    function () {
      $(this).addClass("animated bounceInUp");
    },
    function () {
      $(this).removeClass("animated bounceInUp");
    },
  );
  $(".e_bounceInLeft").hover(
    function () {
      $(this).addClass("animated bounceInLeft");
    },
    function () {
      $(this).removeClass("animated bounceInLeft");
    },
  );
  $(".e_bounceInRight").hover(
    function () {
      $(this).addClass("animated bounceInRight");
    },
    function () {
      $(this).removeClass("animated bounceInRight");
    },
  );

  //Bouncing exits
  $(".e_bounceOut").hover(
    function () {
      $(this).addClass("animated bounceOut");
    },
    function () {
      $(this).removeClass("animated bounceOut");
    },
  );
  $(".e_bounceOutDown").hover(
    function () {
      $(this).addClass("animated bounceOutDown");
    },
    function () {
      $(this).removeClass("animated bounceOutDown");
    },
  );
  $(".e_bounceOutUp").hover(
    function () {
      $(this).addClass("animated bounceOutUp");
    },
    function () {
      $(this).removeClass("animated bounceOutUp");
    },
  );
  $(".e_bounceOutLeft").hover(
    function () {
      $(this).addClass("animated bounceOutLeft");
    },
    function () {
      $(this).removeClass("animated bounceOutLeft");
    },
  );
  $(".e_bounceOutRight").hover(
    function () {
      $(this).addClass("animated bounceOutRight");
    },
    function () {
      $(this).removeClass("animated bounceOutRight");
    },
  );

  //Rotating entrances
  $(".e_rotateIn").hover(
    function () {
      $(this).addClass("animated rotateIn");
    },
    function () {
      $(this).removeClass("animated rotateIn");
    },
  );
  $(".e_rotateInDownLeft").hover(
    function () {
      $(this).addClass("animated rotateInDownLeft");
    },
    function () {
      $(this).removeClass("animated rotateInDownLeft");
    },
  );
  $(".e_rotateInDownRight").hover(
    function () {
      $(this).addClass("animated rotateInDownRight");
    },
    function () {
      $(this).removeClass("animated rotateInDownRight");
    },
  );
  $(".e_rotateInUpRight").hover(
    function () {
      $(this).addClass("animated rotateInUpRight");
    },
    function () {
      $(this).removeClass("animated rotateInUpRight");
    },
  );
  $(".e_rotateInUpLeft").hover(
    function () {
      $(this).addClass("animated rotateInUpLeft");
    },
    function () {
      $(this).removeClass("animated rotateInUpLeft");
    },
  );

  //Rotating exits
  $(".e_rotateOut").hover(
    function () {
      $(this).addClass("animated rotateOut");
    },
    function () {
      $(this).removeClass("animated rotateOut");
    },
  );
  $(".e_rotateOutDownLeft").hover(
    function () {
      $(this).addClass("animated rotateOutDownLeft");
    },
    function () {
      $(this).removeClass("animated rotateOutDownLeft");
    },
  );
  $(".e_rotateOutDownRight").hover(
    function () {
      $(this).addClass("animated rotateOutDownRight");
    },
    function () {
      $(this).removeClass("animated rotateOutDownRight");
    },
  );
  $(".e_rotateOutUpLeft").hover(
    function () {
      $(this).addClass("animated rotateOutUpLeft");
    },
    function () {
      $(this).removeClass("animated rotateOutUpLeft");
    },
  );
  $(".e_rotateOutUpRight").hover(
    function () {
      $(this).addClass("animated rotateOutUpRight");
    },
    function () {
      $(this).removeClass("animated rotateOutUpRight");
    },
  );

  //Lightspeed
  $(".e_lightSpeedIn").hover(
    function () {
      $(this).addClass("animated lightSpeedIn");
    },
    function () {
      $(this).removeClass("animated lightSpeedIn");
    },
  );
  $(".e_lightSpeedOut").hover(
    function () {
      $(this).addClass("animated lightSpeedOut");
    },
    function () {
      $(this).removeClass("animated lightSpeedOut");
    },
  );

  //specials
  $(".e_hinge").hover(
    function () {
      $(this).addClass("animated hinge");
    },
    function () {
      $(this).removeClass("animated hinge");
    },
  );
  $(".e_rollIn").hover(
    function () {
      $(this).addClass("animated rollIn");
    },
    function () {
      $(this).removeClass("animated rollIn");
    },
  );
  $(".e_rollOut").hover(
    function () {
      $(this).addClass("animated rollOut");
    },
    function () {
      $(this).removeClass("animated rollOut");
    },
  );
});


================================================
FILE: backend/tests/integration/tests/pruning/website/js/custom.js
================================================
/*global jQuery:false */
jQuery(document).ready(function ($) {
  "use strict";

  //add some elements with animate effect

  $(".big-cta").hover(
    function () {
      $(".cta a").addClass("animated shake");
    },
    function () {
      $(".cta a").removeClass("animated shake");
    },
  );
  $(".box").hover(
    function () {
      $(this).find(".icon").addClass("animated fadeInDown");
      $(this).find("p").addClass("animated fadeInUp");
    },
    function () {
      $(this).find(".icon").removeClass("animated fadeInDown");
      $(this).find("p").removeClass("animated fadeInUp");
    },
  );

  $(".accordion").on("show", function (e) {
    $(e.target)
      .prev(".accordion-heading")
      .find(".accordion-toggle")
      .addClass("active");
    $(e.target)
      .prev(".accordion-heading")
      .find(".accordion-toggle i")
      .removeClass("icon-plus");
    $(e.target)
      .prev(".accordion-heading")
      .find(".accordion-toggle i")
      .addClass("icon-minus");
  });

  $(".accordion").on("hide", function (e) {
    $(this).find(".accordion-toggle").not($(e.target)).removeClass("active");
    $(this)
      .find(".accordion-toggle i")
      .not($(e.target))
      .removeClass("icon-minus");
    $(this).find(".accordion-toggle i").not($(e.target)).addClass("icon-plus");
  });

  // tooltip
  $(".social-network li a, .options_box .color a").tooltip();

  // fancybox
  $(".fancybox").fancybox({
    padding: 0,
    autoResize: true,
    beforeShow: function () {
      this.title = $(this.element).attr("title");
      this.title =
        "<h4>" +
        this.title +
        "</h4>" +
        "<p>" +
        $(this.element).parent().find("img").attr("alt") +
        "</p>";
    },
    helpers: {
      title: { type: "inside" },
    },
  });

  //scroll to top
  $(window).scroll(function () {
    if ($(this).scrollTop() > 100) {
      $(".scrollup").fadeIn();
    } else {
      $(".scrollup").fadeOut();
    }
  });
  $(".scrollup").click(function () {
    $("html, body").animate({ scrollTop: 0 }, 1000);
    return false;
  });
  $("#post-slider").flexslider({
    // Primary Controls
    controlNav: false, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage
    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)
    prevText: "Previous", //String: Set the text for the "previous" directionNav item
    nextText: "Next", //String: Set the text for the "next" directionNav item

    // Secondary Navigation
    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys
    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.
    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel
    pausePlay: false, //Boolean: Create pause/play dynamic element
    pauseText: "Pause", //String: Set the text for the "pause" pausePlay item
    playText: "Play", //String: Set the text for the "play" pausePlay item

    // Special properties
    controlsContainer: "", //{UPDATED} Selector: USE CLASS SELECTOR. Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be ".flexslider-container". Property is ignored if given element is not found.
    manualControls: "", //Selector: Declare custom control navigation. Examples would be ".flex-control-nav li" or "#tabs-nav li img", etc. The number of elements in your controlNav should match the number of slides/tabs.
    sync: "", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.
    asNavFor: "", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider
  });

  $("#main-slider").flexslider({
    namespace: "flex-", //{NEW} String: Prefix string attached to the class of every element generated by the plugin
    selector: ".slides > li", //{NEW} Selector: Must match a simple pattern. '{container} > {slide}' -- Ignore pattern at your own peril
    animation: "fade", //String: Select your animation type, "fade" or "slide"
    easing: "swing", //{NEW} String: Determines the easing method used in jQuery transitions. jQuery easing plugin is supported!
    direction: "horizontal", //String: Select the sliding direction, "horizontal" or "vertical"
    reverse: false, //{NEW} Boolean: Reverse the animation direction
    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received "disable" classes at either end
    smoothHeight: false, //{NEW} Boolean: Allow height of the slider to animate smoothly in horizontal mode
    startAt: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)
    slideshow: true, //Boolean: Animate slider automatically
    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds
    animationSpeed: 600, //Integer: Set the speed of animations, in milliseconds
    initDelay: 0, //{NEW} Integer: Set an initialization delay, in milliseconds
    randomize: false, //Boolean: Randomize slide order

    // Usability features
    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.
    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering
    useCSS: true, //{NEW} Boolean: Slider will use CSS3 transitions if available
    touch: true, //{NEW} Boolean: Allow touch swipe navigation of the slider on touch-enabled devices
    video: false, //{NEW} Boolean: If using video in the slider, will prevent CSS3 3D Transforms to avoid graphical glitches

    // Primary Controls
    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage
    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)
    prevText: "Previous", //String: Set the text for the "previous" directionNav item
    nextText: "Next", //String: Set the text for the "next" directionNav item

    // Secondary Navigation
    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys
    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.
    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel
    pausePlay: false, //Boolean: Create pause/play dynamic element
    pauseText: "Pause", //String: Set the text for the "pause" pausePlay item
    playText: "Play", //String: Set the text for the "play" pausePlay item

    // Special properties
    controlsContainer: "", //{UPDATED} Selector: USE CLASS SELECTOR. Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be ".flexslider-container". Property is ignored if given element is not found.
    manualControls: "", //Selector: Declare custom control navigation. Examples would be ".flex-control-nav li" or "#tabs-nav li img", etc. The number of elements in your controlNav should match the number of slides/tabs.
    sync: "", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.
    asNavFor: "", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider
  });
});


================================================
FILE: backend/tests/integration/tests/pruning/website/js/flexslider/jquery.flexslider.js
================================================
/*
 * jQuery FlexSlider v1.8
 * http://www.woothemes.com/flexslider/
 *
 * Copyright 2012 WooThemes
 * Free to use under the MIT license.
 * http://www.opensource.org/licenses/mit-license.php
 *
 * Contributing Author: Tyler Smith
 */

(function ($) {
  //FlexSlider: Object Instance
  $.flexslider = function (el, options) {
    var slider = $(el);

    // slider DOM reference for use outside of the plugin
    $.data(el, "flexslider", slider);

    slider.init = function () {
      slider.vars = $.extend({}, $.flexslider.defaults, options);
      $.data(el, "flexsliderInit", true);
      slider.container = $(".slides", slider).eq(0);
      slider.slides = $(".slides:first > li", slider);
      slider.count = slider.slides.length;
      slider.animating = false;
      slider.currentSlide = slider.vars.slideToStart;
      slider.animatingTo = slider.currentSlide;
      slider.atEnd = slider.currentSlide == 0 ? true : false;
      slider.eventType =
        "ontouchstart" in document.documentElement ? "touchstart" : "click";
      slider.cloneCount = 0;
      slider.cloneOffset = 0;
      slider.manualPause = false;
      slider.vertical = slider.vars.slideDirection == "vertical";
      slider.prop = slider.vertical ? "top" : "marginLeft";
      slider.args = {};

      //Test for webbkit CSS3 Animations
      slider.transitions =
        "webkitTransition" in document.body.style && slider.vars.useCSS;
      if (slider.transitions) slider.prop = "-webkit-transform";

      //Test for controlsContainer
      if (slider.vars.controlsContainer != "") {
        slider.controlsContainer = $(slider.vars.controlsContainer).eq(
          $(".slides").index(slider.container),
        );
        slider.containerExists = slider.controlsContainer.length > 0;
      }
      //Test for manualControls
      if (slider.vars.manualControls != "") {
        slider.manualControls = $(
          slider.vars.manualControls,
          slider.containerExists ? slider.controlsContainer : slider,
        );
        slider.manualExists = slider.manualControls.length > 0;
      }

      ///////////////////////////////////////////////////////////////////
      // FlexSlider: Randomize Slides
      if (slider.vars.randomize) {
        slider.slides.sort(function () {
          return Math.round(Math.random()) - 0.5;
        });
        slider.container.empty().append(slider.slides);
      }
      ///////////////////////////////////////////////////////////////////

      ///////////////////////////////////////////////////////////////////
      // FlexSlider: Slider Animation Initialize
      if (slider.vars.animation.toLowerCase() == "slide") {
        if (slider.transitions) {
          slider.setTransition(0);
        }
        slider.css({ overflow: "hidden" });
        if (slider.vars.animationLoop) {
          slider.cloneCount = 2;
          slider.cloneOffset = 1;
          slider.container
            .append(slider.slides.filter(":first").clone().addClass("clone"))
            .prepend(slider.slides.filter(":last").clone().addClass("clone"));
        }
        //create newSlides to capture possible clones
        slider.newSlides = $(".slides:first > li", slider);
        var sliderOffset = -1 * (slider.currentSlide + slider.cloneOffset);
        if (slider.vertical) {
          slider.newSlides.css({
            display: "block",
            width: "100%",
            float: "left",
          });
          slider.container
            .height((slider.count + slider.cloneCount) * 200 + "%")
            .css("position", "absolute")
            .width("100%");
          //Timeout function to give browser enough time to get proper height initially
          setTimeout(function () {
            slider
              .css({ position: "relative" })
              .height(slider.slides.filter(":first").height());
            slider.args[slider.prop] = slider.transitions
              ? "translate3d(0," + sliderOffset * slider.height() + "px,0)"
              : sliderOffset * slider.height() + "px";
            slider.container.css(slider.args);
          }, 100);
        } else {
          slider.args[slider.prop] = slider.transitions
            ? "translate3d(" + sliderOffset * slider.width() + "px,0,0)"
            : sliderOffset * slider.width() + "px";
          slider.container
            .width((slider.count + slider.cloneCount) * 200 + "%")
            .css(slider.args);
          //Timeout function to give browser enough time to get proper width initially
          setTimeout(function () {
            slider.newSlides
              .width(slider.width())
              .css({ float: "left", display: "block" });
          }, 100);
        }
      } else {
        //Default to fade
        //Not supporting fade CSS3 transitions right now
        slider.transitions = false;
        slider.slides
          .css({ width: "100%", float: "left", marginRight: "-100%" })
          .eq(slider.currentSlide)
          .fadeIn(slider.vars.animationDuration);
      }
      ///////////////////////////////////////////////////////////////////

      ///////////////////////////////////////////////////////////////////
      // FlexSlider: Control Nav
      if (slider.vars.controlNav) {
        if (slider.manualExists) {
          slider.controlNav = slider.manualControls;
        } else {
          var controlNavScaffold = $('<ol class="flex-control-nav"></ol>');
          var j = 1;
          for (var i = 0; i < slider.count; i++) {
            controlNavScaffold.append("<li><a>" + j + "</a></li>");
            j++;
          }

          if (slider.containerExists) {
            $(slider.controlsContainer).append(controlNavScaffold);
            slider.controlNav = $(
              ".flex-control-nav li a",
              slider.controlsContainer,
            );
          } else {
            slider.append(controlNavScaffold);
            slider.controlNav = $(".flex-control-nav li a", slider);
          }
        }

        slider.controlNav.eq(slider.currentSlide).addClass("active");

        slider.controlNav.bind(slider.eventType, function (event) {
          event.preventDefault();
          if (!$(this).hasClass("active")) {
            slider.controlNav.index($(this)) > slider.currentSlide
              ? (slider.direction = "next")
              : (slider.direction = "prev");
            slider.flexAnimate(
              slider.controlNav.index($(this)),
              slider.vars.pauseOnAction,
            );
          }
        });
      }
      ///////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider: Direction Nav
      if (slider.vars.directionNav) {
        var directionNavScaffold = $(
          '<ul class="flex-direction-nav"><li><a class="prev" href="#">' +
            slider.vars.prevText +
            '</a></li><li><a class="next" href="#">' +
            slider.vars.nextText +
            "</a></li></ul>",
        );

        if (slider.containerExists) {
          $(slider.controlsContainer).append(directionNavScaffold);
          slider.directionNav = $(
            ".flex-direction-nav li a",
            slider.controlsContainer,
          );
        } else {
          slider.append(directionNavScaffold);
          slider.directionNav = $(".flex-direction-nav li a", slider);
        }

        //Set initial disable styles if necessary
        if (!slider.vars.animationLoop) {
          if (slider.currentSlide == 0) {
            slider.directionNav.filter(".prev").addClass("disabled");
          } else if (slider.currentSlide == slider.count - 1) {
            slider.directionNav.filter(".next").addClass("disabled");
          }
        }

        slider.directionNav.bind(slider.eventType, function (event) {
          event.preventDefault();
          var target = $(this).hasClass("next")
            ? slider.getTarget("next")
            : slider.getTarget("prev");

          if (slider.canAdvance(target)) {
            slider.flexAnimate(target, slider.vars.pauseOnAction);
          }
        });
      }
      //////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider: Keyboard Nav
      if (slider.vars.keyboardNav && $("ul.slides").length == 1) {
        function keyboardMove(event) {
          if (slider.animating) {
            return;
          } else if (event.keyCode != 39 && event.keyCode != 37) {
            return;
          } else {
            if (event.keyCode == 39) {
              var target = slider.getTarget("next");
            } else if (event.keyCode == 37) {
              var target = slider.getTarget("prev");
            }

            if (slider.canAdvance(target)) {
              slider.flexAnimate(target, slider.vars.pauseOnAction);
            }
          }
        }
        $(document).bind("keyup", keyboardMove);
      }
      //////////////////////////////////////////////////////////////////

      ///////////////////////////////////////////////////////////////////
      // FlexSlider: Mousewheel interaction
      if (slider.vars.mousewheel) {
        slider.mousewheelEvent = /Firefox/i.test(navigator.userAgent)
          ? "DOMMouseScroll"
          : "mousewheel";
        slider.bind(slider.mousewheelEvent, function (e) {
          e.preventDefault();
          e = e ? e : window.event;
          var wheelData = e.detail
              ? e.detail * -1
              : e.originalEvent.wheelDelta / 40,
            target =
              wheelData < 0
                ? slider.getTarget("next")
                : slider.getTarget("prev");

          if (slider.canAdvance(target)) {
            slider.flexAnimate(target, slider.vars.pauseOnAction);
          }
        });
      }
      ///////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider: Slideshow Setup
      if (slider.vars.slideshow) {
        //pauseOnHover
        if (slider.vars.pauseOnHover && slider.vars.slideshow) {
          slider.hover(
            function () {
              slider.pause();
            },
            function () {
              if (!slider.manualPause) {
                slider.resume();
              }
            },
          );
        }

        //Initialize animation
        slider.animatedSlides = setInterval(
          slider.animateSlides,
          slider.vars.slideshowSpeed,
        );
      }
      //////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider: Pause/Play
      if (slider.vars.pausePlay) {
        var pausePlayScaffold = $(
          '<div class="flex-pauseplay"><span></span></div>',
        );

        if (slider.containerExists) {
          slider.controlsContainer.append(pausePlayScaffold);
          slider.pausePlay = $(
            ".flex-pauseplay span",
            slider.controlsContainer,
          );
        } else {
          slider.append(pausePlayScaffold);
          slider.pausePlay = $(".flex-pauseplay span", slider);
        }

        var pausePlayState = slider.vars.slideshow ? "pause" : "play";
        slider.pausePlay
          .addClass(pausePlayState)
          .text(
            pausePlayState == "pause"
              ? slider.vars.pauseText
              : slider.vars.playText,
          );

        slider.pausePlay.bind(slider.eventType, function (event) {
          event.preventDefault();
          if ($(this).hasClass("pause")) {
            slider.pause();
            slider.manualPause = true;
          } else {
            slider.resume();
            slider.manualPause = false;
          }
        });
      }
      //////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider:Touch Swip Gestures
      //Some brilliant concepts adapted from the following sources
      //Source: TouchSwipe - http://www.netcu.de/jquery-touchwipe-iphone-ipad-library
      //Source: SwipeJS - http://swipejs.com
      if ("ontouchstart" in document.documentElement && slider.vars.touch) {
        //For brevity, variables are named for x-axis scrolling
        //The variables are then swapped if vertical sliding is applied
        //This reduces redundant code...I think :)
        //If debugging, recognize variables are named for horizontal scrolling
        var startX,
          startY,
          offset,
          cwidth,
          dx,
          startT,
          scrolling = false;

        slider.each(function () {
          if ("ontouchstart" in document.documentElement) {
            this.addEventListener("touchstart", onTouchStart, false);
          }
        });

        function onTouchStart(e) {
          if (slider.animating) {
            e.preventDefault();
          } else if (e.touches.length == 1) {
            slider.pause();
            cwidth = slider.vertical ? slider.height() : slider.width();
            startT = Number(new Date());
            offset = slider.vertical
              ? (slider.currentSlide + slider.cloneOffset) * slider.height()
              : (slider.currentSlide + slider.cloneOffset) * slider.width();
            startX = slider.vertical ? e.touches[0].pageY : e.touches[0].pageX;
            startY = slider.vertical ? e.touches[0].pageX : e.touches[0].pageY;
            slider.setTransition(0);

            this.addEventListener("touchmove", onTouchMove, false);
            this.addEventListener("touchend", onTouchEnd, false);
          }
        }

        function onTouchMove(e) {
          dx = slider.vertical
            ? startX - e.touches[0].pageY
            : startX - e.touches[0].pageX;
          scrolling = slider.vertical
            ? Math.abs(dx) < Math.abs(e.touches[0].pageX - startY)
            : Math.abs(dx) < Math.abs(e.touches[0].pageY - startY);

          if (!scrolling) {
            e.preventDefault();
            if (slider.vars.animation == "slide" && slider.transitions) {
              if (!slider.vars.animationLoop) {
                dx =
                  dx /
                  ((slider.currentSlide == 0 && dx < 0) ||
                  (slider.currentSlide == slider.count - 1 && dx > 0)
                    ? Math.abs(dx) / cwidth + 2
                    : 1);
              }
              slider.args[slider.prop] = slider.vertical
                ? "translate3d(0," + (-offset - dx) + "px,0)"
                : "translate3d(" + (-offset - dx) + "px,0,0)";
              slider.container.css(slider.args);
            }
          }
        }

        function onTouchEnd(e) {
          slider.animating = false;
          if (
            slider.animatingTo == slider.currentSlide &&
            !scrolling &&
            !(dx == null)
          ) {
            var target =
              dx > 0 ? slider.getTarget("next") : slider.getTarget("prev");
            if (
              (slider.canAdvance(target) &&
                Number(new Date()) - startT < 550 &&
                Math.abs(dx) > 20) ||
              Math.abs(dx) > cwidth / 2
            ) {
              slider.flexAnimate(target, slider.vars.pauseOnAction);
            } else if (slider.vars.animation !== "fade") {
              slider.flexAnimate(
                slider.currentSlide,
                slider.vars.pauseOnAction,
              );
            }
          }

          //Finish the touch by undoing the touch session
          this.removeEventListener("touchmove", onTouchMove, false);
          this.removeEventListener("touchend", onTouchEnd, false);
          startX = null;
          startY = null;
          dx = null;
          offset = null;
        }
      }
      //////////////////////////////////////////////////////////////////

      //////////////////////////////////////////////////////////////////
      //FlexSlider: Resize Functions (If necessary)
      if (slider.vars.animation.toLowerCase() == "slide") {
        $(window).resize(function () {
          if (!slider.animating && slider.is(":visible")) {
            if (slider.vertical) {
              slider.height(slider.slides.filter(":first").height());
              slider.args[slider.prop] =
                -1 *
                  (slider.currentSlide + slider.cloneOffset) *
                  slider.slides.filter(":first").height() +
                "px";
              if (slider.transitions) {
                slider.setTransition(0);
                slider.args[slider.prop] = slider.vertical
                  ? "translate3d(0," + slider.args[slider.prop] + ",0)"
                  : "translate3d(" + slider.args[slider.prop] + ",0,0)";
              }
              slider.container.css(slider.args);
            } else {
              slider.newSlides.width(slider.width());
              slider.args[slider.prop] =
                -1 *
                  (slider.currentSlide + slider.cloneOffset) *
                  slider.width() +
                "px";
              if (slider.transitions) {
                slider.setTransition(0);
                slider.args[slider.prop] = slider.vertical
                  ? "translate3d(0," + slider.args[slider.prop] + ",0)"
                  : "translate3d(" + slider.args[slider.prop] + ",0,0)";
              }
              slider.container.css(slider.args);
            }
          }
        });
      }
      //////////////////////////////////////////////////////////////////

      //FlexSlider: start() Callback
      slider.vars.start(slider);
    };

    //FlexSlider: Animation Actions
    slider.flexAnimate = function (target, pause) {
      if (!slider.animating && slider.is(":visible")) {
        //Animating flag
        slider.animating = true;

        //FlexSlider: before() animation Callback
        slider.animatingTo = target;
        slider.vars.before(slider);

        //Optional paramter to pause slider when making an anmiation call
        if (pause) {
          slider.pause();
        }

        //Update controlNav
        if (slider.vars.controlNav) {
          slider.controlNav.removeClass("active").eq(target).addClass("active");
        }

        //Is the slider at either end
        slider.atEnd = target == 0 || target == slider.count - 1 ? true : false;
        if (!slider.vars.animationLoop && slider.vars.directionNav) {
          if (target == 0) {
            slider.directionNav
              .removeClass("disabled")
              .filter(".prev")
              .addClass("disabled");
          } else if (target == slider.count - 1) {
            slider.directionNav
              .removeClass("disabled")
              .filter(".next")
              .addClass("disabled");
          } else {
            slider.directionNav.removeClass("disabled");
          }
        }

        if (!slider.vars.animationLoop && target == slider.count - 1) {
          slider.pause();
          //FlexSlider: end() of cycle Callback
          slider.vars.end(slider);
        }

        if (slider.vars.animation.toLowerCase() == "slide") {
          var dimension = slider.vertical
            ? slider.slides.filter(":first").height()
            : slider.slides.filter(":first").width();

          if (
            slider.currentSlide == 0 &&
            target == slider.count - 1 &&
            slider.vars.animationLoop &&
            slider.direction != "next"
          ) {
            slider.slideString = "0px";
          } else if (
            slider.currentSlide == slider.count - 1 &&
            target == 0 &&
            slider.vars.animationLoop &&
            slider.direction != "prev"
          ) {
            slider.slideString = -1 * (slider.count + 1) * dimension + "px";
          } else {
            slider.slideString =
              -1 * (target + slider.cloneOffset) * dimension + "px";
          }
          slider.args[slider.prop] = slider.slideString;

          if (slider.transitions) {
            slider.setTransition(slider.vars.animationDuration);
            slider.args[slider.prop] = slider.vertical
              ? "translate3d(0," + slider.slideString + ",0)"
              : "translate3d(" + slider.slideString + ",0,0)";
            slider.container
              .css(slider.args)
              .one("webkitTransitionEnd transitionend", function () {
                slider.wrapup(dimension);
              });
          } else {
            slider.container.animate(
              slider.args,
              slider.vars.animationDuration,
              function () {
                slider.wrapup(dimension);
              },
            );
          }
        } else {
          //Default to Fade
          slider.slides
            .eq(slider.currentSlide)
            .fadeOut(slider.vars.animationDuration);
          slider.slides
            .eq(target)
            .fadeIn(slider.vars.animationDuration, function () {
              slider.wrapup();
            });
        }
      }
    };

    //FlexSlider: Function to minify redundant animation actions
    slider.wrapup = function (dimension) {
      if (slider.vars.animation == "slide") {
        //Jump the slider if necessary
        if (
          slider.currentSlide == 0 &&
          slider.animatingTo == slider.count - 1 &&
          slider.vars.animationLoop
        ) {
          slider.args[slider.prop] = -1 * slider.count * dimension + "px";
          if (slider.transitions) {
            slider.setTransition(0);
            slider.args[slider.prop] = slider.vertical
              ? "translate3d(0," + slider.args[slider.prop] + ",0)"
              : "translate3d(" + slider.args[slider.prop] + ",0,0)";
          }
          slider.container.css(slider.args);
        } else if (
          slider.currentSlide == slider.count - 1 &&
          slider.animatingTo == 0 &&
          slider.vars.animationLoop
        ) {
          slider.args[slider.prop] = -1 * dimension + "px";
          if (slider.transitions) {
            slider.setTransition(0);
            slider.args[slider.prop] = slider.vertical
              ? "translate3d(0," + slider.args[slider.prop] + ",0)"
              : "translate3d(" + slider.args[slider.prop] + ",0,0)";
          }
          slider.container.css(slider.args);
        }
      }
      slider.animating = false;
      slider.currentSlide = slider.animatingTo;
      //FlexSlider: after() animation Callback
      slider.vars.after(slider);
    };

    //FlexSlider: Automatic Slideshow
    slider.animateSlides = function () {
      if (!slider.animating) {
        slider.flexAnimate(slider.getTarget("next"));
      }
    };

    //FlexSlider: Automatic Slideshow Pause
    slider.pause = function () {
      clearInterval(slider.animatedSlides);
      if (slider.vars.pausePlay) {
        slider.pausePlay
          .removeClass("pause")
          .addClass("play")
          .text(slider.vars.playText);
      }
    };

    //FlexSlider: Automatic Slideshow Start/Resume
    slider.resume = function () {
      slider.animatedSlides = setInterval(
        slider.animateSlides,
        slider.vars.slideshowSpeed,
      );
      if (slider.vars.pausePlay) {
        slider.pausePlay
          .removeClass("play")
          .addClass("pause")
          .text(slider.vars.pauseText);
      }
    };

    //FlexSlider: Helper function for non-looping sliders
    slider.canAdvance = function (target) {
      if (!slider.vars.animationLoop && slider.atEnd) {
        if (
          slider.currentSlide == 0 &&
          target == slider.count - 1 &&
          slider.direction != "next"
        ) {
          return false;
        } else if (
          slider.currentSlide == slider.count - 1 &&
          target == 0 &&
          slider.direction == "next"
        ) {
          return false;
        } else {
          return true;
        }
      } else {
        return true;
      }
    };

    //FlexSlider: Helper function to determine animation target
    slider.getTarget = function (dir) {
      slider.direction = dir;
      if (dir == "next") {
        return slider.currentSlide == slider.count - 1
          ? 0
          : slider.currentSlide + 1;
      } else {
        return slider.currentSlide == 0
          ? slider.count - 1
          : slider.currentSlide - 1;
      }
    };

    //FlexSlider: Helper function to set CSS3 transitions
    slider.setTransition = function (dur) {
      slider.container.css({ "-webkit-transition-duration": dur / 1000 + "s" });
    };

    //FlexSlider: Initialize
    slider.init();
  };

  //FlexSlider: Default Settings
  $.flexslider.defaults = {
    animation: "slide", //String: Select your animation type, "fade" or "slide"
    slideDirection: "horizontal", //String: Select the sliding direction, "horizontal" or "vertical"
    slideshow: true, //Boolean: Animate slider automatically
    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds
    animationDuration: 600, //Integer: Set the speed of animations, in milliseconds
    directionNav: false, //Boolean: Create navigation for previous/next navigation? (true/false)
    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage
    keyboardNav: true, //Boolean: Allow slider navigating via keyboard left/right keys
    mousewheel: false, //Boolean: Allow slider navigating via mousewheel
    prevText: "Previous", //String: Set the text for the "previous" directionNav item
    nextText: "Next", //String: Set the text for the "next" directionNav item
    pausePlay: false, //Boolean: Create pause/play dynamic element
    pauseText: "Pause", //String: Set the text for the "pause" pausePlay item
    playText: "Play", //String: Set the text for the "play" pausePlay item
    randomize: false, //Boolean: Randomize slide order
    slideToStart: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)
    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received "disable" classes at either end
    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.
    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering
    useCSS: true, //Boolean: Override the use of CSS3 Translate3d animations
    touch: true, //Boolean: Disable touchswipe events
    controlsContainer: "", //Selector: Declare which container the navigation elements should be appended too. Default container is the flexSlider element. Example use would be ".flexslider-container", "#container", etc. If the given element is not found, the default action will be taken.
    manualControls: "", //Selector: Declare custom control navigation. Example would be ".flex-control-nav li" or "#tabs-nav li img", etc. The number of elements in your controlNav should match the number of slides/tabs.
    start: function () {}, //Callback: function(slider) - Fires when the slider loads the first slide
    before: function () {}, //Callback: function(slider) - Fires asynchronously with each slider animation
    after: function () {}, //Callback: function(slider) - Fires after each slider animation completes
    end: function () {}, //Callback: function(slider) - Fires when the slider reaches the last slide (asynchronous)
  };

  //FlexSlider: Plugin Function
  $.fn.flexslider = function (options) {
    return this.each(function () {
      var $slides = $(this).find(".slides > li");
      if ($slides.length === 1) {
        $slides.fadeIn(400);
        if (options && options.start) options.start($(this));
      } else if ($(this).data("flexsliderInit") != true) {
        new $.flexslider(this, options);
      }
    });
  };
})(jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/flexslider/setting.js
================================================
$(window).load(function () {
  $(".flexslider").flexslider();
});


================================================
FILE: backend/tests/integration/tests/pruning/website/js/google-code-prettify/prettify.css
================================================
.com {
  color: #93a1a1;
}
.lit {
  color: #195f91;
}
.pun,
.opn,
.clo {
  color: #93a1a1;
}
.fun {
  color: #dc322f;
}
.str,
.atv {
  color: #d14;
}
.kwd,
.prettyprint .tag {
  color: #1e347b;
}
.typ,
.atn,
.dec,
.var {
  color: teal;
}
.pln {
  color: #48484c;
}

.prettyprint {
  padding: 8px;
  background-color: #f7f7f9;
  border: 1px solid #e1e1e8;
}
.prettyprint.linenums {
  -webkit-box-shadow:
    inset 40px 0 0 #fbfbfc,
    inset 41px 0 0 #ececf0;
  -moz-box-shadow:
    inset 40px 0 0 #fbfbfc,
    inset 41px 0 0 #ececf0;
  box-shadow:
    inset 40px 0 0 #fbfbfc,
    inset 41px 0 0 #ececf0;
}

/* Specify class=linenums on a pre to get line numbering */
ol.linenums {
  margin: 0 0 0 33px; /* IE indents via margin-left */
}
ol.linenums li {
  padding-left: 12px;
  color: #bebec5;
  line-height: 20px;
  text-shadow: 0 1px 0 #fff;
}


================================================
FILE: backend/tests/integration/tests/pruning/website/js/google-code-prettify/prettify.js
================================================
var q = null;
window.PR_SHOULD_USE_CONTINUATION = !0;
(function () {
  function L(a) {
    function m(a) {
      var f = a.charCodeAt(0);
      if (f !== 92) return f;
      var b = a.charAt(1);
      return (f = r[b])
        ? f
        : "0" <= b && b <= "7"
          ? parseInt(a.substring(1), 8)
          : b === "u" || b === "x"
            ? parseInt(a.substring(2), 16)
            : a.charCodeAt(1);
    }
    function e(a) {
      if (a < 32) return (a < 16 ? "\\x0" : "\\x") + a.toString(16);
      a = String.fromCharCode(a);
      if (a === "\\" || a === "-" || a === "[" || a === "]") a = "\\" + a;
      return a;
    }
    function h(a) {
      for (
        var f = a
            .substring(1, a.length - 1)
            .match(
              /\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\[0-3][0-7]{0,2}|\\[0-7]{1,2}|\\[\S\s]|[^\\]/g,
            ),
          a = [],
          b = [],
          o = f[0] === "^",
          c = o ? 1 : 0,
          i = f.length;
        c < i;
        ++c
      ) {
        var j = f[c];
        if (/\\[bdsw]/i.test(j)) a.push(j);
        else {
          var j = m(j),
            d;
          c + 2 < i && "-" === f[c + 1]
            ? ((d = m(f[c + 2])), (c += 2))
            : (d = j);
          b.push([j, d]);
          d < 65 ||
            j > 122 ||
            (d < 65 ||
              j > 90 ||
              b.push([Math.max(65, j) | 32, Math.min(d, 90) | 32]),
            d < 97 ||
              j > 122 ||
              b.push([Math.max(97, j) & -33, Math.min(d, 122) & -33]));
        }
      }
      b.sort(function (a, f) {
        return a[0] - f[0] || f[1] - a[1];
      });
      f = [];
      j = [NaN, NaN];
      for (c = 0; c < b.length; ++c)
        (i = b[c]),
          i[0] <= j[1] + 1 ? (j[1] = Math.max(j[1], i[1])) : f.push((j = i));
      b = ["["];
      o && b.push("^");
      b.push.apply(b, a);
      for (c = 0; c < f.length; ++c)
        (i = f[c]),
          b.push(e(i[0])),
          i[1] > i[0] && (i[1] + 1 > i[0] && b.push("-"), b.push(e(i[1])));
      b.push("]");
      return b.join("");
    }
    function y(a) {
      for (
        var f = a.source.match(
            /\[(?:[^\\\]]|\\[\S\s])*]|\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\\d+|\\[^\dux]|\(\?[!:=]|[()^]|[^()[\\^]+/g,
          ),
          b = f.length,
          d = [],
          c = 0,
          i = 0;
        c < b;
        ++c
      ) {
        var j = f[c];
        j === "("
          ? ++i
          : "\\" === j.charAt(0) &&
            (j = +j.substring(1)) &&
            j <= i &&
            (d[j] = -1);
      }
      for (c = 1; c < d.length; ++c) -1 === d[c] && (d[c] = ++t);
      for (i = c = 0; c < b; ++c)
        (j = f[c]),
          j === "("
            ? (++i, d[i] === void 0 && (f[c] = "(?:"))
            : "\\" === j.charAt(0) &&
              (j = +j.substring(1)) &&
              j <= i &&
              (f[c] = "\\" + d[i]);
      for (i = c = 0; c < b; ++c)
        "^" === f[c] && "^" !== f[c + 1] && (f[c] = "");
      if (a.ignoreCase && s)
        for (c = 0; c < b; ++c)
          (j = f[c]),
            (a = j.charAt(0)),
            j.length >= 2 && a === "["
              ? (f[c] = h(j))
              : a !== "\\" &&
                (f[c] = j.replace(/[A-Za-z]/g, function (a) {
                  a = a.charCodeAt(0);
                  return "[" + String.fromCharCode(a & -33, a | 32) + "]";
                }));
      return f.join("");
    }
    for (var t = 0, s = !1, l = !1, p = 0, d = a.length; p < d; ++p) {
      var g = a[p];
      if (g.ignoreCase) l = !0;
      else if (
        /[a-z]/i.test(
          g.source.replace(/\\u[\da-f]{4}|\\x[\da-f]{2}|\\[^UXux]/gi, ""),
        )
      ) {
        s = !0;
        l = !1;
        break;
      }
    }
    for (
      var r = { b: 8, t: 9, n: 10, v: 11, f: 12, r: 13 },
        n = [],
        p = 0,
        d = a.length;
      p < d;
      ++p
    ) {
      g = a[p];
      if (g.global || g.multiline) throw Error("" + g);
      n.push("(?:" + y(g) + ")");
    }
    return RegExp(n.join("|"), l ? "gi" : "g");
  }
  function M(a) {
    function m(a) {
      switch (a.nodeType) {
        case 1:
          if (e.test(a.className)) break;
          for (var g = a.firstChild; g; g = g.nextSibling) m(g);
          g = a.nodeName;
          if ("BR" === g || "LI" === g)
            (h[s] = "\n"), (t[s << 1] = y++), (t[(s++ << 1) | 1] = a);
          break;
        case 3:
        case 4:
          (g = a.nodeValue),
            g.length &&
              ((g = p
                ? g.replace(/\r\n?/g, "\n")
                : g.replace(/[\t\n\r ]+/g, " ")),
              (h[s] = g),
              (t[s << 1] = y),
              (y += g.length),
              (t[(s++ << 1) | 1] = a));
      }
    }
    var e = /(?:^|\s)nocode(?:\s|$)/,
      h = [],
      y = 0,
      t = [],
      s = 0,
      l;
    a.currentStyle
      ? (l = a.currentStyle.whiteSpace)
      : window.getComputedStyle &&
        (l = document.defaultView
          .getComputedStyle(a, q)
          .getPropertyValue("white-space"));
    var p = l && "pre" === l.substring(0, 3);
    m(a);
    return { a: h.join("").replace(/\n$/, ""), c: t };
  }
  function B(a, m, e, h) {
    m && ((a = { a: m, d: a }), e(a), h.push.apply(h, a.e));
  }
  function x(a, m) {
    function e(a) {
      for (
        var l = a.d,
          p = [l, "pln"],
          d = 0,
          g = a.a.match(y) || [],
          r = {},
          n = 0,
          z = g.length;
        n < z;
        ++n
      ) {
        var f = g[n],
          b = r[f],
          o = void 0,
          c;
        if (typeof b === "string") c = !1;
        else {
          var i = h[f.charAt(0)];
          if (i) (o = f.match(i[1])), (b = i[0]);
          else {
            for (c = 0; c < t; ++c)
              if (((i = m[c]), (o = f.match(i[1])))) {
                b = i[0];
                break;
              }
            o || (b = "pln");
          }
          if (
            (c = b.length >= 5 && "lang-" === b.substring(0, 5)) &&
            !(o && typeof o[1] === "string")
          )
            (c = !1), (b = "src");
          c || (r[f] = b);
        }
        i = d;
        d += f.length;
        if (c) {
          c = o[1];
          var j = f.indexOf(c),
            k = j + c.length;
          o[2] && ((k = f.length - o[2].length), (j = k - c.length));
          b = b.substring(5);
          B(l + i, f.substring(0, j), e, p);
          B(l + i + j, c, C(b, c), p);
          B(l + i + k, f.substring(k), e, p);
        } else p.push(l + i, b);
      }
      a.e = p;
    }
    var h = {},
      y;
    (function () {
      for (
        var e = a.concat(m), l = [], p = {}, d = 0, g = e.length;
        d < g;
        ++d
      ) {
        var r = e[d],
          n = r[3];
        if (n) for (var k = n.length; --k >= 0; ) h[n.charAt(k)] = r;
        r = r[1];
        n = "" + r;
        p.hasOwnProperty(n) || (l.push(r), (p[n] = q));
      }
      l.push(/[\S\s]/);
      y = L(l);
    })();
    var t = m.length;
    return e;
  }
  function u(a) {
    var m = [],
      e = [];
    a.tripleQuotedStrings
      ? m.push([
          "str",
          /^(?:'''(?:[^'\\]|\\[\S\s]|''?(?=[^']))*(?:'''|$)|"""(?:[^"\\]|\\[\S\s]|""?(?=[^"]))*(?:"""|$)|'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$))/,
          q,
          "'\"",
        ])
      : a.multiLineStrings
        ? m.push([
            "str",
            /^(?:'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$)|`(?:[^\\`]|\\[\S\s])*(?:`|$))/,
            q,
            "'\"`",
          ])
        : m.push([
            "str",
            /^(?:'(?:[^\n\r'\\]|\\.)*(?:'|$)|"(?:[^\n\r"\\]|\\.)*(?:"|$))/,
            q,
            "\"'",
          ]);
    a.verbatimStrings && e.push(["str", /^@"(?:[^"]|"")*(?:"|$)/, q]);
    var h = a.hashComments;
    h &&
      (a.cStyleComments
        ? (h > 1
            ? m.push(["com", /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, q, "#"])
            : m.push([
                "com",
                /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\n\r]*)/,
                q,
                "#",
              ]),
          e.push([
            "str",
            /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
            q,
          ]))
        : m.push(["com", /^#[^\n\r]*/, q, "#"]));
    a.cStyleComments &&
      (e.push(["com", /^\/\/[^\n\r]*/, q]),
      e.push(["com", /^\/\*[\S\s]*?(?:\*\/|$)/, q]));
    a.regexLiterals &&
      e.push([
        "lang-regex",
        /^(?:^^\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\(|\*|\*=|\+=|,|-=|->|\/|\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\^=|\^\^|\^\^=|{|\||\|=|\|\||\|\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\s*(\/(?=[^*/])(?:[^/[\\]|\\[\S\s]|\[(?:[^\\\]]|\\[\S\s])*(?:]|$))+\/)/,
      ]);
    (h = a.types) && e.push(["typ", h]);
    a = ("" + a.keywords).replace(/^ | $/g, "");
    a.length &&
      e.push(["kwd", RegExp("^(?:" + a.replace(/[\s,]+/g, "|") + ")\\b"), q]);
    m.push(["pln", /^\s+/, q, " \r\n\t\xa0"]);
    e.push(
      ["lit", /^@[$_a-z][\w$@]*/i, q],
      ["typ", /^(?:[@_]?[A-Z]+[a-z][\w$@]*|\w+_t\b)/, q],
      ["pln", /^[$_a-z][\w$@]*/i, q],
      [
        "lit",
        /^(?:0x[\da-f]+|(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d\+)(?:e[+-]?\d+)?)[a-z]*/i,
        q,
        "0123456789",
      ],
      ["pln", /^\\[\S\s]?/, q],
      ["pun", /^.[^\s\w"-$'./@\\`]*/, q],
    );
    return x(m, e);
  }
  function D(a, m) {
    function e(a) {
      switch (a.nodeType) {
        case 1:
          if (k.test(a.className)) break;
          if ("BR" === a.nodeName)
            h(a), a.parentNode && a.parentNode.removeChild(a);
          else for (a = a.firstChild; a; a = a.nextSibling) e(a);
          break;
        case 3:
        case 4:
          if (p) {
            var b = a.nodeValue,
              d = b.match(t);
            if (d) {
              var c = b.substring(0, d.index);
              a.nodeValue = c;
              (b = b.substring(d.index + d[0].length)) &&
                a.parentNode.insertBefore(s.createTextNode(b), a.nextSibling);
              h(a);
              c || a.parentNode.removeChild(a);
            }
          }
      }
    }
    function h(a) {
      function b(a, d) {
        var e = d ? a.cloneNode(!1) : a,
          f = a.parentNode;
        if (f) {
          var f = b(f, 1),
            g = a.nextSibling;
          f.appendChild(e);
          for (var h = g; h; h = g) (g = h.nextSibling), f.appendChild(h);
        }
        return e;
      }
      for (; !a.nextSibling; ) if (((a = a.parentNode), !a)) return;
      for (
        var a = b(a.nextSibling, 0), e;
        (e = a.parentNode) && e.nodeType === 1;

      )
        a = e;
      d.push(a);
    }
    var k = /(?:^|\s)nocode(?:\s|$)/,
      t = /\r\n?|\n/,
      s = a.ownerDocument,
      l;
    a.currentStyle
      ? (l = a.currentStyle.whiteSpace)
      : window.getComputedStyle &&
        (l = s.defaultView
          .getComputedStyle(a, q)
          .getPropertyValue("white-space"));
    var p = l && "pre" === l.substring(0, 3);
    for (l = s.createElement("LI"); a.firstChild; ) l.appendChild(a.firstChild);
    for (var d = [l], g = 0; g < d.length; ++g) e(d[g]);
    m === (m | 0) && d[0].setAttribute("value", m);
    var r = s.createElement("OL");
    r.className = "linenums";
    for (var n = Math.max(0, (m - 1) | 0) || 0, g = 0, z = d.length; g < z; ++g)
      (l = d[g]),
        (l.className = "L" + ((g + n) % 10)),
        l.firstChild || l.appendChild(s.createTextNode("\xa0")),
        r.appendChild(l);
    a.appendChild(r);
  }
  function k(a, m) {
    for (var e = m.length; --e >= 0; ) {
      var h = m[e];
      A.hasOwnProperty(h)
        ? window.console &&
          console.warn("cannot override language handler %s", h)
        : (A[h] = a);
    }
  }
  function C(a, m) {
    if (!a || !A.hasOwnProperty(a))
      a = /^\s*</.test(m) ? "default-markup" : "default-code";
    return A[a];
  }
  function E(a) {
    var m = a.g;
    try {
      var e = M(a.h),
        h = e.a;
      a.a = h;
      a.c = e.c;
      a.d = 0;
      C(m, h)(a);
      var k = /\bMSIE\b/.test(navigator.userAgent),
        m = /\n/g,
        t = a.a,
        s = t.length,
        e = 0,
        l = a.c,
        p = l.length,
        h = 0,
        d = a.e,
        g = d.length,
        a = 0;
      d[g] = s;
      var r, n;
      for (n = r = 0; n < g; )
        d[n] !== d[n + 2] ? ((d[r++] = d[n++]), (d[r++] = d[n++])) : (n += 2);
      g = r;
      for (n = r = 0; n < g; ) {
        for (
          var z = d[n], f = d[n + 1], b = n + 2;
          b + 2 <= g && d[b + 1] === f;

        )
          b += 2;
        d[r++] = z;
        d[r++] = f;
        n = b;
      }
      for (d.length = r; h < p; ) {
        var o = l[h + 2] || s,
          c = d[a + 2] || s,
          b = Math.min(o, c),
          i = l[h + 1],
          j;
        if (i.nodeType !== 1 && (j = t.substring(e, b))) {
          k && (j = j.replace(m, "\r"));
          i.nodeValue = j;
          var u = i.ownerDocument,
            v = u.createElement("SPAN");
          v.className = d[a + 1];
          var x = i.parentNode;
          x.replaceChild(v, i);
          v.appendChild(i);
          e < o &&
            ((l[h + 1] = i = u.createTextNode(t.substring(b, o))),
            x.insertBefore(i, v.nextSibling));
        }
        e = b;
        e >= o && (h += 2);
        e >= c && (a += 2);
      }
    } catch (w) {
      "console" in window && console.log(w && w.stack ? w.stack : w);
    }
  }
  var v = ["break,continue,do,else,for,if,return,while"],
    w = [
      [
        v,
        "auto,case,char,const,default,double,enum,extern,float,goto,int,long,register,short,signed,sizeof,static,struct,switch,typedef,union,unsigned,void,volatile",
      ],
      "catch,class,delete,false,import,new,operator,private,protected,public,this,throw,true,try,typeof",
    ],
    F = [
      w,
      "alignof,align_union,asm,axiom,bool,concept,concept_map,const_cast,constexpr,decltype,dynamic_cast,explicit,export,friend,inline,late_check,mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast,template,typeid,typename,using,virtual,where",
    ],
    G = [
      w,
      "abstract,boolean,byte,extends,final,finally,implements,import,instanceof,null,native,package,strictfp,super,synchronized,throws,transient",
    ],
    H = [
      G,
      "as,base,by,checked,decimal,delegate,descending,dynamic,event,fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock,object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var",
    ],
    w = [
      w,
      "debugger,eval,export,function,get,null,set,undefined,var,with,Infinity,NaN",
    ],
    I = [
      v,
      "and,as,assert,class,def,del,elif,except,exec,finally,from,global,import,in,is,lambda,nonlocal,not,or,pass,print,raise,try,with,yield,False,True,None",
    ],
    J = [
      v,
      "alias,and,begin,case,class,def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo,rescue,retry,self,super,then,true,undef,unless,until,when,yield,BEGIN,END",
    ],
    v = [v, "case,done,elif,esac,eval,fi,function,in,local,set,then,until"],
    K =
      /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)/,
    N = /\S/,
    O = u({
      keywords: [
        F,
        H,
        w,
        "caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END" +
          I,
        J,
        v,
      ],
      hashComments: !0,
      cStyleComments: !0,
      multiLineStrings: !0,
      regexLiterals: !0,
    }),
    A = {};
  k(O, ["default-code"]);
  k(
    x(
      [],
      [
        ["pln", /^[^<?]+/],
        ["dec", /^<!\w[^>]*(?:>|$)/],
        ["com", /^<\!--[\S\s]*?(?:--\>|$)/],
        ["lang-", /^<\?([\S\s]+?)(?:\?>|$)/],
        ["lang-", /^<%([\S\s]+?)(?:%>|$)/],
        ["pun", /^(?:<[%?]|[%?]>)/],
        ["lang-", /^<xmp\b[^>]*>([\S\s]+?)<\/xmp\b[^>]*>/i],
        ["lang-js", /^<script\b[^>]*>([\S\s]*?)(<\/script\b[^>]*>)/i],
        ["lang-css", /^<style\b[^>]*>([\S\s]*?)(<\/style\b[^>]*>)/i],
        ["lang-in.tag", /^(<\/?[a-z][^<>]*>)/i],
      ],
    ),
    ["default-markup", "htm", "html", "mxml", "xhtml", "xml", "xsl"],
  );
  k(
    x(
      [
        ["pln", /^\s+/, q, " \t\r\n"],
        ["atv", /^(?:"[^"]*"?|'[^']*'?)/, q, "\"'"],
      ],
      [
        ["tag", /^^<\/?[a-z](?:[\w-.:]*\w)?|\/?>$/i],
        ["atn", /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
        ["lang-uq.val", /^=\s*([^\s"'>]*(?:[^\s"'/>]|\/(?=\s)))/],
        ["pun", /^[/<->]+/],
        ["lang-js", /^on\w+\s*=\s*"([^"]+)"/i],
        ["lang-js", /^on\w+\s*=\s*'([^']+)'/i],
        ["lang-js", /^on\w+\s*=\s*([^\s"'>]+)/i],
        ["lang-css", /^style\s*=\s*"([^"]+)"/i],
        ["lang-css", /^style\s*=\s*'([^']+)'/i],
        ["lang-css", /^style\s*=\s*([^\s"'>]+)/i],
      ],
    ),
    ["in.tag"],
  );
  k(x([], [["atv", /^[\S\s]+/]]), ["uq.val"]);
  k(u({ keywords: F, hashComments: !0, cStyleComments: !0, types: K }), [
    "c",
    "cc",
    "cpp",
    "cxx",
    "cyc",
    "m",
  ]);
  k(u({ keywords: "null,true,false" }), ["json"]);
  k(
    u({
      keywords: H,
      hashComments: !0,
      cStyleComments: !0,
      verbatimStrings: !0,
      types: K,
    }),
    ["cs"],
  );
  k(u({ keywords: G, cStyleComments: !0 }), ["java"]);
  k(u({ keywords: v, hashComments: !0, multiLineStrings: !0 }), [
    "bsh",
    "csh",
    "sh",
  ]);
  k(
    u({
      keywords: I,
      hashComments: !0,
      multiLineStrings: !0,
      tripleQuotedStrings: !0,
    }),
    ["cv", "py"],
  );
  k(
    u({
      keywords:
        "caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END",
      hashComments: !0,
      multiLineStrings: !0,
      regexLiterals: !0,
    }),
    ["perl", "pl", "pm"],
  );
  k(
    u({
      keywords: J,
      hashComments: !0,
      multiLineStrings: !0,
      regexLiterals: !0,
    }),
    ["rb"],
  );
  k(u({ keywords: w, cStyleComments: !0, regexLiterals: !0 }), ["js"]);
  k(
    u({
      keywords:
        "all,and,by,catch,class,else,extends,false,finally,for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then,true,try,unless,until,when,while,yes",
      hashComments: 3,
      cStyleComments: !0,
      multilineStrings: !0,
      tripleQuotedStrings: !0,
      regexLiterals: !0,
    }),
    ["coffee"],
  );
  k(x([], [["str", /^[\S\s]+/]]), ["regex"]);
  window.prettyPrintOne = function (a, m, e) {
    var h = document.createElement("PRE");
    h.innerHTML = a;
    e && D(h, e);
    E({ g: m, i: e, h: h });
    return h.innerHTML;
  };
  window.prettyPrint = function (a) {
    function m() {
      for (
        var e = window.PR_SHOULD_USE_CONTINUATION ? l.now() + 250 : Infinity;
        p < h.length && l.now() < e;
        p++
      ) {
        var n = h[p],
          k = n.className;
        if (k.indexOf("prettyprint") >= 0) {
          var k = k.match(g),
            f,
            b;
          if ((b = !k)) {
            b = n;
            for (var o = void 0, c = b.firstChild; c; c = c.nextSibling)
              var i = c.nodeType,
                o =
                  i === 1
                    ? o
                      ? b
                      : c
                    : i === 3
                      ? N.test(c.nodeValue)
                        ? b
                        : o
                      : o;
            b = (f = o === b ? void 0 : o) && "CODE" === f.tagName;
          }
          b && (k = f.className.match(g));
          k && (k = k[1]);
          b = !1;
          for (o = n.parentNode; o; o = o.parentNode)
            if (
              (o.tagName === "pre" ||
                o.tagName === "code" ||
                o.tagName === "xmp") &&
              o.className &&
              o.className.indexOf("prettyprint") >= 0
            ) {
              b = !0;
              break;
            }
          b ||
            ((b = (b = n.className.match(/\blinenums\b(?::(\d+))?/))
              ? b[1] && b[1].length
                ? +b[1]
                : !0
              : !1) && D(n, b),
            (d = { g: k, h: n, i: b }),
            E(d));
        }
      }
      p < h.length ? setTimeout(m, 250) : a && a();
    }
    for (
      var e = [
          document.getElementsByTagName("pre"),
          document.getElementsByTagName("code"),
          document.getElementsByTagName("xmp"),
        ],
        h = [],
        k = 0;
      k < e.length;
      ++k
    )
      for (var t = 0, s = e[k].length; t < s; ++t) h.push(e[k][t]);
    var e = q,
      l = Date;
    l.now ||
      (l = {
        now: function () {
          return +new Date();
        },
      });
    var p = 0,
      d,
      g = /\blang(?:uage)?-([\w.]+)(?!\S)/;
    m();
  };
  window.PR = {
    createSimpleLexer: x,
    registerLangHandler: k,
    sourceDecorator: u,
    PR_ATTRIB_NAME: "atn",
    PR_ATTRIB_VALUE: "atv",
    PR_COMMENT: "com",
    PR_DECLARATION: "dec",
    PR_KEYWORD: "kwd",
    PR_LITERAL: "lit",
    PR_NOCODE: "nocode",
    PR_PLAIN: "pln",
    PR_PUNCTUATION: "pun",
    PR_SOURCE: "src",
    PR_STRING: "str",
    PR_TAG: "tag",
    PR_TYPE: "typ",
  };
})();

// make code pretty
window.prettyPrint && prettyPrint();


================================================
FILE: backend/tests/integration/tests/pruning/website/js/jquery.easing.1.3.js
================================================
/*
 * jQuery Easing v1.3 - http://gsgd.co.uk/sandbox/jquery/easing/
 *
 * Uses the built in easing capabilities added In jQuery 1.1
 * to offer multiple easing options
 *
 * TERMS OF USE - jQuery Easing
 *
 * Open source under the BSD License.
 *
 * Copyright © 2008 George McGinley Smith
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * Neither the name of the author nor the names of contributors may be used to endorse
 * or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 *  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

// t: current time, b: begInnIng value, c: change In value, d: duration
jQuery.easing["jswing"] = jQuery.easing["swing"];

jQuery.extend(jQuery.easing, {
  def: "easeOutQuad",
  swing: function (x, t, b, c, d) {
    //alert(jQuery.easing.default);
    return jQuery.easing[jQuery.easing.def](x, t, b, c, d);
  },
  easeInQuad: function (x, t, b, c, d) {
    return c * (t /= d) * t + b;
  },
  easeOutQuad: function (x, t, b, c, d) {
    return -c * (t /= d) * (t - 2) + b;
  },
  easeInOutQuad: function (x, t, b, c, d) {
    if ((t /= d / 2) < 1) return (c / 2) * t * t + b;
    return (-c / 2) * (--t * (t - 2) - 1) + b;
  },
  easeInCubic: function (x, t, b, c, d) {
    return c * (t /= d) * t * t + b;
  },
  easeOutCubic: function (x, t, b, c, d) {
    return c * ((t = t / d - 1) * t * t + 1) + b;
  },
  easeInOutCubic: function (x, t, b, c, d) {
    if ((t /= d / 2) < 1) return (c / 2) * t * t * t + b;
    return (c / 2) * ((t -= 2) * t * t + 2) + b;
  },
  easeInQuart: function (x, t, b, c, d) {
    return c * (t /= d) * t * t * t + b;
  },
  easeOutQuart: function (x, t, b, c, d) {
    return -c * ((t = t / d - 1) * t * t * t - 1) + b;
  },
  easeInOutQuart: function (x, t, b, c, d) {
    if ((t /= d / 2) < 1) return (c / 2) * t * t * t * t + b;
    return (-c / 2) * ((t -= 2) * t * t * t - 2) + b;
  },
  easeInQuint: function (x, t, b, c, d) {
    return c * (t /= d) * t * t * t * t + b;
  },
  easeOutQuint: function (x, t, b, c, d) {
    return c * ((t = t / d - 1) * t * t * t * t + 1) + b;
  },
  easeInOutQuint: function (x, t, b, c, d) {
    if ((t /= d / 2) < 1) return (c / 2) * t * t * t * t * t + b;
    return (c / 2) * ((t -= 2) * t * t * t * t + 2) + b;
  },
  easeInSine: function (x, t, b, c, d) {
    return -c * Math.cos((t / d) * (Math.PI / 2)) + c + b;
  },
  easeOutSine: function (x, t, b, c, d) {
    return c * Math.sin((t / d) * (Math.PI / 2)) + b;
  },
  easeInOutSine: function (x, t, b, c, d) {
    return (-c / 2) * (Math.cos((Math.PI * t) / d) - 1) + b;
  },
  easeInExpo: function (x, t, b, c, d) {
    return t == 0 ? b : c * Math.pow(2, 10 * (t / d - 1)) + b;
  },
  easeOutExpo: function (x, t, b, c, d) {
    return t == d ? b + c : c * (-Math.pow(2, (-10 * t) / d) + 1) + b;
  },
  easeInOutExpo: function (x, t, b, c, d) {
    if (t == 0) return b;
    if (t == d) return b + c;
    if ((t /= d / 2) < 1) return (c / 2) * Math.pow(2, 10 * (t - 1)) + b;
    return (c / 2) * (-Math.pow(2, -10 * --t) + 2) + b;
  },
  easeInCirc: function (x, t, b, c, d) {
    return -c * (Math.sqrt(1 - (t /= d) * t) - 1) + b;
  },
  easeOutCirc: function (x, t, b, c, d) {
    return c * Math.sqrt(1 - (t = t / d - 1) * t) + b;
  },
  easeInOutCirc: function (x, t, b, c, d) {
    if ((t /= d / 2) < 1) return (-c / 2) * (Math.sqrt(1 - t * t) - 1) + b;
    return (c / 2) * (Math.sqrt(1 - (t -= 2) * t) + 1) + b;
  },
  easeInElastic: function (x, t, b, c, d) {
    var s = 1.70158;
    var p = 0;
    var a = c;
    if (t == 0) return b;
    if ((t /= d) == 1) return b + c;
    if (!p) p = d * 0.3;
    if (a < Math.abs(c)) {
      a = c;
      var s = p / 4;
    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);
    return (
      -(
        a *
        Math.pow(2, 10 * (t -= 1)) *
        Math.sin(((t * d - s) * (2 * Math.PI)) / p)
      ) + b
    );
  },
  easeOutElastic: function (x, t, b, c, d) {
    var s = 1.70158;
    var p = 0;
    var a = c;
    if (t == 0) return b;
    if ((t /= d) == 1) return b + c;
    if (!p) p = d * 0.3;
    if (a < Math.abs(c)) {
      a = c;
      var s = p / 4;
    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);
    return (
      a * Math.pow(2, -10 * t) * Math.sin(((t * d - s) * (2 * Math.PI)) / p) +
      c +
      b
    );
  },
  easeInOutElastic: function (x, t, b, c, d) {
    var s = 1.70158;
    var p = 0;
    var a = c;
    if (t == 0) return b;
    if ((t /= d / 2) == 2) return b + c;
    if (!p) p = d * (0.3 * 1.5);
    if (a < Math.abs(c)) {
      a = c;
      var s = p / 4;
    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);
    if (t < 1)
      return (
        -0.5 *
          (a *
            Math.pow(2, 10 * (t -= 1)) *
            Math.sin(((t * d - s) * (2 * Math.PI)) / p)) +
        b
      );
    return (
      a *
        Math.pow(2, -10 * (t -= 1)) *
        Math.sin(((t * d - s) * (2 * Math.PI)) / p) *
        0.5 +
      c +
      b
    );
  },
  easeInBack: function (x, t, b, c, d, s) {
    if (s == undefined) s = 1.70158;
    return c * (t /= d) * t * ((s + 1) * t - s) + b;
  },
  easeOutBack: function (x, t, b, c, d, s) {
    if (s == undefined) s = 1.70158;
    return c * ((t = t / d - 1) * t * ((s + 1) * t + s) + 1) + b;
  },
  easeInOutBack: function (x, t, b, c, d, s) {
    if (s == undefined) s = 1.70158;
    if ((t /= d / 2) < 1)
      return (c / 2) * (t * t * (((s *= 1.525) + 1) * t - s)) + b;
    return (c / 2) * ((t -= 2) * t * (((s *= 1.525) + 1) * t + s) + 2) + b;
  },
  easeInBounce: function (x, t, b, c, d) {
    return c - jQuery.easing.easeOutBounce(x, d - t, 0, c, d) + b;
  },
  easeOutBounce: function (x, t, b, c, d) {
    if ((t /= d) < 1 / 2.75) {
      return c * (7.5625 * t * t) + b;
    } else if (t < 2 / 2.75) {
      return c * (7.5625 * (t -= 1.5 / 2.75) * t + 0.75) + b;
    } else if (t < 2.5 / 2.75) {
      return c * (7.5625 * (t -= 2.25 / 2.75) * t + 0.9375) + b;
    } else {
      return c * (7.5625 * (t -= 2.625 / 2.75) * t + 0.984375) + b;
    }
  },
  easeInOutBounce: function (x, t, b, c, d) {
    if (t < d / 2)
      return jQuery.easing.easeInBounce(x, t * 2, 0, c, d) * 0.5 + b;
    return (
      jQuery.easing.easeOutBounce(x, t * 2 - d, 0, c, d) * 0.5 + c * 0.5 + b
    );
  },
});

/*
 *
 * TERMS OF USE - EASING EQUATIONS
 *
 * Open source under the BSD License.
 *
 * Copyright © 2001 Robert Penner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * Neither the name of the author nor the names of contributors may be used to endorse
 * or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 *  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */


================================================
FILE: backend/tests/integration/tests/pruning/website/js/jquery.fancybox-media.js
================================================
/*!
 * Media helper for fancyBox
 * version: 1.0.5 (Tue, 23 Oct 2012)
 * @requires fancyBox v2.0 or later
 *
 * Usage:
 *     $(".fancybox").fancybox({
 *         helpers : {
 *             media: true
 *         }
 *     });
 *
 * Set custom URL parameters:
 *     $(".fancybox").fancybox({
 *         helpers : {
 *             media: {
 *                 youtube : {
 *                     params : {
 *                         autoplay : 0
 *                     }
 *                 }
 *             }
 *         }
 *     });
 *
 * Or:
 *     $(".fancybox").fancybox({,
 *	       helpers : {
 *             media: true
 *         },
 *         youtube : {
 *             autoplay: 0
 *         }
 *     });
 *
 *  Supports:
 *
 *      Youtube
 *          http://www.youtube.com/watch?v=opj24KnzrWo
 *          http://www.youtube.com/embed/opj24KnzrWo
 *          http://youtu.be/opj24KnzrWo
 *      Vimeo
 *          http://vimeo.com/40648169
 *          http://vimeo.com/channels/staffpicks/38843628
 *          http://vimeo.com/groups/surrealism/videos/36516384
 *          http://player.vimeo.com/video/45074303
 *      Metacafe
 *          http://www.metacafe.com/watch/7635964/dr_seuss_the_lorax_movie_trailer/
 *          http://www.metacafe.com/watch/7635964/
 *      Dailymotion
 *          http://www.dailymotion.com/video/xoytqh_dr-seuss-the-lorax-premiere_people
 *      Twitvid
 *          http://twitvid.com/QY7MD
 *      Twitpic
 *          http://twitpic.com/7p93st
 *      Instagram
 *          http://instagr.am/p/IejkuUGxQn/
 *          http://instagram.com/p/IejkuUGxQn/
 *      Google maps
 *          http://maps.google.com/maps?q=Eiffel+Tower,+Avenue+Gustave+Eiffel,+Paris,+France&t=h&z=17
 *          http://maps.google.com/?ll=48.857995,2.294297&spn=0.007666,0.021136&t=m&z=16
 *          http://maps.google.com/?ll=48.859463,2.292626&spn=0.000965,0.002642&t=m&z=19&layer=c&cbll=48.859524,2.292532&panoid=YJ0lq28OOy3VT2IqIuVY0g&cbp=12,151.58,,0,-15.56
 */
(function ($) {
  "use strict";

  //Shortcut for fancyBox object
  var F = $.fancybox,
    format = function (url, rez, params) {
      params = params || "";

      if ($.type(params) === "object") {
        params = $.param(params, true);
      }

      $.each(rez, function (key, value) {
        url = url.replace("$" + key, value || "");
      });

      if (params.length) {
        url += (url.indexOf("?") > 0 ? "&" : "?") + params;
      }

      return url;
    };

  //Add helper object
  F.helpers.media = {
    defaults: {
      youtube: {
        matcher:
          /(youtube\.com|youtu\.be)\/(watch\?v=|v\/|u\/|embed\/?)?(videoseries\?list=(.*)|[\w-]{11}|\?listType=(.*)&list=(.*)).*/i,
        params: {
          autoplay: 1,
          autohide: 1,
          fs: 1,
          rel: 0,
          hd: 1,
          wmode: "opaque",
          enablejsapi: 1,
        },
        type: "iframe",
        url: "//www.youtube.com/embed/$3",
      },
      vimeo: {
        matcher: /(?:vimeo(?:pro)?.com)\/(?:[^\d]+)?(\d+)(?:.*)/,
        params: {
          autoplay: 1,
          hd: 1,
          show_title: 1,
          show_byline: 1,
          show_portrait: 0,
          fullscreen: 1,
        },
        type: "iframe",
        url: "//player.vimeo.com/video/$1",
      },
      metacafe: {
        matcher: /metacafe.com\/(?:watch|fplayer)\/([\w\-]{1,10})/,
        params: {
          autoPlay: "yes",
        },
        type: "swf",
        url: function (rez, params, obj) {
          obj.swf.flashVars = "playerVars=" + $.param(params, true);

          return "//www.metacafe.com/fplayer/" + rez[1] + "/.swf";
        },
      },
      dailymotion: {
        matcher: /dailymotion.com\/video\/(.*)\/?(.*)/,
        params: {
          additionalInfos: 0,
          autoStart: 1,
        },
        type: "swf",
        url: "//www.dailymotion.com/swf/video/$1",
      },
      twitvid: {
        matcher: /twitvid\.com\/([a-zA-Z0-9_\-\?\=]+)/i,
        params: {
          autoplay: 0,
        },
        type: "iframe",
        url: "//www.twitvid.com/embed.php?guid=$1",
      },
      twitpic: {
        matcher:
          /twitpic\.com\/(?!(?:place|photos|events)\/)([a-zA-Z0-9\?\=\-]+)/i,
        type: "image",
        url: "//twitpic.com/show/full/$1/",
      },
      instagram: {
        matcher: /(instagr\.am|instagram\.com)\/p\/([a-zA-Z0-9_\-]+)\/?/i,
        type: "image",
        url: "//$1/p/$2/media/",
      },
      google_maps: {
        matcher: /maps\.google\.([a-z]{2,3}(\.[a-z]{2})?)\/(\?ll=|maps\?)(.*)/i,
        type: "iframe",
        url: function (rez) {
          return (
            "//maps.google." +
            rez[1] +
            "/" +
            rez[3] +
            "" +
            rez[4] +
            "&output=" +
            (rez[4].indexOf("layer=c") > 0 ? "svembed" : "embed")
          );
        },
      },
    },

    beforeLoad: function (opts, obj) {
      var url = obj.href || "",
        type = false,
        what,
        item,
        rez,
        params;

      for (what in opts) {
        item = opts[what];
        rez = url.match(item.matcher);

        if (rez) {
          type = item.type;
          params = $.extend(
            true,
            {},
            item.params,
            obj[what] ||
              ($.isPlainObject(opts[what]) ? opts[what].params : null),
          );

          url =
            $.type(item.url) === "function"
              ? item.url.call(this, rez, params, obj)
              : format(item.url, rez, params);

          break;
        }
      }

      if (type) {
        obj.href = url;
        obj.type = type;

        obj.autoHeight = false;
      }
    },
  };
})(jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/jquery.fancybox.pack.js
================================================
/*! fancyBox v2.1.4 fancyapps.com | fancyapps.com/fancybox/#license */
(function (C, z, f, r) {
  var q = f(C),
    n = f(z),
    b = (f.fancybox = function () {
      b.open.apply(this, arguments);
    }),
    H = navigator.userAgent.match(/msie/),
    w = null,
    s = z.createTouch !== r,
    t = function (a) {
      return a && a.hasOwnProperty && a instanceof f;
    },
    p = function (a) {
      return a && "string" === f.type(a);
    },
    F = function (a) {
      return p(a) && 0 < a.indexOf("%");
    },
    l = function (a, d) {
      var e = parseInt(a, 10) || 0;
      d && F(a) && (e *= b.getViewport()[d] / 100);
      return Math.ceil(e);
    },
    x = function (a, b) {
      return l(a, b) + "px";
    };
  f.extend(b, {
    version: "2.1.4",
    defaults: {
      padding: 15,
      margin: 20,
      width: 800,
      height: 600,
      minWidth: 100,
      minHeight: 100,
      maxWidth: 9999,
      maxHeight: 9999,
      autoSize: !0,
      autoHeight: !1,
      autoWidth: !1,
      autoResize: !0,
      autoCenter: !s,
      fitToView: !0,
      aspectRatio: !1,
      topRatio: 0.5,
      leftRatio: 0.5,
      scrolling: "auto",
      wrapCSS: "",
      arrows: !0,
      closeBtn: !0,
      closeClick: !1,
      nextClick: !1,
      mouseWheel: !0,
      autoPlay: !1,
      playSpeed: 3e3,
      preload: 3,
      modal: !1,
      loop: !0,
      ajax: { dataType: "html", headers: { "X-fancyBox": !0 } },
      iframe: { scrolling: "auto", preload: !0 },
      swf: {
        wmode: "transparent",
        allowfullscreen: "true",
        allowscriptaccess: "always",
      },
      keys: {
        next: { 13: "left", 34: "up", 39: "left", 40: "up" },
        prev: { 8: "right", 33: "down", 37: "right", 38: "down" },
        close: [27],
        play: [32],
        toggle: [70],
      },
      direction: { next: "left", prev: "right" },
      scrollOutside: !0,
      index: 0,
      type: null,
      href: null,
      content: null,
      title: null,
      tpl: {
        wrap: '<div class="fancybox-wrap" tabIndex="-1"><div class="fancybox-skin"><div class="fancybox-outer"><div class="fancybox-inner"></div></div></div></div>',
        image: '<img class="fancybox-image" src="{href}" alt="" />',
        iframe:
          '<iframe id="fancybox-frame{rnd}" name="fancybox-frame{rnd}" class="fancybox-iframe" frameborder="0" vspace="0" hspace="0" webkitAllowFullScreen mozallowfullscreen allowFullScreen' +
          (H ? ' allowtransparency="true"' : "") +
          "></iframe>",
        error:
          '<p class="fancybox-error">The requested content cannot be loaded.<br/>Please try again later.</p>',
        closeBtn:
          '<a title="Close" class="fancybox-item fancybox-close" href="javascript:;"><i class="font-icon-remove"></i></a>',
        next: '<a title="Next" class="fancybox-nav fancybox-next" href="javascript:;"><span><i class="font-icon-arrow-simple-right"></i></span></a>',
        prev: '<a title="Previous" class="fancybox-nav fancybox-prev" href="javascript:;"><span><i class="font-icon-arrow-simple-left"></i></span></a>',
      },
      openEffect: "fade",
      openSpeed: 250,
      openEasing: "swing",
      openOpacity: !0,
      openMethod: "zoomIn",
      closeEffect: "fade",
      closeSpeed: 250,
      closeEasing: "swing",
      closeOpacity: !0,
      closeMethod: "zoomOut",
      nextEffect: "elastic",
      nextSpeed: 250,
      nextEasing: "swing",
      nextMethod: "changeIn",
      prevEffect: "elastic",
      prevSpeed: 250,
      prevEasing: "swing",
      prevMethod: "changeOut",
      helpers: { overlay: !0, title: !0 },
      onCancel: f.noop,
      beforeLoad: f.noop,
      afterLoad: f.noop,
      beforeShow: f.noop,
      afterShow: f.noop,
      beforeChange: f.noop,
      beforeClose: f.noop,
      afterClose: f.noop,
    },
    group: {},
    opts: {},
    previous: null,
    coming: null,
    current: null,
    isActive: !1,
    isOpen: !1,
    isOpened: !1,
    wrap: null,
    skin: null,
    outer: null,
    inner: null,
    player: { timer: null, isActive: !1 },
    ajaxLoad: null,
    imgPreload: null,
    transitions: {},
    helpers: {},
    open: function (a, d) {
      if (a && (f.isPlainObject(d) || (d = {}), !1 !== b.close(!0)))
        return (
          f.isArray(a) || (a = t(a) ? f(a).get() : [a]),
          f.each(a, function (e, c) {
            var k = {},
              g,
              h,
              j,
              m,
              l;
            "object" === f.type(c) &&
              (c.nodeType && (c = f(c)),
              t(c)
                ? ((k = {
                    href: c.data("fancybox-href") || c.attr("href"),
                    title: c.data("fancybox-title") || c.attr("title"),
                    isDom: !0,
                    element: c,
                  }),
                  f.metadata && f.extend(!0, k, c.metadata()))
                : (k = c));
            g = d.href || k.href || (p(c) ? c : null);
            h = d.title !== r ? d.title : k.title || "";
            m = (j = d.content || k.content) ? "html" : d.type || k.type;
            !m &&
              k.isDom &&
              ((m = c.data("fancybox-type")),
              m ||
                (m = (m = c.prop("class").match(/fancybox\.(\w+)/))
                  ? m[1]
                  : null));
            p(g) &&
              (m ||
                (b.isImage(g)
                  ? (m = "image")
                  : b.isSWF(g)
                    ? (m = "swf")
                    : "#" === g.charAt(0)
                      ? (m = "inline")
                      : p(c) && ((m = "html"), (j = c))),
              "ajax" === m &&
                ((l = g.split(/\s+/, 2)), (g = l.shift()), (l = l.shift())));
            j ||
              ("inline" === m
                ? g
                  ? (j = f(p(g) ? g.replace(/.*(?=#[^\s]+$)/, "") : g))
                  : k.isDom && (j = c)
                : "html" === m
                  ? (j = g)
                  : !m && !g && k.isDom && ((m = "inline"), (j = c)));
            f.extend(k, {
              href: g,
              type: m,
              content: j,
              title: h,
              selector: l,
            });
            a[e] = k;
          }),
          (b.opts = f.extend(!0, {}, b.defaults, d)),
          d.keys !== r &&
            (b.opts.keys = d.keys ? f.extend({}, b.defaults.keys, d.keys) : !1),
          (b.group = a),
          b._start(b.opts.index)
        );
    },
    cancel: function () {
      var a = b.coming;
      a &&
        !1 !== b.trigger("onCancel") &&
        (b.hideLoading(),
        b.ajaxLoad && b.ajaxLoad.abort(),
        (b.ajaxLoad = null),
        b.imgPreload && (b.imgPreload.onload = b.imgPreload.onerror = null),
        a.wrap && a.wrap.stop(!0, !0).trigger("onReset").remove(),
        (b.coming = null),
        b.current || b._afterZoomOut(a));
    },
    close: function (a) {
      b.cancel();
      !1 !== b.trigger("beforeClose") &&
        (b.unbindEvents(),
        b.isActive &&
          (!b.isOpen || !0 === a
            ? (f(".fancybox-wrap").stop(!0).trigger("onReset").remove(),
              b._afterZoomOut())
            : ((b.isOpen = b.isOpened = !1),
              (b.isClosing = !0),
              f(".fancybox-item, .fancybox-nav").remove(),
              b.wrap.stop(!0, !0).removeClass("fancybox-opened"),
              b.transitions[b.current.closeMethod]())));
    },
    play: function (a) {
      var d = function () {
          clearTimeout(b.player.timer);
        },
        e = function () {
          d();
          b.current &&
            b.player.isActive &&
            (b.player.timer = setTimeout(b.next, b.current.playSpeed));
        },
        c = function () {
          d();
          f("body").unbind(".player");
          b.player.isActive = !1;
          b.trigger("onPlayEnd");
        };
      if (!0 === a || (!b.player.isActive && !1 !== a)) {
        if (
          b.current &&
          (b.current.loop || b.current.index < b.group.length - 1)
        )
          (b.player.isActive = !0),
            f("body").bind({
              "afterShow.player onUpdate.player": e,
              "onCancel.player beforeClose.player": c,
              "beforeLoad.player": d,
            }),
            e(),
            b.trigger("onPlayStart");
      } else c();
    },
    next: function (a) {
      var d = b.current;
      d && (p(a) || (a = d.direction.next), b.jumpto(d.index + 1, a, "next"));
    },
    prev: function (a) {
      var d = b.current;
      d && (p(a) || (a = d.direction.prev), b.jumpto(d.index - 1, a, "prev"));
    },
    jumpto: function (a, d, e) {
      var c = b.current;
      c &&
        ((a = l(a)),
        (b.direction = d || c.direction[a >= c.index ? "next" : "prev"]),
        (b.router = e || "jumpto"),
        c.loop &&
          (0 > a && (a = c.group.length + (a % c.group.length)),
          (a %= c.group.length)),
        c.group[a] !== r && (b.cancel(), b._start(a)));
    },
    reposition: function (a, d) {
      var e = b.current,
        c = e ? e.wrap : null,
        k;
      c &&
        ((k = b._getPosition(d)),
        a && "scroll" === a.type
          ? (delete k.position, c.stop(!0, !0).animate(k, 200))
          : (c.css(k), (e.pos = f.extend({}, e.dim, k))));
    },
    update: function (a) {
      var d = a && a.type,
        e = !d || "orientationchange" === d;
      e && (clearTimeout(w), (w = null));
      b.isOpen &&
        !w &&
        (w = setTimeout(
          function () {
            var c = b.current;
            c &&
              !b.isClosing &&
              (b.wrap.removeClass("fancybox-tmp"),
              (e || "load" === d || ("resize" === d && c.autoResize)) &&
                b._setDimension(),
              ("scroll" === d && c.canShrink) || b.reposition(a),
              b.trigger("onUpdate"),
              (w = null));
          },
          e && !s ? 0 : 300,
        ));
    },
    toggle: function (a) {
      b.isOpen &&
        ((b.current.fitToView =
          "boolean" === f.type(a) ? a : !b.current.fitToView),
        s &&
          (b.wrap.removeAttr("style").addClass("fancybox-tmp"),
          b.trigger("onUpdate")),
        b.update());
    },
    hideLoading: function () {
      n.unbind(".loading");
      f("#fancybox-loading").remove();
    },
    showLoading: function () {
      var a, d;
      b.hideLoading();
      a = f('<div id="fancybox-loading"><div></div></div>')
        .click(b.cancel)
        .appendTo("body");
      n.bind("keydown.loading", function (a) {
        if (27 === (a.which || a.keyCode)) a.preventDefault(), b.cancel();
      });
      b.defaults.fixed ||
        ((d = b.getViewport()),
        a.css({
          position: "absolute",
          top: 0.5 * d.h + d.y,
          left: 0.5 * d.w + d.x,
        }));
    },
    getViewport: function () {
      var a = (b.current && b.current.locked) || !1,
        d = { x: q.scrollLeft(), y: q.scrollTop() };
      a
        ? ((d.w = a[0].clientWidth), (d.h = a[0].clientHeight))
        : ((d.w = s && C.innerWidth ? C.innerWidth : q.width()),
          (d.h = s && C.innerHeight ? C.innerHeight : q.height()));
      return d;
    },
    unbindEvents: function () {
      b.wrap && t(b.wrap) && b.wrap.unbind(".fb");
      n.unbind(".fb");
      q.unbind(".fb");
    },
    bindEvents: function () {
      var a = b.current,
        d;
      a &&
        (q.bind(
          "orientationchange.fb" +
            (s ? "" : " resize.fb") +
            (a.autoCenter && !a.locked ? " scroll.fb" : ""),
          b.update,
        ),
        (d = a.keys) &&
          n.bind("keydown.fb", function (e) {
            var c = e.which || e.keyCode,
              k = e.target || e.srcElement;
            if (27 === c && b.coming) return !1;
            !e.ctrlKey &&
              !e.altKey &&
              !e.shiftKey &&
              !e.metaKey &&
              (!k || (!k.type && !f(k).is("[contenteditable]"))) &&
              f.each(d, function (d, k) {
                if (1 < a.group.length && k[c] !== r)
                  return b[d](k[c]), e.preventDefault(), !1;
                if (-1 < f.inArray(c, k)) return b[d](), e.preventDefault(), !1;
              });
          }),
        f.fn.mousewheel &&
          a.mouseWheel &&
          b.wrap.bind("mousewheel.fb", function (d, c, k, g) {
            for (
              var h = f(d.target || null), j = !1;
              h.length &&
              !j &&
              !h.is(".fancybox-skin") &&
              !h.is(".fancybox-wrap");

            )
              (j =
                h[0] &&
                !(h[0].style.overflow && "hidden" === h[0].style.overflow) &&
                ((h[0].clientWidth && h[0].scrollWidth > h[0].clientWidth) ||
                  (h[0].clientHeight &&
                    h[0].scrollHeight > h[0].clientHeight))),
                (h = f(h).parent());
            if (0 !== c && !j && 1 < b.group.length && !a.canShrink) {
              if (0 < g || 0 < k) b.prev(0 < g ? "down" : "left");
              else if (0 > g || 0 > k) b.next(0 > g ? "up" : "right");
              d.preventDefault();
            }
          }));
    },
    trigger: function (a, d) {
      var e,
        c = d || b.coming || b.current;
      if (c) {
        f.isFunction(c[a]) &&
          (e = c[a].apply(c, Array.prototype.slice.call(arguments, 1)));
        if (!1 === e) return !1;
        c.helpers &&
          f.each(c.helpers, function (d, e) {
            e &&
              b.helpers[d] &&
              f.isFunction(b.helpers[d][a]) &&
              ((e = f.extend(!0, {}, b.helpers[d].defaults, e)),
              b.helpers[d][a](e, c));
          });
        f.event.trigger(a + ".fb");
      }
    },
    isImage: function (a) {
      return (
        p(a) &&
        a.match(
          /(^data:image\/.*,)|(\.(jp(e|g|eg)|gif|png|bmp|webp)((\?|#).*)?$)/i,
        )
      );
    },
    isSWF: function (a) {
      return p(a) && a.match(/\.(swf)((\?|#).*)?$/i);
    },
    _start: function (a) {
      var d = {},
        e,
        c;
      a = l(a);
      e = b.group[a] || null;
      if (!e) return !1;
      d = f.extend(!0, {}, b.opts, e);
      e = d.margin;
      c = d.padding;
      "number" === f.type(e) && (d.margin = [e, e, e, e]);
      "number" === f.type(c) && (d.padding = [c, c, c, c]);
      d.modal &&
        f.extend(!0, d, {
          closeBtn: !1,
          closeClick: !1,
          nextClick: !1,
          arrows: !1,
          mouseWheel: !1,
          keys: null,
          helpers: { overlay: { closeClick: !1 } },
        });
      d.autoSize && (d.autoWidth = d.autoHeight = !0);
      "auto" === d.width && (d.autoWidth = !0);
      "auto" === d.height && (d.autoHeight = !0);
      d.group = b.group;
      d.index = a;
      b.coming = d;
      if (!1 === b.trigger("beforeLoad")) b.coming = null;
      else {
        c = d.type;
        e = d.href;
        if (!c)
          return (
            (b.coming = null),
            b.current && b.router && "jumpto" !== b.router
              ? ((b.current.index = a), b[b.router](b.direction))
              : !1
          );
        b.isActive = !0;
        if ("image" === c || "swf" === c)
          (d.autoHeight = d.autoWidth = !1), (d.scrolling = "visible");
        "image" === c && (d.aspectRatio = !0);
        "iframe" === c && s && (d.scrolling = "scroll");
        d.wrap = f(d.tpl.wrap)
          .addClass(
            "fancybox-" +
              (s ? "mobile" : "desktop") +
              " fancybox-type-" +
              c +
              " fancybox-tmp " +
              d.wrapCSS,
          )
          .appendTo(d.parent || "body");
        f.extend(d, {
          skin: f(".fancybox-skin", d.wrap),
          outer: f(".fancybox-outer", d.wrap),
          inner: f(".fancybox-inner", d.wrap),
        });
        f.each(["Top", "Right", "Bottom", "Left"], function (a, b) {
          d.skin.css("padding" + b, x(d.padding[a]));
        });
        b.trigger("onReady");
        if ("inline" === c || "html" === c) {
          if (!d.content || !d.content.length) return b._error("content");
        } else if (!e) return b._error("href");
        "image" === c
          ? b._loadImage()
          : "ajax" === c
            ? b._loadAjax()
            : "iframe" === c
              ? b._loadIframe()
              : b._afterLoad();
      }
    },
    _error: function (a) {
      f.extend(b.coming, {
        type: "html",
        autoWidth: !0,
        autoHeight: !0,
        minWidth: 0,
        minHeight: 0,
        scrolling: "no",
        hasError: a,
        content: b.coming.tpl.error,
      });
      b._afterLoad();
    },
    _loadImage: function () {
      var a = (b.imgPreload = new Image());
      a.onload = function () {
        this.onload = this.onerror = null;
        b.coming.width = this.width;
        b.coming.height = this.height;
        b._afterLoad();
      };
      a.onerror = function () {
        this.onload = this.onerror = null;
        b._error("image");
      };
      a.src = b.coming.href;
      !0 !== a.complete && b.showLoading();
    },
    _loadAjax: function () {
      var a = b.coming;
      b.showLoading();
      b.ajaxLoad = f.ajax(
        f.extend({}, a.ajax, {
          url: a.href,
          error: function (a, e) {
            b.coming && "abort" !== e ? b._error("ajax", a) : b.hideLoading();
          },
          success: function (d, e) {
            "success" === e && ((a.content = d), b._afterLoad());
          },
        }),
      );
    },
    _loadIframe: function () {
      var a = b.coming,
        d = f(a.tpl.iframe.replace(/\{rnd\}/g, new Date().getTime()))
          .attr("scrolling", s ? "auto" : a.iframe.scrolling)
          .attr("src", a.href);
      f(a.wrap).bind("onReset", function () {
        try {
          f(this)
            .find("iframe")
            .hide()
            .attr("src", "//about:blank")
            .end()
            .empty();
        } catch (a) {}
      });
      a.iframe.preload &&
        (b.showLoading(),
        d.one("load", function () {
          f(this).data("ready", 1);
          s || f(this).bind("load.fb", b.update);
          f(this)
            .parents(".fancybox-wrap")
            .width("100%")
            .removeClass("fancybox-tmp")
            .show();
          b._afterLoad();
        }));
      a.content = d.appendTo(a.inner);
      a.iframe.preload || b._afterLoad();
    },
    _preloadImages: function () {
      var a = b.group,
        d = b.current,
        e = a.length,
        c = d.preload ? Math.min(d.preload, e - 1) : 0,
        f,
        g;
      for (g = 1; g <= c; g += 1)
        (f = a[(d.index + g) % e]),
          "image" === f.type && f.href && (new Image().src = f.href);
    },
    _afterLoad: function () {
      var a = b.coming,
        d = b.current,
        e,
        c,
        k,
        g,
        h;
      b.hideLoading();
      if (a && !1 !== b.isActive)
        if (!1 === b.trigger("afterLoad", a, d))
          a.wrap.stop(!0).trigger("onReset").remove(), (b.coming = null);
        else {
          d &&
            (b.trigger("beforeChange", d),
            d.wrap
              .stop(!0)
              .removeClass("fancybox-opened")
              .find(".fancybox-item, .fancybox-nav")
              .remove());
          b.unbindEvents();
          e = a.content;
          c = a.type;
          k = a.scrolling;
          f.extend(b, {
            wrap: a.wrap,
            skin: a.skin,
            outer: a.outer,
            inner: a.inner,
            current: a,
            previous: d,
          });
          g = a.href;
          switch (c) {
            case "inline":
            case "ajax":
            case "html":
              a.selector
                ? (e = f("<div>").html(e).find(a.selector))
                : t(e) &&
                  (e.data("fancybox-placeholder") ||
                    e.data(
                      "fancybox-placeholder",
                      f('<div class="fancybox-placeholder"></div>')
                        .insertAfter(e)
                        .hide(),
                    ),
                  (e = e.show().detach()),
                  a.wrap.bind("onReset", function () {
                    f(this).find(e).length &&
                      e
                        .hide()
                        .replaceAll(e.data("fancybox-placeholder"))
                        .data("fancybox-placeholder", !1);
                  }));
              break;
            case "image":
              e = a.tpl.image.replace("{href}", g);
              break;
            case "swf":
              (e =
                '<object id="fancybox-swf" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" width="100%" height="100%"><param name="movie" value="' +
                g +
                '"></param>'),
                (h = ""),
                f.each(a.swf, function (a, b) {
                  e += '<param name="' + a + '" value="' + b + '"></param>';
                  h += " " + a + '="' + b + '"';
                }),
                (e +=
                  '<embed src="' +
                  g +
                  '" type="application/x-shockwave-flash" width="100%" height="100%"' +
                  h +
                  "></embed></object>");
          }
          (!t(e) || !e.parent().is(a.inner)) && a.inner.append(e);
          b.trigger("beforeShow");
          a.inner.css(
            "overflow",
            "yes" === k ? "scroll" : "no" === k ? "hidden" : k,
          );
          b._setDimension();
          b.reposition();
          b.isOpen = !1;
          b.coming = null;
          b.bindEvents();
          if (b.isOpened) {
            if (d.prevMethod) b.transitions[d.prevMethod]();
          } else
            f(".fancybox-wrap")
              .not(a.wrap)
              .stop(!0)
              .trigger("onReset")
              .remove();
          b.transitions[b.isOpened ? a.nextMethod : a.openMethod]();
          b._preloadImages();
        }
    },
    _setDimension: function () {
      var a = b.getViewport(),
        d = 0,
        e = !1,
        c = !1,
        e = b.wrap,
        k = b.skin,
        g = b.inner,
        h = b.current,
        c = h.width,
        j = h.height,
        m = h.minWidth,
        u = h.minHeight,
        n = h.maxWidth,
        v = h.maxHeight,
        s = h.scrolling,
        q = h.scrollOutside ? h.scrollbarWidth : 0,
        y = h.margin,
        p = l(y[1] + y[3]),
        r = l(y[0] + y[2]),
        z,
        A,
        t,
        D,
        B,
        G,
        C,
        E,
        w;
      e.add(k).add(g).width("auto").height("auto").removeClass("fancybox-tmp");
      y = l(k.outerWidth(!0) - k.width());
      z = l(k.outerHeight(!0) - k.height());
      A = p + y;
      t = r + z;
      D = F(c) ? ((a.w - A) * l(c)) / 100 : c;
      B = F(j) ? ((a.h - t) * l(j)) / 100 : j;
      if ("iframe" === h.type) {
        if (((w = h.content), h.autoHeight && 1 === w.data("ready")))
          try {
            w[0].contentWindow.document.location &&
              (g.width(D).height(9999),
              (G = w.contents().find("body")),
              q && G.css("overflow-x", "hidden"),
              (B = G.height()));
          } catch (H) {}
      } else if (h.autoWidth || h.autoHeight)
        g.addClass("fancybox-tmp"),
          h.autoWidth || g.width(D),
          h.autoHeight || g.height(B),
          h.autoWidth && (D = g.width()),
          h.autoHeight && (B = g.height()),
          g.removeClass("fancybox-tmp");
      c = l(D);
      j = l(B);
      E = D / B;
      m = l(F(m) ? l(m, "w") - A : m);
      n = l(F(n) ? l(n, "w") - A : n);
      u = l(F(u) ? l(u, "h") - t : u);
      v = l(F(v) ? l(v, "h") - t : v);
      G = n;
      C = v;
      h.fitToView && ((n = Math.min(a.w - A, n)), (v = Math.min(a.h - t, v)));
      A = a.w - p;
      r = a.h - r;
      h.aspectRatio
        ? (c > n && ((c = n), (j = l(c / E))),
          j > v && ((j = v), (c = l(j * E))),
          c < m && ((c = m), (j = l(c / E))),
          j < u && ((j = u), (c = l(j * E))))
        : ((c = Math.max(m, Math.min(c, n))),
          h.autoHeight && "iframe" !== h.type && (g.width(c), (j = g.height())),
          (j = Math.max(u, Math.min(j, v))));
      if (h.fitToView)
        if (
          (g.width(c).height(j),
          e.width(c + y),
          (a = e.width()),
          (p = e.height()),
          h.aspectRatio)
        )
          for (; (a > A || p > r) && c > m && j > u && !(19 < d++); )
            (j = Math.max(u, Math.min(v, j - 10))),
              (c = l(j * E)),
              c < m && ((c = m), (j = l(c / E))),
              c > n && ((c = n), (j = l(c / E))),
              g.width(c).height(j),
              e.width(c + y),
              (a = e.width()),
              (p = e.height());
        else
          (c = Math.max(m, Math.min(c, c - (a - A)))),
            (j = Math.max(u, Math.min(j, j - (p - r))));
      q && "auto" === s && j < B && c + y + q < A && (c += q);
      g.width(c).height(j);
      e.width(c + y);
      a = e.width();
      p = e.height();
      e = (a > A || p > r) && c > m && j > u;
      c = h.aspectRatio
        ? c < G && j < C && c < D && j < B
        : (c < G || j < C) && (c < D || j < B);
      f.extend(h, {
        dim: { width: x(a), height: x(p) },
        origWidth: D,
        origHeight: B,
        canShrink: e,
        canExpand: c,
        wPadding: y,
        hPadding: z,
        wrapSpace: p - k.outerHeight(!0),
        skinSpace: k.height() - j,
      });
      !w && h.autoHeight && j > u && j < v && !c && g.height("auto");
    },
    _getPosition: function (a) {
      var d = b.current,
        e = b.getViewport(),
        c = d.margin,
        f = b.wrap.width() + c[1] + c[3],
        g = b.wrap.height() + c[0] + c[2],
        c = { position: "absolute", top: c[0], left: c[3] };
      d.autoCenter && d.fixed && !a && g <= e.h && f <= e.w
        ? (c.position = "fixed")
        : d.locked || ((c.top += e.y), (c.left += e.x));
      c.top = x(Math.max(c.top, c.top + (e.h - g) * d.topRatio));
      c.left = x(Math.max(c.left, c.left + (e.w - f) * d.leftRatio));
      return c;
    },
    _afterZoomIn: function () {
      var a = b.current;
      a &&
        ((b.isOpen = b.isOpened = !0),
        b.wrap.css("overflow", "visible").addClass("fancybox-opened"),
        b.update(),
        (a.closeClick || (a.nextClick && 1 < b.group.length)) &&
          b.inner.css("cursor", "pointer").bind("click.fb", function (d) {
            !f(d.target).is("a") &&
              !f(d.target).parent().is("a") &&
              (d.preventDefault(), b[a.closeClick ? "close" : "next"]());
          }),
        a.closeBtn &&
          f(a.tpl.closeBtn)
            .appendTo(b.skin)
            .bind("click.fb", function (a) {
              a.preventDefault();
              b.close();
            }),
        a.arrows &&
          1 < b.group.length &&
          ((a.loop || 0 < a.index) &&
            f(a.tpl.prev).appendTo(b.outer).bind("click.fb", b.prev),
          (a.loop || a.index < b.group.length - 1) &&
            f(a.tpl.next).appendTo(b.outer).bind("click.fb", b.next)),
        b.trigger("afterShow"),
        !a.loop && a.index === a.group.length - 1
          ? b.play(!1)
          : b.opts.autoPlay &&
            !b.player.isActive &&
            ((b.opts.autoPlay = !1), b.play()));
    },
    _afterZoomOut: function (a) {
      a = a || b.current;
      f(".fancybox-wrap").trigger("onReset").remove();
      f.extend(b, {
        group: {},
        opts: {},
        router: !1,
        current: null,
        isActive: !1,
        isOpened: !1,
        isOpen: !1,
        isClosing: !1,
        wrap: null,
        skin: null,
        outer: null,
        inner: null,
      });
      b.trigger("afterClose", a);
    },
  });
  b.transitions = {
    getOrigPosition: function () {
      var a = b.current,
        d = a.element,
        e = a.orig,
        c = {},
        f = 50,
        g = 50,
        h = a.hPadding,
        j = a.wPadding,
        m = b.getViewport();
      !e &&
        a.isDom &&
        d.is(":visible") &&
        ((e = d.find("img:first")), e.length || (e = d));
      t(e)
        ? ((c = e.offset()),
          e.is("img") && ((f = e.outerWidth()), (g = e.outerHeight())))
        : ((c.top = m.y + (m.h - g) * a.topRatio),
          (c.left = m.x + (m.w - f) * a.leftRatio));
      if ("fixed" === b.wrap.css("position") || a.locked)
        (c.top -= m.y), (c.left -= m.x);
      return (c = {
        top: x(c.top - h * a.topRatio),
        left: x(c.left - j * a.leftRatio),
        width: x(f + j),
        height: x(g + h),
      });
    },
    step: function (a, d) {
      var e,
        c,
        f = d.prop;
      c = b.current;
      var g = c.wrapSpace,
        h = c.skinSpace;
      if ("width" === f || "height" === f)
        (e = d.end === d.start ? 1 : (a - d.start) / (d.end - d.start)),
          b.isClosing && (e = 1 - e),
          (c = "width" === f ? c.wPadding : c.hPadding),
          (c = a - c),
          b.skin[f](l("width" === f ? c : c - g * e)),
          b.inner[f](l("width" === f ? c : c - g * e - h * e));
    },
    zoomIn: function () {
      var a = b.current,
        d = a.pos,
        e = a.openEffect,
        c = "elastic" === e,
        k = f.extend({ opacity: 1 }, d);
      delete k.position;
      c
        ? ((d = this.getOrigPosition()), a.openOpacity && (d.opacity = 0.1))
        : "fade" === e && (d.opacity = 0.1);
      b.wrap.css(d).animate(k, {
        duration: "none" === e ? 0 : a.openSpeed,
        easing: a.openEasing,
        step: c ? this.step : null,
        complete: b._afterZoomIn,
      });
    },
    zoomOut: function () {
      var a = b.current,
        d = a.closeEffect,
        e = "elastic" === d,
        c = { opacity: 0.1 };
      e && ((c = this.getOrigPosition()), a.closeOpacity && (c.opacity = 0.1));
      b.wrap.animate(c, {
        duration: "none" === d ? 0 : a.closeSpeed,
        easing: a.closeEasing,
        step: e ? this.step : null,
        complete: b._afterZoomOut,
      });
    },
    changeIn: function () {
      var a = b.current,
        d = a.nextEffect,
        e = a.pos,
        c = { opacity: 1 },
        f = b.direction,
        g;
      e.opacity = 0.1;
      "elastic" === d &&
        ((g = "down" === f || "up" === f ? "top" : "left"),
        "down" === f || "right" === f
          ? ((e[g] = x(l(e[g]) - 200)), (c[g] = "+=200px"))
          : ((e[g] = x(l(e[g]) + 200)), (c[g] = "-=200px")));
      "none" === d
        ? b._afterZoomIn()
        : b.wrap.css(e).animate(c, {
            duration: a.nextSpeed,
            easing: a.nextEasing,
            complete: b._afterZoomIn,
          });
    },
    changeOut: function () {
      var a = b.previous,
        d = a.prevEffect,
        e = { opacity: 0.1 },
        c = b.direction;
      "elastic" === d &&
        (e["down" === c || "up" === c ? "top" : "left"] =
          ("up" === c || "left" === c ? "-" : "+") + "=200px");
      a.wrap.animate(e, {
        duration: "none" === d ? 0 : a.prevSpeed,
        easing: a.prevEasing,
        complete: function () {
          f(this).trigger("onReset").remove();
        },
      });
    },
  };
  b.helpers.overlay = {
    defaults: {
      closeClick: !0,
      speedOut: 200,
      showEarly: !0,
      css: {},
      locked: !s,
      fixed: !0,
    },
    overlay: null,
    fixed: !1,
    create: function (a) {
      a = f.extend({}, this.defaults, a);
      this.overlay && this.close();
      this.overlay = f('<div class="fancybox-overlay"></div>').appendTo("body");
      this.fixed = !1;
      a.fixed &&
        b.defaults.fixed &&
        (this.overlay.addClass("fancybox-overlay-fixed"), (this.fixed = !0));
    },
    open: function (a) {
      var d = this;
      a = f.extend({}, this.defaults, a);
      this.overlay
        ? this.overlay.unbind(".overlay").width("auto").height("auto")
        : this.create(a);
      this.fixed ||
        (q.bind("resize.overlay", f.proxy(this.update, this)), this.update());
      a.closeClick &&
        this.overlay.bind("click.overlay", function (a) {
          f(a.target).hasClass("fancybox-overlay") &&
            (b.isActive ? b.close() : d.close());
        });
      this.overlay.css(a.css).show();
    },
    close: function () {
      f(".fancybox-overlay").remove();
      q.unbind("resize.overlay");
      this.overlay = null;
      !1 !== this.margin &&
        (f("body").css("margin-right", this.margin), (this.margin = !1));
      this.el && this.el.removeClass("fancybox-lock");
    },
    update: function () {
      var a = "100%",
        b;
      this.overlay.width(a).height("100%");
      H
        ? ((b = Math.max(z.documentElement.offsetWidth, z.body.offsetWidth)),
          n.width() > b && (a = n.width()))
        : n.width() > q.width() && (a = n.width());
      this.overlay.width(a).height(n.height());
    },
    onReady: function (a, b) {
      f(".fancybox-overlay").stop(!0, !0);
      this.overlay ||
        ((this.margin =
          n.height() > q.height() || "scroll" === f("body").css("overflow-y")
            ? f("body").css("margin-right")
            : !1),
        (this.el = z.all && !z.querySelector ? f("html") : f("body")),
        this.create(a));
      a.locked &&
        this.fixed &&
        ((b.locked = this.overlay.append(b.wrap)), (b.fixed = !1));
      !0 === a.showEarly && this.beforeShow.apply(this, arguments);
    },
    beforeShow: function (a, b) {
      b.locked &&
        (this.el.addClass("fancybox-lock"),
        !1 !== this.margin &&
          f("body").css("margin-right", l(this.margin) + b.scrollbarWidth));
      this.open(a);
    },
    onUpdate: function () {
      this.fixed || this.update();
    },
    afterClose: function (a) {
      this.overlay &&
        !b.isActive &&
        this.overlay.fadeOut(a.speedOut, f.proxy(this.close, this));
    },
  };
  b.helpers.title = {
    defaults: { type: "float", position: "bottom" },
    beforeShow: function (a) {
      var d = b.current,
        e = d.title,
        c = a.type;
      f.isFunction(e) && (e = e.call(d.element, d));
      if (p(e) && "" !== f.trim(e)) {
        d = f(
          '<div class="fancybox-title fancybox-title-' +
            c +
            '-wrap">' +
            e +
            "</div>",
        );
        switch (c) {
          case "inside":
            c = b.skin;
            break;
          case "outside":
            c = b.wrap;
            break;
          case "over":
            c = b.inner;
            break;
          default:
            (c = b.skin),
              d.appendTo("body"),
              H && d.width(d.width()),
              d.wrapInner('<span class="child"></span>'),
              (b.current.margin[2] += Math.abs(l(d.css("margin-bottom"))));
        }
        d["top" === a.position ? "prependTo" : "appendTo"](c);
      }
    },
  };
  f.fn.fancybox = function (a) {
    var d,
      e = f(this),
      c = this.selector || "",
      k = function (g) {
        var h = f(this).blur(),
          j = d,
          k,
          l;
        !g.ctrlKey &&
          !g.altKey &&
          !g.shiftKey &&
          !g.metaKey &&
          !h.is(".fancybox-wrap") &&
          ((k = a.groupAttr || "data-fancybox-group"),
          (l = h.attr(k)),
          l || ((k = "rel"), (l = h.get(0)[k])),
          l &&
            "" !== l &&
            "nofollow" !== l &&
            ((h = c.length ? f(c) : e),
            (h = h.filter("[" + k + '="' + l + '"]')),
            (j = h.index(this))),
          (a.index = j),
          !1 !== b.open(h, a) && g.preventDefault());
      };
    a = a || {};
    d = a.index || 0;
    !c || !1 === a.live
      ? e.unbind("click.fb-start").bind("click.fb-start", k)
      : n
          .undelegate(c, "click.fb-start")
          .delegate(
            c + ":not('.fancybox-item, .fancybox-nav')",
            "click.fb-start",
            k,
          );
    this.filter("[data-fancybox-start=1]").trigger("click");
    return this;
  };
  n.ready(function () {
    f.scrollbarWidth === r &&
      (f.scrollbarWidth = function () {
        var a = f(
            '<div style="width:50px;height:50px;overflow:auto"><div/></div>',
          ).appendTo("body"),
          b = a.children(),
          b = b.innerWidth() - b.height(99).innerWidth();
        a.remove();
        return b;
      });
    if (f.support.fixedPosition === r) {
      var a = f.support,
        d = f('<div style="position:fixed;top:20px;"></div>').appendTo("body"),
        e = 20 === d[0].offsetTop || 15 === d[0].offsetTop;
      d.remove();
      a.fixedPosition = e;
    }
    f.extend(b.defaults, {
      scrollbarWidth: f.scrollbarWidth(),
      fixed: f.support.fixedPosition,
      parent: f("body"),
    });
  });
})(window, document, jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/jquery.flexslider.js
================================================
/*
 * jQuery FlexSlider v2.1
 * http://www.woothemes.com/flexslider/
 *
 * Copyright 2012 WooThemes
 * Free to use under the GPLv2 license.
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * Contributing author: Tyler Smith (@mbmufffin)
 */

(function ($) {
  //FlexSlider: Object Instance
  $.flexslider = function (el, options) {
    var slider = $(el),
      vars = $.extend({}, $.flexslider.defaults, options),
      namespace = vars.namespace,
      touch =
        "ontouchstart" in window ||
        (window.DocumentTouch && document instanceof DocumentTouch),
      eventType = touch ? "touchend" : "click",
      vertical = vars.direction === "vertical",
      reverse = vars.reverse,
      carousel = vars.itemWidth > 0,
      fade = vars.animation === "fade",
      asNav = vars.asNavFor !== "",
      methods = {};

    // Store a reference to the slider object
    $.data(el, "flexslider", slider);

    // Privat slider methods
    methods = {
      init: function () {
        slider.animating = false;
        slider.currentSlide = vars.startAt;
        slider.animatingTo = slider.currentSlide;
        slider.atEnd =
          slider.currentSlide === 0 || slider.currentSlide === slider.last;
        slider.containerSelector = vars.selector.substr(
          0,
          vars.selector.search(" "),
        );
        slider.slides = $(vars.selector, slider);
        slider.container = $(slider.containerSelector, slider);
        slider.count = slider.slides.length;
        // SYNC:
        slider.syncExists = $(vars.sync).length > 0;
        // SLIDE:
        if (vars.animation === "slide") vars.animation = "swing";
        slider.prop = vertical ? "top" : "marginLeft";
        slider.args = {};
        // SLIDESHOW:
        slider.manualPause = false;
        // TOUCH/USECSS:
        slider.transitions =
          !vars.video &&
          !fade &&
          vars.useCSS &&
          (function () {
            var obj = document.createElement("div"),
              props = [
                "perspectiveProperty",
                "WebkitPerspective",
                "MozPerspective",
                "OPerspective",
                "msPerspective",
              ];
            for (var i in props) {
              if (obj.style[props[i]] !== undefined) {
                slider.pfx = props[i].replace("Perspective", "").toLowerCase();
                slider.prop = "-" + slider.pfx + "-transform";
                return true;
              }
            }
            return false;
          })();
        // CONTROLSCONTAINER:
        if (vars.controlsContainer !== "")
          slider.controlsContainer =
            $(vars.controlsContainer).length > 0 && $(vars.controlsContainer);
        // MANUAL:
        if (vars.manualControls !== "")
          slider.manualControls =
            $(vars.manualControls).length > 0 && $(vars.manualControls);

        // RANDOMIZE:
        if (vars.randomize) {
          slider.slides.sort(function () {
            return Math.round(Math.random()) - 0.5;
          });
          slider.container.empty().append(slider.slides);
        }

        slider.doMath();

        // ASNAV:
        if (asNav) methods.asNav.setup();

        // INIT
        slider.setup("init");

        // CONTROLNAV:
        if (vars.controlNav) methods.controlNav.setup();

        // DIRECTIONNAV:
        if (vars.directionNav) methods.directionNav.setup();

        // KEYBOARD:
        if (
          vars.keyboard &&
          ($(slider.containerSelector).length === 1 || vars.multipleKeyboard)
        ) {
          $(document).bind("keyup", function (event) {
            var keycode = event.keyCode;
            if (!slider.animating && (keycode === 39 || keycode === 37)) {
              var target =
                keycode === 39
                  ? slider.getTarget("next")
                  : keycode === 37
                    ? slider.getTarget("prev")
                    : false;
              slider.flexAnimate(target, vars.pauseOnAction);
            }
          });
        }
        // MOUSEWHEEL:
        if (vars.mousewheel) {
          slider.bind("mousewheel", function (event, delta, deltaX, deltaY) {
            event.preventDefault();
            var target =
              delta < 0 ? slider.getTarget("next") : slider.getTarget("prev");
            slider.flexAnimate(target, vars.pauseOnAction);
          });
        }

        // PAUSEPLAY
        if (vars.pausePlay) methods.pausePlay.setup();

        // SLIDSESHOW
        if (vars.slideshow) {
          if (vars.pauseOnHover) {
            slider.hover(
              function () {
                if (!slider.manualPlay && !slider.manualPause) slider.pause();
              },
              function () {
                if (!slider.manualPause && !slider.manualPlay) slider.play();
              },
            );
          }
          // initialize animation
          vars.initDelay > 0
            ? setTimeout(slider.play, vars.initDelay)
            : slider.play();
        }

        // TOUCH
        if (touch && vars.touch) methods.touch();

        // FADE&&SMOOTHHEIGHT || SLIDE:
        if (!fade || (fade && vars.smoothHeight))
          $(window).bind("resize focus", methods.resize);

        // API: start() Callback
        setTimeout(function () {
          vars.start(slider);
        }, 200);
      },
      asNav: {
        setup: function () {
          slider.asNav = true;
          slider.animatingTo = Math.floor(slider.currentSlide / slider.move);
          slider.currentItem = slider.currentSlide;
          slider.slides
            .removeClass(namespace + "active-slide")
            .eq(slider.currentItem)
            .addClass(namespace + "active-slide");
          slider.slides.click(function (e) {
            e.preventDefault();
            var $slide = $(this),
              target = $slide.index();
            if (
              !$(vars.asNavFor).data("flexslider").animating &&
              !$slide.hasClass("active")
            ) {
              slider.direction = slider.currentItem < target ? "next" : "prev";
              slider.flexAnimate(target, vars.pauseOnAction, false, true, true);
            }
          });
        },
      },
      controlNav: {
        setup: function () {
          if (!slider.manualControls) {
            methods.controlNav.setupPaging();
          } else {
            // MANUALCONTROLS:
            methods.controlNav.setupManual();
          }
        },
        setupPaging: function () {
          var type =
              vars.controlNav === "thumbnails"
                ? "control-thumbs"
                : "control-paging",
            j = 1,
            item;

          slider.controlNavScaffold = $(
            '<ol class="' +
              namespace +
              "control-nav " +
              namespace +
              type +
              '"></ol>',
          );

          if (slider.pagingCount > 1) {
            for (var i = 0; i < slider.pagingCount; i++) {
              item =
                vars.controlNav === "thumbnails"
                  ? '<img src="' +
                    slider.slides.eq(i).attr("data-thumb") +
                    '"/>'
                  : "<a>" + j + "</a>";
              slider.controlNavScaffold.append("<li>" + item + "</li>");
              j++;
            }
          }

          // CONTROLSCONTAINER:
          slider.controlsContainer
            ? $(slider.controlsContainer).append(slider.controlNavScaffold)
            : slider.append(slider.controlNavScaffold);
          methods.controlNav.set();

          methods.controlNav.active();

          slider.controlNavScaffold.delegate(
            "a, img",
            eventType,
            function (event) {
              event.preventDefault();
              var $this = $(this),
                target = slider.controlNav.index($this);

              if (!$this.hasClass(namespace + "active")) {
                slider.direction =
                  target > slider.currentSlide ? "next" : "prev";
                slider.flexAnimate(target, vars.pauseOnAction);
              }
            },
          );
          // Prevent iOS click event bug
          if (touch) {
            slider.controlNavScaffold.delegate(
              "a",
              "click touchstart",
              function (event) {
                event.preventDefault();
              },
            );
          }
        },
        setupManual: function () {
          slider.controlNav = slider.manualControls;
          methods.controlNav.active();

          slider.controlNav.live(eventType, function (event) {
            event.preventDefault();
            var $this = $(this),
              target = slider.controlNav.index($this);

            if (!$this.hasClass(namespace + "active")) {
              target > slider.currentSlide
                ? (slider.direction = "next")
                : (slider.direction = "prev");
              slider.flexAnimate(target, vars.pauseOnAction);
            }
          });
          // Prevent iOS click event bug
          if (touch) {
            slider.controlNav.live("click touchstart", function (event) {
              event.preventDefault();
            });
          }
        },
        set: function () {
          var selector = vars.controlNav === "thumbnails" ? "img" : "a";
          slider.controlNav = $(
            "." + namespace + "control-nav li " + selector,
            slider.controlsContainer ? slider.controlsContainer : slider,
          );
        },
        active: function () {
          slider.controlNav
            .removeClass(namespace + "active")
            .eq(slider.animatingTo)
            .addClass(namespace + "active");
        },
        update: function (action, pos) {
          if (slider.pagingCount > 1 && action === "add") {
            slider.controlNavScaffold.append(
              $("<li><a>" + slider.count + "</a></li>"),
            );
          } else if (slider.pagingCount === 1) {
            slider.controlNavScaffold.find("li").remove();
          } else {
            slider.controlNav.eq(pos).closest("li").remove();
          }
          methods.controlNav.set();
          slider.pagingCount > 1 &&
          slider.pagingCount !== slider.controlNav.length
            ? slider.update(pos, action)
            : methods.controlNav.active();
        },
      },
      directionNav: {
        setup: function () {
          var directionNavScaffold = $(
            '<ul class="' +
              namespace +
              'direction-nav"><li><a class="' +
              namespace +
              'prev" href="#">' +
              vars.prevText +
              '</a></li><li><a class="' +
              namespace +
              'next" href="#">' +
              vars.nextText +
              "</a></li></ul>",
          );

          // CONTROLSCONTAINER:
          if (slider.controlsContainer) {
            $(slider.controlsContainer).append(directionNavScaffold);
            slider.directionNav = $(
              "." + namespace + "direction-nav li a",
              slider.controlsContainer,
            );
          } else {
            slider.append(directionNavScaffold);
            slider.directionNav = $(
              "." + namespace + "direction-nav li a",
              slider,
            );
          }

          methods.directionNav.update();

          slider.directionNav.bind(eventType, function (event) {
            event.preventDefault();
            var target = $(this).hasClass(namespace + "next")
              ? slider.getTarget("next")
              : slider.getTarget("prev");
            slider.flexAnimate(target, vars.pauseOnAction);
          });
          // Prevent iOS click event bug
          if (touch) {
            slider.directionNav.bind("click touchstart", function (event) {
              event.preventDefault();
            });
          }
        },
        update: function () {
          var disabledClass = namespace + "disabled";
          if (slider.pagingCount === 1) {
            slider.directionNav.addClass(disabledClass);
          } else if (!vars.animationLoop) {
            if (slider.animatingTo === 0) {
              slider.directionNav
                .removeClass(disabledClass)
                .filter("." + namespace + "prev")
                .addClass(disabledClass);
            } else if (slider.animatingTo === slider.last) {
              slider.directionNav
                .removeClass(disabledClass)
                .filter("." + namespace + "next")
                .addClass(disabledClass);
            } else {
              slider.directionNav.removeClass(disabledClass);
            }
          } else {
            slider.directionNav.removeClass(disabledClass);
          }
        },
      },
      pausePlay: {
        setup: function () {
          var pausePlayScaffold = $(
            '<div class="' + namespace + 'pauseplay"><a></a></div>',
          );

          // CONTROLSCONTAINER:
          if (slider.controlsContainer) {
            slider.controlsContainer.append(pausePlayScaffold);
            slider.pausePlay = $(
              "." + namespace + "pauseplay a",
              slider.controlsContainer,
            );
          } else {
            slider.append(pausePlayScaffold);
            slider.pausePlay = $("." + namespace + "pauseplay a", slider);
          }

          methods.pausePlay.update(
            vars.slideshow ? namespace + "pause" : namespace + "play",
          );

          slider.pausePlay.bind(eventType, function (event) {
            event.preventDefault();
            if ($(this).hasClass(namespace + "pause")) {
              slider.manualPause = true;
              slider.manualPlay = false;
              slider.pause();
            } else {
              slider.manualPause = false;
              slider.manualPlay = true;
              slider.play();
            }
          });
          // Prevent iOS click event bug
          if (touch) {
            slider.pausePlay.bind("click touchstart", function (event) {
              event.preventDefault();
            });
          }
        },
        update: function (state) {
          state === "play"
            ? slider.pausePlay
                .removeClass(namespace + "pause")
                .addClass(namespace + "play")
                .text(vars.playText)
            : slider.pausePlay
                .removeClass(namespace + "play")
                .addClass(namespace + "pause")
                .text(vars.pauseText);
        },
      },
      touch: function () {
        var startX,
          startY,
          offset,
          cwidth,
          dx,
          startT,
          scrolling = false;

        el.addEventListener("touchstart", onTouchStart, false);
        function onTouchStart(e) {
          if (slider.animating) {
            e.preventDefault();
          } else if (e.touches.length === 1) {
            slider.pause();
            // CAROUSEL:
            cwidth = vertical ? slider.h : slider.w;
            startT = Number(new Date());
            // CAROUSEL:
            offset =
              carousel && reverse && slider.animatingTo === slider.last
                ? 0
                : carousel && reverse
                  ? slider.limit -
                    (slider.itemW + vars.itemMargin) *
                      slider.move *
                      slider.animatingTo
                  : carousel && slider.currentSlide === slider.last
                    ? slider.limit
                    : carousel
                      ? (slider.itemW + vars.itemMargin) *
                        slider.move *
                        slider.currentSlide
                      : reverse
                        ? (slider.last -
                            slider.currentSlide +
                            slider.cloneOffset) *
                          cwidth
                        : (slider.currentSlide + slider.cloneOffset) * cwidth;
            startX = vertical ? e.touches[0].pageY : e.touches[0].pageX;
            startY = vertical ? e.touches[0].pageX : e.touches[0].pageY;

            el.addEventListener("touchmove", onTouchMove, false);
            el.addEventListener("touchend", onTouchEnd, false);
          }
        }

        function onTouchMove(e) {
          dx = vertical
            ? startX - e.touches[0].pageY
            : startX - e.touches[0].pageX;
          scrolling = vertical
            ? Math.abs(dx) < Math.abs(e.touches[0].pageX - startY)
            : Math.abs(dx) < Math.abs(e.touches[0].pageY - startY);

          if (!scrolling || Number(new Date()) - startT > 500) {
            e.preventDefault();
            if (!fade && slider.transitions) {
              if (!vars.animationLoop) {
                dx =
                  dx /
                  ((slider.currentSlide === 0 && dx < 0) ||
                  (slider.currentSlide === slider.last && dx > 0)
                    ? Math.abs(dx) / cwidth + 2
                    : 1);
              }
              slider.setProps(offset + dx, "setTouch");
            }
          }
        }

        function onTouchEnd(e) {
          // finish the touch by undoing the touch session
          el.removeEventListener("touchmove", onTouchMove, false);

          if (
            slider.animatingTo === slider.currentSlide &&
            !scrolling &&
            !(dx === null)
          ) {
            var updateDx = reverse ? -dx : dx,
              target =
                updateDx > 0
                  ? slider.getTarget("next")
                  : slider.getTarget("prev");

            if (
              slider.canAdvance(target) &&
              ((Number(new Date()) - startT < 550 && Math.abs(updateDx) > 50) ||
                Math.abs(updateDx) > cwidth / 2)
            ) {
              slider.flexAnimate(target, vars.pauseOnAction);
            } else {
              if (!fade)
                slider.flexAnimate(
                  slider.currentSlide,
                  vars.pauseOnAction,
                  true,
                );
            }
          }
          el.removeEventListener("touchend", onTouchEnd, false);
          startX = null;
          startY = null;
          dx = null;
          offset = null;
        }
      },
      resize: function () {
        if (!slider.animating && slider.is(":visible")) {
          if (!carousel) slider.doMath();

          if (fade) {
            // SMOOTH HEIGHT:
            methods.smoothHeight();
          } else if (carousel) {
            //CAROUSEL:
            slider.slides.width(slider.computedW);
            slider.update(slider.pagingCount);
            slider.setProps();
          } else if (vertical) {
            //VERTICAL:
            slider.viewport.height(slider.h);
            slider.setProps(slider.h, "setTotal");
          } else {
            // SMOOTH HEIGHT:
            if (vars.smoothHeight) methods.smoothHeight();
            slider.newSlides.width(slider.computedW);
            slider.setProps(slider.computedW, "setTotal");
          }
        }
      },
      smoothHeight: function (dur) {
        if (!vertical || fade) {
          var $obj = fade ? slider : slider.viewport;
          dur
            ? $obj.animate(
                { height: slider.slides.eq(slider.animatingTo).height() },
                dur,
              )
            : $obj.height(slider.slides.eq(slider.animatingTo).height());
        }
      },
      sync: function (action) {
        var $obj = $(vars.sync).data("flexslider"),
          target = slider.animatingTo;

        switch (action) {
          case "animate":
            $obj.flexAnimate(target, vars.pauseOnAction, false, true);
            break;
          case "play":
            if (!$obj.playing && !$obj.asNav) {
              $obj.play();
            }
            break;
          case "pause":
            $obj.pause();
            break;
        }
      },
    };

    // public methods
    slider.flexAnimate = function (target, pause, override, withSync, fromNav) {
      if (asNav && slider.pagingCount === 1)
        slider.direction = slider.currentItem < target ? "next" : "prev";

      if (
        !slider.animating &&
        (slider.canAdvance(target, fromNav) || override) &&
        slider.is(":visible")
      ) {
        if (asNav && withSync) {
          var master = $(vars.asNavFor).data("flexslider");
          slider.atEnd = target === 0 || target === slider.count - 1;
          master.flexAnimate(target, true, false, true, fromNav);
          slider.direction = slider.currentItem < target ? "next" : "prev";
          master.direction = slider.direction;

          if (
            Math.ceil((target + 1) / slider.visible) - 1 !==
              slider.currentSlide &&
            target !== 0
          ) {
            slider.currentItem = target;
            slider.slides
              .removeClass(namespace + "active-slide")
              .eq(target)
              .addClass(namespace + "active-slide");
            target = Math.floor(target / slider.visible);
          } else {
            slider.currentItem = target;
            slider.slides
              .removeClass(namespace + "active-slide")
              .eq(target)
              .addClass(namespace + "active-slide");
            return false;
          }
        }

        slider.animating = true;
        slider.animatingTo = target;
        // API: before() animation Callback
        vars.before(slider);

        // SLIDESHOW:
        if (pause) slider.pause();

        // SYNC:
        if (slider.syncExists && !fromNav) methods.sync("animate");

        // CONTROLNAV
        if (vars.controlNav) methods.controlNav.active();

        // !CAROUSEL:
        // CANDIDATE: slide active class (for add/remove slide)
        if (!carousel)
          slider.slides
            .removeClass(namespace + "active-slide")
            .eq(target)
            .addClass(namespace + "active-slide");

        // INFINITE LOOP:
        // CANDIDATE: atEnd
        slider.atEnd = target === 0 || target === slider.last;

        // DIRECTIONNAV:
        if (vars.directionNav) methods.directionNav.update();

        if (target === slider.last) {
          // API: end() of cycle Callback
          vars.end(slider);
          // SLIDESHOW && !INFINITE LOOP:
          if (!vars.animationLoop) slider.pause();
        }

        // SLIDE:
        if (!fade) {
          var dimension = vertical
              ? slider.slides.filter(":first").height()
              : slider.computedW,
            margin,
            slideString,
            calcNext;

          // INFINITE LOOP / REVERSE:
          if (carousel) {
            margin =
              vars.itemWidth > slider.w ? vars.itemMargin * 2 : vars.itemMargin;
            calcNext =
              (slider.itemW + margin) * slider.move * slider.animatingTo;
            slideString =
              calcNext > slider.limit && slider.visible !== 1
                ? slider.limit
                : calcNext;
          } else if (
            slider.currentSlide === 0 &&
            target === slider.count - 1 &&
            vars.animationLoop &&
            slider.direction !== "next"
          ) {
            slideString = reverse
              ? (slider.count + slider.cloneOffset) * dimension
              : 0;
          } else if (
            slider.currentSlide === slider.last &&
            target === 0 &&
            vars.animationLoop &&
            slider.direction !== "prev"
          ) {
            slideString = reverse ? 0 : (slider.count + 1) * dimension;
          } else {
            slideString = reverse
              ? (slider.count - 1 - target + slider.cloneOffset) * dimension
              : (target + slider.cloneOffset) * dimension;
          }
          slider.setProps(slideString, "", vars.animationSpeed);
          if (slider.transitions) {
            if (!vars.animationLoop || !slider.atEnd) {
              slider.animating = false;
              slider.currentSlide = slider.animatingTo;
            }
            slider.container.unbind("webkitTransitionEnd transitionend");
            slider.container.bind(
              "webkitTransitionEnd transitionend",
              function () {
                slider.wrapup(dimension);
              },
            );
          } else {
            slider.container.animate(
              slider.args,
              vars.animationSpeed,
              vars.easing,
              function () {
                slider.wrapup(dimension);
              },
            );
          }
        } else {
          // FADE:
          if (!touch) {
            slider.slides
              .eq(slider.currentSlide)
              .fadeOut(vars.animationSpeed, vars.easing);
            slider.slides
              .eq(target)
              .fadeIn(vars.animationSpeed, vars.easing, slider.wrapup);
          } else {
            slider.slides
              .eq(slider.currentSlide)
              .css({ opacity: 0, zIndex: 1 });
            slider.slides.eq(target).css({ opacity: 1, zIndex: 2 });

            slider.slides.unbind("webkitTransitionEnd transitionend");
            slider.slides
              .eq(slider.currentSlide)
              .bind("webkitTransitionEnd transitionend", function () {
                // API: after() animation Callback
                vars.after(slider);
              });

            slider.animating = false;
            slider.currentSlide = slider.animatingTo;
          }
        }
        // SMOOTH HEIGHT:
        if (vars.smoothHeight) methods.smoothHeight(vars.animationSpeed);
      }
    };
    slider.wrapup = function (dimension) {
      // SLIDE:
      if (!fade && !carousel) {
        if (
          slider.currentSlide === 0 &&
          slider.animatingTo === slider.last &&
          vars.animationLoop
        ) {
          slider.setProps(dimension, "jumpEnd");
        } else if (
          slider.currentSlide === slider.last &&
          slider.animatingTo === 0 &&
          vars.animationLoop
        ) {
          slider.setProps(dimension, "jumpStart");
        }
      }
      slider.animating = false;
      slider.currentSlide = slider.animatingTo;
      // API: after() animation Callback
      vars.after(slider);
    };

    // SLIDESHOW:
    slider.animateSlides = function () {
      if (!slider.animating) slider.flexAnimate(slider.getTarget("next"));
    };
    // SLIDESHOW:
    slider.pause = function () {
      clearInterval(slider.animatedSlides);
      slider.playing = false;
      // PAUSEPLAY:
      if (vars.pausePlay) methods.pausePlay.update("play");
      // SYNC:
      if (slider.syncExists) methods.sync("pause");
    };
    // SLIDESHOW:
    slider.play = function () {
      slider.animatedSlides = setInterval(
        slider.animateSlides,
        vars.slideshowSpeed,
      );
      slider.playing = true;
      // PAUSEPLAY:
      if (vars.pausePlay) methods.pausePlay.update("pause");
      // SYNC:
      if (slider.syncExists) methods.sync("play");
    };
    slider.canAdvance = function (target, fromNav) {
      // ASNAV:
      var last = asNav ? slider.pagingCount - 1 : slider.last;
      return fromNav
        ? true
        : asNav &&
            slider.currentItem === slider.count - 1 &&
            target === 0 &&
            slider.direction === "prev"
          ? true
          : asNav &&
              slider.currentItem === 0 &&
              target === slider.pagingCount - 1 &&
              slider.direction !== "next"
            ? false
            : target === slider.currentSlide && !asNav
              ? false
              : vars.animationLoop
                ? true
                : slider.atEnd &&
                    slider.currentSlide === 0 &&
                    target === last &&
                    slider.direction !== "next"
                  ? false
                  : slider.atEnd &&
                      slider.currentSlide === last &&
                      target === 0 &&
                      slider.direction === "next"
                    ? false
                    : true;
    };
    slider.getTarget = function (dir) {
      slider.direction = dir;
      if (dir === "next") {
        return slider.currentSlide === slider.last
          ? 0
          : slider.currentSlide + 1;
      } else {
        return slider.currentSlide === 0
          ? slider.last
          : slider.currentSlide - 1;
      }
    };

    // SLIDE:
    slider.setProps = function (pos, special, dur) {
      var target = (function () {
        var posCheck = pos
            ? pos
            : (slider.itemW + vars.itemMargin) *
              slider.move *
              slider.animatingTo,
          posCalc = (function () {
            if (carousel) {
              return special === "setTouch"
                ? pos
                : reverse && slider.animatingTo === slider.last
                  ? 0
                  : reverse
                    ? slider.limit -
                      (slider.itemW + vars.itemMargin) *
                        slider.move *
                        slider.animatingTo
                    : slider.animatingTo === slider.last
                      ? slider.limit
                      : posCheck;
            } else {
              switch (special) {
                case "setTotal":
                  return reverse
                    ? (slider.count -
                        1 -
                        slider.currentSlide +
                        slider.cloneOffset) *
                        pos
                    : (slider.currentSlide + slider.cloneOffset) * pos;
                case "setTouch":
                  return reverse ? pos : pos;
                case "jumpEnd":
                  return reverse ? pos : slider.count * pos;
                case "jumpStart":
                  return reverse ? slider.count * pos : pos;
                default:
                  return pos;
              }
            }
          })();
        return posCalc * -1 + "px";
      })();

      if (slider.transitions) {
        target = vertical
          ? "translate3d(0," + target + ",0)"
          : "translate3d(" + target + ",0,0)";
        dur = dur !== undefined ? dur / 1000 + "s" : "0s";
        slider.container.css("-" + slider.pfx + "-transition-duration", dur);
      }

      slider.args[slider.prop] = target;
      if (slider.transitions || dur === undefined)
        slider.container.css(slider.args);
    };

    slider.setup = function (type) {
      // SLIDE:
      if (!fade) {
        var sliderOffset, arr;

        if (type === "init") {
          slider.viewport = $('<div class="' + namespace + 'viewport"></div>')
            .css({ overflow: "hidden", position: "relative" })
            .appendTo(slider)
            .append(slider.container);
          // INFINITE LOOP:
          slider.cloneCount = 0;
          slider.cloneOffset = 0;
          // REVERSE:
          if (reverse) {
            arr = $.makeArray(slider.slides).reverse();
            slider.slides = $(arr);
            slider.container.empty().append(slider.slides);
          }
        }
        // INFINITE LOOP && !CAROUSEL:
        if (vars.animationLoop && !carousel) {
          slider.cloneCount = 2;
          slider.cloneOffset = 1;
          // clear out old clones
          if (type !== "init") slider.container.find(".clone").remove();
          slider.container
            .append(slider.slides.first().clone().addClass("clone"))
            .prepend(slider.slides.last().clone().addClass("clone"));
        }
        slider.newSlides = $(vars.selector, slider);

        sliderOffset = reverse
          ? slider.count - 1 - slider.currentSlide + slider.cloneOffset
          : slider.currentSlide + slider.cloneOffset;
        // VERTICAL:
        if (vertical && !carousel) {
          slider.container
            .height((slider.count + slider.cloneCount) * 200 + "%")
            .css("position", "absolute")
            .width("100%");
          setTimeout(
            function () {
              slider.newSlides.css({ display: "block" });
              slider.doMath();
              slider.viewport.height(slider.h);
              slider.setProps(sliderOffset * slider.h, "init");
            },
            type === "init" ? 100 : 0,
          );
        } else {
          slider.container.width(
            (slider.count + slider.cloneCount) * 200 + "%",
          );
          slider.setProps(sliderOffset * slider.computedW, "init");
          setTimeout(
            function () {
              slider.doMath();
              slider.newSlides.css({
                width: slider.computedW,
                float: "left",
                display: "block",
              });
              // SMOOTH HEIGHT:
              if (vars.smoothHeight) methods.smoothHeight();
            },
            type === "init" ? 100 : 0,
          );
        }
      } else {
        // FADE:
        slider.slides.css({
          width: "100%",
          float: "left",
          marginRight: "-100%",
          position: "relative",
        });
        if (type === "init") {
          if (!touch) {
            slider.slides
              .eq(slider.currentSlide)
              .fadeIn(vars.animationSpeed, vars.easing);
          } else {
            slider.slides
              .css({
                opacity: 0,
                display: "block",
                webkitTransition:
                  "opacity " + vars.animationSpeed / 1000 + "s ease",
                zIndex: 1,
              })
              .eq(slider.currentSlide)
              .css({ opacity: 1, zIndex: 2 });
          }
        }
        // SMOOTH HEIGHT:
        if (vars.smoothHeight) methods.smoothHeight();
      }
      // !CAROUSEL:
      // CANDIDATE: active slide
      if (!carousel)
        slider.slides
          .removeClass(namespace + "active-slide")
          .eq(slider.currentSlide)
          .addClass(namespace + "active-slide");
    };

    slider.doMath = function () {
      var slide = slider.slides.first(),
        slideMargin = vars.itemMargin,
        minItems = vars.minItems,
        maxItems = vars.maxItems;

      slider.w = slider.width();
      slider.h = slide.height();
      slider.boxPadding = slide.outerWidth() - slide.width();

      // CAROUSEL:
      if (carousel) {
        slider.itemT = vars.itemWidth + slideMargin;
        slider.minW = minItems ? minItems * slider.itemT : slider.w;
        slider.maxW = maxItems ? maxItems * slider.itemT : slider.w;
        slider.itemW =
          slider.minW > slider.w
            ? (slider.w - slideMargin * minItems) / minItems
            : slider.maxW < slider.w
              ? (slider.w - slideMargin * maxItems) / maxItems
              : vars.itemWidth > slider.w
                ? slider.w
                : vars.itemWidth;
        slider.visible = Math.floor(slider.w / (slider.itemW + slideMargin));
        slider.move =
          vars.move > 0 && vars.move < slider.visible
            ? vars.move
            : slider.visible;
        slider.pagingCount = Math.ceil(
          (slider.count - slider.visible) / slider.move + 1,
        );
        slider.last = slider.pagingCount - 1;
        slider.limit =
          slider.pagingCount === 1
            ? 0
            : vars.itemWidth > slider.w
              ? (slider.itemW + slideMargin * 2) * slider.count -
                slider.w -
                slideMargin
              : (slider.itemW + slideMargin) * slider.count -
                slider.w -
                slideMargin;
      } else {
        slider.itemW = slider.w;
        slider.pagingCount = slider.count;
        slider.last = slider.count - 1;
      }
      slider.computedW = slider.itemW - slider.boxPadding;
    };

    slider.update = function (pos, action) {
      slider.doMath();

      // update currentSlide and slider.animatingTo if necessary
      if (!carousel) {
        if (pos < slider.currentSlide) {
          slider.currentSlide += 1;
        } else if (pos <= slider.currentSlide && pos !== 0) {
          slider.currentSlide -= 1;
        }
        slider.animatingTo = slider.currentSlide;
      }

      // update controlNav
      if (vars.controlNav && !slider.manualControls) {
        if (
          (action === "add" && !carousel) ||
          slider.pagingCount > slider.controlNav.length
        ) {
          methods.controlNav.update("add");
        } else if (
          (action === "remove" && !carousel) ||
          slider.pagingCount < slider.controlNav.length
        ) {
          if (carousel && slider.currentSlide > slider.last) {
            slider.currentSlide -= 1;
            slider.animatingTo -= 1;
          }
          methods.controlNav.update("remove", slider.last);
        }
      }
      // update directionNav
      if (vars.directionNav) methods.directionNav.update();
    };

    slider.addSlide = function (obj, pos) {
      var $obj = $(obj);

      slider.count += 1;
      slider.last = slider.count - 1;

      // append new slide
      if (vertical && reverse) {
        pos !== undefined
          ? slider.slides.eq(slider.count - pos).after($obj)
          : slider.container.prepend($obj);
      } else {
        pos !== undefined
          ? slider.slides.eq(pos).before($obj)
          : slider.container.append($obj);
      }

      // update currentSlide, animatingTo, controlNav, and directionNav
      slider.update(pos, "add");

      // update slider.slides
      slider.slides = $(vars.selector + ":not(.clone)", slider);
      // re-setup the slider to accomdate new slide
      slider.setup();

      //FlexSlider: added() Callback
      vars.added(slider);
    };
    slider.removeSlide = function (obj) {
      var pos = isNaN(obj) ? slider.slides.index($(obj)) : obj;

      // update count
      slider.count -= 1;
      slider.last = slider.count - 1;

      // remove slide
      if (isNaN(obj)) {
        $(obj, slider.slides).remove();
      } else {
        vertical && reverse
          ? slider.slides.eq(slider.last).remove()
          : slider.slides.eq(obj).remove();
      }

      // update currentSlide, animatingTo, controlNav, and directionNav
      slider.doMath();
      slider.update(pos, "remove");

      // update slider.slides
      slider.slides = $(vars.selector + ":not(.clone)", slider);
      // re-setup the slider to accomdate new slide
      slider.setup();

      // FlexSlider: removed() Callback
      vars.removed(slider);
    };

    //FlexSlider: Initialize
    methods.init();
  };

  //FlexSlider: Default Settings
  $.flexslider.defaults = {
    namespace: "flex-", //{NEW} String: Prefix string attached to the class of every element generated by the plugin
    selector: ".slides > li", //{NEW} Selector: Must match a simple pattern. '{container} > {slide}' -- Ignore pattern at your own peril
    animation: "fade", //String: Select your animation type, "fade" or "slide"
    easing: "swing", //{NEW} String: Determines the easing method used in jQuery transitions. jQuery easing plugin is supported!
    direction: "horizontal", //String: Select the sliding direction, "horizontal" or "vertical"
    reverse: false, //{NEW} Boolean: Reverse the animation direction
    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received "disable" classes at either end
    smoothHeight: false, //{NEW} Boolean: Allow height of the slider to animate smoothly in horizontal mode
    startAt: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)
    slideshow: true, //Boolean: Animate slider automatically
    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds
    animationSpeed: 600, //Integer: Set the speed of animations, in milliseconds
    initDelay: 0, //{NEW} Integer: Set an initialization delay, in milliseconds
    randomize: false, //Boolean: Randomize slide order

    // Usability features
    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.
    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering
    useCSS: true, //{NEW} Boolean: Slider will use CSS3 transitions if available
    touch: true, //{NEW} Boolean: Allow touch swipe navigation of the slider on touch-enabled devices
    video: false, //{NEW} Boolean: If using video in the slider, will prevent CSS3 3D Transforms to avoid graphical glitches

    // Primary Controls
    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage
    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)
    prevText: "Previous", //String: Set the text for the "previous" directionNav item
    nextText: "Next", //String: Set the text for the "next" directionNav item

    // Secondary Navigation
    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys
    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.
    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel
    pausePlay: false, //Boolean: Create pause/play dynamic element
    pauseText: "Pause", //String: Set the text for the "pause" pausePlay item
    playText: "Play", //String: Set the text for the "play" pausePlay item

    // Special properties
    controlsContainer: "", //{UPDATED} jQuery Object/Selector: Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be $(".flexslider-container"). Property is ignored if given element is not found.
    manualControls: "", //{UPDATED} jQuery Object/Selector: Declare custom control navigation. Examples would be $(".flex-control-nav li") or "#tabs-nav li img", etc. The number of elements in your controlNav should match the number of slides/tabs.
    sync: "", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.
    asNavFor: "", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider

    // Carousel Options
    itemWidth: 0, //{NEW} Integer: Box-model width of individual carousel items, including horizontal borders and padding.
    itemMargin: 0, //{NEW} Integer: Margin between carousel items.
    minItems: 0, //{NEW} Integer: Minimum number of carousel items that should be visible. Items will resize fluidly when below this.
    maxItems: 0, //{NEW} Integer: Maxmimum number of carousel items that should be visible. Items will resize fluidly when above this limit.
    move: 0, //{NEW} Integer: Number of carousel items that should move on animation. If 0, slider will move all visible items.

    // Callback API
    start: function () {}, //Callback: function(slider) - Fires when the slider loads the first slide
    before: function () {}, //Callback: function(slider) - Fires asynchronously with each slider animation
    after: function () {}, //Callback: function(slider) - Fires after each slider animation completes
    end: function () {}, //Callback: function(slider) - Fires when the slider reaches the last slide (asynchronous)
    added: function () {}, //{NEW} Callback: function(slider) - Fires after a slide is added
    removed: function () {}, //{NEW} Callback: function(slider) - Fires after a slide is removed
  };

  //FlexSlider: Plugin Function
  $.fn.flexslider = function (options) {
    if (options === undefined) options = {};

    if (typeof options === "object") {
      return this.each(function () {
        var $this = $(this),
          selector = options.selector ? options.selector : ".slides > li",
          $slides = $this.find(selector);

        if ($slides.length === 1) {
          $slides.fadeIn(400);
          if (options.start) options.start($this);
        } else if ($this.data("flexslider") == undefined) {
          new $.flexslider(this, options);
        }
      });
    } else {
      // Helper strings to quickly perform functions on the slider
      var $slider = $(this).data("flexslider");
      switch (options) {
        case "play":
          $slider.play();
          break;
        case "pause":
          $slider.pause();
          break;
        case "next":
          $slider.flexAnimate($slider.getTarget("next"), true);
          break;
        case "prev":
        case "previous":
          $slider.flexAnimate($slider.getTarget("prev"), true);
          break;
        default:
          if (typeof options === "number") $slider.flexAnimate(options, true);
      }
    }
  };
})(jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/jquery.js
================================================
/*! jQuery v1.9.1 | (c) 2005, 2012 jQuery Foundation, Inc. | jquery.org/license
//@ sourceMappingURL=jquery.min.map
*/ (function (a, b) {
  function G(a) {
    var b = (F[a] = {});
    return (
      p.each(a.split(s), function (a, c) {
        b[c] = !0;
      }),
      b
    );
  }
  function J(a, c, d) {
    if (d === b && a.nodeType === 1) {
      var e = "data-" + c.replace(I, "-$1").toLowerCase();
      d = a.getAttribute(e);
      if (typeof d == "string") {
        try {
          d =
            d === "true"
              ? !0
              : d === "false"
                ? !1
                : d === "null"
                  ? null
                  : +d + "" === d
                    ? +d
                    : H.test(d)
                      ? p.parseJSON(d)
                      : d;
        } catch (f) {}
        p.data(a, c, d);
      } else d = b;
    }
    return d;
  }
  function K(a) {
    var b;
    for (b in a) {
      if (b === "data" && p.isEmptyObject(a[b])) continue;
      if (b !== "toJSON") return !1;
    }
    return !0;
  }
  function ba() {
    return !1;
  }
  function bb() {
    return !0;
  }
  function bh(a) {
    return !a || !a.parentNode || a.parentNode.nodeType === 11;
  }
  function bi(a, b) {
    do a = a[b];
    while (a && a.nodeType !== 1);
    return a;
  }
  function bj(a, b, c) {
    b = b || 0;
    if (p.isFunction(b))
      return p.grep(a, function (a, d) {
        var e = !!b.call(a, d, a);
        return e === c;
      });
    if (b.nodeType)
      return p.grep(a, function (a, d) {
        return (a === b) === c;
      });
    if (typeof b == "string") {
      var d = p.grep(a, function (a) {
        return a.nodeType === 1;
      });
      if (be.test(b)) return p.filter(b, d, !c);
      b = p.filter(b, d);
    }
    return p.grep(a, function (a, d) {
      return p.inArray(a, b) >= 0 === c;
    });
  }
  function bk(a) {
    var b = bl.split("|"),
      c = a.createDocumentFragment();
    if (c.createElement) while (b.length) c.createElement(b.pop());
    return c;
  }
  function bC(a, b) {
    return (
      a.getElementsByTagName(b)[0] ||
      a.appendChild(a.ownerDocument.createElement(b))
    );
  }
  function bD(a, b) {
    if (b.nodeType !== 1 || !p.hasData(a)) return;
    var c,
      d,
      e,
      f = p._data(a),
      g = p._data(b, f),
      h = f.events;
    if (h) {
      delete g.handle, (g.events = {});
      for (c in h)
        for (d = 0, e = h[c].length; d < e; d++) p.event.add(b, c, h[c][d]);
    }
    g.data && (g.data = p.extend({}, g.data));
  }
  function bE(a, b) {
    var c;
    if (b.nodeType !== 1) return;
    b.clearAttributes && b.clearAttributes(),
      b.mergeAttributes && b.mergeAttributes(a),
      (c = b.nodeName.toLowerCase()),
      c === "object"
        ? (b.parentNode && (b.outerHTML = a.outerHTML),
          p.support.html5Clone &&
            a.innerHTML &&
            !p.trim(b.innerHTML) &&
            (b.innerHTML = a.innerHTML))
        : c === "input" && bv.test(a.type)
          ? ((b.defaultChecked = b.checked = a.checked),
            b.value !== a.value && (b.value = a.value))
          : c === "option"
            ? (b.selected = a.defaultSelected)
            : c === "input" || c === "textarea"
              ? (b.defaultValue = a.defaultValue)
              : c === "script" && b.text !== a.text && (b.text = a.text),
      b.removeAttribute(p.expando);
  }
  function bF(a) {
    return typeof a.getElementsByTagName != "undefined"
      ? a.getElementsByTagName("*")
      : typeof a.querySelectorAll != "undefined"
        ? a.querySelectorAll("*")
        : [];
  }
  function bG(a) {
    bv.test(a.type) && (a.defaultChecked = a.checked);
  }
  function bY(a, b) {
    if (b in a) return b;
    var c = b.charAt(0).toUpperCase() + b.slice(1),
      d = b,
      e = bW.length;
    while (e--) {
      b = bW[e] + c;
      if (b in a) return b;
    }
    return d;
  }
  function bZ(a, b) {
    return (
      (a = b || a),
      p.css(a, "display") === "none" || !p.contains(a.ownerDocument, a)
    );
  }
  function b$(a, b) {
    var c,
      d,
      e = [],
      f = 0,
      g = a.length;
    for (; f < g; f++) {
      c = a[f];
      if (!c.style) continue;
      (e[f] = p._data(c, "olddisplay")),
        b
          ? (!e[f] && c.style.display === "none" && (c.style.display = ""),
            c.style.display === "" &&
              bZ(c) &&
              (e[f] = p._data(c, "olddisplay", cc(c.nodeName))))
          : ((d = bH(c, "display")),
            !e[f] && d !== "none" && p._data(c, "olddisplay", d));
    }
    for (f = 0; f < g; f++) {
      c = a[f];
      if (!c.style) continue;
      if (!b || c.style.display === "none" || c.style.display === "")
        c.style.display = b ? e[f] || "" : "none";
    }
    return a;
  }
  function b_(a, b, c) {
    var d = bP.exec(b);
    return d ? Math.max(0, d[1] - (c || 0)) + (d[2] || "px") : b;
  }
  function ca(a, b, c, d) {
    var e = c === (d ? "border" : "content") ? 4 : b === "width" ? 1 : 0,
      f = 0;
    for (; e < 4; e += 2)
      c === "margin" && (f += p.css(a, c + bV[e], !0)),
        d
          ? (c === "content" &&
              (f -= parseFloat(bH(a, "padding" + bV[e])) || 0),
            c !== "margin" &&
              (f -= parseFloat(bH(a, "border" + bV[e] + "Width")) || 0))
          : ((f += parseFloat(bH(a, "padding" + bV[e])) || 0),
            c !== "padding" &&
              (f += parseFloat(bH(a, "border" + bV[e] + "Width")) || 0));
    return f;
  }
  function cb(a, b, c) {
    var d = b === "width" ? a.offsetWidth : a.offsetHeight,
      e = !0,
      f = p.support.boxSizing && p.css(a, "boxSizing") === "border-box";
    if (d <= 0 || d == null) {
      d = bH(a, b);
      if (d < 0 || d == null) d = a.style[b];
      if (bQ.test(d)) return d;
      (e = f && (p.support.boxSizingReliable || d === a.style[b])),
        (d = parseFloat(d) || 0);
    }
    return d + ca(a, b, c || (f ? "border" : "content"), e) + "px";
  }
  function cc(a) {
    if (bS[a]) return bS[a];
    var b = p("<" + a + ">").appendTo(e.body),
      c = b.css("display");
    b.remove();
    if (c === "none" || c === "") {
      bI = e.body.appendChild(
        bI ||
          p.extend(e.createElement("iframe"), {
            frameBorder: 0,
            width: 0,
            height: 0,
          }),
      );
      if (!bJ || !bI.createElement)
        (bJ = (bI.contentWindow || bI.contentDocument).document),
          bJ.write("<!doctype html><html><body>"),
          bJ.close();
      (b = bJ.body.appendChild(bJ.createElement(a))),
        (c = bH(b, "display")),
        e.body.removeChild(bI);
    }
    return (bS[a] = c), c;
  }
  function ci(a, b, c, d) {
    var e;
    if (p.isArray(b))
      p.each(b, function (b, e) {
        c || ce.test(a)
          ? d(a, e)
          : ci(a + "[" + (typeof e == "object" ? b : "") + "]", e, c, d);
      });
    else if (!c && p.type(b) === "object")
      for (e in b) ci(a + "[" + e + "]", b[e], c, d);
    else d(a, b);
  }
  function cz(a) {
    return function (b, c) {
      typeof b != "string" && ((c = b), (b = "*"));
      var d,
        e,
        f,
        g = b.toLowerCase().split(s),
        h = 0,
        i = g.length;
      if (p.isFunction(c))
        for (; h < i; h++)
          (d = g[h]),
            (f = /^\+/.test(d)),
            f && (d = d.substr(1) || "*"),
            (e = a[d] = a[d] || []),
            e[f ? "unshift" : "push"](c);
    };
  }
  function cA(a, c, d, e, f, g) {
    (f = f || c.dataTypes[0]), (g = g || {}), (g[f] = !0);
    var h,
      i = a[f],
      j = 0,
      k = i ? i.length : 0,
      l = a === cv;
    for (; j < k && (l || !h); j++)
      (h = i[j](c, d, e)),
        typeof h == "string" &&
          (!l || g[h]
            ? (h = b)
            : (c.dataTypes.unshift(h), (h = cA(a, c, d, e, h, g))));
    return (l || !h) && !g["*"] && (h = cA(a, c, d, e, "*", g)), h;
  }
  function cB(a, c) {
    var d,
      e,
      f = p.ajaxSettings.flatOptions || {};
    for (d in c) c[d] !== b && ((f[d] ? a : e || (e = {}))[d] = c[d]);
    e && p.extend(!0, a, e);
  }
  function cC(a, c, d) {
    var e,
      f,
      g,
      h,
      i = a.contents,
      j = a.dataTypes,
      k = a.responseFields;
    for (f in k) f in d && (c[k[f]] = d[f]);
    while (j[0] === "*")
      j.shift(),
        e === b && (e = a.mimeType || c.getResponseHeader("content-type"));
    if (e)
      for (f in i)
        if (i[f] && i[f].test(e)) {
          j.unshift(f);
          break;
        }
    if (j[0] in d) g = j[0];
    else {
      for (f in d) {
        if (!j[0] || a.converters[f + " " + j[0]]) {
          g = f;
          break;
        }
        h || (h = f);
      }
      g = g || h;
    }
    if (g) return g !== j[0] && j.unshift(g), d[g];
  }
  function cD(a, b) {
    var c,
      d,
      e,
      f,
      g = a.dataTypes.slice(),
      h = g[0],
      i = {},
      j = 0;
    a.dataFilter && (b = a.dataFilter(b, a.dataType));
    if (g[1]) for (c in a.converters) i[c.toLowerCase()] = a.converters[c];
    for (; (e = g[++j]); )
      if (e !== "*") {
        if (h !== "*" && h !== e) {
          c = i[h + " " + e] || i["* " + e];
          if (!c)
            for (d in i) {
              f = d.split(" ");
              if (f[1] === e) {
                c = i[h + " " + f[0]] || i["* " + f[0]];
                if (c) {
                  c === !0
                    ? (c = i[d])
                    : i[d] !== !0 && ((e = f[0]), g.splice(j--, 0, e));
                  break;
                }
              }
            }
          if (c !== !0)
            if (c && a["throws"]) b = c(b);
            else
              try {
                b = c(b);
              } catch (k) {
                return {
                  state: "parsererror",
                  error: c ? k : "No conversion from " + h + " to " + e,
                };
              }
        }
        h = e;
      }
    return { state: "success", data: b };
  }
  function cL() {
    try {
      return new a.XMLHttpRequest();
    } catch (b) {}
  }
  function cM() {
    try {
      return new a.ActiveXObject("Microsoft.XMLHTTP");
    } catch (b) {}
  }
  function cU() {
    return (
      setTimeout(function () {
        cN = b;
      }, 0),
      (cN = p.now())
    );
  }
  function cV(a, b) {
    p.each(b, function (b, c) {
      var d = (cT[b] || []).concat(cT["*"]),
        e = 0,
        f = d.length;
      for (; e < f; e++) if (d[e].call(a, b, c)) return;
    });
  }
  function cW(a, b, c) {
    var d,
      e = 0,
      f = 0,
      g = cS.length,
      h = p.Deferred().always(function () {
        delete i.elem;
      }),
      i = function () {
        var b = cN || cU(),
          c = Math.max(0, j.startTime + j.duration - b),
          d = 1 - (c / j.duration || 0),
          e = 0,
          f = j.tweens.length;
        for (; e < f; e++) j.tweens[e].run(d);
        return (
          h.notifyWith(a, [j, d, c]),
          d < 1 && f ? c : (h.resolveWith(a, [j]), !1)
        );
      },
      j = h.promise({
        elem: a,
        props: p.extend({}, b),
        opts: p.extend(!0, { specialEasing: {} }, c),
        originalProperties: b,
        originalOptions: c,
        startTime: cN || cU(),
        duration: c.duration,
        tweens: [],
        createTween: function (b, c, d) {
          var e = p.Tween(
            a,
            j.opts,
            b,
            c,
            j.opts.specialEasing[b] || j.opts.easing,
          );
          return j.tweens.push(e), e;
        },
        stop: function (b) {
          var c = 0,
            d = b ? j.tweens.length : 0;
          for (; c < d; c++) j.tweens[c].run(1);
          return b ? h.resolveWith(a, [j, b]) : h.rejectWith(a, [j, b]), this;
        },
      }),
      k = j.props;
    cX(k, j.opts.specialEasing);
    for (; e < g; e++) {
      d = cS[e].call(j, a, k, j.opts);
      if (d) return d;
    }
    return (
      cV(j, k),
      p.isFunction(j.opts.start) && j.opts.start.call(a, j),
      p.fx.timer(p.extend(i, { anim: j, queue: j.opts.queue, elem: a })),
      j
        .progress(j.opts.progress)
        .done(j.opts.done, j.opts.complete)
        .fail(j.opts.fail)
        .always(j.opts.always)
    );
  }
  function cX(a, b) {
    var c, d, e, f, g;
    for (c in a) {
      (d = p.camelCase(c)),
        (e = b[d]),
        (f = a[c]),
        p.isArray(f) && ((e = f[1]), (f = a[c] = f[0])),
        c !== d && ((a[d] = f), delete a[c]),
        (g = p.cssHooks[d]);
      if (g && "expand" in g) {
        (f = g.expand(f)), delete a[d];
        for (c in f) c in a || ((a[c] = f[c]), (b[c] = e));
      } else b[d] = e;
    }
  }
  function cY(a, b, c) {
    var d,
      e,
      f,
      g,
      h,
      i,
      j,
      k,
      l = this,
      m = a.style,
      n = {},
      o = [],
      q = a.nodeType && bZ(a);
    c.queue ||
      ((j = p._queueHooks(a, "fx")),
      j.unqueued == null &&
        ((j.unqueued = 0),
        (k = j.empty.fire),
        (j.empty.fire = function () {
          j.unqueued || k();
        })),
      j.unqueued++,
      l.always(function () {
        l.always(function () {
          j.unqueued--, p.queue(a, "fx").length || j.empty.fire();
        });
      })),
      a.nodeType === 1 &&
        ("height" in b || "width" in b) &&
        ((c.overflow = [m.overflow, m.overflowX, m.overflowY]),
        p.css(a, "display") === "inline" &&
          p.css(a, "float") === "none" &&
          (!p.support.inlineBlockNeedsLayout || cc(a.nodeName) === "inline"
            ? (m.display = "inline-block")
            : (m.zoom = 1))),
      c.overflow &&
        ((m.overflow = "hidden"),
        p.support.shrinkWrapBlocks ||
          l.done(function () {
            (m.overflow = c.overflow[0]),
              (m.overflowX = c.overflow[1]),
              (m.overflowY = c.overflow[2]);
          }));
    for (d in b) {
      f = b[d];
      if (cP.exec(f)) {
        delete b[d];
        if (f === (q ? "hide" : "show")) continue;
        o.push(d);
      }
    }
    g = o.length;
    if (g) {
      (h = p._data(a, "fxshow") || p._data(a, "fxshow", {})),
        q
          ? p(a).show()
          : l.done(function () {
              p(a).hide();
            }),
        l.done(function () {
          var b;
          p.removeData(a, "fxshow", !0);
          for (b in n) p.style(a, b, n[b]);
        });
      for (d = 0; d < g; d++)
        (e = o[d]),
          (i = l.createTween(e, q ? h[e] : 0)),
          (n[e] = h[e] || p.style(a, e)),
          e in h ||
            ((h[e] = i.start),
            q &&
              ((i.end = i.start),
              (i.start = e === "width" || e === "height" ? 1 : 0)));
    }
  }
  function cZ(a, b, c, d, e) {
    return new cZ.prototype.init(a, b, c, d, e);
  }
  function c$(a, b) {
    var c,
      d = { height: a },
      e = 0;
    b = b ? 1 : 0;
    for (; e < 4; e += 2 - b)
      (c = bV[e]), (d["margin" + c] = d["padding" + c] = a);
    return b && (d.opacity = d.width = a), d;
  }
  function da(a) {
    return p.isWindow(a)
      ? a
      : a.nodeType === 9
        ? a.defaultView || a.parentWindow
        : !1;
  }
  var c,
    d,
    e = a.document,
    f = a.location,
    g = a.navigator,
    h = a.jQuery,
    i = a.$,
    j = Array.prototype.push,
    k = Array.prototype.slice,
    l = Array.prototype.indexOf,
    m = Object.prototype.toString,
    n = Object.prototype.hasOwnProperty,
    o = String.prototype.trim,
    p = function (a, b) {
      return new p.fn.init(a, b, c);
    },
    q = /[\-+]?(?:\d*\.|)\d+(?:[eE][\-+]?\d+|)/.source,
    r = /\S/,
    s = /\s+/,
    t = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,
    u = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,
    v = /^<(\w+)\s*\/?>(?:<\/\1>|)$/,
    w = /^[\],:{}\s]*$/,
    x = /(?:^|:|,)(?:\s*\[)+/g,
    y = /\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g,
    z = /"[^"\\\r\n]*"|true|false|null|-?(?:\d\d*\.|)\d+(?:[eE][\-+]?\d+|)/g,
    A = /^-ms-/,
    B = /-([\da-z])/gi,
    C = function (a, b) {
      return (b + "").toUpperCase();
    },
    D = function () {
      e.addEventListener
        ? (e.removeEventListener("DOMContentLoaded", D, !1), p.ready())
        : e.readyState === "complete" &&
          (e.detachEvent("onreadystatechange", D), p.ready());
    },
    E = {};
  (p.fn = p.prototype =
    {
      constructor: p,
      init: function (a, c, d) {
        var f, g, h, i;
        if (!a) return this;
        if (a.nodeType)
          return (this.context = this[0] = a), (this.length = 1), this;
        if (typeof a == "string") {
          a.charAt(0) === "<" && a.charAt(a.length - 1) === ">" && a.length >= 3
            ? (f = [null, a, null])
            : (f = u.exec(a));
          if (f && (f[1] || !c)) {
            if (f[1])
              return (
                (c = c instanceof p ? c[0] : c),
                (i = c && c.nodeType ? c.ownerDocument || c : e),
                (a = p.parseHTML(f[1], i, !0)),
                v.test(f[1]) && p.isPlainObject(c) && this.attr.call(a, c, !0),
                p.merge(this, a)
              );
            g = e.getElementById(f[2]);
            if (g && g.parentNode) {
              if (g.id !== f[2]) return d.find(a);
              (this.length = 1), (this[0] = g);
            }
            return (this.context = e), (this.selector = a), this;
          }
          return !c || c.jquery
            ? (c || d).find(a)
            : this.constructor(c).find(a);
        }
        return p.isFunction(a)
          ? d.ready(a)
          : (a.selector !== b &&
              ((this.selector = a.selector), (this.context = a.context)),
            p.makeArray(a, this));
      },
      selector: "",
      jquery: "1.8.1",
      length: 0,
      size: function () {
        return this.length;
      },
      toArray: function () {
        return k.call(this);
      },
      get: function (a) {
        return a == null
          ? this.toArray()
          : a < 0
            ? this[this.length + a]
            : this[a];
      },
      pushStack: function (a, b, c) {
        var d = p.merge(this.constructor(), a);
        return (
          (d.prevObject = this),
          (d.context = this.context),
          b === "find"
            ? (d.selector = this.selector + (this.selector ? " " : "") + c)
            : b && (d.selector = this.selector + "." + b + "(" + c + ")"),
          d
        );
      },
      each: function (a, b) {
        return p.each(this, a, b);
      },
      ready: function (a) {
        return p.ready.promise().done(a), this;
      },
      eq: function (a) {
        return (a = +a), a === -1 ? this.slice(a) : this.slice(a, a + 1);
      },
      first: function () {
        return this.eq(0);
      },
      last: function () {
        return this.eq(-1);
      },
      slice: function () {
        return this.pushStack(
          k.apply(this, arguments),
          "slice",
          k.call(arguments).join(","),
        );
      },
      map: function (a) {
        return this.pushStack(
          p.map(this, function (b, c) {
            return a.call(b, c, b);
          }),
        );
      },
      end: function () {
        return this.prevObject || this.constructor(null);
      },
      push: j,
      sort: [].sort,
      splice: [].splice,
    }),
    (p.fn.init.prototype = p.fn),
    (p.extend = p.fn.extend =
      function () {
        var a,
          c,
          d,
          e,
          f,
          g,
          h = arguments[0] || {},
          i = 1,
          j = arguments.length,
          k = !1;
        typeof h == "boolean" && ((k = h), (h = arguments[1] || {}), (i = 2)),
          typeof h != "object" && !p.isFunction(h) && (h = {}),
          j === i && ((h = this), --i);
        for (; i < j; i++)
          if ((a = arguments[i]) != null)
            for (c in a) {
              (d = h[c]), (e = a[c]);
              if (h === e) continue;
              k && e && (p.isPlainObject(e) || (f = p.isArray(e)))
                ? (f
                    ? ((f = !1), (g = d && p.isArray(d) ? d : []))
                    : (g = d && p.isPlainObject(d) ? d : {}),
                  (h[c] = p.extend(k, g, e)))
                : e !== b && (h[c] = e);
            }
        return h;
      }),
    p.extend({
      noConflict: function (b) {
        return a.$ === p && (a.$ = i), b && a.jQuery === p && (a.jQuery = h), p;
      },
      isReady: !1,
      readyWait: 1,
      holdReady: function (a) {
        a ? p.readyWait++ : p.ready(!0);
      },
      ready: function (a) {
        if (a === !0 ? --p.readyWait : p.isReady) return;
        if (!e.body) return setTimeout(p.ready, 1);
        p.isReady = !0;
        if (a !== !0 && --p.readyWait > 0) return;
        d.resolveWith(e, [p]),
          p.fn.trigger && p(e).trigger("ready").off("ready");
      },
      isFunction: function (a) {
        return p.type(a) === "function";
      },
      isArray:
        Array.isArray ||
        function (a) {
          return p.type(a) === "array";
        },
      isWindow: function (a) {
        return a != null && a == a.window;
      },
      isNumeric: function (a) {
        return !isNaN(parseFloat(a)) && isFinite(a);
      },
      type: function (a) {
        return a == null ? String(a) : E[m.call(a)] || "object";
      },
      isPlainObject: function (a) {
        if (!a || p.type(a) !== "object" || a.nodeType || p.isWindow(a))
          return !1;
        try {
          if (
            a.constructor &&
            !n.call(a, "constructor") &&
            !n.call(a.constructor.prototype, "isPrototypeOf")
          )
            return !1;
        } catch (c) {
          return !1;
        }
        var d;
        for (d in a);
        return d === b || n.call(a, d);
      },
      isEmptyObject: function (a) {
        var b;
        for (b in a) return !1;
        return !0;
      },
      error: function (a) {
        throw new Error(a);
      },
      parseHTML: function (a, b, c) {
        var d;
        return !a || typeof a != "string"
          ? null
          : (typeof b == "boolean" && ((c = b), (b = 0)),
            (b = b || e),
            (d = v.exec(a))
              ? [b.createElement(d[1])]
              : ((d = p.buildFragment([a], b, c ? null : [])),
                p.merge(
                  [],
                  (d.cacheable ? p.clone(d.fragment) : d.fragment).childNodes,
                )));
      },
      parseJSON: function (b) {
        if (!b || typeof b != "string") return null;
        b = p.trim(b);
        if (a.JSON && a.JSON.parse) return a.JSON.parse(b);
        if (w.test(b.replace(y, "@").replace(z, "]").replace(x, "")))
          return new Function("return " + b)();
        p.error("Invalid JSON: " + b);
      },
      parseXML: function (c) {
        var d, e;
        if (!c || typeof c != "string") return null;
        try {
          a.DOMParser
            ? ((e = new DOMParser()), (d = e.parseFromString(c, "text/xml")))
            : ((d = new ActiveXObject("Microsoft.XMLDOM")),
              (d.async = "false"),
              d.loadXML(c));
        } catch (f) {
          d = b;
        }
        return (
          (!d ||
            !d.documentElement ||
            d.getElementsByTagName("parsererror").length) &&
            p.error("Invalid XML: " + c),
          d
        );
      },
      noop: function () {},
      globalEval: function (b) {
        b &&
          r.test(b) &&
          (
            a.execScript ||
            function (b) {
              a.eval.call(a, b);
            }
          )(b);
      },
      camelCase: function (a) {
        return a.replace(A, "ms-").replace(B, C);
      },
      nodeName: function (a, b) {
        return a.nodeName && a.nodeName.toUpperCase() === b.toUpperCase();
      },
      each: function (a, c, d) {
        var e,
          f = 0,
          g = a.length,
          h = g === b || p.isFunction(a);
        if (d) {
          if (h) {
            for (e in a) if (c.apply(a[e], d) === !1) break;
          } else for (; f < g; ) if (c.apply(a[f++], d) === !1) break;
        } else if (h) {
          for (e in a) if (c.call(a[e], e, a[e]) === !1) break;
        } else for (; f < g; ) if (c.call(a[f], f, a[f++]) === !1) break;
        return a;
      },
      trim:
        o && !o.call("﻿ ")
          ? function (a) {
              return a == null ? "" : o.call(a);
            }
          : function (a) {
              return a == null ? "" : a.toString().replace(t, "");
            },
      makeArray: function (a, b) {
        var c,
          d = b || [];
        return (
          a != null &&
            ((c = p.type(a)),
            a.length == null ||
            c === "string" ||
            c === "function" ||
            c === "regexp" ||
            p.isWindow(a)
              ? j.call(d, a)
              : p.merge(d, a)),
          d
        );
      },
      inArray: function (a, b, c) {
        var d;
        if (b) {
          if (l) return l.call(b, a, c);
          (d = b.length), (c = c ? (c < 0 ? Math.max(0, d + c) : c) : 0);
          for (; c < d; c++) if (c in b && b[c] === a) return c;
        }
        return -1;
      },
      merge: function (a, c) {
        var d = c.length,
          e = a.length,
          f = 0;
        if (typeof d == "number") for (; f < d; f++) a[e++] = c[f];
        else while (c[f] !== b) a[e++] = c[f++];
        return (a.length = e), a;
      },
      grep: function (a, b, c) {
        var d,
          e = [],
          f = 0,
          g = a.length;
        c = !!c;
        for (; f < g; f++) (d = !!b(a[f], f)), c !== d && e.push(a[f]);
        return e;
      },
      map: function (a, c, d) {
        var e,
          f,
          g = [],
          h = 0,
          i = a.length,
          j =
            a instanceof p ||
            (i !== b &&
              typeof i == "number" &&
              ((i > 0 && a[0] && a[i - 1]) || i === 0 || p.isArray(a)));
        if (j)
          for (; h < i; h++)
            (e = c(a[h], h, d)), e != null && (g[g.length] = e);
        else for (f in a) (e = c(a[f], f, d)), e != null && (g[g.length] = e);
        return g.concat.apply([], g);
      },
      guid: 1,
      proxy: function (a, c) {
        var d, e, f;
        return (
          typeof c == "string" && ((d = a[c]), (c = a), (a = d)),
          p.isFunction(a)
            ? ((e = k.call(arguments, 2)),
              (f = function () {
                return a.apply(c, e.concat(k.call(arguments)));
              }),
              (f.guid = a.guid = a.guid || f.guid || p.guid++),
              f)
            : b
        );
      },
      access: function (a, c, d, e, f, g, h) {
        var i,
          j = d == null,
          k = 0,
          l = a.length;
        if (d && typeof d == "object") {
          for (k in d) p.access(a, c, k, d[k], 1, g, e);
          f = 1;
        } else if (e !== b) {
          (i = h === b && p.isFunction(e)),
            j &&
              (i
                ? ((i = c),
                  (c = function (a, b, c) {
                    return i.call(p(a), c);
                  }))
                : (c.call(a, e), (c = null)));
          if (c)
            for (; k < l; k++)
              c(a[k], d, i ? e.call(a[k], k, c(a[k], d)) : e, h);
          f = 1;
        }
        return f ? a : j ? c.call(a) : l ? c(a[0], d) : g;
      },
      now: function () {
        return new Date().getTime();
      },
    }),
    (p.ready.promise = function (b) {
      if (!d) {
        d = p.Deferred();
        if (e.readyState === "complete") setTimeout(p.ready, 1);
        else if (e.addEventListener)
          e.addEventListener("DOMContentLoaded", D, !1),
            a.addEventListener("load", p.ready, !1);
        else {
          e.attachEvent("onreadystatechange", D),
            a.attachEvent("onload", p.ready);
          var c = !1;
          try {
            c = a.frameElement == null && e.documentElement;
          } catch (f) {}
          c &&
            c.doScroll &&
            (function g() {
              if (!p.isReady) {
                try {
                  c.doScroll("left");
                } catch (a) {
                  return setTimeout(g, 50);
                }
                p.ready();
              }
            })();
        }
      }
      return d.promise(b);
    }),
    p.each(
      "Boolean Number String Function Array Date RegExp Object".split(" "),
      function (a, b) {
        E["[object " + b + "]"] = b.toLowerCase();
      },
    ),
    (c = p(e));
  var F = {};
  (p.Callbacks = function (a) {
    a = typeof a == "string" ? F[a] || G(a) : p.extend({}, a);
    var c,
      d,
      e,
      f,
      g,
      h,
      i = [],
      j = !a.once && [],
      k = function (b) {
        (c = a.memory && b),
          (d = !0),
          (h = f || 0),
          (f = 0),
          (g = i.length),
          (e = !0);
        for (; i && h < g; h++)
          if (i[h].apply(b[0], b[1]) === !1 && a.stopOnFalse) {
            c = !1;
            break;
          }
        (e = !1),
          i && (j ? j.length && k(j.shift()) : c ? (i = []) : l.disable());
      },
      l = {
        add: function () {
          if (i) {
            var b = i.length;
            (function d(b) {
              p.each(b, function (b, c) {
                var e = p.type(c);
                e === "function" && (!a.unique || !l.has(c))
                  ? i.push(c)
                  : c && c.length && e !== "string" && d(c);
              });
            })(arguments),
              e ? (g = i.length) : c && ((f = b), k(c));
          }
          return this;
        },
        remove: function () {
          return (
            i &&
              p.each(arguments, function (a, b) {
                var c;
                while ((c = p.inArray(b, i, c)) > -1)
                  i.splice(c, 1), e && (c <= g && g--, c <= h && h--);
              }),
            this
          );
        },
        has: function (a) {
          return p.inArray(a, i) > -1;
        },
        empty: function () {
          return (i = []), this;
        },
        disable: function () {
          return (i = j = c = b), this;
        },
        disabled: function () {
          return !i;
        },
        lock: function () {
          return (j = b), c || l.disable(), this;
        },
        locked: function () {
          return !j;
        },
        fireWith: function (a, b) {
          return (
            (b = b || []),
            (b = [a, b.slice ? b.slice() : b]),
            i && (!d || j) && (e ? j.push(b) : k(b)),
            this
          );
        },
        fire: function () {
          return l.fireWith(this, arguments), this;
        },
        fired: function () {
          return !!d;
        },
      };
    return l;
  }),
    p.extend({
      Deferred: function (a) {
        var b = [
            ["resolve", "done", p.Callbacks("once memory"), "resolved"],
            ["reject", "fail", p.Callbacks("once memory"), "rejected"],
            ["notify", "progress", p.Callbacks("memory")],
          ],
          c = "pending",
          d = {
            state: function () {
              return c;
            },
            always: function () {
              return e.done(arguments).fail(arguments), this;
            },
            then: function () {
              var a = arguments;
              return p
                .Deferred(function (c) {
                  p.each(b, function (b, d) {
                    var f = d[0],
                      g = a[b];
                    e[d[1]](
                      p.isFunction(g)
                        ? function () {
                            var a = g.apply(this, arguments);
                            a && p.isFunction(a.promise)
                              ? a
                                  .promise()
                                  .done(c.resolve)
                                  .fail(c.reject)
                                  .progress(c.notify)
                              : c[f + "With"](this === e ? c : this, [a]);
                          }
                        : c[f],
                    );
                  }),
                    (a = null);
                })
                .promise();
            },
            promise: function (a) {
              return typeof a == "object" ? p.extend(a, d) : d;
            },
          },
          e = {};
        return (
          (d.pipe = d.then),
          p.each(b, function (a, f) {
            var g = f[2],
              h = f[3];
            (d[f[1]] = g.add),
              h &&
                g.add(
                  function () {
                    c = h;
                  },
                  b[a ^ 1][2].disable,
                  b[2][2].lock,
                ),
              (e[f[0]] = g.fire),
              (e[f[0] + "With"] = g.fireWith);
          }),
          d.promise(e),
          a && a.call(e, e),
          e
        );
      },
      when: function (a) {
        var b = 0,
          c = k.call(arguments),
          d = c.length,
          e = d !== 1 || (a && p.isFunction(a.promise)) ? d : 0,
          f = e === 1 ? a : p.Deferred(),
          g = function (a, b, c) {
            return function (d) {
              (b[a] = this),
                (c[a] = arguments.length > 1 ? k.call(arguments) : d),
                c === h ? f.notifyWith(b, c) : --e || f.resolveWith(b, c);
            };
          },
          h,
          i,
          j;
        if (d > 1) {
          (h = new Array(d)), (i = new Array(d)), (j = new Array(d));
          for (; b < d; b++)
            c[b] && p.isFunction(c[b].promise)
              ? c[b]
                  .promise()
                  .done(g(b, j, c))
                  .fail(f.reject)
                  .progress(g(b, i, h))
              : --e;
        }
        return e || f.resolveWith(j, c), f.promise();
      },
    }),
    (p.support = (function () {
      var b,
        c,
        d,
        f,
        g,
        h,
        i,
        j,
        k,
        l,
        m,
        n = e.createElement("div");
      n.setAttribute("className", "t"),
        (n.innerHTML =
          "  <link/><table></table><a href='/a'>a</a><input type='checkbox'/>"),
        (c = n.getElementsByTagName("*")),
        (d = n.getElementsByTagName("a")[0]),
        (d.style.cssText = "top:1px;float:left;opacity:.5");
      if (!c || !c.length || !d) return {};
      (f = e.createElement("select")),
        (g = f.appendChild(e.createElement("option"))),
        (h = n.getElementsByTagName("input")[0]),
        (b = {
          leadingWhitespace: n.firstChild.nodeType === 3,
          tbody: !n.getElementsByTagName("tbody").length,
          htmlSerialize: !!n.getElementsByTagName("link").length,
          style: /top/.test(d.getAttribute("style")),
          hrefNormalized: d.getAttribute("href") === "/a",
          opacity: /^0.5/.test(d.style.opacity),
          cssFloat: !!d.style.cssFloat,
          checkOn: h.value === "on",
          optSelected: g.selected,
          getSetAttribute: n.className !== "t",
          enctype: !!e.createElement("form").enctype,
          html5Clone:
            e.createElement("nav").cloneNode(!0).outerHTML !== "<:nav></:nav>",
          boxModel: e.compatMode === "CSS1Compat",
          submitBubbles: !0,
          changeBubbles: !0,
          focusinBubbles: !1,
          deleteExpando: !0,
          noCloneEvent: !0,
          inlineBlockNeedsLayout: !1,
          shrinkWrapBlocks: !1,
          reliableMarginRight: !0,
          boxSizingReliable: !0,
          pixelPosition: !1,
        }),
        (h.checked = !0),
        (b.noCloneChecked = h.cloneNode(!0).checked),
        (f.disabled = !0),
        (b.optDisabled = !g.disabled);
      try {
        delete n.test;
      } catch (o) {
        b.deleteExpando = !1;
      }
      !n.addEventListener &&
        n.attachEvent &&
        n.fireEvent &&
        (n.attachEvent(
          "onclick",
          (m = function () {
            b.noCloneEvent = !1;
          }),
        ),
        n.cloneNode(!0).fireEvent("onclick"),
        n.detachEvent("onclick", m)),
        (h = e.createElement("input")),
        (h.value = "t"),
        h.setAttribute("type", "radio"),
        (b.radioValue = h.value === "t"),
        h.setAttribute("checked", "checked"),
        h.setAttribute("name", "t"),
        n.appendChild(h),
        (i = e.createDocumentFragment()),
        i.appendChild(n.lastChild),
        (b.checkClone = i.cloneNode(!0).cloneNode(!0).lastChild.checked),
        (b.appendChecked = h.checked),
        i.removeChild(h),
        i.appendChild(n);
      if (n.attachEvent)
        for (k in { submit: !0, change: !0, focusin: !0 })
          (j = "on" + k),
            (l = j in n),
            l ||
              (n.setAttribute(j, "return;"), (l = typeof n[j] == "function")),
            (b[k + "Bubbles"] = l);
      return (
        p(function () {
          var c,
            d,
            f,
            g,
            h = "padding:0;margin:0;border:0;display:block;overflow:hidden;",
            i = e.getElementsByTagName("body")[0];
          if (!i) return;
          (c = e.createElement("div")),
            (c.style.cssText =
              "visibility:hidden;border:0;width:0;height:0;position:static;top:0;margin-top:1px"),
            i.insertBefore(c, i.firstChild),
            (d = e.createElement("div")),
            c.appendChild(d),
            (d.innerHTML = "<table><tr><td></td><td>t</td></tr></table>"),
            (f = d.getElementsByTagName("td")),
            (f[0].style.cssText = "padding:0;margin:0;border:0;display:none"),
            (l = f[0].offsetHeight === 0),
            (f[0].style.display = ""),
            (f[1].style.display = "none"),
            (b.reliableHiddenOffsets = l && f[0].offsetHeight === 0),
            (d.innerHTML = ""),
            (d.style.cssText =
              "box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:1px;border:1px;display:block;width:4px;margin-top:1%;position:absolute;top:1%;"),
            (b.boxSizing = d.offsetWidth === 4),
            (b.doesNotIncludeMarginInBodyOffset = i.offsetTop !== 1),
            a.getComputedStyle &&
              ((b.pixelPosition =
                (a.getComputedStyle(d, null) || {}).top !== "1%"),
              (b.boxSizingReliable =
                (a.getComputedStyle(d, null) || { width: "4px" }).width ===
                "4px"),
              (g = e.createElement("div")),
              (g.style.cssText = d.style.cssText = h),
              (g.style.marginRight = g.style.width = "0"),
              (d.style.width = "1px"),
              d.appendChild(g),
              (b.reliableMarginRight = !parseFloat(
                (a.getComputedStyle(g, null) || {}).marginRight,
              ))),
            typeof d.style.zoom != "undefined" &&
              ((d.innerHTML = ""),
              (d.style.cssText =
                h + "width:1px;padding:1px;display:inline;zoom:1"),
              (b.inlineBlockNeedsLayout = d.offsetWidth === 3),
              (d.style.display = "block"),
              (d.style.overflow = "visible"),
              (d.innerHTML = "<div></div>"),
              (d.firstChild.style.width = "5px"),
              (b.shrinkWrapBlocks = d.offsetWidth !== 3),
              (c.style.zoom = 1)),
            i.removeChild(c),
            (c = d = f = g = null);
        }),
        i.removeChild(n),
        (c = d = f = g = h = i = n = null),
        b
      );
    })());
  var H = /(?:\{[\s\S]*\}|\[[\s\S]*\])$/,
    I = /([A-Z])/g;
  p.extend({
    cache: {},
    deletedIds: [],
    uuid: 0,
    expando: "jQuery" + (p.fn.jquery + Math.random()).replace(/\D/g, ""),
    noData: {
      embed: !0,
      object: "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",
      applet: !0,
    },
    hasData: function (a) {
      return (
        (a = a.nodeType ? p.cache[a[p.expando]] : a[p.expando]), !!a && !K(a)
      );
    },
    data: function (a, c, d, e) {
      if (!p.acceptData(a)) return;
      var f,
        g,
        h = p.expando,
        i = typeof c == "string",
        j = a.nodeType,
        k = j ? p.cache : a,
        l = j ? a[h] : a[h] && h;
      if ((!l || !k[l] || (!e && !k[l].data)) && i && d === b) return;
      l || (j ? (a[h] = l = p.deletedIds.pop() || ++p.uuid) : (l = h)),
        k[l] || ((k[l] = {}), j || (k[l].toJSON = p.noop));
      if (typeof c == "object" || typeof c == "function")
        e ? (k[l] = p.extend(k[l], c)) : (k[l].data = p.extend(k[l].data, c));
      return (
        (f = k[l]),
        e || (f.data || (f.data = {}), (f = f.data)),
        d !== b && (f[p.camelCase(c)] = d),
        i ? ((g = f[c]), g == null && (g = f[p.camelCase(c)])) : (g = f),
        g
      );
    },
    removeData: function (a, b, c) {
      if (!p.acceptData(a)) return;
      var d,
        e,
        f,
        g = a.nodeType,
        h = g ? p.cache : a,
        i = g ? a[p.expando] : p.expando;
      if (!h[i]) return;
      if (b) {
        d = c ? h[i] : h[i].data;
        if (d) {
          p.isArray(b) ||
            (b in d
              ? (b = [b])
              : ((b = p.camelCase(b)),
                b in d ? (b = [b]) : (b = b.split(" "))));
          for (e = 0, f = b.length; e < f; e++) delete d[b[e]];
          if (!(c ? K : p.isEmptyObject)(d)) return;
        }
      }
      if (!c) {
        delete h[i].data;
        if (!K(h[i])) return;
      }
      g
        ? p.cleanData([a], !0)
        : p.support.deleteExpando || h != h.window
          ? delete h[i]
          : (h[i] = null);
    },
    _data: function (a, b, c) {
      return p.data(a, b, c, !0);
    },
    acceptData: function (a) {
      var b = a.nodeName && p.noData[a.nodeName.toLowerCase()];
      return !b || (b !== !0 && a.getAttribute("classid") === b);
    },
  }),
    p.fn.extend({
      data: function (a, c) {
        var d,
          e,
          f,
          g,
          h,
          i = this[0],
          j = 0,
          k = null;
        if (a === b) {
          if (this.length) {
            k = p.data(i);
            if (i.nodeType === 1 && !p._data(i, "parsedAttrs")) {
              f = i.attributes;
              for (h = f.length; j < h; j++)
                (g = f[j].name),
                  g.indexOf("data-") === 0 &&
                    ((g = p.camelCase(g.substring(5))), J(i, g, k[g]));
              p._data(i, "parsedAttrs", !0);
            }
          }
          return k;
        }
        return typeof a == "object"
          ? this.each(function () {
              p.data(this, a);
            })
          : ((d = a.split(".", 2)),
            (d[1] = d[1] ? "." + d[1] : ""),
            (e = d[1] + "!"),
            p.access(
              this,
              function (c) {
                if (c === b)
                  return (
                    (k = this.triggerHandler("getData" + e, [d[0]])),
                    k === b && i && ((k = p.data(i, a)), (k = J(i, a, k))),
                    k === b && d[1] ? this.data(d[0]) : k
                  );
                (d[1] = c),
                  this.each(function () {
                    var b = p(this);
                    b.triggerHandler("setData" + e, d),
                      p.data(this, a, c),
                      b.triggerHandler("changeData" + e, d);
                  });
              },
              null,
              c,
              arguments.length > 1,
              null,
              !1,
            ));
      },
      removeData: function (a) {
        return this.each(function () {
          p.removeData(this, a);
        });
      },
    }),
    p.extend({
      queue: function (a, b, c) {
        var d;
        if (a)
          return (
            (b = (b || "fx") + "queue"),
            (d = p._data(a, b)),
            c &&
              (!d || p.isArray(c)
                ? (d = p._data(a, b, p.makeArray(c)))
                : d.push(c)),
            d || []
          );
      },
      dequeue: function (a, b) {
        b = b || "fx";
        var c = p.queue(a, b),
          d = c.length,
          e = c.shift(),
          f = p._queueHooks(a, b),
          g = function () {
            p.dequeue(a, b);
          };
        e === "inprogress" && ((e = c.shift()), d--),
          e &&
            (b === "fx" && c.unshift("inprogress"),
            delete f.stop,
            e.call(a, g, f)),
          !d && f && f.empty.fire();
      },
      _queueHooks: function (a, b) {
        var c = b + "queueHooks";
        return (
          p._data(a, c) ||
          p._data(a, c, {
            empty: p.Callbacks("once memory").add(function () {
              p.removeData(a, b + "queue", !0), p.removeData(a, c, !0);
            }),
          })
        );
      },
    }),
    p.fn.extend({
      queue: function (a, c) {
        var d = 2;
        return (
          typeof a != "string" && ((c = a), (a = "fx"), d--),
          arguments.length < d
            ? p.queue(this[0], a)
            : c === b
              ? this
              : this.each(function () {
                  var b = p.queue(this, a, c);
                  p._queueHooks(this, a),
                    a === "fx" && b[0] !== "inprogress" && p.dequeue(this, a);
                })
        );
      },
      dequeue: function (a) {
        return this.each(function () {
          p.dequeue(this, a);
        });
      },
      delay: function (a, b) {
        return (
          (a = p.fx ? p.fx.speeds[a] || a : a),
          (b = b || "fx"),
          this.queue(b, function (b, c) {
            var d = setTimeout(b, a);
            c.stop = function () {
              clearTimeout(d);
            };
          })
        );
      },
      clearQueue: function (a) {
        return this.queue(a || "fx", []);
      },
      promise: function (a, c) {
        var d,
          e = 1,
          f = p.Deferred(),
          g = this,
          h = this.length,
          i = function () {
            --e || f.resolveWith(g, [g]);
          };
        typeof a != "string" && ((c = a), (a = b)), (a = a || "fx");
        while (h--)
          (d = p._data(g[h], a + "queueHooks")),
            d && d.empty && (e++, d.empty.add(i));
        return i(), f.promise(c);
      },
    });
  var L,
    M,
    N,
    O = /[\t\r\n]/g,
    P = /\r/g,
    Q = /^(?:button|input)$/i,
    R = /^(?:button|input|object|select|textarea)$/i,
    S = /^a(?:rea|)$/i,
    T =
      /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,
    U = p.support.getSetAttribute;
  p.fn.extend({
    attr: function (a, b) {
      return p.access(this, p.attr, a, b, arguments.length > 1);
    },
    removeAttr: function (a) {
      return this.each(function () {
        p.removeAttr(this, a);
      });
    },
    prop: function (a, b) {
      return p.access(this, p.prop, a, b, arguments.length > 1);
    },
    removeProp: function (a) {
      return (
        (a = p.propFix[a] || a),
        this.each(function () {
          try {
            (this[a] = b), delete this[a];
          } catch (c) {}
        })
      );
    },
    addClass: function (a) {
      var b, c, d, e, f, g, h;
      if (p.isFunction(a))
        return this.each(function (b) {
          p(this).addClass(a.call(this, b, this.className));
        });
      if (a && typeof a == "string") {
        b = a.split(s);
        for (c = 0, d = this.length; c < d; c++) {
          e = this[c];
          if (e.nodeType === 1)
            if (!e.className && b.length === 1) e.className = a;
            else {
              f = " " + e.className + " ";
              for (g = 0, h = b.length; g < h; g++)
                ~f.indexOf(" " + b[g] + " ") || (f += b[g] + " ");
              e.className = p.trim(f);
            }
        }
      }
      return this;
    },
    removeClass: function (a) {
      var c, d, e, f, g, h, i;
      if (p.isFunction(a))
        return this.each(function (b) {
          p(this).removeClass(a.call(this, b, this.className));
        });
      if ((a && typeof a == "string") || a === b) {
        c = (a || "").split(s);
        for (h = 0, i = this.length; h < i; h++) {
          e = this[h];
          if (e.nodeType === 1 && e.className) {
            d = (" " + e.className + " ").replace(O, " ");
            for (f = 0, g = c.length; f < g; f++)
              while (d.indexOf(" " + c[f] + " ") > -1)
                d = d.replace(" " + c[f] + " ", " ");
            e.className = a ? p.trim(d) : "";
          }
        }
      }
      return this;
    },
    toggleClass: function (a, b) {
      var c = typeof a,
        d = typeof b == "boolean";
      return p.isFunction(a)
        ? this.each(function (c) {
            p(this).toggleClass(a.call(this, c, this.className, b), b);
          })
        : this.each(function () {
            if (c === "string") {
              var e,
                f = 0,
                g = p(this),
                h = b,
                i = a.split(s);
              while ((e = i[f++]))
                (h = d ? h : !g.hasClass(e)),
                  g[h ? "addClass" : "removeClass"](e);
            } else if (c === "undefined" || c === "boolean")
              this.className && p._data(this, "__className__", this.className),
                (this.className =
                  this.className || a === !1
                    ? ""
                    : p._data(this, "__className__") || "");
          });
    },
    hasClass: function (a) {
      var b = " " + a + " ",
        c = 0,
        d = this.length;
      for (; c < d; c++)
        if (
          this[c].nodeType === 1 &&
          (" " + this[c].className + " ").replace(O, " ").indexOf(b) > -1
        )
          return !0;
      return !1;
    },
    val: function (a) {
      var c,
        d,
        e,
        f = this[0];
      if (!arguments.length) {
        if (f)
          return (
            (c = p.valHooks[f.type] || p.valHooks[f.nodeName.toLowerCase()]),
            c && "get" in c && (d = c.get(f, "value")) !== b
              ? d
              : ((d = f.value),
                typeof d == "string" ? d.replace(P, "") : d == null ? "" : d)
          );
        return;
      }
      return (
        (e = p.isFunction(a)),
        this.each(function (d) {
          var f,
            g = p(this);
          if (this.nodeType !== 1) return;
          e ? (f = a.call(this, d, g.val())) : (f = a),
            f == null
              ? (f = "")
              : typeof f == "number"
                ? (f += "")
                : p.isArray(f) &&
                  (f = p.map(f, function (a) {
                    return a == null ? "" : a + "";
                  })),
            (c =
              p.valHooks[this.type] || p.valHooks[this.nodeName.toLowerCase()]);
          if (!c || !("set" in c) || c.set(this, f, "value") === b)
            this.value = f;
        })
      );
    },
  }),
    p.extend({
      valHooks: {
        option: {
          get: function (a) {
            var b = a.attributes.value;
            return !b || b.specified ? a.value : a.text;
          },
        },
        select: {
          get: function (a) {
            var b,
              c,
              d,
              e,
              f = a.selectedIndex,
              g = [],
              h = a.options,
              i = a.type === "select-one";
            if (f < 0) return null;
            (c = i ? f : 0), (d = i ? f + 1 : h.length);
            for (; c < d; c++) {
              e = h[c];
              if (
                e.selected &&
                (p.support.optDisabled
                  ? !e.disabled
                  : e.getAttribute("disabled") === null) &&
                (!e.parentNode.disabled ||
                  !p.nodeName(e.parentNode, "optgroup"))
              ) {
                b = p(e).val();
                if (i) return b;
                g.push(b);
              }
            }
            return i && !g.length && h.length ? p(h[f]).val() : g;
          },
          set: function (a, b) {
            var c = p.makeArray(b);
            return (
              p(a)
                .find("option")
                .each(function () {
                  this.selected = p.inArray(p(this).val(), c) >= 0;
                }),
              c.length || (a.selectedIndex = -1),
              c
            );
          },
        },
      },
      attrFn: {},
      attr: function (a, c, d, e) {
        var f,
          g,
          h,
          i = a.nodeType;
        if (!a || i === 3 || i === 8 || i === 2) return;
        if (e && p.isFunction(p.fn[c])) return p(a)[c](d);
        if (typeof a.getAttribute == "undefined") return p.prop(a, c, d);
        (h = i !== 1 || !p.isXMLDoc(a)),
          h &&
            ((c = c.toLowerCase()),
            (g = p.attrHooks[c] || (T.test(c) ? M : L)));
        if (d !== b) {
          if (d === null) {
            p.removeAttr(a, c);
            return;
          }
          return g && "set" in g && h && (f = g.set(a, d, c)) !== b
            ? f
            : (a.setAttribute(c, "" + d), d);
        }
        return g && "get" in g && h && (f = g.get(a, c)) !== null
          ? f
          : ((f = a.getAttribute(c)), f === null ? b : f);
      },
      removeAttr: function (a, b) {
        var c,
          d,
          e,
          f,
          g = 0;
        if (b && a.nodeType === 1) {
          d = b.split(s);
          for (; g < d.length; g++)
            (e = d[g]),
              e &&
                ((c = p.propFix[e] || e),
                (f = T.test(e)),
                f || p.attr(a, e, ""),
                a.removeAttribute(U ? e : c),
                f && c in a && (a[c] = !1));
        }
      },
      attrHooks: {
        type: {
          set: function (a, b) {
            if (Q.test(a.nodeName) && a.parentNode)
              p.error("type property can't be changed");
            else if (
              !p.support.radioValue &&
              b === "radio" &&
              p.nodeName(a, "input")
            ) {
              var c = a.value;
              return a.setAttribute("type", b), c && (a.value = c), b;
            }
          },
        },
        value: {
          get: function (a, b) {
            return L && p.nodeName(a, "button")
              ? L.get(a, b)
              : b in a
                ? a.value
                : null;
          },
          set: function (a, b, c) {
            if (L && p.nodeName(a, "button")) return L.set(a, b, c);
            a.value = b;
          },
        },
      },
      propFix: {
        tabindex: "tabIndex",
        readonly: "readOnly",
        for: "htmlFor",
        class: "className",
        maxlength: "maxLength",
        cellspacing: "cellSpacing",
        cellpadding: "cellPadding",
        rowspan: "rowSpan",
        colspan: "colSpan",
        usemap: "useMap",
        frameborder: "frameBorder",
        contenteditable: "contentEditable",
      },
      prop: function (a, c, d) {
        var e,
          f,
          g,
          h = a.nodeType;
        if (!a || h === 3 || h === 8 || h === 2) return;
        return (
          (g = h !== 1 || !p.isXMLDoc(a)),
          g && ((c = p.propFix[c] || c), (f = p.propHooks[c])),
          d !== b
            ? f && "set" in f && (e = f.set(a, d, c)) !== b
              ? e
              : (a[c] = d)
            : f && "get" in f && (e = f.get(a, c)) !== null
              ? e
              : a[c]
        );
      },
      propHooks: {
        tabIndex: {
          get: function (a) {
            var c = a.getAttributeNode("tabindex");
            return c && c.specified
              ? parseInt(c.value, 10)
              : R.test(a.nodeName) || (S.test(a.nodeName) && a.href)
                ? 0
                : b;
          },
        },
      },
    }),
    (M = {
      get: function (a, c) {
        var d,
          e = p.prop(a, c);
        return e === !0 ||
          (typeof e != "boolean" &&
            (d = a.getAttributeNode(c)) &&
            d.nodeValue !== !1)
          ? c.toLowerCase()
          : b;
      },
      set: function (a, b, c) {
        var d;
        return (
          b === !1
            ? p.removeAttr(a, c)
            : ((d = p.propFix[c] || c),
              d in a && (a[d] = !0),
              a.setAttribute(c, c.toLowerCase())),
          c
        );
      },
    }),
    U ||
      ((N = { name: !0, id: !0, coords: !0 }),
      (L = p.valHooks.button =
        {
          get: function (a, c) {
            var d;
            return (
              (d = a.getAttributeNode(c)),
              d && (N[c] ? d.value !== "" : d.specified) ? d.value : b
            );
          },
          set: function (a, b, c) {
            var d = a.getAttributeNode(c);
            return (
              d || ((d = e.createAttribute(c)), a.setAttributeNode(d)),
              (d.value = b + "")
            );
          },
        }),
      p.each(["width", "height"], function (a, b) {
        p.attrHooks[b] = p.extend(p.attrHooks[b], {
          set: function (a, c) {
            if (c === "") return a.setAttribute(b, "auto"), c;
          },
        });
      }),
      (p.attrHooks.contenteditable = {
        get: L.get,
        set: function (a, b, c) {
          b === "" && (b = "false"), L.set(a, b, c);
        },
      })),
    p.support.hrefNormalized ||
      p.each(["href", "src", "width", "height"], function (a, c) {
        p.attrHooks[c] = p.extend(p.attrHooks[c], {
          get: function (a) {
            var d = a.getAttribute(c, 2);
            return d === null ? b : d;
          },
        });
      }),
    p.support.style ||
      (p.attrHooks.style = {
        get: function (a) {
          return a.style.cssText.toLowerCase() || b;
        },
        set: function (a, b) {
          return (a.style.cssText = "" + b);
        },
      }),
    p.support.optSelected ||
      (p.propHooks.selected = p.extend(p.propHooks.selected, {
        get: function (a) {
          var b = a.parentNode;
          return (
            b && (b.selectedIndex, b.parentNode && b.parentNode.selectedIndex),
            null
          );
        },
      })),
    p.support.enctype || (p.propFix.enctype = "encoding"),
    p.support.checkOn ||
      p.each(["radio", "checkbox"], function () {
        p.valHooks[this] = {
          get: function (a) {
            return a.getAttribute("value") === null ? "on" : a.value;
          },
        };
      }),
    p.each(["radio", "checkbox"], function () {
      p.valHooks[this] = p.extend(p.valHooks[this], {
        set: function (a, b) {
          if (p.isArray(b)) return (a.checked = p.inArray(p(a).val(), b) >= 0);
        },
      });
    });
  var V = /^(?:textarea|input|select)$/i,
    W = /^([^\.]*|)(?:\.(.+)|)$/,
    X = /(?:^|\s)hover(\.\S+|)\b/,
    Y = /^key/,
    Z = /^(?:mouse|contextmenu)|click/,
    $ = /^(?:focusinfocus|focusoutblur)$/,
    _ = function (a) {
      return p.event.special.hover
        ? a
        : a.replace(X, "mouseenter$1 mouseleave$1");
    };
  (p.event = {
    add: function (a, c, d, e, f) {
      var g, h, i, j, k, l, m, n, o, q, r;
      if (a.nodeType === 3 || a.nodeType === 8 || !c || !d || !(g = p._data(a)))
        return;
      d.handler && ((o = d), (d = o.handler), (f = o.selector)),
        d.guid || (d.guid = p.guid++),
        (i = g.events),
        i || (g.events = i = {}),
        (h = g.handle),
        h ||
          ((g.handle = h =
            function (a) {
              return typeof p != "undefined" &&
                (!a || p.event.triggered !== a.type)
                ? p.event.dispatch.apply(h.elem, arguments)
                : b;
            }),
          (h.elem = a)),
        (c = p.trim(_(c)).split(" "));
      for (j = 0; j < c.length; j++) {
        (k = W.exec(c[j]) || []),
          (l = k[1]),
          (m = (k[2] || "").split(".").sort()),
          (r = p.event.special[l] || {}),
          (l = (f ? r.delegateType : r.bindType) || l),
          (r = p.event.special[l] || {}),
          (n = p.extend(
            {
              type: l,
              origType: k[1],
              data: e,
              handler: d,
              guid: d.guid,
              selector: f,
              namespace: m.join("."),
            },
            o,
          )),
          (q = i[l]);
        if (!q) {
          (q = i[l] = []), (q.delegateCount = 0);
          if (!r.setup || r.setup.call(a, e, m, h) === !1)
            a.addEventListener
              ? a.addEventListener(l, h, !1)
              : a.attachEvent && a.attachEvent("on" + l, h);
        }
        r.add &&
          (r.add.call(a, n), n.handler.guid || (n.handler.guid = d.guid)),
          f ? q.splice(q.delegateCount++, 0, n) : q.push(n),
          (p.event.global[l] = !0);
      }
      a = null;
    },
    global: {},
    remove: function (a, b, c, d, e) {
      var f,
        g,
        h,
        i,
        j,
        k,
        l,
        m,
        n,
        o,
        q,
        r = p.hasData(a) && p._data(a);
      if (!r || !(m = r.events)) return;
      b = p.trim(_(b || "")).split(" ");
      for (f = 0; f < b.length; f++) {
        (g = W.exec(b[f]) || []), (h = i = g[1]), (j = g[2]);
        if (!h) {
          for (h in m) p.event.remove(a, h + b[f], c, d, !0);
          continue;
        }
        (n = p.event.special[h] || {}),
          (h = (d ? n.delegateType : n.bindType) || h),
          (o = m[h] || []),
          (k = o.length),
          (j = j
            ? new RegExp(
                "(^|\\.)" +
                  j.split(".").sort().join("\\.(?:.*\\.|)") +
                  "(\\.|$)",
              )
            : null);
        for (l = 0; l < o.length; l++)
          (q = o[l]),
            (e || i === q.origType) &&
              (!c || c.guid === q.guid) &&
              (!j || j.test(q.namespace)) &&
              (!d || d === q.selector || (d === "**" && q.selector)) &&
              (o.splice(l--, 1),
              q.selector && o.delegateCount--,
              n.remove && n.remove.call(a, q));
        o.length === 0 &&
          k !== o.length &&
          ((!n.teardown || n.teardown.call(a, j, r.handle) === !1) &&
            p.removeEvent(a, h, r.handle),
          delete m[h]);
      }
      p.isEmptyObject(m) && (delete r.handle, p.removeData(a, "events", !0));
    },
    customEvent: { getData: !0, setData: !0, changeData: !0 },
    trigger: function (c, d, f, g) {
      if (!f || (f.nodeType !== 3 && f.nodeType !== 8)) {
        var h,
          i,
          j,
          k,
          l,
          m,
          n,
          o,
          q,
          r,
          s = c.type || c,
          t = [];
        if ($.test(s + p.event.triggered)) return;
        s.indexOf("!") >= 0 && ((s = s.slice(0, -1)), (i = !0)),
          s.indexOf(".") >= 0 &&
            ((t = s.split(".")), (s = t.shift()), t.sort());
        if ((!f || p.event.customEvent[s]) && !p.event.global[s]) return;
        (c =
          typeof c == "object"
            ? c[p.expando]
              ? c
              : new p.Event(s, c)
            : new p.Event(s)),
          (c.type = s),
          (c.isTrigger = !0),
          (c.exclusive = i),
          (c.namespace = t.join(".")),
          (c.namespace_re = c.namespace
            ? new RegExp("(^|\\.)" + t.join("\\.(?:.*\\.|)") + "(\\.|$)")
            : null),
          (m = s.indexOf(":") < 0 ? "on" + s : "");
        if (!f) {
          h = p.cache;
          for (j in h)
            h[j].events &&
              h[j].events[s] &&
              p.event.trigger(c, d, h[j].handle.elem, !0);
          return;
        }
        (c.result = b),
          c.target || (c.target = f),
          (d = d != null ? p.makeArray(d) : []),
          d.unshift(c),
          (n = p.event.special[s] || {});
        if (n.trigger && n.trigger.apply(f, d) === !1) return;
        q = [[f, n.bindType || s]];
        if (!g && !n.noBubble && !p.isWindow(f)) {
          (r = n.delegateType || s), (k = $.test(r + s) ? f : f.parentNode);
          for (l = f; k; k = k.parentNode) q.push([k, r]), (l = k);
          l === (f.ownerDocument || e) &&
            q.push([l.defaultView || l.parentWindow || a, r]);
        }
        for (j = 0; j < q.length && !c.isPropagationStopped(); j++)
          (k = q[j][0]),
            (c.type = q[j][1]),
            (o = (p._data(k, "events") || {})[c.type] && p._data(k, "handle")),
            o && o.apply(k, d),
            (o = m && k[m]),
            o && p.acceptData(k) && o.apply(k, d) === !1 && c.preventDefault();
        return (
          (c.type = s),
          !g &&
            !c.isDefaultPrevented() &&
            (!n._default || n._default.apply(f.ownerDocument, d) === !1) &&
            (s !== "click" || !p.nodeName(f, "a")) &&
            p.acceptData(f) &&
            m &&
            f[s] &&
            ((s !== "focus" && s !== "blur") || c.target.offsetWidth !== 0) &&
            !p.isWindow(f) &&
            ((l = f[m]),
            l && (f[m] = null),
            (p.event.triggered = s),
            f[s](),
            (p.event.triggered = b),
            l && (f[m] = l)),
          c.result
        );
      }
      return;
    },
    dispatch: function (c) {
      c = p.event.fix(c || a.event);
      var d,
        e,
        f,
        g,
        h,
        i,
        j,
        k,
        l,
        m,
        n = (p._data(this, "events") || {})[c.type] || [],
        o = n.delegateCount,
        q = [].slice.call(arguments),
        r = !c.exclusive && !c.namespace,
        s = p.event.special[c.type] || {},
        t = [];
      (q[0] = c), (c.delegateTarget = this);
      if (s.preDispatch && s.preDispatch.call(this, c) === !1) return;
      if (o && (!c.button || c.type !== "click"))
        for (f = c.target; f != this; f = f.parentNode || this)
          if (f.disabled !== !0 || c.type !== "click") {
            (h = {}), (j = []);
            for (d = 0; d < o; d++)
              (k = n[d]),
                (l = k.selector),
                h[l] === b && (h[l] = p(l, this).index(f) >= 0),
                h[l] && j.push(k);
            j.length && t.push({ elem: f, matches: j });
          }
      n.length > o && t.push({ elem: this, matches: n.slice(o) });
      for (d = 0; d < t.length && !c.isPropagationStopped(); d++) {
        (i = t[d]), (c.currentTarget = i.elem);
        for (
          e = 0;
          e < i.matches.length && !c.isImmediatePropagationStopped();
          e++
        ) {
          k = i.matches[e];
          if (
            r ||
            (!c.namespace && !k.namespace) ||
            (c.namespace_re && c.namespace_re.test(k.namespace))
          )
            (c.data = k.data),
              (c.handleObj = k),
              (g = (
                (p.event.special[k.origType] || {}).handle || k.handler
              ).apply(i.elem, q)),
              g !== b &&
                ((c.result = g),
                g === !1 && (c.preventDefault(), c.stopPropagation()));
        }
      }
      return s.postDispatch && s.postDispatch.call(this, c), c.result;
    },
    props:
      "attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(
        " ",
      ),
    fixHooks: {},
    keyHooks: {
      props: "char charCode key keyCode".split(" "),
      filter: function (a, b) {
        return (
          a.which == null &&
            (a.which = b.charCode != null ? b.charCode : b.keyCode),
          a
        );
      },
    },
    mouseHooks: {
      props:
        "button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(
          " ",
        ),
      filter: function (a, c) {
        var d,
          f,
          g,
          h = c.button,
          i = c.fromElement;
        return (
          a.pageX == null &&
            c.clientX != null &&
            ((d = a.target.ownerDocument || e),
            (f = d.documentElement),
            (g = d.body),
            (a.pageX =
              c.clientX +
              ((f && f.scrollLeft) || (g && g.scrollLeft) || 0) -
              ((f && f.clientLeft) || (g && g.clientLeft) || 0)),
            (a.pageY =
              c.clientY +
              ((f && f.scrollTop) || (g && g.scrollTop) || 0) -
              ((f && f.clientTop) || (g && g.clientTop) || 0))),
          !a.relatedTarget &&
            i &&
            (a.relatedTarget = i === a.target ? c.toElement : i),
          !a.which &&
            h !== b &&
            (a.which = h & 1 ? 1 : h & 2 ? 3 : h & 4 ? 2 : 0),
          a
        );
      },
    },
    fix: function (a) {
      if (a[p.expando]) return a;
      var b,
        c,
        d = a,
        f = p.event.fixHooks[a.type] || {},
        g = f.props ? this.props.concat(f.props) : this.props;
      a = p.Event(d);
      for (b = g.length; b; ) (c = g[--b]), (a[c] = d[c]);
      return (
        a.target || (a.target = d.srcElement || e),
        a.target.nodeType === 3 && (a.target = a.target.parentNode),
        (a.metaKey = !!a.metaKey),
        f.filter ? f.filter(a, d) : a
      );
    },
    special: {
      load: { noBubble: !0 },
      focus: { delegateType: "focusin" },
      blur: { delegateType: "focusout" },
      beforeunload: {
        setup: function (a, b, c) {
          p.isWindow(this) && (this.onbeforeunload = c);
        },
        teardown: function (a, b) {
          this.onbeforeunload === b && (this.onbeforeunload = null);
        },
      },
    },
    simulate: function (a, b, c, d) {
      var e = p.extend(new p.Event(), c, {
        type: a,
        isSimulated: !0,
        originalEvent: {},
      });
      d ? p.event.trigger(e, null, b) : p.event.dispatch.call(b, e),
        e.isDefaultPrevented() && c.preventDefault();
    },
  }),
    (p.event.handle = p.event.dispatch),
    (p.removeEvent = e.removeEventListener
      ? function (a, b, c) {
          a.removeEventListener && a.removeEventListener(b, c, !1);
        }
      : function (a, b, c) {
          var d = "on" + b;
          a.detachEvent &&
            (typeof a[d] == "undefined" && (a[d] = null), a.detachEvent(d, c));
        }),
    (p.Event = function (a, b) {
      if (this instanceof p.Event)
        a && a.type
          ? ((this.originalEvent = a),
            (this.type = a.type),
            (this.isDefaultPrevented =
              a.defaultPrevented ||
              a.returnValue === !1 ||
              (a.getPreventDefault && a.getPreventDefault())
                ? bb
                : ba))
          : (this.type = a),
          b && p.extend(this, b),
          (this.timeStamp = (a && a.timeStamp) || p.now()),
          (this[p.expando] = !0);
      else return new p.Event(a, b);
    }),
    (p.Event.prototype = {
      preventDefault: function () {
        this.isDefaultPrevented = bb;
        var a = this.originalEvent;
        if (!a) return;
        a.preventDefault ? a.preventDefault() : (a.returnValue = !1);
      },
      stopPropagation: function () {
        this.isPropagationStopped = bb;
        var a = this.originalEvent;
        if (!a) return;
        a.stopPropagation && a.stopPropagation(), (a.cancelBubble = !0);
      },
      stopImmediatePropagation: function () {
        (this.isImmediatePropagationStopped = bb), this.stopPropagation();
      },
      isDefaultPrevented: ba,
      isPropagationStopped: ba,
      isImmediatePropagationStopped: ba,
    }),
    p.each(
      { mouseenter: "mouseover", mouseleave: "mouseout" },
      function (a, b) {
        p.event.special[a] = {
          delegateType: b,
          bindType: b,
          handle: function (a) {
            var c,
              d = this,
              e = a.relatedTarget,
              f = a.handleObj,
              g = f.selector;
            if (!e || (e !== d && !p.contains(d, e)))
              (a.type = f.origType),
                (c = f.handler.apply(this, arguments)),
                (a.type = b);
            return c;
          },
        };
      },
    ),
    p.support.submitBubbles ||
      (p.event.special.submit = {
        setup: function () {
          if (p.nodeName(this, "form")) return !1;
          p.event.add(this, "click._submit keypress._submit", function (a) {
            var c = a.target,
              d =
                p.nodeName(c, "input") || p.nodeName(c, "button") ? c.form : b;
            d &&
              !p._data(d, "_submit_attached") &&
              (p.event.add(d, "submit._submit", function (a) {
                a._submit_bubble = !0;
              }),
              p._data(d, "_submit_attached", !0));
          });
        },
        postDispatch: function (a) {
          a._submit_bubble &&
            (delete a._submit_bubble,
            this.parentNode &&
              !a.isTrigger &&
              p.event.simulate("submit", this.parentNode, a, !0));
        },
        teardown: function () {
          if (p.nodeName(this, "form")) return !1;
          p.event.remove(this, "._submit");
        },
      }),
    p.support.changeBubbles ||
      (p.event.special.change = {
        setup: function () {
          if (V.test(this.nodeName)) {
            if (this.type === "checkbox" || this.type === "radio")
              p.event.add(this, "propertychange._change", function (a) {
                a.originalEvent.propertyName === "checked" &&
                  (this._just_changed = !0);
              }),
                p.event.add(this, "click._change", function (a) {
                  this._just_changed &&
                    !a.isTrigger &&
                    (this._just_changed = !1),
                    p.event.simulate("change", this, a, !0);
                });
            return !1;
          }
          p.event.add(this, "beforeactivate._change", function (a) {
            var b = a.target;
            V.test(b.nodeName) &&
              !p._data(b, "_change_attached") &&
              (p.event.add(b, "change._change", function (a) {
                this.parentNode &&
                  !a.isSimulated &&
                  !a.isTrigger &&
                  p.event.simulate("change", this.parentNode, a, !0);
              }),
              p._data(b, "_change_attached", !0));
          });
        },
        handle: function (a) {
          var b = a.target;
          if (
            this !== b ||
            a.isSimulated ||
            a.isTrigger ||
            (b.type !== "radio" && b.type !== "checkbox")
          )
            return a.handleObj.handler.apply(this, arguments);
        },
        teardown: function () {
          return p.event.remove(this, "._change"), !V.test(this.nodeName);
        },
      }),
    p.support.focusinBubbles ||
      p.each({ focus: "focusin", blur: "focusout" }, function (a, b) {
        var c = 0,
          d = function (a) {
            p.event.simulate(b, a.target, p.event.fix(a), !0);
          };
        p.event.special[b] = {
          setup: function () {
            c++ === 0 && e.addEventListener(a, d, !0);
          },
          teardown: function () {
            --c === 0 && e.removeEventListener(a, d, !0);
          },
        };
      }),
    p.fn.extend({
      on: function (a, c, d, e, f) {
        var g, h;
        if (typeof a == "object") {
          typeof c != "string" && ((d = d || c), (c = b));
          for (h in a) this.on(h, c, d, a[h], f);
          return this;
        }
        d == null && e == null
          ? ((e = c), (d = c = b))
          : e == null &&
            (typeof c == "string"
              ? ((e = d), (d = b))
              : ((e = d), (d = c), (c = b)));
        if (e === !1) e = ba;
        else if (!e) return this;
        return (
          f === 1 &&
            ((g = e),
            (e = function (a) {
              return p().off(a), g.apply(this, arguments);
            }),
            (e.guid = g.guid || (g.guid = p.guid++))),
          this.each(function () {
            p.event.add(this, a, e, d, c);
          })
        );
      },
      one: function (a, b, c, d) {
        return this.on(a, b, c, d, 1);
      },
      off: function (a, c, d) {
        var e, f;
        if (a && a.preventDefault && a.handleObj)
          return (
            (e = a.handleObj),
            p(a.delegateTarget).off(
              e.namespace ? e.origType + "." + e.namespace : e.origType,
              e.selector,
              e.handler,
            ),
            this
          );
        if (typeof a == "object") {
          for (f in a) this.off(f, c, a[f]);
          return this;
        }
        if (c === !1 || typeof c == "function") (d = c), (c = b);
        return (
          d === !1 && (d = ba),
          this.each(function () {
            p.event.remove(this, a, d, c);
          })
        );
      },
      bind: function (a, b, c) {
        return this.on(a, null, b, c);
      },
      unbind: function (a, b) {
        return this.off(a, null, b);
      },
      live: function (a, b, c) {
        return p(this.context).on(a, this.selector, b, c), this;
      },
      die: function (a, b) {
        return p(this.context).off(a, this.selector || "**", b), this;
      },
      delegate: function (a, b, c, d) {
        return this.on(b, a, c, d);
      },
      undelegate: function (a, b, c) {
        return arguments.length == 1
          ? this.off(a, "**")
          : this.off(b, a || "**", c);
      },
      trigger: function (a, b) {
        return this.each(function () {
          p.event.trigger(a, b, this);
        });
      },
      triggerHandler: function (a, b) {
        if (this[0]) return p.event.trigger(a, b, this[0], !0);
      },
      toggle: function (a) {
        var b = arguments,
          c = a.guid || p.guid++,
          d = 0,
          e = function (c) {
            var e = (p._data(this, "lastToggle" + a.guid) || 0) % d;
            return (
              p._data(this, "lastToggle" + a.guid, e + 1),
              c.preventDefault(),
              b[e].apply(this, arguments) || !1
            );
          };
        e.guid = c;
        while (d < b.length) b[d++].guid = c;
        return this.click(e);
      },
      hover: function (a, b) {
        return this.mouseenter(a).mouseleave(b || a);
      },
    }),
    p.each(
      "blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(
        " ",
      ),
      function (a, b) {
        (p.fn[b] = function (a, c) {
          return (
            c == null && ((c = a), (a = null)),
            arguments.length > 0 ? this.on(b, null, a, c) : this.trigger(b)
          );
        }),
          Y.test(b) && (p.event.fixHooks[b] = p.event.keyHooks),
          Z.test(b) && (p.event.fixHooks[b] = p.event.mouseHooks);
      },
    ),
    (function (a, b) {
      function $(a, b, c, d) {
        (c = c || []), (b = b || q);
        var e,
          f,
          g,
          j,
          k = b.nodeType;
        if (k !== 1 && k !== 9) return [];
        if (!a || typeof a != "string") return c;
        g = h(b);
        if (!g && !d)
          if ((e = L.exec(a)))
            if ((j = e[1])) {
              if (k === 9) {
                f = b.getElementById(j);
                if (!f || !f.parentNode) return c;
                if (f.id === j) return c.push(f), c;
              } else if (
                b.ownerDocument &&
                (f = b.ownerDocument.getElementById(j)) &&
                i(b, f) &&
                f.id === j
              )
                return c.push(f), c;
            } else {
              if (e[2])
                return u.apply(c, t.call(b.getElementsByTagName(a), 0)), c;
              if ((j = e[3]) && X && b.getElementsByClassName)
                return u.apply(c, t.call(b.getElementsByClassName(j), 0)), c;
            }
        return bk(a, b, c, d, g);
      }
      function _(a) {
        return function (b) {
          var c = b.nodeName.toLowerCase();
          return c === "input" && b.type === a;
        };
      }
      function ba(a) {
        return function (b) {
          var c = b.nodeName.toLowerCase();
          return (c === "input" || c === "button") && b.type === a;
        };
      }
      function bb(a, b, c) {
        if (a === b) return c;
        var d = a.nextSibling;
        while (d) {
          if (d === b) return -1;
          d = d.nextSibling;
        }
        return 1;
      }
      function bc(a, b, c, d) {
        var e,
          g,
          h,
          i,
          j,
          k,
          l,
          m,
          n,
          p,
          r = !c && b !== q,
          s = (r ? "<s>" : "") + a.replace(H, "$1<s>"),
          u = y[o][s];
        if (u) return d ? 0 : t.call(u, 0);
        (j = a), (k = []), (m = 0), (n = f.preFilter), (p = f.filter);
        while (j) {
          if (!e || (g = I.exec(j)))
            g && ((j = j.slice(g[0].length)), (h.selector = l)),
              k.push((h = [])),
              (l = ""),
              r && (j = " " + j);
          e = !1;
          if ((g = J.exec(j)))
            (l += g[0]),
              (j = j.slice(g[0].length)),
              (e = h.push({
                part: g.pop().replace(H, " "),
                string: g[0],
                captures: g,
              }));
          for (i in p)
            (g = S[i].exec(j)) &&
              (!n[i] || (g = n[i](g, b, c))) &&
              ((l += g[0]),
              (j = j.slice(g[0].length)),
              (e = h.push({ part: i, string: g.shift(), captures: g })));
          if (!e) break;
        }
        return (
          l && (h.selector = l),
          d ? j.length : j ? $.error(a) : t.call(y(s, k), 0)
        );
      }
      function bd(a, b, e, f) {
        var g = b.dir,
          h = s++;
        return (
          a ||
            (a = function (a) {
              return a === e;
            }),
          b.first
            ? function (b) {
                while ((b = b[g])) if (b.nodeType === 1) return a(b) && b;
              }
            : f
              ? function (b) {
                  while ((b = b[g])) if (b.nodeType === 1 && a(b)) return b;
                }
              : function (b) {
                  var e,
                    f = h + "." + c,
                    i = f + "." + d;
                  while ((b = b[g]))
                    if (b.nodeType === 1) {
                      if ((e = b[o]) === i) return b.sizset;
                      if (typeof e == "string" && e.indexOf(f) === 0) {
                        if (b.sizset) return b;
                      } else {
                        b[o] = i;
                        if (a(b)) return (b.sizset = !0), b;
                        b.sizset = !1;
                      }
                    }
                }
        );
      }
      function be(a, b) {
        return a
          ? function (c) {
              var d = b(c);
              return d && a(d === !0 ? c : d);
            }
          : b;
      }
      function bf(a, b, c) {
        var d,
          e,
          g = 0;
        for (; (d = a[g]); g++)
          f.relative[d.part]
            ? (e = bd(e, f.relative[d.part], b, c))
            : (e = be(
                e,
                f.filter[d.part].apply(null, d.captures.concat(b, c)),
              ));
        return e;
      }
      function bg(a) {
        return function (b) {
          var c,
            d = 0;
          for (; (c = a[d]); d++) if (c(b)) return !0;
          return !1;
        };
      }
      function bh(a, b, c, d) {
        var e = 0,
          f = b.length;
        for (; e < f; e++) $(a, b[e], c, d);
      }
      function bi(a, b, c, d, e, g) {
        var h,
          i = f.setFilters[b.toLowerCase()];
        return (
          i || $.error(b),
          (a || !(h = e)) && bh(a || "*", d, (h = []), e),
          h.length > 0 ? i(h, c, g) : []
        );
      }
      function bj(a, c, d, e) {
        var f,
          g,
          h,
          i,
          j,
          k,
          l,
          m,
          n,
          o,
          p,
          q,
          r,
          s = 0,
          t = a.length,
          v = S.POS,
          w = new RegExp("^" + v.source + "(?!" + A + ")", "i"),
          x = function () {
            var a = 1,
              c = arguments.length - 2;
            for (; a < c; a++) arguments[a] === b && (n[a] = b);
          };
        for (; s < t; s++) {
          (f = a[s]), (g = ""), (m = e);
          for (h = 0, i = f.length; h < i; h++) {
            (j = f[h]), (k = j.string);
            if (j.part === "PSEUDO") {
              v.exec(""), (l = 0);
              while ((n = v.exec(k))) {
                (o = !0), (p = v.lastIndex = n.index + n[0].length);
                if (p > l) {
                  (g += k.slice(l, n.index)),
                    (l = p),
                    (q = [c]),
                    J.test(g) && (m && (q = m), (m = e));
                  if ((r = O.test(g)))
                    (g = g.slice(0, -5).replace(J, "$&*")), l++;
                  n.length > 1 && n[0].replace(w, x),
                    (m = bi(g, n[1], n[2], q, m, r));
                }
                g = "";
              }
            }
            o || (g += k), (o = !1);
          }
          g
            ? J.test(g)
              ? bh(g, m || [c], d, e)
              : $(g, c, d, e ? e.concat(m) : m)
            : u.apply(d, m);
        }
        return t === 1 ? d : $.uniqueSort(d);
      }
      function bk(a, b, e, g, h) {
        a = a.replace(H, "$1");
        var i,
          k,
          l,
          m,
          n,
          o,
          p,
          q,
          r,
          s,
          v = bc(a, b, h),
          w = b.nodeType;
        if (S.POS.test(a)) return bj(v, b, e, g);
        if (g) i = t.call(g, 0);
        else if (v.length === 1) {
          if (
            (o = t.call(v[0], 0)).length > 2 &&
            (p = o[0]).part === "ID" &&
            w === 9 &&
            !h &&
            f.relative[o[1].part]
          ) {
            b = f.find.ID(p.captures[0].replace(R, ""), b, h)[0];
            if (!b) return e;
            a = a.slice(o.shift().string.length);
          }
          (r = ((v = N.exec(o[0].string)) && !v.index && b.parentNode) || b),
            (q = "");
          for (n = o.length - 1; n >= 0; n--) {
            (p = o[n]), (s = p.part), (q = p.string + q);
            if (f.relative[s]) break;
            if (f.order.test(s)) {
              i = f.find[s](p.captures[0].replace(R, ""), r, h);
              if (i == null) continue;
              (a = a.slice(0, a.length - q.length) + q.replace(S[s], "")),
                a || u.apply(e, t.call(i, 0));
              break;
            }
          }
        }
        if (a) {
          (k = j(a, b, h)),
            (c = k.dirruns++),
            i == null &&
              (i = f.find.TAG("*", (N.test(a) && b.parentNode) || b));
          for (n = 0; (m = i[n]); n++) (d = k.runs++), k(m) && e.push(m);
        }
        return e;
      }
      var c,
        d,
        e,
        f,
        g,
        h,
        i,
        j,
        k,
        l,
        m = !0,
        n = "undefined",
        o = ("sizcache" + Math.random()).replace(".", ""),
        q = a.document,
        r = q.documentElement,
        s = 0,
        t = [].slice,
        u = [].push,
        v = function (a, b) {
          return (a[o] = b || !0), a;
        },
        w = function () {
          var a = {},
            b = [];
          return v(function (c, d) {
            return b.push(c) > f.cacheLength && delete a[b.shift()], (a[c] = d);
          }, a);
        },
        x = w(),
        y = w(),
        z = w(),
        A = "[\\x20\\t\\r\\n\\f]",
        B = "(?:\\\\.|[-\\w]|[^\\x00-\\xa0])+",
        C = B.replace("w", "w#"),
        D = "([*^$|!~]?=)",
        E =
          "\\[" +
          A +
          "*(" +
          B +
          ")" +
          A +
          "*(?:" +
          D +
          A +
          "*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|(" +
          C +
          ")|)|)" +
          A +
          "*\\]",
        F =
          ":(" +
          B +
          ")(?:\\((?:(['\"])((?:\\\\.|[^\\\\])*?)\\2|([^()[\\]]*|(?:(?:" +
          E +
          ")|[^:]|\\\\.)*|.*))\\)|)",
        G =
          ":(nth|eq|gt|lt|first|last|even|odd)(?:\\(((?:-\\d)?\\d*)\\)|)(?=[^-]|$)",
        H = new RegExp("^" + A + "+|((?:^|[^\\\\])(?:\\\\.)*)" + A + "+$", "g"),
        I = new RegExp("^" + A + "*," + A + "*"),
        J = new RegExp("^" + A + "*([\\x20\\t\\r\\n\\f>+~])" + A + "*"),
        K = new RegExp(F),
        L = /^(?:#([\w\-]+)|(\w+)|\.([\w\-]+))$/,
        M = /^:not/,
        N = /[\x20\t\r\n\f]*[+~]/,
        O = /:not\($/,
        P = /h\d/i,
        Q = /input|select|textarea|button/i,
        R = /\\(?!\\)/g,
        S = {
          ID: new RegExp("^#(" + B + ")"),
          CLASS: new RegExp("^\\.(" + B + ")"),
          NAME: new RegExp("^\\[name=['\"]?(" + B + ")['\"]?\\]"),
          TAG: new RegExp("^(" + B.replace("w", "w*") + ")"),
          ATTR: new RegExp("^" + E),
          PSEUDO: new RegExp("^" + F),
          CHILD: new RegExp(
            "^:(only|nth|last|first)-child(?:\\(" +
              A +
              "*(even|odd|(([+-]|)(\\d*)n|)" +
              A +
              "*(?:([+-]|)" +
              A +
              "*(\\d+)|))" +
              A +
              "*\\)|)",
            "i",
          ),
          POS: new RegExp(G, "ig"),
          needsContext: new RegExp("^" + A + "*[>+~]|" + G, "i"),
        },
        T = function (a) {
          var b = q.createElement("div");
          try {
            return a(b);
          } catch (c) {
            return !1;
          } finally {
            b = null;
          }
        },
        U = T(function (a) {
          return (
            a.appendChild(q.createComment("")),
            !a.getElementsByTagName("*").length
          );
        }),
        V = T(function (a) {
          return (
            (a.innerHTML = "<a href='#'></a>"),
            a.firstChild &&
              typeof a.firstChild.getAttribute !== n &&
              a.firstChild.getAttribute("href") === "#"
          );
        }),
        W = T(function (a) {
          a.innerHTML = "<select></select>";
          var b = typeof a.lastChild.getAttribute("multiple");
          return b !== "boolean" && b !== "string";
        }),
        X = T(function (a) {
          return (
            (a.innerHTML =
              "<div class='hidden e'></div><div class='hidden'></div>"),
            !a.getElementsByClassName || !a.getElementsByClassName("e").length
              ? !1
              : ((a.lastChild.className = "e"),
                a.getElementsByClassName("e").length === 2)
          );
        }),
        Y = T(function (a) {
          (a.id = o + 0),
            (a.innerHTML =
              "<a name='" + o + "'></a><div name='" + o + "'></div>"),
            r.insertBefore(a, r.firstChild);
          var b =
            q.getElementsByName &&
            q.getElementsByName(o).length ===
              2 + q.getElementsByName(o + 0).length;
          return (e = !q.getElementById(o)), r.removeChild(a), b;
        });
      try {
        t.call(r.childNodes, 0)[0].nodeType;
      } catch (Z) {
        t = function (a) {
          var b,
            c = [];
          for (; (b = this[a]); a++) c.push(b);
          return c;
        };
      }
      ($.matches = function (a, b) {
        return $(a, null, null, b);
      }),
        ($.matchesSelector = function (a, b) {
          return $(b, null, null, [a]).length > 0;
        }),
        (g = $.getText =
          function (a) {
            var b,
              c = "",
              d = 0,
              e = a.nodeType;
            if (e) {
              if (e === 1 || e === 9 || e === 11) {
                if (typeof a.textContent == "string") return a.textContent;
                for (a = a.firstChild; a; a = a.nextSibling) c += g(a);
              } else if (e === 3 || e === 4) return a.nodeValue;
            } else for (; (b = a[d]); d++) c += g(b);
            return c;
          }),
        (h = $.isXML =
          function (a) {
            var b = a && (a.ownerDocument || a).documentElement;
            return b ? b.nodeName !== "HTML" : !1;
          }),
        (i = $.contains =
          r.contains
            ? function (a, b) {
                var c = a.nodeType === 9 ? a.documentElement : a,
                  d = b && b.parentNode;
                return (
                  a === d ||
                  !!(d && d.nodeType === 1 && c.contains && c.contains(d))
                );
              }
            : r.compareDocumentPosition
              ? function (a, b) {
                  return b && !!(a.compareDocumentPosition(b) & 16);
                }
              : function (a, b) {
                  while ((b = b.parentNode)) if (b === a) return !0;
                  return !1;
                }),
        ($.attr = function (a, b) {
          var c,
            d = h(a);
          return (
            d || (b = b.toLowerCase()),
            f.attrHandle[b]
              ? f.attrHandle[b](a)
              : W || d
                ? a.getAttribute(b)
                : ((c = a.getAttributeNode(b)),
                  c
                    ? typeof a[b] == "boolean"
                      ? a[b]
                        ? b
                        : null
                      : c.specified
                        ? c.value
                        : null
                    : null)
          );
        }),
        (f = $.selectors =
          {
            cacheLength: 50,
            createPseudo: v,
            match: S,
            order: new RegExp(
              "ID|TAG" + (Y ? "|NAME" : "") + (X ? "|CLASS" : ""),
            ),
            attrHandle: V
              ? {}
              : {
                  href: function (a) {
                    return a.getAttribute("href", 2);
                  },
                  type: function (a) {
                    return a.getAttribute("type");
                  },
                },
            find: {
              ID: e
                ? function (a, b, c) {
                    if (typeof b.getElementById !== n && !c) {
                      var d = b.getElementById(a);
                      return d && d.parentNode ? [d] : [];
                    }
                  }
                : function (a, c, d) {
                    if (typeof c.getElementById !== n && !d) {
                      var e = c.getElementById(a);
                      return e
                        ? e.id === a ||
                          (typeof e.getAttributeNode !== n &&
                            e.getAttributeNode("id").value === a)
                          ? [e]
                          : b
                        : [];
                    }
                  },
              TAG: U
                ? function (a, b) {
                    if (typeof b.getElementsByTagName !== n)
                      return b.getElementsByTagName(a);
                  }
                : function (a, b) {
                    var c = b.getElementsByTagName(a);
                    if (a === "*") {
                      var d,
                        e = [],
                        f = 0;
                      for (; (d = c[f]); f++) d.nodeType === 1 && e.push(d);
                      return e;
                    }
                    return c;
                  },
              NAME: function (a, b) {
                if (typeof b.getElementsByName !== n)
                  return b.getElementsByName(name);
              },
              CLASS: function (a, b, c) {
                if (typeof b.getElementsByClassName !== n && !c)
                  return b.getElementsByClassName(a);
              },
            },
            relative: {
              ">": { dir: "parentNode", first: !0 },
              " ": { dir: "parentNode" },
              "+": { dir: "previousSibling", first: !0 },
              "~": { dir: "previousSibling" },
            },
            preFilter: {
              ATTR: function (a) {
                return (
                  (a[1] = a[1].replace(R, "")),
                  (a[3] = (a[4] || a[5] || "").replace(R, "")),
                  a[2] === "~=" && (a[3] = " " + a[3] + " "),
                  a.slice(0, 4)
                );
              },
              CHILD: function (a) {
                return (
                  (a[1] = a[1].toLowerCase()),
                  a[1] === "nth"
                    ? (a[2] || $.error(a[0]),
                      (a[3] = +(a[3]
                        ? a[4] + (a[5] || 1)
                        : 2 * (a[2] === "even" || a[2] === "odd"))),
                      (a[4] = +(a[6] + a[7] || a[2] === "odd")))
                    : a[2] && $.error(a[0]),
                  a
                );
              },
              PSEUDO: function (a, b, c) {
                var d, e;
                if (S.CHILD.test(a[0])) return null;
                if (a[3]) a[2] = a[3];
                else if ((d = a[4]))
                  K.test(d) &&
                    (e = bc(d, b, c, !0)) &&
                    (e = d.indexOf(")", d.length - e) - d.length) &&
                    ((d = d.slice(0, e)), (a[0] = a[0].slice(0, e))),
                    (a[2] = d);
                return a.slice(0, 3);
              },
            },
            filter: {
              ID: e
                ? function (a) {
                    return (
                      (a = a.replace(R, "")),
                      function (b) {
                        return b.getAttribute("id") === a;
                      }
                    );
                  }
                : function (a) {
                    return (
                      (a = a.replace(R, "")),
                      function (b) {
                        var c =
                          typeof b.getAttributeNode !== n &&
                          b.getAttributeNode("id");
                        return c && c.value === a;
                      }
                    );
                  },
              TAG: function (a) {
                return a === "*"
                  ? function () {
                      return !0;
                    }
                  : ((a = a.replace(R, "").toLowerCase()),
                    function (b) {
                      return b.nodeName && b.nodeName.toLowerCase() === a;
                    });
              },
              CLASS: function (a) {
                var b = x[o][a];
                return (
                  b ||
                    (b = x(
                      a,
                      new RegExp("(^|" + A + ")" + a + "(" + A + "|$)"),
                    )),
                  function (a) {
                    return b.test(
                      a.className ||
                        (typeof a.getAttribute !== n &&
                          a.getAttribute("class")) ||
                        "",
                    );
                  }
                );
              },
              ATTR: function (a, b, c) {
                return b
                  ? function (d) {
                      var e = $.attr(d, a),
                        f = e + "";
                      if (e == null) return b === "!=";
                      switch (b) {
                        case "=":
                          return f === c;
                        case "!=":
                          return f !== c;
                        case "^=":
                          return c && f.indexOf(c) === 0;
                        case "*=":
                          return c && f.indexOf(c) > -1;
                        case "$=":
                          return c && f.substr(f.length - c.length) === c;
                        case "~=":
                          return (" " + f + " ").indexOf(c) > -1;
                        case "|=":
                          return (
                            f === c || f.substr(0, c.length + 1) === c + "-"
                          );
                      }
                    }
                  : function (b) {
                      return $.attr(b, a) != null;
                    };
              },
              CHILD: function (a, b, c, d) {
                if (a === "nth") {
                  var e = s++;
                  return function (a) {
                    var b,
                      f,
                      g = 0,
                      h = a;
                    if (c === 1 && d === 0) return !0;
                    b = a.parentNode;
                    if (b && (b[o] !== e || !a.sizset)) {
                      for (h = b.firstChild; h; h = h.nextSibling)
                        if (h.nodeType === 1) {
                          h.sizset = ++g;
                          if (h === a) break;
                        }
                      b[o] = e;
                    }
                    return (
                      (f = a.sizset - d),
                      c === 0 ? f === 0 : f % c === 0 && f / c >= 0
                    );
                  };
                }
                return function (b) {
                  var c = b;
                  switch (a) {
                    case "only":
                    case "first":
                      while ((c = c.previousSibling))
                        if (c.nodeType === 1) return !1;
                      if (a === "first") return !0;
                      c = b;
                    case "last":
                      while ((c = c.nextSibling))
                        if (c.nodeType === 1) return !1;
                      return !0;
                  }
                };
              },
              PSEUDO: function (a, b, c, d) {
                var e,
                  g = f.pseudos[a] || f.pseudos[a.toLowerCase()];
                return (
                  g || $.error("unsupported pseudo: " + a),
                  g[o]
                    ? g(b, c, d)
                    : g.length > 1
                      ? ((e = [a, a, "", b]),
                        function (a) {
                          return g(a, 0, e);
                        })
                      : g
                );
              },
            },
            pseudos: {
              not: v(function (a, b, c) {
                var d = j(a.replace(H, "$1"), b, c);
                return function (a) {
                  return !d(a);
                };
              }),
              enabled: function (a) {
                return a.disabled === !1;
              },
              disabled: function (a) {
                return a.disabled === !0;
              },
              checked: function (a) {
                var b = a.nodeName.toLowerCase();
                return (
                  (b === "input" && !!a.checked) ||
                  (b === "option" && !!a.selected)
                );
              },
              selected: function (a) {
                return (
                  a.parentNode && a.parentNode.selectedIndex, a.selected === !0
                );
              },
              parent: function (a) {
                return !f.pseudos.empty(a);
              },
              empty: function (a) {
                var b;
                a = a.firstChild;
                while (a) {
                  if (a.nodeName > "@" || (b = a.nodeType) === 3 || b === 4)
                    return !1;
                  a = a.nextSibling;
                }
                return !0;
              },
              contains: v(function (a) {
                return function (b) {
                  return (b.textContent || b.innerText || g(b)).indexOf(a) > -1;
                };
              }),
              has: v(function (a) {
                return function (b) {
                  return $(a, b).length > 0;
                };
              }),
              header: function (a) {
                return P.test(a.nodeName);
              },
              text: function (a) {
                var b, c;
                return (
                  a.nodeName.toLowerCase() === "input" &&
                  (b = a.type) === "text" &&
                  ((c = a.getAttribute("type")) == null ||
                    c.toLowerCase() === b)
                );
              },
              radio: _("radio"),
              checkbox: _("checkbox"),
              file: _("file"),
              password: _("password"),
              image: _("image"),
              submit: ba("submit"),
              reset: ba("reset"),
              button: function (a) {
                var b = a.nodeName.toLowerCase();
                return (b === "input" && a.type === "button") || b === "button";
              },
              input: function (a) {
                return Q.test(a.nodeName);
              },
              focus: function (a) {
                var b = a.ownerDocument;
                return (
                  a === b.activeElement &&
                  (!b.hasFocus || b.hasFocus()) &&
                  (!!a.type || !!a.href)
                );
              },
              active: function (a) {
                return a === a.ownerDocument.activeElement;
              },
            },
            setFilters: {
              first: function (a, b, c) {
                return c ? a.slice(1) : [a[0]];
              },
              last: function (a, b, c) {
                var d = a.pop();
                return c ? a : [d];
              },
              even: function (a, b, c) {
                var d = [],
                  e = c ? 1 : 0,
                  f = a.length;
                for (; e < f; e = e + 2) d.push(a[e]);
                return d;
              },
              odd: function (a, b, c) {
                var d = [],
                  e = c ? 0 : 1,
                  f = a.length;
                for (; e < f; e = e + 2) d.push(a[e]);
                return d;
              },
              lt: function (a, b, c) {
                return c ? a.slice(+b) : a.slice(0, +b);
              },
              gt: function (a, b, c) {
                return c ? a.slice(0, +b + 1) : a.slice(+b + 1);
              },
              eq: function (a, b, c) {
                var d = a.splice(+b, 1);
                return c ? a : d;
              },
            },
          }),
        (k = r.compareDocumentPosition
          ? function (a, b) {
              return a === b
                ? ((l = !0), 0)
                : (
                      !a.compareDocumentPosition || !b.compareDocumentPosition
                        ? a.compareDocumentPosition
                        : a.compareDocumentPosition(b) & 4
                    )
                  ? -1
                  : 1;
            }
          : function (a, b) {
              if (a === b) return (l = !0), 0;
              if (a.sourceIndex && b.sourceIndex)
                return a.sourceIndex - b.sourceIndex;
              var c,
                d,
                e = [],
                f = [],
                g = a.parentNode,
                h = b.parentNode,
                i = g;
              if (g === h) return bb(a, b);
              if (!g) return -1;
              if (!h) return 1;
              while (i) e.unshift(i), (i = i.parentNode);
              i = h;
              while (i) f.unshift(i), (i = i.parentNode);
              (c = e.length), (d = f.length);
              for (var j = 0; j < c && j < d; j++)
                if (e[j] !== f[j]) return bb(e[j], f[j]);
              return j === c ? bb(a, f[j], -1) : bb(e[j], b, 1);
            }),
        [0, 0].sort(k),
        (m = !l),
        ($.uniqueSort = function (a) {
          var b,
            c = 1;
          (l = m), a.sort(k);
          if (l) for (; (b = a[c]); c++) b === a[c - 1] && a.splice(c--, 1);
          return a;
        }),
        ($.error = function (a) {
          throw new Error("Syntax error, unrecognized expression: " + a);
        }),
        (j = $.compile =
          function (a, b, c) {
            var d,
              e,
              f,
              g = z[o][a];
            if (g && g.context === b) return g;
            d = bc(a, b, c);
            for (e = 0, f = d.length; e < f; e++) d[e] = bf(d[e], b, c);
            return (
              (g = z(a, bg(d))), (g.context = b), (g.runs = g.dirruns = 0), g
            );
          }),
        q.querySelectorAll &&
          (function () {
            var a,
              b = bk,
              c = /'|\\/g,
              d = /\=[\x20\t\r\n\f]*([^'"\]]*)[\x20\t\r\n\f]*\]/g,
              e = [],
              f = [":active"],
              g =
                r.matchesSelector ||
                r.mozMatchesSelector ||
                r.webkitMatchesSelector ||
                r.oMatchesSelector ||
                r.msMatchesSelector;
            T(function (a) {
              (a.innerHTML = "<select><option selected=''></option></select>"),
                a.querySelectorAll("[selected]").length ||
                  e.push(
                    "\\[" +
                      A +
                      "*(?:checked|disabled|ismap|multiple|readonly|selected|value)",
                  ),
                a.querySelectorAll(":checked").length || e.push(":checked");
            }),
              T(function (a) {
                (a.innerHTML = "<p test=''></p>"),
                  a.querySelectorAll("[test^='']").length &&
                    e.push("[*^$]=" + A + "*(?:\"\"|'')"),
                  (a.innerHTML = "<input type='hidden'/>"),
                  a.querySelectorAll(":enabled").length ||
                    e.push(":enabled", ":disabled");
              }),
              (e = e.length && new RegExp(e.join("|"))),
              (bk = function (a, d, f, g, h) {
                if (!g && !h && (!e || !e.test(a)))
                  if (d.nodeType === 9)
                    try {
                      return u.apply(f, t.call(d.querySelectorAll(a), 0)), f;
                    } catch (i) {}
                  else if (
                    d.nodeType === 1 &&
                    d.nodeName.toLowerCase() !== "object"
                  ) {
                    var j,
                      k,
                      l,
                      m = d.getAttribute("id"),
                      n = m || o,
                      p = (N.test(a) && d.parentNode) || d;
                    m ? (n = n.replace(c, "\\$&")) : d.setAttribute("id", n),
                      (j = bc(a, d, h)),
                      (n = "[id='" + n + "']");
                    for (k = 0, l = j.length; k < l; k++)
                      j[k] = n + j[k].selector;
                    try {
                      return (
                        u.apply(f, t.call(p.querySelectorAll(j.join(",")), 0)),
                        f
                      );
                    } catch (i) {
                    } finally {
                      m || d.removeAttribute("id");
                    }
                  }
                return b(a, d, f, g, h);
              }),
              g &&
                (T(function (b) {
                  a = g.call(b, "div");
                  try {
                    g.call(b, "[test!='']:sizzle"),
                      f.push(S.PSEUDO.source, S.POS.source, "!=");
                  } catch (c) {}
                }),
                (f = new RegExp(f.join("|"))),
                ($.matchesSelector = function (b, c) {
                  c = c.replace(d, "='$1']");
                  if (!h(b) && !f.test(c) && (!e || !e.test(c)))
                    try {
                      var i = g.call(b, c);
                      if (i || a || (b.document && b.document.nodeType !== 11))
                        return i;
                    } catch (j) {}
                  return $(c, null, null, [b]).length > 0;
                }));
          })(),
        (f.setFilters.nth = f.setFilters.eq),
        (f.filters = f.pseudos),
        ($.attr = p.attr),
        (p.find = $),
        (p.expr = $.selectors),
        (p.expr[":"] = p.expr.pseudos),
        (p.unique = $.uniqueSort),
        (p.text = $.getText),
        (p.isXMLDoc = $.isXML),
        (p.contains = $.contains);
    })(a);
  var bc = /Until$/,
    bd = /^(?:parents|prev(?:Until|All))/,
    be = /^.[^:#\[\.,]*$/,
    bf = p.expr.match.needsContext,
    bg = { children: !0, contents: !0, next: !0, prev: !0 };
  p.fn.extend({
    find: function (a) {
      var b,
        c,
        d,
        e,
        f,
        g,
        h = this;
      if (typeof a != "string")
        return p(a).filter(function () {
          for (b = 0, c = h.length; b < c; b++)
            if (p.contains(h[b], this)) return !0;
        });
      g = this.pushStack("", "find", a);
      for (b = 0, c = this.length; b < c; b++) {
        (d = g.length), p.find(a, this[b], g);
        if (b > 0)
          for (e = d; e < g.length; e++)
            for (f = 0; f < d; f++)
              if (g[f] === g[e]) {
                g.splice(e--, 1);
                break;
              }
      }
      return g;
    },
    has: function (a) {
      var b,
        c = p(a, this),
        d = c.length;
      return this.filter(function () {
        for (b = 0; b < d; b++) if (p.contains(this, c[b])) return !0;
      });
    },
    not: function (a) {
      return this.pushStack(bj(this, a, !1), "not", a);
    },
    filter: function (a) {
      return this.pushStack(bj(this, a, !0), "filter", a);
    },
    is: function (a) {
      return (
        !!a &&
        (typeof a == "string"
          ? bf.test(a)
            ? p(a, this.context).index(this[0]) >= 0
            : p.filter(a, this).length > 0
          : this.filter(a).length > 0)
      );
    },
    closest: function (a, b) {
      var c,
        d = 0,
        e = this.length,
        f = [],
        g = bf.test(a) || typeof a != "string" ? p(a, b || this.context) : 0;
      for (; d < e; d++) {
        c = this[d];
        while (c && c.ownerDocument && c !== b && c.nodeType !== 11) {
          if (g ? g.index(c) > -1 : p.find.matchesSelector(c, a)) {
            f.push(c);
            break;
          }
          c = c.parentNode;
        }
      }
      return (
        (f = f.length > 1 ? p.unique(f) : f), this.pushStack(f, "closest", a)
      );
    },
    index: function (a) {
      return a
        ? typeof a == "string"
          ? p.inArray(this[0], p(a))
          : p.inArray(a.jquery ? a[0] : a, this)
        : this[0] && this[0].parentNode
          ? this.prevAll().length
          : -1;
    },
    add: function (a, b) {
      var c =
          typeof a == "string"
            ? p(a, b)
            : p.makeArray(a && a.nodeType ? [a] : a),
        d = p.merge(this.get(), c);
      return this.pushStack(bh(c[0]) || bh(d[0]) ? d : p.unique(d));
    },
    addBack: function (a) {
      return this.add(a == null ? this.prevObject : this.prevObject.filter(a));
    },
  }),
    (p.fn.andSelf = p.fn.addBack),
    p.each(
      {
        parent: function (a) {
          var b = a.parentNode;
          return b && b.nodeType !== 11 ? b : null;
        },
        parents: function (a) {
          return p.dir(a, "parentNode");
        },
        parentsUntil: function (a, b, c) {
          return p.dir(a, "parentNode", c);
        },
        next: function (a) {
          return bi(a, "nextSibling");
        },
        prev: function (a) {
          return bi(a, "previousSibling");
        },
        nextAll: function (a) {
          return p.dir(a, "nextSibling");
        },
        prevAll: function (a) {
          return p.dir(a, "previousSibling");
        },
        nextUntil: function (a, b, c) {
          return p.dir(a, "nextSibling", c);
        },
        prevUntil: function (a, b, c) {
          return p.dir(a, "previousSibling", c);
        },
        siblings: function (a) {
          return p.sibling((a.parentNode || {}).firstChild, a);
        },
        children: function (a) {
          return p.sibling(a.firstChild);
        },
        contents: function (a) {
          return p.nodeName(a, "iframe")
            ? a.contentDocument || a.contentWindow.document
            : p.merge([], a.childNodes);
        },
      },
      function (a, b) {
        p.fn[a] = function (c, d) {
          var e = p.map(this, b, c);
          return (
            bc.test(a) || (d = c),
            d && typeof d == "string" && (e = p.filter(d, e)),
            (e = this.length > 1 && !bg[a] ? p.unique(e) : e),
            this.length > 1 && bd.test(a) && (e = e.reverse()),
            this.pushStack(e, a, k.call(arguments).join(","))
          );
        };
      },
    ),
    p.extend({
      filter: function (a, b, c) {
        return (
          c && (a = ":not(" + a + ")"),
          b.length === 1
            ? p.find.matchesSelector(b[0], a)
              ? [b[0]]
              : []
            : p.find.matches(a, b)
        );
      },
      dir: function (a, c, d) {
        var e = [],
          f = a[c];
        while (
          f &&
          f.nodeType !== 9 &&
          (d === b || f.nodeType !== 1 || !p(f).is(d))
        )
          f.nodeType === 1 && e.push(f), (f = f[c]);
        return e;
      },
      sibling: function (a, b) {
        var c = [];
        for (; a; a = a.nextSibling) a.nodeType === 1 && a !== b && c.push(a);
        return c;
      },
    });
  var bl =
      "abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",
    bm = / jQuery\d+="(?:null|\d+)"/g,
    bn = /^\s+/,
    bo =
      /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi,
    bp = /<([\w:]+)/,
    bq = /<tbody/i,
    br = /<|&#?\w+;/,
    bs = /<(?:script|style|link)/i,
    bt = /<(?:script|object|embed|option|style)/i,
    bu = new RegExp("<(?:" + bl + ")[\\s/>]", "i"),
    bv = /^(?:checkbox|radio)$/,
    bw = /checked\s*(?:[^=]|=\s*.checked.)/i,
    bx = /\/(java|ecma)script/i,
    by = /^\s*<!(?:\[CDATA\[|\-\-)|[\]\-]{2}>\s*$/g,
    bz = {
      option: [1, "<select multiple='multiple'>", "</select>"],
      legend: [1, "<fieldset>", "</fieldset>"],
      thead: [1, "<table>", "</table>"],
      tr: [2, "<table><tbody>", "</tbody></table>"],
      td: [3, "<table><tbody><tr>", "</tr></tbody></table>"],
      col: [2, "<table><tbody></tbody><colgroup>", "</colgroup></table>"],
      area: [1, "<map>", "</map>"],
      _default: [0, "", ""],
    },
    bA = bk(e),
    bB = bA.appendChild(e.createElement("div"));
  (bz.optgroup = bz.option),
    (bz.tbody = bz.tfoot = bz.colgroup = bz.caption = bz.thead),
    (bz.th = bz.td),
    p.support.htmlSerialize || (bz._default = [1, "X<div>", "</div>"]),
    p.fn.extend({
      text: function (a) {
        return p.access(
          this,
          function (a) {
            return a === b
              ? p.text(this)
              : this.empty().append(
                  ((this[0] && this[0].ownerDocument) || e).createTextNode(a),
                );
          },
          null,
          a,
          arguments.length,
        );
      },
      wrapAll: function (a) {
        if (p.isFunction(a))
          return this.each(function (b) {
            p(this).wrapAll(a.call(this, b));
          });
        if (this[0]) {
          var b = p(a, this[0].ownerDocument).eq(0).clone(!0);
          this[0].parentNode && b.insertBefore(this[0]),
            b
              .map(function () {
                var a = this;
                while (a.firstChild && a.firstChild.nodeType === 1)
                  a = a.firstChild;
                return a;
              })
              .append(this);
        }
        return this;
      },
      wrapInner: function (a) {
        return p.isFunction(a)
          ? this.each(function (b) {
              p(this).wrapInner(a.call(this, b));
            })
          : this.each(function () {
              var b = p(this),
                c = b.contents();
              c.length ? c.wrapAll(a) : b.append(a);
            });
      },
      wrap: function (a) {
        var b = p.isFunction(a);
        return this.each(function (c) {
          p(this).wrapAll(b ? a.call(this, c) : a);
        });
      },
      unwrap: function () {
        return this.parent()
          .each(function () {
            p.nodeName(this, "body") || p(this).replaceWith(this.childNodes);
          })
          .end();
      },
      append: function () {
        return this.domManip(arguments, !0, function (a) {
          (this.nodeType === 1 || this.nodeType === 11) && this.appendChild(a);
        });
      },
      prepend: function () {
        return this.domManip(arguments, !0, function (a) {
          (this.nodeType === 1 || this.nodeType === 11) &&
            this.insertBefore(a, this.firstChild);
        });
      },
      before: function () {
        if (!bh(this[0]))
          return this.domManip(arguments, !1, function (a) {
            this.parentNode.insertBefore(a, this);
          });
        if (arguments.length) {
          var a = p.clean(arguments);
          return this.pushStack(p.merge(a, this), "before", this.selector);
        }
      },
      after: function () {
        if (!bh(this[0]))
          return this.domManip(arguments, !1, function (a) {
            this.parentNode.insertBefore(a, this.nextSibling);
          });
        if (arguments.length) {
          var a = p.clean(arguments);
          return this.pushStack(p.merge(this, a), "after", this.selector);
        }
      },
      remove: function (a, b) {
        var c,
          d = 0;
        for (; (c = this[d]) != null; d++)
          if (!a || p.filter(a, [c]).length)
            !b &&
              c.nodeType === 1 &&
              (p.cleanData(c.getElementsByTagName("*")), p.cleanData([c])),
              c.parentNode && c.parentNode.removeChild(c);
        return this;
      },
      empty: function () {
        var a,
          b = 0;
        for (; (a = this[b]) != null; b++) {
          a.nodeType === 1 && p.cleanData(a.getElementsByTagName("*"));
          while (a.firstChild) a.removeChild(a.firstChild);
        }
        return this;
      },
      clone: function (a, b) {
        return (
          (a = a == null ? !1 : a),
          (b = b == null ? a : b),
          this.map(function () {
            return p.clone(this, a, b);
          })
        );
      },
      html: function (a) {
        return p.access(
          this,
          function (a) {
            var c = this[0] || {},
              d = 0,
              e = this.length;
            if (a === b)
              return c.nodeType === 1 ? c.innerHTML.replace(bm, "") : b;
            if (
              typeof a == "string" &&
              !bs.test(a) &&
              (p.support.htmlSerialize || !bu.test(a)) &&
              (p.support.leadingWhitespace || !bn.test(a)) &&
              !bz[(bp.exec(a) || ["", ""])[1].toLowerCase()]
            ) {
              a = a.replace(bo, "<$1></$2>");
              try {
                for (; d < e; d++)
                  (c = this[d] || {}),
                    c.nodeType === 1 &&
                      (p.cleanData(c.getElementsByTagName("*")),
                      (c.innerHTML = a));
                c = 0;
              } catch (f) {}
            }
            c && this.empty().append(a);
          },
          null,
          a,
          arguments.length,
        );
      },
      replaceWith: function (a) {
        return bh(this[0])
          ? this.length
            ? this.pushStack(p(p.isFunction(a) ? a() : a), "replaceWith", a)
            : this
          : p.isFunction(a)
            ? this.each(function (b) {
                var c = p(this),
                  d = c.html();
                c.replaceWith(a.call(this, b, d));
              })
            : (typeof a != "string" && (a = p(a).detach()),
              this.each(function () {
                var b = this.nextSibling,
                  c = this.parentNode;
                p(this).remove(), b ? p(b).before(a) : p(c).append(a);
              }));
      },
      detach: function (a) {
        return this.remove(a, !0);
      },
      domManip: function (a, c, d) {
        a = [].concat.apply([], a);
        var e,
          f,
          g,
          h,
          i = 0,
          j = a[0],
          k = [],
          l = this.length;
        if (
          !p.support.checkClone &&
          l > 1 &&
          typeof j == "string" &&
          bw.test(j)
        )
          return this.each(function () {
            p(this).domManip(a, c, d);
          });
        if (p.isFunction(j))
          return this.each(function (e) {
            var f = p(this);
            (a[0] = j.call(this, e, c ? f.html() : b)), f.domManip(a, c, d);
          });
        if (this[0]) {
          (e = p.buildFragment(a, this, k)),
            (g = e.fragment),
            (f = g.firstChild),
            g.childNodes.length === 1 && (g = f);
          if (f) {
            c = c && p.nodeName(f, "tr");
            for (h = e.cacheable || l - 1; i < l; i++)
              d.call(
                c && p.nodeName(this[i], "table")
                  ? bC(this[i], "tbody")
                  : this[i],
                i === h ? g : p.clone(g, !0, !0),
              );
          }
          (g = f = null),
            k.length &&
              p.each(k, function (a, b) {
                b.src
                  ? p.ajax
                    ? p.ajax({
                        url: b.src,
                        type: "GET",
                        dataType: "script",
                        async: !1,
                        global: !1,
                        throws: !0,
                      })
                    : p.error("no ajax")
                  : p.globalEval(
                      (b.text || b.textContent || b.innerHTML || "").replace(
                        by,
                        "",
                      ),
                    ),
                  b.parentNode && b.parentNode.removeChild(b);
              });
        }
        return this;
      },
    }),
    (p.buildFragment = function (a, c, d) {
      var f,
        g,
        h,
        i = a[0];
      return (
        (c = c || e),
        (c = (!c.nodeType && c[0]) || c),
        (c = c.ownerDocument || c),
        a.length === 1 &&
          typeof i == "string" &&
          i.length < 512 &&
          c === e &&
          i.charAt(0) === "<" &&
          !bt.test(i) &&
          (p.support.checkClone || !bw.test(i)) &&
          (p.support.html5Clone || !bu.test(i)) &&
          ((g = !0), (f = p.fragments[i]), (h = f !== b)),
        f ||
          ((f = c.createDocumentFragment()),
          p.clean(a, c, f, d),
          g && (p.fragments[i] = h && f)),
        { fragment: f, cacheable: g }
      );
    }),
    (p.fragments = {}),
    p.each(
      {
        appendTo: "append",
        prependTo: "prepend",
        insertBefore: "before",
        insertAfter: "after",
        replaceAll: "replaceWith",
      },
      function (a, b) {
        p.fn[a] = function (c) {
          var d,
            e = 0,
            f = [],
            g = p(c),
            h = g.length,
            i = this.length === 1 && this[0].parentNode;
          if (
            (i == null ||
              (i && i.nodeType === 11 && i.childNodes.length === 1)) &&
            h === 1
          )
            return g[b](this[0]), this;
          for (; e < h; e++)
            (d = (e > 0 ? this.clone(!0) : this).get()),
              p(g[e])[b](d),
              (f = f.concat(d));
          return this.pushStack(f, a, g.selector);
        };
      },
    ),
    p.extend({
      clone: function (a, b, c) {
        var d, e, f, g;
        p.support.html5Clone ||
        p.isXMLDoc(a) ||
        !bu.test("<" + a.nodeName + ">")
          ? (g = a.cloneNode(!0))
          : ((bB.innerHTML = a.outerHTML), bB.removeChild((g = bB.firstChild)));
        if (
          (!p.support.noCloneEvent || !p.support.noCloneChecked) &&
          (a.nodeType === 1 || a.nodeType === 11) &&
          !p.isXMLDoc(a)
        ) {
          bE(a, g), (d = bF(a)), (e = bF(g));
          for (f = 0; d[f]; ++f) e[f] && bE(d[f], e[f]);
        }
        if (b) {
          bD(a, g);
          if (c) {
            (d = bF(a)), (e = bF(g));
            for (f = 0; d[f]; ++f) bD(d[f], e[f]);
          }
        }
        return (d = e = null), g;
      },
      clean: function (a, b, c, d) {
        var f,
          g,
          h,
          i,
          j,
          k,
          l,
          m,
          n,
          o,
          q,
          r,
          s = b === e && bA,
          t = [];
        if (!b || typeof b.createDocumentFragment == "undefined") b = e;
        for (f = 0; (h = a[f]) != null; f++) {
          typeof h == "number" && (h += "");
          if (!h) continue;
          if (typeof h == "string")
            if (!br.test(h)) h = b.createTextNode(h);
            else {
              (s = s || bk(b)),
                (l = b.createElement("div")),
                s.appendChild(l),
                (h = h.replace(bo, "<$1></$2>")),
                (i = (bp.exec(h) || ["", ""])[1].toLowerCase()),
                (j = bz[i] || bz._default),
                (k = j[0]),
                (l.innerHTML = j[1] + h + j[2]);
              while (k--) l = l.lastChild;
              if (!p.support.tbody) {
                (m = bq.test(h)),
                  (n =
                    i === "table" && !m
                      ? l.firstChild && l.firstChild.childNodes
                      : j[1] === "<table>" && !m
                        ? l.childNodes
                        : []);
                for (g = n.length - 1; g >= 0; --g)
                  p.nodeName(n[g], "tbody") &&
                    !n[g].childNodes.length &&
                    n[g].parentNode.removeChild(n[g]);
              }
              !p.support.leadingWhitespace &&
                bn.test(h) &&
                l.insertBefore(b.createTextNode(bn.exec(h)[0]), l.firstChild),
                (h = l.childNodes),
                l.parentNode.removeChild(l);
            }
          h.nodeType ? t.push(h) : p.merge(t, h);
        }
        l && (h = l = s = null);
        if (!p.support.appendChecked)
          for (f = 0; (h = t[f]) != null; f++)
            p.nodeName(h, "input")
              ? bG(h)
              : typeof h.getElementsByTagName != "undefined" &&
                p.grep(h.getElementsByTagName("input"), bG);
        if (c) {
          q = function (a) {
            if (!a.type || bx.test(a.type))
              return d
                ? d.push(a.parentNode ? a.parentNode.removeChild(a) : a)
                : c.appendChild(a);
          };
          for (f = 0; (h = t[f]) != null; f++)
            if (!p.nodeName(h, "script") || !q(h))
              c.appendChild(h),
                typeof h.getElementsByTagName != "undefined" &&
                  ((r = p.grep(
                    p.merge([], h.getElementsByTagName("script")),
                    q,
                  )),
                  t.splice.apply(t, [f + 1, 0].concat(r)),
                  (f += r.length));
        }
        return t;
      },
      cleanData: function (a, b) {
        var c,
          d,
          e,
          f,
          g = 0,
          h = p.expando,
          i = p.cache,
          j = p.support.deleteExpando,
          k = p.event.special;
        for (; (e = a[g]) != null; g++)
          if (b || p.acceptData(e)) {
            (d = e[h]), (c = d && i[d]);
            if (c) {
              if (c.events)
                for (f in c.events)
                  k[f] ? p.event.remove(e, f) : p.removeEvent(e, f, c.handle);
              i[d] &&
                (delete i[d],
                j
                  ? delete e[h]
                  : e.removeAttribute
                    ? e.removeAttribute(h)
                    : (e[h] = null),
                p.deletedIds.push(d));
            }
          }
      },
    }),
    (function () {
      var a, b;
      (p.uaMatch = function (a) {
        a = a.toLowerCase();
        var b =
          /(chrome)[ \/]([\w.]+)/.exec(a) ||
          /(webkit)[ \/]([\w.]+)/.exec(a) ||
          /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(a) ||
          /(msie) ([\w.]+)/.exec(a) ||
          (a.indexOf("compatible") < 0 &&
            /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(a)) ||
          [];
        return { browser: b[1] || "", version: b[2] || "0" };
      }),
        (a = p.uaMatch(g.userAgent)),
        (b = {}),
        a.browser && ((b[a.browser] = !0), (b.version = a.version)),
        b.chrome ? (b.webkit = !0) : b.webkit && (b.safari = !0),
        (p.browser = b),
        (p.sub = function () {
          function a(b, c) {
            return new a.fn.init(b, c);
          }
          p.extend(!0, a, this),
            (a.superclass = this),
            (a.fn = a.prototype = this()),
            (a.fn.constructor = a),
            (a.sub = this.sub),
            (a.fn.init = function c(c, d) {
              return (
                d && d instanceof p && !(d instanceof a) && (d = a(d)),
                p.fn.init.call(this, c, d, b)
              );
            }),
            (a.fn.init.prototype = a.fn);
          var b = a(e);
          return a;
        });
    })();
  var bH,
    bI,
    bJ,
    bK = /alpha\([^)]*\)/i,
    bL = /opacity=([^)]*)/,
    bM = /^(top|right|bottom|left)$/,
    bN = /^(none|table(?!-c[ea]).+)/,
    bO = /^margin/,
    bP = new RegExp("^(" + q + ")(.*)$", "i"),
    bQ = new RegExp("^(" + q + ")(?!px)[a-z%]+$", "i"),
    bR = new RegExp("^([-+])=(" + q + ")", "i"),
    bS = {},
    bT = { position: "absolute", visibility: "hidden", display: "block" },
    bU = { letterSpacing: 0, fontWeight: 400 },
    bV = ["Top", "Right", "Bottom", "Left"],
    bW = ["Webkit", "O", "Moz", "ms"],
    bX = p.fn.toggle;
  p.fn.extend({
    css: function (a, c) {
      return p.access(
        this,
        function (a, c, d) {
          return d !== b ? p.style(a, c, d) : p.css(a, c);
        },
        a,
        c,
        arguments.length > 1,
      );
    },
    show: function () {
      return b$(this, !0);
    },
    hide: function () {
      return b$(this);
    },
    toggle: function (a, b) {
      var c = typeof a == "boolean";
      return p.isFunction(a) && p.isFunction(b)
        ? bX.apply(this, arguments)
        : this.each(function () {
            (c ? a : bZ(this)) ? p(this).show() : p(this).hide();
          });
    },
  }),
    p.extend({
      cssHooks: {
        opacity: {
          get: function (a, b) {
            if (b) {
              var c = bH(a, "opacity");
              return c === "" ? "1" : c;
            }
          },
        },
      },
      cssNumber: {
        fillOpacity: !0,
        fontWeight: !0,
        lineHeight: !0,
        opacity: !0,
        orphans: !0,
        widows: !0,
        zIndex: !0,
        zoom: !0,
      },
      cssProps: { float: p.support.cssFloat ? "cssFloat" : "styleFloat" },
      style: function (a, c, d, e) {
        if (!a || a.nodeType === 3 || a.nodeType === 8 || !a.style) return;
        var f,
          g,
          h,
          i = p.camelCase(c),
          j = a.style;
        (c = p.cssProps[i] || (p.cssProps[i] = bY(j, i))),
          (h = p.cssHooks[c] || p.cssHooks[i]);
        if (d === b)
          return h && "get" in h && (f = h.get(a, !1, e)) !== b ? f : j[c];
        (g = typeof d),
          g === "string" &&
            (f = bR.exec(d)) &&
            ((d = (f[1] + 1) * f[2] + parseFloat(p.css(a, c))), (g = "number"));
        if (d == null || (g === "number" && isNaN(d))) return;
        g === "number" && !p.cssNumber[i] && (d += "px");
        if (!h || !("set" in h) || (d = h.set(a, d, e)) !== b)
          try {
            j[c] = d;
          } catch (k) {}
      },
      css: function (a, c, d, e) {
        var f,
          g,
          h,
          i = p.camelCase(c);
        return (
          (c = p.cssProps[i] || (p.cssProps[i] = bY(a.style, i))),
          (h = p.cssHooks[c] || p.cssHooks[i]),
          h && "get" in h && (f = h.get(a, !0, e)),
          f === b && (f = bH(a, c)),
          f === "normal" && c in bU && (f = bU[c]),
          d || e !== b
            ? ((g = parseFloat(f)), d || p.isNumeric(g) ? g || 0 : f)
            : f
        );
      },
      swap: function (a, b, c) {
        var d,
          e,
          f = {};
        for (e in b) (f[e] = a.style[e]), (a.style[e] = b[e]);
        d = c.call(a);
        for (e in b) a.style[e] = f[e];
        return d;
      },
    }),
    a.getComputedStyle
      ? (bH = function (b, c) {
          var d,
            e,
            f,
            g,
            h = a.getComputedStyle(b, null),
            i = b.style;
          return (
            h &&
              ((d = h[c]),
              d === "" &&
                !p.contains(b.ownerDocument, b) &&
                (d = p.style(b, c)),
              bQ.test(d) &&
                bO.test(c) &&
                ((e = i.width),
                (f = i.minWidth),
                (g = i.maxWidth),
                (i.minWidth = i.maxWidth = i.width = d),
                (d = h.width),
                (i.width = e),
                (i.minWidth = f),
                (i.maxWidth = g))),
            d
          );
        })
      : e.documentElement.currentStyle &&
        (bH = function (a, b) {
          var c,
            d,
            e = a.currentStyle && a.currentStyle[b],
            f = a.style;
          return (
            e == null && f && f[b] && (e = f[b]),
            bQ.test(e) &&
              !bM.test(b) &&
              ((c = f.left),
              (d = a.runtimeStyle && a.runtimeStyle.left),
              d && (a.runtimeStyle.left = a.currentStyle.left),
              (f.left = b === "fontSize" ? "1em" : e),
              (e = f.pixelLeft + "px"),
              (f.left = c),
              d && (a.runtimeStyle.left = d)),
            e === "" ? "auto" : e
          );
        }),
    p.each(["height", "width"], function (a, b) {
      p.cssHooks[b] = {
        get: function (a, c, d) {
          if (c)
            return a.offsetWidth === 0 && bN.test(bH(a, "display"))
              ? p.swap(a, bT, function () {
                  return cb(a, b, d);
                })
              : cb(a, b, d);
        },
        set: function (a, c, d) {
          return b_(
            a,
            c,
            d
              ? ca(
                  a,
                  b,
                  d,
                  p.support.boxSizing && p.css(a, "boxSizing") === "border-box",
                )
              : 0,
          );
        },
      };
    }),
    p.support.opacity ||
      (p.cssHooks.opacity = {
        get: function (a, b) {
          return bL.test(
            (b && a.currentStyle ? a.currentStyle.filter : a.style.filter) ||
              "",
          )
            ? 0.01 * parseFloat(RegExp.$1) + ""
            : b
              ? "1"
              : "";
        },
        set: function (a, b) {
          var c = a.style,
            d = a.currentStyle,
            e = p.isNumeric(b) ? "alpha(opacity=" + b * 100 + ")" : "",
            f = (d && d.filter) || c.filter || "";
          c.zoom = 1;
          if (b >= 1 && p.trim(f.replace(bK, "")) === "" && c.removeAttribute) {
            c.removeAttribute("filter");
            if (d && !d.filter) return;
          }
          c.filter = bK.test(f) ? f.replace(bK, e) : f + " " + e;
        },
      }),
    p(function () {
      p.support.reliableMarginRight ||
        (p.cssHooks.marginRight = {
          get: function (a, b) {
            return p.swap(a, { display: "inline-block" }, function () {
              if (b) return bH(a, "marginRight");
            });
          },
        }),
        !p.support.pixelPosition &&
          p.fn.position &&
          p.each(["top", "left"], function (a, b) {
            p.cssHooks[b] = {
              get: function (a, c) {
                if (c) {
                  var d = bH(a, b);
                  return bQ.test(d) ? p(a).position()[b] + "px" : d;
                }
              },
            };
          });
    }),
    p.expr &&
      p.expr.filters &&
      ((p.expr.filters.hidden = function (a) {
        return (
          (a.offsetWidth === 0 && a.offsetHeight === 0) ||
          (!p.support.reliableHiddenOffsets &&
            ((a.style && a.style.display) || bH(a, "display")) === "none")
        );
      }),
      (p.expr.filters.visible = function (a) {
        return !p.expr.filters.hidden(a);
      })),
    p.each({ margin: "", padding: "", border: "Width" }, function (a, b) {
      (p.cssHooks[a + b] = {
        expand: function (c) {
          var d,
            e = typeof c == "string" ? c.split(" ") : [c],
            f = {};
          for (d = 0; d < 4; d++) f[a + bV[d] + b] = e[d] || e[d - 2] || e[0];
          return f;
        },
      }),
        bO.test(a) || (p.cssHooks[a + b].set = b_);
    });
  var cd = /%20/g,
    ce = /\[\]$/,
    cf = /\r?\n/g,
    cg =
      /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,
    ch = /^(?:select|textarea)/i;
  p.fn.extend({
    serialize: function () {
      return p.param(this.serializeArray());
    },
    serializeArray: function () {
      return this.map(function () {
        return this.elements ? p.makeArray(this.elements) : this;
      })
        .filter(function () {
          return (
            this.name &&
            !this.disabled &&
            (this.checked || ch.test(this.nodeName) || cg.test(this.type))
          );
        })
        .map(function (a, b) {
          var c = p(this).val();
          return c == null
            ? null
            : p.isArray(c)
              ? p.map(c, function (a, c) {
                  return { name: b.name, value: a.replace(cf, "\r\n") };
                })
              : { name: b.name, value: c.replace(cf, "\r\n") };
        })
        .get();
    },
  }),
    (p.param = function (a, c) {
      var d,
        e = [],
        f = function (a, b) {
          (b = p.isFunction(b) ? b() : b == null ? "" : b),
            (e[e.length] = encodeURIComponent(a) + "=" + encodeURIComponent(b));
        };
      c === b && (c = p.ajaxSettings && p.ajaxSettings.traditional);
      if (p.isArray(a) || (a.jquery && !p.isPlainObject(a)))
        p.each(a, function () {
          f(this.name, this.value);
        });
      else for (d in a) ci(d, a[d], c, f);
      return e.join("&").replace(cd, "+");
    });
  var cj,
    ck,
    cl = /#.*$/,
    cm = /^(.*?):[ \t]*([^\r\n]*)\r?$/gm,
    cn = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,
    co = /^(?:GET|HEAD)$/,
    cp = /^\/\//,
    cq = /\?/,
    cr = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    cs = /([?&])_=[^&]*/,
    ct = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+)|)|)/,
    cu = p.fn.load,
    cv = {},
    cw = {},
    cx = ["*/"] + ["*"];
  try {
    cj = f.href;
  } catch (cy) {
    (cj = e.createElement("a")), (cj.href = ""), (cj = cj.href);
  }
  (ck = ct.exec(cj.toLowerCase()) || []),
    (p.fn.load = function (a, c, d) {
      if (typeof a != "string" && cu) return cu.apply(this, arguments);
      if (!this.length) return this;
      var e,
        f,
        g,
        h = this,
        i = a.indexOf(" ");
      return (
        i >= 0 && ((e = a.slice(i, a.length)), (a = a.slice(0, i))),
        p.isFunction(c)
          ? ((d = c), (c = b))
          : c && typeof c == "object" && (f = "POST"),
        p
          .ajax({
            url: a,
            type: f,
            dataType: "html",
            data: c,
            complete: function (a, b) {
              d && h.each(d, g || [a.responseText, b, a]);
            },
          })
          .done(function (a) {
            (g = arguments),
              h.html(e ? p("<div>").append(a.replace(cr, "")).find(e) : a);
          }),
        this
      );
    }),
    p.each(
      "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(
        " ",
      ),
      function (a, b) {
        p.fn[b] = function (a) {
          return this.on(b, a);
        };
      },
    ),
    p.each(["get", "post"], function (a, c) {
      p[c] = function (a, d, e, f) {
        return (
          p.isFunction(d) && ((f = f || e), (e = d), (d = b)),
          p.ajax({ type: c, url: a, data: d, success: e, dataType: f })
        );
      };
    }),
    p.extend({
      getScript: function (a, c) {
        return p.get(a, b, c, "script");
      },
      getJSON: function (a, b, c) {
        return p.get(a, b, c, "json");
      },
      ajaxSetup: function (a, b) {
        return (
          b ? cB(a, p.ajaxSettings) : ((b = a), (a = p.ajaxSettings)),
          cB(a, b),
          a
        );
      },
      ajaxSettings: {
        url: cj,
        isLocal: cn.test(ck[1]),
        global: !0,
        type: "GET",
        contentType: "application/x-www-form-urlencoded; charset=UTF-8",
        processData: !0,
        async: !0,
        accepts: {
          xml: "application/xml, text/xml",
          html: "text/html",
          text: "text/plain",
          json: "application/json, text/javascript",
          "*": cx,
        },
        contents: { xml: /xml/, html: /html/, json: /json/ },
        responseFields: { xml: "responseXML", text: "responseText" },
        converters: {
          "* text": a.String,
          "text html": !0,
          "text json": p.parseJSON,
          "text xml": p.parseXML,
        },
        flatOptions: { context: !0, url: !0 },
      },
      ajaxPrefilter: cz(cv),
      ajaxTransport: cz(cw),
      ajax: function (a, c) {
        function y(a, c, f, i) {
          var k,
            s,
            t,
            u,
            w,
            y = c;
          if (v === 2) return;
          (v = 2),
            h && clearTimeout(h),
            (g = b),
            (e = i || ""),
            (x.readyState = a > 0 ? 4 : 0),
            f && (u = cC(l, x, f));
          if ((a >= 200 && a < 300) || a === 304)
            l.ifModified &&
              ((w = x.getResponseHeader("Last-Modified")),
              w && (p.lastModified[d] = w),
              (w = x.getResponseHeader("Etag")),
              w && (p.etag[d] = w)),
              a === 304
                ? ((y = "notmodified"), (k = !0))
                : ((k = cD(l, u)),
                  (y = k.state),
                  (s = k.data),
                  (t = k.error),
                  (k = !t));
          else {
            t = y;
            if (!y || a) (y = "error"), a < 0 && (a = 0);
          }
          (x.status = a),
            (x.statusText = "" + (c || y)),
            k ? o.resolveWith(m, [s, y, x]) : o.rejectWith(m, [x, y, t]),
            x.statusCode(r),
            (r = b),
            j &&
              n.trigger("ajax" + (k ? "Success" : "Error"), [x, l, k ? s : t]),
            q.fireWith(m, [x, y]),
            j &&
              (n.trigger("ajaxComplete", [x, l]),
              --p.active || p.event.trigger("ajaxStop"));
        }
        typeof a == "object" && ((c = a), (a = b)), (c = c || {});
        var d,
          e,
          f,
          g,
          h,
          i,
          j,
          k,
          l = p.ajaxSetup({}, c),
          m = l.context || l,
          n = m !== l && (m.nodeType || m instanceof p) ? p(m) : p.event,
          o = p.Deferred(),
          q = p.Callbacks("once memory"),
          r = l.statusCode || {},
          t = {},
          u = {},
          v = 0,
          w = "canceled",
          x = {
            readyState: 0,
            setRequestHeader: function (a, b) {
              if (!v) {
                var c = a.toLowerCase();
                (a = u[c] = u[c] || a), (t[a] = b);
              }
              return this;
            },
            getAllResponseHeaders: function () {
              return v === 2 ? e : null;
            },
            getResponseHeader: function (a) {
              var c;
              if (v === 2) {
                if (!f) {
                  f = {};
                  while ((c = cm.exec(e))) f[c[1].toLowerCase()] = c[2];
                }
                c = f[a.toLowerCase()];
              }
              return c === b ? null : c;
            },
            overrideMimeType: function (a) {
              return v || (l.mimeType = a), this;
            },
            abort: function (a) {
              return (a = a || w), g && g.abort(a), y(0, a), this;
            },
          };
        o.promise(x),
          (x.success = x.done),
          (x.error = x.fail),
          (x.complete = q.add),
          (x.statusCode = function (a) {
            if (a) {
              var b;
              if (v < 2) for (b in a) r[b] = [r[b], a[b]];
              else (b = a[x.status]), x.always(b);
            }
            return this;
          }),
          (l.url = ((a || l.url) + "")
            .replace(cl, "")
            .replace(cp, ck[1] + "//")),
          (l.dataTypes = p
            .trim(l.dataType || "*")
            .toLowerCase()
            .split(s)),
          l.crossDomain == null &&
            ((i = ct.exec(l.url.toLowerCase())),
            (l.crossDomain = !(
              !i ||
              (i[1] == ck[1] &&
                i[2] == ck[2] &&
                (i[3] || (i[1] === "http:" ? 80 : 443)) ==
                  (ck[3] || (ck[1] === "http:" ? 80 : 443)))
            ))),
          l.data &&
            l.processData &&
            typeof l.data != "string" &&
            (l.data = p.param(l.data, l.traditional)),
          cA(cv, l, c, x);
        if (v === 2) return x;
        (j = l.global),
          (l.type = l.type.toUpperCase()),
          (l.hasContent = !co.test(l.type)),
          j && p.active++ === 0 && p.event.trigger("ajaxStart");
        if (!l.hasContent) {
          l.data &&
            ((l.url += (cq.test(l.url) ? "&" : "?") + l.data), delete l.data),
            (d = l.url);
          if (l.cache === !1) {
            var z = p.now(),
              A = l.url.replace(cs, "$1_=" + z);
            l.url =
              A + (A === l.url ? (cq.test(l.url) ? "&" : "?") + "_=" + z : "");
          }
        }
        ((l.data && l.hasContent && l.contentType !== !1) || c.contentType) &&
          x.setRequestHeader("Content-Type", l.contentType),
          l.ifModified &&
            ((d = d || l.url),
            p.lastModified[d] &&
              x.setRequestHeader("If-Modified-Since", p.lastModified[d]),
            p.etag[d] && x.setRequestHeader("If-None-Match", p.etag[d])),
          x.setRequestHeader(
            "Accept",
            l.dataTypes[0] && l.accepts[l.dataTypes[0]]
              ? l.accepts[l.dataTypes[0]] +
                  (l.dataTypes[0] !== "*" ? ", " + cx + "; q=0.01" : "")
              : l.accepts["*"],
          );
        for (k in l.headers) x.setRequestHeader(k, l.headers[k]);
        if (!l.beforeSend || (l.beforeSend.call(m, x, l) !== !1 && v !== 2)) {
          w = "abort";
          for (k in { success: 1, error: 1, complete: 1 }) x[k](l[k]);
          g = cA(cw, l, c, x);
          if (!g) y(-1, "No Transport");
          else {
            (x.readyState = 1),
              j && n.trigger("ajaxSend", [x, l]),
              l.async &&
                l.timeout > 0 &&
                (h = setTimeout(function () {
                  x.abort("timeout");
                }, l.timeout));
            try {
              (v = 1), g.send(t, y);
            } catch (B) {
              if (v < 2) y(-1, B);
              else throw B;
            }
          }
          return x;
        }
        return x.abort();
      },
      active: 0,
      lastModified: {},
      etag: {},
    });
  var cE = [],
    cF = /\?/,
    cG = /(=)\?(?=&|$)|\?\?/,
    cH = p.now();
  p.ajaxSetup({
    jsonp: "callback",
    jsonpCallback: function () {
      var a = cE.pop() || p.expando + "_" + cH++;
      return (this[a] = !0), a;
    },
  }),
    p.ajaxPrefilter("json jsonp", function (c, d, e) {
      var f,
        g,
        h,
        i = c.data,
        j = c.url,
        k = c.jsonp !== !1,
        l = k && cG.test(j),
        m =
          k &&
          !l &&
          typeof i == "string" &&
          !(c.contentType || "").indexOf("application/x-www-form-urlencoded") &&
          cG.test(i);
      if (c.dataTypes[0] === "jsonp" || l || m)
        return (
          (f = c.jsonpCallback =
            p.isFunction(c.jsonpCallback)
              ? c.jsonpCallback()
              : c.jsonpCallback),
          (g = a[f]),
          l
            ? (c.url = j.replace(cG, "$1" + f))
            : m
              ? (c.data = i.replace(cG, "$1" + f))
              : k && (c.url += (cF.test(j) ? "&" : "?") + c.jsonp + "=" + f),
          (c.converters["script json"] = function () {
            return h || p.error(f + " was not called"), h[0];
          }),
          (c.dataTypes[0] = "json"),
          (a[f] = function () {
            h = arguments;
          }),
          e.always(function () {
            (a[f] = g),
              c[f] && ((c.jsonpCallback = d.jsonpCallback), cE.push(f)),
              h && p.isFunction(g) && g(h[0]),
              (h = g = b);
          }),
          "script"
        );
    }),
    p.ajaxSetup({
      accepts: {
        script:
          "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript",
      },
      contents: { script: /javascript|ecmascript/ },
      converters: {
        "text script": function (a) {
          return p.globalEval(a), a;
        },
      },
    }),
    p.ajaxPrefilter("script", function (a) {
      a.cache === b && (a.cache = !1),
        a.crossDomain && ((a.type = "GET"), (a.global = !1));
    }),
    p.ajaxTransport("script", function (a) {
      if (a.crossDomain) {
        var c,
          d = e.head || e.getElementsByTagName("head")[0] || e.documentElement;
        return {
          send: function (f, g) {
            (c = e.createElement("script")),
              (c.async = "async"),
              a.scriptCharset && (c.charset = a.scriptCharset),
              (c.src = a.url),
              (c.onload = c.onreadystatechange =
                function (a, e) {
                  if (
                    e ||
                    !c.readyState ||
                    /loaded|complete/.test(c.readyState)
                  )
                    (c.onload = c.onreadystatechange = null),
                      d && c.parentNode && d.removeChild(c),
                      (c = b),
                      e || g(200, "success");
                }),
              d.insertBefore(c, d.firstChild);
          },
          abort: function () {
            c && c.onload(0, 1);
          },
        };
      }
    });
  var cI,
    cJ = a.ActiveXObject
      ? function () {
          for (var a in cI) cI[a](0, 1);
        }
      : !1,
    cK = 0;
  (p.ajaxSettings.xhr = a.ActiveXObject
    ? function () {
        return (!this.isLocal && cL()) || cM();
      }
    : cL),
    (function (a) {
      p.extend(p.support, { ajax: !!a, cors: !!a && "withCredentials" in a });
    })(p.ajaxSettings.xhr()),
    p.support.ajax &&
      p.ajaxTransport(function (c) {
        if (!c.crossDomain || p.support.cors) {
          var d;
          return {
            send: function (e, f) {
              var g,
                h,
                i = c.xhr();
              c.username
                ? i.open(c.type, c.url, c.async, c.username, c.password)
                : i.open(c.type, c.url, c.async);
              if (c.xhrFields) for (h in c.xhrFields) i[h] = c.xhrFields[h];
              c.mimeType &&
                i.overrideMimeType &&
                i.overrideMimeType(c.mimeType),
                !c.crossDomain &&
                  !e["X-Requested-With"] &&
                  (e["X-Requested-With"] = "XMLHttpRequest");
              try {
                for (h in e) i.setRequestHeader(h, e[h]);
              } catch (j) {}
              i.send((c.hasContent && c.data) || null),
                (d = function (a, e) {
                  var h, j, k, l, m;
                  try {
                    if (d && (e || i.readyState === 4)) {
                      (d = b),
                        g &&
                          ((i.onreadystatechange = p.noop), cJ && delete cI[g]);
                      if (e) i.readyState !== 4 && i.abort();
                      else {
                        (h = i.status),
                          (k = i.getAllResponseHeaders()),
                          (l = {}),
                          (m = i.responseXML),
                          m && m.documentElement && (l.xml = m);
                        try {
                          l.text = i.responseText;
                        } catch (a) {}
                        try {
                          j = i.statusText;
                        } catch (n) {
                          j = "";
                        }
                        !h && c.isLocal && !c.crossDomain
                          ? (h = l.text ? 200 : 404)
                          : h === 1223 && (h = 204);
                      }
                    }
                  } catch (o) {
                    e || f(-1, o);
                  }
                  l && f(h, j, l, k);
                }),
                c.async
                  ? i.readyState === 4
                    ? setTimeout(d, 0)
                    : ((g = ++cK),
                      cJ && (cI || ((cI = {}), p(a).unload(cJ)), (cI[g] = d)),
                      (i.onreadystatechange = d))
                  : d();
            },
            abort: function () {
              d && d(0, 1);
            },
          };
        }
      });
  var cN,
    cO,
    cP = /^(?:toggle|show|hide)$/,
    cQ = new RegExp("^(?:([-+])=|)(" + q + ")([a-z%]*)$", "i"),
    cR = /queueHooks$/,
    cS = [cY],
    cT = {
      "*": [
        function (a, b) {
          var c,
            d,
            e,
            f = this.createTween(a, b),
            g = cQ.exec(b),
            h = f.cur(),
            i = +h || 0,
            j = 1;
          if (g) {
            (c = +g[2]), (d = g[3] || (p.cssNumber[a] ? "" : "px"));
            if (d !== "px" && i) {
              i = p.css(f.elem, a, !0) || c || 1;
              do
                (e = j = j || ".5"),
                  (i = i / j),
                  p.style(f.elem, a, i + d),
                  (j = f.cur() / h);
              while (j !== 1 && j !== e);
            }
            (f.unit = d),
              (f.start = i),
              (f.end = g[1] ? i + (g[1] + 1) * c : c);
          }
          return f;
        },
      ],
    };
  (p.Animation = p.extend(cW, {
    tweener: function (a, b) {
      p.isFunction(a) ? ((b = a), (a = ["*"])) : (a = a.split(" "));
      var c,
        d = 0,
        e = a.length;
      for (; d < e; d++) (c = a[d]), (cT[c] = cT[c] || []), cT[c].unshift(b);
    },
    prefilter: function (a, b) {
      b ? cS.unshift(a) : cS.push(a);
    },
  })),
    (p.Tween = cZ),
    (cZ.prototype = {
      constructor: cZ,
      init: function (a, b, c, d, e, f) {
        (this.elem = a),
          (this.prop = c),
          (this.easing = e || "swing"),
          (this.options = b),
          (this.start = this.now = this.cur()),
          (this.end = d),
          (this.unit = f || (p.cssNumber[c] ? "" : "px"));
      },
      cur: function () {
        var a = cZ.propHooks[this.prop];
        return a && a.get ? a.get(this) : cZ.propHooks._default.get(this);
      },
      run: function (a) {
        var b,
          c = cZ.propHooks[this.prop];
        return (
          this.options.duration
            ? (this.pos = b =
                p.easing[this.easing](
                  a,
                  this.options.duration * a,
                  0,
                  1,
                  this.options.duration,
                ))
            : (this.pos = b = a),
          (this.now = (this.end - this.start) * b + this.start),
          this.options.step &&
            this.options.step.call(this.elem, this.now, this),
          c && c.set ? c.set(this) : cZ.propHooks._default.set(this),
          this
        );
      },
    }),
    (cZ.prototype.init.prototype = cZ.prototype),
    (cZ.propHooks = {
      _default: {
        get: function (a) {
          var b;
          return a.elem[a.prop] == null ||
            (!!a.elem.style && a.elem.style[a.prop] != null)
            ? ((b = p.css(a.elem, a.prop, !1, "")), !b || b === "auto" ? 0 : b)
            : a.elem[a.prop];
        },
        set: function (a) {
          p.fx.step[a.prop]
            ? p.fx.step[a.prop](a)
            : a.elem.style &&
                (a.elem.style[p.cssProps[a.prop]] != null || p.cssHooks[a.prop])
              ? p.style(a.elem, a.prop, a.now + a.unit)
              : (a.elem[a.prop] = a.now);
        },
      },
    }),
    (cZ.propHooks.scrollTop = cZ.propHooks.scrollLeft =
      {
        set: function (a) {
          a.elem.nodeType && a.elem.parentNode && (a.elem[a.prop] = a.now);
        },
      }),
    p.each(["toggle", "show", "hide"], function (a, b) {
      var c = p.fn[b];
      p.fn[b] = function (d, e, f) {
        return d == null ||
          typeof d == "boolean" ||
          (!a && p.isFunction(d) && p.isFunction(e))
          ? c.apply(this, arguments)
          : this.animate(c$(b, !0), d, e, f);
      };
    }),
    p.fn.extend({
      fadeTo: function (a, b, c, d) {
        return this.filter(bZ)
          .css("opacity", 0)
          .show()
          .end()
          .animate({ opacity: b }, a, c, d);
      },
      animate: function (a, b, c, d) {
        var e = p.isEmptyObject(a),
          f = p.speed(b, c, d),
          g = function () {
            var b = cW(this, p.extend({}, a), f);
            e && b.stop(!0);
          };
        return e || f.queue === !1 ? this.each(g) : this.queue(f.queue, g);
      },
      stop: function (a, c, d) {
        var e = function (a) {
          var b = a.stop;
          delete a.stop, b(d);
        };
        return (
          typeof a != "string" && ((d = c), (c = a), (a = b)),
          c && a !== !1 && this.queue(a || "fx", []),
          this.each(function () {
            var b = !0,
              c = a != null && a + "queueHooks",
              f = p.timers,
              g = p._data(this);
            if (c) g[c] && g[c].stop && e(g[c]);
            else for (c in g) g[c] && g[c].stop && cR.test(c) && e(g[c]);
            for (c = f.length; c--; )
              f[c].elem === this &&
                (a == null || f[c].queue === a) &&
                (f[c].anim.stop(d), (b = !1), f.splice(c, 1));
            (b || !d) && p.dequeue(this, a);
          })
        );
      },
    }),
    p.each(
      {
        slideDown: c$("show"),
        slideUp: c$("hide"),
        slideToggle: c$("toggle"),
        fadeIn: { opacity: "show" },
        fadeOut: { opacity: "hide" },
        fadeToggle: { opacity: "toggle" },
      },
      function (a, b) {
        p.fn[a] = function (a, c, d) {
          return this.animate(b, a, c, d);
        };
      },
    ),
    (p.speed = function (a, b, c) {
      var d =
        a && typeof a == "object"
          ? p.extend({}, a)
          : {
              complete: c || (!c && b) || (p.isFunction(a) && a),
              duration: a,
              easing: (c && b) || (b && !p.isFunction(b) && b),
            };
      d.duration = p.fx.off
        ? 0
        : typeof d.duration == "number"
          ? d.duration
          : d.duration in p.fx.speeds
            ? p.fx.speeds[d.duration]
            : p.fx.speeds._default;
      if (d.queue == null || d.queue === !0) d.queue = "fx";
      return (
        (d.old = d.complete),
        (d.complete = function () {
          p.isFunction(d.old) && d.old.call(this),
            d.queue && p.dequeue(this, d.queue);
        }),
        d
      );
    }),
    (p.easing = {
      linear: function (a) {
        return a;
      },
      swing: function (a) {
        return 0.5 - Math.cos(a * Math.PI) / 2;
      },
    }),
    (p.timers = []),
    (p.fx = cZ.prototype.init),
    (p.fx.tick = function () {
      var a,
        b = p.timers,
        c = 0;
      for (; c < b.length; c++)
        (a = b[c]), !a() && b[c] === a && b.splice(c--, 1);
      b.length || p.fx.stop();
    }),
    (p.fx.timer = function (a) {
      a() &&
        p.timers.push(a) &&
        !cO &&
        (cO = setInterval(p.fx.tick, p.fx.interval));
    }),
    (p.fx.interval = 13),
    (p.fx.stop = function () {
      clearInterval(cO), (cO = null);
    }),
    (p.fx.speeds = { slow: 600, fast: 200, _default: 400 }),
    (p.fx.step = {}),
    p.expr &&
      p.expr.filters &&
      (p.expr.filters.animated = function (a) {
        return p.grep(p.timers, function (b) {
          return a === b.elem;
        }).length;
      });
  var c_ = /^(?:body|html)$/i;
  (p.fn.offset = function (a) {
    if (arguments.length)
      return a === b
        ? this
        : this.each(function (b) {
            p.offset.setOffset(this, a, b);
          });
    var c,
      d,
      e,
      f,
      g,
      h,
      i,
      j,
      k,
      l,
      m = this[0],
      n = m && m.ownerDocument;
    if (!n) return;
    return (e = n.body) === m
      ? p.offset.bodyOffset(m)
      : ((d = n.documentElement),
        p.contains(d, m)
          ? ((c = m.getBoundingClientRect()),
            (f = da(n)),
            (g = d.clientTop || e.clientTop || 0),
            (h = d.clientLeft || e.clientLeft || 0),
            (i = f.pageYOffset || d.scrollTop),
            (j = f.pageXOffset || d.scrollLeft),
            (k = c.top + i - g),
            (l = c.left + j - h),
            { top: k, left: l })
          : { top: 0, left: 0 });
  }),
    (p.offset = {
      bodyOffset: function (a) {
        var b = a.offsetTop,
          c = a.offsetLeft;
        return (
          p.support.doesNotIncludeMarginInBodyOffset &&
            ((b += parseFloat(p.css(a, "marginTop")) || 0),
            (c += parseFloat(p.css(a, "marginLeft")) || 0)),
          { top: b, left: c }
        );
      },
      setOffset: function (a, b, c) {
        var d = p.css(a, "position");
        d === "static" && (a.style.position = "relative");
        var e = p(a),
          f = e.offset(),
          g = p.css(a, "top"),
          h = p.css(a, "left"),
          i =
            (d === "absolute" || d === "fixed") &&
            p.inArray("auto", [g, h]) > -1,
          j = {},
          k = {},
          l,
          m;
        i
          ? ((k = e.position()), (l = k.top), (m = k.left))
          : ((l = parseFloat(g) || 0), (m = parseFloat(h) || 0)),
          p.isFunction(b) && (b = b.call(a, c, f)),
          b.top != null && (j.top = b.top - f.top + l),
          b.left != null && (j.left = b.left - f.left + m),
          "using" in b ? b.using.call(a, j) : e.css(j);
      },
    }),
    p.fn.extend({
      position: function () {
        if (!this[0]) return;
        var a = this[0],
          b = this.offsetParent(),
          c = this.offset(),
          d = c_.test(b[0].nodeName) ? { top: 0, left: 0 } : b.offset();
        return (
          (c.top -= parseFloat(p.css(a, "marginTop")) || 0),
          (c.left -= parseFloat(p.css(a, "marginLeft")) || 0),
          (d.top += parseFloat(p.css(b[0], "borderTopWidth")) || 0),
          (d.left += parseFloat(p.css(b[0], "borderLeftWidth")) || 0),
          { top: c.top - d.top, left: c.left - d.left }
        );
      },
      offsetParent: function () {
        return this.map(function () {
          var a = this.offsetParent || e.body;
          while (a && !c_.test(a.nodeName) && p.css(a, "position") === "static")
            a = a.offsetParent;
          return a || e.body;
        });
      },
    }),
    p.each(
      { scrollLeft: "pageXOffset", scrollTop: "pageYOffset" },
      function (a, c) {
        var d = /Y/.test(c);
        p.fn[a] = function (e) {
          return p.access(
            this,
            function (a, e, f) {
              var g = da(a);
              if (f === b)
                return g
                  ? c in g
                    ? g[c]
                    : g.document.documentElement[e]
                  : a[e];
              g
                ? g.scrollTo(
                    d ? p(g).scrollLeft() : f,
                    d ? f : p(g).scrollTop(),
                  )
                : (a[e] = f);
            },
            a,
            e,
            arguments.length,
            null,
          );
        };
      },
    ),
    p.each({ Height: "height", Width: "width" }, function (a, c) {
      p.each(
        { padding: "inner" + a, content: c, "": "outer" + a },
        function (d, e) {
          p.fn[e] = function (e, f) {
            var g = arguments.length && (d || typeof e != "boolean"),
              h = d || (e === !0 || f === !0 ? "margin" : "border");
            return p.access(
              this,
              function (c, d, e) {
                var f;
                return p.isWindow(c)
                  ? c.document.documentElement["client" + a]
                  : c.nodeType === 9
                    ? ((f = c.documentElement),
                      Math.max(
                        c.body["scroll" + a],
                        f["scroll" + a],
                        c.body["offset" + a],
                        f["offset" + a],
                        f["client" + a],
                      ))
                    : e === b
                      ? p.css(c, d, e, h)
                      : p.style(c, d, e, h);
              },
              c,
              g ? e : b,
              g,
              null,
            );
          };
        },
      );
    }),
    (a.jQuery = a.$ = p),
    typeof define == "function" &&
      define.amd &&
      define.amd.jQuery &&
      define("jquery", [], function () {
        return p;
      });
})(window);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/portfolio/jquery.quicksand.js
================================================
/*

Quicksand 1.2.2

Reorder and filter items with a nice shuffling animation.

Copyright (c) 2010 Jacek Galanciak (razorjack.net) and agilope.com
Big thanks for Piotr Petrus (riddle.pl) for deep code review and wonderful docs & demos.

Dual licensed under the MIT and GPL version 2 licenses.
http://github.com/jquery/jquery/blob/master/MIT-LICENSE.txt
http://github.com/jquery/jquery/blob/master/GPL-LICENSE.txt

Project site: http://razorjack.net/quicksand
Github site: http://github.com/razorjack/quicksand

*/

(function ($) {
  $.fn.quicksand = function (collection, customOptions) {
    var options = {
      duration: 750,
      easing: "swing",
      attribute: "data-id", // attribute to recognize same items within source and dest
      adjustHeight: "auto", // 'dynamic' animates height during shuffling (slow), 'auto' adjusts it before or after the animation, false leaves height constant
      useScaling: true, // disable it if you're not using scaling effect or want to improve performance
      enhancement: function (c) {}, // Visual enhacement (eg. font replacement) function for cloned elements
      selector: "> *",
      dx: 0,
      dy: 0,
    };
    $.extend(options, customOptions);

    if ($.browser.msie || typeof $.fn.scale == "undefined") {
      // Got IE and want scaling effect? Kiss my ass.
      options.useScaling = false;
    }

    var callbackFunction;
    if (typeof arguments[1] == "function") {
      var callbackFunction = arguments[1];
    } else if (typeof (arguments[2] == "function")) {
      var callbackFunction = arguments[2];
    }

    return this.each(function (i) {
      var val;
      var animationQueue = []; // used to store all the animation params before starting the animation; solves initial animation slowdowns
      var $collection = $(collection).clone(); // destination (target) collection
      var $sourceParent = $(this); // source, the visible container of source collection
      var sourceHeight = $(this).css("height"); // used to keep height and document flow during the animation

      var destHeight;
      var adjustHeightOnCallback = false;

      var offset = $($sourceParent).offset(); // offset of visible container, used in animation calculations
      var offsets = []; // coordinates of every source collection item

      var $source = $(this).find(options.selector); // source collection items

      // Replace the collection and quit if IE6
      if ($.browser.msie && $.browser.version.substr(0, 1) < 7) {
        $sourceParent.html("").append($collection);
        return;
      }

      // Gets called when any animation is finished
      var postCallbackPerformed = 0; // prevents the function from being called more than one time
      var postCallback = function () {
        if (!postCallbackPerformed) {
          postCallbackPerformed = 1;

          // hack:
          // used to be: $sourceParent.html($dest.html()); // put target HTML into visible source container
          // but new webkit builds cause flickering when replacing the collections
          $toDelete = $sourceParent.find("> *");
          $sourceParent.prepend($dest.find("> *"));
          $toDelete.remove();

          if (adjustHeightOnCallback) {
            $sourceParent.css("height", destHeight);
          }
          options.enhancement($sourceParent); // Perform custom visual enhancements on a newly replaced collection
          if (typeof callbackFunction == "function") {
            callbackFunction.call(this);
          }
        }
      };

      // Position: relative situations
      var $correctionParent = $sourceParent.offsetParent();
      var correctionOffset = $correctionParent.offset();
      if ($correctionParent.css("position") == "relative") {
        if ($correctionParent.get(0).nodeName.toLowerCase() == "body") {
        } else {
          correctionOffset.top +=
            parseFloat($correctionParent.css("border-top-width")) || 0;
          correctionOffset.left +=
            parseFloat($correctionParent.css("border-left-width")) || 0;
        }
      } else {
        correctionOffset.top -=
          parseFloat($correctionParent.css("border-top-width")) || 0;
        correctionOffset.left -=
          parseFloat($correctionParent.css("border-left-width")) || 0;
        correctionOffset.top -=
          parseFloat($correctionParent.css("margin-top")) || 0;
        correctionOffset.left -=
          parseFloat($correctionParent.css("margin-left")) || 0;
      }

      // perform custom corrections from options (use when Quicksand fails to detect proper correction)
      if (isNaN(correctionOffset.left)) {
        correctionOffset.left = 0;
      }
      if (isNaN(correctionOffset.top)) {
        correctionOffset.top = 0;
      }

      correctionOffset.left -= options.dx;
      correctionOffset.top -= options.dy;

      // keeps nodes after source container, holding their position
      $sourceParent.css("height", $(this).height());

      // get positions of source collections
      $source.each(function (i) {
        offsets[i] = $(this).offset();
      });

      // stops previous animations on source container
      $(this).stop();
      var dx = 0;
      var dy = 0;
      $source.each(function (i) {
        $(this).stop(); // stop animation of collection items
        var rawObj = $(this).get(0);
        if (rawObj.style.position == "absolute") {
          dx = -options.dx;
          dy = -options.dy;
        } else {
          dx = options.dx;
          dy = options.dy;
        }

        rawObj.style.position = "absolute";
        rawObj.style.margin = "0";

        rawObj.style.top =
          offsets[i].top -
          parseFloat(rawObj.style.marginTop) -
          correctionOffset.top +
          dy +
          "px";
        rawObj.style.left =
          offsets[i].left -
          parseFloat(rawObj.style.marginLeft) -
          correctionOffset.left +
          dx +
          "px";
      });

      // create temporary container with destination collection
      var $dest = $($sourceParent).clone();
      var rawDest = $dest.get(0);
      rawDest.innerHTML = "";
      rawDest.setAttribute("id", "");
      rawDest.style.height = "auto";
      rawDest.style.width = $sourceParent.width() + "px";
      $dest.append($collection);
      // insert node into HTML
      // Note that the node is under visible source container in the exactly same position
      // The browser render all the items without showing them (opacity: 0.0)
      // No offset calculations are needed, the browser just extracts position from underlayered destination items
      // and sets animation to destination positions.
      $dest.insertBefore($sourceParent);
      $dest.css("opacity", 0.0);
      rawDest.style.zIndex = -1;

      rawDest.style.margin = "0";
      rawDest.style.position = "absolute";
      rawDest.style.top = offset.top - correctionOffset.top + "px";
      rawDest.style.left = offset.left - correctionOffset.left + "px";

      if (options.adjustHeight === "dynamic") {
        // If destination container has different height than source container
        // the height can be animated, adjusting it to destination height
        $sourceParent.animate(
          { height: $dest.height() },
          options.duration,
          options.easing,
        );
      } else if (options.adjustHeight === "auto") {
        destHeight = $dest.height();
        if (parseFloat(sourceHeight) < parseFloat(destHeight)) {
          // Adjust the height now so that the items don't move out of the container
          $sourceParent.css("height", destHeight);
        } else {
          //  Adjust later, on callback
          adjustHeightOnCallback = true;
        }
      }

      // Now it's time to do shuffling animation
      // First of all, we need to identify same elements within source and destination collections
      $source.each(function (i) {
        var destElement = [];
        if (typeof options.attribute == "function") {
          val = options.attribute($(this));
          $collection.each(function () {
            if (options.attribute(this) == val) {
              destElement = $(this);
              return false;
            }
          });
        } else {
          destElement = $collection.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
        }
        if (destElement.length) {
          // The item is both in source and destination collections
          // It it's under different position, let's move it
          if (!options.useScaling) {
            animationQueue.push({
              element: $(this),
              animation: {
                top: destElement.offset().top - correctionOffset.top,
                left: destElement.offset().left - correctionOffset.left,
                opacity: 1.0,
              },
            });
          } else {
            animationQueue.push({
              element: $(this),
              animation: {
                top: destElement.offset().top - correctionOffset.top,
                left: destElement.offset().left - correctionOffset.left,
                opacity: 1.0,
                scale: "1.0",
              },
            });
          }
        } else {
          // The item from source collection is not present in destination collections
          // Let's remove it
          if (!options.useScaling) {
            animationQueue.push({
              element: $(this),
              animation: { opacity: "0.0" },
            });
          } else {
            animationQueue.push({
              element: $(this),
              animation: { opacity: "0.0", scale: "0.0" },
            });
          }
        }
      });

      $collection.each(function (i) {
        // Grab all items from target collection not present in visible source collection

        var sourceElement = [];
        var destElement = [];
        if (typeof options.attribute == "function") {
          val = options.attribute($(this));
          $source.each(function () {
            if (options.attribute(this) == val) {
              sourceElement = $(this);
              return false;
            }
          });

          $collection.each(function () {
            if (options.attribute(this) == val) {
              destElement = $(this);
              return false;
            }
          });
        } else {
          sourceElement = $source.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
          destElement = $collection.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
        }

        var animationOptions;
        if (sourceElement.length === 0) {
          // No such element in source collection...
          if (!options.useScaling) {
            animationOptions = {
              opacity: "1.0",
            };
          } else {
            animationOptions = {
              opacity: "1.0",
              scale: "1.0",
            };
          }
          // Let's create it
          d = destElement.clone();
          var rawDestElement = d.get(0);
          rawDestElement.style.position = "absolute";
          rawDestElement.style.margin = "0";
          rawDestElement.style.top =
            destElement.offset().top - correctionOffset.top + "px";
          rawDestElement.style.left =
            destElement.offset().left - correctionOffset.left + "px";
          d.css("opacity", 0.0); // IE
          if (options.useScaling) {
            d.css("transform", "scale(0.0)");
          }
          d.appendTo($sourceParent);

          animationQueue.push({ element: $(d), animation: animationOptions });
        }
      });

      $dest.remove();
      options.enhancement($sourceParent); // Perform custom visual enhancements during the animation
      for (i = 0; i < animationQueue.length; i++) {
        animationQueue[i].element.animate(
          animationQueue[i].animation,
          options.duration,
          options.easing,
          postCallback,
        );
      }
    });
  };
})(jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/portfolio/setting.js
================================================
jQuery(document).ready(function ($) {
  if (jQuery().quicksand) {
    // Clone applications to get a second collection
    var $data = $(".portfolio").clone();

    //NOTE: Only filter on the main portfolio page, not on the subcategory pages
    $(".filter li").click(function (e) {
      $(".filter li").removeClass("active");
      // Use the last category class as the category to filter by. This means that multiple categories are not supported (yet)
      var filterClass = $(this).attr("class").split(" ").slice(-1)[0];

      if (filterClass == "all") {
        var $filteredData = $data.find(".item-thumbs");
      } else {
        var $filteredData = $data.find(
          ".item-thumbs[data-type=" + filterClass + "]",
        );
      }
      $(".portfolio").quicksand(
        $filteredData,
        {
          duration: 600,
          adjustHeight: "auto",
        },
        function () {
          // Portfolio fancybox
          $(".fancybox").fancybox({
            padding: 0,
            beforeShow: function () {
              this.title = $(this.element).attr("title");
              this.title =
                "<h4>" +
                this.title +
                "</h4>" +
                "<p>" +
                $(this.element).parent().find("img").attr("alt") +
                "</p>";
            },
            helpers: {
              title: { type: "inside" },
            },
          });
        },
      );
      $(this).addClass("active");
      return false;
    });
  } //if quicksand
});


================================================
FILE: backend/tests/integration/tests/pruning/website/js/quicksand/jquery.quicksand.js
================================================
/*

Quicksand 1.2.2

Reorder and filter items with a nice shuffling animation.

Copyright (c) 2010 Jacek Galanciak (razorjack.net) and agilope.com
Big thanks for Piotr Petrus (riddle.pl) for deep code review and wonderful docs & demos.

Dual licensed under the MIT and GPL version 2 licenses.
http://github.com/jquery/jquery/blob/master/MIT-LICENSE.txt
http://github.com/jquery/jquery/blob/master/GPL-LICENSE.txt

Project site: http://razorjack.net/quicksand
Github site: http://github.com/razorjack/quicksand

*/

(function ($) {
  $.fn.quicksand = function (collection, customOptions) {
    var options = {
      duration: 750,
      easing: "swing",
      attribute: "data-id", // attribute to recognize same items within source and dest
      adjustHeight: "auto", // 'dynamic' animates height during shuffling (slow), 'auto' adjusts it before or after the animation, false leaves height constant
      useScaling: true, // disable it if you're not using scaling effect or want to improve performance
      enhancement: function (c) {}, // Visual enhacement (eg. font replacement) function for cloned elements
      selector: "> *",
      dx: 0,
      dy: 0,
    };
    $.extend(options, customOptions);

    if ($.browser.msie || typeof $.fn.scale == "undefined") {
      // Got IE and want scaling effect? Kiss my ass.
      options.useScaling = false;
    }

    var callbackFunction;
    if (typeof arguments[1] == "function") {
      var callbackFunction = arguments[1];
    } else if (typeof (arguments[2] == "function")) {
      var callbackFunction = arguments[2];
    }

    return this.each(function (i) {
      var val;
      var animationQueue = []; // used to store all the animation params before starting the animation; solves initial animation slowdowns
      var $collection = $(collection).clone(); // destination (target) collection
      var $sourceParent = $(this); // source, the visible container of source collection
      var sourceHeight = $(this).css("height"); // used to keep height and document flow during the animation

      var destHeight;
      var adjustHeightOnCallback = false;

      var offset = $($sourceParent).offset(); // offset of visible container, used in animation calculations
      var offsets = []; // coordinates of every source collection item

      var $source = $(this).find(options.selector); // source collection items

      // Replace the collection and quit if IE6
      if ($.browser.msie && $.browser.version.substr(0, 1) < 7) {
        $sourceParent.html("").append($collection);
        return;
      }

      // Gets called when any animation is finished
      var postCallbackPerformed = 0; // prevents the function from being called more than one time
      var postCallback = function () {
        if (!postCallbackPerformed) {
          postCallbackPerformed = 1;

          // hack:
          // used to be: $sourceParent.html($dest.html()); // put target HTML into visible source container
          // but new webkit builds cause flickering when replacing the collections
          $toDelete = $sourceParent.find("> *");
          $sourceParent.prepend($dest.find("> *"));
          $toDelete.remove();

          if (adjustHeightOnCallback) {
            $sourceParent.css("height", destHeight);
          }
          options.enhancement($sourceParent); // Perform custom visual enhancements on a newly replaced collection
          if (typeof callbackFunction == "function") {
            callbackFunction.call(this);
          }
        }
      };

      // Position: relative situations
      var $correctionParent = $sourceParent.offsetParent();
      var correctionOffset = $correctionParent.offset();
      if ($correctionParent.css("position") == "relative") {
        if ($correctionParent.get(0).nodeName.toLowerCase() == "body") {
        } else {
          correctionOffset.top +=
            parseFloat($correctionParent.css("border-top-width")) || 0;
          correctionOffset.left +=
            parseFloat($correctionParent.css("border-left-width")) || 0;
        }
      } else {
        correctionOffset.top -=
          parseFloat($correctionParent.css("border-top-width")) || 0;
        correctionOffset.left -=
          parseFloat($correctionParent.css("border-left-width")) || 0;
        correctionOffset.top -=
          parseFloat($correctionParent.css("margin-top")) || 0;
        correctionOffset.left -=
          parseFloat($correctionParent.css("margin-left")) || 0;
      }

      // perform custom corrections from options (use when Quicksand fails to detect proper correction)
      if (isNaN(correctionOffset.left)) {
        correctionOffset.left = 0;
      }
      if (isNaN(correctionOffset.top)) {
        correctionOffset.top = 0;
      }

      correctionOffset.left -= options.dx;
      correctionOffset.top -= options.dy;

      // keeps nodes after source container, holding their position
      $sourceParent.css("height", $(this).height());

      // get positions of source collections
      $source.each(function (i) {
        offsets[i] = $(this).offset();
      });

      // stops previous animations on source container
      $(this).stop();
      var dx = 0;
      var dy = 0;
      $source.each(function (i) {
        $(this).stop(); // stop animation of collection items
        var rawObj = $(this).get(0);
        if (rawObj.style.position == "absolute") {
          dx = -options.dx;
          dy = -options.dy;
        } else {
          dx = options.dx;
          dy = options.dy;
        }

        rawObj.style.position = "absolute";
        rawObj.style.margin = "0";

        rawObj.style.top =
          offsets[i].top -
          parseFloat(rawObj.style.marginTop) -
          correctionOffset.top +
          dy +
          "px";
        rawObj.style.left =
          offsets[i].left -
          parseFloat(rawObj.style.marginLeft) -
          correctionOffset.left +
          dx +
          "px";
      });

      // create temporary container with destination collection
      var $dest = $($sourceParent).clone();
      var rawDest = $dest.get(0);
      rawDest.innerHTML = "";
      rawDest.setAttribute("id", "");
      rawDest.style.height = "auto";
      rawDest.style.width = $sourceParent.width() + "px";
      $dest.append($collection);
      // insert node into HTML
      // Note that the node is under visible source container in the exactly same position
      // The browser render all the items without showing them (opacity: 0.0)
      // No offset calculations are needed, the browser just extracts position from underlayered destination items
      // and sets animation to destination positions.
      $dest.insertBefore($sourceParent);
      $dest.css("opacity", 0.0);
      rawDest.style.zIndex = -1;

      rawDest.style.margin = "0";
      rawDest.style.position = "absolute";
      rawDest.style.top = offset.top - correctionOffset.top + "px";
      rawDest.style.left = offset.left - correctionOffset.left + "px";

      if (options.adjustHeight === "dynamic") {
        // If destination container has different height than source container
        // the height can be animated, adjusting it to destination height
        $sourceParent.animate(
          { height: $dest.height() },
          options.duration,
          options.easing,
        );
      } else if (options.adjustHeight === "auto") {
        destHeight = $dest.height();
        if (parseFloat(sourceHeight) < parseFloat(destHeight)) {
          // Adjust the height now so that the items don't move out of the container
          $sourceParent.css("height", destHeight);
        } else {
          //  Adjust later, on callback
          adjustHeightOnCallback = true;
        }
      }

      // Now it's time to do shuffling animation
      // First of all, we need to identify same elements within source and destination collections
      $source.each(function (i) {
        var destElement = [];
        if (typeof options.attribute == "function") {
          val = options.attribute($(this));
          $collection.each(function () {
            if (options.attribute(this) == val) {
              destElement = $(this);
              return false;
            }
          });
        } else {
          destElement = $collection.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
        }
        if (destElement.length) {
          // The item is both in source and destination collections
          // It it's under different position, let's move it
          if (!options.useScaling) {
            animationQueue.push({
              element: $(this),
              animation: {
                top: destElement.offset().top - correctionOffset.top,
                left: destElement.offset().left - correctionOffset.left,
                opacity: 1.0,
              },
            });
          } else {
            animationQueue.push({
              element: $(this),
              animation: {
                top: destElement.offset().top - correctionOffset.top,
                left: destElement.offset().left - correctionOffset.left,
                opacity: 1.0,
                scale: "1.0",
              },
            });
          }
        } else {
          // The item from source collection is not present in destination collections
          // Let's remove it
          if (!options.useScaling) {
            animationQueue.push({
              element: $(this),
              animation: { opacity: "0.0" },
            });
          } else {
            animationQueue.push({
              element: $(this),
              animation: { opacity: "0.0", scale: "0.0" },
            });
          }
        }
      });

      $collection.each(function (i) {
        // Grab all items from target collection not present in visible source collection

        var sourceElement = [];
        var destElement = [];
        if (typeof options.attribute == "function") {
          val = options.attribute($(this));
          $source.each(function () {
            if (options.attribute(this) == val) {
              sourceElement = $(this);
              return false;
            }
          });

          $collection.each(function () {
            if (options.attribute(this) == val) {
              destElement = $(this);
              return false;
            }
          });
        } else {
          sourceElement = $source.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
          destElement = $collection.filter(
            "[" +
              options.attribute +
              "=" +
              $(this).attr(options.attribute) +
              "]",
          );
        }

        var animationOptions;
        if (sourceElement.length === 0) {
          // No such element in source collection...
          if (!options.useScaling) {
            animationOptions = {
              opacity: "1.0",
            };
          } else {
            animationOptions = {
              opacity: "1.0",
              scale: "1.0",
            };
          }
          // Let's create it
          d = destElement.clone();
          var rawDestElement = d.get(0);
          rawDestElement.style.position = "absolute";
          rawDestElement.style.margin = "0";
          rawDestElement.style.top =
            destElement.offset().top - correctionOffset.top + "px";
          rawDestElement.style.left =
            destElement.offset().left - correctionOffset.left + "px";
          d.css("opacity", 0.0); // IE
          if (options.useScaling) {
            d.css("transform", "scale(0.0)");
          }
          d.appendTo($sourceParent);

          animationQueue.push({ element: $(d), animation: animationOptions });
        }
      });

      $dest.remove();
      options.enhancement($sourceParent); // Perform custom visual enhancements during the animation
      for (i = 0; i < animationQueue.length; i++) {
        animationQueue[i].element.animate(
          animationQueue[i].animation,
          options.duration,
          options.easing,
          postCallback,
        );
      }
    });
  };
})(jQuery);


================================================
FILE: backend/tests/integration/tests/pruning/website/js/quicksand/setting.js
================================================
jQuery.noConflict();
jQuery(document).ready(function($){

if (jQuery().quicksand) {

 	// Clone applications to get a second collection
	var $data = $(".portfolio-area").clone();
	
	//NOTE: Only filter on the main portfolio page, not on the subcategory pages
	$('.portfolio-categ li').click(function(e) {
		$(".filter li").removeClass("active");	
		// Use the last category class as the category to filter by. This means that multiple categories are not supported (yet)
		var filterClass=$(this).attr('class').split(' ').slice(-1)[0];
		
		if (filterClass == 'all') {
			var $filteredData = $data.find('.item-thumbs');
		} else {
			var $filteredData = $data.find('.item-thumbs[data-type=' + filterClass + ']');
		}
		$(".portfolio-area").quicksand($filteredData, {
			duration: 600,
			adjustHeight: 'auto'
		}	
		$(this).addClass("active"); 			
		return false;
	});
	
}//if quicksand

});

================================================
FILE: backend/tests/integration/tests/pruning/website/js/validate.js
================================================
/*global jQuery:false */
jQuery(document).ready(function ($) {
  "use strict";

  //Contact
  $("form.validateform").submit(function () {
    var f = $(this).find(".field"),
      ferror = false,
      emailExp = /^[^\s()<>@,;:\/]+@\w[\w\.-]+\.[a-z]{2,}$/i;

    f.children("input").each(function () {
      // run all inputs

      var i = $(this); // current input
      var rule = i.attr("data-rule");

      if (rule != undefined) {
        var ierror = false; // error flag for current input
        var pos = rule.indexOf(":", 0);
        if (pos >= 0) {
          var exp = rule.substr(pos + 1, rule.length);
          rule = rule.substr(0, pos);
        } else {
          rule = rule.substr(pos + 1, rule.length);
        }

        switch (rule) {
          case "required":
            if (i.val() == "") {
              ferror = ierror = true;
            }
            break;

          case "maxlen":
            if (i.val().length < parseInt(exp)) {
              ferror = ierror = true;
            }
            break;

          case "email":
            if (!emailExp.test(i.val())) {
              ferror = ierror = true;
            }
            break;

          case "checked":
            if (!i.attr("checked")) {
              ferror = ierror = true;
            }
            break;

          case "regexp":
            exp = new RegExp(exp);
            if (!exp.test(i.val())) {
              ferror = ierror = true;
            }
            break;
        }
        i.next(".validation")
          .html(
            ierror
              ? i.attr("data-msg") != undefined
                ? i.attr("data-msg")
                : "wrong Input"
              : "",
          )
          .show("blind");
      }
    });
    f.children("textarea").each(function () {
      // run all inputs

      var i = $(this); // current input
      var rule = i.attr("data-rule");

      if (rule != undefined) {
        var ierror = false; // error flag for current input
        var pos = rule.indexOf(":", 0);
        if (pos >= 0) {
          var exp = rule.substr(pos + 1, rule.length);
          rule = rule.substr(0, pos);
        } else {
          rule = rule.substr(pos + 1, rule.length);
        }

        switch (rule) {
          case "required":
            if (i.val() == "") {
              ferror = ierror = true;
            }
            break;

          case "maxlen":
            if (i.val().length < parseInt(exp)) {
              ferror = ierror = true;
            }
            break;
        }
        i.next(".validation")
          .html(
            ierror
              ? i.attr("data-msg") != undefined
                ? i.attr("data-msg")
                : "wrong Input"
              : "",
          )
          .show("blind");
      }
    });
    if (ferror) return false;
    else var str = $(this).serialize();

    $.ajax({
      type: "POST",
      url: "contact/contact.php",
      data: str,
      success: function (msg) {
        $("#sendmessage").addClass("show");
        $("#errormessage").ajaxComplete(function (event, request, settings) {
          if (msg == "OK") {
            $("#sendmessage").addClass("show");
          } else {
            $("#sendmessage").removeClass("show");
            result = msg;
          }

          $(this).html(result);
        });
      },
    });
    return false;
  });
});


================================================
FILE: backend/tests/integration/tests/pruning/website/portfolio.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="description" content="" />
    <meta name="author" content="http://webthemez.com" />
    <!-- css -->
    <link href="css/bootstrap.min.css" rel="stylesheet" />
    <link href="css/fancybox/jquery.fancybox.css" rel="stylesheet" />
    <link href="css/jcarousel.css" rel="stylesheet" />
    <link href="css/flexslider.css" rel="stylesheet" />
    <link href="css/style.css" rel="stylesheet" />

    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
    <!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
  </head>
  <body>
    <div id="wrapper">
      <!-- start header -->
      <header>
        <div class="navbar navbar-default navbar-static-top">
          <div class="container">
            <div class="navbar-header">
              <button
                type="button"
                class="navbar-toggle"
                data-toggle="collapse"
                data-target=".navbar-collapse"
              >
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
              </button>
              <a class="navbar-brand" href="index.html"
                ><img src="img/logo.png" alt="logo"
              /></a>
            </div>
            <div class="navbar-collapse collapse">
              <ul class="nav navbar-nav">
                <li><a href="index.html">Home</a></li>
                <li><a href="about.html">About Us</a></li>
                <li><a href="courses.html">Courses</a></li>
                <li class="active"><a href="portfolio.html">Portfolio</a></li>
                <li><a href="pricing.html">Pricing</a></li>
                <li><a href="contact.html">Contact</a></li>
              </ul>
            </div>
          </div>
        </div>
      </header>
      <!-- end header -->
      <section id="inner-headline">
        <div class="container">
          <div class="row">
            <div class="col-lg-12">
              <h2 class="pageTitle">Portfolio</h2>
            </div>
          </div>
        </div>
      </section>
      <section id="content">
        <div class="container">
          <div class="row">
            <div class="col-lg-12">
              <ul class="portfolio-categ filter">
                <li class="all active"><a href="#">All</a></li>
                <li class="web"><a href="#" title="">Web design</a></li>
                <li class="icon"><a href="#" title="">Mobile App</a></li>
                <li class="graphic"><a href="#" title="">UI design</a></li>
              </ul>
              <div class="clearfix"></div>
              <div class="row">
                <section id="projects">
                  <ul id="thumbs" class="portfolio">
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 design"
                      data-id="id-0"
                      data-type="web"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/1.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/1.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 design"
                      data-id="id-1"
                      data-type="icon"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/2.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/2.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 photography"
                      data-id="id-2"
                      data-type="graphic"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/3.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/3.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 design"
                      data-id="id-0"
                      data-type="web"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/4.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/4.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 photography"
                      data-id="id-4"
                      data-type="web"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/5.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/5.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 photography"
                      data-id="id-5"
                      data-type="icon"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/6.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/6.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <li
                      class="item-thumbs col-lg-3 design"
                      data-id="id-0"
                      data-type="web"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/7.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/7.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                    <!-- Item Project and Filter Name -->
                    <li
                      class="item-thumbs col-lg-3 design"
                      data-id="id-0"
                      data-type="graphic"
                    >
                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->
                      <a
                        class="hover-wrap fancybox"
                        data-fancybox-group="gallery"
                        title="Portfolio name"
                        href="img/works/8.jpg"
                      >
                        <span class="overlay-img"></span>
                        <span class="overlay-img-thumb"
                          ><i class="icon-info-blocks fa fa-code"></i
                        ></span>
                      </a>
                      <!-- Thumb Image and Description -->
                      <img src="img/works/8.jpg" alt="" />
                    </li>
                    <!-- End Item Project -->
                  </ul>
                </section>
              </div>
            </div>
          </div>
        </div>
      </section>
      <footer>
        <div class="container">
          <div class="row">
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Our Contact</h5>
                <address>
                  <strong>Abovecompany Inc</strong><br />
                  JC Main Road, Near Silnile tower<br />
                  Pin-21542 NewYork US.
                </address>
                <p>
                  <i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br />
                  <i class="icon-envelope-alt"></i> email@domainname.com
                </p>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Quick Links</h5>
                <ul class="link-list">
                  <li><a href="#">Latest Events</a></li>
                  <li><a href="#">Terms and conditions</a></li>
                  <li><a href="#">Privacy policy</a></li>
                  <li><a href="#">Career</a></li>
                  <li><a href="#">Contact us</a></li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Latest posts</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
            <div class="col-lg-3">
              <div class="widget">
                <h5 class="widgetheading">Recent News</h5>
                <ul class="link-list">
                  <li>
                    <a href="#"
                      >Lorem ipsum dolor sit amet, consectetur adipiscing
                      elit.</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Pellentesque et pulvinar enim. Quisque at tempor
                      ligula</a
                    >
                  </li>
                  <li>
                    <a href="#"
                      >Natus error sit voluptatem accusantium doloremque</a
                    >
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
        <div id="sub-footer">
          <div class="container">
            <div class="row">
              <div class="col-lg-6">
                <div class="copyright">
                  <p>
                    <span
                      >&copy; Above Site All right reserved. Template By </span
                    ><a href="http://webthemez.com" target="_blank"
                      >WebThemez</a
                    >
                  </p>
                </div>
              </div>
              <div class="col-lg-6">
                <ul class="social-network">
                  <li>
                    <a href="#" data-placement="top" title="Facebook"
                      ><i class="fa fa-facebook"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Twitter"
                      ><i class="fa fa-twitter"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Linkedin"
                      ><i class="fa fa-linkedin"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Pinterest"
                      ><i class="fa fa-pinterest"></i
                    ></a>
                  </li>
                  <li>
                    <a href="#" data-placement="top" title="Google plus"
                      ><i class="fa fa-google-plus"></i
                    ></a>
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
      </footer>
    </div>
    <a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
    <!-- javascript
    ================================================== -->
    <!-- Placed at the end of the document so the pages load faster -->
    <script src="js/jquery.js"></script>
    <script src="js/jquery.easing.1.3.js"></script>
    <script src="js/bootstrap.min.js"></script>
    <script src="js/jquery.fancybox.pack.js"></script>
    <script src="js/jquery.fancybox-media.js"></script>
    <script src="js/portfolio/jquery.quicksand.js"></script>
    <script src="js/portfolio/setting.js"></script>
    <script src="js/jquery.flexslider.js"></script>
    <script src="js/animate.js"></script>
    <script src="js/custom.js"></script>
  </body>
</html>


================================================
FILE: backend/tests/integration/tests/pruning/website/pricing.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Above Multi-purpose Free Bootstrap Responsive Template</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="" />
<meta name="author" content="http://webthemez.com" />
<!-- css -->
<link href="css/bootstrap.min.css" rel="stylesheet" />
<link href="css/fancybox/jquery.fancybox.css" rel="stylesheet">
<link href="css/jcarousel.css" rel="stylesheet" />
<link href="css/flexslider.css" rel="stylesheet" />
<link href="css/style.css" rel="stylesheet" />
 
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->

</head>
<body>
<div id="wrapper">
	<!-- start header -->
		<header>
        <div class="navbar navbar-default navbar-static-top">
            <div class="container">
                <div class="navbar-header">
                    <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                        <span class="icon-bar"></span>
                    </button>
                    <a class="navbar-brand" href="index.html"><img src="img/logo.png" alt="logo"/></a>
                </div>
                <div class="navbar-collapse collapse ">
                    <ul class="nav navbar-nav">
                        <li><a href="index.html">Home</a></li> 
						<li><a href="about.html">About Us</a></li>
						<li><a href="courses.html">Courses</a></li>
                        <li><a href="portfolio.html">Portfolio</a></li>
                        <li class="active"><a href="pricing.html">Pricing</a></li>
                        <li><a href="contact.html">Contact</a></li>
                    </ul>
                </div>
            </div>
        </div>
	</header><!-- end header -->
	<section id="inner-headline">
	<div class="container">
		<div class="row">
			<div class="col-lg-12">
				<h2 class="pageTitle">Pricing</h2>
			</div>
		</div>
	</div>
	<section id="content">
	<div class="container">	 
		<!-- end divider -->
		<div class="row"> 
			<div class="col-lg-3">
				<div class="pricing-box-item">
					<div class="pricing-heading">
						<h3><strong>Basic</strong></h3>
					</div>
					<div class="pricing-terms">
						<h6>&#36;15.00 / Year</h6>
					</div>
					<div class="pricing-container">
						<ul>
							<li><i class="icon-ok"></i> Responsive Design</li>
							<li><i class="icon-ok"></i> Bootstrap Design</li>
							<li><i class="icon-ok"></i> Unlimited Support</li>
							<li><i class="icon-ok"></i> Free Trial version</li>
							<li><i class="icon-ok"></i> HTML5 CSS3 jQuery</li>
						</ul>
					</div>
					<div class="pricing-action">
						<a href="#" class="btn btn-medium btn-theme"><i class="icon-bolt"></i> Get Now</a>
					</div>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="pricing-box-item">
					<div class="pricing-heading">
						<h3><strong>Standard</strong></h3>
					</div>
					<div class="pricing-terms">
						<h6>&#36;20.00 / Year</h6>
					</div>
					<div class="pricing-container">
						<ul>
							<li><i class="icon-ok"></i> Responsive Design</li>
							<li><i class="icon-ok"></i> Bootstrap Design</li>
							<li><i class="icon-ok"></i> Unlimited Support</li>
							<li><i class="icon-ok"></i> Free Trial version</li>
							<li><i class="icon-ok"></i> HTML5 CSS3 jQuery</li>
						</ul>
					</div>
					<div class="pricing-action">
						<a href="#" class="btn btn-medium btn-theme"><i class="icon-bolt"></i> Get Now</a>
					</div>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="pricing-box-item activeItem">
					<div class="pricing-heading">
						<h3><strong>Advanced</strong></h3>
					</div>
					<div class="pricing-terms">
						<h6>&#36;15.00 / Year</h6>
					</div>
					<div class="pricing-container">
						<ul>
							<li><i class="icon-ok"></i> Responsive Design</li>
							<li><i class="icon-ok"></i> Bootstrap Design</li>
							<li><i class="icon-ok"></i> Unlimited Support</li>
							<li><i class="icon-ok"></i> Free Trial version</li>
							<li><i class="icon-ok"></i> HTML5 CSS3 jQuery</li>
						</ul>
					</div>
					<div class="pricing-action">
						<a href="#" class="btn btn-medium btn-theme"><i class="icon-bolt"></i> Get Now</a>
					</div>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="pricing-box-item">
					<div class="pricing-heading">
						<h3><strong>Mighty</strong></h3>
					</div>
					<div class="pricing-terms">
						<h6>&#36;15.00 / Year</h6>
					</div>
					<div class="pricing-container">
						<ul>
							<li><i class="icon-ok"></i> Responsive Design</li>
							<li><i class="icon-ok"></i> Bootstrap Design</li>
							<li><i class="icon-ok"></i> Unlimited Support</li>
							<li><i class="icon-ok"></i> Free Trial version</li>
							<li><i class="icon-ok"></i> HTML5 CSS3 jQuery</li>
						</ul>
					</div>
					<div class="pricing-action">
						<a href="#" class="btn btn-medium btn-theme"><i class="icon-bolt"></i> Get Now</a>
					</div>
				</div>
			</div>
		</div>
	</div>
	</section>
	<footer>
	<div class="container">
		<div class="row">
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Our Contact</h5>
					<address>
					<strong>Abovecompany Inc</strong><br>
					JC Main Road, Near Silnile tower<br>
					 Pin-21542 NewYork US.</address>
					<p>
						<i class="icon-phone"></i> (123) 456-789 - 1255-12584 <br>
						<i class="icon-envelope-alt"></i> email@domainname.com
					</p>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Quick Links</h5>
					<ul class="link-list">
						<li><a href="#">Latest Events</a></li>
						<li><a href="#">Terms and conditions</a></li>
						<li><a href="#">Privacy policy</a></li>
						<li><a href="#">Career</a></li>
						<li><a href="#">Contact us</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
				<div class="widget">
					<h5 class="widgetheading">Latest posts</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
			<div class="col-lg-3">
					<div class="widget">
					<h5 class="widgetheading">Recent News</h5>
					<ul class="link-list">
						<li><a href="#">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>
						<li><a href="#">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>
						<li><a href="#">Natus error sit voluptatem accusantium doloremque</a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	<div id="sub-footer">
		<div class="container">
			<div class="row">
				<div class="col-lg-6">
					<div class="copyright">
						<p>
							<span>&copy; Above Site All right reserved. Template By </span><a href="http://webthemez.com" target="_blank">WebThemez</a>
						</p>
					</div>
				</div>
				<div class="col-lg-6">
					<ul class="social-network">
						<li><a href="#" data-placement="top" title="Facebook"><i class="fa fa-facebook"></i></a></li>
						<li><a href="#" data-placement="top" title="Twitter"><i class="fa fa-twitter"></i></a></li>
						<li><a href="#" data-placement="top" title="Linkedin"><i class="fa fa-linkedin"></i></a></li>
						<li><a href="#" data-placement="top" title="Pinterest"><i class="fa fa-pinterest"></i></a></li>
						<li><a href="#" data-placement="top" title="Google plus"><i class="fa fa-google-plus"></i></a></li>
					</ul>
				</div>
			</div>
		</div>
	</div>
	</footer>
</div>
<a href="#" class="scrollup"><i class="fa fa-angle-up active"></i></a>
<!-- javascript
    ================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="js/jquery.js"></script>
<script src="js/jquery.easing.1.3.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/jquery.fancybox.pack.js"></script>
<script src="js/jquery.fancybox-media.js"></script> 
<script src="js/portfolio/jquery.quicksand.js"></script>
<script src="js/portfolio/setting.js"></script>
<script src="js/jquery.flexslider.js"></script>
<script src="js/animate.js"></script>
<script src="js/custom.js"></script>
</body>
</html>

================================================
FILE: backend/tests/integration/tests/pruning/website/readme.txt
================================================
Free Responsive HTML5 Template

Above Educational Bootstrap Responsive template is a modern clean multi-purpose html5 template built with valid HTML5 & CSS3. It's build on top of latest Bootstrap framework 3.3.1 fully responsive web compatible with multi browser and devices. This template can be used for multi-purpose needs like Educational Institutes, colleges, Schools, e-Learning, Training centre, Tutors, Charity, Primary School, business, consultancy, agency, personal portfolio, profile and mobile website.


Key features
-------------
Twitter Bootstrap 3.3.1
Clean & Developer-friendly HTML5 and CSS3 code
100% Responsive Layout Design 
Multi-purpose theme
Google Fonts Support
Font Awesome 
Smooth Scrolling 
Fully Customizable
Contact Form


Credits :
-------
=> Design and developed: "WebThemez"  http://webthemez.com
=> Photos used in template: **Unsplash** - http://unsplash.com
=> For more free web themes: http://webthemez.com
=> Framework : http://getbootstrap.com

License :
-------
**Creative Commons Attribution 3.0** - http://creativecommons.org/licenses/by/3.0/

Note:
All images user here is for demo purpose only, we are not responsible for any copyrights.


================================================
FILE: backend/tests/integration/tests/query_history/test_query_history.py
================================================
import csv
import io
import os
from datetime import datetime
from datetime import timedelta
from datetime import timezone

import pytest

from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import SessionType
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.query_history import QueryHistoryManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


@pytest.fixture
def setup_chat_session(reset: None) -> tuple[DATestUser, str]:  # noqa: ARG001
    # Create admin user and required resources
    admin_user: DATestUser = UserManager.create(name="admin_user")
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)
    api_key = APIKeyManager.create(user_performing_action=admin_user)
    LLMProviderManager.create(user_performing_action=admin_user)

    # Seed a document
    cc_pair.documents = []
    cc_pair.documents.append(
        DocumentManager.seed_doc_with_content(
            cc_pair=cc_pair,
            content="The company's revenue in Q1 was $1M",
            api_key=api_key,
        )
    )

    # Create chat session and send a message
    chat_session = ChatSessionManager.create(
        persona_id=0,
        description="Test chat session",
        user_performing_action=admin_user,
    )

    ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What was the Q1 revenue?",
        user_performing_action=admin_user,
    )

    messages = ChatSessionManager.get_chat_history(
        chat_session=chat_session,
        user_performing_action=admin_user,
    )

    # Add another message to the chat session
    ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What about Q2 revenue?",
        user_performing_action=admin_user,
        parent_message_id=messages[-1].id,
    )

    return admin_user, str(chat_session.id)


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Chat history tests are enterprise only",
)
def test_chat_history_endpoints(
    reset: None,  # noqa: ARG001
    setup_chat_session: tuple[DATestUser, str],
) -> None:
    admin_user, first_chat_id = setup_chat_session

    # Get chat history
    history_response = QueryHistoryManager.get_query_history_page(
        user_performing_action=admin_user
    )

    # Verify we got back the one chat session we created
    assert len(history_response.items) == 1

    # Verify the first chat session details
    first_session = history_response.items[0]
    assert first_session.user_email == admin_user.email
    assert first_session.name == "Test chat session"
    assert first_session.first_user_message == "What was the Q1 revenue?"
    assert first_session.first_ai_message is not None
    assert first_session.assistant_id == 0
    assert first_session.feedback_type is None
    assert first_session.flow_type == SessionType.CHAT
    assert first_session.conversation_length == 4  # 2 User messages + 2 AI responses

    # Test date filtering - should return no results
    past_end = datetime.now(tz=timezone.utc) - timedelta(days=1)
    past_start = past_end - timedelta(days=1)
    history_response = QueryHistoryManager.get_query_history_page(
        start_time=past_start,
        end_time=past_end,
        user_performing_action=admin_user,
    )
    assert len(history_response.items) == 0

    # Test get specific chat session endpoint
    session_details = QueryHistoryManager.get_chat_session_admin(
        chat_session_id=first_chat_id,
        user_performing_action=admin_user,
    )

    # Verify the session details
    assert str(session_details.id) == first_chat_id
    assert len(session_details.messages) > 0
    assert session_details.flow_type == SessionType.CHAT

    # Test filtering by feedback
    history_response = QueryHistoryManager.get_query_history_page(
        feedback_type=QAFeedbackType.LIKE,
        user_performing_action=admin_user,
    )
    assert len(history_response.items) == 0


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Chat history tests are enterprise only",
)
def test_chat_history_csv_export(
    reset: None,  # noqa: ARG001
    setup_chat_session: tuple[DATestUser, str],
) -> None:
    admin_user, _ = setup_chat_session

    # Test CSV export endpoint with date filtering
    headers, csv_content = QueryHistoryManager.get_query_history_as_csv(
        user_performing_action=admin_user,
    )
    assert headers["Content-Type"] == "text/csv; charset=utf-8"
    assert "Content-Disposition" in headers

    # Use csv.reader to properly handle newlines inside quoted fields
    csv_rows = list(csv.reader(io.StringIO(csv_content)))
    assert len(csv_rows) == 3  # Header + 2 QA pairs
    assert csv_rows[0][0] == "chat_session_id"
    assert "user_message" in csv_rows[0]
    assert "ai_response" in csv_rows[0]
    assert "What was the Q1 revenue?" in csv_content
    assert "What about Q2 revenue?" in csv_content

    # Test CSV export with date filtering - should return no results
    past_end = datetime.now(tz=timezone.utc) - timedelta(days=1)
    past_start = past_end - timedelta(days=1)
    headers, csv_content = QueryHistoryManager.get_query_history_as_csv(
        start_time=past_start,
        end_time=past_end,
        user_performing_action=admin_user,
    )
    csv_rows = list(csv.reader(io.StringIO(csv_content)))
    assert len(csv_rows) == 1  # Only header, no data rows


================================================
FILE: backend/tests/integration/tests/query_history/test_query_history_pagination.py
================================================
import os
from datetime import datetime

import pytest

from onyx.configs.constants import QAFeedbackType
from tests.integration.common_utils.managers.query_history import QueryHistoryManager
from tests.integration.common_utils.test_models import DAQueryHistoryEntry
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.tests.query_history.utils import (
    setup_chat_sessions_with_different_feedback,
)


def _verify_query_history_pagination(
    chat_sessions: list[DAQueryHistoryEntry],
    user_performing_action: DATestUser,
    page_size: int = 5,
    feedback_type: QAFeedbackType | None = None,
    start_time: datetime | None = None,
    end_time: datetime | None = None,
) -> None:
    retrieved_sessions: list[str] = []

    for i in range(0, len(chat_sessions), page_size):
        paginated_result = QueryHistoryManager.get_query_history_page(
            page_num=i // page_size,
            page_size=page_size,
            feedback_type=feedback_type,
            start_time=start_time,
            end_time=end_time,
            user_performing_action=user_performing_action,
        )

        # Verify that the total items is equal to the length of the chat sessions list
        assert paginated_result.total_items == len(chat_sessions)
        # Verify that the number of items in the page is equal to the page size
        assert len(paginated_result.items) == min(page_size, len(chat_sessions) - i)
        # Add the retrieved chat sessions to the list of retrieved sessions
        retrieved_sessions.extend(
            [str(session.id) for session in paginated_result.items]
        )

    # Create a set of all the expected chat session IDs
    all_expected_sessions = set(str(session.id) for session in chat_sessions)
    # Create a set of all the retrieved chat session IDs
    all_retrieved_sessions = set(retrieved_sessions)

    # Verify that the set of retrieved sessions is equal to the set of expected sessions
    assert all_expected_sessions == all_retrieved_sessions


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Query history tests are enterprise only",
)
def test_query_history_pagination(reset: None) -> None:  # noqa: ARG001
    (
        admin_user,
        chat_sessions_by_feedback_type,
    ) = setup_chat_sessions_with_different_feedback()

    all_chat_sessions = []
    for _, chat_sessions in chat_sessions_by_feedback_type.items():
        all_chat_sessions.extend(chat_sessions)

    # Verify basic pagination with different page sizes
    print("Verifying basic pagination with page size 5")
    _verify_query_history_pagination(
        chat_sessions=all_chat_sessions,
        page_size=5,
        user_performing_action=admin_user,
    )
    print("Verifying basic pagination with page size 10")
    _verify_query_history_pagination(
        chat_sessions=all_chat_sessions,
        page_size=10,
        user_performing_action=admin_user,
    )

    print("Verifying pagination with feedback type LIKE")
    liked_sessions = chat_sessions_by_feedback_type[QAFeedbackType.LIKE]
    _verify_query_history_pagination(
        chat_sessions=liked_sessions,
        feedback_type=QAFeedbackType.LIKE,
        user_performing_action=admin_user,
    )

    print("Verifying pagination with feedback type DISLIKE")
    disliked_sessions = chat_sessions_by_feedback_type[QAFeedbackType.DISLIKE]
    _verify_query_history_pagination(
        chat_sessions=disliked_sessions,
        feedback_type=QAFeedbackType.DISLIKE,
        user_performing_action=admin_user,
    )

    print("Verifying pagination with feedback type MIXED")
    mixed_sessions = chat_sessions_by_feedback_type[QAFeedbackType.MIXED]
    _verify_query_history_pagination(
        chat_sessions=mixed_sessions,
        feedback_type=QAFeedbackType.MIXED,
        user_performing_action=admin_user,
    )

    # Test with a small page size to verify handling of partial pages
    print("Verifying pagination with page size 3")
    _verify_query_history_pagination(
        chat_sessions=all_chat_sessions,
        page_size=3,
        user_performing_action=admin_user,
    )

    # Test with a page size larger than the total number of items
    print("Verifying pagination with page size 50")
    _verify_query_history_pagination(
        chat_sessions=all_chat_sessions,
        page_size=50,
        user_performing_action=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/query_history/test_usage_reports.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.seeding.chat_history_seeding import seed_chat_history


def test_usage_reports(reset: None) -> None:  # noqa: ARG001
    EXPECTED_SESSIONS = 2048
    MESSAGES_PER_SESSION = 4

    # divide by 2 because only messages of type USER are returned
    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION / 2

    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)

    with get_session_with_current_tenant() as db_session:
        # count of all entries should be exact
        period = (
            datetime.fromtimestamp(0, tz=timezone.utc),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        assert count == EXPECTED_MESSAGES

        # count in a one month time range should be within a certain range statistically
        # this can be improved if we seed the chat history data deterministically
        period = (
            datetime.now(tz=timezone.utc) - timedelta(days=30),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
        assert count > lower
        assert count < upper


================================================
FILE: backend/tests/integration/tests/query_history/utils.py
================================================
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor

from onyx.configs.constants import QAFeedbackType
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DAQueryHistoryEntry
from tests.integration.common_utils.test_models import DATestUser


def _create_chat_session_with_feedback(
    admin_user: DATestUser,
    i: int,
    feedback_type: QAFeedbackType | None,
) -> tuple[QAFeedbackType | None, DAQueryHistoryEntry]:
    print(f"Creating chat session {i} with feedback type {feedback_type}")
    # Create chat session with timestamp spread over 30 days
    chat_session = ChatSessionManager.create(
        persona_id=0,
        description=f"Test chat session {i}",
        user_performing_action=admin_user,
    )

    test_session = DAQueryHistoryEntry(
        id=chat_session.id,
        persona_id=0,
        description=f"Test chat session {i}",
        feedback_type=feedback_type,
    )

    # First message in chat
    ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message=f"Question {i}?",
        user_performing_action=admin_user,
    )

    messages = ChatSessionManager.get_chat_history(
        chat_session=chat_session,
        user_performing_action=admin_user,
    )
    if feedback_type == QAFeedbackType.MIXED or feedback_type == QAFeedbackType.DISLIKE:
        ChatSessionManager.create_chat_message_feedback(
            message_id=messages[-1].id,
            is_positive=False,
            user_performing_action=admin_user,
        )

    # Second message with different feedback types
    ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message=f"Follow up {i}?",
        user_performing_action=admin_user,
        parent_message_id=messages[-1].id,
    )

    # Get updated messages to get the ID of the second message
    messages = ChatSessionManager.get_chat_history(
        chat_session=chat_session,
        user_performing_action=admin_user,
    )
    if feedback_type == QAFeedbackType.MIXED or feedback_type == QAFeedbackType.LIKE:
        ChatSessionManager.create_chat_message_feedback(
            message_id=messages[-1].id,
            is_positive=True,
            user_performing_action=admin_user,
        )

    return feedback_type, test_session


def setup_chat_sessions_with_different_feedback() -> (
    tuple[DATestUser, dict[QAFeedbackType | None, list[DAQueryHistoryEntry]]]
):
    # Create admin user and required resources
    admin_user: DATestUser = UserManager.create(name="admin_user")
    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)
    api_key = APIKeyManager.create(user_performing_action=admin_user)
    LLMProviderManager.create(user_performing_action=admin_user)

    # Seed a document
    cc_pair.documents = []
    cc_pair.documents.append(
        DocumentManager.seed_doc_with_content(
            cc_pair=cc_pair,
            content="The company's revenue in Q1 was $1M",
            api_key=api_key,
        )
    )

    chat_sessions_by_feedback_type: dict[
        QAFeedbackType | None, list[DAQueryHistoryEntry]
    ] = {}
    # Use ThreadPoolExecutor to create chat sessions in parallel
    with ThreadPoolExecutor(max_workers=5) as executor:
        # Submit all tasks and store futures
        j = 0
        # Will result in 40 sessions
        number_of_sessions = 10
        futures = []
        for feedback_type in [
            QAFeedbackType.MIXED,
            QAFeedbackType.LIKE,
            QAFeedbackType.DISLIKE,
            None,
        ]:
            futures.extend(
                [
                    executor.submit(
                        _create_chat_session_with_feedback,
                        admin_user,
                        (j * number_of_sessions) + i,
                        feedback_type,
                    )
                    for i in range(number_of_sessions)
                ]
            )
            j += 1

        # Collect results in order
        for future in as_completed(futures):
            feedback_type, chat_session = future.result()
            chat_sessions_by_feedback_type.setdefault(feedback_type, []).append(
                chat_session
            )

    return admin_user, chat_sessions_by_feedback_type


================================================
FILE: backend/tests/integration/tests/reporting/test_usage_export_api.py
================================================
import csv
import os
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from io import BytesIO
from io import StringIO
from uuid import UUID
from zipfile import ZipFile

import pytest
import requests

from ee.onyx.db.usage_export import UsageReportMetadata
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.seeding.chat_history_seeding import seed_chat_history
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Usage export is an enterprise feature",
)
class TestUsageExportAPI:
    def test_generate_usage_report(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Seed some chat history data for the report
        seed_chat_history(
            num_sessions=10,
            num_messages=4,
            days=30,
            user_id=UUID(admin_user.id),
            persona_id=DEFAULT_PERSONA_ID,
        )

        # Get initial list of reports
        initial_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=admin_user.headers,
        )
        assert initial_response.status_code == 200
        initial_reports = initial_response.json()
        initial_count = len(initial_reports)

        # Test generating a report without date filters (all time)
        response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={},
            headers=admin_user.headers,
        )
        assert response.status_code == 204

        # Wait for the new report to appear (with timeout)
        max_wait_time = 60  # seconds
        start_time = time.time()
        current_reports = initial_reports

        while time.time() - start_time < max_wait_time:
            check_response = requests.get(
                f"{API_SERVER_URL}/admin/usage-report",
                headers=admin_user.headers,
            )
            assert check_response.status_code == 200
            current_reports = check_response.json()

            if len(current_reports) > initial_count:
                # New report has been generated
                break

            time.sleep(2)

        # Verify a new report was created
        assert len(current_reports) > initial_count

        # Find the new report (should be the first one since they're ordered by time)
        new_report = current_reports[0]
        assert "report_name" in new_report
        assert new_report["report_name"].endswith(".zip")

    def test_generate_usage_report_with_date_range(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Seed some chat history data
        seed_chat_history(
            num_sessions=20,
            num_messages=4,
            days=60,
            user_id=UUID(admin_user.id),
            persona_id=DEFAULT_PERSONA_ID,
        )

        # Get initial list of reports
        initial_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=admin_user.headers,
        )
        assert initial_response.status_code == 200
        initial_reports = initial_response.json()
        initial_count = len(initial_reports)

        # Generate report for the last 30 days
        period_to = datetime.now(tz=timezone.utc)
        period_from = period_to - timedelta(days=30)

        response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={
                "period_from": period_from.isoformat(),
                "period_to": period_to.isoformat(),
            },
            headers=admin_user.headers,
        )
        assert response.status_code == 204

        # Wait for the new report to appear
        max_wait_time = 60
        start_time = time.time()
        current_reports = initial_reports

        while time.time() - start_time < max_wait_time:
            check_response = requests.get(
                f"{API_SERVER_URL}/admin/usage-report",
                headers=admin_user.headers,
            )
            assert check_response.status_code == 200
            current_reports = check_response.json()

            if len(current_reports) > initial_count:
                break

            time.sleep(2)

        assert len(current_reports) > initial_count

        # Find the new report (the one that wasn't in initial_reports)
        new_reports = [r for r in current_reports if r not in initial_reports]
        assert len(new_reports) > 0
        new_report = new_reports[0]

        # Verify the new report has the expected date range
        assert new_report["period_from"] is not None
        assert new_report["period_to"] is not None

    def test_generate_usage_report_invalid_dates(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Test with invalid date format
        response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={
                "period_from": "not-a-date",
                "period_to": datetime.now(tz=timezone.utc).isoformat(),
            },
            headers=admin_user.headers,
        )
        assert response.status_code == 400

    def test_fetch_usage_reports(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # First generate a report to ensure we have at least one
        seed_chat_history(
            num_sessions=5,
            num_messages=4,
            days=30,
            user_id=UUID(admin_user.id),
            persona_id=DEFAULT_PERSONA_ID,
        )

        # Get initial count
        initial_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=admin_user.headers,
        )
        assert initial_response.status_code == 200
        initial_count = len(initial_response.json())

        # Generate a report
        generate_response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={},
            headers=admin_user.headers,
        )
        assert generate_response.status_code == 204

        # Wait for the new report to appear
        max_wait_time = 15
        start_time = time.time()
        reports = []

        while time.time() - start_time < max_wait_time:
            response = requests.get(
                f"{API_SERVER_URL}/admin/usage-report",
                headers=admin_user.headers,
            )
            assert response.status_code == 200
            reports = response.json()

            if len(reports) > initial_count:
                break

            time.sleep(2)

        # Verify we have at least one report
        assert isinstance(reports, list)
        assert len(reports) > initial_count

        # Validate the structure of the first report
        first_report = reports[0]
        assert "report_name" in first_report
        assert "requestor" in first_report
        assert "time_created" in first_report
        assert "period_from" in first_report
        assert "period_to" in first_report

        # Verify it's a valid UsageReportMetadata object
        report_metadata = UsageReportMetadata(**first_report)
        assert report_metadata.report_name.endswith(".zip")

    def test_read_usage_report(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # First generate a report
        seed_chat_history(
            num_sessions=5,
            num_messages=4,
            days=30,
            user_id=UUID(admin_user.id),
            persona_id=DEFAULT_PERSONA_ID,
        )

        # Get initial reports count
        initial_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=admin_user.headers,
        )
        assert initial_response.status_code == 200
        initial_count = len(initial_response.json())

        generate_response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={},
            headers=admin_user.headers,
        )
        assert generate_response.status_code == 204

        # Wait for the new report to appear
        max_wait_time = 15
        start_time = time.time()
        reports = []

        while time.time() - start_time < max_wait_time:
            list_response = requests.get(
                f"{API_SERVER_URL}/admin/usage-report",
                headers=admin_user.headers,
            )
            assert list_response.status_code == 200
            reports = list_response.json()

            if len(reports) > initial_count:
                break

            time.sleep(2)

        assert len(reports) > initial_count

        report_name = reports[0]["report_name"]

        # Download the report
        download_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report/{report_name}",
            headers=admin_user.headers,
            stream=True,
        )
        assert download_response.status_code == 200
        assert download_response.headers["Content-Type"] == "application/zip"
        assert "Content-Disposition" in download_response.headers
        assert (
            f"filename={report_name}"
            in download_response.headers["Content-Disposition"]
        )

        # Verify it's a valid zip file
        zip_content = BytesIO(download_response.content)
        with ZipFile(zip_content, "r") as zip_file:
            # Check that the zip contains expected files
            file_names = zip_file.namelist()
            assert "chat_messages.csv" in file_names
            assert "users.csv" in file_names

            # Verify chat_messages.csv has the expected columns
            with zip_file.open("chat_messages.csv") as csv_file:
                csv_content = csv_file.read().decode("utf-8")
                csv_reader = csv.DictReader(StringIO(csv_content))

                # Check that all expected columns are present
                expected_columns = {
                    "session_id",
                    "user_id",
                    "flow_type",
                    "time_sent",
                    "assistant_name",
                    "user_email",
                    "number_of_tokens",
                }
                actual_columns = set(csv_reader.fieldnames or [])
                assert (
                    expected_columns == actual_columns
                ), f"Expected columns {expected_columns}, but got {actual_columns}"

                # Verify there's at least one row of data
                rows = list(csv_reader)
                assert len(rows) > 0, "Expected at least one message in the report"

                # Verify the first row has non-empty values for all columns
                first_row = rows[0]
                for column in expected_columns:
                    assert column in first_row, f"Column {column} not found in row"
                    assert first_row[
                        column
                    ], f"Column {column} has empty value in first row"

                # Verify specific new fields have appropriate values
                assert first_row["assistant_name"], "assistant_name should not be empty"
                assert first_row["user_email"], "user_email should not be empty"
                assert first_row[
                    "number_of_tokens"
                ].isdigit(), "number_of_tokens should be a numeric value"
                assert (
                    int(first_row["number_of_tokens"]) >= 0
                ), "number_of_tokens should be non-negative"

    def test_read_nonexistent_report(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Try to download a report that doesn't exist
        response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report/nonexistent_report.zip",
            headers=admin_user.headers,
        )
        assert response.status_code == 404

    def test_non_admin_cannot_generate_report(
        self,
        reset: None,  # noqa: ARG002
        basic_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Try to generate a report as non-admin
        response = requests.post(
            f"{API_SERVER_URL}/admin/usage-report",
            json={},
            headers=basic_user.headers,
        )
        assert response.status_code == 403

    def test_non_admin_cannot_fetch_reports(
        self,
        reset: None,  # noqa: ARG002
        basic_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Try to fetch reports as non-admin
        response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=basic_user.headers,
        )
        assert response.status_code == 403

    def test_non_admin_cannot_download_report(
        self,
        reset: None,  # noqa: ARG002
        basic_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Try to download a report as non-admin
        response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report/some_report.zip",
            headers=basic_user.headers,
        )
        assert response.status_code == 403

    def test_concurrent_report_generation(
        self,
        reset: None,  # noqa: ARG002
        admin_user: DATestUser,  # noqa: ARG002
    ) -> None:
        # Seed some data
        seed_chat_history(
            num_sessions=10,
            num_messages=4,
            days=30,
            user_id=UUID(admin_user.id),
            persona_id=DEFAULT_PERSONA_ID,
        )

        # Get initial count of reports
        initial_response = requests.get(
            f"{API_SERVER_URL}/admin/usage-report",
            headers=admin_user.headers,
        )
        assert initial_response.status_code == 200
        initial_count = len(initial_response.json())

        # Generate multiple reports concurrently
        num_reports = 3
        for i in range(num_reports):
            response = requests.post(
                f"{API_SERVER_URL}/admin/usage-report",
                json={},
                headers=admin_user.headers,
            )
            assert response.status_code == 204

        # Wait for all reports to be generated
        max_wait_time = 120
        start_time = time.time()
        reports = []

        while time.time() - start_time < max_wait_time:
            response = requests.get(
                f"{API_SERVER_URL}/admin/usage-report",
                headers=admin_user.headers,
            )
            assert response.status_code == 200
            reports = response.json()

            if len(reports) >= initial_count + num_reports:
                break

            time.sleep(2)

        # Verify we have at least 3 new reports
        assert len(reports) >= initial_count + num_reports


================================================
FILE: backend/tests/integration/tests/scim/test_scim_groups.py
================================================
"""Integration tests for SCIM group provisioning endpoints.

Covers the full group lifecycle as driven by an IdP (Okta / Azure AD):
1. Create a group via POST /Groups
2. Retrieve a group via GET /Groups/{id}
3. List, filter, and paginate groups via GET /Groups
4. Replace a group via PUT /Groups/{id}
5. Patch a group (add/remove members, rename) via PATCH /Groups/{id}
6. Delete a group via DELETE /Groups/{id}
7. Error cases: duplicate name, not-found, invalid member IDs

All tests are parameterized across IdP request styles (Okta sends lowercase
PATCH ops; Entra sends capitalized ops like ``"Replace"``). The server
normalizes both — these tests verify that.

Auth tests live in test_scim_tokens.py.
User lifecycle tests live in test_scim_users.py.
"""

import pytest
import requests

from onyx.auth.schemas import UserRole
from tests.integration.common_utils.constants import ADMIN_USER_NAME
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.scim_client import ScimClient
from tests.integration.common_utils.managers.scim_token import ScimTokenManager
from tests.integration.common_utils.managers.user import build_email
from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


SCIM_GROUP_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:Group"
SCIM_USER_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:User"
SCIM_PATCH_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:PatchOp"


@pytest.fixture(scope="module", params=["okta", "entra"])
def idp_style(request: pytest.FixtureRequest) -> str:
    """Parameterized IdP style — runs every test with both Okta and Entra request formats."""
    return request.param


@pytest.fixture(scope="module")
def scim_token(idp_style: str) -> str:
    """Create a single SCIM token shared across all tests in this module.

    Creating a new token revokes the previous one, so we create exactly once
    per IdP-style run and reuse. Uses UserManager directly to avoid
    fixture-scope conflicts with the function-scoped admin_user fixture.
    """
    try:
        admin = UserManager.create(name=ADMIN_USER_NAME)
    except Exception:
        admin = UserManager.login_as_user(
            DATestUser(
                id="",
                email=build_email(ADMIN_USER_NAME),
                password=DEFAULT_PASSWORD,
                headers=GENERAL_HEADERS,
                role=UserRole.ADMIN,
                is_active=True,
            )
        )

    token = ScimTokenManager.create(
        name=f"scim-group-tests-{idp_style}",
        user_performing_action=admin,
    ).raw_token
    assert token is not None
    return token


def _make_group_resource(
    display_name: str,
    external_id: str | None = None,
    members: list[dict] | None = None,
) -> dict:
    """Build a minimal SCIM GroupResource payload."""
    resource: dict = {
        "schemas": [SCIM_GROUP_SCHEMA],
        "displayName": display_name,
    }
    if external_id is not None:
        resource["externalId"] = external_id
    if members is not None:
        resource["members"] = members
    return resource


def _make_user_resource(email: str, external_id: str) -> dict:
    """Build a minimal SCIM UserResource payload for member creation."""
    return {
        "schemas": [SCIM_USER_SCHEMA],
        "userName": email,
        "externalId": external_id,
        "name": {"givenName": "Test", "familyName": "User"},
        "active": True,
    }


def _make_patch_request(operations: list[dict], idp_style: str = "okta") -> dict:
    """Build a SCIM PatchOp payload, applying IdP-specific operation casing.

    Entra sends capitalized operations (e.g. ``"Replace"`` instead of
    ``"replace"``). The server's ``normalize_operation`` validator lowercases
    them — these tests verify that both casings are accepted.
    """
    cased_operations = []
    for operation in operations:
        cased = dict(operation)
        if idp_style == "entra":
            cased["op"] = operation["op"].capitalize()
        cased_operations.append(cased)
    return {
        "schemas": [SCIM_PATCH_SCHEMA],
        "Operations": cased_operations,
    }


def _create_scim_user(token: str, email: str, external_id: str) -> requests.Response:
    return ScimClient.post(
        "/Users", token, json=_make_user_resource(email, external_id)
    )


def _create_scim_group(
    token: str,
    display_name: str,
    external_id: str | None = None,
    members: list[dict] | None = None,
) -> requests.Response:
    return ScimClient.post(
        "/Groups",
        token,
        json=_make_group_resource(display_name, external_id, members),
    )


# ------------------------------------------------------------------
# Lifecycle: create → get → list → replace → patch → delete
# ------------------------------------------------------------------


def test_create_group(scim_token: str, idp_style: str) -> None:
    """POST /Groups creates a group and returns 201."""
    name = f"Engineering {idp_style}"
    resp = _create_scim_group(scim_token, name, external_id=f"ext-eng-{idp_style}")
    assert resp.status_code == 201

    body = resp.json()
    assert body["displayName"] == name
    assert body["externalId"] == f"ext-eng-{idp_style}"
    assert body["id"]  # integer ID assigned by server
    assert body["meta"]["resourceType"] == "Group"


def test_create_group_with_members(scim_token: str, idp_style: str) -> None:
    """POST /Groups with members populates the member list."""
    user = _create_scim_user(
        scim_token, f"grp_member1_{idp_style}@example.com", f"ext-gm-{idp_style}"
    ).json()

    resp = _create_scim_group(
        scim_token,
        f"Backend Team {idp_style}",
        external_id=f"ext-backend-{idp_style}",
        members=[{"value": user["id"]}],
    )
    assert resp.status_code == 201

    body = resp.json()
    member_ids = [m["value"] for m in body["members"]]
    assert user["id"] in member_ids


def test_get_group(scim_token: str, idp_style: str) -> None:
    """GET /Groups/{id} returns the group resource including members."""
    user = _create_scim_user(
        scim_token, f"grp_get_m_{idp_style}@example.com", f"ext-ggm-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Frontend Team {idp_style}",
        external_id=f"ext-fe-{idp_style}",
        members=[{"value": user["id"]}],
    ).json()

    resp = ScimClient.get(f"/Groups/{created['id']}", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["id"] == created["id"]
    assert body["displayName"] == f"Frontend Team {idp_style}"
    assert body["externalId"] == f"ext-fe-{idp_style}"
    member_ids = [m["value"] for m in body["members"]]
    assert user["id"] in member_ids


def test_list_groups(scim_token: str, idp_style: str) -> None:
    """GET /Groups returns a ListResponse containing provisioned groups."""
    name = f"DevOps Team {idp_style}"
    _create_scim_group(scim_token, name, external_id=f"ext-devops-{idp_style}")

    resp = ScimClient.get("/Groups", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] >= 1
    names = [r["displayName"] for r in body["Resources"]]
    assert name in names


def test_list_groups_pagination(scim_token: str, idp_style: str) -> None:
    """GET /Groups with startIndex and count returns correct pagination."""
    _create_scim_group(
        scim_token, f"Page Group A {idp_style}", external_id=f"ext-page-a-{idp_style}"
    )
    _create_scim_group(
        scim_token, f"Page Group B {idp_style}", external_id=f"ext-page-b-{idp_style}"
    )

    resp = ScimClient.get("/Groups?startIndex=1&count=1", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["startIndex"] == 1
    assert body["itemsPerPage"] == 1
    assert body["totalResults"] >= 2
    assert len(body["Resources"]) == 1


def test_filter_groups_by_display_name(scim_token: str, idp_style: str) -> None:
    """GET /Groups?filter=displayName eq '...' returns only matching groups."""
    name = f"Unique QA Team {idp_style}"
    _create_scim_group(scim_token, name, external_id=f"ext-qa-filter-{idp_style}")

    resp = ScimClient.get(f'/Groups?filter=displayName eq "{name}"', scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] == 1
    assert body["Resources"][0]["displayName"] == name


def test_filter_groups_by_external_id(scim_token: str, idp_style: str) -> None:
    """GET /Groups?filter=externalId eq '...' returns the matching group."""
    ext_id = f"ext-unique-group-id-{idp_style}"
    _create_scim_group(
        scim_token, f"ExtId Filter Group {idp_style}", external_id=ext_id
    )

    resp = ScimClient.get(f'/Groups?filter=externalId eq "{ext_id}"', scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] == 1
    assert body["Resources"][0]["externalId"] == ext_id


def test_replace_group(scim_token: str, idp_style: str) -> None:
    """PUT /Groups/{id} replaces the group resource."""
    created = _create_scim_group(
        scim_token,
        f"Original Name {idp_style}",
        external_id=f"ext-replace-g-{idp_style}",
    ).json()

    user = _create_scim_user(
        scim_token, f"grp_replace_m_{idp_style}@example.com", f"ext-grm-{idp_style}"
    ).json()

    updated_resource = _make_group_resource(
        display_name=f"Renamed Group {idp_style}",
        external_id=f"ext-replace-g-{idp_style}",
        members=[{"value": user["id"]}],
    )
    resp = ScimClient.put(f"/Groups/{created['id']}", scim_token, json=updated_resource)
    assert resp.status_code == 200

    body = resp.json()
    assert body["displayName"] == f"Renamed Group {idp_style}"
    member_ids = [m["value"] for m in body["members"]]
    assert user["id"] in member_ids


def test_replace_group_clears_members(scim_token: str, idp_style: str) -> None:
    """PUT /Groups/{id} with empty members removes all memberships."""
    user = _create_scim_user(
        scim_token, f"grp_clear_m_{idp_style}@example.com", f"ext-gcm-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Clear Members Group {idp_style}",
        external_id=f"ext-clear-g-{idp_style}",
        members=[{"value": user["id"]}],
    ).json()

    assert len(created["members"]) == 1

    resp = ScimClient.put(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_group_resource(
            f"Clear Members Group {idp_style}", f"ext-clear-g-{idp_style}", members=[]
        ),
    )
    assert resp.status_code == 200
    assert resp.json()["members"] == []


def test_patch_add_member(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} with op=add adds a member."""
    created = _create_scim_group(
        scim_token,
        f"Patch Add Group {idp_style}",
        external_id=f"ext-patch-add-{idp_style}",
    ).json()
    user = _create_scim_user(
        scim_token, f"grp_patch_add_{idp_style}@example.com", f"ext-gpa-{idp_style}"
    ).json()

    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "add", "path": "members", "value": [{"value": user["id"]}]}],
            idp_style,
        ),
    )
    assert resp.status_code == 200

    member_ids = [m["value"] for m in resp.json()["members"]]
    assert user["id"] in member_ids


def test_patch_remove_member(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} with op=remove removes a specific member."""
    user = _create_scim_user(
        scim_token, f"grp_patch_rm_{idp_style}@example.com", f"ext-gpr-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Patch Remove Group {idp_style}",
        external_id=f"ext-patch-rm-{idp_style}",
        members=[{"value": user["id"]}],
    ).json()
    assert len(created["members"]) == 1

    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [
                {
                    "op": "remove",
                    "path": f'members[value eq "{user["id"]}"]',
                }
            ],
            idp_style,
        ),
    )
    assert resp.status_code == 200
    assert resp.json()["members"] == []


def test_patch_replace_members(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} with op=replace on members swaps the entire list."""
    user_a = _create_scim_user(
        scim_token, f"grp_repl_a_{idp_style}@example.com", f"ext-gra-{idp_style}"
    ).json()
    user_b = _create_scim_user(
        scim_token, f"grp_repl_b_{idp_style}@example.com", f"ext-grb-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Patch Replace Group {idp_style}",
        external_id=f"ext-patch-repl-{idp_style}",
        members=[{"value": user_a["id"]}],
    ).json()

    # Replace member list: swap A for B
    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [
                {
                    "op": "replace",
                    "path": "members",
                    "value": [{"value": user_b["id"]}],
                }
            ],
            idp_style,
        ),
    )
    assert resp.status_code == 200

    member_ids = [m["value"] for m in resp.json()["members"]]
    assert user_b["id"] in member_ids
    assert user_a["id"] not in member_ids


def test_patch_rename_group(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} with op=replace on displayName renames the group."""
    created = _create_scim_group(
        scim_token,
        f"Old Group Name {idp_style}",
        external_id=f"ext-rename-g-{idp_style}",
    ).json()

    new_name = f"New Group Name {idp_style}"
    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "displayName", "value": new_name}],
            idp_style,
        ),
    )
    assert resp.status_code == 200
    assert resp.json()["displayName"] == new_name

    # Confirm via GET
    get_resp = ScimClient.get(f"/Groups/{created['id']}", scim_token)
    assert get_resp.json()["displayName"] == new_name


def test_delete_group(scim_token: str, idp_style: str) -> None:
    """DELETE /Groups/{id} removes the group."""
    created = _create_scim_group(
        scim_token,
        f"Delete Me Group {idp_style}",
        external_id=f"ext-del-g-{idp_style}",
    ).json()

    resp = ScimClient.delete(f"/Groups/{created['id']}", scim_token)
    assert resp.status_code == 204

    # Second DELETE returns 404 (group hard-deleted)
    resp2 = ScimClient.delete(f"/Groups/{created['id']}", scim_token)
    assert resp2.status_code == 404


def test_delete_group_preserves_members(scim_token: str, idp_style: str) -> None:
    """DELETE /Groups/{id} removes memberships but does not deactivate users."""
    user = _create_scim_user(
        scim_token, f"grp_del_member_{idp_style}@example.com", f"ext-gdm-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Delete With Members {idp_style}",
        external_id=f"ext-del-wm-{idp_style}",
        members=[{"value": user["id"]}],
    ).json()

    resp = ScimClient.delete(f"/Groups/{created['id']}", scim_token)
    assert resp.status_code == 204

    # User should still be active and retrievable
    user_resp = ScimClient.get(f"/Users/{user['id']}", scim_token)
    assert user_resp.status_code == 200
    assert user_resp.json()["active"] is True


# ------------------------------------------------------------------
# Error cases
# ------------------------------------------------------------------


def test_create_group_duplicate_name(scim_token: str, idp_style: str) -> None:
    """POST /Groups with an already-taken displayName returns 409."""
    name = f"Dup Name Group {idp_style}"
    resp1 = _create_scim_group(scim_token, name, external_id=f"ext-dup-g1-{idp_style}")
    assert resp1.status_code == 201

    resp2 = _create_scim_group(scim_token, name, external_id=f"ext-dup-g2-{idp_style}")
    assert resp2.status_code == 409


def test_get_nonexistent_group(scim_token: str) -> None:
    """GET /Groups/{bad-id} returns 404."""
    resp = ScimClient.get("/Groups/999999999", scim_token)
    assert resp.status_code == 404


def test_create_group_with_invalid_member(scim_token: str, idp_style: str) -> None:
    """POST /Groups with a non-existent member UUID returns 400."""
    resp = _create_scim_group(
        scim_token,
        f"Bad Member Group {idp_style}",
        external_id=f"ext-bad-m-{idp_style}",
        members=[{"value": "00000000-0000-0000-0000-000000000000"}],
    )
    assert resp.status_code == 400
    assert "not found" in resp.json()["detail"].lower()


def test_patch_add_nonexistent_member(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} adding a non-existent member returns 400."""
    created = _create_scim_group(
        scim_token,
        f"Patch Bad Member Group {idp_style}",
        external_id=f"ext-pbm-{idp_style}",
    ).json()

    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [
                {
                    "op": "add",
                    "path": "members",
                    "value": [{"value": "00000000-0000-0000-0000-000000000000"}],
                }
            ],
            idp_style,
        ),
    )
    assert resp.status_code == 400
    assert "not found" in resp.json()["detail"].lower()


def test_patch_add_duplicate_member_is_idempotent(
    scim_token: str, idp_style: str
) -> None:
    """PATCH /Groups/{id} adding an already-present member succeeds silently."""
    user = _create_scim_user(
        scim_token, f"grp_dup_add_{idp_style}@example.com", f"ext-gda-{idp_style}"
    ).json()
    created = _create_scim_group(
        scim_token,
        f"Idempotent Add Group {idp_style}",
        external_id=f"ext-idem-g-{idp_style}",
        members=[{"value": user["id"]}],
    ).json()
    assert len(created["members"]) == 1

    # Add same member again
    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "add", "path": "members", "value": [{"value": user["id"]}]}],
            idp_style,
        ),
    )
    assert resp.status_code == 200
    assert len(resp.json()["members"]) == 1  # still just one member


def test_create_group_reserved_name_admin(scim_token: str) -> None:
    """POST /Groups with reserved name 'Admin' returns 409."""
    resp = _create_scim_group(scim_token, "Admin", external_id="ext-reserved-admin")
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_create_group_reserved_name_basic(scim_token: str) -> None:
    """POST /Groups with reserved name 'Basic' returns 409."""
    resp = _create_scim_group(scim_token, "Basic", external_id="ext-reserved-basic")
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_replace_group_cannot_rename_to_reserved(
    scim_token: str, idp_style: str
) -> None:
    """PUT /Groups/{id} renaming a group to 'Admin' returns 409."""
    created = _create_scim_group(
        scim_token,
        f"Rename To Reserved {idp_style}",
        external_id=f"ext-rtr-{idp_style}",
    ).json()

    resp = ScimClient.put(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_group_resource(
            display_name="Admin", external_id=f"ext-rtr-{idp_style}"
        ),
    )
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_patch_rename_to_reserved_name(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} renaming a group to 'Basic' returns 409."""
    created = _create_scim_group(
        scim_token,
        f"Patch Rename Reserved {idp_style}",
        external_id=f"ext-prr-{idp_style}",
    ).json()

    resp = ScimClient.patch(
        f"/Groups/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "displayName", "value": "Basic"}],
            idp_style,
        ),
    )
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_delete_reserved_group_rejected(scim_token: str) -> None:
    """DELETE /Groups/{id} on a reserved group ('Admin') returns 409."""
    # Look up the reserved 'Admin' group via SCIM filter
    resp = ScimClient.get('/Groups?filter=displayName eq "Admin"', scim_token)
    assert resp.status_code == 200
    resources = resp.json()["Resources"]
    assert len(resources) >= 1, "Expected reserved 'Admin' group to exist"
    admin_group_id = resources[0]["id"]

    resp = ScimClient.delete(f"/Groups/{admin_group_id}", scim_token)
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_scim_created_group_has_basic_permission(
    scim_token: str, idp_style: str
) -> None:
    """POST /Groups assigns the 'basic' permission to the group itself."""
    # Create a SCIM group (no members needed — we check the group's permissions)
    resp = _create_scim_group(
        scim_token,
        f"Basic Perm Group {idp_style}",
        external_id=f"ext-basic-perm-{idp_style}",
    )
    assert resp.status_code == 201
    group_id = resp.json()["id"]

    # Log in as the admin user (created by the scim_token fixture).
    admin = DATestUser(
        id="",
        email=build_email(ADMIN_USER_NAME),
        password=DEFAULT_PASSWORD,
        headers=GENERAL_HEADERS,
        role=UserRole.ADMIN,
        is_active=True,
    )
    admin = UserManager.login_as_user(admin)

    # Verify the group itself was granted the basic permission
    perms_resp = requests.get(
        f"{API_SERVER_URL}/manage/admin/user-group/{group_id}/permissions",
        headers=admin.headers,
    )
    perms_resp.raise_for_status()
    perms = perms_resp.json()
    assert "basic" in perms, f"SCIM group should have 'basic' permission, got: {perms}"


def test_replace_group_cannot_rename_from_reserved(scim_token: str) -> None:
    """PUT /Groups/{id} renaming a reserved group ('Admin') to a non-reserved name returns 409."""
    resp = ScimClient.get('/Groups?filter=displayName eq "Admin"', scim_token)
    assert resp.status_code == 200
    resources = resp.json()["Resources"]
    assert len(resources) >= 1, "Expected reserved 'Admin' group to exist"
    admin_group_id = resources[0]["id"]

    resp = ScimClient.put(
        f"/Groups/{admin_group_id}",
        scim_token,
        json=_make_group_resource(
            display_name="RenamedAdmin", external_id="ext-rename-from-reserved"
        ),
    )
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


def test_patch_rename_from_reserved_name(scim_token: str, idp_style: str) -> None:
    """PATCH /Groups/{id} renaming a reserved group ('Admin') returns 409."""
    resp = ScimClient.get('/Groups?filter=displayName eq "Admin"', scim_token)
    assert resp.status_code == 200
    resources = resp.json()["Resources"]
    assert len(resources) >= 1, "Expected reserved 'Admin' group to exist"
    admin_group_id = resources[0]["id"]

    resp = ScimClient.patch(
        f"/Groups/{admin_group_id}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "displayName", "value": "RenamedAdmin"}],
            idp_style,
        ),
    )
    assert resp.status_code == 409
    assert "reserved" in resp.json()["detail"].lower()


================================================
FILE: backend/tests/integration/tests/scim/test_scim_tokens.py
================================================
"""Integration tests for SCIM token management.

Covers the admin token API and SCIM bearer-token authentication:
1. Token lifecycle: create, retrieve metadata, use for SCIM requests
2. Token rotation: creating a new token revokes previous tokens
3. Revoked tokens are rejected by SCIM endpoints
4. Non-admin users cannot manage SCIM tokens
5. SCIM requests without a token are rejected
6. Service discovery endpoints work without authentication
7. last_used_at is updated after a SCIM request
"""

import time

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.scim_client import ScimClient
from tests.integration.common_utils.managers.scim_token import ScimTokenManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def test_scim_token_lifecycle(admin_user: DATestUser) -> None:
    """Create token → retrieve metadata → use for SCIM request."""
    token = ScimTokenManager.create(
        name="Test SCIM Token",
        user_performing_action=admin_user,
    )

    assert token.raw_token is not None
    assert token.raw_token.startswith("onyx_scim_")
    assert token.is_active is True
    assert "****" in token.token_display

    # GET returns the same metadata but raw_token is None because the
    # server only reveals the raw token once at creation time (it stores
    # only the SHA-256 hash).
    active = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert active == token.model_copy(update={"raw_token": None})

    # Token works for SCIM requests
    response = ScimClient.get("/Users", token.raw_token)
    assert response.status_code == 200
    body = response.json()
    assert "Resources" in body
    assert body["totalResults"] >= 0


def test_scim_token_rotation_revokes_previous(admin_user: DATestUser) -> None:
    """Creating a new token automatically revokes the previous one."""
    first = ScimTokenManager.create(
        name="First Token",
        user_performing_action=admin_user,
    )
    assert first.raw_token is not None

    response = ScimClient.get("/Users", first.raw_token)
    assert response.status_code == 200

    # Create second token — should revoke first
    second = ScimTokenManager.create(
        name="Second Token",
        user_performing_action=admin_user,
    )
    assert second.raw_token is not None

    # Active token should now be the second one
    active = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert active == second.model_copy(update={"raw_token": None})

    # First token rejected, second works
    assert ScimClient.get("/Users", first.raw_token).status_code == 401
    assert ScimClient.get("/Users", second.raw_token).status_code == 200


def test_scim_request_without_token_rejected(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """SCIM endpoints reject requests with no Authorization header."""
    assert ScimClient.get_no_auth("/Users").status_code == 401


def test_scim_request_with_bad_token_rejected(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """SCIM endpoints reject requests with an invalid token."""
    assert ScimClient.get("/Users", "onyx_scim_bogus_token_value").status_code == 401


def test_non_admin_cannot_create_token(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Non-admin users get 403 when trying to create a SCIM token."""
    basic_user = UserManager.create(name="scim_basic_user")

    response = requests.post(
        f"{API_SERVER_URL}/admin/enterprise-settings/scim/token",
        json={"name": "Should Fail"},
        headers=basic_user.headers,
        timeout=60,
    )
    assert response.status_code == 403


def test_non_admin_cannot_get_token(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Non-admin users get 403 when trying to retrieve SCIM token metadata."""
    basic_user = UserManager.create(name="scim_basic_user2")

    response = requests.get(
        f"{API_SERVER_URL}/admin/enterprise-settings/scim/token",
        headers=basic_user.headers,
        timeout=60,
    )
    assert response.status_code == 403


def test_no_active_token_returns_404(new_admin_user: DATestUser) -> None:
    """GET active token returns 404 when no token exists."""
    # new_admin_user depends on the reset fixture, ensuring a clean DB
    # with no active SCIM tokens.
    active = ScimTokenManager.get_active(user_performing_action=new_admin_user)
    assert active is None

    response = requests.get(
        f"{API_SERVER_URL}/admin/enterprise-settings/scim/token",
        headers=new_admin_user.headers,
        timeout=60,
    )
    assert response.status_code == 404


def test_service_discovery_no_auth_required(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Service discovery endpoints work without any authentication."""
    for path in ["/ServiceProviderConfig", "/ResourceTypes", "/Schemas"]:
        response = ScimClient.get_no_auth(path)
        assert response.status_code == 200, f"{path} returned {response.status_code}"


def test_last_used_at_updated_after_scim_request(
    admin_user: DATestUser,
) -> None:
    """last_used_at timestamp is updated after using the token."""
    token = ScimTokenManager.create(
        name="Last Used Token",
        user_performing_action=admin_user,
    )
    assert token.raw_token is not None

    active = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert active is not None
    assert active.last_used_at is None

    # Make a SCIM request, then verify last_used_at is set
    assert ScimClient.get("/Users", token.raw_token).status_code == 200
    time.sleep(0.5)

    active_after = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert active_after is not None
    assert active_after.last_used_at is not None


================================================
FILE: backend/tests/integration/tests/scim/test_scim_users.py
================================================
"""Integration tests for SCIM user provisioning endpoints.

Covers the full user lifecycle as driven by an IdP (Okta / Azure AD):
1. Create a user via POST /Users
2. Retrieve a user via GET /Users/{id}
3. List, filter, and paginate users via GET /Users
4. Replace a user via PUT /Users/{id}
5. Patch a user (deactivate/reactivate) via PATCH /Users/{id}
6. Delete a user via DELETE /Users/{id}
7. Error cases: missing externalId, duplicate email, not-found, seat limit

All tests are parameterized across IdP request styles:
- **Okta**: lowercase PATCH ops, minimal payloads (core schema only).
- **Entra**: capitalized ops (``"Replace"``), enterprise extension data
  (department, manager), and structured email arrays.

The server normalizes both — these tests verify that all IdP-specific fields
are accepted and round-tripped correctly.

Auth, revoked-token, and service-discovery tests live in test_scim_tokens.py.
"""

from datetime import datetime
from datetime import timedelta
from datetime import timezone

import pytest
import redis
import requests

from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import PlanType
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PORT
from onyx.db.enums import AccountType
from onyx.server.settings.models import ApplicationStatus
from tests.integration.common_utils.constants import ADMIN_USER_NAME
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.scim_client import ScimClient
from tests.integration.common_utils.managers.scim_token import ScimTokenManager
from tests.integration.common_utils.managers.user import build_email
from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


SCIM_USER_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:User"
SCIM_ENTERPRISE_USER_SCHEMA = (
    "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
)
SCIM_PATCH_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:PatchOp"

_LICENSE_REDIS_KEY = "public:license:metadata"


@pytest.fixture(scope="module", params=["okta", "entra"])
def idp_style(request: pytest.FixtureRequest) -> str:
    """Parameterized IdP style — runs every test with both Okta and Entra request formats."""
    return request.param


@pytest.fixture(scope="module")
def scim_token(idp_style: str) -> str:
    """Create a single SCIM token shared across all tests in this module.

    Creating a new token revokes the previous one, so we create exactly once
    per IdP-style run and reuse. Uses UserManager directly to avoid
    fixture-scope conflicts with the function-scoped admin_user fixture.
    """
    from tests.integration.common_utils.constants import ADMIN_USER_NAME
    from tests.integration.common_utils.constants import GENERAL_HEADERS
    from tests.integration.common_utils.managers.user import build_email
    from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD
    from tests.integration.common_utils.managers.user import UserManager
    from tests.integration.common_utils.test_models import DATestUser

    try:
        admin = UserManager.create(name=ADMIN_USER_NAME)
    except Exception:
        admin = UserManager.login_as_user(
            DATestUser(
                id="",
                email=build_email(ADMIN_USER_NAME),
                password=DEFAULT_PASSWORD,
                headers=GENERAL_HEADERS,
                role=UserRole.ADMIN,
                is_active=True,
            )
        )

    token = ScimTokenManager.create(
        name=f"scim-user-tests-{idp_style}",
        user_performing_action=admin,
    ).raw_token
    assert token is not None
    return token


def _make_user_resource(
    email: str,
    external_id: str,
    given_name: str = "Test",
    family_name: str = "User",
    active: bool = True,
    idp_style: str = "okta",
    department: str | None = None,
    manager_id: str | None = None,
) -> dict:
    """Build a SCIM UserResource payload appropriate for the IdP style.

    Entra sends richer payloads including enterprise extension data (department,
    manager), structured email arrays, and the enterprise schema URN. Okta sends
    minimal payloads with just core user fields.
    """
    resource: dict = {
        "schemas": [SCIM_USER_SCHEMA],
        "userName": email,
        "externalId": external_id,
        "name": {
            "givenName": given_name,
            "familyName": family_name,
        },
        "active": active,
    }
    if idp_style == "entra":
        dept = department or "Engineering"
        mgr = manager_id or "mgr-ext-001"
        resource["schemas"].append(SCIM_ENTERPRISE_USER_SCHEMA)
        resource[SCIM_ENTERPRISE_USER_SCHEMA] = {
            "department": dept,
            "manager": {"value": mgr},
        }
        resource["emails"] = [
            {"value": email, "type": "work", "primary": True},
        ]
    return resource


def _make_patch_request(operations: list[dict], idp_style: str = "okta") -> dict:
    """Build a SCIM PatchOp payload, applying IdP-specific operation casing.

    Entra sends capitalized operations (e.g. ``"Replace"`` instead of
    ``"replace"``). The server's ``normalize_operation`` validator lowercases
    them — these tests verify that both casings are accepted.
    """
    cased_operations = []
    for operation in operations:
        cased = dict(operation)
        if idp_style == "entra":
            cased["op"] = operation["op"].capitalize()
        cased_operations.append(cased)
    return {
        "schemas": [SCIM_PATCH_SCHEMA],
        "Operations": cased_operations,
    }


def _create_scim_user(
    token: str,
    email: str,
    external_id: str,
    idp_style: str = "okta",
) -> requests.Response:
    return ScimClient.post(
        "/Users",
        token,
        json=_make_user_resource(email, external_id, idp_style=idp_style),
    )


def _assert_entra_extension(
    body: dict,
    expected_department: str = "Engineering",
    expected_manager: str = "mgr-ext-001",
) -> None:
    """Assert that Entra enterprise extension fields round-tripped correctly."""
    assert SCIM_ENTERPRISE_USER_SCHEMA in body["schemas"]
    ext = body[SCIM_ENTERPRISE_USER_SCHEMA]
    assert ext["department"] == expected_department
    assert ext["manager"]["value"] == expected_manager


def _assert_entra_emails(body: dict, expected_email: str) -> None:
    """Assert that structured email metadata round-tripped correctly."""
    emails = body["emails"]
    assert len(emails) >= 1
    work_email = next(e for e in emails if e.get("type") == "work")
    assert work_email["value"] == expected_email
    assert work_email["primary"] is True


# ------------------------------------------------------------------
# Lifecycle: create -> get -> list -> replace -> patch -> delete
# ------------------------------------------------------------------


def test_create_user(scim_token: str, idp_style: str) -> None:
    """POST /Users creates a provisioned user and returns 201."""
    email = f"scim_create_{idp_style}@example.com"
    ext_id = f"ext-create-{idp_style}"
    resp = _create_scim_user(scim_token, email, ext_id, idp_style)
    assert resp.status_code == 201

    body = resp.json()
    assert body["userName"] == email
    assert body["externalId"] == ext_id
    assert body["active"] is True
    assert body["id"]  # UUID assigned by server
    assert body["meta"]["resourceType"] == "User"
    assert body["name"]["givenName"] == "Test"
    assert body["name"]["familyName"] == "User"

    if idp_style == "entra":
        _assert_entra_extension(body)
        _assert_entra_emails(body, email)


def test_create_user_default_group_and_account_type(
    scim_token: str, idp_style: str
) -> None:
    """SCIM-provisioned users get Basic default group and STANDARD account_type."""
    email = f"scim_defaults_{idp_style}@example.com"
    ext_id = f"ext-defaults-{idp_style}"
    resp = _create_scim_user(scim_token, email, ext_id, idp_style)
    assert resp.status_code == 201
    user_id = resp.json()["id"]

    # --- Verify group assignment via SCIM GET ---
    get_resp = ScimClient.get(f"/Users/{user_id}", scim_token)
    assert get_resp.status_code == 200
    groups = get_resp.json().get("groups", [])
    group_names = {g["display"] for g in groups}
    assert "Basic" in group_names, f"Expected 'Basic' in groups, got {group_names}"
    assert "Admin" not in group_names, "SCIM user should not be in Admin group"

    # --- Verify account_type via admin API ---
    admin = UserManager.login_as_user(
        DATestUser(
            id="",
            email=build_email(ADMIN_USER_NAME),
            password=DEFAULT_PASSWORD,
            headers=GENERAL_HEADERS,
            role=UserRole.ADMIN,
            is_active=True,
        )
    )
    page = UserManager.get_user_page(
        user_performing_action=admin,
        search_query=email,
    )
    assert page.total_items >= 1
    scim_user_snapshot = next((u for u in page.items if u.email == email), None)
    assert (
        scim_user_snapshot is not None
    ), f"SCIM user {email} not found in user listing"
    assert (
        scim_user_snapshot.account_type == AccountType.STANDARD
    ), f"Expected STANDARD, got {scim_user_snapshot.account_type}"


def test_get_user(scim_token: str, idp_style: str) -> None:
    """GET /Users/{id} returns the user resource with all stored fields."""
    email = f"scim_get_{idp_style}@example.com"
    ext_id = f"ext-get-{idp_style}"
    created = _create_scim_user(scim_token, email, ext_id, idp_style).json()

    resp = ScimClient.get(f"/Users/{created['id']}", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["id"] == created["id"]
    assert body["userName"] == email
    assert body["externalId"] == ext_id
    assert body["name"]["givenName"] == "Test"
    assert body["name"]["familyName"] == "User"

    if idp_style == "entra":
        _assert_entra_extension(body)
        _assert_entra_emails(body, email)


def test_list_users(scim_token: str, idp_style: str) -> None:
    """GET /Users returns a ListResponse containing provisioned users."""
    email = f"scim_list_{idp_style}@example.com"
    _create_scim_user(scim_token, email, f"ext-list-{idp_style}", idp_style)

    resp = ScimClient.get("/Users", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] >= 1
    emails = [r["userName"] for r in body["Resources"]]
    assert email in emails


def test_list_users_pagination(scim_token: str, idp_style: str) -> None:
    """GET /Users with startIndex and count returns correct pagination."""
    _create_scim_user(
        scim_token,
        f"scim_page1_{idp_style}@example.com",
        f"ext-page-1-{idp_style}",
        idp_style,
    )
    _create_scim_user(
        scim_token,
        f"scim_page2_{idp_style}@example.com",
        f"ext-page-2-{idp_style}",
        idp_style,
    )

    resp = ScimClient.get("/Users?startIndex=1&count=1", scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["startIndex"] == 1
    assert body["itemsPerPage"] == 1
    assert body["totalResults"] >= 2
    assert len(body["Resources"]) == 1


def test_filter_users_by_username(scim_token: str, idp_style: str) -> None:
    """GET /Users?filter=userName eq '...' returns only matching users."""
    email = f"scim_filter_{idp_style}@example.com"
    _create_scim_user(scim_token, email, f"ext-filter-{idp_style}", idp_style)

    resp = ScimClient.get(f'/Users?filter=userName eq "{email}"', scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] == 1
    assert body["Resources"][0]["userName"] == email


def test_replace_user(scim_token: str, idp_style: str) -> None:
    """PUT /Users/{id} replaces the user resource including enterprise fields."""
    email = f"scim_replace_{idp_style}@example.com"
    ext_id = f"ext-replace-{idp_style}"
    created = _create_scim_user(scim_token, email, ext_id, idp_style).json()

    updated_resource = _make_user_resource(
        email=email,
        external_id=ext_id,
        given_name="Updated",
        family_name="Name",
        idp_style=idp_style,
        department="Product",
    )
    resp = ScimClient.put(f"/Users/{created['id']}", scim_token, json=updated_resource)
    assert resp.status_code == 200

    body = resp.json()
    assert body["name"]["givenName"] == "Updated"
    assert body["name"]["familyName"] == "Name"

    if idp_style == "entra":
        _assert_entra_extension(body, expected_department="Product")
        _assert_entra_emails(body, email)


def test_patch_deactivate_user(scim_token: str, idp_style: str) -> None:
    """PATCH /Users/{id} with active=false deactivates the user."""
    created = _create_scim_user(
        scim_token,
        f"scim_deactivate_{idp_style}@example.com",
        f"ext-deactivate-{idp_style}",
        idp_style,
    ).json()
    assert created["active"] is True

    resp = ScimClient.patch(
        f"/Users/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "active", "value": False}], idp_style
        ),
    )
    assert resp.status_code == 200
    assert resp.json()["active"] is False

    # Confirm via GET
    get_resp = ScimClient.get(f"/Users/{created['id']}", scim_token)
    assert get_resp.json()["active"] is False


def test_patch_reactivate_user(scim_token: str, idp_style: str) -> None:
    """PATCH active=true reactivates a previously deactivated user."""
    created = _create_scim_user(
        scim_token,
        f"scim_reactivate_{idp_style}@example.com",
        f"ext-reactivate-{idp_style}",
        idp_style,
    ).json()

    # Deactivate
    deactivate_resp = ScimClient.patch(
        f"/Users/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "active", "value": False}], idp_style
        ),
    )
    assert deactivate_resp.status_code == 200
    assert deactivate_resp.json()["active"] is False

    # Reactivate
    resp = ScimClient.patch(
        f"/Users/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "active", "value": True}], idp_style
        ),
    )
    assert resp.status_code == 200
    assert resp.json()["active"] is True


def test_delete_user(scim_token: str, idp_style: str) -> None:
    """DELETE /Users/{id} deactivates and removes the SCIM mapping."""
    created = _create_scim_user(
        scim_token,
        f"scim_delete_{idp_style}@example.com",
        f"ext-delete-{idp_style}",
        idp_style,
    ).json()

    resp = ScimClient.delete(f"/Users/{created['id']}", scim_token)
    assert resp.status_code == 204

    # Second DELETE returns 404 per RFC 7644 §3.6 (mapping removed)
    resp2 = ScimClient.delete(f"/Users/{created['id']}", scim_token)
    assert resp2.status_code == 404


# ------------------------------------------------------------------
# Error cases
# ------------------------------------------------------------------


def test_create_user_missing_external_id(scim_token: str, idp_style: str) -> None:
    """POST /Users without externalId succeeds (RFC 7643: externalId is optional)."""
    email = f"scim_no_extid_{idp_style}@example.com"
    resp = ScimClient.post(
        "/Users",
        scim_token,
        json={
            "schemas": [SCIM_USER_SCHEMA],
            "userName": email,
            "active": True,
        },
    )
    assert resp.status_code == 201
    body = resp.json()
    assert body["userName"] == email
    assert body.get("externalId") is None


def test_create_user_duplicate_email(scim_token: str, idp_style: str) -> None:
    """POST /Users with an already-taken email returns 409."""
    email = f"scim_dup_{idp_style}@example.com"
    resp1 = _create_scim_user(scim_token, email, f"ext-dup-1-{idp_style}", idp_style)
    assert resp1.status_code == 201

    resp2 = _create_scim_user(scim_token, email, f"ext-dup-2-{idp_style}", idp_style)
    assert resp2.status_code == 409


def test_get_nonexistent_user(scim_token: str) -> None:
    """GET /Users/{bad-id} returns 404."""
    resp = ScimClient.get("/Users/00000000-0000-0000-0000-000000000000", scim_token)
    assert resp.status_code == 404


def test_filter_users_by_external_id(scim_token: str, idp_style: str) -> None:
    """GET /Users?filter=externalId eq '...' returns the matching user."""
    ext_id = f"ext-unique-filter-id-{idp_style}"
    _create_scim_user(
        scim_token, f"scim_extfilter_{idp_style}@example.com", ext_id, idp_style
    )

    resp = ScimClient.get(f'/Users?filter=externalId eq "{ext_id}"', scim_token)
    assert resp.status_code == 200

    body = resp.json()
    assert body["totalResults"] == 1
    assert body["Resources"][0]["externalId"] == ext_id


# ------------------------------------------------------------------
# Seat-limit enforcement
# ------------------------------------------------------------------


def _seed_license(r: redis.Redis, seats: int) -> None:
    """Write a LicenseMetadata entry into Redis with the given seat cap."""
    now = datetime.now(timezone.utc)
    metadata = LicenseMetadata(
        tenant_id="public",
        organization_name="Test Org",
        seats=seats,
        used_seats=0,  # check_seat_availability recalculates from DB
        plan_type=PlanType.ANNUAL,
        issued_at=now,
        expires_at=now + timedelta(days=365),
        status=ApplicationStatus.ACTIVE,
        source=LicenseSource.MANUAL_UPLOAD,
    )
    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)


def test_create_user_seat_limit(scim_token: str, idp_style: str) -> None:
    """POST /Users returns 403 when the seat limit is reached."""
    r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)

    # admin_user already occupies 1 seat; cap at 1 -> full
    _seed_license(r, seats=1)

    try:
        resp = _create_scim_user(
            scim_token,
            f"scim_blocked_{idp_style}@example.com",
            f"ext-blocked-{idp_style}",
            idp_style,
        )
        assert resp.status_code == 403
        assert "seat" in resp.json()["detail"].lower()
    finally:
        r.delete(_LICENSE_REDIS_KEY)


def test_reactivate_user_seat_limit(scim_token: str, idp_style: str) -> None:
    """PATCH active=true returns 403 when the seat limit is reached."""
    # Create and deactivate a user (before license is seeded)
    created = _create_scim_user(
        scim_token,
        f"scim_reactivate_blocked_{idp_style}@example.com",
        f"ext-reactivate-blocked-{idp_style}",
        idp_style,
    ).json()
    assert created["active"] is True

    deactivate_resp = ScimClient.patch(
        f"/Users/{created['id']}",
        scim_token,
        json=_make_patch_request(
            [{"op": "replace", "path": "active", "value": False}], idp_style
        ),
    )
    assert deactivate_resp.status_code == 200
    assert deactivate_resp.json()["active"] is False

    r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)

    # Seed license capped at current active users -> reactivation should fail
    _seed_license(r, seats=1)

    try:
        resp = ScimClient.patch(
            f"/Users/{created['id']}",
            scim_token,
            json=_make_patch_request(
                [{"op": "replace", "path": "active", "value": True}], idp_style
            ),
        )
        assert resp.status_code == 403
        assert "seat" in resp.json()["detail"].lower()
    finally:
        r.delete(_LICENSE_REDIS_KEY)


================================================
FILE: backend/tests/integration/tests/search_settings/test_search_settings.py
================================================
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser


SEARCH_SETTINGS_URL = f"{API_SERVER_URL}/search-settings"


def _get_current_search_settings(user: DATestUser) -> dict:
    response = requests.get(
        f"{SEARCH_SETTINGS_URL}/get-current-search-settings",
        headers=user.headers,
    )
    response.raise_for_status()
    return response.json()


def _get_all_search_settings(user: DATestUser) -> dict:
    response = requests.get(
        f"{SEARCH_SETTINGS_URL}/get-all-search-settings",
        headers=user.headers,
    )
    response.raise_for_status()
    return response.json()


def _get_secondary_search_settings(user: DATestUser) -> dict | None:
    response = requests.get(
        f"{SEARCH_SETTINGS_URL}/get-secondary-search-settings",
        headers=user.headers,
    )
    response.raise_for_status()
    return response.json()


def _update_inference_settings(user: DATestUser, settings: dict) -> None:
    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/update-inference-settings",
        json=settings,
        headers=user.headers,
    )
    response.raise_for_status()


def _set_new_search_settings(
    user: DATestUser,
    current_settings: dict,
    enable_contextual_rag: bool = False,
    contextual_rag_llm_name: str | None = None,
    contextual_rag_llm_provider: str | None = None,
) -> requests.Response:
    """POST to set-new-search-settings, deriving the payload from current settings."""
    payload = {
        "model_name": current_settings["model_name"],
        "model_dim": current_settings["model_dim"],
        "normalize": current_settings["normalize"],
        "query_prefix": current_settings.get("query_prefix") or "",
        "passage_prefix": current_settings.get("passage_prefix") or "",
        "provider_type": current_settings.get("provider_type"),
        "index_name": None,
        "multipass_indexing": current_settings.get("multipass_indexing", False),
        "embedding_precision": current_settings["embedding_precision"],
        "reduced_dimension": current_settings.get("reduced_dimension"),
        "enable_contextual_rag": enable_contextual_rag,
        "contextual_rag_llm_name": contextual_rag_llm_name,
        "contextual_rag_llm_provider": contextual_rag_llm_provider,
    }
    return requests.post(
        f"{SEARCH_SETTINGS_URL}/set-new-search-settings",
        json=payload,
        headers=user.headers,
    )


def _cancel_new_embedding(user: DATestUser) -> None:
    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/cancel-new-embedding",
        headers=user.headers,
    )
    response.raise_for_status()


def test_get_current_search_settings(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Verify that GET current search settings returns expected fields."""
    settings = _get_current_search_settings(admin_user)

    assert "model_name" in settings
    assert "model_dim" in settings
    assert "enable_contextual_rag" in settings
    assert "contextual_rag_llm_name" in settings
    assert "contextual_rag_llm_provider" in settings
    assert "index_name" in settings
    assert "embedding_precision" in settings


def test_get_all_search_settings(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Verify that GET all search settings returns current and secondary."""
    all_settings = _get_all_search_settings(admin_user)

    assert "current_settings" in all_settings
    assert "secondary_settings" in all_settings
    assert all_settings["current_settings"] is not None
    assert "model_name" in all_settings["current_settings"]


def test_get_secondary_search_settings_none_by_default(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Verify that no secondary search settings exist by default."""
    secondary = _get_secondary_search_settings(admin_user)
    assert secondary is None


def test_set_contextual_rag_model(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Set contextual RAG LLM model and verify it persists."""
    settings = _get_current_search_settings(admin_user)

    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = llm_provider.default_model_name
    settings["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, settings)

    updated = _get_current_search_settings(admin_user)
    assert updated["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert updated["contextual_rag_llm_provider"] == llm_provider.name


def test_unset_contextual_rag_model(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Set a contextual RAG model, then unset it and verify it becomes None."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = llm_provider.default_model_name
    settings["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, settings)

    # Verify it's set
    updated = _get_current_search_settings(admin_user)
    assert updated["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert updated["contextual_rag_llm_provider"] == llm_provider.name

    # Unset by disabling contextual RAG
    updated["enable_contextual_rag"] = False
    updated["contextual_rag_llm_name"] = None
    updated["contextual_rag_llm_provider"] = None
    _update_inference_settings(admin_user, updated)

    # Verify it's unset
    final = _get_current_search_settings(admin_user)
    assert final["contextual_rag_llm_name"] is None
    assert final["contextual_rag_llm_provider"] is None


def test_change_contextual_rag_model(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Change contextual RAG from one model to another and verify the switch."""
    second_provider = LLMProviderManager.create(
        name="second-provider",
        default_model_name="gpt-4o",
        user_performing_action=admin_user,
    )

    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = llm_provider.default_model_name
    settings["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, settings)

    updated = _get_current_search_settings(admin_user)
    assert updated["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert updated["contextual_rag_llm_provider"] == llm_provider.name

    # Switch to a different model and provider
    updated["enable_contextual_rag"] = True
    updated["contextual_rag_llm_name"] = second_provider.default_model_name
    updated["contextual_rag_llm_provider"] = second_provider.name
    _update_inference_settings(admin_user, updated)

    final = _get_current_search_settings(admin_user)
    assert final["contextual_rag_llm_name"] == second_provider.default_model_name
    assert final["contextual_rag_llm_provider"] == second_provider.name


def test_change_contextual_rag_provider_only(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Change only the provider while keeping the same model name."""
    shared_model_name = llm_provider.default_model_name
    second_provider = LLMProviderManager.create(
        name="second-provider",
        default_model_name=shared_model_name,
        user_performing_action=admin_user,
    )

    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = shared_model_name
    settings["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, settings)

    updated = _get_current_search_settings(admin_user)
    updated["enable_contextual_rag"] = True
    updated["contextual_rag_llm_provider"] = second_provider.name
    _update_inference_settings(admin_user, updated)

    final = _get_current_search_settings(admin_user)
    assert final["contextual_rag_llm_name"] == shared_model_name
    assert final["contextual_rag_llm_provider"] == second_provider.name


def test_enable_contextual_rag_preserved_on_inference_update(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Verify that enable_contextual_rag cannot be toggled via update-inference-settings
    because it is a preserved field."""
    settings = _get_current_search_settings(admin_user)
    original_enable = settings["enable_contextual_rag"]

    # Attempt to flip the flag
    settings["enable_contextual_rag"] = not original_enable
    settings["contextual_rag_llm_name"] = None
    settings["contextual_rag_llm_provider"] = None
    _update_inference_settings(admin_user, settings)

    updated = _get_current_search_settings(admin_user)
    assert updated["enable_contextual_rag"] == original_enable


def test_model_name_preserved_on_inference_update(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Verify that model_name cannot be changed via update-inference-settings
    because it is a preserved field."""
    settings = _get_current_search_settings(admin_user)
    original_model_name = settings["model_name"]

    settings["model_name"] = "some-other-model"
    _update_inference_settings(admin_user, settings)

    updated = _get_current_search_settings(admin_user)
    assert updated["model_name"] == original_model_name


def test_contextual_rag_settings_reflected_in_get_all(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Verify that contextual RAG updates appear in get-all-search-settings."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = llm_provider.default_model_name
    settings["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, settings)

    all_settings = _get_all_search_settings(admin_user)
    current = all_settings["current_settings"]
    assert current["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert current["contextual_rag_llm_provider"] == llm_provider.name


def test_update_contextual_rag_nonexistent_provider(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Updating with a provider that does not exist should return 400."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = "some-model"
    settings["contextual_rag_llm_provider"] = "nonexistent-provider"

    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/update-inference-settings",
        json=settings,
        headers=admin_user.headers,
    )
    assert response.status_code == 400
    assert "Provider nonexistent-provider not found" in response.json()["detail"]


def test_update_contextual_rag_nonexistent_model(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Updating with a valid provider but a model not in that provider should return 400."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = "nonexistent-model"
    settings["contextual_rag_llm_provider"] = llm_provider.name

    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/update-inference-settings",
        json=settings,
        headers=admin_user.headers,
    )
    assert response.status_code == 400
    assert (
        f"Model nonexistent-model not found in provider {llm_provider.name}"
        in response.json()["detail"]
    )


def test_update_contextual_rag_missing_provider_name(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Providing a model name without a provider name should return 400."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = "some-model"
    settings["contextual_rag_llm_provider"] = None

    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/update-inference-settings",
        json=settings,
        headers=admin_user.headers,
    )
    assert response.status_code == 400
    assert "Provider name and model name are required" in response.json()["detail"]


def test_update_contextual_rag_missing_model_name(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Providing a provider name without a model name should return 400."""
    settings = _get_current_search_settings(admin_user)
    settings["enable_contextual_rag"] = True
    settings["contextual_rag_llm_name"] = None
    settings["contextual_rag_llm_provider"] = llm_provider.name

    response = requests.post(
        f"{SEARCH_SETTINGS_URL}/update-inference-settings",
        json=settings,
        headers=admin_user.headers,
    )
    assert response.status_code == 400
    assert "Provider name and model name are required" in response.json()["detail"]


def test_set_new_search_settings_with_contextual_rag(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Create new search settings with contextual RAG enabled and verify the
    secondary settings contain the correct provider and model."""
    current = _get_current_search_settings(admin_user)

    response = _set_new_search_settings(
        user=admin_user,
        current_settings=current,
        enable_contextual_rag=True,
        contextual_rag_llm_name=llm_provider.default_model_name,
        contextual_rag_llm_provider=llm_provider.name,
    )
    response.raise_for_status()
    assert "id" in response.json()

    secondary = _get_secondary_search_settings(admin_user)
    assert secondary is not None
    assert secondary["enable_contextual_rag"] is True
    assert secondary["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert secondary["contextual_rag_llm_provider"] == llm_provider.name

    _cancel_new_embedding(admin_user)


def test_set_new_search_settings_without_contextual_rag(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Create new search settings with contextual RAG disabled and verify
    the secondary settings have no RAG provider."""
    current = _get_current_search_settings(admin_user)

    response = _set_new_search_settings(
        user=admin_user,
        current_settings=current,
        enable_contextual_rag=False,
    )
    response.raise_for_status()

    secondary = _get_secondary_search_settings(admin_user)
    assert secondary is not None
    assert secondary["enable_contextual_rag"] is False
    assert secondary["contextual_rag_llm_name"] is None
    assert secondary["contextual_rag_llm_provider"] is None

    _cancel_new_embedding(admin_user)


def test_set_new_then_update_inference_settings(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Create new secondary settings, then update the current (primary) settings
    with contextual RAG and verify both are visible through get-all."""
    current = _get_current_search_settings(admin_user)

    # Create secondary settings without contextual RAG
    response = _set_new_search_settings(
        user=admin_user,
        current_settings=current,
        enable_contextual_rag=False,
    )
    response.raise_for_status()

    # Update the *current* (primary) settings with a contextual RAG provider
    current["enable_contextual_rag"] = True
    current["contextual_rag_llm_name"] = llm_provider.default_model_name
    current["contextual_rag_llm_provider"] = llm_provider.name
    _update_inference_settings(admin_user, current)

    all_settings = _get_all_search_settings(admin_user)

    primary = all_settings["current_settings"]
    assert primary["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert primary["contextual_rag_llm_provider"] == llm_provider.name

    secondary = all_settings["secondary_settings"]
    assert secondary is not None
    assert secondary["contextual_rag_llm_name"] is None
    assert secondary["contextual_rag_llm_provider"] is None

    _cancel_new_embedding(admin_user)


def test_set_new_search_settings_replaces_previous_secondary(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,
) -> None:
    """Calling set-new-search-settings twice should retire the first secondary
    and replace it with the second."""
    current = _get_current_search_settings(admin_user)

    # First: no contextual RAG
    resp1 = _set_new_search_settings(
        user=admin_user,
        current_settings=current,
        enable_contextual_rag=False,
    )
    resp1.raise_for_status()
    first_id = resp1.json()["id"]

    # Second: with contextual RAG
    resp2 = _set_new_search_settings(
        user=admin_user,
        current_settings=current,
        enable_contextual_rag=True,
        contextual_rag_llm_name=llm_provider.default_model_name,
        contextual_rag_llm_provider=llm_provider.name,
    )
    resp2.raise_for_status()
    second_id = resp2.json()["id"]

    assert second_id != first_id

    secondary = _get_secondary_search_settings(admin_user)
    assert secondary is not None
    assert secondary["enable_contextual_rag"] is True
    assert secondary["contextual_rag_llm_name"] == llm_provider.default_model_name
    assert secondary["contextual_rag_llm_provider"] == llm_provider.name

    _cancel_new_embedding(admin_user)


================================================
FILE: backend/tests/integration/tests/streaming_endpoints/test_chat_file_attachment.py
================================================
import mimetypes
from typing import Any

import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.test_file_utils import create_test_image
from tests.integration.common_utils.test_file_utils import create_test_text_file
from tests.integration.common_utils.test_models import DATestUser


def test_send_message_with_image_attachment(admin_user: DATestUser) -> None:
    """Test sending a chat message with an attached image file."""
    LLMProviderManager.create(user_performing_action=admin_user)

    # Create a simple test image
    image_file = create_test_image(width=100, height=100, color="blue")

    # Upload the image file
    file_descriptors, error = FileManager.upload_files(
        files=[("test_image.png", image_file)],
        user_performing_action=admin_user,
    )

    assert not error, f"File upload should succeed, but got error: {error}"
    assert len(file_descriptors) == 1, "Should have uploaded one file"
    assert file_descriptors[0]["type"] == "image", "File should be identified as image"

    # Create a chat session
    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)

    # Send a message with the image attachment
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message="What color is this image?",
        user_performing_action=admin_user,
        file_descriptors=file_descriptors,
    )

    # Verify that the message was processed successfully
    assert response.error is None, "Chat response should not have an error"
    assert (
        "blue" in response.full_message.lower()
    ), "Chat response should contain the color of the image"


def test_send_message_with_text_file_attachment(admin_user: DATestUser) -> None:
    """Test sending a chat message with an attached text file."""
    LLMProviderManager.create(user_performing_action=admin_user)

    # Create a simple test text file
    text_file = create_test_text_file(
        "This is a test document.\nIt has multiple lines.\nThis is the third line."
    )

    # Upload the text file
    file_descriptors, error = FileManager.upload_files(
        files=[("test_document.txt", text_file)],
        user_performing_action=admin_user,
    )

    assert not error, f"File upload should succeed, but got error: {error}"
    assert len(file_descriptors) == 1, "Should have uploaded one file"
    assert file_descriptors[0]["type"] in [
        "plain_text",
        "document",
    ], "File should be identified as text or document"

    # Create a chat session
    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)

    # Send a message with the text file attachment
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message="Repeat the contents of this file word for word.",
        user_performing_action=admin_user,
        file_descriptors=file_descriptors,
    )

    # Verify that the message was processed successfully
    assert response.error is None, "Chat response should not have an error"
    assert (
        "third line" in response.full_message.lower()
    ), "Chat response should contain the contents of the file"


def _set_token_threshold(admin_user: DATestUser, threshold_k: int) -> None:
    """Set the file token count threshold via admin settings API."""
    response = requests.put(
        f"{API_SERVER_URL}/admin/settings",
        json={"file_token_count_threshold_k": threshold_k},
        headers=admin_user.headers,
    )
    response.raise_for_status()


def _upload_raw(
    filename: str,
    content: bytes,
    user: DATestUser,
) -> dict[str, Any]:
    """Upload a file and return the full JSON response (user_files + rejected_files)."""
    mime_type, _ = mimetypes.guess_type(filename)
    headers = user.headers.copy()
    headers.pop("Content-Type", None)

    response = requests.post(
        f"{API_SERVER_URL}/user/projects/file/upload",
        files=[("files", (filename, content, mime_type or "application/octet-stream"))],
        headers=headers,
    )
    response.raise_for_status()
    return response.json()


def test_csv_over_token_threshold_uploaded_not_indexed(
    admin_user: DATestUser,
) -> None:
    """CSV exceeding token threshold is uploaded (accepted) but skips indexing."""
    _set_token_threshold(admin_user, threshold_k=1)
    try:
        # ~2000 tokens with default tokenizer, well over 1K threshold
        content = ("x " * 100 + "\n") * 20
        result = _upload_raw("large.csv", content.encode(), admin_user)

        assert len(result["user_files"]) == 1, "CSV should be accepted"
        assert len(result["rejected_files"]) == 0, "CSV should not be rejected"
        assert (
            result["user_files"][0]["status"] == "SKIPPED"
        ), "CSV over threshold should be SKIPPED (uploaded but not indexed)"
        assert (
            result["user_files"][0]["chunk_count"] is None
        ), "Skipped file should have no chunks"
    finally:
        _set_token_threshold(admin_user, threshold_k=200)


def test_csv_under_token_threshold_uploaded_and_indexed(
    admin_user: DATestUser,
) -> None:
    """CSV under token threshold is uploaded and queued for indexing."""
    _set_token_threshold(admin_user, threshold_k=200)
    try:
        content = "col1,col2\na,b\n"
        result = _upload_raw("small.csv", content.encode(), admin_user)

        assert len(result["user_files"]) == 1, "CSV should be accepted"
        assert len(result["rejected_files"]) == 0, "CSV should not be rejected"
        assert (
            result["user_files"][0]["status"] == "PROCESSING"
        ), "CSV under threshold should be PROCESSING (queued for indexing)"
    finally:
        _set_token_threshold(admin_user, threshold_k=200)


def test_txt_over_token_threshold_rejected(
    admin_user: DATestUser,
) -> None:
    """Non-exempt file exceeding token threshold is rejected entirely."""
    _set_token_threshold(admin_user, threshold_k=1)
    try:
        # ~2000 tokens, well over 1K threshold. Unlike CSV, .txt is not
        # exempt from the threshold so the file should be rejected.
        content = ("x " * 100 + "\n") * 20
        result = _upload_raw("big.txt", content.encode(), admin_user)

        assert len(result["user_files"]) == 0, "File should not be accepted"
        assert len(result["rejected_files"]) == 1, "File should be rejected"
        assert "token limit" in result["rejected_files"][0]["reason"].lower()
    finally:
        _set_token_threshold(admin_user, threshold_k=200)


================================================
FILE: backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
================================================
import time

from onyx.configs.constants import MessageType
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.conftest import DocumentBuilderType

TERMINATED_RESPONSE_MESSAGE = (
    "Response was terminated prior to completion, try regenerating."
)

LOADING_RESPONSE_MESSAGE = "Message is loading... Please refresh the page soon."


def test_send_two_messages(basic_user: DATestUser) -> None:
    # Create a chat session
    test_chat_session = ChatSessionManager.create(
        persona_id=0,  # Use default persona
        description="Test chat session for multiple messages",
        user_performing_action=basic_user,
    )

    # Send a message to create some data
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message="hello",
        user_performing_action=basic_user,
    )
    # Verify that the message was processed successfully
    assert response.error is None, "Chat response should not have an error"
    assert len(response.full_message) > 0, "Chat response should not be empty"

    # Verify that the chat session can be retrieved before deletion
    chat_history = ChatSessionManager.get_chat_history(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )
    assert (
        len(chat_history) == 3
    ), "Chat session should have 1 system message, 1 user message, and 1 assistant message"

    response2 = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message="hello again",
        user_performing_action=basic_user,
        parent_message_id=response.assistant_message_id,
    )

    assert response2.error is None, "Chat response should not have an error"
    assert len(response2.full_message) > 0, "Chat response should not be empty"

    # Verify that the chat session can be retrieved before deletion
    chat_history2 = ChatSessionManager.get_chat_history(
        chat_session=test_chat_session,
        user_performing_action=basic_user,
    )
    assert (
        len(chat_history2) == 5
    ), "Chat session should have 1 system message, 2 user messages, and 2 assistant messages"


def test_send_message_simple_with_history(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    LLMProviderManager.create(user_performing_action=admin_user)

    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)

    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message="this is a test message",
        user_performing_action=admin_user,
    )

    assert response.error is None, "Chat response should not have an error"
    assert len(response.full_message) > 0


def test_send_message__basic_searches(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    document_builder: DocumentBuilderType,
) -> None:
    MESSAGE = "run a search for 'test'. Use the internal search tool."
    SHORT_DOC_CONTENT = "test"
    LONG_DOC_CONTENT = "blah blah blah blah" * 100

    LLMProviderManager.create(user_performing_action=admin_user)

    short_doc = document_builder([SHORT_DOC_CONTENT])[0]

    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message=MESSAGE,
        user_performing_action=admin_user,
    )
    assert response.error is None, "Chat response should not have an error"
    assert response.top_documents is not None
    assert len(response.top_documents) == 1
    assert response.top_documents[0].document_id == short_doc.id

    # make sure this doc is really long so that it will be split into multiple chunks
    long_doc = document_builder([LONG_DOC_CONTENT])[0]

    # new chat session for simplicity
    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
    response = ChatSessionManager.send_message(
        chat_session_id=test_chat_session.id,
        message=MESSAGE,
        user_performing_action=admin_user,
    )
    assert response.error is None, "Chat response should not have an error"
    assert response.top_documents is not None
    assert len(response.top_documents) == 2
    # short doc should be more relevant and thus first
    assert response.top_documents[0].document_id == short_doc.id
    assert response.top_documents[1].document_id == long_doc.id


def test_send_message_disconnect_and_cleanup(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """
    Test that when a client disconnects mid-stream:
    1. Client sends a message and disconnects after receiving just 1 packet
    2. Client checks to see that their message ends up completed

    Note: There is an interim period (between disconnect and checkup) where we expect
    to see some sort of 'loading' message.
    """
    LLMProviderManager.create(user_performing_action=admin_user)

    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)

    # Send a message and disconnect after receiving just 1 packet
    ChatSessionManager.send_message_with_disconnect(
        chat_session_id=test_chat_session.id,
        message="What are some important events that happened today?",
        user_performing_action=admin_user,
        disconnect_after_packets=1,
    )

    # Every 5 seconds, check if we have the latest state of the chat session up to a minute
    increment_seconds = 1
    max_seconds = 60
    msg = TERMINATED_RESPONSE_MESSAGE

    for _ in range(max_seconds // increment_seconds):
        time.sleep(increment_seconds)

        # Get the chat history
        chat_history = ChatSessionManager.get_chat_history(
            chat_session=test_chat_session,
            user_performing_action=admin_user,
        )

        # Find the assistant message
        assistant_message = None
        for chat_obj in chat_history:
            if chat_obj.message_type == MessageType.ASSISTANT:
                assistant_message = chat_obj
                break

        assert assistant_message is not None, "Assistant message should exist"
        msg = assistant_message.message

        if msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE:
            break

    assert (
        msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE
    ), f"Assistant message should no longer be the terminated response message after cleanup, got: {msg}"


================================================
FILE: backend/tests/integration/tests/tags/test_tags.py
================================================
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from onyx.db.tag import get_structured_tags_for_document
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


def test_tag_creation_and_update(reset: None) -> None:  # noqa: ARG001
    # create admin user
    admin_user: DATestUser = UserManager.create(email="admin@onyx.app")

    # create a minimal file connector
    cc_pair = CCPairManager.create_from_scratch(
        name="KG-Test-FileConnector",
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": [],
            "file_names": [],
            "zip_metadata_file_id": None,
        },
        user_performing_action=admin_user,
    )
    api_key = APIKeyManager.create(user_performing_action=admin_user)
    api_key.headers.update(admin_user.headers)
    LLMProviderManager.create(user_performing_action=admin_user)

    # create document
    doc1_expected_metadata: dict[str, str | list[str]] = {
        "value": "val",
        "multiple_list": ["a", "b", "c"],
        "single_list": ["x"],
    }
    doc1_expected_tags: set[tuple[str, str, bool]] = {
        ("value", "val", False),
        ("multiple_list", "a", True),
        ("multiple_list", "b", True),
        ("multiple_list", "c", True),
        ("single_list", "x", True),
    }
    doc1 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content="Dummy content",
        document_id="doc1",
        metadata=doc1_expected_metadata,
        api_key=api_key,
    )

    # these are added by the connector
    doc1_expected_metadata["document_id"] = "doc1"
    doc1_expected_tags.add(("document_id", "doc1", False))

    # get document from db
    with get_session_with_current_tenant() as db_session:
        doc1_db = db_session.query(Document).filter(Document.id == doc1.id).first()
        assert doc1_db is not None
        assert doc1_db.id == doc1.id

        doc1_tags = doc1_db.tags

    # check tags
    doc1_tags_data: set[tuple[str, str, bool]] = {
        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_tags
    }
    assert doc1_tags_data == doc1_expected_tags

    # check structured tags
    with get_session_with_current_tenant() as db_session:
        doc1_metadata = get_structured_tags_for_document(doc1.id, db_session)
    assert doc1_metadata == doc1_expected_metadata

    # update metadata
    doc1_new_expected_metadata: dict[str, str | list[str]] = {
        "value": "val2",
        "multiple_list": ["a", "d"],
        "new_value": "new_val",
    }
    doc1_new_expected_tags: set[tuple[str, str, bool]] = {
        ("value", "val2", False),
        ("multiple_list", "a", True),
        ("multiple_list", "d", True),
        ("new_value", "new_val", False),
    }
    doc1_new = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content="Dummy content",
        document_id="doc1",
        metadata=doc1_new_expected_metadata,
        api_key=api_key,
    )
    assert doc1_new.id == doc1.id

    # these are added by the connector
    doc1_new_expected_metadata["document_id"] = "doc1"
    doc1_new_expected_tags.add(("document_id", "doc1", False))

    # get new document from db
    with get_session_with_current_tenant() as db_session:
        doc1_new_db = db_session.query(Document).filter(Document.id == doc1.id).first()
        assert doc1_new_db is not None
        assert doc1_new_db.id == doc1.id

        doc1_new_tags = doc1_new_db.tags

    # check tags
    doc1_new_tags_data: set[tuple[str, str, bool]] = {
        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_new_tags
    }
    assert doc1_new_tags_data == doc1_new_expected_tags

    # check structured tags
    with get_session_with_current_tenant() as db_session:
        doc1_new_metadata = get_structured_tags_for_document(doc1.id, db_session)
    assert doc1_new_metadata == doc1_new_expected_metadata


def test_tag_sharing(reset: None) -> None:  # noqa: ARG001
    # create admin user
    admin_user: DATestUser = UserManager.create(email="admin@onyx.app")

    # create a minimal file connector
    cc_pair = CCPairManager.create_from_scratch(
        name="KG-Test-FileConnector",
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": [],
            "file_names": [],
            "zip_metadata_file_id": None,
        },
        user_performing_action=admin_user,
    )
    api_key = APIKeyManager.create(user_performing_action=admin_user)
    api_key.headers.update(admin_user.headers)
    LLMProviderManager.create(user_performing_action=admin_user)

    # create documents
    doc1_expected_metadata: dict[str, str | list[str]] = {
        "value": "val",
        "list": ["a", "b"],
        "same_key": "x",
    }
    doc1_expected_tags: set[tuple[str, str, bool]] = {
        ("value", "val", False),
        ("list", "a", True),
        ("list", "b", True),
        ("same_key", "x", False),
    }
    doc1 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content="Dummy content",
        document_id="doc1",
        metadata=doc1_expected_metadata,
        api_key=api_key,
    )

    doc2_expected_metadata: dict[str, str | list[str]] = {
        "value": "val",
        "list": ["a", "c"],
        "same_key": ["x"],
    }
    doc2_expected_tags: set[tuple[str, str, bool]] = {
        ("value", "val", False),
        ("list", "a", True),
        ("list", "c", True),
        ("same_key", "x", True),
    }
    doc2 = DocumentManager.seed_doc_with_content(
        cc_pair=cc_pair,
        content="Dummy content",
        document_id="doc2",
        metadata=doc2_expected_metadata,
        api_key=api_key,
    )

    # these are added by the connector
    doc1_expected_metadata["document_id"] = "doc1"
    doc1_expected_tags.add(("document_id", "doc1", False))
    doc2_expected_metadata["document_id"] = "doc2"
    doc2_expected_tags.add(("document_id", "doc2", False))

    # get documents from db
    with get_session_with_current_tenant() as db_session:
        doc1_db = db_session.query(Document).filter(Document.id == doc1.id).first()
        doc2_db = db_session.query(Document).filter(Document.id == doc2.id).first()
        assert doc1_db is not None
        assert doc1_db.id == doc1.id
        assert doc2_db is not None
        assert doc2_db.id == doc2.id

        doc1_tags = doc1_db.tags
        doc2_tags = doc2_db.tags

    # check tags
    doc1_tags_data: set[tuple[str, str, bool]] = {
        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_tags
    }
    assert doc1_tags_data == doc1_expected_tags

    doc2_tags_data: set[tuple[str, str, bool]] = {
        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc2_tags
    }
    assert doc2_tags_data == doc2_expected_tags

    # check tag sharing
    doc1_tagkv_id: dict[tuple[str, str], int] = {
        (tag.tag_key, tag.tag_value): tag.id for tag in doc1_tags
    }
    doc2_tagkv_id: dict[tuple[str, str], int] = {
        (tag.tag_key, tag.tag_value): tag.id for tag in doc2_tags
    }
    assert doc1_tagkv_id[("value", "val")] == doc2_tagkv_id[("value", "val")]
    assert doc1_tagkv_id[("list", "a")] == doc2_tagkv_id[("list", "a")]
    assert doc1_tagkv_id[("same_key", "x")] != doc2_tagkv_id[("same_key", "x")]


================================================
FILE: backend/tests/integration/tests/tools/test_force_tool_use.py
================================================
"""
Integration test for forced tool use to verify that web_search can be forced.
This test verifies that forcing a tool use works through the complete API flow.
"""

import pytest
from sqlalchemy import select

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Tool
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.test_models import DATestImageGenerationConfig
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ToolName


def test_force_tool_use(
    basic_user: DATestUser,
    image_generation_config: DATestImageGenerationConfig,  # noqa: ARG001
) -> None:
    with get_session_with_current_tenant() as db_session:
        image_generation_tool = db_session.execute(
            select(Tool).where(Tool.in_code_tool_id == "ImageGenerationTool")
        ).scalar_one_or_none()
        assert image_generation_tool is not None, "ImageGenerationTool must exist"
        image_generation_tool_id = image_generation_tool.id

    # Create a chat session
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)

    # Send a simple message that wouldn't normally trigger image generation
    # but force the image generation tool to be used
    message = "hi"

    analyzed_response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message=message,
        user_performing_action=basic_user,
        forced_tool_ids=[image_generation_tool_id],
    )

    assert analyzed_response.error is None, "Chat response should not have an error"

    image_generation_tool_used = any(
        tool.tool_name == ToolName.IMAGE_GENERATION
        for tool in analyzed_response.used_tools
    )
    assert (
        image_generation_tool_used
    ), "Image generation tool should have been forced to run"


if __name__ == "__main__":
    # Run with: python -m dotenv -f .vscode/.env run --
    # python -m pytest backend/tests/integration/tests/tools/test_force_tool_use.py -v -s
    pytest.main([__file__, "-v", "-s"])


================================================
FILE: backend/tests/integration/tests/tools/test_image_generation_streaming.py
================================================
"""
Integration test for image generation heartbeat streaming through the /send-message API.
This test verifies that heartbeat packets are properly streamed through the complete API flow.
"""

import time

import pytest

from onyx.server.query_and_chat.streaming_models import StreamingType
from onyx.tools.tool_implementations.images.image_generation_tool import (
    HEARTBEAT_INTERVAL,
)
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.test_models import DATestImageGenerationConfig
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import ToolName

ART_PERSONA_ID = -3


def test_image_generation_streaming(
    basic_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
    image_generation_config: DATestImageGenerationConfig,  # noqa: ARG001
) -> None:
    """
    Test image generation to verify:
    1. The image generation tool is invoked successfully
    2. Heartbeat packets are streamed during generation
    3. The response contains the generated image information

    This test uses the actual API without any mocking.
    """
    # Create a chat session with this persona
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)

    # Send a message that should trigger image generation
    # Use explicit instructions to ensure the image generation tool is used
    message = "Please generate an image of a beautiful sunset over the ocean. Use the image generation tool to create this image."

    start_time = time.monotonic()
    analyzed_response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message=message,
        user_performing_action=basic_user,
    )
    total_time = time.monotonic() - start_time

    assert analyzed_response.error is None, "Chat response should not have an error"

    # 1. Check if image generation tool was used
    image_gen_used = any(
        tool.tool_name == ToolName.IMAGE_GENERATION
        for tool in analyzed_response.used_tools
    )
    assert image_gen_used

    # Verify we received heartbeat packets during image generation
    # Image generation typically takes a few seconds and sends heartbeats
    # every HEARTBEAT_INTERVAL seconds
    expected_heartbeat_packets = max(1, int(total_time / HEARTBEAT_INTERVAL) - 1)
    assert len(analyzed_response.heartbeat_packets) >= expected_heartbeat_packets, (
        f"Expected at least {expected_heartbeat_packets} heartbeats for {total_time:.2f}s execution, "
        f"but got {len(analyzed_response.heartbeat_packets)}"
    )

    # Verify the heartbeat packets have the expected structure
    for packet in analyzed_response.heartbeat_packets:
        assert "obj" in packet, "Heartbeat packet should have 'obj' field"
        assert (
            packet["obj"].get("type") == StreamingType.IMAGE_GENERATION_HEARTBEAT.value
        ), f"Expected heartbeat type to be {StreamingType.IMAGE_GENERATION_HEARTBEAT.value}, got {packet['obj'].get('type')}"
    # 4. Verify image generation tool delta packets with actual image data
    image_tool_results = [
        tool
        for tool in analyzed_response.used_tools
        if tool.tool_name == ToolName.IMAGE_GENERATION
    ]
    assert len(image_tool_results) > 0, "Should have image generation tool results"

    image_tool = image_tool_results[0]
    assert len(image_tool.images) > 0, "Should have generated at least one image"


if __name__ == "__main__":
    # Run with: python -m dotenv -f .vscode/.env run --
    # python -m pytest tests/integration/tests/tools/test_image_generation_heartbeat.py -v -s
    pytest.main([__file__, "-v", "-s"])


================================================
FILE: backend/tests/integration/tests/usergroup/test_add_users_to_group.py
================================================
import os
from uuid import uuid4

import pytest
import requests

from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import DATestUserGroup


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_add_users_to_group(reset: None) -> None:  # noqa: ARG001
    admin_user: DATestUser = UserManager.create(name="admin_for_add_user")
    user_to_add: DATestUser = UserManager.create(name="basic_user_to_add")

    user_group: DATestUserGroup = UserGroupManager.create(
        name="add-user-test-group",
        user_ids=[admin_user.id],
        user_performing_action=admin_user,
    )

    UserGroupManager.wait_for_sync(
        user_performing_action=admin_user,
        user_groups_to_check=[user_group],
    )

    updated_user_group = UserGroupManager.add_users(
        user_group=user_group,
        user_ids=[user_to_add.id],
        user_performing_action=admin_user,
    )

    fetched_user_groups = UserGroupManager.get_all(user_performing_action=admin_user)
    fetched_user_group = next(
        group for group in fetched_user_groups if group.id == updated_user_group.id
    )

    fetched_user_ids = {user.id for user in fetched_user_group.users}
    assert admin_user.id in fetched_user_ids
    assert user_to_add.id in fetched_user_ids


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_add_users_to_group_invalid_user(reset: None) -> None:  # noqa: ARG001
    admin_user: DATestUser = UserManager.create(name="admin_for_add_user_invalid")

    user_group: DATestUserGroup = UserGroupManager.create(
        name="add-user-invalid-test-group",
        user_ids=[admin_user.id],
        user_performing_action=admin_user,
    )

    invalid_user_id = str(uuid4())
    response = requests.post(
        f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/add-users",
        json={"user_ids": [invalid_user_id]},
        headers=admin_user.headers,
    )

    assert response.status_code == 404
    assert "not found" in response.text.lower()


================================================
FILE: backend/tests/integration/tests/usergroup/test_group_membership_updates_user_permissions.py
================================================
import os

import pytest

from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import Permission
from onyx.db.models import PermissionGrant
from onyx.db.models import UserGroup as UserGroupModel
from onyx.db.permissions import recompute_permissions_for_group__no_commit
from onyx.db.permissions import recompute_user_permissions__no_commit
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_user_gets_permissions_when_added_to_group(
    reset: None,  # noqa: ARG001
) -> None:
    admin_user: DATestUser = UserManager.create(name="admin_for_perm_test")
    basic_user: DATestUser = UserManager.create(name="basic_user_for_perm_test")

    # basic_user starts with only "basic" from the default group
    initial_permissions = UserManager.get_permissions(basic_user)
    assert "basic" in initial_permissions
    assert "add:agents" not in initial_permissions

    # Create a new group and add basic_user
    group = UserGroupManager.create(
        name="perm-test-group",
        user_ids=[admin_user.id, basic_user.id],
        user_performing_action=admin_user,
    )

    # Grant a non-basic permission to the group and recompute
    with get_session_with_current_tenant() as db_session:
        db_group = db_session.get(UserGroupModel, group.id)
        assert db_group is not None
        db_session.add(
            PermissionGrant(
                group_id=db_group.id,
                permission=Permission.ADD_AGENTS,
                grant_source="SYSTEM",
            )
        )
        db_session.flush()
        recompute_user_permissions__no_commit(basic_user.id, db_session)
        db_session.commit()

    # Verify the user gained the new permission (expanded includes read:agents)
    updated_permissions = UserManager.get_permissions(basic_user)
    assert (
        "add:agents" in updated_permissions
    ), f"User should have 'add:agents' after group grant, got: {updated_permissions}"
    assert (
        "read:agents" in updated_permissions
    ), f"User should have implied 'read:agents', got: {updated_permissions}"
    assert "basic" in updated_permissions


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_group_permission_change_propagates_to_all_members(
    reset: None,  # noqa: ARG001
) -> None:
    admin_user: DATestUser = UserManager.create(name="admin_propagate")
    user_a: DATestUser = UserManager.create(name="user_a_propagate")
    user_b: DATestUser = UserManager.create(name="user_b_propagate")

    group = UserGroupManager.create(
        name="propagate-test-group",
        user_ids=[admin_user.id, user_a.id, user_b.id],
        user_performing_action=admin_user,
    )

    # Neither user should have add:agents yet
    for u in (user_a, user_b):
        assert "add:agents" not in UserManager.get_permissions(u)

    # Grant add:agents to the group, then batch-recompute
    with get_session_with_current_tenant() as db_session:
        grant = PermissionGrant(
            group_id=group.id,
            permission=Permission.ADD_AGENTS,
            grant_source="SYSTEM",
        )
        db_session.add(grant)
        db_session.flush()
        recompute_permissions_for_group__no_commit(group.id, db_session)
        db_session.commit()

    # Both users should now have the permission (plus implied read:agents)
    for u in (user_a, user_b):
        perms = UserManager.get_permissions(u)
        assert "add:agents" in perms, f"{u.id} missing add:agents: {perms}"
        assert "read:agents" in perms, f"{u.id} missing implied read:agents: {perms}"

    # Soft-delete the grant and recompute — permission should be removed
    with get_session_with_current_tenant() as db_session:
        db_grant = (
            db_session.query(PermissionGrant)
            .filter_by(group_id=group.id, permission=Permission.ADD_AGENTS)
            .first()
        )
        assert db_grant is not None
        db_grant.is_deleted = True
        db_session.flush()
        recompute_permissions_for_group__no_commit(group.id, db_session)
        db_session.commit()

    for u in (user_a, user_b):
        perms = UserManager.get_permissions(u)
        assert "add:agents" not in perms, f"{u.id} still has add:agents: {perms}"


================================================
FILE: backend/tests/integration/tests/usergroup/test_new_group_gets_basic_permission.py
================================================
import os

import pytest

from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_new_group_gets_basic_permission(reset: None) -> None:  # noqa: ARG001
    admin_user: DATestUser = UserManager.create(name="admin_for_basic_perm")

    user_group = UserGroupManager.create(
        name="basic-perm-test-group",
        user_ids=[admin_user.id],
        user_performing_action=admin_user,
    )

    permissions = UserGroupManager.get_permissions(
        user_group=user_group,
        user_performing_action=admin_user,
    )

    assert (
        "basic" in permissions
    ), f"New group should have 'basic' permission, got: {permissions}"


================================================
FILE: backend/tests/integration/tests/usergroup/test_user_group_deletion.py
================================================
"""
This tests the deletion of a user group with the following foreign key constraints:
- connector_credential_pair
- user
- credential
- llm_provider
- document_set
- token_rate_limit (Not Implemented)
- persona
"""

import os

import pytest

from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.document_set import DocumentSetManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestCredential
from tests.integration.common_utils.test_models import DATestDocumentSet
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestPersona
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import DATestUserGroup
from tests.integration.common_utils.vespa import vespa_fixture


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_user_group_deletion(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,  # noqa: ARG001
) -> None:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create connectors
    cc_pair = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # Create user group with a cc_pair and a user
    user_group: DATestUserGroup = UserGroupManager.create(
        user_ids=[admin_user.id],
        cc_pair_ids=[cc_pair.id],
        user_performing_action=admin_user,
    )
    cc_pair.groups = [user_group.id]

    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group], user_performing_action=admin_user
    )
    UserGroupManager.verify(
        user_group=user_group,
        user_performing_action=admin_user,
    )
    CCPairManager.verify(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )

    # Create other objects that are related to the user group
    credential: DATestCredential = CredentialManager.create(
        groups=[user_group.id],
        user_performing_action=admin_user,
    )
    document_set: DATestDocumentSet = DocumentSetManager.create(
        cc_pair_ids=[cc_pair.id],
        groups=[user_group.id],
        user_performing_action=admin_user,
    )
    llm_provider: DATestLLMProvider = LLMProviderManager.create(
        groups=[user_group.id],
        user_performing_action=admin_user,
    )
    persona: DATestPersona = PersonaManager.create(
        groups=[user_group.id],
        user_performing_action=admin_user,
    )

    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group], user_performing_action=admin_user
    )
    UserGroupManager.verify(
        user_group=user_group,
        user_performing_action=admin_user,
    )

    # Delete the user group
    UserGroupManager.delete(
        user_group=user_group,
        user_performing_action=admin_user,
    )

    UserGroupManager.wait_for_deletion_completion(
        user_groups_to_check=[user_group], user_performing_action=admin_user
    )

    # Set our expected local representations to empty
    credential.groups = []
    document_set.groups = []
    llm_provider.groups = []
    persona.groups = []

    # Verify that the local representations were updated
    CredentialManager.verify(
        credential=credential,
        user_performing_action=admin_user,
    )

    DocumentSetManager.verify(
        document_set=document_set,
        user_performing_action=admin_user,
    )

    LLMProviderManager.verify(
        llm_provider=llm_provider,
        user_performing_action=admin_user,
    )

    PersonaManager.verify(
        persona=persona,
        user_performing_action=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/usergroup/test_usergroup_syncing.py
================================================
import os

import pytest

from onyx.server.documents.models import DocumentSource
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import DATestUserGroup
from tests.integration.common_utils.vespa import vespa_fixture


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="User group tests are enterprise only",
)
def test_removing_connector(
    reset: None,  # noqa: ARG001
    vespa_client: vespa_fixture,
) -> None:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create api key
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )

    # create connectors
    cc_pair_1 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )
    cc_pair_2 = CCPairManager.create_from_scratch(
        source=DocumentSource.INGESTION_API,
        user_performing_action=admin_user,
    )

    # seed documents
    cc_pair_1.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_1,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    cc_pair_2.documents = DocumentManager.seed_dummy_docs(
        cc_pair=cc_pair_2,
        num_docs=NUM_DOCS,
        api_key=api_key,
    )

    # Create user group
    user_group_1: DATestUserGroup = UserGroupManager.create(
        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],
        user_performing_action=admin_user,
    )

    UserGroupManager.wait_for_sync(
        user_groups_to_check=[user_group_1], user_performing_action=admin_user
    )

    UserGroupManager.verify(
        user_group=user_group_1,
        user_performing_action=admin_user,
    )

    # make sure cc_pair_1 docs are user_group_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        group_names=[user_group_1.name],
        doc_creating_user=admin_user,
    )

    # make sure cc_pair_2 docs are user_group_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        group_names=[user_group_1.name],
        doc_creating_user=admin_user,
    )

    # remove cc_pair_2 from document set
    user_group_1.cc_pair_ids = [cc_pair_1.id]
    UserGroupManager.edit(
        user_group_1,
        user_performing_action=admin_user,
    )

    UserGroupManager.wait_for_sync(
        user_performing_action=admin_user,
    )

    # make sure cc_pair_1 docs are user_group_1 only
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_1,
        group_names=[user_group_1.name],
        doc_creating_user=admin_user,
    )

    # make sure cc_pair_2 docs have no user group
    DocumentManager.verify(
        vespa_client=vespa_client,
        cc_pair=cc_pair_2,
        group_names=[],
        doc_creating_user=admin_user,
    )


================================================
FILE: backend/tests/integration/tests/users/test_default_group_assignment.py
================================================
"""Integration tests for default group assignment on user registration.

Verifies that:
- The first registered user is assigned to the Admin default group
- Subsequent registered users are assigned to the Basic default group
- account_type is set to STANDARD for email/password registrations
"""

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


def test_default_group_assignment_on_registration(reset: None) -> None:  # noqa: ARG001
    # Register first user — should become admin
    admin_user: DATestUser = UserManager.create(name="first_user")
    assert admin_user.role == UserRole.ADMIN

    # Register second user — should become basic
    basic_user: DATestUser = UserManager.create(name="second_user")
    assert basic_user.role == UserRole.BASIC

    # Fetch all groups including default ones
    all_groups = UserGroupManager.get_all(
        user_performing_action=admin_user,
        include_default=True,
    )

    # Find the default Admin and Basic groups
    admin_group = next(
        (g for g in all_groups if g.name == "Admin" and g.is_default), None
    )
    basic_group = next(
        (g for g in all_groups if g.name == "Basic" and g.is_default), None
    )
    assert admin_group is not None, "Admin default group not found"
    assert basic_group is not None, "Basic default group not found"

    # Verify admin user is in Admin group and NOT in Basic group
    admin_group_user_ids = {str(u.id) for u in admin_group.users}
    basic_group_user_ids = {str(u.id) for u in basic_group.users}

    assert (
        admin_user.id in admin_group_user_ids
    ), "First user should be in Admin default group"
    assert (
        admin_user.id not in basic_group_user_ids
    ), "First user should NOT be in Basic default group"

    # Verify basic user is in Basic group and NOT in Admin group
    assert (
        basic_user.id in basic_group_user_ids
    ), "Second user should be in Basic default group"
    assert (
        basic_user.id not in admin_group_user_ids
    ), "Second user should NOT be in Admin default group"

    # Verify account_type is STANDARD for both users via user listing API
    paginated_result = UserManager.get_user_page(
        user_performing_action=admin_user,
        page_num=0,
        page_size=10,
    )
    users_by_id = {str(u.id): u for u in paginated_result.items}

    admin_snapshot = users_by_id.get(admin_user.id)
    basic_snapshot = users_by_id.get(basic_user.id)
    assert admin_snapshot is not None, "Admin user not found in user listing"
    assert basic_snapshot is not None, "Basic user not found in user listing"

    assert (
        admin_snapshot.account_type == AccountType.STANDARD
    ), f"Admin user account_type should be STANDARD, got {admin_snapshot.account_type}"
    assert (
        basic_snapshot.account_type == AccountType.STANDARD
    ), f"Basic user account_type should be STANDARD, got {basic_snapshot.account_type}"


================================================
FILE: backend/tests/integration/tests/users/test_password_signup_upgrade.py
================================================
"""Integration tests for password signup upgrade paths.

Verifies that when a BOT or EXT_PERM_USER user signs up via email/password:
- Their account_type is upgraded to STANDARD
- They are assigned to the Basic default group
- They gain the correct effective permissions
"""

import pytest

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


def _get_default_group_member_emails(
    admin_user: DATestUser,
    group_name: str,
) -> set[str]:
    """Get the set of emails of all members in a named default group."""
    all_groups = UserGroupManager.get_all(admin_user, include_default=True)
    matched = [g for g in all_groups if g.is_default and g.name == group_name]
    assert matched, f"Default group '{group_name}' not found"
    return {u.email for u in matched[0].users}


@pytest.mark.parametrize(
    "target_role",
    [UserRole.EXT_PERM_USER, UserRole.SLACK_USER],
    ids=["ext_perm_user", "slack_user"],
)
def test_password_signup_upgrade(
    reset: None,  # noqa: ARG001
    target_role: UserRole,
) -> None:
    """When a non-web user signs up via email/password, they should be
    upgraded to STANDARD account_type and assigned to the Basic default group."""
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    test_email = f"{target_role.value}_upgrade@example.com"
    test_user = UserManager.create(email=test_email)

    test_user = UserManager.set_role(
        user_to_set=test_user,
        target_role=target_role,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    # Verify user was removed from Basic group after downgrade
    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert (
        test_email not in basic_emails
    ), f"{target_role.value} should not be in Basic default group"

    # Re-register with the same email — triggers the password signup upgrade
    upgraded_user = UserManager.create(email=test_email)

    assert upgraded_user.role == UserRole.BASIC

    paginated = UserManager.get_user_page(
        user_performing_action=admin_user,
        page_num=0,
        page_size=10,
    )
    user_snapshot = next(
        (u for u in paginated.items if str(u.id) == upgraded_user.id), None
    )
    assert user_snapshot is not None
    assert (
        user_snapshot.account_type == AccountType.STANDARD
    ), f"Expected STANDARD, got {user_snapshot.account_type}"

    # Verify user is now in the Basic default group
    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert (
        test_email in basic_emails
    ), f"Upgraded user '{test_email}' not found in Basic default group"


def test_password_signup_upgrade_propagates_permissions(
    reset: None,  # noqa: ARG001
) -> None:
    """When an EXT_PERM_USER or SLACK_USER signs up via password, they should
    gain the 'basic' permission through the Basic default group assignment."""
    admin_user: DATestUser = UserManager.create(email="admin@example.com")

    # --- EXT_PERM_USER path ---
    ext_email = "ext_perms_check@example.com"
    ext_user = UserManager.create(email=ext_email)

    initial_perms = UserManager.get_permissions(ext_user)
    assert "basic" in initial_perms

    ext_user = UserManager.set_role(
        user_to_set=ext_user,
        target_role=UserRole.EXT_PERM_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert ext_email not in basic_emails

    upgraded = UserManager.create(email=ext_email)
    assert upgraded.role == UserRole.BASIC

    perms = UserManager.get_permissions(upgraded)
    assert (
        "basic" in perms
    ), f"Upgraded EXT_PERM_USER should have 'basic' permission, got: {perms}"

    # --- SLACK_USER path ---
    slack_email = "slack_perms_check@example.com"
    slack_user = UserManager.create(email=slack_email)

    slack_user = UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert slack_email not in basic_emails

    upgraded = UserManager.create(email=slack_email)
    assert upgraded.role == UserRole.BASIC

    perms = UserManager.get_permissions(upgraded)
    assert (
        "basic" in perms
    ), f"Upgraded SLACK_USER should have 'basic' permission, got: {perms}"


================================================
FILE: backend/tests/integration/tests/users/test_reactivation_groups.py
================================================
"""Integration tests for default group reconciliation on user reactivation.

Verifies that:
- A deactivated user retains default group membership after reactivation
- Reactivation via the admin API reconciles missing group membership
"""

from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.managers.user_group import UserGroupManager
from tests.integration.common_utils.test_models import DATestUser


def _get_default_group_member_emails(
    admin_user: DATestUser,
    group_name: str,
) -> set[str]:
    """Get the set of emails of all members in a named default group."""
    all_groups = UserGroupManager.get_all(admin_user, include_default=True)
    matched = [g for g in all_groups if g.is_default and g.name == group_name]
    assert matched, f"Default group '{group_name}' not found"
    return {u.email for u in matched[0].users}


def test_reactivated_user_retains_default_group(
    reset: None,  # noqa: ARG001
) -> None:
    """Deactivating and reactivating a user should preserve their
    default group membership."""
    admin_user: DATestUser = UserManager.create(name="admin_user")
    basic_user: DATestUser = UserManager.create(name="basic_user")

    # Verify user is in Basic group initially
    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert basic_user.email in basic_emails

    # Deactivate the user
    UserManager.set_status(
        user_to_set=basic_user,
        target_status=False,
        user_performing_action=admin_user,
    )

    # Reactivate the user
    UserManager.set_status(
        user_to_set=basic_user,
        target_status=True,
        user_performing_action=admin_user,
    )

    # Verify user is still in Basic group after reactivation
    basic_emails = _get_default_group_member_emails(admin_user, "Basic")
    assert (
        basic_user.email in basic_emails
    ), "Reactivated user should still be in Basic default group"


================================================
FILE: backend/tests/integration/tests/users/test_seat_limit.py
================================================
"""Integration tests for seat limit enforcement on user creation paths.

Verifies that when a license with a seat limit is active, new user
creation (registration, invite, reactivation) is blocked with HTTP 402.
"""

from datetime import datetime
from datetime import timedelta

import redis
import requests

from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import PlanType
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PORT
from onyx.server.settings.models import ApplicationStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.user import UserManager

# TenantRedis prefixes every key with "{tenant_id}:".
# Single-tenant deployments use "public" as the tenant id.
_LICENSE_REDIS_KEY = "public:license:metadata"


def _seed_license(r: redis.Redis, seats: int) -> None:
    """Write a LicenseMetadata entry into Redis with the given seat cap."""
    now = datetime.utcnow()
    metadata = LicenseMetadata(
        tenant_id="public",
        organization_name="Test Org",
        seats=seats,
        used_seats=0,  # check_seat_availability recalculates from DB
        plan_type=PlanType.ANNUAL,
        issued_at=now,
        expires_at=now + timedelta(days=365),
        status=ApplicationStatus.ACTIVE,
        source=LicenseSource.MANUAL_UPLOAD,
    )
    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)


def _clear_license(r: redis.Redis) -> None:
    r.delete(_LICENSE_REDIS_KEY)


def _redis() -> redis.Redis:
    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)


# ------------------------------------------------------------------
# Registration
# ------------------------------------------------------------------


def test_registration_blocked_when_seats_full(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """POST /auth/register returns 402 when the seat limit is reached."""
    r = _redis()

    # First user is admin — occupies 1 seat
    UserManager.create(name="admin_user")

    # License allows exactly 1 seat → already full
    _seed_license(r, seats=1)

    try:
        response = requests.post(
            url=f"{API_SERVER_URL}/auth/register",
            json={
                "email": "blocked@example.com",
                "username": "blocked@example.com",
                "password": "TestPassword123!",
            },
            headers=GENERAL_HEADERS,
        )
        assert response.status_code == 402
    finally:
        _clear_license(r)


# ------------------------------------------------------------------
# Invitation
# ------------------------------------------------------------------


def test_invite_blocked_when_seats_full(reset: None) -> None:  # noqa: ARG001
    """PUT /manage/admin/users returns 402 when the seat limit is reached."""
    r = _redis()

    admin_user = UserManager.create(name="admin_user")

    _seed_license(r, seats=1)

    try:
        response = requests.put(
            url=f"{API_SERVER_URL}/manage/admin/users",
            json={"emails": ["newuser@example.com"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 402
    finally:
        _clear_license(r)


# ------------------------------------------------------------------
# Reactivation
# ------------------------------------------------------------------


def test_reactivation_blocked_when_seats_full(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """PATCH /manage/admin/activate-user returns 402 when seats are full."""
    r = _redis()

    admin_user = UserManager.create(name="admin_user")
    basic_user = UserManager.create(name="basic_user")

    # Deactivate the basic user (frees a seat in the DB count)
    UserManager.set_status(
        basic_user, target_status=False, user_performing_action=admin_user
    )

    # Set license to 1 seat — only admin counts now
    _seed_license(r, seats=1)

    try:
        response = requests.patch(
            url=f"{API_SERVER_URL}/manage/admin/activate-user",
            json={"user_email": basic_user.email},
            headers=admin_user.headers,
        )
        assert response.status_code == 402
    finally:
        _clear_license(r)


# ------------------------------------------------------------------
# No license → no enforcement
# ------------------------------------------------------------------


def test_registration_allowed_without_license(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Without a license in Redis, registration is unrestricted."""
    r = _redis()

    # Make sure there is no cached license
    _clear_license(r)

    UserManager.create(name="admin_user")

    # Second user should register without issue
    second_user = UserManager.create(name="second_user")
    assert second_user is not None


================================================
FILE: backend/tests/integration/tests/users/test_slack_user_deactivation.py
================================================
"""Integration tests for Slack user deactivation and reactivation via admin endpoints.

Verifies that:
- Slack users can be deactivated by admins
- Deactivated Slack users can be reactivated by admins
- Reactivation is blocked when the seat limit is reached
"""

from datetime import datetime
from datetime import timedelta

import redis
import requests

from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import PlanType
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
from onyx.configs.app_configs import REDIS_PORT
from onyx.server.settings.models import ApplicationStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser

_LICENSE_REDIS_KEY = "public:license:metadata"


def _seed_license(r: redis.Redis, seats: int) -> None:
    now = datetime.utcnow()
    metadata = LicenseMetadata(
        tenant_id="public",
        organization_name="Test Org",
        seats=seats,
        used_seats=0,
        plan_type=PlanType.ANNUAL,
        issued_at=now,
        expires_at=now + timedelta(days=365),
        status=ApplicationStatus.ACTIVE,
        source=LicenseSource.MANUAL_UPLOAD,
    )
    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)


def _clear_license(r: redis.Redis) -> None:
    r.delete(_LICENSE_REDIS_KEY)


def _redis() -> redis.Redis:
    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)


def _get_user_is_active(email: str, admin_user: DATestUser) -> bool:
    """Look up a user's is_active flag via the admin users list endpoint."""
    result = UserManager.get_user_page(
        user_performing_action=admin_user,
        search_query=email,
    )
    matching = [u for u in result.items if u.email == email]
    assert len(matching) == 1, f"Expected exactly 1 user with email {email}"
    return matching[0].is_active


def test_slack_user_deactivate_and_reactivate(
    reset: None,  # noqa: ARG001
) -> None:  # noqa: ARG001
    """Admin can deactivate and then reactivate a Slack user."""
    admin_user = UserManager.create(name="admin_user")

    slack_user = UserManager.create(name="slack_test_user")
    slack_user = UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    # Deactivate the Slack user
    UserManager.set_status(
        slack_user, target_status=False, user_performing_action=admin_user
    )
    assert _get_user_is_active(slack_user.email, admin_user) is False

    # Reactivate the Slack user
    UserManager.set_status(
        slack_user, target_status=True, user_performing_action=admin_user
    )
    assert _get_user_is_active(slack_user.email, admin_user) is True


def test_slack_user_reactivation_blocked_by_seat_limit(
    reset: None,  # noqa: ARG001
) -> None:
    """Reactivating a deactivated Slack user returns 402 when seats are full."""
    r = _redis()

    admin_user = UserManager.create(name="admin_user")

    slack_user = UserManager.create(name="slack_test_user")
    slack_user = UserManager.set_role(
        user_to_set=slack_user,
        target_role=UserRole.SLACK_USER,
        user_performing_action=admin_user,
        explicit_override=True,
    )

    UserManager.set_status(
        slack_user, target_status=False, user_performing_action=admin_user
    )

    # License allows 1 seat — only admin counts
    _seed_license(r, seats=1)

    try:
        response = requests.patch(
            url=f"{API_SERVER_URL}/manage/admin/activate-user",
            json={"user_email": slack_user.email},
            headers=admin_user.headers,
        )
        assert response.status_code == 402
    finally:
        _clear_license(r)


================================================
FILE: backend/tests/integration/tests/users/test_user_pagination.py
================================================
from onyx.auth.schemas import UserRole
from onyx.server.models import FullUserSnapshot
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser


# Gets a page of users from the db that match the given parameters and then
# compares that returned page to the list of users passed into the function
# to verify that the pagination and filtering works as expected.
def _verify_user_pagination(
    users: list[DATestUser],
    user_performing_action: DATestUser,
    page_size: int = 5,
    search_query: str | None = None,
    role_filter: list[UserRole] | None = None,
    is_active_filter: bool | None = None,
) -> None:
    retrieved_users: list[FullUserSnapshot] = []

    for i in range(0, len(users), page_size):
        paginated_result = UserManager.get_user_page(
            page_num=i // page_size,
            page_size=page_size,
            search_query=search_query,
            role_filter=role_filter,
            is_active_filter=is_active_filter,
            user_performing_action=user_performing_action,
        )

        # Verify that the total items is equal to the length of the users list
        assert paginated_result.total_items == len(users)
        # Verify that the number of items in the page is equal to the page size
        assert len(paginated_result.items) == page_size
        # Add the retrieved users to the list of retrieved users
        retrieved_users.extend(paginated_result.items)

    # Create a set of all the expected emails
    all_expected_emails = set([user.email for user in users])
    # Create a set of all the retrieved emails
    all_retrieved_emails = set([user.email for user in retrieved_users])

    # Verify that the set of retrieved emails is equal to the set of expected emails
    assert all_expected_emails == all_retrieved_emails


def test_user_pagination(reset: None) -> None:  # noqa: ARG001
    # Create an admin user to perform actions
    user_performing_action: DATestUser = UserManager.create(
        name="admin_performing_action"
    )

    # Create 9 admin users
    admin_users: list[DATestUser] = UserManager.create_test_users(
        user_name_prefix="admin",
        count=9,
        role=UserRole.ADMIN,
        user_performing_action=user_performing_action,
    )

    # Add the user_performing_action to the list of admins
    admin_users.append(user_performing_action)

    # Create 20 basic users
    basic_users: list[DATestUser] = UserManager.create_test_users(
        user_name_prefix="basic",
        count=10,
        role=UserRole.BASIC,
        user_performing_action=user_performing_action,
    )

    # Create 10 global curators
    global_curators: list[DATestUser] = UserManager.create_test_users(
        user_name_prefix="global_curator",
        count=10,
        role=UserRole.GLOBAL_CURATOR,
        user_performing_action=user_performing_action,
    )

    # Create 10 inactive admins
    inactive_admins: list[DATestUser] = UserManager.create_test_users(
        user_name_prefix="inactive_admin",
        count=10,
        role=UserRole.ADMIN,
        is_active=False,
        user_performing_action=user_performing_action,
    )

    # Create 10 global curator users with an email containing "search"
    searchable_curators: list[DATestUser] = UserManager.create_test_users(
        user_name_prefix="search_curator",
        count=10,
        role=UserRole.GLOBAL_CURATOR,
        user_performing_action=user_performing_action,
    )

    # Combine all the users lists into the all_users list
    all_users: list[DATestUser] = (
        admin_users
        + basic_users
        + global_curators
        + inactive_admins
        + searchable_curators
    )
    for user in all_users:
        # Verify that the user's role in the db matches
        # the role in the user object
        assert UserManager.is_role(user, user.role)
        # Verify that the user's status in the db matches
        # the status in the user object
        assert UserManager.is_status(user, user.is_active)

    # Verify pagination
    _verify_user_pagination(
        users=all_users,
        user_performing_action=user_performing_action,
    )

    # Verify filtering by role
    _verify_user_pagination(
        users=admin_users + inactive_admins,
        role_filter=[UserRole.ADMIN],
        user_performing_action=user_performing_action,
    )
    # Verify filtering by status
    _verify_user_pagination(
        users=inactive_admins,
        is_active_filter=False,
        user_performing_action=user_performing_action,
    )
    # Verify filtering by search query
    _verify_user_pagination(
        users=searchable_curators,
        search_query="search",
        user_performing_action=user_performing_action,
    )

    # Verify filtering by role and status
    _verify_user_pagination(
        users=inactive_admins,
        role_filter=[UserRole.ADMIN],
        is_active_filter=False,
        user_performing_action=user_performing_action,
    )

    # Verify filtering by role and search query
    _verify_user_pagination(
        users=searchable_curators,
        role_filter=[UserRole.GLOBAL_CURATOR],
        search_query="search",
        user_performing_action=user_performing_action,
    )

    # Verify filtering by role and status and search query
    _verify_user_pagination(
        users=inactive_admins,
        role_filter=[UserRole.ADMIN],
        is_active_filter=False,
        search_query="inactive_ad",
        user_performing_action=user_performing_action,
    )

    # Verify filtering by multiple roles (admin and global curator)
    _verify_user_pagination(
        users=admin_users + global_curators + inactive_admins + searchable_curators,
        role_filter=[UserRole.ADMIN, UserRole.GLOBAL_CURATOR],
        user_performing_action=user_performing_action,
    )


================================================
FILE: backend/tests/integration/tests/web_search/test_web_search_api.py
================================================
import os

import pytest
import requests

from shared_configs.enums import WebContentProviderType
from shared_configs.enums import WebSearchProviderType
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.test_models import DATestUser


class TestOnyxWebCrawler:
    """
    Integration tests for the Onyx web crawler functionality.

    These tests verify that the built-in crawler can fetch and parse
    content from public websites correctly.
    """

    @pytest.mark.skip(reason="Temporarily disabled")
    def test_fetches_public_url_successfully(self, admin_user: DATestUser) -> None:
        """Test that the crawler can fetch content from a public URL."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["https://example.com/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200, response.text
        data = response.json()

        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value
        assert len(data["results"]) == 1

        result = data["results"][0]
        assert "content" in result
        content = result["content"]

        # example.com is a static page maintained by IANA with known content
        # Verify exact expected text from the page
        assert "Example Domain" in content
        assert "This domain is for use in" in content
        assert "documentation" in content or "illustrative" in content

    @pytest.mark.skip(reason="Temporarily disabled")
    def test_fetches_multiple_urls(self, admin_user: DATestUser) -> None:
        """Test that the crawler can fetch multiple URLs in one request."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={
                "urls": [
                    "https://example.com/",
                    "https://www.iana.org/domains/reserved",
                ]
            },
            headers=admin_user.headers,
        )
        assert response.status_code == 200, response.text
        data = response.json()

        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value
        assert len(data["results"]) == 2

        for result in data["results"]:
            assert "content" in result

    def test_handles_nonexistent_domain(self, admin_user: DATestUser) -> None:
        """Test that the crawler handles non-existent domains gracefully."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["https://this-domain-definitely-does-not-exist-12345.com/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200, response.text
        data = response.json()

        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value

        # The API filters out docs with no title/content, so unreachable domains return no results
        assert data["results"] == []

    def test_handles_404_page(self, admin_user: DATestUser) -> None:
        """Test that the crawler handles 404 responses gracefully."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["https://example.com/this-page-does-not-exist-12345"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200, response.text
        data = response.json()

        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value

        # Non-200 responses are treated as non-content and filtered out
        assert data["results"] == []

    def test_https_url_with_path(self, admin_user: DATestUser) -> None:
        """Test that the crawler handles HTTPS URLs with paths correctly."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["https://www.iana.org/about"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200, response.text
        data = response.json()

        assert len(data["results"]) == 1
        result = data["results"][0]
        assert "content" in result


class TestSsrfProtection:
    """
    Integration tests for SSRF protection on the /open-urls endpoint.

    These tests verify that the endpoint correctly blocks requests to:
    - Internal/private IP addresses
    - Cloud metadata endpoints
    - Blocked hostnames (Kubernetes, cloud metadata, etc.)
    """

    def test_blocks_localhost_ip(self, admin_user: DATestUser) -> None:
        """Test that requests to localhost (127.0.0.1) are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://127.0.0.1/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        # URL should be processed but return empty content (blocked by SSRF protection)
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_private_ip_10_network(self, admin_user: DATestUser) -> None:
        """Test that requests to 10.x.x.x private network are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://10.0.0.1/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_private_ip_192_168_network(self, admin_user: DATestUser) -> None:
        """Test that requests to 192.168.x.x private network are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://192.168.1.1/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_private_ip_172_network(self, admin_user: DATestUser) -> None:
        """Test that requests to 172.16-31.x.x private network are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://172.16.0.1/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_aws_metadata_endpoint(self, admin_user: DATestUser) -> None:
        """Test that requests to AWS metadata endpoint (169.254.169.254) are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://169.254.169.254/latest/meta-data/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_kubernetes_metadata_hostname(self, admin_user: DATestUser) -> None:
        """Test that requests to Kubernetes internal hostname are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://kubernetes.default.svc.cluster.local/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_google_metadata_hostname(self, admin_user: DATestUser) -> None:
        """Test that requests to Google Cloud metadata hostname are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://metadata.google.internal/"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_blocks_localhost_with_port(self, admin_user: DATestUser) -> None:
        """Test that requests to localhost with custom port are blocked."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={"urls": ["http://127.0.0.1:8080/metrics"]},
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["results"]) == 0 or data["results"][0]["content"] == ""

    def test_multiple_urls_filters_internal(self, admin_user: DATestUser) -> None:
        """Test that internal URLs are filtered while external URLs are processed."""
        response = requests.post(
            f"{API_SERVER_URL}/web-search/open-urls",
            json={
                "urls": [
                    "http://127.0.0.1/",  # Should be blocked
                    "http://192.168.1.1/",  # Should be blocked
                    "https://example.com/",  # Should be allowed (if reachable)
                ]
            },
            headers=admin_user.headers,
        )
        assert response.status_code == 200
        data = response.json()
        # Internal URLs should return empty content
        # The exact behavior depends on whether example.com is reachable
        # but internal URLs should definitely not return sensitive data
        for result in data["results"]:
            # Ensure no result contains internal network data
            content = result.get("content", "")
            # These patterns would indicate SSRF vulnerability
            assert "metrics" not in content.lower() or "example" in content.lower()
            assert "token" not in content.lower() or "example" in content.lower()


# Mark the Exa-dependent tests to skip if no API key
pytestmark_exa = pytest.mark.skipif(
    not os.environ.get("EXA_API_KEY"),
    reason="EXA_API_KEY not set; live web search tests require real credentials",
)


def _activate_exa_provider(admin_user: DATestUser) -> int:
    response = requests.post(
        f"{API_SERVER_URL}/admin/web-search/search-providers",
        json={
            "id": None,
            "name": "integration-exa-provider",
            "provider_type": WebSearchProviderType.EXA.value,
            "config": {},
            "api_key": os.environ["EXA_API_KEY"],
            "api_key_changed": True,
            "activate": True,
        },
        headers=admin_user.headers,
    )
    assert response.status_code == 200, response.text

    provider = response.json()
    assert provider["provider_type"] == WebSearchProviderType.EXA.value
    assert provider["is_active"] is True
    assert provider["has_api_key"] is True

    return provider["id"]


@pytestmark_exa
@pytest.mark.skip(reason="Temporarily disabled")
def test_web_search_endpoints_with_exa(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    provider_id = _activate_exa_provider(admin_user)
    assert isinstance(provider_id, int)

    search_request = {"queries": ["wikipedia python programming"], "max_results": 3}

    lite_response = requests.post(
        f"{API_SERVER_URL}/web-search/search-lite",
        json=search_request,
        headers=admin_user.headers,
    )
    assert lite_response.status_code == 200, lite_response.text
    lite_data = lite_response.json()

    assert lite_data["provider_type"] == WebSearchProviderType.EXA.value
    assert lite_data["results"], "Expected web search results from Exa"

    urls = [result["url"] for result in lite_data["results"] if result.get("url")][:2]
    assert urls, "Web search should return at least one URL"

    open_response = requests.post(
        f"{API_SERVER_URL}/web-search/open-urls",
        json={"urls": urls},
        headers=admin_user.headers,
    )
    assert open_response.status_code == 200, open_response.text
    open_data = open_response.json()

    assert open_data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value
    assert len(open_data["results"]) == len(urls)
    assert all("content" in result for result in open_data["results"])

    combined_response = requests.post(
        f"{API_SERVER_URL}/web-search/search",
        json=search_request,
        headers=admin_user.headers,
    )
    assert combined_response.status_code == 200, combined_response.text
    combined_data = combined_response.json()

    assert combined_data["search_provider_type"] == WebSearchProviderType.EXA.value
    assert (
        combined_data["content_provider_type"]
        == WebContentProviderType.ONYX_WEB_CRAWLER.value
    )
    assert combined_data["search_results"]

    unique_urls = list(
        dict.fromkeys(
            result["url"]
            for result in combined_data["search_results"]
            if result.get("url")
        )
    )
    assert len(combined_data["full_content_results"]) == len(unique_urls)


================================================
FILE: backend/tests/load_env_vars.py
================================================
import os


def load_env_vars(env_file: str = ".env") -> None:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    env_path = os.path.join(current_dir, env_file)
    try:
        with open(env_path, "r") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    key, value = line.split("=", 1)
                    os.environ[key] = value.strip()
        print("Successfully loaded environment variables")
    except FileNotFoundError:
        print(f"File {env_file} not found")


================================================
FILE: backend/tests/regression/answer_quality/README.md
================================================
# Search Quality Test Script

This Python script automates the process of running search quality tests for a backend system.

## Features

- Loads configuration from a YAML file
- Sets up Docker environment
- Manages environment variables
- Switches to specified Git branch
- Uploads test documents
- Runs search quality tests
- Cleans up Docker containers (optional)

## Usage

1. Ensure you have the required dependencies installed.
2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file.
3. Configure the `.env_eval` file in `deployment/docker_compose` with the correct environment variables.
4. Set up the PYTHONPATH permanently:
   Add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.bash_profile`):
   ```
   export PYTHONPATH=$PYTHONPATH:/path/to/onyx/backend
   ```
   Replace `/path/to/onyx` with the actual path to your Onyx repository.
   After adding this line, restart your terminal or run `source ~/.bashrc` (or the appropriate config file) to apply the changes.
5. Navigate to Onyx repo:

```
cd path/to/onyx
```

6. Navigate to the answer_quality folder:

```
cd backend/tests/regression/answer_quality
```

7. To launch the evaluation environment, run the launch_eval_env.py script (this step can be skipped if you are running the env outside of docker, just leave "environment_name" blank):

```
python launch_eval_env.py
```

8. Run the file_uploader.py script to upload the zip files located at the path "zipped_documents_file"

```
python file_uploader.py
```

9. Run the run_qa.py script to ask questions from the jsonl located at the path "questions_file". This will hit the "query/answer-with-quote" API endpoint.

```
python run_qa.py
```

Note: All data will be saved even after the containers are shut down. There are instructions below to re-launching docker containers using this data.

If you decide to run multiple UIs at the same time, the ports will increment upwards from 3000 (E.g. http://localhost:3001).

To see which port the desired instance is on, look at the ports on the nginx container by running `docker ps` or using docker desktop.

Docker daemon must be running for this to work.

## Configuration

Edit `search_test_config.yaml` to set:

- output_folder
  - This is the folder where the folders for each test will go
  - These folders will contain the postgres/vespa data as well as the results for each test
- zipped_documents_file
  - The path to the zip file containing the files you'd like to test against
- questions_file
  - The path to the yaml containing the questions you'd like to test with
- commit_sha
  - Set this to the SHA of the commit you want to run the test against
  - You must clear all local changes if you want to use this option
  - Set this to null if you want it to just use the code as is
- clean_up_docker_containers
  - Set this to true to automatically delete all docker containers, networks and volumes after the test
- launch_web_ui
  - Set this to true if you want to use the UI during/after the testing process
- only_state
  - Whether to only run Vespa and Postgres
- only_retrieve_docs
  - Set true to only retrieve documents, not LLM response
  - This is to save on API costs
- use_cloud_gpu
  - Set to true or false depending on if you want to use the remote gpu
  - Only need to set this if use_cloud_gpu is true
- model_server_ip
  - This is the ip of the remote model server
  - Only need to set this if use_cloud_gpu is true
- model_server_port
  - This is the port of the remote model server
  - Only need to set this if use_cloud_gpu is true
- environment_name
  - Use this if you would like to relaunch a previous test instance
  - Input the env_name of the test you'd like to re-launch
  - Leave empty to launch referencing local default network locations
- limit
  - Max number of questions you'd like to ask against the dataset
  - Set to null for no limit
- llm
  - Fill this out according to the normal LLM seeding

## Relaunching From Existing Data

To launch an existing set of containers that has already completed indexing, set the environment_name variable. This will launch the docker containers mounted on the volumes of the indicated env_name and will not automatically index any documents or run any QA.

Once these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above).

- file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path.
- run_qa.py will ask questions located at the questions_file path against the indexed documents.


================================================
FILE: backend/tests/regression/answer_quality/__init__.py
================================================


================================================
FILE: backend/tests/regression/answer_quality/api_utils.py
================================================
import requests
from retry import retry

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.enums import IndexingStatus
from onyx.server.documents.models import ConnectorBase
from tests.regression.answer_quality.cli_utils import get_api_server_host_port

GENERAL_HEADERS = {"Content-Type": "application/json"}


def _api_url_builder(env_name: str, api_path: str) -> str:
    if env_name:
        return f"http://localhost:{get_api_server_host_port(env_name)}" + api_path
    else:
        return "http://localhost:8080" + api_path


@retry(tries=10, delay=10)
def check_indexing_status(env_name: str) -> tuple[int, bool]:
    url = _api_url_builder(env_name, "/manage/admin/connector/indexing-status/")
    try:
        indexing_status_dict = requests.post(
            url, headers=GENERAL_HEADERS, json={"get_all_connectors": True}
        ).json()
    except Exception as e:
        print("Failed to check indexing status, API server is likely starting up:")
        print(f"\t {str(e)}")
        print("trying again")
        raise e

    ongoing_index_attempts = False
    doc_count = 0
    for connectors_by_source in indexing_status_dict:
        connectors = connectors_by_source["indexing_statuses"]
        for connector in connectors:
            status = connector["last_status"]
            if (
                status == IndexingStatus.IN_PROGRESS
                or status == IndexingStatus.NOT_STARTED
            ):
                ongoing_index_attempts = True
            elif status == IndexingStatus.SUCCESS:
                doc_count += 16
            doc_count += connector["docs_indexed"]
            doc_count -= 16

    # all the +16 and -16 are to account for the fact that the indexing status
    # is only updated every 16 documents and will tells us how many are
    # chunked, not indexed. probably need to fix this. in the future!
    if doc_count:
        doc_count += 16
    return doc_count, ongoing_index_attempts


def run_cc_once(env_name: str, connector_id: int, credential_id: int) -> None:
    url = _api_url_builder(env_name, "/manage/admin/connector/run-once/")
    body = {
        "connector_id": connector_id,
        "credential_ids": [credential_id],
        "from_beginning": True,
    }
    print("body:", body)
    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
    if response.status_code == 200:
        print("Connector created successfully:", response.json())
    else:
        print("Failed status_code:", response.status_code)
        print("Failed text:", response.text)


def create_cc_pair(env_name: str, connector_id: int, credential_id: int) -> None:
    url = _api_url_builder(
        env_name, f"/manage/connector/{connector_id}/credential/{credential_id}"
    )

    body = {"name": "zip_folder_contents", "is_public": True, "groups": []}
    print("body:", body)
    response = requests.put(url, headers=GENERAL_HEADERS, json=body)
    if response.status_code == 200:
        print("Connector created successfully:", response.json())
    else:
        print("Failed status_code:", response.status_code)
        print("Failed text:", response.text)


def _get_existing_connector_names(env_name: str) -> list[str]:
    url = _api_url_builder(env_name, "/manage/connector")

    body = {
        "credential_json": {},
        "admin_public": True,
    }
    response = requests.get(url, headers=GENERAL_HEADERS, json=body)
    if response.status_code == 200:
        connectors = response.json()
        return [connector["name"] for connector in connectors]
    else:
        raise RuntimeError(response.__dict__)


def create_connector(env_name: str, file_paths: list[str]) -> int:
    url = _api_url_builder(env_name, "/manage/admin/connector")
    connector_name = base_connector_name = "search_eval_connector"
    existing_connector_names = _get_existing_connector_names(env_name)

    count = 1
    while connector_name in existing_connector_names:
        connector_name = base_connector_name + "_" + str(count)
        count += 1

    connector = ConnectorBase(
        name=connector_name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": file_paths,
            "file_names": [],  # For regression tests, no need for file_names
            "zip_metadata_file_id": None,
        },
        refresh_freq=None,
        prune_freq=None,
        indexing_start=None,
    )

    body = connector.model_dump()
    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
    if response.status_code == 200:
        return response.json()["id"]
    else:
        raise RuntimeError(response.__dict__)


def create_credential(env_name: str) -> int:
    url = _api_url_builder(env_name, "/manage/credential")
    body = {
        "credential_json": {},
        "admin_public": True,
        "source": DocumentSource.FILE,
    }
    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
    if response.status_code == 200:
        print("credential created successfully:", response.json())
        return response.json()["id"]
    else:
        raise RuntimeError(response.__dict__)


@retry(tries=10, delay=2, backoff=2)
def upload_file(env_name: str, zip_file_path: str) -> list[str]:
    files = [
        ("files", open(zip_file_path, "rb")),
    ]

    api_path = _api_url_builder(env_name, "/manage/admin/connector/file/upload")
    try:
        response = requests.post(api_path, files=files)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        print("file uploaded successfully:", response.json())
        return response.json()["file_paths"]
    except Exception as e:
        print("File upload failed, waiting for API server to come up and trying again")
        raise e


================================================
FILE: backend/tests/regression/answer_quality/cli_utils.py
================================================
import json
import os
import socket
import subprocess
import sys
import time
from datetime import datetime
from threading import Thread
from typing import IO

import yaml
from retry import retry


def _run_command(command: str, stream_output: bool = False) -> tuple[str, str]:
    process = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
    )

    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    def process_stream(stream: IO[str], lines: list[str]) -> None:
        for line in stream:
            lines.append(line)
            if stream_output:
                print(
                    line,
                    end="",
                    file=sys.stdout if stream == process.stdout else sys.stderr,
                )

    stdout_thread = Thread(target=process_stream, args=(process.stdout, stdout_lines))
    stderr_thread = Thread(target=process_stream, args=(process.stderr, stderr_lines))

    stdout_thread.start()
    stderr_thread.start()

    stdout_thread.join()
    stderr_thread.join()

    process.wait()

    if process.returncode != 0:
        raise RuntimeError(f"Command failed with error: {''.join(stderr_lines)}")

    return "".join(stdout_lines), "".join(stderr_lines)


def get_current_commit_sha() -> str:
    print("Getting current commit SHA...")
    stdout, _ = _run_command("git rev-parse HEAD")
    sha = stdout.strip()
    print(f"Current commit SHA: {sha}")
    return sha


def switch_to_commit(commit_sha: str) -> None:
    print(f"Switching to commit: {commit_sha}...")
    _run_command(f"git checkout {commit_sha}")
    print(f"Successfully switched to commit: {commit_sha}")
    print("Repository updated successfully.")


def get_docker_container_env_vars(env_name: str) -> dict:
    """
    Retrieves environment variables from "background" and "api_server" Docker containers.
    """
    print(f"Getting environment variables for containers with env_name: {env_name}")

    combined_env_vars = {}
    for container_type in ["background", "api_server"]:
        container_name = _run_command(
            f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{env_name}/'"
        )[0].strip()
        if not container_name:
            raise RuntimeError(
                f"No {container_type} container found with env_name: {env_name}"
            )

        env_vars_json = _run_command(
            f"docker inspect --format='{{{{json .Config.Env}}}}' {container_name}"
        )[0]
        env_vars_list = json.loads(env_vars_json.strip())

        for env_var in env_vars_list:
            key, value = env_var.split("=", 1)
            combined_env_vars[key] = value

    return combined_env_vars


def manage_data_directories(env_name: str, base_path: str, use_cloud_gpu: bool) -> None:
    # Use the user's home directory as the base path
    target_path = os.path.join(os.path.expanduser(base_path), env_name)
    directories = {
        "DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"),
        "DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"),
    }
    if not use_cloud_gpu:
        directories["DANSWER_INDEX_MODEL_CACHE_DIR"] = os.path.join(
            target_path, "index_model_cache/"
        )
        directories["DANSWER_INFERENCE_MODEL_CACHE_DIR"] = os.path.join(
            target_path, "inference_model_cache/"
        )

    # Create directories if they don't exist
    for env_var, directory in directories.items():
        os.makedirs(directory, exist_ok=True)
        os.environ[env_var] = directory
        print(f"Set {env_var} to: {directory}")
    results_output_path = os.path.join(target_path, "evaluations_output/")
    os.makedirs(results_output_path, exist_ok=True)


def set_env_variables(
    remote_server_ip: str,
    remote_server_port: str,
    use_cloud_gpu: bool,
    llm_config: dict,
) -> None:
    env_vars: dict = {}
    env_vars["ENV_SEED_CONFIGURATION"] = json.dumps({"llms": [llm_config]})
    env_vars["ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"] = "true"
    if use_cloud_gpu:
        env_vars["MODEL_SERVER_HOST"] = remote_server_ip
        env_vars["MODEL_SERVER_PORT"] = remote_server_port
        env_vars["INDEXING_MODEL_SERVER_HOST"] = remote_server_ip

    for env_var_name, env_var in env_vars.items():
        os.environ[env_var_name] = env_var
        print(f"Set {env_var_name} to: {env_var}")


def _is_port_in_use(port: int) -> bool:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0


def start_docker_compose(
    env_name: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False
) -> None:
    print("Starting Docker Compose...")
    os.chdir(os.path.dirname(__file__))
    os.chdir("../../../../deployment/docker_compose/")
    command = (
        f"docker compose -f docker-compose.search-testing.yml -p onyx-{env_name} up -d"
    )
    command += " --build"
    command += " --force-recreate"

    if only_state:
        command += " index relational_db"
    else:
        if use_cloud_gpu:
            command += " --scale indexing_model_server=0"
            command += " --scale inference_model_server=0"
        if launch_web_ui:
            web_ui_port = 3000
            while _is_port_in_use(web_ui_port):
                web_ui_port += 1
            print(f"UI will be launched at http://localhost:{web_ui_port}")
            os.environ["NGINX_PORT"] = str(web_ui_port)
        else:
            command += " --scale web_server=0"
            command += " --scale nginx=0"

    print("Docker Command:\n", command)

    _run_command(command, stream_output=True)
    print("Containers have been launched")


def cleanup_docker(env_name: str) -> None:
    print(
        f"Deleting Docker containers, volumes, and networks for project env_name: {env_name}"
    )

    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")

    containers = [json.loads(line) for line in stdout.splitlines()]
    if not env_name:
        env_name = datetime.now().strftime("-%Y")
    project_name = f"onyx{env_name}"
    containers_to_delete = [
        c for c in containers if c["Names"].startswith(project_name)
    ]

    if not containers_to_delete:
        print(f"No containers found for project: {project_name}")
    else:
        container_ids = " ".join([c["ID"] for c in containers_to_delete])
        _run_command(f"docker rm -f {container_ids}")

        print(
            f"Successfully deleted {len(containers_to_delete)} containers for project: {project_name}"
        )

    stdout, _ = _run_command("docker volume ls --format '{{.Name}}'")

    volumes = stdout.splitlines()

    volumes_to_delete = [v for v in volumes if v.startswith(project_name)]

    if not volumes_to_delete:
        print(f"No volumes found for project: {project_name}")
        return

    # Delete filtered volumes
    volume_names = " ".join(volumes_to_delete)
    _run_command(f"docker volume rm {volume_names}")

    print(
        f"Successfully deleted {len(volumes_to_delete)} volumes for project: {project_name}"
    )
    stdout, _ = _run_command("docker network ls --format '{{.Name}}'")

    networks = stdout.splitlines()

    networks_to_delete = [n for n in networks if env_name in n]

    if not networks_to_delete:
        print(f"No networks found containing env_name: {env_name}")
    else:
        network_names = " ".join(networks_to_delete)
        _run_command(f"docker network rm {network_names}")

        print(
            f"Successfully deleted {len(networks_to_delete)} networks containing env_name: {env_name}"
        )


@retry(tries=5, delay=5, backoff=2)
def get_api_server_host_port(env_name: str) -> str:
    """
    This pulls all containers with the provided env_name
    It then grabs the JSON specific container with a name containing "api_server"
    It then grabs the port info from the JSON and strips out the relevent data
    """
    container_name = "api_server"

    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
    containers = [json.loads(line) for line in stdout.splitlines()]
    server_jsons = []

    for container in containers:
        if container_name in container["Names"] and env_name in container["Names"]:
            server_jsons.append(container)

    if not server_jsons:
        raise RuntimeError(
            f"No container found containing: {container_name} and {env_name}"
        )
    elif len(server_jsons) > 1:
        raise RuntimeError(
            f"Too many containers matching {container_name} found, please indicate a env_name"
        )
    server_json = server_jsons[0]

    # This is in case the api_server has multiple ports
    client_port = "8080"
    ports = server_json.get("Ports", "")
    port_infos = ports.split(",") if ports else []
    port_dict = {}
    for port_info in port_infos:
        port_arr = port_info.split(":")[-1].split("->") if port_info else []
        if len(port_arr) == 2:
            port_dict[port_arr[1]] = port_arr[0]

    # Find the host port where client_port is in the key
    matching_ports = [value for key, value in port_dict.items() if client_port in key]

    if len(matching_ports) > 1:
        raise RuntimeError(f"Too many ports matching {client_port} found")
    if not matching_ports:
        raise RuntimeError(
            f"No port found containing: {client_port} for container: {container_name} and env_name: {env_name}"
        )
    return matching_ports[0]


# Added function to restart Vespa container
def restart_vespa_container(env_name: str) -> None:
    print(f"Restarting Vespa container for env_name: {env_name}")

    # Find the Vespa container
    stdout, _ = _run_command(
        f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{env_name}/'"
    )
    container_name = stdout.strip()

    if not container_name:
        raise RuntimeError(f"No Vespa container found with env_name: {env_name}")

    # Restart the container
    _run_command(f"docker restart {container_name}")

    print(f"Vespa container '{container_name}' has begun restarting")

    time.sleep(30)
    print(f"Vespa container '{container_name}' has been restarted")


if __name__ == "__main__":
    """
    Running this just cleans up the docker environment for the container indicated by environment_name
    If no environment_name is indicated, will just clean up all onyx docker containers/volumes/networks
    Note: vespa/postgres mounts are not deleted
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(current_dir, "search_test_config.yaml")
    with open(config_path, "r") as file:
        config = yaml.safe_load(file)

    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary")
    cleanup_docker(config["environment_name"])


================================================
FILE: backend/tests/regression/answer_quality/file_uploader.py
================================================
import csv
import os
import tempfile
import time
import zipfile
from pathlib import Path
from types import SimpleNamespace

import yaml

from tests.regression.answer_quality.api_utils import check_indexing_status
from tests.regression.answer_quality.api_utils import create_cc_pair
from tests.regression.answer_quality.api_utils import create_connector
from tests.regression.answer_quality.api_utils import create_credential
from tests.regression.answer_quality.api_utils import run_cc_once
from tests.regression.answer_quality.api_utils import upload_file


def unzip_and_get_file_paths(zip_file_path: str) -> list[str]:
    persistent_dir = tempfile.mkdtemp()
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(persistent_dir)

    file_paths = []
    for root, _, files in os.walk(persistent_dir):
        for file in sorted(files):
            file_paths.append(os.path.join(root, file))

    return file_paths


def create_temp_zip_from_files(file_paths: list[str]) -> str:
    persistent_dir = tempfile.mkdtemp()
    zip_file_path = os.path.join(persistent_dir, "temp.zip")

    with zipfile.ZipFile(zip_file_path, "w") as zip_file:
        for file_path in file_paths:
            zip_file.write(file_path, Path(file_path).name)

    return zip_file_path


def upload_test_files(zip_file_path: str, env_name: str) -> None:
    print("zip:", zip_file_path)
    file_paths = upload_file(env_name, zip_file_path)

    conn_id = create_connector(env_name, file_paths)
    cred_id = create_credential(env_name)

    create_cc_pair(env_name, conn_id, cred_id)
    run_cc_once(env_name, conn_id, cred_id)


def manage_file_upload(zip_file_path: str, env_name: str) -> None:
    start_time = time.time()
    unzipped_file_paths = unzip_and_get_file_paths(zip_file_path)
    total_file_count = len(unzipped_file_paths)
    problem_file_list: list[str] = []

    while True:
        doc_count, ongoing_index_attempts = check_indexing_status(env_name)

        if ongoing_index_attempts:
            print(
                f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..."
            )
        elif not doc_count:
            print("No docs indexed, waiting for indexing to start")
            temp_zip_file_path = create_temp_zip_from_files(unzipped_file_paths)
            upload_test_files(temp_zip_file_path, env_name)
            os.unlink(temp_zip_file_path)
        elif (doc_count + len(problem_file_list)) < total_file_count:
            print(f"No ongooing indexing attempts but only {doc_count} docs indexed")
            remaining_files = unzipped_file_paths[doc_count + len(problem_file_list) :]
            problem_file_list.append(remaining_files.pop(0))
            print(
                f"Removing first doc and grabbed last {len(remaining_files)} docs to try agian"
            )
            temp_zip_file_path = create_temp_zip_from_files(remaining_files)
            upload_test_files(temp_zip_file_path, env_name)
            os.unlink(temp_zip_file_path)
        else:
            print(f"Successfully uploaded {doc_count} docs!")
            break

        time.sleep(10)

    if problem_file_list:
        problem_file_csv_path = os.path.join(current_dir, "problem_files.csv")
        with open(problem_file_csv_path, "w", newline="") as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(["Problematic File Paths"])
            for problem_file in problem_file_list:
                csvwriter.writerow([problem_file])

    for file in unzipped_file_paths:
        os.unlink(file)
    print(f"Total time taken: {(time.time() - start_time) / 60} minutes")


if __name__ == "__main__":
    current_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(current_dir, "search_test_config.yaml")
    with open(config_path, "r") as file:
        config = SimpleNamespace(**yaml.safe_load(file))
    file_location = config.zipped_documents_file
    env_name = config.environment_name
    manage_file_upload(file_location, env_name)


================================================
FILE: backend/tests/regression/answer_quality/launch_eval_env.py
================================================
import os
from types import SimpleNamespace

import yaml

from tests.regression.answer_quality.cli_utils import manage_data_directories
from tests.regression.answer_quality.cli_utils import set_env_variables
from tests.regression.answer_quality.cli_utils import start_docker_compose
from tests.regression.answer_quality.cli_utils import switch_to_commit


def load_config(config_filename: str) -> SimpleNamespace:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(current_dir, config_filename)
    with open(config_path, "r") as file:
        return SimpleNamespace(**yaml.safe_load(file))


def main() -> None:
    config = load_config("search_test_config.yaml")
    if config.environment_name:
        env_name = config.environment_name
        print("launching onyx with environment name:", env_name)
    else:
        print("No env name defined. Not launching docker.")
        print(
            "Please define a name in the config yaml to start a new env or use an existing env"
        )
        return

    set_env_variables(
        config.model_server_ip,
        config.model_server_port,
        config.use_cloud_gpu,
        config.llm,
    )
    manage_data_directories(env_name, config.output_folder, config.use_cloud_gpu)
    if config.commit_sha:
        switch_to_commit(config.commit_sha)

    start_docker_compose(
        env_name, config.launch_web_ui, config.use_cloud_gpu, config.only_state
    )


if __name__ == "__main__":
    main()


================================================
FILE: backend/tests/regression/answer_quality/search_test_config.yaml.template
================================================
# Copy this to search_test_config.yaml and fill in the values to run the eval pipeline
# Don't forget to also update the .env_eval file with the correct values

# Directory where test results will be saved
output_folder: "~/onyx_test_results"

# Path to the zip file containing sample documents
zipped_documents_file: "~/sampledocs.zip"

# Path to the YAML file containing sample questions
questions_file: "~/sample_questions.yaml"

# Git commit SHA to use (null means use current code as is)
commit_sha: null

# Whether to launch a web UI for the test
launch_web_ui: false

# Only retrieve documents, not LLM response
only_retrieve_docs: false

# Whether to use a cloud GPU for processing
use_cloud_gpu: false

# IP address of the model server (placeholder)
model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"

# Port of the model server (placeholder)
model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"

# Name for existing testing env (empty string uses default ports)
environment_name: ""

# Limit on number of tests to run (null means no limit)
limit: null

# LLM configuration
llm:
  # Name of the LLM
  name: "default_test_llm"
  
  # Provider of the LLM (e.g., OpenAI)
  provider: "openai"
  
  # API key
  api_key: "PUT_API_KEY_HERE"
  
  # Default model name to use
  default_model_name: "gpt-4o"
  
  # List of model names to use for testing
  model_names: ["gpt-4o"]


================================================
FILE: backend/tests/regression/search_quality/README.md
================================================
# Search Quality Test Script

This Python script evaluates the search and answer quality for a list of queries, against a ground truth. It will use the currently ingested documents for the search, answer generation, and ground truth comparisons.

## Usage

1. Ensure you have the required dependencies installed and onyx running.

2. Ensure you have `OPENAI_API_KEY` set if you intend to do answer evaluation (enabled by default, unless you run the script with the `-s` flag). Go to the API Keys page in the admin panel, generate a basic api token, and add it to the env file as `ONYX_API_KEY=on_...`.

3. Navigate to Onyx repo, **search_quality** folder:

```
cd path/to/onyx/backend/tests/regression/search_quality
```

4. Copy `test_queries.json.template` to `test_queries.json` and add/remove test queries in it. The fields for each query are:

   - `question: str` the query
   - `ground_truth: list[GroundTruth]` an un-ranked list of expected search results with fields:
      - `doc_source: str` document source (e.g., web, google_drive, linear), used to normalize the links in some cases
      - `doc_link: str` link associated with document, used to find corresponding document in local index
   - `ground_truth_response: Optional[str]` a response with clauses the ideal answer should include
   - `categories: Optional[list[str]]` list of categories, used to aggregate evaluation results

5. Run `run_search_eval.py` to evaluate the queries.  All parameters are optional and have sensible defaults:

```
python run_search_eval.py
  -d --dataset          # Path to the test-set JSON file (default: ./test_queries.json)
  -n --num_search       # Maximum number of documents to retrieve per search (default: 50)
  -a --num_answer       # Maximum number of documents to use for answer evaluation (default: 25)
  -w --max_workers      # Maximum number of concurrent search requests (0 = unlimited, default: 10).
  -r --max_req_rate     # Maximum number of search requests per minute (0 = unlimited, default: 0).
  -q --timeout          # Request timeout in seconds (default: 120)
  -e --api_endpoint     # Base URL of the Onyx API server (default: http://127.0.0.1:8080)
  -s --search_only      # Only perform search and not answer evaluation (default: false)
  -t --tenant_id        # Tenant ID to use for the evaluation (default: None)
```

Note: If you only care about search quality, you should run with the `-s` flag for a significantly faster evaluation. Furthermore, you should set `-r` to 1 if running with federated search enabled to avoid hitting rate limits.

6. After the run, an `eval-YYYY-MM-DD-HH-MM-SS` folder is created containing:

   * `test_queries.json`   – the dataset used with the list of valid queries and corresponding indexed ground truth.
   * `search_results.json` – per-query search and answer details.
   * `results_by_category.csv` – aggregated metrics per category and for "all".
   * `search_position_chart.png` – bar-chart of ground-truth ranks.

You can replace `test_queries.json` with the generated one for a slightly faster loading of the queries the next time around.

================================================
FILE: backend/tests/regression/search_quality/models.py
================================================
from pydantic import BaseModel

from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SavedSearchDoc


class GroundTruth(BaseModel):
    doc_source: DocumentSource
    doc_link: str


class TestQuery(BaseModel):
    question: str
    ground_truth: list[GroundTruth] = []
    ground_truth_response: str | None = None
    categories: list[str] = []

    # autogenerated
    ground_truth_docids: list[str] = []


class EvalConfig(BaseModel):
    max_search_results: int
    max_answer_context: int
    num_workers: int  # 0 = unlimited
    max_request_rate: int  # 0 = unlimited
    request_timeout: int
    api_url: str
    search_only: bool


class OneshotQAResult(BaseModel):
    time_taken: float
    top_documents: list[SavedSearchDoc]
    answer: str | None


class RetrievedDocument(BaseModel):
    document_id: str
    chunk_id: int
    content: str


class AnalysisSummary(BaseModel):
    question: str
    categories: list[str]
    found: bool
    rank: int | None
    total_results: int
    ground_truth_count: int
    response_relevancy: float | None = None
    faithfulness: float | None = None
    factual_correctness: float | None = None
    answer: str | None = None
    retrieved: list[RetrievedDocument] = []
    time_taken: float


class SearchMetrics(BaseModel):
    total_queries: int
    found_count: int

    # for found results
    best_rank: int
    worst_rank: int
    average_rank: float
    top_k_accuracy: dict[int, float]


class AnswerMetrics(BaseModel):
    response_relevancy: float
    faithfulness: float
    factual_correctness: float

    # only for metric computation
    n_response_relevancy: int
    n_faithfulness: int
    n_factual_correctness: int


class CombinedMetrics(SearchMetrics, AnswerMetrics):
    average_time_taken: float


================================================
FILE: backend/tests/regression/search_quality/run_search_eval.py
================================================
import csv
import json
import os
import sys
import time
from collections import defaultdict
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from threading import Event
from threading import Lock
from threading import Semaphore
from typing import cast

import matplotlib.pyplot as plt
import requests
from dotenv import load_dotenv
from matplotlib.patches import Patch
from pydantic import ValidationError
from requests.exceptions import RequestException
from retry import retry

# add onyx/backend to path (since this isn't done automatically when running as a script)
current_dir = Path(__file__).parent
onyx_dir = current_dir.parent.parent.parent.parent
sys.path.append(str(onyx_dir / "backend"))

# load env before app_config loads (since env doesn't get loaded when running as a script)
env_path = onyx_dir / ".vscode" / ".env"
if not env_path.exists():
    raise RuntimeError(
        "Could not find .env file. Please create one in the root .vscode directory."
    )
load_dotenv(env_path)

# pylint: disable=E402
# flake8: noqa: E402

from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import SavedSearchDoc
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
from tests.regression.search_quality.models import AnalysisSummary
from tests.regression.search_quality.models import CombinedMetrics
from tests.regression.search_quality.models import EvalConfig
from tests.regression.search_quality.models import OneshotQAResult
from tests.regression.search_quality.models import TestQuery
from tests.regression.search_quality.utils import compute_overall_scores
from tests.regression.search_quality.utils import find_document_id
from tests.regression.search_quality.utils import get_federated_sources
from tests.regression.search_quality.utils import LazyJsonWriter
from tests.regression.search_quality.utils import ragas_evaluate
from tests.regression.search_quality.utils import search_docs_to_doc_contexts

logger = setup_logger(__name__)

GENERAL_HEADERS = {"Content-Type": "application/json"}
TOP_K_LIST = [1, 3, 5, 10]


class SearchAnswerAnalyzer:
    def __init__(
        self,
        config: EvalConfig,
        tenant_id: str | None = None,
    ):
        if not MULTI_TENANT:
            logger.info("Running in single-tenant mode")
            tenant_id = POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
        elif tenant_id is None:
            raise ValueError("Tenant ID is required for multi-tenant")

        self.config = config
        self.tenant_id = tenant_id

        # shared analysis results
        self._lock = Lock()
        self._progress_counter = 0
        self._result_writer: LazyJsonWriter | None = None
        self.ranks: list[int | None] = []
        self.metrics: dict[str, CombinedMetrics] = defaultdict(
            lambda: CombinedMetrics(
                total_queries=0,
                found_count=0,
                best_rank=config.max_search_results,
                worst_rank=1,
                average_rank=0.0,
                top_k_accuracy={k: 0.0 for k in TOP_K_LIST},
                response_relevancy=0.0,
                faithfulness=0.0,
                factual_correctness=0.0,
                n_response_relevancy=0,
                n_faithfulness=0,
                n_factual_correctness=0,
                average_time_taken=0.0,
            )
        )

    def run_analysis(self, dataset_path: Path, export_path: Path) -> None:
        # load and save the dataset
        dataset = self._load_dataset(dataset_path)
        dataset_size = len(dataset)
        dataset_export_path = export_path / "test_queries.json"
        with dataset_export_path.open("w") as f:
            dataset_serializable = [q.model_dump(mode="json") for q in dataset]
            json.dump(dataset_serializable, f, indent=4)

        result_export_path = export_path / "search_results.json"
        self._result_writer = LazyJsonWriter(result_export_path)

        # set up rate limiting and threading primitives
        interval = (
            60.0 / self.config.max_request_rate
            if self.config.max_request_rate > 0
            else 0.0
        )
        available_workers = Semaphore(self.config.num_workers)
        stop_event = Event()

        def _submit_wrapper(tc: TestQuery) -> AnalysisSummary:
            try:
                return self._run_and_analyze_one(tc, dataset_size)
            except Exception as e:
                logger.error("Error during analysis: %s", e)
                stop_event.set()
                raise
            finally:
                available_workers.release()

        # run the analysis
        logger.info("Starting analysis of %d queries", dataset_size)
        logger.info("Using %d parallel workers", self.config.num_workers)
        logger.info("Exporting search results to %s", result_export_path)

        with ThreadPoolExecutor(
            max_workers=self.config.num_workers or None
        ) as executor:
            # submit requests at configured rate, break early if any error occurs
            futures = []
            for tc in dataset:
                if stop_event.is_set():
                    break

                available_workers.acquire()
                fut = executor.submit(_submit_wrapper, tc)
                futures.append(fut)

                if (
                    len(futures) != dataset_size
                    and interval > 0
                    and not stop_event.is_set()
                ):
                    time.sleep(interval)

            # ensure all tasks finish and surface any exceptions
            for fut in as_completed(futures):
                fut.result()

        if self._result_writer:
            self._result_writer.close()
        self._aggregate_metrics()

    def generate_detailed_report(self, export_path: Path) -> None:
        logger.info("Generating detailed report...")

        csv_path = export_path / "results_by_category.csv"
        with csv_path.open("w", newline="") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(
                [
                    "category",
                    "total_queries",
                    "found",
                    "percent_found",
                    "best_rank",
                    "worst_rank",
                    "avg_rank",
                    *[f"top_{k}_accuracy" for k in TOP_K_LIST],
                    *(
                        [
                            "avg_response_relevancy",
                            "avg_faithfulness",
                            "avg_factual_correctness",
                        ]
                        if not self.config.search_only
                        else []
                    ),
                    "search_score",
                    *(["answer_score"] if not self.config.search_only else []),
                    "avg_time_taken",
                ]
            )

            for category, metrics in sorted(
                self.metrics.items(), key=lambda c: (0 if c[0] == "all" else 1, c[0])
            ):
                found_count = metrics.found_count
                total_count = metrics.total_queries
                accuracy = found_count / total_count * 100 if total_count > 0 else 0

                print(
                    f"\n{category.upper()}:  total queries: {total_count}\n  found: {found_count} ({accuracy:.1f}%)"
                )
                best_rank = metrics.best_rank if metrics.found_count > 0 else None
                worst_rank = metrics.worst_rank if metrics.found_count > 0 else None
                avg_rank = metrics.average_rank if metrics.found_count > 0 else None
                if metrics.found_count > 0:
                    print(
                        f"  average rank (for found results): {avg_rank:.2f}\n"
                        f"  best rank (for found results): {best_rank:.2f}\n"
                        f"  worst rank (for found results): {worst_rank:.2f}"
                    )
                    for k, acc in metrics.top_k_accuracy.items():
                        print(f"  top-{k} accuracy: {acc:.1f}%")
                if not self.config.search_only:
                    if metrics.n_response_relevancy > 0:
                        print(
                            f"  average response relevancy: {metrics.response_relevancy:.2f}"
                        )
                    if metrics.n_faithfulness > 0:
                        print(f"  average faithfulness: {metrics.faithfulness:.2f}")
                    if metrics.n_factual_correctness > 0:
                        print(
                            f"  average factual correctness: {metrics.factual_correctness:.2f}"
                        )
                search_score, answer_score = compute_overall_scores(metrics)
                print(f"  search score: {search_score:.1f}")
                if not self.config.search_only:
                    print(f"  answer score: {answer_score:.1f}")
                print(f"  average time taken: {metrics.average_time_taken:.2f}s")

                csv_writer.writerow(
                    [
                        category,
                        total_count,
                        found_count,
                        f"{accuracy:.1f}",
                        best_rank or "",
                        worst_rank or "",
                        f"{avg_rank:.2f}" if avg_rank is not None else "",
                        *[f"{acc:.1f}" for acc in metrics.top_k_accuracy.values()],
                        *(
                            [
                                (
                                    f"{metrics.response_relevancy:.2f}"
                                    if metrics.n_response_relevancy > 0
                                    else ""
                                ),
                                (
                                    f"{metrics.faithfulness:.2f}"
                                    if metrics.n_faithfulness > 0
                                    else ""
                                ),
                                (
                                    f"{metrics.factual_correctness:.2f}"
                                    if metrics.n_factual_correctness > 0
                                    else ""
                                ),
                            ]
                            if not self.config.search_only
                            else []
                        ),
                        f"{search_score:.1f}",
                        *(
                            [f"{answer_score:.1f}"]
                            if not self.config.search_only
                            else []
                        ),
                        f"{metrics.average_time_taken:.2f}",
                    ]
                )
        logger.info("Saved category breakdown csv to %s", csv_path)

    def generate_chart(self, export_path: Path) -> None:
        logger.info("Generating search position chart...")

        if len(self.ranks) == 0:
            logger.warning("No results to chart")
            return

        found_count = 0
        not_found_count = 0
        rank_counts: dict[int, int] = defaultdict(int)
        for rank in self.ranks:
            if rank is None:
                not_found_count += 1
            else:
                found_count += 1
                rank_counts[rank] += 1

        # create the data for plotting
        if found_count:
            max_rank = max(rank_counts.keys())
            positions = list(range(1, max_rank + 1))
            counts = [rank_counts.get(pos, 0) for pos in positions]
        else:
            positions = []
            counts = []

        # add the "not found" bar on the far right
        if not_found_count:
            # add some spacing between found positions and "not found"
            not_found_position = (max(positions) + 2) if positions else 1
            positions.append(not_found_position)
            counts.append(not_found_count)

            # create labels for x-axis
            x_labels = [str(pos) for pos in positions[:-1]] + [
                f"not found\n(>{self.config.max_search_results})"
            ]
        else:
            x_labels = [str(pos) for pos in positions]

        # create the figure and bar chart
        plt.figure(figsize=(14, 6))

        # use different colors for found vs not found
        colors = (
            ["#3498db"] * (len(positions) - 1) + ["#e74c3c"]
            if not_found_count > 0
            else ["#3498db"] * len(positions)
        )
        bars = plt.bar(
            positions, counts, color=colors, alpha=0.7, edgecolor="black", linewidth=0.5
        )

        # customize the chart
        plt.xlabel("Position in Search Results", fontsize=12)
        plt.ylabel("Number of Ground Truth Documents", fontsize=12)
        plt.title(
            "Ground Truth Document Positions in Search Results",
            fontsize=14,
            fontweight="bold",
        )
        plt.grid(axis="y", alpha=0.3)

        # add value labels on top of each bar
        for bar, count in zip(bars, counts):
            if count > 0:
                plt.text(
                    bar.get_x() + bar.get_width() / 2,
                    bar.get_height() + 0.1,
                    str(count),
                    ha="center",
                    va="bottom",
                    fontweight="bold",
                )

        # set x-axis labels
        plt.xticks(positions, x_labels, rotation=45 if not_found_count > 0 else 0)

        # add legend if we have both found and not found
        if not_found_count and found_count:
            legend_elements = [
                Patch(facecolor="#3498db", alpha=0.7, label="Found in Results"),
                Patch(facecolor="#e74c3c", alpha=0.7, label="Not Found"),
            ]
            plt.legend(handles=legend_elements, loc="upper right")

        # make layout tight and save
        plt.tight_layout()
        chart_file = export_path / "search_position_chart.png"
        plt.savefig(chart_file, dpi=300, bbox_inches="tight")
        logger.info("Search position chart saved to: %s", chart_file)
        plt.show()

    def _load_dataset(self, dataset_path: Path) -> list[TestQuery]:
        """Load the test dataset from a JSON file and validate the ground truth documents."""
        with dataset_path.open("r") as f:
            dataset_raw: list[dict] = json.load(f)

        with get_session_with_tenant(tenant_id=self.tenant_id) as db_session:
            federated_sources = get_federated_sources(db_session)

        dataset: list[TestQuery] = []
        for datum in dataset_raw:
            # validate the raw datum
            try:
                test_query = TestQuery(**datum)
            except ValidationError as e:
                logger.error("Incorrectly formatted query %s: %s", datum, e)
                continue

            # in case the dataset was copied from the previous run export
            if test_query.ground_truth_docids:
                dataset.append(test_query)
                continue

            # validate and get the ground truth documents
            with get_session_with_tenant(tenant_id=self.tenant_id) as db_session:
                for ground_truth in test_query.ground_truth:
                    if (
                        doc_id := find_document_id(
                            ground_truth, federated_sources, db_session
                        )
                    ) is not None:
                        test_query.ground_truth_docids.append(doc_id)

            if len(test_query.ground_truth_docids) == 0:
                logger.warning(
                    "No ground truth documents found for query: %s, skipping...",
                    test_query.question,
                )
                continue

            dataset.append(test_query)

        return dataset

    @retry(tries=3, delay=1, backoff=2)
    def _perform_search(self, query: str) -> OneshotQAResult:
        """Perform a document search query against the Onyx API and time it."""
        # create the search request
        filters = BaseFilters()
        search_request = SendSearchQueryRequest(
            search_query=query,
            filters=filters,
            num_docs_fed_to_llm_selection=self.config.max_search_results,
            run_query_expansion=False,
            stream=False,
        )

        # send the request
        response = None
        try:
            request_data = search_request.model_dump()
            headers = GENERAL_HEADERS.copy()
            # Add API key if present
            if os.environ.get("ONYX_API_KEY"):
                headers["Authorization"] = f"Bearer {os.environ.get('ONYX_API_KEY')}"

            start_time = time.monotonic()
            response = requests.post(
                url=f"{self.config.api_url}/search/send-search-message",
                json=request_data,
                headers=headers,
                timeout=self.config.request_timeout,
            )
            time_taken = time.monotonic() - start_time
            response.raise_for_status()
            result = SearchFullResponse.model_validate(response.json())

            # extract documents from the search response
            if result.search_docs:
                top_documents = [
                    SavedSearchDoc.from_search_doc(doc)
                    for doc in result.search_docs[: self.config.max_search_results]
                ]
                return OneshotQAResult(
                    time_taken=time_taken,
                    top_documents=top_documents,
                    answer=None,  # search endpoint doesn't generate answers
                )
        except RequestException as e:
            raise RuntimeError(
                f"Search failed for query '{query}': {e}. Response: {response.json()}"
                if response
                else ""
            )
        raise RuntimeError(f"Search returned no documents for query {query}")

    def _run_and_analyze_one(self, test_case: TestQuery, total: int) -> AnalysisSummary:
        result = self._perform_search(test_case.question)

        # compute rank
        rank = None
        found = False
        ground_truths = set(test_case.ground_truth_docids)
        for i, doc in enumerate(result.top_documents, 1):
            if doc.document_id in ground_truths:
                rank = i
                found = True
                break

        # print search progress and result
        with self._lock:
            self._progress_counter += 1
            completed = self._progress_counter
            status = "✓ Found" if found else "✗ Not found"
            rank_info = f" (rank {rank})" if found else ""
            question_snippet = (
                test_case.question[:50] + "..."
                if len(test_case.question) > 50
                else test_case.question
            )
            print(f"[{completed}/{total}] {status}{rank_info}: {question_snippet}")

        # get the search contents
        retrieved = search_docs_to_doc_contexts(result.top_documents, self.tenant_id)

        # do answer evaluation
        response_relevancy: float | None = None
        faithfulness: float | None = None
        factual_correctness: float | None = None
        contexts = [c.content for c in retrieved[: self.config.max_answer_context]]
        if not self.config.search_only:
            if result.answer is None:
                logger.error(
                    "No answer found for query: %s, skipping answer evaluation",
                    test_case.question,
                )
            else:
                try:
                    ragas_result = ragas_evaluate(
                        question=test_case.question,
                        answer=result.answer,
                        contexts=contexts,
                        reference_answer=test_case.ground_truth_response,
                    ).scores[0]
                    response_relevancy = ragas_result["answer_relevancy"]
                    faithfulness = ragas_result["faithfulness"]
                    factual_correctness = ragas_result.get(
                        "factual_correctness(mode=recall)"
                    )
                except Exception as e:
                    logger.error(
                        "Error evaluating answer for query %s: %s",
                        test_case.question,
                        e,
                    )

        # save results
        analysis = AnalysisSummary(
            question=test_case.question,
            categories=test_case.categories,
            found=found,
            rank=rank,
            total_results=len(result.top_documents),
            ground_truth_count=len(test_case.ground_truth_docids),
            answer=result.answer,
            response_relevancy=response_relevancy,
            faithfulness=faithfulness,
            factual_correctness=factual_correctness,
            retrieved=retrieved,
            time_taken=result.time_taken,
        )
        with self._lock:
            self.ranks.append(analysis.rank)
            if self._result_writer:
                self._result_writer.append(analysis.model_dump(mode="json"))
            self._update_metrics(analysis)

        return analysis

    def _update_metrics(self, result: AnalysisSummary) -> None:
        for cat in result.categories + ["all"]:
            self.metrics[cat].total_queries += 1
            self.metrics[cat].average_time_taken += result.time_taken

            if result.found:
                self.metrics[cat].found_count += 1

                rank = cast(int, result.rank)
                self.metrics[cat].best_rank = min(self.metrics[cat].best_rank, rank)
                self.metrics[cat].worst_rank = max(self.metrics[cat].worst_rank, rank)
                self.metrics[cat].average_rank += rank
                for k in TOP_K_LIST:
                    self.metrics[cat].top_k_accuracy[k] += int(rank <= k)

            if self.config.search_only:
                continue
            if result.response_relevancy is not None:
                self.metrics[cat].response_relevancy += result.response_relevancy
                self.metrics[cat].n_response_relevancy += 1
            if result.faithfulness is not None:
                self.metrics[cat].faithfulness += result.faithfulness
                self.metrics[cat].n_faithfulness += 1
            if result.factual_correctness is not None:
                self.metrics[cat].factual_correctness += result.factual_correctness
                self.metrics[cat].n_factual_correctness += 1

    def _aggregate_metrics(self) -> None:
        for cat in self.metrics:
            total = self.metrics[cat].total_queries
            self.metrics[cat].average_time_taken /= total

            if self.metrics[cat].found_count > 0:
                self.metrics[cat].average_rank /= self.metrics[cat].found_count
            for k in TOP_K_LIST:
                self.metrics[cat].top_k_accuracy[k] /= total
                self.metrics[cat].top_k_accuracy[k] *= 100

            if self.config.search_only:
                continue
            if (n := self.metrics[cat].n_response_relevancy) > 0:
                self.metrics[cat].response_relevancy /= n
            if (n := self.metrics[cat].n_faithfulness) > 0:
                self.metrics[cat].faithfulness /= n
            if (n := self.metrics[cat].n_factual_correctness) > 0:
                self.metrics[cat].factual_correctness /= n


def run_search_eval(
    dataset_path: Path,
    config: EvalConfig,
    tenant_id: str | None,
) -> None:
    # check openai api key is set if doing answer eval (must be called that for ragas to recognize)
    if not config.search_only and not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is required for answer evaluation. Please add it to the root .vscode/.env file."
        )

    # check onyx api key is set (auth is always required)
    if not os.environ.get("ONYX_API_KEY"):
        raise RuntimeError(
            "ONYX_API_KEY is required. Please create one in the admin panel and add it to the root .vscode/.env file."
        )

    # check onyx is running
    try:
        response = requests.get(
            f"{config.api_url}/health", timeout=config.request_timeout
        )
        response.raise_for_status()
    except RequestException as e:
        raise RuntimeError(f"Could not connect to Onyx API: {e}")

    # create the export folder
    export_folder = current_dir / datetime.now().strftime("eval-%Y-%m-%d-%H-%M-%S")
    export_path = Path(export_folder)
    export_path.mkdir(parents=True, exist_ok=True)
    logger.info("Created export folder: %s", export_path)

    # run the search eval
    analyzer = SearchAnswerAnalyzer(config=config, tenant_id=tenant_id)
    analyzer.run_analysis(dataset_path, export_path)
    analyzer.generate_detailed_report(export_path)
    analyzer.generate_chart(export_path)


if __name__ == "__main__":
    import argparse

    current_dir = Path(__file__).parent
    parser = argparse.ArgumentParser(description="Run search quality evaluation.")
    parser.add_argument(
        "-d",
        "--dataset",
        type=Path,
        default=current_dir / "test_queries.json",
        help="Path to the test-set JSON file (default: %(default)s).",
    )
    parser.add_argument(
        "-n",
        "--num_search",
        type=int,
        default=50,
        help="Maximum number of documents to retrieve per search (default: %(default)s).",
    )
    parser.add_argument(
        "-a",
        "--num_answer",
        type=int,
        default=25,
        help="Maximum number of documents to use for answer evaluation (default: %(default)s).",
    )
    parser.add_argument(
        "-w",
        "--max_workers",
        type=int,
        default=10,
        help="Maximum number of concurrent search requests (0 = unlimited, default: %(default)s).",
    )
    parser.add_argument(
        "-r",
        "--max_req_rate",
        type=int,
        default=0,
        help="Maximum number of search requests per minute (0 = unlimited, default: %(default)s).",
    )
    parser.add_argument(
        "-q",
        "--timeout",
        type=int,
        default=120,
        help="Request timeout in seconds (default: %(default)s).",
    )
    parser.add_argument(
        "-e",
        "--api_endpoint",
        type=str,
        default="http://127.0.0.1:8080",
        help="Base URL of the Onyx API server (default: %(default)s).",
    )
    parser.add_argument(
        "-s",
        "--search_only",
        action="store_true",
        default=False,
        help="Only perform search and not answer evaluation (default: %(default)s).",
    )
    parser.add_argument(
        "-t",
        "--tenant_id",
        type=str,
        default=None,
        help="Tenant ID to use for the evaluation (default: %(default)s).",
    )

    args = parser.parse_args()

    SqlEngine.init_engine(
        pool_size=POSTGRES_API_SERVER_POOL_SIZE,
        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
    )

    try:
        run_search_eval(
            args.dataset,
            EvalConfig(
                max_search_results=args.num_search,
                max_answer_context=args.num_answer,
                num_workers=args.max_workers,
                max_request_rate=args.max_req_rate,
                request_timeout=args.timeout,
                api_url=args.api_endpoint,
                search_only=args.search_only,
            ),
            args.tenant_id,
        )
    except Exception as e:
        logger.error("Unexpected error during search evaluation: %s", e)
        raise
    finally:
        SqlEngine.reset_engine()


================================================
FILE: backend/tests/regression/search_quality/test_queries.json.template
================================================
[
    {
        "question": "What is Onyx?",
        "ground_truth": [
            {
                "doc_source": "web",
                "doc_link": "https://docs.onyx.app/welcome"
            }
        ],
        "categories": [
            "keyword",
            "broad",
            "easy"
        ]
    }
]

================================================
FILE: backend/tests/regression/search_quality/utils.py
================================================
import json
import re
from pathlib import Path
from textwrap import indent
from typing import Any
from typing import cast
from typing import TextIO

from ragas import evaluate  # type: ignore[import-not-found,unused-ignore]
from ragas import EvaluationDataset  # type: ignore[import-not-found,unused-ignore]
from ragas import SingleTurnSample  # type: ignore[import-not-found,unused-ignore]
from ragas.dataset_schema import EvaluationResult  # type: ignore[import-not-found,unused-ignore]
from ragas.metrics import FactualCorrectness  # type: ignore[import-not-found,unused-ignore]
from ragas.metrics import Faithfulness  # type: ignore[import-not-found,unused-ignore]
from ragas.metrics import ResponseRelevancy  # type: ignore[import-not-found,unused-ignore]
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import SavedSearchDoc
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import Document
from onyx.db.models import FederatedConnector
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.prompts.prompt_utils import build_doc_context_str
from onyx.utils.logger import setup_logger
from tests.regression.search_quality.models import CombinedMetrics
from tests.regression.search_quality.models import GroundTruth
from tests.regression.search_quality.models import RetrievedDocument

logger = setup_logger(__name__)


def get_federated_sources(db_session: Session) -> set[DocumentSource]:
    """Get all federated sources from the database."""
    return {
        source
        for connector in db_session.query(FederatedConnector).all()
        if (source := connector.source.to_non_federated_source()) is not None
    }


def find_document_id(
    ground_truth: GroundTruth,
    federated_sources: set[DocumentSource],
    db_session: Session,
) -> str | None:
    """Find a document by its link and return its id if found."""
    # handle federated sources TODO: maybe make handler dictionary by source if this gets complex
    if ground_truth.doc_source in federated_sources:
        if ground_truth.doc_source == DocumentSource.SLACK:
            groups = re.search(
                r"archives\/([A-Z0-9]+)\/p([0-9]+)", ground_truth.doc_link
            )
            if groups:
                channel_id = groups.group(1)
                message_id = groups.group(2)
                return f"{channel_id}__{message_id[:-6]}.{message_id[-6:]}"

    # preprocess links
    doc_link = ground_truth.doc_link
    if ground_truth.doc_source == DocumentSource.GOOGLE_DRIVE:
        if "/edit" in doc_link:
            doc_link = doc_link.split("/edit", 1)[0]
        elif "/view" in doc_link:
            doc_link = doc_link.split("/view", 1)[0]
    elif ground_truth.doc_source == DocumentSource.FIREFLIES:
        doc_link = doc_link.split("?", 1)[0]

    docs = db_session.query(Document).filter(Document.link.ilike(f"{doc_link}%")).all()
    if len(docs) == 0:
        logger.warning("Could not find ground truth document: %s", doc_link)
        return None
    elif len(docs) > 1:
        logger.warning(
            "Found multiple ground truth documents: %s, using the first one: %s",
            doc_link,
            docs[0].id,
        )
    return docs[0].id


def get_doc_contents(
    docs: list[SavedSearchDoc], tenant_id: str
) -> dict[tuple[str, int], str]:
    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        search_settings = get_current_search_settings(db_session)
        document_index = get_default_document_index(search_settings, None, db_session)

    filters = IndexFilters(access_control_list=None, tenant_id=tenant_id)

    reqs: list[VespaChunkRequest] = [
        VespaChunkRequest(
            document_id=doc.document_id,
            min_chunk_ind=doc.chunk_ind,
            max_chunk_ind=doc.chunk_ind,
        )
        for doc in docs
    ]

    results = document_index.id_based_retrieval(chunk_requests=reqs, filters=filters)
    return {(doc.document_id, doc.chunk_id): doc.content for doc in results}


def search_docs_to_doc_contexts(
    docs: list[SavedSearchDoc], tenant_id: str
) -> list[RetrievedDocument]:
    try:
        doc_contents = get_doc_contents(docs, tenant_id)
    except Exception as e:
        logger.error("Error getting doc contents: %s", e)
        doc_contents = {}

    return [
        RetrievedDocument(
            document_id=doc.document_id,
            chunk_id=doc.chunk_ind,
            content=build_doc_context_str(
                semantic_identifier=doc.semantic_identifier,
                source_type=doc.source_type,
                content=doc_contents.get(
                    (doc.document_id, doc.chunk_ind), f"Blurb: {doc.blurb}"
                ),
                metadata_dict=doc.metadata,
                updated_at=doc.updated_at,
                ind=ind,
                include_metadata=True,
            ),
        )
        for ind, doc in enumerate(docs)
    ]


def ragas_evaluate(
    question: str, answer: str, contexts: list[str], reference_answer: str | None = None
) -> EvaluationResult:
    sample = SingleTurnSample(
        user_input=question,
        retrieved_contexts=contexts,
        response=answer,
        reference=reference_answer,
    )
    dataset = EvaluationDataset([sample])
    return cast(
        EvaluationResult,
        evaluate(
            dataset,
            metrics=[
                ResponseRelevancy(),
                Faithfulness(),
                *(
                    [FactualCorrectness(mode="recall")]
                    if reference_answer is not None
                    else []
                ),
            ],
        ),
    )


def compute_overall_scores(metrics: CombinedMetrics) -> tuple[float, float]:
    """Compute the overall search and answer quality scores.
    The scores are subjective and may require tuning."""
    # search score
    FOUND_RATIO_WEIGHT = 0.4
    TOP_IMPORTANCE = 0.7  # 0-inf, how important is it to be no. 1 over other ranks

    found_ratio = metrics.found_count / metrics.total_queries
    sum_k = sum(1.0 / pow(k, TOP_IMPORTANCE) for k in metrics.top_k_accuracy)
    weighted_topk = sum(
        acc / (pow(k, TOP_IMPORTANCE) * sum_k * 100)
        for k, acc in metrics.top_k_accuracy.items()
    )
    search_score = 100 * (
        FOUND_RATIO_WEIGHT * found_ratio + (1.0 - FOUND_RATIO_WEIGHT) * weighted_topk
    )

    # answer score
    mets = [
        *([metrics.response_relevancy] if metrics.n_response_relevancy > 0 else []),
        *([metrics.faithfulness] if metrics.n_faithfulness > 0 else []),
        *([metrics.factual_correctness] if metrics.n_factual_correctness > 0 else []),
    ]
    answer_score = 100 * sum(mets) / len(mets) if mets else 0.0

    return search_score, answer_score


class LazyJsonWriter:
    def __init__(self, filepath: Path, indent: int = 4) -> None:
        self.filepath = filepath
        self.file: TextIO | None = None
        self.indent = indent

    def append(self, serializable_item: dict[str, Any]) -> None:
        if not self.file:
            self.file = open(self.filepath, "a")
            self.file.write("[\n")
        else:
            self.file.write(",\n")

        data = json.dumps(serializable_item, indent=self.indent)
        self.file.write(indent(data, " " * self.indent))

    def close(self) -> None:
        if not self.file:
            return
        self.file.write("\n]")
        self.file.close()
        self.file = None


================================================
FILE: backend/tests/unit/__init__.py
================================================


================================================
FILE: backend/tests/unit/build/test_rewrite_asset_paths.py
================================================
"""Unit tests for webapp proxy path rewriting/injection."""

from types import SimpleNamespace
from typing import cast
from typing import Literal
from uuid import UUID

import httpx
import pytest
from fastapi import Request
from sqlalchemy.orm import Session

from onyx.server.features.build.api import api
from onyx.server.features.build.api.api import _inject_hmr_fixer
from onyx.server.features.build.api.api import _rewrite_asset_paths
from onyx.server.features.build.api.api import _rewrite_proxy_response_headers

SESSION_ID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
BASE = f"/api/build/sessions/{SESSION_ID}/webapp"


def rewrite(html: str) -> str:
    return _rewrite_asset_paths(html.encode(), SESSION_ID).decode()


def inject(html: str) -> str:
    return _inject_hmr_fixer(html.encode(), SESSION_ID).decode()


class TestNextjsPathRewriting:
    def test_rewrites_bare_next_script_src(self) -> None:
        html = '<script src="/_next/static/chunks/main.js">'
        result = rewrite(html)
        assert f'src="{BASE}/_next/static/chunks/main.js"' in result
        assert '"/_next/' not in result

    def test_rewrites_bare_next_in_single_quotes(self) -> None:
        html = "<link href='/_next/static/css/app.css'>"
        result = rewrite(html)
        assert f"'{BASE}/_next/static/css/app.css'" in result

    def test_rewrites_bare_next_in_url_parens(self) -> None:
        html = "background: url(/_next/static/media/font.woff2)"
        result = rewrite(html)
        assert f"url({BASE}/_next/static/media/font.woff2)" in result

    def test_no_double_prefix_when_already_proxied(self) -> None:
        """assetPrefix makes Next.js emit already-prefixed URLs — must not double-rewrite."""
        already_prefixed = f'<script src="{BASE}/_next/static/chunks/main.js">'
        result = rewrite(already_prefixed)
        # Should be unchanged
        assert result == already_prefixed
        # Specifically, no double path
        assert f"{BASE}/{BASE}" not in result

    def test_rewrites_favicon(self) -> None:
        html = '<link rel="icon" href="/favicon.ico">'
        result = rewrite(html)
        assert f'"{BASE}/favicon.ico"' in result

    def test_rewrites_json_data_path_double_quoted(self) -> None:
        html = 'fetch("/data/tickets.json")'
        result = rewrite(html)
        assert f'"{BASE}/data/tickets.json"' in result

    def test_rewrites_json_data_path_single_quoted(self) -> None:
        html = "fetch('/data/items.json')"
        result = rewrite(html)
        assert f"'{BASE}/data/items.json'" in result

    def test_rewrites_escaped_next_font_path_in_json_script(self) -> None:
        """Next dev can embed font asset paths in JSON-escaped script payloads."""
        html = r'{"src":"\/_next\/static\/media\/font.woff2"}'
        result = rewrite(html)
        assert (
            r'{"src":"\/api\/build\/sessions\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\/webapp\/_next\/static\/media\/font.woff2"}'
            in result
        )

    def test_rewrites_escaped_next_font_path_in_style_payload(self) -> None:
        """Keep dynamically generated next/font URLs inside the session proxy."""
        html = r'{"css":"@font-face{src:url(\"\/_next\/static\/media\/font.woff2\")"}'
        result = rewrite(html)
        assert (
            r"\/api\/build\/sessions\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\/webapp\/_next\/static\/media\/font.woff2"
            in result
        )

    def test_rewrites_absolute_next_font_url(self) -> None:
        html = '<link rel="preload" as="font" href="https://craft-dev.onyx.app/_next/static/media/font.woff2">'
        result = rewrite(html)
        assert f'"{BASE}/_next/static/media/font.woff2"' in result

    def test_rewrites_root_hmr_path(self) -> None:
        html = 'new WebSocket("wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc")'
        result = rewrite(html)
        assert '"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc"' not in result
        assert '"/_next/webpack-hmr?id=abc"' in result

    def test_rewrites_escaped_absolute_next_font_url(self) -> None:
        html = (
            r'{"href":"https:\/\/craft-dev.onyx.app\/_next\/static\/media\/font.woff2"}'
        )
        result = rewrite(html)
        assert (
            r'{"href":"\/api\/build\/sessions\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\/webapp\/_next\/static\/media\/font.woff2"}'
            in result
        )


class TestRuntimeFixerInjection:
    def test_injects_websocket_rewrite_shim(self) -> None:
        html = "<html><head></head><body></body></html>"
        result = inject(html)
        assert "window.WebSocket = function (url, protocols)" in result
        assert f'var WEBAPP_BASE = "{BASE}"' in result

    def test_injects_hmr_websocket_stub(self) -> None:
        html = "<html><head></head><body></body></html>"
        result = inject(html)
        assert "function MockHmrWebSocket(url)" in result
        assert "return new MockHmrWebSocket(rewriteNextAssetUrl(url));" in result

    def test_injects_before_head_contents(self) -> None:
        html = "<html><head><title>x</title></head><body></body></html>"
        result = inject(html)
        assert result.index(
            "window.WebSocket = function (url, protocols)"
        ) < result.index("<title>x</title>")

    def test_rewritten_hmr_url_still_matches_shim_intercept_logic(self) -> None:
        html = '<html><head></head><body>new WebSocket("wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc")</body></html>'

        rewritten = rewrite(html)
        assert '"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc"' not in rewritten
        assert 'new WebSocket("/_next/webpack-hmr?id=abc")' in rewritten

        injected = inject(rewritten)

        assert 'new WebSocket("/_next/webpack-hmr?id=abc")' in injected
        assert 'parsedUrl.pathname.indexOf("/_next/webpack-hmr") === 0' in injected


class TestProxyHeaderRewriting:
    def test_rewrites_link_header_font_preload_paths(self) -> None:
        headers = {
            "link": (
                '</_next/static/media/font.woff2>; rel=preload; as="font"; crossorigin, '
                '</_next/static/media/font2.woff2>; rel=preload; as="font"; crossorigin'
            )
        }

        result = _rewrite_proxy_response_headers(headers, SESSION_ID)

        assert f"<{BASE}/_next/static/media/font.woff2>" in result["link"]


class TestProxyRequestWiring:
    def test_proxy_request_rewrites_link_header_on_html_response(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        html = b"<html><head></head><body>ok</body></html>"
        upstream = httpx.Response(
            200,
            headers={
                "content-type": "text/html; charset=utf-8",
                "link": '</_next/static/media/font.woff2>; rel=preload; as="font"',
            },
            content=html,
        )

        monkeypatch.setattr(api, "_get_sandbox_url", lambda *_args: "http://sandbox")

        class FakeClient:
            def __init__(self, *_args: object, **_kwargs: object) -> None:
                pass

            def __enter__(self) -> "FakeClient":
                return self

            def __exit__(self, *_args: object) -> Literal[False]:
                return False

            def get(self, _url: str, headers: dict[str, str]) -> httpx.Response:
                assert "host" not in {key.lower() for key in headers}
                return upstream

        monkeypatch.setattr(api.httpx, "Client", FakeClient)

        request = cast(Request, SimpleNamespace(headers={}, query_params=""))

        response = api._proxy_request(
            "", request, UUID(SESSION_ID), cast(Session, SimpleNamespace())
        )

        assert response.headers["link"] == (
            f'<{BASE}/_next/static/media/font.woff2>; rel=preload; as="font"'
        )

    def test_proxy_request_injects_hmr_fixer_for_html_response(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        upstream = httpx.Response(
            200,
            headers={"content-type": "text/html; charset=utf-8"},
            content=b"<html><head><title>x</title></head><body></body></html>",
        )

        monkeypatch.setattr(api, "_get_sandbox_url", lambda *_args: "http://sandbox")

        class FakeClient:
            def __init__(self, *_args: object, **_kwargs: object) -> None:
                pass

            def __enter__(self) -> "FakeClient":
                return self

            def __exit__(self, *_args: object) -> Literal[False]:
                return False

            def get(self, _url: str, headers: dict[str, str]) -> httpx.Response:
                assert "host" not in {key.lower() for key in headers}
                return upstream

        monkeypatch.setattr(api.httpx, "Client", FakeClient)

        request = cast(Request, SimpleNamespace(headers={}, query_params=""))

        response = api._proxy_request(
            "", request, UUID(SESSION_ID), cast(Session, SimpleNamespace())
        )
        body = cast(bytes, response.body).decode("utf-8")

        assert "window.WebSocket = function (url, protocols)" in body
        assert body.index("window.WebSocket = function (url, protocols)") < body.index(
            "<title>x</title>"
        )

    def test_rewrites_absolute_link_header_font_preload_paths(self) -> None:
        headers = {
            "link": (
                '<https://craft-dev.onyx.app/_next/static/media/font.woff2>; rel=preload; as="font"; crossorigin'
            )
        }

        result = _rewrite_proxy_response_headers(headers, SESSION_ID)

        assert f"<{BASE}/_next/static/media/font.woff2>" in result["link"]


================================================
FILE: backend/tests/unit/ee/conftest.py
================================================
"""Auto-enable EE mode for all tests under tests/unit/ee/."""

import pytest


@pytest.fixture(autouse=True)
def _enable_ee_for_directory(enable_ee: None) -> None:
    """Wraps the shared enable_ee fixture with autouse for this directory."""


================================================
FILE: backend/tests/unit/ee/onyx/db/test_license.py
================================================
"""Tests for license database CRUD operations."""

from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

from ee.onyx.db.license import check_seat_availability
from ee.onyx.db.license import delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import upsert_license
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import PlanType
from onyx.db.models import License
from onyx.server.settings.models import ApplicationStatus


class TestGetLicense:
    """Tests for get_license function."""

    def test_get_existing_license(self) -> None:
        """Test getting an existing license."""
        mock_session = MagicMock()
        mock_license = License(id=1, license_data="test_data")

        # Mock the query chain
        mock_session.execute.return_value.scalars.return_value.first.return_value = (
            mock_license
        )

        result = get_license(mock_session)

        assert result is not None
        assert result.license_data == "test_data"
        mock_session.execute.assert_called_once()

    def test_get_no_license(self) -> None:
        """Test getting when no license exists."""
        mock_session = MagicMock()
        mock_session.execute.return_value.scalars.return_value.first.return_value = None

        result = get_license(mock_session)

        assert result is None


class TestUpsertLicense:
    """Tests for upsert_license function."""

    def test_insert_new_license(self) -> None:
        """Test inserting a new license when none exists."""
        mock_session = MagicMock()
        mock_session.execute.return_value.scalars.return_value.first.return_value = None

        upsert_license(mock_session, "new_license_data")

        # Verify add was called with a License object
        mock_session.add.assert_called_once()
        added_license = mock_session.add.call_args[0][0]
        assert isinstance(added_license, License)
        assert added_license.license_data == "new_license_data"

        mock_session.commit.assert_called_once()
        mock_session.refresh.assert_called_once()

    def test_update_existing_license(self) -> None:
        """Test updating an existing license."""
        mock_session = MagicMock()
        existing_license = License(id=1, license_data="old_data")
        mock_session.execute.return_value.scalars.return_value.first.return_value = (
            existing_license
        )

        upsert_license(mock_session, "updated_license_data")

        # Verify the existing license was updated
        assert existing_license.license_data == "updated_license_data"
        mock_session.add.assert_not_called()  # Should not add new
        mock_session.commit.assert_called_once()
        mock_session.refresh.assert_called_once_with(existing_license)


class TestDeleteLicense:
    """Tests for delete_license function."""

    def test_delete_existing_license(self) -> None:
        """Test deleting an existing license."""
        mock_session = MagicMock()
        existing_license = License(id=1, license_data="test_data")
        mock_session.execute.return_value.scalars.return_value.first.return_value = (
            existing_license
        )

        result = delete_license(mock_session)

        assert result is True
        mock_session.delete.assert_called_once_with(existing_license)
        mock_session.commit.assert_called_once()

    def test_delete_no_license(self) -> None:
        """Test deleting when no license exists."""
        mock_session = MagicMock()
        mock_session.execute.return_value.scalars.return_value.first.return_value = None

        result = delete_license(mock_session)

        assert result is False
        mock_session.delete.assert_not_called()
        mock_session.commit.assert_not_called()


def _make_license_metadata(seats: int = 10) -> LicenseMetadata:
    now = datetime.now(timezone.utc)
    return LicenseMetadata(
        tenant_id="public",
        seats=seats,
        used_seats=0,
        plan_type=PlanType.ANNUAL,
        issued_at=now,
        expires_at=now + timedelta(days=365),
        status=ApplicationStatus.ACTIVE,
        source=LicenseSource.MANUAL_UPLOAD,
    )


class TestCheckSeatAvailabilitySelfHosted:
    """Seat checks for self-hosted (MULTI_TENANT=False)."""

    @patch("ee.onyx.db.license.get_license_metadata", return_value=None)
    def test_no_license_means_unlimited(self, _mock_meta: MagicMock) -> None:
        result = check_seat_availability(MagicMock(), seats_needed=1)
        assert result.available is True

    @patch("ee.onyx.db.license.get_used_seats", return_value=5)
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_seats_available(self, mock_meta: MagicMock, _mock_used: MagicMock) -> None:
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(MagicMock(), seats_needed=1)
        assert result.available is True

    @patch("ee.onyx.db.license.get_used_seats", return_value=10)
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_seats_full_blocks_creation(
        self, mock_meta: MagicMock, _mock_used: MagicMock
    ) -> None:
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(MagicMock(), seats_needed=1)
        assert result.available is False
        assert result.error_message is not None
        assert "10 of 10" in result.error_message

    @patch("ee.onyx.db.license.get_used_seats", return_value=10)
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_exactly_at_capacity_allows_no_more(
        self, mock_meta: MagicMock, _mock_used: MagicMock
    ) -> None:
        """Filling to 100% is allowed; exceeding is not."""
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(MagicMock(), seats_needed=1)
        assert result.available is False

    @patch("ee.onyx.db.license.get_used_seats", return_value=9)
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_filling_to_capacity_is_allowed(
        self, mock_meta: MagicMock, _mock_used: MagicMock
    ) -> None:
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(MagicMock(), seats_needed=1)
        assert result.available is True


class TestCheckSeatAvailabilityMultiTenant:
    """Seat checks for multi-tenant cloud (MULTI_TENANT=True).

    Verifies that get_used_seats takes the MULTI_TENANT branch
    and delegates to get_tenant_count.
    """

    @patch("ee.onyx.db.license.MULTI_TENANT", True)
    @patch(
        "ee.onyx.server.tenants.user_mapping.get_tenant_count",
        return_value=5,
    )
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_seats_available_multi_tenant(
        self,
        mock_meta: MagicMock,
        mock_tenant_count: MagicMock,
    ) -> None:
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(
            MagicMock(), seats_needed=1, tenant_id="tenant-abc"
        )
        assert result.available is True
        mock_tenant_count.assert_called_once_with("tenant-abc")

    @patch("ee.onyx.db.license.MULTI_TENANT", True)
    @patch(
        "ee.onyx.server.tenants.user_mapping.get_tenant_count",
        return_value=10,
    )
    @patch("ee.onyx.db.license.get_license_metadata")
    def test_seats_full_multi_tenant(
        self,
        mock_meta: MagicMock,
        mock_tenant_count: MagicMock,
    ) -> None:
        mock_meta.return_value = _make_license_metadata(seats=10)
        result = check_seat_availability(
            MagicMock(), seats_needed=1, tenant_id="tenant-abc"
        )
        assert result.available is False
        assert result.error_message is not None
        mock_tenant_count.assert_called_once_with("tenant-abc")


================================================
FILE: backend/tests/unit/ee/onyx/db/test_user_group_rename.py
================================================
"""Tests for user group rename DB operation."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from ee.onyx.db.user_group import rename_user_group
from onyx.db.models import UserGroup


class TestRenameUserGroup:
    """Tests for rename_user_group function."""

    @patch("ee.onyx.db.user_group.DISABLE_VECTOR_DB", False)
    @patch(
        "ee.onyx.db.user_group._mark_user_group__cc_pair_relationships_outdated__no_commit"
    )
    def test_rename_succeeds_and_triggers_sync(
        self, mock_mark_outdated: MagicMock
    ) -> None:
        mock_session = MagicMock()
        mock_group = MagicMock(spec=UserGroup)
        mock_group.name = "Old Name"
        mock_group.is_up_to_date = True
        mock_session.scalar.return_value = mock_group

        result = rename_user_group(mock_session, user_group_id=1, new_name="New Name")

        assert result.name == "New Name"
        assert result.is_up_to_date is False
        mock_mark_outdated.assert_called_once()
        mock_session.commit.assert_called_once()

    def test_rename_group_not_found(self) -> None:
        mock_session = MagicMock()
        mock_session.scalar.return_value = None

        with pytest.raises(ValueError, match="not found"):
            rename_user_group(mock_session, user_group_id=999, new_name="New Name")

        mock_session.commit.assert_not_called()

    def test_rename_group_syncing_raises(self) -> None:
        mock_session = MagicMock()
        mock_group = MagicMock(spec=UserGroup)
        mock_group.is_up_to_date = False
        mock_session.scalar.return_value = mock_group

        with pytest.raises(ValueError, match="currently syncing"):
            rename_user_group(mock_session, user_group_id=1, new_name="New Name")

        mock_session.commit.assert_not_called()


================================================
FILE: backend/tests/unit/ee/onyx/external_permissions/salesforce/test_postprocessing.py
================================================
from datetime import datetime

from ee.onyx.external_permissions.salesforce.postprocessing import (
    censor_salesforce_chunks,
)
from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.salesforce.utils import BASE_DATA_PATH
from onyx.context.search.models import InferenceChunk

SQLITE_DIR = BASE_DATA_PATH


def create_test_chunk(
    doc_id: str,
    chunk_id: int,
    content: str,
    source_links: dict[int, str] | None,
) -> InferenceChunk:
    return InferenceChunk(
        document_id=doc_id,
        chunk_id=chunk_id,
        blurb=content[:BLURB_SIZE],
        content=content,
        source_links=source_links,
        section_continuation=False,
        source_type=DocumentSource.SALESFORCE,
        semantic_identifier="test_chunk",
        title="Test Chunk",
        boost=1,
        score=None,
        hidden=False,
        metadata={},
        match_highlights=[],
        updated_at=datetime.now(),
        image_file_id=None,
        doc_summary="",
        chunk_context="",
    )


def test_validate_salesforce_access_single_object() -> None:
    """Test filtering when chunk has a single Salesforce object reference"""

    section = "This is a test document about a Salesforce object."
    test_content = section
    test_chunk = create_test_chunk(
        doc_id="doc1",
        chunk_id=1,
        content=test_content,
        source_links={0: "https://salesforce.com/object1"},
    )

    # Test when user has access
    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={"object1": True},
    )
    assert len(filtered_chunks) == 1
    assert filtered_chunks[0].content == test_content

    # Test when user doesn't have access
    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={"object1": False},
    )
    assert len(filtered_chunks) == 0


def test_validate_salesforce_access_multiple_objects() -> None:
    """Test filtering when chunk has multiple Salesforce object references"""
    section1 = "First part about object1. "
    section2 = "Second part about object2. "
    section3 = "Third part about object3."

    test_content = section1 + section2 + section3
    section1_end = len(section1)
    section2_end = section1_end + len(section2)

    test_chunk = create_test_chunk(
        doc_id="doc1",
        chunk_id=1,
        content=test_content,
        source_links={
            0: "https://salesforce.com/object1",
            section1_end: "https://salesforce.com/object2",
            section2_end: "https://salesforce.com/object3",
        },
    )

    # Test when user has access to all objects
    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={
            "object1": True,
            "object2": True,
            "object3": True,
        },
    )
    assert len(filtered_chunks) == 1
    assert filtered_chunks[0].content == test_content

    # Test when user has access to some objects
    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={
            "object1": True,
            "object2": False,
            "object3": True,
        },
    )
    assert len(filtered_chunks) == 1
    assert section1 in filtered_chunks[0].content
    assert section2 not in filtered_chunks[0].content
    assert section3 in filtered_chunks[0].content

    # Test when user has no access
    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={
            "object1": False,
            "object2": False,
            "object3": False,
        },
    )
    assert len(filtered_chunks) == 0


def test_validate_salesforce_access_multiple_chunks() -> None:
    """Test filtering when there are multiple chunks with different access patterns"""
    section1 = "Content about object1"
    section2 = "Content about object2"

    chunk1 = create_test_chunk(
        doc_id="doc1",
        chunk_id=1,
        content=section1,
        source_links={0: "https://salesforce.com/object1"},
    )
    chunk2 = create_test_chunk(
        doc_id="doc1",
        chunk_id=2,
        content=section2,
        source_links={0: "https://salesforce.com/object2"},
    )

    # Test mixed access
    filtered_chunks = censor_salesforce_chunks(
        chunks=[chunk1, chunk2],
        user_email="test@example.com",
        access_map={
            "object1": True,
            "object2": False,
        },
    )
    assert len(filtered_chunks) == 1
    assert filtered_chunks[0].chunk_id == 1
    assert section1 in filtered_chunks[0].content


def test_validate_salesforce_access_no_source_links() -> None:
    """Test handling of chunks with no source links"""
    section = "Content with no source links"
    test_chunk = create_test_chunk(
        doc_id="doc1",
        chunk_id=1,
        content=section,
        source_links=None,
    )

    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={},
    )
    assert len(filtered_chunks) == 0


def test_validate_salesforce_access_blurb_update() -> None:
    """Test that blurbs are properly updated based on permitted content"""
    section = "First part about object1. "
    long_content = section * 20  # Make it longer than BLURB_SIZE
    test_chunk = create_test_chunk(
        doc_id="doc1",
        chunk_id=1,
        content=long_content,
        source_links={0: "https://salesforce.com/object1"},
    )

    filtered_chunks = censor_salesforce_chunks(
        chunks=[test_chunk],
        user_email="test@example.com",
        access_map={"object1": True},
    )
    assert len(filtered_chunks) == 1
    assert len(filtered_chunks[0].blurb) <= BLURB_SIZE
    assert filtered_chunks[0].blurb.startswith(section)


================================================
FILE: backend/tests/unit/ee/onyx/external_permissions/sharepoint/test_permission_utils.py
================================================
from collections.abc import Generator
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from ee.onyx.external_permissions.sharepoint.permission_utils import (
    _enumerate_ad_groups_paginated,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    _is_public_item,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    _iter_graph_collection,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    _normalize_email,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    AD_GROUP_ENUMERATION_THRESHOLD,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    get_external_access_from_sharepoint,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
    get_sharepoint_external_groups,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import GroupsResult


MODULE = "ee.onyx.external_permissions.sharepoint.permission_utils"
GRAPH_API_BASE = "https://graph.microsoft.com/v1.0"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _fake_token() -> str:
    return "fake-token"


def _make_graph_page(
    items: list[dict[str, Any]],
    next_link: str | None = None,
) -> dict[str, Any]:
    page: dict[str, Any] = {"value": items}
    if next_link:
        page["@odata.nextLink"] = next_link
    return page


# ---------------------------------------------------------------------------
# _normalize_email
# ---------------------------------------------------------------------------


def test_normalize_email_strips_onmicrosoft() -> None:
    assert _normalize_email("user@contoso.onmicrosoft.com") == "user@contoso.com"


def test_normalize_email_noop_for_normal_domain() -> None:
    assert _normalize_email("user@contoso.com") == "user@contoso.com"


# ---------------------------------------------------------------------------
# _iter_graph_collection
# ---------------------------------------------------------------------------


@patch(f"{MODULE}._graph_api_get")
def test_iter_graph_collection_single_page(mock_get: MagicMock) -> None:
    mock_get.return_value = _make_graph_page([{"id": "1"}, {"id": "2"}])

    items = list(_iter_graph_collection("https://graph/items", _fake_token))
    assert items == [{"id": "1"}, {"id": "2"}]
    mock_get.assert_called_once()


@patch(f"{MODULE}._graph_api_get")
def test_iter_graph_collection_multi_page(mock_get: MagicMock) -> None:
    mock_get.side_effect = [
        _make_graph_page([{"id": "1"}], next_link="https://graph/items?page=2"),
        _make_graph_page([{"id": "2"}]),
    ]

    items = list(_iter_graph_collection("https://graph/items", _fake_token))
    assert items == [{"id": "1"}, {"id": "2"}]
    assert mock_get.call_count == 2


@patch(f"{MODULE}._graph_api_get")
def test_iter_graph_collection_empty(mock_get: MagicMock) -> None:
    mock_get.return_value = _make_graph_page([])
    assert list(_iter_graph_collection("https://graph/items", _fake_token)) == []


# ---------------------------------------------------------------------------
# _enumerate_ad_groups_paginated
# ---------------------------------------------------------------------------


def _mock_graph_get_for_enumeration(
    groups: list[dict[str, Any]],
    members_by_group: dict[str, list[dict[str, Any]]],
) -> Generator[dict[str, Any], None, None]:
    """Return a side_effect function for _graph_api_get that serves
    groups on the /groups URL and members on /groups/{id}/members URLs."""

    def side_effect(
        url: str,
        get_access_token: Any,  # noqa: ARG001
        params: dict[str, str] | None = None,  # noqa: ARG001
    ) -> dict[str, Any]:
        if "/members" in url:
            group_id = url.split("/groups/")[1].split("/members")[0]
            return _make_graph_page(members_by_group.get(group_id, []))
        return _make_graph_page(groups)

    return side_effect  # type: ignore[return-value]


@patch(f"{MODULE}._graph_api_get")
def test_enumerate_ad_groups_yields_groups(mock_get: MagicMock) -> None:
    groups = [
        {"id": "g1", "displayName": "Engineering"},
        {"id": "g2", "displayName": "Marketing"},
    ]
    members = {
        "g1": [{"userPrincipalName": "alice@contoso.com"}],
        "g2": [{"mail": "bob@contoso.onmicrosoft.com"}],
    }
    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, members)

    results = list(
        _enumerate_ad_groups_paginated(
            _fake_token, already_resolved=set(), graph_api_base=GRAPH_API_BASE
        )
    )

    assert len(results) == 2
    eng = next(r for r in results if r.id == "Engineering_g1")
    assert eng.user_emails == ["alice@contoso.com"]
    mkt = next(r for r in results if r.id == "Marketing_g2")
    assert mkt.user_emails == ["bob@contoso.com"]


@patch(f"{MODULE}._graph_api_get")
def test_enumerate_ad_groups_skips_already_resolved(mock_get: MagicMock) -> None:
    groups = [{"id": "g1", "displayName": "Engineering"}]
    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, {})

    results = list(
        _enumerate_ad_groups_paginated(
            _fake_token,
            already_resolved={"Engineering_g1"},
            graph_api_base=GRAPH_API_BASE,
        )
    )
    assert results == []


@patch(f"{MODULE}._graph_api_get")
def test_enumerate_ad_groups_circuit_breaker(mock_get: MagicMock) -> None:
    """Enumeration stops after AD_GROUP_ENUMERATION_THRESHOLD groups."""
    over_limit = AD_GROUP_ENUMERATION_THRESHOLD + 5
    groups = [{"id": f"g{i}", "displayName": f"Group{i}"} for i in range(over_limit)]
    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, {})

    results = list(
        _enumerate_ad_groups_paginated(
            _fake_token, already_resolved=set(), graph_api_base=GRAPH_API_BASE
        )
    )
    assert len(results) <= AD_GROUP_ENUMERATION_THRESHOLD


# ---------------------------------------------------------------------------
# get_sharepoint_external_groups
# ---------------------------------------------------------------------------


def _stub_role_assignment_resolution(
    groups_to_emails: dict[str, set[str]],
) -> tuple[MagicMock, MagicMock]:
    """Return (mock_sleep_and_retry, mock_recursive) pre-configured to
    simulate role-assignment group resolution."""
    mock_sleep = MagicMock()
    mock_recursive = MagicMock(
        return_value=GroupsResult(
            groups_to_emails=groups_to_emails,
            found_public_group=False,
        )
    )
    return mock_sleep, mock_recursive


@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
def test_default_skips_ad_enumeration(
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
) -> None:
    mock_recursive.return_value = GroupsResult(
        groups_to_emails={"SiteGroup_abc": {"alice@contoso.com"}},
        found_public_group=False,
    )

    results = get_sharepoint_external_groups(
        client_context=MagicMock(),
        graph_client=MagicMock(),
        graph_api_base=GRAPH_API_BASE,
    )

    assert len(results) == 1
    assert results[0].id == "SiteGroup_abc"
    assert results[0].user_emails == ["alice@contoso.com"]


@patch(f"{MODULE}._enumerate_ad_groups_paginated")
@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
def test_enumerate_all_includes_ad_groups(
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
    mock_enum: MagicMock,
) -> None:
    from ee.onyx.db.external_perm import ExternalUserGroup

    mock_recursive.return_value = GroupsResult(
        groups_to_emails={"SiteGroup_abc": {"alice@contoso.com"}},
        found_public_group=False,
    )
    mock_enum.return_value = [
        ExternalUserGroup(id="ADGroup_xyz", user_emails=["bob@contoso.com"]),
    ]

    results = get_sharepoint_external_groups(
        client_context=MagicMock(),
        graph_client=MagicMock(),
        get_access_token=_fake_token,
        enumerate_all_ad_groups=True,
        graph_api_base=GRAPH_API_BASE,
    )

    assert len(results) == 2
    ids = {r.id for r in results}
    assert ids == {"SiteGroup_abc", "ADGroup_xyz"}
    mock_enum.assert_called_once()


@patch(f"{MODULE}._enumerate_ad_groups_paginated")
@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
def test_enumerate_all_without_token_skips(
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
    mock_enum: MagicMock,
) -> None:
    """Even if enumerate_all_ad_groups=True, no token means skip."""
    mock_recursive.return_value = GroupsResult(
        groups_to_emails={},
        found_public_group=False,
    )

    results = get_sharepoint_external_groups(
        client_context=MagicMock(),
        graph_client=MagicMock(),
        get_access_token=None,
        enumerate_all_ad_groups=True,
        graph_api_base=GRAPH_API_BASE,
    )

    assert results == []
    mock_enum.assert_not_called()


# ---------------------------------------------------------------------------
# get_external_access_from_sharepoint – site page URL handling
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "site_base_url, web_url, expected_relative_url",
    [
        (
            "https://tenant.sharepoint.com/sites/Evan%27sSite",
            "https://tenant.sharepoint.com/sites/Evan%27sSite/SitePages/Home.aspx",
            "/sites/Evan%27sSite/SitePages/Home.aspx",
        ),
        (
            "https://tenant.sharepoint.com/sites/NormalSite",
            "https://tenant.sharepoint.com/sites/NormalSite/SitePages/Page.aspx",
            "/sites/NormalSite/SitePages/Page.aspx",
        ),
        (
            "https://tenant.sharepoint.com/sites/Site%20With%20Spaces",
            "https://tenant.sharepoint.com/sites/Site%20With%20Spaces/SitePages/Doc.aspx",
            "/sites/Site%20With%20Spaces/SitePages/Doc.aspx",
        ),
    ],
    ids=["apostrophe-encoded", "no-special-chars", "space-encoded"],
)
@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
def test_site_page_url_not_duplicated(
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
    site_base_url: str,
    web_url: str,
    expected_relative_url: str,
) -> None:
    """Regression: the server-relative URL passed to
    get_file_by_server_relative_url must preserve percent-encoding so the
    Office365 library's SPResPath.create_relative() recognises the site prefix
    and doesn't duplicate it."""
    mock_recursive.return_value = GroupsResult(
        groups_to_emails={},
        found_public_group=False,
    )

    ctx = MagicMock()
    ctx.base_url = site_base_url

    site_page = {"webUrl": web_url}

    get_external_access_from_sharepoint(
        client_context=ctx,
        graph_client=MagicMock(),
        drive_name=None,
        drive_item=None,
        site_page=site_page,
    )

    ctx.web.get_file_by_server_relative_url.assert_called_once_with(
        expected_relative_url
    )


# ---------------------------------------------------------------------------
# _is_public_item – sharing link visibility
# ---------------------------------------------------------------------------


def _make_permission(scope: str | None) -> MagicMock:
    perm = MagicMock()
    if scope is None:
        perm.link = None
    else:
        perm.link = MagicMock()
        perm.link.scope = scope
    return perm


def _make_drive_item_with_permissions(
    permissions: list[MagicMock],
) -> MagicMock:
    drive_item = MagicMock()
    drive_item.id = "item-123"
    drive_item.permissions.get_all.return_value = permissions
    return drive_item


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_anonymous_link_when_enabled(
    _mock_sleep: MagicMock,
) -> None:
    drive_item = _make_drive_item_with_permissions([_make_permission("anonymous")])
    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is True


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_org_link_when_enabled(
    _mock_sleep: MagicMock,
) -> None:
    drive_item = _make_drive_item_with_permissions([_make_permission("organization")])
    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is True


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_anonymous_link_when_disabled(
    _mock_sleep: MagicMock,
) -> None:
    """When the flag is off, anonymous links do NOT make the item public."""
    drive_item = _make_drive_item_with_permissions([_make_permission("anonymous")])
    assert _is_public_item(drive_item, treat_sharing_link_as_public=False) is False


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_org_link_when_disabled(
    _mock_sleep: MagicMock,
) -> None:
    """When the flag is off, org links do NOT make the item public."""
    drive_item = _make_drive_item_with_permissions([_make_permission("organization")])
    assert _is_public_item(drive_item, treat_sharing_link_as_public=False) is False


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_no_sharing_links(
    _mock_sleep: MagicMock,
) -> None:
    """User-level permissions only — not public even when flag is on."""
    drive_item = _make_drive_item_with_permissions([_make_permission(None)])
    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is False


@patch(f"{MODULE}.sleep_and_retry", side_effect=lambda query, _label: query)
def test_is_public_item_default_is_false(
    _mock_sleep: MagicMock,
) -> None:
    """Default value of the flag is False, so sharing links are ignored."""
    drive_item = _make_drive_item_with_permissions([_make_permission("anonymous")])
    assert _is_public_item(drive_item) is False


def test_is_public_item_skips_api_call_when_disabled() -> None:
    """When the flag is off, the permissions API is never called."""
    drive_item = MagicMock()
    _is_public_item(drive_item, treat_sharing_link_as_public=False)
    drive_item.permissions.get_all.assert_not_called()


# ---------------------------------------------------------------------------
# get_external_access_from_sharepoint – sharing link integration
# ---------------------------------------------------------------------------


@patch(f"{MODULE}._is_public_item", return_value=True)
@patch(f"{MODULE}.sleep_and_retry")
def test_drive_item_public_when_sharing_link_enabled(
    _mock_sleep: MagicMock,
    _mock_is_public: MagicMock,
) -> None:
    """With treat_sharing_link_as_public=True, a public item returns is_public=True
    and skips role-assignment resolution entirely."""
    drive_item = MagicMock()

    result = get_external_access_from_sharepoint(
        client_context=MagicMock(),
        graph_client=MagicMock(),
        drive_name="Documents",
        drive_item=drive_item,
        site_page=None,
        treat_sharing_link_as_public=True,
    )

    assert result.is_public is True
    assert result.external_user_emails == set()
    assert result.external_user_group_ids == set()


@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
@patch(f"{MODULE}._is_public_item", return_value=False)
def test_drive_item_falls_through_when_sharing_link_disabled(
    _mock_is_public: MagicMock,
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
) -> None:
    """With treat_sharing_link_as_public=False, the function falls through to
    role-assignment-based permission resolution."""
    mock_recursive.return_value = GroupsResult(
        groups_to_emails={"SiteMembers_abc": {"alice@contoso.com"}},
        found_public_group=False,
    )

    result = get_external_access_from_sharepoint(
        client_context=MagicMock(),
        graph_client=MagicMock(),
        drive_name="Documents",
        drive_item=MagicMock(),
        site_page=None,
        treat_sharing_link_as_public=False,
    )

    assert result.is_public is False
    assert len(result.external_user_group_ids) > 0


================================================
FILE: backend/tests/unit/ee/onyx/hooks/__init__.py
================================================


================================================
FILE: backend/tests/unit/ee/onyx/hooks/test_executor.py
================================================
"""Unit tests for the hook executor."""

import json
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest
from pydantic import BaseModel

from ee.onyx.hooks.executor import _execute_hook_impl as execute_hook
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingResponse

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_PAYLOAD: dict[str, Any] = {"query": "test", "user_email": "u@example.com"}
# A valid QueryProcessingResponse payload — used by success-path tests.
_RESPONSE_PAYLOAD: dict[str, Any] = {"query": "better test"}


def _make_hook(
    *,
    is_active: bool = True,
    endpoint_url: str | None = "https://hook.example.com/query",
    api_key: MagicMock | None = None,
    timeout_seconds: float = 5.0,
    fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,
    hook_id: int = 1,
    is_reachable: bool | None = None,
    hook_point: HookPoint = HookPoint.QUERY_PROCESSING,
) -> MagicMock:
    hook = MagicMock()
    hook.is_active = is_active
    hook.endpoint_url = endpoint_url
    hook.api_key = api_key
    hook.timeout_seconds = timeout_seconds
    hook.id = hook_id
    hook.fail_strategy = fail_strategy
    hook.is_reachable = is_reachable
    hook.hook_point = hook_point
    return hook


def _make_api_key(value: str) -> MagicMock:
    api_key = MagicMock()
    api_key.get_value.return_value = value
    return api_key


def _make_response(
    *,
    status_code: int = 200,
    json_return: Any = _RESPONSE_PAYLOAD,
    json_side_effect: Exception | None = None,
) -> MagicMock:
    """Build a response mock with controllable json() behaviour."""
    response = MagicMock()
    response.status_code = status_code
    if json_side_effect is not None:
        response.json.side_effect = json_side_effect
    else:
        response.json.return_value = json_return
    return response


def _setup_client(
    mock_client_cls: MagicMock,
    *,
    response: MagicMock | None = None,
    side_effect: Exception | None = None,
) -> MagicMock:
    """Wire up the httpx.Client mock and return the inner client.

    If side_effect is an httpx.HTTPStatusError, it is raised from
    raise_for_status() (matching real httpx behaviour) and post() returns a
    response mock with the matching status_code set.  All other exceptions are
    raised directly from post().
    """
    mock_client = MagicMock()

    if isinstance(side_effect, httpx.HTTPStatusError):
        error_response = MagicMock()
        error_response.status_code = side_effect.response.status_code
        error_response.raise_for_status.side_effect = side_effect
        mock_client.post = MagicMock(return_value=error_response)
    else:
        mock_client.post = MagicMock(
            side_effect=side_effect, return_value=response if not side_effect else None
        )

    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)
    return mock_client


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture()
def db_session() -> MagicMock:
    return MagicMock()


# ---------------------------------------------------------------------------
# Early-exit guards (no HTTP call, no DB writes)
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "multi_tenant,hook",
    [
        # MULTI_TENANT=True exits before the DB lookup — hook is irrelevant.
        pytest.param(True, None, id="multi_tenant"),
        pytest.param(False, None, id="hook_not_found"),
        pytest.param(False, _make_hook(is_active=False), id="hook_inactive"),
        pytest.param(False, _make_hook(endpoint_url=None), id="no_endpoint_url"),
    ],
)
def test_early_exit_returns_skipped_with_no_db_writes(
    db_session: MagicMock,
    multi_tenant: bool,
    hook: MagicMock | None,
) -> None:
    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", multi_tenant),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
    ):
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, HookSkipped)
    mock_update.assert_not_called()
    mock_log.assert_not_called()


# ---------------------------------------------------------------------------
# Successful HTTP call
# ---------------------------------------------------------------------------


def test_success_returns_validated_model_and_sets_reachable(
    db_session: MagicMock,
) -> None:
    hook = _make_hook()

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, QueryProcessingResponse)
    assert result.query == _RESPONSE_PAYLOAD["query"]
    _, update_kwargs = mock_update.call_args
    assert update_kwargs["is_reachable"] is True
    mock_log.assert_not_called()


def test_success_skips_reachable_write_when_already_true(db_session: MagicMock) -> None:
    """Deduplication guard: a hook already at is_reachable=True that succeeds
    must not trigger a DB write."""
    hook = _make_hook(is_reachable=True)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, QueryProcessingResponse)
    assert result.query == _RESPONSE_PAYLOAD["query"]
    mock_update.assert_not_called()


def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
    """response.json() returning a non-dict (e.g. list) must be treated as failure.
    The server responded, so is_reachable is not updated."""
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
            mock_client_cls,
            response=_make_response(json_return=["unexpected", "list"]),
        )
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, HookSoftFailed)
    _, log_kwargs = mock_log.call_args
    assert log_kwargs["is_success"] is False
    assert "non-dict" in (log_kwargs["error_message"] or "")
    mock_update.assert_not_called()


def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
    """response.json() raising must be treated as failure with SOFT strategy.
    The server responded, so is_reachable is not updated."""
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
            mock_client_cls,
            response=_make_response(
                json_side_effect=json.JSONDecodeError("not JSON", "", 0)
            ),
        )
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, HookSoftFailed)
    _, log_kwargs = mock_log.call_args
    assert log_kwargs["is_success"] is False
    assert "non-JSON" in (log_kwargs["error_message"] or "")
    mock_update.assert_not_called()


# ---------------------------------------------------------------------------
# HTTP failure paths
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "exception,fail_strategy,expected_type,expected_is_reachable",
    [
        # NetworkError → is_reachable=False
        pytest.param(
            httpx.ConnectError("refused"),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            False,
            id="connect_error_soft",
        ),
        pytest.param(
            httpx.ConnectError("refused"),
            HookFailStrategy.HARD,
            OnyxError,
            False,
            id="connect_error_hard",
        ),
        # 401/403 → is_reachable=False (api_key revoked)
        pytest.param(
            httpx.HTTPStatusError(
                "401",
                request=MagicMock(),
                response=MagicMock(status_code=401, text="Unauthorized"),
            ),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            False,
            id="auth_401_soft",
        ),
        pytest.param(
            httpx.HTTPStatusError(
                "403",
                request=MagicMock(),
                response=MagicMock(status_code=403, text="Forbidden"),
            ),
            HookFailStrategy.HARD,
            OnyxError,
            False,
            id="auth_403_hard",
        ),
        # TimeoutException → no is_reachable write (None)
        pytest.param(
            httpx.TimeoutException("timeout"),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            None,
            id="timeout_soft",
        ),
        pytest.param(
            httpx.TimeoutException("timeout"),
            HookFailStrategy.HARD,
            OnyxError,
            None,
            id="timeout_hard",
        ),
        # Other HTTP errors → no is_reachable write (None)
        pytest.param(
            httpx.HTTPStatusError(
                "500",
                request=MagicMock(),
                response=MagicMock(status_code=500, text="error"),
            ),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            None,
            id="http_status_error_soft",
        ),
        pytest.param(
            httpx.HTTPStatusError(
                "500",
                request=MagicMock(),
                response=MagicMock(status_code=500, text="error"),
            ),
            HookFailStrategy.HARD,
            OnyxError,
            None,
            id="http_status_error_hard",
        ),
    ],
)
def test_http_failure_paths(
    db_session: MagicMock,
    exception: Exception,
    fail_strategy: HookFailStrategy,
    expected_type: type,
    expected_is_reachable: bool | None,
) -> None:
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=exception)

        if expected_type is OnyxError:
            with pytest.raises(OnyxError) as exc_info:
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                    response_type=QueryProcessingResponse,
                )
            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
        else:
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
                response_type=QueryProcessingResponse,
            )
            assert isinstance(result, expected_type)

    if expected_is_reachable is None:
        mock_update.assert_not_called()
    else:
        mock_update.assert_called_once()
        _, kwargs = mock_update.call_args
        assert kwargs["is_reachable"] is expected_is_reachable


# ---------------------------------------------------------------------------
# Authorization header
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "api_key_value,expect_auth_header",
    [
        pytest.param("secret-token", True, id="api_key_present"),
        pytest.param(None, False, id="api_key_absent"),
    ],
)
def test_authorization_header(
    db_session: MagicMock,
    api_key_value: str | None,
    expect_auth_header: bool,
) -> None:
    api_key = _make_api_key(api_key_value) if api_key_value else None
    hook = _make_hook(api_key=api_key)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit"),
        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        mock_client = _setup_client(mock_client_cls, response=_make_response())
        execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    _, call_kwargs = mock_client.post.call_args
    if expect_auth_header:
        assert call_kwargs["headers"]["Authorization"] == f"Bearer {api_key_value}"
    else:
        assert "Authorization" not in call_kwargs["headers"]


# ---------------------------------------------------------------------------
# Persist session failure
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "http_exception,expect_onyx_error",
    [
        pytest.param(None, False, id="success_path"),
        pytest.param(httpx.ConnectError("refused"), True, id="hard_fail_path"),
    ],
)
def test_persist_session_failure_is_swallowed(
    db_session: MagicMock,
    http_exception: Exception | None,
    expect_onyx_error: bool,
) -> None:
    """DB session failure in _persist_result must not mask the real return value or OnyxError."""
    hook = _make_hook(fail_strategy=HookFailStrategy.HARD)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
            "ee.onyx.hooks.executor.get_session_with_current_tenant",
            side_effect=RuntimeError("DB unavailable"),
        ),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
            mock_client_cls,
            response=_make_response() if not http_exception else None,
            side_effect=http_exception,
        )

        if expect_onyx_error:
            with pytest.raises(OnyxError) as exc_info:
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                    response_type=QueryProcessingResponse,
                )
            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
        else:
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
                response_type=QueryProcessingResponse,
            )
            assert isinstance(result, QueryProcessingResponse)
            assert result.query == _RESPONSE_PAYLOAD["query"]


# ---------------------------------------------------------------------------
# Response model validation
# ---------------------------------------------------------------------------


class _StrictResponse(BaseModel):
    """Strict model used to reliably trigger a ValidationError in tests."""

    required_field: str  # no default → missing key raises ValidationError


@pytest.mark.parametrize(
    "fail_strategy,expected_type",
    [
        pytest.param(
            HookFailStrategy.SOFT, HookSoftFailed, id="validation_failure_soft"
        ),
        pytest.param(HookFailStrategy.HARD, OnyxError, id="validation_failure_hard"),
    ],
)
def test_response_validation_failure_respects_fail_strategy(
    db_session: MagicMock,
    fail_strategy: HookFailStrategy,
    expected_type: type,
) -> None:
    """A response that fails response_model validation is treated like any other
    hook failure: logged, is_reachable left unchanged, fail_strategy respected."""
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        # Response payload is missing required_field → ValidationError
        _setup_client(mock_client_cls, response=_make_response(json_return={}))

        if expected_type is OnyxError:
            with pytest.raises(OnyxError) as exc_info:
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                    response_type=_StrictResponse,
                )
            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
        else:
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
                response_type=_StrictResponse,
            )
            assert isinstance(result, HookSoftFailed)

    # is_reachable must not be updated — server responded correctly
    mock_update.assert_not_called()
    # failure must be logged
    mock_log.assert_called_once()
    _, log_kwargs = mock_log.call_args
    assert log_kwargs["is_success"] is False
    assert "validation" in (log_kwargs["error_message"] or "").lower()


# ---------------------------------------------------------------------------
# Outer soft-fail guard in execute_hook
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "fail_strategy,expected_type",
    [
        pytest.param(HookFailStrategy.SOFT, HookSoftFailed, id="unexpected_exc_soft"),
        pytest.param(HookFailStrategy.HARD, ValueError, id="unexpected_exc_hard"),
    ],
)
def test_unexpected_exception_in_inner_respects_fail_strategy(
    db_session: MagicMock,
    fail_strategy: HookFailStrategy,
    expected_type: type,
) -> None:
    """An unexpected exception raised by _execute_hook_inner (not an OnyxError from
    HARD fail — e.g. a bug or an assertion error) must be swallowed and return
    HookSoftFailed for SOFT strategy, or re-raised for HARD strategy."""
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
            "ee.onyx.hooks.executor._execute_hook_inner",
            side_effect=ValueError("unexpected bug"),
        ),
    ):
        if expected_type is HookSoftFailed:
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
                response_type=QueryProcessingResponse,
            )
            assert isinstance(result, HookSoftFailed)
        else:
            with pytest.raises(ValueError, match="unexpected bug"):
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                    response_type=QueryProcessingResponse,
                )


def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> None:
    """is_reachable update failing (e.g. concurrent hook deletion) must not
    prevent the execution log from being written.

    Simulates the production failure path: update_hook__no_commit raises
    OnyxError(NOT_FOUND) as it would if the hook was concurrently deleted
    between the initial lookup and the reachable update.
    """
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch(
            "ee.onyx.hooks.executor.update_hook__no_commit",
            side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, "hook deleted"),
        ),
        patch(
            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=httpx.ConnectError("refused"))
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
            response_type=QueryProcessingResponse,
        )

    assert isinstance(result, HookSoftFailed)
    mock_log.assert_called_once()


================================================
FILE: backend/tests/unit/ee/onyx/server/__init__.py
================================================


================================================
FILE: backend/tests/unit/ee/onyx/server/billing/__init__.py
================================================


================================================
FILE: backend/tests/unit/ee/onyx/server/billing/conftest.py
================================================
"""Shared fixtures and utilities for billing tests."""

from datetime import datetime
from datetime import timezone
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import pytest

from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import PlanType


@pytest.fixture
def mock_license_payload() -> LicensePayload:
    """Create a valid LicensePayload for testing."""
    return make_license_payload()


@pytest.fixture
def mock_expired_license_payload() -> LicensePayload:
    """Create an expired LicensePayload for testing."""
    return make_license_payload(expired=True)


def make_license_payload(
    tenant_id: str = "tenant_123",
    seats: int = 10,
    expired: bool = False,
) -> LicensePayload:
    """Create a LicensePayload for testing.

    Args:
        tenant_id: The tenant ID
        seats: Number of seats
        expired: If True, creates an expired license
    """
    now = datetime.now(timezone.utc)
    expires_at = (
        datetime(2020, 1, 1, tzinfo=timezone.utc)
        if expired
        else datetime(2030, 1, 1, tzinfo=timezone.utc)
    )

    return LicensePayload(
        version="1.0",
        tenant_id=tenant_id,
        issued_at=now,
        expires_at=expires_at,
        seats=seats,
        plan_type=PlanType.MONTHLY,
    )


def make_mock_response(json_data: dict) -> MagicMock:
    """Create a mock httpx response.

    Args:
        json_data: The JSON data to return from response.json()
    """
    mock_response = MagicMock()
    mock_response.json.return_value = json_data
    mock_response.raise_for_status = MagicMock()
    return mock_response


def make_mock_http_client(
    method: str = "post",
    response: MagicMock | None = None,
    side_effect: Exception | None = None,
) -> MagicMock:
    """Create a mock httpx.AsyncClient context manager.

    Args:
        method: HTTP method to mock ("get" or "post")
        response: Mock response to return
        side_effect: Exception to raise instead of returning response
    """
    mock_client = MagicMock()
    mock_method = AsyncMock(return_value=response, side_effect=side_effect)
    setattr(mock_client.return_value.__aenter__.return_value, method, mock_method)
    return mock_client


================================================
FILE: backend/tests/unit/ee/onyx/server/billing/test_billing_api.py
================================================
"""Tests for the unified billing API endpoints."""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


class TestCreateCheckoutSession:
    """Tests for create_checkout_session endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_checkout_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_creates_checkout_session_cloud(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should create checkout session for cloud deployment."""
        from ee.onyx.server.billing.api import create_checkout_session
        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest

        mock_get_license.return_value = None
        mock_get_tenant.return_value = "tenant_123"
        mock_service.return_value = CreateCheckoutSessionResponse(
            stripe_checkout_url="https://checkout.stripe.com/session"
        )

        request = CreateCheckoutSessionRequest(billing_period="monthly")
        result = await create_checkout_session(
            request=request, _=MagicMock(), db_session=MagicMock()
        )

        assert result.stripe_checkout_url == "https://checkout.stripe.com/session"
        mock_service.assert_called_once()

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_checkout_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_creates_checkout_session_self_hosted(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should create checkout session for self-hosted with license."""
        from ee.onyx.server.billing.api import create_checkout_session
        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest

        mock_get_license.return_value = "license_data_blob"
        mock_get_tenant.return_value = None
        mock_service.return_value = CreateCheckoutSessionResponse(
            stripe_checkout_url="https://checkout.stripe.com/session"
        )

        request = CreateCheckoutSessionRequest(
            billing_period="annual", email="test@example.com"
        )
        result = await create_checkout_session(
            request=request, _=MagicMock(), db_session=MagicMock()
        )

        assert result.stripe_checkout_url == "https://checkout.stripe.com/session"
        call_kwargs = mock_service.call_args[1]
        assert call_kwargs["billing_period"] == "annual"
        assert call_kwargs["email"] == "test@example.com"
        assert call_kwargs["license_data"] == "license_data_blob"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_checkout_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_raises_on_service_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should propagate OnyxError when service fails."""
        from ee.onyx.server.billing.api import create_checkout_session

        mock_get_license.return_value = None
        mock_get_tenant.return_value = "tenant_123"
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Stripe error",
            status_code_override=502,
        )

        with pytest.raises(OnyxError) as exc_info:
            await create_checkout_session(
                request=None, _=MagicMock(), db_session=MagicMock()
            )

        assert exc_info.value.status_code == 502
        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert exc_info.value.detail == "Stripe error"


class TestCreateCustomerPortalSession:
    """Tests for create_customer_portal_session endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api.create_portal_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_requires_license_for_self_hosted(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,  # noqa: ARG002
    ) -> None:
        """Should reject self-hosted without license."""
        from ee.onyx.server.billing.api import create_customer_portal_session

        mock_get_license.return_value = None
        mock_get_tenant.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            await create_customer_portal_session(
                request=None, _=MagicMock(), db_session=MagicMock()
            )

        assert exc_info.value.status_code == 400
        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR
        assert exc_info.value.detail == "No license found"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_portal_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_creates_portal_session(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should create portal session with valid license."""
        from ee.onyx.server.billing.api import create_customer_portal_session

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_service.return_value = CreateCustomerPortalSessionResponse(
            stripe_customer_portal_url="https://billing.stripe.com/portal"
        )

        result = await create_customer_portal_session(
            request=None, _=MagicMock(), db_session=MagicMock()
        )

        assert result.stripe_customer_portal_url == "https://billing.stripe.com/portal"


class TestGetBillingInformation:
    """Tests for get_billing_information endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_returns_not_subscribed_without_license(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
    ) -> None:
        """Should return subscribed=False for self-hosted without license."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = None
        mock_get_tenant.return_value = None

        result = await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert isinstance(result, SubscriptionStatusResponse)
        assert result.subscribed is False

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.get_billing_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_returns_billing_info(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should return billing information with valid license."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_service.return_value = BillingInformationResponse(
            tenant_id="tenant_123",
            status="active",
            seats=10,
        )

        result = await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert isinstance(result, BillingInformationResponse)
        assert result.tenant_id == "tenant_123"
        assert result.status == "active"
        assert result.seats == 10


class TestUpdateSeats:
    """Tests for update_seats endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_requires_license_for_self_hosted(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
    ) -> None:
        """Should reject self-hosted without license."""
        from ee.onyx.server.billing.api import update_seats
        from ee.onyx.server.billing.models import SeatUpdateRequest

        mock_get_license.return_value = None
        mock_get_tenant.return_value = None

        request = SeatUpdateRequest(new_seat_count=10)

        with pytest.raises(OnyxError) as exc_info:
            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR
        assert exc_info.value.detail == "No license found"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.get_used_seats")
    @patch("ee.onyx.server.billing.api.update_seat_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_updates_seats_successfully(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_get_used_seats: MagicMock,
    ) -> None:
        """Should update seats with valid license."""
        from ee.onyx.server.billing.api import update_seats
        from ee.onyx.server.billing.models import SeatUpdateRequest

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_get_used_seats.return_value = 5
        mock_service.return_value = SeatUpdateResponse(
            success=True,
            current_seats=15,
            used_seats=5,
            message="Seats updated to 15",
        )

        request = SeatUpdateRequest(new_seat_count=15)
        result = await update_seats(
            request=request, _=MagicMock(), db_session=MagicMock()
        )

        assert result.success is True
        assert result.current_seats == 15
        assert result.used_seats == 5
        mock_service.assert_called_once_with(
            new_seat_count=15,
            license_data="license_blob",
            tenant_id=None,
        )

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.get_used_seats")
    @patch("ee.onyx.server.billing.api.update_seat_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_handles_billing_service_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_get_used_seats: MagicMock,
    ) -> None:
        """Should propagate OnyxError from service layer."""
        from ee.onyx.server.billing.api import update_seats
        from ee.onyx.server.billing.models import SeatUpdateRequest

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_get_used_seats.return_value = 0
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Cannot reduce below 10 seats",
            status_code_override=400,
        )

        request = SeatUpdateRequest(new_seat_count=5)

        with pytest.raises(OnyxError) as exc_info:
            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert exc_info.value.detail == "Cannot reduce below 10 seats"


class TestCircuitBreaker:
    """Tests for the billing circuit breaker functionality."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._is_billing_circuit_open")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_returns_503_when_circuit_open(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_circuit_open: MagicMock,
    ) -> None:
        """Should return 503 when circuit breaker is open."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open.return_value = True

        with pytest.raises(OnyxError) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 503
        assert exc_info.value.error_code is OnyxErrorCode.SERVICE_UNAVAILABLE
        assert "Connect to Stripe" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._open_billing_circuit")
    @patch("ee.onyx.server.billing.api._is_billing_circuit_open")
    @patch("ee.onyx.server.billing.api.get_billing_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_opens_circuit_on_502_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_circuit_open_check: MagicMock,
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 502 error."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Connection failed",
            status_code_override=502,
        )

        with pytest.raises(OnyxError) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 502
        mock_open_circuit.assert_called_once()

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._open_billing_circuit")
    @patch("ee.onyx.server.billing.api._is_billing_circuit_open")
    @patch("ee.onyx.server.billing.api.get_billing_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_opens_circuit_on_503_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_circuit_open_check: MagicMock,
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 503 error."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Service unavailable",
            status_code_override=503,
        )

        with pytest.raises(OnyxError) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 503
        mock_open_circuit.assert_called_once()

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._open_billing_circuit")
    @patch("ee.onyx.server.billing.api._is_billing_circuit_open")
    @patch("ee.onyx.server.billing.api.get_billing_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_opens_circuit_on_504_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_circuit_open_check: MagicMock,
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 504 error."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Gateway timeout",
            status_code_override=504,
        )

        with pytest.raises(OnyxError) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 504
        mock_open_circuit.assert_called_once()

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._open_billing_circuit")
    @patch("ee.onyx.server.billing.api._is_billing_circuit_open")
    @patch("ee.onyx.server.billing.api.get_billing_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_does_not_open_circuit_on_400_error(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_circuit_open_check: MagicMock,
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should NOT open circuit breaker on 400 error (client error)."""
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
        mock_service.side_effect = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "Bad request",
            status_code_override=400,
        )

        with pytest.raises(OnyxError) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
        mock_open_circuit.assert_not_called()


class TestResetConnection:
    """Tests for reset_stripe_connection endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.billing.api._close_billing_circuit")
    async def test_closes_circuit_for_self_hosted(
        self,
        mock_close_circuit: MagicMock,
    ) -> None:
        """Should close circuit breaker for self-hosted deployment."""
        from ee.onyx.server.billing.api import reset_stripe_connection

        result = await reset_stripe_connection(_=MagicMock())

        assert result.success is True
        assert "re-enabled" in result.message.lower()
        mock_close_circuit.assert_called_once()

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", True)
    @patch("ee.onyx.server.billing.api._close_billing_circuit")
    async def test_noop_for_cloud(
        self,
        mock_close_circuit: MagicMock,
    ) -> None:
        """Should be no-op for cloud deployment."""
        from ee.onyx.server.billing.api import reset_stripe_connection

        result = await reset_stripe_connection(_=MagicMock())

        assert result.success is True
        assert "not applicable" in result.message.lower()
        mock_close_circuit.assert_not_called()


class TestCheckoutSessionWithSeats:
    """Tests for checkout session with seats parameter."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.get_used_seats")
    @patch("ee.onyx.server.billing.api.create_checkout_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_passes_seats_parameter(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
        mock_get_used_seats: MagicMock,
    ) -> None:
        """Should pass seats parameter to service."""
        from ee.onyx.server.billing.api import create_checkout_session
        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest

        mock_get_license.return_value = None
        mock_get_tenant.return_value = "tenant_123"
        mock_get_used_seats.return_value = 5
        mock_service.return_value = CreateCheckoutSessionResponse(
            stripe_checkout_url="https://checkout.stripe.com/session"
        )

        request = CreateCheckoutSessionRequest(billing_period="monthly", seats=25)
        await create_checkout_session(
            request=request, _=MagicMock(), db_session=MagicMock()
        )

        call_kwargs = mock_service.call_args[1]
        assert call_kwargs["seats"] == 25

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_checkout_service")
    @patch("ee.onyx.server.billing.api._get_tenant_id")
    @patch("ee.onyx.server.billing.api._get_license_data")
    async def test_seats_none_when_not_provided(
        self,
        mock_get_license: MagicMock,
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
        """Should pass None for seats when not provided."""
        from ee.onyx.server.billing.api import create_checkout_session
        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest

        mock_get_license.return_value = None
        mock_get_tenant.return_value = "tenant_123"
        mock_service.return_value = CreateCheckoutSessionResponse(
            stripe_checkout_url="https://checkout.stripe.com/session"
        )

        request = CreateCheckoutSessionRequest(billing_period="annual")
        await create_checkout_session(
            request=request, _=MagicMock(), db_session=MagicMock()
        )

        call_kwargs = mock_service.call_args[1]
        assert call_kwargs["seats"] is None


================================================
FILE: backend/tests/unit/ee/onyx/server/billing/test_billing_service.py
================================================
"""Tests for the billing service layer."""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest

from .conftest import make_mock_http_client
from .conftest import make_mock_response
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


class TestMakeBillingRequest:
    """Tests for the _make_billing_request helper."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._get_headers")
    @patch("ee.onyx.server.billing.service._get_base_url")
    async def test_makes_post_request(
        self,
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
        """Should make POST request with body."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
        mock_headers.return_value = {"Authorization": "Bearer token"}
        mock_response = make_mock_response({"success": True})
        mock_client = make_mock_http_client("post", response=mock_response)

        with patch("httpx.AsyncClient", mock_client):
            result = await _make_billing_request(
                method="POST",
                path="/test-endpoint",
                body={"key": "value"},
            )

        assert result == {"success": True}

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._get_headers")
    @patch("ee.onyx.server.billing.service._get_base_url")
    async def test_makes_get_request(
        self,
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
        """Should make GET request with params."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
        mock_headers.return_value = {"Authorization": "Bearer token"}
        mock_response = make_mock_response({"data": "test"})
        mock_client = make_mock_http_client("get", response=mock_response)

        with patch("httpx.AsyncClient", mock_client):
            result = await _make_billing_request(
                method="GET",
                path="/test-endpoint",
                params={"tenant_id": "123"},
            )

        assert result == {"data": "test"}

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._get_headers")
    @patch("ee.onyx.server.billing.service._get_base_url")
    async def test_raises_on_http_error(
        self,
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
        """Should raise OnyxError on HTTP error."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
        mock_headers.return_value = {}
        mock_response = make_mock_response({"detail": "Bad request"})
        mock_response.status_code = 400
        error = httpx.HTTPStatusError(
            "Error", request=MagicMock(), response=mock_response
        )
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
            with pytest.raises(OnyxError) as exc_info:
                await _make_billing_request(
                    method="POST",
                    path="/test",
                    error_message="Test failed",
                )

        assert exc_info.value.status_code == 400
        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert "Bad request" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._get_headers")
    @patch("ee.onyx.server.billing.service._get_base_url")
    async def test_follows_redirects(
        self,
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
        """AsyncClient must be created with follow_redirects=True.

        The target server (cloud data plane for self-hosted, control
        plane for cloud) may sit behind nginx that returns 308
        (HTTP→HTTPS). httpx does not follow redirects by default,
        so we must explicitly opt in.
        """
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "http://api.example.com"
        mock_headers.return_value = {"Authorization": "Bearer token"}
        mock_response = make_mock_response({"ok": True})
        mock_client = make_mock_http_client("get", response=mock_response)

        with patch("httpx.AsyncClient", mock_client):
            await _make_billing_request(method="GET", path="/test")

        mock_client.assert_called_once_with(timeout=30.0, follow_redirects=True)

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._get_headers")
    @patch("ee.onyx.server.billing.service._get_base_url")
    async def test_raises_on_connection_error(
        self,
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
        """Should raise OnyxError on connection error."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
        mock_headers.return_value = {}
        error = httpx.RequestError("Connection failed")
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
            with pytest.raises(OnyxError) as exc_info:
                await _make_billing_request(method="POST", path="/test")

        assert exc_info.value.status_code == 502
        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert "Failed to connect" in exc_info.value.detail


class TestCreateCheckoutSession:
    """Tests for create_checkout_session service function."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_creates_checkout_session(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should create checkout session and return URL."""
        from ee.onyx.server.billing.service import create_checkout_session

        mock_request.return_value = {"url": "https://checkout.stripe.com/session"}

        result = await create_checkout_session(
            billing_period="monthly",
            email="test@example.com",
            license_data="license_blob",
            redirect_url="https://app.example.com/success",
        )

        assert isinstance(result, CreateCheckoutSessionResponse)
        assert result.stripe_checkout_url == "https://checkout.stripe.com/session"

        call_kwargs = mock_request.call_args[1]
        assert call_kwargs["method"] == "POST"
        assert call_kwargs["path"] == "/create-checkout-session"
        assert call_kwargs["body"]["billing_period"] == "monthly"
        assert call_kwargs["body"]["email"] == "test@example.com"


class TestCreateCustomerPortalSession:
    """Tests for create_customer_portal_session service function."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_creates_portal_session(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should create portal session and return URL."""
        from ee.onyx.server.billing.service import create_customer_portal_session

        mock_request.return_value = {"url": "https://billing.stripe.com/portal"}

        result = await create_customer_portal_session(
            license_data="license_blob",
            return_url="https://app.example.com/billing",
        )

        assert isinstance(result, CreateCustomerPortalSessionResponse)
        assert result.stripe_customer_portal_url == "https://billing.stripe.com/portal"


class TestGetBillingInformation:
    """Tests for get_billing_information service function."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_returns_billing_info(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should return billing information."""
        from ee.onyx.server.billing.service import get_billing_information

        mock_request.return_value = {
            "tenant_id": "tenant_123",
            "status": "active",
            "seats": 10,
            "billing_period": "monthly",
        }

        result = await get_billing_information(license_data="license_blob")

        assert isinstance(result, BillingInformationResponse)
        assert result.tenant_id == "tenant_123"
        assert result.status == "active"
        assert result.seats == 10

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_returns_not_subscribed(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should return SubscriptionStatusResponse when not subscribed."""
        from ee.onyx.server.billing.service import get_billing_information

        mock_request.return_value = {"subscribed": False}

        result = await get_billing_information(license_data="license_blob")

        assert isinstance(result, SubscriptionStatusResponse)
        assert result.subscribed is False


class TestUpdateSeatCount:
    """Tests for update_seat_count service function."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_updates_seats(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should update seat count and return response."""
        from ee.onyx.server.billing.service import update_seat_count

        mock_request.return_value = {
            "success": True,
            "current_seats": 15,
            "used_seats": 5,
            "message": "Seats updated to 15",
        }

        result = await update_seat_count(
            new_seat_count=15,
            license_data="license_blob",
        )

        assert isinstance(result, SeatUpdateResponse)
        assert result.success is True
        assert result.current_seats == 15
        assert result.used_seats == 5

        call_kwargs = mock_request.call_args[1]
        assert call_kwargs["body"]["new_seat_count"] == 15

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.service._make_billing_request")
    async def test_includes_tenant_id_for_cloud(
        self,
        mock_request: AsyncMock,
    ) -> None:
        """Should include tenant_id in body for cloud deployments."""
        from ee.onyx.server.billing.service import update_seat_count

        mock_request.return_value = {
            "success": True,
            "current_seats": 10,
            "used_seats": 5,
        }

        with patch("ee.onyx.server.billing.service.MULTI_TENANT", True):
            await update_seat_count(
                new_seat_count=10,
                tenant_id="tenant_123",
            )

        call_kwargs = mock_request.call_args[1]
        assert call_kwargs["body"]["tenant_id"] == "tenant_123"


================================================
FILE: backend/tests/unit/ee/onyx/server/billing/test_proxy.py
================================================
"""Tests for the billing proxy endpoints."""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest

from .conftest import make_license_payload
from .conftest import make_mock_http_client
from .conftest import make_mock_response
from ee.onyx.server.license.models import LicensePayload


class TestProxySeatUpdate:
    """Tests for proxy_seat_update endpoint."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.forward_to_control_plane")
    async def test_proxies_seat_update(
        self,
        mock_forward: AsyncMock,
    ) -> None:
        """Should forward seat update request to control plane."""
        from ee.onyx.server.billing.models import SeatUpdateRequest
        from ee.onyx.server.tenants.proxy import proxy_seat_update

        mock_forward.return_value = {
            "success": True,
            "current_seats": 15,
            "used_seats": 5,
            "message": "Seats updated",
        }

        license_payload = make_license_payload(tenant_id="tenant_123", seats=10)

        request = SeatUpdateRequest(new_seat_count=15)
        result = await proxy_seat_update(
            request_body=request,
            license_payload=license_payload,
        )

        assert result.success is True
        assert result.current_seats == 15
        assert result.used_seats == 5

        mock_forward.assert_called_once_with(
            "POST",
            "/seats/update",
            body={
                "tenant_id": "tenant_123",
                "new_seat_count": 15,
            },
        )

    @pytest.mark.asyncio
    async def test_rejects_missing_tenant_id(self) -> None:
        """Should reject license without tenant_id."""
        from fastapi import HTTPException

        from ee.onyx.server.billing.models import SeatUpdateRequest
        from ee.onyx.server.tenants.proxy import proxy_seat_update

        # Create a license payload without tenant_id by using a mock
        license_payload = MagicMock(spec=LicensePayload)
        license_payload.tenant_id = None

        request = SeatUpdateRequest(new_seat_count=10)

        with pytest.raises(HTTPException) as exc_info:
            await proxy_seat_update(
                request_body=request,
                license_payload=license_payload,
            )

        assert exc_info.value.status_code == 401
        assert "tenant_id" in exc_info.value.detail


class TestForwardToControlPlane:
    """Tests for forward_to_control_plane helper."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.generate_data_plane_token")
    @patch("ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL", "https://cp.test")
    async def test_forwards_post_request(
        self,
        mock_token: MagicMock,
    ) -> None:
        """Should forward POST request with JWT auth."""
        from ee.onyx.server.tenants.proxy import forward_to_control_plane

        mock_token.return_value = "jwt_token"
        mock_response = make_mock_response({"result": "success"})
        mock_client = make_mock_http_client("post", response=mock_response)

        with patch("httpx.AsyncClient", mock_client):
            result = await forward_to_control_plane(
                "POST",
                "/test-path",
                body={"key": "value"},
            )

        assert result == {"result": "success"}

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.generate_data_plane_token")
    @patch("ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL", "https://cp.test")
    async def test_forwards_get_request(
        self,
        mock_token: MagicMock,
    ) -> None:
        """Should forward GET request with params."""
        from ee.onyx.server.tenants.proxy import forward_to_control_plane

        mock_token.return_value = "jwt_token"
        mock_response = make_mock_response({"data": "test"})
        mock_client = make_mock_http_client("get", response=mock_response)

        with patch("httpx.AsyncClient", mock_client):
            result = await forward_to_control_plane(
                "GET",
                "/billing-info",
                params={"tenant_id": "123"},
            )

        assert result == {"data": "test"}

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.generate_data_plane_token")
    @patch("ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL", "https://cp.test")
    async def test_raises_on_http_error(
        self,
        mock_token: MagicMock,
    ) -> None:
        """Should raise HTTPException on HTTP error."""
        from fastapi import HTTPException

        from ee.onyx.server.tenants.proxy import forward_to_control_plane

        mock_token.return_value = "jwt_token"
        mock_response = make_mock_response({"detail": "Bad request"})
        mock_response.status_code = 400
        error = httpx.HTTPStatusError(
            "Error", request=MagicMock(), response=mock_response
        )
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
            with pytest.raises(HTTPException) as exc_info:
                await forward_to_control_plane("POST", "/test")

        assert exc_info.value.status_code == 400

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.generate_data_plane_token")
    @patch("ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL", "https://cp.test")
    async def test_raises_on_connection_error(
        self,
        mock_token: MagicMock,
    ) -> None:
        """Should raise HTTPException on connection error."""
        from fastapi import HTTPException

        from ee.onyx.server.tenants.proxy import forward_to_control_plane

        mock_token.return_value = "jwt_token"
        error = httpx.RequestError("Connection failed")
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
            with pytest.raises(HTTPException) as exc_info:
                await forward_to_control_plane("POST", "/test")

        assert exc_info.value.status_code == 502
        assert "Failed to connect" in exc_info.value.detail


class TestVerifyLicenseAuth:
    """Tests for verify_license_auth helper."""

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.tenants.proxy.verify_license_signature")
    @patch("ee.onyx.server.tenants.proxy.is_license_valid")
    async def test_valid_license(
        self,
        mock_is_valid: MagicMock,
        mock_verify: MagicMock,
    ) -> None:
        """Should return payload for valid license."""
        from ee.onyx.server.tenants.proxy import verify_license_auth

        mock_payload = make_license_payload()
        mock_verify.return_value = mock_payload
        mock_is_valid.return_value = True

        result = verify_license_auth("valid_license_blob")

        assert result == mock_payload

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.tenants.proxy.verify_license_signature")
    async def test_invalid_signature(
        self,
        mock_verify: MagicMock,
    ) -> None:
        """Should reject invalid license signature."""
        from fastapi import HTTPException

        from ee.onyx.server.tenants.proxy import verify_license_auth

        mock_verify.side_effect = ValueError("Invalid signature")

        with pytest.raises(HTTPException) as exc_info:
            verify_license_auth("invalid_license")

        assert exc_info.value.status_code == 401
        assert "Invalid license" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.tenants.proxy.verify_license_signature")
    @patch("ee.onyx.server.tenants.proxy.is_license_valid")
    async def test_expired_license_rejected(
        self,
        mock_is_valid: MagicMock,
        mock_verify: MagicMock,
    ) -> None:
        """Should reject expired license when allow_expired=False."""
        from fastapi import HTTPException

        from ee.onyx.server.tenants.proxy import verify_license_auth

        mock_payload = make_license_payload(expired=True)
        mock_verify.return_value = mock_payload
        mock_is_valid.return_value = False

        with pytest.raises(HTTPException) as exc_info:
            verify_license_auth("expired_license", allow_expired=False)

        assert exc_info.value.status_code == 401
        assert "expired" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.tenants.proxy.verify_license_signature")
    @patch("ee.onyx.server.tenants.proxy.is_license_valid")
    async def test_expired_license_allowed(
        self,
        mock_is_valid: MagicMock,
        mock_verify: MagicMock,
    ) -> None:
        """Should accept expired license when allow_expired=True."""
        from ee.onyx.server.tenants.proxy import verify_license_auth

        mock_payload = make_license_payload(expired=True)
        mock_verify.return_value = mock_payload
        mock_is_valid.return_value = False

        result = verify_license_auth("expired_license", allow_expired=True)

        assert result == mock_payload


================================================
FILE: backend/tests/unit/ee/onyx/server/features/__init__.py
================================================


================================================
FILE: backend/tests/unit/ee/onyx/server/features/hooks/__init__.py
================================================


================================================
FILE: backend/tests/unit/ee/onyx/server/features/hooks/test_api.py
================================================
"""Unit tests for ee.onyx.server.features.hooks.api helpers.

Covers:
- _check_ssrf_safety: scheme enforcement and private-IP blocklist
- _validate_endpoint: httpx exception → HookValidateStatus mapping
  ConnectTimeout     → timeout         (any timeout directs user to increase timeout_seconds)
  ConnectError       → cannot_connect  (DNS / TLS failure)
  ReadTimeout et al. → timeout         (TCP connected, server slow)
  Any other exc      → cannot_connect
- _raise_for_validation_failure: HookValidateStatus → OnyxError mapping
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest

from ee.onyx.server.features.hooks.api import _check_ssrf_safety
from ee.onyx.server.features.hooks.api import _raise_for_validation_failure
from ee.onyx.server.features.hooks.api import _validate_endpoint
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.models import HookValidateResponse
from onyx.hooks.models import HookValidateStatus

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_URL = "https://example.com/hook"
_API_KEY = "secret"
_TIMEOUT = 5.0


def _mock_response(status_code: int) -> MagicMock:
    response = MagicMock()
    response.status_code = status_code
    return response


# ---------------------------------------------------------------------------
# _check_ssrf_safety
# ---------------------------------------------------------------------------


class TestCheckSsrfSafety:
    def _call(self, url: str) -> None:
        _check_ssrf_safety(url)

    # --- scheme checks ---

    def test_https_is_allowed(self) -> None:
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_dns:
            mock_dns.return_value = [(None, None, None, None, ("93.184.216.34", 0))]
            self._call("https://example.com/hook")  # must not raise

    @pytest.mark.parametrize(
        "url", ["http://example.com/hook", "ftp://example.com/hook"]
    )
    def test_non_https_scheme_rejected(self, url: str) -> None:
        with pytest.raises(OnyxError) as exc_info:
            self._call(url)
        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY
        assert "https" in (exc_info.value.detail or "").lower()

    # --- private IP blocklist ---

    @pytest.mark.parametrize(
        "ip",
        [
            pytest.param("127.0.0.1", id="loopback"),
            pytest.param("10.0.0.1", id="RFC1918-A"),
            pytest.param("172.16.0.1", id="RFC1918-B"),
            pytest.param("192.168.1.1", id="RFC1918-C"),
            pytest.param("169.254.169.254", id="link-local-IMDS"),
            pytest.param("100.64.0.1", id="shared-address-space"),
            pytest.param("::1", id="IPv6-loopback"),
            pytest.param("fc00::1", id="IPv6-ULA"),
            pytest.param("fe80::1", id="IPv6-link-local"),
        ],
    )
    def test_private_ip_is_blocked(self, ip: str) -> None:
        with (
            patch("onyx.utils.url.socket.getaddrinfo") as mock_dns,
            pytest.raises(OnyxError) as exc_info,
        ):
            mock_dns.return_value = [(None, None, None, None, (ip, 0))]
            self._call("https://internal.example.com/hook")
        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY
        assert ip in (exc_info.value.detail or "")

    def test_public_ip_is_allowed(self) -> None:
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_dns:
            mock_dns.return_value = [(None, None, None, None, ("93.184.216.34", 0))]
            self._call("https://example.com/hook")  # must not raise

    def test_dns_resolution_failure_raises(self) -> None:
        import socket

        with (
            patch(
                "onyx.utils.url.socket.getaddrinfo",
                side_effect=socket.gaierror("name not found"),
            ),
            pytest.raises(OnyxError) as exc_info,
        ):
            self._call("https://no-such-host.example.com/hook")
        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY


# ---------------------------------------------------------------------------
# _validate_endpoint
# ---------------------------------------------------------------------------


class TestValidateEndpoint:
    def _call(self, *, api_key: str | None = _API_KEY) -> HookValidateResponse:
        # Bypass SSRF check — tested separately in TestCheckSsrfSafety.
        with patch("ee.onyx.server.features.hooks.api._check_ssrf_safety"):
            return _validate_endpoint(
                endpoint_url=_URL,
                api_key=api_key,
                timeout_seconds=_TIMEOUT,
            )

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_2xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(200)
        )
        assert self._call().status == HookValidateStatus.passed

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_5xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(500)
        )
        assert self._call().status == HookValidateStatus.passed

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    @pytest.mark.parametrize("status_code", [401, 403])
    def test_401_403_returns_auth_failed(
        self, mock_client_cls: MagicMock, status_code: int
    ) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(status_code)
        )
        result = self._call()
        assert result.status == HookValidateStatus.auth_failed
        assert str(status_code) in (result.error_message or "")

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_4xx_non_auth_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(422)
        )
        assert self._call().status == HookValidateStatus.passed

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_connect_timeout_returns_timeout(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
            httpx.ConnectTimeout("timed out")
        )
        assert self._call().status == HookValidateStatus.timeout

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    @pytest.mark.parametrize(
        "exc",
        [
            httpx.ReadTimeout("read timeout"),
            httpx.WriteTimeout("write timeout"),
            httpx.PoolTimeout("pool timeout"),
        ],
    )
    def test_read_write_pool_timeout_returns_timeout(
        self, mock_client_cls: MagicMock, exc: httpx.TimeoutException
    ) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = exc
        assert self._call().status == HookValidateStatus.timeout

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_connect_error_returns_cannot_connect(
        self, mock_client_cls: MagicMock
    ) -> None:
        # Covers DNS failures, TLS errors, and other connection-level errors.
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
            httpx.ConnectError("name resolution failed")
        )
        assert self._call().status == HookValidateStatus.cannot_connect

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_arbitrary_exception_returns_cannot_connect(
        self, mock_client_cls: MagicMock
    ) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
            ConnectionRefusedError("refused")
        )
        assert self._call().status == HookValidateStatus.cannot_connect

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_api_key_sent_as_bearer(self, mock_client_cls: MagicMock) -> None:
        mock_post = mock_client_cls.return_value.__enter__.return_value.post
        mock_post.return_value = _mock_response(200)
        self._call(api_key="mykey")
        _, kwargs = mock_post.call_args
        assert kwargs["headers"]["Authorization"] == "Bearer mykey"

    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_no_api_key_omits_auth_header(self, mock_client_cls: MagicMock) -> None:
        mock_post = mock_client_cls.return_value.__enter__.return_value.post
        mock_post.return_value = _mock_response(200)
        self._call(api_key=None)
        _, kwargs = mock_post.call_args
        assert "Authorization" not in kwargs["headers"]


# ---------------------------------------------------------------------------
# _raise_for_validation_failure
# ---------------------------------------------------------------------------


class TestRaiseForValidationFailure:
    @pytest.mark.parametrize(
        "status, expected_code",
        [
            (HookValidateStatus.auth_failed, OnyxErrorCode.CREDENTIAL_INVALID),
            (HookValidateStatus.timeout, OnyxErrorCode.GATEWAY_TIMEOUT),
            (HookValidateStatus.cannot_connect, OnyxErrorCode.BAD_GATEWAY),
        ],
    )
    def test_raises_correct_error_code(
        self, status: HookValidateStatus, expected_code: OnyxErrorCode
    ) -> None:
        validation = HookValidateResponse(status=status, error_message="some error")
        with pytest.raises(OnyxError) as exc_info:
            _raise_for_validation_failure(validation)
        assert exc_info.value.error_code == expected_code

    def test_auth_failed_passes_error_message_directly(self) -> None:
        validation = HookValidateResponse(
            status=HookValidateStatus.auth_failed, error_message="bad credentials"
        )
        with pytest.raises(OnyxError) as exc_info:
            _raise_for_validation_failure(validation)
        assert exc_info.value.detail == "bad credentials"

    @pytest.mark.parametrize(
        "status", [HookValidateStatus.timeout, HookValidateStatus.cannot_connect]
    )
    def test_timeout_and_cannot_connect_wrap_error_message(
        self, status: HookValidateStatus
    ) -> None:
        validation = HookValidateResponse(status=status, error_message="raw error")
        with pytest.raises(OnyxError) as exc_info:
            _raise_for_validation_failure(validation)
        assert exc_info.value.detail == "Endpoint validation failed: raw error"


# ---------------------------------------------------------------------------
# HookValidateStatus enum string values (API contract)
# ---------------------------------------------------------------------------


class TestHookValidateStatusValues:
    @pytest.mark.parametrize(
        "status, expected",
        [
            (HookValidateStatus.passed, "passed"),
            (HookValidateStatus.auth_failed, "auth_failed"),
            (HookValidateStatus.timeout, "timeout"),
            (HookValidateStatus.cannot_connect, "cannot_connect"),
        ],
    )
    def test_string_values(self, status: HookValidateStatus, expected: str) -> None:
        assert status == expected


================================================
FILE: backend/tests/unit/ee/onyx/server/license/test_api.py
================================================
"""Tests for license API utilities."""

from ee.onyx.server.license.api import _strip_pem_delimiters


class TestStripPemDelimiters:
    """Tests for the PEM delimiter stripping function."""

    def test_strips_pem_delimiters(self) -> None:
        """Content wrapped in PEM delimiters is extracted correctly."""
        content = """-----BEGIN ONYX LICENSE-----
eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==
-----END ONYX LICENSE-----"""

        result = _strip_pem_delimiters(content)

        assert result == "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="

    def test_handles_multiline_content(self) -> None:
        """Multiline base64 content between delimiters is preserved."""
        content = """-----BEGIN ONYX LICENSE-----
eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjog
IjEuMCIsICJ0ZW5hbnRfaWQiOiAidGVz
dCJ9LCAic2lnbmF0dXJlIjogImFiYyJ9
-----END ONYX LICENSE-----"""

        result = _strip_pem_delimiters(content)

        expected = """eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjog
IjEuMCIsICJ0ZW5hbnRfaWQiOiAidGVz
dCJ9LCAic2lnbmF0dXJlIjogImFiYyJ9"""
        assert result == expected

    def test_returns_unchanged_without_delimiters(self) -> None:
        """Content without PEM delimiters is returned unchanged."""
        content = "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="

        result = _strip_pem_delimiters(content)

        assert result == content

    def test_handles_whitespace(self) -> None:
        """Leading/trailing whitespace is handled correctly."""
        content = """
  -----BEGIN ONYX LICENSE-----
eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==
-----END ONYX LICENSE-----
  """

        result = _strip_pem_delimiters(content)

        assert result == "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="

    def test_partial_delimiters_unchanged(self) -> None:
        """Content with only begin or only end delimiter is returned unchanged."""
        begin_only = """-----BEGIN ONYX LICENSE-----
eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="""

        end_only = """eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==
-----END ONYX LICENSE-----"""

        assert _strip_pem_delimiters(begin_only) == begin_only.strip()
        assert _strip_pem_delimiters(end_only) == end_only.strip()

    def test_trailing_newlines_stripped_from_raw_input(self) -> None:
        """Raw license strings with trailing newlines from user paste are cleaned."""
        content = "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\n\n"

        result = _strip_pem_delimiters(content)

        assert result == "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="

    def test_trailing_newlines_stripped_after_pem(self) -> None:
        """Inner content with trailing newlines after PEM stripping is cleaned."""
        content = """-----BEGIN ONYX LICENSE-----
eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==

-----END ONYX LICENSE-----"""

        result = _strip_pem_delimiters(content)

        assert result == "eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ=="


================================================
FILE: backend/tests/unit/ee/onyx/server/middleware/test_license_enforcement.py
================================================
"""Tests for license enforcement middleware."""

from collections.abc import Awaitable
from collections.abc import Callable
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from starlette.requests import Request
from starlette.responses import Response

from ee.onyx.configs.license_enforcement_config import EE_ONLY_PATH_PREFIXES
from ee.onyx.configs.license_enforcement_config import (
    LICENSE_ENFORCEMENT_ALLOWED_PREFIXES,
)
from ee.onyx.server.middleware.license_enforcement import _is_ee_only_path
from ee.onyx.server.middleware.license_enforcement import _is_path_allowed
from onyx.server.settings.models import ApplicationStatus

# Type alias for the middleware harness tuple
MiddlewareHarness = tuple[
    Callable[[Request, Callable[[Request], Awaitable[Response]]], Awaitable[Response]],
    Callable[[Request], Awaitable[Response]],
]

# Paths that should be blocked (core functionality requiring license)
BLOCKED_PATHS = [
    "/chat",
    "/search",
    "/admin/connectors",
    "/connector",
    "/persona",
]


class TestPathAllowlist:
    """Tests for the path allowlist logic.

    Uses LICENSE_ENFORCEMENT_ALLOWED_PREFIXES from the constants module
    as the source of truth to ensure tests stay in sync with production code.
    """

    @pytest.mark.parametrize("path", list(LICENSE_ENFORCEMENT_ALLOWED_PREFIXES))
    def test_allowed_paths_are_allowed(self, path: str) -> None:
        """All paths in LICENSE_ENFORCEMENT_ALLOWED_PREFIXES should be allowed."""
        assert _is_path_allowed(path) is True

    def test_allowed_path_prefix_matching(self) -> None:
        """Subpaths of allowed prefixes should also be allowed."""
        assert _is_path_allowed("/auth/callback/google") is True
        assert _is_path_allowed("/admin/billing/checkout") is True

    @pytest.mark.parametrize("path", BLOCKED_PATHS)
    def test_blocked_paths_are_blocked(self, path: str) -> None:
        """Core functionality paths should be blocked when license is gated."""
        assert _is_path_allowed(path) is False


class TestEEOnlyPaths:
    """Tests for EE-only path detection.

    Uses EE_ONLY_PATH_PREFIXES from the constants module as the source of truth
    to ensure tests stay in sync with production code.
    """

    @pytest.mark.parametrize("path", list(EE_ONLY_PATH_PREFIXES))
    def test_ee_only_paths_are_detected(self, path: str) -> None:
        """All paths in EE_ONLY_PATH_PREFIXES should be detected as EE-only."""
        assert _is_ee_only_path(path) is True

    @pytest.mark.parametrize(
        "path",
        [
            "/chat",
            "/search",
            "/connector",
            "/persona",
        ],
    )
    def test_community_paths_are_not_ee_only(self, path: str) -> None:
        """Community features should not be detected as EE-only."""
        assert _is_ee_only_path(path) is False


class TestLicenseEnforcementMiddleware:
    """Tests for middleware behavior under different conditions."""

    @pytest.fixture
    def middleware_harness(self) -> MiddlewareHarness:
        """Create a test harness for the middleware."""
        from ee.onyx.server.middleware.license_enforcement import (
            add_license_enforcement_middleware,
        )

        app = MagicMock()
        logger = MagicMock()
        captured_middleware: Any = None

        def capture_middleware(
            middleware_type: str,  # noqa: ARG001
        ) -> Callable[[Any], Any]:
            def decorator(func: Any) -> Any:
                nonlocal captured_middleware
                captured_middleware = func
                return func

            return decorator

        app.middleware = capture_middleware
        add_license_enforcement_middleware(app, logger)

        async def call_next(req: Request) -> Response:  # noqa: ARG001
            response = MagicMock()
            response.status_code = 200
            return response

        return captured_middleware, call_next

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_gated_access_status_gets_402(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """GATED_ACCESS status blocks non-allowlisted paths with 402."""
        mock_get_tenant.return_value = "default"
        mock_metadata = MagicMock()
        mock_metadata.status = ApplicationStatus.GATED_ACCESS
        mock_get_metadata.return_value = mock_metadata

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"

        response = await middleware(mock_request, call_next)
        assert response.status_code == 402

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_grace_period_allows_access(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """GRACE_PERIOD status allows access (for notifications only, not blocking)."""
        mock_get_tenant.return_value = "default"
        mock_metadata = MagicMock()
        mock_metadata.status = ApplicationStatus.GRACE_PERIOD
        mock_metadata.used_seats = 5
        mock_metadata.seats = 10
        mock_get_metadata.return_value = mock_metadata

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"

        response = await middleware(mock_request, call_next)
        assert response.status_code == 200

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch(
        "ee.onyx.server.middleware.license_enforcement.get_session_with_current_tenant"
    )
    @patch("ee.onyx.server.middleware.license_enforcement.refresh_license_cache")
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_no_license_blocks_ee_only_paths(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        mock_refresh: MagicMock,
        mock_get_session: MagicMock,  # noqa: ARG002
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """No license blocks EE-only paths with 402."""
        mock_get_tenant.return_value = "default"
        mock_get_metadata.return_value = None
        mock_refresh.return_value = None  # Still no license after DB check

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/analytics"  # EE-only path

        response = await middleware(mock_request, call_next)
        assert response.status_code == 402

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch(
        "ee.onyx.server.middleware.license_enforcement.get_session_with_current_tenant"
    )
    @patch("ee.onyx.server.middleware.license_enforcement.refresh_license_cache")
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_no_license_allows_community_paths(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        mock_refresh: MagicMock,
        mock_get_session: MagicMock,  # noqa: ARG002
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """No license allows community features (non-EE paths)."""
        mock_get_tenant.return_value = "default"
        mock_get_metadata.return_value = None
        mock_refresh.return_value = None  # Still no license after DB check

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"  # Community path

        response = await middleware(mock_request, call_next)
        assert response.status_code == 200

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_redis_error_fails_open(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """Redis errors should not block users - fail open to allow access."""
        from redis.exceptions import RedisError

        mock_get_tenant.return_value = "test_tenant"
        mock_get_metadata.side_effect = RedisError("Connection failed")

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"

        response = await middleware(mock_request, call_next)
        assert response.status_code == 200  # Fail open

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        False,
    )
    async def test_disabled_enforcement_allows_all(
        self,
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """When enforcement is disabled, all requests pass through."""
        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"

        response = await middleware(mock_request, call_next)
        assert response.status_code == 200

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED",
        True,
    )
    @patch("ee.onyx.server.middleware.license_enforcement.get_current_tenant_id")
    @patch("ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata")
    async def test_seat_limit_exceeded_gets_402(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        middleware_harness: MiddlewareHarness,
    ) -> None:
        """Seat limit exceeded returns 402."""
        mock_get_tenant.return_value = "default"
        mock_metadata = MagicMock()
        mock_metadata.status = ApplicationStatus.ACTIVE
        mock_metadata.used_seats = 15
        mock_metadata.seats = 10  # Over limit
        mock_get_metadata.return_value = mock_metadata

        middleware, call_next = middleware_harness
        mock_request = MagicMock()
        mock_request.url.path = "/api/chat"

        response = await middleware(mock_request, call_next)
        assert response.status_code == 402


================================================
FILE: backend/tests/unit/ee/onyx/server/settings/test_license_enforcement_settings.py
================================================
"""Tests for license enforcement in settings API."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from redis.exceptions import RedisError

from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings

# Fields we assert on across all tests
_ASSERT_FIELDS = {
    "application_status",
    "ee_features_enabled",
    "seat_count",
    "used_seats",
}


def _pick(settings: Settings) -> dict:
    """Extract only the fields under test from a Settings object."""
    return settings.model_dump(include=_ASSERT_FIELDS)


@pytest.fixture
def base_settings() -> Settings:
    """Create base settings for testing."""
    return Settings(
        maximum_chat_retention_days=None,
        gpu_enabled=False,
        application_status=ApplicationStatus.ACTIVE,
    )


class TestApplyLicenseStatusToSettings:
    """Tests for apply_license_status_to_settings function."""

    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", False)
    def test_enforcement_disabled_enables_ee_features(
        self, base_settings: Settings
    ) -> None:
        """When LICENSE_ENFORCEMENT_ENABLED=False, EE features are enabled."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        assert base_settings.ee_features_enabled is False
        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.ACTIVE,
            "ee_features_enabled": True,
            "seat_count": None,
            "used_seats": None,
        }

    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", True)
    def test_multi_tenant_enables_ee_features(self, base_settings: Settings) -> None:
        """Cloud mode always enables EE features."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.ACTIVE,
            "ee_features_enabled": True,
            "seat_count": None,
            "used_seats": None,
        }

    @pytest.mark.parametrize(
        "license_status,used_seats,seats,expected",
        [
            (
                ApplicationStatus.GATED_ACCESS,
                3,
                10,
                {
                    "application_status": ApplicationStatus.GATED_ACCESS,
                    "ee_features_enabled": False,
                    "seat_count": None,
                    "used_seats": None,
                },
            ),
            (
                ApplicationStatus.ACTIVE,
                3,
                10,
                {
                    "application_status": ApplicationStatus.ACTIVE,
                    "ee_features_enabled": True,
                    "seat_count": None,
                    "used_seats": None,
                },
            ),
            (
                ApplicationStatus.ACTIVE,
                10,
                10,
                {
                    "application_status": ApplicationStatus.ACTIVE,
                    "ee_features_enabled": True,
                    "seat_count": None,
                    "used_seats": None,
                },
            ),
            (
                ApplicationStatus.GRACE_PERIOD,
                3,
                10,
                {
                    "application_status": ApplicationStatus.ACTIVE,
                    "ee_features_enabled": True,
                    "seat_count": None,
                    "used_seats": None,
                },
            ),
        ],
    )
    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_self_hosted_license_status_propagation(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        license_status: ApplicationStatus,
        used_seats: int,
        seats: int,
        expected: dict,
        base_settings: Settings,
    ) -> None:
        """Self-hosted: license status controls both application_status and ee_features_enabled."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_metadata = MagicMock()
        mock_metadata.status = license_status
        mock_metadata.used_seats = used_seats
        mock_metadata.seats = seats
        mock_get_metadata.return_value = mock_metadata

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == expected

    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_seat_limit_exceeded_sets_status_and_counts(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        base_settings: Settings,
    ) -> None:
        """Seat limit exceeded sets SEAT_LIMIT_EXCEEDED with counts, keeps EE enabled."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_metadata = MagicMock()
        mock_metadata.status = ApplicationStatus.ACTIVE
        mock_metadata.used_seats = 15
        mock_metadata.seats = 10
        mock_get_metadata.return_value = mock_metadata

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.SEAT_LIMIT_EXCEEDED,
            "ee_features_enabled": True,
            "seat_count": 10,
            "used_seats": 15,
        }

    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_expired_license_takes_precedence_over_seat_limit(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        base_settings: Settings,
    ) -> None:
        """Expired license (GATED_ACCESS) takes precedence over seat limit exceeded."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_metadata = MagicMock()
        mock_metadata.status = ApplicationStatus.GATED_ACCESS
        mock_metadata.used_seats = 15
        mock_metadata.seats = 10
        mock_get_metadata.return_value = mock_metadata

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.GATED_ACCESS,
            "ee_features_enabled": False,
            "seat_count": None,
            "used_seats": None,
        }

    @patch("ee.onyx.server.settings.api.ENTERPRISE_EDITION_ENABLED", True)
    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.refresh_license_cache", return_value=None)
    @patch("ee.onyx.server.settings.api.get_session_with_current_tenant")
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_no_license_with_ee_flag_gates_access(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        _mock_get_session: MagicMock,
        _mock_refresh: MagicMock,
        base_settings: Settings,
    ) -> None:
        """No license + ENTERPRISE_EDITION_ENABLED=true → GATED_ACCESS."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_get_metadata.return_value = None

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.GATED_ACCESS,
            "ee_features_enabled": False,
            "seat_count": None,
            "used_seats": None,
        }

    @patch("ee.onyx.server.settings.api.ENTERPRISE_EDITION_ENABLED", False)
    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.refresh_license_cache", return_value=None)
    @patch("ee.onyx.server.settings.api.get_session_with_current_tenant")
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_no_license_without_ee_flag_allows_community(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        _mock_get_session: MagicMock,
        _mock_refresh: MagicMock,
        base_settings: Settings,
    ) -> None:
        """No license + ENTERPRISE_EDITION_ENABLED=false → community mode (no gating)."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_get_metadata.return_value = None

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.ACTIVE,
            "ee_features_enabled": False,
            "seat_count": None,
            "used_seats": None,
        }

    @patch("ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED", True)
    @patch("ee.onyx.server.settings.api.MULTI_TENANT", False)
    @patch("ee.onyx.server.settings.api.get_current_tenant_id")
    @patch("ee.onyx.server.settings.api.get_cached_license_metadata")
    def test_redis_error_disables_ee_features(
        self,
        mock_get_metadata: MagicMock,
        mock_get_tenant: MagicMock,
        base_settings: Settings,
    ) -> None:
        """Redis errors fail closed - disable EE features."""
        from ee.onyx.server.settings.api import apply_license_status_to_settings

        mock_get_tenant.return_value = "test_tenant"
        mock_get_metadata.side_effect = RedisError("Connection failed")

        result = apply_license_status_to_settings(base_settings)
        assert _pick(result) == {
            "application_status": ApplicationStatus.ACTIVE,
            "ee_features_enabled": False,
            "seat_count": None,
            "used_seats": None,
        }


class TestSettingsDefaults:
    """Verify Settings model defaults for CE deployments."""

    def test_default_ee_features_disabled(self) -> None:
        """CE default: ee_features_enabled is False."""
        settings = Settings()
        assert settings.ee_features_enabled is False


================================================
FILE: backend/tests/unit/ee/onyx/server/tenants/test_billing_api.py
================================================
"""Tests for billing API endpoints."""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest

from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


class TestGetStripePublishableKey:
    """Tests for get_stripe_publishable_key endpoint."""

    def setup_method(self) -> None:
        """Reset the cache before each test."""
        import ee.onyx.server.tenants.billing_api as billing_api

        billing_api._stripe_publishable_key_cache = None

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL",
        "https://example.com/key.txt",
    )
    async def test_fetches_from_s3_when_no_override(self) -> None:
        """Should fetch key from S3 when no env var override is set."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        mock_response = MagicMock()
        mock_response.text = "pk_live_test123"
        mock_response.raise_for_status = MagicMock()

        with patch("httpx.AsyncClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )
            result = await get_stripe_publishable_key()

        assert result.publishable_key == "pk_live_test123"

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE",
        "pk_test_override123",
    )
    async def test_uses_env_var_override_when_set(self) -> None:
        """Should use env var override instead of fetching from S3."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        with patch("httpx.AsyncClient") as mock_client:
            result = await get_stripe_publishable_key()
            # Should not call S3
            mock_client.assert_not_called()

        assert result.publishable_key == "pk_test_override123"

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE",
        "invalid_key",
    )
    async def test_rejects_invalid_env_var_key_format(self) -> None:
        """Should reject keys that don't start with pk_."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        with pytest.raises(OnyxError) as exc_info:
            await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
        assert exc_info.value.detail == "Invalid Stripe publishable key format"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL",
        "https://example.com/key.txt",
    )
    async def test_rejects_invalid_s3_key_format(self) -> None:
        """Should reject keys from S3 that don't start with pk_."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        mock_response = MagicMock()
        mock_response.text = "invalid_key"
        mock_response.raise_for_status = MagicMock()

        with patch("httpx.AsyncClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )
            with pytest.raises(OnyxError) as exc_info:
                await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
        assert exc_info.value.detail == "Invalid Stripe publishable key format"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL",
        "https://example.com/key.txt",
    )
    async def test_handles_s3_fetch_error(self) -> None:
        """Should return error when S3 fetch fails."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        with patch("httpx.AsyncClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                side_effect=httpx.HTTPError("Connection failed")
            )
            with pytest.raises(OnyxError) as exc_info:
                await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
        assert exc_info.value.detail == "Failed to fetch Stripe publishable key"

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL", None)
    async def test_error_when_no_config(self) -> None:
        """Should return error when neither env var nor S3 URL is configured."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        with pytest.raises(OnyxError) as exc_info:
            await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
        assert "not configured" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch(
        "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE",
        "pk_test_cached",
    )
    async def test_caches_key_after_first_fetch(self) -> None:
        """Should cache the key and return it on subsequent calls."""
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        # First call
        result1 = await get_stripe_publishable_key()
        assert result1.publishable_key == "pk_test_cached"

        # Second call - should use cache even if we change the override
        with patch(
            "ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE",
            "pk_test_different",
        ):
            result2 = await get_stripe_publishable_key()
            # Should still return cached value
            assert result2.publishable_key == "pk_test_cached"


================================================
FILE: backend/tests/unit/ee/onyx/server/tenants/test_product_gating.py
================================================
"""Tests for product gating functions."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest


class TestIsTenantGated:
    """Tests for is_tenant_gated - the O(1) Redis check used by middleware."""

    @pytest.mark.parametrize(
        "redis_result,expected",
        [
            (True, True),
            (False, False),
            (1, True),  # Redis sismember can return int
            (0, False),
        ],
    )
    @patch("ee.onyx.server.tenants.product_gating.get_redis_replica_client")
    def test_tenant_gated_status(
        self,
        mock_get_redis: MagicMock,
        redis_result: bool | int,
        expected: bool,
    ) -> None:
        """is_tenant_gated correctly interprets Redis sismember result."""
        from ee.onyx.server.tenants.product_gating import is_tenant_gated

        mock_redis = MagicMock()
        mock_redis.sismember.return_value = redis_result
        mock_get_redis.return_value = mock_redis

        assert is_tenant_gated("test_tenant") is expected


class TestUpdateTenantGating:
    """Tests for update_tenant_gating - modifies Redis gated set."""

    @pytest.mark.parametrize(
        "status,should_add_to_set",
        [
            ("gated_access", True),  # Only GATED_ACCESS adds to set
            ("active", False),  # All other statuses remove from set
        ],
    )
    @patch("ee.onyx.server.tenants.product_gating.get_redis_client")
    def test_gating_set_modification(
        self,
        mock_get_redis: MagicMock,
        status: str,
        should_add_to_set: bool,
    ) -> None:
        """update_tenant_gating adds tenant to set only for GATED_ACCESS status."""
        from ee.onyx.server.tenants.product_gating import update_tenant_gating
        from onyx.server.settings.models import ApplicationStatus

        mock_redis = MagicMock()
        mock_get_redis.return_value = mock_redis

        update_tenant_gating("test_tenant", ApplicationStatus(status))

        if should_add_to_set:
            mock_redis.sadd.assert_called_once()
            mock_redis.srem.assert_not_called()
        else:
            mock_redis.srem.assert_called_once()
            mock_redis.sadd.assert_not_called()


================================================
FILE: backend/tests/unit/ee/onyx/server/tenants/test_proxy.py
================================================
"""Tests for proxy endpoints for self-hosted data planes."""

from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest
from fastapi import HTTPException

from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import PlanType
from ee.onyx.server.tenants.proxy import _check_license_enforcement_enabled
from ee.onyx.server.tenants.proxy import _extract_license_from_header
from ee.onyx.server.tenants.proxy import forward_to_control_plane
from ee.onyx.server.tenants.proxy import get_license_payload
from ee.onyx.server.tenants.proxy import get_license_payload_allow_expired
from ee.onyx.server.tenants.proxy import get_optional_license_payload
from ee.onyx.server.tenants.proxy import verify_license_auth


# All tests that use license auth need LICENSE_ENFORCEMENT_ENABLED=True
LICENSE_ENABLED_PATCH = patch(
    "ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True
)


def make_license_payload(
    tenant_id: str = "tenant_123",
    expired: bool = False,
) -> LicensePayload:
    """Helper to create a test LicensePayload."""
    now = datetime.now(timezone.utc)
    if expired:
        expires_at = now - timedelta(days=1)
    else:
        expires_at = now + timedelta(days=30)

    return LicensePayload(
        version="1.0",
        tenant_id=tenant_id,
        organization_name="Test Org",
        issued_at=now - timedelta(days=1),
        expires_at=expires_at,
        seats=10,
        plan_type=PlanType.MONTHLY,
    )


class TestLicenseEnforcementCheck:
    """Tests for _check_license_enforcement_enabled function."""

    def test_raises_when_disabled(self) -> None:
        """Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False."""
        with patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", False):
            with pytest.raises(HTTPException) as exc_info:
                _check_license_enforcement_enabled()

            assert exc_info.value.status_code == 501
            assert "cloud data plane" in str(exc_info.value.detail).lower()

    def test_passes_when_enabled(self) -> None:
        """Test that no exception is raised when LICENSE_ENFORCEMENT_ENABLED=True."""
        with patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", True):
            _check_license_enforcement_enabled()  # Should not raise


class TestExtractLicenseFromHeader:
    """Tests for _extract_license_from_header helper function."""

    def test_valid_bearer_token(self) -> None:
        """Test extraction of valid Bearer token."""
        result = _extract_license_from_header("Bearer license_data_here", required=True)
        assert result == "license_data_here"

    def test_bearer_with_spaces_in_token(self) -> None:
        """Test that token with spaces is handled correctly (splits on first space only)."""
        result = _extract_license_from_header("Bearer token with spaces", required=True)
        assert result == "token with spaces"

    def test_missing_header_required(self) -> None:
        """Test that missing header raises 401 when required."""
        with pytest.raises(HTTPException) as exc_info:
            _extract_license_from_header(None, required=True)
        assert exc_info.value.status_code == 401

    def test_missing_header_optional(self) -> None:
        """Test that missing header returns None when not required."""
        result = _extract_license_from_header(None, required=False)
        assert result is None

    def test_non_bearer_required(self) -> None:
        """Test that non-Bearer auth raises 401 when required."""
        with pytest.raises(HTTPException) as exc_info:
            _extract_license_from_header("Basic sometoken", required=True)
        assert exc_info.value.status_code == 401

    def test_non_bearer_optional(self) -> None:
        """Test that non-Bearer auth returns None when not required."""
        result = _extract_license_from_header("Basic sometoken", required=False)
        assert result is None

    def test_empty_string_required(self) -> None:
        """Test that empty string raises 401 when required."""
        with pytest.raises(HTTPException) as exc_info:
            _extract_license_from_header("", required=True)
        assert exc_info.value.status_code == 401


class TestVerifyLicenseAuth:
    """Tests for verify_license_auth function."""

    def test_valid_license(self) -> None:
        """Test that a valid license passes verification."""
        payload = make_license_payload()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
        ):
            mock_verify.return_value = payload

            result = verify_license_auth("valid_license_data", allow_expired=False)

            assert result == payload
            mock_verify.assert_called_once_with("valid_license_data")

    def test_invalid_signature(self) -> None:
        """Test that invalid signature raises 401."""
        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
        ):
            mock_verify.side_effect = ValueError("Invalid signature")

            with pytest.raises(HTTPException) as exc_info:
                verify_license_auth("bad_license", allow_expired=False)

            assert exc_info.value.status_code == 401
            assert "Invalid license" in str(exc_info.value.detail)

    def test_expired_license_rejected(self) -> None:
        """Test that expired license raises 401 when not allowed."""
        payload = make_license_payload(expired=True)

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
            patch("ee.onyx.server.tenants.proxy.is_license_valid") as mock_valid,
        ):
            mock_verify.return_value = payload
            mock_valid.return_value = False

            with pytest.raises(HTTPException) as exc_info:
                verify_license_auth("expired_license", allow_expired=False)

            assert exc_info.value.status_code == 401
            assert "expired" in str(exc_info.value.detail).lower()

    def test_expired_license_allowed(self) -> None:
        """Test that expired license is allowed when allow_expired=True."""
        payload = make_license_payload(expired=True)

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
            patch("ee.onyx.server.tenants.proxy.is_license_valid") as mock_valid,
        ):
            mock_verify.return_value = payload
            mock_valid.return_value = False

            result = verify_license_auth("expired_license", allow_expired=True)

            assert result == payload

    def test_raises_501_when_enforcement_disabled(self) -> None:
        """Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False."""
        with patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", False):
            with pytest.raises(HTTPException) as exc_info:
                verify_license_auth("any_license", allow_expired=False)

            assert exc_info.value.status_code == 501


class TestGetLicensePayload:
    """Tests for get_license_payload dependency."""

    @pytest.mark.asyncio
    async def test_valid_license(self) -> None:
        """Test that valid license returns payload."""
        payload = make_license_payload()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
            patch("ee.onyx.server.tenants.proxy.is_license_valid") as mock_valid,
        ):
            mock_verify.return_value = payload
            mock_valid.return_value = True

            result = await get_license_payload("Bearer valid_license_data")

            assert result == payload

    @pytest.mark.asyncio
    async def test_missing_auth_header(self) -> None:
        """Test that missing Authorization header raises 401."""
        with LICENSE_ENABLED_PATCH:
            with pytest.raises(HTTPException) as exc_info:
                await get_license_payload(None)

            assert exc_info.value.status_code == 401
            assert "Missing or invalid authorization header" in str(
                exc_info.value.detail
            )

    @pytest.mark.asyncio
    async def test_invalid_auth_format(self) -> None:
        """Test that non-Bearer auth raises 401."""
        with LICENSE_ENABLED_PATCH:
            with pytest.raises(HTTPException) as exc_info:
                await get_license_payload("Basic sometoken")

            assert exc_info.value.status_code == 401


class TestGetLicensePayloadAllowExpired:
    """Tests for get_license_payload_allow_expired dependency."""

    @pytest.mark.asyncio
    async def test_expired_license_allowed(self) -> None:
        """Test that expired license is accepted."""
        payload = make_license_payload(expired=True)

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
        ):
            mock_verify.return_value = payload

            result = await get_license_payload_allow_expired("Bearer expired_license")

            assert result == payload

    @pytest.mark.asyncio
    async def test_missing_auth_header(self) -> None:
        """Test that missing Authorization header raises 401."""
        with LICENSE_ENABLED_PATCH:
            with pytest.raises(HTTPException) as exc_info:
                await get_license_payload_allow_expired(None)

            assert exc_info.value.status_code == 401


class TestGetOptionalLicensePayload:
    """Tests for get_optional_license_payload dependency."""

    @pytest.mark.asyncio
    async def test_no_auth_returns_none(self) -> None:
        """Test that missing auth returns None (for new customers)."""
        with LICENSE_ENABLED_PATCH:
            result = await get_optional_license_payload(None)
            assert result is None

    @pytest.mark.asyncio
    async def test_non_bearer_returns_none(self) -> None:
        """Test that non-Bearer auth returns None."""
        with LICENSE_ENABLED_PATCH:
            result = await get_optional_license_payload("Basic sometoken")
            assert result is None

    @pytest.mark.asyncio
    async def test_valid_license_returns_payload(self) -> None:
        """Test that valid license returns payload."""
        payload = make_license_payload()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.verify_license_signature"
            ) as mock_verify,
        ):
            mock_verify.return_value = payload

            result = await get_optional_license_payload("Bearer valid_license")

            assert result == payload

    @pytest.mark.asyncio
    async def test_raises_501_when_enforcement_disabled(self) -> None:
        """Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False."""
        with patch("ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED", False):
            with pytest.raises(HTTPException) as exc_info:
                await get_optional_license_payload(None)

            assert exc_info.value.status_code == 501


class TestForwardToControlPlane:
    """Tests for forward_to_control_plane function."""

    @pytest.mark.asyncio
    async def test_successful_get_request(self) -> None:
        """Test successful GET request forwarding."""
        mock_response = MagicMock()
        mock_response.json.return_value = {"data": "test"}
        mock_response.raise_for_status = MagicMock()

        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )

            result = await forward_to_control_plane(
                "GET", "/test-endpoint", params={"key": "value"}
            )

            assert result == {"data": "test"}

    @pytest.mark.asyncio
    async def test_successful_post_request(self) -> None:
        """Test successful POST request forwarding."""
        mock_response = MagicMock()
        mock_response.json.return_value = {"url": "https://checkout.stripe.com"}
        mock_response.raise_for_status = MagicMock()

        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_client.return_value.__aenter__.return_value.post = AsyncMock(
                return_value=mock_response
            )

            result = await forward_to_control_plane(
                "POST", "/create-checkout-session", body={"tenant_id": "t1"}
            )

            assert result == {"url": "https://checkout.stripe.com"}

    @pytest.mark.asyncio
    async def test_http_error_with_detail(self) -> None:
        """Test HTTP error handling with detail from response."""
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.json.return_value = {"detail": "Tenant not found"}
        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
            "Not Found",
            request=MagicMock(),
            response=mock_response,
        )

        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )

            with pytest.raises(HTTPException) as exc_info:
                await forward_to_control_plane("GET", "/billing-information")

            assert exc_info.value.status_code == 404
            assert "Tenant not found" in str(exc_info.value.detail)

    @pytest.mark.asyncio
    async def test_connection_error(self) -> None:
        """Test connection error handling."""
        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                side_effect=httpx.RequestError("Connection refused")
            )

            with pytest.raises(HTTPException) as exc_info:
                await forward_to_control_plane("GET", "/test")

            assert exc_info.value.status_code == 502
            assert "Failed to connect to control plane" in str(exc_info.value.detail)

    @pytest.mark.asyncio
    async def test_follows_redirects(self) -> None:
        """Test that AsyncClient is created with follow_redirects=True.

        The control plane may sit behind a reverse proxy that returns
        308 (HTTP→HTTPS). httpx does not follow redirects by default,
        so we must explicitly opt in.
        """
        mock_response = MagicMock()
        mock_response.json.return_value = {"ok": True}
        mock_response.raise_for_status = MagicMock()

        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "http://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )

            await forward_to_control_plane("GET", "/test")

            mock_client.assert_called_once_with(timeout=30.0, follow_redirects=True)

    @pytest.mark.asyncio
    async def test_unsupported_method(self) -> None:
        """Test that unsupported HTTP methods raise ValueError."""
        with (
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient"),
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"

            with pytest.raises(ValueError, match="Unsupported HTTP method"):
                await forward_to_control_plane("DELETE", "/test")


class TestProxyCheckoutSessionWithSeats:
    """Tests for proxy checkout session with seats parameter."""

    @pytest.mark.asyncio
    async def test_includes_seats_in_body_when_provided(self) -> None:
        """Should include seats in request body when provided."""
        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session
        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest

        mock_response = MagicMock()
        mock_response.json.return_value = {"url": "https://checkout.stripe.com/session"}
        mock_response.raise_for_status = MagicMock()

        license_payload = make_license_payload()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_post = AsyncMock(return_value=mock_response)
            mock_client.return_value.__aenter__.return_value.post = mock_post

            request = CreateCheckoutSessionRequest(
                billing_period="monthly",
                seats=25,
                email="test@example.com",
            )
            await proxy_create_checkout_session(
                request_body=request,
                license_payload=license_payload,
            )

            # Verify seats was included in the body
            call_kwargs = mock_post.call_args[1]
            body = call_kwargs["json"]
            assert body["seats"] == 25
            assert body["billing_period"] == "monthly"
            assert body["email"] == "test@example.com"
            assert body["tenant_id"] == "tenant_123"

    @pytest.mark.asyncio
    async def test_excludes_seats_when_not_provided(self) -> None:
        """Should not include seats in request body when not provided."""
        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session
        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest

        mock_response = MagicMock()
        mock_response.json.return_value = {"url": "https://checkout.stripe.com/session"}
        mock_response.raise_for_status = MagicMock()

        license_payload = make_license_payload()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_post = AsyncMock(return_value=mock_response)
            mock_client.return_value.__aenter__.return_value.post = mock_post

            request = CreateCheckoutSessionRequest(billing_period="annual")
            await proxy_create_checkout_session(
                request_body=request,
                license_payload=license_payload,
            )

            # Verify seats was NOT included in the body
            call_kwargs = mock_post.call_args[1]
            body = call_kwargs["json"]
            assert "seats" not in body
            assert body["billing_period"] == "annual"

    @pytest.mark.asyncio
    async def test_includes_seats_for_new_customer(self) -> None:
        """Should include seats for new customer without license."""
        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session
        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest

        mock_response = MagicMock()
        mock_response.json.return_value = {"url": "https://checkout.stripe.com/session"}
        mock_response.raise_for_status = MagicMock()

        with (
            LICENSE_ENABLED_PATCH,
            patch(
                "ee.onyx.server.tenants.proxy.generate_data_plane_token"
            ) as mock_token,
            patch("ee.onyx.server.tenants.proxy.httpx.AsyncClient") as mock_client,
            patch(
                "ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL",
                "https://control.example.com",
            ),
        ):
            mock_token.return_value = "cp_token"
            mock_post = AsyncMock(return_value=mock_response)
            mock_client.return_value.__aenter__.return_value.post = mock_post

            request = CreateCheckoutSessionRequest(
                billing_period="monthly",
                seats=10,
            )
            # New customer has no license
            await proxy_create_checkout_session(
                request_body=request,
                license_payload=None,
            )

            # Verify seats was included but no tenant_id
            call_kwargs = mock_post.call_args[1]
            body = call_kwargs["json"]
            assert body["seats"] == 10
            assert "tenant_id" not in body


================================================
FILE: backend/tests/unit/ee/onyx/server/tenants/test_schema_management.py
================================================
"""Tests for schema management functions."""

import pytest

from ee.onyx.server.tenants.schema_management import drop_schema
from ee.onyx.server.tenants.schema_management import validate_tenant_id


class TestValidateTenantId:
    """Tests for validate_tenant_id - validates tenant ID format for SQL safety."""

    @pytest.mark.parametrize(
        "tenant_id",
        [
            # Standard UUID format
            "tenant_0aef62e7-9fbf-4bb6-8894-f1441fca6745",
            "tenant_abcd1234-5678-90ab-cdef-1234567890ab",
            "tenant_00000000-0000-0000-0000-000000000000",
            "tenant_ffffffff-ffff-ffff-ffff-ffffffffffff",
            # AWS instance ID format
            "tenant_i-0d8d7eaa21f5f2fae",
            "tenant_i-0123456789abcdef0",
            "tenant_i-abc",
        ],
    )
    def test_valid_tenant_ids(self, tenant_id: str) -> None:
        """Valid tenant IDs should pass validation."""
        assert validate_tenant_id(tenant_id) is True

    @pytest.mark.parametrize(
        "tenant_id,description",
        [
            # Missing tenant_ prefix
            ("0aef62e7-9fbf-4bb6-8894-f1441fca6745", "missing prefix"),
            ("public", "reserved schema name"),
            ("pg_catalog", "system schema"),
            # Invalid formats
            ("tenant_abc123", "not UUID or instance ID format"),
            ("tenant_", "empty after prefix"),
            ("tenant_i-", "empty instance ID"),
            # SQL injection attempts
            ("tenant_; DROP TABLE users;--", "SQL injection with semicolon"),
            ('tenant_" OR 1=1--', "SQL injection with quote"),
            ("tenant_abc'; DROP SCHEMA public;--", "SQL injection attempt"),
            # Other invalid inputs
            ("tenant_ABCD1234-5678-90AB-CDEF-1234567890AB", "uppercase not allowed"),
            ("../../../etc/passwd", "path traversal"),
            ("", "empty string"),
            ("tenant_i-GHIJ", "invalid hex in instance ID"),
        ],
    )
    def test_invalid_tenant_ids(self, tenant_id: str, description: str) -> None:
        """Invalid tenant IDs should fail validation."""
        assert validate_tenant_id(tenant_id) is False, f"Should reject: {description}"

    def test_uuid_must_be_complete(self) -> None:
        """UUID must have all sections with correct lengths."""
        # Too short
        assert validate_tenant_id("tenant_0aef62e7-9fbf-4bb6-8894") is False
        # Too long
        assert (
            validate_tenant_id("tenant_0aef62e7-9fbf-4bb6-8894-f1441fca6745-extra")
            is False
        )
        # Wrong section lengths
        assert validate_tenant_id("tenant_0aef62e7-9fbf-4bb6-8894-f1441fca674") is False


class TestDropSchemaValidation:
    """Tests for drop_schema input validation (no DB required - fails before SQL)."""

    @pytest.mark.parametrize(
        "dangerous_input,description",
        [
            ("public", "system schema"),
            ("pg_catalog", "postgres catalog"),
            ("tenant_; DROP TABLE users;--", "SQL injection with semicolon"),
            ('tenant_" OR 1=1--', "SQL injection with quote"),
            ("tenant_abc123", "invalid format - not UUID"),
            ("", "empty string"),
        ],
    )
    def test_drop_schema_rejects_invalid_inputs(
        self, dangerous_input: str, description: str
    ) -> None:
        """drop_schema should reject invalid tenant IDs before any SQL runs."""
        with pytest.raises(ValueError, match="Invalid tenant_id format") as exc_info:
            drop_schema(dangerous_input)
        assert dangerous_input in str(
            exc_info.value
        ), f"Error should include input ({description})"


================================================
FILE: backend/tests/unit/ee/onyx/utils/test_encryption.py
================================================
"""Tests for EE AES-CBC encryption/decryption with explicit key support.

With EE mode enabled (via conftest), fetch_versioned_implementation resolves
to the EE implementations, so no patching of the MIT layer is needed.
"""

from unittest.mock import patch

import pytest

from ee.onyx.utils.encryption import _decrypt_bytes
from ee.onyx.utils.encryption import _encrypt_string
from ee.onyx.utils.encryption import _get_trimmed_key
from ee.onyx.utils.encryption import decrypt_bytes_to_string
from ee.onyx.utils.encryption import encrypt_string_to_bytes

EE_MODULE = "ee.onyx.utils.encryption"

# Keys must be exactly 16, 24, or 32 bytes for AES
KEY_16 = "a" * 16
KEY_16_ALT = "b" * 16
KEY_24 = "d" * 24
KEY_32 = "c" * 32


@pytest.fixture(autouse=True)
def _clear_key_cache() -> None:
    _get_trimmed_key.cache_clear()


class TestEncryptDecryptRoundTrip:
    def test_roundtrip_with_env_key(self) -> None:
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", KEY_16):
            encrypted = _encrypt_string("hello world")
            assert encrypted != b"hello world"
            assert _decrypt_bytes(encrypted) == "hello world"

    def test_roundtrip_with_explicit_key(self) -> None:
        encrypted = _encrypt_string("secret data", key=KEY_32)
        assert encrypted != b"secret data"
        assert _decrypt_bytes(encrypted, key=KEY_32) == "secret data"

    def test_roundtrip_no_key(self) -> None:
        """Without any key, data is raw-encoded (no encryption)."""
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", ""):
            encrypted = _encrypt_string("plain text")
            assert encrypted == b"plain text"
            assert _decrypt_bytes(encrypted) == "plain text"

    def test_explicit_key_overrides_env(self) -> None:
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", KEY_16):
            encrypted = _encrypt_string("data", key=KEY_16_ALT)
            with pytest.raises(ValueError):
                _decrypt_bytes(encrypted, key=KEY_16)
            assert _decrypt_bytes(encrypted, key=KEY_16_ALT) == "data"

    def test_different_encryptions_produce_different_bytes(self) -> None:
        """Each encryption uses a random IV, so results differ."""
        a = _encrypt_string("same", key=KEY_16)
        b = _encrypt_string("same", key=KEY_16)
        assert a != b

    def test_roundtrip_empty_string(self) -> None:
        encrypted = _encrypt_string("", key=KEY_16)
        assert encrypted != b""
        assert _decrypt_bytes(encrypted, key=KEY_16) == ""

    def test_roundtrip_unicode(self) -> None:
        text = "日本語テスト 🔐 émojis"
        encrypted = _encrypt_string(text, key=KEY_16)
        assert _decrypt_bytes(encrypted, key=KEY_16) == text


class TestDecryptFallbackBehavior:
    def test_wrong_env_key_falls_back_to_raw_decode(self) -> None:
        """Default key path: AES fails on non-AES data → fallback to raw decode."""
        raw = "readable text".encode()
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", KEY_16):
            assert _decrypt_bytes(raw) == "readable text"

    def test_explicit_wrong_key_raises(self) -> None:
        """Explicit key path: AES fails → raises, no fallback."""
        encrypted = _encrypt_string("secret", key=KEY_16)
        with pytest.raises(ValueError):
            _decrypt_bytes(encrypted, key=KEY_16_ALT)

    def test_explicit_none_key_with_no_env(self) -> None:
        """key=None with empty env → raw decode."""
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", ""):
            assert _decrypt_bytes(b"hello", key=None) == "hello"

    def test_explicit_empty_string_key(self) -> None:
        """key='' means no encryption."""
        encrypted = _encrypt_string("test", key="")
        assert encrypted == b"test"
        assert _decrypt_bytes(encrypted, key="") == "test"


class TestKeyValidation:
    def test_key_too_short_raises(self) -> None:
        with pytest.raises(RuntimeError, match="too short"):
            _encrypt_string("data", key="short")

    def test_16_byte_key(self) -> None:
        encrypted = _encrypt_string("data", key=KEY_16)
        assert _decrypt_bytes(encrypted, key=KEY_16) == "data"

    def test_24_byte_key(self) -> None:
        encrypted = _encrypt_string("data", key=KEY_24)
        assert _decrypt_bytes(encrypted, key=KEY_24) == "data"

    def test_32_byte_key(self) -> None:
        encrypted = _encrypt_string("data", key=KEY_32)
        assert _decrypt_bytes(encrypted, key=KEY_32) == "data"

    def test_long_key_truncated_to_32(self) -> None:
        """Keys longer than 32 bytes are truncated to 32."""
        long_key = "e" * 64
        encrypted = _encrypt_string("data", key=long_key)
        assert _decrypt_bytes(encrypted, key=long_key) == "data"

    def test_20_byte_key_trimmed_to_16(self) -> None:
        """A 20-byte key is trimmed to the largest valid AES size that fits (16)."""
        key_20 = "f" * 20
        encrypted = _encrypt_string("data", key=key_20)
        assert _decrypt_bytes(encrypted, key=key_20) == "data"

        # Verify it was trimmed to 16 by checking that the first 16 bytes
        # of the key can also decrypt it
        key_16_same_prefix = "f" * 16
        assert _decrypt_bytes(encrypted, key=key_16_same_prefix) == "data"

    def test_25_byte_key_trimmed_to_24(self) -> None:
        """A 25-byte key is trimmed to the largest valid AES size that fits (24)."""
        key_25 = "g" * 25
        encrypted = _encrypt_string("data", key=key_25)
        assert _decrypt_bytes(encrypted, key=key_25) == "data"

        key_24_same_prefix = "g" * 24
        assert _decrypt_bytes(encrypted, key=key_24_same_prefix) == "data"

    def test_30_byte_key_trimmed_to_24(self) -> None:
        """A 30-byte key is trimmed to the largest valid AES size that fits (24)."""
        key_30 = "h" * 30
        encrypted = _encrypt_string("data", key=key_30)
        assert _decrypt_bytes(encrypted, key=key_30) == "data"

        key_24_same_prefix = "h" * 24
        assert _decrypt_bytes(encrypted, key=key_24_same_prefix) == "data"


class TestWrapperFunctions:
    """Test encrypt_string_to_bytes / decrypt_bytes_to_string pass key through.

    With EE mode enabled, the wrappers resolve to EE implementations automatically.
    """

    def test_wrapper_passes_key(self) -> None:
        encrypted = encrypt_string_to_bytes("payload", key=KEY_16)
        assert decrypt_bytes_to_string(encrypted, key=KEY_16) == "payload"

    def test_wrapper_no_key_uses_env(self) -> None:
        with patch(f"{EE_MODULE}.ENCRYPTION_KEY_SECRET", KEY_32):
            encrypted = encrypt_string_to_bytes("payload")
            assert decrypt_bytes_to_string(encrypted) == "payload"


================================================
FILE: backend/tests/unit/ee/onyx/utils/test_license_utils.py
================================================
"""Tests for license signature verification utilities."""

import base64
import json
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import patch

import pytest
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric import rsa

from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import PlanType
from ee.onyx.utils.license import get_license_status
from ee.onyx.utils.license import is_license_valid
from ee.onyx.utils.license import verify_license_signature
from onyx.server.settings.models import ApplicationStatus


def generate_test_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:
    """Generate a test RSA key pair."""
    private_key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,  # Use smaller key for faster tests
    )
    public_key = private_key.public_key()
    return private_key, public_key


def create_signed_license(
    private_key: rsa.RSAPrivateKey,
    payload: LicensePayload,
) -> str:
    """Create a signed license for testing."""
    payload_json = json.dumps(payload.model_dump(mode="json"), sort_keys=True)
    signature = private_key.sign(
        payload_json.encode(),
        padding.PSS(
            mgf=padding.MGF1(hashes.SHA256()),
            salt_length=padding.PSS.MAX_LENGTH,
        ),
        hashes.SHA256(),
    )

    license_data = {
        "payload": payload.model_dump(mode="json"),
        "signature": base64.b64encode(signature).decode(),
    }

    return base64.b64encode(json.dumps(license_data).encode()).decode()


class TestVerifyLicenseSignature:
    """Tests for verify_license_signature function."""

    def test_valid_signature(self) -> None:
        """Test that a valid signature passes verification."""
        private_key, public_key = generate_test_key_pair()

        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        license_data = create_signed_license(private_key, payload)

        # Patch the _get_public_key function to return our test key
        with patch("ee.onyx.utils.license._get_public_key", return_value=public_key):
            result = verify_license_signature(license_data)

        assert result.tenant_id == "tenant_123"
        assert result.seats == 50
        assert result.plan_type == PlanType.MONTHLY

    def test_invalid_signature(self) -> None:
        """Test that an invalid signature fails verification."""
        private_key, public_key = generate_test_key_pair()
        _, different_public_key = generate_test_key_pair()

        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        license_data = create_signed_license(private_key, payload)

        # Patch _get_public_key to return a different key (signature won't match)
        with patch(
            "ee.onyx.utils.license._get_public_key",
            return_value=different_public_key,
        ):
            with pytest.raises(ValueError, match="Invalid license signature"):
                verify_license_signature(license_data)

    def test_tampered_payload(self) -> None:
        """Test that a tampered payload fails verification."""
        private_key, public_key = generate_test_key_pair()

        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        # Create valid signature
        payload_json = json.dumps(payload.model_dump(mode="json"), sort_keys=True)
        signature = private_key.sign(
            payload_json.encode(),
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH,
            ),
            hashes.SHA256(),
        )

        # Tamper with the payload (change seats)
        tampered_payload = payload.model_dump(mode="json")
        tampered_payload["seats"] = 1000  # Changed!

        license_data = {
            "payload": tampered_payload,
            "signature": base64.b64encode(signature).decode(),
        }

        encoded_license = base64.b64encode(json.dumps(license_data).encode()).decode()

        # Patch _get_public_key to return our test key
        with patch("ee.onyx.utils.license._get_public_key", return_value=public_key):
            with pytest.raises(ValueError, match="Invalid license signature"):
                verify_license_signature(encoded_license)

    def test_invalid_base64(self) -> None:
        """Test that invalid base64 fails."""
        with pytest.raises(ValueError):
            verify_license_signature("not-valid-base64!!!")

    def test_invalid_json(self) -> None:
        """Test that invalid JSON fails."""
        invalid_data = base64.b64encode(b"not json").decode()
        with pytest.raises(ValueError):
            verify_license_signature(invalid_data)


class TestGetLicenseStatus:
    """Tests for get_license_status function."""

    def test_active_license(self) -> None:
        """Test status for an active license."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=30),
            expires_at=datetime.now(timezone.utc) + timedelta(days=30),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        status = get_license_status(payload)
        assert status == ApplicationStatus.ACTIVE

    def test_expired_license_no_grace(self) -> None:
        """Test status for an expired license without grace period."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=60),
            expires_at=datetime.now(timezone.utc) - timedelta(days=1),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        status = get_license_status(payload)
        assert status == ApplicationStatus.GATED_ACCESS

    def test_expired_license_within_grace(self) -> None:
        """Test status for an expired license within grace period."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=60),
            expires_at=datetime.now(timezone.utc) - timedelta(days=1),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        grace_end = datetime.now(timezone.utc) + timedelta(days=29)
        status = get_license_status(payload, grace_period_end=grace_end)
        assert status == ApplicationStatus.GRACE_PERIOD

    def test_grace_period_expired(self) -> None:
        """Test status when grace period has expired."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=90),
            expires_at=datetime.now(timezone.utc) - timedelta(days=31),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        grace_end = datetime.now(timezone.utc) - timedelta(days=1)
        status = get_license_status(payload, grace_period_end=grace_end)
        assert status == ApplicationStatus.GATED_ACCESS


class TestIsLicenseValid:
    """Tests for is_license_valid function."""

    def test_valid_license(self) -> None:
        """Test that an unexpired license is valid."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=30),
            expires_at=datetime.now(timezone.utc) + timedelta(days=30),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        assert is_license_valid(payload) is True

    def test_expired_license(self) -> None:
        """Test that an expired license is invalid."""
        payload = LicensePayload(
            version="1.0",
            tenant_id="tenant_123",
            issued_at=datetime.now(timezone.utc) - timedelta(days=60),
            expires_at=datetime.now(timezone.utc) - timedelta(days=1),
            seats=50,
            plan_type=PlanType.MONTHLY,
        )

        assert is_license_valid(payload) is False


================================================
FILE: backend/tests/unit/federated_connector/slack/test_slack_federated_connnector.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from unittest.mock import patch

import pytest
from pydantic import ValidationError

from onyx.federated_connectors.models import OAuthResult
from onyx.federated_connectors.slack.federated_connector import SlackFederatedConnector
from onyx.federated_connectors.slack.models import SlackEntities

# Constants for mock Slack OAuth response
MOCK_APP_ID = "A093M5L7Q92"
MOCK_USER_ID = "U05SAH6UGUD"
MOCK_SCOPE = "search:read"
MOCK_ACCESS_TOKEN = (
    "xoxe.xoxp-1-Mi0yLTU5MTAx...MDkwN2U0YjlmZmI4YzA1NTYwZjNlMjRiZDYwNGU0ZA"
)
MOCK_REFRESH_TOKEN = (
    "xoxe-1-My0xLTU5MTAxMz...jcyZjA3NDM3YjdhOTRhYmRhMGJmMGVlMzBjNzQ4Y2I"
)
MOCK_TOKEN_TYPE = "user"
MOCK_EXPIRES_IN = 31659
MOCK_TEAM_ID = "T05SS40AFAM"
MOCK_TEAM_NAME = "Onyx Team"


class TestSlackFederatedConnector:
    """Test suite for SlackFederatedConnector"""

    @pytest.fixture
    def test_credentials(self) -> dict[str, str]:
        """Test credentials for Slack connector"""
        return {
            "client_id": "test_client_id",
            "client_secret": "test_client_secret",
            "redirect_uri": "https://test.com/callback",
        }

    @pytest.fixture
    def slack_connector(
        self, test_credentials: dict[str, str]
    ) -> SlackFederatedConnector:
        """Create a SlackFederatedConnector instance for testing"""
        return SlackFederatedConnector(test_credentials)

    @pytest.fixture
    def mock_slack_oauth_response(self) -> dict[str, Any]:
        """Mock Slack OAuth response based on real example"""
        return {
            "ok": True,
            "app_id": MOCK_APP_ID,
            "authed_user": {
                "id": MOCK_USER_ID,
                "scope": MOCK_SCOPE,
                "access_token": MOCK_ACCESS_TOKEN,
                "token_type": MOCK_TOKEN_TYPE,
                "refresh_token": MOCK_REFRESH_TOKEN,
                "expires_in": MOCK_EXPIRES_IN,
            },
            "team": {"id": MOCK_TEAM_ID, "name": MOCK_TEAM_NAME},
            "enterprise": None,
            "is_enterprise_install": False,
        }

    def test_callback_success(
        self,
        slack_connector: SlackFederatedConnector,
        mock_slack_oauth_response: dict[str, Any],
    ) -> None:
        """Test successful OAuth callback handling"""
        # Mock the token exchange method
        with patch.object(
            slack_connector,
            "_exchange_code_for_token",
            return_value=mock_slack_oauth_response,
        ):
            # Simulate callback data with authorization code
            callback_data = {
                "code": "test_auth_code",
                "state": "test_state",
            }
            redirect_uri = "https://test.com/callback"

            # Call the callback method
            result = slack_connector.callback(callback_data, redirect_uri)

            # Assert the result is an OAuthResult
            assert isinstance(result, OAuthResult)

            # Assert OAuth token values are correctly extracted
            assert result.access_token == MOCK_ACCESS_TOKEN
            assert result.refresh_token == MOCK_REFRESH_TOKEN
            assert result.token_type == MOCK_TOKEN_TYPE
            assert result.scope == MOCK_SCOPE

            # Assert expiration time is calculated correctly
            assert result.expires_at is not None
            expected_expires_at = datetime.now(timezone.utc) + timedelta(
                seconds=MOCK_EXPIRES_IN
            )
            # Allow for small time difference due to test execution time
            time_diff = abs((result.expires_at - expected_expires_at).total_seconds())
            assert time_diff < 5  # Within 5 seconds

            # Assert team info is extracted correctly
            assert result.team is not None
            assert result.team["id"] == MOCK_TEAM_ID
            assert result.team["name"] == MOCK_TEAM_NAME

            # Assert user info is extracted correctly
            assert result.user is not None
            assert result.user["id"] == MOCK_USER_ID
            assert result.user["scope"] == MOCK_SCOPE
            assert result.user["token_type"] == MOCK_TOKEN_TYPE

            # Assert raw response is preserved
            assert result.raw_response == mock_slack_oauth_response

    def test_callback_oauth_error(
        self, slack_connector: SlackFederatedConnector
    ) -> None:
        """Test OAuth callback with error response"""
        callback_data = {
            "error": "access_denied",
            "error_description": "User denied access",
        }
        redirect_uri = "https://test.com/callback"

        with pytest.raises(RuntimeError, match="OAuth error received: access_denied"):
            slack_connector.callback(callback_data, redirect_uri)

    def test_callback_missing_code(
        self, slack_connector: SlackFederatedConnector
    ) -> None:
        """Test OAuth callback without authorization code"""
        callback_data = {"state": "test_state"}
        redirect_uri = "https://test.com/callback"

        with pytest.raises(ValueError, match="No authorization code received"):
            slack_connector.callback(callback_data, redirect_uri)

    def test_callback_slack_api_error(
        self, slack_connector: SlackFederatedConnector
    ) -> None:
        """Test OAuth callback when Slack API returns error"""
        # Mock failed token exchange
        mock_error_response = {
            "ok": False,
            "error": "invalid_code",
        }

        with patch.object(
            slack_connector,
            "_exchange_code_for_token",
            return_value=mock_error_response,
        ):
            callback_data = {"code": "invalid_code"}
            redirect_uri = "https://test.com/callback"

            with pytest.raises(
                RuntimeError, match="Failed to exchange authorization code for token"
            ):
                slack_connector.callback(callback_data, redirect_uri)

    def test_callback_without_authed_user(
        self, slack_connector: SlackFederatedConnector
    ) -> None:
        """Test OAuth callback when authed_user is missing from response"""
        # Mock response without authed_user
        mock_response = {
            "ok": True,
            "app_id": MOCK_APP_ID,
            "team": {"id": MOCK_TEAM_ID, "name": MOCK_TEAM_NAME},
        }

        with patch.object(
            slack_connector, "_exchange_code_for_token", return_value=mock_response
        ):
            callback_data = {"code": "test_code"}
            redirect_uri = "https://test.com/callback"

            with pytest.raises(
                RuntimeError, match="Missing authed_user in OAuth response from Slack"
            ):
                slack_connector.callback(callback_data, redirect_uri)

    def test_callback_with_incomplete_authed_user(
        self, slack_connector: SlackFederatedConnector
    ) -> None:
        """Test OAuth callback when authed_user is missing access_token"""
        # Mock response with authed_user but missing access_token
        mock_response = {
            "ok": True,
            "app_id": MOCK_APP_ID,
            "authed_user": {
                "id": MOCK_USER_ID,
                "scope": MOCK_SCOPE,
                "token_type": MOCK_TOKEN_TYPE,
                # Missing access_token
            },
            "team": {"id": MOCK_TEAM_ID, "name": MOCK_TEAM_NAME},
        }

        with patch.object(
            slack_connector, "_exchange_code_for_token", return_value=mock_response
        ):
            callback_data = {"code": "test_code"}
            redirect_uri = "https://test.com/callback"

            result = slack_connector.callback(callback_data, redirect_uri)

            # Should handle gracefully - access_token can be None in some edge cases
            assert result.access_token is None
            assert result.refresh_token is None
            assert result.token_type == MOCK_TOKEN_TYPE
            assert result.scope == MOCK_SCOPE


class TestSlackEntitiesValidation:
    """Test suite for SlackEntities validation"""

    def test_default_values(self) -> None:
        """Test that default values are set correctly"""
        entities = SlackEntities()

        assert entities.search_all_channels is True
        assert entities.channels is None
        assert entities.exclude_channels is None
        assert entities.include_dm is True
        assert entities.include_group_dm is True
        assert entities.include_private_channels is True
        assert entities.default_search_days == 30

    def test_search_all_channels_true(self) -> None:
        """Test search_all_channels=True ignores channels list"""
        entities = SlackEntities(
            search_all_channels=True,
            channels=["general"],  # Should be ignored
        )

        assert entities.search_all_channels is True
        # channels list is present but search_all_channels takes precedence
        assert entities.channels == ["general"]

    def test_search_all_channels_false_with_channels(self) -> None:
        """Test search_all_channels=False with valid channels"""
        entities = SlackEntities(
            search_all_channels=False, channels=["general", "engineering"]
        )

        assert entities.search_all_channels is False
        assert entities.channels == ["general", "engineering"]

    def test_search_all_channels_false_without_channels(self) -> None:
        """Test search_all_channels=False without channels raises error"""
        with pytest.raises(
            ValidationError,
            match="Must specify at least one channel when search_all_channels is False",
        ):
            SlackEntities(search_all_channels=False, channels=None)

        with pytest.raises(
            ValidationError,
            match="Must specify at least one channel when search_all_channels is False",
        ):
            SlackEntities(search_all_channels=False, channels=[])

    def test_channels_validation(self) -> None:
        """Test channel list validation"""
        # Valid channels
        entities = SlackEntities(
            search_all_channels=False, channels=["general", "C12345", "random"]
        )
        assert entities.channels is not None
        assert len(entities.channels) == 3

        # Empty string in channels
        with pytest.raises(
            ValidationError, match="Each channel must be a non-empty string"
        ):
            SlackEntities(search_all_channels=False, channels=["general", ""])

        # Whitespace-only string
        with pytest.raises(
            ValidationError, match="Each channel must be a non-empty string"
        ):
            SlackEntities(search_all_channels=False, channels=["general", "   "])

    def test_exclude_channels_validation(self) -> None:
        """Test exclude channel patterns validation"""
        # Valid patterns
        entities = SlackEntities(exclude_channels=["customer*", "test-*", "private-*"])
        assert entities.exclude_channels is not None
        assert len(entities.exclude_channels) == 3

        # Empty string in patterns
        with pytest.raises(
            ValidationError, match="Each exclude pattern must be a non-empty string"
        ):
            SlackEntities(exclude_channels=["customer*", ""])

        # Whitespace-only pattern
        with pytest.raises(
            ValidationError, match="Each exclude pattern must be a non-empty string"
        ):
            SlackEntities(exclude_channels=["customer*", "   "])

    def test_exclude_channels_with_specific_channels(self) -> None:
        """Test exclude patterns work with specific channel list"""
        entities = SlackEntities(
            search_all_channels=False,
            channels=["general", "customer-X", "customer-Y", "support"],
            exclude_channels=["customer*"],
        )

        assert entities.search_all_channels is False
        assert entities.channels is not None
        assert len(entities.channels) == 4
        assert entities.exclude_channels == ["customer*"]

    def test_direct_message_filtering(self) -> None:
        """Test DM filtering options"""
        # Test disabling 1:1 DMs
        entities_no_dm = SlackEntities(include_dm=False)
        assert entities_no_dm.include_dm is False
        assert entities_no_dm.include_group_dm is True  # Default is True

        # Test disabling group DMs
        entities_no_group_dm = SlackEntities(include_group_dm=False)
        assert entities_no_group_dm.include_dm is True  # Default is True
        assert entities_no_group_dm.include_group_dm is False

        # Test both enabled (defaults)
        entities_both = SlackEntities(include_dm=True, include_group_dm=True)
        assert entities_both.include_dm is True
        assert entities_both.include_group_dm is True

    def test_private_channel_filtering(self) -> None:
        """Test private channel filtering option"""
        entities = SlackEntities(include_private_channels=True)

        assert entities.include_private_channels is True

    def test_default_search_days_validation(self) -> None:
        """Test default_search_days validation"""
        # Valid values
        entities = SlackEntities(default_search_days=7)
        assert entities.default_search_days == 7

        entities = SlackEntities(default_search_days=90)
        assert entities.default_search_days == 90

        entities = SlackEntities(default_search_days=365)
        assert entities.default_search_days == 365

        # Invalid: too small
        with pytest.raises(
            ValidationError, match="default_search_days must be at least 1"
        ):
            SlackEntities(default_search_days=0)

        with pytest.raises(
            ValidationError, match="default_search_days must be at least 1"
        ):
            SlackEntities(default_search_days=-5)

        # Invalid: too large
        with pytest.raises(
            ValidationError, match="default_search_days cannot exceed 365 days"
        ):
            SlackEntities(default_search_days=366)

        with pytest.raises(
            ValidationError, match="default_search_days cannot exceed 365 days"
        ):
            SlackEntities(default_search_days=1000)

    def test_complex_configuration(self) -> None:
        """Test a complex realistic configuration"""
        entities = SlackEntities(
            search_all_channels=False,
            channels=["general", "engineering", "support"],
            exclude_channels=["test-*", "dev-*"],
            include_dm=False,
            include_group_dm=False,
            include_private_channels=True,
        )

        assert entities.search_all_channels is False
        assert entities.channels == ["general", "engineering", "support"]
        assert entities.exclude_channels == ["test-*", "dev-*"]
        assert entities.include_dm is False
        assert entities.include_group_dm is False
        assert entities.include_private_channels is True

    def test_validate_entities_method(self) -> None:
        """Test the validate_entities method in SlackFederatedConnector"""
        # Create a connector for testing
        test_credentials = {
            "client_id": "test_client_id",
            "client_secret": "test_client_secret",
        }
        slack_connector = SlackFederatedConnector(test_credentials)

        # Valid entities
        valid_entities = {
            "search_all_channels": False,
            "channels": ["general", "engineering"],
            "include_dm": False,
            "include_group_dm": False,
            "include_private_channels": True,
        }
        assert slack_connector.validate_entities(valid_entities) is True

        # Invalid entities - channels required when search_all_channels=False
        invalid_entities = {
            "search_all_channels": False,
            "channels": [],  # Empty list
        }
        assert slack_connector.validate_entities(invalid_entities) is False

        # Invalid entities - empty string in channels
        invalid_entities2 = {
            "search_all_channels": False,
            "channels": ["general", ""],
        }
        assert slack_connector.validate_entities(invalid_entities2) is False


================================================
FILE: backend/tests/unit/file_store/test_file_store.py
================================================
import datetime
from collections.abc import Generator
from io import BytesIO
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import Mock
from unittest.mock import patch

import pytest
from sqlalchemy import create_engine
from sqlalchemy import DateTime
from sqlalchemy import Enum
from sqlalchemy import String
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import mapped_column
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func

from onyx.configs.constants import FileOrigin
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.file_store import S3BackedFileStore


class DBBaseTest(DeclarativeBase):
    pass


class FileRecord(DBBaseTest):
    __tablename__: str = "file_record"

    # Internal file ID, must be unique across all files
    file_id: Mapped[str] = mapped_column(String, primary_key=True)

    display_name: Mapped[str | None] = mapped_column(String, nullable=True)
    file_origin: Mapped[FileOrigin] = mapped_column(
        Enum(FileOrigin, native_enum=False), nullable=False
    )
    file_type: Mapped[str] = mapped_column(String, default="text/plain")

    # External storage support (S3, MinIO, Azure Blob, etc.)
    bucket_name: Mapped[str] = mapped_column(String, nullable=False)
    object_key: Mapped[str] = mapped_column(String, nullable=False)

    # Timestamps for external storage
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )


@pytest.fixture
def db_session() -> Generator[Session, None, None]:
    """Create an in-memory SQLite database for testing"""
    engine = create_engine("sqlite:///:memory:")
    DBBaseTest.metadata.create_all(engine)
    SessionLocal = sessionmaker(bind=engine)
    session = SessionLocal()
    yield session
    session.close()


@pytest.fixture
def sample_content() -> bytes:
    """Sample file content for testing"""
    return b"This is a test file content"


@pytest.fixture
def sample_file_io(sample_content: bytes) -> BytesIO:
    """Sample file IO object for testing"""
    return BytesIO(sample_content)


class TestExternalStorageFileStore:
    """Test external storage file store functionality (S3-compatible)"""

    def test_get_default_file_store_s3(self) -> None:
        """Test that S3 file store is returned when backend is s3"""
        with patch("onyx.configs.app_configs.FILE_STORE_BACKEND", "s3"):
            file_store = get_default_file_store()
            assert isinstance(file_store, S3BackedFileStore)

    def test_s3_client_initialization_with_credentials(self) -> None:
        """Test S3 client initialization with explicit credentials"""
        with patch("boto3.client") as mock_boto3:
            file_store = S3BackedFileStore(
                bucket_name="test-bucket",
                aws_access_key_id="test-key",
                aws_secret_access_key="test-secret",
                aws_region_name="us-west-2",
                s3_endpoint_url=None,
            )
            file_store._get_s3_client()

            # Verify boto3 client was called with the expected arguments
            mock_boto3.assert_called_once()
            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]

            assert call_kwargs["service_name"] == "s3"
            assert call_kwargs["aws_access_key_id"] == "test-key"
            assert call_kwargs["aws_secret_access_key"] == "test-secret"
            assert call_kwargs["region_name"] == "us-west-2"

    def test_s3_client_initialization_with_iam_role(
        self,
        db_session: Session,  # noqa: ARG002
    ) -> None:
        """Test S3 client initialization with IAM role (no explicit credentials)"""
        with patch("boto3.client") as mock_boto3:
            file_store = S3BackedFileStore(
                bucket_name="test-bucket",
                aws_access_key_id=None,
                aws_secret_access_key=None,
                aws_region_name="us-west-2",
                s3_endpoint_url=None,
            )
            file_store._get_s3_client()

            # Verify boto3 client was called with the expected arguments
            mock_boto3.assert_called_once()
            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]

            assert call_kwargs["service_name"] == "s3"
            assert call_kwargs["region_name"] == "us-west-2"
            # Should not have explicit credentials
            assert "aws_access_key_id" not in call_kwargs
            assert "aws_secret_access_key" not in call_kwargs

    def test_s3_bucket_name_configuration(self) -> None:
        """Test S3 bucket name configuration"""
        with patch(
            "onyx.file_store.file_store.S3_FILE_STORE_BUCKET_NAME", "my-test-bucket"
        ):
            file_store = S3BackedFileStore(bucket_name="my-test-bucket")
            bucket_name: str = file_store._get_bucket_name()
            assert bucket_name == "my-test-bucket"

    def test_s3_key_generation_default_prefix(self) -> None:
        """Test S3 key generation with default prefix"""
        with (
            patch("onyx.file_store.file_store.S3_FILE_STORE_PREFIX", "onyx-files"),
            patch(
                "onyx.file_store.file_store.get_current_tenant_id",
                return_value="test-tenant",
            ),
        ):
            file_store = S3BackedFileStore(bucket_name="test-bucket")
            s3_key: str = file_store._get_s3_key("test-file.txt")
            assert s3_key == "onyx-files/test-tenant/test-file.txt"

    def test_s3_key_generation_custom_prefix(self) -> None:
        """Test S3 key generation with custom prefix"""
        with (
            patch("onyx.file_store.file_store.S3_FILE_STORE_PREFIX", "custom-prefix"),
            patch(
                "onyx.file_store.file_store.get_current_tenant_id",
                return_value="test-tenant",
            ),
        ):
            file_store = S3BackedFileStore(
                bucket_name="test-bucket", s3_prefix="custom-prefix"
            )
            s3_key: str = file_store._get_s3_key("test-file.txt")
            assert s3_key == "custom-prefix/test-tenant/test-file.txt"

    def test_s3_key_generation_with_different_tenant_ids(self) -> None:
        """Test S3 key generation with different tenant IDs"""
        with patch("onyx.file_store.file_store.S3_FILE_STORE_PREFIX", "onyx-files"):
            file_store = S3BackedFileStore(bucket_name="test-bucket")

            # Test with tenant ID "tenant-1"
            with patch(
                "onyx.file_store.file_store.get_current_tenant_id",
                return_value="tenant-1",
            ):
                s3_key = file_store._get_s3_key("document.pdf")
                assert s3_key == "onyx-files/tenant-1/document.pdf"

            # Test with tenant ID "tenant-2"
            with patch(
                "onyx.file_store.file_store.get_current_tenant_id",
                return_value="tenant-2",
            ):
                s3_key = file_store._get_s3_key("document.pdf")
                assert s3_key == "onyx-files/tenant-2/document.pdf"

            # Test with default tenant (public)
            with patch(
                "onyx.file_store.file_store.get_current_tenant_id",
                return_value="public",
            ):
                s3_key = file_store._get_s3_key("document.pdf")
                assert s3_key == "onyx-files/public/document.pdf"

    @patch("boto3.client")
    def test_s3_save_file_mock(
        self,
        mock_boto3: MagicMock,
        db_session: Session,  # noqa: ARG002
        sample_file_io: BytesIO,
    ) -> None:
        """Test S3 file saving with mocked S3 client"""
        # Setup S3 mock
        mock_s3_client: Mock = Mock()
        mock_boto3.return_value = mock_s3_client

        # Create a mock database session
        mock_db_session: Mock = Mock()
        mock_db_session.commit = Mock()
        mock_db_session.rollback = Mock()

        with (
            patch(
                "onyx.file_store.file_store.S3_FILE_STORE_BUCKET_NAME", "test-bucket"
            ),
            patch("onyx.file_store.file_store.S3_FILE_STORE_PREFIX", "onyx-files"),
            patch("onyx.file_store.file_store.S3_AWS_ACCESS_KEY_ID", "test-key"),
            patch("onyx.file_store.file_store.S3_AWS_SECRET_ACCESS_KEY", "test-secret"),
        ):
            # Mock the database operation to avoid SQLAlchemy issues
            with patch("onyx.db.file_record.upsert_filerecord") as mock_upsert:
                mock_upsert.return_value = Mock()

                file_store = S3BackedFileStore(bucket_name="test-bucket")

                # This should not raise an exception
                file_store.save_file(
                    file_id="test-file.txt",
                    content=sample_file_io,
                    display_name="Test File",
                    file_origin=FileOrigin.OTHER,
                    file_type="text/plain",
                    db_session=mock_db_session,
                )

                # Verify S3 client was called correctly
                mock_s3_client.put_object.assert_called_once()
                call_args = mock_s3_client.put_object.call_args
                assert call_args[1]["Bucket"] == "test-bucket"
                assert call_args[1]["Key"] == "onyx-files/public/test-file.txt"
                assert call_args[1]["ContentType"] == "text/plain"

    def test_minio_client_initialization(self) -> None:
        """Test S3 client initialization with MinIO endpoint"""
        with (
            patch("boto3.client") as mock_boto3,
            patch("urllib3.disable_warnings"),
        ):
            file_store = S3BackedFileStore(
                bucket_name="test-bucket",
                aws_access_key_id="minioadmin",
                aws_secret_access_key="minioadmin",
                aws_region_name="us-east-1",
                s3_endpoint_url="http://localhost:9000",
                s3_verify_ssl=False,
            )
            file_store._get_s3_client()

            # Verify boto3 client was called with MinIO-specific settings
            mock_boto3.assert_called_once()
            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]

            assert call_kwargs["service_name"] == "s3"
            assert call_kwargs["endpoint_url"] == "http://localhost:9000"
            assert call_kwargs["aws_access_key_id"] == "minioadmin"
            assert call_kwargs["aws_secret_access_key"] == "minioadmin"
            assert call_kwargs["region_name"] == "us-east-1"
            assert call_kwargs["verify"] is False

            # Verify S3 configuration for MinIO
            config = call_kwargs["config"]
            assert config.signature_version == "s3v4"
            assert config.s3["addressing_style"] == "path"

    def test_minio_ssl_verification_enabled(self) -> None:
        """Test MinIO with SSL verification enabled"""
        with patch("boto3.client") as mock_boto3:
            file_store = S3BackedFileStore(
                bucket_name="test-bucket",
                aws_access_key_id="test-key",
                aws_secret_access_key="test-secret",
                s3_endpoint_url="https://minio.example.com",
                s3_verify_ssl=True,
            )
            file_store._get_s3_client()

            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]
            # When SSL verification is enabled, verify should not be in kwargs (defaults to True)
            assert "verify" not in call_kwargs or call_kwargs.get("verify") is not False
            assert call_kwargs["endpoint_url"] == "https://minio.example.com"

    def test_aws_s3_without_endpoint_url(self) -> None:
        """Test that regular AWS S3 doesn't include endpoint URL or custom config"""
        with patch("boto3.client") as mock_boto3:
            file_store = S3BackedFileStore(
                bucket_name="test-bucket",
                aws_access_key_id="test-key",
                aws_secret_access_key="test-secret",
                aws_region_name="us-west-2",
                s3_endpoint_url=None,
            )
            file_store._get_s3_client()

            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]

            # For regular AWS S3, endpoint_url should not be present
            assert "endpoint_url" not in call_kwargs
            assert call_kwargs["service_name"] == "s3"
            assert call_kwargs["region_name"] == "us-west-2"
            # config should not be present for regular AWS S3
            assert "config" not in call_kwargs


class TestFileStoreInterface:
    """Test the general file store interface"""

    def test_file_store_s3_when_configured(self) -> None:
        """Test that S3 file store is returned when configured"""
        with patch("onyx.configs.app_configs.FILE_STORE_BACKEND", "s3"):
            file_store = get_default_file_store()
            assert isinstance(file_store, S3BackedFileStore)

    def test_file_store_postgres_when_configured(self) -> None:
        """Test that Postgres file store is returned when configured"""
        from onyx.file_store.postgres_file_store import PostgresBackedFileStore

        with patch("onyx.configs.app_configs.FILE_STORE_BACKEND", "postgres"):
            file_store = get_default_file_store()
            assert isinstance(file_store, PostgresBackedFileStore)

    def test_file_store_defaults_to_s3(self) -> None:
        """Test that the default backend is s3"""
        file_store = get_default_file_store()
        assert isinstance(file_store, S3BackedFileStore)


================================================
FILE: backend/tests/unit/file_store/test_postgres_file_store.py
================================================
"""Unit tests for PostgresBackedFileStore.

These tests mock the database layer (sessions, raw connections, large objects)
so they run without any external services.
"""

from io import BytesIO
from io import StringIO
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import FileOrigin
from onyx.file_store.postgres_file_store import POSTGRES_BUCKET_SENTINEL
from onyx.file_store.postgres_file_store import PostgresBackedFileStore


@pytest.fixture
def store() -> PostgresBackedFileStore:
    return PostgresBackedFileStore()


def _make_session_ctx(
    mock_session: MagicMock,
) -> Any:
    """Build a context-manager mock that yields mock_session."""
    from contextlib import contextmanager

    @contextmanager
    def _ctx(session: Any = None):  # type: ignore
        yield session if session is not None else mock_session

    return _ctx


def _mock_lobject(oid: int = 42, data: bytes = b"hello") -> MagicMock:
    """Return a mock lobject factory that the raw connection exposes."""
    lobj = MagicMock()
    lobj.oid = oid
    lobj.read = MagicMock(side_effect=[data, b""])
    lobj.write = MagicMock()
    lobj.close = MagicMock()
    lobj.unlink = MagicMock()
    return lobj


class TestInitialize:
    def test_initialize_is_noop(self, store: PostgresBackedFileStore) -> None:
        # Should not raise
        store.initialize()


class TestSaveFile:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_save_bytes_content(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        raw_conn = MagicMock()
        lobj = _mock_lobject(oid=99)
        raw_conn.lobject.return_value = lobj
        mock_session.connection.return_value.connection.dbapi_connection = raw_conn

        with (
            patch(
                "onyx.file_store.postgres_file_store.upsert_filerecord"
            ) as mock_upsert_fr,
            patch(
                "onyx.file_store.postgres_file_store.upsert_file_content"
            ) as mock_upsert_fc,
        ):
            content = BytesIO(b"test data")
            file_id = store.save_file(
                content=content,
                display_name="test.txt",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                file_id="my-file-id",
                db_session=mock_session,
            )

        assert file_id == "my-file-id"
        lobj.write.assert_called_once_with(b"test data")

        mock_upsert_fr.assert_called_once()
        fr_kwargs = mock_upsert_fr.call_args[1]
        assert fr_kwargs["file_id"] == "my-file-id"
        assert fr_kwargs["bucket_name"] == POSTGRES_BUCKET_SENTINEL
        assert fr_kwargs["object_key"] == "99"

        mock_upsert_fc.assert_called_once()
        fc_kwargs = mock_upsert_fc.call_args[1]
        assert fc_kwargs["lobj_oid"] == 99
        assert fc_kwargs["file_size"] == len(b"test data")

    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_save_string_io_content(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        """StringIO content should be encoded to UTF-8 bytes."""
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        raw_conn = MagicMock()
        lobj = _mock_lobject(oid=50)
        raw_conn.lobject.return_value = lobj
        mock_session.connection.return_value.connection.dbapi_connection = raw_conn

        with (
            patch("onyx.file_store.postgres_file_store.upsert_filerecord"),
            patch("onyx.file_store.postgres_file_store.upsert_file_content"),
        ):
            content = StringIO("text content")
            file_id = store.save_file(
                content=content,
                display_name="doc.txt",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                db_session=mock_session,
            )

        # Should have generated a UUID file_id
        assert file_id is not None
        lobj.write.assert_called_once_with(b"text content")

    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_save_rolls_back_on_error(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        raw_conn = MagicMock()
        raw_conn.lobject.side_effect = RuntimeError("pg error")
        mock_session.connection.return_value.connection.dbapi_connection = raw_conn

        with pytest.raises(RuntimeError, match="pg error"):
            store.save_file(
                content=BytesIO(b"data"),
                display_name="fail.txt",
                file_origin=FileOrigin.OTHER,
                file_type="text/plain",
                db_session=mock_session,
            )
        mock_session.rollback.assert_called_once()


class TestReadFile:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_read_file_in_memory(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        mock_record = MagicMock()
        mock_record.lobj_oid = 42

        raw_conn = MagicMock()
        lobj = _mock_lobject(oid=42, data=b"file contents")
        raw_conn.lobject.return_value = lobj
        mock_session.connection.return_value.connection.dbapi_connection = raw_conn

        with patch(
            "onyx.file_store.postgres_file_store.get_file_content_by_file_id",
            return_value=mock_record,
        ):
            result = store.read_file("my-file", db_session=mock_session)

        assert result.read() == b"file contents"


class TestDeleteFile:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_delete_removes_lobject_and_records(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        mock_record = MagicMock()
        mock_record.lobj_oid = 77

        raw_conn = MagicMock()
        lobj = _mock_lobject(oid=77)
        raw_conn.lobject.return_value = lobj
        mock_session.connection.return_value.connection.dbapi_connection = raw_conn

        with (
            patch(
                "onyx.file_store.postgres_file_store.get_file_content_by_file_id",
                return_value=mock_record,
            ),
            patch(
                "onyx.file_store.postgres_file_store.delete_file_content_by_file_id"
            ) as mock_del_fc,
            patch(
                "onyx.file_store.postgres_file_store.delete_filerecord_by_file_id"
            ) as mock_del_fr,
        ):
            store.delete_file("file-77", db_session=mock_session)

        lobj.unlink.assert_called_once()
        mock_del_fc.assert_called_once()
        mock_del_fr.assert_called_once()
        mock_session.commit.assert_called_once()


class TestGetFileSize:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_returns_stored_size(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        mock_record = MagicMock()
        mock_record.file_size = 1024

        with patch(
            "onyx.file_store.postgres_file_store.get_file_content_by_file_id",
            return_value=mock_record,
        ):
            size = store.get_file_size("file-1", db_session=mock_session)

        assert size == 1024

    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_returns_none_on_error(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        with patch(
            "onyx.file_store.postgres_file_store.get_file_content_by_file_id",
            side_effect=RuntimeError("not found"),
        ):
            size = store.get_file_size("missing", db_session=mock_session)

        assert size is None


class TestChangeFileId:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_reuses_same_lobject(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        """Changing file ID should reuse the same large object (no copy)."""
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        old_fr = MagicMock()
        old_fr.display_name = "doc.pdf"
        old_fr.file_origin = FileOrigin.OTHER
        old_fr.file_type = "application/pdf"
        old_fr.file_metadata = None
        old_fr.object_key = "55"

        with (
            patch(
                "onyx.file_store.postgres_file_store.get_filerecord_by_file_id",
                return_value=old_fr,
            ),
            patch(
                "onyx.file_store.postgres_file_store.upsert_filerecord"
            ) as mock_upsert_fr,
            patch(
                "onyx.file_store.postgres_file_store.transfer_file_content_file_id"
            ) as mock_transfer,
            patch("onyx.file_store.postgres_file_store.delete_filerecord_by_file_id"),
        ):
            store.change_file_id("old-id", "new-id", db_session=mock_session)

        # file_content row should be moved in-place via transfer
        transfer_kwargs = mock_transfer.call_args[1]
        assert transfer_kwargs["old_file_id"] == "old-id"
        assert transfer_kwargs["new_file_id"] == "new-id"

        # New file_record should preserve the same object_key (LO OID)
        fr_kwargs = mock_upsert_fr.call_args[1]
        assert fr_kwargs["file_id"] == "new-id"
        assert fr_kwargs["object_key"] == "55"


class TestHasFile:
    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_returns_true_when_present(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        record = MagicMock()
        record.file_origin = FileOrigin.OTHER
        record.file_type = "text/plain"

        with patch(
            "onyx.file_store.postgres_file_store.get_filerecord_by_file_id_optional",
            return_value=record,
        ):
            assert store.has_file(
                "f1", FileOrigin.OTHER, "text/plain", db_session=mock_session
            )

    @patch(
        "onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none"
    )
    def test_returns_false_when_absent(
        self,
        mock_get_session: MagicMock,
        store: PostgresBackedFileStore,
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value = _make_session_ctx(mock_session)(None)

        with patch(
            "onyx.file_store.postgres_file_store.get_filerecord_by_file_id_optional",
            return_value=None,
        ):
            assert not store.has_file(
                "missing", FileOrigin.OTHER, "text/plain", db_session=mock_session
            )


class TestReadContentBytes:
    def test_bytes_passthrough(self) -> None:
        result = PostgresBackedFileStore._read_content_bytes(BytesIO(b"raw"))
        assert result == b"raw"

    def test_string_encoded_to_utf8(self) -> None:
        result = PostgresBackedFileStore._read_content_bytes(StringIO("hello"))
        assert result == b"hello"


================================================
FILE: backend/tests/unit/model_server/test_embedding.py
================================================
import asyncio
import time
from typing import Any
from typing import List
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from model_server.encoders import embed_text
from model_server.encoders import process_embed_request
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import EmbedRequest


@pytest.mark.asyncio
async def test_embed_text_no_model_name() -> None:
    # Test that the function raises an error when no model name is provided
    with pytest.raises(
        ValueError,
        match="Model name must be provided to run embeddings",
    ):
        await embed_text(
            texts=["test1", "test2"],
            model_name=None,
            max_context_length=512,
            normalize_embeddings=True,
            prefix=None,
        )


@pytest.mark.asyncio
async def test_embed_text_local_model() -> None:
    with patch("model_server.encoders.get_embedding_model") as mock_get_model:
        mock_model = MagicMock()
        mock_model.encode.return_value = [[0.1, 0.2], [0.3, 0.4]]
        mock_get_model.return_value = mock_model

        result = await embed_text(
            texts=["test1", "test2"],
            model_name="fake-local-model",
            max_context_length=512,
            normalize_embeddings=True,
            prefix=None,
        )

        assert result == [[0.1, 0.2], [0.3, 0.4]]
        mock_model.encode.assert_called_once()


@pytest.mark.asyncio
async def test_concurrent_embeddings() -> None:
    def mock_encode(
        *args: Any, **kwargs: Any  # noqa: ARG001
    ) -> List[List[float]]:  # noqa: ARG001
        time.sleep(5)
        return [[0.1, 0.2, 0.3]]

    test_req = EmbedRequest(
        texts=["test"],
        model_name="'nomic-ai/nomic-embed-text-v1'",
        deployment_name=None,
        max_context_length=512,
        normalize_embeddings=True,
        api_key=None,
        provider_type=None,
        text_type=EmbedTextType.QUERY,
        manual_query_prefix=None,
        manual_passage_prefix=None,
        api_url=None,
        api_version=None,
        reduced_dimension=None,
    )

    with patch("model_server.encoders.get_embedding_model") as mock_get_model:
        mock_model = MagicMock()
        mock_model.encode = mock_encode
        mock_get_model.return_value = mock_model
        start_time = time.time()

        tasks = [process_embed_request(test_req) for _ in range(5)]
        await asyncio.gather(*tasks)

        end_time = time.time()

        # 5 * 5 seconds = 25 seconds, this test ensures that the embeddings are at least yielding the thread
        # However, the developer may still introduce unnecessary blocking above the mock and this test will
        # still pass as long as it's less than (7 - 5) / 5 seconds
        assert end_time - start_time < 7


================================================
FILE: backend/tests/unit/onyx/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/access/test_user_file_access.py
================================================
"""Tests for user file ACL computation, including shared persona access."""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from onyx.access.access import collect_user_file_access
from onyx.access.access import get_access_for_user_files_impl
from onyx.access.utils import prefix_user_email
from onyx.configs.constants import PUBLIC_DOC_PAT


def _make_user(email: str) -> MagicMock:
    user = MagicMock()
    user.email = email
    user.id = uuid4()
    return user


def _make_persona(
    *,
    owner: MagicMock | None = None,
    shared_users: list[MagicMock] | None = None,
    is_public: bool = False,
    deleted: bool = False,
) -> MagicMock:
    persona = MagicMock()
    persona.deleted = deleted
    persona.is_public = is_public
    persona.user_id = owner.id if owner else None
    persona.user = owner
    persona.users = shared_users or []
    return persona


def _make_user_file(
    *,
    owner: MagicMock,
    assistants: list[MagicMock] | None = None,
) -> MagicMock:
    uf = MagicMock()
    uf.id = uuid4()
    uf.user = owner
    uf.user_id = owner.id
    uf.assistants = assistants or []
    return uf


class TestCollectUserFileAccess:
    def test_owner_only(self) -> None:
        owner = _make_user("owner@test.com")
        uf = _make_user_file(owner=owner)

        emails, is_public = collect_user_file_access(uf)

        assert emails == {"owner@test.com"}
        assert is_public is False

    def test_shared_persona_adds_users(self) -> None:
        owner = _make_user("owner@test.com")
        shared = _make_user("shared@test.com")
        persona = _make_persona(owner=owner, shared_users=[shared])
        uf = _make_user_file(owner=owner, assistants=[persona])

        emails, is_public = collect_user_file_access(uf)

        assert emails == {"owner@test.com", "shared@test.com"}
        assert is_public is False

    def test_persona_owner_added(self) -> None:
        """Persona owner (different from file owner) gets access too."""
        file_owner = _make_user("file-owner@test.com")
        persona_owner = _make_user("persona-owner@test.com")
        persona = _make_persona(owner=persona_owner)
        uf = _make_user_file(owner=file_owner, assistants=[persona])

        emails, is_public = collect_user_file_access(uf)

        assert "file-owner@test.com" in emails
        assert "persona-owner@test.com" in emails

    def test_public_persona_makes_file_public(self) -> None:
        owner = _make_user("owner@test.com")
        persona = _make_persona(owner=owner, is_public=True)
        uf = _make_user_file(owner=owner, assistants=[persona])

        emails, is_public = collect_user_file_access(uf)

        assert is_public is True
        assert "owner@test.com" in emails

    def test_deleted_persona_ignored(self) -> None:
        owner = _make_user("owner@test.com")
        shared = _make_user("shared@test.com")
        persona = _make_persona(owner=owner, shared_users=[shared], deleted=True)
        uf = _make_user_file(owner=owner, assistants=[persona])

        emails, is_public = collect_user_file_access(uf)

        assert emails == {"owner@test.com"}
        assert is_public is False

    def test_multiple_personas_combine(self) -> None:
        owner = _make_user("owner@test.com")
        user_a = _make_user("a@test.com")
        user_b = _make_user("b@test.com")
        p1 = _make_persona(owner=owner, shared_users=[user_a])
        p2 = _make_persona(owner=owner, shared_users=[user_b])
        uf = _make_user_file(owner=owner, assistants=[p1, p2])

        emails, is_public = collect_user_file_access(uf)

        assert emails == {"owner@test.com", "a@test.com", "b@test.com"}

    def test_deduplication(self) -> None:
        owner = _make_user("owner@test.com")
        shared = _make_user("shared@test.com")
        p1 = _make_persona(owner=owner, shared_users=[shared])
        p2 = _make_persona(owner=owner, shared_users=[shared])
        uf = _make_user_file(owner=owner, assistants=[p1, p2])

        emails, _ = collect_user_file_access(uf)

        assert emails == {"owner@test.com", "shared@test.com"}


class TestGetAccessForUserFiles:
    def test_shared_user_in_acl(self) -> None:
        """Shared persona users should appear in the ACL."""
        owner = _make_user("owner@test.com")
        shared = _make_user("shared@test.com")
        persona = _make_persona(owner=owner, shared_users=[shared])
        uf = _make_user_file(owner=owner, assistants=[persona])

        db_session = MagicMock()
        with patch(
            "onyx.access.access.fetch_user_files_with_access_relationships",
            return_value=[uf],
        ):
            result = get_access_for_user_files_impl([str(uf.id)], db_session)

        access = result[str(uf.id)]
        acl = access.to_acl()
        assert prefix_user_email("owner@test.com") in acl
        assert prefix_user_email("shared@test.com") in acl
        assert access.is_public is False

    def test_public_persona_sets_public_acl(self) -> None:
        owner = _make_user("owner@test.com")
        persona = _make_persona(owner=owner, is_public=True)
        uf = _make_user_file(owner=owner, assistants=[persona])

        db_session = MagicMock()
        with patch(
            "onyx.access.access.fetch_user_files_with_access_relationships",
            return_value=[uf],
        ):
            result = get_access_for_user_files_impl([str(uf.id)], db_session)

        access = result[str(uf.id)]
        assert access.is_public is True
        acl = access.to_acl()
        assert PUBLIC_DOC_PAT in acl


================================================
FILE: backend/tests/unit/onyx/auth/conftest.py
================================================
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import pytest

from onyx.db.models import OAuthAccount
from onyx.db.models import User


@pytest.fixture
def mock_user() -> MagicMock:
    """Creates a mock User instance for testing."""
    user = MagicMock(spec=User)
    user.email = "test@example.com"
    user.id = "test-user-id"
    return user


@pytest.fixture
def mock_oauth_account() -> MagicMock:
    """Creates a mock OAuthAccount instance for testing."""
    oauth_account = MagicMock(spec=OAuthAccount)
    oauth_account.oauth_name = "google"
    oauth_account.refresh_token = "test-refresh-token"
    oauth_account.access_token = "test-access-token"
    oauth_account.expires_at = None
    return oauth_account


@pytest.fixture
def mock_user_manager() -> MagicMock:
    """Creates a mock user manager for testing."""
    user_manager = MagicMock()
    user_manager.user_db = MagicMock()
    user_manager.user_db.update_oauth_account = AsyncMock()
    user_manager.user_db.update = AsyncMock()
    return user_manager


@pytest.fixture
def mock_db_session() -> MagicMock:
    """Creates a mock database session for testing."""
    return MagicMock()


================================================
FILE: backend/tests/unit/onyx/auth/test_disposable_email_validator.py
================================================
"""
Tests for disposable email validation.
"""

from onyx.auth.disposable_email_validator import DisposableEmailValidator
from onyx.auth.disposable_email_validator import is_disposable_email


class TestDisposableEmailValidator:
    """Test the DisposableEmailValidator class."""

    def test_singleton_pattern(self) -> None:
        """Test that DisposableEmailValidator is a singleton."""
        validator1 = DisposableEmailValidator()
        validator2 = DisposableEmailValidator()
        assert validator1 is validator2

    def test_fallback_domains_included(self) -> None:
        """Test that fallback domains are always included."""
        validator = DisposableEmailValidator()
        domains = validator.get_domains()

        # Check that our hardcoded fallback domains are present
        assert "trashlify.com" in domains
        assert "10minutemail.com" in domains
        assert "guerrillamail.com" in domains
        assert "mailinator.com" in domains
        assert "tempmail.com" in domains
        assert "throwaway.email" in domains
        assert "yopmail.com" in domains

    def test_is_disposable_trashlify(self) -> None:
        """Test that trashlify.com emails are detected as disposable."""
        assert is_disposable_email("test@trashlify.com") is True
        assert is_disposable_email("user123@trashlify.com") is True
        assert is_disposable_email("4q4k99yca1@trashlify.com") is True

    def test_is_disposable_other_known_domains(self) -> None:
        """Test detection of other known disposable domains."""
        disposable_emails = [
            "test@10minutemail.com",
            "user@guerrillamail.com",
            "temp@mailinator.com",
            "fake@tempmail.com",
            "throw@throwaway.email",
            "yop@yopmail.com",
        ]

        for email in disposable_emails:
            assert is_disposable_email(email) is True, f"{email} should be disposable"

    def test_is_not_disposable_legitimate_domains(self) -> None:
        """Test that legitimate email domains are not flagged."""
        legitimate_emails = [
            "user@gmail.com",
            "employee@company.com",
            "admin@onyx.app",
            "test@outlook.com",
            "person@yahoo.com",
            "contact@protonmail.com",
        ]

        for email in legitimate_emails:
            assert (
                is_disposable_email(email) is False
            ), f"{email} should not be disposable"

    def test_case_insensitive(self) -> None:
        """Test that domain checking is case-insensitive."""
        assert is_disposable_email("test@TRASHLIFY.COM") is True
        assert is_disposable_email("test@Trashlify.Com") is True
        assert is_disposable_email("test@TrAsHlIfY.cOm") is True

    def test_invalid_email_formats(self) -> None:
        """Test handling of invalid email formats."""
        assert is_disposable_email("") is False
        assert is_disposable_email("notanemail") is False
        assert is_disposable_email("@trashlify.com") is False
        assert is_disposable_email("test@") is False
        assert is_disposable_email("@") is False

    def test_email_with_subdomains(self) -> None:
        """Test that emails with subdomains are handled correctly."""
        # The domain should be the last part after @
        assert is_disposable_email("user@mail.trashlify.com") is False
        # Only exact domain matches should trigger

    def test_validator_instance_methods(self) -> None:
        """Test the validator instance methods directly."""
        validator = DisposableEmailValidator()

        # Test is_disposable method
        assert validator.is_disposable("test@trashlify.com") is True
        assert validator.is_disposable("test@gmail.com") is False

        # Test invalid inputs
        assert validator.is_disposable("") is False
        assert validator.is_disposable("invalid") is False
        assert validator.is_disposable("@trashlify.com") is False


================================================
FILE: backend/tests/unit/onyx/auth/test_email.py
================================================
import pytest

from onyx.auth.email_utils import build_user_email_invite
from onyx.auth.email_utils import send_email
from onyx.configs.constants import AuthType
from onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.server.runtime.onyx_runtime import OnyxRuntime


@pytest.mark.skip(
    reason="This sends real emails, so only run when you really want to test this!"
)
def test_send_user_email_invite() -> None:
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    application_name = ONYX_DEFAULT_APPLICATION_NAME

    onyx_file = OnyxRuntime.get_emailable_logo()

    subject = f"Invitation to Join {application_name} Organization"

    FROM_EMAIL = "noreply@onyx.app"
    TO_EMAIL = "support@onyx.app"
    text_content, html_content = build_user_email_invite(
        FROM_EMAIL, TO_EMAIL, ONYX_DEFAULT_APPLICATION_NAME, AuthType.CLOUD
    )

    send_email(
        TO_EMAIL,
        subject,
        html_content,
        text_content,
        mail_from=FROM_EMAIL,
        inline_png=("logo.png", onyx_file.data),
    )


================================================
FILE: backend/tests/unit/onyx/auth/test_is_same_origin.py
================================================
import pytest

from onyx.auth.users import _is_same_origin


class TestExactMatch:
    """Origins that are textually identical should always match."""

    @pytest.mark.parametrize(
        "origin",
        [
            "http://localhost:3000",
            "https://app.example.com",
            "https://app.example.com:8443",
            "http://127.0.0.1:8080",
        ],
    )
    def test_identical_origins(self, origin: str) -> None:
        assert _is_same_origin(origin, origin)


class TestLoopbackPortRelaxation:
    """On loopback addresses, port differences should be ignored."""

    @pytest.mark.parametrize(
        "actual,expected",
        [
            ("http://localhost:3001", "http://localhost:3000"),
            ("http://localhost:8080", "http://localhost:3000"),
            ("http://localhost", "http://localhost:3000"),
            ("http://127.0.0.1:3001", "http://127.0.0.1:3000"),
            ("http://[::1]:3001", "http://[::1]:3000"),
        ],
    )
    def test_loopback_different_ports_accepted(
        self, actual: str, expected: str
    ) -> None:
        assert _is_same_origin(actual, expected)

    @pytest.mark.parametrize(
        "actual,expected",
        [
            ("https://localhost:3001", "http://localhost:3000"),
            ("http://localhost:3001", "https://localhost:3000"),
        ],
    )
    def test_loopback_different_scheme_rejected(
        self, actual: str, expected: str
    ) -> None:
        assert not _is_same_origin(actual, expected)

    def test_loopback_hostname_mismatch_rejected(self) -> None:
        assert not _is_same_origin("http://localhost:3001", "http://127.0.0.1:3000")


class TestNonLoopbackStrictPort:
    """Non-loopback origins must match scheme, hostname, AND port."""

    def test_different_port_rejected(self) -> None:
        assert not _is_same_origin(
            "https://app.example.com:8443", "https://app.example.com"
        )

    def test_different_hostname_rejected(self) -> None:
        assert not _is_same_origin("https://evil.com", "https://app.example.com")

    def test_different_scheme_rejected(self) -> None:
        assert not _is_same_origin("http://app.example.com", "https://app.example.com")

    def test_same_port_explicit(self) -> None:
        assert _is_same_origin(
            "https://app.example.com:443", "https://app.example.com:443"
        )


class TestDefaultPortNormalization:
    """Port should be normalized so that omitted default port == explicit default port."""

    def test_http_implicit_vs_explicit_80(self) -> None:
        assert _is_same_origin("http://example.com", "http://example.com:80")

    def test_http_explicit_80_vs_implicit(self) -> None:
        assert _is_same_origin("http://example.com:80", "http://example.com")

    def test_https_implicit_vs_explicit_443(self) -> None:
        assert _is_same_origin("https://example.com", "https://example.com:443")

    def test_https_explicit_443_vs_implicit(self) -> None:
        assert _is_same_origin("https://example.com:443", "https://example.com")

    def test_http_non_default_port_vs_implicit_rejected(self) -> None:
        assert not _is_same_origin("http://example.com:8080", "http://example.com")


class TestTrailingSlash:
    """Trailing slashes should not affect comparison."""

    def test_trailing_slash_on_actual(self) -> None:
        assert _is_same_origin("https://app.example.com/", "https://app.example.com")

    def test_trailing_slash_on_expected(self) -> None:
        assert _is_same_origin("https://app.example.com", "https://app.example.com/")

    def test_trailing_slash_on_both(self) -> None:
        assert _is_same_origin("https://app.example.com/", "https://app.example.com/")


class TestCSWSHScenarios:
    """Realistic attack scenarios that must be rejected."""

    def test_remote_attacker_rejected(self) -> None:
        assert not _is_same_origin("https://evil.com", "http://localhost:3000")

    def test_remote_attacker_same_port_rejected(self) -> None:
        assert not _is_same_origin("http://evil.com:3000", "http://localhost:3000")

    def test_remote_attacker_matching_hostname_different_port(self) -> None:
        assert not _is_same_origin(
            "https://app.example.com:9999", "https://app.example.com"
        )


================================================
FILE: backend/tests/unit/onyx/auth/test_jwt_provisioning.py
================================================
from datetime import datetime
from datetime import timezone
from typing import Any
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import pytest

from onyx.auth import users as users_module


def test_extract_email_requires_valid_format() -> None:
    """Helper should validate email format before returning value."""
    assert users_module._extract_email_from_jwt({"email": "invalid@"}) is None
    result = users_module._extract_email_from_jwt(
        {"preferred_username": "ValidUser@Example.COM"}
    )
    assert result == "validuser@example.com"


@pytest.mark.asyncio
async def test_get_or_create_user_updates_expiry(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Existing web-login users should be returned and their expiry synced."""
    monkeypatch.setattr(users_module, "TRACK_EXTERNAL_IDP_EXPIRY", True)
    invited_checked: dict[str, str] = {}

    def mark_invited(value: str) -> None:
        invited_checked["email"] = value

    domain_checked: dict[str, str] = {}

    def mark_domain(value: str) -> None:
        domain_checked["email"] = value

    monkeypatch.setattr(users_module, "verify_email_is_invited", mark_invited)
    monkeypatch.setattr(users_module, "verify_email_domain", mark_domain)
    email = "jwt-user@example.com"
    exp_value = 1_700_000_000
    payload: dict[str, Any] = {"email": email, "exp": exp_value}

    existing_user = MagicMock()
    existing_user.email = email
    existing_user.oidc_expiry = None
    existing_user.role.is_web_login.return_value = True

    manager_holder: dict[str, Any] = {}

    class StubUserManager:
        def __init__(self, _user_db: object) -> None:
            manager_holder["instance"] = self
            self.user_db = MagicMock()
            self.user_db.update = AsyncMock()

        async def get_by_email(self, email_arg: str) -> MagicMock:
            assert email_arg == email
            return existing_user

    monkeypatch.setattr(users_module, "UserManager", StubUserManager)
    monkeypatch.setattr(
        users_module,
        "SQLAlchemyUserAdminDB",
        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005
    )

    result = await users_module._get_or_create_user_from_jwt(
        payload, MagicMock(), MagicMock()
    )

    assert result is existing_user
    assert invited_checked["email"] == email
    assert domain_checked["email"] == email
    expected_expiry = datetime.fromtimestamp(exp_value, tz=timezone.utc)
    instance = manager_holder["instance"]
    instance.user_db.update.assert_awaited_once_with(
        existing_user, {"oidc_expiry": expected_expiry}
    )
    assert existing_user.oidc_expiry == expected_expiry


@pytest.mark.asyncio
async def test_get_or_create_user_skips_inactive(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Inactive users should not be re-authenticated via JWT."""
    monkeypatch.setattr(users_module, "TRACK_EXTERNAL_IDP_EXPIRY", True)
    monkeypatch.setattr(users_module, "verify_email_is_invited", lambda _: None)
    monkeypatch.setattr(users_module, "verify_email_domain", lambda *_a, **_kw: None)

    email = "inactive@example.com"
    payload: dict[str, Any] = {"email": email}

    existing_user = MagicMock()
    existing_user.email = email
    existing_user.is_active = False
    existing_user.role.is_web_login.return_value = True

    class StubUserManager:
        def __init__(self, _user_db: object) -> None:
            self.user_db = MagicMock()
            self.user_db.update = AsyncMock()

        async def get_by_email(self, email_arg: str) -> MagicMock:
            assert email_arg == email
            return existing_user

    monkeypatch.setattr(users_module, "UserManager", StubUserManager)
    monkeypatch.setattr(
        users_module,
        "SQLAlchemyUserAdminDB",
        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005
    )

    result = await users_module._get_or_create_user_from_jwt(
        payload, MagicMock(), MagicMock()
    )

    assert result is None


@pytest.mark.asyncio
async def test_get_or_create_user_handles_race_conditions(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """If provisioning races, newly inactive users should still be blocked."""
    monkeypatch.setattr(users_module, "TRACK_EXTERNAL_IDP_EXPIRY", True)
    monkeypatch.setattr(users_module, "verify_email_is_invited", lambda _: None)
    monkeypatch.setattr(users_module, "verify_email_domain", lambda *_a, **_kw: None)

    email = "race@example.com"
    payload: dict[str, Any] = {"email": email}

    inactive_user = MagicMock()
    inactive_user.email = email
    inactive_user.is_active = False
    inactive_user.role.is_web_login.return_value = True

    class StubUserManager:
        def __init__(self, _user_db: object) -> None:
            self.user_db = MagicMock()
            self.user_db.update = AsyncMock()
            self.get_calls = 0

        async def get_by_email(self, email_arg: str) -> MagicMock:
            assert email_arg == email
            if self.get_calls == 0:
                self.get_calls += 1
                raise users_module.exceptions.UserNotExists()
            self.get_calls += 1
            return inactive_user

        async def create(self, *args: Any, **kwargs: Any) -> MagicMock:  # noqa: ARG002
            raise users_module.exceptions.UserAlreadyExists()

    monkeypatch.setattr(users_module, "UserManager", StubUserManager)
    monkeypatch.setattr(
        users_module,
        "SQLAlchemyUserAdminDB",
        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005
    )

    result = await users_module._get_or_create_user_from_jwt(
        payload, MagicMock(), MagicMock()
    )

    assert result is None


@pytest.mark.asyncio
async def test_get_or_create_user_provisions_new_user(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A brand new JWT user should be provisioned automatically."""
    email = "new-user@example.com"
    payload = {"email": email}
    created_user = MagicMock()
    created_user.email = email
    created_user.oidc_expiry = None
    created_user.role.is_web_login.return_value = True

    monkeypatch.setattr(users_module, "TRACK_EXTERNAL_IDP_EXPIRY", False)
    monkeypatch.setattr(users_module, "generate_password", lambda: "TempPass123!")
    monkeypatch.setattr(users_module, "verify_email_is_invited", lambda _: None)
    monkeypatch.setattr(users_module, "verify_email_domain", lambda *_a, **_kw: None)

    recorded: dict[str, Any] = {}

    class StubUserManager:
        def __init__(self, _user_db: object) -> None:
            recorded["instance"] = self
            self.user_db = MagicMock()
            self.user_db.update = AsyncMock()

        async def get_by_email(self, _email: str) -> MagicMock:
            raise users_module.exceptions.UserNotExists()

        async def create(self, user_create, safe=False, request=None):  # type: ignore[no-untyped-def]  # noqa: ARG002
            recorded["user_create"] = user_create
            recorded["request"] = request
            return created_user

    monkeypatch.setattr(users_module, "UserManager", StubUserManager)
    monkeypatch.setattr(
        users_module,
        "SQLAlchemyUserAdminDB",
        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005
    )

    request = MagicMock()
    result = await users_module._get_or_create_user_from_jwt(
        payload, request, MagicMock()
    )

    assert result is created_user
    created_payload = recorded["user_create"]
    assert created_payload.email == email
    assert created_payload.is_verified is True
    assert recorded["request"] is request


@pytest.mark.asyncio
async def test_get_or_create_user_requires_email_claim() -> None:
    """Tokens without a usable email claim should be ignored."""
    result = await users_module._get_or_create_user_from_jwt(
        {}, MagicMock(), MagicMock()
    )
    assert result is None


================================================
FILE: backend/tests/unit/onyx/auth/test_oauth_refresher.py
================================================
from datetime import datetime
from datetime import timezone
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from sqlalchemy.ext.asyncio import AsyncSession

from onyx.auth.oauth_refresher import _test_expire_oauth_token
from onyx.auth.oauth_refresher import check_and_refresh_oauth_tokens
from onyx.auth.oauth_refresher import check_oauth_account_has_refresh_token
from onyx.auth.oauth_refresher import get_oauth_accounts_requiring_refresh_token
from onyx.auth.oauth_refresher import refresh_oauth_token
from onyx.db.models import OAuthAccount


@pytest.mark.asyncio
async def test_refresh_oauth_token_success(
    mock_user: MagicMock,
    mock_oauth_account: MagicMock,
    mock_user_manager: MagicMock,
    mock_db_session: AsyncSession,
) -> None:
    """Test successful OAuth token refresh."""
    # Mock HTTP client and response
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "access_token": "new_token",
        "refresh_token": "new_refresh_token",
        "expires_in": 3600,
    }

    # Create async mock for the client post method
    mock_client = AsyncMock()
    mock_client.post.return_value = mock_response

    # Use fixture values but ensure refresh token exists
    mock_oauth_account.oauth_name = (
        "google"  # Ensure it's google to match the refresh endpoint
    )
    mock_oauth_account.refresh_token = "old_refresh_token"

    # Patch at the module level where it's actually being used
    with patch("onyx.auth.oauth_refresher.httpx.AsyncClient") as client_class_mock:
        # Configure the context manager
        client_instance = mock_client
        client_class_mock.return_value.__aenter__.return_value = client_instance

        # Call the function under test
        result = await refresh_oauth_token(
            mock_user, mock_oauth_account, mock_db_session, mock_user_manager
        )

    # Assertions
    assert result is True
    mock_client.post.assert_called_once()
    mock_user_manager.user_db.update_oauth_account.assert_called_once()

    # Verify token data was updated correctly
    update_data = mock_user_manager.user_db.update_oauth_account.call_args[0][2]
    assert update_data["access_token"] == "new_token"
    assert update_data["refresh_token"] == "new_refresh_token"
    assert "expires_at" in update_data


@pytest.mark.asyncio
async def test_refresh_oauth_token_failure(
    mock_user: MagicMock,
    mock_oauth_account: MagicMock,
    mock_user_manager: MagicMock,
    mock_db_session: AsyncSession,
) -> bool:
    """Test OAuth token refresh failure due to HTTP error."""
    # Mock HTTP client with error response
    mock_response = MagicMock()
    mock_response.status_code = 400  # Simulate error

    # Create async mock for the client post method
    mock_client = AsyncMock()
    mock_client.post.return_value = mock_response

    # Ensure refresh token exists and provider is supported
    mock_oauth_account.oauth_name = "google"
    mock_oauth_account.refresh_token = "old_refresh_token"

    # Patch at the module level where it's actually being used
    with patch("onyx.auth.oauth_refresher.httpx.AsyncClient") as client_class_mock:
        # Configure the context manager
        client_class_mock.return_value.__aenter__.return_value = mock_client

        # Call the function under test
        result = await refresh_oauth_token(
            mock_user, mock_oauth_account, mock_db_session, mock_user_manager
        )

    # Assertions
    assert result is False
    mock_client.post.assert_called_once()
    mock_user_manager.user_db.update_oauth_account.assert_not_called()
    return True


@pytest.mark.asyncio
async def test_refresh_oauth_token_no_refresh_token(
    mock_user: MagicMock,
    mock_oauth_account: MagicMock,
    mock_user_manager: MagicMock,
    mock_db_session: AsyncSession,
) -> None:
    """Test OAuth token refresh when no refresh token is available."""
    # Set refresh token to None
    mock_oauth_account.refresh_token = None
    mock_oauth_account.oauth_name = "google"

    # No need to mock httpx since it shouldn't be called
    result = await refresh_oauth_token(
        mock_user, mock_oauth_account, mock_db_session, mock_user_manager
    )

    # Assertions
    assert result is False


@pytest.mark.asyncio
async def test_check_and_refresh_oauth_tokens(
    mock_user: MagicMock,
    mock_user_manager: MagicMock,
    mock_db_session: AsyncSession,
) -> None:
    """Test checking and refreshing multiple OAuth tokens."""
    # Create mock user with OAuth accounts
    now_timestamp = datetime.now(timezone.utc).timestamp()

    # Create an account that needs refreshing (expiring soon)
    expiring_account = MagicMock(spec=OAuthAccount)
    expiring_account.oauth_name = "google"
    expiring_account.refresh_token = "refresh_token_1"
    expiring_account.expires_at = now_timestamp + 60  # Expires in 1 minute

    # Create an account that doesn't need refreshing (expires later)
    valid_account = MagicMock(spec=OAuthAccount)
    valid_account.oauth_name = "google"
    valid_account.refresh_token = "refresh_token_2"
    valid_account.expires_at = now_timestamp + 3600  # Expires in 1 hour

    # Create an account without a refresh token
    no_refresh_account = MagicMock(spec=OAuthAccount)
    no_refresh_account.oauth_name = "google"
    no_refresh_account.refresh_token = None
    no_refresh_account.expires_at = (
        now_timestamp + 60
    )  # Expiring soon but no refresh token

    # Set oauth_accounts on the mock user
    mock_user.oauth_accounts = [expiring_account, valid_account, no_refresh_account]

    # Mock refresh_oauth_token function
    with patch(
        "onyx.auth.oauth_refresher.refresh_oauth_token", AsyncMock(return_value=True)
    ) as mock_refresh:
        # Call the function under test
        await check_and_refresh_oauth_tokens(
            mock_user, mock_db_session, mock_user_manager
        )

    # Assertions
    assert mock_refresh.call_count == 1  # Should only refresh the expiring account
    # Check it was called with the expiring account
    mock_refresh.assert_called_once_with(
        mock_user, expiring_account, mock_db_session, mock_user_manager
    )


@pytest.mark.asyncio
async def test_get_oauth_accounts_requiring_refresh_token(mock_user: MagicMock) -> None:
    """Test identifying OAuth accounts that need refresh tokens."""
    # Create accounts with and without refresh tokens
    account_with_token = MagicMock(spec=OAuthAccount)
    account_with_token.oauth_name = "google"
    account_with_token.refresh_token = "refresh_token"

    account_without_token = MagicMock(spec=OAuthAccount)
    account_without_token.oauth_name = "google"
    account_without_token.refresh_token = None

    second_account_without_token = MagicMock(spec=OAuthAccount)
    second_account_without_token.oauth_name = "github"
    second_account_without_token.refresh_token = (
        ""  # Empty string should also be treated as missing
    )

    # Set accounts on user
    mock_user.oauth_accounts = [
        account_with_token,
        account_without_token,
        second_account_without_token,
    ]

    # Call the function under test
    accounts_needing_refresh = await get_oauth_accounts_requiring_refresh_token(
        mock_user
    )

    # Assertions
    assert len(accounts_needing_refresh) == 2
    assert account_without_token in accounts_needing_refresh
    assert second_account_without_token in accounts_needing_refresh
    assert account_with_token not in accounts_needing_refresh


@pytest.mark.asyncio
async def test_check_oauth_account_has_refresh_token(
    mock_user: MagicMock, mock_oauth_account: MagicMock
) -> None:
    """Test checking if an OAuth account has a refresh token."""
    # Test with refresh token
    mock_oauth_account.refresh_token = "refresh_token"
    has_token = await check_oauth_account_has_refresh_token(
        mock_user, mock_oauth_account
    )
    assert has_token is True

    # Test with None refresh token
    mock_oauth_account.refresh_token = None
    has_token = await check_oauth_account_has_refresh_token(
        mock_user, mock_oauth_account
    )
    assert has_token is False

    # Test with empty string refresh token
    mock_oauth_account.refresh_token = ""
    has_token = await check_oauth_account_has_refresh_token(
        mock_user, mock_oauth_account
    )
    assert has_token is False


@pytest.mark.asyncio
async def test_expire_oauth_token(
    mock_user: MagicMock,
    mock_oauth_account: MagicMock,
    mock_user_manager: MagicMock,
    mock_db_session: AsyncSession,
) -> None:
    """Tests the testing utility function for token expiration."""
    # Set up the mock account
    mock_oauth_account.oauth_name = "google"
    mock_oauth_account.refresh_token = "test_refresh_token"
    mock_oauth_account.access_token = "test_access_token"

    # Call the function under test
    result = await _test_expire_oauth_token(
        mock_user,
        mock_oauth_account,
        mock_db_session,
        mock_user_manager,
        expire_in_seconds=10,
    )

    # Assertions
    assert result is True
    mock_user_manager.user_db.update_oauth_account.assert_called_once()

    # Verify the expiration time was set correctly
    update_data = mock_user_manager.user_db.update_oauth_account.call_args[0][2]
    assert "expires_at" in update_data

    # Now should be within 10-11 seconds of the set expiration
    now = datetime.now(timezone.utc).timestamp()
    assert update_data["expires_at"] - now >= 8.8  # Allow ~1 second for test execution
    assert update_data["expires_at"] - now <= 11.2  # Allow ~1 second for test execution


================================================
FILE: backend/tests/unit/onyx/auth/test_oidc_pkce.py
================================================
from typing import Any
from typing import cast
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch
from urllib.parse import parse_qs
from urllib.parse import urlparse

from fastapi import FastAPI
from fastapi import Response
from fastapi.testclient import TestClient
from fastapi_users.authentication import AuthenticationBackend
from fastapi_users.authentication import CookieTransport
from fastapi_users.jwt import generate_jwt
from httpx_oauth.oauth2 import BaseOAuth2
from httpx_oauth.oauth2 import GetAccessTokenError

from onyx.auth.users import CSRF_TOKEN_COOKIE_NAME
from onyx.auth.users import CSRF_TOKEN_KEY
from onyx.auth.users import get_oauth_router
from onyx.auth.users import get_pkce_cookie_name
from onyx.auth.users import PKCE_COOKIE_NAME_PREFIX
from onyx.auth.users import STATE_TOKEN_AUDIENCE
from onyx.error_handling.exceptions import register_onyx_exception_handlers


class _StubOAuthClient:
    def __init__(self) -> None:
        self.name = "openid"
        self.authorization_calls: list[dict[str, str | list[str] | None]] = []
        self.access_token_calls: list[dict[str, str | None]] = []

    async def get_authorization_url(
        self,
        redirect_uri: str,
        state: str | None = None,
        scope: list[str] | None = None,
        code_challenge: str | None = None,
        code_challenge_method: str | None = None,
    ) -> str:
        self.authorization_calls.append(
            {
                "redirect_uri": redirect_uri,
                "state": state,
                "scope": scope,
                "code_challenge": code_challenge,
                "code_challenge_method": code_challenge_method,
            }
        )
        return f"https://idp.example.com/authorize?state={state}"

    async def get_access_token(
        self, code: str, redirect_uri: str, code_verifier: str | None = None
    ) -> dict[str, str | int]:
        self.access_token_calls.append(
            {
                "code": code,
                "redirect_uri": redirect_uri,
                "code_verifier": code_verifier,
            }
        )
        return {
            "access_token": "oidc_access_token",
            "refresh_token": "oidc_refresh_token",
            "expires_at": 1730000000,
        }

    async def get_id_email(self, _access_token: str) -> tuple[str, str | None]:
        return ("oidc_account_id", "oidc_user@example.com")


def _build_test_client(
    enable_pkce: bool,
    login_status_code: int = 302,
) -> tuple[TestClient, _StubOAuthClient, MagicMock]:
    oauth_client = _StubOAuthClient()
    transport = CookieTransport(cookie_name="testsession")

    async def get_strategy() -> MagicMock:
        return MagicMock()

    backend = AuthenticationBackend(
        name="test_backend",
        transport=transport,
        get_strategy=get_strategy,
    )

    login_response = Response(status_code=login_status_code)
    if login_status_code in {301, 302, 303, 307, 308}:
        login_response.headers["location"] = "/app"
    login_response.set_cookie("testsession", "session-token")
    backend.login = AsyncMock(return_value=login_response)  # type: ignore[method-assign]

    user = MagicMock()
    user.is_active = True
    user_manager = MagicMock()
    user_manager.oauth_callback = AsyncMock(return_value=user)
    user_manager.on_after_login = AsyncMock()

    async def get_user_manager() -> MagicMock:
        return user_manager

    router = get_oauth_router(
        oauth_client=cast(BaseOAuth2[Any], oauth_client),
        backend=backend,
        get_user_manager=get_user_manager,
        state_secret="test-secret",
        redirect_url="http://localhost/auth/oidc/callback",
        associate_by_email=True,
        is_verified_by_default=True,
        enable_pkce=enable_pkce,
    )
    app = FastAPI()
    app.include_router(router, prefix="/auth/oidc")
    register_onyx_exception_handlers(app)

    client = TestClient(app, raise_server_exceptions=False)
    return client, oauth_client, user_manager


def _extract_state_from_authorize_response(response: Any) -> str:
    auth_url = response.json()["authorization_url"]
    return parse_qs(urlparse(auth_url).query)["state"][0]


def test_oidc_authorize_omits_pkce_when_flag_disabled() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=False)

    response = client.get("/auth/oidc/authorize")

    assert response.status_code == 200
    assert oauth_client.authorization_calls[0]["code_challenge"] is None
    assert oauth_client.authorization_calls[0]["code_challenge_method"] is None
    assert "fastapiusersoauthcsrf" in response.cookies.keys()
    assert not any(
        key.startswith(PKCE_COOKIE_NAME_PREFIX) for key in response.cookies.keys()
    )


def test_oidc_authorize_adds_pkce_when_flag_enabled() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)

    response = client.get("/auth/oidc/authorize")

    assert response.status_code == 200
    assert oauth_client.authorization_calls[0]["code_challenge"] is not None
    assert oauth_client.authorization_calls[0]["code_challenge_method"] == "S256"
    assert any(
        key.startswith(PKCE_COOKIE_NAME_PREFIX) for key in response.cookies.keys()
    )


def test_oidc_callback_fails_when_pkce_cookie_missing() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    for key in list(client.cookies.keys()):
        if key.startswith(PKCE_COOKIE_NAME_PREFIX):
            del client.cookies[key]

    response = client.get(
        "/auth/oidc/callback", params={"code": "abc123", "state": state}
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_rejects_bad_state_before_token_exchange() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    client.get("/auth/oidc/authorize")
    tampered_state = "not-a-valid-state-jwt"
    client.cookies.set(get_pkce_cookie_name(tampered_state), "verifier123")

    response = client.get(
        "/auth/oidc/callback", params={"code": "abc123", "state": tampered_state}
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_rejects_wrongly_signed_state_before_token_exchange() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    client.get("/auth/oidc/authorize")
    csrf_token = client.cookies.get(CSRF_TOKEN_COOKIE_NAME)
    assert csrf_token is not None
    tampered_state = generate_jwt(
        {
            "aud": STATE_TOKEN_AUDIENCE,
            CSRF_TOKEN_KEY: csrf_token,
        },
        "wrong-secret",
        3600,
    )
    client.cookies.set(get_pkce_cookie_name(tampered_state), "verifier123")

    response = client.get(
        "/auth/oidc/callback",
        params={"code": "abc123", "state": tampered_state},
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert response.json()["detail"] == "ACCESS_TOKEN_DECODE_ERROR"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_rejects_csrf_mismatch_in_pkce_path() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    # Keep PKCE verifier cookie intact, but invalidate CSRF match against state JWT.
    client.cookies.set("fastapiusersoauthcsrf", "wrong-csrf-token")

    response = client.get(
        "/auth/oidc/callback",
        params={"code": "abc123", "state": state},
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_get_access_token_error_is_400() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)
    with patch.object(
        oauth_client,
        "get_access_token",
        AsyncMock(side_effect=GetAccessTokenError("token exchange failed")),
    ):
        response = client.get(
            "/auth/oidc/callback", params={"code": "abc123", "state": state}
        )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert response.json()["detail"] == "Authorization code exchange failed"
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_cleans_pkce_cookie_on_idp_error_with_state() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    response = client.get(
        "/auth/oidc/callback",
        params={"error": "access_denied", "state": state},
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert response.json()["detail"] == "Authorization request failed or was denied"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_cleans_pkce_cookie_on_missing_email() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    with patch.object(
        oauth_client, "get_id_email", AsyncMock(return_value=("oidc_account_id", None))
    ):
        response = client.get(
            "/auth/oidc/callback", params={"code": "abc123", "state": state}
        )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_rejects_wrong_audience_state_before_token_exchange() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=True)
    client.get("/auth/oidc/authorize")
    csrf_token = client.cookies.get(CSRF_TOKEN_COOKIE_NAME)
    assert csrf_token is not None
    wrong_audience_state = generate_jwt(
        {
            "aud": "wrong-audience",
            CSRF_TOKEN_KEY: csrf_token,
        },
        "test-secret",
        3600,
    )
    client.cookies.set(get_pkce_cookie_name(wrong_audience_state), "verifier123")

    response = client.get(
        "/auth/oidc/callback",
        params={"code": "abc123", "state": wrong_audience_state},
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert response.json()["detail"] == "ACCESS_TOKEN_DECODE_ERROR"
    assert oauth_client.access_token_calls == []
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_uses_code_verifier_when_pkce_enabled() -> None:
    client, oauth_client, user_manager = _build_test_client(enable_pkce=True)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    with patch(
        "onyx.auth.users.fetch_ee_implementation_or_noop",
        return_value=lambda _email: "tenant_1",
    ):
        response = client.get(
            "/auth/oidc/callback",
            params={"code": "abc123", "state": state},
            follow_redirects=False,
        )

    assert response.status_code == 302
    assert response.headers.get("location") == "/"
    assert oauth_client.access_token_calls[0]["code_verifier"] is not None
    user_manager.oauth_callback.assert_awaited_once()
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_works_without_pkce_when_flag_disabled() -> None:
    client, oauth_client, user_manager = _build_test_client(enable_pkce=False)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    with patch(
        "onyx.auth.users.fetch_ee_implementation_or_noop",
        return_value=lambda _email: "tenant_1",
    ):
        response = client.get(
            "/auth/oidc/callback",
            params={"code": "abc123", "state": state},
            follow_redirects=False,
        )

    assert response.status_code == 302
    assert oauth_client.access_token_calls[0]["code_verifier"] is None
    user_manager.oauth_callback.assert_awaited_once()


def test_oidc_callback_pkce_preserves_redirect_when_backend_login_is_non_redirect() -> (
    None
):
    client, oauth_client, user_manager = _build_test_client(
        enable_pkce=True,
        login_status_code=200,
    )
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    with patch(
        "onyx.auth.users.fetch_ee_implementation_or_noop",
        return_value=lambda _email: "tenant_1",
    ):
        response = client.get(
            "/auth/oidc/callback",
            params={"code": "abc123", "state": state},
            follow_redirects=False,
        )

    assert response.status_code == 302
    assert response.headers.get("location") == "/"
    assert oauth_client.access_token_calls[0]["code_verifier"] is not None
    user_manager.oauth_callback.assert_awaited_once()
    assert "Max-Age=0" in response.headers.get("set-cookie", "")


def test_oidc_callback_non_pkce_rejects_csrf_mismatch() -> None:
    client, oauth_client, _ = _build_test_client(enable_pkce=False)
    authorize_response = client.get("/auth/oidc/authorize")
    state = _extract_state_from_authorize_response(authorize_response)

    client.cookies.set(CSRF_TOKEN_COOKIE_NAME, "wrong-csrf-token")

    response = client.get(
        "/auth/oidc/callback",
        params={"code": "abc123", "state": state},
    )

    assert response.status_code == 400
    assert response.json()["error_code"] == "VALIDATION_ERROR"
    assert response.json()["detail"] == "OAUTH_INVALID_STATE"
    # NOTE: In the non-PKCE path, oauth2_authorize_callback exchanges the code
    # before route-body CSRF validation runs. This is a known ordering trade-off.
    assert oauth_client.access_token_calls


================================================
FILE: backend/tests/unit/onyx/auth/test_permissions.py
================================================
"""
Unit tests for onyx.auth.permissions — pure logic and FastAPI dependency.
"""

from unittest.mock import MagicMock

import pytest

from onyx.auth.permissions import ALL_PERMISSIONS
from onyx.auth.permissions import get_effective_permissions
from onyx.auth.permissions import require_permission
from onyx.auth.permissions import resolve_effective_permissions
from onyx.db.enums import Permission
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


# ---------------------------------------------------------------------------
# resolve_effective_permissions
# ---------------------------------------------------------------------------


class TestResolveEffectivePermissions:
    def test_empty_set(self) -> None:
        assert resolve_effective_permissions(set()) == set()

    def test_basic_no_implications(self) -> None:
        result = resolve_effective_permissions({"basic"})
        assert result == {"basic"}

    def test_single_implication(self) -> None:
        result = resolve_effective_permissions({"add:agents"})
        assert result == {"add:agents", "read:agents"}

    def test_manage_agents_implies_add_and_read(self) -> None:
        """manage:agents directly maps to {add:agents, read:agents}."""
        result = resolve_effective_permissions({"manage:agents"})
        assert result == {"manage:agents", "add:agents", "read:agents"}

    def test_manage_connectors_chain(self) -> None:
        result = resolve_effective_permissions({"manage:connectors"})
        assert result == {"manage:connectors", "add:connectors", "read:connectors"}

    def test_manage_document_sets(self) -> None:
        result = resolve_effective_permissions({"manage:document_sets"})
        assert result == {
            "manage:document_sets",
            "read:document_sets",
            "read:connectors",
        }

    def test_manage_user_groups_implies_all_reads(self) -> None:
        result = resolve_effective_permissions({"manage:user_groups"})
        assert result == {
            "manage:user_groups",
            "read:connectors",
            "read:document_sets",
            "read:agents",
            "read:users",
        }

    def test_admin_override(self) -> None:
        result = resolve_effective_permissions({"admin"})
        assert result == set(ALL_PERMISSIONS)

    def test_admin_with_others(self) -> None:
        result = resolve_effective_permissions({"admin", "basic"})
        assert result == set(ALL_PERMISSIONS)

    def test_multi_group_union(self) -> None:
        result = resolve_effective_permissions(
            {"add:agents", "manage:connectors", "basic"}
        )
        assert result == {
            "basic",
            "add:agents",
            "read:agents",
            "manage:connectors",
            "add:connectors",
            "read:connectors",
        }

    def test_toggle_permission_no_implications(self) -> None:
        result = resolve_effective_permissions({"read:agent_analytics"})
        assert result == {"read:agent_analytics"}

    def test_all_permissions_for_admin(self) -> None:
        result = resolve_effective_permissions({"admin"})
        assert len(result) == len(ALL_PERMISSIONS)


# ---------------------------------------------------------------------------
# get_effective_permissions (expands implied at read time)
# ---------------------------------------------------------------------------


class TestGetEffectivePermissions:
    def test_expands_implied_permissions(self) -> None:
        """Column stores only granted; get_effective_permissions expands implied."""
        user = MagicMock()
        user.effective_permissions = ["add:agents"]
        result = get_effective_permissions(user)
        assert result == {Permission.ADD_AGENTS, Permission.READ_AGENTS}

    def test_admin_expands_to_all(self) -> None:
        user = MagicMock()
        user.effective_permissions = ["admin"]
        result = get_effective_permissions(user)
        assert result == set(Permission)

    def test_basic_stays_basic(self) -> None:
        user = MagicMock()
        user.effective_permissions = ["basic"]
        result = get_effective_permissions(user)
        assert result == {Permission.BASIC_ACCESS}

    def test_empty_column(self) -> None:
        user = MagicMock()
        user.effective_permissions = []
        result = get_effective_permissions(user)
        assert result == set()


# ---------------------------------------------------------------------------
# require_permission (FastAPI dependency)
# ---------------------------------------------------------------------------


class TestRequirePermission:
    @pytest.mark.asyncio
    async def test_admin_bypass(self) -> None:
        """Admin stored in column should pass any permission check."""
        user = MagicMock()
        user.effective_permissions = ["admin"]

        dep = require_permission(Permission.MANAGE_CONNECTORS)
        result = await dep(user=user)
        assert result is user

    @pytest.mark.asyncio
    async def test_has_required_permission(self) -> None:
        user = MagicMock()
        user.effective_permissions = ["manage:connectors"]

        dep = require_permission(Permission.MANAGE_CONNECTORS)
        result = await dep(user=user)
        assert result is user

    @pytest.mark.asyncio
    async def test_implied_permission_passes(self) -> None:
        """manage:connectors implies read:connectors at read time."""
        user = MagicMock()
        user.effective_permissions = ["manage:connectors"]

        dep = require_permission(Permission.READ_CONNECTORS)
        result = await dep(user=user)
        assert result is user

    @pytest.mark.asyncio
    async def test_missing_permission_raises(self) -> None:
        user = MagicMock()
        user.effective_permissions = ["basic"]

        dep = require_permission(Permission.MANAGE_CONNECTORS)
        with pytest.raises(OnyxError) as exc_info:
            await dep(user=user)
        assert exc_info.value.error_code == OnyxErrorCode.INSUFFICIENT_PERMISSIONS

    @pytest.mark.asyncio
    async def test_empty_permissions_fails(self) -> None:
        user = MagicMock()
        user.effective_permissions = []

        dep = require_permission(Permission.BASIC_ACCESS)
        with pytest.raises(OnyxError):
            await dep(user=user)


================================================
FILE: backend/tests/unit/onyx/auth/test_single_tenant_jwt_strategy.py
================================================
import uuid
from datetime import datetime
from datetime import timezone
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import jwt
import pytest

from onyx.auth.users import SingleTenantJWTStrategy


_TEST_SECRET = "test-secret-key-for-jwt-unit-tests"
_TEST_LIFETIME = 3600  # 1 hour


def _make_strategy(
    lifetime_seconds: int | None = _TEST_LIFETIME,
) -> SingleTenantJWTStrategy:
    return SingleTenantJWTStrategy(
        secret=_TEST_SECRET,
        lifetime_seconds=lifetime_seconds,
    )


def _make_user(user_id: uuid.UUID | None = None) -> MagicMock:
    user = MagicMock()
    user.id = user_id or uuid.uuid4()
    user.email = "test@example.com"
    return user


def _make_user_manager(user: MagicMock) -> MagicMock:
    manager = MagicMock()
    manager.parse_id = MagicMock(return_value=user.id)
    manager.get = AsyncMock(return_value=user)
    return manager


@pytest.mark.asyncio
async def test_write_token_produces_valid_jwt() -> None:
    """write_token should return a JWT whose claims contain sub and iat."""
    strategy = _make_strategy()
    user = _make_user()

    token = await strategy.write_token(user)

    payload = jwt.decode(
        token, _TEST_SECRET, algorithms=["HS256"], audience=["fastapi-users:auth"]
    )
    assert payload["sub"] == str(user.id)
    assert "iat" in payload
    assert "exp" in payload


@pytest.mark.asyncio
async def test_write_token_iat_is_accurate() -> None:
    """The iat claim should be close to the current time."""
    strategy = _make_strategy()
    user = _make_user()
    before = int(datetime.now(timezone.utc).timestamp())

    token = await strategy.write_token(user)

    payload = jwt.decode(
        token, _TEST_SECRET, algorithms=["HS256"], audience=["fastapi-users:auth"]
    )
    after = int(datetime.now(timezone.utc).timestamp())
    assert before <= payload["iat"] <= after


@pytest.mark.asyncio
async def test_read_token_returns_user() -> None:
    """read_token should decode the JWT and return the corresponding user."""
    strategy = _make_strategy()
    user = _make_user()
    manager = _make_user_manager(user)

    token = await strategy.write_token(user)
    result = await strategy.read_token(token, manager)

    assert result is user
    manager.parse_id.assert_called_once_with(str(user.id))
    manager.get.assert_called_once_with(user.id)


@pytest.mark.asyncio
async def test_read_token_returns_none_for_none() -> None:
    """read_token should return None when token is None."""
    strategy = _make_strategy()
    manager = _make_user_manager(_make_user())

    result = await strategy.read_token(None, manager)
    assert result is None


@pytest.mark.asyncio
async def test_read_token_returns_none_for_bad_signature() -> None:
    """read_token should return None for a token signed with a different secret."""
    strategy = _make_strategy()
    user = _make_user()
    manager = _make_user_manager(user)

    bad_strategy = SingleTenantJWTStrategy(secret="wrong-secret", lifetime_seconds=3600)
    bad_token = await bad_strategy.write_token(user)

    result = await strategy.read_token(bad_token, manager)
    assert result is None


@pytest.mark.asyncio
async def test_read_token_returns_none_for_expired_token() -> None:
    """read_token should return None when the token has expired."""
    # lifetime_seconds=0 doesn't set exp, so we craft a token manually
    strategy = _make_strategy()
    user = _make_user()
    manager = _make_user_manager(user)

    expired_payload = {
        "sub": str(user.id),
        "aud": ["fastapi-users:auth"],
        "iat": 1000000000,
        "exp": 1000000001,  # expired long ago
    }
    expired_token = jwt.encode(expired_payload, _TEST_SECRET, algorithm="HS256")

    result = await strategy.read_token(expired_token, manager)
    assert result is None


@pytest.mark.asyncio
async def test_destroy_token_is_noop() -> None:
    """destroy_token should not raise — JWTs can't be server-side invalidated."""
    strategy = _make_strategy()
    user = _make_user()
    token = await strategy.write_token(user)

    # Should complete without error
    await strategy.destroy_token(token, user)


@pytest.mark.asyncio
async def test_refresh_token_returns_new_jwt() -> None:
    """refresh_token should issue a fresh JWT (different from the original)."""
    strategy = _make_strategy()
    user = _make_user()

    original_token = await strategy.write_token(user)
    refreshed_token = await strategy.refresh_token(original_token, user)

    # Tokens contain different iat/exp, so the encoded strings should differ
    # (unless generated in the same second — but we check claims to be safe)
    refreshed_payload = jwt.decode(
        refreshed_token,
        _TEST_SECRET,
        algorithms=["HS256"],
        audience=["fastapi-users:auth"],
    )
    assert refreshed_payload["sub"] == str(user.id)
    assert "iat" in refreshed_payload
    assert "exp" in refreshed_payload


@pytest.mark.asyncio
async def test_refresh_token_with_none_creates_new() -> None:
    """refresh_token(None, user) should create a brand-new token."""
    strategy = _make_strategy()
    user = _make_user()

    token = await strategy.refresh_token(None, user)

    payload = jwt.decode(
        token, _TEST_SECRET, algorithms=["HS256"], audience=["fastapi-users:auth"]
    )
    assert payload["sub"] == str(user.id)


@pytest.mark.asyncio
async def test_write_token_no_lifetime_omits_exp() -> None:
    """When lifetime_seconds is None, the token should have no exp claim."""
    strategy = _make_strategy(lifetime_seconds=None)
    user = _make_user()

    token = await strategy.write_token(user)

    payload = jwt.decode(
        token,
        _TEST_SECRET,
        algorithms=["HS256"],
        audience=["fastapi-users:auth"],
        options={"verify_exp": False},
    )
    assert payload["sub"] == str(user.id)
    assert "exp" not in payload


================================================
FILE: backend/tests/unit/onyx/auth/test_user_create_schema.py
================================================
"""
Unit tests for UserCreate schema dict methods.

Verifies that account_type is always included in create_update_dict
and create_update_dict_superuser.
"""

from onyx.auth.schemas import UserCreate
from onyx.db.enums import AccountType


def test_create_update_dict_includes_default_account_type() -> None:
    uc = UserCreate(email="a@b.com", password="secret123")
    d = uc.create_update_dict()
    assert d["account_type"] == AccountType.STANDARD


def test_create_update_dict_includes_explicit_account_type() -> None:
    uc = UserCreate(
        email="a@b.com", password="secret123", account_type=AccountType.SERVICE_ACCOUNT
    )
    d = uc.create_update_dict()
    assert d["account_type"] == AccountType.STANDARD


def test_create_update_dict_superuser_includes_account_type() -> None:
    uc = UserCreate(email="a@b.com", password="secret123")
    d = uc.create_update_dict_superuser()
    assert d["account_type"] == AccountType.STANDARD


================================================
FILE: backend/tests/unit/onyx/auth/test_user_default_pins.py
================================================
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import pytest
from sqlalchemy.ext.asyncio import AsyncSession

from onyx.auth.users import UserManager


def _build_db_session(return_ids: list[int]) -> MagicMock:
    scalar_result = MagicMock()
    scalar_result.all.return_value = return_ids
    execute_result = MagicMock()
    execute_result.scalars.return_value = scalar_result

    db_session = MagicMock(spec=AsyncSession)
    db_session.execute = AsyncMock(return_value=execute_result)
    return db_session


@pytest.mark.asyncio
async def test_assign_default_pinned_assistants_populates_ids(
    mock_user: MagicMock,
) -> None:
    user_db = MagicMock()
    user_db.update = AsyncMock()

    user_manager = UserManager(user_db)

    mock_user.pinned_assistants = None

    db_session = _build_db_session([1, 5, 10])

    await user_manager._assign_default_pinned_assistants(mock_user, db_session)

    assert db_session.execute.await_count == 1
    user_db.update.assert_awaited_once()
    await_args = user_db.update.await_args
    assert await_args
    assert await_args.args == (mock_user, {"pinned_assistants": [1, 5, 10]})
    assert mock_user.pinned_assistants == [1, 5, 10]


@pytest.mark.asyncio
async def test_assign_default_pinned_assistants_skips_when_no_defaults(
    mock_user: MagicMock,
) -> None:
    user_db = MagicMock()
    user_db.update = AsyncMock()

    user_manager = UserManager(user_db)
    mock_user.pinned_assistants = None

    db_session = _build_db_session([])

    await user_manager._assign_default_pinned_assistants(mock_user, db_session)

    assert db_session.execute.await_count == 1
    user_db.update.assert_not_awaited()
    assert mock_user.pinned_assistants is None


@pytest.mark.asyncio
async def test_assign_default_pinned_assistants_noop_if_already_set(
    mock_user: MagicMock,
) -> None:
    user_db = MagicMock()
    user_db.update = AsyncMock()

    user_manager = UserManager(user_db)
    mock_user.pinned_assistants = [3]

    db_session = _build_db_session([1, 2, 3])

    await user_manager._assign_default_pinned_assistants(mock_user, db_session)

    user_db.update.assert_not_awaited()
    assert db_session.execute.await_count == 0


================================================
FILE: backend/tests/unit/onyx/auth/test_user_registration.py
================================================
"""
Unit tests for the user registration workflow in UserManager.create().

Tests cover:
1. Disposable email validation (before tenant provisioning)
2. Multi-tenant vs single-tenant invite logic
3. SAML/OIDC SSO bypass behavior
4. Empty whitelist vs populated whitelist scenarios
5. Case-insensitive email matching for existing user checks
"""

from types import TracebackType
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.auth.schemas import UserCreate
from onyx.auth.users import UserManager
from onyx.configs.constants import AuthType
from onyx.error_handling.exceptions import OnyxError

# Note: Only async test methods are marked with @pytest.mark.asyncio individually
# to avoid warnings on synchronous tests


@pytest.fixture
def mock_user_create() -> UserCreate:
    """Create a mock UserCreate object for testing."""
    return UserCreate(
        email="newuser@example.com",
        password="SecurePassword123!",
        is_verified=False,
    )


@pytest.fixture
def mock_async_session() -> MagicMock:
    """Create a mock async database session."""
    session = MagicMock()
    session.execute = AsyncMock()
    session.scalar = AsyncMock()
    session.commit = AsyncMock()
    session.rollback = AsyncMock()
    return session


class _AsyncSessionContextManager:
    def __init__(self, session: MagicMock) -> None:
        self._session = session

    async def __aenter__(self) -> MagicMock:
        return self._session

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> bool:
        return False


def _mock_user_manager_methods(user_manager: UserManager) -> None:
    setattr(user_manager, "validate_password", AsyncMock())
    setattr(user_manager, "_assign_default_pinned_assistants", AsyncMock())


class TestDisposableEmailValidation:
    """Test disposable email validation before tenant provisioning."""

    @pytest.mark.asyncio
    @patch("onyx.auth.users.is_disposable_email")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    async def test_blocks_disposable_email_before_tenant_provision(
        self,
        mock_get_user_count: MagicMock,  # noqa: ARG002
        mock_session_manager: MagicMock,  # noqa: ARG002
        mock_fetch_ee: MagicMock,
        mock_is_disposable: MagicMock,
        mock_user_create: UserCreate,
    ) -> None:
        """Disposable emails should be blocked before tenant provisioning."""
        # Setup
        mock_is_disposable.return_value = True
        user_manager = UserManager(MagicMock())

        # Execute & Assert
        with pytest.raises(OnyxError) as exc:
            await user_manager.create(mock_user_create)

        assert exc.value.status_code == 400
        assert "Disposable email" in exc.value.detail
        # Verify we never got to tenant provisioning
        mock_fetch_ee.assert_not_called()

    @pytest.mark.asyncio
    @patch("onyx.auth.users.is_disposable_email")
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.MULTI_TENANT", False)
    async def test_allows_valid_email_domain(
        self,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,
        mock_is_disposable: MagicMock,
        mock_user_create: UserCreate,
        mock_async_session: MagicMock,
    ) -> None:
        """Valid emails should pass domain validation."""
        # Setup
        mock_is_disposable.return_value = False
        mock_verify_domain.return_value = None  # No exception = valid
        mock_fetch_ee.return_value = AsyncMock(return_value="default_schema")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_get_user_count.return_value = 0

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        user_manager.user_db = mock_user_db

        try:
            await user_manager.create(mock_user_create)
        except Exception:
            pass  # We just want to verify domain check passed

        # Verify domain validation was called
        mock_verify_domain.assert_called_once_with(
            mock_user_create.email, is_registration=True
        )


class TestMultiTenantInviteLogic:
    """Test invite logic for multi-tenant environments."""

    @patch("onyx.auth.users.SQLAlchemyUserAdminDB")
    @patch("onyx.auth.users.is_disposable_email", return_value=False)
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.verify_email_is_invited")
    @patch("onyx.auth.users.MULTI_TENANT", True)
    @patch("onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR")
    @pytest.mark.asyncio
    async def test_first_user_no_invite_required(
        self,
        mock_context_var: MagicMock,
        mock_verify_invited: MagicMock,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,  # noqa: ARG002
        mock_is_disposable: MagicMock,  # noqa: ARG002
        mock_sql_alchemy_db: MagicMock,
        mock_user_create: UserCreate,
        mock_async_session: MagicMock,
    ) -> None:
        """First user in tenant should not require invite."""
        # Setup: No existing users
        mock_get_user_count.return_value = 0
        mock_fetch_ee.return_value = AsyncMock(return_value="tenant_123")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_context_var.set.return_value = MagicMock()

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        mock_sql_alchemy_db.return_value = mock_user_db

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        try:
            await user_manager.create(mock_user_create)
        except Exception:
            pass

        # Verify invite check was NOT called (user_count = 0)
        mock_verify_invited.assert_not_called()

    @patch("onyx.auth.users.SQLAlchemyUserAdminDB")
    @patch("onyx.auth.users.is_disposable_email", return_value=False)
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.verify_email_is_invited")
    @patch("onyx.auth.users.MULTI_TENANT", True)
    @patch("onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR")
    @pytest.mark.asyncio
    async def test_subsequent_user_requires_invite(
        self,
        mock_context_var: MagicMock,
        mock_verify_invited: MagicMock,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,  # noqa: ARG002
        mock_is_disposable: MagicMock,  # noqa: ARG002
        mock_sql_alchemy_db: MagicMock,
        mock_user_create: UserCreate,
        mock_async_session: MagicMock,
    ) -> None:
        """Subsequent users in existing tenant should require invite."""
        # Setup: Existing tenant with users
        mock_get_user_count.return_value = 5
        mock_fetch_ee.return_value = AsyncMock(return_value="tenant_123")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_context_var.set.return_value = MagicMock()

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        mock_sql_alchemy_db.return_value = mock_user_db

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        try:
            await user_manager.create(mock_user_create)
        except Exception:
            pass

        # Verify invite check WAS called (user_count > 0)
        mock_verify_invited.assert_called_once_with(mock_user_create.email)


class TestSingleTenantInviteLogic:
    """Test invite logic for single-tenant environments."""

    @patch("onyx.auth.users.is_disposable_email", return_value=False)
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.verify_email_is_invited")
    @patch("onyx.auth.users.MULTI_TENANT", False)
    @patch("onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR")
    @pytest.mark.asyncio
    async def test_always_checks_invite_list(
        self,
        mock_context_var: MagicMock,
        mock_verify_invited: MagicMock,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,  # noqa: ARG002
        mock_is_disposable: MagicMock,  # noqa: ARG002
        mock_user_create: UserCreate,
        mock_async_session: MagicMock,
    ) -> None:
        """Single-tenant should always check invite list."""
        # Setup
        mock_fetch_ee.return_value = AsyncMock(return_value="default_schema")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_get_user_count.return_value = 0
        mock_context_var.set.return_value = MagicMock()

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        user_manager.user_db = mock_user_db

        try:
            await user_manager.create(mock_user_create)
        except Exception:
            pass

        # Verify invite check was called
        mock_verify_invited.assert_called_once_with(mock_user_create.email)


class TestSAMLOIDCBehavior:
    """Test SSO (SAML/OIDC) bypass of invite whitelist."""

    @pytest.mark.parametrize("auth_type", [AuthType.SAML, AuthType.OIDC])
    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=True)
    @patch("onyx.auth.users.AUTH_TYPE")
    def test_sso_bypasses_whitelist(
        self,
        mock_auth_type: MagicMock,
        _mock_invite_only: MagicMock,
        mock_get_invited: MagicMock,
        auth_type: AuthType,
    ) -> None:
        """SAML/OIDC should bypass invite whitelist."""
        from onyx.auth.users import verify_email_is_invited

        # Setup
        mock_auth_type.return_value = auth_type
        mock_get_invited.return_value = ["allowed@example.com"]

        # Execute - should not raise even with populated whitelist
        with patch("onyx.auth.users.AUTH_TYPE", auth_type):
            verify_email_is_invited("newuser@example.com")  # Should not raise

    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=True)
    @patch("onyx.auth.users.AUTH_TYPE", AuthType.BASIC)
    def test_basic_auth_enforces_whitelist(
        self,
        mock_get_invited: MagicMock,
        _mock_invite_only: MagicMock,
    ) -> None:
        """Basic auth should enforce invite whitelist."""
        from onyx.auth.users import verify_email_is_invited

        # Setup
        mock_get_invited.return_value = ["allowed@example.com"]

        # Execute & Assert
        with pytest.raises(OnyxError) as exc:
            verify_email_is_invited("newuser@example.com")
        assert exc.value.status_code == 403


class TestWhitelistBehavior:
    """Test invite whitelist scenarios."""

    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=False)
    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.AUTH_TYPE", AuthType.BASIC)
    def test_empty_whitelist_allows_all(
        self,
        mock_get_invited: MagicMock,
        _mock_invite_only: MagicMock,
    ) -> None:
        """Empty whitelist should allow all users."""
        from onyx.auth.users import verify_email_is_invited

        # Setup: Empty whitelist
        mock_get_invited.return_value = []

        # Execute - should not raise
        verify_email_is_invited("anyone@example.com")

    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=False)
    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.AUTH_TYPE", AuthType.BASIC)
    def test_invite_only_disabled_allows_non_invited_users(
        self,
        mock_get_invited: MagicMock,
        _mock_invite_only: MagicMock,
    ) -> None:
        from onyx.auth.users import verify_email_is_invited

        mock_get_invited.return_value = ["allowed@example.com"]

        verify_email_is_invited("notallowed@example.com")

    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=True)
    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.AUTH_TYPE", AuthType.BASIC)
    def test_whitelist_blocks_non_invited(
        self,
        mock_get_invited: MagicMock,
        _mock_invite_only: MagicMock,
    ) -> None:
        """Populated whitelist should block non-invited users."""
        from onyx.auth.users import verify_email_is_invited

        # Setup
        mock_get_invited.return_value = ["allowed@example.com"]

        # Execute & Assert
        with pytest.raises(OnyxError) as exc:
            verify_email_is_invited("notallowed@example.com")

        assert exc.value.status_code == 403

    @patch("onyx.auth.users.workspace_invite_only_enabled", return_value=True)
    @patch("onyx.auth.users.get_invited_users")
    @patch("onyx.auth.users.AUTH_TYPE", AuthType.BASIC)
    def test_whitelist_allows_invited_case_insensitive(
        self,
        mock_get_invited: MagicMock,
        _mock_invite_only: MagicMock,
    ) -> None:
        """Whitelist should match emails case-insensitively."""
        from onyx.auth.users import verify_email_is_invited

        # Setup
        mock_get_invited.return_value = ["allowed@example.com"]

        # Execute - should not raise (case-insensitive match)
        verify_email_is_invited("ALLOWED@EXAMPLE.COM")
        verify_email_is_invited("Allowed@Example.Com")


class TestSeatLimitEnforcement:
    """Seat limits block new user creation on self-hosted deployments."""

    def test_adding_user_fails_when_seats_full(self) -> None:
        from onyx.auth.users import enforce_seat_limit

        seat_result = MagicMock(available=False, error_message="Seat limit reached")
        with patch(
            "onyx.auth.users.fetch_ee_implementation_or_noop",
            return_value=lambda *_a, **_kw: seat_result,
        ):
            with pytest.raises(OnyxError) as exc:
                enforce_seat_limit(MagicMock())

            assert exc.value.status_code == 402

    def test_seat_limit_only_enforced_for_self_hosted(self) -> None:
        from onyx.auth.users import enforce_seat_limit

        with patch("onyx.auth.users.MULTI_TENANT", True):
            enforce_seat_limit(MagicMock())  # should not raise


class TestCaseInsensitiveEmailMatching:
    """Test case-insensitive email matching for existing user checks."""

    @patch("onyx.auth.users.is_disposable_email", return_value=False)
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.SQLAlchemyUserAdminDB")
    @patch("onyx.auth.users.MULTI_TENANT", True)
    @patch("onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR")
    @pytest.mark.asyncio
    async def test_existing_user_check_case_insensitive(
        self,
        mock_context_var: MagicMock,
        mock_sql_alchemy_db: MagicMock,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,
        mock_is_disposable: MagicMock,  # noqa: ARG002
        mock_async_session: MagicMock,
    ) -> None:
        """Existing user check should use case-insensitive email comparison."""

        # Setup
        mock_get_user_count.return_value = 0  # First user - no invite needed
        mock_fetch_ee.return_value = AsyncMock(return_value="tenant_123")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_context_var.set.return_value = MagicMock()

        # Create a result mock
        result_mock = MagicMock()
        result_mock.scalar_one_or_none.return_value = None
        mock_async_session.execute.return_value = result_mock

        user_create = UserCreate(
            email="NewUser@Example.COM",
            password="SecurePassword123!",
            is_verified=False,
        )

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        mock_sql_alchemy_db.return_value = mock_user_db

        try:
            await user_manager.create(user_create)
        except Exception:
            pass

        # Verify flow
        mock_verify_domain.assert_called_once_with(
            user_create.email, is_registration=True
        )

    @patch("onyx.auth.users.is_disposable_email")
    @patch("onyx.auth.users.verify_email_domain")
    @patch("onyx.auth.users.fetch_ee_implementation_or_noop")
    @patch("onyx.auth.users.get_async_session_context_manager")
    @patch("onyx.auth.users.get_user_count", new_callable=AsyncMock)
    @patch("onyx.auth.users.verify_email_is_invited")
    @patch("onyx.auth.users.SQLAlchemyUserAdminDB")
    @patch("onyx.auth.users.MULTI_TENANT", True)
    @patch("onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR")
    @pytest.mark.asyncio
    async def test_full_registration_flow_existing_tenant(
        self,
        mock_context_var: MagicMock,
        mock_sql_alchemy_db: MagicMock,
        mock_verify_invited: MagicMock,
        mock_get_user_count: MagicMock,
        mock_session_manager: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_verify_domain: MagicMock,
        mock_is_disposable: MagicMock,
        mock_user_create: UserCreate,
        mock_async_session: MagicMock,
    ) -> None:
        """Test complete flow: valid email, existing tenant, invite required."""
        # Setup: All validations pass, existing tenant
        mock_is_disposable.return_value = False
        mock_verify_domain.return_value = None
        mock_get_user_count.return_value = 10  # Existing tenant
        mock_fetch_ee.return_value = AsyncMock(return_value="existing_tenant_789")
        mock_session_manager.return_value = _AsyncSessionContextManager(
            mock_async_session
        )
        mock_context_var.set.return_value = MagicMock()

        user_manager = UserManager(MagicMock())
        _mock_user_manager_methods(user_manager)

        # Mock the user_db to avoid actual database operations
        mock_user_db = MagicMock()
        mock_user_db.create = AsyncMock(return_value=MagicMock(id="test-id"))
        mock_sql_alchemy_db.return_value = mock_user_db

        try:
            await user_manager.create(mock_user_create)
        except Exception:
            pass

        # Verify flow
        mock_verify_domain.assert_called_once_with(
            mock_user_create.email, is_registration=True
        )
        mock_verify_invited.assert_called_once()  # Existing tenant = invite needed


================================================
FILE: backend/tests/unit/onyx/auth/test_verify_auth_setting.py
================================================
from unittest.mock import MagicMock

import pytest

import onyx.auth.users as users
from onyx.auth.users import verify_auth_setting
from onyx.configs.constants import AuthType


def test_verify_auth_setting_raises_for_cloud(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Cloud auth type is not valid for self-hosted deployments."""
    monkeypatch.setenv("AUTH_TYPE", "cloud")

    with pytest.raises(ValueError, match="'cloud' is not a valid auth type"):
        verify_auth_setting()


def test_verify_auth_setting_warns_for_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Disabled auth type logs a deprecation warning."""
    monkeypatch.setenv("AUTH_TYPE", "disabled")

    mock_logger = MagicMock()
    monkeypatch.setattr(users, "logger", mock_logger)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.BASIC)

    verify_auth_setting()

    mock_logger.warning.assert_called_once()
    assert "no longer supported" in mock_logger.warning.call_args[0][0]


@pytest.mark.parametrize(
    "auth_type",
    [AuthType.BASIC, AuthType.GOOGLE_OAUTH, AuthType.OIDC, AuthType.SAML],
)
def test_verify_auth_setting_valid_auth_types(
    monkeypatch: pytest.MonkeyPatch,
    auth_type: AuthType,
) -> None:
    """Valid auth types work without errors or warnings."""
    monkeypatch.setenv("AUTH_TYPE", auth_type.value)

    mock_logger = MagicMock()
    monkeypatch.setattr(users, "logger", mock_logger)
    monkeypatch.setattr(users, "AUTH_TYPE", auth_type)

    verify_auth_setting()

    mock_logger.warning.assert_not_called()
    mock_logger.notice.assert_called_once_with(f"Using Auth Type: {auth_type.value}")


================================================
FILE: backend/tests/unit/onyx/auth/test_verify_email_domain.py
================================================
import pytest

import onyx.auth.users as users
from onyx.auth.users import verify_email_domain
from onyx.configs.constants import AuthType
from onyx.error_handling.exceptions import OnyxError


def test_verify_email_domain_allows_case_insensitive_match(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    # Configure whitelist to lowercase while email has uppercase domain
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", ["example.com"], raising=False)

    # Should not raise
    verify_email_domain("User@EXAMPLE.COM")


def test_verify_email_domain_rejects_non_whitelisted_domain(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", ["example.com"], raising=False)

    with pytest.raises(OnyxError) as exc:
        verify_email_domain("user@another.com")
    assert exc.value.status_code == 400
    assert "Email domain is not valid" in exc.value.detail


def test_verify_email_domain_invalid_email_format(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", ["example.com"], raising=False)

    with pytest.raises(OnyxError) as exc:
        verify_email_domain("userexample.com")  # missing '@'
    assert exc.value.status_code == 400
    assert "Email is not valid" in exc.value.detail


def test_verify_email_domain_rejects_plus_addressing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    with pytest.raises(OnyxError) as exc:
        verify_email_domain("user+tag@gmail.com")
    assert exc.value.status_code == 400
    assert "'+'" in exc.value.detail


def test_verify_email_domain_allows_plus_for_onyx_app(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    # Should not raise for onyx.app domain
    verify_email_domain("user+tag@onyx.app")


def test_verify_email_domain_rejects_dotted_gmail_on_registration(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    with pytest.raises(OnyxError) as exc:
        verify_email_domain("first.last@gmail.com", is_registration=True)
    assert exc.value.status_code == 400
    assert "'.'" in exc.value.detail


def test_verify_email_domain_dotted_gmail_allowed_when_not_registration(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    # Existing user signing in — should not be blocked
    verify_email_domain("first.last@gmail.com", is_registration=False)


def test_verify_email_domain_allows_dotted_non_gmail_on_registration(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    verify_email_domain("first.last@example.com", is_registration=True)


def test_verify_email_domain_dotted_gmail_allowed_when_not_cloud(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.BASIC, raising=False)

    verify_email_domain("first.last@gmail.com", is_registration=True)


def test_verify_email_domain_rejects_googlemail(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "VALID_EMAIL_DOMAINS", [], raising=False)
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.CLOUD, raising=False)

    with pytest.raises(OnyxError) as exc:
        verify_email_domain("user@googlemail.com")
    assert exc.value.status_code == 400
    assert "gmail.com" in exc.value.detail


================================================
FILE: backend/tests/unit/onyx/auth/test_verify_email_invite.py
================================================
import pytest

import onyx.auth.users as users
from onyx.auth.users import verify_email_is_invited
from onyx.configs.constants import AuthType
from onyx.error_handling.exceptions import OnyxError


@pytest.mark.parametrize("auth_type", [AuthType.SAML, AuthType.OIDC])
def test_verify_email_is_invited_skips_whitelist_for_sso(
    monkeypatch: pytest.MonkeyPatch, auth_type: AuthType
) -> None:
    monkeypatch.setattr(users, "AUTH_TYPE", auth_type, raising=False)
    monkeypatch.setattr(users, "workspace_invite_only_enabled", lambda: True)
    monkeypatch.setattr(
        users,
        "get_invited_users",
        lambda: ["allowed@example.com"],
        raising=False,
    )

    # Should not raise even though whitelist is populated
    verify_email_is_invited("newuser@example.com")


def test_verify_email_is_invited_enforced_for_basic_auth(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.BASIC, raising=False)
    monkeypatch.setattr(users, "workspace_invite_only_enabled", lambda: True)
    monkeypatch.setattr(
        users,
        "get_invited_users",
        lambda: ["allowed@example.com"],
        raising=False,
    )

    with pytest.raises(OnyxError) as exc:
        verify_email_is_invited("newuser@example.com")
    assert exc.value.status_code == 403


def test_verify_email_is_invited_skipped_when_invite_only_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(users, "AUTH_TYPE", AuthType.BASIC, raising=False)
    monkeypatch.setattr(users, "workspace_invite_only_enabled", lambda: False)
    monkeypatch.setattr(
        users,
        "get_invited_users",
        lambda: ["allowed@example.com"],
        raising=False,
    )

    verify_email_is_invited("newuser@example.com")


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/tenant_provisioning/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/tenant_provisioning/test_check_available_tenants.py
================================================
"""
Unit tests for the check_available_tenants task.

Tests verify:
- Provisioning loop calls pre_provision_tenant the correct number of times
- Batch size is capped at _MAX_TENANTS_PER_RUN
- A failure in one provisioning call does not stop subsequent calls
- No provisioning happens when pool is already full
- TARGET_AVAILABLE_TENANTS is respected
"""

from unittest.mock import MagicMock

import pytest

from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (
    _MAX_TENANTS_PER_RUN,
)
from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (
    check_available_tenants,
)

# Access the underlying function directly, bypassing Celery's task wrapper
# which injects `self` as the first argument when bind=True.
_check_available_tenants = check_available_tenants.run


@pytest.fixture()
def _enable_multi_tenant(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setattr(
        "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT",
        True,
    )


@pytest.fixture()
def mock_redis(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
    mock_lock = MagicMock()
    mock_lock.acquire.return_value = True

    mock_client = MagicMock()
    mock_client.lock.return_value = mock_lock

    monkeypatch.setattr(
        "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_redis_client",
        lambda tenant_id: mock_client,  # noqa: ARG005
    )
    return mock_client


@pytest.fixture()
def mock_pre_provision(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
    mock = MagicMock(return_value=True)
    monkeypatch.setattr(
        "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.pre_provision_tenant",
        mock,
    )
    return mock


def _mock_available_count(monkeypatch: pytest.MonkeyPatch, count: int) -> None:
    """Set up the DB session mock to return a specific available tenant count."""
    mock_session = MagicMock()
    mock_session.__enter__ = MagicMock(return_value=mock_session)
    mock_session.__exit__ = MagicMock(return_value=False)
    mock_session.query.return_value.count.return_value = count

    monkeypatch.setattr(
        "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_session_with_shared_schema",
        lambda: mock_session,
    )


@pytest.mark.usefixtures("_enable_multi_tenant", "mock_redis")
class TestCheckAvailableTenants:
    def test_provisions_all_needed_tenants(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """When pool has 2 and target is 5, should provision 3."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            5,
        )
        _mock_available_count(monkeypatch, 2)

        _check_available_tenants()

        assert mock_pre_provision.call_count == 3

    def test_batch_capped_at_max_per_run(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """When pool needs more than _MAX_TENANTS_PER_RUN, cap the batch."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            20,
        )
        _mock_available_count(monkeypatch, 0)

        _check_available_tenants()

        assert mock_pre_provision.call_count == _MAX_TENANTS_PER_RUN

    def test_no_provisioning_when_pool_full(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """When pool already meets target, should not provision anything."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            5,
        )
        _mock_available_count(monkeypatch, 5)

        _check_available_tenants()

        assert mock_pre_provision.call_count == 0

    def test_no_provisioning_when_pool_exceeds_target(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """When pool exceeds target, should not provision anything."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            5,
        )
        _mock_available_count(monkeypatch, 8)

        _check_available_tenants()

        assert mock_pre_provision.call_count == 0

    def test_failure_does_not_stop_remaining(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """If one provisioning fails, the rest should still be attempted."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            5,
        )
        _mock_available_count(monkeypatch, 0)

        # Fail on calls 2 and 4 (1-indexed)
        call_count = 0

        def side_effect() -> bool:
            nonlocal call_count
            call_count += 1
            if call_count in (2, 4):
                raise RuntimeError("provisioning failed")
            return True

        mock_pre_provision.side_effect = side_effect

        _check_available_tenants()

        # All 5 should be attempted despite 2 failures
        assert mock_pre_provision.call_count == 5

    def test_skips_when_not_multi_tenant(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_pre_provision: MagicMock,
    ) -> None:
        """Should not provision when multi-tenancy is disabled."""
        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT",
            False,
        )

        _check_available_tenants()

        assert mock_pre_provision.call_count == 0

    def test_skips_when_lock_not_acquired(
        self,
        mock_redis: MagicMock,
        mock_pre_provision: MagicMock,
    ) -> None:
        """Should skip when another instance holds the lock."""
        mock_redis.lock.return_value.acquire.return_value = False

        _check_available_tenants()

        assert mock_pre_provision.call_count == 0

    def test_lock_release_failure_does_not_raise(
        self,
        monkeypatch: pytest.MonkeyPatch,
        mock_redis: MagicMock,
        mock_pre_provision: MagicMock,
    ) -> None:
        """LockNotOwnedError on release should be caught, not propagated."""
        from redis.exceptions import LockNotOwnedError

        monkeypatch.setattr(
            "ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
            5,
        )
        _mock_available_count(monkeypatch, 4)

        mock_redis.lock.return_value.release.side_effect = LockNotOwnedError("expired")

        # Should not raise
        _check_available_tenants()

        assert mock_pre_provision.call_count == 1


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/test_hierarchyfetching_queue.py
================================================
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.background.celery.tasks.hierarchyfetching.tasks import (
    _connector_supports_hierarchy_fetching,
)
from onyx.background.celery.tasks.hierarchyfetching.tasks import (
    check_for_hierarchy_fetching,
)
from onyx.connectors.factory import ConnectorMissingException
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import HierarchyConnector
from onyx.connectors.interfaces import HierarchyOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch

TASKS_MODULE = "onyx.background.celery.tasks.hierarchyfetching.tasks"


class _NonHierarchyConnector(BaseConnector):
    def load_credentials(self, credentials: dict) -> dict | None:  # noqa: ARG002
        return None


class _HierarchyCapableConnector(HierarchyConnector):
    def load_credentials(self, credentials: dict) -> dict | None:  # noqa: ARG002
        return None

    def load_hierarchy(
        self,
        start: SecondsSinceUnixEpoch,  # noqa: ARG002
        end: SecondsSinceUnixEpoch,  # noqa: ARG002
    ) -> HierarchyOutput:
        return
        yield


def _build_cc_pair_mock() -> MagicMock:
    cc_pair = MagicMock()
    cc_pair.connector.source = "mock-source"
    cc_pair.connector.input_type = "mock-input-type"
    return cc_pair


def _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:
    redis_client = MagicMock()
    lock = MagicMock()
    lock.acquire.return_value = True
    lock.owned.return_value = True
    redis_client.lock.return_value = lock
    return redis_client, lock


@patch(f"{TASKS_MODULE}.identify_connector_class")
def test_connector_supports_hierarchy_fetching_false_for_non_hierarchy_connector(
    mock_identify_connector_class: MagicMock,
) -> None:
    mock_identify_connector_class.return_value = _NonHierarchyConnector

    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False
    mock_identify_connector_class.assert_called_once_with("mock-source")


@patch(f"{TASKS_MODULE}.task_logger.warning")
@patch(f"{TASKS_MODULE}.identify_connector_class")
def test_connector_supports_hierarchy_fetching_false_when_class_missing(
    mock_identify_connector_class: MagicMock,
    mock_warning: MagicMock,
) -> None:
    mock_identify_connector_class.side_effect = ConnectorMissingException("missing")

    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False
    mock_warning.assert_called_once()


@patch(f"{TASKS_MODULE}.identify_connector_class")
def test_connector_supports_hierarchy_fetching_true_for_supported_connector(
    mock_identify_connector_class: MagicMock,
) -> None:
    mock_identify_connector_class.return_value = _HierarchyCapableConnector

    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is True
    mock_identify_connector_class.assert_called_once_with("mock-source")


@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
def test_check_for_hierarchy_fetching_skips_unsupported_connectors(
    mock_supports_hierarchy_fetching: MagicMock,
    mock_get_redis_client: MagicMock,
    mock_get_session: MagicMock,
    mock_fetch_cc_pair_ids: MagicMock,
    mock_get_cc_pair: MagicMock,
    mock_is_due: MagicMock,
    mock_try_create_task: MagicMock,
) -> None:
    redis_client, lock = _build_redis_mock_with_lock()
    mock_get_redis_client.return_value = redis_client
    mock_get_session.return_value.__enter__.return_value = MagicMock()
    mock_fetch_cc_pair_ids.return_value = [123]
    mock_get_cc_pair.return_value = _build_cc_pair_mock()
    mock_supports_hierarchy_fetching.return_value = False
    mock_is_due.return_value = True

    task_app = MagicMock()
    with patch.object(check_for_hierarchy_fetching, "app", task_app):
        result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")

    assert result == 0
    mock_is_due.assert_not_called()
    mock_try_create_task.assert_not_called()
    lock.release.assert_called_once()


@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
def test_check_for_hierarchy_fetching_creates_task_for_supported_due_connector(
    mock_supports_hierarchy_fetching: MagicMock,
    mock_get_redis_client: MagicMock,
    mock_get_session: MagicMock,
    mock_fetch_cc_pair_ids: MagicMock,
    mock_get_cc_pair: MagicMock,
    mock_is_due: MagicMock,
    mock_try_create_task: MagicMock,
) -> None:
    redis_client, lock = _build_redis_mock_with_lock()
    cc_pair = _build_cc_pair_mock()
    db_session = MagicMock()
    mock_get_redis_client.return_value = redis_client
    mock_get_session.return_value.__enter__.return_value = db_session
    mock_fetch_cc_pair_ids.return_value = [123]
    mock_get_cc_pair.return_value = cc_pair
    mock_supports_hierarchy_fetching.return_value = True
    mock_is_due.return_value = True
    mock_try_create_task.return_value = "task-id"

    task_app = MagicMock()
    with patch.object(check_for_hierarchy_fetching, "app", task_app):
        result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")

    assert result == 1
    mock_is_due.assert_called_once_with(cc_pair)
    mock_try_create_task.assert_called_once_with(
        celery_app=task_app,
        cc_pair=cc_pair,
        db_session=db_session,
        r=redis_client,
        tenant_id="test-tenant",
    )
    lock.release.assert_called_once()


@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
def test_check_for_hierarchy_fetching_skips_supported_connector_when_not_due(
    mock_supports_hierarchy_fetching: MagicMock,
    mock_get_redis_client: MagicMock,
    mock_get_session: MagicMock,
    mock_fetch_cc_pair_ids: MagicMock,
    mock_get_cc_pair: MagicMock,
    mock_is_due: MagicMock,
    mock_try_create_task: MagicMock,
) -> None:
    redis_client, lock = _build_redis_mock_with_lock()
    cc_pair = _build_cc_pair_mock()
    mock_get_redis_client.return_value = redis_client
    mock_get_session.return_value.__enter__.return_value = MagicMock()
    mock_fetch_cc_pair_ids.return_value = [123]
    mock_get_cc_pair.return_value = cc_pair
    mock_supports_hierarchy_fetching.return_value = True
    mock_is_due.return_value = False

    task_app = MagicMock()
    with patch.object(check_for_hierarchy_fetching, "app", task_app):
        result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")

    assert result == 0
    mock_is_due.assert_called_once_with(cc_pair)
    mock_try_create_task.assert_not_called()
    lock.release.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/test_user_file_impl_redis_locking.py
================================================
"""Tests for the _impl functions' redis_locking parameter.

Verifies that:
- redis_locking=True acquires/releases Redis locks and clears queued keys
- redis_locking=False skips all Redis operations entirely
- Both paths execute the same business logic (DB lookup, status check)
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from onyx.background.celery.tasks.user_file_processing.tasks import (
    delete_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    project_sync_user_file_impl,
)

TASKS_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"


def _mock_session_returning_none() -> MagicMock:
    """Return a mock session whose .get() returns None (file not found)."""
    session = MagicMock()
    session.get.return_value = None
    return session


# ------------------------------------------------------------------
# process_user_file_impl
# ------------------------------------------------------------------


class TestProcessUserFileImpl:
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_acquires_and_releases_lock(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = True
        lock.owned.return_value = True
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        user_file_id = str(uuid4())
        process_user_file_impl(
            user_file_id=user_file_id,
            tenant_id="test-tenant",
            redis_locking=True,
        )

        mock_get_redis.assert_called_once_with(tenant_id="test-tenant")
        redis_client.delete.assert_called_once()
        lock.acquire.assert_called_once_with(blocking=False)
        lock.release.assert_called_once()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_skips_when_lock_held(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = False
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        process_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=True,
        )

        lock.acquire.assert_called_once()
        mock_get_session.assert_not_called()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_false_skips_redis_entirely(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        process_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=False,
        )

        mock_get_redis.assert_not_called()
        mock_get_session.assert_called_once()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_both_paths_call_db_get(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        """Both redis_locking=True and False should call db_session.get(UserFile, ...)."""
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = True
        lock.owned.return_value = True
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        uid = str(uuid4())

        process_user_file_impl(user_file_id=uid, tenant_id="t", redis_locking=True)
        call_count_true = session.get.call_count

        session.reset_mock()
        mock_get_session.reset_mock()
        mock_get_session.return_value.__enter__.return_value = session

        process_user_file_impl(user_file_id=uid, tenant_id="t", redis_locking=False)
        call_count_false = session.get.call_count

        assert call_count_true == call_count_false == 1


# ------------------------------------------------------------------
# delete_user_file_impl
# ------------------------------------------------------------------


class TestDeleteUserFileImpl:
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_acquires_and_releases_lock(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = True
        lock.owned.return_value = True
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        delete_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=True,
        )

        mock_get_redis.assert_called_once()
        lock.acquire.assert_called_once_with(blocking=False)
        lock.release.assert_called_once()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_skips_when_lock_held(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = False
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        delete_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=True,
        )

        lock.acquire.assert_called_once()
        mock_get_session.assert_not_called()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_false_skips_redis_entirely(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        delete_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=False,
        )

        mock_get_redis.assert_not_called()
        mock_get_session.assert_called_once()


# ------------------------------------------------------------------
# project_sync_user_file_impl
# ------------------------------------------------------------------


@patch(
    f"{TASKS_MODULE}.fetch_user_files_with_access_relationships",
    return_value=[],
)
class TestProjectSyncUserFileImpl:
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_acquires_and_releases_lock(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
        _mock_fetch: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = True
        lock.owned.return_value = True
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        project_sync_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=True,
        )

        mock_get_redis.assert_called_once()
        redis_client.delete.assert_called_once()
        lock.acquire.assert_called_once_with(blocking=False)
        lock.release.assert_called_once()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_true_skips_when_lock_held(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
        _mock_fetch: MagicMock,
    ) -> None:
        redis_client = MagicMock()
        lock = MagicMock()
        lock.acquire.return_value = False
        redis_client.lock.return_value = lock
        mock_get_redis.return_value = redis_client

        project_sync_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=True,
        )

        lock.acquire.assert_called_once()
        mock_get_session.assert_not_called()

    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    @patch(f"{TASKS_MODULE}.get_redis_client")
    def test_redis_locking_false_skips_redis_entirely(
        self,
        mock_get_redis: MagicMock,
        mock_get_session: MagicMock,
        _mock_fetch: MagicMock,
    ) -> None:
        session = _mock_session_returning_none()
        mock_get_session.return_value.__enter__.return_value = session

        project_sync_user_file_impl(
            user_file_id=str(uuid4()),
            tenant_id="test-tenant",
            redis_locking=False,
        )

        mock_get_redis.assert_not_called()
        mock_get_session.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/test_user_file_processing_no_vectordb.py
================================================
"""Tests for no-vector-DB user file processing paths.

Verifies that when DISABLE_VECTOR_DB is True:
- process_user_file_impl calls _process_user_file_without_vector_db (not indexing)
- _process_user_file_without_vector_db extracts text, counts tokens, stores plaintext,
  sets status=COMPLETED and chunk_count=0
- delete_user_file_impl skips vector DB chunk deletion
- project_sync_user_file_impl skips vector DB metadata update
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from onyx.background.celery.tasks.user_file_processing.tasks import (
    _process_user_file_without_vector_db,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    delete_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    project_sync_user_file_impl,
)
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.db.enums import UserFileStatus

TASKS_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"
LLM_FACTORY_MODULE = "onyx.llm.factory"


def _make_documents(texts: list[str]) -> list[Document]:
    """Build a list of Document objects with the given section texts."""
    return [
        Document(
            id=str(uuid4()),
            source=DocumentSource.USER_FILE,
            sections=[TextSection(text=t)],
            semantic_identifier=f"test-doc-{i}",
            metadata={},
        )
        for i, t in enumerate(texts)
    ]


def _make_user_file(
    *,
    status: UserFileStatus = UserFileStatus.PROCESSING,
    file_id: str = "test-file-id",
    name: str = "test.txt",
) -> MagicMock:
    """Return a MagicMock mimicking a UserFile ORM instance."""
    uf = MagicMock()
    uf.id = uuid4()
    uf.file_id = file_id
    uf.name = name
    uf.status = status
    uf.token_count = None
    uf.chunk_count = None
    uf.last_project_sync_at = None
    uf.projects = []
    uf.assistants = []
    uf.needs_project_sync = True
    uf.needs_persona_sync = True
    return uf


# ------------------------------------------------------------------
# _process_user_file_without_vector_db — direct tests
# ------------------------------------------------------------------


class TestProcessUserFileWithoutVectorDb:
    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_extracts_and_combines_text(
        self,
        mock_get_llm: MagicMock,  # noqa: ARG002
        mock_get_encode: MagicMock,
        mock_store_plaintext: MagicMock,
    ) -> None:
        mock_encode = MagicMock(return_value=[1, 2, 3, 4, 5])
        mock_get_encode.return_value = mock_encode

        uf = _make_user_file()
        docs = _make_documents(["hello world", "foo bar"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        stored_text = mock_store_plaintext.call_args.kwargs["plaintext_content"]
        assert "hello world" in stored_text
        assert "foo bar" in stored_text

    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_computes_token_count(
        self,
        mock_get_llm: MagicMock,  # noqa: ARG002
        mock_get_encode: MagicMock,
        mock_store_plaintext: MagicMock,  # noqa: ARG002
    ) -> None:
        mock_encode = MagicMock(return_value=list(range(42)))
        mock_get_encode.return_value = mock_encode

        uf = _make_user_file()
        docs = _make_documents(["some text content"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        assert uf.token_count == 42

    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_token_count_falls_back_to_none_on_error(
        self,
        mock_get_llm: MagicMock,
        mock_get_encode: MagicMock,  # noqa: ARG002
        mock_store_plaintext: MagicMock,  # noqa: ARG002
    ) -> None:
        mock_get_llm.side_effect = RuntimeError("No LLM configured")

        uf = _make_user_file()
        docs = _make_documents(["text"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        assert uf.token_count is None

    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_stores_plaintext(
        self,
        mock_get_llm: MagicMock,  # noqa: ARG002
        mock_get_encode: MagicMock,
        mock_store_plaintext: MagicMock,
    ) -> None:
        mock_get_encode.return_value = MagicMock(return_value=[1])

        uf = _make_user_file()
        docs = _make_documents(["content to store"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        mock_store_plaintext.assert_called_once_with(
            user_file_id=uf.id,
            plaintext_content="content to store",
        )

    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_sets_completed_status_and_zero_chunk_count(
        self,
        mock_get_llm: MagicMock,  # noqa: ARG002
        mock_get_encode: MagicMock,
        mock_store_plaintext: MagicMock,  # noqa: ARG002
    ) -> None:
        mock_get_encode.return_value = MagicMock(return_value=[1])

        uf = _make_user_file()
        docs = _make_documents(["text"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        assert uf.status == UserFileStatus.COMPLETED
        assert uf.chunk_count == 0
        assert uf.last_project_sync_at is not None
        db_session.add.assert_called_once_with(uf)
        db_session.commit.assert_called_once()

    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
    @patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
    def test_preserves_deleting_status(
        self,
        mock_get_llm: MagicMock,  # noqa: ARG002
        mock_get_encode: MagicMock,
        mock_store_plaintext: MagicMock,  # noqa: ARG002
    ) -> None:
        mock_get_encode.return_value = MagicMock(return_value=[1])

        uf = _make_user_file(status=UserFileStatus.DELETING)
        docs = _make_documents(["text"])
        db_session = MagicMock()

        _process_user_file_without_vector_db(uf, docs, db_session)

        assert uf.status == UserFileStatus.DELETING
        assert uf.chunk_count == 0


# ------------------------------------------------------------------
# process_user_file_impl — branching on DISABLE_VECTOR_DB
# ------------------------------------------------------------------


class TestProcessImplBranching:
    @patch(f"{TASKS_MODULE}._process_user_file_without_vector_db")
    @patch(f"{TASKS_MODULE}._process_user_file_with_indexing")
    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_calls_without_vector_db_when_disabled(
        self,
        mock_get_session: MagicMock,
        mock_with_indexing: MagicMock,
        mock_without_vdb: MagicMock,
    ) -> None:
        uf = _make_user_file()
        session = MagicMock()
        session.get.return_value = uf
        mock_get_session.return_value.__enter__.return_value = session

        connector_mock = MagicMock()
        connector_mock.load_from_state.return_value = [_make_documents(["hello"])]

        with patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock):
            process_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

        mock_without_vdb.assert_called_once()
        mock_with_indexing.assert_not_called()

    @patch(f"{TASKS_MODULE}._process_user_file_without_vector_db")
    @patch(f"{TASKS_MODULE}._process_user_file_with_indexing")
    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", False)
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_calls_with_indexing_when_vector_db_enabled(
        self,
        mock_get_session: MagicMock,
        mock_with_indexing: MagicMock,
        mock_without_vdb: MagicMock,
    ) -> None:
        uf = _make_user_file()
        session = MagicMock()
        session.get.return_value = uf
        mock_get_session.return_value.__enter__.return_value = session

        connector_mock = MagicMock()
        connector_mock.load_from_state.return_value = [_make_documents(["hello"])]

        with patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock):
            process_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

        mock_with_indexing.assert_called_once()
        mock_without_vdb.assert_not_called()

    @patch(f"{TASKS_MODULE}.run_indexing_pipeline")
    @patch(f"{TASKS_MODULE}.store_user_file_plaintext")
    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_indexing_pipeline_not_called_when_disabled(
        self,
        mock_get_session: MagicMock,
        mock_store_plaintext: MagicMock,  # noqa: ARG002
        mock_run_pipeline: MagicMock,
    ) -> None:
        """End-to-end: verify run_indexing_pipeline is never invoked."""
        uf = _make_user_file()
        session = MagicMock()
        session.get.return_value = uf
        mock_get_session.return_value.__enter__.return_value = session

        connector_mock = MagicMock()
        connector_mock.load_from_state.return_value = [_make_documents(["content"])]

        with (
            patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock),
            patch(f"{LLM_FACTORY_MODULE}.get_default_llm"),
            patch(
                f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func",
                return_value=MagicMock(return_value=[1, 2, 3]),
            ),
        ):
            process_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

        mock_run_pipeline.assert_not_called()


# ------------------------------------------------------------------
# delete_user_file_impl — vector DB skip
# ------------------------------------------------------------------


class TestDeleteImplNoVectorDb:
    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_default_file_store")
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_skips_vector_db_deletion(
        self,
        mock_get_session: MagicMock,
        mock_get_file_store: MagicMock,
    ) -> None:
        uf = _make_user_file(status=UserFileStatus.DELETING)
        session = MagicMock()
        session.get.return_value = uf
        mock_get_session.return_value.__enter__.return_value = session
        mock_get_file_store.return_value = MagicMock()

        with (
            patch(f"{TASKS_MODULE}.get_all_document_indices") as mock_get_indices,
            patch(f"{TASKS_MODULE}.get_active_search_settings") as mock_get_ss,
            patch(f"{TASKS_MODULE}.httpx_init_vespa_pool") as mock_vespa_pool,
        ):
            delete_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

            mock_get_indices.assert_not_called()
            mock_get_ss.assert_not_called()
            mock_vespa_pool.assert_not_called()

        session.delete.assert_called_once_with(uf)
        session.commit.assert_called_once()

    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_default_file_store")
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_still_deletes_file_store_and_db_record(
        self,
        mock_get_session: MagicMock,
        mock_get_file_store: MagicMock,
    ) -> None:
        uf = _make_user_file(status=UserFileStatus.DELETING)
        session = MagicMock()
        session.get.return_value = uf
        mock_get_session.return_value.__enter__.return_value = session

        file_store = MagicMock()
        mock_get_file_store.return_value = file_store

        delete_user_file_impl(
            user_file_id=str(uf.id),
            tenant_id="test-tenant",
            redis_locking=False,
        )

        assert file_store.delete_file.call_count == 2
        session.delete.assert_called_once_with(uf)
        session.commit.assert_called_once()


# ------------------------------------------------------------------
# project_sync_user_file_impl — vector DB skip
# ------------------------------------------------------------------


class TestProjectSyncImplNoVectorDb:
    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_skips_vector_db_update(
        self,
        mock_get_session: MagicMock,
    ) -> None:
        uf = _make_user_file(status=UserFileStatus.COMPLETED)
        session = MagicMock()
        mock_get_session.return_value.__enter__.return_value = session

        with (
            patch(
                f"{TASKS_MODULE}.fetch_user_files_with_access_relationships",
                return_value=[uf],
            ),
            patch(f"{TASKS_MODULE}.get_all_document_indices") as mock_get_indices,
            patch(f"{TASKS_MODULE}.get_active_search_settings") as mock_get_ss,
            patch(f"{TASKS_MODULE}.httpx_init_vespa_pool") as mock_vespa_pool,
        ):
            project_sync_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

            mock_get_indices.assert_not_called()
            mock_get_ss.assert_not_called()
            mock_vespa_pool.assert_not_called()

    @patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
    @patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
    def test_still_clears_sync_flags(
        self,
        mock_get_session: MagicMock,
    ) -> None:
        uf = _make_user_file(status=UserFileStatus.COMPLETED)
        session = MagicMock()
        mock_get_session.return_value.__enter__.return_value = session

        with patch(
            f"{TASKS_MODULE}.fetch_user_files_with_access_relationships",
            return_value=[uf],
        ):
            project_sync_user_file_impl(
                user_file_id=str(uf.id),
                tenant_id="test-tenant",
                redis_locking=False,
            )

        assert uf.needs_project_sync is False
        assert uf.needs_persona_sync is False
        assert uf.last_project_sync_at is not None
        session.add.assert_called_once_with(uf)
        session.commit.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/background/celery/tasks/test_user_file_project_sync_queue.py
================================================
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest

from onyx.background.celery.tasks.user_file_processing.tasks import (
    _user_file_project_sync_queued_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    check_for_user_file_project_sync,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    enqueue_user_file_project_sync_task,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
    process_single_user_file_project_sync,
)
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH


def _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:
    redis_client = MagicMock()
    lock = MagicMock()
    lock.acquire.return_value = True
    lock.owned.return_value = True
    redis_client.lock.return_value = lock
    return redis_client, lock


@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth"
)
@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_check_for_user_file_project_sync_applies_queue_backpressure(
    mock_get_redis_client: MagicMock,
    mock_get_queue_depth: MagicMock,
) -> None:
    redis_client, lock = _build_redis_mock_with_lock()
    mock_get_redis_client.return_value = redis_client
    mock_get_queue_depth.return_value = USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH + 1

    task_app = MagicMock()
    with patch.object(check_for_user_file_project_sync, "app", task_app):
        check_for_user_file_project_sync.run(tenant_id="test-tenant")

    task_app.send_task.assert_not_called()
    lock.release.assert_called_once()


@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks.enqueue_user_file_project_sync_task"
)
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth"
)
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks.get_session_with_current_tenant"
)
@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_check_for_user_file_project_sync_skips_duplicates(
    mock_get_redis_client: MagicMock,
    mock_get_session: MagicMock,
    mock_get_queue_depth: MagicMock,
    mock_enqueue: MagicMock,
) -> None:
    redis_client, lock = _build_redis_mock_with_lock()
    mock_get_redis_client.return_value = redis_client
    mock_get_queue_depth.return_value = 0

    user_file_id_one = uuid4()
    user_file_id_two = uuid4()

    session = MagicMock()
    session.execute.return_value.scalars.return_value.all.return_value = [
        user_file_id_one,
        user_file_id_two,
    ]
    mock_get_session.return_value.__enter__.return_value = session
    mock_enqueue.side_effect = [True, False]

    task_app = MagicMock()
    with patch.object(check_for_user_file_project_sync, "app", task_app):
        check_for_user_file_project_sync.run(tenant_id="test-tenant")

    assert mock_enqueue.call_count == 2
    lock.release.assert_called_once()


def test_enqueue_user_file_project_sync_task_sets_guard_and_expiry() -> None:
    redis_client = MagicMock()
    redis_client.set.return_value = True
    celery_app = MagicMock()
    user_file_id = str(uuid4())

    enqueued = enqueue_user_file_project_sync_task(
        celery_app=celery_app,
        redis_client=redis_client,
        user_file_id=user_file_id,
        tenant_id="test-tenant",
        priority=OnyxCeleryPriority.HIGHEST,
    )

    assert enqueued is True
    redis_client.set.assert_called_once_with(
        _user_file_project_sync_queued_key(user_file_id),
        1,
        nx=True,
        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )
    celery_app.send_task.assert_called_once_with(
        OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
        kwargs={"user_file_id": user_file_id, "tenant_id": "test-tenant"},
        queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
        priority=OnyxCeleryPriority.HIGHEST,
        expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )


def test_enqueue_user_file_project_sync_task_rolls_back_guard_on_publish_failure() -> (
    None
):
    redis_client = MagicMock()
    redis_client.set.return_value = True
    celery_app = MagicMock()
    celery_app.send_task.side_effect = RuntimeError("publish failed")

    user_file_id = str(uuid4())
    with pytest.raises(RuntimeError):
        enqueue_user_file_project_sync_task(
            celery_app=celery_app,
            redis_client=redis_client,
            user_file_id=user_file_id,
            tenant_id="test-tenant",
        )

    redis_client.delete.assert_called_once_with(
        _user_file_project_sync_queued_key(user_file_id)
    )


@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_process_single_user_file_project_sync_clears_queued_guard_on_pickup(
    mock_get_redis_client: MagicMock,
) -> None:
    redis_client = MagicMock()
    lock = MagicMock()
    lock.acquire.return_value = False
    redis_client.lock.return_value = lock
    mock_get_redis_client.return_value = redis_client

    user_file_id = str(uuid4())
    process_single_user_file_project_sync.run(
        user_file_id=user_file_id,
        tenant_id="test-tenant",
    )

    redis_client.delete.assert_called_once_with(
        _user_file_project_sync_queued_key(user_file_id)
    )


================================================
FILE: backend/tests/unit/onyx/background/celery/test_celery_redis.py
================================================
"""Tests for celery_get_broker_client singleton."""

from collections.abc import Iterator
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.background.celery import celery_redis


@pytest.fixture(autouse=True)
def reset_singleton() -> Iterator[None]:
    """Reset the module-level singleton between tests."""
    celery_redis._broker_client = None
    celery_redis._broker_url = None
    yield
    celery_redis._broker_client = None
    celery_redis._broker_url = None


def _make_mock_app(broker_url: str = "redis://localhost:6379/15") -> MagicMock:
    app = MagicMock()
    app.conf.broker_url = broker_url
    return app


class TestCeleryGetBrokerClient:
    @patch("onyx.background.celery.celery_redis.Redis")
    def test_creates_client_on_first_call(self, mock_redis_cls: MagicMock) -> None:
        mock_client = MagicMock()
        mock_redis_cls.from_url.return_value = mock_client

        app = _make_mock_app()
        result = celery_redis.celery_get_broker_client(app)

        assert result is mock_client
        call_args = mock_redis_cls.from_url.call_args
        assert call_args[0][0] == "redis://localhost:6379/15"
        assert call_args[1]["decode_responses"] is False
        assert call_args[1]["socket_keepalive"] is True
        assert call_args[1]["retry_on_timeout"] is True

    @patch("onyx.background.celery.celery_redis.Redis")
    def test_reuses_cached_client(self, mock_redis_cls: MagicMock) -> None:
        mock_client = MagicMock()
        mock_client.ping.return_value = True
        mock_redis_cls.from_url.return_value = mock_client

        app = _make_mock_app()
        client1 = celery_redis.celery_get_broker_client(app)
        client2 = celery_redis.celery_get_broker_client(app)

        assert client1 is client2
        # from_url called only once
        assert mock_redis_cls.from_url.call_count == 1

    @patch("onyx.background.celery.celery_redis.Redis")
    def test_reconnects_on_ping_failure(self, mock_redis_cls: MagicMock) -> None:
        stale_client = MagicMock()
        stale_client.ping.side_effect = ConnectionError("disconnected")

        fresh_client = MagicMock()
        fresh_client.ping.return_value = True

        mock_redis_cls.from_url.side_effect = [stale_client, fresh_client]

        app = _make_mock_app()

        # First call creates stale_client
        client1 = celery_redis.celery_get_broker_client(app)
        assert client1 is stale_client

        # Second call: ping fails, creates fresh_client
        client2 = celery_redis.celery_get_broker_client(app)
        assert client2 is fresh_client
        assert mock_redis_cls.from_url.call_count == 2

    @patch("onyx.background.celery.celery_redis.Redis")
    def test_uses_broker_url_from_app_config(self, mock_redis_cls: MagicMock) -> None:
        mock_redis_cls.from_url.return_value = MagicMock()

        app = _make_mock_app("redis://custom-host:6380/3")
        celery_redis.celery_get_broker_client(app)

        call_args = mock_redis_cls.from_url.call_args
        assert call_args[0][0] == "redis://custom-host:6380/3"


================================================
FILE: backend/tests/unit/onyx/chat/test_argument_delta_streaming.py
================================================
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
from onyx.utils.jsonriver import Parser


def _make_tool_call_delta(
    index: int = 0,
    name: str | None = None,
    arguments: str | None = None,
    function_is_none: bool = False,
) -> MagicMock:
    """Create a mock tool_call_delta matching the LiteLLM streaming shape."""
    delta = MagicMock()
    delta.index = index
    if function_is_none:
        delta.function = None
    else:
        delta.function = MagicMock()
        delta.function.name = name
        delta.function.arguments = arguments
    return delta


def _make_placement() -> Placement:
    return Placement(turn_index=0, tab_index=0)


def _mock_tool_class(emit: bool = True) -> MagicMock:
    cls = MagicMock()
    cls.should_emit_argument_deltas.return_value = emit
    return cls


def _collect(
    tc_map: dict[int, dict[str, Any]],
    delta: MagicMock,
    placement: Placement | None = None,
    parsers: dict[int, Parser] | None = None,
) -> list[Any]:
    """Run maybe_emit_argument_delta and return the yielded packets."""
    return list(
        maybe_emit_argument_delta(
            tc_map,
            delta,
            placement or _make_placement(),
            parsers if parsers is not None else {},
        )
    )


def _stream_fragments(
    fragments: list[str],
    tc_map: dict[int, dict[str, Any]],
    placement: Placement | None = None,
) -> list[str]:
    """Feed fragments into maybe_emit_argument_delta one by one, returning
    all emitted content values concatenated per-key as a flat list."""
    pl = placement or _make_placement()
    parsers: dict[int, Parser] = {}
    emitted: list[str] = []
    for frag in fragments:
        tc_map[0]["arguments"] += frag
        delta = _make_tool_call_delta(arguments=frag)
        for packet in maybe_emit_argument_delta(tc_map, delta, pl, parsers=parsers):
            obj = packet.obj
            assert isinstance(obj, ToolCallArgumentDelta)
            for value in obj.argument_deltas.values():
                emitted.append(value)
    return emitted


class TestMaybeEmitArgumentDeltaGuards:
    """Tests for conditions that cause no packet to be emitted."""

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_when_tool_does_not_opt_in(
        self, mock_get_tool: MagicMock
    ) -> None:
        """Tools that return False from should_emit_argument_deltas emit nothing."""
        mock_get_tool.return_value = _mock_tool_class(emit=False)

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
        }
        assert _collect(tc_map, _make_tool_call_delta(arguments="x")) == []

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_when_tool_class_unknown(
        self, mock_get_tool: MagicMock
    ) -> None:
        mock_get_tool.return_value = None

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "unknown", "arguments": '{"code": "x'}
        }
        assert _collect(tc_map, _make_tool_call_delta(arguments="x")) == []

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_when_no_argument_fragment(
        self, mock_get_tool: MagicMock
    ) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
        }
        assert _collect(tc_map, _make_tool_call_delta(arguments=None)) == []

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_when_key_value_not_started(
        self, mock_get_tool: MagicMock
    ) -> None:
        """Key exists in JSON but its string value hasn't begun yet."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": '{"code":'}
        }
        assert _collect(tc_map, _make_tool_call_delta(arguments=":")) == []

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_before_any_key(self, mock_get_tool: MagicMock) -> None:
        """Only the opening brace has arrived — no key to stream yet."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": "{"}
        }
        assert _collect(tc_map, _make_tool_call_delta(arguments="{")) == []


class TestMaybeEmitArgumentDeltaBasic:
    """Tests for correct packet content and incremental emission."""

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_emits_packet_with_correct_fields(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "', "print(1)", '"}']

        pl = _make_placement()
        parsers: dict[int, Parser] = {}
        all_packets = []
        for frag in fragments:
            tc_map[0]["arguments"] += frag
            packets = _collect(
                tc_map, _make_tool_call_delta(arguments=frag), pl, parsers
            )
            all_packets.extend(packets)

        assert len(all_packets) >= 1
        # Verify packet structure
        obj = all_packets[0].obj
        assert isinstance(obj, ToolCallArgumentDelta)
        assert obj.tool_type == "python"
        # All emitted content should reconstruct the value
        full_code = ""
        for p in all_packets:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            if "code" in p.obj.argument_deltas:
                full_code += p.obj.argument_deltas["code"]
        assert full_code == "print(1)"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_emits_only_new_content_on_subsequent_call(
        self, mock_get_tool: MagicMock
    ) -> None:
        """After a first emission, subsequent calls emit only the diff."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        parsers: dict[int, Parser] = {}
        pl = _make_placement()

        # First fragment opens the string
        tc_map[0]["arguments"] = '{"code": "abc'
        packets_1 = _collect(
            tc_map, _make_tool_call_delta(arguments='{"code": "abc'), pl, parsers
        )
        code_1 = ""
        for p in packets_1:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            code_1 += p.obj.argument_deltas.get("code", "")
        assert code_1 == "abc"

        # Second fragment appends more
        tc_map[0]["arguments"] = '{"code": "abcdef'
        packets_2 = _collect(
            tc_map, _make_tool_call_delta(arguments="def"), pl, parsers
        )
        code_2 = ""
        for p in packets_2:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            code_2 += p.obj.argument_deltas.get("code", "")
        assert code_2 == "def"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_handles_multiple_keys_sequentially(self, mock_get_tool: MagicMock) -> None:
        """When a second key starts, emissions switch to that key."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "x',
            '", "output": "hello',
            '"}',
        ]

        emitted = _stream_fragments(fragments, tc_map)
        full = "".join(emitted)
        assert "x" in full
        assert "hello" in full

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_delta_spans_key_boundary(self, mock_get_tool: MagicMock) -> None:
        """A single delta contains the end of one value and the start of the next key."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "x',
            'y", "lang": "py',
            '"}',
        ]

        emitted = _stream_fragments(fragments, tc_map)
        full = "".join(emitted)
        assert "xy" in full
        assert "py" in full

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_empty_value_emits_nothing(self, mock_get_tool: MagicMock) -> None:
        """An empty string value has nothing to emit."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        # Opening quote just arrived, value is empty
        tc_map[0]["arguments"] = '{"code": "'
        packets = _collect(tc_map, _make_tool_call_delta(arguments='{"code": "'))
        # No string content yet, so either no packet or empty deltas
        for p in packets:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            assert p.obj.argument_deltas.get("code", "") == ""


class TestMaybeEmitArgumentDeltaDecoding:
    """Tests verifying that JSON escape sequences are properly decoded."""

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_decodes_newlines(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "line1\\nline2"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "line1\nline2"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_decodes_tabs(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "\\tindented"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "\tindented"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_decodes_escaped_quotes(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "say \\"hi\\""}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == 'say "hi"'

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_decodes_escaped_backslashes(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "path\\\\dir"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "path\\dir"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_decodes_unicode_escape(self, mock_get_tool: MagicMock) -> None:
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "\\u0041"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "A"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_incomplete_escape_at_end_decoded_on_next_chunk(
        self, mock_get_tool: MagicMock
    ) -> None:
        """A trailing backslash (incomplete escape) is completed in the next chunk."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "hello\\', 'n"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "hello\n"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_incomplete_unicode_escape_completed_on_next_chunk(
        self, mock_get_tool: MagicMock
    ) -> None:
        """A partial \\uXX sequence is completed in the next chunk."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"code": "hello\\u00', '41"}']

        emitted = _stream_fragments(fragments, tc_map)
        assert "".join(emitted) == "helloA"


class TestArgumentDeltaStreamingE2E:
    """Simulates realistic sequences of LLM argument deltas to verify
    the full pipeline produces correct decoded output."""

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_realistic_python_code_streaming(self, mock_get_tool: MagicMock) -> None:
        """Streams: {"code": "print('hello')\\nprint('world')"}"""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"',
            "code",
            '": "',
            "print(",
            "'hello')",
            "\\n",
            "print(",
            "'world')",
            '"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == "print('hello')\nprint('world')"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_streaming_with_tabs_and_newlines(self, mock_get_tool: MagicMock) -> None:
        """Streams code with tabs and newlines."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "',
            "if True:",
            "\\n",
            "\\t",
            "pass",
            '"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == "if True:\n\tpass"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_split_escape_sequence(self, mock_get_tool: MagicMock) -> None:
        """An escape sequence split across two fragments (backslash in one,
        'n' in the next) should still decode correctly."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "hello',
            "\\",
            "n",
            'world"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == "hello\nworld"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_multiple_newlines_and_indentation(self, mock_get_tool: MagicMock) -> None:
        """Streams a multi-line function with multiple escape sequences."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "',
            "def foo():",
            "\\n",
            "\\t",
            "x = 1",
            "\\n",
            "\\t",
            "return x",
            '"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == "def foo():\n\tx = 1\n\treturn x"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_two_keys_streamed_sequentially(self, mock_get_tool: MagicMock) -> None:
        """Streams code first, then a second key (language) — both decoded."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "',
            "x = 1",
            '", "language": "',
            "python",
            '"}',
        ]

        emitted = _stream_fragments(fragments, tc_map)
        # Should have emissions for both keys
        full = "".join(emitted)
        assert "x = 1" in full
        assert "python" in full

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_code_containing_dict_literal(self, mock_get_tool: MagicMock) -> None:
        """Python code like `x = {"key": "val"}` contains JSON-like patterns.
        The escaped quotes inside the *outer* JSON value should prevent the
        inner `"key":` from being mistaken for a top-level JSON key."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        # The LLM sends: {"code": "x = {\"key\": \"val\"}"}
        # The inner quotes are escaped as \" in the JSON value.
        fragments = [
            '{"code": "',
            "x = {",
            '\\"key\\"',
            ": ",
            '\\"val\\"',
            "}",
            '"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == 'x = {"key": "val"}'

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_code_with_colon_in_value(self, mock_get_tool: MagicMock) -> None:
        """Colons inside the string value should not confuse key detection."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = [
            '{"code": "',
            "url = ",
            '\\"https://example.com\\"',
            '"}',
        ]

        full = "".join(_stream_fragments(fragments, tc_map))
        assert full == 'url = "https://example.com"'


class TestMaybeEmitArgumentDeltaEdgeCases:
    """Edge cases not covered by the standard test classes."""

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_no_emission_when_function_is_none(self, mock_get_tool: MagicMock) -> None:
        """Some delta chunks have function=None (e.g. role-only deltas)."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
        }
        delta = _make_tool_call_delta(arguments=None, function_is_none=True)
        assert _collect(tc_map, delta) == []

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_multiple_concurrent_tool_calls(self, mock_get_tool: MagicMock) -> None:
        """Two tool calls streaming at different indices in parallel."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""},
            1: {"id": "tc_2", "name": "python", "arguments": ""},
        }

        parsers: dict[int, Parser] = {}
        pl = _make_placement()

        # Feed full JSON to index 0
        tc_map[0]["arguments"] = '{"code": "aaa"}'
        packets_0 = _collect(
            tc_map,
            _make_tool_call_delta(index=0, arguments='{"code": "aaa"}'),
            pl,
            parsers,
        )
        code_0 = ""
        for p in packets_0:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            code_0 += p.obj.argument_deltas.get("code", "")
        assert code_0 == "aaa"

        # Feed full JSON to index 1
        tc_map[1]["arguments"] = '{"code": "bbb"}'
        packets_1 = _collect(
            tc_map,
            _make_tool_call_delta(index=1, arguments='{"code": "bbb"}'),
            pl,
            parsers,
        )
        code_1 = ""
        for p in packets_1:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            code_1 += p.obj.argument_deltas.get("code", "")
        assert code_1 == "bbb"

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_delta_with_four_arguments(self, mock_get_tool: MagicMock) -> None:
        """A single delta contains four complete key-value pairs."""
        mock_get_tool.return_value = _mock_tool_class()

        full = '{"a": "one", "b": "two", "c": "three", "d": "four"}'
        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        tc_map[0]["arguments"] = full
        parsers: dict[int, Parser] = {}
        packets = _collect(
            tc_map, _make_tool_call_delta(arguments=full), parsers=parsers
        )

        # Collect all argument deltas across packets
        all_deltas: dict[str, str] = {}
        for p in packets:
            assert isinstance(p.obj, ToolCallArgumentDelta)
            for k, v in p.obj.argument_deltas.items():
                all_deltas[k] = all_deltas.get(k, "") + v

        assert all_deltas == {
            "a": "one",
            "b": "two",
            "c": "three",
            "d": "four",
        }

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_delta_on_second_arg_after_first_complete(
        self, mock_get_tool: MagicMock
    ) -> None:
        """First argument is fully complete; delta only adds to the second."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }

        fragments = [
            '{"code": "print(1)", "lang": "py',
            '"}',
        ]

        emitted = _stream_fragments(fragments, tc_map)
        full = "".join(emitted)
        assert "print(1)" in full
        assert "py" in full

    @patch("onyx.chat.tool_call_args_streaming._get_tool_class")
    def test_non_string_values_skipped(self, mock_get_tool: MagicMock) -> None:
        """Non-string values (numbers, booleans, null) are skipped — they are
        available in the final tool-call kickoff packet. String arguments
        following them are still emitted."""
        mock_get_tool.return_value = _mock_tool_class()

        tc_map: dict[int, dict[str, Any]] = {
            0: {"id": "tc_1", "name": "python", "arguments": ""}
        }
        fragments = ['{"timeout": 30, "code": "hello"}']

        emitted = _stream_fragments(fragments, tc_map)
        full = "".join(emitted)
        assert full == "hello"


================================================
FILE: backend/tests/unit/onyx/chat/test_chat_utils.py
================================================
"""Tests for chat_utils.py, specifically get_custom_agent_prompt."""

from unittest.mock import MagicMock

from onyx.chat.chat_utils import _build_tool_call_response_history_message
from onyx.chat.chat_utils import get_custom_agent_prompt
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE


class TestGetCustomAgentPrompt:
    """Tests for the get_custom_agent_prompt function."""

    def _create_mock_persona(
        self,
        persona_id: int = 1,
        system_prompt: str | None = None,
        replace_base_system_prompt: bool = False,
    ) -> MagicMock:
        """Create a mock Persona with the specified attributes."""
        persona = MagicMock()
        persona.id = persona_id
        persona.system_prompt = system_prompt
        persona.replace_base_system_prompt = replace_base_system_prompt
        return persona

    def _create_mock_chat_session(
        self,
        project: MagicMock | None = None,
    ) -> MagicMock:
        """Create a mock ChatSession with the specified attributes."""
        chat_session = MagicMock()
        chat_session.project = project
        return chat_session

    def _create_mock_project(
        self,
        instructions: str = "",
    ) -> MagicMock:
        """Create a mock UserProject with the specified attributes."""
        project = MagicMock()
        project.instructions = instructions
        return project

    def test_default_persona_no_project(self) -> None:
        """Test that default persona without a project returns None."""
        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)
        chat_session = self._create_mock_chat_session(project=None)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result is None

    def test_default_persona_with_project_instructions(self) -> None:
        """Test that default persona in a project returns project instructions."""
        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)
        project = self._create_mock_project(instructions="Do X and Y")
        chat_session = self._create_mock_chat_session(project=project)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result == "Do X and Y"

    def test_default_persona_with_empty_project_instructions(self) -> None:
        """Test that default persona in a project with empty instructions returns None."""
        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)
        project = self._create_mock_project(instructions="")
        chat_session = self._create_mock_chat_session(project=project)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result is None

    def test_custom_persona_replace_base_prompt_true(self) -> None:
        """Test that custom persona with replace_base_system_prompt=True returns None."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt="Custom system prompt",
            replace_base_system_prompt=True,
        )
        chat_session = self._create_mock_chat_session(project=None)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result is None

    def test_custom_persona_with_system_prompt(self) -> None:
        """Test that custom persona with system_prompt returns the system_prompt."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt="Custom system prompt",
            replace_base_system_prompt=False,
        )
        chat_session = self._create_mock_chat_session(project=None)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result == "Custom system prompt"

    def test_custom_persona_empty_string_system_prompt(self) -> None:
        """Test that custom persona with empty string system_prompt returns None."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt="",
            replace_base_system_prompt=False,
        )
        chat_session = self._create_mock_chat_session(project=None)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result is None

    def test_custom_persona_none_system_prompt(self) -> None:
        """Test that custom persona with None system_prompt returns None."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt=None,
            replace_base_system_prompt=False,
        )
        chat_session = self._create_mock_chat_session(project=None)

        result = get_custom_agent_prompt(persona, chat_session)

        assert result is None

    def test_custom_persona_in_project_uses_persona_prompt(self) -> None:
        """Test that custom persona in a project uses persona's system_prompt, not project instructions."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt="Custom system prompt",
            replace_base_system_prompt=False,
        )
        project = self._create_mock_project(instructions="Project instructions")
        chat_session = self._create_mock_chat_session(project=project)

        result = get_custom_agent_prompt(persona, chat_session)

        # Should use persona's system_prompt, NOT project instructions
        assert result == "Custom system prompt"

    def test_custom_persona_replace_base_in_project(self) -> None:
        """Test that custom persona with replace_base_system_prompt=True in a project still returns None."""
        persona = self._create_mock_persona(
            persona_id=1,
            system_prompt="Custom system prompt",
            replace_base_system_prompt=True,
        )
        project = self._create_mock_project(instructions="Project instructions")
        chat_session = self._create_mock_chat_session(project=project)

        result = get_custom_agent_prompt(persona, chat_session)

        # Should return None because replace_base_system_prompt=True
        assert result is None


class TestBuildToolCallResponseHistoryMessage:
    def test_image_tool_uses_generated_images(self) -> None:
        message = _build_tool_call_response_history_message(
            tool_name="generate_image",
            generated_images=[{"file_id": "img-1", "revised_prompt": "p1"}],
            tool_call_response=None,
        )
        assert message == '[{"file_id": "img-1", "revised_prompt": "p1"}]'

    def test_non_image_tool_uses_placeholder(self) -> None:
        message = _build_tool_call_response_history_message(
            tool_name="web_search",
            generated_images=None,
            tool_call_response='{"raw":"value"}',
        )
        assert message == TOOL_CALL_RESPONSE_CROSS_MESSAGE


================================================
FILE: backend/tests/unit/onyx/chat/test_citation_processor.py
================================================
"""
Unit tests for DynamicCitationProcessor.

This module contains comprehensive tests for the DynamicCitationProcessor class,
which processes streaming tokens from LLMs to extract citations, remove citation
markers from output text, and emit CitationInfo objects.

Key features tested:
- Dynamic citation mapping updates
- Citation extraction and formatting
- Citation removal from output
- CitationInfo emission and tracking
- Edge cases (unicode, code blocks, invalid citations, etc.)
"""

from datetime import datetime

import pytest

from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import CitationMode
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import CitationInfo


# ============================================================================
# Helper Functions and Fixtures
# ============================================================================


def create_test_search_doc(
    document_id: str = "test-doc-1",
    link: str | None = "https://example.com/doc1",
    chunk_ind: int = 0,
    semantic_identifier: str = "Test Document",
    blurb: str = "Test blurb",
    source_type: DocumentSource = DocumentSource.WEB,
    boost: int = 1,
    hidden: bool = False,
    metadata: dict | None = None,
    score: float | None = None,
    match_highlights: list[str] | None = None,
) -> SearchDoc:
    """Create a test SearchDoc instance with default or custom values."""
    return SearchDoc(
        document_id=document_id,
        chunk_ind=chunk_ind,
        semantic_identifier=semantic_identifier,
        link=link,
        blurb=blurb,
        source_type=source_type,
        boost=boost,
        hidden=hidden,
        metadata=metadata or {},
        score=score,
        match_highlights=match_highlights or [],
        updated_at=datetime.now(),
    )


def process_tokens(
    processor: DynamicCitationProcessor, tokens: list[str | None]
) -> tuple[str, list[CitationInfo]]:
    """
    Process a list of tokens through the processor and collect results.

    Returns:
        Tuple of (output_text, citations) where:
        - output_text: All string outputs concatenated
        - citations: List of CitationInfo objects emitted
    """
    output_text = ""
    citations = []

    for token in tokens:
        for result in processor.process_token(token):
            if isinstance(result, str):
                output_text += result
            elif isinstance(result, CitationInfo):
                citations.append(result)

    # Flush remaining segment
    for result in processor.process_token(None):
        if isinstance(result, str):
            output_text += result
        elif isinstance(result, CitationInfo):
            citations.append(result)

    return output_text, citations


@pytest.fixture
def mock_search_docs() -> CitationMapping:
    """Create a dictionary of mock SearchDoc objects for testing."""
    return {
        1: create_test_search_doc(
            document_id="doc_1",
            link="https://example.com/doc1",
            semantic_identifier="Document 1",
        ),
        2: create_test_search_doc(
            document_id="doc_2",
            link="https://example.com/doc2",
            semantic_identifier="Document 2",
        ),
        3: create_test_search_doc(
            document_id="doc_3",
            link=None,  # No link
            semantic_identifier="Document 3",
        ),
        4: create_test_search_doc(
            document_id="doc_4",
            link="https://example.com/doc4",
            semantic_identifier="Document 4",
        ),
        5: create_test_search_doc(
            document_id="doc_5",
            link="https://example.com/doc5",
            semantic_identifier="Document 5",
        ),
    }


# ============================================================================
# Initialization Tests
# ============================================================================


def test_default_initialization() -> None:
    """Test default initialization of DynamicCitationProcessor."""
    processor = DynamicCitationProcessor()

    assert processor.citation_to_doc == {}
    assert processor.llm_out == ""
    assert processor.curr_segment == ""
    assert processor.hold == ""
    assert processor.cited_documents_in_order == []
    assert processor.cited_document_ids == set()
    assert processor.recent_cited_documents == set()
    assert processor.non_citation_count == 0


def test_initialization_with_custom_stop_stream() -> None:
    """Test initialization with custom stop_stream."""
    stop_stream = "STOP_TOKEN"
    processor = DynamicCitationProcessor(stop_stream=stop_stream)

    assert processor.stop_stream == stop_stream
    assert processor.citation_to_doc == {}


def test_initial_state_empty() -> None:
    """Test that initial state is empty and ready for use."""
    processor = DynamicCitationProcessor()

    assert processor.get_cited_documents() == []
    assert processor.get_cited_document_ids() == []
    assert processor.num_cited_documents == 0


# ============================================================================
# Citation Mapping Tests
# ============================================================================


def test_update_citation_mapping_single(mock_search_docs: CitationMapping) -> None:
    """Test updating citation mapping with a single mapping."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    assert len(processor.citation_to_doc) == 1
    assert processor.citation_to_doc[1] == mock_search_docs[1]
    assert processor.citation_to_doc[1].document_id == "doc_1"


def test_update_citation_mapping_multiple(
    mock_search_docs: CitationMapping,
) -> None:
    """Test updating citation mapping with multiple mappings."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    assert len(processor.citation_to_doc) == 3
    assert processor.citation_to_doc[1].document_id == "doc_1"
    assert processor.citation_to_doc[2].document_id == "doc_2"
    assert processor.citation_to_doc[3].document_id == "doc_3"


def test_update_citation_mapping_merges(mock_search_docs: CitationMapping) -> None:
    """Test that update_citation_mapping merges with existing mappings."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})
    processor.update_citation_mapping({2: mock_search_docs[2]})

    assert len(processor.citation_to_doc) == 2
    assert processor.citation_to_doc[1] == mock_search_docs[1]
    assert processor.citation_to_doc[2] == mock_search_docs[2]


def test_update_citation_mapping_ignores_duplicate_keys(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that update_citation_mapping ignores duplicate citation numbers.

    This behavior is intentional to handle cases like OpenURL reusing the same
    citation number as a Web Search result - we keep the first one registered.
    """
    processor = DynamicCitationProcessor()
    doc1 = mock_search_docs[1]
    doc2 = create_test_search_doc(
        document_id="doc_1_updated", link="https://updated.com"
    )

    processor.update_citation_mapping({1: doc1})
    processor.update_citation_mapping({1: doc2})

    # First citation should be kept, second one ignored
    assert len(processor.citation_to_doc) == 1
    assert processor.citation_to_doc[1].document_id == "doc_1"
    assert processor.citation_to_doc[1].link == "https://example.com/doc1"


# ============================================================================
# Basic Citation Processing Tests
# ============================================================================


def test_single_citation(mock_search_docs: CitationMapping) -> None:
    """Test processing a single citation [1]."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text [", "1", "] here."])

    # Raw citation pattern should be replaced with formatted version
    assert (
        "Text [" not in output
        or "Text [" in output
        and "[[1]](https://example.com/doc1)" in output
    )
    assert "here." in output
    assert len(citations) == 1
    assert citations[0].citation_number == 1
    assert citations[0].document_id == "doc_1"


def test_multiple_citations_comma_separated(
    mock_search_docs: CitationMapping,
) -> None:
    """Test processing multiple citations [1, 2, 3]."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    output, citations = process_tokens(
        processor, ["Text [", "1", ",", " ", "2", ",", "3", "] end."]
    )

    # Raw citation patterns should be replaced with formatted versions
    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output
    assert "[[3]]()" in output
    assert "end." in output
    assert len(citations) == 3
    assert {c.document_id for c in citations} == {"doc_1", "doc_2", "doc_3"}


def test_double_bracket_citation(mock_search_docs: CitationMapping) -> None:
    """Test processing double bracket citation [[1]]."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text [[", "1", "]] here."])

    # Double bracket citation should be replaced with formatted version
    assert "[[1]](https://example.com/doc1)" in output
    assert "here." in output
    assert len(citations) == 1
    assert citations[0].citation_number == 1


def test_citation_split_across_tokens(mock_search_docs: CitationMapping) -> None:
    """Test citation split across multiple tokens."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["[", "1", "]"])

    assert "[[1]](https://example.com/doc1)" in output
    assert len(citations) == 1


def test_citation_at_beginning(mock_search_docs: CitationMapping) -> None:
    """Test citation at the beginning of text."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["[", "1", "] Text here."])

    assert "[[1]](https://example.com/doc1)" in output
    assert "Text here." in output
    assert len(citations) == 1


def test_citation_at_end(mock_search_docs: CitationMapping) -> None:
    """Test citation at the end of text."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text here [", "1", "]"])

    assert "[[1]](https://example.com/doc1)" in output
    assert "Text here" in output
    assert len(citations) == 1


def test_citation_in_middle(mock_search_docs: CitationMapping) -> None:
    """Test citation in the middle of text."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Start [", "1", "] end."])

    assert "[[1]](https://example.com/doc1)" in output
    assert "Start" in output and "end." in output
    assert len(citations) == 1


# ============================================================================
# Citation Formatting and Output Tests
# ============================================================================


def test_citation_removed_from_output(mock_search_docs: CitationMapping) -> None:
    """Test that citations are removed from output text."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, _ = process_tokens(processor, ["This is text [", "1", "] with citation."])

    # Raw citation should be replaced with formatted version
    assert "This is text [[1]](https://example.com/doc1) with citation." in output


def test_formatted_citation_yielded_separately(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that formatted citations are yielded separately."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    results = []
    for token in ["Text [", "1", "] here."]:
        for result in processor.process_token(token):
            results.append(result)

    # Should have text chunks and formatted citation
    text_results = [r for r in results if isinstance(r, str)]
    citation_results = [r for r in results if isinstance(r, CitationInfo)]

    assert len(citation_results) == 1
    assert any("[[1]](https://example.com/doc1)" in r for r in text_results)


def test_leading_space_with_existing_space(
    mock_search_docs: CitationMapping,
) -> None:
    """Test leading space handling when space already exists."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, _ = process_tokens(processor, ["Text ", "[", "1", "] here."])
    # Should not add extra space
    assert "Text " in output or "Text [[1]](https://example.com/doc1)" in output


def test_leading_space_without_existing_space(
    mock_search_docs: CitationMapping,
) -> None:
    """Test leading space handling when no space exists."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, _ = process_tokens(processor, ["Text[", "1", "] here."])

    # Should preserve order: text before citation, then citation with space added
    assert "Text [[1]](https://example.com/doc1) here." in output


def test_citation_with_link(mock_search_docs: CitationMapping) -> None:
    """Test citation formatting with link."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, _ = process_tokens(processor, ["Text [", "1", "]"])

    assert "Text [[1]](https://example.com/doc1)" in output


def test_citation_without_link(mock_search_docs: CitationMapping) -> None:
    """Test citation formatting without link."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({3: mock_search_docs[3]})  # doc_3 has no link

    output, _ = process_tokens(processor, ["Text [", "3", "]"])

    assert "Text [[3]]()" in output


def test_multiple_citations_in_sequence(mock_search_docs: CitationMapping) -> None:
    """Test multiple citations formatted in sequence."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    output, citations = process_tokens(
        processor, ["Text [", "1", "][", "2", "][", "3", "]"]
    )

    assert (
        "Text [[1]](https://example.com/doc1)[[2]](https://example.com/doc2)[[3]]()"
        in output
    )
    assert len(citations) == 3


# ============================================================================
# CitationInfo Emission Tests
# ============================================================================


def test_citation_info_emitted_for_new_citation(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that CitationInfo is emitted for new citations."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    _, citations = process_tokens(processor, ["Text [", "1", "]"])

    assert len(citations) == 1
    assert citations[0].citation_number == 1
    assert citations[0].document_id == "doc_1"


def test_citation_info_contains_correct_fields(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that CitationInfo contains correct citation_number and document_id."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    _, citations = process_tokens(processor, ["[", "1", "][", "2", "]"])

    assert len(citations) == 2
    citation_numbers = {c.citation_number for c in citations}
    document_ids = {c.document_id for c in citations}
    assert citation_numbers == {1, 2}
    assert document_ids == {"doc_1", "doc_2"}


def test_citation_info_deduplication_recent(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that recent citations don't emit CitationInfo."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    _, citations1 = process_tokens(processor, ["First [", "1", "]"])
    assert len(citations1) == 1

    # Same citation again immediately - should not emit CitationInfo
    _, citations2 = process_tokens(processor, ["Second [", "1", "]"])
    assert len(citations2) == 0  # No new CitationInfo


def test_citation_info_order_matches_first_citation(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that CitationInfo order matches first citation order."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    _, citations = process_tokens(processor, ["[", "3", "][", "1", "][", "2", "]"])

    # Order should be 3, 1, 2 (first citation order)
    assert len(citations) == 3
    assert citations[0].citation_number == 3
    assert citations[1].citation_number == 1
    assert citations[2].citation_number == 2


# ============================================================================
# Citation Order Tracking Tests
# ============================================================================


def test_get_cited_documents_order(mock_search_docs: CitationMapping) -> None:
    """Test that get_cited_documents returns documents in first citation order."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    process_tokens(processor, ["[", "3", "][", "1", "][", "2", "]"])

    cited_docs = processor.get_cited_documents()
    assert len(cited_docs) == 3
    assert cited_docs[0].document_id == "doc_3"
    assert cited_docs[1].document_id == "doc_1"
    assert cited_docs[2].document_id == "doc_2"


def test_get_cited_document_ids_order(mock_search_docs: CitationMapping) -> None:
    """Test that get_cited_document_ids returns IDs in correct order."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    process_tokens(processor, ["[", "2", "][", "1", "][", "3", "]"])

    doc_ids = processor.get_cited_document_ids()
    assert doc_ids == ["doc_2", "doc_1", "doc_3"]


def test_num_cited_documents_property(mock_search_docs: CitationMapping) -> None:
    """Test that num_cited_documents property returns correct count."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    assert processor.num_cited_documents == 0

    process_tokens(processor, ["[", "1", "]"])
    assert processor.num_cited_documents == 1

    process_tokens(processor, ["[", "2", "]"])
    assert processor.num_cited_documents == 2

    # Same document again shouldn't increase count
    process_tokens(processor, ["[", "1", "]"])
    assert processor.num_cited_documents == 2


def test_multiple_citations_same_document_no_duplicate(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that multiple citations of same document don't duplicate in order."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    process_tokens(processor, ["[", "1", "][", "1", "][", "1", "]"])

    cited_docs = processor.get_cited_documents()
    assert len(cited_docs) == 1
    assert cited_docs[0].document_id == "doc_1"


# ============================================================================
# Recent Citation Deduplication Tests
# ============================================================================


def test_recent_citations_no_citation_info(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that recent citations don't emit CitationInfo."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    _, citations1 = process_tokens(processor, ["First [", "1", "]"])
    assert len(citations1) == 1

    _, citations2 = process_tokens(processor, ["Second [", "1", "]"])
    assert len(citations2) == 0  # No CitationInfo for recent citation


def test_recent_citations_still_format_text(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that recent citations still format citation text."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output1, _ = process_tokens(processor, ["First [", "1", "]"])
    assert "[[1]](https://example.com/doc1)" in output1

    output2, _ = process_tokens(processor, ["Second [", "1", "]"])
    assert "[[1]](https://example.com/doc1)" in output2  # Still formatted


def test_reset_recent_citations(mock_search_docs: CitationMapping) -> None:
    """Test that reset_recent_citations clears recent tracker."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    _, citations1 = process_tokens(processor, ["First [", "1", "]"])
    assert len(citations1) == 1

    _, citations2 = process_tokens(processor, ["Second [", "1", "]"])
    assert len(citations2) == 0  # Recent citation

    processor.reset_recent_citations()

    _, citations3 = process_tokens(processor, ["Third [", "1", "]"])
    assert len(citations3) == 0  # Still no CitationInfo (already in cited_documents)


def test_non_citation_count_threshold_resets_recent(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that non-citation count threshold (5) resets recent citations."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    _, citations1 = process_tokens(processor, ["First [", "1", "]"])
    assert len(citations1) == 1

    # Add enough non-citation text to trigger reset (>5 chars)
    _, citations2 = process_tokens(processor, ["Second [", "1", "]"])
    assert len(citations2) == 0  # Recent citation

    # Add text with more than 5 non-citation characters
    _, citations3 = process_tokens(processor, ["Long text here [", "1", "]"])
    # After >5 non-citation chars, recent citations should be cleared
    # But since doc_1 is already in cited_documents, no new CitationInfo
    assert len(citations3) == 0


# ============================================================================
# Invalid Citation Handling Tests
# ============================================================================


def test_citation_not_in_mapping_skipped(
    mock_search_docs: CitationMapping, caplog: pytest.LogCaptureFixture
) -> None:
    """Test that citations with numbers not in mapping are skipped."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text [", "99", "] here."])

    assert "[99]" not in output  # Citation removed but not processed
    assert len(citations) == 0
    assert "Citation number 99 not found in mapping" in caplog.text


def test_invalid_citation_format_skipped(
    mock_search_docs: CitationMapping,
    caplog: pytest.LogCaptureFixture,  # noqa: ARG001
) -> None:
    """Test that invalid citation number formats are skipped."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # This should not match the citation pattern, so it will be left as-is
    output, citations = process_tokens(processor, ["Text [", "abc", "] here."])

    assert len(citations) == 0
    assert "Text [abc] here." in output


def test_empty_citation_content_handled(mock_search_docs: CitationMapping) -> None:
    """Test that empty citation content is handled."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # Empty citation like [,] should be handled - empty parts are skipped
    output, citations = process_tokens(processor, ["Text [", "1", ",", " ", "2", "]"])

    # Should process both citations, skipping empty parts
    assert len(citations) >= 1  # At least one valid citation


def test_citation_with_non_integer_skipped(
    mock_search_docs: CitationMapping,
    caplog: pytest.LogCaptureFixture,  # noqa: ARG001
) -> None:
    """Test that citations with non-integer content are skipped."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # This won't match the pattern, but if it did, it would be skipped
    output, citations = process_tokens(processor, ["Text [", "1.5", "]"])

    # The pattern requires integers, so this won't match
    assert len(citations) == 0 or "[1.5]" in output


# ============================================================================
# Unicode Bracket Tests
# ============================================================================


def test_unicode_bracket_citation(mock_search_docs: CitationMapping) -> None:
    """Test processing unicode bracket citation 【1】."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text 【", "1", "】 here."])

    assert "【1】" not in output
    assert len(citations) == 1
    assert citations[0].citation_number == 1


def test_unicode_bracket_variant(mock_search_docs: CitationMapping) -> None:
    """Test processing unicode bracket variant ［1］."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text ［", "1", "］ here."])

    assert "［1］" not in output
    assert len(citations) == 1


def test_double_unicode_bracket_citation(
    mock_search_docs: CitationMapping,
) -> None:
    """Test processing double unicode bracket citation 【【1】】."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text 【【", "1", "】】 here."])

    assert "【【1】】" not in output
    assert len(citations) == 1


def test_mixed_ascii_unicode_brackets(mock_search_docs: CitationMapping) -> None:
    """Test mixed ASCII and unicode brackets."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    output, citations = process_tokens(
        processor, ["ASCII [", "1", "] unicode 【", "2", "】"]
    )

    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output
    assert len(citations) == 2


def test_unicode_brackets_split_across_tokens(
    mock_search_docs: CitationMapping,
) -> None:
    """Test unicode brackets split across tokens."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["【", "1", "】"])

    assert "【1】" not in output
    assert len(citations) == 1


# ============================================================================
# Code Block Handling Tests
# ============================================================================


def test_citation_inside_code_block_not_processed(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that citations inside code blocks are not processed."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    tokens: list[str | None] = [
        "Here's code:\n```\n",
        "def example():\n    print('[1]')\n",
        "```\n",
        "End.",
    ]
    output, citations = process_tokens(processor, tokens)

    # Citation inside code block should not be processed
    assert len(citations) == 0
    # Code block should have plaintext added
    assert "```plaintext" in output


def test_code_block_plaintext_added(
    mock_search_docs: CitationMapping,  # noqa: ARG001
) -> None:
    """Test that code blocks with ``` followed by \\n get 'plaintext' added."""
    processor = DynamicCitationProcessor()

    tokens: list[str | None] = ["Code:\n```\n", "def test():\n    pass\n", "```\n"]
    output, _ = process_tokens(processor, tokens)

    assert "```plaintext" in output


def test_citation_outside_code_block_processed(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that citations outside code blocks are processed normally."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    tokens: list[str | None] = [
        "Text [",
        "1",
        "] before code.\n```\n",
        "code here\n",
        "```\n",
        "Text [",
        "1",
        "] after code.",
    ]
    output, citations = process_tokens(processor, tokens)

    # Should have citations before and after code block
    # Same document, so only one CitationInfo (first citation)
    assert len(citations) == 1
    # Citations outside code block should be formatted
    assert "[[1]](https://example.com/doc1)" in output
    # Citation inside code block should remain as-is
    assert "code here" in output


def test_multiple_code_blocks(mock_search_docs: CitationMapping) -> None:
    """Test handling of multiple code blocks."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    tokens: list[str | None] = [
        "First block:\n```\n",
        "code1\n",
        "```\n",
        "Text [",
        "1",
        "]\n",
        "Second block:\n```\n",
        "code2\n",
        "```\n",
    ]
    output, citations = process_tokens(processor, tokens)

    assert "```plaintext" in output
    assert len(citations) == 1


# ============================================================================
# Stop Token Tests
# ============================================================================


def test_stop_token_detection_stops_processing() -> None:
    """Test that stop token detection stops processing."""
    stop_stream = "STOP"
    processor = DynamicCitationProcessor(stop_stream=stop_stream)

    results = []
    for token in ["Text ", "ST", "OP"]:
        for result in processor.process_token(token):
            results.append(result)

    # Try to add more text after stop token
    for result in processor.process_token(" more text"):
        results.append(result)

    # Processing should stop at STOP token - no results after STOP
    output = "".join(r for r in results if isinstance(r, str))
    # The stop token itself should not appear in output
    assert "STOP" not in output or output == ""


def test_partial_stop_token_held_back() -> None:
    """Test that partial stop token is held back."""
    stop_stream = "STOP"
    processor = DynamicCitationProcessor(stop_stream=stop_stream)

    results = []
    for token in ["Text ", "ST"]:
        for result in processor.process_token(token):
            results.append(result)

    # Partial stop token should be held back
    output = "".join(r for r in results if isinstance(r, str))
    # Should have "Text " but "ST" should be held
    assert "Text " in output or output == ""


def test_stop_token_at_different_positions() -> None:
    """Test stop token at different positions."""
    stop_stream = "END"

    # Stop token at beginning - when detected, processing stops for that token
    processor1 = DynamicCitationProcessor(stop_stream=stop_stream)
    results1 = []
    for token in ["END"]:
        for result in processor1.process_token(token):
            results1.append(result)
    # Stop token detection returns early, so no results
    output1 = "".join(r for r in results1 if isinstance(r, str))
    assert output1 == ""  # Stop token detected, no output

    # Stop token in middle - text before stop token is processed
    processor2 = DynamicCitationProcessor(stop_stream=stop_stream)
    results2 = []
    for token in ["Start ", "EN", "D"]:
        for result in processor2.process_token(token):
            results2.append(result)
    output2 = "".join(r for r in results2 if isinstance(r, str))
    # "Start " should be processed before stop token is detected
    assert "Start " in output2
    # Stop token "END" should not appear in output
    assert "END" not in output2


# ============================================================================
# Edge Cases
# ============================================================================


def test_empty_token_stream() -> None:
    """Test processing empty token stream."""
    processor = DynamicCitationProcessor()

    output, citations = process_tokens(processor, [])

    assert output == ""
    assert len(citations) == 0


def test_none_token_flushes_remaining_segment(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that None token (end of stream) flushes remaining segment."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    results = []
    for token in ["Remaining ", "text"]:
        for result in processor.process_token(token):
            results.append(result)

    # Flush with None
    for result in processor.process_token(None):
        results.append(result)

    output = "".join(r for r in results if isinstance(r, str))
    assert "Remaining text" in output


def test_very_long_citation_numbers(
    mock_search_docs: CitationMapping,  # noqa: ARG001
) -> None:
    """Test citations with very long citation numbers."""
    processor = DynamicCitationProcessor()
    # Create a doc with a high citation number
    doc_100 = create_test_search_doc(
        document_id="doc_100", link="https://example.com/doc100"
    )
    processor.update_citation_mapping({100: doc_100})

    output, citations = process_tokens(processor, ["Text [", "100", "]"])

    assert len(citations) == 1
    assert citations[0].citation_number == 100


def test_citations_with_extra_whitespace(
    mock_search_docs: CitationMapping,
) -> None:
    """Test citations with extra whitespace."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    # Extra whitespace in citation should be handled (stripped)
    output, citations = process_tokens(processor, ["Text [", "1", ",", " ", "2", "]"])

    assert len(citations) == 2
    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output


def test_consecutive_citations_no_text_between(
    mock_search_docs: CitationMapping,
) -> None:
    """Test consecutive citations without text between."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    output, citations = process_tokens(processor, ["[", "1", "][", "2", "]"])

    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output
    assert len(citations) == 2


def test_citations_at_stream_boundaries(mock_search_docs: CitationMapping) -> None:
    """Test citations at stream boundaries."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # Citation split at very beginning
    output1, citations1 = process_tokens(processor, ["[", "1", "] text"])
    assert len(citations1) == 1
    assert "[[1]](https://example.com/doc1) text" in output1

    # Citation split at very end
    processor2 = DynamicCitationProcessor()
    processor2.update_citation_mapping({1: mock_search_docs[1]})
    output2, citations2 = process_tokens(processor2, ["text [", "1", "]"])
    assert len(citations2) == 1
    assert "text [[1]](https://example.com/doc1)" in output2


# ============================================================================
# Dynamic Mapping Updates Tests
# ============================================================================


def test_process_tokens_then_update_mapping(
    mock_search_docs: CitationMapping,
) -> None:
    """Test processing tokens, updating mapping, then continuing."""
    processor = DynamicCitationProcessor()

    # Process tokens before mapping is set
    output1, citations1 = process_tokens(processor, ["Text [", "1", "]"])
    assert len(citations1) == 0  # No mapping yet

    # Update mapping
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # Continue processing
    output2, citations2 = process_tokens(processor, ["More text [", "1", "]"])
    assert len(citations2) == 1  # Now has mapping


def test_citations_before_mapping_skipped(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that citations before mapping update are skipped."""
    processor = DynamicCitationProcessor()

    output1, citations1 = process_tokens(processor, ["Text [", "1", "]"])
    assert len(citations1) == 0
    assert "[1]" not in output1  # Still removed from output

    processor.update_citation_mapping({1: mock_search_docs[1]})

    output2, citations2 = process_tokens(processor, ["More [", "1", "]"])
    assert len(citations2) == 1


def test_citations_after_mapping_processed(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that citations after mapping update are processed."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    output, citations = process_tokens(processor, ["Text [", "1", "]"])

    assert len(citations) == 1
    assert citations[0].document_id == "doc_1"


def test_multiple_mapping_updates_during_processing(
    mock_search_docs: CitationMapping,
) -> None:
    """Test multiple mapping updates during processing."""
    processor = DynamicCitationProcessor()

    # First mapping
    processor.update_citation_mapping({1: mock_search_docs[1]})
    output1, citations1 = process_tokens(processor, ["[", "1", "]"])
    assert len(citations1) == 1
    assert citations1[0].document_id == "doc_1"

    # Second mapping
    processor.update_citation_mapping({2: mock_search_docs[2]})
    output2, citations2 = process_tokens(processor, ["[", "2", "]"])
    assert len(citations2) == 1

    # Try to update existing citation number (should be ignored due to duplicate filtering)
    doc1_updated = create_test_search_doc(
        document_id="doc_1_updated", link="https://updated.com"
    )
    processor.update_citation_mapping({1: doc1_updated})
    output3, citations3 = process_tokens(processor, ["[", "1", "]"])
    # No new citation because citation 1 already exists and was already cited
    assert len(citations3) == 0
    # Original doc_1 should still be mapped
    assert processor.citation_to_doc[1].document_id == "doc_1"


# ============================================================================
# Integration Tests
# ============================================================================


def test_full_conversation_flow(mock_search_docs: CitationMapping) -> None:
    """Test full conversation flow with multiple turns."""
    processor = DynamicCitationProcessor()

    # Turn 1: Add some documents
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})
    output1, citations1 = process_tokens(
        processor, ["This is the first response [", "1", "] with citation."]
    )
    assert len(citations1) == 1

    # Turn 2: Add more documents and continue
    processor.update_citation_mapping({3: mock_search_docs[3], 4: mock_search_docs[4]})
    output2, citations2 = process_tokens(
        processor, ["This is the second response [", "3", "][", "4", "]."]
    )
    assert len(citations2) == 2

    # Verify order - should be doc_1, doc_3, doc_4 (first citation order)
    cited_docs = processor.get_cited_documents()
    assert len(cited_docs) == 3  # doc_1, doc_3, doc_4 (doc_2 was never cited)
    assert cited_docs[0].document_id == "doc_1"
    assert cited_docs[1].document_id == "doc_3"
    assert cited_docs[2].document_id == "doc_4"


def test_complex_text_mixed_citations_code_blocks(
    mock_search_docs: CitationMapping,
) -> None:
    """Test complex text with mixed citations and code blocks."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    tokens: list[str | None] = [
        "Here's some text [",
        "1",
        "] with a citation.\n",
        "```\n",
        "def example():\n    print('code')\n",
        "```\n",
        "More text [",
        "2",
        ", ",
        "3",
        "] here.",
    ]
    output, citations = process_tokens(processor, tokens)

    # Citations should be formatted
    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output
    assert "[[3]]()" in output
    assert "```plaintext" in output
    assert len(citations) == 3


def test_real_world_citation_patterns(mock_search_docs: CitationMapping) -> None:
    """Test real-world citation patterns."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    # Simulate a realistic LLM response
    tokens: list[str | None] = [
        "According to recent research [",
        "1",
        "], the findings suggest that ",
        "multiple studies [",
        "2",
        ", ",
        "3",
        "] have confirmed these results. ",
        "However, some researchers [",
        "1",
        "] have raised concerns.",
    ]
    output, citations = process_tokens(processor, tokens)

    # Citations should be formatted
    assert "[[1]](https://example.com/doc1)" in output
    assert "[[2]](https://example.com/doc2)" in output
    assert "[[3]]()" in output
    # Should have CitationInfo for doc_1, doc_2, doc_3 (doc_1 appears twice but only one CitationInfo)
    assert len(citations) == 3
    # Verify order
    doc_ids = [c.document_id for c in citations]
    assert "doc_1" in doc_ids
    assert "doc_2" in doc_ids
    assert "doc_3" in doc_ids


# ============================================================================
# get_next_citation_number Tests
# ============================================================================


def test_get_next_citation_number_empty() -> None:
    """Test get_next_citation_number returns 1 when no citations exist."""
    processor = DynamicCitationProcessor()

    assert processor.get_next_citation_number() == 1


def test_get_next_citation_number_with_citations(
    mock_search_docs: CitationMapping,
) -> None:
    """Test get_next_citation_number returns max + 1 when citations exist."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    assert processor.get_next_citation_number() == 3


def test_get_next_citation_number_non_sequential(
    mock_search_docs: CitationMapping,
) -> None:
    """Test get_next_citation_number with non-sequential citation numbers."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 5: mock_search_docs[2], 10: mock_search_docs[3]}
    )

    # Should return max + 1 = 11
    assert processor.get_next_citation_number() == 11


def test_project_files_then_search_tool_citations(
    mock_search_docs: CitationMapping,
) -> None:
    """
    Test that project file citations don't conflict with search tool citations.

    """
    processor = DynamicCitationProcessor()

    # Simulate project files being added (numbered 1, 2, 3)
    project_file_1 = create_test_search_doc(
        document_id="project_file_1",
        link=None,
        semantic_identifier="ProjectFile1.txt",
        source_type=DocumentSource.FILE,
    )
    project_file_2 = create_test_search_doc(
        document_id="project_file_2",
        link=None,
        semantic_identifier="ProjectFile2.txt",
        source_type=DocumentSource.FILE,
    )
    project_file_3 = create_test_search_doc(
        document_id="project_file_3",
        link=None,
        semantic_identifier="ProjectFile3.txt",
        source_type=DocumentSource.FILE,
    )

    processor.update_citation_mapping(
        {1: project_file_1, 2: project_file_2, 3: project_file_3}
    )

    # Verify project files are registered
    assert processor.get_next_citation_number() == 4
    assert len(processor.citation_to_doc) == 3

    # Simulate search tool results starting at the next available number (4)
    starting_citation = processor.get_next_citation_number()
    search_result_1 = mock_search_docs[1]  # Will be citation 4
    search_result_2 = mock_search_docs[2]  # Will be citation 5

    processor.update_citation_mapping(
        {starting_citation: search_result_1, starting_citation + 1: search_result_2}
    )

    # Verify both project files and search results are registered
    assert len(processor.citation_to_doc) == 5
    assert processor.citation_to_doc[1].document_id == "project_file_1"
    assert processor.citation_to_doc[2].document_id == "project_file_2"
    assert processor.citation_to_doc[3].document_id == "project_file_3"
    assert processor.citation_to_doc[4].document_id == "doc_1"
    assert processor.citation_to_doc[5].document_id == "doc_2"

    # Verify all citations work
    output, citations = process_tokens(
        processor,
        [
            "Project [1], [2], [3] and search results [4], [5]",
        ],
    )

    assert "[[1]]" in output
    assert "[[2]]" in output
    assert "[[3]]" in output
    assert "[[4]](https://example.com/doc1)" in output
    assert "[[5]](https://example.com/doc2)" in output
    assert len(citations) == 5


def test_adding_project_files_across_messages(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that adding more project files in subsequent messages works correctly.

    Architecture note: Each message gets a fresh citation processor, so project files
    always start from citation 1. Each message maintains its own independent citation
    space, and old messages use their saved citation mappings for display.

    This test simulates:
    - Message 1: User has 3 project files + runs search
    - Message 2: User adds 2 MORE project files (now 5 total) + runs search
    Both messages should work independently without citation conflicts.
    """
    # ===== MESSAGE 1: 3 project files + search =====
    message1_processor = DynamicCitationProcessor()

    # Add 3 project files (citations 1, 2, 3)
    project_files_msg1 = {
        1: create_test_search_doc(
            document_id="project_file_1", link=None, source_type=DocumentSource.FILE
        ),
        2: create_test_search_doc(
            document_id="project_file_2", link=None, source_type=DocumentSource.FILE
        ),
        3: create_test_search_doc(
            document_id="project_file_3", link=None, source_type=DocumentSource.FILE
        ),
    }
    message1_processor.update_citation_mapping(project_files_msg1)

    # Run search tool (citations 4, 5)
    search_start_msg1 = message1_processor.get_next_citation_number()
    assert search_start_msg1 == 4
    message1_processor.update_citation_mapping(
        {
            4: mock_search_docs[1],
            5: mock_search_docs[2],
        }
    )

    # Verify Message 1 citations
    assert len(message1_processor.citation_to_doc) == 5
    assert message1_processor.citation_to_doc[1].document_id == "project_file_1"
    assert message1_processor.citation_to_doc[4].document_id == "doc_1"

    # ===== MESSAGE 2: 5 project files + search =====
    # Fresh processor for new message (simulates new run_llm_loop() call)
    message2_processor = DynamicCitationProcessor()

    # Add 5 project files (citations 1, 2, 3, 4, 5) - includes 2 NEW files
    project_files_msg2 = {
        1: create_test_search_doc(
            document_id="project_file_1", link=None, source_type=DocumentSource.FILE
        ),
        2: create_test_search_doc(
            document_id="project_file_2", link=None, source_type=DocumentSource.FILE
        ),
        3: create_test_search_doc(
            document_id="project_file_3", link=None, source_type=DocumentSource.FILE
        ),
        4: create_test_search_doc(
            document_id="project_file_4", link=None, source_type=DocumentSource.FILE
        ),  # NEW
        5: create_test_search_doc(
            document_id="project_file_5", link=None, source_type=DocumentSource.FILE
        ),  # NEW
    }
    message2_processor.update_citation_mapping(project_files_msg2)

    # Run search tool (citations 6, 7)
    search_start_msg2 = message2_processor.get_next_citation_number()
    assert search_start_msg2 == 6  # Starts after 5 project files
    message2_processor.update_citation_mapping(
        {
            6: mock_search_docs[3],
            7: mock_search_docs[4],
        }
    )

    # Verify Message 2 citations
    assert len(message2_processor.citation_to_doc) == 7
    assert message2_processor.citation_to_doc[1].document_id == "project_file_1"
    assert message2_processor.citation_to_doc[4].document_id == "project_file_4"  # NEW
    assert message2_processor.citation_to_doc[5].document_id == "project_file_5"  # NEW
    assert message2_processor.citation_to_doc[6].document_id == "doc_3"

    # Verify both messages maintain independent citation spaces
    # Message 1: Citation 4 = search result (doc_1)
    # Message 2: Citation 4 = project file (project_file_4)
    # This is correct - each message has its own citation space
    assert message1_processor.citation_to_doc[4].document_id == "doc_1"
    assert message2_processor.citation_to_doc[4].document_id == "project_file_4"


# ============================================================================
# get_seen_citations Tests
# ============================================================================


def test_get_seen_citations_empty() -> None:
    """Test get_seen_citations returns empty dict when no citations processed."""
    processor = DynamicCitationProcessor()

    seen = processor.get_seen_citations()
    assert seen == {}


def test_get_seen_citations_returns_correct_mapping(
    mock_search_docs: CitationMapping,
) -> None:
    """Test get_seen_citations returns correct citation number to SearchDoc mapping."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    process_tokens(processor, ["[", "1", "][", "3", "]"])  # Note: skipping [2]

    seen = processor.get_seen_citations()
    assert len(seen) == 2
    assert 1 in seen
    assert 3 in seen
    assert 2 not in seen  # Citation 2 was never encountered
    assert seen[1] == mock_search_docs[1]
    assert seen[3] == mock_search_docs[3]


def test_get_seen_citations_accumulates_across_calls(
    mock_search_docs: CitationMapping,
) -> None:
    """Test get_seen_citations accumulates citations across multiple process_token calls."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping(
        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
    )

    # First batch
    process_tokens(processor, ["[", "1", "]"])
    seen1 = processor.get_seen_citations()
    assert len(seen1) == 1
    assert 1 in seen1

    # Second batch
    process_tokens(processor, ["[", "2", "]"])
    seen2 = processor.get_seen_citations()
    assert len(seen2) == 2
    assert 1 in seen2
    assert 2 in seen2

    # Third batch
    process_tokens(processor, ["[", "3", "]"])
    seen3 = processor.get_seen_citations()
    assert len(seen3) == 3
    assert 1 in seen3
    assert 2 in seen3
    assert 3 in seen3


def test_get_seen_citations_same_citation_multiple_times(
    mock_search_docs: CitationMapping,
) -> None:
    """Test that citing the same document multiple times only adds it once to seen_citations."""
    processor = DynamicCitationProcessor()
    processor.update_citation_mapping({1: mock_search_docs[1]})

    # Cite [1] multiple times
    process_tokens(processor, ["[", "1", "][", "1", "][", "1", "]"])

    seen = processor.get_seen_citations()
    assert len(seen) == 1
    assert seen[1] == mock_search_docs[1]


def test_get_seen_citations_with_remove_mode(
    mock_search_docs: CitationMapping,
) -> None:
    """Test get_seen_citations works correctly with REMOVE mode."""
    processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    process_tokens(processor, ["[", "1", "][", "2", "]"])

    seen = processor.get_seen_citations()
    assert len(seen) == 2
    assert seen[1].document_id == "doc_1"
    assert seen[2].document_id == "doc_2"


def test_seen_citations_vs_cited_documents(
    mock_search_docs: CitationMapping,
) -> None:
    """Test the difference between seen_citations and cited_documents.

    seen_citations: citation number -> SearchDoc (tracks which citations were parsed)
    cited_documents: list of SearchDocs in first-citation order (for CitationInfo emission)
    """
    # With REMOVE mode, cited_documents won't be populated but seen_citations will be
    processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})

    process_tokens(processor, ["[", "1", "][", "2", "]"])

    # seen_citations should have both
    seen = processor.get_seen_citations()
    assert len(seen) == 2

    # cited_documents should be empty (because citation_mode=REMOVE)
    cited = processor.get_cited_documents()
    assert len(cited) == 0

    # Now test with HYPERLINK mode
    processor2 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
    processor2.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})
    process_tokens(processor2, ["[", "1", "][", "2", "]"])

    # Both should be populated
    seen2 = processor2.get_seen_citations()
    assert len(seen2) == 2
    cited2 = processor2.get_cited_documents()
    assert len(cited2) == 2


# ============================================================================
# CitationMode Tests
# ============================================================================


class TestCitationModeRemove:
    """Tests for CitationMode.REMOVE - citations are completely removed from output."""

    def test_remove_mode_removes_citations_from_output(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode removes citation markers from output."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [", "1", "] here."])

        # Citation should be completely removed
        assert "[1]" not in output
        assert "[[1]]" not in output
        # Text should flow naturally
        assert "Text" in output
        assert "here." in output
        # No CitationInfo should be emitted
        assert len(citations) == 0

    def test_remove_mode_no_citation_info_emitted(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode does not emit CitationInfo objects."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", "][", "2", "][", "3", "]"]
        )

        # All citations should be removed
        assert "[1]" not in output
        assert "[2]" not in output
        assert "[3]" not in output
        # No CitationInfo should be emitted
        assert len(citations) == 0

    def test_remove_mode_tracks_seen_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode still tracks seen citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        process_tokens(processor, ["Text [", "1", "][", "2", "][", "3", "]"])

        # Seen citations should be tracked
        seen = processor.get_seen_citations()
        assert len(seen) == 3
        assert 1 in seen
        assert 2 in seen
        assert 3 in seen
        assert seen[1].document_id == "doc_1"

    def test_remove_mode_handles_double_space(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode handles spacing correctly (no double spaces)."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Text [", "1", "] more text."])

        # Should not have double space
        assert "Text  more" not in output

    def test_remove_mode_handles_punctuation_spacing(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode handles spacing before punctuation correctly."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Text [", "1", "]."])

        # Should not have space before period
        assert "Text ." not in output

    def test_remove_mode_with_multiple_citations_in_bracket(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with comma-separated citations [1, 2, 3]."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", ", ", "2", ", ", "3", "] end."]
        )

        # Citation should be removed
        assert "[1, 2, 3]" not in output
        # No CitationInfo emitted
        assert len(citations) == 0
        # But seen citations tracked
        seen = processor.get_seen_citations()
        assert len(seen) == 3

    def test_remove_mode_with_unicode_brackets(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with unicode bracket citation 【1】."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text 【", "1", "】 here."])

        # Unicode citation should be removed
        assert "【1】" not in output
        assert len(citations) == 0
        assert len(processor.get_seen_citations()) == 1


class TestCitationModeKeepMarkers:
    """Tests for CitationMode.KEEP_MARKERS - original markers preserved unchanged."""

    def test_keep_markers_mode_preserves_original_citation(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS mode preserves original [1] format."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [", "1", "] here."])

        # Original citation format should be preserved
        assert "[1]" in output
        # Should NOT have markdown link format
        assert "[[1]](https://example.com/doc1)" not in output
        # No CitationInfo should be emitted
        assert len(citations) == 0

    def test_keep_markers_mode_no_citation_info_emitted(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS mode does not emit CitationInfo objects."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", "][", "2", "][", "3", "]"]
        )

        # Original citations should be preserved
        assert "[1]" in output
        assert "[2]" in output
        assert "[3]" in output
        # No CitationInfo should be emitted
        assert len(citations) == 0

    def test_keep_markers_mode_tracks_seen_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS mode still tracks seen citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        process_tokens(processor, ["Text [", "1", "][", "2", "][", "3", "]"])

        # Seen citations should be tracked
        seen = processor.get_seen_citations()
        assert len(seen) == 3
        assert 1 in seen
        assert 2 in seen
        assert 3 in seen

    def test_keep_markers_mode_with_double_brackets(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test KEEP_MARKERS mode with double bracket citation [[1]]."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [[", "1", "]] here."])

        # Original double bracket format should be preserved
        assert "[[1]]" in output
        # Should NOT have markdown link format
        assert "[[1]](https://example.com/doc1)" not in output
        # No CitationInfo should be emitted
        assert len(citations) == 0

    def test_keep_markers_mode_with_comma_separated_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test KEEP_MARKERS mode with comma-separated citations [1, 2, 3]."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", ", ", "2", ", ", "3", "] end."]
        )

        # Original format should be preserved
        assert "[1, 2, 3]" in output
        # No CitationInfo emitted
        assert len(citations) == 0
        # But seen citations tracked
        seen = processor.get_seen_citations()
        assert len(seen) == 3

    def test_keep_markers_mode_with_unicode_brackets(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test KEEP_MARKERS mode with unicode bracket citation 【1】."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text 【", "1", "】 here."])

        # Original unicode bracket format should be preserved
        assert "【1】" in output
        assert len(citations) == 0
        assert len(processor.get_seen_citations()) == 1

    def test_keep_markers_mode_preserves_spacing(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS mode preserves text spacing naturally."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Text [", "1", "] more text."])

        # Text should flow naturally with citation
        assert "Text [1] more text." in output or "Text [1]more text." in output


class TestCitationModeHyperlink:
    """Tests for CitationMode.HYPERLINK - citations replaced with markdown links."""

    def test_hyperlink_mode_formats_citation_as_link(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that HYPERLINK mode formats citations as [[n]](url)."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [", "1", "] here."])

        # Should have markdown link format
        assert "[[1]](https://example.com/doc1)" in output
        # Original format should be replaced
        assert "Text [1]" not in output or "[[1]]" in output
        # CitationInfo should be emitted
        assert len(citations) == 1
        assert citations[0].citation_number == 1
        assert citations[0].document_id == "doc_1"

    def test_hyperlink_mode_emits_citation_info(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that HYPERLINK mode emits CitationInfo objects."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", "][", "2", "][", "3", "]"]
        )

        # All citations should be formatted
        assert "[[1]](https://example.com/doc1)" in output
        assert "[[2]](https://example.com/doc2)" in output
        assert "[[3]]()" in output
        # CitationInfo should be emitted for each
        assert len(citations) == 3
        citation_numbers = {c.citation_number for c in citations}
        assert citation_numbers == {1, 2, 3}

    def test_hyperlink_mode_tracks_seen_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that HYPERLINK mode tracks seen citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2]}
        )

        process_tokens(processor, ["[", "1", "][", "2", "]"])

        # Seen citations should be tracked
        seen = processor.get_seen_citations()
        assert len(seen) == 2
        assert 1 in seen
        assert 2 in seen

    def test_hyperlink_mode_populates_cited_documents(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that HYPERLINK mode populates cited_documents in order."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        process_tokens(processor, ["[", "3", "][", "1", "][", "2", "]"])

        # cited_documents should be populated in first-citation order
        cited = processor.get_cited_documents()
        assert len(cited) == 3
        assert cited[0].document_id == "doc_3"
        assert cited[1].document_id == "doc_1"
        assert cited[2].document_id == "doc_2"

    def test_hyperlink_mode_is_default(self, mock_search_docs: CitationMapping) -> None:
        """Test that HYPERLINK mode is the default behavior."""
        processor = DynamicCitationProcessor()  # No citation_mode specified
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [", "1", "]"])

        # Should behave like HYPERLINK mode
        assert "[[1]](https://example.com/doc1)" in output
        assert len(citations) == 1


class TestCitationModesWithCodeBlocks:
    """Tests for citation modes behavior with code blocks."""

    def test_remove_mode_ignores_citations_in_code_block(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that REMOVE mode doesn't process citations inside code blocks."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        tokens: list[str | None] = [
            "Here's code:\n```\n",
            "print('[1]')\n",
            "```\n",
            "End.",
        ]
        output, citations = process_tokens(processor, tokens)

        # Citation inside code block should be preserved
        assert "[1]" in output
        assert len(citations) == 0

    def test_keep_markers_mode_ignores_citations_in_code_block(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS mode doesn't process citations inside code blocks."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        tokens: list[str | None] = [
            "Here's code:\n```\n",
            "print('[1]')\n",
            "```\n",
            "End.",
        ]
        output, citations = process_tokens(processor, tokens)

        # Citation inside code block should be preserved
        assert "[1]" in output
        assert len(citations) == 0

    def test_hyperlink_mode_ignores_citations_in_code_block(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that HYPERLINK mode doesn't process citations inside code blocks."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        tokens: list[str | None] = [
            "Here's code:\n```\n",
            "print('[1]')\n",
            "```\n",
            "End.",
        ]
        output, citations = process_tokens(processor, tokens)

        # Citation inside code block should be preserved (not replaced with link)
        assert "[1]" in output
        # No CitationInfo emitted for citation in code block
        assert len(citations) == 0


# ============================================================================
# Edge Case Tests
# ============================================================================


class TestCitationModeEdgeCases:
    """Edge case tests for citation modes."""

    def test_remove_mode_citation_at_start_of_text(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode when citation is at the very start of text."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["[", "1", "] starts here."])

        assert "[1]" not in output
        assert "starts here." in output
        # Note: When citation is at start, the space after the citation is preserved
        # This is expected behavior - the spacing logic handles trailing spaces before
        # punctuation/space, but leading spaces after removed citations remain
        assert len(citations) == 0

    def test_remove_mode_citation_at_end_of_text(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode when citation is at the very end of text."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["ends here [", "1", "]"])

        assert "[1]" not in output
        assert "ends here" in output
        assert len(citations) == 0

    def test_remove_mode_multiple_consecutive_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with multiple consecutive citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}
        )

        output, citations = process_tokens(
            processor, ["Text [", "1", "][", "2", "][", "3", "] end."]
        )

        assert "[1]" not in output
        assert "[2]" not in output
        assert "[3]" not in output
        assert "Text" in output
        assert "end." in output
        # Should track all citations
        assert len(processor.get_seen_citations()) == 3

    def test_remove_mode_citation_followed_by_newline(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode when citation is followed by newline."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Text [", "1", "]\nNew line."])

        assert "[1]" not in output
        assert "Text" in output
        assert "New line." in output

    def test_remove_mode_only_citations_no_other_text(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode when text is only citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2]}
        )

        output, citations = process_tokens(processor, ["[", "1", "][", "2", "]"])

        # Should still track citations even though output is mostly empty
        assert len(processor.get_seen_citations()) == 2
        assert len(citations) == 0

    def test_keep_markers_mode_citation_at_start(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test KEEP_MARKERS mode when citation is at the start."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["[", "1", "] starts here."])

        assert "[1]" in output
        assert "starts here." in output
        assert len(citations) == 0

    def test_hyperlink_mode_citation_with_special_chars_in_url(
        self,
        mock_search_docs: CitationMapping,  # noqa: ARG002
    ) -> None:
        """Test HYPERLINK mode with special characters in URL."""
        special_doc = create_test_search_doc(
            document_id="special_doc",
            link="https://example.com/doc?param=value&other=123#section",
        )
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: special_doc})

        output, citations = process_tokens(processor, ["Text [", "1", "] here."])

        assert "[[1]](https://example.com/doc?param=value&other=123#section)" in output
        assert len(citations) == 1

    def test_hyperlink_mode_citation_with_no_url(
        self,
        mock_search_docs: CitationMapping,  # noqa: ARG002
    ) -> None:
        """Test HYPERLINK mode when document has no URL."""
        no_url_doc = create_test_search_doc(
            document_id="no_url_doc",
            link=None,
        )
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: no_url_doc})

        output, citations = process_tokens(processor, ["Text [", "1", "] here."])

        # Should still format but with empty link
        assert "[[1]]()" in output
        assert len(citations) == 1

    def test_all_modes_with_citation_in_parentheses(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test all modes with citation inside parentheses (see [1])."""
        for mode in [
            CitationMode.REMOVE,
            CitationMode.KEEP_MARKERS,
            CitationMode.HYPERLINK,
        ]:
            processor = DynamicCitationProcessor(citation_mode=mode)
            processor.update_citation_mapping({1: mock_search_docs[1]})

            output, _ = process_tokens(processor, ["(see [", "1", "])"])

            if mode == CitationMode.REMOVE:
                assert "[1]" not in output
            elif mode == CitationMode.KEEP_MARKERS:
                assert "[1]" in output
            else:  # HYPERLINK
                assert "[[1]]" in output

    def test_all_modes_with_citation_after_comma(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test all modes with citation after comma."""
        for mode in [
            CitationMode.REMOVE,
            CitationMode.KEEP_MARKERS,
            CitationMode.HYPERLINK,
        ]:
            processor = DynamicCitationProcessor(citation_mode=mode)
            processor.update_citation_mapping({1: mock_search_docs[1]})

            output, _ = process_tokens(processor, ["First,[", "1", "] second."])

            if mode == CitationMode.REMOVE:
                assert "[1]" not in output
            elif mode == CitationMode.KEEP_MARKERS:
                assert "[1]" in output
            else:  # HYPERLINK
                assert "[[1]]" in output

    def test_remove_mode_handles_tab_character(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode handles tab character before citation."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Text\t[", "1", "] more."])

        assert "[1]" not in output
        # Tab should be handled appropriately

    def test_citation_number_zero(
        self,
        mock_search_docs: CitationMapping,  # noqa: ARG002
    ) -> None:
        """Test handling of citation number 0."""
        zero_doc = create_test_search_doc(
            document_id="zero_doc", link="https://zero.com"
        )
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({0: zero_doc})

        output, citations = process_tokens(processor, ["Text [", "0", "] here."])

        assert "[[0]](https://zero.com)" in output
        assert len(citations) == 1
        assert citations[0].citation_number == 0

    def test_large_citation_numbers(
        self,
        mock_search_docs: CitationMapping,  # noqa: ARG002
    ) -> None:
        """Test handling of large citation numbers."""
        large_doc = create_test_search_doc(
            document_id="large_doc", link="https://large.com"
        )
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({9999: large_doc})

        output, citations = process_tokens(processor, ["Text [", "9999", "] here."])

        assert "[[9999]](https://large.com)" in output
        assert len(citations) == 1
        assert citations[0].citation_number == 9999

    def test_negative_citation_number_not_processed(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that negative numbers in brackets are not processed as citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        # Negative numbers should not be treated as citations
        output, citations = process_tokens(
            processor, ["Array index [-", "1", "] here."]
        )

        # Should not be processed as citation (no mapping for -1)
        assert len(citations) == 0

    def test_mixed_valid_invalid_citations_in_sequence(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test processing mix of valid and invalid citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 3: mock_search_docs[3]}
        )

        # Citation 2 is not in mapping
        output, citations = process_tokens(
            processor, ["Text [", "1", "][", "2", "][", "3", "] end."]
        )

        # Should process 1 and 3, skip 2
        assert "[[1]]" in output
        assert "[[3]]" in output
        assert len(citations) == 2
        # 2 should not be in seen citations since it's not in mapping
        seen = processor.get_seen_citations()
        assert 1 in seen
        assert 2 not in seen
        assert 3 in seen

    def test_empty_token_stream(self) -> None:
        """Test processing empty token stream."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)

        output, citations = process_tokens(processor, [])

        assert output == ""
        assert len(citations) == 0

    def test_only_none_token(self) -> None:
        """Test processing only None token (flush signal)."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)

        output, citations = process_tokens(processor, [None])

        assert output == ""
        assert len(citations) == 0

    def test_whitespace_only_tokens(self, mock_search_docs: CitationMapping) -> None:
        """Test processing whitespace-only tokens between citations."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping(
            {1: mock_search_docs[1], 2: mock_search_docs[2]}
        )

        output, citations = process_tokens(
            processor, ["[", "1", "]", "   ", "[", "2", "]"]
        )

        assert "[[1]]" in output
        assert "[[2]]" in output
        assert len(citations) == 2

    def test_unicode_text_around_citations(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test citations surrounded by unicode text."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(
            processor, ["日本語テキスト [", "1", "] 続きのテキスト"]
        )

        assert "[[1]]" in output
        assert "日本語テキスト" in output
        assert "続きのテキスト" in output
        assert len(citations) == 1

    def test_emoji_around_citations(self, mock_search_docs: CitationMapping) -> None:
        """Test citations surrounded by emoji."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(
            processor, ["Great! 🎉 [", "1", "] Amazing! 🚀"]
        )

        assert "[[1]]" in output
        assert "🎉" in output
        assert "🚀" in output
        assert len(citations) == 1


class TestCitationModeWithDifferentProcessors:
    """Test using multiple processors with different modes."""

    def test_separate_processors_different_modes(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test using separate processors with different citation modes."""
        # Processor 1: HYPERLINK mode
        processor1 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor1.update_citation_mapping({1: mock_search_docs[1]})
        output1, citations1 = process_tokens(processor1, ["Text [", "1", "]"])
        assert "[[1]](https://example.com/doc1)" in output1
        assert len(citations1) == 1

        # Processor 2: KEEP_MARKERS mode
        processor2 = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor2.update_citation_mapping({1: mock_search_docs[1]})
        output2, citations2 = process_tokens(processor2, ["Text [", "1", "]"])
        assert "[1]" in output2
        assert "[[1]]" not in output2
        assert len(citations2) == 0

        # Processor 3: REMOVE mode
        processor3 = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor3.update_citation_mapping({1: mock_search_docs[1]})
        output3, citations3 = process_tokens(processor3, ["Text [", "1", "]"])
        assert "[1]" not in output3
        assert len(citations3) == 0

        # All should track seen citations
        assert len(processor1.get_seen_citations()) == 1
        assert len(processor2.get_seen_citations()) == 1
        assert len(processor3.get_seen_citations()) == 1

    def test_processors_do_not_share_state(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that separate processors do not share state."""
        processor1 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor1.update_citation_mapping({1: mock_search_docs[1]})
        process_tokens(processor1, ["[", "1", "]"])

        processor2 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)
        processor2.update_citation_mapping({2: mock_search_docs[2]})
        process_tokens(processor2, ["[", "2", "]"])

        # Each processor should only have its own citations
        assert 1 in processor1.get_seen_citations()
        assert 2 not in processor1.get_seen_citations()
        assert 2 in processor2.get_seen_citations()
        assert 1 not in processor2.get_seen_citations()


class TestRemoveModeSpacingEdgeCases:
    """Detailed spacing edge cases for REMOVE mode."""

    def test_remove_mode_citation_between_sentences(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation between sentences."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(
            processor, ["First sentence. [", "1", "] Second sentence."]
        )

        assert "[1]" not in output
        assert "First sentence." in output
        assert "Second sentence." in output

    def test_remove_mode_citation_before_question_mark(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation before question mark."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Is this true [", "1", "]?"])

        assert "[1]" not in output
        # Should not have space before question mark
        assert "true ?" not in output

    def test_remove_mode_citation_before_exclamation(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation before exclamation mark."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["Amazing [", "1", "]!"])

        assert "[1]" not in output
        # Should not have space before exclamation
        assert "Amazing !" not in output

    def test_remove_mode_citation_before_semicolon(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation before semicolon."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["First part [", "1", "]; second part."])

        assert "[1]" not in output
        # Should not have space before semicolon
        assert "part ;" not in output

    def test_remove_mode_citation_before_closing_paren(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation before closing parenthesis."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["(see this [", "1", "])"])

        assert "[1]" not in output
        # Should not have space before closing paren
        assert "this )" not in output

    def test_remove_mode_citation_before_closing_bracket(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test REMOVE mode with citation before closing bracket."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, _ = process_tokens(processor, ["[see this [", "1", "]]"])

        assert "[[1]]" not in output


class TestKeepMarkersEdgeCases:
    """Edge cases specific to KEEP_MARKERS mode."""

    def test_keep_markers_exact_text_preservation(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test that KEEP_MARKERS preserves exact original text."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        original_text = "The result [1] shows improvement."
        tokens: list[str | None] = list(
            original_text
        )  # Split into individual characters
        output, _ = process_tokens(processor, tokens)

        # Should preserve the exact text
        assert "[1]" in output

    def test_keep_markers_with_citation_not_in_mapping(
        self, mock_search_docs: CitationMapping
    ) -> None:
        """Test KEEP_MARKERS with citation number not in mapping."""
        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
        processor.update_citation_mapping({1: mock_search_docs[1]})

        output, citations = process_tokens(processor, ["Text [", "99", "] here."])

        # Citation 99 is not in mapping, but text should still be preserved
        # (behavior depends on implementation - citation may be kept or removed)
        assert len(citations) == 0
        # Should not be in seen citations
        assert 99 not in processor.get_seen_citations()


================================================
FILE: backend/tests/unit/onyx/chat/test_citation_utils.py
================================================
"""
Unit tests for citation_utils module.

This module tests the collapse_citations function which renumbers citations
in text to use the smallest possible numbers while respecting existing mappings.
"""

from datetime import datetime

from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_utils import collapse_citations
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc


# ============================================================================
# Helper Functions
# ============================================================================


def create_test_search_doc(
    document_id: str = "test-doc-1",
    link: str | None = "https://example.com/doc1",
    chunk_ind: int = 0,
    semantic_identifier: str = "Test Document",
    blurb: str = "Test blurb",
    source_type: DocumentSource = DocumentSource.WEB,
    boost: int = 1,
    hidden: bool = False,
    metadata: dict | None = None,
    score: float | None = None,
    match_highlights: list[str] | None = None,
) -> SearchDoc:
    """Create a test SearchDoc instance with default or custom values."""
    return SearchDoc(
        document_id=document_id,
        chunk_ind=chunk_ind,
        semantic_identifier=semantic_identifier,
        link=link,
        blurb=blurb,
        source_type=source_type,
        boost=boost,
        hidden=hidden,
        metadata=metadata or {},
        score=score,
        match_highlights=match_highlights or [],
        updated_at=datetime.now(),
    )


# ============================================================================
# Basic Functionality Tests
# ============================================================================


class TestCollapseCitationsBasic:
    """Basic functionality tests for collapse_citations."""

    def test_empty_text_and_mappings(self) -> None:
        """Test with empty text and empty mappings."""
        text, mapping = collapse_citations("", {}, {})
        assert text == ""
        assert mapping == {}

    def test_text_without_citations(self) -> None:
        """Test text without any citations remains unchanged."""
        input_text = "This is some text without any citations."
        text, mapping = collapse_citations(input_text, {}, {})
        assert text == input_text
        assert mapping == {}

    def test_empty_existing_mapping_starts_from_one(self) -> None:
        """Test that with empty existing mapping, new citations start from 1."""
        doc1 = create_test_search_doc(document_id="doc_50")
        doc2 = create_test_search_doc(document_id="doc_60")
        new_mapping: CitationMapping = {50: doc1, 60: doc2}

        text, mapping = collapse_citations("See [50] and [60].", {}, new_mapping)

        # Should start from 1 when existing mapping is empty
        assert text == "See [1] and [2]."
        assert set(mapping.keys()) == {1, 2}
        assert mapping[1].document_id == "doc_50"
        assert mapping[2].document_id == "doc_60"

    def test_single_citation_no_existing(self) -> None:
        """Test collapsing a single citation with no existing mappings."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations("See [25] for details.", {}, new_mapping)

        assert text == "See [1] for details."
        assert 1 in mapping
        assert mapping[1].document_id == "doc_25"
        assert len(mapping) == 1

    def test_multiple_citations_no_existing(self) -> None:
        """Test collapsing multiple citations with no existing mappings."""
        doc1 = create_test_search_doc(document_id="doc_100")
        doc2 = create_test_search_doc(document_id="doc_200")
        doc3 = create_test_search_doc(document_id="doc_300")
        new_mapping: CitationMapping = {100: doc1, 200: doc2, 300: doc3}

        text, mapping = collapse_citations(
            "See [100], [200], and [300].", {}, new_mapping
        )

        assert text == "See [1], [2], and [3]."
        assert mapping[1].document_id == "doc_100"
        assert mapping[2].document_id == "doc_200"
        assert mapping[3].document_id == "doc_300"
        assert len(mapping) == 3


class TestCollapseCitationsWithExisting:
    """Tests for collapse_citations with existing citation mappings."""

    def test_continues_from_existing_mapping(self) -> None:
        """Test that new citations start from the next available number."""
        existing_doc = create_test_search_doc(document_id="existing_doc")
        existing_mapping: CitationMapping = {1: existing_doc}

        new_doc = create_test_search_doc(document_id="new_doc")
        new_mapping: CitationMapping = {50: new_doc}

        text, mapping = collapse_citations(
            "See [50] for more.", existing_mapping, new_mapping
        )

        assert text == "See [2] for more."
        assert 1 in mapping
        assert 2 in mapping
        assert mapping[1].document_id == "existing_doc"
        assert mapping[2].document_id == "new_doc"
        assert len(mapping) == 2

    def test_reuses_existing_citation_for_same_document(self) -> None:
        """Test that citations to existing documents use the existing number."""
        doc = create_test_search_doc(document_id="shared_doc")
        existing_mapping: CitationMapping = {1: doc}

        # Same document referenced with a different citation number
        new_doc = create_test_search_doc(document_id="shared_doc")
        new_mapping: CitationMapping = {50: new_doc}

        text, mapping = collapse_citations(
            "See [50] again.", existing_mapping, new_mapping
        )

        assert text == "See [1] again."
        assert len(mapping) == 1
        assert mapping[1].document_id == "shared_doc"

    def test_mixed_existing_and_new_documents(self) -> None:
        """Test with a mix of existing and new documents."""
        existing_doc1 = create_test_search_doc(document_id="doc_a")
        existing_doc2 = create_test_search_doc(document_id="doc_b")
        existing_mapping: CitationMapping = {1: existing_doc1, 2: existing_doc2}

        # 30 refers to existing doc_a, 31 is new, 32 refers to existing doc_b
        new_doc_a = create_test_search_doc(document_id="doc_a")
        new_doc_c = create_test_search_doc(document_id="doc_c")
        new_doc_b = create_test_search_doc(document_id="doc_b")
        new_mapping: CitationMapping = {30: new_doc_a, 31: new_doc_c, 32: new_doc_b}

        text, mapping = collapse_citations(
            "Refs: [30], [31], [32].", existing_mapping, new_mapping
        )

        # [30] -> [1] (doc_a exists as 1)
        # [31] -> [3] (doc_c is new, next available)
        # [32] -> [2] (doc_b exists as 2)
        assert text == "Refs: [1], [3], [2]."
        assert len(mapping) == 3
        assert mapping[1].document_id == "doc_a"
        assert mapping[2].document_id == "doc_b"
        assert mapping[3].document_id == "doc_c"

    def test_existing_mapping_unchanged(self) -> None:
        """Test that existing mapping values are not modified."""
        existing_doc = create_test_search_doc(
            document_id="existing", link="https://existing.com"
        )
        existing_mapping: CitationMapping = {5: existing_doc}

        new_doc = create_test_search_doc(document_id="new_doc")
        new_mapping: CitationMapping = {100: new_doc}

        text, mapping = collapse_citations("[100]", existing_mapping, new_mapping)

        # Existing mapping should be preserved with its original key
        assert 5 in mapping
        assert mapping[5].document_id == "existing"
        assert mapping[5].link == "https://existing.com"
        # New citation should get next available number (6)
        assert 6 in mapping
        assert mapping[6].document_id == "new_doc"


class TestCollapseCitationsMultipleCitations:
    """Tests for multiple citation formats and edge cases."""

    def test_same_citation_multiple_times(self) -> None:
        """Test the same citation appearing multiple times in text."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations(
            "[25] says X. Also [25] says Y.", {}, new_mapping
        )

        assert text == "[1] says X. Also [1] says Y."
        assert len(mapping) == 1
        assert mapping[1].document_id == "doc_25"

    def test_comma_separated_citations(self) -> None:
        """Test comma-separated citations like [1, 2, 3]."""
        doc1 = create_test_search_doc(document_id="doc_10")
        doc2 = create_test_search_doc(document_id="doc_20")
        new_mapping: CitationMapping = {10: doc1, 20: doc2}

        text, mapping = collapse_citations("[10, 20]", {}, new_mapping)

        assert text == "[1, 2]"
        assert len(mapping) == 2

    def test_double_bracket_citations(self) -> None:
        """Test double bracket citations like [[25]]."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations("See [[25]] for info.", {}, new_mapping)

        assert text == "See [[1]] for info."
        assert mapping[1].document_id == "doc_25"

    def test_same_doc_different_old_numbers(self) -> None:
        """Test same document appearing with different citation numbers."""
        doc = create_test_search_doc(document_id="same_doc")
        # Same document with two different citation numbers
        new_mapping: CitationMapping = {
            50: doc,
            60: create_test_search_doc(document_id="same_doc"),
        }

        text, mapping = collapse_citations("[50] and [60]", {}, new_mapping)

        # Both should map to the same new number
        assert text == "[1] and [1]"
        assert len(mapping) == 1
        assert mapping[1].document_id == "same_doc"


class TestCollapseCitationsUnicodeBrackets:
    """Tests for unicode bracket variants."""

    def test_unicode_brackets_chinese(self) -> None:
        """Test Chinese-style brackets 【】."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations("See 【25】 for details.", {}, new_mapping)

        assert text == "See 【1】 for details."
        assert mapping[1].document_id == "doc_25"

    def test_unicode_brackets_fullwidth(self) -> None:
        """Test fullwidth brackets ［］."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations("See ［25］ for details.", {}, new_mapping)

        assert text == "See ［1］ for details."
        assert mapping[1].document_id == "doc_25"

    def test_double_unicode_brackets(self) -> None:
        """Test double unicode brackets 【【25】】."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        text, mapping = collapse_citations("See 【【25】】 for info.", {}, new_mapping)

        assert text == "See 【【1】】 for info."
        assert mapping[1].document_id == "doc_25"


class TestCollapseCitationsEdgeCases:
    """Edge case tests for collapse_citations."""

    def test_citation_not_in_mapping(self) -> None:
        """Test citations in text that aren't in the new mapping are preserved."""
        doc = create_test_search_doc(document_id="doc_25")
        new_mapping: CitationMapping = {25: doc}

        # [99] is not in the mapping, should remain unchanged
        text, mapping = collapse_citations("[25] and [99]", {}, new_mapping)

        assert text == "[1] and [99]"
        assert len(mapping) == 1

    def test_non_sequential_existing_mapping(self) -> None:
        """Test with non-sequential existing mapping numbers."""
        existing_mapping: CitationMapping = {
            5: create_test_search_doc(document_id="doc_5"),
            10: create_test_search_doc(document_id="doc_10"),
        }

        new_doc = create_test_search_doc(document_id="new_doc")
        new_mapping: CitationMapping = {99: new_doc}

        text, mapping = collapse_citations("[99]", existing_mapping, new_mapping)

        # Next available should be max(5, 10) + 1 = 11
        assert text == "[11]"
        assert 5 in mapping
        assert 10 in mapping
        assert 11 in mapping
        assert len(mapping) == 3

    def test_preserves_text_around_citations(self) -> None:
        """Test that text around citations is preserved exactly."""
        doc = create_test_search_doc(document_id="doc_1")
        new_mapping: CitationMapping = {100: doc}

        input_text = "According to the source [100], this is true.\n\nNext paragraph."
        text, mapping = collapse_citations(input_text, {}, new_mapping)

        assert text == "According to the source [1], this is true.\n\nNext paragraph."

    def test_citation_at_start_of_text(self) -> None:
        """Test citation at the very start of text."""
        doc = create_test_search_doc(document_id="doc_1")
        new_mapping: CitationMapping = {50: doc}

        text, mapping = collapse_citations("[50] is the answer.", {}, new_mapping)

        assert text == "[1] is the answer."

    def test_citation_at_end_of_text(self) -> None:
        """Test citation at the very end of text."""
        doc = create_test_search_doc(document_id="doc_1")
        new_mapping: CitationMapping = {50: doc}

        text, mapping = collapse_citations("The answer is [50]", {}, new_mapping)

        assert text == "The answer is [1]"

    def test_adjacent_citations(self) -> None:
        """Test citations immediately adjacent to each other."""
        doc1 = create_test_search_doc(document_id="doc_1")
        doc2 = create_test_search_doc(document_id="doc_2")
        new_mapping: CitationMapping = {50: doc1, 60: doc2}

        text, mapping = collapse_citations("[50][60]", {}, new_mapping)

        assert text == "[1][2]"

    def test_empty_new_mapping_with_existing(self) -> None:
        """Test with existing mapping but no new citations to process."""
        existing_doc = create_test_search_doc(document_id="existing")
        existing_mapping: CitationMapping = {1: existing_doc}

        text, mapping = collapse_citations("No citations here.", existing_mapping, {})

        assert text == "No citations here."
        assert mapping == existing_mapping


class TestCollapseCitationsOrdering:
    """Tests for citation ordering behavior."""

    def test_assigns_numbers_in_order_of_appearance(self) -> None:
        """Test that new numbers are assigned based on order in new_mapping iteration."""
        doc1 = create_test_search_doc(document_id="doc_a")
        doc2 = create_test_search_doc(document_id="doc_b")
        doc3 = create_test_search_doc(document_id="doc_c")
        # Note: dict order is preserved in Python 3.7+
        new_mapping: CitationMapping = {300: doc1, 100: doc2, 200: doc3}

        text, mapping = collapse_citations("[300] [100] [200]", {}, new_mapping)

        # The mapping iteration order determines assignment:
        # 300 -> 1 (first in new_mapping)
        # 100 -> 2 (second in new_mapping)
        # 200 -> 3 (third in new_mapping)
        assert mapping[1].document_id == "doc_a"
        assert mapping[2].document_id == "doc_b"
        assert mapping[3].document_id == "doc_c"
        assert text == "[1] [2] [3]"

    def test_multiple_existing_citations_preserved(self) -> None:
        """Test that all existing citations are preserved in output mapping."""
        existing_mapping: CitationMapping = {
            1: create_test_search_doc(document_id="doc_1"),
            2: create_test_search_doc(document_id="doc_2"),
            3: create_test_search_doc(document_id="doc_3"),
        }

        new_doc = create_test_search_doc(document_id="new_doc")
        new_mapping: CitationMapping = {99: new_doc}

        text, mapping = collapse_citations("[99]", existing_mapping, new_mapping)

        assert text == "[4]"
        # All existing plus the new one
        assert len(mapping) == 4
        assert mapping[1].document_id == "doc_1"
        assert mapping[2].document_id == "doc_2"
        assert mapping[3].document_id == "doc_3"
        assert mapping[4].document_id == "new_doc"


class TestCollapseCitationsComplexScenarios:
    """Complex real-world scenario tests."""

    def test_research_agent_scenario(self) -> None:
        """Test a realistic research agent scenario with multiple tool calls."""
        # First search returned citations 1-5
        existing_mapping: CitationMapping = {
            1: create_test_search_doc(document_id="wiki_python"),
            2: create_test_search_doc(document_id="docs_typing"),
            3: create_test_search_doc(document_id="blog_best_practices"),
        }

        # Second search returned citations starting at 100 (to avoid conflicts)
        # Some docs are the same as before
        new_mapping: CitationMapping = {
            100: create_test_search_doc(document_id="wiki_python"),  # Same as 1
            101: create_test_search_doc(document_id="new_tutorial"),  # New
            102: create_test_search_doc(document_id="docs_typing"),  # Same as 2
            103: create_test_search_doc(document_id="another_new"),  # New
        }

        text, mapping = collapse_citations(
            "According to [100] and [101], also see [102] and [103].",
            existing_mapping,
            new_mapping,
        )

        # [100] -> [1] (wiki_python exists as 1)
        # [101] -> [4] (new_tutorial is new, next after 3)
        # [102] -> [2] (docs_typing exists as 2)
        # [103] -> [5] (another_new is new)
        assert text == "According to [1] and [4], also see [2] and [5]."
        assert len(mapping) == 5
        assert mapping[1].document_id == "wiki_python"
        assert mapping[2].document_id == "docs_typing"
        assert mapping[3].document_id == "blog_best_practices"
        assert mapping[4].document_id == "new_tutorial"
        assert mapping[5].document_id == "another_new"

    def test_long_text_with_many_citations(self) -> None:
        """Test processing longer text with many citations."""
        # Create docs for citations 50-55
        new_mapping: CitationMapping = {
            i: create_test_search_doc(document_id=f"doc_{i}") for i in range(50, 56)
        }

        text = """
        This is a comprehensive document with multiple citations.

        First, we discuss [50] which provides background information.
        Then [51] and [52] offer contrasting viewpoints.

        The middle section references [53] extensively, as seen here [53].

        Finally, [54] and [55] conclude the analysis. Note that [50]
        is referenced again for context.
        """

        result_text, mapping = collapse_citations(text, {}, new_mapping)

        # All 50-55 should be collapsed to 1-6
        assert "[1]" in result_text
        assert "[2]" in result_text
        assert "[3]" in result_text
        assert "[4]" in result_text
        assert "[5]" in result_text
        assert "[6]" in result_text
        # Original numbers should not appear
        assert "[50]" not in result_text
        assert "[51]" not in result_text
        assert len(mapping) == 6


================================================
FILE: backend/tests/unit/onyx/chat/test_compression.py
================================================
"""Unit tests for chat history compression module."""

from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.chat.compression import _build_llm_messages_for_summarization
from onyx.chat.compression import find_summary_for_branch
from onyx.chat.compression import generate_summary
from onyx.chat.compression import get_compression_params
from onyx.chat.compression import get_messages_to_summarize
from onyx.chat.compression import SummaryContent
from onyx.configs.constants import MessageType
from onyx.llm.models import AssistantMessage
from onyx.llm.models import SystemMessage
from onyx.llm.models import UserMessage
from onyx.prompts.compression_prompts import PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK
from onyx.prompts.compression_prompts import PROGRESSIVE_USER_REMINDER
from onyx.prompts.compression_prompts import SUMMARIZATION_CUTOFF_MARKER
from onyx.prompts.compression_prompts import SUMMARIZATION_PROMPT
from onyx.prompts.compression_prompts import USER_REMINDER

# Base time for generating sequential timestamps
BASE_TIME = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)


def create_mock_message(
    id: int,
    message: str,
    token_count: int,
    message_type: MessageType = MessageType.USER,
    chat_session_id: int = 1,
    parent_message_id: int | None = None,
    last_summarized_message_id: int | None = None,
    tool_calls: list | None = None,
) -> MagicMock:
    """Create a mock ChatMessage for testing."""
    mock = MagicMock()
    mock.id = id
    mock.message = message
    mock.token_count = token_count
    mock.message_type = message_type
    mock.chat_session_id = chat_session_id
    mock.parent_message_id = parent_message_id
    mock.last_summarized_message_id = last_summarized_message_id
    mock.tool_calls = tool_calls
    # Generate time_sent based on id for chronological ordering
    mock.time_sent = BASE_TIME + timedelta(minutes=id)
    return mock


def test_no_compression_when_under_threshold() -> None:
    """Should not compress when history is under threshold."""
    result = get_compression_params(
        max_input_tokens=10000,
        current_history_tokens=1000,
        reserved_tokens=2000,
    )
    assert result.should_compress is False


def test_compression_triggered_when_over_threshold() -> None:
    """Should compress when history exceeds threshold."""
    result = get_compression_params(
        max_input_tokens=10000,
        current_history_tokens=7000,
        reserved_tokens=2000,
    )
    assert result.should_compress is True
    assert result.tokens_for_recent > 0


def test_get_messages_returns_summary_content() -> None:
    """Should return SummaryContent with correct structure."""
    messages = [
        create_mock_message(1, "msg1", 100),
        create_mock_message(2, "msg2", 100),
    ]
    result = get_messages_to_summarize(
        chat_history=messages,  # type: ignore[arg-type]
        existing_summary=None,
        tokens_for_recent=50,
    )

    assert isinstance(result, SummaryContent)
    assert hasattr(result, "older_messages")
    assert hasattr(result, "recent_messages")


def test_messages_after_summary_cutoff_only() -> None:
    """Should only include messages after existing summary cutoff."""
    messages = [
        create_mock_message(1, "already summarized", 100),
        create_mock_message(2, "also summarized", 100),
        create_mock_message(3, "new message", 100),
    ]
    existing_summary = MagicMock()
    existing_summary.last_summarized_message_id = 2

    result = get_messages_to_summarize(
        chat_history=messages,  # type: ignore[arg-type]
        existing_summary=existing_summary,
        tokens_for_recent=50,
    )

    all_ids = [m.id for m in result.older_messages + result.recent_messages]
    assert 1 not in all_ids
    assert 2 not in all_ids
    assert 3 in all_ids


def test_no_summary_considers_all_messages() -> None:
    """Without existing summary, all messages should be considered."""
    messages = [
        create_mock_message(1, "msg1", 100),
        create_mock_message(2, "msg2", 100),
        create_mock_message(3, "msg3", 100),
    ]

    result = get_messages_to_summarize(
        chat_history=messages,  # type: ignore[arg-type]
        existing_summary=None,
        tokens_for_recent=50,
    )

    all_ids = [m.id for m in result.older_messages + result.recent_messages]
    assert len(all_ids) == 3


def test_empty_messages_filtered_out() -> None:
    """Messages with empty content should be filtered out."""
    messages = [
        create_mock_message(1, "has content", 100),
        create_mock_message(2, "", 0),
        create_mock_message(3, "also has content", 100),
    ]

    result = get_messages_to_summarize(
        chat_history=messages,  # type: ignore[arg-type]
        existing_summary=None,
        tokens_for_recent=50,
    )

    all_messages = result.older_messages + result.recent_messages
    assert len(all_messages) == 2


def test_empty_history_returns_empty() -> None:
    """Should return empty lists for empty history."""
    result = get_messages_to_summarize(
        chat_history=[],
        existing_summary=None,
        tokens_for_recent=100,
    )
    assert result.older_messages == []
    assert result.recent_messages == []


def test_find_summary_for_branch_returns_matching_branch() -> None:
    """Should return summary whose parent_message_id is in current branch."""
    branch_history = [
        create_mock_message(1, "msg1", 100),
        create_mock_message(2, "msg2", 100),
        create_mock_message(3, "msg3", 100),
    ]

    matching_summary = create_mock_message(
        id=100,
        message="Summary of conversation",
        token_count=50,
        parent_message_id=3,
        last_summarized_message_id=2,
    )

    mock_db = MagicMock()
    mock_db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [
        matching_summary
    ]

    result = find_summary_for_branch(mock_db, branch_history)  # type: ignore[arg-type]

    assert result == matching_summary


def test_find_summary_for_branch_ignores_other_branch() -> None:
    """Should not return summary from a different branch."""
    # Branch B has messages 1, 2, 6, 7 (diverged after message 2)
    branch_b_history = [
        create_mock_message(1, "msg1", 100),
        create_mock_message(2, "msg2", 100),
        create_mock_message(6, "branch b msg1", 100),
        create_mock_message(7, "branch b msg2", 100),
    ]

    # Summary was created on branch A (parent_message_id=5 is NOT in branch B)
    other_branch_summary = create_mock_message(
        id=100,
        message="Summary from branch A",
        token_count=50,
        parent_message_id=5,
        last_summarized_message_id=4,
    )

    mock_db = MagicMock()
    mock_db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [
        other_branch_summary
    ]

    result = find_summary_for_branch(mock_db, branch_b_history)  # type: ignore[arg-type]

    assert result is None


def test_cutoff_always_before_user_message() -> None:
    """Cutoff should always be placed right before a user message.

    If token budget would place the cutoff between tool calls or assistant messages,
    it should be moved to right before the next user message.
    """
    messages = [
        create_mock_message(1, "user question", 100, MessageType.USER),
        create_mock_message(2, "assistant uses tool", 100, MessageType.ASSISTANT),
        create_mock_message(3, "tool response", 100, MessageType.TOOL_CALL_RESPONSE),
        create_mock_message(4, "assistant continues", 100, MessageType.ASSISTANT),
        create_mock_message(5, "user follow up", 100, MessageType.USER),
        create_mock_message(6, "final answer", 100, MessageType.ASSISTANT),
    ]

    # Token budget that would normally cut between messages 3 and 4
    # (keeping ~300 tokens = messages 4, 5, 6)
    result = get_messages_to_summarize(
        chat_history=messages,  # type: ignore[arg-type]
        existing_summary=None,
        tokens_for_recent=300,
    )

    # recent_messages should start with user message (5), not assistant (4)
    assert result.recent_messages[0].message_type == MessageType.USER
    assert result.recent_messages[0].id == 5

    # Messages 1, 2, 4 should be in older_messages (to be summarized)
    # Note: message 3 (TOOL_CALL_RESPONSE) has content so it's included
    older_ids = [m.id for m in result.older_messages]
    assert 1 in older_ids
    assert 2 in older_ids
    assert 4 in older_ids


def test__build_llm_messages_for_summarization_user_messages() -> None:
    """User messages should be converted to UserMessage objects."""
    messages = [
        create_mock_message(1, "Hello", 10, MessageType.USER),
        create_mock_message(2, "How are you?", 15, MessageType.USER),
    ]

    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]

    assert len(result) == 2
    assert all(isinstance(m, UserMessage) for m in result)
    assert result[0].content == "Hello"
    assert result[1].content == "How are you?"


def test__build_llm_messages_for_summarization_assistant_messages() -> None:
    """Assistant messages should be converted to AssistantMessage objects."""
    messages = [
        create_mock_message(1, "I'm doing great!", 20, MessageType.ASSISTANT),
    ]

    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]

    assert len(result) == 1
    assert isinstance(result[0], AssistantMessage)
    assert result[0].content == "I'm doing great!"


def test__build_llm_messages_for_summarization_tool_calls() -> None:
    """Assistant messages with tool calls should be formatted compactly."""
    mock_tool_call = MagicMock()
    mock_tool_call.tool_id = 1
    msg = create_mock_message(
        1, "Using tool", 20, MessageType.ASSISTANT, tool_calls=[mock_tool_call]
    )

    tool_id_to_name = {1: "search"}

    result = _build_llm_messages_for_summarization([msg], tool_id_to_name)

    assert len(result) == 1
    assert isinstance(result[0], AssistantMessage)
    assert result[0].content == "[Used tools: search]"


def test__build_llm_messages_for_summarization_skips_tool_responses() -> None:
    """Tool response messages should be skipped."""
    messages = [
        create_mock_message(1, "User question", 10, MessageType.USER),
        create_mock_message(
            2, "Tool response data", 50, MessageType.TOOL_CALL_RESPONSE
        ),
        create_mock_message(3, "Assistant answer", 20, MessageType.ASSISTANT),
    ]

    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]

    assert len(result) == 2
    assert isinstance(result[0], UserMessage)
    assert isinstance(result[1], AssistantMessage)


def test__build_llm_messages_for_summarization_skips_empty() -> None:
    """Empty messages should be skipped."""
    messages = [
        create_mock_message(1, "Has content", 10, MessageType.USER),
        create_mock_message(2, "", 0, MessageType.USER),
        create_mock_message(3, "Also has content", 10, MessageType.ASSISTANT),
    ]

    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]

    assert len(result) == 2


def test_generate_summary_initial_system_prompt() -> None:
    """Initial summarization should use SUMMARIZATION_PROMPT as system prompt."""
    older_messages = [
        create_mock_message(1, "User msg", 10, MessageType.USER),
        create_mock_message(2, "Assistant reply", 10, MessageType.ASSISTANT),
    ]
    recent_messages = [
        create_mock_message(3, "Recent user msg", 10, MessageType.USER),
    ]

    mock_llm = MagicMock()
    mock_response = MagicMock()
    mock_response.choice.message.content = "Summary of conversation"
    mock_llm.invoke.return_value = mock_response

    with patch("onyx.chat.compression.llm_generation_span"):
        result = generate_summary(
            older_messages=older_messages,  # type: ignore[arg-type]
            recent_messages=recent_messages,  # type: ignore[arg-type]
            llm=mock_llm,
            tool_id_to_name={},
            existing_summary=None,
        )

    assert result == "Summary of conversation"

    # Check the messages passed to the LLM
    call_args = mock_llm.invoke.call_args[0][0]

    # First message should be SystemMessage with just SUMMARIZATION_PROMPT
    assert isinstance(call_args[0], SystemMessage)
    assert call_args[0].content == SUMMARIZATION_PROMPT

    # Should have separate user/assistant messages, not a single concatenated string
    user_messages = [m for m in call_args if isinstance(m, UserMessage)]
    assistant_messages = [m for m in call_args if isinstance(m, AssistantMessage)]

    # Should have: older user msg, cutoff marker, recent user msg, final reminder
    assert len(user_messages) >= 3  # At least: older user, cutoff, reminder
    assert len(assistant_messages) >= 1  # At least: older assistant

    # Final message should be the reminder
    assert isinstance(call_args[-1], UserMessage)
    assert call_args[-1].content == USER_REMINDER


def test_generate_summary_progressive_system_prompt() -> None:
    """Progressive summarization should append PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK to system prompt."""
    older_messages = [
        create_mock_message(1, "User msg", 10, MessageType.USER),
    ]
    recent_messages = [
        create_mock_message(2, "Recent msg", 10, MessageType.USER),
    ]
    existing_summary = "Previous conversation summary"

    mock_llm = MagicMock()
    mock_response = MagicMock()
    mock_response.choice.message.content = "Updated summary"
    mock_llm.invoke.return_value = mock_response

    with patch("onyx.chat.compression.llm_generation_span"):
        result = generate_summary(
            older_messages=older_messages,  # type: ignore[arg-type]
            recent_messages=recent_messages,  # type: ignore[arg-type]
            llm=mock_llm,
            tool_id_to_name={},
            existing_summary=existing_summary,
        )

    assert result == "Updated summary"

    # Check the messages passed to the LLM
    call_args = mock_llm.invoke.call_args[0][0]

    # First message should be SystemMessage with SUMMARIZATION_PROMPT + PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK
    assert isinstance(call_args[0], SystemMessage)
    expected_system = (
        SUMMARIZATION_PROMPT
        + PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK.format(
            previous_summary=existing_summary
        )
    )
    assert call_args[0].content == expected_system

    # Final message should be PROGRESSIVE_USER_REMINDER
    assert isinstance(call_args[-1], UserMessage)
    assert call_args[-1].content == PROGRESSIVE_USER_REMINDER


def test_generate_summary_cutoff_marker_as_separate_message() -> None:
    """Cutoff marker should be sent as a separate UserMessage."""
    older_messages = [
        create_mock_message(1, "User msg", 10, MessageType.USER),
    ]
    recent_messages = [
        create_mock_message(2, "Recent msg", 10, MessageType.USER),
    ]

    mock_llm = MagicMock()
    mock_response = MagicMock()
    mock_response.choice.message.content = "Summary"
    mock_llm.invoke.return_value = mock_response

    with patch("onyx.chat.compression.llm_generation_span"):
        generate_summary(
            older_messages=older_messages,  # type: ignore[arg-type]
            recent_messages=recent_messages,  # type: ignore[arg-type]
            llm=mock_llm,
            tool_id_to_name={},
            existing_summary=None,
        )

    call_args = mock_llm.invoke.call_args[0][0]

    # Find the cutoff marker message
    cutoff_messages = [
        m
        for m in call_args
        if isinstance(m, UserMessage) and SUMMARIZATION_CUTOFF_MARKER in str(m.content)
    ]
    assert len(cutoff_messages) == 1
    assert cutoff_messages[0].content == SUMMARIZATION_CUTOFF_MARKER


def test_generate_summary_messages_are_separate() -> None:
    """Messages should be sent as separate objects, not concatenated into one string."""
    older_messages = [
        create_mock_message(1, "First user message", 10, MessageType.USER),
        create_mock_message(2, "First assistant reply", 10, MessageType.ASSISTANT),
        create_mock_message(3, "Second user message", 10, MessageType.USER),
    ]
    recent_messages = [
        create_mock_message(4, "Recent message", 10, MessageType.USER),
    ]

    mock_llm = MagicMock()
    mock_response = MagicMock()
    mock_response.choice.message.content = "Summary"
    mock_llm.invoke.return_value = mock_response

    with patch("onyx.chat.compression.llm_generation_span"):
        generate_summary(
            older_messages=older_messages,  # type: ignore[arg-type]
            recent_messages=recent_messages,  # type: ignore[arg-type]
            llm=mock_llm,
            tool_id_to_name={},
            existing_summary=None,
        )

    call_args = mock_llm.invoke.call_args[0][0]

    # Should have multiple messages, not just 2 (SystemMessage + single UserMessage)
    assert len(call_args) > 2

    # Count message types
    system_count = sum(1 for m in call_args if isinstance(m, SystemMessage))
    user_count = sum(1 for m in call_args if isinstance(m, UserMessage))
    assistant_count = sum(1 for m in call_args if isinstance(m, AssistantMessage))

    assert system_count == 1  # One system message
    # 3 older user messages + 1 cutoff + 1 recent + 1 reminder = at least 3 user messages
    assert user_count >= 3
    assert assistant_count >= 1  # At least one assistant message from older_messages


================================================
FILE: backend/tests/unit/onyx/chat/test_context_files.py
================================================
"""Tests for the unified context file extraction logic (Phase 5).

Covers:
- resolve_context_user_files: precedence rule (custom persona supersedes project)
- extract_context_files: all-or-nothing context window fit check
- Search filter / search_usage determination in the caller
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import UUID
from uuid import uuid4

from onyx.chat.models import ExtractedContextFiles
from onyx.chat.process_message import determine_search_params
from onyx.chat.process_message import extract_context_files
from onyx.chat.process_message import resolve_context_user_files
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.models import UserFile
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.tools.models import SearchToolUsage


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_user_file(
    token_count: int = 100,
    name: str = "file.txt",
    file_id: str | None = None,
) -> UserFile:
    file_uuid = UUID(file_id) if file_id else uuid4()
    return UserFile(
        id=file_uuid,
        file_id=str(file_uuid),
        name=name,
        token_count=token_count,
    )


def _make_persona(
    persona_id: int,
    user_files: list | None = None,
) -> MagicMock:
    persona = MagicMock()
    persona.id = persona_id
    persona.user_files = user_files or []
    return persona


def _make_in_memory_file(
    file_id: str,
    content: str = "hello world",
    file_type: ChatFileType = ChatFileType.PLAIN_TEXT,
    filename: str = "file.txt",
) -> InMemoryChatFile:
    return InMemoryChatFile(
        file_id=file_id,
        content=content.encode("utf-8"),
        file_type=file_type,
        filename=filename,
    )


# ===========================================================================
# resolve_context_user_files
# ===========================================================================


class TestResolveContextUserFiles:
    """Precedence rule: custom persona fully supersedes project."""

    def test_custom_persona_with_files_returns_persona_files(self) -> None:
        persona_files = [_make_user_file(), _make_user_file()]
        persona = _make_persona(persona_id=42, user_files=persona_files)
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == persona_files

    def test_custom_persona_without_files_returns_empty(self) -> None:
        """Custom persona with no files should NOT fall through to project."""
        persona = _make_persona(persona_id=42, user_files=[])
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    def test_custom_persona_none_files_returns_empty(self) -> None:
        """Custom persona with user_files=None should NOT fall through."""
        persona = _make_persona(persona_id=42, user_files=None)
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    @patch("onyx.chat.process_message.get_user_files_from_project")
    def test_default_persona_in_project_returns_project_files(
        self, mock_get_files: MagicMock
    ) -> None:
        project_files = [_make_user_file(), _make_user_file()]
        mock_get_files.return_value = project_files
        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)
        user_id = uuid4()
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=user_id, db_session=db_session
        )

        assert result == project_files
        mock_get_files.assert_called_once_with(
            project_id=99, user_id=user_id, db_session=db_session
        )

    def test_default_persona_no_project_returns_empty(self) -> None:
        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=None, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    @patch("onyx.chat.process_message.get_user_files_from_project")
    def test_custom_persona_without_files_ignores_project(
        self, mock_get_files: MagicMock
    ) -> None:
        """Even with a project_id, custom persona means project is invisible."""
        persona = _make_persona(persona_id=7, user_files=[])
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []
        mock_get_files.assert_not_called()


# ===========================================================================
# extract_context_files
# ===========================================================================


class TestExtractContextFiles:
    """All-or-nothing context window fit check."""

    def test_empty_user_files_returns_empty(self) -> None:
        db_session = MagicMock()
        result = extract_context_files(
            user_files=[],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=db_session,
        )
        assert result.file_texts == []
        assert result.image_files == []
        assert result.use_as_search_filter is False
        assert result.uncapped_token_count is None

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_files_fit_in_context_are_loaded(self, mock_load: MagicMock) -> None:
        file_id = str(uuid4())
        uf = _make_user_file(token_count=100, file_id=file_id)
        mock_load.return_value = [
            _make_in_memory_file(file_id=file_id, content="file content")
        ]

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.file_texts == ["file content"]
        assert result.use_as_search_filter is False
        assert result.total_token_count == 100
        assert len(result.file_metadata) == 1
        assert result.file_metadata[0].file_id == file_id

    def test_files_overflow_context_not_loaded(self) -> None:
        """When aggregate tokens exceed 60% of available window, nothing is loaded."""
        uf = _make_user_file(token_count=7000)

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.file_texts == []
        assert result.image_files == []
        assert result.use_as_search_filter is True
        assert result.uncapped_token_count == 7000
        assert result.total_token_count == 0

    def test_overflow_boundary_exact(self) -> None:
        """Token count exactly at the 60% boundary should trigger overflow."""
        # Available = (10000 - 0) * 0.6 = 6000. Tokens = 6000 → >= threshold.
        uf = _make_user_file(token_count=6000)

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is True

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_just_under_boundary_loads(self, mock_load: MagicMock) -> None:
        """Token count just under the 60% boundary should load files."""
        file_id = str(uuid4())
        uf = _make_user_file(token_count=5999, file_id=file_id)
        mock_load.return_value = [_make_in_memory_file(file_id=file_id, content="data")]

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is False
        assert result.file_texts == ["data"]

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_multiple_files_aggregate_check(self, mock_load: MagicMock) -> None:
        """Multiple small files that individually fit but collectively overflow."""
        files = [_make_user_file(token_count=2500) for _ in range(3)]
        # 3 * 2500 = 7500 > 6000 threshold

        result = extract_context_files(
            user_files=files,
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is True
        assert result.file_texts == []
        mock_load.assert_not_called()

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_reserved_tokens_reduce_available_space(self, mock_load: MagicMock) -> None:
        """Reserved tokens shrink the available window."""
        file_id = str(uuid4())
        uf = _make_user_file(token_count=3000, file_id=file_id)
        # Available = (10000 - 5000) * 0.6 = 3000. Tokens = 3000 → overflow.

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=5000,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is True
        mock_load.assert_not_called()

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_image_files_are_extracted(self, mock_load: MagicMock) -> None:
        file_id = str(uuid4())
        uf = _make_user_file(token_count=50, file_id=file_id)
        mock_load.return_value = [
            InMemoryChatFile(
                file_id=file_id,
                content=b"\x89PNG",
                file_type=ChatFileType.IMAGE,
                filename="photo.png",
            )
        ]

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert len(result.image_files) == 1
        assert result.image_files[0].file_id == file_id
        assert result.file_texts == []
        assert result.total_token_count == 50

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_tool_metadata_file_id_matches_chat_history_file_id(
        self, mock_load: MagicMock
    ) -> None:
        """The file_id in tool metadata (from extract_context_files) and the
        file_id in chat history messages (from build_file_context) must
        agree, otherwise the LLM sees different IDs for the same file across
        turns.

        In production, UserFile.id (UUID PK) differs from UserFile.file_id
        (file-store path). Both pathways should produce the same file_id
        (UserFile.id) for FileReaderTool."""
        from onyx.chat.chat_utils import build_file_context

        user_file_uuid = uuid4()
        file_store_path = f"user_files/{user_file_uuid}/data.csv"

        uf = UserFile(
            id=user_file_uuid,
            file_id=file_store_path,
            name="data.csv",
            token_count=100,
            file_type="text/csv",
        )

        in_memory = InMemoryChatFile(
            file_id=file_store_path,
            content=b"col1,col2\na,b",
            file_type=ChatFileType.TABULAR,
            filename="data.csv",
        )

        mock_load.return_value = [in_memory]

        # Pathway 1: extract_context_files (project/persona context)
        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )
        assert len(result.file_metadata_for_tool) == 1
        tool_metadata_file_id = result.file_metadata_for_tool[0].file_id

        # Pathway 2: build_file_context (chat history path)
        # In convert_chat_history, tool_file_id comes from
        # file_descriptor["user_file_id"], which is str(UserFile.id)
        ctx = build_file_context(
            tool_file_id=str(user_file_uuid),
            filename="data.csv",
            file_type=ChatFileType.TABULAR,
        )
        chat_history_file_id = ctx.tool_metadata.file_id

        # Both pathways must produce the same ID for the LLM
        assert tool_metadata_file_id == chat_history_file_id, (
            f"File ID mismatch: extract_context_files uses '{tool_metadata_file_id}' "
            f"but build_file_context uses '{chat_history_file_id}'."
        )

    @patch("onyx.chat.process_message.DISABLE_VECTOR_DB", True)
    def test_overflow_with_vector_db_disabled_provides_tool_metadata(self) -> None:
        """When vector DB is disabled, overflow produces FileToolMetadata."""
        uf = _make_user_file(token_count=7000, name="bigfile.txt")

        result = extract_context_files(
            user_files=[uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is False
        assert len(result.file_metadata_for_tool) == 1
        assert result.file_metadata_for_tool[0].filename == "bigfile.txt"

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_metadata_only_files_not_counted_in_aggregate_tokens(
        self, mock_load: MagicMock
    ) -> None:
        """Metadata-only files (TABULAR) should not count toward the token budget."""
        text_file_id = str(uuid4())
        text_uf = _make_user_file(token_count=100, file_id=text_file_id)
        # TABULAR file with large token count — should be excluded from aggregate
        tabular_uf = _make_user_file(
            token_count=50000, name="huge.xlsx", file_id=str(uuid4())
        )
        tabular_uf.file_type = (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )

        mock_load.return_value = [
            _make_in_memory_file(file_id=text_file_id, content="text content"),
            InMemoryChatFile(
                file_id=str(tabular_uf.id),
                content=b"binary xlsx",
                file_type=ChatFileType.TABULAR,
                filename="huge.xlsx",
            ),
        ]

        result = extract_context_files(
            user_files=[text_uf, tabular_uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        # Text file fits (100 < 6000), so files should be loaded
        assert result.file_texts == ["text content"]
        # TABULAR file should appear as tool metadata, not in file_texts
        assert len(result.file_metadata_for_tool) == 1
        assert result.file_metadata_for_tool[0].filename == "huge.xlsx"

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_metadata_only_files_loaded_as_tool_metadata(
        self, mock_load: MagicMock
    ) -> None:
        """When files fit, metadata-only files appear in file_metadata_for_tool."""
        text_file_id = str(uuid4())
        tabular_file_id = str(uuid4())
        text_uf = _make_user_file(token_count=100, file_id=text_file_id)
        tabular_uf = _make_user_file(
            token_count=500, name="data.csv", file_id=tabular_file_id
        )
        tabular_uf.file_type = "text/csv"

        mock_load.return_value = [
            _make_in_memory_file(file_id=text_file_id, content="hello"),
            InMemoryChatFile(
                file_id=tabular_file_id,
                content=b"col1,col2\na,b",
                file_type=ChatFileType.TABULAR,
                filename="data.csv",
            ),
        ]

        result = extract_context_files(
            user_files=[text_uf, tabular_uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.file_texts == ["hello"]
        assert len(result.file_metadata_for_tool) == 1
        assert result.file_metadata_for_tool[0].filename == "data.csv"
        # TABULAR should not appear in file_metadata (that's for citation)
        assert all(m.filename != "data.csv" for m in result.file_metadata)

    def test_overflow_with_vector_db_preserves_metadata_only_tool_metadata(
        self,
    ) -> None:
        """When text files overflow with vector DB enabled, metadata-only files
        should still be exposed via file_metadata_for_tool since they aren't
        in the vector DB and would otherwise be inaccessible."""
        text_uf = _make_user_file(token_count=7000, name="bigfile.txt")
        tabular_uf = _make_user_file(token_count=500, name="data.xlsx")
        tabular_uf.file_type = (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )

        result = extract_context_files(
            user_files=[text_uf, tabular_uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        # Text files overflow → search filter enabled
        assert result.use_as_search_filter is True
        assert result.file_texts == []
        # TABULAR file should still be in tool metadata
        assert len(result.file_metadata_for_tool) == 1
        assert result.file_metadata_for_tool[0].filename == "data.xlsx"

    @patch("onyx.chat.process_message.DISABLE_VECTOR_DB", True)
    def test_overflow_no_vector_db_includes_all_files_in_tool_metadata(self) -> None:
        """When vector DB is disabled and files overflow, all files
        (both text and metadata-only) appear in file_metadata_for_tool."""
        text_uf = _make_user_file(token_count=7000, name="bigfile.txt")
        tabular_uf = _make_user_file(token_count=500, name="data.xlsx")
        tabular_uf.file_type = (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )

        result = extract_context_files(
            user_files=[text_uf, tabular_uf],
            llm_max_context_window=10000,
            reserved_token_count=0,
            db_session=MagicMock(),
        )

        assert result.use_as_search_filter is False
        assert len(result.file_metadata_for_tool) == 2
        filenames = {m.filename for m in result.file_metadata_for_tool}
        assert filenames == {"bigfile.txt", "data.xlsx"}


# ===========================================================================
# Search filter + search_usage determination
# ===========================================================================


class TestSearchFilterDetermination:
    """Verify that determine_search_params correctly resolves
    project_id_filter, persona_id_filter, and search_usage based on
    the extraction result and the precedence rule.
    """

    @staticmethod
    def _make_context(
        use_as_search_filter: bool = False,
        file_texts: list[str] | None = None,
        uncapped_token_count: int | None = None,
    ) -> ExtractedContextFiles:
        return ExtractedContextFiles(
            file_texts=file_texts or [],
            image_files=[],
            use_as_search_filter=use_as_search_filter,
            total_token_count=0,
            file_metadata=[],
            uncapped_token_count=uncapped_token_count,
        )

    def test_custom_persona_files_fit_no_filter(self) -> None:
        """Custom persona, files fit → no search filter, AUTO."""
        result = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=self._make_context(
                file_texts=["content"],
                uncapped_token_count=100,
            ),
        )
        assert result.project_id_filter is None
        assert result.persona_id_filter is None
        assert result.search_usage == SearchToolUsage.AUTO

    def test_custom_persona_files_overflow_persona_filter(self) -> None:
        """Custom persona, files overflow → persona_id filter, AUTO."""
        result = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=self._make_context(use_as_search_filter=True),
        )
        assert result.persona_id_filter == 42
        assert result.project_id_filter is None
        assert result.search_usage == SearchToolUsage.AUTO

    def test_custom_persona_no_files_no_project_leak(self) -> None:
        """Custom persona (no files) in project → nothing leaks from project."""
        result = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=self._make_context(),
        )
        assert result.project_id_filter is None
        assert result.persona_id_filter is None
        assert result.search_usage == SearchToolUsage.AUTO

    def test_default_persona_project_files_fit_disables_search(self) -> None:
        """Default persona, project files fit → DISABLED."""
        result = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=self._make_context(
                file_texts=["content"],
                uncapped_token_count=100,
            ),
        )
        assert result.project_id_filter is None
        assert result.search_usage == SearchToolUsage.DISABLED

    def test_default_persona_project_files_overflow_enables_search(self) -> None:
        """Default persona, project files overflow → ENABLED + project_id filter."""
        result = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=self._make_context(
                use_as_search_filter=True,
                uncapped_token_count=7000,
            ),
        )
        assert result.project_id_filter == 99
        assert result.persona_id_filter is None
        assert result.search_usage == SearchToolUsage.ENABLED

    def test_default_persona_no_project_auto(self) -> None:
        """Default persona, no project → AUTO."""
        result = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=None,
            extracted_context_files=self._make_context(),
        )
        assert result.project_id_filter is None
        assert result.search_usage == SearchToolUsage.AUTO

    def test_default_persona_project_no_files_disables_search(self) -> None:
        """Default persona in project with no files → DISABLED."""
        result = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=self._make_context(),
        )
        assert result.search_usage == SearchToolUsage.DISABLED


================================================
FILE: backend/tests/unit/onyx/chat/test_emitter.py
================================================
"""Unit tests for the Emitter class.

All tests use the streaming mode (merged_queue required). Emitter has a single
code path — no standalone bus.
"""

import queue

from onyx.chat.emitter import Emitter
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningStart


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _placement(
    turn_index: int = 0,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
) -> Placement:
    return Placement(
        turn_index=turn_index,
        tab_index=tab_index,
        sub_turn_index=sub_turn_index,
    )


def _packet(
    turn_index: int = 0,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
) -> Packet:
    """Build a minimal valid packet with an OverallStop payload."""
    return Packet(
        placement=_placement(turn_index, tab_index, sub_turn_index),
        obj=OverallStop(stop_reason="test"),
    )


def _make_emitter(model_idx: int = 0) -> tuple["Emitter", "queue.Queue"]:
    """Return (emitter, queue) wired together."""
    mq: queue.Queue = queue.Queue()
    return Emitter(merged_queue=mq, model_idx=model_idx), mq


# ---------------------------------------------------------------------------
# Queue routing
# ---------------------------------------------------------------------------


class TestEmitterQueueRouting:
    def test_emit_lands_on_merged_queue(self) -> None:
        emitter, mq = _make_emitter()
        emitter.emit(_packet())
        assert not mq.empty()

    def test_queue_item_is_tuple_of_key_and_packet(self) -> None:
        emitter, mq = _make_emitter(model_idx=1)
        emitter.emit(_packet())
        item = mq.get_nowait()
        assert isinstance(item, tuple)
        assert len(item) == 2

    def test_multiple_packets_delivered_fifo(self) -> None:
        emitter, mq = _make_emitter()
        p1 = _packet(turn_index=0)
        p2 = _packet(turn_index=1)
        emitter.emit(p1)
        emitter.emit(p2)
        _, t1 = mq.get_nowait()
        _, t2 = mq.get_nowait()
        assert t1.placement.turn_index == 0
        assert t2.placement.turn_index == 1


# ---------------------------------------------------------------------------
# model_index tagging
# ---------------------------------------------------------------------------


class TestEmitterModelIndexTagging:
    def test_n1_default_model_idx_tags_model_index_zero(self) -> None:
        """N=1: default model_idx=0, so packet gets model_index=0."""
        emitter, mq = _make_emitter(model_idx=0)
        emitter.emit(_packet())
        _key, tagged = mq.get_nowait()
        assert tagged.placement.model_index == 0

    def test_model_idx_one_tags_packet(self) -> None:
        emitter, mq = _make_emitter(model_idx=1)
        emitter.emit(_packet())
        _key, tagged = mq.get_nowait()
        assert tagged.placement.model_index == 1

    def test_model_idx_two_tags_packet(self) -> None:
        """Boundary: third model in a 3-model run."""
        emitter, mq = _make_emitter(model_idx=2)
        emitter.emit(_packet())
        _key, tagged = mq.get_nowait()
        assert tagged.placement.model_index == 2


# ---------------------------------------------------------------------------
# Queue key
# ---------------------------------------------------------------------------


class TestEmitterQueueKey:
    def test_key_equals_model_idx(self) -> None:
        """Drain loop uses the key to route packets; it must match model_idx."""
        emitter, mq = _make_emitter(model_idx=2)
        emitter.emit(_packet())
        key, _ = mq.get_nowait()
        assert key == 2

    def test_n1_key_is_zero(self) -> None:
        emitter, mq = _make_emitter(model_idx=0)
        emitter.emit(_packet())
        key, _ = mq.get_nowait()
        assert key == 0


# ---------------------------------------------------------------------------
# Placement field preservation
# ---------------------------------------------------------------------------


class TestEmitterPlacementPreservation:
    def test_turn_index_is_preserved(self) -> None:
        emitter, mq = _make_emitter()
        emitter.emit(_packet(turn_index=5))
        _, tagged = mq.get_nowait()
        assert tagged.placement.turn_index == 5

    def test_tab_index_is_preserved(self) -> None:
        emitter, mq = _make_emitter()
        emitter.emit(_packet(tab_index=3))
        _, tagged = mq.get_nowait()
        assert tagged.placement.tab_index == 3

    def test_sub_turn_index_is_preserved(self) -> None:
        emitter, mq = _make_emitter()
        emitter.emit(_packet(sub_turn_index=2))
        _, tagged = mq.get_nowait()
        assert tagged.placement.sub_turn_index == 2

    def test_sub_turn_index_none_is_preserved(self) -> None:
        emitter, mq = _make_emitter()
        emitter.emit(_packet(sub_turn_index=None))
        _, tagged = mq.get_nowait()
        assert tagged.placement.sub_turn_index is None

    def test_packet_obj_is_not_modified(self) -> None:
        """The payload object must survive tagging untouched."""
        emitter, mq = _make_emitter()
        original_obj = OverallStop(stop_reason="sentinel")
        pkt = Packet(placement=_placement(), obj=original_obj)
        emitter.emit(pkt)
        _, tagged = mq.get_nowait()
        assert tagged.obj is original_obj

    def test_different_obj_types_are_handled(self) -> None:
        """Any valid PacketObj type passes through correctly."""
        emitter, mq = _make_emitter()
        pkt = Packet(placement=_placement(), obj=ReasoningStart())
        emitter.emit(pkt)
        _, tagged = mq.get_nowait()
        assert isinstance(tagged.obj, ReasoningStart)


================================================
FILE: backend/tests/unit/onyx/chat/test_llm_loop.py
================================================
"""Tests for llm_loop.py, including history construction and empty-response paths."""

from unittest.mock import Mock

import pytest

from onyx.chat.llm_loop import _build_empty_llm_response_error
from onyx.chat.llm_loop import _try_fallback_tool_extraction
from onyx.chat.llm_loop import construct_message_history
from onyx.chat.llm_loop import EmptyLLMResponseError
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.file_store.models import ChatFileType
from onyx.llm.interfaces import LLMConfig
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallKickoff


def create_message(
    content: str, message_type: MessageType, token_count: int | None = None
) -> ChatMessageSimple:
    """Helper to create a ChatMessageSimple for testing."""
    if token_count is None:
        # Simple token estimation: ~1 token per 4 characters
        token_count = max(1, len(content) // 4)
    return ChatMessageSimple(
        message=content,
        token_count=token_count,
        message_type=message_type,
    )


def create_assistant_with_tool_call(
    tool_call_id: str, tool_name: str, token_count: int
) -> ChatMessageSimple:
    """Helper to create an ASSISTANT message with tool_calls for testing."""
    tool_call = ToolCallSimple(
        tool_call_id=tool_call_id,
        tool_name=tool_name,
        tool_arguments={},
        token_count=token_count,
    )
    return ChatMessageSimple(
        message="",
        token_count=token_count,
        message_type=MessageType.ASSISTANT,
        tool_calls=[tool_call],
    )


def create_tool_response(
    tool_call_id: str, content: str, token_count: int
) -> ChatMessageSimple:
    """Helper to create a TOOL_CALL_RESPONSE message for testing."""
    return ChatMessageSimple(
        message=content,
        token_count=token_count,
        message_type=MessageType.TOOL_CALL_RESPONSE,
        tool_call_id=tool_call_id,
    )


def create_context_files(
    num_files: int = 0, num_images: int = 0, tokens_per_file: int = 100
) -> ExtractedContextFiles:
    """Helper to create ExtractedContextFiles for testing."""
    file_texts = [f"Project file {i} content" for i in range(num_files)]
    file_metadata = [
        ContextFileMetadata(
            file_id=f"file_{i}",
            filename=f"file_{i}.txt",
            file_content=f"Project file {i} content",
        )
        for i in range(num_files)
    ]
    image_files = [
        ChatLoadedFile(
            file_id=f"image_{i}",
            content=b"",
            file_type=ChatFileType.IMAGE,
            filename=f"image_{i}.png",
            content_text=None,
            token_count=50,
        )
        for i in range(num_images)
    ]
    return ExtractedContextFiles(
        file_texts=file_texts,
        image_files=image_files,
        use_as_search_filter=False,
        total_token_count=num_files * tokens_per_file,
        file_metadata=file_metadata,
        uncapped_token_count=num_files * tokens_per_file,
    )


class TestConstructMessageHistory:
    """Tests for the construct_message_history function."""

    def test_basic_no_truncation(self) -> None:
        """Test basic functionality when all messages fit within token budget."""
        system_prompt = create_message(
            "You are a helpful assistant", MessageType.SYSTEM, 10
        )
        user_msg1 = create_message("Hello", MessageType.USER, 5)
        assistant_msg1 = create_message("Hi there!", MessageType.ASSISTANT, 5)
        user_msg2 = create_message("How are you?", MessageType.USER, 5)

        simple_chat_history = [user_msg1, assistant_msg1, user_msg2]
        context_files = create_context_files()

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user1, assistant1, user2
        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert result[2] == assistant_msg1
        assert result[3] == user_msg2

    def test_with_custom_agent_prompt(self) -> None:
        """Test that custom agent prompt is inserted before the last user message."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First message", MessageType.USER, 5)
        assistant_msg1 = create_message("Response", MessageType.ASSISTANT, 5)
        user_msg2 = create_message("Second message", MessageType.USER, 5)
        custom_agent = create_message("Custom instructions", MessageType.USER, 10)

        simple_chat_history = [user_msg1, assistant_msg1, user_msg2]
        context_files = create_context_files()

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=custom_agent,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user1, assistant1, custom_agent, user2
        assert len(result) == 5
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert result[2] == assistant_msg1
        assert result[3] == custom_agent  # Before last user message
        assert result[4] == user_msg2

    def test_with_context_files(self) -> None:
        """Test that project files are inserted before the last user message."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First message", MessageType.USER, 5)
        user_msg2 = create_message("Second message", MessageType.USER, 5)

        simple_chat_history = [user_msg1, user_msg2]
        context_files = create_context_files(num_files=2, tokens_per_file=50)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user1, context_files_message, user2
        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert (
            result[2].message_type == MessageType.USER
        )  # Project files as user message
        assert "documents" in result[2].message  # Should contain JSON structure
        assert result[3] == user_msg2

    def test_with_reminder_message(self) -> None:
        """Test that reminder message is added at the very end."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg = create_message("Hello", MessageType.USER, 5)
        reminder = create_message("Remember to cite sources", MessageType.USER, 10)

        simple_chat_history = [user_msg]
        context_files = create_context_files()

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=reminder,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user, reminder
        assert len(result) == 3
        assert result[0] == system_prompt
        assert result[1] == user_msg
        assert result[2] == reminder  # At the end

    def test_tool_calls_after_last_user_message(self) -> None:
        """Test that tool calls and responses after last user message are preserved."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First message", MessageType.USER, 5)
        assistant_msg1 = create_message("Response", MessageType.ASSISTANT, 5)
        user_msg2 = create_message("Search for X", MessageType.USER, 5)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "search", 5)
        tool_response = create_tool_response("tc_1", "Search results...", 10)

        simple_chat_history = [
            user_msg1,
            assistant_msg1,
            user_msg2,
            assistant_with_tool,
            tool_response,
        ]
        context_files = create_context_files()

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user1, assistant1, user2, assistant_with_tool, tool_response
        assert len(result) == 6
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert result[2] == assistant_msg1
        assert result[3] == user_msg2
        assert result[4] == assistant_with_tool
        assert result[5] == tool_response

    def test_custom_agent_and_project_before_last_user_with_tools_after(self) -> None:
        """Test correct ordering with custom agent, project files, and tool calls."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 5)
        user_msg2 = create_message("Second", MessageType.USER, 5)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 5)
        custom_agent = create_message("Custom", MessageType.USER, 10)

        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]
        context_files = create_context_files(num_files=1, tokens_per_file=50)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=custom_agent,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, user1, custom_agent, context_files, user2, assistant_with_tool
        assert len(result) == 6
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert result[2] == custom_agent  # Before last user message
        assert result[3].message_type == MessageType.USER  # Project files
        assert "documents" in result[3].message
        assert result[4] == user_msg2  # Last user message
        assert result[5] == assistant_with_tool  # After last user message

    def test_project_images_attached_to_last_user_message(self) -> None:
        """Test that project images are attached to the last user message."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 5)
        user_msg2 = create_message("Second", MessageType.USER, 5)

        simple_chat_history = [user_msg1, user_msg2]
        context_files = create_context_files(num_files=0, num_images=2)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Last message should have the project images
        last_message = result[-1]
        assert last_message.message == "Second"
        assert last_message.image_files is not None
        assert len(last_message.image_files) == 2
        assert last_message.image_files[0].file_id == "image_0"
        assert last_message.image_files[1].file_id == "image_1"

    def test_project_images_preserve_existing_images(self) -> None:
        """Test that project images are appended to existing images on the user message."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)

        # Create a user message with existing images
        existing_image = ChatLoadedFile(
            file_id="existing_image",
            content=b"",
            file_type=ChatFileType.IMAGE,
            filename="existing.png",
            content_text=None,
            token_count=50,
        )
        user_msg = ChatMessageSimple(
            message="Message with image",
            token_count=5,
            message_type=MessageType.USER,
            image_files=[existing_image],
        )

        simple_chat_history = [user_msg]
        context_files = create_context_files(num_files=0, num_images=1)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Last message should have both existing and project images
        last_message = result[-1]
        assert last_message.image_files is not None
        assert len(last_message.image_files) == 2
        assert last_message.image_files[0].file_id == "existing_image"
        assert last_message.image_files[1].file_id == "image_0"

    def test_truncation_from_top(self) -> None:
        """Test that history is truncated from the top when token budget is exceeded."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 20)
        assistant_msg1 = create_message("Response 1", MessageType.ASSISTANT, 20)
        user_msg2 = create_message("Second", MessageType.USER, 20)
        assistant_msg2 = create_message("Response 2", MessageType.ASSISTANT, 20)
        user_msg3 = create_message("Third", MessageType.USER, 20)

        simple_chat_history = [
            user_msg1,
            assistant_msg1,
            user_msg2,
            assistant_msg2,
            user_msg3,
        ]
        context_files = create_context_files()

        # Budget only allows last 3 messages + system (10 + 20 + 20 + 20 = 70 tokens)
        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=80,
        )

        # Should have: system, user2, assistant2, user3
        # user1 and assistant1 should be truncated
        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == user_msg2  # user1 truncated
        assert result[2] == assistant_msg2
        assert result[3] == user_msg3

    def test_truncation_preserves_last_user_and_messages_after(self) -> None:
        """Test that truncation preserves the last user message and everything after it."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 30)
        user_msg2 = create_message("Second", MessageType.USER, 20)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 20)
        tool_response = create_tool_response("tc_1", "tool_response", 20)

        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool, tool_response]
        context_files = create_context_files()

        # Budget only allows last user message and messages after + system
        # (10 + 20 + 20 + 20 = 70 tokens)
        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=80,
        )

        # Should have: system, user2, assistant_with_tool, tool_response
        # user1 should be truncated, but user2 and everything after preserved
        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == user_msg2  # user1 truncated
        assert result[2] == assistant_with_tool
        assert result[3] == tool_response

    def test_truncation_drops_orphaned_tool_response(self) -> None:
        """If truncation drops an assistant tool call, its orphaned tool response is removed."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 10)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 25)
        tool_response = create_tool_response("tc_1", "tool_response", 5)
        assistant_msg1 = create_message("Used the tool above", MessageType.ASSISTANT, 5)
        user_msg2 = create_message("Latest question", MessageType.USER, 10)

        simple_chat_history = [
            user_msg1,
            assistant_with_tool,
            tool_response,
            assistant_msg1,
            user_msg2,
        ]
        context_files = create_context_files()

        # Remaining history budget is 10 tokens (30 total - 10 system - 10 last user):
        # keeps [tool_response, assistant_msg1] from history_before_last_user,
        # but drops assistant_with_tool, making tool_response orphaned.
        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=30,
        )

        # Orphaned tool response should be removed from final history.
        assert len(result) == 3
        assert result[0] == system_prompt
        assert result[1] == assistant_msg1
        assert result[2] == user_msg2

    def test_preserves_non_orphaned_tool_response(self) -> None:
        """Tool responses remain when their assistant tool call is present."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 10)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 20)
        tool_response = create_tool_response("tc_1", "tool_response", 5)
        user_msg2 = create_message("Latest question", MessageType.USER, 10)

        simple_chat_history = [user_msg1, assistant_with_tool, tool_response, user_msg2]
        context_files = create_context_files()

        # Remaining history budget is 25 tokens (45 total - 10 system - 10 last user):
        # keeps both assistant_with_tool and tool_response in history_before_last_user.
        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=45,
        )

        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == assistant_with_tool
        assert result[2] == tool_response
        assert result[3] == user_msg2

    def test_empty_history(self) -> None:
        """Test handling of empty chat history."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        custom_agent = create_message("Custom", MessageType.USER, 10)
        reminder = create_message("Reminder", MessageType.USER, 10)

        simple_chat_history: list[ChatMessageSimple] = []
        context_files = create_context_files(num_files=1, tokens_per_file=50)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=custom_agent,
            simple_chat_history=simple_chat_history,
            reminder_message=reminder,
            context_files=context_files,
            available_tokens=1000,
        )

        # Should have: system, custom_agent, context_files, reminder
        assert len(result) == 4
        assert result[0] == system_prompt
        assert result[1] == custom_agent
        assert result[2].message_type == MessageType.USER  # Project files
        assert result[3] == reminder

    def test_no_user_message_raises_error(self) -> None:
        """Test that an error is raised when there's no user message in history."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        assistant_msg = create_message("Response", MessageType.ASSISTANT, 5)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 5)

        simple_chat_history = [assistant_msg, assistant_with_tool]
        context_files = create_context_files()

        with pytest.raises(ValueError, match="No user message found"):
            construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=None,
                simple_chat_history=simple_chat_history,
                reminder_message=None,
                context_files=context_files,
                available_tokens=1000,
            )

    def test_not_enough_tokens_for_required_elements(self) -> None:
        """Test error when there aren't enough tokens for required elements."""
        system_prompt = create_message("System", MessageType.SYSTEM, 50)
        user_msg = create_message("Message", MessageType.USER, 50)
        custom_agent = create_message("Custom", MessageType.USER, 50)

        simple_chat_history = [user_msg]
        context_files = create_context_files(num_files=1, tokens_per_file=100)

        # Total required: 50 (system) + 50 (custom) + 100 (project) + 50 (user) = 250
        # But only 200 available
        with pytest.raises(ValueError, match="Not enough tokens"):
            construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=custom_agent,
                simple_chat_history=simple_chat_history,
                reminder_message=None,
                context_files=context_files,
                available_tokens=200,
            )

    def test_not_enough_tokens_for_last_user_and_messages_after(self) -> None:
        """Test error when last user message and messages after don't fit."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 10)
        user_msg2 = create_message("Second", MessageType.USER, 30)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 30)

        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]
        context_files = create_context_files()

        # Budget: 50 tokens
        # Required: 10 (system) + 30 (user2) + 30 (assistant_with_tool) = 70 tokens
        # After subtracting system: 40 tokens available, but need 60 for user2 + assistant_with_tool
        with pytest.raises(
            ValueError, match="Not enough tokens to include the last user message"
        ):
            construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=None,
                simple_chat_history=simple_chat_history,
                reminder_message=None,
                context_files=context_files,
                available_tokens=50,
            )

    def test_complex_scenario_all_elements(self) -> None:
        """Test a complex scenario with all elements combined."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg1 = create_message("First", MessageType.USER, 10)
        assistant_msg1 = create_message("Response 1", MessageType.ASSISTANT, 10)
        user_msg2 = create_message("Second", MessageType.USER, 10)
        assistant_msg2 = create_message("Response 2", MessageType.ASSISTANT, 10)
        user_msg3 = create_message("Third", MessageType.USER, 10)
        assistant_with_tool = create_assistant_with_tool_call("tc_1", "search", 10)
        tool_response = create_tool_response("tc_1", "Results", 10)
        custom_agent = create_message("Custom instructions", MessageType.USER, 15)
        reminder = create_message("Cite sources", MessageType.USER, 10)

        simple_chat_history = [
            user_msg1,
            assistant_msg1,
            user_msg2,
            assistant_msg2,
            user_msg3,
            assistant_with_tool,
            tool_response,
        ]
        context_files = create_context_files(num_files=2, tokens_per_file=20)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=custom_agent,
            simple_chat_history=simple_chat_history,
            reminder_message=reminder,
            context_files=context_files,
            available_tokens=1000,
        )

        # Expected order:
        # system, user1, assistant1, user2, assistant2,
        # custom_agent, context_files, user3, assistant_with_tool, tool_response, reminder
        assert len(result) == 11
        assert result[0] == system_prompt
        assert result[1] == user_msg1
        assert result[2] == assistant_msg1
        assert result[3] == user_msg2
        assert result[4] == assistant_msg2
        assert result[5] == custom_agent  # Before last user
        assert (
            result[6].message_type == MessageType.USER
        )  # Project files before last user
        assert "documents" in result[6].message
        assert result[7] == user_msg3  # Last user message
        assert result[8] == assistant_with_tool  # After last user
        assert result[9] == tool_response  # After last user
        assert result[10] == reminder  # At the very end

    def test_context_files_json_format(self) -> None:
        """Test that project files are formatted correctly as JSON."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg = create_message("Hello", MessageType.USER, 5)

        simple_chat_history = [user_msg]
        context_files = create_context_files(num_files=2, tokens_per_file=50)

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
        )

        # Find the project files message
        project_message = result[1]  # Should be between system and user

        # Verify it's formatted as JSON
        assert "Here are some documents provided for context" in project_message.message
        assert '"documents"' in project_message.message
        assert '"document": 1' in project_message.message
        assert '"document": 2' in project_message.message
        assert '"contents"' in project_message.message
        assert "Project file 0 content" in project_message.message
        assert "Project file 1 content" in project_message.message

    def test_file_metadata_for_tool_produces_message(self) -> None:
        """When context_files has file_metadata_for_tool, a metadata listing
        message should be injected into the history."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg = create_message("Analyze the spreadsheet", MessageType.USER, 5)

        context_files = ExtractedContextFiles(
            file_texts=[],
            image_files=[],
            use_as_search_filter=False,
            total_token_count=0,
            file_metadata=[],
            uncapped_token_count=0,
            file_metadata_for_tool=[
                FileToolMetadata(
                    file_id="xlsx-1",
                    filename="report.xlsx",
                    approx_char_count=100000,
                ),
            ],
        )

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=[user_msg],
            reminder_message=None,
            context_files=context_files,
            available_tokens=1000,
            token_counter=_simple_token_counter,
        )

        # Should have: system, tool_metadata_message, user
        assert len(result) == 3
        metadata_msg = result[1]
        assert metadata_msg.message_type == MessageType.USER
        assert "report.xlsx" in metadata_msg.message
        assert "xlsx-1" in metadata_msg.message

    def test_metadata_only_and_text_files_both_present(self) -> None:
        """When both text content and tool metadata are present, both messages
        should appear in the history."""
        system_prompt = create_message("System", MessageType.SYSTEM, 10)
        user_msg = create_message("Summarize everything", MessageType.USER, 5)

        context_files = ExtractedContextFiles(
            file_texts=["Text file content here"],
            image_files=[],
            use_as_search_filter=False,
            total_token_count=100,
            file_metadata=[
                ContextFileMetadata(
                    file_id="txt-1",
                    filename="notes.txt",
                    file_content="Text file content here",
                ),
            ],
            uncapped_token_count=100,
            file_metadata_for_tool=[
                FileToolMetadata(
                    file_id="xlsx-1",
                    filename="data.xlsx",
                    approx_char_count=50000,
                ),
            ],
        )

        result = construct_message_history(
            system_prompt=system_prompt,
            custom_agent_prompt=None,
            simple_chat_history=[user_msg],
            reminder_message=None,
            context_files=context_files,
            available_tokens=2000,
            token_counter=_simple_token_counter,
        )

        # Should have: system, context_files_message, tool_metadata_message, user
        assert len(result) == 4
        # Context files message (text content)
        assert "documents" in result[1].message
        assert "Text file content here" in result[1].message
        # Tool metadata message
        assert "data.xlsx" in result[2].message
        assert result[3] == user_msg


def _simple_token_counter(text: str) -> int:
    """Approximate token counter for tests (~4 chars per token)."""
    return max(1, len(text) // 4)


def _make_file_metadata(
    file_id: str, filename: str, approx_chars: int = 50_000
) -> FileToolMetadata:
    return FileToolMetadata(
        file_id=file_id, filename=filename, approx_char_count=approx_chars
    )


class TestForgottenFileMetadata:
    """Tests for the forgotten-files mechanism in construct_message_history.

    These cover the scenario where a user attaches a large file to a chat
    message. On the first turn the file content message is in the context
    window. On subsequent turns, it may be truncated by either:
      a) context-window budget limits, or
      b) summary-based truncation removing the message before
         convert_chat_history ever runs — leaving an "orphaned" metadata
         entry with no corresponding file_id-tagged ChatMessageSimple.

    The forgotten-files mechanism must detect both cases and inject a
    lightweight metadata message so the LLM knows to use read_file.
    """

    def _build(
        self,
        simple_chat_history: list[ChatMessageSimple],
        available_tokens: int = 10_000,
        all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,
    ) -> list[ChatMessageSimple]:
        """Shorthand wrapper around construct_message_history."""
        return construct_message_history(
            system_prompt=create_message("system", MessageType.SYSTEM, 5),
            custom_agent_prompt=None,
            simple_chat_history=simple_chat_history,
            reminder_message=None,
            context_files=create_context_files(),
            available_tokens=available_tokens,
            token_counter=_simple_token_counter,
            all_injected_file_metadata=all_injected_file_metadata,
        )

    @staticmethod
    def _find_forgotten_message(
        result: list[ChatMessageSimple],
    ) -> ChatMessageSimple | None:
        """Find the forgotten-files metadata message in the result, if any."""
        for msg in result:
            if "Use the read_file tool" in msg.message:
                return msg
        return None

    # ------------------------------------------------------------------
    # Case 1: file message is still in context — no forgotten-files needed
    # ------------------------------------------------------------------

    def test_file_message_present_no_forgotten_metadata(self) -> None:
        """When the file message fits in context, no forgotten-file message
        should be injected.
        """
        file_meta = _make_file_metadata("file-abc", "moby_dick.txt")
        file_msg = create_message("Contents of moby dick...", MessageType.USER, 50)
        file_msg.file_id = "file-abc"

        history = [
            file_msg,
            create_message("Summarize this", MessageType.ASSISTANT, 20),
            create_message("What's chapter 1?", MessageType.USER, 10),
        ]
        result = self._build(
            history,
            available_tokens=10_000,
            all_injected_file_metadata={"file-abc": file_meta},
        )

        forgotten = self._find_forgotten_message(result)
        assert (
            forgotten is None
        ), "Should not inject forgotten-files when file is in context"
        # The file message itself should still be present
        assert any(m.file_id == "file-abc" for m in result)

    # ------------------------------------------------------------------
    # Case 2: file message dropped by context-window truncation
    # ------------------------------------------------------------------

    def test_file_message_dropped_by_truncation_gets_forgotten_metadata(self) -> None:
        """When the context budget is too tight and the file message gets
        truncated, a forgotten-files metadata message must appear.
        """
        file_meta = _make_file_metadata("file-abc", "moby_dick.txt")
        file_msg = create_message("x" * 2000, MessageType.USER, 500)
        file_msg.file_id = "file-abc"

        history = [
            file_msg,
            create_message("Got it", MessageType.ASSISTANT, 10),
            create_message("Tell me about ch1", MessageType.USER, 10),
        ]

        # Budget is just enough for the system prompt + last messages but
        # NOT the 500-token file message.
        result = self._build(
            history,
            available_tokens=100,
            all_injected_file_metadata={"file-abc": file_meta},
        )

        forgotten = self._find_forgotten_message(result)
        assert forgotten is not None, "Forgotten-files message should be injected"
        assert "moby_dick.txt" in forgotten.message
        assert "file-abc" in forgotten.message

        # The original file message should NOT be in context
        assert not any(
            getattr(m, "file_id", None) == "file-abc"
            and m.message_type == MessageType.USER
            for m in result
            if m is not forgotten
        )

    # ------------------------------------------------------------------
    # Case 3: file message removed by summary truncation ("orphaned" metadata)
    # ------------------------------------------------------------------

    def test_orphaned_metadata_triggers_forgotten_files(self) -> None:
        """Simulates the scenario where summary truncation in process_message
        removed the file's original message BEFORE convert_chat_history ran,
        so no ChatMessageSimple has the file_id tag. The metadata is still
        passed via all_injected_file_metadata and must be treated as dropped.
        """
        file_meta = _make_file_metadata("file-abc", "moby_dick.txt")

        # History has no file_id-tagged message — it was already removed by
        # summary truncation. Only later conversation remains.
        history = [
            create_message("Summary of earlier convo", MessageType.ASSISTANT, 20),
            create_message("Now tell me about chapter 2", MessageType.USER, 10),
        ]

        result = self._build(
            history,
            available_tokens=10_000,
            all_injected_file_metadata={"file-abc": file_meta},
        )

        forgotten = self._find_forgotten_message(result)
        assert (
            forgotten is not None
        ), "Orphaned file metadata should trigger forgotten-files message"
        assert "moby_dick.txt" in forgotten.message
        assert "file-abc" in forgotten.message

    # ------------------------------------------------------------------
    # Case 4: multiple files — one survives, one is dropped
    # ------------------------------------------------------------------

    def test_mixed_files_only_dropped_ones_appear_in_forgotten(self) -> None:
        """When two files exist but only one's message is truncated, only the
        truncated file should appear in the forgotten-files metadata.
        """
        meta_a = _make_file_metadata("file-a", "big_file.txt")
        meta_b = _make_file_metadata("file-b", "small_file.txt")

        # file-a has a huge message that will be dropped, file-b fits
        file_msg_a = create_message("x" * 2000, MessageType.USER, 500)
        file_msg_a.file_id = "file-a"
        file_msg_b = create_message("small content", MessageType.USER, 5)
        file_msg_b.file_id = "file-b"

        history = [
            file_msg_a,
            create_message("ok", MessageType.ASSISTANT, 3),
            file_msg_b,
            create_message("ok", MessageType.ASSISTANT, 3),
            create_message("Compare the two files", MessageType.USER, 10),
        ]

        # Tight budget: system(5) + last-user(10) = 15 min. Give ~50 so
        # file_msg_b(5)+assistant(3)+assistant(3) fit but file_msg_a(500) won't.
        result = self._build(
            history,
            available_tokens=80,
            all_injected_file_metadata={"file-a": meta_a, "file-b": meta_b},
        )

        forgotten = self._find_forgotten_message(result)
        assert forgotten is not None
        assert "big_file.txt" in forgotten.message
        assert "file-a" in forgotten.message
        # file-b should NOT be in the forgotten message — it's still in context
        assert "small_file.txt" not in forgotten.message

    # ------------------------------------------------------------------
    # Case 5: no metadata dict → no forgotten-files message even if dropped
    # ------------------------------------------------------------------

    def test_no_metadata_dict_means_no_forgotten_message(self) -> None:
        """If all_injected_file_metadata is None (FileReaderTool not enabled),
        no forgotten-files message should be emitted even if file messages
        are dropped by truncation.
        """
        file_msg = create_message("x" * 2000, MessageType.USER, 500)
        file_msg.file_id = "file-abc"

        history = [
            file_msg,
            create_message("Got it", MessageType.ASSISTANT, 10),
            create_message("Tell me more", MessageType.USER, 10),
        ]

        result = self._build(
            history,
            available_tokens=100,
            all_injected_file_metadata=None,
        )

        forgotten = self._find_forgotten_message(result)
        assert (
            forgotten is None
        ), "No forgotten-files message when metadata dict is None"

    # ------------------------------------------------------------------
    # Case 6: orphaned metadata with multiple files, all summarized away
    # ------------------------------------------------------------------

    def test_multiple_orphaned_files_all_appear_in_forgotten(self) -> None:
        """All files from summarized-away messages should be listed in the
        forgotten-files message.
        """
        meta_a = _make_file_metadata("file-a", "report.pdf")
        meta_b = _make_file_metadata("file-b", "data.csv")

        # Both original messages were removed by summary truncation;
        # only post-summary messages remain.
        history = [
            create_message("Earlier discussion summarized", MessageType.ASSISTANT, 15),
            create_message("What patterns do you see?", MessageType.USER, 10),
        ]

        result = self._build(
            history,
            available_tokens=10_000,
            all_injected_file_metadata={"file-a": meta_a, "file-b": meta_b},
        )

        forgotten = self._find_forgotten_message(result)
        assert forgotten is not None
        assert "report.pdf" in forgotten.message
        assert "data.csv" in forgotten.message

    # ------------------------------------------------------------------
    # Case 7: file metadata persists across many turns after truncation
    # ------------------------------------------------------------------

    def test_forgotten_metadata_persists_across_many_turns(self) -> None:
        """Simulates the real bug: after the file's original message is
        summarized away, every subsequent turn should still include the
        forgotten-files metadata — not just the first turn after truncation.
        """
        file_meta = _make_file_metadata("file-abc", "moby_dick.txt")

        # Build several turns AFTER the file was already summarized away.
        # Each turn, construct_message_history is called fresh with the
        # same all_injected_file_metadata.
        for turn in range(5):
            messages = [
                create_message("Summary", MessageType.ASSISTANT, 15),
            ]
            # Add some back-and-forth after the summary
            for i in range(turn):
                messages.append(create_message(f"Question {i}", MessageType.USER, 5))
                messages.append(create_message(f"Answer {i}", MessageType.ASSISTANT, 5))
            messages.append(
                create_message(f"Latest question (turn {turn})", MessageType.USER, 5)
            )

            result = self._build(
                messages,
                available_tokens=10_000,
                all_injected_file_metadata={"file-abc": file_meta},
            )

            forgotten = self._find_forgotten_message(result)
            assert (
                forgotten is not None
            ), f"Turn {turn}: forgotten-files message must persist every turn"
            assert "moby_dick.txt" in forgotten.message


class TestFallbackToolExtraction:
    def _tool_defs(self) -> list[dict]:
        return [
            {
                "type": "function",
                "function": {
                    "name": "internal_search",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "queries": {
                                "type": "array",
                                "items": {"type": "string"},
                            }
                        },
                        "required": ["queries"],
                    },
                },
            }
        ]

    def test_noop_if_fallback_was_already_attempted(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=True,
            tool_defs=self._tool_defs(),
            turn_index=0,
        )

        assert result is llm_step_result
        assert attempted is False

    def test_extracts_from_answer_when_required_and_no_tool_calls(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=3,
        )

        assert attempted is True
        assert result.tool_calls is not None
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "internal_search"
        assert result.tool_calls[0].tool_args == {"queries": ["alpha"]}
        assert result.tool_calls[0].placement == Placement(turn_index=3)

    def test_falls_back_to_reasoning_when_answer_has_no_tool_calls(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning='{"name":"internal_search","arguments":{"queries":["beta"]}}',
            answer="I should search first.",
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=5,
        )

        assert attempted is True
        assert result.tool_calls is not None
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "internal_search"
        assert result.tool_calls[0].tool_args == {"queries": ["beta"]}
        assert result.tool_calls[0].placement == Placement(turn_index=5)

    def test_extracts_xml_style_invoke_from_answer_when_required(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer=(
                '<function_calls><invoke name="internal_search">'
                '<parameter name="queries" string="false">'
                '["Onyx documentation", "Onyx docs", "Onyx platform"]'
                "</parameter></invoke></function_calls>"
            ),
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=7,
        )

        assert attempted is True
        assert result.tool_calls is not None
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "internal_search"
        assert result.tool_calls[0].tool_args == {
            "queries": ["Onyx documentation", "Onyx docs", "Onyx platform"]
        }
        assert result.tool_calls[0].placement == Placement(turn_index=7)

    def test_extracts_xml_style_invoke_from_answer_when_auto(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            # Runtime-faithful shape: filtered answer is empty, raw answer has XML payload.
            answer=None,
            raw_answer=(
                '<function_calls><invoke name="internal_search">'
                '<parameter name="queries" string="false">'
                '["Onyx documentation", "Onyx docs", "Onyx internal docs"]'
                "</parameter></invoke></function_calls>"
            ),
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.AUTO,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=9,
        )

        assert attempted is True
        assert result.tool_calls is not None
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "internal_search"
        assert result.tool_calls[0].tool_args == {
            "queries": ["Onyx documentation", "Onyx docs", "Onyx internal docs"]
        }
        assert result.tool_calls[0].placement == Placement(turn_index=9)

    def test_extracts_from_raw_answer_when_filtered_answer_has_no_xml(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer="",
            raw_answer=(
                '<function_calls><invoke name="internal_search">'
                '<parameter name="queries" string="false">'
                '["Onyx documentation", "Onyx docs"]'
                "</parameter></invoke></function_calls>"
            ),
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.AUTO,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=10,
        )

        assert attempted is True
        assert result.tool_calls is not None
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "internal_search"
        assert result.tool_calls[0].tool_args == {
            "queries": ["Onyx documentation", "Onyx docs"]
        }
        assert result.tool_calls[0].placement == Placement(turn_index=10)

    def test_does_not_attempt_fallback_for_auto_without_tool_call_hints(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer="Here is a normal answer with no tool call payload.",
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.AUTO,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=2,
        )

        assert result is llm_step_result
        assert attempted is False

    def test_returns_unchanged_when_required_but_nothing_extractable(self) -> None:
        llm_step_result = LlmStepResult(
            reasoning="Need more info.",
            answer="Let me think.",
            tool_calls=None,
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=1,
        )

        assert result is llm_step_result
        assert attempted is True
        assert result.tool_calls is None

    def test_noop_when_tool_calls_already_present(self) -> None:
        existing_call = ToolCallKickoff(
            tool_call_id="call_existing",
            tool_name="internal_search",
            tool_args={"queries": ["already-set"]},
            placement=Placement(turn_index=0),
        )
        llm_step_result = LlmStepResult(
            reasoning=None,
            answer='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
            tool_calls=[existing_call],
        )

        result, attempted = _try_fallback_tool_extraction(
            llm_step_result=llm_step_result,
            tool_choice=ToolChoiceOptions.REQUIRED,
            fallback_extraction_attempted=False,
            tool_defs=self._tool_defs(),
            turn_index=0,
        )

        assert result is llm_step_result
        assert attempted is False


class TestEmptyLlmResponseClassification:
    def _make_llm(self, provider: str = "openai", model: str = "gpt-5.2") -> Mock:
        llm = Mock()
        llm.config = LLMConfig(
            model_provider=provider,
            model_name=model,
            temperature=0.0,
            max_input_tokens=4096,
        )
        return llm

    def test_openai_empty_stream_is_classified_as_budget_exceeded(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        monkeypatch.setattr("onyx.chat.llm_loop.is_true_openai_model", lambda *_: True)

        err = _build_empty_llm_response_error(
            llm=self._make_llm(),
            llm_step_result=LlmStepResult(
                reasoning=None,
                answer=None,
                tool_calls=None,
                raw_answer=None,
            ),
            tool_choice=ToolChoiceOptions.AUTO,
        )

        assert isinstance(err, EmptyLLMResponseError)
        assert err.error_code == "BUDGET_EXCEEDED"
        assert err.is_retryable is False
        assert "quota" in err.client_error_msg.lower()

    def test_reasoning_only_response_uses_generic_empty_response_error(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        monkeypatch.setattr("onyx.chat.llm_loop.is_true_openai_model", lambda *_: True)

        err = _build_empty_llm_response_error(
            llm=self._make_llm(),
            llm_step_result=LlmStepResult(
                reasoning="scratchpad only",
                answer=None,
                tool_calls=None,
                raw_answer=None,
            ),
            tool_choice=ToolChoiceOptions.AUTO,
        )

        assert isinstance(err, EmptyLLMResponseError)
        assert err.error_code == "EMPTY_LLM_RESPONSE"
        assert err.is_retryable is True
        assert "quota" not in err.client_error_msg.lower()


================================================
FILE: backend/tests/unit/onyx/chat/test_llm_step.py
================================================
"""Tests for llm_step.py, specifically sanitization and argument parsing."""

from typing import Any

import pytest

from onyx.chat.llm_step import _extract_tool_call_kickoffs
from onyx.chat.llm_step import _increment_turns
from onyx.chat.llm_step import _parse_tool_args_to_dict
from onyx.chat.llm_step import _resolve_tool_arguments
from onyx.chat.llm_step import _XmlToolCallContentFilter
from onyx.chat.llm_step import extract_tool_calls_from_response_text
from onyx.chat.llm_step import translate_history_to_llm_format
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLMConfig
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ToolMessage
from onyx.llm.models import UserMessage
from onyx.server.query_and_chat.placement import Placement
from onyx.utils.postgres_sanitization import sanitize_string


class TestSanitizeLlmOutput:
    """Tests for the sanitize_string function."""

    def test_removes_null_bytes(self) -> None:
        """Test that NULL bytes are removed from strings."""
        assert sanitize_string("hello\x00world") == "helloworld"
        assert sanitize_string("\x00start") == "start"
        assert sanitize_string("end\x00") == "end"
        assert sanitize_string("\x00\x00\x00") == ""

    def test_removes_surrogates(self) -> None:
        """Test that UTF-16 surrogates are removed from strings."""
        # Low surrogate
        assert sanitize_string("hello\ud800world") == "helloworld"
        # High surrogate
        assert sanitize_string("hello\udfffworld") == "helloworld"
        # Middle of surrogate range
        assert sanitize_string("test\uda00value") == "testvalue"

    def test_removes_mixed_bad_characters(self) -> None:
        """Test removal of both NULL bytes and surrogates together."""
        assert sanitize_string("a\x00b\ud800c\udfffd") == "abcd"

    def test_preserves_valid_unicode(self) -> None:
        """Test that valid Unicode characters are preserved."""
        # Emojis
        assert sanitize_string("hello 👋 world") == "hello 👋 world"
        # Chinese characters
        assert sanitize_string("你好世界") == "你好世界"
        # Mixed scripts
        assert sanitize_string("Hello мир 世界") == "Hello мир 世界"

    def test_empty_string(self) -> None:
        """Test that empty strings are handled correctly."""
        assert sanitize_string("") == ""

    def test_normal_ascii(self) -> None:
        """Test that normal ASCII strings pass through unchanged."""
        assert sanitize_string("hello world") == "hello world"
        assert sanitize_string('{"key": "value"}') == '{"key": "value"}'


class TestParseToolArgsToDict:
    """Tests for the _parse_tool_args_to_dict function."""

    def test_none_input(self) -> None:
        """Test that None returns empty dict."""
        assert _parse_tool_args_to_dict(None) == {}

    def test_dict_input(self) -> None:
        """Test that dict input is returned with parsed JSON string values."""
        result = _parse_tool_args_to_dict({"key": "value", "num": 42})
        assert result == {"key": "value", "num": 42}

    def test_dict_with_json_string_values(self) -> None:
        """Test that JSON string values in dict are parsed."""
        result = _parse_tool_args_to_dict({"queries": '["q1", "q2"]'})
        assert result == {"queries": ["q1", "q2"]}

    def test_json_string_input(self) -> None:
        """Test that JSON string is parsed to dict."""
        result = _parse_tool_args_to_dict('{"key": "value"}')
        assert result == {"key": "value"}

    def test_double_encoded_json(self) -> None:
        """Test that double-encoded JSON string is parsed correctly."""
        # This is: '"{\\"key\\": \\"value\\"}"'
        double_encoded = '"\\"{\\\\\\"key\\\\\\": \\\\\\"value\\\\\\"}\\"'
        # Actually let's use a simpler approach
        import json

        inner = {"key": "value"}
        single_encoded = json.dumps(inner)  # '{"key": "value"}'
        double_encoded = json.dumps(single_encoded)  # '"{\\"key\\": \\"value\\"}"'
        result = _parse_tool_args_to_dict(double_encoded)
        assert result == {"key": "value"}

    def test_invalid_json_returns_empty_dict(self) -> None:
        """Test that invalid JSON returns empty dict."""
        assert _parse_tool_args_to_dict("not json") == {}
        assert _parse_tool_args_to_dict("{invalid}") == {}

    def test_non_dict_json_returns_empty_dict(self) -> None:
        """Test that non-dict JSON (like arrays) returns empty dict."""
        assert _parse_tool_args_to_dict("[1, 2, 3]") == {}
        assert _parse_tool_args_to_dict('"just a string"') == {}

    def test_non_string_non_dict_returns_empty_dict(self) -> None:
        """Test that non-string, non-dict types return empty dict."""
        assert _parse_tool_args_to_dict(123) == {}
        assert _parse_tool_args_to_dict(["list"]) == {}

    # Sanitization tests

    def test_dict_input_sanitizes_null_bytes(self) -> None:
        """Test that NULL bytes in dict values are sanitized."""
        result = _parse_tool_args_to_dict({"query": "hello\x00world"})
        assert result == {"query": "helloworld"}

    def test_dict_input_sanitizes_surrogates(self) -> None:
        """Test that surrogates in dict values are sanitized."""
        result = _parse_tool_args_to_dict({"query": "hello\ud800world"})
        assert result == {"query": "helloworld"}

    def test_json_string_sanitizes_null_bytes(self) -> None:
        """Test that NULL bytes in JSON string are sanitized before parsing."""
        # JSON with NULL byte in value
        json_str = '{"query": "hello\x00world"}'
        result = _parse_tool_args_to_dict(json_str)
        assert result == {"query": "helloworld"}

    def test_json_string_sanitizes_surrogates(self) -> None:
        """Test that surrogates in JSON string are sanitized before parsing."""
        json_str = '{"query": "hello\ud800world"}'
        result = _parse_tool_args_to_dict(json_str)
        assert result == {"query": "helloworld"}

    def test_nested_dict_values_sanitized(self) -> None:
        """Test that nested JSON string values are also sanitized."""
        # Dict with a JSON string value that contains bad characters
        result = _parse_tool_args_to_dict({"queries": '["q1\x00", "q2\ud800"]'})
        assert result == {"queries": ["q1", "q2"]}

    def test_preserves_valid_unicode_in_dict(self) -> None:
        """Test that valid Unicode is preserved in dict values."""
        result = _parse_tool_args_to_dict({"query": "hello 👋 世界"})
        assert result == {"query": "hello 👋 世界"}

    def test_preserves_valid_unicode_in_json(self) -> None:
        """Test that valid Unicode is preserved in JSON string."""
        json_str = '{"query": "hello 👋 世界"}'
        result = _parse_tool_args_to_dict(json_str)
        assert result == {"query": "hello 👋 世界"}


class TestExtractToolCallsFromResponseText:
    def _tool_defs(self) -> list[dict]:
        return [
            {
                "type": "function",
                "function": {
                    "name": "internal_search",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "queries": {
                                "type": "array",
                                "items": {"type": "string"},
                            }
                        },
                        "required": ["queries"],
                    },
                },
            }
        ]

    def _placement(self) -> Placement:
        return Placement(turn_index=0, tab_index=0, sub_turn_index=None)

    def test_collapses_nested_arguments_duplicate(self) -> None:
        response_text = '{"name":"internal_search","arguments":{"queries":["alpha"]}}'
        tool_calls = extract_tool_calls_from_response_text(
            response_text=response_text,
            tool_definitions=self._tool_defs(),
            placement=self._placement(),
        )
        assert len(tool_calls) == 1
        assert tool_calls[0].tool_name == "internal_search"
        assert tool_calls[0].tool_args == {"queries": ["alpha"]}

    def test_keeps_non_duplicated_sequence(self) -> None:
        response_text = "\n".join(
            [
                '{"name":"internal_search","arguments":{"queries":["alpha"]}}',
                '{"name":"internal_search","arguments":{"queries":["beta"]}}',
            ]
        )
        tool_calls = extract_tool_calls_from_response_text(
            response_text=response_text,
            tool_definitions=self._tool_defs(),
            placement=self._placement(),
        )
        assert len(tool_calls) == 2
        assert [call.tool_args for call in tool_calls] == [
            {"queries": ["alpha"]},
            {"queries": ["beta"]},
        ]

    def test_keeps_intentional_duplicate_tool_calls(self) -> None:
        response_text = "\n".join(
            [
                '{"name":"internal_search","arguments":{"queries":["alpha"]}}',
                '{"name":"internal_search","arguments":{"queries":["alpha"]}}',
            ]
        )
        tool_calls = extract_tool_calls_from_response_text(
            response_text=response_text,
            tool_definitions=self._tool_defs(),
            placement=self._placement(),
        )
        assert len(tool_calls) == 2
        assert [call.tool_args for call in tool_calls] == [
            {"queries": ["alpha"]},
            {"queries": ["alpha"]},
        ]

    def test_extracts_xml_style_invoke_tool_call(self) -> None:
        response_text = """
<function_calls>
<invoke name="internal_search">
<parameter name="queries" string="false">["Onyx documentation", "Onyx docs", "Onyx platform"]</parameter>
</invoke>
</function_calls>
"""
        tool_calls = extract_tool_calls_from_response_text(
            response_text=response_text,
            tool_definitions=self._tool_defs(),
            placement=self._placement(),
        )
        assert len(tool_calls) == 1
        assert tool_calls[0].tool_name == "internal_search"
        assert tool_calls[0].tool_args == {
            "queries": ["Onyx documentation", "Onyx docs", "Onyx platform"]
        }

    def test_ignores_unknown_tool_in_xml_style_invoke(self) -> None:
        response_text = """
<function_calls>
<invoke name="unknown_tool">
<parameter name="queries" string="false">["Onyx docs"]</parameter>
</invoke>
</function_calls>
"""
        tool_calls = extract_tool_calls_from_response_text(
            response_text=response_text,
            tool_definitions=self._tool_defs(),
            placement=self._placement(),
        )
        assert len(tool_calls) == 0


class TestExtractToolCallKickoffs:
    """Tests for the _extract_tool_call_kickoffs function."""

    def test_valid_tool_call(self) -> None:
        tool_call_map = {
            0: {
                "id": "call_123",
                "name": "internal_search",
                "arguments": '{"queries": ["test"]}',
            }
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)
        assert len(result) == 1
        assert result[0].tool_name == "internal_search"
        assert result[0].tool_args == {"queries": ["test"]}

    def test_invalid_json_arguments_returns_empty_dict(self) -> None:
        """Verify that malformed JSON arguments produce an empty dict
        rather than raising an exception. This confirms the dead try/except
        around _parse_tool_args_to_dict was safe to remove."""
        tool_call_map = {
            0: {
                "id": "call_bad",
                "name": "internal_search",
                "arguments": "not valid json {{{",
            }
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)
        assert len(result) == 1
        assert result[0].tool_args == {}

    def test_none_arguments_returns_empty_dict(self) -> None:
        tool_call_map = {
            0: {
                "id": "call_none",
                "name": "internal_search",
                "arguments": None,
            }
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)
        assert len(result) == 1
        assert result[0].tool_args == {}

    def test_skips_entries_missing_id_or_name(self) -> None:
        tool_call_map: dict[int, dict[str, Any]] = {
            0: {"id": None, "name": "internal_search", "arguments": "{}"},
            1: {"id": "call_1", "name": None, "arguments": "{}"},
            2: {"id": "call_2", "name": "internal_search", "arguments": "{}"},
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)
        assert len(result) == 1
        assert result[0].tool_call_id == "call_2"

    def test_tab_index_auto_increments(self) -> None:
        tool_call_map = {
            0: {"id": "c1", "name": "tool_a", "arguments": "{}"},
            1: {"id": "c2", "name": "tool_b", "arguments": "{}"},
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)
        assert result[0].placement.tab_index == 0
        assert result[1].placement.tab_index == 1

    def test_tab_index_override(self) -> None:
        tool_call_map = {
            0: {"id": "c1", "name": "tool_a", "arguments": "{}"},
            1: {"id": "c2", "name": "tool_b", "arguments": "{}"},
        }
        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0, tab_index=5)
        assert result[0].placement.tab_index == 5
        assert result[1].placement.tab_index == 5


class TestXmlToolCallContentFilter:
    def test_strips_function_calls_block_single_chunk(self) -> None:
        f = _XmlToolCallContentFilter()
        output = f.process(
            "prefix "
            '<function_calls><invoke name="internal_search">'
            '<parameter name="queries" string="false">["Onyx docs"]</parameter>'
            "</invoke></function_calls> suffix"
        )
        output += f.flush()
        assert output == "prefix  suffix"

    def test_strips_function_calls_block_split_across_chunks(self) -> None:
        f = _XmlToolCallContentFilter()
        chunks = [
            "Start ",
            "<function_",
            'calls><invoke name="internal_search">',
            '<parameter name="queries" string="false">["Onyx docs"]',
            "</parameter></invoke></function_calls>",
            " End",
        ]
        output = "".join(f.process(chunk) for chunk in chunks) + f.flush()
        assert output == "Start  End"

    def test_preserves_non_tool_call_xml(self) -> None:
        f = _XmlToolCallContentFilter()
        output = f.process("A <tag>value</tag> B")
        output += f.flush()
        assert output == "A <tag>value</tag> B"

    def test_does_not_strip_similar_tag_names(self) -> None:
        f = _XmlToolCallContentFilter()
        output = f.process(
            "A <function_calls_v2><invoke>noop</invoke></function_calls_v2> B"
        )
        output += f.flush()
        assert (
            output == "A <function_calls_v2><invoke>noop</invoke></function_calls_v2> B"
        )


class TestIncrementTurns:
    """Tests for the _increment_turns helper used by _close_reasoning_if_active."""

    def test_increments_turn_index_when_no_sub_turn(self) -> None:
        turn, sub = _increment_turns(0, None)
        assert turn == 1
        assert sub is None

    def test_increments_sub_turn_when_present(self) -> None:
        turn, sub = _increment_turns(3, 0)
        assert turn == 3
        assert sub == 1

    def test_increments_sub_turn_from_nonzero(self) -> None:
        turn, sub = _increment_turns(5, 2)
        assert turn == 5
        assert sub == 3


class TestResolveToolArguments:
    """Tests for the _resolve_tool_arguments helper."""

    def test_dict_arguments(self) -> None:
        obj = {"arguments": {"queries": ["test"]}}
        assert _resolve_tool_arguments(obj) == {"queries": ["test"]}

    def test_dict_parameters(self) -> None:
        """Falls back to 'parameters' key when 'arguments' is missing."""
        obj = {"parameters": {"queries": ["test"]}}
        assert _resolve_tool_arguments(obj) == {"queries": ["test"]}

    def test_arguments_takes_precedence_over_parameters(self) -> None:
        obj = {"arguments": {"a": 1}, "parameters": {"b": 2}}
        assert _resolve_tool_arguments(obj) == {"a": 1}

    def test_json_string_arguments(self) -> None:
        obj = {"arguments": '{"queries": ["test"]}'}
        assert _resolve_tool_arguments(obj) == {"queries": ["test"]}

    def test_invalid_json_string_returns_empty_dict(self) -> None:
        obj = {"arguments": "not valid json"}
        assert _resolve_tool_arguments(obj) == {}

    def test_no_arguments_or_parameters_returns_empty_dict(self) -> None:
        obj = {"name": "some_tool"}
        assert _resolve_tool_arguments(obj) == {}

    def test_non_dict_non_string_arguments_returns_none(self) -> None:
        """When arguments resolves to a list or int, returns None."""
        assert _resolve_tool_arguments({"arguments": [1, 2, 3]}) is None
        assert _resolve_tool_arguments({"arguments": 42}) is None


class TestTranslateHistoryToLlmFormat:
    @staticmethod
    def _llm_config(provider: str) -> LLMConfig:
        return LLMConfig(
            model_provider=provider,
            model_name="test-model",
            temperature=0,
            max_input_tokens=8192,
        )

    @staticmethod
    def _tool_history() -> list[ChatMessageSimple]:
        return [
            ChatMessageSimple(
                message="",
                token_count=5,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="51381e0b0",
                        tool_name="internal_search",
                        tool_arguments={"queries": ["alpha"]},
                    )
                ],
            ),
            ChatMessageSimple(
                message="tool result body",
                token_count=5,
                message_type=MessageType.TOOL_CALL_RESPONSE,
                tool_call_id="51381e0b0",
            ),
        ]

    def test_preserves_structured_tool_history_for_non_ollama(self) -> None:
        translated = translate_history_to_llm_format(
            history=self._tool_history(),
            llm_config=self._llm_config(LlmProviderNames.OPENAI),
        )
        assert isinstance(translated, list)

        assert isinstance(translated[0], AssistantMessage)
        assert translated[0].tool_calls is not None
        assert translated[0].tool_calls[0].id == "51381e0b0"
        assert isinstance(translated[1], ToolMessage)
        assert translated[1].tool_call_id == "51381e0b0"

    def test_flattens_tool_history_for_ollama(self) -> None:
        translated = translate_history_to_llm_format(
            history=self._tool_history(),
            llm_config=self._llm_config(LlmProviderNames.OLLAMA_CHAT),
        )
        assert isinstance(translated, list)

        assert isinstance(translated[0], AssistantMessage)
        assert translated[0].tool_calls is None
        assert translated[0].content is not None
        assert "51381e0b0" in translated[0].content

        assert isinstance(translated[1], UserMessage)
        assert "51381e0b0" in translated[1].content
        assert "tool result body" in translated[1].content

    def test_flattens_multiple_assistant_tool_calls_for_ollama(self) -> None:
        history = [
            ChatMessageSimple(
                message="I will use tools now.",
                token_count=5,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="call-a",
                        tool_name="internal_search",
                        tool_arguments={"queries": ["alpha"]},
                    ),
                    ToolCallSimple(
                        tool_call_id="call-b",
                        tool_name="internal_search",
                        tool_arguments={"queries": ["beta"]},
                    ),
                ],
            )
        ]
        translated = translate_history_to_llm_format(
            history=history,
            llm_config=self._llm_config(LlmProviderNames.OLLAMA_CHAT),
        )

        assert isinstance(translated, list)
        assert isinstance(translated[0], AssistantMessage)
        assert translated[0].tool_calls is None
        assert translated[0].content == (
            "I will use tools now.\n"
            '[Tool Call] name=internal_search id=call-a args={"queries": ["alpha"]}\n'
            '[Tool Call] name=internal_search id=call-b args={"queries": ["beta"]}'
        )

    @pytest.mark.parametrize(
        "provider",
        [
            LlmProviderNames.OPENAI,
            LlmProviderNames.OLLAMA_CHAT,
        ],
    )
    def test_tool_call_response_requires_tool_call_id(self, provider: str) -> None:
        with pytest.raises(ValueError, match="tool_call_id is not available"):
            translate_history_to_llm_format(
                history=[
                    ChatMessageSimple(
                        message="tool result body",
                        token_count=5,
                        message_type=MessageType.TOOL_CALL_RESPONSE,
                        tool_call_id=None,
                    )
                ],
                llm_config=self._llm_config(provider),
            )


================================================
FILE: backend/tests/unit/onyx/chat/test_multi_model_streaming.py
================================================
"""Unit tests for multi-model streaming validation and DB helpers.

These are pure unit tests — no real database or LLM calls required.
The validation logic in handle_multi_model_stream fires before any external
calls, so we can trigger it with lightweight mocks.
"""

import time
from collections.abc import Generator
from typing import Any
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest

from onyx.chat.models import StreamingError
from onyx.configs.constants import MessageType
from onyx.db.chat import set_preferred_response
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.utils.variable_functionality import global_version


@pytest.fixture(autouse=True)
def _restore_ee_version() -> Generator[None, None, None]:
    """Reset EE global state after each test.

    Importing onyx.chat.process_message triggers set_is_ee_based_on_env_variable()
    (via the celery client import chain).  Without this fixture, the EE flag stays
    True for the rest of the session and breaks unrelated tests that mock Confluence
    or other connectors and assume EE is disabled.
    """
    original = global_version._is_ee
    yield
    global_version._is_ee = original


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_request(**kwargs: Any) -> SendMessageRequest:
    defaults: dict[str, Any] = {
        "message": "hello",
        "chat_session_id": uuid4(),
    }
    defaults.update(kwargs)
    return SendMessageRequest(**defaults)


def _make_override(provider: str = "openai", version: str = "gpt-4") -> LLMOverride:
    return LLMOverride(model_provider=provider, model_version=version)


def _first_from_stream(req: SendMessageRequest, overrides: list[LLMOverride]) -> Any:
    """Return the first item yielded by handle_multi_model_stream."""
    from onyx.chat.process_message import handle_multi_model_stream

    user = MagicMock()
    user.is_anonymous = False
    user.email = "test@example.com"
    db = MagicMock()

    gen = handle_multi_model_stream(req, user, db, overrides)
    return next(gen)


# ---------------------------------------------------------------------------
# handle_multi_model_stream — validation
# ---------------------------------------------------------------------------


class TestRunMultiModelStreamValidation:
    def test_single_override_yields_error(self) -> None:
        """Exactly 1 override is not multi-model — yields StreamingError."""
        req = _make_request()
        result = _first_from_stream(req, [_make_override()])
        assert isinstance(result, StreamingError)
        assert "2-3" in result.error

    def test_four_overrides_yields_error(self) -> None:
        """4 overrides exceeds maximum — yields StreamingError."""
        req = _make_request()
        result = _first_from_stream(
            req,
            [
                _make_override("openai", "gpt-4"),
                _make_override("anthropic", "claude-3"),
                _make_override("google", "gemini-pro"),
                _make_override("cohere", "command-r"),
            ],
        )
        assert isinstance(result, StreamingError)
        assert "2-3" in result.error

    def test_zero_overrides_yields_error(self) -> None:
        """Empty override list yields StreamingError."""
        req = _make_request()
        result = _first_from_stream(req, [])
        assert isinstance(result, StreamingError)
        assert "2-3" in result.error

    def test_deep_research_yields_error(self) -> None:
        """deep_research=True is incompatible with multi-model — yields StreamingError."""
        req = _make_request(deep_research=True)
        result = _first_from_stream(
            req, [_make_override(), _make_override("anthropic", "claude-3")]
        )
        assert isinstance(result, StreamingError)
        assert "not supported" in result.error

    def test_exactly_two_overrides_is_minimum(self) -> None:
        """Boundary: 1 override yields error, 2 overrides passes validation."""
        req = _make_request()
        # 1 override must yield a StreamingError
        result = _first_from_stream(req, [_make_override()])
        assert isinstance(
            result, StreamingError
        ), "1 override should yield StreamingError"
        # 2 overrides must NOT yield a validation StreamingError (may raise later due to
        # missing session, that's OK — validation itself passed)
        try:
            result2 = _first_from_stream(
                req, [_make_override(), _make_override("anthropic", "claude-3")]
            )
            if isinstance(result2, StreamingError) and "2-3" in result2.error:
                pytest.fail(
                    f"2 overrides should pass validation, got StreamingError: {result2.error}"
                )
        except Exception:
            pass  # Any non-validation error means validation passed


# ---------------------------------------------------------------------------
# set_preferred_response — validation (mocked db)
# ---------------------------------------------------------------------------


class TestSetPreferredResponseValidation:
    def test_user_message_not_found(self) -> None:
        db = MagicMock()
        db.get.return_value = None

        with pytest.raises(ValueError, match="not found"):
            set_preferred_response(
                db, user_message_id=999, preferred_assistant_message_id=1
            )

    def test_wrong_message_type(self) -> None:
        """Cannot set preferred response on a non-USER message."""
        db = MagicMock()
        user_msg = MagicMock()
        user_msg.message_type = MessageType.ASSISTANT  # wrong type

        db.get.return_value = user_msg

        with pytest.raises(ValueError, match="not a user message"):
            set_preferred_response(
                db, user_message_id=1, preferred_assistant_message_id=2
            )

    def test_assistant_message_not_found(self) -> None:
        db = MagicMock()
        user_msg = MagicMock()
        user_msg.message_type = MessageType.USER

        # First call returns user_msg, second call (for assistant) returns None
        db.get.side_effect = [user_msg, None]

        with pytest.raises(ValueError, match="not found"):
            set_preferred_response(
                db, user_message_id=1, preferred_assistant_message_id=2
            )

    def test_assistant_not_child_of_user(self) -> None:
        db = MagicMock()
        user_msg = MagicMock()
        user_msg.message_type = MessageType.USER

        assistant_msg = MagicMock()
        assistant_msg.parent_message_id = 999  # different parent

        db.get.side_effect = [user_msg, assistant_msg]

        with pytest.raises(ValueError, match="not a child"):
            set_preferred_response(
                db, user_message_id=1, preferred_assistant_message_id=2
            )

    def test_valid_call_sets_preferred_response_id(self) -> None:
        db = MagicMock()
        user_msg = MagicMock()
        user_msg.message_type = MessageType.USER

        assistant_msg = MagicMock()
        assistant_msg.parent_message_id = 1  # correct parent

        db.get.side_effect = [user_msg, assistant_msg]

        set_preferred_response(db, user_message_id=1, preferred_assistant_message_id=2)

        assert user_msg.preferred_response_id == 2
        assert user_msg.latest_child_message_id == 2


# ---------------------------------------------------------------------------
# LLMOverride — display_name field
# ---------------------------------------------------------------------------


class TestLLMOverrideDisplayName:
    def test_display_name_defaults_none(self) -> None:
        override = LLMOverride(model_provider="openai", model_version="gpt-4")
        assert override.display_name is None

    def test_display_name_set(self) -> None:
        override = LLMOverride(
            model_provider="openai",
            model_version="gpt-4",
            display_name="GPT-4 Turbo",
        )
        assert override.display_name == "GPT-4 Turbo"

    def test_display_name_serializes(self) -> None:
        override = LLMOverride(
            model_provider="anthropic",
            model_version="claude-opus-4-6",
            display_name="Claude Opus",
        )
        d = override.model_dump()
        assert d["display_name"] == "Claude Opus"


# ---------------------------------------------------------------------------
# _run_models — drain loop behaviour
# ---------------------------------------------------------------------------


def _make_setup(n_models: int = 1) -> MagicMock:
    """Minimal ChatTurnSetup mock whose fields pass Pydantic validation in _run_model."""
    setup = MagicMock()
    setup.llms = [MagicMock() for _ in range(n_models)]
    setup.model_display_names = [f"model-{i}" for i in range(n_models)]
    setup.check_is_connected = MagicMock(return_value=True)
    setup.reserved_messages = [MagicMock() for _ in range(n_models)]
    setup.reserved_token_count = 100
    # Fields consumed by SearchToolConfig / CustomToolConfig / FileReaderToolConfig
    # constructors inside _run_model — must be typed correctly for Pydantic.
    setup.new_msg_req.deep_research = False
    setup.new_msg_req.internal_search_filters = None
    setup.new_msg_req.allowed_tool_ids = None
    setup.new_msg_req.include_citations = True
    setup.search_params.project_id_filter = None
    setup.search_params.persona_id_filter = None
    setup.bypass_acl = False
    setup.slack_context = None
    setup.available_files.user_file_ids = []
    setup.available_files.chat_file_ids = []
    setup.forced_tool_id = None
    setup.simple_chat_history = []
    setup.chat_session.id = uuid4()
    setup.user_message.id = None
    setup.custom_tool_additional_headers = None
    setup.mcp_headers = None
    return setup


def _run_models_collect(setup: MagicMock) -> list:
    """Drive _run_models to completion and return all yielded items."""
    from onyx.chat.process_message import _run_models

    return list(_run_models(setup, MagicMock(), MagicMock()))


class TestRunModels:
    """Tests for the _run_models worker-thread drain loop.

    All external dependencies (LLM, DB, tools) are patched out.  Worker threads
    still run but return immediately since run_llm_loop is mocked.
    """

    def test_n1_overall_stop_from_llm_loop_passes_through(self) -> None:
        """OverallStop emitted by run_llm_loop is passed through the drain loop unchanged."""

        def emit_stop(**kwargs: Any) -> None:
            kwargs["emitter"].emit(
                Packet(
                    placement=Placement(turn_index=0),
                    obj=OverallStop(stop_reason="complete"),
                )
            )

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_stop),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(_make_setup(n_models=1))

        stops = [
            p
            for p in packets
            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)
        ]
        assert len(stops) == 1
        stop_obj = stops[0].obj
        assert isinstance(stop_obj, OverallStop)
        assert stop_obj.stop_reason == "complete"

    def test_n1_emitted_packet_has_model_index_zero(self) -> None:
        """Single-model path: model_index is 0 (Emitter defaults model_idx=0)."""

        def emit_one(**kwargs: Any) -> None:
            kwargs["emitter"].emit(
                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
            )

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(_make_setup(n_models=1))

        reasoning = [
            p
            for p in packets
            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
        ]
        assert len(reasoning) == 1
        assert reasoning[0].placement.model_index == 0

    def test_n2_each_model_packet_tagged_with_its_index(self) -> None:
        """Multi-model path: packets from model 0 get index=0, model 1 gets index=1."""

        def emit_one(**kwargs: Any) -> None:
            # _model_idx is set by _run_model based on position in setup.llms
            emitter = kwargs["emitter"]
            emitter.emit(
                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
            )

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(_make_setup(n_models=2))

        reasoning = [
            p
            for p in packets
            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
        ]
        assert len(reasoning) == 2
        indices = {p.placement.model_index for p in reasoning}
        assert indices == {0, 1}

    def test_model_error_yields_streaming_error(self) -> None:
        """An exception inside a worker thread is surfaced as a StreamingError."""

        def always_fail(**_kwargs: Any) -> None:
            raise RuntimeError("intentional test failure")

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(_make_setup(n_models=1))

        errors = [p for p in packets if isinstance(p, StreamingError)]
        assert len(errors) == 1
        assert errors[0].error_code == "MODEL_ERROR"
        assert "intentional test failure" in errors[0].error

    def test_one_model_error_does_not_stop_other_models(self) -> None:
        """A failing model yields StreamingError; the surviving model's packets still arrive."""
        setup = _make_setup(n_models=2)

        def fail_model_0_succeed_model_1(**kwargs: Any) -> None:
            if kwargs["llm"] is setup.llms[0]:
                raise RuntimeError("model 0 failed")
            kwargs["emitter"].emit(
                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
            )

        with (
            patch(
                "onyx.chat.process_message.run_llm_loop",
                side_effect=fail_model_0_succeed_model_1,
            ),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(setup)

        errors = [p for p in packets if isinstance(p, StreamingError)]
        assert len(errors) == 1

        reasoning = [
            p
            for p in packets
            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
        ]
        assert len(reasoning) == 1
        assert reasoning[0].placement.model_index == 1

    def test_cancellation_yields_user_cancelled_stop(self) -> None:
        """If check_is_connected returns False, drain loop emits user_cancelled."""

        def slow_llm(**_kwargs: Any) -> None:
            time.sleep(0.3)  # Outlasts the 50 ms queue-poll interval

        setup = _make_setup(n_models=1)
        setup.check_is_connected = MagicMock(return_value=False)

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            packets = _run_models_collect(setup)

        stops = [
            p
            for p in packets
            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)
        ]
        assert any(
            isinstance(s.obj, OverallStop) and s.obj.stop_reason == "user_cancelled"
            for s in stops
        )

    def test_stop_button_calls_completion_for_all_models(self) -> None:
        """llm_loop_completion_handle must be called for all models when the stop button fires.

        Regression test for the disconnect-cleanup bug: the old
        run_chat_loop_with_state_containers always called completion_callback in
        its finally block (even on disconnect) so the DB message was updated from
        the TERMINATED placeholder to a partial answer.  The new _run_models must
        replicate this — otherwise the integration test
        test_send_message_disconnect_and_cleanup fails because the message stays
        as "Response was terminated prior to completion, try regenerating."
        """

        def slow_llm(**_kwargs: Any) -> None:
            time.sleep(0.3)

        setup = _make_setup(n_models=2)
        setup.check_is_connected = MagicMock(return_value=False)

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle"
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            _run_models_collect(setup)

        # Must be called once per model, not zero times
        assert mock_handle.call_count == 2

    def test_completion_handle_called_for_each_successful_model(self) -> None:
        """llm_loop_completion_handle must be called once per model that succeeded."""
        setup = _make_setup(n_models=2)

        with (
            patch("onyx.chat.process_message.run_llm_loop"),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle"
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            _run_models_collect(setup)

        assert mock_handle.call_count == 2

    def test_completion_handle_not_called_for_failed_model(self) -> None:
        """llm_loop_completion_handle must be skipped for a model that raised."""

        def always_fail(**_kwargs: Any) -> None:
            raise RuntimeError("fail")

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle"
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            _run_models_collect(_make_setup(n_models=1))

        mock_handle.assert_not_called()

    def test_http_disconnect_completion_via_generator_exit(self) -> None:
        """GeneratorExit from HTTP disconnect triggers main-thread completion.

        When the HTTP client closes the connection, Starlette throws GeneratorExit
        into the stream generator. The finally block sets drain_done (signalling
        emitters to stop blocking), waits for workers via executor.shutdown(wait=True),
        then calls llm_loop_completion_handle for each successful model from the main
        thread.

        This is the primary regression for test_send_message_disconnect_and_cleanup:
        the integration test disconnects mid-stream and expects the DB message to be
        updated from the TERMINATED placeholder to the real response.
        """
        import threading

        completion_called = threading.Event()

        def emit_then_block_until_drain(**kwargs: Any) -> None:
            """Emit one packet (to give the drain loop a yield point), then block
            until drain_done is set — simulating a mid-stream LLM call that exits
            promptly once the emitter signals shutdown.
            """
            emitter = kwargs["emitter"]
            emitter.emit(
                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
            )
            # Block until drain_done is set by gen.close(). The Emitter's _drain_done
            # is the same Event that _run_models sets, so this unblocks promptly.
            emitter._drain_done.wait(timeout=5)

        setup = _make_setup(n_models=1)
        # is_connected() always True — HTTP disconnect does NOT set the Redis stop fence.
        setup.check_is_connected = MagicMock(return_value=True)

        with (
            patch(
                "onyx.chat.process_message.run_llm_loop",
                side_effect=emit_then_block_until_drain,
            ),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle",
                side_effect=lambda *_, **__: completion_called.set(),
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            from onyx.chat.process_message import _run_models

            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))
            first = next(gen)
            assert isinstance(first, Packet)
            # Simulate Starlette closing the stream on HTTP client disconnect.
            # gen.close() → GeneratorExit → finally → drain_done.set() →
            # executor.shutdown(wait=True) → main thread completes models.
            gen.close()

            assert (
                completion_called.is_set()
            ), "main thread must call completion for the successful model"
            assert mock_handle.call_count == 1

    def test_b1_race_disconnect_handler_completes_already_finished_model(self) -> None:
        """B1 regression: model finishes BEFORE GeneratorExit fires.

        The worker exits _run_model before drain_done is set. When gen.close()
        fires afterward, the finally block sets drain_done, waits for workers
        (already done), then the main thread calls llm_loop_completion_handle.

        Contrast with test_http_disconnect_completion_via_generator_exit, which
        tests the opposite ordering (worker finishes AFTER disconnect).
        """
        import threading
        import time

        completion_called = threading.Event()

        def emit_and_return_immediately(**kwargs: Any) -> None:
            # Emit one packet so the drain loop has something to yield, then return
            # immediately — no blocking.  The worker will be done in microseconds.
            kwargs["emitter"].emit(
                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
            )

        setup = _make_setup(n_models=1)
        setup.check_is_connected = MagicMock(return_value=True)

        with (
            patch(
                "onyx.chat.process_message.run_llm_loop",
                side_effect=emit_and_return_immediately,
            ),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle",
                side_effect=lambda *_, **__: completion_called.set(),
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            from onyx.chat.process_message import _run_models

            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))
            first = next(gen)
            assert isinstance(first, Packet)

            # Give the worker thread time to finish completely (emit + return +
            # finally + self-completion check).  It does almost no work, so 100 ms
            # is far more than enough while still keeping the test fast.
            time.sleep(0.1)

            # Now close — worker is already done, so else-branch handles completion.
            gen.close()

            assert completion_called.wait(
                timeout=5
            ), "disconnect handler must call completion for a model that already finished"
            assert mock_handle.call_count == 1, "completion must be called exactly once"

    def test_stop_button_does_not_call_completion_for_errored_model(self) -> None:
        """B2 regression: stop-button must NOT call completion for an errored model.

        When model 0 raises an exception, its reserved ChatMessage must not be
        saved with 'stopped by user' — that message is wrong for a model that
        errored.  llm_loop_completion_handle must only be called for non-errored
        models when the stop button fires.
        """

        def fail_model_0(**kwargs: Any) -> None:
            if kwargs["llm"] is setup.llms[0]:
                raise RuntimeError("model 0 errored")
            # Model 1: run forever (stop button fires before it finishes)
            time.sleep(10)

        setup = _make_setup(n_models=2)
        # Return False immediately so the stop-button path fires while model 1
        # is still sleeping (model 0 has already errored by then).
        setup.check_is_connected = lambda: False

        with (
            patch("onyx.chat.process_message.run_llm_loop", side_effect=fail_model_0),
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch(
                "onyx.chat.process_message.llm_loop_completion_handle"
            ) as mock_handle,
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            _run_models_collect(setup)

        # Completion must NOT be called for model 0 (it errored).
        # It MAY be called for model 1 (still in-flight when stop fired).
        for call in mock_handle.call_args_list:
            assert (
                call.kwargs.get("llm") is not setup.llms[0]
            ), "llm_loop_completion_handle must not be called for the errored model"

    def test_external_state_container_used_for_model_zero(self) -> None:
        """When provided, external_state_container is used as state_containers[0]."""
        from onyx.chat.chat_state import ChatStateContainer
        from onyx.chat.process_message import _run_models

        external = ChatStateContainer()
        setup = _make_setup(n_models=1)

        with (
            patch("onyx.chat.process_message.run_llm_loop") as mock_llm,
            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
            patch("onyx.chat.process_message.construct_tools", return_value={}),
            patch("onyx.chat.process_message.get_session_with_current_tenant"),
            patch("onyx.chat.process_message.llm_loop_completion_handle"),
            patch(
                "onyx.chat.process_message.get_llm_token_counter",
                return_value=lambda _: 0,
            ),
        ):
            list(
                _run_models(
                    setup, MagicMock(), MagicMock(), external_state_container=external
                )
            )

        # The state_container kwarg passed to run_llm_loop must be the external one
        call_kwargs = mock_llm.call_args.kwargs
        assert call_kwargs["state_container"] is external


================================================
FILE: backend/tests/unit/onyx/chat/test_multi_model_types.py
================================================
"""Unit tests for multi-model answer generation types.

Tests cover:
- Placement.model_index serialization
- MultiModelMessageResponseIDInfo round-trip
- SendMessageRequest.llm_overrides backward compatibility
- ChatMessageDetail new fields
"""

from datetime import datetime
from datetime import timezone
from uuid import uuid4

from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import ChatMessageDetail
from onyx.server.query_and_chat.models import ModelResponseSlot
from onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement


class TestPlacementModelIndex:
    def test_default_none(self) -> None:
        p = Placement(turn_index=0)
        assert p.model_index is None

    def test_set_value(self) -> None:
        p = Placement(turn_index=0, model_index=2)
        assert p.model_index == 2

    def test_serializes(self) -> None:
        p = Placement(turn_index=0, tab_index=1, model_index=1)
        d = p.model_dump()
        assert d["model_index"] == 1

    def test_none_excluded_when_default(self) -> None:
        p = Placement(turn_index=0)
        d = p.model_dump()
        assert d["model_index"] is None


class TestMultiModelMessageResponseIDInfo:
    def test_round_trip(self) -> None:
        info = MultiModelMessageResponseIDInfo(
            user_message_id=42,
            responses=[
                ModelResponseSlot(message_id=43, model_name="gpt-4"),
                ModelResponseSlot(message_id=44, model_name="claude-opus"),
                ModelResponseSlot(message_id=45, model_name="gemini-pro"),
            ],
        )
        d = info.model_dump()
        restored = MultiModelMessageResponseIDInfo(**d)
        assert restored.user_message_id == 42
        assert [s.message_id for s in restored.responses] == [43, 44, 45]
        assert [s.model_name for s in restored.responses] == [
            "gpt-4",
            "claude-opus",
            "gemini-pro",
        ]

    def test_null_user_message_id(self) -> None:
        info = MultiModelMessageResponseIDInfo(
            user_message_id=None,
            responses=[
                ModelResponseSlot(message_id=1, model_name="a"),
                ModelResponseSlot(message_id=2, model_name="b"),
            ],
        )
        assert info.user_message_id is None


class TestSendMessageRequestOverrides:
    def test_llm_overrides_default_none(self) -> None:
        req = SendMessageRequest(
            message="hello",
            chat_session_id=uuid4(),
        )
        assert req.llm_overrides is None

    def test_llm_overrides_accepts_list(self) -> None:
        overrides = [
            LLMOverride(model_provider="openai", model_version="gpt-4"),
            LLMOverride(model_provider="anthropic", model_version="claude-opus"),
        ]
        req = SendMessageRequest(
            message="hello",
            chat_session_id=uuid4(),
            llm_overrides=overrides,
        )
        assert req.llm_overrides is not None
        assert len(req.llm_overrides) == 2

    def test_backward_compat_single_override(self) -> None:
        req = SendMessageRequest(
            message="hello",
            chat_session_id=uuid4(),
            llm_override=LLMOverride(model_provider="openai", model_version="gpt-4"),
        )
        assert req.llm_override is not None
        assert req.llm_overrides is None


class TestChatMessageDetailMultiModel:
    def test_defaults_none(self) -> None:
        from onyx.configs.constants import MessageType

        detail = ChatMessageDetail(
            message_id=1,
            message="hello",
            message_type=MessageType.ASSISTANT,
            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),
            files=[],
        )
        assert detail.preferred_response_id is None
        assert detail.model_display_name is None

    def test_set_values(self) -> None:
        from onyx.configs.constants import MessageType

        detail = ChatMessageDetail(
            message_id=1,
            message="hello",
            message_type=MessageType.USER,
            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),
            files=[],
            preferred_response_id=42,
            model_display_name="GPT-4",
        )
        assert detail.preferred_response_id == 42
        assert detail.model_display_name == "GPT-4"

    def test_serializes(self) -> None:
        from onyx.configs.constants import MessageType

        detail = ChatMessageDetail(
            message_id=1,
            message="hello",
            message_type=MessageType.ASSISTANT,
            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),
            files=[],
            model_display_name="Claude Opus",
        )
        d = detail.model_dump()
        assert d["model_display_name"] == "Claude Opus"
        assert d["preferred_response_id"] is None


================================================
FILE: backend/tests/unit/onyx/chat/test_process_message.py
================================================
import pytest

from onyx.chat.process_message import _resolve_query_processing_hook_result
from onyx.chat.process_message import remove_answer_citations
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingResponse


def test_remove_answer_citations_strips_http_markdown_citation() -> None:
    answer = "The answer is Paris [[1]](https://example.com/doc)."

    assert remove_answer_citations(answer) == "The answer is Paris."


def test_remove_answer_citations_strips_empty_markdown_citation() -> None:
    answer = "The answer is Paris [[1]]()."

    assert remove_answer_citations(answer) == "The answer is Paris."


def test_remove_answer_citations_strips_citation_with_parentheses_in_url() -> None:
    answer = (
        "The answer is Paris "
        "[[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
    )

    assert remove_answer_citations(answer) == "The answer is Paris."


def test_remove_answer_citations_preserves_non_citation_markdown_links() -> None:
    answer = (
        "See [reference](https://example.com/Function_(mathematics)) "
        "for context [[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
    )

    assert (
        remove_answer_citations(answer)
        == "See [reference](https://example.com/Function_(mathematics)) for context."
    )


# ---------------------------------------------------------------------------
# Query Processing hook response handling (_resolve_query_processing_hook_result)
# ---------------------------------------------------------------------------


def test_hook_skipped_leaves_message_text_unchanged() -> None:
    result = _resolve_query_processing_hook_result(HookSkipped(), "original query")
    assert result == "original query"


def test_hook_soft_failed_leaves_message_text_unchanged() -> None:
    result = _resolve_query_processing_hook_result(HookSoftFailed(), "original query")
    assert result == "original query"


def test_null_query_raises_query_rejected() -> None:
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(query=None), "original query"
        )
    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED


def test_empty_string_query_raises_query_rejected() -> None:
    """Empty string is falsy — must be treated as rejection, same as None."""
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(query=""), "original query"
        )
    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED


def test_whitespace_only_query_raises_query_rejected() -> None:
    """Whitespace-only string is truthy but meaningless — must be treated as rejection."""
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(query="   "), "original query"
        )
    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED


def test_absent_query_field_raises_query_rejected() -> None:
    """query defaults to None when not provided."""
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(), "original query"
        )
    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED


def test_rejection_message_surfaced_in_error_when_provided() -> None:
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(
                query=None, rejection_message="Queries about X are not allowed."
            ),
            "original query",
        )
    assert "Queries about X are not allowed." in str(exc_info.value)


def test_fallback_rejection_message_when_none() -> None:
    """No rejection_message → generic fallback used in OnyxError detail."""
    with pytest.raises(OnyxError) as exc_info:
        _resolve_query_processing_hook_result(
            QueryProcessingResponse(query=None, rejection_message=None),
            "original query",
        )
    assert "No rejection reason was provided." in str(exc_info.value)


def test_nonempty_query_rewrites_message_text() -> None:
    result = _resolve_query_processing_hook_result(
        QueryProcessingResponse(query="rewritten query"), "original query"
    )
    assert result == "rewritten query"


================================================
FILE: backend/tests/unit/onyx/chat/test_process_message_mock_llm.py
================================================
from unittest.mock import Mock

import pytest

from onyx.chat import process_message
from onyx.chat.models import AnswerStream
from onyx.chat.models import StreamingError
from onyx.configs import app_configs
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest


def test_mock_llm_response_requires_integration_mode() -> None:
    assert (
        app_configs.INTEGRATION_TESTS_MODE is False
    ), "Unit tests expect INTEGRATION_TESTS_MODE=false."
    assert (
        process_message.INTEGRATION_TESTS_MODE is False
    ), "process_message should reflect INTEGRATION_TESTS_MODE=false in unit tests."

    request = SendMessageRequest(
        message="test",
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["alpha"]}}',
    )
    mock_user = Mock()
    mock_user.id = "user-id"
    mock_user.is_anonymous = False
    mock_user.email = "user@example.com"

    with pytest.raises(
        ValueError,
        match="mock_llm_response can only be used when INTEGRATION_TESTS_MODE=true",
    ):
        next(
            process_message.handle_stream_message_objects(
                new_msg_req=request,
                user=mock_user,
                db_session=Mock(),
            )
        )


def test_gather_stream_returns_empty_answer_when_streaming_error_only() -> None:
    packets: AnswerStream = iter(
        [
            MessageResponseIDInfo(
                user_message_id=None,
                reserved_assistant_message_id=42,
            ),
            StreamingError(
                error="OpenAI quota exceeded",
                error_code="BUDGET_EXCEEDED",
                is_retryable=False,
            ),
        ]
    )

    result = process_message.gather_stream(packets)

    assert result.answer == ""
    assert result.answer_citationless == ""
    assert result.error_msg == "OpenAI quota exceeded"
    assert result.message_id == 42


================================================
FILE: backend/tests/unit/onyx/chat/test_save_chat.py
================================================
"""Tests for save_chat.py.

Covers _extract_referenced_file_descriptors and sanitization in save_chat_turn.
"""

from unittest.mock import MagicMock

from pytest import MonkeyPatch

from onyx.chat import save_chat
from onyx.chat.save_chat import _extract_referenced_file_descriptors
from onyx.file_store.models import ChatFileType
from onyx.tools.models import PythonExecutionFile
from onyx.tools.models import ToolCallInfo


def _make_tool_call_info(
    generated_files: list[PythonExecutionFile] | None = None,
    tool_name: str = "python",
) -> ToolCallInfo:
    return ToolCallInfo(
        parent_tool_call_id=None,
        turn_index=0,
        tab_index=0,
        tool_name=tool_name,
        tool_call_id="tc_1",
        tool_id=1,
        reasoning_tokens=None,
        tool_call_arguments={"code": "print('hi')"},
        tool_call_response="{}",
        generated_files=generated_files,
    )


# ---- _extract_referenced_file_descriptors tests ----


def test_returns_empty_when_no_generated_files() -> None:
    tool_call = _make_tool_call_info(generated_files=None)
    result = _extract_referenced_file_descriptors([tool_call], "some message")
    assert result == []


def test_returns_empty_when_file_not_referenced() -> None:
    files = [
        PythonExecutionFile(
            filename="chart.png",
            file_link="http://localhost/api/chat/file/abc-123",
        )
    ]
    tool_call = _make_tool_call_info(generated_files=files)
    result = _extract_referenced_file_descriptors([tool_call], "Here is your answer.")
    assert result == []


def test_extracts_referenced_file() -> None:
    file_id = "abc-123-def"
    files = [
        PythonExecutionFile(
            filename="chart.png",
            file_link=f"http://localhost/api/chat/file/{file_id}",
        )
    ]
    tool_call = _make_tool_call_info(generated_files=files)
    message = (
        f"Here is the chart: [chart.png](http://localhost/api/chat/file/{file_id})"
    )

    result = _extract_referenced_file_descriptors([tool_call], message)

    assert len(result) == 1
    assert result[0]["id"] == file_id
    assert result[0]["type"] == ChatFileType.IMAGE
    assert result[0]["name"] == "chart.png"


def test_filters_unreferenced_files() -> None:
    referenced_id = "ref-111"
    unreferenced_id = "unref-222"
    files = [
        PythonExecutionFile(
            filename="chart.png",
            file_link=f"http://localhost/api/chat/file/{referenced_id}",
        ),
        PythonExecutionFile(
            filename="data.csv",
            file_link=f"http://localhost/api/chat/file/{unreferenced_id}",
        ),
    ]
    tool_call = _make_tool_call_info(generated_files=files)
    message = f"Here is the chart: [chart.png](http://localhost/api/chat/file/{referenced_id})"

    result = _extract_referenced_file_descriptors([tool_call], message)

    assert len(result) == 1
    assert result[0]["id"] == referenced_id
    assert result[0]["name"] == "chart.png"


def test_extracts_from_multiple_tool_calls() -> None:
    id_1 = "file-aaa"
    id_2 = "file-bbb"
    tc1 = _make_tool_call_info(
        generated_files=[
            PythonExecutionFile(
                filename="plot.png",
                file_link=f"http://localhost/api/chat/file/{id_1}",
            )
        ]
    )
    tc2 = _make_tool_call_info(
        generated_files=[
            PythonExecutionFile(
                filename="report.csv",
                file_link=f"http://localhost/api/chat/file/{id_2}",
            )
        ]
    )
    message = f"[plot.png](http://localhost/api/chat/file/{id_1}) and [report.csv](http://localhost/api/chat/file/{id_2})"

    result = _extract_referenced_file_descriptors([tc1, tc2], message)

    assert len(result) == 2
    ids = {d["id"] for d in result}
    assert ids == {id_1, id_2}


def test_csv_file_type() -> None:
    file_id = "csv-123"
    files = [
        PythonExecutionFile(
            filename="data.csv",
            file_link=f"http://localhost/api/chat/file/{file_id}",
        )
    ]
    tool_call = _make_tool_call_info(generated_files=files)
    message = f"[data.csv](http://localhost/api/chat/file/{file_id})"

    result = _extract_referenced_file_descriptors([tool_call], message)

    assert len(result) == 1
    assert result[0]["type"] == ChatFileType.TABULAR


def test_unknown_extension_defaults_to_plain_text() -> None:
    file_id = "bin-456"
    files = [
        PythonExecutionFile(
            filename="output.xyz",
            file_link=f"http://localhost/api/chat/file/{file_id}",
        )
    ]
    tool_call = _make_tool_call_info(generated_files=files)
    message = f"[output.xyz](http://localhost/api/chat/file/{file_id})"

    result = _extract_referenced_file_descriptors([tool_call], message)

    assert len(result) == 1
    assert result[0]["type"] == ChatFileType.PLAIN_TEXT


def test_skips_tool_calls_without_generated_files() -> None:
    file_id = "img-789"
    tc_no_files = _make_tool_call_info(generated_files=None)
    tc_empty = _make_tool_call_info(generated_files=[])
    tc_with_files = _make_tool_call_info(
        generated_files=[
            PythonExecutionFile(
                filename="result.png",
                file_link=f"http://localhost/api/chat/file/{file_id}",
            )
        ]
    )
    message = f"[result.png](http://localhost/api/chat/file/{file_id})"

    result = _extract_referenced_file_descriptors(
        [tc_no_files, tc_empty, tc_with_files], message
    )

    assert len(result) == 1
    assert result[0]["id"] == file_id


# ---- save_chat_turn sanitization test ----


def test_save_chat_turn_sanitizes_message_and_reasoning(
    monkeypatch: MonkeyPatch,
) -> None:
    mock_tokenizer = MagicMock()
    mock_tokenizer.encode.return_value = [1, 2, 3]
    monkeypatch.setattr(save_chat, "get_tokenizer", lambda *_a, **_kw: mock_tokenizer)

    mock_msg = MagicMock()
    mock_msg.id = 1
    mock_msg.chat_session_id = "test"
    mock_msg.files = None

    mock_session = MagicMock()

    save_chat.save_chat_turn(
        message_text="hello\x00world\ud800",
        reasoning_tokens="think\x00ing\udfff",
        tool_calls=[],
        citation_to_doc={},
        all_search_docs={},
        db_session=mock_session,
        assistant_message=mock_msg,
    )

    assert mock_msg.message == "helloworld"
    assert mock_msg.reasoning_tokens == "thinking"


================================================
FILE: backend/tests/unit/onyx/chat/test_stop_signal_checker.py
================================================
"""Unit tests for stop_signal_checker and chat_processing_checker.

These modules are safety-critical — they control whether a chat stream
continues or stops.  The tests use a simple in-memory CacheBackend stub
so no external services are needed.
"""

from uuid import uuid4

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
from onyx.chat.chat_processing_checker import is_chat_session_processing
from onyx.chat.chat_processing_checker import set_processing_status
from onyx.chat.stop_signal_checker import FENCE_TTL
from onyx.chat.stop_signal_checker import is_connected
from onyx.chat.stop_signal_checker import reset_cancel_status
from onyx.chat.stop_signal_checker import set_fence


class _MemoryCacheBackend(CacheBackend):
    """Minimal in-memory CacheBackend for unit tests."""

    def __init__(self) -> None:
        self._store: dict[str, bytes] = {}

    def get(self, key: str) -> bytes | None:
        return self._store.get(key)

    def set(
        self,
        key: str,
        value: str | bytes | int | float,
        ex: int | None = None,  # noqa: ARG002
    ) -> None:
        if isinstance(value, bytes):
            self._store[key] = value
        else:
            self._store[key] = str(value).encode()

    def delete(self, key: str) -> None:
        self._store.pop(key, None)

    def exists(self, key: str) -> bool:
        return key in self._store

    def expire(self, key: str, seconds: int) -> None:
        pass

    def ttl(self, key: str) -> int:
        return -2 if key not in self._store else -1

    def lock(self, name: str, timeout: float | None = None) -> CacheLock:
        raise NotImplementedError

    def rpush(self, key: str, value: str | bytes) -> None:
        raise NotImplementedError

    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
        raise NotImplementedError


# ── stop_signal_checker ──────────────────────────────────────────────


class TestSetFence:
    def test_set_fence_true_creates_key(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_fence(sid, cache, True)
        assert not is_connected(sid, cache)

    def test_set_fence_false_removes_key(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_fence(sid, cache, True)
        set_fence(sid, cache, False)
        assert is_connected(sid, cache)

    def test_set_fence_false_noop_when_absent(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_fence(sid, cache, False)
        assert is_connected(sid, cache)

    def test_set_fence_uses_ttl(self) -> None:
        """Verify set_fence passes ex=FENCE_TTL to cache.set."""
        calls: list[dict[str, object]] = []
        cache = _MemoryCacheBackend()
        original_set = cache.set

        def tracking_set(
            key: str,
            value: str | bytes | int | float,
            ex: int | None = None,
        ) -> None:
            calls.append({"key": key, "ex": ex})
            original_set(key, value, ex=ex)

        cache.set = tracking_set  # type: ignore[method-assign]

        set_fence(uuid4(), cache, True)
        assert len(calls) == 1
        assert calls[0]["ex"] == FENCE_TTL


class TestIsConnected:
    def test_connected_when_no_fence(self) -> None:
        cache = _MemoryCacheBackend()
        assert is_connected(uuid4(), cache)

    def test_disconnected_when_fence_set(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_fence(sid, cache, True)
        assert not is_connected(sid, cache)

    def test_sessions_are_isolated(self) -> None:
        cache = _MemoryCacheBackend()
        sid1, sid2 = uuid4(), uuid4()
        set_fence(sid1, cache, True)
        assert not is_connected(sid1, cache)
        assert is_connected(sid2, cache)


class TestResetCancelStatus:
    def test_clears_fence(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_fence(sid, cache, True)
        reset_cancel_status(sid, cache)
        assert is_connected(sid, cache)

    def test_noop_when_no_fence(self) -> None:
        cache = _MemoryCacheBackend()
        reset_cancel_status(uuid4(), cache)


# ── chat_processing_checker ──────────────────────────────────────────


class TestSetProcessingStatus:
    def test_set_true_marks_processing(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_processing_status(sid, cache, True)
        assert is_chat_session_processing(sid, cache)

    def test_set_false_clears_processing(self) -> None:
        cache = _MemoryCacheBackend()
        sid = uuid4()
        set_processing_status(sid, cache, True)
        set_processing_status(sid, cache, False)
        assert not is_chat_session_processing(sid, cache)


class TestIsChatSessionProcessing:
    def test_not_processing_by_default(self) -> None:
        cache = _MemoryCacheBackend()
        assert not is_chat_session_processing(uuid4(), cache)

    def test_sessions_are_isolated(self) -> None:
        cache = _MemoryCacheBackend()
        sid1, sid2 = uuid4(), uuid4()
        set_processing_status(sid1, cache, True)
        assert is_chat_session_processing(sid1, cache)
        assert not is_chat_session_processing(sid2, cache)


================================================
FILE: backend/tests/unit/onyx/connectors/airtable/test_airtable_index_all.py
================================================
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.airtable.airtable_connector import parse_airtable_url
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.models import Document


def _make_field_schema(field_id: str, name: str, field_type: str) -> MagicMock:
    field = MagicMock()
    field.id = field_id
    field.name = name
    field.type = field_type
    return field


def _make_table_schema(
    table_id: str,
    table_name: str,
    primary_field_id: str,
    fields: list[MagicMock],
) -> MagicMock:
    schema = MagicMock()
    schema.id = table_id
    schema.name = table_name
    schema.primary_field_id = primary_field_id
    schema.fields = fields
    schema.views = []
    return schema


def _make_record(record_id: str, fields: dict[str, Any]) -> dict[str, Any]:
    return {"id": record_id, "fields": fields}


def _make_base_info(base_id: str, name: str) -> MagicMock:
    info = MagicMock()
    info.id = base_id
    info.name = name
    return info


def _make_table_obj(table_id: str, name: str) -> MagicMock:
    obj = MagicMock()
    obj.id = table_id
    obj.name = name
    return obj


def _setup_mock_api(
    bases: list[dict[str, Any]],
) -> MagicMock:
    """Set up a mock AirtableApi with bases, tables, records, and schemas.

    Args:
        bases: List of dicts with keys: id, name, tables.
               Each table is a dict with: id, name, primary_field_id, fields, records.
               Each field is a dict with: id, name, type.
               Each record is a dict with: id, fields.
    """
    mock_api = MagicMock()

    base_infos = [_make_base_info(b["id"], b["name"]) for b in bases]
    mock_api.bases.return_value = base_infos

    def base_side_effect(base_id: str) -> MagicMock:
        mock_base = MagicMock()
        base_data = next((b for b in bases if b["id"] == base_id), None)
        if not base_data:
            raise ValueError(f"Unknown base: {base_id}")

        table_objs = [_make_table_obj(t["id"], t["name"]) for t in base_data["tables"]]
        mock_base.tables.return_value = table_objs
        return mock_base

    mock_api.base.side_effect = base_side_effect

    def table_side_effect(base_id: str, table_name_or_id: str) -> MagicMock:
        base_data = next((b for b in bases if b["id"] == base_id), None)
        if not base_data:
            raise ValueError(f"Unknown base: {base_id}")

        table_data = next(
            (
                t
                for t in base_data["tables"]
                if t["id"] == table_name_or_id or t["name"] == table_name_or_id
            ),
            None,
        )
        if not table_data:
            raise ValueError(f"Unknown table: {table_name_or_id}")

        mock_table = MagicMock()
        mock_table.name = table_data["name"]
        mock_table.all.return_value = [
            _make_record(r["id"], r["fields"]) for r in table_data["records"]
        ]

        field_schemas = [
            _make_field_schema(f["id"], f["name"], f["type"])
            for f in table_data["fields"]
        ]
        schema = _make_table_schema(
            table_data["id"],
            table_data["name"],
            table_data["primary_field_id"],
            field_schemas,
        )
        mock_table.schema.return_value = schema
        return mock_table

    mock_api.table.side_effect = table_side_effect
    return mock_api


SAMPLE_BASES = [
    {
        "id": "appBASE1",
        "name": "Base One",
        "tables": [
            {
                "id": "tblTABLE1",
                "name": "Table A",
                "primary_field_id": "fld1",
                "fields": [
                    {"id": "fld1", "name": "Name", "type": "singleLineText"},
                    {"id": "fld2", "name": "Notes", "type": "multilineText"},
                ],
                "records": [
                    {"id": "recA1", "fields": {"Name": "Alice", "Notes": "Note A"}},
                    {"id": "recA2", "fields": {"Name": "Bob", "Notes": "Note B"}},
                ],
            },
            {
                "id": "tblTABLE2",
                "name": "Table B",
                "primary_field_id": "fld3",
                "fields": [
                    {"id": "fld3", "name": "Title", "type": "singleLineText"},
                    {"id": "fld4", "name": "Status", "type": "singleSelect"},
                ],
                "records": [
                    {"id": "recB1", "fields": {"Title": "Task 1", "Status": "Done"}},
                ],
            },
        ],
    },
    {
        "id": "appBASE2",
        "name": "Base Two",
        "tables": [
            {
                "id": "tblTABLE3",
                "name": "Table C",
                "primary_field_id": "fld5",
                "fields": [
                    {"id": "fld5", "name": "Item", "type": "singleLineText"},
                ],
                "records": [
                    {"id": "recC1", "fields": {"Item": "Widget"}},
                ],
            },
        ],
    },
]


def _collect_docs(connector: AirtableConnector) -> list[Document]:
    docs: list[Document] = []
    for batch in connector.load_from_state():
        for item in batch:
            if isinstance(item, Document):
                docs.append(item)
    return docs


class TestIndexAll:
    @patch("time.sleep")
    def test_index_all_discovers_all_bases_and_tables(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        connector = AirtableConnector()
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        # 2 records from Table A + 1 from Table B + 1 from Table C = 4
        assert len(docs) == 4
        doc_ids = {d.id for d in docs}
        assert doc_ids == {
            "airtable__recA1",
            "airtable__recA2",
            "airtable__recB1",
            "airtable__recC1",
        }

    @patch("time.sleep")
    def test_index_all_semantic_id_includes_base_name(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        connector = AirtableConnector()
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)
        docs_by_id = {d.id: d for d in docs}

        assert (
            docs_by_id["airtable__recA1"].semantic_identifier
            == "Base One > Table A: Alice"
        )
        assert (
            docs_by_id["airtable__recB1"].semantic_identifier
            == "Base One > Table B: Task 1"
        )
        assert (
            docs_by_id["airtable__recC1"].semantic_identifier
            == "Base Two > Table C: Widget"
        )

    @patch("time.sleep")
    def test_index_all_hierarchy_source_path(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        """Verify doc_metadata hierarchy source_path is [base_name, table_name]."""
        connector = AirtableConnector()
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)
        docs_by_id = {d.id: d for d in docs}

        doc_a1 = docs_by_id["airtable__recA1"]
        assert doc_a1.doc_metadata is not None
        assert doc_a1.doc_metadata["hierarchy"]["source_path"] == [
            "Base One",
            "Table A",
        ]
        assert doc_a1.doc_metadata["hierarchy"]["base_name"] == "Base One"
        assert doc_a1.doc_metadata["hierarchy"]["table_name"] == "Table A"

        doc_c1 = docs_by_id["airtable__recC1"]
        assert doc_c1.doc_metadata is not None
        assert doc_c1.doc_metadata["hierarchy"]["source_path"] == [
            "Base Two",
            "Table C",
        ]

    @patch("time.sleep")
    def test_index_all_empty_account(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        connector = AirtableConnector()
        mock_api = MagicMock()
        mock_api.bases.return_value = []
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)
        assert len(docs) == 0

    @patch("time.sleep")
    def test_index_all_skips_failing_table(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        """If one table fails, other tables should still be indexed."""
        bases = [
            {
                "id": "appBASE1",
                "name": "Base One",
                "tables": [
                    {
                        "id": "tblGOOD",
                        "name": "Good Table",
                        "primary_field_id": "fld1",
                        "fields": [
                            {"id": "fld1", "name": "Name", "type": "singleLineText"},
                        ],
                        "records": [
                            {"id": "recOK", "fields": {"Name": "Works"}},
                        ],
                    },
                    {
                        "id": "tblBAD",
                        "name": "Bad Table",
                        "primary_field_id": "fldX",
                        "fields": [],
                        "records": [],
                    },
                ],
            },
        ]
        mock_api = _setup_mock_api(bases)

        # Make the bad table raise an error when fetching records
        original_table_side_effect = mock_api.table.side_effect

        def table_with_failure(base_id: str, table_name_or_id: str) -> MagicMock:
            if table_name_or_id == "tblBAD":
                mock_table = MagicMock()
                mock_table.all.side_effect = Exception("API Error")
                mock_table.schema.side_effect = Exception("API Error")
                return mock_table
            return original_table_side_effect(base_id, table_name_or_id)

        mock_api.table.side_effect = table_with_failure
        connector = AirtableConnector()
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        # Only the good table's records should come through
        assert len(docs) == 1
        assert docs[0].id == "airtable__recOK"

    @patch("time.sleep")
    def test_index_all_skips_failing_base(
        self,
        mock_sleep: MagicMock,  # noqa: ARG002
    ) -> None:
        """If listing tables for a base fails, other bases should still be indexed."""
        bases_data = [
            {
                "id": "appGOOD",
                "name": "Good Base",
                "tables": [
                    {
                        "id": "tblOK",
                        "name": "OK Table",
                        "primary_field_id": "fld1",
                        "fields": [
                            {"id": "fld1", "name": "Name", "type": "singleLineText"},
                        ],
                        "records": [
                            {"id": "recOK", "fields": {"Name": "Works"}},
                        ],
                    },
                ],
            },
        ]
        mock_api = _setup_mock_api(bases_data)

        # Add a bad base that fails on tables()
        bad_base_info = _make_base_info("appBAD", "Bad Base")
        mock_api.bases.return_value = [
            bad_base_info,
            *mock_api.bases.return_value,
        ]

        original_base_side_effect = mock_api.base.side_effect

        def base_with_failure(base_id: str) -> MagicMock:
            if base_id == "appBAD":
                mock_base = MagicMock()
                mock_base.tables.side_effect = Exception("Permission denied")
                return mock_base
            return original_base_side_effect(base_id)

        mock_api.base.side_effect = base_with_failure

        connector = AirtableConnector()
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        assert len(docs) == 1
        assert docs[0].id == "airtable__recOK"


class TestSpecificTableMode:
    def test_specific_table_unchanged(self) -> None:
        """Verify the original single-table behavior still works."""
        bases = [
            {
                "id": "appBASE1",
                "name": "Base One",
                "tables": [
                    {
                        "id": "tblTABLE1",
                        "name": "Table A",
                        "primary_field_id": "fld1",
                        "fields": [
                            {"id": "fld1", "name": "Name", "type": "singleLineText"},
                            {"id": "fld2", "name": "Notes", "type": "multilineText"},
                        ],
                        "records": [
                            {
                                "id": "recA1",
                                "fields": {"Name": "Alice", "Notes": "Note"},
                            },
                        ],
                    },
                ],
            },
        ]
        mock_api = _setup_mock_api(bases)

        connector = AirtableConnector(
            base_id="appBASE1",
            table_name_or_id="tblTABLE1",
        )
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        assert len(docs) == 1
        assert docs[0].id == "airtable__recA1"
        # No base name prefix in specific mode
        assert docs[0].semantic_identifier == "Table A: Alice"

    def test_specific_table_resolves_base_name_for_hierarchy(self) -> None:
        """In specific mode, bases() is called to resolve the base name for hierarchy."""
        bases = [
            {
                "id": "appBASE1",
                "name": "Base One",
                "tables": [
                    {
                        "id": "tblTABLE1",
                        "name": "Table A",
                        "primary_field_id": "fld1",
                        "fields": [
                            {"id": "fld1", "name": "Name", "type": "singleLineText"},
                        ],
                        "records": [
                            {"id": "recA1", "fields": {"Name": "Test"}},
                        ],
                    },
                ],
            },
        ]
        mock_api = _setup_mock_api(bases)

        connector = AirtableConnector(
            base_id="appBASE1",
            table_name_or_id="tblTABLE1",
        )
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        # bases() is called to resolve the base name for hierarchy source_path
        mock_api.bases.assert_called_once()
        # But base().tables() should NOT be called (no discovery)
        mock_api.base.assert_not_called()
        # Semantic identifier should NOT include base name in specific mode
        assert docs[0].semantic_identifier == "Table A: Test"
        # Hierarchy should include base name for Craft file system
        assert docs[0].doc_metadata is not None
        assert docs[0].doc_metadata["hierarchy"]["source_path"] == [
            "Base One",
            "Table A",
        ]


class TestValidateConnectorSettings:
    def test_validate_index_all_success(self) -> None:
        connector = AirtableConnector()
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api

        # Should not raise
        connector.validate_connector_settings()

    def test_validate_index_all_no_bases(self) -> None:
        connector = AirtableConnector()
        mock_api = MagicMock()
        mock_api.bases.return_value = []
        connector._airtable_client = mock_api

        with pytest.raises(ConnectorValidationError, match="No bases found"):
            connector.validate_connector_settings()

    def test_validate_specific_table_success(self) -> None:
        connector = AirtableConnector(
            base_id="appBASE1",
            table_name_or_id="tblTABLE1",
        )
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api

        # Should not raise
        connector.validate_connector_settings()

    def test_validate_empty_fields_auto_detects_index_all(self) -> None:
        """Empty base_id + table_name_or_id auto-detects as index_all mode."""
        connector = AirtableConnector(
            base_id="",
            table_name_or_id="",
        )
        assert connector.index_all is True

        # Validation should go through the index_all path
        mock_api = _setup_mock_api(SAMPLE_BASES)
        connector._airtable_client = mock_api
        connector.validate_connector_settings()

    def test_validate_specific_table_api_error(self) -> None:
        connector = AirtableConnector(
            base_id="appBAD",
            table_name_or_id="tblBAD",
        )
        mock_api = MagicMock()
        mock_table = MagicMock()
        mock_table.schema.side_effect = Exception("Not found")
        mock_api.table.return_value = mock_table
        connector._airtable_client = mock_api

        with pytest.raises(ConnectorValidationError, match="Failed to access table"):
            connector.validate_connector_settings()


class TestParseAirtableUrl:
    def test_full_url_with_view(self) -> None:
        base_id, table_id, view_id = parse_airtable_url(
            "https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV/viwa3yxZvqWnyXftm?blocks=hide"
        )
        assert base_id == "appZqBgQFQ6kWyeZK"
        assert table_id == "tblc9prNLypy7olTV"
        assert view_id == "viwa3yxZvqWnyXftm"

    def test_url_without_view(self) -> None:
        base_id, table_id, view_id = parse_airtable_url(
            "https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV"
        )
        assert base_id == "appZqBgQFQ6kWyeZK"
        assert table_id == "tblc9prNLypy7olTV"
        assert view_id is None

    def test_url_without_query_params(self) -> None:
        base_id, table_id, view_id = parse_airtable_url(
            "https://airtable.com/appABC123/tblDEF456/viwGHI789"
        )
        assert base_id == "appABC123"
        assert table_id == "tblDEF456"
        assert view_id == "viwGHI789"

    def test_url_with_trailing_whitespace(self) -> None:
        base_id, table_id, view_id = parse_airtable_url(
            "  https://airtable.com/appABC123/tblDEF456  "
        )
        assert base_id == "appABC123"
        assert table_id == "tblDEF456"

    def test_invalid_url_raises(self) -> None:
        with pytest.raises(ValueError, match="Could not parse"):
            parse_airtable_url("https://google.com/something")

    def test_missing_table_raises(self) -> None:
        with pytest.raises(ValueError, match="Could not parse"):
            parse_airtable_url("https://airtable.com/appABC123")

    def test_empty_string_raises(self) -> None:
        with pytest.raises(ValueError, match="Could not parse"):
            parse_airtable_url("")


class TestAirtableUrlConnector:
    def test_url_sets_base_and_table_ids(self) -> None:
        connector = AirtableConnector(
            airtable_url="https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV/viwa3yxZvqWnyXftm?blocks=hide"
        )
        assert connector.base_id == "appZqBgQFQ6kWyeZK"
        assert connector.table_name_or_id == "tblc9prNLypy7olTV"
        assert connector.view_id == "viwa3yxZvqWnyXftm"

    def test_url_without_view_leaves_view_none(self) -> None:
        connector = AirtableConnector(airtable_url="https://airtable.com/appABC/tblDEF")
        assert connector.base_id == "appABC"
        assert connector.table_name_or_id == "tblDEF"
        assert connector.view_id is None

    def test_url_overrides_explicit_base_and_table(self) -> None:
        connector = AirtableConnector(
            base_id="appOLD",
            table_name_or_id="tblOLD",
            airtable_url="https://airtable.com/appNEW/tblNEW",
        )
        assert connector.base_id == "appNEW"
        assert connector.table_name_or_id == "tblNEW"

    def test_url_indexes_correctly(self) -> None:
        """End-to-end: URL-configured connector fetches from the right table."""
        bases = [
            {
                "id": "appFromUrl",
                "name": "URL Base",
                "tables": [
                    {
                        "id": "tblFromUrl",
                        "name": "URL Table",
                        "primary_field_id": "fld1",
                        "fields": [
                            {"id": "fld1", "name": "Name", "type": "singleLineText"},
                        ],
                        "records": [
                            {"id": "recURL1", "fields": {"Name": "From URL"}},
                        ],
                    },
                ],
            },
        ]
        mock_api = _setup_mock_api(bases)

        connector = AirtableConnector(
            airtable_url="https://airtable.com/appFromUrl/tblFromUrl/viwABC"
        )
        connector._airtable_client = mock_api

        docs = _collect_docs(connector)

        assert len(docs) == 1
        assert docs[0].id == "airtable__recURL1"
        assert docs[0].semantic_identifier == "URL Table: From URL"


================================================
FILE: backend/tests/unit/onyx/connectors/asana/test_asana_connector.py
================================================
"""Tests for Asana connector configuration parsing."""

import pytest

from onyx.connectors.asana.connector import AsanaConnector


@pytest.mark.parametrize(
    "project_ids,expected",
    [
        (None, None),
        ("", None),
        ("   ", None),
        (" 123 ", ["123"]),
        (" 123 , , 456 , ", ["123", "456"]),
    ],
)
def test_asana_connector_project_ids_normalization(
    project_ids: str | None, expected: list[str] | None
) -> None:
    connector = AsanaConnector(
        asana_workspace_id=" 1153293530468850 ",
        asana_project_ids=project_ids,
        asana_team_id=" 1210918501948021 ",
    )

    assert connector.workspace_id == "1153293530468850"
    assert connector.project_ids_to_index == expected
    assert connector.asana_team_id == "1210918501948021"


@pytest.mark.parametrize(
    "team_id,expected",
    [
        (None, None),
        ("", None),
        ("   ", None),
        (" 1210918501948021 ", "1210918501948021"),
    ],
)
def test_asana_connector_team_id_normalization(
    team_id: str | None, expected: str | None
) -> None:
    connector = AsanaConnector(
        asana_workspace_id="1153293530468850",
        asana_project_ids=None,
        asana_team_id=team_id,
    )

    assert connector.asana_team_id == expected


================================================
FILE: backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py
================================================
"""Tests for Canvas connector — client, credentials, conversion."""

from datetime import datetime
from datetime import timezone
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.canvas.client import CanvasApiClient
from onyx.connectors.canvas.connector import CanvasConnector
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.error_handling.exceptions import OnyxError

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

FAKE_BASE_URL = "https://myschool.instructure.com"
FAKE_TOKEN = "fake-canvas-token"


def _mock_course(
    course_id: int = 1,
    name: str = "Intro to CS",
    course_code: str = "CS101",
) -> dict[str, Any]:
    return {
        "id": course_id,
        "name": name,
        "course_code": course_code,
        "created_at": "2025-01-01T00:00:00Z",
        "workflow_state": "available",
    }


def _build_connector(base_url: str = FAKE_BASE_URL) -> CanvasConnector:
    """Build a connector with mocked credential validation."""
    with patch("onyx.connectors.canvas.client.rl_requests") as mock_req:
        mock_req.get.return_value = _mock_response(json_data=[_mock_course()])
        connector = CanvasConnector(canvas_base_url=base_url)
        connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
    return connector


def _mock_page(
    page_id: int = 10,
    title: str = "Syllabus",
    updated_at: str = "2025-06-01T12:00:00Z",
) -> dict[str, Any]:
    return {
        "page_id": page_id,
        "url": "syllabus",
        "title": title,
        "body": "<p>Welcome to the course</p>",
        "created_at": "2025-01-15T00:00:00Z",
        "updated_at": updated_at,
    }


def _mock_assignment(
    assignment_id: int = 20,
    name: str = "Homework 1",
    course_id: int = 1,
    updated_at: str = "2025-06-01T12:00:00Z",
) -> dict[str, Any]:
    return {
        "id": assignment_id,
        "name": name,
        "description": "<p>Solve these problems</p>",
        "html_url": f"{FAKE_BASE_URL}/courses/{course_id}/assignments/{assignment_id}",
        "course_id": course_id,
        "created_at": "2025-01-20T00:00:00Z",
        "updated_at": updated_at,
        "due_at": "2025-02-01T23:59:00Z",
    }


def _mock_announcement(
    announcement_id: int = 30,
    title: str = "Class Cancelled",
    course_id: int = 1,
    posted_at: str = "2025-06-01T12:00:00Z",
) -> dict[str, Any]:
    return {
        "id": announcement_id,
        "title": title,
        "message": "<p>No class today</p>",
        "html_url": f"{FAKE_BASE_URL}/courses/{course_id}/discussion_topics/{announcement_id}",
        "posted_at": posted_at,
    }


def _mock_response(
    status_code: int = 200,
    json_data: Any = None,
    link_header: str = "",
) -> MagicMock:
    """Create a mock HTTP response with status, json, and Link header."""
    resp = MagicMock()
    resp.status_code = status_code
    resp.reason = "OK" if status_code < 300 else "Error"
    resp.json.return_value = json_data if json_data is not None else []
    resp.headers = {"Link": link_header}
    return resp


# ---------------------------------------------------------------------------
# CanvasApiClient.__init__ tests
# ---------------------------------------------------------------------------


class TestCanvasApiClientInit:
    def test_success(self) -> None:
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

        expected_base_url = f"{FAKE_BASE_URL}/api/v1"
        expected_host = "myschool.instructure.com"

        assert client.base_url == expected_base_url
        assert client._expected_host == expected_host

    def test_normalizes_trailing_slash(self) -> None:
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=f"{FAKE_BASE_URL}/",
        )

        expected_base_url = f"{FAKE_BASE_URL}/api/v1"

        assert client.base_url == expected_base_url

    def test_normalizes_existing_api_v1(self) -> None:
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=f"{FAKE_BASE_URL}/api/v1",
        )

        expected_base_url = f"{FAKE_BASE_URL}/api/v1"

        assert client.base_url == expected_base_url

    def test_rejects_non_https_scheme(self) -> None:
        with pytest.raises(ValueError, match="must use https"):
            CanvasApiClient(
                bearer_token=FAKE_TOKEN,
                canvas_base_url="ftp://myschool.instructure.com",
            )

    def test_rejects_http(self) -> None:
        with pytest.raises(ValueError, match="must use https"):
            CanvasApiClient(
                bearer_token=FAKE_TOKEN,
                canvas_base_url="http://myschool.instructure.com",
            )

    def test_rejects_missing_host(self) -> None:
        with pytest.raises(ValueError, match="must include a valid host"):
            CanvasApiClient(
                bearer_token=FAKE_TOKEN,
                canvas_base_url="https://",
            )


# ---------------------------------------------------------------------------
# CanvasApiClient._build_url tests
# ---------------------------------------------------------------------------


class TestBuildUrl:
    def setup_method(self) -> None:
        self.client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

    def test_appends_endpoint(self) -> None:
        result = self.client._build_url("courses")
        expected = f"{FAKE_BASE_URL}/api/v1/courses"

        assert result == expected

    def test_strips_leading_slash_from_endpoint(self) -> None:
        result = self.client._build_url("/courses")
        expected = f"{FAKE_BASE_URL}/api/v1/courses"

        assert result == expected


# ---------------------------------------------------------------------------
# CanvasApiClient._build_headers tests
# ---------------------------------------------------------------------------


class TestBuildHeaders:
    def setup_method(self) -> None:
        self.client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

    def test_returns_bearer_auth(self) -> None:
        result = self.client._build_headers()
        expected = {"Authorization": f"Bearer {FAKE_TOKEN}"}

        assert result == expected


# ---------------------------------------------------------------------------
# CanvasApiClient.get tests
# ---------------------------------------------------------------------------


class TestGet:
    def setup_method(self) -> None:
        self.client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_success_returns_json_and_next_url(self, mock_requests: MagicMock) -> None:
        next_link = f"<{FAKE_BASE_URL}/api/v1/courses?page=2>; " 'rel="next"'
        mock_requests.get.return_value = _mock_response(
            json_data=[{"id": 1}], link_header=next_link
        )

        data, next_url = self.client.get("courses")

        expected_data = [{"id": 1}]
        expected_next = f"{FAKE_BASE_URL}/api/v1/courses?page=2"

        assert data == expected_data
        assert next_url == expected_next

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_success_no_next_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[{"id": 1}])

        data, next_url = self.client.get("courses")

        assert data == [{"id": 1}]
        assert next_url is None

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_raises_on_error_status(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(403, {})

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        assert exc_info.value.status_code == 403

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_raises_on_404(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(404, {})

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        assert exc_info.value.status_code == 404

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_raises_on_429(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(429, {})

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        assert exc_info.value.status_code == 429

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_skips_params_when_using_full_url(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        full = f"{FAKE_BASE_URL}/api/v1/courses?page=2"

        self.client.get(params={"per_page": "100"}, full_url=full)

        _, kwargs = mock_requests.get.call_args
        assert kwargs["params"] is None

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_error_extracts_message_from_error_dict(
        self, mock_requests: MagicMock
    ) -> None:
        """Shape 1: {"error": {"message": "Not authorized"}}"""
        mock_requests.get.return_value = _mock_response(
            403, {"error": {"message": "Not authorized"}}
        )

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Not authorized"

        assert result == expected

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_error_extracts_message_from_error_string(
        self, mock_requests: MagicMock
    ) -> None:
        """Shape 2: {"error": "Invalid access token"}"""
        mock_requests.get.return_value = _mock_response(
            401, {"error": "Invalid access token"}
        )

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Invalid access token"

        assert result == expected

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_error_extracts_message_from_errors_list(
        self, mock_requests: MagicMock
    ) -> None:
        """Shape 3: {"errors": [{"message": "Invalid query"}]}"""
        mock_requests.get.return_value = _mock_response(
            400, {"errors": [{"message": "Invalid query"}]}
        )

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Invalid query"

        assert result == expected

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_error_dict_takes_priority_over_errors_list(
        self, mock_requests: MagicMock
    ) -> None:
        """When both error shapes are present, error dict wins."""
        mock_requests.get.return_value = _mock_response(
            403, {"error": "Specific error", "errors": [{"message": "Generic"}]}
        )

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Specific error"

        assert result == expected

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_error_falls_back_to_reason_when_no_json_message(
        self, mock_requests: MagicMock
    ) -> None:
        """Empty error body falls back to response.reason."""
        mock_requests.get.return_value = _mock_response(500, {})

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Error"  # from _mock_response's reason for >= 300

        assert result == expected

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_invalid_json_on_success_raises(self, mock_requests: MagicMock) -> None:
        """Invalid JSON on a 2xx response raises OnyxError."""
        resp = MagicMock()
        resp.status_code = 200
        resp.json.side_effect = ValueError("No JSON")
        resp.headers = {"Link": ""}
        mock_requests.get.return_value = resp

        with pytest.raises(OnyxError, match="Invalid JSON"):
            self.client.get("courses")

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_invalid_json_on_error_falls_back_to_reason(
        self, mock_requests: MagicMock
    ) -> None:
        """Invalid JSON on a 4xx response falls back to response.reason."""
        resp = MagicMock()
        resp.status_code = 500
        resp.reason = "Internal Server Error"
        resp.json.side_effect = ValueError("No JSON")
        resp.headers = {"Link": ""}
        mock_requests.get.return_value = resp

        with pytest.raises(OnyxError) as exc_info:
            self.client.get("courses")

        result = exc_info.value.detail
        expected = "Internal Server Error"

        assert result == expected


# ---------------------------------------------------------------------------
# CanvasApiClient.paginate tests
# ---------------------------------------------------------------------------


class TestPaginate:
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_single_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(
            json_data=[{"id": 1}, {"id": 2}]
        )
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

        pages = list(client.paginate("courses"))

        assert len(pages) == 1
        assert pages[0] == [{"id": 1}, {"id": 2}]

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_two_pages(self, mock_requests: MagicMock) -> None:
        next_link = f'<{FAKE_BASE_URL}/api/v1/courses?page=2>; rel="next"'
        page1 = _mock_response(json_data=[{"id": 1}], link_header=next_link)
        page2 = _mock_response(json_data=[{"id": 2}])
        mock_requests.get.side_effect = [page1, page2]
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

        pages = list(client.paginate("courses"))

        assert len(pages) == 2
        assert pages[0] == [{"id": 1}]
        assert pages[1] == [{"id": 2}]

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_empty_response(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url=FAKE_BASE_URL,
        )

        pages = list(client.paginate("courses"))

        assert pages == []


# ---------------------------------------------------------------------------
# CanvasApiClient._parse_next_link tests
# ---------------------------------------------------------------------------


class TestParseNextLink:
    def setup_method(self) -> None:
        self.client = CanvasApiClient(
            bearer_token=FAKE_TOKEN,
            canvas_base_url="https://canvas.example.com",
        )

    def test_found(self) -> None:
        header = '<https://canvas.example.com/api/v1/courses?page=2>; rel="next"'

        result = self.client._parse_next_link(header)
        expected = "https://canvas.example.com/api/v1/courses?page=2"

        assert result == expected

    def test_not_found(self) -> None:
        header = '<https://canvas.example.com/api/v1/courses?page=1>; rel="current"'

        result = self.client._parse_next_link(header)

        assert result is None

    def test_empty(self) -> None:
        result = self.client._parse_next_link("")

        assert result is None

    def test_multiple_rels(self) -> None:
        header = (
            '<https://canvas.example.com/api/v1/courses?page=1>; rel="current", '
            '<https://canvas.example.com/api/v1/courses?page=2>; rel="next"'
        )

        result = self.client._parse_next_link(header)
        expected = "https://canvas.example.com/api/v1/courses?page=2"

        assert result == expected

    def test_rejects_host_mismatch(self) -> None:
        header = '<https://evil.example.com/api/v1/courses?page=2>; rel="next"'

        with pytest.raises(OnyxError, match="unexpected host"):
            self.client._parse_next_link(header)

    def test_rejects_non_https_link(self) -> None:
        header = '<http://canvas.example.com/api/v1/courses?page=2>; rel="next"'

        with pytest.raises(OnyxError, match="must use https"):
            self.client._parse_next_link(header)


# ---------------------------------------------------------------------------
# CanvasConnector — credential loading
# ---------------------------------------------------------------------------


class TestLoadCredentials:
    def _assert_load_credentials_raises(
        self,
        status_code: int,
        expected_error: type[Exception],
        mock_requests: MagicMock,
    ) -> None:
        """Helper: assert load_credentials raises expected_error for a given status."""
        mock_requests.get.return_value = _mock_response(status_code, {})
        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
        with pytest.raises(expected_error):
            connector.load_credentials({"canvas_access_token": FAKE_TOKEN})

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_load_credentials_success(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])
        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)

        result = connector.load_credentials({"canvas_access_token": FAKE_TOKEN})

        assert result is None
        assert connector._canvas_client is not None

    def test_canvas_client_raises_without_credentials(self) -> None:
        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)

        with pytest.raises(ConnectorMissingCredentialError):
            _ = connector.canvas_client

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_load_credentials_invalid_token(self, mock_requests: MagicMock) -> None:
        self._assert_load_credentials_raises(401, CredentialExpiredError, mock_requests)

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_load_credentials_insufficient_permissions(
        self, mock_requests: MagicMock
    ) -> None:
        self._assert_load_credentials_raises(
            403, InsufficientPermissionsError, mock_requests
        )


# ---------------------------------------------------------------------------
# CanvasConnector — URL normalization
# ---------------------------------------------------------------------------


class TestConnectorUrlNormalization:
    def test_strips_api_v1_suffix(self) -> None:
        connector = _build_connector(base_url=f"{FAKE_BASE_URL}/api/v1")

        result = connector.canvas_base_url
        expected = FAKE_BASE_URL

        assert result == expected

    def test_strips_trailing_slash(self) -> None:
        connector = _build_connector(base_url=f"{FAKE_BASE_URL}/")

        result = connector.canvas_base_url
        expected = FAKE_BASE_URL

        assert result == expected

    def test_no_change_for_clean_url(self) -> None:
        connector = _build_connector(base_url=FAKE_BASE_URL)

        result = connector.canvas_base_url
        expected = FAKE_BASE_URL

        assert result == expected


# ---------------------------------------------------------------------------
# CanvasConnector — document conversion
# ---------------------------------------------------------------------------


class TestDocumentConversion:
    def setup_method(self) -> None:
        self.connector = _build_connector()

    def test_convert_page_to_document(self) -> None:
        from onyx.connectors.canvas.connector import CanvasPage

        page = CanvasPage(
            page_id=10,
            url="syllabus",
            title="Syllabus",
            body="<p>Welcome</p>",
            created_at="2025-01-15T00:00:00Z",
            updated_at="2025-06-01T12:00:00Z",
            course_id=1,
        )

        doc = self.connector._convert_page_to_document(page)

        expected_id = "canvas-page-1-10"
        expected_metadata = {"course_id": "1", "type": "page"}
        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)

        assert doc.id == expected_id
        assert doc.source == DocumentSource.CANVAS
        assert doc.semantic_identifier == "Syllabus"
        assert doc.metadata == expected_metadata
        assert doc.sections[0].link is not None
        assert f"{FAKE_BASE_URL}/courses/1/pages/syllabus" in doc.sections[0].link
        assert doc.doc_updated_at == expected_updated_at

    def test_convert_page_without_body(self) -> None:
        from onyx.connectors.canvas.connector import CanvasPage

        page = CanvasPage(
            page_id=11,
            url="empty-page",
            title="Empty Page",
            body=None,
            created_at="2025-01-15T00:00:00Z",
            updated_at="2025-06-01T12:00:00Z",
            course_id=1,
        )

        doc = self.connector._convert_page_to_document(page)
        section_text = doc.sections[0].text
        assert section_text is not None

        assert "Empty Page" in section_text
        assert "<p>" not in section_text

    def test_convert_assignment_to_document(self) -> None:
        from onyx.connectors.canvas.connector import CanvasAssignment

        assignment = CanvasAssignment(
            id=20,
            name="Homework 1",
            description="<p>Solve these</p>",
            html_url=f"{FAKE_BASE_URL}/courses/1/assignments/20",
            course_id=1,
            created_at="2025-01-20T00:00:00Z",
            updated_at="2025-06-01T12:00:00Z",
            due_at="2025-02-01T23:59:00Z",
        )

        doc = self.connector._convert_assignment_to_document(assignment)

        expected_id = "canvas-assignment-1-20"
        expected_due_text = "Due: February 01, 2025 23:59 UTC"

        assert doc.id == expected_id
        assert doc.source == DocumentSource.CANVAS
        assert doc.semantic_identifier == "Homework 1"
        assert doc.sections[0].text is not None
        assert expected_due_text in doc.sections[0].text

    def test_convert_assignment_without_description(self) -> None:
        from onyx.connectors.canvas.connector import CanvasAssignment

        assignment = CanvasAssignment(
            id=21,
            name="Quiz 1",
            description=None,
            html_url=f"{FAKE_BASE_URL}/courses/1/assignments/21",
            course_id=1,
            created_at="2025-01-20T00:00:00Z",
            updated_at="2025-06-01T12:00:00Z",
            due_at=None,
        )

        doc = self.connector._convert_assignment_to_document(assignment)
        section_text = doc.sections[0].text
        assert section_text is not None

        assert "Quiz 1" in section_text
        assert "Due:" not in section_text

    def test_convert_announcement_to_document(self) -> None:
        from onyx.connectors.canvas.connector import CanvasAnnouncement

        announcement = CanvasAnnouncement(
            id=30,
            title="Class Cancelled",
            message="<p>No class today</p>",
            html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/30",
            posted_at="2025-06-01T12:00:00Z",
            course_id=1,
        )

        doc = self.connector._convert_announcement_to_document(announcement)

        expected_id = "canvas-announcement-1-30"
        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)

        assert doc.id == expected_id
        assert doc.source == DocumentSource.CANVAS
        assert doc.semantic_identifier == "Class Cancelled"
        assert doc.doc_updated_at == expected_updated_at

    def test_convert_announcement_without_posted_at(self) -> None:
        from onyx.connectors.canvas.connector import CanvasAnnouncement

        announcement = CanvasAnnouncement(
            id=31,
            title="TBD Announcement",
            message=None,
            html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/31",
            posted_at=None,
            course_id=1,
        )

        doc = self.connector._convert_announcement_to_document(announcement)

        assert doc.doc_updated_at is None


# ---------------------------------------------------------------------------
# CanvasConnector — validate_connector_settings
# ---------------------------------------------------------------------------


class TestValidateConnectorSettings:
    def _assert_validate_raises(
        self,
        status_code: int,
        expected_error: type[Exception],
        mock_requests: MagicMock,
    ) -> None:
        """Helper: assert validate_connector_settings raises expected_error."""
        success_resp = _mock_response(json_data=[_mock_course()])
        fail_resp = _mock_response(status_code, {})
        mock_requests.get.side_effect = [success_resp, fail_resp]
        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
        connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
        with pytest.raises(expected_error):
            connector.validate_connector_settings()

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_validate_success(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])
        connector = _build_connector()

        connector.validate_connector_settings()  # should not raise

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_validate_expired_credential(self, mock_requests: MagicMock) -> None:
        self._assert_validate_raises(401, CredentialExpiredError, mock_requests)

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:
        self._assert_validate_raises(403, InsufficientPermissionsError, mock_requests)

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:
        self._assert_validate_raises(429, ConnectorValidationError, mock_requests)

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:
        self._assert_validate_raises(500, UnexpectedValidationError, mock_requests)


# ---------------------------------------------------------------------------
# _list_* pagination tests
# ---------------------------------------------------------------------------


class TestListCourses:
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_single_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(
            json_data=[_mock_course(1), _mock_course(2, "CS201", "Data Structures")]
        )
        connector = _build_connector()

        result = connector._list_courses()

        assert len(result) == 2
        assert result[0].id == 1
        assert result[1].id == 2

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_empty_response(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        connector = _build_connector()

        result = connector._list_courses()

        assert result == []


class TestListPages:
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_single_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(
            json_data=[_mock_page(10), _mock_page(11, "Notes")]
        )
        connector = _build_connector()

        result = connector._list_pages(course_id=1)

        assert len(result) == 2
        assert result[0].page_id == 10
        assert result[1].page_id == 11

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_empty_response(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        connector = _build_connector()

        result = connector._list_pages(course_id=1)

        assert result == []


class TestListAssignments:
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_single_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(
            json_data=[_mock_assignment(20), _mock_assignment(21, "Quiz 1")]
        )
        connector = _build_connector()

        result = connector._list_assignments(course_id=1)

        assert len(result) == 2
        assert result[0].id == 20
        assert result[1].id == 21

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_empty_response(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        connector = _build_connector()

        result = connector._list_assignments(course_id=1)

        assert result == []


class TestListAnnouncements:
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_single_page(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(
            json_data=[_mock_announcement(30), _mock_announcement(31, "Update")]
        )
        connector = _build_connector()

        result = connector._list_announcements(course_id=1)

        assert len(result) == 2
        assert result[0].id == 30
        assert result[1].id == 31

    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_empty_response(self, mock_requests: MagicMock) -> None:
        mock_requests.get.return_value = _mock_response(json_data=[])
        connector = _build_connector()

        result = connector._list_announcements(course_id=1)

        assert result == []


================================================
FILE: backend/tests/unit/onyx/connectors/confluence/test_confluence_checkpointing.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from requests.exceptions import HTTPError

from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceCheckpoint
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import SlimDocument
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector
from tests.unit.onyx.connectors.utils import (
    load_everything_from_checkpoint_connector_from_checkpoint,
)

PAGE_SIZE = 2


@pytest.fixture
def confluence_base_url() -> str:
    return "https://example.atlassian.net/wiki"


@pytest.fixture
def space_key() -> str:
    return "TEST"


@pytest.fixture
def mock_confluence_client() -> OnyxConfluence:
    """Create a mock Confluence client with proper typing"""
    # Server mode just Also updates the start value
    return OnyxConfluence(
        is_cloud=False, url="test", credentials_provider=MagicMock(), timeout=None
    )


@pytest.fixture
def confluence_connector(
    confluence_base_url: str, space_key: str, mock_confluence_client: OnyxConfluence
) -> Generator[ConfluenceConnector, None, None]:
    """Create a Confluence connector with a mock client"""
    # NOTE: we test with is_cloud=False for all tests, which is generally fine because the behavior
    # for the two versions is "close enough". If cloud-specific behavior is added, we can parametrize
    # the connector and client fixtures to allow either.
    connector = ConfluenceConnector(
        wiki_base=confluence_base_url,
        space=space_key,
        is_cloud=False,
        labels_to_skip=["secret", "sensitive"],
        timezone_offset=0.0,
        batch_size=2,
    )
    # Initialize the client directly
    connector._confluence_client = mock_confluence_client
    connector._low_timeout_confluence_client = mock_confluence_client
    with patch("onyx.connectors.confluence.connector._SLIM_DOC_BATCH_SIZE", 2):
        yield connector


@pytest.fixture
def create_mock_page() -> Callable[..., dict[str, Any]]:
    def _create_mock_page(
        id: str = "123",
        title: str = "Test Page",
        updated: str = "2023-01-01T12:00:00.000+0000",
        content: str = "Test Content",
        labels: list[str] | None = None,
    ) -> dict[str, Any]:
        """Helper to create a mock Confluence page object"""
        return {
            "id": id,
            "title": title,
            "version": {"when": updated},
            "history": {"lastUpdated": {"when": updated}},
            "body": {"storage": {"value": content}},
            "metadata": {
                "labels": {"results": [{"name": label} for label in (labels or [])]}
            },
            "space": {"key": "TEST"},
            "_links": {"webui": f"/spaces/TEST/pages/{id}"},
        }

    return _create_mock_page


def test_get_cql_query_with_space(confluence_connector: ConfluenceConnector) -> None:
    """Test CQL query generation with space specified"""
    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()
    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()

    query = confluence_connector._construct_page_cql_query(start, end)

    # Check that the space part and time part are both in the query
    assert f"space='{confluence_connector.space}'" in query
    assert "lastmodified >= '2023-01-01 00:00'" in query
    assert "lastmodified <= '2023-01-02 00:00'" in query
    assert " and " in query.lower()


def test_get_cql_query_without_space(confluence_base_url: str) -> None:
    """Test CQL query generation without space specified"""
    # Create connector without space key
    connector = ConfluenceConnector(wiki_base=confluence_base_url, is_cloud=True)

    start = datetime(2023, 1, 1, tzinfo=connector.timezone).timestamp()
    end = datetime(2023, 1, 2, tzinfo=connector.timezone).timestamp()

    query = connector._construct_page_cql_query(start, end)

    # Check that only time part is in the query
    assert "space=" not in query
    assert "lastmodified >= '2023-01-01 00:00'" in query
    assert "lastmodified <= '2023-01-02 00:00'" in query


def test_load_from_checkpoint_happy_path(
    confluence_connector: ConfluenceConnector,
    create_mock_page: Callable[..., dict[str, Any]],
) -> None:
    """Test loading from checkpoint - happy path"""
    # Set up mocked pages
    first_updated = datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)
    last_updated = datetime(2023, 1, 3, 12, 0, tzinfo=timezone.utc)
    mock_page1 = create_mock_page(
        id="1", title="Page 1", updated=first_updated.isoformat()
    )
    mock_page2 = create_mock_page(
        id="2", title="Page 2", updated=first_updated.isoformat()
    )
    mock_page3 = create_mock_page(
        id="3", title="Page 3", updated=last_updated.isoformat()
    )

    # Mock paginated_cql_retrieval to return our mock pages
    confluence_client = confluence_connector._confluence_client
    assert confluence_client is not None, "bad test setup"

    # Mock space retrieval for hierarchy nodes (called at start of first batch)
    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore
        return_value=iter([{"key": "TEST", "name": "Test Space"}])
    )

    get_mock = MagicMock()
    confluence_client.get = get_mock  # type: ignore
    get_mock.side_effect = [
        # First page response
        MagicMock(
            json=lambda: {
                "results": [mock_page1, mock_page2],
                "_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
            }
        ),
        # links and attachemnts responses
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        # next actual page response
        MagicMock(json=lambda: {"results": [mock_page3]}),
        # more links and attachment responses
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(
        confluence_connector, 0, end_time
    )

    # Check that the documents were returned (hierarchy nodes are filtered out by the test utility)
    assert len(outputs) == 2

    checkpoint_output1 = outputs[0]
    assert len(checkpoint_output1.items) == 2
    document1 = checkpoint_output1.items[0]
    assert isinstance(document1, Document)
    assert document1.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/1"
    document2 = checkpoint_output1.items[1]
    assert isinstance(document2, Document)
    assert document2.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/2"
    assert checkpoint_output1.next_checkpoint == ConfluenceCheckpoint(
        has_more=True, next_page_url="rest/api/content/search?cql=type%3Dpage&start=2"
    )

    checkpoint_output2 = outputs[1]
    assert len(checkpoint_output2.items) == 1
    document3 = checkpoint_output2.items[0]
    assert isinstance(document3, Document)
    assert document3.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/3"
    assert not checkpoint_output2.next_checkpoint.has_more


def test_load_from_checkpoint_with_page_processing_error(
    confluence_connector: ConfluenceConnector,
    create_mock_page: Callable[..., dict[str, Any]],
) -> None:
    """Test loading from checkpoint with a mix of successful and failed page processing"""
    # Set up mocked pages
    mock_page1 = create_mock_page(id="1", title="Page 1")
    mock_page2 = create_mock_page(id="2", title="Page 2")

    # Mock paginated_cql_retrieval to return our mock pages
    confluence_client = confluence_connector._confluence_client
    assert confluence_client is not None, "bad test setup"

    # Mock space retrieval for hierarchy nodes (called at start of first batch)
    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore
        return_value=iter([{"key": "TEST", "name": "Test Space"}])
    )

    get_mock = MagicMock()
    confluence_client.get = get_mock  # type: ignore
    get_mock.side_effect = [
        # First page response
        MagicMock(
            json=lambda: {
                "results": [mock_page1, mock_page2],
                "_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
            }
        ),
        # Comments for page 1
        MagicMock(json=lambda: {"results": []}),
        # Attachments for page 1
        MagicMock(json=lambda: {"results": []}),
        # Comments for page 2
        MagicMock(json=lambda: {"results": []}),
        # Attachments for page 2
        MagicMock(json=lambda: {"results": []}),
        # Second page response (empty)
        MagicMock(
            json=lambda: {
                "results": [],
                "_links": {},
            }
        ),
    ]

    # Mock _convert_page_to_document to fail for the second page
    def mock_convert_side_effect(page: dict[str, Any]) -> Document | ConnectorFailure:
        if page["id"] == "1":
            return Document(
                id=f"{confluence_connector.wiki_base}/spaces/TEST/pages/1",
                sections=[],
                source=DocumentSource.CONFLUENCE,
                semantic_identifier="Page 1",
                metadata={},
            )
        else:
            return ConnectorFailure(
                failed_document=DocumentFailure(
                    document_id=page["id"],
                    document_link=f"{confluence_connector.wiki_base}/spaces/TEST/pages/{page['id']}",
                ),
                failure_message="Failed to process Confluence page",
                exception=Exception("Test error"),
            )

    with patch(
        "onyx.connectors.confluence.connector.ConfluenceConnector._convert_page_to_document",
        side_effect=mock_convert_side_effect,
    ):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            confluence_connector, 0, end_time
        )

        # Hierarchy nodes are filtered out by test utility
        assert len(outputs) == 1
        checkpoint_output = outputs[0]
        assert len(checkpoint_output.items) == 2

        # First item should be successful
        assert isinstance(checkpoint_output.items[0], Document)
        assert (
            checkpoint_output.items[0].id
            == f"{confluence_connector.wiki_base}/spaces/TEST/pages/1"
        )

        # Second item should be a failure
        assert isinstance(checkpoint_output.items[1], ConnectorFailure)
        assert (
            "Failed to process Confluence page"
            in checkpoint_output.items[1].failure_message
        )


def test_retrieve_all_slim_docs_perm_sync(
    confluence_connector: ConfluenceConnector,
    create_mock_page: Callable[..., dict[str, Any]],
) -> None:
    """Test retrieving all slim documents including hierarchy nodes"""
    # Set up mocked pages
    mock_page1 = create_mock_page(id="1")
    mock_page2 = create_mock_page(id="2")

    # Mock paginated_cql_retrieval to return our mock pages
    confluence_client = confluence_connector._confluence_client
    assert confluence_client is not None, "bad test setup"

    # Mock space retrieval for hierarchy nodes
    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore
        return_value=iter([{"key": "TEST", "name": "Test Space"}])
    )

    get_mock = MagicMock()
    confluence_client.get = get_mock  # type: ignore
    get_mock.side_effect = [
        # First page response
        MagicMock(
            json=lambda: {
                "results": [mock_page1, mock_page2],
                "_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
            }
        ),
        # attachments for page 1
        MagicMock(json=lambda: {"results": []}),
        # attachments for page 2
        MagicMock(json=lambda: {"results": []}),
        # next page of CQL results (empty)
        MagicMock(json=lambda: {"results": []}),
    ]

    # Call retrieve_all_slim_docs_perm_sync
    batches = list(confluence_connector.retrieve_all_slim_docs_perm_sync(0, 100))
    assert get_mock.call_count == 4

    # With batch size of 2, we get:
    # Batch 1: [HierarchyNode(space), SlimDocument(page1)]
    # Batch 2: [SlimDocument(page2)]
    assert len(batches) == 2

    assert len(batches[0]) == 2
    assert isinstance(batches[0][0], HierarchyNode)
    assert batches[0][0].raw_node_id == "TEST"
    assert isinstance(batches[0][1], SlimDocument)
    assert batches[0][1].id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/1"

    assert len(batches[1]) == 1
    assert isinstance(batches[1][0], SlimDocument)
    assert batches[1][0].id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/2"


@pytest.mark.parametrize(
    "status_code,expected_exception,expected_message",
    [
        (
            401,
            CredentialExpiredError,
            "Invalid or expired Confluence credentials",
        ),
        (
            403,
            InsufficientPermissionsError,
            "Insufficient permissions to access Confluence resources",
        ),
        (404, UnexpectedValidationError, "Unexpected Confluence error"),
    ],
)
def test_validate_connector_settings_errors(
    confluence_connector: ConfluenceConnector,
    status_code: int,
    expected_exception: type[Exception],
    expected_message: str,
) -> None:
    """Test validation with various error scenarios"""
    error = HTTPError(response=MagicMock(status_code=status_code))

    with patch(
        "onyx.connectors.confluence.onyx_confluence.OnyxConfluence.retrieve_confluence_spaces"
    ) as mock_retrieve:
        mock_retrieve.side_effect = error

        with pytest.raises(expected_exception) as excinfo:
            confluence_connector.validate_connector_settings()
        assert expected_message in str(excinfo.value)


def test_validate_connector_settings_success(
    confluence_connector: ConfluenceConnector,
) -> None:
    """Test successful validation"""
    low_client = confluence_connector.low_timeout_confluence_client
    with (
        patch.object(
            low_client,
            "retrieve_confluence_spaces",
            return_value=iter([{"key": "TEST"}]),
        ) as mock_retrieve,
        patch.object(
            low_client,
            "get_space",
            return_value={"key": "TEST"},
            create=True,
        ) as mock_get_space,
    ):
        confluence_connector.validate_connector_settings()
        mock_retrieve.assert_called_once()
        mock_get_space.assert_called_once_with(confluence_connector.space)


def test_checkpoint_progress(
    confluence_connector: ConfluenceConnector,
    create_mock_page: Callable[..., dict[str, Any]],
) -> None:
    """Test that the checkpoint's last_updated field is properly updated after processing pages
    and that processed document IDs are stored to avoid reprocessing."""
    # Set up mocked pages with different timestamps
    earlier_timestamp = datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)
    later_timestamp = datetime(2023, 1, 2, 12, 0, tzinfo=timezone.utc)
    latest_timestamp = datetime(2024, 1, 2, 12, 0, tzinfo=timezone.utc)
    mock_page1 = create_mock_page(
        id="1", title="Page 1", updated=earlier_timestamp.isoformat()
    )
    mock_page2 = create_mock_page(
        id="2", title="Page 2", updated=later_timestamp.isoformat()
    )
    mock_page3 = create_mock_page(
        id="3", title="Page 3", updated=latest_timestamp.isoformat()
    )

    # Mock paginated_cql_retrieval to return our mock pages
    confluence_client = confluence_connector._confluence_client
    assert confluence_client is not None, "bad test setup"

    # Mock space retrieval for hierarchy nodes (called at start of first batch)
    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore
        return_value=iter([{"key": "TEST", "name": "Test Space"}])
    )

    get_mock = MagicMock()
    confluence_client.get = get_mock  # type: ignore
    get_mock.side_effect = [
        # First page response
        MagicMock(
            json=lambda: {
                "results": [mock_page1, mock_page2],
                "_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
            }
        ),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
    ]

    # First run - process both pages
    end_time = datetime(2023, 1, 3, tzinfo=timezone.utc).timestamp()
    outputs = load_everything_from_checkpoint_connector(
        confluence_connector, 0, end_time
    )

    # Hierarchy nodes are filtered out by test utility
    first_checkpoint = outputs[0].next_checkpoint

    assert (
        first_checkpoint.next_page_url
        == "rest/api/content/search?cql=type%3Dpage&start=2"
    )
    assert not outputs[-1].next_checkpoint.has_more

    assert len(outputs[0].items) == 2
    assert isinstance(outputs[0].items[0], Document)
    assert outputs[0].items[0].semantic_identifier == "Page 1"
    assert isinstance(outputs[0].items[1], Document)
    assert outputs[0].items[1].semantic_identifier == "Page 2"

    # Second run - same time range but with checkpoint from first run
    # Reset the mock to return the same pages
    get_mock.side_effect = [
        # First page response
        MagicMock(
            json=lambda: {
                "results": [mock_page3],
                "_links": {"next": "rest/api/content/search?cql=type=page&start=3"},
            }
        ),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
        MagicMock(json=lambda: {"results": []}),
    ]

    # Use the checkpoint from first run
    first_checkpoint.has_more = True
    outputs_with_checkpoint = load_everything_from_checkpoint_connector_from_checkpoint(
        confluence_connector, 0, end_time, first_checkpoint
    )

    # Verify only the new page was processed since the others were in last_seen_doc_ids
    assert len(outputs_with_checkpoint) == 2
    assert len(outputs_with_checkpoint[0].items) == 1
    assert isinstance(outputs_with_checkpoint[0].items[0], Document)
    assert outputs_with_checkpoint[0].items[0].semantic_identifier == "Page 3"
    assert not outputs_with_checkpoint[-1].next_checkpoint.has_more


================================================
FILE: backend/tests/unit/onyx/connectors/confluence/test_onyx_confluence.py
================================================
import copy
from typing import Any
from unittest import mock

import pytest
import requests
from requests import HTTPError

from onyx.connectors.confluence.onyx_confluence import (
    _DEFAULT_PAGINATION_LIMIT,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.interfaces import CredentialsProviderInterface


# Helper to create mock responses
def _create_mock_response(
    status_code: int,
    json_data: dict[str, Any] | None = None,
    url: str = "",
) -> requests.Response:
    response = requests.Response()
    response.status_code = status_code
    response.url = url
    if json_data is not None:
        response.json = mock.Mock(return_value=json_data)  # type: ignore
    if status_code >= 400:
        response.reason = "Mock Error"
    return response


# Helper to create HTTPError
def _create_http_error(
    status_code: int,
    json_data: dict[str, Any] | None = None,
    url: str = "",
) -> requests.Response:
    response = _create_mock_response(status_code, json_data, url)
    response.raise_for_status = mock.Mock(side_effect=HTTPError(response=response))  # type: ignore
    return response


@pytest.fixture
def mock_credentials_provider() -> mock.Mock:
    provider = mock.Mock(spec=CredentialsProviderInterface)
    provider.is_dynamic.return_value = False
    provider.get_credentials.return_value = {"confluence_access_token": "dummy_token"}
    provider.get_tenant_id.return_value = "test_tenant"
    provider.get_provider_key.return_value = "test_key"
    provider.__enter__ = mock.Mock(return_value=None)
    provider.__exit__ = mock.Mock(return_value=None)
    return provider


@pytest.fixture
def confluence_server_client(mock_credentials_provider: mock.Mock) -> OnyxConfluence:
    confluence = OnyxConfluence(
        is_cloud=False,
        url="http://fake-confluence.com",
        credentials_provider=mock_credentials_provider,
        timeout=10,
    )
    # Mock the internal client directly for controlling 'get'
    # We also mock the base URL used by the client internally for easier comparison
    mock_internal_client = mock.Mock()
    mock_internal_client.url = confluence._url
    confluence._confluence = mock_internal_client
    confluence._kwargs = (
        confluence.shared_base_kwargs
    )  # Ensure _kwargs is set for potential re-init
    return confluence


def test_cql_paginate_all_expansions_handles_internal_pagination_error(
    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture
) -> None:
    """
    Tests that cql_paginate_all_expansions correctly handles HTTP 500 errors
    during the expansion pagination phase (_paginate_url internal logic),
    retrying with smaller limits down to 1. It simulates successes and failures
    at limit=1 and expects the final error to be raised.

    Specifically, this test:

    1. Calls the top level cql query and gets a response with 3 children.
    2. Calls the expansion for the first child and gets a response with 2 children across 2 pages.
    3. Tries to call the expansion for the second child, gets a 500 error, and retries
       down to the limit of 1.
    4. At limit=1, simulates the following sequence for page requests:
       - Page 1 (start=0): Success
       - Page 2 (start=1): Success
       - Page 3 (start=2): Failure (500)
       - Page 4 (start=3): Failure (500) <- This is the error that should be raised
    5. Calls the expansion for the third child and gets a response with 1 child.
    6. The overall call succeeds.
    """
    caplog.set_level("WARNING")  # To check logging messages

    # Use constants from the client instance, but note the test logic goes below MINIMUM
    _TEST_MINIMUM_LIMIT = 1  # The limit this test expects the retry to reach

    top_level_cql = "test_cql"
    top_level_expand = "child_items"
    base_top_level_path = (
        f"rest/api/content/search?cql={top_level_cql}&expand={top_level_expand}"
    )
    initial_top_level_path = f"{base_top_level_path}&limit={_DEFAULT_PAGINATION_LIMIT}"

    # --- Mock Responses ---
    top_level_raw_response = {
        "results": [
            {
                "id": 1,
                "child_items": {
                    "results": [],  # Populated by _traverse_and_update
                    "_links": {
                        "next": f"/rest/api/content/1/child?limit={_DEFAULT_PAGINATION_LIMIT}"
                    },
                    "size": 0,
                },
            },
            {
                "id": 2,
                "child_items": {
                    "results": [],
                    "_links": {
                        "next": f"/rest/api/content/2/child?limit={_DEFAULT_PAGINATION_LIMIT}"
                    },
                    "size": 0,
                },
            },
            {
                "id": 3,
                "child_items": {
                    "results": [],
                    "_links": {
                        "next": f"/rest/api/content/3/child?limit={_DEFAULT_PAGINATION_LIMIT}"
                    },
                    "size": 0,
                },
            },
        ],
        "_links": {},
        "size": 3,
    }
    top_level_response = _create_mock_response(
        200,
        top_level_raw_response,
        url=initial_top_level_path,
    )

    # Expansion 1 - Needs 2 pages
    exp1_page1_path = f"rest/api/content/1/child?limit={_DEFAULT_PAGINATION_LIMIT}"
    # Note: _paginate_url internally calculates start for the next page
    exp1_page2_path = (
        f"rest/api/content/1/child?start=1&limit={_DEFAULT_PAGINATION_LIMIT}"
    )
    exp1_page1_response = _create_mock_response(
        200,
        {
            "results": [{"child_id": 101}],
            "_links": {"next": f"/{exp1_page2_path}"},
            "size": 1,
        },
        url=exp1_page1_path,
    )
    exp1_page2_response = _create_mock_response(
        200,
        {"results": [{"child_id": 102}], "_links": {}, "size": 1},
        url=exp1_page2_path,
    )

    # Problematic Expansion 2 URLs and Errors during limit reduction
    exp2_base_path = "rest/api/content/2/child"
    exp2_reduction_errors = {}
    limit = _DEFAULT_PAGINATION_LIMIT
    while limit > _TEST_MINIMUM_LIMIT:  # Reduce all the way to 1 for the test
        path = f"{exp2_base_path}?limit={limit}"
        exp2_reduction_errors[path] = _create_http_error(500, url=path)
        new_limit = limit // 2
        limit = max(new_limit, _TEST_MINIMUM_LIMIT)  # Ensure it hits 1

    # Expansion 2 - Pagination at Limit = 1 (2 successes, 2 failures)
    exp2_limit1_page1_path = f"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=0"
    exp2_limit1_page2_path = f"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=1"
    exp2_limit1_page3_path = f"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=2"
    exp2_limit1_page4_path = (
        f"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=3"  # Final failing call
    )
    exp2_limit1_page5_path = (
        f"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=4"  # Returns nothing
    )

    exp2_limit1_page1_response = _create_mock_response(
        200,
        {
            "results": [{"child_id": 201}],
            "_links": {"next": f"/{exp2_limit1_page2_path}"},
            "size": 1,
        },
        url=exp2_limit1_page1_path,
    )
    exp2_limit1_page2_error = _create_http_error(500, url=exp2_limit1_page2_path)
    exp2_limit1_page3_response = _create_mock_response(
        200,
        {
            "results": [{"child_id": 203}],
            "_links": {"next": f"/{exp2_limit1_page4_path}"},
            "size": 1,
        },
        url=exp2_limit1_page3_path,
    )
    exp2_limit1_page4_error = _create_http_error(
        500, url=exp2_limit1_page4_path
    )  # This is the one we expect to bubble up
    exp2_limit1_page5_response = _create_mock_response(
        200, {"results": [], "_links": {}, "size": 0}, url=exp2_limit1_page5_path
    )

    # Expansion 3
    exp3_page1_path = f"rest/api/content/3/child?limit={_DEFAULT_PAGINATION_LIMIT}"
    exp3_page1_response = _create_mock_response(
        200,
        {"results": [{"child_id": 301}], "_links": {}, "size": 1},
        url=exp3_page1_path,
    )

    # --- Side Effect Logic ---
    mock_get_call_paths: list[str] = []
    call_counts: dict[str, int] = {}  # Track calls to specific failing paths

    def get_side_effect(
        path: str,
        params: dict[str, Any] | None = None,  # noqa: ARG001
        advanced_mode: bool = False,  # noqa: ARG001
    ) -> requests.Response:
        path = path.strip("/")
        mock_get_call_paths.append(path)
        call_counts[path] = call_counts.get(path, 0) + 1
        print(f"Mock GET received path: {path} (Call #{call_counts[path]})")

        # Top Level Call
        if path == initial_top_level_path:
            print(f"-> Returning top level response for {path}")
            return top_level_response

        # Expansion 1 - Page 1
        elif path == exp1_page1_path:
            print(f"-> Returning expansion 1 page 1 for {path}")
            return exp1_page1_response

        # Expansion 1 - Page 2
        elif path == exp1_page2_path:
            print(f"-> Returning expansion 1 page 2 for {path}")
            return exp1_page2_response

        # Expansion 2 - Limit Reduction Errors
        elif path in exp2_reduction_errors:
            print(f"-> Failure: Returning response which raises 500 error for {path}")
            return exp2_reduction_errors[path]

        # Expansion 2 - Limit=1 Page 1 (Success)
        elif path == exp2_limit1_page1_path:
            print(f"-> Success: Returning expansion 2 limit 1 page 1 for {path}")
            return exp2_limit1_page1_response

        # Expansion 2 - Limit=1 Page 2 (Failure)
        elif path == exp2_limit1_page2_path:
            print(f"-> Failure: Returning response which raises 500 error for {path}")
            return exp2_limit1_page2_error

        # Expansion 2 - Limit=1 Page 3 (Success)
        elif path == exp2_limit1_page3_path:
            print(f"-> Success: Returning expansion 2 limit 1 page 3 for {path}")
            return exp2_limit1_page3_response

        # Expansion 2 - Limit=1 Page 4 (Failure)
        elif path == exp2_limit1_page4_path:
            print(f"-> Failure: Returning response which raises 500 error for {path}")
            return exp2_limit1_page4_error

        elif path == exp2_limit1_page5_path:
            print(f"-> Returning expansion 2 limit 1 page 5 for {path}")
            return exp2_limit1_page5_response

        # Expansion 3 - Page 1
        elif path == exp3_page1_path:
            print(f"-> Returning expansion 3 page 1 for {path}")
            return exp3_page1_response

        # Fallback
        print(f"!!! Unexpected GET path in mock: {path}")
        raise RuntimeError(f"Unexpected GET path in mock: {path}")

    confluence_server_client._confluence.get.side_effect = get_side_effect

    # --- Execute ---
    # Consume the iterator to trigger the calls
    result = list(
        confluence_server_client.cql_paginate_all_expansions(
            cql=top_level_cql,
            expand=top_level_expand,
            limit=_DEFAULT_PAGINATION_LIMIT,
        )
    )

    # Verify log for the failures during expansion 2 pagination (page 2 + 4)
    assert f"Error in confluence call to /{exp2_limit1_page2_path}" in caplog.text
    assert f"Error in confluence call to /{exp2_limit1_page4_path}" in caplog.text

    # Verify sequence of calls to 'get'
    # 1. Top level
    assert mock_get_call_paths[0] == initial_top_level_path
    # 2. Expansion 1 (page 1)
    assert mock_get_call_paths[1] == exp1_page1_path
    # 3. Expansion 1 (page 2)
    assert mock_get_call_paths[2] == exp1_page2_path
    # 4. Expansion 2 (initial attempt)
    assert (
        mock_get_call_paths[3] == f"{exp2_base_path}?limit={_DEFAULT_PAGINATION_LIMIT}"
    )

    # 5+. Expansion 2 (retries due to 500s, down to limit=1)
    call_index = 4

    # 5+N. Expansion 2 (limit=1, page 1 success)
    assert mock_get_call_paths[call_index] == exp2_limit1_page1_path
    call_index += 1
    # 5+N+1. Expansion 2 (limit=1, page 2 success)
    assert mock_get_call_paths[call_index] == exp2_limit1_page2_path
    call_index += 1
    # 5+N+2. Expansion 2 (limit=1, page 3 failure)
    assert mock_get_call_paths[call_index] == exp2_limit1_page3_path
    call_index += 1

    # 5+N+3. Expansion 2 (limit=1, page 4 failure)
    assert mock_get_call_paths[call_index] == exp2_limit1_page4_path
    call_index += 1

    # 5+N+4. Expansion 2 (limit=1, page 5 success, no results)
    assert mock_get_call_paths[call_index] == exp2_limit1_page5_path
    call_index += 1

    # Ensure Expansion 3 is called, that we continue after the final error-raising call
    assert mock_get_call_paths[call_index] == exp3_page1_path
    call_index += 1

    # Ensure correct number of calls
    assert len(mock_get_call_paths) == call_index

    # Ensure the result is correct
    # NOTE: size does not get updated during _traverse_and_update
    final_results = copy.deepcopy(top_level_raw_response)
    final_results["results"][0]["child_items"]["results"] = [{"child_id": 101}, {"child_id": 102}]  # type: ignore
    final_results["results"][1]["child_items"]["results"] = [{"child_id": 201}, {"child_id": 203}]  # type: ignore
    final_results["results"][2]["child_items"]["results"] = [{"child_id": 301}]  # type: ignore
    assert result == final_results["results"]


def test_paginated_cql_retrieval_handles_pagination_error(
    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture
) -> None:
    """
    Tests that paginated_cql_retrieval correctly handles HTTP 500 errors
    during pagination, retrying with smaller limits down to 1, skipping
    the problematic item, and continuing.

    NOTE: in this context, a "page" is a set of results NOT a confluence page.

    Specifically, this test:
    1. Makes an initial CQL call with a limit, gets page 1 successfully.
    2. Attempts to get page 2 (based on the 'next' link), receives a 500 error.
    3. The internal _paginate_url logic retries page 2 with limit=1.
    4. Simulates the following sequence for page 2 retries (limit=1):
       - Item 1 (start=original_start + 0): Success
       - Item 2 (start=original_start + 1): Failure (500) - This item is skipped.
       - Item 3 (start=original_start + 2): Success
       - Item 4 (start=original_start + 3): Success, no more results in this chunk.
    5. The function continues to the next page (page 3) successfully.
    6. Checks that the results from page 1, items 1 & 3 from page 2 (retry),
       and page 3 are all returned.
    7. Verifies the error log for the skipped item (item 2).
    """
    caplog.set_level("WARNING")

    test_cql = "type=page"
    encoded_cql = "type%3Dpage"  # URL encoded version
    test_limit = 4  # Smaller limit for easier testing of page boundaries
    _TEST_MINIMUM_LIMIT = 1

    base_path = f"rest/api/content/search?cql={encoded_cql}"  # Use encoded cql
    page1_path = f"{base_path}&limit={test_limit}"
    # Page 2 starts where page 1 left off (start=test_limit)
    page2_initial_path = f"{base_path}&limit={test_limit}&start={test_limit}"
    # Page 3 starts after the problematic page 2 is processed (start=test_limit * 2)
    page3_path = f"{base_path}&limit={test_limit}&start={test_limit * 2}"

    # --- Mock Responses ---
    # Page 1: Success (4 items)
    page1_response = _create_mock_response(
        200,
        {
            "results": [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}],
            "_links": {"next": f"/{page2_initial_path}"},
            "size": 4,
        },
        url=page1_path,
    )

    # Page 2: Initial attempt fails with 500
    page2_initial_error = _create_http_error(500, url=page2_initial_path)

    # Page 2: Retry attempts with limit=1
    page2_limit1_start_offset = test_limit  # Start index for page 2 items
    page2_limit1_item1_path = (
        f"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 0}"
    )
    page2_limit1_item2_path = (
        f"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 1}"
    )
    page2_limit1_item3_path = (
        f"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 2}"
    )
    page2_limit1_item4_path = (
        f"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 3}"
    )

    page2_limit1_item1_response = _create_mock_response(
        200,
        {
            "results": [{"id": 5}],
            "_links": {"next": f"/{page2_limit1_item2_path}"},
            "size": 1,
        },  # Note: next link might be present but we check results
        url=page2_limit1_item1_path,
    )
    page2_limit1_item2_error = _create_http_error(
        500, url=page2_limit1_item2_path
    )  # The failure
    page2_limit1_item3_response = _create_mock_response(
        200,
        {
            "results": [{"id": 7}],
            "_links": {"next": f"/{page2_limit1_item4_path}"},
            "size": 1,
        },
        url=page2_limit1_item3_path,
    )
    page2_limit1_item4_response = _create_mock_response(
        200,
        {
            "results": [{"id": 8}],
            "_links": {"next": f"/{page3_path}"},
            "size": 1,
        },
        url=page2_limit1_item4_path,
    )

    # Page 3: Success (2 items)
    page3_response = _create_mock_response(
        200,
        {"results": [{"id": 9}, {"id": 10}], "_links": {}, "size": 2},  # No more pages
        url=page3_path,
    )

    # --- Side Effect Logic ---
    mock_get_call_paths: list[str] = []
    call_counts: dict[str, int] = {}  # Track calls

    def get_side_effect(
        path: str,
        params: dict[str, Any] | None = None,  # noqa: ARG001
        advanced_mode: bool = False,  # noqa: ARG001
    ) -> requests.Response:
        path = path.strip("/")
        mock_get_call_paths.append(path)
        call_counts[path] = call_counts.get(path, 0) + 1
        print(f"Mock GET received path: {path} (Call #{call_counts[path]})")

        # Page 1
        if path == page1_path:
            print(f"-> Returning page 1 success for {path}")
            return page1_response
        # Page 2 - Initial Failure
        elif path == page2_initial_path:
            print(f"-> Returning page 2 initial 500 error for {path}")
            return page2_initial_error
        # Page 2 - Limit 1 Retries
        elif path == page2_limit1_item1_path:
            print(f"-> Returning page 2 retry item 1 success for {path}")
            return page2_limit1_item1_response
        elif path == page2_limit1_item2_path:
            print(f"-> Returning page 2 retry item 2 500 error for {path}")
            return page2_limit1_item2_error
        elif path == page2_limit1_item3_path:
            print(f"-> Returning page 2 retry item 3 success for {path}")
            return page2_limit1_item3_response
        elif path == page2_limit1_item4_path:
            print(f"-> Returning page 2 retry item 4 success for {path}")
            return page2_limit1_item4_response
        # Page 3
        elif path == page3_path:
            print(f"-> Returning page 3 success for {path}")
            return page3_response
        # Fallback
        else:
            print(f"!!! Unexpected GET path in mock: {path}")
            raise RuntimeError(f"Unexpected GET path in mock: {path}")

    confluence_server_client._confluence.get.side_effect = get_side_effect

    # --- Execute ---
    results = list(
        confluence_server_client.paginated_cql_retrieval(
            cql=test_cql,
            limit=test_limit,
        )
    )

    # --- Assertions ---
    # Verify expected results (ids 1-4 from page 1, 5, 7, 8 from page 2 retry, 9-10 from page 3)
    expected_results = [
        # Page 1
        {"id": 1},
        {"id": 2},
        {"id": 3},
        {"id": 4},
        # Page 2, Item 1 (retry)
        {"id": 5},
        # {"id": 6}, # Skipped due to error
        {"id": 7},  # Page 2, Item 3 (retry)
        {"id": 8},  # Page 2, Item 4 (retry)
        # Page 3
        {"id": 9},
        {"id": 10},
    ]
    assert results == expected_results

    # Verify log for the skipped item failure
    assert f"Error in confluence call to /{page2_limit1_item2_path}" in caplog.text

    # Verify sequence of calls
    expected_calls = [
        page1_path,  # Page 1 success
        page2_initial_path,  # Page 2 initial fail (500)
        # _paginate_url internal retry logic starts here
        page2_limit1_item1_path,  # Page 2 retry item 1 success
        page2_limit1_item2_path,  # Page 2 retry item 2 fail (500) -> logged & skipped
        page2_limit1_item3_path,  # Page 2 retry item 3 success
        page2_limit1_item4_path,  # Page 2 retry item 4 success
        # _paginate_url continues to next calculated page (page 3)
        page3_path,  # Page 3 success
    ]
    assert mock_get_call_paths == expected_calls


def test_paginated_cql_retrieval_skips_completely_failing_page(
    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture
) -> None:
    """
    Tests that paginated_cql_retrieval skips an entire page if the initial
    fetch fails and all subsequent limit=1 retries also fail. It should
    then proceed to fetch the next page successfully.
    """
    caplog.set_level("WARNING")

    test_cql = "type=page"
    encoded_cql = "type%3Dpage"
    test_limit = 3  # Small limit for testing
    _TEST_MINIMUM_LIMIT = 1

    base_path = f"rest/api/content/search?cql={encoded_cql}"
    page1_path = f"{base_path}&limit={test_limit}"
    # Page 2 starts where page 1 left off (start=test_limit)
    page2_initial_path = f"{base_path}&limit={test_limit}&start={test_limit}"
    # Page 3 starts after the completely failed page 2 (start=test_limit * 2)
    page3_path = f"{base_path}&limit={test_limit}&start={test_limit * 2}"

    # --- Mock Responses ---
    # Page 1: Success (3 items)
    page1_response = _create_mock_response(
        200,
        {
            "results": [{"id": 1}, {"id": 2}, {"id": 3}],
            "_links": {"next": f"/{page2_initial_path}"},
            "size": 3,
        },
        url=page1_path,
    )

    # Page 2: Initial attempt fails with 500
    page2_initial_error = _create_http_error(500, url=page2_initial_path)

    # Page 2: Retry attempts with limit=1 (ALL fail)
    page2_limit1_start_offset = test_limit
    page2_limit1_retry_errors = {}
    # Generate failing responses for each item expected on page 2
    for i in range(test_limit):
        item_path = f"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + i}"
        page2_limit1_retry_errors[item_path] = _create_http_error(500, url=item_path)

    # Page 3: Success (2 items)
    page3_response = _create_mock_response(
        200,
        {"results": [{"id": 7}, {"id": 8}], "_links": {}, "size": 2},
        url=page3_path,
    )

    # --- Side Effect Logic ---
    mock_get_call_paths: list[str] = []
    call_counts: dict[str, int] = {}

    def get_side_effect(
        path: str,
        params: dict[str, Any] | None = None,  # noqa: ARG001
        advanced_mode: bool = False,  # noqa: ARG001
    ) -> requests.Response:
        path = path.strip("/")
        mock_get_call_paths.append(path)
        call_counts[path] = call_counts.get(path, 0) + 1
        print(f"Mock GET received path: {path} (Call #{call_counts[path]})")

        if path == page1_path:
            print(f"-> Returning page 1 success for {path}")
            return page1_response
        elif path == page2_initial_path:
            print(f"-> Returning page 2 initial 500 error for {path}")
            return page2_initial_error
        elif path in page2_limit1_retry_errors:
            print(f"-> Returning page 2 limit=1 retry 500 error for {path}")
            return page2_limit1_retry_errors[path]
        elif path == page3_path:
            print(f"-> Returning page 3 success for {path}")
            return page3_response
        else:
            print(f"!!! Unexpected GET path in mock: {path}")
            raise RuntimeError(f"Unexpected GET path in mock: {path}")

    confluence_server_client._confluence.get.side_effect = get_side_effect

    # --- Execute ---
    results = list(
        confluence_server_client.paginated_cql_retrieval(
            cql=test_cql,
            limit=test_limit,
        )
    )

    # --- Assertions ---
    # Verify expected results (ids 1-3 from page 1, 7-8 from page 3)
    expected_results = [
        {"id": 1},
        {"id": 2},
        {"id": 3},  # Page 1
        # Page 2 completely skipped
        {"id": 7},
        {"id": 8},  # Page 3
    ]
    assert results == expected_results

    # Verify logs for the failed retry attempts on page 2
    for failed_path in page2_limit1_retry_errors:
        assert f"Error in confluence call to /{failed_path}" in caplog.text
    assert (
        f"Error in confluence call to {page2_initial_path}" not in caplog.text
    )  # Initial error triggers retry, not direct logging in _paginate_url

    # Verify sequence of calls
    expected_calls = [
        page1_path,  # Page 1 success
        page2_initial_path,  # Page 2 initial fail (500)
    ]
    # Add the failed limit=1 retry calls for page 2
    expected_calls.extend(list(page2_limit1_retry_errors.keys()))
    # The retry loop should make one final call to check if there are more items
    # expected_calls.append(page2_limit1_final_empty_path)
    # Add the call to page 3
    expected_calls.append(page3_path)

    assert mock_get_call_paths == expected_calls


def test_paginated_cql_retrieval_cloud_no_retry_on_error(
    mock_credentials_provider: mock.Mock,
) -> None:
    """
    Tests that for Confluence Cloud (is_cloud=True), paginated_cql_retrieval
    does NOT retry on pagination errors and raises HTTPError immediately.
    """
    # Setup Confluence Cloud Client
    confluence_cloud_client = OnyxConfluence(
        is_cloud=True,  # Key difference: Cloud instance
        url="https://fake-cloud.atlassian.net",
        credentials_provider=mock_credentials_provider,
        timeout=10,
    )
    mock_internal_client = mock.Mock()
    mock_internal_client.url = confluence_cloud_client._url
    confluence_cloud_client._confluence = mock_internal_client
    confluence_cloud_client._kwargs = confluence_cloud_client.shared_base_kwargs

    test_cql = "type=page"
    encoded_cql = "type%3Dpage"
    test_limit = 50  # Use a standard limit

    base_path = f"rest/api/content/search?cql={encoded_cql}"
    page1_path = f"{base_path}&limit={test_limit}"
    page2_path = f"{base_path}&limit={test_limit}&start={test_limit}"

    # --- Mock Responses ---
    # Page 1: Success
    page1_response = _create_mock_response(
        200,
        {
            "results": [{"id": i} for i in range(test_limit)],
            "_links": {"next": f"/{page2_path}"},
            "size": test_limit,
        },
        url=page1_path,
    )

    # Page 2: Failure (500)
    page2_error = _create_http_error(500, url=page2_path)

    # --- Side Effect Logic ---
    mock_get_call_paths: list[str] = []

    def get_side_effect(
        path: str,
        params: dict[str, Any] | None = None,  # noqa: ARG001
        advanced_mode: bool = False,  # noqa: ARG001
    ) -> requests.Response:
        path = path.strip("/")
        mock_get_call_paths.append(path)
        print(f"Mock GET received path: {path}")

        if path == page1_path:
            print(f"-> Returning page 1 success for {path}")
            return page1_response
        elif path == page2_path:
            print(f"-> Returning page 2 500 error for {path}")
            return page2_error
        else:
            # No other paths (like limit=1 retries) should be called
            print(f"!!! Unexpected GET path in mock for Cloud test: {path}")
            raise RuntimeError(f"Unexpected GET path in mock for Cloud test: {path}")

    confluence_cloud_client._confluence.get.side_effect = get_side_effect

    # --- Execute & Assert ---
    with pytest.raises(HTTPError) as excinfo:
        # Consume the iterator to trigger calls
        list(
            confluence_cloud_client.paginated_cql_retrieval(
                cql=test_cql,
                limit=test_limit,
            )
        )

    # Verify the error is the one we simulated for page 2
    assert excinfo.value.response == page2_error
    assert excinfo.value.response.status_code == 500
    assert page2_path in excinfo.value.response.url

    # Verify only two calls were made (page 1 success, page 2 fail)
    # Crucially, no retry attempts with different limits should exist.
    assert mock_get_call_paths == [page1_path, page2_path]


================================================
FILE: backend/tests/unit/onyx/connectors/confluence/test_rate_limit_handler.py
================================================
from unittest.mock import Mock

import pytest
from requests import HTTPError

from onyx.connectors.confluence.utils import handle_confluence_rate_limit


@pytest.fixture
def mock_confluence_call() -> Mock:
    return Mock()


# ***** Checking call count to sleep() won't correctly reflect test correctness
# especially since we really need to sleep multiple times and check for
# abort signals moving forward. Disabling this test for now until we come up with
# a better way forward.

# @pytest.mark.parametrize(
#     "status_code,text,retry_after",
#     [
#         (429, "Rate limit exceeded", "5"),
#         (200, "Rate limit exceeded", None),
#         (429, "Some other error", "5"),
#     ],
# )
# def test_rate_limit_handling(
#     mock_confluence_call: Mock, status_code: int, text: str, retry_after: str | None
# ) -> None:
#     with patch("time.sleep") as mock_sleep:
#         mock_confluence_call.side_effect = [
#             HTTPError(
#                 response=Mock(
#                     status_code=status_code,
#                     text=text,
#                     headers={"Retry-After": retry_after} if retry_after else {},
#                 )
#             ),
#         ] * 2 + ["Success"]

#         handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call)
#         result = handled_call()

#         assert result == "Success"
#         assert mock_confluence_call.call_count == 3
#         assert mock_sleep.call_count == 2
#         if retry_after:
#             mock_sleep.assert_called_with(int(retry_after))


# NOTE(rkuo): This tests an older version of rate limiting that is being deprecated
# and probably should go away soon.
def test_non_rate_limit_error(mock_confluence_call: Mock) -> None:
    mock_confluence_call.side_effect = HTTPError(
        response=Mock(status_code=500, text="Internal Server Error")
    )

    handled_call = handle_confluence_rate_limit(mock_confluence_call)

    with pytest.raises(HTTPError):
        handled_call()

    assert mock_confluence_call.call_count == 5


================================================
FILE: backend/tests/unit/onyx/connectors/cross_connector_utils/test_html_utils.py
================================================
import pathlib

from onyx.file_processing.html_utils import parse_html_page_basic


def test_parse_table() -> None:
    dir_path = pathlib.Path(__file__).parent.resolve()
    with open(f"{dir_path}/test_table.html", "r") as file:
        content = file.read()

    parsed = parse_html_page_basic(content)
    expected = "\n\thello\tthere\tgeneral\n\tkenobi\ta\tb\n\tc\td\te"
    assert expected in parsed


================================================
FILE: backend/tests/unit/onyx/connectors/cross_connector_utils/test_rate_limit.py
================================================
import time

from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
)


def test_rate_limit_basic() -> None:
    call_cnt = 0

    @rate_limit_builder(max_calls=2, period=5)
    def func() -> None:
        nonlocal call_cnt
        call_cnt += 1

    start = time.time()

    # Make calls that shouldn't be rate-limited
    func()
    func()
    time_to_finish_non_ratelimited = time.time() - start

    # Make a call which SHOULD be rate-limited
    func()
    time_to_finish_ratelimited = time.time() - start

    assert call_cnt == 3
    assert time_to_finish_non_ratelimited < 1
    assert time_to_finish_ratelimited > 5


================================================
FILE: backend/tests/unit/onyx/connectors/cross_connector_utils/test_table.html
================================================
<p>This page is to ensure we’re able to parse a table into a tsv</p>
<table
  data-table-width="760"
  data-layout="default"
  ac:local-id="3ad64d9f-01f1-4f78-876e-0fdf84e826a6"
>
  <tbody>
    <tr>
      <th>
        <p><strong>hello</strong></p>
      </th>
      <th>
        <p><strong>there</strong></p>
      </th>
      <th>
        <p><strong>general</strong></p>
      </th>
    </tr>
    <tr>
      <td>
        <p>kenobi</p>
      </td>
      <td>
        <p>a</p>
      </td>
      <td>
        <p>b</p>
      </td>
    </tr>
    <tr>
      <td>
        <p>c</p>
      </td>
      <td>
        <p>d</p>
      </td>
      <td>
        <p>e</p>
      </td>
    </tr>
  </tbody>
</table>
<p />


================================================
FILE: backend/tests/unit/onyx/connectors/discord/test_discord_validation.py
================================================
from unittest.mock import AsyncMock
from unittest.mock import patch

import pytest
from discord.errors import LoginFailure

from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.exceptions import CredentialInvalidError


def _build_connector(token: str = "fake-bot-token") -> DiscordConnector:
    connector = DiscordConnector()
    connector.load_credentials({"discord_bot_token": token})
    return connector


@patch("onyx.connectors.discord.connector.Client.close", new_callable=AsyncMock)
@patch("onyx.connectors.discord.connector.Client.login", new_callable=AsyncMock)
def test_validate_success(
    mock_login: AsyncMock,
    mock_close: AsyncMock,
) -> None:
    connector = _build_connector()
    connector.validate_connector_settings()

    mock_login.assert_awaited_once_with("fake-bot-token")
    mock_close.assert_awaited_once()


@patch("onyx.connectors.discord.connector.Client.close", new_callable=AsyncMock)
@patch(
    "onyx.connectors.discord.connector.Client.login",
    new_callable=AsyncMock,
    side_effect=LoginFailure("Improper token has been passed."),
)
def test_validate_invalid_token(
    mock_login: AsyncMock,  # noqa: ARG001
    mock_close: AsyncMock,
) -> None:
    connector = _build_connector(token="bad-token")

    with pytest.raises(CredentialInvalidError, match="Invalid Discord bot token"):
        connector.validate_connector_settings()

    mock_close.assert_awaited_once()


================================================
FILE: backend/tests/unit/onyx/connectors/github/test_github_checkpointing.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from github import Github
from github import RateLimitExceededException
from github.GithubException import GithubException
from github.Issue import Issue
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
from github.RateLimit import RateLimit
from github.Repository import Repository
from github.Requester import Requester

from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.github.connector import GithubConnector
from onyx.connectors.github.connector import GithubConnectorStage
from onyx.connectors.github.models import SerializedRepository
from onyx.connectors.models import Document
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector
from tests.unit.onyx.connectors.utils import (
    load_everything_from_checkpoint_connector_from_checkpoint,
)


@pytest.fixture
def repo_owner() -> str:
    return "test-org"


@pytest.fixture
def repositories() -> str:
    return "test-repo"


@pytest.fixture
def mock_github_client() -> MagicMock:
    """Create a mock GitHub client with proper typing"""
    mock = MagicMock(spec=Github)
    mock.get_repo = MagicMock()
    mock.get_organization = MagicMock()
    mock.get_user = MagicMock()
    mock.get_rate_limit = MagicMock(return_value=MagicMock(spec=RateLimit))
    mock._requester = MagicMock(spec=Requester)
    return mock


@pytest.fixture
def build_github_connector(
    repo_owner: str, repositories: str, mock_github_client: MagicMock
) -> Generator[Callable[..., GithubConnector], None, None]:
    def _github_connector(
        repo_owner: str = repo_owner, repositories: str = repositories
    ) -> GithubConnector:
        connector = GithubConnector(
            repo_owner=repo_owner,
            repositories=repositories,
            include_prs=True,
            include_issues=True,
        )
        connector.github_client = mock_github_client
        return connector

    yield _github_connector


@pytest.fixture
def create_mock_pr() -> Callable[..., MagicMock]:
    def _create_mock_pr(
        number: int = 1,
        title: str = "Test PR",
        body: str = "Test Description",
        state: str = "open",
        merged: bool = False,
        updated_at: datetime = datetime(2023, 1, 1, tzinfo=timezone.utc),
        html_url: str | None = None,
    ) -> MagicMock:
        """Helper to create a mock PullRequest object"""
        mock_pr = MagicMock(spec=PullRequest)
        mock_pr.number = number
        mock_pr.title = title
        mock_pr.body = body
        mock_pr.state = state
        mock_pr.merged = merged
        mock_pr.updated_at = updated_at
        mock_pr.html_url = (
            html_url
            if html_url is not None
            else f"https://github.com/test-org/test-repo/pull/{number}"
        )
        mock_pr.raw_data = {}
        mock_pr.base = MagicMock()
        mock_pr.base.repo = MagicMock()
        mock_pr.base.repo.full_name = "test-org/test-repo"

        return mock_pr

    return _create_mock_pr


@pytest.fixture
def create_mock_issue() -> Callable[..., MagicMock]:
    def _create_mock_issue(
        number: int = 1,
        title: str = "Test Issue",
        body: str = "Test Description",
        state: str = "open",
        updated_at: datetime = datetime(2023, 1, 1, tzinfo=timezone.utc),
    ) -> MagicMock:
        """Helper to create a mock Issue object"""
        mock_issue = MagicMock(spec=Issue)
        mock_issue.number = number
        mock_issue.title = title
        mock_issue.body = body
        mock_issue.state = state
        mock_issue.updated_at = updated_at
        mock_issue.html_url = f"https://github.com/test-org/test-repo/issues/{number}"
        mock_issue.pull_request = None  # Not a PR
        mock_issue.raw_data = {}

        # Mock the nested base.repo.full_name attribute
        mock_issue.repository = MagicMock()
        mock_issue.repository.full_name = "test-org/test-repo"

        return mock_issue

    return _create_mock_issue


@pytest.fixture
def create_mock_repo() -> Callable[..., MagicMock]:
    def _create_mock_repo(
        name: str = "test-repo",
        id: int = 1,
    ) -> MagicMock:
        mock_repo = MagicMock()
        mock_repo.name = name
        mock_repo.id = id

        headers_dict = {"status": "200 OK", "content-type": "application/json"}
        data_dict = {
            "id": id,
            "name": name,
            "full_name": f"test-org/{name}",
            "private": False,
            "description": "Test repository",
        }

        mock_repo.configure_mock(raw_headers=headers_dict, raw_data=data_dict)

        mock_repo.get_pulls = MagicMock()
        mock_repo.get_issues = MagicMock()
        mock_repo.get_contents = MagicMock()

        return mock_repo

    return _create_mock_repo


def test_load_from_checkpoint_happy_path(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
    create_mock_issue: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint - happy path"""
    # Set up mocked repo
    github_connector = build_github_connector()
    mock_repo = create_mock_repo()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PRs and issues
    mock_pr1 = create_mock_pr(number=1, title="PR 1")
    mock_pr2 = create_mock_pr(number=2, title="PR 2")
    mock_issue1 = create_mock_issue(number=1, title="Issue 1")
    mock_issue2 = create_mock_issue(number=2, title="Issue 2")

    # Mock get_pulls and get_issues methods
    mock_repo.get_pulls.return_value = MagicMock()
    mock_repo.get_pulls.return_value.get_page.side_effect = [
        [mock_pr1, mock_pr2],
        [],
    ]
    mock_repo.get_issues.return_value = MagicMock()
    mock_repo.get_issues.return_value.get_page.side_effect = [
        [mock_issue1, mock_issue2],
        [],
    ]

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            github_connector, 0, end_time
        )

        # Check that we got all documents and final has_more=False
        assert len(outputs) == 4

        repo_batch = outputs[0]
        assert len(repo_batch.items) == 0
        assert repo_batch.next_checkpoint.has_more is True

        # Check first batch (PRs)
        first_batch = outputs[1]
        assert len(first_batch.items) == 2
        assert isinstance(first_batch.items[0], Document)
        assert first_batch.items[0].id == "https://github.com/test-org/test-repo/pull/1"
        assert isinstance(first_batch.items[1], Document)
        assert first_batch.items[1].id == "https://github.com/test-org/test-repo/pull/2"
        assert first_batch.next_checkpoint.curr_page == 1

        # Check second batch (Issues)
        second_batch = outputs[2]
        assert len(second_batch.items) == 2
        assert isinstance(second_batch.items[0], Document)
        assert (
            second_batch.items[0].id == "https://github.com/test-org/test-repo/issues/1"
        )
        assert isinstance(second_batch.items[1], Document)
        assert (
            second_batch.items[1].id == "https://github.com/test-org/test-repo/issues/2"
        )
        assert second_batch.next_checkpoint.has_more

        # Check third batch (finished checkpoint)
        third_batch = outputs[3]
        assert len(third_batch.items) == 0
        assert third_batch.next_checkpoint.has_more is False


def test_load_from_checkpoint_with_rate_limit(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint with rate limit handling"""
    # Set up mocked repo
    github_connector = build_github_connector()
    mock_repo = create_mock_repo()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PR
    mock_pr = create_mock_pr()

    # Mock get_pulls to raise RateLimitExceededException on first call
    mock_repo.get_pulls.return_value = MagicMock()
    mock_repo.get_pulls.return_value.get_page.side_effect = [
        RateLimitExceededException(403, {"message": "Rate limit exceeded"}, {}),
        [mock_pr],
        [],
    ]

    # Mock rate limit reset time
    mock_rate_limit = MagicMock(spec=RateLimit)
    mock_rate_limit.core.reset = datetime.now(timezone.utc)
    github_connector.github_client.get_rate_limit.return_value = mock_rate_limit

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        with patch(
            "onyx.connectors.github.connector.sleep_after_rate_limit_exception"
        ) as mock_sleep:
            outputs = load_everything_from_checkpoint_connector(
                github_connector, 0, end_time
            )

            assert mock_sleep.call_count == 1

        # Check that we got the document after rate limit was handled
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 1
        assert isinstance(outputs[1].items[0], Document)
        assert outputs[1].items[0].id == "https://github.com/test-org/test-repo/pull/1"

        assert outputs[-1].next_checkpoint.has_more is False


def test_load_from_checkpoint_with_empty_repo(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint with an empty repository"""
    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector = build_github_connector()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Mock get_pulls and get_issues to return empty lists
    mock_repo.get_pulls.return_value = MagicMock()
    mock_repo.get_pulls.return_value.get_page.return_value = []
    mock_repo.get_issues.return_value = MagicMock()
    mock_repo.get_issues.return_value.get_page.return_value = []

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            github_connector, 0, end_time
        )

        # Check that we got no documents
        assert len(outputs) == 2
        assert len(outputs[-1].items) == 0
        assert not outputs[-1].next_checkpoint.has_more


def test_load_from_checkpoint_with_prs_only(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint with only PRs enabled"""
    # Configure connector to only include PRs
    github_connector = build_github_connector()
    github_connector.include_prs = True
    github_connector.include_issues = False

    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PRs
    mock_pr1 = create_mock_pr(number=1, title="PR 1")
    mock_pr2 = create_mock_pr(number=2, title="PR 2")

    # Mock get_pulls method
    mock_repo.get_pulls.return_value = MagicMock()
    mock_repo.get_pulls.return_value.get_page.side_effect = [
        [mock_pr1, mock_pr2],
        [],
    ]

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            github_connector, 0, end_time
        )

        # Check that we only got PRs
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 2
        assert all(
            isinstance(doc, Document) and "pull" in doc.id for doc in outputs[0].items
        )  # All documents should be PRs

        assert outputs[-1].next_checkpoint.has_more is False


def test_load_from_checkpoint_with_issues_only(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_issue: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint with only issues enabled"""
    # Configure connector to only include issues
    github_connector = build_github_connector()
    github_connector.include_prs = False
    github_connector.include_issues = True

    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked issues
    mock_issue1 = create_mock_issue(number=1, title="Issue 1")
    mock_issue2 = create_mock_issue(number=2, title="Issue 2")

    # Mock get_issues method
    mock_repo.get_issues.return_value = MagicMock()
    mock_repo.get_issues.return_value.get_page.side_effect = [
        [mock_issue1, mock_issue2],
        [],
    ]

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            github_connector, 0, end_time
        )

        # Check that we only got issues
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 2
        assert all(
            isinstance(doc, Document) and "issues" in doc.id for doc in outputs[0].items
        )  # All documents should be issues
        assert outputs[1].next_checkpoint.has_more

        assert outputs[-1].next_checkpoint.has_more is False


@pytest.mark.parametrize(
    "status_code,expected_exception,expected_message",
    [
        (
            401,
            CredentialExpiredError,
            "GitHub credential appears to be invalid or expired",
        ),
        (
            403,
            InsufficientPermissionsError,
            "Your GitHub token does not have sufficient permissions",
        ),
        (
            404,
            ConnectorValidationError,
            "GitHub repository not found",
        ),
    ],
)
def test_validate_connector_settings_errors(
    build_github_connector: Callable[..., GithubConnector],
    status_code: int,
    expected_exception: type[Exception],
    expected_message: str,
) -> None:
    """Test validation with various error scenarios"""
    error = GithubException(status=status_code, data={}, headers={})

    github_connector = build_github_connector()
    github_client = cast(Github, github_connector.github_client)
    get_repo_mock = cast(MagicMock, github_client.get_repo)
    get_repo_mock.side_effect = error

    with pytest.raises(expected_exception) as excinfo:
        github_connector.validate_connector_settings()
    assert expected_message in str(excinfo.value)


def test_validate_connector_settings_success(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
) -> None:
    """Test successful validation"""
    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector = build_github_connector()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Mock get_contents to simulate successful access
    mock_repo.get_contents.return_value = MagicMock()

    github_connector.validate_connector_settings()
    github_connector.github_client.get_repo.assert_called_once_with(
        f"{github_connector.repo_owner}/{github_connector.repositories}"
    )


def test_load_from_checkpoint_with_cursor_fallback(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test loading from checkpoint with fallback to cursor-based pagination"""
    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector = build_github_connector()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PRs
    mock_pr1 = create_mock_pr(number=1, title="PR 1")
    mock_pr2 = create_mock_pr(number=2, title="PR 2")

    # Create a mock paginated list that will raise the 422 error on get_page
    mock_paginated_list = MagicMock()
    mock_paginated_list.get_page.side_effect = [
        GithubException(
            422,
            {
                "message": "Pagination with the page parameter is not supported for large datasets. Use cursor"
            },
            {},
        ),
    ]

    # Create a new mock for cursor-based pagination
    mock_cursor_paginated_list = MagicMock()
    mock_cursor_paginated_list.__nextUrl = (
        "https://api.github.com/repos/test-org/test-repo/pulls?cursor=abc123"
    )
    mock_cursor_paginated_list.__iter__.return_value = iter([mock_pr1, mock_pr2])

    mock_repo.get_pulls.side_effect = [
        mock_paginated_list,
        mock_cursor_paginated_list,
    ]

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(
            github_connector, 0, end_time
        )

        # Check that we got the documents via cursor-based pagination
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 2
        assert isinstance(outputs[1].items[0], Document)
        assert outputs[1].items[0].id == "https://github.com/test-org/test-repo/pull/1"
        assert isinstance(outputs[1].items[1], Document)
        assert outputs[1].items[1].id == "https://github.com/test-org/test-repo/pull/2"

        # Verify cursor URL is not set in checkpoint since pagination succeeded without failures
        assert outputs[1].next_checkpoint.cursor_url is None
        assert outputs[1].next_checkpoint.num_retrieved == 0


def test_load_from_checkpoint_resume_cursor_pagination(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test resuming from a checkpoint that was using cursor-based pagination"""
    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector = build_github_connector()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PRs
    mock_pr3 = create_mock_pr(number=3, title="PR 3")
    mock_pr4 = create_mock_pr(number=4, title="PR 4")

    # Create a checkpoint that was using cursor-based pagination
    checkpoint = github_connector.build_dummy_checkpoint()
    checkpoint.cursor_url = (
        "https://api.github.com/repos/test-org/test-repo/pulls?cursor=abc123"
    )
    checkpoint.num_retrieved = 2

    # Mock get_pulls to use cursor-based pagination
    mock_paginated_list = MagicMock()
    mock_paginated_list.__nextUrl = (
        "https://api.github.com/repos/test-org/test-repo/pulls?cursor=def456"
    )
    mock_paginated_list.__iter__.return_value = iter([mock_pr3, mock_pr4])
    mock_repo.get_pulls.return_value = mock_paginated_list

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint with the checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector_from_checkpoint(
            github_connector, 0, end_time, checkpoint
        )

        # Check that we got the documents via cursor-based pagination
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 2
        assert isinstance(outputs[1].items[0], Document)
        assert outputs[1].items[0].id == "https://github.com/test-org/test-repo/pull/3"
        assert isinstance(outputs[1].items[1], Document)
        assert outputs[1].items[1].id == "https://github.com/test-org/test-repo/pull/4"

        # Verify cursor URL was stored in checkpoint
        assert outputs[1].next_checkpoint.cursor_url is None
        assert outputs[1].next_checkpoint.num_retrieved == 0


def test_load_from_checkpoint_cursor_expiration(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test handling of cursor expiration during cursor-based pagination"""
    # Set up mocked repo
    mock_repo = create_mock_repo()
    github_connector = build_github_connector()
    github_connector.github_client = mock_github_client
    mock_github_client.get_repo.return_value = mock_repo

    # Set up mocked PRs
    mock_pr4 = create_mock_pr(number=4, title="PR 4")

    # Create a checkpoint with an expired cursor
    checkpoint = github_connector.build_dummy_checkpoint()
    checkpoint.cursor_url = (
        "https://api.github.com/repos/test-org/test-repo/pulls?cursor=expired"
    )
    checkpoint.num_retrieved = 3  # We've already retrieved 3 items

    # Mock get_pulls to simulate cursor expiration by raising an error before any results
    mock_paginated_list = MagicMock()
    mock_paginated_list.__nextUrl = (
        "https://api.github.com/repos/test-org/test-repo/pulls?cursor=expired"
    )
    mock_paginated_list.__iter__.side_effect = GithubException(
        422, {"message": "Cursor expired"}, {}
    )

    # Create a new mock for successful retrieval after retry
    mock_retry_paginated_list = MagicMock()
    mock_retry_paginated_list.__nextUrl = None

    # Create an iterator that will yield the remaining PR
    def retry_iterator() -> Generator[PullRequest, None, None]:
        yield mock_pr4

    # Create a mock for the _Slice object that will be returned by pag_list[prev_num_objs:]
    mock_slice = MagicMock()
    mock_slice.__iter__.return_value = retry_iterator()

    # Set up the slice behavior for the retry paginated list
    mock_retry_paginated_list.__getitem__.return_value = mock_slice

    # Set up the side effect for get_pulls to return our mocks
    mock_repo.get_pulls.side_effect = [
        mock_paginated_list,
        mock_retry_paginated_list,
    ]

    # Mock SerializedRepository.to_Repository to return our mock repo
    with patch.object(SerializedRepository, "to_Repository", return_value=mock_repo):
        # Call load_from_checkpoint with the checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector_from_checkpoint(
            github_connector, 0, end_time, checkpoint
        )

        # Check that we got the remaining document after retrying from the beginning
        assert len(outputs) >= 2
        assert len(outputs[1].items) == 1
        assert isinstance(outputs[1].items[0], Document)
        assert outputs[1].items[0].id == "https://github.com/test-org/test-repo/pull/4"

        # Verify cursor URL was cleared in checkpoint
        assert outputs[1].next_checkpoint.cursor_url is None
        assert outputs[1].next_checkpoint.num_retrieved == 0

        # Verify that the slice was called with the correct argument
        mock_retry_paginated_list.__getitem__.assert_called_once_with(slice(3, None))


def test_load_from_checkpoint_cursor_pagination_completion(
    build_github_connector: Callable[..., GithubConnector],
    mock_github_client: MagicMock,
    create_mock_repo: Callable[..., MagicMock],
    create_mock_pr: Callable[..., MagicMock],
) -> None:
    """Test behavior when cursor-based pagination completes and moves to next repository"""
    # Set up two repositories
    mock_repo1 = create_mock_repo(name="repo1", id=1)
    mock_repo2 = create_mock_repo(name="repo2", id=2)

    # Initialize connector with no specific repositories, so _get_all_repos is used
    github_connector = build_github_connector(repositories="")
    github_connector.github_client = mock_github_client
    mock_pr1 = create_mock_pr(
        number=1,
        title="PR 1 Repo 1",
        html_url="https://github.com/test-org/repo1/pull/1",
    )
    mock_pr2 = create_mock_pr(
        number=2,
        title="PR 2 Repo 1",
        html_url="https://github.com/test-org/repo1/pull/2",
    )
    mock_pr3 = create_mock_pr(
        number=3,
        title="PR 3 Repo 2",
        html_url="https://github.com/test-org/repo2/pull/3",
    )
    mock_pr4 = create_mock_pr(
        number=4,
        title="PR 4 Repo 2",
        html_url="https://github.com/test-org/repo2/pull/4",
    )
    checkpoint = github_connector.build_dummy_checkpoint()
    mock_paginated_list_repo1_prs = MagicMock(spec=PaginatedList)

    def get_page_repo1_side_effect(page_num: int) -> list[PullRequest]:
        if page_num == 0:
            return [mock_pr1, mock_pr2]
        else:
            return []

    mock_paginated_list_repo1_prs.get_page.side_effect = get_page_repo1_side_effect
    mock_repo2_cursor_paginator = MagicMock(spec=PaginatedList)

    def repo2_cursor_iterator() -> Generator[PullRequest, None, None]:
        print("setting next url to cursor_step_2")
        mock_repo2_cursor_paginator.__nextUrl = "cursor_step_2"
        yield mock_pr3
        print("setting next url to None")
        mock_repo2_cursor_paginator.__nextUrl = None
        yield mock_pr4

    mock_repo2_cursor_paginator.__iter__.return_value = repo2_cursor_iterator()
    mock_repo2_cursor_paginator.__nextUrl = None
    pull_requests_func_invocation_count = 0

    def replacement_pull_requests_func(
        repo: Repository,
    ) -> Callable[[], PaginatedList[PullRequest]]:
        nonlocal pull_requests_func_invocation_count
        pull_requests_func_invocation_count += 1
        current_repo_name = repo.name
        lambda_call_count_for_current_repo = 0

        def git_objs_lambda() -> PaginatedList[PullRequest]:
            nonlocal lambda_call_count_for_current_repo
            lambda_call_count_for_current_repo += 1
            if current_repo_name == mock_repo2.name:
                if lambda_call_count_for_current_repo == 1:
                    pl_for_offset_failure = MagicMock(spec=PaginatedList)

                    def get_page_raises_exception(
                        page_num: int,  # noqa: ARG001
                    ) -> list[PullRequest]:
                        raise GithubException(422, message="use cursor pagination")

                    pl_for_offset_failure.get_page.side_effect = (
                        get_page_raises_exception
                    )
                    return pl_for_offset_failure
                else:
                    return mock_repo2_cursor_paginator
            elif current_repo_name == mock_repo1.name:
                return mock_paginated_list_repo1_prs
            else:
                raise ValueError(f"Unexpected repo name: {current_repo_name}")

        return git_objs_lambda

    mock_requester = MagicMock(spec=Requester)
    github_connector.github_client._requester = mock_requester

    def get_repo_side_effect(repo_id: int) -> MagicMock:
        repo_to_return = None
        headers_dict = None
        data_dict = None
        if repo_id == 1:
            repo_to_return = mock_repo1
            headers_dict = {"status": "200 OK", "content-type": "application/json"}
            data_dict = {
                "id": 1,
                "name": "repo1",
                "full_name": "test-org/repo1",
                "private": False,
                "description": "Test repository",
            }
        elif repo_id == 2:
            repo_to_return = mock_repo2
            headers_dict = {"status": "200 OK", "content-type": "application/json"}
            data_dict = {
                "id": 2,
                "name": "repo2",
                "full_name": "test-org/repo2",
                "private": False,
                "description": "Test repository",
            }
        else:
            raise ValueError(f"Unexpected repo ID: {repo_id}")
        if repo_to_return and headers_dict and data_dict:
            repo_to_return.configure_mock(raw_headers=headers_dict, raw_data=data_dict)
        return repo_to_return

    mock_github_client.get_repo.side_effect = get_repo_side_effect

    def to_repository_side_effect(
        self_serialized_repo: SerializedRepository,
        requester_arg: Requester,  # noqa: ARG001
    ) -> Repository:
        if self_serialized_repo.id == mock_repo1.id:
            return mock_repo1
        elif self_serialized_repo.id == mock_repo2.id:
            return mock_repo2
        raise ValueError(f"Unexpected repo ID: {self_serialized_repo.id}")

    mock_empty_issues_list = MagicMock(spec=PaginatedList)
    mock_empty_issues_list.get_page.return_value = []
    mock_empty_issues_list.__iter__.return_value = iter([])
    type(mock_empty_issues_list)._PaginatedList__nextUrl = None
    mock_repo1.get_issues.return_value = mock_empty_issues_list
    mock_repo2.get_issues.return_value = mock_empty_issues_list
    with (
        patch.object(
            github_connector, "get_all_repos", return_value=[mock_repo1, mock_repo2]
        ),
        patch.object(
            github_connector,
            "_pull_requests_func",
            side_effect=replacement_pull_requests_func,
        ),
        patch.object(
            SerializedRepository,
            "to_Repository",
            side_effect=to_repository_side_effect,
            autospec=True,
        ) as mock_to_repository,
    ):
        end_time = time.time()
        outputs = list(
            load_everything_from_checkpoint_connector_from_checkpoint(
                github_connector, 0, end_time, checkpoint
            )
        )

    # --- Assertions ---
    # Expected outputs: 5 based on the latest logic refinement
    # 1. Initial cp
    # 2. After repo2 PRs (cursor fallback) -> yields cp for repo2 issues
    # 3. After repo2 issues (empty) -> yields cp for repo1 PRs
    # 4. After repo1 PRs (page 0) -> yields cp for repo1 PRs page 1
    # 5. After repo1 PRs (page 1 empty) and repo1 issues (empty) -> yields final cp

    assert (
        len(outputs) == 5
    )  # Initial, Repo2-PRs, Repo2-Issues, Repo1-PRs-P0, Repo1-Issues(final)

    # Output 0: Initial checkpoint, after _get_all_repos
    cp0 = outputs[0].next_checkpoint
    assert cp0.has_more
    assert cp0.cached_repo is not None
    assert cp0.cached_repo.id == mock_repo2.id  # mock_repo2 is popped first
    assert cp0.cached_repo_ids == [mock_repo1.id]
    assert cp0.stage == GithubConnectorStage.PRS
    assert cp0.cursor_url is None

    # Output 1: After processing PRs for mock_repo2 (via cursor fallback)
    # Items should be pr3, pr4
    assert len(outputs[1].items) == 2
    assert all(isinstance(item, Document) for item in outputs[1].items)
    assert {
        item.semantic_identifier for item in cast(list[Document], outputs[1].items)
    } == {"3: PR 3 Repo 2", "4: PR 4 Repo 2"}
    cp1 = outputs[1].next_checkpoint
    assert (
        cp1.has_more
    )  # Still have repo1 in cached_repo_ids at the time checkpoint is yielded
    assert cp1.cached_repo is not None
    assert cp1.cached_repo.id == mock_repo2.id
    assert cp1.stage == GithubConnectorStage.ISSUES  # Moved to issues for repo2
    assert cp1.cursor_url is None  # Cursor completed and reset
    assert cp1.num_retrieved == 0  # Reset
    assert cp1.curr_page == 0  # Reset

    # Output 2: After processing Issues for mock_repo2 (empty)
    assert len(outputs[2].items) == 0
    cp2 = outputs[2].next_checkpoint
    assert cp2.has_more  # Checkpoint yielded BEFORE final has_more check
    assert cp2.cached_repo is not None
    assert cp2.cached_repo.id == mock_repo1.id  # Moved to repo1
    assert cp2.cached_repo_ids == []  # Popped repo1 id
    assert cp2.stage == GithubConnectorStage.PRS  # For repo1
    assert cp2.cursor_url is None

    # Output 3: After processing PRs for mock_repo1 (via offset, page 0)
    assert len(outputs[3].items) == 2
    assert all(isinstance(item, Document) for item in outputs[3].items)
    assert {
        item.semantic_identifier for item in cast(list[Document], outputs[3].items)
    } == {"1: PR 1 Repo 1", "2: PR 2 Repo 1"}
    cp3 = outputs[3].next_checkpoint
    # This checkpoint is returned early because offset had items. has_more reflects state then.
    assert cp3.has_more  # still need to do issues
    assert cp3.cached_repo is not None
    assert cp3.cached_repo.id == mock_repo1.id
    assert cp3.stage == GithubConnectorStage.PRS  # Still PRS stage
    assert cp3.curr_page == 1  # Offset pagination incremented page for PRs
    assert cp3.cursor_url is None

    # Output 4: After processing PRs page 1 (empty) and Issues for mock_repo1 (empty) - Final checkpoint
    assert len(outputs[4].items) == 0
    cp4 = outputs[4].next_checkpoint
    assert not cp4.has_more  # All done
    assert cp4.cached_repo is not None
    assert cp4.cached_repo.id == mock_repo1.id  # Last processed repo
    assert (
        cp4.stage == GithubConnectorStage.PRS
    )  # Reset for a hypothetical next run/repo
    assert cp4.curr_page == 0
    assert cp4.num_retrieved == 0
    assert cp4.cursor_url is None

    # Verify to_Repository calls
    print(mock_to_repository.call_args_list)
    assert (
        mock_to_repository.call_count == 4
    )  # Twice for repo2, twice for repo1 (issues don't need it)
    assert (
        mock_to_repository.call_args_list[0][0][0].id == mock_repo2.id
    )  # First call was for repo2
    assert (
        mock_to_repository.call_args_list[1][0][0].id == mock_repo2.id
    )  # Second call was for repo2
    assert (
        mock_to_repository.call_args_list[2][0][0].id == mock_repo1.id
    )  # Third call was for repo1
    assert (
        mock_to_repository.call_args_list[3][0][0].id == mock_repo1.id
    )  # Fourth call was for repo1

    # Verify _pull_requests_func was invoked for both repos' PR stages
    assert (
        pull_requests_func_invocation_count == 3
    )  # twice for repo2 PRs, once for repo1 PRs


================================================
FILE: backend/tests/unit/onyx/connectors/gmail/test_connector.py
================================================
import datetime
import json
import os
from typing import Any
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.gmail.connector import _build_time_range_query
from onyx.connectors.gmail.connector import GmailCheckpoint
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.gmail.connector import thread_to_document
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from tests.unit.onyx.connectors.utils import (
    load_everything_from_checkpoint_connector_from_checkpoint,
)


def test_thread_to_document() -> None:
    json_path = os.path.join(os.path.dirname(__file__), "thread.json")
    with open(json_path, "r") as f:
        full_email_thread = json.load(f)

    doc = thread_to_document(full_email_thread, "admin@onyx-test.com")
    assert isinstance(doc, Document)
    assert doc.source == DocumentSource.GMAIL
    assert doc.semantic_identifier == "Email Chain 1"
    assert doc.doc_updated_at == datetime.datetime(
        2024, 11, 2, 17, 34, 55, tzinfo=datetime.timezone.utc
    )
    assert len(doc.sections) == 4
    assert doc.metadata == {}


def test_build_time_range_query() -> None:
    time_range_start = 1703066296.159339
    time_range_end = 1704984791.657404
    query = _build_time_range_query(time_range_start, time_range_end)
    assert query == "after:1703066296 before:1704984791"
    query = _build_time_range_query(time_range_start, None)
    assert query == "after:1703066296"
    query = _build_time_range_query(None, time_range_end)
    assert query == "before:1704984791"
    query = _build_time_range_query(0.0, time_range_end)
    assert query == "before:1704984791"
    query = _build_time_range_query(None, None)
    assert query is None


def test_time_str_to_utc() -> None:
    str_to_dt = {
        "Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
            2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
        ),
        "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
            2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
        ),
        "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
            2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
        ),
        "30 Jun 2023 18:45:01 +0300": datetime.datetime(
            2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
        ),
        "22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
            2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
        ),
        "Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
            2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
        ),
    }
    for strptime, expected_datetime in str_to_dt.items():
        assert time_str_to_utc(strptime) == expected_datetime


def test_gmail_checkpoint_progression() -> None:
    connector = GmailConnector()
    connector._creds = MagicMock()
    connector._primary_admin_email = "admin@example.com"

    user_emails = ["user1@example.com", "user2@example.com"]

    thread_list_responses: dict[str, dict[str | None, dict[str, Any]]] = {
        "user1@example.com": {
            None: {
                "threads": [{"id": "t1"}, {"id": "t2"}],
                "nextPageToken": "token-user1-page2",
            },
            "token-user1-page2": {
                "threads": [{"id": "t3"}],
                "nextPageToken": None,
            },
        },
        "user2@example.com": {
            None: {
                "threads": [{"id": "t4"}],
                "nextPageToken": None,
            }
        },
    }

    full_thread_responses = {
        "user1@example.com": {
            "t1": {"id": "t1"},
            "t2": {"id": "t2"},
            "t3": {"id": "t3"},
        },
        "user2@example.com": {
            "t4": {"id": "t4"},
        },
    }

    class MockRequest:
        def __init__(self, response: dict[str, Any]):
            self._response = response

        def execute(self) -> dict[str, Any]:
            return self._response

    class MockThreadsResource:
        def __init__(self, user_email: str) -> None:
            self._user_email = user_email

        def list(
            self,
            *,
            userId: str,
            fields: str,
            q: str | None = None,  # noqa: ARG002
            pageToken: str | None = None,
            **_: object,
        ) -> MockRequest:
            assert userId == self._user_email
            assert "nextPageToken" in fields
            responses = thread_list_responses[self._user_email]
            key = pageToken or None
            return MockRequest(responses[key])

        def get(
            self,
            *,
            userId: str,
            id: str,
            fields: str,
            **_: object,
        ) -> MockRequest:
            assert userId == self._user_email
            assert "messages" in fields or "payload" in fields
            return MockRequest(full_thread_responses[self._user_email][id])

    class MockUsersResource:
        def __init__(self, user_email: str) -> None:
            self._user_email = user_email

        def threads(self) -> MockThreadsResource:
            return MockThreadsResource(self._user_email)

    class MockGmailService:
        def __init__(self, user_email: str) -> None:
            self._user_email = user_email

        def users(self) -> MockUsersResource:
            return MockUsersResource(self._user_email)

    def fake_get_gmail_service(_: object, user_email: str) -> MockGmailService:
        return MockGmailService(user_email)

    def fake_thread_to_document(
        full_thread: dict[str, object], user_email: str
    ) -> Document:
        thread_id = cast(str, full_thread["id"])
        return Document(
            id=f"{user_email}:{thread_id}",
            semantic_identifier=f"Thread {thread_id}",
            sections=[TextSection(text=f"Body {thread_id}")],
            source=DocumentSource.GMAIL,
            metadata={},
            external_access=ExternalAccess(
                external_user_emails={user_email},
                external_user_group_ids=set(),
                is_public=False,
            ),
        )

    checkpoint = connector.build_dummy_checkpoint()
    assert isinstance(checkpoint, GmailCheckpoint)

    with patch.object(GmailConnector, "_get_all_user_emails", return_value=user_emails):
        with patch(
            "onyx.connectors.gmail.connector.get_gmail_service",
            side_effect=fake_get_gmail_service,
        ):
            with patch(
                "onyx.connectors.gmail.connector.thread_to_document",
                side_effect=fake_thread_to_document,
            ) as mock_thread_to_document:
                outputs = load_everything_from_checkpoint_connector_from_checkpoint(
                    connector=connector,
                    start=0,
                    end=1_000,
                    checkpoint=checkpoint,
                )

    document_ids = [
        item.id
        for output in outputs
        for item in output.items
        if isinstance(item, Document)
    ]

    assert document_ids == [
        "user2@example.com:t4",
        "user1@example.com:t1",
        "user1@example.com:t2",
        "user1@example.com:t3",
    ]

    assert mock_thread_to_document.call_count == 4

    final_checkpoint = outputs[-1].next_checkpoint
    assert isinstance(final_checkpoint, GmailCheckpoint)
    assert final_checkpoint.has_more is False
    assert final_checkpoint.user_emails == []


================================================
FILE: backend/tests/unit/onyx/connectors/gmail/thread.json
================================================
{
  "id": "192edefb315737c3",
  "messages": [
    {
      "id": "192edeff0dc743cf",
      "payload": {
        "headers": [
          {
            "name": "MIME-Version",
            "value": "1.0"
          },
          {
            "name": "Date",
            "value": "Sat, 2 Nov 2024 10:32:57 -0700"
          },
          {
            "name": "Message-ID",
            "value": "<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>"
          },
          {
            "name": "Subject",
            "value": "Email Chain 1"
          },
          {
            "name": "From",
            "value": "Test Admin Admin <admin@onyx-test.com>"
          },
          {
            "name": "To",
            "value": "test-group-1@onyx-test.com"
          },
          {
            "name": "Content-Type",
            "value": "multipart/alternative; boundary=\"0000000000004480480625f17117\""
          }
        ],
        "parts": [
          {
            "mimeType": "text/plain",
            "body": {
              "data": "VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDENCg=="
            }
          },
          {
            "mimeType": "text/html",
            "body": {
              "data": "PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDE8L2Rpdj4NCg=="
            }
          }
        ]
      }
    },
    {
      "id": "192edf07fbcc8b2c",
      "payload": {
        "headers": [
          {
            "name": "Delivered-To",
            "value": "admin@onyx-test.com"
          },
          {
            "name": "Received",
            "value": "by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1873533vqt;        Sat, 2 Nov 2024 10:33:34 -0700 (PDT)"
          },
          {
            "name": "X-Received",
            "value": "by 2002:a05:6102:1284:b0:4a9:555b:fb50 with SMTP id ada2fe7eead31-4a9555bfd21mr8428882137.20.1730568814436;        Sat, 02 Nov 2024 10:33:34 -0700 (PDT)"
          },
          {
            "name": "ARC-Seal",
            "value": "i=1; a=rsa-sha256; t=1730568814; cv=none;        d=google.com; s=arc-20240605;        b=A75GBczY/LN8OhNdpZ1VM3opx5VWU3HWYnwCIL9TLBqEpNz2X74TXNkCevJkImB3VF         BkFY7gHg7d8oGdsQvUp2EEdRBXKoYT8P4PTc3ZSD2W8LYU2XCudIbA5xtGObELmI0h0f         bCXT8dE7m6hGJPTg0WPSlkvGs2bY52bmSbCbrnrA/Mx/oyxYPzwv5cMw3CLMXo/8nOLO         FAzrnMTKRqYtn/QvYjUne7PpVSYPk0Edg5261/jn9qatyyL8VePU4FriQTffjAC85Ayc         jikVA5QnsYO79aXJE0SIw4xBHwtOgmyWhU9TPw2NfuQHZWrm39JudUYlmZb8MV4VpX6p         otxw=="
          },
          {
            "name": "ARC-Message-Signature",
            "value": "i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :dkim-signature;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=;        b=bkhR3iHOUD64TOG3Mqfd9BMT/2IF9gHEjHZWR/tet5J05UKFhk2d4k69wuSLNJcxlF         dB6zzgt1vvEnCbSV+XBCEG1zW76T/sN6Ldn7+5xomsGFYvTZsW4E7OJqxkedfdpFeWwc         eBlgX765wnBs4ztktDhK6gO8igWx3CaYH5wbX72DV4wqcQpDNpMqNHK7sHrlOG2YJGzV         7i3tli4dJqu1zgQK+lo1or1QQyadFzhbwX2iFdSLTNSNR3s70kqqBOT69lDMv84dfKCp         +hXE0uwjOY/9lGG9rO1/e5WWEDC2BSZ7wzjvvyBRjDG+lavBqTggUizd8W+MlRYXONAX         t7Kg==;        dara=google.com"
          },
          {
            "name": "ARC-Authentication-Results",
            "value": "i=1; mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7;       spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com;       dara=pass header.i=@onyx-test.com"
          },
          {
            "name": "Return-Path",
            "value": "<test_user_1@onyx-test.com>"
          },
          {
            "name": "Received",
            "value": "from mail-sor-f41.google.com (mail-sor-f41.google.com. [209.85.220.41])        by mx.google.com with SMTPS id a1e0cc1a2514c-855dae589a1sor1192309241.6.2024.11.02.10.33.34        for <admin@onyx-test.com>        (Google Transport Security);        Sat, 02 Nov 2024 10:33:34 -0700 (PDT)"
          },
          {
            "name": "Received-SPF",
            "value": "none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;"
          },
          {
            "name": "Authentication-Results",
            "value": "mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7;       spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com;       dara=pass header.i=@onyx-test.com"
          },
          {
            "name": "DKIM-Signature",
            "value": "v=1; a=rsa-sha256; c=relaxed/relaxed;        d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568814; x=1731173614; darn=onyx-test.com;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :from:to:cc:subject:date:message-id:reply-to;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        b=Z57TqzI7sEwwOumQx0z6YhibC1x2CHlNmBjwyQT1mNOUScZbzo6nmH8Ydo7slsTfgZ         rgwKEEYkf/CYlFWGUEzGzc22jVUCSMjNMFB0nEtfj+GPJaNjDR9FxjFLTUfSq64H/RCI         eO9+oEAJHaa5QmceX2yiSJFXNqmVEMJNT+K6CnlbN5gW6CUD2tBt46vW83PVJgxKMc76         A7/eaDxdZDLUvpjHes4SvM7x0eBM9t7w9wb/jEjGqA54HI2YHVcxM4HJxrbCChYn8UoG         7+UOpfOmHTZLdLYgMtSqYanJ3BTENEdyVp2LIOZOhlUT7Hbr9esyeVyy765XTuRAWxmo         DGPQ=="
          },
          {
            "name": "X-Google-DKIM-Signature",
            "value": "v=1; a=rsa-sha256; c=relaxed/relaxed;        d=1e100.net; s=20230601; t=1730568814; x=1731173614;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        b=fxuobWT2rW8kvQ14LUHbJEJOdCM4uBP+Obo7jL4w0BvwLrBNNbMPqMUc8d8u17dnS7         gczFCprOr5PZnVNmOZMQvmRTJ6poTkWOGQhsOyDOSLNI0IzuaN2wh9qjmFez6Z9nTx3f         Lo0I0uahwzNkExywHC9x0H3NOZlS4074qkyLJObgnOHa5vml8SEcChMuzOQuCSU9wNjO         t26urEoct8LArf0K/xztjxpEpDCgnf4Cr/KmZfi4/2Sjv4jwQzkLVuiwADraHIJbLv1m         UMNs92dakWYK0cBbuwOx/sYpUWWyhVmv6Q0LqXzJjtpY4Z0zsnpI2UCrkAdAOSh7geEJ         LCnw=="
          },
          {
            "name": "X-Gm-Message-State",
            "value": "AOJu0YyCYZOHIzoRHgMd7foUCpX2JYDwPS2XsTjWiMkkR364/mhFKFsQ vixTj7QM6pDecoDxn8pS0btM7b8z+cwo/8hFiYNgp26wK5L0aGymu+M8OuEk/73fuEthWVV0eko B9LvS5+qixa/oNO/HkRJpVTQmAH7OTT25KeZJj0Dd3x1JqsrfiNE="
          },
          {
            "name": "X-Google-Smtp-Source",
            "value": "AGHT+IHCMrQhOT9sgPUOQJL1oVfxMruiLg3BZ5DXqKMdQ7PYF2puka6Ovabv3BPg08CeyS1ovKydIdwHT2uleZkkAaU="
          },
          {
            "name": "X-Received",
            "value": "by 2002:a05:6102:5092:b0:4a3:e05e:f6a3 with SMTP id ada2fe7eead31-4a900e11589mr14462681137.3.1730568813787; Sat, 02 Nov 2024 10:33:33 -0700 (PDT)"
          },
          {
            "name": "MIME-Version",
            "value": "1.0"
          },
          {
            "name": "References",
            "value": "<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>"
          },
          {
            "name": "In-Reply-To",
            "value": "<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>"
          },
          {
            "name": "From",
            "value": "test_user_1 1 <test_user_1@onyx-test.com>"
          },
          {
            "name": "Date",
            "value": "Sat, 2 Nov 2024 10:33:22 -0700"
          },
          {
            "name": "Message-ID",
            "value": "<CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>"
          },
          {
            "name": "Subject",
            "value": "Re: Email Chain 1"
          },
          {
            "name": "To",
            "value": "Test Admin Admin <admin@onyx-test.com>"
          },
          {
            "name": "Content-Type",
            "value": "multipart/alternative; boundary=\"00000000000067dbf70625f1730f\""
          }
        ],
        "parts": [
          {
            "mimeType": "text/plain",
            "body": {
              "data": "VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMQ0KPg0K"
            }
          },
          {
            "mimeType": "text/html",
            "body": {
              "data": "PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo="
            }
          }
        ]
      }
    },
    {
      "id": "192edf157175fcec",
      "payload": {
        "headers": [
          {
            "name": "MIME-Version",
            "value": "1.0"
          },
          {
            "name": "Date",
            "value": "Sat, 2 Nov 2024 10:34:29 -0700"
          },
          {
            "name": "References",
            "value": "<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com> <CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>"
          },
          {
            "name": "In-Reply-To",
            "value": "<CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>"
          },
          {
            "name": "Bcc",
            "value": "test_user_3@onyx-test.com"
          },
          {
            "name": "Message-ID",
            "value": "<CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>"
          },
          {
            "name": "Subject",
            "value": "Fwd: Email Chain 1"
          },
          {
            "name": "From",
            "value": "Test Admin Admin <admin@onyx-test.com>"
          },
          {
            "name": "To",
            "value": "test_user_2 2 <test_user_2@onyx-test.com>"
          },
          {
            "name": "Content-Type",
            "value": "multipart/alternative; boundary=\"000000000000bf7afd0625f1764f\""
          }
        ],
        "parts": [
          {
            "mimeType": "text/plain",
            "body": {
              "data": "VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDENCg0KLS0tLS0tLS0tLSBGb3J3YXJkZWQgbWVzc2FnZSAtLS0tLS0tLS0NCkZyb206IHRlc3RfdXNlcl8xIDEgPHRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20-DQpEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NClN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxDQpUbzogVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCg0KDQpUaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMQ0KDQpPbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCndyb3RlOg0KDQo-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-DQo="
            }
          },
          {
            "mimeType": "text/html",
            "body": {
              "data": "PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iPnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb208L2E-Jmd0Ozwvc3Bhbj48YnI-RGF0ZTogU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNPGJyPlN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxPGJyPlRvOiBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7PGJyPjwvZGl2Pjxicj48YnI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDsgd3JvdGU6PGJyPjwvZGl2PjxibG9ja3F1b3RlIGNsYXNzPSJnbWFpbF9xdW90ZSIgc3R5bGU9Im1hcmdpbjowcHggMHB4IDBweCAwLjhleDtib3JkZXItbGVmdDoxcHggc29saWQgcmdiKDIwNCwyMDQsMjA0KTtwYWRkaW5nLWxlZnQ6MWV4Ij48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMTwvZGl2Pg0KPC9ibG9ja3F1b3RlPjwvZGl2Pg0KPC9kaXY-PC9kaXY-DQo="
            }
          }
        ]
      }
    },
    {
      "id": "192edf1e8f7ecbb4",
      "payload": {
        "headers": [
          {
            "name": "Delivered-To",
            "value": "admin@onyx-test.com"
          },
          {
            "name": "Received",
            "value": "by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1874156vqt;        Sat, 2 Nov 2024 10:35:07 -0700 (PDT)"
          },
          {
            "name": "X-Received",
            "value": "by 2002:a05:6122:319c:b0:50d:81f9:5210 with SMTP id 71dfb90a1353d-5105d128958mr15853812e0c.13.1730568906834;        Sat, 02 Nov 2024 10:35:06 -0700 (PDT)"
          },
          {
            "name": "ARC-Seal",
            "value": "i=1; a=rsa-sha256; t=1730568906; cv=none;        d=google.com; s=arc-20240605;        b=JUd7S6ql1poKM5ox92op2g2Z67AS8sEkp5f/S+Mr5+7KSichsjAwixWg/YhhRhvaY/         UcykrbdaAeWfCuGtJgSq1nr1z5hB3iAltv/D2XCdJdOXzVDpVvaV9lT/YU6266VKtsnq         gFVKfjyMe/MnNKvDITQL67A2gRvhiR3XWxwEVvrMArMpUb9bbudlF/5L3MQY4BCIvWLL         9uBv1ZnclghscsxspoG3CkULkGqHGUTKq6bPoUn/hOljiVdsVVagoOwhbDEcyMRKUDnm         2t3H7iiujhlBIDbRoLJR/6C+A6AMyNKPAFA3axM6EXrTOADMZ8a0JqFj8O4rktYpRV+d         zHxQ=="
          },
          {
            "name": "ARC-Message-Signature",
            "value": "i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :dkim-signature;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=;        b=IarHhl5g5tjBhlMRRXo6WwTzaFOI4Q3w4ebNunftDUHwzV7Qu1hY0y7r3SRNaBb+qD         ZncYUI6PF/Oo7eMG65IloXfu+kHUI8NJMaoERUWgEk21Tj6cOSRO4x/W6V5PSX7a4lWZ         K1cNdAlaiWI09Esv07Vel975Bgrd+XiCwoVgJAAslHOJ2bZwSYWzvwLqdkCRVrAGJQ9/         I80kvOnNVesIFdIR6SGrhdz8xNIIoe60k8PjJRzkmzy/tEeKCYBz6W+NW4xoIaAVmKUw         RvjI8JozUVkGzh+LLyx64MakPCZPWM+ft+D35JodarYh+KesF+HV/Oe7rjaw7JXZ1WoE         OdJQ==;        dara=google.com"
          },
          {
            "name": "ARC-Authentication-Results",
            "value": "i=1; mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL;       spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com;       dara=pass header.i=@onyx-test.com"
          },
          {
            "name": "Return-Path",
            "value": "<test_user_3@onyx-test.com>"
          },
          {
            "name": "Received",
            "value": "from mail-sor-f41.google.com (mail-sor-f41.google.com. [209.85.220.41])        by mx.google.com with SMTPS id 71dfb90a1353d-5106f3f9037sor1051490e0c.7.2024.11.02.10.35.06        for <admin@onyx-test.com>        (Google Transport Security);        Sat, 02 Nov 2024 10:35:06 -0700 (PDT)"
          },
          {
            "name": "Received-SPF",
            "value": "none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;"
          },
          {
            "name": "Authentication-Results",
            "value": "mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL;       spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com;       dara=pass header.i=@onyx-test.com"
          },
          {
            "name": "DKIM-Signature",
            "value": "v=1; a=rsa-sha256; c=relaxed/relaxed;        d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568906; x=1731173706; darn=onyx-test.com;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :from:to:cc:subject:date:message-id:reply-to;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        b=1U8JkCbLjicGtH7otVX3QjKv/XK5fGnmOIVMTD/b9cO1w8ai2GwCuJbBo+z1IuGqto         aRuNCcEqUIaFvVFiezvhL9xg7scIwHHvLOrSpmc0h0JMSx8q4kKaUGKEJpewsYvkStmr         DYv/cUIeaPTIChSuUDV7FVMhf7jIyIaYry3i9/EIlw+on18nD30C9kXwds5yWW8XGvtR         /OUuSdgJzuoNmypUt8v9Ebqd+LP23YTs+78/G1Ag+JjugxxF+C9cm7SxmooWueukRkm8         o8nQO5QVx/y/xsCZdM2XXcKCLcZIntuY48amlfFyIqrhG1/DEM6htD64meMGctNTptQf         jHrw=="
          },
          {
            "name": "X-Google-DKIM-Signature",
            "value": "v=1; a=rsa-sha256; c=relaxed/relaxed;        d=1e100.net; s=20230601; t=1730568906; x=1731173706;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        b=J4+ozlusGGM1Hn95EZkDeYbExgkyOlAdcY6LcV4Wx1zeI78HtEXGgvqcZ5sP7HzS1X         /A3i7WkgmjpC9bU2/zKLrfXDvYQ7udQwTJtKsKaUo4O65Al7Wtgz8e8rBDYikhqEEAZQ         GbEwqp+qa+v0T4rPhkQKd4zpIE3AUd3eh5u5iF/UEYc1NcyV35uMGWRP4jOK6F67MwS7         73MgObcGqmBH48I4K+ITYAkNEMGOBpY6fheGxCxyDpcG5gbf8swlWX2Dd0EM9H72o+Xb         jvAslOq1lZzPZUgyyZJ2wVEASxF8S7depiOLcTPKwsw+pgXIMAUBExBvu0u4PhO0qG+z         pftQ=="
          },
          {
            "name": "X-Gm-Message-State",
            "value": "AOJu0Yy2r0aT3w7HBU7t0JGla+x3AddG9WdnQT06r6T/HGZwZ9Wp9TUs Orb/HMtgvXivtYFkG14NJkMTBO4EqSynmzaxAvEheDXB1uYE2LS21XoqrvycvYQh3GUHBwUdS8L lE6BUjm4TJfXlZWAqKRxg4C0j1UFSuVdkXf6P1GCsdyKKTeS6A9eohw=="
          },
          {
            "name": "X-Google-Smtp-Source",
            "value": "AGHT+IHXTB7Ar9w/Q3G3gCT19SVELYvWl30pNGuNiTmkYZgMWFS7YUWTkG/DS4/mrjMRXpYuclOLHv8BeOmw9Jovkr4="
          },
          {
            "name": "X-Received",
            "value": "by 2002:a05:6102:3a10:b0:4a9:49:26d2 with SMTP id ada2fe7eead31-4a90109fb68mr15589362137.29.1730568906301; Sat, 02 Nov 2024 10:35:06 -0700 (PDT)"
          },
          {
            "name": "MIME-Version",
            "value": "1.0"
          },
          {
            "name": "References",
            "value": "<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com> <CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com> <CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>"
          },
          {
            "name": "In-Reply-To",
            "value": "<CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>"
          },
          {
            "name": "From",
            "value": "test_user_3 3 <test_user_3@onyx-test.com>"
          },
          {
            "name": "Date",
            "value": "Sat, 2 Nov 2024 10:34:55 -0700"
          },
          {
            "name": "Message-ID",
            "value": "<CACcF+8GU1V2_CcYsUFNOh0+oSkMG=oN-ioyPPXRsD+0Ghr-u-Q@mail.gmail.com>"
          },
          {
            "name": "Subject",
            "value": "Re: Email Chain 1"
          },
          {
            "name": "To",
            "value": "Test Admin Admin <admin@onyx-test.com>"
          },
          {
            "name": "Content-Type",
            "value": "multipart/alternative; boundary=\"000000000000eb82a70625f178cf\""
          }
        ],
        "parts": [
          {
            "mimeType": "text/plain",
            "body": {
              "data": "VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozNOKAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDMgaW4gY2hhaW4gMQ0KPg0KPiAtLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLQ0KPiBGcm9tOiB0ZXN0X3VzZXJfMSAxIDx0ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPg0KPiBEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NCj4gU3ViamVjdDogUmU6IEVtYWlsIENoYWluIDENCj4gVG86IFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQo-DQo-DQo-IFRoaXMgaXMgZW1haWwgMiBpbiBjaGFpbiAxDQo-DQo-IE9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluIDxhZG1pbkBvbnl4LXRlc3QuY29tPg0KPiB3cm90ZToNCj4NCj4-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-Pg0KPg0K"
            }
          },
          {
            "mimeType": "text/html",
            "body": {
              "data": "PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDE8YnIgY2xhc3M9ImdtYWlsLUFwcGxlLWludGVyY2hhbmdlLW5ld2xpbmUiPjwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzTigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7IHdyb3RlOjxicj48L2Rpdj48YmxvY2txdW90ZSBjbGFzcz0iZ21haWxfcXVvdGUiIHN0eWxlPSJtYXJnaW46MHB4IDBweCAwcHggMC44ZXg7Ym9yZGVyLWxlZnQ6MXB4IHNvbGlkIHJnYigyMDQsMjA0LDIwNCk7cGFkZGluZy1sZWZ0OjFleCI-PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj50ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPC9hPiZndDs8L3NwYW4-PGJyPkRhdGU6IFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTTxicj5TdWJqZWN0OiBSZTogRW1haWwgQ2hhaW4gMTxicj5UbzogVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDs8YnI-PC9kaXY-PGJyPjxicj48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMTwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSIgdGFyZ2V0PSJfYmxhbmsiPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo8L2Rpdj48L2Rpdj4NCjwvYmxvY2txdW90ZT48L2Rpdj4NCg=="
            }
          }
        ]
      }
    }
  ]
}


================================================
FILE: backend/tests/unit/onyx/connectors/google_utils/test_rate_limit_detection.py
================================================
import json

import httplib2  # type: ignore[import-untyped]
from googleapiclient.errors import HttpError  # type: ignore[import-untyped]

from onyx.connectors.google_utils.google_utils import _is_rate_limit_error


def _make_http_error(
    status: int,
    reason: str = "unknown",
    error_reason: str = "",
) -> HttpError:
    resp = httplib2.Response({"status": status})
    if error_reason:
        body = json.dumps(
            {
                "error": {
                    "message": reason,
                    "errors": [{"reason": error_reason, "message": reason}],
                }
            }
        ).encode()
    else:
        body = json.dumps({"error": {"message": reason}}).encode()
    return HttpError(resp, body)


def test_429_is_rate_limit() -> None:
    assert _is_rate_limit_error(_make_http_error(429))


def test_403_user_rate_limit_exceeded() -> None:
    err = _make_http_error(
        403,
        reason="User rate limit exceeded.",
        error_reason="userRateLimitExceeded",
    )
    assert _is_rate_limit_error(err)


def test_403_rate_limit_exceeded() -> None:
    err = _make_http_error(
        403,
        reason="Rate limit exceeded.",
        error_reason="rateLimitExceeded",
    )
    assert _is_rate_limit_error(err)


def test_403_permission_denied_is_not_rate_limit() -> None:
    err = _make_http_error(
        403,
        reason="The caller does not have permission",
        error_reason="forbidden",
    )
    assert not _is_rate_limit_error(err)


def test_404_is_not_rate_limit() -> None:
    assert not _is_rate_limit_error(_make_http_error(404))


def test_500_is_not_rate_limit() -> None:
    assert not _is_rate_limit_error(_make_http_error(500))


================================================
FILE: backend/tests/unit/onyx/connectors/jira/conftest.py
================================================
from collections.abc import Generator
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from jira import JIRA

from onyx.connectors.jira.connector import JiraConnector


@pytest.fixture
def jira_base_url() -> str:
    return "https://jira.example.com"


@pytest.fixture
def project_key() -> str:
    return "TEST"


@pytest.fixture
def user_email() -> str:
    return "test@example.com"


@pytest.fixture
def mock_jira_api_token() -> str:
    return "token123"


@pytest.fixture
def mock_jira_client() -> MagicMock:
    """Create a mock JIRA client with proper typing"""
    mock = MagicMock(spec=JIRA)
    # Add proper return typing for search_issues method
    mock.search_issues = MagicMock()
    # Add proper return typing for project method
    mock.project = MagicMock()
    # Add proper return typing for projects method
    mock.projects = MagicMock()
    return mock


@pytest.fixture
def jira_connector(
    jira_base_url: str, project_key: str, mock_jira_client: MagicMock
) -> Generator[JiraConnector, None, None]:
    connector = JiraConnector(
        jira_base_url=jira_base_url,
        project_key=project_key,
        comment_email_blacklist=["blacklist@example.com"],
        labels_to_skip=["secret", "sensitive"],
    )
    connector._jira_client = mock_jira_client
    connector._jira_client.client_info.return_value = jira_base_url
    with patch("onyx.connectors.jira.connector._JIRA_FULL_PAGE_SIZE", 2):
        yield connector


================================================
FILE: backend/tests/unit/onyx/connectors/jira/test_jira_bulk_fetch.py
================================================
from typing import Any
from unittest.mock import MagicMock

import pytest
import requests
from jira import JIRA
from jira.resources import Issue

from onyx.connectors.jira.connector import bulk_fetch_issues


def _make_raw_issue(issue_id: str) -> dict[str, Any]:
    return {
        "id": issue_id,
        "key": f"TEST-{issue_id}",
        "fields": {"summary": f"Issue {issue_id}"},
    }


def _mock_jira_client() -> MagicMock:
    mock = MagicMock(spec=JIRA)
    mock._options = {"server": "https://jira.example.com"}
    mock._session = MagicMock()
    mock._get_url = MagicMock(
        return_value="https://jira.example.com/rest/api/3/issue/bulkfetch"
    )
    return mock


def test_bulk_fetch_success() -> None:
    """Happy path: all issues fetched in one request."""
    client = _mock_jira_client()
    raw = [_make_raw_issue("1"), _make_raw_issue("2"), _make_raw_issue("3")]
    resp = MagicMock()
    resp.json.return_value = {"issues": raw}
    client._session.post.return_value = resp

    result = bulk_fetch_issues(client, ["1", "2", "3"])
    assert len(result) == 3
    assert all(isinstance(r, Issue) for r in result)
    client._session.post.assert_called_once()


def test_bulk_fetch_splits_on_json_error() -> None:
    """When the full batch fails with JSONDecodeError, sub-batches succeed."""
    client = _mock_jira_client()

    call_count = 0

    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001
        nonlocal call_count
        call_count += 1
        ids = json["issueIdsOrKeys"]
        if len(ids) > 2:
            resp = MagicMock()
            resp.json.side_effect = requests.exceptions.JSONDecodeError(
                "Expecting ',' delimiter", "doc", 2294125
            )
            return resp

        resp = MagicMock()
        resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
        return resp

    client._session.post.side_effect = _post_side_effect

    result = bulk_fetch_issues(client, ["1", "2", "3", "4"])
    assert len(result) == 4
    returned_ids = {r.raw["id"] for r in result}
    assert returned_ids == {"1", "2", "3", "4"}
    assert call_count > 1


def test_bulk_fetch_raises_on_single_unfetchable_issue() -> None:
    """A single issue that always fails JSON decode raises after splitting."""
    client = _mock_jira_client()

    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001
        ids = json["issueIdsOrKeys"]
        if "bad" in ids:
            resp = MagicMock()
            resp.json.side_effect = requests.exceptions.JSONDecodeError(
                "Expecting ',' delimiter", "doc", 100
            )
            return resp

        resp = MagicMock()
        resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
        return resp

    client._session.post.side_effect = _post_side_effect

    with pytest.raises(requests.exceptions.JSONDecodeError):
        bulk_fetch_issues(client, ["1", "bad", "2"])


def test_bulk_fetch_non_json_error_propagates() -> None:
    """Non-JSONDecodeError exceptions still propagate."""
    client = _mock_jira_client()

    resp = MagicMock()
    resp.json.side_effect = ValueError("something else broke")
    client._session.post.return_value = resp

    try:
        bulk_fetch_issues(client, ["1"])
        assert False, "Expected ValueError to propagate"
    except ValueError:
        pass


def test_bulk_fetch_with_fields() -> None:
    """Fields parameter is forwarded correctly."""
    client = _mock_jira_client()
    raw = [_make_raw_issue("1")]
    resp = MagicMock()
    resp.json.return_value = {"issues": raw}
    client._session.post.return_value = resp

    bulk_fetch_issues(client, ["1"], fields="summary,description")

    call_payload = client._session.post.call_args[1]["json"]
    assert call_payload["fields"] == ["summary", "description"]


def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:
    """With a 6-issue batch where one is bad, recursion isolates it and raises."""
    client = _mock_jira_client()
    bad_id = "BAD"

    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001
        ids = json["issueIdsOrKeys"]
        if bad_id in ids:
            resp = MagicMock()
            resp.json.side_effect = requests.exceptions.JSONDecodeError(
                "truncated", "doc", 999
            )
            return resp

        resp = MagicMock()
        resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
        return resp

    client._session.post.side_effect = _post_side_effect

    with pytest.raises(requests.exceptions.JSONDecodeError):
        bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])


================================================
FILE: backend/tests/unit/onyx/connectors/jira/test_jira_checkpointing.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from jira import JIRA
from jira import JIRAError
from jira.resources import Issue

from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.jira.connector import JiraConnector
from onyx.connectors.jira.connector import JiraConnectorCheckpoint
from onyx.connectors.jira.utils import JIRA_SERVER_API_VERSION
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
from onyx.utils.logger import setup_logger
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector

logger = setup_logger()
PAGE_SIZE = 2


@pytest.fixture
def jira_connector(
    jira_base_url: str, project_key: str, mock_jira_client: MagicMock
) -> Generator[JiraConnector, None, None]:
    connector = JiraConnector(
        jira_base_url=jira_base_url,
        project_key=project_key,
        comment_email_blacklist=["blacklist@example.com"],
        labels_to_skip=["secret", "sensitive"],
    )
    connector._jira_client = mock_jira_client
    connector._jira_client.client_info.return_value = jira_base_url
    connector._jira_client._options = MagicMock()
    connector._jira_client._options.return_value = {
        "rest_api_version": JIRA_SERVER_API_VERSION
    }
    with patch("onyx.connectors.jira.connector._JIRA_FULL_PAGE_SIZE", 2):
        yield connector


@pytest.fixture
def create_mock_issue() -> Callable[..., MagicMock]:
    def _create_mock_issue(
        key: str = "TEST-123",
        summary: str = "Test Issue",
        updated: str = "2023-01-01T12:00:00.000+0000",
        description: str = "Test Description",
        labels: list[str] | None = None,
        project_key: str = "TEST",
        project_name: str = "Test Project",
        issuetype_name: str = "Story",
        parent_key: str | None = None,
        parent_issuetype_name: str | None = None,
    ) -> MagicMock:
        """Helper to create a mock Issue object"""
        mock_issue = MagicMock(spec=Issue)
        # Create fields attribute first
        mock_issue.fields = MagicMock()
        mock_issue.key = key
        mock_issue.fields.summary = summary
        mock_issue.fields.updated = updated
        mock_issue.fields.description = description
        mock_issue.fields.labels = labels or []

        # Set up creator and assignee for testing owner extraction
        mock_issue.fields.reporter = MagicMock()
        mock_issue.fields.reporter.displayName = "Test Creator"
        mock_issue.fields.reporter.emailAddress = "creator@example.com"

        mock_issue.fields.assignee = MagicMock()
        mock_issue.fields.assignee.displayName = "Test Assignee"
        mock_issue.fields.assignee.emailAddress = "assignee@example.com"

        # Set up priority, status, and resolution
        mock_issue.fields.priority = MagicMock()
        mock_issue.fields.priority.name = "High"

        mock_issue.fields.status = MagicMock()
        mock_issue.fields.status.name = "In Progress"

        mock_issue.fields.resolution = MagicMock()
        mock_issue.fields.resolution.name = "Fixed"

        # Set up project for hierarchy node generation
        mock_issue.fields.project = MagicMock()
        mock_issue.fields.project.key = project_key
        mock_issue.fields.project.name = project_name

        # Set up issuetype for epic detection
        mock_issue.fields.issuetype = MagicMock()
        mock_issue.fields.issuetype.name = issuetype_name

        # Set up parent field for hierarchy
        if parent_key:
            mock_issue.fields.parent = MagicMock()
            mock_issue.fields.parent.key = parent_key
            mock_issue.fields.parent.fields = MagicMock()
            mock_issue.fields.parent.fields.issuetype = MagicMock()
            mock_issue.fields.parent.fields.issuetype.name = (
                parent_issuetype_name or "Story"
            )
            mock_issue.fields.parent.fields.summary = f"Parent {parent_key}"
        else:
            mock_issue.fields.parent = None

        # Add raw field for accessing through API version check
        mock_issue.raw = {"fields": {"description": description}}

        return mock_issue

    return _create_mock_issue


def test_load_credentials(jira_connector: JiraConnector) -> None:
    """Test loading credentials"""
    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        mock_build_client.return_value = jira_connector._jira_client
        credentials = {
            "jira_user_email": "user@example.com",
            "jira_api_token": "token123",
        }

        result = jira_connector.load_credentials(credentials)

        mock_build_client.assert_called_once_with(
            credentials=credentials,
            jira_base=jira_connector.jira_base,
            scoped_token=False,
        )
        assert result is None
        assert jira_connector._jira_client == mock_build_client.return_value


def test_get_jql_query_with_project(jira_connector: JiraConnector) -> None:
    """Test JQL query generation with project specified"""
    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()
    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()

    query = jira_connector._get_jql_query(start, end)

    # Check that the project part and time part are both in the query
    assert f'project = "{jira_connector.jira_project}"' in query
    assert "updated >= '2023-01-01 00:00'" in query
    assert "updated <= '2023-01-02 00:00'" in query
    assert " AND " in query


def test_get_jql_query_without_project(jira_base_url: str) -> None:
    """Test JQL query generation without project specified"""
    # Create connector without project key
    connector = JiraConnector(jira_base_url=jira_base_url)

    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()
    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()

    query = connector._get_jql_query(start, end)

    # Check that only time part is in the query
    assert "project =" not in query
    assert "updated >= '2023-01-01 00:00'" in query
    assert "updated <= '2023-01-02 00:00'" in query


def test_load_from_checkpoint_happy_path(
    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]
) -> None:
    """Test loading from checkpoint - happy path"""
    # Set up mocked issues
    mock_issue1 = create_mock_issue(key="TEST-1", summary="Issue 1")
    mock_issue2 = create_mock_issue(key="TEST-2", summary="Issue 2")
    mock_issue3 = create_mock_issue(key="TEST-3", summary="Issue 3")

    # Only mock the search_issues method
    jira_client = cast(JIRA, jira_connector._jira_client)
    search_issues_mock = cast(MagicMock, jira_client.search_issues)
    search_issues_mock.side_effect = [
        [mock_issue1, mock_issue2],
        [mock_issue3],
        [],
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)

    # Check that the documents were returned
    assert len(outputs) == 2

    checkpoint_output1 = outputs[0]
    assert len(checkpoint_output1.items) == 2
    document1 = checkpoint_output1.items[0]
    assert isinstance(document1, Document)
    assert document1.id == "https://jira.example.com/browse/TEST-1"
    document2 = checkpoint_output1.items[1]
    assert isinstance(document2, Document)
    assert document2.id == "https://jira.example.com/browse/TEST-2"
    assert checkpoint_output1.next_checkpoint == JiraConnectorCheckpoint(
        offset=2,
        has_more=True,
        seen_hierarchy_node_ids=["TEST"],
    )

    checkpoint_output2 = outputs[1]
    assert len(checkpoint_output2.items) == 1
    document3 = checkpoint_output2.items[0]
    assert isinstance(document3, Document)
    assert document3.id == "https://jira.example.com/browse/TEST-3"
    assert checkpoint_output2.next_checkpoint == JiraConnectorCheckpoint(
        offset=3,
        has_more=False,
        seen_hierarchy_node_ids=["TEST"],
    )

    # Check that search_issues was called with the right parameters
    assert search_issues_mock.call_count == 2
    args, kwargs = search_issues_mock.call_args_list[0]
    assert kwargs["startAt"] == 0
    assert kwargs["maxResults"] == PAGE_SIZE

    args, kwargs = search_issues_mock.call_args_list[1]
    assert kwargs["startAt"] == 2
    assert kwargs["maxResults"] == PAGE_SIZE


def test_load_from_checkpoint_with_issue_processing_error(
    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]
) -> None:
    """Test loading from checkpoint with a mix of successful and failed issue processing across multiple batches"""
    # Set up mocked issues for first batch
    mock_issue1 = create_mock_issue(key="TEST-1", summary="Issue 1")
    mock_issue2 = create_mock_issue(key="TEST-2", summary="Issue 2")
    # Set up mocked issues for second batch
    mock_issue3 = create_mock_issue(key="TEST-3", summary="Issue 3")
    mock_issue4 = create_mock_issue(key="TEST-4", summary="Issue 4")

    # Mock search_issues to return our mock issues in batches
    jira_client = cast(JIRA, jira_connector._jira_client)
    search_issues_mock = cast(MagicMock, jira_client.search_issues)
    search_issues_mock.side_effect = [
        [mock_issue1, mock_issue2],  # First batch
        [mock_issue3, mock_issue4],  # Second batch
        [],  # Empty batch to indicate end
    ]

    # Mock process_jira_issue to succeed for some issues and fail for others
    def mock_process_side_effect(
        jira_base_url: str,  # noqa: ARG001
        issue: Issue,
        *args: Any,  # noqa: ARG001
        **kwargs: Any,  # noqa: ARG001
    ) -> Document | None:
        if issue.key in ["TEST-1", "TEST-3"]:
            return Document(
                id=f"https://jira.example.com/browse/{issue.key}",
                sections=[],
                source=DocumentSource.JIRA,
                semantic_identifier=f"{issue.key}: {issue.fields.summary}",
                title=f"{issue.key} {issue.fields.summary}",
                metadata={},
            )
        else:
            raise Exception(f"Processing error for {issue.key}")

    with patch("onyx.connectors.jira.connector.process_jira_issue") as mock_process:
        mock_process.side_effect = mock_process_side_effect

        # Call load_from_checkpoint
        end_time = time.time()
        outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)

        assert len(outputs) == 3

        # Check first batch
        first_batch = outputs[0]
        assert len(first_batch.items) == 2
        # First item should be successful
        assert isinstance(first_batch.items[0], Document)
        assert first_batch.items[0].id == "https://jira.example.com/browse/TEST-1"
        # Second item should be a failure
        assert isinstance(first_batch.items[1], ConnectorFailure)
        assert first_batch.items[1].failed_document is not None
        assert first_batch.items[1].failed_document.document_id == "TEST-2"
        assert "Failed to process Jira issue" in first_batch.items[1].failure_message
        # Check checkpoint indicates more items (full batch)
        assert first_batch.next_checkpoint.has_more is True
        assert first_batch.next_checkpoint.offset == 2

        # Check second batch
        second_batch = outputs[1]
        assert len(second_batch.items) == 2
        # First item should be successful
        assert isinstance(second_batch.items[0], Document)
        assert second_batch.items[0].id == "https://jira.example.com/browse/TEST-3"
        # Second item should be a failure
        assert isinstance(second_batch.items[1], ConnectorFailure)
        assert second_batch.items[1].failed_document is not None
        assert second_batch.items[1].failed_document.document_id == "TEST-4"
        assert "Failed to process Jira issue" in second_batch.items[1].failure_message
        # Check checkpoint indicates more items
        assert second_batch.next_checkpoint.has_more is True
        assert second_batch.next_checkpoint.offset == 4

        # Check third, empty batch
        third_batch = outputs[2]
        assert len(third_batch.items) == 0
        assert third_batch.next_checkpoint.has_more is False
        assert third_batch.next_checkpoint.offset == 4


def test_load_from_checkpoint_with_skipped_issue(
    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]
) -> None:
    """Test loading from checkpoint with an issue that should be skipped due to labels"""
    LABEL_TO_SKIP = "secret"
    jira_connector.labels_to_skip = {LABEL_TO_SKIP}

    # Set up mocked issue with a label to skip
    mock_issue = create_mock_issue(
        key="TEST-1", summary="Issue 1", labels=[LABEL_TO_SKIP]
    )

    # Mock search_issues to return our mock issue
    jira_client = cast(JIRA, jira_connector._jira_client)
    search_issues_mock = cast(MagicMock, jira_client.search_issues)
    search_issues_mock.return_value = [mock_issue]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)

    assert len(outputs) == 1
    checkpoint_output = outputs[0]
    # Check that no documents were returned
    assert len(checkpoint_output.items) == 0


def test_retrieve_all_slim_docs_perm_sync(
    jira_connector: JiraConnector, create_mock_issue: Any
) -> None:
    """Test retrieving all slim documents"""
    # Set up mocked issues with proper project fields
    mock_issue1 = create_mock_issue(key="TEST-1", project_key="TEST")
    mock_issue2 = create_mock_issue(key="TEST-2", project_key="TEST")

    # Mock search_issues to return our mock issues
    jira_client = cast(JIRA, jira_connector._jira_client)
    search_issues_mock = cast(MagicMock, jira_client.search_issues)
    search_issues_mock.return_value = [mock_issue1, mock_issue2]

    # Call retrieve_all_slim_docs_perm_sync
    batches = list(jira_connector.retrieve_all_slim_docs_perm_sync(0, 100))

    # Check that a batch was returned (may include hierarchy nodes + slim docs)
    assert len(batches) == 1
    # Filter to just slim documents for checking
    slim_docs = [item for item in batches[0] if isinstance(item, SlimDocument)]
    assert len(slim_docs) == 2
    assert slim_docs[0].id == "https://jira.example.com/browse/TEST-1"
    assert slim_docs[1].id == "https://jira.example.com/browse/TEST-2"

    # Check that search_issues was called
    search_issues_mock.assert_called_once()


@pytest.mark.parametrize(
    "status_code,expected_exception,expected_message",
    [
        (
            401,
            CredentialExpiredError,
            "Jira credential appears to be expired or invalid",
        ),
        (
            403,
            InsufficientPermissionsError,
            "Your Jira token does not have sufficient permissions",
        ),
        (
            # This test used to check for 404 project not found, but the jira validation logic for 404
            # now returns an UnexpectedValidationError when no error text is provided.
            # There's no point in passing the expected message and asserting it exists in the raised error
            # If tested in the UI, wrong project key will still produce the expected error.
            404,
            UnexpectedValidationError,
            "Unexpected Jira error during validation",
        ),
        (
            429,
            ConnectorValidationError,
            "Validation failed due to Jira rate-limits being exceeded",
        ),
    ],
)
def test_validate_connector_settings_errors(
    jira_connector: JiraConnector,
    status_code: int,
    expected_exception: type[Exception],
    expected_message: str,
) -> None:
    """Test validation with various error scenarios"""
    error = JIRAError(status_code=status_code)

    jira_client = cast(JIRA, jira_connector._jira_client)
    project_mock = cast(MagicMock, jira_client.project)
    project_mock.side_effect = error

    with pytest.raises(expected_exception) as excinfo:
        jira_connector.validate_connector_settings()
    assert expected_message in str(excinfo.value)


def test_validate_connector_settings_with_project_success(
    jira_connector: JiraConnector,
) -> None:
    """Test successful validation with project specified"""
    jira_client = cast(JIRA, jira_connector._jira_client)
    project_mock = cast(MagicMock, jira_client.project)
    project_mock.return_value = MagicMock()
    jira_connector.validate_connector_settings()
    project_mock.assert_called_once_with(jira_connector.jira_project)


def test_validate_connector_settings_without_project_success(
    jira_base_url: str,
) -> None:
    """Test successful validation without project specified"""
    connector = JiraConnector(jira_base_url=jira_base_url)
    connector._jira_client = MagicMock()
    connector._jira_client.projects.return_value = [MagicMock()]

    connector.validate_connector_settings()
    connector._jira_client.projects.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/connectors/jira/test_jira_error_handling.py
================================================
"""Tests for Jira connector error handling during indexing."""

import time
from unittest.mock import MagicMock

import pytest
from jira import JIRA
from jira import JIRAError

from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.jira.connector import JiraConnector
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector


@pytest.fixture
def jira_connector_with_invalid_project(jira_base_url: str) -> JiraConnector:
    """Create a Jira connector with an invalid project key."""
    connector = JiraConnector(
        jira_base_url=jira_base_url,
        project_key="INVALID_PROJECT",
    )
    mock_client = MagicMock(spec=JIRA)
    mock_client._options = {"rest_api_version": "2"}
    connector._jira_client = mock_client
    return connector


def test_nonexistent_project_error_during_indexing(
    jira_connector_with_invalid_project: JiraConnector,
) -> None:
    """Test that a non-existent project error during indexing is properly handled."""
    # Create a JIRAError that mimics the error from the stack trace
    error = JIRAError(
        status_code=400,
        text='{"errorMessages":["The value \'INVALID_PROJECT\' does not exist for the field \'project\'."],"errors":{}}',
    )

    # Mock search_issues to raise this error
    jira_client = jira_connector_with_invalid_project._jira_client
    assert jira_client is not None
    jira_client.search_issues.side_effect = error  # type: ignore

    # Attempt to load from checkpoint - should raise ConnectorValidationError
    end_time = time.time()
    with pytest.raises(ConnectorValidationError) as excinfo:
        list(
            load_everything_from_checkpoint_connector(
                jira_connector_with_invalid_project, 0, end_time
            )
        )

    # Verify the error message is user-friendly
    error_message = str(excinfo.value)
    assert "does not exist" in error_message or "don't have access" in error_message
    assert "INVALID_PROJECT" in error_message or "project" in error_message.lower()


def test_invalid_jql_error_during_indexing(
    jira_connector_with_invalid_project: JiraConnector,
) -> None:
    """Test that an invalid JQL error during indexing is properly handled."""
    # Create a JIRAError for invalid JQL syntax
    error = JIRAError(
        status_code=400,
        text='{"errorMessages":["Error in the JQL Query: Expecting \')\' before the end of the query."],"errors":{}}',
    )

    # Mock search_issues to raise this error
    jira_client = jira_connector_with_invalid_project._jira_client
    assert jira_client is not None
    jira_client.search_issues.side_effect = error  # type: ignore

    # Attempt to load from checkpoint - should raise ConnectorValidationError
    end_time = time.time()
    with pytest.raises(ConnectorValidationError) as excinfo:
        list(
            load_everything_from_checkpoint_connector(
                jira_connector_with_invalid_project, 0, end_time
            )
        )

    # Verify the error message mentions invalid JQL
    error_message = str(excinfo.value)
    assert "Invalid JQL" in error_message or "JQL" in error_message


def test_credential_expired_error_during_indexing(
    jira_connector_with_invalid_project: JiraConnector,
) -> None:
    """Test that expired credentials during indexing are properly handled."""
    # Create a JIRAError for expired credentials
    error = JIRAError(status_code=401)

    # Mock search_issues to raise this error
    jira_client = jira_connector_with_invalid_project._jira_client
    assert jira_client is not None
    jira_client.search_issues.side_effect = error  # type: ignore

    # Attempt to load from checkpoint - should raise CredentialExpiredError
    end_time = time.time()
    with pytest.raises(CredentialExpiredError) as excinfo:
        list(
            load_everything_from_checkpoint_connector(
                jira_connector_with_invalid_project, 0, end_time
            )
        )

    # Verify the error message mentions credentials
    error_message = str(excinfo.value)
    assert "credential" in error_message.lower() or "401" in error_message


def test_insufficient_permissions_error_during_indexing(
    jira_connector_with_invalid_project: JiraConnector,
) -> None:
    """Test that insufficient permissions during indexing are properly handled."""
    # Create a JIRAError for insufficient permissions
    error = JIRAError(status_code=403)

    # Mock search_issues to raise this error
    jira_client = jira_connector_with_invalid_project._jira_client
    assert jira_client is not None
    jira_client.search_issues.side_effect = error  # type: ignore

    # Attempt to load from checkpoint - should raise InsufficientPermissionsError
    end_time = time.time()
    with pytest.raises(InsufficientPermissionsError) as excinfo:
        list(
            load_everything_from_checkpoint_connector(
                jira_connector_with_invalid_project, 0, end_time
            )
        )

    # Verify the error message mentions permissions
    error_message = str(excinfo.value)
    assert "permission" in error_message.lower() or "403" in error_message


def test_cloud_nonexistent_project_error_during_indexing(
    jira_base_url: str,
) -> None:
    """Test that a non-existent project error for Jira Cloud is properly handled."""
    from requests.exceptions import HTTPError

    # Create a cloud connector
    connector = JiraConnector(
        jira_base_url=jira_base_url,
        project_key="INVALID_PROJECT",
    )
    mock_client = MagicMock()
    mock_client._options = {"rest_api_version": "3"}
    connector._jira_client = mock_client

    # Mock the session get method to return an error response
    mock_response = MagicMock()
    mock_response.status_code = 400
    mock_response.json.return_value = {
        "errorMessages": [
            "The value 'INVALID_PROJECT' does not exist for the field 'project'."
        ],
        "errors": {},
    }

    # Create a proper HTTPError with the response attached
    http_error = HTTPError("400 Client Error: Bad Request")
    http_error.response = mock_response
    mock_response.raise_for_status.side_effect = http_error

    mock_session = MagicMock()
    mock_session.get.return_value = mock_response
    mock_client._session = mock_session
    mock_client._get_url.return_value = (
        "https://api.atlassian.com/ex/jira/cloud-id/rest/api/3/search/jql"
    )

    # Attempt to load from checkpoint - should raise ConnectorValidationError
    end_time = time.time()
    with pytest.raises(ConnectorValidationError) as excinfo:
        list(load_everything_from_checkpoint_connector(connector, 0, end_time))

    # Verify the error message is user-friendly
    error_message = str(excinfo.value)
    assert "does not exist" in error_message or "don't have access" in error_message


================================================
FILE: backend/tests/unit/onyx/connectors/jira/test_jira_large_ticket_handling.py
================================================
from collections.abc import Generator
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from jira.resources import Issue
from pytest_mock import MockFixture

from onyx.connectors.jira.connector import _perform_jql_search
from onyx.connectors.jira.connector import process_jira_issue


@pytest.fixture
def mock_jira_client() -> MagicMock:
    return MagicMock()


@pytest.fixture
def mock_issue_small() -> MagicMock:
    issue = MagicMock(spec=Issue)
    fields = MagicMock()
    fields.description = "Small description"
    fields.comment = MagicMock()
    fields.comment.comments = [
        MagicMock(body="Small comment 1"),
        MagicMock(body="Small comment 2"),
    ]
    fields.reporter = MagicMock()
    fields.reporter.displayName = "John Doe"
    fields.reporter.emailAddress = "john@example.com"
    fields.assignee = MagicMock()
    fields.assignee.displayName = "John Doe"
    fields.assignee.emailAddress = "john@example.com"
    fields.summary = "Small Issue"
    fields.updated = "2023-01-01T00:00:00+0000"
    fields.labels = []

    issue.fields = fields
    issue.key = "SMALL-1"
    return issue


@pytest.fixture
def mock_issue_large() -> MagicMock:
    issue = MagicMock(spec=Issue)
    fields = MagicMock()
    fields.description = "a" * 99_000
    fields.comment = MagicMock()
    fields.comment.comments = [
        MagicMock(body="Large comment " * 1000),
        MagicMock(body="Another large comment " * 1000),
    ]
    fields.reporter = MagicMock()
    fields.reporter.displayName = "Jane Doe"
    fields.reporter.emailAddress = "jane@example.com"
    fields.assignee = MagicMock()
    fields.assignee.displayName = "Jane Doe"
    fields.assignee.emailAddress = "jane@example.com"
    fields.summary = "Large Issue"
    fields.updated = "2023-01-02T00:00:00+0000"
    fields.labels = []

    issue.fields = fields
    issue.key = "LARGE-1"
    return issue


@pytest.fixture
def mock_jira_api_version() -> Generator[Any, Any, Any]:
    with patch("onyx.connectors.jira.utils.JIRA_CLOUD_API_VERSION", "3"):
        with patch("onyx.connectors.jira.utils.JIRA_SERVER_API_VERSION", "2"):
            yield


@pytest.fixture
def patched_environment(
    mock_jira_api_version: MockFixture,  # noqa: ARG001
) -> Generator[Any, Any, Any]:
    yield


def test_fetch_jira_issues_batch_small_ticket(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    patched_environment: MockFixture,  # noqa: ARG001
) -> None:
    mock_jira_client.search_issues.return_value = [mock_issue_small]

    # First get the issues via pagination
    issues = list(_perform_jql_search(mock_jira_client, "project = TEST", 0, 50))
    assert len(issues) == 1

    # Then process each issue
    docs = [process_jira_issue("test.com", issue) for issue in issues]
    docs = [doc for doc in docs if doc is not None]  # Filter out None values

    assert len(docs) == 1
    doc = docs[0]
    assert doc is not None  # Type assertion for mypy
    assert doc.id.endswith("/SMALL-1")
    assert doc.sections[0].text is not None
    assert "Small description" in doc.sections[0].text
    assert "Small comment 1" in doc.sections[0].text
    assert "Small comment 2" in doc.sections[0].text


def test_fetch_jira_issues_batch_large_ticket(
    mock_jira_client: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,  # noqa: ARG001
) -> None:
    mock_jira_client.search_issues.return_value = [mock_issue_large]

    # First get the issues via pagination
    issues = list(_perform_jql_search(mock_jira_client, "project = TEST", 0, 50))
    assert len(issues) == 1

    # Then process each issue
    docs = [process_jira_issue("test.com", issue) for issue in issues]
    docs = [doc for doc in docs if doc is not None]  # Filter out None values

    assert len(docs) == 0  # The large ticket should be skipped


def test_fetch_jira_issues_batch_mixed_tickets(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,  # noqa: ARG001
) -> None:
    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]

    # First get the issues via pagination
    issues = list(_perform_jql_search(mock_jira_client, "project = TEST", 0, 50))
    assert len(issues) == 2

    # Then process each issue
    docs = [process_jira_issue("test.com", issue) for issue in issues]
    docs = [doc for doc in docs if doc is not None]  # Filter out None values

    assert len(docs) == 1  # Only the small ticket should be included
    doc = docs[0]
    assert doc is not None  # Type assertion for mypy
    assert doc.id.endswith("/SMALL-1")


@patch("onyx.connectors.jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE", 50)
def test_fetch_jira_issues_batch_custom_size_limit(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,  # noqa: ARG001
) -> None:
    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]

    # First get the issues via pagination
    issues = list(_perform_jql_search(mock_jira_client, "project = TEST", 0, 50))
    assert len(issues) == 2

    # Then process each issue
    docs = [process_jira_issue("test.com", issue) for issue in issues]
    docs = [doc for doc in docs if doc is not None]  # Filter out None values

    assert len(docs) == 0  # Both tickets should be skipped due to the low size limit


================================================
FILE: backend/tests/unit/onyx/connectors/jira/test_jira_permission_sync.py
================================================
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from onyx.connectors.jira.connector import JiraConnector
from onyx.connectors.jira.utils import JIRA_SERVER_API_VERSION
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.sensitive import make_mock_sensitive_value

pytestmark = pytest.mark.usefixtures("enable_ee")


@pytest.fixture
def mock_jira_cc_pair(
    jira_base_url: str,
    project_key: str,
    user_email: str,
    mock_jira_api_token: str,
) -> MagicMock:
    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
    mock_cc_pair.connector = MagicMock()
    mock_cc_pair.credential.credential_json = make_mock_sensitive_value(
        {
            "jira_user_email": user_email,
            "jira_api_token": mock_jira_api_token,
        }
    )
    mock_cc_pair.connector.connector_specific_config = {
        "jira_base_url": jira_base_url,
        "project_key": project_key,
    }
    mock_cc_pair.connector.indexing_start = None

    return mock_cc_pair


@pytest.fixture
def mock_fetch_all_existing_docs_fn() -> MagicMock:
    return MagicMock(return_value=[])


@pytest.fixture
def mock_fetch_all_existing_docs_ids_fn() -> MagicMock:
    return MagicMock(return_value=[])


def test_jira_permission_sync(
    jira_connector: JiraConnector,
    mock_jira_cc_pair: MagicMock,
    mock_fetch_all_existing_docs_fn: MagicMock,
    mock_fetch_all_existing_docs_ids_fn: MagicMock,
) -> None:
    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        mock_build_client.return_value = jira_connector._jira_client
        assert jira_connector._jira_client is not None
        jira_connector._jira_client._options = MagicMock()
        jira_connector._jira_client._options.return_value = {
            "rest_api_version": JIRA_SERVER_API_VERSION
        }

        for doc in jira_doc_sync(
            cc_pair=mock_jira_cc_pair,
            fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
            fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
        ):
            print(doc)


def test_jira_doc_sync_passes_indexing_start(
    jira_connector: JiraConnector,
    mock_jira_cc_pair: MagicMock,
    mock_fetch_all_existing_docs_fn: MagicMock,
    mock_fetch_all_existing_docs_ids_fn: MagicMock,
) -> None:
    """Verify that generic_doc_sync derives indexing_start from cc_pair
    and forwards it to retrieve_all_slim_docs_perm_sync."""
    indexing_start_dt = datetime(2025, 6, 1, tzinfo=timezone.utc)
    mock_jira_cc_pair.connector.indexing_start = indexing_start_dt

    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        mock_build_client.return_value = jira_connector._jira_client
        assert jira_connector._jira_client is not None
        jira_connector._jira_client._options = MagicMock()
        jira_connector._jira_client._options.return_value = {
            "rest_api_version": JIRA_SERVER_API_VERSION
        }

        with patch.object(
            type(jira_connector),
            "retrieve_all_slim_docs_perm_sync",
            return_value=iter([]),
        ) as mock_retrieve:
            list(
                jira_doc_sync(
                    cc_pair=mock_jira_cc_pair,
                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
                )
            )

            mock_retrieve.assert_called_once()
            call_kwargs = mock_retrieve.call_args
            assert call_kwargs.kwargs["start"] == indexing_start_dt.timestamp()


def test_jira_doc_sync_passes_none_when_no_indexing_start(
    jira_connector: JiraConnector,
    mock_jira_cc_pair: MagicMock,
    mock_fetch_all_existing_docs_fn: MagicMock,
    mock_fetch_all_existing_docs_ids_fn: MagicMock,
) -> None:
    """Verify that indexing_start is None when the connector has no indexing_start set."""
    mock_jira_cc_pair.connector.indexing_start = None

    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        mock_build_client.return_value = jira_connector._jira_client
        assert jira_connector._jira_client is not None
        jira_connector._jira_client._options = MagicMock()
        jira_connector._jira_client._options.return_value = {
            "rest_api_version": JIRA_SERVER_API_VERSION
        }

        with patch.object(
            type(jira_connector),
            "retrieve_all_slim_docs_perm_sync",
            return_value=iter([]),
        ) as mock_retrieve:
            list(
                jira_doc_sync(
                    cc_pair=mock_jira_cc_pair,
                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
                )
            )

            mock_retrieve.assert_called_once()
            call_kwargs = mock_retrieve.call_args
            assert call_kwargs.kwargs["start"] is None


================================================
FILE: backend/tests/unit/onyx/connectors/mediawiki/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/connectors/mediawiki/test_mediawiki_family.py
================================================
from typing import Final

import pytest
from pytest_mock import MockFixture
from pywikibot.families.wikipedia_family import Family as WikipediaFamily  # type: ignore[import-untyped]
from pywikibot.family import Family  # type: ignore[import-untyped]

from onyx.connectors.mediawiki import family


# Disabling these tests as they are flaky and rely on external wikis that are maintained by just fan communities


NON_BUILTIN_WIKIS: Final[list[tuple[str, str]]] = [
    ("https://fallout.fandom.com", "falloutwiki"),
    ("https://harrypotter.fandom.com/wiki/", "harrypotterwiki"),
    # ("https://artofproblemsolving.com/wiki", "artofproblemsolving"),  # FLAKY
    ("https://www.bogleheads.org/wiki/Main_Page", "bogleheadswiki"),
    ("https://bogleheads.org/wiki/Main_Page", "bogleheadswiki"),
    ("https://www.dandwiki.com/wiki/", "dungeonsanddragons"),
    ("https://wiki.factorio.com/", "factoriowiki"),
]


# TODO: Add support for more builtin family types from `pywikibot.families`.
@pytest.mark.skip(reason="Temporarily skipped")
@pytest.mark.parametrize(
    "url, name, expected",
    [
        (
            "https://en.wikipedia.org",
            "wikipedia",
            WikipediaFamily,
        ),  # Support urls with protocol
        (
            "wikipedia.org",
            "wikipedia",
            WikipediaFamily,
        ),  # Support urls without subdomain
        (
            "en.wikipedia.org",
            "wikipedia",
            WikipediaFamily,
        ),  # Support urls with subdomain
        ("m.wikipedia.org", "wikipedia", WikipediaFamily),
        ("de.wikipedia.org", "wikipedia", WikipediaFamily),
    ],
)
def test_family_class_dispatch_builtins(
    url: str, name: str, expected: type[Family]
) -> None:
    """Test that the family class dispatch function returns the correct family class in several scenarios."""
    assert family.family_class_dispatch(url, name) == expected


@pytest.mark.skip(reason="Temporarily skipped")
@pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS)
def test_family_class_dispatch_on_non_builtins_generates_new_class_fast(
    url: str, name: str, mocker: MockFixture
) -> None:
    """Test that using the family class dispatch function on an unknown url generates a new family class."""
    mock_generate_family_class = mocker.patch.object(family, "generate_family_class")
    family.family_class_dispatch(url, name)
    mock_generate_family_class.assert_called_once_with(url, name)


@pytest.mark.skip(reason="Temporarily skipped")
@pytest.mark.slow
@pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS)
def test_family_class_dispatch_on_non_builtins_generates_new_class_slow(
    url: str, name: str
) -> None:
    """Test that using the family class dispatch function on an unknown url generates a new family class.

    This test is slow because it actually performs the network calls to generate the family classes.
    """
    generated_family_class = family.generate_family_class(url, name)
    assert issubclass(generated_family_class, Family)
    dispatch_family_class = family.family_class_dispatch(url, name)
    assert dispatch_family_class == generated_family_class


================================================
FILE: backend/tests/unit/onyx/connectors/mediawiki/test_wiki.py
================================================
from __future__ import annotations

import datetime
import tempfile
from collections.abc import Iterable

import pytest
import pywikibot  # type: ignore[import-untyped]
from pytest_mock import MockFixture

from onyx.connectors.mediawiki import wiki

# Some of these tests are disabled for now due to flakiness with wikipedia as the backend

pywikibot.config.base_dir = tempfile.TemporaryDirectory().name


@pytest.fixture
def site() -> pywikibot.Site:
    return pywikibot.Site("en", "wikipedia")


def test_pywikibot_timestamp_to_utc_datetime() -> None:
    timestamp_without_tzinfo = pywikibot.Timestamp(2023, 12, 27, 15, 38, 49)
    timestamp_min_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.min)
    timestamp_max_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.max)
    assert timestamp_min_timezone.tzinfo == datetime.timezone.min
    assert timestamp_max_timezone.tzinfo == datetime.timezone.max
    for timestamp in [
        timestamp_without_tzinfo,
        timestamp_min_timezone,
        timestamp_max_timezone,
    ]:
        dt = wiki.pywikibot_timestamp_to_utc_datetime(timestamp)
        assert dt.tzinfo == datetime.timezone.utc


class MockPage(pywikibot.Page):
    def __init__(
        self, site: pywikibot.Site, title: str, _has_categories: bool = False
    ) -> None:
        super().__init__(site, title)
        self._has_categories = _has_categories
        self.header = "This is a header"
        self._sections = ["This is a section", "This is another section"]

    @property
    def _sections_helper(self) -> list[str]:
        return [
            f"== Section {i} ==\n{section}\n"
            for i, section in enumerate(self._sections)
        ]

    @property
    def text(self) -> str:
        text = self.header + "\n"
        for section in self._sections_helper:
            text += section
        return text

    @property
    def pageid(self) -> str:
        return "1"

    def full_url(self) -> str:
        return "Test URL"

    def categories(
        self,
        with_sort_key: bool = False,  # noqa: ARG002
        total: int | None = None,  # noqa: ARG002
        content: bool = False,  # noqa: ARG002
    ) -> Iterable[pywikibot.Page]:
        if not self._has_categories:
            return []
        return [
            MockPage(self.site, "Test Category1"),
            MockPage(self.site, "Test Category2"),
        ]

    @property
    def latest_revision(self) -> pywikibot.page.Revision:
        return pywikibot.page.Revision(
            timestamp=pywikibot.Timestamp(2023, 12, 27, 15, 38, 49)
        )


@pytest.mark.skip(reason="Test disabled")
def test_get_doc_from_page(site: pywikibot.Site) -> None:
    test_page = MockPage(site, "Test Page", _has_categories=True)
    doc = wiki.get_doc_from_page(test_page, site, wiki.DocumentSource.MEDIAWIKI)
    assert doc.source == wiki.DocumentSource.MEDIAWIKI
    assert doc.title == test_page.title()
    assert doc.doc_updated_at == wiki.pywikibot_timestamp_to_utc_datetime(
        test_page.latest_revision.timestamp
    )
    assert len(doc.sections) == 3
    for section, expected_section in zip(
        doc.sections, test_page._sections_helper + [test_page.header]
    ):
        assert (
            section.text is not None
            and section.text.strip() == expected_section.strip()
        )  # Extra whitespace before/after is okay
        assert section.link and section.link.startswith(test_page.full_url())
    assert doc.semantic_identifier == test_page.title()
    assert doc.metadata == {
        "categories": [category.title() for category in test_page.categories()]
    }
    assert doc.id == f"MEDIAWIKI_{test_page.pageid}_{test_page.full_url()}"


@pytest.mark.skip(reason="Test disabled")
def test_mediawiki_connector_recurse_depth() -> None:
    """Test that the recurse_depth parameter is parsed correctly.

    -1 should be parsed as `True` (for unbounded recursion)
    0 or greater should be parsed as an integer
    Negative values less than -1 should raise a ValueError

    This is the specification dictated by the `pywikibot` library. We do not need to test behavior beyond this.
    """
    hostname = "wikipedia.org"
    categories: list[str] = []
    pages = ["Test Page"]

    # Recurse depth less than -1 raises ValueError
    with pytest.raises(ValueError):
        recurse_depth = -2
        wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)

    # Recurse depth of -1 gets parsed as `True`
    recurse_depth = -1
    connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)
    assert connector.recurse_depth is True

    # Recurse depth of 0 or greater gets parsed as an integer
    recurse_depth = 0
    connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)
    assert connector.recurse_depth == recurse_depth


@pytest.mark.skip(reason="Test disabled")
def test_load_from_state_calls_poll_source_with_nones(mocker: MockFixture) -> None:
    connector = wiki.MediaWikiConnector("wikipedia.org", [], [], 0, "test")
    poll_source = mocker.patch.object(connector, "poll_source")
    connector.load_from_state()
    poll_source.assert_called_once_with(None, None)


================================================
FILE: backend/tests/unit/onyx/connectors/notion/test_notion_datasource.py
================================================
"""Unit tests for Notion connector data source API migration.

Tests the new data source discovery + querying flow and the
data_source_id -> database_id parent resolution.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from requests.exceptions import HTTPError

from onyx.connectors.notion.connector import NotionConnector
from onyx.connectors.notion.connector import NotionDataSource
from onyx.connectors.notion.connector import NotionPage


def _make_connector() -> NotionConnector:
    connector = NotionConnector()
    connector.load_credentials({"notion_integration_token": "fake-token"})
    return connector


def _mock_response(json_data: dict, status_code: int = 200) -> MagicMock:
    resp = MagicMock()
    resp.json.return_value = json_data
    resp.status_code = status_code
    if status_code >= 400:
        resp.raise_for_status.side_effect = HTTPError(
            f"HTTP {status_code}", response=resp
        )
    else:
        resp.raise_for_status.return_value = None
    return resp


class TestFetchDataSourcesForDatabase:
    def test_multi_source_database(self) -> None:
        connector = _make_connector()
        resp = _mock_response(
            {
                "object": "database",
                "id": "db-1",
                "data_sources": [
                    {"id": "ds-1", "name": "Source A"},
                    {"id": "ds-2", "name": "Source B"},
                ],
            }
        )
        with patch(
            "onyx.connectors.notion.connector.rl_requests.get", return_value=resp
        ):
            result = connector._fetch_data_sources_for_database("db-1")

        assert result == [
            NotionDataSource(id="ds-1", name="Source A"),
            NotionDataSource(id="ds-2", name="Source B"),
        ]

    def test_single_source_database(self) -> None:
        connector = _make_connector()
        resp = _mock_response(
            {
                "object": "database",
                "id": "db-1",
                "data_sources": [{"id": "ds-1", "name": "Only Source"}],
            }
        )
        with patch(
            "onyx.connectors.notion.connector.rl_requests.get", return_value=resp
        ):
            result = connector._fetch_data_sources_for_database("db-1")

        assert result == [NotionDataSource(id="ds-1", name="Only Source")]

    def test_404_returns_empty(self) -> None:
        connector = _make_connector()
        resp = _mock_response({"object": "error"}, status_code=404)
        with patch(
            "onyx.connectors.notion.connector.rl_requests.get", return_value=resp
        ):
            result = connector._fetch_data_sources_for_database("db-missing")

        assert result == []


class TestFetchDataSource:
    def test_query_returns_pages(self) -> None:
        connector = _make_connector()
        resp = _mock_response(
            {
                "results": [
                    {
                        "object": "page",
                        "id": "page-1",
                        "properties": {"Name": {"type": "title", "title": []}},
                    }
                ],
                "next_cursor": None,
            }
        )
        with patch(
            "onyx.connectors.notion.connector.rl_requests.post", return_value=resp
        ):
            result = connector._fetch_data_source("ds-1")

        assert len(result["results"]) == 1
        assert result["results"][0]["id"] == "page-1"
        assert result["next_cursor"] is None

    def test_404_returns_empty_results(self) -> None:
        connector = _make_connector()
        resp = _mock_response({"object": "error"}, status_code=404)
        with patch(
            "onyx.connectors.notion.connector.rl_requests.post", return_value=resp
        ):
            result = connector._fetch_data_source("ds-missing")

        assert result == {"results": [], "next_cursor": None}


class TestGetParentRawId:
    def test_database_id_parent(self) -> None:
        connector = _make_connector()
        parent = {"type": "database_id", "database_id": "db-1"}
        assert connector._get_parent_raw_id(parent) == "db-1"

    def test_data_source_id_with_mapping(self) -> None:
        connector = _make_connector()
        connector._data_source_to_database_map["ds-1"] = "db-1"
        parent = {"type": "data_source_id", "data_source_id": "ds-1"}
        assert connector._get_parent_raw_id(parent) == "db-1"

    def test_data_source_id_without_mapping_falls_back(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        parent = {"type": "data_source_id", "data_source_id": "ds-unknown"}
        assert connector._get_parent_raw_id(parent) == "ws-1"

    def test_workspace_parent(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        parent = {"type": "workspace"}
        assert connector._get_parent_raw_id(parent) == "ws-1"

    def test_page_id_parent(self) -> None:
        connector = _make_connector()
        parent = {"type": "page_id", "page_id": "page-1"}
        assert connector._get_parent_raw_id(parent) == "page-1"

    def test_block_id_parent_with_mapping(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        connector._child_page_parent_map["inline-page-1"] = "containing-page-1"
        parent = {"type": "block_id"}
        assert (
            connector._get_parent_raw_id(parent, page_id="inline-page-1")
            == "containing-page-1"
        )

    def test_block_id_parent_without_mapping_falls_back(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        parent = {"type": "block_id"}
        assert connector._get_parent_raw_id(parent, page_id="unknown-page") == "ws-1"

    def test_none_parent_defaults_to_workspace(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        assert connector._get_parent_raw_id(None) == "ws-1"


class TestReadPagesFromDatabaseMultiSource:
    def test_queries_all_data_sources(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"

        with (
            patch.object(
                connector,
                "_fetch_data_sources_for_database",
                return_value=[
                    NotionDataSource(id="ds-1", name="Source A"),
                    NotionDataSource(id="ds-2", name="Source B"),
                ],
            ),
            patch.object(
                connector,
                "_fetch_data_source",
                return_value={"results": [], "next_cursor": None},
            ) as mock_fetch_ds,
        ):
            result = connector._read_pages_from_database("db-1")

        assert mock_fetch_ds.call_count == 2
        mock_fetch_ds.assert_any_call("ds-1", None)
        mock_fetch_ds.assert_any_call("ds-2", None)

        assert connector._data_source_to_database_map["ds-1"] == "db-1"
        assert connector._data_source_to_database_map["ds-2"] == "db-1"

        assert result.blocks == []
        assert result.child_page_ids == []
        assert len(result.hierarchy_nodes) == 1
        assert result.hierarchy_nodes[0].raw_node_id == "db-1"

    def test_collects_pages_from_all_sources(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        connector.recursive_index_enabled = True

        ds1_results = {
            "results": [{"object": "page", "id": "page-from-ds1", "properties": {}}],
            "next_cursor": None,
        }
        ds2_results = {
            "results": [{"object": "page", "id": "page-from-ds2", "properties": {}}],
            "next_cursor": None,
        }

        with (
            patch.object(
                connector,
                "_fetch_data_sources_for_database",
                return_value=[
                    NotionDataSource(id="ds-1", name="Source A"),
                    NotionDataSource(id="ds-2", name="Source B"),
                ],
            ),
            patch.object(
                connector,
                "_fetch_data_source",
                side_effect=[ds1_results, ds2_results],
            ),
        ):
            result = connector._read_pages_from_database("db-1")

        assert "page-from-ds1" in result.child_page_ids
        assert "page-from-ds2" in result.child_page_ids

    def test_pagination_across_pages(self) -> None:
        connector = _make_connector()
        connector.workspace_id = "ws-1"
        connector.recursive_index_enabled = True

        page1 = {
            "results": [{"object": "page", "id": "page-1", "properties": {}}],
            "next_cursor": "cursor-abc",
        }
        page2 = {
            "results": [{"object": "page", "id": "page-2", "properties": {}}],
            "next_cursor": None,
        }

        with (
            patch.object(
                connector,
                "_fetch_data_sources_for_database",
                return_value=[NotionDataSource(id="ds-1", name="Source A")],
            ),
            patch.object(
                connector,
                "_fetch_data_source",
                side_effect=[page1, page2],
            ) as mock_fetch_ds,
        ):
            result = connector._read_pages_from_database("db-1")

        assert mock_fetch_ds.call_count == 2
        mock_fetch_ds.assert_any_call("ds-1", None)
        mock_fetch_ds.assert_any_call("ds-1", "cursor-abc")
        assert "page-1" in result.child_page_ids
        assert "page-2" in result.child_page_ids


class TestInTrashField:
    def test_notion_page_accepts_in_trash(self) -> None:
        page = NotionPage(
            id="page-1",
            created_time="2026-01-01T00:00:00.000Z",
            last_edited_time="2026-01-01T00:00:00.000Z",
            in_trash=False,
            properties={},
            url="https://notion.so/page-1",
        )
        assert page.in_trash is False

    def test_notion_page_in_trash_true(self) -> None:
        page = NotionPage(
            id="page-1",
            created_time="2026-01-01T00:00:00.000Z",
            last_edited_time="2026-01-01T00:00:00.000Z",
            in_trash=True,
            properties={},
            url="https://notion.so/page-1",
        )
        assert page.in_trash is True


class TestFetchDatabaseAsPage:
    def test_handles_missing_properties(self) -> None:
        connector = _make_connector()
        resp = _mock_response(
            {
                "object": "database",
                "id": "db-1",
                "created_time": "2026-01-01T00:00:00.000Z",
                "last_edited_time": "2026-01-01T00:00:00.000Z",
                "in_trash": False,
                "url": "https://notion.so/db-1",
                "title": [{"text": {"content": "My DB"}, "plain_text": "My DB"}],
                "data_sources": [{"id": "ds-1", "name": "Source"}],
            }
        )
        with patch(
            "onyx.connectors.notion.connector.rl_requests.get", return_value=resp
        ):
            page = connector._fetch_database_as_page("db-1")

        assert page.id == "db-1"
        assert page.database_name == "My DB"
        assert page.properties == {}


================================================
FILE: backend/tests/unit/onyx/connectors/salesforce/test_salesforce_custom_config.py
================================================
#!/usr/bin/env python3
"""
Test script for the new custom query configuration functionality in SalesforceConnector.

This demonstrates how to use the new custom_query_config parameter to specify
exactly which fields and associations (child objects) to retrieve for each object type.
"""

import json
from typing import Any

from onyx.connectors.salesforce.connector import _validate_custom_query_config
from onyx.connectors.salesforce.connector import SalesforceConnector
from onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE
from onyx.connectors.salesforce.utils import MODIFIED_FIELD


def test_custom_query_config() -> None:
    """Test the custom query configuration functionality."""

    # Example custom query configuration
    # This specifies exactly which fields and associations to retrieve
    custom_config = {
        ACCOUNT_OBJECT_TYPE: {
            "fields": ["Id", "Name", "Industry", "CreatedDate", MODIFIED_FIELD],
            "associations": {
                "Contact": ["Id", "FirstName", "LastName", "Email"],
                "Opportunity": ["Id", "Name", "StageName", "Amount", "CloseDate"],
            },
        },
        "Lead": {
            "fields": ["Id", "FirstName", "LastName", "Company", "Status"],
            "associations": {},  # No associations for Lead
        },
    }

    # Create connector with custom configuration
    connector = SalesforceConnector(
        batch_size=50, custom_query_config=json.dumps(custom_config)
    )

    print("✅ SalesforceConnector created successfully with custom query config")
    print(f"Parent object list: {connector.parent_object_list}")
    print(f"Custom config keys: {list(custom_config.keys())}")

    # Test that the parent object list is derived from the custom config
    assert connector.parent_object_list == [ACCOUNT_OBJECT_TYPE, "Lead"]
    assert connector.custom_query_config == custom_config

    print("✅ Basic validation passed")


def test_traditional_config() -> None:
    """Test that the traditional requested_objects approach still works."""

    # Traditional approach
    connector = SalesforceConnector(
        batch_size=50, requested_objects=[ACCOUNT_OBJECT_TYPE, "Contact"]
    )

    print("✅ SalesforceConnector created successfully with traditional config")
    print(f"Parent object list: {connector.parent_object_list}")

    # Test that it still works the old way
    assert connector.parent_object_list == [ACCOUNT_OBJECT_TYPE, "Contact"]
    assert connector.custom_query_config is None

    print("✅ Traditional config validation passed")


def test_validation() -> None:
    """Test that invalid configurations are rejected."""

    # Test invalid config structure
    invalid_configs: list[Any] = [
        # Invalid fields type
        {ACCOUNT_OBJECT_TYPE: {"fields": "invalid"}},
        # Invalid associations type
        {ACCOUNT_OBJECT_TYPE: {"associations": "invalid"}},
        # Nested invalid structure
        {ACCOUNT_OBJECT_TYPE: {"associations": {"Contact": {"fields": "invalid"}}}},
    ]

    for i, invalid_config in enumerate(invalid_configs):
        try:
            _validate_custom_query_config(invalid_config)
            assert False, f"Should have raised ValueError for invalid_config[{i}]"
        except ValueError:
            print(f"✅ Correctly rejected invalid config {i}")


if __name__ == "__main__":
    print("Testing SalesforceConnector custom query configuration...")
    print("=" * 60)

    test_custom_query_config()
    print()

    test_traditional_config()
    print()

    test_validation()
    print()

    print("=" * 60)
    print("🎉 All tests passed! The custom query configuration is working correctly.")
    print()
    print("Example usage:")
    print(
        """
# Custom configuration approach
custom_config = {
    ACCOUNT_OBJECT_TYPE: {
        "fields": ["Id", "Name", "Industry"],
        "associations": {
            "Contact": {
                "fields": ["Id", "FirstName", "LastName", "Email"],
                "associations": {}
            }
        }
    }
}

connector = SalesforceConnector(custom_query_config=custom_config)

# Traditional approach (still works)
connector = SalesforceConnector(requested_objects=[ACCOUNT_OBJECT_TYPE, "Contact"])
"""
    )


================================================
FILE: backend/tests/unit/onyx/connectors/salesforce/test_salesforce_sqlite.py
================================================
import csv
import json
import os
import shutil
import tempfile
import time
from collections import defaultdict
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import cast

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.salesforce.doc_conversion import _extract_section
from onyx.connectors.salesforce.doc_conversion import ID_PREFIX
from onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce
from onyx.connectors.salesforce.salesforce_calls import _bulk_retrieve_from_salesforce
from onyx.connectors.salesforce.salesforce_calls import _make_time_filter_for_sf_type
from onyx.connectors.salesforce.salesforce_calls import _make_time_filtered_query
from onyx.connectors.salesforce.salesforce_calls import get_object_by_id_query
from onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite
from onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE
from onyx.connectors.salesforce.utils import MODIFIED_FIELD
from onyx.connectors.salesforce.utils import USER_OBJECT_TYPE
from onyx.utils.logger import setup_logger

# from onyx.connectors.salesforce.onyx_salesforce_type import OnyxSalesforceType
# from onyx.connectors.salesforce.salesforce_calls import get_children_of_sf_type

logger = setup_logger()


_VALID_SALESFORCE_IDS = [
    "001bm00000fd9Z3AAI",
    "001bm00000fdYTdAAM",
    "001bm00000fdYTeAAM",
    "001bm00000fdYTfAAM",
    "001bm00000fdYTgAAM",
    "001bm00000fdYThAAM",
    "001bm00000fdYTiAAM",
    "001bm00000fdYTjAAM",
    "001bm00000fdYTkAAM",
    "001bm00000fdYTlAAM",
    "001bm00000fdYTmAAM",
    "001bm00000fdYTnAAM",
    "001bm00000fdYToAAM",
    "500bm00000XoOxtAAF",
    "500bm00000XoOxuAAF",
    "500bm00000XoOxvAAF",
    "500bm00000XoOxwAAF",
    "500bm00000XoOxxAAF",
    "500bm00000XoOxyAAF",
    "500bm00000XoOxzAAF",
    "500bm00000XoOy0AAF",
    "500bm00000XoOy1AAF",
    "500bm00000XoOy2AAF",
    "500bm00000XoOy3AAF",
    "500bm00000XoOy4AAF",
    "500bm00000XoOy5AAF",
    "500bm00000XoOy6AAF",
    "500bm00000XoOy7AAF",
    "500bm00000XoOy8AAF",
    "500bm00000XoOy9AAF",
    "500bm00000XoOyAAAV",
    "500bm00000XoOyBAAV",
    "500bm00000XoOyCAAV",
    "500bm00000XoOyDAAV",
    "500bm00000XoOyEAAV",
    "500bm00000XoOyFAAV",
    "500bm00000XoOyGAAV",
    "500bm00000XoOyHAAV",
    "500bm00000XoOyIAAV",
    "003bm00000EjHCjAAN",
    "003bm00000EjHCkAAN",
    "003bm00000EjHClAAN",
    "003bm00000EjHCmAAN",
    "003bm00000EjHCnAAN",
    "003bm00000EjHCoAAN",
    "003bm00000EjHCpAAN",
    "003bm00000EjHCqAAN",
    "003bm00000EjHCrAAN",
    "003bm00000EjHCsAAN",
    "003bm00000EjHCtAAN",
    "003bm00000EjHCuAAN",
    "003bm00000EjHCvAAN",
    "003bm00000EjHCwAAN",
    "003bm00000EjHCxAAN",
    "003bm00000EjHCyAAN",
    "003bm00000EjHCzAAN",
    "003bm00000EjHD0AAN",
    "003bm00000EjHD1AAN",
    "003bm00000EjHD2AAN",
    "550bm00000EXc2tAAD",
    "006bm000006kyDpAAI",
    "006bm000006kyDqAAI",
    "006bm000006kyDrAAI",
    "006bm000006kyDsAAI",
    "006bm000006kyDtAAI",
    "006bm000006kyDuAAI",
    "006bm000006kyDvAAI",
    "006bm000006kyDwAAI",
    "006bm000006kyDxAAI",
    "006bm000006kyDyAAI",
    "006bm000006kyDzAAI",
    "006bm000006kyE0AAI",
    "006bm000006kyE1AAI",
    "006bm000006kyE2AAI",
    "006bm000006kyE3AAI",
    "006bm000006kyE4AAI",
    "006bm000006kyE5AAI",
    "006bm000006kyE6AAI",
    "006bm000006kyE7AAI",
    "006bm000006kyE8AAI",
    "006bm000006kyE9AAI",
    "006bm000006kyEAAAY",
    "006bm000006kyEBAAY",
    "006bm000006kyECAAY",
    "006bm000006kyEDAAY",
    "006bm000006kyEEAAY",
    "006bm000006kyEFAAY",
    "006bm000006kyEGAAY",
    "006bm000006kyEHAAY",
    "006bm000006kyEIAAY",
    "006bm000006kyEJAAY",
    "005bm000009zy0TAAQ",
    "005bm000009zy25AAA",
    "005bm000009zy26AAA",
    "005bm000009zy28AAA",
    "005bm000009zy29AAA",
    "005bm000009zy2AAAQ",
    "005bm000009zy2BAAQ",
]


def _clear_sf_db(directory: str) -> None:
    """
    Clears the SF DB by deleting all files in the data directory.
    """
    shutil.rmtree(directory, ignore_errors=True)


def _create_csv_file_and_update_db(
    sf_db: OnyxSalesforceSQLite,
    object_type: str,
    records: list[dict],
    filename: str = "test_data.csv",
) -> None:
    """
    Creates a CSV file for the given object type and records.

    Args:
        object_type: The Salesforce object type (e.g. ACCOUNT_OBJECT_TYPE, "Contact")
        records: List of dictionaries containing the record data
        filename: Name of the CSV file to create (default: test_data.csv)
    """
    if not records:
        return

    # Get all unique fields from records
    fields: set[str] = set()
    for record in records:
        fields.update(record.keys())
    fields = set(sorted(list(fields)))  # Sort for consistent order

    # Create CSV file
    with tempfile.TemporaryDirectory() as directory:
        csv_path = os.path.join(directory, filename)
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            for record in records:
                writer.writerow(record)

        # Update the database with the CSV
        sf_db.update_from_csv(object_type, csv_path)


def _create_csv_with_example_data(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Creates CSV files with example data, organized by object type.
    """
    example_data: dict[str, list[dict]] = {
        ACCOUNT_OBJECT_TYPE: [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Acme Inc.",
                "BillingCity": "New York",
                "Industry": "Technology",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Globex Corp",
                "BillingCity": "Los Angeles",
                "Industry": "Manufacturing",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Initech",
                "BillingCity": "Austin",
                "Industry": "Software",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[3],
                "Name": "TechCorp Solutions",
                "BillingCity": "San Francisco",
                "Industry": "Software",
                "AnnualRevenue": 5000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[4],
                "Name": "BioMed Research",
                "BillingCity": "Boston",
                "Industry": "Healthcare",
                "AnnualRevenue": 12000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[5],
                "Name": "Green Energy Co",
                "BillingCity": "Portland",
                "Industry": "Energy",
                "AnnualRevenue": 8000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[6],
                "Name": "DataFlow Analytics",
                "BillingCity": "Seattle",
                "Industry": "Technology",
                "AnnualRevenue": 3000000,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[7],
                "Name": "Cloud Nine Services",
                "BillingCity": "Denver",
                "Industry": "Cloud Computing",
                "AnnualRevenue": 7000000,
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "FirstName": "John",
                "LastName": "Doe",
                "Email": "john.doe@acme.com",
                "Title": "CEO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[41],
                "FirstName": "Jane",
                "LastName": "Smith",
                "Email": "jane.smith@acme.com",
                "Title": "CTO",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[42],
                "FirstName": "Bob",
                "LastName": "Johnson",
                "Email": "bob.j@globex.com",
                "Title": "Sales Director",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[43],
                "FirstName": "Sarah",
                "LastName": "Chen",
                "Email": "sarah.chen@techcorp.com",
                "Title": "Product Manager",
                "Phone": "415-555-0101",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[44],
                "FirstName": "Michael",
                "LastName": "Rodriguez",
                "Email": "m.rodriguez@biomed.com",
                "Title": "Research Director",
                "Phone": "617-555-0202",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[45],
                "FirstName": "Emily",
                "LastName": "Green",
                "Email": "emily.g@greenenergy.com",
                "Title": "Sustainability Lead",
                "Phone": "503-555-0303",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[46],
                "FirstName": "David",
                "LastName": "Kim",
                "Email": "david.kim@dataflow.com",
                "Title": "Data Scientist",
                "Phone": "206-555-0404",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[47],
                "FirstName": "Rachel",
                "LastName": "Taylor",
                "Email": "r.taylor@cloudnine.com",
                "Title": "Cloud Architect",
                "Phone": "303-555-0505",
            },
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "Name": "Acme Server Upgrade",
                "Amount": 50000,
                "Stage": "Prospecting",
                "CloseDate": "2024-06-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[63],
                "Name": "Globex Manufacturing Line",
                "Amount": 150000,
                "Stage": "Negotiation",
                "CloseDate": "2024-03-15",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[64],
                "Name": "Initech Software License",
                "Amount": 75000,
                "Stage": "Closed Won",
                "CloseDate": "2024-01-30",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[65],
                "Name": "TechCorp AI Implementation",
                "Amount": 250000,
                "Stage": "Needs Analysis",
                "CloseDate": "2024-08-15",
                "Probability": 60,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[66],
                "Name": "BioMed Lab Equipment",
                "Amount": 500000,
                "Stage": "Value Proposition",
                "CloseDate": "2024-09-30",
                "Probability": 75,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[67],
                "Name": "Green Energy Solar Project",
                "Amount": 750000,
                "Stage": "Proposal",
                "CloseDate": "2024-07-15",
                "Probability": 80,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[68],
                "Name": "DataFlow Analytics Platform",
                "Amount": 180000,
                "Stage": "Negotiation",
                "CloseDate": "2024-05-30",
                "Probability": 90,
            },
            {
                "Id": _VALID_SALESFORCE_IDS[69],
                "Name": "Cloud Nine Infrastructure",
                "Amount": 300000,
                "Stage": "Qualification",
                "CloseDate": "2024-10-15",
                "Probability": 40,
            },
        ],
    }

    # Create CSV files for each object type
    for object_type, records in example_data.items():
        _create_csv_file_and_update_db(sf_db, object_type, records)


def _test_query(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests querying functionality by verifying:
    1. All expected Account IDs are found
    2. Each Account's data matches what was inserted
    """
    # Expected test data for verification
    expected_accounts: dict[str, dict[str, str | int]] = {
        _VALID_SALESFORCE_IDS[0]: {
            "Name": "Acme Inc.",
            "BillingCity": "New York",
            "Industry": "Technology",
        },
        _VALID_SALESFORCE_IDS[1]: {
            "Name": "Globex Corp",
            "BillingCity": "Los Angeles",
            "Industry": "Manufacturing",
        },
        _VALID_SALESFORCE_IDS[2]: {
            "Name": "Initech",
            "BillingCity": "Austin",
            "Industry": "Software",
        },
        _VALID_SALESFORCE_IDS[3]: {
            "Name": "TechCorp Solutions",
            "BillingCity": "San Francisco",
            "Industry": "Software",
            "AnnualRevenue": 5000000,
        },
        _VALID_SALESFORCE_IDS[4]: {
            "Name": "BioMed Research",
            "BillingCity": "Boston",
            "Industry": "Healthcare",
            "AnnualRevenue": 12000000,
        },
        _VALID_SALESFORCE_IDS[5]: {
            "Name": "Green Energy Co",
            "BillingCity": "Portland",
            "Industry": "Energy",
            "AnnualRevenue": 8000000,
        },
        _VALID_SALESFORCE_IDS[6]: {
            "Name": "DataFlow Analytics",
            "BillingCity": "Seattle",
            "Industry": "Technology",
            "AnnualRevenue": 3000000,
        },
        _VALID_SALESFORCE_IDS[7]: {
            "Name": "Cloud Nine Services",
            "BillingCity": "Denver",
            "Industry": "Cloud Computing",
            "AnnualRevenue": 7000000,
        },
    }

    # Get all Account IDs
    account_ids = sf_db.find_ids_by_type(ACCOUNT_OBJECT_TYPE)

    # Verify we found all expected accounts
    assert len(account_ids) == len(
        expected_accounts
    ), f"Expected {len(expected_accounts)} accounts, found {len(account_ids)}"
    assert set(account_ids) == set(
        expected_accounts.keys()
    ), "Found account IDs don't match expected IDs"

    # Verify each account's data
    for acc_id in account_ids:
        combined = sf_db.get_record(acc_id)
        assert combined is not None, f"Could not find account {acc_id}"

        expected = expected_accounts[acc_id]

        # Verify account data matches
        for key, value in expected.items():
            value = str(value)
            assert (
                combined.data[key] == value
            ), f"Account {acc_id} field {key} expected {value}, got {combined.data[key]}"

    print("All query tests passed successfully!")


def _test_upsert(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests upsert functionality by:
    1. Updating an existing account
    2. Creating a new account
    3. Verifying both operations were successful
    """
    # Create CSV for updating an existing account and adding a new one
    update_data: list[dict[str, str | int]] = [
        {
            "Id": _VALID_SALESFORCE_IDS[0],
            "Name": "Acme Inc. Updated",
            "BillingCity": "New York",
            "Industry": "Technology",
            "Description": "Updated company info",
        },
        {
            "Id": _VALID_SALESFORCE_IDS[2],
            "Name": "New Company Inc.",
            "BillingCity": "Miami",
            "Industry": "Finance",
            "AnnualRevenue": 1000000,
        },
    ]

    _create_csv_file_and_update_db(
        sf_db, ACCOUNT_OBJECT_TYPE, update_data, "update_data.csv"
    )

    # Verify the update worked
    updated_record = sf_db.get_record(_VALID_SALESFORCE_IDS[0])
    assert updated_record is not None, "Updated record not found"
    assert updated_record.data["Name"] == "Acme Inc. Updated", "Name not updated"
    assert (
        updated_record.data["Description"] == "Updated company info"
    ), "Description not added"

    # Verify the new record was created
    new_record = sf_db.get_record(_VALID_SALESFORCE_IDS[2])
    assert new_record is not None, "New record not found"
    assert new_record.data["Name"] == "New Company Inc.", "New record name incorrect"
    assert new_record.data["AnnualRevenue"] == "1000000", "New record revenue incorrect"

    print("All upsert tests passed successfully!")


def _test_relationships(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests relationship shelf updates and queries by:
    1. Creating test data with relationships
    2. Verifying the relationships are correctly stored
    3. Testing relationship queries
    """
    # Create test data for each object type
    test_data: dict[str, list[dict[str, str | int]]] = {
        "Case": [
            {
                "Id": _VALID_SALESFORCE_IDS[13],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[14],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Subject": "Test Case 2",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[48],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Test",
                "LastName": "Contact",
            }
        ],
        "Opportunity": [
            {
                "Id": _VALID_SALESFORCE_IDS[62],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "Name": "Test Opportunity",
                "Amount": 100000,
            }
        ],
    }

    # Create and update CSV files for each object type
    for object_type, records in test_data.items():
        _create_csv_file_and_update_db(
            sf_db, object_type, records, "relationship_test.csv"
        )

    # Test relationship queries
    # All these objects should be children of Acme Inc.
    child_ids = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert len(child_ids) == 4, f"Expected 4 child objects, found {len(child_ids)}"
    assert _VALID_SALESFORCE_IDS[13] in child_ids, "Case 1 not found in relationship"
    assert _VALID_SALESFORCE_IDS[14] in child_ids, "Case 2 not found in relationship"
    assert _VALID_SALESFORCE_IDS[48] in child_ids, "Contact not found in relationship"
    assert (
        _VALID_SALESFORCE_IDS[62] in child_ids
    ), "Opportunity not found in relationship"

    # Test querying relationships for a different account (should be empty)
    other_account_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert (
        len(other_account_children) == 0
    ), "Expected no children for different account"

    print("All relationship tests passed successfully!")


def _test_account_with_children(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests querying all accounts and retrieving their child objects.
    This test verifies that:
    1. All accounts can be retrieved
    2. Child objects are correctly linked
    3. Child object data is complete and accurate
    """
    # First get all account IDs
    account_ids = sf_db.find_ids_by_type(ACCOUNT_OBJECT_TYPE)
    assert len(account_ids) > 0, "No accounts found"

    # For each account, get its children and verify the data
    for account_id in account_ids:
        account = sf_db.get_record(account_id)
        assert account is not None, f"Could not find account {account_id}"

        # Get all child objects
        child_ids = sf_db.get_child_ids(account_id)

        # For Acme Inc., verify specific relationships
        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.
            assert (
                len(child_ids) == 4
            ), f"Expected 4 children for Acme Inc., found {len(child_ids)}"

            # Get all child records
            child_records = []
            for child_id in child_ids:
                child_record = sf_db.get_record(child_id)
                if child_record is not None:
                    child_records.append(child_record)
            # Verify Cases
            cases = [r for r in child_records if r.type == "Case"]
            assert (
                len(cases) == 2
            ), f"Expected 2 cases for Acme Inc., found {len(cases)}"
            case_subjects = {case.data["Subject"] for case in cases}
            assert "Test Case 1" in case_subjects, "Test Case 1 not found"
            assert "Test Case 2" in case_subjects, "Test Case 2 not found"

            # Verify Contacts
            contacts = [r for r in child_records if r.type == "Contact"]
            assert (
                len(contacts) == 1
            ), f"Expected 1 contact for Acme Inc., found {len(contacts)}"
            contact = contacts[0]
            assert contact.data["FirstName"] == "Test", "Contact first name mismatch"
            assert contact.data["LastName"] == "Contact", "Contact last name mismatch"

            # Verify Opportunities
            opportunities = [r for r in child_records if r.type == "Opportunity"]
            assert (
                len(opportunities) == 1
            ), f"Expected 1 opportunity for Acme Inc., found {len(opportunities)}"
            opportunity = opportunities[0]
            assert (
                opportunity.data["Name"] == "Test Opportunity"
            ), "Opportunity name mismatch"
            assert opportunity.data["Amount"] == "100000", "Opportunity amount mismatch"

    print("All account with children tests passed successfully!")


def _test_relationship_updates(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests that relationships are properly updated when a child object's parent reference changes.
    This test verifies:
    1. Initial relationship is created correctly
    2. When parent reference is updated, old relationship is removed
    3. New relationship is created correctly
    """
    # Create initial test data - Contact linked to Acme Inc.
    initial_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[0],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    _create_csv_file_and_update_db(
        sf_db, "Contact", initial_contact, "initial_contact.csv"
    )

    # Verify initial relationship
    acme_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] in acme_children
    ), "Initial relationship not created"

    # Update contact to be linked to Globex Corp instead
    updated_contact = [
        {
            "Id": _VALID_SALESFORCE_IDS[40],
            "AccountId": _VALID_SALESFORCE_IDS[1],
            "FirstName": "Test",
            "LastName": "Contact",
        }
    ]
    _create_csv_file_and_update_db(
        sf_db, "Contact", updated_contact, "updated_contact.csv"
    )

    # Verify old relationship is removed
    acme_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])
    assert (
        _VALID_SALESFORCE_IDS[40] not in acme_children
    ), "Old relationship not removed"

    # Verify new relationship is created
    globex_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[1])
    assert _VALID_SALESFORCE_IDS[40] in globex_children, "New relationship not created"

    print("All relationship update tests passed successfully!")


def _test_get_affected_parent_ids(sf_db: OnyxSalesforceSQLite) -> None:
    """
    Tests get_affected_parent_ids functionality by verifying:
    1. IDs that are directly in the parent_types list are included
    2. IDs that have children in the updated_ids list are included
    3. IDs that are neither of the above are not included
    """
    # Create test data with relationships
    test_data = {
        ACCOUNT_OBJECT_TYPE: [
            {
                "Id": _VALID_SALESFORCE_IDS[0],
                "Name": "Parent Account 1",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[1],
                "Name": "Parent Account 2",
            },
            {
                "Id": _VALID_SALESFORCE_IDS[2],
                "Name": "Not Affected Account",
            },
        ],
        "Contact": [
            {
                "Id": _VALID_SALESFORCE_IDS[40],
                "AccountId": _VALID_SALESFORCE_IDS[0],
                "FirstName": "Child",
                "LastName": "Contact",
            }
        ],
    }

    # Create and update CSV files for test data
    for object_type, records in test_data.items():
        _create_csv_file_and_update_db(sf_db, object_type, records)

    # Test Case 1: Account directly in updated_ids and parent_types
    updated_ids = [_VALID_SALESFORCE_IDS[1]]  # Parent Account 2
    parent_types = set([ACCOUNT_OBJECT_TYPE])
    affected_ids_by_type = defaultdict(set)
    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(
        updated_ids, parent_types
    ):
        affected_ids_by_type[parent_type].add(parent_id)
    assert (
        ACCOUNT_OBJECT_TYPE in affected_ids_by_type
    ), "Account type not in affected_ids_by_type"
    assert (
        _VALID_SALESFORCE_IDS[1] in affected_ids_by_type[ACCOUNT_OBJECT_TYPE]
    ), "Direct parent ID not included"

    # Test Case 2: Account with child in updated_ids
    updated_ids = [_VALID_SALESFORCE_IDS[40]]  # Child Contact
    parent_types = set([ACCOUNT_OBJECT_TYPE])
    affected_ids_by_type = defaultdict(set)
    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(
        updated_ids, parent_types
    ):
        affected_ids_by_type[parent_type].add(parent_id)
    assert (
        ACCOUNT_OBJECT_TYPE in affected_ids_by_type
    ), "Account type not in affected_ids_by_type"
    assert (
        _VALID_SALESFORCE_IDS[0] in affected_ids_by_type[ACCOUNT_OBJECT_TYPE]
    ), "Parent of updated child not included"

    # Test Case 3: Both direct and indirect affects
    updated_ids = [_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]]  # Both cases
    parent_types = set([ACCOUNT_OBJECT_TYPE])
    affected_ids_by_type = defaultdict(set)
    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(
        updated_ids, parent_types
    ):
        affected_ids_by_type[parent_type].add(parent_id)
    assert (
        ACCOUNT_OBJECT_TYPE in affected_ids_by_type
    ), "Account type not in affected_ids_by_type"
    affected_ids = affected_ids_by_type[ACCOUNT_OBJECT_TYPE]
    assert len(affected_ids) == 2, "Expected exactly two affected parent IDs"
    assert _VALID_SALESFORCE_IDS[0] in affected_ids, "Parent of child not included"
    assert _VALID_SALESFORCE_IDS[1] in affected_ids, "Direct parent ID not included"
    assert (
        _VALID_SALESFORCE_IDS[2] not in affected_ids
    ), "Unaffected ID incorrectly included"

    # Test Case 4: No matches
    updated_ids = [_VALID_SALESFORCE_IDS[40]]  # Child Contact
    parent_types = set(["Opportunity"])  # Wrong type
    affected_ids_by_type = defaultdict(set)
    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(
        updated_ids, parent_types
    ):
        affected_ids_by_type[parent_type].add(parent_id)
    assert len(affected_ids_by_type) == 0, "Should return empty dict when no matches"

    print("All get_affected_parent_ids tests passed successfully!")


def test_salesforce_sqlite() -> None:
    with tempfile.TemporaryDirectory() as directory:
        _clear_sf_db(directory)

        filename = os.path.join(directory, "salesforce_db.sqlite")
        sf_db = OnyxSalesforceSQLite(filename)
        sf_db.connect()
        sf_db.apply_schema()

        _create_csv_with_example_data(sf_db)

        _test_query(sf_db)

        _test_upsert(sf_db)

        _test_relationships(sf_db)

        _test_account_with_children(sf_db)

        _test_relationship_updates(sf_db)

        _test_get_affected_parent_ids(sf_db)

        sf_db.close()

        _clear_sf_db(directory)


@pytest.mark.skip(reason="Enable when credentials are available")
def test_salesforce_bulk_retrieve() -> None:

    username = os.environ["SF_USERNAME"]
    password = os.environ["SF_PASSWORD"]
    security_token = os.environ["SF_SECURITY_TOKEN"]

    sf_client = OnyxSalesforce(
        username=username,
        password=password,
        security_token=security_token,
        domain=None,
    )

    # onyx_sf_type = OnyxSalesforceType("Contact", sf_client)
    sf_object_name = "Contact"
    queryable_fields = sf_client.get_queryable_fields_by_type(sf_object_name)

    intermediate_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
    time_filter = _make_time_filter_for_sf_type(
        queryable_fields, 0, intermediate_time.timestamp()
    )
    assert time_filter

    query = _make_time_filtered_query(queryable_fields, sf_object_name, time_filter)

    with tempfile.TemporaryDirectory() as temp_dir:
        object_type, csv_paths = _bulk_retrieve_from_salesforce(
            sf_object_name, query, temp_dir, sf_client
        )

        assert csv_paths

        # Count rows in the downloaded CSV(s)
        total_data_rows = 0
        csv_files_found = []
        for filename in os.listdir(temp_dir):
            # Ensure we only process files ending with .csv and belonging to the correct object type
            # The filename format is expected to be "ObjectType.some_random_id.csv"
            if filename.endswith(".csv") and filename.startswith(f"{object_type}."):
                filepath = os.path.join(temp_dir, filename)
                csv_files_found.append(filepath)
                try:
                    with open(filepath, "r", encoding="utf-8") as f:
                        reader = csv.reader(f)
                        try:
                            next(reader)  # Attempt to skip header
                            # Count data rows
                            num_data_rows = sum(1 for _ in reader)
                            logger.info(
                                f"Counted {num_data_rows} data rows in {filename}"
                            )
                            total_data_rows += num_data_rows
                        except StopIteration:
                            # Handle empty file or file with only header
                            logger.info(
                                f"File {filename} is empty or contains only a header."
                            )
                except Exception as e:
                    logger.error(f"Error reading or counting rows in {filename}: {e}")

        logger.info(
            f"Found {len(csv_files_found)} CSV files for {object_type} in {temp_dir}."
        )
        logger.info(
            f"Total data rows across all CSVs for {object_type}: {total_data_rows}"
        )

        assert total_data_rows > 1100 and total_data_rows < 1200


# def test_salesforce_client_sobjects():

#     username = os.environ["SF_USERNAME"]
#     password = os.environ["SF_PASSWORD"]
#     security_token = os.environ["SF_SECURITY_TOKEN"]

#     sf_client = Salesforce(
#         username=username,
#         password=password,
#         security_token=security_token,
#         domain=None,
#     )

#     # does exist
#     record = sf_client.restful("sobjects/005bm000002bBHtAAM")

#     # does exist
#     record = sf_client.sobjects.get("005bm000002bBHtAAM")

#     # doesn't exist
#     record = sf_client.sobjects.get("01234567890ABCDEFG")


def test_normalize_record() -> None:
    """Test normalize record"""

    expected_str = (
        '{"Id": "001bm00000eu6n5AAA", '
        '"LastModifiedDate": "2024-12-24T18:18:29.000Z", '
        '"BillingStreet": "123 Nowhere Parkway", '
        '"CreatedDate": "2024-12-24T18:18:29.000Z", '
        '"IsDeleted": "false", '
        '"SystemModstamp": "2024-12-24T18:18:29.000Z", '
        '"Name": "Some Company", '
        '"LastModifiedById": "005bm000002bBHtAAM", '
        '"PhotoUrl": "/services/images/photo/001bm00000eu6n5AAA", '
        '"BillingCity": "Some Town", '
        '"CleanStatus": "Pending"}'
    )
    current_dir = Path(__file__).parent
    with open(current_dir / "test_account.csv", "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            assert len(row) == 64

            normalized_record, parent_ids = OnyxSalesforceSQLite.normalize_record(row)
            normalized_record_json_str = json.dumps(normalized_record)
            assert normalized_record_json_str == expected_str
            assert "005bm000002bBHtAAM" in parent_ids
            assert len(parent_ids) == 1


def _get_child_records_by_id_query(
    object_id: str,
    sf_type: str,
    child_relationships: list[str],
    relationships_to_fields: dict[str, set[str]],
) -> str:
    """Returns a SOQL query given the object id, type and child relationships.

    When the query is executed, it comes back as result.records[0][child_relationship(s)]
    """

    SUBQUERY_LIMIT = 10

    query = "SELECT "
    for child_relationship in child_relationships:
        # TODO(rkuo): what happens if there is a very large list of child records?
        # is that possible problem?

        # NOTE: we actually have to list out the subqueries we want.
        # We can't use the following shortcuts:
        #   FIELDS(ALL) can include binary fields, so don't use that
        #   FIELDS(CUSTOM) can include aggregate queries, so don't use that
        fields = relationships_to_fields[child_relationship]
        fields_fragment = ",".join(fields)
        query += f"(SELECT {fields_fragment} FROM {child_relationship} LIMIT {SUBQUERY_LIMIT}), "

    query = query.rstrip(", ")
    query += f" FROM {sf_type} WHERE Id = '{object_id}'"
    return query


# TODO: move these to daily connector tests
@pytest.mark.skip(reason="Enable when credentials are available")
def test_salesforce_connector_single() -> None:
    """Test various manipulations of a single record"""

    # this record has some opportunity child records
    parent_id = "001bm00000BXfhEAAT"
    parent_type = ACCOUNT_OBJECT_TYPE
    parent_types = [parent_type]

    username = os.environ["SF_USERNAME"]
    password = os.environ["SF_PASSWORD"]
    security_token = os.environ["SF_SECURITY_TOKEN"]

    sf_client = OnyxSalesforce(
        username=username,
        password=password,
        security_token=security_token,
        domain=None,
    )

    # onyx_parent_sf_type = OnyxSalesforceType(parent_type, sf_client)

    child_types: set[str] = set()
    parent_to_child_types: dict[str, set[str]] = {}  # map from parent to child types
    parent_to_child_relationships: dict[str, set[str]] = (
        {}
    )  # map from parent to child relationships
    child_to_parent_types: dict[str, set[str]] = (
        {}
    )  # reverse map from child to parent types
    child_relationship_to_queryable_fields: dict[str, set[str]] = {}

    # parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}

    # Step 1 - make a list of all the types to download (parent + direct child + USER_OBJECT_TYPE)
    logger.info(f"Parent object types: num={len(parent_types)} list={parent_types}")
    for parent_type_working in parent_types:
        child_types_working = sf_client.get_children_of_sf_type(parent_type_working)
        logger.debug(f"Found {len(child_types)} child types for {parent_type_working}")

        for child_type, child_relationship in child_types_working.items():
            # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)

            # map parent to child type
            if parent_type_working not in parent_to_child_types:
                parent_to_child_types[parent_type_working] = set()
            parent_to_child_types[parent_type_working].add(child_type)

            # map parent to child relationship
            if parent_type_working not in parent_to_child_relationships:
                parent_to_child_relationships[parent_type_working] = set()
            parent_to_child_relationships[parent_type_working].add(child_relationship)

            # reverse map child to parent
            if child_relationship not in child_to_parent_types:
                child_to_parent_types[child_type] = set()
            child_to_parent_types[child_type].add(parent_type_working)

            child_relationship_to_queryable_fields[child_relationship] = (
                sf_client.get_queryable_fields_by_type(child_type)
            )

        child_types.update(list(child_types_working.keys()))
        logger.info(
            f"Child object types: parent={parent_type_working} num={len(child_types_working)} list={child_types_working.keys()}"
        )

    # queryable_fields_attachment = _get_all_queryable_fields_of_sf_type(sf_client, "Attachment")
    # queryable_fields_contact_point_email = _get_all_queryable_fields_of_sf_type(sf_client, "ContactPointEmail")

    # queryable_str = ",".join(queryable_fields_contact_point_email)
    sections: list[TextSection] = []

    queryable_fields = sf_client.get_queryable_fields_by_type(parent_type)
    query = get_object_by_id_query(parent_id, parent_type, queryable_fields)
    result = sf_client.query(query)
    records = result["records"]
    record = records[0]
    assert record["attributes"]["type"] == ACCOUNT_OBJECT_TYPE
    parent_last_modified_date = record.get(MODIFIED_FIELD, "")
    parent_semantic_identifier = record.get("Name", "Unknown Object")
    parent_last_modified_by_id = record.get("LastModifiedById")

    normalized_record, _ = OnyxSalesforceSQLite.normalize_record(record)
    parent_text_section = _extract_section(
        normalized_record, f"https://{sf_client.sf_instance}/{parent_id}"
    )
    sections.append(parent_text_section)

    time_start = time.monotonic()

    # hardcoded testing with just one parent id
    MAX_CHILD_TYPES_IN_QUERY = 20
    child_relationships: list[str] = list(parent_to_child_relationships[parent_type])

    # relationship_status - the child object types added to this dict have been queried
    relationship_status: dict[str, bool] = {}

    child_relationships_batch = []
    for child_relationship in child_relationships:
        # this is binary content, skip it
        if child_relationship == "Attachments":
            continue

        child_relationships_batch.append(child_relationship)
        if len(child_relationships_batch) < MAX_CHILD_TYPES_IN_QUERY:
            continue

        query = _get_child_records_by_id_query(
            parent_id,
            parent_type,
            child_relationships_batch,
            child_relationship_to_queryable_fields,
        )
        print(f"{query=}")

        # sf_type = parent_type
        # query = (
        #     f"SELECT "
        #     f"Id, "
        #     f"(SELECT OwnerId,CreatedDate,Id,Name,BestTimeToContactStartTime,ActiveToDate,"
        #     f"EmailLatestBounceReasonText,CreatedById,LastModifiedDate,LastModifiedById,"
        #     f"PreferenceRank,EmailDomain,BestTimeToContactEndTime,SystemModstamp,EmailMailBox,"
        #     f"LastReferencedDate,UsageType,ActiveFromDate,ParentId,LastViewedDate,IsPrimary,"
        #     f"EmailAddress,EmailLatestBounceDateTime,IsDeleted,BestTimeToContactTimezone "
        #     f"FROM ContactPointEmails LIMIT 10) "
        #     f"FROM {sf_type} WHERE Id = '{parent_id}'"
        # )

        # NOTE: Querying STANDARD and CUSTOM when there are no custom fields results in an
        # non-descriptive error (only root aggregation)
        # sf_type = parent_type
        # query = (
        #     f"SELECT "
        #     f"Id, "
        #     f"(SELECT FIELDS(STANDARD) FROM ContactPointEmails LIMIT 10) "
        #     f"FROM {sf_type} WHERE Id = '{parent_id}'"
        # )

        # query = (
        #     f"SELECT "
        #     f"{sf_type}.Id "
        #     f"FROM {sf_type} WHERE Id = '{parent_id}'"
        # )

        try:
            result = sf_client.query(query)
            print(f"{result=}")
        except Exception:
            logger.exception(f"Query failed: {query=}")
            for child_relationship in child_relationships_batch:
                relationship_status[child_relationship] = False
        else:
            for child_record_key, child_record in result["records"][0].items():
                if child_record_key == "attributes":
                    continue

                if child_record:
                    child_text_section = _extract_section(
                        child_record,
                        f"https://{sf_client.sf_instance}/{child_record_key}",
                    )
                    sections.append(child_text_section)
                    relationship_status[child_record_key] = False
                else:
                    relationship_status[child_record_key] = False
        finally:
            child_relationships_batch.clear()

    if len(child_relationships_batch) > 0:
        query = _get_child_records_by_id_query(
            parent_id,
            parent_types[0],
            child_relationships_batch,
            child_relationship_to_queryable_fields,
        )
        print(f"{query=}")

        try:
            result = sf_client.query(query)
            print(f"{result=}")
        except Exception:
            logger.exception(f"Query failed: {query=}")
            for child_relationship in child_relationships_batch:
                relationship_status[child_relationship] = False
        else:
            for child_record_key, child_record in result["records"][0].items():
                if child_record_key == "attributes":
                    continue

                if child_record:
                    child_text_section = _extract_section(
                        child_record,
                        f"https://{sf_client.sf_instance}/{child_record_key}",
                    )
                    sections.append(child_text_section)
                    relationship_status[child_record_key] = False
                else:
                    relationship_status[child_record_key] = False
        finally:
            child_relationships_batch.clear()

    # get user relationship if present
    primary_owner_list = None
    if parent_last_modified_by_id:
        queryable_user_fields = sf_client.get_queryable_fields_by_type(USER_OBJECT_TYPE)
        query = get_object_by_id_query(
            parent_last_modified_by_id, USER_OBJECT_TYPE, queryable_user_fields
        )
        result = sf_client.query(query)
        user_record = result["records"][0]
        expert_info = BasicExpertInfo(
            first_name=user_record.get("FirstName"),
            last_name=user_record.get("LastName"),
            email=user_record.get("Email"),
            display_name=user_record.get("Name"),
        )

        if (
            expert_info.first_name
            or expert_info.last_name
            or expert_info.email
            or expert_info.display_name
        ):
            primary_owner_list = [expert_info]

    doc = Document(
        id=ID_PREFIX + parent_id,
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.SALESFORCE,
        semantic_identifier=parent_semantic_identifier,
        doc_updated_at=time_str_to_utc(parent_last_modified_date),
        primary_owners=primary_owner_list,
        metadata={},
    )

    assert doc is not None

    time_elapsed = time.monotonic() - time_start
    print(f"elapsed={time_elapsed:.2f}")

    print(f"{relationship_status=}")


================================================
FILE: backend/tests/unit/onyx/connectors/salesforce/test_yield_doc_batches.py
================================================
"""Unit tests for _yield_doc_batches and metadata type conversion in SalesforceConnector."""

from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.salesforce.connector import _convert_to_metadata_value
from onyx.connectors.salesforce.connector import SalesforceConnector
from onyx.connectors.salesforce.utils import ID_FIELD
from onyx.connectors.salesforce.utils import MODIFIED_FIELD
from onyx.connectors.salesforce.utils import NAME_FIELD
from onyx.connectors.salesforce.utils import SalesforceObject


class TestConvertToMetadataValue:
    """Tests for the _convert_to_metadata_value helper function."""

    def test_string_value(self) -> None:
        """String values should be returned as-is."""
        assert _convert_to_metadata_value("hello") == "hello"
        assert _convert_to_metadata_value("") == ""

    def test_boolean_true(self) -> None:
        """Boolean True should be converted to string 'True'."""
        assert _convert_to_metadata_value(True) == "True"

    def test_boolean_false(self) -> None:
        """Boolean False should be converted to string 'False'."""
        assert _convert_to_metadata_value(False) == "False"

    def test_integer_value(self) -> None:
        """Integer values should be converted to string."""
        assert _convert_to_metadata_value(42) == "42"
        assert _convert_to_metadata_value(0) == "0"
        assert _convert_to_metadata_value(-100) == "-100"

    def test_float_value(self) -> None:
        """Float values should be converted to string."""
        assert _convert_to_metadata_value(3.14) == "3.14"
        assert _convert_to_metadata_value(0.0) == "0.0"
        assert _convert_to_metadata_value(-2.5) == "-2.5"

    def test_list_of_strings(self) -> None:
        """List of strings should remain as list of strings."""
        result = _convert_to_metadata_value(["a", "b", "c"])
        assert result == ["a", "b", "c"]

    def test_list_of_mixed_types(self) -> None:
        """List with mixed types should have all items converted to strings."""
        result = _convert_to_metadata_value([1, True, 3.14, "text"])
        assert result == ["1", "True", "3.14", "text"]

    def test_empty_list(self) -> None:
        """Empty list should return empty list."""
        assert _convert_to_metadata_value([]) == []


class TestYieldDocBatches:
    """Tests for the _yield_doc_batches method of SalesforceConnector."""

    @pytest.fixture
    def connector(self) -> SalesforceConnector:
        """Create a SalesforceConnector instance with mocked sf_client."""
        connector = SalesforceConnector(
            batch_size=10,
            requested_objects=["Opportunity"],
        )
        # Mock the sf_client property
        mock_sf_client = MagicMock()
        mock_sf_client.sf_instance = "test.salesforce.com"
        connector._sf_client = mock_sf_client
        return connector

    @pytest.fixture
    def mock_sf_db(self) -> MagicMock:
        """Create a mock OnyxSalesforceSQLite object."""
        return MagicMock()

    def _create_salesforce_object(
        self,
        object_id: str,
        object_type: str,
        data: dict[str, Any],
    ) -> SalesforceObject:
        """Helper to create a SalesforceObject with required fields."""
        # Ensure required fields are present
        data.setdefault(ID_FIELD, object_id)
        data.setdefault(MODIFIED_FIELD, "2024-01-15T10:30:00.000Z")
        data.setdefault(NAME_FIELD, f"Test {object_type}")
        return SalesforceObject(id=object_id, type=object_type, data=data)

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_metadata_type_conversion_for_opportunity(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test that Opportunity metadata fields are properly type-converted."""
        parent_id = "006bm000006kyDpAAI"
        parent_type = "Opportunity"

        # Create a parent object with various data types in the fields
        parent_data = {
            ID_FIELD: parent_id,
            NAME_FIELD: "Test Opportunity",
            MODIFIED_FIELD: "2024-01-15T10:30:00.000Z",
            "Account": "Acme Corp",  # string - should become "account" metadata
            "FiscalQuarter": 2,  # int - should be converted to "2"
            "FiscalYear": 2024,  # int - should be converted to "2024"
            "IsClosed": False,  # bool - should be converted to "False"
            "StageName": "Prospecting",  # string
            "Type": "New Business",  # string
            "Amount": 50000.50,  # float - should be converted to "50000.50"
            "CloseDate": "2024-06-30",  # string
            "Probability": 75,  # int - should be converted to "75"
            "CreatedDate": "2024-01-01T00:00:00.000Z",  # string
        }
        parent_object = self._create_salesforce_object(
            parent_id, parent_type, parent_data
        )

        # Setup mock sf_db
        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, parent_id, 1)]
        )
        mock_sf_db.get_record.return_value = parent_object
        mock_sf_db.file_size = 1024

        # Create a mock document that convert_sf_object_to_doc will return
        mock_doc = Document(
            id=f"SALESFORCE_{parent_id}",
            sections=[],
            source=DocumentSource.SALESFORCE,
            semantic_identifier="Test Opportunity",
            metadata={},
        )
        mock_convert.return_value = mock_doc

        # Track parent changes
        parents_changed = 0

        def increment() -> None:
            nonlocal parents_changed
            parents_changed += 1

        # Call _yield_doc_batches
        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {parent_id: parent_type}
        parent_types = {parent_type}

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                increment,
            )
        )

        # Verify we got one batch with one document
        assert len(batches) == 1
        docs = batches[0]
        assert len(docs) == 1

        doc = docs[0]
        assert isinstance(doc, Document)

        # Verify metadata type conversions
        # All values should be strings (or list of strings)
        assert doc.metadata["object_type"] == "Opportunity"
        assert doc.metadata["account"] == "Acme Corp"  # string stays string
        assert doc.metadata["fiscal_quarter"] == "2"  # int -> str
        assert doc.metadata["fiscal_year"] == "2024"  # int -> str
        assert doc.metadata["is_closed"] == "False"  # bool -> str
        assert doc.metadata["stage_name"] == "Prospecting"  # string stays string
        assert doc.metadata["type"] == "New Business"  # string stays string
        assert (
            doc.metadata["amount"] == "50000.5"
        )  # float -> str (Python drops trailing zeros)
        assert doc.metadata["close_date"] == "2024-06-30"  # string stays string
        assert doc.metadata["probability"] == "75"  # int -> str
        assert doc.metadata["name"] == "Test Opportunity"  # NAME_FIELD

        # Verify parent was counted
        assert parents_changed == 1
        assert type_to_processed[parent_type] == 1

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_missing_optional_metadata_fields(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test that missing optional metadata fields are not added."""
        parent_id = "006bm000006kyDqAAI"
        parent_type = "Opportunity"

        # Create parent object with only some fields
        parent_data = {
            ID_FIELD: parent_id,
            NAME_FIELD: "Minimal Opportunity",
            MODIFIED_FIELD: "2024-01-15T10:30:00.000Z",
            "StageName": "Closed Won",
            # Notably missing: Amount, Probability, FiscalQuarter, etc.
        }
        parent_object = self._create_salesforce_object(
            parent_id, parent_type, parent_data
        )

        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, parent_id, 1)]
        )
        mock_sf_db.get_record.return_value = parent_object
        mock_sf_db.file_size = 1024

        mock_doc = Document(
            id=f"SALESFORCE_{parent_id}",
            sections=[],
            source=DocumentSource.SALESFORCE,
            semantic_identifier="Minimal Opportunity",
            metadata={},
        )
        mock_convert.return_value = mock_doc

        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {parent_id: parent_type}
        parent_types = {parent_type}

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                lambda: None,
            )
        )

        doc = batches[0][0]
        assert isinstance(doc, Document)

        # Only present fields should be in metadata
        assert "stage_name" in doc.metadata
        assert doc.metadata["stage_name"] == "Closed Won"
        assert "name" in doc.metadata
        assert doc.metadata["name"] == "Minimal Opportunity"

        # Missing fields should not be in metadata
        assert "amount" not in doc.metadata
        assert "probability" not in doc.metadata
        assert "fiscal_quarter" not in doc.metadata
        assert "fiscal_year" not in doc.metadata
        assert "is_closed" not in doc.metadata

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_contact_metadata_fields(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test metadata conversion for Contact object type."""
        parent_id = "003bm00000EjHCjAAN"
        parent_type = "Contact"

        parent_data = {
            ID_FIELD: parent_id,
            NAME_FIELD: "John Doe",
            MODIFIED_FIELD: "2024-02-20T14:00:00.000Z",
            "Account": "Globex Corp",
            "CreatedDate": "2024-01-01T00:00:00.000Z",
        }
        parent_object = self._create_salesforce_object(
            parent_id, parent_type, parent_data
        )

        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, parent_id, 1)]
        )
        mock_sf_db.get_record.return_value = parent_object
        mock_sf_db.file_size = 1024

        mock_doc = Document(
            id=f"SALESFORCE_{parent_id}",
            sections=[],
            source=DocumentSource.SALESFORCE,
            semantic_identifier="John Doe",
            metadata={},
        )
        mock_convert.return_value = mock_doc

        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {parent_id: parent_type}
        parent_types = {parent_type}

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                lambda: None,
            )
        )

        doc = batches[0][0]
        assert isinstance(doc, Document)

        # Verify Contact-specific metadata
        assert doc.metadata["object_type"] == "Contact"
        assert doc.metadata["account"] == "Globex Corp"
        assert doc.metadata["created_date"] == "2024-01-01T00:00:00.000Z"
        assert doc.metadata["last_modified_date"] == "2024-02-20T14:00:00.000Z"

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_no_default_attributes_for_unknown_type(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test that unknown object types only get object_type metadata."""
        parent_id = "001bm00000fd9Z3AAI"
        parent_type = "CustomObject__c"

        parent_data = {
            ID_FIELD: parent_id,
            NAME_FIELD: "Custom Record",
            MODIFIED_FIELD: "2024-03-01T08:00:00.000Z",
            "CustomField__c": "custom value",
            "NumberField__c": 123,
        }
        parent_object = self._create_salesforce_object(
            parent_id, parent_type, parent_data
        )

        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, parent_id, 1)]
        )
        mock_sf_db.get_record.return_value = parent_object
        mock_sf_db.file_size = 1024

        mock_doc = Document(
            id=f"SALESFORCE_{parent_id}",
            sections=[],
            source=DocumentSource.SALESFORCE,
            semantic_identifier="Custom Record",
            metadata={},
        )
        mock_convert.return_value = mock_doc

        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {parent_id: parent_type}
        parent_types = {parent_type}

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                lambda: None,
            )
        )

        doc = batches[0][0]
        assert isinstance(doc, Document)

        # Only object_type should be set for unknown types
        assert doc.metadata["object_type"] == "CustomObject__c"
        # Custom fields should NOT be in metadata (not in _DEFAULT_ATTRIBUTES_TO_KEEP)
        assert "CustomField__c" not in doc.metadata
        assert "NumberField__c" not in doc.metadata

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_skips_missing_parent_objects(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test that missing parent objects are skipped gracefully."""
        parent_id = "006bm000006kyDrAAI"
        parent_type = "Opportunity"

        # get_record returns None for missing object
        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, parent_id, 1)]
        )
        mock_sf_db.get_record.return_value = None
        mock_sf_db.file_size = 1024

        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {parent_id: parent_type}
        parent_types = {parent_type}

        parents_changed = 0

        def increment() -> None:
            nonlocal parents_changed
            parents_changed += 1

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                increment,
            )
        )

        # Should yield one empty batch
        assert len(batches) == 1
        assert len(batches[0]) == 0

        # convert_sf_object_to_doc should not have been called
        mock_convert.assert_not_called()

        # Parents changed should still be 0
        assert parents_changed == 0

    @patch("onyx.connectors.salesforce.connector.convert_sf_object_to_doc")
    def test_multiple_documents_batching(
        self,
        mock_convert: MagicMock,
        connector: SalesforceConnector,
        mock_sf_db: MagicMock,
    ) -> None:
        """Test that multiple documents are correctly batched."""
        # Create 3 parent objects
        parent_ids = [
            "006bm000006kyDsAAI",
            "006bm000006kyDtAAI",
            "006bm000006kyDuAAI",
        ]
        parent_type = "Opportunity"

        parent_objects = [
            self._create_salesforce_object(
                pid,
                parent_type,
                {
                    ID_FIELD: pid,
                    NAME_FIELD: f"Opportunity {i}",
                    MODIFIED_FIELD: "2024-01-15T10:30:00.000Z",
                    "IsClosed": i % 2 == 0,  # alternating bool values
                    "Amount": 1000.0 * (i + 1),
                },
            )
            for i, pid in enumerate(parent_ids)
        ]

        # Setup mock to return all three
        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(
            [(parent_type, pid, i + 1) for i, pid in enumerate(parent_ids)]
        )
        mock_sf_db.get_record.side_effect = parent_objects
        mock_sf_db.file_size = 1024

        # Create mock documents
        mock_docs = [
            Document(
                id=f"SALESFORCE_{pid}",
                sections=[],
                source=DocumentSource.SALESFORCE,
                semantic_identifier=f"Opportunity {i}",
                metadata={},
            )
            for i, pid in enumerate(parent_ids)
        ]
        mock_convert.side_effect = mock_docs

        type_to_processed: dict[str, int] = {}
        changed_ids_to_type = {pid: parent_type for pid in parent_ids}
        parent_types = {parent_type}

        batches = list(
            connector._yield_doc_batches(
                mock_sf_db,
                type_to_processed,
                changed_ids_to_type,
                parent_types,
                lambda: None,
            )
        )

        # With batch_size=10, all 3 docs should be in one batch
        assert len(batches) == 1
        assert len(batches[0]) == 3

        # Verify each document has correct metadata
        for i, doc in enumerate(batches[0]):
            assert isinstance(doc, Document)
            assert doc.metadata["object_type"] == "Opportunity"
            assert doc.metadata["is_closed"] == str(i % 2 == 0)
            assert doc.metadata["amount"] == str(1000.0 * (i + 1))

        assert type_to_processed[parent_type] == 3


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_delta_checkpointing.py
================================================
"""Tests for per-page delta checkpointing in the SharePoint connector (P1-1).

Validates that:
- Delta drives process one page per _load_from_checkpoint call
- Checkpoints persist the delta next_link for resumption
- Crash + resume skips already-processed pages
- BFS (folder-scoped) drives process all items in one call
- 410 Gone triggers a full-resync URL in the checkpoint
- Duplicate document IDs across delta pages are deduplicated
"""

from __future__ import annotations

from collections import deque
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any

import pytest

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import TextSection
from onyx.connectors.sharepoint.connector import DriveItemData
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SharepointConnectorCheckpoint
from onyx.connectors.sharepoint.connector import SiteDescriptor

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

SITE_URL = "https://example.sharepoint.com/sites/sample"
DRIVE_WEB_URL = f"{SITE_URL}/Shared Documents"
DRIVE_ID = "fake-drive-id"

# Use a start time in the future so delta URLs include a timestamp token
_START_TS = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
_END_TS = datetime(2026, 1, 1, tzinfo=timezone.utc).timestamp()

# For BFS tests we use epoch so no token is generated
_EPOCH_START: float = 0.0


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_item(item_id: str, name: str = "doc.pdf") -> DriveItemData:
    return DriveItemData(
        id=item_id,
        name=name,
        web_url=f"{SITE_URL}/{name}",
        parent_reference_path="/drives/d1/root:",
        drive_id=DRIVE_ID,
    )


def _make_document(item: DriveItemData) -> Document:
    return Document(
        id=item.id,
        source=DocumentSource.SHAREPOINT,
        semantic_identifier=item.name,
        metadata={},
        sections=[TextSection(link=item.web_url, text="content")],
    )


def _consume_generator(
    gen: Generator[Any, None, SharepointConnectorCheckpoint],
) -> tuple[list[Any], SharepointConnectorCheckpoint]:
    """Exhaust a _load_from_checkpoint generator.

    Returns (yielded_items, returned_checkpoint).
    """
    yielded: list[Any] = []
    try:
        while True:
            yielded.append(next(gen))
    except StopIteration as e:
        return yielded, e.value


def _docs_from(yielded: list[Any]) -> list[Document]:
    return [y for y in yielded if isinstance(y, Document)]


def _failures_from(yielded: list[Any]) -> list[ConnectorFailure]:
    return [y for y in yielded if isinstance(y, ConnectorFailure)]


def _build_ready_checkpoint(
    drive_names: list[str] | None = None,
    folder_path: str | None = None,
) -> SharepointConnectorCheckpoint:
    """Checkpoint ready for Phase 3 (sites initialised, drives queued)."""
    cp = SharepointConnectorCheckpoint(has_more=True)
    cp.cached_site_descriptors = deque()
    cp.current_site_descriptor = SiteDescriptor(
        url=SITE_URL,
        drive_name=None,
        folder_path=folder_path,
    )
    cp.cached_drive_names = deque(drive_names or ["Documents"])
    cp.process_site_pages = False
    return cp


def _setup_connector(monkeypatch: pytest.MonkeyPatch) -> SharepointConnector:
    """Create a connector with common methods mocked."""
    connector = SharepointConnector()
    connector._graph_client = object()
    connector.include_site_pages = False

    def fake_resolve_drive(
        self: SharepointConnector,  # noqa: ARG001
        site_descriptor: SiteDescriptor,  # noqa: ARG001
        drive_name: str,  # noqa: ARG001
    ) -> tuple[str, str | None]:
        return (DRIVE_ID, DRIVE_WEB_URL)

    def fake_get_access_token(self: SharepointConnector) -> str:  # noqa: ARG001
        return "fake-access-token"

    monkeypatch.setattr(SharepointConnector, "_resolve_drive", fake_resolve_drive)
    monkeypatch.setattr(
        SharepointConnector, "_get_graph_access_token", fake_get_access_token
    )

    return connector


def _mock_convert(monkeypatch: pytest.MonkeyPatch) -> None:
    """Replace _convert_driveitem_to_document_with_permissions with a trivial stub."""

    def fake_convert(
        driveitem: DriveItemData,
        drive_name: str,  # noqa: ARG001
        ctx: Any = None,  # noqa: ARG001
        graph_client: Any = None,  # noqa: ARG001
        graph_api_base: str = "",  # noqa: ARG001
        include_permissions: bool = False,  # noqa: ARG001
        parent_hierarchy_raw_node_id: str | None = None,  # noqa: ARG001
        access_token: str | None = None,  # noqa: ARG001
        treat_sharing_link_as_public: bool = False,  # noqa: ARG001
    ) -> Document:
        return _make_document(driveitem)

    monkeypatch.setattr(
        "onyx.connectors.sharepoint.connector._convert_driveitem_to_document_with_permissions",
        fake_convert,
    )


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestDeltaPerPageCheckpointing:
    """Delta (non-folder-scoped) drives should process one API page per
    _load_from_checkpoint call, persisting the next-link in between."""

    def test_processes_one_page_per_cycle(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        items_p1 = [_make_item("a"), _make_item("b")]
        items_p2 = [_make_item("c")]
        items_p3 = [_make_item("d"), _make_item("e")]

        call_count = 0

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return items_p1, "https://graph.microsoft.com/next2"
            if call_count == 2:
                return items_p2, "https://graph.microsoft.com/next3"
            return items_p3, None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()

        # Call 1: Phase 3a inits drive, Phase 3b processes page 1
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 2
        assert (
            checkpoint.current_drive_delta_next_link
            == "https://graph.microsoft.com/next2"
        )
        assert checkpoint.current_drive_id == DRIVE_ID
        assert checkpoint.has_more is True

        # Call 2: Phase 3b processes page 2
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 1
        assert (
            checkpoint.current_drive_delta_next_link
            == "https://graph.microsoft.com/next3"
        )

        # Call 3: Phase 3b processes page 3 (last)
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 2
        assert checkpoint.current_drive_name is None
        assert checkpoint.current_drive_id is None
        assert checkpoint.current_drive_delta_next_link is None

    def test_resume_after_simulated_crash(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Serialise the checkpoint after page 1, create a fresh connector,
        and verify page 2 is fetched using the saved next-link."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        captured_urls: list[str] = []
        call_count = 0

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            nonlocal call_count
            call_count += 1
            captured_urls.append(page_url)
            if call_count == 1:
                return [_make_item("a")], "https://graph.microsoft.com/next2"
            return [_make_item("b")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        # Process page 1
        checkpoint = _build_ready_checkpoint()
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        _, checkpoint = _consume_generator(gen)
        assert (
            checkpoint.current_drive_delta_next_link
            == "https://graph.microsoft.com/next2"
        )

        # --- Simulate crash: serialise & deserialise checkpoint ---
        saved_json = checkpoint.model_dump_json()
        restored = SharepointConnectorCheckpoint.model_validate_json(saved_json)

        # New connector instance (as if process restarted)
        connector2 = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)
        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        # Resume — should pick up from next2
        gen = connector2._load_from_checkpoint(
            _START_TS, _END_TS, restored, include_permissions=False
        )
        yielded, final_cp = _consume_generator(gen)

        docs = _docs_from(yielded)
        assert len(docs) == 1
        assert docs[0].id == "b"
        assert captured_urls[-1] == "https://graph.microsoft.com/next2"
        assert final_cp.current_drive_name is None
        assert final_cp.current_drive_delta_next_link is None

    def test_single_page_drive_completes_in_one_cycle(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A drive with only one delta page should init + process + clear
        in a single _load_from_checkpoint call."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            return [_make_item("only")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, final_cp = _consume_generator(gen)

        assert len(_docs_from(yielded)) == 1
        assert final_cp.current_drive_name is None
        assert final_cp.current_drive_id is None
        assert final_cp.current_drive_delta_next_link is None


class TestBfsPathNoCheckpointing:
    """Folder-scoped (BFS) drives should process all items in one call
    because the BFS queue cannot be cheaply serialised."""

    def test_bfs_processes_all_at_once(self, monkeypatch: pytest.MonkeyPatch) -> None:
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        items = [_make_item("x"), _make_item("y"), _make_item("z")]

        def fake_iter_paged(
            self: SharepointConnector,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            folder_path: str | None = None,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> Generator[DriveItemData, None, None]:
            yield from items

        monkeypatch.setattr(
            SharepointConnector, "_iter_drive_items_paged", fake_iter_paged
        )

        checkpoint = _build_ready_checkpoint(folder_path="Engineering/Docs")
        gen = connector._load_from_checkpoint(
            _EPOCH_START, _END_TS, checkpoint, include_permissions=False
        )
        yielded, final_cp = _consume_generator(gen)

        assert len(_docs_from(yielded)) == 3
        assert final_cp.current_drive_name is None
        assert final_cp.current_drive_id is None
        assert final_cp.current_drive_delta_next_link is None


class TestDelta410GoneResync:
    """On 410 Gone the checkpoint should be updated with a full-resync URL
    and the next cycle should re-enumerate from scratch."""

    def test_410_stores_full_resync_url(self, monkeypatch: pytest.MonkeyPatch) -> None:
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        call_count = 0

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,
        ) -> tuple[list[DriveItemData], str | None]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                # Simulate the 410 handler returning a full-resync URL
                full_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/delta?$top={page_size}"
                return [], full_url
            return [_make_item("recovered")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()

        # Call 1: 3a inits, 3b gets empty page + resync URL
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 0
        assert checkpoint.current_drive_delta_next_link is not None
        assert "token=" not in checkpoint.current_drive_delta_next_link

        # Call 2: processes the full resync
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert len(docs) == 1
        assert docs[0].id == "recovered"
        assert checkpoint.current_drive_name is None


class TestDeltaPageFetchFailure:
    """If _fetch_one_delta_page raises, the drive should be abandoned with a
    ConnectorFailure and the checkpoint should be cleared for the next drive."""

    def test_page_fetch_error_yields_failure_and_clears_state(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            raise RuntimeError("network blip")

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, final_cp = _consume_generator(gen)

        failures = _failures_from(yielded)
        assert len(failures) == 1
        assert "network blip" in failures[0].failure_message
        assert final_cp.current_drive_name is None
        assert final_cp.current_drive_id is None
        assert final_cp.current_drive_delta_next_link is None


class TestDeltaDuplicateDocumentDedup:
    """The Microsoft Graph delta API can return the same item on multiple
    pages.  Documents already yielded should be skipped via
    checkpoint.seen_document_ids."""

    def test_duplicate_across_pages_is_skipped(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Item 'dup' appears on both page 1 and page 2.  It should only be
        yielded once."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        call_count = 0

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return [_make_item("a"), _make_item("dup")], "https://next2"
            return [_make_item("dup"), _make_item("b")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()

        # Page 1: yields a, dup
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert [d.id for d in docs] == ["a", "dup"]
        assert "dup" in checkpoint.seen_document_ids

        # Page 2: dup should be skipped, only b yielded
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert [d.id for d in docs] == ["b"]

    def test_duplicate_within_same_page_is_skipped(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """If the same item appears twice on a single delta page, only the
        first occurrence should be yielded."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            return [_make_item("x"), _make_item("x"), _make_item("y")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert [d.id for d in docs] == ["x", "y"]

    def test_seen_ids_survive_checkpoint_serialization(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """seen_document_ids must survive JSON serialization so that
        dedup works across crash + resume."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        call_count = 0

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return [_make_item("a")], "https://next2"
            return [_make_item("a"), _make_item("b")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint()

        # Page 1
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        _, checkpoint = _consume_generator(gen)
        assert "a" in checkpoint.seen_document_ids

        # Simulate crash: round-trip through JSON
        restored = SharepointConnectorCheckpoint.model_validate_json(
            checkpoint.model_dump_json()
        )
        assert "a" in restored.seen_document_ids

        # Page 2 with restored checkpoint: 'a' should be skipped
        connector2 = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)
        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        gen = connector2._load_from_checkpoint(
            _START_TS, _END_TS, restored, include_permissions=False
        )
        yielded, final_cp = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert [d.id for d in docs] == ["b"]

    def test_no_dedup_across_separate_indexing_runs(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A fresh checkpoint (new indexing run) should have an empty
        seen_document_ids, so previously-indexed docs are re-processed."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            return [_make_item("a")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        # First run
        cp1 = _build_ready_checkpoint()
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, cp1, include_permissions=False
        )
        yielded, _ = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 1

        # Second run with a fresh checkpoint — same doc should appear again
        cp2 = _build_ready_checkpoint()
        assert len(cp2.seen_document_ids) == 0
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, cp2, include_permissions=False
        )
        yielded, _ = _consume_generator(gen)
        assert len(_docs_from(yielded)) == 1

    def test_same_id_across_drives_not_skipped(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Graph item IDs are only unique within a drive.  An item in drive B
        that happens to share an ID with an item already seen in drive A must
        NOT be skipped."""
        connector = _setup_connector(monkeypatch)
        _mock_convert(monkeypatch)

        def fake_fetch_page(
            self: SharepointConnector,  # noqa: ARG001
            page_url: str,  # noqa: ARG001
            drive_id: str,  # noqa: ARG001
            start: datetime | None = None,  # noqa: ARG001
            end: datetime | None = None,  # noqa: ARG001
            page_size: int = 200,  # noqa: ARG001
        ) -> tuple[list[DriveItemData], str | None]:
            return [_make_item("shared-id")], None

        monkeypatch.setattr(
            SharepointConnector, "_fetch_one_delta_page", fake_fetch_page
        )

        checkpoint = _build_ready_checkpoint(drive_names=["DriveA", "DriveB"])

        # Drive A: yields the item
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert len(docs) == 1
        assert docs[0].id == "shared-id"

        # seen_document_ids should have been cleared when drive A finished
        assert len(checkpoint.seen_document_ids) == 0

        # Drive B: same ID must be yielded again (different drive)
        gen = connector._load_from_checkpoint(
            _START_TS, _END_TS, checkpoint, include_permissions=False
        )
        yielded, checkpoint = _consume_generator(gen)
        docs = _docs_from(yielded)
        assert len(docs) == 1
        assert docs[0].id == "shared-id"


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_denylist.py
================================================
from __future__ import annotations

import pytest

from onyx.connectors.sharepoint.connector import _build_item_relative_path
from onyx.connectors.sharepoint.connector import _is_path_excluded
from onyx.connectors.sharepoint.connector import _is_site_excluded
from onyx.connectors.sharepoint.connector import DriveItemData
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SiteDescriptor


class TestIsSiteExcluded:
    def test_exact_match(self) -> None:
        assert _is_site_excluded(
            "https://contoso.sharepoint.com/sites/archive",
            ["https://contoso.sharepoint.com/sites/archive"],
        )

    def test_trailing_slash_mismatch(self) -> None:
        assert _is_site_excluded(
            "https://contoso.sharepoint.com/sites/archive/",
            ["https://contoso.sharepoint.com/sites/archive"],
        )

    def test_glob_wildcard(self) -> None:
        assert _is_site_excluded(
            "https://contoso.sharepoint.com/sites/archive-2024",
            ["*/sites/archive-*"],
        )

    def test_no_match(self) -> None:
        assert not _is_site_excluded(
            "https://contoso.sharepoint.com/sites/engineering",
            ["https://contoso.sharepoint.com/sites/archive"],
        )

    def test_empty_patterns(self) -> None:
        assert not _is_site_excluded(
            "https://contoso.sharepoint.com/sites/engineering",
            [],
        )

    def test_multiple_patterns(self) -> None:
        patterns = [
            "*/sites/archive-*",
            "*/sites/hr-confidential",
        ]
        assert _is_site_excluded(
            "https://contoso.sharepoint.com/sites/hr-confidential",
            patterns,
        )
        assert not _is_site_excluded(
            "https://contoso.sharepoint.com/sites/engineering",
            patterns,
        )


class TestIsPathExcluded:
    def test_filename_glob(self) -> None:
        assert _is_path_excluded("Engineering/report.tmp", ["*.tmp"])

    def test_filename_only(self) -> None:
        assert _is_path_excluded("report.tmp", ["*.tmp"])

    def test_office_lock_files(self) -> None:
        assert _is_path_excluded("Docs/~$document.docx", ["~$*"])

    def test_folder_glob(self) -> None:
        assert _is_path_excluded("Archive/old/report.docx", ["Archive/*"])

    def test_nested_folder_glob(self) -> None:
        assert _is_path_excluded("Projects/Archive/report.docx", ["*/Archive/*"])

    def test_no_match(self) -> None:
        assert not _is_path_excluded("Engineering/report.docx", ["*.tmp"])

    def test_empty_patterns(self) -> None:
        assert not _is_path_excluded("anything.docx", [])

    def test_multiple_patterns(self) -> None:
        patterns = ["*.tmp", "~$*", "Archive/*"]
        assert _is_path_excluded("test.tmp", patterns)
        assert _is_path_excluded("~$doc.docx", patterns)
        assert _is_path_excluded("Archive/old.pdf", patterns)
        assert not _is_path_excluded("Engineering/report.docx", patterns)


class TestBuildItemRelativePath:
    def test_with_folder(self) -> None:
        assert (
            _build_item_relative_path(
                "/drives/abc/root:/Engineering/API", "report.docx"
            )
            == "Engineering/API/report.docx"
        )

    def test_root_level(self) -> None:
        assert (
            _build_item_relative_path("/drives/abc/root:", "report.docx")
            == "report.docx"
        )

    def test_none_parent(self) -> None:
        assert _build_item_relative_path(None, "report.docx") == "report.docx"

    def test_percent_encoded_folder(self) -> None:
        assert (
            _build_item_relative_path("/drives/abc/root:/My%20Documents", "report.docx")
            == "My Documents/report.docx"
        )

    def test_no_root_marker(self) -> None:
        assert _build_item_relative_path("/drives/abc", "report.docx") == "report.docx"


class TestFilterExcludedSites:
    def test_filters_matching_sites(self) -> None:
        connector = SharepointConnector(
            excluded_sites=["*/sites/archive"],
        )
        descriptors = [
            SiteDescriptor(
                url="https://t.sharepoint.com/sites/archive",
                drive_name=None,
                folder_path=None,
            ),
            SiteDescriptor(
                url="https://t.sharepoint.com/sites/engineering",
                drive_name=None,
                folder_path=None,
            ),
        ]
        result = connector._filter_excluded_sites(descriptors)
        assert len(result) == 1
        assert result[0].url == "https://t.sharepoint.com/sites/engineering"

    def test_empty_excluded_returns_all(self) -> None:
        connector = SharepointConnector(excluded_sites=[])
        descriptors = [
            SiteDescriptor(
                url="https://t.sharepoint.com/sites/a",
                drive_name=None,
                folder_path=None,
            ),
            SiteDescriptor(
                url="https://t.sharepoint.com/sites/b",
                drive_name=None,
                folder_path=None,
            ),
        ]
        result = connector._filter_excluded_sites(descriptors)
        assert len(result) == 2


class TestIsDriveitemExcluded:
    def test_excluded_by_extension(self) -> None:
        connector = SharepointConnector(excluded_paths=["*.tmp"])
        item = DriveItemData(
            id="1",
            name="file.tmp",
            web_url="https://example.com/file.tmp",
            parent_reference_path="/drives/abc/root:/Docs",
        )
        assert connector._is_driveitem_excluded(item)

    def test_not_excluded(self) -> None:
        connector = SharepointConnector(excluded_paths=["*.tmp"])
        item = DriveItemData(
            id="1",
            name="file.docx",
            web_url="https://example.com/file.docx",
            parent_reference_path="/drives/abc/root:/Docs",
        )
        assert not connector._is_driveitem_excluded(item)

    def test_no_patterns_never_excludes(self) -> None:
        connector = SharepointConnector(excluded_paths=[])
        item = DriveItemData(
            id="1",
            name="file.tmp",
            web_url="https://example.com/file.tmp",
            parent_reference_path="/drives/abc/root:/Docs",
        )
        assert not connector._is_driveitem_excluded(item)

    def test_folder_pattern(self) -> None:
        connector = SharepointConnector(excluded_paths=["Archive/*"])
        item = DriveItemData(
            id="1",
            name="old.pdf",
            web_url="https://example.com/old.pdf",
            parent_reference_path="/drives/abc/root:/Archive",
        )
        assert connector._is_driveitem_excluded(item)

    @pytest.mark.parametrize(
        "whitespace_pattern",
        ["", "  ", "\t"],
    )
    def test_whitespace_patterns_ignored(self, whitespace_pattern: str) -> None:
        connector = SharepointConnector(excluded_paths=[whitespace_pattern])
        assert connector.excluded_paths == []

    def test_whitespace_padded_patterns_are_trimmed(self) -> None:
        connector = SharepointConnector(excluded_paths=["  *.tmp  ", " Archive/* "])
        assert connector.excluded_paths == ["*.tmp", "Archive/*"]

        item = DriveItemData(
            id="1",
            name="file.tmp",
            web_url="https://example.com/file.tmp",
            parent_reference_path="/drives/abc/root:/Docs",
        )
        assert connector._is_driveitem_excluded(item)


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_drive_matching.py
================================================
from __future__ import annotations

from collections import deque
from collections.abc import Generator
from collections.abc import Sequence
from datetime import datetime
from datetime import timezone
from typing import Any

import pytest

from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import TextSection
from onyx.connectors.sharepoint.connector import DriveItemData
from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SharepointConnectorCheckpoint
from onyx.connectors.sharepoint.connector import SiteDescriptor


class _FakeQuery:
    def __init__(self, payload: Sequence[Any]) -> None:
        self._payload = payload

    def execute_query(self) -> Sequence[Any]:
        return self._payload


class _FakeDrive:
    def __init__(self, name: str) -> None:
        self.name = name
        self.id = f"fake-drive-id-{name}"
        self.web_url = f"https://example.sharepoint.com/sites/sample/{name}"


class _FakeDrivesCollection:
    def __init__(self, drives: Sequence[_FakeDrive]) -> None:
        self._drives = drives

    def get(self) -> _FakeQuery:
        return _FakeQuery(list(self._drives))


class _FakeSite:
    def __init__(self, drives: Sequence[_FakeDrive]) -> None:
        self.drives = _FakeDrivesCollection(drives)


class _FakeSites:
    def __init__(self, drives: Sequence[_FakeDrive]) -> None:
        self._drives = drives

    def get_by_url(self, _url: str) -> _FakeSite:
        return _FakeSite(self._drives)


class _FakeGraphClient:
    def __init__(self, drives: Sequence[_FakeDrive]) -> None:
        self.sites = _FakeSites(drives)


_SAMPLE_ITEM = DriveItemData(
    id="item-1",
    name="sample.pdf",
    web_url="https://example.sharepoint.com/sites/sample/sample.pdf",
    parent_reference_path=None,
    drive_id="fake-drive-id",
)


def _build_connector(drives: Sequence[_FakeDrive]) -> SharepointConnector:
    connector = SharepointConnector()
    connector._graph_client = _FakeGraphClient(drives)
    return connector


def _fake_iter_drive_items_paged(
    self: SharepointConnector,  # noqa: ARG001
    drive_id: str,  # noqa: ARG001
    folder_path: str | None = None,  # noqa: ARG001
    start: datetime | None = None,  # noqa: ARG001
    end: datetime | None = None,  # noqa: ARG001
    page_size: int = 200,  # noqa: ARG001
) -> Generator[DriveItemData, None, None]:
    yield _SAMPLE_ITEM


def _fake_iter_drive_items_delta(
    self: SharepointConnector,  # noqa: ARG001
    drive_id: str,  # noqa: ARG001
    start: datetime | None = None,  # noqa: ARG001
    end: datetime | None = None,  # noqa: ARG001
    page_size: int = 200,  # noqa: ARG001
) -> Generator[DriveItemData, None, None]:
    yield _SAMPLE_ITEM


@pytest.mark.parametrize(
    ("requested_drive_name", "graph_drive_name"),
    [
        ("Shared Documents", "Documents"),
        ("Freigegebene Dokumente", "Dokumente"),
        ("Documentos compartidos", "Documentos"),
    ],
)
def test_fetch_driveitems_matches_international_drive_names(
    requested_drive_name: str,
    graph_drive_name: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    connector = _build_connector([_FakeDrive(graph_drive_name)])
    site_descriptor = SiteDescriptor(
        url="https://example.sharepoint.com/sites/sample",
        drive_name=requested_drive_name,
        folder_path=None,
    )

    monkeypatch.setattr(
        SharepointConnector,
        "_iter_drive_items_delta",
        _fake_iter_drive_items_delta,
    )

    results = list(connector._fetch_driveitems(site_descriptor=site_descriptor))

    assert len(results) == 1
    drive_item, returned_drive_name, drive_web_url = results[0]
    assert drive_item.id == _SAMPLE_ITEM.id
    assert returned_drive_name == requested_drive_name
    assert drive_web_url is not None


@pytest.mark.parametrize(
    ("requested_drive_name", "graph_drive_name"),
    [
        ("Shared Documents", "Documents"),
        ("Freigegebene Dokumente", "Dokumente"),
        ("Documentos compartidos", "Documentos"),
    ],
)
def test_get_drive_items_for_drive_id_matches_map(
    requested_drive_name: str,
    graph_drive_name: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    connector = _build_connector([_FakeDrive(graph_drive_name)])
    site_descriptor = SiteDescriptor(
        url="https://example.sharepoint.com/sites/sample",
        drive_name=requested_drive_name,
        folder_path=None,
    )

    monkeypatch.setattr(
        SharepointConnector,
        "_iter_drive_items_delta",
        _fake_iter_drive_items_delta,
    )

    items_iter = connector._get_drive_items_for_drive_id(
        site_descriptor=site_descriptor,
        drive_id="fake-drive-id",
    )

    results = list(items_iter)
    assert len(results) == 1
    assert results[0].id == _SAMPLE_ITEM.id


def test_load_from_checkpoint_maps_drive_name(monkeypatch: pytest.MonkeyPatch) -> None:
    connector = SharepointConnector()
    connector._graph_client = object()
    connector.include_site_pages = False

    captured_drive_names: list[str] = []
    sample_item = DriveItemData(
        id="doc-1",
        name="sample.pdf",
        web_url="https://example.sharepoint.com/sites/sample/sample.pdf",
        parent_reference_path=None,
        drive_id="fake-drive-id",
    )

    def fake_resolve_drive(
        self: SharepointConnector,  # noqa: ARG001
        site_descriptor: SiteDescriptor,  # noqa: ARG001
        drive_name: str,
    ) -> tuple[str, str | None]:
        assert drive_name == "Documents"
        return (
            "fake-drive-id",
            "https://example.sharepoint.com/sites/sample/Documents",
        )

    def fake_fetch_one_delta_page(
        self: SharepointConnector,  # noqa: ARG001
        page_url: str,  # noqa: ARG001
        drive_id: str,  # noqa: ARG001
        start: datetime | None = None,  # noqa: ARG001
        end: datetime | None = None,  # noqa: ARG001
        page_size: int = 200,  # noqa: ARG001
    ) -> tuple[list[DriveItemData], str | None]:
        return [sample_item], None

    def fake_convert(
        driveitem: DriveItemData,  # noqa: ARG001
        drive_name: str,
        ctx: Any,  # noqa: ARG001
        graph_client: Any,  # noqa: ARG001
        graph_api_base: str,  # noqa: ARG001
        include_permissions: bool,  # noqa: ARG001
        parent_hierarchy_raw_node_id: str | None = None,  # noqa: ARG001
        access_token: str | None = None,  # noqa: ARG001
        treat_sharing_link_as_public: bool = False,  # noqa: ARG001
    ) -> Document:
        captured_drive_names.append(drive_name)
        return Document(
            id="doc-1",
            source=DocumentSource.SHAREPOINT,
            semantic_identifier="sample.pdf",
            metadata={},
            sections=[TextSection(link="https://example.com", text="content")],
        )

    def fake_get_access_token(self: SharepointConnector) -> str:  # noqa: ARG001
        return "fake-access-token"

    monkeypatch.setattr(
        SharepointConnector,
        "_resolve_drive",
        fake_resolve_drive,
    )
    monkeypatch.setattr(
        SharepointConnector,
        "_fetch_one_delta_page",
        fake_fetch_one_delta_page,
    )
    monkeypatch.setattr(
        "onyx.connectors.sharepoint.connector._convert_driveitem_to_document_with_permissions",
        fake_convert,
    )
    monkeypatch.setattr(
        SharepointConnector,
        "_get_graph_access_token",
        fake_get_access_token,
    )

    checkpoint = SharepointConnectorCheckpoint(has_more=True)
    checkpoint.cached_site_descriptors = deque()
    checkpoint.current_site_descriptor = SiteDescriptor(
        url="https://example.sharepoint.com/sites/sample",
        drive_name=SHARED_DOCUMENTS_MAP["Documents"],
        folder_path=None,
    )
    checkpoint.cached_drive_names = deque(["Documents"])
    checkpoint.current_drive_name = None
    checkpoint.process_site_pages = False

    generator = connector._load_from_checkpoint(
        start=0,
        end=0,
        checkpoint=checkpoint,
        include_permissions=False,
    )

    all_yielded: list[Any] = []
    try:
        while True:
            all_yielded.append(next(generator))
    except StopIteration:
        pass

    from onyx.connectors.models import HierarchyNode

    documents = [item for item in all_yielded if not isinstance(item, HierarchyNode)]
    hierarchy_nodes = [item for item in all_yielded if isinstance(item, HierarchyNode)]

    assert len(documents) == 1
    assert captured_drive_names == [SHARED_DOCUMENTS_MAP["Documents"]]
    assert len(hierarchy_nodes) >= 1


def test_get_drive_items_uses_delta_when_no_folder_path(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When folder_path is None, _get_drive_items_for_drive_id should use delta."""
    connector = _build_connector([_FakeDrive("Documents")])
    site = SiteDescriptor(
        url="https://example.sharepoint.com/sites/sample",
        drive_name="Documents",
        folder_path=None,
    )

    called_method: list[str] = []

    def fake_delta(
        self: SharepointConnector,  # noqa: ARG001
        drive_id: str,  # noqa: ARG001
        start: datetime | None = None,  # noqa: ARG001
        end: datetime | None = None,  # noqa: ARG001
        page_size: int = 200,  # noqa: ARG001
    ) -> Generator[DriveItemData, None, None]:
        called_method.append("delta")
        yield _SAMPLE_ITEM

    def fake_paged(
        self: SharepointConnector,  # noqa: ARG001
        drive_id: str,  # noqa: ARG001
        folder_path: str | None = None,  # noqa: ARG001
        start: datetime | None = None,  # noqa: ARG001
        end: datetime | None = None,  # noqa: ARG001
        page_size: int = 200,  # noqa: ARG001
    ) -> Generator[DriveItemData, None, None]:
        called_method.append("paged")
        yield _SAMPLE_ITEM

    monkeypatch.setattr(SharepointConnector, "_iter_drive_items_delta", fake_delta)
    monkeypatch.setattr(SharepointConnector, "_iter_drive_items_paged", fake_paged)

    items = connector._get_drive_items_for_drive_id(site, "fake-drive-id")
    list(items)

    assert called_method == ["delta"]


def test_get_drive_items_uses_paged_when_folder_path_set(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When folder_path is set, _get_drive_items_for_drive_id should use BFS."""
    connector = _build_connector([_FakeDrive("Documents")])
    site = SiteDescriptor(
        url="https://example.sharepoint.com/sites/sample",
        drive_name="Documents",
        folder_path="Engineering/Docs",
    )

    called_method: list[str] = []

    def fake_delta(
        self: SharepointConnector,  # noqa: ARG001
        drive_id: str,  # noqa: ARG001
        start: datetime | None = None,  # noqa: ARG001
        end: datetime | None = None,  # noqa: ARG001
        page_size: int = 200,  # noqa: ARG001
    ) -> Generator[DriveItemData, None, None]:
        called_method.append("delta")
        yield _SAMPLE_ITEM

    def fake_paged(
        self: SharepointConnector,  # noqa: ARG001
        drive_id: str,  # noqa: ARG001
        folder_path: str | None = None,  # noqa: ARG001
        start: datetime | None = None,  # noqa: ARG001
        end: datetime | None = None,  # noqa: ARG001
        page_size: int = 200,  # noqa: ARG001
    ) -> Generator[DriveItemData, None, None]:
        called_method.append("paged")
        yield _SAMPLE_ITEM

    monkeypatch.setattr(SharepointConnector, "_iter_drive_items_delta", fake_delta)
    monkeypatch.setattr(SharepointConnector, "_iter_drive_items_paged", fake_paged)

    items = connector._get_drive_items_for_drive_id(site, "fake-drive-id")
    list(items)

    assert called_method == ["paged"]


def test_iter_drive_items_delta_uses_timestamp_token(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Delta iteration should pass the start time as a URL token for incremental sync."""
    connector = SharepointConnector()

    captured_urls: list[str] = []

    def fake_graph_api_get_json(
        self: SharepointConnector,  # noqa: ARG001
        url: str,
        params: dict[str, str] | None = None,  # noqa: ARG001
    ) -> dict[str, Any]:
        captured_urls.append(url)
        return {
            "value": [
                {
                    "id": "file-1",
                    "name": "report.docx",
                    "webUrl": "https://example.sharepoint.com/report.docx",
                    "file": {
                        "mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
                    },
                    "lastModifiedDateTime": "2025-06-15T12:00:00Z",
                    "parentReference": {"path": "/drives/d1/root:", "driveId": "d1"},
                }
            ],
            "@odata.deltaLink": "https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final",
        }

    monkeypatch.setattr(
        SharepointConnector, "_graph_api_get_json", fake_graph_api_get_json
    )

    start = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc)
    items = list(connector._iter_drive_items_delta("d1", start=start))

    assert len(items) == 1
    assert items[0].id == "file-1"
    assert len(captured_urls) == 1
    assert "token=2025-06-01T00%3A00%3A00%2B00%3A00" in captured_urls[0]


def test_iter_drive_items_delta_full_crawl_when_no_start(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Delta iteration without a start time should do a full enumeration (no token)."""
    connector = SharepointConnector()

    captured_urls: list[str] = []

    def fake_graph_api_get_json(
        self: SharepointConnector,  # noqa: ARG001
        url: str,
        params: dict[str, str] | None = None,  # noqa: ARG001
    ) -> dict[str, Any]:
        captured_urls.append(url)
        return {
            "value": [],
            "@odata.deltaLink": "https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final",
        }

    monkeypatch.setattr(
        SharepointConnector, "_graph_api_get_json", fake_graph_api_get_json
    )

    list(connector._iter_drive_items_delta("d1"))

    assert len(captured_urls) == 1
    assert "token=" not in captured_urls[0]
    assert captured_urls[0].endswith("/drives/d1/root/delta")


def test_iter_drive_items_delta_skips_folders_and_deleted(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Delta results with folder or deleted facets should be skipped."""
    connector = SharepointConnector()

    def fake_graph_api_get_json(
        self: SharepointConnector,  # noqa: ARG001
        url: str,  # noqa: ARG001
        params: dict[str, str] | None = None,  # noqa: ARG001
    ) -> dict[str, Any]:
        return {
            "value": [
                {"id": "folder-1", "name": "Docs", "folder": {"childCount": 5}},
                {"id": "deleted-1", "name": "old.txt", "deleted": {"state": "deleted"}},
                {
                    "id": "file-1",
                    "name": "keep.pdf",
                    "webUrl": "https://example.sharepoint.com/keep.pdf",
                    "file": {"mimeType": "application/pdf"},
                    "lastModifiedDateTime": "2025-06-15T12:00:00Z",
                    "parentReference": {"path": "/drives/d1/root:", "driveId": "d1"},
                },
            ],
            "@odata.deltaLink": "https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final",
        }

    monkeypatch.setattr(
        SharepointConnector, "_graph_api_get_json", fake_graph_api_get_json
    )

    items = list(connector._iter_drive_items_delta("d1"))
    assert len(items) == 1
    assert items[0].id == "file-1"


def test_iter_drive_items_delta_handles_410_gone(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On 410 Gone, delta should fall back to full enumeration."""
    import requests as req

    connector = SharepointConnector()

    call_count = 0

    def fake_graph_api_get_json(
        self: SharepointConnector,  # noqa: ARG001
        url: str,
        params: dict[str, str] | None = None,  # noqa: ARG001
    ) -> dict[str, Any]:
        nonlocal call_count
        call_count += 1

        if call_count == 1 and "token=" in url:
            response = req.Response()
            response.status_code = 410
            raise req.HTTPError(response=response)

        return {
            "value": [
                {
                    "id": "file-1",
                    "name": "doc.pdf",
                    "webUrl": "https://example.sharepoint.com/doc.pdf",
                    "file": {"mimeType": "application/pdf"},
                    "lastModifiedDateTime": "2025-06-15T12:00:00Z",
                    "parentReference": {"path": "/drives/d1/root:", "driveId": "d1"},
                }
            ],
            "@odata.deltaLink": "https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final",
        }

    monkeypatch.setattr(
        SharepointConnector, "_graph_api_get_json", fake_graph_api_get_json
    )

    start = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc)
    items = list(connector._iter_drive_items_delta("d1", start=start))

    assert len(items) == 1
    assert items[0].id == "file-1"
    assert call_count == 2


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_fetch_site_pages.py
================================================
"""Unit tests for SharepointConnector._fetch_site_pages error handling.

Covers 404 handling (classic sites / no modern pages) and 400
canvasLayout fallback (corrupt pages causing $expand=canvasLayout to
fail on the LIST endpoint).
"""

from __future__ import annotations

import json
from typing import Any

import pytest
from requests import Response
from requests.exceptions import HTTPError

from onyx.connectors.sharepoint.connector import GRAPH_INVALID_REQUEST_CODE
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SiteDescriptor

SITE_URL = "https://tenant.sharepoint.com/sites/ClassicSite"
FAKE_SITE_ID = "tenant.sharepoint.com,abc123,def456"
PAGES_COLLECTION = f"https://graph.microsoft.com/v1.0/sites/{FAKE_SITE_ID}/pages"
SITE_PAGES_BASE = f"{PAGES_COLLECTION}/microsoft.graph.sitePage"


def _site_descriptor() -> SiteDescriptor:
    return SiteDescriptor(url=SITE_URL, drive_name=None, folder_path=None)


def _make_http_error(
    status_code: int,
    error_code: str = "itemNotFound",
    message: str = "Item not found",
) -> HTTPError:
    body = {"error": {"code": error_code, "message": message}}
    response = Response()
    response.status_code = status_code
    response._content = json.dumps(body).encode()
    response.headers["Content-Type"] = "application/json"
    return HTTPError(response=response)


def _setup_connector(
    monkeypatch: pytest.MonkeyPatch,  # noqa: ARG001
) -> SharepointConnector:
    """Create a connector with the graph client and site resolution mocked."""
    connector = SharepointConnector(sites=[SITE_URL])
    connector.graph_api_base = "https://graph.microsoft.com/v1.0"

    mock_sites = type(
        "FakeSites",
        (),
        {
            "get_by_url": staticmethod(
                lambda url: type(  # noqa: ARG005
                    "Q",
                    (),
                    {
                        "execute_query": lambda self: None,  # noqa: ARG005
                        "id": FAKE_SITE_ID,
                    },
                )()
            ),
        },
    )()
    connector._graph_client = type("FakeGraphClient", (), {"sites": mock_sites})()

    return connector


def _patch_graph_api_get_json(
    monkeypatch: pytest.MonkeyPatch,
    fake_fn: Any,
) -> None:
    monkeypatch.setattr(SharepointConnector, "_graph_api_get_json", fake_fn)


class TestFetchSitePages404:
    def test_404_yields_no_pages(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """A 404 from the Pages API should result in zero yielded pages."""
        connector = _setup_connector(monkeypatch)

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            raise _make_http_error(404)

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        pages = list(connector._fetch_site_pages(_site_descriptor()))
        assert pages == []

    def test_404_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """A 404 must not propagate as an exception."""
        connector = _setup_connector(monkeypatch)

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            raise _make_http_error(404)

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        for _ in connector._fetch_site_pages(_site_descriptor()):
            pass

    def test_non_404_http_error_still_raises(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Non-404 HTTP errors (e.g. 403) must still propagate."""
        connector = _setup_connector(monkeypatch)

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            raise _make_http_error(403)

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        with pytest.raises(HTTPError):
            list(connector._fetch_site_pages(_site_descriptor()))

    def test_successful_fetch_yields_pages(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """When the API succeeds, pages should be yielded normally."""
        connector = _setup_connector(monkeypatch)

        fake_page = {
            "id": "page-1",
            "title": "Hello World",
            "webUrl": f"{SITE_URL}/SitePages/Hello.aspx",
            "lastModifiedDateTime": "2025-06-01T00:00:00Z",
        }

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            return {"value": [fake_page]}

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        pages = list(connector._fetch_site_pages(_site_descriptor()))
        assert len(pages) == 1
        assert pages[0]["id"] == "page-1"

    def test_404_on_second_page_stops_pagination(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """If the first API page succeeds but a nextLink returns 404,
        already-yielded pages are kept and iteration stops cleanly."""
        connector = _setup_connector(monkeypatch)

        call_count = 0
        first_page = {
            "id": "page-1",
            "title": "First",
            "webUrl": f"{SITE_URL}/SitePages/First.aspx",
            "lastModifiedDateTime": "2025-06-01T00:00:00Z",
        }

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return {
                    "value": [first_page],
                    "@odata.nextLink": "https://graph.microsoft.com/next",
                }
            raise _make_http_error(404)

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        pages = list(connector._fetch_site_pages(_site_descriptor()))
        assert len(pages) == 1
        assert pages[0]["id"] == "page-1"


class TestFetchSitePages400Fallback:
    """When $expand=canvasLayout on the LIST endpoint returns 400
    invalidRequest, _fetch_site_pages should fall back to listing
    without expansion, then expanding each page individually."""

    GOOD_PAGE: dict[str, Any] = {
        "id": "good-1",
        "name": "Good.aspx",
        "title": "Good Page",
        "lastModifiedDateTime": "2025-06-01T00:00:00Z",
    }
    BAD_PAGE: dict[str, Any] = {
        "id": "bad-1",
        "name": "Bad.aspx",
        "title": "Bad Page",
        "lastModifiedDateTime": "2025-06-01T00:00:00Z",
    }
    GOOD_PAGE_EXPANDED: dict[str, Any] = {
        **GOOD_PAGE,
        "canvasLayout": {"horizontalSections": []},
    }

    def test_fallback_expands_good_pages_individually(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """On 400 from the LIST expand, the connector should list without
        expand, then GET each page individually with $expand=canvasLayout."""
        connector = _setup_connector(monkeypatch)
        good_page = self.GOOD_PAGE
        bad_page = self.BAD_PAGE
        good_page_expanded = self.GOOD_PAGE_EXPANDED

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,
            params: dict[str, str] | None = None,
        ) -> dict[str, Any]:
            if url == SITE_PAGES_BASE and params == {"$expand": "canvasLayout"}:
                raise _make_http_error(
                    400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
                )
            if url == SITE_PAGES_BASE and params is None:
                return {"value": [good_page, bad_page]}
            expand_params = {"$expand": "canvasLayout"}
            if url == f"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage":
                assert params == expand_params, f"Expected $expand params, got {params}"
                return good_page_expanded
            if url == f"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage":
                assert params == expand_params, f"Expected $expand params, got {params}"
                raise _make_http_error(
                    400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
                )
            raise AssertionError(f"Unexpected call: {url} {params}")

        _patch_graph_api_get_json(monkeypatch, fake_get_json)
        pages = list(connector._fetch_site_pages(_site_descriptor()))

        assert len(pages) == 2
        assert pages[0].get("canvasLayout") is not None
        assert pages[1].get("canvasLayout") is None
        assert pages[1]["id"] == "bad-1"

    def test_mid_pagination_400_does_not_duplicate(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """If the first paginated batch succeeds but a later nextLink
        returns 400, pages from the first batch must not be re-yielded
        by the fallback."""
        connector = _setup_connector(monkeypatch)
        good_page = self.GOOD_PAGE
        good_page_expanded = self.GOOD_PAGE_EXPANDED
        bad_page = self.BAD_PAGE
        second_page = {
            "id": "page-2",
            "name": "Second.aspx",
            "title": "Second Page",
            "lastModifiedDateTime": "2025-06-01T00:00:00Z",
        }
        next_link = "https://graph.microsoft.com/v1.0/next-page-link"

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,
            params: dict[str, str] | None = None,
        ) -> dict[str, Any]:
            if url == SITE_PAGES_BASE and params == {"$expand": "canvasLayout"}:
                return {
                    "value": [good_page],
                    "@odata.nextLink": next_link,
                }
            if url == next_link:
                raise _make_http_error(
                    400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
                )
            if url == SITE_PAGES_BASE and params is None:
                return {"value": [good_page, bad_page, second_page]}
            expand_params = {"$expand": "canvasLayout"}
            if url == f"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage":
                assert params == expand_params, f"Expected $expand params, got {params}"
                return good_page_expanded
            if url == f"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage":
                assert params == expand_params, f"Expected $expand params, got {params}"
                raise _make_http_error(
                    400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
                )
            if url == f"{PAGES_COLLECTION}/page-2/microsoft.graph.sitePage":
                assert params == expand_params, f"Expected $expand params, got {params}"
                return {**second_page, "canvasLayout": {"horizontalSections": []}}
            raise AssertionError(f"Unexpected call: {url} {params}")

        _patch_graph_api_get_json(monkeypatch, fake_get_json)
        pages = list(connector._fetch_site_pages(_site_descriptor()))

        ids = [p["id"] for p in pages]
        assert ids == ["good-1", "bad-1", "page-2"]

    def test_non_invalid_request_400_still_raises(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A 400 with a different error code (not invalidRequest) should
        propagate, not trigger the fallback."""
        connector = _setup_connector(monkeypatch)

        def fake_get_json(
            self: SharepointConnector,  # noqa: ARG001
            url: str,  # noqa: ARG001
            params: dict[str, str] | None = None,  # noqa: ARG001
        ) -> dict[str, Any]:
            raise _make_http_error(400, "badRequest", "Something else went wrong")

        _patch_graph_api_get_json(monkeypatch, fake_get_json)

        with pytest.raises(HTTPError):
            list(connector._fetch_site_pages(_site_descriptor()))


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_hierarchy_helpers.py
================================================
"""Unit tests for SharePoint connector hierarchy helper functions."""

from __future__ import annotations

from onyx.connectors.sharepoint.connector import SharepointConnector


def test_extract_folder_path_from_parent_reference_with_folder() -> None:
    """Test extracting folder path when file is in a folder."""
    connector = SharepointConnector()

    # Standard path format: /drives/{drive_id}/root:/folder/path
    path = "/drives/b!abc123def456/root:/Engineering/API"
    result = connector._extract_folder_path_from_parent_reference(path)
    assert result == "Engineering/API"


def test_extract_folder_path_from_parent_reference_nested_folder() -> None:
    """Test extracting folder path from deeply nested folders."""
    connector = SharepointConnector()

    path = "/drives/b!xyz789/root:/Documents/Project/2025/Q1"
    result = connector._extract_folder_path_from_parent_reference(path)
    assert result == "Documents/Project/2025/Q1"


def test_extract_folder_path_from_parent_reference_at_root() -> None:
    """Test extracting folder path when file is at drive root."""
    connector = SharepointConnector()

    # File at root: path ends with "root:" or "root:/"
    path = "/drives/b!abc123/root:"
    result = connector._extract_folder_path_from_parent_reference(path)
    assert result is None


def test_extract_folder_path_from_parent_reference_at_root_with_slash() -> None:
    """Test extracting folder path when file is at drive root (with trailing slash)."""
    connector = SharepointConnector()

    path = "/drives/b!abc123/root:/"
    result = connector._extract_folder_path_from_parent_reference(path)
    assert result is None


def test_extract_folder_path_from_parent_reference_none() -> None:
    """Test extracting folder path when path is None."""
    connector = SharepointConnector()

    result = connector._extract_folder_path_from_parent_reference(None)
    assert result is None


def test_extract_folder_path_from_parent_reference_empty() -> None:
    """Test extracting folder path when path is empty."""
    connector = SharepointConnector()

    result = connector._extract_folder_path_from_parent_reference("")
    assert result is None


def test_extract_folder_path_from_parent_reference_no_root() -> None:
    """Test extracting folder path when path doesn't contain root:/."""
    connector = SharepointConnector()

    # Unusual path format without root:/
    path = "/drives/b!abc123/items/folder"
    result = connector._extract_folder_path_from_parent_reference(path)
    assert result is None


def test_build_folder_url_simple() -> None:
    """Test building folder URL with simple folder path."""
    connector = SharepointConnector()

    site_url = "https://company.sharepoint.com/sites/eng"
    drive_name = "Shared Documents"
    folder_path = "Engineering"

    result = connector._build_folder_url(site_url, drive_name, folder_path)
    expected = "https://company.sharepoint.com/sites/eng/Shared Documents/Engineering"
    assert result == expected


def test_build_folder_url_nested() -> None:
    """Test building folder URL with nested folder path."""
    connector = SharepointConnector()

    site_url = "https://company.sharepoint.com/sites/eng"
    drive_name = "Shared Documents"
    folder_path = "Engineering/API/v2"

    result = connector._build_folder_url(site_url, drive_name, folder_path)
    expected = (
        "https://company.sharepoint.com/sites/eng/Shared Documents/Engineering/API/v2"
    )
    assert result == expected


def test_build_folder_url_with_spaces() -> None:
    """Test building folder URL with spaces in folder path."""
    connector = SharepointConnector()

    site_url = "https://company.sharepoint.com/sites/eng"
    drive_name = "Shared Documents"
    folder_path = "Engineering/API Docs/Version 2"

    result = connector._build_folder_url(site_url, drive_name, folder_path)
    expected = "https://company.sharepoint.com/sites/eng/Shared Documents/Engineering/API Docs/Version 2"
    assert result == expected


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_rest_client_context_caching.py
================================================
"""Unit tests for SharepointConnector._create_rest_client_context caching."""

from __future__ import annotations

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.sharepoint.connector import _REST_CTX_MAX_AGE_S
from onyx.connectors.sharepoint.connector import SharepointConnector

SITE_A = "https://tenant.sharepoint.com/sites/SiteA"
SITE_B = "https://tenant.sharepoint.com/sites/SiteB"

FAKE_CREDS = {"sp_client_id": "x", "sp_directory_id": "y"}


def _make_connector() -> SharepointConnector:
    """Return a SharepointConnector with minimal credentials wired up."""
    connector = SharepointConnector(sites=[SITE_A])
    connector.msal_app = MagicMock()
    connector.sp_tenant_domain = "tenant"
    connector._credential_json = FAKE_CREDS
    return connector


def _noop_load_credentials(connector: SharepointConnector) -> MagicMock:
    """Patch load_credentials to just swap in a fresh MagicMock for msal_app."""

    def _fake_load(creds: dict) -> None:  # noqa: ARG001, ARG002
        connector.msal_app = MagicMock()

    mock = MagicMock(side_effect=_fake_load)
    connector.load_credentials = mock  # type: ignore[method-assign]
    return mock


def _fresh_client_context() -> MagicMock:
    """Return a MagicMock for ClientContext that produces a distinct object per call."""
    mock_cls = MagicMock()
    # Each ClientContext(url).with_access_token(cb) returns a unique sentinel
    mock_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005
    return mock_cls


@patch("onyx.connectors.sharepoint.connector.acquire_token_for_rest")
@patch("onyx.connectors.sharepoint.connector.ClientContext")
def test_returns_cached_context_within_max_age(
    mock_client_ctx_cls: MagicMock,
    _mock_acquire: MagicMock,
) -> None:
    """Repeated calls with the same site_url within the TTL return the same object."""
    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005
    connector = _make_connector()
    _noop_load_credentials(connector)

    ctx1 = connector._create_rest_client_context(SITE_A)
    ctx2 = connector._create_rest_client_context(SITE_A)

    assert ctx1 is ctx2
    assert mock_client_ctx_cls.call_count == 1


@patch("onyx.connectors.sharepoint.connector.time")
@patch("onyx.connectors.sharepoint.connector.acquire_token_for_rest")
@patch("onyx.connectors.sharepoint.connector.ClientContext")
def test_rebuilds_context_after_max_age(
    mock_client_ctx_cls: MagicMock,
    _mock_acquire: MagicMock,
    mock_time: MagicMock,
) -> None:
    """After _REST_CTX_MAX_AGE_S the cached context is replaced."""
    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005
    connector = _make_connector()
    _noop_load_credentials(connector)

    mock_time.monotonic.return_value = 0.0
    ctx1 = connector._create_rest_client_context(SITE_A)

    # Just past the boundary — should rebuild
    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 1
    ctx2 = connector._create_rest_client_context(SITE_A)

    assert ctx1 is not ctx2
    assert mock_client_ctx_cls.call_count == 2


@patch("onyx.connectors.sharepoint.connector.acquire_token_for_rest")
@patch("onyx.connectors.sharepoint.connector.ClientContext")
def test_rebuilds_context_on_site_change(
    mock_client_ctx_cls: MagicMock,
    _mock_acquire: MagicMock,
) -> None:
    """Switching to a different site_url forces a new context."""
    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005
    connector = _make_connector()
    _noop_load_credentials(connector)

    ctx_a = connector._create_rest_client_context(SITE_A)
    ctx_b = connector._create_rest_client_context(SITE_B)

    assert ctx_a is not ctx_b
    assert mock_client_ctx_cls.call_count == 2


@patch("onyx.connectors.sharepoint.connector.time")
@patch("onyx.connectors.sharepoint.connector.acquire_token_for_rest")
@patch("onyx.connectors.sharepoint.connector.ClientContext")
def test_load_credentials_called_on_rebuild(
    _mock_client_ctx_cls: MagicMock,
    _mock_acquire: MagicMock,
    mock_time: MagicMock,
) -> None:
    """load_credentials is called every time the context is rebuilt."""
    _mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005
    connector = _make_connector()
    mock_load = _noop_load_credentials(connector)

    # First call — rebuild (no cache yet)
    mock_time.monotonic.return_value = 0.0
    connector._create_rest_client_context(SITE_A)
    assert mock_load.call_count == 1

    # Second call — cache hit, no rebuild
    mock_time.monotonic.return_value = 100.0
    connector._create_rest_client_context(SITE_A)
    assert mock_load.call_count == 1

    # Third call — expired, rebuild
    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 1
    connector._create_rest_client_context(SITE_A)
    assert mock_load.call_count == 2

    # Fourth call — site change, rebuild
    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 2
    connector._create_rest_client_context(SITE_B)
    assert mock_load.call_count == 3


================================================
FILE: backend/tests/unit/onyx/connectors/sharepoint/test_url_parsing.py
================================================
from __future__ import annotations

from onyx.connectors.sharepoint.connector import SharepointConnector


def test_extract_site_and_drive_info_from_share_link() -> None:
    url = "https://tenant.sharepoint.com/:f:/r/sites/SampleSite/Shared%20Documents/Sample%20Folder"

    site_descriptors = SharepointConnector._extract_site_and_drive_info([url])

    assert len(site_descriptors) == 1
    descriptor = site_descriptors[0]
    assert descriptor.url == "https://tenant.sharepoint.com/sites/SampleSite"
    assert descriptor.drive_name == "Shared Documents"
    assert descriptor.folder_path == "Sample Folder"


def test_extract_site_and_drive_info_standard_url() -> None:
    url = (
        "https://tenant.sharepoint.com/sites/SampleSite/Shared%20Documents/Nested/Path"
    )

    site_descriptors = SharepointConnector._extract_site_and_drive_info([url])

    assert len(site_descriptors) == 1
    descriptor = site_descriptors[0]
    assert descriptor.url == "https://tenant.sharepoint.com/sites/SampleSite"
    assert descriptor.drive_name == "Shared Documents"
    assert descriptor.folder_path == "Nested/Path"


================================================
FILE: backend/tests/unit/onyx/connectors/slab/test_slab_validation.py
================================================
from unittest.mock import patch

import pytest

from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.slab.connector import SlabConnector


def _build_connector(base_url: str = "https://myteam.slab.com") -> SlabConnector:
    connector = SlabConnector(base_url=base_url)
    connector.load_credentials({"slab_bot_token": "fake-token"})
    return connector


def test_validate_rejects_missing_scheme() -> None:
    connector = _build_connector(base_url="myteam.slab.com")
    with pytest.raises(ConnectorValidationError, match="https://"):
        connector.validate_connector_settings()


@patch("onyx.connectors.slab.connector.get_all_post_ids", return_value=["id1"])
def test_validate_success(mock_get_posts: object) -> None:  # noqa: ARG001
    connector = _build_connector()
    connector.validate_connector_settings()


@patch(
    "onyx.connectors.slab.connector.get_all_post_ids",
    side_effect=Exception("401 Unauthorized"),
)
def test_validate_bad_token_raises(
    mock_get_posts: object,  # noqa: ARG001
) -> None:  # noqa: ARG001
    connector = _build_connector()
    with pytest.raises(ConnectorValidationError, match="Failed to fetch posts"):
        connector.validate_connector_settings()


================================================
FILE: backend/tests/unit/onyx/connectors/slack/test_message_filtering.py
================================================
import pytest

from onyx.connectors.slack.connector import _bot_inclusive_msg_filter
from onyx.connectors.slack.connector import default_msg_filter
from onyx.connectors.slack.connector import SlackConnector
from onyx.connectors.slack.connector import SlackMessageFilterReason
from onyx.connectors.slack.models import MessageType


# -- default_msg_filter tests --


@pytest.mark.parametrize(
    "message,expected_reason",
    [
        # Regular user message: not filtered
        (
            {"text": "hello", "user": "U123", "ts": "1.0"},
            None,
        ),
        # Bot message with bot_id: filtered as BOT
        (
            {"text": "automated update", "bot_id": "B123", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
        ),
        # App message with app_id: filtered as BOT
        (
            {"text": "app notification", "app_id": "A123", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
        ),
        # Bot message with both bot_id and app_id: filtered as BOT
        (
            {"text": "bot+app", "bot_id": "B1", "app_id": "A1", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
        ),
        # DanswerBot Testing is explicitly allowed through
        (
            {
                "text": "danswer test",
                "bot_id": "B999",
                "bot_profile": {"name": "DanswerBot Testing"},
                "ts": "1.0",
            },
            None,
        ),
        # channel_join subtype: filtered as DISALLOWED
        (
            {"text": "joined", "subtype": "channel_join", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
        ),
        # channel_leave subtype: filtered as DISALLOWED
        (
            {"text": "left", "subtype": "channel_leave", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
        ),
        # pinned_item subtype: filtered as DISALLOWED
        (
            {"text": "pinned", "subtype": "pinned_item", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
        ),
        # Empty subtype: not filtered
        (
            {"text": "normal", "subtype": "", "ts": "1.0"},
            None,
        ),
    ],
    ids=[
        "regular_user_message",
        "bot_id_message",
        "app_id_message",
        "bot_and_app_id",
        "danswerbot_testing_allowed",
        "channel_join",
        "channel_leave",
        "pinned_item",
        "empty_subtype",
    ],
)
def test_default_msg_filter(
    message: MessageType,
    expected_reason: SlackMessageFilterReason | None,
) -> None:
    assert default_msg_filter(message) == expected_reason


# -- _bot_inclusive_msg_filter tests --


@pytest.mark.parametrize(
    "message,expected_reason",
    [
        # Regular user message: not filtered
        (
            {"text": "hello", "user": "U123", "ts": "1.0"},
            None,
        ),
        # Bot message: NOT filtered (this is the whole point)
        (
            {"text": "automated update", "bot_id": "B123", "ts": "1.0"},
            None,
        ),
        # App message: NOT filtered
        (
            {"text": "app notification", "app_id": "A123", "ts": "1.0"},
            None,
        ),
        # channel_join subtype: still filtered as DISALLOWED
        (
            {"text": "joined", "subtype": "channel_join", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
        ),
        # channel_leave subtype: still filtered as DISALLOWED
        (
            {"text": "left", "subtype": "channel_leave", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
        ),
    ],
    ids=[
        "regular_user_message",
        "bot_message_allowed",
        "app_message_allowed",
        "channel_join_still_filtered",
        "channel_leave_still_filtered",
    ],
)
def test_bot_inclusive_msg_filter(
    message: MessageType,
    expected_reason: SlackMessageFilterReason | None,
) -> None:
    assert _bot_inclusive_msg_filter(message) == expected_reason


# -- SlackConnector config tests --


def test_default_filter_when_include_bot_messages_false() -> None:
    """When include_bot_messages is False (default), the default filter is used."""
    connector = SlackConnector(use_redis=False)
    assert connector.msg_filter_func is default_msg_filter


def test_bot_inclusive_filter_when_include_bot_messages_true() -> None:
    """When include_bot_messages is True, the bot-inclusive filter is used."""
    connector = SlackConnector(include_bot_messages=True, use_redis=False)
    assert connector.msg_filter_func is _bot_inclusive_msg_filter


def test_include_bot_messages_defaults_to_false() -> None:
    """The include_bot_messages config defaults to False for backward compatibility."""
    connector = SlackConnector(use_redis=False)
    assert connector.include_bot_messages is False


================================================
FILE: backend/tests/unit/onyx/connectors/teams/test_collect_teams.py
================================================
"""Test the OData filtering for MS Teams with special character handling."""

from unittest.mock import MagicMock

from onyx.connectors.teams.connector import _collect_all_teams


def test_special_characters_in_team_names() -> None:
    """Test that team names with special characters use client-side filtering."""
    mock_graph_client = MagicMock()

    # Mock team with special characters
    mock_team = MagicMock()
    mock_team.id = "test-id"
    mock_team.display_name = "Research & Development (R&D) Team"
    mock_team.properties = {}

    # Mock successful responses for client-side filtering
    mock_team_collection = MagicMock()
    mock_team_collection.has_next = False
    mock_team_collection.__iter__ = lambda self: iter([mock_team])  # noqa: ARG005

    mock_get_query = MagicMock()
    mock_top_query = MagicMock()
    mock_top_query.execute_query.return_value = mock_team_collection
    mock_get_query.top.return_value = mock_top_query
    mock_graph_client.teams.get = MagicMock(return_value=mock_get_query)

    # Test with team name containing special characters (has &, parentheses)
    # This should use client-side filtering (get().top()) instead of OData filtering
    result = _collect_all_teams(
        mock_graph_client, ["Research & Development (R&D) Team"]
    )

    # Verify that get().top() was called for client-side filtering
    mock_graph_client.teams.get.assert_called()
    mock_get_query.top.assert_called_with(50)

    # Verify the team was found through client-side filtering
    assert len(result) == 1
    assert result[0].display_name == "Research & Development (R&D) Team"


def test_single_quote_escaping() -> None:
    """Test that team names with single quotes use OData filtering with proper escaping."""
    mock_graph_client = MagicMock()

    # Mock successful responses
    mock_team_collection = MagicMock()
    mock_team_collection.has_next = False
    mock_team_collection.__iter__ = lambda self: iter([])  # noqa: ARG005

    mock_get_query = MagicMock()
    mock_filter_query = MagicMock()
    mock_filter_query.before_execute = MagicMock(return_value=mock_filter_query)
    mock_filter_query.execute_query.return_value = mock_team_collection
    mock_get_query.filter.return_value = mock_filter_query
    mock_graph_client.teams.get = MagicMock(return_value=mock_get_query)

    # Test with a team name containing a single quote (no &, (, ) so uses OData)
    _collect_all_teams(mock_graph_client, ["Team's Group"])

    # Verify OData filter was used (since no special characters)
    mock_graph_client.teams.get.assert_called()
    mock_get_query.filter.assert_called_once()

    # Verify the filter: single quote should be escaped to '' for OData syntax
    filter_arg = mock_get_query.filter.call_args[0][0]
    expected_filter = "displayName eq 'Team''s Group'"
    assert (
        filter_arg == expected_filter
    ), f"Expected: {expected_filter}, Got: {filter_arg}"


def test_helper_functions() -> None:
    """Test the helper functions for team name processing."""
    from onyx.connectors.teams.connector import (
        _escape_odata_string,
        _has_odata_incompatible_chars,
        _can_use_odata_filter,
    )

    # Test OData string escaping
    assert _escape_odata_string("Team's Group") == "Team''s Group"
    assert _escape_odata_string("Normal Team") == "Normal Team"

    # Test special character detection
    assert _has_odata_incompatible_chars(["R&D Team"])
    assert _has_odata_incompatible_chars(["Team (Alpha)"])
    assert not _has_odata_incompatible_chars(["Normal Team"])
    assert not _has_odata_incompatible_chars([])
    assert not _has_odata_incompatible_chars(None)

    # Test filtering strategy determination
    can_use, safe, problematic = _can_use_odata_filter(["Normal Team", "R&D Team"])
    assert can_use
    assert "Normal Team" in safe
    assert "R&D Team" in problematic


================================================
FILE: backend/tests/unit/onyx/connectors/test_connector_factory.py
================================================
"""
Unit tests for lazy loading connector factory to validate:
1. All connector mappings are correct
2. Module paths and class names are valid
3. Error handling works properly
4. Caching functions correctly
"""

import importlib
from unittest.mock import MagicMock
from unittest.mock import Mock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.factory import _connector_cache
from onyx.connectors.factory import _load_connector_class
from onyx.connectors.factory import ConnectorMissingException
from onyx.connectors.factory import identify_connector_class
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.models import InputType
from onyx.connectors.registry import CONNECTOR_CLASS_MAP
from onyx.connectors.registry import ConnectorMapping


class TestConnectorMappingValidation:
    """Test that all connector mappings are valid."""

    def test_all_connector_mappings_exist(self) -> None:
        """Test that all mapped modules and classes actually exist."""
        errors = []

        for source, mapping in CONNECTOR_CLASS_MAP.items():
            try:
                # Try to import the module
                module = importlib.import_module(mapping.module_path)

                # Try to get the class
                connector_class = getattr(module, mapping.class_name)

                # Verify it's a subclass of BaseConnector
                if not issubclass(connector_class, BaseConnector):
                    errors.append(
                        f"{source.value}: {mapping.class_name} is not a BaseConnector subclass"
                    )

            except ImportError as e:
                errors.append(
                    f"{source.value}: Failed to import {mapping.module_path} - {e}"
                )
            except AttributeError as e:
                errors.append(
                    f"{source.value}: Class {mapping.class_name} not found in {mapping.module_path} - {e}"
                )

        if errors:
            pytest.fail("Connector mapping validation failed:\n" + "\n".join(errors))

    def test_no_duplicate_mappings(self) -> None:
        """Test that each DocumentSource only appears once in the mapping."""
        sources = list(CONNECTOR_CLASS_MAP.keys())
        unique_sources = set(sources)

        assert len(sources) == len(
            unique_sources
        ), "Duplicate DocumentSource entries found"

    def test_blob_storage_connectors_correct(self) -> None:
        """Test that all blob storage sources map to the same connector."""
        blob_sources = [
            DocumentSource.S3,
            DocumentSource.R2,
            DocumentSource.GOOGLE_CLOUD_STORAGE,
            DocumentSource.OCI_STORAGE,
        ]

        expected_mapping = ConnectorMapping(
            module_path="onyx.connectors.blob.connector",
            class_name="BlobStorageConnector",
        )

        for source in blob_sources:
            assert (
                CONNECTOR_CLASS_MAP[source] == expected_mapping
            ), f"{source.value} should map to BlobStorageConnector"


class TestConnectorClassLoading:
    """Test the lazy loading mechanism."""

    def setup_method(self) -> None:
        """Clear cache before each test."""
        _connector_cache.clear()

    def test_load_connector_class_success(self) -> None:
        """Test successful connector class loading."""
        # Use a simple connector that should always exist
        connector_class = _load_connector_class(DocumentSource.WEB)

        assert connector_class is not None
        assert issubclass(connector_class, BaseConnector)
        assert connector_class.__name__ == "WebConnector"

    def test_load_connector_class_caching(self) -> None:
        """Test that connector classes are cached after first load."""
        assert len(_connector_cache) == 0

        # Load connector first time
        connector_class1 = _load_connector_class(DocumentSource.WEB)
        assert len(_connector_cache) == 1
        assert DocumentSource.WEB in _connector_cache

        # Load same connector second time - should use cache
        connector_class2 = _load_connector_class(DocumentSource.WEB)
        assert connector_class1 is connector_class2  # Same object reference
        assert len(_connector_cache) == 1  # Cache size unchanged

    @patch("importlib.import_module")
    def test_load_connector_class_import_error(self, mock_import: Mock) -> None:
        """Test handling of import errors."""
        mock_import.side_effect = ImportError("Module not found")

        with pytest.raises(ConnectorMissingException) as exc_info:
            _load_connector_class(DocumentSource.WEB)

        assert (
            "Failed to import WebConnector from onyx.connectors.web.connector"
            in str(exc_info.value)
        )

    @patch("importlib.import_module")
    def test_load_connector_class_attribute_error(self, mock_import: Mock) -> None:
        """Test handling of missing class in module."""

        # Create a custom mock that raises AttributeError for the specific class
        class MockModule:
            def __getattr__(self, name: str) -> MagicMock:
                if name == "WebConnector":
                    raise AttributeError("Class not found")
                return MagicMock()

        mock_import.return_value = MockModule()

        with pytest.raises(ConnectorMissingException) as exc_info:
            _load_connector_class(DocumentSource.WEB)

        assert (
            "Failed to import WebConnector from onyx.connectors.web.connector"
            in str(exc_info.value)
        )


class TestIdentifyConnectorClass:
    """Test the identify_connector_class function."""

    def setup_method(self) -> None:
        """Clear cache before each test."""
        _connector_cache.clear()

    def test_identify_connector_basic(self) -> None:
        """Test basic connector identification."""
        connector_class = identify_connector_class(
            DocumentSource.GITHUB, InputType.SLIM_RETRIEVAL
        )

        assert connector_class is not None
        assert issubclass(connector_class, BaseConnector)
        assert connector_class.__name__ == "GithubConnector"

    def test_identify_connector_slack_special_case(self) -> None:
        """Test Slack connector special handling."""
        # Test POLL input type
        slack_poll = identify_connector_class(DocumentSource.SLACK, InputType.POLL)
        assert slack_poll.__name__ == "SlackConnector"

        # Test SLIM_RETRIEVAL input type
        slack_slim = identify_connector_class(
            DocumentSource.SLACK, InputType.SLIM_RETRIEVAL
        )
        assert slack_slim.__name__ == "SlackConnector"

        # Should be the same class
        assert slack_poll is slack_slim

    def test_identify_connector_without_input_type(self) -> None:
        """Test connector identification without specifying input type."""
        connector_class = identify_connector_class(DocumentSource.GITHUB)

        assert connector_class is not None
        assert connector_class.__name__ == "GithubConnector"


class TestConnectorMappingIntegrity:
    """Test integrity of the connector mapping data."""

    def test_all_document_sources_mapped(self) -> None:
        """Test that all DocumentSource values have mappings (where expected)."""
        # Get all DocumentSource enum values
        all_sources = set(DocumentSource)
        mapped_sources = set(CONNECTOR_CLASS_MAP.keys())

        expected_unmapped = {
            DocumentSource.INGESTION_API,  # This is handled differently
            DocumentSource.REQUESTTRACKER,  # Not yet implemented or special case
            DocumentSource.NOT_APPLICABLE,  # Special placeholder, no connector needed
            DocumentSource.USER_FILE,  # Special placeholder, no connector needed
            DocumentSource.CRAFT_FILE,  # Direct S3 upload via API, no connector needed
            # Add other legitimately unmapped sources here if they exist
        }

        unmapped_sources = all_sources - mapped_sources - expected_unmapped

        if unmapped_sources:
            pytest.fail(
                f"DocumentSource values without connector mappings: {[s.value for s in unmapped_sources]}"
            )

    def test_mapping_format_consistency(self) -> None:
        """Test that all mappings follow the expected format."""
        for source, mapping in CONNECTOR_CLASS_MAP.items():
            assert isinstance(
                mapping, ConnectorMapping
            ), f"{source.value} mapping is not a ConnectorMapping"

            assert isinstance(
                mapping.module_path, str
            ), f"{source.value} module_path is not a string"
            assert isinstance(
                mapping.class_name, str
            ), f"{source.value} class_name is not a string"
            assert mapping.module_path.startswith(
                "onyx.connectors."
            ), f"{source.value} module_path doesn't start with onyx.connectors."
            assert mapping.class_name.endswith(
                "Connector"
            ), f"{source.value} class_name doesn't end with Connector"


class TestInstantiateConnectorIntegration:
    """Test that the lazy loading works with the main instantiate_connector function."""

    def setup_method(self) -> None:
        """Clear cache before each test."""
        _connector_cache.clear()

    def test_instantiate_connector_loads_class_lazily(self) -> None:
        """Test that instantiate_connector triggers lazy loading."""
        from onyx.utils.sensitive import make_mock_sensitive_value

        # Mock the database session and credential
        mock_session = MagicMock()
        mock_credential = MagicMock()
        mock_credential.id = 123
        mock_credential.credential_json = make_mock_sensitive_value({"test": "data"})

        # This should trigger lazy loading but will fail on actual instantiation
        # due to missing real configuration - that's expected
        with pytest.raises(Exception):  # We expect some kind of error due to mock data
            instantiate_connector(
                mock_session,
                DocumentSource.WEB,  # Simple connector
                InputType.SLIM_RETRIEVAL,
                {},  # Empty config
                mock_credential,
            )

        # But the class should have been loaded into cache
        assert DocumentSource.WEB in _connector_cache
        assert _connector_cache[DocumentSource.WEB].__name__ == "WebConnector"


================================================
FILE: backend/tests/unit/onyx/connectors/test_document_metadata_coercion.py
================================================
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentBase
from onyx.connectors.models import TextSection


def _minimal_doc_kwargs(metadata: dict) -> dict:
    return {
        "id": "test-doc",
        "sections": [TextSection(text="hello", link="http://example.com")],
        "source": DocumentSource.NOT_APPLICABLE,
        "semantic_identifier": "Test Doc",
        "metadata": metadata,
    }


def test_int_values_coerced_to_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"count": 42}))
    assert doc.metadata == {"count": "42"}


def test_float_values_coerced_to_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"score": 3.14}))
    assert doc.metadata == {"score": "3.14"}


def test_bool_values_coerced_to_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"active": True}))
    assert doc.metadata == {"active": "True"}


def test_list_of_ints_coerced_to_list_of_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"ids": [1, 2, 3]}))
    assert doc.metadata == {"ids": ["1", "2", "3"]}


def test_list_of_mixed_types_coerced_to_list_of_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"tags": ["a", 1, True, 2.5]}))
    assert doc.metadata == {"tags": ["a", "1", "True", "2.5"]}


def test_list_of_dicts_coerced_to_list_of_str() -> None:
    raw = {"nested": [{"key": "val"}, {"key2": "val2"}]}
    doc = Document(**_minimal_doc_kwargs(raw))
    assert doc.metadata == {"nested": ["{'key': 'val'}", "{'key2': 'val2'}"]}


def test_dict_value_coerced_to_str() -> None:
    raw = {"info": {"inner_key": "inner_val"}}
    doc = Document(**_minimal_doc_kwargs(raw))
    assert doc.metadata == {"info": "{'inner_key': 'inner_val'}"}


def test_none_value_coerced_to_str() -> None:
    doc = Document(**_minimal_doc_kwargs({"empty": None}))
    assert doc.metadata == {"empty": "None"}


def test_already_valid_str_values_unchanged() -> None:
    doc = Document(**_minimal_doc_kwargs({"key": "value"}))
    assert doc.metadata == {"key": "value"}


def test_already_valid_list_of_str_unchanged() -> None:
    doc = Document(**_minimal_doc_kwargs({"tags": ["a", "b", "c"]}))
    assert doc.metadata == {"tags": ["a", "b", "c"]}


def test_empty_metadata_unchanged() -> None:
    doc = Document(**_minimal_doc_kwargs({}))
    assert doc.metadata == {}


def test_mixed_metadata_values() -> None:
    raw = {
        "str_val": "hello",
        "int_val": 99,
        "list_val": [1, "two", 3.0],
        "dict_val": {"nested": True},
    }
    doc = Document(**_minimal_doc_kwargs(raw))
    assert doc.metadata == {
        "str_val": "hello",
        "int_val": "99",
        "list_val": ["1", "two", "3.0"],
        "dict_val": "{'nested': True}",
    }


def test_coercion_works_on_base_class() -> None:
    kwargs = _minimal_doc_kwargs({"count": 42})
    kwargs.pop("source")
    kwargs.pop("id")
    doc = DocumentBase(**kwargs)
    assert doc.metadata == {"count": "42"}


================================================
FILE: backend/tests/unit/onyx/connectors/test_microsoft_graph_env.py
================================================
import pytest
from office365.graph_client import AzureEnvironment  # type: ignore[import-untyped]

from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment


def test_resolve_global_defaults() -> None:
    env = resolve_microsoft_environment(
        "https://graph.microsoft.com", "https://login.microsoftonline.com"
    )
    assert env.environment == AzureEnvironment.Global
    assert env.sharepoint_domain_suffix == "sharepoint.com"


def test_resolve_gcc_high() -> None:
    env = resolve_microsoft_environment(
        "https://graph.microsoft.us", "https://login.microsoftonline.us"
    )
    assert env.environment == AzureEnvironment.USGovernmentHigh
    assert env.graph_host == "https://graph.microsoft.us"
    assert env.authority_host == "https://login.microsoftonline.us"
    assert env.sharepoint_domain_suffix == "sharepoint.us"


def test_resolve_dod() -> None:
    env = resolve_microsoft_environment(
        "https://dod-graph.microsoft.us", "https://login.microsoftonline.us"
    )
    assert env.environment == AzureEnvironment.USGovernmentDoD
    assert env.sharepoint_domain_suffix == "sharepoint.us"


def test_trailing_slashes_are_stripped() -> None:
    env = resolve_microsoft_environment(
        "https://graph.microsoft.us/", "https://login.microsoftonline.us/"
    )
    assert env.environment == AzureEnvironment.USGovernmentHigh


def test_mismatched_authority_raises() -> None:
    with pytest.raises(ConnectorValidationError, match="inconsistent"):
        resolve_microsoft_environment(
            "https://graph.microsoft.us", "https://login.microsoftonline.com"
        )


def test_unknown_graph_host_raises() -> None:
    with pytest.raises(ConnectorValidationError, match="Unsupported"):
        resolve_microsoft_environment(
            "https://graph.example.com", "https://login.example.com"
        )


================================================
FILE: backend/tests/unit/onyx/connectors/utils.py
================================================
from typing import cast
from typing import Generic
from typing import TypeVar

from pydantic import BaseModel

from onyx.connectors.connector_runner import CheckpointOutputWrapper
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document

_ITERATION_LIMIT = 100_000


CT = TypeVar("CT", bound=ConnectorCheckpoint)


class SingleConnectorCallOutput(BaseModel, Generic[CT]):
    items: list[Document | ConnectorFailure]
    next_checkpoint: CT


def load_everything_from_checkpoint_connector(
    connector: CheckpointedConnector[CT],
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
) -> list[SingleConnectorCallOutput[CT]]:

    checkpoint = cast(CT, connector.build_dummy_checkpoint())
    return load_everything_from_checkpoint_connector_from_checkpoint(
        connector, start, end, checkpoint
    )


def load_everything_from_checkpoint_connector_from_checkpoint(
    connector: CheckpointedConnector[CT],
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CT,
) -> list[SingleConnectorCallOutput[CT]]:
    num_iterations = 0
    outputs: list[SingleConnectorCallOutput[CT]] = []
    while checkpoint.has_more:
        items: list[Document | ConnectorFailure] = []
        doc_batch_generator = CheckpointOutputWrapper[CT]()(
            connector.load_from_checkpoint(start, end, checkpoint)
        )
        for document, hierarchy_node, failure, next_checkpoint in doc_batch_generator:
            if hierarchy_node is not None:
                continue
            if failure is not None:
                items.append(failure)
            if document is not None:
                items.append(document)
            if next_checkpoint is not None:
                checkpoint = next_checkpoint

        outputs.append(
            SingleConnectorCallOutput(items=items, next_checkpoint=checkpoint)
        )

        num_iterations += 1
        if num_iterations > _ITERATION_LIMIT:
            raise RuntimeError("Too many iterations. Infinite loop?")

    return outputs


================================================
FILE: backend/tests/unit/onyx/connectors/zendesk/test_zendesk_checkpointing.py
================================================
import time
from collections.abc import Callable
from collections.abc import Generator
from typing import Any
from typing import cast
from unittest.mock import call
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from requests.exceptions import HTTPError

from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.models import Document
from onyx.connectors.zendesk.connector import ZendeskClient
from onyx.connectors.zendesk.connector import ZendeskConnector
from tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector


@pytest.fixture
def mock_zendesk_client() -> MagicMock:
    """Create a mock Zendesk client"""
    mock = MagicMock(spec=ZendeskClient)
    mock.base_url = "https://test.zendesk.com/api/v2"
    mock.auth = ("test@example.com/token", "test_token")
    mock.make_request = MagicMock()
    return mock


@pytest.fixture
def zendesk_connector(
    mock_zendesk_client: MagicMock,
) -> Generator[ZendeskConnector, None, None]:
    """Create a Zendesk connector with mocked client"""
    connector = ZendeskConnector(content_type="articles")
    connector.client = mock_zendesk_client
    yield connector


@pytest.fixture
def unmocked_zendesk_connector() -> Generator[ZendeskConnector, None, None]:
    """Create a Zendesk connector with unmocked client"""
    zendesk_connector = ZendeskConnector(content_type="articles")
    zendesk_connector.client = ZendeskClient(
        "test", "test@example.com/token", "test_token"
    )
    yield zendesk_connector


@pytest.fixture
def create_mock_article() -> Callable[..., dict[str, Any]]:
    def _create_mock_article(
        id: int = 1,
        title: str = "Test Article",
        body: str = "Test Content",
        updated_at: str = "2023-01-01T12:00:00Z",
        author_id: str = "123",
        label_names: list[str] | None = None,
        draft: bool = False,
    ) -> dict[str, Any]:
        """Helper to create a mock article"""
        return {
            "id": id,
            "title": title,
            "body": body,
            "updated_at": updated_at,
            "author_id": author_id,
            "label_names": label_names or [],
            "draft": draft,
            "html_url": f"https://test.zendesk.com/hc/en-us/articles/{id}",
        }

    return _create_mock_article


@pytest.fixture
def create_mock_ticket() -> Callable[..., dict[str, Any]]:
    def _create_mock_ticket(
        id: int = 1,
        subject: str = "Test Ticket",
        description: str = "Test Description",
        updated_at: str = "2023-01-01T12:00:00Z",
        submitter_id: str = "123",
        status: str = "open",
        priority: str = "normal",
        tags: list[str] | None = None,
        ticket_type: str = "question",
    ) -> dict[str, Any]:
        """Helper to create a mock ticket"""
        return {
            "id": id,
            "subject": subject,
            "description": description,
            "updated_at": updated_at,
            "submitter": submitter_id,
            "status": status,
            "priority": priority,
            "tags": tags or [],
            "type": ticket_type,
            "url": f"https://test.zendesk.com/agent/tickets/{id}",
        }

    return _create_mock_ticket


@pytest.fixture
def create_mock_author() -> Callable[..., dict[str, Any]]:
    def _create_mock_author(
        id: str = "123",
        name: str = "Test User",
        email: str = "test@example.com",
    ) -> dict[str, Any]:
        """Helper to create a mock author"""
        return {
            "user": {
                "id": id,
                "name": name,
                "email": email,
            }
        }

    return _create_mock_author


def test_load_from_checkpoint_articles_happy_path(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
    create_mock_article: Callable[..., dict[str, Any]],
    create_mock_author: Callable[..., dict[str, Any]],
) -> None:
    """Test loading articles from checkpoint - happy path"""
    # Set up mock responses
    mock_article1 = create_mock_article(id=1, title="Article 1")
    mock_article2 = create_mock_article(id=2, title="Article 2")
    mock_author = create_mock_author()

    # Mock API responses
    mock_zendesk_client.make_request.side_effect = [
        # First call: content tags
        {"records": []},
        # Second call: articles page
        {
            "articles": [mock_article1, mock_article2],
            "meta": {
                "has_more": False,
                "after_cursor": None,
            },
        },
        # Third call: author info
        mock_author,
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)

    # Check that we got the documents
    assert len(outputs) == 2
    assert outputs[0].next_checkpoint.cached_content_tags is not None

    assert len(outputs[1].items) == 2

    # Check first document
    doc1 = outputs[1].items[0]
    assert isinstance(doc1, Document)
    assert doc1.id == "article:1"
    assert doc1.semantic_identifier == "Article 1"
    assert doc1.source == DocumentSource.ZENDESK

    # Check second document
    doc2 = outputs[1].items[1]
    assert isinstance(doc2, Document)
    assert doc2.id == "article:2"
    assert doc2.semantic_identifier == "Article 2"
    assert doc2.source == DocumentSource.ZENDESK

    # Check checkpoint state
    assert not outputs[1].next_checkpoint.has_more


def test_load_from_checkpoint_tickets_happy_path(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
    create_mock_ticket: Callable[..., dict[str, Any]],
    create_mock_author: Callable[..., dict[str, Any]],
) -> None:
    """Test loading tickets from checkpoint - happy path"""
    # Configure connector for tickets
    zendesk_connector.content_type = "tickets"

    # Set up mock responses
    mock_ticket1 = create_mock_ticket(id=1, subject="Ticket 1")
    mock_ticket2 = create_mock_ticket(id=2, subject="Ticket 2")
    mock_author = create_mock_author()

    # Mock API responses
    mock_zendesk_client.make_request.side_effect = [
        # First call: content tags
        {"records": []},
        # Second call: tickets page
        {
            "tickets": [mock_ticket1, mock_ticket2],
            "end_of_stream": True,
            "end_time": int(time.time()),
        },
        # Third call: author info
        mock_author,
        # Fourth call: comments page
        {"comments": []},
        # Fifth call: comments page
        {"comments": []},
    ]

    zendesk_connector.client = mock_zendesk_client

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)

    # Check that we got the documents
    assert len(outputs) == 2
    assert outputs[0].next_checkpoint.cached_content_tags is not None
    assert len(outputs[1].items) == 2

    # Check first document
    doc1 = outputs[1].items[0]
    print(doc1, type(doc1))
    assert isinstance(doc1, Document)
    assert doc1.id == "zendesk_ticket_1"
    assert doc1.semantic_identifier == "Ticket #1: Ticket 1"
    assert doc1.source == DocumentSource.ZENDESK

    # Check second document
    doc2 = outputs[1].items[1]
    assert isinstance(doc2, Document)
    assert doc2.id == "zendesk_ticket_2"
    assert doc2.semantic_identifier == "Ticket #2: Ticket 2"
    assert doc2.source == DocumentSource.ZENDESK

    # Check checkpoint state
    assert not outputs[1].next_checkpoint.has_more


def test_load_from_checkpoint_with_rate_limit(
    unmocked_zendesk_connector: ZendeskConnector,
    create_mock_article: Callable[..., dict[str, Any]],
    create_mock_author: Callable[..., dict[str, Any]],
) -> None:
    """Test loading from checkpoint with rate limit handling"""
    zendesk_connector = unmocked_zendesk_connector
    # Set up mock responses
    mock_article = create_mock_article()
    mock_author = create_mock_author()
    author_response = MagicMock()
    author_response.status_code = 200
    author_response.json.return_value = mock_author

    # Create mock responses for requests.get
    rate_limit_response = MagicMock()
    rate_limit_response.status_code = 429
    rate_limit_response.headers = {"Retry-After": "60"}
    rate_limit_response.raise_for_status.side_effect = HTTPError(
        response=rate_limit_response
    )

    success_response = MagicMock()
    success_response.status_code = 200
    success_response.json.return_value = {
        "articles": [mock_article],
        "meta": {
            "has_more": False,
            "after_cursor": None,
        },
    }

    # Mock requests.get to simulate rate limit then success
    with patch("onyx.connectors.zendesk.connector.requests.get") as mock_get:
        mock_get.side_effect = [
            # First call: content tags
            MagicMock(
                status_code=200,
                json=lambda: {"records": [], "meta": {"has_more": False}},
            ),
            # Second call: articles page (rate limited)
            rate_limit_response,
            # Third call: articles page (after rate limit)
            success_response,
            # Fourth call: author info
            author_response,
        ]

        # Call load_from_checkpoint
        end_time = time.time()
        with patch("onyx.connectors.zendesk.connector.time.sleep") as mock_sleep:
            outputs = load_everything_from_checkpoint_connector(
                zendesk_connector, 0, end_time
            )
            mock_sleep.assert_has_calls([call(60), call(0.1)])

        # Check that we got the document after rate limit was handled
        assert len(outputs) == 2
        assert outputs[0].next_checkpoint.cached_content_tags is not None
        assert len(outputs[1].items) == 1
        assert isinstance(outputs[1].items[0], Document)
        assert outputs[1].items[0].id == "article:1"

        # Verify the requests were made with correct parameters
        assert mock_get.call_count == 4
        # First call should be for content tags
        args, kwargs = mock_get.call_args_list[0]
        assert "guide/content_tags" in args[0]
        # Second call should be for articles (rate limited)
        args, kwargs = mock_get.call_args_list[1]
        assert "help_center/articles" in args[0]
        # Third call should be for articles (success)
        args, kwargs = mock_get.call_args_list[2]
        assert "help_center/articles" in args[0]
        # Fourth call should be for author info
        args, kwargs = mock_get.call_args_list[3]
        assert "users/123" in args[0]


def test_load_from_checkpoint_with_empty_response(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
) -> None:
    """Test loading from checkpoint with empty response"""
    # Mock API responses
    mock_zendesk_client.make_request.side_effect = [
        # First call: content tags
        {"records": []},
        # Second call: empty articles page
        {
            "articles": [],
            "meta": {
                "has_more": False,
                "after_cursor": None,
            },
        },
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)

    # Check that we got no documents
    assert len(outputs) == 2
    assert outputs[0].next_checkpoint.cached_content_tags is not None
    assert len(outputs[1].items) == 0
    assert not outputs[1].next_checkpoint.has_more


def test_load_from_checkpoint_with_skipped_article(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
    create_mock_article: Callable[..., dict[str, Any]],
) -> None:
    """Test loading from checkpoint with an article that should be skipped"""
    # Set up mock responses with a draft article
    mock_article = create_mock_article(draft=True)
    mock_zendesk_client.make_request.side_effect = [
        # First call: content tags
        {"records": []},
        # Second call: articles page with draft article
        {
            "articles": [mock_article],
            "meta": {
                "has_more": False,
                "after_cursor": None,
            },
        },
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)

    # Check that no documents were returned
    assert len(outputs) == 2
    assert outputs[0].next_checkpoint.cached_content_tags is not None
    assert len(outputs[1].items) == 0
    assert not outputs[1].next_checkpoint.has_more


def test_load_from_checkpoint_with_skipped_ticket(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
    create_mock_ticket: Callable[..., dict[str, Any]],
) -> None:
    """Test loading from checkpoint with a deleted ticket"""
    # Configure connector for tickets
    zendesk_connector.content_type = "tickets"

    # Set up mock responses with a deleted ticket
    mock_ticket = create_mock_ticket(status="deleted")
    mock_zendesk_client.make_request.side_effect = [
        # First call: content tags
        {"records": []},
        # Second call: tickets page with deleted ticket
        {
            "tickets": [mock_ticket],
            "end_of_stream": True,
            "end_time": int(time.time()),
        },
    ]

    # Call load_from_checkpoint
    end_time = time.time()
    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)

    # Check that no documents were returned
    assert len(outputs) == 2
    assert outputs[0].next_checkpoint.cached_content_tags is not None
    assert len(outputs[1].items) == 0
    assert not outputs[1].next_checkpoint.has_more


@pytest.mark.parametrize(
    "status_code,expected_exception,expected_message",
    [
        (
            401,
            CredentialExpiredError,
            "Your Zendesk credentials appear to be invalid or expired",
        ),
        (
            403,
            InsufficientPermissionsError,
            "Your Zendesk token does not have sufficient permissions",
        ),
        (
            404,
            ConnectorValidationError,
            "Zendesk resource not found",
        ),
    ],
)
def test_validate_connector_settings_errors(
    zendesk_connector: ZendeskConnector,
    status_code: int,
    expected_exception: type[Exception],
    expected_message: str,
) -> None:
    """Test validation with various error scenarios"""
    mock_response = MagicMock()
    mock_response.status_code = status_code
    error = HTTPError(response=mock_response)

    mock_zendesk_client = cast(MagicMock, zendesk_connector.client)
    mock_zendesk_client.make_request.side_effect = error

    with pytest.raises(expected_exception) as excinfo:
        print("excinfo", excinfo)
        zendesk_connector.validate_connector_settings()

    assert expected_message in str(excinfo.value)


def test_validate_connector_settings_success(
    zendesk_connector: ZendeskConnector,
    mock_zendesk_client: MagicMock,
) -> None:
    """Test successful validation"""
    # Mock successful API response
    mock_zendesk_client.make_request.return_value = {
        "articles": [],
        "meta": {"has_more": False},
    }

    zendesk_connector.validate_connector_settings()


================================================
FILE: backend/tests/unit/onyx/connectors/zendesk/test_zendesk_rate_limit.py
================================================
from __future__ import annotations

import types
from typing import Any
from typing import Dict

import pytest


class _FakeTime:
    """A controllable time module replacement.

    - monotonic(): returns an internal counter (seconds)
    - sleep(x): advances the internal counter by x seconds
    """

    def __init__(self) -> None:
        self._t = 0.0

    def monotonic(self) -> float:
        return self._t

    def sleep(self, seconds: float) -> None:
        # advance time without real waiting
        self._t += float(seconds)


class _FakeResponse:
    def __init__(self, json_payload: Dict[str, Any], status_code: int = 200) -> None:
        self._json = json_payload
        self.status_code = status_code
        self.headers: Dict[str, str] = {}

    def json(self) -> Dict[str, Any]:
        return self._json

    def raise_for_status(self) -> None:
        # simulate OK
        return None


def test_zendesk_client_per_minute_rate_limiting(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    # Import here to allow monkeypatching modules safely
    from onyx.connectors.zendesk.connector import ZendeskClient
    import onyx.connectors.cross_connector_utils.rate_limit_wrapper as rlw
    import onyx.connectors.zendesk.connector as zendesk_mod

    fake_time = _FakeTime()

    # Patch time in both the rate limit wrapper and the zendesk connector module
    monkeypatch.setattr(rlw, "time", fake_time, raising=True)
    monkeypatch.setattr(zendesk_mod, "time", fake_time, raising=True)

    # Stub out requests.get to avoid network and return a minimal valid payload
    calls: list[str] = []

    def _fake_get(
        url: str,
        auth: Any,  # noqa: ARG001
        params: Dict[str, Any],  # noqa: ARG001
    ) -> _FakeResponse:
        calls.append(url)
        # minimal Zendesk list response (articles path)
        return _FakeResponse({"articles": [], "meta": {"has_more": False}})

    monkeypatch.setattr(
        zendesk_mod, "requests", types.SimpleNamespace(get=_fake_get), raising=True
    )

    # Build client with a small limit: 2 calls per 60 seconds
    client = ZendeskClient("subd", "e", "t", calls_per_minute=2)

    # Make three calls in quick succession. The third should be rate limited
    client.make_request("help_center/articles", {"page[size]": 1})
    client.make_request("help_center/articles", {"page[size]": 1})

    # At this point we've used up the 2 allowed calls within the 60s window
    # The next call should trigger sleeps with exponential backoff until >60s elapsed
    client.make_request("help_center/articles", {"page[size]": 1})

    # Ensure we did not actually wait in real time but logically advanced beyond a minute
    assert fake_time.monotonic() >= 60
    # Ensure the HTTP function was invoked three times
    assert len(calls) == 3


================================================
FILE: backend/tests/unit/onyx/context/search/federated/test_slack_query_construction.py
================================================
from unittest.mock import MagicMock

from onyx.context.search.federated.slack_search_utils import (
    build_channel_query_filter,
)
from onyx.context.search.federated.slack_search_utils import matches_exclude_pattern
from onyx.onyxbot.slack.models import ChannelType


class TestChannelPatternMatching:
    """Test glob pattern matching for channel exclusion"""

    def test_exact_match(self) -> None:
        """Test exact channel name match"""
        assert matches_exclude_pattern("customer-support", ["customer-support"]) is True
        assert matches_exclude_pattern("engineering", ["customer-support"]) is False

    def test_glob_pattern_star(self) -> None:
        """Test glob patterns with * wildcard"""
        # Suffix wildcard
        assert matches_exclude_pattern("customer-X", ["customer*"]) is True
        assert matches_exclude_pattern("customer-support", ["customer*"]) is True
        assert matches_exclude_pattern("engineering", ["customer*"]) is False

        # Prefix wildcard
        assert matches_exclude_pattern("test-env", ["*-env"]) is True
        assert matches_exclude_pattern("prod-env", ["*-env"]) is True
        assert matches_exclude_pattern("test-staging", ["*-env"]) is False

        # Infix wildcard
        assert matches_exclude_pattern("customer-test-env", ["customer*env"]) is True
        assert matches_exclude_pattern("customer-prod-env", ["customer*env"]) is True
        assert matches_exclude_pattern("customer-test", ["customer*env"]) is False

    def test_multiple_patterns(self) -> None:
        """Test matching against multiple patterns"""
        patterns = ["test-*", "dev-*", "customer*"]

        assert matches_exclude_pattern("test-env", patterns) is True
        assert matches_exclude_pattern("dev-env", patterns) is True
        assert matches_exclude_pattern("customer-X", patterns) is True
        assert matches_exclude_pattern("prod-env", patterns) is False

    def test_hash_prefix_normalization(self) -> None:
        """Test that # prefix is handled correctly"""
        # Pattern has #, channel name doesn't
        assert matches_exclude_pattern("customer-X", ["#customer*"]) is True

        # Channel name has #, pattern doesn't
        assert matches_exclude_pattern("#customer-X", ["customer*"]) is True

        # Both have #
        assert matches_exclude_pattern("#customer-X", ["#customer*"]) is True

    def test_case_insensitive(self) -> None:
        """Test that matching is case insensitive"""
        assert matches_exclude_pattern("Customer-Support", ["customer*"]) is True
        assert matches_exclude_pattern("CUSTOMER-X", ["customer*"]) is True
        assert matches_exclude_pattern("customer-x", ["CUSTOMER*"]) is True

    def test_whitespace_handling(self) -> None:
        """Test that whitespace is trimmed"""
        assert matches_exclude_pattern(" customer-X ", ["customer*"]) is True
        assert matches_exclude_pattern("customer-X", [" customer* "]) is True


class TestChannelQueryFilterBuilding:
    """Test channel query filter string construction"""

    def test_specific_channels_no_exclude(self) -> None:
        """Test filter with specific channels, no exclusions"""
        entities = {
            "search_all_channels": False,
            "channels": ["general", "engineering"],
        }

        filter_str = build_channel_query_filter(entities)

        assert "in:#general" in filter_str
        assert "in:#engineering" in filter_str
        assert filter_str.count("in:#") == 2

    def test_specific_channels_with_exclude(self) -> None:
        """Test filter with specific channels and exclusions"""
        entities = {
            "search_all_channels": False,
            "channels": ["general", "customer-X", "customer-Y", "support"],
            "exclude_channels": ["customer*"],
        }

        filter_str = build_channel_query_filter(entities)

        # Should include non-customer channels
        assert "in:#general" in filter_str
        assert "in:#support" in filter_str

        # Should exclude customer channels
        assert "customer-X" not in filter_str
        assert "customer-Y" not in filter_str

    def test_all_channels_no_exclude(self) -> None:
        """Test search all channels with no exclusions"""
        entities = {"search_all_channels": True}

        filter_str = build_channel_query_filter(entities)

        # Should return empty string (no filter)
        assert filter_str == ""

    def test_all_channels_with_exclude(self) -> None:
        """Test search all channels with exclusions"""
        entities = {
            "search_all_channels": True,
            "exclude_channels": ["customer*", "test-*"],
        }
        available_channels = [
            "general",
            "customer-X",
            "customer-Y",
            "test-env",
            "support",
        ]

        filter_str = build_channel_query_filter(entities, available_channels)

        # Should use negative filters for excluded channels
        assert "-in:#customer-X" in filter_str
        assert "-in:#customer-Y" in filter_str
        assert "-in:#test-env" in filter_str

        # Should NOT include positive filters (we're searching ALL channels, just excluding some)
        assert "in:#general" not in filter_str
        assert "in:#support" not in filter_str

    def test_empty_channels_list(self) -> None:
        """Test with empty channels list"""
        entities = {"search_all_channels": False, "channels": []}

        # Should raise ValidationError during entity parsing, but if it gets through
        # should return empty string
        try:
            filter_str = build_channel_query_filter(entities)
            assert filter_str == ""
        except Exception:
            # Expected - validation should fail
            pass

    def test_channel_name_normalization(self) -> None:
        """Test that channel names are normalized (# removed)"""
        entities = {
            "search_all_channels": False,
            "channels": ["#general", "engineering"],  # One with #, one without
        }

        filter_str = build_channel_query_filter(entities)

        # Both should be included with in:# prefix
        assert "in:#general" in filter_str
        assert "in:#engineering" in filter_str

    def test_invalid_entities(self) -> None:
        """Test with invalid entities"""
        entities = {"invalid_field": "value"}

        filter_str = build_channel_query_filter(entities)

        # Should return empty string on validation error
        assert filter_str == ""

    def test_no_available_channels(self) -> None:
        """Test exclude patterns when channel list fetch fails"""
        entities = {
            "search_all_channels": True,
            "exclude_channels": ["customer*"],
        }
        available_channels = None  # Channel fetch failed

        filter_str = build_channel_query_filter(entities, available_channels)

        # Should return empty string if we can't fetch channels
        assert filter_str == ""


class TestDateExtraction:
    """Test date range extraction from queries"""

    def test_extract_explicit_days(self) -> None:
        """Test extracting explicit day ranges"""
        from onyx.context.search.federated.slack_search_utils import (
            extract_date_range_from_query,
        )

        mock_llm = MagicMock()

        # Mock LLM response for "last 7 days"
        mock_llm.invoke.return_value = MagicMock()
        mock_llm.invoke.return_value.content = '{"days_back": 7}'

        days = extract_date_range_from_query(
            "show me results from last 7 days", mock_llm, 30
        )

        assert days == 7

    def test_enforce_default_search_days_limit(self) -> None:
        """Test that default_search_days is enforced as hard limit"""
        from onyx.context.search.federated.slack_search_utils import (
            extract_date_range_from_query,
        )

        mock_llm = MagicMock()

        # Mock LLM response for "last 90 days" but limit is 30
        mock_llm.invoke.return_value = MagicMock()
        mock_llm.invoke.return_value.content = '{"days_back": 90}'

        days = extract_date_range_from_query(
            "show me results from last 90 days", mock_llm, 30
        )

        # Should be capped at 30
        assert days == 30

    def test_no_date_mentioned(self) -> None:
        """Test when no date is mentioned in query"""
        from onyx.context.search.federated.slack_search_utils import (
            extract_date_range_from_query,
        )

        mock_llm = MagicMock()

        # Mock LLM response for no date
        mock_llm.invoke.return_value = MagicMock()
        mock_llm.invoke.return_value.content = '{"days_back": null}'

        days = extract_date_range_from_query("show me budget reports", mock_llm, 30)

        # Should use default
        assert days == 30

    def test_llm_failure_fallback(self) -> None:
        """Test fallback when LLM fails"""
        from onyx.context.search.federated.slack_search_utils import (
            extract_date_range_from_query,
        )

        mock_llm = MagicMock()

        # Mock LLM failure
        mock_llm.invoke.side_effect = Exception("LLM error")

        days = extract_date_range_from_query("show me results", mock_llm, 30)

        # Should fall back to default
        assert days == 30


class TestChannelTypeFiltering:
    """Test post-filtering based on channel type"""

    def test_include_public_channels_always(self) -> None:
        """Test that public channels are always included"""
        from onyx.context.search.federated.slack_search_utils import (
            should_include_message,
        )

        entities = {
            "include_dm": False,
            "include_private_channels": False,
        }

        assert should_include_message(ChannelType.PUBLIC_CHANNEL, entities) is True

    def test_filter_dm_based_on_entities(self) -> None:
        """Test DM filtering based on include_dm setting"""
        from onyx.context.search.federated.slack_search_utils import (
            should_include_message,
        )

        # DMs enabled
        entities_with_dm = {"include_dm": True}
        assert should_include_message(ChannelType.IM, entities_with_dm) is True

        # DMs disabled
        entities_no_dm = {"include_dm": False}
        assert should_include_message(ChannelType.IM, entities_no_dm) is False

    def test_filter_group_dm(self) -> None:
        """Test group DM (MPIM) filtering uses include_group_dm setting"""
        from onyx.context.search.federated.slack_search_utils import (
            should_include_message,
        )

        # Group DMs should follow include_group_dm setting
        entities_with_group_dm = {"include_group_dm": True}
        assert should_include_message(ChannelType.MPIM, entities_with_group_dm) is True

        entities_no_group_dm = {"include_group_dm": False}
        assert should_include_message(ChannelType.MPIM, entities_no_group_dm) is False

    def test_filter_private_channels(self) -> None:
        """Test private channel filtering"""
        from onyx.context.search.federated.slack_search_utils import (
            should_include_message,
        )

        # Private channels enabled
        entities_with_private = {"include_private_channels": True}
        assert (
            should_include_message(ChannelType.PRIVATE_CHANNEL, entities_with_private)
            is True
        )

        # Private channels disabled
        entities_no_private = {"include_private_channels": False}
        assert (
            should_include_message(ChannelType.PRIVATE_CHANNEL, entities_no_private)
            is False
        )

    def test_invalid_entities_default_behavior(self) -> None:
        """Test that invalid entities default to including messages"""
        from onyx.context.search.federated.slack_search_utils import (
            should_include_message,
        )

        invalid_entities = {"invalid_field": "value"}

        # Should default to including (safe behavior)
        assert (
            should_include_message(ChannelType.PUBLIC_CHANNEL, invalid_entities) is True
        )


================================================
FILE: backend/tests/unit/onyx/context/search/federated/test_slack_thread_context.py
================================================
"""Tests for Slack thread context fetching with rate limit handling."""

from datetime import datetime
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from slack_sdk.errors import SlackApiError

from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search import _fetch_thread_context
from onyx.context.search.federated.slack_search import (
    fetch_thread_contexts_with_rate_limit_handling,
)
from onyx.context.search.federated.slack_search import SlackRateLimitError
from onyx.context.search.federated.slack_search import ThreadContextResult


def _create_mock_message(
    message_id: str = "1234567890.123456",
    thread_id: str | None = "1234567890.000000",
    text: str = "test message",
    channel_id: str = "C123456",
) -> SlackMessage:
    """Create a mock SlackMessage for testing."""
    return SlackMessage(
        document_id=f"{channel_id}_{message_id}",
        channel_id=channel_id,
        message_id=message_id,
        thread_id=thread_id,
        link=f"https://slack.com/archives/{channel_id}/p{message_id.replace('.', '')}",
        metadata={"channel": "test-channel"},
        timestamp=datetime.now(),
        recency_bias=1.0,
        semantic_identifier="user in #test-channel: test message",
        text=text,
        highlighted_texts=set(),
        slack_score=1000.0,
    )


class TestSlackRateLimitError:
    """Test SlackRateLimitError exception."""

    def test_exception_is_raised(self) -> None:
        """Test that SlackRateLimitError can be raised and caught."""
        with pytest.raises(SlackRateLimitError):
            raise SlackRateLimitError("Rate limited")


class TestThreadContextResult:
    """Test ThreadContextResult class."""

    def test_success_result(self) -> None:
        """Test creating a success result."""
        result = ThreadContextResult.success("enriched text")
        assert result.text == "enriched text"
        assert not result.is_rate_limited
        assert not result.is_error

    def test_rate_limited_result(self) -> None:
        """Test creating a rate limited result."""
        result = ThreadContextResult.rate_limited("original text")
        assert result.text == "original text"
        assert result.is_rate_limited
        assert not result.is_error

    def test_error_result(self) -> None:
        """Test creating an error result."""
        result = ThreadContextResult.error("original text")
        assert result.text == "original text"
        assert not result.is_rate_limited
        assert result.is_error


class TestFetchThreadContext:
    """Test _fetch_thread_context function."""

    def test_non_thread_message_returns_success(self) -> None:
        """Test that non-thread messages return success with original text."""
        message = _create_mock_message(thread_id=None, text="original text")

        result = _fetch_thread_context(message, "xoxp-token", "T12345")

        assert result.text == "original text"
        assert not result.is_rate_limited
        assert not result.is_error

    @patch("onyx.context.search.federated.slack_search.WebClient")
    def test_rate_limit_returns_rate_limited_result(
        self, mock_webclient_class: MagicMock
    ) -> None:
        """Test that 429 rate limit returns rate_limited result."""
        message = _create_mock_message(text="original text")

        # Create mock response with 429 status
        mock_response = MagicMock()
        mock_response.status_code = 429

        # Create mock client that raises rate limit error
        mock_client = MagicMock()
        mock_client.conversations_replies.side_effect = SlackApiError(
            "ratelimited", mock_response
        )
        mock_webclient_class.return_value = mock_client

        result = _fetch_thread_context(message, "xoxp-token", "T12345")

        assert result.text == "original text"
        assert result.is_rate_limited
        assert not result.is_error

    @patch("onyx.context.search.federated.slack_search.WebClient")
    def test_other_api_error_returns_error_result(
        self, mock_webclient_class: MagicMock
    ) -> None:
        """Test that non-rate-limit API errors return error result."""
        message = _create_mock_message(text="original text")

        # Create mock response with non-429 error
        mock_response = MagicMock()
        mock_response.status_code = 500

        mock_client = MagicMock()
        mock_client.conversations_replies.side_effect = SlackApiError(
            "internal_error", mock_response
        )
        mock_webclient_class.return_value = mock_client

        result = _fetch_thread_context(message, "xoxp-token", "T12345")

        assert result.text == "original text"
        assert not result.is_rate_limited
        assert result.is_error

    @patch("onyx.context.search.federated.slack_search.WebClient")
    def test_unexpected_exception_returns_error_result(
        self, mock_webclient_class: MagicMock
    ) -> None:
        """Test that unexpected exceptions return error result."""
        message = _create_mock_message(text="original text")

        mock_client = MagicMock()
        mock_client.conversations_replies.side_effect = RuntimeError("Network error")
        mock_webclient_class.return_value = mock_client

        result = _fetch_thread_context(message, "xoxp-token", "T12345")

        assert result.text == "original text"
        assert not result.is_rate_limited
        assert result.is_error

    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
    @patch("onyx.context.search.federated.slack_search.WebClient")
    def test_successful_thread_fetch_returns_context(
        self, mock_webclient_class: MagicMock, mock_batch_profiles: MagicMock
    ) -> None:
        """Test that successful thread fetch returns enriched context."""
        message = _create_mock_message(
            message_id="1234567890.123456",
            thread_id="1234567890.000000",
            text="original text",
        )

        # Mock user profile lookup
        mock_batch_profiles.return_value = {
            "U111": "User One",
            "U222": "User Two",
            "U333": "User Three",
        }

        # Create mock response with thread messages
        mock_response = MagicMock()
        mock_response.get.return_value = [
            {
                "text": "Thread starter message",
                "user": "U111",
                "ts": "1234567890.000000",
            },
            {"text": "Reply 1", "user": "U222", "ts": "1234567890.111111"},
            {"text": "Reply 2 (matched)", "user": "U333", "ts": "1234567890.123456"},
        ]
        mock_response.validate.return_value = None

        mock_client = MagicMock()
        mock_client.conversations_replies.return_value = mock_response
        mock_webclient_class.return_value = mock_client

        result = _fetch_thread_context(message, "xoxp-token", "T12345")

        # Should contain thread starter and replies with resolved usernames
        assert "Thread starter message" in result.text
        assert "Reply" in result.text
        assert "User One" in result.text
        assert not result.is_rate_limited
        assert not result.is_error


class TestFetchThreadContextsWithRateLimitHandling:
    """Test fetch_thread_contexts_with_rate_limit_handling function."""

    def test_empty_message_list_returns_empty(self) -> None:
        """Test that empty message list returns empty list."""
        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=[],
            access_token="xoxp-token",
            team_id="T12345",
        )

        assert result == []

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_batch_processing_respects_batch_size(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that messages are processed in batches of specified size."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000") for i in range(7)
        ]

        # Mock parallel execution to return ThreadContextResult objects
        mock_parallel.return_value = [
            ThreadContextResult.success("enriched") for _ in range(3)
        ]

        fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=3,
            max_messages=None,
        )

        # Should have called parallel execution 3 times (7 messages / 3 batch = 3 batches)
        assert mock_parallel.call_count == 3

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_rate_limit_stops_further_batches(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that rate limiting stops processing of subsequent batches."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000", text=f"msg{i}")
            for i in range(6)
        ]

        # First batch succeeds, second batch has one success and one rate limit
        mock_parallel.side_effect = [
            [
                ThreadContextResult.success("enriched0"),
                ThreadContextResult.success("enriched1"),
            ],
            [
                ThreadContextResult.success("enriched2"),
                ThreadContextResult.rate_limited("msg3"),  # Rate limit hit
            ],
        ]

        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=2,
            max_messages=None,
        )

        # Should have 6 results total
        assert len(result) == 6
        # First 2 should be enriched
        assert result[0] == "enriched0"
        assert result[1] == "enriched1"
        # Second batch: first enriched (preserved!), second rate limited (original text)
        assert result[2] == "enriched2"
        assert result[3] == "msg3"
        # Last 2 (skipped due to rate limit) should be original text
        assert result[4] == "msg4"
        assert result[5] == "msg5"

        # Should only call parallel twice (stopped after rate limit detected)
        assert mock_parallel.call_count == 2

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_other_errors_dont_stop_processing(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that non-rate-limit errors don't stop batch processing."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000", text=f"msg{i}")
            for i in range(4)
        ]

        # First batch has an error (not rate limit), second batch succeeds
        mock_parallel.side_effect = [
            [
                ThreadContextResult.success("enriched0"),
                ThreadContextResult.error("msg1"),  # Error but NOT rate limit
            ],
            [
                ThreadContextResult.success("enriched2"),
                ThreadContextResult.success("enriched3"),
            ],
        ]

        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=2,
            max_messages=None,
        )

        # Should have 4 results total
        assert len(result) == 4
        assert result[0] == "enriched0"
        assert result[1] == "msg1"  # Error returns original text
        assert result[2] == "enriched2"
        assert result[3] == "enriched3"

        # Should have called both batches (errors don't stop processing)
        assert mock_parallel.call_count == 2


class TestMaxMessagesLimit:
    """Test max_messages parameter limiting thread context fetches."""

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_max_messages_limits_context_fetches(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that only top N messages get thread context when max_messages is set."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000", text=f"msg{i}")
            for i in range(10)
        ]

        # Mock parallel to return ThreadContextResult for messages that are fetched
        mock_parallel.return_value = [
            ThreadContextResult.success("enriched0"),
            ThreadContextResult.success("enriched1"),
            ThreadContextResult.success("enriched2"),
        ]

        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=5,
            max_messages=3,  # Only fetch context for top 3
        )

        # Should have 10 results total
        assert len(result) == 10
        # First 3 should be enriched
        assert result[0] == "enriched0"
        assert result[1] == "enriched1"
        assert result[2] == "enriched2"
        # Remaining 7 should be original text
        for i in range(3, 10):
            assert result[i] == f"msg{i}"

        # Should only call parallel once (3 messages with batch_size=5 = 1 batch)
        assert mock_parallel.call_count == 1

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_max_messages_none_fetches_all(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that max_messages=None fetches context for all messages."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000", text=f"msg{i}")
            for i in range(5)
        ]

        mock_parallel.return_value = [
            ThreadContextResult.success(f"enriched{i}") for i in range(5)
        ]

        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=10,
            max_messages=None,  # No limit
        )

        # All 5 should be enriched
        assert len(result) == 5
        for i in range(5):
            assert result[i] == f"enriched{i}"

    @patch("onyx.context.search.federated.slack_search._fetch_thread_context")
    @patch(
        "onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel"
    )
    def test_max_messages_greater_than_total_fetches_all(
        self,
        mock_parallel: MagicMock,
        mock_fetch_context: MagicMock,  # noqa: ARG002
    ) -> None:
        """Test that max_messages > total messages fetches all."""
        messages = [
            _create_mock_message(message_id=f"123456789{i}.000000", text=f"msg{i}")
            for i in range(3)
        ]

        mock_parallel.return_value = [
            ThreadContextResult.success("enriched0"),
            ThreadContextResult.success("enriched1"),
            ThreadContextResult.success("enriched2"),
        ]

        result = fetch_thread_contexts_with_rate_limit_handling(
            slack_messages=messages,
            access_token="xoxp-token",
            team_id="T12345",
            batch_size=10,
            max_messages=100,  # More than we have
        )

        # All 3 should be enriched
        assert len(result) == 3
        for i in range(3):
            assert result[i] == f"enriched{i}"


================================================
FILE: backend/tests/unit/onyx/db/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/db/conftest.py
================================================
"""Fixtures for unit-testing DAL classes with mocked sessions."""

from typing import Any
from unittest.mock import MagicMock

import pytest
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL


def model_attrs(obj: object) -> dict[str, Any]:
    """Extract user-set attributes from a SQLAlchemy model instance.

    Filters out SQLAlchemy internal state (``_sa_instance_state``).
    Use this in tests to assert the full set of fields on a model object
    so that adding a new field forces the test to be updated.
    """
    return {k: v for k, v in vars(obj).items() if not k.startswith("_")}


@pytest.fixture
def mock_db_session() -> MagicMock:
    """A MagicMock standing in for a SQLAlchemy Session."""
    return MagicMock(spec=Session)


@pytest.fixture
def scim_dal(mock_db_session: MagicMock) -> ScimDAL:
    """A ScimDAL backed by a mock session."""
    return ScimDAL(mock_db_session)


================================================
FILE: backend/tests/unit/onyx/db/test_assign_default_groups.py
================================================
"""
Unit tests for assign_user_to_default_groups__no_commit in onyx.db.users.

Covers:
1. Standard/service-account users get assigned to the correct default group
2. BOT, EXT_PERM_USER, ANONYMOUS account types are skipped
3. Missing default group raises RuntimeError
4. Already-in-group is a no-op
5. IntegrityError race condition is handled gracefully
6. The function never commits the session
"""

from unittest.mock import MagicMock
from uuid import uuid4

import pytest
from sqlalchemy.exc import IntegrityError

from onyx.db.enums import AccountType
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.users import assign_user_to_default_groups__no_commit


def _mock_user(
    account_type: AccountType = AccountType.STANDARD,
    email: str = "test@example.com",
) -> MagicMock:
    user = MagicMock()
    user.id = uuid4()
    user.email = email
    user.account_type = account_type
    return user


def _mock_group(name: str = "Basic", group_id: int = 1) -> MagicMock:
    group = MagicMock()
    group.id = group_id
    group.name = name
    group.is_default = True
    return group


def _make_query_chain(first_return: object = None) -> MagicMock:
    """Returns a mock that supports .filter(...).filter(...).first() chaining."""
    chain = MagicMock()
    chain.filter.return_value = chain
    chain.first.return_value = first_return
    return chain


def _setup_db_session(
    group_result: object = None,
    membership_result: object = None,
) -> MagicMock:
    """Create a db_session mock that routes query(UserGroup) and query(User__UserGroup)."""
    db_session = MagicMock()

    group_chain = _make_query_chain(group_result)
    membership_chain = _make_query_chain(membership_result)

    def query_side_effect(model: type) -> MagicMock:
        if model is UserGroup:
            return group_chain
        if model is User__UserGroup:
            return membership_chain
        return MagicMock()

    db_session.query.side_effect = query_side_effect
    return db_session


def test_standard_user_assigned_to_basic_group() -> None:
    group = _mock_group("Basic")
    db_session = _setup_db_session(group_result=group, membership_result=None)
    savepoint = MagicMock()
    db_session.begin_nested.return_value = savepoint
    user = _mock_user(AccountType.STANDARD)

    assign_user_to_default_groups__no_commit(db_session, user, is_admin=False)

    db_session.add.assert_called_once()
    added = db_session.add.call_args[0][0]
    assert isinstance(added, User__UserGroup)
    assert added.user_id == user.id
    assert added.user_group_id == group.id
    db_session.flush.assert_called_once()


def test_admin_user_assigned_to_admin_group() -> None:
    group = _mock_group("Admin", group_id=2)
    db_session = _setup_db_session(group_result=group, membership_result=None)
    savepoint = MagicMock()
    db_session.begin_nested.return_value = savepoint
    user = _mock_user(AccountType.STANDARD)

    assign_user_to_default_groups__no_commit(db_session, user, is_admin=True)

    db_session.add.assert_called_once()
    added = db_session.add.call_args[0][0]
    assert isinstance(added, User__UserGroup)
    assert added.user_group_id == group.id


@pytest.mark.parametrize(
    "account_type",
    [AccountType.BOT, AccountType.EXT_PERM_USER, AccountType.ANONYMOUS],
)
def test_excluded_account_types_skipped(account_type: AccountType) -> None:
    db_session = MagicMock()
    user = _mock_user(account_type)

    assign_user_to_default_groups__no_commit(db_session, user)

    db_session.query.assert_not_called()
    db_session.add.assert_not_called()


def test_service_account_not_skipped() -> None:
    group = _mock_group("Basic")
    db_session = _setup_db_session(group_result=group, membership_result=None)
    savepoint = MagicMock()
    db_session.begin_nested.return_value = savepoint
    user = _mock_user(AccountType.SERVICE_ACCOUNT)

    assign_user_to_default_groups__no_commit(db_session, user, is_admin=False)

    db_session.add.assert_called_once()


def test_missing_default_group_raises_error() -> None:
    db_session = _setup_db_session(group_result=None)
    user = _mock_user()

    with pytest.raises(RuntimeError, match="Default group .* not found"):
        assign_user_to_default_groups__no_commit(db_session, user)


def test_already_in_group_is_noop() -> None:
    group = _mock_group("Basic")
    existing_membership = MagicMock()
    db_session = _setup_db_session(
        group_result=group, membership_result=existing_membership
    )
    user = _mock_user()

    assign_user_to_default_groups__no_commit(db_session, user)

    db_session.add.assert_not_called()
    db_session.begin_nested.assert_not_called()


def test_integrity_error_race_condition_handled() -> None:
    group = _mock_group("Basic")
    db_session = _setup_db_session(group_result=group, membership_result=None)
    savepoint = MagicMock()
    db_session.begin_nested.return_value = savepoint
    db_session.flush.side_effect = IntegrityError(None, None, Exception("duplicate"))
    user = _mock_user()

    # Should not raise
    assign_user_to_default_groups__no_commit(db_session, user)

    savepoint.rollback.assert_called_once()


def test_no_commit_called_on_successful_assignment() -> None:
    group = _mock_group("Basic")
    db_session = _setup_db_session(group_result=group, membership_result=None)
    savepoint = MagicMock()
    db_session.begin_nested.return_value = savepoint
    user = _mock_user()

    assign_user_to_default_groups__no_commit(db_session, user)

    db_session.commit.assert_not_called()


================================================
FILE: backend/tests/unit/onyx/db/test_chat_sessions.py
================================================
"""Tests for get_chat_sessions_by_user filtering behavior.

Verifies that failed chat sessions (those with only SYSTEM messages) are
correctly filtered out while preserving recently created sessions, matching
the behavior specified in PR #7233.
"""

from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from uuid import UUID
from uuid import uuid4

import pytest
from sqlalchemy.orm import Session

from onyx.db.chat import get_chat_sessions_by_user
from onyx.db.models import ChatSession


def _make_session(
    user_id: UUID,
    time_created: datetime | None = None,
    time_updated: datetime | None = None,
    description: str = "",
) -> MagicMock:
    """Create a mock ChatSession with the given attributes."""
    session = MagicMock(spec=ChatSession)
    session.id = uuid4()
    session.user_id = user_id
    session.time_created = time_created or datetime.now(timezone.utc)
    session.time_updated = time_updated or session.time_created
    session.description = description
    session.deleted = False
    session.onyxbot_flow = False
    session.project_id = None
    return session


@pytest.fixture
def user_id() -> UUID:
    return uuid4()


@pytest.fixture
def old_time() -> datetime:
    """A timestamp well outside the 5-minute leeway window."""
    return datetime.now(timezone.utc) - timedelta(hours=1)


@pytest.fixture
def recent_time() -> datetime:
    """A timestamp within the 5-minute leeway window."""
    return datetime.now(timezone.utc) - timedelta(minutes=2)


class TestGetChatSessionsByUser:
    """Tests for the failed chat filtering logic in get_chat_sessions_by_user."""

    def test_filters_out_failed_sessions(
        self, user_id: UUID, old_time: datetime
    ) -> None:
        """Sessions with only SYSTEM messages should be excluded."""
        valid_session = _make_session(user_id, time_created=old_time)
        failed_session = _make_session(user_id, time_created=old_time)

        db_session = MagicMock(spec=Session)

        # First execute: returns all sessions
        # Second execute: returns only the valid session's ID (has non-system msgs)
        mock_result_1 = MagicMock()
        mock_result_1.scalars.return_value.all.return_value = [
            valid_session,
            failed_session,
        ]

        mock_result_2 = MagicMock()
        mock_result_2.scalars.return_value.all.return_value = [valid_session.id]

        db_session.execute.side_effect = [mock_result_1, mock_result_2]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=False,
        )

        assert len(result) == 1
        assert result[0].id == valid_session.id

    def test_keeps_recent_sessions_without_messages(
        self, user_id: UUID, recent_time: datetime
    ) -> None:
        """Recently created sessions should be kept even without messages."""
        recent_session = _make_session(user_id, time_created=recent_time)

        db_session = MagicMock(spec=Session)

        mock_result_1 = MagicMock()
        mock_result_1.scalars.return_value.all.return_value = [recent_session]

        db_session.execute.side_effect = [mock_result_1]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=False,
        )

        assert len(result) == 1
        assert result[0].id == recent_session.id
        # Should only have been called once — no second query needed
        # because the recent session is within the leeway window
        assert db_session.execute.call_count == 1

    def test_include_failed_chats_skips_filtering(
        self, user_id: UUID, old_time: datetime
    ) -> None:
        """When include_failed_chats=True, no filtering should occur."""
        session_a = _make_session(user_id, time_created=old_time)
        session_b = _make_session(user_id, time_created=old_time)

        db_session = MagicMock(spec=Session)

        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = [session_a, session_b]

        db_session.execute.side_effect = [mock_result]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=True,
        )

        assert len(result) == 2
        # Only one DB call — no second query for message validation
        assert db_session.execute.call_count == 1

    def test_limit_applied_after_filtering(
        self, user_id: UUID, old_time: datetime
    ) -> None:
        """Limit should be applied after filtering, not before."""
        sessions = [_make_session(user_id, time_created=old_time) for _ in range(5)]
        valid_ids = [s.id for s in sessions[:3]]

        db_session = MagicMock(spec=Session)

        mock_result_1 = MagicMock()
        mock_result_1.scalars.return_value.all.return_value = sessions

        mock_result_2 = MagicMock()
        mock_result_2.scalars.return_value.all.return_value = valid_ids

        db_session.execute.side_effect = [mock_result_1, mock_result_2]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=False,
            limit=2,
        )

        assert len(result) == 2
        # Should be the first 2 valid sessions (order preserved)
        assert result[0].id == sessions[0].id
        assert result[1].id == sessions[1].id

    def test_mixed_recent_and_old_sessions(
        self, user_id: UUID, old_time: datetime, recent_time: datetime
    ) -> None:
        """Mix of recent and old sessions should filter correctly."""
        old_valid = _make_session(user_id, time_created=old_time)
        old_failed = _make_session(user_id, time_created=old_time)
        recent_no_msgs = _make_session(user_id, time_created=recent_time)

        db_session = MagicMock(spec=Session)

        mock_result_1 = MagicMock()
        mock_result_1.scalars.return_value.all.return_value = [
            old_valid,
            old_failed,
            recent_no_msgs,
        ]

        mock_result_2 = MagicMock()
        mock_result_2.scalars.return_value.all.return_value = [old_valid.id]

        db_session.execute.side_effect = [mock_result_1, mock_result_2]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=False,
        )

        result_ids = {cs.id for cs in result}
        assert old_valid.id in result_ids
        assert recent_no_msgs.id in result_ids
        assert old_failed.id not in result_ids

    def test_empty_result(self, user_id: UUID) -> None:
        """No sessions should return empty list without errors."""
        db_session = MagicMock(spec=Session)

        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = []

        db_session.execute.side_effect = [mock_result]

        result = get_chat_sessions_by_user(
            user_id=user_id,
            deleted=False,
            db_session=db_session,
            include_failed_chats=False,
        )

        assert result == []
        assert db_session.execute.call_count == 1


================================================
FILE: backend/tests/unit/onyx/db/test_dal.py
================================================
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.db.dal import DAL


class TestDALSessionDelegation:
    """Verify that DAL methods delegate correctly to the underlying session."""

    def test_commit(self) -> None:
        session = MagicMock()
        dal = DAL(session)
        dal.commit()
        session.commit.assert_called_once()

    def test_flush(self) -> None:
        session = MagicMock()
        dal = DAL(session)
        dal.flush()
        session.flush.assert_called_once()

    def test_rollback(self) -> None:
        session = MagicMock()
        dal = DAL(session)
        dal.rollback()
        session.rollback.assert_called_once()

    def test_session_property_exposes_underlying_session(self) -> None:
        session = MagicMock()
        dal = DAL(session)
        assert dal.session is session

    def test_commit_propagates_exception(self) -> None:
        session = MagicMock()
        session.commit.side_effect = RuntimeError("db error")
        dal = DAL(session)
        with pytest.raises(RuntimeError, match="db error"):
            dal.commit()


class TestDALFromTenant:
    """Verify the from_tenant context manager lifecycle."""

    @patch("onyx.db.dal.get_session_with_tenant")
    def test_yields_dal_with_tenant_session(self, mock_get_session: MagicMock) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        with DAL.from_tenant("tenant_abc") as dal:
            assert isinstance(dal, DAL)
            assert dal.session is mock_session

        mock_get_session.assert_called_once_with(tenant_id="tenant_abc")

    @patch("onyx.db.dal.get_session_with_tenant")
    def test_session_closed_after_context_exits(
        self, mock_get_session: MagicMock
    ) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        with DAL.from_tenant("tenant_abc"):
            pass

        mock_get_session.return_value.__exit__.assert_called_once()

    @patch("onyx.db.dal.get_session_with_tenant")
    def test_session_closed_on_exception(self, mock_get_session: MagicMock) -> None:
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        with pytest.raises(ValueError):
            with DAL.from_tenant("tenant_abc"):
                raise ValueError("something broke")

        mock_get_session.return_value.__exit__.assert_called_once()

    @patch("onyx.db.dal.get_session_with_tenant")
    def test_subclass_from_tenant_returns_subclass_instance(
        self, mock_get_session: MagicMock
    ) -> None:
        """from_tenant uses cls(), so subclasses should get their own type back."""
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        class MyDAL(DAL):
            pass

        with MyDAL.from_tenant("tenant_abc") as dal:
            assert isinstance(dal, MyDAL)

    @patch("onyx.db.dal.get_session_with_tenant")
    def test_uncommitted_changes_not_auto_committed(
        self, mock_get_session: MagicMock
    ) -> None:
        """Exiting the context manager should NOT auto-commit."""
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        with DAL.from_tenant("tenant_abc"):
            pass

        mock_session.commit.assert_not_called()


================================================
FILE: backend/tests/unit/onyx/db/test_delete_user.py
================================================
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import UUID
from uuid import uuid4

from onyx.db.models import DocumentSet
from onyx.db.models import DocumentSet__User
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import SamlAccount
from onyx.db.models import User__UserGroup
from onyx.db.users import delete_user_from_db


def _mock_user(
    user_id: UUID | None = None, email: str = "test@example.com"
) -> MagicMock:
    user = MagicMock()
    user.id = user_id or uuid4()
    user.email = email
    user.oauth_accounts = []
    return user


def _make_query_chain() -> MagicMock:
    """Returns a mock that supports .filter(...).delete() and .filter(...).update(...)"""
    chain = MagicMock()
    chain.filter.return_value = chain
    return chain


@patch("onyx.db.users.remove_user_from_invited_users")
@patch(
    "onyx.db.users.fetch_ee_implementation_or_noop",
    return_value=lambda **_kwargs: None,
)
def test_delete_user_nulls_out_document_set_ownership(
    _mock_ee: Any, _mock_remove_invited: Any
) -> None:
    user = _mock_user()
    db_session = MagicMock()

    query_chains: dict[type, MagicMock] = {}

    def query_side_effect(model: type) -> MagicMock:
        if model not in query_chains:
            query_chains[model] = _make_query_chain()
        return query_chains[model]

    db_session.query.side_effect = query_side_effect

    delete_user_from_db(user, db_session)

    # Verify DocumentSet.user_id is nulled out (update, not delete)
    doc_set_chain = query_chains[DocumentSet]
    doc_set_chain.filter.assert_called()
    doc_set_chain.filter.return_value.update.assert_called_once_with(
        {DocumentSet.user_id: None}
    )

    # Verify Persona.user_id is nulled out (update, not delete)
    persona_chain = query_chains[Persona]
    persona_chain.filter.assert_called()
    persona_chain.filter.return_value.update.assert_called_once_with(
        {Persona.user_id: None}
    )


@patch("onyx.db.users.remove_user_from_invited_users")
@patch(
    "onyx.db.users.fetch_ee_implementation_or_noop",
    return_value=lambda **_kwargs: None,
)
def test_delete_user_cleans_up_join_tables(
    _mock_ee: Any, _mock_remove_invited: Any
) -> None:
    user = _mock_user()
    db_session = MagicMock()

    query_chains: dict[type, MagicMock] = {}

    def query_side_effect(model: type) -> MagicMock:
        if model not in query_chains:
            query_chains[model] = _make_query_chain()
        return query_chains[model]

    db_session.query.side_effect = query_side_effect

    delete_user_from_db(user, db_session)

    # Join tables should be deleted (not updated)
    for model in [DocumentSet__User, Persona__User, User__UserGroup, SamlAccount]:
        chain = query_chains[model]
        chain.filter.return_value.delete.assert_called_once()


@patch("onyx.db.users.remove_user_from_invited_users")
@patch(
    "onyx.db.users.fetch_ee_implementation_or_noop",
    return_value=lambda **_kwargs: None,
)
def test_delete_user_commits_and_removes_invited(
    _mock_ee: Any, mock_remove_invited: Any
) -> None:
    user = _mock_user(email="deleted@example.com")
    db_session = MagicMock()
    db_session.query.return_value = _make_query_chain()

    delete_user_from_db(user, db_session)

    db_session.delete.assert_called_once_with(user)
    db_session.commit.assert_called_once()
    mock_remove_invited.assert_called_once_with("deleted@example.com")


@patch("onyx.db.users.remove_user_from_invited_users")
@patch(
    "onyx.db.users.fetch_ee_implementation_or_noop",
    return_value=lambda **_kwargs: None,
)
def test_delete_user_deletes_oauth_accounts(
    _mock_ee: Any, _mock_remove_invited: Any
) -> None:
    user = _mock_user()
    oauth1 = MagicMock()
    oauth2 = MagicMock()
    user.oauth_accounts = [oauth1, oauth2]
    db_session = MagicMock()
    db_session.query.return_value = _make_query_chain()

    delete_user_from_db(user, db_session)

    db_session.delete.assert_any_call(oauth1)
    db_session.delete.assert_any_call(oauth2)


================================================
FILE: backend/tests/unit/onyx/db/test_llm_sync.py
================================================
"""Tests for LLM provider model sync functionality."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.db.llm import sync_model_configurations
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import SyncModelEntry


class TestSyncModelConfigurations:
    """Tests for sync_model_configurations function."""

    def test_inserts_new_models(self) -> None:
        """Test that new models are inserted."""
        # Mock the provider with no existing models
        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = []

        mock_session = MagicMock()

        with patch(
            "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
        ):
            models = [
                SyncModelEntry(
                    name="gpt-4",
                    display_name="GPT-4",
                    max_input_tokens=128000,
                    supports_image_input=True,
                ),
                SyncModelEntry(
                    name="gpt-4o",
                    display_name="GPT-4o",
                    max_input_tokens=128000,
                    supports_image_input=True,
                ),
            ]

            result = sync_model_configurations(
                db_session=mock_session,
                provider_name=LlmProviderNames.OPENAI,
                models=models,
            )

            assert result == 2  # Two new models
            assert (
                mock_session.execute.call_count == 2 * 3
            )  # 2 models * (model insert + chat insert + vision insert)
            mock_session.commit.assert_called_once()

    def test_skips_existing_models(self) -> None:
        """Test that existing models are not overwritten."""
        # Mock existing model
        mock_existing_model = MagicMock()
        mock_existing_model.name = "gpt-4"

        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = [mock_existing_model]

        mock_session = MagicMock()

        with patch(
            "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
        ):
            models = [
                SyncModelEntry(
                    name="gpt-4",  # Existing - should be skipped
                    display_name="GPT-4",
                    max_input_tokens=128000,
                    supports_image_input=True,
                ),
                SyncModelEntry(
                    name="gpt-4o",  # New - should be inserted
                    display_name="GPT-4o",
                    max_input_tokens=128000,
                    supports_image_input=True,
                ),
            ]

            result = sync_model_configurations(
                db_session=mock_session,
                provider_name=LlmProviderNames.OPENAI,
                models=models,
            )

            assert result == 1  # Only one new model
            assert mock_session.execute.call_count == 3

    def test_no_commit_when_no_new_models(self) -> None:
        """Test that commit is not called when no new models."""
        mock_existing_model = MagicMock()
        mock_existing_model.name = "gpt-4"

        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = [mock_existing_model]

        mock_session = MagicMock()

        with patch(
            "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
        ):
            models = [
                SyncModelEntry(
                    name="gpt-4",  # Already exists
                    display_name="GPT-4",
                    max_input_tokens=128000,
                    supports_image_input=True,
                ),
            ]

            result = sync_model_configurations(
                db_session=mock_session,
                provider_name=LlmProviderNames.OPENAI,
                models=models,
            )

            assert result == 0
            mock_session.commit.assert_not_called()

    def test_raises_on_missing_provider(self) -> None:
        """Test that ValueError is raised when provider not found."""
        mock_session = MagicMock()

        with patch("onyx.db.llm.fetch_existing_llm_provider", return_value=None):
            with pytest.raises(ValueError, match="not found"):
                sync_model_configurations(
                    db_session=mock_session,
                    provider_name="nonexistent",
                    models=[SyncModelEntry(name="model", display_name="Model")],
                )

    def test_handles_missing_optional_fields(self) -> None:
        """Test that optional fields default correctly."""
        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = []

        mock_session = MagicMock()

        with patch(
            "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
        ):
            # Model with only required fields (max_input_tokens and supports_image_input default)
            models = [
                SyncModelEntry(
                    name="model-1",
                    display_name="Model 1",
                ),
            ]

            result = sync_model_configurations(
                db_session=mock_session,
                provider_name="custom",
                models=models,
            )

            assert result == 1
            # Verify execute was called with correct defaults
            call_args = mock_session.execute.call_args
            assert call_args is not None


================================================
FILE: backend/tests/unit/onyx/db/test_persona_display_priority.py
================================================
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest

from onyx.db.persona import update_personas_display_priority


def _persona(persona_id: int, display_priority: int) -> SimpleNamespace:
    return SimpleNamespace(id=persona_id, display_priority=display_priority)


def test_update_display_priority_updates_subset(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    # Precondition
    persona_a = _persona(1, 5)
    persona_b = _persona(2, 6)
    db_session = MagicMock()
    user = MagicMock()
    monkeypatch.setattr(
        "onyx.db.persona.get_raw_personas_for_user",
        lambda user, db_session, **kwargs: [persona_a, persona_b],  # noqa: ARG005
    )

    # Under test
    update_personas_display_priority(
        {persona_a.id: 0}, db_session, user, commit_db_txn=True
    )

    # Postcondition
    assert persona_a.display_priority == 0
    assert persona_b.display_priority == 6
    db_session.commit.assert_called_once_with()


def test_update_display_priority_invalid_ids(monkeypatch: pytest.MonkeyPatch) -> None:
    # Precondition
    persona_a = _persona(1, 5)
    db_session = MagicMock()
    user = MagicMock()
    monkeypatch.setattr(
        "onyx.db.persona.get_raw_personas_for_user",
        lambda user, db_session, **kwargs: [persona_a],  # noqa: ARG005
    )

    # Under test
    with pytest.raises(ValueError):
        update_personas_display_priority(
            {persona_a.id: 0, 99: 1},
            db_session,
            user,
            commit_db_txn=True,
        )

    # Postcondition
    db_session.commit.assert_not_called()


================================================
FILE: backend/tests/unit/onyx/db/test_projects_upload_task_expiry.py
================================================
"""
Unit test verifying that the upload API path sends tasks with expires=.

The upload_files_to_user_files_with_indexing function must include expires=
on every send_task call to prevent phantom task accumulation if the worker
is down or slow.
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.models import UserFile
from onyx.db.projects import upload_files_to_user_files_with_indexing


def _make_mock_user_file() -> MagicMock:
    uf = MagicMock(spec=UserFile)
    uf.id = str(uuid4())
    return uf


@patch("onyx.db.projects.get_current_tenant_id", return_value="test_tenant")
@patch("onyx.db.projects.create_user_files")
@patch(
    "onyx.background.celery.versioned_apps.client.app",
    new_callable=MagicMock,
)
def test_send_task_includes_expires(
    mock_client_app: MagicMock,
    mock_create: MagicMock,
    mock_tenant: MagicMock,  # noqa: ARG001
) -> None:
    """Every send_task call from the upload path must include expires=."""
    user_files = [_make_mock_user_file(), _make_mock_user_file()]
    mock_create.return_value = MagicMock(
        user_files=user_files,
        rejected_files=[],
        id_to_temp_id={},
        skip_indexing_filenames=set(),
        indexable_files=user_files,
    )

    mock_user = MagicMock()
    mock_db_session = MagicMock()

    upload_files_to_user_files_with_indexing(
        files=[],
        project_id=None,
        user=mock_user,
        temp_id_map=None,
        db_session=mock_db_session,
    )

    assert mock_client_app.send_task.call_count == len(user_files)

    for call in mock_client_app.send_task.call_args_list:
        assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE
        assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_PROCESSING
        assert (
            call.kwargs.get("expires") == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
        ), "send_task must include expires= to prevent phantom task accumulation"


================================================
FILE: backend/tests/unit/onyx/db/test_scim_dal.py
================================================
import logging
from unittest.mock import MagicMock
from uuid import uuid4

import pytest

from ee.onyx.db.scim import ScimDAL
from onyx.db.models import ScimGroupMapping
from onyx.db.models import ScimToken
from onyx.db.models import ScimUserMapping
from tests.unit.onyx.db.conftest import model_attrs


class TestScimDALTokens:
    """Tests for ScimDAL token operations."""

    def test_create_token_adds_to_session(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        user_id = uuid4()

        scim_dal.create_token(
            name="test",
            hashed_token="abc123",
            token_display="****abcd",
            created_by_id=user_id,
        )

        mock_db_session.add.assert_called_once()
        mock_db_session.flush.assert_called_once()
        added_obj = mock_db_session.add.call_args[0][0]
        assert model_attrs(added_obj) == {
            "name": "test",
            "hashed_token": "abc123",
            "token_display": "****abcd",
            "created_by_id": user_id,
        }

    def test_get_token_by_hash_queries_session(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        token = ScimToken(
            id=1,
            name="test-token",
            hashed_token="a" * 64,
            token_display="onyx_scim_****abcd",
            is_active=True,
            created_by_id=uuid4(),
        )
        mock_db_session.scalar.return_value = token

        result = scim_dal.get_token_by_hash("a" * 64)

        assert result is token
        mock_db_session.scalar.assert_called_once()

    def test_revoke_token_sets_inactive(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        token = ScimToken(
            id=1,
            name="test-token",
            hashed_token="a" * 64,
            token_display="onyx_scim_****abcd",
            is_active=True,
            created_by_id=uuid4(),
        )
        mock_db_session.get.return_value = token
        expected = model_attrs(token) | {"is_active": False}

        scim_dal.revoke_token(1)

        assert model_attrs(token) == expected

    def test_revoke_nonexistent_token_raises(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.get.return_value = None

        with pytest.raises(ValueError, match="not found"):
            scim_dal.revoke_token(999)


class TestScimDALUserMappings:
    """Tests for ScimDAL user mapping operations."""

    def test_create_user_mapping(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        user_id = uuid4()

        scim_dal.create_user_mapping(external_id="ext-1", user_id=user_id)

        mock_db_session.add.assert_called_once()
        mock_db_session.flush.assert_called_once()
        added_obj = mock_db_session.add.call_args[0][0]
        assert model_attrs(added_obj) == {
            "external_id": "ext-1",
            "user_id": user_id,
            "scim_username": None,
            "department": None,
            "manager": None,
            "given_name": None,
            "family_name": None,
            "scim_emails_json": None,
        }

    def test_delete_user_mapping(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        mapping = ScimUserMapping(id=1, external_id="ext-1", user_id=uuid4())
        mock_db_session.get.return_value = mapping

        scim_dal.delete_user_mapping(1)

        mock_db_session.delete.assert_called_once_with(mapping)

    def test_delete_nonexistent_user_mapping_is_idempotent(
        self,
        scim_dal: ScimDAL,
        mock_db_session: MagicMock,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        mock_db_session.get.return_value = None

        with caplog.at_level(logging.WARNING):
            scim_dal.delete_user_mapping(999)

        mock_db_session.delete.assert_not_called()
        assert "SCIM user mapping 999 not found" in caplog.text

    def test_update_user_mapping_external_id(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        mapping = ScimUserMapping(id=1, external_id="old-id", user_id=uuid4())
        mock_db_session.get.return_value = mapping
        expected = model_attrs(mapping) | {"external_id": "new-id"}

        result = scim_dal.update_user_mapping_external_id(1, "new-id")

        assert result is mapping
        assert model_attrs(result) == expected

    def test_update_nonexistent_user_mapping_raises(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.get.return_value = None

        with pytest.raises(ValueError, match="not found"):
            scim_dal.update_user_mapping_external_id(999, "new-id")


class TestScimDALGroupMappings:
    """Tests for ScimDAL group mapping operations."""

    def test_create_group_mapping(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        scim_dal.create_group_mapping(external_id="ext-g1", user_group_id=5)

        mock_db_session.add.assert_called_once()
        mock_db_session.flush.assert_called_once()
        added_obj = mock_db_session.add.call_args[0][0]
        assert model_attrs(added_obj) == {
            "external_id": "ext-g1",
            "user_group_id": 5,
        }

    def test_delete_group_mapping(
        self, scim_dal: ScimDAL, mock_db_session: MagicMock
    ) -> None:
        mapping = ScimGroupMapping(id=1, external_id="ext-g1", user_group_id=10)
        mock_db_session.get.return_value = mapping

        scim_dal.delete_group_mapping(1)

        mock_db_session.delete.assert_called_once_with(mapping)

    def test_delete_nonexistent_group_mapping_is_idempotent(
        self,
        scim_dal: ScimDAL,
        mock_db_session: MagicMock,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        mock_db_session.get.return_value = None

        with caplog.at_level(logging.WARNING):
            scim_dal.delete_group_mapping(999)

        mock_db_session.delete.assert_not_called()
        assert "SCIM group mapping 999 not found" in caplog.text


================================================
FILE: backend/tests/unit/onyx/db/test_tools.py
================================================
from unittest.mock import MagicMock
from uuid import uuid4

from onyx.db import tools as tools_mod


def test_create_tool_call_no_commit_sanitizes_fields() -> None:
    mock_session = MagicMock()

    tool_call = tools_mod.create_tool_call_no_commit(
        chat_session_id=uuid4(),
        parent_chat_message_id=1,
        turn_number=0,
        tool_id=1,
        tool_call_id="tc-1",
        tool_call_arguments={"task\x00": "research\ud800 topic"},
        tool_call_response="report\x00 text\udfff here",
        tool_call_tokens=10,
        db_session=mock_session,
        reasoning_tokens="reason\x00ing\ud800",
        generated_images=[{"url": "img\x00.png\udfff"}],
    )

    assert tool_call.tool_call_response == "report text here"
    assert tool_call.reasoning_tokens == "reasoning"
    assert tool_call.tool_call_arguments == {"task": "research topic"}
    assert tool_call.generated_images == [{"url": "img.png"}]


================================================
FILE: backend/tests/unit/onyx/db/test_usage.py
================================================
"""Unit tests for tenant usage tracking and limits."""

from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.db.usage import check_usage_limit
from onyx.db.usage import get_current_window_start
from onyx.db.usage import get_or_create_tenant_usage
from onyx.db.usage import get_tenant_usage_stats
from onyx.db.usage import increment_usage
from onyx.db.usage import TenantUsageStats
from onyx.db.usage import UsageLimitExceededError
from onyx.db.usage import UsageType


class TestGetCurrentWindowStart:
    """Tests for get_current_window_start function."""

    def test_weekly_window_aligns_to_monday(self) -> None:
        """Test that weekly windows align to Monday 00:00 UTC."""
        with patch("onyx.db.usage.USAGE_LIMIT_WINDOW_SECONDS", 604800):  # 1 week
            window_start = get_current_window_start()

            # Window should be on a Monday
            assert window_start.weekday() == 0  # Monday

            # Window should be at midnight UTC
            assert window_start.hour == 0
            assert window_start.minute == 0
            assert window_start.second == 0
            assert window_start.microsecond == 0

    def test_window_start_is_timezone_aware(self) -> None:
        """Test that window start is timezone-aware."""
        window_start = get_current_window_start()
        assert window_start.tzinfo is not None


class TestGetOrCreateTenantUsage:
    """Tests for get_or_create_tenant_usage function."""

    def test_creates_or_gets_usage_record(self) -> None:
        """Test that get_or_create returns a usage record via atomic upsert."""
        mock_usage = MagicMock()
        mock_usage.llm_cost_cents = 0.0
        mock_usage.chunks_indexed = 0

        mock_session = MagicMock()
        # The new implementation uses INSERT ... ON CONFLICT with RETURNING
        # which calls execute().scalar_one()
        mock_session.execute.return_value.scalar_one.return_value = mock_usage

        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
        usage = get_or_create_tenant_usage(mock_session, window_start)

        # Verify execute was called (with the INSERT ... ON CONFLICT statement)
        mock_session.execute.assert_called_once()
        mock_session.flush.assert_called_once()
        assert usage == mock_usage

    def test_returns_usage_record_from_atomic_upsert(self) -> None:
        """Test that the returned usage record comes from the atomic upsert."""
        mock_usage = MagicMock()
        mock_usage.llm_cost_cents = 100.0
        mock_usage.chunks_indexed = 500

        mock_session = MagicMock()
        mock_session.execute.return_value.scalar_one.return_value = mock_usage

        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
        usage = get_or_create_tenant_usage(mock_session, window_start)

        assert usage == mock_usage
        assert usage.llm_cost_cents == 100.0
        assert usage.chunks_indexed == 500


class TestGetTenantUsageStats:
    """Tests for get_tenant_usage_stats function."""

    def test_returns_zero_stats_when_no_record_exists(self) -> None:
        """Test that zero stats are returned when no usage record exists."""
        mock_session = MagicMock()
        mock_session.execute.return_value.scalar_one_or_none.return_value = None

        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
        stats = get_tenant_usage_stats(mock_session, window_start)

        assert stats.llm_cost_cents == 0.0
        assert stats.chunks_indexed == 0
        assert stats.api_calls == 0
        assert stats.non_streaming_api_calls == 0

    def test_returns_actual_stats_when_record_exists(self) -> None:
        """Test that actual stats are returned when usage record exists."""
        mock_usage = MagicMock()
        mock_usage.window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
        mock_usage.llm_cost_cents = 250.5
        mock_usage.chunks_indexed = 1000
        mock_usage.api_calls = 50
        mock_usage.non_streaming_api_calls = 10

        mock_session = MagicMock()
        mock_session.execute.return_value.scalar_one_or_none.return_value = mock_usage

        stats = get_tenant_usage_stats(mock_session)

        assert stats.llm_cost_cents == 250.5
        assert stats.chunks_indexed == 1000
        assert stats.api_calls == 50
        assert stats.non_streaming_api_calls == 10


class TestIncrementUsage:
    """Tests for increment_usage function."""

    def test_increments_llm_cost(self) -> None:
        """Test that LLM cost is incremented correctly."""
        mock_usage = MagicMock()
        mock_usage.llm_cost_cents = 100.0

        mock_session = MagicMock()

        with patch("onyx.db.usage.get_or_create_tenant_usage", return_value=mock_usage):
            increment_usage(mock_session, UsageType.LLM_COST, 50.5)

        assert mock_usage.llm_cost_cents == 150.5
        mock_session.flush.assert_called_once()

    def test_increments_chunks_indexed(self) -> None:
        """Test that chunks indexed is incremented correctly."""
        mock_usage = MagicMock()
        mock_usage.chunks_indexed = 500

        mock_session = MagicMock()

        with patch("onyx.db.usage.get_or_create_tenant_usage", return_value=mock_usage):
            increment_usage(mock_session, UsageType.CHUNKS_INDEXED, 100)

        assert mock_usage.chunks_indexed == 600

    def test_increments_api_calls(self) -> None:
        """Test that API calls is incremented correctly."""
        mock_usage = MagicMock()
        mock_usage.api_calls = 10

        mock_session = MagicMock()

        with patch("onyx.db.usage.get_or_create_tenant_usage", return_value=mock_usage):
            increment_usage(mock_session, UsageType.API_CALLS, 1)

        assert mock_usage.api_calls == 11

    def test_increments_non_streaming_calls(self) -> None:
        """Test that non-streaming API calls is incremented correctly."""
        mock_usage = MagicMock()
        mock_usage.non_streaming_api_calls = 5

        mock_session = MagicMock()

        with patch("onyx.db.usage.get_or_create_tenant_usage", return_value=mock_usage):
            increment_usage(mock_session, UsageType.NON_STREAMING_API_CALLS, 1)

        assert mock_usage.non_streaming_api_calls == 6


class TestCheckUsageLimit:
    """Tests for check_usage_limit function."""

    def test_passes_when_under_limit(self) -> None:
        """Test that check passes when usage is under the limit."""
        mock_session = MagicMock()

        mock_stats = TenantUsageStats(
            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
            llm_cost_cents=100.0,
            chunks_indexed=500,
            api_calls=10,
            non_streaming_api_calls=5,
        )

        with patch("onyx.db.usage.get_tenant_usage_stats", return_value=mock_stats):
            # Should not raise
            check_usage_limit(
                mock_session,
                UsageType.LLM_COST,
                limit=500,
                pending_amount=0,
            )

    def test_passes_when_exactly_at_limit(self) -> None:
        """Test that check passes when usage is exactly at the limit."""
        mock_session = MagicMock()

        mock_stats = TenantUsageStats(
            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
            llm_cost_cents=500.0,
            chunks_indexed=500,
            api_calls=10,
            non_streaming_api_calls=5,
        )

        with patch("onyx.db.usage.get_tenant_usage_stats", return_value=mock_stats):
            # Should not raise - at limit but not over
            check_usage_limit(
                mock_session,
                UsageType.LLM_COST,
                limit=500,
                pending_amount=0,
            )

    def test_fails_when_over_limit(self) -> None:
        """Test that check fails when usage exceeds the limit."""
        mock_session = MagicMock()

        mock_stats = TenantUsageStats(
            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
            llm_cost_cents=501.0,
            chunks_indexed=500,
            api_calls=10,
            non_streaming_api_calls=5,
        )

        with patch("onyx.db.usage.get_tenant_usage_stats", return_value=mock_stats):
            with pytest.raises(UsageLimitExceededError) as exc_info:
                check_usage_limit(
                    mock_session,
                    UsageType.LLM_COST,
                    limit=500,
                    pending_amount=0,
                )

            assert exc_info.value.usage_type == UsageType.LLM_COST
            assert exc_info.value.current == 501.0
            assert exc_info.value.limit == 500.0

    def test_fails_when_pending_would_exceed_limit(self) -> None:
        """Test that check fails when pending amount would exceed the limit."""
        mock_session = MagicMock()

        mock_stats = TenantUsageStats(
            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
            llm_cost_cents=400.0,
            chunks_indexed=500,
            api_calls=10,
            non_streaming_api_calls=5,
        )

        with patch("onyx.db.usage.get_tenant_usage_stats", return_value=mock_stats):
            with pytest.raises(UsageLimitExceededError) as exc_info:
                check_usage_limit(
                    mock_session,
                    UsageType.LLM_COST,
                    limit=500,
                    pending_amount=150,  # 400 + 150 = 550 > 500
                )

            assert exc_info.value.current == 550.0  # includes pending

    def test_checks_chunks_indexed_limit(self) -> None:
        """Test that chunk indexing limit is checked correctly."""
        mock_session = MagicMock()

        mock_stats = TenantUsageStats(
            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
            llm_cost_cents=100.0,
            chunks_indexed=10001,
            api_calls=10,
            non_streaming_api_calls=5,
        )

        with patch("onyx.db.usage.get_tenant_usage_stats", return_value=mock_stats):
            with pytest.raises(UsageLimitExceededError) as exc_info:
                check_usage_limit(
                    mock_session,
                    UsageType.CHUNKS_INDEXED,
                    limit=10000,
                    pending_amount=0,
                )

            assert exc_info.value.usage_type == UsageType.CHUNKS_INDEXED


class TestUsageLimitExceededError:
    """Tests for UsageLimitExceededError exception."""

    def test_error_message_format(self) -> None:
        """Test that error message is formatted correctly."""
        error = UsageLimitExceededError(
            usage_type=UsageType.LLM_COST,
            current=150.5,
            limit=100.0,
        )

        assert "llm_cost_cents" in str(error)
        assert "150.5" in str(error)
        assert "100" in str(error)

    def test_stores_values(self) -> None:
        """Test that error stores all values correctly."""
        error = UsageLimitExceededError(
            usage_type=UsageType.API_CALLS,
            current=1001,
            limit=1000,
        )

        assert error.usage_type == UsageType.API_CALLS
        assert error.current == 1001
        assert error.limit == 1000


class TestWindowRollover:
    """Tests for window rollover behavior."""

    def test_new_window_resets_usage(self) -> None:
        """Test that a new window has zero usage even if previous window had usage."""
        mock_session = MagicMock()
        mock_session.execute.return_value.scalar_one_or_none.return_value = None

        # Get stats for a new window (no existing record)
        with patch(
            "onyx.db.usage.get_current_window_start",
            return_value=datetime(2024, 1, 8, tzinfo=timezone.utc),
        ):
            stats = get_tenant_usage_stats(mock_session)

        # New window should have zero usage
        assert stats.llm_cost_cents == 0.0
        assert stats.chunks_indexed == 0
        assert stats.api_calls == 0
        assert stats.non_streaming_api_calls == 0


================================================
FILE: backend/tests/unit/onyx/db/test_voice.py
================================================
"""Unit tests for onyx.db.voice module."""

from unittest.mock import MagicMock
from uuid import uuid4

import pytest

from onyx.db.models import VoiceProvider
from onyx.db.voice import deactivate_stt_provider
from onyx.db.voice import deactivate_tts_provider
from onyx.db.voice import delete_voice_provider
from onyx.db.voice import fetch_default_stt_provider
from onyx.db.voice import fetch_default_tts_provider
from onyx.db.voice import fetch_voice_provider_by_id
from onyx.db.voice import fetch_voice_provider_by_type
from onyx.db.voice import fetch_voice_providers
from onyx.db.voice import MAX_VOICE_PLAYBACK_SPEED
from onyx.db.voice import MIN_VOICE_PLAYBACK_SPEED
from onyx.db.voice import set_default_stt_provider
from onyx.db.voice import set_default_tts_provider
from onyx.db.voice import update_user_voice_settings
from onyx.db.voice import upsert_voice_provider
from onyx.error_handling.exceptions import OnyxError


def _make_voice_provider(
    id: int = 1,
    name: str = "Test Provider",
    provider_type: str = "openai",
    is_default_stt: bool = False,
    is_default_tts: bool = False,
) -> VoiceProvider:
    """Create a VoiceProvider instance for testing."""
    provider = VoiceProvider()
    provider.id = id
    provider.name = name
    provider.provider_type = provider_type
    provider.is_default_stt = is_default_stt
    provider.is_default_tts = is_default_tts
    provider.api_key = None
    provider.api_base = None
    provider.custom_config = None
    provider.stt_model = None
    provider.tts_model = None
    provider.default_voice = None
    return provider


class TestFetchVoiceProviders:
    """Tests for fetch_voice_providers."""

    def test_returns_all_providers(self, mock_db_session: MagicMock) -> None:
        providers = [
            _make_voice_provider(id=1, name="Provider A"),
            _make_voice_provider(id=2, name="Provider B"),
        ]
        mock_db_session.scalars.return_value.all.return_value = providers

        result = fetch_voice_providers(mock_db_session)

        assert result == providers
        mock_db_session.scalars.assert_called_once()

    def test_returns_empty_list_when_no_providers(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalars.return_value.all.return_value = []

        result = fetch_voice_providers(mock_db_session)

        assert result == []


class TestFetchVoiceProviderById:
    """Tests for fetch_voice_provider_by_id."""

    def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:
        provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = provider

        result = fetch_voice_provider_by_id(mock_db_session, 1)

        assert result is provider
        mock_db_session.scalar.assert_called_once()

    def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:
        mock_db_session.scalar.return_value = None

        result = fetch_voice_provider_by_id(mock_db_session, 999)

        assert result is None


class TestFetchDefaultProviders:
    """Tests for fetch_default_stt_provider and fetch_default_tts_provider."""

    def test_fetch_default_stt_provider_returns_provider(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1, is_default_stt=True)
        mock_db_session.scalar.return_value = provider

        result = fetch_default_stt_provider(mock_db_session)

        assert result is provider

    def test_fetch_default_stt_provider_returns_none_when_no_default(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        result = fetch_default_stt_provider(mock_db_session)

        assert result is None

    def test_fetch_default_tts_provider_returns_provider(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1, is_default_tts=True)
        mock_db_session.scalar.return_value = provider

        result = fetch_default_tts_provider(mock_db_session)

        assert result is provider

    def test_fetch_default_tts_provider_returns_none_when_no_default(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        result = fetch_default_tts_provider(mock_db_session)

        assert result is None


class TestFetchVoiceProviderByType:
    """Tests for fetch_voice_provider_by_type."""

    def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:
        provider = _make_voice_provider(id=1, provider_type="openai")
        mock_db_session.scalar.return_value = provider

        result = fetch_voice_provider_by_type(mock_db_session, "openai")

        assert result is provider

    def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:
        mock_db_session.scalar.return_value = None

        result = fetch_voice_provider_by_type(mock_db_session, "nonexistent")

        assert result is None


class TestUpsertVoiceProvider:
    """Tests for upsert_voice_provider."""

    def test_creates_new_provider_when_no_id(self, mock_db_session: MagicMock) -> None:
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        upsert_voice_provider(
            db_session=mock_db_session,
            provider_id=None,
            name="New Provider",
            provider_type="openai",
            api_key="test-key",
            api_key_changed=True,
        )

        mock_db_session.add.assert_called_once()
        mock_db_session.flush.assert_called()
        added_obj = mock_db_session.add.call_args[0][0]
        assert added_obj.name == "New Provider"
        assert added_obj.provider_type == "openai"

    def test_updates_existing_provider(self, mock_db_session: MagicMock) -> None:
        existing_provider = _make_voice_provider(id=1, name="Old Name")
        mock_db_session.scalar.return_value = existing_provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        upsert_voice_provider(
            db_session=mock_db_session,
            provider_id=1,
            name="Updated Name",
            provider_type="elevenlabs",
            api_key="new-key",
            api_key_changed=True,
        )

        mock_db_session.add.assert_not_called()
        assert existing_provider.name == "Updated Name"
        assert existing_provider.provider_type == "elevenlabs"

    def test_raises_when_provider_not_found(self, mock_db_session: MagicMock) -> None:
        mock_db_session.scalar.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            upsert_voice_provider(
                db_session=mock_db_session,
                provider_id=999,
                name="Test",
                provider_type="openai",
                api_key=None,
                api_key_changed=False,
            )

        assert "No voice provider with id 999" in str(exc_info.value)

    def test_does_not_update_api_key_when_not_changed(
        self, mock_db_session: MagicMock
    ) -> None:
        existing_provider = _make_voice_provider(id=1)
        existing_provider.api_key = "original-key"  # type: ignore[assignment]
        original_api_key = existing_provider.api_key
        mock_db_session.scalar.return_value = existing_provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        upsert_voice_provider(
            db_session=mock_db_session,
            provider_id=1,
            name="Test",
            provider_type="openai",
            api_key="new-key",
            api_key_changed=False,
        )

        # api_key should remain unchanged (same object reference)
        assert existing_provider.api_key is original_api_key

    def test_activates_stt_when_requested(self, mock_db_session: MagicMock) -> None:
        existing_provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = existing_provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None
        mock_db_session.execute.return_value = None

        upsert_voice_provider(
            db_session=mock_db_session,
            provider_id=1,
            name="Test",
            provider_type="openai",
            api_key=None,
            api_key_changed=False,
            activate_stt=True,
        )

        assert existing_provider.is_default_stt is True

    def test_activates_tts_when_requested(self, mock_db_session: MagicMock) -> None:
        existing_provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = existing_provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None
        mock_db_session.execute.return_value = None

        upsert_voice_provider(
            db_session=mock_db_session,
            provider_id=1,
            name="Test",
            provider_type="openai",
            api_key=None,
            api_key_changed=False,
            activate_tts=True,
        )

        assert existing_provider.is_default_tts is True


class TestDeleteVoiceProvider:
    """Tests for delete_voice_provider."""

    def test_hard_deletes_provider_when_found(self, mock_db_session: MagicMock) -> None:
        provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = provider

        delete_voice_provider(mock_db_session, 1)

        mock_db_session.delete.assert_called_once_with(provider)
        mock_db_session.flush.assert_called_once()

    def test_does_nothing_when_provider_not_found(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        delete_voice_provider(mock_db_session, 999)

        mock_db_session.flush.assert_not_called()


class TestSetDefaultProviders:
    """Tests for set_default_stt_provider and set_default_tts_provider."""

    def test_set_default_stt_provider_deactivates_others(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = provider
        mock_db_session.execute.return_value = None
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        result = set_default_stt_provider(db_session=mock_db_session, provider_id=1)

        mock_db_session.execute.assert_called_once()
        assert result.is_default_stt is True

    def test_set_default_stt_provider_raises_when_not_found(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            set_default_stt_provider(db_session=mock_db_session, provider_id=999)

        assert "No voice provider with id 999" in str(exc_info.value)

    def test_set_default_tts_provider_deactivates_others(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = provider
        mock_db_session.execute.return_value = None
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        result = set_default_tts_provider(db_session=mock_db_session, provider_id=1)

        mock_db_session.execute.assert_called_once()
        assert result.is_default_tts is True

    def test_set_default_tts_provider_updates_model_when_provided(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1)
        mock_db_session.scalar.return_value = provider
        mock_db_session.execute.return_value = None
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        result = set_default_tts_provider(
            db_session=mock_db_session, provider_id=1, tts_model="tts-1-hd"
        )

        assert result.tts_model == "tts-1-hd"

    def test_set_default_tts_provider_raises_when_not_found(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            set_default_tts_provider(db_session=mock_db_session, provider_id=999)

        assert "No voice provider with id 999" in str(exc_info.value)


class TestDeactivateProviders:
    """Tests for deactivate_stt_provider and deactivate_tts_provider."""

    def test_deactivate_stt_provider_sets_false(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1, is_default_stt=True)
        mock_db_session.scalar.return_value = provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        result = deactivate_stt_provider(db_session=mock_db_session, provider_id=1)

        assert result.is_default_stt is False

    def test_deactivate_stt_provider_raises_when_not_found(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            deactivate_stt_provider(db_session=mock_db_session, provider_id=999)

        assert "No voice provider with id 999" in str(exc_info.value)

    def test_deactivate_tts_provider_sets_false(
        self, mock_db_session: MagicMock
    ) -> None:
        provider = _make_voice_provider(id=1, is_default_tts=True)
        mock_db_session.scalar.return_value = provider
        mock_db_session.flush.return_value = None
        mock_db_session.refresh.return_value = None

        result = deactivate_tts_provider(db_session=mock_db_session, provider_id=1)

        assert result.is_default_tts is False

    def test_deactivate_tts_provider_raises_when_not_found(
        self, mock_db_session: MagicMock
    ) -> None:
        mock_db_session.scalar.return_value = None

        with pytest.raises(OnyxError) as exc_info:
            deactivate_tts_provider(db_session=mock_db_session, provider_id=999)

        assert "No voice provider with id 999" in str(exc_info.value)


class TestUpdateUserVoiceSettings:
    """Tests for update_user_voice_settings."""

    def test_updates_auto_send(self, mock_db_session: MagicMock) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id, auto_send=True)

        mock_db_session.execute.assert_called_once()
        mock_db_session.flush.assert_called_once()

    def test_updates_auto_playback(self, mock_db_session: MagicMock) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id, auto_playback=True)

        mock_db_session.execute.assert_called_once()
        mock_db_session.flush.assert_called_once()

    def test_updates_playback_speed_within_range(
        self, mock_db_session: MagicMock
    ) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id, playback_speed=1.5)

        mock_db_session.execute.assert_called_once()

    def test_clamps_playback_speed_to_min(self, mock_db_session: MagicMock) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id, playback_speed=0.1)

        mock_db_session.execute.assert_called_once()
        stmt = mock_db_session.execute.call_args[0][0]
        compiled = stmt.compile(compile_kwargs={"literal_binds": True})
        assert str(MIN_VOICE_PLAYBACK_SPEED) in str(compiled)

    def test_clamps_playback_speed_to_max(self, mock_db_session: MagicMock) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id, playback_speed=5.0)

        mock_db_session.execute.assert_called_once()
        stmt = mock_db_session.execute.call_args[0][0]
        compiled = stmt.compile(compile_kwargs={"literal_binds": True})
        assert str(MAX_VOICE_PLAYBACK_SPEED) in str(compiled)

    def test_updates_multiple_settings(self, mock_db_session: MagicMock) -> None:
        user_id = uuid4()

        update_user_voice_settings(
            mock_db_session,
            user_id,
            auto_send=True,
            auto_playback=False,
            playback_speed=1.25,
        )

        mock_db_session.execute.assert_called_once()
        mock_db_session.flush.assert_called_once()

    def test_does_nothing_when_no_settings_provided(
        self, mock_db_session: MagicMock
    ) -> None:
        user_id = uuid4()

        update_user_voice_settings(mock_db_session, user_id)

        mock_db_session.execute.assert_not_called()
        mock_db_session.flush.assert_not_called()


class TestSpeedClampingLogic:
    """Tests for the speed clamping constants and logic."""

    def test_min_speed_constant(self) -> None:
        assert MIN_VOICE_PLAYBACK_SPEED == 0.5

    def test_max_speed_constant(self) -> None:
        assert MAX_VOICE_PLAYBACK_SPEED == 2.0

    def test_clamping_formula(self) -> None:
        """Verify the clamping formula used in update_user_voice_settings."""
        test_cases = [
            (0.1, MIN_VOICE_PLAYBACK_SPEED),
            (0.5, 0.5),
            (1.0, 1.0),
            (1.5, 1.5),
            (2.0, 2.0),
            (3.0, MAX_VOICE_PLAYBACK_SPEED),
        ]
        for speed, expected in test_cases:
            clamped = max(
                MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, speed)
            )
            assert (
                clamped == expected
            ), f"speed={speed} expected={expected} got={clamped}"


================================================
FILE: backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py
================================================
import pytest

from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.string_filtering import (
    MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE


SINGLE_TENANT_STATE = TenantState(
    tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False
)
MULTI_TENANT_STATE = TenantState(
    tenant_id="tenant_abcdef12-3456-7890-abcd-ef1234567890", multitenant=True
)
EXPECTED_SHORT_TENANT = "abcdef12"


class TestGetOpensearchDocChunkIdSingleTenant:
    def test_basic(self) -> None:
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, "my-doc-id", chunk_index=0
        )
        assert result == f"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0"

    def test_custom_chunk_size(self) -> None:
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, "doc1", chunk_index=3, max_chunk_size=1024
        )
        assert result == "doc1__1024__3"

    def test_special_chars_are_stripped(self) -> None:
        """Tests characters not matching [A-Za-z0-9_.-~] are removed."""
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, "doc/with?special#chars&more%stuff", chunk_index=0
        )
        assert "/" not in result
        assert "?" not in result
        assert "#" not in result
        assert result == f"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0"

    def test_short_doc_id_not_hashed(self) -> None:
        """
        Tests that a short doc ID should appear directly in the result, not as a
        hash.
        """
        doc_id = "short-id"
        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
        assert "short-id" in result

    def test_long_doc_id_is_hashed(self) -> None:
        """
        Tests that a doc ID exceeding the max length should be replaced with a
        blake2b hash.
        """
        # Create a doc ID that will exceed max length after the suffix is
        # appended.
        doc_id = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
        # The original doc ID should NOT appear in the result.
        assert doc_id not in result
        # The suffix should still be present.
        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result

    def test_long_doc_id_hash_is_deterministic(self) -> None:
        doc_id = "x" * MAX_DOCUMENT_ID_ENCODED_LENGTH
        result1 = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, doc_id, chunk_index=5
        )
        result2 = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, doc_id, chunk_index=5
        )
        assert result1 == result2

    def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:
        doc_id_a = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
        doc_id_b = "b" * MAX_DOCUMENT_ID_ENCODED_LENGTH
        result_a = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, doc_id_a, chunk_index=0
        )
        result_b = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, doc_id_b, chunk_index=0
        )
        assert result_a != result_b

    def test_result_never_exceeds_max_length(self) -> None:
        """
        Tests that the final result should always be under
        MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.
        """
        doc_id = "z" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
        )
        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH

    def test_no_tenant_prefix_in_single_tenant(self) -> None:
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, "mydoc", chunk_index=0
        )
        assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)


class TestGetOpensearchDocChunkIdMultiTenant:
    def test_includes_tenant_prefix(self) -> None:
        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, "mydoc", chunk_index=0)
        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")

    def test_format(self) -> None:
        result = get_opensearch_doc_chunk_id(
            MULTI_TENANT_STATE, "mydoc", chunk_index=2, max_chunk_size=256
        )
        assert result == f"{EXPECTED_SHORT_TENANT}__mydoc__256__2"

    def test_long_doc_id_is_hashed_multitenant(self) -> None:
        doc_id = "d" * MAX_DOCUMENT_ID_ENCODED_LENGTH
        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
        # Should still have tenant prefix.
        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
        # The original doc ID should NOT appear in the result.
        assert doc_id not in result
        # The suffix should still be present.
        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result

    def test_result_never_exceeds_max_length_multitenant(self) -> None:
        doc_id = "q" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
        result = get_opensearch_doc_chunk_id(
            MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
        )
        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH

    def test_different_tenants_produce_different_ids(self) -> None:
        tenant_a = TenantState(
            tenant_id="tenant_aaaaaaaa-0000-0000-0000-000000000000", multitenant=True
        )
        tenant_b = TenantState(
            tenant_id="tenant_bbbbbbbb-0000-0000-0000-000000000000", multitenant=True
        )
        result_a = get_opensearch_doc_chunk_id(tenant_a, "same-doc", chunk_index=0)
        result_b = get_opensearch_doc_chunk_id(tenant_b, "same-doc", chunk_index=0)
        assert result_a != result_b


class TestGetOpensearchDocChunkIdEdgeCases:
    def test_chunk_index_zero(self) -> None:
        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "doc", chunk_index=0)
        assert result.endswith("__0")

    def test_large_chunk_index(self) -> None:
        result = get_opensearch_doc_chunk_id(
            SINGLE_TENANT_STATE, "doc", chunk_index=99999
        )
        assert result.endswith("__99999")

    def test_doc_id_with_only_special_chars_raises(self) -> None:
        """
        Tests that a doc ID that becomes empty after filtering should raise
        ValueError.
        """
        with pytest.raises(ValueError, match="empty after filtering"):
            get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "###???///", chunk_index=0)

    def test_doc_id_at_boundary_length(self) -> None:
        """
        Tests that a doc ID right at the boundary should not be hashed.
        """
        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
        suffix_len = len(suffix.encode("utf-8"))
        # Max doc ID length that won't trigger hashing (must be <
        # max_encoded_length).
        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1
        doc_id = "a" * max_doc_len
        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
        assert doc_id in result

    def test_doc_id_at_boundary_length_multitenant(self) -> None:
        """
        Tests that a doc ID right at the boundary should not be hashed in
        multitenant mode.
        """
        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
        suffix_len = len(suffix.encode("utf-8"))
        prefix = f"{EXPECTED_SHORT_TENANT}__"
        prefix_len = len(prefix.encode("utf-8"))
        # Max doc ID length that won't trigger hashing (must be <
        # max_encoded_length).
        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1
        doc_id = "a" * max_doc_len
        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
        assert doc_id in result

    def test_doc_id_one_over_boundary_is_hashed(self) -> None:
        """
        Tests that a doc ID one byte over the boundary should be hashed.
        """
        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
        suffix_len = len(suffix.encode("utf-8"))
        # This length will trigger the >= check in filter_and_validate_document_id
        doc_id = "a" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)
        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
        assert doc_id not in result


================================================
FILE: backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
================================================
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
)
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk


def _make_chunk(
    doc_id: str,
    chunk_id: int,
) -> DocMetadataAwareIndexChunk:
    """Creates a minimal DocMetadataAwareIndexChunk for testing."""
    doc = Document(
        id=doc_id,
        sections=[TextSection(text="test", link="http://test.com")],
        source=DocumentSource.FILE,
        semantic_identifier="test_doc",
        metadata={},
    )
    access = DocumentAccess.build(
        user_emails=[],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=True,
    )
    return DocMetadataAwareIndexChunk(
        chunk_id=chunk_id,
        blurb="test",
        content="test content",
        source_links={0: "http://test.com"},
        image_file_id=None,
        section_continuation=False,
        source_document=doc,
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        doc_summary="",
        chunk_context="",
        contextual_rag_reserved_tokens=0,
        embeddings=ChunkEmbedding(full_embedding=[0.1] * 10, mini_chunk_embeddings=[]),
        title_embedding=[0.1] * 10,
        tenant_id="test_tenant",
        access=access,
        document_sets=set(),
        user_project=[],
        personas=[],
        boost=0,
        aggregated_chunk_boost_factor=1.0,
        ancestor_hierarchy_node_ids=[],
    )


def _make_index() -> tuple[OpenSearchDocumentIndex, MagicMock]:
    """Creates an OpenSearchDocumentIndex with a mocked client.
    Returns the index and the mock for bulk_index_documents."""
    mock_client = MagicMock()
    mock_bulk = MagicMock()
    mock_client.bulk_index_documents = mock_bulk

    tenant_state = TenantState(tenant_id="test_tenant", multitenant=False)

    index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)
    index._index_name = "test_index"
    index._client = mock_client
    index._tenant_state = tenant_state

    return index, mock_bulk


def _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:
    return IndexingMetadata(
        doc_id_to_chunk_cnt_diff={
            doc_id: IndexingMetadata.ChunkCounts(
                old_chunk_cnt=0,
                new_chunk_cnt=chunk_count,
            ),
        },
    )


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_single_doc_under_batch_limit_flushes_once() -> None:
    """A document with fewer chunks than MAX_CHUNKS_PER_DOC_BATCH should flush once."""
    index, mock_bulk = _make_index()
    doc_id = "doc_1"
    num_chunks = 50
    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
    metadata = _make_metadata(doc_id, num_chunks)

    with patch.object(index, "delete", return_value=0):
        index.index(chunks, metadata)

    assert mock_bulk.call_count == 1
    batch_arg = mock_bulk.call_args_list[0]
    assert len(batch_arg.kwargs["documents"]) == num_chunks


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_single_doc_over_batch_limit_flushes_multiple_times() -> None:
    """A document with more chunks than MAX_CHUNKS_PER_DOC_BATCH should flush multiple times."""
    index, mock_bulk = _make_index()
    doc_id = "doc_1"
    num_chunks = 250
    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
    metadata = _make_metadata(doc_id, num_chunks)

    with patch.object(index, "delete", return_value=0):
        index.index(chunks, metadata)

    # 250 chunks / 100 per batch = 3 flushes (100 + 100 + 50)
    assert mock_bulk.call_count == 3
    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
    assert batch_sizes == [100, 100, 50]


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_single_doc_exactly_at_batch_limit() -> None:
    """A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once
    (the flush happens on the next chunk, not at the boundary)."""
    index, mock_bulk = _make_index()
    doc_id = "doc_1"
    num_chunks = 100
    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
    metadata = _make_metadata(doc_id, num_chunks)

    with patch.object(index, "delete", return_value=0):
        index.index(chunks, metadata)

    # 100 chunks hit the >= check on chunk 101 which doesn't exist,
    # so final flush handles all 100
    # Actually: the elif fires when len(current_chunks) >= 100, which happens
    # when current_chunks has 100 items and the 101st chunk arrives.
    # With exactly 100 chunks, the 100th chunk makes len == 99, then appended -> 100.
    # No 101st chunk arrives, so the final flush handles all 100.
    assert mock_bulk.call_count == 1


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_single_doc_one_over_batch_limit() -> None:
    """101 chunks for one doc: first 100 flushed when the 101st arrives, then
    the 101st is flushed at the end."""
    index, mock_bulk = _make_index()
    doc_id = "doc_1"
    num_chunks = 101
    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
    metadata = _make_metadata(doc_id, num_chunks)

    with patch.object(index, "delete", return_value=0):
        index.index(chunks, metadata)

    assert mock_bulk.call_count == 2
    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
    assert batch_sizes == [100, 1]


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_multiple_docs_each_under_limit_flush_per_doc() -> None:
    """Multiple documents each under the batch limit should flush once per document."""
    index, mock_bulk = _make_index()
    chunks = []
    for doc_idx in range(3):
        doc_id = f"doc_{doc_idx}"
        for chunk_idx in range(50):
            chunks.append(_make_chunk(doc_id, chunk_idx))

    metadata = IndexingMetadata(
        doc_id_to_chunk_cnt_diff={
            f"doc_{i}": IndexingMetadata.ChunkCounts(old_chunk_cnt=0, new_chunk_cnt=50)
            for i in range(3)
        },
    )

    with patch.object(index, "delete", return_value=0):
        index.index(chunks, metadata)

    # 3 documents = 3 flushes (one per doc boundary + final)
    assert mock_bulk.call_count == 3


@patch(
    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
    100,
)
def test_delete_called_once_per_document() -> None:
    """Even with multiple flushes for a single document, delete should only be
    called once per document."""
    index, _mock_bulk = _make_index()
    doc_id = "doc_1"
    num_chunks = 250
    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
    metadata = _make_metadata(doc_id, num_chunks)

    with patch.object(index, "delete", return_value=0) as mock_delete:
        index.index(chunks, metadata)

    mock_delete.assert_called_once_with(doc_id, None)


================================================
FILE: backend/tests/unit/onyx/document_index/test_disabled_document_index.py
================================================
"""Tests for DisabledDocumentIndex — verifies all methods raise RuntimeError.

This is the safety net for the DISABLE_VECTOR_DB feature. Every method on
DisabledDocumentIndex must raise RuntimeError with the standard error message
so that any accidental vector-DB call is caught immediately.
"""

import re

import pytest

from onyx.context.search.models import IndexFilters
from onyx.context.search.models import QueryExpansionType
from onyx.db.enums import EmbeddingPrecision
from onyx.document_index.disabled import DisabledDocumentIndex
from onyx.document_index.disabled import VECTOR_DB_DISABLED_ERROR

ESCAPED_ERROR = re.escape(VECTOR_DB_DISABLED_ERROR)


@pytest.fixture
def disabled_index() -> DisabledDocumentIndex:
    return DisabledDocumentIndex(
        index_name="test_index",
        secondary_index_name="test_secondary",
    )


def _stub_filters() -> IndexFilters:
    return IndexFilters(access_control_list=None)


# ------------------------------------------------------------------
# Verifiable
# ------------------------------------------------------------------


def test_ensure_indices_exist_no_raises(
    disabled_index: DisabledDocumentIndex,
) -> None:
    disabled_index.ensure_indices_exist(
        primary_embedding_dim=768,
        primary_embedding_precision=EmbeddingPrecision.FLOAT,
        secondary_index_embedding_dim=None,
        secondary_index_embedding_precision=None,
    )


def test_register_multitenant_indices_raises() -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        DisabledDocumentIndex.register_multitenant_indices(
            indices=["idx"],
            embedding_dims=[768],
            embedding_precisions=[EmbeddingPrecision.FLOAT],
        )


# ------------------------------------------------------------------
# Indexable
# ------------------------------------------------------------------


def test_index_raises(disabled_index: DisabledDocumentIndex) -> None:
    from dataclasses import dataclass, field

    # We only need a stub — the method raises before inspecting arguments.
    @dataclass
    class _StubBatchParams:
        doc_id_to_previous_chunk_cnt: dict[str, int] = field(default_factory=dict)
        doc_id_to_new_chunk_cnt: dict[str, int] = field(default_factory=dict)
        tenant_id: str = "test"
        large_chunks_enabled: bool = False

    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.index(
            chunks=[],
            index_batch_params=_StubBatchParams(),  # type: ignore
        )


# ------------------------------------------------------------------
# Deletable
# ------------------------------------------------------------------


def test_delete_single_raises(disabled_index: DisabledDocumentIndex) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.delete_single(
            doc_id="doc-1",
            tenant_id="test",
            chunk_count=None,
        )


# ------------------------------------------------------------------
# Updatable
# ------------------------------------------------------------------


def test_update_single_raises(disabled_index: DisabledDocumentIndex) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.update_single(
            doc_id="doc-1",
            tenant_id="test",
            chunk_count=None,
            fields=None,
            user_fields=None,
        )


# ------------------------------------------------------------------
# IdRetrievalCapable
# ------------------------------------------------------------------


def test_id_based_retrieval_raises(
    disabled_index: DisabledDocumentIndex,
) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.id_based_retrieval(
            chunk_requests=[],
            filters=_stub_filters(),
        )


# ------------------------------------------------------------------
# HybridCapable
# ------------------------------------------------------------------


def test_hybrid_retrieval_raises(
    disabled_index: DisabledDocumentIndex,
) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.hybrid_retrieval(
            query="test",
            query_embedding=[0.0] * 768,
            final_keywords=None,
            filters=_stub_filters(),
            hybrid_alpha=0.5,
            time_decay_multiplier=1.0,
            num_to_retrieve=10,
            ranking_profile_type=QueryExpansionType.KEYWORD,
        )


# ------------------------------------------------------------------
# AdminCapable
# ------------------------------------------------------------------


def test_admin_retrieval_raises(
    disabled_index: DisabledDocumentIndex,
) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.admin_retrieval(
            query="test",
            query_embedding=[0.0] * 768,
            filters=_stub_filters(),
        )


# ------------------------------------------------------------------
# RandomCapable
# ------------------------------------------------------------------


def test_random_retrieval_raises(
    disabled_index: DisabledDocumentIndex,
) -> None:
    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):
        disabled_index.random_retrieval(
            filters=_stub_filters(),
        )


# ------------------------------------------------------------------
# Introspection — index_name and secondary_index_name should still work
# ------------------------------------------------------------------


def test_index_names_accessible(disabled_index: DisabledDocumentIndex) -> None:
    assert disabled_index.index_name == "test_index"
    assert disabled_index.secondary_index_name == "test_secondary"


def test_default_names() -> None:
    index = DisabledDocumentIndex()
    assert index.index_name == "disabled"
    assert index.secondary_index_name is None


================================================
FILE: backend/tests/unit/onyx/document_index/vespa/shared_utils/test_utils.py
================================================
from onyx.utils.text_processing import remove_invalid_unicode_chars


def test_remove_invalid_unicode_chars() -> None:
    """Test that invalid Unicode characters are properly removed."""
    # Test removal of illegal XML character 0xFDDB
    text_with_illegal_char = "Valid text \ufddb more text"
    sanitized = remove_invalid_unicode_chars(text_with_illegal_char)
    assert "\ufddb" not in sanitized
    assert sanitized == "Valid text  more text"

    # Test that valid characters are preserved
    valid_text = "Hello, world! 你好世界"
    assert remove_invalid_unicode_chars(valid_text) == valid_text

    # Test multiple invalid characters including 0xFDDB
    text_with_multiple_illegal = "\x00Hello\ufddb World\ufffe!"
    sanitized = remove_invalid_unicode_chars(text_with_multiple_illegal)
    assert all(c not in sanitized for c in ["\x00", "\ufddb", "\ufffe"])
    assert sanitized == "Hello World!"


def test_remove_surrogate_characters() -> None:
    """Test removal of unpaired UTF-16 surrogates that cause 'surrogates not allowed' errors.

    This is the specific error seen when indexing Drive documents with Cohere:
    'utf-8' codec can't encode character '\\udc00' in position X: surrogates not allowed
    """
    # Test low surrogate (the exact error case from Drive indexing with Cohere)
    text_with_low_surrogate = "Text before \udc00 text after"
    sanitized = remove_invalid_unicode_chars(text_with_low_surrogate)
    assert "\udc00" not in sanitized
    assert sanitized == "Text before  text after"

    # Test high surrogate
    text_with_high_surrogate = "Start \ud800 end"
    sanitized = remove_invalid_unicode_chars(text_with_high_surrogate)
    assert "\ud800" not in sanitized
    assert sanitized == "Start  end"

    # Test that the sanitized text can be encoded to UTF-8 without error
    problematic_text = "Document content \udc00 with \ud800 surrogates \udfff here"
    sanitized = remove_invalid_unicode_chars(problematic_text)
    # This should not raise an exception
    sanitized.encode("utf-8")
    assert sanitized == "Document content  with  surrogates  here"


================================================
FILE: backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
================================================
"""Unit tests for VespaDocumentIndex.index().

These tests mock all external I/O (HTTP calls, thread pools) and verify
the streaming logic, ID cleaning/mapping, and DocumentInsertionRecord
construction.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk


def _make_chunk(
    doc_id: str,
    chunk_id: int = 0,
    content: str = "test content",
) -> DocMetadataAwareIndexChunk:
    doc = Document(
        id=doc_id,
        semantic_identifier="test_doc",
        sections=[TextSection(text=content, link=None)],
        source=DocumentSource.NOT_APPLICABLE,
        metadata={},
    )
    index_chunk = IndexChunk(
        chunk_id=chunk_id,
        blurb=content[:50],
        content=content,
        source_links=None,
        image_file_id=None,
        section_continuation=False,
        source_document=doc,
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        contextual_rag_reserved_tokens=0,
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        embeddings=ChunkEmbedding(
            full_embedding=[0.1] * 10,
            mini_chunk_embeddings=[],
        ),
        title_embedding=None,
    )
    access = DocumentAccess.build(
        user_emails=[],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=True,
    )
    return DocMetadataAwareIndexChunk.from_index_chunk(
        index_chunk=index_chunk,
        access=access,
        document_sets=set(),
        user_project=[],
        personas=[],
        boost=0,
        aggregated_chunk_boost_factor=1.0,
        tenant_id="test_tenant",
    )


def _make_indexing_metadata(
    doc_ids: list[str],
    old_counts: list[int],
    new_counts: list[int],
) -> IndexingMetadata:
    return IndexingMetadata(
        doc_id_to_chunk_cnt_diff={
            doc_id: IndexingMetadata.ChunkCounts(
                old_chunk_cnt=old,
                new_chunk_cnt=new,
            )
            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)
        }
    )


def _stub_enrich(
    doc_id: str,
    old_chunk_cnt: int,
) -> EnrichedDocumentIndexingInfo:
    """Build an EnrichedDocumentIndexingInfo that says 'no chunks to delete'
    when old_chunk_cnt == 0, or 'has existing chunks' otherwise."""
    return EnrichedDocumentIndexingInfo(
        doc_id=doc_id,
        chunk_start_index=0,
        old_version=False,
        chunk_end_index=old_chunk_cnt,
    )


@patch("onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks")
@patch("onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks")
@patch(
    "onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids",
    return_value=[],
)
@patch("onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info")
@patch(
    "onyx.document_index.vespa.vespa_document_index.BATCH_SIZE",
    3,
)
def test_index_respects_batch_size(
    mock_enrich: MagicMock,
    mock_get_chunk_ids: MagicMock,  # noqa: ARG001
    mock_delete: MagicMock,  # noqa: ARG001
    mock_batch_index: MagicMock,
) -> None:
    """When chunks exceed BATCH_SIZE, batch_index_vespa_chunks is called
    multiple times with correctly sized batches."""
    mock_enrich.return_value = _stub_enrich("doc1", old_chunk_cnt=0)

    index = VespaDocumentIndex(
        index_name="test_index",
        tenant_state=TenantState(tenant_id="test_tenant", multitenant=False),
        large_chunks_enabled=False,
        httpx_client=MagicMock(),
    )

    chunks = [_make_chunk("doc1", chunk_id=i) for i in range(7)]
    metadata = _make_indexing_metadata(["doc1"], old_counts=[0], new_counts=[7])

    results = index.index(chunks=chunks, indexing_metadata=metadata)

    assert len(results) == 1

    # With BATCH_SIZE=3 and 7 chunks: batches of 3, 3, 1
    assert mock_batch_index.call_count == 3
    batch_sizes = [len(c.kwargs["chunks"]) for c in mock_batch_index.call_args_list]
    assert batch_sizes == [3, 3, 1]

    # Verify all chunks are accounted for and in order
    all_indexed = [
        chunk for c in mock_batch_index.call_args_list for chunk in c.kwargs["chunks"]
    ]
    assert len(all_indexed) == 7
    assert [c.chunk_id for c in all_indexed] == list(range(7))


================================================
FILE: backend/tests/unit/onyx/error_handling/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/error_handling/test_exceptions.py
================================================
"""Tests for OnyxError and the global exception handler."""

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.error_handling.exceptions import register_onyx_exception_handlers


class TestOnyxError:
    """Unit tests for OnyxError construction and properties."""

    def test_basic_construction(self) -> None:
        err = OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
        assert err.error_code is OnyxErrorCode.NOT_FOUND
        assert err.detail == "Session not found"
        assert err.status_code == 404

    def test_message_defaults_to_code(self) -> None:
        err = OnyxError(OnyxErrorCode.UNAUTHENTICATED)
        assert err.detail == "UNAUTHENTICATED"
        assert str(err) == "UNAUTHENTICATED"

    def test_status_code_override(self) -> None:
        err = OnyxError(
            OnyxErrorCode.BAD_GATEWAY,
            "upstream failed",
            status_code_override=503,
        )
        assert err.status_code == 503
        # error_code still reports its own default
        assert err.error_code.status_code == 502

    def test_no_override_uses_error_code_status(self) -> None:
        err = OnyxError(OnyxErrorCode.RATE_LIMITED, "slow down")
        assert err.status_code == 429

    def test_is_exception(self) -> None:
        err = OnyxError(OnyxErrorCode.INTERNAL_ERROR)
        assert isinstance(err, Exception)


class TestExceptionHandler:
    """Integration test: OnyxError → JSON response via FastAPI TestClient."""

    @pytest.fixture()
    def client(self) -> TestClient:
        app = FastAPI()
        register_onyx_exception_handlers(app)

        @app.get("/boom")
        def _boom() -> None:
            raise OnyxError(OnyxErrorCode.NOT_FOUND, "Thing not found")

        @app.get("/boom-override")
        def _boom_override() -> None:
            raise OnyxError(
                OnyxErrorCode.BAD_GATEWAY,
                "upstream 503",
                status_code_override=503,
            )

        @app.get("/boom-default-msg")
        def _boom_default() -> None:
            raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)

        return TestClient(app, raise_server_exceptions=False)

    def test_returns_correct_status_and_body(self, client: TestClient) -> None:
        resp = client.get("/boom")
        assert resp.status_code == 404
        body = resp.json()
        assert body["error_code"] == "NOT_FOUND"
        assert body["detail"] == "Thing not found"

    def test_status_code_override_in_response(self, client: TestClient) -> None:
        resp = client.get("/boom-override")
        assert resp.status_code == 503
        body = resp.json()
        assert body["error_code"] == "BAD_GATEWAY"
        assert body["detail"] == "upstream 503"

    def test_default_message(self, client: TestClient) -> None:
        resp = client.get("/boom-default-msg")
        assert resp.status_code == 401
        body = resp.json()
        assert body["error_code"] == "UNAUTHENTICATED"
        assert body["detail"] == "UNAUTHENTICATED"


================================================
FILE: backend/tests/unit/onyx/federated_connectors/test_federated_connector_factory.py
================================================
"""
Unit tests for federated connector lazy loading factory to validate:
1. All federated connector mappings are correct
2. Module paths and class names are valid
3. Error handling works properly
4. Caching functions correctly
"""

import importlib
from unittest.mock import MagicMock
from unittest.mock import Mock
from unittest.mock import patch

import pytest

from onyx.configs.constants import FederatedConnectorSource
from onyx.federated_connectors.factory import _federated_connector_cache
from onyx.federated_connectors.factory import _load_federated_connector_class
from onyx.federated_connectors.factory import FederatedConnectorMissingException
from onyx.federated_connectors.factory import get_federated_connector_cls
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP
from onyx.federated_connectors.registry import FederatedConnectorMapping


class TestFederatedConnectorMappingValidation:
    """Test that all federated connector mappings are valid."""

    def test_all_federated_connector_mappings_exist(self) -> None:
        """Test that all mapped modules and classes actually exist."""
        errors = []

        for source, mapping in FEDERATED_CONNECTOR_CLASS_MAP.items():
            try:
                # Try to import the module
                module = importlib.import_module(mapping.module_path)

                # Try to get the class
                connector_class = getattr(module, mapping.class_name)

                # Verify it's a subclass of FederatedConnector
                if not issubclass(connector_class, FederatedConnector):
                    errors.append(
                        f"{source.value}: {mapping.class_name} is not a FederatedConnector subclass"
                    )

            except ImportError as e:
                errors.append(
                    f"{source.value}: Failed to import {mapping.module_path} - {e}"
                )
            except AttributeError as e:
                errors.append(
                    f"{source.value}: Class {mapping.class_name} not found in {mapping.module_path} - {e}"
                )

        if errors:
            pytest.fail(
                "Federated connector mapping validation failed:\n" + "\n".join(errors)
            )

    def test_no_duplicate_mappings(self) -> None:
        """Test that each FederatedConnectorSource only appears once in the mapping."""
        sources = list(FEDERATED_CONNECTOR_CLASS_MAP.keys())
        unique_sources = set(sources)

        assert len(sources) == len(
            unique_sources
        ), "Duplicate FederatedConnectorSource entries found"

    def test_mapping_format_consistency(self) -> None:
        """Test that all mappings follow the expected format."""
        for source, mapping in FEDERATED_CONNECTOR_CLASS_MAP.items():
            assert isinstance(
                mapping, FederatedConnectorMapping
            ), f"{source.value} mapping is not a FederatedConnectorMapping"

            assert isinstance(
                mapping.module_path, str
            ), f"{source.value} module_path is not a string"
            assert isinstance(
                mapping.class_name, str
            ), f"{source.value} class_name is not a string"
            assert mapping.module_path.startswith(
                "onyx.federated_connectors."
            ), f"{source.value} module_path doesn't start with onyx.federated_connectors."
            assert mapping.class_name.endswith(
                "FederatedConnector"
            ), f"{source.value} class_name doesn't end with FederatedConnector"


class TestFederatedConnectorClassLoading:
    """Test the lazy loading mechanism."""

    def setup_method(self) -> None:
        """Clear cache before each test."""
        _federated_connector_cache.clear()

    def test_load_federated_connector_class_success(self) -> None:
        """Test successful federated connector class loading."""
        connector_class = _load_federated_connector_class(
            FederatedConnectorSource.FEDERATED_SLACK
        )

        assert connector_class is not None
        assert issubclass(connector_class, FederatedConnector)
        assert connector_class.__name__ == "SlackFederatedConnector"

    def test_load_federated_connector_class_caching(self) -> None:
        """Test that federated connector classes are cached after first load."""
        assert len(_federated_connector_cache) == 0

        # Load connector first time
        connector_class1 = _load_federated_connector_class(
            FederatedConnectorSource.FEDERATED_SLACK
        )
        assert len(_federated_connector_cache) == 1
        assert FederatedConnectorSource.FEDERATED_SLACK in _federated_connector_cache

        # Load same connector second time - should use cache
        connector_class2 = _load_federated_connector_class(
            FederatedConnectorSource.FEDERATED_SLACK
        )
        assert connector_class1 is connector_class2  # Same object reference
        assert len(_federated_connector_cache) == 1  # Cache size unchanged

    @patch("importlib.import_module")
    def test_load_federated_connector_class_import_error(
        self, mock_import: Mock
    ) -> None:
        """Test handling of import errors."""
        mock_import.side_effect = ImportError("Module not found")

        with pytest.raises(FederatedConnectorMissingException) as exc_info:
            _load_federated_connector_class(FederatedConnectorSource.FEDERATED_SLACK)

        assert (
            "Failed to import SlackFederatedConnector from onyx.federated_connectors.slack.federated_connector"
            in str(exc_info.value)
        )

    @patch("importlib.import_module")
    def test_load_federated_connector_class_attribute_error(
        self, mock_import: Mock
    ) -> None:
        """Test handling of missing class in module."""

        # Create a custom mock that raises AttributeError for the specific class
        class MockModule:
            def __getattr__(self, name: str) -> MagicMock:
                if name == "SlackFederatedConnector":
                    raise AttributeError("Class not found")
                return MagicMock()

        mock_import.return_value = MockModule()

        with pytest.raises(FederatedConnectorMissingException) as exc_info:
            _load_federated_connector_class(FederatedConnectorSource.FEDERATED_SLACK)

        assert (
            "Failed to import SlackFederatedConnector from onyx.federated_connectors.slack.federated_connector"
            in str(exc_info.value)
        )


class TestGetFederatedConnectorCls:
    """Test the get_federated_connector_cls function."""

    def setup_method(self) -> None:
        """Clear cache before each test."""
        _federated_connector_cache.clear()

    def test_get_federated_connector_cls_basic(self) -> None:
        """Test basic federated connector class retrieval."""
        connector_class = get_federated_connector_cls(
            FederatedConnectorSource.FEDERATED_SLACK
        )

        assert connector_class is not None
        assert issubclass(connector_class, FederatedConnector)
        assert connector_class.__name__ == "SlackFederatedConnector"


class TestFederatedConnectorMappingIntegrity:
    """Test integrity of the federated connector mapping data."""

    def test_all_federated_connector_sources_mapped(self) -> None:
        """Test that all FederatedConnectorSource values have mappings."""
        # Get all FederatedConnectorSource enum values
        all_sources = set(FederatedConnectorSource)
        mapped_sources = set(FEDERATED_CONNECTOR_CLASS_MAP.keys())

        unmapped_sources = all_sources - mapped_sources

        if unmapped_sources:
            pytest.fail(
                f"FederatedConnectorSource values without connector mappings: {[s.value for s in unmapped_sources]}"
            )


================================================
FILE: backend/tests/unit/onyx/federated_connectors/test_oauth_utils.py
================================================
"""Unit tests for federated OAuth state generation and verification.

Uses unittest.mock to patch get_cache_backend so no external services
are needed.  Verifies the generate -> verify round-trip, one-time-use
semantics, TTL propagation, and error handling.
"""

from unittest.mock import patch

import pytest

from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
from onyx.federated_connectors.oauth_utils import generate_oauth_state
from onyx.federated_connectors.oauth_utils import OAUTH_STATE_TTL
from onyx.federated_connectors.oauth_utils import OAuthSession
from onyx.federated_connectors.oauth_utils import verify_oauth_state


class _MemoryCacheBackend(CacheBackend):
    """Minimal in-memory CacheBackend for unit tests."""

    def __init__(self) -> None:
        self._store: dict[str, bytes] = {}
        self.set_calls: list[dict[str, object]] = []

    def get(self, key: str) -> bytes | None:
        return self._store.get(key)

    def set(
        self,
        key: str,
        value: str | bytes | int | float,
        ex: int | None = None,
    ) -> None:
        self.set_calls.append({"key": key, "ex": ex})
        if isinstance(value, bytes):
            self._store[key] = value
        else:
            self._store[key] = str(value).encode()

    def delete(self, key: str) -> None:
        self._store.pop(key, None)

    def exists(self, key: str) -> bool:
        return key in self._store

    def expire(self, key: str, seconds: int) -> None:
        pass

    def ttl(self, key: str) -> int:
        return -2 if key not in self._store else -1

    def lock(self, name: str, timeout: float | None = None) -> CacheLock:
        raise NotImplementedError

    def rpush(self, key: str, value: str | bytes) -> None:
        raise NotImplementedError

    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
        raise NotImplementedError


def _patched(cache: _MemoryCacheBackend):  # type: ignore[no-untyped-def]
    return patch(
        "onyx.federated_connectors.oauth_utils.get_cache_backend",
        return_value=cache,
    )


class TestGenerateAndVerifyRoundTrip:
    def test_round_trip_basic(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            state = generate_oauth_state(
                federated_connector_id=42,
                user_id="user-abc",
            )
            session = verify_oauth_state(state)

        assert session.federated_connector_id == 42
        assert session.user_id == "user-abc"
        assert session.redirect_uri is None
        assert session.additional_data == {}

    def test_round_trip_with_all_fields(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            state = generate_oauth_state(
                federated_connector_id=7,
                user_id="user-xyz",
                redirect_uri="https://example.com/callback",
                additional_data={"scope": "read"},
            )
            session = verify_oauth_state(state)

        assert session.federated_connector_id == 7
        assert session.user_id == "user-xyz"
        assert session.redirect_uri == "https://example.com/callback"
        assert session.additional_data == {"scope": "read"}


class TestOneTimeUse:
    def test_verify_deletes_state(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            state = generate_oauth_state(federated_connector_id=1, user_id="u")
            verify_oauth_state(state)

            with pytest.raises(ValueError, match="OAuth state not found"):
                verify_oauth_state(state)


class TestTTLPropagation:
    def test_default_ttl(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            generate_oauth_state(federated_connector_id=1, user_id="u")

        assert len(cache.set_calls) == 1
        assert cache.set_calls[0]["ex"] == OAUTH_STATE_TTL

    def test_custom_ttl(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            generate_oauth_state(federated_connector_id=1, user_id="u", ttl=600)

        assert cache.set_calls[0]["ex"] == 600


class TestVerifyInvalidState:
    def test_missing_state_raises(self) -> None:
        cache = _MemoryCacheBackend()
        with _patched(cache):
            state = generate_oauth_state(federated_connector_id=1, user_id="u")
            # Manually clear the cache to simulate expiration
            cache._store.clear()

            with pytest.raises(ValueError, match="OAuth state not found"):
                verify_oauth_state(state)


class TestOAuthSessionSerialization:
    def test_to_dict_from_dict_round_trip(self) -> None:
        session = OAuthSession(
            federated_connector_id=5,
            user_id="u-123",
            redirect_uri="https://redir.example.com",
            additional_data={"key": "val"},
        )
        d = session.to_dict()
        restored = OAuthSession.from_dict(d)

        assert restored.federated_connector_id == 5
        assert restored.user_id == "u-123"
        assert restored.redirect_uri == "https://redir.example.com"
        assert restored.additional_data == {"key": "val"}

    def test_from_dict_defaults(self) -> None:
        minimal = {"federated_connector_id": 1, "user_id": "u"}
        session = OAuthSession.from_dict(minimal)
        assert session.redirect_uri is None
        assert session.additional_data == {}


================================================
FILE: backend/tests/unit/onyx/file_processing/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/file_processing/test_image_summarization_errors.py
================================================
"""
Unit tests for image summarization error handling.

Verifies that:
1. LLM errors produce actionable error messages (not base64 dumps)
2. Unsupported MIME type logs include the magic bytes and size
3. The ValueError raised on LLM failure preserves the original exception
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.file_processing.image_summarization import _summarize_image
from onyx.file_processing.image_summarization import summarize_image_with_error_handling


class TestSummarizeImageErrorMessage:
    """_summarize_image must not dump base64 image data into error messages."""

    def test_error_message_contains_exception_type_not_base64(self) -> None:
        """The ValueError should contain the original exception info, not message payloads."""
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = RuntimeError("Connection timeout")

        # A fake base64-encoded image string (should NOT appear in the error)
        fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg..."

        with pytest.raises(ValueError, match="RuntimeError: Connection timeout"):
            _summarize_image(fake_encoded, mock_llm, query="test")

    def test_error_message_does_not_contain_base64(self) -> None:
        """Ensure base64 data is never included in the error message."""
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = RuntimeError("API error")

        fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA"

        with pytest.raises(ValueError) as exc_info:
            _summarize_image(fake_encoded, mock_llm)

        error_str = str(exc_info.value)
        assert "base64" not in error_str
        assert "iVBOR" not in error_str

    def test_original_exception_is_chained(self) -> None:
        """The ValueError should chain the original exception via __cause__."""
        mock_llm = MagicMock()
        original = RuntimeError("upstream failure")
        mock_llm.invoke.side_effect = original

        with pytest.raises(ValueError) as exc_info:
            _summarize_image("data:image/png;base64,abc", mock_llm)

        assert exc_info.value.__cause__ is original


class TestUnsupportedMimeTypeLogging:
    """summarize_image_with_error_handling should log useful info for unsupported formats."""

    @patch(
        "onyx.file_processing.image_summarization.summarize_image_pipeline",
        side_effect=__import__(
            "onyx.file_processing.image_summarization",
            fromlist=["UnsupportedImageFormatError"],
        ).UnsupportedImageFormatError("unsupported"),
    )
    def test_logs_magic_bytes_and_size(
        self, mock_pipeline: MagicMock  # noqa: ARG002
    ) -> None:
        """The info log should include magic bytes hex and image size."""
        mock_llm = MagicMock()
        # TIFF magic bytes (not in the supported list)
        image_data = b"\x49\x49\x2a\x00" + b"\x00" * 100

        with patch("onyx.file_processing.image_summarization.logger") as mock_logger:
            result = summarize_image_with_error_handling(
                llm=mock_llm,
                image_data=image_data,
                context_name="test_image.tiff",
            )

        assert result is None
        mock_logger.info.assert_called_once()
        log_args = mock_logger.info.call_args
        # Check the format string args contain magic bytes and size
        assert "49492a00" in str(log_args)
        assert "104" in str(log_args)  # 4 + 100 bytes


================================================
FILE: backend/tests/unit/onyx/file_processing/test_image_summarization_litellm_errors.py
================================================
"""
Unit tests verifying that LiteLLM error details are extracted and surfaced
in image summarization error messages.

When the LLM call fails, the error handler should include the status_code,
llm_provider, and model from LiteLLM exceptions so operators can diagnose
the root cause (rate limit, content filter, unsupported vision, etc.)
without needing to dig through LiteLLM internals.
"""

from unittest.mock import MagicMock

import pytest

from onyx.file_processing.image_summarization import _summarize_image


def _make_litellm_style_error(
    *,
    message: str = "API error",
    status_code: int | None = None,
    llm_provider: str | None = None,
    model: str | None = None,
) -> RuntimeError:
    """Create an exception with LiteLLM-style attributes."""
    exc = RuntimeError(message)
    if status_code is not None:
        exc.status_code = status_code  # type: ignore[attr-defined]
    if llm_provider is not None:
        exc.llm_provider = llm_provider  # type: ignore[attr-defined]
    if model is not None:
        exc.model = model  # type: ignore[attr-defined]
    return exc


class TestLiteLLMErrorExtraction:
    """Verify that LiteLLM error attributes are included in the ValueError."""

    def test_status_code_included(self) -> None:
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = _make_litellm_style_error(
            message="Content filter triggered",
            status_code=400,
            llm_provider="azure",
            model="gpt-4o",
        )

        with pytest.raises(ValueError, match="status_code=400"):
            _summarize_image("data:image/png;base64,abc", mock_llm)

    def test_llm_provider_included(self) -> None:
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = _make_litellm_style_error(
            message="Bad request",
            status_code=400,
            llm_provider="azure",
        )

        with pytest.raises(ValueError, match="llm_provider=azure"):
            _summarize_image("data:image/png;base64,abc", mock_llm)

    def test_model_included(self) -> None:
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = _make_litellm_style_error(
            message="Bad request",
            model="gpt-4o",
        )

        with pytest.raises(ValueError, match="model=gpt-4o"):
            _summarize_image("data:image/png;base64,abc", mock_llm)

    def test_all_fields_in_single_message(self) -> None:
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = _make_litellm_style_error(
            message="Rate limit exceeded",
            status_code=429,
            llm_provider="azure",
            model="gpt-4o",
        )

        with pytest.raises(ValueError) as exc_info:
            _summarize_image("data:image/png;base64,abc", mock_llm)

        msg = str(exc_info.value)
        assert "status_code=429" in msg
        assert "llm_provider=azure" in msg
        assert "model=gpt-4o" in msg
        assert "Rate limit exceeded" in msg

    def test_plain_exception_without_litellm_attrs(self) -> None:
        """Non-LiteLLM exceptions should still produce a useful message."""
        mock_llm = MagicMock()
        mock_llm.invoke.side_effect = ConnectionError("Connection refused")

        with pytest.raises(ValueError) as exc_info:
            _summarize_image("data:image/png;base64,abc", mock_llm)

        msg = str(exc_info.value)
        assert "ConnectionError" in msg
        assert "Connection refused" in msg
        # Should not contain status_code/llm_provider/model
        assert "status_code" not in msg
        assert "llm_provider" not in msg

    def test_no_base64_in_error(self) -> None:
        """Error messages must not contain the full base64 image payload.

        Some LiteLLM exceptions echo the request body (including base64 images)
        in their message.  The truncation guard ensures the bulk of such a
        payload is stripped from the re-raised ValueError.
        """
        mock_llm = MagicMock()
        # Build a long base64-like payload that exceeds the 512-char truncation
        fake_b64_payload = "iVBORw0KGgo" * 100  # ~1100 chars
        fake_b64 = f"data:image/png;base64,{fake_b64_payload}"

        mock_llm.invoke.side_effect = RuntimeError(
            f"Request failed for payload: {fake_b64}"
        )

        with pytest.raises(ValueError) as exc_info:
            _summarize_image(fake_b64, mock_llm)

        msg = str(exc_info.value)
        # The full payload must not appear (truncation should have kicked in)
        assert fake_b64_payload not in msg
        assert "truncated" in msg

    def test_long_error_message_truncated(self) -> None:
        """Exception messages longer than 512 chars are truncated."""
        mock_llm = MagicMock()
        long_msg = "x" * 1000
        mock_llm.invoke.side_effect = RuntimeError(long_msg)

        with pytest.raises(ValueError) as exc_info:
            _summarize_image("data:image/png;base64,abc", mock_llm)

        msg = str(exc_info.value)
        assert "truncated" in msg
        # The full 1000-char string should not appear
        assert long_msg not in msg


================================================
FILE: backend/tests/unit/onyx/file_processing/test_pdf.py
================================================
"""Unit tests for pypdf-dependent PDF processing functions.

Tests cover:
- read_pdf_file: text extraction, metadata, encrypted PDFs, image extraction
- pdf_to_text: convenience wrapper
- is_pdf_protected: password protection detection

Fixture PDFs live in ./fixtures/ and are pre-built so the test layer has no
dependency on pypdf internals (pypdf.generic).
"""

from io import BytesIO
from pathlib import Path

from onyx.file_processing.extract_file_text import pdf_to_text
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.password_validation import is_pdf_protected

FIXTURES = Path(__file__).parent / "fixtures"


def _load(name: str) -> BytesIO:
    return BytesIO((FIXTURES / name).read_bytes())


# ── read_pdf_file ────────────────────────────────────────────────────────


class TestReadPdfFile:
    def test_basic_text_extraction(self) -> None:
        text, _, images = read_pdf_file(_load("simple.pdf"))
        assert "Hello World" in text
        assert images == []

    def test_multi_page_text_extraction(self) -> None:
        text, _, _ = read_pdf_file(_load("multipage.pdf"))
        assert "Page one content" in text
        assert "Page two content" in text

    def test_metadata_extraction(self) -> None:
        _, pdf_metadata, _ = read_pdf_file(_load("with_metadata.pdf"))
        assert pdf_metadata.get("Title") == "My Title"
        assert pdf_metadata.get("Author") == "Jane Doe"

    def test_encrypted_pdf_with_correct_password(self) -> None:
        text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="pass123")
        assert "Secret Content" in text

    def test_encrypted_pdf_without_password(self) -> None:
        text, _, _ = read_pdf_file(_load("encrypted.pdf"))
        assert text == ""

    def test_encrypted_pdf_with_wrong_password(self) -> None:
        text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="wrong")
        assert text == ""

    def test_empty_pdf(self) -> None:
        text, _, _ = read_pdf_file(_load("empty.pdf"))
        assert text.strip() == ""

    def test_invalid_pdf_returns_empty(self) -> None:
        text, _, images = read_pdf_file(BytesIO(b"this is not a pdf"))
        assert text == ""
        assert images == []

    def test_image_extraction_disabled_by_default(self) -> None:
        _, _, images = read_pdf_file(_load("with_image.pdf"))
        assert images == []

    def test_image_extraction_collects_images(self) -> None:
        _, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
        assert len(images) == 1
        img_bytes, img_name = images[0]
        assert len(img_bytes) > 0
        assert img_name  # non-empty name

    def test_image_callback_streams_instead_of_collecting(self) -> None:
        """With image_callback, images are streamed via callback and not accumulated."""
        collected: list[tuple[bytes, str]] = []

        def callback(data: bytes, name: str) -> None:
            collected.append((data, name))

        _, _, images = read_pdf_file(
            _load("with_image.pdf"), extract_images=True, image_callback=callback
        )
        # Callback received the image
        assert len(collected) == 1
        assert len(collected[0][0]) > 0
        # Returned list is empty when callback is used
        assert images == []


# ── pdf_to_text ──────────────────────────────────────────────────────────


class TestPdfToText:
    def test_returns_text(self) -> None:
        assert "Hello World" in pdf_to_text(_load("simple.pdf"))

    def test_with_password(self) -> None:
        assert "Secret Content" in pdf_to_text(
            _load("encrypted.pdf"), pdf_pass="pass123"
        )

    def test_encrypted_without_password_returns_empty(self) -> None:
        assert pdf_to_text(_load("encrypted.pdf")) == ""


# ── is_pdf_protected ─────────────────────────────────────────────────────


class TestIsPdfProtected:
    def test_unprotected_pdf(self) -> None:
        assert is_pdf_protected(_load("simple.pdf")) is False

    def test_protected_pdf(self) -> None:
        assert is_pdf_protected(_load("encrypted.pdf")) is True

    def test_preserves_file_position(self) -> None:
        pdf = _load("simple.pdf")
        pdf.seek(42)
        is_pdf_protected(pdf)
        assert pdf.tell() == 42


================================================
FILE: backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py
================================================
import io
from typing import cast

import openpyxl
from openpyxl.worksheet.worksheet import Worksheet

from onyx.file_processing.extract_file_text import xlsx_to_text


def _make_xlsx(sheets: dict[str, list[list[str]]]) -> io.BytesIO:
    """Create an in-memory xlsx file from a dict of sheet_name -> matrix of strings."""
    wb = openpyxl.Workbook()
    if wb.active is not None:
        wb.remove(cast(Worksheet, wb.active))
    for sheet_name, rows in sheets.items():
        ws = wb.create_sheet(title=sheet_name)
        for row in rows:
            ws.append(row)
    buf = io.BytesIO()
    wb.save(buf)
    buf.seek(0)
    return buf


class TestXlsxToText:
    def test_single_sheet_basic(self) -> None:
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["Name", "Age"],
                    ["Alice", "30"],
                    ["Bob", "25"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        assert len(lines) == 3
        assert "Name" in lines[0]
        assert "Age" in lines[0]
        assert "Alice" in lines[1]
        assert "30" in lines[1]
        assert "Bob" in lines[2]

    def test_multiple_sheets_separated(self) -> None:
        xlsx = _make_xlsx(
            {
                "Sheet1": [["a", "b"]],
                "Sheet2": [["c", "d"]],
            }
        )
        result = xlsx_to_text(xlsx)
        # TEXT_SECTION_SEPARATOR is "\n\n"
        assert "\n\n" in result
        parts = result.split("\n\n")
        assert any("a" in p for p in parts)
        assert any("c" in p for p in parts)

    def test_empty_cells(self) -> None:
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["a", "", "b"],
                    ["", "c", ""],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        assert len(lines) == 2

    def test_commas_in_cells_are_quoted(self) -> None:
        """Cells containing commas should be quoted in CSV output."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["hello, world", "normal"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        assert '"hello, world"' in result

    def test_empty_workbook(self) -> None:
        xlsx = _make_xlsx({"Sheet1": []})
        result = xlsx_to_text(xlsx)
        assert result.strip() == ""

    def test_long_empty_row_run_capped(self) -> None:
        """Runs of >2 empty rows should be capped to 2."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["header"],
                    [""],
                    [""],
                    [""],
                    [""],
                    ["data"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        # 4 empty rows capped to 2, so: header + 2 empty + data = 4 lines
        assert len(lines) == 4
        assert "header" in lines[0]
        assert "data" in lines[-1]

    def test_long_empty_col_run_capped(self) -> None:
        """Runs of >2 empty columns should be capped to 2."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["a", "", "", "", "b"],
                    ["c", "", "", "", "d"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        assert len(lines) == 2
        # Each row should have 4 fields (a + 2 empty + b), not 5
        # csv format: a,,,b (3 commas = 4 fields)
        first_line = lines[0].strip()
        # Count commas to verify column reduction
        assert first_line.count(",") == 3

    def test_short_empty_runs_kept(self) -> None:
        """Runs of <=2 empty rows/cols should be preserved."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["a", "b"],
                    ["", ""],
                    ["", ""],
                    ["c", "d"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        # All 4 rows preserved (2 empty rows <= threshold)
        assert len(lines) == 4

    def test_bad_zip_file_returns_empty(self) -> None:
        bad_file = io.BytesIO(b"not a zip file")
        result = xlsx_to_text(bad_file, file_name="test.xlsx")
        assert result == ""

    def test_bad_zip_tilde_file_returns_empty(self) -> None:
        bad_file = io.BytesIO(b"not a zip file")
        result = xlsx_to_text(bad_file, file_name="~$temp.xlsx")
        assert result == ""

    def test_large_sparse_sheet(self) -> None:
        """A sheet with data, a big empty gap, and more data — gap is capped to 2."""
        rows: list[list[str]] = [["row1_data"]]
        rows.extend([[""] for _ in range(10)])
        rows.append(["row2_data"])
        xlsx = _make_xlsx({"Sheet1": rows})
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        # 10 empty rows capped to 2: row1_data + 2 empty + row2_data = 4
        assert len(lines) == 4
        assert "row1_data" in lines[0]
        assert "row2_data" in lines[-1]

    def test_quotes_in_cells(self) -> None:
        """Cells containing quotes should be properly escaped."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ['say "hello"', "normal"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        # csv.writer escapes quotes by doubling them
        assert '""hello""' in result

    def test_each_row_is_separate_line(self) -> None:
        """Each row should produce its own line (regression for writerow vs writerows)."""
        xlsx = _make_xlsx(
            {
                "Sheet1": [
                    ["r1c1", "r1c2"],
                    ["r2c1", "r2c2"],
                    ["r3c1", "r3c2"],
                ]
            }
        )
        result = xlsx_to_text(xlsx)
        lines = [line for line in result.strip().split("\n") if line.strip()]
        assert len(lines) == 3
        assert "r1c1" in lines[0] and "r1c2" in lines[0]
        assert "r2c1" in lines[1] and "r2c2" in lines[1]
        assert "r3c1" in lines[2] and "r3c2" in lines[2]


================================================
FILE: backend/tests/unit/onyx/hooks/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/hooks/test_api_dependencies.py
================================================
"""Unit tests for the hooks feature gate."""

from unittest.mock import patch

import pytest

from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.api_dependencies import require_hook_enabled


class TestRequireHookEnabled:
    def test_raises_when_multi_tenant(self) -> None:
        with patch("onyx.hooks.api_dependencies.MULTI_TENANT", True):
            with pytest.raises(OnyxError) as exc_info:
                require_hook_enabled()
        assert exc_info.value.error_code is OnyxErrorCode.SINGLE_TENANT_ONLY
        assert exc_info.value.status_code == 403
        assert "multi-tenant" in exc_info.value.detail

    def test_passes_when_single_tenant(self) -> None:
        with patch("onyx.hooks.api_dependencies.MULTI_TENANT", False):
            require_hook_enabled()  # must not raise


================================================
FILE: backend/tests/unit/onyx/hooks/test_base_spec.py
================================================
import pytest
from pydantic import BaseModel

from onyx.db.enums import HookPoint
from onyx.hooks.points.base import HookPointSpec


def test_init_subclass_raises_for_missing_attrs() -> None:
    with pytest.raises(TypeError, match="must define class attributes"):

        class IncompleteSpec(HookPointSpec):
            hook_point = HookPoint.QUERY_PROCESSING
            # missing display_name, description, payload_model, response_model, etc.

            class _Payload(BaseModel):
                pass

            payload_model = _Payload
            response_model = _Payload


================================================
FILE: backend/tests/unit/onyx/hooks/test_models.py
================================================
import pytest
from pydantic import ValidationError

from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.hooks.models import HookCreateRequest
from onyx.hooks.models import HookUpdateRequest


def test_hook_update_request_rejects_empty() -> None:
    # No fields supplied at all
    with pytest.raises(ValidationError, match="At least one field must be provided"):
        HookUpdateRequest()


def test_hook_update_request_rejects_null_name_when_only_field() -> None:
    # Explicitly setting name=None is rejected as name cannot be cleared
    with pytest.raises(ValidationError, match="name cannot be cleared"):
        HookUpdateRequest(name=None)


def test_hook_update_request_accepts_single_field() -> None:
    req = HookUpdateRequest(name="new name")
    assert req.name == "new name"


def test_hook_update_request_accepts_partial_fields() -> None:
    req = HookUpdateRequest(fail_strategy=HookFailStrategy.SOFT, timeout_seconds=10.0)
    assert req.fail_strategy == HookFailStrategy.SOFT
    assert req.timeout_seconds == 10.0
    assert req.name is None


def test_hook_update_request_rejects_null_name() -> None:
    with pytest.raises(ValidationError, match="name cannot be cleared"):
        HookUpdateRequest(name=None, fail_strategy=HookFailStrategy.SOFT)


def test_hook_update_request_rejects_empty_name() -> None:
    with pytest.raises(ValidationError, match="name cannot be cleared"):
        HookUpdateRequest(name="", fail_strategy=HookFailStrategy.SOFT)


def test_hook_update_request_rejects_null_endpoint_url() -> None:
    with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
        HookUpdateRequest(endpoint_url=None, fail_strategy=HookFailStrategy.SOFT)


def test_hook_update_request_rejects_empty_endpoint_url() -> None:
    with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
        HookUpdateRequest(endpoint_url="", fail_strategy=HookFailStrategy.SOFT)


def test_hook_update_request_allows_null_api_key() -> None:
    # api_key=null is valid — means "clear the api key"
    req = HookUpdateRequest(api_key=None)
    assert req.api_key is None
    assert "api_key" in req.model_fields_set


def test_hook_update_request_rejects_whitespace_name() -> None:
    with pytest.raises(ValidationError, match="name cannot be cleared"):
        HookUpdateRequest(name="   ", fail_strategy=HookFailStrategy.SOFT)


def test_hook_update_request_rejects_whitespace_endpoint_url() -> None:
    with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
        HookUpdateRequest(endpoint_url="   ", fail_strategy=HookFailStrategy.SOFT)


def test_hook_create_request_rejects_whitespace_name() -> None:
    with pytest.raises(ValidationError, match="whitespace-only"):
        HookCreateRequest(
            name="   ",
            hook_point=HookPoint.QUERY_PROCESSING,
            endpoint_url="https://example.com/hook",
        )


def test_hook_create_request_rejects_whitespace_endpoint_url() -> None:
    with pytest.raises(ValidationError, match="whitespace-only"):
        HookCreateRequest(
            name="my hook",
            hook_point=HookPoint.QUERY_PROCESSING,
            endpoint_url="   ",
        )


================================================
FILE: backend/tests/unit/onyx/hooks/test_query_processing_spec.py
================================================
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.hooks.points.query_processing import QueryProcessingSpec


def test_hook_point_is_query_processing() -> None:
    assert QueryProcessingSpec().hook_point == HookPoint.QUERY_PROCESSING


def test_default_fail_strategy_is_hard() -> None:
    assert QueryProcessingSpec().default_fail_strategy == HookFailStrategy.HARD


def test_default_timeout_seconds() -> None:
    # User is actively waiting — 5s is the documented contract for this hook point
    assert QueryProcessingSpec().default_timeout_seconds == 5.0


def test_input_schema_required_fields() -> None:
    schema = QueryProcessingSpec().input_schema
    assert schema["type"] == "object"
    required = schema["required"]
    assert "query" in required
    assert "user_email" in required
    assert "chat_session_id" in required


def test_input_schema_chat_session_id_is_string() -> None:
    props = QueryProcessingSpec().input_schema["properties"]
    assert props["chat_session_id"]["type"] == "string"


def test_input_schema_query_is_string() -> None:
    props = QueryProcessingSpec().input_schema["properties"]
    assert props["query"]["type"] == "string"


def test_input_schema_user_email_is_nullable() -> None:
    props = QueryProcessingSpec().input_schema["properties"]
    # Pydantic v2 emits anyOf for nullable fields
    assert any(s.get("type") == "null" for s in props["user_email"]["anyOf"])


def test_output_schema_query_is_optional() -> None:
    # query defaults to None (absent = reject); not required in the schema
    schema = QueryProcessingSpec().output_schema
    assert "query" not in schema.get("required", [])


def test_output_schema_query_is_nullable() -> None:
    # null means "reject the query"; Pydantic v2 emits anyOf for nullable fields
    props = QueryProcessingSpec().output_schema["properties"]
    assert any(s.get("type") == "null" for s in props["query"]["anyOf"])


def test_output_schema_rejection_message_is_optional() -> None:
    schema = QueryProcessingSpec().output_schema
    assert "rejection_message" not in schema.get("required", [])


def test_input_schema_no_additional_properties() -> None:
    assert QueryProcessingSpec().input_schema.get("additionalProperties") is False


================================================
FILE: backend/tests/unit/onyx/hooks/test_registry.py
================================================
import pytest

from onyx.db.enums import HookPoint
from onyx.hooks import registry as registry_module
from onyx.hooks.registry import get_all_specs
from onyx.hooks.registry import get_hook_point_spec
from onyx.hooks.registry import validate_registry


def test_registry_covers_all_hook_points() -> None:
    """Every HookPoint enum member must have a registered spec."""
    assert {s.hook_point for s in get_all_specs()} == set(
        HookPoint
    ), f"Missing specs for: {set(HookPoint) - {s.hook_point for s in get_all_specs()}}"


def test_get_hook_point_spec_returns_correct_spec() -> None:
    for hook_point in HookPoint:
        spec = get_hook_point_spec(hook_point)
        assert spec.hook_point == hook_point


def test_get_all_specs_returns_all() -> None:
    specs = get_all_specs()
    assert len(specs) == len(HookPoint)
    assert {s.hook_point for s in specs} == set(HookPoint)


def test_get_hook_point_spec_raises_for_unregistered(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """get_hook_point_spec raises ValueError when a hook point has no spec."""
    monkeypatch.setattr(registry_module, "_REGISTRY", {})
    with pytest.raises(ValueError, match="No spec registered for hook point"):
        get_hook_point_spec(HookPoint.QUERY_PROCESSING)


def test_validate_registry_passes() -> None:
    validate_registry()  # should not raise with the real registry


def test_validate_registry_raises_for_incomplete(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(registry_module, "_REGISTRY", {})
    with pytest.raises(RuntimeError, match="Hook point\\(s\\) have no registered spec"):
        validate_registry()


================================================
FILE: backend/tests/unit/onyx/image_gen/test_provider_building.py
================================================
import json
from unittest.mock import patch

import pytest

from onyx.image_gen.exceptions import ImageProviderCredentialsError
from onyx.image_gen.factory import get_image_generation_provider
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.image_gen.interfaces import ReferenceImage
from onyx.image_gen.providers.azure_img_gen import AzureImageGenerationProvider
from onyx.image_gen.providers.openai_img_gen import OpenAIImageGenerationProvider
from onyx.image_gen.providers.vertex_img_gen import VertexImageGenerationProvider

OPENAI_PROVIDER = "openai"
AZURE_PROVIDER = "azure"
VERTEX_PROVIDER = "vertex_ai"


def _get_default_image_gen_creds() -> ImageGenerationProviderCredentials:
    return ImageGenerationProviderCredentials(
        api_key=None,
        api_base=None,
        api_version=None,
        deployment_name=None,
        custom_config=None,
    )


def test_request_provider_that_no_exist() -> None:
    provider = "nonexistent"
    credentials = _get_default_image_gen_creds()

    with pytest.raises(ValueError):
        get_image_generation_provider(provider, credentials)


def test_build_openai_provider_from_api_key_and_base() -> None:
    credentials = _get_default_image_gen_creds()

    credentials.api_key = "test"
    credentials.api_base = "test"

    provider = OPENAI_PROVIDER

    image_gen_provider = get_image_generation_provider(provider, credentials)

    assert isinstance(image_gen_provider, OpenAIImageGenerationProvider)
    assert image_gen_provider._api_key == "test"
    assert image_gen_provider._api_base == "test"
    assert image_gen_provider.supports_reference_images is True
    assert image_gen_provider.max_reference_images == 16


def test_build_openai_provider_fails_no_api_key() -> None:
    credentials = _get_default_image_gen_creds()

    credentials.api_base = "test"

    provider = OPENAI_PROVIDER

    with pytest.raises(ImageProviderCredentialsError):
        get_image_generation_provider(provider, credentials)


def test_build_azure_provider_from_api_key_and_base_and_version() -> None:
    credentials = _get_default_image_gen_creds()

    credentials.api_key = "test"
    credentials.api_base = "test"
    credentials.api_version = "test"

    provider = AZURE_PROVIDER

    image_gen_provider = get_image_generation_provider(provider, credentials)

    assert isinstance(image_gen_provider, AzureImageGenerationProvider)
    assert image_gen_provider._api_key == "test"
    assert image_gen_provider._api_base == "test"
    assert image_gen_provider._api_version == "test"
    assert image_gen_provider.supports_reference_images is True
    assert image_gen_provider.max_reference_images == 16


def test_build_azure_provider_fails_missing_credential() -> None:
    azure_required = [
        "api_key",
        "api_base",
        "api_version",
    ]

    default_creds = _get_default_image_gen_creds()
    default_creds.api_key = "test"
    default_creds.api_base = "test"
    default_creds.api_version = "test"

    for attribute in azure_required:
        credentials = default_creds.model_copy()
        setattr(credentials, attribute, None)

        with pytest.raises(ImageProviderCredentialsError):
            get_image_generation_provider(AZURE_PROVIDER, credentials)


def test_build_vertex_provider_from_credentials() -> None:
    credentials = _get_default_image_gen_creds()

    vertex_credentials = {
        "project_id": "demo_project_1",
        "private_key_id": "test",
    }

    vertex_json = json.dumps(vertex_credentials)
    credentials.custom_config = {
        "vertex_credentials": vertex_json,
        "vertex_location": "global",
    }
    provider = VERTEX_PROVIDER

    image_gen_provider = get_image_generation_provider(provider, credentials)

    assert isinstance(image_gen_provider, VertexImageGenerationProvider)
    assert image_gen_provider._vertex_credentials == vertex_json
    assert image_gen_provider._vertex_location == "global"
    assert image_gen_provider._vertex_project == "demo_project_1"


def test_build_vertex_provider_with_missing_project_id() -> None:
    credentials = _get_default_image_gen_creds()

    vertex_credentials = {
        "private_key_id": "test",
    }

    vertex_json = json.dumps(vertex_credentials)
    credentials.custom_config = {
        "vertex_credentials": vertex_json,
        "vertex_location": "global",
    }

    with pytest.raises(ImageProviderCredentialsError):
        get_image_generation_provider("vertex_ai", credentials)


def test_openai_provider_uses_image_generation_without_reference_images() -> None:
    provider = OpenAIImageGenerationProvider(
        api_key="test-key",
        api_base="test-base",
    )
    expected_response = object()

    with (
        patch("litellm.image_generation", return_value=expected_response) as mock_gen,
        patch("litellm.image_edit") as mock_edit,
    ):
        response = provider.generate_image(
            prompt="draw a mountain",
            model="gpt-image-1",
            size="1024x1024",
            n=1,
            quality="high",
        )

    assert response is expected_response
    mock_gen.assert_called_once()
    mock_edit.assert_not_called()


def test_openai_provider_uses_image_edit_with_reference_images() -> None:
    provider = OpenAIImageGenerationProvider(
        api_key="test-key",
        api_base="test-base",
    )
    reference_images = [
        ReferenceImage(data=b"image-1-bytes", mime_type="image/png"),
        ReferenceImage(data=b"image-2-bytes", mime_type="image/jpeg"),
    ]
    expected_response = object()

    with (
        patch("litellm.image_generation") as mock_gen,
        patch("litellm.image_edit", return_value=expected_response) as mock_edit,
    ):
        response = provider.generate_image(
            prompt="make this look watercolor",
            model="gpt-image-1",
            size="1024x1024",
            n=1,
            quality="high",
            reference_images=reference_images,
        )

    assert response is expected_response
    mock_gen.assert_not_called()
    mock_edit.assert_called_once()
    assert mock_edit.call_args.kwargs["image"] == [
        b"image-1-bytes",
        b"image-2-bytes",
    ]


def test_openai_provider_rejects_reference_images_for_unsupported_model() -> None:
    provider = OpenAIImageGenerationProvider(api_key="test-key")

    with pytest.raises(ValueError):
        provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=[ReferenceImage(data=b"image-1", mime_type="image/png")],
        )


def test_openai_provider_rejects_multiple_reference_images_for_dalle3() -> None:
    provider = OpenAIImageGenerationProvider(api_key="test-key")

    with pytest.raises(
        ValueError,
        match="does not support image edits with reference images",
    ):
        provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=[
                ReferenceImage(data=b"image-1", mime_type="image/png"),
                ReferenceImage(data=b"image-2", mime_type="image/png"),
            ],
        )


def test_azure_provider_uses_image_generation_without_reference_images() -> None:
    provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
        deployment_name="img-deployment",
    )
    expected_response = object()

    with (
        patch("litellm.image_generation", return_value=expected_response) as mock_gen,
        patch("litellm.image_edit") as mock_edit,
    ):
        response = provider.generate_image(
            prompt="draw a skyline",
            model="gpt-image-1",
            size="1024x1024",
            n=1,
            quality="high",
        )

    assert response is expected_response
    mock_gen.assert_called_once()
    mock_edit.assert_not_called()
    assert mock_gen.call_args.kwargs["model"] == "azure/img-deployment"


def test_azure_provider_uses_image_edit_with_reference_images() -> None:
    provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
        deployment_name="img-deployment",
    )
    reference_images = [
        ReferenceImage(data=b"image-1-bytes", mime_type="image/png"),
        ReferenceImage(data=b"image-2-bytes", mime_type="image/jpeg"),
    ]
    expected_response = object()

    with (
        patch("litellm.image_generation") as mock_gen,
        patch("litellm.image_edit", return_value=expected_response) as mock_edit,
    ):
        response = provider.generate_image(
            prompt="make this noir style",
            model="gpt-image-1",
            size="1024x1024",
            n=1,
            quality="high",
            reference_images=reference_images,
        )

    assert response is expected_response
    mock_gen.assert_not_called()
    mock_edit.assert_called_once()
    assert mock_edit.call_args.kwargs["model"] == "azure/img-deployment"
    assert mock_edit.call_args.kwargs["image"] == [
        b"image-1-bytes",
        b"image-2-bytes",
    ]


def test_azure_provider_rejects_reference_images_for_unsupported_model() -> None:
    provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
    )

    with pytest.raises(ValueError):
        provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=[ReferenceImage(data=b"image-1", mime_type="image/png")],
        )


def test_azure_provider_rejects_multiple_reference_images_for_dalle3() -> None:
    provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
    )

    with pytest.raises(
        ValueError,
        match="does not support image edits with reference images",
    ):
        provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=[
                ReferenceImage(data=b"image-1", mime_type="image/png"),
                ReferenceImage(data=b"image-2", mime_type="image/png"),
            ],
        )


================================================
FILE: backend/tests/unit/onyx/indexing/conftest.py
================================================
import pytest

from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface


class MockHeartbeat(IndexingHeartbeatInterface):
    def __init__(self) -> None:
        self.call_count = 0

    def should_stop(self) -> bool:
        return False

    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002
        self.call_count += 1


@pytest.fixture
def mock_heartbeat() -> MockHeartbeat:
    return MockHeartbeat()


@pytest.fixture
def embedder() -> DefaultIndexingEmbedder:
    return DefaultIndexingEmbedder(
        model_name="intfloat/e5-base-v2",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
    )


================================================
FILE: backend/tests/unit/onyx/indexing/test_censoring.py
================================================
import os
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

_post_query_chunk_censoring = fetch_ee_implementation_or_noop(
    "onyx.external_permissions.post_query_censoring", "_post_query_chunk_censoring"
)


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permissions tests are enterprise only",
)
class TestPostQueryChunkCensoring:
    @pytest.fixture(autouse=True)
    def setUp(self) -> None:
        self.mock_user = User(id=1, email="test@example.com")
        self.mock_chunk_1 = InferenceChunk(
            document_id="doc1",
            chunk_id=1,
            content="chunk1 content",
            source_type=DocumentSource.SALESFORCE,
            semantic_identifier="doc1_1",
            title="doc1",
            boost=1,
            score=0.9,
            hidden=False,
            metadata={},
            match_highlights=[],
            doc_summary="doc1 summary",
            chunk_context="doc1 context",
            updated_at=None,
            image_file_id=None,
            source_links={},
            section_continuation=False,
            blurb="chunk1",
        )
        self.mock_chunk_2 = InferenceChunk(
            document_id="doc2",
            chunk_id=2,
            content="chunk2 content",
            source_type=DocumentSource.SLACK,
            semantic_identifier="doc2_2",
            title="doc2",
            boost=1,
            score=0.8,
            hidden=False,
            metadata={},
            match_highlights=[],
            doc_summary="doc2 summary",
            chunk_context="doc2 context",
            updated_at=None,
            image_file_id=None,
            source_links={},
            section_continuation=False,
            blurb="chunk2",
        )
        self.mock_chunk_3 = InferenceChunk(
            document_id="doc3",
            chunk_id=3,
            content="chunk3 content",
            source_type=DocumentSource.SALESFORCE,
            semantic_identifier="doc3_3",
            title="doc3",
            boost=1,
            score=0.7,
            hidden=False,
            metadata={},
            match_highlights=[],
            doc_summary="doc3 summary",
            chunk_context="doc3 context",
            updated_at=None,
            image_file_id=None,
            source_links={},
            section_continuation=False,
            blurb="chunk3",
        )
        self.mock_chunk_4 = InferenceChunk(
            document_id="doc4",
            chunk_id=4,
            content="chunk4 content",
            source_type=DocumentSource.SALESFORCE,
            semantic_identifier="doc4_4",
            title="doc4",
            boost=1,
            score=0.6,
            hidden=False,
            metadata={},
            match_highlights=[],
            doc_summary="doc4 summary",
            chunk_context="doc4 context",
            updated_at=None,
            image_file_id=None,
            source_links={},
            section_continuation=False,
            blurb="chunk4",
        )

    @patch(
        "ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources"
    )
    def test_post_query_chunk_censoring_no_user(
        self, mock_get_sources: MagicMock
    ) -> None:
        mock_get_sources.return_value = {DocumentSource.SALESFORCE}
        chunks = [self.mock_chunk_1, self.mock_chunk_2]
        result = _post_query_chunk_censoring(chunks, None)
        assert result == chunks

    @patch(
        "ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources"
    )
    @patch(
        "ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION"
    )
    def test_post_query_chunk_censoring_salesforce_censored(
        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock
    ) -> None:
        mock_get_sources.return_value = {DocumentSource.SALESFORCE}
        mock_censor_func_impl = MagicMock(
            return_value=[self.mock_chunk_1]
        )  # Only return chunk 1
        mock_censor_func.__getitem__.return_value = mock_censor_func_impl

        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]
        result = _post_query_chunk_censoring(chunks, self.mock_user)
        assert len(result) == 2
        assert self.mock_chunk_1 in result
        assert self.mock_chunk_2 in result
        assert self.mock_chunk_3 not in result
        mock_censor_func_impl.assert_called_once()

    @patch(
        "ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources"
    )
    @patch(
        "ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION"
    )
    def test_post_query_chunk_censoring_salesforce_error(
        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock
    ) -> None:
        mock_get_sources.return_value = {DocumentSource.SALESFORCE}
        mock_censor_func_impl = MagicMock(side_effect=Exception("Censoring error"))
        mock_censor_func.__getitem__.return_value = mock_censor_func_impl

        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]
        result = _post_query_chunk_censoring(chunks, self.mock_user)
        assert len(result) == 1
        assert self.mock_chunk_2 in result
        mock_censor_func_impl.assert_called_once()

    @patch(
        "ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources"
    )
    @patch(
        "ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION"
    )
    def test_post_query_chunk_censoring_no_censoring(
        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock
    ) -> None:
        mock_get_sources.return_value = set()  # No sources to censor
        mock_censor_func_impl = MagicMock()
        mock_censor_func.__getitem__.return_value = mock_censor_func_impl

        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]
        result = _post_query_chunk_censoring(chunks, self.mock_user)
        assert result == chunks
        mock_censor_func_impl.assert_not_called()

    @patch(
        "ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources"
    )
    @patch(
        "ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION"
    )
    def test_post_query_chunk_censoring_order_maintained(
        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock
    ) -> None:
        mock_get_sources.return_value = {DocumentSource.SALESFORCE}
        mock_censor_func_impl = MagicMock(
            return_value=[self.mock_chunk_3, self.mock_chunk_1]
        )  # Return chunk 3 and 1
        mock_censor_func.__getitem__.return_value = mock_censor_func_impl

        chunks = [
            self.mock_chunk_1,
            self.mock_chunk_2,
            self.mock_chunk_3,
            self.mock_chunk_4,
        ]
        result = _post_query_chunk_censoring(chunks, self.mock_user)
        assert len(result) == 3
        assert result[0] == self.mock_chunk_1
        assert result[1] == self.mock_chunk_2
        assert result[2] == self.mock_chunk_3
        assert self.mock_chunk_4 not in result
        mock_censor_func_impl.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/indexing/test_chunker.py
================================================
from typing import Any
from unittest.mock import Mock

import pytest

from onyx.configs.app_configs import USE_CHUNK_SUMMARY
from onyx.configs.app_configs import USE_DOCUMENT_SUMMARY
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import process_image_sections
from onyx.llm.utils import MAX_CONTEXT_TOKENS
from tests.unit.onyx.indexing.conftest import MockHeartbeat


@pytest.mark.parametrize("enable_contextual_rag", [True, False])
def test_chunk_document(
    embedder: DefaultIndexingEmbedder, enable_contextual_rag: bool
) -> None:
    short_section_1 = "This is a short section."
    long_section = (
        "This is a long section that should be split into multiple chunks. " * 100
    )
    short_section_2 = "This is another short section."
    short_section_3 = "This is another short section again."
    short_section_4 = "Final short section."
    semantic_identifier = "Test Document"

    document = Document(
        id="test_doc",
        source=DocumentSource.WEB,
        semantic_identifier=semantic_identifier,
        metadata={"tags": ["tag1", "tag2"]},
        doc_updated_at=None,
        sections=[
            TextSection(text=short_section_1, link="link1"),
            TextSection(text=short_section_2, link="link2"),
            TextSection(text=long_section, link="link3"),
            TextSection(text=short_section_3, link="link4"),
            TextSection(text=short_section_4, link="link5"),
        ],
    )
    indexing_documents = process_image_sections([document])

    mock_llm_invoke_count = 0

    def mock_llm_invoke(
        self: Any, *args: Any, **kwargs: Any  # noqa: ARG001
    ) -> Mock:  # noqa: ARG001
        nonlocal mock_llm_invoke_count
        mock_llm_invoke_count += 1
        m = Mock()
        m.content = f"Test{mock_llm_invoke_count}"
        return m

    mock_llm = Mock()
    mock_llm.invoke = mock_llm_invoke

    chunker = Chunker(
        tokenizer=embedder.embedding_model.tokenizer,
        enable_multipass=False,
        enable_contextual_rag=enable_contextual_rag,
    )
    chunks = chunker.chunk(indexing_documents)

    assert len(chunks) == 5
    assert short_section_1 in chunks[0].content
    assert short_section_3 in chunks[-1].content
    assert short_section_4 in chunks[-1].content
    assert "tag1" in chunks[0].metadata_suffix_keyword
    assert "tag2" in chunks[0].metadata_suffix_semantic

    rag_tokens = MAX_CONTEXT_TOKENS * (
        int(USE_DOCUMENT_SUMMARY) + int(USE_CHUNK_SUMMARY)
    )
    for chunk in chunks:
        assert chunk.contextual_rag_reserved_tokens == (
            rag_tokens if enable_contextual_rag else 0
        )


def test_chunker_heartbeat(
    embedder: DefaultIndexingEmbedder, mock_heartbeat: MockHeartbeat
) -> None:
    document = Document(
        id="test_doc",
        source=DocumentSource.WEB,
        semantic_identifier="Test Document",
        metadata={"tags": ["tag1", "tag2"]},
        doc_updated_at=None,
        sections=[
            TextSection(text="This is a short section.", link="link1"),
        ],
    )
    indexing_documents = process_image_sections([document])

    chunker = Chunker(
        tokenizer=embedder.embedding_model.tokenizer,
        enable_multipass=False,
        callback=mock_heartbeat,
        enable_contextual_rag=False,
    )

    chunks = chunker.chunk(indexing_documents)

    assert mock_heartbeat.call_count == 1
    assert len(chunks) > 0


================================================
FILE: backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
================================================
"""Unit tests for _embed_chunks_to_store.

Tests cover:
  - Single batch, no failures
  - Multiple batches, no failures
  - Failure in a single batch
  - Cross-batch document failure scrubbing
  - Later batches skip already-failed docs
  - Empty input
  - All chunks fail
"""

from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import TextSection
from onyx.indexing.chunk_batch_store import ChunkBatchStore
from onyx.indexing.indexing_pipeline import _embed_chunks_to_store
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import IndexChunk


def _make_doc(doc_id: str) -> Document:
    return Document(
        id=doc_id,
        semantic_identifier="test",
        source=DocumentSource.FILE,
        sections=[TextSection(text="test", link=None)],
        metadata={},
    )


def _make_chunk(doc_id: str, chunk_id: int) -> DocAwareChunk:
    return DocAwareChunk(
        chunk_id=chunk_id,
        blurb="test",
        content="test content",
        source_links=None,
        image_file_id=None,
        section_continuation=False,
        source_document=_make_doc(doc_id),
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        doc_summary="",
        chunk_context="",
        contextual_rag_reserved_tokens=0,
    )


def _make_index_chunk(doc_id: str, chunk_id: int) -> IndexChunk:
    """Create an IndexChunk (a DocAwareChunk with embeddings)."""
    return IndexChunk(
        chunk_id=chunk_id,
        blurb="test",
        content="test content",
        source_links=None,
        image_file_id=None,
        section_continuation=False,
        source_document=_make_doc(doc_id),
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        doc_summary="",
        chunk_context="",
        contextual_rag_reserved_tokens=0,
        embeddings=ChunkEmbedding(
            full_embedding=[0.1] * 10,
            mini_chunk_embeddings=[],
        ),
        title_embedding=None,
    )


def _make_failure(doc_id: str) -> ConnectorFailure:
    return ConnectorFailure(
        failed_document=DocumentFailure(document_id=doc_id, document_link=None),
        failure_message="embedding failed",
        exception=RuntimeError("embedding failed"),
    )


def _mock_embed_success(
    chunks: list[DocAwareChunk], **_kwargs: object
) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
    """Simulate successful embedding of all chunks."""
    return (
        [_make_index_chunk(c.source_document.id, c.chunk_id) for c in chunks],
        [],
    )


def _mock_embed_fail_doc(
    fail_doc_id: str,
) -> Callable[..., tuple[list[IndexChunk], list[ConnectorFailure]]]:
    """Return an embed mock that fails all chunks for a specific doc."""

    def _embed(
        chunks: list[DocAwareChunk], **_kwargs: object
    ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
        successes = [
            _make_index_chunk(c.source_document.id, c.chunk_id)
            for c in chunks
            if c.source_document.id != fail_doc_id
        ]
        failures = (
            [_make_failure(fail_doc_id)]
            if any(c.source_document.id == fail_doc_id for c in chunks)
            else []
        )
        return successes, failures

    return _embed


class TestEmbedChunksInBatches:
    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
    def test_single_batch_no_failures(self, mock_embed: MagicMock) -> None:
        """All chunks fit in one batch and embed successfully."""
        mock_embed.side_effect = _mock_embed_success

        with ChunkBatchStore() as store:
            chunks = [_make_chunk("doc1", i) for i in range(3)]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            assert len(result.successful_chunk_ids) == 3
            assert len(result.connector_failures) == 0

            # Verify stored contents
            assert len(store._batch_files()) == 1
            stored = list(store.stream())
            assert len(stored) == 3

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
    def test_multiple_batches_no_failures(self, mock_embed: MagicMock) -> None:
        """Chunks are split across multiple batches, all succeed."""
        mock_embed.side_effect = _mock_embed_success

        with ChunkBatchStore() as store:
            chunks = [_make_chunk("doc1", i) for i in range(7)]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            assert len(result.successful_chunk_ids) == 7
            assert len(result.connector_failures) == 0
            assert len(store._batch_files()) == 3  # 3 + 3 + 1

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
    def test_single_batch_with_failure(self, mock_embed: MagicMock) -> None:
        """One doc fails embedding, its chunks are excluded from results."""
        mock_embed.side_effect = _mock_embed_fail_doc("doc2")

        with ChunkBatchStore() as store:
            chunks = [
                _make_chunk("doc1", 0),
                _make_chunk("doc2", 1),
                _make_chunk("doc1", 2),
            ]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            assert len(result.connector_failures) == 1
            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
            assert "doc2" not in successful_doc_ids
            assert "doc1" in successful_doc_ids

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
    def test_cross_batch_failure_scrubs_earlier_batch(
        self, mock_embed: MagicMock
    ) -> None:
        """Doc A spans batches 0 and 1.  It succeeds in batch 0 but fails in
        batch 1.  Its chunks should be scrubbed from batch 0's batch file."""
        call_count = 0

        def _embed(
            chunks: list[DocAwareChunk], **_kwargs: object
        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return _mock_embed_success(chunks)
            else:
                return _mock_embed_fail_doc("docA")(chunks)

        mock_embed.side_effect = _embed

        with ChunkBatchStore() as store:
            chunks = [
                _make_chunk("docA", 0),
                _make_chunk("docA", 1),
                _make_chunk("docA", 2),
                _make_chunk("docA", 3),
                _make_chunk("docB", 0),
                _make_chunk("docB", 1),
            ]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            # docA should be fully excluded from results
            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
            assert "docA" not in successful_doc_ids
            assert "docB" in successful_doc_ids
            assert len(result.connector_failures) == 1

            # Verify batch 0 was scrubbed of docA chunks
            all_stored = list(store.stream())
            stored_doc_ids = {c.source_document.id for c in all_stored}
            assert "docA" not in stored_doc_ids
            assert "docB" in stored_doc_ids

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
    def test_later_batch_skips_already_failed_doc(self, mock_embed: MagicMock) -> None:
        """If docA fails in batch 0, its chunks in batch 1 are skipped
        entirely (never sent to the embedder)."""
        embedded_doc_ids: list[str] = []

        def _embed(
            chunks: list[DocAwareChunk], **_kwargs: object
        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
            for c in chunks:
                embedded_doc_ids.append(c.source_document.id)
            return _mock_embed_fail_doc("docA")(chunks)

        mock_embed.side_effect = _embed

        with ChunkBatchStore() as store:
            chunks = [
                _make_chunk("docA", 0),
                _make_chunk("docA", 1),
                _make_chunk("docA", 2),
                _make_chunk("docA", 3),
                _make_chunk("docB", 0),
                _make_chunk("docB", 1),
            ]
            _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

        # docA should only appear in batch 0, not batch 1
        batch_1_doc_ids = embedded_doc_ids[3:]
        assert "docA" not in batch_1_doc_ids

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
    def test_failed_doc_skipped_in_later_batch_while_other_doc_succeeds(
        self, mock_embed: MagicMock
    ) -> None:
        """doc1 spans batches 0 and 1, doc2 only in batch 1.  Batch 0 fails
        doc1.  In batch 1, doc1 chunks should be skipped but doc2 chunks
        should still be embedded successfully."""
        embedded_chunks: list[list[str]] = []

        def _embed(
            chunks: list[DocAwareChunk], **_kwargs: object
        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
            embedded_chunks.append([c.source_document.id for c in chunks])
            return _mock_embed_fail_doc("doc1")(chunks)

        mock_embed.side_effect = _embed

        with ChunkBatchStore() as store:
            chunks = [
                _make_chunk("doc1", 0),
                _make_chunk("doc1", 1),
                _make_chunk("doc1", 2),
                _make_chunk("doc1", 3),
                _make_chunk("doc2", 0),
                _make_chunk("doc2", 1),
            ]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            # doc1 should be fully excluded, doc2 fully included
            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
            assert "doc1" not in successful_doc_ids
            assert "doc2" in successful_doc_ids
            assert len(result.successful_chunk_ids) == 2  # doc2's 2 chunks

            # Batch 1 should only contain doc2 (doc1 was filtered before embedding)
            assert len(embedded_chunks) == 2
            assert "doc1" not in embedded_chunks[1]
            assert embedded_chunks[1] == ["doc2", "doc2"]

            # Verify on-disk state has no doc1 chunks
            all_stored = list(store.stream())
            assert all(c.source_document.id == "doc2" for c in all_stored)

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    def test_empty_input(self, mock_embed: MagicMock) -> None:
        """Empty chunk list produces empty results."""
        mock_embed.side_effect = _mock_embed_success

        with ChunkBatchStore() as store:
            result = _embed_chunks_to_store(
                chunks=[],
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            assert len(result.successful_chunk_ids) == 0
            assert len(result.connector_failures) == 0
            mock_embed.assert_not_called()

    @patch(
        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
    )
    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
    def test_all_chunks_fail(self, mock_embed: MagicMock) -> None:
        """When all documents fail, results have no successful chunks."""

        def _fail_all(
            chunks: list[DocAwareChunk], **_kwargs: object
        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
            doc_ids = {c.source_document.id for c in chunks}
            return [], [_make_failure(doc_id) for doc_id in doc_ids]

        mock_embed.side_effect = _fail_all

        with ChunkBatchStore() as store:
            chunks = [_make_chunk("doc1", 0), _make_chunk("doc2", 1)]
            result = _embed_chunks_to_store(
                chunks=chunks,
                embedder=MagicMock(),
                tenant_id="test",
                request_id=None,
                store=store,
            )

            assert len(result.successful_chunk_ids) == 0
            assert len(result.connector_failures) == 2


================================================
FILE: backend/tests/unit/onyx/indexing/test_embedder.py
================================================
from collections.abc import Generator
from unittest.mock import Mock
from unittest.mock import patch

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import IndexChunk
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType


@pytest.fixture
def mock_embedding_model() -> Generator[Mock, None, None]:
    with patch("onyx.indexing.embedder.EmbeddingModel") as mock:
        yield mock


@pytest.mark.parametrize(
    "chunk_context, doc_summary",
    [("Test chunk context", "Test document summary"), ("", "")],
)
def test_default_indexing_embedder_embed_chunks(
    mock_embedding_model: Mock, chunk_context: str, doc_summary: str
) -> None:
    # Setup
    embedder = DefaultIndexingEmbedder(
        model_name="test-model",
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
        provider_type=EmbeddingProvider.OPENAI,
    )

    # Mock the encode method of the embedding model
    mock_embedding_model.return_value.encode.side_effect = [
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],  # Main chunk embeddings
        [[7.0, 8.0, 9.0]],  # Title embedding
    ]

    # Create test input
    source_doc = Document(
        id="test_doc",
        source=DocumentSource.WEB,
        semantic_identifier="Test Document",
        metadata={"tags": ["tag1", "tag2"]},
        doc_updated_at=None,
        sections=[
            TextSection(text="This is a short section.", link="link1"),
        ],
    )
    chunks: list[DocAwareChunk] = [
        DocAwareChunk(
            chunk_id=1,
            blurb="This is a short section.",
            content="Test chunk",
            source_links={0: "link1"},
            section_continuation=False,
            source_document=source_doc,
            title_prefix="Title: ",
            metadata_suffix_semantic="",
            metadata_suffix_keyword="",
            mini_chunk_texts=None,
            large_chunk_reference_ids=[],
            large_chunk_id=None,
            image_file_id=None,
            chunk_context=chunk_context,
            doc_summary=doc_summary,
            contextual_rag_reserved_tokens=200,
        )
    ]

    # Execute
    result: list[IndexChunk] = embedder.embed_chunks(chunks)

    # Assert
    assert len(result) == 1
    assert isinstance(result[0], IndexChunk)
    assert result[0].content == "Test chunk"
    assert result[0].embeddings == ChunkEmbedding(
        full_embedding=[1.0, 2.0, 3.0],
        mini_chunk_embeddings=[],
    )
    assert result[0].title_embedding == [7.0, 8.0, 9.0]

    # Verify the embedding model was called exactly as follows
    mock_embedding_model.return_value.encode.assert_any_call(
        texts=[f"Title: {doc_summary}Test chunk{chunk_context}"],
        text_type=EmbedTextType.PASSAGE,
        large_chunks_present=False,
        tenant_id=None,
        request_id=None,
    )
    # Same for title only embedding call
    mock_embedding_model.return_value.encode.assert_any_call(
        ["Test Document"],
        text_type=EmbedTextType.PASSAGE,
        tenant_id=None,
        request_id=None,
    )


================================================
FILE: backend/tests/unit/onyx/indexing/test_indexing_pipeline.py
================================================
import threading
from typing import Any
from typing import cast
from typing import List
from unittest.mock import MagicMock
from unittest.mock import Mock
from unittest.mock import patch

import pytest

from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.document_ingestion import DocumentIngestionResponse
from onyx.hooks.points.document_ingestion import DocumentIngestionSection
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import _apply_document_ingestion_hook
from onyx.indexing.indexing_pipeline import add_contextual_summaries
from onyx.indexing.indexing_pipeline import filter_documents
from onyx.indexing.indexing_pipeline import process_image_sections
from onyx.llm.constants import LlmProviderNames
from onyx.llm.model_response import Choice
from onyx.llm.model_response import Message
from onyx.llm.model_response import ModelResponse
from onyx.llm.utils import get_max_input_tokens


def create_test_document(
    doc_id: str = "test_id",
    title: str | None = "Test Title",
    semantic_id: str = "test_semantic_id",
    sections: List[TextSection] | None = None,
) -> Document:
    if sections is None:
        sections = [TextSection(text="Test content", link="test_link")]
    return Document(
        id=doc_id,
        title=title,
        semantic_identifier=semantic_id,
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.FILE,
        metadata={},
    )


def test_filter_documents_empty_title_and_content() -> None:
    doc = create_test_document(
        title="", semantic_id="", sections=[TextSection(text="", link="test_link")]
    )
    result = filter_documents([doc])
    assert len(result) == 0


def test_filter_documents_empty_title_with_content() -> None:
    doc = create_test_document(
        title="", sections=[TextSection(text="Valid content", link="test_link")]
    )
    result = filter_documents([doc])
    assert len(result) == 1
    assert result[0].id == "test_id"


def test_filter_documents_empty_content_with_title() -> None:
    doc = create_test_document(
        title="Valid Title", sections=[TextSection(text="", link="test_link")]
    )
    result = filter_documents([doc])
    assert len(result) == 1
    assert result[0].id == "test_id"


def test_filter_documents_exceeding_max_chars() -> None:
    if not MAX_DOCUMENT_CHARS:  # Skip if no max chars configured
        return
    long_text = "a" * (MAX_DOCUMENT_CHARS + 1)
    doc = create_test_document(sections=[TextSection(text=long_text, link="test_link")])
    result = filter_documents([doc])
    assert len(result) == 0


def test_filter_documents_valid_document() -> None:
    doc = create_test_document(
        title="Valid Title",
        sections=[TextSection(text="Valid content", link="test_link")],
    )
    result = filter_documents([doc])
    assert len(result) == 1
    assert result[0].id == "test_id"
    assert result[0].title == "Valid Title"


def test_filter_documents_whitespace_only() -> None:
    doc = create_test_document(
        title="   ",
        semantic_id="  ",
        sections=[TextSection(text="   ", link="test_link")],
    )
    result = filter_documents([doc])
    assert len(result) == 0


def test_filter_documents_semantic_id_no_title() -> None:
    doc = create_test_document(
        title=None,
        semantic_id="Valid Semantic ID",
        sections=[TextSection(text="Valid content", link="test_link")],
    )
    result = filter_documents([doc])
    assert len(result) == 1
    assert result[0].semantic_identifier == "Valid Semantic ID"


def test_filter_documents_multiple_sections() -> None:
    doc = create_test_document(
        sections=[
            TextSection(text="Content 1", link="test_link"),
            TextSection(text="Content 2", link="test_link"),
            TextSection(text="Content 3", link="test_link"),
        ]
    )
    result = filter_documents([doc])
    assert len(result) == 1
    assert len(result[0].sections) == 3


def test_filter_documents_multiple_documents() -> None:
    docs = [
        create_test_document(doc_id="1", title="Title 1"),
        create_test_document(
            doc_id="2", title="", sections=[TextSection(text="", link="test_link")]
        ),  # Should be filtered
        create_test_document(doc_id="3", title="Title 3"),
    ]
    result = filter_documents(docs)
    assert len(result) == 2
    assert {doc.id for doc in result} == {"1", "3"}


def test_filter_documents_empty_batch() -> None:
    result = filter_documents([])
    assert len(result) == 0


@patch("onyx.llm.utils.GEN_AI_MAX_TOKENS", 4096)
@pytest.mark.parametrize("enable_contextual_rag", [True, False])
def test_contextual_rag(
    embedder: DefaultIndexingEmbedder, enable_contextual_rag: bool
) -> None:
    short_section_1 = "This is a short section."
    long_section = (
        "This is a long section that should be split into multiple chunks. " * 100
    )
    short_section_2 = "This is another short section."
    short_section_3 = "This is another short section again."
    short_section_4 = "Final short section."
    semantic_identifier = "Test Document"

    document = Document(
        id="test_doc",
        source=DocumentSource.WEB,
        semantic_identifier=semantic_identifier,
        metadata={"tags": ["tag1", "tag2"]},
        doc_updated_at=None,
        sections=[
            TextSection(text=short_section_1, link="link1"),
            TextSection(text=short_section_2, link="link2"),
            TextSection(text=long_section, link="link3"),
            TextSection(text=short_section_3, link="link4"),
            TextSection(text=short_section_4, link="link5"),
        ],
    )
    indexing_documents = process_image_sections([document])

    mock_llm_invoke_count = 0
    counter_lock = threading.Lock()

    def mock_llm_invoke(
        *args: Any, **kwargs: Any  # noqa: ARG001
    ) -> ModelResponse:  # noqa: ARG001
        nonlocal mock_llm_invoke_count
        with counter_lock:
            mock_llm_invoke_count += 1
        return ModelResponse(
            id=f"test-{mock_llm_invoke_count}",
            created="2024-01-01T00:00:00Z",
            choice=Choice(message=Message(content=f"Test{mock_llm_invoke_count}")),
        )

    llm_tokenizer = embedder.embedding_model.tokenizer

    mock_llm = Mock()
    mock_llm.config.max_input_tokens = get_max_input_tokens(
        model_provider=LlmProviderNames.OPENAI, model_name="gpt-4o"
    )
    mock_llm.invoke = mock_llm_invoke

    chunker = Chunker(
        tokenizer=embedder.embedding_model.tokenizer,
        enable_multipass=False,
        enable_contextual_rag=enable_contextual_rag,
    )
    chunks = chunker.chunk(indexing_documents)

    chunks = add_contextual_summaries(
        chunks=chunks,
        llm=mock_llm,
        tokenizer=llm_tokenizer,
        chunk_token_limit=chunker.chunk_token_limit * 2,
    )

    assert len(chunks) == 5
    assert short_section_1 in chunks[0].content
    assert short_section_3 in chunks[-1].content
    assert short_section_4 in chunks[-1].content
    assert "tag1" in chunks[0].metadata_suffix_keyword
    assert "tag2" in chunks[0].metadata_suffix_semantic

    doc_summary = "Test1" if enable_contextual_rag else ""
    chunk_context = ""
    count = 2
    for chunk in chunks:
        if enable_contextual_rag:
            chunk_context = f"Test{count}"
            count += 1
        assert chunk.doc_summary == doc_summary
        assert chunk.chunk_context == chunk_context


# ---------------------------------------------------------------------------
# _apply_document_ingestion_hook
# ---------------------------------------------------------------------------

_PATCH_EXECUTE_HOOK = "onyx.indexing.indexing_pipeline.execute_hook"


def _make_doc(
    doc_id: str = "doc1",
    sections: list[TextSection | ImageSection] | None = None,
) -> Document:
    if sections is None:
        sections = [TextSection(text="Hello", link="http://example.com")]
    return Document(
        id=doc_id,
        title="Test Doc",
        semantic_identifier="test-doc",
        sections=cast(list[TextSection | ImageSection], sections),
        source=DocumentSource.FILE,
        metadata={},
    )


def test_document_ingestion_hook_skipped_passes_through() -> None:
    doc = _make_doc()
    with patch(_PATCH_EXECUTE_HOOK, return_value=HookSkipped()):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert result == [doc]


def test_document_ingestion_hook_soft_failed_passes_through() -> None:
    doc = _make_doc()
    with patch(_PATCH_EXECUTE_HOOK, return_value=HookSoftFailed()):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert result == [doc]


def test_document_ingestion_hook_none_sections_drops_document() -> None:
    doc = _make_doc()
    with patch(
        _PATCH_EXECUTE_HOOK,
        return_value=DocumentIngestionResponse(
            sections=None, rejection_reason="PII detected"
        ),
    ):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert result == []


def test_document_ingestion_hook_all_invalid_sections_drops_document() -> None:
    """A non-empty list where every section has neither text nor image_file_id drops the doc."""
    doc = _make_doc()
    with patch(
        _PATCH_EXECUTE_HOOK,
        return_value=DocumentIngestionResponse(sections=[DocumentIngestionSection()]),
    ):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert result == []


def test_document_ingestion_hook_empty_sections_drops_document() -> None:
    doc = _make_doc()
    with patch(
        _PATCH_EXECUTE_HOOK,
        return_value=DocumentIngestionResponse(sections=[]),
    ):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert result == []


def test_document_ingestion_hook_rewrites_text_sections() -> None:
    doc = _make_doc(sections=[TextSection(text="original", link="http://a.com")])
    with patch(
        _PATCH_EXECUTE_HOOK,
        return_value=DocumentIngestionResponse(
            sections=[DocumentIngestionSection(text="rewritten", link="http://b.com")]
        ),
    ):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert len(result) == 1
    assert len(result[0].sections) == 1
    section = result[0].sections[0]
    assert isinstance(section, TextSection)
    assert section.text == "rewritten"
    assert section.link == "http://b.com"


def test_document_ingestion_hook_preserves_image_section_order() -> None:
    """Hook receives all sections including images and controls final ordering."""
    image = ImageSection(image_file_id="img-1", link=None)
    doc = _make_doc(
        sections=cast(
            list[TextSection | ImageSection],
            [TextSection(text="original", link=None), image],
        )
    )
    # Hook moves the image before the text section
    with patch(
        _PATCH_EXECUTE_HOOK,
        return_value=DocumentIngestionResponse(
            sections=[
                DocumentIngestionSection(image_file_id="img-1", link=None),
                DocumentIngestionSection(text="rewritten", link=None),
            ]
        ),
    ):
        result = _apply_document_ingestion_hook([doc], MagicMock())
    assert len(result) == 1
    sections = result[0].sections
    assert len(sections) == 2
    assert (
        isinstance(sections[0], ImageSection) and sections[0].image_file_id == "img-1"
    )
    assert isinstance(sections[1], TextSection) and sections[1].text == "rewritten"


def test_document_ingestion_hook_mixed_batch() -> None:
    """Drop one doc, rewrite another, pass through a third."""
    doc_drop = _make_doc(doc_id="drop")
    doc_rewrite = _make_doc(doc_id="rewrite")
    doc_skip = _make_doc(doc_id="skip")

    def _side_effect(**kwargs: Any) -> Any:
        doc_id = kwargs["payload"]["document_id"]
        if doc_id == "drop":
            return DocumentIngestionResponse(sections=None)
        if doc_id == "rewrite":
            return DocumentIngestionResponse(
                sections=[DocumentIngestionSection(text="new text", link=None)]
            )
        return HookSkipped()

    with patch(_PATCH_EXECUTE_HOOK, side_effect=_side_effect):
        result = _apply_document_ingestion_hook(
            [doc_drop, doc_rewrite, doc_skip], MagicMock()
        )

    assert len(result) == 2
    ids = {d.id for d in result}
    assert ids == {"rewrite", "skip"}
    rewritten = next(d for d in result if d.id == "rewrite")
    assert isinstance(rewritten.sections[0], TextSection)
    assert rewritten.sections[0].text == "new text"


================================================
FILE: backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
================================================
"""Tests that persona IDs are correctly propagated through the indexing pipeline.

Covers Phase 1 (schema plumbing) and Phase 2 (write at index time) of the
unify-assistant-project-files plan.
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from onyx.access.models import DocumentAccess
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import TextSection
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk


def _make_index_chunk(
    doc_id: str = "test-file-id",
    content: str = "test content",
) -> IndexChunk:
    embedding = [0.1] * 10
    doc = Document(
        id=doc_id,
        semantic_identifier="test_file.txt",
        sections=[TextSection(text=content, link=None)],
        source=DocumentSource.USER_FILE,
        metadata={},
    )
    return IndexChunk(
        chunk_id=0,
        blurb=content[:50],
        content=content,
        source_links=None,
        image_file_id=None,
        section_continuation=False,
        source_document=doc,
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        contextual_rag_reserved_tokens=0,
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        embeddings=ChunkEmbedding(
            full_embedding=embedding,
            mini_chunk_embeddings=[],
        ),
        title_embedding=None,
    )


def _make_access() -> DocumentAccess:
    return DocumentAccess.build(
        user_emails=["user@example.com"],
        user_groups=[],
        external_user_emails=[],
        external_user_group_ids=[],
        is_public=False,
    )


def test_from_index_chunk_propagates_personas() -> None:
    """Personas list passed to from_index_chunk appears on the result."""
    chunk = _make_index_chunk()
    persona_ids = [10, 20, 30]

    aware_chunk = DocMetadataAwareIndexChunk.from_index_chunk(
        index_chunk=chunk,
        access=_make_access(),
        document_sets=set(),
        user_project=[1],
        personas=persona_ids,
        boost=0,
        aggregated_chunk_boost_factor=1.0,
        tenant_id="test_tenant",
    )

    assert aware_chunk.personas == persona_ids
    assert aware_chunk.user_project == [1]


def test_from_index_chunk_empty_personas() -> None:
    """An empty personas list is preserved (not turned into None or omitted)."""
    chunk = _make_index_chunk()

    aware_chunk = DocMetadataAwareIndexChunk.from_index_chunk(
        index_chunk=chunk,
        access=_make_access(),
        document_sets=set(),
        user_project=[],
        personas=[],
        boost=0,
        aggregated_chunk_boost_factor=1.0,
        tenant_id="test_tenant",
    )

    assert aware_chunk.personas == []


def _make_document(doc_id: str) -> Document:
    return Document(
        id=doc_id,
        semantic_identifier="test_file.txt",
        sections=[TextSection(text="test content", link=None)],
        source=DocumentSource.USER_FILE,
        metadata={},
    )


def _run_adapter_build(
    file_id: str,
    project_ids_map: dict[str, list[int]],
    persona_ids_map: dict[str, list[int]],
) -> list[DocMetadataAwareIndexChunk]:
    """Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk
    with all external dependencies mocked."""
    from onyx.indexing.adapters.user_file_indexing_adapter import (
        UserFileIndexingAdapter,
    )
    from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext

    chunk = _make_index_chunk(doc_id=file_id)
    doc = _make_document(doc_id=file_id)

    context = DocumentBatchPrepareContext(
        updatable_docs=[doc],
        id_to_boost_map={},
    )

    adapter = UserFileIndexingAdapter(tenant_id="test_tenant", db_session=MagicMock())

    with (
        patch(
            "onyx.indexing.adapters.user_file_indexing_adapter.fetch_user_project_ids_for_user_files",
            return_value=project_ids_map,
        ),
        patch(
            "onyx.indexing.adapters.user_file_indexing_adapter.fetch_persona_ids_for_user_files",
            return_value=persona_ids_map,
        ),
        patch(
            "onyx.indexing.adapters.user_file_indexing_adapter.get_access_for_user_files",
            return_value={file_id: _make_access()},
        ),
        patch(
            "onyx.indexing.adapters.user_file_indexing_adapter.fetch_chunk_counts_for_user_files",
            return_value=[(file_id, 0)],
        ),
        patch(
            "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
            side_effect=Exception("no LLM in tests"),
        ),
    ):
        enricher = adapter.prepare_enrichment(
            context=context,
            tenant_id="test_tenant",
            chunks=[chunk],
        )
        return [enricher.enrich_chunk(chunk, 1.0)]


def test_prepare_enrichment_includes_persona_ids() -> None:
    """UserFileIndexingAdapter.prepare_enrichment writes persona IDs
    fetched from the DB into each chunk's metadata."""
    file_id = str(uuid4())
    persona_ids = [5, 12]
    project_ids = [3]

    chunks = _run_adapter_build(
        file_id=file_id,
        project_ids_map={file_id: project_ids},
        persona_ids_map={file_id: persona_ids},
    )

    assert len(chunks) == 1
    assert chunks[0].personas == persona_ids
    assert chunks[0].user_project == project_ids


def test_prepare_enrichment_missing_file_defaults_to_empty() -> None:
    """When a file has no persona or project associations in the DB, the
    adapter should default to empty lists (not KeyError or None)."""
    file_id = str(uuid4())

    chunks = _run_adapter_build(
        file_id=file_id,
        project_ids_map={},
        persona_ids_map={},
    )

    assert len(chunks) == 1
    assert chunks[0].personas == []
    assert chunks[0].user_project == []


================================================
FILE: backend/tests/unit/onyx/indexing/test_vespa.py
================================================
from http import HTTPStatus
from typing import Any

import httpx
import pytest
from sqlalchemy.orm import Session

from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.document_index_utils import get_both_index_properties
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT


@pytest.mark.skip()
def test_vespa_update() -> None:
    """This Test exercises some ambiguous Vespa behavior and
    shows exactly what happens.
    """

    doc_id = "test-vespa-update"

    with Session(get_sqlalchemy_engine()) as db_session:
        primary_index_name, _, _, _ = get_both_index_properties(db_session)
        endpoint = (
            f"{DOCUMENT_ID_ENDPOINT.format(index_name=primary_index_name)}/{doc_id}"
        )
        with httpx.Client(http2=True) as http_client:
            payload: dict[str, Any] = {}

            # always delete to set up the test, should always be OK
            res = http_client.delete(endpoint)
            assert HTTPStatus.OK == res.status_code

            # Verify the document is not found
            res = http_client.get(endpoint)
            assert HTTPStatus.NOT_FOUND == res.status_code

            # Attempt to update a nonexistent test document. Should return OK
            payload["fields"] = {}
            payload["fields"]["title"] = {"assign": "Best of Bob Dylan"}

            res = http_client.put(
                endpoint,
                headers={"Content-Type": "application/json"},
                json=payload,
            )
            assert HTTPStatus.OK == res.status_code

            # when we look for it, should be NOT_FOUND
            res = http_client.get(endpoint)
            assert HTTPStatus.NOT_FOUND == res.status_code

            # POST/Put new document
            payload = {}
            payload["fields"] = {}
            payload["fields"]["document_id"] = doc_id
            payload["fields"]["title"] = "A Head Full of Dreams"

            res = http_client.post(
                endpoint,
                headers={"Content-Type": "application/json"},
                json=payload,
            )
            assert HTTPStatus.OK == res.status_code

            # when we look for it, now we should find it
            res = http_client.get(endpoint)
            assert HTTPStatus.OK == res.status_code
            d = res.json()

            assert payload["fields"]["title"] == d["fields"]["title"]

            # Attempt to update the document that we know exists. Should return OK
            payload["fields"] = {}
            payload["fields"]["title"] = {"assign": "Remember The Name"}

            res = http_client.put(
                endpoint,
                headers={"Content-Type": "application/json"},
                json=payload,
            )
            assert HTTPStatus.OK == res.status_code

            # verify the change
            res = http_client.get(endpoint)
            assert HTTPStatus.OK == res.status_code
            d = res.json()
            assert payload["fields"]["title"]["assign"] == d["fields"]["title"]

            # always delete to clean up the test, should always be OK
            res = http_client.delete(endpoint)
            assert HTTPStatus.OK == res.status_code

            # Verify the document is not found
            res = http_client.get(endpoint)
            assert HTTPStatus.NOT_FOUND == res.status_code


================================================
FILE: backend/tests/unit/onyx/lazy_handling/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/llm/conftest.py
================================================
"""
Test configuration for LLM tests.

This module loads model metadata enrichments before running tests
so that the model_name_parser has access to the enriched data.
"""

from collections.abc import Generator

import pytest

from onyx.llm.litellm_singleton.config import load_model_metadata_enrichments
from onyx.llm.model_name_parser import parse_litellm_model_name


@pytest.fixture(scope="session", autouse=True)
def load_enrichments() -> Generator[None, None, None]:
    """Load model metadata enrichments before any tests run."""
    load_model_metadata_enrichments()
    # Clear parser cache to ensure fresh lookups
    parse_litellm_model_name.cache_clear()
    yield


================================================
FILE: backend/tests/unit/onyx/llm/test_bedrock_token_limit.py
================================================
"""Tests for get_bedrock_token_limit function."""

from unittest.mock import patch

from onyx.llm.utils import get_bedrock_token_limit


class TestGetBedrockTokenLimit:
    """Tests for Bedrock token limit lookup."""

    def test_parse_from_model_id_suffix_200k(self) -> None:
        """Test parsing :200k suffix."""
        result = get_bedrock_token_limit("anthropic.claude-3-5-sonnet:200k")
        assert result == 200000

    def test_parse_from_model_id_suffix_128k(self) -> None:
        """Test parsing :128k suffix."""
        result = get_bedrock_token_limit("meta.llama3-70b:128k")
        assert result == 128000

    def test_parse_from_model_id_suffix_4k(self) -> None:
        """Test parsing :4k suffix."""
        result = get_bedrock_token_limit("some-model:4k")
        assert result == 4000

    def test_parse_from_model_id_suffix_1000k(self) -> None:
        """Test parsing :1000k suffix (1M context)."""
        result = get_bedrock_token_limit("amazon.nova-pro:1000k")
        assert result == 1000000

    def test_litellm_lookup_with_bedrock_prefix(self) -> None:
        """Test LiteLLM lookup works with bedrock/ prefix."""
        mock_model_map = {
            "bedrock/anthropic.claude-3-5-sonnet": {"max_input_tokens": 200000}
        }
        with patch("onyx.llm.utils.get_model_map", return_value=mock_model_map):
            result = get_bedrock_token_limit("anthropic.claude-3-5-sonnet")
            assert result == 200000

    def test_litellm_lookup_without_prefix(self) -> None:
        """Test LiteLLM lookup works without bedrock/ prefix."""
        mock_model_map = {"anthropic.claude-3-sonnet": {"max_input_tokens": 200000}}
        with patch("onyx.llm.utils.get_model_map", return_value=mock_model_map):
            result = get_bedrock_token_limit("anthropic.claude-3-sonnet")
            assert result == 200000

    def test_litellm_max_tokens_fallback(self) -> None:
        """Test fallback to max_tokens when max_input_tokens not present."""
        mock_model_map = {"bedrock/some-model": {"max_tokens": 32000}}
        with patch("onyx.llm.utils.get_model_map", return_value=mock_model_map):
            result = get_bedrock_token_limit("some-model")
            assert result == 32000

    def test_hardcoded_mapping_claude_3_5(self) -> None:
        """Test hardcoded mapping for Claude 3.5 models."""
        # Mock empty LiteLLM to force mapping lookup
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit(
                "anthropic.claude-3-5-sonnet-20241022-v2:0"
            )
            assert result == 200000

    def test_hardcoded_mapping_llama3_3(self) -> None:
        """Test hardcoded mapping for Llama 3.3 models (128K context)."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("meta.llama3-3-70b-instruct-v1:0")
            assert result == 128000

    def test_hardcoded_mapping_llama3_70b(self) -> None:
        """Test hardcoded mapping for Llama 3 70B (8K context)."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("meta.llama3-70b-instruct-v1:0")
            assert result == 8000

    def test_hardcoded_mapping_nova_pro(self) -> None:
        """Test hardcoded mapping for Nova Pro."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("amazon.nova-pro-v1:0")
            assert result == 300000

    def test_hardcoded_mapping_mistral_large(self) -> None:
        """Test hardcoded mapping for Mistral Large."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("mistral.mistral-large-2407-v1:0")
            assert result == 128000

    def test_default_fallback_unknown_model(self) -> None:
        """Test default fallback for unknown models."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("unknown.model-v1:0")
            # Should fall back to GEN_AI_MODEL_FALLBACK_MAX_TOKENS (32000)
            assert result == 32000

    def test_cross_region_model_id(self) -> None:
        """Test cross-region model ID (us.anthropic.claude-...)."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit(
                "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
            )
            assert result == 200000

    def test_case_insensitive_matching(self) -> None:
        """Test that matching is case-insensitive."""
        with patch("onyx.llm.utils.get_model_map", return_value={}):
            result = get_bedrock_token_limit("ANTHROPIC.CLAUDE-3-5-SONNET")
            assert result == 200000

    def test_suffix_takes_priority_over_litellm(self) -> None:
        """Test that :NNNk suffix takes priority over LiteLLM."""
        mock_model_map = {"bedrock/model": {"max_input_tokens": 50000}}
        with patch("onyx.llm.utils.get_model_map", return_value=mock_model_map):
            # The :100k suffix should be used, not the LiteLLM value
            result = get_bedrock_token_limit("model:100k")
            assert result == 100000

    def test_litellm_exception_falls_through(self) -> None:
        """Test that LiteLLM exceptions fall through to mapping."""
        with patch(
            "onyx.llm.utils.get_model_map", side_effect=Exception("LiteLLM error")
        ):
            # Should still work via hardcoded mapping
            result = get_bedrock_token_limit("anthropic.claude-3-5-sonnet")
            assert result == 200000


================================================
FILE: backend/tests/unit/onyx/llm/test_factory.py
================================================
from unittest.mock import patch

from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import _build_provider_extra_headers
from onyx.llm.factory import get_llm
from onyx.llm.factory import llm_from_provider
from onyx.llm.well_known_providers.constants import OLLAMA_API_KEY_CONFIG_KEY
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationView


def test_build_provider_extra_headers_adds_bearer_for_ollama_api_key() -> None:
    headers = _build_provider_extra_headers(
        LlmProviderNames.OLLAMA_CHAT,
        {OLLAMA_API_KEY_CONFIG_KEY: "  test-key  "},
    )

    assert headers == {"Authorization": "Bearer test-key"}


def test_build_provider_extra_headers_keeps_existing_bearer_prefix() -> None:
    headers = _build_provider_extra_headers(
        LlmProviderNames.OLLAMA_CHAT,
        {OLLAMA_API_KEY_CONFIG_KEY: "bearer test-key"},
    )

    assert headers == {"Authorization": "bearer test-key"}


def test_build_provider_extra_headers_ignores_empty_ollama_api_key() -> None:
    headers = _build_provider_extra_headers(
        LlmProviderNames.OLLAMA_CHAT,
        {OLLAMA_API_KEY_CONFIG_KEY: "   "},
    )

    assert headers == {}


def _build_provider_view(
    provider: str,
    max_input_tokens: int | None,
) -> LLMProviderView:
    return LLMProviderView(
        id=1,
        name="test-provider",
        provider=provider,
        model_configurations=[
            ModelConfigurationView(
                name="test-model",
                is_visible=True,
                max_input_tokens=max_input_tokens,
                supports_image_input=False,
            )
        ],
        api_key=None,
        api_base="http://localhost:11434",
        api_version=None,
        custom_config=None,
        is_public=True,
        is_auto_mode=False,
        groups=[],
        personas=[],
        deployment_name=None,
    )


def test_get_llm_sets_ollama_num_ctx_model_kwarg() -> None:
    with patch("onyx.llm.factory.LitellmLLM") as mock_litellm_llm:
        get_llm(
            provider=LlmProviderNames.OLLAMA_CHAT,
            model="test-model",
            deployment_name=None,
            max_input_tokens=4096,
            model_kwargs={"num_ctx": 8192},
        )

        kwargs = mock_litellm_llm.call_args.kwargs
        assert kwargs["model_kwargs"] == {"num_ctx": 8192}


def test_get_llm_does_not_set_ollama_num_ctx_for_non_ollama_provider() -> None:
    with patch("onyx.llm.factory.LitellmLLM") as mock_litellm_llm:
        get_llm(
            provider=LlmProviderNames.OPENAI,
            model="gpt-4o-mini",
            deployment_name=None,
            max_input_tokens=4096,
        )

        kwargs = mock_litellm_llm.call_args.kwargs
        assert kwargs["model_kwargs"] == {}


def test_llm_from_provider_passes_configured_ollama_num_ctx() -> None:
    provider = _build_provider_view(
        provider=LlmProviderNames.OLLAMA_CHAT,
        max_input_tokens=16384,
    )

    with patch("onyx.llm.factory.get_llm") as mock_get_llm:
        llm_from_provider(
            model_name="test-model",
            llm_provider=provider,
        )

        kwargs = mock_get_llm.call_args.kwargs
        assert kwargs["max_input_tokens"] == 16384
        assert kwargs["model_kwargs"] == {"num_ctx": 16384}


def test_llm_from_provider_omits_ollama_num_ctx_when_model_context_unknown() -> None:
    provider = _build_provider_view(
        provider=LlmProviderNames.OLLAMA_CHAT,
        max_input_tokens=None,
    )

    with (
        patch(
            "onyx.llm.factory.get_max_input_tokens_from_llm_provider",
            return_value=32000,
        ),
        patch("onyx.llm.factory.get_llm") as mock_get_llm,
    ):
        llm_from_provider(
            model_name="test-model",
            llm_provider=provider,
        )

        kwargs = mock_get_llm.call_args.kwargs
        assert kwargs["max_input_tokens"] == 32000
        assert kwargs["model_kwargs"] == {}


def test_llm_from_provider_never_sets_ollama_num_ctx_for_non_ollama_provider() -> None:
    provider = _build_provider_view(
        provider=LlmProviderNames.OPENAI,
        max_input_tokens=16384,
    )

    with patch("onyx.llm.factory.get_llm") as mock_get_llm:
        llm_from_provider(
            model_name="test-model",
            llm_provider=provider,
        )

        kwargs = mock_get_llm.call_args.kwargs
        assert kwargs["max_input_tokens"] == 16384
        assert kwargs["model_kwargs"] == {}


================================================
FILE: backend/tests/unit/onyx/llm/test_formatting_reenabled.py
================================================
from onyx.llm.utils import model_needs_formatting_reenabled


def test_gpt_5_exact_match() -> None:
    """Test that gpt-5 model name exactly matches."""
    assert model_needs_formatting_reenabled("gpt-5") is True


def test_o3_exact_match() -> None:
    """Test that o3 model name exactly matches."""
    assert model_needs_formatting_reenabled("o3") is True


def test_o1_exact_match() -> None:
    """Test that o1 model name exactly matches."""
    assert model_needs_formatting_reenabled("o1") is True


def test_gpt_5_with_provider_prefix() -> None:
    """Test that gpt-5 with provider prefix matches."""
    assert model_needs_formatting_reenabled("openai/gpt-5") is True


def test_o3_with_provider_prefix() -> None:
    """Test that o3 with provider prefix matches."""
    assert model_needs_formatting_reenabled("openai/o3") is True


def test_o1_with_provider_prefix() -> None:
    """Test that o1 with provider prefix matches."""
    assert model_needs_formatting_reenabled("openai/o1") is True


def test_gpt_5_with_suffix() -> None:
    """Test that gpt-5 with suffix matches."""
    assert model_needs_formatting_reenabled("gpt-5-preview") is True
    assert model_needs_formatting_reenabled("gpt-5-mini") is True
    assert model_needs_formatting_reenabled("gpt-5-turbo") is True


def test_o3_with_suffix() -> None:
    """Test that o3 with suffix matches."""
    assert model_needs_formatting_reenabled("o3-mini") is True
    assert model_needs_formatting_reenabled("o3-preview") is True
    assert model_needs_formatting_reenabled("o3-max") is True


def test_o1_with_suffix() -> None:
    """Test that o1 with suffix matches."""
    assert model_needs_formatting_reenabled("o1-preview") is True
    assert model_needs_formatting_reenabled("o1-mini") is True
    assert model_needs_formatting_reenabled("o1-max") is True


def test_gpt_5_with_provider_and_suffix() -> None:
    """Test that gpt-5 with provider prefix and suffix matches."""
    assert model_needs_formatting_reenabled("openai/gpt-5-preview") is True
    assert model_needs_formatting_reenabled("openai/gpt-5-mini") is True


def test_o3_with_provider_and_suffix() -> None:
    """Test that o3 with provider prefix and suffix matches."""
    assert model_needs_formatting_reenabled("openai/o3-mini") is True
    assert model_needs_formatting_reenabled("openai/o3-preview") is True


def test_o1_with_provider_and_suffix() -> None:
    """Test that o1 with provider prefix and suffix matches."""
    assert model_needs_formatting_reenabled("openai/o1-preview") is True
    assert model_needs_formatting_reenabled("openai/o1-mini") is True


def test_gpt_5_with_space_boundary() -> None:
    """Test that gpt-5 with space boundary matches."""
    assert model_needs_formatting_reenabled("openai gpt-5") is True
    assert model_needs_formatting_reenabled("gpt-5 preview") is True


def test_o3_with_space_boundary() -> None:
    """Test that o3 with space boundary matches."""
    assert model_needs_formatting_reenabled("openai o3") is True
    assert model_needs_formatting_reenabled("o3 mini") is True


def test_o1_with_space_boundary() -> None:
    """Test that o1 with space boundary matches."""
    assert model_needs_formatting_reenabled("openai o1") is True
    assert model_needs_formatting_reenabled("o1 preview") is True


def test_gpt_5_with_slash_boundary() -> None:
    """Test that gpt-5 with slash boundary matches."""
    assert model_needs_formatting_reenabled("provider/gpt-5") is True
    assert model_needs_formatting_reenabled("gpt-5/version") is True


def test_o3_with_slash_boundary() -> None:
    """Test that o3 with slash boundary matches."""
    assert model_needs_formatting_reenabled("provider/o3") is True
    assert model_needs_formatting_reenabled("o3/version") is True


def test_o1_with_slash_boundary() -> None:
    """Test that o1 with slash boundary matches."""
    assert model_needs_formatting_reenabled("provider/o1") is True
    assert model_needs_formatting_reenabled("o1/version") is True


def test_gpt_4_does_not_match() -> None:
    """Test that gpt-4 does not match."""
    assert model_needs_formatting_reenabled("gpt-4") is False
    assert model_needs_formatting_reenabled("gpt-4-turbo") is False
    assert model_needs_formatting_reenabled("gpt-4o") is False
    assert model_needs_formatting_reenabled("openai/gpt-4") is False


def test_gpt_3_5_does_not_match() -> None:
    """Test that gpt-3.5-turbo does not match."""
    assert model_needs_formatting_reenabled("gpt-3.5-turbo") is False
    assert model_needs_formatting_reenabled("openai/gpt-3.5-turbo") is False


def test_o2_does_not_match() -> None:
    """Test that o2 does not match."""
    assert model_needs_formatting_reenabled("o2") is False
    assert model_needs_formatting_reenabled("o2-preview") is False
    assert model_needs_formatting_reenabled("openai/o2") is False


def test_o4_does_not_match() -> None:
    """Test that o4 does not match."""
    assert model_needs_formatting_reenabled("o4") is False
    assert model_needs_formatting_reenabled("o4-mini") is False
    assert model_needs_formatting_reenabled("openai/o4") is False


def test_other_models_do_not_match() -> None:
    """Test that other common models do not match."""
    assert model_needs_formatting_reenabled("claude-3-5-sonnet-20241022") is False
    assert model_needs_formatting_reenabled("gemini-1.5-pro") is False
    assert model_needs_formatting_reenabled("llama3.1") is False
    assert model_needs_formatting_reenabled("mistral-large") is False


def test_case_sensitivity() -> None:
    """Test that model names are case-sensitive."""
    assert model_needs_formatting_reenabled("GPT-5") is False
    assert model_needs_formatting_reenabled("O3") is False
    assert model_needs_formatting_reenabled("O1") is False
    assert model_needs_formatting_reenabled("Gpt-5") is False


def test_models_with_gpt_5_in_middle() -> None:
    """Test that models containing gpt-5 in the middle match."""
    assert model_needs_formatting_reenabled("something-gpt-5-suffix") is True
    assert model_needs_formatting_reenabled("prefix/gpt-5/suffix") is True


def test_models_with_o3_in_middle() -> None:
    """Test that models containing o3 in the middle match."""
    assert model_needs_formatting_reenabled("something-o3-suffix") is True
    assert model_needs_formatting_reenabled("prefix/o3/suffix") is True


def test_models_with_o1_in_middle() -> None:
    """Test that models containing o1 in the middle match."""
    assert model_needs_formatting_reenabled("something-o1-suffix") is True
    assert model_needs_formatting_reenabled("prefix/o1/suffix") is True


def test_models_that_contain_but_not_match() -> None:
    """Test that models containing the strings but not matching word boundaries do not match."""
    # These should not match because they don't have proper word boundaries
    assert (
        model_needs_formatting_reenabled("gpt-50") is False
    )  # gpt-5 is part of gpt-50
    assert model_needs_formatting_reenabled("o30") is False  # o3 is part of o30
    assert model_needs_formatting_reenabled("o10") is False  # o1 is part of o10
    assert model_needs_formatting_reenabled("gpt-51") is False
    assert (
        model_needs_formatting_reenabled("somethingo3") is False
    )  # no boundary before o3
    assert (
        model_needs_formatting_reenabled("o3something") is False
    )  # no boundary after o3


def test_empty_string() -> None:
    """Test that empty string does not match."""
    assert model_needs_formatting_reenabled("") is False


def test_real_litellm_model_names() -> None:
    """Test with real model names that might appear in litellm."""
    # Based on common patterns from models.litellm.ai
    assert model_needs_formatting_reenabled("openai/gpt-5") is True
    assert model_needs_formatting_reenabled("openai/o3-mini") is True
    assert model_needs_formatting_reenabled("openai/o1-preview") is True

    # These should not match
    assert model_needs_formatting_reenabled("openai/gpt-4o") is False
    assert model_needs_formatting_reenabled("openai/gpt-4-turbo") is False
    assert (
        model_needs_formatting_reenabled("anthropic/claude-3-5-sonnet-20241022")
        is False
    )
    assert model_needs_formatting_reenabled("google/gemini-1.5-pro") is False


================================================
FILE: backend/tests/unit/onyx/llm/test_litellm_monkey_patches.py
================================================
from typing import Any

from litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator

from onyx.llm.litellm_singleton.monkey_patches import apply_monkey_patches

_UNSET = object()


def _create_iterator() -> OllamaChatCompletionResponseIterator:
    apply_monkey_patches()
    return OllamaChatCompletionResponseIterator(
        streaming_response=iter(()),
        sync_stream=True,
    )


def _build_chunk(
    *,
    thinking: object = _UNSET,
    content: object = _UNSET,
) -> dict[str, Any]:
    message: dict[str, Any] = {"role": "assistant"}
    if thinking is not _UNSET:
        message["thinking"] = thinking
    if content is not _UNSET:
        message["content"] = content

    return {
        "model": "llama3.1",
        "message": message,
        "done": False,
        "prompt_eval_count": 0,
        "eval_count": 0,
    }


def test_ollama_chunk_parser_transitions_from_native_thinking_to_content() -> None:
    iterator = _create_iterator()

    thinking_chunk = _build_chunk(thinking="Let me think")
    content_chunk = _build_chunk(thinking="", content="Final answer")

    thinking_response = iterator.chunk_parser(thinking_chunk)
    content_response = iterator.chunk_parser(content_chunk)

    assert thinking_response.choices[0].delta.reasoning_content == "Let me think"
    assert thinking_response.choices[0].delta.content is None

    assert getattr(content_response.choices[0].delta, "reasoning_content", None) is None
    assert content_response.choices[0].delta.content == "Final answer"
    assert iterator.finished_reasoning_content is True


def test_ollama_chunk_parser_keeps_tagged_thinking_until_close_tag() -> None:
    iterator = _create_iterator()

    start_chunk = _build_chunk(content="<think>step 1")
    middle_chunk = _build_chunk(content="step 2")
    close_chunk = _build_chunk(content="final</think>")

    start_response = iterator.chunk_parser(start_chunk)
    middle_response = iterator.chunk_parser(middle_chunk)
    close_response = iterator.chunk_parser(close_chunk)

    assert start_response.choices[0].delta.reasoning_content == "step 1"
    assert start_response.choices[0].delta.content is None

    assert middle_response.choices[0].delta.reasoning_content == "step 2"
    assert middle_response.choices[0].delta.content is None

    assert getattr(close_response.choices[0].delta, "reasoning_content", None) is None
    assert close_response.choices[0].delta.content == "final"
    assert iterator.finished_reasoning_content is True


def test_ollama_chunk_parser_handles_think_tag_after_native_thinking() -> None:
    iterator = _create_iterator()

    native_thinking_chunk = _build_chunk(thinking="native reasoning")
    tagged_thinking_chunk = _build_chunk(content="<think>tagged reasoning")

    iterator.chunk_parser(native_thinking_chunk)
    tagged_response = iterator.chunk_parser(tagged_thinking_chunk)

    assert tagged_response.choices[0].delta.reasoning_content == "tagged reasoning"
    assert tagged_response.choices[0].delta.content is None


def test_ollama_chunk_parser_preserves_content_when_thinking_and_content_coexist() -> (
    None
):
    iterator = _create_iterator()

    combined_chunk = _build_chunk(
        thinking="Need one thought",
        content="Visible answer token",
    )

    response = iterator.chunk_parser(combined_chunk)

    assert response.choices[0].delta.reasoning_content == "Need one thought"
    assert response.choices[0].delta.content == "Visible answer token"


================================================
FILE: backend/tests/unit/onyx/llm/test_llm_provider_options.py
================================================
from datetime import datetime
from datetime import timezone

import pytest

from onyx.llm.well_known_providers.auto_update_models import (
    LLMProviderRecommendation,
)
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
from onyx.llm.well_known_providers.llm_provider_options import (
    model_configurations_for_provider,
)
from onyx.llm.well_known_providers.models import SimpleKnownModel


def _build_recommendations(
    provider_name: str, visible_model_names: list[str]
) -> LLMRecommendations:
    return LLMRecommendations(
        version="test",
        updated_at=datetime.now(timezone.utc),
        providers={
            provider_name: LLMProviderRecommendation(
                default_model=SimpleKnownModel(name=visible_model_names[0]),
                additional_visible_models=[
                    SimpleKnownModel(name=model_name)
                    for model_name in visible_model_names[1:]
                ],
            )
        },
    )


def test_model_configurations_vertex_are_sorted_by_name(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.fetch_models_for_provider",
        lambda _provider_name: ["zeta-model", "alpha-model", "Beta-model"],
    )
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.get_max_input_tokens",
        lambda _model_name, _provider_name: None,
    )
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.model_supports_image_input",
        lambda _model_name, _provider_name: False,
    )

    recommendations = _build_recommendations(
        VERTEXAI_PROVIDER_NAME, ["gamma-model", "alpha-model"]
    )

    model_configurations = model_configurations_for_provider(
        VERTEXAI_PROVIDER_NAME, recommendations
    )

    assert [model.name for model in model_configurations] == [
        "alpha-model",
        "Beta-model",
        "gamma-model",
        "zeta-model",
    ]
    assert [model.is_visible for model in model_configurations] == [
        True,
        False,
        True,
        False,
    ]


def test_model_configurations_non_vertex_preserve_provider_order(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.fetch_models_for_provider",
        lambda _provider_name: ["model-b", "model-a"],
    )
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.get_max_input_tokens",
        lambda _model_name, _provider_name: None,
    )
    monkeypatch.setattr(
        "onyx.llm.well_known_providers.llm_provider_options.model_supports_image_input",
        lambda _model_name, _provider_name: False,
    )

    recommendations = _build_recommendations(
        OPENAI_PROVIDER_NAME, ["model-c", "model-a"]
    )

    model_configurations = model_configurations_for_provider(
        OPENAI_PROVIDER_NAME, recommendations
    )

    assert [model.name for model in model_configurations] == [
        "model-b",
        "model-a",
        "model-c",
    ]


================================================
FILE: backend/tests/unit/onyx/llm/test_model_is_reasoning.py
================================================
from onyx.llm.utils import model_is_reasoning_model


def test_model_is_reasoning_model() -> None:
    """Test that reasoning models are correctly identified and non-reasoning models are not"""

    # Models that should be identified as reasoning models
    reasoning_models = [
        ("o3", "openai"),
        ("o3-mini", "openai"),
        ("o4-mini", "openai"),
        ("deepseek-reasoner", "deepseek"),
        ("deepseek-r1", "openrouter/deepseek"),
        ("claude-sonnet-4-20250514", "anthropic"),
    ]

    # Models that should NOT be identified as reasoning models
    non_reasoning_models = [
        ("gpt-4o", "openai"),
        ("claude-3-5-sonnet-20240620", "anthropic"),
    ]

    # Test reasoning models
    for model_name, provider in reasoning_models:
        assert (
            model_is_reasoning_model(model_name, provider) is True
        ), f"Expected {provider}/{model_name} to be identified as a reasoning model"

    # Test non-reasoning models
    for model_name, provider in non_reasoning_models:
        assert (
            model_is_reasoning_model(model_name, provider) is False
        ), f"Expected {provider}/{model_name} to NOT be identified as a reasoning model"


================================================
FILE: backend/tests/unit/onyx/llm/test_model_map.py
================================================
from unittest.mock import patch

import litellm

from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.llm.constants import LlmProviderNames
from onyx.llm.utils import find_model_obj
from onyx.llm.utils import get_model_map


def test_partial_match_in_model_map() -> None:
    """
    We should handle adding/not adding the provider prefix to the model name.
    """
    get_model_map.cache_clear()

    model_map = get_model_map()

    _EXPECTED_FIELDS = {
        "input_cost_per_audio_per_second": 0,
        "input_cost_per_audio_per_second_above_128k_tokens": 0,
        "input_cost_per_character": 0,
        "input_cost_per_character_above_128k_tokens": 0,
        "input_cost_per_image": 0,
        "input_cost_per_image_above_128k_tokens": 0,
        "input_cost_per_token": 0,
        "input_cost_per_token_above_128k_tokens": 0,
        "input_cost_per_video_per_second": 0,
        "input_cost_per_video_per_second_above_128k_tokens": 0,
        "max_input_tokens": 131072,
        "max_output_tokens": 8192,
        "max_tokens": 8192,
        "output_cost_per_character": 0,
        "output_cost_per_character_above_128k_tokens": 0,
        "output_cost_per_token": 0,
        "output_cost_per_token_above_128k_tokens": 0,
        "source": "https://aistudio.google.com",
        "supports_audio_output": False,
        "supports_function_calling": True,
        "supports_response_schema": True,
        "supports_system_messages": False,
        "supports_tool_choice": True,
        "supports_vision": True,
    }

    result1 = find_model_obj(
        model_map, LlmProviderNames.OPENAI, "gemini/gemma-3-27b-it"
    )
    assert result1 is not None
    for key, value in _EXPECTED_FIELDS.items():
        assert key in result1
        assert result1[key] == value, "Unexpected value for key: {}".format(key)

    result2 = find_model_obj(model_map, LlmProviderNames.OPENAI, "gemma-3-27b-it")
    assert result2 is not None
    for key, value in _EXPECTED_FIELDS.items():
        assert key in result2
        assert result2[key] == value, "Unexpected value for key: {}".format(key)

    get_model_map.cache_clear()


def test_no_overwrite_in_model_map() -> None:
    """Make sure we use the original entry if it exists."""
    # Create a mock model_cost dict with multiple entries for "onyx-llm"
    mock_original_model_cost = {
        "gpt-4o": {
            "is_correct": True,
        },
        "provider/gpt-4o": {
            "is_correct": False,
        },
    }

    with patch.object(litellm, "model_cost", mock_original_model_cost):
        get_model_map.cache_clear()  # Clear the LRU cache to use the patched data

        model_map = get_model_map()
        result = find_model_obj(model_map, LlmProviderNames.OPENAI, "gpt-4o")
        assert result is not None
        assert result["is_correct"] is True

    get_model_map.cache_clear()


def test_twelvelabs_pegasus_override_present() -> None:
    get_model_map.cache_clear()
    try:
        model_map = get_model_map()
        model_obj = find_model_obj(
            model_map,
            "twelvelabs",
            "us.twelvelabs.pegasus-1-2-v1:0",
        )
        assert model_obj is not None
        assert model_obj["max_input_tokens"] == GEN_AI_MODEL_FALLBACK_MAX_TOKENS
        assert model_obj["max_tokens"] == GEN_AI_MODEL_FALLBACK_MAX_TOKENS
        assert model_obj["supports_reasoning"] is False
    finally:
        get_model_map.cache_clear()


================================================
FILE: backend/tests/unit/onyx/llm/test_model_name_parser.py
================================================
"""
Unit tests for LiteLLM model name parser.

Tests verify that enrichment data is correctly returned from the parser.
"""

from onyx.llm.constants import LlmProviderNames
from onyx.llm.model_name_parser import parse_litellm_model_name


def test_bedrock_model_with_enrichment() -> None:
    """Test parsing a Bedrock model - provider extracted, metadata from enrichment."""
    result = parse_litellm_model_name(
        "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"
    )

    assert result.raw_name == "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"
    assert result.provider == LlmProviderNames.BEDROCK
    assert result.vendor == LlmProviderNames.ANTHROPIC
    assert result.display_name == "Claude Sonnet 3.5"
    assert result.provider_display_name == "Claude (Bedrock - Anthropic)"


def test_region_extraction() -> None:
    """Test that region prefix is extracted from model key."""
    result = parse_litellm_model_name(
        "bedrock/eu.anthropic.claude-3-5-sonnet-20241022-v2:0"
    )

    assert result.region == "eu"
    assert result.provider == LlmProviderNames.BEDROCK


def test_direct_provider_inference() -> None:
    """Test that provider is inferred from litellm.model_cost for unprefixed models."""
    result = parse_litellm_model_name("gpt-4o")

    assert result.provider == LlmProviderNames.OPENAI
    assert result.display_name == "GPT-4o"
    assert result.provider_display_name == "GPT (OpenAI)"


def test_unknown_model_fallback() -> None:
    """Test that unknown models get a cleaned-up display name."""
    result = parse_litellm_model_name("some-unknown-model-xyz")

    assert result.raw_name == "some-unknown-model-xyz"
    # Unknown models get title-cased display names
    assert result.display_name == "Some Unknown Model Xyz"
    assert result.vendor is None


================================================
FILE: backend/tests/unit/onyx/llm/test_model_response.py
================================================
from __future__ import annotations

from typing import cast
from typing import TYPE_CHECKING

import pytest

from onyx.llm.model_response import ChatCompletionDeltaToolCall
from onyx.llm.model_response import from_litellm_model_response
from onyx.llm.model_response import from_litellm_model_response_stream
from onyx.llm.model_response import FunctionCall
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream

if TYPE_CHECKING:
    from litellm.types.utils import (
        ModelResponse as LiteLLMModelResponse,
        ModelResponseStream as LiteLLMModelResponseStream,
    )


class _LiteLLMStreamDouble:
    """
    Lightweight double that mimics the LiteLLM ``ModelResponseStream`` interface
    used by ``from_litellm_model_response_stream``.
    """

    def __init__(self, payload: dict) -> None:
        self._payload = payload

    def model_dump(self) -> dict:
        return self._payload


class _LiteLLMResponseDouble:
    """
    Lightweight double that mimics the LiteLLM ``ModelResponse`` interface
    used by ``from_litellm_model_response``.
    """

    def __init__(self, payload: dict) -> None:
        self._payload = payload

    def model_dump(self) -> dict:
        return self._payload


def _make_stream_double(payload: dict) -> "LiteLLMModelResponseStream":
    """Create a test double for LiteLLM ModelResponseStream."""
    return cast("LiteLLMModelResponseStream", _LiteLLMStreamDouble(payload))


def _make_response_double(payload: dict) -> "LiteLLMModelResponse":
    """Create a test double for LiteLLM ModelResponse."""
    return cast("LiteLLMModelResponse", _LiteLLMResponseDouble(payload))


def _build_tool_call_payload() -> dict:
    return {
        "id": "chatcmpl-f739f09c-7c9b-4dd6-aea7-cf41d4fd2196",
        "created": 1762544538,
        "model": "gpt-5",
        "object": "chat.completion.chunk",
        "choices": [
            {
                "finish_reason": None,
                "index": 0,
                "delta": {
                    "content": "",
                    "tool_calls": [
                        {
                            "id": None,
                            "index": 0,
                            "type": "function",
                            "function": {
                                "arguments": '{"',
                                "name": None,
                            },
                        }
                    ],
                },
            }
        ],
    }


def _build_reasoning_payload() -> dict:
    return {
        "id": "chatcmpl-c2a25682-5715-4ca2-84a9-061498f79626",
        "created": 1762544538,
        "model": "gpt-5",
        "object": "chat.completion.chunk",
        "choices": [
            {
                "finish_reason": None,
                "index": 0,
                "delta": {
                    "reasoning_content": " variations",
                },
            }
        ],
    }


def _build_finish_reason_payload() -> tuple[dict, dict]:
    base_chunk = {
        "id": "chatcmpl-2b136068-c6fb-4af1-97d5-d2c9d84cd52b",
        "created": 1762544448,
        "object": "chat.completion.chunk",
    }

    content_chunk = base_chunk | {
        "choices": [
            {
                "finish_reason": None,
                "index": 0,
                "delta": {
                    "content": "?",
                },
            }
        ],
    }

    final_chunk = base_chunk | {
        "choices": [
            {
                "finish_reason": "stop",
                "index": 0,
                "delta": {},
            }
        ],
    }

    return content_chunk, final_chunk


def _build_multiple_tool_calls_payload() -> dict:
    return {
        "id": "Yn4SaajROLXEnvgP5JTN-AQ",
        "created": 1762819684,
        "model": "gemini-2.5-flash",
        "object": "chat.completion.chunk",
        "choices": [
            {
                "finish_reason": None,
                "index": 0,
                "delta": {
                    "content": None,
                    "tool_calls": [
                        {
                            "id": "call_130bec4755e544ea95f4b1bafd81",
                            "function": {
                                "arguments": '{"queries": ["new agent framework"]}',
                                "name": "internal_search",
                            },
                            "type": "function",
                            "index": 0,
                        },
                        {
                            "id": "call_42273e8ee5ac4c0a97237d6d25a6",
                            "function": {
                                "arguments": '{"queries": ["cheese"]}',
                                "name": "web_search",
                            },
                            "type": "function",
                            "index": 1,
                        },
                    ],
                },
            }
        ],
    }


def _build_non_streaming_response_payload() -> dict:
    return {
        "id": "chatcmpl-abc123",
        "created": 1234567890,
        "model": "gpt-4",
        "object": "chat.completion",
        "choices": [
            {
                "finish_reason": "stop",
                "index": 0,
                "message": {
                    "content": "Hello, world!",
                    "role": "assistant",
                },
            }
        ],
    }


def _build_non_streaming_tool_call_payload() -> dict:
    return {
        "id": "chatcmpl-xyz789",
        "created": 9876543210,
        "model": "gpt-4",
        "object": "chat.completion",
        "choices": [
            {
                "finish_reason": "tool_calls",
                "index": 0,
                "message": {
                    "content": None,
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "id": "call_abc123",
                            "type": "function",
                            "function": {
                                "name": "search_documents",
                                "arguments": '{"query": "test"}',
                            },
                        }
                    ],
                },
            }
        ],
    }


def test_from_litellm_model_response_stream_parses_tool_calls() -> None:
    response = from_litellm_model_response_stream(
        _make_stream_double(_build_tool_call_payload())
    )

    assert isinstance(response, ModelResponseStream)
    assert response.id == "chatcmpl-f739f09c-7c9b-4dd6-aea7-cf41d4fd2196"
    assert response.created == "1762544538"

    tool_calls = response.choice.delta.tool_calls
    assert len(tool_calls) == 1
    assert tool_calls[0] == ChatCompletionDeltaToolCall(
        id=None,
        index=0,
        type="function",
        function=FunctionCall(arguments='{"', name=None),
    )


def test_from_litellm_model_response_stream_preserves_reasoning_content() -> None:
    response = from_litellm_model_response_stream(
        _make_stream_double(_build_reasoning_payload())
    )

    assert response.choice.delta.content is None
    assert response.choice.delta.reasoning_content == " variations"
    assert response.choice.finish_reason is None


@pytest.mark.parametrize("payload", _build_finish_reason_payload())
def test_from_litellm_model_response_stream_handles_content_and_finish_reason(
    payload: dict,
) -> None:
    response = from_litellm_model_response_stream(_make_stream_double(payload))

    assert response.id == "chatcmpl-2b136068-c6fb-4af1-97d5-d2c9d84cd52b"
    assert response.created == "1762544448"
    assert response.choice.index == 0
    if payload["choices"][0]["finish_reason"] == "stop":
        assert response.choice.finish_reason == "stop"
        assert response.choice.delta.content is None
    else:
        assert response.choice.finish_reason is None
        assert response.choice.delta.content == "?"


def test_from_litellm_model_response_stream_parses_multiple_tool_calls() -> None:
    response = from_litellm_model_response_stream(
        _make_stream_double(_build_multiple_tool_calls_payload())
    )

    tool_calls = response.choice.delta.tool_calls
    assert response.id == "Yn4SaajROLXEnvgP5JTN-AQ"
    assert response.created == "1762819684"
    assert response.choice.finish_reason is None
    assert response.choice.delta.content is None
    assert len(tool_calls) == 2
    assert tool_calls[0] == ChatCompletionDeltaToolCall(
        id="call_130bec4755e544ea95f4b1bafd81",
        index=0,
        type="function",
        function=FunctionCall(
            arguments='{"queries": ["new agent framework"]}',
            name="internal_search",
        ),
    )
    assert tool_calls[1] == ChatCompletionDeltaToolCall(
        id="call_42273e8ee5ac4c0a97237d6d25a6",
        index=1,
        type="function",
        function=FunctionCall(
            arguments='{"queries": ["cheese"]}',
            name="web_search",
        ),
    )


def test_from_litellm_model_response_parses_basic_message() -> None:
    response = from_litellm_model_response(
        _make_response_double(_build_non_streaming_response_payload())
    )

    assert isinstance(response, ModelResponse)
    assert response.id == "chatcmpl-abc123"
    assert response.created == "1234567890"
    assert response.choice.finish_reason == "stop"
    assert response.choice.message.content == "Hello, world!"
    assert response.choice.message.role == "assistant"
    assert response.choice.message.tool_calls is None


def test_from_litellm_model_response_parses_tool_calls() -> None:
    response = from_litellm_model_response(
        _make_response_double(_build_non_streaming_tool_call_payload())
    )

    assert isinstance(response, ModelResponse)
    assert response.id == "chatcmpl-xyz789"
    assert response.created == "9876543210"
    assert response.choice.finish_reason == "tool_calls"
    assert response.choice.message.content is None
    assert response.choice.message.role == "assistant"
    assert response.choice.message.tool_calls is not None
    assert len(response.choice.message.tool_calls) == 1

    tool_call = response.choice.message.tool_calls[0]
    assert tool_call.id == "call_abc123"
    assert tool_call.type == "function"
    assert tool_call.function.name == "search_documents"
    assert tool_call.function.arguments == '{"query": "test"}'


================================================
FILE: backend/tests/unit/onyx/llm/test_multi_llm.py
================================================
import os
import threading
import time
from typing import Any
from unittest.mock import ANY
from unittest.mock import patch

import litellm
import pytest
from litellm.types.utils import ChatCompletionDeltaToolCall
from litellm.types.utils import Delta
from litellm.types.utils import Function as LiteLLMFunction

import onyx.llm.models
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.models import AssistantMessage
from onyx.llm.models import FunctionCall
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import ToolCall
from onyx.llm.models import UserMessage
from onyx.llm.multi_llm import LitellmLLM
from onyx.llm.utils import get_max_input_tokens

VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG = [
    "claude-opus-4-5@20251101",
    "claude-opus-4-6",
]


def _create_delta(
    role: str | None = None,
    content: str | None = None,
    tool_calls: list[ChatCompletionDeltaToolCall] | None = None,
) -> Delta:
    delta = Delta(role=role, content=content)
    # NOTE: for some reason, if you pass tool_calls to the constructor, it doesn't actually
    # get set, so we have to do it this way
    delta.tool_calls = tool_calls
    return delta


def _model_response_to_assistant_message(response: ModelResponse) -> AssistantMessage:
    """Convert a ModelResponse to an AssistantMessage for testing."""
    message = response.choice.message
    tool_calls = None
    if message.tool_calls:
        tool_calls = [
            ToolCall(
                id=tc.id,
                function=FunctionCall(
                    name=tc.function.name or "",
                    arguments=tc.function.arguments or "",
                ),
            )
            for tc in message.tool_calls
        ]
    return AssistantMessage(
        role="assistant",
        content=message.content,
        tool_calls=tool_calls,
    )


def _accumulate_stream_to_assistant_message(
    stream_chunks: list[ModelResponseStream],
) -> AssistantMessage:
    """Accumulate streaming deltas into a final AssistantMessage for testing."""
    accumulated_content = ""
    tool_calls_map: dict[int, dict[str, str]] = {}

    for chunk in stream_chunks:
        delta = chunk.choice.delta

        # Accumulate content
        if delta.content:
            accumulated_content += delta.content

        # Accumulate tool calls
        if delta.tool_calls:
            for tool_call_delta in delta.tool_calls:
                index = tool_call_delta.index

                if index not in tool_calls_map:
                    tool_calls_map[index] = {
                        "id": "",
                        "name": "",
                        "arguments": "",
                    }

                if tool_call_delta.id:
                    tool_calls_map[index]["id"] = tool_call_delta.id

                if tool_call_delta.function:
                    if tool_call_delta.function.name:
                        tool_calls_map[index]["name"] = tool_call_delta.function.name
                    if tool_call_delta.function.arguments:
                        tool_calls_map[index][
                            "arguments"
                        ] += tool_call_delta.function.arguments

    # Convert accumulated tool calls to ToolCall list, sorted by index
    tool_calls = None
    if tool_calls_map:
        tool_calls = [
            ToolCall(
                type="function",
                id=tc_data["id"],
                function=FunctionCall(
                    name=tc_data["name"],
                    arguments=tc_data["arguments"],
                ),
            )
            for index in sorted(tool_calls_map.keys())
            for tc_data in [tool_calls_map[index]]
            if tc_data["id"] and tc_data["name"]
        ]

    return AssistantMessage(
        role="assistant",
        content=accumulated_content if accumulated_content else None,
        tool_calls=tool_calls,
    )


@pytest.fixture
def default_multi_llm() -> LitellmLLM:
    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    return LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
    )


def test_multiple_tool_calls(default_multi_llm: LitellmLLM) -> None:
    # Mock the litellm.completion function
    with patch("litellm.completion") as mock_completion:
        # invoke() internally uses stream=True and reassembles via
        # stream_chunk_builder, so the mock must return stream chunks.
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            role="assistant",
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="call_1",
                                    function=LiteLLMFunction(
                                        name="get_weather",
                                        arguments='{"location": "New York"}',
                                    ),
                                    type="function",
                                    index=0,
                                ),
                                ChatCompletionDeltaToolCall(
                                    id="call_2",
                                    function=LiteLLMFunction(
                                        name="get_time",
                                        arguments='{"timezone": "EST"}',
                                    ),
                                    type="function",
                                    index=1,
                                ),
                            ],
                        ),
                        finish_reason="tool_calls",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        # Define input messages
        messages: LanguageModelInput = [
            UserMessage(content="What's the weather and time in New York?")
        ]

        # Define available tools
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {"location": {"type": "string"}},
                        "required": ["location"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "get_time",
                    "description": "Get the current time for a timezone",
                    "parameters": {
                        "type": "object",
                        "properties": {"timezone": {"type": "string"}},
                        "required": ["timezone"],
                    },
                },
            },
        ]

        result = default_multi_llm.invoke(messages, tools)

        # Assert that the result is a ModelResponse
        assert isinstance(result, ModelResponse)

        # Convert to AssistantMessage for easier assertion
        assistant_msg = _model_response_to_assistant_message(result)

        # Assert that the content is None (as per the mock response)
        assert assistant_msg.content is None or assistant_msg.content == ""

        # Assert that there are two tool calls
        assert assistant_msg.tool_calls is not None
        assert len(assistant_msg.tool_calls) == 2

        # Assert the details of the first tool call
        assert assistant_msg.tool_calls[0].id == "call_1"
        assert assistant_msg.tool_calls[0].function.name == "get_weather"
        assert (
            assistant_msg.tool_calls[0].function.arguments == '{"location": "New York"}'
        )

        # Assert the details of the second tool call
        assert assistant_msg.tool_calls[1].id == "call_2"
        assert assistant_msg.tool_calls[1].function.name == "get_time"
        assert assistant_msg.tool_calls[1].function.arguments == '{"timezone": "EST"}'

        # Verify that litellm.completion was called with the correct arguments
        mock_completion.assert_called_once_with(
            model="openai/responses/gpt-3.5-turbo",
            api_key="test_key",
            base_url=None,
            api_version=None,
            custom_llm_provider=None,
            messages=[
                {"role": "user", "content": "What's the weather and time in New York?"}
            ],
            tools=tools,
            stream=True,
            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
            timeout=30,
            max_tokens=None,
            client=ANY,  # HTTPHandler instance created per-request
            stream_options={"include_usage": True},
            parallel_tool_calls=True,
            mock_response=MOCK_LLM_RESPONSE,
            allowed_openai_params=["tool_choice"],
        )


def test_multiple_tool_calls_streaming(default_multi_llm: LitellmLLM) -> None:
    # Mock the litellm.completion function
    with patch("litellm.completion") as mock_completion:
        # Create a mock response with multiple tool calls using litellm objects
        mock_response = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            role="assistant",
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="call_1",
                                    function=LiteLLMFunction(
                                        name="get_weather", arguments='{"location": '
                                    ),
                                    type="function",
                                    index=0,
                                )
                            ],
                        ),
                        finish_reason=None,
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="",
                                    function=LiteLLMFunction(arguments='"New York"}'),
                                    type="function",
                                    index=0,
                                )
                            ]
                        ),
                        finish_reason=None,
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="call_2",
                                    function=LiteLLMFunction(
                                        name="get_time", arguments='{"timezone": "EST"}'
                                    ),
                                    type="function",
                                    index=1,
                                )
                            ]
                        ),
                        finish_reason="tool_calls",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_response

        # Define input messages and tools (same as in the non-streaming test)
        messages: LanguageModelInput = [
            UserMessage(content="What's the weather and time in New York?")
        ]

        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {"location": {"type": "string"}},
                        "required": ["location"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "get_time",
                    "description": "Get the current time for a timezone",
                    "parameters": {
                        "type": "object",
                        "properties": {"timezone": {"type": "string"}},
                        "required": ["timezone"],
                    },
                },
            },
        ]

        # Call the stream method
        stream_result = list(default_multi_llm.stream(messages, tools))

        # Assert that we received the correct number of chunks
        assert len(stream_result) == 3

        # Assert that each chunk is a ModelResponseStream
        for chunk in stream_result:
            assert isinstance(chunk, ModelResponseStream)

        # Accumulate the stream chunks into a final AssistantMessage
        final_result = _accumulate_stream_to_assistant_message(stream_result)

        # Assert that the final result matches our expectations
        assert isinstance(final_result, AssistantMessage)
        assert final_result.content is None or final_result.content == ""
        assert final_result.tool_calls is not None
        assert len(final_result.tool_calls) == 2
        assert final_result.tool_calls[0].id == "call_1"
        assert final_result.tool_calls[0].function.name == "get_weather"
        assert (
            final_result.tool_calls[0].function.arguments == '{"location": "New York"}'
        )
        assert final_result.tool_calls[1].id == "call_2"
        assert final_result.tool_calls[1].function.name == "get_time"
        assert final_result.tool_calls[1].function.arguments == '{"timezone": "EST"}'

        # Verify that litellm.completion was called with the correct arguments
        mock_completion.assert_called_once_with(
            model="openai/responses/gpt-3.5-turbo",
            api_key="test_key",
            base_url=None,
            api_version=None,
            custom_llm_provider=None,
            messages=[
                {"role": "user", "content": "What's the weather and time in New York?"}
            ],
            tools=tools,
            stream=True,
            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
            timeout=30,
            max_tokens=None,
            client=ANY,  # HTTPHandler instance created per-stream
            stream_options={"include_usage": True},
            parallel_tool_calls=True,
            mock_response=MOCK_LLM_RESPONSE,
            allowed_openai_params=["tool_choice"],
        )


@pytest.mark.parametrize("model_name", VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG)
def test_vertex_stream_omits_stream_options(model_name: str) -> None:
    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.VERTEX_AI,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=LlmProviderNames.VERTEX_AI,
            model_name=model_name,
        ),
    )

    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = []

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        list(llm.stream(messages))

        kwargs = mock_completion.call_args.kwargs
        assert "stream_options" not in kwargs


def test_openai_auto_reasoning_effort_maps_to_medium() -> None:
    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.OPENAI,
        model_name="gpt-5.2",
        max_input_tokens=get_max_input_tokens(
            model_provider=LlmProviderNames.OPENAI,
            model_name="gpt-5.2",
        ),
    )

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
        patch("onyx.llm.multi_llm.is_true_openai_model", return_value=True),
    ):
        mock_completion.return_value = []

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        list(llm.stream(messages, reasoning_effort=ReasoningEffort.AUTO))

        kwargs = mock_completion.call_args.kwargs
        assert kwargs["reasoning"]["effort"] == "medium"


@pytest.mark.parametrize("model_name", VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG)
def test_vertex_opus_omits_reasoning_effort(model_name: str) -> None:
    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.VERTEX_AI,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=LlmProviderNames.VERTEX_AI,
            model_name=model_name,
        ),
    )

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
    ):
        mock_completion.return_value = []

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        list(llm.stream(messages))

        kwargs = mock_completion.call_args.kwargs
        assert "reasoning_effort" not in kwargs


def test_openai_chat_omits_reasoning_params() -> None:
    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.OPENAI,
        model_name="gpt-5-chat",
        max_input_tokens=get_max_input_tokens(
            model_provider=LlmProviderNames.OPENAI,
            model_name="gpt-5-chat",
        ),
    )

    with (
        patch("litellm.completion") as mock_completion,
        patch(
            "onyx.llm.multi_llm.model_is_reasoning_model", return_value=True
        ) as mock_is_reasoning,
        patch(
            "onyx.llm.multi_llm.is_true_openai_model", return_value=True
        ) as mock_is_openai,
    ):
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-5-chat",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        llm.invoke(messages)

        kwargs = mock_completion.call_args.kwargs
        assert kwargs["model"] == "openai/responses/gpt-5-chat"
        assert "reasoning" not in kwargs
        assert "reasoning_effort" not in kwargs
        assert mock_is_reasoning.called
        assert mock_is_openai.called


def test_user_identity_metadata_enabled(default_multi_llm: LitellmLLM) -> None:
    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", True),
    ):
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        identity = LLMUserIdentity(user_id="user_123", session_id="session_abc")

        default_multi_llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert kwargs["user"] == "user_123"
        assert kwargs["metadata"]["session_id"] == "session_abc"


def test_user_identity_user_id_truncated_to_64_chars(
    default_multi_llm: LitellmLLM,
) -> None:
    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", True),
    ):
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        long_user_id = "u" * 82
        identity = LLMUserIdentity(user_id=long_user_id, session_id="session_abc")

        default_multi_llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert kwargs["user"] == long_user_id[:64]


def test_user_identity_metadata_disabled_omits_identity(
    default_multi_llm: LitellmLLM,
) -> None:
    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", False),
    ):
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        identity = LLMUserIdentity(user_id="user_123", session_id="session_abc")

        default_multi_llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert "user" not in kwargs
        assert "metadata" not in kwargs


def test_existing_metadata_pass_through_when_identity_disabled() -> None:
    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
        model_kwargs={"metadata": {"foo": "bar"}},
    )

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", False),
    ):
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        identity = LLMUserIdentity(user_id="user_123", session_id="session_abc")

        llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert "user" not in kwargs
        assert kwargs["metadata"]["foo"] == "bar"


def test_openai_model_invoke_uses_httphandler_client(
    default_multi_llm: LitellmLLM,
) -> None:
    """Test that OpenAI models get an HTTPHandler client passed for invoke()."""
    from litellm import HTTPHandler

    with patch("litellm.completion") as mock_completion:
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        default_multi_llm.invoke(messages)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert isinstance(kwargs["client"], HTTPHandler)


def test_openai_model_stream_uses_httphandler_client(
    default_multi_llm: LitellmLLM,
) -> None:
    """Test that OpenAI models get an HTTPHandler client passed for stream()."""
    from litellm import HTTPHandler

    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = []

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        list(default_multi_llm.stream(messages))

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert isinstance(kwargs["client"], HTTPHandler)


def test_anthropic_model_passes_no_client() -> None:
    """Test that non-OpenAI models (Anthropic) don't get a client passed."""
    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.ANTHROPIC,
        model_name="claude-3-opus-20240229",
        max_input_tokens=200000,
    )

    with patch("litellm.completion") as mock_completion:
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="claude-3-opus-20240229",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        llm.invoke(messages)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert kwargs["client"] is None


def test_bedrock_model_passes_no_client() -> None:
    """Test that Bedrock models don't get a client passed."""
    llm = LitellmLLM(
        api_key=None,
        timeout=30,
        model_provider=LlmProviderNames.BEDROCK,
        model_name="anthropic.claude-3-sonnet-20240229-v1:0",
        max_input_tokens=200000,
    )

    with patch("litellm.completion") as mock_completion:
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="anthropic.claude-3-sonnet-20240229-v1:0",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        llm.invoke(messages)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert kwargs["client"] is None


def test_azure_openai_model_uses_httphandler_client() -> None:
    """Test that Azure OpenAI models get an HTTPHandler client passed.

    Azure OpenAI uses the same responses API as OpenAI, so it needs
    the same HTTPHandler isolation to avoid connection pool conflicts.
    """
    from litellm import HTTPHandler

    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=LlmProviderNames.AZURE,
        model_name="gpt-4o",
        api_base="https://my-resource.openai.azure.com",
        api_version="2024-02-15-preview",
        max_input_tokens=128000,
    )

    with patch("litellm.completion") as mock_completion:
        mock_stream_chunks = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(role="assistant", content="Hello"),
                        finish_reason="stop",
                        index=0,
                    )
                ],
                model="gpt-4o",
            ),
        ]
        mock_completion.return_value = mock_stream_chunks

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        llm.invoke(messages)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert isinstance(kwargs["client"], HTTPHandler)


def test_temporary_env_cleanup(monkeypatch: pytest.MonkeyPatch) -> None:
    # Assign some environment variables
    EXPECTED_ENV_VARS = {
        "TEST_ENV_VAR": "test_value",
        "ANOTHER_ONE": "1",
        "THIRD_ONE": "2",
    }

    CUSTOM_CONFIG = {
        "TEST_ENV_VAR": "fdsfsdf",
        "ANOTHER_ONE": "3",
        "THIS_IS_RANDOM": "123213",
    }

    for env_var, value in EXPECTED_ENV_VARS.items():
        monkeypatch.setenv(env_var, value)

    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
        model_kwargs={"metadata": {"foo": "bar"}},
        custom_config=CUSTOM_CONFIG,
    )

    # When custom_config is set, invoke() internally uses stream=True and
    # reassembles via stream_chunk_builder, so the mock must return stream chunks.
    mock_stream_chunks = [
        litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    delta=_create_delta(role="assistant", content="Hello"),
                    finish_reason="stop",
                    index=0,
                )
            ],
            model="gpt-3.5-turbo",
        ),
    ]

    def on_litellm_completion(
        **kwargs: dict[str, Any],  # noqa: ARG001
    ) -> list[litellm.ModelResponse]:
        # Validate that the environment variables are those in custom config
        for env_var, value in CUSTOM_CONFIG.items():
            assert env_var in os.environ
            assert os.environ[env_var] == value

        return mock_stream_chunks

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", False),
    ):
        mock_completion.side_effect = on_litellm_completion

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        identity = LLMUserIdentity(user_id="user_123", session_id="session_abc")

        llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()
        kwargs = mock_completion.call_args.kwargs
        assert kwargs["stream"] is True
        assert "user" not in kwargs
        assert kwargs["metadata"]["foo"] == "bar"

        # Check that the environment variables are back to the original values
        for env_var, value in EXPECTED_ENV_VARS.items():
            assert env_var in os.environ
            assert os.environ[env_var] == value

        # Check that temporary env var from CUSTOM_CONFIG is no longer set
        assert "THIS_IS_RANDOM" not in os.environ


def test_temporary_env_cleanup_on_exception(monkeypatch: pytest.MonkeyPatch) -> None:
    """Verify env vars are restored even when an exception occurs during LLM invocation."""
    # Assign some environment variables
    EXPECTED_ENV_VARS = {
        "TEST_ENV_VAR": "test_value",
        "ANOTHER_ONE": "1",
        "THIRD_ONE": "2",
    }

    CUSTOM_CONFIG = {
        "TEST_ENV_VAR": "fdsfsdf",
        "ANOTHER_ONE": "3",
        "THIS_IS_RANDOM": "123213",
    }

    for env_var, value in EXPECTED_ENV_VARS.items():
        monkeypatch.setenv(env_var, value)

    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    llm = LitellmLLM(
        api_key="test_key",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
        model_kwargs={"metadata": {"foo": "bar"}},
        custom_config=CUSTOM_CONFIG,
    )

    def on_litellm_completion_raises(
        **kwargs: dict[str, Any],  # noqa: ARG001
    ) -> None:  # noqa: ARG001
        # Validate that the environment variables are those in custom config
        for env_var, value in CUSTOM_CONFIG.items():
            assert env_var in os.environ
            assert os.environ[env_var] == value

        # Simulate an error during LLM call
        raise RuntimeError("Simulated LLM API failure")

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER", False),
    ):
        mock_completion.side_effect = on_litellm_completion_raises

        messages: LanguageModelInput = [UserMessage(content="Hi")]
        identity = LLMUserIdentity(user_id="user_123", session_id="session_abc")

        with pytest.raises(RuntimeError, match="Simulated LLM API failure"):
            llm.invoke(messages, user_identity=identity)

        mock_completion.assert_called_once()

        # Check that the environment variables are back to the original values
        for env_var, value in EXPECTED_ENV_VARS.items():
            assert env_var in os.environ
            assert os.environ[env_var] == value

        # Check that temporary env var from CUSTOM_CONFIG is no longer set
        assert "THIS_IS_RANDOM" not in os.environ


@pytest.mark.parametrize("use_stream", [False, True], ids=["invoke", "stream"])
def test_multithreaded_custom_config_isolation(
    monkeypatch: pytest.MonkeyPatch,
    use_stream: bool,
) -> None:
    """Verify the env lock prevents concurrent LLM calls from seeing each other's custom_config.

    Two LitellmLLM instances with different custom_config dicts call invoke/stream
    concurrently. The _env_lock in temporary_env_and_lock serializes their access so
    each call only ever sees its own env vars—never the other's.
    """
    # Ensure these keys start unset
    monkeypatch.delenv("SHARED_KEY", raising=False)
    monkeypatch.delenv("LLM_A_ONLY", raising=False)
    monkeypatch.delenv("LLM_B_ONLY", raising=False)

    CONFIG_A = {
        "SHARED_KEY": "value_from_A",
        "LLM_A_ONLY": "a_secret",
    }
    CONFIG_B = {
        "SHARED_KEY": "value_from_B",
        "LLM_B_ONLY": "b_secret",
    }

    all_env_keys = list(set(list(CONFIG_A.keys()) + list(CONFIG_B.keys())))

    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    llm_a = LitellmLLM(
        api_key="key_a",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
        custom_config=CONFIG_A,
    )
    llm_b = LitellmLLM(
        api_key="key_b",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
        custom_config=CONFIG_B,
    )

    # Both invoke (with custom_config) and stream use stream=True at the
    # litellm level, so the mock must return stream chunks.
    mock_stream_chunks = [
        litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    delta=_create_delta(role="assistant", content="Hi"),
                    finish_reason="stop",
                    index=0,
                )
            ],
            model=model_name,
        ),
    ]

    # Track what each call observed inside litellm.completion.
    # Keyed by api_key so we can identify which LLM instance made the call.
    observed_envs: dict[str, dict[str, str | None]] = {}

    def fake_completion(**kwargs: Any) -> list[litellm.ModelResponse]:
        time.sleep(0.1)  # We expect someone to get caught on the lock
        api_key = kwargs.get("api_key", "")
        label = "A" if api_key == "key_a" else "B"

        snapshot: dict[str, str | None] = {}
        for key in all_env_keys:
            snapshot[key] = os.environ.get(key)
        observed_envs[label] = snapshot

        return mock_stream_chunks

    errors: list[Exception] = []

    def run_llm(llm: LitellmLLM) -> None:
        try:
            messages: LanguageModelInput = [UserMessage(content="Hi")]
            if use_stream:
                list(llm.stream(messages))
            else:
                llm.invoke(messages)
        except Exception as e:
            errors.append(e)

    with patch("litellm.completion", side_effect=fake_completion):
        t_a = threading.Thread(target=run_llm, args=(llm_a,))
        t_b = threading.Thread(target=run_llm, args=(llm_b,))

        t_a.start()
        t_b.start()
        t_a.join(timeout=10)
        t_b.join(timeout=10)

    assert not errors, f"Thread errors: {errors}"
    assert "A" in observed_envs and "B" in observed_envs

    # Thread A must have seen its own config for SHARED_KEY, not B's
    assert observed_envs["A"]["SHARED_KEY"] == "value_from_A"
    assert observed_envs["A"]["LLM_A_ONLY"] == "a_secret"
    # A must NOT see B's exclusive key
    assert observed_envs["A"]["LLM_B_ONLY"] is None

    # Thread B must have seen its own config for SHARED_KEY, not A's
    assert observed_envs["B"]["SHARED_KEY"] == "value_from_B"
    assert observed_envs["B"]["LLM_B_ONLY"] == "b_secret"
    # B must NOT see A's exclusive key
    assert observed_envs["B"]["LLM_A_ONLY"] is None

    # After both calls, env should be clean
    assert os.environ.get("SHARED_KEY") is None
    assert os.environ.get("LLM_A_ONLY") is None
    assert os.environ.get("LLM_B_ONLY") is None


def test_multithreaded_invoke_without_custom_config_skips_env_lock() -> None:
    """Verify that invoke() without custom_config does not acquire the env lock.

    Two LitellmLLM instances without custom_config call invoke concurrently.
    Both should run with stream=False, never touch the env lock, and complete
    without blocking each other.
    """
    from onyx.llm import multi_llm as multi_llm_module

    model_provider = LlmProviderNames.OPENAI
    model_name = "gpt-3.5-turbo"

    llm_a = LitellmLLM(
        api_key="key_a",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
    )
    llm_b = LitellmLLM(
        api_key="key_b",
        timeout=30,
        model_provider=model_provider,
        model_name=model_name,
        max_input_tokens=get_max_input_tokens(
            model_provider=model_provider,
            model_name=model_name,
        ),
    )

    mock_stream_chunks = [
        litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    delta=_create_delta(role="assistant", content="Hi"),
                    finish_reason="stop",
                    index=0,
                )
            ],
            model=model_name,
        ),
    ]

    call_kwargs: dict[str, dict[str, Any]] = {}

    def fake_completion(**kwargs: Any) -> list[litellm.ModelResponse]:
        api_key = kwargs.get("api_key", "")
        label = "A" if api_key == "key_a" else "B"
        call_kwargs[label] = kwargs
        return mock_stream_chunks

    errors: list[Exception] = []

    def run_llm(llm: LitellmLLM) -> None:
        try:
            messages: LanguageModelInput = [UserMessage(content="Hi")]
            llm.invoke(messages)
        except Exception as e:
            errors.append(e)

    with (
        patch("litellm.completion", side_effect=fake_completion),
        patch.object(
            multi_llm_module,
            "temporary_env_and_lock",
            wraps=multi_llm_module.temporary_env_and_lock,
        ) as mock_env_lock,
    ):
        t_a = threading.Thread(target=run_llm, args=(llm_a,))
        t_b = threading.Thread(target=run_llm, args=(llm_b,))

        t_a.start()
        t_b.start()
        t_a.join(timeout=10)
        t_b.join(timeout=10)

    assert not errors, f"Thread errors: {errors}"
    assert "A" in call_kwargs and "B" in call_kwargs

    # invoke() always uses stream=True internally (reassembles via stream_chunk_builder)
    assert call_kwargs["A"]["stream"] is True
    assert call_kwargs["B"]["stream"] is True

    # The env lock context manager should never have been called
    mock_env_lock.assert_not_called()


# ---- Tests for Bedrock tool content stripping ----


def test_messages_contain_tool_content_with_tool_role() -> None:
    from onyx.llm.multi_llm import _messages_contain_tool_content

    messages: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "I'll search for that."},
        {"role": "tool", "content": "search results", "tool_call_id": "tc_1"},
    ]
    assert _messages_contain_tool_content(messages) is True


def test_messages_contain_tool_content_with_tool_calls() -> None:
    from onyx.llm.multi_llm import _messages_contain_tool_content

    messages: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "tc_1",
                    "type": "function",
                    "function": {"name": "search", "arguments": "{}"},
                }
            ],
        },
    ]
    assert _messages_contain_tool_content(messages) is True


def test_messages_contain_tool_content_without_tools() -> None:
    from onyx.llm.multi_llm import _messages_contain_tool_content

    messages: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
    ]
    assert _messages_contain_tool_content(messages) is False


def test_strip_tool_content_converts_assistant_tool_calls_to_text() -> None:
    from onyx.llm.multi_llm import _strip_tool_content_from_messages

    messages: list[dict[str, Any]] = [
        {"role": "user", "content": "Search for cats"},
        {
            "role": "assistant",
            "content": "Let me search.",
            "tool_calls": [
                {
                    "id": "tc_1",
                    "type": "function",
                    "function": {
                        "name": "search",
                        "arguments": '{"query": "cats"}',
                    },
                }
            ],
        },
        {
            "role": "tool",
            "content": "Found 3 results about cats.",
            "tool_call_id": "tc_1",
        },
        {"role": "assistant", "content": "Here are the results."},
    ]

    result = _strip_tool_content_from_messages(messages)

    assert len(result) == 4

    # First message unchanged
    assert result[0] == {"role": "user", "content": "Search for cats"}

    # Assistant with tool calls → plain text
    assert result[1]["role"] == "assistant"
    assert "tool_calls" not in result[1]
    assert "Let me search." in result[1]["content"]
    assert "[Tool Call]" in result[1]["content"]
    assert "search" in result[1]["content"]
    assert "tc_1" in result[1]["content"]

    # Tool response → user message
    assert result[2]["role"] == "user"
    assert "[Tool Result]" in result[2]["content"]
    assert "tc_1" in result[2]["content"]
    assert "Found 3 results about cats." in result[2]["content"]

    # Final assistant message unchanged
    assert result[3] == {"role": "assistant", "content": "Here are the results."}


def test_strip_tool_content_handles_assistant_with_no_text_content() -> None:
    from onyx.llm.multi_llm import _strip_tool_content_from_messages

    messages: list[dict[str, Any]] = [
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "tc_1",
                    "type": "function",
                    "function": {"name": "search", "arguments": "{}"},
                }
            ],
        },
    ]

    result = _strip_tool_content_from_messages(messages)
    assert result[0]["role"] == "assistant"
    assert "[Tool Call]" in result[0]["content"]
    assert "tool_calls" not in result[0]


def test_strip_tool_content_passes_through_non_tool_messages() -> None:
    from onyx.llm.multi_llm import _strip_tool_content_from_messages

    messages: list[dict[str, Any]] = [
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi!"},
    ]

    result = _strip_tool_content_from_messages(messages)
    assert result == messages


def test_strip_tool_content_handles_list_content_blocks() -> None:
    from onyx.llm.multi_llm import _strip_tool_content_from_messages

    messages: list[dict[str, Any]] = [
        {
            "role": "assistant",
            "content": [{"type": "text", "text": "Searching now."}],
            "tool_calls": [
                {
                    "id": "tc_1",
                    "type": "function",
                    "function": {"name": "search", "arguments": "{}"},
                }
            ],
        },
        {
            "role": "tool",
            "content": [
                {"type": "text", "text": "result A"},
                {"type": "text", "text": "result B"},
            ],
            "tool_call_id": "tc_1",
        },
    ]

    result = _strip_tool_content_from_messages(messages)

    # Assistant: list content flattened + tool call appended
    assert result[0]["role"] == "assistant"
    assert "Searching now." in result[0]["content"]
    assert "[Tool Call]" in result[0]["content"]
    assert isinstance(result[0]["content"], str)

    # Tool: list content flattened into user message
    assert result[1]["role"] == "user"
    assert "result A" in result[1]["content"]
    assert "result B" in result[1]["content"]
    assert isinstance(result[1]["content"], str)


def test_strip_tool_content_merges_consecutive_tool_results() -> None:
    """Bedrock requires strict user/assistant alternation. Multiple parallel
    tool results must be merged into a single user message."""
    from onyx.llm.multi_llm import _strip_tool_content_from_messages

    messages: list[dict[str, Any]] = [
        {"role": "user", "content": "weather and news?"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "tc_1",
                    "type": "function",
                    "function": {"name": "search_weather", "arguments": "{}"},
                },
                {
                    "id": "tc_2",
                    "type": "function",
                    "function": {"name": "search_news", "arguments": "{}"},
                },
            ],
        },
        {"role": "tool", "content": "sunny 72F", "tool_call_id": "tc_1"},
        {"role": "tool", "content": "headline news", "tool_call_id": "tc_2"},
        {"role": "assistant", "content": "Here are the results."},
    ]

    result = _strip_tool_content_from_messages(messages)

    # user, assistant (flattened), user (merged tool results), assistant
    assert len(result) == 4
    roles = [m["role"] for m in result]
    assert roles == ["user", "assistant", "user", "assistant"]

    # Both tool results merged into one user message
    merged = result[2]["content"]
    assert "tc_1" in merged
    assert "sunny 72F" in merged
    assert "tc_2" in merged
    assert "headline news" in merged


def test_no_tool_choice_sent_when_no_tools(default_multi_llm: LitellmLLM) -> None:
    """Regression test for providers (e.g. Fireworks) that reject tool_choice=null.

    When no tools are provided, tool_choice must not be forwarded to
    litellm.completion() at all — not even as None.
    """
    messages: LanguageModelInput = [UserMessage(content="Hello!")]

    mock_stream_chunks = [
        litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    delta=_create_delta(role="assistant", content="Hello!"),
                    finish_reason="stop",
                    index=0,
                )
            ],
            model="gpt-3.5-turbo",
        ),
    ]

    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_stream_chunks

        default_multi_llm.invoke(messages, tools=None)

        _, kwargs = mock_completion.call_args
        assert (
            "tool_choice" not in kwargs
        ), "tool_choice must not be sent to providers when no tools are provided"


def test_bifrost_normalizes_api_base_in_model_kwargs() -> None:
    llm = LitellmLLM(
        api_key="test_key",
        api_base="https://bifrost.example.com/",
        timeout=30,
        model_provider=LlmProviderNames.BIFROST,
        model_name="anthropic/claude-sonnet-4-6",
        max_input_tokens=32000,
    )

    assert llm._custom_llm_provider == "openai"
    assert llm._api_base == "https://bifrost.example.com/v1"
    assert llm._model_kwargs["api_base"] == "https://bifrost.example.com/v1"


def test_prompt_contains_tool_call_history_true() -> None:
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
        AssistantMessage(
            content=None,
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    function=FunctionCall(name="get_weather", arguments="{}"),
                )
            ],
        ),
    ]
    assert _prompt_contains_tool_call_history(messages) is True


def test_prompt_contains_tool_call_history_false_no_tools() -> None:
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    messages: LanguageModelInput = [
        UserMessage(content="Hello"),
        AssistantMessage(content="Hi there!"),
    ]
    assert _prompt_contains_tool_call_history(messages) is False


def test_prompt_contains_tool_call_history_false_user_only() -> None:
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    messages: LanguageModelInput = [UserMessage(content="Hello")]
    assert _prompt_contains_tool_call_history(messages) is False


def test_bedrock_claude_drops_thinking_when_thinking_blocks_missing() -> None:
    """When thinking is enabled but assistant messages with tool_calls lack
    thinking_blocks, the thinking param must be dropped to avoid the Bedrock
    BadRequestError about missing thinking blocks."""
    llm = LitellmLLM(
        api_key=None,
        timeout=30,
        model_provider=LlmProviderNames.BEDROCK,
        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
        max_input_tokens=200000,
    )

    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
        AssistantMessage(
            content=None,
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    function=FunctionCall(
                        name="get_weather",
                        arguments='{"city": "Paris"}',
                    ),
                )
            ],
        ),
        onyx.llm.models.ToolMessage(
            content="22°C sunny",
            tool_call_id="tc_1",
        ),
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                },
            },
        }
    ]

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
    ):
        mock_completion.return_value = []

        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))

        kwargs = mock_completion.call_args.kwargs
        assert "thinking" not in kwargs, (
            "thinking param should be dropped when thinking_blocks are missing "
            "from assistant messages with tool_calls"
        )


def test_bedrock_claude_keeps_thinking_when_no_tool_history() -> None:
    """When thinking is enabled and there are no historical assistant messages
    with tool_calls, the thinking param should be preserved."""
    llm = LitellmLLM(
        api_key=None,
        timeout=30,
        model_provider=LlmProviderNames.BEDROCK,
        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
        max_input_tokens=200000,
    )

    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                },
            },
        }
    ]

    with (
        patch("litellm.completion") as mock_completion,
        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
    ):
        mock_completion.return_value = []

        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))

        kwargs = mock_completion.call_args.kwargs
        assert "thinking" in kwargs, (
            "thinking param should be preserved when no assistant messages "
            "with tool_calls exist in history"
        )
        assert kwargs["thinking"]["type"] == "enabled"


def test_bifrost_claude_includes_allowed_openai_params() -> None:
    llm = LitellmLLM(
        api_key="test_key",
        api_base="https://bifrost.example.com",
        timeout=30,
        model_provider=LlmProviderNames.BIFROST,
        model_name="anthropic/claude-sonnet-4-6",
        max_input_tokens=32000,
    )

    messages: LanguageModelInput = [UserMessage(content="Use a tool if needed")]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "lookup",
                "description": "Look up data",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}},
                    "required": ["query"],
                },
            },
        }
    ]
    mock_stream_chunks = [
        litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    delta=_create_delta(role="assistant", content="Done"),
                    finish_reason="stop",
                    index=0,
                )
            ],
            model="anthropic/claude-sonnet-4-6",
        ),
    ]

    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_stream_chunks

        llm.invoke(messages, tools=tools)

        kwargs = mock_completion.call_args.kwargs
        assert kwargs["model"] == "anthropic/claude-sonnet-4-6"
        assert kwargs["base_url"] == "https://bifrost.example.com/v1"
        assert kwargs["custom_llm_provider"] == "openai"
        assert kwargs["allowed_openai_params"] == ["tool_choice"]


================================================
FILE: backend/tests/unit/onyx/llm/test_reasoning_effort_mapping.py
================================================
from onyx.llm.models import OPENAI_REASONING_EFFORT
from onyx.llm.models import ReasoningEffort


# Valid OpenAI reasoning effort values per the API documentation
# https://platform.openai.com/docs/api-reference/responses
VALID_OPENAI_REASONING_EFFORT_VALUES = frozenset(
    {"none", "minimal", "low", "medium", "high", "xhigh"}
)


def test_openai_reasoning_effort_mapping_has_valid_values() -> None:
    """Test that all OPENAI_REASONING_EFFORT mapping values are valid OpenAI API values.

    This test prevents regressions where invalid values like "auto" are passed to the
    OpenAI API, which would result in a 400 Bad Request error.

    The OpenAI API only accepts: 'none', 'minimal', 'low', 'medium', 'high', 'xhigh'
    """
    for effort_level, openai_value in OPENAI_REASONING_EFFORT.items():
        assert openai_value in VALID_OPENAI_REASONING_EFFORT_VALUES, (
            f"OPENAI_REASONING_EFFORT[{effort_level}] = '{openai_value}' is not a valid "
            f"OpenAI reasoning effort value. Valid values are: {sorted(VALID_OPENAI_REASONING_EFFORT_VALUES)}"
        )


def test_openai_reasoning_effort_mapping_covers_all_effort_levels() -> None:
    """Test that OPENAI_REASONING_EFFORT has mappings for all ReasoningEffort values.

    This ensures we don't accidentally forget to add a mapping when new effort levels are added.
    Note: ReasoningEffort.OFF maps to "none" in the OpenAI API.
    """
    # These are the effort levels that should have OpenAI mappings
    expected_effort_levels = {
        ReasoningEffort.AUTO,
        ReasoningEffort.OFF,
        ReasoningEffort.LOW,
        ReasoningEffort.MEDIUM,
        ReasoningEffort.HIGH,
    }

    mapped_effort_levels = set(OPENAI_REASONING_EFFORT.keys())

    assert mapped_effort_levels == expected_effort_levels, (
        f"OPENAI_REASONING_EFFORT mapping is missing or has extra effort levels. "
        f"Expected: {expected_effort_levels}, Got: {mapped_effort_levels}"
    )


def test_reasoning_effort_auto_does_not_map_to_auto() -> None:
    """Explicitly test that ReasoningEffort.AUTO does not map to the string 'auto'.

    OpenAI's API does not accept 'auto' as a value for reasoning.effort.
    This test exists as a specific guard against the bug that caused this test file
    to be created in the first place.
    """
    assert OPENAI_REASONING_EFFORT[ReasoningEffort.AUTO] != "auto", (
        "ReasoningEffort.AUTO must not map to 'auto' - OpenAI API rejects this value. "
        "Use a valid default like 'medium' or 'low' instead."
    )


================================================
FILE: backend/tests/unit/onyx/llm/test_request_context.py
================================================
import contextvars

from onyx.llm.request_context import get_llm_mock_response
from onyx.llm.request_context import reset_llm_mock_response
from onyx.llm.request_context import set_llm_mock_response


def test_reset_llm_mock_response_same_context() -> None:
    token = set_llm_mock_response("mock-response")
    assert get_llm_mock_response() == "mock-response"

    reset_llm_mock_response(token)
    assert get_llm_mock_response() is None


def test_reset_llm_mock_response_different_context() -> None:
    foreign_context = contextvars.copy_context()
    foreign_token = foreign_context.run(set_llm_mock_response, "foreign-response")

    set_llm_mock_response("current-response")
    assert get_llm_mock_response() == "current-response"

    # Should not raise even when token came from another context.
    reset_llm_mock_response(foreign_token)
    assert get_llm_mock_response() is None


================================================
FILE: backend/tests/unit/onyx/llm/test_true_openai_model.py
================================================
from onyx.llm.constants import LlmProviderNames
from onyx.llm.utils import get_model_map
from onyx.llm.utils import is_true_openai_model


class TestIsTrueOpenAIModel:
    """Tests for the is_true_openai_model function using real LiteLLM model registry."""

    def test_real_openai_gpt4(self) -> None:
        """Test that real OpenAI GPT-4 model is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4") is True

    def test_real_openai_gpt4_turbo(self) -> None:
        """Test that real OpenAI GPT-4-turbo model is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4-turbo") is True

    def test_real_openai_gpt35_turbo(self) -> None:
        """Test that real OpenAI GPT-3.5-turbo model is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-3.5-turbo") is True

    def test_real_openai_gpt4o(self) -> None:
        """Test that real OpenAI GPT-4o model is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4o") is True

    def test_real_openai_gpt4o_mini(self) -> None:
        """Test that real OpenAI GPT-4o-mini model is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4o-mini") is True

    def test_openai_with_provider_prefix(self) -> None:
        """Test that OpenAI model with provider prefix is correctly identified."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "openai/gpt-4") is False

    def test_real_openai_with_date_version(self) -> None:
        """Test that OpenAI model with date version is correctly identified."""
        # Check if this specific dated version exists in the registry
        model_map = get_model_map()
        if "openai/gpt-4-0613" in model_map:
            assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4-0613") is True

    def test_non_openai_provider_anthropic(self) -> None:
        """Test that non-OpenAI provider (Anthropic) returns False."""
        assert (
            is_true_openai_model(
                LlmProviderNames.ANTHROPIC, "claude-3-5-sonnet-20241022"
            )
            is False
        )

    def test_non_openai_provider_gemini(self) -> None:
        """Test that non-OpenAI provider returns False."""
        assert (
            is_true_openai_model(LlmProviderNames.VERTEX_AI, "gemini-1.5-pro") is False
        )

    def test_non_openai_provider_ollama(self) -> None:
        """Test that Ollama provider returns False."""
        assert is_true_openai_model(LlmProviderNames.OLLAMA_CHAT, "llama3.1") is False

    def test_openai_compatible_not_in_registry(self) -> None:
        """Test that OpenAI-compatible model not in registry returns False."""
        # Custom model served via vLLM or LiteLLM proxy
        assert (
            is_true_openai_model(LlmProviderNames.OPENAI, "custom-llama-model") is False
        )

    def test_openai_compatible_starts_with_o_not_in_registry(self) -> None:
        """Test that model starting with 'o' but not in registry returns False."""
        # This would have returned True with the old implementation
        assert is_true_openai_model(LlmProviderNames.OPENAI, "ollama-model") is False

    def test_empty_model_name(self) -> None:
        """Test that empty model name returns False."""
        assert is_true_openai_model(LlmProviderNames.OPENAI, "") is False

    def test_empty_provider(self) -> None:
        """Test that empty provider returns False."""
        assert is_true_openai_model("", "gpt-4") is False

    def test_case_sensitivity(self) -> None:
        """Test that model names are case-sensitive."""
        # Model names should be case-sensitive
        assert is_true_openai_model(LlmProviderNames.OPENAI, "GPT-4") is False

    def test_none_values_handled(self) -> None:
        """Test that None values are handled gracefully."""
        # Should not crash with None values
        assert is_true_openai_model(LlmProviderNames.OPENAI, None) is False  # type: ignore

    def test_litellm_proxy_custom_model(self) -> None:
        """Test that custom models via LiteLLM proxy return False."""
        # Custom model name not in OpenAI registry
        assert is_true_openai_model(LlmProviderNames.OPENAI, "my-custom-gpt") is False

    def test_vllm_hosted_model(self) -> None:
        """Test that vLLM-hosted models with OpenAI-compatible API return False."""
        # vLLM hosting a custom model with OpenAI-compatible API
        assert (
            is_true_openai_model(LlmProviderNames.OPENAI, "TheBloke/Llama-2-7B-GPTQ")
            is False
        )

    def test_openrouter_openai_model(self) -> None:
        """Test that OpenRouter proxied OpenAI models return False."""
        # OpenRouter is a proxy service, not true OpenAI
        assert (
            is_true_openai_model(LlmProviderNames.OPENROUTER, "openai/gpt-4") is False
        )

    def test_together_ai_model(self) -> None:
        """Test that Together AI models return False."""
        assert is_true_openai_model("together_ai", "mistralai/Mixtral-8x7B") is False

    def test_model_with_custom_suffix(self) -> None:
        """Test that models with custom suffixes not in registry return False."""
        # Custom deployment with suffix
        assert (
            is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4-my-deployment")
            is False
        )

    def test_real_openai_text_embedding_models(self) -> None:
        """Test that real OpenAI text-embedding models are correctly identified."""
        # Check if embedding models are in the registry
        model_map = get_model_map()
        if "openai/text-embedding-ada-002" in model_map:
            assert (
                is_true_openai_model(LlmProviderNames.OPENAI, "text-embedding-ada-002")
                is True
            )
        if "openai/text-embedding-3-small" in model_map:
            assert (
                is_true_openai_model(LlmProviderNames.OPENAI, "text-embedding-3-small")
                is True
            )

    def test_deprecated_openai_models(self) -> None:
        """Test that deprecated but real OpenAI models are still identified correctly."""
        # Check for older models that might still be in registry
        model_map = get_model_map()
        if "openai/gpt-3.5-turbo-instruct" in model_map:
            assert (
                is_true_openai_model(LlmProviderNames.OPENAI, "gpt-3.5-turbo-instruct")
                is True
            )

    def test_azure_openai_model_through_litellm_proxy(self) -> None:
        """Test that Azure OpenAI models are correctly identified."""
        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "gpt-4") is True
        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "gpt-5") is True
        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "gpt-5.1") is True

        assert (
            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "azure/gpt-4") is True
        )
        assert (
            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "azure/gpt-5") is True
        )
        assert (
            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, "azure/gpt-5.1")
            is True
        )


================================================
FILE: backend/tests/unit/onyx/llm/test_vision_model_selection_logging.py
================================================
"""
Unit tests for vision model selection logging in get_default_llm_with_vision.

Verifies that operators get clear feedback about:
1. Which vision model was selected and why
2. When the default vision model doesn't support image input
3. When no vision-capable model exists at all
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.llm.factory import get_default_llm_with_vision


_FACTORY = "onyx.llm.factory"


def _make_mock_model(
    *,
    name: str = "gpt-4o",
    provider: str = "openai",
    provider_id: int = 1,
    flow_types: list[str] | None = None,
) -> MagicMock:
    model = MagicMock()
    model.name = name
    model.llm_provider_id = provider_id
    model.llm_provider.provider = provider
    model.llm_model_flow_types = flow_types or []
    return model


@patch(f"{_FACTORY}.get_session_with_current_tenant")
@patch(f"{_FACTORY}.fetch_default_vision_model")
@patch(f"{_FACTORY}.model_supports_image_input", return_value=True)
@patch(f"{_FACTORY}.llm_from_provider")
@patch(f"{_FACTORY}.LLMProviderView")
@patch(f"{_FACTORY}.logger")
def test_logs_when_using_default_vision_model(
    mock_logger: MagicMock,
    mock_provider_view: MagicMock,  # noqa: ARG001
    mock_llm_from: MagicMock,  # noqa: ARG001
    mock_supports: MagicMock,  # noqa: ARG001
    mock_fetch_default: MagicMock,
    mock_session: MagicMock,  # noqa: ARG001
) -> None:
    mock_fetch_default.return_value = _make_mock_model(name="gpt-4o", provider="azure")

    get_default_llm_with_vision()

    mock_logger.info.assert_called_once()
    log_msg = mock_logger.info.call_args[0][0]
    assert "default vision model" in log_msg.lower()


@patch(f"{_FACTORY}.get_session_with_current_tenant")
@patch(f"{_FACTORY}.fetch_default_vision_model")
@patch(f"{_FACTORY}.model_supports_image_input", return_value=False)
@patch(f"{_FACTORY}.fetch_existing_models", return_value=[])
@patch(f"{_FACTORY}.logger")
def test_warns_when_default_model_lacks_vision(
    mock_logger: MagicMock,
    mock_fetch_models: MagicMock,  # noqa: ARG001
    mock_supports: MagicMock,  # noqa: ARG001
    mock_fetch_default: MagicMock,
    mock_session: MagicMock,  # noqa: ARG001
) -> None:
    mock_fetch_default.return_value = _make_mock_model(
        name="text-only-model", provider="azure"
    )

    result = get_default_llm_with_vision()

    assert result is None
    # Should have warned about the default model not supporting vision
    warning_calls = [
        call
        for call in mock_logger.warning.call_args_list
        if "does not support" in str(call)
    ]
    assert len(warning_calls) >= 1


@patch(f"{_FACTORY}.get_session_with_current_tenant")
@patch(f"{_FACTORY}.fetch_default_vision_model", return_value=None)
@patch(f"{_FACTORY}.fetch_existing_models", return_value=[])
@patch(f"{_FACTORY}.logger")
def test_warns_when_no_models_exist(
    mock_logger: MagicMock,
    mock_fetch_models: MagicMock,  # noqa: ARG001
    mock_fetch_default: MagicMock,  # noqa: ARG001
    mock_session: MagicMock,  # noqa: ARG001
) -> None:
    result = get_default_llm_with_vision()

    assert result is None
    mock_logger.warning.assert_called_once()
    log_msg = mock_logger.warning.call_args[0][0]
    assert "no llm models" in log_msg.lower()


@patch(f"{_FACTORY}.get_session_with_current_tenant")
@patch(f"{_FACTORY}.fetch_default_vision_model", return_value=None)
@patch(f"{_FACTORY}.fetch_existing_models")
@patch(f"{_FACTORY}.model_supports_image_input", return_value=False)
@patch(f"{_FACTORY}.LLMProviderView")
@patch(f"{_FACTORY}.logger")
def test_warns_when_no_model_supports_vision(
    mock_logger: MagicMock,
    mock_provider_view: MagicMock,  # noqa: ARG001
    mock_supports: MagicMock,  # noqa: ARG001
    mock_fetch_models: MagicMock,
    mock_fetch_default: MagicMock,  # noqa: ARG001
    mock_session: MagicMock,  # noqa: ARG001
) -> None:
    mock_fetch_models.return_value = [
        _make_mock_model(name="text-model-1", provider="openai"),
        _make_mock_model(name="text-model-2", provider="azure", provider_id=2),
    ]

    result = get_default_llm_with_vision()

    assert result is None
    warning_calls = [
        call
        for call in mock_logger.warning.call_args_list
        if "no vision-capable model" in str(call).lower()
    ]
    assert len(warning_calls) == 1


================================================
FILE: backend/tests/unit/onyx/natural_language_processing/test_search_nlp_models.py
================================================
from collections.abc import AsyncGenerator
from typing import List
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from httpx import AsyncClient
from litellm.exceptions import RateLimitError

from onyx.llm.constants import LlmProviderNames
from onyx.natural_language_processing.search_nlp_models import CloudEmbedding
from shared_configs.enums import EmbeddingProvider
from shared_configs.enums import EmbedTextType


@pytest.fixture
async def mock_http_client() -> AsyncGenerator[AsyncMock, None]:
    with patch("httpx.AsyncClient") as mock:
        client = AsyncMock(spec=AsyncClient)
        mock.return_value = client
        client.post = AsyncMock()
        async with client as c:
            yield c


@pytest.fixture
def sample_embeddings() -> List[List[float]]:
    return [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]


@pytest.mark.asyncio
async def test_cloud_embedding_context_manager() -> None:
    async with CloudEmbedding("fake-key", EmbeddingProvider.OPENAI) as embedding:
        assert not embedding._closed
    assert embedding._closed


@pytest.mark.asyncio
async def test_cloud_embedding_explicit_close() -> None:
    embedding = CloudEmbedding("fake-key", EmbeddingProvider.OPENAI)
    assert not embedding._closed
    await embedding.aclose()
    assert embedding._closed


@pytest.mark.asyncio
async def test_openai_embedding(
    mock_http_client: AsyncMock,  # noqa: ARG001
    sample_embeddings: List[List[float]],
) -> None:
    with patch("openai.AsyncOpenAI") as mock_openai:
        mock_client = AsyncMock()
        mock_openai.return_value = mock_client

        mock_response = MagicMock()
        mock_response.data = [MagicMock(embedding=emb) for emb in sample_embeddings]
        mock_client.embeddings.create = AsyncMock(return_value=mock_response)

        embedding = CloudEmbedding("fake-key", EmbeddingProvider.OPENAI)
        result = await embedding._embed_openai(
            ["test1", "test2"], "text-embedding-ada-002", None
        )

        assert result == sample_embeddings
        mock_client.embeddings.create.assert_called_once()


@pytest.mark.asyncio
async def test_rate_limit_handling() -> None:
    with patch(
        "onyx.natural_language_processing.search_nlp_models.CloudEmbedding.embed"
    ) as mock_embed:
        mock_embed.side_effect = RateLimitError(
            "Rate limit exceeded",
            llm_provider=LlmProviderNames.OPENAI,
            model="fake-model",
        )

        embedding = CloudEmbedding("fake-key", EmbeddingProvider.OPENAI)

        with pytest.raises(RateLimitError):
            await embedding.embed(
                texts=["test"],
                model_name="fake-model",
                text_type=EmbedTextType.QUERY,
            )


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/conftest.py
================================================
"""Fixtures for Discord bot unit tests."""

import random
from collections.abc import Callable
from typing import Any
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import discord
import pytest


class AsyncIteratorMock:
    """Helper class to mock async iterators like channel.history()."""

    def __init__(self, items: list[Any]) -> None:
        self.items = items
        self.index = 0

    def __aiter__(self) -> "AsyncIteratorMock":
        return self

    async def __anext__(self) -> Any:
        if self.index >= len(self.items):
            raise StopAsyncIteration
        item = self.items[self.index]
        self.index += 1
        return item


def mock_message(
    content: str = "Test message",
    author_bot: bool = False,
    message_type: discord.MessageType = discord.MessageType.default,
    reference: MagicMock | None = None,
    message_id: int | None = None,
    author_id: int | None = None,
    author_display_name: str | None = None,
) -> MagicMock:
    """Helper to create mock Discord messages."""
    msg = MagicMock(spec=discord.Message)
    msg.id = message_id or random.randint(100000, 999999)
    msg.content = content
    msg.author = MagicMock()
    msg.author.id = author_id or random.randint(100000, 999999)
    msg.author.bot = author_bot
    msg.author.display_name = author_display_name or ("Bot" if author_bot else "User")
    msg.type = message_type
    msg.reference = reference
    msg.mentions = []
    msg.role_mentions = []
    msg.channel_mentions = []
    return msg


@pytest.fixture
def mock_bot_user() -> MagicMock:
    """Mock Discord bot user."""
    user = MagicMock(spec=discord.ClientUser)
    user.id = 123456789
    user.display_name = "OnyxBot"
    user.bot = True
    return user


@pytest.fixture
def mock_discord_guild() -> MagicMock:
    """Mock Discord guild with channels."""
    guild = MagicMock(spec=discord.Guild)
    guild.id = 987654321
    guild.name = "Test Server"
    guild.default_role = MagicMock()

    # Create some mock channels
    text_channel = MagicMock(spec=discord.TextChannel)
    text_channel.id = 111111111
    text_channel.name = "general"
    text_channel.type = discord.ChannelType.text
    perms = MagicMock()
    perms.view_channel = True
    text_channel.permissions_for.return_value = perms

    forum_channel = MagicMock(spec=discord.ForumChannel)
    forum_channel.id = 222222222
    forum_channel.name = "forum"
    forum_channel.type = discord.ChannelType.forum
    forum_channel.permissions_for.return_value = perms

    guild.channels = [text_channel, forum_channel]
    guild.text_channels = [text_channel]
    guild.forum_channels = [forum_channel]

    return guild


@pytest.fixture
def mock_discord_message(mock_bot_user: MagicMock) -> MagicMock:  # noqa: ARG001
    """Mock Discord message for testing."""
    msg = MagicMock(spec=discord.Message)
    msg.id = 555555555
    msg.author = MagicMock()
    msg.author.id = 444444444
    msg.author.bot = False
    msg.author.display_name = "TestUser"
    msg.content = "Hello bot"
    msg.guild = MagicMock()
    msg.guild.id = 987654321
    msg.guild.name = "Test Server"
    msg.channel = MagicMock()
    msg.channel.id = 111111111
    msg.channel.name = "general"
    msg.type = discord.MessageType.default
    msg.mentions = []
    msg.role_mentions = []
    msg.channel_mentions = []
    msg.reference = None
    return msg


@pytest.fixture
def mock_thread_with_messages(mock_bot_user: MagicMock) -> MagicMock:
    """Mock Discord thread with message history."""
    thread = MagicMock(spec=discord.Thread)
    thread.id = 666666666
    thread.name = "Test Thread"
    thread.owner_id = mock_bot_user.id
    thread.parent = MagicMock(spec=discord.TextChannel)
    thread.parent.id = 111111111

    # Mock starter message
    starter = mock_message(
        content="Thread starter message",
        author_bot=False,
        message_id=thread.id,
    )

    messages = [
        mock_message(author_bot=False, content="User msg 1", message_id=100),
        mock_message(author_bot=True, content="Bot response", message_id=101),
        mock_message(author_bot=False, content="User msg 2", message_id=102),
    ]

    # Setup async iterator for history
    def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
        return AsyncIteratorMock(messages)

    thread.history = history

    # Mock parent.fetch_message
    async def fetch_starter(msg_id: int) -> MagicMock:
        if msg_id == thread.id:
            return starter
        raise discord.NotFound(MagicMock(), "Not found")

    thread.parent.fetch_message = AsyncMock(side_effect=fetch_starter)

    return thread


@pytest.fixture
def mock_thread_forum_parent() -> MagicMock:
    """Mock thread with ForumChannel parent (special case)."""
    thread = MagicMock(spec=discord.Thread)
    thread.id = 777777777
    thread.name = "Forum Post"
    thread.parent = MagicMock(spec=discord.ForumChannel)
    thread.parent.id = 222222222
    return thread


@pytest.fixture
def mock_reply_chain() -> MagicMock:
    """Mock message with reply chain."""
    # Build chain backwards: msg3 -> msg2 -> msg1
    ref3 = MagicMock()
    ref3.message_id = 1003

    ref2 = MagicMock()
    ref2.message_id = 1002

    msg3 = mock_message(content="Third message", reference=None, message_id=1003)
    msg2 = mock_message(content="Second message", reference=ref3, message_id=1002)
    msg1 = mock_message(content="First message", reference=ref2, message_id=1001)

    # Store messages for lookup
    msg1._chain = {1002: msg2, 1003: msg3}
    msg2._chain = {1003: msg3}

    return msg1


@pytest.fixture
def mock_guild_config_enabled() -> MagicMock:
    """Guild config that is enabled."""
    config = MagicMock()
    config.id = 1
    config.guild_id = 987654321
    config.enabled = True
    config.default_persona_id = 1
    return config


@pytest.fixture
def mock_guild_config_disabled() -> MagicMock:
    """Guild config that is disabled."""
    config = MagicMock()
    config.id = 2
    config.guild_id = 987654321
    config.enabled = False
    config.default_persona_id = None
    return config


@pytest.fixture
def mock_channel_config_factory() -> Callable[..., MagicMock]:
    """Factory fixture for creating channel configs with various settings."""

    def _make_config(
        enabled: bool = True,
        require_bot_invocation: bool = True,
        thread_only_mode: bool = False,
        persona_override_id: int | None = None,
    ) -> MagicMock:
        config = MagicMock()
        config.id = random.randint(1, 1000)
        config.channel_id = 111111111
        config.enabled = enabled
        config.require_bot_invocation = require_bot_invocation
        config.thread_only_mode = thread_only_mode
        config.persona_override_id = persona_override_id
        return config

    return _make_config


@pytest.fixture
def mock_message_with_bot_mention(mock_bot_user: MagicMock) -> MagicMock:
    """Message that mentions the bot."""
    msg = MagicMock(spec=discord.Message)
    msg.id = 888888888
    msg.mentions = [mock_bot_user]
    msg.author = MagicMock()
    msg.author.id = 444444444
    msg.author.bot = False
    msg.author.display_name = "TestUser"
    msg.type = discord.MessageType.default
    msg.content = f"<@{mock_bot_user.id}> hello"
    msg.reference = None
    msg.guild = MagicMock()
    msg.guild.id = 987654321
    msg.channel = MagicMock()
    msg.channel.id = 111111111
    msg.role_mentions = []
    msg.channel_mentions = []
    return msg


@pytest.fixture
def mock_guild_with_members() -> MagicMock:
    """Mock guild for mention resolution."""
    guild = MagicMock(spec=discord.Guild)

    def get_member(member_id: int) -> MagicMock:
        member = MagicMock()
        member.display_name = f"User{member_id}"
        return member

    def get_role(role_id: int) -> MagicMock:
        role = MagicMock()
        role.name = f"Role{role_id}"
        return role

    def get_channel(channel_id: int) -> MagicMock:
        channel = MagicMock()
        channel.name = f"channel{channel_id}"
        return channel

    guild.get_member = get_member
    guild.get_role = get_role
    guild.get_channel = get_channel
    return guild


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_api_client.py
================================================
"""Unit tests for Discord bot API client.

Tests for OnyxAPIClient class functionality.
"""

from typing import Any
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import aiohttp
import pytest

from onyx.chat.models import ChatFullResponse
from onyx.onyxbot.discord.api_client import OnyxAPIClient
from onyx.onyxbot.discord.constants import API_REQUEST_TIMEOUT
from onyx.onyxbot.discord.exceptions import APIConnectionError
from onyx.onyxbot.discord.exceptions import APIResponseError
from onyx.onyxbot.discord.exceptions import APITimeoutError


class MockAsyncContextManager:
    """Helper class to create proper async context managers for testing."""

    def __init__(
        self, return_value: Any = None, enter_side_effect: Exception | None = None
    ) -> None:
        self.return_value = return_value
        self.enter_side_effect = enter_side_effect

    async def __aenter__(self) -> Any:
        if self.enter_side_effect:
            raise self.enter_side_effect
        return self.return_value

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        pass


class TestClientLifecycle:
    """Tests for API client lifecycle management."""

    @pytest.mark.asyncio
    async def test_initialize_creates_session(self) -> None:
        """initialize() creates aiohttp session."""
        client = OnyxAPIClient()
        assert client._session is None

        with patch("aiohttp.ClientSession") as mock_session_class:
            mock_session = MagicMock()
            mock_session_class.return_value = mock_session

            await client.initialize()

        assert client._session is not None
        mock_session_class.assert_called_once()

    def test_is_initialized_before_init(self) -> None:
        """is_initialized returns False before initialize()."""
        client = OnyxAPIClient()
        assert client.is_initialized is False

    @pytest.mark.asyncio
    async def test_is_initialized_after_init(self) -> None:
        """is_initialized returns True after initialize()."""
        client = OnyxAPIClient()

        with patch("aiohttp.ClientSession"):
            await client.initialize()

        assert client.is_initialized is True

    @pytest.mark.asyncio
    async def test_close_closes_session(self) -> None:
        """close() closes session and resets is_initialized."""
        client = OnyxAPIClient()

        mock_session = AsyncMock()
        with patch("aiohttp.ClientSession", return_value=mock_session):
            await client.initialize()
            assert client.is_initialized is True

            await client.close()

        assert client.is_initialized is False
        mock_session.close.assert_called_once()

    @pytest.mark.asyncio
    async def test_send_message_not_initialized(self) -> None:
        """send_chat_message() before initialize() raises APIConnectionError."""
        client = OnyxAPIClient()

        with pytest.raises(APIConnectionError) as exc_info:
            await client.send_chat_message("test", "api_key")

        assert "not initialized" in str(exc_info.value)


class TestSendChatMessage:
    """Tests for send_chat_message functionality."""

    @pytest.mark.asyncio
    async def test_send_message_success(self) -> None:
        """Valid request returns ChatFullResponse."""
        client = OnyxAPIClient()

        response_data = {
            "answer": "Test response",
            "citations": [],
            "error_msg": None,
        }

        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value=response_data)

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        with patch.object(
            ChatFullResponse,
            "model_validate",
            return_value=MagicMock(answer="Test response", error_msg=None),
        ):
            result = await client.send_chat_message("Hello", "api_key_123")

        assert result is not None

    @pytest.mark.asyncio
    async def test_send_message_with_persona(self) -> None:
        """persona_id is passed to API."""
        client = OnyxAPIClient()

        response_data = {"answer": "Response", "citations": [], "error_msg": None}

        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value=response_data)

        mock_session = MagicMock()
        mock_post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )
        mock_session.post = mock_post

        client._session = mock_session

        with patch.object(
            ChatFullResponse,
            "model_validate",
            return_value=MagicMock(answer="Response", error_msg=None),
        ):
            await client.send_chat_message("Hello", "api_key", persona_id=5)

        # Verify persona was included in request
        call_args = mock_post.call_args
        json_data = call_args.kwargs.get("json") or call_args[1].get("json")
        assert json_data is not None

    @pytest.mark.asyncio
    async def test_send_message_401_error(self) -> None:
        """Invalid API key returns APIResponseError with 401."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 401

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        with pytest.raises(APIResponseError) as exc_info:
            await client.send_chat_message("Hello", "bad_key")

        assert exc_info.value.status_code == 401

    @pytest.mark.asyncio
    async def test_send_message_403_error(self) -> None:
        """Persona not accessible returns APIResponseError with 403."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 403

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        with pytest.raises(APIResponseError) as exc_info:
            await client.send_chat_message("Hello", "api_key", persona_id=999)

        assert exc_info.value.status_code == 403

    @pytest.mark.asyncio
    async def test_send_message_timeout(self) -> None:
        """Request timeout raises APITimeoutError."""
        client = OnyxAPIClient()

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(
                enter_side_effect=TimeoutError("Timeout")
            )
        )

        client._session = mock_session

        with pytest.raises(APITimeoutError):
            await client.send_chat_message("Hello", "api_key")

    @pytest.mark.asyncio
    async def test_send_message_connection_error(self) -> None:
        """Network failure raises APIConnectionError."""
        client = OnyxAPIClient()

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(
                enter_side_effect=aiohttp.ClientConnectorError(
                    MagicMock(), OSError("Connection refused")
                )
            )
        )

        client._session = mock_session

        with pytest.raises(APIConnectionError):
            await client.send_chat_message("Hello", "api_key")

    @pytest.mark.asyncio
    async def test_send_message_server_error(self) -> None:
        """500 response raises APIResponseError with 500."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 500
        mock_response.text = AsyncMock(return_value="Internal Server Error")

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        with pytest.raises(APIResponseError) as exc_info:
            await client.send_chat_message("Hello", "api_key")

        assert exc_info.value.status_code == 500


class TestHealthCheck:
    """Tests for health_check functionality."""

    @pytest.mark.asyncio
    async def test_health_check_success(self) -> None:
        """Server healthy returns True."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 200

        mock_session = MagicMock()
        mock_session.get = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        result = await client.health_check()
        assert result is True

    @pytest.mark.asyncio
    async def test_health_check_failure(self) -> None:
        """Server unhealthy returns False."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 503

        mock_session = MagicMock()
        mock_session.get = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        result = await client.health_check()
        assert result is False

    @pytest.mark.asyncio
    async def test_health_check_timeout(self) -> None:
        """Request times out returns False."""
        client = OnyxAPIClient()

        mock_session = MagicMock()
        mock_session.get = MagicMock(
            return_value=MockAsyncContextManager(
                enter_side_effect=TimeoutError("Timeout")
            )
        )

        client._session = mock_session

        result = await client.health_check()
        assert result is False

    @pytest.mark.asyncio
    async def test_health_check_not_initialized(self) -> None:
        """Health check before initialize returns False."""
        client = OnyxAPIClient()

        result = await client.health_check()
        assert result is False


class TestResponseParsing:
    """Tests for API response parsing."""

    @pytest.mark.asyncio
    async def test_response_malformed_json(self) -> None:
        """API returns invalid JSON raises exception."""
        client = OnyxAPIClient()

        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(side_effect=ValueError("Invalid JSON"))

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        with pytest.raises(ValueError):
            await client.send_chat_message("Hello", "api_key")

    @pytest.mark.asyncio
    async def test_response_with_error_msg(self) -> None:
        """200 status but error_msg present - warning logged, response returned."""
        client = OnyxAPIClient()

        response_data = {
            "answer": "Partial response",
            "citations": [],
            "error_msg": "Some warning",
        }

        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value=response_data)

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        mock_result = MagicMock()
        mock_result.answer = "Partial response"
        mock_result.error_msg = "Some warning"

        with patch.object(ChatFullResponse, "model_validate", return_value=mock_result):
            result = await client.send_chat_message("Hello", "api_key")

        # Should still return response
        assert result is not None

    @pytest.mark.asyncio
    async def test_response_empty_answer(self) -> None:
        """answer field is empty string - handled gracefully."""
        client = OnyxAPIClient()

        response_data = {
            "answer": "",
            "citations": [],
            "error_msg": None,
        }

        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value=response_data)

        mock_session = MagicMock()
        mock_session.post = MagicMock(
            return_value=MockAsyncContextManager(return_value=mock_response)
        )

        client._session = mock_session

        mock_result = MagicMock()
        mock_result.answer = ""
        mock_result.error_msg = None

        with patch.object(ChatFullResponse, "model_validate", return_value=mock_result):
            result = await client.send_chat_message("Hello", "api_key")

        # Should return response even with empty answer
        assert result is not None


class TestClientConfiguration:
    """Tests for client configuration."""

    def test_default_timeout(self) -> None:
        """Client uses API_REQUEST_TIMEOUT by default."""
        client = OnyxAPIClient()
        assert client._timeout == API_REQUEST_TIMEOUT

    def test_custom_timeout(self) -> None:
        """Client accepts custom timeout."""
        client = OnyxAPIClient(timeout=60)
        assert client._timeout == 60

    @pytest.mark.asyncio
    async def test_double_initialize_warning(self) -> None:
        """Calling initialize() twice logs warning but doesn't error."""
        client = OnyxAPIClient()

        with patch("aiohttp.ClientSession") as mock_session_class:
            mock_session = MagicMock()
            mock_session_class.return_value = mock_session

            await client.initialize()
            # Second call should be safe
            await client.initialize()

        # Should only create one session
        assert mock_session_class.call_count == 1


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py
================================================
"""Unit tests for Discord bot cache manager.

Tests for DiscordCacheManager class functionality.
"""

import asyncio
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.onyxbot.discord.cache import DiscordCacheManager


class TestCacheInitialization:
    """Tests for cache initialization."""

    def test_cache_starts_empty(self) -> None:
        """New cache manager has empty caches."""
        cache = DiscordCacheManager()
        assert cache._guild_tenants == {}
        assert cache._api_keys == {}
        assert cache.is_initialized is False

    @pytest.mark.asyncio
    async def test_cache_refresh_all_loads_guilds(self) -> None:
        """refresh_all() loads all active guilds."""
        cache = DiscordCacheManager()

        mock_config1 = MagicMock()
        mock_config1.guild_id = 111111
        mock_config1.enabled = True

        mock_config2 = MagicMock()
        mock_config2.guild_id = 222222
        mock_config2.enabled = True

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config1, mock_config2],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="test_api_key",
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        assert cache.is_initialized is True
        assert 111111 in cache._guild_tenants
        assert 222222 in cache._guild_tenants
        assert cache._guild_tenants[111111] == "tenant1"
        assert cache._guild_tenants[222222] == "tenant1"

    @pytest.mark.asyncio
    async def test_cache_refresh_provisions_api_key(self) -> None:
        """Refresh for tenant without key creates API key."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="new_api_key",
            ) as mock_provision,
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        assert cache._api_keys.get("tenant1") == "new_api_key"
        mock_provision.assert_called()


class TestCacheLookups:
    """Tests for cache lookup operations."""

    def test_get_tenant_returns_correct(self) -> None:
        """Lookup registered guild returns correct tenant ID."""
        cache = DiscordCacheManager()
        cache._guild_tenants[123456] = "tenant1"

        result = cache.get_tenant(123456)
        assert result == "tenant1"

    def test_get_tenant_returns_none_unknown(self) -> None:
        """Lookup unregistered guild returns None."""
        cache = DiscordCacheManager()

        result = cache.get_tenant(999999)
        assert result is None

    def test_get_api_key_returns_correct(self) -> None:
        """Lookup tenant's API key returns valid key."""
        cache = DiscordCacheManager()
        cache._api_keys["tenant1"] = "api_key_123"

        result = cache.get_api_key("tenant1")
        assert result == "api_key_123"

    def test_get_api_key_returns_none_unknown(self) -> None:
        """Lookup unknown tenant returns None."""
        cache = DiscordCacheManager()

        result = cache.get_api_key("unknown_tenant")
        assert result is None

    def test_get_all_guild_ids(self) -> None:
        """After loading returns all cached guild IDs."""
        cache = DiscordCacheManager()
        cache._guild_tenants = {111: "t1", 222: "t2", 333: "t1"}

        result = cache.get_all_guild_ids()
        assert set(result) == {111, 222, 333}


class TestCacheUpdates:
    """Tests for cache update operations."""

    @pytest.mark.asyncio
    async def test_refresh_guild_adds_new(self) -> None:
        """refresh_guild() for new guild adds it to cache."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        with (
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="api_key",
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_guild(111111, "tenant1")

        assert cache.get_tenant(111111) == "tenant1"

    @pytest.mark.asyncio
    async def test_refresh_guild_verifies_active(self) -> None:
        """refresh_guild() for disabled guild doesn't add it."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = False  # Disabled!

        with (
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_guild(111111, "tenant1")

        # Should not be added because it's disabled
        assert cache.get_tenant(111111) is None

    def test_remove_guild(self) -> None:
        """remove_guild() removes guild from cache."""
        cache = DiscordCacheManager()
        cache._guild_tenants[111111] = "tenant1"

        cache.remove_guild(111111)

        assert cache.get_tenant(111111) is None

    def test_clear_removes_all(self) -> None:
        """clear() empties all caches."""
        cache = DiscordCacheManager()
        cache._guild_tenants = {111: "t1", 222: "t2"}
        cache._api_keys = {"t1": "key1", "t2": "key2"}
        cache._initialized = True

        cache.clear()

        assert cache._guild_tenants == {}
        assert cache._api_keys == {}
        assert cache.is_initialized is False


class TestThreadSafety:
    """Tests for thread/async safety."""

    @pytest.mark.asyncio
    async def test_concurrent_refresh_no_race(self) -> None:
        """Multiple concurrent refresh_all() calls don't corrupt data."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        call_count = 0

        async def slow_refresh() -> tuple[list[int], str]:
            nonlocal call_count
            call_count += 1
            # Simulate slow operation
            await asyncio.sleep(0.01)
            return ([111111], "api_key")

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch.object(cache, "_load_tenant_data", side_effect=slow_refresh),
        ):
            # Run multiple concurrent refreshes
            await asyncio.gather(
                cache.refresh_all(),
                cache.refresh_all(),
                cache.refresh_all(),
            )

        # Each refresh should complete without error
        assert cache.is_initialized is True

    @pytest.mark.asyncio
    async def test_concurrent_read_write(self) -> None:
        """Read during refresh doesn't cause exceptions."""
        cache = DiscordCacheManager()
        cache._guild_tenants[111111] = "tenant1"

        async def read_loop() -> None:
            for _ in range(10):
                cache.get_tenant(111111)
                await asyncio.sleep(0.001)

        async def write_loop() -> None:
            for i in range(10):
                cache._guild_tenants[200000 + i] = f"tenant{i}"
                await asyncio.sleep(0.001)

        # Should not raise any exceptions
        await asyncio.gather(read_loop(), write_loop())


class TestAPIKeyProvisioning:
    """Tests for API key provisioning via cache refresh."""

    @pytest.mark.asyncio
    async def test_api_key_created_on_first_refresh(self) -> None:
        """Cache refresh with no existing key creates new API key."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="new_api_key_123",
            ) as mock_create,
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        mock_create.assert_called_once()
        assert cache.get_api_key("tenant1") == "new_api_key_123"

    @pytest.mark.asyncio
    async def test_api_key_cached_after_creation(self) -> None:
        """Subsequent lookups after creation use cached key."""
        cache = DiscordCacheManager()
        cache._api_keys["tenant1"] = "cached_key"

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
            ) as mock_create,
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        # Should NOT call create because key is already cached
        mock_create.assert_not_called()
        # Cached key should be preserved after refresh
        assert cache.get_api_key("tenant1") == "cached_key"


class TestGatedTenantHandling:
    """Tests for gated tenant filtering."""

    @pytest.mark.asyncio
    async def test_refresh_skips_gated_tenants(self) -> None:
        """Gated tenant's guilds are not loaded."""
        cache = DiscordCacheManager()

        # tenant2 is gated
        gated_tenants = {"tenant2"}

        mock_config_t1 = MagicMock()
        mock_config_t1.guild_id = 111111
        mock_config_t1.enabled = True

        mock_config_t2 = MagicMock()
        mock_config_t2.guild_id = 222222
        mock_config_t2.enabled = True

        def mock_get_configs(db: MagicMock) -> list[MagicMock]:  # noqa: ARG001
            # Track which tenant this was called for
            return [mock_config_t1]  # Always return same for simplicity

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1", "tenant2"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: gated_tenants,
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                side_effect=mock_get_configs,
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="api_key",
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        # Only tenant1 should be loaded (tenant2 is gated)
        assert "tenant1" in cache._api_keys and 111111 in cache._guild_tenants
        # tenant2's guilds should NOT be in cache
        assert "tenant2" not in cache._api_keys and 222222 not in cache._guild_tenants

    @pytest.mark.asyncio
    async def test_gated_check_calls_ee_function(self) -> None:
        """Refresh all tenants calls fetch_ee_implementation_or_noop."""
        cache = DiscordCacheManager()

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ) as mock_ee,
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[],
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        mock_ee.assert_called_once()

    @pytest.mark.asyncio
    async def test_ungated_tenant_included(self) -> None:
        """Regular (ungated) tenant has guilds loaded normally."""
        cache = DiscordCacheManager()

        mock_config = MagicMock()
        mock_config.guild_id = 111111
        mock_config.enabled = True

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),  # No gated tenants
            ),
            patch("onyx.onyxbot.discord.cache.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.cache.get_guild_configs",
                return_value=[mock_config],
            ),
            patch(
                "onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key",
                return_value="api_key",
            ),
        ):
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            await cache.refresh_all()

        assert cache.get_tenant(111111) == "tenant1"


class TestCacheErrorHandling:
    """Tests for error handling in cache operations."""

    @pytest.mark.asyncio
    async def test_refresh_all_handles_tenant_error(self) -> None:
        """Error loading one tenant doesn't stop others."""
        cache = DiscordCacheManager()

        call_count = 0

        async def mock_load(tenant_id: str) -> tuple[list[int], str]:
            nonlocal call_count
            call_count += 1
            if tenant_id == "tenant1":
                raise Exception("Tenant 1 error")
            return ([222222], "api_key")

        with (
            patch(
                "onyx.onyxbot.discord.cache.get_all_tenant_ids",
                return_value=["tenant1", "tenant2"],
            ),
            patch(
                "onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop",
                return_value=lambda: set(),
            ),
            patch.object(cache, "_load_tenant_data", side_effect=mock_load),
        ):
            await cache.refresh_all()

        # Should still complete and load tenant2
        assert call_count == 2  # Both tenants attempted
        assert cache.get_tenant(222222) == "tenant2"


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_context_builders.py
================================================
"""Unit tests for Discord bot context builders.

Tests the thread and reply context building logic with mocked Discord API.
"""

from typing import Any
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

import discord
import pytest

from onyx.onyxbot.discord.constants import MAX_CONTEXT_MESSAGES
from onyx.onyxbot.discord.handle_message import _build_conversation_context
from onyx.onyxbot.discord.handle_message import _build_reply_chain_context
from onyx.onyxbot.discord.handle_message import _build_thread_context
from onyx.onyxbot.discord.handle_message import _format_messages_as_context
from onyx.onyxbot.discord.handle_message import format_message_content
from tests.unit.onyx.onyxbot.discord.conftest import AsyncIteratorMock
from tests.unit.onyx.onyxbot.discord.conftest import mock_message


class TestThreadContextBuilder:
    """Tests for _build_thread_context function."""

    @pytest.mark.asyncio
    async def test_build_thread_context_basic(
        self, mock_thread_with_messages: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """Thread with messages returns context in order."""
        msg = MagicMock(spec=discord.Message)
        msg.id = 999  # Current message ID
        msg.channel = mock_thread_with_messages

        result = await _build_thread_context(msg, mock_bot_user)

        assert result is not None
        assert "Conversation history" in result
        # Should contain message content
        assert "User msg" in result or "Bot response" in result

    @pytest.mark.asyncio
    async def test_build_thread_context_max_limit(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread with 20 messages returns only MAX_CONTEXT_MESSAGES."""
        # Create 20 messages
        messages = [
            mock_message(content=f"Message {i}", message_id=i) for i in range(20)
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)

        def history(**kwargs: Any) -> AsyncIteratorMock:
            limit = kwargs.get("limit", MAX_CONTEXT_MESSAGES)
            return AsyncIteratorMock(messages[:limit])

        thread.history = history
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        assert result is not None
        # Should only have MAX_CONTEXT_MESSAGES worth of content

    @pytest.mark.asyncio
    async def test_build_thread_context_includes_starter(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread with starter message includes it at beginning."""
        starter = mock_message(
            content="This is the thread starter",
            message_id=666666,
        )

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(return_value=starter)

        messages = [
            mock_message(content="Reply 1", message_id=1),
            mock_message(content="Reply 2", message_id=2),
        ]

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        assert result is not None
        assert "thread starter" in result

    @pytest.mark.asyncio
    async def test_build_thread_context_filters_system_messages(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread with system messages only includes content messages."""
        messages = [
            mock_message(
                content="Normal message", message_type=discord.MessageType.default
            ),
            mock_message(
                content="", message_type=discord.MessageType.pins_add
            ),  # System
            mock_message(
                content="Another normal", message_type=discord.MessageType.reply
            ),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        # Should not include system message type
        assert result is not None

    @pytest.mark.asyncio
    async def test_build_thread_context_includes_bot_messages(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Bot messages in thread are included for context."""
        messages = [
            mock_message(content="User question", author_bot=False),
            mock_message(
                content="Bot response",
                author_bot=True,
                author_id=mock_bot_user.id,
                author_display_name="OnyxBot",
            ),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        assert result is not None
        assert "Bot response" in result

    @pytest.mark.asyncio
    async def test_build_thread_context_empty_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread with only system messages returns None."""
        messages = [
            mock_message(content="", message_type=discord.MessageType.pins_add),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        await _build_thread_context(msg, mock_bot_user)
        # Should return None for empty context
        # (depends on implementation - may return None or empty string)

    @pytest.mark.asyncio
    async def test_build_thread_context_forum_channel(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread parent is ForumChannel - does NOT fetch starter message."""
        messages = [
            mock_message(content="Forum reply", message_id=1),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.ForumChannel)  # Forum!
        # Set up mock before calling function so we can verify it wasn't called
        thread.parent.fetch_message = AsyncMock()

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        await _build_thread_context(msg, mock_bot_user)

        # Should not try to fetch starter message for forum channels
        thread.parent.fetch_message.assert_not_called()

    @pytest.mark.asyncio
    async def test_build_thread_context_starter_fetch_fails(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Starter message fetch raises NotFound - continues without starter."""
        messages = [
            mock_message(content="Reply message", message_id=1),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "Not found")
        )

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        # Should still return context without starter
        assert result is not None

    @pytest.mark.asyncio
    async def test_build_thread_context_deduplicates_starter(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Starter also in recent history is not duplicated."""
        starter = mock_message(content="Thread starter", message_id=666666)

        messages = [
            starter,  # Starter in history
            mock_message(content="Reply", message_id=1),
        ]

        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(return_value=starter)

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread

        result = await _build_thread_context(msg, mock_bot_user)

        # Should only have starter once
        if result:
            assert (
                result.count("Thread starter") <= 2
            )  # At most once in formatted output


class TestReplyChainContextBuilder:
    """Tests for _build_reply_chain_context function."""

    @pytest.mark.asyncio
    async def test_build_reply_chain_single_reply(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Message replies to one message returns 1 message in chain."""
        parent = mock_message(content="Parent message", message_id=100)
        parent.reference = None

        child = MagicMock(spec=discord.Message)
        child.id = 200
        child.reference = MagicMock()
        child.reference.message_id = 100
        child.channel = MagicMock()
        child.channel.fetch_message = AsyncMock(return_value=parent)
        child.channel.name = "general"

        result = await _build_reply_chain_context(child, mock_bot_user)

        assert result is not None
        assert "Parent message" in result

    @pytest.mark.asyncio
    async def test_build_reply_chain_deep_chain(self, mock_bot_user: MagicMock) -> None:
        """A → B → C → D reply chain returns full chain in chronological order."""
        msg_d = mock_message(content="Message D", message_id=4)
        msg_d.reference = None

        msg_c = mock_message(content="Message C", message_id=3)
        ref_c = MagicMock()
        ref_c.message_id = 4
        msg_c.reference = ref_c

        msg_b = mock_message(content="Message B", message_id=2)
        ref_b = MagicMock()
        ref_b.message_id = 3
        msg_b.reference = ref_b

        # Current message replying to B
        ref_a = MagicMock()
        ref_a.message_id = 2

        msg_a = MagicMock(spec=discord.Message)
        msg_a.id = 1
        msg_a.reference = ref_a
        msg_a.channel = MagicMock()
        msg_a.channel.name = "general"

        # Mock fetch to return the chain
        message_map = {2: msg_b, 3: msg_c, 4: msg_d}

        async def fetch_message(msg_id: int) -> MagicMock:
            if msg_id in message_map:
                return message_map[msg_id]
            raise discord.NotFound(MagicMock(), "Not found")

        msg_a.channel.fetch_message = AsyncMock(side_effect=fetch_message)

        result = await _build_reply_chain_context(msg_a, mock_bot_user)

        assert result is not None
        # Should have all messages from the chain

    @pytest.mark.asyncio
    async def test_build_reply_chain_max_depth(self, mock_bot_user: MagicMock) -> None:
        """Chain depth > MAX_CONTEXT_MESSAGES stops at limit."""
        # Create a chain longer than MAX_CONTEXT_MESSAGES
        messages = {}
        for i in range(MAX_CONTEXT_MESSAGES + 5, 0, -1):
            msg = mock_message(content=f"Message {i}", message_id=i)
            if i < MAX_CONTEXT_MESSAGES + 5:
                ref = MagicMock()
                ref.message_id = i + 1
                msg.reference = ref
            else:
                msg.reference = None
            messages[i] = msg

        # Start from message 1
        start = MagicMock(spec=discord.Message)
        start.id = 0
        start.reference = MagicMock()
        start.reference.message_id = 1
        start.channel = MagicMock()
        start.channel.name = "general"

        async def fetch_message(msg_id: int) -> MagicMock:
            if msg_id in messages:
                return messages[msg_id]
            raise discord.NotFound(MagicMock(), "Not found")

        start.channel.fetch_message = AsyncMock(side_effect=fetch_message)

        result = await _build_reply_chain_context(start, mock_bot_user)

        # Should have at most MAX_CONTEXT_MESSAGES
        assert result is not None

    @pytest.mark.asyncio
    async def test_build_reply_chain_no_reply(self, mock_bot_user: MagicMock) -> None:
        """Message is not a reply returns None."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = None

        result = await _build_reply_chain_context(msg, mock_bot_user)
        assert result is None

    @pytest.mark.asyncio
    async def test_build_reply_chain_deleted_message(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Reply to deleted message handles gracefully with partial chain."""
        msg = MagicMock(spec=discord.Message)
        msg.id = 200
        msg.reference = MagicMock()
        msg.reference.message_id = 100
        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "Not found")
        )
        msg.channel.name = "general"

        await _build_reply_chain_context(msg, mock_bot_user)
        # Should handle gracefully - may return None or partial context
        # Either is acceptable

    @pytest.mark.asyncio
    async def test_build_reply_chain_missing_reference_data(
        self, mock_bot_user: MagicMock
    ) -> None:
        """message.reference.message_id is None returns None."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = None

        result = await _build_reply_chain_context(msg, mock_bot_user)
        assert result is None

    @pytest.mark.asyncio
    async def test_build_reply_chain_http_exception(
        self, mock_bot_user: MagicMock
    ) -> None:
        """discord.HTTPException on fetch stops chain."""
        msg = MagicMock(spec=discord.Message)
        msg.id = 200
        msg.reference = MagicMock()
        msg.reference.message_id = 100
        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(
            side_effect=discord.HTTPException(MagicMock(), "HTTP error")
        )
        msg.channel.name = "general"

        await _build_reply_chain_context(msg, mock_bot_user)
        # Should handle gracefully


class TestCombinedContext:
    """Tests for combined thread + reply context."""

    @pytest.mark.asyncio
    async def test_combined_context_thread_with_reply(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Reply inside thread includes both contexts."""
        # Create a thread with messages
        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        # Thread history
        thread_messages = [
            mock_message(content="Thread msg 1", message_id=1),
            mock_message(content="Thread msg 2", message_id=2),
        ]

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(thread_messages)

        thread.history = history

        # Message is a reply to another message in the thread
        parent_msg = mock_message(content="Parent message", message_id=2)
        parent_msg.reference = None

        ref = MagicMock()
        ref.message_id = 2

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread
        msg.reference = ref
        msg.channel.fetch_message = AsyncMock(return_value=parent_msg)
        msg.channel.name = "test-thread"

        result = await _build_conversation_context(msg, mock_bot_user)

        # Should have context from the thread
        assert result is not None
        assert "Conversation history" in result

    @pytest.mark.asyncio
    async def test_build_conversation_context_routes_to_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Message in thread routes to _build_thread_context."""
        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "")
        )

        messages = [mock_message(content="Thread msg")]

        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001
            return AsyncIteratorMock(messages)

        thread.history = history

        msg = MagicMock(spec=discord.Message)
        msg.id = 999
        msg.channel = thread
        msg.reference = None

        result = await _build_conversation_context(msg, mock_bot_user)
        assert result is not None

    @pytest.mark.asyncio
    async def test_build_conversation_context_routes_to_reply(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Message with reference routes to _build_reply_chain_context."""
        parent = mock_message(content="Parent", message_id=100)
        parent.reference = None

        msg = MagicMock(spec=discord.Message)
        msg.id = 200
        msg.channel = MagicMock(spec=discord.TextChannel)  # Not a thread
        msg.reference = MagicMock()
        msg.reference.message_id = 100
        msg.channel.fetch_message = AsyncMock(return_value=parent)
        msg.channel.name = "general"

        result = await _build_conversation_context(msg, mock_bot_user)
        assert result is not None


class TestContextFormatting:
    """Tests for context formatting."""

    def test_format_message_content_mentions(self) -> None:
        """Messages with <@123> mentions are converted to @username."""
        msg = MagicMock(spec=discord.Message)
        msg.content = "Hello <@123456789> how are you?"

        user = MagicMock()
        user.id = 123456789
        user.display_name = "TestUser"
        msg.mentions = [user]
        msg.role_mentions = []
        msg.channel_mentions = []

        result = format_message_content(msg)
        assert "@TestUser" in result
        assert "<@123456789>" not in result

    def test_format_message_content_roles(self) -> None:
        """Messages with <@&456> roles are converted to @rolename."""
        msg = MagicMock(spec=discord.Message)
        msg.content = "Attention <@&456789> members"

        role = MagicMock()
        role.id = 456789
        role.name = "Moderators"
        msg.mentions = []
        msg.role_mentions = [role]
        msg.channel_mentions = []

        result = format_message_content(msg)
        assert "@Moderators" in result
        assert "<@&456789>" not in result

    def test_format_message_content_channels(self) -> None:
        """Messages with <#789> channels are converted to #channelname."""
        msg = MagicMock(spec=discord.Message)
        msg.content = "Check out <#789012>"

        channel = MagicMock()
        channel.id = 789012
        channel.name = "announcements"
        msg.mentions = []
        msg.role_mentions = []
        msg.channel_mentions = [channel]

        result = format_message_content(msg)
        assert "#announcements" in result
        assert "<#789012>" not in result

    def test_context_format_output(self, mock_bot_user: MagicMock) -> None:
        """Build full context has expected format."""
        messages: list[Any] = [
            mock_message(content="Hello bot", author_bot=False),
        ]
        messages[0].type = discord.MessageType.default

        result = _format_messages_as_context(messages, mock_bot_user)

        assert result is not None
        assert "Conversation history" in result
        assert "---" in result

    def test_context_format_with_username(self, mock_bot_user: MagicMock) -> None:
        """Messages from users include @username: prefix."""
        msg = mock_message(content="User message", author_bot=False)
        msg.author.display_name = "TestUser"
        msg.type = discord.MessageType.default

        result = _format_messages_as_context([msg], mock_bot_user)

        assert result is not None
        assert "@TestUser:" in result

    def test_context_format_bot_marker(self, mock_bot_user: MagicMock) -> None:
        """Bot messages in context are marked as OnyxBot:."""
        msg = mock_message(
            content="Bot response",
            author_bot=True,
            author_id=mock_bot_user.id,
        )
        msg.type = discord.MessageType.default

        result = _format_messages_as_context([msg], mock_bot_user)

        assert result is not None
        assert "OnyxBot:" in result


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_discord_utils.py
================================================
"""Unit tests for Discord bot utilities.

Tests for:
- Token management (get_bot_token)
- Registration key parsing (parse_discord_registration_key, generate_discord_registration_key)
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.onyxbot.discord.utils import get_bot_token
from onyx.server.manage.discord_bot.utils import generate_discord_registration_key
from onyx.server.manage.discord_bot.utils import parse_discord_registration_key
from onyx.server.manage.discord_bot.utils import REGISTRATION_KEY_PREFIX


class TestGetBotToken:
    """Tests for get_bot_token function."""

    def test_get_token_from_env(self) -> None:
        """When env var is set, returns env var."""
        with patch("onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN", "env_token_123"):
            result = get_bot_token()
            assert result == "env_token_123"

    def test_get_token_from_db(self) -> None:
        """When no env var and DB config exists, returns DB token."""
        mock_config = MagicMock()
        mock_config.bot_token = "db_token_456"

        with (
            patch("onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN", None),
            patch("onyx.onyxbot.discord.utils.AUTH_TYPE", "basic"),  # Not CLOUD
            patch("onyx.onyxbot.discord.utils.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.utils.get_discord_bot_config",
                return_value=mock_config,
            ),
        ):
            mock_session.return_value.__enter__ = MagicMock()
            mock_session.return_value.__exit__ = MagicMock()
            result = get_bot_token()
            assert result == "db_token_456"

    def test_get_token_none(self) -> None:
        """When no env var and no DB config, returns None."""
        with (
            patch("onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN", None),
            patch("onyx.onyxbot.discord.utils.AUTH_TYPE", "basic"),  # Not CLOUD
            patch("onyx.onyxbot.discord.utils.get_session_with_tenant") as mock_session,
            patch(
                "onyx.onyxbot.discord.utils.get_discord_bot_config",
                return_value=None,
            ),
        ):
            mock_session.return_value.__enter__ = MagicMock()
            mock_session.return_value.__exit__ = MagicMock()
            result = get_bot_token()
            assert result is None

    def test_get_token_env_priority(self) -> None:
        """When both env var and DB exist, env var takes priority."""
        mock_config = MagicMock()
        mock_config.bot_token = "db_token_456"

        with (
            patch("onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN", "env_token_123"),
            patch(
                "onyx.onyxbot.discord.utils.get_discord_bot_config",
                return_value=mock_config,
            ),
        ):
            result = get_bot_token()
            # Should return env var, not DB token
            assert result == "env_token_123"


class TestParseRegistrationKey:
    """Tests for parse_discord_registration_key function."""

    def test_parse_registration_key_valid(self) -> None:
        """Valid key format returns tenant_id."""
        key = "discord_tenant123.randomtoken"
        result = parse_discord_registration_key(key)
        assert result == "tenant123"

    def test_parse_registration_key_invalid(self) -> None:
        """Malformed key returns None."""
        result = parse_discord_registration_key("malformed_key")
        assert result is None

    def test_parse_registration_key_missing_prefix(self) -> None:
        """Key without 'discord_' prefix returns None."""
        key = "tenant123.randomtoken"
        result = parse_discord_registration_key(key)
        assert result is None

    def test_parse_registration_key_missing_dot(self) -> None:
        """Key without separator '.' returns None."""
        key = "discord_tenant123randomtoken"
        result = parse_discord_registration_key(key)
        assert result is None

    def test_parse_registration_key_empty_token(self) -> None:
        """Key with empty token part returns None."""
        # This test verifies behavior with empty token after dot
        key = "discord_tenant123."
        result = parse_discord_registration_key(key)
        # Current implementation allows empty token, but returns tenant
        # If this should be invalid, update the implementation
        assert result == "tenant123" or result is None

    def test_parse_registration_key_url_encoded_tenant(self) -> None:
        """Tenant ID with URL encoding is decoded correctly."""
        # URL encoded "my tenant" -> "my%20tenant"
        key = "discord_my%20tenant.randomtoken"
        result = parse_discord_registration_key(key)
        assert result == "my tenant"

    def test_parse_registration_key_special_chars(self) -> None:
        """Key with special characters in tenant ID."""
        # Tenant with slashes (URL encoded)
        key = "discord_tenant%2Fwith%2Fslashes.randomtoken"
        result = parse_discord_registration_key(key)
        assert result == "tenant/with/slashes"


class TestGenerateRegistrationKey:
    """Tests for generate_discord_registration_key function."""

    def test_generate_registration_key(self) -> None:
        """Generated key has correct format."""
        key = generate_discord_registration_key("tenant123")

        assert key.startswith(REGISTRATION_KEY_PREFIX)
        assert "tenant123" in key
        assert "." in key

        # Parse it back to verify round-trip
        parsed = parse_discord_registration_key(key)
        assert parsed == "tenant123"

    def test_generate_registration_key_unique(self) -> None:
        """Each generated key is unique."""
        keys = [generate_discord_registration_key("tenant123") for _ in range(10)]
        assert len(set(keys)) == 10  # All unique

    def test_generate_registration_key_special_tenant(self) -> None:
        """Key generation handles special characters in tenant ID."""
        key = generate_discord_registration_key("my tenant/id")

        # Should be URL encoded
        assert "%20" in key or "%2F" in key

        # Parse it back
        parsed = parse_discord_registration_key(key)
        assert parsed == "my tenant/id"


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_message_utils.py
================================================
"""Unit tests for Discord bot message utilities.

Tests for:
- Message splitting (_split_message)
- Citation formatting (_append_citations)
"""

from unittest.mock import MagicMock

from onyx.chat.models import ChatFullResponse
from onyx.onyxbot.discord.constants import MAX_MESSAGE_LENGTH
from onyx.onyxbot.discord.handle_message import _append_citations
from onyx.onyxbot.discord.handle_message import _split_message


class TestSplitMessage:
    """Tests for _split_message function."""

    def test_split_message_under_limit(self) -> None:
        """Message under 2000 chars returns single chunk."""
        content = "x" * 1999
        chunks = _split_message(content)
        assert len(chunks) == 1
        assert chunks[0] == content

    def test_split_message_at_limit(self) -> None:
        """Message exactly at 2000 chars returns single chunk."""
        content = "x" * MAX_MESSAGE_LENGTH
        chunks = _split_message(content)
        assert len(chunks) == 1
        assert chunks[0] == content

    def test_split_message_over_limit(self) -> None:
        """Message over 2000 chars splits into multiple chunks."""
        content = "x" * 2001
        chunks = _split_message(content)
        assert len(chunks) == 2
        # All chunks should be <= MAX_MESSAGE_LENGTH
        for chunk in chunks:
            assert len(chunk) <= MAX_MESSAGE_LENGTH

    def test_split_at_double_newline(self) -> None:
        """Prefers splitting at double newline."""
        # Create content with double newline near the end but before limit
        first_part = "x" * 1500
        second_part = "y" * 1000
        content = f"{first_part}\n\n{second_part}"

        chunks = _split_message(content)
        assert len(chunks) == 2
        # First chunk should end with or right after the double newline
        assert chunks[0].endswith("\n\n") or first_part in chunks[0]

    def test_split_at_single_newline(self) -> None:
        """When no double newline, splits at single newline."""
        first_part = "x" * 1500
        second_part = "y" * 1000
        content = f"{first_part}\n{second_part}"

        chunks = _split_message(content)
        assert len(chunks) == 2

    def test_split_at_period_space(self) -> None:
        """When no newlines, splits at '. ' (period + space)."""
        first_part = "x" * 1500
        second_part = "y" * 1000
        content = f"{first_part}. {second_part}"

        chunks = _split_message(content)
        assert len(chunks) == 2
        # First chunk should include the period
        assert chunks[0].endswith(". ") or chunks[0].endswith(".")

    def test_split_at_space(self) -> None:
        """When no better breakpoints, splits at space."""
        first_part = "x" * 1500
        second_part = "y" * 1000
        content = f"{first_part} {second_part}"

        chunks = _split_message(content)
        assert len(chunks) == 2

    def test_split_no_breakpoint(self) -> None:
        """Handles gracefully when no breakpoints available (hard split)."""
        # 2001 chars with no spaces or newlines
        content = "x" * 2001
        chunks = _split_message(content)
        assert len(chunks) == 2
        # Content should be preserved
        assert "".join(chunks) == content

    def test_split_threshold_50_percent(self) -> None:
        """Breakpoint at less than 50% of limit is skipped."""
        # Put a breakpoint early (at 40% = 800 chars)
        # and another late (at 80% = 1600 chars)
        early_part = "x" * 800
        middle_part = "m" * 800  # Total: 1600
        late_part = "y" * 600  # Total: 2200
        content = f"{early_part}\n\n{middle_part}\n\n{late_part}"

        chunks = _split_message(content)
        # Should prefer the later breakpoint over the 40% one
        assert len(chunks) == 2
        # First chunk should be longer than 800 chars
        assert len(chunks[0]) > 800

    def test_split_multiple_chunks(self) -> None:
        """5000 char message splits into 3 chunks."""
        content = "x" * 5000
        chunks = _split_message(content)
        assert len(chunks) == 3
        # Each chunk should be <= MAX_MESSAGE_LENGTH
        for chunk in chunks:
            assert len(chunk) <= MAX_MESSAGE_LENGTH

    def test_split_preserves_content(self) -> None:
        """Concatenated chunks equal original content."""
        content = "Hello world! " * 200  # About 2600 chars
        chunks = _split_message(content)
        assert "".join(chunks) == content

    def test_split_with_unicode(self) -> None:
        """Handles unicode characters correctly."""
        # Mix of ASCII and unicode
        content = "Hello " + "🎉" * 500 + " World " + "x" * 1500
        chunks = _split_message(content)
        # Should not break in the middle of emoji
        assert "".join(chunks) == content


class TestAppendCitations:
    """Tests for _append_citations function."""

    def _make_response(
        self,
        answer: str,
        citations: list[dict] | None = None,
        documents: list[dict] | None = None,
    ) -> ChatFullResponse:
        """Helper to create ChatFullResponse with citations."""
        response = MagicMock(spec=ChatFullResponse)
        response.answer = answer

        if citations:
            citation_mocks = []
            for c in citations:
                cm = MagicMock()
                cm.citation_number = c.get("num", 1)
                cm.document_id = c.get("doc_id", "doc1")
                citation_mocks.append(cm)
            response.citation_info = citation_mocks
        else:
            response.citation_info = None

        if documents:
            doc_mocks = []
            for d in documents:
                dm = MagicMock()
                dm.document_id = d.get("doc_id", "doc1")
                dm.semantic_identifier = d.get("name", "Source")
                dm.link = d.get("link")
                doc_mocks.append(dm)
            response.top_documents = doc_mocks
        else:
            response.top_documents = None

        return response

    def test_format_citations_empty_list(self) -> None:
        """No citations returns answer unchanged."""
        response = self._make_response("Test answer")
        result = _append_citations("Test answer", response)
        assert result == "Test answer"
        assert "Sources:" not in result

    def test_format_citations_single(self) -> None:
        """Single citation is formatted correctly."""
        response = self._make_response(
            "Test answer",
            citations=[{"num": 1, "doc_id": "doc1"}],
            documents=[
                {
                    "doc_id": "doc1",
                    "name": "Document One",
                    "link": "https://example.com",
                }
            ],
        )
        result = _append_citations("Test answer", response)
        assert "**Sources:**" in result
        assert "[Document One](<https://example.com>)" in result

    def test_format_citations_multiple(self) -> None:
        """Multiple citations are all formatted and numbered."""
        response = self._make_response(
            "Test answer",
            citations=[
                {"num": 1, "doc_id": "doc1"},
                {"num": 2, "doc_id": "doc2"},
                {"num": 3, "doc_id": "doc3"},
            ],
            documents=[
                {"doc_id": "doc1", "name": "Doc 1", "link": "https://example.com/1"},
                {"doc_id": "doc2", "name": "Doc 2", "link": "https://example.com/2"},
                {"doc_id": "doc3", "name": "Doc 3", "link": "https://example.com/3"},
            ],
        )
        result = _append_citations("Test answer", response)
        assert "1. [Doc 1]" in result
        assert "2. [Doc 2]" in result
        assert "3. [Doc 3]" in result

    def test_format_citations_max_five(self) -> None:
        """Only first 5 citations are included."""
        citations = [{"num": i, "doc_id": f"doc{i}"} for i in range(1, 11)]
        documents = [
            {
                "doc_id": f"doc{i}",
                "name": f"Doc {i}",
                "link": f"https://example.com/{i}",
            }
            for i in range(1, 11)
        ]
        response = self._make_response(
            "Test answer", citations=citations, documents=documents
        )
        result = _append_citations("Test answer", response)

        # Should have 5 citations
        assert "1. [Doc 1]" in result
        assert "5. [Doc 5]" in result
        # Should NOT have 6th citation
        assert "6. [Doc 6]" not in result

    def test_format_citation_no_link(self) -> None:
        """Citation without link formats as plain text (no markdown)."""
        response = self._make_response(
            "Test answer",
            citations=[{"num": 1, "doc_id": "doc1"}],
            documents=[{"doc_id": "doc1", "name": "No Link Doc", "link": None}],
        )
        result = _append_citations("Test answer", response)
        assert "1. No Link Doc" in result
        # Should not have markdown link syntax
        assert "[No Link Doc](<" not in result

    def test_format_citation_empty_name(self) -> None:
        """Empty semantic_identifier defaults to 'Source'."""
        response = self._make_response(
            "Test answer",
            citations=[{"num": 1, "doc_id": "doc1"}],
            documents=[{"doc_id": "doc1", "name": "", "link": "https://example.com"}],
        )
        result = _append_citations("Test answer", response)
        # Should use fallback "Source" name
        assert "[Source]" in result or "Source" in result

    def test_format_citation_link_with_brackets(self) -> None:
        """Link with special characters is wrapped with angle brackets."""
        response = self._make_response(
            "Test answer",
            citations=[{"num": 1, "doc_id": "doc1"}],
            documents=[
                {
                    "doc_id": "doc1",
                    "name": "Special Doc",
                    "link": "https://example.com/path?query=value&other=123",
                }
            ],
        )
        result = _append_citations("Test answer", response)
        # Discord markdown uses <link> to prevent embed
        assert "(<https://example.com" in result

    def test_format_citations_sorted_by_number(self) -> None:
        """Citations are sorted by citation number."""
        # Add in reverse order
        response = self._make_response(
            "Test answer",
            citations=[
                {"num": 3, "doc_id": "doc3"},
                {"num": 1, "doc_id": "doc1"},
                {"num": 2, "doc_id": "doc2"},
            ],
            documents=[
                {"doc_id": "doc1", "name": "Doc 1", "link": "https://example.com/1"},
                {"doc_id": "doc2", "name": "Doc 2", "link": "https://example.com/2"},
                {"doc_id": "doc3", "name": "Doc 3", "link": "https://example.com/3"},
            ],
        )
        result = _append_citations("Test answer", response)

        # Find positions
        pos1 = result.find("1. [Doc 1]")
        pos2 = result.find("2. [Doc 2]")
        pos3 = result.find("3. [Doc 3]")

        # Should be in order
        assert pos1 < pos2 < pos3

    def test_format_citations_with_missing_document(self) -> None:
        """Citation referencing non-existent document is skipped."""
        response = self._make_response(
            "Test answer",
            citations=[
                {"num": 1, "doc_id": "doc1"},
                {"num": 2, "doc_id": "doc_missing"},  # No matching document
            ],
            documents=[
                {"doc_id": "doc1", "name": "Doc 1", "link": "https://example.com/1"},
            ],
        )
        result = _append_citations("Test answer", response)
        assert "Doc 1" in result
        # Missing doc should not appear
        assert "doc_missing" not in result.lower()


================================================
FILE: backend/tests/unit/onyx/onyxbot/discord/test_should_respond.py
================================================
"""Unit tests for Discord bot should_respond logic.

Tests the decision tree for when the bot should respond to messages.
"""

from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch

import discord
import pytest

from onyx.onyxbot.discord.handle_message import check_implicit_invocation
from onyx.onyxbot.discord.handle_message import should_respond


class TestBasicShouldRespond:
    """Tests for basic should_respond decision logic."""

    @pytest.mark.asyncio
    async def test_should_respond_guild_disabled(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """Guild config enabled=false returns False."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = False

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with patch(
                "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                return_value=mock_guild_config,
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is False

    @pytest.mark.asyncio
    async def test_should_respond_guild_enabled(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """Guild config enabled=true proceeds to channel check."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is True

    @pytest.mark.asyncio
    async def test_should_respond_channel_disabled(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """Channel config enabled=false returns False."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = False

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is False

    @pytest.mark.asyncio
    async def test_should_respond_channel_enabled(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """Channel config enabled=true proceeds to mention check."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 2

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is True
        assert result.persona_id == 2

    @pytest.mark.asyncio
    async def test_should_respond_channel_not_found(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """No channel config returns False (not whitelisted)."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=None,  # No config
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is False

    @pytest.mark.asyncio
    async def test_should_respond_require_mention_true_no_mention(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """require_bot_invocation=true with no @mention returns False."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = True
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        # No bot mention
        mock_discord_message.mentions = []

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.check_implicit_invocation",
                    return_value=False,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is False

    @pytest.mark.asyncio
    async def test_should_respond_require_mention_true_with_mention(
        self, mock_message_with_bot_mention: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """require_bot_invocation=true with @mention returns True."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = True
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_message_with_bot_mention, "tenant1", mock_bot_user
                )

        assert result.should_respond is True

    @pytest.mark.asyncio
    async def test_should_respond_require_mention_false_no_mention(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """require_bot_invocation=false with no @mention returns True."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is True


class TestImplicitShouldRespond:
    """Tests for implicit invocation (no @mention required in certain contexts)."""

    @pytest.mark.asyncio
    async def test_implicit_respond_reply_to_bot_message(
        self, mock_bot_user: MagicMock
    ) -> None:
        """User replies to a bot message returns True."""
        # Create a message that replies to the bot
        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = 12345

        # Mock the referenced message as a bot message
        referenced_msg = MagicMock()
        referenced_msg.author.id = mock_bot_user.id

        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is True

    @pytest.mark.asyncio
    async def test_implicit_respond_reply_to_user_message(
        self, mock_bot_user: MagicMock
    ) -> None:
        """User replies to another user's message returns False."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = 12345

        # Mock the referenced message as a user message
        referenced_msg = MagicMock()
        referenced_msg.author.id = 999999  # Different from bot

        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is False

    @pytest.mark.asyncio
    async def test_implicit_respond_in_bot_owned_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Message in thread owned by bot returns True."""
        thread = MagicMock(spec=discord.Thread)
        thread.owner_id = mock_bot_user.id  # Bot owns the thread
        thread.parent = MagicMock(spec=discord.TextChannel)

        msg = MagicMock(spec=discord.Message)
        msg.reference = None
        msg.channel = thread

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is True

    @pytest.mark.asyncio
    async def test_implicit_respond_in_user_owned_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Message in thread owned by user returns False."""
        thread = MagicMock(spec=discord.Thread)
        thread.owner_id = 999999  # User owns the thread
        thread.parent = MagicMock(spec=discord.TextChannel)

        msg = MagicMock(spec=discord.Message)
        msg.reference = None
        msg.channel = thread

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is False

    @pytest.mark.asyncio
    async def test_implicit_respond_reply_in_bot_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Reply to user in bot-owned thread returns True (thread context)."""
        thread = MagicMock(spec=discord.Thread)
        thread.owner_id = mock_bot_user.id
        thread.parent = MagicMock(spec=discord.TextChannel)

        # User replying to another user in bot's thread
        referenced_msg = MagicMock()
        referenced_msg.author.id = 888888  # Another user

        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = 12345
        msg.channel = thread
        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)

        result = await check_implicit_invocation(msg, mock_bot_user)
        # Should return True because it's in bot's thread
        assert result is True

    @pytest.mark.asyncio
    async def test_implicit_respond_thread_from_bot_message(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread created from bot message (non-forum) returns True."""
        thread = MagicMock(spec=discord.Thread)
        thread.id = 777777
        thread.owner_id = 999999  # User owns thread but...
        thread.parent = MagicMock(spec=discord.TextChannel)

        # The starter message is from the bot
        starter_msg = MagicMock()
        starter_msg.author.id = mock_bot_user.id
        thread.parent.fetch_message = AsyncMock(return_value=starter_msg)

        msg = MagicMock(spec=discord.Message)
        msg.reference = None
        msg.channel = thread

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is True

    @pytest.mark.asyncio
    async def test_implicit_respond_forum_channel_excluded(
        self, mock_bot_user: MagicMock, mock_thread_forum_parent: MagicMock
    ) -> None:
        """Thread parent is ForumChannel - does NOT check starter message."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = None
        msg.channel = mock_thread_forum_parent
        mock_thread_forum_parent.owner_id = 999999  # Not bot

        result = await check_implicit_invocation(msg, mock_bot_user)
        # Should be False - forum threads don't use starter message check
        assert result is False

    @pytest.mark.asyncio
    async def test_implicit_respond_combined_with_mention(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Has @mention AND is implicit - should return True (either works)."""
        thread = MagicMock(spec=discord.Thread)
        thread.owner_id = mock_bot_user.id
        thread.parent = MagicMock(spec=discord.TextChannel)

        msg = MagicMock(spec=discord.Message)
        msg.reference = None
        msg.channel = thread
        msg.mentions = [mock_bot_user]

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is True

    @pytest.mark.asyncio
    async def test_implicit_respond_reference_fetch_fails(
        self, mock_bot_user: MagicMock
    ) -> None:
        """discord.NotFound when fetching reply reference returns False."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = 12345
        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(
            side_effect=discord.NotFound(MagicMock(), "Not found")
        )

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is False

    @pytest.mark.asyncio
    async def test_implicit_respond_http_exception(
        self, mock_bot_user: MagicMock
    ) -> None:
        """discord.HTTPException during check returns False."""
        msg = MagicMock(spec=discord.Message)
        msg.reference = MagicMock()
        msg.reference.message_id = 12345
        msg.channel = MagicMock()
        msg.channel.fetch_message = AsyncMock(
            side_effect=discord.HTTPException(MagicMock(), "HTTP error")
        )

        result = await check_implicit_invocation(msg, mock_bot_user)
        assert result is False


class TestThreadOnlyMode:
    """Tests for thread_only_mode behavior."""

    @pytest.mark.asyncio
    async def test_thread_only_mode_message_in_thread(
        self, mock_bot_user: MagicMock
    ) -> None:
        """thread_only_mode=true, message in thread returns True."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = True
        mock_channel_config.persona_override_id = None

        # Create thread message
        thread = MagicMock(spec=discord.Thread)
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.id = 111111111

        msg = MagicMock(spec=discord.Message)
        msg.guild = MagicMock()
        msg.guild.id = 987654321
        msg.channel = thread
        msg.mentions = []
        msg.reference = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(msg, "tenant1", mock_bot_user)

        assert result.should_respond is True
        assert result.thread_only_mode is True

    @pytest.mark.asyncio
    async def test_thread_only_mode_false_message_in_channel(
        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock
    ) -> None:
        """thread_only_mode=false, message in channel returns True."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(
                    mock_discord_message, "tenant1", mock_bot_user
                )

        assert result.should_respond is True
        assert result.thread_only_mode is False


class TestEdgeCases:
    """Edge case tests for should_respond."""

    @pytest.mark.asyncio
    async def test_should_respond_no_guild(self, mock_bot_user: MagicMock) -> None:
        """Message without guild (DM) returns False."""
        msg = MagicMock(spec=discord.Message)
        msg.guild = None

        result = await should_respond(msg, "tenant1", mock_bot_user)
        assert result.should_respond is False

    @pytest.mark.asyncio
    async def test_should_respond_thread_uses_parent_channel_config(
        self, mock_bot_user: MagicMock
    ) -> None:
        """Thread under channel uses parent channel's config."""
        mock_guild_config = MagicMock()
        mock_guild_config.enabled = True
        mock_guild_config.default_persona_id = 1

        mock_channel_config = MagicMock()
        mock_channel_config.enabled = True
        mock_channel_config.require_bot_invocation = False
        mock_channel_config.thread_only_mode = False
        mock_channel_config.persona_override_id = 5  # Specific persona

        # Create thread message
        thread = MagicMock(spec=discord.Thread)
        thread.id = 666666
        thread.parent = MagicMock(spec=discord.TextChannel)
        thread.parent.id = 111111111  # Parent channel ID

        msg = MagicMock(spec=discord.Message)
        msg.guild = MagicMock()
        msg.guild.id = 987654321
        msg.channel = thread
        msg.mentions = []
        msg.reference = None

        with patch(
            "onyx.onyxbot.discord.handle_message.get_session_with_tenant"
        ) as mock_session:
            mock_db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)
            mock_session.return_value.__exit__ = MagicMock()

            with (
                patch(
                    "onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id",
                    return_value=mock_guild_config,
                ),
                patch(
                    "onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids",
                    return_value=mock_channel_config,
                ),
            ):
                result = await should_respond(msg, "tenant1", mock_bot_user)

        assert result.should_respond is True
        # Should use parent's persona override
        assert result.persona_id == 5


================================================
FILE: backend/tests/unit/onyx/onyxbot/test_handle_regular_answer.py
================================================
"""Tests for Slack channel reference resolution and tag filtering
in handle_regular_answer.py."""

from unittest.mock import MagicMock

from slack_sdk.errors import SlackApiError

from onyx.context.search.models import Tag
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
from onyx.onyxbot.slack.handlers.handle_regular_answer import resolve_channel_references


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _mock_client_with_channels(
    channel_map: dict[str, str],
) -> MagicMock:
    """Return a mock WebClient where conversations_info resolves IDs to names."""
    client = MagicMock()

    def _conversations_info(channel: str) -> MagicMock:
        if channel in channel_map:
            resp = MagicMock()
            resp.validate = MagicMock()
            resp.__getitem__ = lambda _self, key: {
                "channel": {
                    "name": channel_map[channel],
                    "is_im": False,
                    "is_mpim": False,
                }
            }[key]
            return resp
        raise SlackApiError("channel_not_found", response=MagicMock())

    client.conversations_info = _conversations_info
    return client


def _mock_logger() -> MagicMock:
    return MagicMock()


# ---------------------------------------------------------------------------
# SLACK_CHANNEL_REF_PATTERN regex tests
# ---------------------------------------------------------------------------


class TestSlackChannelRefPattern:
    def test_matches_bare_channel_id(self) -> None:
        matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y>")
        assert matches == [("C097NBWMY8Y", "")]

    def test_matches_channel_id_with_name(self) -> None:
        matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y|eng-infra>")
        assert matches == [("C097NBWMY8Y", "eng-infra")]

    def test_matches_multiple_channels(self) -> None:
        msg = "compare <#C111AAA> and <#C222BBB|general>"
        matches = SLACK_CHANNEL_REF_PATTERN.findall(msg)
        assert len(matches) == 2
        assert ("C111AAA", "") in matches
        assert ("C222BBB", "general") in matches

    def test_no_match_on_plain_text(self) -> None:
        matches = SLACK_CHANNEL_REF_PATTERN.findall("no channels here")
        assert matches == []

    def test_no_match_on_user_mention(self) -> None:
        matches = SLACK_CHANNEL_REF_PATTERN.findall("<@U12345>")
        assert matches == []


# ---------------------------------------------------------------------------
# resolve_channel_references tests
# ---------------------------------------------------------------------------


class TestResolveChannelReferences:
    def test_resolves_bare_channel_id_via_api(self) -> None:
        client = _mock_client_with_channels({"C097NBWMY8Y": "eng-infra"})
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="summary of <#C097NBWMY8Y> this week",
            client=client,
            logger=logger,
        )

        assert message == "summary of #eng-infra this week"
        assert len(tags) == 1
        assert tags[0] == Tag(tag_key="Channel", tag_value="eng-infra")

    def test_uses_name_from_pipe_format_without_api_call(self) -> None:
        client = MagicMock()
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="check <#C097NBWMY8Y|eng-infra> for updates",
            client=client,
            logger=logger,
        )

        assert message == "check #eng-infra for updates"
        assert tags == [Tag(tag_key="Channel", tag_value="eng-infra")]
        # Should NOT have called the API since name was in the markup
        client.conversations_info.assert_not_called()

    def test_multiple_channels(self) -> None:
        client = _mock_client_with_channels(
            {
                "C111AAA": "eng-infra",
                "C222BBB": "eng-general",
            }
        )
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="compare <#C111AAA> and <#C222BBB>",
            client=client,
            logger=logger,
        )

        assert "#eng-infra" in message
        assert "#eng-general" in message
        assert "<#" not in message
        assert len(tags) == 2
        tag_values = {t.tag_value for t in tags}
        assert tag_values == {"eng-infra", "eng-general"}

    def test_no_channel_references_returns_unchanged(self) -> None:
        client = MagicMock()
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="just a normal message with no channels",
            client=client,
            logger=logger,
        )

        assert message == "just a normal message with no channels"
        assert tags == []

    def test_api_failure_skips_channel_gracefully(self) -> None:
        # Client that fails for all channel lookups
        client = _mock_client_with_channels({})
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="check <#CBADID123>",
            client=client,
            logger=logger,
        )

        # Message should remain unchanged for the failed channel
        assert "<#CBADID123>" in message
        assert tags == []
        logger.warning.assert_called_once()

    def test_partial_failure_resolves_what_it_can(self) -> None:
        # Only one of two channels resolves
        client = _mock_client_with_channels({"C111AAA": "eng-infra"})
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="compare <#C111AAA> and <#CBADID123>",
            client=client,
            logger=logger,
        )

        assert "#eng-infra" in message
        assert "<#CBADID123>" in message  # failed one stays raw
        assert len(tags) == 1
        assert tags[0].tag_value == "eng-infra"

    def test_duplicate_channel_produces_single_tag(self) -> None:
        client = _mock_client_with_channels({"C111AAA": "eng-infra"})
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="summarize <#C111AAA> and compare with <#C111AAA>",
            client=client,
            logger=logger,
        )

        assert message == "summarize #eng-infra and compare with #eng-infra"
        assert len(tags) == 1
        assert tags[0].tag_value == "eng-infra"

    def test_mixed_pipe_and_bare_formats(self) -> None:
        client = _mock_client_with_channels({"C222BBB": "random"})
        logger = _mock_logger()

        message, tags = resolve_channel_references(
            message="see <#C111AAA|eng-infra> and <#C222BBB>",
            client=client,
            logger=logger,
        )

        assert "#eng-infra" in message
        assert "#random" in message
        assert len(tags) == 2


================================================
FILE: backend/tests/unit/onyx/onyxbot/test_slack_blocks.py
================================================
from datetime import datetime

import pytest
import pytz
import timeago  # type: ignore

from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SavedSearchDoc
from onyx.onyxbot.slack.blocks import _build_documents_blocks


def _make_saved_doc(updated_at: datetime | None) -> SavedSearchDoc:
    return SavedSearchDoc(
        db_doc_id=1,
        document_id="doc-1",
        chunk_ind=0,
        semantic_identifier="Example Doc",
        link="https://example.com",
        blurb="Some blurb",
        source_type=DocumentSource.FILE,
        boost=0,
        hidden=False,
        metadata={},
        score=0.0,
        match_highlights=[],
        updated_at=updated_at,
        primary_owners=["user@example.com"],
        secondary_owners=None,
        is_relevant=None,
        relevance_explanation=None,
        is_internet=False,
    )


def test_build_documents_blocks_formats_naive_timestamp(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    naive_timestamp: datetime = datetime(2024, 1, 1, 12, 0, 0)
    captured: dict[str, datetime] = {}

    # Save the original timeago.format so we can call it inside the fake
    original_timeago_format = timeago.format

    def fake_timeago_format(doc_dt: datetime, now: datetime) -> str:
        captured["doc"] = doc_dt
        result = original_timeago_format(doc_dt, now)
        captured["result"] = result
        return result

    monkeypatch.setattr(
        "onyx.onyxbot.slack.blocks.timeago.format",
        fake_timeago_format,
    )

    blocks = _build_documents_blocks(
        documents=[_make_saved_doc(updated_at=naive_timestamp)],
        message_id=42,
    )

    assert len(blocks) >= 2
    section_block = blocks[1].to_dict()
    assert "result" in captured
    expected_text = (
        f"<https://example.com|Example Doc>\n_Updated {captured['result']}_\n>"
    )
    assert section_block["text"]["text"] == expected_text

    assert "doc" in captured
    formatted_timestamp: datetime = captured["doc"]
    expected_timestamp: datetime = naive_timestamp.replace(tzinfo=pytz.utc)
    assert formatted_timestamp == expected_timestamp


================================================
FILE: backend/tests/unit/onyx/onyxbot/test_slack_channel_config.py
================================================
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.db.slack_channel_config import create_slack_channel_persona


def test_create_slack_channel_persona_reuses_existing_persona() -> None:
    db_session = MagicMock()
    existing_persona = MagicMock()
    existing_persona.id = 42
    db_session.scalar.return_value = existing_persona

    fake_tool = MagicMock()
    fake_tool.id = 7

    with (
        patch(
            "onyx.db.slack_channel_config.get_builtin_tool",
            return_value=fake_tool,
        ),
        patch("onyx.db.slack_channel_config.upsert_persona") as mock_upsert,
    ):
        mock_upsert.return_value = MagicMock()

        create_slack_channel_persona(
            db_session=db_session,
            channel_name="general",
            document_set_ids=[1],
        )

    mock_upsert.assert_called_once()
    assert mock_upsert.call_args.kwargs["persona_id"] == existing_persona.id


================================================
FILE: backend/tests/unit/onyx/onyxbot/test_slack_formatting.py
================================================
from onyx.onyxbot.slack.formatting import _convert_slack_links_to_markdown
from onyx.onyxbot.slack.formatting import _normalize_link_destinations
from onyx.onyxbot.slack.formatting import _sanitize_html
from onyx.onyxbot.slack.formatting import _transform_outside_code_blocks
from onyx.onyxbot.slack.formatting import format_slack_message
from onyx.onyxbot.slack.utils import remove_slack_text_interactions
from onyx.utils.text_processing import decode_escapes


def test_normalize_citation_link_wraps_url_with_parentheses() -> None:
    message = (
        "See [[1]](https://example.com/Access%20ID%20Card(s)%20Guide.pdf) for details."
    )

    normalized = _normalize_link_destinations(message)

    assert (
        "See [[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>) for details."
        == normalized
    )


def test_normalize_citation_link_keeps_existing_angle_brackets() -> None:
    message = "[[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>)"

    normalized = _normalize_link_destinations(message)

    assert message == normalized


def test_normalize_citation_link_handles_multiple_links() -> None:
    message = "[[1]](https://example.com/(USA)%20Guide.pdf) [[2]](https://example.com/Plan(s)%20Overview.pdf)"

    normalized = _normalize_link_destinations(message)

    assert "[[1]](<https://example.com/(USA)%20Guide.pdf>)" in normalized
    assert "[[2]](<https://example.com/Plan(s)%20Overview.pdf>)" in normalized


def test_format_slack_message_keeps_parenthesized_citation_links_intact() -> None:
    message = (
        "Download [[1]](https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf)"
    )

    formatted = format_slack_message(message)
    rendered = decode_escapes(remove_slack_text_interactions(formatted))

    assert (
        "<https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf|[1]>"
        in rendered
    )
    assert "|[1]>%20Access%20ID%20Card" not in rendered


def test_slack_style_links_converted_to_clickable_links() -> None:
    message = "Visit <https://example.com/page|Example Page> for details."

    formatted = format_slack_message(message)

    assert "<https://example.com/page|Example Page>" in formatted
    assert "&lt;" not in formatted


def test_slack_style_links_preserved_inside_code_blocks() -> None:
    message = "```\n<https://example.com|click>\n```"

    converted = _convert_slack_links_to_markdown(message)

    assert "<https://example.com|click>" in converted


def test_html_tags_stripped_outside_code_blocks() -> None:
    message = "Hello<br/>world ```<div>code</div>``` after"

    sanitized = _transform_outside_code_blocks(message, _sanitize_html)

    assert "<br" not in sanitized
    assert "<div>code</div>" in sanitized


def test_format_slack_message_block_spacing() -> None:
    message = "Paragraph one.\n\nParagraph two."

    formatted = format_slack_message(message)

    assert "Paragraph one.\n\nParagraph two." == formatted


def test_format_slack_message_code_block_no_trailing_blank_line() -> None:
    message = "```python\nprint('hi')\n```"

    formatted = format_slack_message(message)

    assert formatted.endswith("print('hi')\n```")


def test_format_slack_message_ampersand_not_double_escaped() -> None:
    message = 'She said "hello" & goodbye.'

    formatted = format_slack_message(message)

    assert "&amp;" in formatted
    assert "&quot;" not in formatted


# -- Table rendering tests --


def test_table_renders_as_vertical_cards() -> None:
    message = "| Feature | Status | Owner |\n|---------|--------|-------|\n| Auth | Done | Alice |\n| Search | In Progress | Bob |\n"

    formatted = format_slack_message(message)

    assert "*Auth*\n  • Status: Done\n  • Owner: Alice" in formatted
    assert "*Search*\n  • Status: In Progress\n  • Owner: Bob" in formatted
    # Cards separated by blank line
    assert "Owner: Alice\n\n*Search*" in formatted
    # No raw pipe-and-dash table syntax
    assert "---|" not in formatted


def test_table_single_column() -> None:
    message = "| Name |\n|------|\n| Alice |\n| Bob |\n"

    formatted = format_slack_message(message)

    assert "*Alice*" in formatted
    assert "*Bob*" in formatted


def test_table_embedded_in_text() -> None:
    message = "Here are the results:\n\n| Item | Count |\n|------|-------|\n| Apples | 5 |\n\nThat's all."

    formatted = format_slack_message(message)

    assert "Here are the results:" in formatted
    assert "*Apples*\n  • Count: 5" in formatted
    assert "That's all." in formatted


def test_table_with_formatted_cells() -> None:
    message = "| Name | Link |\n|------|------|\n| **Alice** | [profile](https://example.com) |\n"

    formatted = format_slack_message(message)

    # Bold cell should not double-wrap: *Alice* not **Alice**
    assert "*Alice*" in formatted
    assert "**Alice**" not in formatted
    assert "<https://example.com|profile>" in formatted


def test_table_with_alignment_specifiers() -> None:
    message = "| Left | Center | Right |\n|:-----|:------:|------:|\n| a | b | c |\n"

    formatted = format_slack_message(message)

    assert "*a*\n  • Center: b\n  • Right: c" in formatted


def test_two_tables_in_same_message_use_independent_headers() -> None:
    message = "| A | B |\n|---|---|\n| 1 | 2 |\n\n| X | Y | Z |\n|---|---|---|\n| p | q | r |\n"

    formatted = format_slack_message(message)

    assert "*1*\n  • B: 2" in formatted
    assert "*p*\n  • Y: q\n  • Z: r" in formatted


def test_table_empty_first_column_no_bare_asterisks() -> None:
    message = "| Name | Status |\n|------|--------|\n| | Done |\n"

    formatted = format_slack_message(message)

    # Empty title should not produce "**" (bare asterisks)
    assert "**" not in formatted
    assert "  • Status: Done" in formatted


================================================
FILE: backend/tests/unit/onyx/onyxbot/test_slack_gating.py
================================================
"""Tests for Slack bot gating and seat limit enforcement."""

from collections.abc import Generator
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.server.settings.models import ApplicationStatus

# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

_HANDLE_MSG = "onyx.onyxbot.slack.handlers.handle_message"
_LISTENER = "onyx.onyxbot.slack.listener"


def _make_socket_request(
    req_type: str = "events_api",
    event: dict | None = None,
) -> MagicMock:
    """Create a mock SocketModeRequest."""
    req = MagicMock()
    req.type = req_type
    if req_type == "events_api":
        req.payload = {
            "event": event or {"type": "message", "channel": "C123", "ts": "1234.5678"}
        }
    elif req_type == "slash_commands":
        req.payload = {"channel_id": "C123"}
    else:
        req.payload = {}
    return req


def _make_license_metadata(
    status: ApplicationStatus = ApplicationStatus.ACTIVE,
) -> MagicMock:
    """Create a mock LicenseMetadata."""
    metadata = MagicMock()
    metadata.status = status
    return metadata


def _ee_side_effect(
    is_gated: bool = False,
    metadata: Any = None,
) -> list:
    """Build fetch_ee_implementation_or_noop side_effect for gating tests.

    Returns callables for: [is_tenant_gated, get_cached_license_metadata].
    """
    return [
        lambda *_a, **_kw: is_gated,
        lambda *_a, **_kw: metadata,
    ]


def _make_message_info(email: str = "user@test.com") -> MagicMock:
    """Create a mock SlackMessageInfo for handle_message tests."""
    info = MagicMock()
    info.channel_to_respond = "C123"
    info.thread_messages = [MagicMock(message="test?")]
    info.sender_id = "U123"
    info.bypass_filters = False
    info.is_slash_command = False
    info.is_bot_dm = False
    info.email = email
    info.msg_to_respond = "1234.5678"
    return info


def _make_channel_config() -> MagicMock:
    """Create a mock SlackChannelConfig."""
    config = MagicMock()
    config.persona = None
    config.channel_config = {}
    return config


# ---------------------------------------------------------------------------
# _check_tenant_gated
# ---------------------------------------------------------------------------


class TestCheckTenantGated:
    """Tests for _check_tenant_gated function."""

    @pytest.fixture(autouse=True)
    def _patch_tenant_id(self) -> Any:
        with patch(f"{_LISTENER}.get_current_tenant_id", return_value="public"):
            yield

    def _call(
        self,
        _mock_fetch_ee: MagicMock,
        event: dict | None = None,
    ) -> tuple[bool, MagicMock]:
        """Call _check_tenant_gated with a fresh client + request."""
        from onyx.onyxbot.slack.listener import _check_tenant_gated

        client = MagicMock()
        client.web_client = MagicMock()
        req = _make_socket_request(event=event)
        result = _check_tenant_gated(client, req)
        return result, client

    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_active_license_not_gated(self, mock_fetch_ee: MagicMock) -> None:
        metadata = _make_license_metadata()
        mock_fetch_ee.side_effect = _ee_side_effect(metadata=metadata)

        result, _ = self._call(mock_fetch_ee)
        assert result is False

    @patch(f"{_LISTENER}.respond_in_thread_or_channel")
    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_multi_tenant_gated_blocks_and_responds(
        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock
    ) -> None:
        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)

        result, _ = self._call(mock_fetch_ee)

        assert result is True
        mock_respond.assert_called_once()
        assert "subscription has expired" in mock_respond.call_args[1]["text"]

    @patch(f"{_LISTENER}.respond_in_thread_or_channel")
    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_gated_access_status_blocks(
        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock
    ) -> None:
        metadata = _make_license_metadata(status=ApplicationStatus.GATED_ACCESS)
        mock_fetch_ee.side_effect = _ee_side_effect(metadata=metadata)

        result, _ = self._call(mock_fetch_ee)

        assert result is True
        mock_respond.assert_called_once()

    @pytest.mark.parametrize(
        "event",
        [
            {"type": "message", "channel": "C123", "bot_id": "B456", "ts": "1"},
            {
                "type": "message",
                "channel": "C123",
                "bot_profile": {"id": "B456"},
                "ts": "1",
            },
            {"type": "message", "channel": "C123", "subtype": "bot_message", "ts": "1"},
        ],
        ids=["bot_id", "bot_profile", "subtype_bot_message"],
    )
    @patch(f"{_LISTENER}.respond_in_thread_or_channel")
    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_bot_message_no_response_sent(
        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock, event: dict
    ) -> None:
        """Bot messages are blocked but no response is sent (prevents loop)."""
        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)

        result, _ = self._call(mock_fetch_ee, event=event)

        assert result is True
        mock_respond.assert_not_called()

    @patch(f"{_LISTENER}.respond_in_thread_or_channel")
    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_app_mention_no_response_sent(
        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock
    ) -> None:
        """app_mention events are blocked silently (dedup with message event)."""
        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)

        result, _ = self._call(
            mock_fetch_ee,
            event={"type": "app_mention", "channel": "C123", "ts": "1"},
        )

        assert result is True
        mock_respond.assert_not_called()

    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_no_license_metadata_not_gated(self, mock_fetch_ee: MagicMock) -> None:
        """No license metadata (CE mode) means not gated."""
        mock_fetch_ee.side_effect = _ee_side_effect(metadata=None)

        result, _ = self._call(mock_fetch_ee)
        assert result is False

    @patch(f"{_LISTENER}.respond_in_thread_or_channel")
    @patch(f"{_LISTENER}.fetch_ee_implementation_or_noop")
    def test_response_uses_thread_ts(
        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock
    ) -> None:
        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)

        self._call(
            mock_fetch_ee,
            event={
                "type": "message",
                "channel": "C123",
                "thread_ts": "1111.0000",
                "ts": "2222.0000",
            },
        )

        assert mock_respond.call_args[1]["thread_ts"] == "1111.0000"


# ---------------------------------------------------------------------------
# _extract_channel_from_request
# ---------------------------------------------------------------------------


class TestExtractChannelFromRequest:
    """Tests for _extract_channel_from_request function."""

    @pytest.mark.parametrize(
        "req_type, payload, expected",
        [
            ("events_api", {"event": {"channel": "C123"}}, "C123"),
            ("slash_commands", {"channel_id": "C456"}, "C456"),
            ("interactive", {"container": {"channel_id": "C789"}}, "C789"),
            ("unknown", {}, None),
        ],
    )
    def test_channel_extraction(
        self, req_type: str, payload: dict, expected: str | None
    ) -> None:
        from onyx.onyxbot.slack.listener import _extract_channel_from_request

        req = MagicMock()
        req.type = req_type
        req.payload = payload
        assert _extract_channel_from_request(req) == expected


# ---------------------------------------------------------------------------
# handle_message seat check
# ---------------------------------------------------------------------------


class TestHandleMessageSeatCheck:
    """Tests for seat limit enforcement in handle_message."""

    @pytest.fixture(autouse=True)
    def _common_patches(self) -> Any:
        """Patch side-effect-only dependencies that every test needs."""
        with (
            patch(f"{_HANDLE_MSG}.slack_usage_report"),
            patch(f"{_HANDLE_MSG}.send_msg_ack_to_user"),
        ):
            yield

    @pytest.fixture
    def db_session(self) -> Generator[MagicMock, None, None]:
        with patch(f"{_HANDLE_MSG}.get_session_with_current_tenant") as mock:
            session = MagicMock()
            mock.return_value.__enter__ = MagicMock(return_value=session)
            mock.return_value.__exit__ = MagicMock(return_value=False)
            yield session

    def _call_handle_message(
        self, client: MagicMock | None = None, email: str = "user@test.com"
    ) -> bool:
        from onyx.onyxbot.slack.handlers.handle_message import handle_message

        return handle_message(
            message_info=_make_message_info(email),
            slack_channel_config=_make_channel_config(),
            client=client or MagicMock(),
            feedback_reminder_id=None,
        )

    @pytest.mark.usefixtures("db_session")
    @patch(f"{_HANDLE_MSG}.respond_in_thread_or_channel")
    @patch(f"{_HANDLE_MSG}.fetch_ee_implementation_or_noop")
    @patch(f"{_HANDLE_MSG}.get_user_by_email", return_value=None)
    def test_new_user_blocked_when_seats_exceeded(
        self,
        _mock_get_user: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_respond: MagicMock,
    ) -> None:
        seat_result = MagicMock(available=False, error_message="Seat limit exceeded")
        mock_fetch_ee.return_value = lambda **_kw: seat_result

        result = self._call_handle_message()

        assert result is False
        assert "seat limit" in mock_respond.call_args[1]["text"]
        assert "Onyx administrator" in mock_respond.call_args[1]["text"]

    @pytest.mark.usefixtures("db_session")
    @patch(f"{_HANDLE_MSG}.handle_regular_answer", return_value=False)
    @patch(f"{_HANDLE_MSG}.handle_standard_answers", return_value=False)
    @patch(f"{_HANDLE_MSG}.add_slack_user_if_not_exists")
    @patch(f"{_HANDLE_MSG}.fetch_ee_implementation_or_noop")
    @patch(f"{_HANDLE_MSG}.get_user_by_email")
    def test_existing_user_bypasses_seat_check(
        self,
        mock_get_user: MagicMock,
        mock_fetch_ee: MagicMock,
        _mock_add_user: MagicMock,
        _mock_standard: MagicMock,
        _mock_regular: MagicMock,
    ) -> None:
        mock_get_user.return_value = MagicMock()  # User exists

        self._call_handle_message()

        mock_fetch_ee.assert_not_called()

    @patch(f"{_HANDLE_MSG}.handle_regular_answer", return_value=False)
    @patch(f"{_HANDLE_MSG}.handle_standard_answers", return_value=False)
    @patch(f"{_HANDLE_MSG}.add_slack_user_if_not_exists")
    @patch(f"{_HANDLE_MSG}.fetch_ee_implementation_or_noop")
    @patch(f"{_HANDLE_MSG}.get_user_by_email", return_value=None)
    def test_new_user_allowed_when_seats_available(
        self,
        _mock_get_user: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_add_user: MagicMock,
        _mock_standard: MagicMock,
        _mock_regular: MagicMock,
        db_session: MagicMock,
    ) -> None:
        mock_fetch_ee.return_value = lambda **_kw: MagicMock(available=True)

        self._call_handle_message(email="new@test.com")

        mock_add_user.assert_called_once_with(db_session, "new@test.com")

    @patch(f"{_HANDLE_MSG}.handle_regular_answer", return_value=False)
    @patch(f"{_HANDLE_MSG}.handle_standard_answers", return_value=False)
    @patch(f"{_HANDLE_MSG}.add_slack_user_if_not_exists")
    @patch(f"{_HANDLE_MSG}.fetch_ee_implementation_or_noop")
    @patch(f"{_HANDLE_MSG}.get_user_by_email", return_value=None)
    def test_noop_seat_check_allows_new_user(
        self,
        _mock_get_user: MagicMock,
        mock_fetch_ee: MagicMock,
        mock_add_user: MagicMock,
        _mock_standard: MagicMock,
        _mock_regular: MagicMock,
        db_session: MagicMock,
    ) -> None:
        """CE mode: noop returns None, user is allowed."""
        mock_fetch_ee.return_value = lambda **_kw: None

        self._call_handle_message(email="new@test.com")

        mock_add_user.assert_called_once_with(db_session, "new@test.com")


# ---------------------------------------------------------------------------
# check_seat_availability
# ---------------------------------------------------------------------------


class TestCheckSeatAvailability:
    """Tests for check_seat_availability function."""

    def _check(self, used: int, total: int) -> Any:
        from ee.onyx.db.license import check_seat_availability

        metadata = MagicMock(seats=total)
        with (
            patch("ee.onyx.db.license.get_used_seats", return_value=used),
            patch("ee.onyx.db.license.get_license_metadata", return_value=metadata),
        ):
            return check_seat_availability(MagicMock())

    def test_seats_available(self) -> None:
        result = self._check(used=5, total=10)
        assert result.available is True

    def test_seats_exceeded(self) -> None:
        result = self._check(used=10, total=10)
        assert result.available is False
        assert "Seat limit" in result.error_message

    def test_at_capacity_allows_fill(self) -> None:
        """Filling to exactly 100% is allowed (uses > not >=)."""
        result = self._check(used=9, total=10)
        assert result.available is True

    def test_no_license_allows_unlimited(self) -> None:
        from ee.onyx.db.license import check_seat_availability

        with patch("ee.onyx.db.license.get_license_metadata", return_value=None):
            result = check_seat_availability(MagicMock())
            assert result.available is True


# ---------------------------------------------------------------------------
# get_used_seats
# ---------------------------------------------------------------------------


class TestGetUsedSeats:
    """Tests for get_used_seats — anonymous user exclusion."""

    @patch("ee.onyx.db.license.MULTI_TENANT", False)
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_excludes_anonymous_user(self, mock_get_session: MagicMock) -> None:
        from ee.onyx.db.license import get_used_seats

        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
        mock_session.execute.return_value.scalar.return_value = 3

        assert get_used_seats() == 3
        mock_session.execute.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/prompts/test_prompt_utils.py
================================================
from onyx.prompts.constants import REMINDER_TAG_DESCRIPTION
from onyx.prompts.prompt_utils import replace_reminder_tag


def test_replace_reminder_tag_pattern() -> None:
    prompt = "Some text {{REMINDER_TAG_DESCRIPTION}} more text"
    result = replace_reminder_tag(prompt)
    assert "{{REMINDER_TAG_DESCRIPTION}}" not in result
    assert REMINDER_TAG_DESCRIPTION in result


def test_replace_reminder_tag_no_pattern() -> None:
    prompt = "Some text without any pattern"
    result = replace_reminder_tag(prompt)
    assert result == prompt


================================================
FILE: backend/tests/unit/onyx/redis_ca.pem
================================================
-----BEGIN CERTIFICATE-----
MIIDXzCCAkegAwIBAgILBAAAAAABIVhTCKIwDQYJKoZIhvcNAQELBQAwTDEgMB4G
A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoTCkdsb2JhbFNp
Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDkwMzE4MTAwMDAwWhcNMjkwMzE4
MTAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMzETMBEG
A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI
hvcNAQEBBQADggEPADCCAQoCggEBAMwldpB5BngiFvXAg7aEyiie/QV2EcWtiHL8
RgJDx7KKnQRfJMsuS+FggkbhUqsMgUdwbN1k0ev1LKMPgj0MK66X17YUhhB5uzsT
gHeMCOFJ0mpiLx9e+pZo34knlTifBtc+ycsmWQ1z3rDI6SYOgxXG71uL0gRgykmm
KPZpO/bLyCiR5Z2KYVc3rHQU3HTgOu5yLy6c+9C7v/U9AOEGM+iCK65TpjoWc4zd
QQ4gOsC0p6Hpsk+QLjJg6VfLuQSSaGjlOCZgdbKfd/+RFO+uIEn8rUAVSNECMWEZ
XriX7613t2Saer9fwRPvm2L7DWzgVGkWqQPabumDk3F2xmmFghcCAwEAAaNCMEAw
DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFI/wS3+o
LkUkrk1Q+mOai97i3Ru8MA0GCSqGSIb3DQEBCwUAA4IBAQBLQNvAUKr+yAzv95ZU
RUm7lgAJQayzE4aGKAczymvmdLm6AC2upArT9fHxD4q/c2dKg8dEe3jgr25sbwMp
jjM5RcOO5LlXbKr8EpbsU8Yt5CRsuZRj+9xTaGdWPoO4zzUhw8lo/s7awlOqzJCK
6fBdRoyV3XpYKBovHd7NADdBj+1EbddTKJd+82cEHhXXipa0095MJ6RMG3NzdvQX
mcIfeg7jLQitChws/zyrVQ4PkX4268NXSb7hLi18YIvDQVETI53O9zJrlAGomecs
Mx86OyXShkDOOyyGeMlhLxS67ttVb9+E7gUJTb0o2HLO02JQZR7rkpeDMdmztcpH
WD9f
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIGMTCCBBmgAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwajELMAkGA1UEBhMCVVMx
CzAJBgNVBAgMAkNBMQswCQYDVQQHDAJDQTESMBAGA1UECgwJUmVkaXNMYWJzMS0w
KwYDVQQDDCRSZWRpc0xhYnMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcN
MTgwMjI1MTUzNzM3WhcNMjgwMjIzMTUzNzM3WjBfMQswCQYDVQQGEwJVUzELMAkG
A1UECAwCQ0ExEjAQBgNVBAoMCVJlZGlzTGFiczEvMC0GA1UEAwwmUkNQIEludGVy
bWVkaWF0ZSBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUA
A4ICDwAwggIKAoICAQDf9dqbxc8Bq7Ctq9rWcxrGNKKHivqLAFpPq02yLPx6fsOv
Tq7GsDChAYBBc4v7Y2Ap9RD5Vs3dIhEANcnolf27QwrG9RMnnvzk8pCvp1o6zSU4
VuOE1W66/O1/7e2rVxyrnTcP7UgK43zNIXu7+tiAqWsO92uSnuMoGPGpeaUm1jym
hjWKtkAwDFSqvHY+XL5qDVBEjeUe+WHkYUg40cAXjusAqgm2hZt29c2wnVrxW25W
P0meNlzHGFdA2AC5z54iRiqj57dTfBTkHoBczQxcyw6hhzxZQ4e5I5zOKjXXEhZN
r0tA3YC14CTabKRus/JmZieyZzRgEy2oti64tmLYTqSlAD78pRL40VNoaSYetXLw
hhNsXCHgWaY6d5bLOc/aIQMAV5oLvZQKvuXAF1IDmhPA+bZbpWipp0zagf1P1H3s
UzsMdn2KM0ejzgotbtNlj5TcrVwpmvE3ktvUAuA+hi3FkVx1US+2Gsp5x4YOzJ7u
P1WPk6ShF0JgnJH2ILdj6kttTWwFzH17keSFICWDfH/+kM+k7Y1v3EXMQXE7y0T9
MjvJskz6d/nv+sQhY04xt64xFMGTnZjlJMzfQNi7zWFLTZnDD0lPowq7l3YiPoTT
t5Xky83lu0KZsZBo0WlWaDG00gLVdtRgVbcuSWxpi5BdLb1kRab66JptWjxwXQID
AQABo4HrMIHoMDoGA1UdHwQzMDEwL6AtoCuGKWh0dHBzOi8vcmwtY2Etc2VydmVy
LnJlZGlzbGFicy5jb20vdjEvY3JsMEYGCCsGAQUFBwEBBDowODA2BggrBgEFBQcw
AYYqaHR0cHM6Ly9ybC1jYS1zZXJ2ZXIucmVkaXNsYWJzLmNvbS92MS9vY3NwMB0G
A1UdDgQWBBQHar5OKvQUpP2qWt6mckzToeCOHDAfBgNVHSMEGDAWgBQi42wH6hM4
L2sujEvLM0/u8lRXTzASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIB
hjANBgkqhkiG9w0BAQsFAAOCAgEAirEn/iTsAKyhd+pu2W3Z5NjCko4NPU0EYUbr
AP7+POK2rzjIrJO3nFYQ/LLuC7KCXG+2qwan2SAOGmqWst13Y+WHp44Kae0kaChW
vcYLXXSoGQGC8QuFSNUdaeg3RbMDYFT04dOkqufeWVccoHVxyTSg9eD8LZuHn5jw
7QDLiEECBmIJHk5Eeo2TAZrx4Yx6ufSUX5HeVjlAzqwtAqdt99uCJ/EL8bgpWbe+
XoSpvUv0SEC1I1dCAhCKAvRlIOA6VBcmzg5Am12KzkqTul12/VEFIgzqu0Zy2Jbc
AUPrYVu/+tOGXQaijy7YgwH8P8n3s7ZeUa1VABJHcxrxYduDDJBLZi+MjheUDaZ1
jQRHYevI2tlqeSBqdPKG4zBY5lS0GiAlmuze5oENt0P3XboHoZPHiqcK3VECgTVh
/BkJcuudETSJcZDmQ8YfoKfBzRQNg2sv/hwvUv73Ss51Sco8GEt2lD8uEdib1Q6z
zDT5lXJowSzOD5ZA9OGDjnSRL+2riNtKWKEqvtEG3VBJoBzu9GoxbAc7wIZLxmli
iF5a/Zf5X+UXD3s4TMmy6C4QZJpAA2egsSQCnraWO2ULhh7iXMysSkF/nzVfZn43
iqpaB8++9a37hWq14ZmOv0TJIDz//b2+KC4VFXWQ5W5QC6whsjT+OlG4p5ZYG0jo
616pxqo=
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIFujCCA6KgAwIBAgIJAJ1aTT1lu2ScMA0GCSqGSIb3DQEBCwUAMGoxCzAJBgNV
BAYTAlVTMQswCQYDVQQIDAJDQTELMAkGA1UEBwwCQ0ExEjAQBgNVBAoMCVJlZGlz
TGFiczEtMCsGA1UEAwwkUmVkaXNMYWJzIFJvb3QgQ2VydGlmaWNhdGUgQXV0aG9y
aXR5MB4XDTE4MDIyNTE1MjA0MloXDTM4MDIyMDE1MjA0MlowajELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAkNBMQswCQYDVQQHDAJDQTESMBAGA1UECgwJUmVkaXNMYWJz
MS0wKwYDVQQDDCRSZWRpc0xhYnMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkw
ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDLEjXy7YrbN5Waau5cd6g1
G5C2tMmeTpZ0duFAPxNU4oE3RHS5gGiok346fUXuUxbZ6QkuzeN2/2Z+RmRcJhQY
Dm0ZgdG4x59An1TJfnzKKoWj8ISmoHS/TGNBdFzXV7FYNLBuqZouqePI6ReC6Qhl
pp45huV32Q3a6IDrrvx7Wo5ZczEQeFNbCeCOQYNDdTmCyEkHqc2AGo8eoIlSTutT
ULOC7R5gzJVTS0e1hesQ7jmqHjbO+VQS1NAL4/5K6cuTEqUl+XhVhPdLWBXJQ5ag
54qhX4v+ojLzeU1R/Vc6NjMvVtptWY6JihpgplprN0Yh2556ewcXMeturcKgXfGJ
xeYzsjzXerEjrVocX5V8BNrg64NlifzTMKNOOv4fVZszq1SIHR8F9ROrqiOdh8iC
JpUbLpXH9hWCSEO6VRMB2xJoKu3cgl63kF30s77x7wLFMEHiwsQRKxooE1UhgS9K
2sO4TlQ1eWUvFvHSTVDQDlGQ6zu4qjbOpb3Q8bQwoK+ai2alkXVR4Ltxe9QlgYK3
StsnPhruzZGA0wbXdpw0bnM+YdlEm5ffSTpNIfgHeaa7Dtb801FtA71ZlH7A6TaI
SIQuUST9EKmv7xrJyx0W1pGoPOLw5T029aTjnICSLdtV9bLwysrLhIYG5bnPq78B
cS+jZHFGzD7PUVGQD01nOQIDAQABo2MwYTAdBgNVHQ4EFgQUIuNsB+oTOC9rLoxL
yzNP7vJUV08wHwYDVR0jBBgwFoAUIuNsB+oTOC9rLoxLyzNP7vJUV08wDwYDVR0T
AQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAHfg
z5pMNUAKdMzK1aS1EDdK9yKz4qicILz5czSLj1mC7HKDRy8cVADUxEICis++CsCu
rYOvyCVergHQLREcxPq4rc5Nq1uj6J6649NEeh4WazOOjL4ZfQ1jVznMbGy+fJm3
3Hoelv6jWRG9iqeJZja7/1s6YC6bWymI/OY1e4wUKeNHAo+Vger7MlHV+RuabaX+
hSJ8bJAM59NCM7AgMTQpJCncrcdLeceYniGy5Q/qt2b5mJkQVkIdy4TPGGB+AXDJ
D0q3I/JDRkDUFNFdeW0js7fHdsvCR7O3tJy5zIgEV/o/BCkmJVtuwPYOrw/yOlKj
TY/U7ATAx9VFF6/vYEOMYSmrZlFX+98L6nJtwDqfLB5VTltqZ4H/KBxGE3IRSt9l
FXy40U+LnXzhhW+7VBAvyYX8GEXhHkKU8Gqk1xitrqfBXY74xKgyUSTolFSfFVgj
mcM/X4K45bka+qpkj7Kfv/8D4j6aZekwhN2ly6hhC1SmQ8qjMjpG/mrWOSSHZFmf
ybu9iD2AYHeIOkshIl6xYIa++Q/00/vs46IzAbQyriOi0XxlSMMVtPx0Q3isp+ji
n8Mq9eOuxYOEQ4of8twUkUDd528iwGtEdwf0Q01UyT84S62N8AySl1ZBKXJz6W4F
UhWfa/HQYOAPDdEjNgnVwLI23b8t0TozyCWw7q8h
-----END CERTIFICATE-----


================================================
FILE: backend/tests/unit/onyx/server/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/server/features/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/server/features/hierarchy/test_user_access_info.py
================================================
"""Unit tests for _get_user_access_info helper function.

These tests mock all database operations and don't require a real database.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from sqlalchemy.orm import Session

from onyx.server.features.hierarchy.api import _get_user_access_info


def test_get_user_access_info_returns_email_and_groups() -> None:
    """_get_user_access_info returns the user's email and external group IDs."""
    mock_user = MagicMock()
    mock_user.email = "test@example.com"
    mock_db_session = MagicMock(spec=Session)

    with patch(
        "onyx.server.features.hierarchy.api.get_user_external_group_ids",
        return_value=["group1", "group2"],
    ):
        email, groups = _get_user_access_info(mock_user, mock_db_session)

    assert email == "test@example.com"
    assert groups == ["group1", "group2"]


def test_get_user_access_info_with_no_groups() -> None:
    """User with no external groups returns empty list."""
    mock_user = MagicMock()
    mock_user.email = "solo@example.com"
    mock_db_session = MagicMock(spec=Session)

    with patch(
        "onyx.server.features.hierarchy.api.get_user_external_group_ids",
        return_value=[],
    ):
        email, groups = _get_user_access_info(mock_user, mock_db_session)

    assert email == "solo@example.com"
    assert groups == []


================================================
FILE: backend/tests/unit/onyx/server/features/hooks/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/server/manage/embedding/test_embedding_api.py
================================================
from types import SimpleNamespace
from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.db.models import SearchSettings
from onyx.server.manage.embedding.api import list_embedding_models
from onyx.server.manage.embedding.api import list_embedding_providers
from onyx.utils.encryption import decrypt_bytes_to_string
from onyx.utils.encryption import encrypt_string_to_bytes
from onyx.utils.encryption import mask_string
from onyx.utils.sensitive import SensitiveValue
from shared_configs.enums import EmbeddingProvider


def _build_sensitive_value(raw_value: str) -> SensitiveValue[str]:
    return SensitiveValue[str](
        encrypted_bytes=encrypt_string_to_bytes(raw_value),
        decrypt_fn=decrypt_bytes_to_string,
    )


def _build_search_settings(raw_api_key: str) -> SimpleNamespace:
    return SimpleNamespace(
        id=7,
        model_name="gemini-embedding-001",
        normalize=False,
        query_prefix="",
        passage_prefix="",
        provider_type=EmbeddingProvider.GOOGLE,
        cloud_provider=SimpleNamespace(
            api_key=_build_sensitive_value(raw_api_key),
            api_url="",
            api_version=None,
            deployment_name=None,
        ),
        api_url="",
    )


def test_list_embedding_models_masks_api_key() -> None:
    raw_api_key = "sk-abcdefghijklmnopqrstuvwxyz1234567890"
    search_settings = _build_search_settings(raw_api_key)

    with patch(
        "onyx.server.manage.embedding.api.get_all_search_settings",
        return_value=[search_settings],
    ):
        response = list_embedding_models(_=MagicMock(), db_session=MagicMock())

    assert len(response) == 1
    assert response[0].api_key == mask_string(raw_api_key)
    assert response[0].api_key != raw_api_key


def test_list_embedding_models_returns_none_for_local_model_api_key() -> None:
    local_search_settings = SimpleNamespace(
        id=1,
        model_name="thenlper/gte-small",
        normalize=False,
        query_prefix="",
        passage_prefix="",
        provider_type=None,
        cloud_provider=None,
        api_url=None,
    )

    with patch(
        "onyx.server.manage.embedding.api.get_all_search_settings",
        return_value=[local_search_settings],
    ):
        response = list_embedding_models(_=MagicMock(), db_session=MagicMock())

    assert len(response) == 1
    assert response[0].api_key is None


def test_list_embedding_providers_uses_sensitive_value_masking_once() -> None:
    raw_api_key = "sk-abcdefghijklmnopqrstuvwxyz1234567890"
    provider_model = SimpleNamespace(
        provider_type=EmbeddingProvider.GOOGLE,
        api_key=_build_sensitive_value(raw_api_key),
        api_url="",
        api_version=None,
        deployment_name=None,
    )

    with patch(
        "onyx.server.manage.embedding.api.fetch_existing_embedding_providers",
        return_value=[provider_model],
    ):
        response = list_embedding_providers(_=MagicMock(), db_session=MagicMock())

    assert len(response) == 1
    assert response[0].api_key == mask_string(raw_api_key)
    assert response[0].api_key != mask_string(mask_string(raw_api_key))


def test_search_settings_api_key_property_returns_raw_value_for_runtime_use() -> None:
    raw_api_key = "sk-runtime-should-use-unmasked-value-1234567890"
    fake_search_settings = SimpleNamespace(
        cloud_provider=SimpleNamespace(api_key=_build_sensitive_value(raw_api_key))
    )

    api_key_property = SearchSettings.__dict__["api_key"]
    assert api_key_property.fget(fake_search_settings) == raw_api_key


================================================
FILE: backend/tests/unit/onyx/server/manage/llm/test_fetch_models_api.py
================================================
"""Tests for LLM model fetch endpoints.

These tests verify the full request/response flow for fetching models
from dynamic providers (Ollama, OpenRouter, Litellm), including the
sync-to-DB behavior when provider_name is specified.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import httpx
import pytest

from onyx.error_handling.exceptions import OnyxError
from onyx.server.manage.llm.models import BifrostFinalModelResponse
from onyx.server.manage.llm.models import BifrostModelsRequest
from onyx.server.manage.llm.models import LitellmFinalModelResponse
from onyx.server.manage.llm.models import LitellmModelsRequest
from onyx.server.manage.llm.models import LMStudioFinalModelResponse
from onyx.server.manage.llm.models import LMStudioModelsRequest
from onyx.server.manage.llm.models import OllamaFinalModelResponse
from onyx.server.manage.llm.models import OllamaModelsRequest
from onyx.server.manage.llm.models import OpenRouterFinalModelResponse
from onyx.server.manage.llm.models import OpenRouterModelsRequest


class TestGetOllamaAvailableModels:
    """Tests for the Ollama model fetch endpoint."""

    @pytest.fixture
    def mock_ollama_tags_response(self) -> dict:
        """Mock response from Ollama /api/tags endpoint."""
        return {
            "models": [
                {"name": "llama3:latest"},
                {"name": "mistral:7b"},
                {"name": "qwen2.5:14b"},
            ]
        }

    @pytest.fixture
    def mock_ollama_show_response(self) -> dict:
        """Mock response from Ollama /api/show endpoint."""
        return {
            "details": {"family": "llama", "families": ["llama"]},
            "model_info": {
                "general.architecture": "llama",
                "llama.context_length": 8192,
            },
            "capabilities": [
                "completion"
            ],  # Required to pass supports_completion() check
        }

    def test_returns_model_list(
        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict
    ) -> None:
        """Test that endpoint returns properly formatted model list."""
        from onyx.server.manage.llm.api import get_ollama_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            # Mock GET for /api/tags
            mock_get_response = MagicMock()
            mock_get_response.json.return_value = mock_ollama_tags_response
            mock_get_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_get_response

            # Mock POST for /api/show (called for each model)
            mock_post_response = MagicMock()
            mock_post_response.json.return_value = mock_ollama_show_response
            mock_post_response.raise_for_status = MagicMock()
            mock_httpx.post.return_value = mock_post_response

            request = OllamaModelsRequest(api_base="http://localhost:11434")
            results = get_ollama_available_models(request, MagicMock(), mock_session)

            assert len(results) == 3
            assert all(isinstance(r, OllamaFinalModelResponse) for r in results)
            # Check display names are generated
            assert any("Llama" in r.display_name for r in results)
            assert any("Mistral" in r.display_name for r in results)
            # Results should be alphabetically sorted by model name
            assert [r.name for r in results] == sorted(
                [r.name for r in results], key=str.lower
            )

    def test_syncs_to_db_when_provider_name_specified(
        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict
    ) -> None:
        """Test that models are synced to DB when provider_name is given."""
        from onyx.server.manage.llm.api import get_ollama_available_models

        mock_session = MagicMock()
        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = []

        with (
            patch("onyx.server.manage.llm.api.httpx") as mock_httpx,
            patch(
                "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
            ),
        ):
            mock_get_response = MagicMock()
            mock_get_response.json.return_value = mock_ollama_tags_response
            mock_get_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_get_response

            mock_post_response = MagicMock()
            mock_post_response.json.return_value = mock_ollama_show_response
            mock_post_response.raise_for_status = MagicMock()
            mock_httpx.post.return_value = mock_post_response

            request = OllamaModelsRequest(
                api_base="http://localhost:11434",
                provider_name="my-ollama",
            )
            get_ollama_available_models(request, MagicMock(), mock_session)

            # Verify DB operations were called
            assert mock_session.execute.call_count == 6
            mock_session.commit.assert_called_once()

    def test_no_sync_when_provider_name_not_specified(
        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict
    ) -> None:
        """Test that models are NOT synced when provider_name is None."""
        from onyx.server.manage.llm.api import get_ollama_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_get_response = MagicMock()
            mock_get_response.json.return_value = mock_ollama_tags_response
            mock_get_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_get_response

            mock_post_response = MagicMock()
            mock_post_response.json.return_value = mock_ollama_show_response
            mock_post_response.raise_for_status = MagicMock()
            mock_httpx.post.return_value = mock_post_response

            request = OllamaModelsRequest(api_base="http://localhost:11434")
            get_ollama_available_models(request, MagicMock(), mock_session)

            # No DB operations should happen
            mock_session.execute.assert_not_called()
            mock_session.commit.assert_not_called()


class TestGetOpenRouterAvailableModels:
    """Tests for the OpenRouter model fetch endpoint."""

    @pytest.fixture
    def mock_openrouter_response(self) -> dict:
        """Mock response from OpenRouter API."""
        return {
            "data": [
                {
                    "id": "anthropic/claude-3.5-sonnet",
                    "name": "Claude 3.5 Sonnet",
                    "context_length": 200000,
                    "architecture": {"input_modalities": ["text", "image"]},
                },
                {
                    "id": "openai/gpt-4o",
                    "name": "GPT-4o",
                    "context_length": 128000,
                    "architecture": {"input_modalities": ["text", "image"]},
                },
                {
                    "id": "meta-llama/llama-3.1-70b",
                    "name": "Llama 3.1 70B",
                    "context_length": 131072,
                    "architecture": {"input_modalities": ["text"]},
                },
            ]
        }

    def test_returns_model_list(self, mock_openrouter_response: dict) -> None:
        """Test that endpoint returns properly formatted model list."""
        from onyx.server.manage.llm.api import get_openrouter_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_openrouter_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = OpenRouterModelsRequest(
                api_base="https://openrouter.ai/api/v1",
                api_key="test-key",
            )
            results = get_openrouter_available_models(
                request, MagicMock(), mock_session
            )

            assert len(results) == 3
            assert all(isinstance(r, OpenRouterFinalModelResponse) for r in results)
            # Check that models have correct context lengths
            claude = next(r for r in results if "claude" in r.name.lower())
            assert claude.max_input_tokens == 200000

    def test_infers_vision_support(self, mock_openrouter_response: dict) -> None:
        """Test that vision support is correctly inferred from modality."""
        from onyx.server.manage.llm.api import get_openrouter_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_openrouter_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = OpenRouterModelsRequest(
                api_base="https://openrouter.ai/api/v1",
                api_key="test-key",
            )
            results = get_openrouter_available_models(
                request, MagicMock(), mock_session
            )

            # Models with "image" in modality should have vision support
            claude = next(r for r in results if "claude" in r.name.lower())
            llama = next(r for r in results if "llama" in r.name.lower())

            assert claude.supports_image_input is True
            assert llama.supports_image_input is False

    def test_syncs_to_db_when_provider_name_specified(
        self, mock_openrouter_response: dict
    ) -> None:
        """Test that models are synced to DB when provider_name is given."""
        from onyx.server.manage.llm.api import get_openrouter_available_models

        mock_session = MagicMock()
        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = []

        with (
            patch("onyx.server.manage.llm.api.httpx.get") as mock_get,
            patch(
                "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
            ),
        ):
            mock_response = MagicMock()
            mock_response.json.return_value = mock_openrouter_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = OpenRouterModelsRequest(
                api_base="https://openrouter.ai/api/v1",
                api_key="test-key",
                provider_name="my-openrouter",
            )
            get_openrouter_available_models(request, MagicMock(), mock_session)

            # Verify DB operations were called
            assert mock_session.execute.call_count == 8
            mock_session.commit.assert_called_once()

    def test_preserves_existing_models_on_sync(
        self, mock_openrouter_response: dict
    ) -> None:
        """Test that existing models are not overwritten during sync."""
        from onyx.server.manage.llm.api import get_openrouter_available_models

        mock_session = MagicMock()

        # Provider already has claude model
        existing_model = MagicMock()
        existing_model.name = "anthropic/claude-3.5-sonnet"

        mock_provider = MagicMock()
        mock_provider.id = 1
        mock_provider.model_configurations = [existing_model]

        with (
            patch("onyx.server.manage.llm.api.httpx.get") as mock_get,
            patch(
                "onyx.db.llm.fetch_existing_llm_provider", return_value=mock_provider
            ),
        ):
            mock_response = MagicMock()
            mock_response.json.return_value = mock_openrouter_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = OpenRouterModelsRequest(
                api_base="https://openrouter.ai/api/v1",
                api_key="test-key",
                provider_name="my-openrouter",
            )
            get_openrouter_available_models(request, MagicMock(), mock_session)

            # Only 2 new models should be inserted (claude already exists)
            assert mock_session.execute.call_count == 5

    def test_no_sync_when_provider_name_not_specified(
        self, mock_openrouter_response: dict
    ) -> None:
        """Test that models are NOT synced when provider_name is None."""
        from onyx.server.manage.llm.api import get_openrouter_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_openrouter_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = OpenRouterModelsRequest(
                api_base="https://openrouter.ai/api/v1",
                api_key="test-key",
            )
            get_openrouter_available_models(request, MagicMock(), mock_session)

            # No DB operations should happen
            mock_session.execute.assert_not_called()
            mock_session.commit.assert_not_called()


class TestGetLMStudioAvailableModels:
    """Tests for the LM Studio model fetch endpoint."""

    @pytest.fixture
    def mock_lm_studio_response(self) -> dict:
        """Mock response from LM Studio /api/v1/models endpoint."""
        return {
            "models": [
                {
                    "key": "lmstudio-community/Meta-Llama-3-8B",
                    "type": "llm",
                    "display_name": "Meta Llama 3 8B",
                    "max_context_length": 8192,
                    "capabilities": {"vision": False},
                },
                {
                    "key": "lmstudio-community/Qwen2.5-VL-7B",
                    "type": "llm",
                    "display_name": "Qwen 2.5 VL 7B",
                    "max_context_length": 32768,
                    "capabilities": {"vision": True},
                },
                {
                    "key": "text-embedding-nomic-embed-text-v1.5",
                    "type": "embedding",
                    "display_name": "Nomic Embed Text v1.5",
                    "max_context_length": 2048,
                    "capabilities": {},
                },
                {
                    "key": "lmstudio-community/DeepSeek-R1-8B",
                    "type": "llm",
                    "display_name": "DeepSeek R1 8B",
                    "max_context_length": 65536,
                    "capabilities": {"vision": False},
                },
            ]
        }

    def test_returns_model_list(self, mock_lm_studio_response: dict) -> None:
        """Test that endpoint returns properly formatted LLM-only model list."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_lm_studio_response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            results = get_lm_studio_available_models(request, MagicMock(), mock_session)

            # Only LLM-type models should be returned (embedding filtered out)
            assert len(results) == 3
            assert all(isinstance(r, LMStudioFinalModelResponse) for r in results)
            names = [r.name for r in results]
            assert "text-embedding-nomic-embed-text-v1.5" not in names
            # Results should be alphabetically sorted by model name
            assert names == sorted(names, key=str.lower)

    def test_infers_vision_support(self, mock_lm_studio_response: dict) -> None:
        """Test that vision support is correctly read from capabilities."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_lm_studio_response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            results = get_lm_studio_available_models(request, MagicMock(), mock_session)

            qwen = next(r for r in results if "Qwen" in r.display_name)
            llama = next(r for r in results if "Llama" in r.display_name)

            assert qwen.supports_image_input is True
            assert llama.supports_image_input is False

    def test_infers_reasoning_from_model_name(self) -> None:
        """Test that reasoning is inferred from model name when not in capabilities."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()
        response = {
            "models": [
                {
                    "key": "lmstudio-community/DeepSeek-R1-8B",
                    "type": "llm",
                    "display_name": "DeepSeek R1 8B",
                    "max_context_length": 65536,
                    "capabilities": {},
                },
                {
                    "key": "lmstudio-community/Meta-Llama-3-8B",
                    "type": "llm",
                    "display_name": "Meta Llama 3 8B",
                    "max_context_length": 8192,
                    "capabilities": {},
                },
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            results = get_lm_studio_available_models(request, MagicMock(), mock_session)

            deepseek = next(r for r in results if "DeepSeek" in r.display_name)
            llama = next(r for r in results if "Llama" in r.display_name)

            assert deepseek.supports_reasoning is True
            assert llama.supports_reasoning is False

    def test_uses_display_name_from_api(self, mock_lm_studio_response: dict) -> None:
        """Test that display_name from the API is used directly."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_lm_studio_response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            results = get_lm_studio_available_models(request, MagicMock(), mock_session)

            llama = next(r for r in results if "Llama" in r.name)
            assert llama.display_name == "Meta Llama 3 8B"
            assert llama.max_input_tokens == 8192

    def test_strips_trailing_v1_from_api_base(self) -> None:
        """Test that /v1 suffix is stripped before building the native API URL."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()
        response = {
            "models": [
                {
                    "key": "test-model",
                    "type": "llm",
                    "display_name": "Test",
                    "max_context_length": 4096,
                    "capabilities": {},
                },
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234/v1")
            get_lm_studio_available_models(request, MagicMock(), mock_session)

            # Should hit /api/v1/models, not /v1/api/v1/models
            mock_httpx.get.assert_called_once()
            called_url = mock_httpx.get.call_args[0][0]
            assert called_url == "http://localhost:1234/api/v1/models"

    def test_falls_back_to_stored_api_key(self) -> None:
        """Test that stored API key is used when api_key_changed is False."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()
        mock_provider = MagicMock()
        mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}

        response = {
            "models": [
                {
                    "key": "test-model",
                    "type": "llm",
                    "display_name": "Test",
                    "max_context_length": 4096,
                    "capabilities": {},
                },
            ]
        }

        with (
            patch("onyx.server.manage.llm.api.httpx") as mock_httpx,
            patch(
                "onyx.server.manage.llm.api.fetch_existing_llm_provider",
                return_value=mock_provider,
            ),
        ):
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(
                api_base="http://localhost:1234",
                api_key="masked-value",
                api_key_changed=False,
                provider_name="my-lm-studio",
            )
            get_lm_studio_available_models(request, MagicMock(), mock_session)

            headers = mock_httpx.get.call_args[1]["headers"]
            assert headers["Authorization"] == "Bearer stored-secret"

    def test_uses_submitted_api_key_when_changed(self) -> None:
        """Test that submitted API key is used when api_key_changed is True."""
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()
        response = {
            "models": [
                {
                    "key": "test-model",
                    "type": "llm",
                    "display_name": "Test",
                    "max_context_length": 4096,
                    "capabilities": {},
                },
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(
                api_base="http://localhost:1234",
                api_key="new-secret",
                api_key_changed=True,
                provider_name="my-lm-studio",
            )
            get_lm_studio_available_models(request, MagicMock(), mock_session)

            headers = mock_httpx.get.call_args[1]["headers"]
            assert headers["Authorization"] == "Bearer new-secret"

    def test_raises_on_empty_models(self) -> None:
        """Test that an error is raised when no models are returned."""
        from onyx.error_handling.exceptions import OnyxError
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = {"models": []}
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            with pytest.raises(OnyxError):
                get_lm_studio_available_models(request, MagicMock(), mock_session)

    def test_raises_on_only_non_llm_models(self) -> None:
        """Test that an error is raised when all models are non-LLM type."""
        from onyx.error_handling.exceptions import OnyxError
        from onyx.server.manage.llm.api import get_lm_studio_available_models

        mock_session = MagicMock()
        response = {
            "models": [
                {
                    "key": "embedding-model",
                    "type": "embedding",
                    "display_name": "Embedding",
                    "max_context_length": 2048,
                    "capabilities": {},
                },
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx") as mock_httpx:
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_httpx.get.return_value = mock_response

            request = LMStudioModelsRequest(api_base="http://localhost:1234")
            with pytest.raises(OnyxError):
                get_lm_studio_available_models(request, MagicMock(), mock_session)


class TestGetLitellmAvailableModels:
    """Tests for the Litellm proxy model fetch endpoint."""

    @pytest.fixture
    def mock_litellm_response(self) -> dict:
        """Mock response from Litellm /v1/models endpoint."""
        return {
            "data": [
                {
                    "id": "gpt-4o",
                    "object": "model",
                    "created": 1700000000,
                    "owned_by": "openai",
                },
                {
                    "id": "claude-3-5-sonnet",
                    "object": "model",
                    "created": 1700000001,
                    "owned_by": "anthropic",
                },
                {
                    "id": "gemini-pro",
                    "object": "model",
                    "created": 1700000002,
                    "owned_by": "google",
                },
            ]
        }

    def test_returns_model_list(self, mock_litellm_response: dict) -> None:
        """Test that endpoint returns properly formatted model list."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_litellm_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            results = get_litellm_available_models(request, MagicMock(), mock_session)

            assert len(results) == 3
            assert all(isinstance(r, LitellmFinalModelResponse) for r in results)

    def test_model_fields_parsed_correctly(self, mock_litellm_response: dict) -> None:
        """Test that provider_name and model_name are correctly extracted."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_litellm_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            results = get_litellm_available_models(request, MagicMock(), mock_session)

            gpt = next(r for r in results if r.model_name == "gpt-4o")
            assert gpt.provider_name == "openai"

            claude = next(r for r in results if r.model_name == "claude-3-5-sonnet")
            assert claude.provider_name == "anthropic"

    def test_results_sorted_by_model_name(self, mock_litellm_response: dict) -> None:
        """Test that results are alphabetically sorted by model_name."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_litellm_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            results = get_litellm_available_models(request, MagicMock(), mock_session)

            model_names = [r.model_name for r in results]
            assert model_names == sorted(model_names, key=str.lower)

    def test_empty_data_raises_onyx_error(self) -> None:
        """Test that empty model list raises OnyxError."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = {"data": []}
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            with pytest.raises(OnyxError, match="No models found"):
                get_litellm_available_models(request, MagicMock(), mock_session)

    def test_missing_data_key_raises_onyx_error(self) -> None:
        """Test that response without 'data' key raises OnyxError."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = {}
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            with pytest.raises(OnyxError):
                get_litellm_available_models(request, MagicMock(), mock_session)

    def test_skips_unparseable_entries(self) -> None:
        """Test that malformed model entries are skipped without failing."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()
        response_with_bad_entry = {
            "data": [
                {
                    "id": "gpt-4o",
                    "object": "model",
                    "created": 1700000000,
                    "owned_by": "openai",
                },
                # Missing required fields
                {"bad_field": "bad_value"},
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = response_with_bad_entry
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            results = get_litellm_available_models(request, MagicMock(), mock_session)

            assert len(results) == 1
            assert results[0].model_name == "gpt-4o"

    def test_all_entries_unparseable_raises_onyx_error(self) -> None:
        """Test that OnyxError is raised when all entries fail to parse."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()
        response_all_bad = {
            "data": [
                {"bad_field": "bad_value"},
                {"another_bad": 123},
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = response_all_bad
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            with pytest.raises(OnyxError, match="No compatible models"):
                get_litellm_available_models(request, MagicMock(), mock_session)

    def test_api_base_trailing_slash_handled(self) -> None:
        """Test that trailing slashes in api_base are handled correctly."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()
        mock_litellm_response = {
            "data": [
                {
                    "id": "gpt-4o",
                    "object": "model",
                    "created": 1700000000,
                    "owned_by": "openai",
                },
            ]
        }

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_litellm_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = LitellmModelsRequest(
                api_base="http://localhost:4000/",
                api_key="test-key",
            )
            get_litellm_available_models(request, MagicMock(), mock_session)

            # Should call /v1/models without double slashes
            call_args = mock_get.call_args
            assert call_args[0][0] == "http://localhost:4000/v1/models"

    def test_connection_failure_raises_onyx_error(self) -> None:
        """Test that connection failures are wrapped in OnyxError."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_get.side_effect = httpx.ConnectError(
                "Connection refused", request=MagicMock()
            )

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            with pytest.raises(OnyxError, match="Failed to fetch LiteLLM proxy models"):
                get_litellm_available_models(request, MagicMock(), mock_session)

    def test_401_raises_authentication_error(self) -> None:
        """Test that a 401 response raises OnyxError with authentication message."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.status_code = 401
            mock_get.side_effect = httpx.HTTPStatusError(
                "Unauthorized", request=MagicMock(), response=mock_response
            )

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="bad-key",
            )
            with pytest.raises(OnyxError, match="Authentication failed"):
                get_litellm_available_models(request, MagicMock(), mock_session)

    def test_404_raises_not_found_error(self) -> None:
        """Test that a 404 response raises OnyxError with endpoint not found message."""
        from onyx.server.manage.llm.api import get_litellm_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.status_code = 404
            mock_get.side_effect = httpx.HTTPStatusError(
                "Not Found", request=MagicMock(), response=mock_response
            )

            request = LitellmModelsRequest(
                api_base="http://localhost:4000",
                api_key="test-key",
            )
            with pytest.raises(OnyxError, match="endpoint not found"):
                get_litellm_available_models(request, MagicMock(), mock_session)


class TestGetBifrostAvailableModels:
    """Tests for the Bifrost model fetch endpoint."""

    @pytest.fixture
    def mock_bifrost_response(self) -> dict:
        """Mock response from Bifrost /v1/models endpoint."""
        return {
            "data": [
                {
                    "id": "anthropic/claude-3-5-sonnet",
                    "name": "Claude 3.5 Sonnet",
                    "context_length": 200000,
                },
                {
                    "id": "openai/gpt-4o",
                    "name": "GPT-4o",
                    "context_length": 128000,
                },
                {
                    "id": "deepseek/deepseek-r1",
                    "name": "DeepSeek R1",
                    "context_length": 64000,
                },
            ]
        }

    def test_returns_model_list(self, mock_bifrost_response: dict) -> None:
        """Test that endpoint returns properly formatted non-embedding models."""
        from onyx.server.manage.llm.api import get_bifrost_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_bifrost_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = BifrostModelsRequest(api_base="https://bifrost.example.com")
            results = get_bifrost_available_models(request, MagicMock(), mock_session)

            assert len(results) == 3
            assert all(isinstance(r, BifrostFinalModelResponse) for r in results)
            assert [r.name for r in results] == sorted(
                [r.name for r in results], key=str.lower
            )

    def test_infers_vision_support(self, mock_bifrost_response: dict) -> None:
        """Test that vision support is inferred from provider/model IDs."""
        from onyx.server.manage.llm.api import get_bifrost_available_models

        mock_session = MagicMock()

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = mock_bifrost_response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = BifrostModelsRequest(api_base="https://bifrost.example.com")
            results = get_bifrost_available_models(request, MagicMock(), mock_session)

            claude = next(r for r in results if r.name == "anthropic/claude-3-5-sonnet")
            gpt4o = next(r for r in results if r.name == "openai/gpt-4o")
            deepseek = next(r for r in results if r.name == "deepseek/deepseek-r1")

            assert claude.supports_image_input is True
            assert gpt4o.supports_image_input is True
            assert deepseek.supports_image_input is False

    def test_existing_v1_suffix_is_not_duplicated(self) -> None:
        """Test that an existing /v1 suffix still hits a single /v1/models endpoint."""
        from onyx.server.manage.llm.api import get_bifrost_available_models

        mock_session = MagicMock()
        response = {"data": [{"id": "openai/gpt-4o", "name": "GPT-4o"}]}

        with patch("onyx.server.manage.llm.api.httpx.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = response
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            request = BifrostModelsRequest(api_base="https://bifrost.example.com/v1")
            get_bifrost_available_models(request, MagicMock(), mock_session)

            called_url = mock_get.call_args[0][0]
            assert called_url == "https://bifrost.example.com/v1/models"

    def test_request_failure_is_logged_and_wrapped(self) -> None:
        """Test that request-layer failures are logged before raising OnyxError."""
        from onyx.server.manage.llm.api import get_bifrost_available_models

        mock_session = MagicMock()

        with (
            patch("onyx.server.manage.llm.api.httpx.get") as mock_get,
            patch("onyx.server.manage.llm.api.logger.warning") as mock_warning,
        ):
            mock_get.side_effect = httpx.ConnectError(
                "Connection refused", request=MagicMock()
            )

            request = BifrostModelsRequest(api_base="https://bifrost.example.com")
            with pytest.raises(OnyxError, match="Failed to fetch Bifrost models"):
                get_bifrost_available_models(request, MagicMock(), mock_session)

            mock_warning.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/server/manage/llm/test_llm_provider_utils.py
================================================
"""Tests for LLM provider utilities."""

from onyx.server.manage.llm.utils import generate_bedrock_display_name
from onyx.server.manage.llm.utils import generate_ollama_display_name
from onyx.server.manage.llm.utils import infer_vision_support
from onyx.server.manage.llm.utils import is_embedding_model
from onyx.server.manage.llm.utils import is_reasoning_model
from onyx.server.manage.llm.utils import is_valid_bedrock_model
from onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix


class TestGenerateBedrockDisplayName:
    """Tests for Bedrock display name generation."""

    def test_claude_model_basic(self) -> None:
        """Test basic Claude model name."""
        result = generate_bedrock_display_name(
            "anthropic.claude-3-5-sonnet-20241022-v2:0"
        )
        assert "Claude" in result
        assert "3.5" in result
        assert "Sonnet" in result

    def test_claude_model_with_region_prefix(self) -> None:
        """Test Claude model with region prefix (cross-region inference)."""
        result = generate_bedrock_display_name(
            "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
        )
        assert "Claude" in result
        assert "(us)" in result

    def test_llama_model(self) -> None:
        """Test Llama model name."""
        result = generate_bedrock_display_name("meta.llama3-70b-instruct-v1:0")
        assert "Llama" in result
        assert "70B" in result or "70b" in result.lower()

    def test_nova_model(self) -> None:
        """Test Amazon Nova model name."""
        result = generate_bedrock_display_name("amazon.nova-pro-v1:0")
        assert "Nova" in result
        assert "Pro" in result

    def test_mistral_model(self) -> None:
        """Test Mistral model name."""
        result = generate_bedrock_display_name("mistral.mistral-large-2407-v1:0")
        assert "Mistral" in result

    def test_removes_version_suffix(self) -> None:
        """Test that version suffixes like :0 are removed."""
        result = generate_bedrock_display_name("anthropic.claude-3-opus:0")
        assert ":0" not in result

    def test_removes_date_stamps(self) -> None:
        """Test that date stamps like -20241022-v2 are removed."""
        result = generate_bedrock_display_name(
            "anthropic.claude-3-5-sonnet-20241022-v2:0"
        )
        assert "20241022" not in result


class TestGenerateOllamaDisplayName:
    """Tests for Ollama display name generation."""

    def test_llama_basic(self) -> None:
        """Test basic Llama model."""
        result = generate_ollama_display_name("llama3:latest")
        assert "Llama" in result

    def test_llama_with_size(self) -> None:
        """Test Llama with size tag."""
        result = generate_ollama_display_name("llama3:70b")
        assert "Llama" in result
        assert "70B" in result

    def test_qwen_model(self) -> None:
        """Test Qwen model."""
        result = generate_ollama_display_name("qwen2.5:7b")
        assert "Qwen" in result
        assert "7B" in result

    def test_mistral_model(self) -> None:
        """Test Mistral model."""
        result = generate_ollama_display_name("mistral:latest")
        assert "Mistral" in result

    def test_deepseek_model(self) -> None:
        """Test DeepSeek model."""
        result = generate_ollama_display_name("deepseek-r1:14b")
        assert "DeepSeek" in result
        assert "14B" in result

    def test_skips_latest_tag(self) -> None:
        """Test that 'latest' tag is not shown."""
        result = generate_ollama_display_name("llama3:latest")
        assert "latest" not in result.lower()

    def test_version_number_preserved(self) -> None:
        """Test that version numbers like 3.3 are preserved."""
        result = generate_ollama_display_name("llama3.3:70b")
        assert "3.3" in result or "3 3" in result  # Either format is acceptable


class TestStripOpenrouterVendorPrefix:
    """Tests for OpenRouter vendor prefix stripping."""

    def test_strips_matching_prefix(self) -> None:
        """Test stripping matching vendor prefix."""
        result = strip_openrouter_vendor_prefix("Microsoft: Phi 4", "microsoft/phi-4")
        assert result == "Phi 4"

    def test_strips_mistral_prefix(self) -> None:
        """Test stripping Mistral prefix."""
        result = strip_openrouter_vendor_prefix(
            "Mistral: Mixtral 8x7B Instruct", "mistralai/mixtral-8x7b"
        )
        assert result == "Mixtral 8x7B Instruct"

    def test_preserves_when_no_prefix(self) -> None:
        """Test preserving name when no prefix pattern."""
        result = strip_openrouter_vendor_prefix(
            "Claude 3.5 Sonnet", "anthropic/claude-3.5-sonnet"
        )
        assert result == "Claude 3.5 Sonnet"

    def test_preserves_when_no_slash_in_id(self) -> None:
        """Test preserving name when no slash in model ID."""
        result = strip_openrouter_vendor_prefix("Some Model", "some-model")
        assert result == "Some Model"

    def test_handles_partial_vendor_match(self) -> None:
        """Test handling partial vendor name matches."""
        # "Mistral" should match "mistralai"
        result = strip_openrouter_vendor_prefix(
            "Mistral: Some Model", "mistralai/some-model"
        )
        assert result == "Some Model"


class TestIsValidBedrockModel:
    """Tests for Bedrock model validation."""

    def test_valid_claude_model(self) -> None:
        """Test valid Claude model."""
        assert is_valid_bedrock_model("anthropic.claude-3-5-sonnet", True) is True

    def test_invalid_embedding_model(self) -> None:
        """Test that embedding models are filtered."""
        assert is_valid_bedrock_model("amazon.titan-embed-text-v1", True) is False

    def test_invalid_image_model(self) -> None:
        """Test that image generation models are filtered."""
        assert is_valid_bedrock_model("stability.stable-diffusion-xl", True) is False

    def test_invalid_non_streaming(self) -> None:
        """Test that non-streaming models are filtered."""
        assert is_valid_bedrock_model("anthropic.claude-3-sonnet", False) is False

    def test_empty_model_id(self) -> None:
        """Test that empty model ID is invalid."""
        assert is_valid_bedrock_model("", True) is False


class TestInferVisionSupport:
    """Tests for vision support inference."""

    def test_claude_3_has_vision(self) -> None:
        """Test Claude 3 models have vision."""
        assert infer_vision_support("anthropic.claude-3-5-sonnet") is True

    def test_claude_4_has_vision(self) -> None:
        """Test Claude 4 models have vision."""
        assert infer_vision_support("anthropic.claude-4-opus") is True

    def test_nova_pro_has_vision(self) -> None:
        """Test Nova Pro has vision."""
        assert infer_vision_support("amazon.nova-pro-v1") is True

    def test_bifrost_claude_has_vision(self) -> None:
        """Test Bifrost Claude models are recognized as vision-capable."""
        assert infer_vision_support("anthropic/claude-3-5-sonnet") is True

    def test_bifrost_gpt4o_has_vision(self) -> None:
        """Test Bifrost GPT-4o models are recognized as vision-capable."""
        assert infer_vision_support("openai/gpt-4o") is True

    def test_mistral_no_vision(self) -> None:
        """Test Mistral doesn't have vision (not in known list)."""
        assert infer_vision_support("mistral.mistral-large") is False


class TestIsReasoningModel:
    """Tests for reasoning model detection."""

    def test_o1_is_reasoning(self) -> None:
        """Test o1 models are detected as reasoning."""
        assert is_reasoning_model("openai/o1-preview", "O1 Preview") is True

    def test_o3_is_reasoning(self) -> None:
        """Test o3 models are detected as reasoning."""
        assert is_reasoning_model("openai/o3-mini", "O3 Mini") is True

    def test_deepseek_r1_is_reasoning(self) -> None:
        """Test DeepSeek R1 is detected as reasoning."""
        assert is_reasoning_model("deepseek/deepseek-r1", "DeepSeek R1") is True

    def test_qwq_is_reasoning(self) -> None:
        """Test QwQ is detected as reasoning."""
        assert is_reasoning_model("qwen/qwq-32b", "QwQ 32B") is True

    def test_gpt_4_not_reasoning(self) -> None:
        """Test GPT-4 is not detected as reasoning."""
        assert is_reasoning_model("openai/gpt-4", "GPT-4") is False

    def test_claude_not_reasoning(self) -> None:
        """Test Claude is not detected as reasoning."""
        assert (
            is_reasoning_model("anthropic/claude-3-5-sonnet", "Claude 3.5 Sonnet")
            is False
        )


class TestIsEmbeddingModel:
    """Tests for embedding model detection."""

    def test_openai_embedding_ada(self) -> None:
        assert is_embedding_model("text-embedding-ada-002") is True

    def test_openai_embedding_3_small(self) -> None:
        assert is_embedding_model("text-embedding-3-small") is True

    def test_openai_embedding_3_large(self) -> None:
        assert is_embedding_model("text-embedding-3-large") is True

    def test_cohere_embed_model(self) -> None:
        assert is_embedding_model("embed-english-v3.0") is True

    def test_bedrock_titan_embed(self) -> None:
        assert is_embedding_model("amazon.titan-embed-text-v1") is True

    def test_gpt4o_not_embedding(self) -> None:
        assert is_embedding_model("gpt-4o") is False

    def test_gpt4_not_embedding(self) -> None:
        assert is_embedding_model("gpt-4") is False

    def test_dall_e_not_embedding(self) -> None:
        assert is_embedding_model("dall-e-3") is False

    def test_unknown_custom_model_not_embedding(self) -> None:
        """Custom/local models not in litellm's model DB should default to False."""
        assert is_embedding_model("my-custom-local-model-v1") is False


================================================
FILE: backend/tests/unit/onyx/server/manage/test_bulk_invite_limit.py
================================================
"""Test bulk invite limit for free trial tenants."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from fastapi import HTTPException

from onyx.server.manage.models import EmailInviteStatus
from onyx.server.manage.users import bulk_invite_users


@patch("onyx.server.manage.users.MULTI_TENANT", True)
@patch("onyx.server.manage.users.is_tenant_on_trial_fn", return_value=True)
@patch("onyx.server.manage.users.get_current_tenant_id", return_value="test_tenant")
@patch("onyx.server.manage.users.get_invited_users", return_value=[])
@patch("onyx.server.manage.users.get_all_users", return_value=[])
@patch("onyx.server.manage.users.NUM_FREE_TRIAL_USER_INVITES", 5)
def test_trial_tenant_cannot_exceed_invite_limit(*_mocks: None) -> None:
    """Trial tenants cannot invite more users than the configured limit."""
    emails = [f"user{i}@example.com" for i in range(6)]

    with pytest.raises(HTTPException) as exc_info:
        bulk_invite_users(emails=emails)

    assert exc_info.value.status_code == 403
    assert "invite limit" in exc_info.value.detail.lower()


@patch("onyx.server.manage.users.MULTI_TENANT", True)
@patch("onyx.server.manage.users.DEV_MODE", True)
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", False)
@patch("onyx.server.manage.users.is_tenant_on_trial_fn", return_value=True)
@patch("onyx.server.manage.users.get_current_tenant_id", return_value="test_tenant")
@patch("onyx.server.manage.users.get_invited_users", return_value=[])
@patch("onyx.server.manage.users.get_all_users", return_value=[])
@patch("onyx.server.manage.users.write_invited_users", return_value=3)
@patch("onyx.server.manage.users.enforce_seat_limit")
@patch("onyx.server.manage.users.NUM_FREE_TRIAL_USER_INVITES", 5)
@patch(
    "onyx.server.manage.users.fetch_ee_implementation_or_noop",
    return_value=lambda *_args: None,
)
def test_trial_tenant_can_invite_within_limit(*_mocks: None) -> None:
    """Trial tenants can invite users when under the limit."""
    emails = ["user1@example.com", "user2@example.com", "user3@example.com"]

    result = bulk_invite_users(emails=emails)

    assert result.invited_count == 3
    assert result.email_invite_status == EmailInviteStatus.DISABLED


# --- email_invite_status tests ---

_COMMON_PATCHES = [
    patch("onyx.server.manage.users.MULTI_TENANT", False),
    patch("onyx.server.manage.users.get_current_tenant_id", return_value="test_tenant"),
    patch("onyx.server.manage.users.get_invited_users", return_value=[]),
    patch("onyx.server.manage.users.get_all_users", return_value=[]),
    patch("onyx.server.manage.users.write_invited_users", return_value=1),
    patch("onyx.server.manage.users.enforce_seat_limit"),
]


def _with_common_patches(fn: object) -> object:
    for p in reversed(_COMMON_PATCHES):
        fn = p(fn)  # type: ignore
    return fn


@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", False)
def test_email_invite_status_disabled(*_mocks: None) -> None:
    """When email invites are disabled, status is disabled."""
    result = bulk_invite_users(emails=["user@example.com"])

    assert result.email_invite_status == EmailInviteStatus.DISABLED


@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", False)
def test_email_invite_status_not_configured(*_mocks: None) -> None:
    """When email invites are enabled but no server is configured, status is not_configured."""
    result = bulk_invite_users(emails=["user@example.com"])

    assert result.email_invite_status == EmailInviteStatus.NOT_CONFIGURED


@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", True)
@patch("onyx.server.manage.users.send_user_email_invite")
def test_email_invite_status_sent(mock_send: MagicMock, *_mocks: None) -> None:
    """When email invites are enabled and configured, status is sent."""
    result = bulk_invite_users(emails=["user@example.com"])

    mock_send.assert_called_once()
    assert result.email_invite_status == EmailInviteStatus.SENT


@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", True)
@patch(
    "onyx.server.manage.users.send_user_email_invite",
    side_effect=Exception("SMTP auth failed"),
)
def test_email_invite_status_send_failed(*_mocks: None) -> None:
    """When email sending throws, status is send_failed and invite is still saved."""
    result = bulk_invite_users(emails=["user@example.com"])

    assert result.email_invite_status == EmailInviteStatus.SEND_FAILED
    assert result.invited_count == 1


================================================
FILE: backend/tests/unit/onyx/server/manage/voice/test_voice_api_validation.py
================================================
import pytest

from onyx.error_handling.exceptions import OnyxError
from onyx.server.manage.voice.api import _validate_voice_api_base


def test_validate_voice_api_base_blocks_private_for_non_azure() -> None:
    with pytest.raises(OnyxError, match="Invalid target URI"):
        _validate_voice_api_base("openai", "http://127.0.0.1:11434")


def test_validate_voice_api_base_allows_private_for_azure() -> None:
    validated = _validate_voice_api_base("azure", "http://127.0.0.1:5000")
    assert validated == "http://127.0.0.1:5000"


def test_validate_voice_api_base_blocks_metadata_for_azure() -> None:
    with pytest.raises(OnyxError, match="Invalid target URI"):
        _validate_voice_api_base("azure", "http://metadata.google.internal/")


def test_validate_voice_api_base_returns_none_for_none() -> None:
    assert _validate_voice_api_base("openai", None) is None


================================================
FILE: backend/tests/unit/onyx/server/scim/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/server/scim/conftest.py
================================================
"""Shared fixtures for SCIM endpoint unit tests."""

from __future__ import annotations

import json
from collections.abc import Generator
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from ee.onyx.server.scim.api import ScimJSONResponse
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimListResponse
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.providers.base import ScimProvider
from ee.onyx.server.scim.providers.entra import EntraProvider
from ee.onyx.server.scim.providers.okta import OktaProvider
from onyx.db.models import ScimToken
from onyx.db.models import ScimUserMapping
from onyx.db.models import User
from onyx.db.models import UserGroup
from onyx.db.models import UserRole

# Every supported SCIM provider must appear here so that all endpoint tests
# run against it.  When adding a new provider, add its class to this list.
SCIM_PROVIDERS: list[type[ScimProvider]] = [OktaProvider, EntraProvider]


@pytest.fixture
def mock_db_session() -> MagicMock:
    """A MagicMock standing in for a SQLAlchemy Session."""
    return MagicMock(spec=Session)


@pytest.fixture
def mock_token() -> MagicMock:
    """A MagicMock standing in for a verified ScimToken."""
    token = MagicMock(spec=ScimToken)
    token.id = 1
    return token


@pytest.fixture(params=SCIM_PROVIDERS, ids=[p.__name__ for p in SCIM_PROVIDERS])
def provider(request: pytest.FixtureRequest) -> ScimProvider:
    """Parameterized provider — runs each test with every provider in SCIM_PROVIDERS."""
    return request.param()


@pytest.fixture
def mock_dal() -> Generator[MagicMock, None, None]:
    """Patch ScimDAL construction in api module and yield the mock instance."""
    with patch("ee.onyx.server.scim.api.ScimDAL") as cls:
        dal = cls.return_value
        # User defaults
        dal.get_user.return_value = None
        dal.get_user_by_email.return_value = None
        dal.get_user_mapping_by_user_id.return_value = None
        dal.get_user_mapping_by_external_id.return_value = None
        dal.list_users.return_value = ([], 0)
        # Group defaults
        dal.get_group.return_value = None
        dal.get_group_by_name.return_value = None
        dal.get_group_mapping_by_group_id.return_value = None
        dal.get_group_mapping_by_external_id.return_value = None
        dal.get_group_members.return_value = []
        dal.list_groups.return_value = ([], 0)
        # User-group relationship defaults
        dal.get_user_groups.return_value = []
        dal.get_users_groups_batch.return_value = {}
        yield dal


def make_scim_user(**kwargs: Any) -> ScimUserResource:
    """Build a ScimUserResource with sensible defaults."""
    defaults: dict[str, Any] = {
        "userName": "test@example.com",
        "externalId": "ext-default",
        "active": True,
        "name": ScimName(givenName="Test", familyName="User"),
    }
    defaults.update(kwargs)
    return ScimUserResource(**defaults)


def make_scim_group(**kwargs: Any) -> ScimGroupResource:
    """Build a ScimGroupResource with sensible defaults."""
    defaults: dict[str, Any] = {"displayName": "Engineering"}
    defaults.update(kwargs)
    return ScimGroupResource(**defaults)


def make_db_user(**kwargs: Any) -> MagicMock:
    """Build a mock User ORM object with configurable attributes."""
    user = MagicMock(spec=User)
    user.id = kwargs.get("id", uuid4())
    user.email = kwargs.get("email", "test@example.com")
    user.is_active = kwargs.get("is_active", True)
    user.personal_name = kwargs.get("personal_name", "Test User")
    user.role = kwargs.get("role", UserRole.BASIC)
    return user


def make_db_group(**kwargs: Any) -> MagicMock:
    """Build a mock UserGroup ORM object with configurable attributes."""
    group = MagicMock(spec=UserGroup)
    group.id = kwargs.get("id", 1)
    group.name = kwargs.get("name", "Engineering")
    group.is_up_for_deletion = kwargs.get("is_up_for_deletion", False)
    group.is_up_to_date = kwargs.get("is_up_to_date", True)
    group.is_default = kwargs.get("is_default", False)
    return group


def make_user_mapping(**kwargs: Any) -> MagicMock:
    """Build a mock ScimUserMapping ORM object with configurable attributes."""
    mapping = MagicMock(spec=ScimUserMapping)
    mapping.id = kwargs.get("id", 1)
    mapping.external_id = kwargs.get("external_id", "ext-default")
    mapping.user_id = kwargs.get("user_id", uuid4())
    mapping.scim_username = kwargs.get("scim_username", None)
    mapping.department = kwargs.get("department", None)
    mapping.manager = kwargs.get("manager", None)
    mapping.given_name = kwargs.get("given_name", None)
    mapping.family_name = kwargs.get("family_name", None)
    mapping.scim_emails_json = kwargs.get("scim_emails_json", None)
    return mapping


def assert_scim_error(result: object, expected_status: int) -> None:
    """Assert *result* is a JSONResponse with the given status code."""
    assert isinstance(result, JSONResponse)
    assert result.status_code == expected_status


# ---------------------------------------------------------------------------
# Response parsing helpers
# ---------------------------------------------------------------------------


def parse_scim_user(result: object, *, status: int = 200) -> ScimUserResource:
    """Assert *result* is a ScimJSONResponse and parse as ScimUserResource."""
    assert isinstance(
        result, ScimJSONResponse
    ), f"Expected ScimJSONResponse, got {type(result).__name__}"
    assert result.status_code == status
    return ScimUserResource.model_validate(json.loads(result.body))


def parse_scim_group(result: object, *, status: int = 200) -> ScimGroupResource:
    """Assert *result* is a ScimJSONResponse and parse as ScimGroupResource."""
    assert isinstance(
        result, ScimJSONResponse
    ), f"Expected ScimJSONResponse, got {type(result).__name__}"
    assert result.status_code == status
    return ScimGroupResource.model_validate(json.loads(result.body))


def parse_scim_list(result: object) -> ScimListResponse:
    """Assert *result* is a ScimJSONResponse and parse as ScimListResponse."""
    assert isinstance(
        result, ScimJSONResponse
    ), f"Expected ScimJSONResponse, got {type(result).__name__}"
    assert result.status_code == 200
    return ScimListResponse.model_validate(json.loads(result.body))


================================================
FILE: backend/tests/unit/onyx/server/scim/test_admin.py
================================================
"""Tests for SCIM admin token management endpoints."""

from datetime import datetime
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from fastapi import HTTPException
from sqlalchemy.orm import Session

from ee.onyx.db.scim import ScimDAL
from ee.onyx.server.enterprise_settings.api import create_scim_token
from ee.onyx.server.enterprise_settings.api import get_active_scim_token
from ee.onyx.server.scim.models import ScimTokenCreate
from onyx.db.models import ScimToken
from onyx.db.models import User


@pytest.fixture
def mock_db_session() -> MagicMock:
    return MagicMock(spec=Session)


@pytest.fixture
def scim_dal(mock_db_session: MagicMock) -> ScimDAL:
    return ScimDAL(mock_db_session)


@pytest.fixture
def admin_user() -> User:
    user = User(id=uuid4(), email="admin@test.com")
    user.is_active = True
    return user


def _make_token(token_id: int, name: str, *, is_active: bool = True) -> ScimToken:
    return ScimToken(
        id=token_id,
        name=name,
        hashed_token="h" * 64,
        token_display="onyx_scim_****abcd",
        is_active=is_active,
        created_by_id=uuid4(),
        created_at=datetime(2026, 1, 1),
        last_used_at=None,
    )


class TestGetActiveToken:
    def test_returns_token_metadata(self, scim_dal: ScimDAL, admin_user: User) -> None:
        token = _make_token(1, "prod-token")
        scim_dal._session.scalar.return_value = token  # type: ignore[attr-defined]

        result = get_active_scim_token(_=admin_user, dal=scim_dal)

        assert result.id == 1
        assert result.name == "prod-token"
        assert result.is_active is True

    def test_raises_404_when_no_active_token(
        self, scim_dal: ScimDAL, admin_user: User
    ) -> None:
        scim_dal._session.scalar.return_value = None  # type: ignore[attr-defined]

        with pytest.raises(HTTPException) as exc_info:
            get_active_scim_token(_=admin_user, dal=scim_dal)

        assert exc_info.value.status_code == 404


class TestCreateToken:
    @patch("ee.onyx.server.enterprise_settings.api.generate_scim_token")
    def test_creates_token_and_revokes_previous(
        self,
        mock_generate: MagicMock,
        scim_dal: ScimDAL,
        admin_user: User,
    ) -> None:
        mock_generate.return_value = ("raw_token_val", "hashed_val", "****abcd")

        # Simulate one existing active token that should get revoked
        existing = _make_token(1, "old-token", is_active=True)
        scim_dal._session.scalars.return_value.all.return_value = [existing]  # type: ignore[attr-defined]

        # Simulate DB defaults that would be set on INSERT/flush
        def fake_add(obj: ScimToken) -> None:
            obj.id = 2
            obj.is_active = True
            obj.created_at = datetime(2026, 2, 1)

        scim_dal._session.add.side_effect = fake_add  # type: ignore[attr-defined]

        body = ScimTokenCreate(name="new-token")
        result = create_scim_token(body=body, user=admin_user, dal=scim_dal)

        # Previous token was revoked (by create_token's internal revocation)
        assert existing.is_active is False

        # New token returned with raw value
        assert result.raw_token == "raw_token_val"
        assert result.name == "new-token"
        assert result.is_active is True

        # Session was committed
        scim_dal._session.commit.assert_called_once()  # type: ignore[attr-defined]

    @patch("ee.onyx.server.enterprise_settings.api.generate_scim_token")
    def test_creates_first_token_when_none_exist(
        self,
        mock_generate: MagicMock,
        scim_dal: ScimDAL,
        admin_user: User,
    ) -> None:
        mock_generate.return_value = ("raw_token_val", "hashed_val", "****abcd")

        # No existing tokens
        scim_dal._session.scalars.return_value.all.return_value = []  # type: ignore[attr-defined]

        def fake_add(obj: ScimToken) -> None:
            obj.id = 1
            obj.is_active = True
            obj.created_at = datetime(2026, 2, 1)

        scim_dal._session.add.side_effect = fake_add  # type: ignore[attr-defined]

        body = ScimTokenCreate(name="first-token")
        result = create_scim_token(body=body, user=admin_user, dal=scim_dal)

        assert result.raw_token == "raw_token_val"
        assert result.name == "first-token"
        assert result.is_active is True


================================================
FILE: backend/tests/unit/onyx/server/scim/test_auth.py
================================================
from unittest.mock import MagicMock

import pytest

from ee.onyx.server.scim.auth import _hash_scim_token
from ee.onyx.server.scim.auth import generate_scim_token
from ee.onyx.server.scim.auth import SCIM_TOKEN_PREFIX
from ee.onyx.server.scim.auth import ScimAuthError
from ee.onyx.server.scim.auth import verify_scim_token


class TestGenerateScimToken:
    def test_returns_three_strings(self) -> None:
        raw, hashed, display = generate_scim_token()
        assert isinstance(raw, str)
        assert isinstance(hashed, str)
        assert isinstance(display, str)

    def test_raw_token_has_prefix(self) -> None:
        raw, _, _ = generate_scim_token()
        assert raw.startswith(SCIM_TOKEN_PREFIX)

    def test_hash_is_sha256_hex(self) -> None:
        raw, hashed, _ = generate_scim_token()
        assert len(hashed) == 64
        assert hashed == _hash_scim_token(raw)

    def test_display_shows_last_four_chars(self) -> None:
        raw, _, display = generate_scim_token()
        assert display.endswith(raw[-4:])
        assert "****" in display

    def test_tokens_are_unique(self) -> None:
        tokens = {generate_scim_token()[0] for _ in range(10)}
        assert len(tokens) == 10


class TestHashScimToken:
    def test_deterministic(self) -> None:
        assert _hash_scim_token("test") == _hash_scim_token("test")

    def test_different_inputs_different_hashes(self) -> None:
        assert _hash_scim_token("a") != _hash_scim_token("b")


class TestVerifyScimToken:
    def _make_request(self, auth_header: str | None = None) -> MagicMock:
        request = MagicMock()
        headers: dict[str, str] = {}
        if auth_header is not None:
            headers["Authorization"] = auth_header
        request.headers = headers
        return request

    def _make_dal(self, token: MagicMock | None = None) -> MagicMock:
        dal = MagicMock()
        dal.get_token_by_hash.return_value = token
        return dal

    def test_missing_header_raises_401(self) -> None:
        request = self._make_request(None)
        dal = self._make_dal()
        with pytest.raises(ScimAuthError) as exc_info:
            verify_scim_token(request, dal)
        assert exc_info.value.status_code == 401
        assert "Missing" in str(exc_info.value.detail)

    def test_wrong_prefix_raises_401(self) -> None:
        request = self._make_request("Bearer on_some_api_key")
        dal = self._make_dal()
        with pytest.raises(ScimAuthError) as exc_info:
            verify_scim_token(request, dal)
        assert exc_info.value.status_code == 401

    def test_token_not_in_db_raises_401(self) -> None:
        raw, _, _ = generate_scim_token()
        request = self._make_request(f"Bearer {raw}")
        dal = self._make_dal(token=None)
        with pytest.raises(ScimAuthError) as exc_info:
            verify_scim_token(request, dal)
        assert exc_info.value.status_code == 401
        assert "Invalid" in str(exc_info.value.detail)

    def test_inactive_token_raises_401(self) -> None:
        raw, _, _ = generate_scim_token()
        request = self._make_request(f"Bearer {raw}")
        mock_token = MagicMock()
        mock_token.is_active = False
        dal = self._make_dal(token=mock_token)
        with pytest.raises(ScimAuthError) as exc_info:
            verify_scim_token(request, dal)
        assert exc_info.value.status_code == 401
        assert "revoked" in str(exc_info.value.detail)

    def test_valid_token_returns_token(self) -> None:
        raw, _, _ = generate_scim_token()
        request = self._make_request(f"Bearer {raw}")
        mock_token = MagicMock()
        mock_token.is_active = True
        dal = self._make_dal(token=mock_token)
        result = verify_scim_token(request, dal)
        assert result is mock_token
        dal.get_token_by_hash.assert_called_once()


================================================
FILE: backend/tests/unit/onyx/server/scim/test_entra.py
================================================
"""Comprehensive Entra ID (Azure AD) SCIM compatibility tests.

Covers the full Entra provisioning lifecycle: service discovery, user CRUD
with enterprise extension schema, group CRUD with excludedAttributes, and
all Entra-specific behavioral quirks (PascalCase ops, enterprise URN in
PATCH value dicts).
"""

from __future__ import annotations

import json
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest
from fastapi import Response

from ee.onyx.server.scim.api import create_user
from ee.onyx.server.scim.api import delete_user
from ee.onyx.server.scim.api import get_group
from ee.onyx.server.scim.api import get_resource_types
from ee.onyx.server.scim.api import get_schemas
from ee.onyx.server.scim.api import get_service_provider_config
from ee.onyx.server.scim.api import get_user
from ee.onyx.server.scim.api import list_groups
from ee.onyx.server.scim.api import list_users
from ee.onyx.server.scim.api import patch_group
from ee.onyx.server.scim.api import patch_user
from ee.onyx.server.scim.api import replace_user
from ee.onyx.server.scim.api import ScimJSONResponse
from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
from ee.onyx.server.scim.models import ScimEnterpriseExtension
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimManagerRef
from ee.onyx.server.scim.models import ScimMappingFields
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimPatchRequest
from ee.onyx.server.scim.models import ScimPatchResourceValue
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.providers.base import ScimProvider
from ee.onyx.server.scim.providers.entra import EntraProvider
from tests.unit.onyx.server.scim.conftest import make_db_group
from tests.unit.onyx.server.scim.conftest import make_db_user
from tests.unit.onyx.server.scim.conftest import make_scim_user
from tests.unit.onyx.server.scim.conftest import make_user_mapping
from tests.unit.onyx.server.scim.conftest import parse_scim_group
from tests.unit.onyx.server.scim.conftest import parse_scim_list
from tests.unit.onyx.server.scim.conftest import parse_scim_user


@pytest.fixture
def entra_provider() -> ScimProvider:
    """An EntraProvider instance for Entra-specific endpoint tests."""
    return EntraProvider()


# ---------------------------------------------------------------------------
# Service Discovery
# ---------------------------------------------------------------------------


class TestEntraServiceDiscovery:
    """Entra expects enterprise extension in discovery endpoints."""

    def test_service_provider_config_advertises_patch(self) -> None:
        config = get_service_provider_config()
        assert config.patch.supported is True

    def test_resource_types_include_enterprise_extension(self) -> None:
        result = get_resource_types()
        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert "Resources" in parsed
        user_type = next(rt for rt in parsed["Resources"] if rt["id"] == "User")
        extension_schemas = [ext["schema"] for ext in user_type["schemaExtensions"]]
        assert SCIM_ENTERPRISE_USER_SCHEMA in extension_schemas

    def test_schemas_include_enterprise_user(self) -> None:
        result = get_schemas()
        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        schema_ids = [s["id"] for s in parsed["Resources"]]
        assert SCIM_ENTERPRISE_USER_SCHEMA in schema_ids

    def test_enterprise_schema_has_expected_attributes(self) -> None:
        result = get_schemas()
        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        enterprise = next(
            s for s in parsed["Resources"] if s["id"] == SCIM_ENTERPRISE_USER_SCHEMA
        )
        attr_names = {a["name"] for a in enterprise["attributes"]}
        assert "department" in attr_names
        assert "manager" in attr_names

    def test_service_discovery_content_type(self) -> None:
        """SCIM responses must use application/scim+json content type."""
        result = get_resource_types()
        assert isinstance(result, ScimJSONResponse)
        assert result.media_type == "application/scim+json"


# ---------------------------------------------------------------------------
# User Lifecycle (Entra-specific)
# ---------------------------------------------------------------------------


class TestEntraUserLifecycle:
    """Test user CRUD through Entra's lens: enterprise schemas, PascalCase ops."""

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_create_user_includes_enterprise_schema(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(userName="alice@contoso.com")

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result, status=201)
        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas
        assert SCIM_USER_SCHEMA in resource.schemas

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_create_user_with_enterprise_extension(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Enterprise extension department/manager should round-trip on create."""
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(
            userName="alice@contoso.com",
            enterprise_extension=ScimEnterpriseExtension(
                department="Engineering",
                manager=ScimManagerRef(value="mgr-uuid-123"),
            ),
        )

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result, status=201)
        assert resource.enterprise_extension is not None
        assert resource.enterprise_extension.department == "Engineering"
        assert resource.enterprise_extension.manager is not None
        assert resource.enterprise_extension.manager.value == "mgr-uuid-123"

        # Verify DAL received the enterprise fields
        mock_dal.create_user_mapping.assert_called_once()
        call_kwargs = mock_dal.create_user_mapping.call_args[1]
        assert call_kwargs["fields"] == ScimMappingFields(
            department="Engineering",
            manager="mgr-uuid-123",
            given_name="Test",
            family_name="User",
        )

    def test_get_user_includes_enterprise_schema(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="alice@contoso.com")
        mock_dal.get_user.return_value = user

        result = get_user(
            user_id=str(user.id),
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas

    def test_get_user_returns_enterprise_extension_data(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """GET should return stored enterprise extension data."""
        user = make_db_user(email="alice@contoso.com")
        mock_dal.get_user.return_value = user
        mapping = make_user_mapping(user_id=user.id)
        mapping.department = "Sales"
        mapping.manager = "mgr-456"
        mock_dal.get_user_mapping_by_user_id.return_value = mapping

        result = get_user(
            user_id=str(user.id),
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        assert resource.enterprise_extension is not None
        assert resource.enterprise_extension.department == "Sales"
        assert resource.enterprise_extension.manager is not None
        assert resource.enterprise_extension.manager.value == "mgr-456"

    def test_list_users_includes_enterprise_schema(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="alice@contoso.com")
        mapping = make_user_mapping(external_id="entra-ext-1", user_id=user.id)
        mock_dal.list_users.return_value = ([(user, mapping)], 1)

        result = list_users(
            filter=None,
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        resource = parsed.Resources[0]
        assert isinstance(resource, ScimUserResource)
        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas

    def test_patch_user_deactivate_with_pascal_case_replace(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ``"Replace"`` (PascalCase) instead of ``"replace"``."""
        user = make_db_user(is_active=True)
        mock_dal.get_user.return_value = user
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op="Replace",  # type: ignore[arg-type]
                    path="active",
                    value=False,
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        # Mock doesn't propagate the change, so verify via the DAL call
        mock_dal.update_user.assert_called_once()
        call_kwargs = mock_dal.update_user.call_args
        assert call_kwargs[1]["is_active"] is False

    def test_patch_user_add_external_id_with_pascal_case(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ``"Add"`` (PascalCase) instead of ``"add"``."""
        user = make_db_user()
        mock_dal.get_user.return_value = user
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op="Add",  # type: ignore[arg-type]
                    path="externalId",
                    value="entra-ext-999",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        # Verify the patched externalId was synced to the DAL
        mock_dal.sync_user_external_id.assert_called_once()
        call_args = mock_dal.sync_user_external_id.call_args
        assert call_args[0][1] == "entra-ext-999"

    def test_patch_user_enterprise_extension_in_value_dict(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends enterprise extension URN as key in path-less PATCH value
        dicts — enterprise data should be stored, not ignored."""
        user = make_db_user()
        mock_dal.get_user.return_value = user

        value = ScimPatchResourceValue(active=False)
        assert value.__pydantic_extra__ is not None
        value.__pydantic_extra__[
            "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
        ] = {"department": "Engineering"}

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path=None,
                    value=value,
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        # Verify active=False was applied
        mock_dal.update_user.assert_called_once()
        call_kwargs = mock_dal.update_user.call_args
        assert call_kwargs[1]["is_active"] is False
        # Verify enterprise data was passed to DAL
        mock_dal.sync_user_external_id.assert_called_once()
        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]
        assert sync_kwargs["fields"] == ScimMappingFields(
            department="Engineering",
            given_name="Test",
            family_name="User",
            scim_emails_json='[{"value": "test@example.com", "type": "work", "primary": true}]',
        )

    def test_patch_user_remove_external_id(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """PATCH remove op should clear the target field."""
        user = make_db_user()
        mock_dal.get_user.return_value = user
        mapping = make_user_mapping(user_id=user.id)
        mapping.external_id = "ext-to-remove"
        mock_dal.get_user_mapping_by_user_id.return_value = mapping

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REMOVE,
                    path="externalId",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        # externalId should be cleared (None)
        mock_dal.sync_user_external_id.assert_called_once()
        call_args = mock_dal.sync_user_external_id.call_args
        assert call_args[0][1] is None

    def test_patch_user_emails_primary_eq_true_value(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """PATCH with path emails[primary eq true].value should update
        the primary email entry, not userName."""
        user = make_db_user(email="old@contoso.com")
        mock_dal.get_user.return_value = user

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="emails[primary eq true].value",
                    value="new@contoso.com",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        # userName should remain unchanged — emails and userName are separate
        assert resource.userName == "old@contoso.com"
        # Primary email should be updated
        primary_emails = [e for e in resource.emails if e.primary]
        assert len(primary_emails) == 1
        assert primary_emails[0].value == "new@contoso.com"

    def test_patch_user_enterprise_urn_department_path(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """PATCH with dotted enterprise URN path should store department."""
        user = make_db_user()
        mock_dal.get_user.return_value = user

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:department",
                    value="Marketing",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        mock_dal.sync_user_external_id.assert_called_once()
        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]
        assert sync_kwargs["fields"] == ScimMappingFields(
            department="Marketing",
            given_name="Test",
            family_name="User",
            scim_emails_json='[{"value": "test@example.com", "type": "work", "primary": true}]',
        )

    def test_replace_user_includes_enterprise_schema(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="old@contoso.com")
        mock_dal.get_user.return_value = user
        resource = make_scim_user(
            userName="new@contoso.com",
            name=ScimName(givenName="New", familyName="Name"),
        )

        result = replace_user(
            user_id=str(user.id),
            user_resource=resource,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas

    def test_replace_user_with_enterprise_extension(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """PUT with enterprise extension should store the fields."""
        user = make_db_user(email="alice@contoso.com")
        mock_dal.get_user.return_value = user
        resource = make_scim_user(
            userName="alice@contoso.com",
            enterprise_extension=ScimEnterpriseExtension(
                department="HR",
                manager=ScimManagerRef(value="boss-id"),
            ),
        )

        result = replace_user(
            user_id=str(user.id),
            user_resource=resource,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        mock_dal.sync_user_external_id.assert_called_once()
        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]
        assert sync_kwargs["fields"] == ScimMappingFields(
            department="HR",
            manager="boss-id",
            given_name="Test",
            family_name="User",
        )

    def test_delete_user_returns_204(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        user = make_db_user()
        mock_dal.get_user.return_value = user
        mock_dal.get_user_mapping_by_user_id.return_value = MagicMock(id=1)

        result = delete_user(
            user_id=str(user.id),
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert isinstance(result, Response)
        assert result.status_code == 204

    def test_double_delete_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        """Second DELETE should return 404 — the SCIM mapping is gone."""
        user = make_db_user()
        mock_dal.get_user.return_value = user
        # No mapping — user was already deleted from SCIM's perspective
        mock_dal.get_user_mapping_by_user_id.return_value = None

        result = delete_user(
            user_id=str(user.id),
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        assert result.status_code == 404

    def test_name_formatted_preserved_on_create(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """When name.formatted is provided, it should be used as personal_name."""
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(
            userName="alice@contoso.com",
            name=ScimName(
                givenName="Alice",
                familyName="Smith",
                formatted="Dr. Alice Smith",
            ),
        )

        with patch(
            "ee.onyx.server.scim.api._check_seat_availability", return_value=None
        ):
            result = create_user(
                user_resource=resource,
                _token=mock_token,
                provider=entra_provider,
                db_session=mock_db_session,
            )

        parse_scim_user(result, status=201)
        # The User constructor should have received the formatted name
        mock_dal.add_user.assert_called_once()
        created_user = mock_dal.add_user.call_args[0][0]
        assert created_user.personal_name == "Dr. Alice Smith"


# ---------------------------------------------------------------------------
# Group Lifecycle (Entra-specific)
# ---------------------------------------------------------------------------


class TestEntraGroupLifecycle:
    """Test group CRUD with Entra-specific behaviors."""

    def test_get_group_standard_response(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=10, name="Contoso Engineering")
        mock_dal.get_group.return_value = group
        uid = uuid4()
        mock_dal.get_group_members.return_value = [(uid, "alice@contoso.com")]

        result = get_group(
            group_id="10",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_group(result)
        assert resource.displayName == "Contoso Engineering"
        assert len(resource.members) == 1

    def test_list_groups_with_excluded_attributes_members(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ?excludedAttributes=members on group list queries."""
        group = make_db_group(id=10, name="Engineering")
        uid = uuid4()
        mock_dal.list_groups.return_value = ([(group, "ext-g-1")], 1)
        mock_dal.get_group_members.return_value = [(uid, "alice@contoso.com")]

        result = list_groups(
            filter=None,
            excludedAttributes="members",
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert parsed["totalResults"] == 1
        resource = parsed["Resources"][0]
        assert "members" not in resource
        assert resource["displayName"] == "Engineering"

    def test_get_group_with_excluded_attributes_members(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ?excludedAttributes=members on single group GET."""
        group = make_db_group(id=10, name="Engineering")
        uid = uuid4()
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = [(uid, "alice@contoso.com")]

        result = get_group(
            group_id="10",
            excludedAttributes="members",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert "members" not in parsed
        assert parsed["displayName"] == "Engineering"

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_patch_group_add_members_with_pascal_case(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ``"Add"`` (PascalCase) for group member additions."""
        group = make_db_group(id=10)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []
        mock_dal.validate_member_ids.return_value = []

        uid = str(uuid4())
        patched = ScimGroupResource(
            id="10",
            displayName="Engineering",
            members=[ScimGroupMember(value=uid)],
        )
        mock_apply.return_value = (patched, [uid], [])

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op="Add",  # type: ignore[arg-type]
                    path="members",
                    value=[ScimGroupMember(value=uid)],
                )
            ]
        )

        result = patch_group(
            group_id="10",
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.upsert_group_members.assert_called_once()

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_patch_group_remove_member_with_pascal_case(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra sends ``"Remove"`` (PascalCase) for group member removals."""
        group = make_db_group(id=10)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        uid = str(uuid4())
        patched = ScimGroupResource(id="10", displayName="Engineering", members=[])
        mock_apply.return_value = (patched, [], [uid])

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op="Remove",  # type: ignore[arg-type]
                    path=f'members[value eq "{uid}"]',
                )
            ]
        )

        result = patch_group(
            group_id="10",
            patch_request=patch_req,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.remove_group_members.assert_called_once()


# ---------------------------------------------------------------------------
# excludedAttributes (RFC 7644 §3.4.2.5)
# ---------------------------------------------------------------------------


class TestExcludedAttributes:
    """Test excludedAttributes query parameter on GET endpoints."""

    def test_list_groups_excludes_members(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=1, name="Team")
        uid = uuid4()
        mock_dal.list_groups.return_value = ([(group, None)], 1)
        mock_dal.get_group_members.return_value = [(uid, "user@example.com")]

        result = list_groups(
            filter=None,
            excludedAttributes="members",
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        resource = parsed["Resources"][0]
        assert "members" not in resource
        assert "displayName" in resource

    def test_get_group_excludes_members(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=1, name="Team")
        uid = uuid4()
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = [(uid, "user@example.com")]

        result = get_group(
            group_id="1",
            excludedAttributes="members",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert "members" not in parsed
        assert "displayName" in parsed

    def test_list_users_excludes_groups(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        user = make_db_user()
        mapping = make_user_mapping(user_id=user.id)
        mock_dal.list_users.return_value = ([(user, mapping)], 1)
        mock_dal.get_users_groups_batch.return_value = {user.id: [(1, "Engineering")]}

        result = list_users(
            filter=None,
            excludedAttributes="groups",
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        resource = parsed["Resources"][0]
        assert "groups" not in resource
        assert "userName" in resource

    def test_get_user_excludes_groups(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        user = make_db_user()
        mock_dal.get_user.return_value = user
        mock_dal.get_user_groups.return_value = [(1, "Engineering")]

        result = get_user(
            user_id=str(user.id),
            excludedAttributes="groups",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert "groups" not in parsed
        assert "userName" in parsed

    def test_multiple_excluded_attributes(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=1, name="Team")
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        result = get_group(
            group_id="1",
            excludedAttributes="members,externalId",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        assert isinstance(result, ScimJSONResponse)
        parsed = json.loads(result.body)
        assert "members" not in parsed
        assert "externalId" not in parsed
        assert "displayName" in parsed

    def test_no_excluded_attributes_returns_full_response(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=1, name="Team")
        uid = uuid4()
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = [(uid, "user@example.com")]

        result = get_group(
            group_id="1",
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_group(result)
        assert len(resource.members) == 1


# ---------------------------------------------------------------------------
# Entra Connection Probe
# ---------------------------------------------------------------------------


class TestEntraConnectionProbe:
    """Entra sends a probe request during initial SCIM setup."""

    def test_filter_for_nonexistent_user_returns_empty_list(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        entra_provider: ScimProvider,
    ) -> None:
        """Entra probes with: GET /Users?filter=userName eq "non-existent"&count=1"""
        mock_dal.list_users.return_value = ([], 0)

        result = list_users(
            filter='userName eq "non-existent@contoso.com"',
            startIndex=1,
            count=1,
            _token=mock_token,
            provider=entra_provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        assert parsed.totalResults == 0
        assert parsed.Resources == []


================================================
FILE: backend/tests/unit/onyx/server/scim/test_filtering.py
================================================
import pytest

from ee.onyx.server.scim.filtering import parse_scim_filter
from ee.onyx.server.scim.filtering import ScimFilter
from ee.onyx.server.scim.filtering import ScimFilterOperator


class TestParseScimFilter:
    """Tests for SCIM filter expression parsing."""

    def test_eq_filter_double_quoted(self) -> None:
        result = parse_scim_filter('userName eq "john@example.com"')
        assert result == ScimFilter(
            attribute="userName",
            operator=ScimFilterOperator.EQUAL,
            value="john@example.com",
        )

    def test_eq_filter_single_quoted(self) -> None:
        result = parse_scim_filter("userName eq 'john@example.com'")
        assert result == ScimFilter(
            attribute="userName",
            operator=ScimFilterOperator.EQUAL,
            value="john@example.com",
        )

    def test_co_filter(self) -> None:
        result = parse_scim_filter('displayName co "Engineering"')
        assert result == ScimFilter(
            attribute="displayName",
            operator=ScimFilterOperator.CONTAINS,
            value="Engineering",
        )

    def test_sw_filter(self) -> None:
        result = parse_scim_filter('userName sw "admin"')
        assert result == ScimFilter(
            attribute="userName",
            operator=ScimFilterOperator.STARTS_WITH,
            value="admin",
        )

    def test_case_insensitive_operator(self) -> None:
        result = parse_scim_filter('userName EQ "test@example.com"')
        assert result is not None
        assert result.operator == ScimFilterOperator.EQUAL

    def test_external_id_filter(self) -> None:
        result = parse_scim_filter('externalId eq "abc-123"')
        assert result == ScimFilter(
            attribute="externalId",
            operator=ScimFilterOperator.EQUAL,
            value="abc-123",
        )

    def test_empty_value(self) -> None:
        result = parse_scim_filter('userName eq ""')
        assert result == ScimFilter(
            attribute="userName",
            operator=ScimFilterOperator.EQUAL,
            value="",
        )

    def test_whitespace_trimming(self) -> None:
        result = parse_scim_filter('  userName eq "test"  ')
        assert result is not None
        assert result.value == "test"

    @pytest.mark.parametrize(
        "filter_string",
        [
            None,
            "",
            "   ",
        ],
    )
    def test_empty_input_returns_none(self, filter_string: str | None) -> None:
        assert parse_scim_filter(filter_string) is None

    @pytest.mark.parametrize(
        "filter_string",
        [
            "userName",  # missing operator and value
            "userName eq",  # missing value
            'userName gt "5"',  # unsupported operator
            'userName ne "test"',  # unsupported operator
            "userName eq unquoted",  # unquoted value
            'a eq "x" and b eq "y"',  # compound filter not supported
        ],
    )
    def test_malformed_input_raises_value_error(self, filter_string: str) -> None:
        with pytest.raises(ValueError, match="Unsupported or malformed"):
            parse_scim_filter(filter_string)


================================================
FILE: backend/tests/unit/onyx/server/scim/test_group_endpoints.py
================================================
"""Unit tests for SCIM Group CRUD endpoints."""

from __future__ import annotations

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from fastapi import Response

from ee.onyx.server.scim.api import create_group
from ee.onyx.server.scim.api import delete_group
from ee.onyx.server.scim.api import get_group
from ee.onyx.server.scim.api import list_groups
from ee.onyx.server.scim.api import patch_group
from ee.onyx.server.scim.api import replace_group
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimPatchRequest
from ee.onyx.server.scim.patch import ScimPatchError
from ee.onyx.server.scim.providers.base import ScimProvider
from tests.unit.onyx.server.scim.conftest import assert_scim_error
from tests.unit.onyx.server.scim.conftest import make_db_group
from tests.unit.onyx.server.scim.conftest import make_scim_group
from tests.unit.onyx.server.scim.conftest import parse_scim_group
from tests.unit.onyx.server.scim.conftest import parse_scim_list


class TestListGroups:
    """Tests for GET /scim/v2/Groups."""

    def test_empty_result(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.list_groups.return_value = ([], 0)

        result = list_groups(
            filter=None,
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        assert parsed.totalResults == 0
        assert parsed.Resources == []

    def test_unsupported_filter_returns_400(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.list_groups.side_effect = ValueError(
            "Unsupported filter attribute: userName"
        )

        result = list_groups(
            filter='userName eq "x"',
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    def test_returns_groups_with_members(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5, name="Engineering")
        uid = uuid4()
        mock_dal.list_groups.return_value = ([(group, "ext-g-1")], 1)
        mock_dal.get_group_members.return_value = [(uid, "alice@example.com")]

        result = list_groups(
            filter=None,
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        assert parsed.totalResults == 1
        resource = parsed.Resources[0]
        assert isinstance(resource, ScimGroupResource)
        assert resource.displayName == "Engineering"
        assert resource.externalId == "ext-g-1"
        assert len(resource.members) == 1
        assert resource.members[0].display == "alice@example.com"


class TestGetGroup:
    """Tests for GET /scim/v2/Groups/{group_id}."""

    def test_returns_scim_resource(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5, name="Engineering")
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        result = get_group(
            group_id="5",
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_group(result)
        assert resource.displayName == "Engineering"
        assert resource.id == "5"

    def test_non_integer_id_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
        provider: ScimProvider,
    ) -> None:
        result = get_group(
            group_id="not-a-number",
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group.return_value = None

        result = get_group(
            group_id="999",
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)


class TestCreateGroup:
    """Tests for POST /scim/v2/Groups."""

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_success(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group_by_name.return_value = None
        mock_validate.return_value = ([], None)
        mock_dal.get_group_members.return_value = []

        resource = make_scim_group(displayName="New Group")

        result = create_group(
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_group(result, status=201)
        assert resource.displayName == "New Group"
        mock_dal.add_group.assert_called_once()
        mock_dal.commit.assert_called_once()

    def test_duplicate_name_returns_409(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group_by_name.return_value = make_db_group()
        resource = make_scim_group()

        result = create_group(
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 409)

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_invalid_member_returns_400(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group_by_name.return_value = None
        mock_validate.return_value = ([], "Invalid member ID: bad-uuid")

        resource = make_scim_group(members=[ScimGroupMember(value="bad-uuid")])

        result = create_group(
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_nonexistent_member_returns_400(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group_by_name.return_value = None
        uid = uuid4()
        mock_validate.return_value = ([], f"Member(s) not found: {uid}")

        resource = make_scim_group(members=[ScimGroupMember(value=str(uid))])

        result = create_group(
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_creates_external_id_mapping(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group_by_name.return_value = None
        mock_validate.return_value = ([], None)
        mock_dal.get_group_members.return_value = []

        resource = make_scim_group(externalId="ext-g-123")

        result = create_group(
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result, status=201)
        mock_dal.create_group_mapping.assert_called_once()


class TestReplaceGroup:
    """Tests for PUT /scim/v2/Groups/{group_id}."""

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_success(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5, name="Old Name")
        mock_dal.get_group.return_value = group
        mock_validate.return_value = ([], None)
        mock_dal.get_group_members.return_value = []

        resource = make_scim_group(displayName="New Name")

        result = replace_group(
            group_id="5",
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.update_group.assert_called_once_with(group, name="New Name")
        mock_dal.replace_group_members.assert_called_once()
        mock_dal.commit.assert_called_once()

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group.return_value = None

        result = replace_group(
            group_id="999",
            group_resource=make_scim_group(),
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_invalid_member_returns_400(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_validate.return_value = ([], "Invalid member ID: bad")

        resource = make_scim_group(members=[ScimGroupMember(value="bad")])

        result = replace_group(
            group_id="5",
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    @patch("ee.onyx.server.scim.api._validate_and_parse_members")
    def test_syncs_external_id(
        self,
        mock_validate: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_validate.return_value = ([], None)
        mock_dal.get_group_members.return_value = []

        resource = make_scim_group(externalId="new-ext")

        replace_group(
            group_id="5",
            group_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        mock_dal.sync_group_external_id.assert_called_once_with(5, "new-ext")


class TestPatchGroup:
    """Tests for PATCH /scim/v2/Groups/{group_id}."""

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_rename(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5, name="Old Name")
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        patched = ScimGroupResource(id="5", displayName="New Name", members=[])
        mock_apply.return_value = (patched, [], [])

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="displayName",
                    value="New Name",
                )
            ]
        )

        result = patch_group(
            group_id="5",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.update_group.assert_called_once_with(group, name="New Name")

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_group.return_value = None

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="displayName",
                    value="X",
                )
            ]
        )

        result = patch_group(
            group_id="999",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_patch_error_returns_error_response(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        mock_apply.side_effect = ScimPatchError("Unsupported path", 400)

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="badPath",
                    value="x",
                )
            ]
        )

        result = patch_group(
            group_id="5",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_add_members(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []
        mock_dal.validate_member_ids.return_value = []

        uid = str(uuid4())
        patched = ScimGroupResource(
            id="5",
            displayName="Engineering",
            members=[ScimGroupMember(value=uid)],
        )
        mock_apply.return_value = (patched, [uid], [])

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.ADD,
                    path="members",
                    value=[ScimGroupMember(value=uid)],
                )
            ]
        )

        result = patch_group(
            group_id="5",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.validate_member_ids.assert_called_once()
        mock_dal.upsert_group_members.assert_called_once()

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_add_nonexistent_member_returns_400(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        uid = uuid4()
        patched = ScimGroupResource(
            id="5",
            displayName="Engineering",
            members=[ScimGroupMember(value=str(uid))],
        )
        mock_apply.return_value = (patched, [str(uid)], [])
        mock_dal.validate_member_ids.return_value = [uid]

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.ADD,
                    path="members",
                    value=[ScimGroupMember(value=str(uid))],
                )
            ]
        )

        result = patch_group(
            group_id="5",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    @patch("ee.onyx.server.scim.api.apply_group_patch")
    def test_remove_members(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mock_dal.get_group_members.return_value = []

        uid = str(uuid4())
        patched = ScimGroupResource(id="5", displayName="Engineering", members=[])
        mock_apply.return_value = (patched, [], [uid])

        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REMOVE,
                    path=f'members[value eq "{uid}"]',
                )
            ]
        )

        result = patch_group(
            group_id="5",
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_group(result)
        mock_dal.remove_group_members.assert_called_once()


class TestDeleteGroup:
    """Tests for DELETE /scim/v2/Groups/{group_id}."""

    def test_success(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        group = make_db_group(id=5)
        mock_dal.get_group.return_value = group
        mapping = MagicMock()
        mapping.id = 1
        mock_dal.get_group_mapping_by_group_id.return_value = mapping

        result = delete_group(
            group_id="5",
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert isinstance(result, Response)
        assert result.status_code == 204
        mock_dal.delete_group_mapping.assert_called_once_with(1)
        mock_dal.delete_group_with_members.assert_called_once_with(group)
        mock_dal.commit.assert_called_once()

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        mock_dal.get_group.return_value = None

        result = delete_group(
            group_id="999",
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    def test_non_integer_id_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
    ) -> None:
        result = delete_group(
            group_id="abc",
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)


================================================
FILE: backend/tests/unit/onyx/server/scim/test_patch.py
================================================
import pytest

from ee.onyx.server.scim.models import ScimEmail
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimMeta
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimPatchResourceValue
from ee.onyx.server.scim.models import ScimPatchValue
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.patch import apply_group_patch
from ee.onyx.server.scim.patch import apply_user_patch
from ee.onyx.server.scim.patch import ScimPatchError
from ee.onyx.server.scim.providers.entra import EntraProvider
from ee.onyx.server.scim.providers.okta import OktaProvider

_OKTA_IGNORED = OktaProvider().ignored_patch_paths
_ENTRA_IGNORED = EntraProvider().ignored_patch_paths


def _make_user(**kwargs: object) -> ScimUserResource:
    defaults: dict = {
        "userName": "test@example.com",
        "active": True,
        "name": ScimName(givenName="Test", familyName="User"),
    }
    defaults.update(kwargs)
    return ScimUserResource(**defaults)


def _make_group(**kwargs: object) -> ScimGroupResource:
    defaults: dict = {"displayName": "Engineering"}
    defaults.update(kwargs)
    return ScimGroupResource(**defaults)


def _replace_op(
    path: str | None = None,
    value: ScimPatchValue = None,
) -> ScimPatchOperation:
    return ScimPatchOperation(op=ScimPatchOperationType.REPLACE, path=path, value=value)


def _add_op(
    path: str | None = None,
    value: ScimPatchValue = None,
) -> ScimPatchOperation:
    return ScimPatchOperation(op=ScimPatchOperationType.ADD, path=path, value=value)


def _remove_op(path: str) -> ScimPatchOperation:
    return ScimPatchOperation(op=ScimPatchOperationType.REMOVE, path=path)


class TestApplyUserPatch:
    """Tests for SCIM user PATCH operations."""

    def test_deactivate_user(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_replace_op("active", False)], user)
        assert result.active is False
        assert result.userName == "test@example.com"

    def test_activate_user(self) -> None:
        user = _make_user(active=False)
        result, _ = apply_user_patch([_replace_op("active", True)], user)
        assert result.active is True

    def test_replace_given_name(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_replace_op("name.givenName", "NewFirst")], user)
        assert result.name is not None
        assert result.name.givenName == "NewFirst"
        assert result.name.familyName == "User"

    def test_replace_family_name(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_replace_op("name.familyName", "NewLast")], user)
        assert result.name is not None
        assert result.name.familyName == "NewLast"

    def test_replace_username(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_replace_op("userName", "new@example.com")], user)
        assert result.userName == "new@example.com"

    def test_replace_without_path_uses_dict(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(active=False, userName="new@example.com"),
                )
            ],
            user,
        )
        assert result.active is False
        assert result.userName == "new@example.com"

    def test_multiple_operations(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch(
            [
                _replace_op("active", False),
                _replace_op("name.givenName", "Updated"),
            ],
            user,
        )
        assert result.active is False
        assert result.name is not None
        assert result.name.givenName == "Updated"

    def test_case_insensitive_path(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_replace_op("Active", False)], user)
        assert result.active is False

    def test_original_not_mutated(self) -> None:
        user = _make_user()
        apply_user_patch([_replace_op("active", False)], user)
        assert user.active is True

    def test_unsupported_path_raises(self) -> None:
        user = _make_user()
        with pytest.raises(ScimPatchError, match="Unsupported path"):
            apply_user_patch([_replace_op("unknownField", "value")], user)

    def test_remove_op_clears_field(self) -> None:
        """Remove op should clear the target field (not raise)."""
        user = _make_user(externalId="ext-123")
        result, _ = apply_user_patch([_remove_op("externalId")], user)
        assert result.externalId is None

    def test_remove_unsupported_path_raises(self) -> None:
        """Remove op on unsupported path (e.g. 'active') should raise."""
        user = _make_user()
        with pytest.raises(ScimPatchError, match="Unsupported remove path"):
            apply_user_patch([_remove_op("active")], user)

    def test_replace_without_path_ignores_id(self) -> None:
        """Okta sends 'id' alongside actual changes — it should be silently ignored."""
        user = _make_user()
        result, _ = apply_user_patch(
            [_replace_op(None, ScimPatchResourceValue(active=False, id="some-uuid"))],
            user,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.active is False

    def test_replace_without_path_ignores_schemas(self) -> None:
        """The 'schemas' key in a value dict should be silently ignored."""
        user = _make_user()
        result, _ = apply_user_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(
                        active=False,
                        schemas=["urn:ietf:params:scim:schemas:core:2.0:User"],
                    ),
                )
            ],
            user,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.active is False

    def test_okta_deactivation_payload(self) -> None:
        """Exact Okta deactivation payload: path-less replace with id + active."""
        user = _make_user()
        result, _ = apply_user_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(id="abc-123", active=False),
                )
            ],
            user,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.active is False
        assert result.userName == "test@example.com"

    def test_replace_displayname(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch(
            [_replace_op("displayName", "New Display Name")], user
        )
        assert result.displayName == "New Display Name"
        assert result.name is not None
        assert result.name.formatted == "New Display Name"

    def test_replace_without_path_complex_value_dict(self) -> None:
        """Okta sends id/schemas/meta alongside actual changes — complex types
        (lists, nested dicts) must not cause Pydantic validation errors."""
        user = _make_user()
        result, _ = apply_user_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(
                        active=False,
                        id="some-uuid",
                        schemas=["urn:ietf:params:scim:schemas:core:2.0:User"],
                        meta=ScimMeta(resourceType="User"),
                    ),
                )
            ],
            user,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.active is False
        assert result.userName == "test@example.com"

    def test_add_operation_works_like_replace(self) -> None:
        user = _make_user()
        result, _ = apply_user_patch([_add_op("externalId", "ext-456")], user)
        assert result.externalId == "ext-456"

    def test_entra_capitalized_replace_op(self) -> None:
        """Entra ID sends ``"Replace"`` instead of ``"replace"``."""
        user = _make_user()
        op = ScimPatchOperation(op="Replace", path="active", value=False)  # type: ignore[arg-type]
        result, _ = apply_user_patch([op], user)
        assert result.active is False

    def test_entra_capitalized_add_op(self) -> None:
        """Entra ID sends ``"Add"`` instead of ``"add"``."""
        user = _make_user()
        op = ScimPatchOperation(op="Add", path="externalId", value="ext-999")  # type: ignore[arg-type]
        result, _ = apply_user_patch([op], user)
        assert result.externalId == "ext-999"

    def test_entra_enterprise_extension_handled(self) -> None:
        """Entra sends the enterprise extension URN as a key in path-less
        PATCH value dicts — enterprise data should be captured in ent_data."""
        user = _make_user()
        value = ScimPatchResourceValue(active=False)
        # Simulate Entra including the enterprise extension URN as extra data
        assert value.__pydantic_extra__ is not None
        value.__pydantic_extra__[
            "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
        ] = {"department": "Engineering"}
        result, ent_data = apply_user_patch(
            [_replace_op(None, value)],
            user,
            ignored_paths=_ENTRA_IGNORED,
        )
        assert result.active is False
        assert result.userName == "test@example.com"
        assert ent_data["department"] == "Engineering"

    def test_okta_handles_enterprise_extension_urn(self) -> None:
        """Enterprise extension URN paths are handled universally, even
        for Okta — the data is captured in the enterprise data dict."""
        user = _make_user()
        value = ScimPatchResourceValue(active=False)
        assert value.__pydantic_extra__ is not None
        value.__pydantic_extra__[
            "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
        ] = {"department": "Engineering"}
        result, ent_data = apply_user_patch(
            [_replace_op(None, value)],
            user,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.active is False
        assert ent_data["department"] == "Engineering"

    def test_emails_primary_eq_true_value(self) -> None:
        """emails[primary eq true].value should update the primary email entry."""
        user = _make_user(
            emails=[ScimEmail(value="old@example.com", type="work", primary=True)]
        )
        result, _ = apply_user_patch(
            [_replace_op("emails[primary eq true].value", "new@example.com")], user
        )
        # userName should remain unchanged — emails and userName are separate
        assert result.userName == "test@example.com"
        assert len(result.emails) == 1
        assert result.emails[0].value == "new@example.com"
        assert result.emails[0].primary is True

    def test_enterprise_urn_department_path(self) -> None:
        """Dotted enterprise URN path should set department in ent_data."""
        user = _make_user()
        _, ent_data = apply_user_patch(
            [
                _replace_op(
                    "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:department",
                    "Marketing",
                )
            ],
            user,
        )
        assert ent_data["department"] == "Marketing"

    def test_enterprise_urn_manager_path(self) -> None:
        """Dotted enterprise URN path for manager should set manager."""
        user = _make_user()
        _, ent_data = apply_user_patch(
            [
                _replace_op(
                    "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:manager",
                    ScimPatchResourceValue.model_validate({"value": "boss-id"}),
                )
            ],
            user,
        )
        assert ent_data["manager"] == "boss-id"


class TestApplyGroupPatch:
    """Tests for SCIM group PATCH operations."""

    def test_replace_display_name(self) -> None:
        group = _make_group()
        result, added, removed = apply_group_patch(
            [_replace_op("displayName", "New Name")], group
        )
        assert result.displayName == "New Name"
        assert added == []
        assert removed == []

    def test_add_members(self) -> None:
        group = _make_group()
        result, added, removed = apply_group_patch(
            [
                _add_op(
                    "members",
                    [ScimGroupMember(value="user-1"), ScimGroupMember(value="user-2")],
                )
            ],
            group,
        )
        assert len(result.members) == 2
        assert added == ["user-1", "user-2"]
        assert removed == []

    def test_add_members_without_path(self) -> None:
        group = _make_group()
        result, added, _ = apply_group_patch(
            [_add_op(None, [ScimGroupMember(value="user-1")])],
            group,
        )
        assert len(result.members) == 1
        assert added == ["user-1"]

    def test_add_duplicate_member_skipped(self) -> None:
        group = _make_group(members=[ScimGroupMember(value="user-1")])
        result, added, _ = apply_group_patch(
            [
                _add_op(
                    "members",
                    [ScimGroupMember(value="user-1"), ScimGroupMember(value="user-2")],
                )
            ],
            group,
        )
        assert len(result.members) == 2
        assert added == ["user-2"]

    def test_remove_member(self) -> None:
        group = _make_group(
            members=[
                ScimGroupMember(value="user-1"),
                ScimGroupMember(value="user-2"),
            ]
        )
        result, added, removed = apply_group_patch(
            [_remove_op('members[value eq "user-1"]')],
            group,
        )
        assert len(result.members) == 1
        assert result.members[0].value == "user-2"
        assert removed == ["user-1"]
        assert added == []

    def test_remove_nonexistent_member(self) -> None:
        group = _make_group(members=[ScimGroupMember(value="user-1")])
        result, _, removed = apply_group_patch(
            [_remove_op('members[value eq "user-999"]')],
            group,
        )
        assert len(result.members) == 1
        assert removed == []

    def test_mixed_operations(self) -> None:
        group = _make_group(members=[ScimGroupMember(value="user-1")])
        result, added, removed = apply_group_patch(
            [
                _replace_op("displayName", "Renamed"),
                _add_op("members", [ScimGroupMember(value="user-2")]),
                _remove_op('members[value eq "user-1"]'),
            ],
            group,
        )
        assert result.displayName == "Renamed"
        assert added == ["user-2"]
        assert removed == ["user-1"]
        assert len(result.members) == 1

    def test_remove_without_path_raises(self) -> None:
        group = _make_group()
        with pytest.raises(ScimPatchError, match="requires a path"):
            apply_group_patch(
                [ScimPatchOperation(op=ScimPatchOperationType.REMOVE, path=None)],
                group,
            )

    def test_remove_invalid_path_raises(self) -> None:
        group = _make_group()
        with pytest.raises(ScimPatchError, match="Unsupported remove path"):
            apply_group_patch([_remove_op("displayName")], group)

    def test_replace_members_with_path(self) -> None:
        group = _make_group(
            members=[
                ScimGroupMember(value="user-1"),
                ScimGroupMember(value="user-2"),
            ]
        )
        result, added, removed = apply_group_patch(
            [
                _replace_op(
                    "members",
                    [ScimGroupMember(value="user-2"), ScimGroupMember(value="user-3")],
                )
            ],
            group,
        )
        assert len(result.members) == 2
        member_ids = {m.value for m in result.members}
        assert member_ids == {"user-2", "user-3"}
        assert "user-3" in added
        assert "user-1" in removed
        assert "user-2" not in added
        assert "user-2" not in removed

    def test_replace_members_empty_list_clears(self) -> None:
        group = _make_group(
            members=[
                ScimGroupMember(value="user-1"),
                ScimGroupMember(value="user-2"),
            ]
        )
        result, added, removed = apply_group_patch(
            [_replace_op("members", [])],
            group,
        )
        assert len(result.members) == 0
        assert added == []
        assert set(removed) == {"user-1", "user-2"}

    def test_unsupported_replace_path_raises(self) -> None:
        group = _make_group()
        with pytest.raises(ScimPatchError, match="Unsupported path"):
            apply_group_patch([_replace_op("unknownField", "val")], group)

    def test_original_not_mutated(self) -> None:
        group = _make_group()
        apply_group_patch([_replace_op("displayName", "Changed")], group)
        assert group.displayName == "Engineering"

    def test_replace_without_path_ignores_id(self) -> None:
        """Group replace with 'id' in value dict should be silently ignored."""
        group = _make_group()
        result, _, _ = apply_group_patch(
            [
                _replace_op(
                    None, ScimPatchResourceValue(displayName="Updated", id="some-id")
                )
            ],
            group,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.displayName == "Updated"

    def test_replace_without_path_ignores_schemas(self) -> None:
        group = _make_group()
        result, _, _ = apply_group_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(
                        displayName="Updated",
                        schemas=["urn:ietf:params:scim:schemas:core:2.0:Group"],
                    ),
                )
            ],
            group,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.displayName == "Updated"

    def test_replace_without_path_complex_value_dict(self) -> None:
        """Group PATCH with complex types in value dict (lists, nested dicts)
        must not cause Pydantic validation errors."""
        group = _make_group()
        result, _, _ = apply_group_patch(
            [
                _replace_op(
                    None,
                    ScimPatchResourceValue(
                        displayName="Updated",
                        id="123",
                        schemas=["urn:ietf:params:scim:schemas:core:2.0:Group"],
                        meta=ScimMeta(resourceType="Group"),
                    ),
                )
            ],
            group,
            ignored_paths=_OKTA_IGNORED,
        )
        assert result.displayName == "Updated"


================================================
FILE: backend/tests/unit/onyx/server/scim/test_providers.py
================================================
from unittest.mock import MagicMock
from uuid import UUID
from uuid import uuid4

from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
from ee.onyx.server.scim.models import ScimEmail
from ee.onyx.server.scim.models import ScimGroupMember
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimMeta
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimUserGroupRef
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS
from ee.onyx.server.scim.providers.base import get_default_provider
from ee.onyx.server.scim.providers.entra import _ENTRA_IGNORED_PATCH_PATHS
from ee.onyx.server.scim.providers.entra import EntraProvider
from ee.onyx.server.scim.providers.okta import OktaProvider


def _make_mock_user(
    user_id: UUID | None = None,
    email: str = "test@example.com",
    personal_name: str | None = "Test User",
    is_active: bool = True,
) -> MagicMock:
    user = MagicMock()
    user.id = user_id or uuid4()
    user.email = email
    user.personal_name = personal_name
    user.is_active = is_active
    return user


def _make_mock_group(group_id: int = 42, name: str = "Engineering") -> MagicMock:
    group = MagicMock()
    group.id = group_id
    group.name = name
    return group


class TestOktaProvider:
    def test_name(self) -> None:
        assert OktaProvider().name == "okta"

    def test_ignored_patch_paths(self) -> None:
        assert OktaProvider().ignored_patch_paths == COMMON_IGNORED_PATCH_PATHS

    def test_build_user_resource_basic(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-123")

        assert result == ScimUserResource(
            id=str(user.id),
            externalId="ext-123",
            userName="test@example.com",
            name=ScimName(givenName="Test", familyName="User", formatted="Test User"),
            displayName="Test User",
            emails=[ScimEmail(value="test@example.com", type="work", primary=True)],
            active=True,
            groups=[],
            meta=ScimMeta(resourceType="User"),
        )

    def test_build_user_resource_has_core_schema_only(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-123")
        assert result.schemas == [SCIM_USER_SCHEMA]

    def test_build_user_resource_with_groups(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user()
        groups = [(1, "Engineering"), (2, "Design")]
        result = provider.build_user_resource(user, "ext-123", groups=groups)

        assert result.groups == [
            ScimUserGroupRef(value="1", display="Engineering"),
            ScimUserGroupRef(value="2", display="Design"),
        ]

    def test_build_user_resource_empty_groups(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-123", groups=[])

        assert result.groups == []

    def test_build_user_resource_no_groups(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-123")

        assert result.groups == []

    def test_build_user_resource_name_parsing(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user(personal_name="Jane Doe")
        result = provider.build_user_resource(user, None)

        assert result.name == ScimName(
            givenName="Jane", familyName="Doe", formatted="Jane Doe"
        )

    def test_build_user_resource_single_name(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user(personal_name="Madonna")
        result = provider.build_user_resource(user, None)

        assert result.name == ScimName(
            givenName="Madonna", familyName="", formatted="Madonna"
        )

    def test_build_user_resource_no_name(self) -> None:
        provider = OktaProvider()
        user = _make_mock_user(personal_name=None)
        result = provider.build_user_resource(user, None)

        # Falls back to deriving name from email local part
        assert result.name == ScimName(
            givenName="test", familyName="", formatted="test"
        )
        assert result.displayName is None

    def test_build_user_resource_scim_username_preserves_case(self) -> None:
        """When scim_username is set, userName and emails use original case."""
        provider = OktaProvider()
        user = _make_mock_user(email="alice@example.com")
        result = provider.build_user_resource(
            user, "ext-1", scim_username="Alice@Example.com"
        )

        assert result.userName == "Alice@Example.com"
        assert result.emails[0].value == "Alice@Example.com"

    def test_build_user_resource_scim_username_none_falls_back(self) -> None:
        """When scim_username is None, userName falls back to user.email."""
        provider = OktaProvider()
        user = _make_mock_user(email="alice@example.com")
        result = provider.build_user_resource(user, "ext-1", scim_username=None)

        assert result.userName == "alice@example.com"
        assert result.emails[0].value == "alice@example.com"

    def test_build_group_resource(self) -> None:
        provider = OktaProvider()
        group = _make_mock_group()
        uid1, uid2 = uuid4(), uuid4()
        members: list[tuple[UUID, str | None]] = [
            (uid1, "alice@example.com"),
            (uid2, "bob@example.com"),
        ]

        result = provider.build_group_resource(group, members, "ext-g-1")

        assert result == ScimGroupResource(
            id="42",
            externalId="ext-g-1",
            displayName="Engineering",
            members=[
                ScimGroupMember(value=str(uid1), display="alice@example.com"),
                ScimGroupMember(value=str(uid2), display="bob@example.com"),
            ],
            meta=ScimMeta(resourceType="Group"),
        )

    def test_build_group_resource_empty_members(self) -> None:
        provider = OktaProvider()
        group = _make_mock_group()
        result = provider.build_group_resource(group, [])

        assert result.members == []


class TestEntraProvider:
    def test_name(self) -> None:
        assert EntraProvider().name == "entra"

    def test_ignored_patch_paths(self) -> None:
        paths = EntraProvider().ignored_patch_paths
        assert paths == _ENTRA_IGNORED_PATCH_PATHS
        # Enterprise extension URN is now handled (not ignored)
        assert paths >= COMMON_IGNORED_PATCH_PATHS

    def test_build_user_resource_includes_enterprise_schema(self) -> None:
        provider = EntraProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-entra-1")

        assert result.schemas == [SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA]

    def test_build_user_resource_basic(self) -> None:
        provider = EntraProvider()
        user = _make_mock_user()
        result = provider.build_user_resource(user, "ext-entra-1")

        assert result == ScimUserResource(
            schemas=[SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA],
            id=str(user.id),
            externalId="ext-entra-1",
            userName="test@example.com",
            name=ScimName(givenName="Test", familyName="User", formatted="Test User"),
            displayName="Test User",
            emails=[ScimEmail(value="test@example.com", type="work", primary=True)],
            active=True,
            groups=[],
            meta=ScimMeta(resourceType="User"),
        )


class TestGetDefaultProvider:
    def test_returns_okta(self) -> None:
        provider = get_default_provider()
        assert isinstance(provider, OktaProvider)


================================================
FILE: backend/tests/unit/onyx/server/scim/test_user_endpoints.py
================================================
"""Unit tests for SCIM User CRUD endpoints."""

from __future__ import annotations

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

from fastapi import Response
from sqlalchemy.exc import IntegrityError

from ee.onyx.server.scim.api import _scim_name_to_str
from ee.onyx.server.scim.api import create_user
from ee.onyx.server.scim.api import delete_user
from ee.onyx.server.scim.api import get_user
from ee.onyx.server.scim.api import list_users
from ee.onyx.server.scim.api import patch_user
from ee.onyx.server.scim.api import replace_user
from ee.onyx.server.scim.models import ScimMappingFields
from ee.onyx.server.scim.models import ScimName
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimPatchRequest
from ee.onyx.server.scim.models import ScimUserResource
from ee.onyx.server.scim.patch import ScimPatchError
from ee.onyx.server.scim.providers.base import ScimProvider
from tests.unit.onyx.server.scim.conftest import assert_scim_error
from tests.unit.onyx.server.scim.conftest import make_db_user
from tests.unit.onyx.server.scim.conftest import make_scim_user
from tests.unit.onyx.server.scim.conftest import make_user_mapping
from tests.unit.onyx.server.scim.conftest import parse_scim_list
from tests.unit.onyx.server.scim.conftest import parse_scim_user


class TestListUsers:
    """Tests for GET /scim/v2/Users."""

    def test_empty_result(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.list_users.return_value = ([], 0)

        result = list_users(
            filter=None,
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        assert parsed.totalResults == 0
        assert parsed.Resources == []

    def test_returns_users_with_scim_shape(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="alice@example.com", personal_name="Alice Smith")
        mapping = make_user_mapping(
            external_id="ext-abc", user_id=user.id, scim_username="Alice@example.com"
        )
        mock_dal.list_users.return_value = ([(user, mapping)], 1)

        result = list_users(
            filter=None,
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_list(result)
        assert parsed.totalResults == 1
        assert len(parsed.Resources) == 1
        resource = parsed.Resources[0]
        assert isinstance(resource, ScimUserResource)
        assert resource.userName == "Alice@example.com"
        assert resource.externalId == "ext-abc"

    def test_unsupported_filter_attribute_returns_400(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.list_users.side_effect = ValueError(
            "Unsupported filter attribute: emails"
        )

        result = list_users(
            filter='emails eq "x@y.com"',
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)

    def test_invalid_filter_syntax_returns_400(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
        provider: ScimProvider,
    ) -> None:
        result = list_users(
            filter="not a valid filter",
            startIndex=1,
            count=100,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)


class TestGetUser:
    """Tests for GET /scim/v2/Users/{user_id}."""

    def test_returns_scim_resource(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="alice@example.com")
        mock_dal.get_user.return_value = user

        result = get_user(
            user_id=str(user.id),
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        assert resource.userName == "alice@example.com"
        assert resource.id == str(user.id)

    def test_invalid_uuid_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
        provider: ScimProvider,
    ) -> None:
        result = get_user(
            user_id="not-a-uuid",
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    def test_user_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user.return_value = None

        result = get_user(
            user_id=str(uuid4()),
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)


class TestCreateUser:
    """Tests for POST /scim/v2/Users."""

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_success(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(userName="new@example.com")

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result, status=201)
        assert resource.userName == "new@example.com"
        mock_dal.add_user.assert_called_once()
        mock_dal.commit.assert_called_once()

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_missing_external_id_still_creates_mapping(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """Mapping is always created to mark user as SCIM-managed."""
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(externalId=None)

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_user(result, status=201)
        assert parsed.userName is not None
        mock_dal.add_user.assert_called_once()
        mock_dal.create_user_mapping.assert_called_once()
        mock_dal.commit.assert_called_once()

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_duplicate_scim_managed_email_returns_409(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """409 only when the existing user already has a SCIM mapping."""
        existing = make_db_user()
        mock_dal.get_user_by_email.return_value = existing
        mock_dal.get_user_mapping_by_user_id.return_value = make_user_mapping(
            user_id=existing.id
        )
        resource = make_scim_user()

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 409)

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_existing_user_without_mapping_gets_linked(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """Pre-existing user without SCIM mapping gets adopted (linked)."""
        existing = make_db_user(email="admin@example.com", personal_name=None)
        mock_dal.get_user_by_email.return_value = existing
        mock_dal.get_user_mapping_by_user_id.return_value = None
        resource = make_scim_user(userName="admin@example.com", externalId="ext-admin")

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parsed = parse_scim_user(result, status=201)
        assert parsed.userName == "admin@example.com"
        # Should NOT create a new user — reuse existing
        mock_dal.add_user.assert_not_called()
        # Should sync is_active and personal_name from the SCIM request
        mock_dal.update_user.assert_called_once_with(
            existing, is_active=True, personal_name="Test User"
        )
        # Should create a SCIM mapping for the existing user
        mock_dal.create_user_mapping.assert_called_once()
        mock_dal.commit.assert_called_once()

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_integrity_error_returns_409(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user_by_email.return_value = None
        mock_dal.add_user.side_effect = IntegrityError("dup", {}, Exception())
        resource = make_scim_user()

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 409)
        mock_dal.rollback.assert_called_once()

    @patch("ee.onyx.server.scim.api._check_seat_availability")
    def test_seat_limit_returns_403(
        self,
        mock_seats: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
        provider: ScimProvider,
    ) -> None:
        mock_seats.return_value = "Seat limit reached"
        resource = make_scim_user()

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 403)

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_creates_external_id_mapping(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(externalId="ext-123")

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result, status=201)
        assert resource.externalId == "ext-123"
        mock_dal.create_user_mapping.assert_called_once()


class TestReplaceUser:
    """Tests for PUT /scim/v2/Users/{user_id}."""

    def test_success(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user(email="old@example.com")
        mock_dal.get_user.return_value = user
        resource = make_scim_user(
            userName="new@example.com",
            name=ScimName(givenName="New", familyName="Name"),
        )

        result = replace_user(
            user_id=str(user.id),
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        mock_dal.update_user.assert_called_once()
        mock_dal.commit.assert_called_once()

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user.return_value = None

        result = replace_user(
            user_id=str(uuid4()),
            user_resource=make_scim_user(),
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    @patch("ee.onyx.server.scim.api._check_seat_availability")
    def test_reactivation_checks_seats(
        self,
        mock_seats: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user(is_active=False)
        mock_dal.get_user.return_value = user
        mock_seats.return_value = "No seats"
        resource = make_scim_user(active=True)

        result = replace_user(
            user_id=str(user.id),
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 403)
        mock_seats.assert_called_once()

    def test_syncs_external_id(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user()
        mock_dal.get_user.return_value = user

        resource = make_scim_user(externalId=None)

        result = replace_user(
            user_id=str(user.id),
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        mock_dal.sync_user_external_id.assert_called_once_with(
            user.id,
            None,
            scim_username="test@example.com",
            fields=ScimMappingFields(
                given_name="Test",
                family_name="User",
            ),
        )


class TestPatchUser:
    """Tests for PATCH /scim/v2/Users/{user_id}."""

    def test_deactivate(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user(is_active=True)
        mock_dal.get_user.return_value = user
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="active",
                    value=False,
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        mock_dal.update_user.assert_called_once()

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        mock_dal.get_user.return_value = None
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="active",
                    value=False,
                )
            ]
        )

        result = patch_user(
            user_id=str(uuid4()),
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    def test_patch_displayname_persists(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """PATCH displayName should update personal_name in the DB."""
        user = make_db_user(personal_name="Old Name")
        mock_dal.get_user.return_value = user
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REPLACE,
                    path="displayName",
                    value="New Display Name",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        parse_scim_user(result)
        # Verify the update_user call received the new display name
        call_kwargs = mock_dal.update_user.call_args
        assert call_kwargs[1]["personal_name"] == "New Display Name"

    @patch("ee.onyx.server.scim.api.apply_user_patch")
    def test_patch_error_returns_error_response(
        self,
        mock_apply: MagicMock,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        user = make_db_user()
        mock_dal.get_user.return_value = user
        mock_apply.side_effect = ScimPatchError("Bad op", 400)
        patch_req = ScimPatchRequest(
            Operations=[
                ScimPatchOperation(
                    op=ScimPatchOperationType.REMOVE,
                    path="userName",
                )
            ]
        )

        result = patch_user(
            user_id=str(user.id),
            patch_request=patch_req,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 400)


class TestDeleteUser:
    """Tests for DELETE /scim/v2/Users/{user_id}."""

    def test_success(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        user = make_db_user(is_active=True)
        mock_dal.get_user.return_value = user
        mapping = MagicMock()
        mapping.id = 1
        mock_dal.get_user_mapping_by_user_id.return_value = mapping

        result = delete_user(
            user_id=str(user.id),
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert isinstance(result, Response)
        assert result.status_code == 204
        mock_dal.deactivate_user.assert_called_once_with(user)
        mock_dal.delete_user_mapping.assert_called_once_with(1)
        mock_dal.commit.assert_called_once()

    def test_not_found_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
    ) -> None:
        mock_dal.get_user.return_value = None

        result = delete_user(
            user_id=str(uuid4()),
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)

    def test_invalid_uuid_returns_404(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,  # noqa: ARG002
    ) -> None:
        result = delete_user(
            user_id="not-a-uuid",
            _token=mock_token,
            db_session=mock_db_session,
        )

        assert_scim_error(result, 404)


class TestScimNameToStr:
    """Tests for _scim_name_to_str helper."""

    def test_prefers_formatted_over_components(self) -> None:
        """When client provides formatted, use it — the client knows what it wants."""
        name = ScimName(
            givenName="Jane", familyName="Smith", formatted="Dr. Jane Smith"
        )
        assert _scim_name_to_str(name) == "Dr. Jane Smith"

    def test_given_name_only(self) -> None:
        name = ScimName(givenName="Jane")
        assert _scim_name_to_str(name) == "Jane"

    def test_family_name_only(self) -> None:
        name = ScimName(familyName="Smith")
        assert _scim_name_to_str(name) == "Smith"

    def test_falls_back_to_formatted(self) -> None:
        name = ScimName(formatted="Display Name")
        assert _scim_name_to_str(name) == "Display Name"

    def test_none_returns_none(self) -> None:
        assert _scim_name_to_str(None) is None

    def test_empty_name_returns_none(self) -> None:
        name = ScimName()
        assert _scim_name_to_str(name) is None


class TestEmailCasePreservation:
    """Tests verifying email case is preserved through SCIM endpoints."""

    @patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
    def test_create_preserves_username_case(
        self,
        mock_seats: MagicMock,  # noqa: ARG002
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """POST /Users with mixed-case userName returns the original case."""
        mock_dal.get_user_by_email.return_value = None
        resource = make_scim_user(userName="Alice@Example.COM")

        result = create_user(
            user_resource=resource,
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result, status=201)
        assert resource.userName == "Alice@Example.COM"
        assert resource.emails[0].value == "Alice@Example.COM"

    def test_get_preserves_username_case(
        self,
        mock_db_session: MagicMock,
        mock_token: MagicMock,
        mock_dal: MagicMock,
        provider: ScimProvider,
    ) -> None:
        """GET /Users/{id} returns the original-case userName from mapping."""
        user = make_db_user(email="alice@example.com")
        mock_dal.get_user.return_value = user
        mapping = make_user_mapping(
            external_id="ext-1",
            user_id=user.id,
            scim_username="Alice@Example.COM",
        )
        mock_dal.get_user_mapping_by_user_id.return_value = mapping

        result = get_user(
            user_id=str(user.id),
            _token=mock_token,
            provider=provider,
            db_session=mock_db_session,
        )

        resource = parse_scim_user(result)
        assert resource.userName == "Alice@Example.COM"
        assert resource.emails[0].value == "Alice@Example.COM"


================================================
FILE: backend/tests/unit/onyx/server/test_full_user_snapshot.py
================================================
import datetime
from unittest.mock import MagicMock
from uuid import uuid4

from onyx.auth.schemas import UserRole
from onyx.db.enums import AccountType
from onyx.server.models import FullUserSnapshot
from onyx.server.models import UserGroupInfo


def _mock_user(
    personal_name: str | None = "Test User",
    created_at: datetime.datetime | None = None,
    updated_at: datetime.datetime | None = None,
) -> MagicMock:
    user = MagicMock()
    user.id = uuid4()
    user.email = "test@example.com"
    user.role = UserRole.BASIC
    user.is_active = True
    user.password_configured = True
    user.personal_name = personal_name
    user.created_at = created_at or datetime.datetime(
        2025, 1, 1, tzinfo=datetime.timezone.utc
    )
    user.updated_at = updated_at or datetime.datetime(
        2025, 6, 15, tzinfo=datetime.timezone.utc
    )
    user.account_type = AccountType.STANDARD
    return user


def test_from_user_model_includes_new_fields() -> None:
    user = _mock_user(personal_name="Alice")
    groups = [UserGroupInfo(id=1, name="Engineering")]

    snapshot = FullUserSnapshot.from_user_model(user, groups=groups)

    assert snapshot.personal_name == "Alice"
    assert snapshot.created_at == user.created_at
    assert snapshot.updated_at == user.updated_at
    assert snapshot.groups == groups


def test_from_user_model_defaults_groups_to_empty() -> None:
    user = _mock_user()
    snapshot = FullUserSnapshot.from_user_model(user)

    assert snapshot.groups == []


def test_from_user_model_personal_name_none() -> None:
    user = _mock_user(personal_name=None)
    snapshot = FullUserSnapshot.from_user_model(user)

    assert snapshot.personal_name is None


================================================
FILE: backend/tests/unit/onyx/server/test_pool_metrics.py
================================================
"""Unit tests for SQLAlchemy connection pool Prometheus metrics."""

import time
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

from fastapi import FastAPI
from sqlalchemy.pool import NullPool

from onyx.server.metrics.postgres_connection_pool import _register_pool_events
from onyx.server.metrics.postgres_connection_pool import PoolStateCollector
from onyx.server.metrics.postgres_connection_pool import (
    setup_postgres_connection_pool_metrics,
)
from onyx.utils.middleware import _build_route_map
from onyx.utils.middleware import _match_route


# --- PoolStateCollector tests ---


def test_pool_state_collector_reports_pool_stats() -> None:
    """Verify the custom collector reads pool.checkedout/checkedin/overflow/size."""
    mock_pool = MagicMock()
    mock_pool.checkedout.return_value = 5
    mock_pool.checkedin.return_value = 35
    mock_pool.overflow.return_value = 2
    mock_pool.size.return_value = 40

    collector = PoolStateCollector()
    collector.add_pool("sync", mock_pool)

    families = collector.collect()
    # 4 GaugeMetricFamilies: checked_out, checked_in, overflow, size
    assert len(families) == 4

    # Convert to dict for easier assertions
    metrics: dict[str, float] = {}
    for family in families:
        for sample in family.samples:
            metrics[f"{sample.name}:{sample.labels['engine']}"] = sample.value

    assert metrics["onyx_db_pool_checked_out:sync"] == 5
    assert metrics["onyx_db_pool_checked_in:sync"] == 35
    assert metrics["onyx_db_pool_overflow:sync"] == 2
    assert metrics["onyx_db_pool_size:sync"] == 40


def test_pool_state_collector_handles_multiple_engines() -> None:
    """Verify the collector reports metrics for multiple engines."""
    sync_pool = MagicMock()
    sync_pool.checkedout.return_value = 10
    sync_pool.checkedin.return_value = 30
    sync_pool.overflow.return_value = 0
    sync_pool.size.return_value = 40

    readonly_pool = MagicMock()
    readonly_pool.checkedout.return_value = 3
    readonly_pool.checkedin.return_value = 7
    readonly_pool.overflow.return_value = 1
    readonly_pool.size.return_value = 10

    collector = PoolStateCollector()
    collector.add_pool("sync", sync_pool)
    collector.add_pool("readonly", readonly_pool)

    families = collector.collect()
    # Each family should have 2 samples (sync + readonly)
    for family in families:
        assert len(list(family.samples)) == 2


# --- Pool event listener tests ---


def _make_conn_record() -> MagicMock:
    """Create a mock connection record with an info dict."""
    record = MagicMock()
    record.info = {}
    return record


def test_checkout_event_stores_endpoint_and_increments_gauge() -> None:
    """Verify checkout event stores handler on conn_record and increments metrics."""
    engine = MagicMock()
    engine.pool = MagicMock()
    listeners: dict[str, Any] = {}

    # Capture event listeners
    with patch("onyx.server.metrics.postgres_connection_pool.event") as mock_event:

        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001
            def decorator(fn: Any) -> Any:
                listeners[event_name] = fn
                return fn

            return decorator

        mock_event.listens_for.side_effect = capture_listener
        _register_pool_events(engine, "sync")

    conn_record = _make_conn_record()

    with (
        patch(
            "onyx.server.metrics.postgres_connection_pool.CURRENT_ENDPOINT_CONTEXTVAR"
        ) as mock_ctx,
        patch(
            "onyx.server.metrics.postgres_connection_pool.CURRENT_TENANT_ID_CONTEXTVAR"
        ) as mock_tenant_ctx,
        patch(
            "onyx.server.metrics.postgres_connection_pool._connections_held"
        ) as mock_gauge,
        patch("onyx.server.metrics.postgres_connection_pool._checkout_total"),
    ):
        mock_labels = MagicMock()
        mock_gauge.labels.return_value = mock_labels
        mock_ctx.get.return_value = "/api/chat/send-message"
        mock_tenant_ctx.get.return_value = "tenant_xyz"
        listeners["checkout"](None, conn_record, None)

    assert conn_record.info["_metrics_endpoint"] == "/api/chat/send-message"
    assert conn_record.info["_metrics_tenant_id"] == "tenant_xyz"
    assert "_metrics_checkout_time" in conn_record.info
    mock_gauge.labels.assert_called_with(
        handler="/api/chat/send-message", engine="sync", tenant_id="tenant_xyz"
    )
    mock_labels.inc.assert_called_once()


def test_checkin_event_observes_hold_duration() -> None:
    """Verify checkin event reads endpoint from conn_record and observes hold time."""
    engine = MagicMock()
    engine.pool = MagicMock()
    listeners: dict[str, Any] = {}

    with patch("onyx.server.metrics.postgres_connection_pool.event") as mock_event:

        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001
            def decorator(fn: Any) -> Any:
                listeners[event_name] = fn
                return fn

            return decorator

        mock_event.listens_for.side_effect = capture_listener
        _register_pool_events(engine, "sync")

    conn_record = _make_conn_record()
    conn_record.info["_metrics_endpoint"] = "/api/search"
    conn_record.info["_metrics_tenant_id"] = "tenant_abc"
    conn_record.info["_metrics_checkout_time"] = time.monotonic() - 0.5

    with (
        patch(
            "onyx.server.metrics.postgres_connection_pool._connections_held"
        ) as mock_gauge,
        patch(
            "onyx.server.metrics.postgres_connection_pool._hold_seconds"
        ) as mock_hist,
        patch("onyx.server.metrics.postgres_connection_pool._checkin_total"),
    ):
        mock_labels = MagicMock()
        mock_gauge.labels.return_value = mock_labels
        mock_hist_labels = MagicMock()
        mock_hist.labels.return_value = mock_hist_labels

        listeners["checkin"](None, conn_record)

        mock_gauge.labels.assert_called_with(
            handler="/api/search", engine="sync", tenant_id="tenant_abc"
        )
        mock_labels.dec.assert_called_once()
        mock_hist.labels.assert_called_with(handler="/api/search", engine="sync")
        mock_hist_labels.observe.assert_called_once()
        # Verify the observed duration is roughly 0.5s
        observed = mock_hist_labels.observe.call_args[0][0]
        assert 0.4 < observed < 1.0

    # conn_record.info should be cleaned up
    assert "_metrics_endpoint" not in conn_record.info
    assert "_metrics_tenant_id" not in conn_record.info
    assert "_metrics_checkout_time" not in conn_record.info


def test_checkin_with_missing_endpoint_uses_unknown() -> None:
    """Verify checkin gracefully handles missing endpoint and tenant info."""
    engine = MagicMock()
    engine.pool = MagicMock()
    listeners: dict[str, Any] = {}

    with patch("onyx.server.metrics.postgres_connection_pool.event") as mock_event:

        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001
            def decorator(fn: Any) -> Any:
                listeners[event_name] = fn
                return fn

            return decorator

        mock_event.listens_for.side_effect = capture_listener
        _register_pool_events(engine, "sync")

    conn_record = _make_conn_record()

    with (
        patch(
            "onyx.server.metrics.postgres_connection_pool._connections_held"
        ) as mock_gauge,
        patch("onyx.server.metrics.postgres_connection_pool._hold_seconds"),
        patch("onyx.server.metrics.postgres_connection_pool._checkin_total"),
    ):
        mock_labels = MagicMock()
        mock_gauge.labels.return_value = mock_labels

        listeners["checkin"](None, conn_record)

        mock_gauge.labels.assert_called_with(
            handler="unknown", engine="sync", tenant_id="unknown"
        )


# --- setup_postgres_connection_pool_metrics tests ---


def test_setup_skips_null_pool_engines() -> None:
    """Verify setup_postgres_connection_pool_metrics skips engines with NullPool."""
    with (
        patch("onyx.server.metrics.postgres_connection_pool.REGISTRY"),
        patch(
            "onyx.server.metrics.postgres_connection_pool._register_pool_events"
        ) as mock_register,
    ):
        null_engine = MagicMock()
        null_engine.pool = MagicMock(spec=NullPool)

        setup_postgres_connection_pool_metrics({"null": null_engine})
        mock_register.assert_not_called()


# --- Route matching tests ---


def test_build_route_map_extracts_api_routes() -> None:
    """Verify _build_route_map extracts APIRoute path regexes."""
    app = FastAPI()

    @app.get("/api/test")
    def test_endpoint() -> dict:
        return {}

    @app.get("/api/items/{item_id}")
    def get_item(item_id: str) -> dict:  # noqa: ARG001
        return {}

    route_map = _build_route_map(app)
    # Should have at least the 2 routes we defined
    templates = [template for _, template in route_map]
    assert "/api/test" in templates
    assert "/api/items/{item_id}" in templates


def test_match_route_resolves_parameterized_paths() -> None:
    """Verify _match_route resolves /api/items/abc-123 to /api/items/{item_id}."""
    app = FastAPI()

    @app.get("/api/items/{item_id}")
    def get_item(item_id: str) -> dict:  # noqa: ARG001
        return {}

    route_map = _build_route_map(app)
    result = _match_route(route_map, "/api/items/abc-123")
    assert result == "/api/items/{item_id}"


def test_match_route_returns_none_for_unknown_paths() -> None:
    """Verify _match_route returns None for paths not in the route map."""
    app = FastAPI()

    @app.get("/api/test")
    def test_endpoint() -> dict:
        return {}

    route_map = _build_route_map(app)
    result = _match_route(route_map, "/api/nonexistent")
    assert result is None


def test_match_route_exact_paths() -> None:
    """Verify _match_route handles exact (non-parameterized) paths."""
    app = FastAPI()

    @app.get("/api/health")
    def health() -> dict:
        return {}

    route_map = _build_route_map(app)
    result = _match_route(route_map, "/api/health")
    assert result == "/api/health"


================================================
FILE: backend/tests/unit/onyx/server/test_projects_file_utils.py
================================================
from io import BytesIO
from unittest.mock import MagicMock

import pytest
from fastapi import UploadFile

from onyx.natural_language_processing import utils as nlp_utils
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.server.features.projects import projects_file_utils as utils
from onyx.server.settings.models import Settings


class _Tokenizer(BaseTokenizer):
    def encode(self, text: str) -> list[int]:
        return [1] * len(text)

    def tokenize(self, text: str) -> list[str]:
        return list(text)

    def decode(self, _tokens: list[int]) -> str:
        return ""


class _NonSeekableFile(BytesIO):
    def tell(self) -> int:
        raise OSError("tell not supported")

    def seek(self, *_args: object, **_kwargs: object) -> int:
        raise OSError("seek not supported")


def _make_upload(filename: str, size: int, content: bytes | None = None) -> UploadFile:
    payload = content if content is not None else (b"x" * size)
    return UploadFile(filename=filename, file=BytesIO(payload), size=size)


def _make_upload_no_size(filename: str, content: bytes) -> UploadFile:
    return UploadFile(filename=filename, file=BytesIO(content), size=None)


def _make_settings(upload_size_mb: int = 1, token_threshold_k: int = 100) -> Settings:
    return Settings(
        user_file_max_upload_size_mb=upload_size_mb,
        file_token_count_threshold_k=token_threshold_k,
    )


def _patch_common_dependencies(
    monkeypatch: pytest.MonkeyPatch,
    upload_size_mb: int = 1,
    token_threshold_k: int = 100,
) -> None:
    monkeypatch.setattr(utils, "fetch_default_llm_model", lambda _db: None)
    monkeypatch.setattr(utils, "get_tokenizer", lambda **_kwargs: _Tokenizer())
    monkeypatch.setattr(utils, "is_file_password_protected", lambda **_kwargs: False)
    monkeypatch.setattr(
        utils,
        "load_settings",
        lambda: _make_settings(upload_size_mb, token_threshold_k),
    )


def test_get_upload_size_bytes_falls_back_to_stream_size() -> None:
    upload = UploadFile(filename="example.txt", file=BytesIO(b"abcdef"), size=None)
    upload.file.seek(2)

    size = utils.get_upload_size_bytes(upload)

    assert size == 6
    assert upload.file.tell() == 2


def test_get_upload_size_bytes_logs_warning_when_stream_size_unavailable(
    caplog: pytest.LogCaptureFixture,
) -> None:
    upload = UploadFile(filename="non_seekable.txt", file=_NonSeekableFile(), size=None)

    caplog.set_level("WARNING")
    size = utils.get_upload_size_bytes(upload)

    assert size is None
    assert "Could not determine upload size via stream seek" in caplog.text
    assert "non_seekable.txt" in caplog.text


def test_is_upload_too_large_logs_warning_when_size_unknown(
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
) -> None:
    upload = _make_upload("size_unknown.txt", size=1)
    monkeypatch.setattr(utils, "get_upload_size_bytes", lambda _upload: None)

    caplog.set_level("WARNING")
    is_too_large = utils.is_upload_too_large(upload, max_bytes=100)

    assert is_too_large is False
    assert "Could not determine upload size; skipping size-limit check" in caplog.text
    assert "size_unknown.txt" in caplog.text


def test_categorize_uploaded_files_accepts_size_under_limit(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    # upload_size_mb=1 → max_bytes = 1*1024*1024; file size 99 is well under
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    upload = _make_upload("small.png", size=99)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert len(result.rejected) == 0


def test_categorize_uploaded_files_uses_seek_fallback_when_upload_size_missing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    upload = _make_upload_no_size("small.png", content=b"x" * 99)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert len(result.rejected) == 0


def test_categorize_uploaded_files_accepts_size_at_limit(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    # 1 MB = 1048576 bytes; file at exactly that boundary should be accepted
    upload = _make_upload("edge.png", size=1048576)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert len(result.rejected) == 0


def test_categorize_uploaded_files_rejects_size_over_limit_with_reason(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    upload = _make_upload("large.png", size=1048577)  # 1 byte over 1 MB
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1
    assert result.rejected[0].reason == "Exceeds 1 MB file size limit"


def test_categorize_uploaded_files_mixed_batch_keeps_valid_and_rejects_oversized(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    small = _make_upload("small.png", size=50)
    large = _make_upload("large.png", size=1048577)

    result = utils.categorize_uploaded_files([small, large], MagicMock())

    assert [file.filename for file in result.acceptable] == ["small.png"]
    assert len(result.rejected) == 1
    assert result.rejected[0].filename == "large.png"
    assert result.rejected[0].reason == "Exceeds 1 MB file size limit"


def test_categorize_uploaded_files_enforces_size_limit_always(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)

    upload = _make_upload("oversized.pdf", size=1048577)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1
    assert result.rejected[0].reason == "Exceeds 1 MB file size limit"


def test_categorize_uploaded_files_checks_size_before_text_extraction(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)

    extract_mock = MagicMock(return_value="this should not run")
    monkeypatch.setattr(utils, "extract_file_text", extract_mock)

    oversized_doc = _make_upload("oversized.pdf", size=1048577)
    result = utils.categorize_uploaded_files([oversized_doc], MagicMock())

    extract_mock.assert_not_called()
    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1
    assert result.rejected[0].reason == "Exceeds 1 MB file size limit"


def test_categorize_enforces_size_limit_when_upload_size_mb_is_positive(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A positive upload_size_mb is always enforced."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)

    upload = _make_upload("huge.png", size=1048577, content=b"x")
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1


def test_categorize_enforces_token_limit_when_threshold_k_is_positive(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A positive token_threshold_k is always enforced."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=5)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 6000)

    upload = _make_upload("big_image.png", size=100)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1


def test_categorize_no_token_limit_when_threshold_k_is_zero(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """token_threshold_k=0 means no token limit; high-token files are accepted."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=0)
    monkeypatch.setattr(
        utils, "estimate_image_tokens_for_upload", lambda _upload: 999_999
    )

    upload = _make_upload("huge_image.png", size=100)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.rejected) == 0
    assert len(result.acceptable) == 1


def test_categorize_both_limits_enforced(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Both positive limits are enforced; file exceeding token limit is rejected."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=10, token_threshold_k=5)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 6000)

    upload = _make_upload("over_tokens.png", size=100)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 0
    assert len(result.rejected) == 1
    assert result.rejected[0].reason == "Exceeds 5K token limit"


def test_categorize_rejection_reason_contains_dynamic_values(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Rejection reasons reflect the admin-configured limits, not hardcoded values."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)
    monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 8000)

    # File within size limit but over token limit
    upload = _make_upload("tokens.png", size=100)
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert result.rejected[0].reason == "Exceeds 7K token limit"

    # File over size limit
    _patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)
    oversized = _make_upload("big.png", size=42 * 1024 * 1024 + 1)
    result2 = utils.categorize_uploaded_files([oversized], MagicMock())

    assert result2.rejected[0].reason == "Exceeds 42 MB file size limit"


# --- count_tokens tests ---


def test_count_tokens_small_text() -> None:
    """Small text should be encoded in a single call and return correct count."""
    tokenizer = _Tokenizer()
    text = "hello world"
    assert count_tokens(text, tokenizer) == len(tokenizer.encode(text))


def test_count_tokens_chunked_matches_single_call() -> None:
    """Chunked encoding should produce the same result as single-call for small text."""
    tokenizer = _Tokenizer()
    text = "a" * 1000
    assert count_tokens(text, tokenizer) == len(tokenizer.encode(text))


def test_count_tokens_large_text_is_chunked(monkeypatch: pytest.MonkeyPatch) -> None:
    """Text exceeding _ENCODE_CHUNK_SIZE should be split into multiple encode calls."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    tokenizer = _Tokenizer()
    text = "a" * 250
    # _Tokenizer returns 1 token per char, so total should be 250
    assert count_tokens(text, tokenizer) == 250


def test_count_tokens_with_token_limit_exits_early(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When token_limit is set and exceeded, count_tokens should stop early."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)

    encode_call_count = 0
    original_tokenizer = _Tokenizer()

    class _CountingTokenizer(BaseTokenizer):
        def encode(self, text: str) -> list[int]:
            nonlocal encode_call_count
            encode_call_count += 1
            return original_tokenizer.encode(text)

        def tokenize(self, text: str) -> list[str]:
            return list(text)

        def decode(self, _tokens: list[int]) -> str:
            return ""

    tokenizer = _CountingTokenizer()
    # 500 chars → 5 chunks of 100; limit=150 → should stop after 2 chunks
    text = "a" * 500
    result = count_tokens(text, tokenizer, token_limit=150)

    assert result == 200  # 2 chunks × 100 tokens each
    assert encode_call_count == 2, "Should have stopped after 2 chunks"


def test_count_tokens_with_token_limit_not_exceeded(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When token_limit is set but not exceeded, all chunks are encoded."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    tokenizer = _Tokenizer()
    text = "a" * 250
    result = count_tokens(text, tokenizer, token_limit=1000)
    assert result == 250


def test_count_tokens_no_limit_encodes_all_chunks(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Without token_limit, all chunks are encoded regardless of count."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    tokenizer = _Tokenizer()
    text = "a" * 500
    result = count_tokens(text, tokenizer)
    assert result == 500


# --- early exit via token_limit in categorize tests ---


def test_categorize_early_exits_tokenization_for_large_text(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Large text files should be rejected via early-exit tokenization
    without encoding all chunks."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    # token_threshold = 1000; _ENCODE_CHUNK_SIZE = 100 → text of 500 chars = 5 chunks
    # Should stop after 2nd chunk (200 tokens > 1000? No... need 1 token per char)
    # With _Tokenizer: 1 token per char. threshold=1000, chunk=100 → need 11 chunks
    # Let's use a bigger text
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    large_text = "x" * 5000  # 5000 tokens, threshold 1000
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: large_text)

    encode_call_count = 0
    original_tokenizer = _Tokenizer()

    class _CountingTokenizer(BaseTokenizer):
        def encode(self, text: str) -> list[int]:
            nonlocal encode_call_count
            encode_call_count += 1
            return original_tokenizer.encode(text)

        def tokenize(self, text: str) -> list[str]:
            return list(text)

        def decode(self, _tokens: list[int]) -> str:
            return ""

    monkeypatch.setattr(utils, "get_tokenizer", lambda **_kwargs: _CountingTokenizer())

    upload = _make_upload("big.txt", size=5000, content=large_text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.rejected) == 1
    assert "token limit" in result.rejected[0].reason
    # 5000 chars / 100 chunk_size = 50 chunks total; should stop well before all 50
    assert (
        encode_call_count < 50
    ), f"Expected early exit but encoded {encode_call_count} chunks out of 50"


def test_categorize_text_under_token_limit_accepted(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Text files under the token threshold should be accepted with exact count."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    small_text = "x" * 500  # 500 tokens < 1000 threshold
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: small_text)

    upload = _make_upload("ok.txt", size=500, content=small_text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert result.acceptable_file_to_token_count["ok.txt"] == 500


# --- skip-indexing vs rejection by file type ---


def test_csv_over_token_threshold_accepted_skip_indexing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """CSV exceeding token threshold is uploaded but flagged to skip indexing."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    text = "x" * 2000  # 2000 tokens > 1000 threshold
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: text)

    upload = _make_upload("large.csv", size=2000, content=text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert result.acceptable[0].filename == "large.csv"
    assert "large.csv" in result.skip_indexing
    assert len(result.rejected) == 0


def test_csv_under_token_threshold_accepted_and_indexed(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """CSV under token threshold is uploaded and indexed normally."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    text = "x" * 500  # 500 tokens < 1000 threshold
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: text)

    upload = _make_upload("small.csv", size=500, content=text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.acceptable) == 1
    assert result.acceptable[0].filename == "small.csv"
    assert "small.csv" not in result.skip_indexing
    assert len(result.rejected) == 0


def test_pdf_over_token_threshold_rejected(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """PDF exceeding token threshold is rejected entirely (not uploaded)."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    text = "x" * 2000  # 2000 tokens > 1000 threshold
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: text)

    upload = _make_upload("big.pdf", size=2000, content=text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.rejected) == 1
    assert result.rejected[0].filename == "big.pdf"
    assert "1K token limit" in result.rejected[0].reason
    assert len(result.acceptable) == 0


================================================
FILE: backend/tests/unit/onyx/server/test_prometheus_instrumentation.py
================================================
"""Unit tests for Prometheus instrumentation module."""

import threading
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

from fastapi import FastAPI
from fastapi.testclient import TestClient
from prometheus_client import CollectorRegistry
from prometheus_client import Gauge

from onyx.server.metrics.per_tenant import per_tenant_request_callback
from onyx.server.metrics.prometheus_setup import setup_prometheus_metrics
from onyx.server.metrics.slow_requests import slow_request_callback


def _make_info(
    duration: float,
    method: str = "GET",
    handler: str = "/api/test",
    status: str = "200",
) -> Any:
    """Build a fake metrics Info object matching the instrumentator's Info shape."""
    return MagicMock(
        modified_duration=duration,
        method=method,
        modified_handler=handler,
        modified_status=status,
    )


def test_slow_request_callback_increments_above_threshold() -> None:
    with patch("onyx.server.metrics.slow_requests._slow_requests") as mock_counter:
        mock_labels = MagicMock()
        mock_counter.labels.return_value = mock_labels

        info = _make_info(
            duration=2.0, method="POST", handler="/api/chat", status="200"
        )
        slow_request_callback(info)

        mock_counter.labels.assert_called_once_with(
            method="POST", handler="/api/chat", status="200"
        )
        mock_labels.inc.assert_called_once()


def test_slow_request_callback_skips_below_threshold() -> None:
    with patch("onyx.server.metrics.slow_requests._slow_requests") as mock_counter:
        info = _make_info(duration=0.5)
        slow_request_callback(info)

        mock_counter.labels.assert_not_called()


def test_slow_request_callback_skips_at_exact_threshold() -> None:
    with (
        patch("onyx.server.metrics.slow_requests.SLOW_REQUEST_THRESHOLD_SECONDS", 1.0),
        patch("onyx.server.metrics.slow_requests._slow_requests") as mock_counter,
    ):
        info = _make_info(duration=1.0)
        slow_request_callback(info)

        mock_counter.labels.assert_not_called()


def test_setup_attaches_instrumentator_to_app() -> None:
    with patch("onyx.server.metrics.prometheus_setup.Instrumentator") as mock_cls:
        mock_instance = MagicMock()
        mock_instance.instrument.return_value = mock_instance
        mock_cls.return_value = mock_instance

        app = FastAPI()
        setup_prometheus_metrics(app)

        mock_cls.assert_called_once_with(
            should_group_status_codes=False,
            should_ignore_untemplated=False,
            should_group_untemplated=True,
            should_instrument_requests_inprogress=True,
            inprogress_labels=True,
            excluded_handlers=["/health", "/metrics", "/openapi.json"],
        )
        assert mock_instance.add.call_count == 3
        mock_instance.instrument.assert_called_once_with(
            app,
            latency_lowr_buckets=(
                0.01,
                0.025,
                0.05,
                0.1,
                0.25,
                0.5,
                1.0,
                2.5,
                5.0,
                10.0,
            ),
        )
        mock_instance.expose.assert_called_once_with(app)


def test_per_tenant_callback_increments_with_tenant_id() -> None:
    """Verify per-tenant callback reads tenant from contextvar and increments."""
    with (
        patch(
            "onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR"
        ) as mock_ctx,
        patch("onyx.server.metrics.per_tenant._requests_by_tenant") as mock_counter,
    ):
        mock_labels = MagicMock()
        mock_counter.labels.return_value = mock_labels
        mock_ctx.get.return_value = "tenant_abc"

        info = _make_info(
            duration=0.1, method="POST", handler="/api/chat", status="200"
        )
        per_tenant_request_callback(info)

        mock_counter.labels.assert_called_once_with(
            tenant_id="tenant_abc",
            method="POST",
            handler="/api/chat",
            status="200",
        )
        mock_labels.inc.assert_called_once()


def test_per_tenant_callback_falls_back_to_unknown() -> None:
    """Verify per-tenant callback uses 'unknown' when contextvar is None."""
    with (
        patch(
            "onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR"
        ) as mock_ctx,
        patch("onyx.server.metrics.per_tenant._requests_by_tenant") as mock_counter,
    ):
        mock_labels = MagicMock()
        mock_counter.labels.return_value = mock_labels
        mock_ctx.get.return_value = None

        info = _make_info(duration=0.1)
        per_tenant_request_callback(info)

        mock_counter.labels.assert_called_once_with(
            tenant_id="unknown",
            method="GET",
            handler="/api/test",
            status="200",
        )
        mock_labels.inc.assert_called_once()


def test_inprogress_gauge_increments_during_request() -> None:
    """Verify the in-progress gauge goes up while a request is in flight."""
    registry = CollectorRegistry()
    gauge = Gauge(
        "http_requests_inprogress_test",
        "In-progress requests",
        ["method", "handler"],
        registry=registry,
    )

    request_started = threading.Event()
    request_release = threading.Event()

    app = FastAPI()

    @app.get("/slow")
    def slow_endpoint() -> dict:
        gauge.labels(method="GET", handler="/slow").inc()
        request_started.set()
        request_release.wait(timeout=5)
        gauge.labels(method="GET", handler="/slow").dec()
        return {"status": "done"}

    client = TestClient(app, raise_server_exceptions=False)

    def make_request() -> None:
        client.get("/slow")

    thread = threading.Thread(target=make_request)
    thread.start()

    request_started.wait(timeout=5)
    assert gauge.labels(method="GET", handler="/slow")._value.get() == 1.0

    request_release.set()
    thread.join(timeout=5)
    assert gauge.labels(method="GET", handler="/slow")._value.get() == 0.0


def test_inprogress_gauge_tracks_concurrent_requests() -> None:
    """Verify the gauge correctly counts multiple concurrent in-flight requests."""
    registry = CollectorRegistry()
    gauge = Gauge(
        "http_requests_inprogress_concurrent_test",
        "In-progress requests",
        ["method", "handler"],
        registry=registry,
    )

    # 3 parties: 2 request threads + main thread
    barrier = threading.Barrier(3)
    release = threading.Event()

    app = FastAPI()

    @app.get("/concurrent")
    def concurrent_endpoint() -> dict:
        gauge.labels(method="GET", handler="/concurrent").inc()
        barrier.wait(timeout=5)
        release.wait(timeout=5)
        gauge.labels(method="GET", handler="/concurrent").dec()
        return {"status": "done"}

    client = TestClient(app, raise_server_exceptions=False)

    def make_request() -> None:
        client.get("/concurrent")

    t1 = threading.Thread(target=make_request)
    t2 = threading.Thread(target=make_request)
    t1.start()
    t2.start()

    # All 3 threads meet here — both requests are in-flight
    barrier.wait(timeout=5)
    assert gauge.labels(method="GET", handler="/concurrent")._value.get() == 2.0

    release.set()
    t1.join(timeout=5)
    t2.join(timeout=5)
    assert gauge.labels(method="GET", handler="/concurrent")._value.get() == 0.0


================================================
FILE: backend/tests/unit/onyx/server/test_settings_store.py
================================================
import pytest

from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.settings import store as settings_store
from onyx.server.settings.models import (
    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Settings


class _FakeKvStore:
    def __init__(self, data: dict | None = None) -> None:
        self._data = data

    def load(self, _key: str) -> dict:
        if self._data is None:
            raise KvKeyNotFoundError()
        return self._data


class _FakeCache:
    def __init__(self) -> None:
        self._vals: dict[str, bytes] = {}

    def get(self, key: str) -> bytes | None:
        return self._vals.get(key)

    def set(self, key: str, value: str, ex: int | None = None) -> None:  # noqa: ARG002
        self._vals[key] = value.encode("utf-8")


def test_load_settings_uses_model_defaults_when_no_stored_value(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When no settings are stored (vector DB enabled), load_settings() should
    resolve the default token threshold to 200."""
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", False)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
    assert (
        settings.file_token_count_threshold_k
        == DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
    )


def test_load_settings_uses_high_token_default_when_vector_db_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When vector DB is disabled and no settings are stored, the token
    threshold should default to 10000 (10M tokens)."""
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
    assert (
        settings.file_token_count_threshold_k
        == DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
    )


def test_load_settings_preserves_explicit_value_when_vector_db_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When vector DB is disabled but admin explicitly set a token threshold,
    that value should be preserved (not overridden by the 10000 default)."""
    stored = Settings(file_token_count_threshold_k=500).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)

    settings = settings_store.load_settings()

    assert settings.file_token_count_threshold_k == 500


def test_load_settings_preserves_zero_token_threshold(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A value of 0 means 'no limit' and should be preserved."""
    stored = Settings(file_token_count_threshold_k=0).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)

    settings = settings_store.load_settings()

    assert settings.file_token_count_threshold_k == 0


def test_load_settings_resolves_zero_upload_size_to_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A value of 0 should be treated as unset and resolved to the default."""
    stored = Settings(user_file_max_upload_size_mb=0).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB


def test_load_settings_clamps_upload_size_to_env_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When the stored upload size exceeds MAX_ALLOWED_UPLOAD_SIZE_MB, it should
    be clamped to the env-configured maximum."""
    stored = Settings(user_file_max_upload_size_mb=500).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 250)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == 250


def test_load_settings_preserves_upload_size_within_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When the stored upload size is within MAX_ALLOWED_UPLOAD_SIZE_MB, it should
    be preserved unchanged."""
    stored = Settings(user_file_max_upload_size_mb=150).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 250)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == 150


def test_load_settings_zero_upload_size_resolves_to_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A value of 0 should be treated as unset and resolved to the default,
    clamped to MAX_ALLOWED_UPLOAD_SIZE_MB."""
    stored = Settings(user_file_max_upload_size_mb=0).model_dump()
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 100)
    monkeypatch.setattr(settings_store, "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", 100)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == 100


def test_load_settings_default_clamped_to_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB exceeds MAX_ALLOWED_UPLOAD_SIZE_MB,
    the effective default should be min(DEFAULT, MAX)."""
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", 100)
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 50)

    settings = settings_store.load_settings()

    assert settings.user_file_max_upload_size_mb == 50


================================================
FILE: backend/tests/unit/onyx/server/test_upload_files.py
================================================
import io
import zipfile
from unittest.mock import MagicMock
from unittest.mock import patch
from zipfile import BadZipFile

import pytest
from fastapi import UploadFile
from starlette.datastructures import Headers

from onyx.configs.constants import FileOrigin
from onyx.server.documents.connector import upload_files


def _create_test_zip() -> bytes:
    """Create a simple in-memory zip file containing two text files."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("file1.txt", "hello")
        zf.writestr("file2.txt", "world")
    return buf.getvalue()


def _make_upload_file(content: bytes, filename: str, content_type: str) -> UploadFile:
    return UploadFile(
        file=io.BytesIO(content),
        filename=filename,
        headers=Headers({"content-type": content_type}),
    )


@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_true_extracts_files(
    mock_get_store: MagicMock,
) -> None:
    """When unzip=True (default), a zip upload is extracted into individual files."""
    mock_store = MagicMock()
    mock_store.save_file.side_effect = lambda **kwargs: f"id-{kwargs['display_name']}"
    mock_get_store.return_value = mock_store

    zip_bytes = _create_test_zip()
    upload = _make_upload_file(zip_bytes, "test.zip", "application/zip")

    result = upload_files([upload], FileOrigin.CONNECTOR)

    # Should have extracted the two individual files, not stored the zip itself
    assert len(result.file_paths) == 2
    assert "id-file1.txt" in result.file_paths
    assert "id-file2.txt" in result.file_paths
    assert "file1.txt" in result.file_names
    assert "file2.txt" in result.file_names


@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_false_stores_zip_as_is(
    mock_get_store: MagicMock,
) -> None:
    """When unzip=False, the zip file is stored as-is without extraction."""
    mock_store = MagicMock()
    mock_store.save_file.return_value = "zip-file-id"
    mock_get_store.return_value = mock_store

    zip_bytes = _create_test_zip()
    upload = _make_upload_file(zip_bytes, "site_export.zip", "application/zip")

    result = upload_files([upload], FileOrigin.CONNECTOR, unzip=False)

    # Should store exactly one file (the zip itself)
    assert len(result.file_paths) == 1
    assert result.file_paths[0] == "zip-file-id"
    assert result.file_names == ["site_export.zip"]
    # No zip metadata should be created
    assert result.zip_metadata_file_id is None

    # Verify the stored content is a valid zip
    saved_content: io.BytesIO = mock_store.save_file.call_args[1]["content"]
    saved_content.seek(0)
    with zipfile.ZipFile(saved_content, "r") as zf:
        assert set(zf.namelist()) == {"file1.txt", "file2.txt"}


@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_invalid_zip_with_unzip_false_raises(
    mock_get_store: MagicMock,
) -> None:
    """An invalid zip is rejected even when unzip=False (validation still runs)."""
    mock_get_store.return_value = MagicMock()

    bad_zip = _make_upload_file(b"not a zip", "bad.zip", "application/zip")

    with pytest.raises(BadZipFile):
        upload_files([bad_zip], FileOrigin.CONNECTOR, unzip=False)


@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_multiple_zips_rejected_when_unzip_false(
    mock_get_store: MagicMock,
) -> None:
    """The seen_zip guard rejects a second zip even when unzip=False."""
    mock_store = MagicMock()
    mock_store.save_file.return_value = "zip-id"
    mock_get_store.return_value = mock_store

    zip_bytes = _create_test_zip()
    zip1 = _make_upload_file(zip_bytes, "a.zip", "application/zip")
    zip2 = _make_upload_file(zip_bytes, "b.zip", "application/zip")

    with pytest.raises(Exception, match="Only one zip file"):
        upload_files([zip1, zip2], FileOrigin.CONNECTOR, unzip=False)


================================================
FILE: backend/tests/unit/onyx/test_redis.py
================================================
import os

import pytest
import redis

from onyx.redis.redis_pool import RedisPool
from onyx.utils.logger import setup_logger

logger = setup_logger()


@pytest.mark.skipif(
    os.getenv("REDIS_CLOUD_PYTEST_PASSWORD", "") == "",
    reason="Environment variable REDIS_CLOUD_PYTEST_PASSWORD is not set",
)
def test_redis_ssl() -> None:
    REDIS_PASSWORD = os.environ.get("REDIS_CLOUD_PYTEST_PASSWORD")
    REDIS_HOST = "redis-15414.c267.us-east-1-4.ec2.redns.redis-cloud.com"
    REDIS_PORT = 15414
    REDIS_SSL_CERT_REQS = "required"

    assert REDIS_PASSWORD

    # Construct the path to the CA certificate for the redis ssl test instance
    # it contains no secret data, so it's OK to have checked in!
    current_dir = os.path.dirname(__file__)
    REDIS_SSL_CA_CERTS = os.path.join(current_dir, "redis_ca.pem")

    pool = RedisPool.create_pool(
        host=REDIS_HOST,
        port=REDIS_PORT,
        password=REDIS_PASSWORD,
        ssl=True,
        ssl_cert_reqs=REDIS_SSL_CERT_REQS,
        ssl_ca_certs=REDIS_SSL_CA_CERTS,
    )

    r = redis.Redis(connection_pool=pool)
    assert r.ping()


================================================
FILE: backend/tests/unit/onyx/test_startup_validation.py
================================================
"""Tests for startup validation in no-vector-DB mode.

Verifies that DISABLE_VECTOR_DB raises RuntimeError when combined with
incompatible settings (MULTI_TENANT, ENABLE_CRAFT).
"""

from unittest.mock import patch

import pytest


class TestValidateNoVectorDbSettings:
    @patch("onyx.main.DISABLE_VECTOR_DB", False)
    def test_no_error_when_vector_db_enabled(self) -> None:
        from onyx.main import validate_no_vector_db_settings

        validate_no_vector_db_settings()

    @patch("onyx.main.DISABLE_VECTOR_DB", True)
    @patch("onyx.main.MULTI_TENANT", False)
    @patch("onyx.server.features.build.configs.ENABLE_CRAFT", False)
    def test_no_error_when_no_conflicts(self) -> None:
        from onyx.main import validate_no_vector_db_settings

        validate_no_vector_db_settings()

    @patch("onyx.main.DISABLE_VECTOR_DB", True)
    @patch("onyx.main.MULTI_TENANT", True)
    def test_raises_on_multi_tenant(self) -> None:
        from onyx.main import validate_no_vector_db_settings

        with pytest.raises(RuntimeError, match="MULTI_TENANT"):
            validate_no_vector_db_settings()

    @patch("onyx.main.DISABLE_VECTOR_DB", True)
    @patch("onyx.main.MULTI_TENANT", False)
    @patch("onyx.server.features.build.configs.ENABLE_CRAFT", True)
    def test_raises_on_enable_craft(self) -> None:
        from onyx.main import validate_no_vector_db_settings

        with pytest.raises(RuntimeError, match="ENABLE_CRAFT"):
            validate_no_vector_db_settings()

    @patch("onyx.main.DISABLE_VECTOR_DB", True)
    @patch("onyx.main.MULTI_TENANT", True)
    @patch("onyx.server.features.build.configs.ENABLE_CRAFT", True)
    def test_multi_tenant_checked_before_craft(self) -> None:
        """MULTI_TENANT is checked first, so it should be the error raised."""
        from onyx.main import validate_no_vector_db_settings

        with pytest.raises(RuntimeError, match="MULTI_TENANT"):
            validate_no_vector_db_settings()


================================================
FILE: backend/tests/unit/onyx/tools/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/tools/custom/test_custom_tools.py
================================================
import unittest
import uuid
from typing import Any
from unittest.mock import patch

import pytest

from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import DynamicSchemaInfo
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.custom.custom_tool import (
    build_custom_tools_from_openapi_schema_and_headers,
)
from onyx.tools.tool_implementations.custom.custom_tool import CustomToolCallSummary
from onyx.tools.tool_implementations.custom.custom_tool import (
    validate_openapi_schema,
)
from onyx.utils.headers import HeaderItemDict


class TestCustomTool(unittest.TestCase):
    """
    Test suite for CustomTool functionality.
    This class tests the creation, running, and result handling of custom tools
    based on OpenAPI schemas.
    """

    def setUp(self) -> None:
        """
        Set up the test environment before each test method.
        Initializes an OpenAPI schema and DynamicSchemaInfo for testing.
        """
        self.openapi_schema: dict[str, Any] = {
            "openapi": "3.0.0",
            "info": {
                "version": "1.0.0",
                "title": "Assistants API",
                "description": "An API for managing assistants",
            },
            "servers": [
                {"url": "http://localhost:8080/CHAT_SESSION_ID/test/MESSAGE_ID"},
            ],
            "paths": {
                "/assistant/{assistant_id}": {
                    "GET": {
                        "summary": "Get a specific Assistant",
                        "operationId": "getAssistant",
                        "parameters": [
                            {
                                "name": "assistant_id",
                                "in": "path",
                                "required": True,
                                "schema": {"type": "string"},
                            }
                        ],
                    },
                    "POST": {
                        "summary": "Create a new Assistant",
                        "operationId": "createAssistant",
                        "parameters": [
                            {
                                "name": "assistant_id",
                                "in": "path",
                                "required": True,
                                "schema": {"type": "string"},
                            }
                        ],
                        "requestBody": {
                            "required": True,
                            "content": {
                                "application/json": {"schema": {"type": "object"}}
                            },
                        },
                    },
                }
            },
        }
        validate_openapi_schema(self.openapi_schema)
        self.dynamic_schema_info: DynamicSchemaInfo = DynamicSchemaInfo(
            chat_session_id=uuid.uuid4(), message_id=20
        )

    @patch("onyx.tools.tool_implementations.custom.custom_tool.requests.request")
    def test_custom_tool_run_get(self, mock_request: unittest.mock.MagicMock) -> None:
        """
        Test the GET method of a custom tool.
        Verifies that the tool correctly constructs the URL and makes the GET request.
        """
        # Mock the response object
        mock_response = unittest.mock.MagicMock()
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json.return_value = {"id": "123", "name": "Test Assistant"}
        mock_request.return_value = mock_response

        tools = build_custom_tools_from_openapi_schema_and_headers(
            tool_id=-1,  # dummy tool id
            openapi_schema=self.openapi_schema,
            dynamic_schema_info=self.dynamic_schema_info,
        )

        result = tools[0].run(
            placement=Placement(turn_index=0, tab_index=0),
            override_kwargs=None,
            assistant_id="123",
        )
        expected_url = f"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123"
        mock_request.assert_called_once_with("GET", expected_url, json=None, headers={})

        self.assertIsNotNone(result, "Expected a result from the tool run")
        self.assertIsNotNone(
            result.rich_response,
            "Expected rich_response to be set",
        )
        assert isinstance(result.rich_response, CustomToolCallSummary)
        self.assertEqual(
            result.rich_response.tool_name,
            "getAssistant",
            "Tool name in response does not match expected value",
        )

    @patch("onyx.tools.tool_implementations.custom.custom_tool.requests.request")
    def test_custom_tool_run_post(self, mock_request: unittest.mock.MagicMock) -> None:
        """
        Test the POST method of a custom tool.
        Verifies that the tool correctly constructs the URL and makes the POST request with the given body.
        """
        # Mock the response object
        mock_response = unittest.mock.MagicMock()
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json.return_value = {"id": "456", "name": "Created Assistant"}
        mock_request.return_value = mock_response

        tools = build_custom_tools_from_openapi_schema_and_headers(
            tool_id=-1,  # dummy tool id
            openapi_schema=self.openapi_schema,
            dynamic_schema_info=self.dynamic_schema_info,
        )

        result = tools[1].run(
            placement=Placement(turn_index=0, tab_index=0),
            override_kwargs=None,
            assistant_id="456",
        )
        expected_url = f"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/456"
        mock_request.assert_called_once_with(
            "POST", expected_url, json=None, headers={}
        )

        self.assertIsNotNone(result, "Expected a result from the tool run")
        self.assertIsNotNone(
            result.rich_response,
            "Expected rich_response to be set",
        )
        assert isinstance(result.rich_response, CustomToolCallSummary)
        self.assertEqual(
            result.rich_response.tool_name,
            "createAssistant",
            "Tool name in response does not match expected value",
        )

    @patch("onyx.tools.tool_implementations.custom.custom_tool.requests.request")
    def test_custom_tool_with_headers(
        self, mock_request: unittest.mock.MagicMock
    ) -> None:
        """
        Test the custom tool with custom headers.
        Verifies that the tool correctly includes the custom headers in the request.
        """
        # Mock the response object
        mock_response = unittest.mock.MagicMock()
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json.return_value = {"id": "123"}
        mock_request.return_value = mock_response

        custom_headers: list[HeaderItemDict] = [
            {"key": "Authorization", "value": "Bearer token123"},
            {"key": "Custom-Header", "value": "CustomValue"},
        ]
        tools = build_custom_tools_from_openapi_schema_and_headers(
            tool_id=-1,  # dummy tool id
            openapi_schema=self.openapi_schema,
            custom_headers=custom_headers,
            dynamic_schema_info=self.dynamic_schema_info,
        )

        tools[0].run(
            placement=Placement(turn_index=0, tab_index=0),
            override_kwargs=None,
            assistant_id="123",
        )
        expected_url = f"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123"
        expected_headers = {
            "Authorization": "Bearer token123",
            "Custom-Header": "CustomValue",
        }
        mock_request.assert_called_once_with(
            "GET", expected_url, json=None, headers=expected_headers
        )

    @patch("onyx.tools.tool_implementations.custom.custom_tool.requests.request")
    def test_custom_tool_with_empty_headers(
        self, mock_request: unittest.mock.MagicMock
    ) -> None:
        """
        Test the custom tool with an empty list of custom headers.
        Verifies that the tool correctly handles an empty list of headers.
        """
        # Mock the response object
        mock_response = unittest.mock.MagicMock()
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json.return_value = {"id": "123"}
        mock_request.return_value = mock_response

        custom_headers: list[HeaderItemDict] = []
        tools = build_custom_tools_from_openapi_schema_and_headers(
            tool_id=-1,  # dummy tool id
            openapi_schema=self.openapi_schema,
            custom_headers=custom_headers,
            dynamic_schema_info=self.dynamic_schema_info,
        )

        tools[0].run(
            placement=Placement(turn_index=0, tab_index=0),
            override_kwargs=None,
            assistant_id="123",
        )
        expected_url = f"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123"
        mock_request.assert_called_once_with("GET", expected_url, json=None, headers={})

    def test_invalid_openapi_schema(self) -> None:
        """
        Test that an invalid OpenAPI schema raises a ValueError.
        """
        invalid_schema: dict[str, Any] = {
            "openapi": "3.0.0",
            "info": {
                "version": "1.0.0",
                "title": "Invalid API",
            },
            # Missing required 'paths' key
        }

        with self.assertRaises(ValueError) as _:
            validate_openapi_schema(invalid_schema)

    def test_custom_tool_final_result(self) -> None:
        """
        Test extracting the final result from a custom tool response.
        Verifies that the tool result can be correctly extracted from the ToolResponse.
        """
        mock_response = ToolResponse(
            rich_response=CustomToolCallSummary(
                response_type="json",
                tool_name="getAssistant",
                tool_result={"id": "789", "name": "Final Assistant"},
            ),
            llm_facing_response='{"id": "789", "name": "Final Assistant"}',
        )

        # Extract the final result from the rich_response
        assert isinstance(mock_response.rich_response, CustomToolCallSummary)
        final_result = mock_response.rich_response.tool_result
        self.assertEqual(
            final_result,
            {"id": "789", "name": "Final Assistant"},
            "Final result does not match expected output",
        )


if __name__ == "__main__":
    pytest.main([__file__])


================================================
FILE: backend/tests/unit/onyx/tools/test_construct_tools_no_vectordb.py
================================================
"""Tests for tool construction when DISABLE_VECTOR_DB is True.

Verifies that:
- SearchTool.is_available() returns False when vector DB is disabled
- OpenURLTool.is_available() returns False when vector DB is disabled
- The force-add SearchTool block is suppressed when DISABLE_VECTOR_DB
- FileReaderTool.is_available() returns True when vector DB is disabled
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool

APP_CONFIGS_MODULE = "onyx.configs.app_configs"
FILE_READER_MODULE = "onyx.tools.tool_implementations.file_reader.file_reader_tool"


# ------------------------------------------------------------------
# SearchTool.is_available()
# ------------------------------------------------------------------


class TestSearchToolAvailability:
    @patch(f"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB", True)
    def test_unavailable_when_vector_db_disabled(self) -> None:
        from onyx.tools.tool_implementations.search.search_tool import SearchTool

        assert SearchTool.is_available(MagicMock()) is False

    @patch("onyx.db.connector.check_user_files_exist", return_value=True)
    @patch(
        "onyx.tools.tool_implementations.search.search_tool.check_federated_connectors_exist",
        return_value=False,
    )
    @patch(
        "onyx.tools.tool_implementations.search.search_tool.check_connectors_exist",
        return_value=False,
    )
    @patch(f"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB", False)
    def test_available_when_vector_db_enabled_and_files_exist(
        self,
        mock_connectors: MagicMock,  # noqa: ARG002
        mock_federated: MagicMock,  # noqa: ARG002
        mock_user_files: MagicMock,  # noqa: ARG002
    ) -> None:
        from onyx.tools.tool_implementations.search.search_tool import SearchTool

        assert SearchTool.is_available(MagicMock()) is True


# ------------------------------------------------------------------
# OpenURLTool.is_available()
# ------------------------------------------------------------------


class TestOpenURLToolAvailability:
    @patch(f"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB", True)
    def test_unavailable_when_vector_db_disabled(self) -> None:
        from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool

        assert OpenURLTool.is_available(MagicMock()) is False

    @patch(f"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB", False)
    def test_available_when_vector_db_enabled(self) -> None:
        from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool

        assert OpenURLTool.is_available(MagicMock()) is True


# ------------------------------------------------------------------
# FileReaderTool.is_available()
# ------------------------------------------------------------------


class TestFileReaderToolAvailability:
    @patch(f"{FILE_READER_MODULE}.DISABLE_VECTOR_DB", True)
    def test_available_when_vector_db_disabled(self) -> None:
        assert FileReaderTool.is_available(MagicMock()) is True

    @patch(f"{FILE_READER_MODULE}.DISABLE_VECTOR_DB", False)
    def test_unavailable_when_vector_db_enabled(self) -> None:
        assert FileReaderTool.is_available(MagicMock()) is False


# ------------------------------------------------------------------
# Force-add SearchTool suppression
# ------------------------------------------------------------------


class TestForceAddSearchToolGuard:
    def test_force_add_block_checks_disable_vector_db(self) -> None:
        """The force-add SearchTool block in construct_tools should include
        `not DISABLE_VECTOR_DB` so that forced search is also suppressed
        without a vector DB."""
        import inspect

        from onyx.tools.tool_constructor import construct_tools

        source = inspect.getsource(construct_tools)
        assert (
            "DISABLE_VECTOR_DB" in source
        ), "construct_tools should reference DISABLE_VECTOR_DB to suppress force-adding SearchTool"


# ------------------------------------------------------------------
# Persona API — _validate_vector_db_knowledge
# ------------------------------------------------------------------


class TestValidateVectorDbKnowledge:
    @patch(
        "onyx.server.features.persona.api.DISABLE_VECTOR_DB",
        True,
    )
    def test_rejects_document_set_ids(self) -> None:
        from fastapi import HTTPException

        from onyx.server.features.persona.api import _validate_vector_db_knowledge

        request = MagicMock()
        request.document_set_ids = [1]
        request.hierarchy_node_ids = []
        request.document_ids = []

        with __import__("pytest").raises(HTTPException) as exc_info:
            _validate_vector_db_knowledge(request)
        assert exc_info.value.status_code == 400
        assert "document sets" in exc_info.value.detail

    @patch(
        "onyx.server.features.persona.api.DISABLE_VECTOR_DB",
        True,
    )
    def test_rejects_hierarchy_node_ids(self) -> None:
        from fastapi import HTTPException

        from onyx.server.features.persona.api import _validate_vector_db_knowledge

        request = MagicMock()
        request.document_set_ids = []
        request.hierarchy_node_ids = [1]
        request.document_ids = []

        with __import__("pytest").raises(HTTPException) as exc_info:
            _validate_vector_db_knowledge(request)
        assert exc_info.value.status_code == 400
        assert "hierarchy nodes" in exc_info.value.detail

    @patch(
        "onyx.server.features.persona.api.DISABLE_VECTOR_DB",
        True,
    )
    def test_rejects_document_ids(self) -> None:
        from fastapi import HTTPException

        from onyx.server.features.persona.api import _validate_vector_db_knowledge

        request = MagicMock()
        request.document_set_ids = []
        request.hierarchy_node_ids = []
        request.document_ids = ["doc-abc"]

        with __import__("pytest").raises(HTTPException) as exc_info:
            _validate_vector_db_knowledge(request)
        assert exc_info.value.status_code == 400
        assert "documents" in exc_info.value.detail

    @patch(
        "onyx.server.features.persona.api.DISABLE_VECTOR_DB",
        True,
    )
    def test_allows_user_files_only(self) -> None:
        from onyx.server.features.persona.api import _validate_vector_db_knowledge

        request = MagicMock()
        request.document_set_ids = []
        request.hierarchy_node_ids = []
        request.document_ids = []

        _validate_vector_db_knowledge(request)

    @patch(
        "onyx.server.features.persona.api.DISABLE_VECTOR_DB",
        False,
    )
    def test_allows_everything_when_vector_db_enabled(self) -> None:
        from onyx.server.features.persona.api import _validate_vector_db_knowledge

        request = MagicMock()
        request.document_set_ids = [1, 2]
        request.hierarchy_node_ids = [3]
        request.document_ids = ["doc-x"]

        _validate_vector_db_knowledge(request)


================================================
FILE: backend/tests/unit/onyx/tools/test_file_reader_tool.py
================================================
"""Tests for the FileReaderTool.

Verifies:
- Tool definition schema is well-formed
- File ID validation (allowlist, UUID format)
- Character range extraction and clamping
- Error handling for missing parameters and non-text files
- is_available() reflects DISABLE_VECTOR_DB
"""

from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4

import pytest

from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallException
from onyx.tools.tool_implementations.file_reader.file_reader_tool import FILE_ID_FIELD
from onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool
from onyx.tools.tool_implementations.file_reader.file_reader_tool import MAX_NUM_CHARS
from onyx.tools.tool_implementations.file_reader.file_reader_tool import NUM_CHARS_FIELD
from onyx.tools.tool_implementations.file_reader.file_reader_tool import (
    START_CHAR_FIELD,
)

TOOL_MODULE = "onyx.tools.tool_implementations.file_reader.file_reader_tool"
_PLACEMENT = Placement(turn_index=0)


def _make_tool(
    user_file_ids: list | None = None,
    chat_file_ids: list | None = None,
) -> FileReaderTool:
    emitter = MagicMock()
    return FileReaderTool(
        tool_id=99,
        emitter=emitter,
        user_file_ids=user_file_ids or [],
        chat_file_ids=chat_file_ids or [],
    )


def _text_file(content: str, filename: str = "test.txt") -> InMemoryChatFile:
    return InMemoryChatFile(
        file_id="some-file-id",
        content=content.encode("utf-8"),
        file_type=ChatFileType.PLAIN_TEXT,
        filename=filename,
    )


# ------------------------------------------------------------------
# Tool metadata
# ------------------------------------------------------------------


class TestToolMetadata:
    def test_tool_name(self) -> None:
        tool = _make_tool()
        assert tool.name == "read_file"

    def test_tool_definition_schema(self) -> None:
        tool = _make_tool()
        defn = tool.tool_definition()
        assert defn["type"] == "function"
        func = defn["function"]
        assert func["name"] == "read_file"
        props = func["parameters"]["properties"]
        assert FILE_ID_FIELD in props
        assert START_CHAR_FIELD in props
        assert NUM_CHARS_FIELD in props
        assert func["parameters"]["required"] == [FILE_ID_FIELD]


# ------------------------------------------------------------------
# File ID validation
# ------------------------------------------------------------------


class TestFileIdValidation:
    def test_rejects_invalid_uuid(self) -> None:
        tool = _make_tool()
        with pytest.raises(ToolCallException, match="Invalid file_id"):
            tool._validate_file_id("not-a-uuid")

    def test_rejects_file_not_in_allowlist(self) -> None:
        tool = _make_tool(user_file_ids=[uuid4()])
        other_id = uuid4()
        with pytest.raises(ToolCallException, match="not in available files"):
            tool._validate_file_id(str(other_id))

    def test_accepts_user_file_id(self) -> None:
        uid = uuid4()
        tool = _make_tool(user_file_ids=[uid])
        assert tool._validate_file_id(str(uid)) == uid

    def test_accepts_chat_file_id(self) -> None:
        cid = uuid4()
        tool = _make_tool(chat_file_ids=[cid])
        assert tool._validate_file_id(str(cid)) == cid


# ------------------------------------------------------------------
# run() — character range extraction
# ------------------------------------------------------------------


class TestRun:
    @patch(f"{TOOL_MODULE}.get_session_with_current_tenant")
    @patch(f"{TOOL_MODULE}.load_user_file")
    def test_returns_full_content_by_default(
        self,
        mock_load_user_file: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        uid = uuid4()
        content = "Hello, world!"
        mock_load_user_file.return_value = _text_file(content)
        mock_get_session.return_value.__enter__.return_value = MagicMock()

        tool = _make_tool(user_file_ids=[uid])
        resp = tool.run(
            placement=_PLACEMENT,
            override_kwargs=MagicMock(),
            **{FILE_ID_FIELD: str(uid)},
        )
        assert content in resp.llm_facing_response

    @patch(f"{TOOL_MODULE}.get_session_with_current_tenant")
    @patch(f"{TOOL_MODULE}.load_user_file")
    def test_respects_start_char_and_num_chars(
        self,
        mock_load_user_file: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        uid = uuid4()
        content = "abcdefghijklmnop"
        mock_load_user_file.return_value = _text_file(content)
        mock_get_session.return_value.__enter__.return_value = MagicMock()

        tool = _make_tool(user_file_ids=[uid])
        resp = tool.run(
            placement=_PLACEMENT,
            override_kwargs=MagicMock(),
            **{FILE_ID_FIELD: str(uid), START_CHAR_FIELD: 4, NUM_CHARS_FIELD: 6},
        )
        assert "efghij" in resp.llm_facing_response

    @patch(f"{TOOL_MODULE}.get_session_with_current_tenant")
    @patch(f"{TOOL_MODULE}.load_user_file")
    def test_clamps_num_chars_to_max(
        self,
        mock_load_user_file: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        uid = uuid4()
        content = "x" * (MAX_NUM_CHARS + 500)
        mock_load_user_file.return_value = _text_file(content)
        mock_get_session.return_value.__enter__.return_value = MagicMock()

        tool = _make_tool(user_file_ids=[uid])
        resp = tool.run(
            placement=_PLACEMENT,
            override_kwargs=MagicMock(),
            **{FILE_ID_FIELD: str(uid), NUM_CHARS_FIELD: MAX_NUM_CHARS + 9999},
        )
        assert f"Characters 0-{MAX_NUM_CHARS}" in resp.llm_facing_response

    @patch(f"{TOOL_MODULE}.get_session_with_current_tenant")
    @patch(f"{TOOL_MODULE}.load_user_file")
    def test_includes_continuation_hint(
        self,
        mock_load_user_file: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        uid = uuid4()
        content = "x" * 100
        mock_load_user_file.return_value = _text_file(content)
        mock_get_session.return_value.__enter__.return_value = MagicMock()

        tool = _make_tool(user_file_ids=[uid])
        resp = tool.run(
            placement=_PLACEMENT,
            override_kwargs=MagicMock(),
            **{FILE_ID_FIELD: str(uid), NUM_CHARS_FIELD: 10},
        )
        assert "use start_char=10 to continue reading" in resp.llm_facing_response

    def test_raises_on_missing_file_id(self) -> None:
        tool = _make_tool()
        with pytest.raises(ToolCallException, match="Missing required"):
            tool.run(
                placement=_PLACEMENT,
                override_kwargs=MagicMock(),
            )

    @patch(f"{TOOL_MODULE}.get_session_with_current_tenant")
    @patch(f"{TOOL_MODULE}.load_user_file")
    def test_raises_on_non_text_file(
        self,
        mock_load_user_file: MagicMock,
        mock_get_session: MagicMock,
    ) -> None:
        uid = uuid4()
        mock_load_user_file.return_value = InMemoryChatFile(
            file_id="img",
            content=b"\x89PNG",
            file_type=ChatFileType.IMAGE,
            filename="photo.png",
        )
        mock_get_session.return_value.__enter__.return_value = MagicMock()

        tool = _make_tool(user_file_ids=[uid])
        with pytest.raises(ToolCallException, match="not a text file"):
            tool.run(
                placement=_PLACEMENT,
                override_kwargs=MagicMock(),
                **{FILE_ID_FIELD: str(uid)},
            )


# ------------------------------------------------------------------
# is_available()
# ------------------------------------------------------------------


class TestIsAvailable:
    @patch(f"{TOOL_MODULE}.DISABLE_VECTOR_DB", True)
    def test_available_when_vector_db_disabled(self) -> None:
        assert FileReaderTool.is_available(MagicMock()) is True

    @patch(f"{TOOL_MODULE}.DISABLE_VECTOR_DB", False)
    def test_unavailable_when_vector_db_enabled(self) -> None:
        assert FileReaderTool.is_available(MagicMock()) is False


================================================
FILE: backend/tests/unit/onyx/tools/test_no_vectordb.py
================================================
"""Tests for tool availability when DISABLE_VECTOR_DB is True.

Verifies that SearchTool and OpenURLTool report themselves as unavailable
when the vector DB is disabled, and that FileReaderTool remains available.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from sqlalchemy.orm import Session


# ------------------------------------------------------------------
# SearchTool
# ------------------------------------------------------------------


@patch("onyx.configs.app_configs.DISABLE_VECTOR_DB", True)
def test_search_tool_unavailable_when_vector_db_disabled() -> None:
    from onyx.tools.tool_implementations.search.search_tool import SearchTool

    db_session = MagicMock(spec=Session)
    assert SearchTool.is_available(db_session) is False


@patch("onyx.configs.app_configs.DISABLE_VECTOR_DB", False)
@patch(
    "onyx.tools.tool_implementations.search.search_tool.check_connectors_exist",
    return_value=True,
)
def test_search_tool_available_when_vector_db_enabled(
    _mock_connectors: MagicMock,
) -> None:
    from onyx.tools.tool_implementations.search.search_tool import SearchTool

    db_session = MagicMock(spec=Session)
    assert SearchTool.is_available(db_session) is True


# ------------------------------------------------------------------
# OpenURLTool
# ------------------------------------------------------------------


@patch("onyx.configs.app_configs.DISABLE_VECTOR_DB", True)
def test_open_url_tool_unavailable_when_vector_db_disabled() -> None:
    from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool

    db_session = MagicMock(spec=Session)
    assert OpenURLTool.is_available(db_session) is False


# ------------------------------------------------------------------
# FileReaderTool — available when vector DB is disabled (for now)
# ------------------------------------------------------------------


@pytest.mark.parametrize("vector_db_disabled", [True, False])
def test_file_reader_tool_available(vector_db_disabled: bool) -> None:
    # Patch where it's *used*, not where it's defined — the module has its own
    # local reference after `from onyx.configs.app_configs import DISABLE_VECTOR_DB`.
    with patch(
        "onyx.tools.tool_implementations.file_reader.file_reader_tool.DISABLE_VECTOR_DB",
        vector_db_disabled,
    ):
        from onyx.tools.tool_implementations.file_reader.file_reader_tool import (
            FileReaderTool,
        )

        db_session = MagicMock(spec=Session)
        assert FileReaderTool.is_available(db_session) is vector_db_disabled


================================================
FILE: backend/tests/unit/onyx/tools/test_python_tool_availability.py
================================================
"""Tests for PythonTool availability based on server_enabled flag and health check.

Verifies that PythonTool reports itself as unavailable when either:
- CODE_INTERPRETER_BASE_URL is not set, or
- CodeInterpreterServer.server_enabled is False in the database, or
- The Code Interpreter service health check fails.

Also verifies that the health check result is cached with a TTL.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from sqlalchemy.orm import Session

TOOL_MODULE = "onyx.tools.tool_implementations.python.python_tool"
CLIENT_MODULE = "onyx.tools.tool_implementations.python.code_interpreter_client"


@pytest.fixture(autouse=True)
def _clear_health_cache() -> None:
    """Reset the health check cache before every test."""
    import onyx.tools.tool_implementations.python.code_interpreter_client as mod

    mod._health_cache = {}


# ------------------------------------------------------------------
# Unavailable when CODE_INTERPRETER_BASE_URL is not set
# ------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", None)
def test_python_tool_unavailable_without_base_url() -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is False


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "")
def test_python_tool_unavailable_with_empty_base_url() -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is False


# ------------------------------------------------------------------
# Unavailable when server_enabled is False
# ------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
def test_python_tool_unavailable_when_server_disabled(
    mock_fetch: MagicMock,
) -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    mock_server = MagicMock()
    mock_server.server_enabled = False
    mock_fetch.return_value = mock_server

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is False


# ------------------------------------------------------------------
# Health check determines availability when URL + server are OK
# ------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
def test_python_tool_available_when_health_check_passes(
    mock_client_cls: MagicMock,
    mock_fetch: MagicMock,
) -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    mock_server = MagicMock()
    mock_server.server_enabled = True
    mock_fetch.return_value = mock_server

    mock_client = MagicMock()
    mock_client.health.return_value = True
    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is True
    mock_client.health.assert_called_once_with(use_cache=True)


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
def test_python_tool_unavailable_when_health_check_fails(
    mock_client_cls: MagicMock,
    mock_fetch: MagicMock,
) -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    mock_server = MagicMock()
    mock_server.server_enabled = True
    mock_fetch.return_value = mock_server

    mock_client = MagicMock()
    mock_client.health.return_value = False
    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is False
    mock_client.health.assert_called_once_with(use_cache=True)


# ------------------------------------------------------------------
# Health check is NOT reached when preconditions fail
# ------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
def test_health_check_not_called_when_server_disabled(
    mock_client_cls: MagicMock,
    mock_fetch: MagicMock,
) -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    mock_server = MagicMock()
    mock_server.server_enabled = False
    mock_fetch.return_value = mock_server

    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is False
    mock_client_cls.assert_not_called()


# ------------------------------------------------------------------
# Health check caching (tested at the client level)
# ------------------------------------------------------------------


def test_health_check_cached_on_second_call() -> None:
    from onyx.tools.tool_implementations.python.code_interpreter_client import (
        CodeInterpreterClient,
    )

    client = CodeInterpreterClient(base_url="http://fake:9000")
    mock_response = MagicMock()
    mock_response.json.return_value = {"status": "ok"}

    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
        assert client.health(use_cache=True) is True
        assert client.health(use_cache=True) is True
        # Only one HTTP call — the second used the cache
        mock_get.assert_called_once()


@patch(f"{CLIENT_MODULE}.time")
def test_health_check_refreshed_after_ttl_expires(mock_time: MagicMock) -> None:
    from onyx.tools.tool_implementations.python.code_interpreter_client import (
        CodeInterpreterClient,
        _HEALTH_CACHE_TTL_SECONDS,
    )

    client = CodeInterpreterClient(base_url="http://fake:9000")
    mock_response = MagicMock()
    mock_response.json.return_value = {"status": "ok"}

    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
        # First call at t=0 — cache miss
        mock_time.monotonic.return_value = 0.0
        assert client.health(use_cache=True) is True
        assert mock_get.call_count == 1

        # Second call within TTL — cache hit
        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS - 1)
        assert client.health(use_cache=True) is True
        assert mock_get.call_count == 1

        # Third call after TTL — cache miss, fresh request
        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS + 1)
        assert client.health(use_cache=True) is True
        assert mock_get.call_count == 2


def test_health_check_no_cache_by_default() -> None:
    from onyx.tools.tool_implementations.python.code_interpreter_client import (
        CodeInterpreterClient,
    )

    client = CodeInterpreterClient(base_url="http://fake:9000")
    mock_response = MagicMock()
    mock_response.json.return_value = {"status": "ok"}

    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
        assert client.health() is True
        assert client.health() is True
        # Both calls hit the network when use_cache=False (default)
        assert mock_get.call_count == 2


================================================
FILE: backend/tests/unit/onyx/tools/test_search_utils.py
================================================
"""Unit tests for search utility functions."""

from typing import NamedTuple

import pytest

from onyx.tools.tool_implementations.search.search_tool import deduplicate_queries
from onyx.tools.tool_implementations.search.search_utils import (
    weighted_reciprocal_rank_fusion,
)


# =============================================================================
# Test Data Structures
# =============================================================================


class MockDocument(NamedTuple):
    """Mock document for testing RRF."""

    document_id: str
    content: str


# =============================================================================
# Tests for weighted_reciprocal_rank_fusion
# =============================================================================


class TestWeightedReciprocalRankFusion:
    """Test suite for weighted_reciprocal_rank_fusion function."""

    def test_single_result_list(self) -> None:
        """Test RRF with a single result list."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")

        ranked_results = [[doc_a, doc_b, doc_c]]
        weights = [1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # With a single list, order should be preserved
        assert len(result) == 3
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"
        assert result[2].document_id == "doc_c"

    def test_two_identical_lists_equal_weights(self) -> None:
        """Test RRF with two identical lists and equal weights."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")

        ranked_results = [
            [doc_a, doc_b, doc_c],
            [doc_a, doc_b, doc_c],
        ]
        weights = [1.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # Order should be preserved, but items appear only once
        assert len(result) == 3
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"
        assert result[2].document_id == "doc_c"

    def test_two_different_lists_equal_weights(self) -> None:
        """Test RRF with different result lists and equal weights."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")
        doc_d = MockDocument("doc_d", "Content D")

        ranked_results = [
            [doc_a, doc_b, doc_c],
            [doc_c, doc_a, doc_d],
        ]
        weights = [1.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_a and doc_c should rank highest (appear in both lists)
        assert len(result) == 4
        # doc_a appears at rank 1 and 2 in the two lists
        # doc_c appears at rank 3 and 1 in the two lists
        # Both should be at top, exact order depends on tiebreaking
        top_two_ids = {result[0].document_id, result[1].document_id}
        assert top_two_ids == {"doc_a", "doc_c"}

    def test_weighted_lists_higher_weight_dominates(self) -> None:
        """Test that higher weighted list influences ranking more."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")

        # First list has higher weight
        ranked_results = [
            [doc_a, doc_b],  # weight 2.0
            [doc_c, doc_a],  # weight 1.0
        ]
        weights = [2.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_a should be first (rank 1 in list 1 with weight 2.0, rank 2 in list 2 with weight 1.0)
        # RRF score for doc_a: 2.0/(50+1) + 1.0/(50+2) = 2.0/51 + 1.0/52 = 0.0392 + 0.0192 = 0.0584
        # RRF score for doc_b: 2.0/(50+2) = 2.0/52 = 0.0385
        # RRF score for doc_c: 1.0/(50+1) = 1.0/51 = 0.0196
        assert len(result) == 3
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"
        assert result[2].document_id == "doc_c"

    def test_empty_result_list(self) -> None:
        """Test RRF with empty result list."""
        ranked_results: list[list[MockDocument]] = [[]]
        weights = [1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        assert len(result) == 0

    def test_multiple_empty_lists(self) -> None:
        """Test RRF with multiple empty result lists."""
        ranked_results: list[list[MockDocument]] = [[], [], []]
        weights = [1.0, 1.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        assert len(result) == 0

    def test_mixed_empty_and_non_empty_lists(self) -> None:
        """Test RRF with mix of empty and non-empty lists."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")

        ranked_results = [
            [],
            [doc_a, doc_b],
            [],
        ]
        weights = [1.0, 1.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        assert len(result) == 2
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"

    def test_mismatched_weights_raises_error(self) -> None:
        """Test that mismatched weights and results raises ValueError."""
        doc_a = MockDocument("doc_a", "Content A")

        ranked_results = [[doc_a]]
        weights = [1.0, 2.0]  # Too many weights

        with pytest.raises(ValueError, match="must match"):
            weighted_reciprocal_rank_fusion(
                ranked_results=ranked_results,
                weights=weights,
                id_extractor=lambda doc: doc.document_id,
            )

    def test_custom_k_value(self) -> None:
        """Test RRF with custom k value."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")

        ranked_results = [[doc_a, doc_b]]
        weights = [1.0]

        # With k=10, scores should be: 1/(10+1)=0.091, 1/(10+2)=0.083
        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
            k=10,
        )

        assert len(result) == 2
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"

    def test_deduplication_preserves_first_occurrence(self) -> None:
        """Test that when same document appears in multiple lists, first occurrence is used."""
        doc_a1 = MockDocument("doc_a", "Content A - First")
        doc_a2 = MockDocument("doc_a", "Content A - Second")
        doc_b = MockDocument("doc_b", "Content B")

        ranked_results = [
            [doc_a1, doc_b],
            [doc_a2],  # Same ID as doc_a1
        ]
        weights = [1.0, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # Should use first occurrence of doc_a
        assert len(result) == 2
        doc_a_result = next(doc for doc in result if doc.document_id == "doc_a")
        assert doc_a_result.content == "Content A - First"

    def test_realistic_semantic_vs_keyword_search_scenario(self) -> None:
        """Test realistic scenario: semantic search vs keyword search with different weights."""
        # Semantic search results
        doc_a = MockDocument("doc_a", "Semantic Result A")
        doc_b = MockDocument("doc_b", "Semantic Result B")
        doc_c = MockDocument("doc_c", "Semantic Result C")

        # Keyword search results (doc_c ranks first, doc_a also appears)
        doc_d = MockDocument("doc_d", "Keyword Result D")

        ranked_results = [
            [doc_a, doc_b, doc_c],  # Semantic: weight 1.2
            [doc_c, doc_a, doc_d],  # Keyword: weight 1.0
        ]
        weights = [1.2, 1.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_a and doc_c appear in both lists and should rank highest
        assert len(result) == 4
        top_two_ids = {result[0].document_id, result[1].document_id}
        assert top_two_ids == {"doc_a", "doc_c"}

    def test_many_lists_with_varying_weights(self) -> None:
        """Test RRF with multiple lists and varying weights."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")
        doc_d = MockDocument("doc_d", "Content D")

        ranked_results = [
            [doc_a, doc_b],  # weight 1.3
            [doc_c, doc_a],  # weight 1.0
            [doc_a, doc_d],  # weight 0.7
            [doc_b, doc_a],  # weight 0.5
        ]
        weights = [1.3, 1.0, 0.7, 0.5]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_a appears in all 4 lists, should rank first
        assert len(result) == 4
        assert result[0].document_id == "doc_a"

    def test_zero_weight(self) -> None:
        """Test RRF with zero weight for one list."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")
        doc_c = MockDocument("doc_c", "Content C")

        ranked_results = [
            [doc_a, doc_b],  # weight 1.0
            [doc_c],  # weight 0.0 (ignored)
        ]
        weights = [1.0, 0.0]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_c should rank last due to zero weight
        assert len(result) == 3
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"
        assert result[2].document_id == "doc_c"

    def test_negative_weight(self) -> None:
        """Test RRF with negative weight (should still work mathematically)."""
        doc_a = MockDocument("doc_a", "Content A")
        doc_b = MockDocument("doc_b", "Content B")

        ranked_results = [
            [doc_a, doc_b],  # weight 1.0
            [doc_b, doc_a],  # weight -0.5 (penalizes)
        ]
        weights = [1.0, -0.5]

        result = weighted_reciprocal_rank_fusion(
            ranked_results=ranked_results,
            weights=weights,
            id_extractor=lambda doc: doc.document_id,
        )

        # doc_a should rank higher (benefits from positive weight more)
        # doc_a: 1.0/(50+1) + (-0.5)/(50+2) = 0.0196 - 0.0096 = 0.0100
        # doc_b: 1.0/(50+2) + (-0.5)/(50+1) = 0.0192 - 0.0098 = 0.0094
        assert len(result) == 2
        assert result[0].document_id == "doc_a"
        assert result[1].document_id == "doc_b"


# =============================================================================
# Tests for deduplicate_queries
# =============================================================================


class TestDeduplicateQueries:
    """Test suite for deduplicate_queries function."""

    def test_no_duplicates(self) -> None:
        """Test deduplication with no duplicate queries."""
        queries_with_weights = [
            ("first query", 1.0),
            ("second query", 2.0),
            ("third query", 1.5),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 3
        assert ("first query", 1.0) in result
        assert ("second query", 2.0) in result
        assert ("third query", 1.5) in result

    def test_exact_duplicates(self) -> None:
        """Test deduplication with exact duplicate queries."""
        queries_with_weights = [
            ("same query", 1.0),
            ("same query", 2.0),
            ("same query", 1.5),
        ]

        result = deduplicate_queries(queries_with_weights)

        # Should have one entry with summed weights
        assert len(result) == 1
        assert result[0][0] == "same query"
        assert result[0][1] == 4.5  # 1.0 + 2.0 + 1.5

    def test_case_insensitive_duplicates(self) -> None:
        """Test that deduplication is case-insensitive."""
        queries_with_weights = [
            ("Search Query", 1.0),
            ("search query", 2.0),
            ("SEARCH QUERY", 1.5),
        ]

        result = deduplicate_queries(queries_with_weights)

        # Should have one entry with summed weights
        assert len(result) == 1
        # Should preserve the casing of first occurrence
        assert result[0][0] == "Search Query"
        assert result[0][1] == 4.5  # 1.0 + 2.0 + 1.5

    def test_mixed_duplicates_and_unique(self) -> None:
        """Test deduplication with mix of duplicates and unique queries."""
        queries_with_weights = [
            ("unique query", 1.0),
            ("duplicate query", 2.0),
            ("DUPLICATE QUERY", 1.5),
            ("another unique", 3.0),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 3

        # Check for unique queries
        unique_queries = [q for q, w in result if q == "unique query"]
        assert len(unique_queries) == 1
        unique_weight = [w for q, w in result if q == "unique query"][0]
        assert unique_weight == 1.0

        another_unique_queries = [q for q, w in result if q == "another unique"]
        assert len(another_unique_queries) == 1
        another_weight = [w for q, w in result if q == "another unique"][0]
        assert another_weight == 3.0

        # Check for deduplicated query
        dup_queries = [q for q, w in result if q.lower() == "duplicate query"]
        assert len(dup_queries) == 1
        dup_weight = [w for q, w in result if q.lower() == "duplicate query"][0]
        assert dup_weight == 3.5  # 2.0 + 1.5

    def test_empty_list(self) -> None:
        """Test deduplication with empty list."""
        queries_with_weights: list[tuple[str, float]] = []

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 0

    def test_single_query(self) -> None:
        """Test deduplication with single query."""
        queries_with_weights = [("single query", 1.5)]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 1
        assert result[0] == ("single query", 1.5)

    def test_preserves_first_occurrence_casing(self) -> None:
        """Test that the first occurrence's casing is preserved."""
        queries_with_weights = [
            ("First Version", 1.0),
            ("first version", 2.0),
            ("FIRST VERSION", 3.0),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 1
        # First occurrence casing should be preserved
        assert result[0][0] == "First Version"
        assert result[0][1] == 6.0

    def test_whitespace_differences(self) -> None:
        """Test that queries with different whitespace are treated as different."""
        queries_with_weights = [
            ("query with spaces", 1.0),
            ("query  with  spaces", 2.0),  # Different spacing
            ("query with spaces", 3.0),
        ]

        result = deduplicate_queries(queries_with_weights)

        # Should have two entries (one for single space, one for double)
        assert len(result) == 2

        # Find the summed weight for single-space version
        single_space_weight = [w for q, w in result if q == "query with spaces"][0]
        assert single_space_weight == 4.0  # 1.0 + 3.0

        # Find the weight for double-space version
        double_space_weight = [w for q, w in result if q == "query  with  spaces"][0]
        assert double_space_weight == 2.0

    def test_zero_weights(self) -> None:
        """Test deduplication with zero weights."""
        queries_with_weights = [
            ("query", 0.0),
            ("query", 0.0),
            ("other query", 1.0),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 2
        query_weight = [w for q, w in result if q == "query"][0]
        assert query_weight == 0.0
        other_weight = [w for q, w in result if q == "other query"][0]
        assert other_weight == 1.0

    def test_negative_weights(self) -> None:
        """Test deduplication with negative weights."""
        queries_with_weights = [
            ("query", 2.0),
            ("query", -1.0),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 1
        assert result[0][0] == "query"
        assert result[0][1] == 1.0  # 2.0 + (-1.0)

    def test_realistic_scenario_semantic_and_keyword_queries(self) -> None:
        """Test realistic scenario with semantic and keyword query deduplication."""
        queries_with_weights = [
            ("What is machine learning?", 1.3),  # Semantic query
            ("what is machine learning?", 1.0),  # LLM non-custom query
            ("machine learning definition", 1.0),  # Keyword expansion
            ("machine learning basics", 1.0),  # Keyword expansion
            ("MACHINE LEARNING DEFINITION", 1.0),  # Duplicate keyword (different case)
        ]

        result = deduplicate_queries(queries_with_weights)

        # Should have 3 unique queries after deduplication
        assert len(result) == 3

        # Check that "What is machine learning?" variants were deduplicated
        ml_queries = [
            (q, w) for q, w in result if q.lower() == "what is machine learning?"
        ]
        assert len(ml_queries) == 1
        assert (
            ml_queries[0][0] == "What is machine learning?"
        )  # First occurrence casing
        assert ml_queries[0][1] == 2.3  # 1.3 + 1.0

        # Check that "machine learning definition" variants were deduplicated
        def_queries = [
            (q, w) for q, w in result if q.lower() == "machine learning definition"
        ]
        assert len(def_queries) == 1
        assert (
            def_queries[0][0] == "machine learning definition"
        )  # First occurrence casing
        assert def_queries[0][1] == 2.0  # 1.0 + 1.0

        # Check that "machine learning basics" is present with its original weight
        basics_queries = [
            (q, w) for q, w in result if q.lower() == "machine learning basics"
        ]
        assert len(basics_queries) == 1
        assert basics_queries[0][1] == 1.0

    def test_special_characters_and_punctuation(self) -> None:
        """Test deduplication with special characters and punctuation."""
        queries_with_weights = [
            ("What's the weather?", 1.0),
            ("what's the weather?", 2.0),
            ("WHAT'S THE WEATHER?", 1.5),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 1
        assert result[0][0] == "What's the weather?"
        assert result[0][1] == 4.5

    def test_unicode_characters(self) -> None:
        """Test deduplication with unicode characters."""
        queries_with_weights = [
            ("Café", 1.0),
            ("café", 2.0),
            ("CAFÉ", 1.5),
        ]

        result = deduplicate_queries(queries_with_weights)

        assert len(result) == 1
        assert result[0][0] == "Café"
        assert result[0][1] == 4.5


================================================
FILE: backend/tests/unit/onyx/tools/test_tool_runner.py
================================================
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message
from onyx.tools.tool_runner import _extract_recent_generated_image_file_ids
from onyx.tools.tool_runner import _merge_tool_calls


def _make_tool_call(
    tool_name: str,
    tool_args: dict,
    tool_call_id: str = "call_1",
    turn_index: int = 0,
    tab_index: int = 0,
) -> ToolCallKickoff:
    """Helper to create a ToolCallKickoff for testing."""
    return ToolCallKickoff(
        tool_call_id=tool_call_id,
        tool_name=tool_name,
        tool_args=tool_args,
        placement=Placement(turn_index=turn_index, tab_index=tab_index),
    )


class TestMergeToolCalls:
    """Tests for _merge_tool_calls function."""

    def test_empty_list(self) -> None:
        """Empty input returns empty output."""
        result = _merge_tool_calls([])
        assert result == []

    def test_single_search_tool_call_not_merged(self) -> None:
        """A single SearchTool call is returned as-is (no merging needed)."""
        call = _make_tool_call(
            tool_name="internal_search",
            tool_args={"queries": ["query1"]},
            tool_call_id="call_1",
        )
        result = _merge_tool_calls([call])

        assert len(result) == 1
        assert result[0].tool_name == "internal_search"
        assert result[0].tool_args == {"queries": ["query1"]}
        assert result[0].tool_call_id == "call_1"

    def test_single_web_search_tool_call_not_merged(self) -> None:
        """A single WebSearchTool call is returned as-is."""
        call = _make_tool_call(
            tool_name="web_search",
            tool_args={"queries": ["web query"]},
        )
        result = _merge_tool_calls([call])

        assert len(result) == 1
        assert result[0].tool_name == "web_search"
        assert result[0].tool_args == {"queries": ["web query"]}

    def test_single_open_url_tool_call_not_merged(self) -> None:
        """A single OpenURLTool call is returned as-is."""
        call = _make_tool_call(
            tool_name="open_url",
            tool_args={"urls": ["https://example.com"]},
        )
        result = _merge_tool_calls([call])

        assert len(result) == 1
        assert result[0].tool_name == "open_url"
        assert result[0].tool_args == {"urls": ["https://example.com"]}

    def test_multiple_search_tool_calls_merged(self) -> None:
        """Multiple SearchTool calls have their queries merged into one call."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["query1", "query2"]},
                tool_call_id="call_1",
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["query3"]},
                tool_call_id="call_2",
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_name == "internal_search"
        assert result[0].tool_args["queries"] == ["query1", "query2", "query3"]
        # Uses first call's ID
        assert result[0].tool_call_id == "call_1"

    def test_multiple_web_search_tool_calls_merged(self) -> None:
        """Multiple WebSearchTool calls have their queries merged."""
        calls = [
            _make_tool_call(
                tool_name="web_search",
                tool_args={"queries": ["web1"]},
                tool_call_id="call_1",
            ),
            _make_tool_call(
                tool_name="web_search",
                tool_args={"queries": ["web2", "web3"]},
                tool_call_id="call_2",
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_name == "web_search"
        assert result[0].tool_args["queries"] == ["web1", "web2", "web3"]

    def test_multiple_open_url_tool_calls_merged(self) -> None:
        """Multiple OpenURLTool calls have their urls merged."""
        calls = [
            _make_tool_call(
                tool_name="open_url",
                tool_args={"urls": ["https://a.com"]},
                tool_call_id="call_1",
            ),
            _make_tool_call(
                tool_name="open_url",
                tool_args={"urls": ["https://b.com", "https://c.com"]},
                tool_call_id="call_2",
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_name == "open_url"
        assert result[0].tool_args["urls"] == [
            "https://a.com",
            "https://b.com",
            "https://c.com",
        ]

    def test_non_mergeable_tool_not_merged(self) -> None:
        """Non-mergeable tools (e.g., python) are returned as separate calls."""
        calls = [
            _make_tool_call(
                tool_name="python",
                tool_args={"code": "print(1)"},
                tool_call_id="call_1",
            ),
            _make_tool_call(
                tool_name="python",
                tool_args={"code": "print(2)"},
                tool_call_id="call_2",
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 2
        assert result[0].tool_args["code"] == "print(1)"
        assert result[1].tool_args["code"] == "print(2)"

    def test_mixed_mergeable_and_non_mergeable(self) -> None:
        """Mix of mergeable and non-mergeable tools handles correctly."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q1"]},
                tool_call_id="search_1",
            ),
            _make_tool_call(
                tool_name="python",
                tool_args={"code": "x = 1"},
                tool_call_id="python_1",
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q2"]},
                tool_call_id="search_2",
            ),
        ]
        result = _merge_tool_calls(calls)

        # Should have 2 calls: merged search + python
        assert len(result) == 2

        tool_names = {r.tool_name for r in result}
        assert tool_names == {"internal_search", "python"}

        search_result = next(r for r in result if r.tool_name == "internal_search")
        assert search_result.tool_args["queries"] == ["q1", "q2"]

        python_result = next(r for r in result if r.tool_name == "python")
        assert python_result.tool_args["code"] == "x = 1"

    def test_multiple_different_mergeable_tools(self) -> None:
        """Multiple different mergeable tools each get merged separately."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["search1"]},
            ),
            _make_tool_call(
                tool_name="web_search",
                tool_args={"queries": ["web1"]},
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["search2"]},
            ),
            _make_tool_call(
                tool_name="web_search",
                tool_args={"queries": ["web2"]},
            ),
        ]
        result = _merge_tool_calls(calls)

        # Should have 2 merged calls
        assert len(result) == 2

        search_result = next(r for r in result if r.tool_name == "internal_search")
        assert search_result.tool_args["queries"] == ["search1", "search2"]

        web_result = next(r for r in result if r.tool_name == "web_search")
        assert web_result.tool_args["queries"] == ["web1", "web2"]

    def test_preserves_first_call_placement(self) -> None:
        """Merged call uses the placement from the first call."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q1"]},
                turn_index=1,
                tab_index=2,
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q2"]},
                turn_index=3,
                tab_index=4,
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].placement.turn_index == 1
        assert result[0].placement.tab_index == 2

    def test_preserves_other_args_from_first_call(self) -> None:
        """Merged call preserves non-merge-field args from the first call."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q1"], "other_param": "value1"},
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q2"], "other_param": "value2"},
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_args["queries"] == ["q1", "q2"]
        # Other params from first call are preserved
        assert result[0].tool_args["other_param"] == "value1"

    def test_handles_empty_queries_list(self) -> None:
        """Handles calls with empty queries lists."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": []},
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q1"]},
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_args["queries"] == ["q1"]

    def test_handles_missing_merge_field(self) -> None:
        """Handles calls where the merge field is missing entirely."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={},  # No queries field
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q1"]},
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        assert result[0].tool_args["queries"] == ["q1"]

    def test_handles_string_value_instead_of_list(self) -> None:
        """Handles edge case where merge field is a string instead of list."""
        calls = [
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": "single_query"},  # String instead of list
            ),
            _make_tool_call(
                tool_name="internal_search",
                tool_args={"queries": ["q2"]},
            ),
        ]
        result = _merge_tool_calls(calls)

        assert len(result) == 1
        # String should be converted to list item
        assert result[0].tool_args["queries"] == ["single_query", "q2"]


class TestImageHistoryExtraction:
    def test_extracts_image_file_ids_from_json_response(self) -> None:
        msg = '[{"file_id":"img-1","revised_prompt":"v1"},{"file_id":"img-2","revised_prompt":"v2"}]'
        assert _extract_image_file_ids_from_tool_response_message(msg) == [
            "img-1",
            "img-2",
        ]

    def test_extracts_recent_generated_image_ids_from_history(self) -> None:
        history = [
            ChatMessageSimple(
                message="",
                token_count=1,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="call_1",
                        tool_name="generate_image",
                        tool_arguments={"prompt": "test"},
                        token_count=1,
                    )
                ],
            ),
            ChatMessageSimple(
                message='[{"file_id":"img-1","revised_prompt":"r1"}]',
                token_count=1,
                message_type=MessageType.TOOL_CALL_RESPONSE,
                tool_call_id="call_1",
            ),
        ]

        assert _extract_recent_generated_image_file_ids(history) == ["img-1"]

    def test_ignores_non_image_tool_responses(self) -> None:
        history = [
            ChatMessageSimple(
                message="",
                token_count=1,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="call_1",
                        tool_name="web_search",
                        tool_arguments={"queries": ["q"]},
                        token_count=1,
                    )
                ],
            ),
            ChatMessageSimple(
                message='[{"file_id":"img-1","revised_prompt":"r1"}]',
                token_count=1,
                message_type=MessageType.TOOL_CALL_RESPONSE,
                tool_call_id="call_1",
            ),
        ]

        assert _extract_recent_generated_image_file_ids(history) == []


================================================
FILE: backend/tests/unit/onyx/tools/test_tool_runner_chat_files.py
================================================
"""
Unit tests for chat_files handling in tool_runner.py.

These tests verify that chat files are properly passed to PythonTool
through the PythonToolOverrideKwargs mechanism.
"""

import pytest

from onyx.tools.models import ChatFile
from onyx.tools.models import PythonToolOverrideKwargs


class TestChatFilesPassingToPythonTool:
    """Tests for passing chat_files to PythonTool."""

    @pytest.fixture
    def sample_chat_files(self) -> list[ChatFile]:
        """Create sample chat files for testing."""
        return [
            ChatFile(filename="test.xlsx", content=b"excel content"),
            ChatFile(filename="data.csv", content=b"col1,col2\n1,2\n3,4"),
        ]

    def test_chat_files_passed_to_python_tool_override_kwargs(
        self,
        sample_chat_files: list[ChatFile],
    ) -> None:
        """Test that PythonToolOverrideKwargs correctly stores chat_files."""
        # Verify the override_kwargs structure stores chat_files correctly
        override_kwargs = PythonToolOverrideKwargs(chat_files=sample_chat_files)

        assert override_kwargs.chat_files == sample_chat_files
        assert len(override_kwargs.chat_files) == 2
        assert override_kwargs.chat_files[0].filename == "test.xlsx"
        assert override_kwargs.chat_files[0].content == b"excel content"
        assert override_kwargs.chat_files[1].filename == "data.csv"

    def test_empty_chat_files_defaults_to_empty_list(self) -> None:
        """Test that empty chat_files defaults to empty list."""
        override_kwargs = PythonToolOverrideKwargs()
        assert override_kwargs.chat_files == []

    def test_none_chat_files_handled_in_tool_runner(self) -> None:
        """Test that None chat_files are handled gracefully in the tool_runner code path.

        The tool_runner.py uses `chat_files or []` pattern when creating
        PythonToolOverrideKwargs, so we verify this pattern works correctly.
        """
        # Simulate the pattern used in tool_runner.py:
        # override_kwargs = PythonToolOverrideKwargs(chat_files=chat_files or [])
        chat_files_param: list[ChatFile] | None = None

        # This is the exact pattern used in tool_runner.py
        override_kwargs = PythonToolOverrideKwargs(
            chat_files=chat_files_param or [],
        )

        assert override_kwargs.chat_files == []
        assert isinstance(override_kwargs.chat_files, list)


class TestChatFileConversion:
    """Tests for ChatLoadedFile to ChatFile conversion."""

    def test_convert_loaded_files_to_chat_files(self) -> None:
        """Test conversion of ChatLoadedFile to ChatFile."""
        from onyx.chat.models import ChatLoadedFile
        from onyx.chat.process_message import _convert_loaded_files_to_chat_files
        from onyx.file_store.models import ChatFileType

        # Create sample ChatLoadedFile objects
        loaded_files = [
            ChatLoadedFile(
                file_id="file-1",
                content=b"test content 1",
                file_type=ChatFileType.DOC,
                filename="document.pdf",
                content_text="parsed text",
                token_count=10,
            ),
            ChatLoadedFile(
                file_id="file-2",
                content=b"csv,data\n1,2",
                file_type=ChatFileType.TABULAR,
                filename="data.csv",
                content_text="csv,data\n1,2",
                token_count=5,
            ),
        ]

        # Convert to ChatFile
        chat_files = _convert_loaded_files_to_chat_files(loaded_files)

        assert len(chat_files) == 2
        assert chat_files[0].filename == "document.pdf"
        assert chat_files[0].content == b"test content 1"
        assert chat_files[1].filename == "data.csv"
        assert chat_files[1].content == b"csv,data\n1,2"

    def test_convert_files_with_none_content_skipped(self) -> None:
        """Test that files with None content are skipped."""
        from onyx.chat.models import ChatLoadedFile
        from onyx.chat.process_message import _convert_loaded_files_to_chat_files
        from onyx.file_store.models import ChatFileType

        loaded_files = [
            ChatLoadedFile(
                file_id="file-1",
                content=b"valid content",
                file_type=ChatFileType.DOC,
                filename="valid.pdf",
                content_text="text",
                token_count=10,
            ),
            ChatLoadedFile(
                file_id="file-2",
                content=b"",
                file_type=ChatFileType.DOC,
                filename="invalid.pdf",
                content_text=None,
                token_count=0,
            ),
        ]

        chat_files = _convert_loaded_files_to_chat_files(loaded_files)

        # Only the file with valid content should be included
        assert len(chat_files) == 1
        assert chat_files[0].filename == "valid.pdf"

    def test_convert_files_with_missing_filename_uses_fallback(self) -> None:
        """Test that files without filename use file_id as fallback."""
        from onyx.chat.models import ChatLoadedFile
        from onyx.chat.process_message import _convert_loaded_files_to_chat_files
        from onyx.file_store.models import ChatFileType

        loaded_files = [
            ChatLoadedFile(
                file_id="abc123",
                content=b"content",
                file_type=ChatFileType.DOC,
                filename=None,
                content_text="text",
                token_count=5,
            ),
        ]

        chat_files = _convert_loaded_files_to_chat_files(loaded_files)

        assert len(chat_files) == 1
        assert chat_files[0].filename == "file_abc123"

    def test_convert_empty_list_returns_empty(self) -> None:
        """Test that empty input returns empty output."""
        from onyx.chat.process_message import _convert_loaded_files_to_chat_files

        chat_files = _convert_loaded_files_to_chat_files([])
        assert chat_files == []


class TestChatFileModel:
    """Tests for the ChatFile model itself."""

    def test_chat_file_creation(self) -> None:
        """Test ChatFile model creation."""
        chat_file = ChatFile(
            filename="test.xlsx",
            content=b"binary content",
        )

        assert chat_file.filename == "test.xlsx"
        assert chat_file.content == b"binary content"

    def test_chat_file_with_unicode_filename(self) -> None:
        """Test ChatFile with unicode filename."""
        chat_file = ChatFile(
            filename="报告.xlsx",
            content=b"content",
        )

        assert chat_file.filename == "报告.xlsx"

    def test_chat_file_with_spaces_in_filename(self) -> None:
        """Test ChatFile with spaces in filename."""
        chat_file = ChatFile(
            filename="my file name.xlsx",
            content=b"content",
        )

        assert chat_file.filename == "my file name.xlsx"


================================================
FILE: backend/tests/unit/onyx/tools/test_tool_utils.py
================================================
import pytest

from onyx.llm.constants import LlmProviderNames
from onyx.tools.utils import explicit_tool_calling_supported


@pytest.mark.parametrize(
    "model_provider, model_name, expected_result",
    [
        (LlmProviderNames.ANTHROPIC, "claude-4-sonnet-20250514", True),
        (
            "another-provider",
            "claude-haiku-4-5-20251001",
            True,
        ),
        (
            LlmProviderNames.ANTHROPIC,
            "claude-3-sonnet-20240229",
            False,
        ),
        (
            LlmProviderNames.BEDROCK,
            "amazon.titan-text-express-v1",
            False,
        ),
        (LlmProviderNames.OPENAI, "gpt-4o", True),
        (LlmProviderNames.OPENAI, "gpt-3.5-turbo-instruct", False),
    ],
)
def test_explicit_tool_calling_supported(
    model_provider: str,
    model_name: str,
    expected_result: bool,
) -> None:
    """
    Anthropic models support tool calling, but
    a) will raise an error if you provide any tool messages and don't provide a list of tools.
    b) will send text before and after generating tool calls.
    We don't want to provide that list of tools because our UI doesn't support sequential
    tool calling yet for (a) and just looks bad for (b), so for now we just treat anthropic
    models as non-tool-calling.

    Additionally, for Bedrock provider, any model containing an anthropic model name as a
    substring should also return False for the same reasons.
    """
    actual_result = explicit_tool_calling_supported(model_provider, model_name)
    assert actual_result == expected_result


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/open_url/data/test_snippet_finding_data.json
================================================
{
	"categories": [
		{
			"category": "find_snippet_simple",
			"tests": [
				{
					"name": "exact_match",
					"content": "The quick brown fox jumps over the lazy dog.",
					"snippet": "The quick brown fox jumps over the lazy dog.",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 43
					},
					"allow_buffer": false
				},
				{
					"name": "match_at_start",
					"content": "The weather in Sydney is sunny today.",
					"snippet": "weather in Sydney",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 4,
						"expected_end_idx": 20
					},
					"allow_buffer": false
				},
				{
					"name": "match_at_end",
					"content": "There are multiple things to consider about AI models including how they are trained. That can impact the fine-tuning results.",
					"snippet": "impact the fine-tuning results.",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 95,
						"expected_end_idx": 125
					},
					"allow_buffer": false
				},
				{
					"name": "match_in_middle",
					"content": "the total fertility rate was highest in countries in Africa and central Asia, where most countries had a total fertility rate between 3 to 7 births per woman, and lowest in countries in East Asia, where most countries had a total",
					"snippet": "total fertility rate was highest in countries in Africa and central Asia",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 4,
						"expected_end_idx": 75
					},
					"allow_buffer": false
				},
				{
					"_comment": "We expect the first match to be returned",
					"name": "multiple_matches_in_content",
					"content": "What's the go? Hey there mate. How are you doing? Hey there mate",
					"snippet": "Hey there mate",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 15,
						"expected_end_idx": 28
					},
					"allow_buffer": false
				}
			]
		},
		{
			"category": "find_snippet_normalized",
			"tests": [
				{
					"name": "normalized_exact_match",
					"content": "The quick  brown  fox, jumps  over the lazy   dog!",
					"snippet": "The quick brown fox jumps over the lazy dog",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 49
					},
					"allow_buffer": false
				},
				{
					"name": "normalized_match_with_html_entities",
					"content": "You&apos;re our first priority.\nEvery time.\nWe believe everyone should be able to make financial decisions with\nconfidence.",
					"snippet": "everyone should be able to make financial decisions",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 55,
						"expected_end_idx": 105
					},
					"allow_buffer": false
				},
				{
					"name": "multiple_html_entities",
					"content": "Guess&apos;what&hellip;&#39;is up?",
					"snippet": "Guess'what...'is up?",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 33
					},
					"allow_buffer": false
				},
				{
					"name": "html_entity_in_snippet",
					"content":"Guess'what...'is up?",
					"snippet": "Guess&apos;what&hellip;&#39;is up?",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 19
					},
					"allow_buffer": false
				},
				{
					"name": "multiple_whitespace",
					"content": "Hello  there",
					"snippet": "Hello there",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 11
					},
					"allow_buffer": false
				},
				{
					"name": "lots_of_punctuation",
					"content": "Like OMG!!! this, is, crazy!!! right? he said 'i dont even know' but  I do know that...",
					"snippet": "this is crazy",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 12,
						"expected_end_idx": 26
					},
					"allow_buffer": false
				},
				{
					"name": "lots_of_punctuation_again",
					"content": "Like OMG!!! this, is, crazy!!! right? he said 'i don't even know' but  I do know that...",
					"snippet": "i don't even know",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 47,
						"expected_end_idx": 63
					},
					"allow_buffer": false
				},
				{
					"name": "case_insensitive_match",
					"content": "HelLo There",
					"snippet": "hello",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 4
					},
					"allow_buffer": false
				},
				{
					"name": "curly_apostrophe_normalization",
					"content": "It’s a test",
					"snippet": "it's a test",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 10
					},
					"allow_buffer": false
				},
				{
					"name": "unicode_dash_normalization",
					"content": "pages 3–5 are included",
					"snippet": "3-5",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 6,
					  "expected_end_idx": 8
					},
					"allow_buffer": false
				},
				{
					"name": "nbsp_and_tab_whitespace",
					"content": "A\u00A0B\tC",
					"snippet": "A B C",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 4
					},
					"allow_buffer": false
				},
				{
					"name": "zero_width_space_inside_word",
					"content": "he\u200Bllo world",
					"snippet": "hello",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 5
					},
					"allow_buffer": false
				},
				{
					"name": "unicode_combining_accent_normalization",
					"content": "Cafe\u0301 is open",
					"snippet": "Café",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 4
					},
					"allow_buffer": false
				},
				{
					"name": "amp_and_nbsp_entities",
					"content": "Tom &amp; Jerry&nbsp;Show",
					"snippet": "Tom & Jerry Show",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 24
					},
					"allow_buffer": false
				},
				{
					"_comment": "We expect to pick the first normalised occurrence",
					"name": "multiple_occurrences_pick_first",
					"content": "foo  bar... foo bar",
					"snippet": "foo bar",
					"expected_result": {
					  "snippet_located": true,
					  "expected_start_idx": 0,
					  "expected_end_idx": 7
					},
					"allow_buffer": false
				},
				{
					"name": "match_on_unicode_character",
					"content": "Sunrise\nSunrise\nSunset\nSunset\n## Hourly Weather\n[Next 48 Hours] \n## Don&#x27;t Miss\n[\n![] \n## A Rare Southern Snowstorm: What You Need To Know\n] [\n![] \n## Multiple Systems To Bring Snow To Great Lakes, Northeast",
					"snippet": "Sunset\nSunset\n## Hourly Weather\n[Next 48 Hours] \n## Don't Miss\n[",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 16,
						"expected_end_idx": 84
					},
					"allow_buffer": false
				},
				{
					"name": "complex_normalisation",
					"content": [
						"* [Press Releases] ",
						"* [Careers] ",
						"* [Solutions] ",
						"* * [WeatherBug] ",
						"* [Consumer] ",
						"* [Corporate] ",
						"* [WeatherBug] ",
						"* [Consumer] ",
						"* [Corporate] ",
						"[![]![]] [![Responsive menu icon]] ",
						"* [Now] ",
						"* [Hourly] ",
						"* [10 Day] ",
						"* * # Today&#x27;s Weather - Sydney, AUS",
						"December 8, 2025",
						"1:01 AM",
						"SYDNEY INTL AIRP",
						"68&#xB0;",
						"Feels Like68&#xB0;",
						"Hi--Lo65&#xB0;F",
						"![30% Chance of Light Rain] ",
						"30% Chance of Light Rain",
						"* [",
						"Live Radar",
						"] ",
						"* * Weather Details",
						"* Windchill68&#xBA;FDaily Rain0&quot;",
						"* Dewpoint54&#xBA;FMonthly Rain--",
						"* Humidity60%Avg. WindENE 9mph",
						"* Pressure29.97&quot;Wind Gust9mph",
						"* Sunrise5:37 AMMoonWaning Gibbous",
						"* "
					],
					"snippet": [
						"[![]![]] [![Responsive menu icon]] ",
						"* [Now] ",
						"* [Hourly] ",
						"* [10 Day] ",
						"* * # Today's Weather - Sydney, AUS",
						"December 8, 2025",
						"1:01 AM",
						"SYDNEY INTL AIRP",
						"68°",
						"Feels Like68°",
						"Hi--Lo65°F",
						"![30% Chance of Light Rain]"
					],
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 140,
						"expected_end_idx": 362
					},
					"allow_buffer": false
				},
				{
					"name": "bad_ampersand",
					"content": [
						"duals focus only on the price returns of the index, dividends play an important role in overall investment returns.",
						"# S&amp;P 500 Total Returns by Year",
						"|Year|Total Return|",
						"2026|1.23%|",
						"2025|17.88%|",
						"2024|25.02%|",
						"2023|26.29%|",
						"2022|-18.11%",
						"2021|28.71%|",
						"2020|18.40%|",
						"2019|31.49%|",
						"2018|-4.38%|",
						"2017|21.83%|",
						"2016|11.96%|",
						"2015|1.38%|",
						"2014|13.69%|",
						"2013|32.39%|",
						"2012|16.00%|",
						"2011|2.11%|",
						"2010|15.06%|",
						"2009|26.46%|",
						"2008|-37.00%",
						"2007|5.49%|",
						"2006|15.79%|",
						"2005|4.91%|",
						"2004|10.88%|",
						"2003|28.68%|",
						"2002|-22.10%",
						"2001|-11.89%",
						"2000|-9.10%|",
						"1999|21.04%|",
						"1998|28.58%|",
						"1997|33.36%|",
						"1996|22.96%|",
						"1995|37.58%|",
						"1994|1.32%|",
						"1993|10.08%|",
						"1992|7.62%|",
						"1991|30."
					],
					"snippet": [
						"# S&P 500 Total Returns by Year",
						"|Year|Total Return|",
						"2026|1.23%|",
						"2025|17.88%|",
						"2024|25.02%|",
						"2023|26.29%|",
						"2022|-18.11%|",
						"2021|28.71%|"
					],
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 116,
						"expected_end_idx": 247
					},
					"allow_buffer": false
				},
				{
					"name": "more_random_code_entities",
					"content": [
						"* [See more FAQs] ",
						"* [Find routing and account numbers] ",
						"## Popular FAQs",
						"* [How do I find my routing and account numbers?] ",
						"* [Is there a fee for Zelle®?] ",
						"* [How do I report suspected fraud?] ",
						"* [See more FAQs] ",
						"# Current mortgage and refinance rates",
						"&zwj;",
						"## Mortgage interest rates today",
						"&#160;",
						"![] ",
						"## Get a customized rate and payment",
						"See how much you could qualify to borrow and what your estimated rate and payment would be. It takes just a few minutes and won’t affect your credit score.",
						"[Estimate your rate] ",
						"&zwj;",
						"## Common question"
					],
					"snippet": [
						"* [See more FAQs] ",
						"# Current mortgage and refinance rates",
						"‍",
						"## Mortgage interest rates today",
						" ",
						"![] ",
						"## Get a customized rate and payment"
					],
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 194,
						"expected_end_idx": 338
					},
					"allow_buffer": false
				}
			]
		},
		{
			"category": "token_matching",
			"tests": [
				{
					"name": "match_on_different_numbers",
					"content": [
						"°C",
						"",
						"28Sun.",
						"",
						"21°13 °C",
						"",
						"29Mon.",
						"",
						"20°12 °C",
						"",
						"30Tue.",
						"",
						"20°13 °C",
						"",
						"### SeptemberWeather Overview",
						"",
						"| | |",
						"| --- | --- |",
						"| Sunny | 13 |",
						"| Cloudy | 3 |",
						"| Rainy | 13 |",
						"| Snowy | 0 |",
						"",
						"#### No. of days:",
						"",
						"Sunny",
						"",
						"13",
						"",
						"Cloudy",
						"",
						"3",
						"",
						"Rainy",
						"",
						"13",
						"",
						"Snowy",
						"",
						"0",
						"",
						"The monthly weather averages in Sydney consist of 13 sunny days, 3 cloudy days, 13 rainy days, and 0 snowy days.",
						"",
						"## Sydney's Locations Weather Conditions",
						"",
						"Locations",
						"",
						"Temp",
						"",
						"Condition",
						"",
						"Humi.",
						"",
						"UV",
						"",
						"Wind Speed & Direction",
						"",
						"[Cook And Phillip Sydney East] ",
						"",
						"13 °C",
						"",
						"Moderate rain",
						"",
						"100%",
						"",
						"0",
						"",
						"27.7 kmph /  NW",
						"",
						"[Day Street] ",
						"",
						"13 °C",
						"",
						"Moderate rain",
						"",
						"100%",
						"",
						"0",
						"",
						"22.3 kmph /  NW",
						"",
						"[Earlwood Sydney East"
					],
					"snippet": [
						"The monthly weather averages in Sydney consist of 9 sunny days, 1 cloudy days, 20 rainy days, and 0 snowy days.",
						"",
						"## Sydney - Weather Conditions australia",
						"",
						"Locations",
						"",
						"Temp",
						"",
						"Condition",
						"",
						"Humi.",
						"",
						"UV",
						"",
						"Wind Speed & Direction",
						""
					],
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 235,
						"expected_end_idx": 448
					},
					"allow_buffer": true
				},
				{
					"_comment": "token_match_boundary_but_different_length",
					"name": "critical_negation_difference",
					"content": "The FDA has approved the new vaccine for emergency use in adults over 18 years old.",
					"snippet": "The FDA has not approved the new vaccine for emergency use in adults over 18 years old.",
					"expected_result": {
						"snippet_located": true,
						"expected_start_idx": 0,
						"expected_end_idx": 82
					},
					"allow_buffer": true
				}
			]
		},
		{
			"category": "no_match",
			"tests": [
				{
					"name": "chicago_difference",
					"snippet": "In Chicago, how much snow falls each winter can vary dramatically. According to the National Weather Service, seasonal snowfall totals from 1900 to today have ranged from a low of **9.8 inches (1920–1921)** to a high of **89.7 inches",
					"content": [
						"While Chicago’s known as the [Windy City], that lake breeze isn’t the only weather phenomenon that might take your breath away when you’re visiting our city. Does it snow in Chicago? Indeed, it does! Although the first measurable snowfall comes later than usual in some years, it definitely does snow in Chicago. If you’re visiting in the winter, take a moment to brush up on when it will snow in Chicago and how much snow will fall each month.",
						"",
						"[Upcoming Events] ",
						"",
						"[Plan Your Visit] ",
						"",
						"## When Will it Snow in Chicago?",
						"",
						"If you live here, you already know that in Chicago, how much snow falls each month can vary widely from year to year! Sometimes the first snow arrives in October, while other years see hardly a flurry through December. So how do meteorologists answer the question of when does it snow in Chicago? Generally, they predict snowfall off and on from November through March, with April getting the occasional dusting. Whenever it does snow in Chicago, be sure to take advantage of the Chicago snow on the area’s many great sledding hills!",
						"",
						"## Chicago Snow Totals by Year",
						"",
						"In Chicago, how much snow falls now compared to previous years? With records going back to 1900, the National Weather Service has tracked how much snow falls in Chicago. Seasonally in the years from 1900-present, the average inches of snow in Chicago have annually ranged from 9.8 inches in 1920 – 1921 to 89.7 inches in 1978 – 1979. That’s quite a range of inches of snow in Chicago! Winter 2020 – 2021 got a total of 48.8, with the year prior getting quite a bit less, just 34.8 inches. Take a look at the annual inches of snow in Chicago over the last decade:",
						"",
						"- 2021 – 2022—32.8 in",
						"- 2020 – 2021—48.8 in",
						"- 2019 – 2020—34.8 in",
						"- 2018 – 2019—49.5 in",
						"- 2017 – 2018—36. in",
						"- 2016 – 2017—26.1 in",
						"- 2015 – 2016—31.2 in",
						"- 2014 – 2015—50.7 in",
						"- 2013 – 2014—82.0 in",
						"- 2012 – 2013—30.1 in",
						"",
						"Here are the winter monthly snowfall ranges for winter 2020-2021:",
						"",
						"- November—0.7 inches of snowfall (normal = 1.8 inches)",
						"- December—2.8 inches of snowfall (normal = 7.6 inches)",
						"- January—21.9 inches of snowfall (normal = 11.3 inches)",
						"- February—21.6 inches of snowfall (normal = 10.7 inches)",
						"- March—1.8 inches of snowfall (normal = 5.5 inches)",
						"- April—none (normal = 1.3 inches)",
						"",
						"## Enjoy Winter Weather in Chicago!",
						"",
						"Now that you know about the typical Chicago snow totals and when it will snow in Chicago, you can pack your parka and boots and prepare to witness the magic of Chicagoland in wintertime! Schedule a [visit to the Skydeck] this winter to see Chicago blanketed in snow from the [most unique vantage point] available. We’re here to help you plan your visit—contact us for details about [dining on The Ledge], [Skydeck engagement parties], and other special event options!"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"name": "sport-game-change",
					"snippet": [
						"## [MiamiHeatvsBostonCeltics] ",
						"LIVE[Dec 19, 2025] ·7:00 PM EST·TD GardenBoston, MA",
						"```",
						"10:49 [91-98] [MIA] MISS S. Fontecchio 8' driving Layup",
						"10:48 [91-98] [BOS] H. González REBOUND (Off:1 Def:5)"
					],
					"content": [
						"Spoiler-Free NBA Scores, Recaps & Game Ratings",
						"[Wikihoops] ",
						"# Spoiler-Free NBA Scores, Recaps & Game Ratings",
						"[Open Menu] [Close menu] ",
						"[![Logo]] ",
						"[] [] ",
						"## NBA ·2025-26 Regular Season",
						"### 5 Games on Thursday[December 4, 2025] ",
						"## [BostonCelticsvsWashingtonWizards] ",
						"FINAL[Dec 4, 2025] ·7:00 PM EST·Capital One ArenaWashington, DC",
						"```",
						"No play-by-play available",
						"```",
						"1/10",
						"Rating-99%[Vote up] 1[Vote down] 10",
						"* [Show Ratings] ",
						"* [Add bookmark] [Edit bookmark] ",
						"* [Save game] [Remove from saved games] ",
						"## [Golden StateWarriorsvsPhiladelphiaSixers] ",
						"FINAL[Dec 4, 2025] ·7:00 PM EST·Xfinity Mobile ArenaPhiladelphia, PANBA TV",
						"```",
						"No play-by-play available",
						"```",
						"8/10",
						"Rating2493%[Vote up] 26[Vote down] 2",
						"* [Show Ratings] ",
						"* [Add bookmark] [Edit bookmark] ",
						"* [Save game] [Remove from saved games] ",
						"## [Los AngelesLakersvsTorontoRaptors] ",
						"FINAL[Dec 4, 2025] ·7:30 PM EST·Scotiabank ArenaToronto, ON",
						"```",
						"No play-by-play available",
						"```",
						"9/10",
						"Rating2894%[Vote up] 30[Vote down] 2",
						"* [Show Ratings] ",
						"* [Add bookmark] [Edit bookmark] ",
						"* [Save game] [Remove from saved games] ",
						"## [UtahJazzvsBrooklynNets] ",
						"FINAL[Dec 4, 2025] ·7:30 PM EST·Barclays CenterBrooklyn, NY",
						"```",
						"No play-by-play available",
						"```",
						"4/10",
						"Rating-333%[Vote up] 3[Vote down] 6",
						"* [Show Ratings] ",
						"* [Add bookmark] [Edit bookmark] ",
						"* [Save game] [Remove from saved games] ",
						"## [MinnesotaWolvesvsNew OrleansPelicans] ",
						"FINAL[Dec 4, 2025] ·8:00 PM EST·Smoothie King CenterNew Orleans, LA",
						"```",
						"No play-by-play available",
						"```",
						"6/10",
						"Rating157%[Vote up] 4[Vote down] 3",
						"* [Show Ratings] ",
						"* [Add bookmark] [Edit bookmark] ",
						"* [Save game] [Remove from saved games] ",
						"### Homeis where the![W] is",
						"Add the Wikihoops web app to your home screen. No app store download required.",
						"Install Web AppHow to install Progressive Web Apps (PWAs)",
						"* **On Android**, Firefox, Chrome, Edge, Opera, and Samsung Internet Browser all support installing PWAs",
						"* **On iOS**, PWAs may or[may not be supported] ",
						"* Chrome and Edge support installing PWAs on**Linux, Windows, macOS, and Chromebooks**",
						"* **[Google Chrome Help on PWAs] **",
						"* **[Use Web Apps with Firefox for Android] **",
						"### Spoiler Alert",
						"Close",
						"This action may reveal spoilers such as scores and season records.",
						"ContinueCancel"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"name": "facebook-premier-league",
					"snippet": "Meta © 2025",
					"content": [
						"Fabrizio Romano - 🥹🇵🇹Cristiano's 950 career goals in...",
						"**",
						"Facebook",
						"[",
						"Log In",
						"] ",
						"## Fabrizio Romano&#x27;s Post",
						"[",
						"] ",
						"### [**Fabrizio Romano**] Verified account",
						"[October 25, 2025] ·Shared with Public",
						"![🥹]![🇵🇹] Cristiano's 950 career goals in details…",
						"Real Madrid –450 goals",
						"Manchester United –145 goals",
						"Portugal –143 goals",
						"Al-Nassr –106 goals",
						"Juventus –101 goals",
						"Sporting CP –5 goals",
						"Greatest![🐐] ",
						"[",
						"![May be an image of soccer, football and text that says &#x27;NUNASSK ማGEAB KAFD FD κAF&#x27;] ",
						"] ",
						"All reactions:",
						"91K",
						"1.7K comments",
						"494 shares",
						"**",
						"Like",
						"**",
						"Comment",
						"Most relevant",
						"**",
						"![] Top fan",
						"Debajyoti Nag",
						"I noticed one thing that Fabrizio never added a![🐐] emonji when telling about Ronaldo and messi . But today he just written &quot;Greatest![🐐] &quot; . He knows who&#x27;s the goat![🐐] ",
						"* [11w] ",
						"50",
						"View all 6 replies",
						"Edel Queen",
						"Bring his assists as well you calculating his goals too, Man is with over 300+ assists",
						"* [11w] ",
						"9",
						"View all 6 replies",
						"[",
						"] ",
						"![] Top fan",
						"[Gunna Thèé Gēé] ",
						"\u201cIf Chelsea had won I would have won some money, But I'm happy they lost. Because",
						"money can&#x27;t buy happiness",
						"![😜]![😜]![😜] ",
						"* [11w] ",
						"29",
						"View all 2 replies",
						"[",
						"] ",
						"![] Top fan",
						"[Shafi Mazid] ",
						"Higher than the highest Sky Scrapper. Beyond the debate.![🦿] machine![🦾] ",
						"* [11w] ",
						"6",
						"View 1 reply",
						"[",
						"] ",
						"![] Top fan",
						"[Sicelo Thabethe] ",
						"Road to 200 penalties![🙌🏿]![🙌🏿] ",
						"* [11w] ",
						"3",
						"View all 9 replies",
						"[",
						"] ",
						"[Samtex Ventures] ",
						"Chelsea ordered Garnacho while United gave them Gachagua![🤣]![🤣] ",
						"* [11w] ",
						"17",
						"View all 3 replies",
						"[",
						"] ",
						"![] Top fan",
						"[Anawo Destiny] ",
						"Spanish league conquered",
						"EPL conquered",
						"Own country conquered",
						"Camels league conquered",
						"Seria A conquered",
						"Portugal's league. Started from there!!",
						"6 leagues felt the greatness of the football goat.",
						"But messi went to ligue 1 and was chased out![😂]![😂] ",
						"Went to Hollywood fashion league and has been fighting Tyler Perry and Jason statam on the pitch and was still beating![😂]![😂] ",
						"Know your goat![🐐] ",
						"* [11w] ",
						"3",
						"View 1 reply",
						"![] Top fan",
						"Sïrr Ñicølas",
						"Maturing is realizing tht Ronaldo should have had 990 goals if it wasn&#x27;t because of wht happened when he was 37![😭]![😭] ",
						"* [11w] ",
						"[",
						"] ",
						"[Ziya&#x27;ulhaq Sa&#x27;adan El-dia] ",
						"Did Fabrizio say greatest? He also acknowledges the![🐐] ",
						"* [11w] ",
						"[",
						"] ",
						"[Jona Lalremsanga] ",
						"Now, People who never score Ronaldo&#x27;s Goals for sporting CP (5 goals) will comment and Judge him![🤣] ",
						"* [11w]",
						"https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork/PST045224"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar sentence structure but key words substituted",
					"name": "word_substitution_boundary",
					"content": "The quick brown fox jumps over the lazy dog in the sunny meadow today.",
					"snippet": "The slow grey wolf leaps over the tired cat in the rainy forest today.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Same words but completely different order",
					"name": "word_order_scrambled",
					"content": "Machine learning models require large datasets for training and validation.",
					"snippet": "Training and validation require large models for machine learning datasets.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar numbers in statistics context but different subject",
					"name": "similar_statistics_different_values",
					"content": "In 2024, the corporation disclosed earnings of $45.2 million with 12,500 staff across 28 regions.",
					"snippet": "In 2019, the startup announced losses of $8.7 billion with 350 contractors across 3 cities.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar product names but different",
					"name": "similar_product_names",
					"content": "The new iPhone 15 Pro Max features an A17 Bionic chip with enhanced neural engine.",
					"snippet": "The new Galaxy S24 Ultra features a Snapdragon 8 Gen 3 chip with enhanced AI engine.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Overlapping topic but different phrasing",
					"name": "overlapping_phrases_different_context",
					"content": "Meteorologists predict heavy rain will drench northern areas beginning at dawn tomorrow.",
					"snippet": "The traffic report indicates severe congestion on southern highways clearing by midnight tonight.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar location-based content",
					"name": "similar_locations_different_details",
					"content": [
						"## Sydney Weather Report",
						"Temperature: 28°C",
						"Humidity: 65%",
						"Wind: NE 15 km/h",
						"Conditions: Partly cloudy with afternoon thunderstorms expected"
					],
					"snippet": [
						"## Melbourne Weather Report",
						"Temperature: 22°C",
						"Humidity: 78%",
						"Wind: SW 20 km/h",
						"Conditions: Mostly overcast with morning showers expected"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar topic with different wording and entities",
					"name": "antonym_substitution",
					"content": "Wall Street rallied sharply as traders demonstrated renewed optimism about semiconductor companies.",
					"snippet": "Bond markets tumbled dramatically while analysts expressed growing pessimism regarding retail corporations.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Similar list items but different specifics",
					"name": "similar_list_different_items",
					"content": [
						"Top 5 Programming Languages in 2025:",
						"1. Python - 28.5%",
						"2. JavaScript - 22.1%",
						"3. Java - 15.3%",
						"4. TypeScript - 12.8%",
						"5. C++ - 9.4%"
					],
					"snippet": [
						"Top 5 Programming Languages in 2024:",
						"1. JavaScript - 31.2%",
						"2. Python - 24.7%",
						"3. TypeScript - 18.6%",
						"4. Java - 11.2%",
						"5. Rust - 7.8%"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Same template different entities",
					"name": "same_template_different_entities",
					"content": "John Smith, CEO of TechCorp Inc., announced the acquisition of DataFlow Systems for $2.3 billion.",
					"snippet": "Jane Doe, CFO of InnovateCo Ltd., announced the merger with CloudSync Solutions for $1.8 billion.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Sports score with similar format but different teams and scores",
					"name": "similar_sports_scores",
					"content": [
						"NBA Finals Game 5 Results:",
						"Los Angeles Lakers 112 - Boston Celtics 108",
						"Top Scorer: LeBron James (34 pts)",
						"MVP: Anthony Davis"
					],
					"snippet": [
						"NBA Finals Game 7 Results:",
						"Golden State Warriors 118 - Miami Heat 115",
						"Top Scorer: Stephen Curry (42 pts)",
						"MVP: Draymond Green"
					],
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				},
				{
					"_comment": "Token matcher boundary: Completely different recipe with similar structure",
					"name": "similar_recipe_different_ingredients",
					"content": "Combine 2 cups flour, 1 cup sugar, 3 eggs, and 1/2 cup butter in a bowl. Bake at 350°F for 25 minutes.",
					"snippet": "Blend 4 bananas, 2 avocados, 1 cup honey, and a splash of almond milk until smooth. Chill for 2 hours.",
					"expected_result": {
						"snippet_located": false
					},
					"allow_buffer": true
				}
			]
		}
	]
}

================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/open_url/test_onyx_web_crawler.py
================================================
from __future__ import annotations

import time
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from pydantic import BaseModel

import onyx.tools.tool_implementations.open_url.onyx_web_crawler as crawler_module
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_CONNECT_TIMEOUT_SECONDS,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    DEFAULT_READ_TIMEOUT_SECONDS,
)
from onyx.tools.tool_implementations.open_url.onyx_web_crawler import OnyxWebCrawler


class FakeResponse(BaseModel):
    status_code: int
    headers: dict[str, str]
    content: bytes
    text: str = ""
    apparent_encoding: str | None = None
    encoding: str | None = None


def test_fetch_url_pdf_with_content_type(monkeypatch: pytest.MonkeyPatch) -> None:
    crawler = OnyxWebCrawler()
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "application/pdf"},
        content=b"%PDF-1.4 mock",
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )
    monkeypatch.setattr(
        crawler_module,
        "extract_pdf_text",
        lambda *args, **kwargs: ("pdf text", {"Title": "Doc Title"}),  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/report.pdf")

    assert result.full_content == "pdf text"
    assert result.title == "Doc Title"
    assert result.scrape_successful is True


def test_fetch_url_pdf_with_signature(monkeypatch: pytest.MonkeyPatch) -> None:
    crawler = OnyxWebCrawler()
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "application/octet-stream"},
        content=b"%PDF-1.7 mock",
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )
    monkeypatch.setattr(
        crawler_module,
        "extract_pdf_text",
        lambda *args, **kwargs: ("pdf text", {}),  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/files/file.pdf")

    assert result.full_content == "pdf text"
    assert result.title == "file.pdf"
    assert result.scrape_successful is True


def test_fetch_url_decodes_html_bytes(monkeypatch: pytest.MonkeyPatch) -> None:
    crawler = OnyxWebCrawler()
    html_bytes = b"<html><body>caf\xe9</body></html>"
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "text/html; charset=iso-8859-1"},
        content=html_bytes,
        text="caf\u00ef\u00bf\u00bd",
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/page.html")

    assert "caf\u00e9" in result.full_content
    assert result.scrape_successful is True


def test_fetch_url_pdf_exceeds_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:
    """PDF content exceeding max_pdf_size_bytes should be rejected."""
    crawler = OnyxWebCrawler(max_pdf_size_bytes=100)
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "application/pdf"},
        content=b"%PDF-1.4 " + b"x" * 200,  # 209 bytes, exceeds 100 limit
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/large.pdf")

    assert result.full_content == ""
    assert result.scrape_successful is False
    assert result.link == "https://example.com/large.pdf"


def test_fetch_url_pdf_within_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:
    """PDF content within max_pdf_size_bytes should be processed normally."""
    crawler = OnyxWebCrawler(max_pdf_size_bytes=500)
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "application/pdf"},
        content=b"%PDF-1.4 mock",  # Small content
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )
    monkeypatch.setattr(
        crawler_module,
        "extract_pdf_text",
        lambda *args, **kwargs: ("pdf text", {"Title": "Doc Title"}),  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/small.pdf")

    assert result.full_content == "pdf text"
    assert result.scrape_successful is True


def test_fetch_url_html_exceeds_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:
    """HTML content exceeding max_html_size_bytes should be rejected."""
    crawler = OnyxWebCrawler(max_html_size_bytes=50)
    html_bytes = b"<html><body>" + b"x" * 100 + b"</body></html>"  # Exceeds 50 limit
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "text/html"},
        content=html_bytes,
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/large.html")

    assert result.full_content == ""
    assert result.scrape_successful is False
    assert result.link == "https://example.com/large.html"


def test_fetch_url_html_within_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:
    """HTML content within max_html_size_bytes should be processed normally."""
    crawler = OnyxWebCrawler(max_html_size_bytes=500)
    html_bytes = b"<html><body>hello world</body></html>"
    response = FakeResponse(
        status_code=200,
        headers={"Content-Type": "text/html"},
        content=html_bytes,
    )

    monkeypatch.setattr(
        crawler_module,
        "ssrf_safe_get",
        lambda *args, **kwargs: response,  # noqa: ARG005
    )

    result = crawler._fetch_url("https://example.com/small.html")

    assert "hello world" in result.full_content
    assert result.scrape_successful is True


# ---------------------------------------------------------------------------
# Helpers for parallel / failure-isolation / timeout tests
# ---------------------------------------------------------------------------


def _make_mock_response(
    *,
    status_code: int = 200,
    content: bytes = b"<html><body>Hello</body></html>",
    content_type: str = "text/html",
    delay: float = 0.0,
) -> MagicMock:
    """Create a mock response that behaves like a requests.Response."""
    resp = MagicMock()
    resp.status_code = status_code
    resp.headers = {"Content-Type": content_type}

    if delay:
        original_content = content

        @property  # type: ignore[misc]
        def _delayed_content(_self: object) -> bytes:
            time.sleep(delay)
            return original_content

        type(resp).content = _delayed_content
    else:
        resp.content = content

    resp.apparent_encoding = None
    resp.encoding = None

    return resp


class TestParallelExecution:
    """Verify that contents() fetches URLs in parallel."""

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_multiple_urls_fetched_concurrently(self, mock_get: MagicMock) -> None:
        """With a per-URL delay, parallel execution should be much faster than sequential."""
        per_url_delay = 0.3
        num_urls = 5
        urls = [f"http://example.com/page{i}" for i in range(num_urls)]

        mock_get.return_value = _make_mock_response(delay=per_url_delay)

        crawler = OnyxWebCrawler()
        start = time.monotonic()
        results = crawler.contents(urls)
        elapsed = time.monotonic() - start

        # Sequential would take ~1.5s; parallel should be well under that
        assert elapsed < per_url_delay * num_urls * 0.7
        assert len(results) == num_urls
        assert all(r.scrape_successful for r in results)

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_empty_urls_returns_empty(self, mock_get: MagicMock) -> None:
        crawler = OnyxWebCrawler()
        results = crawler.contents([])
        assert results == []
        mock_get.assert_not_called()

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_single_url(self, mock_get: MagicMock) -> None:
        mock_get.return_value = _make_mock_response()
        crawler = OnyxWebCrawler()
        results = crawler.contents(["http://example.com"])
        assert len(results) == 1
        assert results[0].scrape_successful


class TestFailureIsolation:
    """Verify that one URL failure doesn't affect others in the batch."""

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_one_failure_doesnt_kill_batch(self, mock_get: MagicMock) -> None:
        good_resp = _make_mock_response()
        bad_resp = _make_mock_response(status_code=500)

        # First and third URLs succeed, second fails
        mock_get.side_effect = [good_resp, bad_resp, good_resp]

        crawler = OnyxWebCrawler()
        results = crawler.contents(["http://a.com", "http://b.com", "http://c.com"])

        assert len(results) == 3
        assert results[0].scrape_successful
        assert not results[1].scrape_successful
        assert results[2].scrape_successful

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_exception_doesnt_kill_batch(self, mock_get: MagicMock) -> None:
        good_resp = _make_mock_response()

        # Second URL raises an exception
        mock_get.side_effect = [
            good_resp,
            RuntimeError("connection reset"),
            _make_mock_response(),
        ]

        crawler = OnyxWebCrawler()
        results = crawler.contents(["http://a.com", "http://b.com", "http://c.com"])

        assert len(results) == 3
        assert results[0].scrape_successful
        assert not results[1].scrape_successful
        assert results[2].scrape_successful

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_ssrf_exception_doesnt_kill_batch(self, mock_get: MagicMock) -> None:
        from onyx.utils.url import SSRFException

        good_resp = _make_mock_response()
        mock_get.side_effect = [
            good_resp,
            SSRFException("blocked"),
            _make_mock_response(),
        ]

        crawler = OnyxWebCrawler()
        results = crawler.contents(
            ["http://a.com", "http://internal.local", "http://c.com"]
        )

        assert len(results) == 3
        assert results[0].scrape_successful
        assert not results[1].scrape_successful
        assert results[2].scrape_successful


class TestTupleTimeout:
    """Verify that separate connect and read timeouts are passed correctly."""

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_default_tuple_timeout(self, mock_get: MagicMock) -> None:
        mock_get.return_value = _make_mock_response()

        crawler = OnyxWebCrawler()
        crawler.contents(["http://example.com"])

        call_kwargs = mock_get.call_args
        assert call_kwargs.kwargs["timeout"] == (
            DEFAULT_CONNECT_TIMEOUT_SECONDS,
            DEFAULT_READ_TIMEOUT_SECONDS,
        )

    @patch("onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get")
    def test_custom_tuple_timeout(self, mock_get: MagicMock) -> None:
        mock_get.return_value = _make_mock_response()

        crawler = OnyxWebCrawler(timeout_seconds=30, connect_timeout_seconds=3)
        crawler.contents(["http://example.com"])

        call_kwargs = mock_get.call_args
        assert call_kwargs.kwargs["timeout"] == (3, 30)


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/open_url/test_snippet_matcher.py
================================================
from __future__ import annotations

import json
import unicodedata  # used to verify NFC expansion test preconditions
from pathlib import Path

import pytest
from pydantic import BaseModel
from pydantic import field_validator

from onyx.tools.tool_implementations.open_url.snippet_matcher import (
    find_snippet_in_content,
)

"""
We want to store tests in the json file in the following format:
{
    "categories": [
        {
            "category":  "...",
            "tests": [
                {
                    "name": "...",
                    "content": "... or ["...", "..."] where each item is a new line",
                    "snippet": "..." or ["...", "..."] where each item is a new line,
                    "expected_result": {
                        "snippet_located": true,
                        "expected_start_idx": 0,
                        "expected_end_idx": 10
                    },
                    "allow_buffer": false (Optional,  default: true)
                },
                ...
            ]
        },
        ...
    ]
}
"""

TEST_DATA_FILE_PATH = Path(__file__).parent / "data" / "test_snippet_finding_data.json"


class TestSchemaResult(BaseModel):
    """
    Expected results from the snippet matcher.
    """

    snippet_located: bool

    # Don't include if snippet_located is False
    expected_start_idx: int = -1
    expected_end_idx: int = -1


class TestSchema(BaseModel):
    """
    A test takes in some content and a snippet.

    Expected result is what we expect the output to be.
    """

    name: str
    content: str
    snippet: str

    expected_result: TestSchemaResult
    allow_buffer: bool = True

    @field_validator("content", "snippet", mode="before")
    @classmethod
    def convert_list_to_string(cls, v: str | list[str]) -> str:
        """
        We want to be able to handle strings or list of strings for content and snippet.
        The client should only see strings though, so we do some parsing here.
        """
        if isinstance(v, list):
            return "\n".join(v)
        return v


class TestCategory(BaseModel):
    """
    A category of tests.
    """

    category: str
    tests: list[TestSchema]


class TestDataFile(BaseModel):
    """
    The root structure of the test data JSON file.
    """

    categories: list[TestCategory]


def load_all_tests() -> list[tuple[str, TestSchema]]:
    """
    Loads all tests from the JSON file and returns them as a list of tuples.

    Each tuple contains (test_id, test_data) where test_id is "{category}_{name}".
    """
    with open(TEST_DATA_FILE_PATH, "r") as file:
        data = json.load(file)

    # Validate the entire file structure using Pydantic
    test_data = TestDataFile.model_validate(data)

    # Collect all tests with their category-prefixed names
    all_tests: list[tuple[str, TestSchema]] = []
    for category in test_data.categories:
        for test in category.tests:
            test_id = f"{category.category}_{test.name}"
            all_tests.append((test_id, test))

    return all_tests


# Load tests at module level for parametrization
_ALL_TESTS = load_all_tests()


@pytest.mark.parametrize(
    "test_data",
    [test for _, test in _ALL_TESTS],
    ids=[test_id for test_id, _ in _ALL_TESTS],
)
def test_snippet_finding(test_data: TestSchema) -> None:
    """
    Tests the snippet matching functionality.

    Each test case is defined in the JSON file and named {category}_{name}.
    """
    result = find_snippet_in_content(test_data.content, test_data.snippet)

    assert (
        result.snippet_located == test_data.expected_result.snippet_located
    ), f"snippet_located mismatch: expected {test_data.expected_result.snippet_located}, got {result.snippet_located}"

    # If buffer is allowed, we let the start and end indices be within 10 characters of where we expect
    BUFFER_SIZE = 10 if test_data.allow_buffer else 0

    assert (
        test_data.expected_result.expected_start_idx - BUFFER_SIZE
        <= result.start_idx
        <= test_data.expected_result.expected_start_idx + BUFFER_SIZE
    ), f"start_idx mismatch: expected {test_data.expected_result.expected_start_idx}, got {result.start_idx}"
    assert (
        test_data.expected_result.expected_end_idx - BUFFER_SIZE
        <= result.end_idx
        <= test_data.expected_result.expected_end_idx + BUFFER_SIZE
    ), f"end_idx mismatch: expected {test_data.expected_result.expected_end_idx}, got {result.end_idx}"


# Characters confirmed to expand from 1 → 2 codepoints under NFC
NFC_EXPANDING_CHARS = [
    ("\u0958", "Devanagari letter qa"),
    ("\u0959", "Devanagari letter khha"),
    ("\u095a", "Devanagari letter ghha"),
]


@pytest.mark.parametrize(
    "char,description",
    NFC_EXPANDING_CHARS,
)
def test_nfc_expanding_char_snippet_match(char: str, description: str) -> None:
    """Snippet matching should produce valid indices for content
    containing characters that expand under NFC normalization."""
    nfc = unicodedata.normalize("NFC", char)
    if len(nfc) <= 1:
        pytest.skip(f"{description} does not expand under NFC on this platform")

    content = f"before {char} after"
    snippet = f"{char} after"

    result = find_snippet_in_content(content, snippet)

    assert result.snippet_located, f"[{description}] Snippet should be found in content"
    assert (
        0 <= result.start_idx < len(content)
    ), f"[{description}] start_idx {result.start_idx} out of bounds"
    assert (
        0 <= result.end_idx < len(content)
    ), f"[{description}] end_idx {result.end_idx} out of bounds"
    assert (
        result.start_idx <= result.end_idx
    ), f"[{description}] start_idx {result.start_idx} > end_idx {result.end_idx}"

    matched = content[result.start_idx : result.end_idx + 1]
    matched_nfc = unicodedata.normalize("NFC", matched)
    snippet_nfc = unicodedata.normalize("NFC", snippet)
    assert (
        snippet_nfc in matched_nfc or matched_nfc in snippet_nfc
    ), f"[{description}] Matched span '{matched}' does not overlap with expected snippet '{snippet}'"


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/open_url/test_url_normalization.py
================================================
"""Unit tests for URL normalization module."""

import pytest

from onyx.configs.constants import DocumentSource
from onyx.tools.tool_implementations.open_url.open_url_tool import _url_lookup_variants
from onyx.tools.tool_implementations.open_url.url_normalization import (
    _detect_source_type,
)
from onyx.tools.tool_implementations.open_url.url_normalization import normalize_url


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://docs.google.com/document/d/1ABC123/edit?tab=t.0",
            "https://docs.google.com/document/d/1ABC123",
        ),
        (
            "https://docs.google.com/document/d/1ABC123/view",
            "https://docs.google.com/document/d/1ABC123",
        ),
        (
            "https://docs.google.com/document/d/1ABC123",
            "https://docs.google.com/document/d/1ABC123",
        ),
        (
            "https://drive.google.com/file/d/1ABC123/view?usp=sharing",
            "https://drive.google.com/file/d/1ABC123",
        ),
        (
            "https://drive.google.com/open?id=1ABC123",
            "https://drive.google.com/file/d/1ABC123",
        ),
        (
            "https://docs.google.com/document/d/1TVE04FYWmyP9j-OJFYcG3tnaLeqBbZ1pauCvmYkNq7c/edit?tab=t.0",
            "https://docs.google.com/document/d/1TVE04FYWmyP9j-OJFYcG3tnaLeqBbZ1pauCvmYkNq7c",
        ),
    ],
)
def test_google_drive_normalization(url: str, expected: str) -> None:
    """Test Google Drive URL normalization."""
    assert normalize_url(url, source_type=DocumentSource.GOOGLE_DRIVE) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://notion.so/Page-1234567890abcdef1234567890abcdef",
            "12345678-90ab-cdef-1234-567890abcdef",
        ),
        (
            "https://notion.so/page?p=1234567890abcdef1234567890abcdef",
            "12345678-90ab-cdef-1234-567890abcdef",
        ),
        # Edge case: URL with title prefix but valid UUID
        (
            "https://www.notion.so/My-Page-abc123def456ghi789jkl012mno345pq",
            None,  # May not extract correctly if UUID is incomplete
        ),
    ],
)
def test_notion_normalization(url: str, expected: str | None) -> None:
    """Test Notion URL normalization (extracts page ID as UUID)."""
    result = normalize_url(url, source_type=DocumentSource.NOTION)
    assert result == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://workspace.slack.com/archives/C1234567890/p1234567890123456",
            "C1234567890__1234567890.123456",
        ),
        (
            "https://workspace.slack.com/archives/C1234567890/p1234567890123456?thread_ts=1234567890.123456",
            "C1234567890__1234567890.123456",
        ),
    ],
)
def test_slack_normalization(url: str, expected: str) -> None:
    """Test Slack URL normalization (extracts channel_id__thread_ts format)."""
    assert normalize_url(url, source_type=DocumentSource.SLACK) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/12345?query=param#section",
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/12345",
        ),
        (
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/12345",
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/12345",
        ),
    ],
)
def test_confluence_normalization(url: str, expected: str) -> None:
    """Test Confluence URL normalization (uses default normalizer)."""
    assert normalize_url(url, source_type=DocumentSource.CONFLUENCE) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://example.atlassian.net/jira/browse/PROJ-123?query=param#section",
            "https://example.atlassian.net/jira/browse/PROJ-123",
        ),
        (
            "https://example.atlassian.net/jira/software/projects/PROJ/issues/PROJ-123",
            "https://example.atlassian.net/jira/software/projects/PROJ/issues/PROJ-123",
        ),
    ],
)
def test_jira_normalization(url: str, expected: str) -> None:
    """Test Jira URL normalization (uses default normalizer)."""
    assert normalize_url(url, source_type=DocumentSource.JIRA) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://github.com/owner/repo/blob/main/file.py?query=param#section",
            "https://github.com/owner/repo/blob/main/file.py",
        ),
        (
            "https://github.com/owner/repo/blob/main/file.py",
            "https://github.com/owner/repo/blob/main/file.py",
        ),
    ],
)
def test_github_normalization(url: str, expected: str) -> None:
    """Test GitHub URL normalization (uses default normalizer)."""
    assert normalize_url(url, source_type=DocumentSource.GITHUB) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://gitlab.com/owner/repo/-/blob/main/file.py?query=param#section",
            "https://gitlab.com/owner/repo/-/blob/main/file.py",
        ),
    ],
)
def test_gitlab_normalization(url: str, expected: str) -> None:
    """Test GitLab URL normalization (uses default normalizer)."""
    assert normalize_url(url, source_type=DocumentSource.GITLAB) == expected


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://example.sharepoint.com/sites/Site/Doc.aspx?query=param#section",
            "https://example.sharepoint.com/sites/Site/Doc.aspx",
        ),
    ],
)
def test_sharepoint_normalization(url: str, expected: str) -> None:
    """Test SharePoint URL normalization (uses default normalizer)."""
    assert normalize_url(url, source_type=DocumentSource.SHAREPOINT) == expected


@pytest.mark.parametrize(
    "url,expected_source",
    [
        (
            "https://docs.google.com/document/d/1ABC123/edit",
            DocumentSource.GOOGLE_DRIVE,
        ),
        ("https://drive.google.com/file/d/123", DocumentSource.GOOGLE_DRIVE),
        ("https://www.notion.so/Page-abc123def456", DocumentSource.NOTION),
        ("https://notion.site/page", DocumentSource.NOTION),
        (
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/123",
            DocumentSource.CONFLUENCE,
        ),
        ("https://example.atlassian.net/jira/browse/PROJ-123", DocumentSource.JIRA),
        ("https://github.com/owner/repo/blob/main/file.py", DocumentSource.GITHUB),
        ("https://gitlab.com/owner/repo", DocumentSource.GITLAB),
        ("https://example.sharepoint.com/sites/Site", DocumentSource.SHAREPOINT),
        ("https://workspace.slack.com/archives/C123/p456", DocumentSource.SLACK),
        ("https://example.com/doc", None),  # Unknown source
    ],
)
def test_detect_source_type(url: str, expected_source: DocumentSource | None) -> None:
    """Test source type detection from URL patterns."""
    assert _detect_source_type(url) == expected_source


@pytest.mark.parametrize(
    "url,expected_source,expected_normalized",
    [
        (
            "https://docs.google.com/document/d/1ABC123/edit",
            DocumentSource.GOOGLE_DRIVE,
            "https://docs.google.com/document/d/1ABC123",
        ),
        (
            "https://www.notion.so/Page-1234567890abcdef1234567890abcdef",
            DocumentSource.NOTION,
            "12345678-90ab-cdef-1234-567890abcdef",
        ),
        (
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/123",
            DocumentSource.CONFLUENCE,
            "https://example.atlassian.net/wiki/spaces/SPACE/pages/123",
        ),
    ],
)
def test_normalize_url_with_auto_detection(
    url: str, expected_source: DocumentSource, expected_normalized: str
) -> None:
    """Test normalize_url auto-detects source type when source_type not provided."""
    detected = _detect_source_type(url)
    assert detected == expected_source

    normalized = normalize_url(url)  # No source_type provided
    assert normalized == expected_normalized


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://example.com/doc?query=param#section",
            "https://example.com/doc",
        ),
        (
            "https://example.com/doc/",
            "https://example.com/doc",
        ),
        (
            "http://example.com/doc",
            "http://example.com/doc",  # Default normalizer preserves scheme
        ),
    ],
)
def test_default_normalizer(url: str, expected: str) -> None:
    """Test default normalizer for connectors without custom normalizers."""
    # Use a source type that doesn't have a custom normalizer
    result = normalize_url(url, source_type=DocumentSource.WEB)
    assert result == expected


def test_normalize_url_returns_none_for_invalid_url() -> None:
    """Test that normalize_url returns None for invalid URLs."""
    assert normalize_url("not-a-url") is None
    assert normalize_url("") is None


def test_normalize_url_with_unknown_source_type() -> None:
    """Test that normalize_url falls back to default for unknown source types."""
    url = "https://example.com/doc?query=param"
    # Use a source type that doesn't have a custom normalizer
    result = normalize_url(url, source_type=DocumentSource.WEB)
    assert result == "https://example.com/doc"


def test_url_lookup_variants_includes_trailing_slash_versions() -> None:
    """Test that variants include both with and without trailing slash."""
    variants = _url_lookup_variants("https://example.com/path")
    assert "https://example.com/path" in variants
    assert "https://example.com/path/" in variants
    assert len(variants) == 2


def test_url_lookup_variants_strips_query_and_fragment() -> None:
    """Test that variants strip query parameters and fragments."""
    variants = _url_lookup_variants("https://example.com/path?a=1#section")
    assert "https://example.com/path" in variants
    assert "https://example.com/path/" in variants
    # Should not include query/fragment variants
    assert "https://example.com/path?a=1" not in variants
    assert "https://example.com/path#section" not in variants


def test_url_lookup_variants_handles_normalized_urls() -> None:
    """Test that variants work correctly with already-normalized URLs."""
    # Test with a Google Drive URL that's already normalized
    variants = _url_lookup_variants("https://docs.google.com/document/d/abc123def456")
    assert "https://docs.google.com/document/d/abc123def456" in variants
    assert "https://docs.google.com/document/d/abc123def456/" in variants


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/python/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/python/test_code_interpreter_client.py
================================================
"""Unit tests for CodeInterpreterClient streaming-to-batch fallback.

When the streaming endpoint (/v1/execute/stream) returns 404 — e.g. because the
code-interpreter service is an older version that doesn't support streaming — the
client should transparently fall back to the batch endpoint (/v1/execute) and
convert the batch response into the same stream-event interface.
"""

from __future__ import annotations

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.tools.tool_implementations.python.code_interpreter_client import (
    CodeInterpreterClient,
)
from onyx.tools.tool_implementations.python.code_interpreter_client import FileInput
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamOutputEvent,
)
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamResultEvent,
)


def _make_batch_response(
    stdout: str = "",
    stderr: str = "",
    exit_code: int = 0,
    timed_out: bool = False,
    duration_ms: int = 50,
) -> MagicMock:
    """Build a mock ``requests.Response`` for the batch /v1/execute endpoint."""
    resp = MagicMock()
    resp.status_code = 200
    resp.raise_for_status = MagicMock()
    resp.json.return_value = {
        "stdout": stdout,
        "stderr": stderr,
        "exit_code": exit_code,
        "timed_out": timed_out,
        "duration_ms": duration_ms,
        "files": [],
    }
    return resp


def _make_404_response() -> MagicMock:
    """Build a mock ``requests.Response`` that returns 404 (streaming not found)."""
    resp = MagicMock()
    resp.status_code = 404
    return resp


def test_execute_streaming_fallback_to_batch_on_404() -> None:
    """When /v1/execute/stream returns 404, the client should fall back to
    /v1/execute and yield equivalent StreamEvent objects."""

    client = CodeInterpreterClient(base_url="http://fake:9000")

    stream_resp = _make_404_response()
    batch_resp = _make_batch_response(
        stdout="hello world\n",
        stderr="a warning\n",
    )

    urls_called: list[str] = []

    def mock_post(url: str, **_kwargs: object) -> MagicMock:
        urls_called.append(url)
        if url.endswith("/v1/execute/stream"):
            return stream_resp
        if url.endswith("/v1/execute"):
            return batch_resp
        raise AssertionError(f"Unexpected URL: {url}")

    with patch.object(client.session, "post", side_effect=mock_post):
        events = list(client.execute_streaming(code="print('hello world')"))

    # Streaming endpoint was attempted first, then batch
    assert len(urls_called) == 2
    assert urls_called[0].endswith("/v1/execute/stream")
    assert urls_called[1].endswith("/v1/execute")

    # The 404 response must be closed before making the batch call
    stream_resp.close.assert_called_once()

    # _batch_as_stream yields: stdout event, stderr event, result event
    assert len(events) == 3

    assert isinstance(events[0], StreamOutputEvent)
    assert events[0].stream == "stdout"
    assert events[0].data == "hello world\n"

    assert isinstance(events[1], StreamOutputEvent)
    assert events[1].stream == "stderr"
    assert events[1].data == "a warning\n"

    assert isinstance(events[2], StreamResultEvent)
    assert events[2].exit_code == 0
    assert not events[2].timed_out
    assert events[2].duration_ms == 50
    assert events[2].files == []


def test_execute_streaming_fallback_stdout_only() -> None:
    """Fallback with only stdout (no stderr) should yield two events:
    one StreamOutputEvent for stdout and one StreamResultEvent."""

    client = CodeInterpreterClient(base_url="http://fake:9000")

    stream_resp = _make_404_response()
    batch_resp = _make_batch_response(stdout="result: 42\n")

    def mock_post(url: str, **_kwargs: object) -> MagicMock:
        if url.endswith("/v1/execute/stream"):
            return stream_resp
        if url.endswith("/v1/execute"):
            return batch_resp
        raise AssertionError(f"Unexpected URL: {url}")

    with patch.object(client.session, "post", side_effect=mock_post):
        events = list(client.execute_streaming(code="print(42)"))

    # No stderr → only stdout + result
    assert len(events) == 2

    assert isinstance(events[0], StreamOutputEvent)
    assert events[0].stream == "stdout"
    assert events[0].data == "result: 42\n"

    assert isinstance(events[1], StreamResultEvent)
    assert events[1].exit_code == 0


def test_execute_streaming_fallback_preserves_files_param() -> None:
    """When falling back, the files parameter must be forwarded to the
    batch endpoint so staged files are still available for execution."""

    client = CodeInterpreterClient(base_url="http://fake:9000")

    stream_resp = _make_404_response()
    batch_resp = _make_batch_response(stdout="ok\n")

    captured_payloads: list[dict] = []

    def mock_post(url: str, **kwargs: object) -> MagicMock:
        if "json" in kwargs:
            captured_payloads.append(kwargs["json"])  # type: ignore[arg-type]
        if url.endswith("/v1/execute/stream"):
            return stream_resp
        if url.endswith("/v1/execute"):
            return batch_resp
        raise AssertionError(f"Unexpected URL: {url}")

    files_input: list[FileInput] = [{"path": "data.csv", "file_id": "file-abc123"}]

    with patch.object(client.session, "post", side_effect=mock_post):
        events = list(
            client.execute_streaming(
                code="import pandas",
                files=files_input,
            )
        )

    # Both the streaming attempt and the batch fallback should include files
    assert len(captured_payloads) == 2
    for payload in captured_payloads:
        assert payload["files"] == files_input
        assert payload["code"] == "import pandas"

    # Should still yield valid events
    assert any(isinstance(e, StreamResultEvent) for e in events)


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/python/test_python_tool_upload_cache.py
================================================
"""Unit tests for PythonTool file-upload caching.

Verifies that PythonTool reuses code-interpreter file IDs across multiple
run() calls within the same session instead of re-uploading identical content
on every agent loop iteration.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

from onyx.tools.models import ChatFile
from onyx.tools.models import PythonToolOverrideKwargs
from onyx.tools.tool_implementations.python.code_interpreter_client import (
    StreamResultEvent,
)
from onyx.tools.tool_implementations.python.python_tool import PythonTool

TOOL_MODULE = "onyx.tools.tool_implementations.python.python_tool"


def _make_stream_result() -> StreamResultEvent:
    return StreamResultEvent(
        exit_code=0,
        timed_out=False,
        duration_ms=10,
        files=[],
    )


def _make_tool() -> PythonTool:
    emitter = MagicMock()
    return PythonTool(tool_id=1, emitter=emitter)


def _make_override(files: list[ChatFile]) -> PythonToolOverrideKwargs:
    return PythonToolOverrideKwargs(chat_files=files)


def _run_tool(tool: PythonTool, mock_client: MagicMock, files: list[ChatFile]) -> None:
    """Call tool.run() with a mocked CodeInterpreterClient context manager."""
    from onyx.server.query_and_chat.placement import Placement

    mock_client.execute_streaming.return_value = iter([_make_stream_result()])

    ctx = MagicMock()
    ctx.__enter__ = MagicMock(return_value=mock_client)
    ctx.__exit__ = MagicMock(return_value=False)

    placement = Placement(turn_index=0, tab_index=0)
    override = _make_override(files)

    with patch(f"{TOOL_MODULE}.CodeInterpreterClient", return_value=ctx):
        tool.run(placement=placement, override_kwargs=override, code="print('hi')")


# ---------------------------------------------------------------------------
# Cache hit: same content uploaded in a second call reuses the file_id
# ---------------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_same_file_uploaded_only_once_across_two_runs() -> None:
    tool = _make_tool()
    client = MagicMock()
    client.upload_file.return_value = "file-id-abc"

    pptx_content = b"fake pptx bytes"
    files = [ChatFile(filename="report.pptx", content=pptx_content)]

    _run_tool(tool, client, files)
    _run_tool(tool, client, files)

    # upload_file should only have been called once across both runs
    client.upload_file.assert_called_once_with(pptx_content, "report.pptx")


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_cached_file_id_is_staged_on_second_run() -> None:
    tool = _make_tool()
    client = MagicMock()
    client.upload_file.return_value = "file-id-abc"

    files = [ChatFile(filename="data.pptx", content=b"content")]

    _run_tool(tool, client, files)

    # On the second run, execute_streaming should still receive the file
    client.execute_streaming.return_value = iter([_make_stream_result()])
    ctx = MagicMock()
    ctx.__enter__ = MagicMock(return_value=client)
    ctx.__exit__ = MagicMock(return_value=False)

    from onyx.server.query_and_chat.placement import Placement

    placement = Placement(turn_index=1, tab_index=0)
    with patch(f"{TOOL_MODULE}.CodeInterpreterClient", return_value=ctx):
        tool.run(
            placement=placement,
            override_kwargs=_make_override(files),
            code="print('hi')",
        )

    # The second execute_streaming call should include the file
    _, kwargs = client.execute_streaming.call_args
    staged_files = kwargs.get("files") or []
    assert any(f["file_id"] == "file-id-abc" for f in staged_files)


# ---------------------------------------------------------------------------
# Cache miss: different content triggers a new upload
# ---------------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_different_file_content_uploaded_separately() -> None:
    tool = _make_tool()
    client = MagicMock()
    client.upload_file.side_effect = ["file-id-v1", "file-id-v2"]

    file_v1 = ChatFile(filename="report.pptx", content=b"version 1")
    file_v2 = ChatFile(filename="report.pptx", content=b"version 2")

    _run_tool(tool, client, [file_v1])
    _run_tool(tool, client, [file_v2])

    assert client.upload_file.call_count == 2


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_multiple_distinct_files_each_uploaded_once() -> None:
    tool = _make_tool()
    client = MagicMock()
    client.upload_file.side_effect = ["id-a", "id-b"]

    files = [
        ChatFile(filename="a.pptx", content=b"aaa"),
        ChatFile(filename="b.xlsx", content=b"bbb"),
    ]

    _run_tool(tool, client, files)
    _run_tool(tool, client, files)

    # Two distinct files — each uploaded exactly once
    assert client.upload_file.call_count == 2


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_same_content_different_filename_uploaded_separately() -> None:
    # Identical bytes but different names must each get their own upload slot
    # so both files appear under their respective paths in the workspace.
    tool = _make_tool()
    client = MagicMock()
    client.upload_file.side_effect = ["id-v1", "id-v2"]

    same_bytes = b"shared content"
    files = [
        ChatFile(filename="report_v1.csv", content=same_bytes),
        ChatFile(filename="report_v2.csv", content=same_bytes),
    ]

    _run_tool(tool, client, files)

    assert client.upload_file.call_count == 2


# ---------------------------------------------------------------------------
# No cross-instance sharing: a fresh PythonTool re-uploads everything
# ---------------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_new_tool_instance_re_uploads_file() -> None:
    client = MagicMock()
    client.upload_file.side_effect = ["id-session-1", "id-session-2"]

    files = [ChatFile(filename="deck.pptx", content=b"slide data")]

    tool_session_1 = _make_tool()
    _run_tool(tool_session_1, client, files)

    tool_session_2 = _make_tool()
    _run_tool(tool_session_2, client, files)

    # Different instances — each uploads independently
    assert client.upload_file.call_count == 2


# ---------------------------------------------------------------------------
# Upload failure: failed upload is not cached, retried next run
# ---------------------------------------------------------------------------


@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_upload_failure_not_cached() -> None:
    tool = _make_tool()
    client = MagicMock()
    # First call raises, second succeeds
    client.upload_file.side_effect = [Exception("network error"), "file-id-ok"]

    files = [ChatFile(filename="slides.pptx", content=b"data")]

    # First run — upload fails, file is skipped but not cached
    _run_tool(tool, client, files)

    # Second run — should attempt upload again
    _run_tool(tool, client, files)

    assert client.upload_file.call_count == 2


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/websearch/data/tartan.txt
================================================
Three tartans; the left and right are made with the "modern" dye palette; the middle is made with "muted" colours.Tartans come in a wide variety of colours and patterns.1970s [Missoni] tartan knit jumper (sweater) and skirt set

**Tartan** ( [Scottish Gaelic]: _breacan_[\[ˈpɾʲɛxkən\]]), also known as **plaid** ( especialy in American English, is a patterned cloth consisting of crossing horizontal and vertical bands in multiple colours, forming repeating symmetrical patterns known as _setts_. Tartan patterns vary in complexity, from simple two-colour designs to intricate motifs with over twenty hues. Originating in woven wool, tartan is most strongly associated with [Scotland], where it has been used for centuries in traditional clothing such as the [kilt]. Specific tartans are linked to [Scottish clans], families, or regions, with patterns and colours derived historically from local natural dyes (now supplanted by artificial ones). Tartans also serve institutional roles, including [military uniforms] and organisational branding.

Tartan became a symbol of [Scottish identity], especially from the 17th century onward, despite a ban under the [Dress Act 1746] lasting about two generations following the [Jacobite rising of 1745]. The 19th-century [Highland Revival] popularized tartan globally, associating it with [Highland dress] and the [Scottish diaspora]. Today, tartan is used worldwide in clothing, accessories, and design, transcending its traditional roots. Modern tartans are registered for organisations, individuals, and commemorative purposes, with thousands of designs in the [Scottish Register of Tartans].

While often linked to Scottish heritage, tartans exist in other cultures, such as Africa, East and South Asia, and Eastern Europe. The earliest surviving samples of tartan-style cloth are around 3,000 years old and were discovered in [Xinjiang], China.

## Etymology and terminology

\[ [edit] \]

The [English] and [Scots] word _tartan_ is possibly derived from [French] _tiretaine_ meaning ' [linsey-woolsey] cloth'.[\[1\]] [\[2\]] [\[3\]] Other hypotheses are that it derives from [Scottish Gaelic] _tarsainn_ or _tarsuinn_, meaning 'across' or 'crossing over';[\[2\]] [\[3\]] or from French _tartarin_ or _tartaryn_ (occurring in 1454 spelled _tartyn_)[\[4\]] meaning ' [Tartar cloth] '.[\[1\]] It is unrelated to the superficially similar word _[tarlatan] _, which refers to a very open-weave [muslin] similar to [cheesecloth]. _Tartan_ is both a [mass noun] ("12 metres of tartan") and a [count noun] ("12 different tartans").

Today, _tartan_ refers to coloured patterns, though originally did not have to be made up of a pattern at all, as it referred to the type of weave; as late as the 1820s, some tartan cloth was described as "plain coloured ... without pattern".[\[5\]] [\[6\]] Patterned cloth from the [Gaelic] -speaking [Scottish Highlands] was called _breacan_, meaning 'many colours'. Over time, the meanings of _tartan_ and _breacan_ were combined to describe a certain type of pattern on a certain type of cloth.[\[6\]] 

The pattern of a particular tartan is called its _sett_. The sett is made up of a series of lines at specific widths which cross at [right angles] and blend into each other;[\[6\]] the longer term _setting_ is occasionally used.[\[7\]] _Sett_ can refer to either the minimal visual presentation of the complete tartan pattern or to a textual representation of it (in a _thread count_).[\[6\]] 

Today _tartan_ is used more generally to describe the pattern, not limited to textiles, appearing on media such as paper, plastics, packaging, and wall coverings.[\[8\]] [\[6\]] [\[9\]] [\[10\]] 

In North America, the term _plaid_ is commonly used to refer to tartan.[\[11\]] [\[12\]] [\[13\]] [\[a\]] _Plaid_, derived from the Scottish Gaelic _plaide_ meaning 'blanket',[\[16\]] [\[b\]] was first used of any rectangular garment, sometimes made up of tartan,[\[c\]] which could be worn several ways: the [belted plaid] (_breacan féile_) or "great kilt" which preceded the modern [kilt]; the [arisaid] (_earasaid_), a large shawl that could be wrapped into a dress; and several types of shoulder cape, such as the [full plaid] and [fly plaid]. In time, _plaid_ was used to describe blankets themselves.[\[12\]] In former times, the term _plaiding_[\[20\]] or _pladding_[\[21\]] was sometimes used to refer to tartan cloth.

### Weaving construction

\[ [edit] \]

Visualisation of 2/2 twill weave: the black weft threads go two over then two under the orange warp threads, staggered by one thread each pass (resulting in a diagonal pattern). In the actual cloth, the white gaps would be closed.

The [Scottish Register of Tartans] provides the following summary definition of tartan:[\[22\]] 

> Tartan (the design) is a pattern that comprises two or more different solid-coloured stripes that can be of similar but are usually of differing proportions that repeat in a defined sequence. The sequence of the warp colours (long-ways threads) is repeated in same order and size in the weft (cross-ways threads). The majority of such patterns (or setts) are symmetrical, i.e. the pattern repeats in the same colour order and proportions in every direction from the two pivot points. In the less common asymmetric patterns, the colour sequence repeats in blocks as opposed to around alternating pivots but the size and colour sequence of warp and weft remain the same.

Close-up view of traditional tartan cloth, showing pattern of diagonal "ribs" of colour; this is a five-colour tartan, in scarlet red, black, yellow, azure blue, and crimson red.

In more detail, traditional tartan cloth is a tight, staggered 2/2 [twill] weave of [worsted] wool: the horizontal _[weft] _ (also _woof_ or _fill_) is woven in a simple arrangement of two-over-two-under the fixed, vertical _[warp] _, advancing one thread at each pass.[\[15\]] As each thread in the weft crosses threads in the warp, the staggering by one means that each warp thread will also cross two weft threads. The result, when the material is examined closely, is a characteristic 45-degree diagonal pattern of "ribs" where different colours cross.[\[6\]] [\[23\]] Where a thread in the weft crosses threads of the same colour in the warp, this produces a solid colour on the tartan, while a weft thread crossing warp threads of a different colour produces an equal admixture of the two colours alternating, producing the appearance of a third colour – a halftone _blend_ or _mixture_ – when viewed from further back.[\[24\]] [\[6\]] [\[22\]] (The effect is similar to multicolour [halftone] printing, or [cross-hatching] in coloured-pencil art.)[\[6\]] Thus, a set of two base colours produces three different colours including one blend, increasing [quadratically] with the number of base colours; so a set of six base colours produces fifteen blends and a total of twenty-one different perceived colours.[\[6\]] [\[25\]] [\[d\]] This means that the more stripes and colours used, the more blurred and subdued the tartan's pattern becomes.[\[6\]] [\[24\]] Unlike in simple _[checker (chequer)] _ or _dicing_ patterns (like a chessboard), no solid colour in a tartan appears next to another solid colour, only a blend[\[6\]] (solid colours may touch at their corners).[\[26\]] 

James D. Scarlett (2008) offered a definition of a usual tartan pattern (some types of tartan deviate from the particulars of this definition – see below):[\[6\]] 

> The unit of tartan pattern, the _sett_, is a square, composed of a number of rectangles, square and oblong, arranged symmetrically around a central square. Each of these elements occurs four times, at intervals of ninety degrees, and each is rotated ninety degrees in relation to its fellows. The proportions of the elements are determined by the relative widths of the stripes that form them.

The sequence of thread colours in the _sett_ (the minimal design of the tartan, to be duplicated[\[6\]]  – "the DNA of a tartan"),[\[27\]] starts at an edge and either reverses or (rarely) repeats on what are called _pivot points_ or _pivots_.[\[28\]] In diagram A, the sett begins at the first pivot, reverses at the second pivot, continues, then reverses again at the next pivot, and will carry on in this manner horizontally. In diagram B, the sett proceeds in the same way as in the warp but vertically. The diagrams illustrate the construction of a typical _symmetric_[\[29\]] (also _symmetrical_,[\[27\]] _reflective_,[\[27\]] _reversing_,[\[30\]] or _mirroring_)[\[31\]] [\[e\]] tartan. However, on a rare _asymmetric_[\[33\]] ( _asymmetrical_,[\[27\]] or _non-reversing_)[\[33\]] [\[f\]] tartan, the sett does not reverse at the pivots, it just repeats at them.[\[g\]] An old term for the latter type is _cheek_ or _cheeck_ pattern.[\[35\]] Also, some tartans (very few among traditional Scottish tartans) do not have exactly the same sett for the warp and weft. This means the warp and weft will have differing thread counts (see below).[\[h\]] Asymmetric and differing-warp-and-weft patterns are more common in madras cloth (see [§ Indian madras], below) and some other weaving traditions than in Scottish tartan.

- Diagram A, the warp

- Diagram B, the weft

- Diagram C, the tartan. The combining of the warp and weft.


A tartan is recorded by counting the threads of each colour that appear in the sett.[\[i\]] The _[thread count] _ (or _threadcount_, _thread-count_) not only describes the width of the stripes on a sett, but also the colours used (typically abbreviated).[\[27\]] Usually every number in a thread count is an even number[\[42\]] to assist in manufacture. The first and last threads of the thread count are the pivots.[\[28\]] A thread count combined with exact colour information and other weaving details is referred to as a _ticket stamp_[\[43\]] or simply _ticket_.[\[44\]] 

Tartan weaving in [Lochcarron], [Scottish Highlands] 

There is no universally standardised way to write a thread count,[\[37\]] but the different systems are easy to distinguish. As a simple example:

- The thread count "/K4 R24 K24 Y4/" corresponds to a mirroring pattern of 4 black threads, 24 red threads, 24 black threads, 4 yellow threads, in which the beginning black and ending yellow pivots are _not_ repeated (after Y4/, the colours are reversed, first K24 then R24); this is a " _full-count_ at the pivots" thread count.[\[28\]] 
 - An equivalent notation is boldfacing the pivot abbreviations: **K** 4 R24 K24 **Y** 4.
- The same tartan could also be represented as "K/2 R24 K24 Y/2", in markup that indicates that the leading black and trailing yellow _are_ duplicated before continuing from these pivot points (after Y/2, the colours are reversed as Y/2 again, then K24, then R24); this is a " _half-count_ at the pivots" thread count.[\[27\]] 
- In the older and potentially ambiguous style of thread-counting, without the "/" (or bold) notation, a thread count like "K4 R24 K24 Y4" is assumed to be full-count at the pivots, unless the author clearly indicates otherwise.[\[28\]] [\[37\]] [\[j\]] 

In all of these cases, the result is a _half-sett_ thread count, which represents the threading before the pattern mirrors and completes; a full-sett thread count for a mirroring (symmetric) tartan is redundant.[\[37\]] A "/" can also be used between two colour codes (e.g. "W/Y24" for "white/yellow 24") to create even more of a shorthand threadcount for simple tartans in which half of the half-sett pattern is different from the other only in the way of a colour swap;[\[45\]] but this is not a common style of thread-counting.

- An asymmetric tartan, one that does not mirror, would be represented in a _full-sett_ thread count with "..." markup, as "...K4 R24 K24 Y4..." (after Y4, the entire pattern would begin again from K4).[\[27\]] 

Various writers and tartan databases do not use a consistent set of colour names (see [§ Colour, palettes, and meaning], below) and abbreviations,[\[46\]] so a thread count may not be universally understandable without a colour [key/legend]. Some recorders prefer to begin a thread count at the pivot with the colour name (or abbreviation) that is first in alphabetical order (e.g. if there is a white pivot and a blue one, begin with blue),[\[27\]] but this is actually arbitrary.

Though thread counts are quite specific, they can be modified depending on the desired size of the tartan. For example, the sett of a tartan (e.g., 6 inches square – a typical size for kilts)[\[27\]] may be too large to fit upon the face of a [necktie]. In this case, the thread count would be reduced _in proportion_ (e.g. to 3 inches to a side).[\[37\]] In some works, a thread count is reduced to the smallest even number of threads (often down to 2) required to accurately reproduce the design;[\[28\]] in such a case, it is often necessary to up-scale the thread count proportionally for typical use in kilts and plaids.

Before the 19th century, tartan was often woven with thread for the weft that was up to 1/3 thicker than the fine thread used for the warp,[\[6\]] which would result in a rectangular rather than square pattern; the solution was to adjust the weft thread count to return the pattern to square,[\[23\]] or make it non-square on purpose, as is still done in a handful of traditional tartans.[\[h\]] Uneven warp-and-weft thread thickness could also contribute to a striped rather than checked appearance in some tartan samples.[\[47\]] 

The predominant colours of a tartan (the widest bands) are called the _under-check_ (or _under check_, _undercheck_, _under-cheque_);[\[48\]] sometimes the terms _ground_,[\[k\]] _background_,[\[50\]] or _base_[\[50\]] are used instead, especially if there is only one dominant colour. Thin, contrasting lines are referred to as the _over-check_[\[51\]] [\[50\]] (also _over-stripe_ or _overstripe_).[\[52\]] Over-checks in pairs are sometimes referred to as _[tram] lines_, _tramlines_, or _tram tracks_.[\[53\]] Bright over-checks are sometimes bordered on either side (usually both), for extra contrast, by additional thin lines, often black, called _guard lines_ or _guards_.[\[53\]] Historically, the weaver William Wilson & Son of Bannockburn sometimes wove bright over-checks in silk, to give some added shine[\[54\]] [\[55\]] (commercially around 1820–30, but in regimental officers' plaids back to at least 1794).[\[56\]] [\[l\]] Tartan used for plaids (not the belted plaid) often have a [purled] fringe.[\[57\]] 

Zoom-in on a bagpiper's [full plaid] ( [royal Stuart tartan]), showing the purled fringe style typical for such garments

An old-time practice, to the 18th century, was to add an accent on plaids or sometimes kilts in the form of a _[selvedge] _ in [herringbone weave] at the edge, 1–3 inches (2.5–7.6 cm) wide, but still fitting into the colour pattern of the sett;[\[57\]] [\[58\]] a few modern weavers will still produce some tartan in this style. Sometimes more decorative selvedges were used: _Selvedge marks_ were borders (usually on one side only) formed by repeating a colour from the sett in a broad band (often in herringbone), sometimes further bordered by a thin strip of another colour from the sett or decorated in mid-selvedge with two thin strips; these were typically used for the bottoms of belted plaids and kilts,[\[57\]] [\[59\]] and were usually black in military tartans, but could be more colourful in civilian ones.[\[60\]] The more elaborate _selvedge patterns_ were a wider series of narrow stripes using some or all of the colours of the sett; these were almost exclusively used on household tartans (blankets, curtains, etc.), and on two opposing sides of the fabric.[\[60\]] [\[57\]] The very rare _total border_ is an all-four-sides selvedge of a completely different sett; described by Peter Eslea MacDonald (2019) as "an extraordinarily difficult feature to weave and can be regarded as the zenith of the tartan weaver's art",[\[57\]] it only survives in Scottish-style tartan as a handful of 18th-century samples (in Scotland[\[61\]] and [Nova Scotia], Canada, but probably all originally from Scotland).[\[62\]] The style has also been used [in Estonia in the weaving of _suurrätt_ shawls/plaids].

- 18th-century tartan with a herringbone _selvedge_ at the bottom


( [detail])

- Black Watch tartan with a _selvedge mark_ at the bottom (also herringbone)


( [detail])

- Wilsons 1819 blanket tartan with a _selvedge pattern_ on the right

- Bottom-right corner of blanket with _total border_ selvedge; approximation based on photo of real blanket discovered in Nova Scotia, but probably Scottish, c. 1780s


Tartan is usually woven _balanced-warp_ (or just _balanced_), repeating evenly from a pivot point at the centre outwards and with a complete sett finishing at the outer selvedge;[\[7\]] [\[63\]] [\[64\]] e.g. a piece of tartan for a plaid might be 24 setts long and 4 wide. An _offset_, _off-set_, or _unbalanced_ weave is one in which the pattern finishes at the edge in the middle of a pivot colour; this was typically done with pieces intended to be joined (e.g. for a belted plaid or a blanket) to make larger spans of cloth with the pattern continuing across the seam;[\[7\]] [\[64\]] if the tartan had a selvedge mark or selvedge pattern, it was at the other side of the warp.[\[65\]] 

The term _hard tartan_ refers to a version of the cloth woven with very tightly wound, non-fuzzy thread, producing a comparatively rougher and denser (though also thinner) material than is now typical for kilts.[\[66\]] [\[67\]] It was in common use up until the 1830s.[\[47\]] There are extant but uncommon samples of hard tartan from the early 18th century that use the more intricate herringbone instead of twill weave throughout the entire cloth.[\[68\]] 

While modern tartan is primarily a commercial enterprise on large [power looms], tartan was originally the product of rural weavers of the pre-industrial age, and can be produced by a dedicated hobbyist with a strong, stable [hand loom].[\[69\]] [\[70\]] [\[71\]] Since around 1808, the traditional size of the warp [reed] for tartan is 37 inches (94 cm), the length of the Scottish [ell] (previous sizes were sometimes 34 and 40 inches).[\[72\]] Telfer Dunbar (1979) describes the setup thus:[\[72\]] 

> The reed varies in thickness according to the texture of the material to be woven. A thirty-Porter (which contains 20 splits of the reed) or 600-reed, is divided into 600 openings in the breadth of 37 inches. Twenty of these openings are called a Porter and into each opening are put two threads, making 1,200 threads of warp and as many of weft in a square yard of tartan through a 30-Porter reed.

_Splits_ are also referred to as _dents_, and _Porters_ are also called _gangs_.[\[73\]] 

### Styles and design principles

\[ [edit] \]

Traditional tartan patterns can be divided into several style classes. The most basic is a simple two-colour check of thick bands (with or without thin over-checks of one or more other colours). A variant on this splits one or more of the bands, to form squares of smaller squares instead of just big, solid squares; a style heavily favoured in _[Vestiarium Scoticum] _. A complexity step up is the superimposed check, in which a third colour is placed centrally "on top of" or "inside" (surrounded by) one of the base under-check colours, providing a pattern of nested squares, which might then also have thin, bright and/or black over-checks added. Another group is multiple checks, typically of two broad bands of colour on a single dominant "background" (e.g. red, blue, red, green, red – again possibly with contrasting narrow over-checks). The aforementioned types can be combined into more complex tartans. In any of these styles, an over-check is sometimes not a new colour but one of the under-check colours "on top of" the other under-check. A rare style, traditionally used for [arisaid] (_earasaid_) tartans but no longer in much if any Scottish use, is a pattern consisting entirely of thin over-checks, sometimes grouped, "on" a single ground colour, usually white.[\[74\]] M. Martin (1703) reported that the line colours were typically blue, black, and red.[\[75\]] Examples of this style do not survive,[\[76\]] at least not in the tartan databases (there may be preserved museum pieces with such patterns).[\[m\]] Some tartan patterns are more abstract and do not fit into any of these styles,[\[78\]] especially in madras cloth (see [§ Indian madras], below).

- **Most basic check** – [MacGregor red-and-black] (Rob Roy), as simple as it gets: equal proportions of two colours.

- **Basic check modified** – [Wallace] red/dress, black on a slightly larger ground of red, laced with yellow and black over-checks.

- **Split check** – [MacGregor] red-and-green with a wide green band split into three to form a "square of squares", then laced with a white over-check.

- **Superimposed check** – [Ruthven], a red ground with a big green stripe "inside" a bigger blue one, then white and green over-checks.

- **Multiple checks** – [Davidson], a green ground with equal blue and black bands, then with red, blue, and black over-checks.

- **Complex example** – [Ross], combines split-check and multiple-check styles, with one blue and two green split checks on red, with blue and green over-checks.


There are no codified rules or principles of tartan design, but a few writers have offered some considered opinions. Banks & de La Chapelle (2007) summarized, with a view to broad, general tartan use, including for fashion: "Color – and how it is worked – is pivotal to tartan design.... Thus, tartans should be composed of clear, bright colors, but ones sufficiently soft to blend well and thereby create new shades." James D. Scarlett (2008) noted: "the more colours to begin with, the more subdued the final effect",[\[50\]] or put more precisely, "the more stripes to the sett and the more colours used, the more diffuse and 'blurred' the pattern".[\[6\]] That does not necessarily translate into subtlety; a tartan of many colours and stripes can seem "busy".[\[79\]] 

Scarlett (2008), after extensive research into historical Highland patterns (which were dominated by rich red and medium green in about equal weight with dark blue as a blending accent – not accounting for common black lines), suggested that for a balanced and _traditional_ style:[\[6\]] 

> any basic tartan type of design should have for its background, a "high impact" colour and two others, of which one should be the [complement] to the first and the other a darker and more neutral shade; other colours, introduced to break up the pattern or as accents, should be a matter of taste. It is important that no colour should be so strong as to "swamp" another; otherwise, the blending of colours at the crossing will be adversely affected. ... Tartan is a complex abstract art-form with a strong mathematical undertone, far removed from a simple check with a few lines of contrasting colours scattered over it.

Scarlett (1990) provided a more general explanation, traditional styles aside:[\[80\]] 

> Colours for tartan work require to be clear and unambiguous and bright but soft, to give good contrast of both colour and brightness and to mix well so as to give distinctly new shades where two colours cross without any one swamping another.

Further, Scarlett (1990) held that "background checks will show a firm but not harsh contrast and the overchecks will be such as to show clearly" on the under-check (or "background") colours.[\[50\]] He summed up the desired total result as "a harmonious blend of colour and pattern worthy to be looked upon as an art form in its own right".[\[81\]] 

Omitting traditional black lines has a strong softening effect, as in the 1970s Missoni fashion ensemble (top right) and in many madras patterns (see [§ Indian madras], below). A Scottish black-less design (now the [Mar] dress tartan) dates to the 18th century;[\[82\]] another is Ruthven (1842, above), and many of the Ross tartans (e.g. 1886, above), as well as several of the Victorian–Edwardian [MacDougal\[l\]] designs,[\[83\]] are further examples. Various modern tartans also use this effect, e.g. Canadian Maple Leaf (1964, at [§ Regional], below). Clever use of black or another dark colour can produce a [visual perception] of depth.[\[84\]] 

### Colour, palettes, and meaning

\[ [edit] \]

The brighter of the [MacLeod] tartans, known affectionately as the "loud MacLeod", in the saturated _modern_ palette.

There is no set of exact colour standards for tartan hues; thread colour varies from weaver to weaver even for "the same" colour.[\[85\]] A certain range of general colours, however, are traditional in Scottish tartan. These include blue (dark), crimson (rose or dark red), green (medium-dark), black, grey (medium-dark), purple, red (scarlet or bright), tan/brown, white (actually natural undyed wool, called _lachdann_ in Gaelic),[\[86\]] [\[n\]] and yellow.[\[45\]] [\[6\]] [\[o\]] Some additional colours that have been used more rarely are azure (light or sky blue), maroon, and vert (bright or grass green),[\[45\]] plus light grey (as seen in Balmoral tartan, though it is sometimes given as lavender).[\[89\]] Since the opening of the tartan databases to registration of newly designed tartans, including many for organisational and fashion purposes, a wider range of colours have been involved, such as orange[\[90\]] and pink,[\[91\]] which were not often used (as distinct colours rather than as renditions of red) in old traditional tartans.[\[p\]] The [Scottish Register of Tartans] uses a long list of colours keyed to hexadecimal " [Web colours] ", sorting groups of hues into a constrained set of basic codes (but expanded upon the above traditional list, with additional options like dark orange, dark yellow, light purple, etc.).[\[92\]] This helps designers fit their creative tartan into a coding scheme while allowing weavers to produce an approximation of that design from readily stocked yarn supplies.

In the mid-19th century, the [natural dyes] that had been [traditionally used in the Highlands] [\[24\]] [\[93\]] [\[94\]] [\[q\]] (like various [lichens], [alder] bark, [bilberry], [cochineal], [heather], [indigo], [woad], and [yellow bedstraw]) began to be replaced by [artificial dyes], which were easier to use and were more economic for the booming tartan industry,[\[95\]] though also less subtle.[\[96\]] Although [William Morris] in the late-19th-century [Arts and Crafts movement] tried to revive use of British natural dyes, most were so low-yield and so inconsistent from locality to locality (part of the reason for the historical tartan differentiation by area) that they proved to have little mass-production potential, despite some purple dye ( [cudbear]) commercialisation efforts in Glasgow in the 18th century.[\[95\]] The hard-wound, fine wool used in tartan weaving was rather resistant to natural dyes, and some dye baths required days or even weeks.[\[95\]] The dyeing also required [mordants] to fix the colours permanently, usually metallic salts like [alum]; there are records from 1491 of alum being imported to [Leith], though not necessarily all for tartan production in particular.[\[97\]] Some colours of dye were usually imported, especially red cochineal and to some extent blue indigo (both expensive and used to deepen native dyes), from the [Low Countries], with which Scotland had extensive trade since the 15th century.[\[98\]] Aged human urine (called _fual_ or _graith_) was also used, as a colour-deepener, a dye [solubility] agent, a lichen [fermenter], and a final colour-fastness treatment.[\[99\]] All commercially manufactured tartan today is coloured using artificial not natural dyes, even in the less saturated colour palettes.[\[100\]] [\[101\]] 

The hues of colours in any established tartan can be altered to produce variations of the same tartan. Such varying of the hues to taste dates to at least the 1788 pattern book of manufacturer William Wilson & Son of Bannockburn.[\[102\]] Today, the semi-standardised colour schemes or _palettes_ (what marketers might call " [colourways] ")[\[103\]] are divided generally into _modern_, _ancient_, _muted_, and _weathered_ (sometimes with other names, depending on weaver). These terms only refer to relative [dye] ["colourfulness" saturation] levels and do not represent distinct tartans.[\[104\]] [\[105\]] 

ModernAlso known as _ordinary_; refers to darker tartan, with fully saturated colours.[\[101\]] [\[105\]] In a _modern_ palette, setts made up of blue, black, and green tend to be obscured because of the darkness of the colours in this scheme.[\[101\]] AncientAlso known as _old colours_ (OC); refers to a lighter palette of tartan. These hues are ostensibly meant to represent the colours that would result from natural-dyed fabric aging over time. However, the results are not accurate (e.g., in real examples of very old tartan, black often fades toward khaki[\[101\]] or green[\[106\]] while blue remains dark;[\[101\]] and natural dyes are capable of producing some very vibrant colours in the first place, though not very consistently).[\[105\]] [\[80\]] [\[107\]] This style originated in the first half of the 20th century.[\[108\]] [\[105\]] This _ancient_ is not to be confused with the same word in a few names of tartans such as "ancient Campbell".WeatheredAlso called _faded_; refers to tartan that is even lighter (less saturated) than _ancient_, as if exposed for a very long time.[\[105\]] This style was invented in the late 1940s.[\[108\]] MutedRefers to tartan which is between _modern_ and _ancient_ in vibrancy. Although this type of colouring is very recent, dating only from the early 1970s, these hues are thought to be the closest match to the colours attained by natural dyes used before the mid-19th century.[\[105\]] 

Some particular tartan mills have introduced other colour schemes that are unique to that weaver and only available in certain tartans. Two examples are Lochcarron's _antique_,[\[105\]] between _modern_ and _ancient_; and D. C. Dalgliesh's _reproduction_, a slight variation on _weathered_,[\[104\]] dating to the 1940s and claimed to be based on 18th-century samples.[\[109\]] 

A general observation about _ancient/old_, _weathered/faded_, and _muted_ are that they rather uniformly reduce the saturation of all colours, while actual natural-dyed tartan samples show that the historical practice was usually to pair one or more saturated colours with one or more pale ones, for greater clarity and depth, a "harmonious balance".[\[110\]] [\[105\]] [\[104\]] According to Scarlett (1990): "The colours were clear, bright and soft, altogether unlike the eye-searing brilliance or washed-out dullness of modern tartans".[\[81\]] 

The same tartan in the same palette from two manufacturers will not precisely match; there is considerable artistic license involved in exactly how saturated to make a hue.[\[101\]] 

Tartan-generation software can approximate the appearance of a tartan in any of these palettes. The examples below are all the "Prince Charles Edward Stuart" tartan:[\[111\]] 

- _Modern_ palette

- _Ancient_ or _old colours_ palette

- _Weathered_ or _faded_ palette

- _Muted_ palette

- Lochcarron-style _antique_ palette

- D. C. Dalgliesh-style _reproduction_ palette


Scottish tartans that use two or more hues of the same basic colour are fairly rare. The best known is the British royal family's Balmoral[\[112\]] (1853, two greys, both as under-check – see illustration at [§ Family and individual], below). Others include: [Akins] [\[113\]] (1850, two reds, one as over-check and sometimes rendered purple), [MacBean] [\[114\]] (1872, two reds, one as over-check and sometimes rendered purple), Childers Universal regimental[\[115\]] (1907, two greens, both under-check), [Gordon] red[\[116\]] (recorded 1930–1950 but probably considerably older; two blues and two reds, one of each used more or less as over-checks), [Galloway] district hunting/green[\[117\]] [\[118\]] (1939/1950s, two greens, both under-check), [US Air Force Reserve] Pipe Band[\[119\]] (1988, two blues, both under-check), [McCandlish] [\[120\]] [\[121\]] [\[122\]] (1992, three variants, all under-check), [Isle of Skye] district[\[123\]] (1992, three greens, all arguably under-check, nested within each other), and [Chisholm] Colonial[\[124\]] (2008, two blues, one an over-check, the other nearly blended into green). The practice is more common in very recent commercial tartans that have no association with Scottish families or districts, such as the [Loverboy] fashion label tartan[\[125\]] (2018, three blues, one an over-check).

The idea that the various colours used in tartan have a specific meaning is purely a modern one,[\[126\]] notwithstanding a legend that red tartans were "battle tartans", designed so they would not show blood. It is only recently created tartans, such as [Canadian provincial and territorial tartans] (beginning 1950s) and [US state tartans] (beginning 1980s), that are stated to be designed with certain symbolic meaning for the colours used. For example, green sometimes represents [prairies] or forests, blue can represent lakes and rivers, and yellow might stand for various crops.[\[127\]] In the _Scottish Register of Tartans_ (and the databases before it), colour inspiration notes are often recorded by a tartan's designer. However, there is no common set of tartan colour or pattern "motifs" with [allusive] meanings that is shared among designers.[\[r\]] 

More abstractly, from an [art criticism] perspective, design historian [Richard Martin] (1988) wrote of tartans as designs and tartan as a textile class having no truly endemic or objectified meanings, but being an art that "has the property of being a vessel or container of meaning, a design form that exists not only in history but through history", capable of conveying radically different, even contradictory, contextual meanings "ever changing and evolving" through socio-cultural transmutation of the fabric's use. Thus tartan could veer from symbol of anti-union and Jacobite Highland rebellion to emblem of pan-British loyalty to empire in the space of two generations, or serve different fashion markets in the same recent decades as both a sartorial status symbol of traditional values and a punk and grunge rebel banner.[\[130\]] 

### Pre-medieval origins

\[ [edit] \]

Today, tartan is mostly associated with Scotland; however, the oldest tartan-patterned twill cloth[\[131\]] ever discovered dates to a heterogenous culture of the [Tarim Basin], c. 2100 BC through the first centuries AD[\[131\]] [\[132\]] [\[133\]] in what today is [Xinjiang], China, southeast of Kazakhstan. The tartan fabric (along with other types of simple and patterned cloth) was recovered, in excavations beginning in 1978, with other grave goods of the [Tarim or Ürümqi mummies] [\[134\]]  – a group of often [Caucasoid] (light-haired, round-eyed)[\[135\]] [\[136\]] bodies naturally preserved by the arid desert rather than intentionally [mummified]. The most publicised of them is the [Chärchän Man], buried around 1,000 BC with tartan-like leggings in the [Taklamakan Desert].[\[137\]] [\[134\]] Other twill tartan samples (with differing warp and weft) were recovered in the region from the site of Qizilchoqa in 1979, dating to around 1,200 BC; the material was woven with up to six colours and required a sophisticated loom[\[131\]] [\[138\]] (of a type that seems to have originated in the West).[\[134\]] [\[s\]] [Victor H. Mair], an archaeologist and linguist involved in the excavations wrote: "The ancient inhabitants ... undoubtedly had vibrant interactions with peoples of West Central Asia and even further west, since their magnificent textiles possess motifs, dyes, and weaves that are characteristic of cultures that lie in that direction."[\[131\]] 

Textile analysis of that fabric has shown it to be similar to that of ancient Europe.[\[139\]] According to textile historian [Elizabeth J. Wayland Barber], the late [Bronze Age] to early [Iron Age] people of Central Europe, the [Hallstatt culture], which is linked with ancient [Celtic] populations and flourished between the 8th and 6th centuries BC, produced tartan-like textiles. Some of them were discovered in 2004, remarkably preserved, in the Hallstatt salt mines near [Salzburg], Austria; they feature a mix of natural-coloured and dyed wool.[\[2\]] [\[t\]] Some date as early as 1200 BC, and Wayland Barber says of them that: "The overall similarities between Hallstatt plaid twills and recent Scottish ones, right down to the typical weight of the cloth, strongly indicate continuity of tradition. The chief difference is that the Hallstatt plaids contain no more than two colors".[\[140\]] Similar finds have been made elsewhere in Central Europe and Scandinavia.[\[6\]] 

Classical Roman writers made various references to the continental [Gauls], south of Britain, wearing striped or variegated clothing; Latin seems to have lacked an exact word for 'checked'. For example, [Virgil] in the [Aeneid] (29–19 BC, book VIII, line 660) described the Gauls as _virgatis lucent sagulis_ (or _sagalis_) meaning something like 'they shine in striped cloaks' or 'their cloaks are striped brightly'.[\[141\]] [\[142\]] [\[143\]] Other writers used words such as _pictae_ and _virgatae_[\[144\]] with translations like 'marled', 'variegated', 'particoloured', etc. Scarlett (1990) warns: "What is not reasonable is the ready assumption by many modern authors that every time one of these words, or something like it, was used, tartan was intended."[\[145\]] It might have been intended sometimes, or the writer might have just meant linear stripes like [seersucker] cloth. Both Scarlett and Thompson (1992) decry the unsustainable assumption by a few earlier modern writers (e.g. James Grant, 1886) that Gauls must have been running around in clan tartans.[\[145\]] [\[141\]] The Romans particularly wrote of Gauls as wearing striped _[braccae] _ (trousers). E. G. Cody in remarks in his 1885 edition of [John Lesley] 's _Historie of Scotland_ hypothesized that this was actually a Gaulish [loanword] and was [cognate] with Gaelic _breacan_.[\[144\]] This is one of many "tartan legends" that is not well accepted; rather, _braccae_ is considered by modern [linguists] a cognate of English _breeches_, Gaelic _briogais_ ('trousers'), etc.[\[146\]] 

The earliest documented tartan-like cloth in Britain, known as the "Falkirk tartan",[\[147\]] dates from the 3rd century AD.[\[148\]] It was uncovered at [Falkirk] in Stirlingshire, Scotland, near the [Antonine Wall]. The fragment, held in the [National Museum of Scotland], was stuffed into the mouth of an earthenware pot containing almost 2,000 [Roman] coins.[\[149\]] The Falkirk tartan has a simple ["Border check"] design, of undyed light and dark wool.[\[u\]] Other evidence from this period is the surviving fragment of a statue of [Roman Emperor] [Caracalla], once part of the [triumphal arch] of [Volubilis] completed in 217 AD. It depicts a [Caledonian] [Pictish] prisoner wearing tartan [trews] (represented by carving a checked design then [inlaying] it with bronze and silver [alloys] to give a variegated appearance).[\[150\]] [\[v\]] Based on such evidence, tartan researcher James D. Scarlett (1990) believes Scottish tartan to be "of Pictish or earlier origin",[\[151\]] though Brown (2012) notes there is no way to prove or disprove this.[\[152\]] 

Early forms of tartan like this are thought to have been invented in pre-Roman times, and would have been popular among the inhabitants of the northern [Roman provinces] [\[153\]] [\[154\]] as well as in other parts of [Northern Europe] such as [Jutland], where the same pattern was prevalent,[\[155\]] [\[156\]] [\[157\]] and [Bronze Age] [Sweden].[\[158\]] 

That [twill] weave was selected, even in ancient times, is probably no accident; "plain (2/2) twill for a given gauge of yarn, yields a cloth 50% heavier \[denser\] – and hence more weather-proof – than the [simple 1/1 weave]."[\[6\]] According to Scarlett (2008):[\[6\]] 

> \[T\]here are sound reasons why such a type of pattern-textile should have developed almost automatically in isolated, self-sufficient ... communities. Such communities are unlikely to possess large dye-vats, and so cannot piece-dye woven cloth; such processes as [batik] and [tie-dye] are unavailable. ... Stripes are the practical solution, since they use small quantities of a colour at a time and are interspersed with other colours, but the scope is limited ...; stripes across both brighten the colours and add many mixtures. From there on it is really only a matter of getting organised; the now-geometric pattern reduces to a small unit, easier to remember and to follow in a world where little was written down; it is further simplified by being split into two equal halves and, with weft as warp, the weft pattern can be followed from the warp.

Detail of Spanish altarpiece by the "Master of [Estamariu] ", late 14th century, showing a particoloured [cotehardie] with a three-colour, complex tartan

There is little written or pictorial evidence about tartan (much less surviving tartan cloth) from the [medieval era]. Tartan use in Britain between the 3rd-century Falkirk tartan and 16th-century samples, writings, and art is unclear.[\[159\]] [\[160\]] [Cosmo Innes] (1860) wrote that, according to medieval [hagiographies], Scots of the 7th–8th centuries "used cloaks of variegated colour, apparently of home manufacture".[\[161\]] Based on similarities of tartans used by various clans, including the [Murrays], [Sutherlands], and [Gordons], and the history of their family interactions over the centuries, [Thomas Innes of Learney] estimated that a regional "parent" pattern, of a more general style, might date to the 12th or 13th century,[\[162\]] but this is quite speculative. The [cartularies] of [Aberdeen] in the 13th century barred clergymen from wearing "striped" clothing, which could have referred to tartan.[\[163\]] 

In 1333, Italian [Gothic artists] [Simone Martini] and [Lippo Memmi] produced the _[Annunciation with St. Margaret and St. Ansanus] _, a wood-panel painting in [tempera] and [gold leaf]. It [features the archangel Gabriel in a tartan-patterned mantle], with light highlights where the darker stripes meet, perhaps representing jewels, [embroidery], or [supplementary weaving]. Art historians consider it an example of ["Tartar" (Mongol) textile influence]; it likely has no relation to Scottish tartan.[\[w\]] "Tartar" cloth came in a great array of patterns, many more complex than tartan (such as the fine detail in Gabriel's robe in the same painting); patterns of this sort were influential especially on Italian art in the 14th century.

There are several other continental European paintings of tartan-like garments from around this era (even back to the 13th century), but most of them show very simple two-colour [basic check] patterns, or (like the Martini and Memmi _Annunciation_ example) broad squares made by thin lines of one colour on a background of another. Any of them could represent embroidery or [patchwork] rather than woven tartan. There seems to be no indication in surviving records of tartan material being imported from Scotland in this period. In the second half of the 14th century, the artist known only as the "Master of [Estamariu] " (in Catalonia, Spain) painted an altarpiece of St Vincent, [one of the details of which] is a man in a [cotehardie] that is red on one half and a complex three-colour tartan on the other, which is very similar to later-attested Scottish tartans.

Sir [Francis James Grant], mid-20th-century [Lord Lyon King of Arms], noted that records showed the wearing of tartan in Scotland to date as far back as 1440.[\[164\]] However, it is unclear to which records he was referring, and other, later researchers have not matched this early date.

The Glen Affric tartan (c. 1500–1600 AD), discovered in a peat bog in the 1980s_Éscossois sauvage_ ('Savage Scotsman') by [Lucas de Heere], c. 1567–80

The oldest surviving sample of complex, dyed-wool tartan (not just a simple check pattern) in Scotland has been shown through [radiocarbon dating] to be from the 16th century; known as the "Glen Affric tartan", it was discovered in the early 1980s in a [peat bog] near [Glen Affric] in the Scottish Highlands; its faded colours include green, brown, red, and yellow. On loan from the [Scottish Tartans Authority], the 55 cm × 42 cm (22 in × 17 in) artefact went on display at the [V&A Dundee] museum in April 2023.[\[148\]] [\[165\]] [\[166\]] [\[167\]] [\[x\]] 

The earliest certain written reference to tartan by name is in the 1532–33 accounts of the [Treasurer of Scotland]: "Ane uthir tartane galcoit gevin to the king be the Maister Forbes" ('Another tartan coat given to the king by the Master Forbes'),[\[4\]] followed not long after by a 1538 record of clothing made by [Thomas Arthur] for King [James V of Scotland], which includes "heland tertane to be hoiss" ('Highland tartan to be [hose] ').[\[168\]] [\[169\]] [\[170\]] [\[y\]] Plaids were featured a bit earlier; poet [William Dunbar] (c. 1459 – c. 1530) mentions "Five thousand [ellis] ... Of Hieland pladdis".[\[171\]] The earliest surviving [image of a Highlander in what was probably meant to represent tartan] is a 1567–80 [watercolour] by [Lucas de Heere], showing a man in a belted, pleated yellow tunic with a thin-lined checked pattern, a light-red cloak, and tight blue shorts (of a type also seen in period Irish art), with [claymore] and [dirk].[\[172\]] It looks much like medieval illustrations of "Tartar" cloth and thus cannot be certain to represent true tartan. By the late 16th century, there are numerous references to striped or checked plaids. Supposedly, the earliest pattern that is still produced today (though not in continual use) is the Lennox district tartan,[\[173\]] (also adopted as the clan tartan of [Lennox])[\[174\]] said to have been reproduced by D. W. Stewart in 1893 from a portrait of [Margaret Douglas], Countess of Lennox, dating to around 1575.[\[175\]] However, this seems to be legend, as no modern tartan researchers or art historians have identified such a portrait, and the earliest known realistic one of a woman in tartan dates much later, to c. 1700.[\[176\]] Extant portraits of Margaret show her in [velvet] and [brocade].[\[177\]] 

Tartan and Highland dress in the [Elizabethan era] have been said to have become essentially [classless] [\[z\]]  – worn in the Highlands by everyone from high-born [lairds] to common [crofters],[\[183\]] at least by the late 16th century. The historian [John Major] wrote in 1521 that it was the upper class, including warriors, who wore plaids while the common among them wore linen, suggesting that woollen cloth was something of a luxury.[\[184\]] But by 1578, Bishop [John Lesley] of Ross wrote that the [belted plaid] was the general Highland costume of both rich and poor, with the nobility simply able to afford larger plaids with more colours.[\[181\]] (Later, Burt (1726) also wrote of gentlemen having larger plaids than commoners.)[\[20\]] If colours conveyed distinction, it was of social class not clan.[\[185\]] D. W. Stewart (1893) attributed the change, away from linen, to broader manufacture of woollen cloth and "the increased prosperity of the people".[\[181\]] 

Many writers of the period drew parallels between Irish and Highland dress, especially the wearing of a long yellow-dyed shirt called the _[léine] _ or saffron shirt (though probably not actually dyed with expensive imported [saffron]),[\[186\]] worn with a mantle (cloak) over it, and sometimes with [trews].[\[187\]] It is not entirely certain when these mantles were first made of tartan in the Highlands, but the distinctive cloth seems to get its recorded mentions first in the 16th century, starting with Major (1521). In 1556, [Jean de Beaugué], a French witness of Scottish troops at the 1548 [Siege of Haddington], distinguished Lowlanders from Highland "savages", and wrote of the latter as wearing dyed shirts "and a certain light covering made of wool of various colours".[\[188\]] [\[189\]] [\[190\]] [George Buchanan] in 1582 wrote that "plaids of many colours" had a long tradition but that the Highland fashion by his era had mostly shifted to a plainer look, especially brown tones, as a practical matter of camouflage.[\[191\]] [\[aa\]] [Fynes Moryson] wrote in 1598 (published 1617) of common Highland women wearing "plodan", "a course stuffe, of two or three colours in Checker worke".[\[194\]] 

Highland man and woman in tartan, c. 1603–1616, by [Hieronymus Tielsch]. The crude attempt to represent tartan shows a blue and green pattern with red over-check, but did not blend the colours.[\[ab\]] 

Its dense weave requiring specialised skills and equipment, tartan was not generally one individual's work but something of an early [cottage industry] in the Highlands – an often communal activity called _calanas_, including some associated [folk singing traditions]  – with several related occupational specialties (wool comber, dyer, [waulker], [warp] -winder, weaver) among people in a village, part-time or full-time,[\[196\]] especially women.[\[197\]] [\[ac\]] The [spinning wheel] was a late technological arrival in the Highlands, and tartan in this era was woven from fine (but fairly inconsistent) hard-spun yarn that was spun by hand on [drop spindles].[\[6\]] The era's commerce in tartans was centred on [Inverness], the early business records of which are filled with many references to tartan goods.[\[200\]] Tartan patterns were loosely associated with the weavers of particular areas, owing in part to differences in availability of natural dyes,[\[95\]] [\[201\]] [\[93\]] [\[202\]] and it was common for Highlanders to wear whatever was available to them,[\[9\]] often a number of different tartans at the same time.[\[203\]] [\[ad\]] The early tartans found in east-coastal Scotland used red more often, probably because of easier continental-European trade in the red dye [cochineal], while western tartans were more often in blues and greens, owing to the locally available dyes.[\[175\]] (See also [§ Colour, palettes, and meaning].) The greater expense of red dye may have also made it a [status symbol].[\[205\]] Tartan spread at least somewhat out of the Highlands, but was not universally well received. The General Assembly of the [Kirk of Scotland] in 1575 prohibited the ministers and readers of the church (and their wives) from wearing tartan plaids and other " [sumptuous] " clothing,[\[206\]] [\[207\]] while the council of Aberdeen, "a district by no means Highland", in 1576 banned the wearing of plaids (probably meaning belted plaids).[\[208\]] 

A 1594 Irish account by [Lughaidh Ó Cléirigh] of Scottish [gallowglass mercenaries] in Ireland clearly describes the belted plaid, "a mottled garment with numerous colours hanging in folds to the calf of the leg, with a girdle round the loins over the garment".[\[209\]] The privately organised early " [plantations] " (colonies) and later governmental [Plantation of Ulster] brought tartan weaving to Northern Ireland in the late 16th to early 17th centuries.[\[210\]] Many of the new settlers were Scots, and they joined the population already well-established there by centuries of gallowglass and other immigrants. In 1956, the earliest surviving piece of Irish tartan cloth was discovered in peaty loam just outside [Dungiven] in [Northern Ireland], in the form of tartan [trews], along with other non-tartan clothing items.[\[211\]] It was dubbed the "Dungiven tartan" or "Ulster tartan".[\[212\]] The sample was dated using [palynology] to c. 1590–1650[\[213\]] [\[214\]] (the soil that surrounded the cloth was saturated with pollen from [Scots pine], a species imported to Ulster from Scotland by plantationers).[\[215\]] [\[19\]] According to archaeological textile expert [Audrey Henshall], the cloth was probably woven in [County Donegal], Ireland, but the trews tailored in the Scottish Highlands[\[215\]] [\[216\]] at some expense, suggesting someone of rank,[\[217\]] possibly a gallowglass.[\[213\]] Henshall reproduced the tartan for a 1958 exhibit;[\[215\]] [\[19\]] it became popular (and heavily promoted) as a district tartan for Ulster[\[19\]] (both in a faded form, like it was found,[\[218\]] and a bright palette that attempted to reproduce what it may have originally looked like),[\[219\]] and seems to have inspired the later creation of more Irish district tartans.[\[19\]] [\[220\]] (see [§ Regional], below). There is nearly nothing in period source material to suggest that the Irish also habitually wore tartan; one of the only sources that can possibly be interpreted in support of the idea is [William Camden], who wrote in his _Britannia_ (since at least the 1607 edition) that "Highlandmen ... wear after the Irish fashion striped mantles".[\[221\]] [\[222\]] [\[ae\]] 

The earliest image of Scottish soldiers wearing tartan belted plaids and trews; 1631 German engraving by [Georg Köler].

The earliest unambiguous surviving [image of Highlanders in an approximation of tartan] is a watercolour, dating to c. 1603–1616 and rediscovered in the late 20th century, by [Hieronymus Tielsch] or Tielssch. It shows a man's belted plaid, and a woman's plaid (arisaid, _earasaid_) worn as a shawl or cloak over a dress, and also depicts diced short hose and a [blue bonnet].[\[195\]] [\[223\]] [\[ab\]] Clans had for a long time independently raised [militias], and starting in 1603, the British government itself mustered [irregular] militia units in the Highlands, known as the [Independent Highland Companies] (IHCs).[\[224\]] Being Highlanders, they were probably wearing tartan (1631 Highland mercenaries certainly were, and the ICHs were in tartan in 1709[\[224\]] and actual uniforms of tartan by 1725).[\[225\]] [\[226\]] [\[227\]] Tartan was used [as a furnishing fabric], including [bed hangings] at [Ardstinchar Castle] in 1605.[\[228\]] After mention of Highlanders' "striped mantles" in Camden's _Britannia_ of 1607,[\[221\]] poet [John Taylor] wrote in 1618 in _The Pennyless Pilgrimage_ of "tartane" Highland garb in detail (in terms that generally match what was described and illustrated even two centuries later); he noted that it was worn not just by locals but also by visiting British gentlemen.[\[af\]] [\[ag\]] The council of Aberdeen again cracked down on plaids in 1621, this time against their use as women's head-wear,[\[208\]] and the kirk in [Glasgow] had previously, in 1604, forbidden their wear during services;[\[230\]] similar kirk session rulings appeared in [Elgin] in 1624, in [Kinghorn] in 1642 and 1644, and [Monifieth] in 1643, with women's plaids more literarily censured in Edinburgh in 1633 by [William Lithgow].[\[231\]] In 1622, the [Baron Courts] of [Breadalbane] set fixed prices for different complexities of tartan and plain cloth.[\[232\]] 

In 1627, a tartan-dressed body of Highland archers served under the [Earl of Morton].[\[233\]] More independent companies were raised in 1667.[\[224\]] The [earliest image of Scottish soldiers in tartan] is a 1631 [copperplate engraving] by [Georg Köler] (1600–1638); it features Highland mercenaries of the [Thirty Years' War] in the forces of [Gustavus Adolphus] of Sweden.[\[234\]] [\[235\]] Not long after, James Gordon, Parson of [Rothiemay], wrote in _A History of Scots Affairs from 1637 to 1641_ of the belted plaid as "a loose Cloke of several [Ells], striped and party colour'd, which they gird breadthwise with a Leathern Belt ...." He also described the short hose and trews ("trowzes").[\[236\]] A 1653 map, [_Scotia Antiqua_] by [Joan Blaeu], features a cartouche that depicts men in trews and belted plaid; the tartan is crudely represented as just thin lines on a plain background,[\[237\]] and various existing copies are hand-coloured differently. [Daniel Defoe], in _Memoirs of a Cavalier_ (c. 1720) wrote, using materials that probably dated to the [English Civil War], of Highlanders invading Northern England back in 1639 that they had worn "doublet, breeches and stockings, of a stuff they called plaid, striped across red and yellow, with short cloaks of the same".[\[238\]] 

Besides the formerly often-chastised wearing of head-plaids in church, women's dress was not often described (except in earlier times as being similar to men's).[\[ah\]] The Highland and island women's equivalent of the belted plaid was the [arisaid] (_earasaid_), a plaid that could be worn as a large shawl or be wrapped into a dress. Sir [William Brereton] had written in 1634–35 (published 1844) of Lowland women in Edinburgh that: "Many wear (especially of the meaner sort) plaids ... which is \[ _[sic] _\] cast over their heads and covers their faces on both sides, and would reach almost to the ground, but that they pluck them up, and wear them cast under their arms." He also reported that women there wore "six or seven several habits and fashions, some for distinction of widows, wives and maids", including gowns, capes/cloaks, bonnets with [bongrace] veils, and collar ruffs, though he did not address tartan patterns in particular in such garments.[\[240\]] 

While tartan was still made in the Highlands as cottage industry, by 1655 production had centred on [Aberdeen], made there "in greater plenty than \[in\] any other place of the nation whatsoever",[\[21\]] though it was also manufactured in [Glasgow], [Montrose], and [Dundee], much of it for export.[\[21\]] In Glasgow at least, some of the trade was in tartan manufactured in the Highlands and the Hebrides and brought there for sale along with hides and other goods.[\[21\]] Impressed by the trade in Glasgow, [Richard Franck] in his _Northern Memoirs_ of 1658 wrote that the cloth was "the staple of this country".[\[241\]] In 1662, the naturalist [John Ray] wrote of the "party coloured blanket which \[Scots\] call a plad, over their heads and shoulders", and commented that a Scotsman even of the lower class was "clad like a gentleman" because the habit in this time was to spend extraordinarily on clothing,[\[242\]] a habit that seems to have gone back to the late 16th century.[\[243\]] A Thomas Kirk of Yorkshire commented on trews, plaids, and possibly kilts of "plaid colour" in 1677;[\[244\]] more material by Kirk was printed in the 1891 _Early Travellers in Scotland_ edited by [Peter Hume Brown], recording "plad wear" in the form of belted plaids, trews, and hose.[\[245\]] A poem by [William Cleland] in 1678 had Scottish officers in trews and shoulder plaids, and soldiers in belted plaids.[\[246\]] In 1689, Thomas Morer, an English clergyman to Scottish regiments, described Lowland women as frequently wearing plaids despite otherwise dressing mostly like the English.[\[247\]] 

Mungo Murray, c. 1683, by [John Michael Wright] ( [Scottish National Portrait Gallery] version), featuring a very complex tartan

The [earliest known realistic portrait in tartan Highland dress] is a piece (which exists in three versions) by [John Michael Wright], showing a very complicated tartan of brown, black, and two hues of red;[\[248\]] it is dated to c. 1683 and is of Mungo Murray, son of [John Murray, Marquess of Atholl].[\[249\]] [\[ai\]] 

In 1688, William Sacheverell, [lieutenant governor of the Isle of Man], wrote of the tartan plaids of the women of [Mull] in the [Inner Hebrides] as "much finer, the colours more lively, and the squares larger than the men's .... This serves them for a veil, and covers both head and body."[\[251\]] In the 1691 poem _The Grameid_,[\[252\]] James Philip of Almerieclose described the 1689 [Battle of Killiecrankie] in terms that seem to suggest that some clan militias had uniform tartan [liveries], and some historians have interpreted it thus.[\[253\]] [\[254\]] 

It is not until the early 18th century that regional uniformity in tartan, sufficient to identify the area of origin, is reported to have occurred.[\[159\]] [Martin Martin], in _A Description of the Western Islands of Scotland_, published in 1703, wrote, after describing trews and belted plaids "of divers Colours ... agreeable to the nicest Fancy", that tartans could be used to distinguish the inhabitants of different places.[\[aj\]] Martin did not mention anything like the use of a special pattern by each family.

In 1709, the [Independent Highland Companies] were wearing everyday Highland dress, not uniforms of a particular tartan, to better blend in with civilians and detect [Jacobite] treachery.[\[224\]] In 1713, the [Royal Company of Archers] (a royal bodyguard unit first formed in 1676),[\[257\]] became the first unit in service to the British crown who adopted a particular tartan as a part of their formal uniform. The militiamen of [Clan Grant] may have been all in green-and-red tartan (details unspecified) as early as 1703–04[\[258\]] [\[175\]] and wearing a uniform tartan livery by 1715.[\[259\]] It is not a surviving pattern, and modern Grant tartans are of much later date.[\[260\]] (For details on early uniform tartans, see [Regimental tartan § Pre-regiment military use].)

An account of the Highland men in 1711 had it that they all, including "those of the better sort", wore the belted plaid.[\[261\]] A 1723 account suggested that gentlemen, at least when commingling with the English, were more likely to wear tartan trews and hose with their attendants in the belted plaid,[\[261\]] which Burt also observed;[\[262\]] trews were also more practical for horseback riding.[\[263\]] Also around 1723, short tartan jackets, called in Gaelic _còta-goirid_, sometimes with slashed sleeves and worn with a matching waistcoat, made their first appearance and began supplanting, in Highland dress, the plain-coloured [doublets] that were common throughout European dress of the era; the _còta-goirid_ was often worn with matching trews and a shoulder plaid that might or might not match, but could also be worn with a belted plaid.[\[264\]] [\[ak\]] 

Rachel Gordon of Abergeldie, c. 1700 – the earliest known formal portrait of a woman in tartan

M. Martin (1703) wrote that the "vulgar" [Hebridean] women still wore the [arisaid] wrap/dress,[\[265\]] describing it as "a white Plad, having a few small Stripes of black, blue, and red; it reach'd from the Neck to the Heels, and was tied before on the Breast with a Buckle of Silver, or Brass", some very ornate. He said they also wore a decorated belt, scarlet sleeves, and head kerchiefs of linen.[\[266\]] Martin was not the only period source to suggest it was primarily the wear of the common women, with upper-class Highland ladies in the 18th century more likely to wear tailored gowns, dresses, and [riding habits], often of imported material, as did Lowland and English women.[\[176\]] [\[267\]] Highland women's dress was also sometimes simply in linear stripes rather than tartan, a cloth called _iomairt_ ( [drugget]).[\[176\]] From the late 18th century, as the arisaid was increasingly set aside for contemporary womenswear, while Highland men continued wearing the belted plaid.,[\[268\]] the ladies' plaids were reduced to smaller "screens" – fringed shawls used as headdresses and as dress accessories,[\[267\]] "a gentrification of the arisaid".[\[176\]] (Wilsons continued producing these in the first half of the 19th century.)[\[176\]] [John Macky] in _A Journey Through Scotland_ (1723) wrote of Scottish women wearing, when about, such tartan plaids over their heads and bodies, over English-style dress, and likened the practice to continental women wearing black wraps for church, market, and other functions.[\[247\]] [Edmund Burt], an Englishman who spent years in and around Inverness, wrote in 1727–1737 (published 1754) that the women there also wore such plaids, made of fine [worsted] wool or even of [silk], that they were sometimes used to cover the head, and that they were worn long, to the ankle, on one side. He added that in Edinburgh (far to the southeast) they were also worn, with ladies indicating their [Whig] or [Tory] political stance by which side they wore long (though he did not remember which side was which).[\[269\]] In Edinburgh, perennial disapproval of the "barbarous habitte" of women wearing plaids over their heads returned in 1753 writings of [William Maitland]. Women first appear in known painted portraits with tartan c. 1700, with that of Rachel Gordon of [Abergeldie]; more early examples are found in 1742 and 1749 paintings by William Mosman,. They show plaids (in tartans that do not survive as modern patterns) worn loosely around the shoulders by sitters in typical European-fashion dresses.[\[270\]] Some entire dresses of tartan feature in mid-18th-century portraits, but they are uncommon.[\[176\]] In the Jacobite period, tartan was sometimes also used as trim, e.g. on hats. Plaids were worn also as part of wedding outfits. The monied sometimes had entire wedding dresses of tartan, some in silk, and even devised custom tartans for weddings, typically based on existing patterns with colours changed.[\[267\]] 

Highland soldier and family, the woman in an [arisaid]; by [Martin Engelbrecht] c. 1717–1754[\[al\]] 

Portraits became more popular among the Highland elite starting in the early 18th century.[\[272\]] Similar cloth to that in the c. 1683 Mungo Murray portrait appears in [the 1708 portrait] of the young [John Campbell of Glenorchy], attributed to [Charles Jervas]; and [the c. 1712 portrait] of [Kenneth Sutherland, Lord Duffus], by [Richard Waitt].[\[273\]] This style of very "busy" but brown-dominated tartan seems to have been fairly common through the early 18th century, and is quite different from later patterns.[\[274\]] As the century wore on, bolder setts came to dominate, judging from later portraits and surviving cloth and clothing samples. By the early 18th century, tartan manufacture (and weaving in general) were centred in [Bannockburn], Stirling; this is where the eventually dominant tartan weaver William Wilson & Son, founded c. 1765, were based.[\[275\]] [\[am\]] 

Judging from rare surviving samples, the predominant civilian tartan colours of this period, in addition to white (undyed wool) and black, were rich reds and greens and rather dark blues, not consistent from area to area; where a good black was available, dark blue was less used.[\[6\]] The sett of a typical Highland pattern of the era as shown in portraits was red with broad bands of green and/or blue, sometimes with fine-line over-checks.[\[6\]] [\[an\]] [Oil portraiture] was the province of the privileged, and " [Sunday best] " tartans with red grounds were commonly worn in them as a [status symbol], from the early 18th century, the dye typically being made from expensive imported [cochineal].[\[176\]] [\[277\]] Green and blue more generally predominated owing to their relative ease of production with locally available dyes, with more difficult yellow[\[ao\]] and red dyes commonly being saved for thin over-check lines[\[279\]] (a practice that continued, e.g. in military and consequently many clan tartans, through to the 19th century – see [Regimental tartans]). However, even local-dyestuff blues were often over-dyed with some amount of imported [indigo] for a richer colour.[\[49\]] 

#### Union protest and Jacobite rebellion

\[ [edit] \]

The [Treaty] and [Acts of Union] in 1706–07, which did away with the separate [Parliament of Scotland], led to [Scottish Lowlanders] adopting tartan in large numbers for the first time, as a symbol of protest against the union.[\[280\]] [\[281\]] It was worn not just by men (regardless of social class),[\[282\]] but even influential Edinburgh ladies,[\[280\]] [\[283\]] well into the 1790s.[\[284\]] By the beginning of the 18th century, there was also some demand for tartan in England, to be used for curtains, bedding, nightgowns, etc., and weavers in [Norwich], Norfolk, and some other English cities were attempting to duplicate Scottish product, but were considered the lower-quality option.[\[261\]] 

[Charles Edward Stuart], "Bonnie Prince Charlie", in tartan and blue bonnet with Jacobite white cockade; portrait by [William Mosman] c. 1750

The most effective fighters for [Jacobitism] were the supporting Scottish clans, leading to an association of tartan and [Highland dress] with the Jacobite cause to restore the Catholic [Stuart dynasty] to the throne of England, Scotland, and Ireland. This included [great kilts], and [trews] (trousers) with great coats, all typically of tartan cloth, as well as the [blue bonnet]. The British parliament had considered banning the belted plaid after the [Jacobite rising of 1715], but did not.[\[285\]] Highland garb came to form something of a Jacobite uniform,[\[284\]] [\[286\]] even worn by Prince [Charles Edward Stuart] ("Bonnie Prince Charlie") himself by the mid-18th century,[\[287\]] [\[ap\]] mostly in propaganda portraits (with inconsistent tartans) but also by eyewitness account at [Culloden].[\[293\]] By this period, sometimes a belted plaid was worn over tartan trews and jacket (in patterns that need not match).[\[294\]] 

A pattern from a coat (probably Jacobite) known to date to the period of the 1745 uprising

Burt had concurred c. 1728, as did his 1818 editor [Robert Jamieson], with Buchanan's much earlier 1582 observation that tartans were often in colours intended to blend into heather and other natural surroundings.[\[295\]] This may just represent prejudices of English writers of the period, however, at least by the mid-18th century. Extant samples of Culloden-era cloth are sometimes quite colourful. One example is [a pattern found on a coat] (probably Jacobite) known to date to around the 1745 uprising; while it has faded to olive and navy tones, the sett is a bold one of green, blue, black, red, yellow, white, and light blue (in diminishing proportions). While an approximation of the pattern was first published in D. W. Stewart (1893), the colours and proportions were wrong; the original coat was rediscovered and re-examined in 2007.[\[296\]] [\[297\]] Another surviving Culloden sample, predominantly red with broad bands of blue, green, and black, and some thin over-check lines, consists of a largely intact entire plaid that belonged on one John Moir; it was donated to the National Museum of Scotland in 2019.[\[298\]] 

There is a legend that a particular still-extant tartan was used by the Jacobites as an identifier even prior to " [the '15] ". This story can be traced to W. & A. Smith (1850) in _Authenticated Tartans of the Clans and Families of Scotland_, in which they claimed that a pattern they published was received from an unnamed woman then still living who in turn claimed a family tradition that the tartan dated to 1712, long before her birth, but for which there is no evidence.[\[32\]] This hearsay tale was later repeated as if known fact by other books, e.g., Adam Frank's _What Is My Tartan?_ in 1896,[\[299\]] and Margaret MacDougall's 1974 revision of Robert Bain's 1938 _Clans and Tartans of Scotland_.[\[aq\]] Even the often credulous Innes of Learney (1938) did not believe it.[\[302\]] The pattern in question does date to at least c. 1815–26, because it was collected by the [Highland Society of London] during that span.[\[32\]] But there is no substantiated evidence of Jacobites using a consistent tartan, much less one surviving to the present.

Independent Highland Companies were re-raised from Scottish clans loyal to the Hanoverian monarchy during 1725–29.[\[303\]] [\[ar\]] [\[304\]] This time they wore uniform tartans of blue, black, and green, presumably with differencing over-check lines.[\[305\]] [\[304\]] [\[225\]] They were all normalised to one tartan during 1725–33[\[225\]] [\[227\]] [\[226\]] [\[306\]] (a pattern which probably does not survive to the present day).[\[175\]] The uniform tartan appears to have changed into a new tartan, known today as Black Watch or Government, when the companies amalgamated to become the [42nd (Black Watch)] regiment in 1739. (See [Regimental tartan].)

#### Proscription and its aftermath

\[ [edit] \]

After the failure of the [Jacobite rising of 1745], efforts to pacify the Highlands and weaken the cultural and political power of the clans[\[307\]] [\[308\]] led to the [Dress Act 1746], part of the [Act of Proscription] to disarm the Highlanders. Because tartan Highland dress was so strongly symbolically linked to the militant Jacobite cause,[\[309\]] the act – a highly political throwback to the long-abandoned [sumptuary laws] [\[309\]]  – banned the wearing of Highland dress by men and boys in Scotland north of the [River Forth] (i.e. in the Highlands),[\[as\]] except for the landed gentry[\[at\]] and the Highland regiments of the British Army.[\[311\]] The law was based on 16th century bans against the wearing of traditional [Irish clothing] in the [Kingdom of Ireland] by the [Dublin Castle administration].[\[312\]] Sir [Walter Scott] wrote of the Dress Act: "There was knowledge of mankind in the prohibition, since it divested the Highlanders of a dress which was closely in association with their habits of Clanship and of war."[\[313\]] 

Tartans recorded shortly after the act (thus probably being patterns in use in the period before proscription) show that a general pattern was used in a wide area, with minor changes being made by individual weavers to taste.[\[234\]] E.g., the tartan today used as the main (red) [Mackintosh] clan tartan,[\[314\]] recorded by the [Highland Society of London] around 1815, was found in variants from [Perthshire] and [Badenoch] along the [Great Glen] to [Loch Moy].[\[234\]] Other such groups can be found, e.g. a [Huntly] -centred [Murray] / [Sutherland] / [Gordon] cluster analysed as clearly related by Innes of Learney (1938)[\[162\]]  – distinguished from a different Huntly/ [MacRae] / [Ross] / [Grant] group identified by [Scottish Register of Tartans] and tartan researcher Peter Eslea MacDonald of [Scottish Tartans Authority].[\[315\]] [\[316\]] But Scarlett (1990) says that "the old patterns available are too few in number to permit a detailed study of such pattern distributions" throughout the Highlands.[\[234\]] Portraits of the era also show that tartan was increasingly made with identical or near-identical [warp and weft] patterns, which had not always been the case earlier, and that the tartan cloth used was of the fine twill, with even-warp-and-weft thickness, still used today for kilts.[\[254\]] [\[272\]] 

Although the Dress Act, contrary to popular later belief, did not ban all tartan[\[317\]] (or bagpipes, or Gaelic), and women, noblemen, and soldiers continued to wear tartan,[\[318\]] it nevertheless effectively severed the everyday tradition of Highlanders wearing primarily tartan, as it imposed the wearing of non-Highland clothing common in the rest of Europe for two generations.[\[311\]] [\[319\]] (While some Highlanders defied the act,[\[320\]] [\[321\]] there were stiff criminal penalties.)[\[322\]] It had a demoralising effect,[\[au\]] and the goal of this and related measures to integrate the Highlanders into Lowland and broader British society[\[312\]] was largely successful.[\[309\]] [\[324\]] By the 1770s, Highland dress seemed all but extinct.[\[325\]] However, the act may also ironically have helped to "galvanize clan consciousness" under that suppression;[\[326\]] Scottish clans, in romanticised form, were to come roaring back in the "clan tartans" run of the [Regency] (late [Georgian]) to [Victorian] period.

Jacobite women continued wearing tartan during the proscription (1749 portrait of [Flora MacDonald] by [Allan Ramsay] and [Joseph van Aken]; the tartan is a [Tullibardine] area pattern, later the [Murray] of Tullibardine clan tartan).[\[327\]] 

In the interim, Jacobite women continued using tartan profusely, for clothing (from dresses to shoes), curtains, and everyday items.[\[328\]] [\[318\]] While [Classicism] -infused portraiture of 18th-century clan nobles (often painted outside Scotland) typically showed them in tartan and "Highland" dress, much of it was loyalist regimental military stylings, the antithesis of Jacobite messaging;[\[329\]] it foreshadowed a major shift in the politics of tartan (see [§ Late Georgian], below). Nevertheless, this profuse application of tartan could be seen as rebellious to some extent, with the [reified] Highlander becoming "a heroic and classical figure, the [legatee] of primitive virtues."[\[330\]] And by the 1760s, tartan had become increasingly associated with Scotland in general, not just the Highlands, especially in the English mind.[\[331\]] 

Helen Murray of Ochtertyre, daughter and eldest child of Sir Patrick Murray of Ochtertyre, 4th Bt; c. 1750, artist uncertain. The tartans of the bodice and skirt do not match exactly, and are not surviving patterns.[\[332\]] 

After much outcry (as the ban applied to Jacobites and loyalists alike), the Dress Act was repealed in 1782, primarily through efforts of the Highland Society of London;[\[333\]] the repeal bill was introduced by [James Graham, Marquis of Graham] (later Duke of Montrose).[\[334\]] Some Highlanders resumed their traditional dress,[\[335\]] but overall it had been abandoned by its former peasant wearers, taken up instead by the upper and middle classes, as a fashion.[\[336\]] Tartan had been "culturally relocated as a picturesque ensemble or as the clothing of a hardy and effective fighting force" for the crown, not a symbol of direct rebellion.[\[337\]] R. Martin (1988) calls this transmutation "the great [bifurcation] in tartan dress",[\[338\]] the cloth being largely (forcibly) abandoned by the original Highland provincials then taken up by the military and consequently by non-Highlander civilians. During the prohibition, traditional Highland techniques of wool spinning and dyeing, and the weaving of tartan, had sharply declined.[\[95\]] [\[310\]] [\[104\]] Commercial production of tartan was to become re-centred in the Lowlands, in factory villages along the fringe of the Highlands,[\[339\]] among companies like Wilsons of [Bannockburn] (then the dominant manufacturer),[\[340\]] with the rise of demand for [tartan for military regimental dress].[\[341\]] Some tartan weaving continued in the Highlands,[\[342\]] [\[343\]] and would even see a boost in the late Georgian period.[\[342\]] Tartan by this era had also become popular in Lowland areas including [Fife] and [Lothian] and the urban centres of [Edinburgh] and [Stirling].[\[317\]] From 1797 to 1830,[\[275\]] Wilsons were exporting large quantities of tartan (for both men's and women's clothing), first to the British colonies in [Grenada] and [Jamaica] (where the affordable, durable, and bright material was popular for clothing [enslaved people]),[\[339\]] and had clients in England, Northern and Central Europe, and a bit later in North and South America and the Mediterranean.[\[344\]] [\[345\]] However, by the end of the 18th century, Wilsons had "stiff competition" (in civilian tartan) from English weavers in [Norwich].[\[346\]] 

Because the Dress Act had not applied to the military or gentry, tartan gradually had become associated with the affluent, rather than " [noble savage] " Highlanders,[\[347\]] [\[348\]] [\[349\]] from the late 18th century and into the 19th,[\[350\]] along with patriotic military-influenced clothing styles in general;[\[351\]] tartan and militarised Highland dress were being revived among the fashion-conscious across Britain, even among women with military relatives.[\[352\]] The clans, Jacobitism, and anti-unionism (none of them any longer an actual threat of civil unrest) were increasingly viewed with a sense of nostalgia,[\[183\]] [\[353\]] [\[354\]] [\[349\]] especially after the death of Prince Charles Edward Stuart in 1788,[\[355\]] even as Highland regiments proved their loyalty and worth.[\[349\]] Adopting the airs of a [Tory] sort of tartaned " [Highlandism] "[\[356\]] provided a post-union and resigned sense of national (and militarily elite) distinction from the rest of Britain, without threatening [empire].[\[357\]] Even the future [George IV] donned Highland regalia for a [masquerade ball] in 1789.[\[358\]] By the 1790s, some of the gentry were helping design tartans for their own personal use, according to surviving records from Wilsons.[\[183\]] [Jane (Maxwell) Gordon, Duchess of Gordon], was said to have "introduced tartan to [\[royal\] court]  ... wearing a plaid of the Black Watch, to which her son had just been appointed", in 1792; she triggered a fashion of wearing tartan in London and Paris, though was not immune to caricature by the disapproving.[\[359\]] 

R. Martin (1988) wrote, from a [historiographical] perspective, that after the Dress Act:[\[338\]] 

> the idea of Highland dress was stored in the collective historical attic; when it was revived in the years leading up to 1822, it had been forgotten by some two or three generations in civilian dress and could be remembered, however deceptively, however naively, to have been the ancient dress of the Highlands, not that so recently worn as the standard peasant dress before 1746. The ban on tartan was hugely successful, but so inimical to a natural historical process, that it promoted the violent re-assertion of the tartan, sanctioned by a spurious sense of history, in the next century.

The tumultuous events of 18th-century Scotland led to not just broader public use of tartan cloth, but two particular enduring tartan categories: regimental tartans and eventually clan tartans.

Soldiers from a Highland regiment c. 1744 wearing tartan [belted plaids] (great kilts).

After the period of the early clan militias and the [Independent Highland Companies] (IHCs), over 100 battalions of line, fencible, militia, and volunteer regiments were raised, between c. 1739 and the end of the Napoleonic Wars in 1815, in or predominantly in the Highlands,[\[360\]] a substantial proportion of them in Highland dress. Of these units, only some had distinct uniform tartans, and of those, only a small number were recorded to the present day.

_The [Sword Dance] _ by David Cunliffe, 1853, depicting men of the 42nd and 93rd. The dancer in the centre wears the 42nd's red band tartan.

The IHCs were amalgamated in 1739 to become the [43rd (later 42nd) Regiment of Foot],[\[361\]] called the Black Watch.[\[362\]] It was the first proper governmental [Highland regiment], part of the [British Army], and they wore the [belted plaid] ("great kilt") for dress, and the tailored [small kilt] for undress uniform.[\[363\]] [\[304\]] [\[364\]] For the former garment,[\[365\]] they used a distinctive tartan, which was designed for the unit.[\[366\]] It was originally called the "42nd tartan",[\[366\]] so it probably was not adopted until after the unit was renumbered the 42nd in 1749.[\[225\]] It seems likely that the tartan was based on those used by the IHCs earlier, but with double black "tram line" over-checks added.[\[225\]] [\[367\]] The Black Watch pattern was used by various other regiments, and it has been estimated that to clothe them all, some 30–40 miles (48–64 km) of the tartan had to be woven before 1750 alone.[\[59\]] It became the basis of various later regimental (and eventually clan) tartans.[\[225\]] It remains popular in general-public use under the names "Black Watch", "Government", and any of "old [Campbell] ", "hunting [Grant] ", or "hunting [Munro] ",[\[225\]] but today officially called "Government No. 1" by the military. (See illustration in [§ Popular designs], below.) The 42nd had separate tratans for its small kilt until c. 1814[\[365\]] [\[368\]] (also used for [grenadiers] ' belted plaids),[\[365\]] for [pipers],[\[369\]] [\[370\]] and for drummers.[\[371\]] 

After the [Jacobite uprisings], raising a regiment in service to the king was, for many Scottish [lairds], a way of rehabilitating the family name, assuring new-found loyalty to the [Hanoverian crown], and currying royal favour (even regaining forfeited estates).[\[372\]] Exempt from the [Dress Act], men in these Highland regiments of the [empire] were given Highland dress, and the "kilts and pipes that were once considered barbaric were now seen as ‘safe’ nationalism" within the army.[\[373\]] From c. 1770 onward into the 19th century, virtually all the regimental tartan was produced by the company William Wilson & Son of [Bannockburn], the dominant tartan weaver.[\[340\]] Regimental uniforms, including tartans, were left – usually within the general Black Watch-based colour scheme of black, blue, and green – to their commanders.[\[374\]] [\[375\]] 

[72nd Duke of Albany's Own Highlanders] during a trews-wearing period, c. 1844, in the tartan named for Prince Charles Edward Stuart

Some surviving early regimental tartans include:

- [Loudoun's Highlanders] (64th, raised in 1745), used a tartan similar to Black Watch, but with over-checks of red and yellow, and lacking the two black "tram lines" of Black Watch.[\[376\]] [\[377\]] 
- The [78th (Highlanders)] or Ross-shire Buffs (raised 1793), [MacLeod's Highlanders] (73rd, later 71st, raised 1777–78), and the original [Seaforth Highland Regiment] (78th, later 72nd, raised 1778)[\[av\]] first used Black Watch, then in 1787 adopted a variant of it with thin over-checks of red and white.[\[378\]] [\[379\]] [\[380\]] It eventually became the [Clan Mackenzie] tartan,[\[379\]] and it remains used as an official British military tartan, designated "Government No. 5A". A slight variation, with yellow in place of white, became one of the [Clan MacLeod] tartans.[\[381\]] 
- The [74th (Highland) Regiment of Foot] (raised 1787)[\[aw\]] used another variant of the Black Watch tartan with a black-guarded white over-check.[\[382\]] Also in 1787, the [75th (Highland) Regiment], later 75th (Stirlingshire), probably used a more distinct tartan, not based on Black Watch, of purple and black on a green ground, with thin white and black over-checks; it was later called "No. 64 or Abercromby" by Wilsons, and though it did not become adopted as an [Abercromby/Abercrombie clan] tartan, variants of it became two unrelated clan patterns.[\[383\]] 
- The [Gordon Highlanders] (100th, later 92nd, raised 1794) also wore [an altered Black Watch], this time with a thin yellow over-check.[\[384\]] [\[385\]] In a rare show of competition to Wilsons, the pattern was designed in 1793 and supplied by weaver William Forsyth of [Huntly], Aberdeen.[\[386\]] This pattern became the main tartan of [Clan Gordon].[\[387\]] [\[388\]] Something nearly identical (perhaps with the yellow over-check in a different width) was also used by the [8th (Rothesay and Caithness) Fencibles].[\[389\]] 
- The [Cameronian Volunteers] (79th, later Queen's Own Cameron Highlanders, raised 1793) used [a comparatively distinct tartan], later the family tartan of the [Cameron of Erracht] branch of [Clan Cameron].[\[56\]] [\[390\]] [\[391\]] It is structurally much like Black Watch, but without black over-checks and with a number of yellow and red over-checks. It has been said to have been designed by the unit leaderor a family member.[\[56\]] [\[392\]] 
- The [Fraser Fencibles] (raised 1794–95) used a tartan with a red ground and green and blue bands, unrelated to the Black Watch style.[\[393\]] [\[394\]] 
- The [Sutherland Highlanders (93rd)] raised 1799, and later the [Argyll and Sutherland Highlanders] (Princess Louise's, formed 1881 by amalgamation of the 93rd with the [91st Argyllshire Highlanders]), may have worn a lightened version of Black Watch,[\[395\]] [\[396\]] now sometimes used as one of the [Clan Sutherland] tartans;[\[397\]] [\[398\]] it is also still militarily used as sett "Government No. 1A".
- The Loyal Clan Donnachie Volunteers (raised in 1803) had its own uniform tartan, which was later adopted as the hunting [Robertson/Donnachie/Duncan] clan tartan.[\[399\]] [\[400\]] 
- The [Duke of Albany's Own Highlanders] (formerly Seaforth's 72nd), during a [trews] -wearing period of 1823–1881, wore a [tartan called Prince Charles Edward Stuart], similar to [royal Stewart],[\[111\]] as [shown in a period painting]. Identified in surviving cloth samples from the mid-18th century[\[63\]] (before the regiment) it is one of the oldest setts in continuous production.[\[63\]] 

For more detail, and an image gallery of these setts, see [Regimental tartan].

An Italian woman inspects the kilts of two pipe majors in Rome, 1944, toward the end of kilts as undress uniform in Highland regiments

By the turn of the 18th and 19th centuries, women in Scotland were especially "desirous to dress in the uniform plaids of their husbands", in particularly fine-quality cloth, according to records of Wilsons of Bannockburn.[\[359\]] After the Highland regiments proved themselves fearless and effective in various military campaigns, the glory associated with them did much to keep alive, initially among the gentry and later the general public, an interest in tartan and kilts, which might have otherwise slipped into obscurity due to the Dress Act's prohibition.[\[401\]] The belted plaid was abandoned in favour of the small kilt, around 1814.[\[348\]] [\[365\]] After the "clan tartanry" rush of the early to mid-19th century (see below), various later Highland regiments adopted some of the recently minted clan tartans for their uniforms (reversing the original regimental-into-clan-tartan flow). Some of these adoptions remain in regimental use today.

The [Lowland regiments] (dating in some form to 1633 and never before dressed in Highland garb but in a variant of regular army uniform) were outfitted in tartan trews in 1881. This both linked them with and distinguished them from the tartan-kilted Highland regiments.[\[402\]] Typically the "Government" (Black Watch) tartan was used, though some units later diversified. Several Highland regiments were again assigned new tartans that were clan tartans rather than unit-specific ones, into the early 20th century.[\[303\]] 

Today, about a dozen tartans are officially used (and half a dozen more unofficially) between all of the surviving historical [Scottish regiments], which have largely been amalgamated since 2006 as battalions into the [Royal Regiment of Scotland], part of the [Scottish, Welsh and Irish Division].[\[403\]] These tartans are only worn in dress and pipe-band uniforms, after the practical uniform changes introduced in the early part of World War II, which did away with tartan kilts and trews in undress uniforms. (For further information on these tartans and the modern units using them, see [List of tartans § UK military or government tartans].) Some military units in other countries also have their own tartans. In all, there are at least 38 documented tartans that have at one time or another been associated with regiments, though many of them also with clans.[\[ax\]] 

With an exception dating to 1618[\[405\]] and another to c. 1703–1715[\[259\]] (neither of which appear to have survived), it is generally regarded that tartans associated by name with [Scottish clans] mostly date to the early-to-mid 19th century,[\[9\]] [\[406\]] [\[407\]] [\[408\]] [\[141\]] [\[409\]] [\[410\]] some few to the late 18th at the earliest,[\[93\]] [\[411\]] [\[349\]] depending on how one defines "clan tartan". The belief that the clan tartans are an "ancient" system of symbolic family differentiation is pervasive, even passionate, but lacks substantive evidence even as it is overwhelmed by counter-evidence. It is what J. C. Thompson (1992) called "the Great Tartan Myth",[\[141\]] and James D. Scarlett (1990) "the Tartan Cult".[\[256\]] Lt.-Col. M. M. Haldane (1931) called it an assumption, which "has acquired such a formidable weight from mere reiteration" without "critical examination of evidence".[\[412\]] Barnes & Allen (1956) observed:[\[93\]] 

> There is no doubt that many 'setts' had been traditional to certain districts for centuries, but the theory that they were a sort of Clan uniform seems now to have been quite discredited.

Responding to the claim that clan tartans have "an ancient political significance", [Richard Martin], curator of the [Fashion Institute of Technology] museum and later the Costume Institute at the [Metropolitan Museum of Art], wrote (1988): "\[This\] assertion about history is wrong and can be demonstrated to be [perniciously] wrong".[\[413\]] According to [National Galleries of Scotland] curator [A. E. Haswell Miller] (1956):[\[409\]] 

> To sum up, the presumed heraldic or "family badge" significance of the tartan has no documentary support, and the establishment of the myth can be accounted for by a happy coincidence of the desire of the potential customers, the manufacturer and the salesman. Although the antiquity of the "clan tartans" is exaggerated, what might be termed their unofficial registration took place during the nineteenth century, and if we are prepared to accept some hundred and fifty years as sufficient to create "tradition", it may be excusable to accept the _fait accompli_ as a pleasant – and perhaps not entirely useless – national vanity.

[Highland-dress] researcher and curator John Telfer Dunbar added:[\[414\]] 

> The desire to give to relics of all kinds greater antiquity than they truly possess is manifold. It is a pity that tradition should be degraded in this way and the acceptance of such claims by later students has been a constant obstacle to research. The more difficult task of searching back to original sources has often been avoided in favour of easy acceptance.

Just that sort of research was performed by Peter Eslea MacDonald of the [Scottish Tartans Authority], who – using every available surviving company record and sample – reconstructed and traced the history of tartan patterns from the leading weaver of the late Georgian through Edwardian eras, a company instrumental in the actual design, spread, and acceptance of clan tartans. His conclusion:[\[410\]] 

> Today, books and shops dealing with [Highland dress] will be mainly, if not exclusively, concerned with clan tartans. They may seek to suggest that these are the actual patterns worn by the Scottish clans throughout history, up to and including the [Battle of Culloden] in 1746. This is not the case. The majority of the pre-1850 patterns bearing clan names can only be traced back to the early 19th century and to the famous weaving firm of William Wilson & Son of Bannockburn, near Stirling.

The notion of clan tartans has been called "an astonishingly successful marketing story"[\[406\]] and an example of an [invented tradition],[\[415\]] though one that became very well-accepted by the clans to whom it pertained and by the weaving industry starting in 1815, as well as by the general public from around 1822 – "adopted enthusiastically by both wearer and seller alike".[\[416\]] 

Precursors of clan tartans were regionally distinctive tartans (since at least the early 18th century, perhaps even the 16th), regimental uniform tartans (from 1725 onward), and personal tartans of nobles (dating to perhaps the mid-18th century if not earlier).

Today, clan tartans are an important aspect of Scottish clans, and every clan has at least one tartan attributed to its name (some officially, some not, and in a few cases one tartan is shared between multiple clans). Clan tartans may not have actually been traditional, but they became conventional.

### Long-running debate

\[ [edit] \]

John Campbell of the Bank, 1749, by [William Mosman]. The present official [Clan Campbell] tartans are predominantly blue, green and black.[\[417\]] 

Various writers on tartans have supported or opposed the idea of clans long using distinctive tartans as an identifying badge, interpreting the scarce evidence as suited their viewpoint.[\[ay\]] Where one saw a militia uniform, or an individual noble's plaid, another saw a clan identifier. The 19th-century Celtic scholar [John Francis Campbell] of Islay was certain that while tartans in general were quite old, "uniform clan tartans are no older than clan regiments", a view backed by Haldane (1931) in a series of articles in _[The Scots Magazine] _,[\[419\]] followed by many tartan writers later.

The earliest evidence summarised below could have been more a matter of militia uniform than clan-wide dress; a distinction in that era is difficult to be certain of today, because troops then were led by landed gentry and a unit was raised largely on its commander's land from his clansmen.[\[az\]] Such definitional uncertainty could also apply to the 1691 _Grameid_ poem;[\[252\]] describing what appear to be some soldierly uniform tartans,[\[ba\]] it could be reinterpreted as supporting an early notion of clan tartans, if one wanted to define that as 'what most of the men of a clan were wearing into battle'; Scarlett (1990) confirms that there has been "fiery argument" in favour of a clan tartans interpretation.[\[254\]] However, [Robert Jamieson] (1818) reported that the "field dress" plaids of [Highland] men, for war and hunting, were different from their everyday dress – made of coarser material and using patterns intended to blend into natural surroundings, the _cath dath_ or _cath da'_ ('war colour').[\[94\]] [\[bb\]] This casts some doubt on interpretation of militia tartans as general clan tartans. Most of the later regimental uniform tartans (which did not become adopted as clan tartans until around the early 19th century or the late 18th in a few cases, when they did at all) were variations on the dark, green-based Black Watch tartan, as detailed above.

J. C. Thompson (1992) noted "a typical Victorian inclination to cite previous authors with little or no attempt to evaluate their statements .... Modern analysis cannot afford to be so uncritical."[\[421\]] Scarlett (1990) relatedly observed:[\[422\]] 

> Wishful proofs are found in profusion in the literature of tartan, early and late, and consist of stating an opinion as a fact and adding some more or less relevant historical reference in support, either implying or stating that this proves the point. That it proves nothing at all is neither here nor there, so long as the manner of the presentation is sufficiently authoritative; given this treatment the wildest theory will be accepted, copied from one book to the next and so enter tartan lore. It is almost axiomatic that the wilder the theory the more acceptable it will be ....

Even D. W. Stewart (1893), who had sometimes been sympathetic toward the idea of clan tartans existing before the 19th century, wrote:[\[423\]] 

> Some ... assure us that the antiquity of the so-called clan patterns is very great, and many writers allege in general terms that these designs were used as a clan distinction from the earliest period. ... The halo of romance surrounding the [Jacobite] struggle inclined many, and still induces others, to accept as authentic and reliable, statements which in different circumstances would be more closely sifted. Thus it is that the tartans ... have won much favour, and those who find one represented as bearing their name accept it as their ancient clan pattern without the inconvenience of investigation, or of posing any awkward questions.

The Victorians also engaged in some imaginative invention. Aside from the outright forgery of the " [Sobieski Stuarts] " (see [§ 19th century broad adoption], below), another extreme case is [Charles Rogers], who in his _Social Life in Scotland_ (1884–86) fantastically claimed that the ancient [Picts] ' figural designs – which were painted or tattooed on their bodies, and they went into battle nude [\[424\]] – must have been "denoting the families or [septs] to which they belonged" and thus "This practice originated the tartan of Celtic clans."[\[425\]] Another asserted that tartan was invented around a thousand years ago by [Saint Margaret of Scotland].[\[425\]] 

Aside from the unreliability of early writers (and later copiers of them), part of the confusion and debate about clan tartans comes down to definitions. Sir [Thomas Innes of Learney], writing in 1938 and described as "immensely keen on \[tartan\] codification and the importance of it",[\[426\]] was one of the firmest proponents of the idea of very old clan tartans (in the particular sense of 'patterns consistently used for a period by certain clans', not 'patterns named for certain clans and claimed by them to the present').[\[bc\]] He held that some setts gradually became associated with particular families (clans and septs thereof) over time;[\[427\]] clan territories had mostly become stable by the 16th century.[\[428\]] D. W. Stewart's 1893 reference shows various cases of old district tartans later sometimes being identified for a time with specific families before 19th-century adoption of their own (usually different) clan tartans.[\[bd\]] Innes of Learney wrote of clan tartans that (notwithstanding the unusual 1618 case covered below) "the tendency was rather to insist upon a similarity of general hue than on similarity of detail",[\[431\]] a vague sense that is not what "clan tartan" usually refers to. He also reasoned that "it was not until about the 18th century that the clan tartans became _conscious_ and _acknowledged_ badges of identification".[\[432\]] However, the surviving period source material lacks this "acknowledgement" and does not actually suggest broad adoption of formal clan tartans (with clan names, particularity of detail, and a symbolic, identifying intent) until the early 19th century.

The " [Sobieski Stuarts] " (1842) and later D. W. Stewart (1893) made much of some changes to the [feu duty] paid in woven cloth by locals of Noraboll on the island of [Islay] to their lords. In 1587, under the [Macleans], the cloth was to be white, black, and green; in 1617, under the [Mackenzies], the demanded cloth-rent changed to white, black, and grey. These writers were sure, without any further evidence, that this represented a change of clan tartans.[\[433\]] [\[be\]] 

The only clear instance of a clan-based and specific [livery] tartan to an early date, rather than simply regional and later regimental uniformity, is found in a 1618 letter from [Sir Robert Gordon of Gordonstoun] (in the employ of the [Earl of Sutherland]) to [Murray of Pulrossie], chieftain of the [Murray] branch in [Sutherland] but subordinate to the Earl of Sutherland, chief of [Clan Sutherland] (in turn recently become subordinate to the [Gordon earls]). The letter (rediscovered in 1909) requested Pulrossie "to remove the red and white lines from the plaides of his men so as to bring their dress into harmony with that of the other septs" of Sutherland.[\[405\]] The letter does not specify the tartan to which to conform; there have been sharply conflicting interpretations, and it is not even certain that it was a tartan that survived to the present.[\[bf\]] 

This 1714 portrait, by [Richard Waitt], of the piper to the chief of [Clan Grant] does show a broad green-ish and red tartan, but it does not match any modern Grant pattern.[\[439\]] [\[bg\]] 

A case of general colour-matching: In 1703–04, the chief of [Clan Grant] ordered that his " [fencible] " men obtain clothing in red and green tartan[\[258\]] (vaguely described as "broad springed"[\[258\]] but not specified in detail).[\[175\]] The material seems not to have been provided by Grant for them in a centralised way, but left to each man to furnish by his own means (on penalty of a fine).[\[258\]] "He did not order them to wear the 'Clan Grant Tartan', as one would expect if such a tartan existed at that time."[\[441\]] Some of the modern Grant tartans also use red and green; one was designed by Wilsons of Bannockburn in 1819 as "New Bruce" and shortly adopted by both Grant of [Redcastle] [\[442\]] and [Clan Drummond];[\[443\]] one was reconstructed from an 1838 portrait;[\[444\]] another first appeared in the dubious _Vestiarium Scoticum_ of 1842[\[445\]] [\[446\]] (see below); and so on – none with pre-19th-century history. Nevertheless, D. W. Stewart (1893) proclaimed on this thin material that here was "a complete chain of evidence ... of the existence of a uniform clan pattern at the very start of the eighteenth century" – despite his own observation that portraits of leading members of the Grant family in this era do not show them wearing consistent tartans,[\[258\]] [\[254\]] much less ones that agree with modern "official" Grant tartans.[\[407\]] [\[447\]] [\[bh\]] Scarlett (1990), though thinking this presaged "the Clan Tartan Idea", notes that "had the men of [Strathspey] been accustomed to wearing uniform tartans it would not have been necessary to order them to do so"[\[254\]] (twice over). He also observes that the lairds of Grant in this period were unusually bent on uniformity, one of them even issuing moustache regulations for clansmen;[\[254\]] the Grant red-and-green order cannot be taken as typical of everyday Highland practice. Telfer Dunbar (1979) notes that Highland military discipline hardly existed: "To these independent Highland chieftains restraint of any kind was irksome and unbearable, and to impose any rigid military discipline on their followers ... \[was\] found to be impossible."[\[448\]] Nevertheless, Mackay (1924) corroborates Grant militia wearing a livery tartan in 1715.[\[259\]] 

In 1718, [Allan Ramsay] (the writer, father of the artist by the same name) published the poem _Tartana_, which combined colours with Latinised family names: "... If shining red [Campbella] 's cheeks adorn .... If lin'd with green [Stuarta] 's Plaid we view ... Or thine [Ramseia], edg'd around with blue ...." This has sometimes been taken as evidence of early clan tartans, despite possibly just referring to the edging and lining of garments (coloured facings were common on jackets of the time).[\[449\]] [\[bi\]] Worse for this hypothesis, the Campbell tartans are predominantly green, Stuart/Stewart red, and Ramsay red and green. The extant red Campbell tartans are all modern reconstructions of patterns (that are unlike each other) from portraits;[\[451\]] [\[452\]] [\[453\]] Stewart/Stuart tartans with significant green date to the early 19th century[\[454\]] [\[455\]] [\[456\]] [\[457\]] [\[458\]] or much later;[\[459\]] [\[460\]] [\[461\]] and the Ramsay blue hunting sett dates to 1950.[\[462\]] 

A Victorian volume, _Old and New Edinburgh_ (1884) by [James Grant],[\[463\]] stated that one Rev. Joseph Robertson MacGregor "attired himself in a full suit of the MacGregor tartan" in 1782, upon repeal of the Dress Act. But it misquoted the original source (and contained other errors). The original, _A Series of Original Portraits and Caricature Etchings_ (1842) by [John Kay], read: "dressed himself in the Highland costume peculiar to his clan", and says nothing of tartan, much less a suit of clan tartan.[\[464\]] While 1782 is within the late-18th-century range accepted by some researchers for some informal early clan tartans, this is not clear evidence of one.

### Lack of further evidence of early adoption

\[ [edit] \]

[John Lesley], bishop of Ross, in 1578 wrote a great deal about Highland customs, including dress, but did not include clan tartans (despite later being claimed to have been the original keeper of the _Vestiarium Scoticum_ clan-tartans manuscript, now known to be a 19th-century forgery).[\[465\]] 
In 1688, William Sacheverell, a [Manx] politician, described Hebrideans of the [Isle of Mull] all wearing plaids, but the women in a different style of colour and pattern – not a consistent "clan" tartan.[\[251\]] Rev. Thomas Morer in 1689 described Highland garb in some detail, including tartan plaids and hose (made from the same cloth), but mentions no clan patterns.[\[466\]] [Daniel Defoe] (c. 1720) wrote also in considerable detail of Highland warriors of the prior century, and noted that the men were organised into "companies, all of a name", each led by "one of their own clan or family", yet he never mentions any distinction between tartans of these different groups, instead describing them all as wearing tartan with red and yellow over-checks,[\[238\]] strongly implying a regional style. This pattern of 17th- through 18th-century writings providing specifics of tartan and Highland dress, but nothing about clan tartans, is consistent.

Contemporary portraits show that although tartan is of an early date, the pattern worn depended not on the wearer's clan, but rather regional style and personal taste. They frequently depict subjects wearing multiple tartans at once.[\[432\]] Nor do the tartans shown match current clan tartans.[\[467\]] [\[468\]] For example, the famous painting [_The MacDonald Boys Playing Golf_] (1740s), attributed usually to [William Mosman] but sometimes to [Jeremiah Davison], shows them wearing five different tartans, and they are not surviving patterns (except as later reconstructions from the painting).[\[bj\]] Period tartans were also often of differing [warp and weft] (giving more of a striped than checked appearance), unlike modern symmetrical patterns.[\[470\]] Sometimes the portraits were copied, but with tartans that do not match, as if the designs were up to artistic whim.[\[471\]] As Scarlett (1990) put it:[\[472\]] 

> "\[T\]hese portraits have one thing in common: in no case does the tartan shown bear any close resemblance to the modern 'Clan' tartan. ... There is a great lack of evidence to show that the pattern of a tartan had any important significance in the early eighteenth century

D. W. Stewart (1893) had also noted this, about both portrait tartans and "examples of tartan fabrics which can be proved to date from the risings of 1715 and 1745".[\[423\]] Many of the portraits by [Allan Ramsay] the younger show the same shoulder plaid but with colours changed, suggesting it was the artist's own studio prop and used for modelling purposes by his clients who apparently did not care about the tartan pattern.[\[26\]] [\[176\]] [\[327\]] According to [Scottish National Portrait Gallery] keeper A. E. Haswell Miller (1956):[\[409\]] 

> Authentic documentation of the tartan previous to the 19th century is limited to a comparatively small number of contemporary portraits, and is negative so far as it provides any suggestion of [heraldic] significance or "clan badge" intention.

[David Morier] 's _[An Incident in the Rebellion of 1745] _. The tartans shown generally do not resemble modern ones.

According to Trevor-Roper (1983):[\[473\]] 

> contemporary evidence concerning the [rebellion of 1745]  – whether pictorial, [sartorial], or literary – shows no
> differentiation of clans, no continuity of setts.... Tartans were a matter of private taste, or necessity, only.

[David Morier] 's well-known mid-18th-century [painting] of the Highland charge at the 1745 [Battle of Culloden] shows eight Highlanders wearing over twenty different tartans which have been analysed in detail;[\[474\]] very few of the setts painted resemble today's clan tartans,[\[305\]] [\[313\]] though they are similar to existing samples of tartan cloth from the era.[\[475\]] [\[bk\]] The method of identifying Highlander friend from foe was not through tartans but by the colour of the [bonnet] 's [cockade] or ribbon, or perhaps by the different plant sprigs worn in the cockade of the bonnet.[\[185\]] [\[141\]] [\[bl\]] [\[bm\]] In particular, the government Highland militia forces wore a badge in the form of a black cockade with red [saltire]; according to Mackay Scobie (1946), "each individual wore his own Highland dress with varied tartans, with the only uniform part being the ' [Hanoverian] ' cockade and large coloured cross on the bonnet."[\[482\]] A 1745 letter on the Jacobite troops at Culloden describes "all ye Forces as well Horse as foot were in Highland Dress except ye body Guards wh. wore Blue bound wth Red"; i.e., only the bodyguards were wearing a uniform, and it was not of Highland dress.[\[483\]] 

One of many tartan legends has it that the Highland-dress ban of the [Dress Act] was enacted because tartans were used as clan-identifying symbols or uniforms, but not a trace of this idea can be found in period sources. To the contrary, Burt (1727–37) was explicit that English objection to Highland dress (since perhaps 1703–04)[\[339\]] was _general_, because the garb served to distinguish the Highlanders as a people apart from the [Lowlanders] and other British (not distinguish Highlander from Highlander).[\[484\]] [\[bn\]] Defoe (c. 1720) likewise mocked Highland dress as what he saw as a clownish costume that set Highlanders apart from everyone else, not each other.[\[238\]] Similarly, in an account of Jacobite trials, it was asked whether defendants had worn "Highland cloaths" in general, with no mention of clan-identifying patterns.[\[485\]] Extant [MacDonald] tartan fragments from the Battle of Culloden do not match each other or any current clan tartan named MacDonald.[\[481\]] [Lord President] [Duncan Forbes of Culloden], keen on punishing the Jacobites with disarmament and other penalties, wrote a detailed letter laying out _pro_ and _con_ points (mostly _con_) regarding the proposed Highland-dress ban before Parliament passed it, yet never indicated anything like clan tartans, something that would have been a key argument to address.[\[486\]] 

C. C. P. Lawson (1967) raised a point of logic: "Remembering the continuous clan feuds and the consequent state of more or less perpetual hostilities, a recognisable clan plaid would have been a positive danger to the wearer outside his own territory."[\[487\]] This may explain why the handful of early apparent examples of groups of men in similar tartan seem to have the nature of militia uniforms and are mentioned in the context of " [fencible] " bodies or outright battle (possibly aside from the 1618 case).[\[bo\]] Lawson also states: " [The '45] supplies no evidence that tartans were used as clan insignia .... Relics of those tartans which were worn at Culloden or of the pre-1745 period bear no resemblance to any known modern tartan."[\[487\]] The [Lord Lyon King of Arms] in 1948, Sir [Francis James Grant], wrote that pre-1745 tartans were qualitatively different from those of the 19th century and later.[\[488\]] [Scottish United Services Museum] curator Dunbar (1979) notes this as well.[\[489\]] 

The Jacobite poets wrote much about the rousing appeal of Highland clans and Highland dress, even tartan specifically, but never mentioned clan tartans.[\[490\]] Similarly, multiple large volumes of traditional Highland folklore were collected and published by [John Francis Campbell] in 1860–62 (revised 1890–93), and [Alexander Carmichael] (who also collected tartan samples) in 1900, but the period materials in them are devoid of any recorded references to clan tartans[\[490\]] (despite post-dating the popularisation of the notion among city-dwellers and the upper class).

The idea of groups of men wearing the exact same tartan as an identifier is thought to originate (aside, again, from the odd 1618 case) from [Highland regiment] units in the 18th century, starting with the Black Watch in 1739/1749.[\[305\]] [\[225\]] According to Trevor-Roper (1983):[\[491\]] 

> \[I\]t was probably their use of it which gave birth to the idea of differentiating tartan by clans; for as the Highland regiments were multiplied ... so their tartan uniforms were differentiated; and when the wearing of tartan by civilians was resumed, and the romantic movement encouraged the cult of the clan, the same principle of differentiation was easily transferred from regiment to clan.

Particular regiments were often dominated by men raised from the same clan lands, and this may have blurred the line between regimental uniform and clan-identifying tartan. (And several tartans of extinct regiments survive today as clan tartans.) Newsome (2006) writes: "the practice of clans wearing these regimental tartans may have in fact been the inspiration for the 'clan tartan' system as we now know it."[\[225\]] Telfer Dunbar (1979), on the idea of the early [Independent Highland Companies] using distinct uniform tartans: "I feel sure that here we have much of the 'clan tartan' origin."[\[492\]] The end of the 18th and beginning of the 19th centuries brought an unprecedented level of influence of military clothing styles, including Highland regimental, on civilian attire (even for women), especially among the social elite connected to regiments.[\[351\]] Some regimental tartans appear to have been named after their commanding officers, and this may be how they came to be associated with family/clan names over time.[\[493\]] [\[379\]] Banks & de La Chapelle (2007):[\[334\]] 

> the notion of differentiation of tartan by clans might have evolved from this desire to distinguish on Highland regiment uniform from another. Certainly, its classification for military use laid the groundwork for many subsequent designs and the movement toward uniformity.

Scarlett (1990) also observed the connection to regional or "district" tartans:[\[256\]] 

> \[B\]asic patterns prevailed over wide areas and were modified by local weavers for their own ends. It can easily be seen that a local pattern of this kind, made for a captive clientele, might have become identified with the people of that locality who were themselves predominantly of one Clan or family group and its adherents and, when the belief grew up that Clan tartans had been worn since the beginning of time, have become, by retrospection, the Clan tartan of that group. There is no evidence that the Highlanders themselves looked on tartan in that light, however ....

Unknown Jacobite lady in Tullabardine tartan, c. 1740–1750, attributed to [Cosmo Alexander] 

Haswell Miller (1956) similarly noted: "We can ... readily accept that certain dyes would prevail in different regions and that traditional types of pattern might be followed in various parts."[\[409\]] [Martin Martin] in 1703 had described tartans as being identifiably specific to particular regions, but not clans.[\[255\]] There are numerous cases of tartans loosely associated with districts later becoming clan tartans. The best-documented case[\[7\]] is the [Tullibardine] pattern, one of the few modern clan tartans that can be traced (at all, not as a clan tartan) to the pre-proscription period.[\[327\]] [\[7\]] It was long associated with [Perthshire], and later adopted as the [Murray] of Tullibardine clan tartan, but sold by Wilsons as simply "Tullibardine" as late as c. 1830–40, and it was found for sale in a market by W. & A. Smith around 1850, who also said it was worn then by [Charles Murray, Earl of Dunmore];[\[327\]] the first record of the pattern as "Murray of Tullibardine" is in their 1850 book.[\[7\]] It appears in at least five early portraits; four date to c. 1740–1750, the first of an unknown female sitter attributed to [Cosmo Alexander],[\[bp\]] and three by [Allan Ramsay] (with the cloth painting completed by [Joseph van Aken]) which are not of any known Murrays (but of [a Campbell], [a MacLeod], and [a MacDonald]).[\[327\]] It is not until 1770 that a known Murray is painted wearing it ( [John Murray, Earl of Dunmore], by [Joshua Reynolds]),[\[327\]] which still does not necessarily make it a "clan tartan" at that early a date ("evidence for its historic use by that branch \[of Clan Murray\] is circumstantial at best").[\[7\]] The oldest version of it differs slightly as to colours and sett from the modern clan version.[\[7\]] [\[bq\]] 

Similarly, according to the [Scottish Register of Tartans], [the district tartan] for [Huntly],[\[315\]] originating in more complex form as [the personal tartan] of a Marchioness of Huntly[\[495\]] (probably [Henrietta Mordaunt]), was in use as a regional tartan since at least "the '45", and worn at Culloden by clansmen of [Brodie], [Forbes], [Gordon], [MacRae], [Munro], and [Ross], "which gives a strong indication of the greater antiquity of the 'District' setts compared to the Clan tartans."[\[495\]] 

Some surviving early records of tartan manufacture are those of the Orphan Hospital Manufactory and Paul's Work, in Edinburgh, for the period 1734–37 and 1751–52; tartans were not named but given numeric designations such as "No. 2nd".[\[169\]] In 1745, the _[Caledonian Mercury] _ of Edinburgh carried an advertisement for a "Great Choice of Tartans, the newest Patterns" – not clan or even district tartans, but newly devised ones, suggesting a fashion market driven by novelty not supposed "heraldic" traditions. Even clan-tartans booster D. W. Stewart (1893) conceded: "This advertisement, it may be urged, is a stumbling-block in the way of those who argue for the antiquity of clan patterns; for it seems peculiar that, when the city was filled with Highlanders of all ranks and many clans, they should be offered not their ancient setts ...."[\[496\]] Other advertisements for tartan from 1745 to the early 19th century did not mention clans, or focus on the patterns at all, but rather on the forms in which the cloth could be ordered.[\[497\]] Even immediately after the repeal of the Dress Act in 1782, the demand was for "latest patterns and bright colours",[\[498\]] with no hint of a family heraldry aspect.

William Wilson & Son of Bannockburn, just south of the dividing line between the Highlands and Lowlands,[\[499\]] were the first large-scale commercial tartan producers;[\[105\]] founded c. 1765,[\[9\]] they had become the foremost supplier of tartan to the military by around 1770, and the dominant tartan weaver in general.[\[340\]] It was an endeavor that required the introduction of tartan recording, of standardisation of setts and dyes, and of consistency and quality control.[\[9\]] [\[500\]] [\[499\]] Wilsons corresponded with their agents (especially the son, James Wilson)[\[102\]] in the Highlands to get information and samples of cloth from the various districts to enable them to reproduce "perfectly genuine patterns". Wilsons recorded over 200 setts in addition to ones they designed in-house, collected in their 1819 _Key Pattern Book_ of around 250 setts[\[9\]] (among earlier in-house volumes to the 1770s). These tartans were numbered, named after places, or given fanciful names such as " [Rob Roy] ", later sometimes family names (after prominent members), sometimes foreign names like "Coburg", but usually not those of clans,[\[275\]] [\[399\]] [\[501\]] [\[141\]] nor, when they did, often matching present clan patterns.[\[502\]] [\[br\]] A large proportion of the modern clan tartans, however, _can_ be traced to this work – just often originally with numbers or unrelated names.[\[399\]] The evidence of direct adoption from Wilsons happening frequently completely overwhelms "ancient clan tartans" sentiment.[\[bs\]] 

The _Scottish National Dictionary_, in providing an unusually discursive definition of _tartan_, includes: "\[T\]owards the end of the 18th century and largely through the enterprise of Messrs Wilson, weavers in Bannockburn, a series of tartans, each ascribed to a certain clan, was devised and is now accepted as authoritative, though almost entirely unhistorical."[\[515\]] Analysing the direct and strong influence of Wilsons' _Key Pattern Book_ ( _KPB_) on the later adoption of clan tartans (see next section), Eslea MacDonald (2012) concluded:[\[516\]] 

> Some of the 1819 _KPB_ setts no longer retain their original names, others were altered or were the basis for a number of variations which were named or simply numbered .... Whatever their origins, these patterns gave rise to the idea of clan tartans as we know them today. In a very few cases a pattern's origins may have indeed been a lot older than the 1819 _KPB_ but their contemporary names were almost always the work of Wilsons or subsequent writers.

The Cockburn Collection of 56 tartan samples (some of them duplicates) was put together between 1810 and c. 1825 (most likely 1816–25)[\[517\]] [\[399\]] by Lt.-Gen. Sir William Cockburn, and is now in the [Mitchell Library] in [Glasgow].[\[399\]] [\[518\]] This collection does ascribe particular family names to many of these setts (probably naming them after prominent individuals),[\[517\]] but only sometimes corresponding to current clan tartan associations (indeed, some patterns that are today associated with particular clans were given multiple different names in the Cockburn Collection).[\[bt\]] There are many conflicts in name-to-pattern associations between this collection and that of the [Highland Society of London] around the same time.[\[517\]] 

Even [David Stewart of Garth], who was to become one of the chief proponents of the idea of clan tartans, observed in 1814 only that various heads of families seemed to have selected personal tartans and that there were also district tartans.[\[520\]] When Garth and his Highland Society of London solicited clan tartans from chiefs in 1815 (see below), Col. Alexander Robertson of Struan, Chief of [Clan Robertson/Donnachaidh/Duncan], wrote back:[\[521\]] 

> It does not appear to be appertained, either by tradition or by authentick history, that the different Clans in the Highlands of Scotland, wore any distinctive pattern or tartan. It is well known that they all had particular [Colours, or Standards], emblematical of some of their most honourable attachments, but as far as I have been able to discover, they wore no uniform Garb.

At the beginning of the 19th century, a letter from an Inverness tailor to Wilsons of Bannockburn requested fine tartan cloth to be used for women's clothing, because the fashion was to wear husbands' regimental tartans (not clan tartans).[\[359\]] In 1829, responding negatively to the idea of Lowland and Borders "clans" wearing their own tartans, Sir [Walter Scott]  – who was instrumental in helping start the clan-tartans fervour in the first place – wrote "where had slept this universal custom that nowhere, unless in this MS. \[the draft _Vestiarium Scoticum_, published ultimately in 1842\] is it even heard of? ... I would rather suppose that the author had been some tartan-weaver zealous for his craft, who wished to extend the use of tartan over the whole kingdom."[\[522\]] Also in the same year, he wrote: "The idea of distinguishing the clans by their tartans is but a fashion of modern date in the Highlands themselves".[\[523\]] 

Another of the tartan legends has it that [Alexander Gordon, 4th Duke of Gordon], commissioned the design of a clan tartan based on Black Watch in 1793, kept one of three designs, then passed the other two on to [cadet branches] of the family.[\[524\]] This tale can be traced in unembellished form to 1793 records of weaver William Forsyth of Huntly which do not say this at all, only that Forsyth provided three potential designs for a _regiment_ tartan, with yellow over-checks in various configurations, of which the Duke selected no. 2 for the unit, the [92nd Gordon Highlanders].[\[386\]] 

Scarlett (1990) surmises that there must have been _some_ informal clan tartans – a confluence of district tartans that had become associated with particular families, and adoptions of regimental uniform tartans by them – by the late 18th century, otherwise there is no explanation for where Stewart of Garth got the idea.[\[525\]] Scottish United Services Museum curator Maj. I. H. Mackay Scobie (1942),[\[411\]] Haswell Miller (1947),[\[526\]] and Barnes & Allen (1956),[\[93\]] also zeroed in on this timeframe. Eslea MacDonald (2010-11) observes, for example, the Murrays using the common Tullibardine regional pattern in portraits and in bed hangings at their clan seat, [Blair Castle], 1770 – c. 1780 and possibly earlier.[\[327\]] Telfer Dunbar (1979), considering the 1703–04 Grant proclamation and the early regiments, suggests that "any uniformity of tartan was only to be found in an organised body of troops, or the 'tail' or following of a chief."[\[441\]] These possible comparatively early, informal clan tartans of the late-18th-century simply cannot usually be identified (when they survived) until the early 19th century.

### 19th century broad adoption

\[ [edit] \]

It has been suggested by a modern chief of [Clan Campbell] and another of the clan executives that the clan had informally adopted what is now known as [old Campbell or Black Watch] tartan by the early 19th century, because so many of their men were already wearing it as part of regimental uniform[\[417\]] (three of the Independent Highland Companies that amalgamated into the [Black Watch] regiment in 1739–1751 were Campbell units).[\[492\]] Some time in or after 1806, when he became clan chief, the city-dwelling politician [George Campbell, 6th Duke of Argyll], created his own personal tartan, of Black Watch with a thin over-check of white and yellow added,[\[527\]] "to differentiate himself from the rest of the Campbells", i.e. because they were already so often wearing Black Watch.[\[417\]] This essentially may have been one of the earliest attested surviving clan tartans (and the duke's variant was an early declared personal tartan of a noble).[\[bu\]] 

Maj.-Gen. [David Stewart of Garth], c. 1820, in [royal Stewart tartan] 

The idea arose among Scottish expatriates (especially in the [Celtic societies], which encouraged members to wear "appropriate" tartans),[\[528\]] eager to "preserve" Highland culture,[\[9\]] [\[528\]] [\[529\]] that tartans had traditionally been named and that the names represented clan affiliations.[\[9\]] Among them was Maj.-Gen. [David Stewart of Garth], a Black Watch veteran and vice-president of the [Highland Society of London] [\[530\]] (founded 1778).[\[399\]] He and fellow members Sir [John Sinclair] and [Andrew Robertson] were among the first proponents of the idea of clans being identified by tartans, despite the lack of evidence.[\[530\]] [\[531\]] [\[532\]] [\[517\]] [\[bv\]] The society also counted among its members the [Prince of Wales] [\[534\]] (the future [George IV], who was to become instrumental to clan "tartanry" in 1822) and two [dukes], among various itinerant actual Scots[\[535\]]  – including [James Macpherson] of " [Ossian] " fame (or infamy).[\[536\]] 

[Elizabeth Gordon (_née_ Brodie), Duchess of Gordon], c. 1813–1814 by [Alfred Edward Chalon]; she appears to be wearing Black Watch (42nd regiment) tartan, as it lacks the yellow over-check of 92nd Regiment, which became the Gordon clan tartan. This was only about a year before the Highland Society solicited clan patterns.

On 8 April 1815, the society resolved that the clan chiefs each "be respectfully solicited to furnish the Society with as much of the Tartan of his Lordship's Clan as will serve to Show the Pattern and to Authenticate the Same by Attaching Thereunto a Card bearing the Impression of his Lordship's Arms."[\[537\]] Many had no idea of what their tartan might be or whether they had one,[\[538\]] some provided only a vague description, and some claimed they had none.[\[534\]] But plenty were keen to comply and to provide authentic signed and [sealed] samples;[\[537\]] [\[534\]] [\[539\]] many (possibly most) turned to Wilsons of Bannockburn for a design,[\[528\]] [\[504\]] while some directly adopted a regimental tartan as their own,[\[399\]] [\[9\]] [\[bw\]] and still others adapted designs from old portraits of clan nobles.[\[534\]] [\[bx\]] [Alexander Wentworth Macdonald], Baron [Macdonald], wrote back to the society: "Being really ignorant of what is exactly The Macdonald Tartan, I request you will have the goodness to exert every Means in your power to Obtain a perfectly genuine Pattern, Such as Will Warrant me in Authenticating it with my Arms."[\[537\]] Finding no agreement within his clan on a pattern, Robertson of Struan ended up adopting the regimental tartan of the Loyal Clan Donnachie (Robertson) Volunteers; being based on the Black Watch pattern, it could not pre-date the late 18th century.[\[542\]] On the other hand, [Sir John Macgregor Murray] of [Clan Gregor], who had spent most of his life in England and India, was writing instructions on the use of his clan's tartan by December 1818.[\[543\]] In 1819, Wilsons were engaged in correspondence to "send ... specimens of all coloured Tartans used by these Clans ...said to exceed thirty in number", to a writer in Italy preparing a book on clan tartans;[\[544\]] the same year, they also produced their _Key Pattern Book_ of over 200 tartans (representing only a fraction of their total tartan output, presumably the most marketable designs, and not always under the same names as found in contemporary collections of Wilsons' tartan samples such as the Cockburn collection and that of the Highland Society).[\[545\]] 

According to Trevor-Roper (1983), Wilsons were in a direct "alliance" with the Highland Society of London by 1819; the former saw a great marketing opportunity, and the latter provided a veneer of respectability as Wilsons helped the society pin tartans to clan names.[\[546\]] [\[547\]] Banks & de La Chapelle (2007) concur: "The Wilson firm worked in tandem with the Highland Society, preparing tartan samples for the latter to certify as belonging to one clan or another."[\[502\]] Clan nobles (who sometimes contradicted each other, within the same clan, on what their tartan was or should be)[\[548\]] were apparently also "ready to adopt changes at the mere dictation of fancy" to improve designs.[\[549\]] From the "authentications" they received 1815–26, the society built up a clan-tartan collection (now in the [National Museum of Scotland]), with 34 authenticated specimens and about 40 others.[\[534\]] [\[550\]] [\[by\]] Other such societies generated more interest, belief, and demand.[\[551\]] According to the analysis by Eslea MacDonald (2012), "Most of the pieces sealed \[by clan chiefs\] and deposited with the Society at that time were patterns woven, and in the majority of cases appear to have been designed, by Wilsons. This obviously means they could not have existed prior to c1765 when William Wilson started his business."[\[542\]] So many of Wilsons' stock tartans from their _Key Pattern Book_ of 1819 were being renamed for clans that J. C. Thompson (1992) wrote: "Clearly the naming of tartans was just getting started in 1819",[\[141\]] and: "There was nothing people wanted more than an ancient clan tartan system, and they were determined to have one."[\[552\]] By 1821, advertisements for tartan cloth had shifted to include language like "true", "warranted", and "original", and began to stress antiquity and family connections.[\[553\]] 

The 1822 [visit of George IV to Scotland], in Highland garb and with a great deal of tartan-festooned public ceremony (arranged by Stewart of Garth and [romanticist] writer Sir [Walter Scott] of the [Celtic Society of Edinburgh]), had a profound tartan-boosting effect, including the invention of new clan-specific tartans to suit[\[554\]] [\[399\]] [\[555\]] (or renaming of old tartans to have clan names),[\[9\]] [\[556\]] as clan chiefs had been asked to attend in clan tartans.[\[9\]] It caused a boom in the tartan-weaving business,[\[503\]] and a broader public notion that tartans should be named for families.[\[399\]] [\[9\]] "When these two \[Scott and Stewart of Garth\] stage-managed the King's visit ... they fixed the Clan Tartan idea in the public mind."[\[556\]] Wilsons' pattern book in 1822 had expanded significantly with tartans named for clans, in addition to all their numbered setts.[\[514\]] According to R. Martin (1988), Wilsons and other weavers were made aware of the king's planned visit three or four years in advance, and had all that time to pad their catalogues with additional designs and to assign clan names to patterns often "probably picked entirely out of the air."[\[557\]] He added that "anyone looking at the tartan pattern books of 1819 to 1822 would have realized the cacophony of different names for the same \[pattern\], the chaos of clan attributions, and the complete capriciousness of that association."[\[558\]] A telling letter from a tailor, archived among the Wilsons papers, to the company in 1822 asked: "Please send me a piece of [Rose] tartan, and if there isn't one, please send me a different pattern and call it Rose."[\[505\]] 

By 1824, an invitation to the [Atholl Gathering], one of the earliest of the modern [Highland games] festivals, made it clear that participants should arrive "in the plaids or Tartans of their Clans".[\[559\]] In 1829, Sir [Thomas Dick Lauder] complained to Walter Scott about all the "uncouth, spurious, modern \[tartans\] which are every day manufactured, christened after particular names, and worn as genuine",[\[560\]] and also of "clans ... at this moment ignorantly disputing for the right to the same tartans which in fact belong to none of them but are merely modern inventions for clothing Regimental Highlanders".[\[561\]] Scott himself was backpedalling away from what he had helped create, and was suspicious of the recent claims about "ancient" clan tartans: "it has been the bane of Scottish literature and disgrace of her antiquities, that we have manifested an eager propensity to believe without inquiry and propagate the errors which we adopt too hastily ourselves."[\[562\]] 

The [Scott] tartan invented by the "Sobieski Stuarts" around 1829, eventually published in the 1842 _Vestiarium_. Based on the c. 1819 [MacGregor], the tartan was rejected (along with other Lowland family tartans) by [Walter Scott], but remains the most popular Scott tartan.[\[563\]] 

A wave of highly dubious books were published, all purporting to reveal true clan histories and tartans; they presented little in the way of evidence, but they caused enthusiastic adoption of clan tartans. The first of these, in 1831, was _The Scottish Gaël or Celtic Manners, as Preserved Among the Highlanders_ by [James Logan], containing 54 tartans (based on Wilsons' collection, that of the Highland Society of London, and other sources he alleged but did not name, plus some he collected or devised himself); the author ignored advice from Wilsons on which were actually old tartans, and included some erroneous, fictitious, and incomplete setts.[\[564\]] [\[565\]] [\[bz\]] He also included untenable assertions about the designs' antiquity; "Logan took the line that everything Highland was rooted impossibly far in the past", and was mocked in _[The Pall Mall Gazette] _ for it.[\[556\]] Meanwhile, Wilsons and other weavers simply adopted some patterns from his book due to demand,[\[566\]] [\[503\]] [\[567\]] and also took to inventing all-new "clan tartans" to keep up with the growing market for patterns associated with names.[\[568\]] The archived correspondence of Wilsons in the 1830s shows that the company was frequently pressured by merchants for the "truest" and "real" clan patterns.[\[569\]] Logan, despite himself being involved in sham clan tartanry, observed that "fanciful varieties of tartan ... were being passed off as genuine" by Wilsons and other weavers.[\[505\]] 

Logan was followed in 1842 by _[Vestiarium Scoticum] _ by the so-called [Sobieski Stuarts], purporting to contain 75 centuries-old clan tartans, illustrated in great detail but from vague textual descriptions.[\[570\]] Although it is now known to have been largely a forgery,[\[571\]] [\[572\]] [\[ca\]] many of the visual tartan designs in this "final – and fantastic – codification"[\[572\]] of clan tartans were nevertheless adopted and still survive as accepted tartans of clans,[\[576\]] [\[577\]] especially for Lowland clan names (which had hitherto never been associated with tartan or Highland garb at all).[\[399\]] [\[183\]] [\[578\]] [\[579\]] Starting in 1822, [Borders] families had been redefining themselves as clans, and the book encouraged more of them to take on clan tartans and open clan societies.[\[580\]] Modern critics have even praised the lasting socio-cultural accomplishement of the Sobieski-Stuarts' works in helping establish a systemic clan-tartans legend while recognizing the bogus nature of their material.[\[cb\]] 

(The socio-political background of these events and their overall impact on tartan in general are presented at [§ Late Georgian] and [§ Victorian], below.)

Trevor-Roper (1983) believed that the Sobieski Stuarts had been in direct communication with manufacturers like Wilsons, and were advising clan chiefs on which tartans to choose, from as early as 1819;[\[582\]] J. C. Thompson (1992) agreed.[\[552\]] Dick Lauder certainly said they were doing so by 1829, and that Wilsons were already weaving many Sobieski Stuart samples by that year;[\[583\]] the company's own records the same year confirm orders for designs from the Sobieski Stuarts.[\[584\]] _Vestiarium_ was followed soon after by _The Costume of the Clans_ published by the Sobieski Stuarts in 1845;[\[566\]] the illustrations it provided, allegedly based on portraits, have proven to be largely a mixture of error and invention.[\[585\]] By 1849, John Sobieski Stuart was in discussion with a publisher to produce a new, cheaper edition of _Vestiarium_, in a series of small volumes "so that it might be rendered as available as possible to manufacturers and the trades in general concerned in Tartan ... and it was for the\[ir\] advantage and use ... that I consented to the publication." The same letter also proposed binding the manufacturers by contract to produce tartans that conformed exactly to the Sobieski Stuarts' specifications.[\[586\]] 

Weavers like Wilsons were complicit, not passive, in the tartan boom. They had lost much of their military and export markets after major wars ended and colonies in the Americas and elsewhere had become more self-sufficient.[\[509\]] "The concept of differentiated clan tartans, newly popularized, was codified and developed by canny manufacturers .... Since the repeal of the \[Dress Act\], these tartan makers saw the prospect of a vast new market."[\[276\]] According to [Alastair Campbell of Airds]:[\[416\]] 

> One factor which has been decisive throughout the history of the development of the modern system \[of clan tartans\] has been the influence of the tartan manufacturers .... As with any marketing organisation it was important to maintain a steady flow of "new products", and every year new patterns were produced .... The idea of individual tartans providing a clan or family identity was a most attractive one, which was adopted enthusiastically by both wearer and seller alike.

" [Maclachlan] ", a romanticised Highland warrior image from Logan and McIan's _The Clans of the Scottish Highlands_, 1843

This heavy promotion for decades of the clan-tartans idea has been described as "inciting a rush to lay claim to the tartan to which one's family was 'entitled'".[\[587\]] Other 19th-century clan-tartan works followed.[\[576\]] Logan (by then president of the Highland Society of London)[\[582\]] returned, with illustrator [Robert Ranald McIan], with _The Clans of the Scottish Highlands_ in several volumes 1843–1849, which had inconsistently hand-coloured portraits of chiefs in clan tartans, which he stated were "acknowledged by the present chiefs and clans".[\[399\]] _The Clans of the Highlands of Scotland_ in 1850 by Thomas Smibert drew heavily on Wilsons' patterns and on Logan.[\[588\]] In the same year, _Authenticated Tartans of the Clans and Families of Scotland_ by William & Andrew Smith was based on trade sources such as Wilsons, competing mill Romanes & Paterson of Edinburgh, and army clothier George Hunter's pre-1822 collection of setts (and some consultation with historian [W. F. Skene]).[\[588\]] [\[421\]] Also in 1850, Gen. James Browne published _History of the Highlands and the Highland Clans_, another _Vestiarium_ knock-off.[\[582\]] 

In 1871, Gaelic folklorist and Highland dress fancier [John Francis Campbell] of Islay wrote in _Clan Tartans_:[\[589\]] 

> I have come to the conclusion that Sir Walter Scott and my friends the Editors of the _Vestiarium Scoticum_ and Scotch Manufacturers of tartans are together responsible for the present flourishing and luxuriant crop of brilliant clan tartans .... I do not believe that the distinctions which are now made as to Clan Tartans ever prevailed at all, till Tartan became an important manufacture in Scotland in the reign of George the 4th

J. Claude produced the tartan pattern sample book _Clans Originaux_ in Paris c. 1880, and some tartans were adopted from it,[\[cc\]] though its 185 samples were mostly of already-known tartans.[\[19\]] A second edition of _The Costume of the Clans_ was published in 1892.[\[592\]] Another influential book was Donald W. Stewart's _Old & Rare Scottish Tartans_ (1893), which included swatches of fabric; several accepted clan tartans date to this work.[\[593\]] 

Books of this era also introduced lists of alleged clan [septs], families of different surnames (often of English, [Norman], or other non-Gaelic derivation) supposedly linked to particular clans as "extended family". It was a means of greatly increasing tartan sales by attaching many more names to extant tartan designs, but not well-grounded in any historical reality.[\[594\]] [\[595\]] [\[412\]] Two such works, both published by W. & A. K. Johnston were: _Tartans of the Clans and Septs of Scotland_ by James Grant in 1886, revised by Henry Whyte in 1906 in more of a picture-book format (three tartans make their first appearance in the 1886 edition,[\[596\]] and various more in the 1906 version, with no provenance);[\[597\]] and _What Is My Tartan? or, The Clans of Scotland, with Their Septs and Dependents_ by Adam Frank in 1896.[\[399\]] 

The romanticised notion of clan tartans had become deeply embedded in the Scottish imagination and further afield.[\[183\]] [\[598\]] "\[I\]t all got mixed up in the public mind and the myth of tartan as a kind of [heraldry] became established, not only in the eyes of outsiders, even the Clansfolk believed it".[\[599\]] On the cusp of the [Scottish Renaissance] and [Gaelic Revival], most clans (including major Lowland families) had been assigned and had generally accepted one or more tartans by the late 19th century.

### 20th century consolidation

\[ [edit] \]

Charles E. N. Leith Hay, 1905 portrait by [John Ernest Breun], in Edwardian daywear Highland dress, kilt in a dark rendition of the [Hay and Leith] tartan. Most clan tartans were settled by the turn of the 19th and 20th centuries.

The first [Edwardian] book on the subject (aside from a larger 1906 "library edition" of Whyte as _The Scottish Clans and Their Tartans with Notes_),[\[597\]] was Frank Adam's 1908 _The Clans, Septs & Regiments of the Scottish Highlands_, which remains in print today (though in drastically edited form, by Sir [Thomas Innes of Learney]).[\[399\]] [\[600\]] A variety of books, with colour plates, had been affordably and widely published about clan tartans by the mid-20th century. Three popular ones were _The Clans and Tartans of Scotland_ by Robert Bain, 1938 (the first to use photographic [halftone] prints; revised and updated many times through 1983);[\[601\]] _The Tartans of the Clans and Families of Scotland_ by Innes of Learney (later to become the [Lord Lyon King of Arms] as well as a founder of the [Scottish Tartans Society]),[\[602\]] 1938, advancing some clan-tartanry ideas his Lord Lyon predecessor Sir [Francis James Grant] considered "humbug";[\[600\]] and _The Scottish Clans & Their Tartans_ published by W. & A. K. Johnston, 1945 (later editions re-titled _The Scottish Tartans with Historical Sketches_, edited by Innes of Learney), and based on previous works by Grant and Whyte. Many others followed in successive decades.[\[399\]] 

400 clan and district tartan samples at the headquarters of the weaver Lochcarron of Scotland

The mass-market books (some with over 200 tartans illustrated) did much to cement the idea of clan tartans in the public imagination, as well as to consistently anchor particular tartans to particular clans. And the works were in more general agreement with one another than had been the Victorian "authorities".[\[cd\]] They also simultaneously increased the number of clans with their own assigned tartans, and reduced the number of tartans claimed to be those of certain clans to a more manageable number, probably after consultation with clan chiefs and clan society officers. They did, however, typically include sept lists, which today are widely regarded as bogus[\[594\]] (though many present-day clan associations still use them, as a means of attracting larger membership).

Almost every extant clan (with or without a chief) had at least one tartan associated with it by this era. Many clans have several well-accepted tartans. Sometimes they represent different branches of the family; e.g., there are separate tartans for [Campbell] of Breadalbane, Campbell of Cawdor, and Campbell of Loudoun, in addition to the general "old" Campbell tartan. In other cases, they are (at least ostensibly) for specific purposes such as hunting, mourning, formal dress occasions, or Highland dance competition; e.g., the [MacFarlane] dress[\[604\]] and hunting tartans[\[605\]] are different. (See [§ Tartans for specific purposes], below.)

An important, more scholarly work was 1950's _The Setts of the Scottish Tartans_ by Donald C. Stewart[\[606\]] [\[ce\]] (son of the aforementioned D. W. Stewart).[\[399\]] The younger Stewart has been hailed as "the founder of serious tartan research"; originated now-standard methods for indexing tartans; and would go on to help expose the _Vestiarium Scoticum_ as a fraud, in _Scotland's Forged Tartans_, co-authored with J. Charles Thompson in 1980.[\[399\]] [\[608\]] 

In the late 20th century to present, clan and other tartans also have been catalogued in [databases]. (See [§ Registration], below.) A small number of new official clan tartans (mostly specific-purpose "side" tartans, like dance tartans) were registered in tartan databases in the 21st century.[\[cf\]] 

Regarding modern misrepresentations of clan tartans on historical figures in films and even museums, Scarlett (1990) wrote: "so widely have the tartan myths been spread that any script- or guide-book writer will, in complete ignorance, write the most arrant nonsense and never think that it might not be true. ... Once false information has been disseminated by a supposedly authoritative body it is virtually impossible to correct it."[\[610\]] 

### Recognition by clan chiefs

\[ [edit] \]

The "officialness" of clan tartans has varied widely, and still does today. Although it is possible for anyone to create a tartan and assign it any name they wish, the only person with the authority to make a clan's tartan "official" is the chief.[\[399\]] 

Some clans [have had no chiefs] for some time, while only a majority subset of those with living chiefs in the modern era made direct proclamations as to their clan tartans and registered them with the [Lord Lyon].[\[cg\]] Some time after the launch of the [Scottish Register of Tartans] (SRT) in 2009, the Lord Lyon stopped recording clan tartans, deferring to SRT for this purpose. (See [§ Registration], below.) Some of the clan tartans were simply adopted by custom,[\[ch\]] and have remained rather consistent into the 21st century. A clan booth at a [Highland games] event is likely to proudly display at least their best-known clan tartan, regardless whether a chief has declared it official.

However, some chiefs have been quite adamant about what their clan's legitimate tartans are. Some time prior to 1890, [George Campbell, 8th Duke of Argyll], chief of [Clan Campbell], is said to have specified the main Campbell tartan, as distinct from that of the [Campbell of Cawdor] sett, after a portrait had depicted him in the latter and also supposedly at the prompting of the War Office, perhaps with regard to [Argyll and Sutherland Highlanders] uniforms.[\[613\]] [Ian Campbell, 12th Duke of Argyll], Clan Campbell chief in the late 20th century, excoriated attempts to claim there were other than the four aforementioned particular Campbell tartans (and specifically rejected the personal-variant tartan of the 6th Duke).[\[417\]] Similarly, [Sir Malcolm MacGregor], chief of [Clan Gregor], wrote in 2012 that only four MacGregor tartans (plus a newer dance tartan) are legitimate, out of 10 or more alleged ones found in a tartan database, which he blamed on "indiscriminate commercialisation ... disingenuous and lead\[ing\] to confusion".[\[614\]] 

In at least one instance, a clan tartan appears in the [coat of arms] of a clan chief and is considered by the Lord Lyon as the "proper" tartan of the clan: The crest of the chief of [Clan MacLennan] is _A demi-piper all [Proper], garbed in the proper tartan of the Clan Maclennan_.[\[615\]] [\[ci\]] 

Some chief-authenticated clan tartans are quite late arrivals. In 1961, the [Clan Davidson] main tartan was replaced (and registered with the Lord Lyon) by one of multiple disputed chiefs, Sir David Davidson of Allt Dinnie, with a design dating to 1893, in place of an older white-striped version.[\[616\]] Chief [Charles Shaw of Tordarroch] in 1971 replaced the old [Shaw] tartan (a Black Watch variant based on a misprinted image in Logan & McIan (1847))[\[617\]] with a new pair (dress[\[618\]] and hunting)[\[619\]] designed in 1969 by D. C. Stewart based on more historical sources.[\[620\]] [Clan Mar] had no approved tartan until Chief [Margaret of Mar] registered one in 1978 (from a design that may pre-date 1850);[\[621\]] their dress/red tartan was not adopted until 1992 (from a design dating to the 18th century).[\[82\]] The [MacLeod] red tartan was approved by Chief [John MacLeod of MacLeod] in 1982, to join much longer-standing yellow and blue tartans of the clan; it was based loosely on what appears in a 1748 portrait of Chief [Norman MacLeod] by [Allan Ramsay] and [Joseph van Aken].[\[622\]] Baron [David Lumsden of Cushnie-Lumsden] in 1996 approved the [Clan Lumsden] hunting sett by Peter Eslea MacDonald[\[623\]] (though technically the baron was just the chieftain of the Cushnie-Lumsden branch). In 1998, Chief [Dugald MacTavish of Dunardry] approved a 1958 design as the [MacTavish] dress tartan.[\[624\]] In 2005, Chief Gillem Lumsden of that Ilk registered a new main [Lumsden] tartan with the Lord Lyon,[\[625\]] based closely on that of a c. 1790 Lumsden family waistcoat.[\[626\]] Also in 2005, a pattern for [Duncan of Sketraw] was approved by Chieftain John Duncan of Sketraw, based on a 1930s design.[\[627\]] In 2007, Chief [Fergus D. H. Macdowall of Garthland] designed the [Clan MacDowall] tartan (the clan previously used [MacDougall] or [Galloway] district); he registered it with the Lord Lyon and [Scottish Tartans Authority] in 2008.[\[628\]] [\[629\]] The [Cochrane] hunting tartan was designed personally by Chief [Iain A. D. B. Cochrane, Earl of Dundonald], in 2008.[\[630\]] The [Clan Carruthers] tartan was approved by Chief Simon Peter Carruthers of Holmains in 2017.[\[631\]] 

Aside from regimental and clan usage, tartan has seen broad (and sometimes highly politicised) use by the general public in the modern era. By the 19th century, the Highland [romantic revival], inspired by [James Macpherson] 's " [Ossian] " poems and the writings of Sir [Walter Scott], led to wider interest in tartan and other things felt to be [Gaelic] and [Celtic]. Clubs like the [Celtic societies] welcomed [Lowlanders], and tartan was rapidly [appropriated] [\[557\]] as part of the [Scottish national identity] [\[632\]] [\[633\]] (and part of broader British dress as a familiar [exoticism]).[\[634\]] [\[635\]] 

"The New Fashion, or The Scotsman in Paris", from a series of Parisian fashion prints, 1815

The period of widened public interest in tartan and Highland dress after the repeal of the Dress Act in 1782 has been called the **Highland Revival**.[\[32\]] [\[cj\]] While tartan had already seen more nationwide use from 1707, as a [Scottish nationalism] symbol against [union with England],[\[281\]] it was turned on its ear to become a romanticised symbol of union loyalism in the early 19th century,[\[183\]] [\[637\]] an era in which prominent conflicts caused a patriotic influence of military (including Highland) style on civilian clothing,[\[ck\]] even among women[\[351\]] [\[640\]] despite its overtly masculine focus.[\[641\]] [\[642\]] [\[176\]] First among the northern gentry and later among the common people more broadly, there was a renewed interest in tartan and Highland dress, despite the long period of prohibition – largely due to the glory associated with the Highland regiments' exemplary service in various military campaigns.[\[401\]] "Highlandism"[\[643\]] became a romantic, mythologised (even fictionalised) and colourful [escapism] [\[644\]] [\[349\]] even as Lowland Scotland itself was becoming one of the most [industrialised] places on earth, and the entire nation was undergoing the social upheavals of union and [empire], of large-scale warfare, of urbanisation, and of modernisation during the [Scottish Enlightenment].[\[645\]] The bloody [French Revolution] of 1789–1799 had also helped inspire a British setting aside of old [Stuart] and [Hanoverian] rivalry.[\[646\]] 

Before the clan tartans rush began in 1815, tartan was already being aggressively marketed to the general public as "fancy" cloth with names that commemorated famous events and people, even fictional characters from books and songs, e.g. " [Waterloo] ", " [Flora MacDonald] ", "Sir Walter Scott", " [Wellington] ", " [Maggie Lauder] ", and " [Meg Merrilees] ". This inspired a novel perception that tartans should be named.[\[647\]] Some of the designs by leading weaver Wilsons of Bannockburn by this period were considered recognisable on sight.[\[648\]] 

In 1822, Maj.-Gen. [David Stewart of Garth], who was with both the [Highland Society of London] and the [Celtic Society of Edinburgh],[\[649\]] [\[532\]] published _Sketches of the Character, Manners, and Present State of the Highlanders of Scotland_, the first of a number of 19th-century books lionising the Highlanders, the clans, and the tartaned regiments.[\[641\]] [\[539\]] The various Celtic/Highland societies throughout Britain had already been driving a rise in tartan demand since the late 18th century.[\[650\]] [\[375\]] The societies liked wearing Highland dress – in their own assimilated, urban idiom,[\[cl\]] such as tartan [frock coats] [\[355\]]  – and devising new tartans; it has been suggested that they were engaging in a sort of "internal colonisation", imposing what they wanted to see rather than simply recording what was traditionally Highland.[\[651\]] Aside from tartan fabric's increasing use in non-Highland styles of clothing, Highland dress itself had already become highly stylised, quite removed from the simplicity of its peasant origins;[\[652\]] this was a trend that would continue throughout the later Victorian period.

#### The King's jaunt in tartan

\[ [edit] \]

_[George IV in Highland Dress] _. [David Wilkie] 's idealised depiction of [the king], in full [Highland regalia], during his visit to Scotland in 1822[\[cm\]] 

The popularity of tartan was greatly increased by the royal [visit of King George IV] of the United Kingdom to Edinburgh in 1822, with other nobles including [Lord Mayor of London] Sir [William Curtis],[\[654\]] in Highland garb. George was the first reigning monarch to visit Scotland in 171 years.[\[554\]] The pageantry invented for the event, which was nicknamed "the King's Jaunt", brought a sudden consumer-driven demand for tartan cloth[\[503\]] and made it the [national dress] of the whole of Scotland.[\[554\]] [\[655\]] [\[632\]] [\[656\]] The 21 days of festivities were organised by the Jacobitism-romanticising but staunchly unionist[\[183\]] Walter Scott, who was another co-founder of the Celtic Society of Edinburgh, and military officer [David Stewart of Garth].[\[530\]] They urged Scots (most of whom were Lowlanders) to attend "all plaided and plumed in their tartan array"[\[657\]] in "complete national costume".[\[655\]] One contemporary writer sarcastically described the pomp that surrounded the celebrations as "Sir Walter's Celtified Pageantry",[\[657\]] [\[658\]] and another as a "plaided panorama".[\[655\]] Clan chiefs, expected to be kilted, had little choice but to take the event seriously, and arrived to show their loyalty in something of a panic, with tartaned retinues of half a dozen[\[276\]] to up to 50 per clan[\[659\]] (equipped at great expense, and with only about a month's official notice), in a city overflowing with Highlanders, Lowlanders, and English spectators decked in tartan,[\[660\]] [\[504\]] a sight that Scott's own son-in-law and biographer [John Gibson Lockhart] called a "Celtic hallucination".[\[502\]] Thousands of spectators attended the many events arranged for the visit.[\[276\]] The formal ball, reserved for the gentry, required Highland dress for admittance, and some 300 tailors were employed to supply it.[\[659\]] 

The royal endorsement of tartan and Highland-wear did much to erase any lingering association of them with the servile peasant class of the Highlands[\[661\]] (or the region's bands of mountain bandits, for that matter).[\[659\]] Because Scott had become "the acknowledged preserver of Scotland's past" through his historical novels, the legend he helped create of tartan and Highland dress as a Scotland-wide tradition rooted in antiquity was widely and quickly accepted, despite its ignoring and erasing of cultural diversity within the country[\[652\]] (of Gaels, [Norse–Gaels], [Scoto-Normans], and Lowlanders of largely [Anglo-Saxon] extraction). "A bogus tartan caricature of \[Scotland\] had been drawn and accepted, even by those who mocked it, and it would develop in perspective and colour."[\[662\]] George IV's visit – which was not just theatrical but thoroughly political, in marrying Hanoverian power and loyalty to Stuart ideology and pride[\[663\]]  – has been described in by [Angus Calder] (1994) as the catalyst by which "a Union of practical convenience became a Union of irrational love and fears, sublimated in militarism, tartanry, royalism and, eventually imperialism".[\[664\]] R. Martin (1988) added: "it would seem that this visit presages the acts of orchestrated political propaganda that we have come to know very well in the 20th century."[\[557\]] 

Portrait of [John Crichton-Stuart, 2nd Marquess of Bute], by [Henry Raeburn], c. 1829, showing adaptation of tartan to [Regency-era] clothing styles, like this red-lined cloak

Following the royal visit, the tartan industry boomed,[\[665\]] and the number of available tartans increased tenfold;[\[666\]] in 1822, Wilsons' pattern book had numbered setts in the hundreds, and introduced many more with proper names.[\[514\]] Scarlett (1990) writes that "Tartan was no longer the dress of northern barbarians or political dissidents; it had become respectable and the garb of loyal subjects."[\[667\]] Books which documented tartans began to appear and added to the " [tartanry] " craze. [James Logan] 's romanticised work[\[567\]] _The Scottish Gaël_ (1831) was the first such publication, and led the weaving industry to adopt new patterns, even Logan's invented or erroneous ones.[\[566\]] 

The result of these flurries of attention has been described as an "astonishing frenzy of excitement into which \[patronage of tartanry\] threw the citizens of Edinburgh and much of the rest of Scotland".[\[668\]] 

From the 1820s, Georgian and then Victorian portraiture of clan nobles continued the earlier theme of regimentally re-styled Highland dress, with jewels, gold, and other symbols of aristocracy – a "synthetic Gaelicism".[\[669\]] The funerals of [Sir John Macgregor Murray] and [Alasdair Ranaldson Macdonell of Glengarry], in 1822 and 1823 respectively, were marked by tartan, [bagpipes], and "wailing" of clansmen – "a feudal sight in an increasingly industrial age".[\[670\]] A large public tartan affair was the 1824 [Atholl Gathering] [\[559\]] (an annual event that, after a period of abeyance, continues to the present). From the end of proscription through the Georgian promotion, "distrust of the Highlands became fascination",[\[671\]] and tartan and Highland garb "moved from the periphery to the very center, accompanied by all the processes of forgetting and imaginative re-creation".[\[672\]] Tartan, no longer the everyday traditional dress of Highland "barbarians", had become, in altered form, all the rage among the Scottish upper and even middle classes as formal attire.[\[673\]] This popularisation of tartan increased its marketability in the Lowlands, in England, and in the colonies, and provided a boost to the Scottish textile industry.[\[342\]] 

French tartan fashions from _Costumes Parisiens_, 1826

Tartan had begun making appearances in civilian Georgian fashion throughout Britain and into continental Europe, as illustrated in publications such as London's _[Gallery of Fashion] _ (1787) and _[La Belle Assemblée] _ (1808), and (after Paris was famously occupied by Highland regiments during the [Waterloo campaign] and the fall of [Napoleon] in 1815)[\[674\]] [\[641\]] [\[675\]] in the French periodicals _Le Prétexte_ (1815)[\[676\]] and _Costumes Parisiens_ (1826); tartan was in vogue in Paris in particular in this period,[\[677\]] [\[678\]] and approximations of Highland soldiers even appeared in Parisian plays at the time.[\[679\]] Tartans associated with family names became popular, but there was also a brisk trade in new tartans commissioned for societies, to commemorate events, in honour of famous persons, and designed simply to personal aesthetic taste.[\[680\]] Manufacturers struggled to keep up with demand.[\[681\]] [\[632\]] By 1819, dominant tartan weaver Wilsons of Bannockburn[\[340\]] (also a carpet and ribbon weaver)[\[682\]] was keenly interested in exploiting the civilian market, due to a reduction in regimental demand, and introduced many more patterns, providing cloth in various grades.[\[683\]] By 1820, the company had access to 132 [looms];[\[684\]] they experienced a four-fold increase in output in 1821, leading up to George IV's visit,[\[655\]] after which they acquired 40 more looms[\[505\]] [\[666\]] in an add-on building,[\[681\]] named the Royal George after the king,[\[505\]] and expanded into a new mill in 1822, [mechanising] more and more to keep up with demand.[\[684\]] They stopped weaving [muslin] to focus on tartan,[\[681\]] and produced it in a range of qualities from finest [merino] wool to cheap [linsey-woolsey] blends, demonstrating that whatever high-class associations tartan had taken on, there was significant working-class demand.[\[685\]] In 1829, a merchant wrote to Wilsons that "We are like to be torn to pieces for tartan; the demand is so great we cannot supply our customers", and there was great demand for the newest patterns.[\[569\]] 

Illustration of Victorian women weaving at power looms in a textile factory (this one in Denmark, but the scene in Wilsons of Bannockburn at its peak would have been very similar).

Georgian and later Victorian entrepreneurs not only created new tartans, but new tartan objects called _tartanware_, starting as far back as the proscription period in the form of wine glasses decorated with tartan and enamel Jacobite portraits.[\[313\]] Tartan decorated an assortment of common household objects, such as [snuffboxes], jewellery cases, tableware, sewing accessories, desk items, and even doorknobs and furniture – a tartan knick-knack market for tourists that continues through the present in the Highlands.[\[686\]] Visitors to the Highlands went home with tartanware, and Scotland-based businesses sent tartanware out as gifts to customers. Some of the more popular tartans used were the [Stewart], [MacDonald], [MacGregor], [MacDuff], [MacBeth], and one fancifully named "Prince Charlie".[\[687\]] [\[688\]] [\[689\]] Today, tartanware is widely collected in England and Scotland.[\[690\]] There was a symbiotic relationship between tartanware production and interest in tartans generated by books on the subject: a tartanware manufacturer from 1820 onward was W. & A. Smith, of [Mauchline], also incidentally the publishers of _Authenticated Tartans of the Clans and Families of Scotland_ (1850).;[\[691\]] tartanware was sometimes more specifically called Mauchlinware.[\[692\]] 

Leading up to the beginning of [Queen Victoria] 's reign in 1837, tartan was a brisk trade in London, Manchester, and other English cities and towns.[\[693\]] In 1839, the [Eglinton Tournament], a [medieval re-enactment] featuring [jousting] and a [ball], was organised in [North Ayrshire] by [Archibald Montgomerie, Earl of Eglinton]; it drew some 100,000 spectators, who had been asked to attend in plaids, and included [George Murray, Duke of Atholl], arriving with an entire regiment in tartan, his newly re-formed [Atholl Highlanders] [\[694\]] (which still exists as Europe's last remaining private military force).

_Scene in the Highlands with Portraits of the Duchess of Bedford and Duke of Gordon_ (in various tartans), by [Edwin Landseer], 1825\. The [Highlands were being cleared] of native people, for deer hunting preserves and sheep pastures

#### _Vestiarium Scoticum_

\[ [edit] \]

The first publication showing colour plates of an array of tartans was the _[Vestiarium Scoticum] _ (meaning 'wardrobe of the Scots'), published in 1842,[\[576\]] and it included a first: tartans for Lowland families. It was the work of two brothers: John Carter Allen and Charles Manning Allen, from [Surrey], England, who used a variety of assumed names. The two implied they were grandsons of Prince [Charles Edward Stuart] and [Princess Louise of Stolberg-Gedern], and consequently later became known as the " [Sobieski Stuarts] ". They claimed further that the _Vestiarium_ was based on a 1571 manuscript on clan tartans – a manuscript which they never managed to produce. It was not known at the time, but many of the tartans were simply invented by the brothers, and others were taken from early-19th-century sources like the Cockburn and Wilson collections.[\[695\]] [\[399\]] The brothers heavily favoured basic checks, or crudely divided checks, with thin over-checks added; they had an identifiable style of tartans, assessment of which has varied from "few can be called inspired"[\[50\]] to "quite novel and singularly gorgeous".[\[696\]] The _Vestiarium_ was followed by their equally dubious _The Costume of the Clans_ in 1845.[\[566\]] The books, which "added mystery, romance and some spurious historical documentation to the subject",[\[572\]] triggered another wave of interest in tartans, and the enthusiasm generated by these publications led the way for numerous tartan books in the later 19th century.[\[657\]] [\[576\]] [\[cn\]] 

The sudden availability (and almost unquestioning acceptance) of Lowland tartans helped spread tartan further in popularity. "The \[tartan\] cult was gathering strength and tartan was no longer 'Highland', it had become 'Scottish'."[\[599\]] 

#### The Queen and "Balmorality"

\[ [edit] \]

A silk and velvet late-Victorian young woman's tartan dress, 1878, probably made in England

Twenty years after her uncle's royal visit to Scotland, Victoria and her husband [Prince Albert] made their first trip to the Scottish Highlands in 1842; she was the first monarch to set foot in the Highlands since [Mary, Queen of Scots], in the 16th century.[\[697\]] The visit involved her large royal party being met with several theatrical tartan-kilted welcomes by Highland nobility and their retinues, with much sycophantic newspaper fanfare (while the common people were experiencing considerable misery); the Queen wrote: "It seemed as if a great chieftain in olden feudal times was receiving his sovereign".[\[698\]] The monarch's early trips to Scotland were seen as a royal endorsement and had a transformative effect on the image of the country, as a now-loyal land of tartan, [pipers], and kilted martial display.[\[699\]] 

Victoria and Albert leased [Balmoral Castle], in [Aberdeenshire], in 1848 (and bought it in 1852)[\[700\]] as a private royal [demesne] and hired a local architect to re-model the estate in feudalised [Scots baronial] style, starting a "sham-castles" trend.[\[701\]] Prince Albert personally took care of the interior design, where he made great use of tartan. He used the royal Stewart (red) and the hunting Stewart (green) tartans for carpets, while using the dress Stewart (red and white) for curtains and upholstery.[\[577\]] Prince Albert (who often wore the kilt at Balmoral) is said to have created the Balmoral tartan, still used as a royal tartan today.[\[702\]] (See illustration at [§ Family and individual], below.) They even decorated their [carriage] with tartan.[\[703\]] [\[704\]] Their adoption of a showy form of Highland dress inspired adoption by subject "who would have previously left Highland dress to the festivals of the Scots."[\[692\]] 

The royal couple spent a considerable amount of time at their Scottish estate (nearly 7 years in total),[\[705\]] [\[co\]] and in doing so hosted "Highland" activities. Victoria was attended by pipers,[\[707\]] and her children were attired in Highland dress.[\[708\]] Prince Albert himself loved watching the [Highland games] [\[709\]] and the pair became patrons of the [Braemar Gathering].[\[710\]] (Support from and attendance by various nobles may have helped preserve such events to the present, but it also "tartanised" them permanently, all the way into the 21st century.)[\[711\]] The royal enthusiasm for and patronage of Highland things generated more [early tourism] to the Highlands,[\[587\]] [\[712\]] [\[cp\]] and a boost to business in the region as far as [Perth] and [Edinburgh].[\[714\]] It also spread tartan-wearing to other northern British lords and ladies, who began to invent complicated etiquette rules of dress for Highland garb, which had the effect of increasing the sense that it was upper-class attire.[\[183\]] [\[715\]] [\[599\]] (See [§ Etiquette], below.) Adoption of tartan continued to spread into England; [Thomas Osborne, Duke of Leeds], in [West Yorkshire], devised a [livery] tartan for his men in 1848.[\[716\]] Tartan, though a "pseudo- [Caledonian] masquerade",[\[717\]] had become "the stuff of loyalty to the crown",[\[718\]] with "a spurious royal and aristocratic cachet".[\[719\]] This royal promotion was also noted abroad, with the effect that tartan became one of the widest-recognised [cultural-identity] symbols for the entire British country.[\[720\]] 

Despite their considerable devotion to charity (up to 20% of their [Privy Purse] income),[\[721\]] Victoria and Albert, along with their friends in the northern gentry, have been accused of using their "Balmorality" – a term coined by [George Scott-Moncrieff] (1932) to refer to upper-class appropriation of Highland cultural trappings, marked by "hypocrisy" and "false sentiment" – to trivialise and even fictionalise history.[\[722\]] According to Fiona K. Armstrong (2017), they engaged in long-term, tartan-blanketed escapism from the uncertainties of modernising, industrialised society and from pressing British societal problems, while worsening those problems in the actual Highlands.[\[722\]] The queen's Balmoral residency also had another detrimental effect on the Scottish Highlands; inspired by her residency, aristocrats who lived outside the Highlands began purchasing estates in the region, resulting in land-ownership disparities that persist into the present day.[\[723\]] The Highlands during Victoria's reign also became more accessible by road, rail, and boat.[\[709\]] 

A late Victorian style, this two-piece tartan suit dates to about 1875–1880

As the tartan and "romantic Highlands" craze swept over Scotland, the real Highland population suffered grievously from the [Hungry Forties] as well as the [Highland Clearances], when thousands of Gaelic-speaking Scots from the Highlands and Isles were evicted by landlords (often the very men who would have been their clan chiefs) to make way for sheep[\[657\]] [\[724\]] and for expansive deer-hunting preserves.[\[725\]] Scots were also largely disenfranchised from voting, and the Highlands were running out of young men, in great regimental demand to fight and die in foreign wars for the empire, and many emigrating otherwise,[\[726\]] with Victoria and Albert directly patronising [emigration societies].[\[727\]] Nearly 2 million Scots moved to non-European destinations during the Victorian era (more than half the native-born Scottish people of the period), and took a measure of Highlandism with them[\[728\]]  – "many of the generally understood images of the Highlands were held to be 'real' by people at the time".[\[729\]] This would have strong tartan-promoting results among the [Scottish diaspora] later;[\[730\]] Scarlett (1990) calls it a "tartan hunger that has been abroad from late Victorian times to the present day".[\[731\]] 

[Thomas Babington Macaulay] wrote in 1848 of the romantic reinvention of Highland customs as somehow _generally_ Scottish: "Soon the vulgar imagination was so completely occupied by [plaids], [targets], and [claymores], that, by most Englishmen, _Scotchman_ and _Highlander_ were regarded as synonymous words."[\[732\]] In 1849, Sir [John Graham Dalyell] asserted that "forty years ago no reputable gentleman would have appeared in a kilt in the streets of Edinburgh."[\[733\]] Scott-Moncrieff (1932) likewise wrote of tartans being "misconceived" and worn all over Scotland (and even England) in the Victorian era as a part of the Queen's influence.[\[734\]] Increasingly-urban Scotland was putting on a "rural face"[\[735\]] (a trend that would continue with " [kailyard] " literature). Tartanry and Highlandism were popular in part as a counter to a sense (especially among the aristocracy) that Scotland was losing its separate national identity in the Georgian to Victorian era, being ever more [Anglicised] as just "North Britain" amid empire-wide modernisation.[\[736\]] 

[Kenneth MacLeay] 's 1866 portrait of a [MacLachlan], a [Graham], a [MacFarlane], and a [Colquhoun], for Victoria's _Highlanders of Scotland_ book project.

In an 1849 letter to a publisher about a planned second edition of _Vestiarium Scoticum_, John Sobieski Stuart noted that tartan had become "extensively worn and manufactured" on the continent, as far away as France, Germany, [Bohemia], and Hungary; he also expressed an interest in working directly with tartanware and tartan book makers W. & A. Smith of Mauchline.[\[737\]] The same year, the Duke and [Duchess of Atholl] (whose entire estate was prescribed tartan livery)[\[738\]] hosted a Highland-dress affair in London, the [Royal Caledonian Ball], the first known charity ball (still a sold-out annual event today).[\[739\]] The 1859 opening of the massive [Loch Katrine] waterworks (to pump fresh water to [Glasgow], running out of [well] water) was attended by Queen Victoria, with the Atholl Highlanders (cannon in tow), the Celtic Society of Glasgow, and an [honour-guard] unit called the Glasgow Volunteers putting on a tartan- and piper-laden display for the newspapers; it was a confluence of modern engineering and romantic–patriotic tartanry.[\[740\]] When the Prince Consort died in 1861, Victoria commissioned a tartan-kilted statue of Albert at Balmoral by [William Theed].[\[741\]] 

According to Jonathan Faiers (2008), Victoria had actually intentionally made tartan more popular for the benefit of the British textile industry.[\[742\]] By the 1860s, tartan was not only as popular in London as in Scotland,[\[743\]] leading weaver Wilsons of Bannockburn produced £80,000 of product per year, and employed 500–600 people. (It amalgamated with another of the family businesses, a carpet-weaving operation, in 1867, which continued to 1924.)[\[275\]] Around 1860, new [synthetic aniline dyes] allowed for production of tartans in vivid colours at more affordable prices, and their lower cost translated into more consumption of tartan by the middle class.[\[707\]] 

The first permanent colour photograph, by [Thomas Sutton] in 1861, was of a tartan ribbon.

As modernisation marched on, the world's first permanent [colour photograph], taken by [Thomas Sutton] (using the three-colour process developed by Scottish physicist [James Clerk Maxwell]) in 1861, was of a tartan ribbon.[\[744\]] It was created by using red, blue, and yellow filters to create three photographs which were then combined into a composite. R. Martin (1988) notes that there was a confluence of unrelated technological "junctions and serendipities" in the mid-19th century that together broadly promoted tartan, including photography, consistently bright and more economical artificial dyes, affordable [colour book printing], mass-production of soft but durable fine textiles, and applicability of printed patterns to middle-class products like tartanware – all "far-removed from the true peasant history of tartan."[\[745\]] Ian Brown (2012), a professor with a focus on Scottish literature and culture, has written that while George IV and Victoria (not to mention business interests in their wake, like the Wilsons of Bannockburn and the Smiths of Mauchline) seemed to have been "the winner taking over the loser's tokens", the renewed public interest in tartan within and beyond Scotland was not entirely owing to them, especially given the international interest in Highland-romantic works of Walter Scott and "Ossian". The acceptance of and even enthusiasm for tartan among the post-proscription upper class can be seen as a necessary attempt at reconciliation within a culturally diverse country, and the influence ran both ways, with old Scottish nationalism transmuting into a new unionism that demanded recognition of Scottish interests and institutions. "In short, it is an open question whether George IV in a kilt and Victoria and Albert at Balmoral are appropriating and subverting a set of values, or whether they are being appropriated and subverted."[\[746\]] Even the 1822 "King's Jaunt" had been stage-managed by two Scots with a keen interest in romanticising and promoting Gaelic and broader Scottish culture (historico-traditional accuracy notwithstanding),[\[652\]] and the Atholls' deep and tartan-arrayed involvement in Victoria's activities in the north can be viewed in the same light.[\[747\]] Both George IV[\[748\]] [\[749\]] [\[750\]] and Victoria,[\[751\]] primarily of German [House of Hanover] stock, came to identify strongly with their quite thin Scottish [House of Stuart] genealogy.

[Prince Arthur] dressed up as [Bonnie Prince Charlie] for the 1871 Waverley Ball

The 1863 funeral of the Duke of Atholl was another anachronistically feudal, tartan-and-pipers pageant.[\[752\]] In 1866–1870, Victoria and the Duchess of Atholl commissioned artist [Kenneth MacLeay] in Edinburgh to produce a series of [watercolours] of statuesque men in tartan Highland gear, representing common people from [ghillies] to shepherds and fishermen, "as they _now_ are". Prints were published in 1870 as _Highlanders of Scotland: Portraits Illustrative of the Principal Clans and Followings, and the Retainers of the Royal Household at Balmoral_, with text by Amelia (Emily) Murray MacGregor, an attendant of Victoria as well as a [Clan Gregor] historian and the first female Gaelic lecturer. A tartanistical fantasy, as well as another exercise in "Highlander as [noble savage] ", the art book necessitated canvassing Scottish aristocrats for outfits and suitable models ("specimens"), as the everyday people did not look the hyper-masculine part, were not able to afford such Highland-dress extravagances as were to be illustrated, and were more likely to be wearing trousers than kilts.[\[753\]] The resulting book is the most detailed record of the "proper", codified Victorian-era Highland dress and accessories, which "removed tartan from its blustery nonchalance to an ordered set of adornments"[\[743\]]  – most of which survive to the present, Highland dress being remarkably resistant to further major stylistic changes, Victorian styles having become "traditional". Tartan had also become more established throughout the 1850s and 1860s as a textile for European-fashionable rather than Highland women's clothing, from [bodices] and dresses to [sashes] and shawls (the never-extinguished ladies' plaids).[\[743\]] The tartan sash in particular was a favourite of the Queen,[\[743\]] and remains a common womenswear option, worn several different ways in modern Highland dress,[\[754\]] though it has little to do with original Highland clothing before the 19th century; it is an adaptation of the plaid to a style of the European nobility.

In 1871, at the Waverley Ball, a [fancy dress] affair in London, the Prince of Wales (the future King [Edward VII]) and his brother [Prince Arthur], long accustomed to Highland dress, arrived tartaned out as an old-time [Lord of the Isles] and as [Bonnie Prince Charlie], respectively.[\[755\]] In 1872, [ethnologist] Jacob Falke wrote that "In Scotland indeed the plaid has still some importance, but it is an object of manufacture, and ... its motives have long ago become the common property of fashion, and indeed have become so permeated by it that what is genuine and old in it is scarcely to be recognised".[\[756\]] Since its 1880 re-opening, the Gaelic Society of Perth in the Lowlands held festivities that involved much piping and tartan-wear, into the early 20th century, despite the language-preservation organisation having nothing to do with Highland dress or _[pibroch] _; being swathed in tartan had somehow become vital to such events.[\[757\]] By 1883, Highland dress as proper [courtly] attire had become highly regulated, aristocratic, and formal, but "inclusive" in one sense – the tartan-wear was permitted at court for essentially anyone claiming Highland origins or land-ownership (even if natively English), not just the gentles of the well-established clans.[\[758\]] 

In the Victorian era, tartan garments for women as well as men continued to be featured in fashion catalogues, in styles not derived from Highland costume, such as everyday suits and dresses.[\[587\]] Tartan had also become popular for children's clothing in continental Europe,[\[759\]] inspired by the royal children of Victoria.[\[760\]] In the United States, tartan was similarly worked into school uniforms, especially [at Catholic schools].[\[761\]] The late 19th century saw tartan (sometimes in silk) in fashion throughout Europe, including in France (e.g. Paris, Lyon, and Alsace) and Italy,[\[762\]] and as far from Britain as Russia.[\[763\]] (See [c. 1855 French master weaver's illustration] of complex tartan-making.) Founded in 1898, [Walker's Shortbread] has long been sold in [royal Stewart tartan] packaging around the world (especially for [Christmas] and [Hogmanay]).[\[764\]] 

### 20th century to present

\[ [edit] \]

[Edward, Duke of Windsor], in a tartan necktie, 1945

In the [Edwardian era], tartan had become less a component of men's clothing (with the decline in kilt-wearing) but more an important part of women's fashion,[\[76\]] including fanciful _[haute couture] _ designs [from Paris] that had no connection to Highland style,[\[587\]] and many accessories such as [petticoats], [stockings], and [blouses]; masculine accessories included [braces (suspenders)], [neckties], [cummerbunds], and socks.[\[76\]] 

[Edward VII] himself had grown up wearing Highland dress frequently.[\[708\]] There was also in this period into the 1920s a market for Highland-dress etiquette booklets, which tied into the era's "dress sense" of decorum and class[\[426\]] (see also [§ Etiquette], below). Because of its associations with the British aristocracy, Scottish clans, and Highland military, tartan had developed an air of dignity and exclusivity.[\[765\]] Because of this, tartan was to make periodic resurgences in the world of fashion. The tartan uniforms of the [Scottish Regiments] were an important recruiting tool during World War I; as [Archibald Primrose, Lord Rosebery], put it: "there is nothing so magnificent in our army as the swing of a kilted regiment".[\[766\]] Tartan's Georgian re-orientation as a symbol representing unionism and empire continued well into the first half of the 20th century,[\[767\]] though outright tartanry and Highlandism on the part of the upper class waned, especially after about 1920.[\[768\]] Nevertheless, [Edward VIII], later Duke of Windsor, was a life-long devotee of tartan, often wearing more than one at a time.[\[587\]] 

[Catholic school uniform] skirts, using a wide variety of tartans

Tartan patterns (often simple, unnamed ones) remained commonly used for skirts and [pinafore dresses] (jumper dresses) in Catholic and other private [school uniform] codes in North America and also in public and private schools in New Zealand. The style spread to many other places, including South America, [Japan] [\[685\]] (which sometimes imports tartan directly from Scotland),[\[769\]] and Hong Kong.

[Harry Lauder] in one of his Highland outfits, 1922

[Harry Lauder] (properly Sir Henry – he was knighted for his [war-effort] fundraising during World War I) became world-famous in the 1910s and 1920s, on a [dance hall] and [vaudeville] entertainment platform of tartan Highland dress, a thick [Scots] accent, and folksy songs about an idealised, rural Scotland, like his hit " [Roamin' in the Gloamin'] ". At one point, he was the highest-paid performer in the world, and toured the United States, Australia, South Africa, and of course the UK to sold-out audiences. A Lowlander himself, Lauder has been credited with (and blamed for) keeping alive a tartanry-and-Highlandism image of Scotland, with critics calling him a "kilted clown" who promoted the idea of Scotsmen "clothed like the chieftain of Clan McCrazy".[\[770\]] 

#### Diaspora and globalisation

\[ [edit] \]

By the mid-20th century,[\[771\]] annual [Highland games] events, modelled on the traditional events in Scotland, had been established not just in Scotland but throughout the United States, Canada, Australia, New Zealand, and South Africa, among other places with a notable [Scottish diaspora], which totals about 50 million people worldwide.[\[772\]] There are dozens of such events in Scotland,[\[773\]] and at least 260 annual Highland games events worldwide as of 2000,[\[774\]] more than 100 of them in the US alone, and dozens more in Canada.[\[773\]] They are closely intertwined with [bagpipe] band competitions (which date to 1781), a lasting source of tartan imagery in their regiment-inspired Highland uniforms.[\[775\]] 

[Massed bands] at the [Glengarry Highland Games], Maxville, Ontario, Canada, 2006

The games' rather flamboyantly[\[776\]] tartaned [subculture] is sustained outside Scotland primarily by multi-generational Scottish descendants rather than by direct Scottish expatriates.[\[777\]] [\[778\]] 

Mystic Highland Pipe Band at [Tartan Day] parade, New York City, 2002

[Tartan Day], an annual [symbolic ethnicity] holiday among the Scottish diaspora, is a growing affair celebrated on 6 April, the date on which the [Declaration of Arbroath] was signed in 1320. Tartan Day was first declared in [Nova Scotia] in 1987, and was essentially nation-wide in Canada by the 1990s. It has since spread to Australia (with varying levels of official recognition, 1989–1996), the US (1998), and other places including New Zealand,[\[779\]] and even Argentina[\[780\]] and Paris, France.[\[781\]] In [New York City], it has turned into an entire Tartan Week since 1999, with honorary "grand marshals" that are usually Scottish celebrities.[\[782\]] 

The term _tartanism_ (as distinct from _tartanry_) has been coined by Ian Brown (2012) for this international tokenisation of tartan as an [ethnic-identity] symbol, evolving to some degree independently to suit diasporic cultural needs and unrestrained by the views of the originating Scottish "home" culture.[\[783\]] According to Ian Maitland Hume (2001), tartan and the kilt are powerful symbols that "encapsulate many facets of a heritage which people aspire to access ... a part-mythical family origin for those seeking roots".[\[784\]] 

The [Scottish Tartans Museum and Heritage Center] was opened by the [Scottish Tartans Society] in 1988 in [Highlands, North Carolina]; in 1994, it moved to nearby [Franklin]. The museum, which runs independently of STS, features over 600 tartans on display, including specimens dating to c. 1725, and Highland dress examples to ca. 1800.[\[785\]] (STS also operated a Scottish Tartans Museum in Edinburgh,[\[786\]] but it closed when STS did in 2000.) A major exhibition on tartan was produced by the [Fashion Institute of Technology] in New York 1988–89, and another was created for the [Edinburgh Festival] in 1989.[\[787\]] Others followed in Italy in 2003, and Japan in 2018.[\[788\]] In April 2023, the Victoria and Albert Museum of Dundee ( [V&A Dundee]) opened a design exhibit (running until January 2024) about tartan and its "shifting context", with goals of "challenging preconceptions of what tartan is, whether that be from a historical sense or fashion sense".[\[789\]] [\[790\]] 

D. Gordon Teall of Teallach, of the Scottish Tartans Society, observed in 1994:[\[791\]] 

> Tartans have always formed part of Scotland's historic heritage and it is a compliment to their country that they have become so widespread throughout the English and Gaelic speaking world. They are probably more popular now than they have ever been because they have come to symbolise the spirit of families, clans and districts and, more recently, corporate institutions.

Even as tartan has been bent to the cultural needs of the diaspora, as "the most straightforward and outward sign of ... affinity with Scottishness", and bent to the commercial intents of fashion, tourism, entertainment, and other industries, tartan's reception by native Scots in Scotland has been less favourable for decades, even the last century or so. Reasons include a feeling that it is not really a symbol of broad Scottish national identity because of its specifically Gaelic and Highland origin; the "Highlandist" and imperialist foisting of it on the entire country as national costume in the late Georgian through Victorian eras; distorted views of Scottish people promulgated by Lauder and other tartaned entertainers of a century ago; an academic view of tartary and Lowland [kailyard literature] as two halves of a low-brow, romanticising vulgarity (reinforced in recent decades by the " [Tartan Army] " fandom of the Scotland national football team reinvigorating a working-class attachment to kilts and tartan); and historically inaccurate portrayal of Scotland by tartan-heavy Hollywood productions like [_Brigadoon_] (1954) and _[Braveheart] _ (1995).[\[792\]] Brancaz (2016) argues that "looking at tartan through the lens of the intelligentsia fails to account for its enduring appeal and resilience. ... \[T\]he wearing of kilts and tartans at weddings, funerals, and _[cèilidhs] _ in Scotland has increasingly been interpreted as a form of cultural reappropriation."[\[793\]] 

#### Industry and politics

\[ [edit] \]

In 2006, the [British Ministry of Defence] sparked controversy when it allowed foreign woollen mills to bid for the government contracts to provide the tartans used by the Scottish troops (newly amalgamated as battalions into the [Royal Regiment of Scotland]), and lowered the formerly very high standards for the cloth.[\[225\]] 

Following a [bill] submitted in the [Scottish Parliament] in February 2007,[\[794\]] Scotland's [enterprise minister] announced in July 2007 that the [National Archives of Scotland] would set up a national register of tartans.[\[795\]] The announcement stated that "Tartan's importance to Scotland cannot be overestimated. It is deeply embedded in Scottish culture and is an internationally recognised symbol of Scotland."[\[795\]] This was later reiterated in 2013 through the BBC.[\[796\]] The ministry cited an industry report indicating that "the tartan industry is a significant contributor to the overall [Scottish economy]; and larger ... than suggested by previous industry estimates", and is the basis for some 200 businesses, 4,000 jobs, and £350 million in annual [GDP] in Scotland.[\[795\]] The bill passed in October 2008, and the [Scottish Register of Tartans] launched in February 2009.[\[794\]] (See [§ Registration], below.)

General tartan-pattern clothing shot up in popularity again starting around 2010

_[The Observer] _ reported in 2010 that tartan clothing had become more popular than ever before, crossing subcultural, social-class, and age-group lines, and showing in that year a 540% sales increase in Britain from only two years earlier.[\[797\]] Around the same time, there began a resurgence in tartan kilt wearing among Scottish young people "as a mark of a vibrant, modern Scotland".[\[798\]] [\[799\]] [\[800\]] [\[801\]] This has interrupted a generations-long trend of native Scottish disaffection toward tartan as stereotyping [kitsch].[\[802\]] [\[803\]] An online survey by [BBC] in 2012 found that 52% of respondents strongly or very strongly disagreed with the premise "Walter Scott's re-branding of all Scots as tartan-wearing Highlanders has been a hindrance to Scotland's cultural development", and only a third agreed.[\[804\]] Tartan in mainstream, international fashion experienced another resurgence starting in 2019.[\[805\]] 

Contemporary [Scottish nationalism] has been said to be "fed, in part, by tartan and Jacobite nostalgia".[\[806\]] After avoidance of tartan since the 1970s (especially by Scottish [liberals]),[\[807\]] the cloth has been politicised again as a nationalist symbol (as it was in the early 18th century), especially during the [2014 Scottish independence referendum] and in the [Scottish National Party] 's 2015 campaign.[\[800\]] (Perhaps owing to this messaging shift, the VisitScotland agency around the same time changed its tourism advertising to minimise, though not eliminate, tartan imagery.)[\[808\]] [Murray Pittock] (2002) writes that the [neo-Jacobitism] is "both irritating kitsch and a language of identity" for modern Scots.[\[809\]] After several decades of intellectual hostility toward tartan (e.g. in [Tom Nairn] 's 1977 _The Break-up of Britain: Crisis and Neo-nationalism_, and [Hugh Trevor-Roper] 's posthumous 2008 _The Invention of Scotland_), an "academic re-assessment of tartan" began in the early 21st century, relying on a wider range of early and modern source material,[\[810\]] in [historiographical], multidisciplinary [edited volumes] including _Scottish History: The Power of the Past_ (eds. [Edward J. Cowan] and [Richard J. Finlay], 2002) and _From Tartan to Tartany_ (ed. Ian Brown, 2010).

Major commercial weavers (tartan mills) of traditional tartan cloth that are operating today include Lochcarron of Scotland[\[811\]] [\[812\]] in [Lochcarron] and [Selkirk]; Ingles Buchan in [Glasgow];[\[399\]] [\[812\]] House of Edgar (also a Highland dress vendor, and a subsidiary of Macnaughton Holdings) in [Perth];[\[399\]] Johnstons of [Elgin] (also a wool clothing maker),[\[399\]] Strathmore Woollen in [Forfar],[\[399\]] and D. C. Dalgliesh in Selkirk,[\[105\]] all three of which are now part of the Edinburgh-based Scotweb, under the [trade name] Clan;[\[813\]] Prickly Thistle (also a women's clothing maker) in [Evanton] and [Edinburgh];[\[814\]] The Tartan Weaving Mill (also a weaving museum, and a subsidiary of Gold Brothers) in Edinburgh;[\[815\]] Andrew Elliot Ltd in [Selkirk]; Stevens & Graham (specialising mostly in tartan rugs and carpet) in [Rutherglen]; Marton Mills in [West Yorkshire], England; [Cambrian Woollen Mill], in Powys, Wales; West Coast Woollen Mills in [Vancouver], British Columbia, Canada;[\[816\]] GK Textiles in [Port Moody], BC (formerly Fraser & Kirkbright, Vancouver);[\[817\]] and [Pendleton Woolen Mills] in Portland, Oregon, US.[\[216\]] The modern trade in wool tartan fabric has three principal markets: Highland dress, high fashion (with significant business from France and Italy), and furnishing.[\[818\]] [\[19\]] 

Popular tartans (including for kilts and other Highland dress, as well as for school uniforms) have increasingly been manufactured, primarily in the UK, in [poly-viscose] (PV),[\[819\]] a blend of the artificial materials [polyester] and [viscose] (rayon), typically in a 65% polyester to 35% viscose ratio.[\[820\]] [\[821\]] PV is promoted as washable, durable, crease-resistant but heat-settable for permanent pleating, shrinkage-resistant, stain-resistant, colour-fast, low- [pilling], hypoallergenic, not attractive to [clothes moths], more "breatheable" than polyester (thus good for athletics), lower cost than wool, and lighter weight than wool, but said to have a wool-like texture.[\[822\]] [\[823\]] [\[824\]] [\[825\]] [\[826\]] It also does not rely on animal industry, so it appeals to [vegans].[\[822\]] [\[823\]] Large-scale global manufacturers of tartan-patterned cloth in a variety of cotton, polyester, viscose, [nylon], etc., materials and blends include Başkan Tekstil in [Istanbul] and [Bursa], Turkey; and Jeen Wei Enterprises in [Taichung], Taiwan; while a leading maker of tartan [ribbon] is Satab in [Saint-Just-Malmont], France.[\[827\]] Tartan designs have long been produced in low-cost [cotton] in large quantities in China.[\[816\]] 

Carol Craig (2003) writes: "Like it or not, tartan is a very sophisticated branding and marketing tool for Scotland."[\[828\]] In a tartan-as-marketing analysis, Paterson (2001) observed that continued internationalisation of tartan manufacture, design, and consumption has diluted the associative "Scottishness" of tartan and its value as a national identifier. He blames this in part on Scottish weavers' failure to adapt to market demands for a wider range of fabric applications, as well as the businesses' own complicity in broadening tartan's perceived cultural identity, e.g. in creating tartans for non-Scottish families, places, and organisations.[\[829\]] 

(For particular 20th-century to present-day tartans, see also [§ Corporate and commercial] and [§ Fashion], below.)

Scene from 1954 _[Brigadoon] _ film, with kilts and tartan trews

In 1947, the tartan-laden Broadway musical _[Brigadoon] _ (followed by [a film version] in 1954 and a television adaptation in 1966) renewed an excessively romanticised notion of the Highlands and Highland dress. A critical review called it a "whimsical dream-world" that was "overloaded with Hollywood-Scottish trappings".[\[830\]] (The production is generally not well received by actual Scots.)[\[831\]] [\[832\]] 

Tartan suits were popular in the [mod subculture] of Great Britain of the early to mid-1960s and its late 1970s [revival].

" [Tartan Army] " Scottish football fans at a match in Milan, Italy, in 2005

Since the 1970s, the fandom of the [Scotland men's national football (soccer) team] have been collectively referred to by the nickname " [Tartan Army] ", with fans often sporting tartan clothing (including kilts) at matches.

The [Bay City Rollers] in the Netherlands in 1976, sporting some tartan shirts and a tartan-trimmed jacket

Popular in the mid-1970s, Scottish teeny-bopper band the [Bay City Rollers] were described by the _[British Hit Singles & Albums] _ reference book as "tartan teen sensations from [Edinburgh] ".[\[833\]] 

A German [punk] wearing a piece of the [royal Stewart tartan], 1984

Tartan became a common element of [punk subculture] starting in the late 1970s. [Punk music] was a way for youth in the British Isles to voice their discontent with the [ruling class] and with modern society. The unorthodox use of tartan (especially the [royal Stewart]), which had long been associated with authority and [gentility], was then seen as an expression of that discontent. In this way, tartan – worn unconventionally – became an [anti-establishment] symbol. This was entirely on purpose according to [Vivienne Westwood], a designer deeply involved in early punk fashion;[\[765\]] [\[834\]] the idea was "to seize the very fabric of the Establishment in order to reverse its meaning and perhaps to challenge society's design."[\[835\]] American punks often wore tartan skirts, a "subversion" of the Catholic school-girl uniform, and kilts have also been worn in the punk scene since the late 1970s, especially in the UK.[\[801\]] Baggy tartan pants later proved popular among [pop-punks] and [skate punks], and tartan-lined jackets among [ska punks]. (For further information, see [Punk fashion].) From the late 1990s, kilts (mostly modernised " [utility kilts] " but sometimes traditional ones) have become relatively popular even in North American post-punk subculture (e.g. the [goth] – [industrial], [emo], and [steampunk] scenes), though often in black rather than tartan.

After the 1970s, Westwood, who continued to work extenstively with tartan, was joined by other big-name _couturiers_. These included [Ralph Lauren] and [Laura Ashley], whose designs promoted tartan as a mainstream modern clothing option "with traditional grace and style" for both women and men;[\[811\]] [\[835\]] [Stephen Sprouse],[\[835\]] credited with a 1980s combination of "uptown sophistication in clothing with a downtown punk and pop sensibility";[\[836\]] and later [Alexander McQueen],[\[837\]] who was "consciously repoliticising the cloth".[\[319\]] Others have included [Jean Paul Gaultier], [Tommy Hilfiger] (who made tartan central to his fall 2000 collection), [Christian Lacroix], [Yves Saint Laurent], [Giorgio Armani], and [Gianfranco Ferré].[\[838\]] [\[835\]] A tartan outfit designed by Westwood featured on a [commemorative UK postage stamp] issued by the [Royal Mail] in 2012 celebrating "Great British Fashion".[\[839\]] 

Tartan/plaid [flannel] shirts, emblematic of the working class, re-entered mainstream fashion through a series of [subcultural] adoptions, originating primarily in the western United States. First, the style became a staple of [cholo] style in and around Los Angeles, from the 1970s. From there, the style later became adopted by [hip hop fashion] in the 1990s, especially the [West Coast hip hop] lifestyle.[\[840\]] Tartan flannel shirts also became quintessentially part of (and androgynous within) the [grunge] scene (starting in Seattle) of the late 1980s to 2000s.[\[841\]] There was fashion cross-pollination between these youth-culture movements,[\[842\]] and the fashion industry has found this confluence very marketable.[\[843\]] 

A resurgence of interest in tartan and kilts (and even Scottish tourism)[\[832\]] [\[844\]] has been generated in recent times by major Hollywood productions[\[845\]] like the [_Highlander_ franchise] (1986–2007),[\[846\]] [\[845\]] _[Four Weddings and a Funeral] _ (1994),[\[847\]] _[Braveheart] _ (1995),[\[848\]] [\[849\]] [\[850\]] _[Rob Roy] _ (1995),[\[848\]] [\[850\]] [\[851\]] _[Brave] _ (2012),[\[852\]] and the television series _[Outlander] _ (2014–, with a follow-on [travelogue documentary] series, _[Men in Kilts] _).[\[853\]] Many of these featured custom-designed tartans.[\[846\]] [\[854\]] 

Tartan clothing has appeared frequently in _[Doctor Who] _. The [Fourth Doctor] ( [Tom Baker]) wore a [Wallace] tartan scarf on _[Terror of the Zygons] _,[\[855\]] and his robot-dog companion [K9] had a tartan collar.[\[854\]] The [Sixth Doctor] ( [Colin Baker]) had a signature patchwork frock coat that included segments in three different tartans, and also typically wore a tartan waistcoat in a fourth sett under it.[\[856\]] The [Seventh Doctor] ( [Sylvester McCoy]) wore a crimson and black tartan scarf on _[Time and the Rani] _. [Clara Oswald] ( [Jenna Coleman]), the companion of the [Eleventh Doctor] ( [Matt Smith]) and the [Twelfth Doctor] ( [Peter Capaldi]), wore a [Campbell] tartan dress on " [The Name of the Doctor] " and a Wallace skirt on " [The Time of the Doctor] " and " [Deep Breath] ".[\[857\]] [Annabel Scholey] as Claire Brown, in the [Thirteenth Doctor] ( [Jodie Whittaker]) serial _[Flux] _, wears a 1960s-style muted tartan dress.[\[858\]] The [Fourteenth Doctor] ( [David Tennant]) wore a brown tartan suit in the [60th anniversary specials].[\[859\]] 

- 1980s _Doctor Who_ patchwork costume of the [Sixth Doctor], with at least three tartans involved

- Royal Stewart again, as a mod/ska-punk jacket lining, 2007

- [Rita Ora] performing in Glasgow in 2018, wearing a tartan trench coat made of at least five different setts

- [Grunge fashion] still alive and well in 2019, featuring a lot of tartan/plaid shirts

- A rather impractical tartan gown by [Christopher John Rogers], 2020–21, on display at the [Metropolitan Museum of Art] Costume Institute's exhibit _In America: A Lexicon of Fashion_


One of the most popular tartans is the [royal Stewart], ostensibly the personal tartan of the [British monarch], since George IV declared it his own (though it was probably designed by the [Sobieski Stuarts],[\[700\]] albeit based on mid-18th-century pattern called "Prince Charles Edward Stuart").[\[111\]] [\[63\]] The "royal" sett was first published in 1831 in the book _The Scottish Gaël_ by James Logan. In addition to its use in clothing, such as skirts and scarves, royal Stewart tartan has also appeared on biscuit tins for Scottish [shortbread],[\[860\]] and it has also long been favoured by the British [punk] scene.

Another tartan in very common use by the general public is [Black Watch] (also known as old [Campbell], [Grant] hunting, and Government).[\[127\]] This tartan, a dark variant (and ancestor) of the main Clan Campbell tartan, has long been used by military units in the [British Army] and other [Commonwealth] forces.

Early manufacturer Wilsons of Bannockburn made many "fashion", "fancy", or "national" tartans with catalogue numbers or fanciful names, without any association with particular families, districts, or organisations; two popular ones still in use are both usually called "Caledonia". Wilsons No. 3 is found in their 1819 _Key Pattern Book_ and is comparatively simple,[\[861\]] while No. 144 is more complex, though of a similar colour scheme, and seems to date to the late 18th century.[\[862\]] (The numbering suggests the other does as well.) Some other tartans in this "Caledonia" group were later claimed by clans; e.g. Caledonia No. 43 or "Kidd" became one of the [MacPherson] tartans.[\[863\]] [\[861\]] 

- Royal Stewart tartan

- Black Watch tartan

- Wilsons' No. 3 tartan, named Caledonia

- Wilsons' No. 155, also often called Caledonia


In the general fashion industry, various patterns are technically tartan but are not treated as tartan _s_ in the clan or district sense. The very basic red-and-black Rob Roy or Robert Roy MacGregor pattern, the oldest of the [Clan Gregor] setts (though named after [Rob Roy] in the Victorian period),[\[864\]] is also in broad use (often with changed colours) as one of the most common patterns used in [flannel] cloth for clothing and bedding; in the US, it is often called "buffalo plaid",[\[865\]] a term of uncertain derivation.[\[866\]] When the Rob Roy sett is changed to a white ground with any other colour this forms the most common [gingham] cloth style. Gingham is often given a wider setting, to form a lattice appearance (sometimes called "windowpane plaid" or "windowpane check").[\[867\]] When that pattern is given one or more additional over-check colours, the result is the pattern known as [tattersall].[\[867\]] 

- One of the most common [flannel] patterns, "buffalo plaid" is just Rob Roy MacGregor tartan (originally red and black) rendered in any of various colours

- Rob Roy changed to white and any other colour becomes [gingham] 

- Windowpane gingham

- Windowpane gingham with two or more over-checks becomes [tattersall] 


## Tartans for specific purposes

\[ [edit] \]

"Tartan of Pride", designed in 2008;[\[868\]] one of over a dozen [LGBT] -themed modern "fashion" tartans

In addition to clan tartans, many tartan patterns have been developed for individuals, families, districts and towns, institutions, corporations, and events.[\[9\]] They have even been created for particular religious and [ethnic groups],[\[cq\]] and for sociological groups like the [LGBT] community.[\[799\]] [\[873\]] Tartan has had a long history with the military, and today some military units – particularly those within the [Commonwealth]  – have tartan dress uniforms.[\[874\]] (See [List of tartans § UK military or government tartans].)

Many districts, cities, and towns in Scotland have their own tartans, mostly dating to the 20th century (though some few district tartans are quite old),[\[cr\]] and not always official; many were just created for marketing to tourists,[\[875\]] and some are copyrighted works tied to specific vendors.[\[876\]] They are intended primarily for those to whom a clan tartan does not seem to apply (see [§ Etiquette], below). At least two [local government councils in Scotland] have official tartans.[\[877\]] 

The Maple Leaf tartan, designed in 1964,[\[878\]] has been an official symbol of Canada since 2011.[\[879\]] 

In addition to the traditional district and modern geographic tartans of Scotland, new designs have been created for places in other countries. Only some regional tartans are officially recognised by the government bodies of the places the designs represent.

The [pan-Celticism] movement has inspired the creation of "national" (in the sense of [Celtic nations]) and sometimes regional tartans "to emphasise the ... bonds with other Celtic countries"[\[880\]] outside of Scotland; none of these appear to have any official recognition. There are [tartans of Cornwall], long a part of Devonshire in England (the designs date from 1963 to the 1980s);[\[cs\]] [Wales] (from 1967 onward[\[ct\]]  – sometimes with false claims of antiquity by marketers);[\[891\]] the [Isle of Man] (from 1946, many by D. G. Teall of the Scottish Tartans Society, and several asymmetric);[\[cu\]] [Brittany] in France (from 2002);[\[cv\]] [Galicia] in Spain (from 1990);[\[cw\]] and especially [Ireland] (from 1956).

After the discovery of the "Dungiven tartan" (see [§ 16th century], above) and its marketing as a district tartan for Ulster, Scottish weavers (and in two cases English, and in another American) decided to tap an Irish and especially Irish-American market by introducing a profusion of national, province, and county tartans for Ireland and [Northern Ireland], generally based on established Scottish tartans with some colour changes.[\[19\]] [\[216\]] These geographical tartans, which (aside from the Dungiven/Ulster reconstruction of 1956) date to 1970 and later,[\[220\]] do not have any official recognition, and are purely a product of the industry.[\[19\]] [\[214\]] One weaver even introduced a competing set of Irish national and county tartans in 1996, different from the previous offerings.[\[214\]] "The influence of native Irish people, either as suppliers or consumers of Irish tartans, would appear to be minimal."[\[19\]] 

Further afield, all but two [Canadian provinces and territories] have [official tartans], with the first dating from 1956. Neither [Quebec] nor [Nunavut], Canada's newest territory, have formally adopted patterns. [Alberta], meanwhile, has two official tartans, including a dress one. All but Quebec's were registered with the [Court of the Lord Lyon] in Scotland.[\[908\]] Canada has an official national tartan that was originally designed to commemorate the introduction of its new maple leaf flag, and was made an official national emblem in 2011.[\[879\]] Various Canadian regions (like [Labrador] and [Cape Breton Island]), counties, municipalities, and institutions also have official tartans.[\[cx\]] 

[Tartans have been created for Australia]; its capital city, [Canberra]; each of its [states]; and some of its [local government areas]; but only some of those tartans have been officially adopted or recognised by the relevant governments in Australia. [US states] have [official tartans], with the first dating from 1988.

### Hunting, mourning, dress, and dance

\[ [edit] \]

[Highland dancing], at a 2008 [Highland games] event, in [Aboyne dresses] with dance tartans that feature a lot of white

A tartan is sometimes differentiated from another with the same name by a label: _hunting_, _mourning_, _dress_, or _dance_. The first three of these ideas are the result of Victorian fondness for dress etiquette and show[\[911\]] (and weaver marketing);[\[66\]] the last is more recent.

**Hunting tartans** tend to be made up of subdued colours, such as dark blues, greens, and browns.[\[912\]] Although there is some evidence of early tartans with camouflage colours going back to the 16th century, hunting tartans, despite the name, have very little to do with actual hunting.[\[12\]] 

**Mourning tartans**, though quite rare, are associated with death and funerals. They are usually designed using combinations of black and white, or by replacing bright colours such as reds and yellows in a traditional tartan with black, white, or grey.[\[913\]] 

**Dress tartans** are usually special tartans for [formal-dress] occasions[\[914\]] (e.g. dress Stewart[\[915\]] is distinct from both the main [royal Stewart tartan] and the hunting Stewart,[\[916\]] among several other tartans attributed to [Stewart/Stuart]). In a few cases, a dress tartan is simply the main tartan of the clan.[\[cy\]] Dress tartans that do differ from main clan tartans are sometimes entirely different (e.g. [MacMillan] [\[36\]] and MacMillan dress[\[921\]] are unrelated designs), while in most cases they are based on the main tartan but with colour differences (e.g. Stewart). Some dress tartans are very modern,[\[82\]] [\[624\]] but some date back to the era of the _Vestiarium Scoticum_.[\[922\]] 

**Dance tartans**, intended for [Highland dance] outfits, for either sex, are inspired (like most dress tartans before them) by the [arisaid] (_earasaid_ tartans thought to have been worn by Highland women in the 17th and 18th centuries, which often featured white as a major colour, as do typical dance tartans today (most or all of which date to the 20th century or later). Some dance tartans are named "arisaid" rather than "dance", e.g. [Fraser] arisaid.[\[923\]] [\[cz\]] 

There has been some confusion between dress and dance tartans, especially since the idea of the latter developed from the former.[\[da\]] Most dress tartans, including some of the oldest, also have white in them, and have been used for dance competition in lieu of a dance-specific tartan, so are easy to mistake for dance tartans, which almost invariably have white in them.[\[926\]] [\[db\]] 

### Family and individual

\[ [edit] \]

A large proportion of non-clan tartans in all of the modern tartan databases have always been family tartans, promulgated mostly from the late 20th century for family names that are not clans or listed as [septs] of clans. These are usually Scottish surnames, but the _[Scottish Register of Tartans] _ ( _SRT_) database increasingly includes new family tartans for names that are not Scottish or even British. Most family tartans have no copyright claim, since they are intended for use by anyone with the surname or an extended-family connection. The _SRT_ classifies them together with clan tartans in a "clan/family" category if they have history that pre-dates _SRT_ or if they are newer and are approved by a legally recognized clan chief or family head, but in a "name" category if they are newer and lack such imprimatur.

The [British royal family] 's own Balmoral tartan (designed c. 1852). It is incidentally one of the few long-established tartans with multiple hues of the same colour (two greys, in this case).

A few non-clan family tartans have an older pedigree. The best known is Balmoral tartan, reserved for the [British royal family] and personal [pipers] thereof, since its creation by [Prince Albert] c. 1852.[\[dc\]] (See also further discussion under [§ Etiquette], below.) Some clans recognise tartans for specific family branches and septs that are not themselves generally regarded as clans. For example, [Clan Robertson/Donnachaidh/Duncan] acknowledges separate, established tartans (some of them quite old) for Inches, MacGlashan, MacInroy, MacLagan, MacPhee, MacWilliam, Reid, and Robinson,[\[928\]] and they are all registered in the _SRT_.

Since the late 1960s, various weavers have marketed (primarily to Irish Americans) some tartans with Irish family names, without any involvement by family members.[\[216\]] There had also been a legend that the rare _Clans Originaux_ (1880) contained Irish family tartans, but this was finally disproven in 2003.[\[19\]] [\[dd\]] There is one case of a formal [Irish clan] /family tartan, however: The [Clan Cian] Society commissioned a tartan for Cian of [Ely], and registered it with the [Chief Herald of Ireland] in 1983.[\[19\]] [\[216\]] (Even this has an Irish-American connection, as the chief resided in California, and the society is US-headquartered.)[\[930\]] Similarly, a commercial operation in [Cardiff] named Wales Tartan Centre (supplied by [Cambrian Woollen Mill]) has since the early 2000s promoted a long series of tartans named for common or prominent Welsh family names; they are unusual in often having odd-numbered thread counts, and having a different [warp and weft] (producing rectangular rather than square patterns), probably to distinguish them from the Scottish style.[\[891\]] [\[931\]] 

For the [much narrower sense of _family_], the _SRT_ registers also as "name" tartans those that are created by individuals for only themselves and their immediate-family members, often for weddings; these usually have a copyright claim. One of the earliest tartans named for a specific person[\[de\]] is the "Janet Wilson sett", entered into the late 1770s records of Wilsons of Bannockburn and believed to refer to the company founder's wife or daughter-in-law, though made as one of their publicly available patterns.[\[317\]] [\[df\]] 

### Corporate and commercial

\[ [edit] \]

Numerous Scottish brands use tartan, and some have unique tartans. Various not-for-profit organisations also have corporate tartans. Probably the earliest case was that of the Ancient Caledonian Society of London (founded in 1786 and defunct since 1837), which used what is believed to have been a consistent tartan[\[934\]] for its members' [frock coats] (which, unusually, featured [brocade] woven into the tartan, of [Jacobite] white roses – it may be what 1767 advertisements called "flowered tartan"); only one known example of the coat survives.[\[355\]] [\[935\]] 

Scottish airline [Loganair] in its tartan livery

As an example of a modern commercial tartan, [Irn-Bru] (introduced in 1901), the best-selling [soft drink] in Scotland,[\[936\]] has its own tartan.[\[937\]] Scottish regional airline [Loganair] uses tartan livery, including on the tails of its planes, and has two registered corporate tartans.[\[938\]] "Racing Stewart"[\[939\]] is a pattern created in 1995 for the [Jackie Stewart] Formula One car-racing team.[\[940\]] 

"DunBroch", a tartan devised by Disney/Pixar for fictional characters in the animated film _[Brave] _

The "corporate" category is one of the fastest-growing in the official _[Scottish Register of Tartans] _ ( _SRT_) database, with a large number of Scottish (and American and other) companies and societies registering organisational tartans. These are generally protected by [copyright] and sometimes [trademark] law. These tartans vary in purpose from general corporate livery, to special event tartans, to tartans for fictional characters.

Two examples of the latter are [Sanrio] 's 2004 creation of a predominantly pink tartan for [Hello Kitty];[\[91\]] and the 2011 creation by [Disney] / [Pixar] of the DunBroch tartan for the family of the main character, Mérida, of the animated Highland fantasy/adventure film _[Brave] _.[\[941\]] 

Handbag in [Burberry] check

An early example of a tartan created by and for the fashion industry, and surely the most famous, is "Burberry check". It was introduced in the 1920s for the lining of trench coats made by [Burberry] of London, but has been used for all manner of clothing and accessories since 1967[\[942\]] (with another major marketing push in 2001) and is emblematic of the company and its upscale product line.[\[943\]] (For additional information, including a legal dispute, see [§ Legal protection], below.)

A fast-growing category in the _SRT_ is that of "fashion" tartans, created by companies and individual designers simply for aesthetic reasons, without any association with a particular clan, family, region, etc. Like organisational tartans, most of these have a copyright claim attached to them.

A prominent example: In 2017, Scottish fashion designer [Charles Jeffrey] designed a signature tartan for his Loverboy label, registering it in the _SRT_.[\[125\]] 

Manufacture and use of tartan (at least in the Scottish context) is regulated, formally and informally, in three ways: _registration_ (recording of a tartan and its association, if any, with a particular family, organisation, person, event, etc.); _legal protection_ of a tartan as intellectual property (trademark, copyright); and _etiquette_ (socio-cultural norms regarding the use of tartan and Highland dress).

Coat of arms of the [Scottish Register of Tartans] 

The naming and registration of "official" clan tartans began in 1815, when the [Highland Society of London] solicited clan tartans from clan chiefs.

Following recognition by a clan chief of a tartan as a clan tartan, the chief was formerly able to petition the [Lord Lyon King of Arms], the Scottish [heraldic] authority, to register it as a formal clan tartan.[\[dg\]] Once approved by the Lord Lyon, after recommendation by the Advisory Committee on Tartan, the clan tartan was then recorded in the Lyon Court Books.[\[305\]] However, leading up to the launch of the [Scottish Register of Tartans] in 2009 (see below for details), the office of the Lord Lyon stopped providing this tartan-recording process (though its statutory authority was not changed by the Tartans Bill).

Modern-day tartans can be created and registered by anyone, with the Scottish Register of Tartans. Modern registered tartans include ones for Scottish and other districts, cities, and towns; for Irish counties (devised since the 1990s)[\[105\]] and families (for example, the surname [Fitzpatrick] has two registered tartans[\[944\]]); for organisations and companies; and even for specific events or individuals. Tartans are also being created in record numbers among the [Scottish diaspora] in the United States, Canada, Australia, New Zealand, etc., especially for places, military divisions, pipe bands, and individuals and their immediate families.

Until the late 20th century, instead of a central official tartan registry, independent organisations located in Scotland, Canada, and the United States documented and recorded tartans.[\[945\]] In 1963, an organisation called the [Scottish Tartans Society] (now defunct, and originally named Scottish Tartans Information Centre)[\[602\]] was created to record and preserve every known tartan design.[\[946\]] The society's _Register of All Publicly Known Tartans_ ( _RAPKT_) contained about 2,700 different designs of tartan.[\[947\]] Registration of new designs was not free of charge. The society, however, ran into financial troubles in 2000, and folded.[\[948\]] [\[399\]] 

Former members of that society formed two new Scotland-based entities – the [Scottish Tartans Authority] (STA, 1996 – before STS closed) and the [Scottish Tartans World Register] (STWR, 2000 – the [trade name] of a private company, Tartan Registration Ltd).[\[399\]] Both of these organisations initially based their databases on the _RAPKT_. STA's database, the _International Tartan Index_ ( _ITI_) consisted of about 3,500 different tartans (with over 7,000, counting variants) as of 2004.[\[947\]] The online _ITI_ was later rebranded _The Tartan Ferret_. STWR's self-titled _Scottish Tartans World Register_ database was made up of about 3,000 different designs as of 2004.[\[947\]] Both organisations were registered as Scottish [charities] and recorded new tartans (free in the case of STA and for a fee in the case of STWR) on request.[\[949\]] [\[950\]] 

In the interim, a jointly Scotland- and US-based organisation, International Association of Tartan Studies and Tartan Educational & Cultural Association (IATS/TECA) emerged in 1984[\[399\]] and published its own _TartanArt_ database in the early 1990s as Microsoft Windows software which was much used in the North American kilt-making trade. IATS/TECA was absorbed by STA by 2005.[\[399\]] 

The [Scottish Register of Tartans] (SRT) is Scotland's official tartan register, and was established in 2009.[\[951\]] SRT is maintained and administered by the [National Archives of Scotland] (NAS), a statutory body based in [Edinburgh].[\[952\]] The aim of the register is to provide a definitive and accessible resource to promote and preserve tartans. It is also intended to be the definitive source for the registration of new tartans (if they pass criteria for inclusion and a registration fee is paid). The database itself – also named simply _Scottish Register of Tartans_, and sometimes called _TartanRegister_ from its domain name – is made up of the pre-existing registers of STA and STWR as they were at the time of SRT's launch (preserving the STA's and STWR's registration numbers, dates, and other details in the SRT data), plus new registrations from 5 February 2009 onward. On the register's website, users can register new tartans, search for existing tartans and request their thread counts, and receive notifications of newly registered tartans.[\[951\]] [\[953\]] 

STWR became defunct some time after 2008. STA later closed the _ITI/Tartan Ferret_ to new registrations, and in late 2022 removed the search feature from the STA website (pending a site redesign), deferring to the Scottish Register of Tartans, which now appears to be the only operating tartan registry. STA continues offline work on the _ITI_ database, correcting errors, importing new _SRT_ additions, and recording historical patterns newly discovered in museum holdings, etc.

Some modern tartans are protected by [trademark] law, and the trademark proprietor can, in certain circumstances, prevent others from selling that tartan.[\[127\]] An example is the " [Burberry] check" of the English fashion house, an instantly recognisable tartan that is very well known around the world.[\[954\]] [\[dh\]] 

Unlike [trademark registration] and [copyright registration], the Scottish Register of Tartans (SRT) and its authorising Tartans Bill do not create any new or enhanced [intellectual property] rights through the act of registration (nor provide any enforcement mechanism other than removal of infringing entries from the registry).[\[958\]] 

SRT, however, permits registrants optionally to assert and record copyright and/or trademark claims over their new tartans, for designs that are eligible for such protection under other established law[\[959\]] (such as the [Copyright, Designs and Patents Act 1988]; and the [Scotland Act 1998], which took over copyright and trademark registration and enforcement in Scotland)[\[960\]] and lists such tartans as restricted. An SRT registration "provides evidence of the existence and date of \[the\] design",[\[961\]] which helps establish the copyright date under the [Berne Copyright Convention]. Such legal protections apply only to comparatively recently created tartans; old clan, regimental, and district tartans are outside the protection periods of such intellectual property laws.[\[43\]] 

SRT also permits the listing of intended _use and manufacture_ restriction preferences, but has no enforcement capability,[\[962\]] and also includes a statement that "No other rights can be conferred."[\[961\]] British tartan weavers, such as Lochcarron and D. C. Dalgliesh, generally will not produce material in an SRT "restricted" tartan without written evidence of permission from the copyright/trademark claimant. In additional furtherance of intellectual property concerns, the SRT also refuses to register a new tartan that is confusingly similar to any existing one (as determined by an SRT review process).[\[963\]] 

The application of copyright law to tartans is not well tested. The leading British legal case on textile copyright, concerned with designs printed on fabric, is _[Designer Guild Ltd v Russell Williams (Textiles) Ltd] _ (2000), finding for fairly broad copyright protection in textile works that involve creative originality.[\[964\]] In 2008, two tartan pattern copyright holders, Rosemary Nicolson Samios and weaver Lochcarron of Scotland, took legal action for infringement of an [Isle of Skye] district sett (designed 1993) and the [Princess Diana] Memorial sett (designed 1997), respectively, against the Gold Brothers firm of Surinder, Galab, Malap, and Dildar Singh, who operate dozens of stores in Scotland and online that sell primarily Chinese-made tartan objects or "tartan- [tat] ", including cheap Highland-dress outfits, for the tourist market.[\[876\]] The Isle of Skye tartan was considerably profitable for Samios, after the pattern was popularised by Queen [Elizabeth II] wearing it in 1999. The Princess Diana sett was designed by Alistair Buchan of Lochcanrron and of the [Scottish Tartans Authority] as a charity fundraiser. A British court on 2 July 2008 issued an [interim interdict] (preliminary injunction) against Gold Brothers' sale of Isle of Skye goods, after a police search found hundreds of metres of the pattern in Chinese-made cloth in the company's warehouse.[\[876\]] [\[965\]] [\[966\]] [\[967\]] Both cases may have been [settled out-of-court] because published news regarding them ceases in 2008. A more recent case, _Abraham Moon & Sons Ltd v. Thornber & Others_ (2012), actually involved tartan. It held that the textual _ticket stamp_ (a detailed set of weaving instructions, i.e. a thread count with additional information on precise colours, etc.) used to produce a tartan designed in-house by the claimant had been infringed, was protected as a literary work, and _also_ constituted a "recording" of the graphical work of the tartan and thus was independently protected as a work of artistic craftsmanship.[\[43\]] [\[968\]] As of 2020, the decision was being appealed, as it conflicted with previous caselaw, e.g. _Hensher v Restawile_ (1976), holding such instructions to be uncopyrightable.[\[969\]] [\[43\]] 

While tartan arguably could be classified as a form of [intangible cultural heritage],[\[970\]] and its value to identifying Scottish products both in Scotland and internationally has been recognised and exploited for a long time,[\[940\]] tartan is not protected by either [geographical indication (protected designation of origin)] law, nor _[sui generis] _ legislation specific to that kind of product.[\[971\]] [Harris tweed], another textile associated more narrowly with Scotland, does have such protection. In 1998, Keith Lumsden, research officer of the [Scottish Tartans Society], proposed that the word _tartan_ be prohibited for use to market a textile, unless the design was accepted in an official governmental tartan registry (which did not then exist).[\[940\]] When the Scottish Parliament finally authorised the Scottish Register of Tartans in 2008, it did not include anything like this sort of trade protection. According to Michael B. Paterson (2001): "No mechanism exists to protect \[traditional Scottish\] tartan from 'misuse' by interests having nothing to do with Scotland or Scotland's interests", though the tartan registries "play an important, if weak, role in asserting Scotland's [cultural rights] in relation to tartan."[\[972\]] 

Scottish actor [Sean Connery] at a [Tartan Day] celebration in Washington DC. When [knighted] by Queen Elizabeth II in 2000, he wore this green-and-black hunting-tartan kilt of his mother's [Clan Maclean].

Since the [Victorian era], authorities on tartan have claimed that there is an [etiquette] to wearing tartan, specifically tartan attributed to clans or families. In the same line of opinion, some tartans attributed to the [British royal family] have been claimed to be "off limits" to non-royalty.[\[973\]] [\[974\]] Even so, there are no laws or universally accepted rules on who can or cannot wear a particular tartan. (Some writers have nevertheless asserted their existence anyway, e.g. Alexander Campbell in 1890, regarding different Campbell tartans.)[\[975\]] The concept of the entitlement to certain tartans has led to the term _universal tartan_, or _free tartan_, which describes tartan which can be worn by anyone without controversy. Traditional examples of such are the Black Watch, Caledonia, hunting Stewart, and Jacobite tartans, [shepherds' check], and district tartans.[\[976\]] [\[799\]] [\[977\]] The published marketing of tartans for simple fashion purposes without any association to a place or body dates back to at least 1745,[\[496\]] and much of Wilsons' output through the 19th century consisted of "fancy" patterns for the general public.[\[978\]] Some recently created designs intended for everyone (though some are exclusive to particular weavers or Highland-dress outfitters) have names including Braveheart, Clansman, European Union, Highlander, Independence, Pride of Scotland, Rainbow, Scotland 2000, Scotland the Brave, Scottish National, Scottish Parliament, Spirit of Scotland, Stone of Destiny, and Twenty First Century.[\[979\]] 

Books on Scottish clans list guidelines,[\[127\]] but are not always in agreement. One such opinion is that people not bearing a clan surname, or surname claimed as a sept of a clan, should not wear the tartan of their mother's clan.[\[980\]] This opinion is reinforced by the fact that in the Scottish clan system, the Lord Lyon states that membership to a clan technically passes through the surname. This means that children who bear their father's surname belong to the father's clan (if any), and that children who bear their mother's surname (her [maiden name]) belong to their mother's clan (if any).[\[981\]] Also, the Lord Lyon states that a clan tartan should only be worn by those who profess allegiance to that clan's chief.[\[982\]] 

Some clan societies even claim that certain tartans are the personal property of a chief or chieftain, and in some cases they allow or deny their clansfolk "permission" to wear that tartan.[\[di\]] According to the [Scottish Tartans Authority] – which is an establishment of the Scottish tartan industry – the Balmoral tartan should not be worn by anyone who is not part of the British royal family. Even so, some weavers outside of the United Kingdom ignore the "longstanding convention" of the British royal family's "right" to this tartan. The society also claims that non-royals who wear this tartan are treated with "great disdain" by the Scottish tartan industry.[\[984\]] [\[dj\]] 

Generally, a more liberal attitude had been taken by those in the business of selling tartan, holding that anyone may wear any tartan they like. Under the liberal view, claimed "rules" are mere conventions (some of which are recent creations), with different levels of importance depending on the symbolic meaning of the tartan on some particular occasion.

The [Standing Council of Scottish Chiefs] has also taken a fairly flexible position (organisationally; some specific individual chiefs may have a narrower or looser take, and not all chiefs are members). Aside from opposing the creation of a new tartan using a clan's name without the chief's permission, their website states (adopting more loosely some ideas from the Lord Lyon view):[\[987\]] 

> There are no strict rules on who has the right to wear a particular tartan. People normally wear only the tartan (if any) of their surname, or a "district tartan" connected with where they live or where their family come from. Wearing a particular clan tartan indicates that the wearer bears an allegiance to the chief of that clan.

Some Highland-dress historians have taken a dim view of regulatory intents and proclamations with regard to tartans; Scottish National Portrait Gallery curator A. E. Haswell Miller wrote that "to claim special entitlement to a tartan in the same manner as heraldic arms is certainly absurd", because evidence suggests that the idea was just invented by writers of the late 18th to mid-19th centuries.[\[988\]] Sir [Thomas Dick Lauder] expressed similar views as far back as 1829, right in the middle of the "clan tartanry" rush, dismissing both the then-new adoption of "official" clan tartans and attempts by clans to claim regimental ones.[\[561\]] 

While tartan has been most closely associated with Scotland, and dating back to the Roman period was perhaps associated with Northwestern Europe in general, it is likely that the idea of using patterns of rectangles and lines has independently occurred many times, in any cultures with weaving.[\[989\]] Basic tartan "is almost as primitive a weave as it is possible to make ... probably the earliest form of patterened fabric anywhere."[\[2\]] Surviving pre-modern historical examples seem sparse, however.

Modern tartan-style cloth in a wide variety of materials and patterns from simple to complex is available and used today around the world, often simply as a style of cloth and without any association with Scotland.

Maasai men c. 1906–1918, one wearing a tartan _shúkà_; photo by [Walther Dobbertin] 

Among the [Maasai people] of Kenya and Tanzania, the _shúkà_ is a [cotton] blanket-like garment (what Scots would call a plaid) worn as a wrap, and very commonly in a tartan pattern, though sometimes linearly striped or of one colour.[\[990\]] _Shúkà_ are predominantly red, though sometimes seen in blue and other colours.

[Maasai] men in _shúkà_; [Narok County], Kenya, 2018

_Shúkà_ were originally of painted (typically red) leather, but Maasai have had access to [plain-weave] cotton fabric for some time, imported to the region by Americans since the 1860s.[\[990\]] [Joseph Thomas Last], a British [missionary], in 1883 described the Maasai as particularly fond of red and white cloth, to be worn by higher-status men (though he did not mention tartan in particular);[\[991\]] a 1903 report also had them typically wearing red blanket-like garments, after a time of favouring blue.[\[990\]] The Maasai were loosely allied with the British, 1895–1904,[\[992\]] and the latter made heavy use of [Scottish regiments] in African conflicts, bringing tartan with them. However, " [Guinea] cloth" (mostly produced in India), sometimes red and blue checked, was a common commodity in 18th-century western Africa, pre-dating [British West Africa]; whether it relates at all to _shúkà_ is unknown.[\[993\]] _Shúkà_ patterns usually lack the thin black lines common in Scottish tartans.

A nomadic [cattle-pastoralist] culture, without their own weaving tradition, the Maasai have been described as unusually culturally conservative and resistant to modernisation.[\[994\]] Nevertheless, they have always engaged in trade to get goods they do not make themselves,[\[991\]] and have made local traditional use of modern materials.[\[995\]] The Maasai approach has been to resist yet assimilate [colonial] and post-colonial influences.[\[996\]] 

Although there is evidence of tartan usage among the Maasai to at least the period 1906–1918, when [Walther Dobbertin] photographed a tartan _shúkà_ in what was then [German East Africa], the current bright tartan and striped style of _shúkà_ appears to have been adopted primarily in the 1960s[\[993\]] [\[997\]] (partly in response to national-level clothing modernisation pressure), supplanting leather but keeping the same form-factor.[\[996\]] The shift in outward form without affecting function led one writer to quip that Maasai dress "has undergone dramatic changes while not changing at all".[\[998\]] Tartan-patterned cloth is not typically used for other Maasai garments besides _shúkà_.

The _shúkà_ has become so emblematic of the Maasai that there is some discussion (driven by the Maasai themselves) at the national and regional level about protecting it as a form of [cultural property].[\[999\]] While it has been claimed that _shúkà_ patterns, at least at one time, conveyed particular meanings,[\[dk\]] and there historically have long been weaving operations in various African areas,[\[1000\]] most _shúkà_ today that are not mass-manufactured in [Dar es Salaam] or [Mombasa] actually come from China, not Africa.[\[993\]] 

### East and South Asia

\[ [edit] \]

The earliest-discovered tartan fabric in the world was discovered in Western China, in the context of the Tarim mummies, dated to c. 2100 BC through the first centuries BC (See [§ Pre-medieval origins], above). Today, tartan is still woven in China, both as a traditional fabric and in large commercial quantities for export.[\[816\]] 

- Chinese man in traditional hat of silk tartan with wool pompons, 2008

- Historical [brocade] of the [Zhuang people] in [Yunnan], China (photo 2011). It is often very complex material, but sometimes simple tartan like this.

- A tartan [cheongsam] (qipao) at a [Hong Kong] clothier in 2021

- Tartan and other textiles for sale in bulk at [Yen Chow Street Hawker Bazaar], Hong Kong, 2022

- A simple three-colour tartan pattern being woven on a hand loom in [Pilikula] heritage village, India, 2016

- Indian [sari] in a two-colour tartan pattern with highlights at the crossings of the black lines, which may be [embroidery] or [supplementary weaving] 

- A modern, elaborate _kōshijima_ dress from Japan's [lolita fashion] subculture, 2018


Four Bhutanese men, 2012, in _gho_ robes, with four different _mathra_ patterns, from vary narrow to quite broad

In [Bhutan], traditional men's robes (_gho_)[\[1001\]] and knee-stockings (_omso_, similar to [argyle] socks),[\[1002\]] and women's dresses (_kira_)[\[1003\]] are traditional [national costume] styles that are largely mandatory for public dress since 1963.[\[1004\]] [\[dl\]] Tartan (generally called _mathra_ or, after the district of its primary production, _[Bumthang] mathra_,[\[1006\]] [\[dm\]] among other names for specific patterns) is among the many common textile styles for these garments, some much more elaborate (generally called _yathra_)[\[1008\]] than tartan. The tartan cloths are woven traditionally in [yak] and [sheep] wool, but today also in [cotton] and [raw silk].[\[1009\]] 

_Gira_ dress featuring "X" patterns where the white stripes meet, produced by supplementary weaving

_Mathra_ is woven primarily with a red ground. Some specific tartan/plaid styles of Bhutan are: broad-checked _thra bom_; narrow-checked _thra charuru_; _sethra_ ('golden pattern'), an orange or rust ground with yellow and sometimes black checks (with black, it is more specifically called _sethra dokhana_, and without, _dalapgi sethra_); red, blue, and black patterns on a white ground, in at least four varieties called _pangtsi_ (specifically red and black on white),[\[1010\]] _[Decheling] kamtham_, and other names;[\[1011\]] and another style is named _burai mathra_.[\[1012\]] Some of these fabrics feature [supplementary weft] decorative patterns (flowers, etc.) added to the tartan, with an [embroidered] or [brocaded] appearance, generally called _pesar_ ('new pattern'); one such style is more specifically called _sethra metho chen_, the yellow-orange pattern with flowers added. There are also patterns of simple linear stripes that do not cross each other (generally called _adha\[ng\] mathra_ or _aikapur_), with various names for specific styles.[\[1013\]] 

Samples of tartan madras cloth, showing its muted look

Madras is a patterened, light-weight, breatheable, cotton cloth named for the Madras (now [Chennai]) area of India.[\[1014\]] Traditional madras is hand-woven from lumpy, [carded] -cotton thread, and coloured with natural dyes which may bleed together upon washing to create a more muted pattern than typical tartan, as well as a rougher texture.[\[1015\]] Madras also has a "softer" look because it typically lacks the black lines found in most Scottish tartans. Madras cloth dates to at least the 16th century, produced in a variety of patterns, including religious designs and floral prints.[\[1015\]] It is unclear if tartan patterns were among the original designs, though they became very popular later. Weaving, primarily for export, in Madras/Chennai became a large-scale commercial enterprise after the British [East India Company] came to control the area in the mid-17th century.[\[1016\]] Major production of this style of cloth also took place in [Cambay State] (present-day [Gujarat]).[\[1017\]] 

Madras, ideal for warm-weather wear, became popular in the Philippines (where it is known as _cambaya_)[\[1017\]] and the Caribbean;[\[1015\]] mainly in undyed form, it was also exported to Europe.[\[1015\]] Tartan madras reached America by 1718, and appeared in the 1897 [Sears] catalogue.[\[1015\]] It was popular in the United States in the 1930s and again in the 1960s, often associated with [preppy] style.[\[1015\]] Substantial export of the cloth to South Africa began in 1958.[\[1015\]] 

Modern madras cloth is commonly in tartan patterns, but also simply striped ( [seersucker]). Unlike Scottish-style tartan, madras is not woven in 2/2 [twill] pattern, but is a [muslin] of [plain weave];[\[1015\]] it thus, when viewed up close, features a "pepper and salt" colour mixture where colours cross[\[6\]] (a [dot matrix], technically), not staggered diagonal lines (see [detail image]). It also usually lacks black lines.

[Woodcut] image of Japanese _[kabuki] _ actor Iwai Hanshiro IV dressed in _kōshi_, 1780s

In Japan, tartan patterns called _kōshi_格子 (also _koushi_ or _goushi_, literally
'lattice') or _kōshijima_格子縞 date back to at least the 18th century,[\[409\]] possibly the 17th[\[1018\]] in the [Edo period] (1603–1867), and were popular for _[kabuki] _ theatrical costuming, which inspired general public use by both sexes, for the _[kosode] _ (precursor of the _[kimono] _),
the _[obi] _, and other garments.[\[1019\]] The name is a reference to the details of _[shoji] _ room dividers, the grid pattern said to stand for strength, with larger stripes representing more power.[\[1019\]] _Kōshi_ range from simple checked patterns to complex multi-colour weaves. [Ikat] thread-dyeing techniques were sometimes employed before the weaving, such that a colour in the pattern was mottled,[\[1019\]] and parts of the design may sometimes have been [embroidered], [supplementary-woven], or dyed-over for additional highlight or contrast.[\[1019\]] Some styles have particular names, such as _misuji-kōshi_ ('three-striped lattice')[\[1019\]] and _futasuji-kōshi_ ('forked lattice').[\[1020\]] A pattern with larger squares is more generally called _ogoshi_ or with smaller squares _kogoshi_.[\[1021\]] 

It is unclear whether there was a Scottish tartan influence on the development of _kōshi_. The Edo period pre-dates the [Perry Expedition] of 1853–1854 and its opening of Japan to general [Western] trade, but mostly post-dates early European contact from 1543 to the closure of Japan to outsiders in 1639 under the _[sakoku] _ isolationist policy.

Nothing suggests that particular patterns have been associated with specific families or [Japanese clans].

Today, _kōshijima_ is the general Japanese word for 'tartan/plaid, checked pattern'.[\[1022\]] Tartan is popular in present-day Japan, both for high fashion and for streetwear,[\[319\]] as well as [school uniforms].[\[685\]] Since the 1960s, the Japanese department store chain [Isetan] has used an emblematic tartan as a marketing tool (e.g. on all its shopping bags); the pattern is based on some [MacMillan] tartans.[\[1023\]] Japan hosted a major museum exhibit about tartan in 2018.[\[1024\]] 

### Eastern Europe to Western Asia

\[ [edit] \]

Tartan-style patterns are common throughout Southeastern Europe.

[John Francis Campbell] (1862) described the native weaving of the [Sámi] (Lapps) of northern Europe as being hand-loom tartan.[\[989\]] 

Considerably to the southeast, the [Tatars] [\[dn\]] and [Chuvash], [Turkic] peoples of [Tatarstan] and [Chuvashia], respectively, in the Russian Federation, have worn tartan, striped, and other patterns since at least the 19th century.

- Detail of [Serbian] tartan folk dress, densely [pleated], 2017

- Tartan patterns used in a [Bulgarian folk] costume

- Simple [shepherd's check] tartan being woven by [Pomaks] in Greece, 2007

- Example of 1920s tartan cloth from [Belarus], in a complex non-twill [damask] weave

- Silk tartan cloth of white, grey, and golden thread from [Lithuania] 

- [Estonian] woman wearing a tartan _suurrätt_ (plaid/shawl)

- Another Estonian _suurrätt_, with a total-border pattern of more complexity than the simple central pattern

- [Tatars] in [Kazan] in 1870, wearing tartan, stripes, and other patterns

- [Chuvashian] example, c. 1870


[Robert Jamieson], writing in 1818 as editor of [Edmund Burt] 's 1727–37 _Letters of a Gentleman in the North of Scotland_, said that in his era, married women of the north-western provinces of [Russia] wore tartan plaids "of massy silk, richly varied, with broad cross-bars of gold and silver tissue".[\[94\]] This seems quite distinct from Scottish-style construction.

[Alexander Pushkin] wearing a tartan cape; by [Orest Kiprensky], 1827

The Russian poet [Alexander Pushkin] (1799–1837), who was influenced by the romantic-Highlands writings of [Walter Scott],[\[1025\]] [\[1026\]] posed for one of the most famous paintings in Russia, the [1827 portrait] by [Orest Kiprensky]. Pushkin wears what looks at first like a Scottish-style tartan [shoulder plaid], but is more probably a sleeveless "Almaviva" cape/cloak, a style in fashion at the time and known to have been worn by Pushkin.[\[763\]] 

Tartan was commented on in the _Moscow Telegraph_ in 1826 as being in broad fashion in the city for all sorts of garments (often as a decorative accent).[\[763\]] Scottish-style plaids apparently did come into some fashion in Russia as women's wear for a space during the mid-to-late 19th century, a style picked up from stage productions; some 19th century Russian [paintings illustrate] use of plaids as [shawls].[\[763\]] Tartan (and plain-striped) shawls were also common among the [Volga Germans] and [Bessarabia Germans] in Russia; a mixture of hand-woven (originally as bedclothes and other household goods) and mass-produced in Russia, the shawls became emblematic of the German-from-Russia [diaspora] in North and South America from the nineteenth century to the mid-20th.[\[1027\]] [\[1028\]] 

Around the end of the 19th century, the Russian equivalent of Regency and Victorian British tartanware objects, such as decorative [Fedoskino] boxes with tartan accents in a style called _Shotlandka_Шотландка (literally 'Scotlandish'), were produced by companies like the Lukutin Manufactory on the outskirts of [Moscow].[\[1029\]] 

Today, _shotlandka_ or _shotlandki_шотландки are simply Russian words for 'tartan/plaid' generally.[\[1030\]] 

- 1839 portrait of Maria Arkadievna Bek by [Pimen Orlov] may illustrate one of the Russian plaids with silver thread

- Posthumous portrait of [Alexander Pushkin] by [Carl Peter Mazer], 1839, shows him in a red and green tartan dressing gown.[\[763\]] 

- Tatyana Petrovna Musina-Pushkina, Princess Kropotkina (1800–1865), portrait c. 1840s by unknown artist


#### Adoption by the Māori

\[ [edit] \]

[Pōtatau Te Wherowhero], the first [Māori] [king], adopted a particular house tartan with design elements symbolizing his ancestry, such as inner stripes representing [migration canoes] that first arrived in Aotearoa New Zealand; this tartan was presented by his descendant [Tūheitia Paki] to [Charles III] in the former's visit to Buckingham Palace in May 2023.[\[1031\]] His following [tribes] concentrated around [Northland] have also adopted green tartans.[\[1032\]] 

- [Drugget], a coarse and often linearly striped cloth that was common in the Scottish Western Isles
- [Flannel], a type of fuzzy cloth often produced in a tartan pattern
- [Hodden], a non-tartan cloth of undyed wool, sometimes also used for kilts, especially for non-Scottish pipe bands
- [List of tartans] 
- [Mackinaw cloth], a dense woollen cloth often produced in tartan patterns
- [Madras (cloth)], cotton cloth of India often woven in tartan patterns
- [Tartan Day], a day of celebration, in Canada, Australia, the US, and some other countries, recognising the influence of Scottish immigration
- [Tartanry] 

- [Argyle (pattern)] 
- [Battenburg markings], a check (dicing) pattern used on UK emergency vehicles
- [Border tartan] 
- [Check (pattern)] or chequer
- [Gingham] (Vichy check)
- [Glen plaid] 
- [Harlequin print] 
- [Herringbone (cloth)] 
- [Houndstooth] 
- [Sillitoe tartan], a check (dicing, not actually tartan) pattern commonly used on police headgear
- [Tattersall (cloth)] 

001. **[^] **The use of _plaid_ to mean 'tartan' has not been _exclusively_ North American; in 1808, the London publication _[La Belle Assemblée] _ referred to "plaid scarfs".[\[14\]] Also, it has sometimes been claimed that _plaid_ refers to all such patterns generally, and _tartan_ only to patterns of Scottish clans,[\[15\]] but there is no support for this idea in works of tartan scholarship.
002. **[^] **[MacBain (1911)], p. 277. [Cognate] words in other languages are the [Luwian] _pldtmn_ and later [Latin] _paludamentum_ for 'cloak'. The _paludamentum_ was a cloak put on by Roman officers in time of war.[\[17\]] [\[18\]] 
003. **[^] **Solid-colour, non-tartan kilts were often thought to be an Irish invention of the late 19th century, but an example of a belted plaid or "great kilt" from Scotland was found in a 1635 portrait of Sir Duncan Campbell of [Loch Awe],[\[19\]] among other Scottish examples.
004. **[^] **The two Scarlett sources provide two exact formulas which seem at first to be contradictory, but one is for number of blends and the other for number of colours total.
005. **[^] **The term _mirroring_ can be ambiguous, because the longer phrase _mirror pattern_ may refer to "one in which ... two alternating ground motifs are the same size and arrangement but in different colours."[\[32\]] 
006. **[^] **The term _repeating_[\[27\]] has also been used, as distinct from _mirroring_, but is so ambiguous that sometimes the same patterns are referred to as _non-repeating_.[\[33\]] Neither term will be used further in this article.
007. **[^] **A well-known example is the main [Buchanan] tartan.[\[34\]] 
008. ^ [_**a**_] [_**b**_] An example is the most popular [MacMillan] tartan, in which the warp and weft are different, though similar; the largest blocks of colour are green rectangles instead of squares.[\[36\]] 
009. **[^] **Early collectors of tartan, like Logan in 1831, recorded setts by measuring the width of each stripe in eighths of an inch.[\[37\]] [\[38\]] A persistent legend that tartans were originally recorded on little "pattern sticks" has been dispelled as a " [telephone game] "-style progressive, willful misunderstanding of an early description of the warp as wrapped on a warp beam/roller for the loom. It was poorly described by [Martin Martin] in 1703 as "an exact Pattern of the Plad on a piece of Wood", which Logan (1831) misunderstood as a small stick used as a perpetual "record" of the tartan pattern on it, after which the " [Sobieski Stuarts] " in 1842 blatantly falsified a supposed 16th-century description of "pattern sticks", and Archibald Campbell (1890) repeated the story again as factual. No such artefact has ever been found by modern researchers, and the idea has been described as impractical because the threads would not stay put indefinitely, and it would make much more sense to simply write or draw the pattern on paper, or keep a strip of the woven material.[\[39\]] [\[40\]] Mackay (1924) claimed he had seen some examples and appeared to describe warp beams, but then claimed they were used as a long-term record of "clan tartans" of the area.[\[41\]] Eslea MacDonald (2015) points out that Mackay had a tendency toward "manipulating the evidence" when advancing his ideas about very old clan tartans, and that he made up a fake-Gaelic name for the alleged pattern sticks.[\[40\]] 
010. **[^] **For example, [Stewart, D. C. (1974)], [Scarlett (1990)], and Scottish Register of Tartans (2009–) all use full-count-at-pivots "bare" thread counts, without slash or bold notation, while [Eslea MacDonald (2012)] uses them to represent half-count-at-pivots, but states this explicitly.
011. **[^] **_Ground_ in this sense dates to at least 1895. [Telfer Dunbar (1979)], pp. 112–113, quoting 1895 letter: "... dress Stuart tartan on a white instead of on a red ground .... the 'Stuart hunting-tartan' on a green ground". However, _ground_ has a different meaning at the thread-dyeing stage, where it refers to a first layer of colour which is then over-dyed with another, either to deepen the hue or make a new ones, e.g. purple from blue over red.[\[49\]] 
012. **[^] **Wilsons, the near-exclusive producer of Georgian through Victorian regimental tartan, produced different grades of cloth for officers, sergeants, and enlisted.[\[56\]] 
013. **[^] **Scarlett (1990) provided [a reconstruction of what 17th- to early 18th-century arisaid tartans probably basically looked like], based on the appearance of later wider-banded "bar blanket" tartans which evolved from the arisaid setts. His sample is modernised in being simplified, symmetrical, mirroring, and not having a decorative selvedge.[\[77\]] 
014. **[^] **The French term _écru_ has also been applied,[\[87\]] but is ambiguous, as it technically refers to the colour of undyed [linen] not wool, and has been taken to indicate a richer, sandy range of hues in English usage than in French.[\[88\]] 
015. **[^] **The [Lord Lyon] 's colour-coding system actually had three reds: "gules/scarlet", "red" (a dull red), and "crimson".[\[45\]] But it is not entirely clear what the difference between them is.
016. **[^] **Multiple hues of pink appear in Wilsons' colour lists around the early 19th century, which included colours for wool and other weaving, but orange does not.[\[87\]] 
017. **[^] **For lists of such natural dye materials and their preparation, see: Kok, Annette (1979) \[1962\]. "Appendix: Early Scottish Highland Dyes"". In Dunbar, John Telfer (ed.). _History of Highland Dress_. London: B. T. Batsford Ltd. pp. 222–240. Some additional such information is available in: [Mackay (1924)], pp. 59–64; and [Eslea MacDonald (2012)], pp. 76–77. See also: [Campbell, J. F. (1893)], p. 335.
018. **[^] **A romantic legend about such a thing goes back quite a way, however. According to Innes of Learney (1971): "The late J. G. Mackay, like Lord Archibald Campbell, claimed that clan tartans were not only deliberately arranged, but formed an elaborate system of identification by dress, as technical as [armorial bearings]  .... \[T\]artans were never intended to, and did not, have the precise distinctions and ready recognisability of armorial bearings. Mr Mackay gives much interesting information ...; he does not, however, succeed in adducing evidence that there was a scientific system of arrangement, and circumstances are against the existence of a _science_."[\[128\]] In summary, Mackay believed that lines of various colours formed a [heraldic] system of [cadency (differencing)] between related family branches.[\[129\]] The argument depends on the Victorian-era clan tartans having been used in the 17th–18th centuries, but all modern tartan scholarship shows this idea to be broadly false.
019. **[^] **For a photograph of one of the Tarim cloth fragments and a reproduction of what the fabric may have originally looked like, see: Spada (2019).[\[138\]] 
020. **[^] **For a photograph of one of the Salzburg cloth fragments, see: Belfrage, Anna (30 April 2016). ["Of mummies in tartan"]. _AnnaBelfrage.com_. Retrieved 10 June 2023.
021. **[^] **For a photograph of the Falkirk cloth fragment, see: ["Record: Cloth (Fragment) – found at Falkirk Stirlingshire"]. _NMS.Scran.ac.uk_. [National Museums Scotland]. 2015. Retrieved 3 June 2023.
022. **[^] **For a photograph of the Caracalla statue fragment, see (about 1/3 down the page): Lamley, Hamish (21 February 2022). ["Pictish Fashion"]. _PictaviaLeather.co.uk_. Retrieved 10 June 2023.
023. **[^] **Nor any relation to the modern [Tatar people] (see [§ Russian shotlandka]).
024. **[^] **This historic Glen Affric tartan is not to be confused with various competing modern district tartans named "Glen Affic" available from some vendors such as Clan/Scotweb, Stevens & Graham, and Spoonflower.
025. **[^] **There are possible mentions earlier, to the 14th century, in both [Early Scots] and [Middle English] using the French-borrowed terms _tiretain_ and _tartarin_ in various spellings, but they do not clearly refer to tartan, either the cloth or the pattern, but rather seem to refer to valued cloth in general.[\[4\]] 
026. **[^] **There is a legend that during some period there was a "caste" system by which chiefs were entitled to up to seven colours in a tartan, fewer colours were allowed for clansmen according to position in the social hierarchy, and just single-coloured cloth for servants. Barnes & Allen (1956)[\[93\]] attributed the idea to Frank Adam (1908). He did indeed write that "it is said" there was such a system, but cited no evidence.[\[143\]] The fancy is from Logan (1831), who cites nothing but an ancient " [Achy Edgathach] " of Ireland.[\[178\]] That was a legendary ancient Irish king in the _[Lebor Gabála Érenn] _, said to have passed a [sumptuary law] limiting clothing colours by social status, during his very short reign of only four years (some time between 1537 and 1155 BC).[\[179\]] It is old Irish folklore and nothing to do with history of Scottish tartan. Scarlett (1990): "it is difficult to allow such a tale any credibility".[\[145\]] No modern Highland-dress scholars repeat it seriously (the last one to do so seems to have been Mackay (1924) who said it applied to " [Druidical] times"),[\[180\]] and the idea is contradicted by existence of old regional tartans of complexity, and by chiefs adopting tartans of marked simplicity. Practically, the extra dye and weaving-labour expenses of complicated tartans meant that they cost more and so were more often worn by monied persons,[\[145\]] as clearly reported by [John Lesley] (1578)[\[181\]] and [Robert Heron] (1799).[\[182\]] 
027. **[^] **Buchanan (1582): "They delight in marbled cloths, especially that have stripes of sundrie colours; they love chiefly purple and blue; their predecessors used short mantles or plaids of divers colours, sundrie ways divided, and among some the same custom is observed to this day, but for the most part they are brown, most near to the colour of the hadder \[ [heather] \], to the effect, when they lie among the hadders, the bright colour of their plaids shall not [bewray] them."[\[192\]] (Buchanan's wording was recycled in 1603, in the anonymous _Certayn Mattere Concerning Scotland_.)[\[193\]] 
028. ^ [_**a**_] [_**b**_] The attempt to depict tartan is fairly crude, done as divided stripes, instead of a staggered pattern of blending rectangles, though it is possible it represents a weave with differing [warp and weft], which could produce more of a striped pattern. The exact details shown in the image are open to other question, because the artist illustrated an imaginative sword that is a combination of a Scottish [claymore] hilt with the blade and quillions of a German [Landsknecht] sword of a type more familiar to the German ( [Silesian]) artist. Also, Telfer Dunbar (1979) called the colours yellow, blue, and red (not green, blue, and red), so the palette accuracy of the photo could be in doubt.[\[195\]] The original art is in [Huntington Library] MSS: HM 25863, f. 28r.
029. **[^] **There is a recurrent legend running through Victorian works on tartan that the tartan cloth for each Highland man was usually made at home singly by his wife or mother,[\[198\]] but this proves to be an impractical idea,[\[199\]] for which there is no evidence, and considerable evidence against, including rich folk tradition of (mostly women's) group labour.
030. **[^] **Innes of Learney (1938/1971) believed that Highlanders wore multiple tartans because some were personal (perhaps inherited), some geographical, and some clan-specific,[\[204\]] but presented no real evidence for this hypothesis. The idea can be traced to Lord Archibald Campbell (1890), who asserted (with no evidence at all) that a Highlander wearing multiple tartans at once could be explained by him donning the pattern of his commander, his own paternal clan, and maternal clan.[\[198\]] 
031. **[^] **Even this, however, is ambiguous, and could mean that the Highlanders wore striped mantles, and worse their mantles in the same fashion that the Irish wore their own mantles, striped or not. And "striped" does not necessarily mean tartan. The Camden material is also contemporaneous with the Plantation of Ulster.
032. **[^] **[Taylor]: " ... all and every man in generall in one habit .... For once in the yeere, ... many of the nobility and gentry of the kingdome (for their pleasure) doe come into these Highland countries to hunt, where they doe conforme themselves to the habite of the Highlandmen, who for the most part speake nothing but Irish \[i.e. Gaelic\] .... Their habite is shooes, with but one sole apiece; stockings (which they call short hose) made of a warm stuff of divers colours, which they call tartane; as for breeches, many of them, nor their forefathers, never wore any, but a jerkin of the same stuff that their hose is of; their garters being bands, or wreathes of hay or straw; with a plaed about their shoulders, which is a mantle of divers colours, much finer and lighter stuffe than their hose; with blue flat caps on their heads ...."[\[229\]] 
033. **[^] **Adam (1908/1970) makes the surprising claim that in Taylor's time, "any one who assumed the tartan of the clan was considered as being under the special protection of that clan" and implies that Taylor said this.[\[190\]] Adam invented it, as nothing like this is in Taylor's original material. Taylor simply said that visitors wearing Highland dress would be "conquered with kindnesse, and the sport will be plentifull".
034. **[^] **E.g. in the revised 1707 edition of Rev. [James Brome] 's _Travels over England, Scotland and Wales_, is material partly adapted from Buchanan (1582): "They go habited in Mantles striped, or streaked with divers colours about the Shoulders, which they call Plodden, with a Coat girt close to their Bodies, and commonly are naked up their Legs, but wear Sandals upon the Soles of their Feet, and their Women go clad much after the same Fashion."[\[239\]] This suggests a span of at least 1582–1707 of Highland fashion being rather consistent and unisex.
035. **[^] **The same artist earlier painted [a three-in-one portrait of actor-playwright John Lacy], in 1675, which featured trews and belted plaid,[\[250\]] but the tartan there is very casually represented as simple red and blue lines on white.
036. **[^] **[Martin Martin] (1703) wrote: "each Isle differs from the other in their fancy of making Plaids, as to the Stripes in Breadth and Colours. This Humour is as different thro the main Land of the Highlands, in-so-far that they who have seen these Places are able, at the first view of a Man's Plaid to guess the Place of his Residence ...."[\[255\]] Scarlett (1990) says some earlier writers used this to just assume "a fully organised system of District tartans at that time" though Martin said nothing of the sort.[\[256\]] Scarlett considered Martin's account to have "a rather sweeping style that suggests some exaggeration", but generally plausible on other evidence of particular patterns, with minor variations, being common across wide areas.[\[256\]] 
037. **[^] **These tartan jackets are not to be confused with the later short [regimental Highland doublet] styles, borrowed directly from the military [Highland regiments] starting in the late 18th century; these are also of plain colour, not tartan.
038. **[^] **Sources conflict sharply on the date. Telfer Dunbar (1979), relying on Mackay Scobie, says 1717-1739;[\[271\]] while Eslea MacDonald (2016), relying on R. W. Munro's _Highland Clans & Tartans_ (1977), says 1754.[\[176\]] 
039. **[^] **Banks & de La Chapelle (2007) give an implausible 1724 date for the founding of Wilsons,[\[276\]] which does not agree with other scholarship, and they cite no source for the assertion.
040. **[^] **Scarlett (2008): "Red, blue and green have been recorded as the first colours to appear in all primitive art, so there may be some deep physiological or psychological reason for the predominance of these colours."[\[6\]] 
041. **[^] **Britain has many native plants that can produce at least a thin yellow, but they seem not to have been favoured, except as a ground-colour for over-dyeing with blue to create green.[\[278\]] 
042. **[^] **A small piece of tartan believed to be from a plaid of Bonnie Prince Charlie, given in 1746 to Lady [Anne Mackintosh] of [Clan Farquharson], survives in the [National Records of Scotland].[\[169\]] The prince apparently had a habit of giving out plaids as thanks for hospitality, and several recorded (but quite different) tartans are said to have come from these plaids, e.g. SRT 4220,[\[288\]] 4421,[\[289\]] 4422,[\[290\]] and 4423.[\[291\]] According to Telfer Dunbar (1979), various museums and other collections hold at least 40 pieces of tartan claimed to have been worn by "the Young Pretender", eight at the Battle of Culloden, and they cannot all be genuine.[\[292\]] One sample in particular is more likely than the others to be legitimate.[\[64\]] 
043. **[^] **There are several other tartans called "Jacobite". One dates to c. 1850,[\[67\]] or might be a bit older and is probably a Wilsons design,[\[32\]] and the others are more recent commercial inventions of c. 1930[\[300\]] and the late 20th century.[\[301\]] 
044. **[^] **Telfer Dunbar (1979) says the correct year is 1725 and that 1729 was an error introduced by Stewart of Garth (1822) and copied by later authors.[\[224\]] 
045. **[^] **Specifically, as defined in an earlier act of Parliament, north of the "Highland line" running from Perth in the east to Dumbarton in the west.[\[310\]] 
046. **[^] **The Dress Act _per se_ did not enumerate exceptions for the nobility, but the enclosing Act of Proscription did.
047. **[^] **[Lt.-Col. Sir John MacGregor Murray], newly chief of [Clan Gregor] and later vice-president of the Highland Society of London, wrote of the difficulty of raising a new regiment, in 1803: "It will require much to rekindle the martial spirit of our ancestors, which has, unfortunately, been systematically broken down – we were so long degraded by the privation of our arms and dress, and so much unmanned by being converted into manufacturers".[\[323\]] 
048. **[^] **Not to be confused with the second Seaforth's Highlanders, also raised as the 78th, in 1793. The original Seaforth's Highlanders were amalgamated with other units under the [Childers Reforms] to become the 1881 [Seaforth Highlanders].
049. **[^] **Not to be confused with the earlier [74th Regiment of (Highland) Foot], raised 1777.
050. **[^] **The commercial tartan weaver D. C. Dalgliesh provides a list of those that they supply, and it includes a mix of obscure tartans from defunct regiments, ones still used today for surviving regiments, tartans of overseas units that were "Highland" only in name, some that are now only associated with clans, and a number that are/were reserved for military pipe-band use and were not used in regular dress or undress uniforms.[\[404\]] 
051. **[^] **As one example, in _[The Lockhart Papers] _, first published in 1714, is a passage describing how opposing battatlions of [MacDonalds] from different places could only tell each other apart by colour of bonnet cockade. D. W. Stewart (1893) leapt to the conclusion they must have worn the same tartan, despite the material saying nothing of the sort[\[418\]] (they could have been wearing whatever tartans they happened to have, not uniforms, making tartan meaningless for distinguishing units of men).[\[141\]] 
052. **[^] **D. W. Stewart (1893) sometimes leaned toward the uniform interpretation: "It appears from the regulations issued to the retainers of the [Clan Grant] [anent] the wearing of a uniform tartan that distinctive patterns were in use, at least for military purpose, or on occasion of great gatherings".[\[261\]] The Grant case is covered in detail later.
053. **[^] **D. W. Stewart (1893) again came down on the "uniform" side, despite otherwise being a booster of the idea of early clan tartans;[\[253\]] so did Scarlett (1990).[\[254\]] 
054. **[^] **Adam (1908/1970) confirmed that there were two different grades of tartan worn,[\[190\]] as did Logan (1831), but both are sources of dubious quality. Scarlett (1990, 2008) also observes that there were once at least two kinds of tartan weave, a coarse, dense sort in which the weft threads were thicker than the warp,[\[6\]] and a finer equal-twill weave, seen often in portraits, that is more like the kilt cloth produced today.[\[272\]] (But he does not describe one as being specially intended for war.) Such a fineness split seems to have continued for a long time; Wilsons of Bannockburn manufactured regimental tartan in both coarse ("hard tartan") and fine qualities as late as 1819,[\[420\]] perhaps as an undress and dress distinction, or enlisted and officer.
055. **[^] **Innes of Learney's [motte-and-bailey tactic] when it comes to what "clan tartan" means is exemplified by his supposition that similar tartans used in lands of [Murray], Murray [of Athol], and [Sutherland] must mean they went back to a common tribal tartan "from the twelfth century" (which is not attested), and that: "It was no doubt 'the Murrays' tartan' without being ' _The_ Murray tartan'".[\[162\]] 
056. **[^] **E.g., [the district tartan] of [Huntly] [\[315\]] was sometimes called [Brodie], sometimes associated instead with [Forbes] or [Gordon], while Forbes did not have a distinct clan tartan until the key date of 1822, nor Brodie until the beginning of the 19th century.[\[429\]] The several tartans named Gordon all date to 1798 or later (and that earliest one was adopted from a 1793 regimental tartan).[\[430\]] 
057. **[^] **However, not only is it not certain that a single cloth of mixed colours was intended, rather than three cloths of distinct colours, Stewart contradicted himself: When the lands in question were restored to the MacLeans in 1630, the grey did not revert to green but remained gras, i.e. grey. Nevertheless, Stewart asserted: "The explanation is simple enough. White and black and green are the only colours in the oldest authenticated Mac Lean tartan."[\[434\]] But that design dates only to the fraudulent 1842 _Vestiarium Scoticum_ and is not "authenticated" by anything;[\[435\]] several other (red-based) MacLean tartans date to at least 1819 ( _STR_ reference nos. 2603, 2605, and 2606).
058. **[^] **The Scottish Tartans Society seemed to think it was something very similar to [Black Watch], with the red-and-white-striped Murray of Pulrossie version somehow, despite its 1618 prohibition, eventually becoming the primary Sutherland tartan.[\[436\]] Innes of Learney also supported the interpretation that it was a dark Black Watch-style tartan, related to others used in the region.[\[405\]] On the other hand, House of Gordon USA, a clan society, proclaims: "It was a Red Gordon!",[\[437\]] referring to a primarily red and teal tartan, also known as [old Huntly], recorded in 1819,[\[438\]] and appearing in a "stripey" variant, with differing warp and weft, in the [1766 painting of William Gordon]. The society does not publish any basis for their assertion.
059. **[^] **The piper's name was William Cumming. Telfer Dunbar (1979) describes this tartan, and that of a companion portrait of Alastair Grant Mòr "the Champion", as also showing thin yellow over-checks which are not really visible in this photo. He also describes the green as "grey".[\[440\]] 
060. **[^] **This problem of no consistent tartans in old family portraits recurs in other clans, such as [Murray] and [MacDonald], going back to the 18th century.[\[407\]] Trevor-Roper (1983) also notes this inconsistency among Highland portraits,[\[185\]] as does Haswell Miller (1956).[\[409\]] 
061. **[^] **Willie Scobie, in 2012, railed against "an influential and determined body of opinion set against the idea of clan tartans having existed prior to the late 18th century", analysed the _Tartana_ lines in light of known clan tartans, found no correspondences aside from the Royal Company of Archers (supposedly using a Stuart tartan, which in reality they did not,[\[257\]] and not being a clan anyway) having green edging on their jackets, and nevertheless decided: "we have in this piece of literature strong (one is almost tempted to say irrefutable) evidence of the existence of clan tartans in the year 1718."[\[450\]] 
062. **[^] **Thompson (1992)[\[141\]] said none of them survive; but the coat of the older boy is in what is now known as "MacDonald, Lord of the Isles", though the sett was reconstructed from the painting.[\[469\]] 
063. **[^] **A legend started by Lord Archibald Campbell (1890), who was working from a copy of the painting not the original, is that in his words: "No more conclusive proof of distinctive clan colours has been exhibited and it silences all dispute on the question at once and for all time."[\[476\]] Modern researchers do not take this seriously. E.g., J. Telfer Dunbar's evaluation: "This is an extraordinary claim as the tartans are clearly and accurately shown and not one of them agree with any clan tartans as known when Lord Archibald was writing or even to-day."[\[198\]] Scarlett (1990) pointed out that Campbell himself claimed, later in 1899, that the models for the painting were Jacobite prisoners.[\[477\]] If that were the case, they could be wearing whatever they were told to put on, even material supplied by the painter; i.e. the tartans depicted would be accurate representations of the cloth of the period but could not signify anything, even if they did match. The Jacobite Relics and Rare Scottish Antiquities Exhibition of 1946 agreed that the models were prisoners.[\[478\]] 
064. **[^] **James Ray, who served in the government forces at the Battle of Culloden, wrote in 1752: "In their flight I came up with a pretty young Highlander, who called out to me, Hold your Hand, I'm a Cambell. On which I asked him, Where's your Bonnet? He reply'd, Somebody have snatched it off my Head. I only mention this to shew how we distinguished our loyal Clans from the Rebels; they being dress'd and equip'd all in one Way, except the Bonnet; ours having a red or yellow Cross of Cloath or Ribbon; theirs a white Cockade".[\[479\]] Telfer Dunbar (1979): "If it had been possible to distinguish a Campbell by a 'Clan Campbell' tartan, either Ray would have done so or else remarked on the fact that the man was not wearing an identifiable tartan."[\[480\]] 
065. **[^] **_A Journal of the Expedition of Prince Charles Edward in 1745, by a Highland Officer_ provides this account: "We M'Donalds were much preplex'd, in the event of ane ingagement, how to distinguish ourselves from our bretheren and nighbours the M'Donalds of Sky, seeing we were both Highlanders and both wore heather in our bonnets, only our white cockades made some distinction".[\[418\]] Telfer Dunbar (1979): "If all the MacDonalds wore the same tartan, surely the writer would have mentioned this rather than the heather which they wore in their bonnets. A common tartan would have been much more confusing than a sprig of heather."[\[481\]] Also, this particular case does not demonstrate that _all_ the clans had different emblematic plants. Clan plants, like clan tartans, were solicited from chiefs in the early 19th century by the Highland Society of London, and there is no evidence of widespread assignment before then – only this single-clan mention in one period source.
066. **[^] **And because the belted plaid in particular, as very practical for outdoor wear but not as work clothing, was believed to be conducive to a life of idle shirking and outright banditry.[\[484\]] 
067. **[^] **Even the Sutherland/Pulrossie letter of 1618 referred specifically to "the plaides of his men",[\[405\]] which is suggestive of his militia, not his entire clan.
068. **[^] **Sometimes said to be [Jean "Jenny" Cameron], without conclusive evidence; there are five other identity candidates.[\[494\]] 
069. **[^] **Another legend, started by James Grant (1886), has it that the tartan goes back to " [Charles, first Earl of Dunmore], second son of the first Marquis of Tullibardine", but this was just a bad mis-reading of the Smith brothers (1850), from whom Grant plagiarised, referring to the _then-current_ Earl of Dunmore.[\[327\]] [\[7\]] 
070. **[^] **E.g., Telfer Dunbar (1979) provides this example: Wilsons' popular "Gordon" was green, purple, and black with over-checks of seven colours, and "unlike the present clan pattern".[\[442\]] 
071. **[^] **One example is today's [Macpherson], adopted in 1817, which was originally "Caledonia" then "No. 43", "No. 155", or "Kidd" in Wilsons' pattern books.[\[399\]] [\[503\]] (There is no "Clan Kidd"; the Kidd in question was a bulk orderer who used the tartan to clothe slaves in the West Indies.[\[504\]] Confusion seems to have arisen when Wilson also assigned the pattern the name "Macpherson" after another West Indies customer by that name.[\[505\]] Another is [Campbell of Cawdor], originally "No. 230" or " [Argyll] ", after the county.[\[506\]] [\[507\]] A complex example is the case of [Abercrombie] or [Abercromby].[\[508\]] Logan (1831) first published the tartan usually used for this name, but he modified it[\[509\]] from an 1805 Wilsons tartan record for "No. 64" or "Abercrombie", named for Sir [Ralph Abercrombie] not an entire family.[\[510\]] The design first popular for "Abercrombie" in the early 18th century changed names somehow[\[509\]] to [Graham] then later became today's Graham of [Montrose] tartan.[\[511\]] Wilsons' "Abercromby with yellow"[\[509\]] is today's [Campbell of Breadalbane] [\[512\]] after also being used by a fencible regiment.[\[513\]] The main [Buchanan] tartan, famous for being asymmetric, originated as a Wilsons fashion tartan around 1800 and was not adopted as a clan tartan until the 1830s.[\[34\]] "Logan" was invented by Wilsons, named after a merchant, and changed several times until it sold well.[\[514\]] "Drummond" was originally Wilsons' "Perth".[\[514\]] Wilson's 1819 pattern "Regent" turned into the [MacLaren] clan tartan by 1830, with a shift from purple to blue.[\[96\]] Scarlett (1990) and the Scottish Register of Tartans provide numerous other examples of modern "clan" tartans actually just being renamed generic/fashion/fancy, regimental, and famous-individual tartans from Wilsons, when they were not taken from the later forgery _Vestiarium Scoticum_.
072. **[^] **A prime example is the Black Watch tartan, which Cockburn collected four times and assigned the names "Campbell Argyll", "Grant", "Munro" and "Sutherland".[\[519\]] [\[517\]] 
073. **[^] **According to a documentary, [Clan Gregor], the [Gordons], and a [MacDonald] branch might also have had early informal clan tartans around this period.[\[183\]] However, the chief of the MacDonalds indicated not knowing of a clan tartan in 1815, and the tartan that was the subject of the 1618 Gordon/Murray/Sutherland letter is uncertain.
074. **[^] **Stewart of Garth may have had financial motivations for promoting an aristocratic ["tartanry" or "Highlandism"] and attaching his name to it prominently – like many other Scottish [lairds], he was in dire fiscal shape.[\[533\]] 
075. **[^] **At least six at once claimed the Black Watch regimental tartan,[\[517\]] [\[534\]] and "Several chiefs were asked to resubmit a different tartan in order to be seen to be different and thus support the idea of historical clan tartans."[\[517\]] In some cases, minor alterations were made, e.g. [Forbes] was devised in 1822 by adding a white over-check to Black Watch.[\[540\]] 
076. **[^] **There are numerous examples, but a prominent case is that two of the [Lord of the Isles] tartan variants were taken from portraits dating to the third quarter of the 18th century.[\[541\]] This practice, incidentally, has contributed to confusion about the age of clan tartans; a tartan adopted officially by a clan in 1850 from a painting dating to 1750 might misleadingly be said to be "a clan tartan dating to 1750".
077. **[^] **The authenticated samples bore seals of clan chiefs, while submissions received without such authentications were sealed by society secretary George Wedderburn.[\[56\]] The society collected tartans in general as well, and amassed 586 by 1987.[\[534\]] 
078. **[^] **Some faulty (according to Wilsons) clan patterns included in Logan (1831) were those for [Abercrombie], [Douglas], and [Graham], but there were more.[\[564\]] 
079. **[^] **In fairness, only most of the tartans in _Vestiarium_ were made up; almost a dozen had previously appeared in collections like those of Cockburn and Wilson.[\[573\]] Telfer Dunbar (1979) also considered that the Sobieski Stuarts' more general material on the history and then-present of Highland dress was of considerable value, at least when its sources could be traced.[\[574\]] Of the tartans material, [Walter Scott] fairly charitably wrote that the brothers had "an exaggerating imagination, which possibly deceives even themselves".[\[575\]] 
080. **[^] **R. Martin (1988): "I would like to excuse the prevarications of the Sobieski-Stuart brothers with a nod to [Baudrillard]; they lied and they cheated, but they did something quite extraordinary in ascribing a meaning to textile design that has more or less stuck: false as it is, the Sobieski-Stuarts fostered a myth of textile identification and implication that has served a continuing and compelling social need for well over a hundred years. They may have been factually wrong, but culturally very right."[\[581\]] 
081. **[^] **E.g., the usual tartan of [Clan Home] dates to _Clans Originaux_.[\[590\]] Another is [Brodie] hunting;[\[591\]] it was also later included in _Old & Rare Scottish Tartans_. A third is MacBean.[\[114\]] 
082. **[^] **See Scarlett (1990), chapter "The Setts of the Tartans", for numerous examples of names with 5 or even 10 "clan tartans", most of them traceable to Wilsons, Logan, or the Sobieski Stewarts.[\[603\]] For a quick visual example of conflicting claimed clan tartans, many of them dating to the Victorian to Edwardian periods, see the "MacDougal" search results in the _Scottish Register of Tartans_;[\[83\]] the list for that name is not much polluted by recent individual and "fashion" entries.
083. **[^] **Revised in 1974, D. C. Stewart's _The Setts of the Scottish Tartans_ has been further updated and expanded by James D. Scarlett in 1990 as _Tartan: The Highland Textile_,[\[607\]] perhaps the most definitive work on tartan published so far (though by no means the largest in terms of number of tartans illustrated; it is a book of research not of pictures).
084. **[^] **E.g. the red variant of the 1975 MacGregor dance tartan dates to 2005.[\[609\]] 
085. **[^] **_Electric Scotland_ published an annotated list of clans and their tartans' Lord Lyon registration status. The list is much shorter than some other clan lists, because it omits clans that have not applied to the Lord Lyon for tartan registry at all; it lists only those with Lyon-recorded tartans or those then in process of such registration.[\[611\]] 
086. **[^] **Example: The [Clan Watson] tartan dates to c. 1932 and appears to have been created by one of two ministers (sources disagree), based on the MacRae hunting and Gordon tartans.[\[612\]] 
087. **[^] **The Highland [MacLennans] use the same tartan as the Lowland [Logans]. Clan Logan is [without a chief].
088. **[^] **Eslea MacDonald (2022) defines this "Highland Revival" period as the 1782 end of the Dress Act to the beginning of Victoria's reign in 1837.[\[32\]] The utility and accuracy of this term when constratined to Victoria's accession is questionable, because revivalism of Highland cultural trappings did not abate during her reign but actually intensified markedly. Also, the term _tartan revival_ has been used, with essentially the same meaning, though without closely prescribed dates.[\[636\]] 
089. **[^] **In this era, soldiering, especially as an officer, was the "aristocratic profession _par excellence_",[\[638\]] and this had a strong effect on fashion. In Highland dress of the period, sometimes civilian and military styles were commingled.[\[639\]] 
090. **[^] **Not to universal approval. The chief of [Clan MacDonell of Glengarry] wrote of a Celtic Society of Edinburgh gathering: "I never saw so much tartan before in my life, with so little Highland material ... they have no right to burlesque the national character or dress of the Highlands."[\[539\]] 
091. **[^] **[David Wilkie] 's portrait of [George IV] depicts the king as being much slimmer than he actually was. Wilkie covered up the fact that the king's kilt was too short – sitting well above the knees – and also left out the pink tights the king wore to hide his bare legs.[\[653\]] 
092. **[^] **A detailed summary of the 19th-century tartan books can be found in [D. W. Stewart (1893)], pp. 57–61.
093. **[^] **Queen Victoria wrote of her time in Scotland: "... I feel a sort of reverence in going over these scenes in this most beautiful country, which I am proud to call my own, where there was such devoted loyalty to the family of my ancestors – for [Stuart blood] is in my veins, and I am now their representative, and the people are as devoted and loyal to me as they were to that unhappy race".[\[706\]] 
094. **[^] **There were "tartanitis"-infused travel books of the era to go along with the tourism, e.g. _A Tour in Tartan-land_ by [Rev. Edward "Cuthbert Bede" Bradley] (1863).[\[713\]] 
095. **[^] **As examples, modern tartans have been created for [Chinese], [Jewish],[\[869\]] [Muslim],[\[870\]] and [Sikh] [\[871\]] communities, as well as Italian Scots.[\[872\]] 
096. **[^] **Wilsons of Bannockburn created several of the comparatively old ones – [Aberdeen], [Crieff], [Dundee], [Glasgow], and [Perth]  – simply by naming patterns after the places in which they were the most popular.[\[442\]] 
097. **[^] **Cornish "national" examples:[\[881\]] [\[882\]] [\[883\]] [\[884\]] [\[885\]] [\[886\]] 
098. **[^] **Welsh national examples:[\[880\]] [\[887\]] [\[888\]] [\[889\]] [\[890\]] 
099. **[^] **Manx national examples:[\[892\]] [\[893\]] [\[894\]] [\[895\]] [\[896\]] [\[897\]] [\[898\]] [\[899\]] [\[900\]] [\[901\]] [\[902\]] The last of these is inexplicably assigned a date of 1863 in _SRT_, but with a note that seems to indicate it was designed by D. G. Teall of STS in 1981.
100. **[^] **Breton "national" examples:[\[781\]] [\[903\]] [\[904\]] 
101. **[^] **Galician "national" examples:,[\[905\]] [\[906\]] [\[907\]] 
102. **[^] **For example, [Bruce County] has an official tartan.[\[909\]] An example of a Canadian municipality with an official tartan is [Beauport, Quebec City].[\[910\]] 
103. **[^] **E.g., [Matheson] dress[\[917\]] is also known simply as Matheson, and is distinguished from a Matheson hunting tartan.[\[918\]] As with many Scottish names, there are an accumulation of other fashion and individual tartan designs named "Matheson",[\[919\]] but the only two recognised by the Clan Matheson Society are Matheson \[dress\] and Matheson hunting.[\[920\]] Similarly, [Shaw of Tordarroch] dress[\[618\]] is the main tartan, and is distinguished from a hunting variant,[\[619\]] with the old, erroneous "Shaw" tartan being retained only as a memorial tartan for a particular family figure.[\[617\]] 
104. **[^] **A photo in Adam (1908/1970) confirms that tartans with white stripes were used for Highland dance outfits at least as far back as the Edwardian period, though the style of [female dance-competition dress] has notably changed toward kilt-length instead of mid-calf skirts since then.[\[924\]] 
105. **[^] **Some writers have confused them as late as the 1980s (which suggests that dance tartans as a _conventional_ category unto themselves may date to the 1990s and later, though some specific dance tartans date to at least the mid-1970s).[\[925\]] E.g., J. C. Thompson (1989) conflates dance and dress tartans and treats all dress tartans as if they were white-bearing,[\[101\]] despite the clear fact that some dress tartans of considerable age do not have white in them, e.g. [Matheson] dress from c. 1850.[\[917\]] 
106. **[^] **The white-heavy MacGregor dance tartan (in three colour variants dating to 1975–2005) is confusingly listed in the _Scottish Register of Tartans_ as both dance and dress,[\[925\]] but the chief of [Clan Gregor] insists it is for dancers only,[\[614\]] so it is demonstrably not a general dress-wear tartan. Several other dance tartans are listed also as dress tartans in the _SRT_, but most appear to be "fashion" inventions by individuals or by woollen mills and are not associated with clans or districts.[\[926\]] 
107. **[^] **Possibly as early as 1850, and based on the Hay Stewart tartan or on royal Stewart, both probably by the Sobieski Stuarts.[\[700\]] It is often misdated to 1853.[\[927\]] 
108. **[^] **An example of a writer uncritically perpetuating the story can be found in M. B. Paterson (2001).[\[929\]] 
109. **[^] **As noted above, an early regimental tartan of 1787 was for a while called "Mackenzie–MacLeod" after two commanders, but this was a troop uniform tartan, not one for the named individuals.
110. **[^] **The sett actually survives in two variants in the _SRT_, created for an 1880 wedding; they are now sometimes used as Wilson family tartans.[\[932\]] [\[933\]] 
111. **[^] **The Lord Lyon would only accept formal clan tartan registrations from clan chiefs; this excluded chiefless [armigerous clans] from tartan registration with the Lord Lyon, whether or not they had latter-day clan associations/societies. However, many now-armigerous clans _were_ able to register tartans with the Lord Lyon before they became chiefless, and these registrations remain in the Lyon Court Books. The Lord Lyon seemed to consider a clan that _has had_ a chief to remain a clan and not just a family/surname (the Lord Lyon did not do any registration of family tartans, i.e. those for non-clan surnames), though a statement by the Lord Lyon on this matter in 2002 is not as clearly worded as it could have been.[\[611\]] 
112. **[^] **In 2003, [Burberry] demanded members of the tartan industry to stop trading a certain Thomson Camel tartan.[\[955\]] Burberry claimed this tartan was confusingly similar to their Burberry check and that it thus infringed their registered trademark.[\[956\]] Burberry took legal action again in 2013 to protect its tartan trademark in China.[\[957\]] 
113. **[^] **For example, the Clan Cameron Association website states that the Cameron of Lochiel tartan "is the personal tartan of the Chief and his immediate family; as a rule it should not be worn by clansfolk".[\[983\]] 
114. **[^] **Since 1937, the only non-royals permitted by the British royal family to wear the Balmoral tartan are the monarch's own personal piper and pipers at the royal Balmoral estate. Even royal family members only wear it with the permission of the monarch.[\[700\]] The official website of the [monarchy of the United Kingdom] claims the tartan is not available for purchase.[\[985\]] [\[986\]] 
115. **[^] **Oyange-Ngando (2018): "the intentional and specific arrangement of colour where each bears a certain meaning, for example a colour arrangement could represent age, clan or marital status of an individual". Oyange-Ngando's paper cites many sources, but cites none at all for this claim. Modern photos of Maasai show members of the same tribe/clan wearing a wide variety of _shúkà_ patterns, seemingly to taste.
116. **[^] **They are prescribed dress in at least in the more populous places. Remote areas, inhabited largely by ethnic minorities, still exhibit local traditional dress norms that differ from area to area.[\[1005\]] 
117. **[^] **Just _Bumthang_ by itself is a term for a type of woolen cloth, regardless of pattern.[\[1007\]] 
118. **[^] **Not to be confused with the [Mongols], who were called "Ta\[r\]tars" by medieval Europeans, and supplied patterned cloth among other trade goods (see [§ Medieval], above).

0001. ^ [_**a**_] [_**b**_] Harper, Douglas. ["tartan (n.)"]. _Online Etymology Dictionary_. [Archived] from the original on 5 August 2017. Retrieved 4 March 2018.
0002. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)], p. 57.
0003. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 3.
0004. ^ [_**a**_] [_**b**_] [_**c**_] ["Tartan(e), Tertan(e), n. Also: (tartain)"]. _A Dictionary of the Older Scottish Tongue (up to 1700)_. Dictionaries of the Scots Language SCIO / University of Glasgow. 2001. [Archived] from the original on 15 July 2023. Retrieved 14 July 2023.
0005. **[^] **[Scarlett (1990)], p. 11
0006. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] [_**n**_] [_**o**_] [_**p**_] [_**q**_] [_**r**_] [_**s**_] [_**t**_] [_**u**_] [_**v**_] [_**w**_] [_**x**_] [_**y**_] [_**z**_] Scarlett, James D. (2008). ["Submission from James D. Scarlett"] (PDF). [Scottish Parliament]. Archived from [the original] (PDF) on 19 December 2008. Retrieved 12 October 2008.
0007. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] Eslea MacDonald, Peter (November 2010). ["The Murray of Tullibardine Tartan – A Re-appraisal"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 25 June 2023. Retrieved 25 June 2023.
0008. **[^] **[Tuckett (2016)], p. 2, citing: Scarlett, James D. (1997). "Tartan: The Highland cloth and Highland art form". In Butt, John; Ponting, Kenneth (eds.). _Scottish Textile History_. Aberdeen University Press. p. 71.
0009. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] Newsome, Matthew Allan C. (1994). ["Introduction to Tartan"]. Franklin, North Carolina: Scottish Tartans Museum. Archived from [the original] on 10 February 2006. Retrieved 31 May 2010.
0010. **[^] **[Cheape (2012)], pp. 15–16.
0011. **[^] **Griest, Terry L. (1986). _Scottish Tartans and Family Names_. Harp & Lion Press. p. 2. The words tartan and plaid have come to be used synonymously, particularly in North America. This usage is incorrect when referring to Scottish tartan
0012. ^ [_**a**_] [_**b**_] [_**c**_] ["Frequently Asked Questions"]. _ScottishTartans.org_. Archived from [the original] on 17 April 2000. Retrieved 16 October 2008.
0013. **[^] **[Cheape (2012)], p. 15.
0014. **[^] **[Tuckett (2016)], p. 10.
0015. ^ [_**a**_] [_**b**_] [Black (1959)], p. 3.
0016. **[^] **[Telfer Dunbar (1979)], p. 2.
0017. **[^] **[Ramsay, William] (1875). ["Paludamentum"]. In [Smith, William] (ed.). _A Dictionary of Greek and Roman Antiquities_. London: John Murray. pp. 853–854. Retrieved 28 May 2023 – via University of Chicago.
0018. **[^] **"plaid". [_Merriam-Webster's Collegiate Dictionary_] (11th ed.). [Merriam-Webster]. 2003\. p. 947. [ISBN] [0877798095].
0019. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] Newsome, Matthew; Wilkinson, Todd (2010). ["Hibernean Dress, Caledonian Custom: A brief history of Irish kilts and tartan"]. _ScottishTartans.org_. Scottish Tartans Museum. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0020. ^ [_**a**_] [_**b**_] See, e.g., [Mackay (1924)], p. 94, quoting: Burt (1726).
0021. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 40, quoting: Tucker, Thomas (1824) \[1655\]. [Murray, John Archibald] (ed.). [_Report upon the Settlement of the Revenues of Excise and Customs in Scotland_]. Bannatyne Club Press. [Archived] from the original on 15 July 2023. Retrieved 8 July 2023 – via Google Books.
0022. ^ [_**a**_] [_**b**_] ["Frequently Asked Questions"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. [Archived] from the original on 1 July 2023. Retrieved 20 June 2023.
0023. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 55.
0024. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)], p. 61.
0025. **[^] **[Scarlett (1990)], p. 46.
0026. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 23.
0027. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] ["Threadcount"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. [Archived] from the original on 3 April 2019. Retrieved 10 June 2023.
0028. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Scarlett (1990)], p. 62
0029. **[^] **["Berwick-upon-Tweed (symmetric)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.
0030. **[^] **["Tartan Details - Campbell of Lochnell Dress)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.
0031. **[^] **["Tartan Details - Unnamed C18th - Cf 4445"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.
0032. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] Eslea MacDonald, Peter (April 2022). ["The Jacobite Tartan"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 23 June 2023. Retrieved 23 April 2023.
0033. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (October 2018). ["Tartan from Isabella Fraser's Wedding Dress 1785"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 30 November 2023. Retrieved 20 June 2023.
0034. ^ [_**a**_] [_**b**_] ["Tartan Details - Buchanan – 1800"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0035. **[^] **[Scarlett (1990)], p. 53.
0036. ^ [_**a**_] [_**b**_] ["Tartan Details - MacMillan Anc (Clans Originaux)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0037. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] ["What's a Threadcount"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 16 October 2008.
0038. **[^] **[Scarlett (1990)], p. 17.
0039. **[^] **[Scarlett (1990)], pp. 6–7.
0040. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (2015). ["Pattern Sticks – Fact or Fiction?"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 24 June 2023. Retrieved 23 June 2023.
0041. **[^] **[Mackay (1924)], p. 46.
0042. **[^] **[Scarlett (1990)], p. 51.
0043. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Blakely (2015)], p. 13.
0044. **[^] **Urquhart, Blair; Cruickshank, Kris (2006). _Textile32_ (Windows software) (v3.2 ed.). Comrie, Perthshire: Tartan Software / [Scottish Tartans Authority] International Tartan Index. "Ticket" menu.
0045. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 63.
0046. **[^] **See "complaint" about this in: [Scarlett (1990)], pp. 55–56.
0047. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 88.
0048. **[^] **See usage at, e.g.: ["Tartan Details - Edmonton Scottish Society"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2021. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0049. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 227.
0050. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Scarlett (1990)], p. 47.
0051. **[^] **[Banks & de La Chapelle (2007)], p. 65.
0052. **[^] **Usage example: [Scarlett (1990)], p. 33, footnote 6.
0053. ^ [_**a**_] [_**b**_] See usage at, e.g.: ["Tartan Details - Rankin (Dalgleish) #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0054. **[^] **[Scarlett (1990)], p. 52.
0055. **[^] **[Telfer Dunbar (1979)], pp. 145, 151.
0056. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Eslea MacDonald, Peter (January 2012). ["The Original Cameron of Erracht Cloth?"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 25 June 2023. Retrieved 24 June 2023.
0057. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Eslea MacDonald, Peter (February 2019). ["Traditional selvedge decoration on tartan cloth"] (PDF). _ScottishTartans.co.uk_. Retrieved 23 June 2023.
0058. **[^] **[Scarlett (1990)], pp. 52–53.
0059. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (April 2020). ["The Use of a Selvedge Mark on Early Military Tartan"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 3 July 2023. Retrieved 3 July 2023.
0060. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 18.
0061. **[^] **Eslea MacDonald, Peter (September 2018). ["An 18th Century Plaid belonging to the Maclaines of Lochbuie"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 23 June 2023.
0062. **[^] **Eslea MacDonald, Peter (2004). ["Two Tartan Plaids from Antigonish County, Nova Scotia"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 23 June 2023.
0063. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (January 2016). ["A portion of joined plaiding at Glamis Castle – Prince Charles Edward tartan"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 29 June 2023. Retrieved 28 June 2023.
0064. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (November 2014). ["An Unnamed 18th Century Jacobite Era Plaid – Carlisle Museum"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 30 November 2023. Retrieved 28 June 2023.
0065. **[^] **[Eslea MacDonald (2012)], p. 17.
0066. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 42.
0067. ^ [_**a**_] [_**b**_] See usage at, e.g.: ["Tartan Details - Jacobite, Old"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 10 June 2023.
0068. **[^] **[Telfer Dunbar (1979)], pp. 49–50.
0069. **[^] **[Scarlett (1990)], pp. 49–53, 181–183.
0070. **[^] **Tilson Davis, Linda (2018). _Weaving Tartans: A Guide for Contemporary Handweavers_. Amazon Digital Services LLC - Kdp. [ISBN] [9781723818028].
0071. **[^] **[Black (1959)].
0072. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 146–147.
0073. **[^] **[Eslea MacDonald (2012)], pp. 16–17.
0074. **[^] **[Scarlett (1990)], pp. 46–48.
0075. **[^] **[Mackay (1924)], p. 49.
0076. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 40.
0077. **[^] **[Scarlett (1990)], p. 40, plate 5(b).
0078. **[^] **[Scarlett (1990)], p. 185.
0079. **[^] **For example: ["Tartan Details - Dundee Wallace"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 12 June 2023. Retrieved 11 June 2023.
0080. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 45.
0081. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 7.
0082. ^ [_**a**_] [_**b**_] [_**c**_] ["Tartan Details - Mar Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0083. ^ [_**a**_] [_**b**_] ["Search Results \[MacDougal\]"]. _TartanRegister.gov.uk_. 2023. Retrieved 13 June 2023.
0084. **[^] **["Tartan Details - Innes of Learney Hunting (Personal)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0085. **[^] **[Scarlett (1990)], p. viii.
0086. **[^] **["Women's Dress"]. _TartansAuthority.com_. [Scottish Tartans Authority]. 2010\. Archived from [the original] on 6 July 2022. Retrieved 9 July 2023. Quoting: [Logan, James]; [McIan, Robert Ronald] (1845–1847). _Clans of the Scottish Highlands_. London: Ackermann & Co.
0087. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 10.
0088. **[^] **Maerz, Aloys John; Paul, Morris Rea (1930). [_A Dictionary of Color_]. New York: [McGraw-Hill]. p. 149. [LCCN] [30016563]. [OCLC] [1150631] – via Internet Archive. There is a newer 1950 2nd edition, but both versions are collector's items that are difficult to find except via [inter-library loan].
0089. **[^] **[Scarlett (1990)], pp. 67–68.
0090. **[^] **["Tartan Details - Prince of Orange"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 9 June 2023. Retrieved 9 June 2023.
0091. ^ [_**a**_] [_**b**_] ["Tartan Details - Hello Kitty"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0092. **[^] **["Tartan Register colours"] (PDF). _TartanRegister.gov.uk_. Scottish Register of Tartans. 2022. Retrieved 9 June 2023.
0093. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Barnes & Allen (1956)]: p. 266.
0094. ^ [_**a**_] [_**b**_] [_**c**_] [Stewart, D. W. (1893)], p. 33.
0095. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Telfer Dunbar (1979)], pp. 222–224.
0096. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 9.
0097. **[^] **[Telfer Dunbar (1979)], p. 225.
0098. **[^] **[Telfer Dunbar (1979)], pp. 227–229.
0099. **[^] **[Telfer Dunbar (1979)], pp. 226, 228, 231, 239–240.
0100. **[^] **Eslea MacDonald, Peter. ["The Use of Colour in Tartan"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 22 October 2008.
0101. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] Thompson, J. Charles (1989). "Color Schemes". _So You're Going to Wear the Kilt_ (3rd revised ed.). Arlington, Virginia: Heraldic Art. pp. 34–37\. [ISBN] [0862280176].
0102. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 145.
0103. **[^] **Actually, this term has been used in this specific context: Urquhart, Blair; Cruickshank, Kris (2006). _Textile32_ (Windows software) (v3.2 ed.). Comrie, Perthshire: Tartan Software / [Scottish Tartans Authority] International Tartan Index. "Select Colours for Pattern" menu. Select a colourway...
0104. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (2014). ["The Use of Colour in Tartan"]. _ScottishTartans.co.uk_. Retrieved 16 May 2023.
0105. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] Newsome, Matthew Allan C. (2006). ["Tartan Colors — A Photo Essay"]. _Albanach_. Retrieved 16 May 2023.
0106. **[^] **[Telfer Dunbar (1979)], p. 238.
0107. **[^] **[Telfer Dunbar (1979)], p. 50.
0108. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 43. Scarlett says "toward the end of the Great War" (WWI), while Newsome (2006) says "some time after World War II".
0109. **[^] **_The Story of Reproduction Tartans_ (brochure). Selkirk, Scotland: D. C. Dalgliesh Ltd. c. 1990s.\[ _[self-published source] _\]
0110. **[^] **[Eslea MacDonald (2012)], p. 8.
0111. ^ [_**a**_] [_**b**_] [_**c**_] ["Tartan Details - Stewart, Prince Charles Edward"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 25 June 2023. SRT's entry on this tartan attempts to illustrate it in the original regimental version with azure and olive green, but mis-renders it in tones that are nearly grey and do not agree with SRT's own colour codes.
0112. **[^] **["Tartan Details - Balmoral (Original)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 1 April 2017. Retrieved 8 June 2023.
0113. **[^] **["Tartan Details - Akins Clan (Personal)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 12 June 2023. Retrieved 11 June 2023.
0114. ^ [_**a**_] [_**b**_] ["Tartan Details - MacBean (Clan)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 June 2023.
0115. **[^] **Illustrated, but with an incorrect "Childers (Gurkha Rifles)" name, here: ["Tartan Details - Childers (Gurkha Rifles)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 20 June 2023. Retrieved 14 June 2023. On the naming confusion, see: [Scarlett (1990)], pp. 32–33.
0116. **[^] **["Tartan Details - Gordon Red"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 19 June 2023.
0117. **[^] **["Tartan Details - Galloway Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.
0118. **[^] **["Tartan Details - Galloway Green (yellow line)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.
0119. **[^] **["Tartan Details - US Air Force Reserve Pipe Band"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.
0120. **[^] **["Tartan Details - McCandlish Red"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 15 May 2023. Retrieved 9 June 2023.
0121. **[^] **["Tartan Details - Hunting Green"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 9 June 2023.
0122. **[^] **["Tartan Details - McCandlish Dress Grey"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 9 June 2023.
0123. **[^] **["Tartan Details - Isle of Skye"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.
0124. **[^] **["Tartan Details - Chisholm Colonial"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.
0125. ^ [_**a**_] [_**b**_] ["Tartan details - LOVERBOY"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2018. [Archived] from the original on 30 December 2021. Retrieved 1 January 2022.
0126. **[^] **[Scarlett (1990)], p. 48.
0127. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [MacDonald, M. (1995)] p. 48.
0128. **[^] **[Innes of Learney (1971)], p. 6. Citing: [Mackay (1924)]; and: [Campbell, A. (1890)].
0129. **[^] **[Mackay (1924)], pp. 37–38, 40–41, 45–46.
0130. **[^] **[Martin, R. (1988)], pp. 60–61 and throughout.
0131. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Mair, Victor H. (2016). ["Ancient Mummies of the Tarim Basin: Discovering Early Inhabitants of Eastern Central Asia"]. _Expedition Magazine_. Vol. 58, no. 2. Philadelphia: [Penn Museum]. [Archived] from the original on 28 September 2023. Retrieved 10 June 2023.
0132. **[^] **[Mallory, J. P.]; [Mair, Victor H.] (2000). _The Tarim Mummies: Ancient China and the Mystery of the Earliest Peoples from the West_. London: Thames & Hudson. p. 237. [ISBN] [9780500051016].
0133. **[^] **["The genomic origins of the Bronze Age Tarim Basin mummies"]. _European Nucleotide Archive_. School of Life Sciences, [Jilin University]. 20 August 2021. [Archived] from the original on 16 February 2022. Retrieved 10 June 2023.
0134. ^ [_**a**_] [_**b**_] [_**c**_] Seenan, Gerard (24 January 1999). ["Preserved with the mummies, clues to an ancient mystery: Tattered fabric that could hold the key to early history"]. _[The Guardian] _. Retrieved 10 June 2023.
0135. **[^] **[Mallory & Mair (2000)], p. 191.
0136. **[^] **Shuicheng, Li (2003). ["Ancient Interactions in Eurasia and Northwest China: Revisiting J. G. Andersson's Legacy"]. _Bulletin of the Museum of Far Eastern Antiquities_. **75**. Stockholm: Fälth & Hässler: 13.
0137. **[^] **Coonan, Clifford (28 August 2006). ["A meeting of civilisations: The mystery of China's Celtic mummies"]. _[The Independent] _. Archived from [the original] on 3 April 2008. Retrieved 11 October 2008.
0138. ^ [_**a**_] [_**b**_] Spada, Gianfranco (31 October 2019). ["Qizilchoqa Tartan Tissue – Anonymous"]. _Geometricae_. Valencia / London: Center for International Research on Concrete Art. [ISSN] [2605-5309]. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0139. **[^] **Fortson, Benjamin W. (2004). _Indo-European Language and Culture: An Introduction_. Blackwell Publishing. p. 352. [ISBN] [1405103167].
0140. **[^] **Newsome, Matthew Allan C. (2005). ["Who Says Tartan Is Just for Scots?"]. _Albanach.org_. Retrieved 14 July 2023. Quoting: [Wayland Barber, Elizabeth J.] (2000) \[1999\]. _The Mummies of Ürümchi_. London: W. W. Norton & Co. [ISBN] [9780393320190].
0141. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [Thompson (1992)], p. iv.
0142. **[^] **[Scarlett (1990)], pp. 9–10.
0143. ^ [_**a**_] [_**b**_] [Adam (1908/1970)], p. 385.
0144. ^ [_**a**_] [_**b**_] [Stewart (1893)], p. 8, citing: [Lesley, John] (1885) \[1571\]. Cody, E. G. (ed.). _The Historie of Scotland_. Vol. 1. Edinburgh: Scottish Text Society.
0145. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 10.
0146. **[^] **Delamarre, Xavier (2008). _Dictionnaire de la langue gauloise: Une approche linguistique du vieux-celtique continental_ (in French). Errance. [ISBN] [9782877723695].
0147. **[^] **["Tartan Details - Falkirk"]. _TartaRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 1 February 2024. Retrieved 2 February 2024.
0148. ^ [_**a**_] [_**b**_] Chen, Min (7 April 2023). ["A Tattered Scrap of Fabric, Unearthed From a Peat Bog in the Scottish Highlands, Is the World's Oldest Piece of Tartan"]. _Artnet News_. [Archived] from the original on 18 May 2023. Retrieved 18 May 2023.
0149. **[^] **["Who wore Scotland's oldest piece of tartan?"]. _[The Scotsman] _. Retrieved 11 February 2020.
0150. **[^] **["Earliest depiction of Scottish tartan discovered on Roman statue"]. _[The Scotsman] _. 4 December 2012. [Archived] from the original on 8 June 2023. Retrieved 8 June 2023. A newswire story, essentially the same article is repeated [at _The Herald_] [Archived] 8 June 2023 at the [Wayback Machine] and in shorter form [at _BBC News_].
0151. **[^] **[Scarlett (1990)], pp. ix–x.
0152. **[^] **[Brown (2012)], p. 2.
0153. **[^] **["Tartan – Shepherd / Falkirk"]. Scottish Tartans World Register. Archived from [the original] on 4 October 2011. Retrieved 8 October 2008.
0154. **[^] **["Falkirk tartan"]. Search Results. [National Museums Scotland]. [Archived] from the original on 23 August 2017. Retrieved 8 October 2008.
0155. **[^] **Wild, J. P. (2002). "The Textile Industries of Roman Britain". _Britannia_. **33**: 1–42\. [doi]: [10.2307/1558851]. [JSTOR] [1558851].
0156. **[^] **Wild, J. P. (1964). "The Textile Term _Scutulatus_". _The Classical Quarterly_. New Series. **14** (2): 263–266\. [doi]: [10.1017/S0009838800023818]. [JSTOR] [637730]. [S2CID] [170603077].
0157. **[^] **Harrison, Mark (1993). _Anglo-Saxon Thegn, 449–1066 A.D_. Osprey Publishing. p. 17. [ISBN] [1855323494].
0158. **[^] **[Telfer Dunbar (1979)], p. 48.
0159. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 66, 68.
0160. **[^] **[Stewart, D. W. (1893)], p. 1 \[A\].
0161. **[^] **[Innes, Cosmo] (1860). [_Scotland in the Middle Ages: Sketches of Early Scottish History and Social Progress_]. Edinburgh: Edmonston and Douglas. p. 227 – via Google Books. Also cited in: [Campbell, J. F. (1893)], p. 335.
0162. ^ [_**a**_] [_**b**_] [_**c**_] [Innes of Learney (1971)], p. 10.
0163. **[^] **[Stewart, D. W. (1893)], p. 2.
0164. **[^] **[Telfer Dunbar (1979)], p. 14; quoting an article in _[The Scotsman] _, 7 December 1948, summarizing a Grant presentation the night before at the Celtic Union of Edinburgh.
0165. **[^] **Richardson, Alan (27 March 2023). ["Oldest tartan found to date back to 16th Century"]. _BBC_. Retrieved 28 March 2023.
0166. **[^] **["Scotland's oldest tartan discovered by Scottish Tartans Authority"]. [V&A Dundee]. [Archived] from the original on 23 May 2024. Retrieved 22 May 2023.
0167. **[^] **Killgrove, Kristina (1 April 2023). ["Oldest Scottish tartan ever found was preserved in a bog for over 400 years"]. _Live Science_. Future US. [Archived] from the original on 22 May 2023. Retrieved 22 May 2023.
0168. **[^] **[Maria Hayward], _Stuart Style: Monarchy, Dress and the Scottish Male Elite_ (Yale, 2020), p. 39 citing [National Records of Scotland] E21/34 f.63v: [Rosalind K. Marshall], "To be the Kingis Grace ane Dowblett: The Costume of James V, King of Scots", Costume, 28:1 (2014), p. 16: [James Balfour Paul], _Accounts of the Treasurer_, 6 (Edinburgh, 1905), pp. 79–80, 436–437.
0169. ^ [_**a**_] [_**b**_] [_**c**_] ["Sources in the National Records of Scotland"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. [Archived] from the original on 19 May 2023. Retrieved 28 May 2023.
0170. **[^] **[Stewart, D. W. (1893)], pp. 4–5.
0171. **[^] **[Dunbar, William] (1834). Laing, David (ed.). [_The Poems of William Dunbar_] (PDF). Vol. II. Edinburgh: Laing & Forbes. p. 38. [Archived] (PDF) from the original on 6 June 2023. Retrieved 6 June 2023. Also quoted in: [Mackay (1924)], p. 53.
0172. **[^] **[Telfer Dunbar (1979)], pp. 51–52, and plate 7.
0173. **[^] **["Tartan Details - Lennox"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0174. **[^] **["Tartan & Symbols"]. _ClanLennox.org_. Clan Lennox Council of Commissioners. 2019. [Archived] from the original on 22 June 2023. Retrieved 22 June 2023.
0175. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Smith, Philip D. Jr. (2020). ["History of Tartan"]. _ClanChiefs.org.uk_. [Standing Council of Scottish Chiefs]. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023. (Article first published on _PanAlba_.)
0176. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] Eslea MacDonald, Peter (2016). ["Musings on the Arisaid and Other Female Dress"] (PDF). _ScottishTartans.org.uk_. [Archived] (PDF) from the original on 22 June 2023. Retrieved 21 June 2023.
0177. **[^] **See gallery at Douglas Archives, which also does not repeat the 1575 tartan story: ["Margaret, Countess of Lennox"]. _The Douglas Archives: A collection of historical and genealogical research_. Clan Douglas Society. 30 September 2021. [Archived] from the original on 22 June 2023. Retrieved 22 June 2023.
0178. **[^] **[Logan, James] (1831). [_The Scottish Gaël; Or, Celtic Manners, as Preserved Among the Highlanders: Being an Historical and Descriptive Account of the Inhabitants, Antiquities and National Peculiarities of Scotland_]. Cornhill, Aberdeen: Smith, Elder & Co. p. 231. [Archived] from the original on 8 September 2023. Retrieved 19 August 2023 – via Google Books.
0179. **[^] **[Macalister, R. A. Stewart]; Murphy, Michael, eds. (2008). [_Lebor Gabála Érenn: The Book of the Taking of Ireland_] (PDF). Vol. Part VI: Index D–F. University College Cork. "Eochu Édgathach" entry. [Archived] (PDF) from the original on 11 June 2023. Retrieved 10 June 2023 – via CELT: [Corpus of Electronic Texts].
0180. **[^] **[Mackay (1924)], p. 35.
0181. ^ [_**a**_] [_**b**_] [_**c**_] [Stewart, D. W. (1893)], p. 7.
0182. **[^] **[Adam (1908/1970)], p. 385, citing: [Heron, Robert] (1799). _History of Scotland_. Edinburgh/London: T. Cadell Jun. & W. Davies.
0183. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] Neil, Tim (dir.) (2013). [_Spinning a Yarn: The Dubious History of Scottish Tartan_] (Television production). [BBC Television]. [Archived] from the original on 18 May 2023. Retrieved 17 May 2023 – via YouTube.
0184. **[^] **[Stewart, D. W. (1893)], pp. 3–4.
0185. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 23.
0186. **[^] **[Telfer Dunbar (1979)], p. 234.
0187. **[^] **[Telfer Dunbar (1979)], pp. 27–33, 35; quotes numerous period sources.
0188. **[^] **Caldwell, David; Oleksy, Vicky; Rhodes, Bess (2023). _The Battle of Pinkie, 1547: The Last Battle Between the Independent Kingdoms of Scotland and England_. Oxford: [Oxbow Books]. p. 85. [ISBN] [9781789259735].
0189. **[^] **[Telfer Dunbar (1979)], p. 27.
0190. ^ [_**a**_] [_**b**_] [_**c**_] [Adam (1908/1970)], p. 387.
0191. **[^] **[Stewart, D. W. (1893)], p. 9.
0192. **[^] **[Stewart, D. W. (1893)], p. 9. It also appears in [Banks & de La Chapelle (2007)] p. 68, citing: Grant, I. F.; Cheape, Hugh (1997). _Periods in Highland History_. New York: Barnes & Noble. p. 8. [ISBN] [9780760717158].; and (in the original Early Modern English) in [Mackay (1923)], p. 67.
0193. **[^] **[Campbell, J. F. (1893)], p. 336.
0194. **[^] **[Telfer Dunbar (1979)], pp. 32–33, 92. This excerpt was left out of later republications, and is found only in Dunbar, among the later writers; he tracked down the original 1617 book.
0195. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 53, and plate 8.
0196. **[^] **[Scarlett (1990)], pp. 4, 6–7.
0197. **[^] **[Telfer Dunbar (1979)], pp. 224, 229, 239.
0198. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 72, quoting: [Campbell, A. (1890)].
0199. **[^] **[Scarlett (1990)], pp. 5, 239.
0200. **[^] **[Banks & de La Chapelle (2007)], pp. 68, 70.
0201. **[^] **[Banks & de La Chapelle (2007)], pp. 65–66.
0202. **[^] **[Campbell, J. F. (1893)], p. 336: "tartan was anciently worn, and ... particular patterns were worn in certain districts." See also p. 335, on natural dyes.
0203. **[^] **[Hinderks (2014)], p. 2, citing: Nicholson, Robin (November 2005). "From Ramsay's _Flora MacDonald_ to Raeburn's _MacNab_: The Use of Tartan as a Symbol of Identity". _Textile History_. **36** (2): 149. [doi]: [10.1179/004049605x61546]. [S2CID] [192109063].
0204. **[^] **[Innes of Learney (1971)], pp. 8–9.
0205. **[^] **[Hinderks (2014)], p. 3.
0206. **[^] **[Stewart, D. W. (1893)], pp. 9–10.
0207. **[^] **[Telfer Dunbar (1979)], p. 91.
0208. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], pp. 10–11.
0209. **[^] **[Stewart, D. W. (1893)], p. 14. Also quoted with a slightly different translation in [Telfer Dunbar (1979)], p. 32.
0210. **[^] **[Stewart, D. W. (1893)], p. 15.
0211. **[^] **[Henshall, Audrey S.]; Seaby, Wilfred A. (1961–1962). "The Dungiven Costume". _Ulster Journal of Archaeology_. 3rd series. 24–25\. Ulster Archaeological Society: 119–142\. [JSTOR] [20627382].
0212. **[^] **["Irish Tartan and the Irish Kilt"]. _Donaldsons.scot_. Donaldsons of Scotland. 2023. "The Origins of Irish Tartans" section. [Archived] from the original on 21 May 2023. Retrieved 30 May 2023.
0213. ^ [_**a**_] [_**b**_] Wilton, Brian (1 August 2019). ["The history of district tartans"]. _History Scotland_. Warners Group Publications. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0214. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 182.
0215. ^ [_**a**_] [_**b**_] [_**c**_] Smith, Clifford (2004). ["Tartan and Kilts"]. _UlsterScotsAgency.com_. [Ulster-Scots Agency]. Archived from [the original] on 14 August 2009. Retrieved 30 May 2023.
0216. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Newsome, Matthew (18 November 2005). ["Irish Tartans: Scottish tartans in disguise?"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 30 May 2023.
0217. **[^] **Dickson, Leslie (1995). ["Ulster Tartan"]. _Ullans: The Magazine for Ulster-Scots_ (3). Ulster-Scots Academy. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0218. **[^] **["Tartan Details - Ulster (Original)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0219. **[^] **["Tartan Details - Ulster (Red (Reconstruction))"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.
0220. ^ [_**a**_] [_**b**_] Newsome, Matthew (16 March 2008). ["Rethinking Irish Tartans"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 30 May 2023.
0221. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 15. Stewart was reading the 1607 Latin edition.
0222. **[^] **[Telfer Dunbar (1979)], p. 33. Dunbar was reading the abridged English edition of 1617.
0223. **[^] **Working, Laura (25 October 2016). ["Humanism in the Desert: Transculturality at the Huntington Library"]. _TIDE: Travel, Transculturality, and Identity in England, c. 1500–1700_. University of Oxford. [Archived] from the original on 17 June 2023. Retrieved 16 June 2023.
0224. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Telfer Dunbar (1979)], p. 155.
0225. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] Newsome, Matthew Allan C. (17 July 2016). ["The Original Military Tartan – the Black Watch"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 10 June 2023. Citing: Scarlett, James D. (2003). _The Origins and Development of Military Tartans: A Re-Appraisal_. Partizan Press. [ISBN] [1858185009].
0226. ^ [_**a**_] [_**b**_] Anderson, Donald (Spring 1939). "The Earliest Appearance of the Black Watch". _Journal of the Society for Army Historical Research_. **18** (69): 16–20\. [JSTOR] [44219779].
0227. ^ [_**a**_] [_**b**_] [Scarlett (1990)], pp. 26–27. The period material is also quoted at length in: [Telfer Dunbar (1979)], p. 157.
0228. **[^] **Paterson, James (1847). _History of the County of Ayr_. Vol. 1. pp. 380–382.
0229. **[^] **[Telfer Dunbar (1979)], pp. 33–34. Also quoted in: [Banks & de La Chapelle (2007)], p. 70. And: [Stewart, D. W. (1893)], pp. 15–16.
0230. **[^] **[Telfer Dunbar (1979)], p. 93.
0231. **[^] **[Telfer Dunbar (1979)], pp. 94–95.
0232. **[^] **[Telfer Dunbar (1979)], p. 37.
0233. **[^] **[Campbell, J. F. (1893)], pp. 369–370.
0234. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 12.
0235. **[^] **[Banks & de La Chapelle (2007)], p. 63.
0236. **[^] **[Telfer Dunbar (1979)], p. 35.
0237. **[^] **[Telfer Dunbar (1979)], p. 53 and title page. Dunbar incorrectly dates the map to 1643, though it is clearly marked 1653 in Roman numerals.
0238. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], pp. 38–39.
0239. **[^] **[Telfer Dunbar (1979)], pp. 39–40.
0240. **[^] **[Telfer Dunbar (1979)], pp. 37–38.
0241. **[^] **[Telfer Dunbar (1979)], p. 41, quoting: [Franck, Richard] (1821) \[1658\]. _Northern Memoirs_. Edinburgh/London: Archibald Constable & Co. / Hurst, Robinson & Co. Dunbar gives a date of 1656, but the book was written and first published in 1658.
0242. **[^] **[Telfer Dunbar (1979)], p. 41, quoting: [Ray, John] (1846) \[1662\]. _Memorials of John Ray_. London: Ray Society.
0243. **[^] **[Telfer Dunbar (1979)], pp. 91–92.
0244. **[^] **[Telfer Dunbar (1979)], pp. 41–42.
0245. **[^] **[Telfer Dunbar (1979)], p. 42, quoting: [Hume Brown, Peter], ed. (1891). [_Early Travellers in Scotland, 1295–1689_]. Edinburgh: David Douglas – via Internet Archive.
0246. **[^] **[Telfer Dunbar (1979)], pp. 43–44.
0247. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 97.
0248. **[^] **[Telfer Dunbar (1979)], pp. 55–56, and plate 9. When Dunbar was writing, the portrait was thought to date to c. 1660 and to depict either an unknown Highland chieftain or the actor-playwright [John Lacy].
0249. **[^] **[Wright, John Michael]. ["Lord Mungo Murray \[Am Morair Mungo Moireach\], 1668–1700. Son of 1st Marquess of Atholl"]. _NationalGalleries.org_. National Galleries of Scotland. [Archived] from the original on 29 January 2023. Retrieved 16 June 2023.
0250. **[^] **[Telfer Dunbar (1979)], p. 56.
0251. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 24. Also quoted in: [Telfer Dunbar (1979)], pp. 44, 96.
0252. ^ [_**a**_] [_**b**_] Philip of Almerieclose, James (1888) \[1691\]. [_The Grameid: An Heroic Poem Descriptive of the Campaign of Viscount Dundee in 1689_]. Translated by Murdoch, Alexander D. Edinburgh: Scottish Historical Society. Retrieved 8 June 2023 – via Internet Archive.
0253. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], pp. 22–23: " _The Grameid_, written in 1691, contains many references to the clothing and uniforms of the Highland army serving under [Viscount Dundee]."
0254. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [Scarlett (1990)], p. 13.
0255. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 12. Also quoted in: [Stewart, D. W. (1893)], p. 25; and: [Telfer Dunbar (1979)], p. 45.
0256. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 34.
0257. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (19 January 2012). ["Tartans of the Royal Company of Archers"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 24 June 2023. Retrieved 23 June 2023.
0258. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Stewart, D. W. (1893)], pp. 26–28.
0259. ^ [_**a**_] [_**b**_] [_**c**_] [Mackay (1924)], p. 50, at footnote.
0260. **[^] **See sources cited in the [§ Clan tartans] section.
0261. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Stewart, D. W. (1893)], p. 29.
0262. **[^] **[Stewart, D. W. (1893)], pp. 31–32.
0263. **[^] **[Campbell, J. F. (1893)], p. 347.
0264. **[^] **[Telfer Dunbar (1979)], pp. 62–66, plates 16–19, 23–24, 28–29, 30, 33–35, 44.
0265. **[^] **[MacBain (1911)], p. 151. Also quoted in: [Telfer Dunbar (1979)], pp. 92–93.
0266. **[^] **[Stewart, D. W. (1893)], p. 25–26.
0267. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], pp. 39–40.
0268. **[^] **[Brown (2012)], p. 6; citing: Stiùbhart, Domnhall Uilleam (2009). "Highland rogues and roots of Highland romanticism". In MacLachlan, Christopher (ed.). _Crossing the Highland Line_. Glasgow: Association for Scottish Literary Studies.
0269. **[^] **[Stewart, D. W. (1893)], pp. 30–31. Also quoted in: [Telfer Dunbar (1979)], p. 99.
0270. **[^] **[Telfer Dunbar (1979)], p. 100.
0271. **[^] **[Telfer Dunbar (1979)], p. 172.
0272. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 22
0273. **[^] **[Telfer Dunbar (1979)], pp. 57–58.
0274. **[^] **[Telfer Dunbar (1979)], pp. 55–58.
0275. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 144.
0276. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)], p. 100.
0277. **[^] **[Telfer Dunbar (1979)], p. 229.
0278. **[^] **[Telfer Dunbar (1979)], pp. 231–234.
0279. **[^] **[Telfer Dunbar (1979)], pp. 39, 49.
0280. ^ [_**a**_] [_**b**_] [Innes of Learney (1971)], pp. 10–11.
0281. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] p. 75.
0282. **[^] **[Stewart, D. W. (1893)], p. 35.
0283. **[^] **[Banks & de La Chapelle (2007)] pp. 17, 24.
0284. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 17.
0285. **[^] **[Trevor-Roper (1983)], pp. 20–21.
0286. **[^] **[Hinderks (2014)], p. 5, citing (among others): MacInnes, Allan (2007). "Jacobitism in Scotland: Episodic Cause of National Movement?". _The Scottish Historical Review_. **86** (2): 229–251\. [doi]: [10.3366/shr.2007.86.2.225]. [S2CID] [154561509].
0287. **[^] **[Banks & de La Chapelle (2007)] pp. 24, 78.
0288. **[^] **["Tartan Details - Unnamed C18th - Prince Charles Edward"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.
0289. **[^] **["Tartan Details - Unnamed C18th - Prince Charles Edward #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.
0290. **[^] **["Tartan Details - Prince Charles Edward (Edinburgh)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.
0291. **[^] **["Tartan Details - Unnamed C18th - Prince Charles Edward #4"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.
0292. **[^] **[Telfer Dunbar (1979)], pp. 87–89.
0293. **[^] **[Telfer Dunbar (1979)], pp. 82–86, 90.
0294. **[^] **[Telfer Dunbar (1979)], p. 76, plates 25, 27.
0295. **[^] **[Stewart, D. W. (1893)], pp. 32–33.
0296. **[^] **Eslea MacDonald, Peter (September 2021). ["Culloden Tartan"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 26 June 2023.
0297. **[^] **["Tartan Details - Culloden 1746 - Original"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 27 June 2023. Retrieved 26 June 2023.
0298. **[^] **Drysdale, Neil (13 November 2019). ["Valuable Culloden plaid donated to the National Museum of Scotland"]. _[The Press and Journal] _. [Archived] from the original on 15 July 2023. Retrieved 15 July 2023.
0299. **[^] **["Tartan Details - Jacobite - 1850"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 10 June 2023.
0300. **[^] **["Tartan Details - Jacobite #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 23 May 2024. Retrieved 10 June 2023.
0301. **[^] **["Tartan Details - Jacobite Dress #1"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.
0302. **[^] **[Innes of Learney (1971)], pp. 10–11: "No doubt the 'Jacobite' (political) tartan and a number of older Lowland tartans were invented at this time \[1840s\]".
0303. ^ [_**a**_] [_**b**_] [Cowan, Paul] (2021). ["Quick Guide to the Scottish Regiments"]. _Scottish Military Disasters_. [Archived] from the original on 23 May 2024. Retrieved 18 May 2023. This is the updated website version of the book: Cowan, Paul (2008). _Scottish Military Disasters_. Neil Wilson Publishing.
0304. ^ [_**a**_] [_**b**_] [_**c**_] [Groves (1893)]: p. 2.
0305. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Campbell of Airds, Alastair] (2000). _A History of Clan Campbell; Volume 1, From Origins to the Battle of Flodden_. Edinburgh: [Edinburgh University Press]. pp. 259–261\. [ISBN] [1902930177].
0306. **[^] **[Telfer Dunbar (1979)], p. 158.
0307. **[^] **[Armstrong (2017)], p. 196.
0308. **[^] **[Hinderks (2014)], p. 5.
0309. ^ [_**a**_] [_**b**_] [_**c**_] [Hinderks (2014)], pp. 5–7, citing (among others): Cheape, Hugh (2012) \[2010\]. "Gheibhte breacain charnaid". In Brown, Ian (ed.). _From Tartan to Tartanry: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].
0310. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 3.
0311. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (March 2021). ["Act of Proscription 1746: The Tartan Ban – Fact or Myth?"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 23 May 2024. Retrieved 13 May 2023.
0312. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 3.
0313. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)] p. 84.
0314. **[^] **["Tartan Details - MacKintosh"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 11 June 2023. Retrieved 11 June 2023.
0315. ^ [_**a**_] [_**b**_] [_**c**_] ["Tartan Details - Marchioness of Huntly's"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 19 June 2023. Retrieved 20 June 2023. SRT's record is mistitled "Marchioness of Huntly's" instead of "Huntly". At the separate [record for the Marchioness tartan] [Archived] 19 June 2023 at the [Wayback Machine] they correctly identify it as such. SRT also incorrectly states this tartan was published in Wilsons' _Key Pattern Book_ of 1819; that again applies to the Marchioness entry.
0316. **[^] **["Tartan Details - Gordon, Red (1819)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 3 July 2023. Retrieved 19 June 2023.
0317. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 6.
0318. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] pp. 84–85.
0319. ^ [_**a**_] [_**b**_] [_**c**_] Gardiner, Karen (29 March 2023). ["The real history of tartan, from the Scottish Highlands to the streets of Tokyo"]. _[National Geographic] _. Archived from [the original] on 29 March 2023.
0320. **[^] **[Scarlett (1990)], p. 15, footnote 9.
0321. **[^] **[Hinderks (2014)], p. 10, citing: Cheape, Hugh (1991). _Tartan: The Highland Habit_. Edinburgh: National Museums Scotland. p. 49.
0322. **[^] **[Hinderks (2014)], pp. 6–7, citing: Faiers, Jonathan (2008). _Tartan_. Oxford: Berg / Victoria & Albert Museum. pp. 107–108.
0323. **[^] **[Armstrong (2017)], pp. 34, 36–37.
0324. **[^] **[Telfer Dunbar (1979)], p. 9.
0325. **[^] **[Trevor-Roper (1983)], p. 24.
0326. **[^] **[Banks & de La Chapelle (2007)], p. 24.
0327. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] Eslea MacDonald, Peter (November 2010). ["The early use of the Murray of Tullibardine Tartan"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.
0328. **[^] **[Hinderks (2014)], p. 9, citing: Tuckett, Sally J. S. (2009). ["National Dress, Gender and Scotland: 1745–1822"]. _Textile History_. **40** (2: Researching the Garment Industry): 22. [doi]: [10.1179/004049609x12504376351308]. [S2CID] [161283151].
0329. **[^] **[Hinderks (2014)], p. 9, citing: Coltman, Viccy (2010). ["Party-coloured Plaid? Portraits of Eighteenth-century Scots in Tartan"]. _Textile History_. **41** (2: Researching the Garment Industry): 189. [doi]: [10.1179/174329510X12798919710635]. [S2CID] [154382977].
0330. **[^] **[Banks & de La Chapelle (2007)], p. 85; quoting: Cheape, Hugh (2005). _The Changing Image of the Highlands After 1745_. Benjamin West in Focus. National Gallieries of Scotland.
0331. **[^] **[Brown (2012)], p. 3.
0332. **[^] **Eslea MacDonald, Peter (May 2014). ["Murray of Ochtertyre"] (PDF). _ScottishTartans.co.uk_. Retrieved 21 June 2023.
0333. **[^] **[Armstrong (2017)], p. 22, citing _Collins Encyclopaedia_.
0334. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 87.
0335. **[^] **[Stewart, D. W. (1893)], pp. 40, 42.
0336. **[^] **[Trevor-Roper (1983)], pp. 24–25.
0337. **[^] **[Hinderks (2014)], p. 9, quoting: [Nicholson (2005)], p. 158.
0338. ^ [_**a**_] [_**b**_] [Martin, R. (1988)], p. 53.
0339. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 14.
0340. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 19.
0341. **[^] **[Tuckett (2016)], pp. 4–5, 7.
0342. ^ [_**a**_] [_**b**_] [_**c**_] [Hinderks (2014)], p. 10, citing: [Dziennik (2012)], pp. 127–129.
0343. **[^] **[Campbell, J. F. (1893)], p. 172.
0344. **[^] **[Telfer Dunbar (1979)], pp. 149–150.
0345. **[^] **[Tuckett (2016)], pp. 6–7, 12–13.
0346. **[^] **[Telfer Dunbar (1979)], pp. 145–146.
0347. **[^] **[Armstrong (2017)], pp. 5, 19, 24.
0348. ^ [_**a**_] [_**b**_] [Trevor-Roper (1983)], p. 25.
0349. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Scarlett (1990)], p. 35.
0350. **[^] **[Banks & de La Chapelle (2007)] p. 85.
0351. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], pp. 10–11.
0352. **[^] **[Telfer Dunbar (1979)], pp. 8–10.
0353. **[^] **[Tuckett (2016)], pp. 18–19.
0354. **[^] **[Armstrong (2017)], pp. 10, 14, 17, 55–56.
0355. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (October 2020). ["An 18th century Tartan Dress Coat of the Ancient Caledonian Society"] (PDF). _ScottishTartans.co.uk_. Retrieved 30 June 2023.
0356. **[^] **[Armstrong (2017)], p. 32; credits the term "Tory Highlandism" to: Cookson, J. E. (1999). "The Napoleonic Wars, military Scotland and Tory Highlandism in the early nineteenth century". _Scottish Historical Review_. **78** (1): 60–75\. [doi]: [10.3366/shr.1999.78.1.60].
0357. **[^] **[Armstrong (2017)], pp. 14, 18, 44, 55–56, 196–197.
0358. **[^] **[Brown (2012)], p. 5; citing: [Pittock, Murray] (2009). "To see ourselves as other see us". _European Journal of English Studies_. **13** (3): 298..
0359. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 102.
0360. **[^] **[Telfer Dunbar (1979)], p. 10.
0361. **[^] **Simpson, Peter (1996). _The Independent Highland Companies, 1603–1760_. Edinburgh: J. Donald. pp. 116–117\. [ISBN] [9780859764322].
0362. **[^] **["Empire: 1815–1915"]. _TheBlackWatch.co.uk_. Archived from [the original] on 17 May 2016. Retrieved 8 May 2016.
0363. **[^] **[Telfer Dunbar (1979)], pp. 180–181, citing: Sumner, Percy (1948). "\[title unspecified by source\]". _Journal of the Society for Army Historical Research_. **XXVI** (106). Citing in turn the regiment's own order books, originally reproduced in _The Red Hackle_ in October 1935.
0364. **[^] **[Campbell, J. F. (1893)], p. 343.
0365. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 178, relying on Mackay Scobie.
0366. ^ [_**a**_] [_**b**_] [Groves (1893)]: p. 3.
0367. **[^] **[Scarlett (1990)], pp. 29–30.
0368. **[^] **[Scarlett (1990)], pp. 26–28.
0369. **[^] **Eslea MacDonald, Peter (October 2015). ["42nd Regiment Band or Musicians' Tartan"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.
0370. **[^] **["Tartan Details - 42nd Regiment (Musicians)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 June 2023.
0371. **[^] **["Tartan Details - 42nd Regt - Drummers' Plaid"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 26 June 2023.
0372. **[^] **[Armstrong (2017)], p. 112.
0373. **[^] **[Armstrong (2017)], p. 20.
0374. **[^] **[Tuckett (2016)], pp. 9–10.
0375. ^ [_**a**_] [_**b**_] [Hinderks (2014)], p. 8, citing: Dziennik, Matthew P. (2012). ["Whig Tartan: Material Culture and Its Use in the Scottish Highlands, 1746–1815"]. _Past & Present_ (217): 125, 136. [doi]: [10.1093/pastj/gts025].
0376. **[^] **["Tartan Details - Loudoun's Highlanders"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 June 2023.
0377. **[^] **Described and illustrated in: [Scarlett (1990)], pp. 27, 29, plate 2(a).
0378. **[^] **[Telfer Dunbar (1979)], pp. 159, 184. Telfer Dunbar refers to them by their amalgamated, post-Childers Reforms names, but they are the same original regiments, 71st MacLeod's and 72nd Seaforth's.
0379. ^ [_**a**_] [_**b**_] [_**c**_] [Barnes & Allen (1956)]: pp. 84–86.
0380. **[^] **["Tartan Details - 78th Highlanders Regiment"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 June 2023.
0381. **[^] **["Tartan Details - MacLeod, Green"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 6 August 2023.
0382. **[^] **["Tartan Details - 74th Regiment of Foot"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.
0383. **[^] **[Eslea MacDonald (2012)], p. 20.
0384. **[^] **[Barnes & Allen (1956)]: pp. 86–87.
0385. **[^] **["Tartan Details - 92nd Regiment (Gordon)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.
0386. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 159–160.
0387. **[^] **["Tartan Details - Gordon Clan"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.
0388. **[^] **["Tartans"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 24 June 2023.
0389. **[^] **[Telfer Dunbar (1979)], pp. 185–186.
0390. **[^] **["Tartan Details - Cameron of Erracht"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 7 June 2023.
0391. **[^] **["Tartan Details - 79th Regiment"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 21 June 2023. This version is a slightly different setting as to hues, going a bit darker, but is clearly the same tartan as Cameron of Earracht.
0392. **[^] **[Barnes & Allen (1956)]: p. 86.
0393. **[^] **Browne, James (1854). [_History of the Highlands and of the Highland Clans: With an Extensive Selection from the Hitherto Inedited Stuart Papers_]. Vol. 4. A. Fullarton & Co. p. 377 – via Google Books.
0394. **[^] **["Tartan Details - Inverness Fencibles"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 1 July 2023. This source incorrectly lists this as the Inverness Fencibles tartan and conflates the two units; the [Inverness-shire Fencibles], as they were properly named, was a completely different unit, raised the same year, and their tartan is unknown.
0395. **[^] **["Tartan Details - 42nd Regiment"]. Scottish Register of Tartans. Retrieved 8 June 2023.
0396. **[^] **[Scarlett (1990)], p. 31, says that Robert Bain's _The Clans and Tartans of Scotland_ (1953 ed.) confirms this lightened Black Watch for the 93rd.
0397. **[^] **["Tartan Details - Sutherland #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 8 June 2023.
0398. **[^] **["Tartan Details - Sutherland 42nd"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 22 June 2023.
0399. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] [_**n**_] [_**o**_] [_**p**_] [_**q**_] [_**r**_] [_**s**_] [_**t**_] [_**u**_] [_**v**_] [_**w**_] [_**x**_] [_**y**_] [_**z**_] [_**aa**_] Newsome, Matthew Allan C. (2005). ["Sources of the Tartans"]. _Albanach_. Retrieved 16 May 2023.
0400. **[^] **Moncreiffe of That Ilk, Iain (1962) \[1954\]. _The Robertsons (Clan Donnachaidh of Atholl)_. Edinburgh: W. & A. K. Johnston & G. W. Bacon Ltd. p. 9 (fig. opposite).
0401. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 87; citing: [Stewart, D. C. (1974)], p. 2.
0402. **[^] **Carman, W. Y. (1957). _British Military Uniforms from Contemporary Pictures_. London: Leonard Hill. pp. 146, 152.
0403. **[^] **["Militaria Dictionary and Beginner's Guide"]. _KellyBadge.co.uk_. Ellesmere, Shropshire: Ian G. Kelly (Militaria). 2000. "Tartan Numbering System" section. Retrieved 8 June 2023.
0404. **[^] **["Regimental Tartans"]. _DCDalgliesh.co.uk_. D. C. Dalgliesh Ltd. 2023. Retrieved 19 May 2023.
0405. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Innes of Learney (1971)], pp. 9–10.
0406. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 155.
0407. ^ [_**a**_] [_**b**_] [_**c**_] Ward, Philip; Edwards, Julia (2012) \[1978\]. [_The Book of Common Fallacies_]. Skyhorse Publishing. p. 422. [ISBN] [9781616083366]. Retrieved 29 May 2023.
0408. **[^] **[Trevor-Roper (1983)], pp. 28–30.
0409. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Haswell Miller, A. E.] (1956). [Donaldson, Gordon] (ed.). _Common Errors in Scottish History_. London: Historical Association / George Philip & Son. Quoted at length in: [Telfer Dunbar (1979)], pp. 17–18; also quoted in: McGann, Kass (2003). ["The Question of Clan Tartans"]. _ReconstructingHistory.com_. "The Evolution of the Kilt" series. Archived from [the original] on 22 April 2008. Retrieved 10 June 2023.
0410. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 2.
0411. ^ [_**a**_] [_**b**_] Mackay Scobie, Iain Hamilton (June 1942). "Tartan and Clan Tartan". _Chambers Journal_. Quoted in: [Telfer Dunbar (1979)], pp. 14–15; and [McGann (2003)].
0412. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 15–16, quoting: Haldane, M. M. (1931). "The Great Clan Tartan Myth". _[The Scots Magazine] _. **16** (1): 44–51. (Haldane is also cited, as a debate opponent, in Innes of Learney (1971).)
0413. **[^] **[Martin, R. (1988)], p. 51; responding to claims in: Lurie, Alison (1981). _The Language of Clothes_. Random House. [ISBN] [9780394513027].
0414. **[^] **[Telfer Dunbar (1979)], p. 57.
0415. **[^] **[Trevor-Roper (1983)], pp. 28 _ff._
0416. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 161, quoting: [Campbell of Airds, Alastair] (1998) \[1994\]. "Tartan and the Highland dress". In [Way of Plean, George]; Squire, Romily (eds.). [_Collins Scottish Clan & Family Encyclopedia_]. HarperCollins / Standing Council of Scottish Chiefs. pp. 37–38\. [ISBN] [9780760711200] – via Internet Archive.
0417. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] ["Official Position on Clan Campbell Tartans"]. _CCSNA.org_. Clan Campbell Society (North America). 2018. Retrieved 13 May 2023. Quoting letter of Chief Ian Campbell in considerable detail.
0418. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 36, quoting: [Lockhart, George] (1817). "A Journal of the Expedition of Prince Charles Edward in 1745, by a Highland Officer". In [Aufrère, Anthony] (ed.). _Lockhart Papers_. Vol. II. p. 505.
0419. **[^] **[Armstrong (2017)], p. 23, citing the following, source of both the Campbell of Islay and Haldane statements: [Haldane (1931)].
0420. **[^] **[Scarlett (1990)], pp. 27–28.
0421. ^ [_**a**_] [_**b**_] [Thompson (1992)], p. iii.
0422. **[^] **[Scarlett (1990), p. 9] 
0423. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 57, quoting: Stewart, Donald William (August 1892). "Tartans in Family Portraits". _The Scottish Antiquary, or, Northern Notes and Queries_. note 455.
0424. **[^] **Wagner, Paul; Reynolds, Wayne (2002). _Pictish Warrior: AD 297–841_. "Warrior" series. Vol. 50. Osprey. p. 28.
0425. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 23.
0426. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 162, quoting Hugh Cheape, from a 1999 interview.
0427. **[^] **[Innes of Learney (1971)], pp. 8, 10.
0428. **[^] **[Innes of Learney (1971)], p. 8, citing: Fraser of Reelig, Charles Ian (1930). _Some Notes on Highland Tartans_. Inverness: The Northern Chronicle Office.
0429. **[^] **[Stewart, D. W. (1893)], at "Brodie" and "Huntley".
0430. **[^] **["Search Results \[Gordon\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 24 May 2023.
0431. **[^] **[Innes of Learney (1971)], p. 8.
0432. ^ [_**a**_] [_**b**_] [Innes of Learney (1971)], p. 9.
0433. **[^] **[Stewart, D. W. (1893)], pp. 12–13.
0434. **[^] **[Stewart, D. W. (1893)], p. 13.
0435. **[^] **["Tartan Details - MacLean of Duart Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.
0436. **[^] **["Tartan Details - Sutherland"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 May 2023.
0437. **[^] **["Tartans"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 20 May 2023.
0438. **[^] **["Tartan Details - Gordon, Red (1819)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 May 2023.
0439. **[^] **[Telfer Dunbar (1979)], plate 13.
0440. **[^] **[Telfer Dunbar (1979)], pp. 58–60.
0441. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 62.
0442. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 146.
0443. **[^] **["Tartan Details - Grant (1819 #1)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.
0444. **[^] **["Tartan Details - Grant (1838)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.
0445. **[^] **[Zaczek & Phillips (2013)], p. 153.
0446. **[^] **["Tartan Details - Grant (Vestiarium Scoticum)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.
0447. **[^] **[Telfer Dunbar (1979)], pp. 58–62.
0448. **[^] **[Telfer Dunbar (1979)], pp. 191–192; quoting: Drummond-Norie, William (1898). _Loyal Lochaber and Its Associations Historical, Genealogical, and Traditionary_. Glasgow: Morison Bros.
0449. **[^] **See, e.g.: [Telfer Dunbar (1979)], p. 173.
0450. **[^] **Scobie, Willie (2012). ["A Case for Clan Tartans"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 23 June 2021. Retrieved 24 May 2023.
0451. **[^] **["Tartan Details - Campbell of Armaddie"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0452. **[^] **["Tartan Details - Campbell of Lochlane"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0453. **[^] **["Tartan Details - Campbell, Red"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0454. **[^] **["Tartan Details - Stewart of Ardshiel"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0455. **[^] **["Tartan Details - Stewart of Atholl"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0456. **[^] **["Tartan Details - Stewart, Hunting #1"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0457. **[^] **["Tartan Details - Stewart, Old"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0458. **[^] **["Tartan Details -Stewart of Appin Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0459. **[^] **["Tartan Details - Stewart, Hunting #1"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0460. **[^] **["Tartan Details - Stewart, Hunting #3"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0461. **[^] **["Tartan Details - Stewart, Green"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0462. **[^] **["Tartan Details - Ramsay Blue Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.
0463. **[^] **[Grant, James] (1884). [_Cassell's Old and New Edinburgh_]. Vol. II. London: Cassell & Co. p. 235 – via Google Books.
0464. **[^] **[Kay, John]; Paton, Hugh (1842). ["No. LXIV: The Rev. Joseph Robertson MacGregor, First Minister of the Edinburgh Gaelic Chapel"]. _A Series of Original Portraits and Caricature Etchings_. Vol. I, part I. Edinburgh: Hugh Paton. pp. 152–153. Retrieved 29 May 2023 – via Google Books.
0465. **[^] **[Telfer Dunbar (1979)], p. 130.
0466. **[^] **[Telfer Dunbar (1979)], p. 44.
0467. **[^] **[Stewart, D. W. (1893)], pp. 28–29.
0468. **[^] **[Telfer Dunbar (1979)], p. 17.
0469. **[^] **["Tartan Details - MacDonald, Lord of The Isles"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 24 June 2023.
0470. **[^] **[Telfer Dunbar (1979)], pp. 61, 64, 71, 75, 88 plates 11–12, 35. "The tartan is of a typical pre-clan tartan style."
0471. **[^] **[Telfer Dunbar (1979)], pp. 57–60, 69, 71.
0472. **[^] **[Scarlett (1990)], pp. 13, 22.
0473. **[^] **[Trevor-Roper (1983)], p. 23, citing also: McClintock, H. F. (1943). _Old Highland Dress and Tartans_ (2nd ed.). Dundalk: Dundalgan Press. And: [Telfer Dunbar (1979)].
0474. **[^] **[Telfer Dunbar (1979)], pp. 73–75.
0475. **[^] **[Telfer Dunbar (1979)], p. 75.
0476. **[^] **[Campbell, A. (1890)], p. vi.
0477. **[^] **[Scarlett (1990)], p. 23, quoting: Campbell, Archibald (1899). [_Highland Dress, Arms and Ornament_]. Westminster: Constable & Co. – via Internet Archive.
0478. **[^] **[Telfer Dunbar (1979)], p. 73.
0479. **[^] **Ray, James (1752). [_Compleat History of the Rebellion, From Its First Rise, in 1745, to Its Total Suppression at the Glorious Battle of Culloden, in April 1746_]. p. 344 – via Google Books.
0480. **[^] **[Telfer Dunbar (1979)], p. 19.
0481. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 20.
0482. **[^] **[Telfer Dunbar (1979)], pp. 19, 163–164, quoting two articles (untitled by Dunbar) by I. H. Mackay Scobie in the _Journal of the Society for Army Historical Research_, 1941 and 1946.
0483. **[^] **[Telfer Dunbar (1979)], p. 90.
0484. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 4; Quotes the entire passage from Burt (1727–37, published 1754).
0485. **[^] **[Telfer Dunbar (1979)], pp. 19–20.
0486. **[^] **[Telfer Dunbar (1979)], pp. 4–6; quotes the entire letter from Forbes of Culloden to the Lord Lyon (1746).
0487. ^ [_**a**_] [_**b**_] Lawson, Cecil C. P. (1967) \[1941\]. _A History of the Uniforms of the British Army_. Vol. II. London: Norman Military Publications. p. 61. Quoted in: [Telfer Dunbar (1979)], p. 15.
0488. **[^] **[Telfer Dunbar (1979)], p. 15.
0489. **[^] **[Telfer Dunbar (1979)], pp. 47, 50: "fine specimens of pre-nineteenth-century tartans with their lovely colour combinations and interesting weaves are far removed from the 'clan' tartans of later times.  ... \[A\] number of old hard-spun splaid genuinely pre-1745 \[are\] unlike any modern 'clan' tartan patterns."
0490. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 16–17.
0491. **[^] **[Trevor-Roper (1983)], pp. 25–26.
0492. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 159.
0493. **[^] **[Hinderks (2014)], p. 8, citing: Bolton, Andrew (2003). _Bravehearts: Men in Skirts_. Victoria & Albert Museum. p. 99. [ISBN] [9780810965584].
0494. **[^] **Eslea MacDonald, Peter (November 2010). ["A Jacobite Lady Reveals Her True Colours"] (PDF). _ScottishTartans.co.uk_. Retrieved 25 June 2023.
0495. ^ [_**a**_] [_**b**_] ["Tartan Details - Huntly"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023. Citing previously unpublished research of James D. Scarlett (2005). The SRT record is confusingly named "Huntly" (which can refer to multiple tartans) rather than the more precise name "Marchioness of Huntly's", which SRT [misapplied to a different tartan].
0496. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 36. Also \[mis-\]quoted in: [Trevor-Roper (1983)], p. 23.
0497. **[^] **[Tuckett (2016)], p. 16.
0498. **[^] **[Telfer Dunbar (1979)], p. 14.
0499. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], pp. 3–4.
0500. **[^] **[Tuckett (2016)], p. 9.
0501. **[^] **Mills, N. J.; Carswell, A. L. (1998). "Wilson of Bannockburn and the Clothing of the Highland Regiments". _Journal of the Society for Army Historical Research_. **76** (307): 177. [JSTOR] [44230132].
0502. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)] p. 104.
0503. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 17.
0504. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 30.
0505. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Eslea MacDonald (2012)], p. 5.
0506. **[^] **[Tuckett (2016)], p. 29, footnote 92.
0507. **[^] **["Tartan Details - Campbell of Cawdor"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 6 June 2023.
0508. **[^] **["Tartan Details - Abercrombie (Wilsons' No.2/64)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0509. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 64.
0510. **[^] **["Tartan Details - Abercrombie"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0511. **[^] **["Tartan Details - Graham of Montrose #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0512. **[^] **["Tartan Details - Campbell of Breadalbane #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0513. **[^] **["Tartan Details - Campbell of Breadalbane"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.
0514. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 149.
0515. **[^] **["Tartan"]. _Scottish National Dictionary (1700–)_. Dictionaries of the Scots Language SCIO / University of Glasgow. 2005 \[1974\]. Retrieved 10 July 2023.
0516. **[^] **[Eslea MacDonald (2012)], pp. 5–6.
0517. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] Eslea MacDonald, Peter (February 2023). ["The Cockburn Collection"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.
0518. **[^] **Cockburn, William (c. 1820). _A collection of old hard tartans made by William Cockburn of Cockburn, Bart. between the years 1810–1820_.
0519. **[^] **["Tartan Details - Black Watch (Government)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.
0520. **[^] **[Paterson, M. B. (2001)], p. 157, quoting Stewart of Garth letter to [Andrew Robertson], from biography: Robert, James Ervine (1998). _The First Highlander: Major-General David Stewart of Garth CB, 1768–1829_. East Linton: Tuckwell. [ISBN] [1862320500].
0521. **[^] **[Paterson, M. B. (2001)], p. 158, quoting from letters in J. I. Robertson's _The First Highlander_ (1998) again.
0522. **[^] **[Telfer Dunbar (1979)], p. 119.
0523. **[^] **[Telfer Dunbar (1979)], p. 124.
0524. **[^] **This is reported as fact by the US-based House of Gordon society, which also makes other unsupportable assertions: ["Tartans"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 11 July 2023.
0525. **[^] **[Scarlett (1990)], p. 35. For "confluence of district ... and ... regimental", see entire chapters running pp. 9–36; wherein the arguments are made in stages.
0526. **[^] **[Haswell Miller, A. E.] (November 1947). "\[title not given in source\]". _[Scotland's Magazine] _. Cited in: [Telfer Dunbar (1979)], p. 17.
0527. **[^] **["Tartan Details - Campbell of Argyll"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.
0528. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 15.
0529. **[^] **[Armstrong (2017)], pp. 36, 283–284.
0530. ^ [_**a**_] [_**b**_] [_**c**_] ["Major-General David Stewart of Garth"]. _TheBlackWatch.org_. Perth: Black Watch Museum / Scottish Tourist Board. 27 November 2019. Retrieved 21 May 2023.
0531. **[^] **[Tuckett (2016)], p. 28, footnote 88.
0532. ^ [_**a**_] [_**b**_] [Trevor-Roper (1983)], pp. 28–29.
0533. **[^] **[Armstrong (2017)], pp. 107, 172.
0534. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [Paterson, M. B. (2001)], pp. 158–159.
0535. **[^] **[Armstrong (2017)], p. 21.
0536. **[^] **[Trevor-Roper (1983)], p. 26.
0537. ^ [_**a**_] [_**b**_] [_**c**_] [Urquhart (1994)], p. 18.
0538. **[^] **[Scarlett (1990)], p. 19: "the Clan Chiefs of the time appear to have been singularly lacking in knowledge of the tartans that they and their forbears should have been wearing since the Celtic mists parted – at least in what was fast becoming the popular imagination", as late as the 1850s.
0539. ^ [_**a**_] [_**b**_] [_**c**_] [Armstrong (2017)], p. 37.
0540. **[^] **[Telfer Dunbar (1979)], p. 160.
0541. **[^] **Eslea MacDonald, Peter (March 2020). ["The Lord of the Isles Tartans"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 May 2023.
0542. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 4.
0543. **[^] **[Armstrong (2017)], p. 40.
0544. **[^] **[Telfer Dunbar (1979)], p. 139.
0545. **[^] **[Eslea MacDonald (2012)], p. 7.
0546. **[^] **[Trevor-Roper (1983)], pp. 30, 32.
0547. **[^] **[Banks & de La Chapelle (2007)] p. 104; citing: [Prebble (2000)], p. 105.
0548. **[^] **[Scarlett (1990)], pp. 19–20, citing archived correspondence between Stewart of Garth and Robertson of Struan.
0549. **[^] **[Scarlett (1990)], p. 19, quoting: Smibert, Thomas (1850). _The Clans of the Highlands of Scotland_. J. Hogg.
0550. **[^] **["Tartan Details - Murray of Atholl"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.
0551. **[^] **[Tuckett (2016)], pp. 15–17.
0552. ^ [_**a**_] [_**b**_] [Thompson (1992)], p. v.
0553. **[^] **[Tuckett (2016)], pp. 16–17.
0554. ^ [_**a**_] [_**b**_] [_**c**_] [Moncreiffe of that Ilk 1967]: p. 24.
0555. **[^] **[Armstrong (2017)], p. 57.
0556. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 36.
0557. ^ [_**a**_] [_**b**_] [_**c**_] [Martin, R. (1988)], p. 54.
0558. **[^] **[Martin, R. (1988)], p. 55.
0559. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 148, citing quotation from: Webster, David (2011). _The World History of Highland Games_. Edinburgh: Luath. p. 65.
0560. **[^] **[Trevor-Roper (1983)], p. 33. Also quoted with different punctuation in: [Telfer Dunbar (1979)], p. 116.
0561. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 116.
0562. **[^] **[Telfer Dunbar (1979)], p. 118.
0563. **[^] **["Tartan Details - Scott"]. _TartanRegister.gov.uk_. The Scottish Register of Tartans. 16 April 2010. Retrieved 7 June 2023.
0564. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 153.
0565. **[^] **[Scarlett (1990)], pp. 64, 188–193.
0566. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)] pp. 106–108. They actually get publication date of _The Costume of the Clans_ off by one year; it was 1845 as confirmed in all other sources, like Telfer Dunbar (1979).
0567. ^ [_**a**_] [_**b**_] [Armstrong (2017)], pp. 60–61.
0568. **[^] **[Banks & de La Chapelle (2007)] p. 107.
0569. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 151.
0570. **[^] **[Telfer Dunbar (1979)], p. 142.
0571. **[^] **Stewart, Donald Calder; Thompson, J. Charles (1980). Scarlett, James (ed.). _Scotland's Forged Tartans: An Analytical Study of the Vestiarium Scoticum_. Edinburgh: Paul Harris Publishing. [ISBN] [0904505677].\[ _[page needed] _\]
0572. ^ [_**a**_] [_**b**_] [_**c**_] [Armstrong (2017)], p. 61.
0573. **[^] **[Scarlett (1990)], p. 195, quoting: [Stewart, D. C. (1974)].
0574. **[^] **[Telfer Dunbar (1979)], pp. 103, 107, 111.
0575. **[^] **[Telfer Dunbar (1979)], p. 131.
0576. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter. ["A Short History of Tartan"]. _ScottishTartans.co.uk_. Retrieved 7 October 2008.
0577. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] p. 108.
0578. **[^] **[Armstrong (2017)], pp. 11, 61–62.
0579. **[^] **[Scarlett (1990)], p. 18.
0580. **[^] **[Armstrong (2017)], pp. 4, 11, 49, 57, 157, 236.
0581. **[^] **[Martin, R. (1988)], p. 56.
0582. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 39.
0583. **[^] **[Telfer Dunbar (1979)], pp. 116–117.
0584. **[^] **[Telfer Dunbar (1979)], pp. 138–139.
0585. **[^] **[Scarlett (1990)], pp. 195–196, quoting: [Stewart, D. C. (1974)].
0586. **[^] **[Telfer Dunbar (1979)], pp. 140–141.
0587. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Banks & de La Chapelle (2007)] p. 26.
0588. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 196, quoting: [Stewart, D. C. (1974)].
0589. **[^] **[Telfer Dunbar (1979)], p. 129.
0590. **[^] **["Tartan Details - Home (Clans Originaux)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.
0591. **[^] **["Tartan Details - Brodie Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0592. **[^] **[Telfer Dunbar (1979)], p. 103.
0593. **[^] **[Scarlett (1990)], p. 197, quoting: [Stewart, D. C. (1974)].
0594. ^ [_**a**_] [_**b**_] Duncan of Sketraw, John A. (4 April 2009). ["The Romantic Myth of Scottish Clan Septs"]. _ScotsHistoryOnline.co.uk_. Archived from [the original] on 12 March 2022. Retrieved 13 May 2023.
0595. **[^] **[Scarlett (1990)], pp. 37–38.
0596. **[^] **[Scarlett (1990)], pp. 196–197, quoting: [Stewart, D. C. (1974)].
0597. ^ [_**a**_] [_**b**_] [Scarlett (1990)], pp. 20, 197–198, quoting: [Stewart, D. C. (1974)].
0598. **[^] **[Scarlett (1990)], p. 19: "what was fast becoming the popular imagination" by about 1850.
0599. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 37.
0600. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 198, quoting: [Stewart, D. C. (1974)].
0601. **[^] **[Scarlett (1990)], pp. 198–199, quoting: [Stewart, D. C. (1974)].
0602. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. ix.
0603. **[^] **[Scarlett (1990)], pp. 64–180.
0604. **[^] **["Tartan Details - MacFarlane Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0605. **[^] **["Tartan Details - MacFarlane Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0606. **[^] **[Stewart, D. C. (1974)].
0607. **[^] **[Scarlett (1990)].
0608. **[^] **[Scarlett (1990)], p. 21.
0609. **[^] **["Tartan Details - MacGregor Dress Red (Dance)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2005. Retrieved 13 May 2023.
0610. **[^] **[Scarlett (1990)], p. 24.
0611. ^ [_**a**_] [_**b**_] McIntyre, Alastair, ed. (2023) \[2002\]. ["Official Scottish Clans and Families"]. _ElectricScotland.com_. Retrieved 15 May 2023. This list appears to be regularly maintained, at least as of 2023.
0612. **[^] **["Tartan Details - Watson"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.
0613. **[^] **[Campbell, A. (1890)], p. 55.
0614. ^ [_**a**_] [_**b**_] MacGregor of MacGregor, Malcolm (28 April 2012). ["Our Tartan"]. _ACGSUS.org_. American Clan Gregor Society. Retrieved 13 May 2023.
0615. **[^] **[Way of Plean; Squire (2000)], p. 214.
0616. **[^] **["Tartan Details - Davidson"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 18 June 2023.
0617. ^ [_**a**_] [_**b**_] ["Tartan Details - Shaw"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.
0618. ^ [_**a**_] [_**b**_] ["Tartan Details - Shaw of Tordarroch Red (Dress)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.
0619. ^ [_**a**_] [_**b**_] ["Tartan Details - Shaw of Tordarroch Green (Hunting)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.
0620. **[^] **[Scarlett (1990)], p. 19.
0621. **[^] **["Tartan Details - Mar Tribe"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0622. **[^] **["Tartan Details - MacLeod Red"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.
0623. **[^] **["Tartan Details: Lumsden Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 October 2023.
0624. ^ [_**a**_] [_**b**_] ["Tartan Details - MacTavish Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0625. **[^] **["Tartan Details - Lumsden"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2013. Retrieved 19 June 2023.
0626. **[^] **["Tartan Details - Lumsden (Waistcoat)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.
0627. **[^] **["Tartan Details: Duncan of Sketraw"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 2 October 2023.
0628. **[^] **["Tartan Details - MacDowall"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2008. Retrieved 13 June 2023.
0629. **[^] **["Clan MacDowall Tartans"]. _MacDowall.wixsite.com_. Clan MacDowall Society. 2013. Retrieved 13 June 2023.
0630. **[^] **["Tartan Details - Cochrane Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2015. Retrieved 10 June 2023.
0631. **[^] **["Tartan Details: Carruthers"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. Retrieved 2 October 2023.
0632. ^ [_**a**_] [_**b**_] [_**c**_] Milne, N. C. (2010). [_Scottish Culture and Traditions_]. Paragon Publishing. p. 138. [ISBN] [9781899820795]. Retrieved 28 May 2023 – via Google Books.
0633. **[^] **[Armstrong (2017)], pp. 15–16.
0634. **[^] **[Tuckett (2016)], p. 8.
0635. **[^] **[Armstrong (2017)], p. 32.
0636. **[^] **[Banks & de La Chapelle (2007)], p. 92.
0637. **[^] **[Armstrong (2017)], p. 147.
0638. **[^] **[Armstrong (2017)], p. 31, quoting: [Burke, John Bernard] (1914). _A Genealogical and Heraldic Dictionary of the Peerage and Baronetage of the British Empire_ (106th ed.). London: Harrison. p. 1803.
0639. **[^] **[Telfer Dunbar (1979)], p. 80.
0640. **[^] **[Armstrong (2017)], pp. 14, 20.
0641. ^ [_**a**_] [_**b**_] [_**c**_] McNeil, Kenneth (2007). "Britain's 'imperial man': Walter Scott, David Stewart, and Highland Masculinity". [_Scotland, Britain, Empire_]. Ohio State University Press. pp. 83–84. Retrieved 29 May 2023.
0642. **[^] **[Armstrong (2017)], p. 117, 155.
0643. **[^] **[Armstrong (2017)], pp. 1, 3–5, _ff._
0644. **[^] **[Armstrong (2017)], p. 56.
0645. **[^] **[Armstrong (2017)], pp. 5, 14–16, 18–19, 24–26, 56.
0646. **[^] **[Armstrong (2017)], pp. 110–111, 150, 197.
0647. **[^] **[Scarlett (1990)], pp. 36–37.
0648. **[^] **[Telfer Dunbar (1979)], p. 147.
0649. **[^] **[Armstrong (2017)], pp. 36–37.
0650. **[^] **[Paterson, M. B. (2001)], p. 160.
0651. **[^] **[Hinderks (2014)], pp. 8–9, citing: [Dziennik (2012)], p. 136. And: [Nicholson (2005)], p. 160. And: [Harvie, Christopher] (1977). _Scotland and Nationalism: Scottish Society and Politics 1707 to the Present_. London: Routledge. pp. 13–14.
0652. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)], pp. 99–100; quoting: Clyde, Robert (1995). _From Rebel to Hero: The Changing Image of the Highlander, 1745–1830_. Tuckwell Press. p. 129. [ISBN] [9781862320277].
0653. **[^] **["An incident during the visit of George IV to Edinburgh, 1822"]. [National Galleries Scotland]. Retrieved 9 January 2017.
0654. **[^] **[Telfer Dunbar (1979)], pp. 12, 18.
0655. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 18.
0656. **[^] **[Telfer Dunbar (1979)], pp. 3, 9.
0657. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Magnusson, Magnus] (2003). _Scotland: The Story of a Nation_. Grove Press. pp. 653–654\. [ISBN] [0802139329].
0658. **[^] **Duncan, Ian (2007). [_Scott's Shadow: The Novel in Romantic Edinburgh_]. [Princeton University Press]. pp. 7–8\. [ISBN] [9780691043838].
0659. ^ [_**a**_] [_**b**_] [_**c**_] Brown, Angie (13 August 2022). ["How the king's visit saw kilts become Scotland's national dress"]. _[BBC News] _. Retrieved 7 July 2023.
0660. **[^] **[Armstrong (2017)], pp. 48–52.
0661. **[^] **[Banks & de La Chapelle (2007)] p. 105.
0662. **[^] **[Armstrong (2017)], p. 54, quoting: Prebble, John (1988). _The King's Jaunt: George IV in Scotland, August 1822, "One and Twenty Daft Days"_. London: Collins. p. 364.
0663. **[^] **[Armstrong (2017)], pp. 52–53, 57, 283.
0664. **[^] **Calder, Angus (1994). _Revolving Culture: Notes from the Scottish Republic_. London & New York: I.B. Tauris. p. 103. Quoted in: [Porter (1998)], p. 2.
0665. **[^] **[Trevor-Roper (1983)], p. 31.
0666. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 54.
0667. **[^] **[Scarlett (1990)], p. 15.
0668. **[^] **[Paterson, M. B. (2001)], p. 159, footnote 13.
0669. **[^] **[Armstrong (2017)], pp. 43–44. The term "synthetic Gaelicism" is attributed to: Morton, H. V. (1937) \[1929\]. _In Search of Scotland_. New York: Dodd, Mead and Company. p. 132.
0670. **[^] **[Armstrong (2017)], p. 42.
0671. **[^] **[Hinderks (2014)], p. 9, quoting: [Cheape (1991)], p. 49.
0672. **[^] **[Hinderks (2014)], p. 9, quoting: [Bolton (2003)], p. 100.
0673. **[^] **[Hinderks (2014)], p. 10. Citing: Hobsbawm & Ranger (1983), p. 24. And: Thorburn, W. A. (1976). ["Military Origins of Scottish National Dress"]. _Costume_. **10** (1): 29, 33. [doi]: [10.1179/cos.1976.10.1.29].
0674. **[^] **[Paterson, M. B. (2001)], pp. 78, 160–161
0675. **[^] **[Trevor-Roper (1983)], p. 28.
0676. **[^] **[Tuckett (2016)], p. 11.
0677. **[^] **[Armstrong (2017)], p. 126.
0678. **[^] **[Telfer Dunbar (1979)], p. 11.
0679. **[^] **[Telfer Dunbar (1979)], p. 175.
0680. **[^] **[Tuckett (2016)], pp. 17–18.
0681. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 150.
0682. **[^] **[Telfer Dunbar (1979)], p. 117.
0683. **[^] **[Telfer Dunbar (1979)], pp. 147–149.
0684. ^ [_**a**_] [_**b**_] [Tuckett (2016)], pp. 7–9.
0685. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 20.
0686. **[^] **[Banks & de La Chapelle (2007)] p. 109; citing: [Zacek & Phillips (2013)], p. 74.
0687. **[^] **See extensive treatment in: [von Fürstenberg, Princess Ira]; Nicolls, Andrew (1996). _Tartanware: Souvenirs from Scotland_. Trafalgar Square Press. [ISBN] [9781857935141].
0688. **[^] **[Banks & de La Chapelle (2007)] pp. 21–22.
0689. **[^] **[Armstrong (2017)], p. 84.
0690. **[^] **["19th-century Scottish kitch is today's collectible"]. _CoastalAntiques.com_. Collecting tartanware. Archived from [the original] on 16 September 2004. Retrieved 25 October 2008.
0691. **[^] **[Paterson, M. B. (2001)], p. 168, footnote 30.
0692. ^ [_**a**_] [_**b**_] [Martin, R. (1988)], p. 57.
0693. **[^] **[Armstrong (2017)], pp. 83–84.
0694. **[^] **[Armstrong (2017)], pp. 112–113.
0695. **[^] **Wilton, Brian. ["History of Tartan"]. _TartansAuthority.com_. Crieff, Scotland: [Scottish Tartans Authority]. Archived from [the original] on 22 March 2004. Retrieved 6 October 2008.
0696. **[^] **[Telfer Dunbar (1979)], p. 127, quoting an 1847 review.
0697. **[^] **[Armstrong (2017)], pp. 67, 198.
0698. **[^] **[Armstrong (2017)], pp. 68–70, 88–89.
0699. **[^] **[Armstrong (2017)], pp. 74, 98, 102.
0700. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (June 2019). ["The Balmoral Tartan"] (PDF). _ScottishTartans.co.uk_. Retrieved 30 June 2023.
0701. **[^] **[Armstrong (2017)], pp. 74–76, 85, 95.
0702. **[^] **["Tartan in Royal Dress"]. Royal Collection Trust. Retrieved 3 February 2020.
0703. **[^] **[Banks & de La Chapelle (2007)], pp. 34, 108.
0704. **[^] **[Armstrong (2017)], pp. 83–84, 99.
0705. **[^] **[Armstrong (2017)], p. 62.
0706. **[^] **[Queen Victoria] (1885). [_More leaves from the journal of a life in the Highlands, from 1862 to 1882_] (New ed.). London: Smith, Elder & Co. p. 173.
0707. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 207.
0708. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 101.
0709. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 108–109.
0710. **[^] **[Armstrong (2017)], pp. 93, 205.
0711. **[^] **[Armstrong (2017)], pp. 93–94, 104, 107, 148–150, 236.
0712. **[^] **[Armstrong (2017)], pp. 19, 59, 65, 87–88, 192.
0713. **[^] **[Paterson, M. B. (2001)], p. 161.
0714. **[^] **[Armstrong (2017)], pp. 6, 57, 87.
0715. **[^] **[Armstrong (2017)], pp. 12, 272.
0716. **[^] **[Armstrong (2017)], p. 93.
0717. **[^] **[Armstrong (2017)], p. 87, quoting: [Brown, Ivor J. C.] (1955). _Balmoral: The History of a Home_. London: Collins. p. 15.
0718. **[^] **[Armstrong (2017)], p. 84, quoting: [Monod, Paul]; [Pittock, Murray G. H.]; [Szechi, Daniel], eds. (2010). _Loyalty and Identity: Jacobites at Home and Abroad_. Basingstoke: Palgrave Macmillan. p. 43.
0719. **[^] **[Armstrong (2017)], p. 84, quoting: [Thompson, Dorothy] (1990). _Queen Victoria: The Woman, the Monarch and the People_. London: Virago. p. 54.
0720. **[^] **[Armstrong (2017)], p. 88, citing: [Devine, T. M.] (2000). _The Scottish Nation, 1700–2000_. London: Allen Lane. p. 231.
0721. **[^] **[Armstrong (2017)], pp. 82, 87, 91.
0722. ^ [_**a**_] [_**b**_] [Armstrong (2017)], pp. 6, 16, 58, 72, 77–79, 81, 92, 150, 167, 204–206, 271–272. The term "Balmorality" is attributed to: [Scott-Moncrieff, George] (1932). "Balmorality". In [Thomson, David Cleghorn] (ed.). _Scotland in Quest of Her Youth: A Scrutiny_. London: Oliver & Boyd. pp. 69–86.
0723. **[^] **[Armstrong (2017)], pp. 58, 65, 85, 95, 99–100, 103, 109, 237, 262.
0724. **[^] **[Armstrong (2017)], pp. 6, 11, 59, 78, 204, 241.
0725. **[^] **[Armstrong (2017)], pp. 99–100, 119–121, 237, 262.
0726. **[^] **[Armstrong (2017)], pp. 95–96, 103, 167, 176, 212–213, 245.
0727. **[^] **[Armstrong (2017)], pp. 6, 59, 78, 103, 285.
0728. **[^] **[Armstrong (2017)], pp. 167, 254.
0729. **[^] **[Armstrong (2017)], p. 259, quoting: Withers, C. W. J. (1992). "The historical creation of the Scottish Highlands". In Donnachie, Ian; [Whatley, Christopher] (eds.). _The Manufacture of Scottish History_. Edinburgh: Polygon. p. 155.
0730. **[^] **[Armstrong (2017)], pp. 254, 259.
0731. **[^] **[Scarlett (1990)], p. 16.
0732. **[^] **Macaulay, Thomas Babington (1848). "Chapter XIII". [_The History of England from the Accession of James II:_]. § 284–285.
0733. **[^] **[Armstrong (2017)], p. 272.
0734. **[^] **[Armstrong (2017)], pp. 251–252, quoting: [Scott-Moncrieff (1932)], p. 75.
0735. **[^] **[Armstrong (2017)], p. 81, quoting: [Devine (2000)], p. 231.
0736. **[^] **[Armstrong (2017)], pp. 108–109, 125, 275.
0737. **[^] **[Telfer Dunbar (1979)], p. 141.
0738. **[^] **[Armstrong (2017)], p. 243.
0739. **[^] **[Armstrong (2017)], pp. 124–125, 237, 254, 262.
0740. **[^] **[Armstrong (2017)], pp. 178–184.
0741. **[^] **[Martin, R. (1988)], pp. 57–58.
0742. **[^] **[Armstrong (2017)], p. 84, citing: [Faiers (2008)], p. 193.
0743. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Martin, R. (1988)], p. 58.
0744. **[^] **Jacobson, Ralph E; Ray, Sidney F.; Attridge, Geoffrey G.; Axford, Norman R. (2000). [_The Manual of Photography: Photographic and Digital Imaging_]. Focal Press. p. 228. [ISBN] [0240515749].
0745. **[^] **[Martin, R. (1988)], pp. 56–57.
0746. **[^] **[Brown (2012)], pp. 6–7.
0747. **[^] **[Armstrong (2017)], _passim_; much of this work is an anaysis of the "symbiotic" Highlandism relationship between Victoria and the Atholls.
0748. **[^] **Dorrian, Mark (2006). ["The King and the City: On the Iconology of George IV in Edinburgh"] (PDF). _Edinburgh Architecture Research_. **30**: 32\. [ISSN] [0140-5039]. Retrieved 10 July 2023.
0749. **[^] **["King George IV"]. _Undiscovered Scotland_. 2007. Retrieved 10 July 2023.
0750. **[^] **Campbell, Jeannie (17 June 2022). ["Royal visit to Edinburgh 1822 – The Background, Part One"]. _Bagpipe News_. Retrieved 10 July 2023.
0751. **[^] **[Armstrong (2017)], p. 63; see especially footnote 33.
0752. **[^] **[Armstrong (2017)], p. 185.
0753. **[^] **[Armstrong (2017)], pp. 209–221. The "as they _now_ are" quote is on p. 219.
0754. **[^] **["Wearing of Sashes by Ladies in Evening Dress"] (PDF). [Court of the Lord Lyon]. 2009 – via Society of Scottish Armigers. SSA indicates this was originally published by the Lord Lyon, and the text seems to indicate this, but the LL website no longer provides such a document.
0755. **[^] **[Armstrong (2017)], p. 198.
0756. **[^] **Falke, Jacob (1872). ["National Domestic Industry"] (PDF). _The Workshop_. **5** (3): 33–36\. [doi]: [10.2307/25586655]. [JSTOR] [25586655]. Retrieved 13 July 2023.
0757. **[^] **[Armstrong (2017)], pp. 239, 242, 279.
0758. **[^] **[Armstrong (2017)], pp. 190–191.
0759. **[^] **[Armstrong (2017)], p. 125.
0760. **[^] **[Martin, R. (1988)], pp. 58, 60.
0761. **[^] **See detailed treatment in: Dwyer-McNulty, Sally (2014). _Common Threads: A Cultural History of Clothing in American Catholicism_. University of North Carolina Press. [ISBN] [9781469614106].
0762. **[^] **[Paterson, M. B. (2001)], pp. 174–175.
0763. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Kirsanova, Raisa (September 2016). ["Scottish tartans and Russian Romanticism"]. _Clothing Cultures_. **3** (3): 237–245\. [doi]: [10.1386/cc.3.3.237\_1]. Retrieved 26 May 2023.
0764. **[^] **["All you need to know about Walkers Shortbread"]. _The Scotsman_. Retrieved 18 October 2021.
0765. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 26–27.
0766. **[^] **[Mackay (1924)], p. 21.
0767. **[^] **[Paterson, M. B. (2001)], pp. 81–82, 130.
0768. **[^] **[Armstrong (2017)], pp. 236, 256.
0769. **[^] **[Paterson, M. B. (2001)], p. 27, footnote 24.
0770. **[^] **[Armstrong (2017)], pp. 3, 277.
0771. **[^] **[Armstrong (2017)], p. 256.
0772. **[^] **[Armstrong (2017)], p. 283.
0773. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 264.
0774. **[^] **[Paterson, M. B. (2001)], p. 190.
0775. **[^] **[Armstrong (2017)], pp. 260, 273.
0776. **[^] **[Paterson, M. B. (2001)], p. 218.
0777. **[^] **[Paterson, M. B. (2001)], pp. 195, 218.
0778. **[^] **[Armstrong (2017)], pp. 253–254, 280.
0779. **[^] **["National Tartan Day"]. _Channel 39 Southern Television_. 1 July 2008. Retrieved 9 April 2023.
0780. **[^] **Gilchrist, Jim (15 December 2008). "Stories of Homecoming: We're on the march with Argentina's Scots". _[The Scotsman] _. p. 18.
0781. ^ [_**a**_] [_**b**_] ["Tartan Details - Brittany National"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0782. **[^] **["About Us"]. _NYCTartanWeek.org_. National Tartan Day New York Committee. 2023. Retrieved 3 June 2023.
0783. **[^] **[Brown (2012)], p. 7.
0784. **[^] **[Armstrong (2017)], p. 256, quoting: Maitland Hume, Ian (2001). _The contemporary role of the kilt and tartan in the construction and expression of Scottish American identity_ (PhD). University of Edinburgh.
0785. **[^] **["About Us"]. _ScottishTartansMuseum.org_. Franklin, North Carolina: Scottish Tartans Museum and Heritage Center. 2020. Retrieved 28 May 2023.
0786. **[^] **[Paterson, M. B. (2001)], p. 180.
0787. **[^] **[Paterson, M. B. (2001)], p. 131.
0788. **[^] **Wylie, James (2022). ["The People's Tartan: Ba part of reinventing tartan"]. _VAM.ac.uk_. [V&A Dundee]. Retrieved 29 May 2023.
0789. **[^] **["Exhibition: Tartan – on until Sunday 14 January 2024"]. _VAM.ac.uk_. [V&A Dundee]. 2023. Retrieved 26 May 2023.
0790. **[^] **Bamford, Abbey (3 April 2023). ["Plaid weaves grid concept into V&A Dundee Tartan exhibition space"]. _[Design Week] _. Retrieved 26 May 2023.
0791. **[^] **[Paterson, M. B. (2001)], p. 154, quoting Teall of Teallach's foreword in Blair Urquhart's _Identifying Tartans_.
0792. **[^] **[Brancaz (2016)], paras. 9–10, 12, 14–15. Citing: Ray, Celeste (May 2010). _Ancestral Clanscapes and Transatlantic Tartaneers_. Symposium on Return Migration. Edinburgh: Scottish Centre for Diaspora Studies. pp. 6–7. Republished as: Ray, Celeste (2012). "Ancestral clanscapes and transatlantic tartaneers". In Varricchio, Mario (ed.). _Back to Caledonia: Scottish Homecomings from the Seventeenth Century to the Present_. Birlinn. pp. 168–188\. [ISBN] [9781906566449]. Also citing: McArthur, Colin (2003). _Brigadoon, Braveheart and the Scots: Distortions of Scotland in Hollywood Cinema_. "Cinema and Society" series. London: I. B. Tauris. [ISBN] [9781860649271].
0793. **[^] **[Brancaz (2016)], para. 15.
0794. ^ [_**a**_] [_**b**_] ["What's New"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Archived from [the original] on 12 April 2009. Retrieved 13 March 2020.
0795. ^ [_**a**_] [_**b**_] [_**c**_] [Mather, Jim]; MacKenzie, George (9 July 2007). ["National Tartan Register to be set up"]. _Gov.scot_. [Government of Scotland]. Archived from [the original] on 16 November 2020. Retrieved 27 May 2023.
0796. **[^] **[Armstrong (2017)], p. 2275.
0797. **[^] **Fisher, Alice (10 April 2010). ["Why the world has gone mad for plaid"]. "Fashion" department. _[The Observer] _. Retrieved 28 May 2020.
0798. **[^] **Pittock, M. G. H. (2013) \[2008\]. _The Road to Independence? Scotland in the Balance_ (2nd ed.). London: Reaktion Books. p. 150.
0799. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 172.
0800. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 274.
0801. ^ [_**a**_] [_**b**_] [Cheape (2012)], p. 14.
0802. **[^] **This is much of the theme of [Paterson, M. B. (2001)], "Chapter 5: Tartan (Case Study 1)", pp. 152–188.
0803. **[^] **[Armstrong (2017)], pp. 238, 268.
0804. **[^] **[Armstrong (2017)], p. 268.
0805. **[^] **Watson, Nicola (8 March 2019). ["Tartan Is the Next Big Thing: 5 Standout Fall/Winter 2019 Trends That You Need to Know of"]. _[Her World] _. [SPH Magazines]. Retrieved 10 July 2023.
0806. **[^] **[Armstrong (2017)], pp. 12, 280–281.
0807. **[^] **[Armstrong (2017)], pp. 12, 269, 291.
0808. **[^] **[Armstrong (2017)], p. 276.
0809. **[^] **[Armstrong (2017)], p. 278, quoting: [Pittock, Murray G. H.] (2002). "The Jacobite Cult". In [Cowan, Edward J.]; [Finlay, Richard J.] (eds.). _Scottish History: The Power of the Past_. Edinburgh University Press. p. 208.
0810. **[^] **[Brancaz (2016)], paras. 17–21.
0811. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 33.
0812. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 259.
0813. **[^] **["Our Brands"]. _Clan.com_. Scotweb Marketing Ltd. 2023. Retrieved 15 July 2023.\[ _[self-published source] _\]
0814. **[^] **Johnston, Kevin (dir.) (4 April 2021). "Clans and Tartans". [_Men in Kilts: A Roadtrip with Sam and Graham_]. Season 1. Episode 7. Starz.
0815. **[^] **["Tartan Weaving Mill & Exhibition"]. _Gazetteer for Scotland_. University of Edinburgh / Royal Scottish Geographical Society. 2022. Retrieved 12 July 2023.
0816. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 175.
0817. **[^] **["Tartans"]. _MacDougall.org_. Clan MacDougall Society of North America. 2021. Retrieved 13 June 2023.
0818. **[^] **[Paterson, M. B. (2001)], p. 153.
0819. **[^] **[Paterson, M. B. (2001)], p. 174. (PV was imprecisely referred to as "vinyl viscose" here.)
0820. **[^] **Example manufacturer specs: ["Polyviscose Plaid Fabric Made to Order in 91 Tartans"]. _ScotlandShop.com_. 2021. Retrieved 30 May 2023.
0821. **[^] **Example manufacturer specs: ["Poly Viscose Tartan Swatches"]. _Kilts-n-Stuff.com_. Celtic Croft. Retrieved 30 May 2023.
0822. ^ [_**a**_] [_**b**_] Example marketing: Moloney, Eve (22 December 2021). ["What Is Polyviscose Fabric?"]. _TheScotlandKiltCompany.co.uk_. Retrieved 30 May 2023.
0823. ^ [_**a**_] [_**b**_] Example marketing: Fiddes, Nick (2020). ["Spotlight on Poly-viscose fabric – its uses, nature, benefits and drawbacks"]. _Clan.com_. Retrieved 30 May 2023.
0824. **[^] **Example marketing: ["Everything You Need to Know About Poly-Viscose Fabric"]. _Kils-n-Stuff.com_. Celtic Croft. 20 October 2020. Retrieved 30 May 2023.
0825. **[^] **Example marketing: ["11 – 12 oz. Poly Viscose Tartan Cloth"]. _USAKilts.com_. 2020. Retrieved 30 May 2023.
0826. **[^] **Example marketing: ["Poly-Viscose"]. _AtlantaKilts.com_. 2022. Retrieved 30 May 2023.
0827. **[^] **[Paterson, M. B. (2001)], pp. 176–177.
0828. **[^] **[Armstrong (2017)], p. 269, quoting: Craig, Carol (2003). _The Scots' Crisis of Confidence_. Edinburgh: Big Thinking. p. 27.
0829. **[^] **[Paterson, M. B. (2001)], pp. 184–186.
0830. **[^] **"Brigadoon". _[The Monthly Film Bulletin] _. **22** (258): 99. July 1955.
0831. **[^] **Beatty, John (Summer 2003). ["Scotland and Its Image: The Brigadoon Syndrome"]. _The Scotia News_. **2** (3 \[total issue no. 7\]). Saltire Society of New York. Retrieved 29 May 2023 – via City University of New York.
0832. ^ [_**a**_] [_**b**_] ["Kilt movies pay off Hollywood lift for Scottish tourism"]. _[The Herald] _. Glasgow. 25 August 1996. Retrieved 29 May 2023.
0833. **[^] **Roberts, David (2006). _British Hit Singles & Albums_ (19th ed.). London: Guinness World Records. p. 45. [ISBN] [1904994105].
0834. **[^] **Ash, Juliet; Wright, Lee (1988). _Components of Dress: Design, Manufacturing, and Image-making in the Fashion Industry_. Routledge. p. 63. [ISBN] [0415006473].
0835. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Martin, R. (1988)], p. 59.
0836. **[^] **Norwich, William (5 March 2004). ["Stephen Sprouse, Design Pioneer, Dies at 50"]. _[The New York Times] _. Retrieved 6 July 2023.
0837. **[^] **Several examples of McQueen's bold tartan designs are illustrated about half-way through this long article: Truong, Alain (3 May 2011). ["Alexander McQueen's Iconic Designs in Costume Institute Retrospective at Metropolitan Museum"]. _Eloge de l'Art_. Retrieved 23 June 2023.
0838. **[^] **[Paterson, M. B. (2001)], pp. 152–153.
0839. **[^] **Milligans, Lauren (15 May 2012). ["Designer Stamps - Great British Fashion Stamps"]. _British Vogue_. Retrieved 24 September 2022.
0840. **[^] **["Checkerboard and Tartan: How Two Patterns Changed Connotation Through Centuries"]. _One Block Down_. 2021. Retrieved 12 July 2023.
0841. **[^] **Komar, Marlen (30 March 2016). ["The Evolution of Androgynous Fashion Throughout the 20th Century"]. _[Bustle] _. Retrieved 28 May 2023.
0842. **[^] **Elan, Priya (13 April 2016). ["How A$AP Rocky, Rihanna and Kanye West reinvented grunge style"]. _[The Guardian] _. Retrieved 28 May 2023.
0843. **[^] **Kobel, Peter (2 April 1993). ["Smells Like Big Bucks"]. _[Entertainment Weekly] _. [Archived] from the original on 14 October 2007. Retrieved 28 May 2023.
0844. **[^] **[Paterson, M. B. (2001)], pp. 285, 289.
0845. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 262.
0846. ^ [_**a**_] [_**b**_] Doran, Tom (May 2015). ["Tartans Made for the Movies"]. _The Scotia News_. **9** (5 \[total issue 50\]). Saltire Society of New York. Archived from [the original] on 25 January 2022. Retrieved 29 May 2023 – via City University of New York.
0847. **[^] **[Paterson, M. B. (2001)], pp. 64, 287.
0848. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], pp. 285–287, 304.
0849. **[^] **Greig, Finlay (4 September 2020). ["10 Braveheart inaccuracies: historical blunders in the Mel Gibson film about the Wars of Scottish Independence"]. _[The Scotsman] _. Retrieved 29 May 2023.
0850. ^ [_**a**_] [_**b**_] Shattuck, Kathryn (21 May 1995). ["If That's His Wallace Kilt, Who Stole The Yellow Stripes?"]. "Film" section. _[The New York Times] _. Retrieved 29 May 2023.
0851. **[^] **[Armstrong (2017)], p. 30.
0852. **[^] **Butson, Mackenzi (6 May 2020). ["Disney/Pixar's Brave: 10 Aspects of Scottish Culture Explored in the Film"]. _[Screen Rant] _. Retrieved 29 May 2023.
0853. **[^] **[Armstrong (2017)], pp. 11, 237, 259, 262, 280, 290.
0854. ^ [_**a**_] [_**b**_] Martin, Carol A. L. (2020). ["Film & Television: When authenticity is important, some costumers go as far as designing a special tartan"]. _CuriousAndUnusualTartans.com_. Retrieved 29 May 2023.
0855. **[^] **Nicol, Danny (2018). _Doctor Who: A British Alien?_. Springer. p. 93.
0856. **[^] **Delgado, Gabriela (11 September 2021). ["Doctor Who: Every Doctor's Signature Outfit, Ranked from Worst to Best"]. _[Comic Book Resources] _. Retrieved 10 July 2023.
0857. **[^] **[Behind The Lens - The Time of the Doctor - Doctor Who: Christmas Special 2013 - BBC]  – [YouTube], official BBC channel, 26 December 2013. Received 14 July 2018.
0858. **[^] **["Claire Brown"]. _Doctor Who World_. 2021. Retrieved 18 June 2023.
0859. **[^] **["Doctor Who: Jodie Whittaker's regeneration reveals a new Doctor"]. _BBC News_. 23 October 2022. Retrieved 26 October 2022.
0860. **[^] **["The stories behind 7 of Scotland's most popular tartans"]. _[The Scotsman] _. 8 February 2018. Retrieved 8 June 2023.
0861. ^ [_**a**_] [_**b**_] ["Tartan Details - Caledonia No 3"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.
0862. **[^] **["Tartan Details - Caledonia"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.
0863. **[^] **["Tartan Details - MacPherson #5"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 16 July 2023.
0864. **[^] **["Tartan Details - Rob Roy Macgregor"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.
0865. **[^] **[" **buffalo plaid** in American English"]. _CollinsDictionary.com_. HarperCollins. 2023. Retrieved 12 July 2023.
0866. **[^] **Garton, Christine (16 December 2019). ["A Brief History of Buffalo Plaid"]. _PieceWork Magazine_. Long Thread Media. Retrieved 12 July 2023.
0867. ^ [_**a**_] [_**b**_] Schlueter, Preston (12 January 2020). ["Grids, Plaids, and Windowpanes: Checked Patterns in Menswear and How to Wear Them"]. _Gentleman's Gazette_.
0868. **[^] **["Tartan Details - Pride, The Tartan of"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.
0869. **[^] **Schwartzapfel, Beth (17 July 2008). ["Scots design Jewish tartan"]. _[The Jewish Daily Forward] _. Sound the Bagpipes. Retrieved 10 May 2009.
0870. **[^] **["Tartan Details - Scottish Islamic"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2012. Retrieved 13 November 2023.
0871. **[^] **["Tartan Details - Sikh"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 November 2023.
0872. **[^] **[Paterson, M. B. (2001)], p 127.
0873. **[^] **["Search Results \[rainbow\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 18 June 2023. More can be found with keyword searches "LGBT" and "pride" (though most of the latter are false-positives).
0874. **[^] **Hutcheson, Colin W. ["Regimental Tartans"]. _TartansAuthority.com_. Scottish Tartans Authority. Retrieved 1 May 2010.
0875. **[^] **[Scarlett (1990)], pp. 43–44.
0876. ^ [_**a**_] [_**b**_] [_**c**_] Newsome, Matthew Allan C. (2008). ["Purveyors of 'Tartan Tat' Taken to Task"]. _Albanach.org_. Retrieved 14 July 2023. Originally published in _The Scottish Banner_, September 2008.
0877. **[^] **["Check out our new tartan"]. _[The Scotsman] _. 17 September 2008. Retrieved 24 September 2008.
0878. **[^] **["Tartan Details - Maple Leaf"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 7 June 2023.
0879. ^ [_**a**_] [_**b**_] ["Tartans"]. [Department of Canadian Heritage]. Archived from [the original] on 16 August 2002. Retrieved 24 September 2008.
0880. ^ [_**a**_] [_**b**_] ["Tartan Details - Welsh National"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0881. **[^] **["Tartan Details - Cornish National"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0882. **[^] **["Tartan Details - Cornish National #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0883. **[^] **["Tartan Details - Cornish Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0884. **[^] **["Tartan Details - Cornish National Day"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0885. **[^] **["Tartan Details - St. Piran Cornish Flag"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0886. **[^] **["Tartan Details - St. Piran Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0887. **[^] **["Tartan Details - Welsh National #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0888. **[^] **["Tartan Details - Welsh National #3"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0889. **[^] **["Tartan Details - Welsh Assembly"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0890. **[^] **["Tartan Details - Spirit of Wales"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0891. ^ [_**a**_] [_**b**_] Newsome, Matthew Allan C. (15 April 2005). ["Welsh Tartans"]. _Albanach.org_. Retrieved 13 June 2023.
0892. **[^] **["Tartan Details - Manx National"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0893. **[^] **["Tartan Details - Manx National #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0894. **[^] **["Tartan Details - Manx Centenary"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0895. **[^] **["Tartan Details - Ellan Vannin"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0896. **[^] **["Tartan Details - Ellan Vannin (1958)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0897. **[^] **["Tartan Details - Manx Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0898. **[^] **["Tartan Details - Manx Ellan Vannin"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0899. **[^] **["Tartan Details - Manx Heritage"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0900. **[^] **["Tartan Details - Manx Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0901. **[^] **["Tartan Details - Isle of Man"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0902. **[^] **["Tartan Details - Manx Mannin Plaid"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.
0903. **[^] **["Tartan Details - Brittany National Walking"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0904. **[^] **["Tartan Details - Grey Breton"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0905. **[^] **["Tartan Details - Galicia"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0906. **[^] **["Tartan Details - Gallaecia - Galicia National"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0907. **[^] **["Tartan Details - Gallaecia (Unofficial)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0908. **[^] **["The Government of Canada Invites Canadians to Celebrate Tartan Day"]. [Department of Canadian Heritage]. 5 April 2008. Archived from [the original] on 8 June 2011. Retrieved 24 September 2008.
0909. **[^] **["Tartan Details – Bruce County"]. _The Scottish Register of Tartans_. Retrieved 9 January 2017.
0910. **[^] **["Tartan Details – Ville de Beauport"]. _The Scottish Register of Tartans_. Retrieved 9 January 2017.
0911. **[^] **[Scarlett (1990)], p. 38.
0912. **[^] **["Hunting Tartans"]. _Tartans.Scotland.net_. Archived from [the original] on 17 April 2020. Retrieved 12 June 2023.
0913. **[^] **["Mourning Tartans"]. _Tartans.Scotland.net_. Archived from [the original] on 7 August 2020. Retrieved 20 October 2008.
0914. **[^] **[Innes of Learney (1971)], p. 69.
0915. **[^] **["Tartan Details - Stewart, Dress #1"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023. Stuart dress exists in two other variants in the same database.
0916. **[^] **["Hunting Stewart"]. _StewartSociety.org_. Stewart Society. 2016. Retrieved 13 May 2023.
0917. ^ [_**a**_] [_**b**_] ["Tartan Details - Matheson Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0918. **[^] **["Tartan Details - Matheson Hunting"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0919. **[^] **["Search Results \[Matheson\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 10 June 2023.
0920. **[^] **["Information"]. _ClanMatheson.org_. Clan Matheson Society. 2022. "Heraldry" pop-up. Retrieved 10 June 2023.
0921. **[^] **["Tartan Details - MacMillan Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0922. **[^] **E.g.: ["Tartan Details - Wallace Dress"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0923. **[^] **["Tartan Details - Fraser Arisaid"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.
0924. **[^] **[Adam (1908/1970)], plate XIV, after p. 384.
0925. ^ [_**a**_] [_**b**_] ["Tartan Details - MacGregor Dress Burgundy (Dance)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.

 ["Tartan Details - MacGregor Dress Green (Dance)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.

 ["Tartan Details - MacGregor Dress Red (Dance)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.
0926. ^ [_**a**_] [_**b**_] See: ["Search Results \[dance\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 13 May 2023. Nearly all dance tartans that have a clan or district association (i.e. are not recent creations by individuals or commercial weavers for fashion purposes) use white as a major colour.
0927. **[^] **["The Queen's Tartans"]. _MacGregorAndMacDuff.co.uk_. 13 September 2022. Retrieved 14 May 2023.
0928. **[^] **["Tartans of Clan Donnachaidh"]. _Donnachaidh.com_. Pitlochry, Scotland: Clan Donnachaidh Society. 2018. Archived from [the original] on 20 May 2023. Retrieved 19 May 2023.
0929. **[^] **[Paterson, M. B. (2001)], pp. 182–183.
0930. **[^] **["Tartan Details - Cian of Ely"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.
0931. **[^] **["Tartan Details - Thomas of Wales"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.
0932. **[^] **["Tartan Details - Wilson (Janet)"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.
0933. **[^] **["Tartan Details - Wilson (Janet) #2"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.
0934. **[^] **["Tartan Details - Ancient Caledonian Society"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 May 2023.
0935. **[^] **["Ancient Caledonian Society Dress Coat (Circa 1786)"] (PDF). _HighlandSocietyOfLondon.org_. [Highland Society of London]. 2020. Retrieved 14 May 2023.
0936. **[^] **["How this brand has outsold Coke in Scotland for over a century"]. _CBC.ca_. Canadian Broadcasting Company. Retrieved 18 October 2021.
0937. **[^] **["Tartan Details - Irn Bru"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 June 2023.
0938. **[^] **["Search Results \[Loganair\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. Retrieved 14 May 2023.
0939. **[^] **["Tartan Details - Racing Stewart"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 June 2023.
0940. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 168.
0941. **[^] **Martin, Carol A. L. (2012). ["Jun 22: The Premiere of the Film _Brave_"]. _CuriousAndUnusualTartans.com_. Retrieved 14 May 2023.
0942. **[^] **["Burberry Check"]. _BurberryPLC.com_. [Burberry]. 2022\. Archived from [the original] on 14 May 2023. Retrieved 14 May 2023.
0943. **[^] **[Banks & de La Chapelle (2007)], pp. 33–34.
0944. **[^] **["Heraldry & Tartan – The Fitzpatrick – Mac Giolla Phádraig Clan Society"]. _FitzpatrickSociety.com_. Retrieved 9 November 2021.
0945. **[^] **["Consultation on the Creation of a Register of Tartan"] (PDF). _Scottish-Parliament.uk_. [Scottish Parliament]. Archived from [the original] (PDF) on 27 October 2005. Retrieved 9 September 2008.
0946. **[^] **["Scottish Tartans Society"]. _Scottish Tartans World Register_. Archived from [the original] on 4 October 2011. Retrieved 15 July 2023.
0947. ^ [_**a**_] [_**b**_] [_**c**_] Newsome, Matthew A. C. (2004). ["What's the 'Official' Word on Tartans?"]. _Albanach.org_. Retrieved 15 July 2023. Originally published in _The Scottish Banner_, December 2004.
0948. **[^] **["Scottish Register of Tartans Bill"] (PDF). _Scottish-Parliament.uk_. [Scottish Parliament]. Archived from [the original] (PDF) on 19 December 2008. Retrieved 8 September 2008.
0949. **[^] **["About us"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 24 March 2004. Retrieved 12 September 2008.
0950. **[^] **["About the Scottish Tartan World Register"]. Scottish Tartans World Register. Retrieved 7 September 2008.
0951. ^ [_**a**_] [_**b**_] ["\[Homepage\]"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 4 November 2020.
0952. **[^] **["About Us"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 8 February 2009.
0953. **[^] **["Guidance"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 February 2009.
0954. **[^] **Haig, Matt (2004). [_Brand Royalty: How the World's Top 100 Brands Thrive and Survive_]. Kogan Page Publishers. p. 143. [ISBN] [0749442573].
0955. **[^] **["Tartan Details - Thomson Camel"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 31 May 2023.
0956. **[^] **McDougall, Liam (18 May 2003). ["Fashion giant Burberry tries to kill off traditional tartan rival"]. _[Sunday Herald] _. Archived from [the original] on 10 March 2007. Retrieved 7 May 2009 – via FindArticles.com.
0957. **[^] **["Burberry moves to protect iconic tartan in China"]. _[Reuters].com_. 28 November 2013. Retrieved 13 July 2023.
0958. **[^] **[Blakely (2015)], pp. 9, 11.
0959. **[^] **[Blakely (2015)], pp. 12–13.
0960. **[^] **["Scotland Act 1998 – Section C4: Intellectual Property"]. _Legislation.gov.uk_. [The National Archives (United Kingdom)]. 1998. Retrieved 28 May 2023.
0961. ^ [_**a**_] [_**b**_] ["Guidance – Copyright and design right"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 28 May 2023.
0962. **[^] **["Guidance – Restrictions"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 28 May 2023.
0963. **[^] **["Guidance – Unique and 'sufficiently different' designs"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 15 May 2023.
0964. **[^] **["United Kingdom – House of Lords Gives Expansive View of Copyright Protection"]. _Ladas.com_. Ladas & Parry LLP. February 2002. Archived from [the original] on 16 June 2013. Retrieved 28 May 2023.
0965. **[^] **MacQueen, H. (5 July 2008). ["Pass off in the Royal Mile?"]. _Scots Law News_. University of Edinburgh School of Law.
0966. **[^] **Boag-Thomson, Jonanna; Carlyle, Paul (13 August 2008). ["Tartan possibly from over the sea by certain not from Skye ends up in court"]. _Lexology.com_. Retrieved 15 July 2023.
0967. **[^] **Robertson, John (3 July 2008). ["Ban on 'Queen's tartan' sales over breach of copyright law"]. _[The Scotsman] _ – via TapaTalk Kilt Forum.
0968. **[^] **Swan, Charles (20 November 2012). ["Fabric Ticket Stamps are Copyright Artistic Works: _Abraham Moon & Sons Ltd v Thornber and Others_"]. _SwanTurton.com_. London: Swan Turton LLP. Retrieved 28 May 2023.
0969. **[^] **Clark, Simon; Sefton, Sara; Linsner, Marc (November 2020). ["A Fabric Design Has Been Found to be a Work of Artistic Craftsmanship: Will Response Clothing Cause a Shift in How UK Copyright is Assessed?"] (PDF). _European Intellectual Property Review_ (9). Reuters: 617–618. Retrieved 28 May 2023.
0970. **[^] **[Blakely (2015)], pp. 1–3, 13.
0971. **[^] **[Blakely (2015)], pp. 13–16.
0972. **[^] **[Paterson, M. B. (2001)], p. 186.
0973. **[^] **["Royal Tartans"]. _Tartans.Scotland.net_. Archived from [the original] on 7 August 2020. Retrieved 18 October 2008.
0974. **[^] **Hutcheson, Colin W. ["Royal Tartans"]. _TartansAuthority.com_. Scottish Tartans Authority. Retrieved 9 January 2017.
0975. **[^] **[Campbell, A. (1890)], p. 55: "No \[Campbell of\] Argyll has any right to the red stripe or the light blue as worn by this \[Campbell of Cawdor\] branch of the Campbell Clan."
0976. **[^] **["Universal Tartans"]. _Tartans.Scotland.net_. Archived from [the original] on 4 January 2018. Retrieved 18 October 2008.
0977. **[^] **[Scarlett 1990], pp. 38, 44
0978. **[^] **[Telfer Dunbar (1979)], pp. 144–154.
0979. **[^] **[Paterson, M. B. (2001)], pp. 172–173.
0980. **[^] **[_The Scottish Clans and Their Tartans_ (2005)] p. 14.
0981. **[^] **["Information Leaflet No.2 – Scottish Crest Badges"] (PDF). _ScotArmigers.net_. [Court of the Lord Lyon]. Retrieved 27 December 2011.
0982. **[^] **["Tartans"]. [Court of the Lord Lyon]. Archived from [the original] on 14 January 2008. Retrieved 16 October 2008.
0983. **[^] **["The 'Basics' of Clan Cameron"]. _Clan-Cameron.org_. Retrieved 18 October 2008.
0984. **[^] **["Tartan – FAQ"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 18 October 2008.
0985. **[^] **["Piping in the Balmoral Tartan"]. _Royal.gov.uk_. Archived from [the original] on 4 December 2008. Retrieved 20 October 2008.
0986. **[^] **["Royals 'banned' public from wearing official Balmoral tartan"]. _[The Scotsman] _. 25 August 2016. Retrieved 15 July 2023.
0987. **[^] **["Tartans"]. _ClanChiefs.org_. [Standing Council of Scottish Chiefs]. 2016. Retrieved 30 May 2023.
0988. **[^] **[Telfer Dunbar (1979)], pp. 17–18, quoting Haswell Miller's "The Truth About Tartan", _Scotland's Magazine_ (November 1947), and summarising his material in _Common Errors in Scottish History_ (1956).
0989. ^ [_**a**_] [_**b**_] [Campbell, J. F. (1893)], pp. 333–334.
0990. ^ [_**a**_] [_**b**_] [_**c**_] [Oyange-Ngando (2018)], p. 5
0991. ^ [_**a**_] [_**b**_] [Last, Joseph Thomas] (1883). ["A visit to the Masai people living beyond the borders of the Nguru country"]. _[Proceedings of the Royal Geographical Society] _. **5** (9): 530–531. Retrieved 27 May 2023.
0992. **[^] **Waller, Richard (October 1976). ["The Maasai and the British 1895–1905: The Origins of an Alliance"]. _The Journal of African History_. **17** (4). Cambridge University Press: 529–553\. [doi]: [10.1017/S002185370001505X]. [S2CID] [154867998]. Retrieved 27 May 2023.
0993. ^ [_**a**_] [_**b**_] [_**c**_] Huang, Nellie (8 September 2016). ["Cultural Fabric: The Maasai's Shuka"]. _GAdventures.com_. Retrieved 27 May 2023.
0994. **[^] **[Oyange-Ngando (2018)], p. 4
0995. **[^] **[Oyange-Ngando (2018)], p. 12.
0996. ^ [_**a**_] [_**b**_] Mastamet-Mason, Anne; Müller, Karla; van der Merwe, Nicolette (2017). [_History of African indigenous costumes and textiles: Towards decolonising a fashion design curriculum_] (PDF). 14th National Design Education Conference. [Tshwane University of Technology]: Design Education Forum of South Africa. p. 147. Retrieved 27 May 2023.
0997. **[^] **["Maasai"]. _Junior Worldmark Encyclopedia of World Cultures_. 1999\. Archived from [the original] on 8 July 2012.
0998. **[^] **Klumpp, Donna Rey (18 July 2013). ["An Historical Overview of Maasai Dress"]. _The Journal of the Costume Society of America_. **7** (1): 95. [doi]: [10.1179/036121181803657846]. Retrieved 27 May 2023.
0999. **[^] **[Oyange-Ngando (2018)], pp. 1, 2, 4, 7 _ff._
1000. **[^] **Spring, Chris (2020). "Textiles of Eastern and Southern Africa". In Harris, Jennifer (ed.). [_A Companion to Textile Culture_]. Wiley. pp. 145–163\. [doi]: [10.1002/9781118768730.ch8]. [ISBN] [9781118768907]. [S2CID] [225574795]. Retrieved 27 May 2023.
1001. **[^] **[Altmann (2015)], pp. 21–22, 27–28.
1002. **[^] **[Altmann (2015)], pp. 22–23.
1003. **[^] **[Altmann (2015)], pp. 21–22, 29–30, 35–36.
1004. **[^] **[Altmann (2015)], p. 19.
1005. **[^] **[Altmann (2015)], pp. 47–83 _ff._
1006. **[^] **[Altmann (2015)], pp. 63, 67, 257 _ff._
1007. **[^] **[Altmann (2015)], p. 63.
1008. **[^] **[Altmann (2015)], p. 332.
1009. **[^] **[Altmann (2015)], p. 257.
1010. **[^] **[Altmann (2015)], pp. 258, 380.
1011. **[^] **[Altmann (2015)], pp. 257–258, 337, 380.
1012. **[^] **[Altmann (2015)], p. 349.
1013. **[^] **[Altmann (2015)], pp. 139, 257–258, 337, 349.
1014. **[^] **Lynch, Anette; Mitchell D., Strauss (2014). _Ethnic Dress in the United States: A Cultural Encyclopedia_. Rowman and Littlefield. p. 189. [ISBN] [9780759121508].
1015. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] German, Deb (9 June 2015). ["Checkered Past: A Brief History of the Madras Plaid Shirt"]. _Orvis News_. Archived from [the original] on 3 December 2020. Retrieved 26 May 2023.
1016. **[^] **Schneider, Sven Raphael (21 June 2019). ["Madras Guide – How the Shirt, Pants & Jackets Became Popular"]. _Gentlemans Gazette_. [Archived] from the original on 2 June 2017. Retrieved 26 May 2022.
1017. ^ [_**a**_] [_**b**_] Coo, Stéphanie Marie R. (3 October 2014). [_Clothing and the colonial culture of appearances in nineteenth century Spanish Philippines (1820-1896)_] (PhD). Université Nice Sophia Antipolis. [Archived] from the original on 26 October 2022. Retrieved 26 May 2023.
1018. **[^] **[Paterson, M. B. (2001)], p. 171, footnote 36.
1019. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] ["Utagawa Kunisada (Toyokuni III)"]. Scholten Japanese Art. 2022. Cites "Waseda University Theatre Museum, ref. no. 002-1384" as its own source.
1020. **[^] **["Japanese checkered fabric by the yard, Futasuji-koushi (forked grid)"]. _Bansyo-Fabric.com_. 2023. Retrieved 15 May 2023.
1021. **[^] **["Koshi (Plaid)"]. _Japanese Futon Glossary_. Futon Tokyo. 2019. Retrieved 15 May 2023.
1022. **[^] **["Meaning of 格子縞 in Japanese"]. _Japanese Dictionary_. Nihongo Master. 2023. Retrieved 15 May 2023.
1023. **[^] **["Tartan Details - MacMillan/Isetan"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2012. Retrieved 7 June 2023.
1024. **[^] **Wylie, James (2022). ["The People's Tartan: Be part of reinventing tartan"]. _VAM.ac.uk_. [V&A Dundee]. Retrieved 29 May 2023.
1025. **[^] **Greene, Militsa (July 1965). "Pushkin and Sir Walter Scott". _Forum for Modern Language Studies_. **I** (3): 207–215\. [doi]: [10.1093/fmls/I.3.207].
1026. **[^] **Hoisington, Sona Stephan (1981). "Pushkin's Belkin and the Mystifications of Sir Walter Scott". _Comparative Literature_. **33** (4): 342–57\. [doi]: [10.2307/1770468]. [JSTOR] [1770468].
1027. **[^] **Braaten, Ann Wiley; DeLong, Marilyn R. (3 December 2013). ["Shawls of the Germans from Russia"]. _Textile History_. **44** (2): 197–213\. [doi]: [10.1179/0040496913Z.00000000028]. [S2CID] [161759277].
1028. **[^] **Braaten, Ann Wiley (November 2005). [_German from Russia Immigrants' Shawls Brought to the U.S.A.: A Material Culture Study_] (PhD). University of Minnesota. Retrieved 13 July 2023. This URL provides a partial preview without subscription, which contains the text for which it is cited here.
1029. **[^] **Example: ["233: An Exceptional Russian Red Tartan Ground Lacquer Casket after Geftler's _Sphinxes on the Neva Embankment_"]. _FreemansAuction.com_. Philadelphia: [Freeman's Auctioneers & Appraisers]. 29 November 2018. Retrieved 26 May 2023.
1030. **[^] **Seitova, F. Z. (2022). ["Peculiarities of Teaching the Terminology of the Textile Industry in English"]. In Muldagalieva, A. A.; Kulzhanbekova, G. K.; Baymuratova, I. A.; Tleulinova, M. B. (eds.). _Materials of Scientific and Methodological Articles: Current Trends in Interdisciplinary Issues_. Almaty: Al-Farabi Kazakh National University. p. 364. [ISBN] [9786010459557]. Retrieved 13 July 2023.
1031. **[^] **Symons, Todd (6 May 2023). ["Māori King presents King Charles III with coronation gifts, will join royals in royal box for coronation concert"]. _[Newshub] _. Archived from [the original] on 6 May 2023. Retrieved 11 December 2023.
1032. **[^] **Muru-Lanning, Charlotte (9 December 2020). ["From fedoras to fascinators: A history of Māori and hats"]. _[The Spinoff] _. Retrieved 3 September 2023.

- _The Scottish Clans and Their Tartans_. Kessinger Publishing. 2005. [ISBN] [141796815X]. (Originally published by W. & A. K. Johnston & G. W. Bacon Ltd, Edinburgh and London, 1939.)
- Adam, Frank (1970) \[1908\]. [Innes of Learney, Thomas] (ed.). [_The Clans, Septs, and Regiments of the Scottish Highlands_] (8th ed.). Clearfield. [ISBN] [9780717945009] – via Internet Archive.
- Altmann, Karin (2015). [_Fabric of Life: Textile Arts in Bhutan – Culture, Tradition and Transformation_]. Berlin / Boston: De Gruyter. [ISBN] [9783110428612] – via Google Books.
- Armstrong, Fiona Kathryne (31 August 2017). [_Highlandism: Its value to Scotland and how a queen and two aristocratic women promoted the phenomenon in the Victorian age_] (PhD). University of Strathclyde. [doi]: [10.48730/2m47-md74]. Retrieved 28 May 2023.
- Banks, Jeffrey; de La Chapelle, Doria (2007). [_Tartan: Romancing the Plaid_]. New York: Rizzoli. [ISBN] [9780847829828]. Retrieved 4 June 2023 – via Internet Archive.
- Barnes, R. Money; Allen, C. Kennedy (1956). _The Uniforms & History of the Scottish Regiments_. Seeley, Service & Co.
- Black, Mary E. (1959). "Tartan Study". [_The Sett and Weaving of Tartans_] (PDF). Shelby, North Carolina: Shelby Mills. Retrieved 14 May 2023. This source appears to be reliable on weaving technique, but not on history of tartan.
- Blakely, Megan Rae (November 2015). ["Pattern Recognition: Governmental Regulation of Tartan and Commodification of Culture"] (PDF). _International Journal of Cultural Property_. **22** (4): 487–504\. [doi]: [10.1017/s0940739115000284]. [S2CID] [152102254]. Retrieved 28 May 2023. (URL is to full-text pre-print copy; page numbers cited refer to this copy.) For a more detailed treatment of ICH and trade-regulation issues, see also: Blakely, Megan Rae (2018). [_Intellectual property and intangible cultural heritage in Celtic-derived countries_] (PDF) (PhD). [University of Glasgow]. pp. 3, 78–103. Retrieved 28 May 2023.
- Brancaz, Lauren Ann-Killian (2016). ["The Homecoming of Tartan: How Scotland and North America Collaborate in Shaping Tartan"]. _Études écossaises_ (18): 69–87\. [doi]: [10.4000/etudesecossaises.1074]. [S2CID] [131473903].
- Brown, Ian (2012). "Introduction: Tartan, Tartanry and Hybridity". _From Tartan to Tartany: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].
- Campbell, Archibald (1890). [_The Children of the Mist: Or, the Scottish Clansmen in Peace and War_]. Edinburgh/London: W. & A. K. Johnston – via Internet Archive.
- [Campbell, John Francis] (1893) \[1862\]. [_Popular Tales of the West Highlands_]. Vol. IV (new \[2nd\] ed.). Paisley / London: Alexander Gardner – via Internet Archive.
- Cheape, Hugh (2012). " _Gheibhte Breacain Charnaid_ ('Scarlet Tartans Would Be Got ...'): The Re-invention of Tradition". In Brown, Ian (ed.). _From Tartan to Tartanry: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].
- Cheape, Hugh (2006) \[1991\]. _Tartan: The Highland Habit_ (3rd ed.). Edinburgh: National Museums of Scotland. [ISBN] [9781905267026].
- Eslea MacDonald, Peter, ed. (2012) \[1997\]. _The 1819 Key Pattern Book: One Hundred Original Tartans_ (2nd ed.). J. J. Munro Trust. [ISBN] [9780957186507].
- [Groves, John Percy] (1893). [_History of the 42nd Royal Highlanders – "The Black Watch" now the first battalion "The Black Watch" (Royal Highlanders) 1729–1893_]. Edinburgh: [W. & A. K. Johnston] – via Internet Archive.
- Hinderks, Victoria (2014). ["The Politicization of Scottish Dress: A Study of Highland Garb"]. _Constellations_. **5** (2). University of Alberta. [doi]: [10.29173/cons22033]. Retrieved 3 June 2023.
- [Innes of Learney, Thomas] (1971) \[1938\]. [_The Tartans of the Clans and Families of Scotland_] (8th ed.). Edinburgh: [Johnston and Bacon]. [ISBN] [9780717945016] – via Internet Archive.
- MacBain, Alexander (1911). [_An Etymological Dictionary of the Gaelic language_]. Stirling: Eneas Mackay – via Internet Archive.
- MacDonald, Micheil (1995). _The Clans of Scotland: The History and Landscape of the Scottish Clans_. London: Grange Books. [ISBN] [1856277496].
- Mackay, J. G. (1924). [_The Romantic Story of the Highland Garb and the Tartan_]. Stirling: Eneas Mackay – via Internet Archive.
- [Martin, Richard] (16–18 September 1988). ["Transmutations of the Tartan: Attributed Meanings to Tartan Design"]. _Textiles as Primary Sources: Proceedings_. First Textile Society of America Symposium. [Textile Society of America] / [Minneapolis Institute of Art]. No. 646. [Archived] from the original on 19 May 2022. Retrieved 6 July 2023.
- [Moncreiffe of that Ilk, Iain] (1967). _The Highland Clans_. London: Barrie & Rocklif.
- Oyange-Ngando, Elizabeth (2018). ["Fashion as property in traditional culture: A Maasai case study"]. _Journal of Intellectual Property Law & Practice_. **13** (11): 878–883\. [doi]: [10.1093/jiplp/jpy119]. Retrieved 27 May 2023. The PDF available here has different page numbering (starting 1, not 878); page numbers cited here refer to this PDF version.
- Paterson, Michael Bennis (2001). [_Selling Scotland: Towards an intercultural approach to export marketing involving differentiation on the basis of 'Scottishness'_] (PDF) (PhD). [University of Glasgow]. Retrieved 29 May 2023.
- Porter, James (1998). ["The Folklore of Northern Scotland: Five Discourses on Cultural Representation"]. _[Folklore] _. **109** (1–2). Taylor & Francis: 1–14\. [doi]: [10.1080/0015587X.1998.9715956]. Archived from [the original] on 29 May 2023.
- Scarlett, James D. (1990). _Tartan: The Highland Textile_. London: Shepheard-Walwyn. [ISBN] [9780856831201].
- Stewart, Donald C. (1974) \[1950\]. _The Setts of the Scottish Tartans with Descriptive and Historical Notes_ (revised ed.). London: Shepheard-Walwyn. [ISBN] [9780442278854].
- Stewart, Donald William (1893). [_Old & Rare Scottish Tartans_]. Edinburgh: George P. Johnson.
- Telfer Dunbar, John (1979) \[1962\]. _History of Highland Dress_ (2nd ed.). London: B. T. Batsford. [ISBN] [071341894X]. The original edition is actually superior; it has the same text but more colour plates; it is available online: Telfer Dunbar, John (1962). [_History of Highland Dress_] (1st ed.). Philadelphia: Doufour Editions – via Internet Archive.
- Thompson, J. Charles (1992). "Introduction". [_Scottish Tartans in Full Color_]. New York: Dover. [ISBN] [9780486270463] – via Google Books. A collection of illustrations from James Grant's _The Tartans of the Clans of Scotland_ (1886), without Grant's dubious text.
- [Trevor-Roper, Hugh] (1983). "The Highland Tradition of Scotland". In [Hobsbawm, Eric]; Ranger, Terence (eds.). [_The Invention of Tradition_]. Cambridge University Press. [ISBN] [0521246458] – via Internet Archive.
- Tuckett, Sally J. S. (2016). ["Reassessing the romance: Tartan as a popular commodity, c. 1770–1830"] (PDF). _Scottish Historical Review_. **95** (2): 182–202\. [doi]: [10.3366/shr.2016.0295]. (URL is to full-text pre-print copy; page numbers cited refer to this copy.)
- Urquhart, Blair, ed. (1994). [_Identifying Tartans_]. London: The Apple Press/Book Sales. [ISBN] [1850764999] – via Internet Archive. (Also known by the title _Tartans: The New Compact Study Guide and Identifier_.)
- [Way of Plean, George]; Squire, Romilly (2000) \[1995\]. [_Clans & Tartans_]. Collins Pocket Reference. Glasgow: HarperCollins. [ISBN] [0004725018] – via Internet Archive.
- Zaczek, Iain; Phillips, Charles (2013) \[2004\]. _The Illustrated Encyclopedia of Tartan_. Wigston, Leicestershire: Southwater/Anness. [ISBN] [9781780192758].

Wikimedia Commons has media related to [Tartans].

Look up _**[tartan] **_ in Wiktionary, the free dictionary.

- [McIan, Robert Ranald] (2013) \[1843\]. [" _Clans of the Scottish Highlands_ Fashion Plates"]. _Metropolitan Museum of Art Libraries_ – via [OCLC].org. A collection of illustrations from James Logan's _The Clans of the Scottish Highlands_ (1843) without Logan's dubious text.
- ["The Scottish Register of Tartans"]. _TartanRegister.gov.uk_. – the Scottish government's official tartan registry

================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/websearch/test_brave_client.py
================================================
from __future__ import annotations

from typing import Any
from typing import cast

import pytest
import requests
from fastapi import HTTPException

import onyx.tools.tool_implementations.web_search.clients.brave_client as brave_module
from onyx.tools.tool_implementations.web_search.clients.brave_client import (
    BraveClient,
)


class DummyResponse:
    def __init__(
        self,
        *,
        status_code: int,
        payload: dict[str, Any] | None = None,
        text: str = "",
    ) -> None:
        self.status_code = status_code
        self._payload = payload
        self.text = text

    def raise_for_status(self) -> None:
        if self.status_code >= 400:
            http_error = requests.HTTPError(f"{self.status_code} Client Error")
            http_error.response = cast(requests.Response, self)
            raise http_error

    def json(self) -> dict[str, Any]:
        if self._payload is None:
            raise ValueError("No JSON payload")
        return self._payload


def test_search_maps_brave_response(monkeypatch: pytest.MonkeyPatch) -> None:
    client = BraveClient(api_key="test-key", num_results=5)

    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001
        return DummyResponse(
            status_code=200,
            payload={
                "web": {
                    "results": [
                        {
                            "title": "Result 1",
                            "url": "https://example.com/one",
                            "description": "Snippet 1",
                        },
                        {
                            "title": "Result without URL",
                            "description": "Should be skipped",
                        },
                    ]
                }
            },
        )

    monkeypatch.setattr(brave_module.requests, "get", _mock_get)

    results = client.search("onyx")

    assert len(results) == 1
    assert results[0].title == "Result 1"
    assert results[0].link == "https://example.com/one"
    assert results[0].snippet == "Snippet 1"


def test_search_caps_count_to_brave_max(monkeypatch: pytest.MonkeyPatch) -> None:
    client = BraveClient(api_key="test-key", num_results=100)
    captured_count: str | None = None

    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001
        nonlocal captured_count
        captured_count = kwargs["params"]["count"]
        return DummyResponse(status_code=200, payload={"web": {"results": []}})

    monkeypatch.setattr(brave_module.requests, "get", _mock_get)

    client.search("onyx")

    assert captured_count == "20"


def test_search_includes_optional_params(monkeypatch: pytest.MonkeyPatch) -> None:
    client = BraveClient(
        api_key="test-key",
        num_results=5,
        country="us",
        search_lang="en",
        ui_lang="en-US",
        safesearch="moderate",
        freshness="pw",
    )
    captured_params: dict[str, str] | None = None

    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001
        nonlocal captured_params
        captured_params = kwargs["params"]
        return DummyResponse(status_code=200, payload={"web": {"results": []}})

    monkeypatch.setattr(brave_module.requests, "get", _mock_get)

    client.search("onyx")

    assert captured_params is not None
    assert captured_params["country"] == "US"
    assert captured_params["search_lang"] == "en"
    assert captured_params["ui_lang"] == "en-US"
    assert captured_params["safesearch"] == "moderate"
    assert captured_params["freshness"] == "pw"


def test_search_raises_descriptive_error_on_http_failure(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    client = BraveClient(api_key="test-key", num_results=5)

    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001
        return DummyResponse(
            status_code=401,
            payload={"error": {"message": "Unauthorized"}},
        )

    monkeypatch.setattr(brave_module.requests, "get", _mock_get)

    with pytest.raises(ValueError, match="status 401"):
        client.search("onyx")


def test_search_does_not_retry_non_retryable_http_errors(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    client = BraveClient(api_key="test-key", num_results=5)
    calls = 0

    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001
        nonlocal calls
        calls += 1
        return DummyResponse(
            status_code=401,
            payload={"error": {"message": "Unauthorized"}},
        )

    monkeypatch.setattr(brave_module.requests, "get", _mock_get)

    with pytest.raises(ValueError, match="status 401"):
        client.search("onyx")
    assert calls == 1


@pytest.mark.parametrize(
    ("kwargs", "expected_error"),
    [
        ({"country": "USA"}, "country"),
        ({"safesearch": "invalid"}, "safesearch"),
        ({"freshness": "invalid"}, "freshness"),
        ({"timeout_seconds": 0}, "timeout_seconds"),
    ],
)
def test_constructor_rejects_invalid_config_values(
    kwargs: dict[str, Any],
    expected_error: str,
) -> None:
    with pytest.raises(ValueError, match=expected_error):
        BraveClient(api_key="test-key", **kwargs)


def test_test_connection_maps_invalid_key_errors() -> None:
    client = BraveClient(api_key="test-key")

    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001
        raise ValueError("Brave search failed (status 401): Unauthorized")

    client.search = _mock_search  # type: ignore[method-assign]

    with pytest.raises(HTTPException, match="Invalid Brave API key"):
        client.test_connection()


def test_test_connection_maps_rate_limit_errors() -> None:
    client = BraveClient(api_key="test-key")

    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001
        raise ValueError("Brave search failed (status 429): Too many requests")

    client.search = _mock_search  # type: ignore[method-assign]

    with pytest.raises(HTTPException, match="rate limit exceeded"):
        client.test_connection()


def test_test_connection_propagates_unexpected_errors() -> None:
    client = BraveClient(api_key="test-key")

    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001
        raise RuntimeError("unexpected parsing bug")

    client.search = _mock_search  # type: ignore[method-assign]

    with pytest.raises(RuntimeError, match="unexpected parsing bug"):
        client.test_connection()


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_providers.py
================================================
import pytest

from onyx.tools.tool_implementations.web_search.clients.brave_client import (
    BraveClient,
)
from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
)
from onyx.tools.tool_implementations.web_search.providers import (
    provider_requires_api_key,
)
from shared_configs.enums import WebSearchProviderType


def test_provider_requires_api_key() -> None:
    """Test that provider_requires_api_key correctly identifies which providers need API keys."""
    assert provider_requires_api_key(WebSearchProviderType.EXA) is True
    assert provider_requires_api_key(WebSearchProviderType.BRAVE) is True
    assert provider_requires_api_key(WebSearchProviderType.SERPER) is True
    assert provider_requires_api_key(WebSearchProviderType.GOOGLE_PSE) is True
    assert provider_requires_api_key(WebSearchProviderType.SEARXNG) is False


def test_build_searxng_provider_without_api_key() -> None:
    """Test that SearXNG provider can be built without an API key."""
    provider = build_search_provider_from_config(
        provider_type=WebSearchProviderType.SEARXNG,
        api_key=None,
        config={"searxng_base_url": "http://localhost:8080"},
    )
    assert provider is not None


def test_build_searxng_provider_requires_base_url() -> None:
    """Test that SearXNG provider requires a base URL."""
    with pytest.raises(ValueError, match="Please provide a URL"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.SEARXNG,
            api_key=None,
            config={},
        )


def test_build_exa_provider_requires_api_key() -> None:
    """Test that Exa provider requires an API key."""
    with pytest.raises(ValueError, match="API key is required"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.EXA,
            api_key=None,
            config={},
        )


def test_build_brave_provider_requires_api_key() -> None:
    """Test that Brave provider requires an API key."""
    with pytest.raises(ValueError, match="API key is required"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.BRAVE,
            api_key=None,
            config={},
        )


def test_build_brave_provider_with_optional_config() -> None:
    provider = build_search_provider_from_config(
        provider_type=WebSearchProviderType.BRAVE,
        api_key="test-api-key",
        config={
            "country": "us",
            "search_lang": "en",
            "ui_lang": "en-US",
            "safesearch": "strict",
            "freshness": "pm",
            "timeout_seconds": "12",
        },
    )
    assert isinstance(provider, BraveClient)
    assert provider._country == "US"  # noqa: SLF001
    assert provider._search_lang == "en"  # noqa: SLF001
    assert provider._ui_lang == "en-US"  # noqa: SLF001
    assert provider._safesearch == "strict"  # noqa: SLF001
    assert provider._freshness == "pm"  # noqa: SLF001
    assert provider._timeout_seconds == 12  # noqa: SLF001


def test_build_brave_provider_rejects_invalid_timeout() -> None:
    with pytest.raises(ValueError, match="timeout_seconds"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.BRAVE,
            api_key="test-api-key",
            config={"timeout_seconds": "not-an-int"},
        )


def test_build_serper_provider_requires_api_key() -> None:
    """Test that Serper provider requires an API key."""
    with pytest.raises(ValueError, match="API key is required"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.SERPER,
            api_key=None,
            config={},
        )


def test_build_google_pse_provider_requires_api_key() -> None:
    """Test that Google PSE provider requires an API key."""
    with pytest.raises(ValueError, match="API key is required"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.GOOGLE_PSE,
            api_key=None,
            config={"search_engine_id": "test-cx"},
        )


def test_build_google_pse_provider_requires_search_engine_id() -> None:
    """Test that Google PSE provider requires a search engine ID."""
    with pytest.raises(ValueError, match="search engine id"):
        build_search_provider_from_config(
            provider_type=WebSearchProviderType.GOOGLE_PSE,
            api_key="test-api-key",
            config={},
        )


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_tool_run.py
================================================
from __future__ import annotations

from typing import Any
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallException
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
from onyx.tools.tool_implementations.web_search.web_search_tool import (
    _normalize_queries_input,
)
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool


def _make_result(
    title: str = "Title", link: str = "https://example.com"
) -> WebSearchResult:
    return WebSearchResult(title=title, link=link, snippet="snippet")


def _make_tool(mock_provider: Any) -> WebSearchTool:
    """Instantiate WebSearchTool with all DB/provider deps mocked out."""
    provider_model = MagicMock()
    provider_model.provider_type = "brave"
    provider_model.api_key = MagicMock()
    provider_model.api_key.get_value.return_value = "fake-key"
    provider_model.config = {}

    with (
        patch(
            "onyx.tools.tool_implementations.web_search.web_search_tool.get_session_with_current_tenant"
        ) as mock_session_ctx,
        patch(
            "onyx.tools.tool_implementations.web_search.web_search_tool.fetch_active_web_search_provider",
            return_value=provider_model,
        ),
        patch(
            "onyx.tools.tool_implementations.web_search.web_search_tool.build_search_provider_from_config",
            return_value=mock_provider,
        ),
    ):
        mock_session_ctx.return_value.__enter__ = MagicMock(return_value=MagicMock())
        mock_session_ctx.return_value.__exit__ = MagicMock(return_value=False)
        tool = WebSearchTool(tool_id=1, emitter=MagicMock())

    return tool


def _run(tool: WebSearchTool, queries: Any) -> list[str]:
    """Call tool.run() and return the list of query strings passed to provider.search."""
    placement = Placement(turn_index=0, tab_index=0)
    override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)
    tool.run(placement=placement, override_kwargs=override_kwargs, queries=queries)
    search_mock = cast(MagicMock, tool._provider.search)  # noqa: SLF001
    return [call.args[0] for call in search_mock.call_args_list]


class TestNormalizeQueriesInput:
    """Unit tests for _normalize_queries_input (coercion + sanitization)."""

    def test_bare_string_returns_single_element_list(self) -> None:
        assert _normalize_queries_input("hello") == ["hello"]

    def test_bare_string_stripped_and_sanitized(self) -> None:
        assert _normalize_queries_input("  hello  ") == ["hello"]
        # Control chars (e.g. null) removed; no space inserted
        assert _normalize_queries_input("hello\x00world") == ["helloworld"]

    def test_empty_string_returns_empty_list(self) -> None:
        assert _normalize_queries_input("") == []
        assert _normalize_queries_input("   ") == []

    def test_list_of_strings_returned_sanitized(self) -> None:
        assert _normalize_queries_input(["a", "b"]) == ["a", "b"]
        # Leading/trailing space stripped; control chars (e.g. tab) removed
        assert _normalize_queries_input(["  a  ", "b\tb"]) == ["a", "bb"]

    def test_list_none_skipped(self) -> None:
        assert _normalize_queries_input(["a", None, "b"]) == ["a", "b"]

    def test_list_non_string_coerced(self) -> None:
        assert _normalize_queries_input([1, "two"]) == ["1", "two"]

    def test_list_whitespace_only_dropped(self) -> None:
        assert _normalize_queries_input(["a", "", "  ", "b"]) == ["a", "b"]

    def test_non_list_non_string_returns_empty_list(self) -> None:
        assert _normalize_queries_input(42) == []
        assert _normalize_queries_input({}) == []


class TestWebSearchToolRunQueryCoercion:
    def test_list_of_strings_dispatches_each_query(self) -> None:
        """Normal case: list of queries → one search call per query."""
        mock_provider = MagicMock()
        mock_provider.search.return_value = [_make_result()]
        mock_provider.supports_site_filter = False
        tool = _make_tool(mock_provider)

        dispatched = _run(tool, ["python decorators", "python generators"])

        # run_functions_tuples_in_parallel uses a thread pool; call_args_list order is non-deterministic.
        assert sorted(dispatched) == ["python decorators", "python generators"]

    def test_bare_string_dispatches_as_single_query(self) -> None:
        """LLM returns a bare string instead of an array — must NOT be split char-by-char."""
        mock_provider = MagicMock()
        mock_provider.search.return_value = [_make_result()]
        mock_provider.supports_site_filter = False
        tool = _make_tool(mock_provider)

        dispatched = _run(tool, "what is the capital of France")

        assert len(dispatched) == 1
        assert dispatched[0] == "what is the capital of France"

    def test_bare_string_does_not_search_individual_characters(self) -> None:
        """Regression: single-char searches must not occur."""
        mock_provider = MagicMock()
        mock_provider.search.return_value = [_make_result()]
        mock_provider.supports_site_filter = False
        tool = _make_tool(mock_provider)

        dispatched = _run(tool, "hi")
        for query_arg in dispatched:
            assert (
                len(query_arg) > 1
            ), f"Single-character query dispatched: {query_arg!r}"

    def test_control_characters_sanitized_before_dispatch(self) -> None:
        """Queries with control chars have those chars removed before dispatch."""
        mock_provider = MagicMock()
        mock_provider.search.return_value = [_make_result()]
        mock_provider.supports_site_filter = False
        tool = _make_tool(mock_provider)

        dispatched = _run(tool, ["foo\x00bar", "baz\tbaz"])

        # run_functions_tuples_in_parallel uses a thread pool; call_args_list is in
        # execution order, not submission order, so compare in sorted order.
        assert sorted(dispatched) == ["bazbaz", "foobar"]

    def test_all_empty_or_whitespace_raises_tool_call_exception(self) -> None:
        """When normalization yields no valid queries, run() raises ToolCallException."""
        mock_provider = MagicMock()
        mock_provider.supports_site_filter = False
        tool = _make_tool(mock_provider)
        placement = Placement(turn_index=0, tab_index=0)
        override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)

        with pytest.raises(ToolCallException) as exc_info:
            tool.run(
                placement=placement,
                override_kwargs=override_kwargs,
                queries="   ",
            )

        assert "No valid" in str(exc_info.value)
        cast(MagicMock, mock_provider.search).assert_not_called()


================================================
FILE: backend/tests/unit/onyx/tools/tool_implementations/websearch/test_websearch_utils.py
================================================
from pathlib import Path

from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.web_search.utils import (
    inference_section_from_internet_page_scrape,
)

CONTENT_FILE = Path(__file__).parent / "data" / "tartan.txt"

# inference_section_from_internet_page_scrape will cull the content to 15000 characters
MAX_NUM_CHARS_WEB_CONTENT = 15000
TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "


def get_text_from_file(file_path: Path) -> str:
    with open(file_path, "r") as file:
        return file.read()


def get_tartan_text() -> str:
    return get_text_from_file(CONTENT_FILE)


def create_web_content_object(text: str) -> WebContent:
    return WebContent(
        full_content=text,
        title="Tartan",
        link="https://en.wikipedia.org/wiki/Tartan",
        published_date=None,
        scrape_successful=True,
    )


def test_no_snippet_provided() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    section = inference_section_from_internet_page_scrape(web_content, "")

    # Section will be of length min(MAX_NUM_CHARS_WEB_CONTENT, len(tartan_text))
    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(
        TRUNCATED_CONTENT_SUFFIX
    )

    # Get the combined_content without the truncated suffix
    combined_content_without_suffix = section.combined_content[
        :MAX_NUM_CHARS_WEB_CONTENT
    ]

    # Check that we have the first 15000 characters of the tartan text
    assert combined_content_without_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]
    assert (
        section.combined_content
        == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT] + TRUNCATED_CONTENT_SUFFIX
    )


def test_snippet_lower_bound_() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    snippet = (
        'Close-up view of traditional tartan cloth, showing pattern of diagonal "ribs" of colour; '
        "this is a five-colour tartan, in scarlet red, black, yellow..."
    )

    section = inference_section_from_internet_page_scrape(web_content, snippet)

    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(
        TRUNCATED_CONTENT_SUFFIX
    )

    no_suffix = section.combined_content[:MAX_NUM_CHARS_WEB_CONTENT]

    assert no_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]
    assert section.combined_content == no_suffix + TRUNCATED_CONTENT_SUFFIX


def test_snippet_provided_after_limit() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    snippet = (
        'Transmutations of the Tartan: Attributed Meanings to Tartan Design"]. '
        "_Textiles as Primary Sources: Proceedings_. First Textile Society of America Symposium."
    )

    section = inference_section_from_internet_page_scrape(web_content, snippet)

    assert (
        len(section.combined_content)
        == len(TRUNCATED_CONTENT_PREFIX) + MAX_NUM_CHARS_WEB_CONTENT
    )

    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]
    # We should get the last 15000 characters of the tartan text
    index = len(tartan_text) - MAX_NUM_CHARS_WEB_CONTENT

    assert no_prefix == tartan_text[index:]
    assert section.combined_content == TRUNCATED_CONTENT_PREFIX + no_prefix


def test_snippet_provided_in_middle() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    snippet = "marketing as a district tartan for Ulster, Scottish weavers (and in two cases English, and in another American)"

    SNIPPET_START_LOCATION_IN_TEXT = 215398

    section = inference_section_from_internet_page_scrape(web_content, snippet)

    assert len(section.combined_content) == len(
        TRUNCATED_CONTENT_PREFIX
    ) + MAX_NUM_CHARS_WEB_CONTENT + len(TRUNCATED_CONTENT_SUFFIX)

    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]
    no_affix = no_prefix[:MAX_NUM_CHARS_WEB_CONTENT]

    # expected start index of the snippet
    expected_start_idx = SNIPPET_START_LOCATION_IN_TEXT
    expected_end_idx = expected_start_idx + len(snippet) - 1

    top_padding = (MAX_NUM_CHARS_WEB_CONTENT - len(snippet)) // 2
    bottom_padding = MAX_NUM_CHARS_WEB_CONTENT - len(snippet) - top_padding

    assert (
        no_affix
        == tartan_text[
            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1
        ]
    )

    assert section.combined_content == (
        TRUNCATED_CONTENT_PREFIX
        + tartan_text[
            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1
        ]
        + TRUNCATED_CONTENT_SUFFIX
    )


def test_bad_snippet() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    snippet = "This is a bad snippet"
    # We expect the fallback (from top) to occur
    section = inference_section_from_internet_page_scrape(web_content, snippet)

    # Section will be of length min(MAX_NUM_CHARS_WEB_CONTENT, len(tartan_text))
    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(
        TRUNCATED_CONTENT_SUFFIX
    )

    # Get the combined_content without the truncated suffix
    combined_content_without_suffix = section.combined_content[
        :MAX_NUM_CHARS_WEB_CONTENT
    ]

    # Check that we have the first 15000 characters of the tartan text
    assert combined_content_without_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]
    assert (
        section.combined_content
        == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT] + TRUNCATED_CONTENT_SUFFIX
    )


def test_similar_snippet_in_middle_fuzzy_match() -> None:
    tartan_text = get_tartan_text()
    web_content = create_web_content_object(tartan_text)

    # In the actual text, the word "English" is used instead of "British"
    # This is very similar though, so we expect a fuzzy match to occur
    snippet = "marketing as a district tartan for Ulster, Scottish weavers (and in two cases British, and in another American)"

    SNIPPET_START_LOCATION_IN_TEXT = 215398

    section = inference_section_from_internet_page_scrape(web_content, snippet)

    assert len(section.combined_content) == len(
        TRUNCATED_CONTENT_PREFIX
    ) + MAX_NUM_CHARS_WEB_CONTENT + len(TRUNCATED_CONTENT_SUFFIX)

    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]
    no_affix = no_prefix[:MAX_NUM_CHARS_WEB_CONTENT]

    # expected start index of the snippet
    expected_start_idx = SNIPPET_START_LOCATION_IN_TEXT
    expected_end_idx = expected_start_idx + len(snippet) - 1

    top_padding = (MAX_NUM_CHARS_WEB_CONTENT - len(snippet)) // 2
    bottom_padding = MAX_NUM_CHARS_WEB_CONTENT - len(snippet) - top_padding

    assert (
        no_affix
        == tartan_text[
            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1
        ]
    )

    assert section.combined_content == (
        TRUNCATED_CONTENT_PREFIX
        + tartan_text[
            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1
        ]
        + TRUNCATED_CONTENT_SUFFIX
    )


================================================
FILE: backend/tests/unit/onyx/tracing/__init__.py
================================================


================================================
FILE: backend/tests/unit/onyx/tracing/test_tracing_setup.py
================================================
"""Unit tests for tracing setup functions."""

import importlib
import os
from unittest.mock import patch

from onyx.configs import app_configs
from onyx.tracing import setup as tracing_setup


def test_setup_tracing_with_no_creds() -> None:
    """Test that setup_tracing returns empty list when no credentials are configured."""
    # Ensure no tracing credentials are set
    os.environ.pop("BRAINTRUST_API_KEY", None)
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)

    # Reload modules to pick up environment changes
    importlib.reload(app_configs)
    importlib.reload(tracing_setup)

    # Reset the initialized flag
    tracing_setup._initialized = False

    # Call the function - should return empty list
    result = tracing_setup.setup_tracing()
    assert result == []


def test_setup_tracing_is_idempotent() -> None:
    """Test that setup_tracing only initializes once."""
    # Ensure no tracing credentials are set
    os.environ.pop("BRAINTRUST_API_KEY", None)
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)

    # Reload modules
    importlib.reload(app_configs)
    importlib.reload(tracing_setup)

    # Reset the initialized flag
    tracing_setup._initialized = False

    # First call
    tracing_setup.setup_tracing()

    # Second call should return empty (already initialized)
    result2 = tracing_setup.setup_tracing()
    assert result2 == []

    # Clean up
    tracing_setup._initialized = False


def test_setup_tracing_with_braintrust_creds() -> None:
    """Test that setup_tracing initializes Braintrust when credentials are available."""
    # Set Braintrust credentials
    os.environ["BRAINTRUST_API_KEY"] = "test-api-key"
    os.environ["BRAINTRUST_PROJECT"] = "test-project"
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)

    # Reload modules to pick up new environment variables
    importlib.reload(app_configs)
    importlib.reload(tracing_setup)

    # Reset the initialized flag
    tracing_setup._initialized = False

    # Mock the _setup_braintrust function to avoid actual initialization
    with patch.object(tracing_setup, "_setup_braintrust") as mock_setup:
        result = tracing_setup.setup_tracing()
        mock_setup.assert_called_once()
        assert "braintrust" in result

    # Clean up
    os.environ.pop("BRAINTRUST_API_KEY", None)
    os.environ.pop("BRAINTRUST_PROJECT", None)
    tracing_setup._initialized = False
    importlib.reload(app_configs)


def test_setup_tracing_with_langfuse_creds() -> None:
    """Test that setup_tracing initializes Langfuse when credentials are available."""
    # Set Langfuse credentials
    os.environ["LANGFUSE_SECRET_KEY"] = "test-secret-key"
    os.environ["LANGFUSE_PUBLIC_KEY"] = "test-public-key"
    os.environ.pop("BRAINTRUST_API_KEY", None)

    # Reload modules to pick up new environment variables
    importlib.reload(app_configs)
    importlib.reload(tracing_setup)

    # Reset the initialized flag
    tracing_setup._initialized = False

    # Mock the _setup_langfuse function to avoid actual initialization
    with patch.object(tracing_setup, "_setup_langfuse") as mock_setup:
        result = tracing_setup.setup_tracing()
        mock_setup.assert_called_once()
        assert "langfuse" in result

    # Clean up
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)
    tracing_setup._initialized = False
    importlib.reload(app_configs)


def test_setup_tracing_with_both_providers() -> None:
    """Test that setup_tracing initializes both providers when both credentials are available."""
    # Set both credentials
    os.environ["BRAINTRUST_API_KEY"] = "test-api-key"
    os.environ["BRAINTRUST_PROJECT"] = "test-project"
    os.environ["LANGFUSE_SECRET_KEY"] = "test-secret-key"
    os.environ["LANGFUSE_PUBLIC_KEY"] = "test-public-key"

    # Reload modules to pick up new environment variables
    importlib.reload(app_configs)
    importlib.reload(tracing_setup)

    # Reset the initialized flag
    tracing_setup._initialized = False

    # Mock both setup functions to avoid actual initialization
    with (
        patch.object(tracing_setup, "_setup_braintrust") as mock_bt,
        patch.object(tracing_setup, "_setup_langfuse") as mock_lf,
    ):
        result = tracing_setup.setup_tracing()
        mock_bt.assert_called_once()
        mock_lf.assert_called_once()
        assert "braintrust" in result
        assert "langfuse" in result

    # Clean up
    os.environ.pop("BRAINTRUST_API_KEY", None)
    os.environ.pop("BRAINTRUST_PROJECT", None)
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)
    tracing_setup._initialized = False
    importlib.reload(app_configs)


================================================
FILE: backend/tests/unit/onyx/utils/test_gpu_utils.py
================================================
"""
Test cases for onyx/utils/gpu_utils.py with DISABLE_MODEL_SERVER environment variable
"""

import os
from unittest import TestCase
from unittest.mock import MagicMock
from unittest.mock import patch

import requests

from onyx.utils.gpu_utils import _get_gpu_status_from_model_server


class TestGPUUtils(TestCase):
    """Test cases for GPU utilities with DISABLE_MODEL_SERVER support"""

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
    def test_disable_model_server_true(self) -> None:
        """Test that GPU status returns False when DISABLE_MODEL_SERVER is true"""
        result = _get_gpu_status_from_model_server(indexing=False)
        assert result is False

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "True"})
    def test_disable_model_server_capital_true(self) -> None:
        """Test that GPU status returns False when DISABLE_MODEL_SERVER is True (capital)"""
        # "True" WILL trigger disable because .lower() is called
        result = _get_gpu_status_from_model_server(indexing=False)
        assert result is False

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "1"})
    @patch("requests.get")
    def test_disable_model_server_one(self, mock_get: MagicMock) -> None:
        """Test that GPU status makes request when DISABLE_MODEL_SERVER is 1"""
        # "1" should NOT trigger disable (only "true" should)
        mock_response = MagicMock()
        mock_response.json.return_value = {"gpu_available": True}
        mock_get.return_value = mock_response

        result = _get_gpu_status_from_model_server(indexing=False)
        assert result is True
        mock_get.assert_called_once()

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "yes"})
    @patch("requests.get")
    def test_disable_model_server_yes(self, mock_get: MagicMock) -> None:
        """Test that GPU status makes request when DISABLE_MODEL_SERVER is yes"""
        # "yes" should NOT trigger disable (only "true" should)
        mock_response = MagicMock()
        mock_response.json.return_value = {"gpu_available": False}
        mock_get.return_value = mock_response

        result = _get_gpu_status_from_model_server(indexing=True)
        assert result is False
        mock_get.assert_called_once()

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
    @patch("requests.get")
    def test_disable_model_server_false(self, mock_get: MagicMock) -> None:
        """Test that GPU status makes request when DISABLE_MODEL_SERVER is false"""
        mock_response = MagicMock()
        mock_response.json.return_value = {"gpu_available": True}
        mock_get.return_value = mock_response

        result = _get_gpu_status_from_model_server(indexing=True)
        assert result is True
        mock_get.assert_called_once()

    @patch.dict(os.environ, {}, clear=True)
    @patch("requests.get")
    def test_disable_model_server_not_set(self, mock_get: MagicMock) -> None:
        """Test that GPU status makes request when DISABLE_MODEL_SERVER is not set"""
        mock_response = MagicMock()
        mock_response.json.return_value = {"gpu_available": False}
        mock_get.return_value = mock_response

        result = _get_gpu_status_from_model_server(indexing=False)
        assert result is False
        mock_get.assert_called_once()

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
    def test_disabled_host_fallback(self) -> None:
        """Test that disabled host is handled correctly via environment variable"""
        result = _get_gpu_status_from_model_server(indexing=True)
        assert result is False

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "false"})
    @patch("requests.get")
    def test_request_exception_handling(self, mock_get: MagicMock) -> None:
        """Test that exceptions are properly raised when GPU status request fails"""
        mock_get.side_effect = requests.RequestException("Connection error")

        with self.assertRaises(requests.RequestException):
            _get_gpu_status_from_model_server(indexing=False)

    @patch.dict(os.environ, {"DISABLE_MODEL_SERVER": "true"})
    @patch("requests.get")
    def test_gpu_status_request_with_disable(self, mock_get: MagicMock) -> None:
        """Test that no request is made when DISABLE_MODEL_SERVER is true"""
        result = _get_gpu_status_from_model_server(indexing=True)
        assert result is False
        # Verify that no HTTP request was made
        mock_get.assert_not_called()


================================================
FILE: backend/tests/unit/onyx/utils/test_json_river.py
================================================
"""Tests for the jsonriver incremental JSON parser."""

import json

import pytest

from onyx.utils.jsonriver import JsonValue
from onyx.utils.jsonriver import Parser


def _all_deltas(chunks: list[str]) -> list[JsonValue]:
    """Feed chunks one at a time and collect all emitted deltas."""
    parser = Parser()
    deltas: list[JsonValue] = []
    for chunk in chunks:
        deltas.extend(parser.feed(chunk))
    deltas.extend(parser.finish())
    return deltas


class TestParseComplete:
    """Parsing complete JSON in a single chunk."""

    def test_simple_object(self) -> None:
        deltas = _all_deltas(['{"a": 1}'])
        assert any(r == {"a": 1.0} or r == {"a": 1} for r in deltas)

    def test_simple_array(self) -> None:
        deltas = _all_deltas(["[1, 2, 3]"])
        assert any(isinstance(r, list) for r in deltas)

    def test_simple_string(self) -> None:
        deltas = _all_deltas(['"hello"'])
        assert "hello" in deltas or any("hello" in str(r) for r in deltas)

    def test_null(self) -> None:
        deltas = _all_deltas(["null"])
        assert None in deltas

    def test_boolean_true(self) -> None:
        deltas = _all_deltas(["true"])
        assert True in deltas

    def test_boolean_false(self) -> None:
        deltas = _all_deltas(["false"])
        assert any(r is False for r in deltas)

    def test_number(self) -> None:
        deltas = _all_deltas(["42"])
        assert 42.0 in deltas

    def test_negative_number(self) -> None:
        deltas = _all_deltas(["-3.14"])
        assert any(abs(r - (-3.14)) < 1e-10 for r in deltas if isinstance(r, float))

    def test_empty_object(self) -> None:
        deltas = _all_deltas(["{}"])
        assert {} in deltas

    def test_empty_array(self) -> None:
        deltas = _all_deltas(["[]"])
        assert [] in deltas


class TestStreamingDeltas:
    """Incremental feeding produces correct deltas."""

    def test_object_string_value_streamed_char_by_char(self) -> None:
        chunks = list('{"code": "abc"}')
        deltas = _all_deltas(chunks)
        str_parts = []
        for d in deltas:
            if isinstance(d, dict) and "code" in d:
                val = d["code"]
                if isinstance(val, str):
                    str_parts.append(val)
        assert "".join(str_parts) == "abc"

    def test_object_streamed_in_two_halves(self) -> None:
        deltas = _all_deltas(['{"name": "Al', 'ice"}'])
        str_parts = []
        for d in deltas:
            if isinstance(d, dict) and "name" in d:
                val = d["name"]
                if isinstance(val, str):
                    str_parts.append(val)
        assert "".join(str_parts) == "Alice"

    def test_multiple_keys_streamed(self) -> None:
        deltas = _all_deltas(['{"a": "x', '", "b": "y"}'])
        a_parts: list[str] = []
        b_parts: list[str] = []
        for d in deltas:
            if isinstance(d, dict):
                if "a" in d and isinstance(d["a"], str):
                    a_parts.append(d["a"])
                if "b" in d and isinstance(d["b"], str):
                    b_parts.append(d["b"])
        assert "".join(a_parts) == "x"
        assert "".join(b_parts) == "y"

    def test_deltas_only_contain_new_string_content(self) -> None:
        parser = Parser()
        d1 = parser.feed('{"msg": "hel')
        d2 = parser.feed('lo"}')
        parser.finish()

        msg_parts = []
        for d in d1 + d2:
            if isinstance(d, dict) and "msg" in d:
                val = d["msg"]
                if isinstance(val, str):
                    msg_parts.append(val)
        assert "".join(msg_parts) == "hello"

        # Each delta should only contain new chars, not repeat previous ones
        if len(msg_parts) == 2:
            assert msg_parts[0] == "hel"
            assert msg_parts[1] == "lo"


class TestEscapeSequences:
    """JSON escape sequences are decoded correctly, even across chunk boundaries."""

    def test_newline_escape(self) -> None:
        deltas = _all_deltas(['{"text": "line1\\nline2"}'])
        text_parts = []
        for d in deltas:
            if isinstance(d, dict) and "text" in d and isinstance(d["text"], str):
                text_parts.append(d["text"])
        assert "".join(text_parts) == "line1\nline2"

    def test_tab_escape(self) -> None:
        deltas = _all_deltas(['{"t": "a\\tb"}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "t" in d and isinstance(d["t"], str):
                parts.append(d["t"])
        assert "".join(parts) == "a\tb"

    def test_escaped_quote(self) -> None:
        deltas = _all_deltas(['{"q": "say \\"hi\\""}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "q" in d and isinstance(d["q"], str):
                parts.append(d["q"])
        assert "".join(parts) == 'say "hi"'

    def test_unicode_escape(self) -> None:
        deltas = _all_deltas(['{"u": "\\u0041\\u0042"}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "u" in d and isinstance(d["u"], str):
                parts.append(d["u"])
        assert "".join(parts) == "AB"

    def test_escape_split_across_chunks(self) -> None:
        deltas = _all_deltas(['{"x": "a\\', 'nb"}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "x" in d and isinstance(d["x"], str):
                parts.append(d["x"])
        assert "".join(parts) == "a\nb"

    def test_unicode_escape_split_across_chunks(self) -> None:
        deltas = _all_deltas(['{"u": "\\u00', '41"}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "u" in d and isinstance(d["u"], str):
                parts.append(d["u"])
        assert "".join(parts) == "A"

    def test_backslash_escape(self) -> None:
        deltas = _all_deltas(['{"p": "c:\\\\dir"}'])
        parts = []
        for d in deltas:
            if isinstance(d, dict) and "p" in d and isinstance(d["p"], str):
                parts.append(d["p"])
        assert "".join(parts) == "c:\\dir"


class TestNestedStructures:
    """Nested objects and arrays produce correct deltas."""

    def test_nested_object(self) -> None:
        deltas = _all_deltas(['{"outer": {"inner": "val"}}'])
        found = False
        for d in deltas:
            if isinstance(d, dict) and "outer" in d:
                outer = d["outer"]
                if isinstance(outer, dict) and "inner" in outer:
                    found = True
        assert found

    def test_array_of_strings(self) -> None:
        deltas = _all_deltas(['["a', '", "b"]'])
        all_items: list[str] = []
        for d in deltas:
            if isinstance(d, list):
                for item in d:
                    if isinstance(item, str):
                        all_items.append(item)
            elif isinstance(d, str):
                all_items.append(d)
        joined = "".join(all_items)
        assert "a" in joined
        assert "b" in joined

    def test_object_with_number_and_bool(self) -> None:
        deltas = _all_deltas(['{"count": 42, "active": true}'])
        has_count = False
        has_active = False
        for d in deltas:
            if isinstance(d, dict):
                if "count" in d and d["count"] == 42.0:
                    has_count = True
                if "active" in d and d["active"] is True:
                    has_active = True
        assert has_count
        assert has_active

    def test_object_with_null_value(self) -> None:
        deltas = _all_deltas(['{"key": null}'])
        found = False
        for d in deltas:
            if isinstance(d, dict) and "key" in d and d["key"] is None:
                found = True
        assert found


class TestComputeDelta:
    """Direct tests for the _compute_delta static method."""

    def test_none_prev_returns_current(self) -> None:
        assert Parser._compute_delta(None, {"a": "b"}) == {"a": "b"}

    def test_string_delta(self) -> None:
        assert Parser._compute_delta("hel", "hello") == "lo"

    def test_string_no_change(self) -> None:
        assert Parser._compute_delta("same", "same") is None

    def test_dict_new_key(self) -> None:
        assert Parser._compute_delta({"a": "x"}, {"a": "x", "b": "y"}) == {"b": "y"}

    def test_dict_string_append(self) -> None:
        assert Parser._compute_delta({"code": "def"}, {"code": "def hello()"}) == {
            "code": " hello()"
        }

    def test_dict_no_change(self) -> None:
        assert Parser._compute_delta({"a": 1}, {"a": 1}) is None

    def test_list_new_items(self) -> None:
        assert Parser._compute_delta([1, 2], [1, 2, 3]) == [3]

    def test_list_last_item_updated(self) -> None:
        assert Parser._compute_delta(["a"], ["ab"]) == ["ab"]

    def test_list_no_change(self) -> None:
        assert Parser._compute_delta([1, 2], [1, 2]) is None

    def test_primitive_change(self) -> None:
        assert Parser._compute_delta(1, 2) == 2

    def test_primitive_no_change(self) -> None:
        assert Parser._compute_delta(42, 42) is None


class TestParserLifecycle:
    """Edge cases around parser state and lifecycle."""

    def test_feed_after_finish_returns_empty(self) -> None:
        parser = Parser()
        parser.feed('{"a": 1}')
        parser.finish()
        assert parser.feed("more") == []

    def test_empty_feed_returns_empty(self) -> None:
        parser = Parser()
        assert parser.feed("") == []

    def test_whitespace_only_returns_empty(self) -> None:
        parser = Parser()
        assert parser.feed("   ") == []

    def test_finish_with_trailing_whitespace(self) -> None:
        parser = Parser()
        # Trailing whitespace terminates the number, so feed() emits it
        deltas = parser.feed("42  ")
        assert 42.0 in deltas
        parser.finish()  # Should not raise

    def test_finish_with_trailing_content_raises(self) -> None:
        parser = Parser()
        # Feed a complete JSON value followed by non-whitespace in one chunk
        parser.feed('{"a": 1} extra')
        with pytest.raises(ValueError, match="Unexpected trailing"):
            parser.finish()

    def test_finish_flushes_pending_number(self) -> None:
        parser = Parser()
        deltas = parser.feed("42")
        # Number has no terminator, so feed() can't emit it yet
        assert deltas == []
        final = parser.finish()
        assert 42.0 in final


class TestToolCallSimulation:
    """Simulate the LLM tool-call streaming use case."""

    def test_python_tool_call_streaming(self) -> None:
        full_json = json.dumps({"code": "print('hello world')"})
        chunk_size = 5
        chunks = [
            full_json[i : i + chunk_size] for i in range(0, len(full_json), chunk_size)
        ]

        parser = Parser()
        code_parts: list[str] = []
        for chunk in chunks:
            for delta in parser.feed(chunk):
                if isinstance(delta, dict) and "code" in delta:
                    val = delta["code"]
                    if isinstance(val, str):
                        code_parts.append(val)
        for delta in parser.finish():
            if isinstance(delta, dict) and "code" in delta:
                val = delta["code"]
                if isinstance(val, str):
                    code_parts.append(val)
        assert "".join(code_parts) == "print('hello world')"

    def test_multi_arg_tool_call(self) -> None:
        full = '{"query": "search term", "num_results": 5}'
        chunks = [full[:15], full[15:30], full[30:]]

        parser = Parser()
        query_parts: list[str] = []
        has_num_results = False
        for chunk in chunks:
            for delta in parser.feed(chunk):
                if isinstance(delta, dict):
                    if "query" in delta and isinstance(delta["query"], str):
                        query_parts.append(delta["query"])
                    if "num_results" in delta:
                        has_num_results = True
        for delta in parser.finish():
            if isinstance(delta, dict):
                if "query" in delta and isinstance(delta["query"], str):
                    query_parts.append(delta["query"])
                if "num_results" in delta:
                    has_num_results = True
        assert "".join(query_parts) == "search term"
        assert has_num_results

    def test_code_with_newlines_and_escapes(self) -> None:
        code = 'def greet(name):\n    print(f"Hello, {name}!")\n    return True'
        full = json.dumps({"code": code})
        chunk_size = 8
        chunks = [full[i : i + chunk_size] for i in range(0, len(full), chunk_size)]

        parser = Parser()
        code_parts: list[str] = []
        for chunk in chunks:
            for delta in parser.feed(chunk):
                if isinstance(delta, dict) and "code" in delta:
                    val = delta["code"]
                    if isinstance(val, str):
                        code_parts.append(val)
        for delta in parser.finish():
            if isinstance(delta, dict) and "code" in delta:
                val = delta["code"]
                if isinstance(val, str):
                    code_parts.append(val)
        assert "".join(code_parts) == code

    def test_single_char_streaming(self) -> None:
        full = '{"key": "value"}'
        parser = Parser()
        key_parts: list[str] = []
        for ch in full:
            for delta in parser.feed(ch):
                if isinstance(delta, dict) and "key" in delta:
                    val = delta["key"]
                    if isinstance(val, str):
                        key_parts.append(val)
        for delta in parser.finish():
            if isinstance(delta, dict) and "key" in delta:
                val = delta["key"]
                if isinstance(val, str):
                    key_parts.append(val)
        assert "".join(key_parts) == "value"


================================================
FILE: backend/tests/unit/onyx/utils/test_postgres_sanitization.py
================================================
from pytest import MonkeyPatch

from onyx.access.models import ExternalAccess
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import TextSection
from onyx.db.enums import HierarchyNodeType
from onyx.indexing import indexing_pipeline
from onyx.utils.postgres_sanitization import sanitize_document_for_postgres
from onyx.utils.postgres_sanitization import sanitize_hierarchy_node_for_postgres
from onyx.utils.postgres_sanitization import sanitize_json_like
from onyx.utils.postgres_sanitization import sanitize_string


# ---- sanitize_string tests ----


def test_sanitize_string_strips_nul_bytes() -> None:
    assert sanitize_string("hello\x00world") == "helloworld"
    assert sanitize_string("\x00\x00\x00") == ""
    assert sanitize_string("clean") == "clean"


def test_sanitize_string_strips_high_surrogates() -> None:
    assert sanitize_string("before\ud800after") == "beforeafter"
    assert sanitize_string("a\udbffb") == "ab"


def test_sanitize_string_strips_low_surrogates() -> None:
    assert sanitize_string("before\udc00after") == "beforeafter"
    assert sanitize_string("a\udfffb") == "ab"


def test_sanitize_string_strips_nul_and_surrogates_together() -> None:
    assert sanitize_string("he\x00llo\ud800 wo\udfffrld\x00") == "hello world"


def test_sanitize_string_preserves_valid_unicode() -> None:
    assert sanitize_string("café ☕ 日本語 😀") == "café ☕ 日本語 😀"


def test_sanitize_string_empty_input() -> None:
    assert sanitize_string("") == ""


# ---- sanitize_json_like tests ----


def test_sanitize_json_like_handles_plain_string() -> None:
    assert sanitize_json_like("he\x00llo\ud800") == "hello"


def test_sanitize_json_like_handles_nested_dict() -> None:
    dirty = {
        "ke\x00y": "va\ud800lue",
        "nested": {"inne\x00r": "de\udfffep"},
    }
    assert sanitize_json_like(dirty) == {
        "key": "value",
        "nested": {"inner": "deep"},
    }


def test_sanitize_json_like_handles_list_with_surrogates() -> None:
    dirty = ["a\x00", "b\ud800", {"c\udc00": "d\udfff"}]
    assert sanitize_json_like(dirty) == ["a", "b", {"c": "d"}]


def test_sanitize_json_like_handles_tuple() -> None:
    dirty = ("a\x00", "b\ud800")
    assert sanitize_json_like(dirty) == ("a", "b")


def test_sanitize_json_like_passes_through_non_strings() -> None:
    assert sanitize_json_like(42) == 42
    assert sanitize_json_like(3.14) == 3.14
    assert sanitize_json_like(True) is True
    assert sanitize_json_like(None) is None


# ---- sanitize_document_for_postgres tests ----


def test_sanitize_document_for_postgres_removes_nul_bytes() -> None:
    document = Document(
        id="doc\x00-id",
        source=DocumentSource.FILE,
        semantic_identifier="sem\x00-id",
        title="ti\x00tle",
        parent_hierarchy_raw_node_id="parent\x00-id",
        sections=[TextSection(link="lin\x00k", text="te\x00xt")],
        metadata={"ke\x00y": "va\x00lue", "list\x00key": ["a\x00", "b"]},
        doc_metadata={
            "j\x00son": {
                "in\x00ner": "va\x00l",
                "arr": ["x\x00", {"dee\x00p": "y\x00"}],
            }
        },
        primary_owners=[BasicExpertInfo(display_name="Ali\x00ce", email="a\x00@x.com")],
        secondary_owners=[BasicExpertInfo(first_name="Bo\x00b", last_name="Sm\x00ith")],
        external_access=ExternalAccess(
            external_user_emails={"user\x00@example.com"},
            external_user_group_ids={"gro\x00up-1"},
            is_public=False,
        ),
    )

    sanitized = sanitize_document_for_postgres(document)

    assert sanitized.id == "doc-id"
    assert sanitized.semantic_identifier == "sem-id"
    assert sanitized.title == "title"
    assert sanitized.parent_hierarchy_raw_node_id == "parent-id"
    assert sanitized.sections[0].link == "link"
    assert sanitized.sections[0].text == "text"
    assert sanitized.metadata == {"key": "value", "listkey": ["a", "b"]}
    assert sanitized.doc_metadata == {
        "json": {"inner": "val", "arr": ["x", {"deep": "y"}]}
    }
    assert sanitized.primary_owners is not None
    assert sanitized.primary_owners[0].display_name == "Alice"
    assert sanitized.primary_owners[0].email == "a@x.com"
    assert sanitized.secondary_owners is not None
    assert sanitized.secondary_owners[0].first_name == "Bob"
    assert sanitized.secondary_owners[0].last_name == "Smith"
    assert sanitized.external_access is not None
    assert sanitized.external_access.external_user_emails == {"user@example.com"}
    assert sanitized.external_access.external_user_group_ids == {"group-1"}

    # Ensure original document is not mutated
    assert document.id == "doc\x00-id"
    assert document.metadata == {"ke\x00y": "va\x00lue", "list\x00key": ["a\x00", "b"]}


def test_sanitize_hierarchy_node_for_postgres_removes_nul_bytes() -> None:
    node = HierarchyNode(
        raw_node_id="raw\x00-id",
        raw_parent_id="paren\x00t-id",
        display_name="fol\x00der",
        link="https://exa\x00mple.com",
        node_type=HierarchyNodeType.FOLDER,
        external_access=ExternalAccess(
            external_user_emails={"a\x00@example.com"},
            external_user_group_ids={"g\x00-1"},
            is_public=True,
        ),
    )

    sanitized = sanitize_hierarchy_node_for_postgres(node)

    assert sanitized.raw_node_id == "raw-id"
    assert sanitized.raw_parent_id == "parent-id"
    assert sanitized.display_name == "folder"
    assert sanitized.link == "https://example.com"
    assert sanitized.external_access is not None
    assert sanitized.external_access.external_user_emails == {"a@example.com"}
    assert sanitized.external_access.external_user_group_ids == {"g-1"}


def test_index_doc_batch_prepare_sanitizes_before_db_ops(
    monkeypatch: MonkeyPatch,
) -> None:
    document = Document(
        id="doc\x00id",
        source=DocumentSource.FILE,
        semantic_identifier="sem\x00id",
        sections=[TextSection(text="content", link="li\x00nk")],
        metadata={"ke\x00y": "va\x00lue"},
    )

    captured: dict[str, object] = {}

    def _get_documents_by_ids(db_session: object, document_ids: list[str]) -> list:
        _ = db_session, document_ids
        return []

    monkeypatch.setattr(
        indexing_pipeline, "get_documents_by_ids", _get_documents_by_ids
    )

    def _capture_upsert_documents_in_db(**kwargs: object) -> None:
        captured["upsert_documents"] = kwargs["documents"]

    monkeypatch.setattr(
        indexing_pipeline, "_upsert_documents_in_db", _capture_upsert_documents_in_db
    )

    def _capture_doc_cc_pair(*args: object) -> None:
        captured["cc_pair_doc_ids"] = args[3]

    monkeypatch.setattr(
        indexing_pipeline,
        "upsert_document_by_connector_credential_pair",
        _capture_doc_cc_pair,
    )

    def _noop_link_hierarchy_nodes_to_documents(
        db_session: object,
        document_ids: list[str],
        source: DocumentSource,
        commit: bool,
    ) -> int:
        _ = db_session, document_ids, source, commit
        return 0

    monkeypatch.setattr(
        indexing_pipeline,
        "link_hierarchy_nodes_to_documents",
        _noop_link_hierarchy_nodes_to_documents,
    )

    context = indexing_pipeline.index_doc_batch_prepare(
        documents=[document],
        index_attempt_metadata=IndexAttemptMetadata(connector_id=1, credential_id=2),
        db_session=object(),  # type: ignore[arg-type]
        ignore_time_skip=True,
    )

    assert context is not None
    assert context.updatable_docs[0].id == "docid"
    assert context.updatable_docs[0].semantic_identifier == "semid"
    assert context.updatable_docs[0].metadata == {"key": "value"}
    assert captured["cc_pair_doc_ids"] == ["docid"]

    upsert_documents = captured["upsert_documents"]
    assert isinstance(upsert_documents, list)
    assert upsert_documents[0].id == "docid"


================================================
FILE: backend/tests/unit/onyx/utils/test_sensitive.py
================================================
"""Tests for SensitiveValue wrapper class."""

import json
from typing import Any

import pytest

from onyx.utils.sensitive import SensitiveAccessError
from onyx.utils.sensitive import SensitiveValue


def _encrypt_string(value: str) -> bytes:
    """Simple mock encryption (just encoding for tests)."""
    return value.encode("utf-8")


def _decrypt_string(value: bytes) -> str:
    """Simple mock decryption (just decoding for tests)."""
    return value.decode("utf-8")


class TestSensitiveValueString:
    """Tests for SensitiveValue with string values."""

    def test_get_value_raw(self) -> None:
        """Test getting raw unmasked value."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("my-secret-token"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        assert sensitive.get_value(apply_mask=False) == "my-secret-token"

    def test_get_value_masked(self) -> None:
        """Test getting masked value with default masking."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("my-very-long-secret-token-here"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        result = sensitive.get_value(apply_mask=True)
        # Default mask_string shows first 4 and last 4 chars
        assert result == "my-v...here"

    def test_get_value_masked_short_string(self) -> None:
        """Test that short strings are fully masked."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("short"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        result = sensitive.get_value(apply_mask=True)
        # Short strings get fully masked
        assert result == "••••••••••••"

    def test_get_value_custom_mask_fn(self) -> None:
        """Test using a custom masking function."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        result = sensitive.get_value(
            apply_mask=True,
            mask_fn=lambda x: "REDACTED",  # noqa: ARG005
        )
        assert result == "REDACTED"

    def test_str_raises_error(self) -> None:
        """Test that str() raises SensitiveAccessError."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        with pytest.raises(SensitiveAccessError):
            str(sensitive)

    def test_repr_is_safe(self) -> None:
        """Test that repr() doesn't expose the value."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        result = repr(sensitive)
        assert "secret" not in result
        assert "SensitiveValue" in result
        assert "get_value" in result

    def test_iter_raises_error(self) -> None:
        """Test that iteration raises SensitiveAccessError."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        with pytest.raises(SensitiveAccessError):
            for _ in sensitive:  # type: ignore[attr-defined]
                pass

    def test_getitem_raises_error(self) -> None:
        """Test that subscript access raises SensitiveAccessError."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        with pytest.raises(SensitiveAccessError):
            _ = sensitive[0]

    def test_bool_returns_true(self) -> None:
        """Test that bool() works for truthiness checks."""
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        assert bool(sensitive) is True

    def test_equality_with_same_value(self) -> None:
        """Test equality comparison between SensitiveValues with same encrypted bytes."""
        encrypted = _encrypt_string("secret")
        sensitive1 = SensitiveValue(
            encrypted_bytes=encrypted,
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        sensitive2 = SensitiveValue(
            encrypted_bytes=encrypted,
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        assert sensitive1 == sensitive2

    def test_equality_with_different_value(self) -> None:
        """Test equality comparison between SensitiveValues with different encrypted bytes."""
        sensitive1 = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret1"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        sensitive2 = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret2"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        assert sensitive1 != sensitive2

    def test_equality_with_non_sensitive_returns_not_equal(self) -> None:
        """Test that comparing with non-SensitiveValue is always not-equal.

        Returns NotImplemented so Python falls back to identity comparison.
        This is required for compatibility with SQLAlchemy's attribute tracking.
        """
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=_decrypt_string,
            is_json=False,
        )
        assert not (sensitive == "secret")


class TestSensitiveValueJson:
    """Tests for SensitiveValue with JSON/dict values."""

    def test_get_value_raw_dict(self) -> None:
        """Test getting raw unmasked dict value."""
        data: dict[str, Any] = {"api_key": "secret-key", "username": "user123"}
        sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(
            encrypted_bytes=_encrypt_string(json.dumps(data)),
            decrypt_fn=_decrypt_string,
            is_json=True,
        )
        result = sensitive.get_value(apply_mask=False)
        assert result == data

    def test_get_value_masked_dict(self) -> None:
        """Test getting masked dict value with default masking."""
        data = {"api_key": "my-very-long-api-key-value", "username": "user123456789"}
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string(json.dumps(data)),
            decrypt_fn=_decrypt_string,
            is_json=True,
        )
        result = sensitive.get_value(apply_mask=True)
        # Values should be masked
        assert "my-very-long-api-key-value" not in str(result)
        assert "user123456789" not in str(result)

    def test_getitem_raises_error_for_dict(self) -> None:
        """Test that subscript access raises SensitiveAccessError for dict."""
        data = {"api_key": "secret"}
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string(json.dumps(data)),
            decrypt_fn=_decrypt_string,
            is_json=True,
        )
        with pytest.raises(SensitiveAccessError):
            _ = sensitive["api_key"]

    def test_iter_raises_error_for_dict(self) -> None:
        """Test that iteration raises SensitiveAccessError for dict."""
        data = {"api_key": "secret"}
        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string(json.dumps(data)),
            decrypt_fn=_decrypt_string,
            is_json=True,
        )
        with pytest.raises(SensitiveAccessError):
            for _ in sensitive:  # type: ignore[attr-defined]
                pass


class TestSensitiveValueCaching:
    """Tests for lazy decryption caching."""

    def test_decryption_is_cached(self) -> None:
        """Test that decryption result is cached."""
        decrypt_count = [0]

        def counting_decrypt(value: bytes) -> str:
            decrypt_count[0] += 1
            return value.decode("utf-8")

        sensitive = SensitiveValue(
            encrypted_bytes=_encrypt_string("secret"),
            decrypt_fn=counting_decrypt,
            is_json=False,
        )

        # First access
        sensitive.get_value(apply_mask=False)
        assert decrypt_count[0] == 1

        # Second access should use cached value
        sensitive.get_value(apply_mask=False)
        assert decrypt_count[0] == 1

        # Masked access should also use cached value
        sensitive.get_value(apply_mask=True)
        assert decrypt_count[0] == 1


================================================
FILE: backend/tests/unit/onyx/utils/test_sensitive_typing.py
================================================
"""
Tests demonstrating static type checking for SensitiveValue.

Run with: mypy tests/unit/onyx/utils/test_sensitive_typing.py --ignore-missing-imports

These tests show what mypy will catch when SensitiveValue is misused.
"""

from typing import Any

# This file demonstrates what mypy will catch.
# The commented-out code below would produce type errors.


def demonstrate_correct_usage() -> None:
    """Shows correct patterns that pass type checking."""
    from onyx.utils.sensitive import SensitiveValue
    from onyx.utils.encryption import encrypt_string_to_bytes, decrypt_bytes_to_string

    # Create a SensitiveValue
    encrypted = encrypt_string_to_bytes('{"api_key": "secret"}')
    sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(
        encrypted_bytes=encrypted,
        decrypt_fn=decrypt_bytes_to_string,
        is_json=True,
    )

    # CORRECT: Using get_value() to access the value
    raw_dict: dict[str, Any] = sensitive.get_value(apply_mask=False)
    assert raw_dict["api_key"] == "secret"

    masked_dict: dict[str, Any] = sensitive.get_value(apply_mask=True)
    assert "secret" not in str(masked_dict)

    # CORRECT: Using bool for truthiness
    if sensitive:
        print("Value exists")


# The code below demonstrates what mypy would catch.
# Uncomment to see the type errors.
"""
def demonstrate_incorrect_usage() -> None:
    '''Shows patterns that mypy will flag as errors.'''
    from onyx.utils.sensitive import SensitiveValue
    from onyx.utils.encryption import encrypt_string_to_bytes, decrypt_bytes_to_string

    encrypted = encrypt_string_to_bytes('{"api_key": "secret"}')
    sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(
        encrypted_bytes=encrypted,
        decrypt_fn=decrypt_bytes_to_string,
        is_json=True,
    )

    # ERROR: SensitiveValue doesn't support subscript access
    # mypy error: Value of type "SensitiveValue[dict[str, Any]]" is not indexable
    api_key = sensitive["api_key"]

    # ERROR: SensitiveValue doesn't support iteration
    # mypy error: "SensitiveValue[dict[str, Any]]" has no attribute "__iter__"
    for key in sensitive:
        print(key)

    # ERROR: Can't pass SensitiveValue where dict is expected
    # mypy error: Argument 1 has incompatible type "SensitiveValue[dict[str, Any]]"; expected "dict[str, Any]"
    def process_dict(d: dict[str, Any]) -> None:
        pass
    process_dict(sensitive)

    # ERROR: Can't use .get() on SensitiveValue
    # mypy error: "SensitiveValue[dict[str, Any]]" has no attribute "get"
    value = sensitive.get("api_key")
"""


def test_correct_usage_passes() -> None:
    """This test runs the correct usage demonstration."""
    demonstrate_correct_usage()


================================================
FILE: backend/tests/unit/onyx/utils/test_telemetry.py
================================================
from typing import Any
from unittest.mock import Mock

from onyx.configs.constants import MilestoneRecordType
from onyx.utils import telemetry as telemetry_utils


def test_mt_cloud_telemetry_noop_when_not_multi_tenant(monkeypatch: Any) -> None:
    fetch_impl = Mock()
    monkeypatch.setattr(
        telemetry_utils,
        "fetch_versioned_implementation_with_fallback",
        fetch_impl,
    )
    # mt_cloud_telemetry reads the module-local imported symbol, so patch this path.
    monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", False)

    telemetry_utils.mt_cloud_telemetry(
        tenant_id="tenant-1",
        distinct_id="12345678-1234-1234-1234-123456789abc",
        event=MilestoneRecordType.USER_MESSAGE_SENT,
        properties={"origin": "web"},
    )

    fetch_impl.assert_not_called()


def test_mt_cloud_telemetry_calls_event_telemetry_when_multi_tenant(
    monkeypatch: Any,
) -> None:
    event_telemetry = Mock()
    fetch_impl = Mock(return_value=event_telemetry)
    monkeypatch.setattr(
        telemetry_utils,
        "fetch_versioned_implementation_with_fallback",
        fetch_impl,
    )
    # mt_cloud_telemetry reads the module-local imported symbol, so patch this path.
    monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", True)

    telemetry_utils.mt_cloud_telemetry(
        tenant_id="tenant-1",
        distinct_id="12345678-1234-1234-1234-123456789abc",
        event=MilestoneRecordType.USER_MESSAGE_SENT,
        properties={"origin": "web"},
    )

    fetch_impl.assert_called_once_with(
        module="onyx.utils.telemetry",
        attribute="event_telemetry",
        fallback=telemetry_utils.noop_fallback,
    )
    event_telemetry.assert_called_once_with(
        "12345678-1234-1234-1234-123456789abc",
        MilestoneRecordType.USER_MESSAGE_SENT,
        {"origin": "web", "tenant_id": "tenant-1"},
    )


def test_mt_cloud_identify_noop_when_not_multi_tenant(monkeypatch: Any) -> None:
    fetch_impl = Mock()
    monkeypatch.setattr(
        telemetry_utils,
        "fetch_versioned_implementation_with_fallback",
        fetch_impl,
    )
    monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", False)

    telemetry_utils.mt_cloud_identify(
        distinct_id="12345678-1234-1234-1234-123456789abc",
        properties={"email": "user@example.com"},
    )

    fetch_impl.assert_not_called()


def test_mt_cloud_identify_calls_identify_user_when_multi_tenant(
    monkeypatch: Any,
) -> None:
    identify_user = Mock()
    fetch_impl = Mock(return_value=identify_user)
    monkeypatch.setattr(
        telemetry_utils,
        "fetch_versioned_implementation_with_fallback",
        fetch_impl,
    )
    monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", True)

    telemetry_utils.mt_cloud_identify(
        distinct_id="12345678-1234-1234-1234-123456789abc",
        properties={"email": "user@example.com"},
    )

    fetch_impl.assert_called_once_with(
        module="onyx.utils.telemetry",
        attribute="identify_user",
        fallback=telemetry_utils.noop_fallback,
    )
    identify_user.assert_called_once_with(
        "12345678-1234-1234-1234-123456789abc",
        {"email": "user@example.com"},
    )


================================================
FILE: backend/tests/unit/onyx/utils/test_threadpool_concurrency.py
================================================
import contextvars
import threading
import time
from collections.abc import Generator
from collections.abc import Iterator
from concurrent.futures import ThreadPoolExecutor

import pytest

from onyx.utils.threadpool_concurrency import parallel_yield
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.threadpool_concurrency import ThreadSafeDict
from onyx.utils.threadpool_concurrency import wait_on_background

# Create a context variable for testing
test_context_var = contextvars.ContextVar("test_var", default="default")


def test_run_with_timeout_completes() -> None:
    """Test that a function that completes within timeout works correctly"""

    def quick_function(x: int) -> int:
        return x * 2

    result = run_with_timeout(1.0, quick_function, x=21)
    assert result == 42


@pytest.mark.parametrize("slow,timeout", [(1, 0.1), (0.3, 0.2)])
def test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None:
    """Test that a function that exceeds timeout raises TimeoutError"""

    def slow_function() -> None:
        time.sleep(slow)

    start = time.monotonic()
    with pytest.raises(TimeoutError) as exc_info:
        run_with_timeout(timeout, slow_function)
    elapsed = time.monotonic() - start

    assert f"timed out after {timeout} seconds" in str(exc_info.value)
    assert elapsed >= timeout
    # Should return around the timeout duration, not the full sleep duration
    assert elapsed == pytest.approx(timeout, abs=0.8)


@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
def test_run_with_timeout_propagates_exceptions() -> None:
    """Test that other exceptions from the function are propagated properly"""

    def error_function() -> None:
        raise ValueError("Test error")

    with pytest.raises(ValueError) as exc_info:
        run_with_timeout(1.0, error_function)

    assert "Test error" in str(exc_info.value)


def test_run_with_timeout_with_args_and_kwargs() -> None:
    """Test that args and kwargs are properly passed to the function"""

    def complex_function(x: int, y: int, multiply: bool = False) -> int:
        if multiply:
            return x * y
        return x + y

    # Test with just positional args
    result1 = run_with_timeout(1.0, complex_function, x=5, y=3)
    assert result1 == 8

    # Test with positional and keyword args
    result2 = run_with_timeout(1.0, complex_function, x=5, y=3, multiply=True)
    assert result2 == 15


def test_run_in_background_and_wait_success() -> None:
    """Test that run_in_background and wait_on_background work correctly for successful execution"""

    def background_function(x: int) -> int:
        time.sleep(0.1)  # Small delay to ensure it's actually running in background
        return x * 2

    # Start the background task
    task = run_in_background(background_function, 21)

    # Verify we can do other work while task is running
    start_time = time.time()
    result = wait_on_background(task)
    elapsed = time.time() - start_time

    assert result == 42
    # sometimes slightly flaky
    assert elapsed >= 0.095  # Verify we actually waited for the sleep


@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
def test_run_in_background_propagates_exceptions() -> None:
    """Test that exceptions in background tasks are properly propagated"""

    def error_function() -> None:
        time.sleep(0.1)  # Small delay to ensure it's actually running in background
        raise ValueError("Test background error")

    task = run_in_background(error_function)

    with pytest.raises(ValueError) as exc_info:
        wait_on_background(task)

    assert "Test background error" in str(exc_info.value)


def test_run_in_background_with_args_and_kwargs() -> None:
    """Test that args and kwargs are properly passed to the background function"""

    def complex_function(x: int, y: int, multiply: bool = False) -> int:
        time.sleep(0.1)  # Small delay to ensure it's actually running in background
        if multiply:
            return x * y
        return x + y

    # Test with args
    task1 = run_in_background(complex_function, 5, 3)
    result1 = wait_on_background(task1)
    assert result1 == 8

    # Test with args and kwargs
    task2 = run_in_background(complex_function, 5, 3, multiply=True)
    result2 = wait_on_background(task2)
    assert result2 == 15


def test_multiple_background_tasks() -> None:
    """Test running multiple background tasks concurrently"""

    def slow_add(x: int, y: int) -> int:
        time.sleep(0.2)  # Make each task take some time
        return x + y

    # Start multiple tasks
    start_time = time.time()
    task1 = run_in_background(slow_add, 1, 2)
    task2 = run_in_background(slow_add, 3, 4)
    task3 = run_in_background(slow_add, 5, 6)

    # Wait for all results
    result1 = wait_on_background(task1)
    result2 = wait_on_background(task2)
    result3 = wait_on_background(task3)
    elapsed = time.time() - start_time

    # Verify results
    assert result1 == 3
    assert result2 == 7
    assert result3 == 11

    # Verify tasks ran in parallel (total time should be ~0.2s, not ~0.6s)
    assert 0.2 <= elapsed < 0.4  # Allow some buffer for test environment variations


def test_thread_safe_dict_basic_operations() -> None:
    """Test basic operations of ThreadSafeDict"""
    d = ThreadSafeDict[str, int]()

    # Test setting and getting
    d["a"] = 1
    assert d["a"] == 1

    # Test get with default
    assert d.get("a", None) == 1
    assert d.get("b", 2) == 2

    # Test deletion
    del d["a"]
    assert "a" not in d

    # Test length
    d["x"] = 10
    d["y"] = 20
    assert len(d) == 2

    # Test iteration
    keys = sorted(d.keys())
    assert keys == ["x", "y"]

    # Test items and values
    assert dict(d.items()) == {"x": 10, "y": 20}
    assert sorted(d.values()) == [10, 20]


def test_thread_safe_dict_concurrent_access() -> None:
    """Test ThreadSafeDict with concurrent access from multiple threads"""
    d = ThreadSafeDict[str, int]()
    num_threads = 10
    iterations = 1000

    def increment_values() -> None:
        for i in range(iterations):
            key = str(i % 5)  # Use 5 different keys
            # Get current value or 0 if not exists, increment, then store
            d.atomic_get_set(key, lambda x: x + 1, 0)

    # Create and start threads
    threads = []
    for _ in range(num_threads):
        t = threading.Thread(target=increment_values)
        threads.append(t)
        t.start()

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # Verify results
    # Each key should have been incremented (num_threads * iterations) / 5 times
    expected_value = (num_threads * iterations) // 5
    for i in range(5):
        assert d[str(i)] == expected_value


def test_thread_safe_dict_bulk_operations() -> None:
    """Test bulk operations of ThreadSafeDict"""
    d = ThreadSafeDict[str, int]()

    # Test update with dict
    d.update({"a": 1, "b": 2})
    assert dict(d.items()) == {"a": 1, "b": 2}

    # Test update with kwargs
    d.update(c=3, d=4)
    assert dict(d.items()) == {"a": 1, "b": 2, "c": 3, "d": 4}

    # Test clear
    d.clear()
    assert len(d) == 0


def test_thread_safe_dict_concurrent_bulk_operations() -> None:
    """Test ThreadSafeDict with concurrent bulk operations"""
    d = ThreadSafeDict[str, int]()
    num_threads = 5

    def bulk_update(start: int) -> None:
        # Each thread updates with its own range of numbers
        updates = {str(i): i for i in range(start, start + 20)}
        d.update(updates)
        time.sleep(0.01)  # Add some delay to increase chance of thread overlap

    # Run updates concurrently
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(bulk_update, i * 20) for i in range(num_threads)]
        for future in futures:
            future.result()

    # Verify results
    assert len(d) == num_threads * 20
    # Verify all numbers from 0 to (num_threads * 20) are present
    for i in range(num_threads * 20):
        assert d[str(i)] == i


def test_thread_safe_dict_atomic_operations() -> None:
    """Test atomic operations with ThreadSafeDict's lock"""
    d = ThreadSafeDict[str, list[int]]()
    d["numbers"] = []

    def append_numbers(start: int) -> None:
        numbers = d["numbers"]
        with d.lock:
            for i in range(start, start + 5):
                numbers.append(i)
                time.sleep(0.001)  # Add delay to increase chance of thread overlap
        d["numbers"] = numbers

    # Run concurrent append operations
    threads = []
    for i in range(4):  # 4 threads, each adding 5 numbers
        t = threading.Thread(target=append_numbers, args=(i * 5,))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    # Verify results
    numbers = d["numbers"]
    assert len(numbers) == 20  # 4 threads * 5 numbers each
    assert sorted(numbers) == list(range(20))  # All numbers 0-19 should be present


def test_parallel_yield_basic() -> None:
    """Test that parallel_yield correctly yields values from multiple generators."""

    def make_gen(values: list[int], delay: float) -> Generator[int, None, None]:
        for v in values:
            time.sleep(delay)
            yield v

    # Create generators with different delays
    gen1 = make_gen([1, 4, 7], 0.1)  # Slower generator
    gen2 = make_gen([2, 5, 8], 0.05)  # Faster generator
    gen3 = make_gen([3, 6, 9], 0.15)  # Slowest generator

    # Collect results with timestamps
    results: list[tuple[float, int]] = []
    start_time = time.time()

    for value in parallel_yield([gen1, gen2, gen3]):
        results.append((time.time() - start_time, value))

    # Verify all values were yielded
    assert sorted(v for _, v in results) == list(range(1, 10))

    # Verify that faster generators yielded earlier
    # Group results by generator (values 1,4,7 are gen1, 2,5,8 are gen2, 3,6,9 are gen3)
    gen1_times = [t for t, v in results if v in (1, 4, 7)]
    gen2_times = [t for t, v in results if v in (2, 5, 8)]
    gen3_times = [t for t, v in results if v in (3, 6, 9)]

    # Average times for each generator
    avg_gen1 = sum(gen1_times) / len(gen1_times)
    avg_gen2 = sum(gen2_times) / len(gen2_times)
    avg_gen3 = sum(gen3_times) / len(gen3_times)

    # Verify gen2 (fastest) has lowest average time
    assert avg_gen2 < avg_gen1
    assert avg_gen2 < avg_gen3


def test_parallel_yield_empty_generators() -> None:
    """Test parallel_yield with empty generators."""

    def empty_gen() -> Iterator[int]:
        if False:
            yield 0  # Makes this a generator function

    gens = [empty_gen() for _ in range(3)]
    results = list(parallel_yield(gens))
    assert len(results) == 0


def test_parallel_yield_different_lengths() -> None:
    """Test parallel_yield with generators of different lengths."""

    def make_gen(count: int) -> Iterator[int]:
        for i in range(count):
            yield i
            time.sleep(0.01)  # Small delay to ensure concurrent execution

    gens = [
        make_gen(1),  # Yields: [0]
        make_gen(3),  # Yields: [0, 1, 2]
        make_gen(2),  # Yields: [0, 1]
    ]

    results = list(parallel_yield(gens))
    assert len(results) == 6  # Total number of items from all generators
    assert sorted(results) == [0, 0, 0, 1, 1, 2]


def test_parallel_yield_exception_handling() -> None:
    """Test parallel_yield handles exceptions in generators properly."""

    def failing_gen() -> Iterator[int]:
        yield 1
        raise ValueError("Generator failure")

    def normal_gen() -> Iterator[int]:
        yield 2
        yield 3

    gens = [failing_gen(), normal_gen()]

    with pytest.raises(ValueError, match="Generator failure"):
        list(parallel_yield(gens))


def test_parallel_yield_non_blocking() -> None:
    """Test parallel_yield with non-blocking generators (simple ranges)."""

    def range_gen(start: int, end: int) -> Iterator[int]:
        for i in range(start, end):
            yield i

    # Create three overlapping ranges
    gens = [range_gen(0, 100), range_gen(100, 200), range_gen(200, 300)]

    results = list(parallel_yield(gens))

    # Verify no values are missing
    assert len(results) == 300  # Should have all values from 0 to 299
    assert sorted(results) == list(range(300))


================================================
FILE: backend/tests/unit/onyx/utils/test_threadpool_contextvars.py
================================================
import contextvars
import time

from onyx.utils.threadpool_concurrency import FunctionCall
from onyx.utils.threadpool_concurrency import run_functions_in_parallel
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.threadpool_concurrency import wait_on_background

# Create a test contextvar
test_var = contextvars.ContextVar("test_var", default="default")


def get_contextvar_value() -> str:
    """Helper function that runs in a thread and returns the contextvar value"""
    # Add a small sleep to ensure we're actually running in a different thread
    time.sleep(0.1)
    return test_var.get()


def test_run_with_timeout_preserves_contextvar() -> None:
    """Test that run_with_timeout preserves contextvar values"""
    # Set a value in the main thread
    test_var.set("test_value")

    # Run function with timeout and verify the value is preserved
    result = run_with_timeout(1.0, get_contextvar_value)
    assert result == "test_value"


def test_run_functions_in_parallel_preserves_contextvar() -> None:
    """Test that run_functions_in_parallel preserves contextvar values"""
    # Set a value in the main thread
    test_var.set("parallel_test")

    # Create multiple function calls
    function_calls = [
        FunctionCall(get_contextvar_value),
        FunctionCall(get_contextvar_value),
    ]

    # Run in parallel and verify all results have the correct value
    results = run_functions_in_parallel(function_calls)

    for result_id, value in results.items():
        assert value == "parallel_test"


def test_run_functions_tuples_preserves_contextvar() -> None:
    """Test that run_functions_tuples_in_parallel preserves contextvar values"""
    # Set a value in the main thread
    test_var.set("tuple_test")

    # Create list of function tuples
    functions_with_args = [
        (get_contextvar_value, ()),
        (get_contextvar_value, ()),
    ]

    # Run in parallel and verify all results have the correct value
    results = run_functions_tuples_in_parallel(functions_with_args)

    for result in results:
        assert result == "tuple_test"


def test_nested_contextvar_modifications() -> None:
    """Test that modifications to contextvars in threads don't affect other threads"""

    def modify_and_return_contextvar(new_value: str) -> tuple[str, str]:
        """Helper that modifies the contextvar and returns both values"""
        original = test_var.get()
        test_var.set(new_value)
        time.sleep(0.1)  # Ensure threads overlap
        return original, test_var.get()

    # Set initial value
    test_var.set("initial")

    # Run multiple functions that modify the contextvar
    functions_with_args = [
        (modify_and_return_contextvar, ("thread1",)),
        (modify_and_return_contextvar, ("thread2",)),
    ]

    results = run_functions_tuples_in_parallel(functions_with_args)

    # Verify each thread saw the initial value and its own modification
    for original, modified in results:
        assert original == "initial"  # Each thread should see the initial value
        assert modified in [
            "thread1",
            "thread2",
        ]  # Each thread should see its own modification

    # Verify the main thread's value wasn't affected
    assert test_var.get() == "initial"


def test_contextvar_isolation_between_runs() -> None:
    """Test that contextvar changes don't leak between separate parallel runs"""

    def set_and_return_contextvar(value: str) -> str:
        test_var.set(value)
        return test_var.get()

    # First run
    test_var.set("first_run")
    first_results = run_functions_tuples_in_parallel(
        [
            (set_and_return_contextvar, ("thread1",)),
            (set_and_return_contextvar, ("thread2",)),
        ]
    )

    # Verify first run results
    assert all(result in ["thread1", "thread2"] for result in first_results)

    # Second run should still see the main thread's value
    assert test_var.get() == "first_run"

    # Second run with different value
    test_var.set("second_run")
    second_results = run_functions_tuples_in_parallel(
        [
            (set_and_return_contextvar, ("thread3",)),
            (set_and_return_contextvar, ("thread4",)),
        ]
    )

    # Verify second run results
    assert all(result in ["thread3", "thread4"] for result in second_results)


def test_run_in_background_preserves_contextvar() -> None:
    """Test that run_in_background preserves contextvar values and modifications are isolated"""

    def modify_and_sleep() -> tuple[str, str]:
        """Modifies contextvar, sleeps, and returns original, modified, and final values"""
        original = test_var.get()
        test_var.set("modified_in_background")
        time.sleep(0.1)  # Ensure we can check main thread during execution
        final = test_var.get()
        return original, final

    # Set initial value in main thread
    token = test_var.set("initial_value")
    try:
        # Start background task
        task = run_in_background(modify_and_sleep)

        # Verify main thread value remains unchanged while task runs
        assert test_var.get() == "initial_value"

        # Get results from background thread
        original, modified = wait_on_background(task)

        # Verify the background thread:
        # 1. Saw the initial value
        assert original == "initial_value"
        # 2. Successfully modified its own copy
        assert modified == "modified_in_background"

        # Verify main thread value is still unchanged after task completion
        assert test_var.get() == "initial_value"
    finally:
        # Clean up
        test_var.reset(token)


================================================
FILE: backend/tests/unit/onyx/utils/test_url_ssrf.py
================================================
"""
Unit tests for SSRF protection in URL validation utilities.

These tests verify that the SSRF protection correctly blocks
requests to internal/private IP addresses and other potentially dangerous destinations.
"""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.utils.url import _is_ip_private_or_reserved
from onyx.utils.url import _validate_and_resolve_url
from onyx.utils.url import ssrf_safe_get
from onyx.utils.url import SSRFException
from onyx.utils.url import validate_outbound_http_url


class TestIsIpPrivateOrReserved:
    """Tests for the _is_ip_private_or_reserved helper function."""

    def test_loopback_ipv4(self) -> None:
        """Test that IPv4 loopback addresses are detected as private."""
        assert _is_ip_private_or_reserved("127.0.0.1") is True
        assert _is_ip_private_or_reserved("127.0.0.2") is True
        assert _is_ip_private_or_reserved("127.255.255.255") is True

    def test_loopback_ipv6(self) -> None:
        """Test that IPv6 loopback addresses are detected as private."""
        assert _is_ip_private_or_reserved("::1") is True

    def test_private_class_a(self) -> None:
        """Test that private Class A addresses (10.x.x.x) are detected."""
        assert _is_ip_private_or_reserved("10.0.0.1") is True
        assert _is_ip_private_or_reserved("10.255.255.255") is True

    def test_private_class_b(self) -> None:
        """Test that private Class B addresses (172.16-31.x.x) are detected."""
        assert _is_ip_private_or_reserved("172.16.0.1") is True
        assert _is_ip_private_or_reserved("172.31.255.255") is True

    def test_private_class_c(self) -> None:
        """Test that private Class C addresses (192.168.x.x) are detected."""
        assert _is_ip_private_or_reserved("192.168.0.1") is True
        assert _is_ip_private_or_reserved("192.168.255.255") is True

    def test_link_local(self) -> None:
        """Test that link-local addresses are detected as private."""
        assert _is_ip_private_or_reserved("169.254.0.1") is True
        assert _is_ip_private_or_reserved("169.254.255.255") is True

    def test_cloud_metadata_ips(self) -> None:
        """Test that cloud metadata service IPs are detected."""
        assert _is_ip_private_or_reserved("169.254.169.254") is True  # AWS/GCP/Azure
        assert _is_ip_private_or_reserved("169.254.170.2") is True  # AWS ECS

    def test_multicast(self) -> None:
        """Test that multicast addresses are detected."""
        assert _is_ip_private_or_reserved("224.0.0.1") is True
        assert _is_ip_private_or_reserved("239.255.255.255") is True

    def test_unspecified(self) -> None:
        """Test that unspecified addresses are detected."""
        assert _is_ip_private_or_reserved("0.0.0.0") is True
        assert _is_ip_private_or_reserved("::") is True

    def test_public_ips(self) -> None:
        """Test that public IP addresses are not flagged as private."""
        assert _is_ip_private_or_reserved("8.8.8.8") is False  # Google DNS
        assert _is_ip_private_or_reserved("1.1.1.1") is False  # Cloudflare DNS
        assert _is_ip_private_or_reserved("104.16.0.1") is False  # Cloudflare
        assert _is_ip_private_or_reserved("142.250.80.46") is False  # Google

    def test_invalid_ip(self) -> None:
        """Test that invalid IPs are treated as potentially unsafe."""
        assert _is_ip_private_or_reserved("not-an-ip") is True
        assert _is_ip_private_or_reserved("") is True


class TestValidateAndResolveUrl:
    """Tests for the _validate_and_resolve_url function."""

    def test_empty_url(self) -> None:
        """Test that empty URLs raise ValueError."""
        with pytest.raises(ValueError, match="URL cannot be empty"):
            _validate_and_resolve_url("")

    def test_invalid_scheme_ftp(self) -> None:
        """Test that non-HTTP schemes are rejected."""
        with pytest.raises(SSRFException, match="Invalid URL scheme"):
            _validate_and_resolve_url("ftp://example.com/file.txt")

    def test_invalid_scheme_file(self) -> None:
        """Test that file:// scheme is rejected."""
        with pytest.raises(SSRFException, match="Invalid URL scheme"):
            _validate_and_resolve_url("file:///etc/passwd")

    def test_invalid_scheme_gopher(self) -> None:
        """Test that gopher:// scheme is rejected."""
        with pytest.raises(SSRFException, match="Invalid URL scheme"):
            _validate_and_resolve_url("gopher://localhost:70/")

    def test_valid_http_scheme(self) -> None:
        """Test that http scheme is accepted for public URLs."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [
                (2, 1, 6, "", ("93.184.216.34", 80))  # example.com's IP
            ]
            ip, hostname, port = _validate_and_resolve_url("http://example.com/")
            assert ip == "93.184.216.34"
            assert hostname == "example.com"
            assert port == 80

    def test_valid_https_scheme(self) -> None:
        """Test that https scheme is accepted for public URLs."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 443))]
            ip, hostname, port = _validate_and_resolve_url("https://example.com/")
            assert ip == "93.184.216.34"
            assert hostname == "example.com"
            assert port == 443

    def test_localhost_ipv4(self) -> None:
        """Test that localhost (127.0.0.1) is blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            _validate_and_resolve_url("http://127.0.0.1/")

    def test_localhost_hostname(self) -> None:
        """Test that 'localhost' hostname is blocked."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("127.0.0.1", 80))]
            with pytest.raises(
                SSRFException, match="Access to hostname 'localhost' is not allowed."
            ):
                _validate_and_resolve_url("http://localhost/")

    def test_private_ip_10_network(self) -> None:
        """Test that 10.x.x.x addresses are blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            _validate_and_resolve_url("http://10.0.0.1/")

    def test_private_ip_172_network(self) -> None:
        """Test that 172.16-31.x.x addresses are blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            _validate_and_resolve_url("http://172.16.0.1/")

    def test_private_ip_192_168_network(self) -> None:
        """Test that 192.168.x.x addresses are blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            _validate_and_resolve_url("http://192.168.1.1/")

    def test_aws_metadata_endpoint(self) -> None:
        """Test that AWS metadata endpoint is blocked."""
        with pytest.raises(
            SSRFException, match="Access to hostname '169.254.169.254' is not allowed."
        ):
            _validate_and_resolve_url("http://169.254.169.254/latest/meta-data/")

    def test_blocked_hostname_kubernetes(self) -> None:
        """Test that Kubernetes internal hostnames are blocked."""
        with pytest.raises(SSRFException, match="not allowed"):
            _validate_and_resolve_url("http://kubernetes.default.svc.cluster.local/")

    def test_blocked_hostname_metadata_google(self) -> None:
        """Test that Google metadata hostname is blocked."""
        with pytest.raises(SSRFException, match="not allowed"):
            _validate_and_resolve_url("http://metadata.google.internal/")

    def test_url_with_credentials(self) -> None:
        """Test that URLs with embedded credentials are blocked."""
        with pytest.raises(SSRFException, match="embedded credentials"):
            _validate_and_resolve_url("http://user:pass@example.com/")

    def test_url_with_port(self) -> None:
        """Test that URLs with ports are handled correctly."""
        # Internal IP with custom port should be blocked
        with pytest.raises(SSRFException, match="internal/private IP"):
            _validate_and_resolve_url("http://127.0.0.1:8080/metrics")

        # Public IP with custom port should be allowed
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 8080))]
            ip, hostname, port = _validate_and_resolve_url("http://example.com:8080/")
            assert ip == "93.184.216.34"
            assert port == 8080

    def test_hostname_resolving_to_private_ip(self) -> None:
        """Test that hostnames resolving to private IPs are blocked."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("192.168.1.100", 80))]
            with pytest.raises(SSRFException, match="internal/private IP"):
                _validate_and_resolve_url("http://internal-service.company.com/")

    def test_multiple_dns_records_one_private(self) -> None:
        """Test that a hostname with mixed public/private IPs is blocked."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [
                (2, 1, 6, "", ("93.184.216.34", 80)),  # Public
                (2, 1, 6, "", ("10.0.0.1", 80)),  # Private
            ]
            with pytest.raises(SSRFException, match="internal/private IP"):
                _validate_and_resolve_url("http://dual-stack.example.com/")

    def test_dns_resolution_failure(self) -> None:
        """Test that DNS resolution failures are handled safely."""
        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            import socket

            mock_getaddrinfo.side_effect = socket.gaierror("Name resolution failed")
            with pytest.raises(SSRFException, match="Could not resolve hostname"):
                _validate_and_resolve_url("http://nonexistent-domain-12345.invalid/")


class TestSsrfSafeGet:
    """Tests for the ssrf_safe_get function."""

    def test_blocks_private_ip(self) -> None:
        """Test that requests to private IPs are blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            ssrf_safe_get("http://192.168.1.1/")

    def test_blocks_localhost(self) -> None:
        """Test that requests to localhost are blocked."""
        with pytest.raises(SSRFException, match="internal/private IP"):
            ssrf_safe_get("http://127.0.0.1/")

    def test_blocks_metadata_endpoint(self) -> None:
        """Test that requests to cloud metadata endpoints are blocked."""
        with pytest.raises(
            SSRFException, match="Access to hostname '169.254.169.254' is not allowed."
        ):
            ssrf_safe_get("http://169.254.169.254/")

    def test_makes_request_to_validated_ip_http(self) -> None:
        """Test that HTTP requests are made to the validated IP."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.is_redirect = False

        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 80))]

            with patch("onyx.utils.url.requests.get") as mock_get:
                mock_get.return_value = mock_response

                response = ssrf_safe_get("http://example.com/path")

                # Verify the request was made to the IP, not the hostname
                mock_get.assert_called_once()
                call_args = mock_get.call_args
                assert "93.184.216.34" in call_args[0][0]
                # Verify Host header is set
                assert call_args[1]["headers"]["Host"] == "example.com"
                assert response == mock_response

    def test_makes_request_with_original_url_https(self) -> None:
        """Test that HTTPS requests use original URL for TLS."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.is_redirect = False

        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 443))]

            with patch("onyx.utils.url.requests.get") as mock_get:
                mock_get.return_value = mock_response

                response = ssrf_safe_get("https://example.com/path")

                # For HTTPS, we use original URL for TLS
                mock_get.assert_called_once()
                call_args = mock_get.call_args
                assert call_args[0][0] == "https://example.com/path"
                assert response == mock_response

    def test_passes_custom_headers(self) -> None:
        """Test that custom headers are passed through."""
        mock_response = MagicMock()
        mock_response.is_redirect = False

        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 80))]

            with patch("onyx.utils.url.requests.get") as mock_get:
                mock_get.return_value = mock_response

                custom_headers = {"User-Agent": "TestBot/1.0"}
                ssrf_safe_get("http://example.com/", headers=custom_headers)

                call_args = mock_get.call_args
                assert call_args[1]["headers"]["User-Agent"] == "TestBot/1.0"

    def test_passes_timeout(self) -> None:
        """Test that timeout is passed through, including tuple form."""
        mock_response = MagicMock()
        mock_response.is_redirect = False

        with patch("onyx.utils.url.socket.getaddrinfo") as mock_getaddrinfo:
            mock_getaddrinfo.return_value = [(2, 1, 6, "", ("93.184.216.34", 80))]

            with patch("onyx.utils.url.requests.get") as mock_get:
                mock_get.return_value = mock_response

                ssrf_safe_get("http://example.com/", timeout=(5, 15))

                call_args = mock_get.call_args
                assert call_args[1]["timeout"] == (5, 15)


class TestValidateOutboundHttpUrl:
    def test_rejects_private_ip_by_default(self) -> None:
        with pytest.raises(SSRFException, match="internal/private IP"):
            validate_outbound_http_url("http://127.0.0.1:8000")

    def test_allows_private_ip_when_explicitly_enabled(self) -> None:
        validated_url = validate_outbound_http_url(
            "http://127.0.0.1:8000", allow_private_network=True
        )
        assert validated_url == "http://127.0.0.1:8000"

    def test_blocks_metadata_hostname_when_private_is_enabled(self) -> None:
        with pytest.raises(SSRFException, match="not allowed"):
            validate_outbound_http_url(
                "http://metadata.google.internal/latest",
                allow_private_network=True,
            )


================================================
FILE: backend/tests/unit/onyx/utils/test_vespa_query.py
================================================
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from onyx.configs.constants import DocumentSource
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import Tag
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
    build_vespa_filters,
)
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import PERSONAS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT


class TestBuildVespaFilters:
    def test_empty_filters(self) -> None:
        """Test with empty filters object."""
        filters = IndexFilters(access_control_list=[])
        result = build_vespa_filters(filters)
        assert result == f"!({HIDDEN}=true) and "

        # With trailing AND removed
        result = build_vespa_filters(filters, remove_trailing_and=True)
        assert result == f"!({HIDDEN}=true)"

    def test_include_hidden(self) -> None:
        """Test with include_hidden flag."""
        filters = IndexFilters(access_control_list=[])
        result = build_vespa_filters(filters, include_hidden=True)
        assert result == ""  # No filters applied when including hidden

        # With some other filter to ensure proper AND chaining
        filters = IndexFilters(access_control_list=[], source_type=[DocumentSource.WEB])
        result = build_vespa_filters(filters, include_hidden=True)
        assert result == f'({SOURCE_TYPE} contains "web") and '

    def test_acl(self) -> None:
        """Test with acls — uses weightedSet operator for efficient matching."""
        # Single ACL
        filters = IndexFilters(access_control_list=["user1"])
        result = build_vespa_filters(filters)
        assert (
            result
            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1}}) and '
        )

        # Multiple ACL's
        filters = IndexFilters(access_control_list=["user2", "group2"])
        result = build_vespa_filters(filters)
        assert (
            result
            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user2":1, "group2":1}}) and '
        )

    def test_tenant_filter(self) -> None:
        """Test tenant ID filtering."""
        # With tenant ID
        if MULTI_TENANT:
            filters = IndexFilters(access_control_list=[], tenant_id="tenant1")
            result = build_vespa_filters(filters)
            assert (
                f'!({HIDDEN}=true) and ({TENANT_ID} contains "tenant1") and ' == result
            )

        # No tenant ID
        filters = IndexFilters(access_control_list=[], tenant_id=None)
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

    def test_source_type_filter(self) -> None:
        """Test source type filtering."""
        # Single source type
        filters = IndexFilters(access_control_list=[], source_type=[DocumentSource.WEB])
        result = build_vespa_filters(filters)
        assert f'!({HIDDEN}=true) and ({SOURCE_TYPE} contains "web") and ' == result

        # Multiple source types
        filters = IndexFilters(
            access_control_list=[],
            source_type=[DocumentSource.WEB, DocumentSource.JIRA],
        )
        result = build_vespa_filters(filters)
        assert (
            f'!({HIDDEN}=true) and ({SOURCE_TYPE} contains "web" or {SOURCE_TYPE} contains "jira") and '
            == result
        )

        # Empty source type list
        filters = IndexFilters(access_control_list=[], source_type=[])
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

    def test_tag_filters(self) -> None:
        """Test tag filtering."""
        # Single tag
        filters = IndexFilters(
            access_control_list=[], tags=[Tag(tag_key="color", tag_value="red")]
        )
        result = build_vespa_filters(filters)
        assert (
            f'!({HIDDEN}=true) and ({METADATA_LIST} contains "color{INDEX_SEPARATOR}red") and '
            == result
        )

        # Multiple tags
        filters = IndexFilters(
            access_control_list=[],
            tags=[
                Tag(tag_key="color", tag_value="red"),
                Tag(tag_key="size", tag_value="large"),
            ],
        )
        result = build_vespa_filters(filters)
        expected = (
            f'!({HIDDEN}=true) and ({METADATA_LIST} contains "color{INDEX_SEPARATOR}red" '
            f'or {METADATA_LIST} contains "size{INDEX_SEPARATOR}large") and '
        )
        assert expected == result

        # Empty tags list
        filters = IndexFilters(access_control_list=[], tags=[])
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

    def test_document_sets_filter(self) -> None:
        """Test document sets filtering."""
        # Single document set
        filters = IndexFilters(access_control_list=[], document_set=["set1"])
        result = build_vespa_filters(filters)
        assert f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains "set1") and ' == result

        # Multiple document sets
        filters = IndexFilters(access_control_list=[], document_set=["set1", "set2"])
        result = build_vespa_filters(filters)
        assert (
            f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains "set1" or {DOCUMENT_SETS} contains "set2") and '
            == result
        )

        # Empty document sets
        filters = IndexFilters(access_control_list=[], document_set=[])
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

    def test_user_project_filter(self) -> None:
        """Test user project filtering.

        project_id_filter alone does NOT trigger a knowledge scope restriction
        (an agent with no explicit knowledge should search everything).
        It only participates when explicit knowledge filters are present.
        """
        # project_id_filter alone → no restriction
        filters = IndexFilters(access_control_list=[], project_id_filter=789)
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

        # project_id_filter with document_set → both OR'd
        filters = IndexFilters(
            access_control_list=[], project_id_filter=789, document_set=["set1"]
        )
        result = build_vespa_filters(filters)
        assert (
            f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains "set1") or ({USER_PROJECT} contains "789")) and '
            == result
        )

        # No project id filter
        filters = IndexFilters(access_control_list=[], project_id_filter=None)
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

    def test_time_cutoff_filter(self) -> None:
        """Test time cutoff filtering."""
        # With cutoff time
        cutoff_time = datetime(2023, 1, 1, tzinfo=timezone.utc)
        filters = IndexFilters(access_control_list=[], time_cutoff=cutoff_time)
        result = build_vespa_filters(filters)
        cutoff_secs = int(cutoff_time.timestamp())
        assert (
            f"!({HIDDEN}=true) and !({DOC_UPDATED_AT} < {cutoff_secs}) and " == result
        )

        # No cutoff time
        filters = IndexFilters(access_control_list=[], time_cutoff=None)
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result

        # Test untimed logic (when cutoff is old enough)
        old_cutoff = datetime.now(timezone.utc) - timedelta(days=100)
        filters = IndexFilters(access_control_list=[], time_cutoff=old_cutoff)
        result = build_vespa_filters(filters)
        old_cutoff_secs = int(old_cutoff.timestamp())
        assert (
            f"!({HIDDEN}=true) and !({DOC_UPDATED_AT} < {old_cutoff_secs}) and "
            == result
        )

    def test_combined_filters(self) -> None:
        """Test combining multiple filter types.

        Knowledge-scope filters (document_set, project_id_filter, persona_id_filter)
        are OR'd together, while all other filters are AND'd.
        """
        filters = IndexFilters(
            access_control_list=["user1", "group1"],
            source_type=[DocumentSource.WEB],
            tags=[Tag(tag_key="color", tag_value="red")],
            document_set=["set1"],
            project_id_filter=789,
            persona_id_filter=42,
            time_cutoff=datetime(2023, 1, 1, tzinfo=timezone.utc),
        )

        result = build_vespa_filters(filters)

        expected = f"!({HIDDEN}=true) and "
        expected += 'weightedSet(access_control_list, {"user1":1, "group1":1}) and '
        expected += f'({SOURCE_TYPE} contains "web") and '
        expected += f'({METADATA_LIST} contains "color{INDEX_SEPARATOR}red") and '
        # Knowledge scope filters are OR'd together
        # (persona_id_filter is primary, project_id_filter is additive — order reflects this)
        expected += (
            f'(({DOCUMENT_SETS} contains "set1")'
            f' or ({PERSONAS} contains "42")'
            f' or ({USER_PROJECT} contains "789")'
            f") and "
        )
        cutoff_secs = int(datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp())
        expected += f"!({DOC_UPDATED_AT} < {cutoff_secs}) and "

        assert expected == result

        # With trailing AND removed
        result_no_trailing = build_vespa_filters(filters, remove_trailing_and=True)
        assert expected[:-5] == result_no_trailing  # Remove trailing " and "

    def test_knowledge_scope_single_filter_not_wrapped(self) -> None:
        """When only one knowledge-scope filter is present it should not
        be wrapped in an extra OR group."""
        filters = IndexFilters(access_control_list=[], document_set=["set1"])
        result = build_vespa_filters(filters)
        assert f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains "set1") and ' == result

    def test_persona_id_filter_is_primary_knowledge_scope(self) -> None:
        """persona_id_filter alone should trigger a knowledge scope restriction
        (a persona with user files IS explicit knowledge)."""
        filters = IndexFilters(access_control_list=[], persona_id_filter=42)
        result = build_vespa_filters(filters)
        assert f'!({HIDDEN}=true) and ({PERSONAS} contains "42") and ' == result

    def test_persona_id_filter_with_project_id_filter(self) -> None:
        """When persona_id_filter triggers the scope, project_id_filter should be
        OR'd in additively."""
        filters = IndexFilters(
            access_control_list=[], persona_id_filter=42, project_id_filter=789
        )
        result = build_vespa_filters(filters)
        expected = (
            f"!({HIDDEN}=true) and "
            f'(({PERSONAS} contains "42") or ({USER_PROJECT} contains "789")) and '
        )
        assert expected == result

    def test_knowledge_scope_document_set_and_persona_filter_ored(self) -> None:
        """Document set filter and persona_id_filter must be OR'd so that
        connector documents (in the set) and persona user files can
        both be found."""
        filters = IndexFilters(
            access_control_list=[],
            document_set=["engineering"],
            persona_id_filter=42,
        )
        result = build_vespa_filters(filters)
        expected = f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains "engineering") or ({PERSONAS} contains "42")) and '
        assert expected == result

    def test_acl_large_list_uses_weighted_set(self) -> None:
        """Verify that large ACL lists produce a weightedSet clause
        instead of OR-chained contains — this is what prevents Vespa
        HTTP 400 errors for users with thousands of permission groups."""
        acl = [f"external_group:google_drive_{i}" for i in range(10_000)]
        acl += ["user_email:user@example.com", "__PUBLIC__"]
        filters = IndexFilters(access_control_list=acl)
        result = build_vespa_filters(filters)

        assert "weightedSet(access_control_list, {" in result
        # Must NOT contain OR-chained contains clauses
        assert "access_control_list contains" not in result
        # All entries should be present
        assert '"external_group:google_drive_0":1' in result
        assert '"external_group:google_drive_9999":1' in result
        assert '"user_email:user@example.com":1' in result
        assert '"__PUBLIC__":1' in result

    def test_acl_empty_strings_filtered(self) -> None:
        """Empty strings in the ACL list should be filtered out."""
        filters = IndexFilters(access_control_list=["user1", "", "group1"])
        result = build_vespa_filters(filters)
        assert (
            result
            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1, "group1":1}}) and '
        )

        # All empty
        filters = IndexFilters(access_control_list=["", ""])
        result = build_vespa_filters(filters)
        assert result == f"!({HIDDEN}=true) and "

    def test_empty_or_none_values(self) -> None:
        """Test with empty or None values in filter lists."""
        # Empty strings in document set
        filters = IndexFilters(
            access_control_list=[], document_set=["set1", "", "set2"]
        )
        result = build_vespa_filters(filters)
        assert (
            f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains "set1" or {DOCUMENT_SETS} contains "set2") and '
            == result
        )

        # All empty strings in document set
        filters = IndexFilters(access_control_list=[], document_set=["", ""])
        result = build_vespa_filters(filters)
        assert f"!({HIDDEN}=true) and " == result


================================================
FILE: backend/tests/unit/onyx/utils/test_vespa_tasks.py
================================================
from types import SimpleNamespace
from typing import Any

from onyx.background.celery.tasks.vespa import tasks as vespa_tasks


class _StubRedisDocumentSet:
    """Lightweight stand-in for RedisDocumentSet used by monitor tests."""

    reset_called = False

    @staticmethod
    def get_id_from_fence_key(key: str) -> str | None:
        parts = key.split("_")
        return parts[-1] if len(parts) == 3 else None

    def __init__(self, tenant_id: str, object_id: str) -> None:  # noqa: ARG002
        self.taskset_key = f"documentset_taskset_{object_id}"
        self._payload = 0

    @property
    def fenced(self) -> bool:
        return True

    @property
    def payload(self) -> int:
        return self._payload

    def reset(self) -> None:
        self.__class__.reset_called = True


def _setup_common_patches(monkeypatch: Any, document_set: Any) -> dict[str, bool]:
    calls: dict[str, bool] = {"deleted": False, "synced": False}

    monkeypatch.setattr(vespa_tasks, "RedisDocumentSet", _StubRedisDocumentSet)

    monkeypatch.setattr(
        vespa_tasks,
        "get_document_set_by_id",
        lambda db_session, document_set_id: document_set,  # noqa: ARG005
    )

    def _delete(document_set_row: Any, db_session: Any) -> None:  # noqa: ARG001
        calls["deleted"] = True

    monkeypatch.setattr(vespa_tasks, "delete_document_set", _delete)

    def _mark(document_set_id: Any, db_session: Any) -> None:  # noqa: ARG001
        calls["synced"] = True

    monkeypatch.setattr(vespa_tasks, "mark_document_set_as_synced", _mark)

    monkeypatch.setattr(
        vespa_tasks,
        "update_sync_record_status",
        lambda db_session, entity_id, sync_type, sync_status, num_docs_synced: None,  # noqa: ARG005
    )

    return calls


def test_monitor_preserves_federated_only_document_set(monkeypatch: Any) -> None:
    document_set = SimpleNamespace(
        connector_credential_pairs=[],
        federated_connectors=[object()],
    )

    calls = _setup_common_patches(monkeypatch, document_set)

    vespa_tasks.monitor_document_set_taskset(
        tenant_id="tenant",
        key_bytes=b"documentset_fence_1",
        r=SimpleNamespace(scard=lambda key: 0),  # type: ignore[arg-type]  # noqa: ARG005
        db_session=SimpleNamespace(),  # type: ignore[arg-type]
    )

    assert calls["synced"] is True
    assert calls["deleted"] is False


def test_monitor_deletes_document_set_with_no_connectors(monkeypatch: Any) -> None:
    document_set = SimpleNamespace(
        connector_credential_pairs=[],
        federated_connectors=[],
    )

    calls = _setup_common_patches(monkeypatch, document_set)

    vespa_tasks.monitor_document_set_taskset(
        tenant_id="tenant",
        key_bytes=b"documentset_fence_2",
        r=SimpleNamespace(scard=lambda key: 0),  # type: ignore[arg-type]  # noqa: ARG005
        db_session=SimpleNamespace(),  # type: ignore[arg-type]
    )

    assert calls["deleted"] is True
    assert calls["synced"] is False


================================================
FILE: backend/tests/unit/onyx/voice/providers/test_azure_provider.py
================================================
import pytest

from onyx.voice.providers.azure import AzureVoiceProvider


def test_azure_provider_extracts_region_from_target_uri() -> None:
    provider = AzureVoiceProvider(
        api_key="key",
        api_base="https://westus.api.cognitive.microsoft.com/",
        custom_config={},
    )
    assert provider.speech_region == "westus"


def test_azure_provider_normalizes_uppercase_region() -> None:
    provider = AzureVoiceProvider(
        api_key="key",
        api_base=None,
        custom_config={"speech_region": "WestUS2"},
    )
    assert provider.speech_region == "westus2"


def test_azure_provider_rejects_invalid_speech_region() -> None:
    with pytest.raises(ValueError, match="Invalid Azure speech_region"):
        AzureVoiceProvider(
            api_key="key",
            api_base=None,
            custom_config={"speech_region": "westus/../../etc"},
        )


================================================
FILE: backend/tests/unit/onyx/voice/providers/test_azure_ssml.py
================================================
import io
import struct
import wave

import pytest

from onyx.voice.providers.azure import AzureVoiceProvider


# --- _is_azure_cloud_url ---


def test_is_azure_cloud_url_speech_microsoft() -> None:
    assert AzureVoiceProvider._is_azure_cloud_url(
        "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
    )


def test_is_azure_cloud_url_cognitive_microsoft() -> None:
    assert AzureVoiceProvider._is_azure_cloud_url(
        "https://westus.api.cognitive.microsoft.com/"
    )


def test_is_azure_cloud_url_rejects_custom_host() -> None:
    assert not AzureVoiceProvider._is_azure_cloud_url("https://my-custom-host.com/")


def test_is_azure_cloud_url_rejects_none() -> None:
    assert not AzureVoiceProvider._is_azure_cloud_url(None)


# --- _extract_speech_region_from_uri ---


def test_extract_region_from_tts_url() -> None:
    assert (
        AzureVoiceProvider._extract_speech_region_from_uri(
            "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
        )
        == "eastus"
    )


def test_extract_region_from_cognitive_api_url() -> None:
    assert (
        AzureVoiceProvider._extract_speech_region_from_uri(
            "https://eastus.api.cognitive.microsoft.com/"
        )
        == "eastus"
    )


def test_extract_region_returns_none_for_custom_domain() -> None:
    """Custom domains use resource name, not region — must use speech_region config."""
    assert (
        AzureVoiceProvider._extract_speech_region_from_uri(
            "https://myresource.cognitiveservices.azure.com/"
        )
        is None
    )


def test_extract_region_returns_none_for_none() -> None:
    assert AzureVoiceProvider._extract_speech_region_from_uri(None) is None


# --- _validate_speech_region ---


def test_validate_region_normalizes_to_lowercase() -> None:
    assert AzureVoiceProvider._validate_speech_region("WestUS2") == "westus2"


def test_validate_region_accepts_hyphens() -> None:
    assert AzureVoiceProvider._validate_speech_region("us-east-1") == "us-east-1"


def test_validate_region_rejects_path_traversal() -> None:
    with pytest.raises(ValueError, match="Invalid Azure speech_region"):
        AzureVoiceProvider._validate_speech_region("westus/../../etc")


def test_validate_region_rejects_dots() -> None:
    with pytest.raises(ValueError, match="Invalid Azure speech_region"):
        AzureVoiceProvider._validate_speech_region("west.us")


# --- _pcm16_to_wav ---


def test_pcm16_to_wav_produces_valid_wav() -> None:
    samples = [32767, -32768, 0, 1234]
    pcm_data = struct.pack(f"<{len(samples)}h", *samples)
    wav_bytes = AzureVoiceProvider._pcm16_to_wav(pcm_data, sample_rate=16000)

    with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
        assert wav_file.getnchannels() == 1
        assert wav_file.getsampwidth() == 2
        assert wav_file.getframerate() == 16000
        frames = wav_file.readframes(4)
        recovered = struct.unpack(f"<{len(samples)}h", frames)
        assert list(recovered) == samples


# --- URL Construction ---


def test_get_tts_url_cloud() -> None:
    provider = AzureVoiceProvider(
        api_key="key", api_base=None, custom_config={"speech_region": "eastus"}
    )
    assert (
        provider._get_tts_url()
        == "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
    )


def test_get_stt_url_cloud() -> None:
    provider = AzureVoiceProvider(
        api_key="key", api_base=None, custom_config={"speech_region": "westus2"}
    )
    assert "westus2.stt.speech.microsoft.com" in provider._get_stt_url()


def test_get_tts_url_self_hosted() -> None:
    provider = AzureVoiceProvider(
        api_key="key", api_base="http://localhost:5000", custom_config={}
    )
    assert provider._get_tts_url() == "http://localhost:5000/cognitiveservices/v1"


def test_get_tts_url_self_hosted_strips_trailing_slash() -> None:
    provider = AzureVoiceProvider(
        api_key="key", api_base="http://localhost:5000/", custom_config={}
    )
    assert provider._get_tts_url() == "http://localhost:5000/cognitiveservices/v1"


# --- _is_self_hosted ---


def test_is_self_hosted_true_for_custom_endpoint() -> None:
    provider = AzureVoiceProvider(
        api_key="key", api_base="http://localhost:5000", custom_config={}
    )
    assert provider._is_self_hosted() is True


def test_is_self_hosted_false_for_azure_cloud() -> None:
    provider = AzureVoiceProvider(
        api_key="key",
        api_base="https://eastus.api.cognitive.microsoft.com/",
        custom_config={},
    )
    assert provider._is_self_hosted() is False


# --- Resampling ---


def test_resample_pcm16_passthrough() -> None:
    from onyx.voice.providers.azure import AzureStreamingTranscriber

    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
    t.input_sample_rate = 16000
    t.target_sample_rate = 16000

    data = struct.pack("<4h", 100, 200, 300, 400)
    assert t._resample_pcm16(data) == data


def test_resample_pcm16_downsamples() -> None:
    from onyx.voice.providers.azure import AzureStreamingTranscriber

    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
    t.input_sample_rate = 24000
    t.target_sample_rate = 16000

    input_samples = [1000, 2000, 3000, 4000, 5000, 6000]
    data = struct.pack(f"<{len(input_samples)}h", *input_samples)

    result = t._resample_pcm16(data)
    assert len(result) // 2 == 4


def test_resample_pcm16_empty_data() -> None:
    from onyx.voice.providers.azure import AzureStreamingTranscriber

    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
    t.input_sample_rate = 24000
    t.target_sample_rate = 16000

    assert t._resample_pcm16(b"") == b""


================================================
FILE: backend/tests/unit/onyx/voice/providers/test_elevenlabs_provider.py
================================================
import struct

from onyx.voice.providers.elevenlabs import _http_to_ws_url
from onyx.voice.providers.elevenlabs import DEFAULT_ELEVENLABS_API_BASE
from onyx.voice.providers.elevenlabs import ElevenLabsSTTMessageType
from onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider


# --- _http_to_ws_url ---


def test_http_to_ws_url_converts_https_to_wss() -> None:
    assert _http_to_ws_url("https://api.elevenlabs.io") == "wss://api.elevenlabs.io"


def test_http_to_ws_url_converts_http_to_ws() -> None:
    assert _http_to_ws_url("http://localhost:8080") == "ws://localhost:8080"


def test_http_to_ws_url_passes_through_other_schemes() -> None:
    assert _http_to_ws_url("wss://already.ws") == "wss://already.ws"


def test_http_to_ws_url_preserves_path() -> None:
    assert (
        _http_to_ws_url("https://api.elevenlabs.io/v1/tts")
        == "wss://api.elevenlabs.io/v1/tts"
    )


# --- StrEnum comparison ---


def test_stt_message_type_compares_as_string() -> None:
    """StrEnum members should work in string comparisons (e.g. from JSON)."""
    assert str(ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT) == "committed_transcript"
    assert isinstance(ElevenLabsSTTMessageType.ERROR, str)


# --- Resampling ---


def test_resample_pcm16_passthrough_when_same_rate() -> None:
    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber

    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
    t.input_sample_rate = 16000
    t.target_sample_rate = 16000

    data = struct.pack("<4h", 100, 200, 300, 400)
    assert t._resample_pcm16(data) == data


def test_resample_pcm16_downsamples() -> None:
    """24kHz -> 16kHz should produce fewer samples (ratio 3:2)."""
    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber

    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
    t.input_sample_rate = 24000
    t.target_sample_rate = 16000

    input_samples = [1000, 2000, 3000, 4000, 5000, 6000]
    data = struct.pack(f"<{len(input_samples)}h", *input_samples)

    result = t._resample_pcm16(data)
    output_samples = struct.unpack(f"<{len(result) // 2}h", result)

    assert len(output_samples) == 4


def test_resample_pcm16_clamps_to_int16_range() -> None:
    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber

    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
    t.input_sample_rate = 24000
    t.target_sample_rate = 16000

    input_samples = [32767, -32768, 32767, -32768, 32767, -32768]
    data = struct.pack(f"<{len(input_samples)}h", *input_samples)

    result = t._resample_pcm16(data)
    output_samples = struct.unpack(f"<{len(result) // 2}h", result)
    for s in output_samples:
        assert -32768 <= s <= 32767


# --- Provider Model Defaulting ---


def test_provider_defaults_invalid_stt_model() -> None:
    provider = ElevenLabsVoiceProvider(api_key="test", stt_model="invalid_model")
    assert provider.stt_model == "scribe_v1"


def test_provider_defaults_invalid_tts_model() -> None:
    provider = ElevenLabsVoiceProvider(api_key="test", tts_model="invalid_model")
    assert provider.tts_model == "eleven_multilingual_v2"


def test_provider_accepts_valid_models() -> None:
    provider = ElevenLabsVoiceProvider(
        api_key="test", stt_model="scribe_v2_realtime", tts_model="eleven_turbo_v2_5"
    )
    assert provider.stt_model == "scribe_v2_realtime"
    assert provider.tts_model == "eleven_turbo_v2_5"


def test_provider_defaults_api_base() -> None:
    provider = ElevenLabsVoiceProvider(api_key="test")
    assert provider.api_base == DEFAULT_ELEVENLABS_API_BASE


def test_provider_get_available_voices_returns_copy() -> None:
    provider = ElevenLabsVoiceProvider(api_key="test")
    voices = provider.get_available_voices()
    voices.clear()
    assert len(provider.get_available_voices()) > 0


================================================
FILE: backend/tests/unit/onyx/voice/providers/test_openai_provider.py
================================================
import io
import struct
import wave

from onyx.voice.providers.openai import _create_wav_header
from onyx.voice.providers.openai import _http_to_ws_url
from onyx.voice.providers.openai import OpenAIRealtimeMessageType
from onyx.voice.providers.openai import OpenAIVoiceProvider


# --- _http_to_ws_url ---


def test_http_to_ws_url_converts_https_to_wss() -> None:
    assert _http_to_ws_url("https://api.openai.com") == "wss://api.openai.com"


def test_http_to_ws_url_converts_http_to_ws() -> None:
    assert _http_to_ws_url("http://localhost:9090") == "ws://localhost:9090"


def test_http_to_ws_url_passes_through_ws() -> None:
    assert _http_to_ws_url("wss://already.ws") == "wss://already.ws"


# --- StrEnum comparison ---


def test_realtime_message_type_compares_as_string() -> None:
    assert str(OpenAIRealtimeMessageType.ERROR) == "error"
    assert (
        str(OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA)
        == "conversation.item.input_audio_transcription.delta"
    )
    assert isinstance(OpenAIRealtimeMessageType.ERROR, str)


# --- _create_wav_header ---


def test_wav_header_is_44_bytes() -> None:
    assert len(_create_wav_header(1000)) == 44


def test_wav_header_chunk_size_matches_data_length() -> None:
    data_length = 2000
    header = _create_wav_header(data_length)
    chunk_size = struct.unpack_from("<I", header, 4)[0]
    assert chunk_size == 36 + data_length


def test_wav_header_byte_rate() -> None:
    header = _create_wav_header(100, sample_rate=24000, channels=1, bits_per_sample=16)
    byte_rate = struct.unpack_from("<I", header, 28)[0]
    assert byte_rate == 24000 * 1 * 16 // 8


def test_wav_header_produces_valid_wav() -> None:
    """Header + PCM data should parse as valid WAV."""
    data_length = 100
    pcm_data = b"\x00" * data_length
    header = _create_wav_header(data_length, sample_rate=24000)

    with wave.open(io.BytesIO(header + pcm_data), "rb") as wav_file:
        assert wav_file.getnchannels() == 1
        assert wav_file.getsampwidth() == 2
        assert wav_file.getframerate() == 24000
        assert wav_file.getnframes() == data_length // 2


# --- Provider Defaults ---


def test_provider_default_models() -> None:
    provider = OpenAIVoiceProvider(api_key="test")
    assert provider.stt_model == "whisper-1"
    assert provider.tts_model == "tts-1"
    assert provider.default_voice == "alloy"


def test_provider_custom_models() -> None:
    provider = OpenAIVoiceProvider(
        api_key="test",
        stt_model="gpt-4o-transcribe",
        tts_model="tts-1-hd",
        default_voice="nova",
    )
    assert provider.stt_model == "gpt-4o-transcribe"
    assert provider.tts_model == "tts-1-hd"
    assert provider.default_voice == "nova"


def test_provider_get_available_voices_returns_copy() -> None:
    provider = OpenAIVoiceProvider(api_key="test")
    voices = provider.get_available_voices()
    voices.clear()
    assert len(provider.get_available_voices()) > 0


================================================
FILE: backend/tests/unit/scripts/__init__.py
================================================


================================================
FILE: backend/tests/unit/server/metrics/test_celery_task_metrics.py
================================================
"""Tests for generic Celery task lifecycle Prometheus metrics."""

from collections.abc import Iterator
from unittest.mock import MagicMock

import pytest

from onyx.server.metrics.celery_task_metrics import _task_start_times
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import TASK_COMPLETED
from onyx.server.metrics.celery_task_metrics import TASK_DURATION
from onyx.server.metrics.celery_task_metrics import TASK_STARTED
from onyx.server.metrics.celery_task_metrics import TASKS_ACTIVE


@pytest.fixture(autouse=True)
def reset_metrics() -> Iterator[None]:
    """Clear metric state between tests."""
    _task_start_times.clear()
    yield
    _task_start_times.clear()


def _make_task(name: str = "test_task", queue: str = "test_queue") -> MagicMock:
    task = MagicMock()
    task.name = name
    task.request = MagicMock()
    task.request.delivery_info = {"routing_key": queue}
    return task


class TestCeleryTaskPrerun:
    def test_increments_started_and_active(self) -> None:
        task = _make_task()
        before_started = TASK_STARTED.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()
        before_active = TASKS_ACTIVE.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()

        on_celery_task_prerun("task-1", task)

        after_started = TASK_STARTED.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()
        after_active = TASKS_ACTIVE.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()

        assert after_started == before_started + 1
        assert after_active == before_active + 1

    def test_records_start_time(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)
        assert "task-1" in _task_start_times

    def test_noop_when_task_is_none(self) -> None:
        on_celery_task_prerun("task-1", None)
        assert "task-1" not in _task_start_times

    def test_noop_when_task_id_is_none(self) -> None:
        task = _make_task()
        on_celery_task_prerun(None, task)
        # Should not crash

    def test_handles_missing_delivery_info(self) -> None:
        task = _make_task()
        task.request.delivery_info = None
        on_celery_task_prerun("task-1", task)
        assert "task-1" in _task_start_times


class TestCeleryTaskPostrun:
    def test_increments_completed_success(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)

        before = TASK_COMPLETED.labels(
            task_name="test_task", queue="test_queue", outcome="success"
        )._value.get()

        on_celery_task_postrun("task-1", task, "SUCCESS")

        after = TASK_COMPLETED.labels(
            task_name="test_task", queue="test_queue", outcome="success"
        )._value.get()
        assert after == before + 1

    def test_increments_completed_failure(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)

        before = TASK_COMPLETED.labels(
            task_name="test_task", queue="test_queue", outcome="failure"
        )._value.get()

        on_celery_task_postrun("task-1", task, "FAILURE")

        after = TASK_COMPLETED.labels(
            task_name="test_task", queue="test_queue", outcome="failure"
        )._value.get()
        assert after == before + 1

    def test_decrements_active(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)

        active_before = TASKS_ACTIVE.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()

        on_celery_task_postrun("task-1", task, "SUCCESS")

        active_after = TASKS_ACTIVE.labels(
            task_name="test_task", queue="test_queue"
        )._value.get()
        assert active_after == active_before - 1

    def test_observes_duration(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)

        before_count = TASK_DURATION.labels(
            task_name="test_task", queue="test_queue"
        )._sum.get()

        on_celery_task_postrun("task-1", task, "SUCCESS")

        after_count = TASK_DURATION.labels(
            task_name="test_task", queue="test_queue"
        )._sum.get()
        # Duration should have increased (at least slightly)
        assert after_count > before_count

    def test_cleans_up_start_time(self) -> None:
        task = _make_task()
        on_celery_task_prerun("task-1", task)
        assert "task-1" in _task_start_times

        on_celery_task_postrun("task-1", task, "SUCCESS")
        assert "task-1" not in _task_start_times

    def test_noop_when_task_is_none(self) -> None:
        on_celery_task_postrun("task-1", None, "SUCCESS")

    def test_handles_missing_start_time(self) -> None:
        """Postrun without prerun should not crash."""
        task = _make_task()
        on_celery_task_postrun("task-1", task, "SUCCESS")
        # Should not raise


================================================
FILE: backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py
================================================
"""Tests for indexing pipeline Prometheus collectors."""

from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector


@pytest.fixture(autouse=True)
def _mock_broker_client() -> Iterator[None]:
    """Patch celery_get_broker_client for all collector tests."""
    with patch(
        "onyx.background.celery.celery_redis.celery_get_broker_client",
        return_value=MagicMock(),
    ):
        yield


class TestQueueDepthCollector:
    def test_returns_empty_when_factory_not_set(self) -> None:
        collector = QueueDepthCollector()
        assert collector.collect() == []

    def test_returns_empty_describe(self) -> None:
        collector = QueueDepthCollector()
        assert collector.describe() == []

    def test_collects_queue_depths(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
        collector.set_celery_app(MagicMock())

        with (
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
                return_value=5,
            ),
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids",
                return_value={"task-1", "task-2"},
            ),
        ):
            families = collector.collect()

        assert len(families) == 3
        depth_family = families[0]
        unacked_family = families[1]
        age_family = families[2]

        assert depth_family.name == "onyx_queue_depth"
        assert len(depth_family.samples) > 0
        for sample in depth_family.samples:
            assert sample.value == 5

        assert unacked_family.name == "onyx_queue_unacked"
        unacked_labels = {s.labels["queue"] for s in unacked_family.samples}
        assert "docfetching" in unacked_labels
        assert "docprocessing" in unacked_labels

        assert age_family.name == "onyx_queue_oldest_task_age_seconds"
        for sample in unacked_family.samples:
            assert sample.value == 2

    def test_handles_redis_error_gracefully(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
        MagicMock()
        collector.set_celery_app(MagicMock())

        with patch(
            "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
            side_effect=Exception("connection lost"),
        ):
            families = collector.collect()

        # Returns stale cache (empty on first call)
        assert families == []

    def test_caching_returns_stale_within_ttl(self) -> None:
        collector = QueueDepthCollector(cache_ttl=60)
        MagicMock()
        collector.set_celery_app(MagicMock())

        with (
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
                return_value=5,
            ),
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids",
                return_value=set(),
            ),
        ):
            first = collector.collect()

        # Second call within TTL should return cached result without calling Redis
        with patch(
            "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
            side_effect=Exception("should not be called"),
        ):
            second = collector.collect()

        assert first is second  # Same object, from cache

    def test_error_returns_stale_cache(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
        MagicMock()
        collector.set_celery_app(MagicMock())

        # First call succeeds
        with (
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
                return_value=10,
            ),
            patch(
                "onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids",
                return_value=set(),
            ),
        ):
            good_result = collector.collect()

        assert len(good_result) == 3
        assert good_result[0].samples[0].value == 10

        # Second call fails — should return stale cache, not empty
        with patch(
            "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
            side_effect=Exception("Redis down"),
        ):
            stale_result = collector.collect()

        assert stale_result is good_result


class TestIndexAttemptCollector:
    def test_returns_empty_when_not_configured(self) -> None:
        collector = IndexAttemptCollector()
        assert collector.collect() == []

    def test_returns_empty_describe(self) -> None:
        collector = IndexAttemptCollector()
        assert collector.describe() == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_index_attempts(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        from onyx.db.enums import IndexingStatus

        mock_row = (
            IndexingStatus.IN_PROGRESS,
            MagicMock(value="web"),
            81,
            "Table Tennis Blade Guide",
            2,
        )
        mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [
            mock_row
        ]

        families = collector.collect()
        assert len(families) == 1
        assert families[0].name == "onyx_index_attempts_active"
        assert len(families[0].samples) == 1
        sample = families[0].samples[0]
        assert sample.labels == {
            "status": "in_progress",
            "source": "web",
            "tenant_id": "public",
            "connector_name": "Table Tennis Blade Guide",
            "cc_pair_id": "81",
        }
        assert sample.value == 2

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.side_effect = Exception("DB down")
        families = collector.collect()
        # No stale cache, so returns empty
        assert families == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_skips_none_tenant_ids(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = [None]
        families = collector.collect()
        assert len(families) == 1  # Returns the gauge family, just with no samples
        assert len(families[0].samples) == 0


class TestConnectorHealthCollector:
    def test_returns_empty_when_not_configured(self) -> None:
        collector = ConnectorHealthCollector()
        assert collector.collect() == []

    def test_returns_empty_describe(self) -> None:
        collector = ConnectorHealthCollector()
        assert collector.describe() == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_connector_health(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        now = datetime.now(tz=timezone.utc)
        last_success = now - timedelta(hours=2)

        mock_status = MagicMock(value="ACTIVE")
        mock_source = MagicMock(value="google_drive")
        # Row: (id, status, in_error, last_success, name, source)
        mock_row = (
            42,
            mock_status,
            True,  # in_repeated_error_state
            last_success,
            "My GDrive Connector",
            mock_source,
        )
        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]

        # Mock the index attempt queries (error counts + docs counts)
        mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (
            []
        )

        families = collector.collect()

        assert len(families) == 6
        names = {f.name for f in families}
        assert names == {
            "onyx_connector_last_success_age_seconds",
            "onyx_connector_in_error_state",
            "onyx_connectors_by_status",
            "onyx_connectors_in_error_total",
            "onyx_connector_docs_indexed",
            "onyx_connector_error_count",
        }

        staleness = next(
            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
        )
        assert len(staleness.samples) == 1
        assert staleness.samples[0].value == pytest.approx(7200, abs=5)

        error_state = next(
            f for f in families if f.name == "onyx_connector_in_error_state"
        )
        assert error_state.samples[0].value == 1.0

        by_status = next(f for f in families if f.name == "onyx_connectors_by_status")
        assert by_status.samples[0].labels == {
            "tenant_id": "public",
            "status": "ACTIVE",
        }
        assert by_status.samples[0].value == 1

        error_total = next(
            f for f in families if f.name == "onyx_connectors_in_error_total"
        )
        assert error_total.samples[0].value == 1

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_skips_staleness_when_no_last_success(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        mock_status = MagicMock(value="INITIAL_INDEXING")
        mock_source = MagicMock(value="slack")
        mock_row = (
            10,
            mock_status,
            False,
            None,  # no last_successful_index_time
            0,
            mock_source,
        )
        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]

        families = collector.collect()

        staleness = next(
            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
        )
        assert len(staleness.samples) == 0

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.side_effect = Exception("DB down")
        families = collector.collect()
        assert families == []


================================================
FILE: backend/tests/unit/server/metrics/test_indexing_pipeline_setup.py
================================================
"""Tests for indexing pipeline setup."""

from unittest.mock import MagicMock

from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector


class TestCollectorCeleryAppSetup:
    def test_queue_depth_collector_uses_celery_app(self) -> None:
        """QueueDepthCollector.set_celery_app stores the app for broker access."""
        collector = QueueDepthCollector()
        mock_app = MagicMock()
        collector.set_celery_app(mock_app)
        assert collector._celery_app is mock_app

    def test_redis_health_collector_uses_celery_app(self) -> None:
        """RedisHealthCollector.set_celery_app stores the app for broker access."""
        collector = RedisHealthCollector()
        mock_app = MagicMock()
        collector.set_celery_app(mock_app)
        assert collector._celery_app is mock_app


================================================
FILE: backend/tests/unit/server/metrics/test_indexing_task_metrics.py
================================================
"""Tests for per-connector indexing task Prometheus metrics."""

from collections.abc import Iterator
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.server.metrics.indexing_task_metrics import _connector_cache
from onyx.server.metrics.indexing_task_metrics import _indexing_start_times
from onyx.server.metrics.indexing_task_metrics import ConnectorInfo
from onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_COMPLETED
from onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_DURATION
from onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_STARTED
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun
from onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun


@pytest.fixture(autouse=True)
def reset_state() -> Iterator[None]:
    """Clear caches and state between tests.

    Sets CURRENT_TENANT_ID_CONTEXTVAR to a realistic value so cache keys
    are never keyed on an empty string.
    """
    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

    token = CURRENT_TENANT_ID_CONTEXTVAR.set("test_tenant")
    _connector_cache.clear()
    _indexing_start_times.clear()
    yield
    _connector_cache.clear()
    _indexing_start_times.clear()
    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


def _make_task(name: str) -> MagicMock:
    task = MagicMock()
    task.name = name
    return task


def _mock_db_lookup(
    source: str = "google_drive", name: str = "My Google Drive"
) -> tuple:
    """Return (session_patch, cc_pair_patch) context managers for DB mocking."""
    mock_cc_pair = MagicMock()
    mock_cc_pair.name = name
    mock_cc_pair.connector.source.value = source

    session_patch = patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    cc_pair_patch = patch(
        "onyx.db.connector_credential_pair.get_connector_credential_pair_from_id",
        return_value=mock_cc_pair,
    )
    return session_patch, cc_pair_patch


class TestIndexingTaskPrerun:
    def test_skips_non_indexing_task(self) -> None:
        task = _make_task("some_other_task")
        kwargs = {"cc_pair_id": 1, "tenant_id": "public"}
        on_indexing_task_prerun("task-1", task, kwargs)
        assert "task-1" not in _indexing_start_times

    def test_emits_started_for_docfetching(self) -> None:
        # Pre-populate cache to avoid DB lookup (tenant-scoped key)
        _connector_cache[("test_tenant", 42)] = ConnectorInfo(
            source="google_drive", name="My Google Drive"
        )

        task = _make_task("connector_doc_fetching_task")
        kwargs = {"cc_pair_id": 42, "tenant_id": "tenant-1"}

        before = INDEXING_TASK_STARTED.labels(
            task_name="connector_doc_fetching_task",
            source="google_drive",
            tenant_id="tenant-1",
            cc_pair_id="42",
        )._value.get()

        on_indexing_task_prerun("task-1", task, kwargs)

        after = INDEXING_TASK_STARTED.labels(
            task_name="connector_doc_fetching_task",
            source="google_drive",
            tenant_id="tenant-1",
            cc_pair_id="42",
        )._value.get()

        assert after == before + 1
        assert "task-1" in _indexing_start_times

    def test_emits_started_for_docprocessing(self) -> None:
        _connector_cache[("test_tenant", 10)] = ConnectorInfo(
            source="slack", name="Slack Connector"
        )

        task = _make_task("docprocessing_task")
        kwargs = {"cc_pair_id": 10, "tenant_id": "public"}

        on_indexing_task_prerun("task-2", task, kwargs)
        assert "task-2" in _indexing_start_times

    def test_cache_hit_avoids_db_call(self) -> None:
        _connector_cache[("test_tenant", 42)] = ConnectorInfo(
            source="confluence", name="Engineering Confluence"
        )

        task = _make_task("connector_doc_fetching_task")
        kwargs = {"cc_pair_id": 42, "tenant_id": "public"}

        # No DB patches needed — cache should be used
        on_indexing_task_prerun("task-1", task, kwargs)
        assert "task-1" in _indexing_start_times

    def test_db_lookup_on_cache_miss(self) -> None:
        """On first encounter of a cc_pair_id, does a DB lookup and caches."""
        mock_cc_pair = MagicMock()
        mock_cc_pair.name = "Notion Workspace"
        mock_cc_pair.connector.source.value = "notion"

        mock_session = MagicMock()
        mock_session.__enter__ = MagicMock(return_value=MagicMock())
        mock_session.__exit__ = MagicMock(return_value=False)

        with (
            patch(
                "onyx.server.metrics.indexing_task_metrics._resolve_connector"
            ) as mock_resolve,
        ):
            mock_resolve.return_value = ConnectorInfo(
                source="notion", name="Notion Workspace"
            )

            task = _make_task("connector_doc_fetching_task")
            kwargs = {"cc_pair_id": 77, "tenant_id": "public"}

            on_indexing_task_prerun("task-1", task, kwargs)
            mock_resolve.assert_called_once_with(77)

    def test_missing_cc_pair_returns_unknown(self) -> None:
        """When _resolve_connector can't find the cc_pair, uses 'unknown'."""
        with patch(
            "onyx.server.metrics.indexing_task_metrics._resolve_connector"
        ) as mock_resolve:
            mock_resolve.return_value = ConnectorInfo(source="unknown", name="unknown")

            task = _make_task("connector_doc_fetching_task")
            kwargs = {"cc_pair_id": 999, "tenant_id": "public"}

            on_indexing_task_prerun("task-1", task, kwargs)
            assert "task-1" in _indexing_start_times

    def test_skips_when_cc_pair_id_missing(self) -> None:
        task = _make_task("connector_doc_fetching_task")
        kwargs = {"tenant_id": "public"}
        on_indexing_task_prerun("task-1", task, kwargs)
        assert "task-1" not in _indexing_start_times

    def test_db_error_does_not_crash(self) -> None:
        with patch(
            "onyx.server.metrics.indexing_task_metrics._resolve_connector",
            side_effect=Exception("DB down"),
        ):
            task = _make_task("connector_doc_fetching_task")
            kwargs = {"cc_pair_id": 1, "tenant_id": "public"}
            # Should not raise
            on_indexing_task_prerun("task-1", task, kwargs)


class TestIndexingTaskPostrun:
    def test_skips_non_indexing_task(self) -> None:
        task = _make_task("some_other_task")
        kwargs = {"cc_pair_id": 1, "tenant_id": "public"}
        on_indexing_task_postrun("task-1", task, kwargs, "SUCCESS")
        # Should not raise

    def test_emits_completed_and_duration(self) -> None:
        _connector_cache[("test_tenant", 42)] = ConnectorInfo(
            source="google_drive", name="Marketing Drive"
        )

        task = _make_task("docprocessing_task")
        kwargs = {"cc_pair_id": 42, "tenant_id": "public"}

        # Simulate prerun
        on_indexing_task_prerun("task-1", task, kwargs)

        before_completed = INDEXING_TASK_COMPLETED.labels(
            task_name="docprocessing_task",
            source="google_drive",
            tenant_id="public",
            cc_pair_id="42",
            outcome="success",
        )._value.get()

        before_duration = INDEXING_TASK_DURATION.labels(
            task_name="docprocessing_task",
            source="google_drive",
            tenant_id="public",
        )._sum.get()

        on_indexing_task_postrun("task-1", task, kwargs, "SUCCESS")

        after_completed = INDEXING_TASK_COMPLETED.labels(
            task_name="docprocessing_task",
            source="google_drive",
            tenant_id="public",
            cc_pair_id="42",
            outcome="success",
        )._value.get()

        after_duration = INDEXING_TASK_DURATION.labels(
            task_name="docprocessing_task",
            source="google_drive",
            tenant_id="public",
        )._sum.get()

        assert after_completed == before_completed + 1
        assert after_duration > before_duration

    def test_failure_outcome(self) -> None:
        _connector_cache[("test_tenant", 42)] = ConnectorInfo(
            source="slack", name="Slack"
        )

        task = _make_task("connector_doc_fetching_task")
        kwargs = {"cc_pair_id": 42, "tenant_id": "public"}

        on_indexing_task_prerun("task-1", task, kwargs)

        before = INDEXING_TASK_COMPLETED.labels(
            task_name="connector_doc_fetching_task",
            source="slack",
            tenant_id="public",
            cc_pair_id="42",
            outcome="failure",
        )._value.get()

        on_indexing_task_postrun("task-1", task, kwargs, "FAILURE")

        after = INDEXING_TASK_COMPLETED.labels(
            task_name="connector_doc_fetching_task",
            source="slack",
            tenant_id="public",
            cc_pair_id="42",
            outcome="failure",
        )._value.get()

        assert after == before + 1

    def test_handles_postrun_without_prerun(self) -> None:
        """Postrun for an indexing task without a matching prerun should not crash."""
        _connector_cache[("test_tenant", 42)] = ConnectorInfo(
            source="slack", name="Slack"
        )

        task = _make_task("docprocessing_task")
        kwargs = {"cc_pair_id": 42, "tenant_id": "public"}

        # No prerun — should still emit completed counter, just skip duration
        on_indexing_task_postrun("task-1", task, kwargs, "SUCCESS")


class TestResolveConnector:
    def test_failed_lookup_not_cached(self) -> None:
        """When DB lookup returns None, result should NOT be cached."""
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        token = CURRENT_TENANT_ID_CONTEXTVAR.set("test-tenant")
        try:
            with (
                patch("onyx.db.engine.sql_engine.get_session_with_current_tenant"),
                patch(
                    "onyx.db.connector_credential_pair"
                    ".get_connector_credential_pair_from_id",
                    return_value=None,
                ),
            ):
                from onyx.server.metrics.indexing_task_metrics import _resolve_connector

                result = _resolve_connector(999)
                assert result.source == "unknown"
                # Should NOT be cached so subsequent calls can retry
                assert ("test-tenant", 999) not in _connector_cache
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

    def test_exception_not_cached(self) -> None:
        """When DB lookup raises, result should NOT be cached."""
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        token = CURRENT_TENANT_ID_CONTEXTVAR.set("test-tenant")
        try:
            with (
                patch(
                    "onyx.db.engine.sql_engine.get_session_with_current_tenant",
                    side_effect=Exception("DB down"),
                ),
            ):
                from onyx.server.metrics.indexing_task_metrics import _resolve_connector

                result = _resolve_connector(888)
                assert result.source == "unknown"
                assert ("test-tenant", 888) not in _connector_cache
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

    def test_successful_lookup_is_cached(self) -> None:
        """When DB lookup succeeds, result should be cached."""
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        token = CURRENT_TENANT_ID_CONTEXTVAR.set("test-tenant")
        try:
            mock_cc_pair = MagicMock()
            mock_cc_pair.name = "My Drive"
            mock_cc_pair.connector.source.value = "google_drive"

            with (
                patch("onyx.db.engine.sql_engine.get_session_with_current_tenant"),
                patch(
                    "onyx.db.connector_credential_pair"
                    ".get_connector_credential_pair_from_id",
                    return_value=mock_cc_pair,
                ),
            ):
                from onyx.server.metrics.indexing_task_metrics import _resolve_connector

                result = _resolve_connector(777)
                assert result.source == "google_drive"
                assert result.name == "My Drive"
                assert ("test-tenant", 777) in _connector_cache
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


================================================
FILE: backend/tests/unit/server/metrics/test_metrics_server.py
================================================
"""Tests for the Prometheus metrics server module."""

from collections.abc import Iterator
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.server.metrics.metrics_server import _DEFAULT_PORTS
from onyx.server.metrics.metrics_server import start_metrics_server


@pytest.fixture(autouse=True)
def reset_server_state() -> Iterator[None]:
    """Reset the global _server_started between tests."""
    import onyx.server.metrics.metrics_server as mod

    mod._server_started = False
    yield
    mod._server_started = False


class TestStartMetricsServer:
    @patch("onyx.server.metrics.metrics_server.start_http_server")
    def test_uses_default_port_for_known_worker(self, mock_start: MagicMock) -> None:
        port = start_metrics_server("monitoring")
        assert port == _DEFAULT_PORTS["monitoring"]
        mock_start.assert_called_once_with(_DEFAULT_PORTS["monitoring"])

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    @patch.dict("os.environ", {"PROMETHEUS_METRICS_PORT": "9999"})
    def test_env_var_overrides_default(self, mock_start: MagicMock) -> None:
        port = start_metrics_server("monitoring")
        assert port == 9999
        mock_start.assert_called_once_with(9999)

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    @patch.dict("os.environ", {"PROMETHEUS_METRICS_ENABLED": "false"})
    def test_disabled_via_env_var(self, mock_start: MagicMock) -> None:
        port = start_metrics_server("monitoring")
        assert port is None
        mock_start.assert_not_called()

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    def test_unknown_worker_type_no_env_var(self, mock_start: MagicMock) -> None:
        port = start_metrics_server("unknown_worker")
        assert port is None
        mock_start.assert_not_called()

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    def test_idempotent(self, mock_start: MagicMock) -> None:
        port1 = start_metrics_server("monitoring")
        port2 = start_metrics_server("monitoring")
        assert port1 == _DEFAULT_PORTS["monitoring"]
        assert port2 is None
        mock_start.assert_called_once()

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    def test_handles_os_error(self, mock_start: MagicMock) -> None:
        mock_start.side_effect = OSError("Address already in use")
        port = start_metrics_server("monitoring")
        assert port is None

    @patch("onyx.server.metrics.metrics_server.start_http_server")
    @patch.dict("os.environ", {"PROMETHEUS_METRICS_PORT": "not_a_number"})
    def test_invalid_port_env_var_returns_none(self, mock_start: MagicMock) -> None:
        port = start_metrics_server("monitoring")
        assert port is None
        mock_start.assert_not_called()


================================================
FILE: backend/tests/unit/server/metrics/test_opensearch_search_metrics.py
================================================
"""Tests for OpenSearch search Prometheus metrics."""

from unittest.mock import patch

from onyx.document_index.opensearch.constants import OpenSearchSearchType
from onyx.server.metrics.opensearch_search import _client_duration
from onyx.server.metrics.opensearch_search import _search_total
from onyx.server.metrics.opensearch_search import _searches_in_progress
from onyx.server.metrics.opensearch_search import _server_duration
from onyx.server.metrics.opensearch_search import observe_opensearch_search
from onyx.server.metrics.opensearch_search import track_opensearch_search_in_progress


class TestObserveOpenSearchSearch:
    def test_increments_counter(self) -> None:
        search_type = OpenSearchSearchType.HYBRID
        before = _search_total.labels(search_type=search_type.value)._value.get()
        observe_opensearch_search(search_type, 0.1, 50)
        after = _search_total.labels(search_type=search_type.value)._value.get()
        assert after == before + 1

    def test_observes_client_duration(self) -> None:
        search_type = OpenSearchSearchType.KEYWORD
        before_sum = _client_duration.labels(search_type=search_type.value)._sum.get()
        observe_opensearch_search(search_type, 0.25, 100)
        after_sum = _client_duration.labels(search_type=search_type.value)._sum.get()
        assert after_sum == before_sum + 0.25

    def test_observes_server_duration(self) -> None:
        search_type = OpenSearchSearchType.SEMANTIC
        before_sum = _server_duration.labels(search_type=search_type.value)._sum.get()
        observe_opensearch_search(search_type, 0.3, 200)
        after_sum = _server_duration.labels(search_type=search_type.value)._sum.get()
        # 200ms should be recorded as 0.2s.
        assert after_sum == before_sum + 0.2

    def test_server_took_none_skips_server_histogram(self) -> None:
        search_type = OpenSearchSearchType.UNKNOWN
        before_server = _server_duration.labels(
            search_type=search_type.value
        )._sum.get()
        before_client = _client_duration.labels(
            search_type=search_type.value
        )._sum.get()
        before_total = _search_total.labels(search_type=search_type.value)._value.get()

        observe_opensearch_search(search_type, 0.1, None)

        # Server histogram should NOT be observed.
        after_server = _server_duration.labels(search_type=search_type.value)._sum.get()
        assert after_server == before_server

        # Client histogram and counter should still work.
        after_client = _client_duration.labels(search_type=search_type.value)._sum.get()
        after_total = _search_total.labels(search_type=search_type.value)._value.get()
        assert after_client == before_client + 0.1
        assert after_total == before_total + 1

    def test_exceptions_do_not_propagate(self) -> None:
        search_type = OpenSearchSearchType.RANDOM
        with patch.object(
            _search_total.labels(search_type=search_type.value),
            "inc",
            side_effect=RuntimeError("boom"),
        ):
            # Should not raise.
            observe_opensearch_search(search_type, 0.1, 50)


class TestTrackOpenSearchSearchInProgress:
    def test_gauge_increments_and_decrements(self) -> None:
        search_type = OpenSearchSearchType.HYBRID
        before = _searches_in_progress.labels(
            search_type=search_type.value
        )._value.get()

        with track_opensearch_search_in_progress(search_type):
            during = _searches_in_progress.labels(
                search_type=search_type.value
            )._value.get()
            assert during == before + 1

        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()
        assert after == before

    def test_gauge_decrements_on_exception(self) -> None:
        search_type = OpenSearchSearchType.SEMANTIC
        before = _searches_in_progress.labels(
            search_type=search_type.value
        )._value.get()

        raised = False
        try:
            with track_opensearch_search_in_progress(search_type):
                raise ValueError("simulated search failure")
        except ValueError:
            raised = True
        assert raised

        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()
        assert after == before

    def test_inc_exception_does_not_break_search(self) -> None:
        search_type = OpenSearchSearchType.KEYWORD
        before = _searches_in_progress.labels(
            search_type=search_type.value
        )._value.get()

        with patch.object(
            _searches_in_progress.labels(search_type=search_type.value),
            "inc",
            side_effect=RuntimeError("boom"),
        ):
            # Context manager should still yield without decrementing.
            with track_opensearch_search_in_progress(search_type):
                # Search logic would execute here.
                during = _searches_in_progress.labels(
                    search_type=search_type.value
                )._value.get()
                assert during == before

        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()
        assert after == before


================================================
FILE: backend/tests/unit/server/metrics/test_worker_health.py
================================================
"""Tests for WorkerHeartbeatMonitor and WorkerHealthCollector."""

import time
from unittest.mock import MagicMock

from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHeartbeatMonitor


class TestWorkerHeartbeatMonitor:
    def test_heartbeat_registers_worker(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})

        status = monitor.get_worker_status()
        assert "primary@host1" in status
        assert status["primary@host1"] is True

    def test_multiple_workers(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
        monitor._on_heartbeat({"hostname": "docfetching@host1"})
        monitor._on_heartbeat({"hostname": "monitoring@host1"})

        status = monitor.get_worker_status()
        assert len(status) == 3
        assert all(alive for alive in status.values())

    def test_offline_removes_worker(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
        monitor._on_offline({"hostname": "primary@host1"})

        status = monitor.get_worker_status()
        assert "primary@host1" not in status

    def test_stale_heartbeat_marks_worker_down(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        with monitor._lock:
            monitor._worker_last_seen["primary@host1"] = (
                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10
            )

        status = monitor.get_worker_status()
        assert status["primary@host1"] is False

    def test_very_stale_worker_is_pruned(self) -> None:
        """Workers dead for 2x the timeout are pruned from the dict."""
        monitor = WorkerHeartbeatMonitor(MagicMock())
        with monitor._lock:
            monitor._worker_last_seen["gone@host1"] = (
                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS * 2 - 10
            )

        status = monitor.get_worker_status()
        assert "gone@host1" not in status
        assert monitor.get_worker_status() == {}

    def test_heartbeat_refreshes_stale_worker(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        with monitor._lock:
            monitor._worker_last_seen["primary@host1"] = (
                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10
            )
        assert monitor.get_worker_status()["primary@host1"] is False

        monitor._on_heartbeat({"hostname": "primary@host1"})
        assert monitor.get_worker_status()["primary@host1"] is True

    def test_ignores_empty_hostname(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({})
        monitor._on_heartbeat({"hostname": ""})
        monitor._on_offline({})

        assert monitor.get_worker_status() == {}

    def test_returns_full_hostname_as_key(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "docprocessing@my-long-host.local"})

        status = monitor.get_worker_status()
        assert "docprocessing@my-long-host.local" in status

    def test_start_is_idempotent(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        # Mock the thread so we don't actually start one
        mock_thread = MagicMock()
        mock_thread.is_alive.return_value = True
        monitor._thread = mock_thread
        monitor._running = True

        # Second start should be a no-op
        monitor.start()
        # Thread constructor should not have been called again
        assert monitor._thread is mock_thread

    def test_thread_safety(self) -> None:
        """get_worker_status should not raise even if heartbeats arrive concurrently."""
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
        status = monitor.get_worker_status()
        monitor._on_heartbeat({"hostname": "primary@host1"})
        status2 = monitor.get_worker_status()
        assert status == status2


class TestWorkerHealthCollector:
    def test_returns_empty_when_no_monitor(self) -> None:
        collector = WorkerHealthCollector(cache_ttl=0)
        assert collector.collect() == []

    def test_collects_active_workers(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
        monitor._on_heartbeat({"hostname": "docfetching@host1"})
        monitor._on_heartbeat({"hostname": "monitoring@host1"})

        collector = WorkerHealthCollector(cache_ttl=0)
        collector.set_monitor(monitor)

        families = collector.collect()
        assert len(families) == 2

        active = families[0]
        assert active.name == "onyx_celery_active_worker_count"
        assert active.samples[0].value == 3

        up = families[1]
        assert up.name == "onyx_celery_worker_up"
        assert len(up.samples) == 3
        # Labels use short names (before @)
        labels = {s.labels["worker"] for s in up.samples}
        assert labels == {"primary", "docfetching", "monitoring"}
        for sample in up.samples:
            assert sample.value == 1

    def test_reports_dead_worker(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
        with monitor._lock:
            monitor._worker_last_seen["monitoring@host1"] = (
                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10
            )

        collector = WorkerHealthCollector(cache_ttl=0)
        collector.set_monitor(monitor)

        families = collector.collect()
        active = families[0]
        assert active.samples[0].value == 1

        up = families[1]
        samples_by_name = {s.labels["worker"]: s.value for s in up.samples}
        assert samples_by_name["primary"] == 1
        assert samples_by_name["monitoring"] == 0

    def test_empty_monitor_returns_zero(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())

        collector = WorkerHealthCollector(cache_ttl=0)
        collector.set_monitor(monitor)

        families = collector.collect()
        assert len(families) == 2
        active = families[0]
        assert active.samples[0].value == 0
        up = families[1]
        assert up.name == "onyx_celery_worker_up"
        assert len(up.samples) == 0


================================================
FILE: backend/tests/unit/tools/__init__.py
================================================


================================================
FILE: backend/tests/unit/tools/test_memory_tool_packets.py
================================================
"""Tests for memory tool streaming packet emissions."""

import queue
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from onyx.chat.emitter import Emitter
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.session_loading import create_memory_packets
from onyx.server.query_and_chat.streaming_models import MemoryToolDelta
from onyx.server.query_and_chat.streaming_models import MemoryToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
from onyx.tools.tool_implementations.memory.models import MemoryToolResponse


@pytest.fixture
def emitter_queue() -> queue.Queue:
    return queue.Queue()


@pytest.fixture
def emitter(emitter_queue: queue.Queue) -> Emitter:
    return Emitter(merged_queue=emitter_queue)


@pytest.fixture
def mock_llm() -> MagicMock:
    return MagicMock()


@pytest.fixture
def memory_tool(emitter: Emitter, mock_llm: MagicMock) -> MemoryTool:
    return MemoryTool(tool_id=1, emitter=emitter, llm=mock_llm)


@pytest.fixture
def placement() -> Placement:
    return Placement(turn_index=0, tab_index=0)


@pytest.fixture
def override_kwargs() -> MemoryToolOverrideKwargs:
    return MemoryToolOverrideKwargs(
        user_name="Test User",
        user_email="test@example.com",
        user_role=None,
        existing_memories=["User likes dark mode"],
        chat_history=[],
    )


class TestMemoryToolEmitStart:
    def test_emit_start_emits_memory_tool_start_packet(
        self,
        memory_tool: MemoryTool,
        emitter_queue: queue.Queue,
        placement: Placement,
    ) -> None:
        memory_tool.emit_start(placement)

        _key, packet = emitter_queue.get_nowait()
        assert isinstance(packet.obj, MemoryToolStart)
        assert packet.placement is not None
        assert packet.placement.turn_index == placement.turn_index
        assert packet.placement.tab_index == placement.tab_index
        assert packet.placement.model_index == 0  # emitter stamps model_index=0

    def test_emit_start_with_different_placement(
        self,
        memory_tool: MemoryTool,
        emitter_queue: queue.Queue,
    ) -> None:
        placement = Placement(turn_index=2, tab_index=1)
        memory_tool.emit_start(placement)

        _key, packet = emitter_queue.get_nowait()
        assert packet.placement.turn_index == 2
        assert packet.placement.tab_index == 1


class TestMemoryToolRun:
    @patch("onyx.tools.tool_implementations.memory.memory_tool.process_memory_update")
    def test_run_emits_delta_for_add_operation(
        self,
        mock_process: MagicMock,
        memory_tool: MemoryTool,
        emitter_queue: queue.Queue,
        placement: Placement,
        override_kwargs: MemoryToolOverrideKwargs,
    ) -> None:
        mock_process.return_value = ("User prefers Python", None)

        memory_tool.run(
            placement=placement,
            override_kwargs=override_kwargs,
            memory="User prefers Python",
        )

        _key, packet = emitter_queue.get_nowait()
        assert isinstance(packet.obj, MemoryToolDelta)
        assert packet.obj.memory_text == "User prefers Python"
        assert packet.obj.operation == "add"
        assert packet.obj.memory_id is None
        assert packet.obj.index is None

    @patch("onyx.tools.tool_implementations.memory.memory_tool.process_memory_update")
    def test_run_emits_delta_for_update_operation(
        self,
        mock_process: MagicMock,
        memory_tool: MemoryTool,
        emitter_queue: queue.Queue,
        placement: Placement,
        override_kwargs: MemoryToolOverrideKwargs,
    ) -> None:
        mock_process.return_value = ("User prefers light mode", 0)

        memory_tool.run(
            placement=placement,
            override_kwargs=override_kwargs,
            memory="User prefers light mode",
        )

        _key, packet = emitter_queue.get_nowait()
        assert isinstance(packet.obj, MemoryToolDelta)
        assert packet.obj.memory_text == "User prefers light mode"
        assert packet.obj.operation == "update"
        assert packet.obj.memory_id is None
        assert packet.obj.index == 0

    @patch("onyx.tools.tool_implementations.memory.memory_tool.process_memory_update")
    def test_run_returns_tool_response_with_rich_response(
        self,
        mock_process: MagicMock,
        memory_tool: MemoryTool,
        placement: Placement,
        override_kwargs: MemoryToolOverrideKwargs,
    ) -> None:
        mock_process.return_value = ("User prefers Python", None)

        result = memory_tool.run(
            placement=placement,
            override_kwargs=override_kwargs,
            memory="User prefers Python",
        )

        assert isinstance(result.rich_response, MemoryToolResponse)
        assert result.rich_response.memory_text == "User prefers Python"
        assert result.rich_response.index_to_replace is None
        assert "User prefers Python" in result.llm_facing_response


class TestCreateMemoryPackets:
    def test_produces_start_delta_end_for_add(self) -> None:
        packets = create_memory_packets(
            memory_text="User likes Python",
            operation="add",
            memory_id=None,
            turn_index=1,
            tab_index=0,
        )

        assert len(packets) == 3
        assert isinstance(packets[0].obj, MemoryToolStart)
        assert isinstance(packets[1].obj, MemoryToolDelta)
        assert isinstance(packets[2].obj, SectionEnd)

        delta = packets[1].obj
        assert isinstance(delta, MemoryToolDelta)
        assert delta.memory_text == "User likes Python"
        assert delta.operation == "add"
        assert delta.memory_id is None
        assert delta.index is None

    def test_produces_start_delta_end_for_update(self) -> None:
        packets = create_memory_packets(
            memory_text="User prefers light mode",
            operation="update",
            memory_id=42,
            turn_index=3,
            tab_index=1,
            index=5,
        )

        assert len(packets) == 3
        assert isinstance(packets[0].obj, MemoryToolStart)
        assert isinstance(packets[1].obj, MemoryToolDelta)
        assert isinstance(packets[2].obj, SectionEnd)

        delta = packets[1].obj
        assert isinstance(delta, MemoryToolDelta)
        assert delta.memory_text == "User prefers light mode"
        assert delta.operation == "update"
        assert delta.memory_id == 42
        assert delta.index == 5

    def test_placement_is_set_correctly(self) -> None:
        packets = create_memory_packets(
            memory_text="test",
            operation="add",
            memory_id=None,
            turn_index=5,
            tab_index=2,
        )

        for packet in packets:
            assert packet.placement.turn_index == 5
            assert packet.placement.tab_index == 2


================================================
FILE: contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md
================================================
# Enterprise Edition Contribution IP Assignment Agreement (DanswerAI, Inc.)

**Effective Date:** ______________________

This Enterprise Edition Contribution IP Assignment Agreement (the “**Agreement**”) is entered into by and between:

- **DanswerAI, Inc.** (“**Company**”), the maintainer of the Onyx product, and  
- **Contributor:** ______________________ (“**Contributor**”)

Company and Contributor may be referred to individually as a “**Party**” and collectively as the “**Parties**.”

## 1. Purpose and scope

Onyx’s repository is primarily licensed under the MIT License, but includes **proprietary-licensed Enterprise Edition components** (as defined below). This Agreement applies **only** to Contributions made to the Enterprise Edition components and is intended to ensure Company owns all rights necessary to license, distribute, and commercialize Enterprise Edition features.

## 2. Definitions

2.1 **“Enterprise Edition” or “EE”** means (a) any source code, documentation, configuration, assets, tests, build scripts, or other materials located in or under **any directory named `ee`** anywhere in the repository (including nested paths), and (b) any other files or directories that are explicitly marked as proprietary or Enterprise Edition in repository documentation, file headers, or license notices, and (c) any derivative works, modifications, or additions to the foregoing.

2.2 **“Contribution(s)”** means any work of authorship (including code, documentation, or other materials) that Contributor submits to Company for inclusion in EE, including via pull request, patch, commit, issue attachment, email, or any other submission method accepted by Company, and any modifications to existing EE materials.

2.3 **“Intellectual Property Rights”** means all rights worldwide in and to copyrights, moral rights, neighboring rights, trade secrets, mask work rights, design rights, database rights, patent rights, and any other proprietary rights, whether registered or unregistered.

## 3. Assignment of rights

3.1 **Assignment.** To the maximum extent permitted by law, Contributor hereby **assigns and transfers to Company**, and agrees to assign and transfer to Company, **all right, title, and interest** in and to all Contributions and all associated Intellectual Property Rights, including all rights to reproduce, prepare derivative works, distribute, publicly perform, publicly display, and otherwise exploit the Contributions in any manner.

3.2 **Future rights and further assurances.** Contributor agrees to execute and deliver (including electronically) any documents and take any actions reasonably requested by Company to perfect, record, or enforce Company’s rights in the Contributions. If Contributor fails to do so after reasonable request, Contributor appoints Company as Contributor’s attorney-in-fact solely to execute such documents on Contributor’s behalf.

3.3 **Work made for hire (where applicable).** To the extent any Contribution qualifies as a “work made for hire” under applicable law, it shall be deemed a work made for hire for Company. If not, it is assigned under Section 3.1.

## 4. Moral rights waiver

To the extent permitted by law, Contributor **waives and agrees not to assert** any moral rights (including rights of attribution and integrity) or similar rights in the Contributions against Company or Company’s licensees, successors, or assigns.

## 5. Patent rights (assignment / license)

5.1 **Patent assignment.** To the maximum extent permitted by law, Contributor hereby assigns to Company all right, title, and interest in any patent rights that are **necessarily infringed** by making, using, selling, offering for sale, importing, or otherwise exploiting the Contributions or EE as incorporated with the Contributions.

5.2 **Fallback patent license.** If any patent rights cannot be assigned as a matter of law, Contributor grants Company a **perpetual, irrevocable, worldwide, transferable, sublicensable, royalty-free** license under such patent rights to make, have made, use, sell, offer for sale, import, and otherwise exploit the Contributions and EE.

## 6. Contributor representations

Contributor represents and warrants that:

6.1 **Authority.** Contributor has the legal right and authority to enter into this Agreement and to make the assignments and grants herein.

6.2 **Originality / rights clearance.** Each Contribution is original to Contributor or Contributor has secured all necessary rights and permissions to submit it and to assign the rights described in this Agreement.

6.3 **No third-party restrictions.** Contributions are not subject to any employment, contractor, academic, or other agreement that would conflict with this Agreement or restrict assignment to Company. Contributor has not included any code or materials that require disclosure of source code or impose “copyleft” or similar reciprocal obligations on EE (including but not limited to GPL, AGPL, LGPL (in a way that would impose reciprocity on EE), or other licenses that would require EE to be distributed under different terms), unless Company has expressly agreed in writing.

6.4 **No confidential information.** Contributor will not submit any confidential or proprietary information of any third party (including an employer) as part of a Contribution.

## 7. Relationship to MIT-licensed portions of the repo

This Agreement applies **only** to Contributions to EE as defined in Section 2.1. Contributions made solely to MIT-licensed portions of the repository remain governed by the repository’s applicable open-source licensing and contribution terms, unless a separate written agreement states otherwise.

## 8. No obligation; consideration

8.1 **No obligation to accept.** Company has no obligation to accept, merge, or distribute any Contribution.

8.2 **Consideration.** Contributor agrees that the opportunity to contribute to EE and Company’s potential acceptance and use of the Contributions are adequate consideration for the assignments and grants in this Agreement.

## 9. Limitation of liability

To the maximum extent permitted by law, **neither Party** will be liable to the other for any indirect, incidental, special, consequential, or punitive damages arising out of this Agreement.

## 10. Governing law; venue

This Agreement is governed by the laws of the **State of California**, excluding conflict-of-laws rules. The Parties agree to exclusive jurisdiction and venue in the state or federal courts located in **California**, unless prohibited by applicable law.

## 11. Miscellaneous

11.1 **Entire agreement.** This Agreement is the entire agreement between the Parties regarding EE Contributions and supersedes all prior or contemporaneous understandings on that subject.

11.2 **Amendment.** Any amendment must be in writing and signed by both Parties.

11.3 **Severability.** If any provision is held unenforceable, the remaining provisions remain in full force and effect.

11.4 **Counterparts; electronic signatures.** This Agreement may be executed in counterparts, including via electronic signature, each of which is deemed an original.

---

## Signatures

**COMPANY:** DanswerAI, Inc.  
By: ____________________________________  
Name: __________________________________  
Title: ___________________________________  
Date: ___________________________________

**CONTRIBUTOR:**  
Signature: _______________________________  
Name: ___________________________________  
Email: ___________________________________  
Date: ___________________________________

================================================
FILE: ct.yaml
================================================
# See https://github.com/helm/chart-testing#configuration

# still have to specify this on the command line for list-changed
chart-dirs:
  - deployment/helm/charts

# must be kept in sync with Chart.yaml
chart-repos:
  - vespa=https://onyx-dot-app.github.io/vespa-helm-charts
  - opensearch=https://opensearch-project.github.io/helm-charts
  - ingress-nginx=https://kubernetes.github.io/ingress-nginx
  - postgresql=https://cloudnative-pg.github.io/charts
  - redis=https://ot-container-kit.github.io/helm-charts
  - minio=https://charts.min.io/
  - code-interpreter=https://onyx-dot-app.github.io/python-sandbox/
  
# have seen postgres take 10 min to pull ... so 15 min seems like a good timeout?
helm-extra-args: --debug --timeout 900s

# nginx appears to not work on kind, likely due to lack of loadbalancer support
# helm-extra-set-args also only works on the command line, not in this yaml
# helm-extra-set-args: --set=nginx.enabled=false

validate-maintainers: false


================================================
FILE: cubic.yaml
================================================
# yaml-language-server: $schema=https://cubic.dev/schema/cubic-repository-config.schema.json
version: 1

reviews:
  enabled: true
  sensitivity: medium
  incremental_commits: true
  check_drafts: false

  custom_instructions: |
    Use explicit type annotations for variables to enhance code clarity,
    especially when moving type hints around in the code.

    Use `contributing_guides/best_practices.md` as core review context.
    Prefer consistency with existing patterns, fix issues in code you touch,
    avoid tacking new features onto muddy interfaces, fail loudly instead of
    silently swallowing errors, keep code strictly typed, preserve clear state
    boundaries, remove duplicate or dead logic, break up overly long functions,
    avoid hidden import-time side effects, respect module boundaries, and favor
    correctness-by-construction over relying on callers to use an API correctly.

    Reference these files for additional context:
    - `contributing_guides/best_practices.md` — Best practices for contributing to the codebase
    - `CLAUDE.md` — Project instructions and coding standards
    - `backend/alembic/README.md` — Migration guidance, including multi-tenant migration behavior
    - `deployment/helm/charts/onyx/values-lite.yaml` — Lite deployment Helm values and service assumptions
    - `deployment/docker_compose/docker-compose.onyx-lite.yml` — Lite deployment Docker Compose overlay and disabled service behavior

  ignore:
    files:
      - greptile.json
      - cubic.yaml

  custom_rules:
    - name: TODO format
      description: >
        Whenever a TODO is added, there must always be an associated name or
        ticket in the style of TODO(name): ... or TODO(1234): ...

    - name: Frontend standards
      description: >
        For frontend changes, enforce all standards described in the
        web/AGENTS.md file.
      include:
        - web/**
        - desktop/**

    - name: No debugging code
      description: >
        Remove temporary debugging code before merging to production,
        especially tenant-specific debugging logs.

    - name: No hardcoded booleans
      description: >
        When hardcoding a boolean variable to a constant value, remove the
        variable entirely and clean up all places where it's used rather than
        just setting it to a constant.

    - name: Multi-tenant awareness
      description: >
        Code changes must consider both multi-tenant and single-tenant
        deployments. In multi-tenant mode, preserve tenant isolation, ensure
        tenant context is propagated correctly, and avoid assumptions that only
        hold for a single shared schema or globally shared state. In
        single-tenant mode, avoid introducing unnecessary tenant-specific
        requirements or cloud-only control-plane dependencies.

    - name: Onyx lite compatibility
      description: >
        Code changes must consider both regular Onyx deployments and Onyx lite
        deployments. Lite deployments disable the vector DB, Redis, model
        servers, and background workers by default, use PostgreSQL-backed
        cache/auth/file storage, and rely on the API server to handle
        background work. Do not assume those services are available unless the
        code path is explicitly limited to full deployments.

    - name: OnyxError over HTTPException
      description: >
        Never raise HTTPException directly in business code. Use
        `raise OnyxError(OnyxErrorCode.XXX, "message")` from
        `onyx.error_handling.exceptions`. A global FastAPI exception handler
        converts OnyxError into structured JSON responses with
        {"error_code": "...", "detail": "..."}. Error codes are defined in
        `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors
        with dynamic HTTP status codes, use `status_code_override`:
        `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`.
      include:
        - backend/**/*.py

issues:
  fix_with_cubic_buttons: true
  pr_comment_fixes: true
  fix_commits_to_pr: true


================================================
FILE: deployment/.gitignore
================================================
.env*
secrets.yaml


================================================
FILE: deployment/README.md
================================================
Documentation for how to deploy Onyx can be found in our official docs:
https://docs.onyx.app/deployment/overview


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/README.md
================================================
# Onyx AWS ECS Fargate CloudFormation Deployment

This directory contains CloudFormation templates and scripts to deploy Onyx on AWS ECS Fargate.

## Configuration

All configuration parameters are stored in a single JSON file: `onyx_config.json`. This file contains all the parameters needed for the different CloudFormation stacks.

Example:
```json
{
  "OnyxNamespace": "onyx",
  "Environment": "production",
  "EFSName": "onyx-efs",
  "AWSRegion": "us-east-2",
  "VpcID": "YOUR_VPC_ID",
  "SubnetIDs": "YOUR_SUBNET_ID1,YOUR_SUBNET_ID2",
  "DomainName": "YOUR_DOMAIN e.g ecs.onyx.app",
  "ValidationMethod": "DNS",
  "HostedZoneId": ""
}
```

### Required Parameters

- `Environment`: Used to prefix all stack names during deployment. This is required.
- `OnyxNamespace`: Namespace for the Onyx deployment.
- `EFSName`: Name for the Elastic File System.
- `AWSRegion`: AWS region where resources will be deployed.
- `VpcID`: ID of the VPC where Onyx will be deployed.
- `SubnetIDs`: Comma-separated list of subnet IDs for deployment.
- `DomainName`: Domain name for the Onyx deployment.
- `ValidationMethod`: Method for domain validation (typically "DNS").
- [optional] `HostedZoneId`: Route 53 hosted zone ID (only if using Route 53 for DNS).

The deployment script automatically extracts the needed parameters for each CloudFormation template based on the parameter names defined in the templates.

## Deployment Order

The deployment follows this order:

1. Infrastructure stacks:
   - EFS
   - Cluster
   - ACM

2. Service stacks:
   - Postgres
   - Redis
   - Vespa Engine
   - Model Server (Indexing)
   - Model Server (Inference)
   - Backend API Server
   - Backend Background Server
   - Web Server
   - Nginx

## Usage

To deploy:
```bash
./deploy.sh
```

To uninstall:
```bash
./uninstall.sh
```


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/deploy.sh
================================================
#!/bin/bash

# Function to remove comments from JSON and output valid JSON
remove_comments() {
    sed 's/\/\/.*$//' "$1" | grep -v '^[[:space:]]*$'
}

# Variables
TEMPLATE_DIR="$(pwd)"
SERVICE_DIR="$TEMPLATE_DIR/services"

# Unified config file
CONFIG_FILE="onyx_config.jsonl"

# Try to get AWS_REGION from config, fallback to default if not found
AWS_REGION_FROM_CONFIG=$(remove_comments "$CONFIG_FILE" | jq -r '.AWSRegion // empty')
if [ -n "$AWS_REGION_FROM_CONFIG" ]; then
    AWS_REGION="$AWS_REGION_FROM_CONFIG"
else
    AWS_REGION="${AWS_REGION:-us-east-2}"
fi

# Get environment from config file
ENVIRONMENT=$(remove_comments "$CONFIG_FILE" | jq -r '.Environment')
if [ -z "$ENVIRONMENT" ] || [ "$ENVIRONMENT" == "null" ]; then
    echo "Missing Environment in $CONFIG_FILE. Please add the Environment field."
    exit 1
fi

# Try to get S3_BUCKET from config, fallback to default if not found
S3_BUCKET_FROM_CONFIG=$(remove_comments "$CONFIG_FILE" | jq -r '.S3Bucket // empty')
if [ -n "$S3_BUCKET_FROM_CONFIG" ]; then
    S3_BUCKET="$S3_BUCKET_FROM_CONFIG"
else
    S3_BUCKET="${S3_BUCKET:-onyx-ecs-fargate-configs}"
fi

INFRA_ORDER=(
  "onyx_efs_template.yaml"
  "onyx_cluster_template.yaml"
  "onyx_acm_template.yaml"
)

# Deployment order for services
SERVICE_ORDER=(
  "onyx_postgres_service_template.yaml"
  "onyx_redis_service_template.yaml"
  "onyx_vespaengine_service_template.yaml"
  "onyx_model_server_indexing_service_template.yaml"
  "onyx_model_server_inference_service_template.yaml"
  "onyx_backend_api_server_service_template.yaml"
  "onyx_backend_background_server_service_template.yaml"
  "onyx_web_server_service_template.yaml"
  "onyx_nginx_service_template.yaml"
)

# Function to validate a CloudFormation template
validate_template() {
  local template_file=$1
  echo "Validating template: $template_file..."
  aws cloudformation validate-template --template-body file://"$template_file" --region "$AWS_REGION" > /dev/null
  if [ $? -ne 0 ]; then
    echo "Error: Validation failed for $template_file. Exiting."
    exit 1
  fi
  echo "Validation succeeded for $template_file."
}

# Function to create CloudFormation parameters from JSON
create_parameters_from_json() {
  local template_file=$1
  local temp_params_file="${template_file%.yaml}_parameters.json"
  
  # Convert the config file contents to CloudFormation parameter format
  echo "[" > "$temp_params_file"
  
  # Process all key-value pairs from the config file
  local first=true
  remove_comments "$CONFIG_FILE" | jq -r 'to_entries[] | select(.value != null and .value != "") | "\(.key)|\(.value)"' | while IFS='|' read -r key value; do
    if [ "$first" = true ]; then
      first=false
    else
      echo "," >> "$temp_params_file"
    fi
    echo "    {\"ParameterKey\": \"$key\", \"ParameterValue\": \"$value\"}" >> "$temp_params_file"
  done
  
  echo "]" >> "$temp_params_file"
  
  # Debug output - display the created parameters file
  echo "Generated parameters file: $temp_params_file" >&2
  echo "Contents:" >&2
  cat "$temp_params_file" >&2
  
  # Return just the filename
  echo "$temp_params_file"
}

# Function to deploy a CloudFormation stack
deploy_stack() {
  local stack_name=$1
  local template_file=$2

  echo "Checking if stack $stack_name exists..."
  if aws cloudformation describe-stacks --stack-name "$stack_name" --region "$AWS_REGION" > /dev/null 2>&1; then
    echo "Stack $stack_name already exists. Skipping deployment."
    return 0
  fi
  
  # Create temporary parameters file for this template
  local temp_params_file=$(create_parameters_from_json "$template_file")
  
  # Special handling for SubnetIDs parameter if needed
  if grep -q "SubnetIDs" "$template_file"; then
    echo "Template uses SubnetIDs parameter, ensuring it's properly formatted..."
    # Make sure we're passing SubnetIDs as a comma-separated list
    local subnet_ids=$(remove_comments "$CONFIG_FILE" | jq -r '.SubnetIDs // empty')
    if [ -n "$subnet_ids" ]; then
      echo "Using SubnetIDs from config: $subnet_ids"
    else
      echo "Warning: SubnetIDs not found in config but template requires it."
    fi
  fi
  
  echo "Deploying stack: $stack_name with template: $template_file and generated config from: $CONFIG_FILE..."
  aws cloudformation deploy \
    --stack-name "$stack_name" \
    --template-file "$template_file" \
    --parameter-overrides file://"$temp_params_file" \
    --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND \
    --region "$AWS_REGION" \
    --no-cli-auto-prompt > /dev/null

  if [ $? -ne 0 ]; then
    echo "Error: Deployment failed for $stack_name. Exiting."
    exit 1
  fi
  
  # Clean up temporary parameter file
  rm "$temp_params_file"
  
  echo "Stack deployed successfully: $stack_name."
}

convert_underscores_to_hyphens() {
  local input_string="$1"
  local converted_string="${input_string//_/-}"
  echo "$converted_string"
}

deploy_infra_stacks() {
    for template_name in "${INFRA_ORDER[@]}"; do
      # Skip ACM template if HostedZoneId is not set
      if [[ "$template_name" == "onyx_acm_template.yaml" ]]; then
        HOSTED_ZONE_ID=$(remove_comments "$CONFIG_FILE" | jq -r '.HostedZoneId')
        if [ -z "$HOSTED_ZONE_ID" ] || [ "$HOSTED_ZONE_ID" == "" ] || [ "$HOSTED_ZONE_ID" == "null" ]; then
          echo "Skipping ACM template deployment because HostedZoneId is not set in $CONFIG_FILE"
          continue
        fi
      fi

      template_file="$template_name"
      stack_name="$ENVIRONMENT-$(basename "$template_name" _template.yaml)"
      stack_name=$(convert_underscores_to_hyphens "$stack_name")

      if [ -f "$template_file" ]; then
        validate_template "$template_file"
        deploy_stack "$stack_name" "$template_file"
      else
        echo "Warning: Template file $template_file not found. Skipping."
      fi
    done
}

deploy_services_stacks() { 
    for template_name in "${SERVICE_ORDER[@]}"; do
      template_file="$SERVICE_DIR/$template_name"
      stack_name="$ENVIRONMENT-$(basename "$template_name" _template.yaml)"
      stack_name=$(convert_underscores_to_hyphens "$stack_name")

      if [ -f "$template_file" ]; then
        validate_template "$template_file"
        deploy_stack "$stack_name" "$template_file"
      else
        echo "Warning: Template file $template_file not found. Skipping."
      fi
    done
}

echo "Starting deployment of Onyx to ECS Fargate Cluster..."
deploy_infra_stacks
deploy_services_stacks

echo "All templates validated and deployed successfully."


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/onyx_acm_template.yaml
================================================
AWSTemplateFormatVersion: '2010-09-09'
Description: CloudFormation template to create an ACM Certificate.

Parameters:
  DomainName:
    Type: String
    Description: The primary domain name for the certificate (e.g., example.com).
    Default: example.com
  Environment:
    Type: String
    Default: production
  ValidationMethod:
    Type: String
    Default: DNS

Resources:
  Certificate:
    Type: AWS::CertificateManager::Certificate
    Properties:
      DomainName: !Ref DomainName
      ValidationMethod: !Ref ValidationMethod
      Tags:
        - Key: env
          Value: !Ref Environment

Outputs:
  OutputAcm:
    Description: ACM Cert Id
    Value: !Ref Certificate
    Export:
      Name: !Sub ${AWS::StackName}-OnyxCertificate


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/onyx_cluster_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: The template used to create an ECS Cluster from the ECS Console.

Parameters:
  Environment:
    Type: String
    Description: The environment that is used in the name of the cluster as well.
  OnyxNamespace:
    Type: String
    Default: onyx
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff

Resources:
  ECSCluster:
    Type: AWS::ECS::Cluster
    Properties:
      ClusterName: !Sub ${Environment}-onyx-cluster
      CapacityProviders:
        - FARGATE
        - FARGATE_SPOT
      ClusterSettings:
        - Name: containerInsights
          Value: enhanced
      ServiceConnectDefaults:
        Namespace: !Sub ${Environment}-onyx-cluster
      Tags:
        - Key: env
          Value: !Ref Environment
        - Key: app
          Value: onyx

  S3Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub ${Environment}-onyx-ecs-fargate-configs
      AccessControl: Private
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true

  PrivateDnsNamespace:
    Type: AWS::ServiceDiscovery::PrivateDnsNamespace
    Properties:
      Description: AWS Cloud Map private DNS namespace for resources for onyx website.
      Vpc: !Ref VpcID
      Name: !Ref OnyxNamespace
      Properties:
        DnsProperties:
          SOA:
            TTL: 50

  ECSTaskRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${Environment}-OnyxEcsTaskRole
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: ecs-tasks.amazonaws.com
            Action: sts:AssumeRole
      Policies:
        - PolicyName: "EFSPolicy"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Sid: "VisualEditor0"
                Effect: Allow
                Action:
                  - "elasticfilesystem:*"
                Resource:
                  - !Sub "arn:aws:elasticfilesystem:*:${AWS::AccountId}:access-point/*"
                  - !Sub "arn:aws:elasticfilesystem:*:${AWS::AccountId}:file-system/*"
              - Sid: "VisualEditor1"
                Effect: Allow
                Action: "elasticfilesystem:*"
                Resource: "*"
        - PolicyName: "S3Policy"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Sid: "VisualEditor0"
                Effect: Allow
                Action:
                  - "s3:GetObject"
                  - "s3:ListBucket"
                Resource:
                  - !Sub "arn:aws:s3:::${Environment}-onyx-ecs-fargate-configs/*"
                  - !Sub "arn:aws:s3:::${Environment}-onyx-ecs-fargate-configs"

  ECSTaskExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${Environment}-OnyxECSTaskExecutionRole
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: ecs-tasks.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
      Policies:
        - PolicyName: "CloudWatchLogsPolicy"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Sid: "VisualEditor0"
                Effect: Allow
                Action: "logs:CreateLogGroup"
                Resource: !Sub "arn:aws:logs:*:${AWS::AccountId}:log-group:*"
        - PolicyName: "SecretsManagerPolicy"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - secretsmanager:GetSecretValue
                Resource:
                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password-*
                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret-*

Outputs:
  OutputEcsCluster:
    Description: Onyx ECS Cluster
    Value: !Ref ECSCluster
    Export:
      Name: !Sub ${AWS::StackName}-ECSClusterName
  OutputECSTaskRole:
    Description: Onyx ECS Task Role
    Value: !Ref ECSTaskRole
    Export:
      Name: !Sub ${AWS::StackName}-ECSTaskRole
  OutputECSTaskExecutionRole:
    Description: Onyx ECS TaskExecutionRole
    Value: !Ref ECSTaskExecutionRole
    Export:
      Name: !Sub ${AWS::StackName}-ECSTaskExecutionRole
  OutputOnyxNamespace:
    Description: Onyx CloudMap namespace ID for ECS service discvoery.
    Value: !Ref PrivateDnsNamespace
    Export:
      Name: !Sub ${AWS::StackName}-OnyxNamespace
  OutputOnyxNamespaceName:
    Description: Onyx CloudMap namespace domain name for ECS service discvoery.
    Value: !Ref OnyxNamespace
    Export:
      Name: !Sub ${AWS::StackName}-OnyxNamespaceName


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/onyx_config.jsonl
================================================
{
  // Naming, likely doesn't need to be changed
  "OnyxNamespace": "onyx",
  "Environment": "production",
  "EFSName": "onyx-efs",

  // Region and VPC Stuff
  "AWSRegion": "us-east-2",
  "VpcID": "YOUR_VPC_ID",
  "SubnetIDs": "YOUR_SUBNET_ID1,YOUR_SUBNET_ID2",

  // Domain and ACM Stuff
  "DomainName": "YOUR_DOMAIN e.g ecs.onyx.app",
  "ValidationMethod": "DNS",
  "HostedZoneId": ""  // Only specify if using Route 53 for DNS
} 

================================================
FILE: deployment/aws_ecs_fargate/cloudformation/onyx_efs_template.yaml
================================================
Parameters:

  EFSName:
    Type: String
    Default: onyx-efs
  Environment:
    Type: String
    Default: production
  VpcID:
    Type: String
    Default: vpc-0f230ca52bb04c722 
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"

Resources:

  OnyxEfs:
    Type: AWS::EFS::FileSystem
    Properties:
      BackupPolicy: 
        Status: ENABLED
      Encrypted: True
      PerformanceMode: generalPurpose
      FileSystemTags:
        - Key: Name
          Value: !Sub ${Environment}-${EFSName}-${AWS::Region}-${AWS::AccountId}
      FileSystemProtection:
        ReplicationOverwriteProtection: ENABLED
      ThroughputMode: elastic

  VespaEngineTmpEfsAccessPoint:
    Type: AWS::EFS::AccessPoint
    Properties:
      AccessPointTags: 
        - Key: Name
          Value: vespaengine-tmp
      FileSystemId: !Ref OnyxEfs
      RootDirectory: 
        CreationInfo:
          OwnerGid: "1000"
          OwnerUid: "1000"
          Permissions: "0755"
        Path: /var/tmp

  VespaEngineDataEfsAccessPoint:
    Type: AWS::EFS::AccessPoint
    Properties:
      AccessPointTags: 
        - Key: Name
          Value: vespaengine-data
      FileSystemId: !Ref OnyxEfs
      RootDirectory: 
        CreationInfo:
          OwnerGid: "1000"
          OwnerUid: "1000"
          Permissions: "0755"
        Path: /opt/vespa/var

  PostgresDataEfsAccessPoint:
    Type: AWS::EFS::AccessPoint
    Properties:
      AccessPointTags: 
        - Key: Name
          Value: postgres-data
      FileSystemId: !Ref OnyxEfs
      RootDirectory: 
        CreationInfo:
          OwnerGid: "1000"
          OwnerUid: "1000"
          Permissions: "0755"
        Path: /var/lib/postgresql/data

  EFSMountTarget1:
    DependsOn: OnyxEfs
    Type: AWS::EFS::MountTarget
    Properties:
      FileSystemId: !Ref OnyxEfs
      SubnetId: !Select [0, !Ref SubnetIDs]
      SecurityGroups:
        - !Ref EFSSecurityGroupMountTargets

  EFSMountTarget2:
    DependsOn: OnyxEfs
    Type: AWS::EFS::MountTarget
    Properties:
      FileSystemId: !Ref OnyxEfs
      SubnetId: !Select [1, !Ref SubnetIDs]
      SecurityGroups:
        - !Ref EFSSecurityGroupMountTargets

  EFSSecurityGroupMountTargets:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security Group for EFS Mount Targets
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 2049
          ToPort: 2049
          CidrIp: 0.0.0.0/0

Outputs:
  OutputOnyxEfsId:
    Description: Onyx Filesystem Id
    Value: !Ref OnyxEfs
    Export:
      Name: !Sub ${AWS::StackName}-OnyxEfsId
  OutputVespaEngineTmpEfsAccessPoint:
    Description: VespaEngine Tmp AP
    Value: !Ref VespaEngineTmpEfsAccessPoint
    Export:
      Name: !Sub ${AWS::StackName}-VespaEngineTmpEfsAccessPoint
  OutputVespaEngineDataEfsAccessPoint:
    Description: VespaEngine Data Ap
    Value: !Ref VespaEngineDataEfsAccessPoint
    Export:
      Name: !Sub ${AWS::StackName}-VespaEngineDataEfsAccessPoint
  OutputPostgresDataEfsAccessPoint:
    Description: Postgres Data AP
    Value: !Ref PostgresDataEfsAccessPoint
    Export:
      Name: !Sub ${AWS::StackName}-PostgresDataEfsAccessPoint
  OutputEFSSecurityGroupMountTargets:
    Description: EFS Security Group
    Value: !Ref EFSSecurityGroupMountTargets
    Export:
      Name: !Sub ${AWS::StackName}-EFSSecurityGroupMountTargets


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_backend_api_server_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Backend Api Server TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-backend-api-server
  TaskCpu:
    Type: String
    Default: "2048"
  TaskMemory:
    Type: String
    Default: "4096"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 8080
          ToPort: 8080
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 8080
          ToPort: 8080
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: onyx-backend
          Image: onyxdotapp/onyx-backend:latest
          Cpu: 0
          Essential: true
          Command:
            - "/bin/sh"
            - "-c"
            - |
              alembic upgrade head && echo "Starting Onyx Api Server" && uvicorn onyx.main:app --host 0.0.0.0 --port 8080
          PortMappings:
            - Name: backend
              ContainerPort: 8080
              HostPort: 8080
              Protocol: tcp
              AppProtocol: http
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          Environment:
            - Name: REDIS_HOST
              Value: !Sub 
                - "${Environment}-onyx-redis-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: MODEL_SERVER_HOST
              Value: !Sub 
                - "${Environment}-onyx-model-server-inference-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: VESPA_HOST
              Value: !Sub 
                - "${Environment}-onyx-vespaengine-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: POSTGRES_HOST
              Value: !Sub 
                - "${Environment}-onyx-postgres-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: INDEXING_MODEL_SERVER_HOST
              Value: !Sub 
                - "${Environment}-onyx-model-server-indexing-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: AUTH_TYPE
              Value: basic
          Secrets:
            - Name: POSTGRES_PASSWORD
              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password
            - Name: USER_AUTH_SECRET
              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret
          VolumesFrom: []
          SystemControls: []

  ECSAutoScalingTarget:
    Type: AWS::ApplicationAutoScaling::ScalableTarget
    DependsOn: ECSService
    Properties:
      MaxCapacity: 5
      MinCapacity: 1
      ResourceId: !Sub
        - "service/${ImportedCluster}/${Environment}-${ServiceName}-service"
        - ImportedCluster: !ImportValue
            'Fn::Sub': "${Environment}-onyx-cluster-ECSClusterName"
          ServiceName: !Ref ServiceName
          Environment: !Ref Environment
      ScalableDimension: ecs:service:DesiredCount
      ServiceNamespace: ecs

  ECSAutoScalingPolicy:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 75
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageCPUUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60

  ECSAutoScalingPolicyMemory:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-mem-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 80
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageMemoryUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_backend_background_server_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Backend Background Server TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-backend-background-server
  TaskCpu:
    Type: String
    Default: "2048"
  TaskMemory:
    Type: String
    Default: "4096"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 8080
          ToPort: 8080
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 8080
          ToPort: 8080
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: onyx-backend-background
          Image: onyxdotapp/onyx-backend:latest
          Cpu: 0
          Essential: true
          Command:
            - "/usr/bin/supervisord"
            - "-c"
            - "/etc/supervisor/conf.d/supervisord.conf"
          PortMappings:
            - Name: backend
              ContainerPort: 8080
              HostPort: 8080
              Protocol: tcp
              AppProtocol: http
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          Environment:
            - Name: REDIS_HOST
              Value: !Sub 
                - "${Environment}-onyx-redis-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: MODEL_SERVER_HOST
              Value: !Sub 
                - "${Environment}-onyx-model-server-inference-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: VESPA_HOST
              Value: !Sub 
                - "${Environment}-onyx-vespaengine-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: POSTGRES_HOST
              Value: !Sub 
                - "${Environment}-onyx-postgres-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: INDEXING_MODEL_SERVER_HOST
              Value: !Sub 
                - "${Environment}-onyx-model-server-indexing-service.${ImportedNamespace}"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
            - Name: AUTH_TYPE
              Value: basic
          Secrets:
            - Name: POSTGRES_PASSWORD
              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password
            - Name: USER_AUTH_SECRET
              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret
          VolumesFrom: []
          SystemControls: []


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_model_server_indexing_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Model Server Indexing TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-model-server-indexing
  TaskCpu:
    Type: String
    Default: "2048"
  TaskMemory:
    Type: String
    Default: "4096"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 9000
          ToPort: 9000
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 9000
          ToPort: 9000
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: onyx-model-server-indexing
          Image: onyxdotapp/onyx-model-server:latest
          Cpu: 0
          Essential: true
          Command:
            - "/bin/sh"
            - "-c"
            - >
              if [ "${DISABLE_MODEL_SERVER}" = "True" ] || [ "${DISABLE_MODEL_SERVER}" = "true" ]; then echo 'Skipping service...';
              exit 0; else exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; fi
          PortMappings:
            - Name: model-server
              ContainerPort: 9000
              HostPort: 9000
              Protocol: tcp
              AppProtocol: http
          Environment:
            - Name: LOG_LEVEL
              Value: info
            - Name: INDEXING_ONLY
              Value: True
            - Name: VESPA_SEARCHER_THREADS
              Value: "1"
          MountPoints:
            - SourceVolume: efs-volume
              ContainerPath: /app/.cache/huggingface/
              ReadOnly: false
          VolumesFrom: []
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
          SystemControls: []
      Volumes:
        - Name: efs-volume
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId" 
            RootDirectory: "/"


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_model_server_inference_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Model Server Inference TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-model-server-inference
  TaskCpu:
    Type: String
    Default: "2048"
  TaskMemory:
    Type: String
    Default: "4096"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 9000
          ToPort: 9000
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 9000
          ToPort: 9000
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: onyx-model-server-inference
          Image: onyxdotapp/onyx-model-server:latest
          Cpu: 0
          Essential: true
          Command:
            - "/bin/sh"
            - "-c"
            - >
              if [ "${DISABLE_MODEL_SERVER}" = "True" ] || [ "${DISABLE_MODEL_SERVER}" = "true" ]; then echo 'Skipping service...';
              exit 0; else exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; fi
          PortMappings:
            - Name: model-server
              ContainerPort: 9000
              HostPort: 9000
              Protocol: tcp
              AppProtocol: http
          Environment:
            - Name: LOG_LEVEL
              Value: info
          MountPoints:
            - SourceVolume: efs-volume
              ContainerPath: /app/.cache/huggingface/
              ReadOnly: false
          VolumesFrom: []
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
          SystemControls: []
      Volumes:
        - Name: efs-volume
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId" 
            RootDirectory: "/"

  ECSAutoScalingTarget:
    Type: AWS::ApplicationAutoScaling::ScalableTarget
    DependsOn: ECSService
    Properties:
      MaxCapacity: 5
      MinCapacity: 1
      ResourceId: !Sub
        - "service/${ImportedCluster}/${Environment}-${ServiceName}-service"
        - ImportedCluster: !ImportValue
            'Fn::Sub': "${Environment}-onyx-cluster-ECSClusterName"
          ServiceName: !Ref ServiceName
          Environment: !Ref Environment
      ScalableDimension: ecs:service:DesiredCount
      ServiceNamespace: ecs

  ECSAutoScalingPolicy:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 75
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageCPUUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60

  ECSAutoScalingPolicyMemory:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-memory-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 80
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageMemoryUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_nginx_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: "The template used to create an ECS Service from the ECS Console."

Parameters:
  SubnetIDs:
      Type: CommaDelimitedList
      Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
      Type: String
      Default: vpc-098cfa79d637dabff
  HostedZoneId:
      Type: String
      Default: ''
  DomainName:
      Type: String
      Default: demo.danswer.ai
  Environment:
    Type: String
  ServiceName:
    Type: String
    Default: onyx-nginx
  OnyxNamespace:
    Type: String
    Default: onyx
  OnyxBackendApiServiceName:
    Type: String
    Default: onyx-backend-api-server-service
  OnyxWebServerServiceName:
    Type: String
    Default: onyx-web-server-service
  TaskCpu:
    Type: String
    Default: "512"
  TaskMemory:
    Type: String
    Default: "1024"
  TaskDesiredCount:
    Type: Number
    Default: 1
  GitHubConfigUrl:
    Type: String
    Default: "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx/app.conf.template"
    Description: "URL to the nginx configuration file on GitHub"
  GitHubRunScriptUrl:
    Type: String
    Default: "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx/run-nginx.sh"
    Description: "URL to the nginx run script on GitHub"

Conditions:
  CreateRoute53: !Not 
    - !Equals 
      - !Ref HostedZoneId
      - ''

Resources:
  ECSService:
    Type: "AWS::ECS::Service"
    DependsOn: LoadBalancer
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: "FARGATE"
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}
      SchedulingStrategy: "REPLICA"
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: "ENABLED"
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: "ENABLED"
          SecurityGroups: 
            - !Ref SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: "LATEST"
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: "ECS"
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt
            - "ServiceDiscoveryService"
            - "Arn"
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true
      LoadBalancers:
        - ContainerName: nginx
          ContainerPort: 80
          TargetGroupArn: !Ref TargetGroup

  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      ContainerDefinitions:
        - Name: nginx
          Image: nginx:1.25.5-alpine
          Cpu: 0
          PortMappings:
            - Name: nginx-80-tcp
              ContainerPort: 80
              HostPort: 80
              Protocol: tcp
          Essential: true
          Command:
            - /bin/sh
            - -c
            - dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template
          Environment:
            - Name: EMAIL
              Value: ""
            - Name: DOMAIN
              Value: !Ref DomainName
            - Name: ONYX_BACKEND_API_HOST
              Value: !Sub ${Environment}-${OnyxBackendApiServiceName}.${OnyxNamespace}
            - Name: ONYX_WEB_SERVER_HOST
              Value: !Sub ${Environment}-${OnyxWebServerServiceName}.${OnyxNamespace}
          MountPoints:
            - SourceVolume: efs-volume
              ContainerPath: /etc/nginx/conf.d
          VolumesFrom: []
          DependsOn:
            - ContainerName: github-sync-container
              Condition: SUCCESS
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-OnyxNginxTaskDefinition
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: 25m
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          SystemControls: []
        - Name: github-sync-container
          Image: curlimages/curl:latest
          Cpu: 128
          MemoryReservation: 256
          PortMappings: []
          Essential: false
          Command:
            - sh
            - -c
            - !Sub |
              curl -L ${GitHubConfigUrl} -o /etc/nginx/conf.d/app.conf.template && 
              curl -L ${GitHubRunScriptUrl} -o /etc/nginx/conf.d/run-nginx.sh && 
              chmod 644 /etc/nginx/conf.d/app.conf.template && 
              chmod 755 /etc/nginx/conf.d/run-nginx.sh && 
              exit 0 || exit 1
          MountPoints:
            - SourceVolume: efs-volume
              ContainerPath: /etc/nginx/conf.d
          VolumesFrom: []
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-github-sync-configs-TaskDefinition
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: 25m
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          SystemControls: []
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole" 
      NetworkMode: awsvpc
      Volumes:
        - Name: efs-volume
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId"
            RootDirectory: /
      PlacementConstraints: []
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      EnableFaultInjection: false 

  SecurityGroup:
    Type: "AWS::EC2::SecurityGroup"
    Properties:
      GroupDescription: !Sub "Security group for ${ServiceName}"
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 80
          ToPort: 80
          IpProtocol: "tcp"
          CidrIp: "0.0.0.0/0"
        - FromPort: 80
          ToPort: 80
          IpProtocol: "tcp"
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Ref ServiceName
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1

  LoadBalancer:
    Type: AWS::ElasticLoadBalancingV2::LoadBalancer
    DependsOn: SecurityGroup
    Properties:
      Type: application
      Scheme: internet-facing
      Subnets: !Ref SubnetIDs
      SecurityGroups: 
        - !Ref SecurityGroup

  LoadBalancerListener:
    Type: AWS::ElasticLoadBalancingV2::Listener
    Properties:
      LoadBalancerArn: !Ref LoadBalancer
      Port: 80
      Protocol: HTTP
      DefaultActions:
        - Type: forward
          TargetGroupArn: !Ref TargetGroup

  TargetGroup:
    Type: AWS::ElasticLoadBalancingV2::TargetGroup
    Properties:
      HealthCheckEnabled: True
      HealthCheckIntervalSeconds: 30
      HealthCheckPort: 80
      HealthCheckPath: /api/health
      HealthCheckProtocol: HTTP
      HealthCheckTimeoutSeconds: 20
      HealthyThresholdCount: 3
      Port: 80
      Protocol: HTTP
      ProtocolVersion: HTTP1
      VpcId: !Ref VpcID
      TargetType: ip

  Route53Record:
    Type: AWS::Route53::RecordSet
    Condition: CreateRoute53
    Properties:
      HostedZoneId: !Ref HostedZoneId
      Name: !Ref DomainName
      Type: A
      AliasTarget:
        DNSName: !GetAtt LoadBalancer.DNSName
        HostedZoneId: !GetAtt LoadBalancer.CanonicalHostedZoneID
        EvaluateTargetHealth: false

Outputs:
  ECSService:
    Description: "The created service."
    Value: !Ref "ECSService"
  ServiceDiscoveryService:
    Value: !Ref "ServiceDiscoveryService"
  OutputOnyxLoadBalancerDNSName:
    Description: LoadBalancer DNSName
    Value: !GetAtt LoadBalancer.DNSName
    Export:
      Name: !Sub ${AWS::StackName}-OnyxLoadBalancerDNSName


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_postgres_service_template.yaml
================================================
AWSTemplateFormatVersion: '2010-09-09'
Parameters:
  Environment:
    Type: String
    Default: production
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-postgres
  TaskCpu:
    Type: String
    Default: "1024"
  TaskMemory:
    Type: String
    Default: "2048"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: DISABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - !Ref SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 100
        MinimumHealthyPercent: 0
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 5432
          ToPort: 5432
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 5432
          ToPort: 5432
          IpProtocol: tcp
          CidrIpv6: "::/0"
        - FromPort: 2049
          ToPort: 2049
          IpProtocol: tcp
          SourceSecurityGroupId:
            Fn::ImportValue:
              Fn::Sub: "${Environment}-onyx-efs-EFSSecurityGroupMountTargets"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1

  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      Volumes:
        - Name: efs-volume-data
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId"
            RootDirectory: "/"
            TransitEncryption: ENABLED
            AuthorizationConfig:
              AccessPointId:
                Fn::ImportValue:
                  Fn::Sub: "${Environment}-onyx-efs-PostgresDataEfsAccessPoint"
      ContainerDefinitions:
        - Name: !Ref ServiceName
          Image: postgres:15.2-alpine
          Cpu: 0
          Essential: true
          StopTimeout: 30
          Command:
            - "-c"
            - "max_connections=250"
          PortMappings:
            - Name: postgres
              ContainerPort: 5432
              HostPort: 5432
              Protocol: tcp
              AppProtocol: http
          Environment:
            - Name: POSTGRES_USER
              Value: postgres
            - Name: PGSSLMODE
              Value: require
            - Name: POSTGRES_DB
              Value: postgres
          Secrets:
            - Name: POSTGRES_PASSWORD
              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password
          MountPoints:
            - SourceVolume: efs-volume-data
              ContainerPath: /var/lib/postgresql/data
              ReadOnly: false
            - SourceVolume: efs-volume-data
              ContainerPath: /var/lib/postgresql
              ReadOnly: false
          User: "1000"
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: /ecs/OnyxPostgresTaskDefinition
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_redis_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Redis TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-redis
  TaskCpu:
    Type: String
    Default: "1024"
  TaskMemory:
    Type: String
    Default: "2048"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 6379
          ToPort: 6379
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 6379
          ToPort: 6379
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: redis 
          Image: redis:7.4-alpine 
          Cpu: 0
          Essential: true
          Command:
            - "redis-server"
            - "--save"
            - "\"\""
            - "--appendonly"
            - "no"
          PortMappings:
            - Name: redis_port
              ContainerPort: 6379
              HostPort: 6379
              Protocol: tcp
              AppProtocol: http
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          Environment: []
          VolumesFrom: []
          SystemControls: []


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_vespaengine_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Vespa Engine TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-vespaengine
  TaskCpu:
    Type: String
    Default: "4096"
  TaskMemory:
    Type: String
    Default: "16384"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 19071
          ToPort: 19071
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 19071
          ToPort: 19071
          IpProtocol: tcp
          CidrIpv6: "::/0"
        - FromPort: 8081
          ToPort: 8081
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 8081
          ToPort: 8081
          IpProtocol: tcp
          CidrIpv6: "::/0"
        - FromPort: 2049
          ToPort: 2049
          IpProtocol: tcp
          SourceSecurityGroupId:
            Fn::ImportValue:
              Fn::Sub: "${Environment}-onyx-efs-EFSSecurityGroupMountTargets"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: vespaengine
          Image: vespaengine/vespa:8.609.39
          Cpu: 0
          Essential: true
          PortMappings:
            - Name: vespaengine_port
              ContainerPort: 19071
              HostPort: 19071
              Protocol: tcp
              AppProtocol: http
            - Name: vespaengine_port2
              ContainerPort: 8081
              HostPort: 8081
              Protocol: tcp
              AppProtocol: http
          MountPoints:
            - SourceVolume: efs-volume-data
              ContainerPath: /opt/vespa/var
              ReadOnly: false
            - SourceVolume: efs-volume-tmp
              ContainerPath: /var/tmp
              ReadOnly: false
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: /ecs/OnyxVespaEngineTaskDefinition
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          User: "1000"
          Environment:
            - Name: VESPA_SKIP_UPGRADE_CHECK
              Value: "true"
          VolumesFrom: []
          SystemControls: []
      Volumes:
        - Name: efs-volume-tmp
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId"
            RootDirectory: "/"
            TransitEncryption: ENABLED
            AuthorizationConfig:
              AccessPointId:
                Fn::ImportValue:
                  Fn::Sub: "${Environment}-onyx-efs-VespaEngineTmpEfsAccessPoint"
        - Name: efs-volume-data
          EFSVolumeConfiguration:
            FilesystemId:
              Fn::ImportValue:
                Fn::Sub: "${Environment}-onyx-efs-OnyxEfsId"
            RootDirectory: "/"
            TransitEncryption: ENABLED
            AuthorizationConfig:
              AccessPointId:
                Fn::ImportValue:
                  Fn::Sub: "${Environment}-onyx-efs-VespaEngineDataEfsAccessPoint"


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/services/onyx_web_server_service_template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Description: CloudFormation template for Onyx Web Server TaskDefinition
Parameters:
  Environment:
    Type: String
  SubnetIDs:
    Type: CommaDelimitedList
    Description: "Comma-delimited list of at least two subnet IDs in different Availability Zones"
  VpcID:
    Type: String
    Default: vpc-098cfa79d637dabff
  ServiceName:
    Type: String
    Default: onyx-web-server
  TaskCpu:
    Type: String
    Default: "1024"
  TaskMemory:
    Type: String
    Default: "2048"
  TaskDesiredCount:
    Type: Number
    Default: 1

Resources:

  ECSService:
    Type: AWS::ECS::Service
    Properties:
      Cluster:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSClusterName"
      CapacityProviderStrategy:
        - CapacityProvider: FARGATE
          Base: 0
          Weight: 1
      TaskDefinition: !Ref TaskDefinition
      ServiceName: !Sub ${Environment}-${ServiceName}-service
      SchedulingStrategy: REPLICA
      DesiredCount: !Ref TaskDesiredCount
      AvailabilityZoneRebalancing: ENABLED
      NetworkConfiguration:
        AwsvpcConfiguration:
          AssignPublicIp: ENABLED
          SecurityGroups:
            - Ref: SecurityGroup
          Subnets: !Ref SubnetIDs
      PlatformVersion: LATEST
      DeploymentConfiguration:
        MaximumPercent: 200
        MinimumHealthyPercent: 100
        DeploymentCircuitBreaker:
          Enable: true
          Rollback: true
      DeploymentController:
        Type: ECS
      ServiceConnectConfiguration:
        Enabled: false
      ServiceRegistries:
        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn
      Tags:
        - Key: app
          Value: onyx
        - Key: service
          Value: !Ref ServiceName
        - Key: env
          Value: !Ref Environment
      EnableECSManagedTags: true

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.
      GroupName: !Sub ${Environment}-ecs-${ServiceName}
      VpcId: !Ref VpcID
      SecurityGroupIngress:
        - FromPort: 3000
          ToPort: 3000
          IpProtocol: tcp
          CidrIp: 0.0.0.0/0
        - FromPort: 3000
          ToPort: 3000
          IpProtocol: tcp
          CidrIpv6: "::/0"

  ServiceDiscoveryService:
    Type: "AWS::ServiceDiscovery::Service"
    Properties:
      Name: !Sub ${Environment}-${ServiceName}-service
      DnsConfig:
        DnsRecords:
          - Type: "A"
            TTL: 15
      NamespaceId:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespace"
      HealthCheckCustomConfig:
        FailureThreshold: 1
  
  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition
      TaskRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskRole"
      ExecutionRoleArn:
        Fn::ImportValue:
          Fn::Sub: "${Environment}-onyx-cluster-ECSTaskExecutionRole"
      NetworkMode: awsvpc
      RequiresCompatibilities:
        - FARGATE
      Cpu: !Ref TaskCpu
      Memory: !Ref TaskMemory
      RuntimePlatform:
        CpuArchitecture: ARM64
        OperatingSystemFamily: LINUX
      ContainerDefinitions:
        - Name: onyx-webserver
          Image: onyxdotapp/onyx-web-server:latest
          Cpu: 0
          Essential: true
          PortMappings:
            - Name: webserver
              ContainerPort: 3000
              HostPort: 3000
              Protocol: tcp
          Environment:
            - Name: INTERNAL_URL
              Value: !Sub
                - "http://${Environment}-onyx-backend-api-server-service.${ImportedNamespace}:8080"
                - ImportedNamespace: !ImportValue
                    Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}
              mode: non-blocking
              awslogs-create-group: "true"
              max-buffer-size: "25m"
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: ecs
          User: "1000"
          VolumesFrom: []
          SystemControls: []

  ECSAutoScalingTarget:
    Type: AWS::ApplicationAutoScaling::ScalableTarget
    DependsOn: ECSService
    Properties:
      MaxCapacity: 5
      MinCapacity: 1
      ResourceId: !Sub
        - "service/${ImportedCluster}/${Environment}-${ServiceName}-service"
        - ImportedCluster: !ImportValue
            'Fn::Sub': "${Environment}-onyx-cluster-ECSClusterName"
          ServiceName: !Ref ServiceName
          Environment: !Ref Environment
      ScalableDimension: ecs:service:DesiredCount
      ServiceNamespace: ecs

  ECSAutoScalingPolicy:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 75
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageCPUUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60

  ECSAutoScalingPolicyMemory:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: !Sub ${Environment}-${ServiceName}-service-memory-scaleout
      ScalingTargetId: !Ref ECSAutoScalingTarget
      PolicyType: TargetTrackingScaling
      TargetTrackingScalingPolicyConfiguration:
        TargetValue: 80
        PredefinedMetricSpecification:
          PredefinedMetricType: ECSServiceAverageMemoryUtilization
        ScaleOutCooldown: 60
        ScaleInCooldown: 60


================================================
FILE: deployment/aws_ecs_fargate/cloudformation/uninstall.sh
================================================
#!/bin/bash

AWS_REGION="${AWS_REGION:-us-west-1}"

# Reference to consolidated config
CONFIG_FILE="onyx_config.json"

# Get environment from config file
ENVIRONMENT=$(jq -r '.Environment' "$CONFIG_FILE")
if [ -z "$ENVIRONMENT" ] || [ "$ENVIRONMENT" == "null" ]; then
    echo "Missing Environment in $CONFIG_FILE. Please add the Environment field."
    exit 1
fi

# Try to get S3_BUCKET from config, fallback to default if not found
S3_BUCKET_FROM_CONFIG=$(jq -r '.S3Bucket // empty' "$CONFIG_FILE")
if [ -n "$S3_BUCKET_FROM_CONFIG" ]; then
    S3_BUCKET="$S3_BUCKET_FROM_CONFIG"
else
    S3_BUCKET="${S3_BUCKET:-onyx-ecs-fargate-configs}"
fi

STACK_NAMES=(
  "${ENVIRONMENT}-onyx-nginx-service"
  "${ENVIRONMENT}-onyx-web-server-service"
  "${ENVIRONMENT}-onyx-backend-background-server-service"
  "${ENVIRONMENT}-onyx-backend-api-server-service"
  "${ENVIRONMENT}-onyx-model-server-inference-service"
  "${ENVIRONMENT}-onyx-model-server-indexing-service"
  "${ENVIRONMENT}-onyx-vespaengine-service"
  "${ENVIRONMENT}-onyx-redis-service"
  "${ENVIRONMENT}-onyx-postgres-service"
  "${ENVIRONMENT}-onyx-cluster"
  "${ENVIRONMENT}-onyx-acm"
  "${ENVIRONMENT}-onyx-efs"
  )

delete_stack() {
  local stack_name=$1

  if [ "$stack_name" == "${ENVIRONMENT}-onyx-cluster" ]; then
      echo "Removing all objects and directories from the onyx config s3 bucket."
      aws s3 rm "s3://${ENVIRONMENT}-${S3_BUCKET}" --recursive
      sleep 5
  fi

  echo "Checking if stack $stack_name exists..."
  if aws cloudformation describe-stacks --stack-name "$stack_name" --region "$AWS_REGION" > /dev/null 2>&1; then
  	echo "Deleting stack: $stack_name..."
  	aws cloudformation delete-stack \
		--stack-name "$stack_name" \
		--region "$AWS_REGION"
	
	echo "Waiting for stack $stack_name to be deleted..."
	aws cloudformation wait stack-delete-complete \
		--stack-name "$stack_name" \
		--region "$AWS_REGION"

	if [ $? -eq 0 ]; then
		echo "Stack $stack_name deleted successfully."
		sleep 10
	else
		echo "Failed to delete stack $stack_name. Exiting."
		exit 1
	fi
  else
	echo "Stack $stack_name does not exist, skipping."
	return 0
  fi	
}

for stack_name in "${STACK_NAMES[@]}"; do
  delete_stack "$stack_name"
done

echo "All stacks deleted successfully."


================================================
FILE: deployment/data/nginx/app.conf.template
================================================
# Log format to include request latency
log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" '
                'rt=$request_time';

upstream api_server {
    # fail_timeout=0 means we always retry an upstream even if it failed
    # to return a good HTTP response

    # for UNIX domain socket setups
    #server unix:/tmp/gunicorn.sock fail_timeout=0;

    # for a TCP configuration
    # TODO: use gunicorn to manage multiple processes
    server ${ONYX_BACKEND_API_HOST}:8080 fail_timeout=0;
}

upstream web_server {
    server ${ONYX_WEB_SERVER_HOST}:3000 fail_timeout=0;
}

# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;

# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

server {
    listen 80 default_server;

    client_max_body_size 5G;    # Maximum upload size

    access_log /var/log/nginx/access.log custom_main;

    # Conditionally include MCP location configuration
    include /etc/nginx/conf.d/mcp.conf.inc;

    location ~ ^/scim(/.*)?$ {
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;
        proxy_http_version 1.1;
        proxy_buffering off;
        proxy_redirect off;
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;
        proxy_pass http://api_server;
    }

    # Match both /api/* and /openapi.json in a single rule
    location ~ ^/(api|openapi.json)(/.*)?$ {
        # Rewrite /api prefixed matched paths
        rewrite ^/api(/.*)$ $1 break;

        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        # need to use 1.1 to support chunked transfers and WebSocket
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_buffering off;

        # timeout settings
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://api_server;
    }

    location / {
        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        proxy_http_version 1.1;

        # timeout settings
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://web_server;
    }

}


================================================
FILE: deployment/data/nginx/app.conf.template.no-letsencrypt
================================================
# Log format to include request latency
log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" '
                'rt=$request_time';

upstream api_server {
    # fail_timeout=0 means we always retry an upstream even if it failed
    # to return a good HTTP response

    # for UNIX domain socket setups
    #server unix:/tmp/gunicorn.sock fail_timeout=0;

    # for a TCP configuration
    # TODO: use gunicorn to manage multiple processes
    server api_server:8080 fail_timeout=0;
}

upstream web_server {
    server web_server:3000 fail_timeout=0;
}

# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;

# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

server {
    listen 80 default_server;

    client_max_body_size 5G;    # Maximum upload size

    access_log /var/log/nginx/access.log custom_main;

    # Conditionally include MCP location configuration
    include /etc/nginx/conf.d/mcp.conf.inc;

    location ~ ^/scim(/.*)?$ {
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;
        proxy_http_version 1.1;
        proxy_buffering off;
        proxy_redirect off;
        proxy_pass http://api_server;
    }

    # Match both /api/* and /openapi.json in a single rule
    location ~ ^/(api|openapi.json)(/.*)?$ {
        # Rewrite /api prefixed matched paths
        rewrite ^/api(/.*)$ $1 break;

        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        # need to use 1.1 to support chunked transfers and WebSocket
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_buffering off;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://api_server;
    }

    location / {
        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        proxy_http_version 1.1;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://web_server;
    }
}

server {
    listen 443 ssl default_server;

    client_max_body_size 5G;    # Maximum upload size
    
    location / {
        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't use forwarded schema, host, or port here - this is the entry point
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host; 
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_buffering off;
        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://localhost:80;
    }

    ssl_certificate /etc/nginx/sslcerts/${SSL_CERT_FILE_NAME};
    ssl_certificate_key /etc/nginx/sslcerts/${SSL_CERT_KEY_FILE_NAME};
}


================================================
FILE: deployment/data/nginx/app.conf.template.prod
================================================
# Log format to include request latency
log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" '
                'rt=$request_time';

upstream api_server {
    # fail_timeout=0 means we always retry an upstream even if it failed
    # to return a good HTTP response

    # for UNIX domain socket setups
    #server unix:/tmp/gunicorn.sock fail_timeout=0;

    # for a TCP configuration
    # TODO: use gunicorn to manage multiple processes
    server ${ONYX_BACKEND_API_HOST}:8080 fail_timeout=0;
}

upstream web_server {
    server ${ONYX_WEB_SERVER_HOST}:3000 fail_timeout=0;
}

# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;

# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

server {
    listen 80 default_server;

    client_max_body_size 5G;    # Maximum upload size

    access_log /var/log/nginx/access.log custom_main;

    # Conditionally include MCP location configuration 
    include /etc/nginx/conf.d/mcp.conf.inc;

    location ~ ^/scim(/.*)?$ {
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;
        proxy_http_version 1.1;
        proxy_buffering off;
        proxy_redirect off;
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;
        proxy_pass http://api_server;
    }

    # Match both /api/* and /openapi.json in a single rule
    location ~ ^/(api|openapi.json)(/.*)?$ {
        # Rewrite /api prefixed matched paths
        rewrite ^/api(/.*)$ $1 break;

        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        # need to use 1.1 to support chunked transfers and WebSocket
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_buffering off;

        # timeout settings
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://api_server;
    }

    location / {
        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        proxy_http_version 1.1;

        # timeout settings
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://web_server;
    }

    location /.well-known/acme-challenge/ {
        root /var/www/certbot;
    }
}

server {
    listen 443 ssl default_server;

    client_max_body_size 5G;    # Maximum upload size

    location / {
        # misc headers
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # don't use forwarded schema, host, or port here - this is the entry point
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header Host $host;

        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_buffering off;

        # timeout settings
        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;

        # we don't want nginx trying to do something clever with
        # redirects, we set the Host: header above already.
        proxy_redirect off;
        proxy_pass http://localhost:80;
    }

    ssl_certificate /etc/letsencrypt/live/${DOMAIN}/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/${DOMAIN}/privkey.pem;
    include /etc/letsencrypt/options-ssl-nginx.conf;
    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
}


================================================
FILE: deployment/data/nginx/mcp.conf.inc.template
================================================
# MCP Server - Model Context Protocol for LLM integrations
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
location ~ ^/mcp(/.*)?$ {
    # misc headers
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header X-Forwarded-Host $host;
    proxy_set_header X-Forwarded-Port $server_port;
    proxy_set_header Host $host;

    # Standard HTTP 1.1
    proxy_http_version 1.1;

    # Timeouts for MCP requests
    proxy_connect_timeout 30s;
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;

    proxy_redirect off;
    rewrite ^/mcp(/.*)$ $1 break;
    rewrite ^/mcp/?$ / break;
    proxy_pass http://mcp_server;
}


================================================
FILE: deployment/data/nginx/mcp_upstream.conf.inc.template
================================================
upstream mcp_server {
    server ${ONYX_MCP_SERVER_HOST}:8090 fail_timeout=0;
}


================================================
FILE: deployment/data/nginx/run-nginx.sh
================================================
# fill in the template
export ONYX_BACKEND_API_HOST="${ONYX_BACKEND_API_HOST:-api_server}"
export ONYX_WEB_SERVER_HOST="${ONYX_WEB_SERVER_HOST:-web_server}"
export ONYX_MCP_SERVER_HOST="${ONYX_MCP_SERVER_HOST:-mcp_server}"

export SSL_CERT_FILE_NAME="${SSL_CERT_FILE_NAME:-ssl.crt}"
export SSL_CERT_KEY_FILE_NAME="${SSL_CERT_KEY_FILE_NAME:-ssl.key}"

# Nginx timeout settings (in seconds)
export NGINX_PROXY_CONNECT_TIMEOUT="${NGINX_PROXY_CONNECT_TIMEOUT:-300}"
export NGINX_PROXY_SEND_TIMEOUT="${NGINX_PROXY_SEND_TIMEOUT:-300}"
export NGINX_PROXY_READ_TIMEOUT="${NGINX_PROXY_READ_TIMEOUT:-300}"

echo "Using API server host: $ONYX_BACKEND_API_HOST"
echo "Using web server host: $ONYX_WEB_SERVER_HOST"
echo "Using MCP server host: $ONYX_MCP_SERVER_HOST"
echo "Using nginx proxy timeouts - connect: ${NGINX_PROXY_CONNECT_TIMEOUT}s, send: ${NGINX_PROXY_SEND_TIMEOUT}s, read: ${NGINX_PROXY_READ_TIMEOUT}s"

envsubst '$DOMAIN $SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME $ONYX_BACKEND_API_HOST $ONYX_WEB_SERVER_HOST $ONYX_MCP_SERVER_HOST $NGINX_PROXY_CONNECT_TIMEOUT $NGINX_PROXY_SEND_TIMEOUT $NGINX_PROXY_READ_TIMEOUT' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf

# Conditionally create MCP server configuration
if [ "${MCP_SERVER_ENABLED}" = "True" ] || [ "${MCP_SERVER_ENABLED}" = "true" ]; then
  echo "MCP server is enabled, creating MCP configuration..."
  envsubst '$ONYX_MCP_SERVER_HOST' < "/etc/nginx/conf.d/mcp_upstream.conf.inc.template" > /etc/nginx/conf.d/mcp_upstream.conf.inc
  envsubst '$ONYX_MCP_SERVER_HOST' < "/etc/nginx/conf.d/mcp.conf.inc.template" > /etc/nginx/conf.d/mcp.conf.inc
else
  echo "MCP server is disabled, removing MCP configuration..."
  # Leave empty placeholder files so nginx includes do not fail
  # These files are empty because MCP server is disabled
  echo "# Empty file - MCP server is disabled" > /etc/nginx/conf.d/mcp_upstream.conf.inc
  echo "# Empty file - MCP server is disabled" > /etc/nginx/conf.d/mcp.conf.inc
fi

# wait for the api_server to be ready
echo "Waiting for API server to boot up; this may take a minute or two..."
echo "If this takes more than ~5 minutes, check the logs of the API server container for errors with the following command:"
echo
echo "docker logs onyx-api_server-1"
echo

while true; do
  # Use curl to send a request and capture the HTTP status code
  status_code=$(curl -o /dev/null -s -w "%{http_code}\n" "http://${ONYX_BACKEND_API_HOST}:8080/health")
  
  # Check if the status code is 200
  if [ "$status_code" -eq 200 ]; then
    echo "API server responded with 200, starting nginx..."
    break  # Exit the loop
  else
    echo "API server responded with $status_code, retrying in 5 seconds..."
    sleep 5  # Sleep for 5 seconds before retrying
  fi
done

# Start nginx and reload every 6 hours
while :; do sleep 6h & wait; nginx -s reload; done & nginx -g "daemon off;"


================================================
FILE: deployment/docker_compose/README.md
================================================
# Welcome to Onyx

To set up Onyx there are several options, Onyx supports the following for deployment:
1. Quick guided install via the install.sh script
2. Pulling the repo and running `docker compose up -d` from the deployment/docker_compose directory
  - Note, it is recommended to copy over the env.template file to .env and edit the necessary values
3. For large scale deployments leveraging Kubernetes, there are two options, Helm or Terraform.

This README focuses on the easiest guided deployment which is via install.sh.

**For more detailed guides, please refer to the documentation: https://docs.onyx.app/deployment/overview**

## install.sh script

```
curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
```

This provides a guided installation of Onyx via Docker Compose. It will deploy the latest version of Onyx
and set up the volumes to ensure data is persisted across deployments or upgrades.

The script will create an onyx_data directory, all necessary files for the deployment will be stored in
there. Note that no application critical data is stored in that directory so even if you delete it, the
data needed to restore the app will not be destroyed.

The data about chats, users, etc. are instead stored as named Docker Volumes. This is managed by Docker
and where it is stored will depend on your Docker setup. You can always delete these as well by running
the install.sh script with --delete-data.

To shut down the deployment without deleting, use install.sh --shutdown.

### Upgrading the deployment
Onyx maintains backwards compatibility across all minor versions following SemVer. If following the install.sh script (or through Docker Compose), you can
upgrade it by first bringing down the containers. To do this, use `install.sh --shutdown`
(or `docker compose down` from the directory with the docker-compose.yml file).

After the containers are stopped, you can safely upgrade by either re-running the `install.sh` script (if you left the values as default which is latest,
then it will automatically update to latest each time the script is run). If you are more comfortable running docker compose commands, you can also run
commands directly from the directory with the docker-compose.yml file. First verify the version you want in the environment file (see below),
(if using `latest` tag, be sure to run `docker compose pull`) and run `docker compose up` to restart the services on the latest version

### Environment variables
The Docker Compose files try to look for a .env file in the same directory. The `install.sh` script sets it up from a file called env.template which is
downloaded during the initial setup. Feel free to edit the .env file to customize your deployment. The most important / common changed values are
located near the top of the file.

IMAGE_TAG is the version of Onyx to run. It is recommended to leave it as latest to get all updates with each redeployment.


================================================
FILE: deployment/docker_compose/docker-compose.dev.yml
================================================
# Docker Compose Override for Development/Testing
# This file exposes service ports for development and testing purposes
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
#
# Or set COMPOSE_FILE environment variable:
#   export COMPOSE_FILE=docker-compose.yml:docker-compose.dev.yml
#   docker compose up -d --wait

services:
  api_server:
    ports:
      - "8080:8080"
    deploy:
      resources:
        limits:
          cpus: "${API_SERVER_CPU_LIMIT:-0}"
          memory: "${API_SERVER_MEM_LIMIT:-0}"

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   ports:
  #     - "8090:8090"

  relational_db:
    ports:
      - "5432:5432"

  index:
    ports:
      - "19071:19071"
      - "8081:8081"
    healthcheck:
      # We use 19071 because the application server at 8010 is not ready until
      # it is activated/configured by the application.
      test: ["CMD-SHELL", "curl -sf http://localhost:19071/state/v1/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  opensearch:
    ports:
      - "9200:9200"
    # Rootless Docker can reject the base OpenSearch ulimit settings, so clear
    # the inherited block entirely in the dev override.
    ulimits: !reset null
    environment:
      - bootstrap.memory_lock=false

  inference_model_server:
    ports:
      - "9000:9000"

  cache:
    ports:
      - "6379:6379"

  minio:
    # use different ports to avoid conflicts with model servers
    ports:
      - "9004:9000"
      - "9005:9001"

  code-interpreter:
    ports:
      - "8000:8000"


================================================
FILE: deployment/docker_compose/docker-compose.mcp-api-key-test.yml
================================================
name: onyx

services:
  mcp_api_key_server:
    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:latest}
    restart: on-failure
    working_dir: /workspace
    environment:
      - MCP_API_KEY_TEST_PORT=${MCP_API_KEY_TEST_PORT:-8005}
      - MCP_API_KEY=${MCP_API_KEY:-test-api-key-12345}
      - MCP_SERVER_HOST=${MCP_API_KEY_SERVER_HOST:-0.0.0.0}
      - MCP_SERVER_PUBLIC_HOST=${MCP_API_KEY_SERVER_PUBLIC_HOST:-host.docker.internal}
    command: >
      /bin/sh -c "
      python backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py ${MCP_API_KEY:-test-api-key-12345} ${MCP_API_KEY_TEST_PORT:-8005}
      "
    ports:
      - "${MCP_API_KEY_TEST_PORT:-8005}:${MCP_API_KEY_TEST_PORT:-8005}"
    volumes:
      - ../..:/workspace:ro


================================================
FILE: deployment/docker_compose/docker-compose.mcp-oauth-test.yml
================================================
name: onyx

services:
  mcp_oauth_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    restart: on-failure
    working_dir: /workspace
    environment:
      - MCP_OAUTH_CLIENT_ID=${MCP_OAUTH_CLIENT_ID:-}
      - MCP_OAUTH_CLIENT_SECRET=${MCP_OAUTH_CLIENT_SECRET:-}
      - MCP_OAUTH_ISSUER=${MCP_OAUTH_ISSUER:-}
      - MCP_OAUTH_JWKS_URI=${MCP_OAUTH_JWKS_URI:-}
      - MCP_OAUTH_USERNAME=${MCP_OAUTH_USERNAME:-}
      - MCP_OAUTH_PASSWORD=${MCP_OAUTH_PASSWORD:-}
      - MCP_OAUTH_REQUIRED_SCOPES=${MCP_OAUTH_REQUIRED_SCOPES:-mcp:use}
      - MCP_TEST_SERVER_PORT=${MCP_TEST_SERVER_PORT:-8004}
      - MCP_SERVER_PORT=${MCP_TEST_SERVER_PORT:-8004}
      - MCP_SERVER_HOST=${MCP_SERVER_HOST:-0.0.0.0}
      - MCP_SERVER_PUBLIC_HOST=${MCP_SERVER_PUBLIC_HOST:-host.docker.internal}
      - MCP_SERVER_PUBLIC_URL=${MCP_SERVER_PUBLIC_URL:-}
    command: >
      /bin/sh -c "
      python backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py ${MCP_TEST_SERVER_PORT:-8004}
      "
    ports:
      - "${MCP_TEST_SERVER_PORT:-8004}:${MCP_TEST_SERVER_PORT:-8004}"
    volumes:
      - ../..:/workspace:ro


================================================
FILE: deployment/docker_compose/docker-compose.multitenant-dev.yml
================================================
name: onyx

services:
  api_server:
    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "
      alembic -n schema_private upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - minio
    restart: unless-stopped
    ports:
      - "8080:8080"
    environment:
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
      - MULTI_TENANT=true
      - LOG_LEVEL=DEBUG
      - AUTH_TYPE=cloud
      - REQUIRE_EMAIL_VERIFICATION=false
      - DISABLE_TELEMETRY=true
      - IMAGE_TAG=test
      - DEV_MODE=true
      # Auth Settings
      - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-}
      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
      - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
      - SMTP_SERVER=${SMTP_SERVER:-}
      - SMTP_PORT=${SMTP_PORT:-587}
      - SMTP_USER=${SMTP_USER:-}
      - SMTP_PASS=${SMTP_PASS:-}
      - ENABLE_EMAIL_INVITES=${ENABLE_EMAIL_INVITES:-}
      - EMAIL_FROM=${EMAIL_FROM:-}
      - OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}
      - OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}
      - OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}
      - TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-}
      - CORS_ALLOWED_ORIGIN=${CORS_ALLOWED_ORIGIN:-}
      # Gen AI Settings
      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
      - LLM_SOCKET_READ_TIMEOUT=${LLM_SOCKET_READ_TIMEOUT:-}
      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
      # Query Options
      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
      - HYBRID_ALPHA=${HYBRID_ALPHA:-}
      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
      # Other services
      - POSTGRES_HOST=relational_db
      - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - WEB_DOMAIN=${WEB_DOMAIN:-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}
      # Don't change the NLP model configs unless you know what you're doing
      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
      - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}
      - LOG_ONYX_MODEL_INTERACTIONS=${LOG_ONYX_MODEL_INTERACTIONS:-}
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
      - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
      - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
      - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
      - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
      - LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}
      # Egnyte OAuth Configs
      - EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
      - EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
      - EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
      # Linear OAuth Configs
      - LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
      - LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}
      # Chat Configs
      - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}

      # Show extra/uncommon connectors
      - SHOW_EXTRA_CONNECTORS=${SHOW_EXTRA_CONNECTORS:-true}

      # Enables the use of bedrock models or IAM Auth
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
      - AWS_REGION_NAME=${AWS_REGION_NAME:-}
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      # Seeding configuration
      - USE_IAM_AUTH=${USE_IAM_AUTH:-}
      - OPENAI_DEFAULT_API_KEY=${OPENAI_DEFAULT_API_KEY:-}

      # Vespa Language Forcing
      # See: https://docs.vespa.ai/en/linguistics.html
      - VESPA_LANGUAGE_OVERRIDE=${VESPA_LANGUAGE_OVERRIDE:-}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  background:
    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "
      if [ -f /etc/ssl/certs/custom-ca.crt ]; then
        update-ca-certificates;
      fi &&
      /app/scripts/supervisord_entrypoint.sh"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - indexing_model_server
    restart: unless-stopped
    environment:
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
      - MULTI_TENANT=true
      - LOG_LEVEL=DEBUG
      - AUTH_TYPE=cloud
      - REQUIRE_EMAIL_VERIFICATION=false
      - DISABLE_TELEMETRY=true
      - IMAGE_TAG=test
      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
      - JWT_PUBLIC_KEY_URL=${JWT_PUBLIC_KEY_URL:-}
      # Gen AI Settings (Needed by OnyxBot)
      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
      - LLM_SOCKET_READ_TIMEOUT=${LLM_SOCKET_READ_TIMEOUT:-}
      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
      - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
      # Query Options
      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
      - HYBRID_ALPHA=${HYBRID_ALPHA:-}
      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
      # Other Services
      - POSTGRES_HOST=relational_db
      - POSTGRES_USER=${POSTGRES_USER:-}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
      - DB_READONLY_USER=${DB_READONLY_USER:-}
      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}
      - POSTGRES_DB=${POSTGRES_DB:-}
      - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - WEB_DOMAIN=${WEB_DOMAIN:-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}
      # Don't change the NLP model configs unless you know what you're doing
      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
      - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      # Indexing Configs
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
      - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
      - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
      - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
      - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
      - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
      - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
      - JIRA_SERVER_API_VERSION=${JIRA_SERVER_API_VERSION:-}
      - JIRA_CLOUD_API_VERSION=${JIRA_CLOUD_API_VERSION:-}
      - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
      - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
      - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
      - MAX_DOCUMENT_CHARS=${MAX_DOCUMENT_CHARS:-}
      - MAX_FILE_SIZE_BYTES=${MAX_FILE_SIZE_BYTES:-}
      # Egnyte OAuth Configs
      - EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
      - EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
      - EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
      # Lienar OAuth Configs
      - LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
      - LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
      # Celery Configs (defaults are set in the supervisord.conf file.
      # prefer doing that to have one source of defaults)
      - CELERY_WORKER_DOCFETCHING_CONCURRENCY=${CELERY_WORKER_DOCFETCHING_CONCURRENCY:-}
      - CELERY_WORKER_DOCPROCESSING_CONCURRENCY=${CELERY_WORKER_DOCPROCESSING_CONCURRENCY:-}
      - CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-}
      - CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-}

      # Onyx SlackBot Configs
      - ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER=${ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
      - ONYX_BOT_FEEDBACK_VISIBILITY=${ONYX_BOT_FEEDBACK_VISIBILITY:-}
      - ONYX_BOT_DISPLAY_ERROR_MSGS=${ONYX_BOT_DISPLAY_ERROR_MSGS:-}
      - NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
      - ONYX_BOT_MAX_QPM=${ONYX_BOT_MAX_QPM:-}
      - ONYX_BOT_MAX_WAIT_TIME=${ONYX_BOT_MAX_WAIT_TIME:-}
      # Discord Bot Configuration (runs via supervisord, requires DISCORD_BOT_TOKEN to be set)
      # IMPORTANT: Only one Discord bot instance can run per token - do not scale background workers
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}
      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}
      # API Server connection for Discord bot message processing
      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
      # Logging
      # Leave this on pretty please? Nothing sensitive is collected!
      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
      - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
      # Log all of Onyx prompts and interactions with the LLM
      - LOG_ONYX_MODEL_INTERACTIONS=${LOG_ONYX_MODEL_INTERACTIONS:-}
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}

      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}

      # Enterprise Edition stuff
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - USE_IAM_AUTH=${USE_IAM_AUTH:-}
      - AWS_REGION_NAME=${AWS_REGION_NAME:-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
      # Seeding configuration
      - OPENAI_DEFAULT_API_KEY=${OPENAI_DEFAULT_API_KEY:-}
    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    # Uncomment the following lines if you need to include a custom CA certificate
    # This section enables the use of a custom CA certificate
    # If present, the custom CA certificate is mounted as a volume
    # The container checks for its existence and updates the system's CA certificates
    # This allows for secure communication with services using custom SSL certificates
    # Optional volume mount for CA certificate
    # volumes:
    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile
    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro

  web_server:
    image: ${ONYX_WEB_SERVER_IMAGE:-onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
        # Enterprise Edition only
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.
        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=http://api_server:8080
      - WEB_DOMAIN=${WEB_DOMAIN:-}
      - THEME_IS_DARK=${THEME_IS_DARK:-}

      # Enterprise Edition only
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL:-}

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
  #   build:
  #     context: ../../backend
  #     dockerfile: Dockerfile
  #   command: >
  #     /bin/sh -c "if [ \"${MCP_SERVER_ENABLED:-}\" != \"True\" ] && [ \"${MCP_SERVER_ENABLED:-}\" != \"true\" ]; then
  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';
  #       exit 0;
  #     else
  #       exec python -m onyx.mcp_server_main;
  #     fi"
  #   ports:
  #     - "8090:8090"
  #   env_file:
  #     - path: .env
  #       required: false
  #   depends_on:
  #     - relational_db
  #     - cache
  #   restart: "no"
  #   environment:
  #     - POSTGRES_HOST=relational_db
  #     - REDIS_HOST=cache
  #     # MCP Server Configuration
  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}
  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}
  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}
  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
  #     - API_SERVER_HOST=api_server
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   volumes:
  #     - mcp_server_logs:/var/log/onyx

  inference_model_server:
    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}

      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  indexing_model_server:
    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}

      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    restart: unless-stopped
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
      - DB_READONLY_USER=${DB_READONLY_USER:-}
      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}
    ports:
      - "5432:5432"
    volumes:
      - db_volume:/var/lib/postgresql/data

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=true
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  opensearch:
    image: opensearchproject/opensearch:3.4.0
    restart: unless-stopped
    # Controls whether this service runs. In order to enable it, add
    # opensearch-enabled to COMPOSE_PROFILES in the environment for this
    # docker-compose.
    # NOTE: Now enabled on by default. To explicitly disable this service,
    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not
    # list the profile, or when running docker compose, include all desired
    # service names but this one. Additionally set
    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.
    # profiles: ["opensearch-enabled"]
    environment:
      # We need discovery.type=single-node so that OpenSearch doesn't try
      # forming a cluster and waiting for other nodes to become live.
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
      # We do this to avoid unstable performance from page swaps.
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.
      # Java heap should be ~50% of memory limit. For now we assume a limit of
      # 4g although in practice the container can request more than this.
      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
      # Xms is the starting size, Xmx is the maximum size. These should be the
      # same.
      - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g"
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
    ulimits:
      # Similarly to bootstrap.memory_lock, we don't want to impose limits on
      # how much memory a process can lock from being swapped.
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit).
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.
        hard: 65536
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    environment:
      - DOMAIN=localhost
    ports:
      - "${HOST_PORT_80:-80}:80"
      - "${HOST_PORT:-3000}:80" # allow for localhost:3000 usage, since that is the norm
    volumes:
      - ../data/nginx:/nginx-templates:ro
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template"

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    ports:
      - "9004:9000"
      - "9005:9001"
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    ports:
      - "6379:6379"
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

  code-interpreter:
    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}
    command: ["bash", "./entrypoint.sh", "code-interpreter-api"]
    restart: unless-stopped
    env_file:
      - path: .env
        required: false

    # Below is needed for the `docker-out-of-docker` execution mode
    user: root
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

    # uncomment below + comment out the above to use the `docker-in-docker` execution mode
    # privileged: true

volumes:
  db_volume:
  vespa_volume: # Created by the container itself
  minio_data:

  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # mcp_server_logs:
  # Persistent data for OpenSearch.
  opensearch-data:


================================================
FILE: deployment/docker_compose/docker-compose.onyx-lite.yml
================================================
# =============================================================================
# ONYX LITE — MINIMAL DEPLOYMENT OVERLAY
# =============================================================================
# Overlay to run Onyx in a minimal configuration: no vector database (Vespa),
# no Redis, no model servers, and no background workers. Only PostgreSQL is
# required. In this mode, connectors and RAG search are disabled, but the core
# chat experience (LLM conversations, tools, user file uploads, Projects,
# Agent knowledge, code interpreter) still works.
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml up -d
#
# With dev ports:
#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml \
#                  -f docker-compose.dev.yml up -d --wait
#
# This overlay:
#   - Moves Vespa (index), both model servers, OpenSearch, MinIO,
#     Redis (cache), and the background worker to profiles so they do
#     not start by default
#   - Makes depends_on references to removed services optional
#   - Sets DISABLE_VECTOR_DB=true on the api_server
#   - Uses PostgreSQL for caching and auth instead of Redis
#   - Uses PostgreSQL for file storage instead of S3/MinIO
#
# To selectively bring services back:
#   --profile vectordb          Vespa + indexing model server
#   --profile inference         Inference model server
#   --profile background        Background worker (Celery) — also needs redis
#   --profile redis             Redis cache
#   --profile opensearch        OpenSearch
#   --profile s3-filestore      MinIO (S3-compatible file store)
# =============================================================================

name: onyx

services:
  api_server:
    depends_on:
      index:
        condition: service_started
        required: false
      opensearch:
        condition: service_started
        required: false
      cache:
        condition: service_started
        required: false
      inference_model_server:
        condition: service_started
        required: false
      minio:
        condition: service_started
        required: false
    environment:
      - DISABLE_VECTOR_DB=true
      - FILE_STORE_BACKEND=postgres
      - CACHE_BACKEND=postgres
      - AUTH_BACKEND=postgres

  # Move the background worker to a profile so it does not start by default.
  # The API server handles all background work in lite mode.
  background:
    profiles: ["background"]
    depends_on:
      index:
        condition: service_started
        required: false
      inference_model_server:
        condition: service_started
        required: false
      indexing_model_server:
        condition: service_started
        required: false

  # Move Redis to a profile so it does not start by default.
  # The Postgres cache backend replaces Redis in lite mode.
  cache:
    profiles: ["redis"]

  # Move Vespa and indexing model server to a profile so they do not start.
  index:
    profiles: ["vectordb"]

  indexing_model_server:
    profiles: ["vectordb"]

  # Inference model server is only needed for local embeddings, not for LLM chat.
  inference_model_server:
    profiles: ["inference"]

  # OpenSearch is not needed in lite mode (no indexing).
  opensearch:
    profiles: ["opensearch"]

  # MinIO is not needed in lite mode (Postgres handles file storage).
  minio:
    profiles: ["s3-filestore"]


================================================
FILE: deployment/docker_compose/docker-compose.prod-cloud.yml
================================================
name: onyx

services:
  api_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.cloud
    command: >
      /bin/sh -c "alembic -n schema_private upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - minio
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
    env_file:
      - path: .env
        required: false
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  background:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: /app/scripts/supervisord_entrypoint.sh
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - indexing_model_server
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}
      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}
      # API Server connection for Discord bot message processing
      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
    env_file:
      - path: .env
        required: false
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  web_server:
    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=http://api_server:8080
    env_file:
      - path: .env
        required: false
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
  #   build:
  #     context: ../../backend
  #     dockerfile: Dockerfile
  #   command: >
  #     /bin/sh -c "if [ \"${MCP_SERVER_ENABLED:-}\" != \"True\" ] && [ \"${MCP_SERVER_ENABLED:-}\" != \"true\" ]; then
  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';
  #       exit 0;
  #     else
  #       exec python -m onyx.mcp_server_main;
  #     fi"
  #   env_file:
  #     - path: .env
  #       required: false
  #   depends_on:
  #     - relational_db
  #     - cache
  #   restart: "no"
  #   environment:
  #     - POSTGRES_HOST=relational_db
  #     - REDIS_HOST=cache
  #     # MCP Server Configuration
  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}
  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}
  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}
  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   volumes:
  #     - mcp_server_logs:/var/log/onyx

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    restart: unless-stopped
    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
    env_file:
      - path: .env
        required: false
    volumes:
      - db_volume:/var/lib/postgresql/data
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  inference_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=true
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  opensearch:
    image: opensearchproject/opensearch:3.4.0
    restart: unless-stopped
    # Controls whether this service runs. In order to enable it, add
    # opensearch-enabled to COMPOSE_PROFILES in the environment for this
    # docker-compose.
    # NOTE: Now enabled on by default. To explicitly disable this service,
    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not
    # list the profile, or when running docker compose, include all desired
    # service names but this one. Additionally set
    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.
    # profiles: ["opensearch-enabled"]
    environment:
      # We need discovery.type=single-node so that OpenSearch doesn't try
      # forming a cluster and waiting for other nodes to become live.
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
      # We do this to avoid unstable performance from page swaps.
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.
      # Java heap should be ~50% of memory limit. For now we assume a limit of
      # 4g although in practice the container can request more than this.
      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
      # Xms is the starting size, Xmx is the maximum size. These should be the
      # same.
      - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g"
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
    ulimits:
      # Similarly to bootstrap.memory_lock, we don't want to impose limits on
      # how much memory a process can lock from being swapped.
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit).
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.
        hard: 65536
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ../data/nginx:/nginx-templates:ro
      - ../data/certbot/conf:/etc/letsencrypt
      - ../data/certbot/www:/var/www/certbot
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template.prod"
    env_file:
      - .env.nginx
    environment:
      # Nginx proxy timeout settings (in seconds)
      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}
      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}
      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}

  # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71
  certbot:
    image: certbot/certbot
    restart: unless-stopped
    volumes:
      - ../data/certbot/conf:/etc/letsencrypt
      - ../data/certbot/www:/var/www/certbot
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

volumes:
  db_volume:
  vespa_volume:
  minio_data:
  # Created by the container itself
  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # mcp_server_logs:
  # Persistent data for OpenSearch.
  opensearch-data:


================================================
FILE: deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml
================================================
name: onyx

services:
  api_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "alembic upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - minio
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION_NAME=${AWS_REGION_NAME-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    env_file:
      - path: .env
        required: false
    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    volumes:
      # optional, only for debugging purposes
      - api_server_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system

  background:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: /app/scripts/supervisord_entrypoint.sh
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - indexing_model_server
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION_NAME=${AWS_REGION_NAME-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    env_file:
      - path: .env
        required: false
    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - background_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  web_server:
    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=http://api_server:8080
    env_file:
      - path: .env
        required: false
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
  #   build:
  #     context: ../../backend
  #     dockerfile: Dockerfile
  #   command: >
  #     /bin/sh -c "if [ \"${MCP_SERVER_ENABLED:-}\" != \"True\" ] && [ \"${MCP_SERVER_ENABLED:-}\" != \"true\" ]; then
  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';
  #       exit 0;
  #     else
  #       exec python -m onyx.mcp_server_main;
  #     fi"
  #   env_file:
  #     - path: .env
  #       required: false
  #   depends_on:
  #     - relational_db
  #     - cache
  #   restart: "no"
  #   environment:
  #     - POSTGRES_HOST=relational_db
  #     - REDIS_HOST=cache
  #     # MCP Server Configuration
  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}
  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}
  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}
  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   volumes:
  #     - mcp_server_logs:/var/log/onyx

  inference_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/app/.cache/huggingface/
      # optional, only for debugging purposes
      - inference_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/app/.cache/huggingface/
      # optional, only for debugging purposes
      - indexing_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    restart: unless-stopped
    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
    env_file:
      - path: .env
        required: false
    volumes:
      - db_volume:/var/lib/postgresql/data
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=true
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  opensearch:
    image: opensearchproject/opensearch:3.4.0
    restart: unless-stopped
    # Controls whether this service runs. In order to enable it, add
    # opensearch-enabled to COMPOSE_PROFILES in the environment for this
    # docker-compose.
    # NOTE: Now enabled on by default. To explicitly disable this service,
    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not
    # list the profile, or when running docker compose, include all desired
    # service names but this one. Additionally set
    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.
    # profiles: ["opensearch-enabled"]
    environment:
      # We need discovery.type=single-node so that OpenSearch doesn't try
      # forming a cluster and waiting for other nodes to become live.
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
      # We do this to avoid unstable performance from page swaps.
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.
      # Java heap should be ~50% of memory limit. For now we assume a limit of
      # 4g although in practice the container can request more than this.
      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
      # Xms is the starting size, Xmx is the maximum size. These should be the
      # same.
      - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g"
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
    ulimits:
      # Similarly to bootstrap.memory_lock, we don't want to impose limits on
      # how much memory a process can lock from being swapped.
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit).
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.
        hard: 65536
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ../data/nginx:/nginx-templates:ro
      - ../data/sslcerts:/etc/nginx/sslcerts
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template.prod.no-letsencrypt"
    env_file:
      - .env.nginx
    environment:
      # Nginx proxy timeout settings (in seconds)
      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}
      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}
      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

  code-interpreter:
    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}
    command: ["bash", "./entrypoint.sh", "code-interpreter-api"]
    restart: unless-stopped
    env_file:
      - path: .env
        required: false

    # Below is needed for the `docker-out-of-docker` execution mode
    user: root
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

    # uncomment below + comment out the above to use the `docker-in-docker` execution mode
    # privileged: true

volumes:
  db_volume:
  vespa_volume:
  minio_data:
  # Created by the container itself
  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # for logs that we don't want to lose on container restarts
  api_server_logs:
  background_logs:
  inference_model_server_logs:
  indexing_model_server_logs:
  # mcp_server_logs:
  # Shared volume for persistent document storage (Craft file-system mode)
  file-system:
  # Persistent data for OpenSearch.
  opensearch-data:


================================================
FILE: deployment/docker_compose/docker-compose.prod.yml
================================================
name: onyx

services:
  api_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "
      alembic upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - minio
      - inference_model_server
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION_NAME=${AWS_REGION_NAME-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    env_file:
      - path: .env
        required: false
    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    volumes:
      - api_server_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system

  background:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "
      if [ -f /etc/ssl/certs/custom-ca.crt ]; then
        update-ca-certificates;
      fi &&
      /app/scripts/supervisord_entrypoint.sh"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - indexing_model_server
    restart: unless-stopped
    environment:
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION_NAME=${AWS_REGION_NAME-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}
      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}
      # API Server connection for Discord bot message processing
      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    env_file:
      - path: .env
        required: false
    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - background_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    # Uncomment the following lines if you need to include a custom CA certificate
    # This section enables the use of a custom CA certificate
    # If present, the custom CA certificate is mounted as a volume
    # The container checks for its existence and updates the system's CA certificates
    # This allows for secure communication with services using custom SSL certificates
    # volumes:
    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile
    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro

  web_server:
    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=http://api_server:8080
    env_file:
      - path: .env
        required: false
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
  #   build:
  #     context: ../../backend
  #     dockerfile: Dockerfile
  #   command: >
  #     /bin/sh -c "if [ \"${MCP_SERVER_ENABLED:-}\" != \"True\" ] && [ \"${MCP_SERVER_ENABLED:-}\" != \"true\" ]; then
  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';
  #       exit 0;
  #     else
  #       exec python -m onyx.mcp_server_main;
  #     fi"
  #   env_file:
  #     - path: .env
  #       required: false
  #   depends_on:
  #     - relational_db
  #     - cache
  #   restart: "no"
  #   environment:
  #     - POSTGRES_HOST=relational_db
  #     - REDIS_HOST=cache
  #     # MCP Server Configuration
  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}
  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}
  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}
  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   volumes:
  #     - mcp_server_logs:/var/log/onyx

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    restart: unless-stopped
    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
    env_file:
      - path: .env
        required: false
    volumes:
      - db_volume:/var/lib/postgresql/data
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  inference_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: unless-stopped
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/app/.cache/huggingface/
      # optional, only for debugging purposes
      - inference_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: unless-stopped
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/app/.cache/huggingface/
      # optional, only for debugging purposes
      - indexing_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=true
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  opensearch:
    image: opensearchproject/opensearch:3.4.0
    restart: unless-stopped
    # Controls whether this service runs. In order to enable it, add
    # opensearch-enabled to COMPOSE_PROFILES in the environment for this
    # docker-compose.
    # NOTE: Now enabled on by default. To explicitly disable this service,
    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not
    # list the profile, or when running docker compose, include all desired
    # service names but this one. Additionally set
    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.
    # profiles: ["opensearch-enabled"]
    environment:
      # We need discovery.type=single-node so that OpenSearch doesn't try
      # forming a cluster and waiting for other nodes to become live.
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
      # We do this to avoid unstable performance from page swaps.
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.
      # Java heap should be ~50% of memory limit. For now we assume a limit of
      # 4g although in practice the container can request more than this.
      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
      # Xms is the starting size, Xmx is the maximum size. These should be the
      # same.
      - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g"
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
    ulimits:
      # Similarly to bootstrap.memory_lock, we don't want to impose limits on
      # how much memory a process can lock from being swapped.
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit).
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.
        hard: 65536
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ../data/nginx:/nginx-templates:ro
      - ../data/certbot/conf:/etc/letsencrypt
      - ../data/certbot/www:/var/www/certbot
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template.prod"
    env_file:
      - .env.nginx
    environment:
      # Nginx proxy timeout settings (in seconds)
      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}
      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}
      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}

  # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71
  certbot:
    image: certbot/certbot
    restart: unless-stopped
    volumes:
      - ../data/certbot/conf:/etc/letsencrypt
      - ../data/certbot/www:/var/www/certbot
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

  code-interpreter:
    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}
    command: ["bash", "./entrypoint.sh", "code-interpreter-api"]
    restart: unless-stopped
    env_file:
      - path: .env
        required: false

    # Below is needed for the `docker-out-of-docker` execution mode
    user: root
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

    # uncomment below + comment out the above to use the `docker-in-docker` execution mode
    # privileged: true

volumes:
  db_volume:
  vespa_volume:
  minio_data:
  # Created by the container itself
  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # for logs that we don't want to lose on container restarts
  api_server_logs:
  background_logs:
  inference_model_server_logs:
  indexing_model_server_logs:
  # mcp_server_logs:
  # Shared volume for persistent document storage (Craft file-system mode)
  file-system:
  # Persistent data for OpenSearch.
  opensearch-data:


================================================
FILE: deployment/docker_compose/docker-compose.resources.yml
================================================
# Docker service resource limits. Most are commented out by default.
# 'background' service has preset (override-able) limits due to variable resource needs.
# Uncomment and set env vars for specific service limits.
# See: https://docs.danswer.dev/deployment/resource-sizing for details.

services:
  background:
    deploy:
      resources:
        limits:
          # 6 CPUs, 10GB of memory. Very generous, primarily to prevent OOM crashing the host machine.
          cpus: ${BACKGROUND_CPU_LIMIT:-6}
          memory: ${BACKGROUND_MEM_LIMIT:-10g}
  #       reservations:
  #         cpus: ${BACKGROUND_CPU_RESERVATION}
  #         memory: ${BACKGROUND_MEM_RESERVATION}
  #
  nginx:
    deploy:
      resources:
        limits:
          cpus: ${NGINX_CPU_LIMIT:-1}
          memory: ${NGINX_MEM_LIMIT:-1g}
  #      reservations:
  #        cpus: ${NGINX_CPU_RESERVATION:}
  #        memory: ${NGINX_MEM_RESERVATION}
  #
  api_server:
    deploy:
      resources:
        limits:
          cpus: ${API_SERVER_CPU_LIMIT:-2}
          memory: ${API_SERVER_MEM_LIMIT:-4g}
  #      reservations:
  #        cpus: ${API_SERVER_CPU_RESERVATION}
  #        memory: ${API_SERVER_MEM_RESERVATION}
  #
  #   index:
  #     deploy:
  #       resources:
  #         limits:
  #           cpus: ${VESPA_CPU_LIMIT}
  #           memory: ${VESPA_MEM_LIMIT}
  #         reservations:
  #           cpus: ${VESPA_CPU_RESERVATION}
  #           memory: ${VESPA_MEM_RESERVATION}
  #
  inference_model_server:
    deploy:
      resources:
        limits:
          # cpus: ${INFERENCE_CPU_LIMIT}
          memory: ${INFERENCE_MEM_LIMIT:-5g}
  #       reservations:
  #         cpus: ${INFERENCE_CPU_RESERVATION}
  #         memory: ${INFERENCE_MEM_RESERVATION}
  #
  indexing_model_server:
    deploy:
      resources:
        limits:
          # cpus: ${INDEXING_CPU_LIMIT}
          memory: ${INDEXING_MEM_LIMIT:-5g}
  #       reservations:
  #         cpus: ${INDEXING_CPU_RESERVATION}
  #         memory: ${INDEXING_MEM_RESERVATION}
  #
  relational_db:
    deploy:
      resources:
        limits:
          cpus: ${POSTGRES_CPU_LIMIT:-2}
          memory: ${POSTGRES_MEM_LIMIT:-4g}
  #         reservations:
  #           cpus: ${POSTGRES_CPU_RESERVATION}
  #           memory: ${POSTGRES_MEM_RESERVATION}

  # minio:
  #   deploy:
  #     resources:
  #       limits:
  #         cpus: ${MINIO_CPU_LIMIT:-1}
  #         memory: ${MINIO_MEM_LIMIT:-1g}
  #       reservations:
  #         cpus: ${MINIO_CPU_RESERVATION}
  #         memory: ${MINIO_MEM_RESERVATION}


================================================
FILE: deployment/docker_compose/docker-compose.search-testing.yml
================================================
name: onyx

services:
  api_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "alembic upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - minio
    restart: unless-stopped
    ports:
      - "8080"
    env_file:
      - .env_eval
    environment:
      - AUTH_TYPE=basic
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
      - LICENSE_ENFORCEMENT_ENABLED=false
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  background:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: /app/scripts/supervisord_entrypoint.sh
    depends_on:
      - relational_db
      - index
      - cache
    restart: unless-stopped
    env_file:
      - .env_eval
    environment:
      - AUTH_TYPE=basic
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
      - LICENSE_ENFORCEMENT_ENABLED=false
      # MinIO configuration
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - log_store:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  web_server:
    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}

        # Enterprise Edition only
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.
        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=http://api_server:8080
      - WEB_DOMAIN=${WEB_DOMAIN:-}
      - THEME_IS_DARK=${THEME_IS_DARK:-}

      # Enterprise Edition only
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}

  inference_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - LOG_LEVEL=${LOG_LEVEL:-debug}
    volumes:
      - inference_model_cache_huggingface:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      - LOG_LEVEL=${LOG_LEVEL:-debug}
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
    volumes:
      - inference_model_cache_huggingface:/app/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    restart: unless-stopped
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
      - DB_READONLY_USER=${DB_READONLY_USER:-}
      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}
    ports:
      - "5432"
    volumes:
      - db_volume:/var/lib/postgresql/data

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=true
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    environment:
      - DOMAIN=localhost
    ports:
      - "${NGINX_PORT:-3000}:80" # allow for localhost:3000 usage, since that is the norm
    volumes:
      - ../data/nginx:/nginx-templates:ro
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template"

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    ports:
      - "9004:9000"
      - "9005:9001"
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    ports:
      - "6379:6379"
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

volumes:
  inference_model_cache_huggingface:
  db_volume:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}
  vespa_volume:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
  log_store: # for logs that we don't want to lose on container restarts
  minio_data:


================================================
FILE: deployment/docker_compose/docker-compose.yml
================================================
# =============================================================================
# ONYX DOCKER COMPOSE
# =============================================================================
# This is the default configuration for Onyx. This file is fairly configurable,
# also see env.template for possible settings.
#
# PRODUCTION DEPLOYMENT CHECKLIST:
# To convert this setup to a production deployment following best practices,
# follow the checklist below. Note that there are other ways to secure the Onyx
# deployment so these are not strictly necessary for all teams.
#
# 1. SECURITY HARDENING:
#    - Remove all port exposures except nginx (80/443)
#    - Comment out ports for: api_server, relational_db, index, cache, minio
#
# 2. SSL/TLS SETUP:
#    - Uncomment the certbot service (see below)
#    - Add SSL certificate volumes to nginx service
#    - Change nginx command from app.conf.template to app.conf.template.prod
#
# 3. ENVIRONMENT CONFIGURATION:
#    - Replace env_file with explicit environment variables
#
# 4. AUTHENTICATION:
#    - Select an authentication method like Basic, Google OAuth, OIDC, or SAML
#
# 5. CA CERTIFICATES:
#    - Uncomment custom CA certificate volumes if needed
#
# 6. DOMAIN CONFIGURATION:
#    - Set proper DOMAIN environment variable for nginx
#    - Configure DNS and SSL certificates
#
# For a complete production setup, refer to docker-compose.prod.yml
# =============================================================================

name: onyx

services:
  api_server:
    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile
      args:
        - ENABLE_CRAFT=${ENABLE_CRAFT:-false}
    command: >
      /bin/sh -c "alembic upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    # Check env.template and copy to .env for env vars
    env_file:
      - path: .env
        required: false
    depends_on:
      relational_db:
        condition: service_started
      index:
        condition: service_started
      opensearch:
        condition: service_started
        required: false
      cache:
        condition: service_started
      inference_model_server:
        condition: service_started
      minio:
        condition: service_started
        required: false
    restart: unless-stopped
    # DEV: To expose ports, either:
    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "8080:8080"
    environment:
      # Auth Settings
      - AUTH_TYPE=${AUTH_TYPE:-basic}
      - FILE_STORE_BACKEND=${FILE_STORE_BACKEND:-s3}
      - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}
      - VESPA_HOST=${VESPA_HOST:-index}
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=${REDIS_HOST:-cache}
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      # Onyx Craft configuration (disabled by default, set ENABLE_CRAFT=true in .env to enable)
      # Use --include-craft with install script, or manually set in .env file
      - ENABLE_CRAFT=${ENABLE_CRAFT:-false}
      - OUTPUTS_TEMPLATE_PATH=${OUTPUTS_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs}
      - VENV_TEMPLATE_PATH=${VENV_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv}
      - WEB_TEMPLATE_PATH=${WEB_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    # PRODUCTION: Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    healthcheck:
      test:
        [
          "CMD",
          "python",
          "-c",
          "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')",
        ]
      interval: 30s
      timeout: 20s
      retries: 3
      start_period: 25s
    # Optional, only for debugging purposes
    volumes:
      - api_server_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system

  background:
    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile
      args:
        - ENABLE_CRAFT=${ENABLE_CRAFT:-false}
    command: >
      /bin/sh -c "
      if [ -f /app/scripts/setup_craft_templates.sh ]; then
        /app/scripts/setup_craft_templates.sh;
      fi &&
      if [ -f /etc/ssl/certs/custom-ca.crt ]; then
        update-ca-certificates;
      fi &&
      /app/scripts/supervisord_entrypoint.sh"
    env_file:
      - path: .env
        required: false
    depends_on:
      relational_db:
        condition: service_started
      index:
        condition: service_started
      opensearch:
        condition: service_started
        required: false
      cache:
        condition: service_started
      inference_model_server:
        condition: service_started
      indexing_model_server:
        condition: service_started
    restart: unless-stopped
    environment:
      - FILE_STORE_BACKEND=${FILE_STORE_BACKEND:-s3}
      - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}
      - VESPA_HOST=${VESPA_HOST:-index}
      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}
      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}
      - REDIS_HOST=${REDIS_HOST:-cache}
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}
      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}
      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}
      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}
      # API Server connection for Discord bot message processing
      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
      # Onyx Craft configuration (set up automatically on container startup)
      - ENABLE_CRAFT=${ENABLE_CRAFT:-false}
      - OUTPUTS_TEMPLATE_PATH=${OUTPUTS_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs}
      - VENV_TEMPLATE_PATH=${VENV_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv}
      - WEB_TEMPLATE_PATH=${WEB_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web}
      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}
    # PRODUCTION: Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # Optional, only for debugging purposes
    volumes:
      - background_logs:/var/log/onyx
      # Shared volume for persistent document storage (Craft file-system mode)
      - file-system:/app/file-system
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    # PRODUCTION: Uncomment the following lines if you need to include a custom CA certificate
    # This section enables the use of a custom CA certificate
    # If present, the custom CA certificate is mounted as a volume
    # The container checks for its existence and updates the system's CA certificates
    # This allows for secure communication with services using custom SSL certificates
    # Optional volume mount for CA certificate
    # volumes:
    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile
    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro

  web_server:
    image: ${ONYX_WEB_SERVER_IMAGE:-onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
        # Enterprise Edition only
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.
        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
        - NODE_OPTIONS=${NODE_OPTIONS:-"--max-old-space-size=4096"}
    env_file:
      - path: .env
        required: false
    depends_on:
      - api_server
    restart: unless-stopped
    environment:
      - INTERNAL_URL=${INTERNAL_URL:-http://api_server:8080}

  # Uncomment the block below to enable the MCP server for Onyx.
  # mcp_server:
  #   image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}
  #   build:
  #     context: ../../backend
  #     dockerfile: Dockerfile
  #   command: >
  #     /bin/sh -c "if [ \"${MCP_SERVER_ENABLED:-}\" != \"True\" ] && [ \"${MCP_SERVER_ENABLED:-}\" != \"true\" ]; then
  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';
  #       exit 0;
  #     else
  #       exec python -m onyx.mcp_server_main;
  #     fi"
  #   env_file:
  #     - path: .env
  #       required: false
  #   depends_on:
  #     - relational_db
  #     - cache
  #   restart: "no"
  #   environment:
  #     - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}
  #     - REDIS_HOST=${REDIS_HOST:-cache}
  #     # MCP Server Configuration
  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}
  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}
  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}
  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}
  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   # Optional, only for debugging purposes
  #   volumes:
  #     - mcp_server_logs:/var/log/onyx

  inference_model_server:
    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    # GPU Support: Uncomment the following lines to enable GPU support
    # Requires nvidia-container-toolkit to be installed on the host
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    env_file:
      - path: .env
        required: false
    restart: unless-stopped
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/app/.cache/huggingface/
      # Optional, only for debugging purposes
      - inference_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    healthcheck:
      test:
        [
          "CMD",
          "python",
          "-c",
          "import urllib.request; urllib.request.urlopen('http://localhost:9000/api/health')",
        ]
      interval: 20s
      timeout: 5s
      retries: 3

  indexing_model_server:
    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    # GPU Support: Uncomment the following lines to enable GPU support
    # Requires nvidia-container-toolkit to be installed on the host
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER:-}\" = \"true\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    env_file:
      - path: .env
        required: false
    restart: unless-stopped
    environment:
      - INDEXING_ONLY=True
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/app/.cache/huggingface/
      # Optional, only for debugging purposes
      - indexing_model_server_logs:/var/log/onyx
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    healthcheck:
      test:
        [
          "CMD",
          "python",
          "-c",
          "import urllib.request; urllib.request.urlopen('http://localhost:9000/api/health')",
        ]
      interval: 20s
      timeout: 5s
      retries: 3

  relational_db:
    image: postgres:15.2-alpine
    shm_size: 1g
    command: -c 'max_connections=250'
    env_file:
      - path: .env
        required: false
    restart: unless-stopped
    # PRODUCTION: Override the defaults by passing in the environment variables
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
    # DEV: To expose ports, either:
    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
      interval: 10s
      timeout: 5s
      retries: 5
    volumes:
      - db_volume:/var/lib/postgresql/data

  # This container name cannot have an underscore in it due to Vespa expectations of the URL
  index:
    image: vespaengine/vespa:8.609.39
    restart: unless-stopped
    env_file:
      - path: .env
        required: false
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=${VESPA_SKIP_UPGRADE_CHECK:-true}
    # DEV: To expose ports, either:
    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "19071:19071"
    #   - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  opensearch:
    image: opensearchproject/opensearch:3.4.0
    restart: unless-stopped
    # Controls whether this service runs. In order to enable it, add
    # opensearch-enabled to COMPOSE_PROFILES in the environment for this
    # docker-compose.
    # NOTE: Now enabled on by default. To explicitly disable this service,
    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not
    # list the profile, or when running docker compose, include all desired
    # service names but this one. Additionally set
    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.
    # profiles: ["opensearch-enabled"]
    environment:
      # We need discovery.type=single-node so that OpenSearch doesn't try
      # forming a cluster and waiting for other nodes to become live.
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}
      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
      # We do this to avoid unstable performance from page swaps.
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.
      # Java heap should be ~50% of memory limit. For now we assume a limit of
      # 4g although in practice the container can request more than this.
      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
      # Xms is the starting size, Xmx is the maximum size. These should be the
      # same.
      - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g"
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/
    ulimits:
      # Similarly to bootstrap.memory_lock, we don't want to impose limits on
      # how much memory a process can lock from being swapped.
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit).
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.
        hard: 65536
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.25.5-alpine
    restart: unless-stopped
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    env_file:
      - path: .env
        required: false
    environment:
      - DOMAIN=localhost
      # Nginx proxy timeout settings (in seconds)
      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}
      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}
      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}
    ports:
      - "${HOST_PORT_80:-80}:80"
      - "${HOST_PORT:-3000}:80" # allow for localhost:3000 usage, since that is the norm
    volumes:
      # Mount templates read-only; the startup command copies them into
      # the writable /etc/nginx/conf.d/ inside the container.  This avoids
      # "Permission denied" errors on Windows Docker bind mounts.
      - ../data/nginx:/nginx-templates:ro
    # PRODUCTION: Add SSL certificate volumes for HTTPS support:
    #   - ../data/certbot/conf:/etc/letsencrypt
    #   - ../data/certbot/www:/var/www/certbot
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    # The specified script waits for the api_server to start up.
    # Without this we've seen issues where nginx shows no error logs but
    # does not receive any traffic
    # PRODUCTION: Change to app.conf.template.prod for production nginx config
    command: >
      /bin/sh -c "rm -f /etc/nginx/conf.d/default.conf
      && cp -a /nginx-templates/. /etc/nginx/conf.d/
      && sed 's/\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh
      && chmod +x /tmp/run-nginx.sh
      && /tmp/run-nginx.sh app.conf.template"

  cache:
    image: redis:7.4-alpine
    restart: unless-stopped
    # DEV: To expose ports, either:
    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "6379:6379"
    # docker silently mounts /data even without an explicit volume mount, which enables
    # persistence. explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no
    env_file:
      - path: .env
        required: false
    # Use tmpfs to prevent creation of anonymous volumes for /data
    tmpfs:
      - /data

  minio:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    profiles: ["s3-filestore"]
    restart: unless-stopped
    # DEV: To expose ports, either:
    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "9004:9000"
    #   - "9005:9001"
    env_file:
      - path: .env
        required: false
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      # Note: we've seen the default bucket creation logic not work in some cases
      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3

  code-interpreter:
    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}
    command: ["bash", "./entrypoint.sh", "code-interpreter-api"]
    restart: unless-stopped
    env_file:
      - path: .env
        required: false

    # Below is needed for the `docker-out-of-docker` execution mode
    # For Linux rootless Docker, set DOCKER_SOCK_PATH=${XDG_RUNTIME_DIR}/docker.sock
    user: root
    volumes:
      - ${DOCKER_SOCK_PATH:-/var/run/docker.sock}:/var/run/docker.sock

    # uncomment below + comment out the above to use the `docker-in-docker` execution mode
    # privileged: true

  # PRODUCTION: Uncomment the following certbot service for SSL certificate management
  # certbot:
  #   image: certbot/certbot
  #   restart: unless-stopped
  #   volumes:
  #     - ../data/certbot/conf:/etc/letsencrypt
  #     - ../data/certbot/www:/var/www/certbot
  #   logging:
  #     driver: json-file
  #     options:
  #       max-size: "50m"
  #       max-file: "6"
  #   entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"

volumes:
  # Necessary for persisting data for use
  db_volume:
  vespa_volume: # Created by the container itself
  minio_data:
  # Caches to prevent re-downloading models, not strictly necessary
  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # Logs preserved across container restarts
  api_server_logs:
  background_logs:
  # mcp_server_logs:
  inference_model_server_logs:
  indexing_model_server_logs:
  # Shared volume for persistent document storage (Craft file-system mode)
  file-system:
  # Persistent data for OpenSearch.
  opensearch-data:


================================================
FILE: deployment/docker_compose/env.nginx.template
================================================
# DOMAIN is necessary for https setup, EMAIL is optional
DOMAIN=
EMAIL=

# If using the `no-letsencrypt` setup, the below are required.
# They specify the path within /onyx/deployment/data/sslcerts directory
# where the certificate / certificate key can be found. You can either
# name your certificate / certificate key files to follow the convention
# below or adjust these to match your naming conventions.
SSL_CERT_FILE_NAME=ssl.cert
SSL_CERT_KEY_FILE_NAME=ssl.key


================================================
FILE: deployment/docker_compose/env.prod.template
================================================
# Fill in the values and copy the contents of this file to .env in the deployment directory.
# Some valid default values are provided where applicable, delete the variables which you don't set values for.
# This is only necessary when using the docker-compose.prod.yml compose file.


# Could be something like onyx.companyname.com
WEB_DOMAIN=http://localhost:3000

# The following are for configuring User Authentication, supported flows are:
# disabled
# basic (standard username / password)
# google_oauth (login with google/gmail account)
# oidc
# saml
AUTH_TYPE=google_oauth

# Set the values below to use with Google OAuth
GOOGLE_OAUTH_CLIENT_ID=
GOOGLE_OAUTH_CLIENT_SECRET=
SECRET=

# if using basic auth and you want to require email verification, 
# then uncomment / set the following
#REQUIRE_EMAIL_VERIFICATION=true
#SMTP_USER=your-email@company.com
#SMTP_PASS=your-gmail-password

# The below are only needed if you aren't using gmail as your SMTP
#SMTP_SERVER=  
#SMTP_PORT=
# When missing SMTP_USER, this is used instead
#EMAIL_FROM=

# OpenID Connect (OIDC)
#OPENID_CONFIG_URL=
#OIDC_PKCE_ENABLED=

# SAML config directory for OneLogin compatible setups
#SAML_CONF_DIR=


# How long before user needs to reauthenticate, default to 7 days. (cookie expiration time)
SESSION_EXPIRE_TIME_SECONDS=604800


# Use the below to specify a list of allowed user domains, only checked if user Auth is turned on
# e.g. `VALID_EMAIL_DOMAINS=example.com,example.org` will only allow users
# with an @example.com or an @example.org email
#VALID_EMAIL_DOMAINS=


# Default values here are what Postgres uses by default, feel free to change.
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password


# Default values here for the read-only user for the knowledge graph and other future read-only purposes. 
# Please change password!
DB_READONLY_USER=db_readonly_user
DB_READONLY_PASSWORD=password

# If setting the vespa language is required, set this ('en', 'de', etc.).
# See: https://docs.vespa.ai/en/linguistics.html 
#VESPA_LANGUAGE_OVERRIDE=

# Show extra/uncommon connectors
# See https://docs.onyx.app/admins/connectors/overview for a full list of connectors
SHOW_EXTRA_CONNECTORS=False


================================================
FILE: deployment/docker_compose/env.template
================================================
# Copy this file to .env so it's picked up by the docker compose yaml files
# Uncomment the values you would like to set
# No edits necessary, works out of the box


################################################################################
## COMMONLY MODIFIED CONFIGURATIONS
################################################################################
## Version of Onyx to deploy, default is latest (main built nightly)
## For Craft support, use: IMAGE_TAG=craft-latest
IMAGE_TAG=latest

## Onyx Craft Configuration
## Craft enables AI-powered web app building within Onyx (disabled by default)
## To enable Craft, uncomment the lines below (and comment out the above)
## or use --include-craft with the install script
## This adds Node.js 20 and opencode CLI to the image at build time
# ENABLE_CRAFT=true
# IMAGE_TAG=craft-latest

## Auth Settings
### https://docs.onyx.app/deployment/authentication
AUTH_TYPE=basic
# SESSION_EXPIRE_TIME_SECONDS=
### Recommended for basic auth - used for signing password reset and verification tokens
### If using install.sh, this will be auto-generated
### If setting manually, run: openssl rand -hex 32
USER_AUTH_SECRET=""
### Recommend to set this for security
# ENCRYPTION_KEY_SECRET=
### Optional
# API_KEY_HASH_ROUNDS=
### You can add a comma separated list of domains like onyx.app, only those domains will be allowed to signup/log in
# VALID_EMAIL_DOMAINS=

## Chat Configuration
# HARD_DELETE_CHATS=
# MAX_ALLOWED_UPLOAD_SIZE_MB=250
# Default per-user upload size limit (MB) when no admin value is set.
# Automatically clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at runtime.
# DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=100

## Base URL for redirects
# WEB_DOMAIN=

## Enterprise Features, requires a paid plan and licenses
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false


################################################################################
## SERVICES CONFIGURATIONS
################################################################################
## Database Configuration
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
# POSTGRES_DB=
# POSTGRES_DEFAULT_SCHEMA=
# POSTGRES_USE_NULL_POOL=
# POSTGRES_API_SERVER_POOL_SIZE=
# POSTGRES_API_SERVER_POOL_OVERFLOW
# POSTGRES_IDLE_SESSIONS_TIMEOUT=
# POSTGRES_POOL_RECYCLE=
# DB_READONLY_USER=
# DB_READONLY_PASSWORD=

## File Store Backend: "s3" (default, uses MinIO) or "postgres" (no extra services needed)
## COMPOSE_PROFILES activates the MinIO service. To use PostgreSQL file storage instead,
## remove s3-filestore from COMPOSE_PROFILES and set FILE_STORE_BACKEND=postgres.
COMPOSE_PROFILES=s3-filestore
FILE_STORE_BACKEND=s3
## Setting for enabling OpenSearch.
OPENSEARCH_FOR_ONYX_ENABLED=true

## MinIO/S3 Configuration (only needed when FILE_STORE_BACKEND=s3)
S3_ENDPOINT_URL=http://minio:9000
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin

## Nginx Proxy Timeout Configuration (in seconds)
## These settings control how long nginx waits for upstream servers (api_server/web_server)
## Increase these values if you experience timeout errors with long-running requests
# NGINX_PROXY_CONNECT_TIMEOUT=300
# NGINX_PROXY_SEND_TIMEOUT=300
# NGINX_PROXY_READ_TIMEOUT=300

## MCP Server Configuration
## The MCP (Model Context Protocol) server allows external MCP clients to interact with Onyx
## Set to true to enable the MCP server (disabled by default)
# MCP_SERVER_ENABLED=false
## Port for the MCP server (defaults to 8090)
# MCP_SERVER_PORT=8090
## CORS origins for MCP clients (comma-separated list)
# MCP_SERVER_CORS_ORIGINS=

## Discord Bot Configuration
## The Discord bot allows users to interact with Onyx from Discord servers
## Bot token from Discord Developer Portal (required to enable the bot)
# DISCORD_BOT_TOKEN=
## Command prefix for bot commands (default: "!")
# DISCORD_BOT_INVOKE_CHAR=!

## Celery Configuration
# CELERY_BROKER_POOL_LIMIT=
# CELERY_WORKER_DOCFETCHING_CONCURRENCY=
# CELERY_WORKER_DOCPROCESSING_CONCURRENCY=
# CELERY_WORKER_LIGHT_CONCURRENCY=
# CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=

## AWS Configuration
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=
# AWS_REGION_NAME=
# Set to true when using IAM authentication for Postgres connections.
USE_IAM_AUTH=false


################################################################################
## DEVELOPER, DEBUGGING, AND LOGGING
################################################################################
## Logging and Telemetry
LOG_LEVEL=info
LOG_ONYX_MODEL_INTERACTIONS=False
# LOG_VESPA_TIMING_INFORMATION=
# LOG_ENDPOINT_LATENCY=
# LOG_POSTGRES_LATENCY=
# LOG_POSTGRES_CONN_COUNTS=
# DISABLE_TELEMETRY=

## Feature Flags
# SHOW_EXTRA_CONNECTORS=true
# DISABLE_MODEL_SERVER=false

## Analytics
# SENTRY_DSN=

## Demo/Testing
# MOCK_CONNECTOR_FILE_PATH=


################################################################################
## ADVANCED CONFIGURATIONS
################################################################################
## SlackBot Configuration
# ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER=
# ONYX_BOT_FEEDBACK_VISIBILITY=
# ONYX_BOT_DISPLAY_ERROR_MSGS=
# NOTIFY_SLACKBOT_NO_ANSWER=
# ONYX_BOT_MAX_QPM=
# ONYX_BOT_MAX_WAIT_TIME=

## Advanced Auth Settings
# GOOGLE_OAUTH_CLIENT_ID=
# GOOGLE_OAUTH_CLIENT_SECRET=
# REQUIRE_EMAIL_VERIFICATION=
# SMTP_SERVER=
# SMTP_PORT=
# SMTP_USER=
# SMTP_PASS=
# ENABLE_EMAIL_INVITES=
# EMAIL_FROM=
# OAUTH_CLIENT_ID=
# OAUTH_CLIENT_SECRET=
# OPENID_CONFIG_URL=
# OIDC_PKCE_ENABLED=
# TRACK_EXTERNAL_IDP_EXPIRY=
# CORS_ALLOWED_ORIGIN=
# INTEGRATION_TESTS_MODE=
# JWT_PUBLIC_KEY_URL=

## Gen AI Settings
# GEN_AI_MAX_TOKENS=
# LLM_SOCKET_READ_TIMEOUT=
# MAX_CHUNKS_FED_TO_CHAT=
# DISABLE_LITELLM_STREAMING=
# LITELLM_EXTRA_HEADERS=
# GEN_AI_API_KEY=
# GENERATIVE_MODEL_ACCESS_CHECK_FREQ=
# LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=

## Query Options
# DOC_TIME_DECAY=
# HYBRID_ALPHA=
# EDIT_KEYWORD_QUERY=
# USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH=

## Model Configuration
# EMBEDDING_BATCH_SIZE=
# DOCUMENT_ENCODER_MODEL=
# DOC_EMBEDDING_DIM=
# NORMALIZE_EMBEDDINGS=
# ASYM_QUERY_PREFIX=
# ASYM_PASSAGE_PREFIX=
# DISABLE_RERANK_FOR_STREAMING=
# MODEL_SERVER_PORT=
# INDEX_BATCH_SIZE=
# MIN_THREADS_ML_MODELS=
# CLIENT_EMBEDDING_TIMEOUT=

## Indexing Configuration
# VESPA_SEARCHER_THREADS=
# ENABLED_CONNECTOR_TYPES=
# DISABLE_INDEX_UPDATE_ON_SWAP=
# CONTINUE_ON_CONNECTOR_FAILURE=
# CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=
# JIRA_CONNECTOR_LABELS_TO_SKIP=
# WEB_CONNECTOR_VALIDATE_URLS=
# JIRA_SERVER_API_VERSION=
# JIRA_CLOUD_API_VERSION=
# GONG_CONNECTOR_START_TIME=
# NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=
# GITHUB_CONNECTOR_BASE_URL=
# MAX_DOCUMENT_CHARS=
# MAX_FILE_SIZE_BYTES=

## OAuth Connector Configs
# EGNYTE_CLIENT_ID=
# EGNYTE_CLIENT_SECRET=
# EGNYTE_LOCALHOST_OVERRIDE=
# LINEAR_CLIENT_ID=
# LINEAR_CLIENT_SECRET=

## Miscellaneous
# ONYX_QUERY_HISTORY_TYPE=
# CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=
# VESPA_LANGUAGE_OVERRIDE=

## Frontend Configs
# THEME_IS_DARK=
# NEXT_PUBLIC_DISABLE_LOGOUT=
# NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=
# NEXT_PUBLIC_THEME=
# NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=
# NEXT_PUBLIC_CUSTOM_REFRESH_URL=

## Pointer to services
POSTGRES_HOST=relational_db
VESPA_HOST=index
REDIS_HOST=cache
MODEL_SERVER_HOST=inference_model_server
INDEXING_MODEL_SERVER_HOST=indexing_model_server
INTERNAL_URL=http://api_server:8080


================================================
FILE: deployment/docker_compose/init-letsencrypt.sh
================================================
#!/bin/bash

# .env.nginx file must be present in the same directory as this script and
# must set DOMAIN (and optionally EMAIL)
set -o allexport
source .env.nginx
set +o allexport

# Function to determine correct docker compose command
docker_compose_cmd() {
  if command -v docker-compose >/dev/null 2>&1; then
    echo "docker-compose"
  elif command -v docker compose >/dev/null 2>&1; then
    echo "docker compose"
  else
    echo 'Error: docker-compose or docker compose is not installed.' >&2
    exit 1
  fi
}

# Assign appropriate Docker Compose command
COMPOSE_CMD=$(docker_compose_cmd)

# Only add www to domain list if domain wasn't explicitly set as a subdomain
if [[ ! $DOMAIN == www.* ]]; then
    domains=("$DOMAIN" "www.$DOMAIN")
else
    domains=("$DOMAIN")
fi

rsa_key_size=4096
data_path="../data/certbot"
email="$EMAIL" # Adding a valid address is strongly recommended
staging=0 # Set to 1 if you're testing your setup to avoid hitting request limits

if [ -d "$data_path" ]; then
  read -p "Existing data found for $domains. Continue and replace existing certificate? (y/N) " decision
  if [ "$decision" != "Y" ] && [ "$decision" != "y" ]; then
    exit
  fi
fi


if [ ! -e "$data_path/conf/options-ssl-nginx.conf" ] || [ ! -e "$data_path/conf/ssl-dhparams.pem" ]; then
  echo "### Downloading recommended TLS parameters ..."
  mkdir -p "$data_path/conf"
  curl -s https://raw.githubusercontent.com/certbot/certbot/master/certbot-nginx/certbot_nginx/_internal/tls_configs/options-ssl-nginx.conf > "$data_path/conf/options-ssl-nginx.conf"
  curl -s https://raw.githubusercontent.com/certbot/certbot/master/certbot/certbot/ssl-dhparams.pem > "$data_path/conf/ssl-dhparams.pem"
  echo
fi

echo "### Creating dummy certificate for $domains ..."
path="/etc/letsencrypt/live/$domains"
mkdir -p "$data_path/conf/live/$domains"
$COMPOSE_CMD -f docker-compose.prod.yml run  --name onyx --rm --entrypoint "\
  openssl req -x509 -nodes -newkey rsa:$rsa_key_size -days 1\
    -keyout '$path/privkey.pem' \
    -out '$path/fullchain.pem' \
    -subj '/CN=localhost'" certbot
echo


echo "### Starting nginx ..."
$COMPOSE_CMD -f docker-compose.prod.yml up --force-recreate -d nginx
echo

echo "Waiting for nginx to be ready, this may take a minute..."
while true; do
  # Use curl to send a request and capture the HTTP status code
  status_code=$(curl -o /dev/null -s -w "%{http_code}\n" "http://localhost/api/health")
  
  # Check if the status code is 200
  if [ "$status_code" -eq 200 ]; then
    break  # Exit the loop
  else
    echo "Nginx is not ready yet, retrying in 5 seconds..."
    sleep 5  # Sleep for 5 seconds before retrying
  fi
done

echo "### Deleting dummy certificate for $domains ..."
$COMPOSE_CMD -f docker-compose.prod.yml run  --name onyx --rm --entrypoint "\
  rm -Rf /etc/letsencrypt/live/$domains && \
  rm -Rf /etc/letsencrypt/archive/$domains && \
  rm -Rf /etc/letsencrypt/renewal/$domains.conf" certbot
echo


echo "### Requesting Let's Encrypt certificate for $domains ..."
#Join $domains to -d args
domain_args=""
for domain in "${domains[@]}"; do
  domain_args="$domain_args -d $domain"
done

# Select appropriate email arg
case "$email" in
  "") email_arg="--register-unsafely-without-email" ;;
  *) email_arg="--email $email" ;;
esac

# Enable staging mode if needed
if [ $staging != "0" ]; then staging_arg="--staging"; fi

$COMPOSE_CMD -f docker-compose.prod.yml run --name onyx --rm --entrypoint "\
  certbot certonly --webroot -w /var/www/certbot \
    $staging_arg \
    $email_arg \
    $domain_args \
    --rsa-key-size $rsa_key_size \
    --agree-tos \
    --force-renewal" certbot
echo

echo "### Renaming certificate directory if needed ..."
$COMPOSE_CMD -f docker-compose.prod.yml run --name onyx --rm --entrypoint "\
  sh -c 'for domain in $domains; do \
    numbered_dir=\$(find /etc/letsencrypt/live -maxdepth 1 -type d -name \"\$domain-00*\" | sort -r | head -n1); \
    if [ -n \"\$numbered_dir\" ]; then \
      mv \"\$numbered_dir\" /etc/letsencrypt/live/\$domain; \
    fi; \
  done'" certbot

echo "### Reloading nginx ..."
$COMPOSE_CMD -f docker-compose.prod.yml up --force-recreate -d


================================================
FILE: deployment/docker_compose/install.ps1
================================================
# Onyx Installer for Windows
# Usage: .\install.ps1 [OPTIONS]
# Remote (with params):
#   & ([scriptblock]::Create((irm https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.ps1))) -Lite -NoPrompt
# Remote (defaults only, configure via interaction during script):
#   irm https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.ps1 | iex

param(
    [switch]$Shutdown,
    [switch]$DeleteData,
    [switch]$IncludeCraft,
    [switch]$Lite,
    [switch]$Local,
    [switch]$NoPrompt,
    [switch]$DryRun,
    [switch]$ShowVerbose,
    [switch]$Help
)

$ErrorActionPreference = "Stop"

# Runs a native command with stderr silenced and ErrorActionPreference=Continue.
function Invoke-NativeQuiet {
    param([scriptblock]$Command, [switch]$PassThru)
    $prev = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        if ($PassThru) { & $Command 2>$null }
        else           { $null = & $Command 2>$null }
    } finally { $ErrorActionPreference = $prev }
}

$script:ExpectedDockerRamGB = 10
$script:ExpectedDiskGB = 32
$script:InstallRoot = if ($env:INSTALL_PREFIX) { $env:INSTALL_PREFIX } else { "onyx_data" }
$script:LiteComposeFile = "docker-compose.onyx-lite.yml"
$script:GitHubRawUrl = "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose"
$script:NginxBaseUrl = "https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx"
$script:CurrentStep = 0
$script:TotalSteps = 10
$script:ComposeCmdType = $null
$script:LiteMode = $Lite.IsPresent
$script:IncludeCraftMode = $IncludeCraft.IsPresent
$script:IsWindowsServer = (Get-CimInstance Win32_OperatingSystem).ProductType -ne 1

# ── Output Helpers ───────────────────────────────────────────────────────────

function Print-Success  { param([string]$Message) Write-Host "[OK] $Message" -ForegroundColor Green }
function Print-OnyxError{ param([string]$Message) Write-Host "[X]  $Message" -ForegroundColor Red }
function Print-Info     { param([string]$Message) Write-Host "[i]  $Message" -ForegroundColor Yellow }
function Print-Warning  { param([string]$Message) Write-Host "[!]  $Message" -ForegroundColor Yellow }

function Print-Step {
    param([string]$Title)
    $script:CurrentStep++
    Write-Host "`n=== $Title - Step $($script:CurrentStep)/$($script:TotalSteps) ===`n" -ForegroundColor Cyan
}

function Test-Interactive {
    return -not $NoPrompt
}

function Prompt-OrDefault {
    param([string]$PromptText, [string]$DefaultValue)
    if (-not (Test-Interactive)) { return $DefaultValue }
    $reply = Read-Host $PromptText
    if ([string]::IsNullOrWhiteSpace($reply)) { return $DefaultValue }
    return $reply
}

function Confirm-Action {
    param([string]$Description)
    $reply = (Prompt-OrDefault "Install $Description? (Y/n) [default: Y]" "Y").Trim().ToLower()
    if ($reply -match '^n') {
        Print-Warning "Skipping: $Description"
        return $false
    }
    return $true
}

function Prompt-VersionTag {
    Print-Info "Which tag would you like to deploy?"
    if ($script:IncludeCraftMode) {
        Write-Host "  - Press Enter for craft-latest (recommended for Craft)"
        Write-Host "  - Type a specific tag (e.g., craft-v1.0.0)"
        $version = Prompt-OrDefault "Enter tag [default: craft-latest]" "craft-latest"
    } else {
        Write-Host "  - Press Enter for edge (recommended)"
        Write-Host "  - Type a specific tag (e.g., v0.1.0)"
        $version = Prompt-OrDefault "Enter tag [default: edge]" "edge"
    }
    if     ($script:IncludeCraftMode -and $version -eq "craft-latest") { Print-Info "Selected: craft-latest (Craft enabled)" }
    elseif ($version -eq "edge") { Print-Info "Selected: edge (latest nightly)" }
    else   { Print-Info "Selected: $version" }
    return $version
}

function Prompt-DeploymentMode {
    param([string]$LiteOverlayPath)
    if ($script:LiteMode) { Print-Info "Deployment mode: Lite (set via -Lite flag)"; return }
    Print-Info "Which deployment mode would you like?"
    Write-Host "  1) Lite      - Minimal deployment (no Vespa, Redis, or model servers)"
    Write-Host "                  LLM chat, tools, file uploads, and Projects still work"
    Write-Host "  2) Standard  - Full deployment with search, connectors, and RAG"
    $modeChoice = Prompt-OrDefault "Choose a mode (1 or 2) [default: 1]" "1"
    if ($modeChoice -eq "2") {
        Print-Info "Selected: Standard mode"
    } else {
        $script:LiteMode = $true
        Print-Info "Selected: Lite mode"
        if (-not (Ensure-OnyxFile $LiteOverlayPath "$($script:GitHubRawUrl)/$($script:LiteComposeFile)" $script:LiteComposeFile)) { exit 1 }
    }
}

function Assert-NotCraftLite {
    param([string]$Tag)
    if (-not ($script:LiteMode -and $Tag -match '^craft-')) { return }
    Print-OnyxError "Cannot use a craft image tag ($Tag) with Lite mode."
    Print-Info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
    exit 1
}

function Refresh-PathFromRegistry {
    $env:Path = [System.Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "User")
}

function Get-NativeVersionString {
    param([scriptblock]$Command)
    $output = Invoke-NativeQuiet -PassThru $Command
    $match = [regex]::Match(($output -join ""), '(\d+\.\d+\.\d+)')
    if ($match.Success) { return $match.Value }
    return "unknown"
}

# ── Download Helpers ─────────────────────────────────────────────────────────

function Download-OnyxFile {
    param([string]$Url, [string]$Output)
    for ($attempt = 1; $attempt -le 3; $attempt++) {
        try {
            [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
            Invoke-WebRequest -Uri $Url -OutFile $Output -UseBasicParsing -ErrorAction Stop
            return
        } catch {
            if ($attempt -eq 3) { throw }
            Start-Sleep -Seconds 2
        }
    }
}

function Ensure-OnyxFile {
    param([string]$Path, [string]$Url, [string]$Description)
    if ($Local) {
        if (Test-Path $Path) { Print-Success "Using existing $Description"; return $true }
        Print-OnyxError "Required file missing: $Description ($Path)"
        return $false
    }
    Print-Info "Downloading $Description..."
    try {
        Download-OnyxFile -Url $Url -Output $Path
        Print-Success "$Description downloaded"
        return $true
    } catch {
        Print-OnyxError "Failed to download $Description"
        return $false
    }
}

# ── .env File Helpers ────────────────────────────────────────────────────────

function Set-EnvFileValue {
    param([string]$Path, [string]$Key, [string]$Value, [switch]$Uncomment)
    $lines = Get-Content $Path
    $found = $false
    $result = @()
    foreach ($line in $lines) {
        if ($Uncomment -and $line -match "^\s*#\s*${Key}=") {
            $result += "${Key}=${Value}"; $found = $true
        } elseif ($line -match "^${Key}=") {
            $result += "${Key}=${Value}"; $found = $true
        } else { $result += $line }
    }
    if (-not $found) { $result += "${Key}=${Value}" }
    Write-Utf8NoBom -Path $Path -Content (($result -join "`n") + "`n")
}

function Get-EnvFileValue {
    param([string]$Path, [string]$Key)
    $match = Select-String -Path $Path -Pattern "^${Key}=(.*)" | Select-Object -First 1
    if ($match) { return $match.Matches.Groups[1].Value.Trim().Trim('"', "'") }
    return $null
}

function New-SecureSecret {
    $bytes = New-Object byte[] 32
    $rng = [System.Security.Cryptography.RandomNumberGenerator]::Create()
    $rng.GetBytes($bytes); $rng.Dispose()
    return ($bytes | ForEach-Object { $_.ToString("x2") }) -join ''
}

# ── Docker Compose ───────────────────────────────────────────────────────────

function Get-ComposeFileArgs {
    param([switch]$AutoDetect)
    $fileArgs = @("-f", "docker-compose.yml")
    $litePath = Join-Path $script:InstallRoot "deployment\$($script:LiteComposeFile)"
    if ($script:LiteMode -or ($AutoDetect -and (Test-Path $litePath))) {
        $fileArgs += @("-f", $script:LiteComposeFile)
    }
    return $fileArgs
}

function Invoke-Compose {
    param([switch]$AutoDetect, [Parameter(ValueFromRemainingArguments)][string[]]$Arguments)
    $deployDir = Join-Path $script:InstallRoot "deployment"
    $fileArgs = Get-ComposeFileArgs -AutoDetect:$AutoDetect
    $prev = $ErrorActionPreference; $ErrorActionPreference = "Continue"
    Push-Location $deployDir
    try {
        if ($script:ComposeCmdType -eq "plugin") { & docker @(@("compose") + $fileArgs + $Arguments) }
        else { & docker-compose @($fileArgs + $Arguments) }
        return $LASTEXITCODE
    } finally { Pop-Location; $ErrorActionPreference = $prev }
}

function Initialize-ComposeCommand {
    Invoke-NativeQuiet { docker compose version }
    if ($LASTEXITCODE -eq 0) { $script:ComposeCmdType = "plugin"; return $true }
    if (Get-Command docker-compose -ErrorAction SilentlyContinue) { $script:ComposeCmdType = "standalone"; return $true }
    $script:ComposeCmdType = $null; return $false
}

# ── Utilities ────────────────────────────────────────────────────────────────

function Compare-SemVer {
    param([string]$Version1, [string]$Version2)
    $parts1 = ($Version1 -split '\.') + @("0","0","0")
    $parts2 = ($Version2 -split '\.') + @("0","0","0")
    for ($i = 0; $i -lt 3; $i++) {
        $v1 = 0; $v2 = 0
        [void][int]::TryParse($parts1[$i], [ref]$v1)
        [void][int]::TryParse($parts2[$i], [ref]$v2)
        if ($v1 -lt $v2) { return -1 }
        if ($v1 -gt $v2) { return 1 }
    }
    return 0
}

function Test-PortAvailable {
    param([int]$Port)
    try { $tcp = New-Object System.Net.Sockets.TcpClient; $tcp.Connect("127.0.0.1", $Port); $tcp.Close(); return $false }
    catch { return $true }
}

function Find-AvailablePort {
    param([int]$StartPort = 3000)
    for ($port = $StartPort; $port -le 65535; $port++) {
        if (Test-PortAvailable $port) { return $port }
    }
    return $StartPort
}

function Get-DockerMemoryMB {
    foreach ($p in @((Join-Path $env:APPDATA "Docker\settings.json"), (Join-Path $env:LOCALAPPDATA "Docker\settings.json"))) {
        if (-not (Test-Path $p)) { continue }
        try {
            $s = Get-Content $p -Raw | ConvertFrom-Json
            if ($s.memoryMiB -and $s.memoryMiB -gt 0) { return [int]$s.memoryMiB }
        } catch { }
    }
    try {
        $info = Invoke-NativeQuiet -PassThru { docker system info }
        $mem = $info | Where-Object { $_ -match "Total Memory" } | Select-Object -First 1
        if ($mem -match '(\d+\.?\d*)\s*GiB') { return [int]([double]$Matches[1] * 1024) }
    } catch { }
    return 0
}

function Test-OnyxHealth {
    param([int]$Port)
    Print-Info "Checking Onyx service health..."
    Write-Host "Containers are healthy, waiting for database migrations and service initialization to finish."
    for ($attempt = 1; $attempt -le 600; $attempt++) {
        try {
            $r = Invoke-WebRequest -Uri "http://localhost:$Port" -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop
            if ($r.StatusCode -in @(200, 301, 302, 303, 307, 308)) { return $true }
        } catch { }
        $m = [math]::Floor($attempt / 60); $s = $attempt % 60
        $dots = "." * (($attempt % 3) + 1); $pad = " " * (3 - $dots.Length)
        Write-Host -NoNewline "`rChecking Onyx service${dots}${pad} (${m}m ${s}s elapsed)"
        Start-Sleep -Seconds 1
    }
    Write-Host ""; return $false
}

function Test-IsAdmin {
    $id = [Security.Principal.WindowsIdentity]::GetCurrent()
    (New-Object Security.Principal.WindowsPrincipal($id)).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
}

function Invoke-ElevatedRelaunch {
    if (Test-IsAdmin) { return $false }
    Print-Info "Administrator privileges required. Relaunching as Administrator..."
    if (-not $PSCommandPath) { Print-Warning "Cannot determine script path. Please re-run as Administrator."; return $false }
    $argList = @("-NoProfile", "-ExecutionPolicy", "Bypass", "-File", "`"$PSCommandPath`"")
    if ($Shutdown)     { $argList += "-Shutdown" }
    if ($DeleteData)   { $argList += "-DeleteData" }
    if ($IncludeCraft) { $argList += "-IncludeCraft" }
    if ($Lite)         { $argList += "-Lite" }
    if ($Local)        { $argList += "-Local" }
    if ($NoPrompt)     { $argList += "-NoPrompt" }
    if ($DryRun)       { $argList += "-DryRun" }
    if ($ShowVerbose)  { $argList += "-ShowVerbose" }
    try { $proc = Start-Process powershell -ArgumentList $argList -Verb RunAs -Wait -PassThru; exit $proc.ExitCode }
    catch { Print-Warning "UAC elevation was declined or failed."; return $false }
}

function Write-Utf8NoBom {
    param([string]$Path, [string]$Content)
    [System.IO.File]::WriteAllText($Path, $Content, (New-Object System.Text.UTF8Encoding($false)))
}

# ── Help / Shutdown / Delete ─────────────────────────────────────────────────

function Show-OnyxHelp {
    $help = "Onyx Installation Script for Windows`n"
    $help += "`nUsage: .\install.ps1 [OPTIONS]`n"
    $help += "`nOptions:"
    $help += "`n  -IncludeCraft  Enable Onyx Craft (AI-powered web app building)"
    $help += "`n  -Lite          Deploy Onyx Lite (no Vespa, Redis, or model servers)"
    $help += "`n  -Local         Use existing config files instead of downloading from GitHub"
    $help += "`n  -Shutdown      Stop (pause) Onyx containers"
    $help += "`n  -DeleteData    Remove all Onyx data (containers, volumes, and files)"
    $help += "`n  -NoPrompt      Run non-interactively with defaults (for CI/automation)"
    $help += "`n  -DryRun        Show what would be done without making changes"
    $help += "`n  -ShowVerbose   Show detailed output for debugging"
    $help += "`n  -Help          Show this help message"
    $help += "`n`nExamples:"
    $help += "`n  .\install.ps1                    # Install Onyx"
    $help += "`n  .\install.ps1 -Lite              # Install Onyx Lite"
    $help += "`n  .\install.ps1 -IncludeCraft      # Install with Craft enabled"
    $help += "`n  .\install.ps1 -Shutdown          # Pause Onyx services"
    $help += "`n  .\install.ps1 -DeleteData        # Completely remove Onyx"
    $help += "`n  .\install.ps1 -Local             # Re-run using existing config"
    $help += "`n  .\install.ps1 -NoPrompt          # Non-interactive install"
    Write-Host $help
}

function Invoke-OnyxShutdown {
    Write-Host "`n=== Shutting down Onyx ===`n" -ForegroundColor Cyan
    $deployDir = Join-Path $script:InstallRoot "deployment"
    if (-not (Test-Path (Join-Path $deployDir "docker-compose.yml"))) {
        Print-Warning "Onyx deployment not found. Nothing to shutdown."
        return
    }
    if (-not (Initialize-ComposeCommand)) { Print-OnyxError "Docker Compose not found."; exit 1 }
    $stopArgs = @("stop")
    $result = Invoke-Compose -AutoDetect @stopArgs
    if ($result -ne 0) { Print-OnyxError "Failed to stop containers"; exit 1 }
    Print-Success "Onyx containers stopped (paused)"
}

function Invoke-OnyxDeleteData {
    Write-Host "`n=== WARNING: This will permanently delete all Onyx data ===`n" -ForegroundColor Red
    Print-Warning "This action will remove all Onyx containers, volumes, files, and user data."
    if (Test-Interactive) {
        $confirm = Prompt-OrDefault "Type 'DELETE' to confirm" ""
        if ($confirm -ne "DELETE") { Print-Info "Operation cancelled."; return }
    } else {
        Print-OnyxError "Cannot confirm destructive operation in non-interactive mode."
        exit 1
    }
    $deployDir = Join-Path $script:InstallRoot "deployment"
    if ((Test-Path (Join-Path $deployDir "docker-compose.yml")) -and (Initialize-ComposeCommand)) {
        $downArgs = @("down", "-v")
        $result = Invoke-Compose -AutoDetect @downArgs
        if ($result -eq 0) { Print-Success "Containers and volumes removed" }
        else { Print-OnyxError "Failed to remove containers" }
    }
    if (Test-Path $script:InstallRoot) {
        Remove-Item -Recurse -Force $script:InstallRoot
        Print-Success "Data directories removed"
    }
    Print-Success "All Onyx data has been permanently deleted!"
}

# ── Docker Daemon ────────────────────────────────────────────────────────────

function Wait-ForDockerDaemon {
    param([int]$MaxWait = 60)
    Print-Info "Waiting for Docker daemon to become ready (up to ${MaxWait} seconds)..."
    $waited = 0; $lastError = ""; $unchangedErrorCount = 0
    while ($waited -lt $MaxWait) {
        Start-Sleep -Seconds 3; $waited += 3
        $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
        $dockerOutput = & docker info 2>&1
        $ErrorActionPreference = $prevEAP
        $errRecords = @($dockerOutput | Where-Object { $_ -is [System.Management.Automation.ErrorRecord] })
        $currentError = if ($errRecords.Count -gt 0) { $errRecords[0].ToString() } else { "" }
        if ($LASTEXITCODE -eq 0) { Write-Host ""; Print-Success "Docker daemon is running"; return $true }
        if ($currentError) {
            if ($currentError -eq $lastError) { $unchangedErrorCount++ } else { $unchangedErrorCount = 0; $lastError = $currentError }
            if ($unchangedErrorCount -ge 5) {
                Write-Host ""; Print-OnyxError "Docker daemon is not starting. Persistent error after ${waited}s:"
                Write-Host "    $lastError" -ForegroundColor Red; return $false
            }
        }
        $dots = "." * (($waited / 3 % 3) + 1); $pad = " " * (3 - $dots.Length)
        Write-Host -NoNewline "`rWaiting for Docker daemon${dots}${pad} (${waited}s elapsed)"
    }
    Write-Host ""; Print-OnyxError "Docker daemon did not respond within ${MaxWait} seconds."
    if ($lastError) { Print-Info "Last error: $lastError" }
    return $false
}

function Fix-DockerCredStore {
    $configFile = Join-Path $env:USERPROFILE ".docker\config.json"
    if (-not (Test-Path $configFile)) { return }
    try {
        $rawBytes = [System.IO.File]::ReadAllBytes($configFile)
        $hasBom = $rawBytes.Length -ge 3 -and $rawBytes[0] -eq 0xEF -and $rawBytes[1] -eq 0xBB -and $rawBytes[2] -eq 0xBF
        $raw = [System.IO.File]::ReadAllText($configFile).TrimStart([char]0xFEFF)
        $config = $raw | ConvertFrom-Json
        $needsRewrite = $hasBom
        # Check property existence (not truthiness -- "" is falsy in PS)
        if ($null -ne $config.PSObject.Properties['credsStore']) {
            Print-Info "Removing credsStore='$($config.credsStore)' from Docker config..."
            $config.PSObject.Properties.Remove('credsStore')
            $needsRewrite = $true
        }
        if ($null -ne $config.PSObject.Properties['credHelpers']) {
            Print-Info "Removing credHelpers from Docker config..."
            $config.PSObject.Properties.Remove('credHelpers')
            $needsRewrite = $true
        }
        if ($needsRewrite) {
            Write-Utf8NoBom -Path $configFile -Content ($config | ConvertTo-Json -Depth 10)
            Print-Success "Docker credential config cleaned"
        }
    } catch {
        Print-Warning "Could not update Docker config: $_"
        try { Write-Utf8NoBom -Path $configFile -Content '{}'; Print-Success "Docker config reset" }
        catch { Print-Warning "Could not reset Docker config: $_" }
    }

}

function Register-DockerService {
    if (Get-Service docker -ErrorAction SilentlyContinue) { return $true }
    Print-Info "Docker service not registered. Looking for dockerd.exe..."
    $candidates = @(
        (Join-Path $env:ProgramFiles "Docker\Docker\resources\dockerd.exe"),
        (Join-Path $env:ProgramFiles "Docker\dockerd.exe"),
        (Join-Path $env:ProgramFiles "Docker\Docker\dockerd.exe")
    )
    $dockerExe = Get-Command docker -ErrorAction SilentlyContinue
    if ($dockerExe) { $candidates = @((Join-Path (Split-Path $dockerExe.Source) "dockerd.exe")) + $candidates }
    $dockerdPath = $null
    foreach ($c in $candidates) { if (Test-Path $c) { $dockerdPath = $c; break } }
    if (-not $dockerdPath) {
        Print-OnyxError "Could not find dockerd.exe to register as a service."
        return $false
    }
    Print-Info "Found dockerd at: $dockerdPath"
    Invoke-NativeQuiet { & $dockerdPath --register-service }
    if ($LASTEXITCODE -ne 0) {
        Print-Warning "dockerd --register-service failed (code $LASTEXITCODE), trying sc.exe..."
        Invoke-NativeQuiet { sc.exe create docker binPath= "`"$dockerdPath`" --run-service" start= auto }
    }
    if (-not (Get-Service docker -ErrorAction SilentlyContinue)) {
        Print-OnyxError "Failed to register Docker as a Windows service."
        return $false
    }
    Print-Success "Docker service registered"
    return $true
}

function Start-DockerDaemon {
    Invoke-NativeQuiet { docker info }
    if ($LASTEXITCODE -eq 0) { return $true }

    if ($script:IsWindowsServer) {
        Print-Info "Windows Server detected - starting Docker..."
        # Prefer Docker Desktop if installed (provides Linux containers);
        # native dockerd on Windows Server only supports Windows containers.
        $ddExe = "${env:ProgramFiles}\Docker\Docker\Docker Desktop.exe"
        if (Test-Path $ddExe) {
            Print-Info "Docker Desktop is installed - using it for Linux container support."
            # Stop native Docker service if running to avoid pipe conflicts
            $svc = Get-Service docker -ErrorAction SilentlyContinue
            if ($svc -and $svc.Status -eq 'Running') {
                Print-Info "Stopping native Docker Engine service to avoid conflicts..."
                Stop-Service docker -Force -ErrorAction SilentlyContinue
                Start-Sleep -Seconds 3
            }
            Fix-DockerCredStore
            Start-Process $ddExe
            if (Wait-ForDockerDaemon -MaxWait 120) { return $true }
            Print-Warning "Docker Desktop did not start. Falling back to Docker Engine service..."
        }
        # Fallback: native dockerd service (Windows containers only)
        if (-not (Register-DockerService)) { return $false }
        Fix-DockerCredStore
        try { Start-Service docker -ErrorAction Stop; Print-Success "Docker service started" }
        catch { Print-Warning "Failed to start Docker service: $_"; return $false }
        return (Wait-ForDockerDaemon -MaxWait 60)
    }

    # Windows Desktop - start Docker Desktop
    Print-Info "Starting Docker Desktop..."
    $launchPath = $null
    foreach ($path in @(
        "${env:ProgramFiles}\Docker\Docker\Docker Desktop.exe",
        "${env:ProgramFiles(x86)}\Docker\Docker\Docker Desktop.exe",
        "${env:LOCALAPPDATA}\Docker\Docker Desktop.exe"
    )) {
        if (Test-Path $path) { Start-Process $path; $launchPath = $path; break }
    }
    if (-not $launchPath) {
        try { Start-Process "Docker Desktop" -ErrorAction Stop }
        catch { Print-Warning "Could not find Docker Desktop executable."; return $false }
    }
    if (-not (Wait-ForDockerDaemon -MaxWait 120)) {
        $proc = Get-Process "Docker Desktop" -ErrorAction SilentlyContinue
        if ($proc) { Print-Info "Docker Desktop IS running (PID: $($proc.Id)), but the daemon is not responding." }
        else { Print-Warning "Docker Desktop process is NOT running - it may have crashed." }
        Print-Info "Try starting Docker Desktop manually, check WSL2 status, or restart your computer."
        return $false
    }
    Print-Info "Waiting 15 seconds for Docker Desktop to fully stabilize..."
    Start-Sleep -Seconds 15
    return $true
}

# ── Docker Install ───────────────────────────────────────────────────────────

function Install-DockerEngine {
    Print-Info "Windows Server detected - Docker Engine is required."
    if (-not (Confirm-Action "Docker Engine (Windows Server)")) { exit 1 }
    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }

    try {
        $feature = Get-WindowsFeature -Name Containers -ErrorAction Stop
        if ($feature.InstallState -ne 'Installed') {
            Print-Info "Installing Windows Containers feature..."
            $result = Install-WindowsFeature -Name Containers -ErrorAction Stop
            if ($result.RestartNeeded -eq 'Yes') {
                Print-Warning "A reboot is required. Please restart and re-run this script."
                exit 0
            }
            Print-Success "Containers feature installed"
        }
    } catch { Print-Warning "Could not check/install Containers feature: $_" }

    $installed = $false

    if (-not $installed) {
        Print-Info "Attempting Docker install via DockerMsftProvider..."
        try {
            if (-not (Get-PackageProvider -Name NuGet -ErrorAction SilentlyContinue)) {
                Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force | Out-Null
            }
            if (-not (Get-Module DockerMsftProvider -ListAvailable -ErrorAction SilentlyContinue)) {
                Install-Module -Name DockerMsftProvider -Repository PSGallery -Force
            }
            Install-Package -Name docker -ProviderName DockerMsftProvider -Force | Out-Null
            $installed = $true
            Print-Success "Docker installed via DockerMsftProvider"
        } catch { Print-Warning "DockerMsftProvider failed: $_" }
    }

    if (-not $installed) {
        Print-Info "Downloading Docker binaries directly..."
        try {
            $page = Invoke-WebRequest -Uri "https://download.docker.com/win/static/stable/x86_64/" -UseBasicParsing -ErrorAction Stop
            $latestZip = $page.Links | Where-Object { $_.href -match '^docker-\d+.*\.zip$' } |
                Sort-Object href -Descending | Select-Object -First 1
            if (-not $latestZip) { throw "Could not find Docker zip" }
            $zipPath = Join-Path $env:TEMP "docker-ce.zip"
            Download-OnyxFile -Url "https://download.docker.com/win/static/stable/x86_64/$($latestZip.href)" -Output $zipPath
            Expand-Archive -Path $zipPath -DestinationPath $env:ProgramFiles -Force
            Remove-Item -Force $zipPath -ErrorAction SilentlyContinue
            $dockerPath = Join-Path $env:ProgramFiles "docker"
            $machinePath = [System.Environment]::GetEnvironmentVariable("Path", "Machine")
            if ($machinePath -notlike "*$dockerPath*") {
                [System.Environment]::SetEnvironmentVariable("Path", "$machinePath;$dockerPath", "Machine")
            }
            Refresh-PathFromRegistry
            & "$dockerPath\dockerd.exe" --register-service
            $installed = $true
            Print-Success "Docker installed and registered as service"
        } catch { Print-Warning "Direct binary install failed: $_" }
    }

    if (-not $installed) {
        Print-OnyxError "Could not install Docker Engine on Windows Server."
        Print-Info "Install manually: https://docs.docker.com/engine/install/binaries/#install-server-and-client-binaries-on-windows"
        exit 1
    }

    try { Start-Service docker -ErrorAction Stop; Print-Success "Docker service started" }
    catch { Print-OnyxError "Failed to start Docker service: $_"; exit 1 }
    Install-ComposePlugin
    if (-not (Wait-ForDockerDaemon -MaxWait 30)) { Print-OnyxError "Docker installed but daemon not responding."; exit 1 }
    Print-Success "Docker Engine installed and running on Windows Server"
}

function Install-ComposePlugin {
    Invoke-NativeQuiet { docker compose version }
    if ($LASTEXITCODE -eq 0) { return }
    if (-not (Confirm-Action "Docker Compose plugin")) { return }
    Print-Info "Installing Docker Compose plugin..."
    $dest = Join-Path $env:ProgramFiles "docker\cli-plugins"
    New-Item -ItemType Directory -Force -Path $dest | Out-Null
    try {
        Download-OnyxFile -Url "https://github.com/docker/compose/releases/latest/download/docker-compose-windows-x86_64.exe" -Output (Join-Path $dest "docker-compose.exe")
        Print-Success "Docker Compose plugin installed"
    } catch {
        Print-Warning "Failed to install Docker Compose plugin: $_"
    }
}

function Install-Wsl {
    Invoke-NativeQuiet { wsl --status }
    if ($LASTEXITCODE -eq 0) { Print-Success "WSL2 is available"; return $true }
    if (-not (Confirm-Action "WSL2 (required for Docker)")) { return $false }
    Print-Info "Installing WSL2..."
    try {
        $proc = Start-Process wsl -ArgumentList "--install", "--no-distribution" -Wait -PassThru -NoNewWindow
        if ($proc.ExitCode -eq 0) { Print-Success "WSL2 installed"; return $true }
        Print-Warning "WSL2 install exited with code $($proc.ExitCode). A reboot may be required."
        return $false
    } catch { Print-Warning "Failed to install WSL2: $_"; return $false }
}

function Install-DockerDesktop {
    Print-Info "Docker Desktop is required but not installed."
    if (-not (Confirm-Action "Docker Desktop")) { exit 1 }
    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }
    $wslReady = Install-Wsl
    $installed = $false

    if (Get-Command winget -ErrorAction SilentlyContinue) {
        Print-Info "Installing Docker Desktop via winget..."
        winget install Docker.DockerDesktop --accept-package-agreements --accept-source-agreements
        if ($LASTEXITCODE -eq 0) { Print-Success "Docker Desktop installed via winget"; $installed = $true }
    }

    if (-not $installed -and (Get-Command choco -ErrorAction SilentlyContinue)) {
        Print-Info "Installing Docker Desktop via Chocolatey..."
        choco install docker-desktop -y
        if ($LASTEXITCODE -eq 0) { Print-Success "Docker Desktop installed via Chocolatey"; $installed = $true }
    }

    if (-not $installed) {
        Print-Info "Downloading Docker Desktop installer directly..."
        $installerPath = Join-Path $env:TEMP "DockerDesktopInstaller_$([System.IO.Path]::GetRandomFileName().Split('.')[0]).exe"
        try {
            Download-OnyxFile -Url "https://desktop.docker.com/win/main/amd64/Docker%20Desktop%20Installer.exe" -Output $installerPath
            $proc = Start-Process -FilePath $installerPath -ArgumentList "install", "--quiet", "--accept-license" -Wait -PassThru -NoNewWindow
            if ($proc.ExitCode -eq 0) {
                Print-Success "Docker Desktop installed via direct download"; $installed = $true
            } elseif ($proc.ExitCode -eq 3) {
                Print-Warning "Prerequisites not met."
                if (-not $wslReady) { Print-OnyxError "WSL2 is required. Run: wsl --install --no-distribution, then reboot." }
                else { Print-Info "A reboot may be needed. Restart and re-run this script." }
            } else {
                Print-Warning "Installer exited with code $($proc.ExitCode)."
                if (-not (Test-IsAdmin)) { Print-Info "Try re-running as Administrator." }
            }
        } catch { Print-Warning "Direct download failed: $_" }
        finally { Remove-Item -Force $installerPath -ErrorAction SilentlyContinue }
    }

    if (-not $installed) {
        Print-OnyxError "Could not install Docker Desktop automatically."
        Print-Info "Install manually: https://docs.docker.com/desktop/install/windows-install/"
        exit 1
    }

    Refresh-PathFromRegistry
    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) {
        Print-OnyxError "Docker installed but 'docker' command not available. Restart your terminal and re-run."
        exit 1
    }
    if (-not (Start-DockerDaemon)) {
        Print-OnyxError "Docker Desktop installed but could not be started. Launch it from the Start Menu and re-run."
        exit 1
    }
    Print-Success "Docker Desktop installed and running"
}

function Invoke-WslInstall {
    Print-Info "Native Docker on Windows Server only supports Windows containers."
    Print-Info "Onyx will be installed via WSL2 (Windows Subsystem for Linux)."
    if (-not (Confirm-Action "Onyx via WSL2 (installs WSL2 + Ubuntu + Docker inside Linux)")) { exit 1 }
    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }

    # Free memory by stopping the Windows Docker service (not needed once we use WSL2)
    $svc = Get-Service docker -ErrorAction SilentlyContinue
    if ($svc -and $svc.Status -eq 'Running') {
        Print-Info "Stopping Windows Docker service to free memory for WSL2..."
        Stop-Service docker -Force -ErrorAction SilentlyContinue
        Start-Sleep -Seconds 3
    }

    # Check available memory before proceeding
    try {
        $os = Get-CimInstance Win32_OperatingSystem
        $freeGB = [math]::Round($os.FreePhysicalMemory / 1MB, 1)
        $totalGB = [math]::Round($os.TotalVisibleMemorySize / 1MB, 1)
        Print-Info "System memory: ${totalGB}GB total, ${freeGB}GB free"
        if ($totalGB -lt 4) {
            Print-OnyxError "Onyx requires at least 4GB RAM (Lite mode) or 10GB RAM (Standard mode)."
            Print-Info "This machine has ${totalGB}GB total. Consider using a larger instance."
            exit 1
        }
    } catch {}

    # Ensure WSL2 is available
    Invoke-NativeQuiet { wsl --status }
    if ($LASTEXITCODE -ne 0) {
        if (-not (Confirm-Action "WSL2 (Windows Subsystem for Linux)")) { exit 1 }
        Print-Info "Installing WSL2..."
        try {
            $proc = Start-Process wsl -ArgumentList "--install", "--no-distribution" -Wait -PassThru -NoNewWindow
            if ($proc.ExitCode -ne 0) {
                Print-OnyxError "WSL2 installation failed (code $($proc.ExitCode)). A reboot may be needed."
                Print-Info "After rebooting, re-run this script."
                exit 1
            }
        } catch {
            Print-OnyxError "Could not install WSL2: $_"
            exit 1
        }
    }
    Print-Success "WSL2 is available"

    # Ensure Ubuntu is installed in WSL
    $distros = (Invoke-NativeQuiet -PassThru { wsl -l -q }) -join "`n"
    if ($distros -notmatch "Ubuntu") {
        Print-Info "Installing Ubuntu in WSL2..."
        $proc = Start-Process wsl -ArgumentList "--install", "-d", "Ubuntu" -Wait -PassThru -NoNewWindow
        if ($proc.ExitCode -ne 0) {
            Print-OnyxError "Ubuntu installation failed. Try manually: wsl --install -d Ubuntu"
            Print-Info "If this is a memory error, this machine needs at least 4GB RAM."
            exit 1
        }
    }
    Print-Success "Ubuntu is available in WSL2"

    # Build the install.sh invocation to run inside WSL2
    Print-Info "Handing off to the Linux install script inside WSL2..."
    $bashArgs = @()
    if ($script:LiteMode) { $bashArgs += "--lite" }
    if ($script:IncludeCraftMode) { $bashArgs += "--include-craft" }
    if ($NoPrompt) { $bashArgs += "--no-prompt" }
    if ($ShowVerbose) { $bashArgs += "--verbose" }

    $installUrl = "$($script:GitHubRawUrl)/install.sh"
    $bashCmd = "curl -fsSL '$installUrl' | bash -s -- $($bashArgs -join ' ')"
    Print-Info "Running: $bashCmd"
    wsl -d Ubuntu -- bash -c $bashCmd
    $wslExit = $LASTEXITCODE

    if ($wslExit -eq 0) {
        Print-Success "Onyx installation complete (via WSL2)"
        # Determine the port Onyx is running on inside WSL
        Print-Info "Onyx should be accessible at http://localhost:3000"
        Print-Info "WSL2 automatically forwards ports to the Windows host."
    } else {
        Print-OnyxError "Installation inside WSL2 exited with code $wslExit"
        Print-Info "You can debug by running: wsl -d Ubuntu"
    }
    exit $wslExit
}

function Install-Docker {
    if ($script:IsWindowsServer) { Install-DockerEngine } else { Install-DockerDesktop }
}

# ── Main Installation Flow ───────────────────────────────────────────────────

function Main {
    if ($Help) { Show-OnyxHelp; return }
    if ($PSVersionTable.PSVersion.Major -lt 5) { Print-OnyxError "PowerShell 5+ required (found $($PSVersionTable.PSVersion))"; exit 1 }
    if ($script:LiteMode -and $script:IncludeCraftMode) {
        Print-OnyxError "-Lite and -IncludeCraft cannot be used together."
        exit 1
    }
    if ($script:LiteMode) { $script:ExpectedDockerRamGB = 4; $script:ExpectedDiskGB = 16 }
    if ($Shutdown)   { Invoke-OnyxShutdown; return }
    if ($DeleteData) { Invoke-OnyxDeleteData; return }

    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) { Install-Docker }

    # Banner
    $edition = if ($script:IsWindowsServer) { "Windows Server" } else { "Windows Desktop" }
    Write-Host "`n   ____`n  / __ \`n | |  | |_ __  _   ___  __`n | |  | | '_ \| | | \ \/ /`n | |__| | | | | |_| |>  < `n  \____/|_| |_|\__, /_/\_\`n                __/ |`n               |___/" -ForegroundColor Cyan
    Write-Host "Welcome to Onyx Installation Script (Windows)"
    Write-Host "=============================================="
    Print-Success "$edition detected"
    Write-Host "This script will:" -ForegroundColor Yellow
    Write-Host "1. Download deployment files for Onyx into a new '$($script:InstallRoot)' directory"
    Write-Host "2. Check your system resources (Docker, memory, disk space)"
    Write-Host "3. Guide you through deployment options (version, authentication)"

    if (Test-Interactive) {
        Write-Host "`nPlease acknowledge and press Enter to continue..." -ForegroundColor Yellow
        $null = Prompt-OrDefault "" ""
    } else {
        Write-Host "`nRunning in non-interactive mode - proceeding automatically..." -ForegroundColor Yellow
    }

    if ($DryRun) {
        Print-Info "Dry run mode - showing what would happen:"
        Write-Host "  - Install root: $($script:InstallRoot)  Lite: $($script:LiteMode)  Craft: $($script:IncludeCraftMode)"
        Write-Host "  - OS: Windows $([System.Environment]::OSVersion.Version)  PS: $($PSVersionTable.PSVersion)"
        Print-Success "Dry run complete (no changes made)"
        return
    }
    if ($ShowVerbose) { Print-Info "Verbose mode enabled" }

    # ── Step 1: Verify Docker ─────────────────────────────────────────────
    Print-Step "Verifying Docker installation"
    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) { Install-Docker }
    $dockerVersion = Get-NativeVersionString { docker --version }
    Print-Success "Docker $dockerVersion is installed"

    if (-not (Initialize-ComposeCommand)) {
        if ($script:IsWindowsServer) {
            Install-ComposePlugin
            if (-not (Initialize-ComposeCommand)) { Print-OnyxError "Docker Compose could not be installed."; exit 1 }
        } else {
            Print-OnyxError "Docker Compose is not installed. Docker Desktop includes it."
            Print-Info "Visit: https://docs.docker.com/desktop/install/windows-install/"
            exit 1
        }
    }
    $composeVersion = Get-NativeVersionString { if ($script:ComposeCmdType -eq "plugin") { docker compose version } else { docker-compose --version } }
    Print-Success "Docker Compose $composeVersion is installed ($($script:ComposeCmdType))"

    Invoke-NativeQuiet { docker info }
    if ($LASTEXITCODE -ne 0) {
        $label = if ($script:IsWindowsServer) { "Docker service" } else { "Docker Desktop" }
        Print-Info "Docker daemon is not running. Starting $label..."
        if (-not (Start-DockerDaemon)) { Print-OnyxError "Could not start Docker. Start it manually and re-run."; exit 1 }
    }
    Print-Success "Docker daemon is running"
    if ($script:IsWindowsServer) { Fix-DockerCredStore }

    # Verify Docker is running Linux containers (Onyx images are Linux-based)
    $osType = ((Invoke-NativeQuiet -PassThru { docker info --format '{{.OSType}}' }) -join "").Trim()
    if ($osType -eq "windows") {
        Print-Warning "Docker is running in Windows containers mode, but Onyx requires Linux containers."
        $switchCli = Join-Path $env:ProgramFiles "Docker\Docker\DockerCli.exe"
        $switched = $false
        if (Test-Path $switchCli) {
            Print-Info "Attempting to switch to Linux containers via DockerCli..."
            try { & $switchCli -SwitchLinuxEngine 2>$null } catch {}
            Start-Sleep -Seconds 15
            for ($w = 0; $w -lt 12; $w++) {
                Invoke-NativeQuiet { docker info }
                if ($LASTEXITCODE -eq 0) { break }
                Start-Sleep -Seconds 5
            }
            $osType2 = ((Invoke-NativeQuiet -PassThru { docker info --format '{{.OSType}}' }) -join "").Trim()
            $switched = ($osType2 -eq "linux")
        }
        if ($switched) {
            Print-Success "Switched to Linux containers"
        } else {
            Print-Info "Native Docker on Windows Server only supports Windows containers."
            Print-Info "Switching to WSL2 approach for Linux container support..."
            Invoke-WslInstall
        }
    }

    # ── Step 2: Verify Resources ──────────────────────────────────────────
    Print-Step "Verifying Docker resources"
    $memoryMB = Get-DockerMemoryMB
    if ($memoryMB -gt 0) {
        $memoryGB = [math]::Round($memoryMB / 1024, 1)
        $memoryDisplay = if ($memoryGB -ge 1) { "~${memoryGB}GB" } else { "${memoryMB}MB" }
        Print-Info "Docker memory allocation: $memoryDisplay"
    } else {
        Print-Warning "Could not determine memory allocation"
        $memoryDisplay = "unknown"
    }

    $diskAvailableGB = [math]::Floor((Get-PSDrive -Name (Get-Location).Drive.Name).Free / 1GB)
    Print-Info "Available disk space: ${diskAvailableGB}GB"

    $resourceWarning = $false
    if ($memoryMB -gt 0 -and $memoryMB -lt ($script:ExpectedDockerRamGB * 1024)) {
        Print-Warning "Less than $($script:ExpectedDockerRamGB)GB RAM available (found: $memoryDisplay)"
        $resourceWarning = $true
    }
    if ($diskAvailableGB -lt $script:ExpectedDiskGB) {
        Print-Warning "Less than $($script:ExpectedDiskGB)GB disk space available (found: ${diskAvailableGB}GB)"
        $resourceWarning = $true
    }
    if ($resourceWarning) {
        Print-Warning "Onyx recommends at least $($script:ExpectedDockerRamGB)GB RAM and $($script:ExpectedDiskGB)GB disk for standard mode."
        Print-Warning "Lite mode requires less (1-4GB RAM, 8-16GB disk) but has no vector database."
        $reply = (Prompt-OrDefault "Do you want to continue anyway? (Y/n)" "y").Trim().ToLower()
        if ($reply -notmatch '^y') { Print-Info "Installation cancelled."; exit 1 }
        Print-Info "Proceeding despite resource limitations..."
    }

    # ── Step 3: Create Directories ────────────────────────────────────────
    Print-Step "Creating directory structure"
    if (Test-Path $script:InstallRoot) { Print-Info "Using existing $($script:InstallRoot) directory" }
    $deploymentDir = Join-Path $script:InstallRoot "deployment"
    New-Item -ItemType Directory -Force -Path $deploymentDir | Out-Null
    New-Item -ItemType Directory -Force -Path (Join-Path $script:InstallRoot "data\nginx\local") | Out-Null
    Print-Success "Directory structure created"

    # ── Step 4: Download Config Files ─────────────────────────────────────
    if ($Local) { Print-Step "Verifying existing configuration files" }
    else { Print-Step "Downloading Onyx configuration files" }

    $composeDest = Join-Path $deploymentDir "docker-compose.yml"
    if (-not (Ensure-OnyxFile $composeDest "$($script:GitHubRawUrl)/docker-compose.yml" "docker-compose.yml")) { exit 1 }

    if ($composeVersion -ne "unknown" -and (Compare-SemVer $composeVersion "2.24.0") -lt 0) {
        Print-Warning "Docker Compose $composeVersion is older than 2.24.0 (required for env_file format)."
        Print-Info "Update Docker Desktop or install a newer Docker Compose. Installation may fail."
        $reply = (Prompt-OrDefault "Continue anyway? (Y/n)" "y").Trim().ToLower()
        if ($reply -notmatch '^y') { exit 1 }
    }

    $liteOverlayPath = Join-Path $deploymentDir $script:LiteComposeFile
    if ($script:LiteMode) {
        if (-not (Ensure-OnyxFile $liteOverlayPath "$($script:GitHubRawUrl)/$($script:LiteComposeFile)" $script:LiteComposeFile)) { exit 1 }
    }

    $envTemplateDest = Join-Path $deploymentDir "env.template"
    if (-not (Ensure-OnyxFile $envTemplateDest "$($script:GitHubRawUrl)/env.template" "env.template")) { exit 1 }
    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot "data\nginx\app.conf.template") "$($script:NginxBaseUrl)/app.conf.template" "nginx/app.conf.template")) { exit 1 }
    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot "data\nginx\run-nginx.sh") "$($script:NginxBaseUrl)/run-nginx.sh" "nginx/run-nginx.sh")) { exit 1 }
    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot "README.md") "$($script:GitHubRawUrl)/README.md" "README.md")) { exit 1 }

    $gitkeep = Join-Path $script:InstallRoot "data\nginx\local\.gitkeep"
    if (-not (Test-Path $gitkeep)) { New-Item -ItemType File -Force -Path $gitkeep | Out-Null }
    Print-Success "All configuration files ready"

    # ── Step 5: Deployment Config ─────────────────────────────────────────
    Print-Step "Setting up deployment configs"
    $envFile = Join-Path $deploymentDir ".env"

    # Check if services are already running
    if ((Test-Path $composeDest) -and (Initialize-ComposeCommand)) {
        $running = @()
        $psArgs = @("ps", "-q")
        try { $running = @(Invoke-Compose -AutoDetect @psArgs 2>$null | Where-Object { $_ }) } catch { }
        if ($running.Count -gt 0) {
            Print-OnyxError "Onyx services are currently running!"
            Print-Info "Run '.\install.ps1 -Shutdown' first, then re-run this script."
            exit 1
        }
    }

    $version = "latest"

    if (Test-Path $envFile) {
        Print-Info "Existing .env file found. What would you like to do?"
        Write-Host "  - Press Enter to restart with current configuration"
        Write-Host "  - Type 'update' to update to a newer version"
        $reply = Prompt-OrDefault "Choose an option [default: restart]" ""

        Prompt-DeploymentMode -LiteOverlayPath $liteOverlayPath

        if ($reply -eq "update") {
            $version = Prompt-VersionTag
            Assert-NotCraftLite $version
            Set-EnvFileValue -Path $envFile -Key "IMAGE_TAG" -Value $version
            Print-Success "Updated IMAGE_TAG to $version"
            if ($version -match '^craft-') {
                Set-EnvFileValue -Path $envFile -Key "ENABLE_CRAFT" -Value "true" -Uncomment
            }
        } else {
            Assert-NotCraftLite (Get-EnvFileValue -Path $envFile -Key "IMAGE_TAG")
            Print-Info "Keeping existing configuration"
        }
        if ($script:LiteMode) {
            $profiles = Get-EnvFileValue -Path $envFile -Key "COMPOSE_PROFILES"
            if ($profiles -and $profiles -match 's3-filestore') {
                Set-EnvFileValue -Path $envFile -Key "COMPOSE_PROFILES" -Value ""
            }
        }
    } else {
        Print-Info "No existing .env file found. Setting up new deployment..."
        Prompt-DeploymentMode -LiteOverlayPath $liteOverlayPath
        if ($script:LiteMode -and $script:IncludeCraftMode) {
            Print-OnyxError "-IncludeCraft cannot be used with Lite mode."
            exit 1
        }
        if ($script:LiteMode) { $script:ExpectedDockerRamGB = 4; $script:ExpectedDiskGB = 16 }

        $version = Prompt-VersionTag
        Assert-NotCraftLite $version

        Copy-Item -Path $envTemplateDest -Destination $envFile -Force
        Set-EnvFileValue -Path $envFile -Key "IMAGE_TAG" -Value $version
        Print-Success "IMAGE_TAG set to $version"
        if ($script:LiteMode) { Set-EnvFileValue -Path $envFile -Key "COMPOSE_PROFILES" -Value "" }
        Set-EnvFileValue -Path $envFile -Key "AUTH_TYPE" -Value "basic"
        Print-Success "Basic authentication enabled"
        Set-EnvFileValue -Path $envFile -Key "USER_AUTH_SECRET" -Value "`"$(New-SecureSecret)`""
        Print-Success "Generated secure USER_AUTH_SECRET"
        if ($script:IncludeCraftMode -or $version -match '^craft-') {
            Set-EnvFileValue -Path $envFile -Key "ENABLE_CRAFT" -Value "true" -Uncomment
            Print-Success "Onyx Craft enabled"
        } else {
            Print-Info "Onyx Craft disabled (use -IncludeCraft to enable)"
        }
        Print-Success ".env file created"
        Print-Info "You can customize .env later for OAuth/SAML, AI models, domain settings, and Craft."
    }

    # Clean up stale lite overlay if standard mode was selected
    if (-not $script:LiteMode -and (Test-Path $liteOverlayPath)) {
        Remove-Item -Force $liteOverlayPath
        Print-Info "Removed previous lite overlay (switching to standard mode)"
    }

    # ── Step 6: Check Ports ───────────────────────────────────────────────
    Print-Step "Checking for available ports"
    $availablePort = Find-AvailablePort 3000
    if ($availablePort -ne 3000) { Print-Info "Port 3000 in use, using port $availablePort" }
    else { Print-Info "Port 3000 is available" }
    $env:HOST_PORT = $availablePort
    Print-Success "Using port $availablePort for nginx"

    $currentImageTag = Get-EnvFileValue -Path $envFile -Key "IMAGE_TAG"
    $useLatest = ($currentImageTag -eq "edge" -or $currentImageTag -eq "latest" -or $currentImageTag -match '^craft-')
    if ($useLatest) { Print-Info "Using '$currentImageTag' tag - will force pull and recreate containers" }

    # For pinned version tags, re-download config files from that tag so the
    # compose file matches the images being pulled (the initial download used main).
    if (-not $useLatest -and -not $Local) {
        $pinnedBase = "https://raw.githubusercontent.com/onyx-dot-app/onyx/$currentImageTag/deployment"
        Print-Info "Fetching config files matching tag $currentImageTag..."
        try {
            Download-OnyxFile "$pinnedBase/docker_compose/docker-compose.yml" $composeDest
            try { Download-OnyxFile "$pinnedBase/data/nginx/app.conf.template" (Join-Path $script:InstallRoot "data\nginx\app.conf.template") } catch {}
            try { Download-OnyxFile "$pinnedBase/data/nginx/run-nginx.sh" (Join-Path $script:InstallRoot "data\nginx\run-nginx.sh") } catch {}
            if ($script:LiteMode) {
                try { Download-OnyxFile "$pinnedBase/docker_compose/$($script:LiteComposeFile)" $liteOverlayPath } catch {}
            }
            Print-Success "Config files updated to match $currentImageTag"
        } catch {
            Print-Warning "Tag $currentImageTag not found on GitHub - using main branch configs"
        }
    }

    # ── Step 7: Pull Images ───────────────────────────────────────────────
    Print-Step "Pulling Docker images"
    Print-Info "This may take several minutes depending on your internet connection..."

    $pullArgs = @("pull"); if (-not $ShowVerbose) { $pullArgs += "--quiet" }
    if ((Invoke-Compose @pullArgs) -ne 0) { Print-OnyxError "Failed to download Docker images"; exit 1 }
    Print-Success "Docker images downloaded successfully"

    # ── Step 8: Start Services ────────────────────────────────────────────
    Print-Step "Starting Onyx services"
    Print-Info "Launching containers..."
    $upArgs = @("up", "-d")
    if ($useLatest) { $upArgs += @("--pull", "always", "--force-recreate") }
    $upResult = Invoke-Compose @upArgs
    if ($upResult -ne 0) { Print-OnyxError "Failed to start Onyx services"; exit 1 }

    # ── Step 9: Container Health ──────────────────────────────────────────
    Print-Step "Verifying container health"
    Start-Sleep -Seconds 10
    $restartIssues = $false
    $containerIds = @()
    $psArgs = @("ps", "-q")
    try { $containerIds = @(Invoke-Compose @psArgs 2>$null | Where-Object { $_ }) } catch { }

    foreach ($cid in $containerIds) {
        if ([string]::IsNullOrWhiteSpace($cid)) { continue }
        $name = (& docker inspect --format '{{.Name}}' $cid 2>$null).TrimStart('/')
        $restarts = 0; try { $restarts = [int](& docker inspect --format '{{.RestartCount}}' $cid 2>$null) } catch { }
        $status = & docker inspect --format '{{.State.Status}}' $cid 2>$null
        if ($status -eq "running" -and $restarts -gt 2) {
            Print-OnyxError "$name is in a restart loop (restarted $restarts times)"; $restartIssues = $true
        } elseif ($status -eq "running") { Print-Success "$name is healthy" }
        elseif ($status -eq "restarting") { Print-OnyxError "$name is stuck restarting"; $restartIssues = $true }
        else { Print-Warning "$name status: $status" }
    }

    if ($restartIssues) {
        Print-OnyxError "Some containers are experiencing issues!"
        $cmd = if ($script:ComposeCmdType -eq "plugin") { "docker compose" } else { "docker-compose" }
        Print-Info "Check logs: cd `"$(Join-Path $script:InstallRoot 'deployment')`" && $cmd $((Get-ComposeFileArgs) -join ' ') logs"
        Print-Info "For help, contact: founders@onyx.app"
        exit 1
    }

    # ── Step 10: Complete ─────────────────────────────────────────────────
    Print-Step "Installation Complete!"
    Print-Success "All containers are running successfully!"
    $port = if ($env:HOST_PORT) { $env:HOST_PORT } else { 3000 }

    if (Test-OnyxHealth -Port $port) {
        Write-Host "============================================" -ForegroundColor Green
        Write-Host "   Onyx service is ready!                   " -ForegroundColor Green
        Write-Host "============================================" -ForegroundColor Green
    } else {
        Print-Warning "Health check timed out after 10 minutes"
        Print-Info "Containers are running, but the web service may still be initializing."
        Write-Host "============================================" -ForegroundColor Yellow
        Write-Host "   Onyx containers are running              " -ForegroundColor Yellow
        Write-Host "============================================" -ForegroundColor Yellow
    }

    Print-Info "Access Onyx at: http://localhost:$port"
    Print-Info "Visit http://localhost:$port/auth/signup to create your admin account"
    Print-Info "The first user created will automatically have admin privileges"

    if ($script:LiteMode) {
        Print-Info "Running in Lite mode - Vespa, Redis, model servers, and background workers are NOT started."
        Print-Info "Connectors and RAG search are disabled. LLM chat, tools, Projects still work."
    }

    Print-Info "See the README in $($script:InstallRoot) for more information."
    Print-Info "For help or issues, contact: founders@onyx.app"
}

Main


================================================
FILE: deployment/docker_compose/install.sh
================================================
#!/bin/bash

set -euo pipefail

# Expected resource requirements (overridden below if --lite)
EXPECTED_DOCKER_RAM_GB=10
EXPECTED_DISK_GB=32

# Parse command line arguments
SHUTDOWN_MODE=false
DELETE_DATA_MODE=false
INCLUDE_CRAFT=false  # Disabled by default, use --include-craft to enable
LITE_MODE=false       # Disabled by default, use --lite to enable
USE_LOCAL_FILES=false # Disabled by default, use --local to skip downloading config files
NO_PROMPT=false
DRY_RUN=false
VERBOSE=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --shutdown)
            SHUTDOWN_MODE=true
            shift
            ;;
        --delete-data)
            DELETE_DATA_MODE=true
            shift
            ;;
        --include-craft)
            INCLUDE_CRAFT=true
            shift
            ;;
        --lite)
            LITE_MODE=true
            shift
            ;;
        --local)
            USE_LOCAL_FILES=true
            shift
            ;;
        --no-prompt)
            NO_PROMPT=true
            shift
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --verbose)
            VERBOSE=true
            shift
            ;;
        --help|-h)
            echo "Onyx Installation Script"
            echo ""
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --include-craft  Enable Onyx Craft (AI-powered web app building)"
            echo "  --lite           Deploy Onyx Lite (no Vespa, Redis, or model servers)"
            echo "  --local          Use existing config files instead of downloading from GitHub"
            echo "  --shutdown       Stop (pause) Onyx containers"
            echo "  --delete-data    Remove all Onyx data (containers, volumes, and files)"
            echo "  --no-prompt      Run non-interactively with defaults (for CI/automation)"
            echo "  --dry-run        Show what would be done without making changes"
            echo "  --verbose        Show detailed output for debugging"
            echo "  --help, -h       Show this help message"
            echo ""
            echo "Examples:"
            echo "  $0                    # Install Onyx"
            echo "  $0 --lite             # Install Onyx Lite (minimal deployment)"
            echo "  $0 --include-craft    # Install Onyx with Craft enabled"
            echo "  $0 --shutdown         # Pause Onyx services"
            echo "  $0 --delete-data      # Completely remove Onyx and all data"
            echo "  $0 --local            # Re-run using existing config files on disk"
            echo "  $0 --no-prompt        # Non-interactive install with defaults"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

if [[ "$VERBOSE" = true ]]; then
    set -x
fi

if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
    echo "ERROR: --lite and --include-craft cannot be used together."
    echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
    exit 1
fi

# When --lite is passed as a flag, lower resource thresholds early (before the
# resource check). When lite is chosen interactively, the thresholds are adjusted
# after the resource check has already passed with the standard thresholds —
# which is the safer direction.
if [[ "$LITE_MODE" = true ]]; then
    EXPECTED_DOCKER_RAM_GB=4
    EXPECTED_DISK_GB=16
fi

INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"

LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"

# Build the -f flags for docker compose.
# Pass "true" as $1 to auto-detect a previously-downloaded lite overlay
# (used by shutdown/delete-data so users don't need to remember --lite).
compose_file_args() {
    local auto_detect="${1:-false}"
    local args="-f docker-compose.yml"
    if [[ "$LITE_MODE" = true ]] || { [[ "$auto_detect" = true ]] && [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; }; then
        args="$args -f ${LITE_COMPOSE_FILE}"
    fi
    echo "$args"
}

# --- Downloader detection (curl with wget fallback) ---
DOWNLOADER=""
detect_downloader() {
    if command -v curl &> /dev/null; then
        DOWNLOADER="curl"
        return 0
    fi
    if command -v wget &> /dev/null; then
        DOWNLOADER="wget"
        return 0
    fi
    echo "ERROR: Neither curl nor wget found. Please install one and retry."
    exit 1
}
detect_downloader

download_file() {
    local url="$1"
    local output="$2"
    if [[ "$DOWNLOADER" == "curl" ]]; then
        curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
    else
        wget -q --tries=3 --timeout=20 -O "$output" "$url"
    fi
}

# Ensures a required file is present. With --local, verifies the file exists on
# disk. Otherwise, downloads it from the given URL. Returns 0 on success, 1 on
# failure (caller should handle the exit).
ensure_file() {
    local path="$1"
    local url="$2"
    local desc="$3"

    if [[ "$USE_LOCAL_FILES" = true ]]; then
        if [[ -f "$path" ]]; then
            print_success "Using existing ${desc}"
            return 0
        fi
        print_error "Required file missing: ${desc} (${path})"
        return 1
    fi

    print_info "Downloading ${desc}..."
    if download_file "$url" "$path" 2>/dev/null; then
        print_success "${desc} downloaded"
        return 0
    fi
    print_error "Failed to download ${desc}"
    print_info "Please ensure you have internet connection and try again"
    return 1
}

# --- Interactive prompt helpers ---
is_interactive() {
    [[ "$NO_PROMPT" = false ]] && [[ -r /dev/tty ]] && [[ -w /dev/tty ]]
}

read_prompt_line() {
    local prompt_text="$1"
    if ! is_interactive; then
        REPLY=""
        return
    fi
    [[ -n "$prompt_text" ]] && printf "%s" "$prompt_text" > /dev/tty
    IFS= read -r REPLY < /dev/tty || REPLY=""
}

read_prompt_char() {
    local prompt_text="$1"
    if ! is_interactive; then
        REPLY=""
        return
    fi
    [[ -n "$prompt_text" ]] && printf "%s" "$prompt_text" > /dev/tty
    IFS= read -r -n 1 REPLY < /dev/tty || REPLY=""
    printf "\n" > /dev/tty
}

prompt_or_default() {
    local prompt_text="$1"
    local default_value="$2"
    read_prompt_line "$prompt_text"
    [[ -z "$REPLY" ]] && REPLY="$default_value"
    return 0
}

prompt_yn_or_default() {
    local prompt_text="$1"
    local default_value="$2"
    read_prompt_char "$prompt_text"
    [[ -z "$REPLY" ]] && REPLY="$default_value"
    return 0
}

confirm_action() {
    local description="$1"
    prompt_yn_or_default "Install ${description}? (Y/n) [default: Y] " "Y"
    if [[ "$REPLY" =~ ^[Nn] ]]; then
        print_warning "Skipping: ${description}"
        return 1
    fi
    return 0
}

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# Step counter variables
CURRENT_STEP=0
TOTAL_STEPS=10

# Print colored output
print_success() {
    echo -e "${GREEN}✓${NC} $1"
}

print_error() {
    echo -e "${RED}✗${NC} $1"
}

print_info() {
    echo -e "${YELLOW}ℹ${NC} $1"
}

print_step() {
    CURRENT_STEP=$((CURRENT_STEP + 1))
    echo ""
    echo -e "${BLUE}${BOLD}=== $1 - Step ${CURRENT_STEP}/${TOTAL_STEPS} ===${NC}"
    echo ""
}

print_warning() {
    echo -e "${YELLOW}⚠${NC}  $1"
}

# Handle shutdown mode
if [ "$SHUTDOWN_MODE" = true ]; then
    echo ""
    echo -e "${BLUE}${BOLD}=== Shutting down Onyx ===${NC}"
    echo ""
    
    if [ -d "${INSTALL_ROOT}/deployment" ]; then
        print_info "Stopping Onyx containers..."

        # Check if docker-compose.yml exists
        if [ -f "${INSTALL_ROOT}/deployment/docker-compose.yml" ]; then
            # Determine compose command
            if docker compose version &> /dev/null; then
                COMPOSE_CMD="docker compose"
            elif command -v docker-compose &> /dev/null; then
                COMPOSE_CMD="docker-compose"
            else
                print_error "Docker Compose not found. Cannot stop containers."
                exit 1
            fi

            # Stop containers (without removing them)
            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) stop)
            if [ $? -eq 0 ]; then
                print_success "Onyx containers stopped (paused)"
            else
                print_error "Failed to stop containers"
                exit 1
            fi
        else
            print_warning "docker-compose.yml not found in ${INSTALL_ROOT}/deployment"
        fi
    else
        print_warning "Onyx data directory not found. Nothing to shutdown."
    fi

    echo ""
    print_success "Onyx shutdown complete!"
    exit 0
fi

# Handle delete data mode
if [ "$DELETE_DATA_MODE" = true ]; then
    echo ""
    echo -e "${RED}${BOLD}=== WARNING: This will permanently delete all Onyx data ===${NC}"
    echo ""
    print_warning "This action will remove:"
    echo "  • All Onyx containers and volumes"
    echo "  • All downloaded files and configurations"
    echo "  • All user data and documents"
    echo ""
    if is_interactive; then
        prompt_or_default "Are you sure you want to continue? Type 'DELETE' to confirm: " ""
        echo "" > /dev/tty
        if [ "$REPLY" != "DELETE" ]; then
            print_info "Operation cancelled."
            exit 0
        fi
    else
        print_error "Cannot confirm destructive operation in non-interactive mode."
        print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
        exit 1
    fi

    print_info "Removing Onyx containers and volumes..."

    if [ -d "${INSTALL_ROOT}/deployment" ]; then
        # Check if docker-compose.yml exists
        if [ -f "${INSTALL_ROOT}/deployment/docker-compose.yml" ]; then
            # Determine compose command
            if docker compose version &> /dev/null; then
                COMPOSE_CMD="docker compose"
            elif command -v docker-compose &> /dev/null; then
                COMPOSE_CMD="docker-compose"
            else
                print_error "Docker Compose not found. Cannot remove containers."
                exit 1
            fi

            # Stop and remove containers with volumes
            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) down -v)
            if [ $? -eq 0 ]; then
                print_success "Onyx containers and volumes removed"
            else
                print_error "Failed to remove containers and volumes"
            fi
        fi
    fi

    print_info "Removing data directories..."
    if [ -d "${INSTALL_ROOT}" ]; then
        rm -rf "${INSTALL_ROOT}"
        print_success "Data directories removed"
    else
        print_warning "No ${INSTALL_ROOT} directory found"
    fi

    echo ""
    print_success "All Onyx data has been permanently deleted!"
    exit 0
fi

# --- Auto-install Docker (Linux only) ---
# Runs before the banner so a group-based re-exec doesn't repeat it.
install_docker_linux() {
    local distro_id=""
    if [[ -f /etc/os-release ]]; then
        distro_id="$(. /etc/os-release && echo "${ID:-}")"
    fi

    case "$distro_id" in
        amzn)
            print_info "Detected Amazon Linux — installing Docker via package manager..."
            if command -v dnf &> /dev/null; then
                sudo dnf install -y docker
            else
                sudo yum install -y docker
            fi
            ;;
        *)
            print_info "Installing Docker via get.docker.com..."
            download_file "https://get.docker.com" /tmp/get-docker.sh
            sudo sh /tmp/get-docker.sh
            rm -f /tmp/get-docker.sh
            ;;
    esac

    sudo systemctl start docker 2>/dev/null || sudo service docker start 2>/dev/null || true
    sudo systemctl enable docker 2>/dev/null || true
}

# Detect OS (including WSL)
IS_WSL=false
if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
    IS_WSL=true
fi

# Dry-run: show plan and exit
if [[ "$DRY_RUN" = true ]]; then
    print_info "Dry run mode — showing what would happen:"
    echo "  • Install root: ${INSTALL_ROOT}"
    echo "  • Lite mode: ${LITE_MODE}"
    echo "  • Include Craft: ${INCLUDE_CRAFT}"
    echo "  • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
    echo "  • Downloader: ${DOWNLOADER}"
    echo ""
    print_success "Dry run complete (no changes made)"
    exit 0
fi

if ! command -v docker &> /dev/null; then
    if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; then
        print_info "Docker is required but not installed."
        if ! confirm_action "Docker Engine"; then
            print_error "Docker is required to run Onyx."
            exit 1
        fi
        install_docker_linux
        if ! command -v docker &> /dev/null; then
            print_error "Docker installation failed."
            echo "  Visit: https://docs.docker.com/get-docker/"
            exit 1
        fi
        print_success "Docker installed successfully"
    fi
fi

# --- Auto-install Docker Compose plugin (Linux only) ---
if command -v docker &> /dev/null \
    && ! docker compose version &> /dev/null \
    && ! command -v docker-compose &> /dev/null \
    && { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; }; then

    print_info "Docker Compose is required but not installed."
    if ! confirm_action "Docker Compose plugin"; then
        print_error "Docker Compose is required to run Onyx."
        exit 1
    fi
    COMPOSE_ARCH="$(uname -m)"
    COMPOSE_URL="https://github.com/docker/compose/releases/latest/download/docker-compose-linux-${COMPOSE_ARCH}"
    COMPOSE_DIR="/usr/local/lib/docker/cli-plugins"
    COMPOSE_TMP="$(mktemp)"
    sudo mkdir -p "$COMPOSE_DIR"
    if download_file "$COMPOSE_URL" "$COMPOSE_TMP"; then
        sudo mv "$COMPOSE_TMP" "$COMPOSE_DIR/docker-compose"
        sudo chmod +x "$COMPOSE_DIR/docker-compose"
        if docker compose version &> /dev/null; then
            print_success "Docker Compose plugin installed"
        else
            print_error "Docker Compose plugin installed but not detected."
            echo "  Visit: https://docs.docker.com/compose/install/"
            exit 1
        fi
    else
        rm -f "$COMPOSE_TMP"
        print_error "Failed to download Docker Compose plugin."
        echo "  Visit: https://docs.docker.com/compose/install/"
        exit 1
    fi
fi

# On Linux, ensure the current user can talk to the Docker daemon without
# sudo.  If necessary, add them to the "docker" group and re-exec the
# script under that group so the rest of the install proceeds normally.
if command -v docker &> /dev/null \
    && { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; } \
    && [[ "$(id -u)" -ne 0 ]] \
    && ! docker info &> /dev/null; then
    if [[ "${_ONYX_REEXEC:-}" = "1" ]]; then
        print_error "Cannot connect to Docker after group re-exec."
        print_info "Log out and back in, then run the script again."
        exit 1
    fi
    if ! getent group docker &> /dev/null; then
        sudo groupadd docker
    fi
    print_info "Adding $USER to the docker group..."
    sudo usermod -aG docker "$USER"
    print_info "Re-launching with docker group active..."
    exec sg docker -c "_ONYX_REEXEC=1 bash $(printf '%q ' "$0" "$@")"
fi

# ASCII Art Banner
echo ""
echo -e "${BLUE}${BOLD}"
echo "  ____                    "
echo " / __ \                   "
echo "| |  | |_ __  _   ___  __ "
echo "| |  | | '_ \| | | \ \/ / "
echo "| |__| | | | | |_| |>  <  "
echo " \____/|_| |_|\__, /_/\_\ "
echo "               __/ |      "
echo "              |___/       "
echo -e "${NC}"
echo "Welcome to Onyx Installation Script"
echo "===================================="
echo ""

# User acknowledgment section
echo -e "${YELLOW}${BOLD}This script will:${NC}"
echo "1. Download deployment files for Onyx into a new '${INSTALL_ROOT}' directory"
echo "2. Check your system resources (Docker, memory, disk space)"
echo "3. Guide you through deployment options (version, authentication)"
echo ""

if is_interactive; then
    echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
    read_prompt_line ""
    echo ""
else
    echo -e "${YELLOW}${BOLD}Running in non-interactive mode - proceeding automatically...${NC}"
    echo ""
fi

# GitHub repo base URL - using main branch
GITHUB_RAW_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose"

# Check system requirements
print_step "Verifying Docker installation"

# Check Docker
if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please install Docker first."
    echo "Visit: https://docs.docker.com/get-docker/"
    exit 1
fi
DOCKER_VERSION=$(docker --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
print_success "Docker $DOCKER_VERSION is installed"

# Check Docker Compose
if docker compose version &> /dev/null; then
    COMPOSE_VERSION=$(docker compose version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
    COMPOSE_CMD="docker compose"
    if [ -z "$COMPOSE_VERSION" ]; then
        # Handle non-standard versions like "dev" - assume recent enough
        COMPOSE_VERSION="dev"
        print_success "Docker Compose (dev build) is installed (plugin)"
    else
        print_success "Docker Compose $COMPOSE_VERSION is installed (plugin)"
    fi
elif command -v docker-compose &> /dev/null; then
    COMPOSE_VERSION=$(docker-compose --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
    COMPOSE_CMD="docker-compose"
    if [ -z "$COMPOSE_VERSION" ]; then
        COMPOSE_VERSION="dev"
        print_success "Docker Compose (dev build) is installed (standalone)"
    else
        print_success "Docker Compose $COMPOSE_VERSION is installed (standalone)"
    fi
else
    print_error "Docker Compose is not installed. Please install Docker Compose first."
    echo "Visit: https://docs.docker.com/compose/install/"
    exit 1
fi

# Returns 0 if $1 <= $2, 1 if $1 > $2
# Handles missing or non-numeric parts gracefully (treats them as 0)
version_compare() {
    local version1="${1:-0.0.0}"
    local version2="${2:-0.0.0}"

    local v1_major v1_minor v1_patch v2_major v2_minor v2_patch
    v1_major=$(echo "$version1" | cut -d. -f1)
    v1_minor=$(echo "$version1" | cut -d. -f2)
    v1_patch=$(echo "$version1" | cut -d. -f3)
    v2_major=$(echo "$version2" | cut -d. -f1)
    v2_minor=$(echo "$version2" | cut -d. -f2)
    v2_patch=$(echo "$version2" | cut -d. -f3)

    # Default non-numeric or empty parts to 0
    [[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
    [[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
    [[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
    [[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
    [[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
    [[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0

    if [ "$v1_major" -lt "$v2_major" ]; then return 0
    elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi

    if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
    elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi

    [ "$v1_patch" -le "$v2_patch" ]
}

# Check Docker daemon
if ! docker info &> /dev/null; then
    if [[ "$OSTYPE" == "darwin"* ]]; then
        print_info "Docker daemon is not running. Starting Docker Desktop..."
        open -a Docker
        # Wait up to 120 seconds for Docker to be ready
        DOCKER_WAIT=0
        DOCKER_MAX_WAIT=120
        while ! docker info &> /dev/null; do
            if [ $DOCKER_WAIT -ge $DOCKER_MAX_WAIT ]; then
                print_error "Docker Desktop did not start within ${DOCKER_MAX_WAIT} seconds."
                print_info "Please start Docker Desktop manually and re-run this script."
                exit 1
            fi
            printf "\r\033[KWaiting for Docker Desktop to start... (%ds)" "$DOCKER_WAIT"
            sleep 2
            DOCKER_WAIT=$((DOCKER_WAIT + 2))
        done
        echo ""
        print_success "Docker Desktop is now running"
    else
        print_error "Docker daemon is not running. Please start Docker."
        exit 1
    fi
else
    print_success "Docker daemon is running"
fi

# Check Docker resources
print_step "Verifying Docker resources"

# Get Docker system info
DOCKER_INFO=$(docker system info 2>/dev/null)

# Try to get memory allocation (method varies by platform)
if [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS - Docker Desktop
    if command -v jq &> /dev/null && [ -f ~/Library/Group\ Containers/group.com.docker/settings.json ]; then
        MEMORY_MB=$(cat ~/Library/Group\ Containers/group.com.docker/settings.json 2>/dev/null | jq '.memoryMiB // 0' 2>/dev/null || echo "0")
    else
        # Try to get from docker system info
        MEMORY_BYTES=$(docker system info 2>/dev/null | grep -i "total memory" | grep -oE '[0-9]+\.[0-9]+' | head -1)
        if [ -n "$MEMORY_BYTES" ]; then
            # Convert from GiB to MB (multiply by 1024)
            MEMORY_MB=$(echo "$MEMORY_BYTES * 1024" | bc 2>/dev/null | cut -d. -f1)
            if [ -z "$MEMORY_MB" ]; then
                MEMORY_MB="0"
            fi
        else
            MEMORY_MB="0"
        fi
    fi
else
    # Linux - Native Docker
    MEMORY_KB=$(grep MemTotal /proc/meminfo | grep -oE '[0-9]+' || echo "0")
    MEMORY_MB=$((MEMORY_KB / 1024))
fi

# Convert to GB for display
if [ "$MEMORY_MB" -gt 0 ]; then
    MEMORY_GB=$(awk "BEGIN {printf \"%.1f\", $MEMORY_MB / 1024}")
    if [ "$(awk "BEGIN {print ($MEMORY_MB >= 1024)}")" = "1" ]; then
        MEMORY_DISPLAY="~${MEMORY_GB}GB"
    else
        MEMORY_DISPLAY="${MEMORY_MB}MB"
    fi
    if [[ "$OSTYPE" == "darwin"* ]]; then
        print_info "Docker memory allocation: ${MEMORY_DISPLAY}"
    else
        print_info "System memory: ${MEMORY_DISPLAY} (Docker uses host memory directly)"
    fi
else
    print_warning "Could not determine memory allocation"
    MEMORY_DISPLAY="unknown"
    MEMORY_MB=0
fi

# Check disk space (different commands for macOS vs Linux)
if [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS uses -g for GB
    DISK_AVAILABLE=$(df -g . | awk 'NR==2 {print $4}')
else
    # Linux uses -BG for GB
    DISK_AVAILABLE=$(df -BG . | awk 'NR==2 {print $4}' | sed 's/G//')
fi
print_info "Available disk space: ${DISK_AVAILABLE}GB"

# Resource requirements check
RESOURCE_WARNING=false
EXPECTED_RAM_MB=$((EXPECTED_DOCKER_RAM_GB * 1024))

if [ "$MEMORY_MB" -gt 0 ] && [ "$MEMORY_MB" -lt "$EXPECTED_RAM_MB" ]; then
    print_warning "Less than ${EXPECTED_DOCKER_RAM_GB}GB RAM available (found: ${MEMORY_DISPLAY})"
    RESOURCE_WARNING=true
fi

if [ "$DISK_AVAILABLE" -lt "$EXPECTED_DISK_GB" ]; then
    print_warning "Less than ${EXPECTED_DISK_GB}GB disk space available (found: ${DISK_AVAILABLE}GB)"
    RESOURCE_WARNING=true
fi

if [ "$RESOURCE_WARNING" = true ]; then
    echo ""
    print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance in standard mode."
    print_warning "Lite mode requires less resources (1-4GB RAM, 8-16GB disk depending on usage), but does not include a vector database."
    echo ""
    prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        print_info "Installation cancelled. Please allocate more resources and try again."
        exit 1
    fi
    print_info "Proceeding with installation despite resource limitations..."
fi

# Create directory structure
print_step "Creating directory structure"
if [ -d "${INSTALL_ROOT}" ]; then
    print_info "Directory structure already exists"
    print_success "Using existing ${INSTALL_ROOT} directory"
fi
mkdir -p "${INSTALL_ROOT}/deployment"
mkdir -p "${INSTALL_ROOT}/data/nginx/local"
print_success "Directory structure created"

# Ensure all required configuration files are present
NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx"

if [[ "$USE_LOCAL_FILES" = true ]]; then
    print_step "Verifying existing configuration files"
else
    print_step "Downloading Onyx configuration files"
    print_info "This step downloads all necessary configuration files from GitHub..."
fi

ensure_file "${INSTALL_ROOT}/deployment/docker-compose.yml" \
    "${GITHUB_RAW_URL}/docker-compose.yml" "docker-compose.yml" || exit 1

# Check Docker Compose version compatibility after obtaining docker-compose.yml
if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
    print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
    echo ""
    print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
    echo ""
    print_info "To use this configuration with your current Docker Compose version, you have two options:"
    echo ""
    echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
    echo "   Visit: https://docs.docker.com/compose/install/"
    echo ""
    echo "2. Manually replace all env_file sections in docker-compose.yml"
    echo "   Change from:"
    echo "     env_file:"
    echo "       - path: .env"
    echo "         required: false"
    echo "   To:"
    echo "     env_file: .env"
    echo ""
    print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
    echo ""
    prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
        exit 1
    fi
    print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
fi

# Ask for deployment mode (standard vs lite) unless already set via --lite flag
if [[ "$LITE_MODE" = false ]]; then
    print_info "Which deployment mode would you like?"
    echo ""
    echo "  1) Lite      - Minimal deployment (no Vespa, Redis, or model servers)"
    echo "                  LLM chat, tools, file uploads, and Projects still work"
    echo "  2) Standard  - Full deployment with search, connectors, and RAG"
    echo ""
    prompt_or_default "Choose a mode (1 or 2) [default: 1]: " "1"
    echo ""

    case "$REPLY" in
        2)
            print_info "Selected: Standard mode"
            ;;
        *)
            LITE_MODE=true
            print_info "Selected: Lite mode"
            ;;
    esac
else
    print_info "Deployment mode: Lite (set via --lite flag)"
fi

if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
    print_error "--include-craft cannot be used with Lite mode."
    print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
    exit 1
fi

if [[ "$LITE_MODE" = true ]]; then
    EXPECTED_DOCKER_RAM_GB=4
    EXPECTED_DISK_GB=16
fi

# Handle lite overlay file based on selected mode
if [[ "$LITE_MODE" = true ]]; then
    ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
        "${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
elif [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
    rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
    print_info "Removed previous lite overlay (switching to standard mode)"
fi

ensure_file "${INSTALL_ROOT}/deployment/env.template" \
    "${GITHUB_RAW_URL}/env.template" "env.template" || exit 1

ensure_file "${INSTALL_ROOT}/data/nginx/app.conf.template" \
    "$NGINX_BASE_URL/app.conf.template" "nginx/app.conf.template" || exit 1

ensure_file "${INSTALL_ROOT}/data/nginx/run-nginx.sh" \
    "$NGINX_BASE_URL/run-nginx.sh" "nginx/run-nginx.sh" || exit 1
chmod +x "${INSTALL_ROOT}/data/nginx/run-nginx.sh"

ensure_file "${INSTALL_ROOT}/README.md" \
    "${GITHUB_RAW_URL}/README.md" "README.md" || exit 1

touch "${INSTALL_ROOT}/data/nginx/local/.gitkeep"
print_success "All configuration files ready"

# Set up deployment configuration
print_step "Setting up deployment configs"
ENV_FILE="${INSTALL_ROOT}/deployment/.env"
ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
# Check if services are already running
if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docker-compose.yml" ]; then
    # Determine compose command
    if docker compose version &> /dev/null; then
        COMPOSE_CMD="docker compose"
    elif command -v docker-compose &> /dev/null; then
        COMPOSE_CMD="docker-compose"
    else
        COMPOSE_CMD=""
    fi

    if [ -n "$COMPOSE_CMD" ]; then
        # Check if any containers are running
        RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) ps -q 2>/dev/null | wc -l)
        if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
            print_error "Onyx services are currently running!"
            echo ""
            print_info "To make configuration changes, you must first shut down the services."
            echo ""
            print_info "Please run the following command to shut down Onyx:"
            echo -e "   ${BOLD}./install.sh --shutdown${NC}"
            echo ""
            print_info "Then run this script again to make your changes."
            exit 1
        fi
    fi
fi

if [ -f "$ENV_FILE" ]; then
    print_info "Existing .env file found. What would you like to do?"
    echo ""
    echo "• Press Enter to restart with current configuration"
    echo "• Type 'update' to update to a newer version"
    echo ""
    prompt_or_default "Choose an option [default: restart]: " ""
    echo ""

    if [ "$REPLY" = "update" ]; then
        print_info "Update selected. Which tag would you like to deploy?"
        echo ""
        echo "• Press Enter for edge (recommended)"
        echo "• Type a specific tag (e.g., v0.1.0)"
        echo ""
        if [ "$INCLUDE_CRAFT" = true ]; then
            prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
            VERSION="$REPLY"
        else
            prompt_or_default "Enter tag [default: edge]: " "edge"
            VERSION="$REPLY"
        fi
        echo ""

        if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
            print_info "Selected: craft-latest (Craft enabled)"
        elif [ "$VERSION" = "edge" ]; then
            print_info "Selected: edge (latest nightly)"
        else
            print_info "Selected: $VERSION"
        fi

        # Reject craft image tags when running in lite mode
        if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
            print_error "Cannot use a craft image tag (${VERSION}) with --lite."
            print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
            exit 1
        fi

        # Update .env file with new version
        print_info "Updating configuration for version $VERSION..."
        if grep -q "^IMAGE_TAG=" "$ENV_FILE"; then
            # Update existing IMAGE_TAG line
            sed -i.bak "s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/" "$ENV_FILE"
        else
            # Add IMAGE_TAG line if it doesn't exist
            echo "IMAGE_TAG=$VERSION" >> "$ENV_FILE"
        fi
        print_success "Updated IMAGE_TAG to $VERSION in .env file"

        # If using craft image, also enable ENABLE_CRAFT
        if [[ "$VERSION" == craft-* ]]; then
            sed -i.bak 's/^#* *ENABLE_CRAFT=.*/ENABLE_CRAFT=true/' "$ENV_FILE" 2>/dev/null || true
            print_success "ENABLE_CRAFT set to true"
        fi
        print_success "Configuration updated for upgrade"
    else
        # Reject restarting a craft deployment in lite mode
        EXISTING_TAG=$(grep "^IMAGE_TAG=" "$ENV_FILE" | head -1 | cut -d'=' -f2 | tr -d ' "'"'"'')
        if [[ "$LITE_MODE" = true ]] && [[ "${EXISTING_TAG:-}" == craft-* ]]; then
            print_error "Cannot restart a craft deployment (${EXISTING_TAG}) with --lite."
            print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
            exit 1
        fi

        print_info "Keeping existing configuration..."
        print_success "Will restart with current settings"
    fi

    # Ensure COMPOSE_PROFILES is cleared when running in lite mode on an
    # existing .env (the template ships with s3-filestore enabled).
    if [[ "$LITE_MODE" = true ]] && grep -q "^COMPOSE_PROFILES=.*s3-filestore" "$ENV_FILE" 2>/dev/null; then
        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
        print_success "Cleared COMPOSE_PROFILES for lite mode"
    fi
else
    print_info "No existing .env file found. Setting up new deployment..."
    echo ""

    # Ask for version
    print_info "Which tag would you like to deploy?"
    echo ""
    if [ "$INCLUDE_CRAFT" = true ]; then
        echo "• Press Enter for craft-latest (recommended for Craft)"
        echo "• Type a specific tag (e.g., craft-v1.0.0)"
        echo ""
        prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
        VERSION="$REPLY"
    else
        echo "• Press Enter for edge (recommended)"
        echo "• Type a specific tag (e.g., v0.1.0)"
        echo ""
        prompt_or_default "Enter tag [default: edge]: " "edge"
        VERSION="$REPLY"
    fi
    echo ""

    if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
        print_info "Selected: craft-latest (Craft enabled)"
    elif [ "$VERSION" = "edge" ]; then
        print_info "Selected: edge (latest nightly)"
    else
        print_info "Selected: $VERSION"
    fi

    # Ask for authentication schema
    # echo ""
    # print_info "Which authentication schema would you like to set up?"
    # echo ""
    # echo "1) Basic - Username/password authentication"
    # echo "2) No Auth - Open access (development/testing)"
    # echo ""
    # read -p "Choose an option (1) [default 1]: " -r AUTH_CHOICE
    # echo ""

    # case "${AUTH_CHOICE:-1}" in
    #     1)
    #         AUTH_SCHEMA="basic"
    #         print_info "Selected: Basic authentication"
    #         ;;
    #     # 2)
    #     #     AUTH_SCHEMA="disabled"
    #     #     print_info "Selected: No authentication"
    #     #     ;;
    #     *)
    #         AUTH_SCHEMA="basic"
    #         print_info "Invalid choice, using basic authentication"
    #         ;;
    # esac

    # TODO (jessica): Uncomment this once no auth users still have an account
    # Use basic auth by default
    AUTH_SCHEMA="basic"

    # Reject craft image tags when running in lite mode (must check before writing .env)
    if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
        print_error "Cannot use a craft image tag (${VERSION}) with --lite."
        print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
        exit 1
    fi

    # Create .env file from template
    print_info "Creating .env file with your selections..."
    cp "$ENV_TEMPLATE" "$ENV_FILE"

    # Update IMAGE_TAG with selected version
    print_info "Setting IMAGE_TAG to $VERSION..."
    sed -i.bak "s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/" "$ENV_FILE"
    print_success "IMAGE_TAG set to $VERSION"

    # In lite mode, clear COMPOSE_PROFILES so profiled services (MinIO, etc.)
    # stay disabled — the template ships with s3-filestore enabled by default.
    if [[ "$LITE_MODE" = true ]]; then
        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
        print_success "Cleared COMPOSE_PROFILES for lite mode"
    fi

    # Configure basic authentication (default)
    sed -i.bak 's/^AUTH_TYPE=.*/AUTH_TYPE=basic/' "$ENV_FILE" 2>/dev/null || true
    print_success "Basic authentication enabled in configuration"

    # Check if openssl is available
    if ! command -v openssl &> /dev/null; then
        print_error "openssl is required to generate secure secrets but was not found."
        exit 1
    fi

    # Generate a secure USER_AUTH_SECRET
    USER_AUTH_SECRET=$(openssl rand -hex 32)
    sed -i.bak "s/^USER_AUTH_SECRET=.*/USER_AUTH_SECRET=\"$USER_AUTH_SECRET\"/" "$ENV_FILE" 2>/dev/null || true

    # Configure Craft based on flag or if using a craft-* image tag
    # By default, env.template has Craft commented out (disabled)
    if [ "$INCLUDE_CRAFT" = true ] || [[ "$VERSION" == craft-* ]]; then
        # Set ENABLE_CRAFT=true for runtime configuration (handles commented and uncommented lines)
        sed -i.bak 's/^#* *ENABLE_CRAFT=.*/ENABLE_CRAFT=true/' "$ENV_FILE" 2>/dev/null || true
        print_success "Onyx Craft enabled (ENABLE_CRAFT=true)"
    else
        print_info "Onyx Craft disabled (use --include-craft to enable)"
    fi

    print_success ".env file created with your preferences"
    echo ""
    print_info "IMPORTANT: The .env file has been configured with your selections."
    print_info "You can customize it later for:"
    echo "  • Advanced authentication (OAuth, SAML, etc.)"
    echo "  • AI model configuration"
    echo "  • Domain settings (for production)"
    echo "  • Onyx Craft (set ENABLE_CRAFT=true)"
    echo ""
fi

# Function to check if a port is available
is_port_available() {
    local port=$1

    # Try netcat first if available
    if command -v nc &> /dev/null; then
        # Try to connect to the port, if it fails, the port is available
        ! nc -z localhost "$port" 2>/dev/null
    # Fallback using curl/telnet approach
    elif command -v curl &> /dev/null; then
        # Try to connect with curl, if it fails, the port might be available
        ! curl -s --max-time 1 --connect-timeout 1 "http://localhost:$port" >/dev/null 2>&1
    # Final fallback using lsof if available
    elif command -v lsof &> /dev/null; then
        # Check if any process is listening on the port
        ! lsof -i ":$port" >/dev/null 2>&1
    else
        # No port checking tools available, assume port is available
        print_warning "No port checking tools available (nc, curl, lsof). Assuming port $port is available."
        return 0
    fi
}

# Function to find the first available port starting from a given port
find_available_port() {
    local start_port=${1:-3000}
    local port=$start_port

    while [ $port -le 65535 ]; do
        if is_port_available "$port"; then
            echo "$port"
            return 0
        fi
        port=$((port + 1))
    done

    # If no port found, return the original port as fallback
    echo "$start_port"
    return 1
}

# Check for port checking tools availability
PORT_CHECK_AVAILABLE=false
if command -v nc &> /dev/null || command -v curl &> /dev/null || command -v lsof &> /dev/null; then
    PORT_CHECK_AVAILABLE=true
fi

if [ "$PORT_CHECK_AVAILABLE" = false ]; then
    print_warning "No port checking tools found (nc, curl, lsof). Port detection may not work properly."
    print_info "Consider installing one of these tools for reliable automatic port detection."
fi

# Find available port for nginx
print_step "Checking for available ports"
AVAILABLE_PORT=$(find_available_port 3000)

if [ "$AVAILABLE_PORT" != "3000" ]; then
    print_info "Port 3000 is in use, found available port: $AVAILABLE_PORT"
else
    print_info "Port 3000 is available"
fi

# Export HOST_PORT for docker-compose
export HOST_PORT=$AVAILABLE_PORT
print_success "Using port $AVAILABLE_PORT for nginx"

# Determine if we're using a floating tag (edge, latest, craft-*) that should force pull
# Read IMAGE_TAG from .env file and remove any quotes or whitespace
CURRENT_IMAGE_TAG=$(grep "^IMAGE_TAG=" "$ENV_FILE" | head -1 | cut -d'=' -f2 | tr -d ' "'"'"'')
if [ "$CURRENT_IMAGE_TAG" = "edge" ] || [ "$CURRENT_IMAGE_TAG" = "latest" ] || [[ "$CURRENT_IMAGE_TAG" == craft-* ]]; then
    USE_LATEST=true
    if [[ "$CURRENT_IMAGE_TAG" == craft-* ]]; then
        print_info "Using craft tag '$CURRENT_IMAGE_TAG' - will force pull and recreate containers"
    else
        print_info "Using '$CURRENT_IMAGE_TAG' tag - will force pull and recreate containers"
    fi
else
    USE_LATEST=false
fi

# For pinned version tags, re-download config files from that tag so the
# compose file matches the images being pulled (the initial download used main).
if [[ "$USE_LATEST" = false ]] && [[ "$USE_LOCAL_FILES" = false ]]; then
    PINNED_BASE="https://raw.githubusercontent.com/onyx-dot-app/onyx/${CURRENT_IMAGE_TAG}/deployment"
    print_info "Fetching config files matching tag ${CURRENT_IMAGE_TAG}..."
    if download_file "${PINNED_BASE}/docker_compose/docker-compose.yml" "${INSTALL_ROOT}/deployment/docker-compose.yml" 2>/dev/null; then
        download_file "${PINNED_BASE}/data/nginx/app.conf.template" "${INSTALL_ROOT}/data/nginx/app.conf.template" 2>/dev/null || true
        download_file "${PINNED_BASE}/data/nginx/run-nginx.sh" "${INSTALL_ROOT}/data/nginx/run-nginx.sh" 2>/dev/null || true
        chmod +x "${INSTALL_ROOT}/data/nginx/run-nginx.sh"
        if [[ "$LITE_MODE" = true ]]; then
            download_file "${PINNED_BASE}/docker_compose/${LITE_COMPOSE_FILE}" \
                "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" 2>/dev/null || true
        fi
        print_success "Config files updated to match ${CURRENT_IMAGE_TAG}"
    else
        print_warning "Tag ${CURRENT_IMAGE_TAG} not found on GitHub — using main branch configs"
    fi
fi

# Pull Docker images with reduced output
print_step "Pulling Docker images"
print_info "This may take several minutes depending on your internet connection..."
echo ""
print_info "Downloading Docker images (this may take a while)..."
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
if [ $? -eq 0 ]; then
    print_success "Docker images downloaded successfully"
else
    print_error "Failed to download Docker images"
    exit 1
fi

# Start services
print_step "Starting Onyx services"
print_info "Launching containers..."
echo ""
if [ "$USE_LATEST" = true ]; then
    print_info "Force pulling latest images and recreating containers..."
    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
else
    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
fi
if [ $? -ne 0 ]; then
    print_error "Failed to start Onyx services"
    exit 1
fi

# Monitor container startup
print_step "Verifying container health"
print_info "Waiting for containers to initialize (10 seconds)..."

# Progress bar for waiting
for i in {1..10}; do
    printf "\r[%-10s] %d%%" $(printf '#%.0s' $(seq 1 $((i*10/10)))) $((i*100/10))
    sleep 1
done
echo ""
echo ""

# Check for restart loops
print_info "Checking container health status..."
RESTART_ISSUES=false
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)

for CONTAINER in $CONTAINERS; do
    PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
    CONTAINER_NAME=$(docker inspect --format '{{.Name}}' "$CONTAINER" | sed "s/^\/\|^${PROJECT_NAME}//g")
    RESTART_COUNT=$(docker inspect --format '{{.RestartCount}}' "$CONTAINER")
    STATUS=$(docker inspect --format '{{.State.Status}}' "$CONTAINER")

    if [ "$STATUS" = "running" ]; then
        if [ "$RESTART_COUNT" -gt 2 ]; then
            print_error "$CONTAINER_NAME is in a restart loop (restarted $RESTART_COUNT times)"
            RESTART_ISSUES=true
        else
            print_success "$CONTAINER_NAME is healthy"
        fi
    elif [ "$STATUS" = "restarting" ]; then
        print_error "$CONTAINER_NAME is stuck restarting"
        RESTART_ISSUES=true
    else
        print_warning "$CONTAINER_NAME status: $STATUS"
    fi
done

echo ""

if [ "$RESTART_ISSUES" = true ]; then
    print_error "Some containers are experiencing issues!"
    echo ""
    print_info "Please check the logs for more information:"
    echo "  (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"

    echo ""
    print_info "If the issue persists, please contact: founders@onyx.app"
    echo "Include the output of the logs command in your message."
    exit 1
fi

# Health check function
check_onyx_health() {
    local max_attempts=600  # 10 minutes * 60 attempts per minute (every 1 second)
    local attempt=1
    local port=${HOST_PORT:-3000}

    print_info "Checking Onyx service health..."
    echo "Containers are healthy, waiting for database migrations and service initialization to finish."
    echo ""

    while [ $attempt -le $max_attempts ]; do
        local http_code=""
        if [[ "$DOWNLOADER" == "curl" ]]; then
            http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
        else
            http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
        fi
        if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
            return 0
        fi

        # Show animated progress with time elapsed
        local elapsed=$((attempt))
        local minutes=$((elapsed / 60))
        local seconds=$((elapsed % 60))

        # Create animated dots with fixed spacing (cycle through 1-3 dots)
        local dots=""
        case $((attempt % 3)) in
            0) dots=".  " ;;
            1) dots=".. " ;;
            2) dots="..." ;;
        esac

        # Clear line and show progress with fixed spacing
        printf "\r\033[KChecking Onyx service%s (%dm %ds elapsed)" "$dots" "$minutes" "$seconds"

        sleep 1
        attempt=$((attempt + 1))
    done

    echo ""  # New line after the progress line
    return 1
}

# Success message
print_step "Installation Complete!"
print_success "All containers are running successfully!"
echo ""

# Run health check
if check_onyx_health; then
    echo ""
    echo -e "${GREEN}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${GREEN}${BOLD}   🎉 Onyx service is ready! 🎉${NC}"
    echo -e "${GREEN}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
else
    print_warning "Health check timed out after 10 minutes"
    print_info "Containers are running, but the web service may still be initializing (or something went wrong)"
    echo ""
    echo -e "${YELLOW}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${YELLOW}${BOLD}   ⚠️  Onyx containers are running ⚠️${NC}"
    echo -e "${YELLOW}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
fi
echo ""
print_info "Access Onyx at:"
echo -e "   ${BOLD}http://localhost:${HOST_PORT}${NC}"
echo ""
print_info "If authentication is enabled, you can create your admin account here:"
echo "   • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
echo "   • The first user created will automatically have admin privileges"
echo ""
if [[ "$LITE_MODE" = true ]]; then
    echo ""
    print_info "Running in Lite mode — the following services are NOT started:"
    echo "  • Vespa (vector database)"
    echo "  • Redis (cache)"
    echo "  • Model servers (embedding/inference)"
    echo "  • Background workers (Celery)"
    echo ""
    print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
    print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
fi
echo ""
print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
echo ""
print_info "For help or issues, contact: founders@onyx.app"
echo ""

# --- GitHub star prompt (inspired by oh-my-codex) ---
# Only prompt in interactive mode and only if gh CLI is available.
# Uses the GitHub API directly (PUT /user/starred) like oh-my-codex.
if is_interactive && command -v gh &>/dev/null; then
    prompt_yn_or_default "Enjoying Onyx? Star the repo on GitHub? [Y/n] " "Y"
    if [[ ! "$REPLY" =~ ^[Nn] ]]; then
        if GH_PAGER= gh api -X PUT /user/starred/onyx-dot-app/onyx < /dev/null >/dev/null 2>&1; then
            print_success "Thanks for the star!"
        else
            print_info "Star us at: https://github.com/onyx-dot-app/onyx"
        fi
    fi
fi


================================================
FILE: deployment/helm/README.md
================================================
# Dependency updates (when subchart versions are bumped)
* If updating subcharts, you need to run this before committing!
* cd charts/onyx
* helm dependency update .

# Local testing

## One time setup
* brew install kind
* Ensure you have no config at ~/.kube/config
* kind create cluster
* mv ~/.kube/config ~/.kube/kind-config

## Automated install and test with ct
* export KUBECONFIG=~/.kube/kind-config
* kubectl config use-context kind-kind
* from source root run the following. This does a very basic test against the web server
  * ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml

## Output template to file and inspect
* cd charts/onyx
* helm template test-output . --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!' > test-output.yaml

## Test the entire cluster manually
* cd charts/onyx
* helm install onyx . -n onyx --set postgresql.primary.persistence.enabled=false --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!'
  * the postgres flag is to keep the storage ephemeral for testing. You probably don't want to set that in prod.
  * the OpenSearch admin password must be set on first install unless you are supplying `auth.opensearch.existingSecret`.
  * no flag for ephemeral vespa storage yet, might be good for testing
* kubectl -n onyx port-forward service/onyx-nginx 8080:80
  * this will forward the local port 8080 to the installed chart for you to run tests, etc.
* When you are finished
  * helm uninstall onyx -n onyx
  * Vespa leaves behind a PVC. Delete it if you are completely done.
    * k -n onyx get pvc
    * k -n onyx delete pvc vespa-storage-da-vespa-0
  * If you didn't disable Postgres persistence earlier, you may want to delete that PVC too.

## Run as non-root user
By default, some onyx containers run as root. If you'd like to explicitly run the onyx containers as a non-root user, update the values.yaml file for the following components:
  * `celery_shared`, `api`, `webserver`, `indexCapability`, `inferenceCapability`
    ```yaml
    securityContext:
      runAsNonRoot: true
      runAsUser: 1001
    ```
  * `vespa`
    ```yaml
    podSecurityContext:
      fsGroup: 1000
    securityContext:
      privileged: false
      runAsUser: 1000
    ```

## Resourcing
In the helm charts, we have resource suggestions for all Onyx-owned components. 
These are simply initial suggestions, and may need to be tuned for your specific use case.

Please talk to us in Slack if you have any questions!

## Autoscaling options
The chart renders Kubernetes HorizontalPodAutoscalers by default. To keep this behavior, leave
`autoscaling.engine` as `hpa` and adjust the per-component `autoscaling.*` values as needed.

If you would like to use KEDA ScaledObjects instead:

1. Install and manage the KEDA operator in your cluster yourself (for example via the official KEDA Helm chart). KEDA is no longer packaged as a dependency of the Onyx chart.
2. Set `autoscaling.engine: keda` in your `values.yaml` and enable autoscaling for the components you want to scale.

When `autoscaling.engine` is set to `keda`, the chart will render the existing ScaledObject templates; otherwise HPAs will be rendered.


================================================
FILE: deployment/helm/charts/onyx/.gitignore
================================================
### Helm ###
# Chart dependencies
**/charts/*.tgz


================================================
FILE: deployment/helm/charts/onyx/.helmignore
================================================
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


================================================
FILE: deployment/helm/charts/onyx/Chart.yaml
================================================
apiVersion: v2
name: onyx
description: A Helm chart for Kubernetes
home: https://www.onyx.app/
sources:
  - "https://github.com/onyx-dot-app/onyx"
type: application
version: 0.4.39
appVersion: latest
annotations:
  category: Productivity
  licenses: MIT
  images: |
    - name: webserver
      image: docker.io/onyxdotapp/onyx-web-server:latest
    - name: background
      image: docker.io/onyxdotapp/onyx-backend:latest
    - name: vespa
      image: vespaengine/vespa:8.609.39
dependencies:
  - name: cloudnative-pg
    version: 0.26.0
    repository: https://cloudnative-pg.github.io/charts
    condition: postgresql.enabled
    alias: postgresql
  - name: vespa
    version: 0.2.25
    repository: https://onyx-dot-app.github.io/vespa-helm-charts
    condition: vespa.enabled
  - name: opensearch
    version: 3.4.0
    repository: https://opensearch-project.github.io/helm-charts
    condition: opensearch.enabled
  - name: ingress-nginx
    version: 4.13.3
    repository: https://kubernetes.github.io/ingress-nginx
    condition: nginx.enabled
    alias: nginx
  - name: redis
    version: 0.16.6
    repository: https://ot-container-kit.github.io/helm-charts
    condition: redis.enabled
  - name: minio
    version: 5.4.0
    repository: https://charts.min.io/
    condition: minio.enabled
  - name: code-interpreter
    version: 0.3.1
    repository: https://onyx-dot-app.github.io/python-sandbox/
    condition: codeInterpreter.enabled


================================================
FILE: deployment/helm/charts/onyx/ci/ct-values.yaml
================================================
# Values for chart-testing (ct lint/install)
# This file is automatically used by ct when running lint and install commands
auth:
  opensearch:
    values:
      opensearch_admin_password: "placeholder-OpenSearch1!"
  userauth:
    values:
      user_auth_secret: "placeholder-for-ci-testing"


================================================
FILE: deployment/helm/charts/onyx/dashboards/indexing-pipeline.json
================================================
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "id": null,
  "links": [],
  "panels": [
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 100,
      "panels": [],
      "title": "At a glance",
      "type": "row"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Total number of Celery workers responding to heartbeat ping. Checked every 60 seconds from the monitoring worker.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "yellow",
                "value": 1
              },
              {
                "color": "green",
                "value": 3
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 0,
        "y": 1
      },
      "id": 23,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "center",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "onyx_celery_active_worker_count",
          "legendFormat": "Active workers",
          "refId": "A"
        }
      ],
      "title": "Active workers",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Total count of connectors currently in a repeated error state. Yellow at 1+, red at 5+.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 1
              },
              {
                "color": "red",
                "value": 5
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 4,
        "y": 1
      },
      "id": 13,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "center",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "sum(onyx_connectors_in_error_total{tenant_id=~\"$tenant_id\"})",
          "legendFormat": "Connectors in error",
          "refId": "A"
        }
      ],
      "title": "Connectors in error",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Total tasks waiting across all Celery queues right now.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 50
              },
              {
                "color": "red",
                "value": 200
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 8,
        "y": 1
      },
      "id": 101,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "center",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "sum(onyx_queue_depth)",
          "legendFormat": "Queue depth",
          "refId": "A"
        }
      ],
      "title": "Total queue depth",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "p95 indexing task duration across all sources in the last 5 minutes.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 30
              },
              {
                "color": "red",
                "value": 90
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 12,
        "y": 1
      },
      "id": 102,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "center",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_indexing_task_duration_seconds_bucket{tenant_id=~\"$tenant_id\"}[5m])))",
          "legendFormat": "p95 duration",
          "refId": "A"
        }
      ],
      "title": "p95 task duration",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Percentage of indexing tasks that failed in the last 5 minutes.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 1
              },
              {
                "color": "red",
                "value": 5
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 16,
        "y": 1
      },
      "id": 103,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "center",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "100 * sum(rate(onyx_indexing_task_completed_total{outcome=\"failure\", tenant_id=~\"$tenant_id\"}[5m])) / sum(rate(onyx_indexing_task_completed_total{tenant_id=~\"$tenant_id\"}[5m]))",
          "legendFormat": "Error rate",
          "refId": "A"
        }
      ],
      "title": "Task error rate",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Per-worker heartbeat status. Green = responding to ping. Red = not responding. Workers removed after 10 consecutive missed pings.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "0": {
                  "color": "red",
                  "text": "DOWN"
                }
              },
              "type": "value"
            },
            {
              "options": {
                "1": {
                  "color": "green",
                  "text": "UP"
                }
              },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "green",
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 4,
        "x": 20,
        "y": 1
      },
      "id": 24,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "pluginVersion": "10.4.1",
      "targets": [
        {
          "expr": "onyx_celery_worker_up",
          "legendFormat": "{{worker}}",
          "refId": "A"
        }
      ],
      "title": "Worker heartbeats",
      "type": "stat"
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 4
      },
      "id": 25,
      "panels": [],
      "title": "Connector Health",
      "type": "row"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Current indexing attempts by status and connector.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "bars",
            "fillOpacity": 50,
            "stacking": {
              "group": "A",
              "mode": "normal"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 5
      },
      "id": 3,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "onyx_index_attempts_active{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"}",
          "legendFormat": "{{status}} / {{source}} / {{connector_name}}",
          "refId": "A"
        }
      ],
      "title": "Active Index Attempts",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Distribution of all connectors by their current status.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "mappings": []
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 6,
        "x": 12,
        "y": 5
      },
      "id": 11,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "right",
          "showLegend": true
        },
        "pieType": "pie",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "single",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "sum by (status) (onyx_connectors_by_status{tenant_id=~\"$tenant_id\"})",
          "legendFormat": "{{status}}",
          "refId": "A"
        }
      ],
      "title": "Connectors by Status",
      "type": "piechart"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Time since each connector last completed a successful index. Sorted by staleness.",
      "fieldConfig": {
        "defaults": {
          "custom": {
            "align": "auto",
            "cellOptions": {
              "type": "auto"
            },
            "inspect": false
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "Age"
            },
            "properties": [
              {
                "id": "unit",
                "value": "s"
              },
              {
                "id": "custom.cellOptions",
                "value": {
                  "type": "color-background"
                }
              },
              {
                "id": "thresholds",
                "value": {
                  "mode": "absolute",
                  "steps": [
                    {
                      "color": "green",
                      "value": null
                    },
                    {
                      "color": "yellow",
                      "value": 3600
                    },
                    {
                      "color": "red",
                      "value": 86400
                    }
                  ]
                }
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "Failed attempts"
            },
            "properties": [
              {
                "id": "custom.cellOptions",
                "value": {
                  "type": "color-background"
                }
              },
              {
                "id": "thresholds",
                "value": {
                  "mode": "absolute",
                  "steps": [
                    {
                      "color": "green",
                      "value": null
                    },
                    {
                      "color": "yellow",
                      "value": 1
                    },
                    {
                      "color": "red",
                      "value": 5
                    }
                  ]
                }
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 12,
        "w": 16,
        "x": 0,
        "y": 13
      },
      "id": 9,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": ["sum"],
          "show": false
        },
        "showHeader": true
      },
      "targets": [
        {
          "expr": "topk(20, onyx_connector_last_success_age_seconds{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"})",
          "format": "table",
          "instant": true,
          "refId": "A"
        },
        {
          "expr": "onyx_connector_error_count{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"}",
          "format": "table",
          "instant": true,
          "refId": "B"
        },
        {
          "expr": "onyx_connector_docs_indexed{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"}",
          "format": "table",
          "instant": true,
          "refId": "C"
        }
      ],
      "title": "Connector staleness \u2014 age since last success",
      "transformations": [
        {
          "id": "merge",
          "options": {}
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "__name__": true,
              "cc_pair_id": true,
              "instance": true,
              "job": true
            },
            "renameByName": {
              "Value #A": "Age",
              "Value #B": "Failed attempts",
              "Value #C": "Docs indexed",
              "connector_name": "Connector",
              "source": "Source",
              "tenant_id": "Tenant"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "desc": true,
                "field": "Age"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Connectors that have failed repeatedly and entered an error state.",
      "fieldConfig": {
        "defaults": {
          "custom": {
            "align": "auto",
            "cellOptions": {
              "type": "auto"
            },
            "inspect": false
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 12,
        "w": 8,
        "x": 16,
        "y": 13
      },
      "id": 10,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": ["sum"],
          "show": false
        },
        "showHeader": true
      },
      "targets": [
        {
          "expr": "onyx_connector_in_error_state{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"} == 1",
          "format": "table",
          "instant": true,
          "refId": "A"
        }
      ],
      "title": "Connectors in error state",
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "Value": true,
              "__name__": true,
              "cc_pair_id": true,
              "instance": true,
              "job": true
            },
            "renameByName": {
              "connector_name": "Connector",
              "source": "Source",
              "tenant_id": "Tenant"
            }
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Total documents indexed and total failed index attempts per connector.",
      "fieldConfig": {
        "defaults": {
          "custom": {
            "align": "auto",
            "cellOptions": {
              "type": "auto"
            },
            "footer": {
              "reducers": []
            },
            "inspect": false
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "Failed Attempts"
            },
            "properties": [
              {
                "id": "thresholds",
                "value": {
                  "mode": "absolute",
                  "steps": [
                    {
                      "color": "green",
                      "value": 0
                    },
                    {
                      "color": "yellow",
                      "value": 1
                    },
                    {
                      "color": "red",
                      "value": 5
                    }
                  ]
                }
              },
              {
                "id": "custom.cellOptions",
                "value": {
                  "type": "color-background"
                }
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 10,
        "w": 24,
        "x": 0,
        "y": 25
      },
      "id": 15,
      "options": {
        "cellHeight": "sm",
        "showHeader": true
      },
      "targets": [
        {
          "expr": "onyx_connector_docs_indexed{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"}",
          "format": "table",
          "instant": true,
          "refId": "A"
        },
        {
          "expr": "onyx_connector_error_count{tenant_id=~\"$tenant_id\", source=~\"$source\", connector_name=~\"$connector_name\"}",
          "format": "table",
          "instant": true,
          "refId": "B"
        }
      ],
      "title": "Docs Indexed & Failed Attempts per Connector",
      "transformations": [
        {
          "id": "merge",
          "options": {}
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "__name__": true,
              "cc_pair_id": true,
              "instance": true,
              "job": true,
              "tenant_id": true
            },
            "renameByName": {
              "Value #A": "Docs Indexed",
              "Value #B": "Failed Attempts",
              "connector_name": "Connector",
              "source": "Source"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "desc": true,
                "field": "Failed Attempts"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 35
      },
      "id": 26,
      "panels": [],
      "title": "Indexing Pipeline",
      "type": "row"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Rate of completed indexing tasks per minute stacked by source. Successes and failures are separate series.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "bars",
            "fillOpacity": 80,
            "stacking": {
              "group": "A",
              "mode": "normal"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": [
          {
            "matcher": {
              "id": "byFrameRefID",
              "options": "B"
            },
            "properties": [
              {
                "id": "color",
                "value": {
                  "fixedColor": "red",
                  "mode": "fixed"
                }
              },
              {
                "id": "custom.fillOpacity",
                "value": 70
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 8,
        "w": 14,
        "x": 0,
        "y": 36
      },
      "id": 5,
      "options": {
        "legend": {
          "calcs": ["sum"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": true,
          "mode": "multi",
          "sort": "desc"
        }
      },
      "targets": [
        {
          "expr": "sum by (source) (rate(onyx_indexing_task_completed_total{outcome=\"success\", source=~\"$source\", tenant_id=~\"$tenant_id\"}[5m])) * 60",
          "legendFormat": "{{source}} (success)",
          "refId": "A"
        },
        {
          "expr": "sum by (source) (rate(onyx_indexing_task_completed_total{outcome=\"failure\", source=~\"$source\", tenant_id=~\"$tenant_id\"}[5m])) * 60",
          "legendFormat": "{{source}} (failure)",
          "refId": "B"
        }
      ],
      "title": "Indexing throughput \u2014 success + failures stacked",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "p95 as solid line, p50 as dashed line per source.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 8,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 10,
        "x": 14,
        "y": 36
      },
      "id": 6,
      "options": {
        "legend": {
          "calcs": ["mean", "max"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "desc"
        }
      },
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum by (source, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\"$source\", tenant_id=~\"$tenant_id\"}[5m])))",
          "legendFormat": "p95 {{source}}",
          "refId": "A"
        },
        {
          "expr": "histogram_quantile(0.50, sum by (source, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\"$source\", tenant_id=~\"$tenant_id\"}[5m])))",
          "legendFormat": "p50 {{source}}",
          "refId": "B"
        }
      ],
      "title": "Task latency \u2014 p95 (solid) + p50 (dashed) per source",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Completed indexing tasks per minute broken down by individual connector and outcome.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 14,
        "x": 0,
        "y": 44
      },
      "id": 7,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "sum by (source, connector_name, outcome) (rate(onyx_indexing_task_completed_total{source=~\"$source\", tenant_id=~\"$tenant_id\", connector_name=~\"$connector_name\"}[5m])) * 60",
          "legendFormat": "{{source}} / {{connector_name}} ({{outcome}})",
          "refId": "A"
        }
      ],
      "title": "Throughput by connector",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "95th percentile indexing task duration per individual connector.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 5,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 10,
        "x": 14,
        "y": 44
      },
      "id": 8,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum by (source, connector_name, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\"$source\", tenant_id=~\"$tenant_id\", connector_name=~\"$connector_name\"}[5m])))",
          "legendFormat": "p95 {{source}} / {{connector_name}}",
          "refId": "A"
        }
      ],
      "title": "p95 duration by connector",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Distribution of task execution times.",
      "fieldConfig": {
        "defaults": {
          "custom": {
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "scaleDistribution": {
              "log": 2,
              "type": "log"
            }
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 14,
        "x": 0,
        "y": 52
      },
      "id": 22,
      "options": {
        "calculate": false,
        "cellGap": 1,
        "color": {
          "exponent": 0.5,
          "fill": "dark-orange",
          "mode": "scheme",
          "reverse": false,
          "scale": "exponential",
          "scheme": "Oranges",
          "steps": 64
        },
        "filterValues": {
          "le": 1e-9
        },
        "legend": {
          "show": true
        },
        "rowsFrame": {
          "layout": "auto"
        },
        "tooltip": {
          "mode": "single",
          "show": true,
          "showColorScale": false,
          "yHistogram": true
        },
        "yAxis": {
          "axisPlacement": "left",
          "reverse": false,
          "unit": "s"
        }
      },
      "targets": [
        {
          "expr": "sum(increase(onyx_indexing_task_duration_seconds_bucket{source=~\"$source\", tenant_id=~\"$tenant_id\"}[5m])) by (le)",
          "format": "heatmap",
          "legendFormat": "{{le}}",
          "refId": "A"
        }
      ],
      "title": "Task runtime heatmap",
      "type": "heatmap"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Rate of task retries, revocations, and rejections per minute. Should be near zero.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 10,
        "x": 14,
        "y": 52
      },
      "id": 21,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "noValue": "No events (healthy)",
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "rate(onyx_celery_task_retried_total[5m]) * 60",
          "legendFormat": "retry: {{task_name}}",
          "refId": "A"
        },
        {
          "expr": "rate(onyx_celery_task_revoked_total[5m]) * 60",
          "legendFormat": "revoked: {{task_name}}",
          "refId": "B"
        },
        {
          "expr": "rate(onyx_celery_task_rejected_total[5m]) * 60",
          "legendFormat": "rejected: {{task_name}}",
          "refId": "C"
        }
      ],
      "title": "Error events \u2014 retries / revocations / rejections",
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 60
      },
      "id": 27,
      "panels": [],
      "title": "Queue Infrastructure",
      "type": "row"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Number of tasks waiting in each Celery queue.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 14,
        "x": 0,
        "y": 61
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": ["last", "max"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": true,
          "mode": "multi",
          "sort": "desc"
        }
      },
      "targets": [
        {
          "expr": "onyx_queue_depth",
          "legendFormat": "{{queue}}",
          "refId": "A"
        }
      ],
      "title": "Queue depth",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Rate of change of queue depth per minute.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "yellow",
                "value": 10
              },
              {
                "color": "red",
                "value": 100
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 10,
        "x": 14,
        "y": 61
      },
      "id": 16,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "deriv(onyx_queue_depth[5m]) * 60",
          "legendFormat": "{{queue}}",
          "refId": "A"
        }
      ],
      "title": "Queue growth rate (tasks/min)",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Age of the oldest waiting task in each non-empty queue.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "yellow",
                "value": 60
              },
              {
                "color": "red",
                "value": 300
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 24,
        "x": 0,
        "y": 69
      },
      "id": 17,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "noValue": "No queued tasks (healthy)",
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "onyx_queue_oldest_task_age_seconds > 0",
          "legendFormat": "{{queue}}",
          "refId": "A"
        }
      ],
      "title": "Oldest task age per queue",
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 75
      },
      "id": 28,
      "panels": [],
      "title": "Redis & Workers",
      "type": "row"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Redis memory consumption over time. Peak shown as a dashed line.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "Peak"
            },
            "properties": [
              {
                "id": "custom.lineStyle",
                "value": {
                  "dash": [4, 4],
                  "fill": "dash"
                }
              },
              {
                "id": "custom.lineWidth",
                "value": 1
              },
              {
                "id": "custom.fillOpacity",
                "value": 0
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 0,
        "y": 76
      },
      "id": 18,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "onyx_redis_memory_used_bytes",
          "legendFormat": "Used",
          "refId": "A"
        },
        {
          "expr": "onyx_redis_memory_peak_bytes",
          "legendFormat": "Peak",
          "refId": "B"
        }
      ],
      "title": "Redis memory \u2014 used vs peak",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Ratio of OS-allocated memory to Redis-used memory. >1.5 = significant fragmentation. <1.0 = Redis is swapping to disk (critical).",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "max": 3,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": 0
              },
              {
                "color": "green",
                "value": 1
              },
              {
                "color": "yellow",
                "value": 1.5
              },
              {
                "color": "red",
                "value": 2
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 4,
        "x": 8,
        "y": 76
      },
      "id": 19,
      "options": {
        "minVizHeight": 75,
        "minVizWidth": 75,
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showThresholdLabels": true,
        "showThresholdMarkers": true,
        "sizing": "auto"
      },
      "targets": [
        {
          "expr": "max(last_over_time(onyx_redis_memory_fragmentation_ratio[2m]))",
          "legendFormat": "Fragmentation",
          "refId": "A"
        }
      ],
      "title": "Redis fragmentation ratio",
      "type": "gauge"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Number of active Redis client connections. A steadily rising count indicates a connection leak.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 2
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": 0
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 6,
        "x": 12,
        "y": 76
      },
      "id": 20,
      "options": {
        "legend": {
          "calcs": ["last"],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "hideZeros": false,
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "expr": "onyx_redis_connected_clients",
          "legendFormat": "Clients",
          "refId": "A"
        }
      ],
      "title": "Redis connected clients",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "Health of each Prometheus scrape target. Green = UP. Red = DOWN.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "0": {
                  "color": "red",
                  "text": "DOWN"
                }
              },
              "type": "value"
            },
            {
              "options": {
                "1": {
                  "color": "green",
                  "text": "UP"
                }
              },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": 0
              },
              {
                "color": "green",
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 6,
        "x": 18,
        "y": 76
      },
      "id": 14,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "showPercentChange": false,
        "textMode": "value_and_name",
        "wideLayout": true
      },
      "targets": [
        {
          "expr": "up{job=~\"onyx-.*\"}",
          "legendFormat": "{{job}}",
          "refId": "A"
        }
      ],
      "title": "Scrape target health",
      "type": "stat"
    }
  ],
  "refresh": "10s",
  "schemaVersion": 39,
  "tags": ["onyx", "indexing", "prometheus"],
  "templating": {
    "list": [
      {
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "DS_PROMETHEUS",
        "options": [],
        "query": "prometheus",
        "refresh": 1,
        "type": "datasource"
      },
      {
        "allValue": ".*",
        "datasource": {
          "type": "prometheus",
          "uid": "${DS_PROMETHEUS}"
        },
        "definition": "label_values(onyx_connector_last_success_age_seconds, source)",
        "hide": 0,
        "includeAll": true,
        "multi": true,
        "name": "source",
        "options": [],
        "query": "label_values(onyx_connector_last_success_age_seconds, source)",
        "refresh": 1,
        "type": "query"
      },
      {
        "allValue": ".*",
        "datasource": {
          "type": "prometheus",
          "uid": "${DS_PROMETHEUS}"
        },
        "definition": "label_values(onyx_connector_last_success_age_seconds, connector_name)",
        "hide": 0,
        "includeAll": true,
        "multi": true,
        "name": "connector_name",
        "options": [],
        "query": "label_values(onyx_connector_last_success_age_seconds, connector_name)",
        "refresh": 1,
        "type": "query"
      },
      {
        "allValue": ".*",
        "datasource": {
          "type": "prometheus",
          "uid": "${DS_PROMETHEUS}"
        },
        "definition": "label_values(onyx_connector_last_success_age_seconds, tenant_id)",
        "hide": 0,
        "includeAll": true,
        "multi": true,
        "name": "tenant_id",
        "options": [],
        "query": "label_values(onyx_connector_last_success_age_seconds, tenant_id)",
        "refresh": 1,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-3h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "browser",
  "title": "Onyx Indexing Pipeline",
  "uid": "onyx-indexing-pipeline",
  "version": 1,
  "weekStart": ""
}


================================================
FILE: deployment/helm/charts/onyx/templates/_helpers.tpl
================================================
{{/*
Expand the name of the chart.
*/}}
{{- define "onyx.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "onyx.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "onyx.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "onyx.labels" -}}
helm.sh/chart: {{ include "onyx.chart" . }}
{{ include "onyx.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "onyx.selectorLabels" -}}
app.kubernetes.io/name: {{ include "onyx.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "onyx.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "onyx.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

{{/*
Set secret name
*/}}
{{- define "onyx.secretName" -}}
{{- default .secretName .existingSecret }}
{{- end }}

{{/*
Create env vars from secrets
*/}}
{{- define "onyx.envSecrets" -}}
    {{- range $secretSuffix, $secretContent := .Values.auth }}
    {{- if and (ne $secretContent.enabled false) ($secretContent.secretKeys) }}
    {{- range $name, $key := $secretContent.secretKeys }}
- name: {{ $name | upper | replace "-" "_" | quote }}
  valueFrom:
    secretKeyRef:
      name: {{ include "onyx.secretName" $secretContent }}
      key: {{ default $name $key }}
    {{- end }}
    {{- end }}
    {{- end }}
{{- end }}

{{/*
Helpers for mounting a psql convenience script into pods.
*/}}
{{- define "onyx.pgInto.enabled" -}}
{{- if and .Values.tooling .Values.tooling.pgInto .Values.tooling.pgInto.enabled }}true{{- end }}
{{- end }}

{{- define "onyx.pgInto.configMapName" -}}
{{- printf "%s-pginto" (include "onyx.fullname" .) -}}
{{- end }}

{{- define "onyx.pgInto.checksumAnnotation" -}}
{{- if (include "onyx.pgInto.enabled" .) }}
checksum/pginto: {{ include (print $.Template.BasePath "/tooling-pginto-configmap.yaml") . | sha256sum }}
{{- end }}
{{- end }}

{{- define "onyx.pgInto.volumeMount" -}}
{{- if (include "onyx.pgInto.enabled" .) }}
- name: pginto-script
  mountPath: {{ default "/usr/local/bin/pginto" .Values.tooling.pgInto.mountPath }}
  subPath: pginto
  readOnly: true
{{- end }}
{{- end }}

{{- define "onyx.pgInto.volume" -}}
{{- if (include "onyx.pgInto.enabled" .) }}
- name: pginto-script
  configMap:
    name: {{ include "onyx.pgInto.configMapName" . }}
    defaultMode: 0755
{{- end }}
{{- end }}

{{- define "onyx.renderVolumeMounts" -}}
{{- $pginto := include "onyx.pgInto.volumeMount" .ctx -}}
{{- $existing := .volumeMounts -}}
{{- if or $pginto $existing -}}
volumeMounts:
{{- if $pginto }}
{{ $pginto | nindent 2 }}
{{- end }}
{{- if $existing }}
{{ toYaml $existing | nindent 2 }}
{{- end }}
{{- end -}}
{{- end }}

{{- define "onyx.renderVolumes" -}}
{{- $pginto := include "onyx.pgInto.volume" .ctx -}}
{{- $existing := .volumes -}}
{{- if or $pginto $existing -}}
volumes:
{{- if $pginto }}
{{ $pginto | nindent 2 }}
{{- end }}
{{- if $existing }}
{{ toYaml $existing | nindent 2 }}
{{- end }}
{{- end -}}
{{- end }}

{{/*
Return the configured autoscaling engine; defaults to HPA when unset.
*/}}
{{- define "onyx.autoscaling.engine" -}}
{{- $engine := default "hpa" .Values.autoscaling.engine -}}
{{- $engine | lower -}}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/api-deployment.yaml
================================================
{{- if gt (int .Values.api.replicaCount) 0 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-api-server
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.api.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.api.autoscaling.enabled }}
  replicas: {{ .Values.api.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.api.deploymentLabels }}
      {{- toYaml .Values.api.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
        {{- $pgIntoChecksum := include "onyx.pgInto.checksumAnnotation" . }}
        {{- if $pgIntoChecksum }}
        {{- $pgIntoChecksum | nindent 8 }}
        {{- end }}
      {{- with .Values.api.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.api.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.api.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.api.podSecurityContext | nindent 8 }}
      {{- with .Values.api.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.api.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.api.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: api-server
          securityContext:
            {{- toYaml .Values.api.securityContext | nindent 12 }}
          image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            - "/bin/sh"
            - "-c"
            - |
              alembic upgrade head &&
              echo "Starting Onyx Api Server" &&
              uvicorn onyx.main:app --host {{ .Values.global.host }} --port {{ .Values.api.containerPorts.server }}
          ports:
            - name: api-server-port
              containerPort: {{ .Values.api.containerPorts.server }}
              protocol: TCP
          resources:
            {{- toYaml .Values.api.resources | nindent 12 }}
          {{- with .Values.api.startupProbe }}
          startupProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.api.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.api.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- $apiVolumeMounts := include "onyx.renderVolumeMounts" (dict "ctx" . "volumeMounts" .Values.api.volumeMounts) }}
          {{- if $apiVolumeMounts }}
          {{- $apiVolumeMounts | nindent 10 }}
          {{- end }}
      {{- $apiVolumes := include "onyx.renderVolumes" (dict "ctx" . "volumes" .Values.api.volumes) }}
      {{- if $apiVolumes }}
      {{- $apiVolumes | nindent 6 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/api-hpa.yaml
================================================
{{- if and (.Values.api.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-api
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}
  minReplicas: {{ .Values.api.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.api.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/api-scaledobject.yaml
================================================
{{- if and (.Values.api.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-api
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-api-server
  minReplicaCount: {{ .Values.api.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.api.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.api.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.api.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.api.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.api.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.api.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.api.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.api.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.api.autoscaling.customTriggers }}
    {{- toYaml .Values.api.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/api-service.yaml
================================================
apiVersion: v1
kind: Service
metadata:
  # INTERNAL_URL env variable depends on this, don't change without changing INTERNAL_URL
  name: {{ include "onyx.fullname" . }}-api-service
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.api.deploymentLabels }}
    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
    {{- end }}
spec:
  type: {{ .Values.api.service.type }}
  ports:
    - port: {{ .Values.api.service.servicePort }}
      targetPort: {{ .Values.api.service.targetPort }}
      protocol: TCP
      name: {{ .Values.api.service.portName }}
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.api.deploymentLabels }}
    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
    {{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/auth-secrets.yaml
================================================
{{- if hasKey .Values.auth "secretKeys" }}
{{- fail "ERROR: Secrets handling has been refactored under 'auth' and must be updated before upgrading to this chart version." }}
{{- end }}
{{- range $secretKey, $secretContent := .Values.auth }}
{{- if and (empty $secretContent.existingSecret) (or (not (hasKey $secretContent "enabled")) $secretContent.enabled) }}
{{- $secretName := include "onyx.secretName" $secretContent }}
{{- $existingSecret := lookup "v1" "Secret" $.Release.Namespace $secretName }}
{{- /* Pre-validate: fail before emitting YAML if any required value is missing */ -}}
{{- range $name, $value := $secretContent.values }}
{{- if and (empty $value) (not (and $existingSecret (hasKey $existingSecret.data $name))) }}
{{- fail (printf "Secret value for '%s' is required but not set and no existing secret found. Please set auth.%s.values.%s in values.yaml" $name $secretKey $name) }}
{{- end }}
{{- end }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ $secretName }}
type: Opaque
stringData:
{{- range $name, $value := $secretContent.values }}
{{- if not (empty $value) }}
  {{ $name }}: {{ $value | quote }}
{{- else if and $existingSecret (hasKey $existingSecret.data $name) }}
  {{ $name }}: {{ index $existingSecret.data $name | b64dec | quote }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-beat.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_beat.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-beat
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_beat.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  replicas: {{ .Values.celery_beat.replicaCount }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_beat.deploymentLabels }}
      {{- toYaml .Values.celery_beat.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_beat.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_beat.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_beat.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_beat.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_beat.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_beat.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-beat
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.beat",
              "beat",
              {{ printf "--loglevel=%s" .Values.celery_beat.logLevel | quote }},
            ]
          resources:
            {{- toYaml .Values.celery_beat.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_beat.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_beat_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_beat_liveness.txt
      {{- with .Values.celery_beat.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docfetching-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docfetching.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  minReplicas: {{ .Values.celery_worker_docfetching.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_docfetching.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docfetching-metrics-service.yaml
================================================
{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).
       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docfetching.replicaCount) 0) }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching-metrics
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.celery_worker_docfetching.deploymentLabels }}
    {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 4 }}
    {{- end }}
    metrics: "true"
spec:
  type: ClusterIP
  ports:
    - port: 9092
      targetPort: metrics
      protocol: TCP
      name: metrics
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.celery_worker_docfetching.deploymentLabels }}
    {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docfetching-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docfetching.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  minReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.maxReplicas | default 20 }}
  pollingInterval: {{ .Values.celery_worker_docfetching.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_docfetching.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_docfetching.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_docfetching.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_docfetching.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_docfetching.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metadata:
        type: Utilization
        value: "{{ .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metadata:
        type: Utilization
        value: "{{ .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_docfetching.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_docfetching.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docfetching.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docfetching.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_docfetching.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_docfetching.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_docfetching.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_docfetching.deploymentLabels }}
      {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_docfetching.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_docfetching.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_docfetching.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_docfetching.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_docfetching.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_docfetching.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-docfetching
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.docfetching",
              "worker",
              "--pool=threads",
              "--concurrency=2",
              "--prefetch-multiplier=1",
              {{ printf "--loglevel=%s" .Values.celery_worker_docfetching.logLevel | quote }},
              "--hostname=docfetching@%n",
              "-Q",
              "connector_doc_fetching",
            ]
          ports:
            - name: metrics
              containerPort: 9092
              protocol: TCP
          resources:
            {{- toYaml .Values.celery_worker_docfetching.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_docfetching.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_docfetching_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_docfetching_liveness.txt
      {{- with .Values.celery_worker_docfetching.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docprocessing-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docprocessing.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  minReplicas: {{ .Values.celery_worker_docprocessing.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_docprocessing.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docprocessing-metrics-service.yaml
================================================
{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).
       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docprocessing.replicaCount) 0) }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing-metrics
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.celery_worker_docprocessing.deploymentLabels }}
    {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 4 }}
    {{- end }}
    metrics: "true"
spec:
  type: ClusterIP
  ports:
    - port: 9093
      targetPort: metrics
      protocol: TCP
      name: metrics
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.celery_worker_docprocessing.deploymentLabels }}
    {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docprocessing-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docprocessing.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  minReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_docprocessing.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_docprocessing.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_docprocessing.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_docprocessing.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_docprocessing.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_docprocessing.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_docprocessing.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_docprocessing.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-docprocessing.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docprocessing.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_docprocessing.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_docprocessing.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_docprocessing.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_docprocessing.deploymentLabels }}
      {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_docprocessing.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_docprocessing.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_docprocessing.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_docprocessing.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_docprocessing.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_docprocessing.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-docprocessing
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.docprocessing",
              "worker",
              "--pool=threads",
              "--concurrency=6",
              "--prefetch-multiplier=1",
              {{ printf "--loglevel=%s" .Values.celery_worker_docprocessing.logLevel | quote }},
              "--hostname=docprocessing@%n",
              "-Q",
              "docprocessing",
            ]
          ports:
            - name: metrics
              containerPort: 9093
              protocol: TCP
          resources:
            {{- toYaml .Values.celery_worker_docprocessing.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            - name: ENABLE_MULTIPASS_INDEXING
              value: "{{ .Values.celery_worker_docprocessing.enableMiniChunk }}"
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_docprocessing.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_docprocessing_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_docprocessing_liveness.txt
      {{- with .Values.celery_worker_docprocessing.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-heavy-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_heavy.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-heavy
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-heavy
  minReplicas: {{ .Values.celery_worker_heavy.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_heavy.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-heavy-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_heavy.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-heavy
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-heavy
  minReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_heavy.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_heavy.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_heavy.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_heavy.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_heavy.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_heavy.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_heavy.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_heavy.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-heavy.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_heavy.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-heavy
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_heavy.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_heavy.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_heavy.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_heavy.deploymentLabels }}
      {{- toYaml .Values.celery_worker_heavy.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_heavy.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_heavy.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_heavy.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_heavy.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_heavy.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_heavy.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-heavy
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.heavy",
              "worker",
              {{ printf "--loglevel=%s" .Values.celery_worker_heavy.logLevel | quote }},
              "--hostname=heavy@%n",
              "-Q",
              "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox",
            ]
          resources:
            {{- toYaml .Values.celery_worker_heavy.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_heavy.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_heavy_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_heavy_liveness.txt
      {{- with .Values.celery_worker_heavy.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-light-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_light.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-light
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-light
  minReplicas: {{ .Values.celery_worker_light.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_light.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-light-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_light.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-light
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-light
  minReplicaCount: {{ .Values.celery_worker_light.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_light.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_light.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_light.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_light.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_light.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_light.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_light.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_light.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_light.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_light.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-light.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_light.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-light
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_light.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_light.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_light.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_light.deploymentLabels }}
      {{- toYaml .Values.celery_worker_light.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_light.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_light.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_light.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_light.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_light.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_light.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-light
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.light",
              "worker",
              {{ printf "--loglevel=%s" .Values.celery_worker_light.logLevel | quote }},
              "--hostname=light@%n",
              "-Q",
              "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration",
            ]
          resources:
            {{- toYaml .Values.celery_worker_light.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_light.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_light_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_light_liveness.txt
      {{- with .Values.celery_worker_light.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-monitoring-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_monitoring.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  minReplicas: {{ .Values.celery_worker_monitoring.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_monitoring.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-monitoring-metrics-service.yaml
================================================
{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).
       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_monitoring.replicaCount) 0) }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring-metrics
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.celery_worker_monitoring.deploymentLabels }}
    {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 4 }}
    {{- end }}
    metrics: "true"
spec:
  type: ClusterIP
  ports:
    - port: 9096
      targetPort: metrics
      protocol: TCP
      name: metrics
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.celery_worker_monitoring.deploymentLabels }}
    {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-monitoring-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_monitoring.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  minReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_monitoring.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_monitoring.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_monitoring.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_monitoring.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_monitoring.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_monitoring.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_monitoring.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_monitoring.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-monitoring.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_monitoring.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_monitoring.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_monitoring.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_monitoring.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_monitoring.deploymentLabels }}
      {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_monitoring.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_monitoring.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_monitoring.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_monitoring.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_monitoring.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_monitoring.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-monitoring
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.monitoring",
              "worker",
              {{ printf "--loglevel=%s" .Values.celery_worker_monitoring.logLevel | quote }},
              "--hostname=monitoring@%n",
              "-Q",
              "monitoring",
            ]
          ports:
            - name: metrics
              containerPort: 9096
              protocol: TCP
          resources:
            {{- toYaml .Values.celery_worker_monitoring.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_monitoring.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_monitoring_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_monitoring_liveness.txt
      {{- with .Values.celery_worker_monitoring.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-primary-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_primary.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-primary
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-primary
  minReplicas: {{ .Values.celery_worker_primary.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_primary.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-primary-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_primary.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-primary
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-primary
  minReplicaCount: {{ .Values.celery_worker_primary.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_primary.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_primary.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_primary.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_primary.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_primary.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_primary.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_primary.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_primary.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_primary.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_primary.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-primary.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_primary.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-primary
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_primary.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_primary.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_primary.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_primary.deploymentLabels }}
      {{- toYaml .Values.celery_worker_primary.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_primary.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_primary.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_primary.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_primary.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_primary.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_primary.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-primary
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.primary",
              "worker",
              {{ printf "--loglevel=%s" .Values.celery_worker_primary.logLevel | quote }},
              "--hostname=primary@%n",
              "-Q",
              "celery,periodic_tasks",
            ]
          resources:
            {{- toYaml .Values.celery_worker_primary.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_primary.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_primary_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_primary_liveness.txt
      {{- with .Values.celery_worker_primary.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-servicemonitors.yaml
================================================
{{- if and .Values.monitoring.serviceMonitors.enabled .Values.vectorDB.enabled }}
{{- if gt (int .Values.celery_worker_monitoring.replicaCount) 0 }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.monitoring.serviceMonitors.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
  selector:
    matchLabels:
      app: {{ .Values.celery_worker_monitoring.deploymentLabels.app }}
      metrics: "true"
  endpoints:
    - port: metrics
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s
{{- end }}
{{- if gt (int .Values.celery_worker_docfetching.replicaCount) 0 }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.monitoring.serviceMonitors.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
  selector:
    matchLabels:
      app: {{ .Values.celery_worker_docfetching.deploymentLabels.app }}
      metrics: "true"
  endpoints:
    - port: metrics
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s
{{- end }}
{{- if gt (int .Values.celery_worker_docprocessing.replicaCount) 0 }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.monitoring.serviceMonitors.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
  selector:
    matchLabels:
      app: {{ .Values.celery_worker_docprocessing.deploymentLabels.app }}
      metrics: "true"
  endpoints:
    - port: metrics
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s
{{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-user-file-processing-hpa.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_user_file_processing.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-user-file-processing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-user-file-processing
  minReplicas: {{ .Values.celery_worker_user_file_processing.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.celery_worker_user_file_processing.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-user-file-processing-scaledobject.yaml
================================================
{{- if and .Values.vectorDB.enabled (.Values.celery_worker_user_file_processing.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-user-file-processing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-celery-worker-user-file-processing
  minReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.minReplicas | default 1 }}
  maxReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.maxReplicas | default 10 }}
  pollingInterval: {{ .Values.celery_worker_user_file_processing.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.celery_worker_user_file_processing.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.celery_worker_user_file_processing.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.celery_worker_user_file_processing.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.celery_worker_user_file_processing.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.celery_worker_user_file_processing.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.celery_worker_user_file_processing.autoscaling.customTriggers }}
    {{- toYaml .Values.celery_worker_user_file_processing.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/celery-worker-user-file-processing.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_user_file_processing.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-celery-worker-user-file-processing
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.celery_worker_user_file_processing.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.celery_worker_user_file_processing.autoscaling.enabled }}
  replicas: {{ .Values.celery_worker_user_file_processing.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.celery_worker_user_file_processing.deploymentLabels }}
      {{- toYaml .Values.celery_worker_user_file_processing.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.celery_worker_user_file_processing.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.celery_worker_user_file_processing.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.celery_worker_user_file_processing.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}
      {{- with .Values.celery_worker_user_file_processing.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_user_file_processing.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.celery_worker_user_file_processing.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: celery-worker-user-file-processing
          securityContext:
            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}
          image: "{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command:
            [
              "celery",
              "-A",
              "onyx.background.celery.versioned_apps.user_file_processing",
              "worker",
              {{ printf "--loglevel=%s" .Values.celery_worker_user_file_processing.logLevel | quote }},
              "--hostname=user-file-processing@%n",
              "-Q",
              "user_file_processing,user_file_project_sync,user_file_delete",
            ]
          resources:
            {{- toYaml .Values.celery_worker_user_file_processing.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.celery_worker_user_file_processing.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          startupProbe:
            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}
          readinessProbe:
            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe readiness
                    --filename /tmp/onyx_k8s_userfileprocessing_readiness.txt
          livenessProbe:
            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}
            exec:
              command:
                - /bin/bash
                - -c
                - >
                    python onyx/background/celery/celery_k8s_probe.py
                    --probe liveness
                    --filename /tmp/onyx_k8s_userfileprocessing_liveness.txt
      {{- with .Values.celery_worker_user_file_processing.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/configmap.yaml
================================================
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.config.envConfigMapName }}
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
data:
  INTERNAL_URL: "http://{{ include "onyx.fullname" . }}-api-service:{{ .Values.api.service.servicePort | default 8080 }}"
  {{- if .Values.postgresql.enabled }}
  POSTGRES_HOST: {{ .Release.Name }}-{{ default "postgresql" .Values.postgresql.nameOverride }}-rw
  {{- end }}
  {{- if .Values.vespa.enabled }}
  VESPA_HOST: {{ .Values.vespa.name }}.{{ .Values.vespa.service.name }}.{{ .Release.Namespace }}.svc.cluster.local
  {{- end }}
  {{- if .Values.opensearch.enabled }}
  OPENSEARCH_HOST: {{ .Values.opensearch.clusterName }}-{{ .Values.opensearch.nodeGroup }}.{{ .Release.Namespace }}.svc.cluster.local
  OPENSEARCH_REST_API_PORT: "9200"
  ENABLE_OPENSEARCH_INDEXING_FOR_ONYX: "true"
  ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX: "false"
  {{- end }}
  {{- if .Values.redis.enabled }}
  REDIS_HOST: {{ .Values.redis.redisStandalone.name | default .Release.Name }}-master
  {{- end }}
  MODEL_SERVER_HOST: "{{ include "onyx.fullname" . }}-inference-model-service"
  {{- if .Values.vectorDB.enabled }}
  INDEXING_MODEL_SERVER_HOST: "{{ include "onyx.fullname" . }}-indexing-model-service"
  DISABLE_VECTOR_DB: "false"
  {{- else }}
  DISABLE_VECTOR_DB: "true"
  {{- end }}
{{- range $key, $value := .Values.configMap }}
{{- if not (empty $value) }}
  {{ $key }}: "{{ $value }}"
{{- end }}
{{- end }}
  {{- if .Values.minio.enabled }}
  S3_ENDPOINT_URL: "http://{{ .Release.Name }}-minio:{{ default 9000 .Values.minio.service.port }}"
  {{- end }}
  {{- if .Values.codeInterpreter.enabled }}
  CODE_INTERPRETER_BASE_URL: "http://{{ .Release.Name }}-code-interpreter:{{ .Values.codeInterpreter.service.port }}"
  {{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/discordbot.yaml
================================================
{{- if .Values.discordbot.enabled }}
# Discord bot MUST run as a single replica - Discord only allows one client connection per bot token.
# Do NOT enable HPA or increase replicas. Message processing is offloaded to scalable API pods via HTTP.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-discordbot
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.discordbot.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  # CRITICAL: Discord bots cannot be horizontally scaled - only one WebSocket connection per token is allowed
  replicas: 1
  strategy:
    type: Recreate  # Ensure old pod is terminated before new one starts to avoid duplicate connections
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.discordbot.deploymentLabels }}
      {{- toYaml .Values.discordbot.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.discordbot.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.discordbot.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.discordbot.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.discordbot.podSecurityContext | nindent 8 }}
      {{- with .Values.discordbot.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.discordbot.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.discordbot.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: discordbot
          securityContext:
            {{- toYaml .Values.discordbot.securityContext | nindent 12 }}
          image: "{{ .Values.discordbot.image.repository }}:{{ .Values.discordbot.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command: ["python", "onyx/onyxbot/discord/client.py"]
          resources:
            {{- toYaml .Values.discordbot.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
            # Discord bot token - required for bot to connect
            {{- if .Values.discordbot.botToken }}
            - name: DISCORD_BOT_TOKEN
              value: {{ .Values.discordbot.botToken | quote }}
            {{- end }}
            {{- if .Values.discordbot.botTokenSecretName }}
            - name: DISCORD_BOT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.discordbot.botTokenSecretName }}
                  key: {{ .Values.discordbot.botTokenSecretKey | default "token" }}
            {{- end }}
            # Command prefix for bot commands (default: "!")
            {{- if .Values.discordbot.invokeChar }}
            - name: DISCORD_BOT_INVOKE_CHAR
              value: {{ .Values.discordbot.invokeChar | quote }}
            {{- end }}
          {{- with .Values.discordbot.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.discordbot.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/grafana-dashboards.yaml
================================================
{{- if .Values.monitoring.grafana.dashboards.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "onyx.fullname" . }}-indexing-pipeline-dashboard
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Onyx"
data:
  onyx-indexing-pipeline.json: |
    {{- .Files.Get "dashboards/indexing-pipeline.json" | nindent 4 }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/indexing-model-deployment.yaml
================================================
{{- if and .Values.vectorDB.enabled (gt (int .Values.indexCapability.replicaCount) 0) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-indexing-model
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.indexCapability.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  replicas: {{ .Values.indexCapability.replicaCount }}
  {{- with .Values.indexCapability.strategy }}
  strategy:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.indexCapability.deploymentLabels }}
      {{- toYaml .Values.indexCapability.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.indexCapability.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.indexCapability.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.indexCapability.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- if .Values.indexCapability.podSecurityContext }}
      securityContext:
        {{- toYaml .Values.indexCapability.podSecurityContext | nindent 8 }}
      {{- end }}
      {{- with .Values.indexCapability.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.indexCapability.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.indexCapability.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
      - name: {{ .Values.indexCapability.name }}
        image: "{{ .Values.indexCapability.image.repository }}:{{ .Values.indexCapability.image.tag | default .Values.global.version }}"
        imagePullPolicy: {{ .Values.global.pullPolicy }}
        command: [ "uvicorn", "model_server.main:app", "--host", "{{ .Values.global.host }}", "--port", "{{ .Values.indexCapability.containerPorts.server }}", "--limit-concurrency", "{{ .Values.indexCapability.limitConcurrency }}" ]
        ports:
        - name: model-server
          containerPort: {{ .Values.indexCapability.containerPorts.server }}
          protocol: TCP
        envFrom:
          - configMapRef:
              name: {{ .Values.config.envConfigMapName }}
        env:
          - name: INDEXING_ONLY
            value: "{{ default "True" .Values.indexCapability.indexingOnly }}"
          {{- include "onyx.envSecrets" . | nindent 10}}
        {{- if .Values.indexCapability.securityContext }}
        securityContext:
          {{- toYaml .Values.indexCapability.securityContext | nindent 10 }}
        {{- end }}
        {{- if .Values.indexCapability.resources }}
        resources:
          {{- toYaml .Values.indexCapability.resources | nindent 10 }}
        {{- end }}
        {{- with .Values.indexCapability.startupProbe }}
        startupProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.indexCapability.readinessProbe }}
        readinessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.indexCapability.livenessProbe }}
        livenessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/indexing-model-service.yaml
================================================
{{- if .Values.vectorDB.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-indexing-model-service
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.indexCapability.deploymentLabels }}
    {{- toYaml .Values.indexCapability.deploymentLabels | nindent 4 }}
    {{- end }}
  ports:
    - name: {{ .Values.indexCapability.service.portName }}
      protocol: TCP
      port: {{ .Values.indexCapability.service.servicePort  }}
      targetPort: {{ .Values.indexCapability.service.targetPort }}
  type: {{ .Values.indexCapability.service.type }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/inference-model-deployment.yaml
================================================
{{- if gt (int .Values.inferenceCapability.replicaCount) 0 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-inference-model
  labels:
    {{- range .Values.inferenceCapability.labels }}
    {{ .key }}: {{ .value }}
    {{- end }}
spec:
  replicas: {{ .Values.inferenceCapability.replicaCount }}
  {{- with .Values.inferenceCapability.strategy }}
  strategy:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  selector:
    matchLabels:
      {{- range .Values.inferenceCapability.labels }}
      {{ .key }}: {{ .value }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      labels:
        {{- range .Values.inferenceCapability.podLabels }}
        {{ .key }}: {{ .value }}
        {{- end }}
    spec:
      {{- if .Values.inferenceCapability.podSecurityContext }}
      securityContext:
        {{- toYaml .Values.inferenceCapability.podSecurityContext | nindent 8 }}
      {{- end }}
      {{- with .Values.inferenceCapability.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.inferenceCapability.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.inferenceCapability.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
      - name: model-server-inference
        image: "{{ .Values.inferenceCapability.image.repository }}:{{ .Values.inferenceCapability.image.tag | default .Values.global.version }}"
        imagePullPolicy: {{ .Values.global.pullPolicy }}
        command: [ "uvicorn", "model_server.main:app", "--host", "{{ .Values.global.host }}", "--port", "{{ .Values.inferenceCapability.containerPorts.server }}" ]
        ports:
        - name: model-server
          containerPort: {{ .Values.inferenceCapability.containerPorts.server }}
          protocol: TCP
        envFrom:
        - configMapRef:
            name: {{ .Values.config.envConfigMapName }}
        env:
          {{- include "onyx.envSecrets" . | nindent 12}}
        {{- if .Values.inferenceCapability.securityContext }}
        securityContext:
          {{- toYaml .Values.inferenceCapability.securityContext | nindent 10 }}
        {{- end }}
        {{- if .Values.inferenceCapability.resources }}
        resources:
          {{- toYaml .Values.inferenceCapability.resources | nindent 10 }}
        {{- end }}
        {{- with .Values.inferenceCapability.startupProbe }}
        startupProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.inferenceCapability.readinessProbe }}
        readinessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.inferenceCapability.livenessProbe }}
        livenessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/inference-model-service.yaml
================================================
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-inference-model-service
spec:
  type: {{ .Values.inferenceCapability.service.type }}
  ports:
    - port: {{ .Values.inferenceCapability.service.servicePort}}
      targetPort: {{ .Values.inferenceCapability.service.targetPort }}
      protocol: TCP
      name: {{ .Values.inferenceCapability.service.portName }}
  selector:
    {{- range .Values.inferenceCapability.labels }}
    {{ .key }}: {{ .value }}
    {{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/ingress-api.yaml
================================================
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx.fullname" . }}-ingress-api
  annotations:
    {{- if not .Values.ingress.className }}
    kubernetes.io/ingress.class: nginx
    {{- end }}
    nginx.ingress.kubernetes.io/rewrite-target: /$2
    nginx.ingress.kubernetes.io/use-regex: "true"
    cert-manager.io/cluster-issuer: {{ include "onyx.fullname" . }}-letsencrypt
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  rules:
    - host: {{ .Values.ingress.api.host }}
      http:
        paths:
          - path: /api(/|$)(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: {{ include "onyx.fullname" . }}-api-service
                port:
                  number: {{ .Values.api.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.api.host }}
      secretName: {{ include "onyx.fullname" . }}-ingress-api-tls
{{- end }}

================================================
FILE: deployment/helm/charts/onyx/templates/ingress-mcp.yaml
================================================
{{- if and .Values.ingress.enabled .Values.mcpServer.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx.fullname" . }}-ingress-mcp
  annotations:
    {{- if not .Values.ingress.className }}
    kubernetes.io/ingress.class: nginx
    {{- end }}
    nginx.ingress.kubernetes.io/rewrite-target: /$2
    nginx.ingress.kubernetes.io/use-regex: "true"
    cert-manager.io/cluster-issuer: {{ include "onyx.fullname" . }}-letsencrypt
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  rules:
    - host: {{ .Values.ingress.api.host }}
      http:
        paths:
          - path: /mcp(/|$)(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: {{ include "onyx.fullname" . }}-mcp-server-service
                port:
                  number: {{ .Values.mcpServer.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.api.host }}
      secretName: {{ include "onyx.fullname" . }}-ingress-mcp-tls
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/ingress-webserver.yaml
================================================
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx.fullname" . }}-ingress-webserver
  annotations:
    {{- if not .Values.ingress.className }}
    kubernetes.io/ingress.class: nginx
    {{- end }}
    cert-manager.io/cluster-issuer: {{ include "onyx.fullname" . }}-letsencrypt
    kubernetes.io/tls-acme: "true"
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  rules:
    - host: {{ .Values.ingress.webserver.host }}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: {{ include "onyx.fullname" . }}-webserver
                port:
                  number: {{ .Values.webserver.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.webserver.host }}
      secretName: {{ include "onyx.fullname" . }}-ingress-webserver-tls
{{- end }}

================================================
FILE: deployment/helm/charts/onyx/templates/lets-encrypt.yaml
================================================
{{- if .Values.letsencrypt.enabled -}}
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: {{ include "onyx.fullname" . }}-letsencrypt
spec:
  acme:
    # The ACME server URL
    server: https://acme-v02.api.letsencrypt.org/directory
    # Email address used for ACME registration
    email: {{ .Values.letsencrypt.email }}
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: {{ include "onyx.fullname" . }}-letsencrypt
    # Enable the HTTP-01 challenge provider
    solvers:
      - http01:
          ingress:
            class: nginx
{{- end }}

================================================
FILE: deployment/helm/charts/onyx/templates/mcp-server-deployment.yaml
================================================
{{- if .Values.mcpServer.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-mcp-server
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.mcpServer.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  replicas: {{ .Values.mcpServer.replicaCount }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.mcpServer.deploymentLabels }}
      {{- toYaml .Values.mcpServer.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.mcpServer.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.mcpServer.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.mcpServer.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.mcpServer.podSecurityContext | nindent 8 }}
      {{- with .Values.mcpServer.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.mcpServer.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.mcpServer.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: mcp-server
          securityContext:
            {{- toYaml .Values.mcpServer.securityContext | nindent 12 }}
          image: "{{ .Values.mcpServer.image.repository }}:{{ .Values.mcpServer.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command: ["python", "onyx/mcp_server_main.py"]
          ports:
            - name: mcp-server-port
              containerPort: {{ .Values.mcpServer.containerPorts.server }}
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /health
              port: mcp-server-port
            initialDelaySeconds: {{ .Values.mcpServer.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.mcpServer.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.mcpServer.livenessProbe.timeoutSeconds }}
            failureThreshold: {{ .Values.mcpServer.livenessProbe.failureThreshold }}
          readinessProbe:
            httpGet:
              path: /health
              port: mcp-server-port
            initialDelaySeconds: {{ .Values.mcpServer.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.mcpServer.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.mcpServer.readinessProbe.timeoutSeconds }}
            failureThreshold: {{ .Values.mcpServer.readinessProbe.failureThreshold }}
          resources:
            {{- toYaml .Values.mcpServer.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            - name: MCP_SERVER_ENABLED
              value: "true"
            - name: MCP_SERVER_PORT
              value: "{{ .Values.mcpServer.containerPorts.server }}"
            - name: MCP_SERVER_HOST
              value: "{{ .Values.global.host }}"
            {{- if .Values.mcpServer.corsOrigins }}
            - name: MCP_SERVER_CORS_ORIGINS
              value: "{{ .Values.mcpServer.corsOrigins }}"
            {{- end }}
            # API server connection for authentication and proxying
            # Uses full override variable to set the port instead of using default 8080
            - name: API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS
              value: "http://{{ include "onyx.fullname" . }}-api-service:{{ .Values.api.service.servicePort }}"
            {{- include "onyx.envSecrets" . | nindent 12 }}
          {{- with .Values.mcpServer.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.mcpServer.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/mcp-server-service.yaml
================================================
{{- if .Values.mcpServer.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-mcp-server-service
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.mcpServer.deploymentLabels }}
    {{- toYaml .Values.mcpServer.deploymentLabels | nindent 4 }}
    {{- end }}
spec:
  type: {{ .Values.mcpServer.service.type }}
  ports:
    - port: {{ .Values.mcpServer.service.servicePort }}
      targetPort: {{ .Values.mcpServer.service.targetPort }}
      protocol: TCP
      name: {{ .Values.mcpServer.service.portName }}
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.mcpServer.deploymentLabels }}
    {{- toYaml .Values.mcpServer.deploymentLabels | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/nginx-conf.yaml
================================================
###############################################################################
# NOTE: If you make changes to this file, increment the following in values.yaml
# before running `helm upgrade` to trigger an automatic nginx restart:
#
#   nginx.controller.podAnnotations:
#     onyx.app/nginx-config-version: "<new_version>"
#
# Otherwise, changes won't apply until you manually restart the nginx pods.
###############################################################################

apiVersion: v1
kind: ConfigMap
metadata:
  name: onyx-nginx-conf
data:
  upstreams.conf: |
    upstream api_server {
        server {{ include "onyx.fullname" . }}-api-service:{{ .Values.api.service.servicePort }} fail_timeout=0;
    }

    upstream web_server {
        server {{ include "onyx.fullname" . }}-webserver:{{ .Values.webserver.service.servicePort }} fail_timeout=0;
    }
    {{- if .Values.mcpServer.enabled }}

    upstream mcp_server {
        server {{ include "onyx.fullname" . }}-mcp-server-service:{{ .Values.mcpServer.service.servicePort }} fail_timeout=0;
    }
    {{- end }}

    # WebSocket support: only set Connection "upgrade" for actual upgrade requests
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      close;
    }

  server.conf: |
    server {
        listen 1024;
        server_name $$DOMAIN;

        client_max_body_size 5G;
        {{- if .Values.mcpServer.enabled }}

        # MCP Server - Model Context Protocol for LLM integrations
        # Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
        location ~ ^/mcp(/.*)?$ {
            rewrite ^/mcp(/.*)$ $1 break;
            rewrite ^/mcp/?$ / break;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header Host $host;
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_redirect off;
            # timeout settings
            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
            proxy_pass http://mcp_server;
        }
        {{- end }}

        location ~ ^/scim(/.*)?$ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header Host $host;
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_redirect off;
            # timeout settings
            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
            proxy_pass http://api_server;
        }

        location ~ ^/(api|openapi\.json)(/.*)?$ {
            rewrite ^/api(/.*)$ $1 break;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header Host $host;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
            proxy_buffering off;
            proxy_redirect off;
            # timeout settings
            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
            proxy_pass http://api_server;
        }

        location / {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header Host $host;
            proxy_http_version 1.1;
            proxy_redirect off;
            # timeout settings
            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
            proxy_pass http://web_server;
        }
    }


================================================
FILE: deployment/helm/charts/onyx/templates/serviceaccount.yaml
================================================
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "onyx.serviceAccountName" . }}
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/slackbot.yaml
================================================
{{- if .Values.slackbot.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-slackbot
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.slackbot.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.slackbot.deploymentLabels }}
      {{- toYaml .Values.slackbot.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.slackbot.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.slackbot.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.slackbot.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.slackbot.podSecurityContext | nindent 8 }}
      {{- with .Values.slackbot.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.slackbot.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.slackbot.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: slackbot
          securityContext:
            {{- toYaml .Values.slackbot.securityContext | nindent 12 }}
          image: "{{ .Values.slackbot.image.repository }}:{{ .Values.slackbot.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          command: ["python", "onyx/onyxbot/slack/listener.py"]
          resources:
            {{- toYaml .Values.slackbot.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.slackbot.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.slackbot.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/tests/test-connection.yaml
================================================
{{- if gt (int .Values.webserver.replicaCount) 0 }}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "onyx.fullname" . }}-test-connection"
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
    "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
spec:
  containers:
    - name: curl
      image: curlimages/curl:8.10.1
      command:
        - /bin/sh
        - -c
      args:
        - |
          SVC="{{ include "onyx.fullname" . }}-webserver"
          PORT="{{ .Values.webserver.service.servicePort }}"
          URL="http://${SVC}:${PORT}/"
          for i in $(seq 1 40); do
            echo "Attempt $i: curl ${URL}"
            # Treat any successful TCP/HTTP response as success (even 5xx).
            # curl exits 0 on HTTP 4xx/5xx if -f is not used; non-zero indicates connection error.
            if curl --connect-timeout 3 --max-time 5 -sS -o /dev/null "$${URL}"; then
              echo "Connection succeeded"
              exit 0
            fi
            sleep 10
          done
          echo "Service not reachable after 40 attempts"
          exit 1
  restartPolicy: Never
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/tooling-pginto-configmap.yaml
================================================
{{- if (include "onyx.pgInto.enabled" .) }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "onyx.pgInto.configMapName" . }}
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
data:
  pginto: |
    #!/usr/bin/env sh
    set -eu

    HOST="${PGINTO_HOST:-${POSTGRES_HOST:-localhost}}"
    PORT="${POSTGRES_PORT:-5432}"
    USER="${POSTGRES_USER:-postgres}"
    DB="${POSTGRES_DB:-postgres}"
    PSQL_BIN="${PGINTO_PSQL_BIN:-{{ default "psql" .Values.tooling.pgInto.psqlBinary }}}"
    USE_IAM="$(printf '%s' "${USE_IAM_AUTH:-false}" | tr '[:upper:]' '[:lower:]')"

    if ! command -v "${PSQL_BIN}" >/dev/null 2>&1; then
      echo "psql client '${PSQL_BIN}' not found in PATH" >&2
      exit 1
    fi

    if [ "${USE_IAM}" = "true" ]; then
      REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-${AWS_REGION_NAME:-}}}"
      if [ -z "${REGION}" ]; then
        REGION="$(printf "%s\n" "${HOST}" | sed -n 's/.*\.\([a-z0-9-]*\)\.rds\.amazonaws\.com.*/\1/p')"
      fi
      if [ -z "${REGION}" ]; then
        echo "USE_IAM_AUTH is true but AWS region is not set (AWS_REGION/AWS_DEFAULT_REGION/AWS_REGION_NAME)" >&2
        exit 1
      fi
      PY_BIN="$(command -v python3 || command -v python || true)"
      if [ -z "${PY_BIN}" ]; then
        echo "python is required to generate RDS IAM auth token" >&2
        exit 1
      fi
      if [ -z "${PGSSLMODE:-}" ]; then
        export PGSSLMODE=require
      fi
      PGPASSWORD="$("${PY_BIN}" -c 'import sys,boto3; host,port,user,region=sys.argv[1:]; port_int=int(port); token=boto3.client("rds", region_name=region).generate_db_auth_token(DBHostname=host, Port=port_int, DBUsername=user); sys.stdout.write(token)' "${HOST}" "${PORT}" "${USER}" "${REGION}")"
      if [ -z "${PGPASSWORD}" ]; then
        echo "failed to generate IAM auth token" >&2
        exit 1
      fi
      export PGPASSWORD
    else
      if [ -z "${PGPASSWORD:-}" ] && [ -n "${POSTGRES_PASSWORD:-}" ]; then
        export PGPASSWORD="${POSTGRES_PASSWORD}"
      fi

      if [ -z "${PGPASSWORD:-}" ]; then
        printf "Postgres password: " >&2
        if command -v stty >/dev/null 2>&1; then
          stty -echo || true
        fi
        if ! read -r PGPASSWORD; then
          echo "failed to read password" >&2
          exit 1
        fi
        if command -v stty >/dev/null 2>&1; then
          stty echo || true
        fi
        printf "\n" >&2
        export PGPASSWORD
      fi
    fi

    if [ -n "${POSTGRES_SSLMODE:-}" ] && [ -z "${PGSSLMODE:-}" ]; then
      export PGSSLMODE="${POSTGRES_SSLMODE}"
    fi

    echo "Connecting to ${DB} on ${HOST}:${PORT} as ${USER}"
    exec "${PSQL_BIN}" -h "${HOST}" -p "${PORT}" -U "${USER}" "${DB}" "$@"
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/webserver-deployment.yaml
================================================
{{- if gt (int .Values.webserver.replicaCount) 0 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-web-server
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- with .Values.webserver.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- if not .Values.webserver.autoscaling.enabled }}
  replicas: {{ .Values.webserver.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.webserver.deploymentLabels }}
      {{- toYaml .Values.webserver.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
      {{- with .Values.webserver.podAnnotations }}
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.webserver.deploymentLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.webserver.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.webserver.podSecurityContext | nindent 8 }}
      {{- with .Values.webserver.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.webserver.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.webserver.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: web-server
          securityContext:
            {{- toYaml .Values.webserver.securityContext | nindent 12 }}
          image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Values.global.version }}"
          imagePullPolicy: {{ .Values.global.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.webserver.containerPorts.server }}
              protocol: TCP
          resources:
            {{- toYaml .Values.webserver.resources | nindent 12 }}
          {{- with .Values.webserver.startupProbe }}
          startupProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.webserver.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.webserver.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            {{- include "onyx.envSecrets" . | nindent 12}}
          {{- with .Values.webserver.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.webserver.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/webserver-hpa.yaml
================================================
{{- if and (.Values.webserver.autoscaling.enabled) (ne (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-webserver
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}
  minReplicas: {{ .Values.webserver.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.webserver.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/webserver-scaledobject.yaml
================================================
{{- if and (.Values.webserver.autoscaling.enabled) (eq (include "onyx.autoscaling.engine" .) "keda") }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "onyx.fullname" . }}-web-server
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}-web-server
  minReplicaCount: {{ .Values.webserver.autoscaling.minReplicas }}
  maxReplicaCount: {{ .Values.webserver.autoscaling.maxReplicas }}
  pollingInterval: {{ .Values.webserver.autoscaling.pollingInterval | default 30 }}
  cooldownPeriod: {{ .Values.webserver.autoscaling.cooldownPeriod | default 300 }}
  {{- if hasKey .Values.webserver.autoscaling "idleReplicaCount" }}
  idleReplicaCount: {{ .Values.webserver.autoscaling.idleReplicaCount }}
  {{- end }}
  {{- if .Values.webserver.autoscaling.customTriggers }}
  fallback:
    failureThreshold: {{ .Values.webserver.autoscaling.failureThreshold | default 3 }}
    replicas: {{ .Values.webserver.autoscaling.fallbackReplicas | default 1 }}
  {{- end }}
  triggers:
    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
    - type: cpu
      metricType: Utilization
      metadata:
        value: "{{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
    - type: memory
      metricType: Utilization
      metadata:
        value: "{{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}"
    {{- end }}
    {{- if .Values.webserver.autoscaling.customTriggers }}
    {{- toYaml .Values.webserver.autoscaling.customTriggers | nindent 4 }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates/webserver-service.yaml
================================================
apiVersion: v1
kind: Service
metadata:
  name: {{ include "onyx.fullname" . }}-webserver
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
    {{- if .Values.webserver.deploymentLabels }}
    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
    {{- end }}
spec:
  type: {{ .Values.webserver.service.type }}
  ports:
    - port: {{ .Values.webserver.service.servicePort }}
      targetPort: {{ .Values.webserver.service.targetPort }}
      protocol: TCP
      name: http
  selector:
    {{- include "onyx.selectorLabels" . | nindent 4 }}
    {{- if .Values.webserver.deploymentLabels }}
    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
    {{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates_disabled/background-deployment.yaml
================================================
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "onyx.fullname" . }}-background
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  {{- if not .Values.background.autoscaling.enabled }}
  replicas: {{ .Values.background.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "onyx.selectorLabels" . | nindent 6 }}
      {{- if .Values.background.deploymentLabels }}
      {{- toYaml .Values.background.deploymentLabels | nindent 6 }}
      {{- end }}
  template:
    metadata:
      {{- with .Values.background.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "onyx.labels" . | nindent 8 }}
        {{- with .Values.background.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "onyx.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.background.podSecurityContext | nindent 8 }}
      containers:
        - name: background
          securityContext:
            {{- toYaml .Values.background.securityContext | nindent 12 }}
          image: "{{ .Values.background.image.repository }}:{{ .Values.background.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.background.image.pullPolicy }}
          command: ["/usr/bin/supervisord"]
          resources:
            {{- toYaml .Values.background.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ .Values.config.envConfigMapName }}
          env:
            - name: ENABLE_MULTIPASS_INDEXING
              value: "{{ .Values.background.enableMiniChunk }}"
            {{- include "onyx.envSecrets" . | nindent 12}}


================================================
FILE: deployment/helm/charts/onyx/templates_disabled/background-hpa.yaml
================================================
{{- if .Values.background.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "onyx.fullname" . }}-background
  labels:
    {{- include "onyx.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "onyx.fullname" . }}
  minReplicas: {{ .Values.background.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.background.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.background.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.background.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}


================================================
FILE: deployment/helm/charts/onyx/templates_disabled/onyx-secret.yaml
================================================
{{- if not .Values.auth.existingSecret -}}
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "onyx.secretName" . }}
type: Opaque
stringData:
  {{- range $name, $value := .Values.auth.secrets }}
  {{ $name }}: {{ $value | quote }}
  {{- end }}
{{- end }}

================================================
FILE: deployment/helm/charts/onyx/values-lite.yaml
================================================
# =============================================================================
# ONYX LITE — MINIMAL DEPLOYMENT VALUES
# =============================================================================
# Minimal Onyx deployment: no vector database, no Redis, no model servers.
# Only PostgreSQL is required. Connectors and RAG search are disabled, but the
# core chat experience (LLM conversations, tools, user file uploads, Projects,
# Agent knowledge) still works.
#
# Usage:
#   helm install onyx ./deployment/helm/charts/onyx \
#     -f ./deployment/helm/charts/onyx/values-lite.yaml
#
# Or merged with your own overrides:
#   helm install onyx ./deployment/helm/charts/onyx \
#     -f ./deployment/helm/charts/onyx/values-lite.yaml \
#     -f my-overrides.yaml
# =============================================================================

vectorDB:
  enabled: false

vespa:
  enabled: false

redis:
  enabled: false

configMap:
  CACHE_BACKEND: "postgres"
  AUTH_BACKEND: "postgres"
  FILE_STORE_BACKEND: "postgres"


================================================
FILE: deployment/helm/charts/onyx/values.yaml
================================================
# Default values for onyx.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

global:
  # Global version for all Onyx components (overrides .Chart.AppVersion)
  version: "latest"
  # Global pull policy for all Onyx component images
  pullPolicy: "IfNotPresent"
  # Host for all Onyx components
  host: "0.0.0.0"

postgresql:
  enabled: true
  # IMPORTANT: This nameOverride is required for the CloudNativePG operator to find itself.
  # The operator looks for a deployment with label app.kubernetes.io/name=cloudnative-pg,
  # but since the subchart is aliased as "postgresql", Helm defaults to that name.
  nameOverride: cloudnative-pg
  cluster:
    instances: 1
    storage:
      storageClass: ""
      size: 10Gi
    enableSuperuserAccess: true
    superuserSecret:
      name: onyx-postgresql  # keep in sync with auth.postgresql

# -- Master toggle for vector database support. When false:
#   - Sets DISABLE_VECTOR_DB=true on all backend pods
#   - Skips the indexing model server deployment (embeddings not needed)
#   - Skips ALL celery worker deployments (beat, primary, light, heavy,
#     monitoring, user-file-processing, docprocessing, docfetching) — the
#     API server handles background work via FastAPI BackgroundTasks
#   - You should also set vespa.enabled=false and opensearch.enabled=false
#     to prevent those subcharts from deploying
vectorDB:
  enabled: true

vespa:
  name: da-vespa-0
  service:
    name: vespa-service
  volumeClaimTemplates:
    - metadata:
        name: vespa-storage
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 30Gi
        storageClassName: ""
  enabled: true
  replicaCount: 1
  image:
    repository: vespa
    tag: "8.609.39"
  podAnnotations: {}
  podLabels:
    app: vespa
    app.kubernetes.io/instance: onyx
    app.kubernetes.io/name: vespa
  securityContext:
    privileged: true
    runAsUser: 0
  resources:
    # The Vespa Helm chart specifies default resources, which are quite modest. We override
    # them here to increase chances of the chart running successfully. If you plan to index at
    # scale, you will likely need to increase these limits further.
    # At large scale, it is recommended to use a dedicated Vespa cluster / Vespa cloud.
    requests:
      cpu: 4000m
      memory: 8000Mi
    limits:
      cpu: 8000m
      memory: 32000Mi

opensearch:
  # Enabled by default. Override to false and set the appropriate env vars in
  # the instance-specific values yaml if using AWS-managed OpenSearch, or simply
  # override to false to entirely disable.
  enabled: true
  # These values are passed to the opensearch subchart.
  # See https://github.com/opensearch-project/helm-charts/blob/main/charts/opensearch/values.yaml

  singleNode: true  # Forces replicas=1, sets discovery.type=single-node

  # Determines service DNS: onyx-opensearch-master.<namespace>.svc.cluster.local
  clusterName: "onyx-opensearch"
  nodeGroup: "master"
  masterService: "onyx-opensearch-master"

  replicas: 1

  image:
    repository: "opensearchproject/opensearch"
    tag: ""  # Empty uses chart's appVersion (3.4.0).

  # The security plugin requires OPENSEARCH_INITIAL_ADMIN_PASSWORD for
  # OpenSearch 2.12+.
  # See https://docs.opensearch.org/latest/install-and-configure/install-opensearch/helm/#prerequisites
  extraEnvs:
    - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD
      valueFrom:
        secretKeyRef:
          name: onyx-opensearch  # Must match auth.opensearch.secretName or auth.opensearch.existingSecret if defined.
          key: opensearch_admin_password  # Must match auth.opensearch.secretKeys value.

  resources:
    requests:
      cpu: 2000m
      memory: 4Gi
    limits:
      cpu: 4000m
      memory: 8Gi

  persistence:
    enabled: true
    size: 30Gi
    storageClass: ""

  # Java heap should be ~50% of memory limit.
  # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/
  # Xms is the starting size, Xmx is the maximum size. These should be the same.
  opensearchJavaOpts: "-Xmx4g -Xms4g"

persistent:
  storageClassName: ""

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

autoscaling:
  # Valid options: 'hpa' (default) or 'keda'.
  # Set to 'keda' to render KEDA ScaledObjects for components that have autoscaling enabled.
  # When using KEDA you must install and manage the KEDA operator separately; it is not bundled with this chart.
  engine: hpa

inferenceCapability:
  service:
    portName: modelserver
    type: ClusterIP
    servicePort: 9000
    targetPort: 9000
  name: inference-model-server
  replicaCount: 1
  labels:
    - key: app
      value: inference-model-server
  image:
    repository: onyxdotapp/onyx-model-server
    # Overrides the image tag whose default is the chart appVersion.
    tag: ""
  containerPorts:
    server: 9000
  podLabels:
    - key: app
      value: inference-model-server
  resources:
    requests:
      cpu: 2000m
      memory: 3Gi
    limits:
      cpu: 4000m
      memory: 10Gi
  # Optional health probes
  # Example:
  # readinessProbe:
  #   httpGet:
  #     path: /health
  #     port: model-server
  startupProbe: {}
  readinessProbe: {}
  livenessProbe: {}
  podSecurityContext: {}
  securityContext:
    privileged: true
    runAsUser: 0
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Deployment strategy - use Recreate or RollingUpdate with maxSurge: 0 to terminate old pod first
  # This prevents pending pods when cluster resources are constrained
  strategy: {}
  # Example for RollingUpdate that terminates old pod first:
  # strategy:
  #   type: RollingUpdate
  #   rollingUpdate:
  #     maxSurge: 0
  #     maxUnavailable: 1


indexCapability:
  service:
    portName: modelserver
    type: ClusterIP
    servicePort: 9000
    targetPort: 9000
  replicaCount: 1
  name: indexing-model-server
  deploymentLabels:
    app: indexing-model-server
  podLabels:
    scope: onyx-backend
  indexingOnly: "True"
  podAnnotations: {}
  containerPorts:
    server: 9000
  image:
    repository: onyxdotapp/onyx-model-server
    # Overrides the image tag whose default is the chart appVersion.
    tag: ""
  limitConcurrency: 10
  resources:
    requests:
      cpu: 4000m
      memory: 3Gi
    limits:
      cpu: 6000m
      memory: 6Gi
  # Optional health probes
  # Example:
  # readinessProbe:
  #   httpGet:
  #     path: /health
  #     port: model-server
  startupProbe: {}
  readinessProbe: {}
  livenessProbe: {}
  podSecurityContext: {}
  securityContext:
    privileged: true
    runAsUser: 0
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Deployment strategy - use Recreate or RollingUpdate with maxSurge: 0 to terminate old pod first
  # This prevents pending pods when cluster resources are constrained
  strategy: {}
  # Example for RollingUpdate that terminates old pod first:
  # strategy:
  #   type: RollingUpdate
  #   rollingUpdate:
  #     maxSurge: 0
  #     maxUnavailable: 1
config:
  envConfigMapName: env-configmap

tooling:
  pgInto:
    # -- Mounts a small helper script into app pods that opens psql using the pod's POSTGRES_* env vars.
    enabled: false
    # -- Where to place the helper inside the container.
    mountPath: /usr/local/bin/pginto
    # -- Which client binary to call; change if your image uses a non-default path.
    psqlBinary: psql

monitoring:
  grafana:
    dashboards:
      # -- Set to true to deploy Grafana dashboard ConfigMaps for the Onyx indexing pipeline.
      # Requires kube-prometheus-stack (or equivalent) with the Grafana sidecar enabled and watching this namespace.
      # The sidecar must be configured with label selector: grafana_dashboard=1
      enabled: false
  serviceMonitors:
    # -- Set to true to deploy ServiceMonitor resources for Celery worker metrics endpoints.
    # Requires the Prometheus Operator CRDs (included in kube-prometheus-stack).
    # Use `labels` to match your Prometheus CR's serviceMonitorSelector (e.g. release: onyx-monitoring).
    enabled: false
    labels: {}

serviceAccount:
  # Specifies whether a service account should be created
  create: false
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

nginx:
  enabled: true
  # Nginx proxy timeout settings (in seconds)
  timeouts:
    connect: 300  # Time to establish connection with upstream server
    send: 300     # Time to send request to upstream server
    read: 300     # Time to read response from upstream server
  controller:
    containerPort:
      http: 1024

    # NOTE: When onyx-nginx-conf changes, nginx pods need to restart.
    # The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
    # Workaround: Helm upgrade will restart if the following annotation value changes.
    podAnnotations:
      onyx.app/nginx-config-version: "3"

    # Propagate DOMAIN into nginx so server_name continues to use the same env var
    extraEnvs:
      - name: DOMAIN
        value: localhost

    config:
      # Expose DOMAIN to the nginx config and pull in our custom snippets
      main-snippet: |
        env DOMAIN;
      http-snippet: |
        include /etc/nginx/custom-snippets/upstreams.conf;
        include /etc/nginx/custom-snippets/server.conf;

    # Mount the existing nginx ConfigMap that holds the upstream and server snippets
    extraVolumes:
      - name: nginx-config
        configMap:
          name: onyx-nginx-conf
    extraVolumeMounts:
      - name: nginx-config
        mountPath: /etc/nginx/custom-snippets
        readOnly: true

    service:
      type: LoadBalancer
      ports:
        http: 80
      targetPorts:
        http: http

webserver:
  replicaCount: 1
  image:
    repository: onyxdotapp/onyx-web-server
    # Overrides the image tag whose default is the chart appVersion.
    tag: ""
  deploymentLabels:
    app: web-server
  podAnnotations: {}
  podLabels:
    scope: onyx-frontend
  podSecurityContext:
    {}
    # fsGroup: 2000

  securityContext:
    {}
    # capabilities:
    #   drop:
    #   - ALL
    # readOnlyRootFilesystem: true
    # runAsNonRoot: true
    # runAsUser: 1000

  containerPorts:
    server: 3000

  service:
    type: ClusterIP
    servicePort: 3000
    targetPort: http

  resources:
    requests:
      cpu: 200m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 1Gi

  # Optional health probes
  # Example:
  # readinessProbe:
  #   httpGet:
  #     path: /api/health
  #     port: http
  startupProbe: {}
  readinessProbe: {}
  livenessProbe: {}

  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 100
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    # Example: customTriggers: []
    #   - type: prometheus
    #     metadata:
    #       serverAddress: http://prometheus:9090
    #       metricName: http_requests_per_second
    #       threshold: '100'
    customTriggers: []

  # Additional volumes on the output Deployment definition.
  volumes: []
  # - name: foo
  #   secret:
  #     secretName: mysecret
  #     optional: false

  # Additional volumeMounts on the output Deployment definition.
  volumeMounts: []
  # - name: foo
  #   mountPath: "/etc/foo"
  #   readOnly: true

  nodeSelector: {}
  tolerations: []
  affinity: {}

api:
  replicaCount: 1
  image:
    repository: onyxdotapp/onyx-backend
    # Overrides the image tag whose default is the chart appVersion.
    tag: ""
  deploymentLabels:
    app: api-server
  podAnnotations: {}
  podLabels:
    scope: onyx-backend

  containerPorts:
    server: 8080

  podSecurityContext:
    {}
    # fsGroup: 2000

  securityContext:
    {}
    # capabilities:
    #   drop:
    #   - ALL
    # readOnlyRootFilesystem: true
    # runAsNonRoot: true
    # runAsUser: 1000

  service:
    type: ClusterIP
    servicePort: 8080
    targetPort: api-server-port
    portName: api-server-port

  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 1000m
      memory: 3Gi

  # Optional health probes
  # Example:
  # readinessProbe:
  #   httpGet:
  #     path: /health
  #     port: api-server-port
  startupProbe: {}
  readinessProbe: {}
  livenessProbe: {}

  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 100
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    # Example: customTriggers: []
    #   - type: prometheus
    #     metadata:
    #       serverAddress: http://prometheus:9090
    #       metricName: http_requests_per_second
    #       threshold: '100'
    customTriggers: []

  # Additional volumes on the output Deployment definition.
  volumes: []
  # - name: foo
  #   secret:
  #     secretName: mysecret
  #     optional: false

  # Additional volumeMounts on the output Deployment definition.
  volumeMounts: []
  # - name: foo
  #   mountPath: "/etc/foo"
  #   readOnly: true

  nodeSelector: {}
  tolerations: []
  affinity: {}


######################################################################
#
# Background workers
#
######################################################################

celery_shared:
  image:
    repository: onyxdotapp/onyx-backend
    tag: ""  # Overrides the image tag whose default is the chart appVersion.
  startupProbe:
    # startupProbe fails after 2m
    exec:
      command: ["test", "-f", "/app/onyx/main.py"]
    failureThreshold: 24
    periodSeconds: 5
    timeoutSeconds: 3
  readinessProbe:
    # readinessProbe fails after 15s + 2m of inactivity
    # it's ok to see the readinessProbe fail transiently while the container starts
    initialDelaySeconds: 15
    periodSeconds: 5
    failureThreshold: 24
    timeoutSeconds: 3
  livenessProbe:
    # livenessProbe fails after 5m of inactivity
    initialDelaySeconds: 60
    periodSeconds: 60
    failureThreshold: 5
    timeoutSeconds: 3
  podSecurityContext: {}
  securityContext:
    privileged: true
    runAsUser: 0

celery_beat:
  replicaCount: 1
  logLevel: INFO
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-beat
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 1Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_heavy:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 10
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-heavy
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 2Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_docprocessing:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 20
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-docprocessing
  resources:
    requests:
      cpu: 500m
      memory: 2Gi
    limits:
      cpu: 1000m
      memory: 12Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_light:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 10
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-light
  resources:
    requests:
      cpu: 250m
      memory: 512Mi
    limits:
      cpu: 2000m
      memory: 4Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_monitoring:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 10
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-monitoring
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 4Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_primary:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 10
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-primary
  resources:
    requests:
      cpu: 500m
      memory: 2Gi
    limits:
      cpu: 1000m
      memory: 4Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_user_file_processing:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 10
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-user-file-processing
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: 2000m
      memory: 2Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

# Discord bot for Onyx
# The bot offloads message processing to scalable API pods via HTTP requests.
discordbot:
  enabled: false  # Disabled by default - requires bot token configuration
  # Bot token can be provided directly or via a Kubernetes secret
  # Option 1: Direct token (not recommended for production)
  botToken: ""
  # Option 2: Reference a Kubernetes secret (recommended)
  botTokenSecretName: ""  # Name of the secret containing the bot token
  botTokenSecretKey: "token"  # Key within the secret (default: "token")
  # Command prefix for bot commands (default: "!")
  invokeChar: "!"
  image:
    repository: onyxdotapp/onyx-backend
    tag: ""  # Overrides the image tag whose default is the chart appVersion.
  podAnnotations: {}
  podLabels:
    scope: onyx-backend
  deploymentLabels:
    app: discord-bot
  podSecurityContext:
    {}
  securityContext:
    {}
  resources:
    requests:
      cpu: "500m"
      memory: "512Mi"
    limits:
      cpu: "1000m"
      memory: "2000Mi"
  volumes: []
  volumeMounts: []
  nodeSelector: {}
  tolerations: []
  affinity: {}

slackbot:
  enabled: true
  replicaCount: 1
  image:
    repository: onyxdotapp/onyx-backend
    tag: ""  # Overrides the image tag whose default is the chart appVersion.
  podAnnotations: {}
  podLabels:
    scope: onyx-backend
  deploymentLabels:
    app: slack-bot
  podSecurityContext:
    {}
  securityContext:
    {}
  resources:
    requests:
      cpu: "500m"
      memory: "512Mi"
    limits:
      cpu: "1000m"
      memory: "2000Mi"
  nodeSelector: {}
  tolerations: []
  affinity: {}

# Onyx Model Context Protocol (MCP) Server
# Allows LLMs to use Onyx like invoking tools or accessing resources
mcpServer:
  enabled: false  # Disabled by default
  replicaCount: 1
  image:
    repository: onyxdotapp/onyx-backend
    tag: ""  # Overrides the image tag whose default is the chart appVersion.
  # CORS origins for MCP clients (comma-separated)
  # Example: "https://claude.ai,https://app.cursor.sh"
  corsOrigins: ""
  podAnnotations: {}
  podLabels:
    scope: onyx-backend
  deploymentLabels:
    app: mcp-server
  containerPorts:
    server: 8090
  service:
    type: ClusterIP
    servicePort: 8090
    targetPort: mcp-server-port
    portName: mcp-server-port
  podSecurityContext: {}
  securityContext: {}
  resources:
    requests:
      cpu: "250m"
      memory: "256Mi"
    limits:
      cpu: "500m"
      memory: "512Mi"
  livenessProbe:
    initialDelaySeconds: 10
    periodSeconds: 30
    timeoutSeconds: 5
    failureThreshold: 3
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 10
    timeoutSeconds: 5
    failureThreshold: 3
  volumes: []
  volumeMounts: []
  nodeSelector: {}
  tolerations: []
  affinity: {}

celery_worker_docfetching:
  replicaCount: 1
  logLevel: INFO
  autoscaling:
    enabled: false
    minReplicas: 1
    maxReplicas: 20
    targetCPUUtilizationPercentage: 80
    targetMemoryUtilizationPercentage: 80
    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')
    pollingInterval: 30  # seconds
    cooldownPeriod: 300  # seconds
    failureThreshold: 3  # number of failures before fallback
    fallbackReplicas: 1  # replicas to maintain on failure
    # Custom triggers for advanced KEDA configurations
    customTriggers: []
  podAnnotations: {}
  podLabels:
    scope: onyx-backend-celery
  deploymentLabels:
    app: celery-worker-docfetching
  resources:
    requests:
      cpu: 500m
      memory: 2Gi
    limits:
      cpu: 1000m
      memory: 16Gi
  volumes: []  # Additional volumes on the output Deployment definition.
  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.
  nodeSelector: {}
  tolerations: []
  affinity: {}

######################################################################
#
# End background workers section
#
######################################################################

redis:
  enabled: true
  redisStandalone:
    image: quay.io/opstree/redis
    tag: v7.0.15
    imagePullPolicy: IfNotPresent
    serviceType: ClusterIP
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 512Mi
    # Use existing secret for Redis password
    redisSecret:
      secretName: onyx-redis
      secretKey: redis_password
  # Redis configuration
  externalConfig:
    enabled: true
    data: |
      appendonly no
      save ""
      maxmemory 400mb
      maxmemory-policy allkeys-lru
      timeout 0
      tcp-keepalive 300
  storageSpec:
    volumeClaimTemplate:
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi

minio:
  enabled: true
  mode: standalone
  replicas: 1
  drivesPerNode: 1
  existingSecret: onyx-objectstorage
  buckets:
    - name: onyx-file-store-bucket
  persistence:
    enabled: true
    size: 30Gi
    storageClass: ""
  service:
    type: ClusterIP
    port: 9000
  consoleService:
    type: ClusterIP
    port: 9001
  nodeSelector: {}
  tolerations: []
  affinity: {}

# Code Interpreter - Python code execution service (beta feature)
codeInterpreter:
  enabled: true

  replicaCount: 1

  image:
    repository: onyxdotapp/code-interpreter
    pullPolicy: Always
    tag: ""  # Empty uses chart appVersion

  # Service configuration
  service:
    type: ClusterIP
    port: 8000
    targetPort: 8000

  # Execution limits
  codeInterpreter:
    maxExecTimeoutMs: 60000
    maxOutputBytes: "1000000"
    cpuTimeLimitSec: 5
    memoryLimitMb: 256
    host: "0.0.0.0"
    port: 8000

    # Kubernetes executor configuration (creates pods for code execution)
    kubernetesExecutor:
      namespace: ""  # Empty = same namespace as release
      image: ""  # Empty = default sandbox image
      serviceAccount: ""
      podResources:
        limits:
          cpu: "1"
          memory: "256Mi"
        requests:
          cpu: "100m"
          memory: "64Mi"

  # API container resources
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 1000m
      memory: 512Mi

  # RBAC for pod management (required for kubernetes executor)
  rbac:
    create: true

  # Security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    fsGroup: 1000

  securityContext:
    capabilities:
      drop:
        - ALL
    readOnlyRootFilesystem: false
    runAsNonRoot: true
    runAsUser: 1000
    allowPrivilegeEscalation: false

  # Health probes
  livenessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 10
    periodSeconds: 10

  readinessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 5
    periodSeconds: 5

  nodeSelector: {}
  tolerations: []
  affinity: {}

  # Optional features
  ingress:
    enabled: false
  networkPolicy:
    enabled: false
  serviceAccount:
    create: true
    automount: true

ingress:
  enabled: false
  className: ""
  api:
    host: onyx.local
  webserver:
    host: onyx.local

letsencrypt:
  enabled: false
  email: "abc@abc.com"

# -- Governs all Secrets created or used by this chart. Values set by this chart will be base64 encoded in the k8s cluster.
auth:
  postgresql:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: true
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-postgresql'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      # CloudNativePG requires `username` and `password` keys for the superuser secret.
      POSTGRES_USER: username
      POSTGRES_PASSWORD: password
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      username: "postgres"
      password: "postgres"
  redis:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: true
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-redis'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      REDIS_PASSWORD: redis_password
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      redis_password: "password"
  objectstorage:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: true
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-objectstorage'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      S3_AWS_ACCESS_KEY_ID: s3_aws_access_key_id
      S3_AWS_SECRET_ACCESS_KEY: s3_aws_secret_access_key
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      s3_aws_access_key_id: "minioadmin"
      s3_aws_secret_access_key: "minioadmin"
      rootUser: "minioadmin"
      rootPassword: "minioadmin"
  oauth:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: false
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-oauth'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      OAUTH_CLIENT_ID: "oauth_client_id"
      OAUTH_CLIENT_SECRET: "oauth_client_secret"
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      oauth_client_id: ""
      oauth_client_secret: ""
  smtp:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: false
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-smtp'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      SMTP_PASS: "smtp_pass"
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      smtp_pass: ""
  dbreadonly:
    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.
    enabled: false
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-dbreadonly'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map, key is always upper-cased as an env var
    secretKeys:
      DB_READONLY_USER: db_readonly_user
      DB_READONLY_PASSWORD: db_readonly_password
    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.
    values:
      db_readonly_user: ""
      db_readonly_password: ""
  opensearch:
    # Enable or disable this secret entirely. Will remove from env var
    # configurations and remove any created secrets.
    # Enabled by default. Override to false and set the appropriate env vars in
    # the instance-specific values yaml if using AWS-managed OpenSearch, or
    # simply override to false to entirely disable.
    enabled: true
    # Overwrite the default secret name, ignored if existingSecret is defined.
    secretName: 'onyx-opensearch'
    # Use a secret specified elsewhere.
    existingSecret: ""
    # This defines the env var to secret map, key is always upper-cased as an
    # env var.
    secretKeys:
      OPENSEARCH_ADMIN_USERNAME: opensearch_admin_username
      OPENSEARCH_ADMIN_PASSWORD: opensearch_admin_password
    # Secrets values IF existingSecret is empty. Key here must match the value
    # in secretKeys to be used. Values will be base64 encoded in the k8s
    # cluster.
    # For the bundled OpenSearch chart, the admin password is consumed during
    # initial cluster setup. Changing this value later will update Onyx's
    # client credentials, but will not rotate the OpenSearch admin password.
    # Set this before first install or use existingSecret to preserve the
    # current secret on upgrade.
    # Password must meet OpenSearch complexity requirements:
    # min 8 chars, uppercase, lowercase, digit, and special character.
    # Required when auth.opensearch.enabled=true and no existing secret exists.
    values:
      opensearch_admin_username: "admin"
      opensearch_admin_password: ""
  userauth:
    # -- Used for password reset / verification tokens and OAuth/OIDC state signing.
    # Disabled by default to preserve upgrade compatibility for existing Helm customers.
    enabled: false
    # -- Overwrite the default secret name, ignored if existingSecret is defined
    secretName: 'onyx-userauth'
    # -- Use a secret specified elsewhere
    existingSecret: ""
    # -- This defines the env var to secret map
    secretKeys:
      USER_AUTH_SECRET: user_auth_secret
    # -- Secret value. Required when this secret is enabled - generate with: openssl rand -hex 32
    # If not set, helm install/upgrade will fail when auth.userauth.enabled=true.
    values:
      user_auth_secret: ""

configMap:
  # Auth type: "basic" (default), "google_oauth", "oidc", or "saml"
  # UPGRADE NOTE: Default changed from "disabled" to "basic" in 0.4.34.
  # Set auth.userauth.enabled=true and provide auth.userauth.values.user_auth_secret
  # before enabling flows that require it.
  AUTH_TYPE: "basic"
  # Enable PKCE for OIDC login flow. Leave empty/false for backward compatibility.
  OIDC_PKCE_ENABLED: ""
  # 1 Day Default
  SESSION_EXPIRE_TIME_SECONDS: "86400"
  # Can be something like onyx.app, as an extra double-check
  VALID_EMAIL_DOMAINS: ""
  # For sending verification emails, true or false
  REQUIRE_EMAIL_VERIFICATION: ""
  # If unspecified then defaults to 'smtp.gmail.com'
  SMTP_SERVER: ""
  # For sending verification emails, if unspecified then defaults to '587'
  SMTP_PORT: ""
# 'your-email@company.com'
  SMTP_USER: ""
  # 'your-gmail-password'
  # SMTP_PASS: ""
  # 'your-email@company.com' SMTP_USER missing used instead
  EMAIL_FROM: ""
  # MinIO/S3 Configuration override
  S3_ENDPOINT_URL: ""  # only used if minio is not enabled
  S3_FILE_STORE_BUCKET_NAME: ""
  # Gen AI Settings
  GEN_AI_MAX_TOKENS: ""
  LLM_SOCKET_READ_TIMEOUT: "60"
  MAX_CHUNKS_FED_TO_CHAT: ""
  # Query Options
  DOC_TIME_DECAY: ""
  HYBRID_ALPHA: ""
  EDIT_KEYWORD_QUERY: ""
  # Don't change the NLP models unless you know what you're doing
  EMBEDDING_BATCH_SIZE: ""
  DOCUMENT_ENCODER_MODEL: ""
  NORMALIZE_EMBEDDINGS: ""
  ASYM_QUERY_PREFIX: ""
  ASYM_PASSAGE_PREFIX: ""
  DISABLE_RERANK_FOR_STREAMING: ""
  MODEL_SERVER_PORT: ""
  MIN_THREADS_ML_MODELS: ""
  # Indexing Configs
  VESPA_SEARCHER_THREADS: ""
  NUM_INDEXING_WORKERS: ""
  DISABLE_INDEX_UPDATE_ON_SWAP: ""
  DASK_JOB_CLIENT_ENABLED: ""
  CONTINUE_ON_CONNECTOR_FAILURE: ""
  EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
  CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
  JIRA_CLOUD_API_VERSION: ""
  JIRA_SERVER_API_VERSION: ""
  GONG_CONNECTOR_START_TIME: ""
  NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
  # Worker Parallelism
  CELERY_WORKER_DOCPROCESSING_CONCURRENCY: ""
  CELERY_WORKER_LIGHT_CONCURRENCY: ""
  CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER: ""
  CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY: ""
  # OnyxBot SlackBot Configs
  ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
  ONYX_BOT_DISPLAY_ERROR_MSGS: ""
  ONYX_BOT_RESPOND_EVERY_CHANNEL: ""
  NOTIFY_SLACKBOT_NO_ANSWER: ""
  DISCORD_BOT_TOKEN: ""
  DISCORD_BOT_INVOKE_CHAR: ""
  # Logging
  # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
  DISABLE_TELEMETRY: ""
  LOG_LEVEL: ""
  LOG_ALL_MODEL_INTERACTIONS: ""
  LOG_ONYX_MODEL_INTERACTIONS: ""
  LOG_VESPA_TIMING_INFORMATION: ""
  # Shared or Non-backend Related
  WEB_DOMAIN: "http://localhost:3000"
  # DOMAIN used by nginx
  DOMAIN: "localhost"
  # Chat Configs
  HARD_DELETE_CHATS: ""
  MAX_ALLOWED_UPLOAD_SIZE_MB: ""
  DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB: ""


================================================
FILE: deployment/terraform/modules/aws/README.md
================================================
# Onyx AWS modules

## Overview
This directory contains Terraform modules to provision the core AWS infrastructure for Onyx:

- `vpc`: Creates a VPC with public/private subnets sized for EKS
- `eks`: Provisions an Amazon EKS cluster, essential addons (EBS CSI, metrics server, cluster autoscaler), and optional IRSA for S3 access
- `postgres`: Creates an Amazon RDS for PostgreSQL instance and returns a connection URL
- `redis`: Creates an ElastiCache for Redis replication group
- `s3`: Creates an S3 bucket and locks access to a provided S3 VPC endpoint
- `opensearch`: Creates an Amazon OpenSearch domain for managed search workloads
- `onyx`: A higher-level composition that wires the above modules together for a complete, opinionated stack

Use the `onyx` module if you want a working EKS + Postgres + Redis + S3 stack with sane defaults. Use the individual modules if you need more granular control.

## Quickstart (copy/paste)
The snippet below shows a minimal working example that:
- Sets up providers
- Waits for EKS to be ready
- Configures `kubernetes` and `helm` providers against the created cluster
- Provisions the full Onyx AWS stack via the `onyx` module

```hcl
locals {
  region = "us-west-2"
}

provider "aws" {
  region = local.region
}

module "onyx" {
  # If your root module is next to this modules/ directory:
  # source = "./modules/aws/onyx"
  # If referencing from this repo as a template, adjust the path accordingly.
  source = "./modules/aws/onyx"

  region            = local.region
  name              = "onyx"            # used as a prefix and workspace-aware
  postgres_username = "pgusername"
  postgres_password = "your-postgres-password"
  # create_vpc    = true  # default true; set to false to use an existing VPC (see below)
}

resource "null_resource" "wait_for_cluster" {
  provisioner "local-exec" {
    command = "aws eks wait cluster-active --name ${module.onyx.cluster_name} --region ${local.region}"
  }
}

data "aws_eks_cluster" "eks" {
  name       = module.onyx.cluster_name
  depends_on = [null_resource.wait_for_cluster]
}

data "aws_eks_cluster_auth" "eks" {
  name       = module.onyx.cluster_name
  depends_on = [null_resource.wait_for_cluster]
}

provider "kubernetes" {
  host                   = data.aws_eks_cluster.eks.endpoint
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data)
  token                  = data.aws_eks_cluster_auth.eks.token
}

provider "helm" {
  kubernetes {
    host                   = data.aws_eks_cluster.eks.endpoint
    cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data)
    token                  = data.aws_eks_cluster_auth.eks.token
  }
}

# Optional: expose handy outputs at the root module level
output "cluster_name" {
  value = module.onyx.cluster_name
}
output "postgres_connection_url" {
  value     = module.onyx.postgres_connection_url
  sensitive = true
}
output "redis_connection_url" {
  value     = module.onyx.redis_connection_url
  sensitive = true
}
```

Apply with:

```bash
terraform init
terraform apply
```

### Using an existing VPC
If you already have a VPC and subnets, disable VPC creation and provide IDs, CIDR, and the ID of the existing S3 gateway endpoint in that VPC:

```hcl
module "onyx" {
  source = "./modules/aws/onyx"

  region            = local.region
  name              = "onyx"
  postgres_username = "pgusername"
  postgres_password = "your-postgres-password"

  create_vpc       = false
  vpc_id           = "vpc-xxxxxxxx"
  private_subnets  = ["subnet-aaaa", "subnet-bbbb", "subnet-cccc"]
  public_subnets   = ["subnet-dddd", "subnet-eeee", "subnet-ffff"]
  vpc_cidr_block   = "10.0.0.0/16"
  s3_vpc_endpoint_id = "vpce-xxxxxxxxxxxxxxxxx"
}
```

## What each module does

### `onyx`
- Orchestrates `vpc`, `eks`, `postgres`, `redis`, and `s3`
- Names resources using `name` and the current Terraform workspace
- Exposes convenient outputs:
  - `cluster_name`: EKS cluster name
  - `postgres_connection_url` (sensitive): `postgres://...`
  - `redis_connection_url` (sensitive): hostname:port

Inputs (common):
- `name` (default `onyx`), `region` (default `us-west-2`), `tags`
- `postgres_username`, `postgres_password`
- `create_vpc` (default true) or existing VPC details and `s3_vpc_endpoint_id`
- WAF controls such as `waf_allowed_ip_cidrs`, `waf_common_rule_set_count_rules`, rate limits, geo restrictions, and logging retention
- Optional OpenSearch controls such as `enable_opensearch`, sizing, credentials, and log retention

### `vpc`
- Builds a VPC sized for EKS with multiple private and public subnets
- Outputs: `vpc_id`, `private_subnets`, `public_subnets`, `vpc_cidr_block`, `s3_vpc_endpoint_id`

### `eks`
- Creates the EKS cluster and node groups
- Enables addons: EBS CSI driver, metrics server, cluster autoscaler
- Optionally configures IRSA for S3 access to specified buckets
- Outputs: `cluster_name`, `cluster_endpoint`, `cluster_certificate_authority_data`, `s3_access_role_arn` (if created)

Key inputs include:
- `cluster_name`, `cluster_version` (default `1.33`)
- `vpc_id`, `subnet_ids`
- `public_cluster_enabled` (default true), `private_cluster_enabled` (default false)
- `cluster_endpoint_public_access_cidrs` (optional)
- `eks_managed_node_groups` (defaults include a main and a vespa-dedicated group with GP3 volumes)
- `s3_bucket_names` (optional list). If set, creates an IRSA role and Kubernetes service account for S3 access

### `postgres`
- Amazon RDS for PostgreSQL with parameterized instance size, storage, version
- Accepts VPC/subnets and ingress CIDRs; returns a ready-to-use connection URL

### `redis`
- ElastiCache for Redis (transit encryption enabled by default)
- Supports optional `auth_token` and instance sizing
- Outputs endpoint, port, and whether SSL is enabled

### `s3`
- Creates an S3 bucket for file storage and scopes access to the provided S3 gateway VPC endpoint

### `opensearch`
- Creates an Amazon OpenSearch domain inside the VPC
- Supports custom subnets, security groups, fine-grained access control, encryption, and CloudWatch log publishing
- Outputs domain endpoints, ARN, and the managed security group ID when it creates one

## Installing the Onyx Helm chart (after Terraform)
Once the cluster is active, deploy application workloads via Helm. You can use the chart in `deployment/helm/charts/onyx`.

```bash
# Set kubeconfig to your new cluster (if you’re not using the TF providers for kubernetes/helm)
aws eks update-kubeconfig --name $(terraform output -raw cluster_name) --region ${AWS_REGION:-us-west-2}

kubectl create namespace onyx --dry-run=client -o yaml | kubectl apply -f -

# If using AWS S3 via IRSA created by the EKS module, consider disabling MinIO
# Replace the path below with the absolute or correct relative path to the onyx Helm chart
helm upgrade --install onyx /path/to/onyx/deployment/helm/charts/onyx \
  --namespace onyx \
  --set minio.enabled=false \
  --set serviceAccount.create=false \
  --set serviceAccount.name=onyx-s3-access
```

Notes:
- The EKS module can create an IRSA role plus a Kubernetes `ServiceAccount` named `onyx-s3-access` (by default in namespace `onyx`) when `s3_bucket_names` is provided. Use that service account in the Helm chart to avoid static S3 credentials.
- If you prefer MinIO inside the cluster, leave `minio.enabled=true` (default) and skip IRSA.

## Workflow tips
- First apply can be infra-only; once EKS is active, install the Helm chart.
- Use Terraform workspaces to create isolated environments; the `onyx` module automatically includes the workspace in resource names.

## Security
- Database and Redis connection outputs are marked sensitive. Handle them carefully.
- When using IRSA, avoid storing long-lived S3 credentials in secrets.


================================================
FILE: deployment/terraform/modules/aws/eks/main.tf
================================================
locals {
  s3_bucket_arns = [for name in var.s3_bucket_names : {
    bucket_arn     = "arn:aws:s3:::${name}"
    bucket_objects = "arn:aws:s3:::${name}/*"
  }]
}

module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 20.0"

  cluster_name    = var.cluster_name
  cluster_version = var.cluster_version

  vpc_id                                   = var.vpc_id
  subnet_ids                               = var.subnet_ids
  cluster_endpoint_public_access           = var.public_cluster_enabled
  cluster_endpoint_private_access          = var.private_cluster_enabled
  cluster_endpoint_public_access_cidrs     = var.cluster_endpoint_public_access_cidrs
  enable_cluster_creator_admin_permissions = true

  # Control plane logging
  cluster_enabled_log_types              = var.cluster_enabled_log_types
  cloudwatch_log_group_retention_in_days = var.cloudwatch_log_group_retention_in_days

  eks_managed_node_group_defaults = {
    ami_type = "AL2023_x86_64_STANDARD"
  }

  eks_managed_node_groups = {
    for k, v in var.eks_managed_node_groups : k => merge(v,
      {
        instance_types = v.instance_types != null ? v.instance_types : (
          k == "main" ? var.main_node_instance_types :
          k == "vespa" ? var.vespa_node_instance_types :
          v.instance_types
        )
      },
      # Only add subnet_ids override for vespa node group if specified
      k == "vespa" && length(var.vespa_node_subnet_ids) > 0 ? {
        subnet_ids = var.vespa_node_subnet_ids
      } : {}
    )
  }

  tags = var.tags
}

# https://aws.amazon.com/blogs/containers/amazon-ebs-csi-driver-is-now-generally-available-in-amazon-eks-add-ons/
data "aws_iam_policy" "ebs_csi_policy" {
  arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy"
}

module "irsa-ebs-csi" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
  version = "4.7.0"

  create_role                   = true
  role_name                     = "AmazonEKSTFEBSCSIRole-${module.eks.cluster_name}"
  provider_url                  = module.eks.oidc_provider
  role_policy_arns              = [data.aws_iam_policy.ebs_csi_policy.arn]
  oidc_fully_qualified_subjects = ["system:serviceaccount:kube-system:ebs-csi-controller-sa"]

  depends_on = [module.eks]
}

# Create the EBS CSI Driver addon for volume provisioning.
resource "aws_eks_addon" "ebs-csi" {
  cluster_name             = module.eks.cluster_name
  addon_name               = "aws-ebs-csi-driver"
  service_account_role_arn = module.irsa-ebs-csi.iam_role_arn
  tags                     = var.tags

  depends_on = [module.eks]
}

# Create GP3 storage class for EBS volumes
resource "kubernetes_storage_class" "gp3_default" {
  count = var.create_gp3_storage_class ? 1 : 0
  metadata {
    name = "gp3"
    annotations = {
      "storageclass.kubernetes.io/is-default-class" = "true"
    }
  }

  storage_provisioner    = "ebs.csi.aws.com"
  reclaim_policy         = "Delete"
  volume_binding_mode    = "WaitForFirstConsumer"
  allow_volume_expansion = true

  parameters = {
    type = "gp3"
  }

  depends_on = [aws_eks_addon.ebs-csi]
}

# Create some important addons for the EKS cluster.
module "eks_blueprints_addons" {
  source  = "aws-ia/eks-blueprints-addons/aws"
  version = "1.16.3"

  cluster_name      = module.eks.cluster_name
  cluster_endpoint  = module.eks.cluster_endpoint
  cluster_version   = module.eks.cluster_version
  oidc_provider_arn = module.eks.oidc_provider_arn

  enable_aws_load_balancer_controller = true
  enable_karpenter                    = false
  enable_metrics_server               = true
  enable_cluster_autoscaler           = true

  depends_on = [module.eks]
}

# Create IAM policy for S3 access (optional)
resource "aws_iam_policy" "s3_access_policy" {
  count       = length(var.s3_bucket_names) == 0 ? 0 : 1
  name        = "${module.eks.cluster_name}-s3-access-policy"
  description = "Policy for S3 access from EKS cluster"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "s3:GetObject",
          "s3:PutObject",
          "s3:DeleteObject",
          "s3:ListBucket"
        ]
        Resource = flatten([
          for a in local.s3_bucket_arns : [a.bucket_arn, a.bucket_objects]
        ])
      }
    ]
  })
}

# Create IAM role for workload access using IRSA (S3 + RDS)
module "irsa-workload-access" {
  count   = length(var.s3_bucket_names) == 0 ? 0 : 1
  source  = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
  version = "4.7.0"

  create_role                   = true
  role_name                     = "AmazonEKSTFWorkloadAccessRole-${module.eks.cluster_name}"
  provider_url                  = module.eks.oidc_provider
  role_policy_arns              = [aws_iam_policy.s3_access_policy[0].arn]
  oidc_fully_qualified_subjects = ["system:serviceaccount:${var.irsa_service_account_namespace}:${var.irsa_service_account_name}"]

  depends_on = [module.eks]
}

# Create Kubernetes service account for S3 access (optional)
resource "kubernetes_service_account" "s3_access" {
  count = length(var.s3_bucket_names) == 0 ? 0 : 1
  metadata {
    name      = var.irsa_service_account_name
    namespace = var.irsa_service_account_namespace
    annotations = {
      "eks.amazonaws.com/role-arn" = module.irsa-workload-access[0].iam_role_arn
    }
  }
}

# If RDS IAM auth is enabled, create a policy to allow the workload IRSA role to connect to RDS using IAM auth
resource "aws_iam_policy" "rds_iam_connect_policy" {
  count       = var.enable_rds_iam_for_service_account && var.rds_db_connect_arn != null ? 1 : 0
  name        = "${module.eks.cluster_name}-rds-iam-connect-policy"
  description = "Allow EKS service account to connect to RDS using IAM auth"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = [
          "rds-db:connect"
        ],
        Resource = [
          var.rds_db_connect_arn
        ]
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "attach_rds_connect_to_workload_role" {
  count      = var.enable_rds_iam_for_service_account && var.rds_db_connect_arn != null ? 1 : 0
  role       = module.irsa-workload-access[0].iam_role_name
  policy_arn = aws_iam_policy.rds_iam_connect_policy[0].arn

  depends_on = [module.irsa-workload-access]
}


================================================
FILE: deployment/terraform/modules/aws/eks/outputs.tf
================================================
output "cluster_name" {
  value = module.eks.cluster_name
}

output "cluster_endpoint" {
  value = module.eks.cluster_endpoint
}

output "cluster_certificate_authority_data" {
  value     = module.eks.cluster_certificate_authority_data
  sensitive = true
}

output "workload_irsa_role_arn" {
  description = "ARN of the IAM role for workloads (S3 + optional RDS)"
  value       = length(module.irsa-workload-access) > 0 ? module.irsa-workload-access[0].iam_role_arn : null
}


================================================
FILE: deployment/terraform/modules/aws/eks/variables.tf
================================================
variable "cluster_name" {
  type        = string
  description = "The name of the cluster"
}

variable "cluster_version" {
  type        = string
  description = "The EKS version of the cluster"
  default     = "1.33"
}

variable "vpc_id" {
  type        = string
  description = "The ID of the VPC"
}

variable "subnet_ids" {
  type        = list(string)
  description = "The IDs of the subnets"
}

variable "public_cluster_enabled" {
  type        = bool
  description = "Whether to enable public cluster access"
  default     = true
}

variable "private_cluster_enabled" {
  type        = bool
  description = "Whether to enable private cluster access"
  default     = false
}

variable "cluster_endpoint_public_access_cidrs" {
  type        = list(string)
  description = "List of CIDR blocks allowed to access the public EKS API endpoint"
  default     = []
}

variable "main_node_instance_types" {
  type        = list(string)
  description = "Instance types for the main node group"
  default     = ["m7i.4xlarge"]
}

variable "vespa_node_instance_types" {
  type        = list(string)
  description = "Instance types for the Vespa node group"
  default     = ["m6i.2xlarge"]
}

variable "vespa_node_subnet_ids" {
  type        = list(string)
  description = "Subnet IDs for the Vespa node group (must be in same AZ as Vespa PV). If not specified, uses all cluster subnets."
  default     = []
}

variable "eks_managed_node_groups" {
  type        = map(any)
  description = "EKS managed node groups with EBS volume configuration"
  default = {
    # Main node group for all pods except Vespa
    main = {
      name           = "main-node-group"
      instance_types = null # Will be set from var.main_node_instance_types
      min_size       = 1
      max_size       = 5
      # EBS volume configuration
      block_device_mappings = {
        xvda = {
          device_name = "/dev/xvda"
          ebs = {
            volume_size           = 50
            volume_type           = "gp3"
            encrypted             = true
            delete_on_termination = true
            iops                  = 3000
            throughput            = 125
          }
        }
      }
      # No taints for main node group
      taints = []
    }
    # Vespa dedicated node group
    vespa = {
      name           = "vespa-node-group"
      instance_types = null # Will be set from var.vespa_node_instance_types
      min_size       = 1
      max_size       = 1
      # Larger EBS volume for Vespa storage
      block_device_mappings = {
        xvda = {
          device_name = "/dev/xvda"
          ebs = {
            volume_size           = 100
            volume_type           = "gp3"
            encrypted             = true
            delete_on_termination = true
            iops                  = 3000
            throughput            = 125
          }
        }
      }
      # Taint to ensure only Vespa pods can schedule here
      taints = [
        {
          key    = "vespa-dedicated"
          value  = "true"
          effect = "NO_SCHEDULE"
        }
      ]
    }
  }
}

variable "tags" {
  type        = map(string)
  description = "Tags to apply to the resources"
  default     = {}
}

variable "create_gp3_storage_class" {
  type        = bool
  description = "Whether to create the gp3 storage class. The gp3 storage class will be patched to make it default and allow volume expansion."
  default     = true
}

variable "s3_bucket_names" {
  type        = list(string)
  description = "List of S3 bucket names that workloads in this cluster are allowed to access via IRSA. If empty, no S3 access role/policy/service account will be created."
  default     = []
}

variable "irsa_service_account_namespace" {
  type        = string
  description = "Namespace for IRSA-enabled Kubernetes service accounts (used by S3 and RDS)"
  default     = "onyx"
}

variable "irsa_service_account_name" {
  type        = string
  description = "Name of the IRSA-enabled Kubernetes service account for workload access (S3 + optional RDS)"
  default     = "onyx-workload-access"
}

variable "enable_rds_iam_for_service_account" {
  type        = bool
  description = "Whether to create a dedicated RDS IRSA role and service account (grants rds-db:connect)"
  default     = false
}

variable "rds_db_username" {
  type        = string
  description = "Database username to allow via rds-db:connect"
  default     = null
}

variable "rds_db_connect_arn" {
  type        = string
  description = "Full rds-db:connect ARN to allow (required when enable_rds_iam_for_service_account is true)"
  default     = null
}

variable "cluster_enabled_log_types" {
  type        = list(string)
  description = "EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)"
  default     = ["api", "audit", "authenticator", "controllerManager", "scheduler"]

  validation {
    condition     = alltrue([for t in var.cluster_enabled_log_types : contains(["api", "audit", "authenticator", "controllerManager", "scheduler"], t)])
    error_message = "Each entry must be one of: api, audit, authenticator, controllerManager, scheduler."
  }
}

variable "cloudwatch_log_group_retention_in_days" {
  type        = number
  description = "Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)"
  default     = 30

  validation {
    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.cloudwatch_log_group_retention_in_days)
    error_message = "Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653)."
  }
}


================================================
FILE: deployment/terraform/modules/aws/onyx/main.tf
================================================
locals {
  workspace       = terraform.workspace
  name            = var.name
  merged_tags     = merge(var.tags, { tenant = local.name, environment = local.workspace })
  vpc_name        = "${var.name}-vpc-${local.workspace}"
  cluster_name    = "${var.name}-${local.workspace}"
  bucket_name     = "${var.name}-file-store-${local.workspace}"
  redis_name      = "${var.name}-redis-${local.workspace}"
  postgres_name   = "${var.name}-postgres-${local.workspace}"
  opensearch_name = var.opensearch_domain_name != null ? var.opensearch_domain_name : "${var.name}-opensearch-${local.workspace}"

  vpc_id          = var.create_vpc ? module.vpc[0].vpc_id : var.vpc_id
  private_subnets = var.create_vpc ? module.vpc[0].private_subnets : var.private_subnets
  public_subnets  = var.create_vpc ? module.vpc[0].public_subnets : var.public_subnets
  vpc_cidr_block  = var.create_vpc ? module.vpc[0].vpc_cidr_block : var.vpc_cidr_block
}

provider "aws" {
  region = var.region
  default_tags {
    tags = local.merged_tags
  }
}

module "vpc" {
  source = "../vpc"

  count    = var.create_vpc ? 1 : 0
  vpc_name = local.vpc_name
  tags     = local.merged_tags
}

module "redis" {
  source        = "../redis"
  name          = local.redis_name
  vpc_id        = local.vpc_id
  subnet_ids    = local.private_subnets
  instance_type = "cache.m6g.xlarge"
  ingress_cidrs = [local.vpc_cidr_block]
  tags          = local.merged_tags

  # Pass Redis authentication token as a sensitive input variable
  auth_token = var.redis_auth_token
}

module "postgres" {
  source        = "../postgres"
  identifier    = local.postgres_name
  vpc_id        = local.vpc_id
  subnet_ids    = local.private_subnets
  ingress_cidrs = [local.vpc_cidr_block]

  username            = var.postgres_username
  password            = var.postgres_password
  tags                = local.merged_tags
  enable_rds_iam_auth = var.enable_iam_auth

  backup_retention_period = var.postgres_backup_retention_period
  backup_window           = var.postgres_backup_window
}

module "s3" {
  source             = "../s3"
  bucket_name        = local.bucket_name
  tags               = local.merged_tags
  s3_vpc_endpoint_id = var.create_vpc ? module.vpc[0].s3_vpc_endpoint_id : var.s3_vpc_endpoint_id
}

module "eks" {
  source          = "../eks"
  cluster_name    = local.cluster_name
  vpc_id          = local.vpc_id
  subnet_ids      = concat(local.private_subnets, local.public_subnets)
  tags            = local.merged_tags
  s3_bucket_names = [local.bucket_name]

  # Wire RDS IAM connection for the same IRSA service account used by apps
  enable_rds_iam_for_service_account = var.enable_iam_auth
  rds_db_username                    = var.postgres_username
  rds_db_connect_arn                 = var.rds_db_connect_arn

  # These variables must be defined in variables.tf or passed in via parent module
  public_cluster_enabled               = var.public_cluster_enabled
  private_cluster_enabled              = var.private_cluster_enabled
  cluster_endpoint_public_access_cidrs = var.cluster_endpoint_public_access_cidrs

  # Control plane logging
  cluster_enabled_log_types              = var.eks_cluster_enabled_log_types
  cloudwatch_log_group_retention_in_days = var.eks_cloudwatch_log_group_retention_in_days
}

module "waf" {
  source = "../waf"

  name = local.name
  tags = local.merged_tags

  # WAF configuration with sensible defaults
  allowed_ip_cidrs                      = var.waf_allowed_ip_cidrs
  common_rule_set_count_rules           = var.waf_common_rule_set_count_rules
  rate_limit_requests_per_5_minutes     = var.waf_rate_limit_requests_per_5_minutes
  api_rate_limit_requests_per_5_minutes = var.waf_api_rate_limit_requests_per_5_minutes
  geo_restriction_countries             = var.waf_geo_restriction_countries
  enable_logging                        = var.waf_enable_logging
  log_retention_days                    = var.waf_log_retention_days
}

module "opensearch" {
  source = "../opensearch"
  count  = var.enable_opensearch ? 1 : 0

  name   = local.opensearch_name
  vpc_id = local.vpc_id
  # Prefer setting subnet_ids explicitly if the state of private_subnets is
  # unclear.
  subnet_ids    = length(var.opensearch_subnet_ids) > 0 ? var.opensearch_subnet_ids : slice(local.private_subnets, 0, 3)
  ingress_cidrs = [local.vpc_cidr_block]
  tags          = local.merged_tags

  # Reuse EKS security groups
  security_group_ids = [module.eks.node_security_group_id, module.eks.cluster_security_group_id]

  # Configuration
  engine_version                = var.opensearch_engine_version
  instance_type                 = var.opensearch_instance_type
  instance_count                = var.opensearch_instance_count
  dedicated_master_enabled      = var.opensearch_dedicated_master_enabled
  dedicated_master_type         = var.opensearch_dedicated_master_type
  multi_az_with_standby_enabled = var.opensearch_multi_az_with_standby_enabled
  ebs_volume_size               = var.opensearch_ebs_volume_size
  ebs_throughput                = var.opensearch_ebs_throughput

  # Authentication
  internal_user_database_enabled = var.opensearch_internal_user_database_enabled
  master_user_name               = var.opensearch_master_user_name
  master_user_password           = var.opensearch_master_user_password

  # Logging
  enable_logging     = var.opensearch_enable_logging
  log_retention_days = var.opensearch_log_retention_days
}


================================================
FILE: deployment/terraform/modules/aws/onyx/outputs.tf
================================================
output "redis_connection_url" {
  value     = module.redis.redis_endpoint
  sensitive = true
}

output "cluster_name" {
  value = module.eks.cluster_name
}

output "postgres_endpoint" {
  description = "RDS endpoint hostname"
  value       = module.postgres.endpoint
}

output "postgres_port" {
  description = "RDS port"
  value       = module.postgres.port
}

output "postgres_db_name" {
  description = "RDS database name"
  value       = module.postgres.db_name
}

output "postgres_username" {
  description = "RDS master username"
  value       = module.postgres.username
  sensitive   = true
}

output "postgres_dbi_resource_id" {
  description = "RDS DB instance resource id"
  value       = module.postgres.dbi_resource_id
}

output "opensearch_endpoint" {
  description = "OpenSearch domain endpoint"
  value       = var.enable_opensearch ? module.opensearch[0].domain_endpoint : null
}

output "opensearch_dashboard_endpoint" {
  description = "OpenSearch Dashboards endpoint"
  value       = var.enable_opensearch ? module.opensearch[0].kibana_endpoint : null
}

output "opensearch_domain_arn" {
  description = "OpenSearch domain ARN"
  value       = var.enable_opensearch ? module.opensearch[0].domain_arn : null
}


================================================
FILE: deployment/terraform/modules/aws/onyx/variables.tf
================================================
variable "name" {
  type        = string
  description = "Name of the Onyx resources. Example: 'onyx'"
  default     = "onyx"
}

variable "region" {
  type        = string
  description = "AWS region for all resources"
  default     = "us-west-2"
}

variable "create_vpc" {
  type        = bool
  description = "Whether to create a new VPC"
  default     = true
}

variable "vpc_id" {
  type        = string
  description = "ID of the VPC. Required if create_vpc is false."
  default     = null
}

variable "private_subnets" {
  type        = list(string)
  description = "Private subnets. Required if create_vpc is false."
  default     = [] # This will default to 0.0.0.0/0 if not provided
}

variable "public_subnets" {
  type        = list(string)
  description = "Public subnets. Required if create_vpc is false."
  default     = []
}

variable "vpc_cidr_block" {
  type        = string
  description = "VPC CIDR block. Required if create_vpc is false."
  default     = null
}

variable "s3_vpc_endpoint_id" {
  type        = string
  description = "ID of an existing S3 gateway VPC endpoint when reusing an existing VPC"
  default     = null

  validation {
    condition     = var.create_vpc || var.s3_vpc_endpoint_id != null
    error_message = "s3_vpc_endpoint_id must be provided when create_vpc is false."
  }
}

variable "tags" {
  type        = map(string)
  description = "Base tags applied to all AWS resources"
  default = {
    "project" = "onyx"
  }
}

variable "postgres_username" {
  type        = string
  description = "Username for the postgres database"
  default     = "postgres"
  sensitive   = true
}

variable "postgres_password" {
  type        = string
  description = "Password for the postgres database"
  default     = null
  sensitive   = true
}

variable "public_cluster_enabled" {
  type        = bool
  description = "Whether to enable public cluster access"
  default     = true
}

variable "private_cluster_enabled" {
  type        = bool
  description = "Whether to enable private cluster access"
  default     = false # Should be true for production, false for dev/staging
}

variable "cluster_endpoint_public_access_cidrs" {
  type        = list(string)
  description = "CIDR blocks allowed to access the public EKS API endpoint"
  default     = []
}

variable "redis_auth_token" {
  type        = string
  description = "Authentication token for the Redis cluster"
  default     = null
  sensitive   = true
}

variable "enable_iam_auth" {
  type        = bool
  description = "Enable AWS IAM authentication for the RDS Postgres instance and wire IRSA policies"
  default     = false
}

variable "rds_db_connect_arn" {
  type        = string
  description = "Full rds-db:connect ARN to pass to the EKS module. Required when enable_rds_iam_auth is true."
  default     = null
}

# WAF Configuration Variables
variable "waf_rate_limit_requests_per_5_minutes" {
  type        = number
  description = "Rate limit for requests per 5 minutes per IP address"
  default     = 2000
}

variable "waf_allowed_ip_cidrs" {
  type        = list(string)
  description = "Optional IPv4 CIDR ranges allowed through the WAF. Leave empty to disable IP allowlisting."
  default     = []
}

variable "waf_common_rule_set_count_rules" {
  type        = list(string)
  description = "Subrules within AWSManagedRulesCommonRuleSet to override to COUNT instead of BLOCK."
  default     = []
}

variable "waf_api_rate_limit_requests_per_5_minutes" {
  type        = number
  description = "Rate limit for API requests per 5 minutes per IP address"
  default     = 1000
}

variable "waf_geo_restriction_countries" {
  type        = list(string)
  description = "List of country codes to block. Leave empty to disable geo restrictions"
  default     = []
}

variable "waf_enable_logging" {
  type        = bool
  description = "Enable WAF logging to CloudWatch"
  default     = true
}

variable "waf_log_retention_days" {
  type        = number
  description = "Number of days to retain WAF logs"
  default     = 90
}

# OpenSearch Configuration Variables
variable "enable_opensearch" {
  type        = bool
  description = "Whether to create an OpenSearch domain"
  default     = false
}

variable "opensearch_engine_version" {
  type        = string
  description = "OpenSearch engine version"
  default     = "3.3"
}

variable "opensearch_instance_type" {
  type        = string
  description = "Instance type for OpenSearch data nodes"
  default     = "r8g.large.search"
}

variable "opensearch_instance_count" {
  type        = number
  description = "Number of OpenSearch data nodes"
  default     = 3
}

variable "opensearch_dedicated_master_enabled" {
  type        = bool
  description = "Whether to enable dedicated master nodes for OpenSearch"
  default     = true
}

variable "opensearch_dedicated_master_type" {
  type        = string
  description = "Instance type for dedicated master nodes"
  default     = "m7g.large.search"
}

variable "opensearch_multi_az_with_standby_enabled" {
  type        = bool
  description = "Whether to enable Multi-AZ with Standby deployment"
  default     = true
}

variable "opensearch_ebs_volume_size" {
  type        = number
  description = "EBS volume size in GiB per OpenSearch node"
  default     = 512
}

variable "opensearch_ebs_throughput" {
  type        = number
  description = "Throughput in MiB/s for gp3 volumes"
  default     = 256
}

variable "opensearch_internal_user_database_enabled" {
  type        = bool
  description = "Whether to enable the internal user database for fine-grained access control"
  default     = true
}

variable "opensearch_master_user_name" {
  type        = string
  description = "Master user name for OpenSearch internal user database"
  default     = null
  sensitive   = true
}

variable "opensearch_master_user_password" {
  type        = string
  description = "Master user password for OpenSearch internal user database"
  default     = null
  sensitive   = true
}

variable "opensearch_domain_name" {
  type        = string
  description = "Override the OpenSearch domain name. If null, defaults to {name}-opensearch-{workspace}."
  default     = null
}

variable "opensearch_enable_logging" {
  type    = bool
  default = false
}

variable "opensearch_log_retention_days" {
  type        = number
  description = "Number of days to retain OpenSearch CloudWatch logs (0 = never expire)"
  default     = 0
}

variable "opensearch_subnet_ids" {
  type        = list(string)
  description = "Subnet IDs for OpenSearch. If empty, uses first 3 private subnets."
  default     = []
}

# RDS Backup Configuration
variable "postgres_backup_retention_period" {
  type        = number
  description = "Number of days to retain automated RDS backups (0 to disable)"
  default     = 7
}

variable "postgres_backup_window" {
  type        = string
  description = "Preferred UTC time window for automated RDS backups (hh24:mi-hh24:mi)"
  default     = "03:00-04:00"
}

# EKS Control Plane Logging
variable "eks_cluster_enabled_log_types" {
  type        = list(string)
  description = "EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)"
  default     = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
}

variable "eks_cloudwatch_log_group_retention_in_days" {
  type        = number
  description = "Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)"
  default     = 30

  validation {
    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.eks_cloudwatch_log_group_retention_in_days)
    error_message = "Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653)."
  }
}


================================================
FILE: deployment/terraform/modules/aws/onyx/versions.tf
================================================
terraform {
  required_version = ">= 1.12.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.100"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "~> 2.16"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.37"
    }
  }
}


================================================
FILE: deployment/terraform/modules/aws/opensearch/main.tf
================================================
# OpenSearch domain security group
resource "aws_security_group" "opensearch_sg" {
  count       = length(var.security_group_ids) > 0 ? 0 : 1
  name        = "${var.name}-sg"
  description = "Allow inbound traffic to OpenSearch from VPC"
  vpc_id      = var.vpc_id
  tags        = var.tags

  ingress {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = var.ingress_cidrs
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# Service-linked role for OpenSearch (required for VPC deployment)
# This may already exist in your account - if so, import it or set create_service_linked_role = false
resource "aws_iam_service_linked_role" "opensearch" {
  count            = var.create_service_linked_role ? 1 : 0
  aws_service_name = "opensearchservice.amazonaws.com"
}

# IAM policy for OpenSearch access
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}

# KMS key lookup for encryption at rest
data "aws_kms_key" "opensearch" {
  key_id = "alias/aws/es"
}

# Access policy - allows all principals within the VPC (secured by VPC + security groups)
data "aws_iam_policy_document" "opensearch_access" {
  statement {
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = ["*"]
    }

    actions = ["es:*"]

    resources = [
      "arn:aws:es:${data.aws_region.current.id}:${data.aws_caller_identity.current.account_id}:domain/${var.name}/*"
    ]
  }
}

# OpenSearch domain
resource "aws_opensearch_domain" "main" {
  domain_name    = var.name
  engine_version = "OpenSearch_${var.engine_version}"

  cluster_config {
    instance_type                 = var.instance_type
    instance_count                = var.instance_count
    zone_awareness_enabled        = var.zone_awareness_enabled
    dedicated_master_enabled      = var.dedicated_master_enabled
    dedicated_master_type         = var.dedicated_master_enabled ? var.dedicated_master_type : null
    dedicated_master_count        = var.dedicated_master_enabled ? var.dedicated_master_count : null
    multi_az_with_standby_enabled = var.multi_az_with_standby_enabled
    warm_enabled                  = var.warm_enabled
    warm_type                     = var.warm_enabled ? var.warm_type : null
    warm_count                    = var.warm_enabled ? var.warm_count : null

    dynamic "zone_awareness_config" {
      for_each = var.zone_awareness_enabled ? [1] : []
      content {
        availability_zone_count = var.availability_zone_count
      }
    }

    dynamic "cold_storage_options" {
      for_each = var.cold_storage_enabled ? [1] : []
      content {
        enabled = true
      }
    }
  }

  ebs_options {
    ebs_enabled = true
    volume_type = var.ebs_volume_type
    volume_size = var.ebs_volume_size
    iops        = var.ebs_volume_type == "gp3" || var.ebs_volume_type == "io1" ? var.ebs_iops : null
    throughput  = var.ebs_volume_type == "gp3" ? var.ebs_throughput : null
  }

  vpc_options {
    subnet_ids         = var.subnet_ids
    security_group_ids = length(var.security_group_ids) > 0 ? var.security_group_ids : [aws_security_group.opensearch_sg[0].id]
  }

  encrypt_at_rest {
    enabled    = true
    kms_key_id = var.kms_key_id != null ? var.kms_key_id : data.aws_kms_key.opensearch.arn
  }

  node_to_node_encryption {
    enabled = true
  }

  domain_endpoint_options {
    enforce_https       = true
    tls_security_policy = var.tls_security_policy
  }

  advanced_security_options {
    enabled                        = true
    anonymous_auth_enabled         = false
    internal_user_database_enabled = var.internal_user_database_enabled

    dynamic "master_user_options" {
      for_each = var.internal_user_database_enabled ? [1] : []
      content {
        master_user_name     = var.master_user_name
        master_user_password = var.master_user_password
      }
    }

    dynamic "master_user_options" {
      for_each = var.internal_user_database_enabled ? [] : [1]
      content {
        master_user_arn = var.master_user_arn
      }
    }
  }

  advanced_options = var.advanced_options

  access_policies = data.aws_iam_policy_document.opensearch_access.json

  auto_tune_options {
    desired_state       = var.auto_tune_enabled ? "ENABLED" : "DISABLED"
    rollback_on_disable = var.auto_tune_rollback_on_disable
  }

  off_peak_window_options {
    enabled = var.off_peak_window_enabled

    dynamic "off_peak_window" {
      for_each = var.off_peak_window_enabled ? [1] : []
      content {
        window_start_time {
          hours   = var.off_peak_window_start_hours
          minutes = var.off_peak_window_start_minutes
        }
      }
    }
  }

  software_update_options {
    auto_software_update_enabled = var.auto_software_update_enabled
  }

  dynamic "log_publishing_options" {
    for_each = var.enable_logging ? ["INDEX_SLOW_LOGS", "SEARCH_SLOW_LOGS", "ES_APPLICATION_LOGS"] : []
    content {
      cloudwatch_log_group_arn = "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.log_group_name}"
      log_type                 = log_publishing_options.value
    }
  }

  tags = var.tags

  depends_on = [
    aws_iam_service_linked_role.opensearch,
    aws_cloudwatch_log_resource_policy.opensearch
  ]

  lifecycle {
    precondition {
      condition     = !var.internal_user_database_enabled || var.master_user_name != null
      error_message = "master_user_name is required when internal_user_database_enabled is true."
    }
    precondition {
      condition     = !var.internal_user_database_enabled || var.master_user_password != null
      error_message = "master_user_password is required when internal_user_database_enabled is true."
    }
  }
}

# CloudWatch log group for OpenSearch
locals {
  log_group_name = var.log_group_name != null ? var.log_group_name : "/aws/OpenSearchService/domains/${var.name}/search-logs"
}

resource "aws_cloudwatch_log_group" "opensearch" {
  count             = var.enable_logging ? 1 : 0
  name              = local.log_group_name
  retention_in_days = var.log_retention_days
  tags              = var.tags
}

# CloudWatch log resource policy for OpenSearch
data "aws_iam_policy_document" "opensearch_log_policy" {
  count = var.enable_logging ? 1 : 0

  statement {
    effect = "Allow"

    principals {
      type        = "Service"
      identifiers = ["es.amazonaws.com"]
    }

    actions = [
      "logs:PutLogEvents",
      "logs:CreateLogStream",
    ]

    resources = ["arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.log_group_name}:*"]
  }
}

resource "aws_cloudwatch_log_resource_policy" "opensearch" {
  count           = var.enable_logging ? 1 : 0
  policy_name     = "OpenSearchService-${var.name}-Search-logs"
  policy_document = data.aws_iam_policy_document.opensearch_log_policy[0].json
}


================================================
FILE: deployment/terraform/modules/aws/opensearch/outputs.tf
================================================
output "domain_endpoint" {
  description = "The endpoint of the OpenSearch domain"
  value       = aws_opensearch_domain.main.endpoint
}

output "domain_arn" {
  description = "The ARN of the OpenSearch domain"
  value       = aws_opensearch_domain.main.arn
}

output "domain_id" {
  description = "The unique identifier for the OpenSearch domain"
  value       = aws_opensearch_domain.main.domain_id
}

output "domain_name" {
  description = "The name of the OpenSearch domain"
  value       = aws_opensearch_domain.main.domain_name
}

output "kibana_endpoint" {
  description = "The OpenSearch Dashboards endpoint"
  value       = aws_opensearch_domain.main.dashboard_endpoint
}

output "security_group_id" {
  description = "The ID of the OpenSearch security group"
  value       = length(aws_security_group.opensearch_sg) > 0 ? aws_security_group.opensearch_sg[0].id : null
}


================================================
FILE: deployment/terraform/modules/aws/opensearch/variables.tf
================================================
variable "name" {
  description = "Name of the OpenSearch domain"
  type        = string
}

variable "vpc_id" {
  description = "ID of the VPC to deploy the OpenSearch domain into"
  type        = string
}

variable "subnet_ids" {
  description = "List of subnet IDs for the OpenSearch domain"
  type        = list(string)
}

variable "ingress_cidrs" {
  description = "CIDR blocks allowed to access OpenSearch"
  type        = list(string)
}

variable "engine_version" {
  description = "OpenSearch engine version (e.g., 2.17, 3.3)"
  type        = string
  default     = "3.3"
}

variable "instance_type" {
  description = "Instance type for data nodes"
  type        = string
  default     = "r8g.large.search"
}

variable "instance_count" {
  description = "Number of data nodes"
  type        = number
  default     = 3
}

variable "zone_awareness_enabled" {
  description = "Whether to enable zone awareness for the cluster"
  type        = bool
  default     = true
}

variable "availability_zone_count" {
  description = "Number of availability zones (2 or 3)"
  type        = number
  default     = 3
}

variable "dedicated_master_enabled" {
  description = "Whether to enable dedicated master nodes"
  type        = bool
  default     = true
}

variable "dedicated_master_type" {
  description = "Instance type for dedicated master nodes"
  type        = string
  default     = "m7g.large.search"
}

variable "dedicated_master_count" {
  description = "Number of dedicated master nodes (must be 3 or 5)"
  type        = number
  default     = 3
}

variable "multi_az_with_standby_enabled" {
  description = "Whether to enable Multi-AZ with Standby deployment"
  type        = bool
  default     = true
}

variable "warm_enabled" {
  description = "Whether to enable warm storage"
  type        = bool
  default     = false
}

variable "warm_type" {
  description = "Instance type for warm nodes"
  type        = string
  default     = "ultrawarm1.medium.search"
}

variable "warm_count" {
  description = "Number of warm nodes"
  type        = number
  default     = 2
}

variable "cold_storage_enabled" {
  description = "Whether to enable cold storage"
  type        = bool
  default     = false
}

variable "ebs_volume_type" {
  description = "EBS volume type (gp3, gp2, io1)"
  type        = string
  default     = "gp3"
}

variable "ebs_volume_size" {
  description = "EBS volume size in GB per node"
  type        = number
  default     = 512
}

variable "ebs_iops" {
  description = "IOPS for gp3/io1 volumes"
  type        = number
  default     = 3000
}

variable "ebs_throughput" {
  description = "Throughput in MiB/s for gp3 volumes"
  type        = number
  default     = 256
}

variable "kms_key_id" {
  description = "KMS key ID for encryption at rest (uses AWS managed key if not specified)"
  type        = string
  default     = null
}

variable "tls_security_policy" {
  description = "TLS security policy for HTTPS endpoints"
  type        = string
  default     = "Policy-Min-TLS-1-2-2019-07"
}

variable "internal_user_database_enabled" {
  description = "Whether to enable the internal user database for fine-grained access control"
  type        = bool
  default     = true
}

variable "master_user_name" {
  description = "Master user name for internal user database"
  type        = string
  default     = null
  sensitive   = true
}

variable "master_user_password" {
  description = "Master user password for internal user database"
  type        = string
  default     = null
  sensitive   = true
}

variable "master_user_arn" {
  description = "IAM ARN for the master user (used when internal_user_database_enabled is false)"
  type        = string
  default     = null
}

variable "advanced_options" {
  description = "Advanced options for OpenSearch"
  type        = map(string)
  default = {
    "indices.fielddata.cache.size"           = "20"
    "indices.query.bool.max_clause_count"    = "1024"
    "override_main_response_version"         = "false"
    "rest.action.multi.allow_explicit_index" = "true"
  }
}

variable "auto_tune_enabled" {
  description = "Whether to enable Auto-Tune"
  type        = bool
  default     = true
}

variable "auto_tune_rollback_on_disable" {
  description = "Whether to roll back Auto-Tune changes when disabled"
  type        = string
  default     = "NO_ROLLBACK"
}

variable "off_peak_window_enabled" {
  description = "Whether to enable off-peak window for maintenance"
  type        = bool
  default     = true
}

variable "off_peak_window_start_hours" {
  description = "Hour (UTC) when off-peak window starts (0-23)"
  type        = number
  default     = 6
}

variable "off_peak_window_start_minutes" {
  description = "Minutes when off-peak window starts (0-59)"
  type        = number
  default     = 0
}

variable "auto_software_update_enabled" {
  description = "Whether to enable automatic software updates"
  type        = bool
  default     = false
}

variable "enable_logging" {
  description = "Whether to enable CloudWatch logging"
  type        = bool
  default     = false
}

variable "create_service_linked_role" {
  description = "Whether to create the OpenSearch service-linked role (set to false if it already exists)"
  type        = bool
  default     = false
}

variable "log_retention_days" {
  description = "Number of days to retain CloudWatch logs"
  type        = number
  default     = 30
}

variable "security_group_ids" {
  description = "Existing security group IDs to attach. If empty, a new SG is created."
  type        = list(string)
  default     = []
}

variable "log_group_name" {
  description = "CloudWatch log group name. Defaults to AWS console convention."
  type        = string
  default     = null
}

variable "tags" {
  description = "Tags to apply to OpenSearch resources"
  type        = map(string)
  default     = {}
}


================================================
FILE: deployment/terraform/modules/aws/postgres/main.tf
================================================
resource "aws_db_subnet_group" "this" {
  name       = "${var.identifier}-subnet-group"
  subnet_ids = var.subnet_ids
  tags       = var.tags
}

resource "aws_security_group" "this" {
  name        = "${var.identifier}-sg"
  description = "Allow PostgreSQL access"
  vpc_id      = var.vpc_id
  tags        = var.tags

  ingress {
    description = "Postgres ingress"
    from_port   = 5432
    to_port     = 5432
    protocol    = "tcp"
    cidr_blocks = var.ingress_cidrs
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_db_instance" "this" {
  identifier        = var.identifier
  db_name           = var.db_name
  engine            = "postgres"
  engine_version    = var.engine_version
  instance_class    = var.instance_type
  allocated_storage = var.storage_gb
  username          = var.username
  password          = var.password

  # Enable IAM authentication for the RDS instance
  iam_database_authentication_enabled = var.enable_rds_iam_auth

  db_subnet_group_name   = aws_db_subnet_group.this.name
  vpc_security_group_ids = [aws_security_group.this.id]
  publicly_accessible    = false
  deletion_protection    = true
  storage_encrypted      = true

  # Automated backups
  backup_retention_period = var.backup_retention_period
  backup_window           = var.backup_window

  tags = var.tags
}

# CloudWatch alarm for CPU utilization monitoring
resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
  alarm_name          = "${var.identifier}-cpu-utilization"
  alarm_description   = "RDS CPU utilization for ${var.identifier}"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.cpu_alarm_evaluation_periods
  metric_name         = "CPUUtilization"
  namespace           = "AWS/RDS"
  period              = var.cpu_alarm_period
  statistic           = "Average"
  threshold           = var.cpu_alarm_threshold
  treat_missing_data  = "missing"

  alarm_actions = var.alarm_actions
  ok_actions    = var.alarm_actions

  dimensions = {
    DBInstanceIdentifier = aws_db_instance.this.identifier
  }

  tags = var.tags
}

# CloudWatch alarm for disk IO monitoring
resource "aws_cloudwatch_metric_alarm" "read_iops" {
  alarm_name          = "${var.identifier}-read-iops"
  alarm_description   = "RDS ReadIOPS for ${var.identifier}"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.iops_alarm_evaluation_periods
  metric_name         = "ReadIOPS"
  namespace           = "AWS/RDS"
  period              = var.iops_alarm_period
  statistic           = "Average"
  threshold           = var.read_iops_alarm_threshold
  treat_missing_data  = "missing"

  alarm_actions = var.alarm_actions
  ok_actions    = var.alarm_actions

  dimensions = {
    DBInstanceIdentifier = aws_db_instance.this.identifier
  }

  tags = var.tags
}

# CloudWatch alarm for freeable memory monitoring
resource "aws_cloudwatch_metric_alarm" "freeable_memory" {
  alarm_name          = "${var.identifier}-freeable-memory"
  alarm_description   = "RDS freeable memory for ${var.identifier}"
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = var.memory_alarm_evaluation_periods
  metric_name         = "FreeableMemory"
  namespace           = "AWS/RDS"
  period              = var.memory_alarm_period
  statistic           = "Average"
  threshold           = var.memory_alarm_threshold
  treat_missing_data  = "missing"

  alarm_actions = var.alarm_actions
  ok_actions    = var.alarm_actions

  dimensions = {
    DBInstanceIdentifier = aws_db_instance.this.identifier
  }

  tags = var.tags
}


================================================
FILE: deployment/terraform/modules/aws/postgres/outputs.tf
================================================
output "endpoint" {
  description = "RDS endpoint hostname"
  value       = aws_db_instance.this.endpoint
}

output "port" {
  description = "RDS port"
  value       = aws_db_instance.this.port
}

output "db_name" {
  description = "Database name"
  value       = aws_db_instance.this.db_name
}

output "username" {
  description = "Master username"
  value       = aws_db_instance.this.username
  sensitive   = true
}

output "dbi_resource_id" {
  description = "DB instance resource ID used for IAM auth resource ARNs"
  value       = aws_db_instance.this.resource_id
}


================================================
FILE: deployment/terraform/modules/aws/postgres/variables.tf
================================================
variable "identifier" {
  type        = string
  description = "Identifier for the database and related resources"
}

variable "db_name" {
  type        = string
  description = "Name of the database"
  default     = "postgres"
}

variable "instance_type" {
  type        = string
  description = "Instance type"
  default     = "db.t4g.large" # 2 vCPU and 8 GB of memory
}

variable "storage_gb" {
  type        = number
  description = "Storage size in GB"
  default     = 20
}

variable "engine_version" {
  type        = string
  description = "Engine version"
  default     = "17"
}

variable "vpc_id" {
  type        = string
  description = "VPC ID"
}

variable "subnet_ids" {
  type        = list(string)
  description = "Subnet IDs"
}

variable "ingress_cidrs" {
  type        = list(string)
  description = "Ingress CIDR blocks"
}

variable "username" {
  type        = string
  description = "Username for the database"
  default     = "postgres"
  sensitive   = true
}

variable "password" {
  type        = string
  description = "Password for the database"
  default     = null
  sensitive   = true
}

variable "tags" {
  type        = map(string)
  description = "Tags to apply to RDS resources"
  default     = {}
}

variable "enable_rds_iam_auth" {
  type        = bool
  description = "Enable AWS IAM database authentication for this RDS instance"
  default     = false
}

variable "backup_retention_period" {
  type        = number
  description = "Number of days to retain automated backups (0 to disable)"
  default     = 7

  validation {
    condition     = var.backup_retention_period >= 0 && var.backup_retention_period <= 35
    error_message = "backup_retention_period must be between 0 and 35 (AWS RDS limit)."
  }
}

variable "backup_window" {
  type        = string
  description = "Preferred UTC time window for automated backups (hh24:mi-hh24:mi)"
  default     = "03:00-04:00"

  validation {
    condition     = can(regex("^([01]\\d|2[0-3]):[0-5]\\d-([01]\\d|2[0-3]):[0-5]\\d$", var.backup_window))
    error_message = "backup_window must be in hh24:mi-hh24:mi format (e.g. \"03:00-04:00\")."
  }
}

# CloudWatch CPU alarm configuration
variable "cpu_alarm_threshold" {
  type        = number
  description = "CPU utilization percentage threshold for the CloudWatch alarm"
  default     = 80

  validation {
    condition     = var.cpu_alarm_threshold >= 0 && var.cpu_alarm_threshold <= 100
    error_message = "cpu_alarm_threshold must be between 0 and 100 (percentage)."
  }
}

variable "cpu_alarm_evaluation_periods" {
  type        = number
  description = "Number of consecutive periods the threshold must be breached before alarming"
  default     = 3

  validation {
    condition     = var.cpu_alarm_evaluation_periods >= 1
    error_message = "cpu_alarm_evaluation_periods must be at least 1."
  }
}

variable "cpu_alarm_period" {
  type        = number
  description = "Period in seconds over which the CPU metric is evaluated"
  default     = 300

  validation {
    condition     = var.cpu_alarm_period >= 60 && var.cpu_alarm_period % 60 == 0
    error_message = "cpu_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
  }
}

variable "memory_alarm_threshold" {
  type        = number
  description = "Freeable memory threshold in bytes. Alarm fires when memory drops below this value."
  default     = 256000000 # 256 MB

  validation {
    condition     = var.memory_alarm_threshold > 0
    error_message = "memory_alarm_threshold must be greater than 0."
  }
}

variable "memory_alarm_evaluation_periods" {
  type        = number
  description = "Number of consecutive periods the threshold must be breached before alarming"
  default     = 3

  validation {
    condition     = var.memory_alarm_evaluation_periods >= 1
    error_message = "memory_alarm_evaluation_periods must be at least 1."
  }
}

variable "memory_alarm_period" {
  type        = number
  description = "Period in seconds over which the freeable memory metric is evaluated"
  default     = 300

  validation {
    condition     = var.memory_alarm_period >= 60 && var.memory_alarm_period % 60 == 0
    error_message = "memory_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
  }
}

variable "read_iops_alarm_threshold" {
  type        = number
  description = "ReadIOPS threshold. Alarm fires when IOPS exceeds this value."
  default     = 3000

  validation {
    condition     = var.read_iops_alarm_threshold > 0
    error_message = "read_iops_alarm_threshold must be greater than 0."
  }
}

variable "iops_alarm_evaluation_periods" {
  type        = number
  description = "Number of consecutive periods the IOPS threshold must be breached before alarming"
  default     = 3

  validation {
    condition     = var.iops_alarm_evaluation_periods >= 1
    error_message = "iops_alarm_evaluation_periods must be at least 1."
  }
}

variable "iops_alarm_period" {
  type        = number
  description = "Period in seconds over which the IOPS metric is evaluated"
  default     = 300

  validation {
    condition     = var.iops_alarm_period >= 60 && var.iops_alarm_period % 60 == 0
    error_message = "iops_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
  }
}

variable "alarm_actions" {
  type        = list(string)
  description = "List of ARNs to notify when the alarm transitions state (e.g. SNS topic ARNs)"
  default     = []
}


================================================
FILE: deployment/terraform/modules/aws/redis/main.tf
================================================
# Define the Redis security group
resource "aws_security_group" "redis_sg" {
  name        = "${var.name}-sg"
  description = "Allow inbound traffic from EKS to Redis"
  vpc_id      = var.vpc_id
  tags        = var.tags

  # Standard Redis port
  ingress {
    from_port   = 6379
    to_port     = 6379
    protocol    = "tcp"
    cidr_blocks = var.ingress_cidrs
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_elasticache_subnet_group" "elasticache_subnet_group" {
  name       = "${var.name}-subnet-group"
  subnet_ids = var.subnet_ids
  tags       = var.tags
}

# The actual Redis instance
resource "aws_elasticache_replication_group" "redis" {
  replication_group_id = var.name
  description          = "Redis cluster for ${var.name}"
  engine               = "redis"
  node_type            = var.instance_type
  num_cache_clusters   = 1
  parameter_group_name = "default.redis7"
  engine_version       = "7.0"
  port                 = 6379
  security_group_ids   = [aws_security_group.redis_sg.id]
  subnet_group_name    = aws_elasticache_subnet_group.elasticache_subnet_group.name

  # Enable transit encryption (SSL/TLS)
  transit_encryption_enabled = var.transit_encryption_enabled

  # Enable encryption at rest
  at_rest_encryption_enabled = true

  # Enable authentication if auth_token is provided
  # If transit_encryption_enabled is true, AWS requires an auth_token to be set.
  auth_token = var.auth_token
  tags       = var.tags
}


================================================
FILE: deployment/terraform/modules/aws/redis/outputs.tf
================================================
output "redis_endpoint" {
  description = "The endpoint of the Redis cluster"
  value       = aws_elasticache_replication_group.redis.primary_endpoint_address
}

output "redis_port" {
  description = "The port of the Redis cluster"
  value       = aws_elasticache_replication_group.redis.port
}

output "redis_ssl_enabled" {
  description = "Whether SSL/TLS is enabled for Redis"
  value       = var.transit_encryption_enabled
}


================================================
FILE: deployment/terraform/modules/aws/redis/variables.tf
================================================
variable "name" {
  description = "The name of the redis instance"
  type        = string
}

variable "vpc_id" {
  description = "The ID of the vpc to deploy the redis instance into"
  type        = string
}

variable "subnet_ids" {
  description = "The subnets of the vpc to deploy into"
  type        = list(string)
}

variable "ingress_cidrs" {
  description = "CIDR block to allow ingress from"
  type        = list(string)
}

variable "instance_type" {
  description = "The instance type of the redis instance"
  type        = string
  default     = "cache.m5.large" # 2 vCPU and 6 GB of memory
}

variable "transit_encryption_enabled" {
  description = "Enable transit encryption (SSL/TLS) for Redis"
  type        = bool
  default     = true
}

variable "auth_token" {
  description = "The password used to access a password protected server"
  type        = string
  default     = null
  sensitive   = true
}

variable "tags" {
  description = "Tags to apply to ElastiCache resources"
  type        = map(string)
  default     = {}
}


================================================
FILE: deployment/terraform/modules/aws/s3/main.tf
================================================
resource "aws_s3_bucket" "bucket" {
  bucket = var.bucket_name
  tags   = var.tags
}

resource "aws_s3_bucket_policy" "bucket_policy" {
  bucket = aws_s3_bucket.bucket.id

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Sid       = "AllowAccessViaVPCE",
        Effect    = "Allow",
        Principal = "*", # Update this to be the specific IAM roles, users, or service principals as needed
        Action = [
          "s3:GetObject",
          "s3:ListBucket"
        ],
        Resource = [
          aws_s3_bucket.bucket.arn,
          "${aws_s3_bucket.bucket.arn}/*"
        ],
        Condition = {
          StringEquals = {
            "aws:SourceVpce" = var.s3_vpc_endpoint_id
          }
        }
      }
    ]
  })
}


================================================
FILE: deployment/terraform/modules/aws/s3/variables.tf
================================================
variable "bucket_name" {
  type        = string
  description = "Name of the S3 bucket"
}

variable "tags" {
  type        = map(string)
  description = "Tags to apply to S3 resources"
  default     = {}
}

variable "s3_vpc_endpoint_id" {
  type        = string
  description = "ID of the S3 gateway VPC endpoint allowed to access this bucket"
}


================================================
FILE: deployment/terraform/modules/aws/vpc/main.tf
================================================
# Get the availability zones for the region without requiring opt-in
data "aws_availability_zones" "available" {
  filter {
    name   = "opt-in-status"
    values = ["opt-in-not-required"]
  }
}

data "aws_region" "current" {}

module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "5.0.0"

  name = var.vpc_name

  cidr = var.cidr_block
  azs  = slice(data.aws_availability_zones.available.names, 0, 3)

  private_subnets         = var.private_subnets
  public_subnets          = var.public_subnets
  map_public_ip_on_launch = true

  enable_nat_gateway   = true
  single_nat_gateway   = false
  enable_dns_hostnames = true

  public_subnet_tags = {
    "kubernetes.io/role/elb" = "1"
  }

  private_subnet_tags = {
    "kubernetes.io/role/internal-elb" = "1"
  }

  tags = var.tags
}

data "aws_route_tables" "this" {
  filter {
    name   = "vpc-id"
    values = [module.vpc.vpc_id]
  }

  depends_on = [module.vpc]
}

resource "aws_vpc_endpoint" "s3" {
  vpc_id            = module.vpc.vpc_id
  service_name      = "com.amazonaws.${data.aws_region.current.name}.s3"
  vpc_endpoint_type = "Gateway"
  route_table_ids   = data.aws_route_tables.this.ids
  tags              = var.tags
}


================================================
FILE: deployment/terraform/modules/aws/vpc/outputs.tf
================================================
output "vpc_id" {
  value = module.vpc.vpc_id
}

output "private_subnets" {
  value = module.vpc.private_subnets
}

output "public_subnets" {
  value = module.vpc.public_subnets
}

output "vpc_cidr_block" {
  value = module.vpc.vpc_cidr_block
}

output "s3_vpc_endpoint_id" {
  description = "ID of the S3 gateway VPC endpoint created for this VPC"
  value       = aws_vpc_endpoint.s3.id
}


================================================
FILE: deployment/terraform/modules/aws/vpc/variables.tf
================================================
variable "vpc_name" {
  type        = string
  description = "The name of the VPC"
  default     = "onyx-vpc"
}

variable "cidr_block" {
  type        = string
  description = "The CIDR block for the VPC"
  default     = "10.0.0.0/16"
}

variable "private_subnets" {
  type        = list(string)
  description = "The private subnets for the VPC"
  default     = ["10.0.0.0/21", "10.0.8.0/21", "10.0.16.0/21", "10.0.24.0/21", "10.0.32.0/21"]
}

variable "public_subnets" {
  type        = list(string)
  description = "The public subnets for the VPC"
  default     = ["10.0.40.0/21", "10.0.48.0/21", "10.0.56.0/21"]
}

variable "tags" {
  type        = map(string)
  description = "Tags to apply to all VPC-related resources"
  default     = {}
}


================================================
FILE: deployment/terraform/modules/aws/waf/main.tf
================================================
locals {
  name                  = var.name
  tags                  = var.tags
  ip_allowlist_enabled  = length(var.allowed_ip_cidrs) > 0
  managed_rule_priority = local.ip_allowlist_enabled ? 1 : 0
}

resource "aws_wafv2_ip_set" "allowed_ips" {
  count = local.ip_allowlist_enabled ? 1 : 0

  name               = "${local.name}-allowed-ips"
  description        = "IP allowlist for ${local.name}"
  scope              = "REGIONAL"
  ip_address_version = "IPV4"
  addresses          = var.allowed_ip_cidrs

  tags = local.tags
}

# AWS WAFv2 Web ACL
resource "aws_wafv2_web_acl" "main" {
  name        = "${local.name}-web-acl"
  description = "WAF Web ACL for ${local.name}"
  scope       = "REGIONAL"

  default_action {
    allow {}
  }

  dynamic "rule" {
    for_each = local.ip_allowlist_enabled ? [1] : []
    content {
      name     = "BlockRequestsOutsideAllowedIPs"
      priority = 1

      action {
        block {}
      }

      statement {
        not_statement {
          statement {
            ip_set_reference_statement {
              arn = aws_wafv2_ip_set.allowed_ips[0].arn
            }
          }
        }
      }

      visibility_config {
        cloudwatch_metrics_enabled = true
        metric_name                = "BlockRequestsOutsideAllowedIPsMetric"
        sampled_requests_enabled   = true
      }
    }
  }

  # AWS Managed Rules - Core Rule Set
  rule {
    name     = "AWSManagedRulesCommonRuleSet"
    priority = 1 + local.managed_rule_priority

    override_action {
      none {}
    }

    statement {
      managed_rule_group_statement {
        name        = "AWSManagedRulesCommonRuleSet"
        vendor_name = "AWS"

        dynamic "rule_action_override" {
          for_each = var.common_rule_set_count_rules
          content {
            name = rule_action_override.value
            action_to_use {
              count {}
            }
          }
        }
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "AWSManagedRulesCommonRuleSetMetric"
      sampled_requests_enabled   = true
    }
  }

  # AWS Managed Rules - Known Bad Inputs
  rule {
    name     = "AWSManagedRulesKnownBadInputsRuleSet"
    priority = 2 + local.managed_rule_priority

    override_action {
      none {}
    }

    statement {
      managed_rule_group_statement {
        name        = "AWSManagedRulesKnownBadInputsRuleSet"
        vendor_name = "AWS"
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "AWSManagedRulesKnownBadInputsRuleSetMetric"
      sampled_requests_enabled   = true
    }
  }

  # Rate Limiting Rule
  rule {
    name     = "RateLimitRule"
    priority = 3 + local.managed_rule_priority

    action {
      block {}
    }

    statement {
      rate_based_statement {
        limit              = var.rate_limit_requests_per_5_minutes
        aggregate_key_type = "IP"
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "RateLimitRuleMetric"
      sampled_requests_enabled   = true
    }
  }

  # Geo Restriction (if enabled)
  dynamic "rule" {
    for_each = length(var.geo_restriction_countries) > 0 ? [1] : []
    content {
      name     = "GeoRestrictionRule"
      priority = 4 + local.managed_rule_priority

      action {
        block {}
      }

      statement {
        geo_match_statement {
          country_codes = var.geo_restriction_countries
        }
      }

      visibility_config {
        cloudwatch_metrics_enabled = true
        metric_name                = "GeoRestrictionRuleMetric"
        sampled_requests_enabled   = true
      }
    }
  }

  # IP Rate Limiting
  rule {
    name     = "APIRateLimitRule"
    priority = 5 + local.managed_rule_priority

    action {
      block {}
    }

    statement {
      rate_based_statement {
        limit              = var.api_rate_limit_requests_per_5_minutes
        aggregate_key_type = "IP"
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "APIRateLimitRuleMetric"
      sampled_requests_enabled   = true
    }
  }

  # SQL Injection Protection
  rule {
    name     = "AWSManagedRulesSQLiRuleSet"
    priority = 6 + local.managed_rule_priority

    override_action {
      none {}
    }

    statement {
      managed_rule_group_statement {
        name        = "AWSManagedRulesSQLiRuleSet"
        vendor_name = "AWS"
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "AWSManagedRulesSQLiRuleSetMetric"
      sampled_requests_enabled   = true
    }
  }

  # Anonymous IP Protection
  rule {
    name     = "AWSManagedRulesAnonymousIpList"
    priority = 7 + local.managed_rule_priority

    override_action {
      none {}
    }

    statement {
      managed_rule_group_statement {
        name        = "AWSManagedRulesAnonymousIpList"
        vendor_name = "AWS"
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "AWSManagedRulesAnonymousIpListMetric"
      sampled_requests_enabled   = true
    }
  }

  visibility_config {
    cloudwatch_metrics_enabled = true
    metric_name                = "${local.name}WebACLMetric"
    sampled_requests_enabled   = true
  }

  tags = local.tags
}

# WAF Logging Configuration (simplified - just CloudWatch)
resource "aws_cloudwatch_log_group" "waf_logs" {
  count             = var.enable_logging ? 1 : 0
  name              = "/aws/waf/${local.name}"
  retention_in_days = var.log_retention_days

  tags = local.tags
}


================================================
FILE: deployment/terraform/modules/aws/waf/outputs.tf
================================================
output "web_acl_arn" {
  description = "ARN of the WAF Web ACL"
  value       = aws_wafv2_web_acl.main.arn
}

output "web_acl_id" {
  description = "ID of the WAF Web ACL"
  value       = aws_wafv2_web_acl.main.id
}

output "web_acl_name" {
  description = "Name of the WAF Web ACL"
  value       = aws_wafv2_web_acl.main.name
}

output "log_group_name" {
  description = "Name of the CloudWatch log group for WAF logs"
  value       = var.enable_logging ? aws_cloudwatch_log_group.waf_logs[0].name : null
}


================================================
FILE: deployment/terraform/modules/aws/waf/variables.tf
================================================
variable "name" {
  type        = string
  description = "Name prefix for WAF resources"
}

variable "tags" {
  type        = map(string)
  description = "Tags to apply to all WAF resources"
  default     = {}
}

variable "allowed_ip_cidrs" {
  type        = list(string)
  description = "Optional IPv4 CIDR ranges allowed to reach the application. Leave empty to disable IP allowlisting."
  default     = []
}

variable "common_rule_set_count_rules" {
  type        = list(string)
  description = "Subrules within AWSManagedRulesCommonRuleSet to override to COUNT instead of BLOCK."
  default     = []
}

variable "rate_limit_requests_per_5_minutes" {
  type        = number
  description = "Rate limit for requests per 5 minutes per IP address"
  default     = 2000
}

variable "api_rate_limit_requests_per_5_minutes" {
  type        = number
  description = "Rate limit for API requests per 5 minutes per IP address"
  default     = 1000
}

variable "geo_restriction_countries" {
  type        = list(string)
  description = "List of country codes to block. Leave empty to disable geo restrictions"
  default     = []
}

variable "enable_logging" {
  type        = bool
  description = "Enable WAF logging to S3"
  default     = true
}

variable "log_retention_days" {
  type        = number
  description = "Number of days to retain WAF logs"
  default     = 90
}


================================================
FILE: desktop/.gitignore
================================================
# Dependencies
node_modules/

# Build outputs
dist/
src-tauri/target/

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
*.log
npm-debug.log*

# Local env files
.env
.env.local

# Generated files
src-tauri/gen/schemas/acl-manifests.json


================================================
FILE: desktop/README.md
================================================
# Onyx Desktop

A lightweight macOS desktop application for [Onyx Cloud](https://cloud.onyx.app).

Built with [Tauri](https://tauri.app) for minimal bundle size (~10MB vs Electron's 150MB+).

## Features

- 🪶 **Lightweight** - Native macOS WebKit, no bundled Chromium
- ⌨️ **Keyboard Shortcuts** - Quick navigation and actions
- 🪟 **Native Feel** - macOS-style title bar with traffic lights
- 💾 **Window State** - Remembers size/position between sessions
- 🔗 **Multi-window** - Open multiple Onyx windows

## Keyboard Shortcuts

| Shortcut | Action           |
| -------- | ---------------- |
| `⌘ N`    | New Chat         |
| `⌘ ⇧ N`  | New Window       |
| `⌘ R`    | Reload           |
| `⌘ [`    | Go Back          |
| `⌘ ]`    | Go Forward       |
| `⌘ ,`    | Open Config File |
| `⌘ W`    | Close Window     |
| `⌘ Q`    | Quit             |

## Prerequisites

1. **Rust** (latest stable)

   ```bash
   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
   source $HOME/.cargo/env
   ```

2. **Node.js** (18+)

   ```bash
   # Using homebrew
   brew install node

   # Or using nvm
   nvm install 18
   ```

3. **Xcode Command Line Tools**
   ```bash
   xcode-select --install
   ```

## Development

```bash
# Install dependencies
npm install

# Run in development mode
npm run dev

# Run in debug mode
npm run debug
```

## Building

### Build for current architecture

```bash
npm run build
```

### Build Universal Binary (Intel + Apple Silicon)

```bash
# First, add the targets
rustup target add x86_64-apple-darwin
rustup target add aarch64-apple-darwin

# Build universal binary
npm run build:dmg
```

The built `.dmg` will be in `src-tauri/target/release/bundle/dmg/`.

## Project Structure

```
onyx-desktop/
├── package.json          # Node dependencies & scripts
├── src/
│   └── index.html        # Fallback/loading page
└── src-tauri/
    ├── Cargo.toml        # Rust dependencies
    ├── tauri.conf.json   # Tauri configuration
    ├── build.rs          # Build script
    ├── icons/            # App icons
    └── src/
        └── main.rs       # Rust backend code
```

## Icons

Before building, add your app icons to `src-tauri/icons/`:

- `32x32.png`
- `128x128.png`
- `128x128@2x.png`
- `icon.icns` (macOS)
- `icon.ico` (Windows, optional)

You can generate these from a 1024x1024 source image using:

```bash
# Using tauri's icon generator
npm run tauri icon path/to/your-icon.png
```

## Customization

### Self-Hosted / Custom Server URL

The app defaults to `https://cloud.onyx.app` but supports any Onyx instance.

**Config file location:**

- macOS: `~/Library/Application Support/app.onyx.desktop/config.json`
- Linux: `~/.config/app.onyx.desktop/config.json`
- Windows: `%APPDATA%/app.onyx.desktop/config.json`

**To use a self-hosted instance:**

1. Launch the app once (creates default config)
2. Press `⌘ ,` to open the config file, or edit it manually
3. Change the `server_url`:

```json
{
  "server_url": "https://your-onyx-instance.company.com",
  "window_title": "Onyx"
}
```

4. Restart the app

**Quick edit via terminal:**

```bash
# macOS
open -t ~/Library/Application\ Support/app.onyx.desktop/config.json

# Or use any editor
code ~/Library/Application\ Support/app.onyx.desktop/config.json
```

### Change the default URL in build

Edit `src-tauri/tauri.conf.json`:

```json
{
  "app": {
    "windows": [
      {
        "url": "https://your-onyx-instance.com"
      }
    ]
  }
}
```

### Add more shortcuts

Edit `src-tauri/src/main.rs` in the `setup_shortcuts` function.

### Window appearance

Modify the window configuration in `src-tauri/tauri.conf.json`:

- `titleBarStyle`: `"Overlay"` (macOS native) or `"Visible"`
- `decorations`: Window chrome
- `transparent`: For custom backgrounds

## Troubleshooting

### "Unable to resolve host"

Make sure you have an internet connection. The app loads content from `cloud.onyx.app`.

### Build fails on M1/M2 Mac

```bash
# Ensure you have the right target
rustup target add aarch64-apple-darwin
```

### Code signing for distribution

For distributing outside the App Store, you'll need to:

1. Get an Apple Developer certificate
2. Sign the app: `codesign --deep --force --sign "Developer ID" target/release/bundle/macos/Onyx.app`
3. Notarize with Apple

## License

MIT


================================================
FILE: desktop/package.json
================================================
{
  "name": "onyx-desktop",
  "version": "0.0.0-dev",
  "description": "Lightweight desktop app for Onyx Cloud",
  "scripts": {
    "dev": "tauri dev",
    "debug": "tauri dev -- -- --debug",
    "build": "tauri build",
    "build:dmg": "tauri build --target universal-apple-darwin",
    "build:linux": "tauri build --bundles deb,rpm"
  },
  "dependencies": {
    "@tauri-apps/api": "^2.10.1"
  },
  "devDependencies": {
    "@tauri-apps/cli": "^2.10.1"
  }
}


================================================
FILE: desktop/scripts/generate-icons.sh
================================================
#!/bin/bash
# Icon generation script for Onyx Desktop
# Requires: ImageMagick (brew install imagemagick)

set -e

ICON_DIR="src-tauri/icons"
SOURCE_SVG="$ICON_DIR/icon.svg"

# Check if ImageMagick is installed
if ! command -v magick &> /dev/null; then
    echo "ImageMagick not found. Install with: brew install imagemagick"
    exit 1
fi

echo "Generating icons from $SOURCE_SVG..."

# Generate PNG icons
magick -background none "$SOURCE_SVG" -resize 32x32 "$ICON_DIR/32x32.png"
magick -background none "$SOURCE_SVG" -resize 128x128 "$ICON_DIR/128x128.png"
magick -background none "$SOURCE_SVG" -resize 256x256 "$ICON_DIR/128x128@2x.png"

# Generate macOS .icns
# Create iconset directory
ICONSET="$ICON_DIR/icon.iconset"
mkdir -p "$ICONSET"

magick -background none "$SOURCE_SVG" -resize 16x16 "$ICONSET/icon_16x16.png"
magick -background none "$SOURCE_SVG" -resize 32x32 "$ICONSET/icon_16x16@2x.png"
magick -background none "$SOURCE_SVG" -resize 32x32 "$ICONSET/icon_32x32.png"
magick -background none "$SOURCE_SVG" -resize 64x64 "$ICONSET/icon_32x32@2x.png"
magick -background none "$SOURCE_SVG" -resize 128x128 "$ICONSET/icon_128x128.png"
magick -background none "$SOURCE_SVG" -resize 256x256 "$ICONSET/icon_128x128@2x.png"
magick -background none "$SOURCE_SVG" -resize 256x256 "$ICONSET/icon_256x256.png"
magick -background none "$SOURCE_SVG" -resize 512x512 "$ICONSET/icon_256x256@2x.png"
magick -background none "$SOURCE_SVG" -resize 512x512 "$ICONSET/icon_512x512.png"
magick -background none "$SOURCE_SVG" -resize 1024x1024 "$ICONSET/icon_512x512@2x.png"

# Convert to icns (macOS only)
if command -v iconutil &> /dev/null; then
    iconutil -c icns "$ICONSET" -o "$ICON_DIR/icon.icns"
    rm -rf "$ICONSET"
    echo "Generated icon.icns"
else
    echo "iconutil not found (not on macOS?), skipping .icns generation"
fi

# Generate Windows .ico
magick "$ICON_DIR/32x32.png" "$ICON_DIR/128x128.png" "$ICON_DIR/icon.ico"

echo "Done! Icons generated in $ICON_DIR/"
ls -la "$ICON_DIR/"


================================================
FILE: desktop/src/index.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Onyx</title>
    <link
      href="https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@400;500;600;700&display=swap"
      rel="stylesheet"
    />
    <style>
      :root {
        --background-900: #f5f5f5;
        --background-800: #ffffff;
        --text-light-05: rgba(0, 0, 0, 0.95);
        --text-light-03: rgba(0, 0, 0, 0.6);
        --white-10: rgba(0, 0, 0, 0.1);
        --white-15: rgba(0, 0, 0, 0.15);
        --white-20: rgba(0, 0, 0, 0.2);
        --white-30: rgba(0, 0, 0, 0.3);
        --font-hanken-grotesk: "Hanken Grotesk", -apple-system,
          BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
      }

      .dark {
        --background-900: #1a1a1a;
        --background-800: #262626;
        --text-light-05: rgba(255, 255, 255, 0.95);
        --text-light-03: rgba(255, 255, 255, 0.6);
        --white-10: rgba(255, 255, 255, 0.08);
        --white-15: rgba(255, 255, 255, 0.12);
        --white-20: rgba(255, 255, 255, 0.15);
        --white-30: rgba(255, 255, 255, 0.25);
      }

      * {
        box-sizing: border-box;
        margin: 0;
        padding: 0;
      }

      body {
        font-family: var(--font-hanken-grotesk);
        background: linear-gradient(
          135deg,
          var(--background-900) 0%,
          var(--background-800) 100%
        );
        min-height: 100vh;
        color: var(--text-light-05);
        display: flex;
        align-items: center;
        justify-content: center;
        padding: 20px;
        -webkit-user-select: none;
        user-select: none;
        transition:
          background 0.3s ease,
          color 0.3s ease;
      }

      .titlebar {
        position: fixed;
        top: 0;
        left: 0;
        right: 0;
        height: 28px;
        -webkit-app-region: drag;
        z-index: 10000;
      }

      .settings-container {
        max-width: 500px;
        width: 100%;
        opacity: 0;
        transform: translateY(8px);
        pointer-events: none;
        transition:
          opacity 0.18s ease,
          transform 0.18s ease;
      }

      body.show-settings .settings-container {
        opacity: 1;
        transform: translateY(0);
        pointer-events: auto;
      }

      .settings-panel {
        background: var(--background-800);
        backdrop-filter: blur(24px);
        border-radius: 16px;
        border: 1px solid var(--white-10);
        overflow: hidden;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
        transition:
          background 0.3s ease,
          border 0.3s ease;
      }

      .dark .settings-panel {
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
      }

      .settings-header {
        padding: 24px;
        border-bottom: 1px solid var(--white-10);
        display: flex;
        align-items: center;
        gap: 12px;
      }

      .settings-icon {
        width: 40px;
        height: 40px;
        border-radius: 12px;
        background: var(--background-900);
        display: flex;
        align-items: center;
        justify-content: center;
        overflow: hidden;
        transition: background 0.3s ease;
      }

      .settings-icon svg {
        width: 24px;
        height: 24px;
        color: var(--text-light-05);
        transition: color 0.3s ease;
      }

      .settings-title {
        font-size: 20px;
        font-weight: 600;
        color: var(--text-light-05);
      }

      .settings-content {
        padding: 24px;
      }

      .settings-section {
        margin-bottom: 32px;
      }

      .settings-section:last-child {
        margin-bottom: 0;
      }

      .section-title {
        font-size: 11px;
        font-weight: 600;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        color: var(--text-light-03);
        margin-bottom: 12px;
      }

      .settings-group {
        background: var(--background-900);
        border-radius: 16px;
        padding: 4px;
        transition: background 0.3s ease;
      }

      .setting-row {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 12px;
      }

      .setting-row-content {
        display: flex;
        flex-direction: column;
        gap: 4px;
        flex: 1;
      }

      .setting-label {
        font-size: 14px;
        font-weight: 400;
        color: var(--text-light-05);
      }

      .setting-description {
        font-size: 12px;
        color: var(--text-light-03);
      }

      .setting-divider {
        height: 1px;
        background: var(--white-10);
        margin: 0 4px;
      }

      .input-field {
        width: 100%;
        padding: 10px 12px;
        border: 1px solid var(--white-10);
        border-radius: 8px;
        font-size: 14px;
        background: var(--background-800);
        color: var(--text-light-05);
        font-family: var(--font-hanken-grotesk);
        transition: all 0.2s;
        -webkit-app-region: no-drag;
      }

      .input-field:focus {
        outline: none;
        border-color: var(--white-30);
        background: var(--background-900);
        box-shadow: 0 0 0 2px var(--white-10);
      }

      .input-field::placeholder {
        color: var(--text-light-03);
      }

      .input-field.error {
        border-color: #ef4444;
      }

      .error-message {
        color: #ef4444;
        font-size: 12px;
        margin-top: 4px;
        padding-left: 12px;
        display: none;
      }

      .error-message.visible {
        display: block;
      }

      .toggle-switch {
        position: relative;
        display: inline-block;
        width: 44px;
        height: 24px;
        flex-shrink: 0;
      }

      .toggle-switch input {
        opacity: 0;
        width: 0;
        height: 0;
      }

      .toggle-slider {
        position: absolute;
        cursor: pointer;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background-color: var(--white-15);
        transition: 0.3s;
        border-radius: 24px;
      }

      .toggle-slider:before {
        position: absolute;
        content: "";
        height: 18px;
        width: 18px;
        left: 3px;
        bottom: 3px;
        background-color: var(--background-800);
        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);
        transition: 0.3s;
        border-radius: 50%;
      }

      .dark .toggle-slider:before {
        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.5);
      }

      input:checked + .toggle-slider {
        background-color: var(--white-30);
      }

      input:checked + .toggle-slider:before {
        transform: translateX(20px);
      }

      .button {
        padding: 12px 24px;
        border-radius: 8px;
        border: none;
        cursor: pointer;
        font-size: 14px;
        font-weight: 600;
        transition: all 0.2s;
        font-family: var(--font-hanken-grotesk);
        width: 100%;
        margin-top: 24px;
        -webkit-app-region: no-drag;
      }

      .button.primary {
        background: #286df8;
        color: white;
      }

      .button.primary:hover {
        background: #1e5cd6;
        box-shadow: 0 4px 12px rgba(40, 109, 248, 0.3);
      }

      .button.primary:disabled {
        opacity: 0.5;
        cursor: not-allowed;
        box-shadow: none;
      }

      kbd {
        background: var(--white-10);
        border: 1px solid var(--white-15);
        border-radius: 4px;
        padding: 2px 6px;
        font-family: monospace;
        font-weight: 500;
        color: var(--text-light-05);
        font-size: 11px;
        transition: all 0.3s ease;
      }
    </style>
  </head>
  <body>
    <div class="titlebar"></div>

    <div class="settings-container">
      <div class="settings-panel">
        <div class="settings-header">
          <div class="settings-icon">
            <svg
              viewBox="0 0 56 56"
              xmlns="http://www.w3.org/2000/svg"
              fill="currentColor"
            >
              <path
                fill-rule="evenodd"
                clip-rule="evenodd"
                d="M28 0 10.869 7.77 28 15.539l17.131-7.77L28 0Zm0 40.461-17.131 7.77L28 56l17.131-7.77L28 40.461Zm20.231-29.592L56 28.001l-7.769 17.131L40.462 28l7.769-17.131ZM15.538 28 7.77 10.869 0 28l7.769 17.131L15.538 28Z"
              />
            </svg>
          </div>
          <h1 class="settings-title">Settings</h1>
        </div>

        <div class="settings-content">
          <section class="settings-section">
            <div class="section-title">GENERAL</div>
            <div class="settings-group">
              <div class="setting-row">
                <div class="setting-row-content">
                  <label class="setting-label" for="onyxDomain"
                    >Root Domain</label
                  >
                  <div class="setting-description">
                    The root URL for your Onyx instance
                  </div>
                </div>
              </div>
              <div class="setting-divider"></div>
              <div class="setting-row" style="padding: 12px">
                <input
                  type="text"
                  id="onyxDomain"
                  class="input-field"
                  placeholder="https://cloud.onyx.app"
                  autocomplete="off"
                  autocorrect="off"
                  autocapitalize="off"
                  spellcheck="false"
                />
              </div>
              <div class="error-message" id="errorMessage">
                Please enter a valid URL starting with http:// or https://
              </div>
            </div>
          </section>

          <button class="button primary" id="saveBtn">Save & Connect</button>
        </div>
      </div>
    </div>

    <script>
      // Import Tauri API
      const { invoke } = window.__TAURI__.core;

      // Configuration
      const DEFAULT_DOMAIN = "https://cloud.onyx.app";
      let currentServerUrl = "";

      // DOM elements
      const domainInput = document.getElementById("onyxDomain");
      const errorMessage = document.getElementById("errorMessage");
      const saveBtn = document.getElementById("saveBtn");

      // Theme detection based on system preferences
      function applySystemTheme() {
        const darkModeQuery = window.matchMedia("(prefers-color-scheme: dark)");

        function updateTheme(e) {
          if (e.matches) {
            document.documentElement.classList.add("dark");
            document.body.classList.add("dark");
          } else {
            document.documentElement.classList.remove("dark");
            document.body.classList.remove("dark");
          }
        }

        // Apply initial theme
        updateTheme(darkModeQuery);

        // Listen for changes
        darkModeQuery.addEventListener("change", updateTheme);
      }

      function showSettings() {
        document.body.classList.add("show-settings");
      }

      // Apply system theme immediately
      applySystemTheme();

      // Initialize the app
      async function init() {
        try {
          const bootstrap = await invoke("get_bootstrap_state");
          currentServerUrl = bootstrap.server_url;

          // Set the input value
          domainInput.value = currentServerUrl || DEFAULT_DOMAIN;

          // Check if user came here explicitly (via Settings menu/shortcut)
          const urlParams = new URLSearchParams(window.location.search);
          const isExplicitSettings =
            window.location.hash === "#settings" ||
            urlParams.get("settings") === "true";

          // If user explicitly opened settings, show modal
          if (isExplicitSettings) {
            // Modal is already visible, user can edit and save
            showSettings();
            return;
          }

          // Otherwise, check if this is first launch
          // First launch = config doesn't exist
          if (!bootstrap.config_exists || !currentServerUrl) {
            // First launch - show modal, require user to configure
            showSettings();
            return;
          }

          // Not first launch and not explicit settings
          // Auto-redirect to configured domain
          window.location.href = currentServerUrl;
        } catch (error) {
          // On error, default to cloud
          domainInput.value = DEFAULT_DOMAIN;
          showSettings();
        }
      }

      // Validate URL
      function validateUrl(url) {
        const trimmedUrl = url.trim();
        if (!trimmedUrl) {
          return { valid: false, error: "URL cannot be empty" };
        }
        if (
          !trimmedUrl.startsWith("http://") &&
          !trimmedUrl.startsWith("https://")
        ) {
          return {
            valid: false,
            error: "URL must start with http:// or https://",
          };
        }
        try {
          new URL(trimmedUrl);
          return { valid: true, url: trimmedUrl };
        } catch {
          return { valid: false, error: "Please enter a valid URL" };
        }
      }

      // Show error
      function showError(message) {
        domainInput.classList.add("error");
        errorMessage.textContent = message;
        errorMessage.classList.add("visible");
      }

      // Clear error
      function clearError() {
        domainInput.classList.remove("error");
        errorMessage.classList.remove("visible");
      }

      // Save configuration
      async function saveConfiguration() {
        clearError();

        const validation = validateUrl(domainInput.value);
        if (!validation.valid) {
          showError(validation.error);
          return;
        }

        try {
          saveBtn.disabled = true;
          saveBtn.textContent = "Saving...";

          // Call Tauri command to save the URL
          await invoke("set_server_url", { url: validation.url });

          // Success - redirect to the new URL (login page)
          window.location.href = validation.url;
        } catch (error) {
          showError(error || "Failed to save configuration");
          saveBtn.disabled = false;
          saveBtn.textContent = "Save & Connect";
        }
      }

      // Event listeners
      domainInput.addEventListener("input", clearError);
      domainInput.addEventListener("keypress", (e) => {
        if (e.key === "Enter") {
          saveConfiguration();
        }
      });
      saveBtn.addEventListener("click", saveConfiguration);

      // Initialize when DOM is ready
      if (document.readyState === "loading") {
        document.addEventListener("DOMContentLoaded", init);
      } else {
        init();
      }
    </script>
  </body>
</html>


================================================
FILE: desktop/src/titlebar.js
================================================
// Custom title bar for Onyx Desktop
// This script injects a draggable title bar that matches Onyx design system

(function () {
  const TITLEBAR_ID = "onyx-desktop-titlebar";
  const TITLEBAR_HEIGHT = 36;
  const STYLE_ID = "onyx-desktop-titlebar-style";
  const VIEWPORT_VAR = "--onyx-desktop-viewport-height";

  // Wait for DOM to be ready
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", init);
  } else {
    init();
  }

  function getInvoke() {
    if (window.__TAURI__?.core?.invoke) return window.__TAURI__.core.invoke;
    if (window.__TAURI__?.invoke) return window.__TAURI__.invoke;
    if (window.__TAURI_INTERNALS__?.invoke)
      return window.__TAURI_INTERNALS__.invoke;
    return null;
  }

  async function startWindowDrag() {
    const invoke = getInvoke();

    if (invoke) {
      try {
        await invoke("start_drag_window");
        return;
      } catch (err) {}
    }

    const appWindow =
      window.__TAURI__?.window?.getCurrent?.() ??
      window.__TAURI__?.window?.appWindow;

    if (appWindow?.startDragging) {
      try {
        await appWindow.startDragging();
      } catch (err) {}
    }
  }

  function injectStyles() {
    if (document.getElementById(STYLE_ID)) return;
    const style = document.createElement("style");
    style.id = STYLE_ID;
    style.textContent = `
      :root {
        --onyx-desktop-titlebar-height: ${TITLEBAR_HEIGHT}px;
        --onyx-desktop-viewport-height: 100dvh;
        --onyx-desktop-safe-height: calc(var(--onyx-desktop-viewport-height) - var(--onyx-desktop-titlebar-height));
      }

      @supports not (height: 100dvh) {
        :root {
          --onyx-desktop-viewport-height: 100vh;
        }
      }

      html,
      body {
        height: var(--onyx-desktop-viewport-height);
        min-height: var(--onyx-desktop-viewport-height);
        margin: 0;
        padding: 0;
        overflow: hidden;
      }

      body {
        padding-top: var(--onyx-desktop-titlebar-height) !important;
        box-sizing: border-box;
      }

      body > div#__next,
      body > div#root,
      body > main {
        height: var(--onyx-desktop-safe-height);
        min-height: var(--onyx-desktop-safe-height);
        overflow: auto;
      }

      /* Override common Tailwind viewport helpers so content fits under the titlebar */
      .h-screen {
        height: var(--onyx-desktop-safe-height) !important;
      }

      .min-h-screen {
        min-height: var(--onyx-desktop-safe-height) !important;
      }

      .max-h-screen {
        max-height: var(--onyx-desktop-safe-height) !important;
      }

      #${TITLEBAR_ID} {
        cursor: default !important;
        -webkit-user-select: none !important;
        user-select: none !important;
        -webkit-app-region: drag;
        background: rgba(255, 255, 255, 0.85);
        height: var(--onyx-desktop-titlebar-height);
      }

      /* Dark mode support */
      .dark #${TITLEBAR_ID} {
        background: linear-gradient(180deg, rgba(18, 18, 18, 0.82) 0%, rgba(18, 18, 18, 0.72) 100%);
        border-bottom-color: rgba(255, 255, 255, 0.08);
      }
    `;
    document.head.appendChild(style);
  }

  function updateTitleBarTheme(isDark) {
    const titleBar = document.getElementById(TITLEBAR_ID);
    if (!titleBar) return;

    if (isDark) {
      titleBar.style.background =
        "linear-gradient(180deg, rgba(18, 18, 18, 0.82) 0%, rgba(18, 18, 18, 0.72) 100%)";
      titleBar.style.borderBottom = "1px solid rgba(255, 255, 255, 0.08)";
      titleBar.style.boxShadow = "0 8px 28px rgba(0, 0, 0, 0.2)";
    } else {
      titleBar.style.background =
        "linear-gradient(180deg, rgba(255, 255, 255, 0.94) 0%, rgba(255, 255, 255, 0.78) 100%)";
      titleBar.style.borderBottom = "1px solid rgba(0, 0, 0, 0.06)";
      titleBar.style.boxShadow = "0 8px 28px rgba(0, 0, 0, 0.04)";
    }
  }

  function buildTitleBar() {
    const titleBar = document.createElement("div");
    titleBar.id = TITLEBAR_ID;
    titleBar.setAttribute("data-tauri-drag-region", "");

    titleBar.addEventListener("mousedown", (e) => {
      // Only start drag on left click and not on buttons/inputs
      const nonDraggable = [
        "BUTTON",
        "INPUT",
        "TEXTAREA",
        "A",
        "SELECT",
        "OPTION",
      ];
      if (e.button === 0 && !nonDraggable.includes(e.target.tagName)) {
        e.preventDefault();
        startWindowDrag();
      }
    });

    // Apply initial styles matching current theme
    const htmlHasDark = document.documentElement.classList.contains("dark");
    const bodyHasDark = document.body?.classList.contains("dark");
    const isDark = htmlHasDark || bodyHasDark;

    // Apply styles matching Onyx design system with translucent glass effect
    titleBar.style.cssText = `
      position: fixed;
      top: 0;
      left: 0;
      right: 0;
      height: ${TITLEBAR_HEIGHT}px;
      background: linear-gradient(180deg, rgba(255, 255, 255, 0.94) 0%, rgba(255, 255, 255, 0.78) 100%);
      border-bottom: 1px solid rgba(0, 0, 0, 0.06);
      box-shadow: 0 8px 28px rgba(0, 0, 0, 0.04);
      z-index: 999999;
      display: flex;
      align-items: center;
      justify-content: center;
      cursor: default;
      user-select: none;
      -webkit-user-select: none;
      font-family: 'Hanken Grotesk', -apple-system, BlinkMacSystemFont, sans-serif;
      backdrop-filter: blur(18px) saturate(180%);
      -webkit-backdrop-filter: blur(18px) saturate(180%);
      -webkit-app-region: drag;
      padding: 0 12px;
      transition: background 0.3s ease, border-bottom 0.3s ease, box-shadow 0.3s ease;
    `;

    // Apply correct theme
    updateTitleBarTheme(isDark);

    return titleBar;
  }

  function mountTitleBar() {
    if (!document.body) {
      return;
    }

    const existing = document.getElementById(TITLEBAR_ID);
    if (existing?.parentElement === document.body) {
      // Update theme on existing titlebar
      const htmlHasDark = document.documentElement.classList.contains("dark");
      const bodyHasDark = document.body?.classList.contains("dark");
      const isDark = htmlHasDark || bodyHasDark;
      updateTitleBarTheme(isDark);
      return;
    }

    if (existing) {
      existing.remove();
    }

    const titleBar = buildTitleBar();
    document.body.insertBefore(titleBar, document.body.firstChild);
    injectStyles();

    // Ensure theme is applied immediately after mount
    setTimeout(() => {
      const htmlHasDark = document.documentElement.classList.contains("dark");
      const bodyHasDark = document.body?.classList.contains("dark");
      const isDark = htmlHasDark || bodyHasDark;
      updateTitleBarTheme(isDark);
    }, 0);
  }

  function syncViewportHeight() {
    const viewportHeight =
      window.visualViewport?.height ??
      document.documentElement?.clientHeight ??
      window.innerHeight;

    if (viewportHeight) {
      document.documentElement.style.setProperty(
        VIEWPORT_VAR,
        `${viewportHeight}px`,
      );
    }
  }

  function observeThemeChanges() {
    let lastKnownTheme = null;

    function checkAndUpdateTheme() {
      // Check both html and body for dark class (some apps use body)
      const htmlHasDark = document.documentElement.classList.contains("dark");
      const bodyHasDark = document.body?.classList.contains("dark");
      const isDark = htmlHasDark || bodyHasDark;

      if (lastKnownTheme !== isDark) {
        lastKnownTheme = isDark;
        updateTitleBarTheme(isDark);
      }
    }

    // Immediate check on setup
    checkAndUpdateTheme();

    // Watch for theme changes on the HTML element
    const themeObserver = new MutationObserver(() => {
      checkAndUpdateTheme();
    });

    themeObserver.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ["class"],
    });

    // Also observe body if it exists
    if (document.body) {
      const bodyObserver = new MutationObserver(() => {
        checkAndUpdateTheme();
      });
      bodyObserver.observe(document.body, {
        attributes: true,
        attributeFilter: ["class"],
      });
    }

    // Also check periodically in case classList is manipulated directly
    // or the theme loads asynchronously after page load
    const intervalId = setInterval(() => {
      checkAndUpdateTheme();
    }, 300);

    // Clean up after 30 seconds once theme should be stable
    setTimeout(() => {
      clearInterval(intervalId);
      // But keep checking every 2 seconds for manual theme changes
      setInterval(() => {
        checkAndUpdateTheme();
      }, 2000);
    }, 30000);
  }

  function init() {
    mountTitleBar();
    syncViewportHeight();
    observeThemeChanges();

    window.addEventListener("resize", syncViewportHeight, { passive: true });
    window.visualViewport?.addEventListener("resize", syncViewportHeight, {
      passive: true,
    });

    // Keep it around even if the app DOM re-renders
    const observer = new MutationObserver(() => {
      if (!document.getElementById(TITLEBAR_ID)) {
        mountTitleBar();
      }
    });

    observer.observe(document.documentElement, {
      childList: true,
      subtree: true,
    });

    // Fallback keep-alive check
    setInterval(() => {
      if (!document.getElementById(TITLEBAR_ID)) {
        mountTitleBar();
      }
    }, 1500);
  }
})();


================================================
FILE: desktop/src-tauri/Cargo.toml
================================================
[package]
name = "onyx"
version = "0.0.0-dev"
description = "Lightweight desktop app for Onyx Cloud"
authors = ["you"]
edition = "2021"

[build-dependencies]
tauri-build = { version = "2.5", features = [] }

[dependencies]
tauri = { version = "2.10", features = ["macos-private-api", "tray-icon", "image-png"] }
tauri-plugin-shell = "2.3.5"
tauri-plugin-window-state = "2.4.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
uuid = { version = "1.0", features = ["v4"] }
directories = "5.0"
tokio = { version = "1", features = ["time"] }
window-vibrancy = "0.7.1"
url = "2.5"

[features]
default = ["custom-protocol"]
custom-protocol = ["tauri/custom-protocol"]
devtools = ["tauri/devtools"]


================================================
FILE: desktop/src-tauri/build.rs
================================================
fn main() {
    tauri_build::build()
}


================================================
FILE: desktop/src-tauri/gen/schemas/acl-manifests.json
================================================
{"core":{"default_permission":{"identifier":"default","description":"Default core plugins set.","permissions":["core:path:default","core:event:default","core:window:default","core:webview:default","core:app:default","core:image:default","core:resources:default","core:menu:default","core:tray:default"]},"permissions":{},"permission_sets":{},"global_scope_schema":null},"core:app":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin.","permissions":["allow-version","allow-name","allow-tauri-version","allow-identifier","allow-bundle-type","allow-register-listener","allow-remove-listener"]},"permissions":{"allow-app-hide":{"identifier":"allow-app-hide","description":"Enables the app_hide command without any pre-configured scope.","commands":{"allow":["app_hide"],"deny":[]}},"allow-app-show":{"identifier":"allow-app-show","description":"Enables the app_show command without any pre-configured scope.","commands":{"allow":["app_show"],"deny":[]}},"allow-bundle-type":{"identifier":"allow-bundle-type","description":"Enables the bundle_type command without any pre-configured scope.","commands":{"allow":["bundle_type"],"deny":[]}},"allow-default-window-icon":{"identifier":"allow-default-window-icon","description":"Enables the default_window_icon command without any pre-configured scope.","commands":{"allow":["default_window_icon"],"deny":[]}},"allow-fetch-data-store-identifiers":{"identifier":"allow-fetch-data-store-identifiers","description":"Enables the fetch_data_store_identifiers command without any pre-configured scope.","commands":{"allow":["fetch_data_store_identifiers"],"deny":[]}},"allow-identifier":{"identifier":"allow-identifier","description":"Enables the identifier command without any pre-configured scope.","commands":{"allow":["identifier"],"deny":[]}},"allow-name":{"identifier":"allow-name","description":"Enables the name command without any pre-configured scope.","commands":{"allow":["name"],"deny":[]}},"allow-register-listener":{"identifier":"allow-register-listener","description":"Enables the register_listener command without any pre-configured scope.","commands":{"allow":["register_listener"],"deny":[]}},"allow-remove-data-store":{"identifier":"allow-remove-data-store","description":"Enables the remove_data_store command without any pre-configured scope.","commands":{"allow":["remove_data_store"],"deny":[]}},"allow-remove-listener":{"identifier":"allow-remove-listener","description":"Enables the remove_listener command without any pre-configured scope.","commands":{"allow":["remove_listener"],"deny":[]}},"allow-set-app-theme":{"identifier":"allow-set-app-theme","description":"Enables the set_app_theme command without any pre-configured scope.","commands":{"allow":["set_app_theme"],"deny":[]}},"allow-set-dock-visibility":{"identifier":"allow-set-dock-visibility","description":"Enables the set_dock_visibility command without any pre-configured scope.","commands":{"allow":["set_dock_visibility"],"deny":[]}},"allow-tauri-version":{"identifier":"allow-tauri-version","description":"Enables the tauri_version command without any pre-configured scope.","commands":{"allow":["tauri_version"],"deny":[]}},"allow-version":{"identifier":"allow-version","description":"Enables the version command without any pre-configured scope.","commands":{"allow":["version"],"deny":[]}},"deny-app-hide":{"identifier":"deny-app-hide","description":"Denies the app_hide command without any pre-configured scope.","commands":{"allow":[],"deny":["app_hide"]}},"deny-app-show":{"identifier":"deny-app-show","description":"Denies the app_show command without any pre-configured scope.","commands":{"allow":[],"deny":["app_show"]}},"deny-bundle-type":{"identifier":"deny-bundle-type","description":"Denies the bundle_type command without any pre-configured scope.","commands":{"allow":[],"deny":["bundle_type"]}},"deny-default-window-icon":{"identifier":"deny-default-window-icon","description":"Denies the default_window_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["default_window_icon"]}},"deny-fetch-data-store-identifiers":{"identifier":"deny-fetch-data-store-identifiers","description":"Denies the fetch_data_store_identifiers command without any pre-configured scope.","commands":{"allow":[],"deny":["fetch_data_store_identifiers"]}},"deny-identifier":{"identifier":"deny-identifier","description":"Denies the identifier command without any pre-configured scope.","commands":{"allow":[],"deny":["identifier"]}},"deny-name":{"identifier":"deny-name","description":"Denies the name command without any pre-configured scope.","commands":{"allow":[],"deny":["name"]}},"deny-register-listener":{"identifier":"deny-register-listener","description":"Denies the register_listener command without any pre-configured scope.","commands":{"allow":[],"deny":["register_listener"]}},"deny-remove-data-store":{"identifier":"deny-remove-data-store","description":"Denies the remove_data_store command without any pre-configured scope.","commands":{"allow":[],"deny":["remove_data_store"]}},"deny-remove-listener":{"identifier":"deny-remove-listener","description":"Denies the remove_listener command without any pre-configured scope.","commands":{"allow":[],"deny":["remove_listener"]}},"deny-set-app-theme":{"identifier":"deny-set-app-theme","description":"Denies the set_app_theme command without any pre-configured scope.","commands":{"allow":[],"deny":["set_app_theme"]}},"deny-set-dock-visibility":{"identifier":"deny-set-dock-visibility","description":"Denies the set_dock_visibility command without any pre-configured scope.","commands":{"allow":[],"deny":["set_dock_visibility"]}},"deny-tauri-version":{"identifier":"deny-tauri-version","description":"Denies the tauri_version command without any pre-configured scope.","commands":{"allow":[],"deny":["tauri_version"]}},"deny-version":{"identifier":"deny-version","description":"Denies the version command without any pre-configured scope.","commands":{"allow":[],"deny":["version"]}}},"permission_sets":{},"global_scope_schema":null},"core:event":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-listen","allow-unlisten","allow-emit","allow-emit-to"]},"permissions":{"allow-emit":{"identifier":"allow-emit","description":"Enables the emit command without any pre-configured scope.","commands":{"allow":["emit"],"deny":[]}},"allow-emit-to":{"identifier":"allow-emit-to","description":"Enables the emit_to command without any pre-configured scope.","commands":{"allow":["emit_to"],"deny":[]}},"allow-listen":{"identifier":"allow-listen","description":"Enables the listen command without any pre-configured scope.","commands":{"allow":["listen"],"deny":[]}},"allow-unlisten":{"identifier":"allow-unlisten","description":"Enables the unlisten command without any pre-configured scope.","commands":{"allow":["unlisten"],"deny":[]}},"deny-emit":{"identifier":"deny-emit","description":"Denies the emit command without any pre-configured scope.","commands":{"allow":[],"deny":["emit"]}},"deny-emit-to":{"identifier":"deny-emit-to","description":"Denies the emit_to command without any pre-configured scope.","commands":{"allow":[],"deny":["emit_to"]}},"deny-listen":{"identifier":"deny-listen","description":"Denies the listen command without any pre-configured scope.","commands":{"allow":[],"deny":["listen"]}},"deny-unlisten":{"identifier":"deny-unlisten","description":"Denies the unlisten command without any pre-configured scope.","commands":{"allow":[],"deny":["unlisten"]}}},"permission_sets":{},"global_scope_schema":null},"core:image":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-new","allow-from-bytes","allow-from-path","allow-rgba","allow-size"]},"permissions":{"allow-from-bytes":{"identifier":"allow-from-bytes","description":"Enables the from_bytes command without any pre-configured scope.","commands":{"allow":["from_bytes"],"deny":[]}},"allow-from-path":{"identifier":"allow-from-path","description":"Enables the from_path command without any pre-configured scope.","commands":{"allow":["from_path"],"deny":[]}},"allow-new":{"identifier":"allow-new","description":"Enables the new command without any pre-configured scope.","commands":{"allow":["new"],"deny":[]}},"allow-rgba":{"identifier":"allow-rgba","description":"Enables the rgba command without any pre-configured scope.","commands":{"allow":["rgba"],"deny":[]}},"allow-size":{"identifier":"allow-size","description":"Enables the size command without any pre-configured scope.","commands":{"allow":["size"],"deny":[]}},"deny-from-bytes":{"identifier":"deny-from-bytes","description":"Denies the from_bytes command without any pre-configured scope.","commands":{"allow":[],"deny":["from_bytes"]}},"deny-from-path":{"identifier":"deny-from-path","description":"Denies the from_path command without any pre-configured scope.","commands":{"allow":[],"deny":["from_path"]}},"deny-new":{"identifier":"deny-new","description":"Denies the new command without any pre-configured scope.","commands":{"allow":[],"deny":["new"]}},"deny-rgba":{"identifier":"deny-rgba","description":"Denies the rgba command without any pre-configured scope.","commands":{"allow":[],"deny":["rgba"]}},"deny-size":{"identifier":"deny-size","description":"Denies the size command without any pre-configured scope.","commands":{"allow":[],"deny":["size"]}}},"permission_sets":{},"global_scope_schema":null},"core:menu":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-new","allow-append","allow-prepend","allow-insert","allow-remove","allow-remove-at","allow-items","allow-get","allow-popup","allow-create-default","allow-set-as-app-menu","allow-set-as-window-menu","allow-text","allow-set-text","allow-is-enabled","allow-set-enabled","allow-set-accelerator","allow-set-as-windows-menu-for-nsapp","allow-set-as-help-menu-for-nsapp","allow-is-checked","allow-set-checked","allow-set-icon"]},"permissions":{"allow-append":{"identifier":"allow-append","description":"Enables the append command without any pre-configured scope.","commands":{"allow":["append"],"deny":[]}},"allow-create-default":{"identifier":"allow-create-default","description":"Enables the create_default command without any pre-configured scope.","commands":{"allow":["create_default"],"deny":[]}},"allow-get":{"identifier":"allow-get","description":"Enables the get command without any pre-configured scope.","commands":{"allow":["get"],"deny":[]}},"allow-insert":{"identifier":"allow-insert","description":"Enables the insert command without any pre-configured scope.","commands":{"allow":["insert"],"deny":[]}},"allow-is-checked":{"identifier":"allow-is-checked","description":"Enables the is_checked command without any pre-configured scope.","commands":{"allow":["is_checked"],"deny":[]}},"allow-is-enabled":{"identifier":"allow-is-enabled","description":"Enables the is_enabled command without any pre-configured scope.","commands":{"allow":["is_enabled"],"deny":[]}},"allow-items":{"identifier":"allow-items","description":"Enables the items command without any pre-configured scope.","commands":{"allow":["items"],"deny":[]}},"allow-new":{"identifier":"allow-new","description":"Enables the new command without any pre-configured scope.","commands":{"allow":["new"],"deny":[]}},"allow-popup":{"identifier":"allow-popup","description":"Enables the popup command without any pre-configured scope.","commands":{"allow":["popup"],"deny":[]}},"allow-prepend":{"identifier":"allow-prepend","description":"Enables the prepend command without any pre-configured scope.","commands":{"allow":["prepend"],"deny":[]}},"allow-remove":{"identifier":"allow-remove","description":"Enables the remove command without any pre-configured scope.","commands":{"allow":["remove"],"deny":[]}},"allow-remove-at":{"identifier":"allow-remove-at","description":"Enables the remove_at command without any pre-configured scope.","commands":{"allow":["remove_at"],"deny":[]}},"allow-set-accelerator":{"identifier":"allow-set-accelerator","description":"Enables the set_accelerator command without any pre-configured scope.","commands":{"allow":["set_accelerator"],"deny":[]}},"allow-set-as-app-menu":{"identifier":"allow-set-as-app-menu","description":"Enables the set_as_app_menu command without any pre-configured scope.","commands":{"allow":["set_as_app_menu"],"deny":[]}},"allow-set-as-help-menu-for-nsapp":{"identifier":"allow-set-as-help-menu-for-nsapp","description":"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.","commands":{"allow":["set_as_help_menu_for_nsapp"],"deny":[]}},"allow-set-as-window-menu":{"identifier":"allow-set-as-window-menu","description":"Enables the set_as_window_menu command without any pre-configured scope.","commands":{"allow":["set_as_window_menu"],"deny":[]}},"allow-set-as-windows-menu-for-nsapp":{"identifier":"allow-set-as-windows-menu-for-nsapp","description":"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.","commands":{"allow":["set_as_windows_menu_for_nsapp"],"deny":[]}},"allow-set-checked":{"identifier":"allow-set-checked","description":"Enables the set_checked command without any pre-configured scope.","commands":{"allow":["set_checked"],"deny":[]}},"allow-set-enabled":{"identifier":"allow-set-enabled","description":"Enables the set_enabled command without any pre-configured scope.","commands":{"allow":["set_enabled"],"deny":[]}},"allow-set-icon":{"identifier":"allow-set-icon","description":"Enables the set_icon command without any pre-configured scope.","commands":{"allow":["set_icon"],"deny":[]}},"allow-set-text":{"identifier":"allow-set-text","description":"Enables the set_text command without any pre-configured scope.","commands":{"allow":["set_text"],"deny":[]}},"allow-text":{"identifier":"allow-text","description":"Enables the text command without any pre-configured scope.","commands":{"allow":["text"],"deny":[]}},"deny-append":{"identifier":"deny-append","description":"Denies the append command without any pre-configured scope.","commands":{"allow":[],"deny":["append"]}},"deny-create-default":{"identifier":"deny-create-default","description":"Denies the create_default command without any pre-configured scope.","commands":{"allow":[],"deny":["create_default"]}},"deny-get":{"identifier":"deny-get","description":"Denies the get command without any pre-configured scope.","commands":{"allow":[],"deny":["get"]}},"deny-insert":{"identifier":"deny-insert","description":"Denies the insert command without any pre-configured scope.","commands":{"allow":[],"deny":["insert"]}},"deny-is-checked":{"identifier":"deny-is-checked","description":"Denies the is_checked command without any pre-configured scope.","commands":{"allow":[],"deny":["is_checked"]}},"deny-is-enabled":{"identifier":"deny-is-enabled","description":"Denies the is_enabled command without any pre-configured scope.","commands":{"allow":[],"deny":["is_enabled"]}},"deny-items":{"identifier":"deny-items","description":"Denies the items command without any pre-configured scope.","commands":{"allow":[],"deny":["items"]}},"deny-new":{"identifier":"deny-new","description":"Denies the new command without any pre-configured scope.","commands":{"allow":[],"deny":["new"]}},"deny-popup":{"identifier":"deny-popup","description":"Denies the popup command without any pre-configured scope.","commands":{"allow":[],"deny":["popup"]}},"deny-prepend":{"identifier":"deny-prepend","description":"Denies the prepend command without any pre-configured scope.","commands":{"allow":[],"deny":["prepend"]}},"deny-remove":{"identifier":"deny-remove","description":"Denies the remove command without any pre-configured scope.","commands":{"allow":[],"deny":["remove"]}},"deny-remove-at":{"identifier":"deny-remove-at","description":"Denies the remove_at command without any pre-configured scope.","commands":{"allow":[],"deny":["remove_at"]}},"deny-set-accelerator":{"identifier":"deny-set-accelerator","description":"Denies the set_accelerator command without any pre-configured scope.","commands":{"allow":[],"deny":["set_accelerator"]}},"deny-set-as-app-menu":{"identifier":"deny-set-as-app-menu","description":"Denies the set_as_app_menu command without any pre-configured scope.","commands":{"allow":[],"deny":["set_as_app_menu"]}},"deny-set-as-help-menu-for-nsapp":{"identifier":"deny-set-as-help-menu-for-nsapp","description":"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.","commands":{"allow":[],"deny":["set_as_help_menu_for_nsapp"]}},"deny-set-as-window-menu":{"identifier":"deny-set-as-window-menu","description":"Denies the set_as_window_menu command without any pre-configured scope.","commands":{"allow":[],"deny":["set_as_window_menu"]}},"deny-set-as-windows-menu-for-nsapp":{"identifier":"deny-set-as-windows-menu-for-nsapp","description":"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.","commands":{"allow":[],"deny":["set_as_windows_menu_for_nsapp"]}},"deny-set-checked":{"identifier":"deny-set-checked","description":"Denies the set_checked command without any pre-configured scope.","commands":{"allow":[],"deny":["set_checked"]}},"deny-set-enabled":{"identifier":"deny-set-enabled","description":"Denies the set_enabled command without any pre-configured scope.","commands":{"allow":[],"deny":["set_enabled"]}},"deny-set-icon":{"identifier":"deny-set-icon","description":"Denies the set_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["set_icon"]}},"deny-set-text":{"identifier":"deny-set-text","description":"Denies the set_text command without any pre-configured scope.","commands":{"allow":[],"deny":["set_text"]}},"deny-text":{"identifier":"deny-text","description":"Denies the text command without any pre-configured scope.","commands":{"allow":[],"deny":["text"]}}},"permission_sets":{},"global_scope_schema":null},"core:path":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-resolve-directory","allow-resolve","allow-normalize","allow-join","allow-dirname","allow-extname","allow-basename","allow-is-absolute"]},"permissions":{"allow-basename":{"identifier":"allow-basename","description":"Enables the basename command without any pre-configured scope.","commands":{"allow":["basename"],"deny":[]}},"allow-dirname":{"identifier":"allow-dirname","description":"Enables the dirname command without any pre-configured scope.","commands":{"allow":["dirname"],"deny":[]}},"allow-extname":{"identifier":"allow-extname","description":"Enables the extname command without any pre-configured scope.","commands":{"allow":["extname"],"deny":[]}},"allow-is-absolute":{"identifier":"allow-is-absolute","description":"Enables the is_absolute command without any pre-configured scope.","commands":{"allow":["is_absolute"],"deny":[]}},"allow-join":{"identifier":"allow-join","description":"Enables the join command without any pre-configured scope.","commands":{"allow":["join"],"deny":[]}},"allow-normalize":{"identifier":"allow-normalize","description":"Enables the normalize command without any pre-configured scope.","commands":{"allow":["normalize"],"deny":[]}},"allow-resolve":{"identifier":"allow-resolve","description":"Enables the resolve command without any pre-configured scope.","commands":{"allow":["resolve"],"deny":[]}},"allow-resolve-directory":{"identifier":"allow-resolve-directory","description":"Enables the resolve_directory command without any pre-configured scope.","commands":{"allow":["resolve_directory"],"deny":[]}},"deny-basename":{"identifier":"deny-basename","description":"Denies the basename command without any pre-configured scope.","commands":{"allow":[],"deny":["basename"]}},"deny-dirname":{"identifier":"deny-dirname","description":"Denies the dirname command without any pre-configured scope.","commands":{"allow":[],"deny":["dirname"]}},"deny-extname":{"identifier":"deny-extname","description":"Denies the extname command without any pre-configured scope.","commands":{"allow":[],"deny":["extname"]}},"deny-is-absolute":{"identifier":"deny-is-absolute","description":"Denies the is_absolute command without any pre-configured scope.","commands":{"allow":[],"deny":["is_absolute"]}},"deny-join":{"identifier":"deny-join","description":"Denies the join command without any pre-configured scope.","commands":{"allow":[],"deny":["join"]}},"deny-normalize":{"identifier":"deny-normalize","description":"Denies the normalize command without any pre-configured scope.","commands":{"allow":[],"deny":["normalize"]}},"deny-resolve":{"identifier":"deny-resolve","description":"Denies the resolve command without any pre-configured scope.","commands":{"allow":[],"deny":["resolve"]}},"deny-resolve-directory":{"identifier":"deny-resolve-directory","description":"Denies the resolve_directory command without any pre-configured scope.","commands":{"allow":[],"deny":["resolve_directory"]}}},"permission_sets":{},"global_scope_schema":null},"core:resources":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-close"]},"permissions":{"allow-close":{"identifier":"allow-close","description":"Enables the close command without any pre-configured scope.","commands":{"allow":["close"],"deny":[]}},"deny-close":{"identifier":"deny-close","description":"Denies the close command without any pre-configured scope.","commands":{"allow":[],"deny":["close"]}}},"permission_sets":{},"global_scope_schema":null},"core:tray":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin, which enables all commands.","permissions":["allow-new","allow-get-by-id","allow-remove-by-id","allow-set-icon","allow-set-menu","allow-set-tooltip","allow-set-title","allow-set-visible","allow-set-temp-dir-path","allow-set-icon-as-template","allow-set-show-menu-on-left-click"]},"permissions":{"allow-get-by-id":{"identifier":"allow-get-by-id","description":"Enables the get_by_id command without any pre-configured scope.","commands":{"allow":["get_by_id"],"deny":[]}},"allow-new":{"identifier":"allow-new","description":"Enables the new command without any pre-configured scope.","commands":{"allow":["new"],"deny":[]}},"allow-remove-by-id":{"identifier":"allow-remove-by-id","description":"Enables the remove_by_id command without any pre-configured scope.","commands":{"allow":["remove_by_id"],"deny":[]}},"allow-set-icon":{"identifier":"allow-set-icon","description":"Enables the set_icon command without any pre-configured scope.","commands":{"allow":["set_icon"],"deny":[]}},"allow-set-icon-as-template":{"identifier":"allow-set-icon-as-template","description":"Enables the set_icon_as_template command without any pre-configured scope.","commands":{"allow":["set_icon_as_template"],"deny":[]}},"allow-set-menu":{"identifier":"allow-set-menu","description":"Enables the set_menu command without any pre-configured scope.","commands":{"allow":["set_menu"],"deny":[]}},"allow-set-show-menu-on-left-click":{"identifier":"allow-set-show-menu-on-left-click","description":"Enables the set_show_menu_on_left_click command without any pre-configured scope.","commands":{"allow":["set_show_menu_on_left_click"],"deny":[]}},"allow-set-temp-dir-path":{"identifier":"allow-set-temp-dir-path","description":"Enables the set_temp_dir_path command without any pre-configured scope.","commands":{"allow":["set_temp_dir_path"],"deny":[]}},"allow-set-title":{"identifier":"allow-set-title","description":"Enables the set_title command without any pre-configured scope.","commands":{"allow":["set_title"],"deny":[]}},"allow-set-tooltip":{"identifier":"allow-set-tooltip","description":"Enables the set_tooltip command without any pre-configured scope.","commands":{"allow":["set_tooltip"],"deny":[]}},"allow-set-visible":{"identifier":"allow-set-visible","description":"Enables the set_visible command without any pre-configured scope.","commands":{"allow":["set_visible"],"deny":[]}},"deny-get-by-id":{"identifier":"deny-get-by-id","description":"Denies the get_by_id command without any pre-configured scope.","commands":{"allow":[],"deny":["get_by_id"]}},"deny-new":{"identifier":"deny-new","description":"Denies the new command without any pre-configured scope.","commands":{"allow":[],"deny":["new"]}},"deny-remove-by-id":{"identifier":"deny-remove-by-id","description":"Denies the remove_by_id command without any pre-configured scope.","commands":{"allow":[],"deny":["remove_by_id"]}},"deny-set-icon":{"identifier":"deny-set-icon","description":"Denies the set_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["set_icon"]}},"deny-set-icon-as-template":{"identifier":"deny-set-icon-as-template","description":"Denies the set_icon_as_template command without any pre-configured scope.","commands":{"allow":[],"deny":["set_icon_as_template"]}},"deny-set-menu":{"identifier":"deny-set-menu","description":"Denies the set_menu command without any pre-configured scope.","commands":{"allow":[],"deny":["set_menu"]}},"deny-set-show-menu-on-left-click":{"identifier":"deny-set-show-menu-on-left-click","description":"Denies the set_show_menu_on_left_click command without any pre-configured scope.","commands":{"allow":[],"deny":["set_show_menu_on_left_click"]}},"deny-set-temp-dir-path":{"identifier":"deny-set-temp-dir-path","description":"Denies the set_temp_dir_path command without any pre-configured scope.","commands":{"allow":[],"deny":["set_temp_dir_path"]}},"deny-set-title":{"identifier":"deny-set-title","description":"Denies the set_title command without any pre-configured scope.","commands":{"allow":[],"deny":["set_title"]}},"deny-set-tooltip":{"identifier":"deny-set-tooltip","description":"Denies the set_tooltip command without any pre-configured scope.","commands":{"allow":[],"deny":["set_tooltip"]}},"deny-set-visible":{"identifier":"deny-set-visible","description":"Denies the set_visible command without any pre-configured scope.","commands":{"allow":[],"deny":["set_visible"]}}},"permission_sets":{},"global_scope_schema":null},"core:webview":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin.","permissions":["allow-get-all-webviews","allow-webview-position","allow-webview-size","allow-internal-toggle-devtools"]},"permissions":{"allow-clear-all-browsing-data":{"identifier":"allow-clear-all-browsing-data","description":"Enables the clear_all_browsing_data command without any pre-configured scope.","commands":{"allow":["clear_all_browsing_data"],"deny":[]}},"allow-create-webview":{"identifier":"allow-create-webview","description":"Enables the create_webview command without any pre-configured scope.","commands":{"allow":["create_webview"],"deny":[]}},"allow-create-webview-window":{"identifier":"allow-create-webview-window","description":"Enables the create_webview_window command without any pre-configured scope.","commands":{"allow":["create_webview_window"],"deny":[]}},"allow-get-all-webviews":{"identifier":"allow-get-all-webviews","description":"Enables the get_all_webviews command without any pre-configured scope.","commands":{"allow":["get_all_webviews"],"deny":[]}},"allow-internal-toggle-devtools":{"identifier":"allow-internal-toggle-devtools","description":"Enables the internal_toggle_devtools command without any pre-configured scope.","commands":{"allow":["internal_toggle_devtools"],"deny":[]}},"allow-print":{"identifier":"allow-print","description":"Enables the print command without any pre-configured scope.","commands":{"allow":["print"],"deny":[]}},"allow-reparent":{"identifier":"allow-reparent","description":"Enables the reparent command without any pre-configured scope.","commands":{"allow":["reparent"],"deny":[]}},"allow-set-webview-auto-resize":{"identifier":"allow-set-webview-auto-resize","description":"Enables the set_webview_auto_resize command without any pre-configured scope.","commands":{"allow":["set_webview_auto_resize"],"deny":[]}},"allow-set-webview-background-color":{"identifier":"allow-set-webview-background-color","description":"Enables the set_webview_background_color command without any pre-configured scope.","commands":{"allow":["set_webview_background_color"],"deny":[]}},"allow-set-webview-focus":{"identifier":"allow-set-webview-focus","description":"Enables the set_webview_focus command without any pre-configured scope.","commands":{"allow":["set_webview_focus"],"deny":[]}},"allow-set-webview-position":{"identifier":"allow-set-webview-position","description":"Enables the set_webview_position command without any pre-configured scope.","commands":{"allow":["set_webview_position"],"deny":[]}},"allow-set-webview-size":{"identifier":"allow-set-webview-size","description":"Enables the set_webview_size command without any pre-configured scope.","commands":{"allow":["set_webview_size"],"deny":[]}},"allow-set-webview-zoom":{"identifier":"allow-set-webview-zoom","description":"Enables the set_webview_zoom command without any pre-configured scope.","commands":{"allow":["set_webview_zoom"],"deny":[]}},"allow-webview-close":{"identifier":"allow-webview-close","description":"Enables the webview_close command without any pre-configured scope.","commands":{"allow":["webview_close"],"deny":[]}},"allow-webview-hide":{"identifier":"allow-webview-hide","description":"Enables the webview_hide command without any pre-configured scope.","commands":{"allow":["webview_hide"],"deny":[]}},"allow-webview-position":{"identifier":"allow-webview-position","description":"Enables the webview_position command without any pre-configured scope.","commands":{"allow":["webview_position"],"deny":[]}},"allow-webview-show":{"identifier":"allow-webview-show","description":"Enables the webview_show command without any pre-configured scope.","commands":{"allow":["webview_show"],"deny":[]}},"allow-webview-size":{"identifier":"allow-webview-size","description":"Enables the webview_size command without any pre-configured scope.","commands":{"allow":["webview_size"],"deny":[]}},"deny-clear-all-browsing-data":{"identifier":"deny-clear-all-browsing-data","description":"Denies the clear_all_browsing_data command without any pre-configured scope.","commands":{"allow":[],"deny":["clear_all_browsing_data"]}},"deny-create-webview":{"identifier":"deny-create-webview","description":"Denies the create_webview command without any pre-configured scope.","commands":{"allow":[],"deny":["create_webview"]}},"deny-create-webview-window":{"identifier":"deny-create-webview-window","description":"Denies the create_webview_window command without any pre-configured scope.","commands":{"allow":[],"deny":["create_webview_window"]}},"deny-get-all-webviews":{"identifier":"deny-get-all-webviews","description":"Denies the get_all_webviews command without any pre-configured scope.","commands":{"allow":[],"deny":["get_all_webviews"]}},"deny-internal-toggle-devtools":{"identifier":"deny-internal-toggle-devtools","description":"Denies the internal_toggle_devtools command without any pre-configured scope.","commands":{"allow":[],"deny":["internal_toggle_devtools"]}},"deny-print":{"identifier":"deny-print","description":"Denies the print command without any pre-configured scope.","commands":{"allow":[],"deny":["print"]}},"deny-reparent":{"identifier":"deny-reparent","description":"Denies the reparent command without any pre-configured scope.","commands":{"allow":[],"deny":["reparent"]}},"deny-set-webview-auto-resize":{"identifier":"deny-set-webview-auto-resize","description":"Denies the set_webview_auto_resize command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_auto_resize"]}},"deny-set-webview-background-color":{"identifier":"deny-set-webview-background-color","description":"Denies the set_webview_background_color command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_background_color"]}},"deny-set-webview-focus":{"identifier":"deny-set-webview-focus","description":"Denies the set_webview_focus command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_focus"]}},"deny-set-webview-position":{"identifier":"deny-set-webview-position","description":"Denies the set_webview_position command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_position"]}},"deny-set-webview-size":{"identifier":"deny-set-webview-size","description":"Denies the set_webview_size command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_size"]}},"deny-set-webview-zoom":{"identifier":"deny-set-webview-zoom","description":"Denies the set_webview_zoom command without any pre-configured scope.","commands":{"allow":[],"deny":["set_webview_zoom"]}},"deny-webview-close":{"identifier":"deny-webview-close","description":"Denies the webview_close command without any pre-configured scope.","commands":{"allow":[],"deny":["webview_close"]}},"deny-webview-hide":{"identifier":"deny-webview-hide","description":"Denies the webview_hide command without any pre-configured scope.","commands":{"allow":[],"deny":["webview_hide"]}},"deny-webview-position":{"identifier":"deny-webview-position","description":"Denies the webview_position command without any pre-configured scope.","commands":{"allow":[],"deny":["webview_position"]}},"deny-webview-show":{"identifier":"deny-webview-show","description":"Denies the webview_show command without any pre-configured scope.","commands":{"allow":[],"deny":["webview_show"]}},"deny-webview-size":{"identifier":"deny-webview-size","description":"Denies the webview_size command without any pre-configured scope.","commands":{"allow":[],"deny":["webview_size"]}}},"permission_sets":{},"global_scope_schema":null},"core:window":{"default_permission":{"identifier":"default","description":"Default permissions for the plugin.","permissions":["allow-get-all-windows","allow-scale-factor","allow-inner-position","allow-outer-position","allow-inner-size","allow-outer-size","allow-is-fullscreen","allow-is-minimized","allow-is-maximized","allow-is-focused","allow-is-decorated","allow-is-resizable","allow-is-maximizable","allow-is-minimizable","allow-is-closable","allow-is-visible","allow-is-enabled","allow-title","allow-current-monitor","allow-primary-monitor","allow-monitor-from-point","allow-available-monitors","allow-cursor-position","allow-theme","allow-is-always-on-top","allow-internal-toggle-maximize"]},"permissions":{"allow-available-monitors":{"identifier":"allow-available-monitors","description":"Enables the available_monitors command without any pre-configured scope.","commands":{"allow":["available_monitors"],"deny":[]}},"allow-center":{"identifier":"allow-center","description":"Enables the center command without any pre-configured scope.","commands":{"allow":["center"],"deny":[]}},"allow-close":{"identifier":"allow-close","description":"Enables the close command without any pre-configured scope.","commands":{"allow":["close"],"deny":[]}},"allow-create":{"identifier":"allow-create","description":"Enables the create command without any pre-configured scope.","commands":{"allow":["create"],"deny":[]}},"allow-current-monitor":{"identifier":"allow-current-monitor","description":"Enables the current_monitor command without any pre-configured scope.","commands":{"allow":["current_monitor"],"deny":[]}},"allow-cursor-position":{"identifier":"allow-cursor-position","description":"Enables the cursor_position command without any pre-configured scope.","commands":{"allow":["cursor_position"],"deny":[]}},"allow-destroy":{"identifier":"allow-destroy","description":"Enables the destroy command without any pre-configured scope.","commands":{"allow":["destroy"],"deny":[]}},"allow-get-all-windows":{"identifier":"allow-get-all-windows","description":"Enables the get_all_windows command without any pre-configured scope.","commands":{"allow":["get_all_windows"],"deny":[]}},"allow-hide":{"identifier":"allow-hide","description":"Enables the hide command without any pre-configured scope.","commands":{"allow":["hide"],"deny":[]}},"allow-inner-position":{"identifier":"allow-inner-position","description":"Enables the inner_position command without any pre-configured scope.","commands":{"allow":["inner_position"],"deny":[]}},"allow-inner-size":{"identifier":"allow-inner-size","description":"Enables the inner_size command without any pre-configured scope.","commands":{"allow":["inner_size"],"deny":[]}},"allow-internal-toggle-maximize":{"identifier":"allow-internal-toggle-maximize","description":"Enables the internal_toggle_maximize command without any pre-configured scope.","commands":{"allow":["internal_toggle_maximize"],"deny":[]}},"allow-is-always-on-top":{"identifier":"allow-is-always-on-top","description":"Enables the is_always_on_top command without any pre-configured scope.","commands":{"allow":["is_always_on_top"],"deny":[]}},"allow-is-closable":{"identifier":"allow-is-closable","description":"Enables the is_closable command without any pre-configured scope.","commands":{"allow":["is_closable"],"deny":[]}},"allow-is-decorated":{"identifier":"allow-is-decorated","description":"Enables the is_decorated command without any pre-configured scope.","commands":{"allow":["is_decorated"],"deny":[]}},"allow-is-enabled":{"identifier":"allow-is-enabled","description":"Enables the is_enabled command without any pre-configured scope.","commands":{"allow":["is_enabled"],"deny":[]}},"allow-is-focused":{"identifier":"allow-is-focused","description":"Enables the is_focused command without any pre-configured scope.","commands":{"allow":["is_focused"],"deny":[]}},"allow-is-fullscreen":{"identifier":"allow-is-fullscreen","description":"Enables the is_fullscreen command without any pre-configured scope.","commands":{"allow":["is_fullscreen"],"deny":[]}},"allow-is-maximizable":{"identifier":"allow-is-maximizable","description":"Enables the is_maximizable command without any pre-configured scope.","commands":{"allow":["is_maximizable"],"deny":[]}},"allow-is-maximized":{"identifier":"allow-is-maximized","description":"Enables the is_maximized command without any pre-configured scope.","commands":{"allow":["is_maximized"],"deny":[]}},"allow-is-minimizable":{"identifier":"allow-is-minimizable","description":"Enables the is_minimizable command without any pre-configured scope.","commands":{"allow":["is_minimizable"],"deny":[]}},"allow-is-minimized":{"identifier":"allow-is-minimized","description":"Enables the is_minimized command without any pre-configured scope.","commands":{"allow":["is_minimized"],"deny":[]}},"allow-is-resizable":{"identifier":"allow-is-resizable","description":"Enables the is_resizable command without any pre-configured scope.","commands":{"allow":["is_resizable"],"deny":[]}},"allow-is-visible":{"identifier":"allow-is-visible","description":"Enables the is_visible command without any pre-configured scope.","commands":{"allow":["is_visible"],"deny":[]}},"allow-maximize":{"identifier":"allow-maximize","description":"Enables the maximize command without any pre-configured scope.","commands":{"allow":["maximize"],"deny":[]}},"allow-minimize":{"identifier":"allow-minimize","description":"Enables the minimize command without any pre-configured scope.","commands":{"allow":["minimize"],"deny":[]}},"allow-monitor-from-point":{"identifier":"allow-monitor-from-point","description":"Enables the monitor_from_point command without any pre-configured scope.","commands":{"allow":["monitor_from_point"],"deny":[]}},"allow-outer-position":{"identifier":"allow-outer-position","description":"Enables the outer_position command without any pre-configured scope.","commands":{"allow":["outer_position"],"deny":[]}},"allow-outer-size":{"identifier":"allow-outer-size","description":"Enables the outer_size command without any pre-configured scope.","commands":{"allow":["outer_size"],"deny":[]}},"allow-primary-monitor":{"identifier":"allow-primary-monitor","description":"Enables the primary_monitor command without any pre-configured scope.","commands":{"allow":["primary_monitor"],"deny":[]}},"allow-request-user-attention":{"identifier":"allow-request-user-attention","description":"Enables the request_user_attention command without any pre-configured scope.","commands":{"allow":["request_user_attention"],"deny":[]}},"allow-scale-factor":{"identifier":"allow-scale-factor","description":"Enables the scale_factor command without any pre-configured scope.","commands":{"allow":["scale_factor"],"deny":[]}},"allow-set-always-on-bottom":{"identifier":"allow-set-always-on-bottom","description":"Enables the set_always_on_bottom command without any pre-configured scope.","commands":{"allow":["set_always_on_bottom"],"deny":[]}},"allow-set-always-on-top":{"identifier":"allow-set-always-on-top","description":"Enables the set_always_on_top command without any pre-configured scope.","commands":{"allow":["set_always_on_top"],"deny":[]}},"allow-set-background-color":{"identifier":"allow-set-background-color","description":"Enables the set_background_color command without any pre-configured scope.","commands":{"allow":["set_background_color"],"deny":[]}},"allow-set-badge-count":{"identifier":"allow-set-badge-count","description":"Enables the set_badge_count command without any pre-configured scope.","commands":{"allow":["set_badge_count"],"deny":[]}},"allow-set-badge-label":{"identifier":"allow-set-badge-label","description":"Enables the set_badge_label command without any pre-configured scope.","commands":{"allow":["set_badge_label"],"deny":[]}},"allow-set-closable":{"identifier":"allow-set-closable","description":"Enables the set_closable command without any pre-configured scope.","commands":{"allow":["set_closable"],"deny":[]}},"allow-set-content-protected":{"identifier":"allow-set-content-protected","description":"Enables the set_content_protected command without any pre-configured scope.","commands":{"allow":["set_content_protected"],"deny":[]}},"allow-set-cursor-grab":{"identifier":"allow-set-cursor-grab","description":"Enables the set_cursor_grab command without any pre-configured scope.","commands":{"allow":["set_cursor_grab"],"deny":[]}},"allow-set-cursor-icon":{"identifier":"allow-set-cursor-icon","description":"Enables the set_cursor_icon command without any pre-configured scope.","commands":{"allow":["set_cursor_icon"],"deny":[]}},"allow-set-cursor-position":{"identifier":"allow-set-cursor-position","description":"Enables the set_cursor_position command without any pre-configured scope.","commands":{"allow":["set_cursor_position"],"deny":[]}},"allow-set-cursor-visible":{"identifier":"allow-set-cursor-visible","description":"Enables the set_cursor_visible command without any pre-configured scope.","commands":{"allow":["set_cursor_visible"],"deny":[]}},"allow-set-decorations":{"identifier":"allow-set-decorations","description":"Enables the set_decorations command without any pre-configured scope.","commands":{"allow":["set_decorations"],"deny":[]}},"allow-set-effects":{"identifier":"allow-set-effects","description":"Enables the set_effects command without any pre-configured scope.","commands":{"allow":["set_effects"],"deny":[]}},"allow-set-enabled":{"identifier":"allow-set-enabled","description":"Enables the set_enabled command without any pre-configured scope.","commands":{"allow":["set_enabled"],"deny":[]}},"allow-set-focus":{"identifier":"allow-set-focus","description":"Enables the set_focus command without any pre-configured scope.","commands":{"allow":["set_focus"],"deny":[]}},"allow-set-focusable":{"identifier":"allow-set-focusable","description":"Enables the set_focusable command without any pre-configured scope.","commands":{"allow":["set_focusable"],"deny":[]}},"allow-set-fullscreen":{"identifier":"allow-set-fullscreen","description":"Enables the set_fullscreen command without any pre-configured scope.","commands":{"allow":["set_fullscreen"],"deny":[]}},"allow-set-icon":{"identifier":"allow-set-icon","description":"Enables the set_icon command without any pre-configured scope.","commands":{"allow":["set_icon"],"deny":[]}},"allow-set-ignore-cursor-events":{"identifier":"allow-set-ignore-cursor-events","description":"Enables the set_ignore_cursor_events command without any pre-configured scope.","commands":{"allow":["set_ignore_cursor_events"],"deny":[]}},"allow-set-max-size":{"identifier":"allow-set-max-size","description":"Enables the set_max_size command without any pre-configured scope.","commands":{"allow":["set_max_size"],"deny":[]}},"allow-set-maximizable":{"identifier":"allow-set-maximizable","description":"Enables the set_maximizable command without any pre-configured scope.","commands":{"allow":["set_maximizable"],"deny":[]}},"allow-set-min-size":{"identifier":"allow-set-min-size","description":"Enables the set_min_size command without any pre-configured scope.","commands":{"allow":["set_min_size"],"deny":[]}},"allow-set-minimizable":{"identifier":"allow-set-minimizable","description":"Enables the set_minimizable command without any pre-configured scope.","commands":{"allow":["set_minimizable"],"deny":[]}},"allow-set-overlay-icon":{"identifier":"allow-set-overlay-icon","description":"Enables the set_overlay_icon command without any pre-configured scope.","commands":{"allow":["set_overlay_icon"],"deny":[]}},"allow-set-position":{"identifier":"allow-set-position","description":"Enables the set_position command without any pre-configured scope.","commands":{"allow":["set_position"],"deny":[]}},"allow-set-progress-bar":{"identifier":"allow-set-progress-bar","description":"Enables the set_progress_bar command without any pre-configured scope.","commands":{"allow":["set_progress_bar"],"deny":[]}},"allow-set-resizable":{"identifier":"allow-set-resizable","description":"Enables the set_resizable command without any pre-configured scope.","commands":{"allow":["set_resizable"],"deny":[]}},"allow-set-shadow":{"identifier":"allow-set-shadow","description":"Enables the set_shadow command without any pre-configured scope.","commands":{"allow":["set_shadow"],"deny":[]}},"allow-set-simple-fullscreen":{"identifier":"allow-set-simple-fullscreen","description":"Enables the set_simple_fullscreen command without any pre-configured scope.","commands":{"allow":["set_simple_fullscreen"],"deny":[]}},"allow-set-size":{"identifier":"allow-set-size","description":"Enables the set_size command without any pre-configured scope.","commands":{"allow":["set_size"],"deny":[]}},"allow-set-size-constraints":{"identifier":"allow-set-size-constraints","description":"Enables the set_size_constraints command without any pre-configured scope.","commands":{"allow":["set_size_constraints"],"deny":[]}},"allow-set-skip-taskbar":{"identifier":"allow-set-skip-taskbar","description":"Enables the set_skip_taskbar command without any pre-configured scope.","commands":{"allow":["set_skip_taskbar"],"deny":[]}},"allow-set-theme":{"identifier":"allow-set-theme","description":"Enables the set_theme command without any pre-configured scope.","commands":{"allow":["set_theme"],"deny":[]}},"allow-set-title":{"identifier":"allow-set-title","description":"Enables the set_title command without any pre-configured scope.","commands":{"allow":["set_title"],"deny":[]}},"allow-set-title-bar-style":{"identifier":"allow-set-title-bar-style","description":"Enables the set_title_bar_style command without any pre-configured scope.","commands":{"allow":["set_title_bar_style"],"deny":[]}},"allow-set-visible-on-all-workspaces":{"identifier":"allow-set-visible-on-all-workspaces","description":"Enables the set_visible_on_all_workspaces command without any pre-configured scope.","commands":{"allow":["set_visible_on_all_workspaces"],"deny":[]}},"allow-show":{"identifier":"allow-show","description":"Enables the show command without any pre-configured scope.","commands":{"allow":["show"],"deny":[]}},"allow-start-dragging":{"identifier":"allow-start-dragging","description":"Enables the start_dragging command without any pre-configured scope.","commands":{"allow":["start_dragging"],"deny":[]}},"allow-start-resize-dragging":{"identifier":"allow-start-resize-dragging","description":"Enables the start_resize_dragging command without any pre-configured scope.","commands":{"allow":["start_resize_dragging"],"deny":[]}},"allow-theme":{"identifier":"allow-theme","description":"Enables the theme command without any pre-configured scope.","commands":{"allow":["theme"],"deny":[]}},"allow-title":{"identifier":"allow-title","description":"Enables the title command without any pre-configured scope.","commands":{"allow":["title"],"deny":[]}},"allow-toggle-maximize":{"identifier":"allow-toggle-maximize","description":"Enables the toggle_maximize command without any pre-configured scope.","commands":{"allow":["toggle_maximize"],"deny":[]}},"allow-unmaximize":{"identifier":"allow-unmaximize","description":"Enables the unmaximize command without any pre-configured scope.","commands":{"allow":["unmaximize"],"deny":[]}},"allow-unminimize":{"identifier":"allow-unminimize","description":"Enables the unminimize command without any pre-configured scope.","commands":{"allow":["unminimize"],"deny":[]}},"deny-available-monitors":{"identifier":"deny-available-monitors","description":"Denies the available_monitors command without any pre-configured scope.","commands":{"allow":[],"deny":["available_monitors"]}},"deny-center":{"identifier":"deny-center","description":"Denies the center command without any pre-configured scope.","commands":{"allow":[],"deny":["center"]}},"deny-close":{"identifier":"deny-close","description":"Denies the close command without any pre-configured scope.","commands":{"allow":[],"deny":["close"]}},"deny-create":{"identifier":"deny-create","description":"Denies the create command without any pre-configured scope.","commands":{"allow":[],"deny":["create"]}},"deny-current-monitor":{"identifier":"deny-current-monitor","description":"Denies the current_monitor command without any pre-configured scope.","commands":{"allow":[],"deny":["current_monitor"]}},"deny-cursor-position":{"identifier":"deny-cursor-position","description":"Denies the cursor_position command without any pre-configured scope.","commands":{"allow":[],"deny":["cursor_position"]}},"deny-destroy":{"identifier":"deny-destroy","description":"Denies the destroy command without any pre-configured scope.","commands":{"allow":[],"deny":["destroy"]}},"deny-get-all-windows":{"identifier":"deny-get-all-windows","description":"Denies the get_all_windows command without any pre-configured scope.","commands":{"allow":[],"deny":["get_all_windows"]}},"deny-hide":{"identifier":"deny-hide","description":"Denies the hide command without any pre-configured scope.","commands":{"allow":[],"deny":["hide"]}},"deny-inner-position":{"identifier":"deny-inner-position","description":"Denies the inner_position command without any pre-configured scope.","commands":{"allow":[],"deny":["inner_position"]}},"deny-inner-size":{"identifier":"deny-inner-size","description":"Denies the inner_size command without any pre-configured scope.","commands":{"allow":[],"deny":["inner_size"]}},"deny-internal-toggle-maximize":{"identifier":"deny-internal-toggle-maximize","description":"Denies the internal_toggle_maximize command without any pre-configured scope.","commands":{"allow":[],"deny":["internal_toggle_maximize"]}},"deny-is-always-on-top":{"identifier":"deny-is-always-on-top","description":"Denies the is_always_on_top command without any pre-configured scope.","commands":{"allow":[],"deny":["is_always_on_top"]}},"deny-is-closable":{"identifier":"deny-is-closable","description":"Denies the is_closable command without any pre-configured scope.","commands":{"allow":[],"deny":["is_closable"]}},"deny-is-decorated":{"identifier":"deny-is-decorated","description":"Denies the is_decorated command without any pre-configured scope.","commands":{"allow":[],"deny":["is_decorated"]}},"deny-is-enabled":{"identifier":"deny-is-enabled","description":"Denies the is_enabled command without any pre-configured scope.","commands":{"allow":[],"deny":["is_enabled"]}},"deny-is-focused":{"identifier":"deny-is-focused","description":"Denies the is_focused command without any pre-configured scope.","commands":{"allow":[],"deny":["is_focused"]}},"deny-is-fullscreen":{"identifier":"deny-is-fullscreen","description":"Denies the is_fullscreen command without any pre-configured scope.","commands":{"allow":[],"deny":["is_fullscreen"]}},"deny-is-maximizable":{"identifier":"deny-is-maximizable","description":"Denies the is_maximizable command without any pre-configured scope.","commands":{"allow":[],"deny":["is_maximizable"]}},"deny-is-maximized":{"identifier":"deny-is-maximized","description":"Denies the is_maximized command without any pre-configured scope.","commands":{"allow":[],"deny":["is_maximized"]}},"deny-is-minimizable":{"identifier":"deny-is-minimizable","description":"Denies the is_minimizable command without any pre-configured scope.","commands":{"allow":[],"deny":["is_minimizable"]}},"deny-is-minimized":{"identifier":"deny-is-minimized","description":"Denies the is_minimized command without any pre-configured scope.","commands":{"allow":[],"deny":["is_minimized"]}},"deny-is-resizable":{"identifier":"deny-is-resizable","description":"Denies the is_resizable command without any pre-configured scope.","commands":{"allow":[],"deny":["is_resizable"]}},"deny-is-visible":{"identifier":"deny-is-visible","description":"Denies the is_visible command without any pre-configured scope.","commands":{"allow":[],"deny":["is_visible"]}},"deny-maximize":{"identifier":"deny-maximize","description":"Denies the maximize command without any pre-configured scope.","commands":{"allow":[],"deny":["maximize"]}},"deny-minimize":{"identifier":"deny-minimize","description":"Denies the minimize command without any pre-configured scope.","commands":{"allow":[],"deny":["minimize"]}},"deny-monitor-from-point":{"identifier":"deny-monitor-from-point","description":"Denies the monitor_from_point command without any pre-configured scope.","commands":{"allow":[],"deny":["monitor_from_point"]}},"deny-outer-position":{"identifier":"deny-outer-position","description":"Denies the outer_position command without any pre-configured scope.","commands":{"allow":[],"deny":["outer_position"]}},"deny-outer-size":{"identifier":"deny-outer-size","description":"Denies the outer_size command without any pre-configured scope.","commands":{"allow":[],"deny":["outer_size"]}},"deny-primary-monitor":{"identifier":"deny-primary-monitor","description":"Denies the primary_monitor command without any pre-configured scope.","commands":{"allow":[],"deny":["primary_monitor"]}},"deny-request-user-attention":{"identifier":"deny-request-user-attention","description":"Denies the request_user_attention command without any pre-configured scope.","commands":{"allow":[],"deny":["request_user_attention"]}},"deny-scale-factor":{"identifier":"deny-scale-factor","description":"Denies the scale_factor command without any pre-configured scope.","commands":{"allow":[],"deny":["scale_factor"]}},"deny-set-always-on-bottom":{"identifier":"deny-set-always-on-bottom","description":"Denies the set_always_on_bottom command without any pre-configured scope.","commands":{"allow":[],"deny":["set_always_on_bottom"]}},"deny-set-always-on-top":{"identifier":"deny-set-always-on-top","description":"Denies the set_always_on_top command without any pre-configured scope.","commands":{"allow":[],"deny":["set_always_on_top"]}},"deny-set-background-color":{"identifier":"deny-set-background-color","description":"Denies the set_background_color command without any pre-configured scope.","commands":{"allow":[],"deny":["set_background_color"]}},"deny-set-badge-count":{"identifier":"deny-set-badge-count","description":"Denies the set_badge_count command without any pre-configured scope.","commands":{"allow":[],"deny":["set_badge_count"]}},"deny-set-badge-label":{"identifier":"deny-set-badge-label","description":"Denies the set_badge_label command without any pre-configured scope.","commands":{"allow":[],"deny":["set_badge_label"]}},"deny-set-closable":{"identifier":"deny-set-closable","description":"Denies the set_closable command without any pre-configured scope.","commands":{"allow":[],"deny":["set_closable"]}},"deny-set-content-protected":{"identifier":"deny-set-content-protected","description":"Denies the set_content_protected command without any pre-configured scope.","commands":{"allow":[],"deny":["set_content_protected"]}},"deny-set-cursor-grab":{"identifier":"deny-set-cursor-grab","description":"Denies the set_cursor_grab command without any pre-configured scope.","commands":{"allow":[],"deny":["set_cursor_grab"]}},"deny-set-cursor-icon":{"identifier":"deny-set-cursor-icon","description":"Denies the set_cursor_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["set_cursor_icon"]}},"deny-set-cursor-position":{"identifier":"deny-set-cursor-position","description":"Denies the set_cursor_position command without any pre-configured scope.","commands":{"allow":[],"deny":["set_cursor_position"]}},"deny-set-cursor-visible":{"identifier":"deny-set-cursor-visible","description":"Denies the set_cursor_visible command without any pre-configured scope.","commands":{"allow":[],"deny":["set_cursor_visible"]}},"deny-set-decorations":{"identifier":"deny-set-decorations","description":"Denies the set_decorations command without any pre-configured scope.","commands":{"allow":[],"deny":["set_decorations"]}},"deny-set-effects":{"identifier":"deny-set-effects","description":"Denies the set_effects command without any pre-configured scope.","commands":{"allow":[],"deny":["set_effects"]}},"deny-set-enabled":{"identifier":"deny-set-enabled","description":"Denies the set_enabled command without any pre-configured scope.","commands":{"allow":[],"deny":["set_enabled"]}},"deny-set-focus":{"identifier":"deny-set-focus","description":"Denies the set_focus command without any pre-configured scope.","commands":{"allow":[],"deny":["set_focus"]}},"deny-set-focusable":{"identifier":"deny-set-focusable","description":"Denies the set_focusable command without any pre-configured scope.","commands":{"allow":[],"deny":["set_focusable"]}},"deny-set-fullscreen":{"identifier":"deny-set-fullscreen","description":"Denies the set_fullscreen command without any pre-configured scope.","commands":{"allow":[],"deny":["set_fullscreen"]}},"deny-set-icon":{"identifier":"deny-set-icon","description":"Denies the set_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["set_icon"]}},"deny-set-ignore-cursor-events":{"identifier":"deny-set-ignore-cursor-events","description":"Denies the set_ignore_cursor_events command without any pre-configured scope.","commands":{"allow":[],"deny":["set_ignore_cursor_events"]}},"deny-set-max-size":{"identifier":"deny-set-max-size","description":"Denies the set_max_size command without any pre-configured scope.","commands":{"allow":[],"deny":["set_max_size"]}},"deny-set-maximizable":{"identifier":"deny-set-maximizable","description":"Denies the set_maximizable command without any pre-configured scope.","commands":{"allow":[],"deny":["set_maximizable"]}},"deny-set-min-size":{"identifier":"deny-set-min-size","description":"Denies the set_min_size command without any pre-configured scope.","commands":{"allow":[],"deny":["set_min_size"]}},"deny-set-minimizable":{"identifier":"deny-set-minimizable","description":"Denies the set_minimizable command without any pre-configured scope.","commands":{"allow":[],"deny":["set_minimizable"]}},"deny-set-overlay-icon":{"identifier":"deny-set-overlay-icon","description":"Denies the set_overlay_icon command without any pre-configured scope.","commands":{"allow":[],"deny":["set_overlay_icon"]}},"deny-set-position":{"identifier":"deny-set-position","description":"Denies the set_position command without any pre-configured scope.","commands":{"allow":[],"deny":["set_position"]}},"deny-set-progress-bar":{"identifier":"deny-set-progress-bar","description":"Denies the set_progress_bar command without any pre-configured scope.","commands":{"allow":[],"deny":["set_progress_bar"]}},"deny-set-resizable":{"identifier":"deny-set-resizable","description":"Denies the set_resizable command without any pre-configured scope.","commands":{"allow":[],"deny":["set_resizable"]}},"deny-set-shadow":{"identifier":"deny-set-shadow","description":"Denies the set_shadow command without any pre-configured scope.","commands":{"allow":[],"deny":["set_shadow"]}},"deny-set-simple-fullscreen":{"identifier":"deny-set-simple-fullscreen","description":"Denies the set_simple_fullscreen command without any pre-configured scope.","commands":{"allow":[],"deny":["set_simple_fullscreen"]}},"deny-set-size":{"identifier":"deny-set-size","description":"Denies the set_size command without any pre-configured scope.","commands":{"allow":[],"deny":["set_size"]}},"deny-set-size-constraints":{"identifier":"deny-set-size-constraints","description":"Denies the set_size_constraints command without any pre-configured scope.","commands":{"allow":[],"deny":["set_size_constraints"]}},"deny-set-skip-taskbar":{"identifier":"deny-set-skip-taskbar","description":"Denies the set_skip_taskbar command without any pre-configured scope.","commands":{"allow":[],"deny":["set_skip_taskbar"]}},"deny-set-theme":{"identifier":"deny-set-theme","description":"Denies the set_theme command without any pre-configured scope.","commands":{"allow":[],"deny":["set_theme"]}},"deny-set-title":{"identifier":"deny-set-title","description":"Denies the set_title command without any pre-configured scope.","commands":{"allow":[],"deny":["set_title"]}},"deny-set-title-bar-style":{"identifier":"deny-set-title-bar-style","description":"Denies the set_title_bar_style command without any pre-configured scope.","commands":{"allow":[],"deny":["set_title_bar_style"]}},"deny-set-visible-on-all-workspaces":{"identifier":"deny-set-visible-on-all-workspaces","description":"Denies the set_visible_on_all_workspaces command without any pre-configured scope.","commands":{"allow":[],"deny":["set_visible_on_all_workspaces"]}},"deny-show":{"identifier":"deny-show","description":"Denies the show command without any pre-configured scope.","commands":{"allow":[],"deny":["show"]}},"deny-start-dragging":{"identifier":"deny-start-dragging","description":"Denies the start_dragging command without any pre-configured scope.","commands":{"allow":[],"deny":["start_dragging"]}},"deny-start-resize-dragging":{"identifier":"deny-start-resize-dragging","description":"Denies the start_resize_dragging command without any pre-configured scope.","commands":{"allow":[],"deny":["start_resize_dragging"]}},"deny-theme":{"identifier":"deny-theme","description":"Denies the theme command without any pre-configured scope.","commands":{"allow":[],"deny":["theme"]}},"deny-title":{"identifier":"deny-title","description":"Denies the title command without any pre-configured scope.","commands":{"allow":[],"deny":["title"]}},"deny-toggle-maximize":{"identifier":"deny-toggle-maximize","description":"Denies the toggle_maximize command without any pre-configured scope.","commands":{"allow":[],"deny":["toggle_maximize"]}},"deny-unmaximize":{"identifier":"deny-unmaximize","description":"Denies the unmaximize command without any pre-configured scope.","commands":{"allow":[],"deny":["unmaximize"]}},"deny-unminimize":{"identifier":"deny-unminimize","description":"Denies the unminimize command without any pre-configured scope.","commands":{"allow":[],"deny":["unminimize"]}}},"permission_sets":{},"global_scope_schema":null},"shell":{"default_permission":{"identifier":"default","description":"This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n","permissions":["allow-open"]},"permissions":{"allow-execute":{"identifier":"allow-execute","description":"Enables the execute command without any pre-configured scope.","commands":{"allow":["execute"],"deny":[]}},"allow-kill":{"identifier":"allow-kill","description":"Enables the kill command without any pre-configured scope.","commands":{"allow":["kill"],"deny":[]}},"allow-open":{"identifier":"allow-open","description":"Enables the open command without any pre-configured scope.","commands":{"allow":["open"],"deny":[]}},"allow-spawn":{"identifier":"allow-spawn","description":"Enables the spawn command without any pre-configured scope.","commands":{"allow":["spawn"],"deny":[]}},"allow-stdin-write":{"identifier":"allow-stdin-write","description":"Enables the stdin_write command without any pre-configured scope.","commands":{"allow":["stdin_write"],"deny":[]}},"deny-execute":{"identifier":"deny-execute","description":"Denies the execute command without any pre-configured scope.","commands":{"allow":[],"deny":["execute"]}},"deny-kill":{"identifier":"deny-kill","description":"Denies the kill command without any pre-configured scope.","commands":{"allow":[],"deny":["kill"]}},"deny-open":{"identifier":"deny-open","description":"Denies the open command without any pre-configured scope.","commands":{"allow":[],"deny":["open"]}},"deny-spawn":{"identifier":"deny-spawn","description":"Denies the spawn command without any pre-configured scope.","commands":{"allow":[],"deny":["spawn"]}},"deny-stdin-write":{"identifier":"deny-stdin-write","description":"Denies the stdin_write command without any pre-configured scope.","commands":{"allow":[],"deny":["stdin_write"]}}},"permission_sets":{},"global_scope_schema":{"$schema":"http://json-schema.org/draft-07/schema#","anyOf":[{"additionalProperties":false,"properties":{"args":{"allOf":[{"$ref":"#/definitions/ShellScopeEntryAllowedArgs"}],"description":"The allowed arguments for the command execution."},"cmd":{"description":"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.","type":"string"},"name":{"description":"The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.","type":"string"}},"required":["cmd","name"],"type":"object"},{"additionalProperties":false,"properties":{"args":{"allOf":[{"$ref":"#/definitions/ShellScopeEntryAllowedArgs"}],"description":"The allowed arguments for the command execution."},"name":{"description":"The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.","type":"string"},"sidecar":{"description":"If this command is a sidecar command.","type":"boolean"}},"required":["name","sidecar"],"type":"object"}],"definitions":{"ShellScopeEntryAllowedArg":{"anyOf":[{"description":"A non-configurable argument that is passed to the command in the order it was specified.","type":"string"},{"additionalProperties":false,"description":"A variable that is set while calling the command from the webview API.","properties":{"raw":{"default":false,"description":"Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\n\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.","type":"boolean"},"validator":{"description":"[regex] validator to require passed values to conform to an expected input.\n\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\n\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\w+` regex would be registered as `^https?://\\w+$`.\n\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>","type":"string"}},"required":["validator"],"type":"object"}],"description":"A command argument allowed to be executed by the webview API."},"ShellScopeEntryAllowedArgs":{"anyOf":[{"description":"Use a simple boolean to allow all or disable all arguments to this command configuration.","type":"boolean"},{"description":"A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.","items":{"$ref":"#/definitions/ShellScopeEntryAllowedArg"},"type":"array"}],"description":"A set of command arguments allowed to be executed by the webview API.\n\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration."}},"description":"Shell scope entry.","title":"ShellScopeEntry"}},"window-state":{"default_permission":{"identifier":"default","description":"This permission set configures what kind of\noperations are available from the window state plugin.\n\n#### Granted Permissions\n\nAll operations are enabled by default.\n\n","permissions":["allow-filename","allow-restore-state","allow-save-window-state"]},"permissions":{"allow-filename":{"identifier":"allow-filename","description":"Enables the filename command without any pre-configured scope.","commands":{"allow":["filename"],"deny":[]}},"allow-restore-state":{"identifier":"allow-restore-state","description":"Enables the restore_state command without any pre-configured scope.","commands":{"allow":["restore_state"],"deny":[]}},"allow-save-window-state":{"identifier":"allow-save-window-state","description":"Enables the save_window_state command without any pre-configured scope.","commands":{"allow":["save_window_state"],"deny":[]}},"deny-filename":{"identifier":"deny-filename","description":"Denies the filename command without any pre-configured scope.","commands":{"allow":[],"deny":["filename"]}},"deny-restore-state":{"identifier":"deny-restore-state","description":"Denies the restore_state command without any pre-configured scope.","commands":{"allow":[],"deny":["restore_state"]}},"deny-save-window-state":{"identifier":"deny-save-window-state","description":"Denies the save_window_state command without any pre-configured scope.","commands":{"allow":[],"deny":["save_window_state"]}}},"permission_sets":{},"global_scope_schema":null}}

================================================
FILE: desktop/src-tauri/gen/schemas/capabilities.json
================================================
{}

================================================
FILE: desktop/src-tauri/gen/schemas/desktop-schema.json
================================================
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "CapabilityFile",
  "description": "Capability formats accepted in a capability file.",
  "anyOf": [
    {
      "description": "A single capability.",
      "allOf": [
        {
          "$ref": "#/definitions/Capability"
        }
      ]
    },
    {
      "description": "A list of capabilities.",
      "type": "array",
      "items": {
        "$ref": "#/definitions/Capability"
      }
    },
    {
      "description": "A list of capabilities.",
      "type": "object",
      "required": [
        "capabilities"
      ],
      "properties": {
        "capabilities": {
          "description": "The list of capabilities.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/Capability"
          }
        }
      }
    }
  ],
  "definitions": {
    "Capability": {
      "description": "A grouping and boundary mechanism developers can use to isolate access to the IPC layer.\n\nIt controls application windows' and webviews' fine grained access to the Tauri core, application, or plugin commands. If a webview or its window is not matching any capability then it has no access to the IPC layer at all.\n\nThis can be done to create groups of windows, based on their required system access, which can reduce impact of frontend vulnerabilities in less privileged windows. Windows can be added to a capability by exact name (e.g. `main-window`) or glob patterns like `*` or `admin-*`. A Window can have none, one, or multiple associated capabilities.\n\n## Example\n\n```json { \"identifier\": \"main-user-files-write\", \"description\": \"This capability allows the `main` window on macOS and Windows access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\", \"windows\": [ \"main\" ], \"permissions\": [ \"core:default\", \"dialog:open\", { \"identifier\": \"fs:allow-write-text-file\", \"allow\": [{ \"path\": \"$HOME/test.txt\" }] }, ], \"platforms\": [\"macOS\",\"windows\"] } ```",
      "type": "object",
      "required": [
        "identifier",
        "permissions"
      ],
      "properties": {
        "identifier": {
          "description": "Identifier of the capability.\n\n## Example\n\n`main-user-files-write`",
          "type": "string"
        },
        "description": {
          "description": "Description of what the capability is intended to allow on associated windows.\n\nIt should contain a description of what the grouped permissions should allow.\n\n## Example\n\nThis capability allows the `main` window access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.",
          "default": "",
          "type": "string"
        },
        "remote": {
          "description": "Configure remote URLs that can use the capability permissions.\n\nThis setting is optional and defaults to not being set, as our default use case is that the content is served from our local application.\n\n:::caution Make sure you understand the security implications of providing remote sources with local system access. :::\n\n## Example\n\n```json { \"urls\": [\"https://*.mydomain.dev\"] } ```",
          "anyOf": [
            {
              "$ref": "#/definitions/CapabilityRemote"
            },
            {
              "type": "null"
            }
          ]
        },
        "local": {
          "description": "Whether this capability is enabled for local app URLs or not. Defaults to `true`.",
          "default": true,
          "type": "boolean"
        },
        "windows": {
          "description": "List of windows that are affected by this capability. Can be a glob pattern.\n\nIf a window label matches any of the patterns in this list, the capability will be enabled on all the webviews of that window, regardless of the value of [`Self::webviews`].\n\nOn multiwebview windows, prefer specifying [`Self::webviews`] and omitting [`Self::windows`] for a fine grained access control.\n\n## Example\n\n`[\"main\"]`",
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "webviews": {
          "description": "List of webviews that are affected by this capability. Can be a glob pattern.\n\nThe capability will be enabled on all the webviews whose label matches any of the patterns in this list, regardless of whether the webview's window label matches a pattern in [`Self::windows`].\n\n## Example\n\n`[\"sub-webview-one\", \"sub-webview-two\"]`",
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "permissions": {
          "description": "List of permissions attached to this capability.\n\nMust include the plugin name as prefix in the form of `${plugin-name}:${permission-name}`. For commands directly implemented in the application itself only `${permission-name}` is required.\n\n## Example\n\n```json [ \"core:default\", \"shell:allow-open\", \"dialog:open\", { \"identifier\": \"fs:allow-write-text-file\", \"allow\": [{ \"path\": \"$HOME/test.txt\" }] } ] ```",
          "type": "array",
          "items": {
            "$ref": "#/definitions/PermissionEntry"
          },
          "uniqueItems": true
        },
        "platforms": {
          "description": "Limit which target platforms this capability applies to.\n\nBy default all platforms are targeted.\n\n## Example\n\n`[\"macOS\",\"windows\"]`",
          "type": [
            "array",
            "null"
          ],
          "items": {
            "$ref": "#/definitions/Target"
          }
        }
      }
    },
    "CapabilityRemote": {
      "description": "Configuration for remote URLs that are associated with the capability.",
      "type": "object",
      "required": [
        "urls"
      ],
      "properties": {
        "urls": {
          "description": "Remote domains this capability refers to using the [URLPattern standard](https://urlpattern.spec.whatwg.org/).\n\n## Examples\n\n- \"https://*.mydomain.dev\": allows subdomains of mydomain.dev - \"https://mydomain.dev/api/*\": allows any subpath of mydomain.dev/api",
          "type": "array",
          "items": {
            "type": "string"
          }
        }
      }
    },
    "PermissionEntry": {
      "description": "An entry for a permission value in a [`Capability`] can be either a raw permission [`Identifier`] or an object that references a permission and extends its scope.",
      "anyOf": [
        {
          "description": "Reference a permission or permission set by identifier.",
          "allOf": [
            {
              "$ref": "#/definitions/Identifier"
            }
          ]
        },
        {
          "description": "Reference a permission or permission set by identifier and extends its scope.",
          "type": "object",
          "allOf": [
            {
              "if": {
                "properties": {
                  "identifier": {
                    "anyOf": [
                      {
                        "description": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`",
                        "type": "string",
                        "const": "shell:default",
                        "markdownDescription": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`"
                      },
                      {
                        "description": "Enables the execute command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-execute",
                        "markdownDescription": "Enables the execute command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the kill command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-kill",
                        "markdownDescription": "Enables the kill command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the open command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-open",
                        "markdownDescription": "Enables the open command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the spawn command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-spawn",
                        "markdownDescription": "Enables the spawn command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the stdin_write command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-stdin-write",
                        "markdownDescription": "Enables the stdin_write command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the execute command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-execute",
                        "markdownDescription": "Denies the execute command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the kill command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-kill",
                        "markdownDescription": "Denies the kill command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the open command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-open",
                        "markdownDescription": "Denies the open command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the spawn command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-spawn",
                        "markdownDescription": "Denies the spawn command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the stdin_write command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-stdin-write",
                        "markdownDescription": "Denies the stdin_write command without any pre-configured scope."
                      }
                    ]
                  }
                }
              },
              "then": {
                "properties": {
                  "allow": {
                    "items": {
                      "title": "ShellScopeEntry",
                      "description": "Shell scope entry.",
                      "anyOf": [
                        {
                          "type": "object",
                          "required": [
                            "cmd",
                            "name"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "cmd": {
                              "description": "The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.",
                              "type": "string"
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            }
                          },
                          "additionalProperties": false
                        },
                        {
                          "type": "object",
                          "required": [
                            "name",
                            "sidecar"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            },
                            "sidecar": {
                              "description": "If this command is a sidecar command.",
                              "type": "boolean"
                            }
                          },
                          "additionalProperties": false
                        }
                      ]
                    }
                  },
                  "deny": {
                    "items": {
                      "title": "ShellScopeEntry",
                      "description": "Shell scope entry.",
                      "anyOf": [
                        {
                          "type": "object",
                          "required": [
                            "cmd",
                            "name"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "cmd": {
                              "description": "The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.",
                              "type": "string"
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            }
                          },
                          "additionalProperties": false
                        },
                        {
                          "type": "object",
                          "required": [
                            "name",
                            "sidecar"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            },
                            "sidecar": {
                              "description": "If this command is a sidecar command.",
                              "type": "boolean"
                            }
                          },
                          "additionalProperties": false
                        }
                      ]
                    }
                  }
                }
              },
              "properties": {
                "identifier": {
                  "description": "Identifier of the permission or permission set.",
                  "allOf": [
                    {
                      "$ref": "#/definitions/Identifier"
                    }
                  ]
                }
              }
            },
            {
              "properties": {
                "identifier": {
                  "description": "Identifier of the permission or permission set.",
                  "allOf": [
                    {
                      "$ref": "#/definitions/Identifier"
                    }
                  ]
                },
                "allow": {
                  "description": "Data that defines what is allowed by the scope.",
                  "type": [
                    "array",
                    "null"
                  ],
                  "items": {
                    "$ref": "#/definitions/Value"
                  }
                },
                "deny": {
                  "description": "Data that defines what is denied by the scope. This should be prioritized by validation logic.",
                  "type": [
                    "array",
                    "null"
                  ],
                  "items": {
                    "$ref": "#/definitions/Value"
                  }
                }
              }
            }
          ],
          "required": [
            "identifier"
          ]
        }
      ]
    },
    "Identifier": {
      "description": "Permission identifier",
      "oneOf": [
        {
          "description": "Default core plugins set.\n#### This default permission set includes:\n\n- `core:path:default`\n- `core:event:default`\n- `core:window:default`\n- `core:webview:default`\n- `core:app:default`\n- `core:image:default`\n- `core:resources:default`\n- `core:menu:default`\n- `core:tray:default`",
          "type": "string",
          "const": "core:default",
          "markdownDescription": "Default core plugins set.\n#### This default permission set includes:\n\n- `core:path:default`\n- `core:event:default`\n- `core:window:default`\n- `core:webview:default`\n- `core:app:default`\n- `core:image:default`\n- `core:resources:default`\n- `core:menu:default`\n- `core:tray:default`"
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-version`\n- `allow-name`\n- `allow-tauri-version`\n- `allow-identifier`\n- `allow-bundle-type`\n- `allow-register-listener`\n- `allow-remove-listener`",
          "type": "string",
          "const": "core:app:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-version`\n- `allow-name`\n- `allow-tauri-version`\n- `allow-identifier`\n- `allow-bundle-type`\n- `allow-register-listener`\n- `allow-remove-listener`"
        },
        {
          "description": "Enables the app_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-app-hide",
          "markdownDescription": "Enables the app_hide command without any pre-configured scope."
        },
        {
          "description": "Enables the app_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-app-show",
          "markdownDescription": "Enables the app_show command without any pre-configured scope."
        },
        {
          "description": "Enables the bundle_type command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-bundle-type",
          "markdownDescription": "Enables the bundle_type command without any pre-configured scope."
        },
        {
          "description": "Enables the default_window_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-default-window-icon",
          "markdownDescription": "Enables the default_window_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the fetch_data_store_identifiers command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-fetch-data-store-identifiers",
          "markdownDescription": "Enables the fetch_data_store_identifiers command without any pre-configured scope."
        },
        {
          "description": "Enables the identifier command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-identifier",
          "markdownDescription": "Enables the identifier command without any pre-configured scope."
        },
        {
          "description": "Enables the name command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-name",
          "markdownDescription": "Enables the name command without any pre-configured scope."
        },
        {
          "description": "Enables the register_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-register-listener",
          "markdownDescription": "Enables the register_listener command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_data_store command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-remove-data-store",
          "markdownDescription": "Enables the remove_data_store command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-remove-listener",
          "markdownDescription": "Enables the remove_listener command without any pre-configured scope."
        },
        {
          "description": "Enables the set_app_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-set-app-theme",
          "markdownDescription": "Enables the set_app_theme command without any pre-configured scope."
        },
        {
          "description": "Enables the set_dock_visibility command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-set-dock-visibility",
          "markdownDescription": "Enables the set_dock_visibility command without any pre-configured scope."
        },
        {
          "description": "Enables the tauri_version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-tauri-version",
          "markdownDescription": "Enables the tauri_version command without any pre-configured scope."
        },
        {
          "description": "Enables the version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-version",
          "markdownDescription": "Enables the version command without any pre-configured scope."
        },
        {
          "description": "Denies the app_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-app-hide",
          "markdownDescription": "Denies the app_hide command without any pre-configured scope."
        },
        {
          "description": "Denies the app_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-app-show",
          "markdownDescription": "Denies the app_show command without any pre-configured scope."
        },
        {
          "description": "Denies the bundle_type command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-bundle-type",
          "markdownDescription": "Denies the bundle_type command without any pre-configured scope."
        },
        {
          "description": "Denies the default_window_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-default-window-icon",
          "markdownDescription": "Denies the default_window_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the fetch_data_store_identifiers command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-fetch-data-store-identifiers",
          "markdownDescription": "Denies the fetch_data_store_identifiers command without any pre-configured scope."
        },
        {
          "description": "Denies the identifier command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-identifier",
          "markdownDescription": "Denies the identifier command without any pre-configured scope."
        },
        {
          "description": "Denies the name command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-name",
          "markdownDescription": "Denies the name command without any pre-configured scope."
        },
        {
          "description": "Denies the register_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-register-listener",
          "markdownDescription": "Denies the register_listener command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_data_store command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-remove-data-store",
          "markdownDescription": "Denies the remove_data_store command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-remove-listener",
          "markdownDescription": "Denies the remove_listener command without any pre-configured scope."
        },
        {
          "description": "Denies the set_app_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-set-app-theme",
          "markdownDescription": "Denies the set_app_theme command without any pre-configured scope."
        },
        {
          "description": "Denies the set_dock_visibility command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-set-dock-visibility",
          "markdownDescription": "Denies the set_dock_visibility command without any pre-configured scope."
        },
        {
          "description": "Denies the tauri_version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-tauri-version",
          "markdownDescription": "Denies the tauri_version command without any pre-configured scope."
        },
        {
          "description": "Denies the version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-version",
          "markdownDescription": "Denies the version command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-listen`\n- `allow-unlisten`\n- `allow-emit`\n- `allow-emit-to`",
          "type": "string",
          "const": "core:event:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-listen`\n- `allow-unlisten`\n- `allow-emit`\n- `allow-emit-to`"
        },
        {
          "description": "Enables the emit command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-emit",
          "markdownDescription": "Enables the emit command without any pre-configured scope."
        },
        {
          "description": "Enables the emit_to command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-emit-to",
          "markdownDescription": "Enables the emit_to command without any pre-configured scope."
        },
        {
          "description": "Enables the listen command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-listen",
          "markdownDescription": "Enables the listen command without any pre-configured scope."
        },
        {
          "description": "Enables the unlisten command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-unlisten",
          "markdownDescription": "Enables the unlisten command without any pre-configured scope."
        },
        {
          "description": "Denies the emit command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-emit",
          "markdownDescription": "Denies the emit command without any pre-configured scope."
        },
        {
          "description": "Denies the emit_to command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-emit-to",
          "markdownDescription": "Denies the emit_to command without any pre-configured scope."
        },
        {
          "description": "Denies the listen command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-listen",
          "markdownDescription": "Denies the listen command without any pre-configured scope."
        },
        {
          "description": "Denies the unlisten command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-unlisten",
          "markdownDescription": "Denies the unlisten command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-from-bytes`\n- `allow-from-path`\n- `allow-rgba`\n- `allow-size`",
          "type": "string",
          "const": "core:image:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-from-bytes`\n- `allow-from-path`\n- `allow-rgba`\n- `allow-size`"
        },
        {
          "description": "Enables the from_bytes command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-from-bytes",
          "markdownDescription": "Enables the from_bytes command without any pre-configured scope."
        },
        {
          "description": "Enables the from_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-from-path",
          "markdownDescription": "Enables the from_path command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the rgba command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-rgba",
          "markdownDescription": "Enables the rgba command without any pre-configured scope."
        },
        {
          "description": "Enables the size command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-size",
          "markdownDescription": "Enables the size command without any pre-configured scope."
        },
        {
          "description": "Denies the from_bytes command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-from-bytes",
          "markdownDescription": "Denies the from_bytes command without any pre-configured scope."
        },
        {
          "description": "Denies the from_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-from-path",
          "markdownDescription": "Denies the from_path command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the rgba command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-rgba",
          "markdownDescription": "Denies the rgba command without any pre-configured scope."
        },
        {
          "description": "Denies the size command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-size",
          "markdownDescription": "Denies the size command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-append`\n- `allow-prepend`\n- `allow-insert`\n- `allow-remove`\n- `allow-remove-at`\n- `allow-items`\n- `allow-get`\n- `allow-popup`\n- `allow-create-default`\n- `allow-set-as-app-menu`\n- `allow-set-as-window-menu`\n- `allow-text`\n- `allow-set-text`\n- `allow-is-enabled`\n- `allow-set-enabled`\n- `allow-set-accelerator`\n- `allow-set-as-windows-menu-for-nsapp`\n- `allow-set-as-help-menu-for-nsapp`\n- `allow-is-checked`\n- `allow-set-checked`\n- `allow-set-icon`",
          "type": "string",
          "const": "core:menu:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-append`\n- `allow-prepend`\n- `allow-insert`\n- `allow-remove`\n- `allow-remove-at`\n- `allow-items`\n- `allow-get`\n- `allow-popup`\n- `allow-create-default`\n- `allow-set-as-app-menu`\n- `allow-set-as-window-menu`\n- `allow-text`\n- `allow-set-text`\n- `allow-is-enabled`\n- `allow-set-enabled`\n- `allow-set-accelerator`\n- `allow-set-as-windows-menu-for-nsapp`\n- `allow-set-as-help-menu-for-nsapp`\n- `allow-is-checked`\n- `allow-set-checked`\n- `allow-set-icon`"
        },
        {
          "description": "Enables the append command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-append",
          "markdownDescription": "Enables the append command without any pre-configured scope."
        },
        {
          "description": "Enables the create_default command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-create-default",
          "markdownDescription": "Enables the create_default command without any pre-configured scope."
        },
        {
          "description": "Enables the get command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-get",
          "markdownDescription": "Enables the get command without any pre-configured scope."
        },
        {
          "description": "Enables the insert command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-insert",
          "markdownDescription": "Enables the insert command without any pre-configured scope."
        },
        {
          "description": "Enables the is_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-is-checked",
          "markdownDescription": "Enables the is_checked command without any pre-configured scope."
        },
        {
          "description": "Enables the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-is-enabled",
          "markdownDescription": "Enables the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the items command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-items",
          "markdownDescription": "Enables the items command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the popup command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-popup",
          "markdownDescription": "Enables the popup command without any pre-configured scope."
        },
        {
          "description": "Enables the prepend command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-prepend",
          "markdownDescription": "Enables the prepend command without any pre-configured scope."
        },
        {
          "description": "Enables the remove command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-remove",
          "markdownDescription": "Enables the remove command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_at command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-remove-at",
          "markdownDescription": "Enables the remove_at command without any pre-configured scope."
        },
        {
          "description": "Enables the set_accelerator command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-accelerator",
          "markdownDescription": "Enables the set_accelerator command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_app_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-app-menu",
          "markdownDescription": "Enables the set_as_app_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-help-menu-for-nsapp",
          "markdownDescription": "Enables the set_as_help_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_window_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-window-menu",
          "markdownDescription": "Enables the set_as_window_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-windows-menu-for-nsapp",
          "markdownDescription": "Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Enables the set_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-checked",
          "markdownDescription": "Enables the set_checked command without any pre-configured scope."
        },
        {
          "description": "Enables the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-enabled",
          "markdownDescription": "Enables the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-text",
          "markdownDescription": "Enables the set_text command without any pre-configured scope."
        },
        {
          "description": "Enables the text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-text",
          "markdownDescription": "Enables the text command without any pre-configured scope."
        },
        {
          "description": "Denies the append command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-append",
          "markdownDescription": "Denies the append command without any pre-configured scope."
        },
        {
          "description": "Denies the create_default command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-create-default",
          "markdownDescription": "Denies the create_default command without any pre-configured scope."
        },
        {
          "description": "Denies the get command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-get",
          "markdownDescription": "Denies the get command without any pre-configured scope."
        },
        {
          "description": "Denies the insert command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-insert",
          "markdownDescription": "Denies the insert command without any pre-configured scope."
        },
        {
          "description": "Denies the is_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-is-checked",
          "markdownDescription": "Denies the is_checked command without any pre-configured scope."
        },
        {
          "description": "Denies the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-is-enabled",
          "markdownDescription": "Denies the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the items command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-items",
          "markdownDescription": "Denies the items command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the popup command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-popup",
          "markdownDescription": "Denies the popup command without any pre-configured scope."
        },
        {
          "description": "Denies the prepend command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-prepend",
          "markdownDescription": "Denies the prepend command without any pre-configured scope."
        },
        {
          "description": "Denies the remove command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-remove",
          "markdownDescription": "Denies the remove command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_at command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-remove-at",
          "markdownDescription": "Denies the remove_at command without any pre-configured scope."
        },
        {
          "description": "Denies the set_accelerator command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-accelerator",
          "markdownDescription": "Denies the set_accelerator command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_app_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-app-menu",
          "markdownDescription": "Denies the set_as_app_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-help-menu-for-nsapp",
          "markdownDescription": "Denies the set_as_help_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_window_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-window-menu",
          "markdownDescription": "Denies the set_as_window_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-windows-menu-for-nsapp",
          "markdownDescription": "Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Denies the set_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-checked",
          "markdownDescription": "Denies the set_checked command without any pre-configured scope."
        },
        {
          "description": "Denies the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-enabled",
          "markdownDescription": "Denies the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-text",
          "markdownDescription": "Denies the set_text command without any pre-configured scope."
        },
        {
          "description": "Denies the text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-text",
          "markdownDescription": "Denies the text command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-resolve-directory`\n- `allow-resolve`\n- `allow-normalize`\n- `allow-join`\n- `allow-dirname`\n- `allow-extname`\n- `allow-basename`\n- `allow-is-absolute`",
          "type": "string",
          "const": "core:path:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-resolve-directory`\n- `allow-resolve`\n- `allow-normalize`\n- `allow-join`\n- `allow-dirname`\n- `allow-extname`\n- `allow-basename`\n- `allow-is-absolute`"
        },
        {
          "description": "Enables the basename command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-basename",
          "markdownDescription": "Enables the basename command without any pre-configured scope."
        },
        {
          "description": "Enables the dirname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-dirname",
          "markdownDescription": "Enables the dirname command without any pre-configured scope."
        },
        {
          "description": "Enables the extname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-extname",
          "markdownDescription": "Enables the extname command without any pre-configured scope."
        },
        {
          "description": "Enables the is_absolute command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-is-absolute",
          "markdownDescription": "Enables the is_absolute command without any pre-configured scope."
        },
        {
          "description": "Enables the join command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-join",
          "markdownDescription": "Enables the join command without any pre-configured scope."
        },
        {
          "description": "Enables the normalize command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-normalize",
          "markdownDescription": "Enables the normalize command without any pre-configured scope."
        },
        {
          "description": "Enables the resolve command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-resolve",
          "markdownDescription": "Enables the resolve command without any pre-configured scope."
        },
        {
          "description": "Enables the resolve_directory command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-resolve-directory",
          "markdownDescription": "Enables the resolve_directory command without any pre-configured scope."
        },
        {
          "description": "Denies the basename command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-basename",
          "markdownDescription": "Denies the basename command without any pre-configured scope."
        },
        {
          "description": "Denies the dirname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-dirname",
          "markdownDescription": "Denies the dirname command without any pre-configured scope."
        },
        {
          "description": "Denies the extname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-extname",
          "markdownDescription": "Denies the extname command without any pre-configured scope."
        },
        {
          "description": "Denies the is_absolute command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-is-absolute",
          "markdownDescription": "Denies the is_absolute command without any pre-configured scope."
        },
        {
          "description": "Denies the join command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-join",
          "markdownDescription": "Denies the join command without any pre-configured scope."
        },
        {
          "description": "Denies the normalize command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-normalize",
          "markdownDescription": "Denies the normalize command without any pre-configured scope."
        },
        {
          "description": "Denies the resolve command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-resolve",
          "markdownDescription": "Denies the resolve command without any pre-configured scope."
        },
        {
          "description": "Denies the resolve_directory command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-resolve-directory",
          "markdownDescription": "Denies the resolve_directory command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-close`",
          "type": "string",
          "const": "core:resources:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-close`"
        },
        {
          "description": "Enables the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:resources:allow-close",
          "markdownDescription": "Enables the close command without any pre-configured scope."
        },
        {
          "description": "Denies the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:resources:deny-close",
          "markdownDescription": "Denies the close command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-get-by-id`\n- `allow-remove-by-id`\n- `allow-set-icon`\n- `allow-set-menu`\n- `allow-set-tooltip`\n- `allow-set-title`\n- `allow-set-visible`\n- `allow-set-temp-dir-path`\n- `allow-set-icon-as-template`\n- `allow-set-show-menu-on-left-click`",
          "type": "string",
          "const": "core:tray:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-get-by-id`\n- `allow-remove-by-id`\n- `allow-set-icon`\n- `allow-set-menu`\n- `allow-set-tooltip`\n- `allow-set-title`\n- `allow-set-visible`\n- `allow-set-temp-dir-path`\n- `allow-set-icon-as-template`\n- `allow-set-show-menu-on-left-click`"
        },
        {
          "description": "Enables the get_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-get-by-id",
          "markdownDescription": "Enables the get_by_id command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-remove-by-id",
          "markdownDescription": "Enables the remove_by_id command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon_as_template command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-icon-as-template",
          "markdownDescription": "Enables the set_icon_as_template command without any pre-configured scope."
        },
        {
          "description": "Enables the set_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-menu",
          "markdownDescription": "Enables the set_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_show_menu_on_left_click command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-show-menu-on-left-click",
          "markdownDescription": "Enables the set_show_menu_on_left_click command without any pre-configured scope."
        },
        {
          "description": "Enables the set_temp_dir_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-temp-dir-path",
          "markdownDescription": "Enables the set_temp_dir_path command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-title",
          "markdownDescription": "Enables the set_title command without any pre-configured scope."
        },
        {
          "description": "Enables the set_tooltip command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-tooltip",
          "markdownDescription": "Enables the set_tooltip command without any pre-configured scope."
        },
        {
          "description": "Enables the set_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-visible",
          "markdownDescription": "Enables the set_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the get_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-get-by-id",
          "markdownDescription": "Denies the get_by_id command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-remove-by-id",
          "markdownDescription": "Denies the remove_by_id command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon_as_template command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-icon-as-template",
          "markdownDescription": "Denies the set_icon_as_template command without any pre-configured scope."
        },
        {
          "description": "Denies the set_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-menu",
          "markdownDescription": "Denies the set_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_show_menu_on_left_click command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-show-menu-on-left-click",
          "markdownDescription": "Denies the set_show_menu_on_left_click command without any pre-configured scope."
        },
        {
          "description": "Denies the set_temp_dir_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-temp-dir-path",
          "markdownDescription": "Denies the set_temp_dir_path command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-title",
          "markdownDescription": "Denies the set_title command without any pre-configured scope."
        },
        {
          "description": "Denies the set_tooltip command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-tooltip",
          "markdownDescription": "Denies the set_tooltip command without any pre-configured scope."
        },
        {
          "description": "Denies the set_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-visible",
          "markdownDescription": "Denies the set_visible command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-webviews`\n- `allow-webview-position`\n- `allow-webview-size`\n- `allow-internal-toggle-devtools`",
          "type": "string",
          "const": "core:webview:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-webviews`\n- `allow-webview-position`\n- `allow-webview-size`\n- `allow-internal-toggle-devtools`"
        },
        {
          "description": "Enables the clear_all_browsing_data command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-clear-all-browsing-data",
          "markdownDescription": "Enables the clear_all_browsing_data command without any pre-configured scope."
        },
        {
          "description": "Enables the create_webview command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-create-webview",
          "markdownDescription": "Enables the create_webview command without any pre-configured scope."
        },
        {
          "description": "Enables the create_webview_window command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-create-webview-window",
          "markdownDescription": "Enables the create_webview_window command without any pre-configured scope."
        },
        {
          "description": "Enables the get_all_webviews command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-get-all-webviews",
          "markdownDescription": "Enables the get_all_webviews command without any pre-configured scope."
        },
        {
          "description": "Enables the internal_toggle_devtools command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-internal-toggle-devtools",
          "markdownDescription": "Enables the internal_toggle_devtools command without any pre-configured scope."
        },
        {
          "description": "Enables the print command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-print",
          "markdownDescription": "Enables the print command without any pre-configured scope."
        },
        {
          "description": "Enables the reparent command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-reparent",
          "markdownDescription": "Enables the reparent command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_auto_resize command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-auto-resize",
          "markdownDescription": "Enables the set_webview_auto_resize command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-background-color",
          "markdownDescription": "Enables the set_webview_background_color command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-focus",
          "markdownDescription": "Enables the set_webview_focus command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-position",
          "markdownDescription": "Enables the set_webview_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-size",
          "markdownDescription": "Enables the set_webview_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_zoom command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-zoom",
          "markdownDescription": "Enables the set_webview_zoom command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_close command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-close",
          "markdownDescription": "Enables the webview_close command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-hide",
          "markdownDescription": "Enables the webview_hide command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-position",
          "markdownDescription": "Enables the webview_position command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-show",
          "markdownDescription": "Enables the webview_show command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-size",
          "markdownDescription": "Enables the webview_size command without any pre-configured scope."
        },
        {
          "description": "Denies the clear_all_browsing_data command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-clear-all-browsing-data",
          "markdownDescription": "Denies the clear_all_browsing_data command without any pre-configured scope."
        },
        {
          "description": "Denies the create_webview command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-create-webview",
          "markdownDescription": "Denies the create_webview command without any pre-configured scope."
        },
        {
          "description": "Denies the create_webview_window command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-create-webview-window",
          "markdownDescription": "Denies the create_webview_window command without any pre-configured scope."
        },
        {
          "description": "Denies the get_all_webviews command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-get-all-webviews",
          "markdownDescription": "Denies the get_all_webviews command without any pre-configured scope."
        },
        {
          "description": "Denies the internal_toggle_devtools command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-internal-toggle-devtools",
          "markdownDescription": "Denies the internal_toggle_devtools command without any pre-configured scope."
        },
        {
          "description": "Denies the print command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-print",
          "markdownDescription": "Denies the print command without any pre-configured scope."
        },
        {
          "description": "Denies the reparent command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-reparent",
          "markdownDescription": "Denies the reparent command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_auto_resize command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-auto-resize",
          "markdownDescription": "Denies the set_webview_auto_resize command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-background-color",
          "markdownDescription": "Denies the set_webview_background_color command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-focus",
          "markdownDescription": "Denies the set_webview_focus command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-position",
          "markdownDescription": "Denies the set_webview_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-size",
          "markdownDescription": "Denies the set_webview_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_zoom command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-zoom",
          "markdownDescription": "Denies the set_webview_zoom command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_close command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-close",
          "markdownDescription": "Denies the webview_close command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-hide",
          "markdownDescription": "Denies the webview_hide command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-position",
          "markdownDescription": "Denies the webview_position command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-show",
          "markdownDescription": "Denies the webview_show command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-size",
          "markdownDescription": "Denies the webview_size command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-windows`\n- `allow-scale-factor`\n- `allow-inner-position`\n- `allow-outer-position`\n- `allow-inner-size`\n- `allow-outer-size`\n- `allow-is-fullscreen`\n- `allow-is-minimized`\n- `allow-is-maximized`\n- `allow-is-focused`\n- `allow-is-decorated`\n- `allow-is-resizable`\n- `allow-is-maximizable`\n- `allow-is-minimizable`\n- `allow-is-closable`\n- `allow-is-visible`\n- `allow-is-enabled`\n- `allow-title`\n- `allow-current-monitor`\n- `allow-primary-monitor`\n- `allow-monitor-from-point`\n- `allow-available-monitors`\n- `allow-cursor-position`\n- `allow-theme`\n- `allow-is-always-on-top`\n- `allow-internal-toggle-maximize`",
          "type": "string",
          "const": "core:window:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-windows`\n- `allow-scale-factor`\n- `allow-inner-position`\n- `allow-outer-position`\n- `allow-inner-size`\n- `allow-outer-size`\n- `allow-is-fullscreen`\n- `allow-is-minimized`\n- `allow-is-maximized`\n- `allow-is-focused`\n- `allow-is-decorated`\n- `allow-is-resizable`\n- `allow-is-maximizable`\n- `allow-is-minimizable`\n- `allow-is-closable`\n- `allow-is-visible`\n- `allow-is-enabled`\n- `allow-title`\n- `allow-current-monitor`\n- `allow-primary-monitor`\n- `allow-monitor-from-point`\n- `allow-available-monitors`\n- `allow-cursor-position`\n- `allow-theme`\n- `allow-is-always-on-top`\n- `allow-internal-toggle-maximize`"
        },
        {
          "description": "Enables the available_monitors command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-available-monitors",
          "markdownDescription": "Enables the available_monitors command without any pre-configured scope."
        },
        {
          "description": "Enables the center command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-center",
          "markdownDescription": "Enables the center command without any pre-configured scope."
        },
        {
          "description": "Enables the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-close",
          "markdownDescription": "Enables the close command without any pre-configured scope."
        },
        {
          "description": "Enables the create command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-create",
          "markdownDescription": "Enables the create command without any pre-configured scope."
        },
        {
          "description": "Enables the current_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-current-monitor",
          "markdownDescription": "Enables the current_monitor command without any pre-configured scope."
        },
        {
          "description": "Enables the cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-cursor-position",
          "markdownDescription": "Enables the cursor_position command without any pre-configured scope."
        },
        {
          "description": "Enables the destroy command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-destroy",
          "markdownDescription": "Enables the destroy command without any pre-configured scope."
        },
        {
          "description": "Enables the get_all_windows command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-get-all-windows",
          "markdownDescription": "Enables the get_all_windows command without any pre-configured scope."
        },
        {
          "description": "Enables the hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-hide",
          "markdownDescription": "Enables the hide command without any pre-configured scope."
        },
        {
          "description": "Enables the inner_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-inner-position",
          "markdownDescription": "Enables the inner_position command without any pre-configured scope."
        },
        {
          "description": "Enables the inner_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-inner-size",
          "markdownDescription": "Enables the inner_size command without any pre-configured scope."
        },
        {
          "description": "Enables the internal_toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-internal-toggle-maximize",
          "markdownDescription": "Enables the internal_toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the is_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-always-on-top",
          "markdownDescription": "Enables the is_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Enables the is_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-closable",
          "markdownDescription": "Enables the is_closable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_decorated command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-decorated",
          "markdownDescription": "Enables the is_decorated command without any pre-configured scope."
        },
        {
          "description": "Enables the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-enabled",
          "markdownDescription": "Enables the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the is_focused command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-focused",
          "markdownDescription": "Enables the is_focused command without any pre-configured scope."
        },
        {
          "description": "Enables the is_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-fullscreen",
          "markdownDescription": "Enables the is_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the is_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-maximizable",
          "markdownDescription": "Enables the is_maximizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_maximized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-maximized",
          "markdownDescription": "Enables the is_maximized command without any pre-configured scope."
        },
        {
          "description": "Enables the is_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-minimizable",
          "markdownDescription": "Enables the is_minimizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_minimized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-minimized",
          "markdownDescription": "Enables the is_minimized command without any pre-configured scope."
        },
        {
          "description": "Enables the is_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-resizable",
          "markdownDescription": "Enables the is_resizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-visible",
          "markdownDescription": "Enables the is_visible command without any pre-configured scope."
        },
        {
          "description": "Enables the maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-maximize",
          "markdownDescription": "Enables the maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the minimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-minimize",
          "markdownDescription": "Enables the minimize command without any pre-configured scope."
        },
        {
          "description": "Enables the monitor_from_point command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-monitor-from-point",
          "markdownDescription": "Enables the monitor_from_point command without any pre-configured scope."
        },
        {
          "description": "Enables the outer_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-outer-position",
          "markdownDescription": "Enables the outer_position command without any pre-configured scope."
        },
        {
          "description": "Enables the outer_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-outer-size",
          "markdownDescription": "Enables the outer_size command without any pre-configured scope."
        },
        {
          "description": "Enables the primary_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-primary-monitor",
          "markdownDescription": "Enables the primary_monitor command without any pre-configured scope."
        },
        {
          "description": "Enables the request_user_attention command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-request-user-attention",
          "markdownDescription": "Enables the request_user_attention command without any pre-configured scope."
        },
        {
          "description": "Enables the scale_factor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-scale-factor",
          "markdownDescription": "Enables the scale_factor command without any pre-configured scope."
        },
        {
          "description": "Enables the set_always_on_bottom command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-always-on-bottom",
          "markdownDescription": "Enables the set_always_on_bottom command without any pre-configured scope."
        },
        {
          "description": "Enables the set_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-always-on-top",
          "markdownDescription": "Enables the set_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Enables the set_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-background-color",
          "markdownDescription": "Enables the set_background_color command without any pre-configured scope."
        },
        {
          "description": "Enables the set_badge_count command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-badge-count",
          "markdownDescription": "Enables the set_badge_count command without any pre-configured scope."
        },
        {
          "description": "Enables the set_badge_label command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-badge-label",
          "markdownDescription": "Enables the set_badge_label command without any pre-configured scope."
        },
        {
          "description": "Enables the set_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-closable",
          "markdownDescription": "Enables the set_closable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_content_protected command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-content-protected",
          "markdownDescription": "Enables the set_content_protected command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_grab command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-grab",
          "markdownDescription": "Enables the set_cursor_grab command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-icon",
          "markdownDescription": "Enables the set_cursor_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-position",
          "markdownDescription": "Enables the set_cursor_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-visible",
          "markdownDescription": "Enables the set_cursor_visible command without any pre-configured scope."
        },
        {
          "description": "Enables the set_decorations command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-decorations",
          "markdownDescription": "Enables the set_decorations command without any pre-configured scope."
        },
        {
          "description": "Enables the set_effects command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-effects",
          "markdownDescription": "Enables the set_effects command without any pre-configured scope."
        },
        {
          "description": "Enables the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-enabled",
          "markdownDescription": "Enables the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the set_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-focus",
          "markdownDescription": "Enables the set_focus command without any pre-configured scope."
        },
        {
          "description": "Enables the set_focusable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-focusable",
          "markdownDescription": "Enables the set_focusable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-fullscreen",
          "markdownDescription": "Enables the set_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_ignore_cursor_events command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-ignore-cursor-events",
          "markdownDescription": "Enables the set_ignore_cursor_events command without any pre-configured scope."
        },
        {
          "description": "Enables the set_max_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-max-size",
          "markdownDescription": "Enables the set_max_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-maximizable",
          "markdownDescription": "Enables the set_maximizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_min_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-min-size",
          "markdownDescription": "Enables the set_min_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-minimizable",
          "markdownDescription": "Enables the set_minimizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_overlay_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-overlay-icon",
          "markdownDescription": "Enables the set_overlay_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-position",
          "markdownDescription": "Enables the set_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_progress_bar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-progress-bar",
          "markdownDescription": "Enables the set_progress_bar command without any pre-configured scope."
        },
        {
          "description": "Enables the set_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-resizable",
          "markdownDescription": "Enables the set_resizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_shadow command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-shadow",
          "markdownDescription": "Enables the set_shadow command without any pre-configured scope."
        },
        {
          "description": "Enables the set_simple_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-simple-fullscreen",
          "markdownDescription": "Enables the set_simple_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the set_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-size",
          "markdownDescription": "Enables the set_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_size_constraints command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-size-constraints",
          "markdownDescription": "Enables the set_size_constraints command without any pre-configured scope."
        },
        {
          "description": "Enables the set_skip_taskbar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-skip-taskbar",
          "markdownDescription": "Enables the set_skip_taskbar command without any pre-configured scope."
        },
        {
          "description": "Enables the set_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-theme",
          "markdownDescription": "Enables the set_theme command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-title",
          "markdownDescription": "Enables the set_title command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title_bar_style command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-title-bar-style",
          "markdownDescription": "Enables the set_title_bar_style command without any pre-configured scope."
        },
        {
          "description": "Enables the set_visible_on_all_workspaces command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-visible-on-all-workspaces",
          "markdownDescription": "Enables the set_visible_on_all_workspaces command without any pre-configured scope."
        },
        {
          "description": "Enables the show command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-show",
          "markdownDescription": "Enables the show command without any pre-configured scope."
        },
        {
          "description": "Enables the start_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-start-dragging",
          "markdownDescription": "Enables the start_dragging command without any pre-configured scope."
        },
        {
          "description": "Enables the start_resize_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-start-resize-dragging",
          "markdownDescription": "Enables the start_resize_dragging command without any pre-configured scope."
        },
        {
          "description": "Enables the theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-theme",
          "markdownDescription": "Enables the theme command without any pre-configured scope."
        },
        {
          "description": "Enables the title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-title",
          "markdownDescription": "Enables the title command without any pre-configured scope."
        },
        {
          "description": "Enables the toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-toggle-maximize",
          "markdownDescription": "Enables the toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the unmaximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-unmaximize",
          "markdownDescription": "Enables the unmaximize command without any pre-configured scope."
        },
        {
          "description": "Enables the unminimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-unminimize",
          "markdownDescription": "Enables the unminimize command without any pre-configured scope."
        },
        {
          "description": "Denies the available_monitors command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-available-monitors",
          "markdownDescription": "Denies the available_monitors command without any pre-configured scope."
        },
        {
          "description": "Denies the center command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-center",
          "markdownDescription": "Denies the center command without any pre-configured scope."
        },
        {
          "description": "Denies the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-close",
          "markdownDescription": "Denies the close command without any pre-configured scope."
        },
        {
          "description": "Denies the create command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-create",
          "markdownDescription": "Denies the create command without any pre-configured scope."
        },
        {
          "description": "Denies the current_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-current-monitor",
          "markdownDescription": "Denies the current_monitor command without any pre-configured scope."
        },
        {
          "description": "Denies the cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-cursor-position",
          "markdownDescription": "Denies the cursor_position command without any pre-configured scope."
        },
        {
          "description": "Denies the destroy command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-destroy",
          "markdownDescription": "Denies the destroy command without any pre-configured scope."
        },
        {
          "description": "Denies the get_all_windows command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-get-all-windows",
          "markdownDescription": "Denies the get_all_windows command without any pre-configured scope."
        },
        {
          "description": "Denies the hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-hide",
          "markdownDescription": "Denies the hide command without any pre-configured scope."
        },
        {
          "description": "Denies the inner_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-inner-position",
          "markdownDescription": "Denies the inner_position command without any pre-configured scope."
        },
        {
          "description": "Denies the inner_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-inner-size",
          "markdownDescription": "Denies the inner_size command without any pre-configured scope."
        },
        {
          "description": "Denies the internal_toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-internal-toggle-maximize",
          "markdownDescription": "Denies the internal_toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the is_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-always-on-top",
          "markdownDescription": "Denies the is_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Denies the is_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-closable",
          "markdownDescription": "Denies the is_closable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_decorated command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-decorated",
          "markdownDescription": "Denies the is_decorated command without any pre-configured scope."
        },
        {
          "description": "Denies the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-enabled",
          "markdownDescription": "Denies the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the is_focused command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-focused",
          "markdownDescription": "Denies the is_focused command without any pre-configured scope."
        },
        {
          "description": "Denies the is_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-fullscreen",
          "markdownDescription": "Denies the is_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the is_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-maximizable",
          "markdownDescription": "Denies the is_maximizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_maximized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-maximized",
          "markdownDescription": "Denies the is_maximized command without any pre-configured scope."
        },
        {
          "description": "Denies the is_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-minimizable",
          "markdownDescription": "Denies the is_minimizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_minimized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-minimized",
          "markdownDescription": "Denies the is_minimized command without any pre-configured scope."
        },
        {
          "description": "Denies the is_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-resizable",
          "markdownDescription": "Denies the is_resizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-visible",
          "markdownDescription": "Denies the is_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-maximize",
          "markdownDescription": "Denies the maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the minimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-minimize",
          "markdownDescription": "Denies the minimize command without any pre-configured scope."
        },
        {
          "description": "Denies the monitor_from_point command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-monitor-from-point",
          "markdownDescription": "Denies the monitor_from_point command without any pre-configured scope."
        },
        {
          "description": "Denies the outer_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-outer-position",
          "markdownDescription": "Denies the outer_position command without any pre-configured scope."
        },
        {
          "description": "Denies the outer_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-outer-size",
          "markdownDescription": "Denies the outer_size command without any pre-configured scope."
        },
        {
          "description": "Denies the primary_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-primary-monitor",
          "markdownDescription": "Denies the primary_monitor command without any pre-configured scope."
        },
        {
          "description": "Denies the request_user_attention command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-request-user-attention",
          "markdownDescription": "Denies the request_user_attention command without any pre-configured scope."
        },
        {
          "description": "Denies the scale_factor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-scale-factor",
          "markdownDescription": "Denies the scale_factor command without any pre-configured scope."
        },
        {
          "description": "Denies the set_always_on_bottom command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-always-on-bottom",
          "markdownDescription": "Denies the set_always_on_bottom command without any pre-configured scope."
        },
        {
          "description": "Denies the set_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-always-on-top",
          "markdownDescription": "Denies the set_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Denies the set_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-background-color",
          "markdownDescription": "Denies the set_background_color command without any pre-configured scope."
        },
        {
          "description": "Denies the set_badge_count command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-badge-count",
          "markdownDescription": "Denies the set_badge_count command without any pre-configured scope."
        },
        {
          "description": "Denies the set_badge_label command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-badge-label",
          "markdownDescription": "Denies the set_badge_label command without any pre-configured scope."
        },
        {
          "description": "Denies the set_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-closable",
          "markdownDescription": "Denies the set_closable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_content_protected command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-content-protected",
          "markdownDescription": "Denies the set_content_protected command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_grab command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-grab",
          "markdownDescription": "Denies the set_cursor_grab command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-icon",
          "markdownDescription": "Denies the set_cursor_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-position",
          "markdownDescription": "Denies the set_cursor_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-visible",
          "markdownDescription": "Denies the set_cursor_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the set_decorations command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-decorations",
          "markdownDescription": "Denies the set_decorations command without any pre-configured scope."
        },
        {
          "description": "Denies the set_effects command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-effects",
          "markdownDescription": "Denies the set_effects command without any pre-configured scope."
        },
        {
          "description": "Denies the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-enabled",
          "markdownDescription": "Denies the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the set_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-focus",
          "markdownDescription": "Denies the set_focus command without any pre-configured scope."
        },
        {
          "description": "Denies the set_focusable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-focusable",
          "markdownDescription": "Denies the set_focusable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-fullscreen",
          "markdownDescription": "Denies the set_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_ignore_cursor_events command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-ignore-cursor-events",
          "markdownDescription": "Denies the set_ignore_cursor_events command without any pre-configured scope."
        },
        {
          "description": "Denies the set_max_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-max-size",
          "markdownDescription": "Denies the set_max_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-maximizable",
          "markdownDescription": "Denies the set_maximizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_min_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-min-size",
          "markdownDescription": "Denies the set_min_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-minimizable",
          "markdownDescription": "Denies the set_minimizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_overlay_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-overlay-icon",
          "markdownDescription": "Denies the set_overlay_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-position",
          "markdownDescription": "Denies the set_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_progress_bar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-progress-bar",
          "markdownDescription": "Denies the set_progress_bar command without any pre-configured scope."
        },
        {
          "description": "Denies the set_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-resizable",
          "markdownDescription": "Denies the set_resizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_shadow command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-shadow",
          "markdownDescription": "Denies the set_shadow command without any pre-configured scope."
        },
        {
          "description": "Denies the set_simple_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-simple-fullscreen",
          "markdownDescription": "Denies the set_simple_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the set_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-size",
          "markdownDescription": "Denies the set_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_size_constraints command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-size-constraints",
          "markdownDescription": "Denies the set_size_constraints command without any pre-configured scope."
        },
        {
          "description": "Denies the set_skip_taskbar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-skip-taskbar",
          "markdownDescription": "Denies the set_skip_taskbar command without any pre-configured scope."
        },
        {
          "description": "Denies the set_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-theme",
          "markdownDescription": "Denies the set_theme command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-title",
          "markdownDescription": "Denies the set_title command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title_bar_style command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-title-bar-style",
          "markdownDescription": "Denies the set_title_bar_style command without any pre-configured scope."
        },
        {
          "description": "Denies the set_visible_on_all_workspaces command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-visible-on-all-workspaces",
          "markdownDescription": "Denies the set_visible_on_all_workspaces command without any pre-configured scope."
        },
        {
          "description": "Denies the show command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-show",
          "markdownDescription": "Denies the show command without any pre-configured scope."
        },
        {
          "description": "Denies the start_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-start-dragging",
          "markdownDescription": "Denies the start_dragging command without any pre-configured scope."
        },
        {
          "description": "Denies the start_resize_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-start-resize-dragging",
          "markdownDescription": "Denies the start_resize_dragging command without any pre-configured scope."
        },
        {
          "description": "Denies the theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-theme",
          "markdownDescription": "Denies the theme command without any pre-configured scope."
        },
        {
          "description": "Denies the title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-title",
          "markdownDescription": "Denies the title command without any pre-configured scope."
        },
        {
          "description": "Denies the toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-toggle-maximize",
          "markdownDescription": "Denies the toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the unmaximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-unmaximize",
          "markdownDescription": "Denies the unmaximize command without any pre-configured scope."
        },
        {
          "description": "Denies the unminimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-unminimize",
          "markdownDescription": "Denies the unminimize command without any pre-configured scope."
        },
        {
          "description": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`",
          "type": "string",
          "const": "shell:default",
          "markdownDescription": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`"
        },
        {
          "description": "Enables the execute command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-execute",
          "markdownDescription": "Enables the execute command without any pre-configured scope."
        },
        {
          "description": "Enables the kill command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-kill",
          "markdownDescription": "Enables the kill command without any pre-configured scope."
        },
        {
          "description": "Enables the open command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-open",
          "markdownDescription": "Enables the open command without any pre-configured scope."
        },
        {
          "description": "Enables the spawn command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-spawn",
          "markdownDescription": "Enables the spawn command without any pre-configured scope."
        },
        {
          "description": "Enables the stdin_write command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-stdin-write",
          "markdownDescription": "Enables the stdin_write command without any pre-configured scope."
        },
        {
          "description": "Denies the execute command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-execute",
          "markdownDescription": "Denies the execute command without any pre-configured scope."
        },
        {
          "description": "Denies the kill command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-kill",
          "markdownDescription": "Denies the kill command without any pre-configured scope."
        },
        {
          "description": "Denies the open command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-open",
          "markdownDescription": "Denies the open command without any pre-configured scope."
        },
        {
          "description": "Denies the spawn command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-spawn",
          "markdownDescription": "Denies the spawn command without any pre-configured scope."
        },
        {
          "description": "Denies the stdin_write command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-stdin-write",
          "markdownDescription": "Denies the stdin_write command without any pre-configured scope."
        },
        {
          "description": "This permission set configures what kind of\noperations are available from the window state plugin.\n\n#### Granted Permissions\n\nAll operations are enabled by default.\n\n\n#### This default permission set includes:\n\n- `allow-filename`\n- `allow-restore-state`\n- `allow-save-window-state`",
          "type": "string",
          "const": "window-state:default",
          "markdownDescription": "This permission set configures what kind of\noperations are available from the window state plugin.\n\n#### Granted Permissions\n\nAll operations are enabled by default.\n\n\n#### This default permission set includes:\n\n- `allow-filename`\n- `allow-restore-state`\n- `allow-save-window-state`"
        },
        {
          "description": "Enables the filename command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-filename",
          "markdownDescription": "Enables the filename command without any pre-configured scope."
        },
        {
          "description": "Enables the restore_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-restore-state",
          "markdownDescription": "Enables the restore_state command without any pre-configured scope."
        },
        {
          "description": "Enables the save_window_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-save-window-state",
          "markdownDescription": "Enables the save_window_state command without any pre-configured scope."
        },
        {
          "description": "Denies the filename command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-filename",
          "markdownDescription": "Denies the filename command without any pre-configured scope."
        },
        {
          "description": "Denies the restore_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-restore-state",
          "markdownDescription": "Denies the restore_state command without any pre-configured scope."
        },
        {
          "description": "Denies the save_window_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-save-window-state",
          "markdownDescription": "Denies the save_window_state command without any pre-configured scope."
        }
      ]
    },
    "Value": {
      "description": "All supported ACL values.",
      "anyOf": [
        {
          "description": "Represents a null JSON value.",
          "type": "null"
        },
        {
          "description": "Represents a [`bool`].",
          "type": "boolean"
        },
        {
          "description": "Represents a valid ACL [`Number`].",
          "allOf": [
            {
              "$ref": "#/definitions/Number"
            }
          ]
        },
        {
          "description": "Represents a [`String`].",
          "type": "string"
        },
        {
          "description": "Represents a list of other [`Value`]s.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/Value"
          }
        },
        {
          "description": "Represents a map of [`String`] keys to [`Value`]s.",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/definitions/Value"
          }
        }
      ]
    },
    "Number": {
      "description": "A valid ACL number.",
      "anyOf": [
        {
          "description": "Represents an [`i64`].",
          "type": "integer",
          "format": "int64"
        },
        {
          "description": "Represents a [`f64`].",
          "type": "number",
          "format": "double"
        }
      ]
    },
    "Target": {
      "description": "Platform target.",
      "oneOf": [
        {
          "description": "MacOS.",
          "type": "string",
          "enum": [
            "macOS"
          ]
        },
        {
          "description": "Windows.",
          "type": "string",
          "enum": [
            "windows"
          ]
        },
        {
          "description": "Linux.",
          "type": "string",
          "enum": [
            "linux"
          ]
        },
        {
          "description": "Android.",
          "type": "string",
          "enum": [
            "android"
          ]
        },
        {
          "description": "iOS.",
          "type": "string",
          "enum": [
            "iOS"
          ]
        }
      ]
    },
    "ShellScopeEntryAllowedArg": {
      "description": "A command argument allowed to be executed by the webview API.",
      "anyOf": [
        {
          "description": "A non-configurable argument that is passed to the command in the order it was specified.",
          "type": "string"
        },
        {
          "description": "A variable that is set while calling the command from the webview API.",
          "type": "object",
          "required": [
            "validator"
          ],
          "properties": {
            "raw": {
              "description": "Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\n\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.",
              "default": false,
              "type": "boolean"
            },
            "validator": {
              "description": "[regex] validator to require passed values to conform to an expected input.\n\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\n\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\w+` regex would be registered as `^https?://\\w+$`.\n\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>",
              "type": "string"
            }
          },
          "additionalProperties": false
        }
      ]
    },
    "ShellScopeEntryAllowedArgs": {
      "description": "A set of command arguments allowed to be executed by the webview API.\n\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration.",
      "anyOf": [
        {
          "description": "Use a simple boolean to allow all or disable all arguments to this command configuration.",
          "type": "boolean"
        },
        {
          "description": "A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/ShellScopeEntryAllowedArg"
          }
        }
      ]
    }
  }
}

================================================
FILE: desktop/src-tauri/gen/schemas/macOS-schema.json
================================================
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "CapabilityFile",
  "description": "Capability formats accepted in a capability file.",
  "anyOf": [
    {
      "description": "A single capability.",
      "allOf": [
        {
          "$ref": "#/definitions/Capability"
        }
      ]
    },
    {
      "description": "A list of capabilities.",
      "type": "array",
      "items": {
        "$ref": "#/definitions/Capability"
      }
    },
    {
      "description": "A list of capabilities.",
      "type": "object",
      "required": [
        "capabilities"
      ],
      "properties": {
        "capabilities": {
          "description": "The list of capabilities.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/Capability"
          }
        }
      }
    }
  ],
  "definitions": {
    "Capability": {
      "description": "A grouping and boundary mechanism developers can use to isolate access to the IPC layer.\n\nIt controls application windows' and webviews' fine grained access to the Tauri core, application, or plugin commands. If a webview or its window is not matching any capability then it has no access to the IPC layer at all.\n\nThis can be done to create groups of windows, based on their required system access, which can reduce impact of frontend vulnerabilities in less privileged windows. Windows can be added to a capability by exact name (e.g. `main-window`) or glob patterns like `*` or `admin-*`. A Window can have none, one, or multiple associated capabilities.\n\n## Example\n\n```json { \"identifier\": \"main-user-files-write\", \"description\": \"This capability allows the `main` window on macOS and Windows access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\", \"windows\": [ \"main\" ], \"permissions\": [ \"core:default\", \"dialog:open\", { \"identifier\": \"fs:allow-write-text-file\", \"allow\": [{ \"path\": \"$HOME/test.txt\" }] }, ], \"platforms\": [\"macOS\",\"windows\"] } ```",
      "type": "object",
      "required": [
        "identifier",
        "permissions"
      ],
      "properties": {
        "identifier": {
          "description": "Identifier of the capability.\n\n## Example\n\n`main-user-files-write`",
          "type": "string"
        },
        "description": {
          "description": "Description of what the capability is intended to allow on associated windows.\n\nIt should contain a description of what the grouped permissions should allow.\n\n## Example\n\nThis capability allows the `main` window access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.",
          "default": "",
          "type": "string"
        },
        "remote": {
          "description": "Configure remote URLs that can use the capability permissions.\n\nThis setting is optional and defaults to not being set, as our default use case is that the content is served from our local application.\n\n:::caution Make sure you understand the security implications of providing remote sources with local system access. :::\n\n## Example\n\n```json { \"urls\": [\"https://*.mydomain.dev\"] } ```",
          "anyOf": [
            {
              "$ref": "#/definitions/CapabilityRemote"
            },
            {
              "type": "null"
            }
          ]
        },
        "local": {
          "description": "Whether this capability is enabled for local app URLs or not. Defaults to `true`.",
          "default": true,
          "type": "boolean"
        },
        "windows": {
          "description": "List of windows that are affected by this capability. Can be a glob pattern.\n\nIf a window label matches any of the patterns in this list, the capability will be enabled on all the webviews of that window, regardless of the value of [`Self::webviews`].\n\nOn multiwebview windows, prefer specifying [`Self::webviews`] and omitting [`Self::windows`] for a fine grained access control.\n\n## Example\n\n`[\"main\"]`",
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "webviews": {
          "description": "List of webviews that are affected by this capability. Can be a glob pattern.\n\nThe capability will be enabled on all the webviews whose label matches any of the patterns in this list, regardless of whether the webview's window label matches a pattern in [`Self::windows`].\n\n## Example\n\n`[\"sub-webview-one\", \"sub-webview-two\"]`",
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "permissions": {
          "description": "List of permissions attached to this capability.\n\nMust include the plugin name as prefix in the form of `${plugin-name}:${permission-name}`. For commands directly implemented in the application itself only `${permission-name}` is required.\n\n## Example\n\n```json [ \"core:default\", \"shell:allow-open\", \"dialog:open\", { \"identifier\": \"fs:allow-write-text-file\", \"allow\": [{ \"path\": \"$HOME/test.txt\" }] } ] ```",
          "type": "array",
          "items": {
            "$ref": "#/definitions/PermissionEntry"
          },
          "uniqueItems": true
        },
        "platforms": {
          "description": "Limit which target platforms this capability applies to.\n\nBy default all platforms are targeted.\n\n## Example\n\n`[\"macOS\",\"windows\"]`",
          "type": [
            "array",
            "null"
          ],
          "items": {
            "$ref": "#/definitions/Target"
          }
        }
      }
    },
    "CapabilityRemote": {
      "description": "Configuration for remote URLs that are associated with the capability.",
      "type": "object",
      "required": [
        "urls"
      ],
      "properties": {
        "urls": {
          "description": "Remote domains this capability refers to using the [URLPattern standard](https://urlpattern.spec.whatwg.org/).\n\n## Examples\n\n- \"https://*.mydomain.dev\": allows subdomains of mydomain.dev - \"https://mydomain.dev/api/*\": allows any subpath of mydomain.dev/api",
          "type": "array",
          "items": {
            "type": "string"
          }
        }
      }
    },
    "PermissionEntry": {
      "description": "An entry for a permission value in a [`Capability`] can be either a raw permission [`Identifier`] or an object that references a permission and extends its scope.",
      "anyOf": [
        {
          "description": "Reference a permission or permission set by identifier.",
          "allOf": [
            {
              "$ref": "#/definitions/Identifier"
            }
          ]
        },
        {
          "description": "Reference a permission or permission set by identifier and extends its scope.",
          "type": "object",
          "allOf": [
            {
              "if": {
                "properties": {
                  "identifier": {
                    "anyOf": [
                      {
                        "description": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`",
                        "type": "string",
                        "const": "shell:default",
                        "markdownDescription": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`"
                      },
                      {
                        "description": "Enables the execute command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-execute",
                        "markdownDescription": "Enables the execute command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the kill command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-kill",
                        "markdownDescription": "Enables the kill command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the open command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-open",
                        "markdownDescription": "Enables the open command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the spawn command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-spawn",
                        "markdownDescription": "Enables the spawn command without any pre-configured scope."
                      },
                      {
                        "description": "Enables the stdin_write command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:allow-stdin-write",
                        "markdownDescription": "Enables the stdin_write command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the execute command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-execute",
                        "markdownDescription": "Denies the execute command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the kill command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-kill",
                        "markdownDescription": "Denies the kill command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the open command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-open",
                        "markdownDescription": "Denies the open command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the spawn command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-spawn",
                        "markdownDescription": "Denies the spawn command without any pre-configured scope."
                      },
                      {
                        "description": "Denies the stdin_write command without any pre-configured scope.",
                        "type": "string",
                        "const": "shell:deny-stdin-write",
                        "markdownDescription": "Denies the stdin_write command without any pre-configured scope."
                      }
                    ]
                  }
                }
              },
              "then": {
                "properties": {
                  "allow": {
                    "items": {
                      "title": "ShellScopeEntry",
                      "description": "Shell scope entry.",
                      "anyOf": [
                        {
                          "type": "object",
                          "required": [
                            "cmd",
                            "name"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "cmd": {
                              "description": "The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.",
                              "type": "string"
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            }
                          },
                          "additionalProperties": false
                        },
                        {
                          "type": "object",
                          "required": [
                            "name",
                            "sidecar"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            },
                            "sidecar": {
                              "description": "If this command is a sidecar command.",
                              "type": "boolean"
                            }
                          },
                          "additionalProperties": false
                        }
                      ]
                    }
                  },
                  "deny": {
                    "items": {
                      "title": "ShellScopeEntry",
                      "description": "Shell scope entry.",
                      "anyOf": [
                        {
                          "type": "object",
                          "required": [
                            "cmd",
                            "name"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "cmd": {
                              "description": "The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.",
                              "type": "string"
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            }
                          },
                          "additionalProperties": false
                        },
                        {
                          "type": "object",
                          "required": [
                            "name",
                            "sidecar"
                          ],
                          "properties": {
                            "args": {
                              "description": "The allowed arguments for the command execution.",
                              "allOf": [
                                {
                                  "$ref": "#/definitions/ShellScopeEntryAllowedArgs"
                                }
                              ]
                            },
                            "name": {
                              "description": "The name for this allowed shell command configuration.\n\nThis name will be used inside of the webview API to call this command along with any specified arguments.",
                              "type": "string"
                            },
                            "sidecar": {
                              "description": "If this command is a sidecar command.",
                              "type": "boolean"
                            }
                          },
                          "additionalProperties": false
                        }
                      ]
                    }
                  }
                }
              },
              "properties": {
                "identifier": {
                  "description": "Identifier of the permission or permission set.",
                  "allOf": [
                    {
                      "$ref": "#/definitions/Identifier"
                    }
                  ]
                }
              }
            },
            {
              "properties": {
                "identifier": {
                  "description": "Identifier of the permission or permission set.",
                  "allOf": [
                    {
                      "$ref": "#/definitions/Identifier"
                    }
                  ]
                },
                "allow": {
                  "description": "Data that defines what is allowed by the scope.",
                  "type": [
                    "array",
                    "null"
                  ],
                  "items": {
                    "$ref": "#/definitions/Value"
                  }
                },
                "deny": {
                  "description": "Data that defines what is denied by the scope. This should be prioritized by validation logic.",
                  "type": [
                    "array",
                    "null"
                  ],
                  "items": {
                    "$ref": "#/definitions/Value"
                  }
                }
              }
            }
          ],
          "required": [
            "identifier"
          ]
        }
      ]
    },
    "Identifier": {
      "description": "Permission identifier",
      "oneOf": [
        {
          "description": "Default core plugins set.\n#### This default permission set includes:\n\n- `core:path:default`\n- `core:event:default`\n- `core:window:default`\n- `core:webview:default`\n- `core:app:default`\n- `core:image:default`\n- `core:resources:default`\n- `core:menu:default`\n- `core:tray:default`",
          "type": "string",
          "const": "core:default",
          "markdownDescription": "Default core plugins set.\n#### This default permission set includes:\n\n- `core:path:default`\n- `core:event:default`\n- `core:window:default`\n- `core:webview:default`\n- `core:app:default`\n- `core:image:default`\n- `core:resources:default`\n- `core:menu:default`\n- `core:tray:default`"
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-version`\n- `allow-name`\n- `allow-tauri-version`\n- `allow-identifier`\n- `allow-bundle-type`\n- `allow-register-listener`\n- `allow-remove-listener`",
          "type": "string",
          "const": "core:app:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-version`\n- `allow-name`\n- `allow-tauri-version`\n- `allow-identifier`\n- `allow-bundle-type`\n- `allow-register-listener`\n- `allow-remove-listener`"
        },
        {
          "description": "Enables the app_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-app-hide",
          "markdownDescription": "Enables the app_hide command without any pre-configured scope."
        },
        {
          "description": "Enables the app_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-app-show",
          "markdownDescription": "Enables the app_show command without any pre-configured scope."
        },
        {
          "description": "Enables the bundle_type command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-bundle-type",
          "markdownDescription": "Enables the bundle_type command without any pre-configured scope."
        },
        {
          "description": "Enables the default_window_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-default-window-icon",
          "markdownDescription": "Enables the default_window_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the fetch_data_store_identifiers command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-fetch-data-store-identifiers",
          "markdownDescription": "Enables the fetch_data_store_identifiers command without any pre-configured scope."
        },
        {
          "description": "Enables the identifier command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-identifier",
          "markdownDescription": "Enables the identifier command without any pre-configured scope."
        },
        {
          "description": "Enables the name command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-name",
          "markdownDescription": "Enables the name command without any pre-configured scope."
        },
        {
          "description": "Enables the register_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-register-listener",
          "markdownDescription": "Enables the register_listener command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_data_store command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-remove-data-store",
          "markdownDescription": "Enables the remove_data_store command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-remove-listener",
          "markdownDescription": "Enables the remove_listener command without any pre-configured scope."
        },
        {
          "description": "Enables the set_app_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-set-app-theme",
          "markdownDescription": "Enables the set_app_theme command without any pre-configured scope."
        },
        {
          "description": "Enables the set_dock_visibility command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-set-dock-visibility",
          "markdownDescription": "Enables the set_dock_visibility command without any pre-configured scope."
        },
        {
          "description": "Enables the tauri_version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-tauri-version",
          "markdownDescription": "Enables the tauri_version command without any pre-configured scope."
        },
        {
          "description": "Enables the version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:allow-version",
          "markdownDescription": "Enables the version command without any pre-configured scope."
        },
        {
          "description": "Denies the app_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-app-hide",
          "markdownDescription": "Denies the app_hide command without any pre-configured scope."
        },
        {
          "description": "Denies the app_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-app-show",
          "markdownDescription": "Denies the app_show command without any pre-configured scope."
        },
        {
          "description": "Denies the bundle_type command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-bundle-type",
          "markdownDescription": "Denies the bundle_type command without any pre-configured scope."
        },
        {
          "description": "Denies the default_window_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-default-window-icon",
          "markdownDescription": "Denies the default_window_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the fetch_data_store_identifiers command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-fetch-data-store-identifiers",
          "markdownDescription": "Denies the fetch_data_store_identifiers command without any pre-configured scope."
        },
        {
          "description": "Denies the identifier command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-identifier",
          "markdownDescription": "Denies the identifier command without any pre-configured scope."
        },
        {
          "description": "Denies the name command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-name",
          "markdownDescription": "Denies the name command without any pre-configured scope."
        },
        {
          "description": "Denies the register_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-register-listener",
          "markdownDescription": "Denies the register_listener command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_data_store command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-remove-data-store",
          "markdownDescription": "Denies the remove_data_store command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_listener command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-remove-listener",
          "markdownDescription": "Denies the remove_listener command without any pre-configured scope."
        },
        {
          "description": "Denies the set_app_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-set-app-theme",
          "markdownDescription": "Denies the set_app_theme command without any pre-configured scope."
        },
        {
          "description": "Denies the set_dock_visibility command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-set-dock-visibility",
          "markdownDescription": "Denies the set_dock_visibility command without any pre-configured scope."
        },
        {
          "description": "Denies the tauri_version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-tauri-version",
          "markdownDescription": "Denies the tauri_version command without any pre-configured scope."
        },
        {
          "description": "Denies the version command without any pre-configured scope.",
          "type": "string",
          "const": "core:app:deny-version",
          "markdownDescription": "Denies the version command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-listen`\n- `allow-unlisten`\n- `allow-emit`\n- `allow-emit-to`",
          "type": "string",
          "const": "core:event:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-listen`\n- `allow-unlisten`\n- `allow-emit`\n- `allow-emit-to`"
        },
        {
          "description": "Enables the emit command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-emit",
          "markdownDescription": "Enables the emit command without any pre-configured scope."
        },
        {
          "description": "Enables the emit_to command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-emit-to",
          "markdownDescription": "Enables the emit_to command without any pre-configured scope."
        },
        {
          "description": "Enables the listen command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-listen",
          "markdownDescription": "Enables the listen command without any pre-configured scope."
        },
        {
          "description": "Enables the unlisten command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:allow-unlisten",
          "markdownDescription": "Enables the unlisten command without any pre-configured scope."
        },
        {
          "description": "Denies the emit command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-emit",
          "markdownDescription": "Denies the emit command without any pre-configured scope."
        },
        {
          "description": "Denies the emit_to command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-emit-to",
          "markdownDescription": "Denies the emit_to command without any pre-configured scope."
        },
        {
          "description": "Denies the listen command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-listen",
          "markdownDescription": "Denies the listen command without any pre-configured scope."
        },
        {
          "description": "Denies the unlisten command without any pre-configured scope.",
          "type": "string",
          "const": "core:event:deny-unlisten",
          "markdownDescription": "Denies the unlisten command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-from-bytes`\n- `allow-from-path`\n- `allow-rgba`\n- `allow-size`",
          "type": "string",
          "const": "core:image:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-from-bytes`\n- `allow-from-path`\n- `allow-rgba`\n- `allow-size`"
        },
        {
          "description": "Enables the from_bytes command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-from-bytes",
          "markdownDescription": "Enables the from_bytes command without any pre-configured scope."
        },
        {
          "description": "Enables the from_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-from-path",
          "markdownDescription": "Enables the from_path command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the rgba command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-rgba",
          "markdownDescription": "Enables the rgba command without any pre-configured scope."
        },
        {
          "description": "Enables the size command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:allow-size",
          "markdownDescription": "Enables the size command without any pre-configured scope."
        },
        {
          "description": "Denies the from_bytes command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-from-bytes",
          "markdownDescription": "Denies the from_bytes command without any pre-configured scope."
        },
        {
          "description": "Denies the from_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-from-path",
          "markdownDescription": "Denies the from_path command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the rgba command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-rgba",
          "markdownDescription": "Denies the rgba command without any pre-configured scope."
        },
        {
          "description": "Denies the size command without any pre-configured scope.",
          "type": "string",
          "const": "core:image:deny-size",
          "markdownDescription": "Denies the size command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-append`\n- `allow-prepend`\n- `allow-insert`\n- `allow-remove`\n- `allow-remove-at`\n- `allow-items`\n- `allow-get`\n- `allow-popup`\n- `allow-create-default`\n- `allow-set-as-app-menu`\n- `allow-set-as-window-menu`\n- `allow-text`\n- `allow-set-text`\n- `allow-is-enabled`\n- `allow-set-enabled`\n- `allow-set-accelerator`\n- `allow-set-as-windows-menu-for-nsapp`\n- `allow-set-as-help-menu-for-nsapp`\n- `allow-is-checked`\n- `allow-set-checked`\n- `allow-set-icon`",
          "type": "string",
          "const": "core:menu:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-append`\n- `allow-prepend`\n- `allow-insert`\n- `allow-remove`\n- `allow-remove-at`\n- `allow-items`\n- `allow-get`\n- `allow-popup`\n- `allow-create-default`\n- `allow-set-as-app-menu`\n- `allow-set-as-window-menu`\n- `allow-text`\n- `allow-set-text`\n- `allow-is-enabled`\n- `allow-set-enabled`\n- `allow-set-accelerator`\n- `allow-set-as-windows-menu-for-nsapp`\n- `allow-set-as-help-menu-for-nsapp`\n- `allow-is-checked`\n- `allow-set-checked`\n- `allow-set-icon`"
        },
        {
          "description": "Enables the append command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-append",
          "markdownDescription": "Enables the append command without any pre-configured scope."
        },
        {
          "description": "Enables the create_default command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-create-default",
          "markdownDescription": "Enables the create_default command without any pre-configured scope."
        },
        {
          "description": "Enables the get command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-get",
          "markdownDescription": "Enables the get command without any pre-configured scope."
        },
        {
          "description": "Enables the insert command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-insert",
          "markdownDescription": "Enables the insert command without any pre-configured scope."
        },
        {
          "description": "Enables the is_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-is-checked",
          "markdownDescription": "Enables the is_checked command without any pre-configured scope."
        },
        {
          "description": "Enables the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-is-enabled",
          "markdownDescription": "Enables the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the items command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-items",
          "markdownDescription": "Enables the items command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the popup command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-popup",
          "markdownDescription": "Enables the popup command without any pre-configured scope."
        },
        {
          "description": "Enables the prepend command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-prepend",
          "markdownDescription": "Enables the prepend command without any pre-configured scope."
        },
        {
          "description": "Enables the remove command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-remove",
          "markdownDescription": "Enables the remove command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_at command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-remove-at",
          "markdownDescription": "Enables the remove_at command without any pre-configured scope."
        },
        {
          "description": "Enables the set_accelerator command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-accelerator",
          "markdownDescription": "Enables the set_accelerator command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_app_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-app-menu",
          "markdownDescription": "Enables the set_as_app_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-help-menu-for-nsapp",
          "markdownDescription": "Enables the set_as_help_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_window_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-window-menu",
          "markdownDescription": "Enables the set_as_window_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-as-windows-menu-for-nsapp",
          "markdownDescription": "Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Enables the set_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-checked",
          "markdownDescription": "Enables the set_checked command without any pre-configured scope."
        },
        {
          "description": "Enables the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-enabled",
          "markdownDescription": "Enables the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-set-text",
          "markdownDescription": "Enables the set_text command without any pre-configured scope."
        },
        {
          "description": "Enables the text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:allow-text",
          "markdownDescription": "Enables the text command without any pre-configured scope."
        },
        {
          "description": "Denies the append command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-append",
          "markdownDescription": "Denies the append command without any pre-configured scope."
        },
        {
          "description": "Denies the create_default command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-create-default",
          "markdownDescription": "Denies the create_default command without any pre-configured scope."
        },
        {
          "description": "Denies the get command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-get",
          "markdownDescription": "Denies the get command without any pre-configured scope."
        },
        {
          "description": "Denies the insert command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-insert",
          "markdownDescription": "Denies the insert command without any pre-configured scope."
        },
        {
          "description": "Denies the is_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-is-checked",
          "markdownDescription": "Denies the is_checked command without any pre-configured scope."
        },
        {
          "description": "Denies the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-is-enabled",
          "markdownDescription": "Denies the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the items command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-items",
          "markdownDescription": "Denies the items command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the popup command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-popup",
          "markdownDescription": "Denies the popup command without any pre-configured scope."
        },
        {
          "description": "Denies the prepend command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-prepend",
          "markdownDescription": "Denies the prepend command without any pre-configured scope."
        },
        {
          "description": "Denies the remove command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-remove",
          "markdownDescription": "Denies the remove command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_at command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-remove-at",
          "markdownDescription": "Denies the remove_at command without any pre-configured scope."
        },
        {
          "description": "Denies the set_accelerator command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-accelerator",
          "markdownDescription": "Denies the set_accelerator command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_app_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-app-menu",
          "markdownDescription": "Denies the set_as_app_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-help-menu-for-nsapp",
          "markdownDescription": "Denies the set_as_help_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_window_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-window-menu",
          "markdownDescription": "Denies the set_as_window_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-as-windows-menu-for-nsapp",
          "markdownDescription": "Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope."
        },
        {
          "description": "Denies the set_checked command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-checked",
          "markdownDescription": "Denies the set_checked command without any pre-configured scope."
        },
        {
          "description": "Denies the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-enabled",
          "markdownDescription": "Denies the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-set-text",
          "markdownDescription": "Denies the set_text command without any pre-configured scope."
        },
        {
          "description": "Denies the text command without any pre-configured scope.",
          "type": "string",
          "const": "core:menu:deny-text",
          "markdownDescription": "Denies the text command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-resolve-directory`\n- `allow-resolve`\n- `allow-normalize`\n- `allow-join`\n- `allow-dirname`\n- `allow-extname`\n- `allow-basename`\n- `allow-is-absolute`",
          "type": "string",
          "const": "core:path:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-resolve-directory`\n- `allow-resolve`\n- `allow-normalize`\n- `allow-join`\n- `allow-dirname`\n- `allow-extname`\n- `allow-basename`\n- `allow-is-absolute`"
        },
        {
          "description": "Enables the basename command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-basename",
          "markdownDescription": "Enables the basename command without any pre-configured scope."
        },
        {
          "description": "Enables the dirname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-dirname",
          "markdownDescription": "Enables the dirname command without any pre-configured scope."
        },
        {
          "description": "Enables the extname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-extname",
          "markdownDescription": "Enables the extname command without any pre-configured scope."
        },
        {
          "description": "Enables the is_absolute command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-is-absolute",
          "markdownDescription": "Enables the is_absolute command without any pre-configured scope."
        },
        {
          "description": "Enables the join command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-join",
          "markdownDescription": "Enables the join command without any pre-configured scope."
        },
        {
          "description": "Enables the normalize command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-normalize",
          "markdownDescription": "Enables the normalize command without any pre-configured scope."
        },
        {
          "description": "Enables the resolve command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-resolve",
          "markdownDescription": "Enables the resolve command without any pre-configured scope."
        },
        {
          "description": "Enables the resolve_directory command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:allow-resolve-directory",
          "markdownDescription": "Enables the resolve_directory command without any pre-configured scope."
        },
        {
          "description": "Denies the basename command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-basename",
          "markdownDescription": "Denies the basename command without any pre-configured scope."
        },
        {
          "description": "Denies the dirname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-dirname",
          "markdownDescription": "Denies the dirname command without any pre-configured scope."
        },
        {
          "description": "Denies the extname command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-extname",
          "markdownDescription": "Denies the extname command without any pre-configured scope."
        },
        {
          "description": "Denies the is_absolute command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-is-absolute",
          "markdownDescription": "Denies the is_absolute command without any pre-configured scope."
        },
        {
          "description": "Denies the join command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-join",
          "markdownDescription": "Denies the join command without any pre-configured scope."
        },
        {
          "description": "Denies the normalize command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-normalize",
          "markdownDescription": "Denies the normalize command without any pre-configured scope."
        },
        {
          "description": "Denies the resolve command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-resolve",
          "markdownDescription": "Denies the resolve command without any pre-configured scope."
        },
        {
          "description": "Denies the resolve_directory command without any pre-configured scope.",
          "type": "string",
          "const": "core:path:deny-resolve-directory",
          "markdownDescription": "Denies the resolve_directory command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-close`",
          "type": "string",
          "const": "core:resources:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-close`"
        },
        {
          "description": "Enables the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:resources:allow-close",
          "markdownDescription": "Enables the close command without any pre-configured scope."
        },
        {
          "description": "Denies the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:resources:deny-close",
          "markdownDescription": "Denies the close command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-get-by-id`\n- `allow-remove-by-id`\n- `allow-set-icon`\n- `allow-set-menu`\n- `allow-set-tooltip`\n- `allow-set-title`\n- `allow-set-visible`\n- `allow-set-temp-dir-path`\n- `allow-set-icon-as-template`\n- `allow-set-show-menu-on-left-click`",
          "type": "string",
          "const": "core:tray:default",
          "markdownDescription": "Default permissions for the plugin, which enables all commands.\n#### This default permission set includes:\n\n- `allow-new`\n- `allow-get-by-id`\n- `allow-remove-by-id`\n- `allow-set-icon`\n- `allow-set-menu`\n- `allow-set-tooltip`\n- `allow-set-title`\n- `allow-set-visible`\n- `allow-set-temp-dir-path`\n- `allow-set-icon-as-template`\n- `allow-set-show-menu-on-left-click`"
        },
        {
          "description": "Enables the get_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-get-by-id",
          "markdownDescription": "Enables the get_by_id command without any pre-configured scope."
        },
        {
          "description": "Enables the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-new",
          "markdownDescription": "Enables the new command without any pre-configured scope."
        },
        {
          "description": "Enables the remove_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-remove-by-id",
          "markdownDescription": "Enables the remove_by_id command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon_as_template command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-icon-as-template",
          "markdownDescription": "Enables the set_icon_as_template command without any pre-configured scope."
        },
        {
          "description": "Enables the set_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-menu",
          "markdownDescription": "Enables the set_menu command without any pre-configured scope."
        },
        {
          "description": "Enables the set_show_menu_on_left_click command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-show-menu-on-left-click",
          "markdownDescription": "Enables the set_show_menu_on_left_click command without any pre-configured scope."
        },
        {
          "description": "Enables the set_temp_dir_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-temp-dir-path",
          "markdownDescription": "Enables the set_temp_dir_path command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-title",
          "markdownDescription": "Enables the set_title command without any pre-configured scope."
        },
        {
          "description": "Enables the set_tooltip command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-tooltip",
          "markdownDescription": "Enables the set_tooltip command without any pre-configured scope."
        },
        {
          "description": "Enables the set_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:allow-set-visible",
          "markdownDescription": "Enables the set_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the get_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-get-by-id",
          "markdownDescription": "Denies the get_by_id command without any pre-configured scope."
        },
        {
          "description": "Denies the new command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-new",
          "markdownDescription": "Denies the new command without any pre-configured scope."
        },
        {
          "description": "Denies the remove_by_id command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-remove-by-id",
          "markdownDescription": "Denies the remove_by_id command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon_as_template command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-icon-as-template",
          "markdownDescription": "Denies the set_icon_as_template command without any pre-configured scope."
        },
        {
          "description": "Denies the set_menu command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-menu",
          "markdownDescription": "Denies the set_menu command without any pre-configured scope."
        },
        {
          "description": "Denies the set_show_menu_on_left_click command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-show-menu-on-left-click",
          "markdownDescription": "Denies the set_show_menu_on_left_click command without any pre-configured scope."
        },
        {
          "description": "Denies the set_temp_dir_path command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-temp-dir-path",
          "markdownDescription": "Denies the set_temp_dir_path command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-title",
          "markdownDescription": "Denies the set_title command without any pre-configured scope."
        },
        {
          "description": "Denies the set_tooltip command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-tooltip",
          "markdownDescription": "Denies the set_tooltip command without any pre-configured scope."
        },
        {
          "description": "Denies the set_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:tray:deny-set-visible",
          "markdownDescription": "Denies the set_visible command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-webviews`\n- `allow-webview-position`\n- `allow-webview-size`\n- `allow-internal-toggle-devtools`",
          "type": "string",
          "const": "core:webview:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-webviews`\n- `allow-webview-position`\n- `allow-webview-size`\n- `allow-internal-toggle-devtools`"
        },
        {
          "description": "Enables the clear_all_browsing_data command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-clear-all-browsing-data",
          "markdownDescription": "Enables the clear_all_browsing_data command without any pre-configured scope."
        },
        {
          "description": "Enables the create_webview command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-create-webview",
          "markdownDescription": "Enables the create_webview command without any pre-configured scope."
        },
        {
          "description": "Enables the create_webview_window command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-create-webview-window",
          "markdownDescription": "Enables the create_webview_window command without any pre-configured scope."
        },
        {
          "description": "Enables the get_all_webviews command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-get-all-webviews",
          "markdownDescription": "Enables the get_all_webviews command without any pre-configured scope."
        },
        {
          "description": "Enables the internal_toggle_devtools command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-internal-toggle-devtools",
          "markdownDescription": "Enables the internal_toggle_devtools command without any pre-configured scope."
        },
        {
          "description": "Enables the print command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-print",
          "markdownDescription": "Enables the print command without any pre-configured scope."
        },
        {
          "description": "Enables the reparent command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-reparent",
          "markdownDescription": "Enables the reparent command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_auto_resize command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-auto-resize",
          "markdownDescription": "Enables the set_webview_auto_resize command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-background-color",
          "markdownDescription": "Enables the set_webview_background_color command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-focus",
          "markdownDescription": "Enables the set_webview_focus command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-position",
          "markdownDescription": "Enables the set_webview_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-size",
          "markdownDescription": "Enables the set_webview_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_webview_zoom command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-set-webview-zoom",
          "markdownDescription": "Enables the set_webview_zoom command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_close command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-close",
          "markdownDescription": "Enables the webview_close command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-hide",
          "markdownDescription": "Enables the webview_hide command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-position",
          "markdownDescription": "Enables the webview_position command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-show",
          "markdownDescription": "Enables the webview_show command without any pre-configured scope."
        },
        {
          "description": "Enables the webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:allow-webview-size",
          "markdownDescription": "Enables the webview_size command without any pre-configured scope."
        },
        {
          "description": "Denies the clear_all_browsing_data command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-clear-all-browsing-data",
          "markdownDescription": "Denies the clear_all_browsing_data command without any pre-configured scope."
        },
        {
          "description": "Denies the create_webview command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-create-webview",
          "markdownDescription": "Denies the create_webview command without any pre-configured scope."
        },
        {
          "description": "Denies the create_webview_window command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-create-webview-window",
          "markdownDescription": "Denies the create_webview_window command without any pre-configured scope."
        },
        {
          "description": "Denies the get_all_webviews command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-get-all-webviews",
          "markdownDescription": "Denies the get_all_webviews command without any pre-configured scope."
        },
        {
          "description": "Denies the internal_toggle_devtools command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-internal-toggle-devtools",
          "markdownDescription": "Denies the internal_toggle_devtools command without any pre-configured scope."
        },
        {
          "description": "Denies the print command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-print",
          "markdownDescription": "Denies the print command without any pre-configured scope."
        },
        {
          "description": "Denies the reparent command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-reparent",
          "markdownDescription": "Denies the reparent command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_auto_resize command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-auto-resize",
          "markdownDescription": "Denies the set_webview_auto_resize command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-background-color",
          "markdownDescription": "Denies the set_webview_background_color command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-focus",
          "markdownDescription": "Denies the set_webview_focus command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-position",
          "markdownDescription": "Denies the set_webview_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-size",
          "markdownDescription": "Denies the set_webview_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_webview_zoom command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-set-webview-zoom",
          "markdownDescription": "Denies the set_webview_zoom command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_close command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-close",
          "markdownDescription": "Denies the webview_close command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-hide",
          "markdownDescription": "Denies the webview_hide command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-position",
          "markdownDescription": "Denies the webview_position command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_show command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-show",
          "markdownDescription": "Denies the webview_show command without any pre-configured scope."
        },
        {
          "description": "Denies the webview_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:webview:deny-webview-size",
          "markdownDescription": "Denies the webview_size command without any pre-configured scope."
        },
        {
          "description": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-windows`\n- `allow-scale-factor`\n- `allow-inner-position`\n- `allow-outer-position`\n- `allow-inner-size`\n- `allow-outer-size`\n- `allow-is-fullscreen`\n- `allow-is-minimized`\n- `allow-is-maximized`\n- `allow-is-focused`\n- `allow-is-decorated`\n- `allow-is-resizable`\n- `allow-is-maximizable`\n- `allow-is-minimizable`\n- `allow-is-closable`\n- `allow-is-visible`\n- `allow-is-enabled`\n- `allow-title`\n- `allow-current-monitor`\n- `allow-primary-monitor`\n- `allow-monitor-from-point`\n- `allow-available-monitors`\n- `allow-cursor-position`\n- `allow-theme`\n- `allow-is-always-on-top`\n- `allow-internal-toggle-maximize`",
          "type": "string",
          "const": "core:window:default",
          "markdownDescription": "Default permissions for the plugin.\n#### This default permission set includes:\n\n- `allow-get-all-windows`\n- `allow-scale-factor`\n- `allow-inner-position`\n- `allow-outer-position`\n- `allow-inner-size`\n- `allow-outer-size`\n- `allow-is-fullscreen`\n- `allow-is-minimized`\n- `allow-is-maximized`\n- `allow-is-focused`\n- `allow-is-decorated`\n- `allow-is-resizable`\n- `allow-is-maximizable`\n- `allow-is-minimizable`\n- `allow-is-closable`\n- `allow-is-visible`\n- `allow-is-enabled`\n- `allow-title`\n- `allow-current-monitor`\n- `allow-primary-monitor`\n- `allow-monitor-from-point`\n- `allow-available-monitors`\n- `allow-cursor-position`\n- `allow-theme`\n- `allow-is-always-on-top`\n- `allow-internal-toggle-maximize`"
        },
        {
          "description": "Enables the available_monitors command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-available-monitors",
          "markdownDescription": "Enables the available_monitors command without any pre-configured scope."
        },
        {
          "description": "Enables the center command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-center",
          "markdownDescription": "Enables the center command without any pre-configured scope."
        },
        {
          "description": "Enables the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-close",
          "markdownDescription": "Enables the close command without any pre-configured scope."
        },
        {
          "description": "Enables the create command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-create",
          "markdownDescription": "Enables the create command without any pre-configured scope."
        },
        {
          "description": "Enables the current_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-current-monitor",
          "markdownDescription": "Enables the current_monitor command without any pre-configured scope."
        },
        {
          "description": "Enables the cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-cursor-position",
          "markdownDescription": "Enables the cursor_position command without any pre-configured scope."
        },
        {
          "description": "Enables the destroy command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-destroy",
          "markdownDescription": "Enables the destroy command without any pre-configured scope."
        },
        {
          "description": "Enables the get_all_windows command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-get-all-windows",
          "markdownDescription": "Enables the get_all_windows command without any pre-configured scope."
        },
        {
          "description": "Enables the hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-hide",
          "markdownDescription": "Enables the hide command without any pre-configured scope."
        },
        {
          "description": "Enables the inner_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-inner-position",
          "markdownDescription": "Enables the inner_position command without any pre-configured scope."
        },
        {
          "description": "Enables the inner_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-inner-size",
          "markdownDescription": "Enables the inner_size command without any pre-configured scope."
        },
        {
          "description": "Enables the internal_toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-internal-toggle-maximize",
          "markdownDescription": "Enables the internal_toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the is_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-always-on-top",
          "markdownDescription": "Enables the is_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Enables the is_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-closable",
          "markdownDescription": "Enables the is_closable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_decorated command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-decorated",
          "markdownDescription": "Enables the is_decorated command without any pre-configured scope."
        },
        {
          "description": "Enables the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-enabled",
          "markdownDescription": "Enables the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the is_focused command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-focused",
          "markdownDescription": "Enables the is_focused command without any pre-configured scope."
        },
        {
          "description": "Enables the is_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-fullscreen",
          "markdownDescription": "Enables the is_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the is_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-maximizable",
          "markdownDescription": "Enables the is_maximizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_maximized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-maximized",
          "markdownDescription": "Enables the is_maximized command without any pre-configured scope."
        },
        {
          "description": "Enables the is_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-minimizable",
          "markdownDescription": "Enables the is_minimizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_minimized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-minimized",
          "markdownDescription": "Enables the is_minimized command without any pre-configured scope."
        },
        {
          "description": "Enables the is_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-resizable",
          "markdownDescription": "Enables the is_resizable command without any pre-configured scope."
        },
        {
          "description": "Enables the is_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-is-visible",
          "markdownDescription": "Enables the is_visible command without any pre-configured scope."
        },
        {
          "description": "Enables the maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-maximize",
          "markdownDescription": "Enables the maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the minimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-minimize",
          "markdownDescription": "Enables the minimize command without any pre-configured scope."
        },
        {
          "description": "Enables the monitor_from_point command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-monitor-from-point",
          "markdownDescription": "Enables the monitor_from_point command without any pre-configured scope."
        },
        {
          "description": "Enables the outer_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-outer-position",
          "markdownDescription": "Enables the outer_position command without any pre-configured scope."
        },
        {
          "description": "Enables the outer_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-outer-size",
          "markdownDescription": "Enables the outer_size command without any pre-configured scope."
        },
        {
          "description": "Enables the primary_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-primary-monitor",
          "markdownDescription": "Enables the primary_monitor command without any pre-configured scope."
        },
        {
          "description": "Enables the request_user_attention command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-request-user-attention",
          "markdownDescription": "Enables the request_user_attention command without any pre-configured scope."
        },
        {
          "description": "Enables the scale_factor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-scale-factor",
          "markdownDescription": "Enables the scale_factor command without any pre-configured scope."
        },
        {
          "description": "Enables the set_always_on_bottom command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-always-on-bottom",
          "markdownDescription": "Enables the set_always_on_bottom command without any pre-configured scope."
        },
        {
          "description": "Enables the set_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-always-on-top",
          "markdownDescription": "Enables the set_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Enables the set_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-background-color",
          "markdownDescription": "Enables the set_background_color command without any pre-configured scope."
        },
        {
          "description": "Enables the set_badge_count command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-badge-count",
          "markdownDescription": "Enables the set_badge_count command without any pre-configured scope."
        },
        {
          "description": "Enables the set_badge_label command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-badge-label",
          "markdownDescription": "Enables the set_badge_label command without any pre-configured scope."
        },
        {
          "description": "Enables the set_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-closable",
          "markdownDescription": "Enables the set_closable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_content_protected command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-content-protected",
          "markdownDescription": "Enables the set_content_protected command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_grab command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-grab",
          "markdownDescription": "Enables the set_cursor_grab command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-icon",
          "markdownDescription": "Enables the set_cursor_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-position",
          "markdownDescription": "Enables the set_cursor_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_cursor_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-cursor-visible",
          "markdownDescription": "Enables the set_cursor_visible command without any pre-configured scope."
        },
        {
          "description": "Enables the set_decorations command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-decorations",
          "markdownDescription": "Enables the set_decorations command without any pre-configured scope."
        },
        {
          "description": "Enables the set_effects command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-effects",
          "markdownDescription": "Enables the set_effects command without any pre-configured scope."
        },
        {
          "description": "Enables the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-enabled",
          "markdownDescription": "Enables the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Enables the set_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-focus",
          "markdownDescription": "Enables the set_focus command without any pre-configured scope."
        },
        {
          "description": "Enables the set_focusable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-focusable",
          "markdownDescription": "Enables the set_focusable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-fullscreen",
          "markdownDescription": "Enables the set_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-icon",
          "markdownDescription": "Enables the set_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_ignore_cursor_events command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-ignore-cursor-events",
          "markdownDescription": "Enables the set_ignore_cursor_events command without any pre-configured scope."
        },
        {
          "description": "Enables the set_max_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-max-size",
          "markdownDescription": "Enables the set_max_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-maximizable",
          "markdownDescription": "Enables the set_maximizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_min_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-min-size",
          "markdownDescription": "Enables the set_min_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-minimizable",
          "markdownDescription": "Enables the set_minimizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_overlay_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-overlay-icon",
          "markdownDescription": "Enables the set_overlay_icon command without any pre-configured scope."
        },
        {
          "description": "Enables the set_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-position",
          "markdownDescription": "Enables the set_position command without any pre-configured scope."
        },
        {
          "description": "Enables the set_progress_bar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-progress-bar",
          "markdownDescription": "Enables the set_progress_bar command without any pre-configured scope."
        },
        {
          "description": "Enables the set_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-resizable",
          "markdownDescription": "Enables the set_resizable command without any pre-configured scope."
        },
        {
          "description": "Enables the set_shadow command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-shadow",
          "markdownDescription": "Enables the set_shadow command without any pre-configured scope."
        },
        {
          "description": "Enables the set_simple_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-simple-fullscreen",
          "markdownDescription": "Enables the set_simple_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Enables the set_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-size",
          "markdownDescription": "Enables the set_size command without any pre-configured scope."
        },
        {
          "description": "Enables the set_size_constraints command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-size-constraints",
          "markdownDescription": "Enables the set_size_constraints command without any pre-configured scope."
        },
        {
          "description": "Enables the set_skip_taskbar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-skip-taskbar",
          "markdownDescription": "Enables the set_skip_taskbar command without any pre-configured scope."
        },
        {
          "description": "Enables the set_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-theme",
          "markdownDescription": "Enables the set_theme command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-title",
          "markdownDescription": "Enables the set_title command without any pre-configured scope."
        },
        {
          "description": "Enables the set_title_bar_style command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-title-bar-style",
          "markdownDescription": "Enables the set_title_bar_style command without any pre-configured scope."
        },
        {
          "description": "Enables the set_visible_on_all_workspaces command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-set-visible-on-all-workspaces",
          "markdownDescription": "Enables the set_visible_on_all_workspaces command without any pre-configured scope."
        },
        {
          "description": "Enables the show command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-show",
          "markdownDescription": "Enables the show command without any pre-configured scope."
        },
        {
          "description": "Enables the start_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-start-dragging",
          "markdownDescription": "Enables the start_dragging command without any pre-configured scope."
        },
        {
          "description": "Enables the start_resize_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-start-resize-dragging",
          "markdownDescription": "Enables the start_resize_dragging command without any pre-configured scope."
        },
        {
          "description": "Enables the theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-theme",
          "markdownDescription": "Enables the theme command without any pre-configured scope."
        },
        {
          "description": "Enables the title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-title",
          "markdownDescription": "Enables the title command without any pre-configured scope."
        },
        {
          "description": "Enables the toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-toggle-maximize",
          "markdownDescription": "Enables the toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Enables the unmaximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-unmaximize",
          "markdownDescription": "Enables the unmaximize command without any pre-configured scope."
        },
        {
          "description": "Enables the unminimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:allow-unminimize",
          "markdownDescription": "Enables the unminimize command without any pre-configured scope."
        },
        {
          "description": "Denies the available_monitors command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-available-monitors",
          "markdownDescription": "Denies the available_monitors command without any pre-configured scope."
        },
        {
          "description": "Denies the center command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-center",
          "markdownDescription": "Denies the center command without any pre-configured scope."
        },
        {
          "description": "Denies the close command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-close",
          "markdownDescription": "Denies the close command without any pre-configured scope."
        },
        {
          "description": "Denies the create command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-create",
          "markdownDescription": "Denies the create command without any pre-configured scope."
        },
        {
          "description": "Denies the current_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-current-monitor",
          "markdownDescription": "Denies the current_monitor command without any pre-configured scope."
        },
        {
          "description": "Denies the cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-cursor-position",
          "markdownDescription": "Denies the cursor_position command without any pre-configured scope."
        },
        {
          "description": "Denies the destroy command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-destroy",
          "markdownDescription": "Denies the destroy command without any pre-configured scope."
        },
        {
          "description": "Denies the get_all_windows command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-get-all-windows",
          "markdownDescription": "Denies the get_all_windows command without any pre-configured scope."
        },
        {
          "description": "Denies the hide command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-hide",
          "markdownDescription": "Denies the hide command without any pre-configured scope."
        },
        {
          "description": "Denies the inner_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-inner-position",
          "markdownDescription": "Denies the inner_position command without any pre-configured scope."
        },
        {
          "description": "Denies the inner_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-inner-size",
          "markdownDescription": "Denies the inner_size command without any pre-configured scope."
        },
        {
          "description": "Denies the internal_toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-internal-toggle-maximize",
          "markdownDescription": "Denies the internal_toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the is_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-always-on-top",
          "markdownDescription": "Denies the is_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Denies the is_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-closable",
          "markdownDescription": "Denies the is_closable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_decorated command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-decorated",
          "markdownDescription": "Denies the is_decorated command without any pre-configured scope."
        },
        {
          "description": "Denies the is_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-enabled",
          "markdownDescription": "Denies the is_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the is_focused command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-focused",
          "markdownDescription": "Denies the is_focused command without any pre-configured scope."
        },
        {
          "description": "Denies the is_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-fullscreen",
          "markdownDescription": "Denies the is_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the is_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-maximizable",
          "markdownDescription": "Denies the is_maximizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_maximized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-maximized",
          "markdownDescription": "Denies the is_maximized command without any pre-configured scope."
        },
        {
          "description": "Denies the is_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-minimizable",
          "markdownDescription": "Denies the is_minimizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_minimized command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-minimized",
          "markdownDescription": "Denies the is_minimized command without any pre-configured scope."
        },
        {
          "description": "Denies the is_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-resizable",
          "markdownDescription": "Denies the is_resizable command without any pre-configured scope."
        },
        {
          "description": "Denies the is_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-is-visible",
          "markdownDescription": "Denies the is_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-maximize",
          "markdownDescription": "Denies the maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the minimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-minimize",
          "markdownDescription": "Denies the minimize command without any pre-configured scope."
        },
        {
          "description": "Denies the monitor_from_point command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-monitor-from-point",
          "markdownDescription": "Denies the monitor_from_point command without any pre-configured scope."
        },
        {
          "description": "Denies the outer_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-outer-position",
          "markdownDescription": "Denies the outer_position command without any pre-configured scope."
        },
        {
          "description": "Denies the outer_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-outer-size",
          "markdownDescription": "Denies the outer_size command without any pre-configured scope."
        },
        {
          "description": "Denies the primary_monitor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-primary-monitor",
          "markdownDescription": "Denies the primary_monitor command without any pre-configured scope."
        },
        {
          "description": "Denies the request_user_attention command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-request-user-attention",
          "markdownDescription": "Denies the request_user_attention command without any pre-configured scope."
        },
        {
          "description": "Denies the scale_factor command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-scale-factor",
          "markdownDescription": "Denies the scale_factor command without any pre-configured scope."
        },
        {
          "description": "Denies the set_always_on_bottom command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-always-on-bottom",
          "markdownDescription": "Denies the set_always_on_bottom command without any pre-configured scope."
        },
        {
          "description": "Denies the set_always_on_top command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-always-on-top",
          "markdownDescription": "Denies the set_always_on_top command without any pre-configured scope."
        },
        {
          "description": "Denies the set_background_color command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-background-color",
          "markdownDescription": "Denies the set_background_color command without any pre-configured scope."
        },
        {
          "description": "Denies the set_badge_count command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-badge-count",
          "markdownDescription": "Denies the set_badge_count command without any pre-configured scope."
        },
        {
          "description": "Denies the set_badge_label command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-badge-label",
          "markdownDescription": "Denies the set_badge_label command without any pre-configured scope."
        },
        {
          "description": "Denies the set_closable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-closable",
          "markdownDescription": "Denies the set_closable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_content_protected command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-content-protected",
          "markdownDescription": "Denies the set_content_protected command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_grab command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-grab",
          "markdownDescription": "Denies the set_cursor_grab command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-icon",
          "markdownDescription": "Denies the set_cursor_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-position",
          "markdownDescription": "Denies the set_cursor_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_cursor_visible command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-cursor-visible",
          "markdownDescription": "Denies the set_cursor_visible command without any pre-configured scope."
        },
        {
          "description": "Denies the set_decorations command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-decorations",
          "markdownDescription": "Denies the set_decorations command without any pre-configured scope."
        },
        {
          "description": "Denies the set_effects command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-effects",
          "markdownDescription": "Denies the set_effects command without any pre-configured scope."
        },
        {
          "description": "Denies the set_enabled command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-enabled",
          "markdownDescription": "Denies the set_enabled command without any pre-configured scope."
        },
        {
          "description": "Denies the set_focus command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-focus",
          "markdownDescription": "Denies the set_focus command without any pre-configured scope."
        },
        {
          "description": "Denies the set_focusable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-focusable",
          "markdownDescription": "Denies the set_focusable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-fullscreen",
          "markdownDescription": "Denies the set_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the set_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-icon",
          "markdownDescription": "Denies the set_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_ignore_cursor_events command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-ignore-cursor-events",
          "markdownDescription": "Denies the set_ignore_cursor_events command without any pre-configured scope."
        },
        {
          "description": "Denies the set_max_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-max-size",
          "markdownDescription": "Denies the set_max_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_maximizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-maximizable",
          "markdownDescription": "Denies the set_maximizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_min_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-min-size",
          "markdownDescription": "Denies the set_min_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_minimizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-minimizable",
          "markdownDescription": "Denies the set_minimizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_overlay_icon command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-overlay-icon",
          "markdownDescription": "Denies the set_overlay_icon command without any pre-configured scope."
        },
        {
          "description": "Denies the set_position command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-position",
          "markdownDescription": "Denies the set_position command without any pre-configured scope."
        },
        {
          "description": "Denies the set_progress_bar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-progress-bar",
          "markdownDescription": "Denies the set_progress_bar command without any pre-configured scope."
        },
        {
          "description": "Denies the set_resizable command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-resizable",
          "markdownDescription": "Denies the set_resizable command without any pre-configured scope."
        },
        {
          "description": "Denies the set_shadow command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-shadow",
          "markdownDescription": "Denies the set_shadow command without any pre-configured scope."
        },
        {
          "description": "Denies the set_simple_fullscreen command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-simple-fullscreen",
          "markdownDescription": "Denies the set_simple_fullscreen command without any pre-configured scope."
        },
        {
          "description": "Denies the set_size command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-size",
          "markdownDescription": "Denies the set_size command without any pre-configured scope."
        },
        {
          "description": "Denies the set_size_constraints command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-size-constraints",
          "markdownDescription": "Denies the set_size_constraints command without any pre-configured scope."
        },
        {
          "description": "Denies the set_skip_taskbar command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-skip-taskbar",
          "markdownDescription": "Denies the set_skip_taskbar command without any pre-configured scope."
        },
        {
          "description": "Denies the set_theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-theme",
          "markdownDescription": "Denies the set_theme command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-title",
          "markdownDescription": "Denies the set_title command without any pre-configured scope."
        },
        {
          "description": "Denies the set_title_bar_style command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-title-bar-style",
          "markdownDescription": "Denies the set_title_bar_style command without any pre-configured scope."
        },
        {
          "description": "Denies the set_visible_on_all_workspaces command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-set-visible-on-all-workspaces",
          "markdownDescription": "Denies the set_visible_on_all_workspaces command without any pre-configured scope."
        },
        {
          "description": "Denies the show command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-show",
          "markdownDescription": "Denies the show command without any pre-configured scope."
        },
        {
          "description": "Denies the start_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-start-dragging",
          "markdownDescription": "Denies the start_dragging command without any pre-configured scope."
        },
        {
          "description": "Denies the start_resize_dragging command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-start-resize-dragging",
          "markdownDescription": "Denies the start_resize_dragging command without any pre-configured scope."
        },
        {
          "description": "Denies the theme command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-theme",
          "markdownDescription": "Denies the theme command without any pre-configured scope."
        },
        {
          "description": "Denies the title command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-title",
          "markdownDescription": "Denies the title command without any pre-configured scope."
        },
        {
          "description": "Denies the toggle_maximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-toggle-maximize",
          "markdownDescription": "Denies the toggle_maximize command without any pre-configured scope."
        },
        {
          "description": "Denies the unmaximize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-unmaximize",
          "markdownDescription": "Denies the unmaximize command without any pre-configured scope."
        },
        {
          "description": "Denies the unminimize command without any pre-configured scope.",
          "type": "string",
          "const": "core:window:deny-unminimize",
          "markdownDescription": "Denies the unminimize command without any pre-configured scope."
        },
        {
          "description": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`",
          "type": "string",
          "const": "shell:default",
          "markdownDescription": "This permission set configures which\nshell functionality is exposed by default.\n\n#### Granted Permissions\n\nIt allows to use the `open` functionality with a reasonable\nscope pre-configured. It will allow opening `http(s)://`,\n`tel:` and `mailto:` links.\n\n#### This default permission set includes:\n\n- `allow-open`"
        },
        {
          "description": "Enables the execute command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-execute",
          "markdownDescription": "Enables the execute command without any pre-configured scope."
        },
        {
          "description": "Enables the kill command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-kill",
          "markdownDescription": "Enables the kill command without any pre-configured scope."
        },
        {
          "description": "Enables the open command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-open",
          "markdownDescription": "Enables the open command without any pre-configured scope."
        },
        {
          "description": "Enables the spawn command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-spawn",
          "markdownDescription": "Enables the spawn command without any pre-configured scope."
        },
        {
          "description": "Enables the stdin_write command without any pre-configured scope.",
          "type": "string",
          "const": "shell:allow-stdin-write",
          "markdownDescription": "Enables the stdin_write command without any pre-configured scope."
        },
        {
          "description": "Denies the execute command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-execute",
          "markdownDescription": "Denies the execute command without any pre-configured scope."
        },
        {
          "description": "Denies the kill command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-kill",
          "markdownDescription": "Denies the kill command without any pre-configured scope."
        },
        {
          "description": "Denies the open command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-open",
          "markdownDescription": "Denies the open command without any pre-configured scope."
        },
        {
          "description": "Denies the spawn command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-spawn",
          "markdownDescription": "Denies the spawn command without any pre-configured scope."
        },
        {
          "description": "Denies the stdin_write command without any pre-configured scope.",
          "type": "string",
          "const": "shell:deny-stdin-write",
          "markdownDescription": "Denies the stdin_write command without any pre-configured scope."
        },
        {
          "description": "This permission set configures what kind of\noperations are available from the window state plugin.\n\n#### Granted Permissions\n\nAll operations are enabled by default.\n\n\n#### This default permission set includes:\n\n- `allow-filename`\n- `allow-restore-state`\n- `allow-save-window-state`",
          "type": "string",
          "const": "window-state:default",
          "markdownDescription": "This permission set configures what kind of\noperations are available from the window state plugin.\n\n#### Granted Permissions\n\nAll operations are enabled by default.\n\n\n#### This default permission set includes:\n\n- `allow-filename`\n- `allow-restore-state`\n- `allow-save-window-state`"
        },
        {
          "description": "Enables the filename command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-filename",
          "markdownDescription": "Enables the filename command without any pre-configured scope."
        },
        {
          "description": "Enables the restore_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-restore-state",
          "markdownDescription": "Enables the restore_state command without any pre-configured scope."
        },
        {
          "description": "Enables the save_window_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:allow-save-window-state",
          "markdownDescription": "Enables the save_window_state command without any pre-configured scope."
        },
        {
          "description": "Denies the filename command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-filename",
          "markdownDescription": "Denies the filename command without any pre-configured scope."
        },
        {
          "description": "Denies the restore_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-restore-state",
          "markdownDescription": "Denies the restore_state command without any pre-configured scope."
        },
        {
          "description": "Denies the save_window_state command without any pre-configured scope.",
          "type": "string",
          "const": "window-state:deny-save-window-state",
          "markdownDescription": "Denies the save_window_state command without any pre-configured scope."
        }
      ]
    },
    "Value": {
      "description": "All supported ACL values.",
      "anyOf": [
        {
          "description": "Represents a null JSON value.",
          "type": "null"
        },
        {
          "description": "Represents a [`bool`].",
          "type": "boolean"
        },
        {
          "description": "Represents a valid ACL [`Number`].",
          "allOf": [
            {
              "$ref": "#/definitions/Number"
            }
          ]
        },
        {
          "description": "Represents a [`String`].",
          "type": "string"
        },
        {
          "description": "Represents a list of other [`Value`]s.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/Value"
          }
        },
        {
          "description": "Represents a map of [`String`] keys to [`Value`]s.",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/definitions/Value"
          }
        }
      ]
    },
    "Number": {
      "description": "A valid ACL number.",
      "anyOf": [
        {
          "description": "Represents an [`i64`].",
          "type": "integer",
          "format": "int64"
        },
        {
          "description": "Represents a [`f64`].",
          "type": "number",
          "format": "double"
        }
      ]
    },
    "Target": {
      "description": "Platform target.",
      "oneOf": [
        {
          "description": "MacOS.",
          "type": "string",
          "enum": [
            "macOS"
          ]
        },
        {
          "description": "Windows.",
          "type": "string",
          "enum": [
            "windows"
          ]
        },
        {
          "description": "Linux.",
          "type": "string",
          "enum": [
            "linux"
          ]
        },
        {
          "description": "Android.",
          "type": "string",
          "enum": [
            "android"
          ]
        },
        {
          "description": "iOS.",
          "type": "string",
          "enum": [
            "iOS"
          ]
        }
      ]
    },
    "ShellScopeEntryAllowedArg": {
      "description": "A command argument allowed to be executed by the webview API.",
      "anyOf": [
        {
          "description": "A non-configurable argument that is passed to the command in the order it was specified.",
          "type": "string"
        },
        {
          "description": "A variable that is set while calling the command from the webview API.",
          "type": "object",
          "required": [
            "validator"
          ],
          "properties": {
            "raw": {
              "description": "Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\n\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.",
              "default": false,
              "type": "boolean"
            },
            "validator": {
              "description": "[regex] validator to require passed values to conform to an expected input.\n\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\n\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\w+` regex would be registered as `^https?://\\w+$`.\n\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>",
              "type": "string"
            }
          },
          "additionalProperties": false
        }
      ]
    },
    "ShellScopeEntryAllowedArgs": {
      "description": "A set of command arguments allowed to be executed by the webview API.\n\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration.",
      "anyOf": [
        {
          "description": "Use a simple boolean to allow all or disable all arguments to this command configuration.",
          "type": "boolean"
        },
        {
          "description": "A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/ShellScopeEntryAllowedArg"
          }
        }
      ]
    }
  }
}

================================================
FILE: desktop/src-tauri/icons/android/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
  <foreground android:drawable="@mipmap/ic_launcher_foreground"/>
  <background android:drawable="@color/ic_launcher_background"/>
</adaptive-icon>

================================================
FILE: desktop/src-tauri/icons/android/values/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
  <color name="ic_launcher_background">#fff</color>
</resources>

================================================
FILE: desktop/src-tauri/src/main.rs
================================================
// Prevents additional console window on Windows in release
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]

use directories::ProjectDirs;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use std::sync::{Mutex, RwLock};
use std::io::Write as IoWrite;
use std::time::SystemTime;
#[cfg(target_os = "macos")]
use std::time::Duration;
use tauri::image::Image;
use tauri::menu::{
    CheckMenuItem, Menu, MenuBuilder, MenuItem, PredefinedMenuItem, SubmenuBuilder, HELP_SUBMENU_ID,
};
use tauri::tray::{TrayIconBuilder, TrayIconEvent};
#[cfg(target_os = "macos")]
use tauri::WebviewWindow;
use tauri::Wry;
use tauri::{
    webview::PageLoadPayload, AppHandle, Manager, Webview, WebviewUrl, WebviewWindowBuilder,
};
#[cfg(target_os = "macos")]
use tokio::time::sleep;
use url::Url;
#[cfg(target_os = "macos")]
use window_vibrancy::{apply_vibrancy, NSVisualEffectMaterial};

// ============================================================================
// Configuration
// ============================================================================

const DEFAULT_SERVER_URL: &str = "https://cloud.onyx.app";
const CONFIG_FILE_NAME: &str = "config.json";
#[cfg(target_os = "macos")]
const TITLEBAR_SCRIPT: &str = include_str!("../../src/titlebar.js");
const TRAY_ID: &str = "onyx-tray";
const TRAY_ICON_BYTES: &[u8] = include_bytes!("../icons/tray-icon.png");
const TRAY_MENU_OPEN_APP_ID: &str = "tray_open_app";
const TRAY_MENU_OPEN_CHAT_ID: &str = "tray_open_chat";
const TRAY_MENU_SHOW_IN_BAR_ID: &str = "tray_show_in_menu_bar";
const TRAY_MENU_QUIT_ID: &str = "tray_quit";
const MENU_SHOW_MENU_BAR_ID: &str = "show_menu_bar";
const MENU_HIDE_DECORATIONS_ID: &str = "hide_window_decorations";
const CHAT_LINK_INTERCEPT_SCRIPT: &str = r##"
(() => {
  if (window.__ONYX_CHAT_LINK_INTERCEPT_INSTALLED__) {
    return;
  }

  window.__ONYX_CHAT_LINK_INTERCEPT_INSTALLED__ = true;

  function isChatSessionPage() {
    try {
      const currentUrl = new URL(window.location.href);
      return (
        currentUrl.pathname.startsWith("/app") &&
        currentUrl.searchParams.has("chatId")
      );
    } catch {
      return false;
    }
  }

  function getAllowedNavigationUrl(rawUrl) {
    try {
      const parsed = new URL(String(rawUrl), window.location.href);
      const scheme = parsed.protocol.toLowerCase();
      if (!["http:", "https:", "mailto:", "tel:"].includes(scheme)) {
        return null;
      }
      return parsed;
    } catch {
      return null;
    }
  }

  async function openWithTauri(url) {
    try {
      const invoke =
        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;
      if (typeof invoke !== "function") {
        return false;
      }

      await invoke("open_in_browser", { url });
      return true;
    } catch {
      return false;
    }
  }

  function handleChatNavigation(rawUrl) {
    const parsedUrl = getAllowedNavigationUrl(rawUrl);
    if (!parsedUrl) {
      return false;
    }

    const safeUrl = parsedUrl.toString();
    const scheme = parsedUrl.protocol.toLowerCase();
    if (scheme === "mailto:" || scheme === "tel:") {
      void openWithTauri(safeUrl).then((opened) => {
        if (!opened) {
          window.location.assign(safeUrl);
        }
      });
      return true;
    }

    window.location.assign(safeUrl);
    return true;
  }

  document.addEventListener(
    "click",
    (event) => {
      if (!isChatSessionPage() || event.defaultPrevented) {
        return;
      }

      const element = event.target;
      if (!(element instanceof Element)) {
        return;
      }

      const anchor = element.closest("a");
      if (!(anchor instanceof HTMLAnchorElement)) {
        return;
      }

      const target = (anchor.getAttribute("target") || "").toLowerCase();
      if (target !== "_blank") {
        return;
      }

      const href = anchor.getAttribute("href");
      if (!href || href.startsWith("#")) {
        return;
      }

      if (!handleChatNavigation(href)) {
        return;
      }

      event.preventDefault();
      event.stopPropagation();
    },
    true
  );

  const nativeWindowOpen = window.open;
  window.open = function(url, target, features) {
    const resolvedTarget = typeof target === "string" ? target.toLowerCase() : "";
    const shouldNavigateInPlace = resolvedTarget === "" || resolvedTarget === "_blank";

    if (
      isChatSessionPage() &&
      shouldNavigateInPlace &&
      url != null &&
      String(url).length > 0
    ) {
      if (!handleChatNavigation(url)) {
        return null;
      }
      return null;
    }

    if (typeof nativeWindowOpen === "function") {
      return nativeWindowOpen.call(window, url, target, features);
    }
    return null;
  };
})();
"##;

#[cfg(not(target_os = "macos"))]
const MENU_KEY_HANDLER_SCRIPT: &str = r#"
(() => {
  if (window.__ONYX_MENU_KEY_HANDLER__) return;
  window.__ONYX_MENU_KEY_HANDLER__ = true;

  let altHeld = false;

  function invoke(cmd) {
    const fn_ =
      window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;
    if (typeof fn_ === 'function') fn_(cmd);
  }

  function releaseAltAndHideMenu() {
    if (!altHeld) {
      return;
    }
    altHeld = false;
    invoke('hide_menu_bar_temporary');
  }

  document.addEventListener('keydown', (e) => {
    if (e.key === 'Alt') {
      if (!altHeld) {
        altHeld = true;
        invoke('show_menu_bar_temporarily');
      }
      return;
    }
    if (e.altKey && e.key === 'F1') {
      e.preventDefault();
      e.stopPropagation();
      altHeld = false;
      invoke('toggle_menu_bar');
      return;
    }
  }, true);

  document.addEventListener('keyup', (e) => {
    if (e.key === 'Alt' && altHeld) {
      releaseAltAndHideMenu();
    }
  }, true);

  window.addEventListener('blur', () => {
    releaseAltAndHideMenu();
  });

  document.addEventListener('visibilitychange', () => {
    if (document.hidden) {
      releaseAltAndHideMenu();
    }
  });
})();
"#;

const CONSOLE_CAPTURE_SCRIPT: &str = r#"
(() => {
  if (window.__ONYX_CONSOLE_CAPTURE__) return;
  window.__ONYX_CONSOLE_CAPTURE__ = true;

  const levels = ['log', 'warn', 'error', 'info', 'debug'];
  const originals = {};

  levels.forEach(level => {
    originals[level] = console[level];
    console[level] = function(...args) {
      originals[level].apply(console, args);
      try {
        const invoke =
          window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;
        if (typeof invoke === 'function') {
          const message = args.map(a => {
            try { return typeof a === 'string' ? a : JSON.stringify(a); }
            catch { return String(a); }
          }).join(' ');
          invoke('log_from_frontend', { level, message });
        }
      } catch {}
    };
  });

  window.addEventListener('error', (event) => {
    try {
      const invoke =
        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;
      if (typeof invoke === 'function') {
        invoke('log_from_frontend', {
          level: 'error',
          message: `[uncaught] ${event.message} at ${event.filename}:${event.lineno}:${event.colno}`
        });
      }
    } catch {}
  });

  window.addEventListener('unhandledrejection', (event) => {
    try {
      const invoke =
        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;
      if (typeof invoke === 'function') {
        invoke('log_from_frontend', {
          level: 'error',
          message: `[unhandled rejection] ${event.reason}`
        });
      }
    } catch {}
  });
})();
"#;

const MENU_TOGGLE_DEVTOOLS_ID: &str = "toggle_devtools";
const MENU_OPEN_DEBUG_LOG_ID: &str = "open_debug_log";

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AppConfig {
    pub server_url: String,

    #[serde(default = "default_window_title")]
    pub window_title: String,

    #[serde(default = "default_show_menu_bar")]
    pub show_menu_bar: bool,

    #[serde(default)]
    pub hide_window_decorations: bool,
}

fn default_window_title() -> String {
    "Onyx".to_string()
}

fn default_show_menu_bar() -> bool {
    true
}

impl Default for AppConfig {
    fn default() -> Self {
        Self {
            server_url: DEFAULT_SERVER_URL.to_string(),
            window_title: default_window_title(),
            show_menu_bar: true,
            hide_window_decorations: false,
        }
    }
}

/// Get the config directory path
fn get_config_dir() -> Option<PathBuf> {
    ProjectDirs::from("app", "onyx", "onyx-desktop").map(|dirs| dirs.config_dir().to_path_buf())
}

/// Get the full config file path
fn get_config_path() -> Option<PathBuf> {
    get_config_dir().map(|dir| dir.join(CONFIG_FILE_NAME))
}

/// Load config from file, or create default if it doesn't exist
fn load_config() -> (AppConfig, bool) {
    let config_path = match get_config_path() {
        Some(path) => path,
        None => {
            return (AppConfig::default(), false);
        }
    };

    if !config_path.exists() {
        return (AppConfig::default(), false);
    }

    match fs::read_to_string(&config_path) {
        Ok(contents) => match serde_json::from_str(&contents) {
            Ok(config) => (config, true),
            Err(_) => (AppConfig::default(), false),
        },
        Err(_) => (AppConfig::default(), false),
    }
}

/// Save config to file
fn save_config(config: &AppConfig) -> Result<(), String> {
    let config_dir = get_config_dir().ok_or("Could not determine config directory")?;
    let config_path = config_dir.join(CONFIG_FILE_NAME);

    // Ensure config directory exists
    fs::create_dir_all(&config_dir).map_err(|e| format!("Failed to create config dir: {}", e))?;

    let json = serde_json::to_string_pretty(config)
        .map_err(|e| format!("Failed to serialize config: {}", e))?;

    fs::write(&config_path, json).map_err(|e| format!("Failed to write config: {}", e))?;

    Ok(())
}

// ============================================================================
// Debug Mode
// ============================================================================

fn is_debug_mode() -> bool {
    std::env::args().any(|arg| arg == "--debug") || std::env::var("ONYX_DEBUG").is_ok()
}

fn get_debug_log_path() -> Option<PathBuf> {
    get_config_dir().map(|dir| dir.join("frontend_debug.log"))
}

fn init_debug_log_file() -> Option<fs::File> {
    let log_path = get_debug_log_path()?;
    if let Some(parent) = log_path.parent() {
        let _ = fs::create_dir_all(parent);
    }
    fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_path)
        .ok()
}

fn format_utc_timestamp() -> String {
    let now = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap_or_default();
    let total_secs = now.as_secs();
    let millis = now.subsec_millis();

    let days = total_secs / 86400;
    let secs_of_day = total_secs % 86400;
    let hours = secs_of_day / 3600;
    let mins = (secs_of_day % 3600) / 60;
    let secs = secs_of_day % 60;

    // Days since Unix epoch -> Y/M/D via civil calendar arithmetic
    let z = days as i64 + 719468;
    let era = z / 146097;
    let doe = z - era * 146097;
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    let mp = (5 * doy + 2) / 153;
    let d = doy - (153 * mp + 2) / 5 + 1;
    let m = if mp < 10 { mp + 3 } else { mp - 9 };
    let y = if m <= 2 { y + 1 } else { y };

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
        y, m, d, hours, mins, secs, millis
    )
}

fn inject_console_capture(webview: &Webview) {
    let _ = webview.eval(CONSOLE_CAPTURE_SCRIPT);
}

fn maybe_open_devtools(app: &AppHandle, window: &tauri::WebviewWindow) {
    #[cfg(any(debug_assertions, feature = "devtools"))]
    {
        let state = app.state::<ConfigState>();
        if state.debug_mode {
            window.open_devtools();
        }
    }
    #[cfg(not(any(debug_assertions, feature = "devtools")))]
    {
        let _ = (app, window);
    }
}

// Global config state
struct ConfigState {
    config: RwLock<AppConfig>,
    config_initialized: RwLock<bool>,
    app_base_url: RwLock<Option<Url>>,
    menu_temporarily_visible: RwLock<bool>,
    debug_mode: bool,
    debug_log_file: Mutex<Option<fs::File>>,
}

fn focus_main_window(app: &AppHandle) {
    if let Some(window) = app.get_webview_window("main") {
        let _ = window.unminimize();
        let _ = window.show();
        let _ = window.set_focus();
    } else {
        trigger_new_window(app);
    }
}

fn trigger_new_chat(app: &AppHandle) {
    let state = app.state::<ConfigState>();
    let server_url = state.config.read().unwrap().server_url.clone();

    if let Some(window) = app.get_webview_window("main") {
        let url = format!("{}/chat", server_url);
        let _ = window.eval(&format!("window.location.href = '{}'", url));
    }
}

fn trigger_new_window(app: &AppHandle) {
    let state = app.state::<ConfigState>();
    let server_url = state.config.read().unwrap().server_url.clone();
    let handle = app.clone();

    tauri::async_runtime::spawn(async move {
        let window_label = format!("onyx-{}", uuid::Uuid::new_v4());
        let builder = WebviewWindowBuilder::new(
            &handle,
            &window_label,
            WebviewUrl::External(server_url.parse().unwrap()),
        )
        .title("Onyx")
        .inner_size(1200.0, 800.0)
        .min_inner_size(800.0, 600.0)
        .transparent(true);

        #[cfg(target_os = "macos")]
        let builder = builder
            .title_bar_style(tauri::TitleBarStyle::Overlay)
            .hidden_title(true);

        #[cfg(target_os = "linux")]
        let builder = builder.background_color(tauri::window::Color(0x1a, 0x1a, 0x2e, 0xff));

        if let Ok(window) = builder.build() {
            #[cfg(target_os = "macos")]
            {
                let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);
                inject_titlebar(window.clone());
            }

            apply_settings_to_window(&handle, &window);
            maybe_open_devtools(&handle, &window);
            let _ = window.set_focus();
        }
    });
}

fn open_docs() {
    let _ = open_in_default_browser("https://docs.onyx.app");
}

fn open_settings(app: &AppHandle) {
    // Navigate main window to the settings page (index.html) with settings flag
    let state = app.state::<ConfigState>();
    let settings_url = state
        .app_base_url
        .read()
        .unwrap()
        .as_ref()
        .cloned()
        .and_then(|mut url| {
            url.set_query(None);
            url.set_fragment(Some("settings"));
            url.set_path("/");
            Some(url)
        })
        .or_else(|| Url::parse("tauri://localhost/#settings").ok());

    if let Some(window) = app.get_webview_window("main") {
        if let Some(url) = settings_url {
            let _ = window.navigate(url);
        }
    }
}

fn same_origin(left: &Url, right: &Url) -> bool {
    left.scheme() == right.scheme()
        && left.host_str() == right.host_str()
        && left.port_or_known_default() == right.port_or_known_default()
}

fn is_chat_session_url(url: &Url) -> bool {
    url.path().starts_with("/app") && url.query_pairs().any(|(key, _)| key == "chatId")
}

fn should_open_in_external_browser(current_url: &Url, destination_url: &Url) -> bool {
    if !is_chat_session_url(current_url) {
        return false;
    }

    match destination_url.scheme() {
        "mailto" | "tel" => true,
        "http" | "https" => !same_origin(current_url, destination_url),
        _ => false,
    }
}

fn open_in_default_browser(url: &str) -> bool {
    #[cfg(target_os = "macos")]
    {
        return Command::new("open").arg(url).status().is_ok();
    }
    #[cfg(target_os = "linux")]
    {
        return Command::new("xdg-open").arg(url).status().is_ok();
    }
    #[cfg(target_os = "windows")]
    {
        return Command::new("rundll32")
            .arg("url.dll,FileProtocolHandler")
            .arg(url)
            .status()
            .is_ok();
    }
    #[allow(unreachable_code)]
    false
}

#[tauri::command]
fn open_in_browser(url: String) -> Result<(), String> {
    let parsed_url = Url::parse(&url).map_err(|_| "Invalid URL".to_string())?;
    match parsed_url.scheme() {
        "http" | "https" | "mailto" | "tel" => {}
        _ => return Err("Unsupported URL scheme".to_string()),
    }

    if open_in_default_browser(parsed_url.as_str()) {
        Ok(())
    } else {
        Err("Failed to open URL in default browser".to_string())
    }
}

fn inject_chat_link_intercept(webview: &Webview) {
    let _ = webview.eval(CHAT_LINK_INTERCEPT_SCRIPT);
}

fn handle_toggle_devtools(app: &AppHandle) {
    #[cfg(any(debug_assertions, feature = "devtools"))]
    {
        let windows: Vec<_> = app.webview_windows().into_values().collect();
        let any_open = windows.iter().any(|w| w.is_devtools_open());
        for window in &windows {
            if any_open {
                window.close_devtools();
            } else {
                window.open_devtools();
            }
        }
    }
    #[cfg(not(any(debug_assertions, feature = "devtools")))]
    {
        let _ = app;
    }
}

fn handle_open_debug_log() {
    let log_path = match get_debug_log_path() {
        Some(p) => p,
        None => return,
    };

    if !log_path.exists() {
        eprintln!("[ONYX DEBUG] Log file does not exist yet: {:?}", log_path);
        return;
    }

    let url_path = log_path.to_string_lossy().replace('\\', "/");
    let _ = open_in_default_browser(&format!(
        "file:///{}",
        url_path.trim_start_matches('/')
    ));
}

// ============================================================================
// Tauri Commands
// ============================================================================

#[tauri::command]
fn log_from_frontend(level: String, message: String, state: tauri::State<ConfigState>) {
    if !state.debug_mode {
        return;
    }
    let timestamp = format_utc_timestamp();
    let log_line = format!("[{}] [{}] {}", timestamp, level.to_uppercase(), message);

    eprintln!("{}", log_line);

    if let Ok(mut guard) = state.debug_log_file.lock() {
        if let Some(ref mut file) = *guard {
            let _ = writeln!(file, "{}", log_line);
            let _ = file.flush();
        }
    }
}

/// Get the current server URL
#[tauri::command]
fn get_server_url(state: tauri::State<ConfigState>) -> String {
    state.config.read().unwrap().server_url.clone()
}

#[derive(Serialize)]
struct BootstrapState {
    server_url: String,
    config_exists: bool,
}

/// Get the server URL plus whether a config file exists
#[tauri::command]
fn get_bootstrap_state(state: tauri::State<ConfigState>) -> BootstrapState {
    let server_url = state.config.read().unwrap().server_url.clone();
    let config_initialized = *state.config_initialized.read().unwrap();
    let config_exists =
        config_initialized && get_config_path().map(|path| path.exists()).unwrap_or(false);

    BootstrapState {
        server_url,
        config_exists,
    }
}

/// Set a new server URL and save to config
#[tauri::command]
fn set_server_url(state: tauri::State<ConfigState>, url: String) -> Result<String, String> {
    // Validate URL
    if !url.starts_with("http://") && !url.starts_with("https://") {
        return Err("URL must start with http:// or https://".to_string());
    }

    let mut config = state.config.write().unwrap();
    config.server_url = url.trim_end_matches('/').to_string();
    save_config(&config)?;
    *state.config_initialized.write().unwrap() = true;

    Ok(config.server_url.clone())
}

/// Get the config file path (so users know where to edit)
#[tauri::command]
fn get_config_path_cmd() -> Result<String, String> {
    get_config_path()
        .map(|p| p.to_string_lossy().to_string())
        .ok_or_else(|| "Could not determine config path".to_string())
}

/// Open the config file in the default editor
#[tauri::command]
fn open_config_file() -> Result<(), String> {
    let config_path = get_config_path().ok_or("Could not determine config path")?;

    // Ensure config exists
    if !config_path.exists() {
        save_config(&AppConfig::default())?;
    }

    #[cfg(target_os = "macos")]
    {
        std::process::Command::new("open")
            .arg("-t")
            .arg(&config_path)
            .spawn()
            .map_err(|e| format!("Failed to open config: {}", e))?;
    }

    #[cfg(target_os = "linux")]
    {
        std::process::Command::new("xdg-open")
            .arg(&config_path)
            .spawn()
            .map_err(|e| format!("Failed to open config: {}", e))?;
    }

    #[cfg(target_os = "windows")]
    {
        std::process::Command::new("notepad")
            .arg(&config_path)
            .spawn()
            .map_err(|e| format!("Failed to open config: {}", e))?;
    }

    Ok(())
}

/// Open the config directory in file manager
#[tauri::command]
fn open_config_directory() -> Result<(), String> {
    let config_dir = get_config_dir().ok_or("Could not determine config directory")?;

    // Ensure directory exists
    fs::create_dir_all(&config_dir).map_err(|e| format!("Failed to create config dir: {}", e))?;

    #[cfg(target_os = "macos")]
    {
        std::process::Command::new("open")
            .arg(&config_dir)
            .spawn()
            .map_err(|e| format!("Failed to open directory: {}", e))?;
    }

    #[cfg(target_os = "linux")]
    {
        std::process::Command::new("xdg-open")
            .arg(&config_dir)
            .spawn()
            .map_err(|e| format!("Failed to open directory: {}", e))?;
    }

    #[cfg(target_os = "windows")]
    {
        std::process::Command::new("explorer")
            .arg(&config_dir)
            .spawn()
            .map_err(|e| format!("Failed to open directory: {}", e))?;
    }

    Ok(())
}

/// Navigate to a specific path on the configured server
#[tauri::command]
fn navigate_to(window: tauri::WebviewWindow, state: tauri::State<ConfigState>, path: &str) {
    let base_url = state.config.read().unwrap().server_url.clone();
    let url = format!("{}{}", base_url, path);
    let _ = window.eval(&format!("window.location.href = '{}'", url));
}

/// Reload the current page
#[tauri::command]
fn reload_page(window: tauri::WebviewWindow) {
    let _ = window.eval("window.location.reload()");
}

/// Go back in history
#[tauri::command]
fn go_back(window: tauri::WebviewWindow) {
    let _ = window.eval("window.history.back()");
}

/// Go forward in history
#[tauri::command]
fn go_forward(window: tauri::WebviewWindow) {
    let _ = window.eval("window.history.forward()");
}

/// Open a new window
#[tauri::command]
async fn new_window(app: AppHandle, state: tauri::State<'_, ConfigState>) -> Result<(), String> {
    let server_url = state.config.read().unwrap().server_url.clone();
    let window_label = format!("onyx-{}", uuid::Uuid::new_v4());

    let builder = WebviewWindowBuilder::new(
        &app,
        &window_label,
        WebviewUrl::External(
            server_url
                .parse()
                .map_err(|e| format!("Invalid URL: {}", e))?,
        ),
    )
    .title("Onyx")
    .inner_size(1200.0, 800.0)
    .min_inner_size(800.0, 600.0)
    .transparent(true);

    #[cfg(target_os = "macos")]
    let builder = builder
        .title_bar_style(tauri::TitleBarStyle::Overlay)
        .hidden_title(true);

    #[cfg(target_os = "linux")]
    let builder = builder.background_color(tauri::window::Color(0x1a, 0x1a, 0x2e, 0xff));

    let window = builder.build().map_err(|e| e.to_string())?;

    #[cfg(target_os = "macos")]
    {
        let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);
        inject_titlebar(window.clone());
    }

    apply_settings_to_window(&app, &window);
    maybe_open_devtools(&app, &window);

    Ok(())
}

/// Reset config to defaults
#[tauri::command]
fn reset_config(state: tauri::State<ConfigState>) -> Result<(), String> {
    let mut config = state.config.write().unwrap();
    *config = AppConfig::default();
    save_config(&config)?;
    *state.config_initialized.write().unwrap() = true;
    Ok(())
}

#[cfg(target_os = "macos")]
fn inject_titlebar(window: WebviewWindow) {
    let script = TITLEBAR_SCRIPT.to_string();
    tauri::async_runtime::spawn(async move {
        // Keep trying for a few seconds to survive navigations and slow loads
        let delays = [0u64, 200, 600, 1200, 2000, 4000, 6000, 8000, 10000];
        for delay in delays {
            if delay > 0 {
                sleep(Duration::from_millis(delay)).await;
            }
            let _ = window.eval(&script);
        }
    });
}

/// Start dragging the window
#[tauri::command]
async fn start_drag_window(window: tauri::Window) -> Result<(), String> {
    window.start_dragging().map_err(|e| e.to_string())
}

// ============================================================================
// Window Settings
// ============================================================================

fn find_check_menu_item(
    app: &AppHandle,
    id: &str,
) -> Option<CheckMenuItem<tauri::Wry>> {
    let menu = app.menu()?;
    for item in menu.items().ok()? {
        if let Some(submenu) = item.as_submenu() {
            for sub_item in submenu.items().ok()? {
                if let Some(check) = sub_item.as_check_menuitem() {
                    if check.id().as_ref() == id {
                        return Some(check.clone());
                    }
                }
            }
        }
    }
    None
}

fn apply_settings_to_window(app: &AppHandle, window: &tauri::WebviewWindow) {
    if cfg!(target_os = "macos") {
        return;
    }
    let state = app.state::<ConfigState>();
    let config = state.config.read().unwrap();
    let temp_visible = *state.menu_temporarily_visible.read().unwrap();
    if !config.show_menu_bar && !temp_visible {
        let _ = window.hide_menu();
    }
    if config.hide_window_decorations {
        let _ = window.set_decorations(false);
    }
}

fn handle_menu_bar_toggle(app: &AppHandle) {
    if cfg!(target_os = "macos") {
        return;
    }
    let state = app.state::<ConfigState>();
    let show = {
        let mut config = state.config.write().unwrap();
        config.show_menu_bar = !config.show_menu_bar;
        let _ = save_config(&config);
        config.show_menu_bar
    };

    *state.menu_temporarily_visible.write().unwrap() = false;

    for (_, window) in app.webview_windows() {
        if show {
            let _ = window.show_menu();
        } else {
            let _ = window.hide_menu();
        }
    }
}

fn handle_decorations_toggle(app: &AppHandle) {
    if cfg!(target_os = "macos") {
        return;
    }
    let state = app.state::<ConfigState>();
    let hide = {
        let mut config = state.config.write().unwrap();
        config.hide_window_decorations = !config.hide_window_decorations;
        let _ = save_config(&config);
        config.hide_window_decorations
    };

    for (_, window) in app.webview_windows() {
        let _ = window.set_decorations(!hide);
    }
}

#[tauri::command]
fn toggle_menu_bar(app: AppHandle) {
    if cfg!(target_os = "macos") {
        return;
    }
    handle_menu_bar_toggle(&app);

    let state = app.state::<ConfigState>();
    let checked = state.config.read().unwrap().show_menu_bar;
    if let Some(check) = find_check_menu_item(&app, MENU_SHOW_MENU_BAR_ID) {
        let _ = check.set_checked(checked);
    }
}

#[tauri::command]
fn show_menu_bar_temporarily(app: AppHandle) {
    if cfg!(target_os = "macos") {
        return;
    }
    let state = app.state::<ConfigState>();
    if state.config.read().unwrap().show_menu_bar {
        return;
    }

    let mut temp = state.menu_temporarily_visible.write().unwrap();
    if *temp {
        return;
    }
    *temp = true;
    drop(temp);

    for (_, window) in app.webview_windows() {
        let _ = window.show_menu();
    }
}

#[tauri::command]
fn hide_menu_bar_temporary(app: AppHandle) {
    if cfg!(target_os = "macos") {
        return;
    }
    let state = app.state::<ConfigState>();
    let mut temp = state.menu_temporarily_visible.write().unwrap();
    if !*temp {
        return;
    }
    *temp = false;
    drop(temp);

    if state.config.read().unwrap().show_menu_bar {
        return;
    }

    for (_, window) in app.webview_windows() {
        let _ = window.hide_menu();
    }
}

// ============================================================================
// Menu Setup
// ============================================================================

fn setup_app_menu(app: &AppHandle) -> tauri::Result<()> {
    let menu = app.menu().unwrap_or(Menu::default(app)?);

    let new_chat_item = MenuItem::with_id(app, "new_chat", "New Chat", true, Some("CmdOrCtrl+N"))?;
    let new_window_item = MenuItem::with_id(
        app,
        "new_window",
        "New Window",
        true,
        Some("CmdOrCtrl+Shift+N"),
    )?;
    let settings_item = MenuItem::with_id(
        app,
        "open_settings",
        "Settings...",
        true,
        Some("CmdOrCtrl+Comma"),
    )?;
    let docs_item = MenuItem::with_id(app, "open_docs", "Onyx Documentation", true, None::<&str>)?;

    if let Some(file_menu) = menu
        .items()?
        .into_iter()
        .filter_map(|item| item.as_submenu().cloned())
        .find(|submenu| submenu.text().ok().as_deref() == Some("File"))
    {
        file_menu.insert_items(&[&new_chat_item, &new_window_item, &settings_item], 0)?;
    } else {
        let file_menu = SubmenuBuilder::new(app, "File")
            .items(&[
                &new_chat_item,
                &new_window_item,
                &settings_item,
                &PredefinedMenuItem::close_window(app, None)?,
            ])
            .build()?;
        menu.prepend(&file_menu)?;
    }

    #[cfg(not(target_os = "macos"))]
    {
        let config = app.state::<ConfigState>();
        let config_guard = config.config.read().unwrap();

        let show_menu_bar_item = CheckMenuItem::with_id(
            app,
            MENU_SHOW_MENU_BAR_ID,
            "Show Menu Bar",
            true,
            config_guard.show_menu_bar,
            None::<&str>,
        )?;

        let hide_decorations_item = CheckMenuItem::with_id(
            app,
            MENU_HIDE_DECORATIONS_ID,
            "Hide Window Decorations",
            true,
            config_guard.hide_window_decorations,
            None::<&str>,
        )?;

        drop(config_guard);

        if let Some(window_menu) = menu
            .items()?
            .into_iter()
            .filter_map(|item| item.as_submenu().cloned())
            .find(|submenu| submenu.text().ok().as_deref() == Some("Window"))
        {
            window_menu.append(&show_menu_bar_item)?;
            window_menu.append(&hide_decorations_item)?;
        } else {
            let window_menu = SubmenuBuilder::new(app, "Window")
                .item(&show_menu_bar_item)
                .item(&hide_decorations_item)
                .build()?;

            let items = menu.items()?;
            let help_idx = items
                .iter()
                .position(|item| {
                    item.as_submenu()
                        .and_then(|s| s.text().ok())
                        .as_deref()
                        == Some("Help")
                })
                .unwrap_or(items.len());
            menu.insert(&window_menu, help_idx)?;
        }
    }

    if let Some(help_menu) = menu
        .get(HELP_SUBMENU_ID)
        .and_then(|item| item.as_submenu().cloned())
    {
        help_menu.append(&docs_item)?;
    } else {
        let help_menu = SubmenuBuilder::with_id(app, HELP_SUBMENU_ID, "Help")
            .item(&docs_item)
            .build()?;
        menu.append(&help_menu)?;
    }

    let state = app.state::<ConfigState>();
    if state.debug_mode {
        let toggle_devtools_item = MenuItem::with_id(
            app,
            MENU_TOGGLE_DEVTOOLS_ID,
            "Toggle DevTools",
            true,
            Some("F12"),
        )?;
        let open_log_item = MenuItem::with_id(
            app,
            MENU_OPEN_DEBUG_LOG_ID,
            "Open Debug Log",
            true,
            None::<&str>,
        )?;

        let debug_menu = SubmenuBuilder::new(app, "Debug")
            .item(&toggle_devtools_item)
            .item(&open_log_item)
            .build()?;
        menu.append(&debug_menu)?;
    }

    app.set_menu(menu)?;
    Ok(())
}

fn build_tray_menu(app: &AppHandle) -> tauri::Result<Menu<Wry>> {
    let open_app = MenuItem::with_id(app, TRAY_MENU_OPEN_APP_ID, "Open Onyx", true, None::<&str>)?;
    let open_chat = MenuItem::with_id(
        app,
        TRAY_MENU_OPEN_CHAT_ID,
        "Open Chat Window",
        true,
        None::<&str>,
    )?;
    let show_in_menu_bar = CheckMenuItem::with_id(
        app,
        TRAY_MENU_SHOW_IN_BAR_ID,
        "Show in Menu Bar",
        true,
        true,
        None::<&str>,
    )?;
    // Keep it visible/pinned without letting users uncheck (avoids orphaning the tray)
    let _ = show_in_menu_bar.set_enabled(false);
    let quit = PredefinedMenuItem::quit(app, Some("Quit Onyx"))?;

    MenuBuilder::new(app)
        .item(&open_app)
        .item(&open_chat)
        .separator()
        .item(&show_in_menu_bar)
        .separator()
        .item(&quit)
        .build()
}

fn handle_tray_menu_event(app: &AppHandle, id: &str) {
    match id {
        TRAY_MENU_OPEN_APP_ID => {
            focus_main_window(app);
        }
        TRAY_MENU_OPEN_CHAT_ID => {
            focus_main_window(app);
            trigger_new_chat(app);
        }
        TRAY_MENU_QUIT_ID => {
            app.exit(0);
        }
        TRAY_MENU_SHOW_IN_BAR_ID => {
            // No-op for now; the item stays checked/disabled to indicate it's pinned.
        }
        _ => {}
    }
}

fn setup_tray_icon(app: &AppHandle) -> tauri::Result<()> {
    let mut builder = TrayIconBuilder::with_id(TRAY_ID).tooltip("Onyx");

    let tray_icon = Image::from_bytes(TRAY_ICON_BYTES)
        .ok()
        .or_else(|| app.default_window_icon().cloned());

    if let Some(icon) = tray_icon {
        builder = builder.icon(icon);

        #[cfg(target_os = "macos")]
        {
            builder = builder.icon_as_template(true);
        }
    }

    if let Ok(menu) = build_tray_menu(app) {
        builder = builder.menu(&menu);
    }

    builder
        .on_tray_icon_event(|tray, event| {
            if let TrayIconEvent::Click { .. } = event {
                focus_main_window(tray.app_handle());
            }
        })
        .on_menu_event(|app, event| handle_tray_menu_event(app, event.id().as_ref()))
        .build(app)?;

    Ok(())
}

// ============================================================================
// Main
// ============================================================================

fn main() {
    let (config, config_initialized) = load_config();
    let debug_mode = is_debug_mode();

    let debug_log_file = if debug_mode {
        eprintln!("[ONYX DEBUG] Debug mode enabled");
        if let Some(path) = get_debug_log_path() {
            eprintln!("[ONYX DEBUG] Frontend logs: {}", path.display());
        }
        eprintln!("[ONYX DEBUG] DevTools will open automatically");
        eprintln!("[ONYX DEBUG] Capturing console.log/warn/error/info/debug from webview");
        init_debug_log_file()
    } else {
        None
    };

    tauri::Builder::default()
        .plugin(tauri_plugin_shell::init())
        .plugin(
            tauri::plugin::Builder::<Wry>::new("chat-external-navigation-handler")
                .on_navigation(|webview, destination_url| {
                    let Ok(current_url) = webview.url() else {
                        return true;
                    };

                    if should_open_in_external_browser(&current_url, destination_url) {
                        if !open_in_default_browser(destination_url.as_str()) {
                            eprintln!(
                                "Failed to open external URL in default browser: {}",
                                destination_url
                            );
                        }
                        return false;
                    }

                    true
                })
                .build(),
        )
        .plugin(tauri_plugin_window_state::Builder::default().build())
        .manage(ConfigState {
            config: RwLock::new(config),
            config_initialized: RwLock::new(config_initialized),
            app_base_url: RwLock::new(None),
            menu_temporarily_visible: RwLock::new(false),
            debug_mode,
            debug_log_file: Mutex::new(debug_log_file),
        })
        .invoke_handler(tauri::generate_handler![
            get_server_url,
            get_bootstrap_state,
            set_server_url,
            get_config_path_cmd,
            open_in_browser,
            open_config_file,
            open_config_directory,
            navigate_to,
            reload_page,
            go_back,
            go_forward,
            new_window,
            reset_config,
            start_drag_window,
            toggle_menu_bar,
            show_menu_bar_temporarily,
            hide_menu_bar_temporary,
            log_from_frontend
        ])
        .on_menu_event(|app, event| match event.id().as_ref() {
            "open_docs" => open_docs(),
            "new_chat" => trigger_new_chat(app),
            "new_window" => trigger_new_window(app),
            "open_settings" => open_settings(app),
            "show_menu_bar" => handle_menu_bar_toggle(app),
            "hide_window_decorations" => handle_decorations_toggle(app),
            MENU_TOGGLE_DEVTOOLS_ID => handle_toggle_devtools(app),
            MENU_OPEN_DEBUG_LOG_ID => handle_open_debug_log(),
            _ => {}
        })
        .setup(move |app| {
            let app_handle = app.handle();

            if let Err(e) = setup_app_menu(&app_handle) {
                eprintln!("Failed to setup menu: {}", e);
            }

            if let Err(e) = setup_tray_icon(&app_handle) {
                eprintln!("Failed to setup tray icon: {}", e);
            }

            // Setup main window with vibrancy effect
            if let Some(window) = app.get_webview_window("main") {
                // Apply vibrancy effect for translucent glass look
                #[cfg(target_os = "macos")]
                {
                    let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);
                }

                if let Ok(url) = window.url() {
                    let mut base_url = url;
                    base_url.set_query(None);
                    base_url.set_fragment(None);
                    base_url.set_path("/");
                    *app.state::<ConfigState>().app_base_url.write().unwrap() = Some(base_url);
                }

                #[cfg(target_os = "macos")]
                inject_titlebar(window.clone());

                apply_settings_to_window(&app_handle, &window);
                maybe_open_devtools(&app_handle, &window);

                let _ = window.set_focus();
            }

            Ok(())
        })
        .on_page_load(|webview: &Webview, _payload: &PageLoadPayload| {
            inject_chat_link_intercept(webview);

            {
                let app = webview.app_handle();
                let state = app.state::<ConfigState>();
                if state.debug_mode {
                    inject_console_capture(webview);
                }
            }

            #[cfg(not(target_os = "macos"))]
            {
                let _ = webview.eval(MENU_KEY_HANDLER_SCRIPT);

                let app = webview.app_handle();
                let state = app.state::<ConfigState>();
                let config = state.config.read().unwrap();
                let temp_visible = *state.menu_temporarily_visible.read().unwrap();
                let label = webview.label().to_string();
                if !config.show_menu_bar && !temp_visible {
                    if let Some(win) = app.get_webview_window(&label) {
                        let _ = win.hide_menu();
                    }
                }
                if config.hide_window_decorations {
                    if let Some(win) = app.get_webview_window(&label) {
                        let _ = win.set_decorations(false);
                    }
                }
            }

            #[cfg(target_os = "macos")]
            let _ = webview.eval(TITLEBAR_SCRIPT);
        })
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
}


================================================
FILE: desktop/src-tauri/tauri.conf.json
================================================
{
  "$schema": "https://schema.tauri.app/config/2.0.0",
  "productName": "Onyx",
  "version": "0.0.0-dev",
  "identifier": "app.onyx.desktop",
  "build": {
    "beforeBuildCommand": "",
    "beforeDevCommand": "",
    "frontendDist": "../src"
  },
  "app": {
    "withGlobalTauri": true,
    "windows": [
      {
        "title": "Onyx",
        "label": "main",
        "url": "index.html",
        "width": 1200,
        "height": 800,
        "minWidth": 800,
        "minHeight": 600,
        "resizable": true,
        "fullscreen": false,
        "decorations": true,
        "transparent": true,
        "backgroundColor": "#1a1a2e",
        "titleBarStyle": "Overlay",
        "hiddenTitle": true,
        "acceptFirstMouse": true,
        "tabbingIdentifier": "onyx"
      }
    ],
    "security": {
      "csp": null
    },
    "macOSPrivateApi": true
  },
  "bundle": {
    "active": true,
    "targets": "all",
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
      "icons/128x128@2x.png",
      "icons/icon.icns",
      "icons/icon.ico"
    ],
    "category": "Productivity",
    "shortDescription": "Onyx Cloud Desktop App",
    "longDescription": "A lightweight desktop wrapper for Onyx Cloud - your AI-powered knowledge assistant.",
    "macOS": {
      "entitlements": null,
      "exceptionDomain": "cloud.onyx.app",
      "minimumSystemVersion": "10.15",
      "signingIdentity": null,
      "dmg": {
        "windowSize": {
          "width": 660,
          "height": 400
        }
      }
    }
  },
  "plugins": {
    "shell": {
      "open": true
    }
  }
}


================================================
FILE: docker-bake.hcl
================================================
group "default" {
  targets = ["backend", "model-server", "web"]
}

variable "BACKEND_REPOSITORY" {
  default = "onyxdotapp/onyx-backend"
}

variable "WEB_SERVER_REPOSITORY" {
  default = "onyxdotapp/onyx-web-server"
}

variable "MODEL_SERVER_REPOSITORY" {
  default = "onyxdotapp/onyx-model-server"
}

variable "INTEGRATION_REPOSITORY" {
  default = "onyxdotapp/onyx-integration"
}

variable "CLI_REPOSITORY" {
  default = "onyxdotapp/onyx-cli"
}

variable "TAG" {
  default = "latest"
}

target "backend" {
  context    = "backend"
  dockerfile = "Dockerfile"

  cache-from = ["type=registry,ref=${BACKEND_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags      = ["${BACKEND_REPOSITORY}:${TAG}"]
}

target "web" {
  context    = "web"
  dockerfile = "Dockerfile"

  cache-from = ["type=registry,ref=${WEB_SERVER_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags      = ["${WEB_SERVER_REPOSITORY}:${TAG}"]
}

target "model-server" {
  context = "backend"

  dockerfile = "Dockerfile.model_server"

  cache-from = ["type=registry,ref=${MODEL_SERVER_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags      = ["${MODEL_SERVER_REPOSITORY}:${TAG}"]
}

target "integration" {
  context    = "backend"
  dockerfile = "tests/integration/Dockerfile"

  // Provide the base image via build context from the backend target
  contexts = {
    base = "target:backend"
  }

  tags      = ["${INTEGRATION_REPOSITORY}:${TAG}"]
}

target "cli" {
  context    = "cli"
  dockerfile = "Dockerfile"

  cache-from = ["type=registry,ref=${CLI_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags      = ["${CLI_REPOSITORY}:${TAG}"]
}


================================================
FILE: docs/METRICS.md
================================================
# Onyx Prometheus Metrics Reference

## Adding New Metrics

All Prometheus metrics live in the `backend/onyx/server/metrics/` package. Follow these steps to add a new metric.

### 1. Choose the right file (or create a new one)

| File | Purpose |
|------|---------|
| `metrics/slow_requests.py` | Slow request counter + callback |
| `metrics/postgres_connection_pool.py` | SQLAlchemy connection pool metrics |
| `metrics/prometheus_setup.py` | FastAPI instrumentator config (orchestrator) |

If your metric is a standalone concern (e.g. cache hit rates, queue depths), create a new file under `metrics/` and keep one metric concept per file.

### 2. Define the metric

Use `prometheus_client` types directly at module level:

```python
# metrics/my_metric.py
from prometheus_client import Counter

_my_counter = Counter(
    "onyx_my_counter_total",          # Always prefix with onyx_
    "Human-readable description",
    ["label_a", "label_b"],           # Keep label cardinality low
)
```

**Naming conventions:**
- Prefix all metric names with `onyx_`
- Counters: `_total` suffix (e.g. `onyx_api_slow_requests_total`)
- Histograms: `_seconds` or `_bytes` suffix for durations/sizes
- Gauges: no special suffix

**Label cardinality:** Avoid high-cardinality labels (raw user IDs, UUIDs, raw paths). Use route templates like `/api/items/{item_id}` instead of `/api/items/abc-123`.

### 3. Wire it into the instrumentator (if request-scoped)

If your metric needs to run on every HTTP request, write a callback and register it in `prometheus_setup.py`:

```python
# metrics/my_metric.py
from prometheus_fastapi_instrumentator.metrics import Info

def my_metric_callback(info: Info) -> None:
    _my_counter.labels(label_a=info.method, label_b=info.modified_handler).inc()
```

```python
# metrics/prometheus_setup.py
from onyx.server.metrics.my_metric import my_metric_callback

# Inside setup_prometheus_metrics():
instrumentator.add(my_metric_callback)
```

### 4. Wire it into setup_prometheus_metrics (if infrastructure-scoped)

For metrics that attach to engines, pools, or background systems, add a setup function and call it from `setup_prometheus_metrics()` in `metrics/prometheus_setup.py`:

```python
# metrics/my_metric.py
def setup_my_metrics(resource: SomeResource) -> None:
    # Register collectors, attach event listeners, etc.
    ...
```

```python
# metrics/prometheus_setup.py — inside setup_prometheus_metrics()
from onyx.server.metrics.my_metric import setup_my_metrics

def setup_prometheus_metrics(app, engines=None) -> None:
    setup_my_metrics(resource)  # Add your call here
    ...
```

All metrics initialization is funneled through the single `setup_prometheus_metrics()` call in `onyx/main.py:lifespan()`. Do not add separate setup calls to `main.py`.

### 5. Write tests

Add tests in `backend/tests/unit/onyx/server/`. Use `unittest.mock.patch` to mock the prometheus objects — don't increment real global counters in tests.

### 6. Document the metric

Add your metric to the reference tables below in this file. Include the metric name, type, labels, and description.

### 7. Update Grafana dashboards

After deploying, add panels to the relevant Grafana dashboard:

1. Open Grafana and navigate to the Onyx dashboard (or create a new one)
2. Add a new panel — choose the appropriate visualization:
   - **Counters** → use `rate()` in a time series panel (e.g. `rate(onyx_my_counter_total[5m])`)
   - **Histograms** → use `histogram_quantile()` for percentiles, or `_sum/_count` for averages
   - **Gauges** → display directly as a stat or gauge panel
3. Add meaningful thresholds and alerts where appropriate
4. Group related panels into rows (e.g. "API Performance", "Database Pool")

---

## API Server Metrics

These metrics are exposed at `GET /metrics` on the API server.

### Built-in (via `prometheus-fastapi-instrumentator`)

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `http_requests_total` | Counter | `method`, `status`, `handler` | Total request count |
| `http_request_duration_highr_seconds` | Histogram | _(none)_ | High-resolution latency (many buckets, no labels) |
| `http_request_duration_seconds` | Histogram | `method`, `handler` | Latency by handler (custom buckets for P95/P99) |
| `http_request_size_bytes` | Summary | `handler` | Incoming request content length |
| `http_response_size_bytes` | Summary | `handler` | Outgoing response content length |
| `http_requests_inprogress` | Gauge | `method`, `handler` | Currently in-flight requests |

### Custom (via `onyx.server.metrics`)

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `onyx_api_slow_requests_total` | Counter | `method`, `handler`, `status` | Requests exceeding `SLOW_REQUEST_THRESHOLD_SECONDS` (default 1s) |

### Configuration

| Env Var | Default | Description |
|---------|---------|-------------|
| `SLOW_REQUEST_THRESHOLD_SECONDS` | `1.0` | Duration threshold for slow request counting |

### Instrumentator Settings

- `should_group_status_codes=False` — Reports exact HTTP status codes (e.g. 401, 403, 500)
- `should_instrument_requests_inprogress=True` — Enables the in-progress request gauge
- `inprogress_labels=True` — Breaks down in-progress gauge by `method` and `handler`
- `excluded_handlers=["/health", "/metrics", "/openapi.json"]` — Excludes noisy endpoints from metrics

## Database Pool Metrics

These metrics provide visibility into SQLAlchemy connection pool state across all three engines (`sync`, `async`, `readonly`). Collected via `onyx.server.metrics.postgres_connection_pool`.

### Pool State (via custom Prometheus collector — snapshot on each scrape)

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `onyx_db_pool_checked_out` | Gauge | `engine` | Currently checked-out connections |
| `onyx_db_pool_checked_in` | Gauge | `engine` | Idle connections available in the pool |
| `onyx_db_pool_overflow` | Gauge | `engine` | Current overflow connections beyond `pool_size` |
| `onyx_db_pool_size` | Gauge | `engine` | Configured pool size (constant) |

### Pool Lifecycle (via SQLAlchemy pool event listeners)

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `onyx_db_pool_checkout_total` | Counter | `engine` | Total connection checkouts from the pool |
| `onyx_db_pool_checkin_total` | Counter | `engine` | Total connection checkins to the pool |
| `onyx_db_pool_connections_created_total` | Counter | `engine` | Total new database connections created |
| `onyx_db_pool_invalidations_total` | Counter | `engine` | Total connection invalidations |
| `onyx_db_pool_checkout_timeout_total` | Counter | `engine` | Total connection checkout timeouts |

### Per-Endpoint Attribution (via pool events + endpoint context middleware)

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `onyx_db_connections_held_by_endpoint` | Gauge | `handler`, `engine` | DB connections currently held, by endpoint |
| `onyx_db_connection_hold_seconds` | Histogram | `handler`, `engine` | Duration a DB connection is held by an endpoint |

Engine label values: `sync` (main read-write), `async` (async sessions), `readonly` (read-only user).

Connections from background tasks (Celery) or boot-time warmup appear as `handler="unknown"`.

## OpenSearch Search Metrics

These metrics track OpenSearch search latency and throughput. Collected via `onyx.server.metrics.opensearch_search`.

| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `onyx_opensearch_search_client_duration_seconds` | Histogram | `search_type` | Client-side end-to-end latency (network + serialization + server execution) |
| `onyx_opensearch_search_server_duration_seconds` | Histogram | `search_type` | Server-side execution time from OpenSearch `took` field |
| `onyx_opensearch_search_total` | Counter | `search_type` | Total search requests sent to OpenSearch |
| `onyx_opensearch_searches_in_progress` | Gauge | `search_type` | Currently in-flight OpenSearch searches |

Search type label values: See `OpenSearchSearchType`.

---

## Example PromQL Queries

### Which endpoints are saturated right now?

```promql
# Top 10 endpoints by in-progress requests
topk(10, http_requests_inprogress)
```

### What's the P99 latency per endpoint?

```promql
# P99 latency by handler over the last 5 minutes
histogram_quantile(0.99, sum by (handler, le) (rate(http_request_duration_seconds_bucket[5m])))
```

### Which endpoints have the highest request rate?

```promql
# Requests per second by handler, top 10
topk(10, sum by (handler) (rate(http_requests_total[5m])))
```

### Which endpoints are returning errors?

```promql
# 5xx error rate by handler
sum by (handler) (rate(http_requests_total{status=~"5.."}[5m]))
```

### Slow request hotspots

```promql
# Slow requests per minute by handler
sum by (handler) (rate(onyx_api_slow_requests_total[5m])) * 60
```

### Latency trending up?

```promql
# Compare P50 latency now vs 1 hour ago
histogram_quantile(0.5, sum by (le) (rate(http_request_duration_highr_seconds_bucket[5m])))
  -
histogram_quantile(0.5, sum by (le) (rate(http_request_duration_highr_seconds_bucket[5m] offset 1h)))
```

### Overall request throughput

```promql
# Total requests per second across all endpoints
sum(rate(http_requests_total[5m]))
```

### Pool utilization (% of capacity in use)

```promql
# Sync pool utilization: checked-out / (pool_size + max_overflow)
# NOTE: Replace 10 with your actual POSTGRES_API_SERVER_POOL_OVERFLOW value.
onyx_db_pool_checked_out{engine="sync"} / (onyx_db_pool_size{engine="sync"} + 10) * 100
```

### Pool approaching exhaustion?

```promql
# Alert when checked-out connections exceed 80% of pool capacity
# NOTE: Replace 10 with your actual POSTGRES_API_SERVER_POOL_OVERFLOW value.
onyx_db_pool_checked_out{engine="sync"} > 0.8 * (onyx_db_pool_size{engine="sync"} + 10)
```

### Which endpoints are hogging DB connections?

```promql
# Top 10 endpoints by connections currently held
topk(10, onyx_db_connections_held_by_endpoint{engine="sync"})
```

### Which endpoints hold connections the longest?

```promql
# P99 connection hold time by endpoint
histogram_quantile(0.99, sum by (handler, le) (rate(onyx_db_connection_hold_seconds_bucket{engine="sync"}[5m])))
```

### Connection checkout/checkin rate

```promql
# Checkouts per second by engine
sum by (engine) (rate(onyx_db_pool_checkout_total[5m]))
```

### OpenSearch P99 search latency by type

```promql
# P99 client-side latency by search type
histogram_quantile(0.99, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))
```

### OpenSearch search throughput

```promql
# Searches per second by type
sum by (search_type) (rate(onyx_opensearch_search_total[5m]))
```

### OpenSearch concurrent searches

```promql
# Total in-flight searches across all instances
sum(onyx_opensearch_searches_in_progress)
```

### OpenSearch network overhead

```promql
# Difference between client and server P50 reveals network/serialization cost.
histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))
  -
histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))
```


================================================
FILE: examples/assistants-api/topics_analyzer.py
================================================
import argparse
import os
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from openai import OpenAI


ASSISTANT_NAME = "Topic Analyzer"
SYSTEM_PROMPT = """
You are a helpful assistant that analyzes topics by searching through available \
documents and providing insights. These available documents come from common \
workplace tools like Slack, emails, Confluence, Google Drive, etc.

When analyzing a topic:
1. Search for relevant information using the search tool
2. Synthesize the findings into clear insights
3. Highlight key trends, patterns, or notable developments
4. Maintain objectivity and cite sources where relevant
"""
USER_PROMPT = """
Please analyze and provide insights about this topic: {topic}.

IMPORTANT: do not mention things that are not relevant to the specified topic. \
If there is no relevant information, just say "No relevant information found."
"""


def wait_on_run(client: OpenAI, run, thread):  # type: ignore
    while run.status == "queued" or run.status == "in_progress":
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        time.sleep(0.5)
    return run


def show_response(messages) -> None:  # type: ignore
    # Get only the assistant's response text
    for message in messages.data[::-1]:
        if message.role == "assistant":
            for content in message.content:
                if content.type == "text":
                    print(content.text)
                    break


def analyze_topics(topics: list[str]) -> None:
    openai_api_key = os.environ.get(
        "OPENAI_API_KEY", "<your OpenAI API key if not set as env var>"
    )
    onyx_api_key = os.environ.get(
        "DANSWER_API_KEY", "<your Onyx API key if not set as env var>"
    )
    client = OpenAI(
        api_key=openai_api_key,
        base_url="http://localhost:8080/openai-assistants",
        default_headers={
            "Authorization": f"Bearer {onyx_api_key}",
        },
    )

    # Create an assistant if it doesn't exist
    try:
        assistants = client.beta.assistants.list(limit=100)
        # Find the Topic Analyzer assistant if it exists
        assistant = next((a for a in assistants.data if a.name == ASSISTANT_NAME))
        client.beta.assistants.delete(assistant.id)
    except Exception:
        pass

    assistant = client.beta.assistants.create(
        name=ASSISTANT_NAME,
        instructions=SYSTEM_PROMPT,
        tools=[{"type": "SearchTool"}],  # type: ignore
        model="gpt-4o",
    )

    # Process each topic individually
    for topic in topics:
        thread = client.beta.threads.create()
        message = client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=USER_PROMPT.format(topic=topic),
        )

        run = client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant.id,
            tools=[
                {  # type: ignore
                    "type": "SearchTool",
                    "retrieval_details": {
                        "run_search": "always",
                        "filters": {
                            "time_cutoff": str(
                                datetime.now(timezone.utc) - timedelta(days=7)
                            )
                        },
                    },
                }
            ],
        )

        run = wait_on_run(client, run, thread)
        messages = client.beta.threads.messages.list(
            thread_id=thread.id, order="asc", after=message.id
        )
        print(f"\nAnalysis for topic: {topic}")
        print("-" * 40)
        show_response(messages)
        print()


# Example usage
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Analyze specific topics")
    parser.add_argument("topics", nargs="+", help="Topics to analyze (one or more)")

    args = parser.parse_args()
    analyze_topics(args.topics)


================================================
FILE: examples/widget/.eslintrc.json
================================================
{
  "extends": "next/core-web-vitals"
}


================================================
FILE: examples/widget/.gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts


================================================
FILE: examples/widget/README.md
================================================
# Onyx Chat Bot Widget

Note: The widget requires a Onyx API key, which is a paid (cloud/enterprise) feature.

This is a code example for how you can use Onyx's APIs to build a chat bot widget for a website! The main code to look at can be found in `src/app/widget/Widget.tsx`.

## Getting Started

To get the widget working on your webpage, follow these steps:

### 1. Install Dependencies

Ensure you have the necessary dependencies installed. From the `examples/widget/README.md` file:

```bash
npm i
```

### 2. Set Environment Variables

Make sure to set the environment variables `NEXT_PUBLIC_API_URL` and `NEXT_PUBLIC_API_KEY` in a `.env` file at the root of your project:

```bash
NEXT_PUBLIC_API_URL=
NEXT_PUBLIC_API_KEY=
```

### 3. Run the Development Server

Start the development server to see the widget in action.

```bash
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

### 4. Integrate the Widget

To integrate the widget into your webpage, you can use the `ChatWidget` component. Here’s an example of how to include it in a page component:

```jsx
import ChatWidget from "path/to/ChatWidget";
function MyPage() {
  return (
    <div>
      <h1>My Webpage</h1>
      <ChatWidget />
    </div>
  );
}
export default MyPage;
```

### 5. Deploy

Once you are satisfied with the widget, you can build and start the application for production:

```bash
npm run build
npm run start
```

### Custom Styling and Configuration

If you need to customize the widget, you can modify the `ChatWidget` component in the `examples/widget/src/app/widget/Widget.tsx` file.

By following these steps, you should be able to get the chat widget working on your webpage.

If you want to get fancier, then take a peek at the Chat implementation within Onyx itself [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/app/chat/ChatPage.tsx#L82).


================================================
FILE: examples/widget/next.config.mjs
================================================
/** @type {import('next').NextConfig} */
const nextConfig = {};

export default nextConfig;


================================================
FILE: examples/widget/package.json
================================================
{
  "name": "widget",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "next": "^16.1.7",
    "react": "^19",
    "react-dom": "^19",
    "react-markdown": "^10.1.0"
  },
  "devDependencies": {
    "@tailwindcss/postcss": "^4.1.18",
    "@types/node": "^25",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "autoprefixer": "^10.4.23",
    "eslint": "^9",
    "eslint-config-next": "16.1.2",
    "postcss": "^8.5.6",
    "tailwindcss": "^4.1.18",
    "typescript": "^5"
  }
}


================================================
FILE: examples/widget/postcss.config.mjs
================================================
/** @type {import('postcss-load-config').Config} */
const config = {
  plugins: {
    "@tailwindcss/postcss": {},
  },
};

export default config;


================================================
FILE: examples/widget/src/app/globals.css
================================================
@import "tailwindcss";


================================================
FILE: examples/widget/src/app/layout.tsx
================================================
import type { Metadata } from "next";
import { Inter } from "next/font/google";

import "./globals.css";

const inter = Inter({ subsets: ["latin"] });

export const metadata: Metadata = {
  title: "Example Onyx Widget",
  description: "Example Onyx Widget",
};

export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  return (
    <html lang="en">
      <body className={inter.className}>{children}</body>
    </html>
  );
}


================================================
FILE: examples/widget/src/app/page.tsx
================================================
import { ChatWidget } from "./widget/Widget";

export default function Home() {
  return (
    <main className="flex min-h-screen flex-col items-center justify-between p-24">
      <ChatWidget />
    </main>
  );
}


================================================
FILE: examples/widget/src/app/widget/Widget.tsx
================================================
"use client";

import React, { useState } from "react";
import ReactMarkdown from "react-markdown";

const API_URL = process.env.NEXT_PUBLIC_API_URL || "http://localhost:8080";
const API_KEY = process.env.NEXT_PUBLIC_API_KEY || "";

type NonEmptyObject = { [k: string]: any };

const processSingleChunk = <T extends NonEmptyObject>(
  chunk: string,
  currPartialChunk: string | null,
): [T | null, string | null] => {
  const completeChunk = (currPartialChunk || "") + chunk;
  try {
    // every complete chunk should be valid JSON
    const chunkJson = JSON.parse(completeChunk);
    return [chunkJson, null];
  } catch (err) {
    // if it's not valid JSON, then it's probably an incomplete chunk
    return [null, completeChunk];
  }
};

const processRawChunkString = <T extends NonEmptyObject>(
  rawChunkString: string,
  previousPartialChunk: string | null,
): [T[], string | null] => {
  /* This is required because, in practice, we see that nginx does not send over
  each chunk one at a time even with buffering turned off. Instead,
  chunks are sometimes in batches or are sometimes incomplete */
  if (!rawChunkString) {
    return [[], null];
  }
  const chunkSections = rawChunkString
    .split("\n")
    .filter((chunk) => chunk.length > 0);
  let parsedChunkSections: T[] = [];
  let currPartialChunk = previousPartialChunk;
  chunkSections.forEach((chunk) => {
    const [processedChunk, partialChunk] = processSingleChunk<T>(
      chunk,
      currPartialChunk,
    );
    if (processedChunk) {
      parsedChunkSections.push(processedChunk);
      currPartialChunk = null;
    } else {
      currPartialChunk = partialChunk;
    }
  });

  return [parsedChunkSections, currPartialChunk];
};

async function* handleStream<T extends NonEmptyObject>(
  streamingResponse: Response,
): AsyncGenerator<T[], void, unknown> {
  const reader = streamingResponse.body?.getReader();
  const decoder = new TextDecoder("utf-8");

  let previousPartialChunk: string | null = null;
  while (true) {
    const rawChunk = await reader?.read();
    if (!rawChunk) {
      throw new Error("Unable to process chunk");
    }
    const { done, value } = rawChunk;
    if (done) {
      break;
    }

    const [completedChunks, partialChunk] = processRawChunkString<T>(
      decoder.decode(value, { stream: true }),
      previousPartialChunk,
    );
    if (!completedChunks.length && !partialChunk) {
      break;
    }
    previousPartialChunk = partialChunk as string | null;

    yield await Promise.resolve(completedChunks);
  }
}

async function* sendMessage({
  message,
  chatSessionId,
  parentMessageId,
}: {
  message: string;
  chatSessionId?: number;
  parentMessageId?: number;
}) {
  if (!chatSessionId || !parentMessageId) {
    // Create a new chat session if one doesn't exist
    const createSessionResponse = await fetch(
      `${API_URL}/chat/create-chat-session`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${API_KEY}`,
        },
        body: JSON.stringify({
          // or specify an assistant you have defined
          persona_id: 0,
        }),
      },
    );

    if (!createSessionResponse.ok) {
      const errorJson = await createSessionResponse.json();
      const errorMsg = errorJson.message || errorJson.detail || "";
      throw Error(`Failed to create chat session - ${errorMsg}`);
    }

    const sessionData = await createSessionResponse.json();
    chatSessionId = sessionData.chat_session_id;
  }

  const sendMessageResponse = await fetch(`${API_URL}/chat/send-message`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${API_KEY}`,
    },
    body: JSON.stringify({
      chat_session_id: chatSessionId,
      parent_message_id: parentMessageId || null,
      message: message,
      prompt_id: null,
      search_doc_ids: null,
      file_descriptors: [],
      // checkout https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/search/models.py#L105 for
      // all available options
      retrieval_options: {
        run_search: "always",
        filters: null,
      },
      query_override: null,
    }),
  });
  if (!sendMessageResponse.ok) {
    const errorJson = await sendMessageResponse.json();
    const errorMsg = errorJson.message || errorJson.detail || "";
    throw Error(`Failed to send message - ${errorMsg}`);
  }

  yield* handleStream<NonEmptyObject>(sendMessageResponse);
}

export const ChatWidget = () => {
  const [messages, setMessages] = useState<{ text: string; isUser: boolean }[]>(
    [],
  );
  const [inputText, setInputText] = useState("");
  const [isLoading, setIsLoading] = useState(false);

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (inputText.trim()) {
      const initialPrevMessages = messages;
      setMessages([...initialPrevMessages, { text: inputText, isUser: true }]);
      setInputText("");
      setIsLoading(true);

      try {
        const messageGenerator = sendMessage({
          message: inputText,
          chatSessionId: undefined,
          parentMessageId: undefined,
        });
        let fullResponse = "";

        for await (const chunks of messageGenerator) {
          for (const chunk of chunks) {
            if ("answer_piece" in chunk) {
              fullResponse += chunk.answer_piece;
              setMessages([
                ...initialPrevMessages,
                { text: inputText, isUser: true },
                { text: fullResponse, isUser: false },
              ]);
            }
          }
        }
      } catch (error) {
        console.error("Error sending message:", error);
        setMessages((prevMessages) => [
          ...prevMessages,
          { text: "An error occurred. Please try again.", isUser: false },
        ]);
      } finally {
        setIsLoading(false);
      }
    }
  };

  return (
    <div
      className="
      fixed
      bottom-4
      right-4
      z-50
      bg-white
      rounded-lg
      shadow-xl
      w-96
      h-[32rem]
      flex
      flex-col
      overflow-hidden
      transition-all
      duration-300
      ease-in-out
    "
    >
      <div
        className="
        bg-gradient-to-r
        from-blue-600
        to-blue-800
        text-white
        p-4
        font-bold
        flex
        justify-between
        items-center
      "
      >
        <span>Chat Support</span>
      </div>
      <div
        className="
        flex-grow
        overflow-y-auto
        p-4
        space-y-4
        bg-gray-50
        border-b
        border-gray-200
      "
      >
        {messages.map((message, index) => (
          <div
            key={index}
            className={`
            flex
            ${message.isUser ? "justify-end" : "justify-start"}
          `}
          >
            <div
              className={`
              max-w-[75%]
              p-3
              rounded-lg
              ${
                message.isUser
                  ? "bg-blue-500 text-white"
                  : "bg-white text-black"
              }
              shadow
            `}
            >
              <ReactMarkdown>{message.text}</ReactMarkdown>
            </div>
          </div>
        ))}
        {isLoading && (
          <div className="flex justify-center">
            <div className="animate-pulse flex space-x-2">
              <div className="w-2 h-2 bg-gray-500 rounded-full"></div>
              <div className="w-2 h-2 bg-gray-500 rounded-full"></div>
              <div className="w-2 h-2 bg-gray-500 rounded-full"></div>
            </div>
          </div>
        )}
      </div>
      <form
        onSubmit={handleSubmit}
        className="
        p-4
        bg-white
        border-t
        border-gray-200
      "
      >
        <div className="relative">
          <input
            type="text"
            value={inputText}
            onChange={(e) => setInputText(e.target.value)}
            placeholder="Type a message..."
            className="
              w-full
              p-2
              pr-10
              border
              border-gray-300
              rounded-full
              focus:outline-none
              focus:ring-2
              focus:ring-blue-500
              focus:border-transparent
            "
            disabled={isLoading}
          />
          <button
            type="submit"
            disabled={isLoading}
            className="
              absolute
              right-2
              top-1/2
              transform
              -translate-y-1/2
              text-blue-500
              hover:text-blue-600
              focus:outline-none
            "
          >
            <svg
              xmlns="http://www.w3.org/2000/svg"
              className="h-6 w-6"
              fill="none"
              viewBox="0 0 24 24"
              stroke="currentColor"
            >
              <path
                strokeLinecap="round"
                strokeLinejoin="round"
                strokeWidth={2}
                d="M12 19l9 2-9-18-9 18 9-2zm0 0v-8"
              />
            </svg>
          </button>
        </div>
      </form>
    </div>
  );
};


================================================
FILE: examples/widget/tailwind.config.ts
================================================
import type { Config } from "tailwindcss";

const config: Config = {
  content: [
    "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
  ],
  theme: {
    extend: {
      backgroundImage: {
        "gradient-radial": "radial-gradient(var(--tw-gradient-stops))",
        "gradient-conic":
          "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))",
      },
    },
  },
  plugins: [],
};
export default config;


================================================
FILE: examples/widget/tsconfig.json
================================================
{
  "compilerOptions": {
    "lib": [
      "dom",
      "dom.iterable",
      "esnext"
    ],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": [
        "./src/*"
      ]
    },
    "target": "ES2017"
  },
  "include": [
    "next-env.d.ts",
    "**/*.ts",
    "**/*.tsx",
    ".next/types/**/*.ts",
    ".next/dev/types/**/*.ts"
  ],
  "exclude": [
    "node_modules"
  ]
}


================================================
FILE: extensions/chrome/LICENSE
================================================
MIT License

Copyright (c) 2025 DanswerAI, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: extensions/chrome/README.md
================================================
# Onyx Chrome Extension

The Onyx chrome extension lets you research, create, and automate with LLMs powered by your team's unique knowledge. Just hit Ctrl + O on Mac or Alt + O on Windows to instantly access Onyx in your browser:

💡 Know what your company knows, instantly with the Onyx sidebar
💬 Chat: Onyx provides a natural language chat interface as the main way of interacting with the features.
🌎 Internal Search: Ask questions and get answers from all your team's knowledge, powered by Onyx's 50+ connectors to all the tools your team uses
🚀 With a simple Ctrl + O on Mac or Alt + O on Windows - instantly summarize information from any work application

⚡️ Get quick access to the work resources you need.
🆕 Onyx new tab page puts all of your company’s knowledge at your fingertips
🤖 Access custom AI Agents for unique use cases, and give them access to tools to take action.

—

Onyx connects with dozens of popular workplace apps like Google Drive, Jira, Confluence, Slack, and more. Use this extension if you have an account created by your team admin.

## Installation

For Onyx Cloud Users, please visit the Chrome Plugin Store (pending approval still)

## Development

- Load unpacked extension in your browser
- Modify files in `src` directory
- Refresh extension in Chrome

## Contributing

Submit issues or pull requests for improvements


================================================
FILE: extensions/chrome/manifest.json
================================================
{
  "manifest_version": 3,
  "name": "Onyx",
  "version": "1.1",
  "description": "Onyx lets you research, create, and automate with LLMs powered by your team's unique knowledge",
  "permissions": [
    "sidePanel",
    "storage",
    "activeTab",
    "tabs"
  ],
  "host_permissions": ["<all_urls>"],
  "background": {
    "service_worker": "service_worker.js",
    "type": "module"
  },
  "action": {
    "default_icon": {
      "16": "public/icon16.png",
      "48": "public/icon48.png",
      "128": "public/icon128.png"
    },
    "default_popup": "src/pages/popup.html"
  },
  "icons": {
    "16": "public/icon16.png",
    "48": "public/icon48.png",
    "128": "public/icon128.png"
  },
  "options_page": "src/pages/options.html",
  "chrome_url_overrides": {
    "newtab": "src/pages/onyx_home.html"
  },
  "commands": {
    "toggleNewTabOverride": {
      "suggested_key": {
        "default": "Ctrl+Shift+O",
        "mac": "Command+Shift+O"
      },
      "description": "Toggle Onyx New Tab Override"
    },
    "openSidePanel": {
      "suggested_key": {
        "default": "Ctrl+O",
        "windows": "Alt+O",
        "mac": "MacCtrl+O"
      },
      "description": "Open Onyx Side Panel"
    }
  },
  "side_panel": {
    "default_path": "src/pages/panel.html"
  },
  "omnibox": {
    "keyword": "onyx"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["src/utils/selection-icon.js"],
      "css": ["src/styles/selection-icon.css"]
    }
  ],
  "web_accessible_resources": [
    {
      "resources": ["public/icon32.png"],
      "matches": ["<all_urls>"]
    }
  ]
}


================================================
FILE: extensions/chrome/service_worker.js
================================================
import {
  DEFAULT_ONYX_DOMAIN,
  CHROME_SPECIFIC_STORAGE_KEYS,
  ACTIONS,
  SIDE_PANEL_PATH,
} from "./src/utils/constants.js";

// Track side panel state per window
const sidePanelOpenState = new Map();

// Open welcome page on first install
chrome.runtime.onInstalled.addListener((details) => {
  if (details.reason === "install") {
    chrome.storage.local.get(
      { [CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]: false },
      (result) => {
        if (!result[CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]) {
          chrome.tabs.create({ url: "src/pages/welcome.html" });
        }
      },
    );
  }
});

async function setupSidePanel() {
  if (chrome.sidePanel) {
    try {
      // Don't auto-open side panel on action click since we have a popup menu
      await chrome.sidePanel.setPanelBehavior({
        openPanelOnActionClick: false,
      });
    } catch (error) {
      console.error("Error setting up side panel:", error);
    }
  }
}

async function openSidePanel(tabId) {
  try {
    await chrome.sidePanel.open({ tabId });
  } catch (error) {
    console.error("Error opening side panel:", error);
  }
}

function encodeUserPrompt(text) {
  return encodeURIComponent(text).replace(/\(/g, "%28").replace(/\)/g, "%29");
}

async function sendToOnyx(info, tab) {
  const selectedText = encodeUserPrompt(info.selectionText);
  const currentUrl = encodeURIComponent(tab.url);

  try {
    const result = await chrome.storage.local.get({
      [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,
    });
    const url = `${
      result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]
    }${SIDE_PANEL_PATH}?user-prompt=${selectedText}`;

    await openSidePanel(tab.id);
    chrome.runtime.sendMessage({
      action: ACTIONS.OPEN_SIDE_PANEL_WITH_INPUT,
      url: url,
      pageUrl: tab.url,
    });
  } catch (error) {
    console.error("Error sending to Onyx:", error);
  }
}

async function toggleNewTabOverride() {
  try {
    const result = await chrome.storage.local.get(
      CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,
    );
    const newValue =
      !result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];
    await chrome.storage.local.set({
      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: newValue,
    });

    chrome.notifications.create({
      type: "basic",
      iconUrl: "icon.png",
      title: "Onyx New Tab",
      message: `New Tab Override ${newValue ? "enabled" : "disabled"}`,
    });

    // Send a message to inform all tabs about the change
    chrome.tabs.query({}, (tabs) => {
      tabs.forEach((tab) => {
        chrome.tabs.sendMessage(tab.id, {
          action: "newTabOverrideToggled",
          value: newValue,
        });
      });
    });
  } catch (error) {
    console.error("Error toggling new tab override:", error);
  }
}

// Note: This listener won't fire when a popup is defined in manifest.json
// The popup will show instead. This is kept as a fallback if popup is removed.
chrome.action.onClicked.addListener((tab) => {
  openSidePanel(tab.id);
});

chrome.commands.onCommand.addListener(async (command) => {
  if (command === ACTIONS.SEND_TO_ONYX) {
    try {
      const [tab] = await chrome.tabs.query({
        active: true,
        lastFocusedWindow: true,
      });
      if (tab) {
        const response = await chrome.tabs.sendMessage(tab.id, {
          action: ACTIONS.GET_SELECTED_TEXT,
        });
        const selectedText = response?.selectedText || "";
        sendToOnyx({ selectionText: selectedText }, tab);
      }
    } catch (error) {
      console.error("Error sending to Onyx:", error);
    }
  } else if (command === ACTIONS.TOGGLE_NEW_TAB_OVERRIDE) {
    toggleNewTabOverride();
  } else if (command === ACTIONS.CLOSE_SIDE_PANEL) {
    try {
      await chrome.sidePanel.hide();
    } catch (error) {
      console.error("Error closing side panel via command:", error);
    }
  } else if (command === ACTIONS.OPEN_SIDE_PANEL) {
    chrome.tabs.query({ active: true, lastFocusedWindow: true }, (tabs) => {
      if (tabs && tabs.length > 0) {
        const tab = tabs[0];
        const windowId = tab.windowId;
        const isOpen = sidePanelOpenState.get(windowId) || false;

        if (isOpen) {
          chrome.sidePanel.setOptions({ enabled: false }, () => {
            chrome.sidePanel.setOptions({ enabled: true });
            sidePanelOpenState.set(windowId, false);
          });
        } else {
          chrome.sidePanel.open({ tabId: tab.id });
          sidePanelOpenState.set(windowId, true);
        }
      }
    });
    return;
  } else {
    console.log("Unhandled command:", command);
  }
});

async function sendActiveTabUrlToPanel() {
  try {
    const [tab] = await chrome.tabs.query({
      active: true,
      lastFocusedWindow: true,
    });
    if (tab?.url) {
      chrome.runtime.sendMessage({
        action: ACTIONS.TAB_URL_UPDATED,
        url: tab.url,
      });
    }
  } catch (error) {
    console.error("[Onyx SW] Error sending tab URL:", error);
  }
}

chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.action === ACTIONS.GET_CURRENT_ONYX_DOMAIN) {
    chrome.storage.local.get(
      { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },
      (result) => {
        sendResponse({
          [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]:
            result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN],
        });
      },
    );
    return true;
  }
  if (request.action === ACTIONS.CLOSE_SIDE_PANEL) {
    closeSidePanel();
    chrome.storage.local.get(
      { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },
      (result) => {
        chrome.tabs.create({
          url: `${result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]}/auth/login`,
          active: true,
        });
      },
    );
    return true;
  }
  if (request.action === ACTIONS.OPEN_SIDE_PANEL_WITH_INPUT) {
    const { selectedText, pageUrl } = request;
    const tabId = sender.tab?.id;
    const windowId = sender.tab?.windowId;

    if (tabId && windowId) {
      chrome.storage.local.get(
        { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },
        (result) => {
          const encodedText = encodeUserPrompt(selectedText);
          const onyxDomain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
          const url = `${onyxDomain}${SIDE_PANEL_PATH}?user-prompt=${encodedText}`;

          chrome.storage.session.set({
            pendingInput: {
              url: url,
              pageUrl: pageUrl,
              timestamp: Date.now(),
            },
          });

          chrome.sidePanel
            .open({ windowId })
            .then(() => {
              chrome.runtime.sendMessage({
                action: ACTIONS.OPEN_ONYX_WITH_INPUT,
                url: url,
                pageUrl: pageUrl,
              });
            })
            .catch((error) => {
              console.error(
                "[Onyx SW] Error opening side panel with text:",
                error,
              );
            });
        },
      );
    } else {
      console.error("[Onyx SW] Missing tabId or windowId");
    }
    return true;
  }
  if (request.action === ACTIONS.TAB_READING_ENABLED) {
    chrome.storage.session.set({ tabReadingEnabled: true });
    sendActiveTabUrlToPanel();
    return false;
  }
  if (request.action === ACTIONS.TAB_READING_DISABLED) {
    chrome.storage.session.set({ tabReadingEnabled: false });
    return false;
  }
});

chrome.storage.onChanged.addListener((changes, namespace) => {
  if (
    namespace === "local" &&
    changes[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]
  ) {
    const newValue =
      changes[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]
        .newValue;

    if (newValue === false) {
      chrome.runtime.openOptionsPage();
    }
  }
});

chrome.windows.onRemoved.addListener((windowId) => {
  sidePanelOpenState.delete(windowId);
});

chrome.omnibox.setDefaultSuggestion({
  description: 'Search Onyx for "%s"',
});

chrome.omnibox.onInputEntered.addListener(async (text) => {
  try {
    const result = await chrome.storage.local.get({
      [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,
    });

    const domain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
    const searchUrl = `${domain}/chat?user-prompt=${encodeURIComponent(text)}`;

    chrome.tabs.update({ url: searchUrl });
  } catch (error) {
    console.error("Error handling omnibox search:", error);
  }
});

chrome.omnibox.onInputChanged.addListener((text, suggest) => {
  if (text.trim()) {
    suggest([
      {
        content: text,
        description: `Search Onyx for "<match>${text}</match>"`,
      },
    ]);
  }
});

chrome.tabs.onActivated.addListener(async (activeInfo) => {
  const result = await chrome.storage.session.get({ tabReadingEnabled: false });
  if (!result.tabReadingEnabled) return;
  try {
    const tab = await chrome.tabs.get(activeInfo.tabId);
    if (tab.url) {
      chrome.runtime.sendMessage({
        action: ACTIONS.TAB_URL_UPDATED,
        url: tab.url,
      });
    }
  } catch (error) {
    console.error("[Onyx SW] Error on tab activated:", error);
  }
});

chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
  if (!changeInfo.url) return;
  const result = await chrome.storage.session.get({ tabReadingEnabled: false });
  if (!result.tabReadingEnabled) return;
  try {
    const [activeTab] = await chrome.tabs.query({
      active: true,
      lastFocusedWindow: true,
    });
    if (activeTab?.id === tabId) {
      chrome.runtime.sendMessage({
        action: ACTIONS.TAB_URL_UPDATED,
        url: changeInfo.url,
      });
    }
  } catch (error) {
    console.error("[Onyx SW] Error on tab updated:", error);
  }
});

setupSidePanel();


================================================
FILE: extensions/chrome/src/pages/onyx_home.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="Permissions-Policy" content="clipboard-write=(self)" />
    <title>Onyx Home</title>
    <link rel="stylesheet" href="../styles/shared.css" />
    <style>
      body,
      html {
        margin: 0;
        padding: 0;
        width: 100%;
        height: 100vh;
        overflow: hidden;
      }

      @media (prefers-color-scheme: dark) {
        html,
        body {
          background-color: #000;
        }
      }

      @media (prefers-color-scheme: light) {
        html,
        body {
          background-color: #f6f6f6;
        }
      }

      #background {
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background-size: cover;
        background-position: center;
        background-repeat: no-repeat;
        transition: opacity 0.5s ease-in-out;
      }

      #content {
        position: relative;
        width: 100%;
        height: 100%;
        opacity: 0;
        transition: opacity 0.5s ease-in-out;
      }

      iframe {
        border: none;
        width: 100%;
        height: 100%;
        position: absolute;
        top: 0;
        left: 0;
        visibility: hidden;
      }
    </style>
  </head>

  <body>
    <div id="background"></div>
    <div id="content">
      <iframe
        id="onyx-iframe"
        allowfullscreen
        allow="clipboard-read; clipboard-write"
      ></iframe>
    </div>
    <script src="onyx_home.js" type="module"></script>
  </body>
</html>


================================================
FILE: extensions/chrome/src/pages/onyx_home.js
================================================
import {
  CHROME_MESSAGE,
  CHROME_SPECIFIC_STORAGE_KEYS,
  WEB_MESSAGE,
} from "../utils/constants.js";
import {
  showErrorModal,
  hideErrorModal,
  initErrorModal,
} from "../utils/error-modal.js";
import { getOnyxDomain } from "../utils/storage.js";

(function () {
  let mainIframe = document.getElementById("onyx-iframe");
  let preloadedIframe = null;
  const background = document.getElementById("background");
  const content = document.getElementById("content");
  const DEFAULT_LIGHT_BACKGROUND_IMAGE =
    "https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D";
  const DEFAULT_DARK_BACKGROUND_IMAGE =
    "https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D";

  let iframeLoadTimeout;
  let iframeLoaded = false;

  initErrorModal();

  async function preloadChatInterface() {
    preloadedIframe = document.createElement("iframe");

    const domain = await getOnyxDomain();
    preloadedIframe.src = domain + "/chat";
    preloadedIframe.style.opacity = "0";
    preloadedIframe.style.visibility = "hidden";
    preloadedIframe.style.transition = "opacity 0.3s ease-in";
    preloadedIframe.style.border = "none";
    preloadedIframe.style.width = "100%";
    preloadedIframe.style.height = "100%";
    preloadedIframe.style.position = "absolute";
    preloadedIframe.style.top = "0";
    preloadedIframe.style.left = "0";
    preloadedIframe.style.zIndex = "1";
    content.appendChild(preloadedIframe);
  }

  function setIframeSrc(url) {
    mainIframe.src = url;
    startIframeLoadTimeout();
    iframeLoaded = false;
  }

  function startIframeLoadTimeout() {
    clearTimeout(iframeLoadTimeout);
    iframeLoadTimeout = setTimeout(() => {
      if (!iframeLoaded) {
        try {
          if (
            mainIframe.contentWindow.location.pathname.includes("/auth/login")
          ) {
            showLoginPage();
          } else {
            showErrorModal(mainIframe.src);
          }
        } catch (error) {
          showErrorModal(mainIframe.src);
        }
      }
    }, 2500);
  }

  function showLoginPage() {
    background.style.opacity = "0";
    mainIframe.style.opacity = "1";
    mainIframe.style.visibility = "visible";
    content.style.opacity = "1";
    hideErrorModal();
  }

  function setTheme(theme, customBackgroundImage) {
    const imageUrl =
      customBackgroundImage ||
      (theme === "dark"
        ? DEFAULT_DARK_BACKGROUND_IMAGE
        : DEFAULT_LIGHT_BACKGROUND_IMAGE);
    background.style.backgroundImage = `url('${imageUrl}')`;
  }

  function fadeInContent() {
    content.style.transition = "opacity 0.5s ease-in";
    mainIframe.style.transition = "opacity 0.5s ease-in";
    content.style.opacity = "0";
    mainIframe.style.opacity = "0";
    mainIframe.style.visibility = "visible";

    requestAnimationFrame(() => {
      content.style.opacity = "1";
      mainIframe.style.opacity = "1";

      setTimeout(() => {
        background.style.transition = "opacity 0.3s ease-out";
        background.style.opacity = "0";
      }, 500);
    });
  }

  function checkOnyxPreference() {
    chrome.storage.local.get(
      [
        CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,
        CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN,
      ],
      (items) => {
        let useOnyxAsDefaultNewTab =
          items[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];

        if (useOnyxAsDefaultNewTab === undefined) {
          useOnyxAsDefaultNewTab = !!(
            localStorage.getItem(
              CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,
            ) === "1"
          );
          chrome.storage.local.set({
            [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:
              useOnyxAsDefaultNewTab,
          });
        }

        if (!useOnyxAsDefaultNewTab) {
          chrome.tabs.update({
            url: "chrome://new-tab-page",
          });
          return;
        }

        setIframeSrc(items[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN] + "/nrf");
      },
    );
  }

  function loadThemeAndBackground() {
    chrome.storage.local.get(
      [
        CHROME_SPECIFIC_STORAGE_KEYS.THEME,
        CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE,
        CHROME_SPECIFIC_STORAGE_KEYS.DARK_BG_URL,
        CHROME_SPECIFIC_STORAGE_KEYS.LIGHT_BG_URL,
      ],
      function (result) {
        const theme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME] || "light";
        const customBackgroundImage =
          result[CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE];
        const darkBgUrl = result[CHROME_SPECIFIC_STORAGE_KEYS.DARK_BG_URL];
        const lightBgUrl = result[CHROME_SPECIFIC_STORAGE_KEYS.LIGHT_BG_URL];

        let backgroundImage;
        if (customBackgroundImage) {
          backgroundImage = customBackgroundImage;
        } else if (theme === "dark" && darkBgUrl) {
          backgroundImage = darkBgUrl;
        } else if (theme === "light" && lightBgUrl) {
          backgroundImage = lightBgUrl;
        }

        setTheme(theme, backgroundImage);
        checkOnyxPreference();
      },
    );
  }

  function loadNewPage(newSrc) {
    if (preloadedIframe && preloadedIframe.contentWindow) {
      preloadedIframe.contentWindow.postMessage(
        { type: WEB_MESSAGE.PAGE_CHANGE, href: newSrc },
        "*",
      );
    } else {
      console.error("Preloaded iframe not available");
    }
  }

  function completePendingPageLoad() {
    if (preloadedIframe) {
      preloadedIframe.style.visibility = "visible";
      preloadedIframe.style.opacity = "1";
      preloadedIframe.style.zIndex = "1";
      mainIframe.style.zIndex = "2";
      mainIframe.style.opacity = "0";

      setTimeout(() => {
        if (content.contains(mainIframe)) {
          content.removeChild(mainIframe);
        }

        mainIframe = preloadedIframe;
        mainIframe.id = "onyx-iframe";
        mainIframe.style.zIndex = "";
        iframeLoaded = true;
        clearTimeout(iframeLoadTimeout);
      }, 200);
    } else {
      console.warn("No preloaded iframe available");
    }
  }

  chrome.storage.onChanged.addListener(function (changes, namespace) {
    if (namespace === "local" && changes.useOnyxAsDefaultNewTab) {
      checkOnyxPreference();
    }
  });

  window.addEventListener("message", function (event) {
    if (event.data.type === CHROME_MESSAGE.SET_DEFAULT_NEW_TAB) {
      chrome.storage.local.set({ useOnyxAsDefaultNewTab: event.data.value });
    } else if (event.data.type === CHROME_MESSAGE.ONYX_APP_LOADED) {
      clearTimeout(iframeLoadTimeout);
      hideErrorModal();
      fadeInContent();
      iframeLoaded = true;
    } else if (event.data.type === CHROME_MESSAGE.PREFERENCES_UPDATED) {
      const { theme, backgroundUrl } = event.data.payload;
      chrome.storage.local.set(
        {
          [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: theme,
          [CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE]: backgroundUrl,
        },
        () => {},
      );
    } else if (event.data.type === CHROME_MESSAGE.LOAD_NEW_PAGE) {
      loadNewPage(event.data.href);
    } else if (event.data.type === CHROME_MESSAGE.LOAD_NEW_CHAT_PAGE) {
      completePendingPageLoad();
    }
  });

  mainIframe.onload = function () {
    clearTimeout(iframeLoadTimeout);
    startIframeLoadTimeout();
  };

  mainIframe.onerror = function (error) {
    showErrorModal(mainIframe.src);
  };

  loadThemeAndBackground();
  preloadChatInterface();
})();


================================================
FILE: extensions/chrome/src/pages/options.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="Permissions-Policy" content="clipboard-write=(self)" />
    <title>Onyx - Settings</title>
    <link rel="stylesheet" href="../styles/shared.css" />
    <style>
      :root {
        --background-900: #0a0a0a;
        --background-800: #1a1a1a;
        --text-light-05: rgba(255, 255, 255, 0.95);
        --text-light-03: rgba(255, 255, 255, 0.6);
        --white-10: rgba(255, 255, 255, 0.1);
        --white-15: rgba(255, 255, 255, 0.15);
        --white-20: rgba(255, 255, 255, 0.2);
        --white-30: rgba(255, 255, 255, 0.3);
        --white-40: rgba(255, 255, 255, 0.4);
        --white-80: rgba(255, 255, 255, 0.8);
        --black-40: rgba(0, 0, 0, 0.4);
      }

      * {
        box-sizing: border-box;
      }

      body {
        margin: 0;
        padding: 0;
        font-family: var(--font-hanken-grotesk);
        background: linear-gradient(
          135deg,
          var(--background-900) 0%,
          var(--background-800) 100%
        );
        min-height: 100vh;
        color: var(--text-light-05);
        transition: background 0.3s ease;
      }

      body.light-theme {
        --background-900: #f5f5f5;
        --background-800: #ffffff;
        --text-light-05: rgba(0, 0, 0, 0.95);
        --text-light-03: rgba(0, 0, 0, 0.6);
        background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);
      }

      body.light-theme .settings-panel {
        background: linear-gradient(
          to bottom,
          rgba(255, 255, 255, 0.95),
          rgba(245, 245, 245, 0.95)
        );
        border: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .settings-header {
        border-bottom: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .settings-icon {
        background: rgba(0, 0, 0, 0.05);
      }

      body.light-theme .theme-toggle {
        background: rgba(0, 0, 0, 0.05);
        border: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .theme-toggle:hover {
        background: rgba(0, 0, 0, 0.08);
      }

      body.light-theme .theme-toggle svg {
        stroke: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .settings-group {
        background: rgba(0, 0, 0, 0.03);
      }

      body.light-theme .setting-divider {
        background: rgba(0, 0, 0, 0.1);
      }

      body.light-theme .input-field {
        border: 1px solid rgba(0, 0, 0, 0.1);
        background: rgba(0, 0, 0, 0.05);
        color: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .input-field:focus {
        outline: none;
        border-color: rgba(0, 0, 0, 0.25);
        background: rgba(0, 0, 0, 0.08);
        box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.05);
        color: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .status-container {
        background: rgba(0, 0, 0, 0.03);
      }

      body.light-theme .button.secondary {
        background: rgba(0, 0, 0, 0.05);
        color: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .button.secondary:hover {
        background: rgba(0, 0, 0, 0.08);
      }

      body.light-theme .toggle-slider {
        background-color: rgba(0, 0, 0, 0.15);
      }

      body.light-theme input:checked + .toggle-slider {
        background-color: rgba(0, 0, 0, 0.3);
      }

      body.light-theme .toggle-slider:before {
        background-color: white;
        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);
      }

      .settings-container {
        max-width: 500px;
        width: 100%;
        margin: 0 auto;
        padding: 40px 20px;
      }

      .settings-panel {
        background: linear-gradient(
          to bottom,
          rgba(10, 10, 10, 0.95),
          rgba(26, 26, 26, 0.95)
        );
        backdrop-filter: blur(24px);
        border-radius: 16px;
        border: 1px solid var(--white-10);
        overflow: hidden;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
      }

      .settings-header {
        padding: 24px;
        border-bottom: 1px solid var(--white-10);
        display: flex;
        align-items: center;
        justify-content: space-between;
        background: transparent;
      }

      .settings-header-left {
        display: flex;
        align-items: center;
        gap: 12px;
      }

      .settings-icon {
        width: 40px;
        height: 40px;
        border-radius: 12px;
        background: white;
        display: flex;
        align-items: center;
        justify-content: center;
        overflow: hidden;
      }

      .settings-icon img {
        width: 100%;
        height: 100%;
        object-fit: contain;
        padding: 6px;
      }

      .settings-title {
        font-size: 20px;
        font-weight: 600;
        color: var(--text-light-05);
        margin: 0;
      }

      .theme-toggle {
        display: flex;
        align-items: center;
        gap: 8px;
        padding: 6px 12px;
        border-radius: 999px;
        background: var(--white-10);
        border: 1px solid var(--white-10);
        cursor: pointer;
        transition: all 0.2s;
      }

      .theme-toggle:hover {
        background: var(--white-15);
      }

      .theme-toggle svg {
        width: 16px;
        height: 16px;
        stroke: var(--text-light-05);
      }

      .settings-content {
        padding: 24px;
      }

      .settings-section {
        margin-bottom: 32px;
      }

      .settings-section:last-child {
        margin-bottom: 0;
      }

      .section-title {
        font-size: 11px;
        font-weight: 600;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        color: var(--text-light-03);
        margin-bottom: 12px;
      }

      .settings-group {
        background: rgba(255, 255, 255, 0.05);
        border-radius: 16px;
        padding: 4px;
      }

      .setting-row {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 12px;
      }

      .setting-row-content {
        display: flex;
        flex-direction: column;
        gap: 4px;
        flex: 1;
      }

      .setting-label {
        font-size: 14px;
        font-weight: 400;
        color: var(--text-light-05);
      }

      .setting-description {
        font-size: 12px;
        color: var(--text-light-03);
      }

      .setting-divider {
        height: 1px;
        background: var(--white-10);
        margin: 0 4px;
      }

      .input-field {
        width: 100%;
        padding: 10px 12px;
        border: 1px solid var(--white-10);
        border-radius: 8px;
        font-size: 14px;
        background: rgba(255, 255, 255, 0.05);
        color: var(--text-light-05);
        font-family: var(--font-hanken-grotesk);
        transition: all 0.2s;
        margin: 0;
      }

      .input-field:focus {
        outline: none;
        border-color: var(--white-30);
        background: rgba(255, 255, 255, 0.1);
        box-shadow: 0 0 0 2px rgba(255, 255, 255, 0.1);
        color: var(--text-light-05);
      }

      .input-field::placeholder {
        color: var(--text-light-03);
      }

      .setting-row .input-field {
        margin-top: 0;
      }

      .toggle-switch {
        position: relative;
        display: inline-block;
        width: 44px;
        height: 24px;
      }

      .toggle-switch input {
        opacity: 0;
        width: 0;
        height: 0;
      }

      .toggle-slider {
        position: absolute;
        cursor: pointer;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background-color: rgba(255, 255, 255, 0.2);
        transition: 0.3s;
        border-radius: 24px;
      }

      .toggle-slider:before {
        position: absolute;
        content: "";
        height: 18px;
        width: 18px;
        left: 3px;
        bottom: 3px;
        background-color: white;
        transition: 0.3s;
        border-radius: 50%;
      }

      input:checked + .toggle-slider {
        background-color: rgba(255, 255, 255, 0.4);
      }

      input:checked + .toggle-slider:before {
        transform: translateX(20px);
      }

      .status-container {
        margin-top: 20px;
        padding: 12px;
        background: rgba(255, 255, 255, 0.05);
        border-radius: 8px;
        opacity: 0;
        transition: opacity 0.3s;
      }

      .status-container.show {
        opacity: 1;
      }

      .status-message {
        margin: 0 0 12px 0;
        color: var(--text-light-05);
        font-size: 14px;
        line-height: 1.5;
      }

      .button {
        padding: 10px 20px;
        border-radius: 8px;
        border: none;
        cursor: pointer;
        font-size: 14px;
        font-weight: 500;
        transition: all 0.2s;
        font-family: var(--font-hanken-grotesk);
      }

      .button.secondary {
        background: var(--white-10);
        color: var(--text-light-05);
        width: 100%;
      }

      .button.secondary:hover {
        background: var(--white-15);
      }

      kbd {
        background: rgba(255, 255, 255, 0.1);
        border: 1px solid var(--white-10);
        border-radius: 4px;
        padding: 2px 6px;
        font-family: monospace;
        font-weight: 500;
        color: var(--text-light-05);
        font-size: 11px;
      }

      @media (max-width: 600px) {
        .settings-container {
          padding: 20px 16px;
        }

        .settings-header {
          padding: 20px;
        }

        .settings-content {
          padding: 20px;
        }
      }
    </style>
  </head>

  <body>
    <div class="settings-container">
      <div class="settings-panel">
        <div class="settings-header">
          <div class="settings-header-left">
            <div class="settings-icon">
              <img src="../../public/icon48.png" alt="Onyx" />
            </div>
            <h1 class="settings-title">Settings</h1>
          </div>
          <button
            class="theme-toggle"
            id="themeToggle"
            aria-label="Toggle theme"
          >
            <svg
              id="themeIcon"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
            >
              <circle cx="12" cy="12" r="4"></circle>
              <path
                d="M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41"
              ></path>
            </svg>
          </button>
        </div>

        <div class="settings-content">
          <!-- General Section -->
          <section class="settings-section">
            <div class="section-title">General</div>
            <div class="settings-group">
              <div class="setting-row">
                <div class="setting-row-content">
                  <label class="setting-label" for="onyxDomain"
                    >Root Domain</label
                  >
                  <div class="setting-description">
                    The root URL for your Onyx instance
                  </div>
                </div>
              </div>
              <div class="setting-divider"></div>
              <div class="setting-row" style="padding: 12px">
                <input
                  type="text"
                  id="onyxDomain"
                  class="input-field"
                  placeholder="https://cloud.onyx.app"
                />
              </div>
              <div class="setting-divider"></div>
              <div class="setting-row">
                <div class="setting-row-content">
                  <label class="setting-label" for="useOnyxAsDefault"
                    >Use Onyx as new tab page</label
                  >
                </div>
                <label class="toggle-switch">
                  <input type="checkbox" id="useOnyxAsDefault" />
                  <span class="toggle-slider"></span>
                </label>
              </div>
            </div>
          </section>

          <!-- Search Engine Section -->
          <section class="settings-section">
            <div class="section-title">Search Engine</div>
            <div class="settings-group">
              <div class="setting-row">
                <div class="setting-row-content">
                  <label class="setting-label">Use Onyx in Address Bar</label>
                  <div class="setting-description">
                    Type <kbd>onyx</kbd> followed by a space in Chrome's address
                    bar, then enter your search query and press Enter
                  </div>
                </div>
              </div>
              <div class="setting-divider"></div>
              <div class="setting-row">
                <div class="setting-row-content">
                  <div class="setting-description">
                    Searches will be directed to your configured Onyx instance
                    at the Root Domain above
                  </div>
                </div>
              </div>
            </div>
          </section>

          <!-- Status Message -->
          <div id="statusContainer" class="status-container">
            <p id="status" class="status-message"></p>
            <button id="newTab" class="button secondary" style="display: none">
              Open New Tab to Test
            </button>
          </div>
        </div>
      </div>
    </div>
    <script type="module" src="options.js"></script>
  </body>
</html>


================================================
FILE: extensions/chrome/src/pages/options.js
================================================
import {
  CHROME_SPECIFIC_STORAGE_KEYS,
  DEFAULT_ONYX_DOMAIN,
} from "../utils/constants.js";

document.addEventListener("DOMContentLoaded", function () {
  const domainInput = document.getElementById("onyxDomain");
  const useOnyxAsDefaultToggle = document.getElementById("useOnyxAsDefault");
  const statusContainer = document.getElementById("statusContainer");
  const statusElement = document.getElementById("status");
  const newTabButton = document.getElementById("newTab");
  const themeToggle = document.getElementById("themeToggle");
  const themeIcon = document.getElementById("themeIcon");

  let currentTheme = "dark";

  function updateThemeIcon(theme) {
    if (!themeIcon) return;

    if (theme === "light") {
      themeIcon.innerHTML = `
        <circle cx="12" cy="12" r="4"></circle>
        <path d="M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41"></path>
      `;
    } else {
      themeIcon.innerHTML = `
        <path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"></path>
      `;
    }
  }

  function loadStoredValues() {
    chrome.storage.local.get(
      {
        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,
        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: false,
        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: "dark",
      },
      (result) => {
        if (domainInput)
          domainInput.value = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
        if (useOnyxAsDefaultToggle)
          useOnyxAsDefaultToggle.checked =
            result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];

        currentTheme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME] || "dark";
        updateThemeIcon(currentTheme);

        document.body.className = currentTheme === "light" ? "light-theme" : "";
      },
    );
  }

  function saveSettings() {
    const domain = domainInput.value.trim();
    const useOnyxAsDefault = useOnyxAsDefaultToggle
      ? useOnyxAsDefaultToggle.checked
      : false;

    chrome.storage.local.set(
      {
        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain,
        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:
          useOnyxAsDefault,
        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,
      },
      () => {
        showStatusMessage(
          useOnyxAsDefault
            ? "Settings updated. Open a new tab to test it out. Click on the extension icon to bring up Onyx from any page."
            : "Settings updated.",
        );
      },
    );
  }

  function showStatusMessage(message) {
    if (statusElement) {
      const useOnyxAsDefault = useOnyxAsDefaultToggle
        ? useOnyxAsDefaultToggle.checked
        : false;

      statusElement.textContent =
        message ||
        (useOnyxAsDefault
          ? "Settings updated. Open a new tab to test it out. Click on the extension icon to bring up Onyx from any page."
          : "Settings updated.");

      if (newTabButton) {
        newTabButton.style.display = useOnyxAsDefault ? "block" : "none";
      }
    }

    if (statusContainer) {
      statusContainer.classList.add("show");
    }

    setTimeout(hideStatusMessage, 5000);
  }

  function hideStatusMessage() {
    if (statusContainer) {
      statusContainer.classList.remove("show");
    }
  }

  function toggleTheme() {
    currentTheme = currentTheme === "light" ? "dark" : "light";
    updateThemeIcon(currentTheme);

    document.body.className = currentTheme === "light" ? "light-theme" : "";

    chrome.storage.local.set({
      [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,
    });
  }

  function openNewTab() {
    chrome.tabs.create({});
  }

  if (domainInput) {
    domainInput.addEventListener("input", () => {
      clearTimeout(domainInput.saveTimeout);
      domainInput.saveTimeout = setTimeout(saveSettings, 1000);
    });
  }

  if (useOnyxAsDefaultToggle) {
    useOnyxAsDefaultToggle.addEventListener("change", saveSettings);
  }

  if (themeToggle) {
    themeToggle.addEventListener("click", toggleTheme);
  }

  if (newTabButton) {
    newTabButton.addEventListener("click", openNewTab);
  }

  loadStoredValues();
});


================================================
FILE: extensions/chrome/src/pages/panel.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="Permissions-Policy" content="clipboard-write=(self)" />
    <title>Onyx Panel</title>
    <link rel="stylesheet" href="../styles/shared.css" />
    <style>
      body,
      html {
        margin: 0;
        padding: 0;
        width: 100%;
        height: 100vh;
        overflow: hidden;
      }

      #loading-screen {
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background-color: #f5f5f5;
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        z-index: 1000;
        transition: opacity 0.5s ease-in-out;
      }

      #logo {
        width: 100px;
        height: 100px;
        background-image: url("/public/logo.png");
        background-size: contain;
        background-repeat: no-repeat;
        background-position: center;
        animation: pulse 2s infinite;
      }

      @keyframes pulse {
        0% {
          transform: scale(1);
        }

        50% {
          transform: scale(1.1);
        }

        100% {
          transform: scale(1);
        }
      }

      #loading-text {
        color: #0a0a0a;
        margin-top: 20px;
        font-size: 1.125rem;
        font-weight: 600;
        text-align: center;
      }

      iframe {
        border: none;
        width: 100%;
        height: 100%;
        position: absolute;
        top: 0;
        left: 0;
        opacity: 0;
        transition: opacity 0.5s ease-in-out;
      }
    </style>
  </head>

  <body>
    <div id="loading-screen">
      <div id="logo"></div>
      <div id="loading-text">Loading Onyx...</div>
    </div>
    <iframe
      id="onyx-panel-iframe"
      allow="clipboard-read; clipboard-write"
    ></iframe>
    <script src="../utils/error-modal.js" type="module"></script>
    <script src="panel.js" type="module"></script>
  </body>
</html>


================================================
FILE: extensions/chrome/src/pages/panel.js
================================================
import { showErrorModal, showAuthModal } from "../utils/error-modal.js";
import {
  ACTIONS,
  CHROME_MESSAGE,
  WEB_MESSAGE,
  CHROME_SPECIFIC_STORAGE_KEYS,
  SIDE_PANEL_PATH,
} from "../utils/constants.js";
(function () {
  const iframe = document.getElementById("onyx-panel-iframe");
  const loadingScreen = document.getElementById("loading-screen");

  let currentUrl = "";
  let iframeLoaded = false;
  let iframeLoadTimeout;
  let authRequired = false;

  // Returns the origin of the Onyx app loaded in the iframe.
  // We derive the origin from iframe.src so postMessage payloads
  // (including tab URLs) are only delivered to the expected page.
  // Throws if iframe.src is not a valid URL — this is intentional:
  // postMessage must never fall back to the unsafe wildcard "*".
  function getIframeOrigin() {
    return new URL(iframe.src).origin;
  }

  async function checkPendingInput() {
    try {
      const result = await chrome.storage.session.get("pendingInput");
      if (result.pendingInput) {
        const { url, pageUrl, timestamp } = result.pendingInput;
        if (Date.now() - timestamp < 5000) {
          setIframeSrc(url, pageUrl);
          await chrome.storage.session.remove("pendingInput");
          return true;
        }
        await chrome.storage.session.remove("pendingInput");
      }
    } catch (error) {
      console.error("[Onyx Panel] Error checking pending input:", error);
    }
    return false;
  }

  async function initializePanel() {
    loadingScreen.style.display = "flex";
    loadingScreen.style.opacity = "1";
    iframe.style.opacity = "0";

    // Check for pending input first (from selection icon click)
    const hasPendingInput = await checkPendingInput();
    if (!hasPendingInput) {
      loadOnyxDomain();
    }
  }

  function setIframeSrc(url, pageUrl) {
    iframe.src = url;
    currentUrl = pageUrl;
  }

  function sendWebsiteToIframe(pageUrl) {
    if (iframe.contentWindow && pageUrl !== currentUrl) {
      iframe.contentWindow.postMessage(
        {
          type: WEB_MESSAGE.PAGE_CHANGE,
          url: pageUrl,
        },
        getIframeOrigin(),
      );
      currentUrl = pageUrl;
    }
  }

  function startIframeLoadTimeout() {
    iframeLoadTimeout = setTimeout(() => {
      if (!iframeLoaded) {
        if (authRequired) {
          showAuthModal();
        } else {
          showErrorModal(iframe.src);
        }
      }
    }, 2500);
  }

  function handleMessage(event) {
    // Only trust messages from the Onyx app iframe.
    // Check both source identity and origin so that a cross-origin page
    // navigated to inside the iframe cannot send privileged extension
    // messages (e.g. TAB_READING_ENABLED) after iframe.src changes.
    // getIframeOrigin() throws if iframe.src is not yet a valid URL —
    // catching it here fails closed (message is rejected, not processed).
    if (event.source !== iframe.contentWindow) return;
    try {
      if (event.origin !== getIframeOrigin()) return;
    } catch {
      return;
    }
    if (event.data.type === CHROME_MESSAGE.ONYX_APP_LOADED) {
      clearTimeout(iframeLoadTimeout);
      iframeLoaded = true;
      showIframe();
      if (iframe.contentWindow) {
        iframe.contentWindow.postMessage(
          { type: "PANEL_READY" },
          getIframeOrigin(),
        );
      }
    } else if (event.data.type === CHROME_MESSAGE.AUTH_REQUIRED) {
      authRequired = true;
    } else if (event.data.type === CHROME_MESSAGE.TAB_READING_ENABLED) {
      chrome.runtime.sendMessage({ action: ACTIONS.TAB_READING_ENABLED });
    } else if (event.data.type === CHROME_MESSAGE.TAB_READING_DISABLED) {
      chrome.runtime.sendMessage({ action: ACTIONS.TAB_READING_DISABLED });
    }
  }

  function showIframe() {
    iframe.style.opacity = "1";
    loadingScreen.style.opacity = "0";
    setTimeout(() => {
      loadingScreen.style.display = "none";
    }, 500);
  }

  async function loadOnyxDomain() {
    const response = await chrome.runtime.sendMessage({
      action: ACTIONS.GET_CURRENT_ONYX_DOMAIN,
    });
    if (response && response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]) {
      setIframeSrc(
        response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN] + SIDE_PANEL_PATH,
        "",
      );
    } else {
      console.warn("Onyx domain not found, using default");
      const domain = await getOnyxDomain();
      setIframeSrc(domain + SIDE_PANEL_PATH, "");
    }
  }

  chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
    if (request.action === ACTIONS.OPEN_ONYX_WITH_INPUT) {
      setIframeSrc(request.url, request.pageUrl);
    } else if (request.action === ACTIONS.UPDATE_PAGE_URL) {
      sendWebsiteToIframe(request.pageUrl);
    } else if (request.action === ACTIONS.TAB_URL_UPDATED) {
      if (iframe.contentWindow) {
        iframe.contentWindow.postMessage(
          { type: CHROME_MESSAGE.TAB_URL_UPDATED, url: request.url },
          getIframeOrigin(),
        );
      }
    }
  });

  window.addEventListener("message", handleMessage);

  initializePanel();
  startIframeLoadTimeout();
})();


================================================
FILE: extensions/chrome/src/pages/popup.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="Permissions-Policy" content="clipboard-write=(self)" />
    <title>Onyx</title>
    <link rel="stylesheet" href="../styles/shared.css" />
    <style>
      :root {
        --background-900: #0a0a0a;
        --background-800: #1a1a1a;
        --text-light-05: rgba(255, 255, 255, 0.95);
        --text-light-03: rgba(255, 255, 255, 0.6);
        --white-10: rgba(255, 255, 255, 0.1);
        --white-15: rgba(255, 255, 255, 0.15);
        --white-20: rgba(255, 255, 255, 0.2);
      }

      * {
        box-sizing: border-box;
      }

      body {
        width: 300px;
        margin: 0;
        padding: 0;
        font-family: var(--font-hanken-grotesk);
        background: linear-gradient(
          135deg,
          var(--background-900) 0%,
          var(--background-800) 100%
        );
        color: var(--text-light-05);
      }

      .popup-container {
        padding: 16px;
      }

      .popup-header {
        display: flex;
        align-items: center;
        gap: 12px;
        padding-bottom: 16px;
        border-bottom: 1px solid var(--white-10);
        margin-bottom: 16px;
      }

      .popup-icon {
        width: 36px;
        height: 36px;
        border-radius: 10px;
        background: white;
        display: flex;
        align-items: center;
        justify-content: center;
        overflow: hidden;
      }

      .popup-icon img {
        width: 100%;
        height: 100%;
        object-fit: contain;
        padding: 4px;
      }

      .popup-title {
        margin: 0;
        font-size: 18px;
        font-weight: 600;
        color: var(--text-light-05);
      }

      .menu-button-content {
        display: flex;
        align-items: center;
        justify-content: space-between;
        width: 100%;
      }

      .menu-button-text {
        display: flex;
        align-items: center;
        gap: 10px;
      }

      .menu-button-shortcut {
        font-size: 11px;
        color: var(--text-light-03);
        font-weight: 400;
        margin-left: auto;
      }

      .settings-group {
        background: rgba(255, 255, 255, 0.05);
        border-radius: 12px;
        padding: 4px;
        margin-bottom: 12px;
      }

      .setting-row {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 12px;
      }

      .setting-label {
        font-size: 14px;
        font-weight: 400;
        color: var(--text-light-05);
      }

      .setting-divider {
        height: 1px;
        background: var(--white-10);
        margin: 0 4px;
      }

      .menu-button {
        background: rgba(255, 255, 255, 0.05);
        border: none;
        padding: 12px;
        width: 100%;
        text-align: left;
        cursor: pointer;
        font-size: 14px;
        color: var(--text-light-05);
        font-weight: 400;
        transition: background 0.2s;
        border-radius: 12px;
        font-family: var(--font-hanken-grotesk);
      }

      .menu-button:hover {
        background: rgba(255, 255, 255, 0.1);
      }

      .menu-button svg {
        width: 18px;
        height: 18px;
        stroke: var(--text-light-05);
        fill: none;
        stroke-width: 2;
        stroke-linecap: round;
        stroke-linejoin: round;
      }

      .button-group {
        display: flex;
        flex-direction: column;
        gap: 8px;
      }

      .toggle-switch {
        position: relative;
        display: inline-block;
        width: 44px;
        height: 24px;
      }

      .toggle-switch input {
        opacity: 0;
        width: 0;
        height: 0;
      }

      .toggle-slider {
        position: absolute;
        cursor: pointer;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background-color: rgba(255, 255, 255, 0.2);
        transition: 0.3s;
        border-radius: 24px;
      }

      .toggle-slider:before {
        position: absolute;
        content: "";
        height: 18px;
        width: 18px;
        left: 3px;
        bottom: 3px;
        background-color: white;
        transition: 0.3s;
        border-radius: 50%;
      }

      input:checked + .toggle-slider {
        background-color: rgba(255, 255, 255, 0.4);
      }

      input:checked + .toggle-slider:before {
        transform: translateX(20px);
      }
    </style>
  </head>
  <body>
    <div class="popup-container">
      <div class="popup-header">
        <div class="popup-icon">
          <img src="../../public/icon48.png" alt="Onyx" />
        </div>
        <h2 class="popup-title">Onyx</h2>
      </div>

      <div class="settings-group">
        <div class="setting-row">
          <label class="setting-label" for="defaultNewTabToggle">
            Use Onyx as new tab page
          </label>
          <label class="toggle-switch">
            <input type="checkbox" id="defaultNewTabToggle" />
            <span class="toggle-slider"></span>
          </label>
        </div>
      </div>

      <div class="button-group">
        <button class="menu-button" id="openSidePanel">
          <div class="menu-button-content">
            <div class="menu-button-text">
              <svg viewBox="0 0 24 24">
                <rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect>
                <line x1="15" y1="3" x2="15" y2="21"></line>
              </svg>
              Open Onyx Panel
            </div>
            <span class="menu-button-shortcut">Ctrl+O</span>
          </div>
        </button>

        <button class="menu-button" id="openOptions">
          <div class="menu-button-text">
            <svg viewBox="0 0 24 24">
              <circle cx="12" cy="12" r="3"></circle>
              <path
                d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"
              ></path>
            </svg>
            Extension Settings
          </div>
        </button>
      </div>
    </div>
    <script type="module" src="popup.js"></script>
  </body>
</html>


================================================
FILE: extensions/chrome/src/pages/popup.js
================================================
import { CHROME_SPECIFIC_STORAGE_KEYS } from "../utils/constants.js";

document.addEventListener("DOMContentLoaded", async function () {
  const defaultNewTabToggle = document.getElementById("defaultNewTabToggle");
  const openSidePanelButton = document.getElementById("openSidePanel");
  const openOptionsButton = document.getElementById("openOptions");

  async function loadSetting() {
    const result = await chrome.storage.local.get({
      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: false,
    });
    if (defaultNewTabToggle) {
      defaultNewTabToggle.checked =
        result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];
    }
  }

  async function toggleSetting() {
    const currentValue = defaultNewTabToggle.checked;
    await chrome.storage.local.set({
      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: currentValue,
    });
  }

  async function openSidePanel() {
    try {
      const [tab] = await chrome.tabs.query({
        active: true,
        currentWindow: true,
      });
      if (tab && chrome.sidePanel) {
        await chrome.sidePanel.open({ tabId: tab.id });
        window.close();
      }
    } catch (error) {
      console.error("Error opening side panel:", error);
    }
  }

  function openOptions() {
    chrome.runtime.openOptionsPage();
    window.close();
  }

  await loadSetting();

  if (defaultNewTabToggle) {
    defaultNewTabToggle.addEventListener("change", toggleSetting);
  }

  if (openSidePanelButton) {
    openSidePanelButton.addEventListener("click", openSidePanel);
  }

  if (openOptionsButton) {
    openOptionsButton.addEventListener("click", openOptions);
  }
});


================================================
FILE: extensions/chrome/src/pages/welcome.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Welcome to Onyx</title>
    <link rel="preconnect" href="https://fonts.googleapis.com" />
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
    <link
      href="https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&display=swap"
      rel="stylesheet"
    />
    <link rel="stylesheet" href="../styles/shared.css" />
    <style>
      :root {
        --background-900: #0a0a0a;
        --background-800: #1a1a1a;
        --text-light-05: rgba(255, 255, 255, 0.95);
        --text-light-03: rgba(255, 255, 255, 0.6);
        --white-10: rgba(255, 255, 255, 0.1);
        --white-15: rgba(255, 255, 255, 0.15);
        --white-20: rgba(255, 255, 255, 0.2);
        --white-30: rgba(255, 255, 255, 0.3);
        --white-40: rgba(255, 255, 255, 0.4);
        --white-80: rgba(255, 255, 255, 0.8);
        --black-40: rgba(0, 0, 0, 0.4);
      }

      * {
        box-sizing: border-box;
      }

      body {
        margin: 0;
        padding: 0;
        font-family: var(--font-hanken-grotesk);
        background: linear-gradient(
          135deg,
          var(--background-900) 0%,
          var(--background-800) 100%
        );
        min-height: 100vh;
        color: var(--text-light-05);
        transition: background 0.3s ease;
        display: flex;
        align-items: center;
        justify-content: center;
      }

      body.light-theme {
        --background-900: #f5f5f5;
        --background-800: #ffffff;
        --text-light-05: rgba(0, 0, 0, 0.95);
        --text-light-03: rgba(0, 0, 0, 0.6);
        background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);
      }

      body.light-theme .welcome-panel {
        background: linear-gradient(
          to bottom,
          rgba(255, 255, 255, 0.95),
          rgba(245, 245, 245, 0.95)
        );
        border: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .welcome-header {
        border-bottom: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .logo-container {
        background: rgba(0, 0, 0, 0.05);
      }

      body.light-theme .theme-toggle {
        background: rgba(0, 0, 0, 0.05);
        border: 1px solid rgba(0, 0, 0, 0.1);
      }

      body.light-theme .theme-toggle:hover {
        background: rgba(0, 0, 0, 0.08);
      }

      body.light-theme .theme-toggle svg {
        stroke: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .input-field {
        border: 1px solid rgba(0, 0, 0, 0.1);
        background: rgba(0, 0, 0, 0.05);
        color: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .input-field:focus {
        outline: none;
        border-color: rgba(0, 0, 0, 0.25);
        background: rgba(0, 0, 0, 0.08);
        box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.05);
      }

      body.light-theme .input-field::placeholder {
        color: rgba(0, 0, 0, 0.4);
      }

      body.light-theme .toggle-slider {
        background-color: rgba(0, 0, 0, 0.15);
      }

      body.light-theme input:checked + .toggle-slider {
        background-color: rgba(0, 0, 0, 0.3);
      }

      body.light-theme .toggle-slider:before {
        background-color: white;
        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);
      }

      body.light-theme .step-dot {
        background: rgba(0, 0, 0, 0.2);
      }

      body.light-theme .step-dot.active {
        background: rgba(0, 0, 0, 0.6);
      }

      body.light-theme .btn-primary {
        background: rgba(0, 0, 0, 0.9);
        color: white;
      }

      body.light-theme .btn-primary:hover {
        background: rgba(0, 0, 0, 0.8);
      }

      body.light-theme .btn-secondary {
        background: rgba(0, 0, 0, 0.05);
        color: rgba(0, 0, 0, 0.95);
      }

      body.light-theme .btn-secondary:hover {
        background: rgba(0, 0, 0, 0.08);
      }

      body.light-theme .settings-group {
        background: rgba(0, 0, 0, 0.03);
      }

      body.light-theme .setting-divider {
        background: rgba(0, 0, 0, 0.1);
      }

      .welcome-container {
        max-width: 480px;
        width: 100%;
        margin: 0 auto;
        padding: 40px 20px;
      }

      .welcome-panel {
        background: linear-gradient(
          to bottom,
          rgba(10, 10, 10, 0.95),
          rgba(26, 26, 26, 0.95)
        );
        backdrop-filter: blur(24px);
        border-radius: 20px;
        border: 1px solid var(--white-10);
        overflow: hidden;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
        animation: panelFadeIn 0.5s ease-out;
      }

      @keyframes panelFadeIn {
        from {
          opacity: 0;
          transform: translateY(20px);
        }
        to {
          opacity: 1;
          transform: translateY(0);
        }
      }

      .welcome-header {
        padding: 24px;
        border-bottom: 1px solid var(--white-10);
        display: flex;
        align-items: center;
        justify-content: space-between;
      }

      .header-left {
        display: flex;
        align-items: center;
        gap: 14px;
      }

      .logo-container {
        width: 48px;
        height: 48px;
        border-radius: 14px;
        background: white;
        display: flex;
        align-items: center;
        justify-content: center;
        overflow: hidden;
      }

      .logo-container img {
        width: 100%;
        height: 100%;
        object-fit: contain;
        padding: 8px;
      }

      .welcome-title {
        font-size: 22px;
        font-weight: 600;
        color: var(--text-light-05);
        margin: 0;
      }

      .theme-toggle {
        display: flex;
        align-items: center;
        gap: 8px;
        padding: 8px 12px;
        border-radius: 999px;
        background: var(--white-10);
        border: 1px solid var(--white-10);
        cursor: pointer;
        transition: all 0.2s;
      }

      .theme-toggle:hover {
        background: var(--white-15);
      }

      .theme-toggle svg {
        width: 18px;
        height: 18px;
        stroke: var(--text-light-05);
      }

      .welcome-content {
        padding: 32px 24px;
      }

      /* Step indicator */
      .step-indicator {
        display: flex;
        justify-content: center;
        gap: 8px;
        margin-bottom: 32px;
      }

      .step-dot {
        width: 8px;
        height: 8px;
        border-radius: 50%;
        background: var(--white-20);
        transition: all 0.3s ease;
      }

      .step-dot.active {
        background: var(--white-80);
        transform: scale(1.2);
      }

      /* Steps */
      .step {
        display: none;
        animation: stepFadeIn 0.4s ease-out;
      }

      .step.active {
        display: block;
      }

      @keyframes stepFadeIn {
        from {
          opacity: 0;
          transform: translateX(20px);
        }
        to {
          opacity: 1;
          transform: translateX(0);
        }
      }

      .step-title {
        font-size: 24px;
        font-weight: 600;
        margin: 0 0 8px 0;
        text-align: center;
      }

      .step-description {
        font-size: 15px;
        color: var(--text-light-03);
        text-align: center;
        margin: 0 0 28px 0;
        line-height: 1.5;
      }

      /* Form elements */
      .input-group {
        margin-bottom: 24px;
      }

      .input-label {
        display: block;
        font-size: 13px;
        font-weight: 500;
        color: var(--text-light-03);
        margin-bottom: 8px;
        text-transform: uppercase;
        letter-spacing: 0.03em;
      }

      .input-field {
        width: 100%;
        padding: 14px 16px;
        border: 1px solid var(--white-10);
        border-radius: 12px;
        font-size: 15px;
        background: rgba(255, 255, 255, 0.95);
        color: rgba(0, 0, 0, 0.9);
        font-family: var(--font-hanken-grotesk);
        transition: all 0.2s;
      }

      .input-field:focus {
        outline: none;
        border-color: var(--white-30);
        background: rgba(255, 255, 255, 1);
        box-shadow: 0 0 0 3px rgba(255, 255, 255, 0.15);
      }

      .input-field::placeholder {
        color: rgba(0, 0, 0, 0.4);
      }

      /* Settings group for step 2 */
      .settings-group {
        background: rgba(255, 255, 255, 0.05);
        border-radius: 16px;
        padding: 4px;
        margin-bottom: 24px;
      }

      .setting-row {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 16px;
      }

      .setting-content {
        display: flex;
        flex-direction: column;
        gap: 4px;
        flex: 1;
        padding-right: 16px;
      }

      .setting-label {
        font-size: 15px;
        font-weight: 500;
        color: var(--text-light-05);
      }

      .setting-description {
        font-size: 13px;
        color: var(--text-light-03);
        line-height: 1.4;
      }

      .setting-divider {
        height: 1px;
        background: var(--white-10);
        margin: 0 8px;
      }

      /* Toggle switch */
      .toggle-switch {
        position: relative;
        display: inline-block;
        width: 52px;
        height: 28px;
        flex-shrink: 0;
      }

      .toggle-switch input {
        opacity: 0;
        width: 0;
        height: 0;
      }

      .toggle-slider {
        position: absolute;
        cursor: pointer;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background-color: rgba(255, 255, 255, 0.2);
        transition: 0.3s;
        border-radius: 28px;
      }

      .toggle-slider:before {
        position: absolute;
        content: "";
        height: 22px;
        width: 22px;
        left: 3px;
        bottom: 3px;
        background-color: white;
        transition: 0.3s;
        border-radius: 50%;
      }

      input:checked + .toggle-slider {
        background-color: rgba(255, 255, 255, 0.4);
      }

      input:checked + .toggle-slider:before {
        transform: translateX(24px);
      }

      /* Buttons */
      .button-group {
        display: flex;
        gap: 12px;
        margin-top: 8px;
      }

      .btn {
        flex: 1;
        padding: 14px 24px;
        border-radius: 12px;
        border: none;
        cursor: pointer;
        font-size: 15px;
        font-weight: 500;
        font-family: var(--font-hanken-grotesk);
        transition: all 0.2s;
      }

      .btn-primary {
        background: rgba(255, 255, 255, 0.95);
        color: #0a0a0a;
      }

      .btn-primary:hover {
        background: rgba(255, 255, 255, 0.85);
        transform: translateY(-1px);
      }

      .btn-secondary {
        background: var(--white-10);
        color: var(--text-light-05);
      }

      .btn-secondary:hover {
        background: var(--white-15);
      }

      .btn:active {
        transform: translateY(0);
      }

      /* Success animation for completion */
      .success-icon {
        width: 64px;
        height: 64px;
        margin: 0 auto 24px;
        border-radius: 50%;
        background: rgba(255, 255, 255, 0.1);
        display: flex;
        align-items: center;
        justify-content: center;
        animation: successPop 0.5s ease-out;
      }

      .success-icon svg {
        width: 32px;
        height: 32px;
        stroke: var(--text-light-05);
        stroke-width: 2.5;
      }

      @keyframes successPop {
        0% {
          transform: scale(0);
          opacity: 0;
        }
        50% {
          transform: scale(1.1);
        }
        100% {
          transform: scale(1);
          opacity: 1;
        }
      }

      @media (max-width: 500px) {
        .welcome-container {
          padding: 20px 16px;
        }

        .welcome-content {
          padding: 24px 20px;
        }

        .step-title {
          font-size: 20px;
        }

        .button-group {
          flex-direction: column;
        }
      }
    </style>
  </head>

  <body>
    <div class="welcome-container">
      <div class="welcome-panel">
        <div class="welcome-header">
          <div class="header-left">
            <div class="logo-container">
              <img src="../../public/icon48.png" alt="Onyx" />
            </div>
            <h1 class="welcome-title">Onyx</h1>
          </div>
          <button
            class="theme-toggle"
            id="themeToggle"
            aria-label="Toggle theme"
          >
            <svg
              id="themeIcon"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
            >
              <path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"></path>
            </svg>
          </button>
        </div>

        <div class="welcome-content">
          <div class="step-indicator">
            <div class="step-dot active" data-step="1"></div>
            <div class="step-dot" data-step="2"></div>
          </div>

          <!-- Step 1: Root Domain -->
          <div class="step active" id="step1">
            <h2 class="step-title">Welcome to Onyx</h2>
            <p class="step-description">
              Enter your Onyx instance URL to get started. This is where your
              Onyx deployment is hosted.
            </p>

            <div class="input-group">
              <label class="input-label" for="onyxDomain">Root Domain</label>
              <input
                type="text"
                id="onyxDomain"
                class="input-field"
                placeholder="https://cloud.onyx.app"
              />
            </div>

            <div class="button-group">
              <button class="btn btn-primary" id="continueBtn">Continue</button>
            </div>
          </div>

          <!-- Step 2: New Tab Setting -->
          <div class="step" id="step2">
            <h2 class="step-title">Customize Your Experience</h2>
            <p class="step-description">
              Set Onyx as your new tab page for quick access to your AI
              assistant.
            </p>

            <div class="settings-group">
              <div class="setting-row">
                <div class="setting-content">
                  <span class="setting-label">Use Onyx as new tab page</span>
                  <span class="setting-description"
                    >Open Onyx every time you create a new tab</span
                  >
                </div>
                <label class="toggle-switch">
                  <input type="checkbox" id="useOnyxAsDefault" checked />
                  <span class="toggle-slider"></span>
                </label>
              </div>
            </div>

            <div class="button-group">
              <button class="btn btn-secondary" id="backBtn">Back</button>
              <button class="btn btn-primary" id="finishBtn">
                Get Started
              </button>
            </div>
          </div>
        </div>
      </div>
    </div>
    <script type="module" src="welcome.js"></script>
  </body>
</html>


================================================
FILE: extensions/chrome/src/pages/welcome.js
================================================
import {
  CHROME_SPECIFIC_STORAGE_KEYS,
  DEFAULT_ONYX_DOMAIN,
} from "../utils/constants.js";

document.addEventListener("DOMContentLoaded", function () {
  const domainInput = document.getElementById("onyxDomain");
  const useOnyxAsDefaultToggle = document.getElementById("useOnyxAsDefault");
  const continueBtn = document.getElementById("continueBtn");
  const backBtn = document.getElementById("backBtn");
  const finishBtn = document.getElementById("finishBtn");
  const themeToggle = document.getElementById("themeToggle");
  const themeIcon = document.getElementById("themeIcon");

  const step1 = document.getElementById("step1");
  const step2 = document.getElementById("step2");
  const stepDots = document.querySelectorAll(".step-dot");

  let currentStep = 1;
  let currentTheme = "dark";

  // Initialize theme based on system preference or stored value
  function initTheme() {
    chrome.storage.local.get(
      { [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: null },
      (result) => {
        const storedTheme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME];
        if (storedTheme) {
          currentTheme = storedTheme;
        } else {
          // Check system preference
          currentTheme = window.matchMedia("(prefers-color-scheme: light)")
            .matches
            ? "light"
            : "dark";
        }
        applyTheme();
      },
    );
  }

  function applyTheme() {
    document.body.className = currentTheme === "light" ? "light-theme" : "";
    updateThemeIcon();
  }

  function updateThemeIcon() {
    if (!themeIcon) return;

    if (currentTheme === "light") {
      themeIcon.innerHTML = `
        <circle cx="12" cy="12" r="4"></circle>
        <path d="M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41"></path>
      `;
    } else {
      themeIcon.innerHTML = `
        <path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"></path>
      `;
    }
  }

  function toggleTheme() {
    currentTheme = currentTheme === "light" ? "dark" : "light";
    applyTheme();
    chrome.storage.local.set({
      [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,
    });
  }

  function goToStep(step) {
    if (step === 1) {
      step2.classList.remove("active");
      setTimeout(() => {
        step1.classList.add("active");
      }, 50);
    } else if (step === 2) {
      step1.classList.remove("active");
      setTimeout(() => {
        step2.classList.add("active");
      }, 50);
    }

    stepDots.forEach((dot) => {
      const dotStep = parseInt(dot.dataset.step);
      if (dotStep === step) {
        dot.classList.add("active");
      } else {
        dot.classList.remove("active");
      }
    });

    currentStep = step;
  }

  // Validate domain input
  function validateDomain(domain) {
    if (!domain) return false;
    try {
      new URL(domain);
      return true;
    } catch {
      return false;
    }
  }

  function handleContinue() {
    const domain = domainInput.value.trim();

    if (domain && !validateDomain(domain)) {
      domainInput.style.borderColor = "rgba(255, 100, 100, 0.5)";
      domainInput.focus();
      return;
    }

    domainInput.style.borderColor = "";
    goToStep(2);
  }

  function handleBack() {
    goToStep(1);
  }

  function handleFinish() {
    const domain = domainInput.value.trim() || DEFAULT_ONYX_DOMAIN;
    const useOnyxAsDefault = useOnyxAsDefaultToggle.checked;

    chrome.storage.local.set(
      {
        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain,
        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:
          useOnyxAsDefault,
        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,
        [CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]: true,
      },
      () => {
        // Open a new tab if they enabled the new tab feature, otherwise just close
        if (useOnyxAsDefault) {
          chrome.tabs.create({}, () => {
            window.close();
          });
        } else {
          window.close();
        }
      },
    );
  }

  // Load any existing values (in case user returns to this page)
  function loadStoredValues() {
    chrome.storage.local.get(
      {
        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: "",
        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: true,
      },
      (result) => {
        if (result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]) {
          domainInput.value = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
        }
        useOnyxAsDefaultToggle.checked =
          result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];
      },
    );
  }

  if (themeToggle) {
    themeToggle.addEventListener("click", toggleTheme);
  }

  if (continueBtn) {
    continueBtn.addEventListener("click", handleContinue);
  }

  if (backBtn) {
    backBtn.addEventListener("click", handleBack);
  }

  if (finishBtn) {
    finishBtn.addEventListener("click", handleFinish);
  }

  // Allow Enter key to proceed
  if (domainInput) {
    domainInput.addEventListener("keydown", (e) => {
      if (e.key === "Enter") {
        handleContinue();
      }
    });
  }

  initTheme();
  loadStoredValues();
});


================================================
FILE: extensions/chrome/src/styles/selection-icon.css
================================================
#onyx-selection-icon {
  position: fixed;
  z-index: 2147483647;
  width: 32px;
  height: 32px;
  border-radius: 50%;
  background-color: #ffffff;
  border: 1px solid #e0e0e0;
  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
  cursor: pointer;
  display: flex;
  align-items: center;
  justify-content: center;
  opacity: 0;
  transform: scale(0.8);
  transition:
    opacity 0.15s ease,
    transform 0.15s ease,
    box-shadow 0.15s ease;
  pointer-events: none;
}

#onyx-selection-icon.visible {
  opacity: 1;
  transform: scale(1);
  pointer-events: auto;
}

#onyx-selection-icon:hover {
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
  transform: scale(1.1);
}

#onyx-selection-icon:active {
  transform: scale(0.95);
}

#onyx-selection-icon img {
  width: 20px;
  height: 20px;
  pointer-events: none;
}


================================================
FILE: extensions/chrome/src/styles/shared.css
================================================
/* Import Hanken Grotesk font */
@import url("https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&display=swap");

:root {
  --primary-color: #4285f4;
  --primary-hover-color: #3367d6;
  --secondary-color: #f1f3f4;
  --secondary-hover-color: #e8eaed;
  --text-color: #333;
  --text-light-color: #666;
  --background-color: #f1f3f4;
  --card-background-color: #fff;
  --border-color: #ccc;
  --font-family: Arial, sans-serif;
  --font-hanken-grotesk: "Hanken Grotesk", sans-serif;
}

body {
  font-family: var(--font-hanken-grotesk);
  margin: 0;
  padding: 0;
}

.container {
  max-width: 500px;
  width: 90%;
  margin: 0 auto;
}

.card {
  background-color: var(--card-background-color);
  padding: 25px;
  border-radius: 10px;
  box-shadow: 0 3px 5px rgba(0, 0, 0, 0.1);
}

h1 {
  color: var(--text-color);
  font-size: 24px;
  font-weight: 600;
  margin-top: 0;
  margin-bottom: 20px;
}

.option-group {
  margin-bottom: 20px;
}

label {
  display: block;
  margin-bottom: 5px;
  color: var(--text-light-color);
  font-weight: 400;
  font-size: 16px;
}

input[type="text"] {
  width: 100%;
  padding: 8px;
  border: 1px solid var(--border-color);
  border-radius: 4px;
  font-size: 14px;
  background-color: var(--card-background-color);
  color: var(--text-color);
}

.button {
  width: 100%;
  padding: 10px 20px;
  border-radius: 5px;
  border: none;
  cursor: pointer;
  font-size: 16px;
  font-weight: 500;
  transition: background-color 0.3s;
}

.button.primary {
  background-color: var(--primary-color);
  color: #fff;
}

.button.primary:hover {
  background-color: var(--primary-hover-color);
}

.button.secondary {
  background-color: var(--secondary-color);
  color: var(--text-color);
}

.button.secondary:hover {
  background-color: var(--secondary-hover-color);
}

.status-container {
  margin-top: 10px;
  margin-bottom: 15px;
}

.status-message {
  margin: 0 0 10px 0;
  color: var(--text-color);
  font-weight: 500;
  text-align: center;
  font-size: 16px;
  transition: opacity 0.5s ease-in-out;
}

kbd {
  background-color: var(--secondary-color);
  border: 1px solid var(--border-color);
  border-radius: 3px;
  padding: 2px 5px;
  font-family: monospace;
  font-weight: 500;
  color: var(--text-color);
}

.toggle-label {
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.toggle-switch {
  position: relative;
  display: inline-block;
  width: 50px;
  height: 24px;
}

.toggle-switch input {
  opacity: 0;
  width: 0;
  height: 0;
}

.slider {
  position: absolute;
  cursor: pointer;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: var(--secondary-color);
  transition: 0.4s;
  border-radius: 24px;
}

.slider:before {
  position: absolute;
  content: "";
  height: 20px;
  width: 20px;
  left: 2px;
  bottom: 2px;
  background-color: white;
  transition: 0.4s;
  border-radius: 50%;
}

input:checked + .slider {
  background-color: var(--primary-color);
}

input:checked + .slider:before {
  transform: translateX(26px);
}


================================================
FILE: extensions/chrome/src/utils/constants.js
================================================
export const THEMES = {
  LIGHT: "light",
  DARK: "dark",
};

export const DEFAULT_ONYX_DOMAIN = "http://localhost:3000";

export const SIDE_PANEL_PATH = "/nrf/side-panel";

export const ACTIONS = {
  GET_SELECTED_TEXT: "getSelectedText",
  GET_CURRENT_ONYX_DOMAIN: "getCurrentOnyxDomain",
  UPDATE_PAGE_URL: "updatePageUrl",
  SEND_TO_ONYX: "sendToOnyx",
  OPEN_SIDE_PANEL: "openSidePanel",
  TOGGLE_NEW_TAB_OVERRIDE: "toggleNewTabOverride",
  OPEN_SIDE_PANEL_WITH_INPUT: "openSidePanelWithInput",
  OPEN_ONYX_WITH_INPUT: "openOnyxWithInput",
  CLOSE_SIDE_PANEL: "closeSidePanel",
  TAB_URL_UPDATED: "tabUrlUpdated",
  TAB_READING_ENABLED: "tabReadingEnabled",
  TAB_READING_DISABLED: "tabReadingDisabled",
};

export const CHROME_SPECIFIC_STORAGE_KEYS = {
  ONYX_DOMAIN: "onyxExtensionDomain",
  USE_ONYX_AS_DEFAULT_NEW_TAB: "onyxExtensionDefaultNewTab",
  THEME: "onyxExtensionTheme",
  BACKGROUND_IMAGE: "onyxExtensionBackgroundImage",
  DARK_BG_URL: "onyxExtensionDarkBgUrl",
  LIGHT_BG_URL: "onyxExtensionLightBgUrl",
  ONBOARDING_COMPLETE: "onyxExtensionOnboardingComplete",
};

export const CHROME_MESSAGE = {
  PREFERENCES_UPDATED: "PREFERENCES_UPDATED",
  ONYX_APP_LOADED: "ONYX_APP_LOADED",
  SET_DEFAULT_NEW_TAB: "SET_DEFAULT_NEW_TAB",
  LOAD_NEW_CHAT_PAGE: "LOAD_NEW_CHAT_PAGE",
  LOAD_NEW_PAGE: "LOAD_NEW_PAGE",
  AUTH_REQUIRED: "AUTH_REQUIRED",
  TAB_READING_ENABLED: "TAB_READING_ENABLED",
  TAB_READING_DISABLED: "TAB_READING_DISABLED",
  TAB_URL_UPDATED: "TAB_URL_UPDATED",
};

export const WEB_MESSAGE = {
  PAGE_CHANGE: "PAGE_CHANGE",
};


================================================
FILE: extensions/chrome/src/utils/content.js
================================================
let sidePanel = null;

function createSidePanel() {
  sidePanel = document.createElement("div");
  sidePanel.id = "onyx-side-panel";
  sidePanel.style.cssText = `
    position: fixed;
    top: 0;
    right: -400px;
    width: 400px;
    height: 100%;
    background-color: white;
    box-shadow: -2px 0 5px rgba(0,0,0,0.2);
    transition: right 0.3s ease-in-out;
    z-index: 9999;
  `;

  const iframe = document.createElement("iframe");
  iframe.style.cssText = `
    width: 100%;
    height: 100%;
    border: none;
  `;

  chrome.runtime.sendMessage(
    { action: ACTIONS.GET_CURRENT_ONYX_DOMAIN },
    function (response) {
      iframe.src = response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
    },
  );

  sidePanel.appendChild(iframe);
  document.body.appendChild(sidePanel);
}


================================================
FILE: extensions/chrome/src/utils/error-modal.js
================================================
import {
  CHROME_SPECIFIC_STORAGE_KEYS,
  DEFAULT_ONYX_DOMAIN,
  ACTIONS,
} from "./constants.js";

const errorModalHTML = `
  <div id="error-modal">
    <div class="modal-backdrop"></div>
    <div class="modal-content">
      <div class="modal-header">
        <div class="modal-icon">
          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
            <circle cx="12" cy="12" r="10"></circle>
            <line x1="12" y1="8" x2="12" y2="12"></line>
            <line x1="12" y1="16" x2="12.01" y2="16"></line>
          </svg>
        </div>
        <h2>Configuration Error</h2>
      </div>
      <div class="modal-body">
        <p class="modal-description">The Onyx configuration needs to be updated. Please check your settings or contact your Onyx administrator.</p>
        <div class="url-display">
          <span class="url-label">Attempted to load:</span>
          <span id="attempted-url" class="url-value"></span>
        </div>
      </div>
      <div class="modal-footer">
        <div class="button-container">
          <button id="open-options" class="button primary">Open Extension Options</button>
          <button id="disable-override" class="button secondary">Disable New Tab Override</button>
        </div>
      </div>
    </div>
  </div>
`;

const style = document.createElement("style");
style.textContent = `
  :root {
    --background-900: #0a0a0a;
    --background-800: #1a1a1a;
    --text-light-05: rgba(255, 255, 255, 0.95);
    --text-light-03: rgba(255, 255, 255, 0.6);
    --white-10: rgba(255, 255, 255, 0.1);
    --white-15: rgba(255, 255, 255, 0.15);
    --white-20: rgba(255, 255, 255, 0.2);
    --white-30: rgba(255, 255, 255, 0.3);
  }

  #error-modal {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    display: none;
    align-items: center;
    justify-content: center;
    z-index: 2000;
    font-family: var(--font-hanken-grotesk), 'Hanken Grotesk', sans-serif;
  }

  #error-modal .modal-backdrop {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0.7);
    backdrop-filter: blur(8px);
  }

  #error-modal .modal-content {
    position: relative;
    background: linear-gradient(to bottom, rgba(10, 10, 10, 0.95), rgba(26, 26, 26, 0.95));
    backdrop-filter: blur(24px);
    border-radius: 16px;
    border: 1px solid var(--white-10);
    max-width: 95%;
    width: 500px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
    overflow: hidden;
  }

  #error-modal .modal-header {
    padding: 24px;
    border-bottom: 1px solid var(--white-10);
    display: flex;
    align-items: center;
    gap: 12px;
  }

  #error-modal .modal-icon {
    width: 40px;
    height: 40px;
    border-radius: 12px;
    background: rgba(255, 87, 87, 0.15);
    display: flex;
    align-items: center;
    justify-content: center;
    flex-shrink: 0;
  }

  #error-modal .modal-icon svg {
    width: 24px;
    height: 24px;
    stroke: #ff5757;
  }

  #error-modal .modal-icon.auth-icon {
    background: rgba(66, 133, 244, 0.15);
  }

  #error-modal .modal-icon.auth-icon svg {
    stroke: #4285f4;
  }

  #error-modal h2 {
    margin: 0;
    color: var(--text-light-05);
    font-size: 20px;
    font-weight: 600;
  }

  #error-modal .modal-body {
    padding: 24px;
  }

  #error-modal .modal-description {
    color: var(--text-light-05);
    margin: 0 0 20px 0;
    font-size: 14px;
    line-height: 1.6;
    font-weight: 400;
  }

  #error-modal .url-display {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 8px;
    padding: 12px;
    border: 1px solid var(--white-10);
  }

  #error-modal .url-label {
    display: block;
    font-size: 12px;
    color: var(--text-light-03);
    margin-bottom: 6px;
    font-weight: 500;
    text-transform: uppercase;
    letter-spacing: 0.05em;
  }

  #error-modal .url-value {
    display: block;
    font-size: 13px;
    color: var(--text-light-05);
    word-break: break-all;
    font-family: monospace;
    line-height: 1.5;
  }

  #error-modal .modal-footer {
    padding: 0 24px 24px 24px;
  }

  #error-modal .button-container {
    display: flex;
    flex-direction: column;
    gap: 10px;
    margin-bottom: 16px;
  }

  #error-modal .button {
    padding: 12px 20px;
    border-radius: 8px;
    border: none;
    cursor: pointer;
    font-size: 14px;
    font-weight: 500;
    transition: all 0.2s;
    font-family: var(--font-hanken-grotesk), 'Hanken Grotesk', sans-serif;
  }

  #error-modal .button.primary {
    background: rgba(255, 255, 255, 0.15);
    color: var(--text-light-05);
    border: 1px solid var(--white-10);
  }

  #error-modal .button.primary:hover {
    background: rgba(255, 255, 255, 0.2);
    border-color: var(--white-20);
  }

  #error-modal .button.secondary {
    background: rgba(255, 255, 255, 0.05);
    color: var(--text-light-05);
    border: 1px solid var(--white-10);
  }

  #error-modal .button.secondary:hover {
    background: rgba(255, 255, 255, 0.1);
    border-color: var(--white-15);
  }

  #error-modal kbd {
    background: rgba(255, 255, 255, 0.1);
    border: 1px solid var(--white-10);
    border-radius: 4px;
    padding: 2px 6px;
    font-family: monospace;
    font-weight: 500;
    color: var(--text-light-05);
    font-size: 11px;
  }

  @media (min-width: 768px) {
    #error-modal .button-container {
      flex-direction: row;
    }

    #error-modal .button {
      flex: 1;
    }
  }
`;

const authModalHTML = `
  <div id="error-modal">
    <div class="modal-backdrop"></div>
    <div class="modal-content">
      <div class="modal-header">
        <div class="modal-icon auth-icon">
          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
            <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
            <path d="M7 11V7a5 5 0 0 1 10 0v4"></path>
          </svg>
        </div>
        <h2>Authentication Required</h2>
      </div>
      <div class="modal-body">
        <p class="modal-description">You need to log in to access Onyx. Click the button below to authenticate.</p>
      </div>
      <div class="modal-footer">
        <div class="button-container">
          <button id="open-auth" class="button primary">Log In to Onyx</button>
        </div>
      </div>
    </div>
  </div>
`;

let errorModal, attemptedUrlSpan, openOptionsButton, disableOverrideButton;

let authModal, openAuthButton;

export function initErrorModal() {
  if (!document.getElementById("error-modal")) {
    const link = document.createElement("link");
    link.rel = "stylesheet";
    link.href = "../styles/shared.css";
    document.head.appendChild(link);

    document.body.insertAdjacentHTML("beforeend", errorModalHTML);
    document.head.appendChild(style);

    errorModal = document.getElementById("error-modal");
    authModal = document.getElementById("error-modal");
    attemptedUrlSpan = document.getElementById("attempted-url");
    openOptionsButton = document.getElementById("open-options");
    disableOverrideButton = document.getElementById("disable-override");

    openOptionsButton.addEventListener("click", (e) => {
      e.preventDefault();
      chrome.runtime.openOptionsPage();
    });

    disableOverrideButton.addEventListener("click", () => {
      chrome.storage.local.set({ useOnyxAsDefaultNewTab: false }, () => {
        chrome.tabs.update({ url: "chrome://new-tab-page" });
      });
    });
  }
}

export function showErrorModal(url) {
  if (!errorModal) {
    initErrorModal();
  }
  if (errorModal) {
    errorModal.style.display = "flex";
    errorModal.style.zIndex = "9999";
    attemptedUrlSpan.textContent = url;
    document.body.style.overflow = "hidden";
  }
}

export function hideErrorModal() {
  if (errorModal) {
    errorModal.style.display = "none";
    document.body.style.overflow = "auto";
  }
}

export function checkModalVisibility() {
  return errorModal
    ? window.getComputedStyle(errorModal).display !== "none"
    : false;
}

export function initAuthModal() {
  if (!document.getElementById("error-modal")) {
    const link = document.createElement("link");
    link.rel = "stylesheet";
    link.href = "../styles/shared.css";
    document.head.appendChild(link);

    document.body.insertAdjacentHTML("beforeend", authModalHTML);
    document.head.appendChild(style);

    authModal = document.getElementById("error-modal");
    openAuthButton = document.getElementById("open-auth");

    openAuthButton.addEventListener("click", (e) => {
      e.preventDefault();
      chrome.storage.local.get(
        { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },
        (result) => {
          const onyxDomain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
          chrome.runtime.sendMessage(
            { action: ACTIONS.CLOSE_SIDE_PANEL },
            () => {
              if (chrome.runtime.lastError) {
                console.error(
                  "Error closing side panel:",
                  chrome.runtime.lastError,
                );
              }
              chrome.tabs.create(
                {
                  url: `${onyxDomain}/auth/login`,
                  active: true,
                },
                (_) => {
                  if (chrome.runtime.lastError) {
                    console.error(
                      "Error opening auth tab:",
                      chrome.runtime.lastError,
                    );
                  }
                },
              );
            },
          );
        },
      );
    });
  }
}

export function showAuthModal() {
  if (!authModal) {
    initAuthModal();
  }
  if (authModal) {
    authModal.style.display = "flex";
    authModal.style.zIndex = "9999";
    document.body.style.overflow = "hidden";
  }
}

export function hideAuthModal() {
  if (authModal) {
    authModal.style.display = "none";
    document.body.style.overflow = "auto";
  }
}


================================================
FILE: extensions/chrome/src/utils/selection-icon.js
================================================
(function () {
  const OPEN_SIDE_PANEL_WITH_INPUT = "openSidePanelWithInput";

  let selectionIcon = null;
  let currentSelectedText = "";

  function createSelectionIcon() {
    if (selectionIcon) return;

    selectionIcon = document.createElement("div");
    selectionIcon.id = "onyx-selection-icon";

    const img = document.createElement("img");
    img.src = chrome.runtime.getURL("public/icon32.png");
    img.alt = "Search with Onyx";

    selectionIcon.appendChild(img);
    document.body.appendChild(selectionIcon);

    selectionIcon.addEventListener("mousedown", handleIconClick);
  }

  function showIcon(text) {
    if (!selectionIcon) {
      createSelectionIcon();
    }

    currentSelectedText = text;

    const selection = window.getSelection();
    if (!selection.rangeCount) return;

    const range = selection.getRangeAt(0);
    const rect = range.getBoundingClientRect();

    const iconSize = 32;
    const offset = 4;

    let posX = rect.right + offset;
    let posY = rect.bottom + offset;

    if (posX + iconSize > window.innerWidth) {
      posX = rect.left - iconSize - offset;
    }
    if (posY + iconSize > window.innerHeight) {
      posY = rect.top - iconSize - offset;
    }

    posX = Math.max(
      offset,
      Math.min(posX, window.innerWidth - iconSize - offset),
    );
    posY = Math.max(
      offset,
      Math.min(posY, window.innerHeight - iconSize - offset),
    );

    selectionIcon.style.left = `${posX}px`;
    selectionIcon.style.top = `${posY}px`;
    selectionIcon.classList.add("visible");
  }

  function hideIcon() {
    if (selectionIcon) {
      selectionIcon.classList.remove("visible");
    }
    currentSelectedText = "";
  }

  function handleIconClick(e) {
    e.preventDefault();
    e.stopPropagation();

    const textToSend = currentSelectedText;

    if (textToSend) {
      chrome.runtime.sendMessage(
        {
          action: OPEN_SIDE_PANEL_WITH_INPUT,
          selectedText: textToSend,
          pageUrl: window.location.href,
        },
        (response) => {
          if (chrome.runtime.lastError) {
            console.error(
              "[Onyx] Error sending message:",
              chrome.runtime.lastError.message,
            );
          } else {
          }
        },
      );
    }

    hideIcon();
  }

  document.addEventListener("mouseup", (e) => {
    if (
      e.target.id === "onyx-selection-icon" ||
      e.target.closest("#onyx-selection-icon")
    ) {
      return;
    }

    setTimeout(() => {
      const selection = window.getSelection();
      const selectedText = selection.toString().trim();

      if (selectedText && selectedText.length > 0) {
        showIcon(selectedText);
      } else {
        hideIcon();
      }
    }, 10);
  });

  document.addEventListener("mousedown", (e) => {
    if (
      e.target.id !== "onyx-selection-icon" &&
      !e.target.closest("#onyx-selection-icon")
    ) {
      const selection = window.getSelection();
      const selectedText = selection.toString().trim();
      if (!selectedText) {
        hideIcon();
      }
    }
  });

  document.addEventListener(
    "scroll",
    () => {
      hideIcon();
    },
    true,
  );

  document.addEventListener("selectionchange", () => {
    const selection = window.getSelection();
    const selectedText = selection.toString().trim();
    if (!selectedText) {
      hideIcon();
    }
  });

  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", createSelectionIcon);
  } else {
    createSelectionIcon();
  }
})();


================================================
FILE: extensions/chrome/src/utils/storage.js
================================================
import {
  DEFAULT_ONYX_DOMAIN,
  CHROME_SPECIFIC_STORAGE_KEYS,
} from "./constants.js";

export async function getOnyxDomain() {
  const result = await chrome.storage.local.get({
    [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,
  });
  return result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];
}

export function setOnyxDomain(domain, callback) {
  chrome.storage.local.set(
    { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain },
    callback,
  );
}

export function getOnyxDomainSync() {
  return new Promise((resolve) => {
    getOnyxDomain(resolve);
  });
}


================================================
FILE: profiling/grafana/dashboards/onyx/opensearch-search-latency.json
================================================
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "id": null,
  "links": [],
  "liveNow": true,
  "panels": [
    {
      "title": "Client-Side Search Latency (P50 / P95 / P99)",
      "description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
      "id": 1,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "seconds",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "dashed" }
          },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 0.5 },
              { "color": "red", "value": 2.0 }
            ]
          },
          "unit": "s",
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
          "legendFormat": "P50",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
          "legendFormat": "P95",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
          "legendFormat": "P99",
          "refId": "C"
        }
      ]
    },
    {
      "title": "Server-Side Search Latency (P50 / P95 / P99)",
      "description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
      "id": 2,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "seconds",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "dashed" }
          },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 0.5 },
              { "color": "red", "value": 2.0 }
            ]
          },
          "unit": "s",
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
          "legendFormat": "P50",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
          "legendFormat": "P95",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
          "legendFormat": "P99",
          "refId": "C"
        }
      ]
    },
    {
      "title": "Client-Side Latency by Search Type (P95)",
      "description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
      "id": 3,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "seconds",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "unit": "s",
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
          "legendFormat": "{{ search_type }}",
          "refId": "A"
        }
      ]
    },
    {
      "title": "Search Throughput by Type",
      "description": "Searches per second broken down by search type.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
      "id": 4,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "searches/s",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "normal" },
            "thresholdsStyle": { "mode": "off" }
          },
          "unit": "ops",
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
          "legendFormat": "{{ search_type }}",
          "refId": "A"
        }
      ]
    },
    {
      "title": "Concurrent Searches In Progress",
      "description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
      "id": 5,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "searches",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "normal" },
            "thresholdsStyle": { "mode": "off" }
          },
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
          "legendFormat": "{{ search_type }}",
          "refId": "A"
        }
      ]
    },
    {
      "title": "Client vs Server Latency Overhead (P50)",
      "description": "Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
      "id": 6,
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisLabel": "seconds",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "smooth",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "unit": "s",
          "min": 0
        },
        "overrides": []
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
          "legendFormat": "Client - Server overhead (P50)",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
          "legendFormat": "Client P50",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
          "legendFormat": "Server P50",
          "refId": "C"
        }
      ]
    }
  ],
  "refresh": "5s",
  "schemaVersion": 37,
  "style": "dark",
  "tags": ["onyx", "opensearch", "search", "latency"],
  "templating": {
    "list": [
      {
        "current": {
          "text": "Prometheus",
          "value": "prometheus"
        },
        "includeAll": false,
        "name": "DS_PROMETHEUS",
        "options": [],
        "query": "prometheus",
        "refresh": 1,
        "type": "datasource"
      }
    ]
  },
  "time": { "from": "now-60m", "to": "now" },
  "timepicker": {
    "refresh_intervals": ["5s", "10s", "30s", "1m"]
  },
  "timezone": "",
  "title": "Onyx OpenSearch Search Latency",
  "uid": "onyx-opensearch-search-latency",
  "version": 0,
  "weekStart": ""
}


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"

[project]
name = "onyx"
version = "0.0.0"
requires-python = ">=3.11"
# Shared dependencies between backend and model_server
dependencies = [
    "aioboto3==15.1.0",
    "cohere==5.6.1",
    "fastapi==0.133.1",
    "google-genai==1.52.0",
    "litellm==1.81.6",
    "openai==2.14.0",
    "pydantic==2.11.7",
    "prometheus_client>=0.21.1",
    "prometheus_fastapi_instrumentator==7.1.0",
    "retry==0.9.2", # This pulls in py which is in CVE-2022-42969, must remove py from image
    "sentry-sdk==2.14.0",
    "uvicorn==0.35.0",
    "voyageai==0.2.3",
    "brotli>=1.2.0",
    "claude-agent-sdk>=0.1.19",
    "agent-client-protocol>=0.7.1",
    "discord-py==2.4.0",
    "kubernetes>=31.0.0",
]

[project.optional-dependencies]
# Main backend application dependencies
backend = [
    "aiohttp==3.13.4",
    "alembic==1.10.4",
    "asyncpg==0.30.0",
    "atlassian-python-api==3.41.16",
    "azure-cognitiveservices-speech==1.38.0",
    "beautifulsoup4==4.12.3",
    "boto3==1.39.11",
    "boto3-stubs[s3]==1.39.11",
    "celery==5.5.1",
    "chardet==5.2.0",
    "chonkie==1.0.10",
    "dask==2026.1.1",
    "ddtrace==3.10.0",
    "discord.py==2.4.0",
    "distributed==2026.1.1",
    "fastapi-users==15.0.4",
    "fastapi-users-db-sqlalchemy==7.0.0",
    "fastapi-limiter==0.1.6",
    "fastmcp==3.2.0",
    "filelock==3.20.3",
    "google-api-python-client==2.86.0",
    "google-auth-httplib2==0.1.0",
    "google-auth-oauthlib==1.0.0",
    # GPT4All library has issues running on Macs and python:3.11.4-slim-bookworm
    # will reintroduce this when library version catches up
    # "gpt4all==2.0.2",
    "httpcore==1.0.9",
    "httpx[http2]==0.28.1",
    "httpx-oauth==0.15.1",
    "huggingface-hub==0.35.3",
    "inflection==0.5.1",
    "jira==3.10.5",
    "jsonref==1.1.0",
    "kubernetes==31.0.0",
    "trafilatura==1.12.2",
    "langchain-core==1.2.22",
    "lazy_imports==1.0.1",
    "lxml==5.3.0",
    "Mako==1.2.4",
    "markitdown[pdf, docx, pptx, xlsx, xls]==0.1.2",
    "mcp[cli]==1.26.0",
    "msal==1.34.0",
    "msoffcrypto-tool==5.4.2",
    "Office365-REST-Python-Client==2.6.2",
    "oauthlib==3.2.2",
    # NOTE: This is frozen to avoid https://foss.heptapod.net/openpyxl/openpyxl/-/issues/2147
    "openpyxl==3.0.10",
    "opensearch-py==3.0.0",
    "passlib==1.7.4",
    "playwright==1.55.0",
    "psutil==7.1.3",
    "psycopg2-binary==2.9.9",
    "puremagic==1.28",
    "pyairtable==3.0.1",
    "pycryptodome==3.19.1",
    "PyGithub==2.5.0",
    "pympler==1.1",
    "python-dateutil==2.8.2",
    "python-gitlab==5.6.0",
    "python-pptx==0.6.23",
    "pypandoc_binary==1.16.2",
    "pypdf==6.9.2",
    "pytest-mock==3.12.0",
    "pytest-playwright==0.7.0",
    "python-docx==1.1.2",
    "python-dotenv==1.1.1",
    "python-multipart==0.0.22",
    "pywikibot==9.0.0",
    "redis==5.0.8",
    "requests==2.33.0",
    "requests-oauthlib==1.3.1",
    "rfc3986==1.5.0",
    "simple-salesforce==1.12.6",
    "slack-sdk==3.20.2",
    "SQLAlchemy[mypy]==2.0.15",
    "starlette==0.49.3",
    "supervisor==4.3.0",
    "RapidFuzz==3.13.0",
    "tiktoken==0.7.0",
    "timeago==1.0.16",
    "types-openpyxl==3.0.4.7",
    "unstructured==0.18.27",
    "unstructured-client==0.42.6",
    "zulip==0.8.2",
    "hubspot-api-client==11.1.0",
    "asana==5.0.8",
    "dropbox==12.0.2",
    "shapely==2.0.6",
    "stripe==10.12.0",
    "urllib3==2.6.3",
    "mistune==3.2.0",
    "sendgrid==6.12.5",
    "exa_py==1.15.4",
    "braintrust==0.3.9",
    "langfuse==3.10.0",
    "nest_asyncio==1.6.0",
    "openinference-instrumentation==0.1.42",
    "opentelemetry-proto>=1.39.0",
    "python3-saml==1.15.0",
    "xmlsec==1.3.14",
]

# Dev tools
dev = [
    "black==25.1.0",
    "celery-types==0.19.0",
    "faker==40.1.2",
    "hatchling==1.28.0",
    "ipykernel==6.29.5",
    "manygo==0.2.0",
    "matplotlib==3.10.8",
    "mypy-extensions==1.0.0",
    "mypy==1.13.0",
    "onyx-devtools==0.7.2",
    "openapi-generator-cli==7.17.0",
    "pandas-stubs~=2.3.3",
    "pre-commit==3.2.2",
    "pytest-alembic==0.12.1",
    "pytest-asyncio==1.3.0",
    "pytest-dotenv==0.5.2",
    "pytest-repeat==0.9.4",
    "pytest-xdist==3.8.0",
    "pytest==8.3.5",
    "release-tag==0.5.2",
    "reorder-python-imports-black==3.14.0",
    "ruff==0.12.0",
    "types-beautifulsoup4==4.12.0.3",
    "types-html5lib==1.1.11.13",
    "types-oauthlib==3.2.0.9",
    "types-passlib==1.7.7.20240106",
    "types-Pillow==10.2.0.20240822",
    "types-psutil==7.1.3.20251125",
    "types-psycopg2==2.9.21.10",
    "types-python-dateutil==2.8.19.13",
    "types-PyYAML==6.0.12.11",
    "types-pytz==2023.3.1.1",
    "types-regex==2023.3.23.1",
    "types-requests==2.32.0.20250328",
    "types-retry==0.9.9.3",
    "types-setuptools==68.0.0.3",
    "zizmor==1.18.0",
]

# Enterprise Edition features
ee = [
    "posthog==3.7.4",
]

# Model server specific dependencies (ML packages)
model_server = [
    "accelerate==1.6.0",
    "einops==0.8.1",
    "numpy==2.4.1",
    "safetensors==0.5.3",
    "sentence-transformers==4.0.2",
    "torch==2.9.1",
    "transformers==4.53.0",
    "sentry-sdk[fastapi,celery,starlette]==2.14.0",
]

[tool.mypy]
plugins = "sqlalchemy.ext.mypy.plugin"
mypy_path = "backend"
explicit_package_bases = true
disallow_untyped_defs = true
warn_unused_ignores = true
enable_error_code = ["possibly-undefined"]
strict_equality = true
# Patterns match paths whether mypy is run from backend/ (CI) or repo root (e.g. VS Code extension with target ./backend)
exclude = [
  "(?:^|/)generated/",
  "(?:^|/)\\.venv/",
  "(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/skills/",
  "(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/templates/",
]

[[tool.mypy.overrides]]
module = "alembic.versions.*"
disable_error_code = ["var-annotated"]

[[tool.mypy.overrides]]
module = "alembic_tenants.versions.*"
disable_error_code = ["var-annotated"]

[[tool.mypy.overrides]]
module = "generated.*"
follow_imports = "silent"
ignore_errors = true

[[tool.mypy.overrides]]
module = "transformers.*"
follow_imports = "skip"
ignore_errors = true

[tool.uv.workspace]
members = ["backend", "tools/ods"]

[tool.basedpyright]
include = ["backend"]
exclude = ["backend/generated", "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx", "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv"]
typeCheckingMode = "off"

[tool.ruff]
line-length = 130
target-version = "py311"

[tool.ruff.lint]
ignore = [
  "E501", # Long lines are handled by Black.
]
select = [
  "ARG",
  "E",
  "F",
  "S324",
  "W",
]

[tool.setuptools.packages.find]
where = ["backend"]
include = ["onyx*", "tests*"]


================================================
FILE: web/.dockerignore
================================================
node_modules
.next
/tests/

# Explicitly include src/app/build (overrides .gitignore /build pattern)
!src/app/build


================================================
FILE: web/.eslintrc.json
================================================
{
  "extends": "next/core-web-vitals",
  "plugins": ["unused-imports"],
  "rules": {
    "@next/next/no-img-element": "off",
    "react-hooks/exhaustive-deps": "off",
    "no-unused-vars": "off",
    "@typescript-eslint/no-unused-vars": "off",
    "unused-imports/no-unused-imports": "warn",
    "unused-imports/no-unused-vars": [
      "warn",
      {
        "vars": "all",
        "varsIgnorePattern": "^_",
        "args": "after-used",
        "argsIgnorePattern": "^_",
        "ignoreRestSiblings": true
      }
    ]
  }
}


================================================
FILE: web/.gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
.env.sentry-build-plugin

# dependencies
node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

# playwright testing temp files
/admin*_auth.json
/worker*_auth.json
/user_auth.json
/build-archive.log
/test-results
/output/

# generated clients ... in particular, the API to the Onyx backend itself!
/src/lib/generated
.jest-cache

# storybook
storybook-static


================================================
FILE: web/.prettierignore
================================================
**/.git
**/.svn
**/.hg
**/node_modules
**/.next
**/.vscode

================================================
FILE: web/.prettierrc.json
================================================
{
  "trailingComma": "es5"
}


================================================
FILE: web/.storybook/Introduction.mdx
================================================
import { Meta } from "@storybook/blocks";

<Meta title="Getting Started" />

# Onyx Storybook

A living catalog for browsing, testing, and documenting Onyx UI components in isolation.

---

## What is this?

This Storybook contains interactive examples of every reusable UI component in the Onyx frontend. Each component has a dedicated page with:

- **Live demos** you can interact with directly
- **Controls** to tweak props and see how the component responds
- **Auto-generated docs** showing the full props API
- **Dark mode toggle** in the toolbar to preview both themes

---

## Navigating Storybook

### Sidebar

The left sidebar organizes components by layer:

- **opal/core** — Low-level primitives (`Interactive`, `Hoverable`)
- **opal/components** — Design system atoms (`Button`, `OpenButton`, `Tag`)
- **Layouts** — Structural layouts (`Content`, `ContentAction`, `IllustrationContent`)
- **refresh-components** — App-level components (inputs, modals, tables, text, etc.)

Click any component to see its stories. Click **Docs** to see the auto-generated props table.

### Controls panel

At the bottom of each story, the **Controls** panel lets you change props in real time. Toggle booleans, pick from enums, type in strings — the preview updates instantly.

### Theme toggle

Use the paint roller icon in the top toolbar to switch between **light** and **dark** mode. All components use CSS variables that automatically adapt.

---

## Running locally

```bash
cd web
npm run storybook        # dev server on :6006
npm run storybook:build  # static build to storybook-static/
```

---

## Adding a new story

Stories are **co-located** next to their component:

```
lib/opal/src/components/buttons/Button/
├── components.tsx       ← the component
├── Button.stories.tsx   ← the story
├── styles.css
└── README.md
```

### Minimal template

```tsx
import type { Meta, StoryObj } from "@storybook/react";
import { MyComponent } from "./MyComponent";

const meta: Meta<typeof MyComponent> = {
  title: "opal/components/MyComponent",  // sidebar path
  component: MyComponent,
  tags: ["autodocs"],                     // auto-generate docs page
};

export default meta;
type Story = StoryObj<typeof MyComponent>;

export const Default: Story = {
  args: {
    title: "Hello",
  },
};

export const WithCustomLayout: Story = {
  render: () => (
    <div className="flex gap-2">
      <MyComponent title="One" />
      <MyComponent title="Two" />
    </div>
  ),
};
```

### Conventions

- **Title format:** `opal/core/Name`, `opal/components/Name`, `Layouts/Name`, or `refresh-components/Name`
- **Tags:** Add `tags: ["autodocs"]` to auto-generate a docs page from props
- **Decorators:** If your component needs `TooltipPrimitive.Provider` (anything with tooltips), add it as a decorator
- **Layout:** Use `parameters: { layout: "fullscreen" }` for modals/popovers that use portals

---

## Deployment

Production builds deploy to [onyx-storybook.vercel.app](https://onyx-storybook.vercel.app) automatically when PRs touching component files merge to `main`.

Monitored paths:

- `web/lib/opal/**`
- `web/src/refresh-components/**`
- `web/.storybook/**`


================================================
FILE: web/.storybook/README.md
================================================
# Onyx Storybook

Storybook is an isolated development environment for UI components. It renders each component in a standalone "story" outside of the main app, so you can visually verify appearance, interact with props, and catch regressions without navigating through the full application.

The Onyx Storybook covers the full component library — from low-level `@opal/core` primitives up through `refresh-components` — giving designers and engineers a shared reference for every visual state.

**Production:** [onyx-storybook.vercel.app](https://onyx-storybook.vercel.app)

## Running Locally

```bash
cd web
npm run storybook        # dev server on http://localhost:6006
npm run storybook:build  # static build to storybook-static/
```

The dev server hot-reloads when you edit a component or story file.

## Writing Stories

Stories are **co-located** next to their component source:

```
lib/opal/src/core/interactive/
├── components.tsx              ← the component
├── Interactive.stories.tsx     ← the story
└── styles.css

src/refresh-components/buttons/
├── Button.tsx
└── Button.stories.tsx
```

### Minimal Template

```tsx
import type { Meta, StoryObj } from "@storybook/react";
import { MyComponent } from "./MyComponent";

const meta: Meta<typeof MyComponent> = {
  title: "Category/MyComponent",   // sidebar path
  component: MyComponent,
  tags: ["autodocs"],               // generates a docs page from props
};

export default meta;
type Story = StoryObj<typeof MyComponent>;

export const Default: Story = {
  args: { label: "Hello" },
};
```

### Conventions

- **Title format:** `Core/Name`, `Components/Name`, `Layouts/Name`, or `refresh-components/category/Name`
- **Tags:** Add `tags: ["autodocs"]` to auto-generate a props docs page
- **Decorators:** Components that use Radix tooltips need a `TooltipPrimitive.Provider` decorator
- **Layout:** Use `parameters: { layout: "fullscreen" }` for modals/popovers that use portals

## Dark Mode

Use the theme toggle (paint roller icon) in the Storybook toolbar to switch between light and dark modes. This adds/removes the `dark` class on the preview body, matching the app's `darkMode: "class"` Tailwind config. All color tokens from `colors.css` adapt automatically.

## Deployment

The production Storybook is deployed as a static site on Vercel. The build runs `npm run storybook:build` which outputs to `storybook-static/`, and Vercel serves that directory.

Deploys are triggered on merges to `main` when files in `web/lib/opal/`, `web/src/refresh-components/`, or `web/.storybook/` change.

## Component Layers

The sidebar organizes components by their layer in the design system:

| Layer | Path | Examples |
|-------|------|----------|
| **Core** | `lib/opal/src/core/` | Interactive, Hoverable |
| **Components** | `lib/opal/src/components/` | Button, OpenButton, Tag |
| **Layouts** | `lib/opal/src/layouts/` | Content, ContentAction, IllustrationContent |
| **refresh-components** | `src/refresh-components/` | Inputs, tables, modals, text, cards, tiles, etc. |


================================================
FILE: web/.storybook/main.ts
================================================
import type { StorybookConfig } from "@storybook/react-vite";
import path from "path";

const config: StorybookConfig = {
  stories: [
    "./*.mdx",
    "../lib/opal/src/**/*.stories.@(ts|tsx)",
    "../src/refresh-components/**/*.stories.@(ts|tsx)",
  ],
  addons: ["@storybook/addon-essentials", "@storybook/addon-themes"],
  framework: {
    name: "@storybook/react-vite",
    options: {},
  },
  staticDirs: ["../public"],
  docs: {
    autodocs: "tag",
  },
  typescript: {
    reactDocgen: "react-docgen-typescript",
  },
  viteFinal: async (config) => {
    config.resolve = config.resolve ?? {};
    config.resolve.alias = {
      ...config.resolve.alias,
      "@": path.resolve(__dirname, "../src"),
      "@opal": path.resolve(__dirname, "../lib/opal/src"),
      "@public": path.resolve(__dirname, "../public"),
      // Next.js module stubs for Vite
      "next/link": path.resolve(__dirname, "mocks/next-link.tsx"),
      "next/navigation": path.resolve(__dirname, "mocks/next-navigation.tsx"),
      "next/image": path.resolve(__dirname, "mocks/next-image.tsx"),
    };

    // Process CSS with Tailwind via PostCSS
    config.css = config.css ?? {};
    config.css.postcss = path.resolve(__dirname, "..");

    return config;
  },
};

export default config;


================================================
FILE: web/.storybook/mocks/next-image.tsx
================================================
import React from "react";

interface ImageProps {
  src: string;
  alt: string;
  width?: number;
  height?: number;
  fill?: boolean;
  [key: string]: unknown;
}

function Image({ src, alt, width, height, fill, ...props }: ImageProps) {
  const fillStyle: React.CSSProperties = fill
    ? { position: "absolute", inset: 0, width: "100%", height: "100%" }
    : {};
  return (
    <img
      {...(props as React.ImgHTMLAttributes<HTMLImageElement>)}
      src={src}
      alt={alt}
      width={fill ? undefined : width}
      height={fill ? undefined : height}
      style={{ ...(props.style as React.CSSProperties), ...fillStyle }}
    />
  );
}

export default Image;


================================================
FILE: web/.storybook/mocks/next-link.tsx
================================================
import React from "react";

interface LinkProps {
  href: string;
  children: React.ReactNode;
  [key: string]: unknown;
}

function Link({
  href,
  children,
  prefetch: _prefetch,
  scroll: _scroll,
  shallow: _shallow,
  replace: _replace,
  passHref: _passHref,
  locale: _locale,
  legacyBehavior: _legacyBehavior,
  ...props
}: LinkProps) {
  return (
    <a href={href} {...props}>
      {children}
    </a>
  );
}

export default Link;


================================================
FILE: web/.storybook/mocks/next-navigation.tsx
================================================
export function useRouter() {
  return {
    push: (_url: string) => {},
    replace: (_url: string) => {},
    back: () => {},
    forward: () => {},
    refresh: () => {},
    prefetch: (_url: string) => Promise.resolve(),
  };
}

export function usePathname() {
  return "/";
}

export function useSearchParams() {
  return new URLSearchParams() as ReadonlyURLSearchParams;
}

export function useParams() {
  return {};
}

export function redirect(_url: string): never {
  throw new Error("redirect() called in Storybook");
}

export function notFound(): never {
  throw new Error("notFound() called in Storybook");
}


================================================
FILE: web/.storybook/preview-head.html
================================================
<!-- Preconnect for fonts loaded via globals.css @import -->
<link
  rel="preconnect"
  href="https://fonts.googleapis.com"
  crossorigin="anonymous"
/>
<link
  rel="preconnect"
  href="https://fonts.gstatic.com"
  crossorigin="anonymous"
/>


================================================
FILE: web/.storybook/preview.ts
================================================
import type { Preview } from "@storybook/react";
import { withThemeByClassName } from "@storybook/addon-themes";
import "../src/app/globals.css";

const preview: Preview = {
  parameters: {
    layout: "centered",
    backgrounds: { disable: true },
    controls: {
      matchers: {
        color: /(background|color)$/i,
        date: /Date$/i,
      },
    },
  },
  decorators: [
    withThemeByClassName({
      themes: {
        light: "",
        dark: "dark",
      },
      defaultTheme: "light",
    }),
  ],
};

export default preview;


================================================
FILE: web/@types/favicon-fetch.d.ts
================================================
declare module "favicon-fetch" {
  interface FaviconFetchOptions {
    uri: string;
  }

  function faviconFetch(options: FaviconFetchOptions): string | null;

  export default faviconFetch;
}


================================================
FILE: web/@types/images.d.ts
================================================
declare module "*.png" {
  const content: string;
  export default content;
}

declare module "*.svg" {
  const content: string;
  export default content;
}

declare module "*.jpeg" {
  const content: string;
  export default content;
}

declare module "*.jpg" {
  const content: string;
  export default content;
}

declare module "*.gif" {
  const content: string;
  export default content;
}

declare module "*.webp" {
  const content: string;
  export default content;
}

declare module "*.ico" {
  const content: string;
  export default content;
}


================================================
FILE: web/AGENTS.md
================================================
# Frontend Standards

This file is the single source of truth for frontend coding standards across all Onyx frontend
projects (including, but not limited to, `/web`, `/desktop`).

# Components

UI components are spread across several directories while the codebase migrates to Opal:

- **`web/lib/opal/src/`** — The Opal design system. Preferred for all new components.
- **`web/src/refresh-components/`** — Production components not yet migrated to Opal.
- **`web/src/sections/`** — Feature-specific composite components (cards, modals, etc.).
- **`web/src/layouts/`** — Page-level layout components (settings pages, etc.).

**Do NOT use anything from `web/src/components/`** — this directory contains legacy components
that are being phased out. Always prefer Opal first; fall back to `refresh-components` only for
components not yet available in Opal.

## Opal Layouts (`lib/opal/src/layouts/`)

All layout primitives are imported from `@opal/layouts`. They handle sizing, font selection, icon
alignment, and optional inline editing.

```typescript
import { Content, ContentAction, IllustrationContent } from "@opal/layouts";
```

### Content

**Use this for any combination of icon + title + description.**

A two-axis layout component that automatically routes to the correct internal layout
(`ContentXl`, `ContentLg`, `ContentMd`, `ContentSm`) based on `sizePreset` and `variant`:

| sizePreset | variant | Routes to | Layout |
|---|---|---|---|
| `headline` / `section` | `heading` | `ContentXl` | Icon on top (flex-col) |
| `headline` / `section` | `section` | `ContentLg` | Icon inline (flex-row) |
| `main-content` / `main-ui` / `secondary` | `section` / `heading` | `ContentMd` | Compact inline |
| `main-content` / `main-ui` / `secondary` | `body` | `ContentSm` | Body text layout |

```typescript
<Content
  sizePreset="main-ui"
  variant="section"
  icon={SvgSettings}
  title="Settings"
  description="Manage your preferences"
/>
```

### ContentAction

**Use this when a Content block needs right-side actions** (buttons, badges, icons, etc.).

Wraps `Content` and adds a `rightChildren` slot. Accepts all `Content` props plus:
- `rightChildren`: `ReactNode` — actions rendered on the right
- `paddingVariant`: `SizeVariant` — controls outer padding

```typescript
<ContentAction
  sizePreset="main-ui"
  variant="section"
  icon={SvgUser}
  title="John Doe"
  description="Admin"
  rightChildren={<Button icon={SvgEdit}>Edit</Button>}
/>
```

### IllustrationContent

**Use this for empty states, error pages, and informational placeholders.**

A vertically-stacked, center-aligned layout that pairs a large illustration (7.5rem x 7.5rem)
with a title and optional description.

```typescript
import SvgNoResult from "@opal/illustrations/no-result";

<IllustrationContent
  illustration={SvgNoResult}
  title="No results found"
  description="Try adjusting your search or filters."
/>
```

Props:
- `illustration`: `IconFunctionComponent` — optional, from `@opal/illustrations`
- `title`: `string` — required
- `description`: `string` — optional

## Settings Page Layout (`src/layouts/settings-layouts.tsx`)

**Use this for all admin/settings pages.** Provides a standardized layout with scroll-aware
sticky headers, centered content containers, and responsive behavior.

```typescript
import SettingsLayouts from "@/layouts/settings-layouts";

function MySettingsPage() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgSettings}
        title="Account Settings"
        description="Manage your account preferences"
        rightChildren={<Button>Save</Button>}
      >
        <InputTypeIn placeholder="Search settings..." />
      </SettingsLayouts.Header>

      <SettingsLayouts.Body>
        <Card>Settings content here</Card>
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}
```

Sub-components:
- **`SettingsLayouts.Root`** — Wrapper with centered, scrollable container. Width options:
  `"sm"` (672px), `"sm-md"` (752px), `"md"` (872px, default), `"lg"` (992px), `"full"` (100%).
- **`SettingsLayouts.Header`** — Sticky header with icon, title, description, optional
  `rightChildren` actions, optional `children` below (e.g., search/filter), optional `backButton`,
  and optional `separator`. Automatically shows a scroll shadow when scrolled.
- **`SettingsLayouts.Body`** — Content container with consistent padding and vertical spacing.

## Cards (`src/sections/cards/`)

**When building a card that displays information about a specific entity (agent, document set,
file, connector, etc.), add it to `web/src/sections/cards/`.**

Each card is a self-contained component focused on a single entity type. Cards typically include
entity identification (name, avatar, icon), summary information, and quick actions.

```typescript
import AgentCard from "@/sections/cards/AgentCard";
import DocumentSetCard from "@/sections/cards/DocumentSetCard";
import FileCard from "@/sections/cards/FileCard";
```

Guidelines:
- One card per entity type — keep card-specific logic within the card component.
- Cards should be reusable across different pages and contexts.
- Use shared components from `@opal/components`, `@opal/layouts`, and `@/refresh-components`
  inside cards — do not duplicate layout or styling logic.

## Button (`components/buttons/button/`)

**Always use the Opal `Button`.** Do not use raw `<button>` elements.

Built on `Interactive.Stateless` > `Interactive.Container`, so it inherits the full color/state
system automatically.

```typescript
import { Button } from "@opal/components/buttons/button/components";

// Labeled button
<Button variant="default" prominence="primary" icon={SvgPlus}>
  Create
</Button>

// Icon-only button (omit children)
<Button variant="default" prominence="tertiary" icon={SvgTrash} size="sm" />
```

Key props:
- `variant`: `"default"` | `"action"` | `"danger"` | `"none"`
- `prominence`: `"primary"` | `"secondary"` | `"tertiary"` | `"internal"`
- `size`: `"lg"` | `"md"` | `"sm"` | `"xs"` | `"2xs"` | `"fit"`
- `icon`, `rightIcon`, `children`, `disabled`, `href`, `tooltip`

## Core Primitives (`core/`)

The `core/` directory contains the lowest-level building blocks that power all Opal components.
**Most code should not interface with these directly** — use higher-level components like `Button`,
`Content`, and `ContentAction` instead. These are documented here for understanding, not everyday use.

### Interactive (`core/interactive/`)

The foundational layer for all clickable/interactive surfaces. Defines the color matrix for
hover, active, and disabled states.

- **`Interactive.Stateless`** — Color system for stateless elements (buttons, links). Applies
  variant/prominence/state combinations via CSS custom properties.
- **`Interactive.Stateful`** — Color system for stateful elements (toggles, sidebar items, selects).
  Uses `state` (`"empty"` | `"filled"` | `"selected"`) instead of prominence.
- **`Interactive.Container`** — Structural box providing height, rounding, padding, and border.
  Shared by both Stateless and Stateful. Renders as `<div>`, `<button>`, or `<Link>` depending
  on context.
- **`Interactive.Foldable`** — Zero-width collapsible wrapper with CSS grid animation.

### Disabled (`core/disabled/`)

A pure CSS wrapper that applies disabled visuals (`opacity-50`, `cursor-not-allowed`,
`pointer-events: none`) to a single child element via Radix `Slot`. Has no React context —
Interactive primitives and buttons manage their own disabled state via a `disabled` prop.

### Hoverable (`core/animations/`)

A standardized way to provide "opacity-100 on hover" behavior. Instead of manually wiring
`opacity-0 group-hover:opacity-100` with Tailwind, use `Hoverable` for consistent, coordinated
hover-to-reveal patterns.

- **`Hoverable.Root`** — Wraps a hover group. Tracks mouse enter/leave and broadcasts hover
  state to descendants via a per-group React context.
- **`Hoverable.Item`** — Marks an element that should appear on hover. Supports two modes:
  - **Group mode** (`group` prop provided): visibility driven by a matching `Hoverable.Root`
    ancestor. Throws if no matching Root is found.
  - **Local mode** (`group` omitted): uses CSS `:hover` on the item itself.

```typescript
import { Hoverable } from "@opal/core";

// Group mode — hovering anywhere on the row reveals the trash icon
<Hoverable.Root group="row">
  <div className="flex items-center gap-2">
    <span>Row content</span>
    <Hoverable.Item group="row" variant="opacity-on-hover">
      <SvgTrash />
    </Hoverable.Item>
  </div>
</Hoverable.Root>

// Local mode — hovering the item itself reveals it
<Hoverable.Item variant="opacity-on-hover">
  <SvgTrash />
</Hoverable.Item>
```

# Best Practices

## 0. Size Variant Defaults

**When using `SizeVariants` (or any subset like `PaddingVariants`, `RoundingVariants`) as a prop
type, always default to `"md"`.**

**Reason:** `"md"` is the standard middle-of-the-road preset across the design system. Consistent
defaults make components predictable — callers only need to specify a size when they want something
other than the norm.

```typescript
// ✅ Good — default to "md"
function MyCard({ padding = "md", rounding = "md" }: MyCardProps) { ... }

// ❌ Bad — arbitrary or inconsistent defaults
function MyCard({ padding = "sm", rounding = "lg" }: MyCardProps) { ... }
```

## 1. Tailwind Dark Mode

**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**

**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.

**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.

```typescript
// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
  Content
</div>

// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
  monochromatic: true,  // Will apply dark:invert internally
});

export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
  darkSrc: gitbookDarkIcon,  // Will use dark:hidden/dark:block internally
});

// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
  Content
</div>
```

## 2. Icon Usage

**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**

**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.

```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";

// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```

**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.

## 3. Text Rendering

**Use the Opal `Text` component for all text rendering. Avoid "naked" text nodes.**

**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It uses
string-enum props (`font` and `color`) for font preset and color selection. Inline markdown is
opt-in via the `markdown()` function from `@opal/types`.

```typescript
// ✅ Good — Opal Text with string-enum props
import { Text } from "@opal/components";

function UserCard({ name }: { name: string }) {
  return (
    <Text font="main-ui-action" color="text-03">
      {name}
    </Text>
  )
}

// ✅ Good — inline markdown via markdown()
import { markdown } from "@opal/utils";

<Text font="main-ui-body" color="text-05">
  {markdown("*Hello*, **world**! Visit [Onyx](https://onyx.app) and run `onyx start`.")}
</Text>

// ✅ Good — plain strings are never parsed as markdown
<Text font="main-ui-body" color="text-03">
  {userProvidedString}
</Text>

// ✅ Good — component props that support optional markdown use `string | RichStr`
import type { RichStr } from "@opal/types";

interface MyCardProps {
  title: string | RichStr;
}

// ❌ Bad — legacy boolean-flag API (still works but deprecated)
import Text from "@/refresh-components/texts/Text";
<Text text03 mainUiAction>{name}</Text>

// ❌ Bad — naked text nodes
<div>
  <h2>{name}</h2>
  <p>User details</p>
</div>
```

Key props:
- `font`: `TextFont` — font preset (e.g., `"main-ui-body"`, `"heading-h2"`, `"secondary-action"`)
- `color`: `TextColor` — text color (e.g., `"text-03"`, `"text-inverted-05"`)
- `as`: `"p" | "span" | "li" | "h1" | "h2" | "h3"` — HTML tag (default: `"span"`)
- `nowrap`: `boolean` — prevent text wrapping

**`RichStr` convention:** When creating new components, any string prop that will be rendered as
visible text in the DOM (e.g., `title`, `description`, `label`) should be typed as
`string | RichStr` instead of plain `string`. This gives callers opt-in markdown support via
`markdown()` without requiring any additional props or API surface on the component.

```typescript
import type { RichStr } from "@opal/types";
import { Text } from "@opal/components";

// ✅ Good — new components accept string | RichStr and render via Text
interface InfoCardProps {
  title: string | RichStr;
  description?: string | RichStr;
}

function InfoCard({ title, description }: InfoCardProps) {
  return (
    <div>
      <Text font="main-ui-action">{title}</Text>
      {description && (
        <Text font="secondary-body" color="text-03">{description}</Text>
      )}
    </div>
  );
}

// ❌ Bad — plain string props block markdown support for callers
interface InfoCardProps {
  title: string;
  description?: string;
}
```

## 4. Component Usage

**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**

**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.

```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'

function ContactForm() {
  return (
    <form>
      <InputTypeIn placeholder="Search..." />
      <Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
    </form>
  )
}

// ❌ Bad
function ContactForm() {
  return (
    <form>
      <input placeholder="Name" />
      <textarea placeholder="Message" />
      <button type="submit">Submit</button>
    </form>
  )
}
```

## 5. Colors

**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**

**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.

**Available color categories:**

- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.

```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />

// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```

## 6. Data Fetching

**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**

**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).

# Stylistic Preferences

## 1. Import Standards

**Always use absolute imports with the `@` prefix.**

**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.

```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";

// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```

## 2. React Component Functions

**Prefer regular functions over arrow functions for React components.**

**Reason:** Functions just become easier to read.

```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
  return <div>User Profile</div>
}

// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
  return <div>User Profile</div>
}
```

## 3. Props Interface Extraction

**Extract prop types into their own interface definitions. Keep prop interfaces in the same file
as the component they belong to. Non-prop types (shared models, API response shapes, enums, etc.)
should be placed in a co-located `interfaces.ts` file.**

**Reason:** Prop interfaces are tightly coupled to their component and rarely imported elsewhere,
so co-location keeps things simple. Shared types belong in `interfaces.ts` so they can be
imported without pulling in component code.

```typescript
// ✅ Good — props interface in the same file as the component
// UserCard.tsx
interface UserCardProps {
  user: User
  showActions?: boolean
  onEdit?: (userId: string) => void
}

function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
  return <div>User Card</div>
}

// ✅ Good — shared types in interfaces.ts
// interfaces.ts
export interface User {
  id: string
  name: string
  role: UserRole
}

export type UserRole = "admin" | "member" | "viewer"

// ❌ Bad — inline prop types
function UserCard({
  user,
  showActions = false,
  onEdit
}: {
  user: User
  showActions?: boolean
  onEdit?: (userId: string) => void
}) {
  return <div>User Card</div>
}
```

## 4. Spacing Guidelines

**Prefer padding over margins for spacing. When a library component exposes a padding prop
(e.g., `paddingVariant`), use that prop instead of wrapping it in a `<div>` with padding classes.
If a library component does not expose a padding override and you find yourself adding a wrapper
div for spacing, consider updating the library component to accept one.**

**Reason:** We want to consolidate usage to paddings instead of margins, and minimize wrapper
divs that exist solely for spacing.

```typescript
// ✅ Good — use the component's padding prop
<ContentAction paddingVariant="md" ... />

// ✅ Good — padding utilities when no component prop exists
<div className="p-4 space-y-2">
  <div className="p-2">Content</div>
</div>

// ❌ Bad — wrapper div just for spacing
<div className="p-4">
  <ContentAction ... />
</div>

// ❌ Bad — margins
<div className="m-4 space-y-2">
  <div className="m-2">Content</div>
</div>
```

## 5. Class Name Utilities

**Use the `cn` utility instead of raw string formatting for classNames.**

**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.

```typescript
import { cn } from '@/lib/utils'

// ✅ Good
<div className={cn(
  'base-class',
  isActive && 'active-class',
  className
)}>
  Content
</div>

// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
  Content
</div>
```

## 6. Custom Hooks Organization

**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**

**Reason:** This is just a layout preference. Keeps code clean.

```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
  // hook implementation
}

// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
  // hook implementation
}
```


================================================
FILE: web/Dockerfile
================================================
FROM node:20-alpine AS base

LABEL com.onyx.maintainer="founders@onyx.app"
LABEL com.onyx.description="This image is the web/frontend container of Onyx which \
contains code for both the Community and Enterprise editions of Onyx. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"

# Step 1. Install dependencies + rebuild the source code only when needed
FROM base AS builder
# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
RUN apk add --no-cache libc6-compat
WORKDIR /app

# Copy package files first for better layer caching
# This layer will be cached unless package.json or package-lock.json changes
COPY package.json package-lock.json ./
COPY lib/opal/package.json ./lib/opal/

# Install dependencies
RUN npm ci

# pull in source code / package.json / package-lock.json
COPY . .

# needed to get the `standalone` dir we expect later
ENV NEXT_PRIVATE_STANDALONE=true

# Disable automatic telemetry collection
ENV NEXT_TELEMETRY_DISABLED=1

# Environment variables must be present at build time
# https://github.com/vercel/next.js/discussions/14030
# NOTE: if you add something here, make sure to add it to the runner as well


ARG NEXT_PUBLIC_THEME
ENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME}

ARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED
ENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED}

ARG NEXT_PUBLIC_DISABLE_LOGOUT
ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}

ARG NEXT_PUBLIC_CUSTOM_REFRESH_URL
ENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL}

ARG NEXT_PUBLIC_POSTHOG_KEY
ARG NEXT_PUBLIC_POSTHOG_HOST
ENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}
ENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}

ARG NEXT_PUBLIC_CLOUD_ENABLED
ENV NEXT_PUBLIC_CLOUD_ENABLED=${NEXT_PUBLIC_CLOUD_ENABLED}

ARG NEXT_PUBLIC_SENTRY_DSN
ENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}

ARG NEXT_PUBLIC_GTM_ENABLED
ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}

ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}

ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}

ARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY
ENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}

ARG NEXT_PUBLIC_RECAPTCHA_SITE_KEY
ENV NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${NEXT_PUBLIC_RECAPTCHA_SITE_KEY}

ARG SENTRY_RELEASE
ENV SENTRY_RELEASE=${SENTRY_RELEASE}

# Add NODE_OPTIONS argument
ARG NODE_OPTIONS

# SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written
# to any image layer, build cache, or registry manifest.
# Use NODE_OPTIONS in the build command
RUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \
    NODE_OPTIONS="${NODE_OPTIONS}" npx next build

# Step 2. Production image, copy all the files and run next
FROM base AS runner
WORKDIR /app

# Remove global node modules, since they are not needed by the actual app
# (all dependencies are copied over into the `/app` dir itself). These
# global modules may be outdated and trigger security scans.
RUN rm -rf /usr/local/lib/node_modules

# Not needed, set by compose
# ENV NODE_ENV production

# Disable automatic telemetry collection
ENV NEXT_TELEMETRY_DISABLED=1

# Don't run production as root
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs
USER nextjs

# Add back in if we add anything to `public`
COPY --from=builder /app/public ./public

# Automatically leverage output traces to reduce image size
# https://nextjs.org/docs/advanced-features/output-file-tracing
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# Environment variables must be redefined at run time
# NOTE: if you add something here, make sure to add it to the builder as well


# allow user to specify custom feedback options
ARG NEXT_PUBLIC_THEME
ENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME}

ARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED
ENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED}

ARG NEXT_PUBLIC_DISABLE_LOGOUT
ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}

ARG NEXT_PUBLIC_CUSTOM_REFRESH_URL
ENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL}

ARG NEXT_PUBLIC_POSTHOG_KEY
ARG NEXT_PUBLIC_POSTHOG_HOST
ENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}
ENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}

ARG NEXT_PUBLIC_CLOUD_ENABLED
ENV NEXT_PUBLIC_CLOUD_ENABLED=${NEXT_PUBLIC_CLOUD_ENABLED}

ARG NEXT_PUBLIC_SENTRY_DSN
ENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}

ARG NEXT_PUBLIC_GTM_ENABLED
ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}

ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}

ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}

ARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY
ENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}

ARG NEXT_PUBLIC_RECAPTCHA_SITE_KEY
ENV NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${NEXT_PUBLIC_RECAPTCHA_SITE_KEY}

ARG SENTRY_RELEASE
ENV SENTRY_RELEASE=${SENTRY_RELEASE}

# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}

# Note: Don't expose ports here, Compose will handle that for us if necessary.
# If you want to run this without compose, specify the ports to
# expose via cli

CMD ["node", "server.js"]


================================================
FILE: web/README.md
================================================
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/web/README.md"} -->

This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).

## Getting Started

Install node / npm: https://docs.npmjs.com/downloading-and-installing-node-js-and-npm
Install all dependencies: `npm i`.

Then, run the development server:

```bash
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

_Note:_ if you are having problems accessing the ^, try setting the `WEB_DOMAIN` env variable to
`http://127.0.0.1:3000` and accessing it there.

> [!TIP]
> Packages are installed automatically when switching branches after `package.json` changes with [pre-commit](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md#formatting-and-linting) configured.

### Connecting to a Cloud Backend

To test your local frontend development server against a cloud backend (e.g., staging or production),
create a `.env.local` file in the `web/` directory with the following configuration:

```text
# Point local dev server to cloud backend
INTERNAL_URL=https://st-dev.onyx.app/api

# Debug auth cookie for authenticating against remote backend
# This cookie is automatically injected into API requests when in development mode
# To get this value:
#   1. Go to https://st-dev.onyx.app (or your target backend URL) and log in
#   2. Open DevTools (F12) → Application → Cookies → [your backend domain]
#   3. Find the "fastapiusersauth" cookie and copy its value
#   4. Paste the value below (without quotes)
# Note: This cookie may expire, so you may need to refresh it periodically
DEBUG_AUTH_COOKIE=your_cookie_value_here
```

By default, this does _NOT_ override existing cookies, so if you've logged in previously, you
may need to delete the cookies for the `localhost` domain.

**Important notes:**

- The `.env.local` file should be created in the `web/` directory (same level as `package.json`)
- After creating or modifying `.env.local`, restart your development server for changes to take effect
- The `DEBUG_AUTH_COOKIE` is only used in development mode (`NODE_ENV=development`)
- If `INTERNAL_URL` is not set, the frontend will connect to the local backend at `http://127.0.0.1:8080`
- Keep your `.env.local` file secure and never commit it to version control (it should already be in `.gitignore`)

## Testing

This testing process will reset your application into a clean state.
Don't run these tests if you don't want to do this!

Bring up the entire application.

0. Install playwright dependencies

```bash
npx playwright install
```

1. Run playwright

```bash
npx playwright test
```

To run a single test:

```bash
npx playwright test landing-page.spec.ts
```

If running locally, interactive options can help you see exactly what is happening in
the test.

```bash
npx playwright test --ui
npx playwright test --headed
```

2. Inspect results

By default, playwright.config.ts is configured to output the results to:

```bash
web/output/playwright/
```

3. Visual regression screenshots

Screenshots are captured automatically during test runs and saved to `web/output/screenshots/`.
To compare screenshots across CI runs, use:

```bash
ods screenshot-diff compare --project admin
```

For more information, see [tools/ods/README.md](https://github.com/onyx-dot-app/onyx/blob/main/tools/ods/README.md#screenshot-diff---visual-regression-testing).


================================================
FILE: web/components.json
================================================
{
  "$schema": "https://ui.shadcn.com/schema.json",
  "style": "default",
  "rsc": true,
  "tsx": true,
  "tailwind": {
    "config": "tailwind-themes/tailwind.config.js",
    "css": "src/app/globals.css",
    "baseColor": "neutral",
    "cssVariables": false,
    "prefix": ""
  },
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils",
    "ui": "@/components/ui",
    "lib": "@/lib",
    "hooks": "@/hooks"
  }
}


================================================
FILE: web/jest.config.js
================================================
/**
 * Jest configuration with separate projects for different test environments.
 *
 * We use two separate projects:
 * 1. "unit" - Node environment for pure unit tests (no DOM needed)
 * 2. "integration" - jsdom environment for React integration tests
 *
 * This allows us to run tests with the correct environment automatically
 * without needing @jest-environment comments in every test file.
 */

// Shared configuration
const sharedConfig = {
  preset: "ts-jest",
  setupFilesAfterEnv: ["<rootDir>/tests/setup/jest.setup.ts"],

  // Performance: Use 50% of CPU cores for parallel execution
  maxWorkers: "50%",

  moduleNameMapper: {
    // Mock CSS files (before path alias resolution)
    // CSS/static assets cannot be executed in tests and must be mocked
    "^@/.*\\.(css|less|scss|sass)$": "<rootDir>/tests/setup/mocks/cssMock.js",
    "^katex/dist/katex.min.css$": "<rootDir>/tests/setup/mocks/cssMock.js",
    "\\.(css|less|scss|sass)$": "<rootDir>/tests/setup/mocks/cssMock.js",
    // Mock static file imports
    "\\.(jpg|jpeg|png|gif|svg|woff|woff2|ttf|eot)$":
      "<rootDir>/tests/setup/fileMock.js",
    // Mock specific components that have complex dependencies
    "^@/providers/UserProvider$":
      "<rootDir>/tests/setup/mocks/components/UserProvider.tsx",
    // Path aliases (must come after specific mocks)
    "^@/(.*)$": "<rootDir>/src/$1",
    "^@tests/(.*)$": "<rootDir>/tests/$1",
    "^@opal$": "<rootDir>/lib/opal/src/index.ts",
    "^@opal/(.*)$": "<rootDir>/lib/opal/src/$1",
  },

  testPathIgnorePatterns: ["/node_modules/", "/tests/e2e/", "/.next/"],

  // Transform ES Modules in node_modules to CommonJS for Jest compatibility
  // Add packages here when you encounter: "SyntaxError: Unexpected token 'export'"
  // These packages ship as ESM and need to be transformed to work in Jest
  transformIgnorePatterns: [
    "/node_modules/(?!(" +
      [
        // Auth & Security
        "jose",
        // UI Libraries
        "@radix-ui",
        "@headlessui",
        "@phosphor-icons",
        // Testing & Mocking
        "msw",
        "until-async",
        // Language Detection
        "linguist-languages",
        // Markdown & Syntax Highlighting
        "react-markdown",
        "remark-.*", // All remark packages
        "rehype-.*", // All rehype packages
        "unified",
        "lowlight",
        "highlight\\.js",
        // Markdown Utilities
        "bail",
        "is-plain-obj",
        "trough",
        "vfile",
        "vfile-.*", // All vfile packages
        "unist-.*", // All unist packages
        "mdast-.*", // All mdast packages
        "hast-.*", // All hast packages
        "hastscript",
        "micromark.*", // All micromark packages
        "decode-named-character-reference",
        "character-entities",
        "devlop",
        "comma-separated-tokens",
        "property-information",
        "space-separated-tokens",
        "html-void-elements",
        "html-url-attributes",
        "aria-attributes",
        "web-namespaces",
        "svg-tag-names",
        "style-to-object",
        "inline-style-parser",
        "ccount",
        "escape-string-regexp",
        "markdown-table",
        "longest-streak",
        "zwitch",
        "trim-lines",
        "stringify-entities",
        "estree-.*", // All estree packages
      ].join("|") +
      ")/)",
  ],

  transform: {
    "^.+\\.(t|j)sx?$": [
      "ts-jest",
      {
        // Performance: Disable type-checking in tests (types are checked by tsc)
        isolatedModules: true,
        tsconfig: {
          jsx: "react-jsx",
          // Allow ts-jest to process JavaScript files from node_modules
          allowJs: true,
        },
      },
    ],
  },

  // Performance: Cache results between runs
  cache: true,
  cacheDirectory: "<rootDir>/.jest-cache",

  collectCoverageFrom: [
    "src/**/*.{ts,tsx}",
    "!src/**/*.d.ts",
    "!src/**/*.stories.tsx",
  ],

  coveragePathIgnorePatterns: ["/node_modules/", "/tests/", "/.next/"],

  // Performance: Clear mocks automatically between tests
  clearMocks: true,
  resetMocks: false,
  restoreMocks: false,
};

module.exports = {
  projects: [
    {
      displayName: "unit",
      ...sharedConfig,
      testEnvironment: "node",
      testMatch: [
        // Pure unit tests that don't need DOM
        "**/src/**/codeUtils.test.ts",
        "**/src/lib/**/*.test.ts",
        "**/src/app/**/services/*.test.ts",
        "**/src/app/**/utils/*.test.ts",
        "**/src/app/**/hooks/*.test.ts", // Pure packet processor tests
        "**/src/hooks/**/*.test.ts",
        "**/src/refresh-components/**/*.test.ts",
        "**/src/refresh-pages/**/*.test.ts",
        "**/src/sections/**/*.test.ts",
        "**/src/components/**/*.test.ts",
        // Add more patterns here as you add more unit tests
      ],
    },
    {
      displayName: "integration",
      ...sharedConfig,
      testEnvironment: "jsdom",
      testMatch: [
        // React component integration tests
        "**/src/app/**/*.test.tsx",
        "**/src/components/**/*.test.tsx",
        "**/src/lib/**/*.test.tsx",
        "**/src/providers/**/*.test.tsx",
        "**/src/refresh-components/**/*.test.tsx",
        "**/src/hooks/**/*.test.tsx",
        "**/src/sections/**/*.test.tsx",
        // Add more patterns here as you add more integration tests
      ],
    },
  ],
};


================================================
FILE: web/lib/opal/README.md
================================================
# Opal

A Typescript component library for Onyx.

## Usage

```tsx
import { Button } from "@opal/components";

function MyComponent() {
  return <Button onClick={() => console.log("Clicked!")}>Click me</Button>;
}
```

## Build

Opal is built in such a way that it _reuses_ the `/web/node_modules` directory.
Therefore, builds don't incur duplicate space-costs (i.e., what would have happened if Opal had its own `node_modules`).
If you want to add dependencies to Opal, define that dependency inside of `/web/lib/opal/package.json` under `peerDependencies`.
Then, go to `/web` and run the install:

```sh
npm i

# Or, if you prefer `bun`
bun i
```

Those dependencies will then install inside of `/web/node_modules` and be available to Opal.

## Structure

```
/web/lib/opal/
├── src/
│   ├── core/           # Low-level primitives (Interactive, Hoverable)
│   ├── components/     # High-level React components (Button, SelectButton, OpenButton, Tag)
│   ├── layouts/        # Layout primitives (Content, ContentAction, IllustrationContent)
│   └── index.ts        # Main export file
├── package.json
├── tsconfig.json
└── README.md
```

## Conventions

- **Directory names** are kebab-case (e.g. `select-button/`, `open-button/`, `content-action/`)
- **Each component directory** contains `components.tsx`, `styles.css` (if needed), and `README.md`
- **Imports** use `@opal/` path aliases (e.g. `@opal/components`, `@opal/core`)


================================================
FILE: web/lib/opal/package.json
================================================
{
  "name": "@onyx/opal",
  "version": "0.0.1",
  "exports": {
    "./components": {
      "types": "./src/components/index.ts",
      "default": "./src/components/index.ts"
    },
    "./layouts": {
      "types": "./src/layouts/index.ts",
      "default": "./src/layouts/index.ts"
    },
    "./core": {
      "types": "./src/core/index.ts",
      "default": "./src/core/index.ts"
    },
    "./icons": {
      "types": "./src/icons/index.ts",
      "default": "./src/icons/index.ts"
    },
    "./illustrations": {
      "types": "./src/illustrations/index.ts",
      "default": "./src/illustrations/index.ts"
    },
    "./types": {
      "types": "./src/types.ts",
      "default": "./src/types.ts"
    },
    "./utils": {
      "types": "./src/utils.ts",
      "default": "./src/utils.ts"
    }
  }
}


================================================
FILE: web/lib/opal/scripts/README.md
================================================
# SVG-to-TSX Conversion Scripts

## Overview

Integrating `@svgr/webpack` into the TypeScript compiler was not working via the recommended route (Next.js webpack configuration).
The automatic SVG-to-React component conversion was causing compilation issues and import resolution problems.
Therefore, we manually convert each SVG into a TSX file using SVGR CLI with a custom template.

All scripts in this directory should be run from the **opal package root** (`web/lib/opal/`).

## Directory Layout

```
web/lib/opal/
├── scripts/                          # SVG conversion tooling (this directory)
│   ├── convert-svg.sh                # Converts SVGs into React components
│   └── icon-template.js              # Shared SVGR template (used for both icons and illustrations)
├── src/
│   ├── icons/                        # Small, single-colour icons (stroke = currentColor)
│   └── illustrations/                # Larger, multi-colour illustrations (colours preserved)
└── package.json
```

## Icons vs Illustrations

| | Icons | Illustrations |
|---|---|---|
| **Import path** | `@opal/icons` | `@opal/illustrations` |
| **Location** | `src/icons/` | `src/illustrations/` |
| **Colour** | Overridable via `currentColor` | Fixed — original SVG colours preserved |
| **Script flag** | (none) | `--illustration` |

## Files in This Directory

### `icon-template.js`

A custom SVGR template that generates components with the following features:
- Imports `IconProps` from `@opal/types` for consistent typing
- Supports the `size` prop for controlling icon dimensions
- Includes `width` and `height` attributes bound to the `size` prop
- Maintains all standard SVG props (className, color, title, etc.)

### `convert-svg.sh`

Converts an SVG into a React component. Behaviour depends on the mode:

**Icon mode** (default):
- Strips `stroke`, `stroke-opacity`, `width`, and `height` attributes
- Adds `width={size}`, `height={size}`, and `stroke="currentColor"`
- Result is colour-overridable via CSS `color` property

**Illustration mode** (`--illustration`):
- Strips only `width` and `height` attributes (all colours preserved)
- Adds `width={size}` and `height={size}`
- Does **not** add `stroke="currentColor"` — illustrations keep their original colours

Both modes automatically delete the source SVG file after successful conversion.

## Adding New SVGs

### Icons

```sh
# From web/lib/opal/
./scripts/convert-svg.sh src/icons/my-icon.svg
```

Then add the export to `src/icons/index.ts`:
```ts
export { default as SvgMyIcon } from "@opal/icons/my-icon";
```

### Illustrations

```sh
# From web/lib/opal/
./scripts/convert-svg.sh --illustration src/illustrations/my-illustration.svg
```

Then add the export to `src/illustrations/index.ts`:
```ts
export { default as SvgMyIllustration } from "@opal/illustrations/my-illustration";
```

## Manual Conversion

If you prefer to run the SVGR command directly:

**For icons** (strips colours):
```sh
bunx @svgr/cli <file>.svg --typescript --svgo-config '{"plugins":[{"name":"removeAttrs","params":{"attrs":["stroke","stroke-opacity","width","height"]}}]}' --template scripts/icon-template.js > <file>.tsx
```

**For illustrations** (preserves colours):
```sh
bunx @svgr/cli <file>.svg --typescript --svgo-config '{"plugins":[{"name":"removeAttrs","params":{"attrs":["width","height"]}}]}' --template scripts/icon-template.js > <file>.tsx
```

After running either manual command, remember to delete the original SVG file.


================================================
FILE: web/lib/opal/scripts/convert-svg.sh
================================================
#!/bin/bash

# Convert an SVG file to a TypeScript React component.
#
# By default, converts to a colour-overridable icon (stroke colours stripped, replaced with currentColor).
# With --illustration, converts to a fixed-colour illustration (all original colours preserved).
#
# Usage (from the opal package root — web/lib/opal/):
#   ./scripts/convert-svg.sh src/icons/<filename.svg>
#   ./scripts/convert-svg.sh --illustration src/illustrations/<filename.svg>

ILLUSTRATION=false

# Parse flags
while [[ "$1" == --* ]]; do
  case "$1" in
    --illustration)
      ILLUSTRATION=true
      shift
      ;;
    *)
      echo "Unknown flag: $1" >&2
      echo "Usage: ./scripts/convert-svg.sh [--illustration] <filename.svg>" >&2
      exit 1
      ;;
  esac
done

if [ -z "$1" ]; then
  echo "Usage: ./scripts/convert-svg.sh [--illustration] <filename.svg>" >&2
  exit 1
fi

SVG_FILE="$1"

# Check if file exists
if [ ! -f "$SVG_FILE" ]; then
  echo "Error: File '$SVG_FILE' not found" >&2
  exit 1
fi

# Check if it's an SVG file
if [[ ! "$SVG_FILE" == *.svg ]]; then
  echo "Error: File must have .svg extension" >&2
  exit 1
fi

# Get the base name without extension
BASE_NAME="${SVG_FILE%.svg}"

# Build the SVGO config based on mode
if [ "$ILLUSTRATION" = true ]; then
  # Illustrations: only strip width and height (preserve all colours)
  SVGO_CONFIG='{"plugins":[{"name":"removeAttrs","params":{"attrs":["width","height"]}}]}'
else
  # Icons: strip stroke, stroke-opacity, width, and height
  SVGO_CONFIG='{"plugins":[{"name":"removeAttrs","params":{"attrs":["stroke","stroke-opacity","width","height"]}}]}'
fi

# Resolve the template path relative to this script (not the caller's CWD)
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"

# Run the conversion into a temp file so a failed run doesn't destroy an existing .tsx
TMPFILE="${BASE_NAME}.tsx.tmp"
bunx @svgr/cli "$SVG_FILE" --typescript --svgo-config "$SVGO_CONFIG" --template "${SCRIPT_DIR}/icon-template.js" > "$TMPFILE"

if [ $? -eq 0 ]; then
  # Verify the temp file has content before replacing the destination
  if [ ! -s "$TMPFILE" ]; then
    rm -f "$TMPFILE"
    echo "Error: Output file was not created or is empty" >&2
    exit 1
  fi

  mv "$TMPFILE" "${BASE_NAME}.tsx" || { echo "Error: Failed to move temp file" >&2; exit 1; }

  # Post-process the file to add width and height attributes bound to the size prop
  # Using perl for cross-platform compatibility (works on macOS, Linux, Windows with WSL)
  # Note: perl -i returns 0 even on some failures, so we validate the output

  perl -i -pe 's/<svg/<svg width={size} height={size}/g' "${BASE_NAME}.tsx"
  if [ $? -ne 0 ]; then
    echo "Error: Failed to add width/height attributes" >&2
    exit 1
  fi

  # Icons additionally get stroke="currentColor"
  if [ "$ILLUSTRATION" = false ]; then
    perl -i -pe 's/\{\.\.\.props\}/stroke="currentColor" {...props}/g' "${BASE_NAME}.tsx"
    if [ $? -ne 0 ]; then
      echo "Error: Failed to add stroke attribute" >&2
      exit 1
    fi
  fi

  # Verify the file still exists and has content after post-processing
  if [ ! -s "${BASE_NAME}.tsx" ]; then
    echo "Error: Output file corrupted during post-processing" >&2
    exit 1
  fi

  # Verify required attributes are present in the output
  if ! grep -q 'width={size}' "${BASE_NAME}.tsx" || ! grep -q 'height={size}' "${BASE_NAME}.tsx"; then
    echo "Error: Post-processing did not add required attributes" >&2
    exit 1
  fi

  # For icons, also verify stroke="currentColor" was added
  if [ "$ILLUSTRATION" = false ]; then
    if ! grep -q 'stroke="currentColor"' "${BASE_NAME}.tsx"; then
      echo "Error: Post-processing did not add stroke=\"currentColor\"" >&2
      exit 1
    fi
  fi

  echo "Created ${BASE_NAME}.tsx"
  rm "$SVG_FILE"
  echo "Deleted $SVG_FILE"
else
  rm -f "$TMPFILE"
  echo "Error: Conversion failed" >&2
  exit 1
fi


================================================
FILE: web/lib/opal/scripts/icon-template.js
================================================
// Template for SVGR to generate icon components with size prop support
const template = (variables, { tpl }) => {
  return tpl`
import type { IconProps } from "@opal/types";

const ${variables.componentName} = ({ size, ...props }: IconProps) => (
  ${variables.jsx}
);

${variables.exports};
`;
};

module.exports = template;


================================================
FILE: web/lib/opal/src/components/README.md
================================================
# Opal Components

High-level UI components built on the [`@opal/core`](../core/) primitives. Every component in this directory delegates state styling (hover, active, disabled) to `Interactive.Stateless` or `Interactive.Stateful` via CSS data-attributes and the `--interactive-foreground` / `--interactive-foreground-icon` custom properties — no duplicated Tailwind class maps.

## Package export

Components are exposed via:

```ts
import { Button, SelectButton, OpenButton, Tag } from "@opal/components";
```

The barrel file at `index.ts` re-exports each component and its prop types. Each component imports its own `styles.css` internally.

## Components

| Component | Description | Docs |
|-----------|-------------|------|
| [Button](./buttons/button/) | Label and/or icon-only stateless button | [README](./buttons/button/README.md) |
| [SelectButton](./buttons/select-button/) | Stateful toggle button with optional foldable content | [README](./buttons/select-button/README.md) |
| [OpenButton](./buttons/open-button/) | Trigger button with rotating chevron for popovers | [README](./buttons/open-button/README.md) |
| [Tag](./tag/) | Small colored label for status/category metadata | [README](./tag/README.md) |

## Adding new components

1. Create a directory under `components/` in kebab-case (e.g. `components/inputs/text-input/`)
2. Add a `styles.css` for layout-only CSS (colors come from Interactive primitives)
3. Add a `components.tsx` with the component and its exported props type
4. Import `styles.css` at the top of your `components.tsx`
5. Add a `README.md` inside the component directory with architecture, props, and usage examples
6. In `components/index.ts`, re-export the component:
   ```ts
   export { TextInput, type TextInputProps } from "@opal/components/inputs/text-input/components";
   ```


================================================
FILE: web/lib/opal/src/components/buttons/Button/Button.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import { Button } from "@opal/components";
import { SvgPlus, SvgArrowRight, SvgSettings } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Button> = {
  title: "opal/components/Button",
  component: Button,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Button>;

export const Default: Story = {
  args: {
    children: "Button",
    variant: "default",
    prominence: "primary",
  },
};

const VARIANTS = ["default", "action", "danger"] as const;
const PROMINENCES = ["primary", "secondary", "tertiary"] as const;

export const VariantProminenceGrid: Story = {
  render: () => (
    <div
      style={{
        display: "grid",
        gridTemplateColumns: "auto repeat(3, 1fr)",
        gap: 12,
        alignItems: "center",
      }}
    >
      {/* Header row */}
      <div />
      {PROMINENCES.map((p) => (
        <div
          key={p}
          style={{
            fontWeight: 600,
            textAlign: "center",
            textTransform: "capitalize",
          }}
        >
          {p}
        </div>
      ))}

      {/* Variant rows */}
      {VARIANTS.map((variant) => (
        <React.Fragment key={variant}>
          <div style={{ fontWeight: 600, textTransform: "capitalize" }}>
            {variant}
          </div>
          {PROMINENCES.map((prominence) => (
            <Button
              key={`${variant}-${prominence}`}
              variant={variant}
              prominence={prominence}
            >
              {`${variant} ${prominence}`}
            </Button>
          ))}
        </React.Fragment>
      ))}
    </div>
  ),
};

export const WithLeftIcon: Story = {
  args: {
    icon: SvgPlus,
    children: "Add item",
  },
};

export const WithRightIcon: Story = {
  args: {
    rightIcon: SvgArrowRight,
    children: "Continue",
  },
};

export const IconOnly: Story = {
  args: {
    icon: SvgSettings,
  },
};

export const Sizes: Story = {
  render: () => (
    <div style={{ display: "flex", alignItems: "center", gap: 12 }}>
      {(["lg", "md", "sm", "xs", "2xs", "fit"] as const).map((size) => (
        <Button key={size} size={size} icon={SvgPlus}>
          {size}
        </Button>
      ))}
    </div>
  ),
};

export const Foldable: Story = {
  args: {
    foldable: true,
    icon: SvgPlus,
    children: "Add item",
  },
};

export const Disabled: Story = {
  args: {
    disabled: true,
    children: "Disabled",
  },
};

export const WidthFull: Story = {
  args: {
    width: "full",
    children: "Full width",
  },
  decorators: [
    (Story) => (
      <div style={{ width: 400 }}>
        <Story />
      </div>
    ),
  ],
};

export const AsLink: Story = {
  args: {
    href: "https://example.com",
    children: "Visit site",
    rightIcon: SvgArrowRight,
  },
};

export const WithTooltip: Story = {
  args: {
    icon: SvgSettings,
    tooltip: "Open settings",
    tooltipSide: "bottom",
  },
};

export const ResponsiveHideText: Story = {
  args: {
    icon: SvgPlus,
    children: "Create",
    responsiveHideText: true,
  },
};

export const InternalProminence: Story = {
  args: {
    variant: "default",
    prominence: "internal",
    children: "Internal",
  },
};


================================================
FILE: web/lib/opal/src/components/buttons/button/README.md
================================================
# Button

**Import:** `import { Button, type ButtonProps } from "@opal/components";`

A single component that handles both labeled buttons and icon-only buttons. Built on `Interactive.Stateless` > `Interactive.Container`.

## Architecture

```
Interactive.Stateless          <- variant, prominence, interaction, disabled, href, onClick
  └─ Interactive.Container     <- height, rounding, padding (from `size`), border (auto for secondary)
       └─ div.opal-button.interactive-foreground
            ├─ div > Icon?       (interactive-foreground-icon)
            ├─ <span>?           .opal-button-label
            └─ div > RightIcon?  (interactive-foreground-icon)
```

- **Colors are not in the Button.** `Interactive.Stateless` sets `background-color`, `--interactive-foreground`, and `--interactive-foreground-icon` per variant/prominence/state. Descendants opt in via the `.interactive-foreground` and `.interactive-foreground-icon` utility classes.
- **Icon-only buttons render as squares** because `Interactive.Container` enforces `min-width >= height`.
- **Border is automatic for `prominence="secondary"`.** The Container receives `border={prominence === "secondary"}` internally.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"default" \| "action" \| "danger" \| "none"` | `"default"` | Color variant |
| `prominence` | `"primary" \| "secondary" \| "tertiary" \| "internal"` | `"primary"` | Color prominence |
| `interaction` | `"rest" \| "hover" \| "active"` | `"rest"` | JS-controlled interaction override |
| `icon` | `IconFunctionComponent` | — | Left icon |
| `children` | `string` | — | Label text. Omit for icon-only buttons |
| `rightIcon` | `IconFunctionComponent` | — | Right icon |
| `responsiveHideText` | `boolean` | `false` | Hides label on small screens |
| `size` | `SizeVariant` | `"lg"` | Size preset |
| `type` | `"submit" \| "button" \| "reset"` | `"button"` | HTML button type |
| `width` | `WidthVariant` | — | Width preset |
| `tooltip` | `string` | — | Tooltip text |
| `tooltipSide` | `TooltipSide` | `"top"` | Tooltip placement |
| `disabled` | `boolean` | `false` | Disables the button |
| `href` | `string` | — | URL; renders as a link |

## Usage

```tsx
import { Button } from "@opal/components";
import { SvgPlus, SvgArrowRight } from "@opal/icons";

// Primary button with label
<Button variant="default" onClick={handleClick}>Save changes</Button>

// Icon-only button (renders as a square)
<Button icon={SvgPlus} prominence="tertiary" size="sm" />

// Secondary button (auto border)
<Button rightIcon={SvgArrowRight} prominence="secondary">Continue</Button>

// Interaction override (e.g. inside a popover trigger)
<Button icon={SvgFilter} prominence="tertiary" interaction={isOpen ? "hover" : "rest"} />
```


================================================
FILE: web/lib/opal/src/components/buttons/button/components.tsx
================================================
import "@opal/components/tooltip.css";
import { Interactive, type InteractiveStatelessProps } from "@opal/core";
import type {
  ContainerSizeVariants,
  ExtremaSizeVariants,
  RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ButtonContentProps =
  | {
      icon?: IconFunctionComponent;
      children: string | RichStr;
      rightIcon?: IconFunctionComponent;
      responsiveHideText?: never;
    }
  | {
      icon: IconFunctionComponent;
      children?: string | RichStr;
      rightIcon?: IconFunctionComponent;
      responsiveHideText?: boolean;
    };

type ButtonProps = InteractiveStatelessProps &
  ButtonContentProps & {
    /**
     * Size preset — controls gap, text size, and Container height/rounding.
     */
    size?: ContainerSizeVariants;

    /** Tooltip text shown on hover. */
    tooltip?: string;

    /** Width preset. `"fit"` shrink-wraps, `"full"` stretches to parent width. */
    width?: ExtremaSizeVariants;

    /** Which side the tooltip appears on. */
    tooltipSide?: TooltipSide;

    /** Applies disabled styling and suppresses clicks. */
    disabled?: boolean;
  };

// ---------------------------------------------------------------------------
// Button
// ---------------------------------------------------------------------------

function Button({
  icon: Icon,
  children,
  rightIcon: RightIcon,
  size = "lg",
  type = "button",
  width,
  tooltip,
  tooltipSide = "top",
  responsiveHideText = false,
  disabled,
  ...interactiveProps
}: ButtonProps) {
  const isLarge = size === "lg";

  const labelEl = children ? (
    responsiveHideText ? (
      <span className="hidden md:inline whitespace-nowrap">
        <Text
          font={isLarge ? "main-ui-body" : "secondary-body"}
          color="inherit"
        >
          {children}
        </Text>
      </span>
    ) : (
      <Text
        font={isLarge ? "main-ui-body" : "secondary-body"}
        color="inherit"
        nowrap
      >
        {children}
      </Text>
    )
  ) : null;

  const button = (
    <Interactive.Stateless
      type={type}
      disabled={disabled}
      {...interactiveProps}
    >
      <Interactive.Container
        type={type}
        border={interactiveProps.prominence === "secondary"}
        heightVariant={size}
        widthVariant={width}
        roundingVariant={isLarge ? "md" : size === "2xs" ? "xs" : "sm"}
      >
        <div className="flex flex-row items-center gap-1">
          {iconWrapper(Icon, size, !!children)}

          {labelEl}
          {responsiveHideText ? (
            <span className="hidden md:inline-flex">
              {iconWrapper(RightIcon, size, !!children)}
            </span>
          ) : (
            iconWrapper(RightIcon, size, !!children)
          )}
        </div>
      </Interactive.Container>
    </Interactive.Stateless>
  );

  if (tooltip) {
    return (
      <TooltipPrimitive.Root>
        <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
        <TooltipPrimitive.Portal>
          <TooltipPrimitive.Content
            className="opal-tooltip"
            side={tooltipSide}
            sideOffset={4}
          >
            {tooltip}
          </TooltipPrimitive.Content>
        </TooltipPrimitive.Portal>
      </TooltipPrimitive.Root>
    );
  }

  return button;
}

export { Button, type ButtonProps };


================================================
FILE: web/lib/opal/src/components/buttons/chevron.css
================================================
.opal-button-chevron {
  transition: rotate 200ms ease;
}

.interactive[data-interaction="hover"] .opal-button-chevron,
.interactive[data-interaction="active"] .opal-button-chevron {
  rotate: -180deg;
}


================================================
FILE: web/lib/opal/src/components/buttons/chevron.tsx
================================================
import "@opal/components/buttons/chevron.css";
import type { IconProps } from "@opal/types";
import { SvgChevronDownSmall } from "@opal/icons";
import { cn } from "@opal/utils";

/**
 * Chevron icon that rotates 180° when its parent `.interactive` enters
 * hover / active state.  Shared by OpenButton, FilterButton, and any
 * future button that needs an animated dropdown indicator.
 *
 * Stable component identity — never causes React to remount the SVG.
 */
function ChevronIcon({ className, ...props }: IconProps) {
  return (
    <SvgChevronDownSmall
      className={cn(className, "opal-button-chevron")}
      {...props}
    />
  );
}

export { ChevronIcon };


================================================
FILE: web/lib/opal/src/components/buttons/filter-button/FilterButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { FilterButton } from "@opal/components";
import { Disabled as DisabledProvider } from "@opal/core";
import { SvgUser, SvgActions, SvgTag } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof FilterButton> = {
  title: "opal/components/FilterButton",
  component: FilterButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof FilterButton>;

export const Empty: Story = {
  args: {
    icon: SvgUser,
    children: "Everyone",
  },
};

export const Active: Story = {
  args: {
    icon: SvgUser,
    active: true,
    children: "By alice@example.com",
    onClear: () => console.log("clear"),
  },
};

export const Open: Story = {
  args: {
    icon: SvgActions,
    interaction: "hover",
    children: "All Actions",
  },
};

export const ActiveOpen: Story = {
  args: {
    icon: SvgActions,
    active: true,
    interaction: "hover",
    children: "2 selected",
    onClear: () => console.log("clear"),
  },
};

export const Disabled: Story = {
  args: {
    icon: SvgTag,
    children: "All Tags",
  },
  decorators: [
    (Story) => (
      <DisabledProvider disabled>
        <Story />
      </DisabledProvider>
    ),
  ],
};

export const DisabledActive: Story = {
  args: {
    icon: SvgTag,
    active: true,
    children: "2 tags",
    onClear: () => console.log("clear"),
  },
  decorators: [
    (Story) => (
      <DisabledProvider disabled>
        <Story />
      </DisabledProvider>
    ),
  ],
};

export const StateComparison: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <FilterButton icon={SvgUser} onClear={() => undefined}>
        Everyone
      </FilterButton>
      <FilterButton icon={SvgUser} active onClear={() => console.log("clear")}>
        By alice@example.com
      </FilterButton>
    </div>
  ),
};

export const WithTooltip: Story = {
  args: {
    icon: SvgUser,
    children: "Everyone",
    tooltip: "Filter by creator",
    tooltipSide: "bottom",
  },
};


================================================
FILE: web/lib/opal/src/components/buttons/filter-button/README.md
================================================
# FilterButton

**Import:** `import { FilterButton, type FilterButtonProps } from "@opal/components";`

A stateful filter trigger with a built-in chevron (when empty) and a clear button (when selected). Hardcodes `variant="select-filter"` and delegates to `Interactive.Stateful`, adding automatic open-state detection from Radix `data-state`. Designed to sit inside a `Popover.Trigger` for filter dropdowns.

## Relationship to OpenButton

FilterButton shares a similar call stack to `OpenButton`:

```
Interactive.Stateful → Interactive.Container → content row (icon + label + trailing indicator)
```

FilterButton is a **narrower, filter-specific** variant:

- It hardcodes `variant="select-filter"` (OpenButton uses `"select-heavy"`)
- It exposes `active?: boolean` instead of the raw `state` prop (maps to `"selected"` / `"empty"` internally)
- When active, the chevron is hidden via `visibility` and an absolutely-positioned clear `Button` with `prominence="tertiary"` overlays it — placed as a sibling outside the `<button>` to avoid nesting buttons
- It uses the shared `ChevronIcon` from `buttons/chevron` (same as OpenButton)
- It does not support `foldable`, `size`, or `width` — it is always `"lg"`

## Architecture

```
div.relative                               <- bounding wrapper
  Interactive.Stateful                     <- variant="select-filter", interaction, state
    └─ Interactive.Container (button)      <- height="lg", default rounding/padding
         └─ div.interactive-foreground
              ├─ div > Icon                (interactive-foreground-icon)
              ├─ <span>                    label text
              └─ ChevronIcon               (when empty)
                 OR spacer div             (when selected — reserves chevron space)
  div.absolute                             <- clear Button overlay (when selected)
    └─ Button (SvgX, size="2xs", prominence="tertiary")
```

- **Open-state detection** reads `data-state="open"` injected by Radix triggers (e.g. `Popover.Trigger`), falling back to the explicit `interaction` prop.
- **Chevron rotation** uses the shared `ChevronIcon` component and `buttons/chevron.css`, which rotates 180deg when `data-interaction="hover"`.
- **Clear button** is absolutely positioned outside the `<button>` element tree to avoid invalid nested `<button>` elements. An invisible spacer inside the button reserves the same space so layout doesn't shift between states.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `icon` | `IconFunctionComponent` | **required** | Left icon component |
| `children` | `string` | **required** | Label text between icon and trailing indicator |
| `active` | `boolean` | `false` | Whether the filter has an active selection |
| `onClear` | `() => void` | **required** | Called when the clear (X) button is clicked |
| `interaction` | `"rest" \| "hover" \| "active"` | auto | JS-controlled interaction override. Falls back to Radix `data-state="open"`. |
| `tooltip` | `string` | — | Tooltip text shown on hover |
| `tooltipSide` | `TooltipSide` | `"top"` | Which side the tooltip appears on |

## Usage

```tsx
import { FilterButton } from "@opal/components";
import { SvgUser } from "@opal/icons";

// Inside a Popover (auto-detects open state)
<Popover.Trigger asChild>
  <FilterButton
    icon={SvgUser}
    active={hasSelection}
    onClear={() => clearSelection()}
  >
    {hasSelection ? selectionLabel : "Everyone"}
  </FilterButton>
</Popover.Trigger>
```


================================================
FILE: web/lib/opal/src/components/buttons/filter-button/components.tsx
================================================
import {
  Interactive,
  type InteractiveStatefulInteraction,
  type InteractiveStatefulProps,
} from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
import { Button } from "@opal/components/buttons/button/components";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface FilterButtonProps
  extends Omit<InteractiveStatefulProps, "variant" | "state" | "children"> {
  /** Left icon — always visible. */
  icon: IconFunctionComponent;

  /** Label text between icon and trailing indicator. */
  children: string | RichStr;

  /** Whether the filter has an active selection. @default false */
  active?: boolean;

  /** Called when the clear (X) button is clicked in active state. */
  onClear: () => void;

  /** Tooltip text shown on hover. */
  tooltip?: string;

  /** Which side the tooltip appears on. */
  tooltipSide?: TooltipSide;
}

// ---------------------------------------------------------------------------
// FilterButton
// ---------------------------------------------------------------------------

function FilterButton({
  icon: Icon,
  children,
  onClear,
  tooltip,
  tooltipSide = "top",
  active = false,
  interaction,
  ...statefulProps
}: FilterButtonProps) {
  // Derive open state: explicit prop > Radix data-state (injected via Slot chain)
  const dataState = (statefulProps as Record<string, unknown>)["data-state"] as
    | string
    | undefined;
  const resolvedInteraction: InteractiveStatefulInteraction =
    interaction ?? (dataState === "open" ? "hover" : "rest");

  const button = (
    <div className="relative">
      <Interactive.Stateful
        {...statefulProps}
        variant="select-filter"
        interaction={resolvedInteraction}
        state={active ? "selected" : "empty"}
      >
        <Interactive.Container type="button">
          <div className="flex flex-row items-center gap-1">
            {iconWrapper(Icon, "lg", true)}
            <Text font="main-ui-action" color="inherit" nowrap>
              {children}
            </Text>
            <div style={{ visibility: active ? "hidden" : "visible" }}>
              {iconWrapper(ChevronIcon, "lg", true)}
            </div>
          </div>
        </Interactive.Container>
      </Interactive.Stateful>

      {active && (
        <div className="absolute right-2 top-1/2 -translate-y-1/2">
          {/* Force hover state so the X stays visually prominent against
              the inverted selected background — without this it renders
              dimmed and looks disabled. */}
          <Button
            icon={SvgX}
            size="2xs"
            prominence="tertiary"
            tooltip="Clear filter"
            interaction="hover"
            onClick={(e) => {
              e.stopPropagation();
              onClear();
            }}
          />
        </div>
      )}
    </div>
  );

  if (!tooltip) return button;

  return (
    <TooltipPrimitive.Root>
      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
      <TooltipPrimitive.Portal>
        <TooltipPrimitive.Content
          className="opal-tooltip"
          side={tooltipSide}
          sideOffset={4}
        >
          {tooltip}
        </TooltipPrimitive.Content>
      </TooltipPrimitive.Portal>
    </TooltipPrimitive.Root>
  );
}

export { FilterButton, type FilterButtonProps };


================================================
FILE: web/lib/opal/src/components/buttons/icon-wrapper.tsx
================================================
import type { ContainerSizeVariants } from "@opal/types";
import type { IconFunctionComponent } from "@opal/types";
import { cn } from "@opal/utils";

const iconVariants = {
  lg: { padding: "p-0.5", size: 1 },
  md: { padding: "p-0.5", size: 1 },
  sm: { padding: "p-0", size: 1 },
  xs: { padding: "p-0.5", size: 0.75 },
  "2xs": { padding: "p-0", size: 0.75 },
  fit: { padding: "p-0.5", size: 1 },
} as const;

function iconWrapper(
  Icon: IconFunctionComponent | undefined,
  size: ContainerSizeVariants,
  includeSpacer: boolean
) {
  const { padding: p, size: s } = iconVariants[size];

  return Icon ? (
    <div className={cn("interactive-foreground-icon", p)}>
      <Icon
        className="shrink-0"
        style={{
          height: `${s}rem`,
          width: `${s}rem`,
        }}
      />
    </div>
  ) : includeSpacer ? (
    <div />
  ) : null;
}

export { iconWrapper, iconVariants };


================================================
FILE: web/lib/opal/src/components/buttons/line-item-button/README.md
================================================
# LineItemButton

**Import:** `import { LineItemButton, type LineItemButtonProps } from "@opal/components";`

A composite component that wraps `Interactive.Stateful > Interactive.Container > ContentAction` into a single API. Use it for selectable list rows such as model pickers, menu items, or any row that acts like a button.

## Architecture

```
Interactive.Stateful         <- selectVariant, state, interaction, onClick, href, ref
  └─ Interactive.Container   <- type, width, roundingVariant
       └─ ContentAction      <- withInteractive, paddingVariant="lg"
            ├─ Content       <- icon, title, description, sizePreset, variant, ...
            └─ rightChildren
```

`paddingVariant` is hardcoded to `"lg"` and `withInteractive` is always `true`. These are not exposed as props.

## Props

### Interactive surface

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `selectVariant` | `"select-light" \| "select-heavy"` | `"select-light"` | Interactive select variant |
| `state` | `InteractiveStatefulState` | `"empty"` | Value state (`"empty"`, `"filled"`, `"selected"`) |
| `interaction` | `InteractiveStatefulInteraction` | `"rest"` | JS-controlled interaction state override |
| `onClick` | `MouseEventHandler<HTMLElement>` | — | Click handler |
| `href` | `string` | — | Renders an anchor instead of a div |
| `target` | `string` | — | Anchor target (e.g. `"_blank"`) |
| `group` | `string` | — | Interactive group key |
| `ref` | `React.Ref<HTMLElement>` | — | Forwarded ref |

### Sizing

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `roundingVariant` | `InteractiveContainerRoundingVariant` | `"md"` | Corner rounding preset (height is content-driven) |
| `width` | `WidthVariant` | `"full"` | Container width |
| `type` | `"submit" \| "button" \| "reset"` | `"button"` | HTML button type |
| `tooltip` | `string` | — | Tooltip text shown on hover |
| `tooltipSide` | `TooltipSide` | `"top"` | Tooltip side |

### Content (pass-through to ContentAction)

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `title` | `string` | **(required)** | Row label |
| `icon` | `IconFunctionComponent` | — | Left icon |
| `description` | `string` | — | Description below the title |
| `sizePreset` | `SizePreset` | `"headline"` | Content size preset |
| `variant` | `ContentVariant` | `"heading"` | Content layout variant |
| `rightChildren` | `ReactNode` | — | Content after the label (e.g. action button) |

All other `ContentAction` / `Content` props (`editable`, `onTitleChange`, `optional`, `auxIcon`, `tag`, etc.) are also passed through. Note: `withInteractive` is always `true` inside `LineItemButton` and cannot be overridden.

## Usage

```tsx
import { LineItemButton } from "@opal/components";

// Simple selectable row
<LineItemButton
  selectVariant="select-heavy"
  state={isSelected ? "selected" : "empty"}
  roundingVariant="sm"
  onClick={handleClick}
  title="gpt-4o"
  sizePreset="main-ui"
  variant="section"
/>

// With right-side action
<LineItemButton
  selectVariant="select-heavy"
  state={isSelected ? "selected" : "empty"}
  onClick={handleClick}
  title="claude-opus-4"
  sizePreset="main-ui"
  variant="section"
  rightChildren={<Tag title="Default" color="blue" />}
/>
```


================================================
FILE: web/lib/opal/src/components/buttons/line-item-button/components.tsx
================================================
import {
  Interactive,
  type InteractiveStatefulProps,
  InteractiveContainerRoundingVariant,
} from "@opal/core";
import type { ExtremaSizeVariants } from "@opal/types";
import type { TooltipSide } from "@opal/components";
import type { DistributiveOmit } from "@opal/types";
import type { ContentActionProps } from "@opal/layouts/content-action/components";
import { ContentAction } from "@opal/layouts";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentPassthroughProps = DistributiveOmit<
  ContentActionProps,
  "paddingVariant" | "widthVariant" | "ref"
>;

type LineItemButtonOwnProps = Pick<
  InteractiveStatefulProps,
  | "state"
  | "interaction"
  | "onClick"
  | "href"
  | "target"
  | "group"
  | "ref"
  | "type"
> & {
  /** Interactive select variant. @default "select-light" */
  selectVariant?: "select-light" | "select-heavy";

  /** Corner rounding preset (height is always content-driven). @default "md" */
  roundingVariant?: InteractiveContainerRoundingVariant;

  /** Container width. @default "full" */
  width?: ExtremaSizeVariants;

  /** Tooltip text shown on hover. */
  tooltip?: string;

  /** Which side the tooltip appears on. @default "top" */
  tooltipSide?: TooltipSide;
};

type LineItemButtonProps = ContentPassthroughProps & LineItemButtonOwnProps;

// ---------------------------------------------------------------------------
// LineItemButton
// ---------------------------------------------------------------------------

function LineItemButton({
  // Interactive surface
  selectVariant = "select-light",
  state,
  interaction,
  onClick,
  href,
  target,
  group,
  ref,
  type = "button",

  // Sizing
  roundingVariant = "md",
  width = "full",
  tooltip,
  tooltipSide = "top",

  // ContentAction pass-through
  ...contentActionProps
}: LineItemButtonProps) {
  const item = (
    <Interactive.Stateful
      variant={selectVariant}
      state={state}
      interaction={interaction}
      onClick={onClick}
      href={href}
      target={target}
      group={group}
      ref={ref}
    >
      <Interactive.Container
        type={type}
        widthVariant={width}
        heightVariant="lg"
        roundingVariant={roundingVariant}
      >
        <ContentAction
          {...(contentActionProps as ContentActionProps)}
          paddingVariant="fit"
        />
      </Interactive.Container>
    </Interactive.Stateful>
  );

  if (!tooltip) return item;

  return (
    <TooltipPrimitive.Root>
      <TooltipPrimitive.Trigger asChild>{item}</TooltipPrimitive.Trigger>
      <TooltipPrimitive.Portal>
        <TooltipPrimitive.Content
          className="opal-tooltip"
          side={tooltipSide}
          sideOffset={4}
        >
          {tooltip}
        </TooltipPrimitive.Content>
      </TooltipPrimitive.Portal>
    </TooltipPrimitive.Root>
  );
}

export { LineItemButton, type LineItemButtonProps };


================================================
FILE: web/lib/opal/src/components/buttons/open-button/OpenButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { OpenButton } from "@opal/components";
import { Disabled as DisabledProvider } from "@opal/core";
import { SvgSettings } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof OpenButton> = {
  title: "opal/components/OpenButton",
  component: OpenButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof OpenButton>;

export const Default: Story = {
  args: {
    children: "Select option",
  },
};

export const WithIcon: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
  },
};

export const Open: Story = {
  args: {
    interaction: "hover",
    children: "Open state",
  },
};

export const Foldable: Story = {
  args: {
    foldable: true,
    icon: SvgSettings,
    children: "Settings",
  },
};

export const FoldableDisabled: Story = {
  args: {
    foldable: true,
    icon: SvgSettings,
    children: "Settings",
  },
  decorators: [
    (Story) => (
      <DisabledProvider disabled>
        <Story />
      </DisabledProvider>
    ),
  ],
};

export const Sizes: Story = {
  render: () => (
    <div style={{ display: "flex", alignItems: "center", gap: 12 }}>
      {(["lg", "md", "sm", "xs", "2xs"] as const).map((size) => (
        <OpenButton key={size} size={size}>
          {size}
        </OpenButton>
      ))}
    </div>
  ),
};

export const WithTooltip: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
    tooltip: "Open settings",
    tooltipSide: "bottom",
  },
};


================================================
FILE: web/lib/opal/src/components/buttons/open-button/README.md
================================================
# OpenButton

**Import:** `import { OpenButton, type OpenButtonProps } from "@opal/components";`

A trigger button with a built-in chevron that rotates when open. Hardcodes `variant="select-heavy"` and delegates to `Interactive.Stateful`, adding automatic open-state detection from Radix `data-state`. Designed to work automatically with Radix primitives while also supporting explicit control via the `interaction` prop.

## Relationship to SelectButton

OpenButton is structurally near-identical to `SelectButton` — both share the same call stack:

```
Interactive.Stateful → Interactive.Container → content row (icon + label + trailing icon)
```

OpenButton is a **tighter, specialized use-case** of SelectButton:

- It hardcodes `variant="select-heavy"` (SelectButton exposes `variant`)
- It adds a built-in chevron with CSS-driven rotation (SelectButton has no chevron)
- It auto-detects Radix `data-state="open"` to derive `interaction` (SelectButton has no Radix awareness)
- It does not support `rightIcon` (SelectButton does)

Both components support `foldable` using the same pattern: `interactive-foldable-host` class + `Interactive.Foldable` wrapper around the label and trailing icon. When foldable, the left icon stays visible while the rest collapses. If you change the foldable implementation in one, update the other to match.

If you need a general-purpose stateful toggle, use `SelectButton`. If you need a popover/dropdown trigger with a chevron, use `OpenButton`.

## Architecture

```
Interactive.Stateful           <- variant="select-heavy", interaction, state, disabled, onClick
  └─ Interactive.Container     <- height, rounding, padding (from `size`)
       └─ div.opal-button.interactive-foreground [.interactive-foldable-host]
            ├─ div > Icon?                 (interactive-foreground-icon)
            ├─ [Foldable]?                 (wraps label + chevron when foldable)
            │    ├─ <span>?                .opal-button-label
            │    └─ div > ChevronIcon      .opal-open-button-chevron
            └─ <span>? / ChevronIcon       (non-foldable)
```

- **`interaction` controls both the chevron and the hover visual state.** When `interaction` is `"hover"` (explicitly or via Radix `data-state="open"`), the chevron rotates 180° and the hover background activates.
- **Open-state detection** is dual-resolution: the explicit `interaction` prop takes priority; otherwise the component reads `data-state="open"` injected by Radix triggers (e.g. `Popover.Trigger`).
- **Chevron rotation** is CSS-driven via `.interactive[data-interaction="hover"] .opal-open-button-chevron { rotate: -180deg }`. The `ChevronIcon` is a stable named component (not an inline function) to preserve React element identity across renders.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `state` | `"empty" \| "filled" \| "selected"` | `"empty"` | Current value state |
| `interaction` | `"rest" \| "hover" \| "active"` | auto | JS-controlled interaction override. Falls back to Radix `data-state="open"` when omitted. |
| `icon` | `IconFunctionComponent` | — | Left icon component |
| `children` | `string` | — | Content between icon and chevron |
| `foldable` | `boolean` | `false` | When `true`, requires both `icon` and `children`; the left icon stays visible while the label + chevron collapse when not hovered. If `tooltip` is omitted on a disabled foldable button, the label text is used as the tooltip. |
| `size` | `SizeVariant` | `"lg"` | Size preset controlling height, rounding, and padding |
| `width` | `WidthVariant` | — | Width preset |
| `tooltip` | `string` | — | Tooltip text shown on hover |
| `tooltipSide` | `TooltipSide` | `"top"` | Which side the tooltip appears on |
| `disabled` | `boolean` | `false` | Disables the button |

## Usage

```tsx
import { OpenButton } from "@opal/components";
import { SvgFilter } from "@opal/icons";

// Basic usage with Radix Popover (auto-detects open state)
<Popover.Trigger asChild>
  <OpenButton>Select option</OpenButton>
</Popover.Trigger>

// Explicit interaction control
<OpenButton interaction={isExpanded ? "hover" : "rest"} onClick={toggle}>
  Advanced settings
</OpenButton>

// With left icon
<OpenButton icon={SvgFilter} state="filled">
  Filters
</OpenButton>
```


================================================
FILE: web/lib/opal/src/components/buttons/open-button/components.tsx
================================================
import {
  Interactive,
  type InteractiveStatefulProps,
  type InteractiveStatefulInteraction,
} from "@opal/core";
import type {
  ContainerSizeVariants,
  ExtremaSizeVariants,
  RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { InteractiveContainerRoundingVariant } from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/**
 * Content props — a discriminated union on `foldable` that enforces:
 *
 * - `foldable: true`  → `icon` and `children` are required (icon stays visible,
 *                        label + chevron fold away)
 * - `foldable?: false` → at least one of `icon` or `children` must be provided
 */
type OpenButtonContentProps =
  | {
      foldable: true;
      icon: IconFunctionComponent;
      children: string | RichStr;
    }
  | {
      foldable?: false;
      icon?: IconFunctionComponent;
      children: string | RichStr;
    }
  | {
      foldable?: false;
      icon: IconFunctionComponent;
      children?: string | RichStr;
    };

type OpenButtonVariant = "select-light" | "select-heavy" | "select-tinted";

type OpenButtonProps = Omit<InteractiveStatefulProps, "variant"> & {
  variant?: OpenButtonVariant;
} & OpenButtonContentProps & {
    /**
     * Size preset — controls gap, text size, and Container height/rounding.
     */
    size?: ContainerSizeVariants;

    /** Width preset. */
    width?: ExtremaSizeVariants;

    /**
     * Content justify mode. When `"between"`, icon+label group left and
     * chevron pushes to the right edge. Default keeps all items in a
     * tight `gap-1` row.
     */
    justifyContent?: "between";

    /** Tooltip text shown on hover. */
    tooltip?: string;

    /** Which side the tooltip appears on. */
    tooltipSide?: TooltipSide;

    /** Override the default rounding derived from `size`. */
    roundingVariant?: InteractiveContainerRoundingVariant;

    /** Applies disabled styling and suppresses clicks. */
    disabled?: boolean;
  };

// ---------------------------------------------------------------------------
// OpenButton
// ---------------------------------------------------------------------------

function OpenButton({
  icon: Icon,
  children,
  size = "lg",
  foldable,
  width,
  justifyContent,
  tooltip,
  tooltipSide = "top",
  roundingVariant: roundingVariantOverride,
  interaction,
  variant = "select-heavy",
  disabled,
  ...statefulProps
}: OpenButtonProps) {
  // Derive open state: explicit prop → Radix data-state (injected via Slot chain)
  const dataState = (statefulProps as Record<string, unknown>)["data-state"] as
    | string
    | undefined;
  const resolvedInteraction: InteractiveStatefulInteraction =
    interaction ?? (dataState === "open" ? "hover" : "rest");

  const isLarge = size === "lg";

  const labelEl = children ? (
    <Text
      font={isLarge ? "main-ui-body" : "secondary-body"}
      color="inherit"
      nowrap
    >
      {children}
    </Text>
  ) : null;

  const button = (
    <Interactive.Stateful
      variant={variant}
      interaction={resolvedInteraction}
      disabled={disabled}
      {...statefulProps}
    >
      <Interactive.Container
        type="button"
        heightVariant={size}
        widthVariant={width}
        roundingVariant={
          roundingVariantOverride ??
          (isLarge ? "md" : size === "2xs" ? "xs" : "sm")
        }
      >
        <div
          className={cn(
            "flex flex-row items-center",
            justifyContent === "between" ? "w-full justify-between" : "gap-1",
            foldable &&
              justifyContent !== "between" &&
              "interactive-foldable-host"
          )}
        >
          {justifyContent === "between" ? (
            <>
              <span className="flex flex-row items-center gap-1">
                {iconWrapper(Icon, size, !foldable && !!children)}
                {labelEl}
              </span>
              {iconWrapper(ChevronIcon, size, !!children)}
            </>
          ) : foldable ? (
            <>
              {iconWrapper(Icon, size, !foldable && !!children)}
              <Interactive.Foldable>
                {labelEl}
                {iconWrapper(ChevronIcon, size, !!children)}
              </Interactive.Foldable>
            </>
          ) : (
            <>
              {iconWrapper(Icon, size, !foldable && !!children)}
              {labelEl}
              {iconWrapper(ChevronIcon, size, !!children)}
            </>
          )}
        </div>
      </Interactive.Container>
    </Interactive.Stateful>
  );

  const resolvedTooltip =
    tooltip ?? (foldable && disabled && children ? children : undefined);

  if (!resolvedTooltip) return button;

  return (
    <TooltipPrimitive.Root>
      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
      <TooltipPrimitive.Portal>
        <TooltipPrimitive.Content
          className="opal-tooltip"
          side={tooltipSide}
          sideOffset={4}
        >
          <Text>{resolvedTooltip}</Text>
        </TooltipPrimitive.Content>
      </TooltipPrimitive.Portal>
    </TooltipPrimitive.Root>
  );
}

export { OpenButton, type OpenButtonProps };


================================================
FILE: web/lib/opal/src/components/buttons/select-button/README.md
================================================
# SelectButton

**Import:** `import { SelectButton, type SelectButtonProps } from "@opal/components";`

A stateful button for togglable selections — the stateful counterpart to `Button`. Built on `Interactive.Stateful` > `Interactive.Container`.

## Relationship to OpenButton

SelectButton and `OpenButton` are structurally near-identical — both share the same call stack:

```
Interactive.Stateful → Interactive.Container → content row (icon + label + trailing icon)
```

`OpenButton` is a **tighter, specialized use-case** of SelectButton:

- OpenButton hardcodes `variant="select-heavy"` (SelectButton exposes `variant`)
- OpenButton adds a built-in chevron with CSS-driven rotation (SelectButton has no chevron)
- OpenButton auto-detects Radix `data-state="open"` to derive `interaction` (SelectButton has no Radix awareness)
- OpenButton does not support `rightIcon` (SelectButton does)

Both components support `foldable` using the same pattern: `interactive-foldable-host` class + `Interactive.Foldable` wrapper around the label and trailing icon. When foldable, the left icon stays visible while the rest collapses. If you change the foldable implementation in one, update the other to match.

Use SelectButton for general-purpose stateful toggles. Use `OpenButton` for popover/dropdown triggers with a chevron.

## Architecture

```
Interactive.Stateful           <- variant, state, interaction, disabled, onClick
  └─ Interactive.Container     <- height, rounding, padding (from `size`)
       └─ div.opal-select-button.interactive-foreground
            ├─ Icon?           (interactive-foreground-icon)
            ├─ [Foldable]?     (wraps label + rightIcon when foldable)
            │    ├─ <span>     .opal-select-button-label
            │    └─ RightIcon?
            └─ <span>? / RightIcon?  (non-foldable)
```

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"select-light" \| "select-heavy" \| "sidebar"` | `"select-heavy"` | Stateful color variant |
| `state` | `"empty" \| "filled" \| "selected"` | `"empty"` | Current value state |
| `interaction` | `"rest" \| "hover" \| "active"` | `"rest"` | JS-controlled interaction override |
| `icon` | `IconFunctionComponent` | — | Left icon |
| `children` | `string` | — | Label text |
| `rightIcon` | `IconFunctionComponent` | — | Right icon |
| `foldable` | `boolean` | `false` | When `true`, label + rightIcon collapse when not hovered |
| `size` | `SizeVariant` | `"lg"` | Size preset |
| `width` | `WidthVariant` | — | Width preset |
| `tooltip` | `string` | — | Tooltip text |
| `tooltipSide` | `TooltipSide` | `"top"` | Tooltip placement |
| `disabled` | `boolean` | `false` | Disables the button |

## Usage

```tsx
import { SelectButton } from "@opal/components";
import { SvgStar } from "@opal/icons";

// Basic toggle
<SelectButton
  icon={SvgStar}
  state={isFavorite ? "selected" : "empty"}
  onClick={toggleFavorite}
>
  Favorite
</SelectButton>

// Foldable — icon stays visible, label folds away
<SelectButton
  foldable
  icon={SvgStar}
  state="empty"
>
  Favorite
</SelectButton>
```


================================================
FILE: web/lib/opal/src/components/buttons/select-button/components.tsx
================================================
"use client";

import "@opal/components/buttons/select-button/styles.css";
import { Interactive, type InteractiveStatefulProps } from "@opal/core";
import type {
  ContainerSizeVariants,
  ExtremaSizeVariants,
  RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/**
 * Content props — a discriminated union on `foldable` that enforces:
 *
 * - `foldable: true`  → `icon` and `children` are required (icon stays visible,
 *                        label + rightIcon fold away)
 * - `foldable?: false` → at least one of `icon` or `children` must be provided
 */
type SelectButtonContentProps =
  | {
      foldable: true;
      icon: IconFunctionComponent;
      children: string | RichStr;
      rightIcon?: IconFunctionComponent;
    }
  | {
      foldable?: false;
      icon?: IconFunctionComponent;
      children: string | RichStr;
      rightIcon?: IconFunctionComponent;
    }
  | {
      foldable?: false;
      icon: IconFunctionComponent;
      children?: string | RichStr;
      rightIcon?: IconFunctionComponent;
    };

type SelectButtonProps = InteractiveStatefulProps &
  SelectButtonContentProps & {
    /**
     * Size preset — controls gap, text size, and Container height/rounding.
     */
    size?: ContainerSizeVariants;

    /** Tooltip text shown on hover. */
    tooltip?: string;

    /** Width preset. `"fit"` shrink-wraps, `"full"` stretches to parent width. */
    width?: ExtremaSizeVariants;

    /** Which side the tooltip appears on. */
    tooltipSide?: TooltipSide;

    /** Applies disabled styling and suppresses clicks. */
    disabled?: boolean;
  };

// ---------------------------------------------------------------------------
// SelectButton
// ---------------------------------------------------------------------------

function SelectButton({
  icon: Icon,
  children,
  rightIcon: RightIcon,
  size = "lg",
  type = "button",
  foldable,
  width,
  tooltip,
  tooltipSide = "top",
  disabled,
  ...statefulProps
}: SelectButtonProps) {
  const isLarge = size === "lg";

  const labelEl = children ? (
    <Text
      font={isLarge ? "main-ui-body" : "secondary-body"}
      color="inherit"
      nowrap
    >
      {children}
    </Text>
  ) : null;

  const button = (
    <Interactive.Stateful disabled={disabled} {...statefulProps}>
      <Interactive.Container
        type={type}
        heightVariant={size}
        widthVariant={width}
        roundingVariant={isLarge ? "md" : size === "2xs" ? "xs" : "sm"}
      >
        <div
          className={cn(
            "opal-select-button",
            foldable && "interactive-foldable-host"
          )}
        >
          {iconWrapper(Icon, size, !foldable && !!children)}

          {foldable ? (
            <Interactive.Foldable>
              {labelEl}
              {iconWrapper(RightIcon, size, !!children)}
            </Interactive.Foldable>
          ) : (
            <>
              {labelEl}
              {iconWrapper(RightIcon, size, !!children)}
            </>
          )}
        </div>
      </Interactive.Container>
    </Interactive.Stateful>
  );

  const resolvedTooltip =
    tooltip ?? (foldable && disabled && children ? children : undefined);

  if (!resolvedTooltip) return button;

  return (
    <TooltipPrimitive.Root>
      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
      <TooltipPrimitive.Portal>
        <TooltipPrimitive.Content
          className="opal-tooltip"
          side={tooltipSide}
          sideOffset={4}
        >
          <Text>{resolvedTooltip}</Text>
        </TooltipPrimitive.Content>
      </TooltipPrimitive.Portal>
    </TooltipPrimitive.Root>
  );
}

export { SelectButton, type SelectButtonProps };


================================================
FILE: web/lib/opal/src/components/buttons/select-button/styles.css
================================================
/* SelectButton — layout only; colors handled by Interactive.Stateful */

.opal-select-button {
  @apply flex flex-row items-center gap-1;
}


================================================
FILE: web/lib/opal/src/components/buttons/sidebar-tab/README.md
================================================
# SidebarTab

**Import:** `import { SidebarTab, type SidebarTabProps } from "@opal/components";`

A sidebar navigation tab built on `Interactive.Stateful` > `Interactive.Container`. Designed for admin and app sidebars.

## Architecture

```
div.relative
  └─ Interactive.Stateful        <- variant (sidebar-heavy | sidebar-light), state, disabled
       └─ Interactive.Container  <- rounding, height, width
            ├─ Link?             (absolute overlay for client-side navigation)
            ├─ rightChildren?    (absolute, above Link for inline actions)
            └─ ContentAction     (icon + title + truncation spacer)
```

- **`sidebar-heavy`** (default) — muted when unselected (text-03/text-02), bold when selected (text-04/text-03)
- **`sidebar-light`** — uniformly muted across all states (text-02/text-02)
- **Disabled** — both variants use text-02 foreground, transparent background, no hover/active states
- **Navigation** uses an absolutely positioned `<Link>` overlay rather than `href` on the Interactive element, so `rightChildren` can sit above it with `pointer-events-auto`.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"sidebar-heavy" \| "sidebar-light"` | `"sidebar-heavy"` | Sidebar color variant |
| `selected` | `boolean` | `false` | Active/selected state |
| `icon` | `IconFunctionComponent` | — | Left icon |
| `children` | `ReactNode` | — | Label text or custom content |
| `disabled` | `boolean` | `false` | Disables the tab |
| `folded` | `boolean` | `false` | Collapses label, shows tooltip on hover |
| `nested` | `boolean` | `false` | Renders spacer instead of icon for indented items |
| `href` | `string` | — | Client-side navigation URL |
| `onClick` | `MouseEventHandler` | — | Click handler |
| `type` | `ButtonType` | — | HTML button type |
| `rightChildren` | `ReactNode` | — | Actions rendered on the right side |

## Usage

```tsx
import { SidebarTab } from "@opal/components";
import { SvgSettings, SvgLock } from "@opal/icons";

// Active tab
<SidebarTab icon={SvgSettings} href="/admin/settings" selected>
  Settings
</SidebarTab>

// Muted variant
<SidebarTab icon={SvgSettings} variant="sidebar-light">
  Exit Admin Panel
</SidebarTab>

// Disabled enterprise-only tab
<SidebarTab icon={SvgLock} disabled>
  Groups
</SidebarTab>

// Folded sidebar (icon only, tooltip on hover)
<SidebarTab icon={SvgSettings} href="/admin/settings" folded>
  Settings
</SidebarTab>
```


================================================
FILE: web/lib/opal/src/components/buttons/sidebar-tab/SidebarTab.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { SidebarTab } from "@opal/components/buttons/sidebar-tab/components";
import {
  SvgSettings,
  SvgUsers,
  SvgLock,
  SvgArrowUpCircle,
  SvgTrash,
} from "@opal/icons";
import { Button } from "@opal/components";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof SidebarTab> = {
  title: "opal/components/SidebarTab",
  component: SidebarTab,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 260, background: "var(--background-neutral-01)" }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof SidebarTab>;

export const Default: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
  },
};

export const Selected: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
    selected: true,
  },
};

export const Light: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
    variant: "sidebar-light",
  },
};

export const Disabled: Story = {
  args: {
    icon: SvgLock,
    children: "Enterprise Only",
    disabled: true,
  },
};

export const WithRightChildren: Story = {
  args: {
    icon: SvgUsers,
    children: "Users",
    rightChildren: (
      <Button
        icon={SvgTrash}
        size="xs"
        prominence="tertiary"
        variant="danger"
      />
    ),
  },
};

export const SidebarExample: Story = {
  render: () => (
    <div className="flex flex-col">
      <SidebarTab icon={SvgSettings} selected>
        LLM Models
      </SidebarTab>
      <SidebarTab icon={SvgSettings}>Web Search</SidebarTab>
      <SidebarTab icon={SvgUsers}>Users</SidebarTab>
      <SidebarTab icon={SvgLock} disabled>
        Groups
      </SidebarTab>
      <SidebarTab icon={SvgLock} disabled>
        SCIM
      </SidebarTab>
      <SidebarTab icon={SvgArrowUpCircle}>Upgrade Plan</SidebarTab>
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/buttons/sidebar-tab/components.tsx
================================================
"use client";

import React from "react";
import type { ButtonType, IconFunctionComponent } from "@opal/types";
import type { Route } from "next";
import { Interactive, type InteractiveStatefulVariant } from "@opal/core";
import { ContentAction } from "@opal/layouts";
import { Text } from "@opal/components";
import Link from "next/link";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import "@opal/components/tooltip.css";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface SidebarTabProps {
  /** Collapses the label, showing only the icon. */
  folded?: boolean;

  /** Marks this tab as the currently active/selected item. */
  selected?: boolean;

  /**
   * Sidebar color variant.
   * @default "sidebar-heavy"
   */
  variant?: Extract<
    InteractiveStatefulVariant,
    "sidebar-light" | "sidebar-heavy"
  >;

  /** Renders an empty spacer in place of the icon for nested items. */
  nested?: boolean;

  /** Disables the tab — applies muted colors and suppresses clicks. */
  disabled?: boolean;

  onClick?: React.MouseEventHandler<HTMLElement>;
  href?: string;
  type?: ButtonType;
  icon?: IconFunctionComponent;
  children?: React.ReactNode;

  /** Content rendered on the right side (e.g. action buttons). */
  rightChildren?: React.ReactNode;
}

// ---------------------------------------------------------------------------
// SidebarTab
// ---------------------------------------------------------------------------

/**
 * Sidebar navigation tab built on `Interactive.Stateful` > `Interactive.Container`.
 *
 * Uses `sidebar-heavy` (default) or `sidebar-light` (via `variant`) variants
 * for color styling. Supports an overlay `Link` for client-side navigation,
 * `rightChildren` for inline actions, and folded mode with an auto-tooltip.
 */
function SidebarTab({
  folded,
  selected,
  variant = "sidebar-heavy",
  nested,
  disabled,

  onClick,
  href,
  type,
  icon,
  rightChildren,
  children,
}: SidebarTabProps) {
  const Icon =
    icon ??
    (nested
      ? ((() => (
          <div className="w-6" aria-hidden="true" />
        )) as IconFunctionComponent)
      : null);

  // The `rightChildren` node is absolutely positioned to sit on top of the
  // overlay Link. A zero-width spacer reserves truncation space for the title.
  const truncationSpacer = rightChildren && (
    <div className="w-0 group-hover/SidebarTab:w-6" />
  );

  const content = (
    <div className="relative">
      <Interactive.Stateful
        variant={variant}
        state={selected ? "selected" : "empty"}
        disabled={disabled}
        onClick={onClick}
        type="button"
        group="group/SidebarTab"
      >
        <Interactive.Container
          roundingVariant="sm"
          heightVariant="lg"
          widthVariant="full"
          type={type}
        >
          {href && !disabled && (
            <Link
              href={href as Route}
              scroll={false}
              className="absolute z-[99] inset-0 rounded-08"
              tabIndex={-1}
            />
          )}

          {!folded && rightChildren && (
            <div className="absolute z-[100] right-1.5 top-0 bottom-0 flex flex-col justify-center items-center pointer-events-auto">
              {rightChildren}
            </div>
          )}

          {typeof children === "string" ? (
            <ContentAction
              icon={Icon ?? undefined}
              title={folded ? "" : children}
              sizePreset="main-ui"
              variant="body"
              widthVariant="full"
              paddingVariant="fit"
              rightChildren={truncationSpacer}
            />
          ) : (
            <div className="flex flex-row items-center gap-2 flex-1">
              {Icon && (
                <div className="flex items-center justify-center p-0.5">
                  <Icon className="h-[1rem] w-[1rem] text-text-03" />
                </div>
              )}
              {children}
              {truncationSpacer}
            </div>
          )}
        </Interactive.Container>
      </Interactive.Stateful>
    </div>
  );

  if (typeof children !== "string") return content;
  if (folded) {
    return (
      <TooltipPrimitive.Root>
        <TooltipPrimitive.Trigger asChild>{content}</TooltipPrimitive.Trigger>
        <TooltipPrimitive.Portal>
          <TooltipPrimitive.Content
            className="opal-tooltip"
            side="right"
            sideOffset={4}
          >
            <Text>{children}</Text>
          </TooltipPrimitive.Content>
        </TooltipPrimitive.Portal>
      </TooltipPrimitive.Root>
    );
  }
  return content;
}

export { SidebarTab, type SidebarTabProps };


================================================
FILE: web/lib/opal/src/components/cards/card/Card.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Card } from "@opal/components";

const BACKGROUND_VARIANTS = ["none", "light", "heavy"] as const;
const BORDER_VARIANTS = ["none", "dashed", "solid"] as const;
const PADDING_VARIANTS = ["fit", "2xs", "xs", "sm", "md", "lg"] as const;
const ROUNDING_VARIANTS = ["xs", "sm", "md", "lg"] as const;

const meta: Meta<typeof Card> = {
  title: "opal/components/Card",
  component: Card,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Card>;

export const Default: Story = {
  render: () => (
    <Card>
      <p>
        Default card with light background, no border, sm padding, md rounding.
      </p>
    </Card>
  ),
};

export const BackgroundVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {BACKGROUND_VARIANTS.map((bg) => (
        <Card key={bg} background={bg} border="solid">
          <p>backgroundVariant: {bg}</p>
        </Card>
      ))}
    </div>
  ),
};

export const BorderVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {BORDER_VARIANTS.map((border) => (
        <Card key={border} border={border}>
          <p>borderVariant: {border}</p>
        </Card>
      ))}
    </div>
  ),
};

export const PaddingVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {PADDING_VARIANTS.map((padding) => (
        <Card key={padding} padding={padding} border="solid">
          <p>paddingVariant: {padding}</p>
        </Card>
      ))}
    </div>
  ),
};

export const RoundingVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {ROUNDING_VARIANTS.map((rounding) => (
        <Card key={rounding} rounding={rounding} border="solid">
          <p>roundingVariant: {rounding}</p>
        </Card>
      ))}
    </div>
  ),
};

export const AllCombinations: Story = {
  render: () => (
    <div className="flex flex-col gap-8">
      {PADDING_VARIANTS.map((padding) => (
        <div key={padding}>
          <p className="font-bold pb-2">paddingVariant: {padding}</p>
          <div className="grid grid-cols-3 gap-4">
            {BACKGROUND_VARIANTS.map((bg) =>
              BORDER_VARIANTS.map((border) => (
                <Card
                  key={`${padding}-${bg}-${border}`}
                  padding={padding}
                  background={bg}
                  border={border}
                >
                  <p className="text-xs">
                    bg: {bg}, border: {border}
                  </p>
                </Card>
              ))
            )}
          </div>
        </div>
      ))}
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/cards/card/README.md
================================================
# Card

**Import:** `import { Card, type CardProps } from "@opal/components";`

A plain container component with configurable background, border, padding, and rounding. Uses a simple `<div>` internally with `overflow-clip`.

## Architecture

Padding and rounding are controlled independently:

| `padding` | Class   |
|-----------|---------|
| `"lg"`    | `p-6`   |
| `"md"`    | `p-4`   |
| `"sm"`    | `p-2`   |
| `"xs"`    | `p-1`   |
| `"2xs"`   | `p-0.5` |
| `"fit"`   | `p-0`   |

| `rounding` | Class        |
|------------|--------------|
| `"xs"`     | `rounded-04` |
| `"sm"`     | `rounded-08` |
| `"md"`     | `rounded-12` |
| `"lg"`     | `rounded-16` |

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `padding` | `PaddingVariants` | `"sm"` | Padding preset |
| `rounding` | `RoundingVariants` | `"md"` | Border-radius preset |
| `background` | `"none" \| "light" \| "heavy"` | `"light"` | Background fill intensity |
| `border` | `"none" \| "dashed" \| "solid"` | `"none"` | Border style |
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
| `children` | `React.ReactNode` | — | Card content |

## Usage

```tsx
import { Card } from "@opal/components";

// Default card (light background, no border, sm padding, md rounding)
<Card>
  <h2>Card Title</h2>
  <p>Card content</p>
</Card>

// Large padding + rounding with solid border
<Card padding="lg" rounding="lg" border="solid">
  <p>Spacious card</p>
</Card>

// Compact card with solid border
<Card padding="xs" rounding="sm" border="solid">
  <p>Compact card</p>
</Card>

// Empty state card
<Card background="none" border="dashed">
  <p>No items yet</p>
</Card>
```


================================================
FILE: web/lib/opal/src/components/cards/card/components.tsx
================================================
import "@opal/components/cards/card/styles.css";
import type { PaddingVariants, RoundingVariants } from "@opal/types";
import { cardPaddingVariants, cardRoundingVariants } from "@opal/shared";
import { cn } from "@opal/utils";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type BackgroundVariant = "none" | "light" | "heavy";
type BorderVariant = "none" | "dashed" | "solid";

type CardProps = {
  /**
   * Padding preset.
   *
   * | Value   | Class   |
   * |---------|---------|
   * | `"lg"`  | `p-6`   |
   * | `"md"`  | `p-4`   |
   * | `"sm"`  | `p-2`   |
   * | `"xs"`  | `p-1`   |
   * | `"2xs"` | `p-0.5` |
   * | `"fit"` | `p-0`   |
   *
   * @default "md"
   */
  padding?: PaddingVariants;

  /**
   * Border-radius preset.
   *
   * | Value  | Class        |
   * |--------|--------------|
   * | `"xs"` | `rounded-04` |
   * | `"sm"` | `rounded-08` |
   * | `"md"` | `rounded-12` |
   * | `"lg"` | `rounded-16` |
   *
   * @default "md"
   */
  rounding?: RoundingVariants;

  /**
   * Background fill intensity.
   * - `"none"`: transparent background.
   * - `"light"`: subtle tinted background (`bg-background-tint-00`).
   * - `"heavy"`: stronger tinted background (`bg-background-tint-01`).
   *
   * @default "light"
   */
  background?: BackgroundVariant;

  /**
   * Border style.
   * - `"none"`: no border.
   * - `"dashed"`: dashed border.
   * - `"solid"`: solid border.
   *
   * @default "none"
   */
  border?: BorderVariant;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;

  children?: React.ReactNode;
};

// ---------------------------------------------------------------------------
// Card
// ---------------------------------------------------------------------------

function Card({
  padding: paddingProp = "md",
  rounding: roundingProp = "md",
  background = "light",
  border = "none",
  ref,
  children,
}: CardProps) {
  const padding = cardPaddingVariants[paddingProp];
  const rounding = cardRoundingVariants[roundingProp];

  return (
    <div
      ref={ref}
      className={cn("opal-card", padding, rounding)}
      data-background={background}
      data-border={border}
    >
      {children}
    </div>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { Card, type CardProps, type BackgroundVariant, type BorderVariant };


================================================
FILE: web/lib/opal/src/components/cards/card/styles.css
================================================
.opal-card {
  @apply w-full overflow-clip;
}

/* Background variants */
.opal-card[data-background="none"] {
  @apply bg-transparent;
}

.opal-card[data-background="light"] {
  @apply bg-background-tint-00;
}

.opal-card[data-background="heavy"] {
  @apply bg-background-tint-01;
}

/* Border variants */
.opal-card[data-border="none"] {
  border: none;
}

.opal-card[data-border="dashed"] {
  @apply border border-dashed;
}

.opal-card[data-border="solid"] {
  @apply border;
}


================================================
FILE: web/lib/opal/src/components/cards/empty-message-card/EmptyMessageCard.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { EmptyMessageCard } from "@opal/components";
import { SvgSparkle, SvgUsers } from "@opal/icons";

const PADDING_VARIANTS = ["fit", "2xs", "xs", "sm", "md", "lg"] as const;

const meta: Meta<typeof EmptyMessageCard> = {
  title: "opal/components/EmptyMessageCard",
  component: EmptyMessageCard,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof EmptyMessageCard>;

export const Default: Story = {
  args: {
    title: "No items available.",
  },
};

export const WithCustomIcon: Story = {
  args: {
    icon: SvgSparkle,
    title: "No agents selected.",
  },
};

export const PaddingVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {PADDING_VARIANTS.map((padding) => (
        <EmptyMessageCard
          key={padding}
          padding={padding}
          title={`padding: ${padding}`}
        />
      ))}
    </div>
  ),
};

export const Multiple: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      <EmptyMessageCard title="No models available." />
      <EmptyMessageCard icon={SvgSparkle} title="No agents selected." />
      <EmptyMessageCard icon={SvgUsers} title="No groups added." />
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/cards/empty-message-card/README.md
================================================
# EmptyMessageCard

**Import:** `import { EmptyMessageCard, type EmptyMessageCardProps } from "@opal/components";`

A pre-configured Card for empty states. Renders a transparent card with a dashed border containing a muted icon and message text using the `Content` layout.

## Props

| Prop      | Type                        | Default    | Description                      |
| --------- | --------------------------- | ---------- | -------------------------------- |
| `icon`    | `IconFunctionComponent`     | `SvgEmpty` | Icon displayed alongside the title |
| `title`   | `string`                    | —          | Primary message text (required)  |
| `padding` | `PaddingVariants`           | `"sm"`     | Padding preset for the card      |
| `ref`     | `React.Ref<HTMLDivElement>` | —          | Ref forwarded to the root div    |

## Usage

```tsx
import { EmptyMessageCard } from "@opal/components";
import { SvgSparkle, SvgFileText } from "@opal/icons";

// Default empty state
<EmptyMessageCard title="No items yet." />

// With custom icon
<EmptyMessageCard icon={SvgSparkle} title="No agents selected." />

// With custom padding
<EmptyMessageCard padding="xs" icon={SvgFileText} title="No documents available." />
```


================================================
FILE: web/lib/opal/src/components/cards/empty-message-card/components.tsx
================================================
import { Card } from "@opal/components/cards/card/components";
import { Content } from "@opal/layouts";
import { SvgEmpty } from "@opal/icons";
import type { IconFunctionComponent, PaddingVariants } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type EmptyMessageCardProps = {
  /** Icon displayed alongside the title. */
  icon?: IconFunctionComponent;

  /** Primary message text. */
  title: string;

  /** Padding preset for the card. @default "md" */
  padding?: PaddingVariants;

  /** Ref forwarded to the root Card div. */
  ref?: React.Ref<HTMLDivElement>;
};

// ---------------------------------------------------------------------------
// EmptyMessageCard
// ---------------------------------------------------------------------------

function EmptyMessageCard({
  icon = SvgEmpty,
  title,
  padding = "md",
  ref,
}: EmptyMessageCardProps) {
  return (
    <Card
      ref={ref}
      background="none"
      border="dashed"
      padding={padding}
      rounding="md"
    >
      <Content
        icon={icon}
        title={title}
        sizePreset="secondary"
        variant="body"
        prominence="muted"
      />
    </Card>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { EmptyMessageCard, type EmptyMessageCardProps };


================================================
FILE: web/lib/opal/src/components/cards/select-card/README.md
================================================
# SelectCard

**Import:** `import { SelectCard, type SelectCardProps } from "@opal/components";`

A stateful interactive card — the card counterpart to [`SelectButton`](../../buttons/select-button/README.md). Built on `Interactive.Stateful` (Slot) with a structural `<div>` that owns padding, rounding, border, and overflow. Always uses the `select-card` Interactive.Stateful variant internally.

## Relationship to Card

`Card` is a plain, non-interactive container. `SelectCard` adds stateful interactivity (hover, active, disabled, state-driven colors) by wrapping its root div with `Interactive.Stateful`. Both share the same independent `padding` / `rounding` API.

## Relationship to SelectButton

SelectCard and SelectButton share the same call stack:

```
Interactive.Stateful → structural element → content
```

The key differences:

- SelectCard renders a `<div>` (not `Interactive.Container`) — cards have their own rounding scale and don't need Container's height/min-width.
- SelectCard has no `foldable` prop — use `Interactive.Foldable` directly inside children.
- SelectCard's children are fully composable — use `CardHeaderLayout`, `ContentAction`, `Content`, buttons, etc. inside.

## Architecture

```
Interactive.Stateful (variant="select-card")  <- state, interaction, disabled, onClick
  └─ div.opal-select-card                    <- padding, rounding, border, overflow
       └─ children (composable)
```

The `Interactive.Stateful` Slot merges onto the div, producing a single DOM element with both `.opal-select-card` and `.interactive` classes plus `data-interactive-*` attributes. This activates the Stateful color matrix for backgrounds and `--interactive-foreground` / `--interactive-foreground-icon` CSS properties for descendants.

## Props

Inherits **all** props from `InteractiveStatefulProps` (except `variant`, which is hardcoded to `select-card`) plus:

| Prop | Type | Default | Description |
|---|---|---|---|
| `padding` | `PaddingVariants` | `"sm"` | Padding preset |
| `rounding` | `RoundingVariants` | `"lg"` | Border-radius preset |
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
| `children` | `React.ReactNode` | — | Card content |

### Padding scale

| `padding` | Class   |
|-----------|---------|
| `"lg"`    | `p-6`   |
| `"md"`    | `p-4`   |
| `"sm"`    | `p-2`   |
| `"xs"`    | `p-1`   |
| `"2xs"`   | `p-0.5` |
| `"fit"`   | `p-0`   |

### Rounding scale

| `rounding` | Class        |
|------------|--------------|
| `"xs"`     | `rounded-04` |
| `"sm"`     | `rounded-08` |
| `"md"`     | `rounded-12` |
| `"lg"`     | `rounded-16` |

### State colors (`select-card` variant)

| State | Rest background | Rest foreground |
|---|---|---|
| `empty` | transparent | `text-04` / icon `text-03` |
| `filled` | `background-tint-00` | `text-04` / icon `text-03` |
| `selected` | `action-link-01` | `action-link-05` |

The selected state also gets a `border-action-link-05` via SelectCard's CSS.

## CSS

SelectCard's stylesheet (`styles.css`) provides:

- `w-full overflow-clip border` on all states
- `border-action-link-05` when `data-interactive-state="selected"`

All background and foreground colors come from the Interactive.Stateful CSS, not from SelectCard.

## Usage

### Provider selection card

```tsx
import { SelectCard } from "@opal/components";
import { CardHeaderLayout } from "@opal/layouts";

<SelectCard state="selected" onClick={handleClick}>
  <CardHeaderLayout
    icon={SvgGlobe}
    title="Google"
    description="Search engine"
    sizePreset="main-ui"
    variant="section"
    rightChildren={<Button icon={SvgCheckSquare} variant="action" prominence="tertiary">Current Default</Button>}
    bottomRightChildren={
      <Button icon={SvgSettings} size="sm" prominence="tertiary" />
    }
  />
</SelectCard>
```

### Disconnected state (clickable)

```tsx
<SelectCard state="empty" onClick={handleConnect}>
  <CardHeaderLayout
    icon={SvgCloud}
    title="OpenAI"
    description="Not configured"
    sizePreset="main-ui"
    variant="section"
    rightChildren={<Button rightIcon={SvgArrowExchange} prominence="tertiary">Connect</Button>}
  />
</SelectCard>
```

### With foldable hover-reveal

```tsx
<SelectCard state="filled">
  <CardHeaderLayout
    icon={SvgCloud}
    title="OpenAI"
    description="Connected"
    sizePreset="main-ui"
    variant="section"
    rightChildren={
      <div className="interactive-foldable-host flex items-center">
        <Interactive.Foldable>
          <Button rightIcon={SvgArrowRightCircle} prominence="tertiary">
            Set as Default
          </Button>
        </Interactive.Foldable>
      </div>
    }
  />
</SelectCard>
```


================================================
FILE: web/lib/opal/src/components/cards/select-card/SelectCard.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { SelectCard } from "@opal/components";
import { Button } from "@opal/components";
import { Content } from "@opal/layouts";
import {
  SvgArrowExchange,
  SvgArrowRightCircle,
  SvgCheckSquare,
  SvgGlobe,
  SvgSettings,
  SvgUnplug,
} from "@opal/icons";
import { Interactive } from "@opal/core";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { Decorator } from "@storybook/react";

const withTooltipProvider: Decorator = (Story) => (
  <TooltipPrimitive.Provider>
    <Story />
  </TooltipPrimitive.Provider>
);

const STATES = ["empty", "filled", "selected"] as const;
const PADDING_VARIANTS = ["fit", "2xs", "xs", "sm", "md", "lg"] as const;
const ROUNDING_VARIANTS = ["xs", "sm", "md", "lg"] as const;

const meta = {
  title: "opal/components/SelectCard",
  component: SelectCard,
  tags: ["autodocs"],
  decorators: [withTooltipProvider],
  parameters: {
    layout: "centered",
  },
} satisfies Meta<typeof SelectCard>;

export default meta;

type Story = StoryObj<typeof meta>;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

export const Default: Story = {
  render: () => (
    <div className="w-96">
      <SelectCard state="empty">
        <div className="p-2">
          <Content
            sizePreset="main-ui"
            variant="section"
            icon={SvgGlobe}
            title="Google Search"
            description="Web search provider"
          />
        </div>
      </SelectCard>
    </div>
  ),
};

export const AllStates: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {STATES.map((state) => (
        <SelectCard key={state} state={state}>
          <div className="p-2">
            <Content
              sizePreset="main-ui"
              variant="section"
              icon={SvgGlobe}
              title={`State: ${state}`}
              description="Hover to see interaction states."
            />
          </div>
        </SelectCard>
      ))}
    </div>
  ),
};

export const Clickable: Story = {
  render: () => (
    <div className="w-96">
      <SelectCard state="empty" onClick={() => alert("Card clicked")}>
        <div className="p-2">
          <Content
            sizePreset="main-ui"
            variant="section"
            icon={SvgGlobe}
            title="Clickable Card"
            description="Click anywhere on this card."
          />
        </div>
      </SelectCard>
    </div>
  ),
};

export const WithActions: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-[28rem]">
      {/* Disconnected */}
      <SelectCard state="empty" onClick={() => {}}>
        <div className="flex flex-row items-stretch w-full">
          <div className="flex-1 p-2">
            <Content
              sizePreset="main-ui"
              variant="section"
              icon={SvgGlobe}
              title="Disconnected"
              description="Click to connect."
            />
          </div>
          <div className="flex items-center">
            <Button prominence="tertiary" rightIcon={SvgArrowExchange}>
              Connect
            </Button>
          </div>
        </div>
      </SelectCard>

      {/* Connected with foldable */}
      <SelectCard state="filled">
        <div className="flex flex-row items-stretch w-full">
          <div className="flex-1 p-2">
            <Content
              sizePreset="main-ui"
              variant="section"
              icon={SvgGlobe}
              title="Connected"
              description="Hover to reveal Set as Default."
            />
          </div>
          <div className="flex flex-col items-end justify-between">
            <div className="interactive-foldable-host flex items-center">
              <Interactive.Foldable>
                <Button prominence="tertiary" rightIcon={SvgArrowRightCircle}>
                  Set as Default
                </Button>
              </Interactive.Foldable>
            </div>
            <div className="flex flex-row px-1 pb-1">
              <Button
                icon={SvgUnplug}
                tooltip="Disconnect"
                prominence="tertiary"
                size="sm"
              />
              <Button
                icon={SvgSettings}
                tooltip="Edit"
                prominence="tertiary"
                size="sm"
              />
            </div>
          </div>
        </div>
      </SelectCard>

      {/* Selected */}
      <SelectCard state="selected">
        <div className="flex flex-row items-stretch w-full">
          <div className="flex-1 p-2">
            <Content
              sizePreset="main-ui"
              variant="section"
              icon={SvgGlobe}
              title="Selected"
              description="Currently the default provider."
            />
          </div>
          <div className="flex flex-col items-end justify-between">
            <Button
              variant="action"
              prominence="tertiary"
              icon={SvgCheckSquare}
            >
              Current Default
            </Button>
            <div className="flex flex-row px-1 pb-1">
              <Button
                icon={SvgUnplug}
                tooltip="Disconnect"
                prominence="tertiary"
                size="sm"
              />
              <Button
                icon={SvgSettings}
                tooltip="Edit"
                prominence="tertiary"
                size="sm"
              />
            </div>
          </div>
        </div>
      </SelectCard>
    </div>
  ),
};

export const PaddingVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {PADDING_VARIANTS.map((padding) => (
        <SelectCard key={padding} state="filled" padding={padding}>
          <Content
            sizePreset="main-ui"
            variant="section"
            icon={SvgGlobe}
            title={`paddingVariant: ${padding}`}
            description="Shows padding differences."
          />
        </SelectCard>
      ))}
    </div>
  ),
};

export const RoundingVariants: Story = {
  render: () => (
    <div className="flex flex-col gap-4 w-96">
      {ROUNDING_VARIANTS.map((rounding) => (
        <SelectCard key={rounding} state="filled" rounding={rounding}>
          <Content
            sizePreset="main-ui"
            variant="section"
            icon={SvgGlobe}
            title={`roundingVariant: ${rounding}`}
            description="Shows rounding differences."
          />
        </SelectCard>
      ))}
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/cards/select-card/components.tsx
================================================
import "@opal/components/cards/select-card/styles.css";
import type { PaddingVariants, RoundingVariants } from "@opal/types";
import { cardPaddingVariants, cardRoundingVariants } from "@opal/shared";
import { cn } from "@opal/utils";
import { Interactive, type InteractiveStatefulProps } from "@opal/core";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type SelectCardProps = Omit<InteractiveStatefulProps, "variant"> & {
  /**
   * Padding preset.
   *
   * | Value   | Class   |
   * |---------|---------|
   * | `"lg"`  | `p-6`   |
   * | `"md"`  | `p-4`   |
   * | `"sm"`  | `p-2`   |
   * | `"xs"`  | `p-1`   |
   * | `"2xs"` | `p-0.5` |
   * | `"fit"` | `p-0`   |
   *
   * @default "md"
   */
  padding?: PaddingVariants;

  /**
   * Border-radius preset.
   *
   * | Value  | Class        |
   * |--------|--------------|
   * | `"xs"` | `rounded-04` |
   * | `"sm"` | `rounded-08` |
   * | `"md"` | `rounded-12` |
   * | `"lg"` | `rounded-16` |
   *
   * @default "md"
   */
  rounding?: RoundingVariants;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;

  children?: React.ReactNode;
};

// ---------------------------------------------------------------------------
// SelectCard
// ---------------------------------------------------------------------------

/**
 * A stateful interactive card — the card counterpart to `SelectButton`.
 *
 * Built on `Interactive.Stateful` (Slot) → a structural `<div>`. The
 * Stateful system owns background and foreground colors; the card owns
 * padding, rounding, border, and overflow.
 *
 * Children are fully composable — use `ContentAction`, `Content`, buttons,
 * `Interactive.Foldable`, etc. inside.
 *
 * @example
 * ```tsx
 * <SelectCard state="selected" onClick={handleClick}>
 *   <ContentAction
 *     icon={SvgGlobe}
 *     title="Google"
 *     description="Search engine"
 *     rightChildren={<Button>Set as Default</Button>}
 *   />
 * </SelectCard>
 * ```
 */
function SelectCard({
  padding: paddingProp = "md",
  rounding: roundingProp = "md",
  ref,
  children,
  ...statefulProps
}: SelectCardProps) {
  const padding = cardPaddingVariants[paddingProp];
  const rounding = cardRoundingVariants[roundingProp];

  return (
    <Interactive.Stateful {...statefulProps} variant="select-card">
      <div ref={ref} className={cn("opal-select-card", padding, rounding)}>
        {children}
      </div>
    </Interactive.Stateful>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { SelectCard, type SelectCardProps };


================================================
FILE: web/lib/opal/src/components/cards/select-card/styles.css
================================================
/* SelectCard — structural styles; colors handled by Interactive.Stateful */

.opal-select-card {
  @apply w-full overflow-clip border;
}

.opal-select-card[data-interactive-state="selected"] {
  @apply border-action-link-05;
}


================================================
FILE: web/lib/opal/src/components/index.ts
================================================
import "@opal/components/tooltip.css";

/* Shared types */
export type TooltipSide = "top" | "bottom" | "left" | "right";

/* Button */
export {
  Button,
  type ButtonProps,
} from "@opal/components/buttons/button/components";

/* SelectButton */
export {
  SelectButton,
  type SelectButtonProps,
} from "@opal/components/buttons/select-button/components";

/* OpenButton */
export {
  OpenButton,
  type OpenButtonProps,
} from "@opal/components/buttons/open-button/components";

/* FilterButton */
export {
  FilterButton,
  type FilterButtonProps,
} from "@opal/components/buttons/filter-button/components";

/* LineItemButton */
export {
  LineItemButton,
  type LineItemButtonProps,
} from "@opal/components/buttons/line-item-button/components";

/* SidebarTab */
export {
  SidebarTab,
  type SidebarTabProps,
} from "@opal/components/buttons/sidebar-tab/components";

/* Text */
export {
  Text,
  type TextProps,
  type TextFont,
  type TextColor,
} from "@opal/components/text/components";

/* Tag */
export {
  Tag,
  type TagProps,
  type TagColor,
} from "@opal/components/tag/components";

/* Card */
export {
  Card,
  type CardProps,
  type BackgroundVariant,
  type BorderVariant,
} from "@opal/components/cards/card/components";

/* SelectCard */
export {
  SelectCard,
  type SelectCardProps,
} from "@opal/components/cards/select-card/components";

/* EmptyMessageCard */
export {
  EmptyMessageCard,
  type EmptyMessageCardProps,
} from "@opal/components/cards/empty-message-card/components";

/* Pagination */
export {
  Pagination,
  type PaginationProps,
  type PaginationSize,
} from "@opal/components/pagination/components";

/* Table */
export { Table } from "@opal/components/table/components";
export { createTableColumns } from "@opal/components/table/columns";
export type { DataTableProps } from "@opal/components/table/components";


================================================
FILE: web/lib/opal/src/components/pagination/Pagination.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Pagination } from "@opal/components";
import { useState } from "react";

const meta: Meta<typeof Pagination> = {
  title: "opal/components/Pagination",
  component: Pagination,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Pagination>;

// ===========================================================================
// variant="simple"
// ===========================================================================

export const Simple: Story = {
  args: {
    variant: "simple",
    currentPage: 3,
    totalPages: 10,
  },
};

export const SimpleSmall: Story = {
  args: {
    variant: "simple",
    currentPage: 2,
    totalPages: 8,
    size: "sm",
  },
};

export const SimpleWithUnits: Story = {
  args: {
    variant: "simple",
    currentPage: 1,
    totalPages: 5,
    units: "pages",
  },
};

export const SimpleArrowsOnly: Story = {
  args: {
    variant: "simple",
    currentPage: 2,
    totalPages: 8,
    hidePages: true,
  },
};

export const SimpleAllSizes: Story = {
  render: () => (
    <div className="flex flex-col gap-4 items-start">
      {(["lg", "md", "sm"] as const).map((size) => (
        <div key={size} className="flex flex-col gap-1">
          <span className="font-secondary-body text-text-03">
            size=&quot;{size}&quot;
          </span>
          <Pagination
            variant="simple"
            currentPage={3}
            totalPages={10}
            size={size}
          />
        </div>
      ))}
    </div>
  ),
};

// ===========================================================================
// variant="count"
// ===========================================================================

export const Count: Story = {
  args: {
    variant: "count",
    pageSize: 10,
    totalItems: 95,
    currentPage: 2,
    totalPages: 10,
  },
};

export const CountWithUnits: Story = {
  args: {
    variant: "count",
    pageSize: 25,
    totalItems: 203,
    currentPage: 1,
    totalPages: 9,
    units: "items",
  },
};

export const CountArrowsOnly: Story = {
  args: {
    variant: "count",
    pageSize: 10,
    totalItems: 50,
    currentPage: 2,
    totalPages: 5,
    hidePages: true,
  },
};

export const CountAllSizes: Story = {
  render: () => (
    <div className="flex flex-col gap-4 items-start">
      {(["lg", "md", "sm"] as const).map((size) => (
        <div key={size} className="flex flex-col gap-1">
          <span className="font-secondary-body text-text-03">
            size=&quot;{size}&quot;
          </span>
          <Pagination
            variant="count"
            pageSize={10}
            totalItems={95}
            currentPage={3}
            totalPages={10}
            size={size}
            units="items"
          />
        </div>
      ))}
    </div>
  ),
};

// ===========================================================================
// variant="list" (default)
// ===========================================================================

export const List: Story = {
  args: {
    currentPage: 5,
    totalPages: 20,
    onChange: () => {},
  },
};

export const ListFewPages: Story = {
  args: {
    currentPage: 2,
    totalPages: 4,
    onChange: () => {},
  },
};

export const ListAllSizes: Story = {
  render: () => (
    <div className="flex flex-col gap-4 items-start">
      {(["lg", "md", "sm"] as const).map((size) => (
        <div key={size} className="flex flex-col gap-1">
          <span className="font-secondary-body text-text-03">
            size=&quot;{size}&quot;
          </span>
          <Pagination
            currentPage={3}
            totalPages={10}
            onChange={() => {}}
            size={size}
          />
        </div>
      ))}
    </div>
  ),
};

// ===========================================================================
// Interactive
// ===========================================================================

function InteractiveSimpleDemo() {
  const [page, setPage] = useState(1);
  return (
    <div className="flex flex-col gap-4 items-start">
      <Pagination
        variant="simple"
        currentPage={page}
        totalPages={15}
        onChange={setPage}
        units="pages"
      />
      <span className="font-secondary-body text-text-03">
        Current page: {page}
      </span>
    </div>
  );
}

export const InteractiveSimple: Story = {
  render: () => <InteractiveSimpleDemo />,
};

function InteractiveListDemo() {
  const [page, setPage] = useState(1);
  return (
    <div className="flex flex-col gap-4 items-start">
      <Pagination currentPage={page} totalPages={15} onChange={setPage} />
      <span className="font-secondary-body text-text-03">
        Current page: {page}
      </span>
    </div>
  );
}

export const InteractiveList: Story = {
  render: () => <InteractiveListDemo />,
};

function InteractiveCountDemo() {
  const [page, setPage] = useState(1);
  const pageSize = 10;
  const totalItems = 95;
  const totalPages = Math.ceil(totalItems / pageSize);
  return (
    <div className="flex flex-col gap-4 items-start">
      <Pagination
        variant="count"
        currentPage={page}
        totalPages={totalPages}
        pageSize={pageSize}
        totalItems={totalItems}
        onChange={setPage}
        units="items"
      />
      <span className="font-secondary-body text-text-03">
        Current page: {page}
      </span>
    </div>
  );
}

export const InteractiveCount: Story = {
  render: () => <InteractiveCountDemo />,
};


================================================
FILE: web/lib/opal/src/components/pagination/README.md
================================================
# Pagination

**Import:** `import { Pagination, type PaginationProps } from "@opal/components";`

Page navigation with three display variants and prev/next arrow controls.

## Variants

### `"list"` (default)

Numbered page buttons with ellipsis truncation for large page counts.

```tsx
<Pagination currentPage={3} totalPages={10} onChange={setPage} />
```

### `"simple"`

Compact `currentPage/totalPages` display with prev/next arrows. Can be reduced to just arrows via `hidePages`.

```tsx
// With summary (default)
<Pagination variant="simple" currentPage={1} totalPages={5} onChange={setPage} />

// Arrows only
<Pagination variant="simple" currentPage={1} totalPages={5} onChange={setPage} hidePages />

// With units
<Pagination variant="simple" currentPage={1} totalPages={5} onChange={setPage} units="pages" />
```

### `"count"`

Item-count display (`X~Y of Z`) with prev/next arrows. Designed for table footers.

```tsx
// Basic
<Pagination
  variant="count"
  pageSize={10}
  totalItems={95}
  currentPage={2}
  totalPages={10}
  onChange={setPage}
/>

// With units
<Pagination
  variant="count"
  pageSize={10}
  totalItems={95}
  currentPage={2}
  totalPages={10}
  onChange={setPage}
  units="items"
/>
```

## Props (shared)

| Prop | Type | Default | Description |
|---|---|---|---|
| `variant` | `"list" \| "simple" \| "count"` | `"list"` | Display variant |
| `currentPage` | `number` | **(required)** | 1-based current page number |
| `totalPages` | `number` | **(required)** | Total number of pages |
| `onChange` | `(page: number) => void` | — | Called when the page changes |
| `size` | `PaginationSize` | `"lg"` | Button and text sizing |

## Props (variant-specific)

### `"simple"`

| Prop | Type | Default | Description |
|---|---|---|---|
| `hidePages` | `boolean` | `false` | Hides the `currentPage/totalPages` text between arrows |
| `units` | `string` | — | Label after the summary (e.g. `"pages"`), always 4px spacing |

### `"count"`

| Prop | Type | Default | Description |
|---|---|---|---|
| `pageSize` | `number` | **(required)** | Items per page (for range calculation) |
| `totalItems` | `number` | **(required)** | Total item count |
| `hidePages` | `boolean` | `false` | Hides the current page number between arrows |
| `units` | `string` | — | Label after the total (e.g. `"items"`), always 4px spacing |

### `PaginationSize`

`"lg" | "md" | "sm"`


================================================
FILE: web/lib/opal/src/components/pagination/components.tsx
================================================
"use client";

import { Button } from "@opal/components";
import { Disabled } from "@opal/core";
import { SvgArrowRight, SvgChevronLeft, SvgChevronRight } from "@opal/icons";
import { containerSizeVariants } from "@opal/shared";
import type { RichStr, WithoutStyles } from "@opal/types";
import { Text } from "@opal/components";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { cn } from "@opal/utils";
import * as PopoverPrimitive from "@radix-ui/react-popover";
import {
  useState,
  type ChangeEvent,
  type HTMLAttributes,
  type KeyboardEvent,
  type ReactNode,
} from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type PaginationSize = "lg" | "md" | "sm";

/**
 * Compact `currentPage / totalPages` display with prev/next arrows.
 */
interface SimplePaginationProps
  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, "onChange"> {
  variant: "simple";
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the page changes. */
  onChange?: (page: number) => void;
  /** Controls button and text sizing. Default: `"lg"`. */
  size?: PaginationSize;
  /** Hides the `currentPage/totalPages` summary text between arrows. Default: `false`. */
  hidePages?: boolean;
  /** Unit label shown after the summary (e.g. `"pages"`). Always has 4px spacing. */
  units?: string | RichStr;
}

/**
 * Item-count display (`X~Y of Z`) with prev/next arrows.
 * Designed for table footers.
 */
interface CountPaginationProps
  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, "onChange"> {
  variant: "count";
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Number of items displayed per page. Used to compute the visible range. */
  pageSize: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** Called when the page changes. */
  onChange?: (page: number) => void;
  /** Controls button and text sizing. Default: `"lg"`. */
  size?: PaginationSize;
  /** Hides the current page number between the arrows. Default: `false`. */
  hidePages?: boolean;
  /** Unit label shown after the total count (e.g. `"items"`). Always has 4px spacing. */
  units?: string | RichStr;
}

/**
 * Numbered page buttons with ellipsis truncation for large page counts.
 * This is the default variant.
 */
interface ListPaginationProps
  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, "onChange"> {
  variant?: "list";
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the page changes. */
  onChange: (page: number) => void;
  /** Controls button and text sizing. Default: `"lg"`. */
  size?: PaginationSize;
}

/**
 * Discriminated union of all pagination variants.
 * Use `variant` to select between `"simple"`, `"count"`, and `"list"` (default).
 */
type PaginationProps =
  | SimplePaginationProps
  | CountPaginationProps
  | ListPaginationProps;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Computes the page numbers to display.
 *
 * - <=7 pages: render all pages individually (no ellipsis).
 * - >7 pages: always render exactly 7 slots (numbers or ellipsis).
 *   First and last page are always shown. Ellipsis takes one slot.
 *
 * Examples for totalPages=20:
 * - page 1:  `1  2  3  4  5  ...  20`
 * - page 4:  `1  2  3  4  5  ...  20`
 * - page 5:  `1  ...  4  5  6  ...  20`
 * - page 16: `1  ...  15  16  17  ...  20`
 * - page 17: `1  ...  16  17  18  19  20`
 * - page 20: `1  ...  16  17  18  19  20`
 */
function getPageNumbers(
  currentPage: number,
  totalPages: number
): (number | string)[] {
  if (totalPages <= 7) {
    const pages: number[] = [];
    for (let i = 1; i <= totalPages; i++) pages.push(i);
    return pages;
  }

  // Always 7 slots. First and last are always page 1 and totalPages.
  // That leaves 5 inner slots.

  // Near the start: no start-ellipsis needed
  // Slots: 1, 2, 3, 4, 5, ..., totalPages
  if (currentPage <= 4) {
    return [1, 2, 3, 4, 5, "end-ellipsis", totalPages];
  }

  // Near the end: no end-ellipsis needed
  // Slots: 1, ..., tp-4, tp-3, tp-2, tp-1, tp
  if (currentPage >= totalPages - 3) {
    return [
      1,
      "start-ellipsis",
      totalPages - 4,
      totalPages - 3,
      totalPages - 2,
      totalPages - 1,
      totalPages,
    ];
  }

  // Middle: both ellipses
  // Slots: 1, ..., cur-1, cur, cur+1, ..., totalPages
  return [
    1,
    "start-ellipsis",
    currentPage - 1,
    currentPage,
    currentPage + 1,
    "end-ellipsis",
    totalPages,
  ];
}

function monoClass(size: PaginationSize): string {
  return size === "sm" ? "font-secondary-mono" : "font-main-ui-mono";
}

function textClasses(size: PaginationSize, style: "mono" | "muted"): string {
  if (style === "mono") return monoClass(size);
  return size === "sm" ? "font-secondary-body" : "font-main-ui-muted";
}

const PAGE_NUMBER_FONT: Record<
  PaginationSize,
  { active: string; inactive: string }
> = {
  lg: {
    active: "font-main-ui-body text-text-04",
    inactive: "font-main-ui-muted text-text-02",
  },
  md: {
    active: "font-secondary-action text-text-04",
    inactive: "font-secondary-body text-text-02",
  },
  sm: {
    active: "font-secondary-action text-text-04",
    inactive: "font-secondary-body text-text-02",
  },
};

// ---------------------------------------------------------------------------
// GoToPagePopup
// ---------------------------------------------------------------------------

interface GoToPagePopupProps {
  totalPages: number;
  onSubmit: (page: number) => void;
  children: ReactNode;
}

function GoToPagePopup({ totalPages, onSubmit, children }: GoToPagePopupProps) {
  const [open, setOpen] = useState(false);
  const [value, setValue] = useState("");

  const parsed = parseInt(value, 10);
  const isValid = !isNaN(parsed) && parsed >= 1 && parsed <= totalPages;

  function handleChange(e: ChangeEvent<HTMLInputElement>) {
    const raw = e.target.value;
    if (raw === "" || /^\d+$/.test(raw)) {
      setValue(raw);
    }
  }

  function handleSubmit() {
    if (!isValid) return;
    onSubmit(parsed);
    setOpen(false);
    setValue("");
  }

  function handleKeyDown(e: KeyboardEvent<HTMLInputElement>) {
    if (e.key === "Enter") {
      handleSubmit();
    }
  }

  return (
    <PopoverPrimitive.Root
      open={open}
      onOpenChange={(next) => {
        setOpen(next);
        if (!next) setValue("");
      }}
    >
      <PopoverPrimitive.Trigger asChild>{children}</PopoverPrimitive.Trigger>
      <PopoverPrimitive.Portal>
        <PopoverPrimitive.Content
          className={cn(
            "flex items-center gap-1 p-1",
            "bg-background-neutral-00 rounded-12 border border-border-01 shadow-md z-popover",
            "data-[state=open]:animate-in data-[state=closed]:animate-out",
            "data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
            "data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95"
          )}
          sideOffset={4}
        >
          {/* TODO(@raunakab): migrate this input to the opal Input component once inputs have been migrated into Opal */}
          <input
            type="text"
            inputMode="numeric"
            value={value}
            onChange={handleChange}
            onKeyDown={handleKeyDown}
            placeholder="Go to page"
            autoFocus
            className={cn(
              "w-[7rem] bg-transparent px-1.5 py-1 rounded-08",
              containerSizeVariants.lg.height,
              "border border-border-02 focus:outline-none focus:border-border-04",
              "font-main-ui-body",
              "text-text-04 placeholder:text-text-02"
            )}
          />
          <Disabled disabled={!isValid}>
            <Button
              icon={SvgArrowRight}
              size="lg"
              onClick={handleSubmit}
              tooltip="Go to page"
            />
          </Disabled>
        </PopoverPrimitive.Content>
      </PopoverPrimitive.Portal>
    </PopoverPrimitive.Root>
  );
}

// ---------------------------------------------------------------------------
// Nav buttons (shared across all variants)
// ---------------------------------------------------------------------------

interface NavButtonsProps {
  currentPage: number;
  totalPages: number;
  onChange: (page: number) => void;
  size: PaginationSize;
  children?: ReactNode;
}

function NavButtons({
  currentPage,
  totalPages,
  onChange,
  size,
  children,
}: NavButtonsProps) {
  return (
    <>
      <Disabled disabled={currentPage <= 1}>
        <Button
          icon={SvgChevronLeft}
          onClick={() => onChange(Math.max(1, currentPage - 1))}
          size={size}
          prominence="tertiary"
          tooltip="Previous page"
        />
      </Disabled>
      {children}
      <Disabled disabled={currentPage >= totalPages}>
        <Button
          icon={SvgChevronRight}
          onClick={() => onChange(Math.min(totalPages, currentPage + 1))}
          size={size}
          prominence="tertiary"
          tooltip="Next page"
        />
      </Disabled>
    </>
  );
}

// ---------------------------------------------------------------------------
// PaginationSimple
// ---------------------------------------------------------------------------

function PaginationSimple({
  currentPage,
  totalPages,
  onChange,
  size = "lg",
  hidePages = false,
  units,
  ...props
}: SimplePaginationProps) {
  const handleChange = (page: number) => onChange?.(page);

  const label = `${currentPage}/${totalPages}${
    units ? ` ${toPlainString(units)}` : ""
  }`;

  return (
    <div {...props} className="flex items-center">
      <NavButtons
        currentPage={currentPage}
        totalPages={totalPages}
        onChange={handleChange}
        size={size}
      >
        {!hidePages && (
          <GoToPagePopup totalPages={totalPages} onSubmit={handleChange}>
            <Button size={size} prominence="tertiary">
              {label}
            </Button>
          </GoToPagePopup>
        )}
      </NavButtons>
    </div>
  );
}

// ---------------------------------------------------------------------------
// PaginationCount
// ---------------------------------------------------------------------------

function PaginationCount({
  pageSize,
  totalItems,
  currentPage,
  totalPages,
  onChange,
  size = "lg",
  hidePages = false,
  units,
  ...props
}: CountPaginationProps) {
  const handleChange = (page: number) => onChange?.(page);
  const rangeStart = totalItems === 0 ? 0 : (currentPage - 1) * pageSize + 1;
  const rangeEnd = Math.min(currentPage * pageSize, totalItems);

  return (
    <div {...props} className="flex items-center gap-1">
      {/* Summary: range of total [units] */}
      <span
        className={cn(
          "inline-flex items-center gap-1",
          monoClass(size),
          "text-text-03"
        )}
      >
        {rangeStart}~{rangeEnd}
        <span className={textClasses(size, "muted")}>of</span>
        {totalItems}
        {units && (
          <Text
            color="inherit"
            font={size === "sm" ? "secondary-body" : "main-ui-muted"}
          >
            {units}
          </Text>
        )}
      </span>

      {/* Buttons: < [page] > */}
      <div className="flex items-center">
        <NavButtons
          currentPage={currentPage}
          totalPages={totalPages}
          onChange={handleChange}
          size={size}
        >
          {!hidePages && (
            <GoToPagePopup totalPages={totalPages} onSubmit={handleChange}>
              <Button size={size} prominence="tertiary">
                {String(currentPage)}
              </Button>
            </GoToPagePopup>
          )}
        </NavButtons>
      </div>
    </div>
  );
}

// ---------------------------------------------------------------------------
// PaginationList (default)
// ---------------------------------------------------------------------------

function PaginationList({
  currentPage,
  totalPages,
  onChange,
  size = "lg",
  ...props
}: ListPaginationProps) {
  const pageNumbers = getPageNumbers(currentPage, totalPages);
  const fonts = PAGE_NUMBER_FONT[size];

  return (
    <div {...props} className="flex items-center gap-1">
      <NavButtons
        currentPage={currentPage}
        totalPages={totalPages}
        onChange={onChange}
        size={size}
      >
        <div className="flex items-center">
          {pageNumbers.map((page) => {
            if (typeof page === "string") {
              return (
                <GoToPagePopup
                  key={page}
                  totalPages={totalPages}
                  onSubmit={onChange}
                >
                  <Button
                    size={size}
                    prominence="tertiary"
                    icon={({ className: iconClassName }) => (
                      <div
                        className={cn(
                          iconClassName,
                          "flex flex-col justify-center",
                          fonts.inactive
                        )}
                      >
                        ...
                      </div>
                    )}
                  />
                </GoToPagePopup>
              );
            }

            const isActive = page === currentPage;

            return (
              <Button
                key={page}
                onClick={() => onChange(page)}
                size={size}
                prominence="tertiary"
                interaction={isActive ? "hover" : "rest"}
                icon={({ className: iconClassName }) => (
                  <div
                    className={cn(
                      iconClassName,
                      "flex flex-col justify-center",
                      isActive ? fonts.active : fonts.inactive
                    )}
                  >
                    {page}
                  </div>
                )}
              />
            );
          })}
        </div>
      </NavButtons>
    </div>
  );
}

// ---------------------------------------------------------------------------
// Pagination (entry point)
// ---------------------------------------------------------------------------

/**
 * Page navigation component with three variants:
 *
 * - `"list"` (default) — Numbered page buttons with ellipsis truncation.
 * - `"simple"` — Compact `currentPage / totalPages` with prev/next arrows.
 * - `"count"` — Item-count display (`X~Y of Z`) with prev/next arrows.
 *
 * All variants include a "go to page" popup activated by clicking on the
 * page indicator (simple/count) or the ellipsis (list).
 *
 * @example
 * ```tsx
 * // List (default)
 * <Pagination currentPage={3} totalPages={10} onChange={setPage} />
 *
 * // Simple
 * <Pagination variant="simple" currentPage={1} totalPages={5} onChange={setPage} />
 *
 * // Count
 * <Pagination variant="count" pageSize={10} totalItems={95} currentPage={2} totalPages={10} onChange={setPage} />
 * ```
 */
function Pagination(props: PaginationProps) {
  const normalized = {
    ...props,
    totalPages: Math.max(1, props.totalPages),
    currentPage: Math.max(
      1,
      Math.min(props.currentPage, Math.max(1, props.totalPages))
    ),
  };
  const variant = normalized.variant ?? "list";
  switch (variant) {
    case "simple":
      return <PaginationSimple {...(normalized as SimplePaginationProps)} />;
    case "count":
      return <PaginationCount {...(normalized as CountPaginationProps)} />;
    case "list":
      return <PaginationList {...(normalized as ListPaginationProps)} />;
  }
}

export { Pagination, type PaginationProps, type PaginationSize };


================================================
FILE: web/lib/opal/src/components/table/ActionsContainer.tsx
================================================
"use client";

import { useTableSize } from "@opal/components/table/TableSizeContext";

interface ActionsContainerProps {
  type: "head" | "cell";
  /** Pass-through click handler (e.g. stopPropagation on body cells). */
  onClick?: (e: React.MouseEvent) => void;
  children: React.ReactNode;
}

export default function ActionsContainer({
  type,
  children,
  onClick,
}: ActionsContainerProps) {
  const size = useTableSize();
  const Tag = type === "head" ? "th" : "td";

  return (
    <Tag
      className="tbl-actions"
      data-type={type}
      data-size={size}
      onClick={onClick}
    >
      <div className="flex h-full items-center justify-end">{children}</div>
    </Tag>
  );
}


================================================
FILE: web/lib/opal/src/components/table/ColumnSortabilityPopover.tsx
================================================
"use client";

import { useState } from "react";
import {
  type Table,
  type ColumnDef,
  type RowData,
  type SortingState,
} from "@tanstack/react-table";
import { Button, LineItemButton } from "@opal/components";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import { SvgArrowUpDown, SvgSortOrder, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import Divider from "@/refresh-components/Divider";
import Text from "@/refresh-components/texts/Text";

// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------

interface SortingPopoverProps<TData extends RowData = RowData> {
  table: Table<TData>;
  sorting: SortingState;
  footerText?: string;
  ascendingLabel?: string;
  descendingLabel?: string;
}

function SortingPopover<TData extends RowData>({
  table,
  sorting,
  footerText,
  ascendingLabel = "Ascending",
  descendingLabel = "Descending",
}: SortingPopoverProps<TData>) {
  const size = useTableSize();
  const [open, setOpen] = useState(false);
  const sortableColumns = table
    .getAllLeafColumns()
    .filter((col) => col.getCanSort());

  const currentSort = sorting[0] ?? null;

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <Popover.Trigger asChild>
        <Button
          icon={currentSort === null ? SvgArrowUpDown : SvgSortOrder}
          interaction={open ? "hover" : "rest"}
          size={size === "md" ? "sm" : "md"}
          prominence="tertiary"
          tooltip="Sort"
        />
      </Popover.Trigger>

      <Popover.Content width="lg" align="end" side="bottom">
        <Popover.Menu
          footer={
            footerText ? (
              <div className="px-2 py-1">
                <Text secondaryBody text03>
                  {footerText}
                </Text>
              </div>
            ) : undefined
          }
        >
          <Divider showTitle text="Sort by" />

          <LineItemButton
            selectVariant="select-heavy"
            state={currentSort === null ? "selected" : "empty"}
            title="Manual Ordering"
            sizePreset="main-ui"
            rightChildren={
              currentSort === null ? (
                <SvgCheck size={16} className="text-action-link-05" />
              ) : undefined
            }
            onClick={() => {
              table.resetSorting();
            }}
          />

          {sortableColumns.map((column) => {
            const isSorted = currentSort?.id === column.id;
            const label =
              typeof column.columnDef.header === "string"
                ? column.columnDef.header
                : column.id;

            return (
              <LineItemButton
                key={column.id}
                selectVariant="select-heavy"
                state={isSorted ? "selected" : "empty"}
                title={label}
                sizePreset="main-ui"
                rightChildren={
                  isSorted ? (
                    <SvgCheck size={16} className="text-action-link-05" />
                  ) : undefined
                }
                onClick={() => {
                  if (isSorted) {
                    table.resetSorting();
                    return;
                  }
                  column.toggleSorting(false);
                }}
              />
            );
          })}

          {currentSort !== null && (
            <>
              <Divider showTitle text="Sorting Order" />

              <LineItemButton
                selectVariant="select-heavy"
                state={!currentSort.desc ? "selected" : "empty"}
                title={ascendingLabel}
                sizePreset="main-ui"
                rightChildren={
                  !currentSort.desc ? (
                    <SvgCheck size={16} className="text-action-link-05" />
                  ) : undefined
                }
                onClick={() => {
                  table.setSorting([{ id: currentSort.id, desc: false }]);
                }}
              />

              <LineItemButton
                selectVariant="select-heavy"
                state={currentSort.desc ? "selected" : "empty"}
                title={descendingLabel}
                sizePreset="main-ui"
                rightChildren={
                  currentSort.desc ? (
                    <SvgCheck size={16} className="text-action-link-05" />
                  ) : undefined
                }
                onClick={() => {
                  table.setSorting([{ id: currentSort.id, desc: true }]);
                }}
              />
            </>
          )}
        </Popover.Menu>
      </Popover.Content>
    </Popover>
  );
}

// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------

interface CreateSortingColumnOptions {
  footerText?: string;
  ascendingLabel?: string;
  descendingLabel?: string;
}

function createSortingColumn<TData>(
  options?: CreateSortingColumnOptions
): ColumnDef<TData, unknown> {
  return {
    id: "__sorting",
    size: 44,
    enableHiding: false,
    enableSorting: false,
    enableResizing: false,
    header: ({ table }) => (
      <SortingPopover
        table={table}
        sorting={table.getState().sorting}
        footerText={options?.footerText}
        ascendingLabel={options?.ascendingLabel}
        descendingLabel={options?.descendingLabel}
      />
    ),
    cell: () => null,
  };
}

export { SortingPopover, createSortingColumn };


================================================
FILE: web/lib/opal/src/components/table/ColumnVisibilityPopover.tsx
================================================
"use client";

import { useState } from "react";
import {
  type Table,
  type ColumnDef,
  type RowData,
  type VisibilityState,
} from "@tanstack/react-table";
import { Button, LineItemButton, Tag } from "@opal/components";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import { SvgColumn, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import Divider from "@/refresh-components/Divider";

// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------

interface ColumnVisibilityPopoverProps<TData extends RowData = RowData> {
  table: Table<TData>;
  columnVisibility: VisibilityState;
}

function ColumnVisibilityPopover<TData extends RowData>({
  table,
  columnVisibility,
}: ColumnVisibilityPopoverProps<TData>) {
  const size = useTableSize();
  const [open, setOpen] = useState(false);

  // User-defined columns only (exclude internal qualifier/actions)
  const dataColumns = table
    .getAllLeafColumns()
    .filter(
      (col) =>
        !col.id.startsWith("__") &&
        col.id !== "qualifier" &&
        typeof col.columnDef.header === "string" &&
        col.columnDef.header.trim() !== ""
    );

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <Popover.Trigger asChild>
        <Button
          icon={SvgColumn}
          interaction={open ? "hover" : "rest"}
          size={size === "md" ? "sm" : "md"}
          prominence="tertiary"
          tooltip="Columns"
        />
      </Popover.Trigger>

      <Popover.Content width="lg" align="end" side="bottom">
        <Divider showTitle text="Shown Columns" />
        <Popover.Menu>
          {dataColumns.map((column) => {
            const canHide = column.getCanHide();
            const isVisible = columnVisibility[column.id] !== false;
            const label =
              typeof column.columnDef.header === "string"
                ? column.columnDef.header
                : column.id;

            return (
              <LineItemButton
                key={column.id}
                selectVariant="select-heavy"
                state={isVisible ? "selected" : "empty"}
                title={label}
                sizePreset="main-ui"
                rightChildren={
                  !canHide ? (
                    <div className="flex items-center">
                      <Tag title="Always Shown" color="blue" />
                    </div>
                  ) : isVisible ? (
                    <SvgCheck size={16} className="text-action-link-05" />
                  ) : undefined
                }
                onClick={canHide ? () => column.toggleVisibility() : undefined}
              />
            );
          })}
        </Popover.Menu>
      </Popover.Content>
    </Popover>
  );
}

// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------

function createColumnVisibilityColumn<TData>(): ColumnDef<TData, unknown> {
  return {
    id: "__columnVisibility",
    size: 44,
    enableHiding: false,
    enableSorting: false,
    enableResizing: false,
    header: ({ table }) => (
      <ColumnVisibilityPopover
        table={table}
        columnVisibility={table.getState().columnVisibility}
      />
    ),
    cell: () => null,
  };
}

export { ColumnVisibilityPopover, createColumnVisibilityColumn };


================================================
FILE: web/lib/opal/src/components/table/DragOverlayRow.tsx
================================================
import { memo } from "react";
import { type Row, flexRender } from "@tanstack/react-table";
import TableRow from "@opal/components/table/TableRow";
import TableCell from "@opal/components/table/TableCell";
import QualifierContainer from "@opal/components/table/QualifierContainer";
import TableQualifier from "@opal/components/table/TableQualifier";
import ActionsContainer from "@opal/components/table/ActionsContainer";
import type {
  OnyxColumnDef,
  OnyxQualifierColumn,
} from "@opal/components/table/types";

interface DragOverlayRowProps<TData> {
  row: Row<TData>;
  columnWidths?: Record<string, number>;
  columnKindMap?: Map<string, OnyxColumnDef<TData>>;
  qualifierColumn?: OnyxQualifierColumn<TData> | null;
  isSelectable?: boolean;
}

function DragOverlayRowInner<TData>({
  row,
  columnWidths,
  columnKindMap,
  qualifierColumn,
  isSelectable = false,
}: DragOverlayRowProps<TData>) {
  const tableWidth = columnWidths
    ? Object.values(columnWidths).reduce((sum, w) => sum + w, 0)
    : undefined;

  return (
    <table
      className="border-collapse"
      style={{
        tableLayout: "fixed",
        ...(tableWidth != null ? { width: tableWidth } : { minWidth: "100%" }),
      }}
    >
      {columnWidths && (
        <colgroup>
          {row.getVisibleCells().map((cell) => (
            <col
              key={cell.column.id}
              style={{ width: columnWidths[cell.column.id] }}
            />
          ))}
        </colgroup>
      )}
      <tbody>
        <TableRow selected={row.getIsSelected()}>
          {row.getVisibleCells().map((cell) => {
            const colDef = columnKindMap?.get(cell.column.id);

            if (colDef?.kind === "qualifier" && qualifierColumn) {
              return (
                <QualifierContainer key={cell.id} type="cell">
                  <TableQualifier
                    content={qualifierColumn.content}
                    icon={qualifierColumn.getContent?.(row.original)}
                    imageSrc={qualifierColumn.getImageSrc?.(row.original)}
                    imageAlt={qualifierColumn.getImageAlt?.(row.original)}
                    background={qualifierColumn.background}
                    iconSize={qualifierColumn.iconSize}
                    selectable={isSelectable}
                    selected={isSelectable && row.getIsSelected()}
                  />
                </QualifierContainer>
              );
            }

            if (colDef?.kind === "actions") {
              return (
                <ActionsContainer key={cell.id} type="cell">
                  {flexRender(cell.column.columnDef.cell, cell.getContext())}
                </ActionsContainer>
              );
            }

            return (
              <TableCell key={cell.id}>
                {flexRender(cell.column.columnDef.cell, cell.getContext())}
              </TableCell>
            );
          })}
        </TableRow>
      </tbody>
    </table>
  );
}

const DragOverlayRow = memo(DragOverlayRowInner) as typeof DragOverlayRowInner;

export default DragOverlayRow;
export type { DragOverlayRowProps };


================================================
FILE: web/lib/opal/src/components/table/Footer.tsx
================================================
"use client";

import { Button, Pagination, SelectButton } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import { SvgEye, SvgXCircle } from "@opal/icons";
import type { ReactNode } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type SelectionState = "none" | "partial" | "all";

/**
 * Footer mode for tables with selectable rows.
 * Displays a selection message on the left (with optional view/clear actions)
 * and a `count`-type pagination on the right.
 */
interface FooterSelectionModeProps {
  mode: "selection";
  /** Whether the table supports selecting multiple rows. */
  multiSelect: boolean;
  /** Current selection state: `"none"`, `"partial"`, or `"all"`. */
  selectionState: SelectionState;
  /** Number of currently selected items. */
  selectedCount: number;
  /** Toggle view-filter on/off. */
  onView?: () => void;
  /** Whether the view-filter is currently active. */
  isViewingSelected?: boolean;
  /** Clears all selections. */
  onClear?: () => void;
  /** Number of items displayed per page. */
  pageSize: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** Unit label for count pagination. @default "items" */
  units?: string;
}

/**
 * Footer mode for read-only tables (no row selection).
 * Displays "Showing X~Y of Z" on the left and a `list`-type pagination
 * on the right.
 */
interface FooterSummaryModeProps {
  mode: "summary";
  /** First item number in the current page (e.g. `1`). */
  rangeStart: number;
  /** Last item number in the current page (e.g. `25`). */
  rangeEnd: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** Optional extra element rendered after the summary text (e.g. a download icon). */
  leftExtra?: ReactNode;
  /** Unit label for the summary text, e.g. "users". */
  units?: string;
}

/**
 * Discriminated union of footer modes.
 * Use `mode: "selection"` for tables with selectable rows, or
 * `mode: "summary"` for read-only tables.
 */
export type FooterProps = FooterSelectionModeProps | FooterSummaryModeProps;

// ---------------------------------------------------------------------------
// Footer
// ---------------------------------------------------------------------------

function getSelectionMessage(
  state: SelectionState,
  multi: boolean,
  count: number,
  isViewingSelected: boolean
): string {
  if (state === "none" && !isViewingSelected) {
    return multi ? "Select items to continue" : "Select an item to continue";
  }
  if (!multi) return "Item selected";
  return `${count} item${count !== 1 ? "s" : ""} selected`;
}

/**
 * Table footer combining status information on the left with pagination on the
 * right. Use `mode: "selection"` for tables with selectable rows, or
 * `mode: "summary"` for read-only tables.
 */
export default function Footer(props: FooterProps) {
  const resolvedSize = useTableSize();
  const isSmall = resolvedSize === "md";
  return (
    <div
      className="table-footer flex w-full items-center justify-between border-t border-border-01"
      data-size={resolvedSize}
    >
      {/* Left side */}
      <div className="flex items-center gap-1 px-1">
        {props.mode === "selection" ? (
          <SelectionLeft
            selectionState={props.selectionState}
            multiSelect={props.multiSelect}
            selectedCount={props.selectedCount}
            onView={props.onView}
            isViewingSelected={props.isViewingSelected}
            onClear={props.onClear}
            isSmall={isSmall}
          />
        ) : (
          <>
            <SummaryLeft
              rangeStart={props.rangeStart}
              rangeEnd={props.rangeEnd}
              totalItems={props.totalItems}
              units={props.units}
              isSmall={isSmall}
            />
            {props.leftExtra}
          </>
        )}
      </div>

      {/* Right side */}
      <div className="flex items-center gap-2 px-1 py-2">
        {props.mode === "selection" ? (
          <Pagination
            variant="count"
            pageSize={props.pageSize}
            totalItems={props.totalItems}
            currentPage={props.currentPage}
            totalPages={props.totalPages}
            onChange={props.onPageChange}
            units={props.units}
            size={isSmall ? "sm" : "md"}
          />
        ) : (
          <Pagination
            currentPage={props.currentPage}
            totalPages={props.totalPages}
            onChange={props.onPageChange}
            size={isSmall ? "md" : "lg"}
          />
        )}
      </div>
    </div>
  );
}

// ---------------------------------------------------------------------------
// Footer — left-side content
// ---------------------------------------------------------------------------

interface SelectionLeftProps {
  selectionState: SelectionState;
  multiSelect: boolean;
  selectedCount: number;
  onView?: () => void;
  isViewingSelected?: boolean;
  onClear?: () => void;
  isSmall: boolean;
}

function SelectionLeft({
  selectionState,
  multiSelect,
  selectedCount,
  onView,
  isViewingSelected = false,
  onClear,
  isSmall,
}: SelectionLeftProps) {
  const message = getSelectionMessage(
    selectionState,
    multiSelect,
    selectedCount,
    isViewingSelected
  );
  const hasSelection = selectionState !== "none";
  // Show buttons when items are selected OR when the view filter is active
  const showActions = hasSelection || isViewingSelected;

  return (
    <div className="flex flex-row gap-1 items-center justify-center w-fit flex-shrink-0 h-fit px-1">
      {isSmall ? (
        <Text
          secondaryAction={hasSelection}
          secondaryBody={!hasSelection}
          text03
        >
          {message}
        </Text>
      ) : (
        <Text mainUiBody={hasSelection} mainUiMuted={!hasSelection} text03>
          {message}
        </Text>
      )}

      {showActions && (
        <div className="flex flex-row items-center w-fit flex-shrink-0 h-fit">
          {onView && (
            <SelectButton
              icon={SvgEye}
              state={isViewingSelected ? "selected" : "empty"}
              onClick={onView}
              tooltip="View selected"
              size={isSmall ? "sm" : "md"}
            />
          )}
          {onClear && (
            <Button
              icon={SvgXCircle}
              onClick={onClear}
              tooltip="Deselect all"
              size={isSmall ? "sm" : "md"}
              prominence="tertiary"
            />
          )}
        </div>
      )}
    </div>
  );
}

interface SummaryLeftProps {
  rangeStart: number;
  rangeEnd: number;
  totalItems: number;
  units?: string;
  isSmall: boolean;
}

function SummaryLeft({
  rangeStart,
  rangeEnd,
  totalItems,
  units,
  isSmall,
}: SummaryLeftProps) {
  const suffix = units ? ` ${units}` : "";
  return (
    <div className="flex flex-row gap-1 items-center w-fit h-fit px-1">
      {isSmall ? (
        <Text secondaryBody text03>
          Showing{" "}
          <Text as="span" secondaryMono text03>
            {rangeStart}~{rangeEnd}
          </Text>{" "}
          of{" "}
          <Text as="span" secondaryMono text03>
            {totalItems}
          </Text>
          {suffix}
        </Text>
      ) : (
        <Text mainUiMuted text03>
          Showing{" "}
          <Text as="span" mainUiMono text03>
            {rangeStart}~{rangeEnd}
          </Text>{" "}
          of{" "}
          <Text as="span" mainUiMono text03>
            {totalItems}
          </Text>
          {suffix}
        </Text>
      )}
    </div>
  );
}


================================================
FILE: web/lib/opal/src/components/table/QualifierContainer.tsx
================================================
"use client";

import { useTableSize } from "@opal/components/table/TableSizeContext";

interface QualifierContainerProps {
  type: "head" | "cell";
  children?: React.ReactNode;
  /** Pass-through click handler (e.g. stopPropagation on body cells). */
  onClick?: (e: React.MouseEvent) => void;
}

export default function QualifierContainer({
  type,
  children,
  onClick,
}: QualifierContainerProps) {
  const resolvedSize = useTableSize();

  const Tag = type === "head" ? "th" : "td";

  return (
    <Tag
      className="tbl-qualifier"
      data-type={type}
      data-size={resolvedSize}
      onClick={onClick}
    >
      <div className="flex h-full items-center justify-center">{children}</div>
    </Tag>
  );
}


================================================
FILE: web/lib/opal/src/components/table/README.md
================================================
# Table

Config-driven table component with sorting, pagination, column visibility,
row selection, drag-and-drop reordering, and server-side mode.

## Usage

```tsx
import { Table, createTableColumns } from "@opal/components";
import { SvgUser } from "@opal/icons";

interface User {
  id: string;
  email: string;
  name: string | null;
  status: "active" | "invited";
}

const tc = createTableColumns<User>();

const columns = [
  tc.qualifier({ content: "icon", getContent: () => SvgUser }),
  tc.column("email", {
    header: "Name",
    weight: 22,
    cell: (email, row) => <span>{row.name ?? email}</span>,
  }),
  tc.column("status", {
    header: "Status",
    weight: 14,
    cell: (status) => <span>{status}</span>,
  }),
  tc.actions(),
];

function UsersTable({ users }: { users: User[] }) {
  return (
    <Table
      data={users}
      columns={columns}
      getRowId={(r) => r.id}
      pageSize={10}
      footer={{}}
    />
  );
}
```

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `data` | `TData[]` | required | Row data array |
| `columns` | `OnyxColumnDef<TData>[]` | required | Column definitions from `createTableColumns()` |
| `getRowId` | `(row: TData) => string` | required | Unique row identifier |
| `pageSize` | `number` | `10` | Rows per page (`Infinity` disables pagination) |
| `size` | `"md" \| "lg"` | `"lg"` | Density variant |
| `footer` | `DataTableFooterConfig` | — | Footer configuration (mode is derived from `selectionBehavior`) |
| `initialSorting` | `SortingState` | — | Initial sort state |
| `initialColumnVisibility` | `VisibilityState` | — | Initial column visibility |
| `draggable` | `DataTableDraggableConfig` | — | Enable drag-and-drop reordering |
| `onSelectionChange` | `(ids: string[]) => void` | — | Selection callback |
| `onRowClick` | `(row: TData) => void` | — | Row click handler |
| `searchTerm` | `string` | — | Global text filter |
| `height` | `number \| string` | — | Max scrollable height |
| `serverSide` | `ServerSideConfig` | — | Server-side pagination/sorting/filtering |
| `emptyState` | `ReactNode` | — | Empty state content |

## Column Builder

`createTableColumns<TData>()` returns a builder with:

- `tc.qualifier(opts)` — leading avatar/icon/checkbox column
- `tc.column(accessor, opts)` — data column with sorting/resizing
- `tc.displayColumn(opts)` — non-accessor custom column
- `tc.actions(opts)` — trailing actions column with visibility/sorting popovers

## Footer

The footer mode is derived automatically from `selectionBehavior`:
- **Selection footer** (when `selectionBehavior` is `"single-select"` or `"multi-select"`) — shows selection count, optional view/clear buttons, count pagination
- **Summary footer** (when `selectionBehavior` is `"no-select"` or omitted) — shows "Showing X\~Y of Z", list pagination, optional extra element


================================================
FILE: web/lib/opal/src/components/table/Table.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Table, createTableColumns } from "@opal/components";
import { SvgUser } from "@opal/icons";

// ---------------------------------------------------------------------------
// Sample data
// ---------------------------------------------------------------------------

interface User {
  id: string;
  email: string;
  name: string;
  role: "admin" | "user" | "viewer";
  status: "active" | "invited" | "inactive";
}

const USERS: User[] = [
  {
    id: "1",
    email: "alice@example.com",
    name: "Alice Johnson",
    role: "admin",
    status: "active",
  },
  {
    id: "2",
    email: "bob@example.com",
    name: "Bob Smith",
    role: "user",
    status: "active",
  },
  {
    id: "3",
    email: "carol@example.com",
    name: "Carol White",
    role: "viewer",
    status: "invited",
  },
  {
    id: "4",
    email: "dave@example.com",
    name: "Dave Brown",
    role: "user",
    status: "inactive",
  },
  {
    id: "5",
    email: "eve@example.com",
    name: "Eve Davis",
    role: "admin",
    status: "active",
  },
  {
    id: "6",
    email: "frank@example.com",
    name: "Frank Miller",
    role: "viewer",
    status: "active",
  },
  {
    id: "7",
    email: "grace@example.com",
    name: "Grace Lee",
    role: "user",
    status: "invited",
  },
  {
    id: "8",
    email: "hank@example.com",
    name: "Hank Wilson",
    role: "user",
    status: "active",
  },
  {
    id: "9",
    email: "iris@example.com",
    name: "Iris Taylor",
    role: "viewer",
    status: "active",
  },
  {
    id: "10",
    email: "jack@example.com",
    name: "Jack Moore",
    role: "admin",
    status: "active",
  },
  {
    id: "11",
    email: "kate@example.com",
    name: "Kate Anderson",
    role: "user",
    status: "inactive",
  },
  {
    id: "12",
    email: "leo@example.com",
    name: "Leo Thomas",
    role: "viewer",
    status: "active",
  },
];

// ---------------------------------------------------------------------------
// Columns
// ---------------------------------------------------------------------------

const tc = createTableColumns<User>();

const columns = [
  tc.qualifier({
    content: "icon",
    getContent: () => SvgUser,
    background: true,
  }),
  tc.column("name", { header: "Name", weight: 25 }),
  tc.column("email", { header: "Email", weight: 30 }),
  tc.column("role", { header: "Role", weight: 15 }),
  tc.column("status", { header: "Status", weight: 15 }),
  tc.actions(),
];

// ---------------------------------------------------------------------------
// Story
// ---------------------------------------------------------------------------

const meta: Meta<typeof Table> = {
  title: "opal/components/Table",
  component: Table,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Table>;

export const Default: Story = {
  render: () => (
    <Table
      data={USERS}
      columns={columns}
      getRowId={(r) => r.id}
      pageSize={8}
      footer={{}}
    />
  ),
};


================================================
FILE: web/lib/opal/src/components/table/TableBody.tsx
================================================
"use client";

import type { ReactNode } from "react";
import {
  DndContext,
  DragOverlay,
  type DragStartEvent,
  type DragEndEvent,
  type CollisionDetection,
  type Modifier,
  type SensorDescriptor,
  type SensorOptions,
} from "@dnd-kit/core";
import {
  SortableContext,
  verticalListSortingStrategy,
} from "@dnd-kit/sortable";
import type { WithoutStyles } from "@/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface DraggableProps {
  dndContextProps: {
    sensors: SensorDescriptor<SensorOptions>[];
    collisionDetection: CollisionDetection;
    modifiers: Modifier[];
    onDragStart: (event: DragStartEvent) => void;
    onDragEnd: (event: DragEndEvent) => void;
    onDragCancel: () => void;
  };
  sortableItems: string[];
  activeId: string | null;
  isEnabled: boolean;
}

interface TableBodyProps
  extends WithoutStyles<React.HTMLAttributes<HTMLTableSectionElement>> {
  ref?: React.Ref<HTMLTableSectionElement>;
  /** DnD context props from useDraggableRows — enables drag-and-drop reordering */
  dndSortable?: DraggableProps;
  /** Render function for the drag overlay row */
  renderDragOverlay?: (activeId: string) => ReactNode;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

function TableBody({
  ref,
  dndSortable,
  renderDragOverlay,
  ...props
}: TableBodyProps) {
  if (dndSortable?.isEnabled) {
    const { dndContextProps, sortableItems, activeId } = dndSortable;
    return (
      <DndContext
        sensors={dndContextProps.sensors}
        collisionDetection={dndContextProps.collisionDetection}
        modifiers={dndContextProps.modifiers}
        onDragStart={dndContextProps.onDragStart}
        onDragEnd={dndContextProps.onDragEnd}
        onDragCancel={dndContextProps.onDragCancel}
      >
        <SortableContext
          items={sortableItems}
          strategy={verticalListSortingStrategy}
        >
          <tbody ref={ref} {...props} />
        </SortableContext>
        <DragOverlay dropAnimation={null}>
          {activeId && renderDragOverlay ? renderDragOverlay(activeId) : null}
        </DragOverlay>
      </DndContext>
    );
  }

  return <tbody ref={ref} {...props} />;
}

export default TableBody;
export type { TableBodyProps, DraggableProps };


================================================
FILE: web/lib/opal/src/components/table/TableCell.tsx
================================================
import { cn } from "@opal/utils";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import type { WithoutStyles } from "@/types";

interface TableCellProps
  extends WithoutStyles<React.TdHTMLAttributes<HTMLTableCellElement>> {
  children: React.ReactNode;
  /** Explicit pixel width for the cell. */
  width?: number;
}

export default function TableCell({
  width,
  children,
  ...props
}: TableCellProps) {
  const resolvedSize = useTableSize();
  return (
    <td
      className="tbl-cell overflow-hidden"
      data-size={resolvedSize}
      style={width != null ? { width } : undefined}
      {...props}
    >
      <div
        className={cn("tbl-cell-inner", "flex items-center overflow-hidden")}
        data-size={resolvedSize}
      >
        {children}
      </div>
    </td>
  );
}

export type { TableCellProps };


================================================
FILE: web/lib/opal/src/components/table/TableElement.tsx
================================================
"use client";

import React from "react";
import { cn } from "@opal/utils";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import type { WithoutStyles } from "@/types";
import type { ExtremaSizeVariants, SizeVariants } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type TableSize = Extract<SizeVariants, "md" | "lg">;
type TableVariant = "rows" | "cards";
type SelectionBehavior = "no-select" | "single-select" | "multi-select";

interface TableProps
  extends WithoutStyles<React.TableHTMLAttributes<HTMLTableElement>> {
  ref?: React.Ref<HTMLTableElement>;
  /** Visual row variant. @default "cards" */
  variant?: TableVariant;
  /** Row selection behavior. @default "no-select" */
  selectionBehavior?: SelectionBehavior;
  /** Height behavior. `"fit"` = shrink to content, `"full"` = fill available space. */
  heightVariant?: ExtremaSizeVariants;
  /** Explicit pixel width for the table (e.g. from `table.getTotalSize()`).
   *  When provided the table uses exactly this width instead of stretching
   *  to fill its container, which prevents `table-layout: fixed` from
   *  redistributing extra space across columns on resize. */
  width?: number;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

function Table({
  ref,
  variant = "cards",
  selectionBehavior = "no-select",
  heightVariant,
  width,
  ...props
}: TableProps) {
  const size = useTableSize();
  return (
    <table
      ref={ref}
      className={cn("border-separate border-spacing-0", !width && "min-w-full")}
      style={{ width }}
      data-size={size}
      data-variant={variant}
      data-selection={selectionBehavior}
      data-height={heightVariant}
      {...props}
    />
  );
}

export default Table;
export type { TableProps, TableSize, TableVariant, SelectionBehavior };


================================================
FILE: web/lib/opal/src/components/table/TableHead.tsx
================================================
import { cn } from "@opal/utils";
import Text from "@/refresh-components/texts/Text";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import type { WithoutStyles } from "@/types";
import { Button } from "@opal/components";
import { SvgChevronDown, SvgChevronUp, SvgHandle, SvgSort } from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";

export type SortDirection = "none" | "ascending" | "descending";

/**
 * A table header cell with optional sort controls and a resize handle indicator.
 * Renders as a `<th>` element with Figma-matched typography and spacing.
 */
interface TableHeadCustomProps {
  /** Header label content. */
  children: React.ReactNode;
  /** Current sort state. When omitted, no sort button is shown. */
  sorted?: SortDirection;
  /** Called when the sort button is clicked. Required to show the sort button. */
  onSort?: () => void;
  /** When `true`, renders a thin resize handle on the right edge. */
  resizable?: boolean;
  /** Called when a resize drag begins on the handle. Attach TanStack's
   *  `header.getResizeHandler()` here to enable column resizing. */
  onResizeStart?: (event: React.MouseEvent | React.TouchEvent) => void;
  /** Override the sort icon for this column. Receives the current sort state and
   *  returns the icon component to render. Falls back to the built-in icons. */
  icon?: (sorted: SortDirection) => IconFunctionComponent;
  /** Text alignment for the column. Defaults to `"left"`. */
  alignment?: "left" | "center" | "right";
  /** Column width in pixels. Applied as an inline style on the `<th>`. */
  width?: number;
  /** When `true`, shows a bottom border on hover. Defaults to `true`. */
  bottomBorder?: boolean;
}

type TableHeadProps = WithoutStyles<
  TableHeadCustomProps &
    Omit<
      React.ThHTMLAttributes<HTMLTableCellElement>,
      keyof TableHeadCustomProps
    >
>;

/**
 * Table header cell primitive. Displays a column label with optional sort
 * functionality and a resize handle indicator.
 */
function defaultSortIcon(sorted: SortDirection): IconFunctionComponent {
  switch (sorted) {
    case "ascending":
      return SvgChevronUp;
    case "descending":
      return SvgChevronDown;
    default:
      return SvgSort;
  }
}

const alignmentThClass = {
  left: "text-left",
  center: "text-center",
  right: "text-right",
} as const;

const alignmentFlexClass = {
  left: "justify-start",
  center: "justify-center",
  right: "justify-end",
} as const;

export default function TableHead({
  children,
  sorted,
  onSort,
  icon: iconFn = defaultSortIcon,
  resizable,
  onResizeStart,
  alignment = "left",
  width,
  bottomBorder = true,
  ...thProps
}: TableHeadProps) {
  const resolvedSize = useTableSize();
  const isSmall = resolvedSize === "md";
  return (
    <th
      {...thProps}
      style={width != null ? { width } : undefined}
      className={cn("table-head group", alignmentThClass[alignment])}
      data-size={resolvedSize}
      data-bottom-border={bottomBorder || undefined}
    >
      <div className="flex items-center gap-1">
        <div className="table-head-label">
          <Text
            mainUiAction={!isSmall}
            secondaryAction={isSmall}
            text04
            className="truncate"
          >
            {children}
          </Text>
        </div>
        <div
          className={cn(
            "table-head-sort",
            "opacity-0 group-hover:opacity-100 transition-opacity"
          )}
        >
          {onSort && (
            <Button
              icon={iconFn(sorted ?? "none")}
              onClick={onSort}
              tooltip="Sort"
              tooltipSide="top"
              prominence="internal"
              size="sm"
            />
          )}
        </div>
      </div>
      {resizable && (
        <div
          onMouseDown={onResizeStart}
          onTouchStart={onResizeStart}
          className={cn(
            "absolute right-0 top-0 flex h-full items-center",
            "text-border-02",
            "opacity-0 group-hover:opacity-100",
            "cursor-col-resize",
            "select-none touch-none"
          )}
        >
          <SvgHandle size={22} className="stroke-border-02" />
        </div>
      )}
    </th>
  );
}


================================================
FILE: web/lib/opal/src/components/table/TableHeader.tsx
================================================
import type { WithoutStyles } from "@/types";

interface TableHeaderProps
  extends WithoutStyles<React.HTMLAttributes<HTMLTableSectionElement>> {
  ref?: React.Ref<HTMLTableSectionElement>;
}

function TableHeader({ ref, ...props }: TableHeaderProps) {
  return <thead ref={ref} {...props} />;
}

export default TableHeader;
export type { TableHeaderProps };


================================================
FILE: web/lib/opal/src/components/table/TableQualifier.tsx
================================================
"use client";

import React from "react";
import { cn } from "@opal/utils";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import type { IconFunctionComponent } from "@opal/types";
import type { QualifierContentType } from "@opal/components/table/types";
import Checkbox from "@/refresh-components/inputs/Checkbox";

interface TableQualifierProps {
  /** Content type displayed in the qualifier */
  content: QualifierContentType;
  /** Disables interaction */
  disabled?: boolean;
  /** Whether to show a selection checkbox overlay */
  selectable?: boolean;
  /** Whether the row is currently selected */
  selected?: boolean;
  /** Called when the checkbox is toggled */
  onSelectChange?: (selected: boolean) => void;
  /** Icon component to render (for "icon" content). */
  icon?: IconFunctionComponent;
  /** Image source URL (for "image" content). */
  imageSrc?: string;
  /** Image alt text (for "image" content). */
  imageAlt?: string;
  /** Show a tinted background container behind the content. */
  background?: boolean;
  /** Icon size preset. `"lg"` = 28/24, `"md"` = 20/16. @default "md" */
  iconSize?: "lg" | "md";
}

const iconSizesMap = {
  lg: { lg: 28, md: 24 },
  md: { lg: 20, md: 16 },
} as const;

function getOverlayStyles(selected: boolean, disabled: boolean) {
  if (disabled) {
    return selected ? "flex bg-action-link-00" : "hidden";
  }
  if (selected) {
    return "flex bg-action-link-00";
  }
  return "flex opacity-0 group-hover/row:opacity-100 group-focus-within/row:opacity-100 bg-background-tint-01";
}

function TableQualifier({
  content,
  disabled = false,
  selectable = false,
  selected = false,
  onSelectChange,
  icon: Icon,
  imageSrc,
  imageAlt = "",
  background = false,
  iconSize: iconSizePreset = "md",
}: TableQualifierProps) {
  const resolvedSize = useTableSize();
  const iconSize = iconSizesMap[iconSizePreset][resolvedSize];
  const overlayStyles = getOverlayStyles(selected, disabled);

  function renderContent() {
    switch (content) {
      case "icon":
        return Icon ? <Icon size={iconSize} /> : null;

      case "image":
        return imageSrc ? (
          <img
            src={imageSrc}
            alt={imageAlt}
            className="h-full w-full rounded-08 object-cover"
          />
        ) : null;

      case "simple":
      default:
        return null;
    }
  }

  const inner = renderContent();
  const showBackground = background && content !== "simple";

  return (
    <div
      className={cn(
        "group relative inline-flex shrink-0 items-center justify-center",
        resolvedSize === "lg" ? "h-9 w-9" : "h-7 w-7",
        disabled ? "cursor-not-allowed" : "cursor-default"
      )}
    >
      {showBackground ? (
        <div
          className={cn(
            "flex items-center justify-center overflow-hidden rounded-08 transition-colors",
            resolvedSize === "lg" ? "h-9 w-9" : "h-7 w-7",
            disabled
              ? "bg-background-neutral-03"
              : selected
                ? "bg-action-link-00"
                : "bg-background-tint-01"
          )}
        >
          {inner}
        </div>
      ) : (
        inner
      )}

      {/* Selection overlay */}
      {selectable && (
        <div
          className={cn(
            "absolute inset-0 items-center justify-center rounded-08",
            content === "simple" ? "flex" : overlayStyles
          )}
        >
          <Checkbox
            checked={selected}
            onCheckedChange={onSelectChange}
            disabled={disabled}
          />
        </div>
      )}
    </div>
  );
}

export default TableQualifier;


================================================
FILE: web/lib/opal/src/components/table/TableRow.tsx
================================================
"use client";

import { cn } from "@opal/utils";
import { useTableSize } from "@opal/components/table/TableSizeContext";
import type { WithoutStyles } from "@/types";
import { useSortable } from "@dnd-kit/sortable";
import { CSS } from "@dnd-kit/utilities";
import { SvgHandle } from "@opal/icons";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

export interface TableRowProps
  extends WithoutStyles<React.HTMLAttributes<HTMLTableRowElement>> {
  ref?: React.Ref<HTMLTableRowElement>;
  selected?: boolean;
  /** Disables interaction and applies disabled styling */
  disabled?: boolean;
  /** When provided, makes this row sortable via @dnd-kit */
  sortableId?: string;
  /** Show drag handle overlay. Defaults to true when sortableId is set. */
  showDragHandle?: boolean;
}

// ---------------------------------------------------------------------------
// Internal: sortable row
// ---------------------------------------------------------------------------

function SortableTableRow({
  sortableId,
  showDragHandle = true,
  selected,
  disabled,
  ref: _externalRef,
  children,
  ...props
}: TableRowProps) {
  const resolvedSize = useTableSize();

  const {
    attributes,
    listeners,
    setNodeRef,
    transform,
    transition,
    isDragging,
  } = useSortable({ id: sortableId! });

  const style: React.CSSProperties = {
    transform: CSS.Transform.toString(transform),
    transition,
    opacity: isDragging ? 0 : undefined,
  };

  return (
    <tr
      ref={setNodeRef}
      style={style}
      className="tbl-row group/row"
      data-drag-handle={showDragHandle || undefined}
      data-selected={selected || undefined}
      data-disabled={disabled || undefined}
      {...attributes}
      {...props}
    >
      {children}
      {showDragHandle && (
        <td
          style={{
            width: 0,
            padding: 0,
            position: "relative",
            zIndex: 20,
          }}
        >
          <button
            type="button"
            className={cn(
              "absolute right-0 top-1/2 -translate-y-1/2 cursor-grab",
              "opacity-0 group-hover/row:opacity-100 transition-opacity",
              "flex items-center justify-center rounded"
            )}
            aria-label="Drag to reorder"
            onMouseDown={(e) => e.preventDefault()}
            {...listeners}
          >
            <SvgHandle
              size={resolvedSize === "md" ? 12 : 16}
              className="text-border-02"
            />
          </button>
        </td>
      )}
    </tr>
  );
}

// ---------------------------------------------------------------------------
// Main component
// ---------------------------------------------------------------------------

export default function TableRow({
  sortableId,
  showDragHandle,
  selected,
  disabled,
  ref,
  ...props
}: TableRowProps) {
  if (sortableId) {
    return (
      <SortableTableRow
        sortableId={sortableId}
        showDragHandle={showDragHandle}
        selected={selected}
        disabled={disabled}
        ref={ref}
        {...props}
      />
    );
  }

  return (
    <tr
      ref={ref}
      className="tbl-row group/row"
      data-selected={selected || undefined}
      data-disabled={disabled || undefined}
      {...props}
    />
  );
}


================================================
FILE: web/lib/opal/src/components/table/TableSizeContext.tsx
================================================
"use client";

import { createContext, useContext } from "react";
import type { SizeVariants } from "@opal/types";

type TableSize = Extract<SizeVariants, "md" | "lg">;

const TableSizeContext = createContext<TableSize>("lg");

interface TableSizeProviderProps {
  size: TableSize;
  children: React.ReactNode;
}

function TableSizeProvider({ size, children }: TableSizeProviderProps) {
  return (
    <TableSizeContext.Provider value={size}>
      {children}
    </TableSizeContext.Provider>
  );
}

function useTableSize(): TableSize {
  return useContext(TableSizeContext);
}

export { TableSizeProvider, useTableSize };
export type { TableSize };


================================================
FILE: web/lib/opal/src/components/table/columns.ts
================================================
import type { ReactNode } from "react";
import {
  createColumnHelper,
  type ColumnDef,
  type DeepKeys,
  type DeepValue,
  type CellContext,
} from "@tanstack/react-table";
import type {
  ColumnWidth,
  QualifierContentType,
  OnyxQualifierColumn,
  OnyxDataColumn,
  OnyxDisplayColumn,
  OnyxActionsColumn,
} from "@opal/components/table/types";
import type { TableSize } from "@opal/components/table/TableSizeContext";
import type { IconFunctionComponent } from "@opal/types";
import type { SortDirection } from "@opal/components/table/TableHead";

// ---------------------------------------------------------------------------
// Qualifier column config
// ---------------------------------------------------------------------------

interface QualifierConfig<TData> {
  /** Content type for body-row `<TableQualifier>`. @default "simple" */
  content?: QualifierContentType;
  /** Return the icon component to render for a row (for "icon" content). */
  getContent?: (row: TData) => IconFunctionComponent;
  /** Return the image URL to render for a row (for "image" content). */
  getImageSrc?: (row: TData) => string;
  /** Return the image alt text for a row (for "image" content). @default "" */
  getImageAlt?: (row: TData) => string;
  /** Show a tinted background container behind the content. @default false */
  background?: boolean;
  /** Icon size preset. `"lg"` = 28/24, `"md"` = 20/16. @default "md" */
  iconSize?: "lg" | "md";
}

// ---------------------------------------------------------------------------
// Data column config
// ---------------------------------------------------------------------------

interface DataColumnConfig<TData, TValue> {
  /** Column header label. */
  header: string;
  /** Custom cell renderer. If omitted, the value is rendered as a string. */
  cell?: (value: TValue, row: TData) => ReactNode;
  /** Enable sorting for this column. @default true */
  enableSorting?: boolean;
  /** Enable resizing for this column. @default true */
  enableResizing?: boolean;
  /** Enable hiding for this column. @default true */
  enableHiding?: boolean;
  /** Override the sort icon for this column. */
  icon?: (sorted: SortDirection) => IconFunctionComponent;
  /** Column weight for proportional distribution. @default 20 */
  weight?: number;
}

// ---------------------------------------------------------------------------
// Display column config
// ---------------------------------------------------------------------------

interface DisplayColumnConfig<TData> {
  /** Unique column ID. */
  id: string;
  /** Column header label. */
  header?: string;
  /** Cell renderer. */
  cell: (row: TData) => ReactNode;
  /** Column width config. */
  width: ColumnWidth;
  /** Enable hiding. @default true */
  enableHiding?: boolean;
}

// ---------------------------------------------------------------------------
// Actions column config
// ---------------------------------------------------------------------------

interface ActionsConfig<TData = any> {
  /** Show column visibility popover. @default true */
  showColumnVisibility?: boolean;
  /** Show sorting popover. @default true */
  showSorting?: boolean;
  /** Footer text for the sorting popover. */
  sortingFooterText?: string;
  /** Optional cell renderer for row-level action buttons. */
  cell?: (row: TData) => ReactNode;
}

// ---------------------------------------------------------------------------
// Builder return type
// ---------------------------------------------------------------------------

interface TableColumnsBuilder<TData> {
  /** Create a qualifier (leading avatar/checkbox) column. */
  qualifier(config?: QualifierConfig<TData>): OnyxQualifierColumn<TData>;

  /** Create a data (accessor) column. */
  column<TKey extends DeepKeys<TData>>(
    accessor: TKey,
    config: DataColumnConfig<TData, DeepValue<TData, TKey>>
  ): OnyxDataColumn<TData>;

  /** Create a display (non-accessor) column. */
  displayColumn(config: DisplayColumnConfig<TData>): OnyxDisplayColumn<TData>;

  /** Create an actions column (visibility/sorting popovers). */
  actions(config?: ActionsConfig<TData>): OnyxActionsColumn<TData>;
}

// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------

/**
 * Creates a typed column builder for a given row type.
 *
 * Internally uses TanStack's `createColumnHelper<TData>()` to get free
 * `DeepKeys`/`DeepValue` inference for accessor columns.
 *
 * **Important**: Define columns at module scope or wrap in `useMemo` to avoid
 * creating new array references per render.
 *
 * @example
 * ```ts
 * const tc = createTableColumns<TeamMember>();
 * const columns = [
 *   tc.qualifier({ content: "icon", getContent: (r) => UserIcon }),
 *   tc.column("name", { header: "Name", weight: 23 }),
 *   tc.column("email", { header: "Email", weight: 28 }),
 *   tc.actions(),
 * ];
 * ```
 */
export function createTableColumns<TData>(): TableColumnsBuilder<TData> {
  const helper = createColumnHelper<TData>();

  return {
    qualifier(config?: QualifierConfig<TData>): OnyxQualifierColumn<TData> {
      const content = config?.content ?? "simple";

      const def: ColumnDef<TData, any> = helper.display({
        id: "qualifier",
        enableResizing: false,
        enableSorting: false,
        enableHiding: false,
        // Cell rendering is handled by DataTable based on the qualifier config
        cell: () => null,
      });

      return {
        kind: "qualifier",
        id: "qualifier",
        def,
        width: (size: TableSize) =>
          size === "md" ? { fixed: 36 } : { fixed: 44 },
        content,
        getContent: config?.getContent,
        getImageSrc: config?.getImageSrc,
        getImageAlt: config?.getImageAlt,
        background: config?.background,
        iconSize: config?.iconSize,
      };
    },

    column<TKey extends DeepKeys<TData>>(
      accessor: TKey,
      config: DataColumnConfig<TData, DeepValue<TData, TKey>>
    ): OnyxDataColumn<TData> {
      const {
        header,
        cell,
        enableSorting = true,
        enableResizing = true,
        enableHiding = true,
        icon,
        weight = 20,
      } = config;

      const def = helper.accessor(accessor as any, {
        header,
        enableSorting,
        enableResizing,
        enableHiding,
        cell: cell
          ? (info: CellContext<TData, any>) =>
              cell(info.getValue(), info.row.original)
          : undefined,
      }) as ColumnDef<TData, any>;

      return {
        kind: "data",
        id: accessor as string,
        def,
        width: { weight, minWidth: Math.max(header.length * 8 + 40, 80) },
        icon,
      };
    },

    displayColumn(
      config: DisplayColumnConfig<TData>
    ): OnyxDisplayColumn<TData> {
      const { id, header, cell, width, enableHiding = true } = config;

      const def: ColumnDef<TData, any> = helper.display({
        id,
        header: header ?? undefined,
        enableHiding,
        enableSorting: false,
        enableResizing: false,
        cell: (info) => cell(info.row.original),
      });

      return {
        kind: "display",
        id,
        def,
        width,
      };
    },

    actions(config?: ActionsConfig<TData>): OnyxActionsColumn<TData> {
      const def: ColumnDef<TData, any> = {
        id: "__actions",
        enableHiding: false,
        enableSorting: false,
        enableResizing: false,
        // Header rendering is handled by DataTable based on the actions config
        header: () => null,
        cell: config?.cell
          ? (info: CellContext<TData, any>) => config.cell!(info.row.original)
          : () => null,
      };

      const showVisibility = config?.showColumnVisibility ?? true;
      const showSorting = config?.showSorting ?? true;
      const buttonCount = (showVisibility ? 1 : 0) + (showSorting ? 1 : 0);

      // Icon button sizes: "md" button = 28px, "sm" button = 24px
      // px-1 on .tbl-actions = 4px each side = 8px total
      const BUTTON_MD = 28;
      const BUTTON_SM = 24;
      const PADDING = 8;

      return {
        kind: "actions",
        id: "__actions",
        def,
        width: (size: TableSize) => ({
          fixed:
            Math.max(
              buttonCount * (size === "md" ? BUTTON_SM : BUTTON_MD),
              size === "md" ? BUTTON_SM : BUTTON_MD
            ) + PADDING,
        }),
        showColumnVisibility: showVisibility,
        showSorting: showSorting,
        sortingFooterText: config?.sortingFooterText,
      };
    },
  };
}


================================================
FILE: web/lib/opal/src/components/table/components.tsx
================================================
"use client";
"use no memo";

import "@opal/components/table/styles.css";

import { useEffect, useMemo } from "react";
import { flexRender } from "@tanstack/react-table";
import useDataTable, {
  toOnyxSortDirection,
} from "@opal/components/table/hooks/useDataTable";
import useColumnWidths from "@opal/components/table/hooks/useColumnWidths";
import useDraggableRows from "@opal/components/table/hooks/useDraggableRows";
import TableElement from "@opal/components/table/TableElement";
import TableHeader from "@opal/components/table/TableHeader";
import TableBody from "@opal/components/table/TableBody";
import TableRow from "@opal/components/table/TableRow";
import TableHead from "@opal/components/table/TableHead";
import TableCell from "@opal/components/table/TableCell";
import TableQualifier from "@opal/components/table/TableQualifier";
import QualifierContainer from "@opal/components/table/QualifierContainer";
import ActionsContainer from "@opal/components/table/ActionsContainer";
import DragOverlayRow from "@opal/components/table/DragOverlayRow";
import Footer from "@opal/components/table/Footer";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { TableSizeProvider } from "@opal/components/table/TableSizeContext";
import { ColumnVisibilityPopover } from "@opal/components/table/ColumnVisibilityPopover";
import { SortingPopover } from "@opal/components/table/ColumnSortabilityPopover";
import type { WidthConfig } from "@opal/components/table/hooks/useColumnWidths";
import type { ColumnDef } from "@tanstack/react-table";
import { cn } from "@opal/utils";
import type {
  DataTableProps as BaseDataTableProps,
  DataTableFooterConfig,
  OnyxColumnDef,
  OnyxDataColumn,
  OnyxQualifierColumn,
  OnyxActionsColumn,
} from "@opal/components/table/types";
import type { TableSize } from "@opal/components/table/TableSizeContext";

// ---------------------------------------------------------------------------
// SelectionBehavior
// ---------------------------------------------------------------------------

type SelectionBehavior = "no-select" | "single-select" | "multi-select";

export type DataTableProps<TData> = BaseDataTableProps<TData> & {
  /** Row selection behavior. @default "no-select" */
  selectionBehavior?: SelectionBehavior;
};

// ---------------------------------------------------------------------------
// Internal: resolve size-dependent widths and build TanStack columns
// ---------------------------------------------------------------------------

interface ProcessedColumns<TData> {
  tanstackColumns: ColumnDef<TData, any>[];
  widthConfig: WidthConfig;
  qualifierColumn: OnyxQualifierColumn<TData> | null;
  /** Map from column ID → OnyxColumnDef for dispatch in render loops. */
  columnKindMap: Map<string, OnyxColumnDef<TData>>;
}

function processColumns<TData>(
  columns: OnyxColumnDef<TData>[],
  size: TableSize
): ProcessedColumns<TData> {
  const tanstackColumns: ColumnDef<TData, any>[] = [];
  const fixedColumnIds = new Set<string>();
  const columnWeights: Record<string, number> = {};
  const columnMinWidths: Record<string, number> = {};
  const columnKindMap = new Map<string, OnyxColumnDef<TData>>();
  let qualifierColumn: OnyxQualifierColumn<TData> | null = null;
  let firstDataColumnSeen = false;

  for (const col of columns) {
    const resolvedWidth =
      typeof col.width === "function" ? col.width(size) : col.width;

    // Clone def to avoid mutating the caller's column definitions
    const clonedDef: ColumnDef<TData, any> = {
      ...col.def,
      id: col.id,
      size:
        "fixed" in resolvedWidth ? resolvedWidth.fixed : resolvedWidth.weight,
    };

    // First data column is never hideable
    if (col.kind === "data" && !firstDataColumnSeen) {
      firstDataColumnSeen = true;
      clonedDef.enableHiding = false;
    }

    tanstackColumns.push(clonedDef);

    const id = col.id;
    columnKindMap.set(id, col);

    if ("fixed" in resolvedWidth) {
      fixedColumnIds.add(id);
    } else {
      columnWeights[id] = resolvedWidth.weight;
      columnMinWidths[id] = resolvedWidth.minWidth ?? 50;
    }

    if (col.kind === "qualifier") qualifierColumn = col;
  }

  return {
    tanstackColumns,
    widthConfig: { fixedColumnIds, columnWeights, columnMinWidths },
    qualifierColumn,
    columnKindMap,
  };
}

// ---------------------------------------------------------------------------
// DataTable component
// ---------------------------------------------------------------------------

/**
 * Config-driven table component that wires together `useDataTable`,
 * `useColumnWidths`, and `useDraggableRows` automatically.
 *
 * Full flexibility via the column definitions from `createTableColumns()`.
 *
 * @example
 * ```tsx
 * const tc = createTableColumns<TeamMember>();
 * const columns = [
 *   tc.qualifier({ content: "icon", getContent: (r) => UserIcon }),
 *   tc.column("name", { header: "Name", weight: 23 }),
 *   tc.column("email", { header: "Email", weight: 28 }),
 *   tc.actions(),
 * ];
 *
 * <Table data={items} columns={columns} footer={{}} />
 * ```
 */
export function Table<TData>(props: DataTableProps<TData>) {
  const {
    data,
    columns,
    getRowId,
    pageSize,
    initialSorting,
    initialColumnVisibility,
    initialRowSelection,
    initialViewSelected,
    draggable,
    footer,
    size = "lg",
    variant = "cards",
    selectionBehavior = "no-select",
    onSelectionChange,
    onRowClick,
    searchTerm,
    height,
    serverSide,
    emptyState,
  } = props;

  const effectivePageSize = pageSize ?? (footer ? 10 : data.length);

  // Whether the qualifier column should exist in the DOM.
  // Derived from the column definitions: if a qualifier column exists with
  // content !== "simple", always show it. If content === "simple" (or no
  // qualifier column defined), show only for multi-select (checkboxes).
  const qualifierColDef = columns.find(
    (c): c is OnyxQualifierColumn<TData> => c.kind === "qualifier"
  );
  const hasQualifierColumn =
    (qualifierColDef != null && qualifierColDef.content !== "simple") ||
    selectionBehavior === "multi-select";

  // 1. Process columns (memoized on columns + size)
  const { tanstackColumns, widthConfig, qualifierColumn, columnKindMap } =
    useMemo(() => {
      const processed = processColumns(columns, size);
      if (!hasQualifierColumn) {
        // Remove qualifier from TanStack columns and width config entirely
        return {
          ...processed,
          tanstackColumns: processed.tanstackColumns.filter(
            (c) => c.id !== "qualifier"
          ),
          widthConfig: {
            ...processed.widthConfig,
            fixedColumnIds: new Set(
              Array.from(processed.widthConfig.fixedColumnIds).filter(
                (id) => id !== "qualifier"
              )
            ),
          },
          qualifierColumn: null,
        };
      }
      return processed;
    }, [columns, size, hasQualifierColumn]);

  // 2. Call useDataTable
  const {
    table,
    currentPage,
    totalPages,
    totalItems,
    setPage,
    pageSize: resolvedPageSize,
    selectionState,
    selectedCount,
    selectedRowIds,
    clearSelection,
    toggleAllPageRowsSelected,
    toggleAllRowsSelected,
    isAllPageRowsSelected,
    isAllRowsSelected,
    isViewingSelected,
    enterViewMode,
    exitViewMode,
  } = useDataTable({
    data,
    columns: tanstackColumns,
    pageSize: effectivePageSize,
    initialSorting,
    initialColumnVisibility,
    initialRowSelection,
    initialViewSelected,
    getRowId,
    onSelectionChange,
    searchTerm,
    serverSide: serverSide
      ? {
          totalItems: serverSide.totalItems,
          onSortingChange: serverSide.onSortingChange,
          onPaginationChange: serverSide.onPaginationChange,
          onSearchTermChange: serverSide.onSearchTermChange,
        }
      : undefined,
  });

  // 3. Call useColumnWidths
  const { containerRef, columnWidths, createResizeHandler } = useColumnWidths({
    headers: table.getHeaderGroups()[0]?.headers ?? [],
    ...widthConfig,
  });

  // 4. Call useDraggableRows (conditional — disabled in server-side mode)
  useEffect(() => {
    if (process.env.NODE_ENV !== "production" && serverSide && draggable) {
      console.warn(
        "DataTable: `draggable` is ignored when `serverSide` is enabled. " +
          "Drag-and-drop reordering is not supported with server-side pagination."
      );
    }
  }, [!!serverSide, !!draggable]); // eslint-disable-line react-hooks/exhaustive-deps
  const effectiveDraggable = serverSide ? undefined : draggable;
  const draggableReturn = useDraggableRows({
    data,
    getRowId,
    enabled: !!effectiveDraggable && table.getState().sorting.length === 0,
    onReorder: effectiveDraggable?.onReorder,
  });

  const hasDraggable = !!effectiveDraggable;

  const isSelectable = selectionBehavior !== "no-select";
  const isMultiSelect = selectionBehavior === "multi-select";
  // Checkboxes appear for any selectable table
  const showQualifierCheckbox = isSelectable;

  // ---------------------------------------------------------------------------
  // Render
  // ---------------------------------------------------------------------------

  const isServerLoading = !!serverSide?.isLoading;

  function renderFooter(footerConfig: DataTableFooterConfig) {
    // Mode derived from selectionBehavior — single/multi-select use selection
    // footer, no-select uses summary footer.
    if (isSelectable) {
      return (
        <Footer
          mode="selection"
          multiSelect={isMultiSelect}
          selectionState={selectionState}
          selectedCount={selectedCount}
          onClear={
            footerConfig.onClear ??
            (() => {
              if (isViewingSelected) exitViewMode();
              clearSelection();
            })
          }
          onView={
            !serverSide
              ? isViewingSelected
                ? exitViewMode
                : enterViewMode
              : undefined
          }
          isViewingSelected={isViewingSelected}
          pageSize={resolvedPageSize}
          totalItems={totalItems}
          currentPage={currentPage}
          totalPages={totalPages}
          onPageChange={setPage}
          units={footerConfig.units}
        />
      );
    }

    // Summary mode (no-select only)
    const rangeStart =
      totalItems === 0
        ? 0
        : !isFinite(resolvedPageSize)
          ? 1
          : (currentPage - 1) * resolvedPageSize + 1;
    const rangeEnd = !isFinite(resolvedPageSize)
      ? totalItems
      : Math.min(currentPage * resolvedPageSize, totalItems);

    return (
      <Footer
        mode="summary"
        rangeStart={rangeStart}
        rangeEnd={rangeEnd}
        totalItems={totalItems}
        currentPage={currentPage}
        totalPages={totalPages}
        onPageChange={setPage}
        leftExtra={footerConfig.leftExtra}
        units={footerConfig.units}
      />
    );
  }

  return (
    <TableSizeProvider size={size}>
      <div>
        <div
          className={cn(
            "overflow-x-auto transition-opacity duration-150",
            isServerLoading && "opacity-50 pointer-events-none"
          )}
          ref={containerRef}
          style={{
            ...(height != null
              ? {
                  maxHeight:
                    typeof height === "number" ? `${height}px` : height,
                  overflowY: "auto" as const,
                }
              : undefined),
          }}
        >
          <TableElement
            variant={variant}
            selectionBehavior={selectionBehavior}
            width={
              Object.keys(columnWidths).length > 0
                ? Object.values(columnWidths).reduce((sum, w) => sum + w, 0)
                : undefined
            }
          >
            <colgroup>
              {table.getVisibleLeafColumns().map((col) => (
                <col
                  key={col.id}
                  style={
                    columnWidths[col.id] != null
                      ? { width: columnWidths[col.id] }
                      : undefined
                  }
                />
              ))}
            </colgroup>
            <TableHeader>
              {table.getHeaderGroups().map((headerGroup) => (
                <TableRow key={headerGroup.id}>
                  {headerGroup.headers.map((header, headerIndex) => {
                    const colDef = columnKindMap.get(header.id);

                    // Qualifier header — select-all checkbox only for multi-select
                    if (colDef?.kind === "qualifier") {
                      return (
                        <QualifierContainer key={header.id} type="head">
                          {isMultiSelect && (
                            <Checkbox
                              checked={isAllRowsSelected}
                              indeterminate={
                                !isAllRowsSelected && selectedCount > 0
                              }
                              onCheckedChange={(checked) => {
                                // Indeterminate → clear all; otherwise toggle normally
                                if (!isAllRowsSelected && selectedCount > 0) {
                                  toggleAllRowsSelected(false);
                                } else {
                                  toggleAllRowsSelected(checked);
                                }
                              }}
                            />
                          )}
                        </QualifierContainer>
                      );
                    }

                    // Actions header
                    if (colDef?.kind === "actions") {
                      const actionsDef = colDef as OnyxActionsColumn<TData>;
                      return (
                        <ActionsContainer key={header.id} type="head">
                          {actionsDef.showColumnVisibility !== false && (
                            <ColumnVisibilityPopover
                              table={table}
                              columnVisibility={
                                table.getState().columnVisibility
                              }
                            />
                          )}
                          {actionsDef.showSorting !== false && (
                            <SortingPopover
                              table={table}
                              sorting={table.getState().sorting}
                              footerText={actionsDef.sortingFooterText}
                            />
                          )}
                        </ActionsContainer>
                      );
                    }

                    // Data / Display header
                    const canSort = header.column.getCanSort();
                    const sortDir = header.column.getIsSorted();
                    const nextHeader = headerGroup.headers[headerIndex + 1];
                    const canResize =
                      header.column.getCanResize() &&
                      !!nextHeader &&
                      !widthConfig.fixedColumnIds.has(nextHeader.id);

                    const dataCol =
                      colDef?.kind === "data"
                        ? (colDef as OnyxDataColumn<TData>)
                        : null;

                    return (
                      <TableHead
                        key={header.id}
                        width={columnWidths[header.id]}
                        sorted={
                          canSort ? toOnyxSortDirection(sortDir) : undefined
                        }
                        onSort={
                          canSort
                            ? () => header.column.toggleSorting()
                            : undefined
                        }
                        icon={dataCol?.icon}
                        resizable={canResize}
                        onResizeStart={
                          canResize
                            ? createResizeHandler(header.id, nextHeader.id)
                            : undefined
                        }
                      >
                        {flexRender(
                          header.column.columnDef.header,
                          header.getContext()
                        )}
                      </TableHead>
                    );
                  })}
                </TableRow>
              ))}
            </TableHeader>

            <TableBody
              dndSortable={hasDraggable ? draggableReturn : undefined}
              renderDragOverlay={
                hasDraggable
                  ? (activeId) => {
                      const row = table
                        .getRowModel()
                        .rows.find((r) => getRowId(r.original) === activeId);
                      if (!row) return null;
                      return (
                        <DragOverlayRow
                          row={row}
                          columnWidths={columnWidths}
                          columnKindMap={columnKindMap}
                          qualifierColumn={qualifierColumn}
                          isSelectable={isSelectable}
                        />
                      );
                    }
                  : undefined
              }
            >
              {emptyState && table.getRowModel().rows.length === 0 && (
                <tr>
                  <td colSpan={table.getVisibleLeafColumns().length}>
                    {emptyState}
                  </td>
                </tr>
              )}
              {table.getRowModel().rows.map((row) => {
                const rowId = hasDraggable ? getRowId(row.original) : undefined;

                return (
                  <TableRow
                    key={row.id}
                    sortableId={rowId}
                    selected={row.getIsSelected()}
                    onClick={() => {
                      if (
                        hasDraggable &&
                        draggableReturn.wasDraggingRef.current
                      ) {
                        return;
                      }
                      if (onRowClick) {
                        onRowClick(row.original);
                      } else if (isSelectable) {
                        if (!isMultiSelect) {
                          // single-select: clear all, then select this row
                          table.toggleAllRowsSelected(false);
                        }
                        row.toggleSelected();
                      }
                    }}
                  >
                    {row.getVisibleCells().map((cell) => {
                      const cellColDef = columnKindMap.get(cell.column.id);

                      // Qualifier cell
                      if (cellColDef?.kind === "qualifier") {
                        const qDef = cellColDef as OnyxQualifierColumn<TData>;

                        return (
                          <QualifierContainer
                            key={cell.id}
                            type="cell"
                            onClick={(e) => e.stopPropagation()}
                          >
                            <TableQualifier
                              content={qDef.content}
                              icon={qDef.getContent?.(row.original)}
                              imageSrc={qDef.getImageSrc?.(row.original)}
                              imageAlt={qDef.getImageAlt?.(row.original)}
                              background={qDef.background}
                              iconSize={qDef.iconSize}
                              selectable={showQualifierCheckbox}
                              selected={
                                showQualifierCheckbox && row.getIsSelected()
                              }
                              onSelectChange={
                                showQualifierCheckbox
                                  ? (checked) => {
                                      if (!isMultiSelect) {
                                        table.toggleAllRowsSelected(false);
                                      }
                                      row.toggleSelected(checked);
                                    }
                                  : undefined
                              }
                            />
                          </QualifierContainer>
                        );
                      }

                      // Actions cell
                      if (cellColDef?.kind === "actions") {
                        return (
                          <ActionsContainer
                            key={cell.id}
                            type="cell"
                            onClick={(e) => e.stopPropagation()}
                          >
                            {flexRender(
                              cell.column.columnDef.cell,
                              cell.getContext()
                            )}
                          </ActionsContainer>
                        );
                      }

                      // Data / Display cell
                      return (
                        <TableCell
                          key={cell.id}
                          data-column-id={cell.column.id}
                        >
                          {flexRender(
                            cell.column.columnDef.cell,
                            cell.getContext()
                          )}
                        </TableCell>
                      );
                    })}
                  </TableRow>
                );
              })}
            </TableBody>
          </TableElement>
        </div>

        {footer && renderFooter(footer)}
      </div>
    </TableSizeProvider>
  );
}


================================================
FILE: web/lib/opal/src/components/table/hooks/useColumnWidths.ts
================================================
"use client";

/**
 * useColumnWidths — Proportional column widths with splitter resize.
 *
 * WHY NOT TANSTACK'S BUILT-IN COLUMN SIZING?
 *
 * TanStack Table's column resize system (columnSizing state,
 * header.getResizeHandler(), columnResizeMode) doesn't support the
 * behavior our design requires:
 *
 * 1. No proportional fill — TanStack uses absolute pixel widths from
 *    columnDef.size. When the container is wider than the sum of sizes,
 *    the extra space is not distributed. We need weight-based proportional
 *    distribution so columns fill the container at any width.
 *
 * 2. No splitter semantics — TanStack's resize changes one column's size
 *    in isolation (the total table width grows/shrinks). We need "splitter"
 *    behavior: dragging column i's right edge grows column i and shrinks
 *    column i+1 by the same amount, keeping the total fixed. This prevents
 *    the actions column from jittering.
 *
 * 3. No per-column min-width enforcement during drag — TanStack only has a
 *    global minSize default. We enforce per-column min-widths and clamp the
 *    drag delta so neither the dragged column nor its neighbor can shrink
 *    below their floor.
 *
 * 4. No weight-based resize persistence — TanStack stores absolute pixel
 *    deltas. When the window resizes after a column drag, the proportions
 *    drift. We store weights, so a user-resized column scales proportionally
 *    with the container — the ratio is preserved, not the pixel count.
 *
 * APPROACH:
 *
 * We still rely on TanStack for everything else (sorting, pagination,
 * visibility, row selection). Only column width computation and resize
 * interaction are handled here. The columnDef.size values are used as
 * initial weights, and TanStack's enableResizing / getCanResize() flags
 * are still respected in the render loop.
 */

import {
  useState,
  useRef,
  useEffect,
  useLayoutEffect,
  useCallback,
} from "react";
import { Header } from "@tanstack/react-table";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** Extracted config ready to pass to useColumnWidths. */
export interface WidthConfig {
  fixedColumnIds: Set<string>;
  columnWeights: Record<string, number>;
  columnMinWidths: Record<string, number>;
}

interface UseColumnWidthsOptions {
  /** Visible headers from TanStack's first header group. */
  headers: Header<any, unknown>[];
  /** Column IDs that have fixed pixel widths (e.g. qualifier, actions). */
  fixedColumnIds: Set<string>;
  /** Explicit column weights (takes precedence over columnDef.size). */
  columnWeights?: Record<string, number>;
  /** Per-column minimum widths for data (non-fixed) columns. */
  columnMinWidths: Record<string, number>;
}

interface UseColumnWidthsReturn {
  /** Attach to the scrollable container for width measurement. */
  containerRef: React.RefObject<HTMLDivElement | null>;
  /** Computed pixel widths keyed by column ID. */
  columnWidths: Record<string, number>;
  /** Factory to create a splitter resize handler for a column pair. */
  createResizeHandler: (
    columnId: string,
    neighborId: string
  ) => (event: React.MouseEvent | React.TouchEvent) => void;
}

// ---------------------------------------------------------------------------
// Internal: measure container width via ResizeObserver
// ---------------------------------------------------------------------------

/** Tracks an element's content width via ResizeObserver, returning a ref and the current width. */
function useElementWidth(): [React.RefObject<HTMLDivElement | null>, number] {
  const ref = useRef<HTMLDivElement>(null);
  const [width, setWidth] = useState(0);
  useLayoutEffect(() => {
    const el = ref.current;
    if (!el) return;
    setWidth(el.clientWidth);
    const ro = new ResizeObserver((entries) => {
      const entry = entries[0];
      if (entry) setWidth(entry.contentRect.width);
    });
    ro.observe(el);
    return () => ro.disconnect();
  }, []);
  return [ref, width];
}

// ---------------------------------------------------------------------------
// Pure function: compute pixel widths from weights
// ---------------------------------------------------------------------------

/** Converts column weights into pixel widths, enforcing per-column minimums and fixed-column sizes. */
function computeColumnWidths(
  containerWidth: number,
  headers: Header<any, unknown>[],
  customWeights: Record<string, number>,
  fixedColumnIds: Set<string>,
  columnWeights: Record<string, number>,
  columnMinWidths: Record<string, number>
): Record<string, number> {
  const result: Record<string, number> = {};

  let fixedTotal = 0;
  const dataColumns: { id: string; weight: number; minWidth: number }[] = [];

  for (const h of headers) {
    const baseSize = h.column.columnDef.size ?? 20;
    if (fixedColumnIds.has(h.id)) {
      fixedTotal += baseSize;
    } else {
      dataColumns.push({
        id: h.id,
        weight: customWeights[h.id] ?? columnWeights[h.id] ?? baseSize,
        minWidth: columnMinWidths[h.id] ?? 50,
      });
    }
  }

  const tableMinWidth =
    fixedTotal + dataColumns.reduce((sum, col) => sum + col.minWidth, 0);
  const tableWidth =
    containerWidth > 0 ? Math.max(containerWidth, tableMinWidth) : 0;

  if (tableWidth === 0) {
    for (const h of headers) {
      result[h.id] = h.column.columnDef.size ?? 20;
    }
    return result;
  }

  const available = tableWidth - fixedTotal;

  // Iterative proportional allocation with min-width clamping.
  // Each pass clamps columns whose proportional share falls below their
  // minimum, then redistributes remaining space. Repeats until stable.
  let clampedTotal = 0;
  const clamped = new Set<string>();

  let stable = false;
  while (!stable) {
    stable = true;
    const unclamped = dataColumns.filter((col) => !clamped.has(col.id));
    const unclampedWeight = unclamped.reduce((s, c) => s + c.weight, 0);
    const remaining = available - clampedTotal;

    for (const col of unclamped) {
      const proportional = remaining * (col.weight / unclampedWeight);
      if (proportional < col.minWidth) {
        result[col.id] = col.minWidth;
        clampedTotal += col.minWidth;
        clamped.add(col.id);
        stable = false;
      }
    }
  }

  // Distribute remaining space among unclamped columns
  const unclampedCols = dataColumns.filter((col) => !clamped.has(col.id));
  const unclampedWeight = unclampedCols.reduce((s, c) => s + c.weight, 0);
  const remainingSpace = available - clampedTotal;
  let assigned = 0;

  for (let i = 0; i < unclampedCols.length; i++) {
    const col = unclampedCols[i]!;
    if (i === unclampedCols.length - 1) {
      result[col.id] = remainingSpace - assigned;
    } else {
      const w = Math.round(remainingSpace * (col.weight / unclampedWeight));
      result[col.id] = w;
      assigned += w;
    }
  }

  // Fixed columns keep their base size
  for (const h of headers) {
    if (fixedColumnIds.has(h.id)) {
      result[h.id] = h.column.columnDef.size ?? 20;
    }
  }

  return result;
}

// ---------------------------------------------------------------------------
// Pure function: create a splitter resize handler for a column pair
// ---------------------------------------------------------------------------

/** Creates a mouse/touch drag handler that redistributes weight between two adjacent columns. */
function createSplitterResizeHandler(
  columnId: string,
  neighborId: string,
  startColumnWidth: number,
  startNeighborWidth: number,
  startColumnWeight: number,
  startNeighborWeight: number,
  columnMinWidth: number,
  neighborMinWidth: number,
  setter: (value: React.SetStateAction<Record<string, number>>) => void,
  isDraggingRef: React.MutableRefObject<boolean>
): (event: React.MouseEvent | React.TouchEvent) => void {
  return (event: React.MouseEvent | React.TouchEvent) => {
    const startX =
      "touches" in event ? event.touches[0]!.clientX : event.clientX;

    isDraggingRef.current = true;

    const onMove = (e: MouseEvent | TouchEvent) => {
      const currentX =
        "touches" in e
          ? (e as TouchEvent).touches[0]!.clientX
          : (e as MouseEvent).clientX;
      const rawDelta = currentX - startX;
      const minDelta = columnMinWidth - startColumnWidth;
      const maxDelta = startNeighborWidth - neighborMinWidth;
      const delta = Math.max(minDelta, Math.min(maxDelta, rawDelta));

      setter((prev) => ({
        ...prev,
        [columnId]:
          startColumnWeight * ((startColumnWidth + delta) / startColumnWidth),
        [neighborId]:
          startNeighborWeight *
          ((startNeighborWidth - delta) / startNeighborWidth),
      }));
    };

    const onUp = () => {
      document.removeEventListener("mousemove", onMove);
      document.removeEventListener("mouseup", onUp);
      document.removeEventListener("touchmove", onMove);
      document.removeEventListener("touchend", onUp);
      document.removeEventListener("touchcancel", onUp);
      document.body.style.userSelect = "";
      document.body.style.cursor = "";
      isDraggingRef.current = false;
    };

    document.body.style.userSelect = "none";
    document.body.style.cursor = "col-resize";
    document.addEventListener("mousemove", onMove);
    document.addEventListener("mouseup", onUp);
    document.addEventListener("touchmove", onMove);
    document.addEventListener("touchend", onUp);
    document.addEventListener("touchcancel", onUp);
  };
}

// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------

/**
 * Computes proportional column pixel widths from weights and provides
 * splitter-style resize handlers that keep total table width constant.
 *
 * @example
 * ```tsx
 * const { containerRef, columnWidths, createResizeHandler } = useColumnWidths({
 *   headers: table.getHeaderGroups()[0].headers,
 *   fixedColumnIds: new Set(["actions"]),
 *   columnMinWidths: { name: 72, status: 80 },
 * });
 * ```
 */
export default function useColumnWidths({
  headers,
  fixedColumnIds,
  columnWeights = {},
  columnMinWidths,
}: UseColumnWidthsOptions): UseColumnWidthsReturn {
  const [containerRef, containerWidth] = useElementWidth();
  const [customWeights, setCustomWeights] = useState<Record<string, number>>(
    {}
  );
  const isDraggingRef = useRef(false);

  useEffect(() => {
    return () => {
      if (isDraggingRef.current) {
        document.body.style.userSelect = "";
        document.body.style.cursor = "";
      }
    };
  }, []);

  const columnWidths = computeColumnWidths(
    containerWidth,
    headers,
    customWeights,
    fixedColumnIds,
    columnWeights,
    columnMinWidths
  );

  const createResizeHandler = useCallback(
    (columnId: string, neighborId: string) => {
      const header = headers.find((h) => h.id === columnId);
      const neighbor = headers.find((h) => h.id === neighborId);

      return createSplitterResizeHandler(
        columnId,
        neighborId,
        columnWidths[columnId] ?? 0,
        columnWidths[neighborId] ?? 0,
        customWeights[columnId] ??
          columnWeights[columnId] ??
          header?.column.columnDef.size ??
          20,
        customWeights[neighborId] ??
          columnWeights[neighborId] ??
          neighbor?.column.columnDef.size ??
          20,
        columnMinWidths[columnId] ?? 50,
        columnMinWidths[neighborId] ?? 50,
        setCustomWeights,
        isDraggingRef
      );
    },
    [headers, columnWidths, customWeights, columnWeights, columnMinWidths]
  );

  return { containerRef, columnWidths, createResizeHandler };
}


================================================
FILE: web/lib/opal/src/components/table/hooks/useDataTable.ts
================================================
"use client";
"use no memo";

import { useState, useEffect, useMemo, useRef } from "react";
import {
  useReactTable,
  getCoreRowModel,
  getSortedRowModel,
  getPaginationRowModel,
  getFilteredRowModel,
  type Table,
  type ColumnDef,
  type RowData,
  type SortingState,
  type RowSelectionState,
  type ColumnSizingState,
  type PaginationState,
  type ColumnResizeMode,
  type TableOptions,
  type VisibilityState,
} from "@tanstack/react-table";

// ---------------------------------------------------------------------------
// Exported types
// ---------------------------------------------------------------------------

export type OnyxSortDirection = "none" | "ascending" | "descending";
export type OnyxSelectionState = "none" | "partial" | "all";

// ---------------------------------------------------------------------------
// Exported utility
// ---------------------------------------------------------------------------

/**
 * Convert a TanStack sort direction to an Onyx sort direction string.
 *
 * This is a **named export** (not on the return object) because it is used
 * statically inside JSX header loops, not tied to hook state.
 */
export function toOnyxSortDirection(
  dir: false | "asc" | "desc"
): OnyxSortDirection {
  if (dir === "asc") return "ascending";
  if (dir === "desc") return "descending";
  return "none";
}

// ---------------------------------------------------------------------------
// Global filter value (combines view-mode + text search)
// ---------------------------------------------------------------------------

interface GlobalFilterValue {
  selectedIds: Set<string> | null;
  searchTerm: string;
}

// ---------------------------------------------------------------------------
// Hook options & return types
// ---------------------------------------------------------------------------

/** Keys managed internally — callers cannot override these via `tableOptions`. */
type ManagedKeys =
  | "data"
  | "columns"
  | "state"
  | "onSortingChange"
  | "onRowSelectionChange"
  | "onColumnSizingChange"
  | "onColumnVisibilityChange"
  | "onPaginationChange"
  | "onGlobalFilterChange"
  | "getCoreRowModel"
  | "getSortedRowModel"
  | "getPaginationRowModel"
  | "getFilteredRowModel"
  | "globalFilterFn"
  | "columnResizeMode"
  | "enableRowSelection"
  | "enableColumnResizing"
  | "getRowId";

/**
 * Options accepted by {@link useDataTable}.
 *
 * Only `data` and `columns` are required — everything else has sensible defaults.
 */
interface UseDataTableOptions<TData extends RowData> {
  /** The row data array. */
  data: TData[];
  /** TanStack column definitions. */
  columns: ColumnDef<TData, any>[];
  /** Rows per page. Set `Infinity` to disable pagination. @default 10 */
  pageSize?: number;
  /** Whether rows can be selected. @default true */
  enableRowSelection?: boolean;
  /** Whether columns can be resized. @default true */
  enableColumnResizing?: boolean;
  /** Stable row identity function. TanStack tracks selection by ID instead of array index. */
  getRowId: TableOptions<TData>["getRowId"];
  /** Resize strategy. @default "onChange" */
  columnResizeMode?: ColumnResizeMode;
  /** Initial sorting state. @default [] */
  initialSorting?: SortingState;
  /** Initial column visibility state. @default {} */
  initialColumnVisibility?: VisibilityState;
  /** Initial row selection state. Keys are row IDs (from `getRowId`), values are `true`. @default {} */
  initialRowSelection?: RowSelectionState;
  /** When true AND `initialRowSelection` is non-empty, start in view-selected mode (filtered to selected rows). @default false */
  initialViewSelected?: boolean;
  /** Called whenever the set of selected row IDs changes. */
  onSelectionChange?: (selectedIds: string[]) => void;
  /** Search term for global text filtering. Rows are filtered to those containing
   *  the term in any accessor column value (case-insensitive). */
  searchTerm?: string;
  /** Server-side configuration. When provided, enables manual pagination/sorting/filtering. */
  serverSide?: {
    totalItems: number;
    onSortingChange: (sorting: SortingState) => void;
    onPaginationChange: (pageIndex: number, pageSize: number) => void;
    onSearchTermChange: (searchTerm: string) => void;
  };
  /** Escape-hatch: extra options spread into `useReactTable`. Managed keys are excluded. */
  tableOptions?: Partial<Omit<TableOptions<TData>, ManagedKeys>>;
}

/**
 * Values returned by {@link useDataTable}.
 */
interface UseDataTableReturn<TData extends RowData> {
  /** Full TanStack table instance for rendering. */
  table: Table<TData>;

  // Pagination (1-based, matching Onyx Footer)
  /** Current page number (1-based). */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Total number of rows. */
  totalItems: number;
  /** Rows per page. */
  pageSize: number;
  /** Navigate to a page (1-based, clamped to valid range). */
  setPage: (page: number) => void;
  /** Whether pagination is active (pageSize is finite). */
  isPaginated: boolean;

  // Selection (pre-computed for Onyx Footer)
  /** Aggregate selection state for the current page. */
  selectionState: OnyxSelectionState;
  /** Number of selected rows. */
  selectedCount: number;
  /** Whether every row on the current page is selected. */
  isAllPageRowsSelected: boolean;
  /** IDs of currently selected rows (derived from `getRowId`). */
  selectedRowIds: string[];
  /** Deselect all rows. */
  clearSelection: () => void;
  /** Select or deselect all rows on the current page. */
  toggleAllPageRowsSelected: (selected: boolean) => void;
  /** Select or deselect all rows across all pages. */
  toggleAllRowsSelected: (selected: boolean) => void;
  /** Whether every row across all pages is selected. */
  isAllRowsSelected: boolean;

  // View-mode (filter to selected rows)
  /** Whether the table is currently filtered to show only selected rows. */
  isViewingSelected: boolean;
  /** Enter view mode — freeze the current selection as a filter. */
  enterViewMode: () => void;
  /** Exit view mode — remove the selection filter. */
  exitViewMode: () => void;
}

// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------

/**
 * Wraps TanStack `useReactTable` with Onyx-specific defaults and derived
 * state so that consumers only need to provide `data` + `columns`.
 *
 * @example
 * ```tsx
 * const {
 *   table, currentPage, totalPages, setPage, pageSize,
 *   selectionState, selectedCount, clearSelection,
 * } = useDataTable({ data: rows, columns });
 * ```
 */
export default function useDataTable<TData extends RowData>(
  options: UseDataTableOptions<TData>
): UseDataTableReturn<TData> {
  const {
    data,
    columns,
    pageSize: pageSizeOption = 10,
    enableRowSelection = true,
    enableColumnResizing = true,
    columnResizeMode = "onChange",
    initialSorting = [],
    initialColumnVisibility = {},
    initialRowSelection = {},
    initialViewSelected = false,
    getRowId,
    onSelectionChange,
    searchTerm,
    serverSide,
    tableOptions,
  } = options;

  const isServerSide = !!serverSide;

  // ---- internal state -----------------------------------------------------
  const [sorting, setSorting] = useState<SortingState>(initialSorting);
  const [rowSelection, setRowSelection] =
    useState<RowSelectionState>(initialRowSelection);
  const [columnSizing, setColumnSizing] = useState<ColumnSizingState>({});
  const [columnVisibility, setColumnVisibility] = useState<VisibilityState>(
    initialColumnVisibility
  );
  const [pagination, setPagination] = useState<PaginationState>({
    pageIndex: 0,
    pageSize: pageSizeOption,
  });
  /** Combined global filter: view-mode (selected IDs) + text search. */
  const initialSelectedIds =
    initialViewSelected && Object.keys(initialRowSelection).length > 0
      ? new Set(Object.keys(initialRowSelection))
      : null;
  const [globalFilter, setGlobalFilter] = useState<GlobalFilterValue>({
    selectedIds: initialSelectedIds,
    searchTerm: "",
  });

  // ---- sync pageSize prop to internal state --------------------------------
  useEffect(() => {
    setPagination((prev) => ({
      ...prev,
      pageSize: pageSizeOption,
      pageIndex: 0,
    }));
  }, [pageSizeOption]);

  // ---- sync external searchTerm prop into combined filter state ------------
  // (client-side only — server-side uses separate callbacks instead)
  const preSearchPageRef = useRef<number>(0);

  useEffect(() => {
    if (isServerSide) return;
    const term = searchTerm ?? "";
    const wasSearching = !!globalFilter.searchTerm;

    if (!wasSearching && term) {
      // Entering search — save current page, reset to 0
      preSearchPageRef.current = pagination.pageIndex;
      setPagination((p) => ({ ...p, pageIndex: 0 }));
    } else if (wasSearching && !term) {
      // Clearing search — restore saved page
      setPagination((p) => ({ ...p, pageIndex: preSearchPageRef.current }));
    }

    setGlobalFilter((prev) => ({ ...prev, searchTerm: term }));
    // eslint-disable-next-line react-hooks/exhaustive-deps -- Intentionally
    // omits `globalFilter` and `pagination.pageIndex`: we only read snapshot
    // values to detect the search enter/clear transition, not to react to
    // every filter or page change.
  }, [searchTerm, isServerSide]);

  // ---- server-side: 3 separate callbacks -----------------------------------
  // Single ref for the whole serverSide config — prevents effects from
  // re-firing when the consumer passes an inline object each render.
  const serverSideRef = useRef(serverSide);
  serverSideRef.current = serverSide;

  useEffect(() => {
    if (!isServerSide) return;
    serverSideRef.current!.onSortingChange(sorting);
  }, [sorting, isServerSide]);

  useEffect(() => {
    if (!isServerSide) return;
    serverSideRef.current!.onPaginationChange(
      pagination.pageIndex,
      pagination.pageSize
    );
  }, [pagination.pageIndex, pagination.pageSize, isServerSide]);

  useEffect(() => {
    if (!isServerSide) return;
    setPagination((p) => ({ ...p, pageIndex: 0 }));
    serverSideRef.current!.onSearchTermChange(searchTerm ?? "");
  }, [searchTerm, isServerSide]);

  // ---- TanStack table instance --------------------------------------------
  const serverPageCount = isServerSide
    ? isFinite(pagination.pageSize) && pagination.pageSize > 0
      ? Math.ceil((serverSide!.totalItems || 0) / pagination.pageSize)
      : 1
    : undefined;

  const tableOpts: TableOptions<TData> = {
    data,
    columns,
    getRowId,
    state: {
      sorting,
      rowSelection,
      columnSizing,
      columnVisibility,
      pagination,
      ...(isServerSide ? {} : { globalFilter }),
    },
    onSortingChange: isServerSide
      ? (updater) => {
          setSorting(updater);
          setPagination((p) => ({ ...p, pageIndex: 0 }));
        }
      : setSorting,
    onRowSelectionChange: setRowSelection,
    onColumnSizingChange: setColumnSizing,
    onColumnVisibilityChange: setColumnVisibility,
    onPaginationChange: setPagination,
    getCoreRowModel: getCoreRowModel(),
    // We manage page resets explicitly (search enter/clear, view mode,
    // pageSize change) so disable TanStack's auto-reset which would
    // clobber our restored page index when the filter changes.
    autoResetPageIndex: false,
    columnResizeMode,
    enableRowSelection,
    enableColumnResizing,
    ...tableOptions,
  };

  if (isServerSide) {
    tableOpts.manualPagination = true;
    tableOpts.manualSorting = true;
    tableOpts.manualFiltering = true;
    tableOpts.pageCount = serverPageCount;
  } else {
    tableOpts.onGlobalFilterChange = setGlobalFilter;
    tableOpts.getSortedRowModel = getSortedRowModel();
    tableOpts.getPaginationRowModel = getPaginationRowModel();
    tableOpts.getFilteredRowModel = getFilteredRowModel();
    tableOpts.globalFilterFn = (
      row,
      _columnId,
      filterValue: GlobalFilterValue
    ) => {
      // View-mode filter (selected IDs)
      if (
        filterValue.selectedIds != null &&
        !filterValue.selectedIds.has(row.id)
      ) {
        return false;
      }
      // Text search filter
      if (filterValue.searchTerm) {
        const term = filterValue.searchTerm.toLowerCase();
        return row.getAllCells().some((cell) => {
          const value = cell.getValue();
          if (value == null) return false;
          return String(value).toLowerCase().includes(term);
        });
      }
      return true;
    };
  }

  const table = useReactTable(tableOpts);

  // ---- derived values -----------------------------------------------------
  const isAllPageRowsSelected = table.getIsAllPageRowsSelected();
  const isSomePageRowsSelected = table.getIsSomePageRowsSelected();

  const selectionState: OnyxSelectionState = isAllPageRowsSelected
    ? "all"
    : isSomePageRowsSelected
      ? "partial"
      : "none";

  const selectedRowIds = useMemo(
    () => Object.keys(rowSelection),
    [rowSelection]
  );
  const selectedCount = selectedRowIds.length;
  const totalPages = Math.max(1, table.getPageCount());
  const currentPage = pagination.pageIndex + 1;
  const hasActiveFilter =
    !isServerSide &&
    (globalFilter.selectedIds != null || !!globalFilter.searchTerm);
  const totalItems = isServerSide
    ? serverSide!.totalItems
    : hasActiveFilter
      ? table.getPrePaginationRowModel().rows.length
      : data.length;
  const isPaginated = isFinite(pagination.pageSize);

  // ---- keep view-mode filter in sync with selection ----------------------
  // When in view-selected mode, deselecting a row should remove it from
  // the visible set so it disappears immediately.
  useEffect(() => {
    if (isServerSide) return;
    if (globalFilter.selectedIds == null) return;

    const currentIds = new Set(Object.keys(rowSelection));
    // Remove any ID from the filter that is no longer selected
    let changed = false;
    const next = new Set<string>();
    globalFilter.selectedIds.forEach((id) => {
      if (currentIds.has(id)) {
        next.add(id);
      } else {
        changed = true;
      }
    });
    if (changed) {
      setGlobalFilter((prev) => ({ ...prev, selectedIds: next }));
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps -- only react to
    // selection changes while in view mode
  }, [rowSelection, isServerSide]);

  // ---- selection change callback ------------------------------------------
  const isFirstRenderRef = useRef(true);
  const onSelectionChangeRef = useRef(onSelectionChange);
  onSelectionChangeRef.current = onSelectionChange;

  useEffect(() => {
    if (isFirstRenderRef.current) {
      isFirstRenderRef.current = false;
      // Still fire the callback on first render if there's an initial selection
      if (selectedRowIds.length > 0) {
        onSelectionChangeRef.current?.(selectedRowIds);
      }
      return;
    }
    onSelectionChangeRef.current?.(selectedRowIds);
  }, [selectedRowIds]);

  // ---- actions ------------------------------------------------------------
  const setPage = (page: number) => {
    const clamped = Math.max(1, Math.min(page, totalPages));
    setPagination((prev) => ({ ...prev, pageIndex: clamped - 1 }));
  };

  const clearSelection = () => {
    table.resetRowSelection();
  };

  const toggleAllPageRowsSelected = (selected: boolean) => {
    table.toggleAllPageRowsSelected(selected);
  };

  // TODO (@raunakab): In server-side mode, these only operate on the loaded
  // page data, not all rows across all pages. TanStack can't select rows it
  // doesn't have. Fixing this requires a server-side callback (e.g.
  // `onSelectAll`) and a `totalItems`-aware selection model.
  const toggleAllRowsSelected = (selected: boolean) => {
    table.toggleAllRowsSelected(selected);
  };

  const isAllRowsSelected = table.getIsAllRowsSelected();

  // ---- view mode (filter to selected rows) --------------------------------
  const isViewingSelected = globalFilter.selectedIds != null;

  const enterViewMode = () => {
    if (isServerSide) return;
    if (selectedRowIds.length > 0) {
      setGlobalFilter((prev) => ({
        ...prev,
        selectedIds: new Set(selectedRowIds),
      }));
      setPagination((prev) => ({ ...prev, pageIndex: 0 }));
    }
  };

  const exitViewMode = () => {
    if (isServerSide) return;
    setGlobalFilter((prev) => ({ ...prev, selectedIds: null }));
    setPagination((prev) => ({ ...prev, pageIndex: 0 }));
  };

  return {
    table,
    currentPage,
    totalPages,
    totalItems,
    pageSize: pagination.pageSize,
    setPage,
    isPaginated,
    selectionState,
    selectedCount,
    selectedRowIds,
    isAllPageRowsSelected,
    isAllRowsSelected,
    clearSelection,
    toggleAllPageRowsSelected,
    toggleAllRowsSelected,
    isViewingSelected,
    enterViewMode,
    exitViewMode,
  };
}


================================================
FILE: web/lib/opal/src/components/table/hooks/useDraggableRows.ts
================================================
"use client";

import { useState, useCallback, useMemo, useRef } from "react";
import {
  useSensors,
  useSensor,
  PointerSensor,
  KeyboardSensor,
  closestCenter,
  type DragStartEvent,
  type DragEndEvent,
} from "@dnd-kit/core";
import { arrayMove, sortableKeyboardCoordinates } from "@dnd-kit/sortable";
import { restrictToVerticalAxis } from "@dnd-kit/modifiers";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface UseDraggableRowsOptions<TData> {
  /** Current display-order data. */
  data: TData[];
  /** Extract a unique string ID from each row. */
  getRowId: (row: TData) => string;
  /** Whether DnD row reordering is active (e.g. set to `false` when column sorting is active). @default true */
  enabled?: boolean;
  /** Called after a successful reorder with the new ID order and a map of changed positions. */
  onReorder?: (
    ids: string[],
    changedOrders: Record<string, number>
  ) => void | Promise<void>;
}

interface DraggableRowsReturn {
  /** Props to pass to TableBody's `dndSortable` prop. */
  dndContextProps: {
    sensors: ReturnType<typeof useSensors>;
    collisionDetection: typeof closestCenter;
    modifiers: Array<typeof restrictToVerticalAxis>;
    onDragStart: (event: DragStartEvent) => void;
    onDragEnd: (event: DragEndEvent) => void;
    onDragCancel: () => void;
  };
  /** Ordered list of IDs for SortableContext. */
  sortableItems: string[];
  /** ID of the currently dragged row, or null. */
  activeId: string | null;
  /** Whether a drag is in progress. */
  isDragging: boolean;
  /** Whether DnD is enabled. */
  isEnabled: boolean;
  /** Ref that is `true` briefly after a drag ends, used to suppress the trailing click. */
  wasDraggingRef: React.RefObject<boolean>;
}

// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------

/**
 * Manages drag-and-drop row reordering using @dnd-kit, providing sensor
 * configuration, sortable item IDs, drag state, and a reorder callback
 * that reports only the changed positions.
 *
 * @example
 * ```tsx
 * const { dndContextProps, sortableItems, activeId } = useDraggableRows({
 *   data: rows,
 *   getRowId: (row) => row.id,
 *   onReorder: (ids, changed) => saveNewOrder(changed),
 * });
 * ```
 */
export default function useDraggableRows<TData>(
  options: UseDraggableRowsOptions<TData>
): DraggableRowsReturn {
  const { data, getRowId, enabled = true, onReorder } = options;

  const [activeId, setActiveId] = useState<string | null>(null);
  const wasDraggingRef = useRef(false);

  const sensors = useSensors(
    useSensor(PointerSensor, {
      activationConstraint: { distance: 5 },
    }),
    useSensor(KeyboardSensor, {
      coordinateGetter: sortableKeyboardCoordinates,
    })
  );

  const sortableItems = useMemo(
    () => data.map((row) => getRowId(row)),
    [data, getRowId]
  );

  const sortableIndexMap = useMemo(() => {
    const map = new Map<string, number>();
    for (let i = 0; i < sortableItems.length; i++) {
      const item = sortableItems[i];
      if (item !== undefined) {
        map.set(item, i);
      }
    }
    return map;
  }, [sortableItems]);

  const handleDragStart = useCallback((event: DragStartEvent) => {
    setActiveId(String(event.active.id));
  }, []);

  const handleDragEnd = useCallback(
    (event: DragEndEvent) => {
      setActiveId(null);
      // Suppress the trailing click event that the browser fires after pointerup.
      wasDraggingRef.current = true;
      requestAnimationFrame(() => {
        wasDraggingRef.current = false;
      });
      if (event.activatorEvent instanceof PointerEvent) {
        (document.activeElement as HTMLElement)?.blur();
      }
      const { active, over } = event;
      if (!over || active.id === over.id) return;

      const oldIndex = sortableIndexMap.get(String(active.id));
      const newIndex = sortableIndexMap.get(String(over.id));
      if (oldIndex === undefined || newIndex === undefined) return;

      const reordered = arrayMove(sortableItems, oldIndex, newIndex);

      const minIdx = Math.min(oldIndex, newIndex);
      const maxIdx = Math.max(oldIndex, newIndex);
      const changedOrders: Record<string, number> = {};
      for (let i = minIdx; i <= maxIdx; i++) {
        const id = reordered[i];
        if (id !== undefined) {
          changedOrders[id] = i;
        }
      }

      onReorder?.(reordered, changedOrders);
    },
    [sortableItems, sortableIndexMap, onReorder]
  );

  const handleDragCancel = useCallback(() => {
    setActiveId(null);
  }, []);

  return {
    dndContextProps: {
      sensors,
      collisionDetection: closestCenter,
      modifiers: [restrictToVerticalAxis],
      onDragStart: handleDragStart,
      onDragEnd: handleDragEnd,
      onDragCancel: handleDragCancel,
    },
    sortableItems,
    activeId,
    isDragging: activeId !== null,
    isEnabled: enabled,
    wasDraggingRef,
  };
}


================================================
FILE: web/lib/opal/src/components/table/styles.css
================================================
/* Imports shared timing tokens (--interactive-duration, --interactive-easing) */
@import "@opal/core/interactive/shared.css";

/* ---------------------------------------------------------------------------
 * Table primitives — data-attribute driven styling
 * Follows the same pattern as card.css / line-item.css.
 * ------------------------------------------------------------------------- */

/* ---- TableCell ---- */

.tbl-cell[data-size="lg"] {
  @apply px-1 py-0.5;
}
.tbl-cell[data-size="md"] {
  @apply pl-0.5 pr-1.5 py-1.5;
}

.tbl-cell-inner[data-size="lg"] {
  @apply h-10 px-1;
}
.tbl-cell-inner[data-size="md"] {
  @apply h-6 px-0.5;
}

/* ---- TableHead ---- */

.table-head {
  @apply relative;
}
.table-head[data-size="lg"] {
  @apply px-2 py-1;
}
.table-head[data-size="md"] {
  @apply px-2 py-1;
}
.table-head[data-bottom-border] {
  @apply border-b border-transparent hover:border-border-03;
}

/* Inner text wrapper */
.table-head[data-size="lg"] .table-head-label {
  @apply py-2 px-0.5;
}
.table-head[data-size="md"] .table-head-label {
  @apply py-1;
}

/* Sort button wrapper */
.table-head[data-size="lg"] .table-head-sort {
  @apply py-1.5;
}

/* ---- TableRow (base) ---- */

.tbl-row > td {
  @apply bg-background-tint-00;
  transition: background-color var(--interactive-duration)
    var(--interactive-easing);
}

.tbl-row[data-selected] > td {
  @apply bg-[var(--action-link-01)];
}

.tbl-row[data-disabled] {
  @apply pointer-events-none;
}

/* Suppress default focus ring on rows — the row bg is the indicator */
.tbl-row:focus,
.tbl-row:focus-visible {
  outline: none;
}

/* ---- variant="rows" — traditional borders, no gaps ---- */

table[data-variant="rows"] .tbl-row > td {
  @apply border-b border-border-01;
}

/* Hover/focus only for selectable tables */
table[data-variant="rows"][data-selection="single-select"] .tbl-row,
table[data-variant="rows"][data-selection="multi-select"] .tbl-row {
  @apply cursor-pointer;
}
table[data-variant="rows"][data-selection="single-select"] .tbl-row:hover > td,
table[data-variant="rows"][data-selection="multi-select"] .tbl-row:hover > td {
  @apply bg-background-tint-02;
}
table[data-variant="rows"] .tbl-row:focus-visible > td,
table[data-variant="rows"] .tbl-row:has(:focus-visible) > td {
  @apply bg-action-link-01;
}

/* ---- variant="cards" — rounded cards with gap ---- */

table[data-variant="cards"] .tbl-row > td {
  @apply bg-clip-padding border-y-[2px] border-x-0 border-transparent;
}
table[data-variant="cards"] .tbl-row > td:first-child {
  @apply rounded-l-12;
}
table[data-variant="cards"] .tbl-row > td:last-child {
  @apply rounded-r-12;
}

/* When a drag handle is present the second-to-last td gets the rounding */
table[data-variant="cards"] .tbl-row[data-drag-handle] > td:nth-last-child(2) {
  @apply rounded-r-12;
}
table[data-variant="cards"] .tbl-row[data-drag-handle] > td:last-child {
  border-radius: 0;
}

/* Hover/focus only for selectable tables */
table[data-variant="cards"][data-selection="single-select"] .tbl-row,
table[data-variant="cards"][data-selection="multi-select"] .tbl-row {
  @apply cursor-pointer;
}
table[data-variant="cards"][data-selection="single-select"] .tbl-row:hover > td,
table[data-variant="cards"][data-selection="multi-select"] .tbl-row:hover > td {
  @apply bg-background-tint-02;
}
table[data-variant="cards"] .tbl-row:focus-visible > td,
table[data-variant="cards"] .tbl-row:has(:focus-visible) > td {
  @apply bg-action-link-01;
}

/* ---- QualifierContainer ---- */

.tbl-qualifier[data-type="head"] {
  @apply w-px whitespace-nowrap py-1;
}
.tbl-qualifier[data-type="head"][data-size="md"] {
  @apply py-0.5;
}

.tbl-qualifier[data-type="cell"] {
  @apply w-px whitespace-nowrap py-1;
}
.tbl-qualifier[data-type="cell"][data-size="md"] {
  @apply py-0.5;
}

/* ---- ActionsContainer ---- */

.tbl-actions {
  @apply w-px whitespace-nowrap px-1;
}
.tbl-actions[data-type="head"] {
  @apply px-2 py-1;
}

/* ---- Footer ---- */

.table-footer[data-size="lg"] {
  @apply min-h-[2.75rem];
}
.table-footer[data-size="md"] {
  @apply min-h-[2.25rem];
}


================================================
FILE: web/lib/opal/src/components/table/types.ts
================================================
import type { ReactNode } from "react";
import type {
  ColumnDef,
  SortingState,
  VisibilityState,
} from "@tanstack/react-table";
import type { TableSize } from "@opal/components/table/TableSizeContext";
import type { TableVariant } from "@opal/components/table/TableElement";
import type { IconFunctionComponent } from "@opal/types";
import type { SortDirection } from "@opal/components/table/TableHead";

// ---------------------------------------------------------------------------
// Column width (mirrors useColumnWidths types)
// ---------------------------------------------------------------------------

/** Width config for a data column (participates in proportional distribution). */
export interface DataColumnWidth {
  weight: number;
  minWidth?: number;
}

/** Width config for a fixed column (exact pixels, no proportional distribution). */
export interface FixedColumnWidth {
  fixed: number;
}

export type ColumnWidth = DataColumnWidth | FixedColumnWidth;

// ---------------------------------------------------------------------------
// Column kind discriminant
// ---------------------------------------------------------------------------

export type QualifierContentType = "simple" | "icon" | "image";

export type OnyxColumnKind = "qualifier" | "data" | "display" | "actions";

// ---------------------------------------------------------------------------
// Column definitions (discriminated union on `kind`)
// ---------------------------------------------------------------------------

interface OnyxColumnBase<TData> {
  kind: OnyxColumnKind;
  /** Stable column identifier (mirrors the TanStack column ID). */
  id: string;
  def: ColumnDef<TData, any>;
  width: ColumnWidth | ((size: TableSize) => ColumnWidth);
}

/** Qualifier column — leading avatar/icon/checkbox column. */
export interface OnyxQualifierColumn<TData> extends OnyxColumnBase<TData> {
  kind: "qualifier";
  /** Content type for body-row `<TableQualifier>`. */
  content: QualifierContentType;
  /** Return the icon component to render for a row (for "icon" content). */
  getContent?: (row: TData) => IconFunctionComponent;
  /** Return the image URL to render for a row (for "image" content). */
  getImageSrc?: (row: TData) => string;
  /** Return the image alt text for a row (for "image" content). @default "" */
  getImageAlt?: (row: TData) => string;
  /** Show a tinted background container behind the content. @default false */
  background?: boolean;
  /** Icon size preset. Use `"lg"` for avatars, `"md"` for regular icons. @default "md" */
  iconSize?: "lg" | "md";
}

/** Data column — accessor-based column with sorting/resizing. */
export interface OnyxDataColumn<TData> extends OnyxColumnBase<TData> {
  kind: "data";
  /** Override the sort icon for this column. */
  icon?: (sorted: SortDirection) => IconFunctionComponent;
}

/** Display column — non-accessor column with custom rendering. */
export interface OnyxDisplayColumn<TData> extends OnyxColumnBase<TData> {
  kind: "display";
}

/** Actions column — fixed column with visibility/sorting popovers. */
export interface OnyxActionsColumn<TData> extends OnyxColumnBase<TData> {
  kind: "actions";
  /** Show column visibility popover. @default true */
  showColumnVisibility?: boolean;
  /** Show sorting popover. @default true */
  showSorting?: boolean;
  /** Footer text for the sorting popover. */
  sortingFooterText?: string;
}

/** Discriminated union of all column types. */
export type OnyxColumnDef<TData> =
  | OnyxQualifierColumn<TData>
  | OnyxDataColumn<TData>
  | OnyxDisplayColumn<TData>
  | OnyxActionsColumn<TData>;

// ---------------------------------------------------------------------------
// Server-side pagination / sorting / search
// ---------------------------------------------------------------------------

/** Server-side configuration for DataTable. */
export interface ServerSideConfig {
  /** Total row count from the server. Used to compute page count. */
  totalItems: number;
  /** Whether data is currently being fetched. Shows loading state. */
  isLoading?: boolean;
  /** Fired when sorting state changes. */
  onSortingChange: (sorting: SortingState) => void;
  /** Fired when pagination changes (including page resets from sort/search). */
  onPaginationChange: (pageIndex: number, pageSize: number) => void;
  /** Fired when searchTerm changes. */
  onSearchTermChange: (searchTerm: string) => void;
}

// ---------------------------------------------------------------------------
// DataTable props
// ---------------------------------------------------------------------------

export interface DataTableDraggableConfig {
  /** Called after a successful reorder with the new ID order and changed positions. */
  onReorder: (
    ids: string[],
    changedOrders: Record<string, number>
  ) => void | Promise<void>;
}

/** Footer configuration. Mode is derived from `selectionBehavior` automatically. */
export interface DataTableFooterConfig {
  /** Handler for the "Clear" button (multi-select only). When omitted, the default clearSelection is used. */
  onClear?: () => void;
  /** Unit label for count pagination, e.g. "users", "documents" (multi-select only). */
  units?: string;
  /** Optional extra element rendered after the summary text, e.g. a download icon (summary mode only). */
  leftExtra?: ReactNode;
}

export interface DataTableProps<TData> {
  /** Row data array. */
  data: TData[];
  /** Column definitions created via `createTableColumns()`. */
  columns: OnyxColumnDef<TData>[];
  /** Extract a unique string ID from each row. Used for stable row identity. */
  getRowId: (row: TData) => string;
  /** Rows per page. Set `Infinity` to disable pagination. @default 10 */
  pageSize?: number;
  /** Initial sorting state. */
  initialSorting?: SortingState;
  /** Initial column visibility state. */
  initialColumnVisibility?: VisibilityState;
  /** Initial row selection state. Keys are row IDs (from `getRowId`), values are `true`. */
  initialRowSelection?: Record<string, boolean>;
  /** When true AND `initialRowSelection` is non-empty, start in view-selected mode. @default false */
  initialViewSelected?: boolean;
  /** Enable drag-and-drop row reordering. */
  draggable?: DataTableDraggableConfig;
  /** Footer configuration. */
  footer?: DataTableFooterConfig;
  /** Table size variant. @default "lg" */
  size?: TableSize;
  /** Visual row variant. @default "cards" */
  variant?: TableVariant;
  /** Called whenever the set of selected row IDs changes. Receives IDs produced by `getRowId`. */
  onSelectionChange?: (selectedIds: string[]) => void;
  /** Called when a row is clicked (replaces the default selection toggle). */
  onRowClick?: (row: TData) => void;
  /** Search term for global text filtering. When provided, rows are filtered
   *  to those containing the term in any accessor column value (case-insensitive). */
  searchTerm?: string;
  /**
   * Max height of the scrollable table area. When set, the table body scrolls
   * vertically while the header stays pinned at the top.
   * Accepts a pixel number (e.g. `300`) or a CSS value string (e.g. `"50vh"`).
   */
  height?: number | string;
  /**
   * Enable server-side mode. When provided:
   * - TanStack uses manualPagination/manualSorting/manualFiltering
   * - `data` should contain only the current page's rows
   * - Dragging is automatically disabled
   * - Fires separate callbacks for sorting, pagination, and search changes
   */
  serverSide?: ServerSideConfig;
  /** Content to render inside the table body when there are no rows. */
  emptyState?: React.ReactNode;
}


================================================
FILE: web/lib/opal/src/components/tag/README.md
================================================
# Tag

**Import:** `import { Tag, type TagProps } from "@opal/components";`

A small colored label used to annotate items with status, category, or metadata. Fixed at 1rem height, uses `font-figure-small-value`.

## Props

| Prop | Type | Default | Description |
|---|---|---|---|
| `title` | `string` | **(required)** | Tag label text |
| `color` | `TagColor` | `"gray"` | Color variant |
| `icon` | `IconFunctionComponent` | — | Optional icon before the title |

### `TagColor`

`"green" | "blue" | "purple" | "amber" | "gray"`

| Color | Background | Text |
|---|---|---|
| `green` | `theme-green-01` | `theme-green-05` |
| `blue` | `theme-blue-01` | `theme-blue-05` |
| `purple` | `theme-purple-01` | `theme-purple-05` |
| `amber` | `theme-amber-01` | `theme-amber-05` |
| `gray` | `background-tint-02` | `text-03` |

## Usage Examples

```tsx
import { Tag } from "@opal/components";
import SvgStar from "@opal/icons/star";

// Basic
<Tag title="New" color="green" />

// With icon
<Tag icon={SvgStar} title="Featured" color="purple" />

// Default gray
<Tag title="Draft" />
```

## Usage inside Content

Tag can be rendered as an accessory inside `Content`'s ContentMd via the `tag` prop:

```tsx
import { Content } from "@opal/layouts";
import SvgSearch from "@opal/icons/search";

<Content
  icon={SvgSearch}
  sizePreset="main-ui"
  title="My Item"
  tag={{ title: "New", color: "green" }}
/>
```


================================================
FILE: web/lib/opal/src/components/tag/Tag.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Tag } from "@opal/components";
import { SvgAlertCircle } from "@opal/icons";

const TAG_COLORS = ["green", "purple", "blue", "gray", "amber"] as const;

const meta: Meta<typeof Tag> = {
  title: "opal/components/Tag",
  component: Tag,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Tag>;

export const Default: Story = {
  args: {
    title: "Label",
  },
};

export const AllColors: Story = {
  render: () => (
    <div className="flex items-center gap-2">
      {TAG_COLORS.map((color) => (
        <Tag key={color} title={color} color={color} />
      ))}
    </div>
  ),
};

export const WithIcon: Story = {
  args: {
    title: "Alert",
    icon: SvgAlertCircle,
  },
};

export const AllColorsWithIcon: Story = {
  render: () => (
    <div className="flex items-center gap-2">
      {TAG_COLORS.map((color) => (
        <Tag key={color} title={color} color={color} icon={SvgAlertCircle} />
      ))}
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/tag/components.tsx
================================================
import "@opal/components/tag/styles.css";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { cn } from "@opal/utils";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type TagColor = "green" | "purple" | "blue" | "gray" | "amber";

type TagSize = "sm" | "md";

interface TagProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Tag label text. */
  title: string | RichStr;

  /** Color variant. Default: `"gray"`. */
  color?: TagColor;

  /** Size variant. Default: `"sm"`. */
  size?: TagSize;
}

// ---------------------------------------------------------------------------
// Color config
// ---------------------------------------------------------------------------

const COLOR_CONFIG: Record<TagColor, { bg: string; text: string }> = {
  green: { bg: "bg-theme-green-01", text: "text-theme-green-05" },
  blue: { bg: "bg-theme-blue-01", text: "text-theme-blue-05" },
  purple: { bg: "bg-theme-purple-01", text: "text-theme-purple-05" },
  amber: { bg: "bg-theme-amber-01", text: "text-theme-amber-05" },
  gray: { bg: "bg-background-tint-02", text: "text-text-03" },
};

// ---------------------------------------------------------------------------
// Tag
// ---------------------------------------------------------------------------

function Tag({ icon: Icon, title, color = "gray", size = "sm" }: TagProps) {
  const config = COLOR_CONFIG[color];

  return (
    <div
      className={cn("opal-auxiliary-tag", config.bg, config.text)}
      data-size={size}
    >
      {Icon && (
        <div className="opal-auxiliary-tag-icon-container">
          <Icon className={cn("opal-auxiliary-tag-icon", config.text)} />
        </div>
      )}
      <Text
        font={size === "md" ? "secondary-body" : "figure-small-value"}
        color="inherit"
        nowrap
      >
        {title}
      </Text>
    </div>
  );
}

export { Tag, type TagProps, type TagColor, type TagSize };


================================================
FILE: web/lib/opal/src/components/tag/styles.css
================================================
/* ---------------------------------------------------------------------------
   AuxiliaryTag

   Fixed height of 1rem (16px). Icon is 0.75rem (12px) with p-0.5 (2px)
   padding to match the font-figure-small-value line-height (12px).
   --------------------------------------------------------------------------- */

.opal-auxiliary-tag {
  @apply flex flex-row items-center shrink-0;
  height: 1rem;
  border-radius: 0.25rem;
  padding: 0 0.25rem;
  gap: 0;
}

.opal-auxiliary-tag[data-size="md"] {
  height: 1.375rem;
  padding: 0 0.375rem;
  border-radius: 0.375rem;
}

.opal-auxiliary-tag-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 1px;
}

.opal-auxiliary-tag-icon {
  width: 10px;
  height: 10px;
}

.opal-auxiliary-tag-title {
  white-space: nowrap;
}


================================================
FILE: web/lib/opal/src/components/text/InlineMarkdown.tsx
================================================
import type { ReactNode } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";

import type { RichStr } from "@opal/types";

// ---------------------------------------------------------------------------
// InlineMarkdown
// ---------------------------------------------------------------------------

const SAFE_PROTOCOL = /^https?:|^mailto:|^tel:/i;

const ALLOWED_ELEMENTS = ["p", "br", "a", "strong", "em", "code", "del"];

const INLINE_COMPONENTS = {
  p: ({ children }: { children?: ReactNode }) => (
    <span className="block">{children}</span>
  ),
  a: ({ children, href }: { children?: ReactNode; href?: string }) => {
    if (!href || !SAFE_PROTOCOL.test(href)) {
      return <>{children}</>;
    }
    const isHttp = /^https?:/i.test(href);
    return (
      <a
        href={href}
        className="underline underline-offset-2"
        {...(isHttp ? { target: "_blank", rel: "noopener noreferrer" } : {})}
      >
        {children}
      </a>
    );
  },
  code: ({ children }: { children?: ReactNode }) => (
    <code className="[font-family:var(--font-dm-mono)] bg-background-tint-02 rounded px-1 py-0.5">
      {children}
    </code>
  ),
};

interface InlineMarkdownProps {
  content: string;
}

export default function InlineMarkdown({ content }: InlineMarkdownProps) {
  // Convert \n to CommonMark hard line breaks (two trailing spaces + newline).
  // react-markdown renders these as <br />, which inherits the parent's
  // line-height for font-appropriate spacing.
  const normalized = content.replace(/\n/g, "  \n");

  return (
    <ReactMarkdown
      components={INLINE_COMPONENTS}
      allowedElements={ALLOWED_ELEMENTS}
      unwrapDisallowed
      remarkPlugins={[remarkGfm]}
    >
      {normalized}
    </ReactMarkdown>
  );
}

// ---------------------------------------------------------------------------
// RichStr helpers
// ---------------------------------------------------------------------------

function isRichStr(value: unknown): value is RichStr {
  return (
    typeof value === "object" &&
    value !== null &&
    (value as RichStr).__brand === "RichStr"
  );
}

/** Resolves `string | RichStr` to a `ReactNode`. */
export function resolveStr(value: string | RichStr): ReactNode {
  return isRichStr(value) ? <InlineMarkdown content={value.raw} /> : value;
}

/** Extracts the plain string from `string | RichStr`. */
export function toPlainString(value: string | RichStr): string {
  return isRichStr(value) ? value.raw : value;
}


================================================
FILE: web/lib/opal/src/components/text/README.md
================================================
# Text

**Import:** `import { Text, type TextProps, type TextFont, type TextColor } from "@opal/components";`

A styled text component with string-enum props for font preset and color selection. Supports
inline markdown rendering via `RichStr` — pass `markdown("*bold* text")` as children to enable.

## Props

| Prop | Type | Default | Description |
|---|---|---|---|
| `font` | `TextFont` | `"main-ui-body"` | Font preset (size, weight, line-height) |
| `color` | `TextColor` | `"text-04"` | Text color |
| `as` | `"p" \| "span" \| "li" \| "h1" \| "h2" \| "h3"` | `"span"` | HTML tag to render |
| `nowrap` | `boolean` | `false` | Prevent text wrapping |
| `children` | `string \| RichStr` | — | Plain string or `markdown()` for inline markdown |

### `TextFont`

| Value | Size | Weight | Line-height |
|---|---|---|---|
| `"heading-h1"` | 48px | 600 | 64px |
| `"heading-h2"` | 24px | 600 | 36px |
| `"heading-h3"` | 18px | 600 | 28px |
| `"heading-h3-muted"` | 18px | 500 | 28px |
| `"main-content-body"` | 16px | 450 | 24px |
| `"main-content-muted"` | 16px | 400 | 24px |
| `"main-content-emphasis"` | 16px | 700 | 24px |
| `"main-content-mono"` | 16px | 400 | 23px |
| `"main-ui-body"` | 14px | 500 | 20px |
| `"main-ui-muted"` | 14px | 400 | 20px |
| `"main-ui-action"` | 14px | 600 | 20px |
| `"main-ui-mono"` | 14px | 400 | 20px |
| `"secondary-body"` | 12px | 400 | 18px |
| `"secondary-action"` | 12px | 600 | 18px |
| `"secondary-mono"` | 12px | 400 | 18px |
| `"figure-small-label"` | 10px | 600 | 14px |
| `"figure-small-value"` | 10px | 400 | 14px |
| `"figure-keystroke"` | 11px | 400 | 16px |

### `TextColor`

`"text-01" | "text-02" | "text-03" | "text-04" | "text-05" | "text-inverted-01" | "text-inverted-02" | "text-inverted-03" | "text-inverted-04" | "text-inverted-05" | "text-light-03" | "text-light-05" | "text-dark-03" | "text-dark-05"`

## Usage Examples

```tsx
import { Text } from "@opal/components";

// Basic
<Text font="main-ui-body" color="text-03">
  Hello world
</Text>

// Heading
<Text font="heading-h2" color="text-05" as="h2">
  Page Title
</Text>

// Inverted (for dark backgrounds)
<Text font="main-ui-body" color="text-inverted-05">
  Light text on dark
</Text>

// As paragraph
<Text font="main-content-body" color="text-03" as="p">
  A full paragraph of text.
</Text>
```

## Inline Markdown via `RichStr`

Inline markdown is opt-in via the `markdown()` function, which returns a `RichStr`. When `Text`
receives a `RichStr` as children, it parses the inner string as inline markdown. Plain strings
are rendered as-is — no parsing, no surprises. `Text` does not accept arbitrary JSX as children;
use `string | RichStr` only.

```tsx
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";

// Inline markdown — bold, italic, links, code, strikethrough
<Text font="main-ui-body" color="text-05">
  {markdown("*Hello*, **world**! Visit [Onyx](https://onyx.app) and run `onyx start`.")}
</Text>

// Plain string — no markdown parsing
<Text font="main-ui-body" color="text-03">
  This *stays* as-is, no formatting applied.
</Text>
```

Supported syntax: `**bold**`, `*italic*`, `` `code` ``, `[link](url)`, `~~strikethrough~~`, `\n` (newline → `<br />`).

Markdown rendering uses `react-markdown` internally, restricted to inline elements only.
`http(s)` links open in a new tab; `mailto:` and `tel:` links open natively. Inline code
inherits the parent font size and switches to the monospace family.

Newlines (`\n`) are converted to `<br />` elements that inherit the parent's line-height,
so line spacing is proportional to the font size. For full block-level markdown (code blocks,
headings, lists), use `MinimalMarkdown` instead.

### Using `RichStr` in component props

Components that want to support optional markdown in their text props should accept
`string | RichStr`:

```tsx
import type { RichStr } from "@opal/types";

interface MyComponentProps {
  title: string | RichStr;
  description?: string | RichStr;
}
```

This avoids API coloring — no `markdown` boolean needs to be threaded through intermediate
components. The decision to use markdown lives at the call site.

## Compatibility

`@/refresh-components/texts/Text` is an independent legacy component that implements the same
font/color presets via a boolean-flag API. It is **not** a wrapper around this component. New
code should import directly from `@opal/components`.


================================================
FILE: web/lib/opal/src/components/text/Text.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Text } from "@opal/components";
import type { TextFont, TextColor } from "@opal/components";
import { markdown } from "@opal/utils";

const meta: Meta<typeof Text> = {
  title: "opal/components/Text",
  component: Text,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Text>;

// ---------------------------------------------------------------------------
// Basic
// ---------------------------------------------------------------------------

export const Default: Story = {
  args: {
    children: "The quick brown fox jumps over the lazy dog",
  },
};

export const AsHeading: Story = {
  args: {
    font: "heading-h2",
    color: "text-05",
    as: "h2",
    children: "Page Title",
  },
};

export const AsParagraph: Story = {
  args: {
    font: "main-content-body",
    color: "text-03",
    as: "p",
    children: "A full paragraph of body text rendered as a p element.",
  },
};

export const Nowrap: Story = {
  render: () => (
    <div className="w-48 border border-border-02 rounded p-2">
      <Text font="main-ui-body" color="text-05" nowrap>
        This text will not wrap even though the container is narrow
      </Text>
    </div>
  ),
};

// ---------------------------------------------------------------------------
// Fonts
// ---------------------------------------------------------------------------

const ALL_FONTS: TextFont[] = [
  "heading-h1",
  "heading-h2",
  "heading-h3",
  "heading-h3-muted",
  "main-content-body",
  "main-content-muted",
  "main-content-emphasis",
  "main-content-mono",
  "main-ui-body",
  "main-ui-muted",
  "main-ui-action",
  "main-ui-mono",
  "secondary-body",
  "secondary-action",
  "secondary-mono",
  "figure-small-label",
  "figure-small-value",
  "figure-keystroke",
];

export const AllFonts: Story = {
  render: () => (
    <div className="space-y-2">
      {ALL_FONTS.map((font) => (
        <div key={font} className="flex items-baseline gap-4">
          <span className="w-56 shrink-0 font-secondary-body text-text-03">
            {font}
          </span>
          <Text font={font} color="text-05">
            The quick brown fox
          </Text>
        </div>
      ))}
    </div>
  ),
};

// ---------------------------------------------------------------------------
// Colors
// ---------------------------------------------------------------------------

const STANDARD_COLORS: TextColor[] = [
  "text-01",
  "text-02",
  "text-03",
  "text-04",
  "text-05",
];

const INVERTED_COLORS: TextColor[] = [
  "text-inverted-01",
  "text-inverted-02",
  "text-inverted-03",
  "text-inverted-04",
  "text-inverted-05",
];

export const AllColors: Story = {
  render: () => (
    <div className="space-y-2">
      {STANDARD_COLORS.map((color) => (
        <div key={color} className="flex items-baseline gap-4">
          <span className="w-56 shrink-0 font-secondary-body text-text-03">
            {color}
          </span>
          <Text font="main-ui-body" color={color}>
            The quick brown fox
          </Text>
        </div>
      ))}
    </div>
  ),
};

export const InvertedColors: Story = {
  render: () => (
    <div className="bg-background-inverted-01 rounded-lg p-6 space-y-2">
      {INVERTED_COLORS.map((color) => (
        <div key={color} className="flex items-baseline gap-4">
          <span
            className="w-56 shrink-0 font-secondary-body"
            style={{ color: "rgba(255,255,255,0.5)" }}
          >
            {color}
          </span>
          <Text font="main-ui-body" color={color}>
            The quick brown fox
          </Text>
        </div>
      ))}
    </div>
  ),
};

// ---------------------------------------------------------------------------
// Markdown via RichStr
// ---------------------------------------------------------------------------

export const MarkdownBold: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown("This is **bold** text"),
  },
};

export const MarkdownItalic: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown("This is *italic* text"),
  },
};

export const MarkdownCode: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown("Run `npm install` to get started"),
  },
};

export const MarkdownLink: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown("Visit [Onyx](https://www.onyx.app/) for more info"),
  },
};

export const MarkdownStrikethrough: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown("This is ~~deleted~~ text"),
  },
};

export const MarkdownCombined: Story = {
  args: {
    font: "main-ui-body",
    color: "text-05",
    children: markdown(
      "*Hello*, **world**! Check out [Onyx](https://www.onyx.app/) and run `onyx start` to begin."
    ),
  },
};

export const MarkdownAtDifferentSizes: Story = {
  render: () => (
    <div className="space-y-3">
      <Text font="heading-h2" color="text-05" as="h2">
        {markdown("**Heading** with *emphasis* and `code`")}
      </Text>
      <Text font="main-content-body" color="text-03" as="p">
        {markdown("**Main content** with *emphasis* and `code`")}
      </Text>
      <Text font="secondary-body" color="text-03">
        {markdown("**Secondary** with *emphasis* and `code`")}
      </Text>
    </div>
  ),
};

export const PlainStringNotParsed: Story = {
  render: () => (
    <div className="space-y-2">
      <Text font="main-ui-body" color="text-05">
        {
          "This has *asterisks* and **double asterisks** but they are NOT parsed."
        }
      </Text>
    </div>
  ),
};

// ---------------------------------------------------------------------------
// Tag Variants
// ---------------------------------------------------------------------------

export const TagVariants: Story = {
  render: () => (
    <div className="space-y-2">
      <Text font="main-ui-body" color="text-05">
        Default (span): inline text
      </Text>
      <Text font="main-ui-body" color="text-05" as="p">
        Paragraph (p): block text
      </Text>
      <Text font="heading-h2" color="text-05" as="h2">
        Heading (h2): semantic heading
      </Text>
      <ul className="list-disc pl-6">
        <Text font="main-ui-body" color="text-05" as="li">
          List item (li): inside a list
        </Text>
      </ul>
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/components/text/components.tsx
================================================
import type { HTMLAttributes } from "react";

import type { RichStr, WithoutStyles } from "@opal/types";
import { cn } from "@opal/utils";
import { resolveStr } from "@opal/components/text/InlineMarkdown";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type TextFont =
  | "heading-h1"
  | "heading-h2"
  | "heading-h3"
  | "heading-h3-muted"
  | "main-content-body"
  | "main-content-muted"
  | "main-content-emphasis"
  | "main-content-mono"
  | "main-ui-body"
  | "main-ui-muted"
  | "main-ui-action"
  | "main-ui-mono"
  | "secondary-body"
  | "secondary-action"
  | "secondary-mono"
  | "secondary-mono-label"
  | "figure-small-label"
  | "figure-small-value"
  | "figure-keystroke";

type TextColor =
  | "inherit"
  | "text-01"
  | "text-02"
  | "text-03"
  | "text-04"
  | "text-05"
  | "text-inverted-01"
  | "text-inverted-02"
  | "text-inverted-03"
  | "text-inverted-04"
  | "text-inverted-05"
  | "text-light-03"
  | "text-light-05"
  | "text-dark-03"
  | "text-dark-05";

interface TextProps
  extends WithoutStyles<
    Omit<HTMLAttributes<HTMLElement>, "color" | "children">
  > {
  /** Font preset. Default: `"main-ui-body"`. */
  font?: TextFont;

  /** Color variant. Default: `"text-04"`. */
  color?: TextColor;

  /** HTML tag to render. Default: `"span"`. */
  as?: "p" | "span" | "li" | "h1" | "h2" | "h3";

  /** Prevent text wrapping. */
  nowrap?: boolean;

  /** Truncate text to N lines with ellipsis. `1` uses simple truncation; `2+` uses `-webkit-line-clamp`. */
  maxLines?: number;

  /** Plain string or `markdown()` for inline markdown. */
  children?: string | RichStr;
}

// ---------------------------------------------------------------------------
// Config
// ---------------------------------------------------------------------------

const FONT_CONFIG: Record<TextFont, string> = {
  "heading-h1": "font-heading-h1",
  "heading-h2": "font-heading-h2",
  "heading-h3": "font-heading-h3",
  "heading-h3-muted": "font-heading-h3-muted",
  "main-content-body": "font-main-content-body",
  "main-content-muted": "font-main-content-muted",
  "main-content-emphasis": "font-main-content-emphasis",
  "main-content-mono": "font-main-content-mono",
  "main-ui-body": "font-main-ui-body",
  "main-ui-muted": "font-main-ui-muted",
  "main-ui-action": "font-main-ui-action",
  "main-ui-mono": "font-main-ui-mono",
  "secondary-body": "font-secondary-body",
  "secondary-action": "font-secondary-action",
  "secondary-mono": "font-secondary-mono",
  "secondary-mono-label": "font-secondary-mono-label",
  "figure-small-label": "font-figure-small-label",
  "figure-small-value": "font-figure-small-value",
  "figure-keystroke": "font-figure-keystroke",
};

const COLOR_CONFIG: Record<TextColor, string | null> = {
  inherit: null,
  "text-01": "text-text-01",
  "text-02": "text-text-02",
  "text-03": "text-text-03",
  "text-04": "text-text-04",
  "text-05": "text-text-05",
  "text-inverted-01": "text-text-inverted-01",
  "text-inverted-02": "text-text-inverted-02",
  "text-inverted-03": "text-text-inverted-03",
  "text-inverted-04": "text-text-inverted-04",
  "text-inverted-05": "text-text-inverted-05",
  "text-light-03": "text-text-light-03",
  "text-light-05": "text-text-light-05",
  "text-dark-03": "text-text-dark-03",
  "text-dark-05": "text-text-dark-05",
};

// ---------------------------------------------------------------------------
// Text
// ---------------------------------------------------------------------------

function Text({
  font = "main-ui-body",
  color = "text-04",
  as: Tag = "span",
  nowrap,
  maxLines,
  children,
  ...rest
}: TextProps) {
  const resolvedClassName = cn(
    "px-[2px]",
    FONT_CONFIG[font],
    COLOR_CONFIG[color],
    nowrap && "whitespace-nowrap",
    maxLines === 1 && "truncate",
    maxLines && maxLines > 1 && "overflow-hidden"
  );

  const style =
    maxLines && maxLines > 1
      ? ({
          display: "-webkit-box",
          WebkitBoxOrient: "vertical",
          WebkitLineClamp: maxLines,
        } as React.CSSProperties)
      : undefined;

  return (
    <Tag {...rest} className={resolvedClassName} style={style}>
      {children && resolveStr(children)}
    </Tag>
  );
}

export { Text, type TextProps, type TextFont, type TextColor };


================================================
FILE: web/lib/opal/src/components/tooltip.css
================================================
/* Shared tooltip content styling */

.opal-tooltip {
  z-index: var(--z-tooltip, 1300);
  @apply rounded-08 px-3 py-2 text-sm
    bg-background-neutral-dark-03 text-text-light-05
    animate-in fade-in-0 zoom-in-95
    data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95
    data-[side=bottom]:slide-in-from-top-2
    data-[side=left]:slide-in-from-right-2
    data-[side=right]:slide-in-from-left-2
    data-[side=top]:slide-in-from-bottom-2;
}


================================================
FILE: web/lib/opal/src/core/README.md
================================================
# Core

The lowest-level primitives of the Opal design system. Think of `core` like Rust's `core` crate — compiler intrinsics and foundational types — while higher-level modules (like Rust's `std`) provide the public-facing components that most consumers should reach for first.

End-users *can* use these components directly when needed, but in most cases they should prefer the higher-level components (such as `Button`, `OpenButton`, `SelectButton`, etc.) that are built on top of `core`.

## Contents

| Primitive | Description | Docs |
|-----------|-------------|------|
| [Interactive](./interactive/) | Foundational interactive surface styling (`Stateless`, `Stateful`, `Container`, `Foldable`) | [README](./interactive/README.md) |
| [Animations](./animations/) | Coordinated hover-state animations across grouped elements (`Hoverable`) | [README](./animations/README.md) |


================================================
FILE: web/lib/opal/src/core/animations/Hoverable.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Hoverable } from "@opal/core";

// ---------------------------------------------------------------------------
// Meta
// ---------------------------------------------------------------------------

const meta: Meta = {
  title: "Core/Hoverable",
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

/** Group mode — hovering the root reveals hidden items. */
export const GroupMode: StoryObj = {
  render: () => (
    <Hoverable.Root group="demo">
      <div
        style={{
          display: "flex",
          alignItems: "center",
          gap: "0.75rem",
          padding: "1rem",
          border: "1px solid var(--border-02)",
          borderRadius: "0.5rem",
          minWidth: 260,
        }}
      >
        <span style={{ color: "var(--text-01)" }}>Hover this card</span>
        <Hoverable.Item group="demo" variant="opacity-on-hover">
          <span style={{ color: "var(--text-03)" }}>✓ Revealed</span>
        </Hoverable.Item>
      </div>
    </Hoverable.Root>
  ),
};

/** Local mode — hovering the item itself reveals it (no Root needed). */
export const LocalMode: StoryObj = {
  render: () => (
    <div
      style={{
        display: "flex",
        alignItems: "center",
        gap: "0.75rem",
        padding: "1rem",
      }}
    >
      <span style={{ color: "var(--text-01)" }}>Hover the icon →</span>
      <Hoverable.Item variant="opacity-on-hover">
        <span style={{ fontSize: "1.25rem" }}>🗑</span>
      </Hoverable.Item>
    </div>
  ),
};

/** Multiple independent groups on the same page. */
export const MultipleGroups: StoryObj = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: "0.75rem" }}>
      {(["alpha", "beta"] as const).map((group) => (
        <Hoverable.Root key={group} group={group}>
          <div
            style={{
              display: "flex",
              alignItems: "center",
              gap: "0.75rem",
              padding: "1rem",
              border: "1px solid var(--border-02)",
              borderRadius: "0.5rem",
            }}
          >
            <span style={{ color: "var(--text-01)" }}>Group: {group}</span>
            <Hoverable.Item group={group} variant="opacity-on-hover">
              <span style={{ color: "var(--text-03)" }}>✓ Revealed</span>
            </Hoverable.Item>
          </div>
        </Hoverable.Root>
      ))}
    </div>
  ),
};

/** Multiple items revealed by a single root. */
export const MultipleItems: StoryObj = {
  render: () => (
    <Hoverable.Root group="multi">
      <div
        style={{
          display: "flex",
          alignItems: "center",
          gap: "0.75rem",
          padding: "1rem",
          border: "1px solid var(--border-02)",
          borderRadius: "0.5rem",
        }}
      >
        <span style={{ color: "var(--text-01)" }}>Hover to reveal all</span>
        <Hoverable.Item group="multi" variant="opacity-on-hover">
          <span>Edit</span>
        </Hoverable.Item>
        <Hoverable.Item group="multi" variant="opacity-on-hover">
          <span>Delete</span>
        </Hoverable.Item>
        <Hoverable.Item group="multi" variant="opacity-on-hover">
          <span>Share</span>
        </Hoverable.Item>
      </div>
    </Hoverable.Root>
  ),
};

/** Nested groups — inner and outer hover independently. */
export const NestedGroups: StoryObj = {
  render: () => (
    <Hoverable.Root group="outer">
      <div
        style={{
          padding: "1rem",
          border: "1px solid var(--border-02)",
          borderRadius: "0.5rem",
          display: "flex",
          flexDirection: "column",
          gap: "0.75rem",
        }}
      >
        <div style={{ display: "flex", alignItems: "center", gap: "0.75rem" }}>
          <span style={{ color: "var(--text-01)" }}>Outer card</span>
          <Hoverable.Item group="outer" variant="opacity-on-hover">
            <span style={{ color: "var(--text-03)" }}>Outer action</span>
          </Hoverable.Item>
        </div>

        <Hoverable.Root group="inner">
          <div
            style={{
              display: "flex",
              alignItems: "center",
              gap: "0.75rem",
              padding: "0.75rem",
              border: "1px solid var(--border-03)",
              borderRadius: "0.375rem",
            }}
          >
            <span style={{ color: "var(--text-02)" }}>Inner card</span>
            <Hoverable.Item group="inner" variant="opacity-on-hover">
              <span style={{ color: "var(--text-03)" }}>Inner action</span>
            </Hoverable.Item>
          </div>
        </Hoverable.Root>
      </div>
    </Hoverable.Root>
  ),
};


================================================
FILE: web/lib/opal/src/core/animations/README.md
================================================
# Animations (Hoverable)

**Import:** `import { Hoverable } from "@opal/core";`

Provides coordinated hover-state animations across a group of elements. A `Hoverable.Root` tracks hover state and broadcasts it to `Hoverable.Item` descendants via a per-group React context.

## Sub-components

| Sub-component | Role |
|---|---|
| `Hoverable.Root` | Wraps a group of items. Tracks mouse enter/leave and provides hover state via context. |
| `Hoverable.Item` | Reads hover state from its group's context. Applies a CSS class (`opal-hoverable-item`) with variant-specific transitions (e.g. opacity, scale). |

## Props

### Hoverable.Root

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `group` | `string` | `"default"` | Named group for independent hover tracking |

### Hoverable.Item

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `group` | `string` | `"default"` | Which group to listen to |
| `variant` | `HoverableItemVariant` | `"fade"` | Animation variant |

## Usage

```tsx
import { Hoverable } from "@opal/core";

<Hoverable.Root group="card">
  <div>
    <Hoverable.Item group="card" variant="fade">
      <span>Appears on hover</span>
    </Hoverable.Item>
  </div>
</Hoverable.Root>
```


================================================
FILE: web/lib/opal/src/core/animations/components.tsx
================================================
"use client";

import "@opal/core/animations/styles.css";
import React, { createContext, useContext, useState, useCallback } from "react";
import { cn } from "@opal/utils";
import type { WithoutStyles, ExtremaSizeVariants } from "@opal/types";
import { widthVariants } from "@opal/shared";

// ---------------------------------------------------------------------------
// Context-per-group registry
// ---------------------------------------------------------------------------

/**
 * Lazily-created map of group names to React contexts.
 *
 * Each group gets its own `React.Context<boolean | null>` so that a
 * `Hoverable.Item` only re-renders when its *own* group's hover state
 * changes — not when any unrelated group changes.
 *
 * The default value is `null` (no provider found), which lets
 * `Hoverable.Item` distinguish "no Root ancestor" from "Root says
 * not hovered" and throw when `group` was explicitly specified.
 */
const contextMap = new Map<string, React.Context<boolean | null>>();

function getOrCreateContext(group: string): React.Context<boolean | null> {
  let ctx = contextMap.get(group);
  if (!ctx) {
    ctx = createContext<boolean | null>(null);
    ctx.displayName = `HoverableContext(${group})`;
    contextMap.set(group, ctx);
  }
  return ctx;
}

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface HoverableRootProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  children: React.ReactNode;
  group: string;
  /** Width preset. @default "auto" */
  widthVariant?: ExtremaSizeVariants;
  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

type HoverableItemVariant = "opacity-on-hover";

interface HoverableItemProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  children: React.ReactNode;
  group?: string;
  variant?: HoverableItemVariant;
  /** Ref forwarded to the item `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// HoverableRoot
// ---------------------------------------------------------------------------

/**
 * Hover-tracking container for a named group.
 *
 * Wraps children in a `<div>` that tracks mouse-enter / mouse-leave and
 * provides the hover state via a per-group React context.
 *
 * Nesting works because each `Hoverable.Root` creates a **new** context
 * provider that shadows the parent — so an inner `Hoverable.Item group="b"`
 * reads from the inner provider, not the outer `group="a"` provider.
 *
 * @example
 * ```tsx
 * <Hoverable.Root group="card">
 *   <Card>
 *     <Hoverable.Item group="card" variant="opacity-on-hover">
 *       <TrashIcon />
 *     </Hoverable.Item>
 *   </Card>
 * </Hoverable.Root>
 * ```
 */
function HoverableRoot({
  group,
  children,
  widthVariant = "full",
  ref,
  onMouseEnter: consumerMouseEnter,
  onMouseLeave: consumerMouseLeave,
  onFocusCapture: consumerFocusCapture,
  onBlurCapture: consumerBlurCapture,
  ...props
}: HoverableRootProps) {
  const [hovered, setHovered] = useState(false);
  const [focused, setFocused] = useState(false);

  const onMouseEnter = useCallback(
    (e: React.MouseEvent<HTMLDivElement>) => {
      setHovered(true);
      consumerMouseEnter?.(e);
    },
    [consumerMouseEnter]
  );

  const onMouseLeave = useCallback(
    (e: React.MouseEvent<HTMLDivElement>) => {
      setHovered(false);
      consumerMouseLeave?.(e);
    },
    [consumerMouseLeave]
  );

  const onFocusCapture = useCallback(
    (e: React.FocusEvent<HTMLDivElement>) => {
      setFocused(true);
      consumerFocusCapture?.(e);
    },
    [consumerFocusCapture]
  );

  const onBlurCapture = useCallback(
    (e: React.FocusEvent<HTMLDivElement>) => {
      if (
        !(e.relatedTarget instanceof Node) ||
        !e.currentTarget.contains(e.relatedTarget)
      ) {
        setFocused(false);
      }
      consumerBlurCapture?.(e);
    },
    [consumerBlurCapture]
  );

  const active = hovered || focused;
  const GroupContext = getOrCreateContext(group);

  return (
    <GroupContext.Provider value={active}>
      <div
        {...props}
        ref={ref}
        className={cn(widthVariants[widthVariant])}
        onMouseEnter={onMouseEnter}
        onMouseLeave={onMouseLeave}
        onFocusCapture={onFocusCapture}
        onBlurCapture={onBlurCapture}
      >
        {children}
      </div>
    </GroupContext.Provider>
  );
}

// ---------------------------------------------------------------------------
// HoverableItem
// ---------------------------------------------------------------------------

/**
 * An element whose visibility is controlled by hover state.
 *
 * **Local mode** (`group` omitted): the item handles hover on its own
 * element via CSS `:hover`. This is the core abstraction.
 *
 * **Group mode** (`group` provided): visibility is driven by a matching
 * `Hoverable.Root` ancestor's hover state via React context. If no
 * matching Root is found, an error is thrown.
 *
 * Uses data-attributes for variant styling (see `styles.css`).
 *
 * @example
 * ```tsx
 * // Local mode — hover on the item itself
 * <Hoverable.Item variant="opacity-on-hover">
 *   <TrashIcon />
 * </Hoverable.Item>
 *
 * // Group mode — hover on the Root reveals the item
 * <Hoverable.Root group="card">
 *   <Hoverable.Item group="card" variant="opacity-on-hover">
 *     <TrashIcon />
 *   </Hoverable.Item>
 * </Hoverable.Root>
 * ```
 *
 * @throws If `group` is specified but no matching `Hoverable.Root` ancestor exists.
 */
function HoverableItem({
  group,
  variant = "opacity-on-hover",
  children,
  ref,
  ...props
}: HoverableItemProps) {
  const contextValue = useContext(
    group ? getOrCreateContext(group) : NOOP_CONTEXT
  );

  if (group && contextValue === null) {
    throw new Error(
      `Hoverable.Item group="${group}" has no matching Hoverable.Root ancestor. ` +
        `Either wrap it in <Hoverable.Root group="${group}"> or remove the group prop for local hover.`
    );
  }

  const isLocal = group === undefined;

  return (
    <div
      {...props}
      ref={ref}
      className={cn("hoverable-item")}
      data-hoverable-variant={variant}
      data-hoverable-active={
        isLocal ? undefined : contextValue ? "true" : undefined
      }
      data-hoverable-local={isLocal ? "true" : undefined}
    >
      {children}
    </div>
  );
}

/** Stable context used when no group is specified (local mode). */
const NOOP_CONTEXT = createContext<boolean | null>(null);

// ---------------------------------------------------------------------------
// Compound export
// ---------------------------------------------------------------------------

/**
 * Hoverable compound component for hover-to-reveal patterns.
 *
 * Provides two sub-components:
 *
 * - `Hoverable.Root` — A container that tracks hover state for a named group
 *   and provides it via React context.
 *
 * - `Hoverable.Item` — The core abstraction. On its own (no `group`), it
 *   applies local CSS `:hover` for the variant effect. When `group` is
 *   specified, it reads hover state from the nearest matching
 *   `Hoverable.Root` — and throws if no matching Root is found.
 *
 * Supports nesting: a child `Hoverable.Root` shadows the parent's context,
 * so each group's items only respond to their own root's hover.
 *
 * @example
 * ```tsx
 * import { Hoverable } from "@opal/core";
 *
 * // Group mode — hovering the card reveals the trash icon
 * <Hoverable.Root group="card">
 *   <Card>
 *     <span>Card content</span>
 *     <Hoverable.Item group="card" variant="opacity-on-hover">
 *       <TrashIcon />
 *     </Hoverable.Item>
 *   </Card>
 * </Hoverable.Root>
 *
 * // Local mode — hovering the item itself reveals it
 * <Hoverable.Item variant="opacity-on-hover">
 *   <TrashIcon />
 * </Hoverable.Item>
 * ```
 */
const Hoverable = {
  Root: HoverableRoot,
  Item: HoverableItem,
};

export {
  Hoverable,
  type HoverableRootProps,
  type HoverableItemProps,
  type HoverableItemVariant,
};


================================================
FILE: web/lib/opal/src/core/animations/styles.css
================================================
/* Hoverable — item transitions */
.hoverable-item {
  transition: opacity 150ms ease-in-out;
}

.hoverable-item[data-hoverable-variant="opacity-on-hover"] {
  opacity: 0;
}

/* Group mode — Root controls visibility via React context */
.hoverable-item[data-hoverable-variant="opacity-on-hover"][data-hoverable-active="true"] {
  opacity: 1;
}

/* Local mode — item handles its own :hover */
.hoverable-item[data-hoverable-variant="opacity-on-hover"][data-hoverable-local="true"]:hover {
  opacity: 1;
}

/* Focus — item (or a focusable descendant) receives keyboard focus */
.hoverable-item[data-hoverable-variant="opacity-on-hover"]:has(:focus-visible) {
  opacity: 1;
}

/* Focus ring on keyboard focus */
.hoverable-item:focus-visible {
  outline: 2px solid var(--border-04);
  outline-offset: 2px;
  border-radius: 0.25rem;
}


================================================
FILE: web/lib/opal/src/core/disabled/components.tsx
================================================
import "@opal/core/disabled/styles.css";
import React from "react";
import { Slot } from "@radix-ui/react-slot";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface DisabledProps extends React.HTMLAttributes<HTMLElement> {
  ref?: React.Ref<HTMLElement>;

  /**
   * When truthy, applies disabled styling to child elements.
   */
  disabled?: boolean;

  /**
   * When `true`, re-enables pointer events while keeping the disabled
   * visual treatment. Useful for elements that need to show tooltips or
   * error messages on click.
   * @default false
   */
  allowClick?: boolean;

  children: React.ReactElement;
}

// ---------------------------------------------------------------------------
// Disabled
// ---------------------------------------------------------------------------

/**
 * Wrapper component that applies baseline disabled CSS (opacity, cursor,
 * pointer-events) to its child element.
 *
 * Uses Radix `Slot` — merges props onto the single child element without
 * adding any DOM node. Works correctly inside Radix `asChild` chains.
 *
 * @example
 * ```tsx
 * <Disabled disabled={!canSubmit}>
 *   <div>...</div>
 * </Disabled>
 * ```
 */
function Disabled({
  disabled,
  allowClick,
  children,
  ref,
  ...rest
}: DisabledProps) {
  return (
    <Slot
      ref={ref}
      {...rest}
      aria-disabled={disabled || undefined}
      data-opal-disabled={disabled || undefined}
      data-allow-click={disabled && allowClick ? "" : undefined}
    >
      {children}
    </Slot>
  );
}

export { Disabled, type DisabledProps };


================================================
FILE: web/lib/opal/src/core/disabled/styles.css
================================================
/* Disabled — baseline disabled visuals via Radix Slot (no extra DOM node).
 *
 * [data-opal-disabled]                   → cursor + pointer-events for all
 * [data-opal-disabled]:not(.interactive) → opacity for non-Interactive elements
 * [data-opal-disabled][data-allow-click] → re-enables clicks
 *
 * Interactive elements (.interactive) handle their own disabled colors via
 * variant CSS — no blanket opacity is applied to them. Pointer-events are
 * re-enabled so the JS layer can suppress onClick.
 */

[data-opal-disabled] {
  @apply cursor-not-allowed select-none;
  pointer-events: none;
}

/* Only apply blanket opacity to non-Interactive elements.
   Interactive variants define their own disabled backgrounds/foregrounds. */
[data-opal-disabled]:not(.interactive) {
  @apply opacity-50;
}

/* Re-enable pointer-events so the Interactive JS layer can suppress onClick. */
[data-opal-disabled].interactive {
  pointer-events: auto;
}

[data-opal-disabled][data-allow-click] {
  pointer-events: auto;
}


================================================
FILE: web/lib/opal/src/core/index.ts
================================================
/* Disabled */
export { Disabled, type DisabledProps } from "@opal/core/disabled/components";

/* Animations (formerly Hoverable) */
export {
  Hoverable,
  type HoverableRootProps,
  type HoverableItemProps,
  type HoverableItemVariant,
} from "@opal/core/animations/components";

/* Interactive — compound component */
import { InteractiveStateless } from "@opal/core/interactive/stateless/components";
import { InteractiveStateful } from "@opal/core/interactive/stateful/components";
import { InteractiveContainer } from "@opal/core/interactive/container/components";
import { InteractiveSimple } from "@opal/core/interactive/simple/components";
import { Foldable } from "@opal/core/interactive/foldable/components";

const Interactive = {
  Simple: InteractiveSimple,
  Stateless: InteractiveStateless,
  Stateful: InteractiveStateful,
  Container: InteractiveContainer,
  Foldable,
};

export { Interactive };

/* Interactive — types */
export type {
  InteractiveStatelessProps,
  InteractiveStatelessVariant,
  InteractiveStatelessProminence,
  InteractiveStatelessInteraction,
} from "@opal/core/interactive/stateless/components";

export type {
  InteractiveStatefulProps,
  InteractiveStatefulVariant,
  InteractiveStatefulState,
  InteractiveStatefulInteraction,
} from "@opal/core/interactive/stateful/components";

export type {
  InteractiveContainerProps,
  InteractiveContainerRoundingVariant,
} from "@opal/core/interactive/container/components";

export type { FoldableProps } from "@opal/core/interactive/foldable/components";

export type { InteractiveSimpleProps } from "@opal/core/interactive/simple/components";


================================================
FILE: web/lib/opal/src/core/interactive/Interactive.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Interactive, Disabled } from "@opal/core";

// ---------------------------------------------------------------------------
// Variant / Prominence mappings for the matrix story
// ---------------------------------------------------------------------------

const VARIANT_PROMINENCE_MAP: Record<string, string[]> = {
  default: ["primary", "secondary", "tertiary", "internal"],
  action: ["primary", "secondary", "tertiary", "internal"],
  danger: ["primary", "secondary", "tertiary", "internal"],
};

const SIZE_VARIANTS = ["lg", "md", "sm", "xs", "2xs", "fit"] as const;
const ROUNDING_VARIANTS = ["default", "compact", "mini"] as const;

// ---------------------------------------------------------------------------
// Meta
// ---------------------------------------------------------------------------

const meta: Meta = {
  title: "Core/Interactive",
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

/** Basic Interactive.Stateless + Container with text content. */
export const Default: StoryObj = {
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem", alignItems: "center" }}>
      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Secondary</span>
        </Interactive.Container>
      </Interactive.Stateless>

      <Interactive.Stateless
        variant="default"
        prominence="primary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Primary</span>
        </Interactive.Container>
      </Interactive.Stateless>

      <Interactive.Stateless
        variant="default"
        prominence="tertiary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Tertiary</span>
        </Interactive.Container>
      </Interactive.Stateless>
    </div>
  ),
};

/** All variant x prominence combinations displayed in a grid. */
export const VariantMatrix: StoryObj = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: "1.5rem" }}>
      {Object.entries(VARIANT_PROMINENCE_MAP).map(([variant, prominences]) => (
        <div key={variant}>
          <div
            style={{
              fontSize: "0.75rem",
              fontWeight: 600,
              textTransform: "uppercase",
              letterSpacing: "0.05em",
              paddingBottom: "0.5rem",
            }}
          >
            {variant}
          </div>

          {prominences.length === 0 ? (
            <Interactive.Stateless variant="none" onClick={() => {}}>
              <Interactive.Container border>
                <span style={{ color: "var(--text-01)" }}>
                  none (no prominence)
                </span>
              </Interactive.Container>
            </Interactive.Stateless>
          ) : (
            <div style={{ display: "flex", gap: "0.5rem", flexWrap: "wrap" }}>
              {prominences.map((prominence) => (
                <div
                  key={prominence}
                  style={{
                    display: "flex",
                    flexDirection: "column",
                    alignItems: "center",
                    gap: "0.25rem",
                  }}
                >
                  <Interactive.Stateless
                    // Cast required because the discriminated union can't be
                    // resolved from dynamic strings at the type level.
                    {...({ variant, prominence } as any)}
                    onClick={() => {}}
                  >
                    <Interactive.Container border>
                      <span>{prominence}</span>
                    </Interactive.Container>
                  </Interactive.Stateless>
                  <span
                    style={{
                      fontSize: "0.625rem",
                      opacity: 0.6,
                    }}
                  >
                    {prominence}
                  </span>
                </div>
              ))}
            </div>
          )}
        </div>
      ))}
    </div>
  ),
};

/** All heightVariant sizes (lg, md, sm, xs, 2xs, fit). */
export const Sizes: StoryObj = {
  render: () => (
    <div style={{ display: "flex", alignItems: "center", gap: "0.75rem" }}>
      {SIZE_VARIANTS.map((size) => (
        <Interactive.Stateless
          key={size}
          variant="default"
          prominence="secondary"
          onClick={() => {}}
        >
          <Interactive.Container border heightVariant={size}>
            <span>{size}</span>
          </Interactive.Container>
        </Interactive.Stateless>
      ))}
    </div>
  ),
};

/** Container with widthVariant="full" stretching to fill its parent. */
export const WidthFull: StoryObj = {
  render: () => (
    <div style={{ width: 400 }}>
      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container border widthVariant="full">
          <span>Full width container</span>
        </Interactive.Container>
      </Interactive.Stateless>
    </div>
  ),
};

/** All rounding variants side by side. */
export const Rounding: StoryObj = {
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem" }}>
      {ROUNDING_VARIANTS.map((rounding) => (
        <Interactive.Stateless
          key={rounding}
          variant="default"
          prominence="secondary"
          onClick={() => {}}
        >
          <Interactive.Container border roundingVariant={rounding}>
            <span>{rounding}</span>
          </Interactive.Container>
        </Interactive.Stateless>
      ))}
    </div>
  ),
};

/** Disabled state prevents clicks and shows disabled styling. */
export const DisabledStory: StoryObj = {
  name: "Disabled",
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem" }}>
      <Disabled disabled>
        <Interactive.Stateless
          variant="default"
          prominence="secondary"
          onClick={() => {}}
        >
          <Interactive.Container border>
            <span>Disabled</span>
          </Interactive.Container>
        </Interactive.Stateless>
      </Disabled>

      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Enabled</span>
        </Interactive.Container>
      </Interactive.Stateless>
    </div>
  ),
};

/** Interaction override forces the hover/active visual state. */
export const Interaction: StoryObj = {
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem" }}>
      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        interaction="hover"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Forced hover</span>
        </Interactive.Container>
      </Interactive.Stateless>

      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        interaction="active"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Forced active</span>
        </Interactive.Container>
      </Interactive.Stateless>

      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Normal (rest)</span>
        </Interactive.Container>
      </Interactive.Stateless>
    </div>
  ),
};

/** Container with border={true}. */
export const WithBorder: StoryObj = {
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem" }}>
      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>With border</span>
        </Interactive.Container>
      </Interactive.Stateless>

      <Interactive.Stateless
        variant="default"
        prominence="secondary"
        onClick={() => {}}
      >
        <Interactive.Container>
          <span>Without border</span>
        </Interactive.Container>
      </Interactive.Stateless>
    </div>
  ),
};

/** Using href to render as a link. */
export const AsLink: StoryObj = {
  render: () => (
    <Interactive.Stateless variant="action" href="/settings">
      <Interactive.Container border>
        <span>Go to Settings</span>
      </Interactive.Container>
    </Interactive.Stateless>
  ),
};

/** Stateful select variant with selected and unselected states. */
export const SelectVariant: StoryObj = {
  render: () => (
    <div style={{ display: "flex", gap: "0.75rem" }}>
      <Interactive.Stateful
        variant="select-light"
        state="selected"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Selected (light)</span>
        </Interactive.Container>
      </Interactive.Stateful>

      <Interactive.Stateful
        variant="select-light"
        state="empty"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Unselected (light)</span>
        </Interactive.Container>
      </Interactive.Stateful>

      <Interactive.Stateful
        variant="select-heavy"
        state="selected"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Selected (heavy)</span>
        </Interactive.Container>
      </Interactive.Stateful>

      <Interactive.Stateful
        variant="select-heavy"
        state="empty"
        onClick={() => {}}
      >
        <Interactive.Container border>
          <span>Unselected (heavy)</span>
        </Interactive.Container>
      </Interactive.Stateful>
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/core/interactive/README.md
================================================
# Interactive

The foundational layer for all clickable surfaces in the design system. Defines hover, active, disabled, and interaction-override state styling in a single place. Higher-level components (Button, SelectButton, OpenButton, etc.) compose on top of it.

## Sub-components

| Sub-component | Role | Docs |
|---|---|---|
| `Interactive.Stateless` | Stateless surfaces (buttons, links, cards). Variant × prominence color matrix. | [README](./stateless/README.md) |
| `Interactive.Stateful` | Stateful surfaces (toggles, sidebar items). Variant × state color matrix. | [README](./stateful/README.md) |
| `Interactive.Container` | Structural box with height, rounding, padding, and optional border. Shared by both. | [README](./container/README.md) |
| `Interactive.Foldable` | Zero-width collapsible wrapper with CSS grid animation. | [README](./foldable/README.md) |

## Foreground colour system

Each variant/prominence/state combination sets two CSS custom properties:
- `--interactive-foreground` — text color
- `--interactive-foreground-icon` — icon color

Both are registered via `@property` as `<color>` in `shared.css`, enabling the browser to interpolate them directly on the parent `.interactive` element. Children read the variables with no independent transitions, guaranteeing perfect sync.

**Opt-in classes:**
- `.interactive-foreground` — sets `color: var(--interactive-foreground)`
- `.interactive-foreground-icon` — sets `color: var(--interactive-foreground-icon)`

## Interaction override

Both `Stateless` and `Stateful` support `interaction?: "rest" | "hover" | "active"` for JS-controlled visual state overrides via `data-interaction`.

## Colour tables

### Stateless: Default

**Background**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `theme-primary-05` | `background-tint-01` | `transparent` | `transparent` |
| **Hover** | `theme-primary-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |
| **Active** | `theme-primary-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |
| **Disabled** | `background-neutral-04` | `background-neutral-03` | `transparent` | `transparent` |

**Foreground**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `text-inverted-05` | `text-03` | `text-03` | `text-03` |
| **Hover** | `text-inverted-05` | `text-04` | `text-04` | `text-04` |
| **Active** | `text-inverted-05` | `text-05` | `text-05` | `text-05` |
| **Disabled** | `text-inverted-04` | `text-01` | `text-01` | `text-01` |

### Stateless: Action

**Background**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `action-link-05` | `background-tint-01` | `transparent` | `transparent` |
| **Hover** | `action-link-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |
| **Active** | `action-link-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |
| **Disabled** | `action-link-02` | `background-neutral-02` | `transparent` | `transparent` |

**Foreground**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |
| **Hover** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |
| **Active** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |
| **Disabled** | `text-01` | `action-link-03` | `action-link-03` | `action-link-03` |

### Stateless: Danger

**Background**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `action-danger-05` | `background-tint-01` | `transparent` | `transparent` |
| **Hover** | `action-danger-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |
| **Active** | `action-danger-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |
| **Disabled** | `action-danger-02` | `background-neutral-02` | `transparent` | `transparent` |

**Foreground**

| | Primary | Secondary | Tertiary | Internal |
|---|---|---|---|---|
| **Rest** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |
| **Hover** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |
| **Active** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |
| **Disabled** | `text-01` | `action-danger-03` | `action-danger-03` | `action-danger-03` |

### Stateful: Select-Heavy / Select-Light

**Background (empty/filled)**

| | Select-Heavy | Select-Light |
|---|---|---|
| **Rest** | `transparent` | `transparent` |
| **Hover** | `background-tint-02` | `background-tint-02` |
| **Active** | `background-neutral-00` | `background-neutral-00` |
| **Disabled** | `transparent` | `transparent` |

**Background (selected)**

| | Select-Heavy | Select-Light |
|---|---|---|
| **Rest** | `action-link-01` | `transparent` |
| **Hover** | `background-tint-02` | `background-tint-02` |
| **Active** | `background-tint-00` | `background-tint-00` |
| **Disabled** | `transparent` | `transparent` |

**Foreground (empty)**

| | Text | Icon |
|---|---|---|
| **Rest** | `text-04` | `text-03` |
| **Hover** | `text-04` | `text-04` |
| **Active** | `text-05` | `text-05` |
| **Disabled** | `text-01` | `text-01` |

**Foreground (selected)**

| | Text | Icon |
|---|---|---|
| **Rest** | `action-link-05` | `action-link-05` |
| **Hover** | `action-link-05` | `action-link-05` |
| **Active** | `action-link-05` | `action-link-05` |
| **Disabled** | `action-link-03` | `action-link-03` |

### Stateful: Sidebar

**Background**

| | Empty/Filled | Selected |
|---|---|---|
| **Rest** | `transparent` | `background-tint-00` |
| **Hover** | `background-tint-03` | `background-tint-03` |


================================================
FILE: web/lib/opal/src/core/interactive/container/README.md
================================================
# Interactive.Container

**Import:** `import { Interactive } from "@opal/core";` — use as `Interactive.Container`.

Structural container shared by both `Interactive.Stateless` and `Interactive.Stateful`. Provides consistent height, rounding, padding, and optional border. Renders a `<div>` by default, or a `<button>` when `type` is provided.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `heightVariant` | `SizeVariant` | `"lg"` | Height preset (`2xs`–`lg`, `fit`) |
| `roundingVariant` | `"md" \| "sm" \| "xs"` | `"md"` | Border-radius preset |
| `widthVariant` | `WidthVariant` | — | Width preset (`"auto"`, `"fit"`, `"full"`) |
| `border` | `boolean` | `false` | Renders a 1px border |
| `type` | `"submit" \| "button" \| "reset"` | — | When set, renders a `<button>` element |

## Usage

```tsx
<Interactive.Stateless variant="default" prominence="primary">
  <Interactive.Container heightVariant="sm" roundingVariant="sm" border>
    <span>Content</span>
  </Interactive.Container>
</Interactive.Stateless>
```


================================================
FILE: web/lib/opal/src/core/interactive/container/components.tsx
================================================
import Link from "next/link";
import type { Route } from "next";
import "@opal/core/interactive/shared.css";
import React from "react";
import { cn } from "@opal/utils";
import type { ButtonType, RoundingVariants, WithoutStyles } from "@opal/types";
import {
  containerSizeVariants,
  type ContainerSizeVariants,
  widthVariants,
  type ExtremaSizeVariants,
} from "@opal/shared";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type InteractiveContainerRoundingVariant = Extract<
  RoundingVariants,
  "md" | "sm" | "xs"
>;
const interactiveContainerRoundingVariants: Record<
  InteractiveContainerRoundingVariant,
  string
> = {
  md: "rounded-12",
  sm: "rounded-08",
  xs: "rounded-04",
} as const;

/**
 * Props for {@link InteractiveContainer}.
 *
 * Extends standard `<div>` attributes (minus `className` and `style`).
 */
interface InteractiveContainerProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  /**
   * Ref forwarded to the underlying element.
   */
  ref?: React.Ref<HTMLElement>;

  /**
   * HTML button type (e.g. `"submit"`, `"button"`, `"reset"`).
   *
   * When provided, renders a `<button>` element instead of a `<div>`.
   * This keeps all styling (background, rounding, height) on a single
   * element — unlike a wrapper approach which would split them.
   *
   * Mutually exclusive with `href`.
   */
  type?: ButtonType;

  /**
   * When `true`, applies a 1px border using the theme's border color.
   *
   * @default false
   */
  border?: boolean;

  /**
   * Border-radius preset controlling corner rounding.
   *
   * @default "default"
   */
  roundingVariant?: InteractiveContainerRoundingVariant;

  /**
   * Size preset controlling the container's height, min-width, and padding.
   *
   * @default "lg"
   */
  heightVariant?: ContainerSizeVariants;

  /**
   * Width preset controlling the container's horizontal size.
   *
   * @default "fit"
   */
  widthVariant?: ExtremaSizeVariants;
}

// ---------------------------------------------------------------------------
// InteractiveContainer
// ---------------------------------------------------------------------------

/**
 * Structural container for use inside `Interactive.Stateless` or
 * `Interactive.Stateful`.
 *
 * Provides a `<div>` with design-system-controlled border, padding, rounding,
 * and height. When nested under a Radix Slot-based parent, correctly extracts
 * and merges injected `className` and `style` values.
 */
function InteractiveContainer({
  ref,
  type,
  border,
  roundingVariant = "md",
  heightVariant = "lg",
  widthVariant = "fit",
  ...props
}: InteractiveContainerProps) {
  const {
    className: slotClassName,
    style: slotStyle,
    href,
    target,
    rel,
    ...rest
  } = props as typeof props & {
    className?: string;
    style?: React.CSSProperties;
    href?: string;
    target?: string;
    rel?: string;
  };
  const { height, minWidth, padding } = containerSizeVariants[heightVariant];
  const sharedProps = {
    ...rest,
    className: cn(
      "interactive-container",
      interactiveContainerRoundingVariants[roundingVariant],
      height,
      minWidth,
      padding,
      widthVariants[widthVariant],
      slotClassName
    ),
    "data-border": border ? ("true" as const) : undefined,
    style: slotStyle,
  };

  if (href) {
    return (
      <Link
        ref={ref as React.Ref<HTMLAnchorElement>}
        href={href as Route}
        target={target}
        rel={rel}
        {...(sharedProps as React.HTMLAttributes<HTMLAnchorElement>)}
      />
    );
  }

  if (type) {
    const ariaDisabled = (rest as Record<string, unknown>)["aria-disabled"];
    const nativeDisabled =
      ariaDisabled === true || ariaDisabled === "true" || undefined;
    return (
      <button
        ref={ref as React.Ref<HTMLButtonElement>}
        type={type}
        disabled={nativeDisabled}
        {...(sharedProps as React.HTMLAttributes<HTMLButtonElement>)}
      />
    );
  }
  return <div ref={ref as React.Ref<HTMLDivElement>} {...sharedProps} />;
}

export {
  InteractiveContainer,
  type InteractiveContainerProps,
  type InteractiveContainerRoundingVariant,
};


================================================
FILE: web/lib/opal/src/core/interactive/foldable/README.md
================================================
# Interactive.Foldable

**Import:** `import { Interactive } from "@opal/core";` — use as `Interactive.Foldable`.

A zero-width collapsible wrapper that expands when its ancestor `.interactive` element is hovered or has an interaction override. Uses a CSS grid `0fr → 1fr` animation for smooth expand/collapse.

## Requirements

- Must be placed inside an `Interactive.Stateless` or `Interactive.Stateful` tree.
- The direct parent element should add the `interactive-foldable-host` class for synchronized gap transitions.

## Props

| Prop | Type | Description |
|------|------|-------------|
| `children` | `ReactNode` | Content that folds/unfolds |

## CSS triggers

The foldable expands when any of these conditions are met on an ancestor `.interactive`:
- `:hover` pseudo-class
- `data-interaction="hover"`
- `data-interaction="active"`

## Usage

```tsx
<Interactive.Stateful variant="select-heavy" state="empty">
  <Interactive.Container>
    <div className="interactive-foldable-host flex items-center">
      <Icon />
      <Interactive.Foldable>
        <span>Label text</span>
      </Interactive.Foldable>
    </div>
  </Interactive.Container>
</Interactive.Stateful>
```


================================================
FILE: web/lib/opal/src/core/interactive/foldable/components.tsx
================================================
import "@opal/core/interactive/foldable/styles.css";
import React from "react";
import type { WithoutStyles } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface FoldableProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  children: React.ReactNode;
}

// ---------------------------------------------------------------------------
// Foldable
// ---------------------------------------------------------------------------

/**
 * A zero-width collapsible wrapper that expands when its ancestor
 * `.interactive` element is hovered or has an interaction override.
 *
 * Uses a CSS grid `0fr ↔ 1fr` animation for smooth expand/collapse.
 * Must be placed inside an `Interactive.Stateless` or `Interactive.Stateful`
 * tree for the CSS triggers to work.
 *
 * The parent element should add the `interactive-foldable-host` class to
 * get synchronized gap transitions.
 *
 * @example
 * ```tsx
 * <Interactive.Stateful variant="select-heavy" state="empty">
 *   <Interactive.Container>
 *     <div className="interactive-foldable-host flex items-center">
 *       <Icon />
 *       <Foldable>
 *         <span>Label text</span>
 *       </Foldable>
 *     </div>
 *   </Interactive.Container>
 * </Interactive.Stateful>
 * ```
 */
function Foldable({ children, ...props }: FoldableProps) {
  return (
    <div {...props} className="interactive-foldable">
      <div className="interactive-foldable-inner">{children}</div>
    </div>
  );
}

export { Foldable, type FoldableProps };


================================================
FILE: web/lib/opal/src/core/interactive/foldable/styles.css
================================================
/* ---------------------------------------------------------------------------
   Foldable — CSS grid collapse/expand animation.

   Expands when an ancestor `.interactive` element is hovered, focused
   within, or has `data-interaction="hover"` / `data-interaction="active"`.

   Structure:
     .interactive-foldable-host   — flex parent, gap transitions 0 → 0.25rem
       [always-visible content]
       .interactive-foldable      — grid container, column 0fr ↔ 1fr
         .interactive-foldable-inner — single grid item, flex + overflow clip
           [foldable content]
   --------------------------------------------------------------------------- */

/* Host: the flex parent that includes both persistent + foldable content */
.interactive-foldable-host {
  gap: 0;
  transition: gap 200ms ease-in-out;
}

.interactive:hover:not([data-disabled]) .interactive-foldable-host,
.interactive:focus-within:not([data-disabled]) .interactive-foldable-host,
.interactive[data-interaction="hover"]:not([data-disabled])
  .interactive-foldable-host,
.interactive[data-interaction="active"]:not([data-disabled])
  .interactive-foldable-host {
  gap: 0.25rem;
}

/* Grid container — collapse animation */
.interactive-foldable {
  display: grid;
  grid-template-columns: 0fr;
  opacity: 0;
  transition:
    grid-template-columns 200ms ease-in-out,
    opacity 200ms ease-in-out;
}

/* Single grid item — content layout + overflow clipping */
.interactive-foldable-inner {
  @apply flex items-center gap-1;
  overflow: hidden;
  min-width: 0;
}

/* Expanded: hovered, focused within, or interaction override */
.interactive:hover:not([data-disabled]) .interactive-foldable,
.interactive:focus-within:not([data-disabled]) .interactive-foldable,
.interactive[data-interaction="hover"]:not([data-disabled])
  .interactive-foldable,
.interactive[data-interaction="active"]:not([data-disabled])
  .interactive-foldable {
  grid-template-columns: 1fr;
  opacity: 1;
}


================================================
FILE: web/lib/opal/src/core/interactive/shared.css
================================================
/* Interactive — shared base classes for stateless + stateful primitives */

/* Register --interactive-foreground as a <color> so the browser can
   interpolate it directly on the parent. Children just read the variable
   with no independent transitions — guaranteeing perfect sync. */
@property --interactive-foreground {
  syntax: "<color>";
  inherits: true;
  initial-value: transparent;
}

@property --interactive-foreground-icon {
  syntax: "<color>";
  inherits: true;
  initial-value: transparent;
}

/* Shared timing tokens — used by .interactive and other surfaces (e.g. table rows) */
:root {
  --interactive-duration: 150ms;
  --interactive-easing: ease-in-out;
}

/* Base interactive surface — sets color directly so all descendants inherit. */
.interactive {
  @apply cursor-pointer select-none;
  color: var(--interactive-foreground);
  transition:
    background-color var(--interactive-duration) var(--interactive-easing),
    --interactive-foreground var(--interactive-duration)
      var(--interactive-easing),
    --interactive-foreground-icon var(--interactive-duration)
      var(--interactive-easing);
}
.interactive[data-disabled] {
  @apply cursor-not-allowed;
}

/* Container — structural box */
.interactive-container {
  @apply flex items-center justify-center overflow-clip;
}
.interactive-container[data-border="true"] {
  @apply border;
}

/* Icon foreground — reads from --interactive-foreground-icon, which may differ
   from --interactive-foreground (e.g. muted icons beside normal text). */
.interactive-foreground-icon {
  color: var(--interactive-foreground-icon);
}


================================================
FILE: web/lib/opal/src/core/interactive/simple/components.tsx
================================================
import React from "react";
import { Slot } from "@radix-ui/react-slot";
import { cn } from "@opal/utils";
import { guardPortalClick } from "@opal/core/interactive/utils";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface InteractiveSimpleProps
  extends Omit<
    React.HTMLAttributes<HTMLElement>,
    "className" | "style" | "color"
  > {
  ref?: React.Ref<HTMLElement>;

  /**
   * Tailwind group class (e.g. `"group/Card"`) for `group-hover:*` utilities.
   */
  group?: string;

  /**
   * URL to navigate to when clicked. Passed through Slot to the child.
   */
  href?: string;

  /**
   * Link target (e.g. `"_blank"`). Only used when `href` is provided.
   */
  target?: string;

  /**
   * Applies disabled cursor and suppresses clicks.
   */
  disabled?: boolean;
}

// ---------------------------------------------------------------------------
// InteractiveSimple
// ---------------------------------------------------------------------------

/**
 * Minimal interactive surface primitive.
 *
 * Provides cursor styling, click handling, and optional link/group
 * support — but **no color or background styling**.
 *
 * Use this for elements that need interactivity (click, cursor, disabled)
 * without participating in the Interactive color system.
 *
 * Uses Radix `Slot` — merges props onto a single child element without
 * adding any DOM node.
 *
 * @example
 * ```tsx
 * <Interactive.Simple onClick={handleClick} group="group/Card">
 *   <Card>...</Card>
 * </Interactive.Simple>
 * ```
 */
function InteractiveSimple({
  ref,
  group,
  href,
  target,
  disabled,
  ...props
}: InteractiveSimpleProps) {
  const isDisabled = !!disabled;

  const classes = cn(
    "cursor-pointer select-none",
    isDisabled && "cursor-not-allowed",
    !props.onClick && !href && "!cursor-default !select-auto",
    group
  );

  const { onClick, ...slotProps } = props;

  const linkAttrs = href
    ? {
        href: isDisabled ? undefined : href,
        target,
        rel: target === "_blank" ? "noopener noreferrer" : undefined,
      }
    : {};

  return (
    <Slot
      ref={ref}
      className={classes}
      aria-disabled={isDisabled || undefined}
      {...linkAttrs}
      {...slotProps}
      onClick={
        isDisabled
          ? href
            ? (e: React.MouseEvent) => e.preventDefault()
            : undefined
          : guardPortalClick(onClick)
      }
    />
  );
}

export { InteractiveSimple, type InteractiveSimpleProps };


================================================
FILE: web/lib/opal/src/core/interactive/stateful/README.md
================================================
# Interactive.Stateful

**Import:** `import { Interactive } from "@opal/core";` — use as `Interactive.Stateful`.

Stateful interactive surface primitive for elements that maintain a value state (empty/filled/selected). Used for toggles, sidebar items, and selectable list rows. Applies variant/state color styling via CSS data-attributes and merges onto a single child element via Radix `Slot`.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"select-light" \| "select-heavy" \| "select-card" \| "select-tinted" \| "select-filter" \| "sidebar-heavy" \| "sidebar-light"` | `"select-heavy"` | Color variant |
| `state` | `"empty" \| "filled" \| "selected"` | `"empty"` | Current value state |
| `interaction` | `"rest" \| "hover" \| "active"` | `"rest"` | JS-controlled interaction override |
| `group` | `string` | — | Tailwind group class for `group-hover:*` |
| `disabled` | `boolean` | `false` | Disables the element |
| `href` | `string` | — | URL for link behavior |
| `target` | `string` | — | Link target (e.g. `"_blank"`) |

## Variants

- **`select-light`** — Transparent selected background. For inline toggles.
- **`select-heavy`** — Tinted selected background (`action-link-01`). For list rows, model pickers, buttons.
- **`select-card`** — Like `select-heavy`, but the filled state gets a visible background (`background-tint-00`) with neutral foreground. Designed for larger surfaces (cards) where background carries more of the visual distinction than foreground color alone.
- **`select-tinted`** — Like `select-heavy` but with a tinted rest background (`background-tint-01`).
- **`select-filter`** — Like `select-tinted` for empty/filled; selected state uses inverted backgrounds and inverted text.
- **`sidebar-heavy`** — Sidebar navigation: muted when unselected, bold when selected.
- **`sidebar-light`** — Sidebar navigation: uniformly muted across all states.

## State attribute

Uses `data-interactive-state` (not `data-state`) to avoid conflicts with Radix UI, which injects its own `data-state` on trigger elements.

## CSS custom properties

Sets `--interactive-foreground` and `--interactive-foreground-icon` per variant/state. In the `empty` state, icon color (`--text-03`) is intentionally lighter than text color (`--text-04`).

## Usage

```tsx
<Interactive.Stateful variant="select-heavy" state="selected" onClick={toggle}>
  <Interactive.Container>
    <span className="interactive-foreground">Selected item</span>
  </Interactive.Container>
</Interactive.Stateful>
```


================================================
FILE: web/lib/opal/src/core/interactive/stateful/components.tsx
================================================
import "@opal/core/interactive/shared.css";
import "@opal/core/interactive/stateful/styles.css";
import React from "react";
import { Slot } from "@radix-ui/react-slot";
import { cn } from "@opal/utils";
import { guardPortalClick } from "@opal/core/interactive/utils";
import type { ButtonType, WithoutStyles } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type InteractiveStatefulVariant =
  | "select-light"
  | "select-heavy"
  | "select-card"
  | "select-tinted"
  | "select-filter"
  | "sidebar-heavy"
  | "sidebar-light";
type InteractiveStatefulState = "empty" | "filled" | "selected";
type InteractiveStatefulInteraction = "rest" | "hover" | "active";

/**
 * Props for {@link InteractiveStateful}.
 */
interface InteractiveStatefulProps
  extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {
  ref?: React.Ref<HTMLElement>;

  /**
   * Visual variant controlling the color palette and behavior.
   *
   * - `"select-light"` — transparent selected background (for inline toggles)
   * - `"select-heavy"` — tinted selected background (for list rows, model pickers)
   * - `"select-card"` — like select-heavy but filled state has a visible background (for cards/larger surfaces)
   * - `"select-tinted"` — like select-heavy but with a tinted rest background
   * - `"select-filter"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)
   * - `"sidebar-heavy"` — sidebar navigation items: muted when unselected (text-03/text-02), bold when selected (text-04/text-03)
   * - `"sidebar-light"` — sidebar navigation items: uniformly muted across all states (text-02/text-02)
   *
   * @default "select-heavy"
   */
  variant?: InteractiveStatefulVariant;

  /**
   * The current value state of this element.
   *
   * - `"empty"` — no value / unset
   * - `"filled"` — has a value but not actively selected
   * - `"selected"` — actively chosen / focused
   *
   * @default "empty"
   */
  state?: InteractiveStatefulState;

  /**
   * JS-controllable interaction state override.
   *
   * - `"rest"` — default appearance (no override)
   * - `"hover"` — forces hover visual state
   * - `"active"` — forces active/pressed visual state
   *
   * @default "rest"
   */
  interaction?: InteractiveStatefulInteraction;

  /**
   * Tailwind group class (e.g. `"group/Card"`) for `group-hover:*` utilities.
   */
  group?: string;

  /**
   * HTML button type. When set to `"submit"`, `"button"`, or `"reset"`, the
   * element is treated as inherently interactive for cursor styling purposes
   * even without an explicit `onClick` or `href`.
   */
  type?: ButtonType;

  /**
   * URL to navigate to when clicked. Passed through Slot to the child.
   */
  href?: string;

  /**
   * Link target (e.g. `"_blank"`). Only used when `href` is provided.
   */
  target?: string;

  /**
   * Applies variant-specific disabled colors and suppresses clicks.
   */
  disabled?: boolean;
}

// ---------------------------------------------------------------------------
// InteractiveStateful
// ---------------------------------------------------------------------------

/**
 * Stateful interactive surface primitive.
 *
 * The foundational building block for elements that maintain a value state
 * (empty/filled/selected). Applies variant/state color styling via CSS
 * data-attributes and merges onto a single child element via Radix `Slot`.
 *
 * Disabled state is controlled via the `disabled` prop.
 */
function InteractiveStateful({
  ref,
  variant = "select-heavy",
  state = "empty",
  interaction = "rest",
  group,
  type,
  href,
  target,
  disabled,
  ...props
}: InteractiveStatefulProps) {
  const isDisabled = !!disabled;

  // onClick/href are always passed directly — Stateful is the outermost Slot,
  // so Radix Slot-injected handlers don't bypass this guard.
  const classes = cn(
    "interactive",
    !props.onClick && !href && !type && "!cursor-default !select-auto",
    group
  );

  const dataAttrs = {
    "data-interactive-variant": variant,
    "data-interactive-state": state,
    "data-interaction": interaction !== "rest" ? interaction : undefined,
    "data-disabled": isDisabled ? "true" : undefined,
    "aria-disabled": isDisabled || undefined,
  };

  const { onClick, ...slotProps } = props;

  const linkAttrs = href
    ? {
        href: isDisabled ? undefined : href,
        target,
        rel: target === "_blank" ? "noopener noreferrer" : undefined,
      }
    : {};

  return (
    <Slot
      ref={ref}
      className={classes}
      {...dataAttrs}
      {...linkAttrs}
      {...slotProps}
      onClick={
        isDisabled
          ? href
            ? (e: React.MouseEvent) => e.preventDefault()
            : undefined
          : guardPortalClick(onClick)
      }
    />
  );
}

export {
  InteractiveStateful,
  type InteractiveStatefulProps,
  type InteractiveStatefulVariant,
  type InteractiveStatefulState,
  type InteractiveStatefulInteraction,
};


================================================
FILE: web/lib/opal/src/core/interactive/stateful/styles.css
================================================
/* ============================================================================
   Stateful — variant x state color matrix

   Each combination sets:
     - background-color (via @apply)
     - --interactive-foreground (CSS custom property for descendant text color)
     - --interactive-foreground-icon (CSS custom property for descendant icon color)

   Both foreground variables are registered as <color> via @property in
   shared.css, so the browser interpolates them on the parent element.
   Children read the variables with no independent transitions.

   State dimension: `data-interactive-state` = "empty" | "filled" | "selected"
   Variant dimension: `data-interactive-variant` = "select-light" | "select-heavy" | "select-card" | "select-tinted" | "select-filter" | "sidebar-heavy" | "sidebar-light"

   Interaction override: `data-interaction="hover"` and `data-interaction="active"`
   allow JS-controlled visual state overrides.
============================================================================ */

/* ===========================================================================
   Select-Heavy
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Select-Heavy — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-neutral-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="empty"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Heavy — Filled
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="filled"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Heavy — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"] {
  @apply bg-[var(--action-link-01)];
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-heavy"][data-interactive-state="selected"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ===========================================================================
   Select-Card — like Select-Heavy but filled has a visible background.
   Designed for larger surfaces (cards) where background carries more of
   the visual distinction than foreground color alone.
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Select-Card — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-neutral-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="empty"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Card — Filled (visible background, neutral foreground)
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"] {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="filled"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Card — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"] {
  @apply bg-[var(--action-link-01)];
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-card"][data-interactive-state="selected"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ===========================================================================
   Select-Light — identical to Select-Heavy except selected bg is transparent
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Select-Light — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-neutral-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="empty"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Light — Filled
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="filled"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Light — Selected (transparent background, unlike select-heavy)
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-light"][data-interactive-state="selected"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ===========================================================================
   Select-Tinted — like Select-Heavy but with a tinted rest background
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Select-Tinted — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"] {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-neutral-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="empty"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Tinted — Filled
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"] {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="filled"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Tinted — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"] {
  @apply bg-[var(--action-link-01)];
  --interactive-foreground: var(--action-link-05);
  --interactive-foreground-icon: var(--action-link-05);
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="select-tinted"][data-interactive-state="selected"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ===========================================================================
   Select-Filter — empty/filled identical to Select-Tinted;
   selected uses inverted tint backgrounds and inverted text
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Select-Filter — Empty & Filled (identical colors)
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  ) {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--text-02);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  ):hover:not([data-disabled]),
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  )[data-interaction="hover"]:not([data-disabled]) {
  @apply bg-background-tint-02;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  ):active:not([data-disabled]),
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  )[data-interaction="active"]:not([data-disabled]) {
  @apply bg-background-neutral-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-filter"]:is(
    [data-interactive-state="empty"],
    [data-interactive-state="filled"]
  )[data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Select-Filter — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"] {
  @apply bg-background-tint-inverted-03;
  --interactive-foreground: var(--text-inverted-05);
  --interactive-foreground-icon: var(--text-inverted-05);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-inverted-04;
  --interactive-foreground: var(--text-inverted-05);
  --interactive-foreground-icon: var(--text-inverted-05);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-inverted-04;
  --interactive-foreground: var(--text-inverted-04);
  --interactive-foreground-icon: var(--text-inverted-04);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-disabled] {
  @apply bg-background-neutral-04;
  --interactive-foreground: var(--text-inverted-04);
  --interactive-foreground-icon: var(--text-inverted-02);
}

/* ===========================================================================
   Sidebar-Heavy

   Not selected: muted (text-03 / icon text-02)
   Selected: default (text-04 / icon text-03)
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Sidebar-Heavy — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="empty"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}

/* ---------------------------------------------------------------------------
   Sidebar-Heavy — Filled
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="filled"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}

/* ---------------------------------------------------------------------------
   Sidebar-Heavy — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="selected"] {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-heavy"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}
/* ---------------------------------------------------------------------------
   Sidebar-Heavy — Disabled (all states)
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-heavy"][data-disabled] {
  @apply bg-transparent opacity-50;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-03);
}

/* ===========================================================================
   Sidebar-Light

   All states: prominence="muted-2x" colors (text-02 / icon text-02)
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Sidebar-Light — Empty
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="empty"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-02);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="empty"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="empty"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}

/* ---------------------------------------------------------------------------
   Sidebar-Light — Filled
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="filled"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-02);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="filled"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="filled"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}

/* ---------------------------------------------------------------------------
   Sidebar-Light — Selected
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="selected"] {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-02);
  --interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="selected"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="sidebar-light"][data-interactive-state="selected"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-03;
}
/* ---------------------------------------------------------------------------
   Sidebar-Light — Disabled (all states)
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="sidebar-light"][data-disabled] {
  @apply bg-transparent opacity-50;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-03);
}


================================================
FILE: web/lib/opal/src/core/interactive/stateless/README.md
================================================
# Interactive.Stateless

**Import:** `import { Interactive } from "@opal/core";` — use as `Interactive.Stateless`.

Stateless interactive surface primitive for buttons, links, and cards. Applies variant/prominence color styling via CSS data-attributes and merges onto a single child element via Radix `Slot`.

## Props

| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"none" \| "default" \| "action" \| "danger"` | `"default"` | Color variant |
| `prominence` | `"primary" \| "secondary" \| "tertiary" \| "internal"` | `"primary"` | Color prominence within the variant |
| `interaction` | `"rest" \| "hover" \| "active"` | `"rest"` | JS-controlled interaction override |
| `group` | `string` | — | Tailwind group class for `group-hover:*` |
| `disabled` | `boolean` | `false` | Disables the element |
| `href` | `string` | — | URL for link behavior |
| `target` | `string` | — | Link target (e.g. `"_blank"`) |

## CSS custom properties

Sets `--interactive-foreground` and `--interactive-foreground-icon` per variant/prominence/state. Descendants opt in via:
- `.interactive-foreground` — text color
- `.interactive-foreground-icon` — icon color

## Usage

```tsx
<Interactive.Stateless variant="default" prominence="primary" onClick={handleClick}>
  <Interactive.Container border>
    <span className="interactive-foreground">Click me</span>
  </Interactive.Container>
</Interactive.Stateless>
```


================================================
FILE: web/lib/opal/src/core/interactive/stateless/components.tsx
================================================
import "@opal/core/interactive/shared.css";
import "@opal/core/interactive/stateless/styles.css";
import React from "react";
import { Slot } from "@radix-ui/react-slot";
import { cn } from "@opal/utils";
import { guardPortalClick } from "@opal/core/interactive/utils";
import type { ButtonType, WithoutStyles } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type InteractiveStatelessVariant = "default" | "action" | "danger";
type InteractiveStatelessProminence =
  | "primary"
  | "secondary"
  | "tertiary"
  | "internal";
type InteractiveStatelessInteraction = "rest" | "hover" | "active";

/**
 * Props for {@link InteractiveStateless}.
 */
interface InteractiveStatelessProps
  extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {
  ref?: React.Ref<HTMLElement>;

  /**
   * Visual variant controlling the color palette.
   * @default "default"
   */
  variant?: InteractiveStatelessVariant;

  /**
   * Prominence level controlling background intensity.
   * @default "primary"
   */
  prominence?: InteractiveStatelessProminence;

  /**
   * JS-controllable interaction state override.
   *
   * - `"rest"` — default appearance (no override)
   * - `"hover"` — forces hover visual state
   * - `"active"` — forces active/pressed visual state
   *
   * @default "rest"
   */
  interaction?: InteractiveStatelessInteraction;

  /**
   * Tailwind group class (e.g. `"group/Card"`) for `group-hover:*` utilities.
   */
  group?: string;

  /**
   * HTML button type. When set to `"submit"`, `"button"`, or `"reset"`, the
   * element is treated as inherently interactive for cursor styling purposes
   * even without an explicit `onClick` or `href`.
   */
  type?: ButtonType;

  /**
   * URL to navigate to when clicked. Passed through Slot to the child.
   */
  href?: string;

  /**
   * Link target (e.g. `"_blank"`). Only used when `href` is provided.
   */
  target?: string;

  /**
   * Applies variant-specific disabled colors and suppresses clicks.
   */
  disabled?: boolean;
}

// ---------------------------------------------------------------------------
// InteractiveStateless
// ---------------------------------------------------------------------------

/**
 * Stateless interactive surface primitive.
 *
 * The foundational building block for buttons, links, and any clickable
 * element that does not maintain selection state. Applies variant/prominence
 * color styling via CSS data-attributes and merges onto a single child
 * element via Radix `Slot`.
 *
 * Disabled state is controlled via the `disabled` prop.
 */
function InteractiveStateless({
  ref,
  variant = "default",
  prominence = "primary",
  interaction = "rest",
  group,
  type,
  href,
  target,
  disabled,
  ...props
}: InteractiveStatelessProps) {
  const isDisabled = !!disabled;

  // onClick/href are always passed directly — Stateless is the outermost Slot,
  // so Radix Slot-injected handlers don't bypass this guard.
  const classes = cn(
    "interactive",
    !props.onClick && !href && !type && "!cursor-default !select-auto",
    group
  );

  const dataAttrs = {
    "data-interactive-variant": variant,
    "data-interactive-prominence": prominence,
    "data-interaction": interaction !== "rest" ? interaction : undefined,
    "data-disabled": isDisabled ? "true" : undefined,
    "aria-disabled": isDisabled || undefined,
  };

  const { onClick, ...slotProps } = props;

  const linkAttrs = href
    ? {
        href: isDisabled ? undefined : href,
        target,
        rel: target === "_blank" ? "noopener noreferrer" : undefined,
      }
    : {};

  return (
    <Slot
      ref={ref}
      className={classes}
      {...dataAttrs}
      {...linkAttrs}
      {...slotProps}
      onClick={
        isDisabled
          ? href
            ? (e: React.MouseEvent) => e.preventDefault()
            : undefined
          : guardPortalClick(onClick)
      }
    />
  );
}

export {
  InteractiveStateless,
  type InteractiveStatelessProps,
  type InteractiveStatelessVariant,
  type InteractiveStatelessProminence,
  type InteractiveStatelessInteraction,
};


================================================
FILE: web/lib/opal/src/core/interactive/stateless/styles.css
================================================
/* ============================================================================
   Stateless — variant x prominence color matrix

   Each combination sets:
     - background-color (via @apply)
     - --interactive-foreground (CSS custom property for descendant text color)
     - --interactive-foreground-icon (CSS custom property for descendant icon color)

   Both foreground variables are registered as <color> via @property in
   shared.css, so the browser interpolates them on the parent element.
   Children read the variables with no independent transitions.

   Interaction override: `data-interaction="hover"` and `data-interaction="active"`
   allow JS-controlled visual state overrides without actual pointer events.
============================================================================ */

/* ---------------------------------------------------------------------------
   Default + Primary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"] {
  @apply bg-[var(--theme-primary-05)];
  --interactive-foreground: var(--text-inverted-05);
  --interactive-foreground-icon: var(--text-inverted-05);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--theme-primary-04)];
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--theme-primary-06)];
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="primary"][data-disabled] {
  @apply bg-background-neutral-04;
  --interactive-foreground: var(--text-inverted-04);
  --interactive-foreground-icon: var(--text-inverted-04);
}

/* ---------------------------------------------------------------------------
   Default + Secondary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"] {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="secondary"][data-disabled] {
  @apply bg-background-neutral-03;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Default + Tertiary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="tertiary"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Default + Internal
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-03);
  --interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-04);
  --interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
  --interactive-foreground: var(--text-05);
  --interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="default"][data-interactive-prominence="internal"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Action + Primary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"] {
  @apply bg-[var(--action-link-05)];
  --interactive-foreground: var(--text-light-05);
  --interactive-foreground-icon: var(--text-light-05);
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--action-link-04)];
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--action-link-06)];
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="primary"][data-disabled] {
  @apply bg-[var(--action-link-02)];
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Action + Secondary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"] {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--action-text-link-05);
  --interactive-foreground-icon: var(--action-text-link-05);
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="secondary"][data-disabled] {
  @apply bg-background-neutral-02;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ---------------------------------------------------------------------------
   Action + Tertiary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-text-link-05);
  --interactive-foreground-icon: var(--action-text-link-05);
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="tertiary"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ---------------------------------------------------------------------------
   Action + Internal
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-text-link-05);
  --interactive-foreground-icon: var(--action-text-link-05);
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="action"][data-interactive-prominence="internal"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-link-03);
  --interactive-foreground-icon: var(--action-link-03);
}

/* ---------------------------------------------------------------------------
   Danger + Primary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"] {
  @apply bg-[var(--action-danger-05)];
  --interactive-foreground: var(--text-light-05);
  --interactive-foreground-icon: var(--text-light-05);
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--action-danger-04)];
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-[var(--action-danger-06)];
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="primary"][data-disabled] {
  @apply bg-[var(--action-danger-02)];
  --interactive-foreground: var(--text-01);
  --interactive-foreground-icon: var(--text-01);
}

/* ---------------------------------------------------------------------------
   Danger + Secondary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"] {
  @apply bg-background-tint-01;
  --interactive-foreground: var(--action-text-danger-05);
  --interactive-foreground-icon: var(--action-text-danger-05);
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="secondary"][data-disabled] {
  @apply bg-background-neutral-02;
  --interactive-foreground: var(--action-danger-03);
  --interactive-foreground-icon: var(--action-danger-03);
}

/* ---------------------------------------------------------------------------
   Danger + Tertiary
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-text-danger-05);
  --interactive-foreground-icon: var(--action-text-danger-05);
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-02;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="tertiary"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-danger-03);
  --interactive-foreground-icon: var(--action-danger-03);
}

/* ---------------------------------------------------------------------------
   Danger + Internal
   --------------------------------------------------------------------------- */
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-text-danger-05);
  --interactive-foreground-icon: var(--action-text-danger-05);
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"]:hover:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"][data-interaction="hover"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"]:active:not(
    [data-disabled]
  ),
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"][data-interaction="active"]:not(
    [data-disabled]
  ) {
  @apply bg-background-tint-00;
}
.interactive[data-interactive-variant="danger"][data-interactive-prominence="internal"][data-disabled] {
  @apply bg-transparent;
  --interactive-foreground: var(--action-danger-03);
  --interactive-foreground-icon: var(--action-danger-03);
}


================================================
FILE: web/lib/opal/src/core/interactive/utils.ts
================================================
import type React from "react";

/**
 * Guards an onClick handler against React synthetic event bubbling from
 * portalled children (e.g. Radix Dialog overlays).
 *
 * React bubbles synthetic events through the **fiber tree** (component
 * hierarchy), not the DOM tree. This means a click on a portalled modal
 * overlay will bubble to a parent component's onClick even though the
 * overlay is not a DOM descendant. This guard checks that the click
 * target is actually inside the handler's DOM element before firing.
 */
function guardPortalClick<E extends React.MouseEvent>(
  onClick: ((e: E) => void) | undefined
): ((e: E) => void) | undefined {
  if (!onClick) return undefined;
  return (e: E) => {
    if (
      e.currentTarget instanceof Node &&
      e.target instanceof Node &&
      e.currentTarget.contains(e.target)
    ) {
      onClick(e);
    }
  };
}

export { guardPortalClick };


================================================
FILE: web/lib/opal/src/icons/DiscordMono.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgDiscordMono = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="currentColor"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path d="M32.7571 7.80005C32.288 8.63286 31.8668 9.4944 31.4839 10.3751C27.8463 9.82945 24.1417 9.82945 20.4946 10.3751C20.1213 9.4944 19.6905 8.63286 19.2214 7.80005C15.804 8.384 12.4727 9.40825 9.31379 10.8537C3.05329 20.1296 1.35894 29.1661 2.20134 38.0782C5.86763 40.7872 9.97429 42.8549 14.349 44.1759C15.3349 42.8549 16.2061 41.4477 16.9527 39.9831C15.536 39.4566 14.1671 38.7961 12.8556 38.0303C13.2002 37.7814 13.5353 37.523 13.8608 37.2741C21.5476 40.8925 30.4501 40.8925 38.1465 37.2741C38.4719 37.5421 38.807 37.8006 39.1516 38.0303C37.8401 38.8057 36.4713 39.4566 35.0449 39.9927C35.7916 41.4573 36.6627 42.8645 37.6487 44.1855C42.0233 42.8645 46.1299 40.8064 49.7965 38.0973C50.7918 27.7589 48.0924 18.799 42.6646 10.8633C39.5154 9.41784 36.1841 8.39355 32.7666 7.81919L32.7571 7.80005ZM18.0248 32.5931C15.6604 32.5931 13.698 30.4488 13.698 27.7972C13.698 25.1456 15.5838 22.9918 18.0153 22.9918C20.4468 22.9918 22.3804 25.1552 22.3421 27.7972C22.3038 30.4393 20.4372 32.5931 18.0248 32.5931ZM33.9728 32.5931C31.5988 32.5931 29.6556 30.4488 29.6556 27.7972C29.6556 25.1456 31.5414 22.9918 33.9728 22.9918C36.4043 22.9918 38.3284 25.1552 38.29 27.7972C38.2518 30.4393 36.3851 32.5931 33.9728 32.5931Z" />
  </svg>
);
export default SvgDiscordMono;


================================================
FILE: web/lib/opal/src/icons/actions.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgActions = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M3.06 6.24449L5.12 4.12225L3.06 2.00001M11.5501 14L14 11.5501M14 11.5501L11.5501 9.10017M14 11.5501H9.75552M4.12224 9.09889L6.24448 10.3242V12.7747L4.12224 14L2 12.7747V10.3242L4.12224 9.09889ZM14 4.12225C14 5.29433 13.0498 6.24449 11.8778 6.24449C10.7057 6.24449 9.75552 5.29433 9.75552 4.12225C9.75552 2.95017 10.7057 2.00001 11.8778 2.00001C13.0498 2.00001 14 2.95017 14 4.12225Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgActions;


================================================
FILE: web/lib/opal/src/icons/activity-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgActivitySmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11.5 8H10L9 11L7 5L6 8H4.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgActivitySmall;


================================================
FILE: web/lib/opal/src/icons/activity.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgActivity = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.6667 8H12L9.99999 14L5.99999 2L3.99999 8H1.33333"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgActivity;


================================================
FILE: web/lib/opal/src/icons/add-lines.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgAddLines = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 16 16"
    fill="none"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 6H2M14 3H2M6 12H2M11.5 9.5V12M11.5 12V14.5M11.5 12H9M11.5 12H14M8.5 9H2"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgAddLines;


================================================
FILE: web/lib/opal/src/icons/alert-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgAlertCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 24 24"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <circle
      cx="12"
      cy="12"
      r="10"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M12 8v4"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M12 16h.01"
      strokeWidth={2.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgAlertCircle;


================================================
FILE: web/lib/opal/src/icons/alert-triangle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgAlertTriangle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 16 16"
    fill="none"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.71535 5.8185V8.48516M7.71535 11.1518H7.72201M6.57535 2.39183L0.928679 11.8185C0.812258 12.0201 0.750657 12.2487 0.750005 12.4815C0.749353 12.7143 0.809673 12.9432 0.924964 13.1455C1.04025 13.3478 1.2065 13.5163 1.40715 13.6344C1.60781 13.7525 1.83588 13.8159 2.06868 13.8185H13.362C13.5948 13.8159 13.8229 13.7525 14.0235 13.6344C14.2242 13.5163 14.3904 13.3478 14.5057 13.1455C14.621 12.9432 14.6813 12.7143 14.6807 12.4815C14.68 12.2487 14.6184 12.0201 14.502 11.8185L8.85535 2.39183C8.7365 2.1959 8.56916 2.03391 8.36948 1.92149C8.16979 1.80906 7.9445 1.75 7.71535 1.75C7.48619 1.75 7.2609 1.80906 7.06122 1.92149C6.86153 2.03391 6.69419 2.1959 6.57535 2.39183Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgAlertTriangle;


================================================
FILE: web/lib/opal/src/icons/arrow-down-dot.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgArrowDownDot = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 9 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.25002 12.75L4.25002 6.25M4.25002 12.75L7.75 9.25M4.25002 12.75L0.75 9.25M4.25002 3.75C3.42158 3.75 2.75 3.07843 2.75 2.25C2.75 1.42157 3.42158 0.75 4.25002 0.75C5.07845 0.75 5.75 1.42157 5.75 2.25C5.75 3.07843 5.07845 3.75 4.25002 3.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowDownDot;


================================================
FILE: web/lib/opal/src/icons/arrow-exchange.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowExchange = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.22381 2.5L3.19527 4.52854C3.06509 4.65871 3 4.82932 3 4.99994M5.22392 7.5L3.19526 5.47134C3.06509 5.34117 3 5.17056 3 4.99994M13 4.99994H3M10.7761 8.50003L12.8047 10.5286C12.9349 10.6587 13 10.8294 13 11M10.7761 13.5L12.8047 11.4714C12.9349 11.3412 13 11.1706 13 11M3 11H13"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgArrowExchange;


================================================
FILE: web/lib/opal/src/icons/arrow-left-dot.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgArrowLeftDot = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 9"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M0.75 4.25H7.24999M0.75 4.25L4.25 0.75M0.75 4.25L4.25 7.75M9.74999 4.25C9.74999 5.07844 10.4216 5.75001 11.25 5.75001C12.0784 5.75001 12.75 5.07844 12.75 4.25C12.75 3.42156 12.0784 2.75001 11.25 2.75001C10.4216 2.75001 9.74999 3.42156 9.74999 4.25Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowLeftDot;


================================================
FILE: web/lib/opal/src/icons/arrow-left.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowLeft = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12 8H4M4 8L8 4M4 8L8 12"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowLeft;


================================================
FILE: web/lib/opal/src/icons/arrow-right-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowRightCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.99999 10.6667L10.6667 8.00001M10.6667 8.00001L7.99999 5.33334M10.6667 8.00001L5.33333 8.00001M14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 8.00001C1.33333 4.31811 4.3181 1.33334 7.99999 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowRightCircle;


================================================
FILE: web/lib/opal/src/icons/arrow-right-dot.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgArrowRightDot = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 9"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.75 4.25H6.25M12.75 4.25L9.25 0.75M12.75 4.25L9.25 7.75M3.75 4.25C3.75 5.07844 3.07843 5.75001 2.25 5.75001C1.42157 5.75001 0.75 5.07844 0.75 4.25C0.75 3.42156 1.42157 2.75001 2.25 2.75001C3.07843 2.75001 3.75 3.42156 3.75 4.25Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowRightDot;


================================================
FILE: web/lib/opal/src/icons/arrow-right.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowRight = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4 8H12M12 8L8 4M12 8L8 12"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowRight;


================================================
FILE: web/lib/opal/src/icons/arrow-up-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowUpCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.33333 8.00001L8 5.33334M8 5.33334L10.6667 8.00001M8 5.33334L8 10.6667M14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 8 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 8.00001C1.33333 4.31811 4.3181 1.33334 8 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowUpCircle;


================================================
FILE: web/lib/opal/src/icons/arrow-up-dot.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgArrowUpDot = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 9 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.25002 0.75V7.24999M4.25002 0.75L0.75 4.25M4.25002 0.75L7.75 4.25M4.25002 9.74999C5.07845 9.74999 5.75003 10.4216 5.75003 11.25C5.75003 12.0784 5.07845 12.75 4.25002 12.75C3.42158 12.75 2.75003 12.0784 2.75003 11.25C2.75003 10.4216 3.42158 9.74999 4.25002 9.74999Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowUpDot;


================================================
FILE: web/lib/opal/src/icons/arrow-up-down.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowUpDown = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 13 12"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11.75 2.97381L9.72145 0.945267C9.59128 0.81509 9.42066 0.750002 9.25005 0.750001M6.74999 2.97392L8.77865 0.94526C8.90881 0.815087 9.07943 0.75 9.25005 0.750001M9.25005 10.75V0.750001M5.74996 8.52613L3.72141 10.5547C3.59124 10.6849 3.42062 10.75 3.25001 10.75M0.75 8.52613L2.77861 10.5547C2.90877 10.6849 3.07939 10.75 3.25001 10.75M3.25001 0.75L3.25001 10.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowUpDown;


================================================
FILE: web/lib/opal/src/icons/arrow-up-right.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowUpRight = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.66667 11.3333L11 5M4.66667 4.66663H11.3333V11.3333"
      strokeWidth={1.5}
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowUpRight;


================================================
FILE: web/lib/opal/src/icons/arrow-up.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowUp = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgArrowUp;


================================================
FILE: web/lib/opal/src/icons/arrow-wall-right.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgArrowWallRight = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 12"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8.44281 2.99998L10.8047 5.36191C10.9349 5.49208 11 5.6627 11 5.83331M8.44281 8.66665L10.8047 6.30471C10.9349 6.17455 11 6.00393 11 5.83331M1 5.83331H11M14 1V10.6667"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgArrowWallRight;


================================================
FILE: web/lib/opal/src/icons/audio-eq-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgAudioEqSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5 9V7M7 11V5M9 9.5V6.5M11 9V7"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgAudioEqSmall;


================================================
FILE: web/lib/opal/src/icons/audio.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgAudio = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 10V6M5 14V2M11 11V5M14 9V7M8 10V6"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgAudio;


================================================
FILE: web/lib/opal/src/icons/aws.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgAws = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <title>AWS</title>
    <path
      d="M14.6195 23.2934C14.6195 23.9333 14.7233 24.4522 14.8443 24.8326C14.9827 25.2131 15.1556 25.6282 15.3978 26.0778C15.4842 26.2162 15.5188 26.3546 15.5188 26.4756C15.5188 26.6486 15.4151 26.8215 15.1902 26.9945L14.1007 27.7208C13.945 27.8246 13.7894 27.8765 13.651 27.8765C13.4781 27.8765 13.3051 27.79 13.1322 27.6344C12.89 27.3749 12.6825 27.0982 12.5096 26.8215C12.3366 26.5275 12.1637 26.1989 11.9734 25.8011C10.6245 27.3922 8.92958 28.1878 6.88881 28.1878C5.43606 28.1878 4.27731 27.7727 3.42988 26.9426C2.58244 26.1124 2.15007 25.0056 2.15007 23.622C2.15007 22.152 2.66891 20.9586 3.72389 20.0593C4.77886 19.16 6.17973 18.7103 7.96108 18.7103C8.54909 18.7103 9.15441 18.7622 9.79431 18.8487C10.4342 18.9352 11.0914 19.0735 11.7832 19.2292V17.9667C11.7832 16.6523 11.5065 15.7356 10.9703 15.1995C10.4169 14.6634 9.483 14.404 8.15132 14.404C7.546 14.404 6.9234 14.4731 6.28349 14.6288C5.64359 14.7844 5.02098 14.9747 4.41567 15.2168C4.13896 15.3379 3.93142 15.407 3.81036 15.4416C3.6893 15.4762 3.60282 15.4935 3.53364 15.4935C3.29152 15.4935 3.17046 15.3206 3.17046 14.9574V14.1099C3.17046 13.8332 3.20505 13.6257 3.29152 13.5046C3.37799 13.3836 3.53364 13.2625 3.77577 13.1414C4.38108 12.8301 5.10746 12.5707 5.9549 12.3632C6.80233 12.1384 7.70165 12.0346 8.65286 12.0346C10.7109 12.0346 12.2156 12.5015 13.1841 13.4355C14.1353 14.3694 14.6195 15.7875 14.6195 17.6899V23.2934ZM7.63248 25.9222C8.2032 25.9222 8.79122 25.8184 9.41383 25.6109C10.0364 25.4034 10.5899 25.0229 11.0568 24.504C11.3335 24.1754 11.5411 23.8122 11.6448 23.3972C11.7486 22.9821 11.8178 22.4806 11.8178 21.8925V21.1662C11.3162 21.0451 10.7801 20.9413 10.2267 20.8722C9.67325 20.803 9.13711 20.7684 8.60098 20.7684C7.44224 20.7684 6.5948 20.9932 6.02407 21.4602C5.45335 21.9271 5.17664 22.5843 5.17664 23.4491C5.17664 24.2619 5.38417 24.8672 5.81654 25.2823C6.23161 25.7147 6.83692 25.9222 7.63248 25.9222ZM21.5201 27.79C21.2088 27.79 21.0012 27.7381 20.8629 27.6171C20.7245 27.5133 20.6035 27.2712 20.4997 26.9426L16.4355 13.5738C16.3317 13.2279 16.2798 13.0031 16.2798 12.882C16.2798 12.6053 16.4182 12.4497 16.6949 12.4497H18.3897C18.7183 12.4497 18.9432 12.5015 19.0642 12.6226C19.2026 12.7264 19.3064 12.9685 19.4101 13.2971L22.3156 24.7462L25.0136 13.2971C25.1001 12.9512 25.2038 12.7264 25.3422 12.6226C25.4806 12.5188 25.7227 12.4497 26.034 12.4497H27.4176C27.7462 12.4497 27.971 12.5015 28.1093 12.6226C28.2477 12.7264 28.3688 12.9685 28.4379 13.2971L31.1705 24.8845L34.1625 13.2971C34.2662 12.9512 34.3873 12.7264 34.5084 12.6226C34.6467 12.5188 34.8716 12.4497 35.1829 12.4497H36.7913C37.068 12.4497 37.2236 12.588 37.2236 12.882C37.2236 12.9685 37.2063 13.055 37.189 13.1587C37.1717 13.2625 37.1372 13.4009 37.068 13.5911L32.9 26.9599C32.7962 27.3058 32.6751 27.5306 32.5368 27.6344C32.3984 27.7381 32.1736 27.8073 31.8796 27.8073H30.3922C30.0636 27.8073 29.8388 27.7554 29.7004 27.6344C29.5621 27.5133 29.441 27.2885 29.3719 26.9426L26.6912 15.7875L24.0278 26.9253C23.9413 27.2712 23.8376 27.496 23.6992 27.6171C23.5609 27.7381 23.3187 27.79 23.0074 27.79H21.5201ZM43.7437 28.257C42.8444 28.257 41.9451 28.1532 41.0803 27.9457C40.2156 27.7381 39.5411 27.5133 39.0914 27.2539C38.8147 27.0982 38.6245 26.9253 38.5553 26.7696C38.4861 26.614 38.4515 26.441 38.4515 26.2854V25.4034C38.4515 25.0402 38.5899 24.8672 38.8493 24.8672C38.9531 24.8672 39.0569 24.8845 39.1606 24.9191C39.2644 24.9537 39.42 25.0229 39.593 25.0921C40.181 25.3515 40.8209 25.559 41.4954 25.6974C42.1872 25.8357 42.8617 25.9049 43.5535 25.9049C44.643 25.9049 45.4905 25.7147 46.0785 25.3342C46.6665 24.9537 46.9778 24.4003 46.9778 23.6912C46.9778 23.2069 46.8222 22.8092 46.5109 22.4806C46.1996 22.152 45.6115 21.858 44.7641 21.5812L42.2564 20.803C40.9939 20.4052 40.0599 19.8172 39.4892 19.0389C38.9185 18.278 38.6245 17.4305 38.6245 16.5312C38.6245 15.8048 38.7801 15.1649 39.0914 14.6115C39.4027 14.0581 39.8178 13.5738 40.3367 13.1933C40.8555 12.7956 41.4435 12.5015 42.1353 12.294C42.8271 12.0865 43.5535 12 44.3144 12C44.6949 12 45.0927 12.0173 45.4732 12.0692C45.871 12.1211 46.2341 12.1902 46.5973 12.2594C46.9432 12.3459 47.2718 12.4324 47.5831 12.5361C47.8944 12.6399 48.1366 12.7437 48.3095 12.8474C48.5516 12.9858 48.7246 13.1242 48.8283 13.2798C48.9321 13.4182 48.984 13.6084 48.984 13.8505V14.6634C48.984 15.0266 48.8456 15.2168 48.5862 15.2168C48.4479 15.2168 48.223 15.1476 47.929 15.0093C46.9432 14.5596 45.8364 14.3348 44.6084 14.3348C43.6227 14.3348 42.8444 14.4904 42.3083 14.819C41.7721 15.1476 41.4954 15.6492 41.4954 16.3583C41.4954 16.8425 41.6684 17.2576 42.0142 17.5862C42.3601 17.9148 43 18.2434 43.9167 18.5374L46.3725 19.3156C47.6177 19.7134 48.517 20.2668 49.0532 20.9759C49.5893 21.685 49.8487 22.4979 49.8487 23.3972C49.8487 24.1408 49.6931 24.8153 49.3991 25.4034C49.0878 25.9914 48.6727 26.5102 48.1366 26.9253C47.6004 27.3577 46.9605 27.669 46.2168 27.8938C45.4386 28.1359 44.6257 28.257 43.7437 28.257Z"
      fill="#252F3E"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M47.0124 36.6621C41.3225 40.8647 33.0556 43.0958 25.9475 43.0958C15.9858 43.0958 7.00987 39.412 0.230368 33.2897C-0.305766 32.8054 0.178484 32.1482 0.818386 32.5287C8.15132 36.7832 17.1964 39.3601 26.5528 39.3601C32.8654 39.3601 39.8005 38.0457 46.1823 35.3478C47.1335 34.9154 47.9463 35.9704 47.0124 36.6621Z"
      fill="#FF9900"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M49.3818 33.9642C48.6554 33.0303 44.5738 33.5145 42.7233 33.7394C42.1699 33.8085 42.0834 33.3243 42.585 32.9611C45.8364 30.6782 51.1804 31.3354 51.803 32.0964C52.4256 32.8746 51.6301 38.2187 48.5862 40.7783C48.1193 41.1761 47.6696 40.9685 47.8771 40.4497C48.5689 38.7375 50.1081 34.8808 49.3818 33.9642Z"
      fill="#FF9900"
    />
  </svg>
);

export default SvgAws;


================================================
FILE: web/lib/opal/src/icons/azure.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgAzure = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M18.3281 3.40002C17.3811 3.40002 16.5394 4.00473 16.2763 4.89865L3.12373 43.8366C2.72915 44.9672 3.30787 46.2029 4.43899 46.5973C4.67574 46.6761 4.91248 46.7287 5.17554 46.7287H16.0396C16.855 46.571 17.539 45.9926 17.8283 45.2038L32.0068 3.40002H18.3281Z"
      fill="url(#paint0_linear_9_943)"
    />
    <path
      d="M38.136 31.4795H16.5394C15.987 31.4795 15.5398 31.9264 15.5398 32.4786C15.5398 32.7678 15.645 33.0307 15.8555 33.2147L29.7446 46.1503C30.1392 46.5183 30.6916 46.7287 31.244 46.7287H43.4759L38.136 31.4795Z"
      fill="#0078D4"
    />
    <path
      d="M18.3281 3.40002C17.3811 3.40002 16.5394 4.00473 16.2763 4.89865L3.12373 43.8366C2.72915 44.9672 3.30787 46.2029 4.43899 46.5973C4.67574 46.6761 4.91248 46.7287 5.17554 46.7287H16.0396C16.855 46.571 17.539 45.9926 17.8283 45.2038L20.4589 37.4741L29.8235 46.2555C30.2181 46.571 30.7179 46.755 31.2177 46.755H43.397L38.057 31.4796H22.4844L32.0068 3.40002H18.3281Z"
      fill="url(#paint1_linear_9_943)"
    />
    <path
      d="M35.7422 4.87236C35.4528 3.97844 34.611 3.40002 33.6904 3.40002H18.5123C19.4329 3.40002 20.2747 4.00473 20.5641 4.87236L33.7167 43.8892C34.1112 45.0198 33.4799 46.2555 32.3488 46.6236C32.1384 46.7024 31.9016 46.7287 31.6649 46.7287H46.843C48.053 46.7287 49 45.7559 49 44.5728C49 44.3362 48.9737 44.0996 48.8948 43.8892L35.7422 4.87236Z"
      fill="url(#paint2_linear_9_943)"
    />
    <defs>
      <linearGradient
        id="paint0_linear_9_943"
        x1={23.3411}
        y1={6.61094}
        x2={9.24122}
        y2={48.3769}
        gradientUnits="userSpaceOnUse"
      >
        <stop stopColor="#114A8B" />
        <stop offset={1} stopColor="#0765B6" />
      </linearGradient>
      <linearGradient
        id="paint1_linear_9_943"
        x1={27.7206}
        y1={26.0775}
        x2={24.4488}
        y2={27.1844}
        gradientUnits="userSpaceOnUse"
      >
        <stop stopOpacity={0.3} />
        <stop offset={0.071} stopOpacity={0.2} />
        <stop offset={0.321} stopOpacity={0.1} />
        <stop offset={0.623} stopOpacity={0.05} />
        <stop offset={1} stopOpacity={0} />
      </linearGradient>
      <linearGradient
        id="paint2_linear_9_943"
        x1={26.0229}
        y1={5.35655}
        x2={41.5367}
        y2={46.7094}
        gradientUnits="userSpaceOnUse"
      >
        <stop stopColor="#3BC9F3" />
        <stop offset={1} stopColor="#2892DF" />
      </linearGradient>
    </defs>
  </svg>
);
export default SvgAzure;


================================================
FILE: web/lib/opal/src/icons/bar-chart-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBarChartSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 10.5V7M8 10.5V4.5M5 10.5V8"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBarChartSmall;


================================================
FILE: web/lib/opal/src/icons/bar-chart.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBarChart = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12 13.3333V6.66666M8 13.3333V2.66666M4 13.3333V9.33332"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBarChart;


================================================
FILE: web/lib/opal/src/icons/bell.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBell = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.15333 14C9.03613 14.2021 8.86789 14.3698 8.66548 14.4864C8.46307 14.6029 8.23359 14.6643 8 14.6643C7.76641 14.6643 7.53693 14.6029 7.33452 14.4864C7.1321 14.3698 6.96387 14.2021 6.84667 14M12 5.33334C12 4.27248 11.5786 3.25506 10.8284 2.50492C10.0783 1.75477 9.06087 1.33334 8 1.33334C6.93913 1.33334 5.92172 1.75477 5.17157 2.50492C4.42143 3.25506 4 4.27248 4 5.33334C4 10 2 11.3333 2 11.3333H14C14 11.3333 12 10 12 5.33334Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBell;


================================================
FILE: web/lib/opal/src/icons/bifrost.tsx
================================================
import { cn } from "@opal/utils";
import type { IconProps } from "@opal/types";

const SvgBifrost = ({ size, className, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 37 46"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    className={cn(className, "text-[#33C19E] dark:text-white")}
    {...props}
  >
    <title>Bifrost</title>
    <path
      d="M27.6219 46H0V36.8H27.6219V46ZM36.8268 36.8H27.6219V27.6H36.8268V36.8ZM18.4146 27.6H9.2073V18.4H18.4146V27.6ZM36.8268 18.4H27.6219V9.2H36.8268V18.4ZM27.6219 9.2H0V0H27.6219V9.2Z"
      fill="currentColor"
    />
  </svg>
);

export default SvgBifrost;


================================================
FILE: web/lib/opal/src/icons/blocks.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBlocks = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 24 24"
    fill="none"
    strokeWidth={1.5}
    strokeLinecap="round"
    strokeLinejoin="round"
    className="lucide lucide-blocks-icon lucide-blocks"
    stroke="currentColor"
    {...props}
  >
    <path d="M10 22V7a1 1 0 0 0-1-1H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-5a1 1 0 0 0-1-1H2" />
    <rect x={14} y={2} rx={1} />
  </svg>
);
export default SvgBlocks;


================================================
FILE: web/lib/opal/src/icons/book-open.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBookOpen = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.99999 4.66667C7.99999 3.95942 7.71904 3.28115 7.21895 2.78105C6.71885 2.28095 6.04057 2 5.33333 2H1.33333V12H5.99999C6.53043 12 7.03914 12.2107 7.41421 12.5858C7.78928 12.9609 7.99999 13.4696 7.99999 14M7.99999 4.66667V14M7.99999 4.66667C7.99999 3.95942 8.28095 3.28115 8.78104 2.78105C9.28114 2.28095 9.95942 2 10.6667 2H14.6667V12H9.99999C9.46956 12 8.96085 12.2107 8.58578 12.5858C8.21071 12.9609 7.99999 13.4696 7.99999 14"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBookOpen;


================================================
FILE: web/lib/opal/src/icons/bookmark.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBookmark = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.6667 14L7.99999 10.6667L3.33333 14V3.33333C3.33333 2.97971 3.4738 2.64057 3.72385 2.39052C3.9739 2.14048 4.31304 2 4.66666 2H11.3333C11.6869 2 12.0261 2.14048 12.2761 2.39052C12.5262 2.64057 12.6667 2.97971 12.6667 3.33333V14Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBookmark;


================================================
FILE: web/lib/opal/src/icons/books-line-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBooksLineSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8.25 5.5V10M10.75 5.5V10M5.91469 5.65333L4.75 10"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBooksLineSmall;


================================================
FILE: web/lib/opal/src/icons/books-stack-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBooksStackSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 10.5H10.5M5 8H9.5M6.5 5.5H11"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBooksStackSmall;


================================================
FILE: web/lib/opal/src/icons/bracket-curly.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBracketCurly = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.25 0.750024C3.14543 0.750024 2.25 1.64545 2.25 2.75002V4.67966C2.25 5.34836 1.9158 5.97283 1.3594 6.34376L0.75 6.75002L1.3594 7.15629C1.9158 7.52722 2.25 8.15169 2.25 8.82039V10.75C2.25 11.8546 3.14543 12.75 4.25 12.75M10.25 12.75C11.3546 12.75 12.25 11.8546 12.25 10.75V8.82038C12.25 8.15167 12.5842 7.5272 13.1406 7.15627L13.75 6.75002L13.1406 6.34373C12.5842 5.9728 12.25 5.34835 12.25 4.67965V2.75C12.25 1.64543 11.3546 0.75 10.25 0.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgBracketCurly;


================================================
FILE: web/lib/opal/src/icons/branch.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBranch = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.75001 5C5.71651 5 6.50001 4.2165 6.50001 3.25C6.50001 2.2835 5.7165 1.5 4.75 1.5C3.78351 1.5 3.00001 2.2835 3.00001 3.25C3.00001 4.2165 3.78351 5 4.75001 5ZM4.75001 5L4.75001 6.24999M4.75 11C3.7835 11 3 11.7835 3 12.75C3 13.7165 3.7835 14.5 4.75 14.5C5.7165 14.5 6.5 13.7165 6.5 12.75C6.5 11.7835 5.71649 11 4.75 11ZM4.75 11L4.75001 6.24999M10.5 8.74997C10.5 9.71646 11.2835 10.5 12.25 10.5C13.2165 10.5 14 9.71646 14 8.74997C14 7.78347 13.2165 7 12.25 7C11.2835 7 10.5 7.78347 10.5 8.74997ZM10.5 8.74997L7.25001 8.74999C5.8693 8.74999 4.75001 7.6307 4.75001 6.24999"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBranch;


================================================
FILE: web/lib/opal/src/icons/bubble-text.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgBubbleText = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.4939 6.5H5.5M8.00607 9.5H5.50607M1.5 13.5H10.5C12.7091 13.5 14.5 11.7091 14.5 9.5V6.5C14.5 4.29086 12.7091 2.5 10.5 2.5H5.5C3.29086 2.5 1.5 4.29086 1.5 6.5V13.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBubbleText;


================================================
FILE: web/lib/opal/src/icons/calendar.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCalendar = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.41667 0.75V3.41667M4.08333 0.75V3.41667M0.75 6.08333H12.75M2.08333 2.08333H11.4167C12.153 2.08333 12.75 2.68029 12.75 3.41667V12.75C12.75 13.4864 12.153 14.0833 11.4167 14.0833H2.08333C1.34695 14.0833 0.75 13.4864 0.75 12.75V3.41667C0.75 2.68029 1.34695 2.08333 2.08333 2.08333Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgCalendar;


================================================
FILE: web/lib/opal/src/icons/check-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCheckCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2879)">
      <path
        d="M14.6667 7.38668V8.00001C14.6658 9.43763 14.2003 10.8365 13.3396 11.9879C12.4788 13.1393 11.2689 13.9817 9.89023 14.3893C8.51162 14.7969 7.03817 14.7479 5.68964 14.2497C4.34112 13.7515 3.18976 12.8307 2.4073 11.6247C1.62484 10.4187 1.25319 8.99205 1.34778 7.55755C1.44237 6.12305 1.99813 4.75756 2.93218 3.66473C3.86623 2.57189 5.12852 1.81027 6.53079 1.49344C7.93306 1.17662 9.40017 1.32157 10.7133 1.90668M14.6667 2.66668L8 9.34001L6 7.34001"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2879">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgCheckCircle;


================================================
FILE: web/lib/opal/src/icons/check-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgCheckSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 6L7 10L5 8"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgCheckSmall;


================================================
FILE: web/lib/opal/src/icons/check-square.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCheckSquare = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_604_4473)">
      <path
        d="M6 7.33333L8 9.33333L14.6667 2.66667M14 8V12.6667C14 13.0203 13.8595 13.3594 13.6095 13.6095C13.3594 13.8595 13.0203 14 12.6667 14H3.33333C2.97971 14 2.64057 13.8595 2.39052 13.6095C2.14048 13.3594 2 13.0203 2 12.6667V3.33333C2 2.97971 2.14048 2.64057 2.39052 2.39052C2.64057 2.14048 2.97971 2 3.33333 2H10.6667"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_604_4473">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);

export default SvgCheckSquare;


================================================
FILE: web/lib/opal/src/icons/check.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCheck = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M13.5 4.5L6 12L2.5 8.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgCheck;


================================================
FILE: web/lib/opal/src/icons/chevron-down-small.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronDownSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5 6.50001L7.5286 9.0286C7.78894 9.28893 8.21107 9.28893 8.47141 9.0286L11 6.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronDownSmall;


================================================
FILE: web/lib/opal/src/icons/chevron-down.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronDown = ({ size, strokeWidth = 1.5, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4 6L8 10L12 6"
      strokeWidth={strokeWidth}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronDown;


================================================
FILE: web/lib/opal/src/icons/chevron-left.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronLeft = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10 12L6 8L10 4"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronLeft;


================================================
FILE: web/lib/opal/src/icons/chevron-right.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronRight = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 12L10 8L6 4"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronRight;


================================================
FILE: web/lib/opal/src/icons/chevron-up-small.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronUpSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.99999 9.50385L7.5286 6.97525C7.78893 6.71492 8.21106 6.71492 8.4714 6.97525L11 9.50385"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronUpSmall;


================================================
FILE: web/lib/opal/src/icons/chevron-up.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgChevronUp = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4 10L8 6L12 10"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgChevronUp;


================================================
FILE: web/lib/opal/src/icons/circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <circle cx="8" cy="8" r="4" strokeWidth={1.5} />
  </svg>
);
export default SvgCircle;


================================================
FILE: web/lib/opal/src/icons/claude.tsx
================================================
import React from "react";
import type { IconProps } from "@opal/types";

const SvgClaude = ({ size, ...props }: IconProps) => {
  const clipId = React.useId();
  return (
    <svg
      width={size}
      height={size}
      viewBox="0 0 16 16"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      {...props}
    >
      <g clipPath={`url(#${clipId})`}>
        <path
          d="M3.51067 10.7446L6.41508 9.11502L6.46392 8.97348L6.41508 8.89474H6.27355L5.78815 8.86484L4.12863 8.81999L2.68939 8.76019L1.29499 8.68543L0.944149 8.61068L0.615234 8.17711L0.649123 7.96083L0.944149 7.76248L1.36675 7.79936L2.30067 7.86315L3.70204 7.95983L4.71868 8.01963L6.22471 8.17611H6.46392L6.49781 8.07943L6.41608 8.01963L6.35229 7.95983L4.90208 6.97707L3.33226 5.93851L2.50998 5.34048L2.06545 5.03748L1.84119 4.75342L1.74451 4.13347L2.14817 3.68893L2.69038 3.72581L2.82892 3.76269L3.37811 4.18529L4.55124 5.09329L6.08318 6.22157L6.30744 6.40795L6.39714 6.34417L6.40811 6.29931L6.30744 6.13087L5.47419 4.62484L4.58513 3.0929L4.18943 2.458L4.08478 2.07725C4.0479 1.92077 4.02099 1.78921 4.02099 1.62874L4.48047 1.0048L4.73463 0.923065L5.34761 1.0048L5.60576 1.22906L5.9865 2.10018L6.60346 3.47165L7.5603 5.33649L7.84038 5.88966L7.98988 6.40197L8.0457 6.55846H8.14238V6.46875L8.22112 5.41822L8.36664 4.12848L8.50817 2.46896L8.55701 2.0015L8.78825 1.44135L9.24773 1.13835L9.60654 1.30979L9.90157 1.73239L9.86071 2.00549L9.68529 3.14573L9.34142 4.93183L9.11716 6.12788H9.24773L9.39724 5.97837L10.0022 5.17503L11.0189 3.90422L11.4674 3.39989L11.9907 2.84273L12.3266 2.5776H12.9615L13.4289 3.27231L13.2196 3.98994L12.5658 4.8192L12.0236 5.52188L11.2461 6.56843L10.7607 7.40566L10.8056 7.47244L10.9212 7.46148L12.6774 7.08771L13.6263 6.91628L14.7585 6.72192L15.2708 6.96113L15.3267 7.20432L15.1253 7.70168L13.9143 8.0007L12.494 8.28476L10.379 8.78511L10.3531 8.80404L10.383 8.84092L11.3358 8.93062L11.7435 8.95255H12.7412L14.5991 9.0911L15.0845 9.41204L15.3755 9.80474L15.3267 10.1038L14.5791 10.4845L13.5705 10.2453L11.2162 9.68513L10.4089 9.4838H10.2973V9.55058L10.97 10.2084L12.203 11.3217L13.7469 12.757L13.8256 13.1118L13.6273 13.3919L13.418 13.362L12.0614 12.3414L11.5382 11.8819L10.3531 10.8842H10.2743V10.9888L10.5474 11.3885L11.9897 13.5563L12.0644 14.2212L11.9598 14.4374L11.586 14.568L11.1754 14.4933L10.3312 13.3082L9.46003 11.9736L8.75735 10.7775L8.67163 10.8264L8.257 15.2926L8.06264 15.5209L7.61412 15.6923L7.24036 15.4082L7.04201 14.9488L7.24036 14.0408L7.47957 12.8557L7.67393 11.9138L7.84935 10.7436L7.954 10.3549L7.94702 10.329L7.86131 10.34L6.97922 11.551L5.63765 13.364L4.57615 14.5002L4.32199 14.6009L3.88145 14.3727L3.92231 13.965L4.1685 13.6022L5.63765 11.7334L6.52372 10.5752L7.09583 9.9064L7.09185 9.80972H7.05796L3.15584 12.3434L2.46114 12.4331L2.16213 12.153L2.199 11.6935L2.34054 11.544L3.51366 10.7367L3.50968 10.7406L3.51067 10.7446Z"
          fill="#D97757"
        />
      </g>
      <defs>
        <clipPath id={clipId}>
          <rect width="16" height="16" fill="white" />
        </clipPath>
      </defs>
    </svg>
  );
};

export default SvgClaude;


================================================
FILE: web/lib/opal/src/icons/clipboard.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgClipboard = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.6667 2.66665H12C12.3536 2.66665 12.6927 2.80712 12.9428 3.05717C13.1928 3.30722 13.3333 3.64636 13.3333 3.99998V13.3333C13.3333 13.6869 13.1928 14.0261 12.9428 14.2761C12.6927 14.5262 12.3536 14.6666 12 14.6666H3.99999C3.64637 14.6666 3.30723 14.5262 3.05718 14.2761C2.80713 14.0261 2.66666 13.6869 2.66666 13.3333V3.99998C2.66666 3.64636 2.80713 3.30722 3.05718 3.05717C3.30723 2.80712 3.64637 2.66665 3.99999 2.66665H5.33332M10.6667 2.66665V1.99998C10.6667 1.63179 10.3682 1.33331 9.99999 1.33331H5.99999C5.6318 1.33331 5.33332 1.63179 5.33332 1.99998V2.66665M10.6667 2.66665V3.33331C10.6667 3.7015 10.3682 3.99998 9.99999 3.99998H5.99999C5.6318 3.99998 5.33332 3.7015 5.33332 3.33331V2.66665"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgClipboard;


================================================
FILE: web/lib/opal/src/icons/clock-hands-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgClockHandsSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 4.5V8L10 10"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgClockHandsSmall;


================================================
FILE: web/lib/opal/src/icons/clock.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgClock = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2605)">
      <path
        d="M7.99999 3.99999V7.99999L10.6667 9.33333M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2605">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgClock;


================================================
FILE: web/lib/opal/src/icons/cloud.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCloud = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_170_23)">
      <path
        d="M12 6.66669H11.16C10.9106 5.70069 10.3952 4.82401 9.67243 4.13628C8.94966 3.44856 8.04848 2.97735 7.07128 2.7762C6.09409 2.57506 5.08007 2.65205 4.14444 2.99842C3.20881 3.34478 2.3891 3.94664 1.77844 4.73561C1.16778 5.52457 0.790662 6.469 0.689941 7.46159C0.589219 8.45417 0.76893 9.45511 1.20865 10.3507C1.64838 11.2462 2.33048 12.0005 3.17746 12.5277C4.02443 13.055 5.00232 13.3341 6 13.3334H12C12.8841 13.3334 13.7319 12.9822 14.357 12.357C14.9821 11.7319 15.3333 10.8841 15.3333 10C15.3333 9.11597 14.9821 8.26812 14.357 7.643C13.7319 7.01788 12.8841 6.66669 12 6.66669Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_170_23">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgCloud;


================================================
FILE: web/lib/opal/src/icons/code.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCode = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.6667 12L14.6667 8L10.6667 4M5.33334 4L1.33334 8L5.33334 12"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgCode;


================================================
FILE: web/lib/opal/src/icons/column.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgColumn = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 14H3.33333C2.59695 14 2 13.403 2 12.6667V3.33333C2 2.59695 2.59695 2 3.33333 2H6M6 14V2M6 14H10M6 2H10M10 2H12.6667C13.403 2 14 2.59695 14 3.33333V12.6667C14 13.403 13.403 14 12.6667 14H10M10 2V14"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgColumn;


================================================
FILE: web/lib/opal/src/icons/copy.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCopy = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2607)">
      <path
        d="M3.33333 9.99999H2.66666C2.31304 9.99999 1.9739 9.85952 1.72385 9.60947C1.4738 9.35942 1.33333 9.02028 1.33333 8.66666V2.66666C1.33333 2.31304 1.4738 1.9739 1.72385 1.72385C1.9739 1.4738 2.31304 1.33333 2.66666 1.33333H8.66666C9.02028 1.33333 9.35942 1.4738 9.60947 1.72385C9.85952 1.9739 9.99999 2.31304 9.99999 2.66666V3.33333M7.33333 5.99999H13.3333C14.0697 5.99999 14.6667 6.59695 14.6667 7.33333V13.3333C14.6667 14.0697 14.0697 14.6667 13.3333 14.6667H7.33333C6.59695 14.6667 5.99999 14.0697 5.99999 13.3333V7.33333C5.99999 6.59695 6.59695 5.99999 7.33333 5.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2607">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgCopy;


================================================
FILE: web/lib/opal/src/icons/corner-right-up-dot.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgCornerRightUpDot = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 9 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.25002 12.75L4.25002 6.25M4.25002 12.75L7.75 9.25M4.25002 12.75L0.75 9.25M4.25002 3.75C3.42158 3.75 2.75 3.07843 2.75 2.25C2.75 1.42157 3.42158 0.75 4.25002 0.75C5.07845 0.75 5.75 1.42157 5.75 2.25C5.75 3.07843 5.07845 3.75 4.25002 3.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgCornerRightUpDot;


================================================
FILE: web/lib/opal/src/icons/cpu.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgCpu = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2615)">
      <path
        d="M6.09091 1V2.90909M9.90909 1V2.90909M6.09091 13.0909V15M9.90909 13.0909V15M13.0909 6.09091H15M13.0909 9.27273H15M1 6.09091H2.90909M1 9.27273H2.90909M4.18182 2.90909H11.8182C12.5211 2.90909 13.0909 3.47891 13.0909 4.18182V11.8182C13.0909 12.5211 12.5211 13.0909 11.8182 13.0909H4.18182C3.47891 13.0909 2.90909 12.5211 2.90909 11.8182V4.18182C2.90909 3.47891 3.47891 2.90909 4.18182 2.90909ZM6 6H10V10H6V6Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2615">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgCpu;


================================================
FILE: web/lib/opal/src/icons/credit-card.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgCreditCard = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.6667 6V4.00008C14.6667 3.26675 14.0667 2.66675 13.3333 2.66675H2.66668C1.93334 2.66675 1.33334 3.26675 1.33334 4.00008V6M14.6667 6V12.0001C14.6667 12.7334 14.0667 13.3334 13.3333 13.3334H2.66668C1.93334 13.3334 1.33334 12.7334 1.33334 12.0001V6M14.6667 6H1.33334"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgCreditCard;


================================================
FILE: web/lib/opal/src/icons/curate.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgCurate = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 9L8 14.5M8 9C7.35971 8.35971 6.9055 8 6 8H2.5L2.5 13.5H6C6.9055 13.5 7.35971 13.8597 8 14.5M8 9C8.64029 8.35971 9.09449 8 10 8H13.5L13.5 13.5H10C9.09449 13.5 8.64029 13.8597 8 14.5M10.25 3.75C10.25 4.99264 9.24264 6 8 6C6.75736 6 5.75 4.99264 5.75 3.75C5.75 2.50736 6.75736 1.5 8 1.5C9.24264 1.5 10.25 2.50736 10.25 3.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgCurate;


================================================
FILE: web/lib/opal/src/icons/dashboard.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgDashboard = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 6V3.33333C14 2.59695 13.403 2 12.6667 2H3.33333C2.59695 2 2 2.59695 2 3.33333V6M14 6V12.6667C14 13.403 13.403 14 12.6667 14H6M14 6H6M2 6V12.6667C2 13.403 2.59695 14 3.33333 14H6M2 6H6M6 6V14"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgDashboard;


================================================
FILE: web/lib/opal/src/icons/dev-kit.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgDevKit = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 5H14M2 5V14H14V5M2 5C2 4.67722 2.11475 4.36495 2.32376 4.11897L4.12423 2H11.8795L13.6766 4.11869C13.8854 4.36487 14 4.67719 14 5M9.66666 11.1733L11.3333 9.50667L9.66666 7.84M6.33333 7.84L4.66666 9.50667L6.33333 11.1733"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgDevKit;


================================================
FILE: web/lib/opal/src/icons/download-cloud.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgDownloadCloud = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.08333 10.0833L7.75 12.75M7.75 12.75L10.4167 10.0833M7.75 12.75L7.75 6.75M13.3992 10.3545C13.9521 9.9657 14.3668 9.41081 14.583 8.77036C14.7992 8.12991 14.8056 7.43724 14.6015 6.79286C14.3973 6.14848 13.9931 5.58591 13.4476 5.18681C12.902 4.78771 12.2435 4.57283 11.5676 4.57336H10.7662C10.5749 3.8279 10.217 3.13554 9.71944 2.54841C9.22186 1.96129 8.59757 1.49469 7.89357 1.18375C7.18956 0.872809 6.42419 0.725628 5.65508 0.753287C4.88596 0.780946 4.13314 0.982724 3.45329 1.34343C2.77344 1.70414 2.18428 2.21437 1.73016 2.83572C1.27604 3.45707 0.968792 4.17335 0.831551 4.93063C0.69431 5.6879 0.730651 6.46645 0.937838 7.20765C1.14502 7.94885 1.51766 8.63339 2.02769 9.20974"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgDownloadCloud;


================================================
FILE: web/lib/opal/src/icons/download.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgDownload = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 10V12.6667C14 13.3929 13.3929 14 12.6667 14H3.33333C2.60711 14 2 13.3929 2 12.6667V10M4.66667 6.66667L8 10M8 10L11.3333 6.66667M8 10L8 2"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgDownload;


================================================
FILE: web/lib/opal/src/icons/edit-big.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgEditBig = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 2.5H4C3.17157 2.5 2.5 3.17157 2.5 4V12C2.5 12.8284 3.17157 13.5 4 13.5H12C12.8284 13.5 13.5 12.8284 13.5 12V8M6 10V8.26485C6 8.08682 6.0707 7.91617 6.19654 7.79028L11.5938 2.3931C12.1179 1.86897 12.9677 1.86897 13.4918 2.3931L13.6069 2.50823C14.131 3.03236 14.131 3.88213 13.6069 4.40626L8.20971 9.80345C8.08389 9.92934 7.91317 10 7.73521 10H6Z"
      strokeWidth={1.5}
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEditBig;


================================================
FILE: web/lib/opal/src/icons/edit.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgEdit = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 13.3333H14M11 2.33334C11.2652 2.06813 11.6249 1.91913 12 1.91913C12.1857 1.91913 12.3696 1.95571 12.5412 2.02678C12.7128 2.09785 12.8687 2.20202 13 2.33334C13.1313 2.46466 13.2355 2.62057 13.3066 2.79215C13.3776 2.96373 13.4142 3.14762 13.4142 3.33334C13.4142 3.51906 13.3776 3.70296 13.3066 3.87454C13.2355 4.04612 13.1313 4.20202 13 4.33334L4.66667 12.6667L2 13.3333L2.66667 10.6667L11 2.33334Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEdit;


================================================
FILE: web/lib/opal/src/icons/empty.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgEmpty = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 10V12.6667C14 13.3929 13.3929 14 12.6667 14H3.33333C2.60711 14 2 13.3929 2 12.6667V10M8 2V5M13.5 4.5L11.5 6.5M2.5 4.5L4.5 6.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEmpty;


================================================
FILE: web/lib/opal/src/icons/expand.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgExpand = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.99994 5.49995L7.52858 2.97131C7.78891 2.71098 8.21105 2.71098 8.47138 2.97131L11 5.49995M5.00024 10.5L7.5288 13.0286C7.78914 13.2889 8.21127 13.2889 8.4716 13.0286L11.0002 10.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgExpand;


================================================
FILE: web/lib/opal/src/icons/external-link.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgExternalLink = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 24 24"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M15 3h6v6"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M10 14L21 3"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgExternalLink;


================================================
FILE: web/lib/opal/src/icons/eye-closed.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgEyeClosed = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 10"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 1.5C1 1.5 1.69706 2.89413 3 4.22328M15 1.5C15 1.5 14.3029 2.89413 13 4.22328M3 4.22328C3.78612 5.02522 4.7928 5.80351 6 6.23767M3 4.22328L1 6.22328M6 6.23767C6.61544 6.45901 7.28299 6.59091 8 6.59091C8.71701 6.59091 9.38456 6.45901 10 6.23767M6 6.23767L5 8.99908M10 6.23767C11.2072 5.80351 12.2139 5.02522 13 4.22328M10 6.23767L11 8.99908M13 4.22328L15 6.22328"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEyeClosed;


================================================
FILE: web/lib/opal/src/icons/eye-off.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgEyeOff = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11.78 11.78C10.6922 12.6092 9.36761 13.0685 8 13.0909C3.54545 13.0909 1 8 1 8C1.79157 6.52484 2.88945 5.23602 4.22 4.22M11.78 11.78L9.34909 9.34909M11.78 11.78L15 15M4.22 4.22L1 1M4.22 4.22L6.65091 6.65091M6.66364 3.06182C7.10167 2.95929 7.55013 2.90803 8 2.90909C12.4545 2.90909 15 8 15 8C14.6137 8.72266 14.153 9.40301 13.6255 10.03M9.34909 9.34909L6.65091 6.65091M9.34909 9.34909C8.99954 9.72422 8.49873 9.94737 7.98606 9.95641C6.922 9.97519 6.02481 9.078 6.04358 8.01394C6.05263 7.50127 6.27578 7.00046 6.65091 6.65091"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEyeOff;


================================================
FILE: web/lib/opal/src/icons/eye.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgEye = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 12"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 6.00088C1 6.00088 3.54545 0.909973 8 0.909973C12.4545 0.909973 15 6.00088 15 6.00088C15 6.00088 12.4545 11.0918 8 11.0918C3.54545 11.0918 1 6.00088 1 6.00088Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M8 7.90997C9.05436 7.90997 9.90909 7.05524 9.90909 6.00088C9.90909 4.94652 9.05436 4.09179 8 4.09179C6.94564 4.09179 6.09091 4.94652 6.09091 6.00088C6.09091 7.05524 6.94564 7.90997 8 7.90997Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEye;


================================================
FILE: web/lib/opal/src/icons/file-braces.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFileBraces = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 24 24"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 22a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h8a2.4 2.4 0 0 1 1.704.706l3.588 3.588A2.4 2.4 0 0 1 20 8v12a2 2 0 0 1-2 2z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M14 2v5a1 1 0 0 0 1 1h5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M10 12a1 1 0 0 0-1 1v1a1 1 0 0 1-1 1 1 1 0 0 1 1 1v1a1 1 0 0 0 1 1"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M14 18a1 1 0 0 0 1-1v-1a1 1 0 0 1 1-1 1 1 0 0 1-1-1v-1a1 1 0 0 0-1-1"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFileBraces;


================================================
FILE: web/lib/opal/src/icons/file-broadcast.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFileBroadcast = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 18 18"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.1875 2.25003H2.625C1.808 2.25003 1.125 2.93303 1.125 3.75003L1.125 14.25C1.125 15.067 1.808 15.75 2.625 15.75L9.37125 15.75C10.1883 15.75 10.8713 15.067 10.8713 14.25L10.8713 6.94128M6.1875 2.25003L10.8713 6.94128M6.1875 2.25003V6.94128H10.8713M10.3069 2.25L13.216 5.15914C13.6379 5.5811 13.875 6.15339 13.875 6.75013V13.875C13.875 14.5212 13.737 15.2081 13.4392 15.7538M16.4391 15.7538C16.737 15.2081 16.875 14.5213 16.875 13.8751L16.875 7.02481C16.875 5.53418 16.2833 4.10451 15.23 3.04982L14.4301 2.25003"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFileBroadcast;


================================================
FILE: web/lib/opal/src/icons/file-chart-pie.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFileChartPie = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 24 24"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M15.941 22H18a2 2 0 0 0 2-2V8a2.4 2.4 0 0 0-.706-1.704l-3.588-3.588A2.4 2.4 0 0 0 14 2H6a2 2 0 0 0-2 2v3.512"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M14 2v5a1 1 0 0 0 1 1h5"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M4.017 11.512a6 6 0 1 0 8.466 8.475"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M9 16a1 1 0 0 1-1-1v-4c0-.552.45-1.008.995-.917a6 6 0 0 1 4.922 4.922c.091.544-.365.995-.917.995z"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFileChartPie;


================================================
FILE: web/lib/opal/src/icons/file-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgFileSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8.75 4.75H5.75001C5.47386 4.75 5.25001 4.97386 5.25001 5.25V10.75C5.25001 11.0261 5.47386 11.25 5.75001 11.25H10.25C10.5261 11.25 10.75 11.0261 10.75 10.75V6.75M8.75 4.75L10.75 6.75M8.75 4.75V6.75H10.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFileSmall;


================================================
FILE: web/lib/opal/src/icons/file-text.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFileText = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 20"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.66634 1.6665H2.99967C2.55765 1.6665 2.13372 1.8421 1.82116 2.15466C1.5086 2.46722 1.33301 2.89114 1.33301 3.33317V16.6665C1.33301 17.1085 1.5086 17.5325 1.82116 17.845C2.13372 18.1576 2.55765 18.3332 2.99967 18.3332H12.9997C13.4417 18.3332 13.8656 18.1576 14.1782 17.845C14.4907 17.5325 14.6663 17.1085 14.6663 16.6665V6.6665M9.66634 1.6665L14.6663 6.6665M9.66634 1.6665L9.66634 6.6665L14.6663 6.6665M11.333 10.8332H4.66634M11.333 14.1665H4.66634M6.33301 7.49984H4.66634"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFileText;


================================================
FILE: web/lib/opal/src/icons/files.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFiles = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 16 16"
    fill="none"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.5 1.9999H2.33334C1.97971 1.9999 1.64058 2.14038 1.39053 2.39043C1.14048 2.64048 1 2.97961 1 3.33324L1 12.6666C1 13.0202 1.14048 13.3593 1.39052 13.6094C1.64057 13.8594 1.97971 13.9999 2.33333 13.9999L8.33 13.9999C8.68362 13.9999 9.02276 13.8594 9.27281 13.6094C9.52286 13.3593 9.66333 13.0202 9.66333 12.6666L9.66334 6.1699M5.5 1.9999L9.66334 6.1699M5.5 1.9999V6.1699H9.66334M9.16167 1.99988L11.7475 4.58578C12.1226 4.96085 12.3333 5.46956 12.3333 5.99999V12.3332C12.3333 12.9076 12.2107 13.5182 11.9459 14.0032M14.6126 14.0033C14.8773 13.5182 15 12.9077 15 12.3333L15 6.24415C15 4.91915 14.4741 3.64833 13.5377 2.71083L12.8268 1.99991"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFiles;


================================================
FILE: web/lib/opal/src/icons/filter-plus.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFilterPlus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.5 12.5L6.83334 11.1667V7.80667L1.5 1.5H14.8333L12.1667 4.65333M12.1667 7V9.5M12.1667 9.5V12M12.1667 9.5H9.66667M12.1667 9.5H14.6667"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFilterPlus;


================================================
FILE: web/lib/opal/src/icons/filter.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFilter = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.6667 3H1.33334L6.66668 9.30667V12.6667L9.33334 14V9.30667L14.6667 3Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFilter;


================================================
FILE: web/lib/opal/src/icons/fold.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFold = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 3.25L8.47136 5.77857C8.21103 6.0389 7.78889 6.0389 7.52856 5.77857L4.99999 3.25M11 12.75L8.47136 10.2214C8.21103 9.96103 7.78889 9.96103 7.52856 10.2214L4.99999 12.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgFold;


================================================
FILE: web/lib/opal/src/icons/folder-in.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFolderIn = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5 2.5L3 2.50001C2.17157 2.50001 1.5 3.17158 1.5 4.00001V12C1.5 12.8284 2.17157 13.5 3 13.5H13C13.8284 13.5 14.5 12.8284 14.5 12V6.00001C14.5 5.17158 13.8284 4.50001 13 4.50001L11 4.5M11 7.5L8.47141 10.0286C8.34124 10.1588 8.17062 10.2239 8.00001 10.2239M5.00001 7.5L7.52861 10.0286C7.65877 10.1588 7.82939 10.2239 8.00001 10.2239M7.99999 1.5L8.00001 10.2239"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFolderIn;


================================================
FILE: web/lib/opal/src/icons/folder-open.tsx
================================================
import React from "react";
import type { IconProps } from "@opal/types";

const SvgFolderOpen = React.forwardRef<SVGSVGElement, IconProps>(
  ({ size = 32, color = "currentColor", title, className, ...props }, ref) => (
    <svg
      ref={ref}
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 32 26"
      width={size}
      height={size}
      fill="none"
      role={title ? "img" : "presentation"}
      aria-label={title}
      className={className}
      stroke="currentColor"
      {...props}
    >
      {title ? <title>{title}</title> : null}
      <path
        d="M30.4177 15.4931L29.1847 15.2876L30.4177 15.4931ZM29.4177 21.4932L30.6507 21.6987V21.6987L29.4177 21.4932ZM2.58209 21.4932L1.3491 21.6987L2.58209 21.4932ZM1.58209 15.4931L0.349095 15.6986L1.58209 15.4931ZM13.8786 2.87868L12.9947 3.76256V3.76256L13.8786 2.87868ZM16.1212 5.12132L17.0051 4.23744V4.23744L16.1212 5.12132ZM4.54127 11.9999V13.2499H27.4585V11.9999V10.7499H4.54127V11.9999ZM30.4177 15.4931L29.1847 15.2876L28.1847 21.2877L29.4177 21.4932L30.6507 21.6987L31.6507 15.6986L30.4177 15.4931ZM26.4585 24V22.75H5.54128V24V25.25H26.4585V24ZM2.58209 21.4932L3.81509 21.2877L2.81508 15.2876L1.58209 15.4931L0.349095 15.6986L1.3491 21.6987L2.58209 21.4932ZM5.54128 24V22.75C4.68581 22.75 3.95572 22.1315 3.81509 21.2877L2.58209 21.4932L1.3491 21.6987C1.69065 23.748 3.46371 25.25 5.54128 25.25V24ZM29.4177 21.4932L28.1847 21.2877C28.0441 22.1315 27.314 22.75 26.4585 22.75V24V25.25C28.5361 25.25 30.3091 23.748 30.6507 21.6987L29.4177 21.4932ZM18.2425 6V7.25H25.9999V6V4.75H18.2425V6ZM5.9999 2V3.25H11.7573V2V0.75H5.9999V2ZM13.8786 2.87868L12.9947 3.76256L15.2373 6.0052L16.1212 5.12132L17.0051 4.23744L14.7625 1.9948L13.8786 2.87868ZM11.7573 2V3.25C12.2214 3.25 12.6665 3.43437 12.9947 3.76256L13.8786 2.87868L14.7625 1.9948C13.9654 1.19777 12.8844 0.75 11.7573 0.75V2ZM18.2425 6V4.75C17.7784 4.75 17.3333 4.56563 17.0051 4.23744L16.1212 5.12132L15.2373 6.0052C16.0344 6.80223 17.1154 7.25 18.2425 7.25V6ZM28.9999 9H30.2499C30.2499 6.65279 28.3471 4.75 25.9999 4.75V6V7.25C26.9664 7.25 27.7499 8.0335 27.7499 9H28.9999ZM2.99989 5H4.24989C4.24989 4.0335 5.0334 3.25 5.9999 3.25V2V0.75C3.65269 0.75 1.74989 2.65279 1.74989 5H2.99989ZM28.9999 9H27.7499V12.4249H28.9999H30.2499V9H28.9999ZM27.4585 11.9999V13.2499C27.7932 13.2499 28.0975 13.3411 28.3564 13.4965L28.9999 12.4249L29.6434 11.3533C29.0065 10.9708 28.2589 10.7499 27.4585 10.7499V11.9999ZM28.9999 12.4249L28.3564 13.4965C28.9538 13.8553 29.3076 14.5505 29.1847 15.2876L30.4177 15.4931L31.6507 15.6986C31.9508 13.8982 31.0763 12.2138 29.6434 11.3533L28.9999 12.4249ZM2.99989 12.4249H4.24989V5H2.99989H1.74989V12.4249H2.99989ZM4.54127 11.9999V10.7499C3.74089 10.7499 2.99329 10.9708 2.35636 11.3533L2.99989 12.4249L3.64343 13.4965C3.90228 13.3411 4.20658 13.2499 4.54127 13.2499V11.9999ZM2.99989 12.4249L2.35636 11.3533C0.923529 12.2138 0.0490297 13.8982 0.349095 15.6986L1.58209 15.4931L2.81508 15.2876C2.69222 14.5505 3.04602 13.8553 3.64343 13.4965L2.99989 12.4249Z"
        fill={color}
        fillOpacity={0.8}
        stroke={color}
        strokeOpacity={0.8}
        strokeWidth={0.2}
      />
    </svg>
  )
);

SvgFolderOpen.displayName = "SvgFolderOpen";
export default SvgFolderOpen;


================================================
FILE: web/lib/opal/src/icons/folder-partial-open.tsx
================================================
import React from "react";
import type { IconProps } from "@opal/types";

const SvgFolderPartialOpen = React.forwardRef<SVGSVGElement, IconProps>(
  ({ size = 32, color = "currentColor", title, className, ...props }, ref) => (
    <svg
      ref={ref}
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 16 13"
      width={size}
      height={size}
      fill="none"
      role={title ? "img" : "presentation"}
      aria-label={title}
      className={className}
      stroke="currentColor"
      {...props}
    >
      {title ? <title>{title}</title> : null}
      <path
        d="M14.1431 4.98782V4.25C14.1431 3.42157 13.4715 2.75 12.6431 2.75H8.76442C8.36659 2.75 7.98506 2.59196 7.70376 2.31066L6.58244 1.18934C6.30113 0.908035 5.9196 0.75 5.52178 0.75H2.6431C1.81467 0.75 1.1431 1.42157 1.1431 2.25V4.9878"
        stroke={color}
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M14.2394 10.3532C14.1852 11.1397 13.5313 11.75 12.743 11.75H2.54321C1.75483 11.75 1.101 11.1397 1.04676 10.3532L0.753657 6.1032C0.693864 5.23621 1.38105 4.5 2.2501 4.5H13.0361C13.9051 4.5 14.5923 5.2362 14.5325 6.1032L14.2394 10.3532Z"
        stroke={color}
        strokeWidth={1.5}
        strokeLinecap="round"
      />
    </svg>
  )
);

SvgFolderPartialOpen.displayName = "SvgFolderPartialOpen";
export default SvgFolderPartialOpen;


================================================
FILE: web/lib/opal/src/icons/folder-plus.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFolderPlus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.99999 7.33333V11.3333M5.99999 9.33333H10M14.6667 12.6667C14.6667 13.0203 14.5262 13.3594 14.2761 13.6095C14.0261 13.8595 13.6869 14 13.3333 14H2.66666C2.31304 14 1.9739 13.8595 1.72385 13.6095C1.4738 13.3594 1.33333 13.0203 1.33333 12.6667V3.33333C1.33333 2.97971 1.4738 2.64057 1.72385 2.39052C1.9739 2.14048 2.31304 2 2.66666 2H5.99999L7.33333 4H13.3333C13.6869 4 14.0261 4.14048 14.2761 4.39052C14.5262 4.64057 14.6667 4.97971 14.6667 5.33333V12.6667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFolderPlus;


================================================
FILE: web/lib/opal/src/icons/folder.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgFolder = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.5 12V6C14.5 5.17157 13.8284 4.5 13 4.5H9.12132C8.7235 4.5 8.34196 4.34196 8.06066 4.06066L6.93934 2.93934C6.65804 2.65804 6.2765 2.5 5.87868 2.5H3C2.17157 2.5 1.5 3.17157 1.5 4V12C1.5 12.8284 2.17157 13.5 3 13.5H13C13.8284 13.5 14.5 12.8284 14.5 12Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgFolder;


================================================
FILE: web/lib/opal/src/icons/gemini.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgGemini = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M26 2C26.5034 2 26.9412 2.34378 27.064 2.83212C27.4405 4.3258 27.9315 5.78274 28.5426 7.20002C30.1345 10.8981 32.3187 14.1349 35.092 16.9081C37.8664 19.6813 41.102 21.8655 44.8 23.4574C46.2186 24.0685 47.6743 24.5595 49.1679 24.936C49.6562 25.0588 49.9999 25.4967 50 26C50 26.5034 49.6563 26.9413 49.1679 27.064C47.6743 27.4405 46.2172 27.9315 44.8 28.5426C41.1019 30.1345 37.8651 32.3187 35.092 35.092C32.3187 37.8665 30.1345 41.1019 28.5426 44.8C27.9315 46.2186 27.4405 47.6743 27.064 49.1679C26.9413 49.6563 26.5034 50 26 50C25.4967 49.9999 25.0588 49.6562 24.936 49.1679C24.5595 47.6743 24.0685 46.2172 23.4574 44.8C21.8655 41.102 19.6826 37.8651 16.9081 35.092C14.1335 32.3187 10.8981 30.1345 7.20002 28.5426C5.78137 27.9315 4.3258 27.4405 2.83212 27.064C2.34378 26.9412 2 26.5034 2 26C2.00006 25.4967 2.34381 25.0588 2.83212 24.936C4.32581 24.5595 5.78273 24.0686 7.20002 23.4574C10.8981 21.8655 14.1349 19.6813 16.9081 16.9081C19.6813 14.1349 21.8655 10.8981 23.4574 7.20002C24.0686 5.78137 24.5595 4.32581 24.936 2.83212C25.0588 2.34381 25.4967 2.00006 26 2Z"
      fill="url(#paint0_linear_9_973)"
    />
    <defs>
      <linearGradient
        id="paint0_linear_9_973"
        x1={15.6448}
        y1={34.1163}
        x2={40.5754}
        y2={13.0975}
        gradientUnits="userSpaceOnUse"
      >
        <stop stopColor="#4893FC" />
        <stop offset={0.27} stopColor="#4893FC" />
        <stop offset={0.776981} stopColor="#969DFF" />
        <stop offset={1} stopColor="#BD99FE" />
      </linearGradient>
    </defs>
  </svg>
);
export default SvgGemini;


================================================
FILE: web/lib/opal/src/icons/globe.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgGlobe = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2601)">
      <path
        d="M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 8.00001 14.6667M14.6667 7.99999C14.6667 4.3181 11.6819 1.33333 8.00001 1.33333M14.6667 7.99999H1.33334M8.00001 14.6667C4.31811 14.6667 1.33334 11.6819 1.33334 7.99999M8.00001 14.6667C9.66753 12.8411 10.6152 10.472 10.6667 7.99999C10.6152 5.52802 9.66753 3.1589 8.00001 1.33333M8.00001 14.6667C6.33249 12.8411 5.38484 10.472 5.33334 7.99999C5.38484 5.52802 6.33249 3.1589 8.00001 1.33333M1.33334 7.99999C1.33334 4.3181 4.31811 1.33333 8.00001 1.33333"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2601">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgGlobe;


================================================
FILE: web/lib/opal/src/icons/handle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgHandle = ({ size = 16, ...props }: IconProps) => (
  <svg
    width={Math.round((size * 3) / 17)}
    height={size}
    viewBox="0 0 3 17"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M0.5 0.5V16.5M2.5 0.5V16.5"
      stroke="currentColor"
      strokeLinecap="round"
    />
  </svg>
);

export default SvgHandle;


================================================
FILE: web/lib/opal/src/icons/hard-drive.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgHardDrive = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.6667 8.00002H1.33334M14.6667 8.00002V12C14.6667 12.3536 14.5262 12.6928 14.2762 12.9428C14.0261 13.1929 13.687 13.3334 13.3333 13.3334H2.66668C2.31305 13.3334 1.97392 13.1929 1.72387 12.9428C1.47382 12.6928 1.33334 12.3536 1.33334 12V8.00002M14.6667 8.00002L12.3667 3.40669C12.2563 3.18455 12.0861 2.9976 11.8753 2.86687C11.6645 2.73615 11.4214 2.66682 11.1733 2.66669H4.82668C4.57862 2.66682 4.33552 2.73615 4.12471 2.86687C3.91389 2.9976 3.74373 3.18455 3.63334 3.40669L1.33334 8.00002M4.00001 10.6667H4.00668M6.66668 10.6667H6.67334"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHardDrive;


================================================
FILE: web/lib/opal/src/icons/hash-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgHashSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5 6.5H6.5M11 6.5H9.5M5 9.5H6.5M11 9.5H9.5M6.5 5V6.5M6.5 11V9.5M9.5 5V6.5M9.5 11V9.5M6.5 9.5H9.5M6.5 9.5V6.5M9.5 9.5V6.5M9.5 6.5H6.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHashSmall;


================================================
FILE: web/lib/opal/src/icons/hash.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgHash = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.66667 6H13.3333M2.66667 10H13.3333M6.66667 2L5.33334 14M10.6667 2L9.33334 14"
      stroke="currentColor"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHash;


================================================
FILE: web/lib/opal/src/icons/headset-mic.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgHeadsetMic = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.5 7.75002L2.5 7.25C2.5 4.21243 4.96243 1.75 8 1.75C11.0376 1.75 13.5 4.21243 13.5 7.25V7.75M2.5 7.75002L4 7.75C4.55228 7.75 5 8.19772 5 8.75V10.25C5 10.8023 4.55228 11.25 4 11.25H3.5C2.94772 11.25 2.5 10.8023 2.5 10.25V7.75002ZM13.5 7.75H12C11.4477 7.75 11 8.19772 11 8.75V10.25C11 10.8023 11.4477 11.25 12 11.25H12.5C13.0523 11.25 13.5 10.8023 13.5 10.25M13.5 7.75V10.25M13.5 10.25V11.25C13.5 12.9069 12.1569 14.25 10.5 14.25L8 14.25"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHeadsetMic;


================================================
FILE: web/lib/opal/src/icons/history.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgHistory = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.99998 4.00001V8.00001L11 9.50003M1.33332 1.40151V5.23535M1.33332 5.23535H4.99998M1.33332 5.23535L3.28593 3.28597C4.49236 2.07954 6.15903 1.33334 7.99998 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 7.99998 14.6667C4.83386 14.6667 2.18324 12.4596 1.50274 9.50003"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHistory;


================================================
FILE: web/lib/opal/src/icons/hourglass.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgHourglass = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 7.99999L4.44793 5.72667C4.06499 5.48159 3.83333 5.05828 3.83333 4.60364V1.83333H12.1667V4.60364C12.1667 5.05828 11.935 5.48159 11.5521 5.72667L8 7.99999ZM8 7.99999L11.5521 10.2733C11.935 10.5184 12.1667 10.9417 12.1667 11.3963V14.1667H3.83333V11.3963C3.83333 10.9417 4.06499 10.5184 4.44793 10.2733L8 7.99999ZM13.5 14.1667H2.5M13.5 1.83333H2.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgHourglass;


================================================
FILE: web/lib/opal/src/icons/image-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgImageSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.5 11.5L6.20711 8.20711C5.81658 7.81658 5.18342 7.81658 4.79289 8.20711L4 9M9.75 7.5C10.4404 7.5 11 6.94037 11 6.25C11 5.55964 10.4404 5 9.75 5C9.05963 5 8.5 5.55964 8.5 6.25C8.5 6.94037 9.05963 7.5 9.75 7.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgImageSmall;


================================================
FILE: web/lib/opal/src/icons/image.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgImage = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 14L6.06066 9.06072C5.47487 8.47498 4.52513 8.47498 3.93934 9.06072L2 11M2 3.49998C2 2.67156 2.67157 2 3.5 2H12.5C13.3285 2 14 2.67156 14 3.49998V12.4999C14 13.3283 13.3285 13.9998 12.5 13.9998H3.5C2.67157 13.9998 2 13.3283 2 12.4999V3.49998ZM9.875 7.62492C10.7034 7.62492 11.375 6.95338 11.375 6.12494C11.375 5.29653 10.7034 4.62496 9.875 4.62496C9.04655 4.62496 8.375 5.29653 8.375 6.12494C8.375 6.95338 9.04655 7.62492 9.875 7.62492Z"
      strokeWidth={1.5}
      strokeLinecap="round"
    />
  </svg>
);
export default SvgImage;


================================================
FILE: web/lib/opal/src/icons/import-icon.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgImport = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.75 9.41667L9.41667 6.75M9.41667 6.75L6.75 4.08333M9.41667 6.75L0.75 6.74667M2.75 3.75V2.08C2.75 1.34546 3.34546 0.75 4.08 0.75H11.4167C11.7703 0.75 12.1094 0.890476 12.3595 1.14052C12.6095 1.39057 12.75 1.72971 12.75 2.08333V11.4167C12.75 11.7703 12.6095 12.1094 12.3595 12.3595C12.1094 12.6095 11.7703 12.75 11.4167 12.75H4.08C3.34546 12.75 2.75 12.1545 2.75 11.42V9.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgImport;


================================================
FILE: web/lib/opal/src/icons/index.ts
================================================
export { default as SvgActions } from "@opal/icons/actions";
export { default as SvgActivity } from "@opal/icons/activity";
export { default as SvgActivitySmall } from "@opal/icons/activity-small";
export { default as SvgAddLines } from "@opal/icons/add-lines";
export { default as SvgAlertCircle } from "@opal/icons/alert-circle";
export { default as SvgAlertTriangle } from "@opal/icons/alert-triangle";
export { default as SvgArrowDownDot } from "@opal/icons/arrow-down-dot";
export { default as SvgArrowExchange } from "@opal/icons/arrow-exchange";
export { default as SvgArrowLeft } from "@opal/icons/arrow-left";
export { default as SvgArrowLeftDot } from "@opal/icons/arrow-left-dot";
export { default as SvgArrowRight } from "@opal/icons/arrow-right";
export { default as SvgArrowRightCircle } from "@opal/icons/arrow-right-circle";
export { default as SvgArrowRightDot } from "@opal/icons/arrow-right-dot";
export { default as SvgArrowUpCircle } from "@opal/icons/arrow-up-circle";
export { default as SvgArrowUp } from "@opal/icons/arrow-up";
export { default as SvgArrowUpDown } from "@opal/icons/arrow-up-down";
export { default as SvgArrowUpDot } from "@opal/icons/arrow-up-dot";
export { default as SvgArrowUpRight } from "@opal/icons/arrow-up-right";
export { default as SvgArrowWallRight } from "@opal/icons/arrow-wall-right";
export { default as SvgAudio } from "@opal/icons/audio";
export { default as SvgAudioEqSmall } from "@opal/icons/audio-eq-small";
export { default as SvgAws } from "@opal/icons/aws";
export { default as SvgAzure } from "@opal/icons/azure";
export { default as SvgBarChart } from "@opal/icons/bar-chart";
export { default as SvgBarChartSmall } from "@opal/icons/bar-chart-small";
export { default as SvgBell } from "@opal/icons/bell";
export { default as SvgBifrost } from "@opal/icons/bifrost";
export { default as SvgBlocks } from "@opal/icons/blocks";
export { default as SvgBookOpen } from "@opal/icons/book-open";
export { default as SvgBookmark } from "@opal/icons/bookmark";
export { default as SvgBooksLineSmall } from "@opal/icons/books-line-small";
export { default as SvgBooksStackSmall } from "@opal/icons/books-stack-small";
export { default as SvgBracketCurly } from "@opal/icons/bracket-curly";
export { default as SvgBranch } from "@opal/icons/branch";
export { default as SvgBubbleText } from "@opal/icons/bubble-text";
export { default as SvgCalendar } from "@opal/icons/calendar";
export { default as SvgCheck } from "@opal/icons/check";
export { default as SvgCheckCircle } from "@opal/icons/check-circle";
export { default as SvgCheckSmall } from "@opal/icons/check-small";
export { default as SvgCheckSquare } from "@opal/icons/check-square";
export { default as SvgChevronDown } from "@opal/icons/chevron-down";
export { default as SvgChevronDownSmall } from "@opal/icons/chevron-down-small";
export { default as SvgChevronLeft } from "@opal/icons/chevron-left";
export { default as SvgChevronRight } from "@opal/icons/chevron-right";
export { default as SvgChevronUp } from "@opal/icons/chevron-up";
export { default as SvgChevronUpSmall } from "@opal/icons/chevron-up-small";
export { default as SvgCircle } from "@opal/icons/circle";
export { default as SvgClaude } from "@opal/icons/claude";
export { default as SvgClipboard } from "@opal/icons/clipboard";
export { default as SvgClock } from "@opal/icons/clock";
export { default as SvgClockHandsSmall } from "@opal/icons/clock-hands-small";
export { default as SvgCloud } from "@opal/icons/cloud";
export { default as SvgCode } from "@opal/icons/code";
export { default as SvgColumn } from "@opal/icons/column";
export { default as SvgCopy } from "@opal/icons/copy";
export { default as SvgCornerRightUpDot } from "@opal/icons/corner-right-up-dot";
export { default as SvgCpu } from "@opal/icons/cpu";
export { default as SvgCurate } from "@opal/icons/curate";
export { default as SvgCreditCard } from "@opal/icons/credit-card";
export { default as SvgDashboard } from "@opal/icons/dashboard";
export { default as SvgDevKit } from "@opal/icons/dev-kit";
export { default as SvgDownload } from "@opal/icons/download";
export { default as SvgDiscordMono } from "@opal/icons/DiscordMono";
export { default as SvgDownloadCloud } from "@opal/icons/download-cloud";
export { default as SvgEdit } from "@opal/icons/edit";
export { default as SvgEditBig } from "@opal/icons/edit-big";
export { default as SvgEmpty } from "@opal/icons/empty";
export { default as SvgExpand } from "@opal/icons/expand";
export { default as SvgExternalLink } from "@opal/icons/external-link";
export { default as SvgEye } from "@opal/icons/eye";
export { default as SvgEyeClosed } from "@opal/icons/eye-closed";
export { default as SvgEyeOff } from "@opal/icons/eye-off";
export { default as SvgFileBraces } from "@opal/icons/file-braces";
export { default as SvgFileBroadcast } from "@opal/icons/file-broadcast";
export { default as SvgFiles } from "@opal/icons/files";
export { default as SvgFileChartPie } from "@opal/icons/file-chart-pie";
export { default as SvgFileSmall } from "@opal/icons/file-small";
export { default as SvgFileText } from "@opal/icons/file-text";
export { default as SvgFilter } from "@opal/icons/filter";
export { default as SvgFilterPlus } from "@opal/icons/filter-plus";
export { default as SvgFold } from "@opal/icons/fold";
export { default as SvgFolder } from "@opal/icons/folder";
export { default as SvgFolderIn } from "@opal/icons/folder-in";
export { default as SvgFolderOpen } from "@opal/icons/folder-open";
export { default as SvgFolderPartialOpen } from "@opal/icons/folder-partial-open";
export { default as SvgFolderPlus } from "@opal/icons/folder-plus";
export { default as SvgGemini } from "@opal/icons/gemini";
export { default as SvgGlobe } from "@opal/icons/globe";
export { default as SvgHandle } from "@opal/icons/handle";
export { default as SvgHardDrive } from "@opal/icons/hard-drive";
export { default as SvgHashSmall } from "@opal/icons/hash-small";
export { default as SvgHash } from "@opal/icons/hash";
export { default as SvgHeadsetMic } from "@opal/icons/headset-mic";
export { default as SvgHistory } from "@opal/icons/history";
export { default as SvgShareWebhook } from "@opal/icons/share-webhook";
export { default as SvgHourglass } from "@opal/icons/hourglass";
export { default as SvgImage } from "@opal/icons/image";
export { default as SvgImageSmall } from "@opal/icons/image-small";
export { default as SvgImport } from "@opal/icons/import-icon";
export { default as SvgInfo } from "@opal/icons/info";
export { default as SvgInfoSmall } from "@opal/icons/info-small";
export { default as SvgKey } from "@opal/icons/key";
export { default as SvgKeystroke } from "@opal/icons/keystroke";
export { default as SvgLightbulbSimple } from "@opal/icons/lightbulb-simple";
export { default as SvgLineChartUp } from "@opal/icons/line-chart-up";
export { default as SvgLink } from "@opal/icons/link";
export { default as SvgLinkedDots } from "@opal/icons/linked-dots";
export { default as SvgLitellm } from "@opal/icons/litellm";
export { default as SvgLmStudio } from "@opal/icons/lm-studio";
export { default as SvgLoader } from "@opal/icons/loader";
export { default as SvgLock } from "@opal/icons/lock";
export { default as SvgLogOut } from "@opal/icons/log-out";
export { default as SvgMaximize2 } from "@opal/icons/maximize-2";
export { default as SvgMcp } from "@opal/icons/mcp";
export { default as SvgMenu } from "@opal/icons/menu";
export { default as SvgMicrophone } from "@opal/icons/microphone";
export { default as SvgMicrophoneOff } from "@opal/icons/microphone-off";
export { default as SvgMinus } from "@opal/icons/minus";
export { default as SvgMinusCircle } from "@opal/icons/minus-circle";
export { default as SvgMoon } from "@opal/icons/moon";
export { default as SvgMoreHorizontal } from "@opal/icons/more-horizontal";
export { default as SvgMusicSmall } from "@opal/icons/music-small";
export { default as SvgNetworkGraph } from "@opal/icons/network-graph";
export { default as SvgNotificationBubble } from "@opal/icons/notification-bubble";
export { default as SvgOllama } from "@opal/icons/ollama";
export { default as SvgOnyxLogo } from "@opal/icons/onyx-logo";
export { default as SvgOnyxLogoTyped } from "@opal/icons/onyx-logo-typed";
export { default as SvgOnyxOctagon } from "@opal/icons/onyx-octagon";
export { default as SvgOnyxTyped } from "@opal/icons/onyx-typed";
export { default as SvgOpenai } from "@opal/icons/openai";
export { default as SvgOpenrouter } from "@opal/icons/openrouter";
export { default as SvgOrganization } from "@opal/icons/organization";
export { default as SvgPaintBrush } from "@opal/icons/paint-brush";
export { default as SvgPaperclip } from "@opal/icons/paperclip";
export { default as SvgPauseCircle } from "@opal/icons/pause-circle";
export { default as SvgPenSmall } from "@opal/icons/pen-small";
export { default as SvgPencilRuler } from "@opal/icons/pencil-ruler";
export { default as SvgPieChart } from "@opal/icons/pie-chart";
export { default as SvgPin } from "@opal/icons/pin";
export { default as SvgPinned } from "@opal/icons/pinned";
export { default as SvgPlayCircle } from "@opal/icons/play-circle";
export { default as SvgPlug } from "@opal/icons/plug";
export { default as SvgPlus } from "@opal/icons/plus";
export { default as SvgPlusCircle } from "@opal/icons/plus-circle";
export { default as SvgProgressBars } from "@opal/icons/progress-bars";
export { default as SvgProgressCircle } from "@opal/icons/progress-circle";
export { default as SvgQuestionMarkSmall } from "@opal/icons/question-mark-small";
export { default as SvgQuoteEnd } from "@opal/icons/quote-end";
export { default as SvgQuoteStart } from "@opal/icons/quote-start";
export { default as SvgRefreshCw } from "@opal/icons/refresh-cw";
export { default as SvgRevert } from "@opal/icons/revert";
export { default as SvgSearch } from "@opal/icons/search";
export { default as SvgSearchMenu } from "@opal/icons/search-menu";
export { default as SvgSearchSmall } from "@opal/icons/search-small";
export { default as SvgServer } from "@opal/icons/server";
export { default as SvgSettings } from "@opal/icons/settings";
export { default as SvgShare } from "@opal/icons/share";
export { default as SvgShield } from "@opal/icons/shield";
export { default as SvgSidebar } from "@opal/icons/sidebar";
export { default as SvgSlack } from "@opal/icons/slack";
export { default as SvgSlash } from "@opal/icons/slash";
export { default as SvgSliders } from "@opal/icons/sliders";
export { default as SvgSlidersSmall } from "@opal/icons/sliders-small";
export { default as SvgSort } from "@opal/icons/sort";
export { default as SvgSortOrder } from "@opal/icons/sort-order";
export { default as SvgSparkle } from "@opal/icons/sparkle";
export { default as SvgStar } from "@opal/icons/star";
export { default as SvgStarOff } from "@opal/icons/star-off";
export { default as SvgStep1 } from "@opal/icons/step1";
export { default as SvgStep2 } from "@opal/icons/step2";
export { default as SvgStep3 } from "@opal/icons/step3";
export { default as SvgStep3End } from "@opal/icons/step3-end";
export { default as SvgStop } from "@opal/icons/stop";
export { default as SvgStopCircle } from "@opal/icons/stop-circle";
export { default as SvgSun } from "@opal/icons/sun";
export { default as SvgTag } from "@opal/icons/tag";
export { default as SvgTerminal } from "@opal/icons/terminal";
export { default as SvgTerminalSmall } from "@opal/icons/terminal-small";
export { default as SvgTextLines } from "@opal/icons/text-lines";
export { default as SvgTextLinesSmall } from "@opal/icons/text-lines-small";
export { default as SvgThumbsDown } from "@opal/icons/thumbs-down";
export { default as SvgThumbsUp } from "@opal/icons/thumbs-up";
export { default as SvgTrash } from "@opal/icons/trash";
export { default as SvgTwoLineSmall } from "@opal/icons/two-line-small";
export { default as SvgUnplug } from "@opal/icons/unplug";
export { default as SvgUploadCloud } from "@opal/icons/upload-cloud";
export { default as SvgUser } from "@opal/icons/user";
export { default as SvgUserCheck } from "@opal/icons/user-check";
export { default as SvgUserEdit } from "@opal/icons/user-edit";
export { default as SvgUserKey } from "@opal/icons/user-key";
export { default as SvgUserManage } from "@opal/icons/user-manage";
export { default as SvgUserMinus } from "@opal/icons/user-minus";
export { default as SvgUserPlus } from "@opal/icons/user-plus";
export { default as SvgUserShield } from "@opal/icons/user-shield";
export { default as SvgUserSpeaker } from "@opal/icons/user-speaker";
export { default as SvgUserSync } from "@opal/icons/user-sync";
export { default as SvgUserX } from "@opal/icons/user-x";
export { default as SvgUsers } from "@opal/icons/users";
export { default as SvgVolume } from "@opal/icons/volume";
export { default as SvgVolumeOff } from "@opal/icons/volume-off";
export { default as SvgWallet } from "@opal/icons/wallet";
export { default as SvgWorkflow } from "@opal/icons/workflow";
export { default as SvgX } from "@opal/icons/x";
export { default as SvgXCircle } from "@opal/icons/x-circle";
export { default as SvgXOctagon } from "@opal/icons/x-octagon";
export { default as SvgZoomIn } from "@opal/icons/zoom-in";
export { default as SvgZoomOut } from "@opal/icons/zoom-out";


================================================
FILE: web/lib/opal/src/icons/info-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgInfoSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 11V7H7M8 11H7M8 11H9M8 4.7V4.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgInfoSmall;


================================================
FILE: web/lib/opal/src/icons/info.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgInfo = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8.00001 10.6666V7.99998M8.00001 5.33331H8.00668M14.6667 7.99998C14.6667 11.6819 11.6819 14.6666 8.00001 14.6666C4.31811 14.6666 1.33334 11.6819 1.33334 7.99998C1.33334 4.31808 4.31811 1.33331 8.00001 1.33331C11.6819 1.33331 14.6667 4.31808 14.6667 7.99998Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgInfo;


================================================
FILE: web/lib/opal/src/icons/key.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgKey = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 1.33331L12.6667 2.66665M12.6667 2.66665L14.6667 4.66665L12.3333 6.99998L10.3333 4.99998M12.6667 2.66665L10.3333 4.99998M7.59333 7.73998C7.93756 8.07962 8.2112 8.48401 8.3985 8.92984C8.5858 9.37568 8.68306 9.85416 8.68468 10.3377C8.68631 10.8213 8.59225 11.3004 8.40794 11.7475C8.22363 12.1946 7.95271 12.6008 7.61076 12.9427C7.26882 13.2847 6.86261 13.5556 6.41554 13.7399C5.96846 13.9242 5.48933 14.0183 5.00575 14.0167C4.52218 14.015 4.0437 13.9178 3.59786 13.7305C3.15203 13.5432 2.74764 13.2695 2.408 12.9253C1.74009 12.2338 1.37051 11.3076 1.37886 10.3462C1.38722 9.38479 1.77284 8.46514 2.45267 7.78531C3.13249 7.10548 4.05214 6.71986 5.01353 6.71151C5.97492 6.70315 6.90113 7.07273 7.59267 7.74065L7.59333 7.73998ZM7.59333 7.73998L10.3333 4.99998"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgKey;


================================================
FILE: web/lib/opal/src/icons/keystroke.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgKeystroke = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12 4V9C12 9.55228 11.5523 10 11 10H5M5 10L6.5 8.5M5 10L6.5 11.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgKeystroke;


================================================
FILE: web/lib/opal/src/icons/lightbulb-simple.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLightbulbSimple = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.99998 11.67H5.99998M7.99998 1.67001C5.42265 1.67001 3.33331 3.75935 3.33331 6.33668C3.33331 8.03421 4.2397 9.52008 5.59492 10.3367C5.83556 10.4817 5.99998 10.7333 5.99998 11.0142V12.3367C5.99998 13.4413 6.89538 14.3367 7.99998 14.3367C9.10458 14.3367 9.99998 13.4413 9.99998 12.3367V11.0142C9.99998 10.7333 10.1644 10.4817 10.405 10.3367C11.7602 9.52008 12.6666 8.03421 12.6666 6.33668C12.6666 3.75935 10.5773 1.67001 7.99998 1.67001Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLightbulbSimple;


================================================
FILE: web/lib/opal/src/icons/line-chart-up.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLineChartUp = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M13 6.5L13 3M13 3H9.5M13 3L7.99999 8L6.49999 6.5L3 10M3 13H13"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLineChartUp;


================================================
FILE: web/lib/opal/src/icons/link.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLink = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 17 9"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    style={{ transform: "rotate(315deg)" }}
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.0833 0.75H12.0833C12.5211 0.75 12.9545 0.836219 13.3589 1.00373C13.7634 1.17125 14.1308 1.41678 14.4404 1.72631C14.7499 2.03584 14.9954 2.4033 15.1629 2.80772C15.3304 3.21214 15.4167 3.64559 15.4167 4.08333C15.4167 4.52107 15.3304 4.95453 15.1629 5.35894C14.9954 5.76336 14.7499 6.13083 14.4404 6.44036C14.1308 6.74988 13.7634 6.99542 13.3589 7.16293C12.9545 7.33045 12.5211 7.41667 12.0833 7.41667H10.0833M6.08333 7.41667H4.08333C3.64559 7.41667 3.21214 7.33045 2.80772 7.16293C2.4033 6.99542 2.03584 6.74988 1.72631 6.44036C1.10119 5.81523 0.75 4.96739 0.75 4.08333C0.75 3.19928 1.10119 2.35143 1.72631 1.72631C2.35143 1.10119 3.19928 0.75 4.08333 0.75H6.08333M5.41667 4.08333H10.75"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgLink;


================================================
FILE: web/lib/opal/src/icons/linked-dots.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLinkedDots = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 4C6 5.10457 5.10457 6 4 6M6 4C6 2.89543 5.10457 2 4 2C2.89543 2 2 2.89543 2 4C2 5.10457 2.89543 6 4 6M6 4H10M4 6V10M10 4C10 5.10457 10.8954 6 12 6C13.1046 6 14 5.10457 14 4C14 2.89543 13.1046 2 12 2C10.8954 2 10 2.89543 10 4ZM4 10C2.89543 10 2 10.8954 2 12C2 13.1046 2.89543 14 4 14C5.10457 14 6 13.1046 6 12C6 10.8954 5.10457 10 4 10ZM14 12C14 13.1046 13.1046 14 12 14C10.8954 14 10 13.1046 10 12C10 10.8954 10.8954 10 12 10C13.1046 10 14 10.8954 14 12Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLinkedDots;


================================================
FILE: web/lib/opal/src/icons/litellm.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgLitellm = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 52 52"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path d="M48 30H34L29 38H48V30Z" fill="#3B88C3" />
    <path d="M29 38H11L8.58914 42L8 43H48V38H29Z" fill="#226699" />
    <path d="M48 43H8L8.58914 42H4V44H48V43Z" fill="#939598" />
    <path
      d="M18.6953 17.7969H31.7578C32.4927 17.7969 33.121 17.4087 33.4766 16.7656L35.5664 13H21.9023L18.6953 17.7969Z"
      fill="#55ACEE"
    />
    <path
      d="M36.3984 11.5234C36.636 11.0939 36.3737 10.4922 35.8828 10.4922H23.5651L21.9023 13H35.5664L36.3984 11.5234Z"
      fill="#3B88C3"
    />
    <path
      d="M48 30V8H29.5C27.0938 8 24.625 8.94531 23.8516 10.0625L23.5651 10.4922H35.8828C36.3737 10.4922 36.636 11.0939 36.3984 11.5234L35.5664 13L33.4766 16.7656C33.121 17.4087 32.4927 17.7969 31.7578 17.7969H18.6953C16.7788 18.6979 13.786 19.9843 11 21.6321V38H29L34 30H48Z"
      fill="#D1D3D4"
    />
    <path
      d="M4 30C4 34.6406 11 38 11 38V21.6321C7.3195 23.809 4 26.6167 4 30Z"
      fill="#3B88C3"
    />
  </svg>
);
export default SvgLitellm;


================================================
FILE: web/lib/opal/src/icons/lm-studio.tsx
================================================
import React from "react";
import type { IconProps } from "@opal/types";

const SvgLmStudio = ({ size, ...props }: IconProps) => {
  const gradientId = React.useId();
  return (
    <svg
      width={size}
      height={size}
      viewBox="0 0 480 480"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      {...props}
    >
      <rect width={480} height={480} rx={96} fill={`url(#${gradientId})`} />
      <rect
        opacity={0.25}
        x={128}
        y={80}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={64}
        y={80}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.25}
        x={208}
        y={136}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={144}
        y={136}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.25}
        x={160}
        y={192}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={96}
        y={192}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.25}
        x={104}
        y={248}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={40}
        y={248}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.25}
        x={160}
        y={304}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={96}
        y={304}
        width={208}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.25}
        x={296}
        y={360}
        width={136}
        height={40}
        rx={20}
        fill="white"
      />
      <rect
        opacity={0.9}
        x={224}
        y={360}
        width={144}
        height={40}
        rx={20}
        fill="white"
      />
      <defs>
        <linearGradient
          id={gradientId}
          x1={-206.055}
          y1={215.087}
          x2={224.119}
          y2={658.689}
          gradientUnits="userSpaceOnUse"
        >
          <stop stopColor="#6E7EF3" />
          <stop offset={1} stopColor="#4F13BE" />
        </linearGradient>
      </defs>
    </svg>
  );
};

export default SvgLmStudio;


================================================
FILE: web/lib/opal/src/icons/loader.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLoader = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgLoader;


================================================
FILE: web/lib/opal/src/icons/lock.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLock = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.66667 7.33334V4.66668C4.66667 3.78262 5.01786 2.93478 5.64298 2.30965C6.2681 1.68453 7.11595 1.33334 8 1.33334C8.88406 1.33334 9.7319 1.68453 10.357 2.30965C10.9821 2.93478 11.3333 3.78262 11.3333 4.66668V7.33334M3.33333 7.33334H12.6667C13.403 7.33334 14 7.9303 14 8.66668V13.3333C14 14.0697 13.403 14.6667 12.6667 14.6667H3.33333C2.59695 14.6667 2 14.0697 2 13.3333V8.66668C2 7.9303 2.59695 7.33334 3.33333 7.33334Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLock;


================================================
FILE: web/lib/opal/src/icons/log-out.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgLogOut = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 24 24"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9 21H5C4.46957 21 3.96086 20.7893 3.58579 20.4142C3.21071 20.0391 3 19.5304 3 19V5C3 4.46957 3.21071 3.96086 3.58579 3.58579C3.96086 3.21071 4.46957 3 5 3H9"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M16 17L21 12L16 7"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M21 12H9"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLogOut;


================================================
FILE: web/lib/opal/src/icons/maximize-2.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgMaximize2 = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10 2H14M14 2V6M14 2L9.33333 6.66667M6 14H2M2 14V10M2 14L6.66667 9.33333"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMaximize2;


================================================
FILE: web/lib/opal/src/icons/mcp.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMcp = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.21111 3.25011L4.28535 6.17584C3.30914 7.15205 3.30914 8.7348 4.28535 9.71101C5.26155 10.6872 6.8443 10.6872 7.82051 9.71101L10.7463 6.78528M0.75 6.17566L5.44353 1.48216C6.41974 0.505948 8.00249 0.505947 8.9787 1.48216C9.95491 2.45837 9.95491 4.04111 8.9787 5.01732M8.9787 5.01732L6.05294 7.94306M8.9787 5.01732C9.95491 4.04111 11.538 4.04148 12.5142 5.01769C13.4904 5.9939 13.4904 7.57665 12.5142 8.55286L8.17457 12.8932C7.97933 13.0884 7.97934 13.405 8.17459 13.6003L8.82434 14.25"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgMcp;


================================================
FILE: web/lib/opal/src/icons/menu.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMenu = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 32 32"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M26.5 9H5.5M5.5 23H26.5M26.5 16H5.5"
      strokeWidth={2}
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMenu;


================================================
FILE: web/lib/opal/src/icons/microphone-off.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMicrophoneOff = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    {/* Microphone body */}
    <path
      d="M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    {/* Diagonal slash */}
    <path
      d="M2 2L14 14"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMicrophoneOff;


================================================
FILE: web/lib/opal/src/icons/microphone.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMicrophone = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMicrophone;


================================================
FILE: web/lib/opal/src/icons/minus-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMinusCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.33333 7.99998H10.6667M14.6667 7.99998C14.6667 11.6819 11.6819 14.6666 7.99999 14.6666C4.3181 14.6666 1.33333 11.6819 1.33333 7.99998C1.33333 4.31808 4.3181 1.33331 7.99999 1.33331C11.6819 1.33331 14.6667 4.31808 14.6667 7.99998Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgMinusCircle;


================================================
FILE: web/lib/opal/src/icons/minus.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMinus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    strokeWidth={2.5}
    {...props}
  >
    <path d="M4 8H12" strokeLinecap="round" />
  </svg>
);

export default SvgMinus;


================================================
FILE: web/lib/opal/src/icons/moon.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMoon = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.7696 11.665C13.4681 10.7615 13.8949 9.67776 14 8.54054C13.0992 9.20635 11.9894 9.52673 10.8724 9.44344C9.75541 9.36014 8.7054 8.87868 7.91336 8.08664C7.12132 7.2946 6.63986 6.24459 6.55656 5.12758C6.47327 4.01056 6.79365 2.90076 7.45946 2C6.32224 2.10509 5.23848 2.53189 4.33497 3.23045C3.43147 3.929 2.74559 4.87043 2.35761 5.94457C1.96962 7.0187 1.89557 8.18112 2.14412 9.29581C2.39267 10.4105 2.95354 11.4313 3.7611 12.2389C4.56866 13.0465 5.5895 13.6073 6.70419 13.8559C7.81888 14.1044 8.9813 14.0304 10.0554 13.6424C11.1296 13.2544 12.071 12.5685 12.7696 11.665Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMoon;


================================================
FILE: web/lib/opal/src/icons/more-horizontal.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgMoreHorizontal = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 8.75C8.41421 8.75 8.75 8.41421 8.75 8C8.75 7.58579 8.41421 7.25 8 7.25C7.58579 7.25 7.25 7.58579 7.25 8C7.25 8.41421 7.58579 8.75 8 8.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M12.75 8.75C13.1642 8.75 13.5 8.41421 13.5 8C13.5 7.58579 13.1642 7.25 12.75 7.25C12.3358 7.25 12 7.58579 12 8C12 8.41421 12.3358 8.75 12.75 8.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M3.25 8.75C3.66421 8.75 4 8.41421 4 8C4 7.58579 3.66421 7.25 3.25 7.25C2.83579 7.25 2.5 7.58579 2.5 8C2.5 8.41421 2.83579 8.75 3.25 8.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMoreHorizontal;


================================================
FILE: web/lib/opal/src/icons/music-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgMusicSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.5 10V5L10.5 4.75M9.5 10C9.5 10.8284 8.82843 11.5 8 11.5C7.17157 11.5 6.5 10.8284 6.5 10C6.5 9.17157 7.17157 8.5 8 8.5C8.82843 8.5 9.5 9.17157 9.5 10Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgMusicSmall;


================================================
FILE: web/lib/opal/src/icons/network-graph.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgNetworkGraph = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_2828_22555)">
      <path
        d="M9.23744 4.48744C9.92086 3.80402 9.92086 2.69598 9.23744 2.01256C8.55402 1.32915 7.44598 1.32915 6.76256 2.01256C6.07915 2.69598 6.07915 3.80402 6.76256 4.48744M9.23744 4.48744C8.89573 4.82915 8.44787 5 8 5M9.23744 4.48744L11.7626 8.01256M6.76256 4.48744C7.10427 4.82915 7.55214 5 8 5M6.76256 4.48744L4.23744 8.01256M8 11C7.0335 11 6.25001 11.7835 6.25001 12.75C6.25001 13.7165 7.03351 14.5 8.00001 14.5C8.9665 14.5 9.75 13.7165 9.75 12.75C9.75 11.7835 8.9665 11 8 11ZM8 11V5M4.23744 8.01256C4.92085 8.69598 4.92422 9.81658 4.2408 10.5C3.55739 11.1834 2.44598 11.1709 1.76256 10.4874C1.07915 9.80402 1.07915 8.69598 1.76256 8.01256C2.44598 7.32915 3.55402 7.32915 4.23744 8.01256ZM11.7626 8.01256C11.0791 8.69598 11.0791 9.80402 11.7626 10.4874C12.446 11.1709 13.554 11.1709 14.2374 10.4874C14.9209 9.80402 14.9209 8.69598 14.2374 8.01256C13.554 7.32915 12.446 7.32915 11.7626 8.01256Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_2828_22555">
        <rect width="16" height="16" fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgNetworkGraph;


================================================
FILE: web/lib/opal/src/icons/notification-bubble.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgNotificationBubble = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 6 6"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    className={props.className}
    {...props}
  >
    <path
      d="M0 3C0 1.34315 1.34315 0 3 0C4.65685 0 6 1.34315 6 3C6 4.65685 4.65685 6 3 6C1.34315 6 0 4.65685 0 3Z"
      fill="#DC2626"
    />
  </svg>
);
export default SvgNotificationBubble;


================================================
FILE: web/lib/opal/src/icons/ollama.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgOllama = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M5.24969 0.983626C5.3947 1.04069 5.5256 1.13467 5.64442 1.25886C5.84246 1.46428 6.00961 1.75831 6.13716 2.10672C6.26538 2.45715 6.34862 2.84516 6.38018 3.23452C6.80284 2.9954 7.27191 2.84981 7.75568 2.80757L7.78992 2.80488C8.37396 2.75789 8.95128 2.86329 9.45476 3.12308C9.52256 3.15866 9.58902 3.19693 9.65414 3.2372C9.68771 2.85523 9.76961 2.47594 9.89581 2.13358C10.0234 1.7845 10.1905 1.49113 10.3879 1.28504C10.4982 1.16573 10.6331 1.07181 10.7833 1.00981C10.9558 0.942676 11.1391 0.930593 11.3176 0.981612C11.5868 1.05814 11.8178 1.22865 11.9997 1.47637C12.1662 1.7026 12.291 1.9926 12.3763 2.34034C12.5307 2.96734 12.5575 3.79238 12.4535 4.78725L12.4891 4.81411L12.5065 4.82686C13.0147 5.21353 13.3685 5.76468 13.5558 6.40443C13.8478 7.40267 13.7008 8.52241 13.1973 9.14874L13.1852 9.16283L13.1866 9.16485C13.4665 9.67638 13.6363 10.2168 13.6726 10.776L13.6739 10.7961C13.7169 11.5111 13.5397 12.2307 13.1275 12.9376L13.1228 12.9443L13.1295 12.9604C13.4464 13.7371 13.5457 14.5192 13.4235 15.3006L13.4195 15.3268C13.4006 15.4409 13.3372 15.5429 13.2431 15.6103C13.1491 15.6778 13.0322 15.7052 12.918 15.6866C12.8615 15.6778 12.8072 15.6579 12.7584 15.628C12.7096 15.5982 12.6671 15.559 12.6334 15.5128C12.5997 15.4665 12.5755 15.414 12.5622 15.3584C12.5488 15.3027 12.5465 15.245 12.5555 15.1885C12.6676 14.495 12.5622 13.7996 12.2333 13.092C12.2026 13.0263 12.189 12.9539 12.1937 12.8815C12.1984 12.8091 12.2212 12.739 12.2602 12.6778L12.2628 12.6738C12.6683 12.0535 12.8361 11.4453 12.7999 10.8478C12.769 10.3249 12.5817 9.81132 12.2628 9.32193C12.2008 9.22679 12.1787 9.11107 12.2013 8.99978C12.224 8.88848 12.2894 8.79055 12.3837 8.72716L12.3897 8.72313C12.5528 8.61639 12.7032 8.34384 12.7791 7.97126C12.8628 7.53071 12.8409 7.07659 12.7153 6.6461C12.5777 6.17619 12.3259 5.78414 11.9735 5.51629C11.5741 5.21152 11.0451 5.0645 10.3758 5.1068C10.2883 5.11249 10.2011 5.09179 10.1254 5.0474C10.0498 5.003 9.98922 4.93693 9.95153 4.85774C9.74074 4.41132 9.43328 4.09178 9.04996 3.89374C8.68195 3.71007 8.26995 3.63288 7.86041 3.67087C7.02463 3.73733 6.28753 4.20859 6.06802 4.80269C6.03696 4.88631 5.9811 4.95844 5.90792 5.00943C5.83473 5.06042 5.74772 5.08783 5.65852 5.088C4.94223 5.08934 4.38774 5.25717 3.98227 5.55993C3.63184 5.82174 3.39286 6.1876 3.26665 6.62596C3.15245 7.03859 3.13681 7.47227 3.221 7.89205C3.29619 8.26664 3.44321 8.57678 3.6117 8.74394L3.61707 8.74864C3.75939 8.8876 3.7896 9.10443 3.69025 9.27561C3.44858 9.69317 3.26799 10.3155 3.23846 10.9136C3.20489 11.597 3.36332 12.1904 3.72113 12.616L3.73187 12.6288C3.78587 12.6917 3.82059 12.7689 3.8319 12.851C3.84321 12.9332 3.83062 13.0168 3.79564 13.092C3.40897 13.9217 3.29015 14.6038 3.41837 15.1408C3.44142 15.2525 3.41997 15.3688 3.3586 15.4649C3.29723 15.561 3.20078 15.6293 3.08977 15.6554C2.97876 15.6815 2.86196 15.6632 2.76423 15.6044C2.6665 15.5457 2.59555 15.4511 2.56648 15.3409C2.40335 14.6575 2.51412 13.8747 2.88401 12.9926L2.89341 12.9691L2.88803 12.9611C2.70624 12.6926 2.57056 12.3956 2.48659 12.0823L2.48324 12.0696C2.38134 11.6788 2.34125 11.2745 2.36442 10.8713C2.39395 10.2604 2.55104 9.63476 2.78197 9.13262L2.79002 9.11517L2.78868 9.11383C2.59199 8.83322 2.44631 8.47407 2.36576 8.07666L2.3624 8.06055C2.25141 7.50621 2.2728 6.93351 2.42483 6.38899C2.60072 5.77475 2.94644 5.2471 3.45596 4.8658C3.49624 4.83559 3.53853 4.80538 3.58082 4.77718C3.47409 3.77492 3.50094 2.94451 3.65601 2.31349C3.74127 1.96575 3.8668 1.67574 4.03329 1.44951C4.21454 1.20247 4.44547 1.03196 4.71466 0.95476C4.89323 0.903741 5.07717 0.915153 5.24969 0.982955V0.983626ZM8.01279 7.08581C8.64114 7.08581 9.22115 7.29593 9.65481 7.65978C10.0777 8.01356 10.3295 8.48884 10.3295 8.96211C10.3295 9.55823 10.0569 10.0228 9.56888 10.3195C9.15267 10.5712 8.59482 10.6934 7.95573 10.6934C7.27838 10.6934 6.69972 10.5195 6.28216 10.2007C5.86797 9.88516 5.6357 9.4421 5.6357 8.96211C5.6357 8.4875 5.90288 8.01087 6.3446 7.65575C6.79303 7.29526 7.38512 7.08581 8.01279 7.08581ZM8.01279 7.6873C7.54706 7.68323 7.09369 7.83704 6.72657 8.12365C6.4171 8.37203 6.24189 8.68419 6.24189 8.96278C6.24189 9.2501 6.38286 9.5193 6.65138 9.72405C6.95683 9.95699 7.40593 10.0919 7.95573 10.0919C8.49211 10.0919 8.94457 9.99324 9.2527 9.80595C9.56351 9.61798 9.72261 9.34543 9.72261 8.96211C9.72261 8.67815 9.55747 8.36465 9.26411 8.11895C8.9392 7.84707 8.49882 7.6873 8.01279 7.6873ZM8.4572 8.49958L8.45989 8.50227C8.54044 8.60363 8.52366 8.75065 8.42229 8.83121L8.22627 8.98561V9.28501C8.22591 9.35166 8.19914 9.41545 8.15183 9.46239C8.10451 9.50933 8.04051 9.53559 7.97386 9.53541C7.90721 9.53559 7.84321 9.50933 7.79589 9.46239C7.74857 9.41545 7.7218 9.35166 7.72145 9.28501V8.97621L7.53952 8.82986C7.51552 8.81063 7.49557 8.78684 7.48082 8.75986C7.46606 8.73288 7.45679 8.70325 7.45355 8.67267C7.4503 8.64209 7.45314 8.61117 7.46191 8.58169C7.47067 8.55221 7.48519 8.52476 7.50461 8.50092C7.54424 8.45269 7.6013 8.42204 7.66339 8.41563C7.72548 8.40922 7.7876 8.42757 7.83624 8.46669L7.98057 8.58215L8.12826 8.46534C8.17673 8.42705 8.23825 8.4092 8.29969 8.41559C8.36113 8.42199 8.41765 8.45213 8.4572 8.49958ZM5.07381 7.21134C5.3947 7.21134 5.65583 7.47315 5.65583 7.79605C5.65601 7.95083 5.59474 8.09935 5.48549 8.20899C5.37623 8.31862 5.22792 8.3804 5.07314 8.38076C4.91859 8.38023 4.77056 8.31846 4.66146 8.20899C4.55237 8.09952 4.49112 7.95127 4.49112 7.79672C4.49076 7.64194 4.55186 7.49335 4.66099 7.38359C4.77012 7.27383 4.91903 7.21188 5.07381 7.21134ZM10.9182 7.21134C11.2404 7.21134 11.5009 7.47315 11.5009 7.79605C11.5011 7.95083 11.4398 8.09935 11.3306 8.20899C11.2213 8.31862 11.073 8.3804 10.9182 8.38076C10.7637 8.38023 10.6156 8.31846 10.5065 8.20899C10.3974 8.09952 10.3362 7.95127 10.3362 7.79672C10.3358 7.64194 10.3969 7.49335 10.5061 7.38359C10.6152 7.27383 10.7634 7.21188 10.9182 7.21134ZM4.93754 1.79591L4.93552 1.79725C4.85775 1.83107 4.79134 1.88653 4.7442 1.95702L4.74084 1.96105C4.6482 2.08793 4.56765 2.27455 4.50723 2.51958C4.39311 2.98412 4.36223 3.61448 4.42399 4.38715C4.71265 4.30123 5.02749 4.24752 5.3665 4.22805L5.37321 4.22738L5.38597 4.20456C5.41685 4.14951 5.44974 4.09648 5.48532 4.04412C5.56789 3.52654 5.50009 2.90826 5.31548 2.40344C5.22553 2.15909 5.1161 1.96709 5.01138 1.85767C4.98976 1.83492 4.96567 1.81465 4.93955 1.79725L4.93754 1.79591ZM11.0961 1.82276L11.0948 1.82343C11.0686 1.84083 11.0446 1.8611 11.0229 1.88385C10.9182 1.99327 10.8081 2.18594 10.7188 2.43029C10.5242 2.96331 10.459 3.62253 10.5644 4.1569L10.6034 4.22201L10.6087 4.23141H10.6289C10.962 4.2315 11.2935 4.27942 11.613 4.37373C11.6707 3.61918 11.6385 3.00225 11.5271 2.54643C11.4667 2.3014 11.3861 2.11478 11.2928 1.9879L11.2901 1.98387C11.2431 1.91313 11.1767 1.85743 11.0988 1.82343H11.0961V1.82276Z"
      fill="currentColor"
    />
  </svg>
);

export default SvgOllama;


================================================
FILE: web/lib/opal/src/icons/onyx-logo-typed.tsx
================================================
import SvgOnyxLogo from "@opal/icons/onyx-logo";
import SvgOnyxTyped from "@opal/icons/onyx-typed";
import { cn } from "@opal/utils";

interface OnyxLogoTypedProps {
  size?: number;
  className?: string;
}

// # NOTE(@raunakab):
// This ratio is not some random, magical number; it is available on Figma.
const HEIGHT_TO_GAP_RATIO = 5 / 16;

const SvgOnyxLogoTyped = ({ size: height, className }: OnyxLogoTypedProps) => {
  const gap = height != null ? height * HEIGHT_TO_GAP_RATIO : undefined;

  return (
    <div
      className={cn(`flex flex-row items-center`, className)}
      style={{ gap }}
    >
      <SvgOnyxLogo size={height} />
      <SvgOnyxTyped size={height} />
    </div>
  );
};
export default SvgOnyxLogoTyped;


================================================
FILE: web/lib/opal/src/icons/onyx-logo.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgOnyxLogo = ({ size, ...props }: IconProps) => (
  <svg
    height={size}
    viewBox="0 0 64 64"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M10.4014 13.25L18.875 32L10.3852 50.75L2 32L10.4014 13.25Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M53.5264 13.25L62 32L53.5102 50.75L45.125 32L53.5264 13.25Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M32 45.125L50.75 53.5625L32 62L13.25 53.5625L32 45.125Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M32 2L50.75 10.4375L32 18.875L13.25 10.4375L32 2Z"
      fill="var(--theme-primary-05)"
    />
  </svg>
);
export default SvgOnyxLogo;


================================================
FILE: web/lib/opal/src/icons/onyx-octagon.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgOnyxOctagon = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_586_578)">
      <path
        d="M4.5 2.50002L8 1.00002L11.5 2.50002M13.5 4.50002L15 8.00001L13.5 11.5M11.5 13.5L8 15L4.5 13.5M2.5 11.5L1 8L2.5 4.50002"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_586_578">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgOnyxOctagon;


================================================
FILE: web/lib/opal/src/icons/onyx-typed.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgOnyxTyped = ({ size, ...props }: IconProps) => (
  <svg
    height={size}
    viewBox="0 0 152 64"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M19.1795 51.2136C15.6695 51.2136 12.4353 50.3862 9.47691 48.7315C6.56865 47.0768 4.2621 44.8454 2.55726 42.0374C0.85242 39.1793 0 36.0955 0 32.7861C0 30.279 0.451281 27.9223 1.35384 25.716C2.30655 23.4596 3.76068 21.3285 5.71623 19.3228L11.8085 13.08C12.4604 12.6789 13.4131 12.3529 14.6666 12.1022C15.9202 11.8014 17.2991 11.6509 18.8034 11.6509C22.3134 11.6509 25.5225 12.4783 28.4307 14.133C31.3891 15.7877 33.7208 18.0441 35.4256 20.9023C37.1304 23.7103 37.9829 26.794 37.9829 30.1536C37.9829 32.6106 37.5065 34.9673 36.5538 37.2237C35.6512 39.4802 34.147 41.6864 32.041 43.8426L26.3248 49.7845C25.3219 50.2358 24.2188 50.5868 23.0154 50.8375C21.8621 51.0882 20.5835 51.2136 19.1795 51.2136ZM20.1572 43.8426C21.8621 43.8426 23.4917 43.4164 25.0461 42.5639C26.6005 41.6614 27.8541 40.3577 28.8068 38.6528C29.8097 36.948 30.3111 34.9172 30.3111 32.5605C30.3111 30.0032 29.6843 27.6966 28.4307 25.6408C27.2273 23.5849 25.6478 21.9803 23.6923 20.8271C21.7869 19.6236 19.8313 19.0219 17.8256 19.0219C16.0706 19.0219 14.4159 19.4732 12.8615 20.3758C11.3573 21.2282 10.1288 22.5068 9.17606 24.2117C8.22335 25.9166 7.747 27.9473 7.747 30.304C7.747 32.8613 8.34871 35.1679 9.55212 37.2237C10.7555 39.2796 12.31 40.9092 14.2154 42.1127C16.1709 43.2659 18.1515 43.8426 20.1572 43.8426Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M42.6413 50.4614V12.4031H50.6891V17.7433L55.5028 12.7039C56.0544 12.4532 56.8065 12.2276 57.7592 12.027C58.7621 11.7763 59.8903 11.6509 61.1438 11.6509C64.0521 11.6509 66.5843 12.3028 68.7404 13.6065C70.9467 14.8601 72.6264 16.6401 73.7797 18.9467C74.9831 21.2533 75.5848 23.961 75.5848 27.0698V50.4614H67.6122V29.1006C67.6122 26.9946 67.2612 25.1895 66.5592 23.6852C65.9074 22.1308 64.9547 20.9775 63.7011 20.2253C62.4977 19.4231 61.0686 19.0219 59.4139 19.0219C56.7564 19.0219 54.6253 19.9245 53.0208 21.7296C51.4663 23.4846 50.6891 25.9416 50.6891 29.1006V50.4614H42.6413Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M82.3035 64V56.0273H89.9753C91.2288 56.0273 92.2066 55.7264 92.9086 55.1247C93.6607 54.523 94.2625 53.5452 94.7137 52.1913L108.027 12.4031H116.751L103.664 49.4084C103.062 51.1634 102.461 52.5173 101.859 53.47C101.307 54.4227 100.53 55.4506 99.5274 56.5538L92.4573 64H82.3035ZM90.7274 46.6255L76.9633 12.4031H85.989L99.4522 46.6255H90.7274Z"
      fill="var(--theme-primary-05)"
    />
    <path
      d="M115.657 50.4614L129.045 31.2066L116.033 12.4031H125.435L134.085 24.8134L142.358 12.4031H151.308L138.372 31.0562L151.684 50.4614H142.358L133.332 37.3742L124.683 50.4614H115.657Z"
      fill="var(--theme-primary-05)"
    />
  </svg>
);
export default SvgOnyxTyped;


================================================
FILE: web/lib/opal/src/icons/openai.tsx
================================================
import React from "react";
import type { IconProps } from "@opal/types";

const SvgOpenAI = ({ size, ...props }: IconProps) => {
  const clipId = React.useId();
  return (
    <svg
      width={size}
      height={size}
      viewBox="0 0 16 16"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      {...props}
    >
      <g clipPath={`url(#${clipId})`}>
        <path
          d="M6.27989 5.99136V4.58828C6.27989 4.4701 6.32383 4.38143 6.42625 4.32242L9.22206 2.69783C9.60266 2.4763 10.0564 2.37296 10.5247 2.37296C12.2813 2.37296 13.3937 3.74654 13.3937 5.20864C13.3937 5.31199 13.3937 5.43016 13.379 5.54833L10.4808 3.83506C10.3052 3.73172 10.1295 3.73172 9.95386 3.83506L6.27989 5.99136ZM12.8082 11.4561V8.10334C12.8082 7.89651 12.7203 7.74883 12.5447 7.64548L8.87071 5.48918L10.071 4.79498C10.1734 4.73597 10.2613 4.73597 10.3637 4.79498L13.1595 6.41959C13.9647 6.89226 14.5061 7.89651 14.5061 8.87124C14.5061 9.99365 13.8476 11.0277 12.8082 11.4561ZM5.41629 8.50218L4.21603 7.7933C4.11361 7.73429 4.06967 7.64563 4.06967 7.52745V4.27824C4.06967 2.69797 5.26993 1.50157 6.89473 1.50157C7.50955 1.50157 8.08029 1.70841 8.56345 2.07761L5.67991 3.76136C5.5043 3.86471 5.41643 4.01239 5.41643 4.21923L5.41629 8.50218ZM7.99984 10.0086L6.27988 9.03389V6.96624L7.99984 5.99151L9.71963 6.96624V9.03389L7.99984 10.0086ZM9.10494 14.4985C8.49012 14.4985 7.91938 14.2917 7.43622 13.9226L10.3197 12.2387C10.4953 12.1354 10.5832 11.9878 10.5832 11.7809V7.4978L11.7982 8.20668C11.9006 8.2657 11.9445 8.35436 11.9445 8.47254V11.7218C11.9445 13.302 10.7296 14.4985 9.10494 14.4985ZM5.63583 11.205L2.84002 9.58041C2.03489 9.10771 1.4934 8.10348 1.4934 7.12875C1.4934 5.99151 2.16672 4.97244 3.20591 4.5441V7.91148C3.20591 8.11831 3.29379 8.26599 3.46939 8.36934L7.12882 10.5108L5.92856 11.205C5.82613 11.264 5.73825 11.264 5.63583 11.205ZM5.47491 13.6272C3.82088 13.6272 2.60592 12.3717 2.60592 10.821C2.60592 10.7028 2.62061 10.5846 2.63517 10.4665L5.51871 12.1502C5.69432 12.2535 5.87006 12.2535 6.04567 12.1502L9.71964 10.0088V11.4119C9.71964 11.53 9.67571 11.6186 9.57328 11.6777L6.77746 13.3023C6.39688 13.5238 5.94323 13.6272 5.47491 13.6272ZM9.10494 15.3846C10.8761 15.3846 12.3544 14.1145 12.6912 12.4307C14.3305 12.0024 15.3845 10.4516 15.3845 8.87139C15.3845 7.8375 14.9453 6.83326 14.1549 6.10955C14.2281 5.79937 14.2721 5.48918 14.2721 5.17914C14.2721 3.06718 12.5741 1.48677 10.6126 1.48677C10.2175 1.48677 9.83689 1.54578 9.4563 1.67878C8.79753 1.02891 7.88999 0.615387 6.89473 0.615387C5.12357 0.615387 3.64528 1.88548 3.30848 3.56923C1.66914 3.99756 0.615234 5.54834 0.615234 7.1286C0.615234 8.1625 1.05431 9.16673 1.84474 9.89044C1.77155 10.2006 1.72762 10.5108 1.72762 10.8209C1.72762 12.9328 3.42558 14.5132 5.38704 14.5132C5.78218 14.5132 6.16278 14.4542 6.54336 14.3213C7.20198 14.9711 8.10953 15.3846 9.10494 15.3846Z"
          fill="currentColor"
        />
      </g>
      <defs>
        <clipPath id={clipId}>
          <rect width="16" height="16" fill="white" />
        </clipPath>
      </defs>
    </svg>
  );
};

export default SvgOpenAI;


================================================
FILE: web/lib/opal/src/icons/openrouter.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgOpenrouter = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 48 40"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <title>OpenRouter</title>
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M33.6 0L48 8.19239V8.36602L33.6 16.4V12.2457L31.8202 12.1858C29.7043 12.1299 28.6014 12.1898 27.2887 12.4053C25.1628 12.7546 23.2168 13.5569 21.001 15.1035L16.6733 18.1071C16.1059 18.4962 15.6843 18.7776 15.3147 19.0151L14.2857 19.6577L13.4925 20.1247L14.2617 20.5837L15.3207 21.2583C16.2717 21.8849 17.6583 22.8469 20.7173 24.9823C22.9351 26.529 24.8791 27.3312 27.005 27.6805L27.6044 27.7703C28.991 27.9519 30.7029 27.9579 33.6 27.8362V23.6L48 31.7198V31.8934L33.6 40V36.284L31.9041 36.3279C29.1349 36.4117 27.6344 36.3319 25.6344 36.0046C22.2498 35.4458 19.1209 34.1566 15.8821 31.8954L11.5704 28.9019C11.0745 28.5603 10.5715 28.2289 10.0619 27.908L9.12887 27.3492C8.62495 27.0592 8.11878 26.7731 7.61039 26.491C5.81019 25.4912 1.12488 24.2658 0 24.2658V15.836C1.12687 15.822 6.09391 14.5946 7.89011 13.5928L9.92008 12.4353L10.7952 11.8884C11.6503 11.3296 12.9371 10.4396 16.1618 8.19039C19.4006 5.92925 22.5275 4.63803 25.9141 4.08123C28.2158 3.70204 29.9237 3.65614 33.6 3.80582V0Z"
      fill="currentColor"
    />
  </svg>
);

export default SvgOpenrouter;


================================================
FILE: web/lib/opal/src/icons/organization.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgOrganization = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.5 14H13.5C14.0523 14 14.5 13.5523 14.5 13V6C14.5 5.44772 14.0523 5 13.5 5H7.5M7.5 14V11M7.5 14H4.5M7.5 5V3C7.5 2.44772 7.05228 2 6.5 2H4.5M7.5 5H1.5M7.5 5V8M1.5 5V3C1.5 2.44772 1.94772 2 2.5 2H4.5M1.5 5V8M7.5 8V11M7.5 8H4.5M1.5 8V11M1.5 8H4.5M7.5 11H4.5M1.5 11V13C1.5 13.5523 1.94772 14 2.5 14H4.5M1.5 11H4.5M4.5 2V8M4.5 14V11M4.5 11V8M10 8H12M10 11H12"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgOrganization;


================================================
FILE: web/lib/opal/src/icons/paint-brush.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPaintBrush = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 32 32"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.00001 17L5.00002 19.2344C5.00003 20.2431 5.7511 21.0939 6.75195 21.219L11.2481 21.781C12.2489 21.9061 13 22.7569 13 23.7656L13 26C13 27.6569 14.3431 29 16 29C17.6569 29 19 27.6569 19 26L19 23.7656C19 22.7569 19.7511 21.9061 20.7519 21.781L25.2481 21.219C26.2489 21.0939 27 20.2431 27 19.2344L27 17M5.00001 17L5 9C5 5.68629 7.68629 3 11 3H17M5.00001 17H27M27 17L27 3H22M22 3L22 10M22 3H17M17 3L17 8"
      strokeWidth={2.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgPaintBrush;


================================================
FILE: web/lib/opal/src/icons/paperclip.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPaperclip = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.0924 3.99814L12.0924 10.6626C12.0924 11.724 11.6707 12.742 10.9202 13.4926C10.1696 14.2431 9.15163 14.6648 8.09018 14.6648C7.02872 14.6648 6.01074 14.2431 5.26018 13.4926C4.50961 12.742 4.08795 11.724 4.08795 10.6626L4.08795 3.99814C4.08795 3.2905 4.36906 2.61184 4.86944 2.11147C5.36981 1.6111 6.04847 1.32999 6.7561 1.32999C7.46374 1.32999 8.14239 1.61109 8.64277 2.11147C9.14314 2.61184 9.42425 3.2905 9.42425 3.99814L9.41954 10.6673C9.41954 11.0211 9.27898 11.3604 9.0288 11.6106C8.77861 11.8608 8.43928 12.0013 8.08546 12.0013C7.73164 12.0013 7.39232 11.8608 7.14213 11.6106C6.89194 11.3604 6.75139 11.0211 6.75139 10.6673L6.7561 4.66753"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgPaperclip;


================================================
FILE: web/lib/opal/src/icons/pause-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPauseCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.08333 9.41667V5.41667M8.75 9.41667V5.41667M14.0833 7.41667C14.0833 11.0986 11.0986 14.0833 7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgPauseCircle;


================================================
FILE: web/lib/opal/src/icons/pen-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgPenSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.5 11L11.5 6L10 4.5L5 9.5L5 11H6.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgPenSmall;


================================================
FILE: web/lib/opal/src/icons/pencil-ruler.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgPencilRuler = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 24 24"
    fill="none"
    strokeWidth={1.5}
    strokeLinecap="round"
    strokeLinejoin="round"
    className="lucide lucide-pencil-ruler-icon lucide-pencil-ruler"
    stroke="currentColor"
    {...props}
  >
    <path d="M13 7 8.7 2.7a2.41 2.41 0 0 0-3.4 0L2.7 5.3a2.41 2.41 0 0 0 0 3.4L7 13" />
    <path d="m8 6 2-2" />
    <path d="m18 16 2-2" />
    <path d="m17 11 4.3 4.3c.94.94.94 2.46 0 3.4l-2.6 2.6c-.94.94-2.46.94-3.4 0L11 17" />
    <path d="M21.174 6.812a1 1 0 0 0-3.986-3.987L3.842 16.174a2 2 0 0 0-.5.83l-1.321 4.352a.5.5 0 0 0 .623.622l4.353-1.32a2 2 0 0 0 .83-.497z" />
    <path d="m15 5 4 4" />
  </svg>
);
export default SvgPencilRuler;


================================================
FILE: web/lib/opal/src/icons/pie-chart.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPieChart = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_76_2931)">
      <path
        d="M14.14 10.5933C13.7159 11.5963 13.0525 12.4802 12.2079 13.1675C11.3633 13.8549 10.3632 14.325 9.29496 14.5365C8.22674 14.7481 7.12295 14.6948 6.0801 14.3812C5.03725 14.0676 4.08709 13.5034 3.31268 12.7378C2.53828 11.9722 1.96321 11.0285 1.63776 9.98931C1.31231 8.95011 1.24638 7.847 1.44574 6.77643C1.64509 5.70586 2.10367 4.70043 2.78137 3.84803C3.45907 2.99563 4.33526 2.32222 5.33334 1.88668M14.6667 8.00001C14.6667 7.12453 14.4942 6.25762 14.1592 5.44879C13.8242 4.63995 13.3331 3.90502 12.7141 3.28597C12.095 2.66691 11.3601 2.17584 10.5512 1.84081C9.74239 1.50578 8.87548 1.33334 8 1.33334V8.00001H14.6667Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_76_2931">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgPieChart;


================================================
FILE: web/lib/opal/src/icons/pin.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPin = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.70001 9.29581L2.20001 13.7958M6.70001 9.29581L9.99291 12.5887C10.6229 13.2187 11.7 12.7725 11.7 11.8816V10.5384C11.7 9.7428 12.0161 8.97974 12.5787 8.41713L13.4929 7.50292C13.8834 7.11239 13.8834 6.47923 13.4929 6.0887L9.90712 2.50292C9.51659 2.11239 8.88343 2.11239 8.49291 2.50292L7.57869 3.41713C7.01608 3.97974 6.25302 4.29581 5.45737 4.29581H4.11423C3.22332 4.29581 2.77715 5.37295 3.40712 6.00291L6.70001 9.29581Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgPin;


================================================
FILE: web/lib/opal/src/icons/pinned.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPinned = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 16 16"
    fill="none"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 8.85714V14.14286M8 8.85714L13.14286 8.85714C14.03377 8.85714 14.47993 7.78 13.84997 7.15003L12.90022 6.20028C12.33761 5.63767 12.02155 4.87461 12.02155 4.07896V2.78571C12.02155 2.23342 11.57384 1.78571 11.02155 1.78571L4.97845 1.78571C4.42616 1.78571 3.97845 2.23342 3.97845 2.78571L3.97845 4.07896C3.97845 4.87461 3.66238 5.63767 3.09977 6.20028L2.15002 7.15003C1.52006 7.78 1.96622 8.85714 2.85713 8.85714H8Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgPinned;


================================================
FILE: web/lib/opal/src/icons/play-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPlayCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M6.08333 4.75L10.0833 7.41667L6.08333 10.0833V4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgPlayCircle;


================================================
FILE: web/lib/opal/src/icons/plug.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPlug = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12 10.5H15M12 10.5V12.5M12 10.5V5.5M12 3.5H8.5C6.01472 3.5 4 5.51472 4 8M12 3.5V5.5M12 3.5V2M12 12.5H8.5C6.01472 12.5 4 10.4853 4 8M12 12.5V14M4 8H1M12 5.5H15"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgPlug;


================================================
FILE: web/lib/opal/src/icons/plus-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPlusCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2625)">
      <path
        d="M7.99999 5.33333V10.6667M5.33333 7.99999H10.6667M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2625">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgPlusCircle;


================================================
FILE: web/lib/opal/src/icons/plus.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgPlus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 16 16"
    fill="none"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8 2V14M2 8H14"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgPlus;


================================================
FILE: web/lib/opal/src/icons/progress-bars.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgProgressBars = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.5 2.00003L13.25 2C13.9403 2 14.5 2.55964 14.5 3.25C14.5 3.94036 13.9403 4.5 13.25 4.5L5.5 4.50003M5.5 2.00003L2.74998 2C2.05963 2 1.49998 2.55964 1.49998 3.25C1.49998 3.94036 2.05963 4.5 2.74998 4.5L5.5 4.50003M5.5 2.00003V4.50003M10.5 11.5H13.25C13.9403 11.5 14.5 12.0596 14.5 12.75C14.5 13.4404 13.9403 14 13.25 14H10.5M10.5 11.5H2.74998C2.05963 11.5 1.49998 12.0596 1.49998 12.75C1.49998 13.4404 2.05963 14 2.74999 14H10.5M10.5 11.5V14M8 6.75H13.25C13.9403 6.75 14.5 7.30964 14.5 8C14.5 8.69036 13.9403 9.25 13.25 9.25H8M8 6.75H2.74998C2.05963 6.75 1.49998 7.30964 1.49998 8C1.49998 8.69036 2.05963 9.25 2.74998 9.25H8M8 6.75V9.25"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgProgressBars;


================================================
FILE: web/lib/opal/src/icons/progress-circle.tsx
================================================
import { cn } from "@/lib/utils";
import SvgCheckCircle from "@opal/icons/check-circle";
import type { IconProps } from "@opal/types";

export interface SvgProgressCircleProps extends IconProps {
  value?: number;
}

const SvgProgressCircle = ({
  value = 100,
  className,
  ...props
}: SvgProgressCircleProps) => {
  // Clamp value between 0 and 100
  const progress = Math.min(Math.max(value, 0), 100);
  const isComplete = progress >= 100;

  // Calculate circumference for circular progress
  // For a stroke to fill from center to radius R, we need:
  // - Circle at radius R/2 with strokeWidth R
  // This way stroke extends from 0 to R (R/2 - R/2 to R/2 + R/2)
  const maxRadius = 5; // Maximum inner circle radius
  const strokeRadius = maxRadius / 2; // Position circle at half the desired radius
  const strokeWidth = maxRadius; // Stroke width equals max radius
  const circumference = 2 * Math.PI * strokeRadius;
  // Calculate how much of the circle to show (inverted for clockwise from top)
  const offset = circumference - (progress / 100) * circumference;

  return (
    <div className={className}>
      {isComplete ? (
        <SvgCheckCircle
          className={cn(className, "!stroke-status-success-05")}
          {...props}
        />
      ) : (
        <svg
          width="16"
          height="16"
          viewBox="0 0 16 16"
          fill="none"
          xmlns="http://www.w3.org/2000/svg"
        >
          {/* Outer circle - outline only */}
          <circle
            cx="8"
            cy="8"
            r="7"
            stroke="currentColor"
            strokeWidth="1.5"
            fill="none"
            className="text-border-medium"
          />

          {/* Inner circle progress - fills like a pie using thick stroke */}
          <circle
            cx="8"
            cy="8"
            r={strokeRadius}
            stroke="currentColor"
            strokeWidth={strokeWidth}
            fill="none"
            strokeDasharray={circumference}
            strokeDashoffset={offset}
            className="-rotate-90 origin-center"
            style={{
              transformOrigin: "center",
            }}
          />
        </svg>
      )}
    </div>
  );
};

export default SvgProgressCircle;


================================================
FILE: web/lib/opal/src/icons/question-mark-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgQuestionMarkSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6.06 5.99995C6.21673 5.5544 6.5261 5.17869 6.9333 4.93937C7.3405 4.70006 7.81926 4.61258 8.28478 4.69243C8.7503 4.77228 9.17254 5.0143 9.47672 5.37564C9.78089 5.73697 9.94737 6.1943 9.94666 6.66662C9.94666 7.99995 7.94666 8.66662 7.94666 8.66662M8 11.3333H8.00666"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgQuestionMarkSmall;


================================================
FILE: web/lib/opal/src/icons/quote-end.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgQuoteEnd = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 22 18"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.344 10.0627C9.344 15.6947 5.824 18.0627 1.10262e-10 17.9987L2.91054e-07 14.6707C3.712 14.4787 4.8 12.9427 4.8 10.5747L4.8 9.67874L0.512 9.67874L0.512001 -1.87854e-06L9.344 -1.10642e-06L9.344 10.0627ZM22 0L22 10.0627C22 15.6947 18.416 18.0627 12.592 17.9987L12.592 14.6707C16.304 14.4787 17.392 12.9427 17.392 10.5747L17.392 9.67874L13.104 9.67874L13.104 -7.77713e-07L22 0Z"
      fill="#E6E6E9"
    />
  </svg>
);
export default SvgQuoteEnd;


================================================
FILE: web/lib/opal/src/icons/quote-start.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgQuoteStart = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 22 18"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.656 7.93726C12.656 2.30526 16.176 -0.0627379 22 0.00126124V3.32926C18.288 3.52126 17.2 5.05726 17.2 7.42526V8.32126H21.488V18H12.656V7.93726ZM0 18V7.93726C0 2.30526 3.584 -0.0627379 9.408 0.00126124V3.32926C5.696 3.52126 4.608 5.05726 4.608 7.42526V8.32126H8.896V18H0Z"
      fill="#E6E6E9"
    />
  </svg>
);
export default SvgQuoteStart;


================================================
FILE: web/lib/opal/src/icons/refresh-cw.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgRefreshCw = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14.448 3.10983V6.77746M14.448 6.77746H10.7803M14.448 6.77746L11.6117 4.11231C10.9547 3.45502 10.142 2.97486 9.24923 2.71664C8.35651 2.45842 7.41292 2.43055 6.50651 2.63564C5.6001 2.84072 4.76042 3.27208 4.06581 3.88945C3.3712 4.50683 2.84431 5.2901 2.53429 6.16618M1 12.8902V9.22254M1 9.22254H4.66763M1 9.22254L3.8363 11.8877C4.49326 12.545 5.30603 13.0251 6.19875 13.2834C7.09147 13.5416 8.03506 13.5694 8.94147 13.3644C9.84787 13.1593 10.6876 12.7279 11.3822 12.1105C12.0768 11.4932 12.6037 10.7099 12.9137 9.83381"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgRefreshCw;


================================================
FILE: web/lib/opal/src/icons/revert.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgRevert = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1.33333 2V6M1.33333 6H5.33333M1.33333 6L4.00432 3.33333C5.05887 2.27806 6.50634 1.66667 8.06318 1.66667C11.2745 1.66667 13.8799 4.27203 13.8799 7.48333C13.8799 10.6946 11.2745 13.3 8.06318 13.3C5.52018 13.3 3.35026 11.6635 2.54132 9.38632"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgRevert;


================================================
FILE: web/lib/opal/src/icons/search-menu.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSearchMenu = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1.00261 7.5H2.5M1 4H3.25M1.00261 11H3.25M15 13L12.682 10.682M12.682 10.682C13.4963 9.86764 14 8.74264 14 7.5C14 5.01472 11.9853 3 9.49999 3C7.01472 3 5 5.01472 5 7.5C5 9.98528 7.01472 12 9.49999 12C10.7426 12 11.8676 11.4963 12.682 10.682Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSearchMenu;


================================================
FILE: web/lib/opal/src/icons/search-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgSearchSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.69454 9.69454C10.7685 8.6206 10.7685 6.8794 9.69454 5.80546C8.6206 4.73151 6.8794 4.73151 5.80546 5.80546C4.73151 6.8794 4.73151 8.6206 5.80546 9.69454C6.8794 10.7685 8.6206 10.7685 9.69454 9.69454ZM9.69454 9.69454L11 11"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSearchSmall;


================================================
FILE: web/lib/opal/src/icons/search.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSearch = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 14L11.1 11.1M12.6667 7.33333C12.6667 10.2789 10.2789 12.6667 7.33333 12.6667C4.38781 12.6667 2 10.2789 2 7.33333C2 4.38781 4.38781 2 7.33333 2C10.2789 2 12.6667 4.38781 12.6667 7.33333Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSearch;


================================================
FILE: web/lib/opal/src/icons/server.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgServer = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_170_22)">
      <path
        d="M3.99999 4.00001H4.00666M3.99999 12H4.00666M2.66666 1.33334H13.3333C14.0697 1.33334 14.6667 1.9303 14.6667 2.66668V5.33334C14.6667 6.06972 14.0697 6.66668 13.3333 6.66668H2.66666C1.93028 6.66668 1.33333 6.06972 1.33333 5.33334V2.66668C1.33333 1.9303 1.93028 1.33334 2.66666 1.33334ZM2.66666 9.33334H13.3333C14.0697 9.33334 14.6667 9.9303 14.6667 10.6667V13.3333C14.6667 14.0697 14.0697 14.6667 13.3333 14.6667H2.66666C1.93028 14.6667 1.33333 14.0697 1.33333 13.3333V10.6667C1.33333 9.9303 1.93028 9.33334 2.66666 9.33334Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_170_22">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgServer;


================================================
FILE: web/lib/opal/src/icons/settings.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSettings = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2873)">
      <path
        d="M8.00001 9.99999C9.10457 9.99999 10 9.10456 10 7.99999C10 6.89542 9.10457 5.99999 8.00001 5.99999C6.89544 5.99999 6.00001 6.89542 6.00001 7.99999C6.00001 9.10456 6.89544 9.99999 8.00001 9.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M12.9333 9.99999C12.8446 10.2011 12.8181 10.4241 12.8573 10.6404C12.8965 10.8566 12.9996 11.0562 13.1533 11.2133L13.1933 11.2533C13.3173 11.3772 13.4157 11.5242 13.4828 11.6861C13.5499 11.8479 13.5844 12.0214 13.5844 12.1967C13.5844 12.3719 13.5499 12.5454 13.4828 12.7072C13.4157 12.8691 13.3173 13.0162 13.1933 13.14C13.0695 13.264 12.9225 13.3623 12.7606 13.4294C12.5987 13.4965 12.4252 13.531 12.25 13.531C12.0748 13.531 11.9013 13.4965 11.7394 13.4294C11.5776 13.3623 11.4305 13.264 11.3067 13.14L11.2667 13.1C11.1096 12.9463 10.91 12.8432 10.6937 12.804C10.4775 12.7648 10.2544 12.7912 10.0533 12.88C9.85616 12.9645 9.68799 13.1048 9.56954 13.2837C9.45109 13.4625 9.38753 13.6721 9.38667 13.8867V14C9.38667 14.3536 9.2462 14.6928 8.99615 14.9428C8.7461 15.1928 8.40696 15.3333 8.05334 15.3333C7.69972 15.3333 7.36058 15.1928 7.11053 14.9428C6.86048 14.6928 6.72001 14.3536 6.72001 14V13.94C6.71484 13.7193 6.64342 13.5053 6.51501 13.3258C6.38661 13.1463 6.20716 13.0095 6.00001 12.9333C5.79893 12.8446 5.57588 12.8181 5.35961 12.8573C5.14335 12.8965 4.94379 12.9996 4.78667 13.1533L4.74667 13.1933C4.62284 13.3173 4.47579 13.4156 4.31393 13.4827C4.15206 13.5498 3.97856 13.5844 3.80334 13.5844C3.62812 13.5844 3.45462 13.5498 3.29275 13.4827C3.13089 13.4156 2.98384 13.3173 2.86001 13.1933C2.73604 13.0695 2.63769 12.9224 2.57059 12.7606C2.50349 12.5987 2.46896 12.4252 2.46896 12.25C2.46896 12.0748 2.50349 11.9013 2.57059 11.7394C2.63769 11.5775 2.73604 11.4305 2.86001 11.3067L2.90001 11.2667C3.0537 11.1095 3.1568 10.91 3.19601 10.6937C3.23522 10.4775 3.20875 10.2544 3.12001 10.0533C3.0355 9.85614 2.89518 9.68798 2.71632 9.56953C2.53746 9.45108 2.32786 9.38751 2.11334 9.38666H2.00001C1.64638 9.38666 1.30724 9.24618 1.0572 8.99613C0.807148 8.74608 0.666672 8.40695 0.666672 8.05332C0.666672 7.6997 0.807148 7.36056 1.0572 7.11051C1.30724 6.86047 1.64638 6.71999 2.00001 6.71999H2.06001C2.28067 6.71483 2.49467 6.6434 2.6742 6.515C2.85373 6.38659 2.99048 6.20715 3.06667 5.99999C3.15542 5.79891 3.18189 5.57586 3.14267 5.3596C3.10346 5.14333 3.00036 4.94378 2.84667 4.78666L2.80667 4.74666C2.6827 4.62283 2.58436 4.47577 2.51726 4.31391C2.45016 4.15205 2.41562 3.97854 2.41562 3.80332C2.41562 3.6281 2.45016 3.4546 2.51726 3.29274C2.58436 3.13087 2.6827 2.98382 2.80667 2.85999C2.9305 2.73602 3.07755 2.63768 3.23942 2.57058C3.40128 2.50348 3.57478 2.46894 3.75001 2.46894C3.92523 2.46894 4.09873 2.50348 4.26059 2.57058C4.42246 2.63768 4.56951 2.73602 4.69334 2.85999L4.73334 2.89999C4.89046 3.05368 5.09002 3.15678 5.30628 3.19599C5.52254 3.23521 5.74559 3.20873 5.94667 3.11999H6.00001C6.19718 3.03548 6.36535 2.89516 6.4838 2.7163C6.60225 2.53744 6.66582 2.32785 6.66667 2.11332V1.99999C6.66667 1.64637 6.80715 1.30723 7.0572 1.05718C7.30725 0.807132 7.64638 0.666656 8.00001 0.666656C8.35363 0.666656 8.69277 0.807132 8.94281 1.05718C9.19286 1.30723 9.33334 1.64637 9.33334 1.99999V2.05999C9.33419 2.27451 9.39776 2.48411 9.51621 2.66297C9.63466 2.84183 9.80283 2.98215 10 3.06666C10.2011 3.1554 10.4241 3.18187 10.6404 3.14266C10.8567 3.10345 11.0562 3.00035 11.2133 2.84666L11.2533 2.80666C11.3772 2.68269 11.5242 2.58434 11.6861 2.51724C11.8479 2.45014 12.0215 2.41561 12.1967 2.41561C12.3719 2.41561 12.5454 2.45014 12.7073 2.51724C12.8691 2.58434 13.0162 2.68269 13.14 2.80666C13.264 2.93049 13.3623 3.07754 13.4294 3.2394C13.4965 3.40127 13.5311 3.57477 13.5311 3.74999C13.5311 3.92521 13.4965 4.09871 13.4294 4.26058C13.3623 4.42244 13.264 4.56949 13.14 4.69332L13.1 4.73332C12.9463 4.89044 12.8432 5.09 12.804 5.30626C12.7648 5.52253 12.7913 5.74558 12.88 5.94666V5.99999C12.9645 6.19717 13.1048 6.36533 13.2837 6.48379C13.4626 6.60224 13.6721 6.6658 13.8867 6.66666H14C14.3536 6.66666 14.6928 6.80713 14.9428 7.05718C15.1929 7.30723 15.3333 7.64637 15.3333 7.99999C15.3333 8.35361 15.1929 8.69275 14.9428 8.9428C14.6928 9.19285 14.3536 9.33332 14 9.33332H13.94C13.7255 9.33418 13.5159 9.39774 13.337 9.5162C13.1582 9.63465 13.0178 9.80281 12.9333 9.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2873">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgSettings;


================================================
FILE: web/lib/opal/src/icons/share-webhook.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgShareWebhook = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.0002 4C10.0002 3.99708 10.0002 3.99415 10.0001 3.99123C9.99542 2.8907 9.10181 2 8.00016 2C6.89559 2 6.00016 2.89543 6.00016 4C6.00016 4.73701 6.39882 5.38092 6.99226 5.72784L4.67276 9.70412M11.6589 13.7278C11.9549 13.9009 12.2993 14 12.6668 14C13.7714 14 14.6668 13.1046 14.6668 12C14.6668 10.8954 13.7714 10 12.6668 10C12.2993 10 11.9549 10.0991 11.6589 10.2722L9.33943 6.29588M2.33316 10.2678C1.73555 10.6136 1.3335 11.2599 1.3335 12C1.3335 13.1046 2.22893 14 3.3335 14C4.43807 14 5.3335 13.1046 5.3335 12H10.0002"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgShareWebhook;


================================================
FILE: web/lib/opal/src/icons/share.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgShare = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.66667 8.00001V13.3333C2.66667 13.687 2.80715 14.0261 3.0572 14.2762C3.30724 14.5262 3.64638 14.6667 4.00001 14.6667H12C12.3536 14.6667 12.6928 14.5262 12.9428 14.2762C13.1929 14.0261 13.3333 13.687 13.3333 13.3333V8.00001M10.6667 4.00001L8.00001 1.33334M8.00001 1.33334L5.33334 4.00001M8.00001 1.33334V10"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgShare;


================================================
FILE: web/lib/opal/src/icons/shield.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgShield = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M8.00001 14.6667C8.00001 14.6667 13.3333 12 13.3333 8.00001V3.33334L8.00001 1.33334L2.66667 3.33334V8.00001C2.66667 12 8.00001 14.6667 8.00001 14.6667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgShield;


================================================
FILE: web/lib/opal/src/icons/sidebar.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSidebar = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 2V14M3.33333 2H12.6667C13.403 2 14 2.59695 14 3.33333V12.6667C14 13.403 13.403 14 12.6667 14H3.33333C2.59695 14 2 13.403 2 12.6667V3.33333C2 2.59695 2.59695 2 3.33333 2Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSidebar;


================================================
FILE: web/lib/opal/src/icons/slack.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSlack = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_259_269)">
      <path
        d="M9.66666 6.66665C9.11333 6.66665 8.66666 6.21998 8.66666 5.66665V2.33331C8.66666 1.77998 9.11333 1.33331 9.66666 1.33331C10.22 1.33331 10.6667 1.77998 10.6667 2.33331V5.66665C10.6667 6.21998 10.22 6.66665 9.66666 6.66665Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M13.6667 6.66665H12.6667V5.66665C12.6667 5.11331 13.1133 4.66665 13.6667 4.66665C14.22 4.66665 14.6667 5.11331 14.6667 5.66665C14.6667 6.21998 14.22 6.66665 13.6667 6.66665Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M6.33333 9.33331C6.88666 9.33331 7.33333 9.77998 7.33333 10.3333V13.6666C7.33333 14.22 6.88666 14.6666 6.33333 14.6666C5.78 14.6666 5.33333 14.22 5.33333 13.6666V10.3333C5.33333 9.77998 5.78 9.33331 6.33333 9.33331Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M2.33333 9.33331H3.33333V10.3333C3.33333 10.8866 2.88666 11.3333 2.33333 11.3333C1.77999 11.3333 1.33333 10.8866 1.33333 10.3333C1.33333 9.77998 1.77999 9.33331 2.33333 9.33331Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M9.33333 9.66665C9.33333 9.11331 9.78 8.66665 10.3333 8.66665H13.6667C14.22 8.66665 14.6667 9.11331 14.6667 9.66665C14.6667 10.22 14.22 10.6666 13.6667 10.6666H10.3333C9.78 10.6666 9.33333 10.22 9.33333 9.66665Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M10.3333 12.6666H9.33333V13.6666C9.33333 14.22 9.78 14.6666 10.3333 14.6666C10.8867 14.6666 11.3333 14.22 11.3333 13.6666C11.3333 13.1133 10.8867 12.6666 10.3333 12.6666Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M6.66666 6.33331C6.66666 5.77998 6.22 5.33331 5.66666 5.33331H2.33333C1.77999 5.33331 1.33333 5.77998 1.33333 6.33331C1.33333 6.88665 1.77999 7.33331 2.33333 7.33331H5.66666C6.22 7.33331 6.66666 6.88665 6.66666 6.33331Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M5.66666 3.33331H6.66666V2.33331C6.66666 1.77998 6.22 1.33331 5.66666 1.33331C5.11333 1.33331 4.66666 1.77998 4.66666 2.33331C4.66666 2.88665 5.11333 3.33331 5.66666 3.33331Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_259_269">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgSlack;


================================================
FILE: web/lib/opal/src/icons/slash.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSlash = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_slash)">
      <path
        d="M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
      <path
        d="M3.5 3.5L12.5 12.5"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
  </svg>
);
export default SvgSlash;


================================================
FILE: web/lib/opal/src/icons/sliders-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgSlidersSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 11V8.75M6 6.75V5M6 6.75H4.75M6 6.75H7.25M10 11V9.25M10 9.25H8.75M10 9.25H11.25M10 7.25V5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSlidersSmall;


================================================
FILE: web/lib/opal/src/icons/sliders.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSliders = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2627)">
      <path
        d="M2.66666 14V9.33333M2.66666 6.66667V2M7.99999 14V8M7.99999 5.33333V2M13.3333 14V10.6667M13.3333 8V2M0.666656 9.33333H4.66666M5.99999 5.33333H9.99999M11.3333 10.6667H15.3333"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2627">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgSliders;


================================================
FILE: web/lib/opal/src/icons/sort-order.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSortOrder = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.66675 12L7.67009 12.0001M2.66675 8H10.5001M2.66675 4H13.3334"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSortOrder;


================================================
FILE: web/lib/opal/src/icons/sort.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSort = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 4.5H10M2 8H7M2 11.5H5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M12 5V12M12 12L14 10M12 12L10 10"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSort;


================================================
FILE: web/lib/opal/src/icons/sparkle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSparkle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1.5 8C5.11111 6.91667 6.91667 5.11111 8 1.5C9.08333 5.11111 10.8889 6.91667 14.5 8C10.8889 9.08333 9.08333 10.8889 8 14.5C6.91667 10.8889 5.11111 9.08333 1.5 8Z"
      strokeWidth={1.5}
      strokeLinecap="square"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgSparkle;


================================================
FILE: web/lib/opal/src/icons/star-off.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStarOff = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 1L5.56196 5.56196M15 15L5.56196 5.56196M5.56196 5.56196L1.33333 6.18004L4.66666 9.42671L3.88 14.0134L8 11.8467L12.12 14.0134L11.7267 11.72M12.1405 8.64051L14.6667 6.18004L10.06 5.50671L8 1.33337L6.95349 3.45349"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStarOff;


================================================
FILE: web/lib/opal/src/icons/star.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgStar = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.99999 1.33331L10.06 5.50665L14.6667 6.17998L11.3333 9.42665L12.12 14.0133L7.99999 11.8466L3.87999 14.0133L4.66666 9.42665L1.33333 6.17998L5.93999 5.50665L7.99999 1.33331Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgStar;


================================================
FILE: web/lib/opal/src/icons/step1.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStep1 = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.41989 7.42018L11.7505 4.92023C10.8858 3.42605 9.27082 2.42116 7.42035 2.42106L7.41989 7.42018Z"
      fill="currentColor"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStep1;


================================================
FILE: web/lib/opal/src/icons/step2.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStep2 = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.4198 2.42L7.4198 7.42L11.7494 4.92083C10.8242 3.31822 9.14596 2.42161 7.4198 2.42Z"
      fill="currentColor"
    />
    <path
      d="M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStep2;


================================================
FILE: web/lib/opal/src/icons/step3-end.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStep3End = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.42012 7.42018L7.42039 2.41981C5.69408 2.42152 4.01631 3.31772 3.09099 4.92023L7.42012 7.42018Z"
      fill="currentColor"
    />
    <path
      d="M3.09 9.92018L7.42012 7.42018L3.09099 4.92023C2.16566 6.52274 2.22832 8.42448 3.09 9.92018Z"
      fill="currentColor"
    />
    <path
      d="M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z"
      fill="currentColor"
    />
    <path
      d="M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z"
      fill="currentColor"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStep3End;


================================================
FILE: web/lib/opal/src/icons/step3.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStep3 = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z"
      fill="currentColor"
    />
    <path
      d="M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z"
      fill="currentColor"
    />
    <path
      d="M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z"
      fill="currentColor"
    />
    <path
      d="M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z"
      fill="currentColor"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z"
      fill="currentColor"
    />
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStep3;


================================================
FILE: web/lib/opal/src/icons/stop-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStopCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M9.41667 5.41667H5.41667V9.41667H9.41667V5.41667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgStopCircle;


================================================
FILE: web/lib/opal/src/icons/stop.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgStop = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12 4H4V12H12V4Z"
      strokeWidth={1.5}
      fill="var(--background-tint-00)"
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgStop;


================================================
FILE: web/lib/opal/src/icons/sun.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgSun = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_2458_12738)">
      <path
        d="M8 1L8 2.5M8 13.5V15M3.04909 3.04909L4.11091 4.11091M11.8891 11.8891L12.9509 12.9509M1 8L2.5 8M13.5 8L15 8M3.04909 12.9509L4.11091 11.8891M11.8891 4.11091L12.9509 3.04909M11 8C11 9.65685 9.65685 11 8 11C6.34315 11 5 9.65685 5 8C5 6.34315 6.34315 5 8 5C9.65685 5 11 6.34315 11 8Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_2458_12738">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgSun;


================================================
FILE: web/lib/opal/src/icons/tag.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTag = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M4.66666 4.66668H4.67333M13.7267 8.94001L8.94666 13.72C8.82283 13.844 8.67578 13.9423 8.51392 14.0094C8.35205 14.0765 8.17855 14.1111 8.00333 14.1111C7.82811 14.1111 7.65461 14.0765 7.49274 14.0094C7.33088 13.9423 7.18383 13.844 7.05999 13.72L1.33333 8.00001V1.33334H7.99999L13.7267 7.06001C13.975 7.30983 14.1144 7.64776 14.1144 8.00001C14.1144 8.35226 13.975 8.69019 13.7267 8.94001Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTag;


================================================
FILE: web/lib/opal/src/icons/terminal-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTerminalSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5.5 10L7.5 8L5.5 6M8.5 10.5H10.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTerminalSmall;


================================================
FILE: web/lib/opal/src/icons/terminal.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgTerminal = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.66667 11.3333L6.66667 7.33331L2.66667 3.33331M8.00001 12.6666H13.3333"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgTerminal;


================================================
FILE: web/lib/opal/src/icons/text-lines-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTextLinesSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M5 10.5H8.5M5 8H11M5 5.5H11"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTextLinesSmall;


================================================
FILE: web/lib/opal/src/icons/text-lines.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTextLines = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 18 18"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M15.75 7.4925H2.25M15.75 4.5H2.25M9 13.5H2.25M15.75 10.4962H2.25"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTextLines;


================================================
FILE: web/lib/opal/src/icons/thumbs-down.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgThumbsDown = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2611)">
      <path
        d="M11.3333 8.66667L8.66666 14.6667C8.13622 14.6667 7.62752 14.456 7.25244 14.0809C6.87737 13.7058 6.66666 13.1971 6.66666 12.6667V10H2.89332C2.70005 10.0022 2.50861 9.96234 2.33228 9.8832C2.15594 9.80405 1.99891 9.68752 1.87208 9.54166C1.74525 9.39581 1.65165 9.22413 1.59776 9.03851C1.54387 8.85289 1.53098 8.65777 1.55999 8.46667L2.47999 2.46667C2.52821 2.14874 2.6897 1.85894 2.93472 1.65067C3.17974 1.4424 3.49177 1.32971 3.81332 1.33334H11.3333M11.3333 8.66667V1.33334M11.3333 8.66667H13.1133C13.4906 8.67335 13.8573 8.54125 14.1436 8.29546C14.4299 8.04967 14.6161 7.7073 14.6667 7.33334V2.66667C14.6161 2.29271 14.4299 1.95034 14.1436 1.70455C13.8573 1.45876 13.4906 1.32667 13.1133 1.33334H11.3333"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2611">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgThumbsDown;


================================================
FILE: web/lib/opal/src/icons/thumbs-up.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgThumbsUp = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2609)">
      <path
        d="M4.66666 7.33333L7.33333 1.33333C7.86376 1.33333 8.37247 1.54404 8.74754 1.91911C9.12262 2.29419 9.33333 2.8029 9.33333 3.33333V6H13.1067C13.2999 5.99781 13.4914 6.03766 13.6677 6.11681C13.844 6.19595 14.0011 6.31248 14.1279 6.45834C14.2547 6.60419 14.3483 6.77588 14.4022 6.9615C14.4561 7.14712 14.469 7.34223 14.44 7.53333L13.52 13.5333C13.4718 13.8513 13.3103 14.1411 13.0653 14.3493C12.8202 14.5576 12.5082 14.6703 12.1867 14.6667H4.66666M4.66666 7.33333V14.6667M4.66666 7.33333H2.66666C2.31304 7.33333 1.9739 7.4738 1.72385 7.72385C1.4738 7.9739 1.33333 8.31304 1.33333 8.66666V13.3333C1.33333 13.687 1.4738 14.0261 1.72385 14.2761C1.9739 14.5262 2.31304 14.6667 2.66666 14.6667H4.66666"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2609">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgThumbsUp;


================================================
FILE: web/lib/opal/src/icons/trash.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgTrash = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 3.99998H3.33333M3.33333 3.99998H14M3.33333 3.99998V13.3333C3.33333 13.6869 3.47381 14.0261 3.72386 14.2761C3.97391 14.5262 4.31304 14.6666 4.66667 14.6666H11.3333C11.687 14.6666 12.0261 14.5262 12.2761 14.2761C12.5262 14.0261 12.6667 13.6869 12.6667 13.3333V3.99998M5.33333 3.99998V2.66665C5.33333 2.31302 5.47381 1.97389 5.72386 1.72384C5.97391 1.47379 6.31304 1.33331 6.66667 1.33331H9.33333C9.68696 1.33331 10.0261 1.47379 10.2761 1.72384C10.5262 1.97389 10.6667 2.31302 10.6667 2.66665V3.99998M6.66667 7.33331V11.3333M9.33333 7.33331V11.3333"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTrash;


================================================
FILE: web/lib/opal/src/icons/two-line-small.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTwoLineSmall = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M6 6.50002V9.50002M10 6.50002V9.50002"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgTwoLineSmall;


================================================
FILE: web/lib/opal/src/icons/unplug.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUnplug = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_454_2471)">
      <path
        d="M1 1L5.0778 5.0778M15 15L12 12M15 10.5H14M12 12.5H8.5C6.01472 12.5 4 10.4853 4 8M12 12.5V14M12 12.5V12M12 3.5H8.5C8.04537 3.5 7.60649 3.56742 7.1928 3.6928M12 3.5V5.5M12 3.5V2M12 5.5H15M12 5.5V8.5M4 8H1M4 8C4 6.88463 4.40579 5.86403 5.0778 5.0778M5.0778 5.0778L12 12"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_454_2471">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgUnplug;


================================================
FILE: web/lib/opal/src/icons/upload-cloud.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUploadCloud = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M10.4167 9.41502L7.75 6.74835M7.75 6.74835L5.08333 9.41502M7.75 6.74835V12.7484M13.0913 10.5412C13.712 10.2028 14.2022 9.66745 14.4848 9.01952C14.7673 8.3716 14.826 7.64804 14.6517 6.96303C14.4773 6.27802 14.0798 5.67058 13.5219 5.23658C12.964 4.80258 12.2774 4.56673 11.5706 4.56627H10.7688C10.5762 3.82131 10.2173 3.1297 9.71889 2.54345C9.22053 1.95719 8.59575 1.49155 7.89152 1.18151C7.1873 0.871475 6.42195 0.725121 5.65301 0.753452C4.88408 0.781783 4.13158 0.98406 3.45207 1.34508C2.77257 1.70609 2.18375 2.21646 1.72988 2.8378C1.27601 3.45913 0.968902 4.17528 0.831645 4.93239C0.694388 5.6895 0.730552 6.46788 0.93742 7.209C1.14429 7.95013 1.51648 8.63471 2.026 9.21129"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgUploadCloud;


================================================
FILE: web/lib/opal/src/icons/user-check.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserCheck = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.49999L12.25 9L15 6.24999M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserCheck;


================================================
FILE: web/lib/opal/src/icons/user-edit.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserEdit = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75ZM12.09 8.41421C12.3552 8.149 12.7149 8 13.09 8C13.2757 8 13.4596 8.03658 13.6312 8.10765C13.8028 8.17872 13.9587 8.28289 14.09 8.41421C14.2213 8.54554 14.3255 8.70144 14.3966 8.87302C14.4676 9.0446 14.5042 9.2285 14.5042 9.41421C14.5042 9.59993 14.4676 9.78383 14.3966 9.95541C14.3255 10.127 14.2213 10.2829 14.09 10.4142L10.6667 13.8333L8 14.5L8.66667 11.8333L12.09 8.41421Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserEdit;


================================================
FILE: web/lib/opal/src/icons/user-key.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserKey = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H8.5M12.625 10C13.6605 10 14.5 9.16053 14.5 8.125C14.5 7.08947 13.6605 6.25 12.625 6.25C11.5895 6.25 10.75 7.08947 10.75 8.125C10.75 9.16053 11.5895 10 12.625 10ZM12.625 10V12.25M12.625 14.5V13.5M12.625 13.5H13.875V12.25H12.625M12.625 13.5V12.25M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserKey;


================================================
FILE: web/lib/opal/src/icons/user-manage.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUserManage = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M0.75 12.75C0.75 12.4167 0.75 12.0833 0.75 11.75C0.750002 10.0931 2.09316 8.75 3.75002 8.75L5.75 8.75M12.25 11.25L13.2981 12.2981M12.25 11.25C12.5916 10.9084 12.7499 10.4481 12.75 10.0004M12.25 11.25C11.9083 11.5917 11.4479 11.75 11 11.75M9.75 11.25L8.7019 12.2981M9.75 11.25C10.0917 11.5917 10.5521 11.75 11 11.75M9.75 11.25C9.4084 10.9084 9.25011 10.4481 9.25 10.0004M9.75 8.75L8.7019 7.70193M9.75 8.75C10.0917 8.40829 10.5521 8.25 11 8.25M9.75 8.75C9.40818 9.09182 9.24989 9.55242 9.25 10.0004M12.25 8.75L13.2981 7.70193M12.25 8.75C12.5918 9.09182 12.7501 9.55242 12.75 10.0004M12.25 8.75C11.9083 8.40829 11.4479 8.25 11 8.25M12.75 10.0004L14.25 10M11 13.25V11.75M11 6.75V8.25M7.75 10L9.25 10.0004M8.5 3.5C8.5 5.01878 7.26878 6.25 5.75 6.25C4.23122 6.25 3 5.01878 3 3.5C3 1.98122 4.23122 0.75 5.75 0.75C7.26878 0.75 8.5 1.98122 8.5 3.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserManage;


================================================
FILE: web/lib/opal/src/icons/user-minus.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserMinus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.49999L14.75 7.50007M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserMinus;


================================================
FILE: web/lib/opal/src/icons/user-plus.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserPlus = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.50005L12.75 7.50007M12.75 7.50007H14.75M12.75 7.50007V9.5M12.75 7.50007V5.5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserPlus;


================================================
FILE: web/lib/opal/src/icons/user-shield.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserShield = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75ZM12 14.5C12 14.5 14.5 13.25 14.5 11.375V9L12 8L9.5 9V11.375C9.5 13.25 12 14.5 12 14.5Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserShield;


================================================
FILE: web/lib/opal/src/icons/user-speaker.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserSpeaker = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7.99998C9.65684 10 11 11.3431 11 13C11 13.3333 11 13.6667 11 14H14.5V10L12.7071 8.20711M12 7.5L12.7071 8.20711M12.7071 8.20711C13.0976 7.81658 13.0976 7.18342 12.7071 6.79289C12.3166 6.40237 11.6834 6.40237 11.2929 6.79289C10.9024 7.18342 10.9024 7.81658 11.2929 8.20711C11.6834 8.59763 12.3166 8.59763 12.7071 8.20711ZM8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserSpeaker;


================================================
FILE: web/lib/opal/src/icons/user-sync.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUserSync = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M11 8.5L9.5 10L14.5 9.99985M13 14L14.5 12.5L9.5 12.5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgUserSync;


================================================
FILE: web/lib/opal/src/icons/user-x.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUserX = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M11.5 8.5L13.25 6.75M13.25 6.75L15 5M13.25 6.75L15 8.5M13.25 6.75L11.5 5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUserX;


================================================
FILE: web/lib/opal/src/icons/user.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUser = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M13.3333 14V12.6667C13.3333 11.9594 13.0524 11.2811 12.5523 10.781C12.0522 10.281 11.3739 10 10.6667 10H5.33334C4.62609 10 3.94782 10.281 3.44772 10.781C2.94762 11.2811 2.66667 11.9594 2.66667 12.6667V14M10.6667 4.66667C10.6667 6.13943 9.47276 7.33333 8.00001 7.33333C6.52725 7.33333 5.33334 6.13943 5.33334 4.66667C5.33334 3.19391 6.52725 2 8.00001 2C9.47276 2 10.6667 3.19391 10.6667 4.66667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUser;


================================================
FILE: web/lib/opal/src/icons/users.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgUsers = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <g clipPath="url(#clip0_16_2864)">
      <path
        d="M11.3333 14V12.6667C11.3333 11.9594 11.0524 11.2811 10.5523 10.781C10.0522 10.281 9.3739 10 8.66666 10H3.33332C2.62608 10 1.9478 10.281 1.44771 10.781C0.947608 11.2811 0.666656 11.9594 0.666656 12.6667V14M15.3333 14V12.6667C15.3329 12.0758 15.1362 11.5018 14.7742 11.0349C14.4122 10.5679 13.9054 10.2344 13.3333 10.0867M10.6667 2.08667C11.2403 2.23353 11.7487 2.56713 12.1117 3.03487C12.4748 3.50261 12.6719 4.07789 12.6719 4.67C12.6719 5.26211 12.4748 5.83739 12.1117 6.30513C11.7487 6.77287 11.2403 7.10647 10.6667 7.25333M8.66666 4.66667C8.66666 6.13943 7.47275 7.33333 5.99999 7.33333C4.52723 7.33333 3.33332 6.13943 3.33332 4.66667C3.33332 3.19391 4.52723 2 5.99999 2C7.47275 2 8.66666 3.19391 8.66666 4.66667Z"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </g>
    <defs>
      <clipPath id="clip0_16_2864">
        <rect width={16} height={16} fill="white" />
      </clipPath>
    </defs>
  </svg>
);
export default SvgUsers;


================================================
FILE: web/lib/opal/src/icons/volume-off.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgVolumeOff = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 6V10H5L9 13V3L5 6H2Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M14 6L11 9M11 6L14 9"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgVolumeOff;


================================================
FILE: web/lib/opal/src/icons/volume.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgVolume = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2 6V10H5L9 13V3L5 6H2Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M11.5 5.5C12.3 6.3 12.8 7.4 12.8 8.5C12.8 9.6 12.3 10.7 11.5 11.5"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgVolume;


================================================
FILE: web/lib/opal/src/icons/wallet.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgWallet = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M14 4.75H9C8.44772 4.75 8 5.19772 8 5.75L8 10.25C8 10.8023 8.44772 11.25 9 11.25H14M14 4.75C14.5523 4.75 15 5.19772 15 5.75V10.25C15 10.8023 14.5523 11.25 14 11.25M14 4.75V3.33333C14 2.6 13.4 2 12.6667 2H3.33333C2.6 2 2 2.6 2 3.33333V12.6667C2 13.4 2.6 14 3.33333 14H12.6667C13.4 14 14 13.4 14 12.6667L14 11.25M10.25 7V9"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgWallet;


================================================
FILE: web/lib/opal/src/icons/workflow.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgWorkflow = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 16 16"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M2.79986 5.60004C2.61157 5.85073 2.5 6.16234 2.5 6.5V11.9754C2.5 13.203 4.08461 13.6951 4.78005 12.6836L11.2199 3.31644C11.9154 2.30488 13.5 2.79705 13.5 4.0246V9.5C13.5 9.83766 13.3884 10.1493 13.2001 10.4M2.79986 5.60004C3.13415 5.85118 3.54969 6 4 6C5.10457 6 6 5.10457 6 4C6 2.89543 5.10457 2 4 2C2.89543 2 2 2.89543 2 4C2 4.65426 2.31416 5.23515 2.79986 5.60004ZM13.2001 10.4C12.8659 10.1488 12.4503 10 12 10C10.8954 10 10 10.8954 10 12C10 13.1046 10.8954 14 12 14C13.1046 14 14 13.1046 14 12C14 11.3457 13.6858 10.7648 13.2001 10.4Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgWorkflow;


================================================
FILE: web/lib/opal/src/icons/x-circle.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgXCircle = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.41667 5.41667L5.41667 9.41667M5.41667 5.41667L9.41667 9.41667M14.0833 7.41667C14.0833 11.0986 11.0986 14.0833 7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgXCircle;


================================================
FILE: web/lib/opal/src/icons/x-octagon.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgXOctagon = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 15 15"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M9.41667 5.41667L5.41667 9.41667M5.41667 5.41667L9.41667 9.41667M4.65667 0.75H10.1767L14.0833 4.65667V10.1767L10.1767 14.0833H4.65667L0.75 10.1767V4.65667L4.65667 0.75Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgXOctagon;


================================================
FILE: web/lib/opal/src/icons/x.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgX = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 28 28"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    strokeWidth={2.5}
    {...props}
  >
    <path d="M21 7L7 21M7 7L21 21" strokeLinejoin="round" />
  </svg>
);
export default SvgX;


================================================
FILE: web/lib/opal/src/icons/zoom-in.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgZoomIn = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.75 12.75L9.85 9.85M6.08333 4.08333V8.08333M4.08333 6.08333H8.08333M11.4167 6.08333C11.4167 9.02885 9.02885 11.4167 6.08333 11.4167C3.13781 11.4167 0.75 9.02885 0.75 6.08333C0.75 3.13781 3.13781 0.75 6.08333 0.75C9.02885 0.75 11.4167 3.13781 11.4167 6.08333Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgZoomIn;


================================================
FILE: web/lib/opal/src/icons/zoom-out.tsx
================================================
import type { IconProps } from "@opal/types";

const SvgZoomOut = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 14 14"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    stroke="currentColor"
    {...props}
  >
    <path
      d="M12.75 12.75L9.85 9.85M4.08333 6.08333H8.08333M11.4167 6.08333C11.4167 9.02885 9.02885 11.4167 6.08333 11.4167C3.13781 11.4167 0.75 9.02885 0.75 6.08333C0.75 3.13781 3.13781 0.75 6.08333 0.75C9.02885 0.75 11.4167 3.13781 11.4167 6.08333Z"
      strokeWidth={1.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);

export default SvgZoomOut;


================================================
FILE: web/lib/opal/src/illustrations/broken-key.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgBrokenKey = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M54.375 43.125H43.125M69.375 28.125V16.875M58.125 31.875L48.75 22.5"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M108.75 18.75L98.5535 24.6369M98.5535 24.6369L104.044 34.1465L91.7404 41.25L86.25 31.7404M98.5535 24.6369L86.25 31.7404M86.25 31.7404L78.7499 36.0705M49.6599 62.8401C45.5882 58.7684 39.9632 56.25 33.75 56.25C21.3236 56.25 11.25 66.3236 11.25 78.75C11.25 91.1764 21.3236 101.25 33.75 101.25C46.1764 101.25 56.25 91.1764 56.25 78.75C56.25 72.5368 53.7316 66.9118 49.6599 62.8401ZM49.6599 62.8401L49.6406 62.8594M49.6599 62.8401L60 52.5"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgBrokenKey;


================================================
FILE: web/lib/opal/src/illustrations/connect.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgConnect = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M43.125 86.2644H73.379M73.379 86.2644H90.9447C95.6006 86.2644 99.375 90.0388 99.375 94.6947C99.375 99.3506 95.6006 103.125 90.9447 103.125H89.6455C86.3575 103.125 83.292 101.464 81.4959 98.7104L73.379 86.2644ZM73.379 86.2644L39.1266 33.7441M69.375 33.7372L39.1266 33.7441M39.1266 33.7441L21.5635 33.7481C16.9034 33.7491 13.125 29.9717 13.125 25.3115C13.125 20.6522 16.9022 16.875 21.5616 16.875H22.8545C26.1425 16.875 29.208 18.5356 31.0041 21.2896L39.1266 33.7441Z"
      stroke="#286DF8"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M99.3626 50.625V43.125V24.375V16.875L86.2376 16.875C76.9178 16.875 69.3626 24.4302 69.3626 33.75C69.3626 43.0698 76.9178 50.625 86.2376 50.625H99.3626Z"
      fill="#E6E6E6"
    />
    <path
      d="M13.1126 103.125L13.1126 69.3751L26.2376 69.375C35.5574 69.375 43.1126 76.9302 43.1126 86.25C43.1126 95.5698 35.5574 103.125 26.2376 103.125L13.1126 103.125Z"
      fill="white"
    />
    <path
      d="M99.3626 43.125H110.613M99.3626 43.125V24.375M99.3626 43.125V50.625M99.3626 24.375H110.613M99.3626 24.375V16.875M99.3626 50.625H86.2376C76.9178 50.625 69.3626 43.0698 69.3626 33.75C69.3626 24.4302 76.9178 16.875 86.2376 16.875L99.3626 16.875M99.3626 50.625V54.375M99.3626 16.875V13.125M13.1126 103.125L26.2376 103.125C35.5574 103.125 43.1126 95.5698 43.1126 86.25C43.1126 76.9302 35.5574 69.375 26.2376 69.375L13.1126 69.3751M13.1126 103.125L13.1126 69.3751M13.1126 103.125L13.1126 106.875M13.1126 69.3751V65.6251"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgConnect;


================================================
FILE: web/lib/opal/src/illustrations/connected.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgConnected = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M48.0722 48.0722L53.4375 53.4375L66.5625 66.5625L71.9324 71.9416L82.5 61.3648C89.0901 54.7747 89.0901 44.0901 82.5 37.5C75.9099 30.9099 65.2253 30.9099 58.6352 37.5L48.0722 48.0722Z"
      fill="#E6E6E6"
    />
    <path
      d="M48.0722 48.0722L58.6352 37.5C65.2253 30.9099 75.9099 30.9099 82.5 37.5M48.0722 48.0722L43.125 43.125M48.0722 48.0722L53.4375 53.4375M71.9324 71.9416L82.5 61.3648C89.0901 54.7747 89.0901 44.0901 82.5 37.5M71.9324 71.9416L76.875 76.8842M71.9324 71.9416L66.5625 66.5625M82.5 37.5L105 15M53.4375 53.4375L43.125 63.75M53.4375 53.4375L66.5625 66.5625M66.5625 66.5625L56.25 76.875"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M71.9278 71.937L48.0676 48.0675L37.5 58.6443C30.9099 65.2344 30.9099 75.9191 37.5 82.5092C44.0901 89.0993 54.7748 89.0993 61.3649 82.5092L71.9278 71.937Z"
      fill="white"
    />
    <path
      d="M71.9278 71.937L61.3649 82.5092C54.7748 89.0993 44.0901 89.0993 37.5 82.5092M71.9278 71.937L48.0676 48.0675M71.9278 71.937L76.875 76.8842M48.0676 48.0675L37.5 58.6443C30.9099 65.2344 30.9099 75.9191 37.5 82.5092M48.0676 48.0675L43.125 43.125M37.5 82.5092L15 105"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M24.375 24.375L33.75 33.75L52.5 15"
      stroke="#286DF8"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgConnected;


================================================
FILE: web/lib/opal/src/illustrations/disconnected.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgDisconnected = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M60 83.8554L36.1351 59.9906L26.25 69.8849C19.6599 76.475 19.6599 87.1597 26.25 93.7498C32.8401 100.34 43.5248 100.34 50.1149 93.7498L60 83.8554Z"
      fill="white"
    />
    <path
      d="M60 83.8554L50.1149 93.7498C43.5248 100.34 32.8401 100.34 26.25 93.7498M60 83.8554L36.1351 59.9906M60 83.8554L63.75 87.6055M36.1351 59.9906L26.25 69.8849C19.6599 76.475 19.6599 87.1597 26.25 93.7498M36.1351 59.9906L32.3946 56.25M26.25 93.7498L15 105"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M60 36.1443L65.3033 41.4476L78.5616 54.7059L83.8649 60.0092L93.75 50.1148C100.34 43.5247 100.34 32.8401 93.75 26.25C87.1599 19.6599 76.4752 19.6599 69.8851 26.25L60 36.1443Z"
      fill="#E6E6E6"
    />
    <path
      d="M65.3033 41.4476L56.25 50.5009M65.3033 41.4476L60 36.1443M65.3033 41.4476L78.5616 54.7059M60 36.1443L69.8851 26.25C76.4752 19.6599 87.1599 19.6599 93.75 26.25M60 36.1443L56.25 32.3942M83.8649 60.0092L93.75 50.1148C100.34 43.5247 100.34 32.8401 93.75 26.25M83.8649 60.0092L78.5616 54.7059M83.8649 60.0092L87.6054 63.7498M78.5616 54.7059L69.5177 63.7498M93.75 26.25L105 15"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M30 45H18.75M45 30V18.75M33.75 33.75L24.375 24.375"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgDisconnected;


================================================
FILE: web/lib/opal/src/illustrations/empty.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgEmpty = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M18.75 71.25V90C18.75 94.1421 22.1079 97.5 26.25 97.5H93.75C97.8921 97.5 101.25 94.1422 101.25 90V71.25H18.75Z"
      fill="#E6E6E6"
    />
    <path d="M18.75 71.25H101.25L86.25 48.75H33.75L18.75 71.25Z" fill="white" />
    <path
      d="M18.75 71.25V90C18.75 94.1421 22.1079 97.5 26.25 97.5H93.75C97.8921 97.5 101.25 94.1422 101.25 90V71.25M18.75 71.25H101.25M18.75 71.25L33.75 48.75H86.25L101.25 71.25M54.375 80.625H65.625"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M43.125 35.625L33.75 26.25M76.875 35.625L86.25 26.25M60 28.125V15"
      stroke="#FFC733"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEmpty;


================================================
FILE: web/lib/opal/src/illustrations/end-of-line.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgEndOfLine = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M67.5 33.75H88.125C93.3027 33.75 97.5 29.5527 97.5 24.375C97.5 19.1973 93.3027 15 88.125 15H76.875C71.6973 15 67.5 19.1973 67.5 24.375V33.75ZM67.5 33.75H15M67.5 33.75V82.5"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M30 82.5H105"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M41.25 93.75H93.75"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M52.5 105H82.5"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgEndOfLine;


================================================
FILE: web/lib/opal/src/illustrations/index.ts
================================================
export { default as SvgBrokenKey } from "@opal/illustrations/broken-key";
export { default as SvgConnect } from "@opal/illustrations/connect";
export { default as SvgConnected } from "@opal/illustrations/connected";
export { default as SvgDisconnected } from "@opal/illustrations/disconnected";
export { default as SvgEmpty } from "@opal/illustrations/empty";
export { default as SvgEndOfLine } from "@opal/illustrations/end-of-line";
export { default as SvgLimitAlert } from "@opal/illustrations/limit-alert";
export { default as SvgLongWait } from "@opal/illustrations/long-wait";
export { default as SvgNoAccess } from "@opal/illustrations/no-access";
export { default as SvgNoResult } from "@opal/illustrations/no-result";
export { default as SvgNotFound } from "@opal/illustrations/not-found";
export { default as SvgOverflow } from "@opal/illustrations/overflow";
export { default as SvgPlugBroken } from "@opal/illustrations/plug-broken";
export { default as SvgTimeout } from "@opal/illustrations/timeout";
export { default as SvgUnPlugged } from "@opal/illustrations/un-plugged";
export { default as SvgUsageAlert } from "@opal/illustrations/usage-alert";


================================================
FILE: web/lib/opal/src/illustrations/limit-alert.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgLimitAlert = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M15 82.5C15 78.3579 18.3579 75 22.5 75L97.5 75C101.642 75 105 78.3579 105 82.5V90C105 94.1421 101.642 97.5 97.5 97.5L22.5 97.5C18.3579 97.5 15 94.1421 15 90V82.5Z"
      fill="#FBEAE4"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M93.75 86.25H78.75"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M67.5 86.2499H26.25"
      stroke="#F5A88B"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M15 48.75C15 44.6079 18.3579 41.25 22.5 41.25L52.5 41.25C56.6421 41.25 60 44.6079 60 48.75L60 56.25C60 60.3921 56.6421 63.75 52.5 63.75H22.5C18.3579 63.75 15 60.3921 15 56.25L15 48.75Z"
      fill="#F0F0F0"
    />
    <path
      d="M45 52.5H26.25M52.5 63.75H22.5C18.3579 63.75 15 60.3921 15 56.25L15 48.75C15 44.6079 18.3579 41.25 22.5 41.25L52.5 41.25C56.6421 41.25 60 44.6079 60 48.75L60 56.25C60 60.3921 56.6421 63.75 52.5 63.75Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M86.25 41.25C81.0723 41.25 76.875 45.4473 76.875 50.625L76.875 63.75L86.25 63.75L95.625 63.75V50.625C95.625 45.4473 91.4277 41.25 86.25 41.25Z"
      fill="#FBEAE4"
    />
    <path
      d="M76.875 63.75L76.875 50.625C76.875 45.4473 81.0723 41.25 86.25 41.25C91.4277 41.25 95.625 45.4473 95.625 50.625V63.75M76.875 63.75L86.25 63.75M76.875 63.75L73.125 63.75M95.625 63.75H99.375M95.625 63.75L86.25 63.75M86.25 52.5V63.75M76.875 33.75L71.25 28.125M95.625 33.75L101.25 28.125M86.25 30L86.25 22.5"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLimitAlert;


================================================
FILE: web/lib/opal/src/illustrations/long-wait.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgLongWait = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M103.253 47.5404C104.391 51.4971 105 55.6774 105 60C105 84.8528 84.8528 105 60 105C35.1472 105 15 84.8528 15 60C15 35.1472 35.1472 15 60 15C64.3226 15 68.5029 15.6095 72.4596 16.7472C70.4991 20.0854 69.375 23.9739 69.375 28.125C69.375 40.5514 79.4486 50.625 91.875 50.625C96.0261 50.625 99.9146 49.5009 103.253 47.5404Z"
      fill="#F0F0F0"
    />
    <path
      d="M69.375 28.125C69.375 40.5514 79.4486 50.625 91.875 50.625C96.0261 50.625 99.9146 49.5009 103.253 47.5404C109.908 43.6322 114.375 36.4003 114.375 28.125C114.375 15.6986 104.301 5.625 91.875 5.625C83.5997 5.625 76.3678 10.0925 72.4596 16.7472C70.4991 20.0854 69.375 23.9739 69.375 28.125Z"
      fill="white"
    />
    <path
      d="M54.1223 104.615C56.0462 104.866 58.0077 105 60 105C61.9911 105 63.9513 104.866 65.874 104.615M42.7771 101.576C39.1175 100.058 35.7047 98.0716 32.6074 95.6909M87.3889 95.6909C84.2914 98.0715 80.8791 100.058 77.2192 101.576M24.3054 87.3889C21.9251 84.2915 19.9377 80.8789 18.4204 77.2192M101.576 77.2192C100.058 80.8791 98.0715 84.2914 95.6909 87.3889M15.3809 54.1223C15.1299 56.046 15 58.0079 15 60C15 61.9909 15.1302 63.9515 15.3809 65.874M104.615 65.874C104.866 63.9513 105 61.9911 105 60C105 58.0077 104.866 56.0462 104.615 54.1223M18.4204 42.7771C19.9379 39.1177 21.925 35.7046 24.3054 32.6074M32.6074 24.3054C35.7046 21.925 39.1177 19.9379 42.7771 18.4204M65.874 15.3809C63.9515 15.1302 61.9909 15 60 15C58.0079 15 56.046 15.1299 54.1223 15.3809"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M60 33.0001V60.0001L78 69.0001"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M84.375 20.625H99.375L84.375 35.625H99.375"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgLongWait;


================================================
FILE: web/lib/opal/src/illustrations/no-access.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgNoAccess = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M18.75 22.5V105L60 105L101.25 105V22.5C101.25 18.3578 97.8921 15 93.75 15H60H26.25C22.1079 15 18.75 18.3578 18.75 22.5Z"
      fill="white"
    />
    <path
      d="M18.75 105V22.5C18.75 18.3578 22.1079 15 26.25 15H60M18.75 105L60 105M18.75 105L11.25 105M101.25 105V22.5C101.25 18.3578 97.8921 15 93.75 15H60M101.25 105L60 105M101.25 105H108.75M60 93.75V105M60 15V26.25"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M46.875 58.1249V50.625C46.875 43.3762 52.7512 37.5 60 37.5C67.2487 37.5 73.125 43.3762 73.125 50.625V58.125M46.875 58.1249L44.9999 58.1249C42.9289 58.125 41.25 59.8039 41.25 61.8749V78.75C41.25 80.821 42.9289 82.5 45 82.5L75 82.5C77.071 82.5 78.75 80.821 78.75 78.75V61.875C78.75 59.8039 77.071 58.125 75 58.125H73.125M46.875 58.1249L73.125 58.125M60 67.4999V73.1249"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgNoAccess;


================================================
FILE: web/lib/opal/src/illustrations/no-result.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgNoResult = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path d="M91.875 45H28.125L11.25 112.5H108.75L91.875 45Z" fill="white" />
    <path
      d="M26.25 45L50.0345 23.8582C52.8762 21.3323 56.4381 20.0693 60 20.0693C63.5619 20.0693 67.1238 21.3323 69.9655 23.8582L93.75 45H26.25Z"
      fill="#E6E6E6"
    />
    <path
      d="M60 7.5V20.0693M60 20.0693C56.4381 20.0693 52.8762 21.3323 50.0345 23.8582L26.25 45H93.75L69.9655 23.8582C67.1238 21.3323 63.5619 20.0693 60 20.0693Z"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M43.125 99.375L33.75 90M60 91.875V78.75M76.875 99.375L86.25 90"
      stroke="#FFC733"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgNoResult;


================================================
FILE: web/lib/opal/src/illustrations/not-found.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgNotFound = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M61.875 95.625C80.5146 95.625 95.625 80.5146 95.625 61.875C95.625 43.2354 80.5146 28.125 61.875 28.125C43.2354 28.125 28.125 43.2354 28.125 61.875C28.125 80.5146 43.2354 95.625 61.875 95.625Z"
      fill="white"
    />
    <path
      d="M103.125 103.125L85.7109 85.7109M95.625 61.875C95.625 80.5146 80.5146 95.625 61.875 95.625C43.2354 95.625 28.125 80.5146 28.125 61.875C28.125 43.2354 43.2354 28.125 61.875 28.125C80.5146 28.125 95.625 43.2354 95.625 61.875Z"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M56.713 46.3302C54.7486 47.4847 53.2561 49.2972 52.5 51.4466H51.5625V51.1341C52.3923 48.7766 54.0843 46.7901 56.239 45.5237C58.3943 44.257 60.9272 43.7937 63.3911 44.2163C65.855 44.639 68.091 45.9183 69.7009 47.8308C71.3108 49.7433 72.1912 52.1645 72.1875 54.6643C72.1868 58.3647 69.5002 61.0222 67.0935 62.6734C65.8647 63.5165 64.6397 64.1423 63.728 64.5594C63.2713 64.7682 62.8885 64.9259 62.6184 65.0318V67.5H61.875L61.875 64.3111C61.875 64.3111 71.25 61.095 71.25 54.6628C71.2534 52.3842 70.4503 50.178 68.9829 48.4348C67.5155 46.6917 65.4785 45.5241 63.2328 45.1389C60.987 44.7537 58.6774 45.1757 56.713 46.3302Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M62.8125 76.875H60.9375V78.75H62.8125V76.875Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M20.625 50.625H11.25M30 30L22.5 22.5M50.625 20.625V11.25"
      stroke="#FFC733"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgNotFound;


================================================
FILE: web/lib/opal/src/illustrations/overflow.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgOverflow = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M22.5 71.2501L25.3301 91.0607C25.8579 94.7555 29.0223 97.5 32.7547 97.5H87.2453C90.9777 97.5 94.1421 94.7555 94.6699 91.0607L97.5 71.2501H22.5Z"
      fill="#E6E6E6"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M18.7501 46.8752L78.5183 52.4717M32.7965 34.583L91.8752 45.0002M45.1839 22.5002L103.125 38.0255M90.0002 61.8752H30.0002"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgOverflow;


================================================
FILE: web/lib/opal/src/illustrations/plug-broken.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgPlugBroken = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M31.875 78.75L24.375 71.25M50.625 78.75L58.125 71.25M41.25 73.125V63.75"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M97.5 30H90H71.25H63.75V43.125C63.75 52.4448 71.3052 60 80.625 60C89.9448 60 97.5 52.4448 97.5 43.125V30Z"
      fill="#E6E6E6"
    />
    <path
      d="M50.625 90H95.625C99.7671 90 103.125 93.3579 103.125 97.5C103.125 101.642 99.7671 105 95.625 105H88.125C83.9829 105 80.625 101.642 80.625 97.5V60M31.875 90H16.875M90 30V18.75M90 30H97.5M90 30H71.25M97.5 30V43.125C97.5 52.4448 89.9448 60 80.625 60M97.5 30H103.125M63.75 30V43.125C63.75 52.4448 71.3052 60 80.625 60M63.75 30H71.25M63.75 30H58.125M71.25 30V18.75"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgPlugBroken;


================================================
FILE: web/lib/opal/src/illustrations/timeout.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgTimeout = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M26.25 101.25H78.75V93.75L62.6392 83.3931C56.4628 79.4225 48.5372 79.4225 42.3608 83.3931L26.25 93.75V101.25Z"
      fill="#E6E6E6"
    />
    <path
      d="M74.4446 77.8572L52.5 63.75L30.5554 77.8572C27.8721 79.5822 26.25 82.5533 26.25 85.7433V93.75L42.3608 83.3931C48.5372 79.4225 56.4628 79.4225 62.6392 83.3931L78.75 93.75V85.7433C78.75 82.5533 77.1279 79.5822 74.4446 77.8572Z"
      fill="white"
    />
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M26.25 26.25H68.7803C67.9512 28.5958 67.5 31.1202 67.5 33.75C67.5 40.0285 70.0716 45.7064 74.219 49.7878L52.5 63.75L30.5554 49.6428C27.8721 47.9178 26.25 44.9467 26.25 41.7567V26.25Z"
      fill="white"
    />
    <path
      d="M112.5 33.75C112.5 21.3236 102.426 11.25 90 11.25C80.2034 11.25 71.8691 17.511 68.7803 26.25C67.9512 28.5958 67.5 31.1202 67.5 33.75C67.5 40.0285 70.0716 45.7064 74.219 49.7878C78.2801 53.7843 83.8521 56.25 90 56.25C102.426 56.25 112.5 46.1764 112.5 33.75Z"
      fill="#F0F0F0"
    />
    <path
      d="M52.5 63.75L30.5554 49.6428C27.8721 47.9178 26.25 44.9467 26.25 41.7567V26.25M52.5 63.75L74.4446 77.8572C77.1279 79.5822 78.75 82.5533 78.75 85.7433V101.25M52.5 63.75L30.5554 77.8572C27.8721 79.5822 26.25 82.5533 26.25 85.7433V101.25M52.5 63.75L72.6052 50.8252M26.25 26.25H18.75M26.25 26.25H66.8006M78.75 101.25H26.25M78.75 101.25H86.25M26.25 101.25H18.75"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M82.5 26.25H97.5L82.5 41.25H97.5"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgTimeout;


================================================
FILE: web/lib/opal/src/illustrations/un-plugged.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUnPlugged = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      fillRule="evenodd"
      clipRule="evenodd"
      d="M56.25 16.875C43.8236 16.875 33.75 26.9486 33.75 39.375L33.75 54.375C33.75 66.8014 43.8236 76.875 56.25 76.875H71.25C83.6764 76.875 93.75 66.8014 93.75 54.375V39.375C93.75 26.9486 83.6764 16.875 71.25 16.875H56.25ZM67.5 65.625V60C67.5 56.8934 64.9816 54.375 61.875 54.375C58.7684 54.375 56.25 56.8934 56.25 60V65.625H67.5Z"
      fill="#F0F0F0"
    />
    <path
      d="M67.5 60V65.625H56.25V60C56.25 56.8934 58.7684 54.375 61.875 54.375C64.9816 54.375 67.5 56.8934 67.5 60Z"
      fill="white"
    />
    <path
      d="M48.75 46.875V35.625M75 46.875V35.625M67.5 65.625V60C67.5 56.8934 64.9816 54.375 61.875 54.375C58.7684 54.375 56.25 56.8934 56.25 60V65.625H67.5ZM56.25 76.875H71.25C83.6764 76.875 93.75 66.8014 93.75 54.375V39.375C93.75 26.9486 83.6764 16.875 71.25 16.875H56.25C43.8236 16.875 33.75 26.9486 33.75 39.375L33.75 54.375C33.75 66.8014 43.8236 76.875 56.25 76.875Z"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path d="M26.25 87.1875V97.5H28.125V87.1875H26.25Z" fill="#F0F0F0" />
    <path
      d="M52.5 88.125V97.5H50.625V88.125C50.625 87.6072 51.0447 87.1875 51.5625 87.1875C52.0803 87.1875 52.5 87.6072 52.5 88.125Z"
      fill="#F0F0F0"
    />
    <path
      d="M26.25 87.1875V97.5H28.125V87.1875H26.25Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M52.5 88.125V97.5H50.625V88.125C50.625 87.6072 51.0447 87.1875 51.5625 87.1875C52.0803 87.1875 52.5 87.6072 52.5 88.125Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M18.1798 109.239C16.3504 108.958 15 107.384 15 105.533V97.5H63.75V105V108.75C63.75 110.821 62.0711 112.5 60 112.5H39.6618C39.4709 112.5 39.2802 112.485 39.0916 112.456L18.1798 109.239Z"
      fill="#E6E6E6"
    />
    <path
      d="M63.75 105H105M63.75 105V108.75C63.75 110.821 62.0711 112.5 60 112.5H39.6618C39.4709 112.5 39.2802 112.485 39.0916 112.456L18.1798 109.239C16.3504 108.958 15 107.384 15 105.533V97.5H63.75V105Z"
      stroke="#A4A4A4"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M93.75 15L95.625 9.37498M103.125 31.875L108.75 33.75M101.25 22.5L106.875 18.75"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUnPlugged;


================================================
FILE: web/lib/opal/src/illustrations/usage-alert.tsx
================================================
import type { IconProps } from "@opal/types";
const SvgUsageAlert = ({ size, ...props }: IconProps) => (
  <svg
    width={size}
    height={size}
    viewBox="0 0 120 120"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    {...props}
  >
    <path
      d="M15 90C15 85.8578 18.3579 82.5 22.5 82.5L60 82.5C64.1421 82.5 67.5 85.8578 67.5 90L67.5 97.5C67.5 101.642 64.1421 105 60 105H22.5C18.3579 105 15 101.642 15 97.5L15 90Z"
      fill="#F0F0F0"
    />
    <path
      d="M15 22.5C15 18.3579 18.3579 15 22.5 15H45C49.1421 15 52.5 18.3579 52.5 22.5L52.5 29.9999C52.5 34.1421 49.1421 37.4999 45 37.4999H22.5C18.3579 37.4999 15 34.1421 15 29.9999V22.5Z"
      fill="#F0F0F0"
    />
    <path
      d="M52.5 93.75H26.25M37.5 26.25H26.25M22.5 15H45C49.1421 15 52.5 18.3579 52.5 22.5L52.5 29.9999C52.5 34.1421 49.1421 37.4999 45 37.4999H22.5C18.3579 37.4999 15 34.1421 15 29.9999V22.5C15 18.3579 18.3579 15 22.5 15ZM60 105H22.5C18.3579 105 15 101.642 15 97.5L15 90C15 85.8578 18.3579 82.5 22.5 82.5L60 82.5C64.1421 82.5 67.5 85.8578 67.5 90L67.5 97.5C67.5 101.642 64.1421 105 60 105Z"
      stroke="#CCCCCC"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M78.75 60H71.25M90 37.5V30M103.125 50.625H110.625M99.375 41.25L105 35.625M82.5 71.25L22.5 71.25C18.3579 71.25 15 67.8922 15 63.75V56.25C15 52.1079 18.3579 48.75 22.5 48.75L82.5 48.75C86.6421 48.75 90 52.1079 90 56.25V63.75C90 67.8922 86.6421 71.25 82.5 71.25Z"
      stroke="#EC5B13"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
    <path
      d="M60 60H26.25"
      stroke="#F5A88B"
      strokeWidth={3.5}
      strokeLinecap="round"
      strokeLinejoin="round"
    />
  </svg>
);
export default SvgUsageAlert;


================================================
FILE: web/lib/opal/src/layouts/README.md
================================================
# @opal/layouts

**Import:** `import { Content, ContentAction, IllustrationContent } from "@opal/layouts";`

Layout primitives for composing content blocks. These components handle sizing, font selection, icon alignment, and optional inline editing — things that are tedious to get right by hand and easy to get wrong.

## Components

| Component | Description | Docs |
|---|---|---|
| [`Content`](./content/README.md) | Icon + title + description row. Routes to an internal layout (`ContentXl`, `ContentLg`, `ContentMd`, or `ContentSm`) based on `sizePreset` and `variant`. | [Content README](./content/README.md) |
| [`ContentAction`](./content-action/README.md) | Wraps `Content` in a flex-row with an optional `rightChildren` slot for action buttons. Adds padding alignment via the shared `SizeVariant` scale. | [ContentAction README](./content-action/README.md) |
| [`IllustrationContent`](./illustration-content/README.md) | Center-aligned illustration + title + description stack for empty states, error pages, and placeholders. | [IllustrationContent README](./illustration-content/README.md) |

## Quick Start

```tsx
import { Content, ContentAction, IllustrationContent } from "@opal/layouts";
import { Button } from "@opal/components";
import SvgSettings from "@opal/icons/settings";
import SvgNoResult from "@opal/illustrations/no-result";

// Simple heading
<Content
  icon={SvgSettings}
  title="Account Settings"
  description="Manage your preferences"
  sizePreset="headline"
  variant="heading"
/>

// Label with tag
<Content
  icon={SvgSettings}
  title="OpenAI"
  description="GPT"
  sizePreset="main-content"
  variant="section"
  tag={{ title: "Default", color: "blue" }}
/>

// Row with action button
<ContentAction
  icon={SvgSettings}
  title="Provider Name"
  description="Some description"
  sizePreset="main-content"
  variant="section"
  paddingVariant="lg"
  rightChildren={
    <Button icon={SvgSettings} prominence="tertiary" />
  }
/>

// Empty state with illustration
<IllustrationContent
  illustration={SvgNoResult}
  title="No results found"
  description="Try adjusting your search or filters."
/>
```

## Architecture

### Two-axis design (`Content`)

`Content` uses a two-axis system:

- **`sizePreset`** — controls sizing tokens (icon size, padding, gap, font, line-height).
- **`variant`** — controls structural layout (icon placement, description rendering).

Valid preset/variant combinations are enforced at the type level via a discriminated union. See the [Content README](./content/README.md) for the full matrix.

### Shared size scale (`ContentAction`)

`ContentAction` uses the same `SizeVariant` scale (`lg`, `md`, `sm`, `xs`, `2xs`, `fit`) defined in `@opal/shared` that powers `Interactive.Container` and `Button`. This ensures that padding on content rows aligns with adjacent interactive elements at the same size.

## Exports

From `@opal/layouts`:

```ts
// Components
Content
ContentAction
IllustrationContent

// Types
ContentProps
ContentActionProps
IllustrationContentProps
SizePreset
ContentVariant
```

## Internal Layout Components

These are not exported — `Content` routes to them automatically:

| Layout | Used when | File |
|---|---|---|
| `ContentXl` | `sizePreset` is `headline` or `section` with `variant="heading"` | `content/ContentXl.tsx` |
| `ContentLg` | `sizePreset` is `headline` or `section` with `variant="section"` | `content/ContentLg.tsx` |
| `ContentMd` | `sizePreset` is `main-content`, `main-ui`, or `secondary` with `variant="section"` | `content/ContentMd.tsx` |
| `ContentSm` | `variant="body"` | `content/ContentSm.tsx` |


================================================
FILE: web/lib/opal/src/layouts/cards/header-layout/CardHeaderLayout.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { CardHeaderLayout } from "@opal/layouts";
import { Button } from "@opal/components";
import {
  SvgArrowExchange,
  SvgCheckSquare,
  SvgGlobe,
  SvgSettings,
  SvgUnplug,
} from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { Decorator } from "@storybook/react";

const withTooltipProvider: Decorator = (Story) => (
  <TooltipPrimitive.Provider>
    <Story />
  </TooltipPrimitive.Provider>
);

const meta = {
  title: "Layouts/CardHeaderLayout",
  component: CardHeaderLayout,
  tags: ["autodocs"],
  decorators: [withTooltipProvider],
  parameters: {
    layout: "centered",
  },
} satisfies Meta<typeof CardHeaderLayout>;

export default meta;

type Story = StoryObj<typeof meta>;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

export const Default: Story = {
  render: () => (
    <div className="w-[28rem] border rounded-16">
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={SvgGlobe}
        title="Google Search"
        description="Web search provider"
        rightChildren={
          <Button prominence="tertiary" rightIcon={SvgArrowExchange}>
            Connect
          </Button>
        }
      />
    </div>
  ),
};

export const WithBothSlots: Story = {
  render: () => (
    <div className="w-[28rem] border rounded-16">
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={SvgGlobe}
        title="Google Search"
        description="Currently the default provider."
        rightChildren={
          <Button variant="action" prominence="tertiary" icon={SvgCheckSquare}>
            Current Default
          </Button>
        }
        bottomRightChildren={
          <>
            <Button
              icon={SvgUnplug}
              tooltip="Disconnect"
              prominence="tertiary"
              size="sm"
            />
            <Button
              icon={SvgSettings}
              tooltip="Edit"
              prominence="tertiary"
              size="sm"
            />
          </>
        }
      />
    </div>
  ),
};

export const RightChildrenOnly: Story = {
  render: () => (
    <div className="w-[28rem] border rounded-16">
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={SvgGlobe}
        title="OpenAI"
        description="Not configured"
        rightChildren={
          <Button prominence="tertiary" rightIcon={SvgArrowExchange}>
            Connect
          </Button>
        }
      />
    </div>
  ),
};

export const NoRightChildren: Story = {
  render: () => (
    <div className="w-[28rem] border rounded-16">
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={SvgGlobe}
        title="Section Header"
        description="No actions on the right."
      />
    </div>
  ),
};

export const LongContent: Story = {
  render: () => (
    <div className="w-[28rem] border rounded-16">
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={SvgGlobe}
        title="Very Long Provider Name That Should Truncate"
        description="This is a much longer description that tests how the layout handles overflow when the content area needs to shrink."
        rightChildren={
          <Button variant="action" prominence="tertiary" icon={SvgCheckSquare}>
            Current Default
          </Button>
        }
        bottomRightChildren={
          <>
            <Button
              icon={SvgUnplug}
              prominence="tertiary"
              size="sm"
              tooltip="Disconnect"
            />
            <Button
              icon={SvgSettings}
              prominence="tertiary"
              size="sm"
              tooltip="Edit"
            />
          </>
        }
      />
    </div>
  ),
};


================================================
FILE: web/lib/opal/src/layouts/cards/header-layout/README.md
================================================
# CardHeaderLayout

**Import:** `import { CardHeaderLayout, type CardHeaderLayoutProps } from "@opal/layouts";`

A card header layout that pairs a [`Content`](../../content/README.md) block with a right-side column of vertically stacked children.

## Why CardHeaderLayout?

[`ContentAction`](../../content-action/README.md) provides a single `rightChildren` slot. Card headers typically need two distinct right-side regions — a primary action on top and secondary actions on the bottom. `CardHeaderLayout` provides this with `rightChildren` and `bottomRightChildren` slots, with no padding or gap between them so the caller has full control over spacing.

## Props

Inherits **all** props from [`Content`](../../content/README.md) (icon, title, description, sizePreset, variant, etc.) plus:

| Prop | Type | Default | Description |
|---|---|---|---|
| `rightChildren` | `ReactNode` | `undefined` | Content rendered to the right of the Content block (top of right column). |
| `bottomRightChildren` | `ReactNode` | `undefined` | Content rendered below `rightChildren` in the same column. Laid out as `flex flex-row`. |

## Layout Structure

```
┌──────────────────────────────────────────────────────┐
│ [Content (p-2, self-start)]  [rightChildren]         │
│  icon + title + description  [bottomRightChildren]   │
└──────────────────────────────────────────────────────┘
```

- Outer wrapper: `flex flex-row items-stretch w-full`
- Content area: `flex-1 min-w-0 self-start p-2` — top-aligned with fixed padding
- Right column: `flex flex-col items-end justify-between shrink-0` — no padding, no gap
- `bottomRightChildren` wrapper: `flex flex-row` — lays children out horizontally

The right column uses `justify-between` so when both slots are present, `rightChildren` sits at the top and `bottomRightChildren` at the bottom.

## Usage

### Card with primary and secondary actions

```tsx
import { CardHeaderLayout } from "@opal/layouts";
import { Button } from "@opal/components";
import { SvgGlobe, SvgSettings, SvgUnplug, SvgCheckSquare } from "@opal/icons";

<CardHeaderLayout
  icon={SvgGlobe}
  title="Google Search"
  description="Web search provider"
  sizePreset="main-ui"
  variant="section"
  rightChildren={
    <Button icon={SvgCheckSquare} variant="action" prominence="tertiary">
      Current Default
    </Button>
  }
  bottomRightChildren={
    <>
      <Button icon={SvgUnplug} size="sm" prominence="tertiary" tooltip="Disconnect" />
      <Button icon={SvgSettings} size="sm" prominence="tertiary" tooltip="Edit" />
    </>
  }
/>
```

### Card with only a connect action

```tsx
<CardHeaderLayout
  icon={SvgCloud}
  title="OpenAI"
  description="Not configured"
  sizePreset="main-ui"
  variant="section"
  rightChildren={
    <Button rightIcon={SvgArrowExchange} prominence="tertiary">
      Connect
    </Button>
  }
/>
```

### No right children

```tsx
<CardHeaderLayout
  icon={SvgInfo}
  title="Section Header"
  description="Description text"
  sizePreset="main-content"
  variant="section"
/>
```

When both `rightChildren` and `bottomRightChildren` are omitted, the component renders only the padded `Content`.


================================================
FILE: web/lib/opal/src/layouts/cards/header-layout/components.tsx
================================================
import { Content, type ContentProps } from "@opal/layouts/content/components";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type CardHeaderLayoutProps = ContentProps & {
  /** Content rendered to the right of the Content block. */
  rightChildren?: React.ReactNode;

  /** Content rendered below `rightChildren` in the same column. */
  bottomRightChildren?: React.ReactNode;
};

// ---------------------------------------------------------------------------
// CardHeaderLayout
// ---------------------------------------------------------------------------

/**
 * A card header layout that pairs a {@link Content} block (with `p-2`)
 * with a right-side column.
 *
 * The right column contains two vertically stacked slots —
 * `rightChildren` on top, `bottomRightChildren` below — with no
 * padding or gap between them.
 *
 * @example
 * ```tsx
 * <CardHeaderLayout
 *   icon={SvgGlobe}
 *   title="Google"
 *   description="Search engine"
 *   sizePreset="main-ui"
 *   variant="section"
 *   rightChildren={<Button>Connect</Button>}
 *   bottomRightChildren={
 *     <>
 *       <Button icon={SvgUnplug} size="sm" prominence="tertiary" />
 *       <Button icon={SvgSettings} size="sm" prominence="tertiary" />
 *     </>
 *   }
 * />
 * ```
 */
function CardHeaderLayout({
  rightChildren,
  bottomRightChildren,
  ...contentProps
}: CardHeaderLayoutProps) {
  const hasRight = rightChildren || bottomRightChildren;

  return (
    <div className="flex flex-row items-stretch w-full">
      <div className="flex-1 min-w-0 self-start p-2">
        <Content {...contentProps} />
      </div>
      {hasRight && (
        <div className="flex flex-col items-end shrink-0">
          {rightChildren && <div className="flex-1">{rightChildren}</div>}
          {bottomRightChildren && (
            <div className="flex flex-row">{bottomRightChildren}</div>
          )}
        </div>
      )}
    </div>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { CardHeaderLayout, type CardHeaderLayoutProps };


================================================
FILE: web/lib/opal/src/layouts/content/Content.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { Content } from "@opal/layouts";
import { SvgSettings, SvgStar, SvgRefreshCw } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta = {
  title: "Layouts/Content",
  component: Content,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
} satisfies Meta<typeof Content>;

export default meta;

type Story = StoryObj<typeof meta>;

// ---------------------------------------------------------------------------
// XL stories (sizePreset: headline | section, variant: heading)
// ---------------------------------------------------------------------------

export const XlHeadline: Story = {
  args: {
    sizePreset: "headline",
    variant: "heading",
    title: "Welcome to Onyx",
    description: "Your enterprise search and AI assistant platform.",
  },
};

export const XlSection: Story = {
  args: {
    sizePreset: "section",
    variant: "heading",
    title: "Configuration",
  },
};

// ---------------------------------------------------------------------------
// LG stories (sizePreset: headline | section, variant: section)
// ---------------------------------------------------------------------------

export const LgHeadline: Story = {
  args: {
    sizePreset: "headline",
    variant: "section",
    title: "Connectors Overview",
  },
};

export const LgSection: Story = {
  args: {
    sizePreset: "section",
    variant: "section",
    title: "Data Sources",
  },
};

// ---------------------------------------------------------------------------
// MD stories (sizePreset: main-content | main-ui | secondary, variant: section)
// ---------------------------------------------------------------------------

export const MdMainContent: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "General Settings",
    description: "Manage your workspace preferences.",
    icon: SvgSettings,
  },
};

export const MdWithTag: Story = {
  args: {
    sizePreset: "main-ui",
    variant: "section",
    title: "Knowledge Graph",
    tag: { title: "Beta", color: "blue" },
  },
};

export const MdMuted: Story = {
  args: {
    sizePreset: "secondary",
    variant: "section",
    title: "Advanced Options",
    description: "Fine-tune model behavior and parameters.",
  },
};

// ---------------------------------------------------------------------------
// SM stories (sizePreset: main-content | main-ui | secondary, variant: body)
// ---------------------------------------------------------------------------

export const SmBody: Story = {
  args: {
    sizePreset: "secondary",
    variant: "body",
    title: "Last synced 2 minutes ago",
  },
};

export const SmStacked: Story = {
  args: {
    sizePreset: "secondary",
    variant: "body",
    title: "Document count",
    orientation: "stacked",
  },
};

// ---------------------------------------------------------------------------
// Editable
// ---------------------------------------------------------------------------

export const Editable: Story = {
  args: {
    sizePreset: "main-ui",
    variant: "section",
    title: "Editable Title",
    editable: true,
  },
};

// ---------------------------------------------------------------------------
// MD — optional prop
// ---------------------------------------------------------------------------

export const MdWithOptional: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "API Key",
    optional: true,
  },
};

// ---------------------------------------------------------------------------
// MD — auxIcon prop
// ---------------------------------------------------------------------------

export const MdWithAuxIcon: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "Connection Status",
    auxIcon: "warning",
  },
};

// ---------------------------------------------------------------------------
// XL — moreIcon1 / moreIcon2 props
// ---------------------------------------------------------------------------

export const XlWithMoreIcons: Story = {
  args: {
    sizePreset: "headline",
    variant: "heading",
    title: "Dashboard",
    moreIcon1: SvgStar,
    moreIcon2: SvgRefreshCw,
  },
};

// ---------------------------------------------------------------------------
// SM — prominence: muted
// ---------------------------------------------------------------------------

export const SmMuted: Story = {
  args: {
    sizePreset: "secondary",
    variant: "body",
    title: "Updated 5 min ago",
    prominence: "muted",
  },
};

// ---------------------------------------------------------------------------
// widthVariant: full
// ---------------------------------------------------------------------------

export const WidthFull: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "Full Width Content",
    widthVariant: "full",
  },
  decorators: [
    (Story) => (
      <div style={{ width: 600, border: "1px dashed gray" }}>
        <Story />
      </div>
    ),
  ],
};


================================================
FILE: web/lib/opal/src/layouts/content/ContentLg.tsx
================================================
"use client";

import { Button } from "@opal/components/buttons/button/components";
import type { ContainerSizeVariants } from "@opal/types";
import SvgEdit from "@opal/icons/edit";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text, type TextFont } from "@opal/components/text/components";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { cn } from "@opal/utils";
import { useState } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentLgSizePreset = "headline" | "section";

interface ContentLgPresetConfig {
  /** Icon width/height (CSS value). */
  iconSize: string;
  /** Tailwind padding class for the icon container. */
  iconContainerPadding: string;
  /** Gap between icon container and content (CSS value). */
  gap: string;
  /** Opal font name for the title (without `font-` prefix). */
  titleFont: TextFont;
  /** Title line-height — also used as icon container min-height (CSS value). */
  lineHeight: string;
  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */
  editButtonSize: ContainerSizeVariants;
  /** Tailwind padding class for the edit button container. */
  editButtonPadding: string;
}

interface ContentLgProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Main title text. */
  title: string | RichStr;

  /** Optional description below the title. */
  description?: string | RichStr;

  /** Enable inline editing of the title. */
  editable?: boolean;

  /** Called when the user commits an edit. */
  onTitleChange?: (newTitle: string) => void;

  /** Size preset. Default: `"headline"`. */
  sizePreset?: ContentLgSizePreset;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// Presets
// ---------------------------------------------------------------------------

const CONTENT_LG_PRESETS: Record<ContentLgSizePreset, ContentLgPresetConfig> = {
  headline: {
    iconSize: "2rem",
    iconContainerPadding: "p-0.5",
    gap: "0.25rem",
    titleFont: "heading-h2",
    lineHeight: "2.25rem",
    editButtonSize: "md",
    editButtonPadding: "p-1",
  },
  section: {
    iconSize: "1.25rem",
    iconContainerPadding: "p-1",
    gap: "0rem",
    titleFont: "heading-h3-muted",
    lineHeight: "1.75rem",
    editButtonSize: "sm",
    editButtonPadding: "p-0.5",
  },
};

// ---------------------------------------------------------------------------
// ContentLg
// ---------------------------------------------------------------------------

function ContentLg({
  sizePreset = "headline",
  icon: Icon,
  title,
  description,
  editable,
  onTitleChange,
  ref,
}: ContentLgProps) {
  const [editing, setEditing] = useState(false);
  const [editValue, setEditValue] = useState(toPlainString(title));

  const config = CONTENT_LG_PRESETS[sizePreset];

  function startEditing() {
    setEditValue(toPlainString(title));
    setEditing(true);
  }

  function commit() {
    const value = editValue.trim();
    if (value && value !== toPlainString(title)) onTitleChange?.(value);
    setEditing(false);
  }

  return (
    <div ref={ref} className="opal-content-lg" style={{ gap: config.gap }}>
      {Icon && (
        <div
          className={cn(
            "opal-content-lg-icon-container shrink-0",
            config.iconContainerPadding
          )}
          style={{ minHeight: config.lineHeight }}
        >
          <Icon
            className="opal-content-lg-icon"
            style={{ width: config.iconSize, height: config.iconSize }}
          />
        </div>
      )}

      <div className="opal-content-lg-body">
        <div className="opal-content-lg-title-row">
          {editing ? (
            <div className="opal-content-lg-input-sizer">
              <span
                className={cn(
                  "opal-content-lg-input-mirror",
                  `font-${config.titleFont}`
                )}
              >
                {editValue || "\u00A0"}
              </span>
              <input
                className={cn(
                  "opal-content-lg-input",
                  `font-${config.titleFont}`,
                  "text-text-04"
                )}
                value={editValue}
                onChange={(e) => setEditValue(e.target.value)}
                size={1}
                autoFocus
                onFocus={(e) => e.currentTarget.select()}
                onBlur={commit}
                onKeyDown={(e) => {
                  if (e.key === "Enter") commit();
                  if (e.key === "Escape") {
                    setEditValue(toPlainString(title));
                    setEditing(false);
                  }
                }}
                style={{ height: config.lineHeight }}
              />
            </div>
          ) : (
            <Text
              font={config.titleFont}
              color="inherit"
              maxLines={1}
              title={toPlainString(title)}
              onClick={editable ? startEditing : undefined}
            >
              {title}
            </Text>
          )}

          {editable && !editing && (
            <div
              className={cn(
                "opal-content-lg-edit-button",
                config.editButtonPadding
              )}
            >
              <Button
                icon={SvgEdit}
                prominence="internal"
                size={config.editButtonSize}
                tooltip="Edit"
                tooltipSide="right"
                onClick={startEditing}
              />
            </div>
          )}
        </div>

        {description && toPlainString(description) && (
          <div className="opal-content-lg-description">
            <Text font="secondary-body" color="text-03" as="p">
              {description}
            </Text>
          </div>
        )}
      </div>
    </div>
  );
}

export { ContentLg, type ContentLgProps, type ContentLgSizePreset };


================================================
FILE: web/lib/opal/src/layouts/content/ContentMd.tsx
================================================
"use client";

import { Button } from "@opal/components/buttons/button/components";
import { Tag, type TagProps } from "@opal/components/tag/components";
import type { ContainerSizeVariants } from "@opal/types";
import SvgAlertCircle from "@opal/icons/alert-circle";
import SvgAlertTriangle from "@opal/icons/alert-triangle";
import SvgEdit from "@opal/icons/edit";
import SvgXOctagon from "@opal/icons/x-octagon";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text, type TextFont } from "@opal/components/text/components";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { cn } from "@opal/utils";
import { useRef, useState } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentMdSizePreset = "main-content" | "main-ui" | "secondary";

type ContentMdAuxIcon = "info-gray" | "info-blue" | "warning" | "error";

type ContentMdSuffix = "optional" | (string & {});

interface ContentMdPresetConfig {
  iconSize: string;
  iconContainerPadding: string;
  iconColorClass: string;
  titleFont: TextFont;
  lineHeight: string;
  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */
  editButtonSize: ContainerSizeVariants;
  editButtonPadding: string;
  optionalFont: TextFont;
  /** Aux icon size = lineHeight − 2 × p-0.5. */
  auxIconSize: string;
  /** Left indent for the description so it aligns with the title (past the icon). */
  descriptionIndent: string;
}

interface ContentMdProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Main title text. */
  title: string | RichStr;

  /** Optional description text below the title. */
  description?: string | RichStr;

  /** Enable inline editing of the title. */
  editable?: boolean;

  /** Called when the user commits an edit. */
  onTitleChange?: (newTitle: string) => void;

  /**
   * Muted suffix rendered beside the title.
   * Use `"optional"` for the standard "(Optional)" label, or pass any string.
   */
  suffix?: ContentMdSuffix;

  /** Auxiliary status icon rendered beside the title. */
  auxIcon?: ContentMdAuxIcon;

  /** Tag rendered beside the title. */
  tag?: TagProps;

  /** Size preset. Default: `"main-ui"`. */
  sizePreset?: ContentMdSizePreset;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// Presets
// ---------------------------------------------------------------------------

const CONTENT_MD_PRESETS: Record<ContentMdSizePreset, ContentMdPresetConfig> = {
  "main-content": {
    iconSize: "1rem",
    iconContainerPadding: "p-1",
    iconColorClass: "text-text-04",
    titleFont: "main-content-emphasis",
    lineHeight: "1.5rem",
    editButtonSize: "sm",
    editButtonPadding: "p-0",
    optionalFont: "main-content-muted",
    auxIconSize: "1.25rem",
    descriptionIndent: "1.625rem",
  },
  "main-ui": {
    iconSize: "1rem",
    iconContainerPadding: "p-0.5",
    iconColorClass: "text-text-03",
    titleFont: "main-ui-action",
    lineHeight: "1.25rem",
    editButtonSize: "xs",
    editButtonPadding: "p-0",
    optionalFont: "main-ui-muted",
    auxIconSize: "1rem",
    descriptionIndent: "1.375rem",
  },
  secondary: {
    iconSize: "0.75rem",
    iconContainerPadding: "p-0.5",
    iconColorClass: "text-text-04",
    titleFont: "secondary-action",
    lineHeight: "1rem",
    editButtonSize: "2xs",
    editButtonPadding: "p-0",
    optionalFont: "secondary-action",
    auxIconSize: "0.75rem",
    descriptionIndent: "1.125rem",
  },
};

// ---------------------------------------------------------------------------
// ContentMd
// ---------------------------------------------------------------------------

const AUX_ICON_CONFIG: Record<
  ContentMdAuxIcon,
  { icon: IconFunctionComponent; colorClass: string }
> = {
  "info-gray": { icon: SvgAlertCircle, colorClass: "text-text-02" },
  "info-blue": { icon: SvgAlertCircle, colorClass: "text-status-info-05" },
  warning: { icon: SvgAlertTriangle, colorClass: "text-status-warning-05" },
  error: { icon: SvgXOctagon, colorClass: "text-status-error-05" },
};

function ContentMd({
  icon: Icon,
  title,
  description,
  editable,
  onTitleChange,
  suffix,
  auxIcon,
  tag,
  sizePreset = "main-ui",
  ref,
}: ContentMdProps) {
  const [editing, setEditing] = useState(false);
  const [editValue, setEditValue] = useState(toPlainString(title));
  const inputRef = useRef<HTMLInputElement>(null);

  const config = CONTENT_MD_PRESETS[sizePreset];

  function startEditing() {
    setEditValue(toPlainString(title));
    setEditing(true);
  }

  function commit() {
    const value = editValue.trim();
    if (value && value !== toPlainString(title)) onTitleChange?.(value);
    setEditing(false);
  }

  return (
    <div ref={ref} className="opal-content-md">
      <div
        className="opal-content-md-header"
        data-editing={editing || undefined}
      >
        {Icon && (
          <div
            className={cn(
              "opal-content-md-icon-container shrink-0",
              config.iconContainerPadding
            )}
            style={{ minHeight: config.lineHeight }}
          >
            <Icon
              className={cn("opal-content-md-icon", config.iconColorClass)}
              style={{ width: config.iconSize, height: config.iconSize }}
            />
          </div>
        )}

        <div className="opal-content-md-title-row">
          {editing ? (
            <div className="opal-content-md-input-sizer">
              <span
                className={cn(
                  "opal-content-md-input-mirror",
                  `font-${config.titleFont}`
                )}
              >
                {editValue || "\u00A0"}
              </span>
              <input
                ref={inputRef}
                className={cn(
                  "opal-content-md-input",
                  `font-${config.titleFont}`,
                  "text-text-04"
                )}
                value={editValue}
                onChange={(e) => setEditValue(e.target.value)}
                size={1}
                autoFocus
                onFocus={(e) => e.currentTarget.select()}
                onBlur={commit}
                onKeyDown={(e) => {
                  if (e.key === "Enter") commit();
                  if (e.key === "Escape") {
                    setEditValue(toPlainString(title));
                    setEditing(false);
                  }
                }}
                style={{ height: config.lineHeight }}
              />
            </div>
          ) : (
            <Text
              font={config.titleFont}
              color="inherit"
              maxLines={1}
              title={toPlainString(title)}
              onClick={editable ? startEditing : undefined}
            >
              {title}
            </Text>
          )}

          {suffix && (
            <Text font={config.optionalFont} color="text-03">
              {suffix === "optional" ? "(Optional)" : suffix}
            </Text>
          )}

          {auxIcon &&
            (() => {
              const { icon: AuxIcon, colorClass } = AUX_ICON_CONFIG[auxIcon];
              return (
                <div
                  className="opal-content-md-aux-icon shrink-0 p-0.5"
                  style={{ height: config.lineHeight }}
                >
                  <AuxIcon
                    className={colorClass}
                    style={{
                      width: config.auxIconSize,
                      height: config.auxIconSize,
                    }}
                  />
                </div>
              );
            })()}

          {tag && <Tag {...tag} />}

          {editable && !editing && (
            <div
              className={cn(
                "opal-content-md-edit-button",
                config.editButtonPadding
              )}
            >
              <Button
                icon={SvgEdit}
                prominence="internal"
                size={config.editButtonSize}
                tooltip="Edit"
                tooltipSide="right"
                onClick={startEditing}
              />
            </div>
          )}
        </div>
      </div>

      {description && toPlainString(description) && (
        <div
          className="opal-content-md-description"
          style={Icon ? { paddingLeft: config.descriptionIndent } : undefined}
        >
          <Text font="secondary-body" color="text-03" as="p">
            {description}
          </Text>
        </div>
      )}
    </div>
  );
}

export {
  ContentMd,
  type ContentMdProps,
  type ContentMdSizePreset,
  type ContentMdSuffix,
  type ContentMdAuxIcon,
};


================================================
FILE: web/lib/opal/src/layouts/content/ContentSm.tsx
================================================
"use client";

import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text, type TextFont } from "@opal/components/text/components";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { cn } from "@opal/utils";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentSmSizePreset = "main-content" | "main-ui" | "secondary";
type ContentSmOrientation = "vertical" | "inline" | "reverse";
type ContentSmProminence = "default" | "muted";

interface ContentSmPresetConfig {
  /** Icon width/height (CSS value). */
  iconSize: string;
  /** Tailwind padding class for the icon container. */
  iconContainerPadding: string;
  /** Font preset for the title. */
  titleFont: TextFont;
  /** Title line-height — also used as icon container min-height (CSS value). */
  lineHeight: string;
  /** Gap between icon container and title (CSS value). */
  gap: string;
}

/** Props for {@link ContentSm}. Does not support editing or descriptions. */
interface ContentSmProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Main title text (read-only — editing is not supported). */
  title: string | RichStr;

  /** Size preset. Default: `"main-ui"`. */
  sizePreset?: ContentSmSizePreset;

  /** Layout orientation. Default: `"inline"`. */
  orientation?: ContentSmOrientation;

  /** Title prominence. Default: `"default"`. */
  prominence?: ContentSmProminence;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// Presets
// ---------------------------------------------------------------------------

const CONTENT_SM_PRESETS: Record<ContentSmSizePreset, ContentSmPresetConfig> = {
  "main-content": {
    iconSize: "1rem",
    iconContainerPadding: "p-1",
    titleFont: "main-content-body",
    lineHeight: "1.5rem",
    gap: "0.125rem",
  },
  "main-ui": {
    iconSize: "1rem",
    iconContainerPadding: "p-0.5",
    titleFont: "main-ui-action",
    lineHeight: "1.25rem",
    gap: "0.25rem",
  },
  secondary: {
    iconSize: "0.75rem",
    iconContainerPadding: "p-0.5",
    titleFont: "secondary-action",
    lineHeight: "1rem",
    gap: "0.125rem",
  },
};

// ---------------------------------------------------------------------------
// ContentSm
// ---------------------------------------------------------------------------

function ContentSm({
  icon: Icon,
  title,
  sizePreset = "main-ui",
  orientation = "inline",
  prominence = "default",
  ref,
}: ContentSmProps) {
  const config = CONTENT_SM_PRESETS[sizePreset];

  return (
    <div
      ref={ref}
      className="opal-content-sm"
      data-orientation={orientation}
      data-prominence={prominence}
      style={{ gap: config.gap }}
    >
      {Icon && (
        <div
          className={cn(
            "opal-content-sm-icon-container shrink-0",
            config.iconContainerPadding
          )}
          style={{ minHeight: config.lineHeight }}
        >
          <Icon
            className="opal-content-sm-icon"
            style={{ width: config.iconSize, height: config.iconSize }}
          />
        </div>
      )}

      <Text
        font={config.titleFont}
        color="inherit"
        maxLines={1}
        title={toPlainString(title)}
      >
        {title}
      </Text>
    </div>
  );
}

export {
  ContentSm,
  type ContentSmProps,
  type ContentSmSizePreset,
  type ContentSmOrientation,
  type ContentSmProminence,
};


================================================
FILE: web/lib/opal/src/layouts/content/ContentXl.tsx
================================================
"use client";

import { Button } from "@opal/components/buttons/button/components";
import type { ContainerSizeVariants } from "@opal/types";
import SvgEdit from "@opal/icons/edit";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text, type TextFont } from "@opal/components/text/components";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { cn } from "@opal/utils";
import { useState } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentXlSizePreset = "headline" | "section";

interface ContentXlPresetConfig {
  /** Icon width/height (CSS value). */
  iconSize: string;
  /** Tailwind padding class for the icon container. */
  iconContainerPadding: string;
  /** More-icon-1 width/height (CSS value). */
  moreIcon1Size: string;
  /** Tailwind padding class for the more-icon-1 container. */
  moreIcon1ContainerPadding: string;
  /** More-icon-2 width/height (CSS value). */
  moreIcon2Size: string;
  /** Tailwind padding class for the more-icon-2 container. */
  moreIcon2ContainerPadding: string;
  /** Opal font name for the title (without `font-` prefix). */
  titleFont: TextFont;
  /** Title line-height — also used as icon container min-height (CSS value). */
  lineHeight: string;
  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */
  editButtonSize: ContainerSizeVariants;
  /** Tailwind padding class for the edit button container. */
  editButtonPadding: string;
}

interface ContentXlProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Main title text. */
  title: string | RichStr;

  /** Optional description below the title. */
  description?: string | RichStr;

  /** Enable inline editing of the title. */
  editable?: boolean;

  /** Called when the user commits an edit. */
  onTitleChange?: (newTitle: string) => void;

  /** Size preset. Default: `"headline"`. */
  sizePreset?: ContentXlSizePreset;

  /** Optional secondary icon rendered in the icon row. */
  moreIcon1?: IconFunctionComponent;

  /** Optional tertiary icon rendered in the icon row. */
  moreIcon2?: IconFunctionComponent;

  /** Ref forwarded to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// Presets
// ---------------------------------------------------------------------------

const CONTENT_XL_PRESETS: Record<ContentXlSizePreset, ContentXlPresetConfig> = {
  headline: {
    iconSize: "2rem",
    iconContainerPadding: "p-0.5",
    moreIcon1Size: "1rem",
    moreIcon1ContainerPadding: "p-0.5",
    moreIcon2Size: "2rem",
    moreIcon2ContainerPadding: "p-0.5",
    titleFont: "heading-h2",
    lineHeight: "2.25rem",
    editButtonSize: "md",
    editButtonPadding: "p-1",
  },
  section: {
    iconSize: "1.5rem",
    iconContainerPadding: "p-0.5",
    moreIcon1Size: "0.75rem",
    moreIcon1ContainerPadding: "p-0.5",
    moreIcon2Size: "1.5rem",
    moreIcon2ContainerPadding: "p-0.5",
    titleFont: "heading-h3",
    lineHeight: "1.75rem",
    editButtonSize: "sm",
    editButtonPadding: "p-0.5",
  },
};

// ---------------------------------------------------------------------------
// ContentXl
// ---------------------------------------------------------------------------

function ContentXl({
  sizePreset = "headline",
  icon: Icon,
  title,
  description,
  editable,
  onTitleChange,
  moreIcon1: MoreIcon1,
  moreIcon2: MoreIcon2,
  ref,
}: ContentXlProps) {
  const [editing, setEditing] = useState(false);
  const [editValue, setEditValue] = useState(toPlainString(title));

  const config = CONTENT_XL_PRESETS[sizePreset];

  function startEditing() {
    setEditValue(toPlainString(title));
    setEditing(true);
  }

  function commit() {
    const value = editValue.trim();
    if (value && value !== toPlainString(title)) onTitleChange?.(value);
    setEditing(false);
  }

  return (
    <div ref={ref} className="opal-content-xl">
      {(Icon || MoreIcon1 || MoreIcon2) && (
        <div className="opal-content-xl-icon-row">
          {Icon && (
            <div
              className={cn(
                "opal-content-xl-icon-container shrink-0",
                config.iconContainerPadding
              )}
              style={{ minHeight: config.lineHeight }}
            >
              <Icon
                className="opal-content-xl-icon"
                style={{ width: config.iconSize, height: config.iconSize }}
              />
            </div>
          )}

          {MoreIcon1 && (
            <div
              className={cn(
                "opal-content-xl-more-icon-container shrink-0",
                config.moreIcon1ContainerPadding
              )}
            >
              <MoreIcon1
                className="opal-content-xl-icon"
                style={{
                  width: config.moreIcon1Size,
                  height: config.moreIcon1Size,
                }}
              />
            </div>
          )}

          {MoreIcon2 && (
            <div
              className={cn(
                "opal-content-xl-more-icon-container shrink-0",
                config.moreIcon2ContainerPadding
              )}
            >
              <MoreIcon2
                className="opal-content-xl-icon"
                style={{
                  width: config.moreIcon2Size,
                  height: config.moreIcon2Size,
                }}
              />
            </div>
          )}
        </div>
      )}

      <div className="opal-content-xl-body">
        <div className="opal-content-xl-title-row">
          {editing ? (
            <div className="opal-content-xl-input-sizer">
              <span
                className={cn(
                  "opal-content-xl-input-mirror",
                  `font-${config.titleFont}`
                )}
              >
                {editValue || "\u00A0"}
              </span>
              <input
                className={cn(
                  "opal-content-xl-input",
                  `font-${config.titleFont}`,
                  "text-text-04"
                )}
                value={editValue}
                onChange={(e) => setEditValue(e.target.value)}
                size={1}
                autoFocus
                onFocus={(e) => e.currentTarget.select()}
                onBlur={commit}
                onKeyDown={(e) => {
                  if (e.key === "Enter") commit();
                  if (e.key === "Escape") {
                    setEditValue(toPlainString(title));
                    setEditing(false);
                  }
                }}
                style={{ height: config.lineHeight }}
              />
            </div>
          ) : (
            <Text
              font={config.titleFont}
              color="inherit"
              maxLines={1}
              title={toPlainString(title)}
              onClick={editable ? startEditing : undefined}
            >
              {title}
            </Text>
          )}

          {editable && !editing && (
            <div
              className={cn(
                "opal-content-xl-edit-button",
                config.editButtonPadding
              )}
            >
              <Button
                icon={SvgEdit}
                prominence="internal"
                size={config.editButtonSize}
                tooltip="Edit"
                tooltipSide="right"
                onClick={startEditing}
              />
            </div>
          )}
        </div>

        {description && toPlainString(description) && (
          <div className="opal-content-xl-description">
            <Text font="secondary-body" color="text-03" as="p">
              {description}
            </Text>
          </div>
        )}
      </div>
    </div>
  );
}

export { ContentXl, type ContentXlProps, type ContentXlSizePreset };


================================================
FILE: web/lib/opal/src/layouts/content/README.md
================================================
# Content

**Import:** `import { Content, type ContentProps } from "@opal/layouts";`

A two-axis layout component for displaying icon + title + description rows. Routes to an internal layout based on the `sizePreset` and `variant` combination.

## Two-Axis Architecture

### `sizePreset` — controls sizing (icon, padding, gap, font)

#### ContentXl presets (variant="heading")

| Preset | Icon | Icon padding | moreIcon1 | mI1 padding | moreIcon2 | mI2 padding | Title font | Line-height |
|---|---|---|---|---|---|---|---|---|
| `headline` | 2rem (32px) | `p-0.5` (2px) | 1rem (16px) | `p-0.5` (2px) | 2rem (32px) | `p-0.5` (2px) | `font-heading-h2` | 2.25rem (36px) |
| `section` | 1.5rem (24px) | `p-0.5` (2px) | 0.75rem (12px) | `p-0.5` (2px) | 1.5rem (24px) | `p-0.5` (2px) | `font-heading-h3` | 1.75rem (28px) |

#### ContentLg presets (variant="section")

| Preset | Icon | Icon padding | Gap | Title font | Line-height |
|---|---|---|---|---|---|
| `headline` | 2rem (32px) | `p-0.5` (2px) | 0.25rem (4px) | `font-heading-h2` | 2.25rem (36px) |
| `section` | 1.25rem (20px) | `p-1` (4px) | 0rem | `font-heading-h3-muted` | 1.75rem (28px) |

#### ContentMd presets

| Preset | Icon | Icon padding | Icon color | Gap | Title font | Line-height |
|---|---|---|---|---|---|---|
| `main-content` | 1rem (16px) | `p-1` (4px) | `text-04` | 0.125rem (2px) | `font-main-content-emphasis` | 1.5rem (24px) |
| `main-ui` | 1rem (16px) | `p-0.5` (2px) | `text-03` | 0.25rem (4px) | `font-main-ui-action` | 1.25rem (20px) |
| `secondary` | 0.75rem (12px) | `p-0.5` (2px) | `text-04` | 0.125rem (2px) | `font-secondary-action` | 1rem (16px) |

> Icon container height (icon + 2 x padding) always equals the title line-height.

### `variant` — controls structure / layout

| variant | Description |
|---|---|
| `heading` | Icon on **top** (flex-col) — ContentXl |
| `section` | Icon **inline** (flex-row) — ContentLg or ContentMd |
| `body` | Body text layout — ContentSm |

### Valid Combinations -> Internal Routing

| sizePreset | variant | Routes to |
|---|---|---|
| `headline` / `section` | `heading` | **ContentXl** (icon on top) |
| `headline` / `section` | `section` | **ContentLg** (icon inline) |
| `main-content` / `main-ui` / `secondary` | `section` | **ContentMd** |
| `main-content` / `main-ui` / `secondary` | `body` | **ContentSm** |

Invalid combinations (e.g. `sizePreset="headline" + variant="body"`) are excluded at the type level.

## Props

| Prop | Type | Default | Description |
|---|---|---|---|
| `sizePreset` | `SizePreset` | `"headline"` | Size preset (see tables above) |
| `variant` | `ContentVariant` | `"heading"` | Layout variant (see table above) |
| `icon` | `IconFunctionComponent` | — | Optional icon component |
| `title` | `string` | **(required)** | Main title text |
| `description` | `string` | — | Optional description below the title |
| `editable` | `boolean` | `false` | Enable inline editing of the title |
| `onTitleChange` | `(newTitle: string) => void` | — | Called when user commits an edit |
| `moreIcon1` | `IconFunctionComponent` | — | Secondary icon in icon row (ContentXl only) |
| `moreIcon2` | `IconFunctionComponent` | — | Tertiary icon in icon row (ContentXl only) |

## Internal Layouts

### ContentXl

For `headline` / `section` presets with `variant="heading"`. Icon row on top (flex-col), supports `moreIcon1` and `moreIcon2` in the icon row. Description is always `font-secondary-body text-text-03`.

### ContentLg

For `headline` / `section` presets with `variant="section"`. Always inline (flex-row). Description is always `font-secondary-body text-text-03`.

### ContentMd

For `main-content` / `main-ui` / `secondary` presets. Always inline. Both `icon` and `description` are optional. Description is always `font-secondary-body text-text-03`.

## Usage Examples

```tsx
import { Content } from "@opal/layouts";
import SvgSearch from "@opal/icons/search";

// ContentXl — headline, icon on top
<Content
  icon={SvgSearch}
  sizePreset="headline"
  variant="heading"
  title="Agent Settings"
  description="Configure your agent's behavior"
/>

// ContentXl — with more icons
<Content
  icon={SvgSearch}
  sizePreset="headline"
  variant="heading"
  title="Agent Settings"
  moreIcon1={SvgStar}
  moreIcon2={SvgLock}
/>

// ContentLg — section, icon inline
<Content
  icon={SvgSearch}
  sizePreset="section"
  variant="section"
  title="Data Sources"
  description="Connected integrations"
/>

// ContentMd — with icon and description
<Content
  icon={SvgSearch}
  sizePreset="main-ui"
  title="Instructions"
  description="Agent system prompt"
/>

// ContentMd — title only (no icon, no description)
<Content
  sizePreset="main-content"
  title="Featured Agent"
/>

// Editable title
<Content
  icon={SvgSearch}
  sizePreset="headline"
  variant="heading"
  title="My Agent"
  editable
  onTitleChange={(newTitle) => save(newTitle)}
/>
```


================================================
FILE: web/lib/opal/src/layouts/content/components.tsx
================================================
import "@opal/layouts/content/styles.css";
import {
  ContentSm,
  type ContentSmOrientation,
  type ContentSmProminence,
} from "@opal/layouts/content/ContentSm";
import {
  ContentXl,
  type ContentXlProps,
} from "@opal/layouts/content/ContentXl";
import {
  ContentLg,
  type ContentLgProps,
} from "@opal/layouts/content/ContentLg";
import {
  ContentMd,
  type ContentMdProps,
} from "@opal/layouts/content/ContentMd";
import type { TagProps } from "@opal/components/tag/components";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { widthVariants } from "@opal/shared";
import type { ExtremaSizeVariants } from "@opal/types";

// ---------------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------------

type SizePreset =
  | "headline"
  | "section"
  | "main-content"
  | "main-ui"
  | "secondary";

type ContentVariant = "heading" | "section" | "body";

interface ContentBaseProps {
  /** Optional icon component. */
  icon?: IconFunctionComponent;

  /** Main title text. */
  title: string | RichStr;

  /** Optional description below the title. */
  description?: string | RichStr;

  /** Enable inline editing of the title. */
  editable?: boolean;

  /** Called when the user commits an edit. */
  onTitleChange?: (newTitle: string) => void;

  /**
   * Width preset controlling the component's horizontal size.
   * Uses the shared `WidthVariant` scale from `@opal/shared`.
   *
   * - `"auto"` — Shrink-wraps to content width
   * - `"fit"` — Shrink-wraps to content width
   * - `"full"` — Stretches to fill the parent's width
   *
   * @default "fit"
   */
  widthVariant?: ExtremaSizeVariants;

  /** Ref forwarded to the root `<div>` of the resolved layout. */
  ref?: React.Ref<HTMLDivElement>;
}

// ---------------------------------------------------------------------------
// Discriminated union: valid sizePreset × variant combinations
// ---------------------------------------------------------------------------

type XlContentProps = ContentBaseProps & {
  /** Size preset. Default: `"headline"`. */
  sizePreset?: "headline" | "section";
  /** Variant. Default: `"heading"` for heading-eligible presets. */
  variant?: "heading";
  /** Optional secondary icon rendered in the icon row (ContentXl only). */
  moreIcon1?: IconFunctionComponent;
  /** Optional tertiary icon rendered in the icon row (ContentXl only). */
  moreIcon2?: IconFunctionComponent;
};

type LgContentProps = ContentBaseProps & {
  /** Size preset. Default: `"headline"`. */
  sizePreset?: "headline" | "section";
  /** Variant. */
  variant: "section";
};

type MdContentProps = ContentBaseProps & {
  sizePreset: "main-content" | "main-ui" | "secondary";
  variant?: "section";
  /** Muted suffix rendered beside the title. Use `"optional"` for "(Optional)". */
  suffix?: "optional" | (string & {});
  /** Auxiliary status icon rendered beside the title. */
  auxIcon?: "info-gray" | "info-blue" | "warning" | "error";
  /** Tag rendered beside the title. */
  tag?: TagProps;
};

/** ContentSm does not support descriptions or inline editing. */
type SmContentProps = Omit<
  ContentBaseProps,
  "description" | "editable" | "onTitleChange"
> & {
  sizePreset: "main-content" | "main-ui" | "secondary";
  variant: "body";
  /** Layout orientation. Default: `"inline"`. */
  orientation?: ContentSmOrientation;
  /** Title prominence. Default: `"default"`. */
  prominence?: ContentSmProminence;
};

type ContentProps =
  | XlContentProps
  | LgContentProps
  | MdContentProps
  | SmContentProps;

// ---------------------------------------------------------------------------
// Content — routes to the appropriate internal layout
// ---------------------------------------------------------------------------

function Content(props: ContentProps) {
  const {
    sizePreset = "headline",
    variant = "heading",
    widthVariant = "full",
    ref,
    ...rest
  } = props;

  let layout: React.ReactNode = null;

  // ContentXl / ContentLg: headline/section presets
  if (sizePreset === "headline" || sizePreset === "section") {
    if (variant === "heading") {
      layout = (
        <ContentXl
          sizePreset={sizePreset}
          ref={ref}
          {...(rest as Omit<ContentXlProps, "sizePreset">)}
        />
      );
    } else {
      layout = (
        <ContentLg
          sizePreset={sizePreset}
          ref={ref}
          {...(rest as Omit<ContentLgProps, "sizePreset">)}
        />
      );
    }
  }

  // ContentMd: main-content/main-ui/secondary with section/heading variant
  // (variant defaults to "heading" when omitted on MdContentProps, so both arms are needed)
  else if (variant === "section" || variant === "heading") {
    layout = (
      <ContentMd
        sizePreset={sizePreset}
        ref={ref}
        {...(rest as Omit<ContentMdProps, "sizePreset">)}
      />
    );
  }

  // ContentSm: main-content/main-ui/secondary with body variant
  else if (variant === "body") {
    layout = (
      <ContentSm
        sizePreset={sizePreset}
        ref={ref}
        {...(rest as Omit<
          React.ComponentProps<typeof ContentSm>,
          "sizePreset"
        >)}
      />
    );
  }

  // This case should NEVER be hit.
  if (!layout)
    throw new Error(
      `Content: no layout matched for sizePreset="${sizePreset}" variant="${variant}"`
    );

  return <div className={widthVariants[widthVariant]}>{layout}</div>;
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export {
  Content,
  type ContentProps,
  type SizePreset,
  type ContentVariant,
  type XlContentProps,
  type LgContentProps,
  type MdContentProps,
  type SmContentProps,
};


================================================
FILE: web/lib/opal/src/layouts/content/styles.css
================================================
/* ===========================================================================
   Content — ContentXl

   Icon row on top (flex-col). Icon row contains main icon + optional
   moreIcon1 / moreIcon2 in a flex-row.

   Sizing (icon size, gap, padding, font, line-height) is driven by the
   sizePreset prop via inline styles + Tailwind classes in the component.
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Layout — flex-col (icon row above body)
   --------------------------------------------------------------------------- */

.opal-content-xl {
  @apply flex flex-col items-start text-text-04;
}

/* ---------------------------------------------------------------------------
   Icon row — flex-row containing main icon + more icons
   --------------------------------------------------------------------------- */

.opal-content-xl-icon-row {
  @apply flex flex-row items-center;
  gap: 0.25rem;
}

/* ---------------------------------------------------------------------------
   Icons
   --------------------------------------------------------------------------- */

.opal-content-xl-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
}

.opal-content-xl-more-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
}

.opal-content-xl-icon {
  color: var(--text-04);
}

/* ---------------------------------------------------------------------------
   Body column
   --------------------------------------------------------------------------- */

.opal-content-xl-body {
  @apply flex flex-1 flex-col items-start;
  min-width: 0.0625rem;
}

/* ---------------------------------------------------------------------------
   Title row — title (or input) + edit button
   --------------------------------------------------------------------------- */

.opal-content-xl-title-row {
  @apply flex items-center w-full;
  gap: 0.25rem;
}

.opal-content-xl-input-sizer {
  display: inline-grid;
  align-items: stretch;
}

.opal-content-xl-input-sizer > * {
  grid-area: 1 / 1;
  padding: 0 0.125rem;
  min-width: 0.0625rem;
}

.opal-content-xl-input-mirror {
  visibility: hidden;
  white-space: pre;
}

.opal-content-xl-input {
  @apply bg-transparent outline-none border-none;
}

/* ---------------------------------------------------------------------------
   Edit button — visible only on hover of the outer container
   --------------------------------------------------------------------------- */

.opal-content-xl-edit-button {
  @apply opacity-0 transition-opacity shrink-0;
}

.opal-content-xl:hover .opal-content-xl-edit-button {
  @apply opacity-100;
}

/* ---------------------------------------------------------------------------
   Description
   --------------------------------------------------------------------------- */

.opal-content-xl-description {
  @apply text-left w-full;
}

/* ===========================================================================
   Content — ContentLg

   Always inline (flex-row) — icon beside content.

   Sizing (icon size, gap, padding, font, line-height) is driven by the
   sizePreset prop via inline styles + Tailwind classes in the component.
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Layout
   --------------------------------------------------------------------------- */

.opal-content-lg {
  @apply flex flex-row items-start text-text-04;
}

/* ---------------------------------------------------------------------------
   Icon
   --------------------------------------------------------------------------- */

.opal-content-lg-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
}

.opal-content-lg-icon {
  color: var(--text-04);
}

/* ---------------------------------------------------------------------------
   Body column
   --------------------------------------------------------------------------- */

.opal-content-lg-body {
  @apply flex flex-1 flex-col items-start;
  min-width: 0.0625rem;
}

/* ---------------------------------------------------------------------------
   Title row — title (or input) + edit button
   --------------------------------------------------------------------------- */

.opal-content-lg-title-row {
  @apply flex items-center w-full;
  gap: 0.25rem;
}

.opal-content-lg-input-sizer {
  display: inline-grid;
  align-items: stretch;
}

.opal-content-lg-input-sizer > * {
  grid-area: 1 / 1;
  padding: 0 0.125rem;
  min-width: 0.0625rem;
}

.opal-content-lg-input-mirror {
  visibility: hidden;
  white-space: pre;
}

.opal-content-lg-input {
  @apply bg-transparent outline-none border-none;
}

/* ---------------------------------------------------------------------------
   Edit button — visible only on hover of the outer container
   --------------------------------------------------------------------------- */

.opal-content-lg-edit-button {
  @apply opacity-0 transition-opacity shrink-0;
}

.opal-content-lg:hover .opal-content-lg-edit-button {
  @apply opacity-100;
}

/* ---------------------------------------------------------------------------
   Description
   --------------------------------------------------------------------------- */

.opal-content-lg-description {
  @apply text-left w-full;
}

/* ===========================================================================
   Content — ContentMd

   Always inline (flex-row). Icon color varies per sizePreset and is applied
   via Tailwind class from the component.
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Layout
   --------------------------------------------------------------------------- */

.opal-content-md {
  @apply flex flex-col items-start text-text-04;
}

.opal-content-md-header {
  @apply flex flex-row items-center w-full;
}

.opal-content-md-header[data-editing] {
  @apply rounded-08;
  box-shadow: inset 0 0 0 1px var(--border-02);
}

/* ---------------------------------------------------------------------------
   Icon
   --------------------------------------------------------------------------- */

.opal-content-md-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
}

/* ---------------------------------------------------------------------------
   Title row — title (or input) + edit button
   --------------------------------------------------------------------------- */

.opal-content-md-title-row {
  @apply flex items-center w-full;
  gap: 0.25rem;
}

.opal-content-md-input-sizer {
  display: inline-grid;
  align-items: stretch;
  width: 100%;
}

.opal-content-md-input-sizer > * {
  grid-area: 1 / 1;
  padding: 0 0.125rem;
  min-width: 0.0625rem;
}

.opal-content-md-input-mirror {
  visibility: hidden;
  white-space: pre;
}

.opal-content-md-input {
  @apply bg-transparent outline-none border-none;
}

/* ---------------------------------------------------------------------------
   Aux icon
   --------------------------------------------------------------------------- */

.opal-content-md-aux-icon {
  display: flex;
  align-items: center;
  justify-content: center;
}

/* ---------------------------------------------------------------------------
   Edit button — visible only on hover of the outer container
   --------------------------------------------------------------------------- */

.opal-content-md-edit-button {
  @apply opacity-0 transition-opacity shrink-0;
}

.opal-content-md:hover .opal-content-md-edit-button {
  @apply opacity-100;
}

/* ---------------------------------------------------------------------------
   Description
   --------------------------------------------------------------------------- */

.opal-content-md-description {
  @apply text-left w-full;
}

/* ===========================================================================
   Content — ContentSm

   Three orientation modes (driven by orientation prop):
     inline  : flex-row         — icon left, title right
     vertical: flex-col         — icon top, title below
     reverse : flex-row-reverse — title left, icon right

   Icon color is always text-03. Title color varies by prominence
   (text-04 default, text-03 muted) via data-prominence.
   =========================================================================== */

/* ---------------------------------------------------------------------------
   Layout — orientation
   --------------------------------------------------------------------------- */

.opal-content-sm {
  /* since `ContentSm` doesn't have a description, it's possible to center-align the icon and text */
  @apply flex items-center text-text-04;
}

.opal-content-sm[data-orientation="inline"] {
  @apply flex-row;
}

.opal-content-sm[data-orientation="vertical"] {
  @apply flex-col;
}

.opal-content-sm[data-orientation="reverse"] {
  @apply flex-row-reverse;
}

/* ---------------------------------------------------------------------------
   Icon
   --------------------------------------------------------------------------- */

.opal-content-sm-icon-container {
  display: flex;
  align-items: center;
  justify-content: center;
}

.opal-content-sm-icon {
  @apply text-text-03;
}

.opal-content-sm[data-prominence="muted"] .opal-content-sm-icon {
  @apply text-text-02;
}

/* ---------------------------------------------------------------------------
   Title
   --------------------------------------------------------------------------- */

.opal-content-sm[data-prominence="muted"] {
  @apply text-text-03;
}

/* ===========================================================================
   Interactive override

   When a Content variant is nested inside an `.interactive` element,
   the title inherits color from the Interactive's `--interactive-foreground`
   and icons switch to `--interactive-foreground-icon`. This is automatic —
   no opt-in prop is required.
   =========================================================================== */

.interactive .opal-content-xl {
  color: inherit;
}

.interactive .opal-content-xl .opal-content-xl-icon {
  color: var(--interactive-foreground-icon);
}

.interactive .opal-content-lg {
  color: inherit;
}

.interactive .opal-content-lg .opal-content-lg-icon {
  color: var(--interactive-foreground-icon);
}

.interactive .opal-content-md {
  color: inherit;
}

.interactive .opal-content-md .opal-content-md-icon {
  color: var(--interactive-foreground-icon);
}

.interactive .opal-content-sm {
  color: inherit;
}

.interactive .opal-content-sm .opal-content-sm-icon {
  color: var(--interactive-foreground-icon);
}


================================================
FILE: web/lib/opal/src/layouts/content-action/ContentAction.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { ContentAction } from "@opal/layouts";
import { Button } from "@opal/components";
import { SvgSettings } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { Decorator } from "@storybook/react";

const withTooltipProvider: Decorator = (Story) => (
  <TooltipPrimitive.Provider>
    <Story />
  </TooltipPrimitive.Provider>
);

const meta = {
  title: "Layouts/ContentAction",
  component: ContentAction,
  tags: ["autodocs"],
  decorators: [withTooltipProvider],
  parameters: {
    layout: "centered",
  },
} satisfies Meta<typeof ContentAction>;

export default meta;

type Story = StoryObj<typeof meta>;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

export const Default: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "OpenAI",
    description: "GPT-4o language model provider.",
    icon: SvgSettings,
    rightChildren: <Button prominence="tertiary">Edit</Button>,
  },
};

export const MultipleActions: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "Connector",
    description: "Manage your data source connector.",
    rightChildren: (
      <div className="flex items-center gap-2">
        <Button prominence="tertiary" icon={SvgSettings} />
        <Button variant="danger" prominence="primary">
          Delete
        </Button>
      </div>
    ),
  },
};

export const NoPadding: Story = {
  args: {
    sizePreset: "main-content",
    variant: "section",
    title: "Compact Row",
    description: "No padding around content area.",
    paddingVariant: "fit",
    rightChildren: <Button prominence="tertiary">Action</Button>,
  },
};


================================================
FILE: web/lib/opal/src/layouts/content-action/README.md
================================================
# ContentAction

**Import:** `import { ContentAction, type ContentActionProps } from "@opal/layouts";`

A row layout that pairs a [`Content`](../content/README.md) block with optional right-side action children (buttons, badges, icons, etc.).

## Why ContentAction?

`Content` renders icon + title + description but has no slot for actions. When you need a settings row, card header, or list item with an action on the right you would typically wrap `Content` in a manual flex-row. `ContentAction` standardises that pattern and adds padding alignment with `Interactive.Container` and `Button` via the shared `SizeVariant` scale.

## Props

Inherits **all** props from [`Content`](../content/README.md) (same discriminated-union API) plus:

| Prop | Type | Default | Description |
|---|---|---|---|
| `rightChildren` | `ReactNode` | `undefined` | Content rendered on the right side. Wrapper stretches to the full height of the row. |
| `paddingVariant` | `SizeVariant` | `"lg"` | Padding preset applied around the `Content` area. Uses the shared size scale from `@opal/shared`. |

### `paddingVariant` reference

| Value | Padding class | Effective padding |
|---|---|---|
| `lg` | `p-2` | 0.5rem (8px) |
| `md` | `p-1` | 0.25rem (4px) |
| `sm` | `p-1` | 0.25rem (4px) |
| `xs` | `p-0.5` | 0.125rem (2px) |
| `2xs` | `p-0.5` | 0.125rem (2px) |
| `fit` | `p-0` | 0 |

These values are identical to the padding applied by `Interactive.Container` at each size, so `ContentAction` labels naturally align with adjacent buttons of the same size.

## Layout Structure

```
[  Content (flex-1, padded)  ][  rightChildren (shrink-0, full height)  ]
```

- The outer wrapper is `flex flex-row items-stretch w-full`.
- `Content` sits inside a `flex-1 min-w-0` div with padding from `paddingVariant`.
- `rightChildren` is wrapped in `flex items-stretch shrink-0` so it stretches vertically.

## Usage Examples

### Settings row with an edit button

```tsx
import { ContentAction } from "@opal/layouts";
import { Button } from "@opal/components";
import SvgSettings from "@opal/icons/settings";

<ContentAction
  icon={SvgSettings}
  title="OpenAI"
  description="GPT"
  sizePreset="main-content"
  variant="section"
  tag={{ title: "Default", color: "blue" }}
  paddingVariant="lg"
  rightChildren={
    <Button icon={SvgSettings} prominence="tertiary" onClick={handleEdit} />
  }
/>
```

### Card header with connect action

```tsx
import { ContentAction } from "@opal/layouts";
import { Button } from "@opal/components";
import { SvgArrowExchange, SvgCloud } from "@opal/icons";

<ContentAction
  icon={SvgCloud}
  title="Google Cloud Vertex AI"
  description="Gemini"
  sizePreset="main-content"
  variant="section"
  paddingVariant="md"
  rightChildren={
    <Button rightIcon={SvgArrowExchange} prominence="tertiary">
      Connect
    </Button>
  }
/>
```

### No right children (padding-only wrapper)

```tsx
<ContentAction
  title="Section Header"
  sizePreset="main-content"
  variant="section"
  paddingVariant="lg"
/>
```

When `rightChildren` is omitted the component renders only the padded `Content` — useful for alignment consistency when some rows have actions and others don't.


================================================
FILE: web/lib/opal/src/layouts/content-action/components.tsx
================================================
import { Content, type ContentProps } from "@opal/layouts/content/components";
import {
  containerSizeVariants,
  type ContainerSizeVariants,
} from "@opal/shared";
import { cn } from "@opal/utils";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type ContentActionProps = ContentProps & {
  /** Content rendered on the right side, stretched to full height. */
  rightChildren?: React.ReactNode;

  /**
   * Padding applied around the `Content` area.
   * Uses the shared `SizeVariant` scale from `@opal/shared`.
   *
   * @default "lg"
   * @see {@link ContainerSizeVariants} for the full list of presets.
   */
  paddingVariant?: ContainerSizeVariants;
};

// ---------------------------------------------------------------------------
// ContentAction
// ---------------------------------------------------------------------------

/**
 * A row layout that pairs a {@link Content} block with optional right-side
 * action children (e.g. buttons, badges).
 *
 * The `Content` area receives padding controlled by `paddingVariant`, using
 * the same size scale as `Interactive.Container` and `Button`. The
 * `rightChildren` wrapper stretches to the full height of the row.
 *
 * @example
 * ```tsx
 * import { ContentAction } from "@opal/layouts";
 * import { Button } from "@opal/components";
 * import SvgSettings from "@opal/icons/settings";
 *
 * <ContentAction
 *   icon={SvgSettings}
 *   title="OpenAI"
 *   description="GPT"
 *   sizePreset="main-content"
 *   variant="section"
 *   paddingVariant="lg"
 *   rightChildren={<Button icon={SvgSettings} prominence="tertiary" />}
 * />
 * ```
 */
function ContentAction({
  rightChildren,
  paddingVariant = "lg",
  ...contentProps
}: ContentActionProps) {
  const { padding } = containerSizeVariants[paddingVariant];

  return (
    <div className="flex flex-row items-stretch w-full">
      <div className={cn("flex-1 min-w-0 self-center", padding)}>
        <Content {...contentProps} />
      </div>
      {rightChildren && (
        <div className="flex items-stretch shrink-0">{rightChildren}</div>
      )}
    </div>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { ContentAction, type ContentActionProps };


================================================
FILE: web/lib/opal/src/layouts/illustration-content/IllustrationContent.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { IllustrationContent } from "@opal/layouts";
import { SvgEmpty } from "@opal/illustrations";

const meta = {
  title: "Layouts/IllustrationContent",
  component: IllustrationContent,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
} satisfies Meta<typeof IllustrationContent>;

export default meta;

type Story = StoryObj<typeof meta>;

// ---------------------------------------------------------------------------
// Stories
// ---------------------------------------------------------------------------

export const Default: Story = {
  args: {
    illustration: SvgEmpty,
    title: "No results found",
    description: "Try adjusting your search or filters to find what you need.",
  },
};

export const TitleOnly: Story = {
  args: {
    title: "Nothing here yet",
  },
};

export const NoIllustration: Story = {
  args: {
    title: "No documents available",
    description:
      "Connect a data source to start indexing documents into your workspace.",
  },
};


================================================
FILE: web/lib/opal/src/layouts/illustration-content/README.md
================================================
# IllustrationContent

**Import:** `import { IllustrationContent, type IllustrationContentProps } from "@opal/layouts";`

A vertically-stacked, center-aligned layout for empty states, error pages, and informational placeholders. Pairs a large illustration with a title and optional description.

## Why IllustrationContent?

Empty states and placeholder screens share a recurring pattern: a large illustration centered above a title and description. `IllustrationContent` standardises that pattern so every empty state looks consistent without hand-rolling flex containers and spacing each time.

## Layout Structure

```
┌─────────────────────────────────┐
│          (1.25rem pad)          │
│     ┌───────────────────┐       │
│     │   illustration    │       │
│     │   7.5rem × 7.5rem │       │
│     └───────────────────┘       │
│         (0.75rem gap)           │
│          title (center)         │
│         (0.75rem gap)           │
│      description (center)       │
│          (1.25rem pad)          │
└─────────────────────────────────┘
```

- Outer container: `flex flex-col items-center gap-3 p-5 text-center`.
- Illustration: `w-[7.5rem] h-[7.5rem]` (120px), no extra padding.
- Title: `<p>` with `font-main-content-emphasis text-text-04`.
- Description: `<p>` with `font-secondary-body text-text-03`.

## Props

| Prop | Type | Default | Description |
|---|---|---|---|
| `illustration` | `IconFunctionComponent` | — | Optional illustration component rendered at 7.5rem × 7.5rem, centered. Works with any `@opal/illustrations` SVG. |
| `title` | `string` | **(required)** | Main title text, center-aligned. |
| `description` | `string` | — | Optional description below the title, center-aligned. |

## Usage Examples

### Empty search results

```tsx
import { IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";

<IllustrationContent
  illustration={SvgNoResult}
  title="No results found"
  description="Try adjusting your search or filters."
/>
```

### Not found page

```tsx
import { IllustrationContent } from "@opal/layouts";
import SvgNotFound from "@opal/illustrations/not-found";

<IllustrationContent
  illustration={SvgNotFound}
  title="Page not found"
  description="The page you're looking for doesn't exist or has been moved."
/>
```

### Title only (no illustration, no description)

```tsx
import { IllustrationContent } from "@opal/layouts";

<IllustrationContent title="Nothing here yet" />
```

### Empty state with illustration and title (no description)

```tsx
import { IllustrationContent } from "@opal/layouts";
import SvgEmpty from "@opal/illustrations/empty";

<IllustrationContent
  illustration={SvgEmpty}
  title="No items"
/>
```


================================================
FILE: web/lib/opal/src/layouts/illustration-content/components.tsx
================================================
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text } from "@opal/components";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface IllustrationContentProps {
  /** Optional illustration rendered at 7.5rem × 7.5rem (120px), centered. */
  illustration?: IconFunctionComponent;

  /** Main title text, center-aligned. Uses `font-main-content-emphasis`. */
  title: string | RichStr;

  /** Optional description below the title, center-aligned. Uses `font-secondary-body`. */
  description?: string | RichStr;
}

// ---------------------------------------------------------------------------
// IllustrationContent
// ---------------------------------------------------------------------------

/**
 * A vertically-stacked, center-aligned layout for empty states, error pages,
 * and informational placeholders.
 *
 * Renders an optional illustration on top, followed by a title and an optional
 * description — all center-aligned with consistent spacing.
 *
 * **Layout structure:**
 *
 * ```
 * ┌─────────────────────────────────┐
 * │          (1.25rem pad)          │
 * │     ┌───────────────────┐       │
 * │     │   illustration    │       │
 * │     │   7.5rem × 7.5rem │       │
 * │     └───────────────────┘       │
 * │         (0.75rem gap)           │
 * │          title (center)         │
 * │      description (center)       │
 * │          (1.25rem pad)          │
 * └─────────────────────────────────┘
 * ```
 *
 * @example
 * ```tsx
 * import { IllustrationContent } from "@opal/layouts";
 * import SvgNoResult from "@opal/illustrations/no-result";
 *
 * <IllustrationContent
 *   illustration={SvgNoResult}
 *   title="No results found"
 *   description="Try adjusting your search or filters."
 * />
 * ```
 */
function IllustrationContent({
  illustration: Illustration,
  title,
  description,
}: IllustrationContentProps) {
  return (
    <div className="flex flex-col items-center gap-3 p-5 text-center">
      {Illustration && (
        <Illustration
          aria-hidden="true"
          className="shrink-0 w-[7.5rem] h-[7.5rem]"
        />
      )}
      <div className="flex flex-col items-center text-center">
        <Text font="main-content-emphasis" color="text-04" as="p">
          {title}
        </Text>
        {description && (
          <Text font="secondary-body" color="text-03" as="p">
            {description}
          </Text>
        )}
      </div>
    </div>
  );
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

export { IllustrationContent, type IllustrationContentProps };


================================================
FILE: web/lib/opal/src/layouts/index.ts
================================================
/* Content */
export {
  Content,
  type ContentProps,
  type SizePreset,
  type ContentVariant,
} from "@opal/layouts/content/components";

/* ContentAction */
export {
  ContentAction,
  type ContentActionProps,
} from "@opal/layouts/content-action/components";

/* CardHeaderLayout */
export {
  CardHeaderLayout,
  type CardHeaderLayoutProps,
} from "@opal/layouts/cards/header-layout/components";

/* IllustrationContent */
export {
  IllustrationContent,
  type IllustrationContentProps,
} from "@opal/layouts/illustration-content/components";


================================================
FILE: web/lib/opal/src/shared.ts
================================================
/**
 * @opal/shared — Shared constants and types for the opal design system.
 *
 * This module holds design tokens that are referenced by multiple opal
 * packages (core, components, layouts). Centralising them here avoids
 * circular imports and gives every consumer a single source of truth.
 */

import type {
  SizeVariants,
  OverridableExtremaSizeVariants,
  ContainerSizeVariants,
  ExtremaSizeVariants,
  PaddingVariants,
  RoundingVariants,
} from "@opal/types";

/**
 * Size-variant scale.
 *
 * Each entry maps a named preset to Tailwind utility classes for
 * `height`, `min-width`, and `padding`.
 *
 * | Key   | Height        | Padding  |
 * |-------|---------------|----------|
 * | `lg`  | 2.25rem (36px)| `p-2`   |
 * | `md`  | 1.75rem (28px)| `p-1`   |
 * | `sm`  | 1.5rem (24px) | `p-1`   |
 * | `xs`  | 1.25rem (20px)| `p-0.5` |
 * | `2xs` | 1rem (16px)   | `p-0.5` |
 * | `fit` | h-fit         | `p-0`   |
 */
type ContainerProperties = {
  height: string;
  minWidth: string;
  padding: string;
};
const containerSizeVariants: Record<
  ContainerSizeVariants,
  ContainerProperties
> = {
  fit: { height: "h-fit", minWidth: "", padding: "p-0" },
  lg: { height: "h-[2.25rem]", minWidth: "min-w-[2.25rem]", padding: "p-2" },
  md: { height: "h-[1.75rem]", minWidth: "min-w-[1.75rem]", padding: "p-1" },
  sm: { height: "h-[1.5rem]", minWidth: "min-w-[1.5rem]", padding: "p-1" },
  xs: {
    height: "h-[1.25rem]",
    minWidth: "min-w-[1.25rem]",
    padding: "p-0.5",
  },
  "2xs": { height: "h-[1rem]", minWidth: "min-w-[1rem]", padding: "p-0.5" },
} as const;

// ---------------------------------------------------------------------------
// Width/Height Variants
//
// A named scale of width/height presets that map to Tailwind width/height utility classes.
//
// Consumers (for width):
//   - Interactive.Container  (widthVariant)
//   - Button                 (width)
//   - Content                (widthVariant)
// ---------------------------------------------------------------------------

/**
 * Width-variant scale.
 *
 * | Key    | Tailwind class |
 * |--------|----------------|
 * | `auto` | `w-auto`       |
 * | `fit`  | `w-fit`        |
 * | `full` | `w-full`       |
 */
const widthVariants: Record<ExtremaSizeVariants, string> = {
  fit: "w-fit",
  full: "w-full",
} as const;

/**
 * Height-variant scale.
 *
 * | Key    | Tailwind class |
 * |--------|----------------|
 * | `auto` | `h-auto`       |
 * | `fit`  | `h-fit`        |
 * | `full` | `h-full`       |
 */
const heightVariants: Record<ExtremaSizeVariants, string> = {
  fit: "h-fit",
  full: "h-full",
} as const;

// ---------------------------------------------------------------------------
// Card Variants
//
// Shared padding and rounding scales for card components (Card, SelectCard).
//
// Consumers:
//   - Card          (paddingVariant, roundingVariant)
//   - SelectCard    (paddingVariant, roundingVariant)
// ---------------------------------------------------------------------------

const cardPaddingVariants: Record<PaddingVariants, string> = {
  lg: "p-6",
  md: "p-4",
  sm: "p-2",
  xs: "p-1",
  "2xs": "p-0.5",
  fit: "p-0",
};

const cardRoundingVariants: Record<RoundingVariants, string> = {
  lg: "rounded-16",
  md: "rounded-12",
  sm: "rounded-08",
  xs: "rounded-04",
};

export {
  type ExtremaSizeVariants,
  type ContainerSizeVariants,
  type OverridableExtremaSizeVariants,
  type SizeVariants,
  containerSizeVariants,
  cardPaddingVariants,
  cardRoundingVariants,
  widthVariants,
  heightVariants,
};


================================================
FILE: web/lib/opal/src/types.ts
================================================
import type { SVGProps } from "react";

// ---------------------------------------------------------------------------
// Size Variants
//
// A named scale of size presets (lg → 2xs, plus fit) that map to Tailwind
// utility classes for height, min-width, and padding.
//
// Consumers:
//   - Interactive.Container  (height + min-width + padding)
//   - Button                 (icon sizing)
//   - ContentAction          (padding only)
//   - Content (ContentXl / ContentLg / ContentMd)  (edit-button size)
// ---------------------------------------------------------------------------

// Base Size Types:

/**
 * Full range of size variants.
 *
 * This is the complete scale of size presets available in the design system.
 * Components needing the full range use this type directly.
 */
export type SizeVariants = "fit" | "full" | "lg" | "md" | "sm" | "xs" | "2xs";

// Convenience Size Types:
//
// NOTE (@raunakab + @nmgarza5)
// There are many components throughout the library that need to "extract" very specific sizings from the full gamut that is available.
// For those components, we've extracted these below "convenience" types.

/**
 * Size variants for container components (excludes "full").
 *
 * Used by components that control height, min-width, and padding.
 * Excludes "full" since containers need a fixed height preset.
 */
export type ContainerSizeVariants = Exclude<SizeVariants, "full">;

/**
 * Padding size variants.
 *
 * | Variant | Class   |
 * |---------|---------|
 * | `lg`    | `p-6`   |
 * | `md`    | `p-4`   |
 * | `sm`    | `p-2`   |
 * | `xs`    | `p-1`   |
 * | `2xs`   | `p-0.5` |
 * | `fit`   | `p-0`   |
 */
export type PaddingVariants = Extract<
  SizeVariants,
  "fit" | "lg" | "md" | "sm" | "xs" | "2xs"
>;

/**
 * Rounding size variants.
 *
 * | Variant | Class        |
 * |---------|--------------|
 * | `lg`    | `rounded-16` |
 * | `md`    | `rounded-12` |
 * | `sm`    | `rounded-08` |
 * | `xs`    | `rounded-04` |
 */
export type RoundingVariants = Extract<SizeVariants, "lg" | "md" | "sm" | "xs">;

/**
 * Extreme size variants ("fit" and "full" only).
 *
 * Used for width and height properties that only support extremal values.
 */
export type ExtremaSizeVariants = Extract<SizeVariants, "fit" | "full">;

/**
 * Size variants with numeric overrides.
 *
 * Allows size specification as a named preset or a custom numeric value.
 * Used in components that need programmatic sizing flexibility.
 */
export type OverridableExtremaSizeVariants = ExtremaSizeVariants | number;

// ---------------------------------------------------------------------------
// Icon Props
// ---------------------------------------------------------------------------

/**
 * Base props for SVG icon components.
 *
 * Extends standard SVG element attributes with convenience props used across
 * the design system. All generated icon components (in `@opal/icons`) accept
 * this interface, ensuring a consistent API for sizing, coloring, and labeling.
 *
 * @example
 * ```tsx
 * import type { IconProps } from "@opal/types";
 *
 * function MyIcon({ size = 16, className, ...props }: IconProps) {
 *   return (
 *     <svg width={size} height={size} className={className} {...props}>
 *       ...
 *     </svg>
 *   );
 * }
 * ```
 */
export interface IconProps extends SVGProps<SVGSVGElement> {
  className?: string;
  size?: number;
  title?: string;
  color?: string;
}

/** Strips `className` and `style` from a props type to enforce design-system styling. */
export type WithoutStyles<T> = Omit<T, "className" | "style">;

// ---------------------------------------------------------------------------
// Rich Strings
// ---------------------------------------------------------------------------

/**
 * A branded string wrapper that signals inline markdown should be parsed.
 *
 * Created via the `markdown()` function. Components that accept `string | RichStr`
 * will parse the inner `raw` string as inline markdown when a `RichStr` is passed,
 * and render plain text when a regular `string` is passed.
 *
 * This avoids "API coloring" — components don't need a `markdown` boolean prop,
 * and intermediate wrappers don't need to thread it through. The decision to
 * use markdown lives at the call site via `markdown("*bold* text")`.
 */
export interface RichStr {
  readonly __brand: "RichStr";
  readonly raw: string;
}

/**
 * HTML button `type` attribute values.
 *
 * Used by interactive primitives and button-like components to indicate that
 * the element is inherently interactive for cursor-styling purposes, even
 * without an explicit `onClick` or `href`.
 */
export type ButtonType = "submit" | "button" | "reset";

/** Like `Omit` but distributes over union types, preserving discriminated unions. */
export type DistributiveOmit<T, K extends keyof any> = T extends any
  ? Omit<T, K>
  : never;

/**
 * A React function component that accepts {@link IconProps}.
 *
 * Use this type when a component prop expects an icon — it ensures the icon
 * supports `className`, `size`, `title`, and `color` without callers needing
 * to import `IconProps` directly.
 *
 * @example
 * ```tsx
 * import type { IconFunctionComponent } from "@opal/types";
 *
 * interface ButtonProps {
 *   icon?: IconFunctionComponent;
 * }
 * ```
 */
export type IconFunctionComponent = React.FunctionComponent<IconProps>;


================================================
FILE: web/lib/opal/src/utils.ts
================================================
import { clsx, type ClassValue } from "clsx";
import { twMerge } from "tailwind-merge";
import type { RichStr } from "@opal/types";

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}

/**
 * Wraps strings for inline markdown parsing by `Text` and other Opal components.
 *
 * Multiple arguments are joined with newlines, so each string renders on its own line:
 * ```tsx
 * markdown("Line one", "Line two", "Line three")
 * ```
 */
export function markdown(...lines: string[]): RichStr {
  return { __brand: "RichStr", raw: lines.join("\n") };
}


================================================
FILE: web/lib/opal/tsconfig.json
================================================
{
  "extends": "../../tsconfig.json",
  "compilerOptions": {
    "paths": {
      "@opal/*": ["./src/*"],
      // TODO (@raunakab): Remove this once the table component migration is
      // complete. The table internals still import app-layer modules (e.g.
      // @/refresh-components/texts/Text, @/refresh-components/Popover) via the
      // @/ alias. Without this entry the IDE cannot resolve those paths since
      // opal's tsconfig only defines @opal/*. Once all @/ deps are replaced
      // with opal-internal equivalents, this line should be deleted.
      "@/*": ["../../src/*"]
    }
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules"]
}


================================================
FILE: web/next.config.js
================================================
// Always require withSentryConfig
const { withSentryConfig } = require("@sentry/nextjs");

const cspHeader = `
    style-src 'self' 'unsafe-inline' https://fonts.googleapis.com;
    font-src 'self' https://fonts.gstatic.com;
    object-src 'none';
    base-uri 'self';
    form-action 'self';
    ${
      process.env.NEXT_PUBLIC_CLOUD_ENABLED === "true" &&
      process.env.NODE_ENV !== "development"
        ? "upgrade-insecure-requests;"
        : ""
    }
`;

/** @type {import('next').NextConfig} */
const nextConfig = {
  productionBrowserSourceMaps: false,
  output: "standalone",
  transpilePackages: ["@onyx/opal"],
  typedRoutes: true,
  reactCompiler: true,
  images: {
    // Used to fetch favicons
    remotePatterns: [
      {
        protocol: "https",
        hostname: "www.google.com",
        port: "",
        pathname: "/s2/favicons/**",
      },
    ],
    unoptimized: true, // Disable image optimization to avoid requiring Sharp
  },
  async headers() {
    const isDev = process.env.NODE_ENV === "development";
    return [
      {
        source: "/(.*)",
        headers: [
          {
            key: "Content-Security-Policy",
            value: cspHeader.replace(/\n/g, ""),
          },
          {
            key: "Strict-Transport-Security",
            value: "max-age=63072000; includeSubDomains; preload",
          },
          {
            key: "Referrer-Policy",
            value: "strict-origin-when-cross-origin",
          },
          {
            key: "X-Content-Type-Options",
            value: "nosniff",
          },
          {
            key: "Permissions-Policy",
            value:
              "accelerometer=(), ambient-light-sensor=(), autoplay=(), battery=(), camera=(), cross-origin-isolated=(), display-capture=(), document-domain=(), encrypted-media=(), execution-while-not-rendered=(), execution-while-out-of-viewport=(), fullscreen=(), geolocation=(), gyroscope=(), keyboard-map=(), magnetometer=(), microphone=(self), midi=(), navigation-override=(), payment=(), picture-in-picture=(), publickey-credentials-get=(), screen-wake-lock=(), sync-xhr=(), usb=(), web-share=(), xr-spatial-tracking=()",
          },
        ],
      },
      {
        // Cache static assets (images, icons, fonts, etc.) to prevent refetching and re-renders
        source: "/_next/static/:path*",
        headers: [
          {
            key: "Cache-Control",
            value: isDev
              ? "no-cache, must-revalidate" // Dev: always check if fresh
              : "public, max-age=2592000, immutable", // Prod: cache for 30 days
          },
        ],
      },
    ];
  },
  async rewrites() {
    return [
      {
        source: "/ph_ingest/static/:path*",
        destination: "https://us-assets.i.posthog.com/static/:path*",
      },
      {
        source: "/ph_ingest/:path*",
        destination: `${
          process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.i.posthog.com"
        }/:path*`,
      },
      {
        source: "/api/docs/:path*", // catch /api/docs and /api/docs/...
        destination: `${
          process.env.INTERNAL_URL || "http://localhost:8080"
        }/docs/:path*`,
      },
      {
        source: "/api/docs", // if you also need the exact /api/docs
        destination: `${
          process.env.INTERNAL_URL || "http://localhost:8080"
        }/docs`,
      },
      {
        source: "/openapi.json",
        destination: `${
          process.env.INTERNAL_URL || "http://localhost:8080"
        }/openapi.json`,
      },
    ];
  },
  async redirects() {
    return [
      {
        source: "/chat",
        destination: "/app",
        permanent: true,
      },
      // NRF routes: Redirect to /nrf which doesn't require auth
      // (NRFPage handles unauthenticated users gracefully with a login modal)
      {
        source: "/app/nrf/side-panel",
        destination: "/nrf/side-panel",
        permanent: true,
      },
      {
        source: "/app/nrf",
        destination: "/nrf",
        permanent: true,
      },
      {
        source: "/chat/:path*",
        destination: "/app/:path*",
        permanent: true,
      },
      // Legacy /assistants → /agents redirects (added in PR #8869).
      // Preserves backward compatibility for bookmarks, shared links, and
      // hardcoded URLs that still reference the old /assistants paths.
      // TODO: Remove these redirects in v4.0 — https://linear.app/onyx-app/issue/ENG-3771
      {
        source: "/admin/assistants",
        destination: "/admin/agents",
        permanent: true,
      },
      {
        source: "/admin/assistants/:path*",
        destination: "/admin/agents/:path*",
        permanent: true,
      },
      {
        source: "/ee/assistants/:path*",
        destination: "/ee/agents/:path*",
        permanent: true,
      },
    ];
  },
};

// Sentry configuration for error monitoring:
// - Without SENTRY_AUTH_TOKEN and NEXT_PUBLIC_SENTRY_DSN: Sentry is completely disabled
// - With both configured: Capture errors and limited performance data

// Determine if Sentry should be enabled
const sentryEnabled = Boolean(
  process.env.SENTRY_AUTH_TOKEN && process.env.NEXT_PUBLIC_SENTRY_DSN
);

// Sentry webpack plugin options
const sentryWebpackPluginOptions = {
  org: process.env.SENTRY_ORG || "onyx-vl",
  project: process.env.SENTRY_PROJECT || "onyx-web",
  authToken: process.env.SENTRY_AUTH_TOKEN,
  silent: !sentryEnabled, // Silence output when Sentry is disabled
  dryRun: !sentryEnabled, // Don't upload source maps when Sentry is disabled
  ...(sentryEnabled && {
    sourceMaps: {
      include: ["./.next"],
      ignore: ["node_modules"],
      urlPrefix: "~/_next",
      stripPrefix: ["webpack://_N_E/"],
      validate: true,
      cleanArtifacts: true,
    },
  }),
};

// Export the module with conditional Sentry configuration
module.exports = withSentryConfig(nextConfig, sentryWebpackPluginOptions);


================================================
FILE: web/package.json
================================================
{
  "name": "web",
  "version": "1.0.0-dev",
  "private": true,
  "workspaces": [
    "lib/opal"
  ],
  "scripts": {
    "dev": "next dev",
    "dev:profile": "NEXT_PUBLIC_ENABLE_STATS=true next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint",
    "lint:unused": "eslint --ext .js,.jsx,.ts,.tsx --rule 'unused-imports/no-unused-imports: error' --quiet --fix=false src/",
    "lint:fix-unused": "eslint --ext .js,.jsx,.ts,.tsx --rule 'unused-imports/no-unused-imports: error' --quiet --fix src/",
    "lint:fix-unused-vars": "eslint --ext .js,.jsx,.ts,.tsx --fix --quiet src/",
    "types:check": "tsgo --noEmit --project tsconfig.types.json",
    "format": "prettier --write \"src/**/*.{ts,tsx,js,jsx,json,css,md}\"",
    "format:check": "prettier --check \"src/**/*.{ts,tsx,js,jsx,json,css,md}\"",
    "test": "jest",
    "test:watch": "jest --watch",
    "test:coverage": "jest --coverage",
    "test:verbose": "jest --verbose",
    "test:ci": "jest --ci --maxWorkers=2 --silent --bail",
    "test:changed": "jest --onlyChanged",
    "test:diff": "jest --changedSince=main",
    "test:debug": "node --inspect-brk node_modules/.bin/jest --runInBand",
    "storybook": "storybook dev -p 6006",
    "storybook:build": "storybook build -o storybook-static"
  },
  "dependencies": {
    "@dnd-kit/core": "^6.1.0",
    "@dnd-kit/modifiers": "^7.0.0",
    "@dnd-kit/sortable": "^8.0.0",
    "@dnd-kit/utilities": "^3.2.2",
    "@emotion/stylis": "^0.8.5",
    "@headlessui/react": "^2.2.0",
    "@headlessui/tailwindcss": "^0.2.1",
    "@onyx/opal": "./lib/opal",
    "@phosphor-icons/react": "^2.0.8",
    "@radix-ui/react-accordion": "^1.2.2",
    "@radix-ui/react-avatar": "^1.1.10",
    "@radix-ui/react-collapsible": "^1.1.2",
    "@radix-ui/react-dialog": "^1.1.6",
    "@radix-ui/react-dropdown-menu": "^2.1.6",
    "@radix-ui/react-hover-card": "^1.1.15",
    "@radix-ui/react-label": "^2.1.1",
    "@radix-ui/react-menubar": "^1.1.16",
    "@radix-ui/react-popover": "^1.1.6",
    "@radix-ui/react-radio-group": "^1.2.2",
    "@radix-ui/react-scroll-area": "^1.2.2",
    "@radix-ui/react-select": "^2.1.6",
    "@radix-ui/react-separator": "^1.1.0",
    "@radix-ui/react-slider": "^1.2.2",
    "@radix-ui/react-slot": "^1.2.4",
    "@radix-ui/react-tabs": "^1.1.1",
    "@radix-ui/react-tooltip": "^1.2.8",
    "@sentry/nextjs": "^10.27.0",
    "@sentry/tracing": "^7.120.3",
    "@stripe/stripe-js": "^4.6.0",
    "@tailwindcss/container-queries": "^0.1.1",
    "@tanstack/react-table": "^8.21.3",
    "autoprefixer": "^10.4.22",
    "class-variance-authority": "^0.7.0",
    "clsx": "^2.1.1",
    "cmdk": "^1.0.0",
    "cookies-next": "^5.1.0",
    "date-fns": "^3.6.0",
    "docx-preview": "^0.3.7",
    "favicon-fetch": "^1.0.0",
    "formik": "^2.2.9",
    "highlight.js": "^11.11.1",
    "js-cookie": "^3.0.5",
    "katex": "^0.16.38",
    "linguist-languages": "^9.3.1",
    "lodash": "^4.17.23",
    "lowlight": "^3.3.0",
    "lucide-react": "^0.454.0",
    "mdast-util-find-and-replace": "^3.0.1",
    "mime": "^4.1.0",
    "motion": "^12.29.0",
    "next": "16.1.7",
    "next-themes": "^0.4.4",
    "postcss": "^8.5.6",
    "posthog-js": "^1.176.0",
    "pptxgenjs": "^4.0.1",
    "react": "19.2.4",
    "react-datepicker": "^7.6.0",
    "react-day-picker": "^9.13.0",
    "react-dom": "19.2.4",
    "react-dropzone": "^14.2.3",
    "react-icons": "^4.8.0",
    "react-loader-spinner": "^8.0.0",
    "react-markdown": "^9.0.1",
    "react-select": "^5.8.0",
    "recharts": "^2.13.1",
    "rehype-highlight": "^7.0.2",
    "rehype-katex": "^7.0.1",
    "rehype-sanitize": "^6.0.0",
    "rehype-stringify": "^10.0.1",
    "remark-gfm": "^4.0.0",
    "remark-math": "^6.0.0",
    "semver": "^7.5.4",
    "sharp": "^0.33.5",
    "stripe": "^17.0.0",
    "swr": "^2.1.5",
    "tailwind-merge": "^2.5.4",
    "tailwindcss-animate": "^1.0.7",
    "uuid": "^9.0.1",
    "vaul": "^1.1.1",
    "yup": "^1.4.0",
    "zustand": "^5.0.8"
  },
  "devDependencies": {
    "@playwright/test": "^1.39.0",
    "@storybook/addon-essentials": "^8.6.18",
    "@storybook/addon-themes": "^8.6.18",
    "@storybook/blocks": "^8.6.18",
    "@storybook/react": "^8.6.18",
    "@storybook/react-vite": "^8.6.18",
    "@tailwindcss/typography": "^0.5.19",
    "@testing-library/jest-dom": "^6.9.1",
    "@testing-library/react": "^16.3.0",
    "@testing-library/user-event": "^14.6.1",
    "@types/chrome": "^0.0.287",
    "@types/hast": "^3.0.4",
    "@types/jest": "^29.5.14",
    "@types/js-cookie": "^3.0.6",
    "@types/lodash": "^4.17.20",
    "@types/node": "18.15.11",
    "@types/react": "19.2.10",
    "@types/react-dom": "19.2.3",
    "@types/stats.js": "^0.17.4",
    "@types/uuid": "^9.0.8",
    "@typescript/native-preview": "7.0.0-dev.20251222.1",
    "babel-plugin-react-compiler": "^1.0.0",
    "baseline-browser-mapping": "^2.9.19",
    "eslint": "^9.39.1",
    "eslint-config-next": "16.1.6",
    "eslint-plugin-unused-imports": "^4.1.4",
    "identity-obj-proxy": "^3.0.0",
    "jest": "^29.7.0",
    "jest-environment-jsdom": "^30.2.0",
    "prettier": "3.1.0",
    "stats.js": "^0.17.0",
    "storybook": "^8.6.18",
    "tailwindcss": "^3.4.17",
    "ts-jest": "^29.2.5",
    "ts-unused-exports": "^11.0.1",
    "typescript": "^5.9.3",
    "whatwg-fetch": "^3.6.20"
  },
  "overrides": {
    "react-is": "^19.0.0-rc-69d4b800-20241021",
    "@types/react": "19.2.10",
    "@types/react-dom": "19.2.3"
  }
}


================================================
FILE: web/playwright.config.ts
================================================
import { defineConfig, devices } from "@playwright/test";
import * as dotenv from "dotenv";

dotenv.config({ path: ".vscode/.env" });

export default defineConfig({
  globalSetup: require.resolve("./tests/e2e/global-setup"),
  timeout: 100000, // 100 seconds timeout
  expect: {
    timeout: 15000, // 15 seconds timeout for all assertions to reduce flakiness
    toHaveScreenshot: {
      // Allow up to 1% of pixels to differ (accounts for anti-aliasing, subpixel rendering)
      maxDiffPixelRatio: 0.01,
      // Threshold per-channel (0-1): how different a pixel can be before it counts as changed
      threshold: 0.2,
    },
  },
  retries: process.env.CI ? 2 : 0, // Retry failed tests 2 times in CI, 0 locally

  // When debugging, comment out the first `workers` line and uncomment the second one.
  // The second one runs the tests in serial, which helps when using the playwright-debugger to step through each test-step.
  // - @raunakab
  workers: process.env.CI ? 2 : undefined, // Limit to 2 parallel workers in CI to reduce flakiness
  // workers: 1,

  reporter: [["list"]],
  // Only run Playwright tests from tests/e2e directory (ignore Jest tests in src/)
  testMatch: /.*\/tests\/e2e\/.*\.spec\.ts/,
  outputDir: "output/playwright",
  use: {
    // Base URL for the application, can be overridden via BASE_URL environment variable
    baseURL: process.env.BASE_URL || "http://localhost:3000",
    // Capture trace on failure
    trace: "retain-on-failure",
  },
  projects: [
    {
      name: "admin",
      use: {
        ...devices["Desktop Chrome"],
        viewport: { width: 1280, height: 720 },
        storageState: "admin_auth.json",
      },
      grepInvert: [/@exclusive/, /@lite/],
    },
    {
      // this suite runs independently and serially + slower
      // we should be cautious about bloating this suite
      name: "exclusive",
      use: {
        ...devices["Desktop Chrome"],
        viewport: { width: 1280, height: 720 },
        storageState: "admin_auth.json",
      },
      grep: /@exclusive/,
      workers: 1,
    },
    {
      // runs against the Onyx Lite stack (DISABLE_VECTOR_DB=true, no Vespa/Redis)
      name: "lite",
      use: {
        ...devices["Desktop Chrome"],
        viewport: { width: 1280, height: 720 },
        storageState: "admin_auth.json",
      },
      grep: /@lite/,
    },
  ],
});


================================================
FILE: web/postcss.config.js
================================================
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};


================================================
FILE: web/sentry.edge.config.ts
================================================
// This file configures the initialization of Sentry for edge features (middleware, edge routes, and so on).
// The config you add here will be used whenever one of the edge features is loaded.
// Note that this config is unrelated to the Vercel Edge Runtime and is also required when running locally.
// https://docs.sentry.io/platforms/javascript/guides/nextjs/

import * as Sentry from "@sentry/nextjs";

if (process.env.NEXT_PUBLIC_SENTRY_DSN) {
  Sentry.init({
    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
    release: process.env.SENTRY_RELEASE,
    // Only capture unhandled exceptions
    tracesSampleRate: 0,
    debug: false,
  });
}


================================================
FILE: web/sentry.server.config.ts
================================================
// This file configures the initialization of Sentry on the server.
// The config you add here will be used whenever the server handles a request.
// https://docs.sentry.io/platforms/javascript/guides/nextjs/

import * as Sentry from "@sentry/nextjs";

if (process.env.NEXT_PUBLIC_SENTRY_DSN) {
  Sentry.init({
    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
    release: process.env.SENTRY_RELEASE,

    // Setting this option to true will print useful information to the console while you're setting up Sentry.
    debug: false,

    // Disable performance monitoring and only capture errors
    tracesSampleRate: 0,
    profilesSampleRate: 0,
  });
}


================================================
FILE: web/src/app/PostHogPageView.tsx
================================================
"use client";

import { usePathname, useSearchParams } from "next/navigation";
import { useEffect } from "react";
import { usePostHog } from "posthog-js/react";

export default function PostHogPageView(): null {
  const pathname = usePathname();
  const searchParams = useSearchParams();
  const posthog = usePostHog();

  useEffect(() => {
    if (!posthog) {
      return;
    }

    // Track pageviews
    if (pathname) {
      let url = window.origin + pathname;
      if (searchParams?.toString()) {
        url = url + `?${searchParams.toString()}`;
      }
      posthog.capture("$pageview", {
        $current_url: url,
      });
    }
  }, [pathname, searchParams, posthog]);

  return null;
}


================================================
FILE: web/src/app/admin/actions/edit/[toolId]/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";

export default function EditToolPage() {
  const router = useRouter();

  useEffect(() => {
    // Redirect to MCP actions page
    router.replace("/admin/actions/mcp" as Route);
  }, [router]);

  return null;
}


================================================
FILE: web/src/app/admin/actions/edit-mcp/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";

export default function EditMCPPage() {
  const router = useRouter();

  useEffect(() => {
    // Redirect to MCP actions page
    router.replace("/admin/actions/mcp" as Route);
  }, [router]);

  return null;
}


================================================
FILE: web/src/app/admin/actions/mcp/page.tsx
================================================
"use client";

import MCPPageContent from "@/sections/actions/MCPPageContent";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.MCP_ACTIONS;

export default function Main() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Connect MCP (Model Context Protocol) servers to add custom actions and tools for your agents."
        separator
      />
      <SettingsLayouts.Body>
        <MCPPageContent />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/actions/new/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";

export default function NewActionPage() {
  const router = useRouter();

  useEffect(() => {
    // Redirect to MCP actions page
    router.replace("/admin/actions/mcp" as Route);
  }, [router]);

  return null;
}


================================================
FILE: web/src/app/admin/actions/open-api/page.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import OpenApiPageContent from "@/sections/actions/OpenApiPageContent";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.OPENAPI_ACTIONS;

export default function Main() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Connect OpenAPI servers to add custom actions and tools for your agents."
        separator
      />
      <SettingsLayouts.Body>
        <OpenApiPageContent />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/actions/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";

export default function AdminActionsPage() {
  const router = useRouter();

  useEffect(() => {
    // Redirect to MCP actions page as the default
    router.replace("/admin/actions/mcp" as Route);
  }, [router]);

  return null;
}


================================================
FILE: web/src/app/admin/add-connector/page.tsx
================================================
"use client";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SourceCategory, SourceMetadata } from "@/lib/search/interfaces";
import { listSourceMetadata } from "@/lib/sources";
import { Button } from "@opal/components";
import {
  useCallback,
  useContext,
  useDeferredValue,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { useFederatedConnectors } from "@/lib/hooks";
import {
  FederatedConnectorDetail,
  federatedSourceToRegularSource,
  ValidSources,
} from "@/lib/types";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { Credential } from "@/lib/connectors/credentials";
import { SettingsContext } from "@/providers/SettingsProvider";
import SourceTile from "@/components/SourceTile";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.ADD_CONNECTOR;

function SourceTileTooltipWrapper({
  sourceMetadata,
  preSelect,
  federatedConnectors,
  slackCredentials,
}: {
  sourceMetadata: SourceMetadata;
  preSelect?: boolean;
  federatedConnectors?: FederatedConnectorDetail[];
  slackCredentials?: Credential<any>[];
}) {
  // Check if there's already a federated connector for this source
  const existingFederatedConnector = useMemo(() => {
    if (!sourceMetadata.federated || !federatedConnectors) {
      return null;
    }

    return federatedConnectors.find(
      (connector) =>
        federatedSourceToRegularSource(connector.source) ===
        sourceMetadata.internalName
    );
  }, [sourceMetadata, federatedConnectors]);

  // For Slack specifically, check if there are existing non-federated credentials
  const isSlackTile = sourceMetadata.internalName === ValidSources.Slack;
  const hasExistingSlackCredentials = useMemo(() => {
    return isSlackTile && slackCredentials && slackCredentials.length > 0;
  }, [isSlackTile, slackCredentials]);

  // Determine the URL to navigate to
  const navigationUrl = useMemo(() => {
    // If there's an existing federated connector, route to edit it
    if (existingFederatedConnector) {
      return `/admin/federated/${existingFederatedConnector.id}`;
    }

    // For all other sources (including Slack), use the regular admin URL
    return sourceMetadata.adminUrl;
  }, [existingFederatedConnector, sourceMetadata]);

  // Compute whether to hide the tooltip
  const shouldHideTooltip =
    !existingFederatedConnector &&
    !hasExistingSlackCredentials &&
    !sourceMetadata.federated;

  // If tooltip should be hidden, just render the tile as a component
  if (shouldHideTooltip) {
    return (
      <SourceTile
        sourceMetadata={sourceMetadata}
        preSelect={preSelect}
        navigationUrl={navigationUrl}
        hasExistingSlackCredentials={!!hasExistingSlackCredentials}
      />
    );
  }

  return (
    <TooltipProvider>
      <Tooltip>
        <TooltipTrigger asChild>
          <div>
            <SourceTile
              sourceMetadata={sourceMetadata}
              preSelect={preSelect}
              navigationUrl={navigationUrl}
              hasExistingSlackCredentials={!!hasExistingSlackCredentials}
            />
          </div>
        </TooltipTrigger>
        <TooltipContent side="top" className="max-w-sm">
          {existingFederatedConnector ? (
            <Text as="p" textLight05 secondaryBody>
              <strong>Federated connector already configured.</strong> Click to
              edit the existing connector.
            </Text>
          ) : hasExistingSlackCredentials ? (
            <Text as="p" textLight05 secondaryBody>
              <strong>Existing Slack credentials found.</strong> Click to manage
              your Slack connector.
            </Text>
          ) : null}
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
}

export default function Page() {
  const sources = useMemo(() => listSourceMetadata(), []);

  const [rawSearchTerm, setSearchTerm] = useState("");
  const searchTerm = useDeferredValue(rawSearchTerm);

  const { data: federatedConnectors } = useFederatedConnectors();
  const settings = useContext(SettingsContext);

  // Fetch Slack credentials to determine navigation behavior
  const { data: slackCredentials } = useSWR<Credential<any>[]>(
    buildSimilarCredentialInfoURL(ValidSources.Slack),
    errorHandlingFetcher
  );

  const searchInputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (searchInputRef.current) {
      searchInputRef.current.focus();
    }
  }, []);

  const filterSources = useCallback(
    (sources: SourceMetadata[]) => {
      if (!searchTerm) return sources;
      const lowerSearchTerm = searchTerm.toLowerCase();
      return sources.filter(
        (source) =>
          source.displayName.toLowerCase().includes(lowerSearchTerm) ||
          source.category.toLowerCase().includes(lowerSearchTerm)
      );
    },
    [searchTerm]
  );

  const popularSources = useMemo(() => {
    const filtered = filterSources(sources);
    return sources.filter(
      (source) =>
        source.isPopular &&
        (filtered.includes(source) ||
          source.displayName.toLowerCase().includes(searchTerm.toLowerCase()))
    );
  }, [sources, filterSources, searchTerm]);

  const categorizedSources = useMemo(() => {
    const filtered = filterSources(sources);
    const categories = Object.values(SourceCategory).reduce(
      (acc, category) => {
        acc[category] = sources.filter(
          (source) =>
            source.category === category &&
            (filtered.includes(source) ||
              category.toLowerCase().includes(searchTerm.toLowerCase()))
        );
        return acc;
      },
      {} as Record<SourceCategory, SourceMetadata[]>
    );
    // Filter out the "Other" category if show_extra_connectors is false
    if (settings?.settings?.show_extra_connectors === false) {
      const filteredCategories = Object.entries(categories).filter(
        ([category]) => category !== SourceCategory.Other
      );
      return Object.fromEntries(filteredCategories) as Record<
        SourceCategory,
        SourceMetadata[]
      >;
    }
    return categories;
  }, [
    sources,
    filterSources,
    searchTerm,
    settings?.settings?.show_extra_connectors,
  ]);

  // When searching, dedupe Popular against whatever is already in results
  const resultIds = useMemo(() => {
    if (!searchTerm) return new Set<string>();
    return new Set(
      Object.values(categorizedSources)
        .flat()
        .map((s) => s.internalName)
    );
  }, [categorizedSources, searchTerm]);

  const dedupedPopular = useMemo(() => {
    if (!searchTerm) return popularSources;
    return popularSources.filter((s) => !resultIds.has(s.internalName));
  }, [popularSources, resultIds, searchTerm]);

  const handleKeyPress = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Enter") {
      const filteredCategories = Object.entries(categorizedSources).filter(
        ([_, sources]) => sources.length > 0
      );
      if (
        filteredCategories.length > 0 &&
        filteredCategories[0] !== undefined &&
        filteredCategories[0][1].length > 0
      ) {
        const firstSource = filteredCategories[0][1][0];
        if (firstSource) {
          // Check if this source has an existing federated connector
          const existingFederatedConnector =
            firstSource.federated && federatedConnectors
              ? federatedConnectors.find(
                  (connector) =>
                    connector.source === `federated_${firstSource.internalName}`
                )
              : null;

          const url = existingFederatedConnector
            ? `/admin/federated/${existingFederatedConnector.id}`
            : firstSource.adminUrl;

          window.open(url, "_self");
        }
      }
    }
  };

  return (
    <SettingsLayouts.Root width="full">
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        rightChildren={
          <Button href="/admin/indexing/status">See Connectors</Button>
        }
        separator
      />
      <SettingsLayouts.Body>
        <InputTypeIn
          type="text"
          placeholder="Search Connectors"
          ref={searchInputRef}
          value={rawSearchTerm} // keep the input bound to immediate state
          onChange={(event) => setSearchTerm(event.target.value)}
          onKeyDown={handleKeyPress}
          className="w-96 flex-none"
        />

        {dedupedPopular.length > 0 && (
          <div className="pt-8">
            <Text as="p" headingH3>
              Popular
            </Text>
            <div className="flex flex-wrap gap-4 p-4">
              {dedupedPopular.map((source) => (
                <SourceTileTooltipWrapper
                  preSelect={false}
                  key={source.internalName}
                  sourceMetadata={source}
                  federatedConnectors={federatedConnectors}
                  slackCredentials={slackCredentials}
                />
              ))}
            </div>
          </div>
        )}

        {Object.entries(categorizedSources)
          .filter(([_, sources]) => sources.length > 0)
          .map(([category, sources], categoryInd) => (
            <div key={category} className="pt-8">
              <Text as="p" headingH3>
                {category}
              </Text>
              <div className="flex flex-wrap gap-4 p-4">
                {sources.map((source, sourceInd) => (
                  <SourceTileTooltipWrapper
                    preSelect={
                      (searchTerm?.length ?? 0) > 0 &&
                      categoryInd == 0 &&
                      sourceInd == 0
                    }
                    key={source.internalName}
                    sourceMetadata={source}
                    federatedConnectors={federatedConnectors}
                    slackCredentials={slackCredentials}
                  />
                ))}
              </div>
            </div>
          ))}
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/agents/CollapsibleSection.tsx
================================================
"use client";
import React, { ReactNode, useState } from "react";
import { FiSettings } from "react-icons/fi";

interface CollapsibleSectionProps {
  children: ReactNode;
  prompt?: string;
  className?: string;
}

const CollapsibleSection: React.FC<CollapsibleSectionProps> = ({
  children,
  prompt,
  className = "",
}) => {
  const [isCollapsed, setIsCollapsed] = useState<boolean>(false);

  const toggleCollapse = (e?: React.MouseEvent<HTMLDivElement>) => {
    // Only toggle if the click is on the border or plus sign
    if (
      !e ||
      e.currentTarget === e.target ||
      (e.target as HTMLElement).classList.contains("collapse-toggle")
    ) {
      setIsCollapsed(!isCollapsed);
    }
  };

  return (
    <div
      className={`relative ${isCollapsed ? "h-6" : ""} ${className}`}
      style={{ transition: "height 0.3s ease-out" }}
    >
      <div
        className={`
          cursor-pointer
          ${isCollapsed ? "h-6" : "pl-6 border-l-2  border-border"}
        `}
        onClick={toggleCollapse}
      >
        {" "}
        {isCollapsed ? (
          <span className="collapse-toggle text-lg absolute left-0 top-0 text-sm flex items-center gap-x-3 cursor-pointer">
            <FiSettings className="pointer-events-none my-auto" size={16} />
            {prompt}{" "}
          </span>
        ) : (
          <>{children}</>
        )}
      </div>
    </div>
  );
};

export default CollapsibleSection;


================================================
FILE: web/src/app/admin/agents/interfaces.ts
================================================
import { ValidSources } from "@/lib/types";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import { DocumentSetSummary, MinimalUserSnapshot } from "@/lib/types";

// Represents a hierarchy node (folder, space, channel, etc.) attached to a persona
export interface HierarchyNodeSnapshot {
  id: number;
  raw_node_id: string;
  display_name: string;
  link: string | null;
  source: ValidSources;
  node_type: string; // HierarchyNodeType enum value
}

// Represents a document attached to a persona
export interface AttachedDocumentSnapshot {
  id: string;
  title: string;
  link: string | null;
  parent_id: number | null;
  last_modified: string | null;
  last_synced: string | null;
  source: ValidSources | null;
}

export interface StarterMessageBase {
  message: string;
}

export interface StarterMessage extends StarterMessageBase {
  name: string;
}

export interface MinimalPersonaSnapshot {
  id: number;
  name: string;
  description: string;
  tools: ToolSnapshot[];
  starter_messages: StarterMessage[] | null;
  document_sets: DocumentSetSummary[];
  // Counts for knowledge sources (used to determine if search tool should be enabled)
  hierarchy_node_count?: number;
  attached_document_count?: number;
  // Unique sources from all knowledge (document sets + hierarchy nodes)
  // Used to populate source filters in chat
  knowledge_sources?: ValidSources[];
  llm_model_version_override?: string;
  llm_model_provider_override?: string;

  uploaded_image_id?: string;
  icon_name?: string;

  is_public: boolean;
  is_listed: boolean;
  display_priority: number | null;
  is_featured: boolean;
  builtin_persona: boolean;

  labels?: PersonaLabel[];
  owner: MinimalUserSnapshot | null;
}

export interface Persona extends MinimalPersonaSnapshot {
  user_file_ids: string[];
  users: MinimalUserSnapshot[];
  groups: number[];
  // Hierarchy nodes (folders, spaces, channels) attached for scoped search
  hierarchy_nodes?: HierarchyNodeSnapshot[];
  // Individual documents attached for scoped search
  attached_documents?: AttachedDocumentSnapshot[];

  // Embedded prompt fields on persona
  system_prompt: string | null;
  replace_base_system_prompt: boolean;
  task_prompt: string | null;
  datetime_aware: boolean;
}

export interface FullPersona extends Persona {
  search_start_date: string | null;
}

export interface PersonaLabel {
  id: number;
  name: string;
}


================================================
FILE: web/src/app/admin/agents/lib.ts
================================================
import {
  MinimalPersonaSnapshot,
  Persona,
  StarterMessage,
} from "@/app/admin/agents/interfaces";

interface PersonaUpsertRequest {
  name: string;
  description: string;
  system_prompt: string;
  task_prompt: string;
  datetime_aware: boolean;
  document_set_ids: number[];
  is_public: boolean;
  llm_model_provider_override: string | null;
  llm_model_version_override: string | null;
  starter_messages: StarterMessage[] | null;
  users?: string[];
  groups: number[];
  tool_ids: number[];
  remove_image?: boolean;
  uploaded_image_id: string | null;
  icon_name: string | null;
  search_start_date: Date | null;
  is_featured: boolean;
  display_priority: number | null;
  label_ids: number[] | null;
  user_file_ids: string[] | null;
  replace_base_system_prompt: boolean;
  // Hierarchy nodes (folders, spaces, channels) for scoped search
  hierarchy_node_ids: number[];
  // Individual documents for scoped search
  document_ids: string[];
}

export interface PersonaUpsertParameters {
  name: string;
  description: string;
  system_prompt: string;
  replace_base_system_prompt: boolean;
  task_prompt: string;
  datetime_aware: boolean;
  document_set_ids: number[];
  is_public: boolean;
  llm_model_provider_override: string | null;
  llm_model_version_override: string | null;
  starter_messages: StarterMessage[] | null;
  users?: string[];
  groups: number[];
  tool_ids: number[];
  remove_image?: boolean;
  search_start_date: Date | null;
  uploaded_image_id: string | null;
  icon_name: string | null;
  is_featured: boolean;
  label_ids: number[] | null;
  user_file_ids: string[];
  // Hierarchy nodes (folders, spaces, channels) for scoped search
  hierarchy_node_ids?: number[];
  // Individual documents for scoped search
  document_ids?: string[];
}

function buildPersonaUpsertRequest({
  name,
  description,
  system_prompt,
  task_prompt,
  document_set_ids,
  is_public,
  groups,
  datetime_aware,
  users,
  tool_ids,
  remove_image,
  search_start_date,
  user_file_ids,
  hierarchy_node_ids,
  document_ids,
  icon_name,
  uploaded_image_id,
  is_featured,
  llm_model_provider_override,
  llm_model_version_override,
  starter_messages,
  label_ids,
  replace_base_system_prompt,
}: PersonaUpsertParameters): PersonaUpsertRequest {
  return {
    name,
    description,
    system_prompt,
    task_prompt,
    document_set_ids,
    is_public,
    uploaded_image_id,
    icon_name,
    groups,
    users,
    tool_ids,
    remove_image,
    search_start_date,
    datetime_aware,
    is_featured: is_featured ?? false,
    llm_model_provider_override: llm_model_provider_override ?? null,
    llm_model_version_override: llm_model_version_override ?? null,
    starter_messages: starter_messages ?? null,
    display_priority: null,
    label_ids: label_ids ?? null,
    user_file_ids: user_file_ids ?? null,
    replace_base_system_prompt,
    hierarchy_node_ids: hierarchy_node_ids ?? [],
    document_ids: document_ids ?? [],
  };
}

export async function uploadFile(file: File): Promise<string | null> {
  const formData = new FormData();
  formData.append("file", file);
  const response = await fetch("/api/admin/persona/upload-image", {
    method: "POST",
    body: formData,
    credentials: "include",
  });

  if (!response.ok) {
    console.error("Failed to upload file");
    return null;
  }

  const responseJson = await response.json();
  return responseJson.file_id;
}

export async function createPersona(
  personaUpsertParams: PersonaUpsertParameters
): Promise<Response | null> {
  const createPersonaResponse = await fetch("/api/persona", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(buildPersonaUpsertRequest(personaUpsertParams)),
    credentials: "include",
  });

  return createPersonaResponse;
}

export async function updatePersona(
  id: number,
  personaUpsertParams: PersonaUpsertParameters
): Promise<Response | null> {
  const updatePersonaResponse = await fetch(`/api/persona/${id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(buildPersonaUpsertRequest(personaUpsertParams)),
    credentials: "include",
  });

  return updatePersonaResponse;
}

export function deletePersona(personaId: number) {
  return fetch(`/api/persona/${personaId}`, {
    method: "DELETE",
    credentials: "include",
  });
}

function smallerNumberFirstComparator(a: number, b: number) {
  return a > b ? 1 : -1;
}

function closerToZeroNegativesFirstComparator(a: number, b: number) {
  if (a < 0 && b > 0) {
    return -1;
  }
  if (a > 0 && b < 0) {
    return 1;
  }

  const absA = Math.abs(a);
  const absB = Math.abs(b);

  if (absA === absB) {
    return a > b ? 1 : -1;
  }

  return absA > absB ? 1 : -1;
}

export function personaComparator(
  a: MinimalPersonaSnapshot | Persona,
  b: MinimalPersonaSnapshot | Persona
) {
  if (a.display_priority === null && b.display_priority === null) {
    return closerToZeroNegativesFirstComparator(a.id, b.id);
  }

  if (a.display_priority !== b.display_priority) {
    if (a.display_priority === null) {
      return 1;
    }
    if (b.display_priority === null) {
      return -1;
    }

    return smallerNumberFirstComparator(a.display_priority, b.display_priority);
  }

  return closerToZeroNegativesFirstComparator(a.id, b.id);
}

export async function togglePersonaFeatured(
  personaId: number,
  featured: boolean
) {
  const response = await fetch(`/api/admin/persona/${personaId}/featured`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      is_featured: !featured,
    }),
    credentials: "include",
  });
  return response;
}

export async function togglePersonaVisibility(
  personaId: number,
  isVisible: boolean
) {
  const response = await fetch(`/api/admin/persona/${personaId}/listed`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      is_listed: !isVisible,
    }),
    credentials: "include",
  });
  return response;
}


================================================
FILE: web/src/app/admin/agents/page.tsx
================================================
export { default } from "@/refresh-pages/admin/AgentsPage";


================================================
FILE: web/src/app/admin/billing/BillingDetailsView.tsx
================================================
"use client";

import { useState } from "react";
import Link from "next/link";
import { Section } from "@/layouts/general-layouts";
import { Content } from "@opal/layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import Card from "@/refresh-components/cards/Card";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Message from "@/refresh-components/messages/Message";
import InfoBlock from "@/refresh-components/messages/InfoBlock";
import InputNumber from "@/refresh-components/inputs/InputNumber";
import {
  SvgUsers,
  SvgExternalLink,
  SvgArrowRight,
  SvgPlus,
  SvgWallet,
  SvgFileText,
  SvgOrganization,
} from "@opal/icons";
import { BillingInformation, LicenseStatus } from "@/lib/billing/interfaces";
import {
  createCustomerPortalSession,
  resetStripeConnection,
  updateSeatCount,
  claimLicense,
  refreshLicenseCache,
} from "@/lib/billing/svc";
import { formatDateShort } from "@/lib/dateUtils";
import { humanReadableFormatShort } from "@/lib/time";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import useUsers from "@/hooks/useUsers";

// ----------------------------------------------------------------------------
// Constants
// ----------------------------------------------------------------------------

const GRACE_PERIOD_DAYS = 30;

// ----------------------------------------------------------------------------
// Helpers
// ----------------------------------------------------------------------------

function getExpirationState(
  billing: BillingInformation,
  license?: LicenseStatus
) {
  const isAnnualBilling = billing.billing_period === "annual";

  // Check license expiration for self-hosted
  if (license?.expires_at) {
    const expiresAt = new Date(license.expires_at);
    const now = new Date();
    const daysRemaining = Math.ceil(
      (expiresAt.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)
    );

    if (daysRemaining <= 0 || license.status === "expired") {
      const gracePeriodEnd = license.grace_period_end
        ? new Date(license.grace_period_end)
        : new Date(
            expiresAt.getTime() + GRACE_PERIOD_DAYS * 24 * 60 * 60 * 1000
          );
      const daysUntilDeletion = Math.max(
        0,
        Math.ceil(
          (gracePeriodEnd.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)
        )
      );
      return {
        variant: "error" as const,
        daysRemaining: 0,
        daysUntilDeletion,
        expirationDate: humanReadableFormatShort(gracePeriodEnd),
      };
    }

    // Only show warning for annual subscriptions (30 days before expiration)
    if (isAnnualBilling && daysRemaining <= 30) {
      return {
        variant: "warning" as const,
        daysRemaining,
        expirationDate: humanReadableFormatShort(expiresAt),
      };
    }
  }

  // Check billing expiration for cloud (only show warnings for canceled subscriptions)
  if (billing.cancel_at_period_end && billing.current_period_end) {
    const expiresAt = new Date(billing.current_period_end);
    const now = new Date();
    const daysRemaining = Math.ceil(
      (expiresAt.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)
    );

    if (daysRemaining <= 0) {
      const gracePeriodEnd = new Date(
        expiresAt.getTime() + GRACE_PERIOD_DAYS * 24 * 60 * 60 * 1000
      );
      const daysUntilDeletion = Math.max(
        0,
        Math.ceil(
          (gracePeriodEnd.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)
        )
      );
      return {
        variant: "error" as const,
        daysRemaining: 0,
        daysUntilDeletion,
        expirationDate: humanReadableFormatShort(gracePeriodEnd),
      };
    }

    // Only show warning for annual subscriptions (30 days before expiration)
    // Monthly subscriptions auto-renew, so no warning needed
    if (isAnnualBilling && daysRemaining <= 30) {
      return {
        variant: "warning" as const,
        daysRemaining,
        expirationDate: humanReadableFormatShort(expiresAt),
      };
    }
  }

  if (billing.status === "expired" || billing.status === "cancelled") {
    return {
      variant: "error" as const,
      daysRemaining: 0,
      daysUntilDeletion: GRACE_PERIOD_DAYS,
      expirationDate: "",
    };
  }

  return null;
}

// ----------------------------------------------------------------------------
// SubscriptionCard
// ----------------------------------------------------------------------------

function SubscriptionCard({
  billing,
  license,
  onViewPlans,
  disabled,
  isManualLicenseOnly,
  onReconnect,
}: {
  billing?: BillingInformation;
  license?: LicenseStatus;
  onViewPlans: () => void;
  disabled?: boolean;
  isManualLicenseOnly?: boolean;
  onReconnect?: () => Promise<void>;
}) {
  const [isReconnecting, setIsReconnecting] = useState(false);

  const planName = isManualLicenseOnly ? "Enterprise Plan" : "Business Plan";
  const PlanIcon = isManualLicenseOnly ? SvgOrganization : SvgUsers;
  const expirationDate = billing?.current_period_end ?? license?.expires_at;
  const formattedDate = formatDateShort(expirationDate);

  const isExpiredFromBilling =
    billing?.status === "expired" || billing?.status === "cancelled";
  const isExpiredFromLicense =
    license?.status === "expired" ||
    license?.status === "gated_access" ||
    (license?.expires_at && new Date(license.expires_at) < new Date());
  const isExpired = isExpiredFromBilling || isExpiredFromLicense;
  const isCanceling = billing?.cancel_at_period_end;

  let subtitle: string;
  if (isExpired) {
    subtitle = `Expired on ${formattedDate}`;
  } else if (isCanceling) {
    subtitle = `Valid until ${formattedDate}`;
  } else if (billing) {
    subtitle = `Next payment on ${formattedDate}`;
  } else {
    subtitle = `Valid until ${formattedDate}`;
  }

  const handleManagePlan = async () => {
    try {
      const response = await createCustomerPortalSession({
        return_url: `${window.location.origin}/admin/billing?portal_return=true`,
      });
      if (response.stripe_customer_portal_url) {
        window.location.href = response.stripe_customer_portal_url;
      }
    } catch (error) {
      console.error("Failed to open customer portal:", error);
    }
  };

  const handleReconnect = async () => {
    setIsReconnecting(true);
    try {
      await resetStripeConnection();
      await onReconnect?.();
    } catch (error) {
      console.error("Failed to reconnect to Stripe:", error);
    } finally {
      setIsReconnecting(false);
    }
  };

  return (
    <Card>
      <Section
        flexDirection="row"
        justifyContent="between"
        alignItems="start"
        height="auto"
      >
        <Section gap={0.25} alignItems="start" height="auto" width="auto">
          <PlanIcon className="w-5 h-5" />
          <Text headingH3Muted text04>
            {planName}
          </Text>
          <Text secondaryBody text03>
            {subtitle}
          </Text>
        </Section>
        <Section
          flexDirection="column"
          gap={0.25}
          alignItems="end"
          height="auto"
          width="fit"
        >
          {isManualLicenseOnly ? (
            <Text secondaryBody text03 className="text-right">
              Your plan is managed through sales.
              <br />
              <a
                href="mailto:support@onyx.app?subject=Billing%20change%20request"
                className="underline"
              >
                Contact billing
              </a>{" "}
              to make changes.
            </Text>
          ) : disabled ? (
            <OpalButton
              disabled={isReconnecting}
              prominence="secondary"
              onClick={handleReconnect}
              rightIcon={SvgArrowRight}
            >
              {isReconnecting ? "Connecting..." : "Connect to Stripe"}
            </OpalButton>
          ) : (
            <OpalButton onClick={handleManagePlan} rightIcon={SvgExternalLink}>
              Manage Plan
            </OpalButton>
          )}
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <Button tertiary onClick={onViewPlans} className="billing-text-link">
            <Text secondaryBody text03>
              View Plan Details
            </Text>
          </Button>
        </Section>
      </Section>
    </Card>
  );
}

// ----------------------------------------------------------------------------
// SeatsCard
// ----------------------------------------------------------------------------

function SeatsCard({
  billing,
  license,
  onRefresh,
  disabled,
  hideUpdateSeats,
}: {
  billing?: BillingInformation;
  license?: LicenseStatus;
  onRefresh?: () => Promise<void>;
  disabled?: boolean;
  hideUpdateSeats?: boolean;
}) {
  const [isEditing, setIsEditing] = useState(false);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const { data: usersData, isLoading: isLoadingUsers } = useUsers({
    includeApiKeys: false,
  });

  const totalSeats = billing?.seats ?? license?.seats ?? 0;
  const acceptedUsers =
    usersData?.accepted?.filter((u) => u.is_active).length ?? 0;
  const slackUsers =
    usersData?.slack_users?.filter((u) => u.is_active).length ?? 0;
  const usedSeats = acceptedUsers + slackUsers;
  const pendingSeats = usersData?.invited?.length ?? 0;
  const remainingSeats = Math.max(0, totalSeats - usedSeats - pendingSeats);

  const [newSeatCount, setNewSeatCount] = useState(totalSeats);
  const minRequiredSeats = usedSeats + pendingSeats;
  const isBelowMinimum = newSeatCount < minRequiredSeats;

  const handleStartEdit = () => {
    setNewSeatCount(totalSeats);
    setError(null);
    setIsEditing(true);
  };

  const handleCancel = () => {
    setIsEditing(false);
    setError(null);
  };

  const handleConfirm = async () => {
    if (newSeatCount === totalSeats) {
      setIsEditing(false);
      return;
    }
    if (isBelowMinimum) return;

    setIsSubmitting(true);
    setError(null);

    try {
      await updateSeatCount({ new_seat_count: newSeatCount });
      if (!NEXT_PUBLIC_CLOUD_ENABLED) {
        // Wait for control plane to process the subscription update before claiming
        await new Promise((resolve) => setTimeout(resolve, 1500));
        await claimLicense();
        // Force refresh the Redis cache from the database
        await refreshLicenseCache();
      }
      await onRefresh?.();
      setIsEditing(false);
    } catch (err) {
      setError(err instanceof Error ? err.message : "Failed to update seats");
    } finally {
      setIsSubmitting(false);
    }
  };

  const seatDifference = newSeatCount - totalSeats;
  const isAdding = seatDifference > 0;
  const isRemoving = seatDifference < 0;
  const nextBillingDate = formatDateShort(billing?.current_period_end);
  const seatCount = Math.abs(seatDifference);
  const seatWord = seatCount === 1 ? "seat" : "seats";

  if (isEditing) {
    return (
      <Card
        padding={0}
        gap={0}
        alignItems="stretch"
        className="billing-card-enter"
      >
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="start"
          padding={1}
          height="auto"
        >
          <Content
            title="Update Seats"
            description="Add or remove seats to reflect your team size."
            sizePreset="main-content"
            variant="section"
          />
          <OpalButton
            disabled={isSubmitting}
            prominence="secondary"
            onClick={handleCancel}
          >
            Cancel
          </OpalButton>
        </Section>

        <div className="billing-content-area">
          <Section
            flexDirection="column"
            alignItems="stretch"
            gap={0.25}
            padding={1}
            height="auto"
          >
            <InputLayouts.Vertical title="Seats">
              <InputNumber
                value={newSeatCount}
                onChange={(v) => setNewSeatCount(v ?? 1)}
                min={1}
                defaultValue={totalSeats}
                showReset
                variant={isBelowMinimum ? "error" : "primary"}
              />
            </InputLayouts.Vertical>

            {isBelowMinimum ? (
              <InputLayouts.ErrorTextLayout type="error">
                You cannot set seats below current{" "}
                <span className="font-semibold">{minRequiredSeats}</span> seats
                in use/pending.{" "}
                <Link
                  href="/admin/users"
                  className="underline hover:no-underline"
                >
                  Remove users
                </Link>{" "}
                first before adjusting seats.
              </InputLayouts.ErrorTextLayout>
            ) : seatDifference !== 0 ? (
              <Text secondaryBody text03>
                {Math.abs(seatDifference)} seat
                {Math.abs(seatDifference) !== 1 ? "s" : ""} to be{" "}
                {isAdding ? "added" : "removed"}
              </Text>
            ) : null}

            {error && (
              <Text secondaryBody className="billing-error-text">
                {error}
              </Text>
            )}
          </Section>
        </div>

        <Section
          flexDirection="row"
          alignItems="center"
          justifyContent="between"
          padding={1}
          height="auto"
        >
          {isAdding ? (
            <Text secondaryBody text03>
              You will be billed for the{" "}
              <Text secondaryBody text04>
                {seatCount}
              </Text>{" "}
              additional {seatWord} at a pro-rated amount.
            </Text>
          ) : isRemoving ? (
            <Text secondaryBody text03>
              <Text secondaryBody text04>
                {seatCount}
              </Text>{" "}
              {seatWord} will be removed on{" "}
              <Text secondaryBody text04>
                {nextBillingDate}
              </Text>{" "}
              (after current billing cycle).
            </Text>
          ) : (
            <Text secondaryBody text03>
              No changes to your billing.
            </Text>
          )}
          <OpalButton
            disabled={
              isSubmitting || newSeatCount === totalSeats || isBelowMinimum
            }
            onClick={handleConfirm}
          >
            {isSubmitting ? "Saving..." : "Confirm Change"}
          </OpalButton>
        </Section>
      </Card>
    );
  }

  return (
    <Card>
      <Section
        flexDirection="row"
        justifyContent="between"
        alignItems="center"
        height="auto"
      >
        <Section gap={0.25} alignItems="start" height="auto" width="auto">
          <Text mainContentMuted text04>
            {totalSeats} Seats
          </Text>
          <Text secondaryBody text03>
            {usedSeats} in use • {pendingSeats} pending • {remainingSeats}{" "}
            remaining
          </Text>
        </Section>
        <Section
          flexDirection="row"
          gap={0.5}
          justifyContent="end"
          height="auto"
          width="auto"
        >
          <OpalButton
            prominence="tertiary"
            href="/admin/users"
            icon={SvgExternalLink}
          >
            View Users
          </OpalButton>
          {!hideUpdateSeats && (
            <OpalButton
              disabled={isLoadingUsers || disabled || !billing}
              prominence="secondary"
              onClick={handleStartEdit}
              icon={SvgPlus}
            >
              Update Seats
            </OpalButton>
          )}
        </Section>
      </Section>
    </Card>
  );
}

// ----------------------------------------------------------------------------
// PaymentSection
// ----------------------------------------------------------------------------

function PaymentSection({ billing }: { billing: BillingInformation }) {
  const handleOpenPortal = async () => {
    try {
      const response = await createCustomerPortalSession({
        return_url: `${window.location.origin}/admin/billing?portal_return=true`,
      });
      if (response.stripe_customer_portal_url) {
        window.location.href = response.stripe_customer_portal_url;
      }
    } catch (error) {
      console.error("Failed to open customer portal:", error);
    }
  };

  if (!billing.payment_method_enabled) return null;

  const lastPaymentDate = formatDateShort(billing.current_period_start);

  return (
    <div className="billing-payment-section">
      <Section alignItems="start" height="auto" width="full">
        <Text mainContentEmphasis>Payment</Text>
        <Section
          flexDirection="row"
          gap={0.5}
          alignItems="stretch"
          height="auto"
        >
          <Card className="billing-payment-card">
            <Section
              flexDirection="row"
              justifyContent="between"
              alignItems="start"
              height="auto"
            >
              <InfoBlock
                icon={SvgWallet}
                title="Visa ending in 1234"
                description="Payment method"
              />
              <OpalButton
                prominence="tertiary"
                onClick={handleOpenPortal}
                rightIcon={SvgExternalLink}
              >
                Update
              </OpalButton>
            </Section>
          </Card>
          {lastPaymentDate && (
            <Card className="billing-payment-card">
              <Section
                flexDirection="row"
                justifyContent="between"
                alignItems="start"
                height="auto"
              >
                <InfoBlock
                  icon={SvgFileText}
                  title={lastPaymentDate}
                  description="Last payment"
                />
                <OpalButton
                  prominence="tertiary"
                  onClick={handleOpenPortal}
                  rightIcon={SvgExternalLink}
                >
                  View Invoice
                </OpalButton>
              </Section>
            </Card>
          )}
        </Section>
      </Section>
    </div>
  );
}

// ----------------------------------------------------------------------------
// BillingDetailsView
// ----------------------------------------------------------------------------

interface BillingDetailsViewProps {
  billing?: BillingInformation;
  license?: LicenseStatus;
  onViewPlans: () => void;
  onRefresh?: () => Promise<void>;
  isAirGapped?: boolean;
  isManualLicenseOnly?: boolean;
  hasStripeError?: boolean;
  licenseCard?: React.ReactNode;
}

export default function BillingDetailsView({
  billing,
  license,
  onViewPlans,
  onRefresh,
  isAirGapped,
  isManualLicenseOnly,
  hasStripeError,
  licenseCard,
}: BillingDetailsViewProps) {
  const expirationState = billing ? getExpirationState(billing, license) : null;
  const disableBillingActions =
    isAirGapped || hasStripeError || isManualLicenseOnly;

  return (
    <Section gap={1} height="auto" width="full">
      {/* Stripe connection error banner */}
      {hasStripeError && (
        <Message
          static
          warning
          text="Unable to connect to Stripe payment portal."
          description="Check your internet connection or manually provide a license."
          close={false}
          className="w-full"
        />
      )}

      {/* Air-gapped mode info banner */}
      {isAirGapped && !hasStripeError && !isManualLicenseOnly && (
        <Message
          static
          info
          text="Air-gapped deployment"
          description="Online billing management is disabled. Contact support to update your subscription."
          close={false}
          className="w-full"
        />
      )}

      {/* Expiration banner */}
      {expirationState && (
        <Message
          static
          warning={expirationState.variant === "warning"}
          error={expirationState.variant === "error"}
          text={
            expirationState.variant === "error"
              ? expirationState.daysUntilDeletion
                ? `Your subscription has expired. Data will be deleted in ${expirationState.daysUntilDeletion} days.`
                : "Your subscription has expired."
              : `Your subscription is expiring in ${expirationState.daysRemaining} days.`
          }
          description={
            expirationState.variant === "error"
              ? expirationState.expirationDate
                ? `Renew your subscription by ${expirationState.expirationDate} to restore access.`
                : "Renew your subscription to restore access to paid features."
              : `Renew your subscription by ${expirationState.expirationDate} to avoid disruption.`
          }
          close={false}
          className="w-full"
        />
      )}

      {/* Subscription card */}
      {(billing || license?.has_license) && (
        <SubscriptionCard
          billing={billing}
          license={license}
          onViewPlans={onViewPlans}
          disabled={disableBillingActions}
          isManualLicenseOnly={isManualLicenseOnly}
          onReconnect={onRefresh}
        />
      )}

      {/* License card (inline for manual license users) */}
      {licenseCard}

      {/* Seats card */}
      <SeatsCard
        billing={billing}
        license={license}
        onRefresh={onRefresh}
        disabled={disableBillingActions}
        hideUpdateSeats={isManualLicenseOnly}
      />

      {/* Payment section */}
      {/* TODO: Re-enable payment section when APIs for fetching payment details are implemented */}
      {/* {billing?.payment_method_enabled && !isAirGapped && <PaymentSection billing={billing} />} */}
    </Section>
  );
}


================================================
FILE: web/src/app/admin/billing/CheckoutView.tsx
================================================
"use client";

import { useState, useMemo, useEffect } from "react";
import { Section } from "@/layouts/general-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Card from "@/refresh-components/cards/Card";
import Separator from "@/refresh-components/Separator";
import { SvgUsers, SvgCheck } from "@opal/icons";
import { createCheckoutSession } from "@/lib/billing/svc";
import { useUser } from "@/providers/UserProvider";
import { formatDateShort } from "@/lib/dateUtils";
import type { PlanType } from "@/lib/billing/interfaces";
import InputNumber from "@/refresh-components/inputs/InputNumber";
import useUsers from "@/hooks/useUsers";

// ----------------------------------------------------------------------------
// BillingOption
// ----------------------------------------------------------------------------

interface BillingOptionProps {
  selected: boolean;
  onClick: () => void;
  title: string;
  price: number;
  badge?: string;
}

function BillingOption({
  selected,
  onClick,
  title,
  price,
  badge,
}: BillingOptionProps) {
  return (
    <Card
      onClick={onClick}
      className="billing-option"
      data-selected={selected}
      padding={0}
    >
      <Section
        flexDirection="row"
        gap={0.5}
        height="fit"
        justifyContent="between"
        alignItems="start"
      >
        <Section
          alignItems="start"
          justifyContent="center"
          gap={0}
          height="fit"
          width="fit"
        >
          <Text mainUiAction className="billing-option-title">
            {title}
          </Text>
          <div className="billing-option-price">
            <Text mainContentEmphasis text04>
              ${price}
            </Text>
            <Text secondaryBody text03 nowrap>
              per seat/month
            </Text>
          </div>
        </Section>
        {badge && (
          <Section
            flexDirection="row"
            gap={0.25}
            alignItems="center"
            justifyContent="end"
            width="fit"
            height="fit"
          >
            <Text secondaryAction className="billing-option-badge">
              {badge}
            </Text>
            <SvgCheck className="billing-option-check" />
          </Section>
        )}
      </Section>
    </Card>
  );
}

// ----------------------------------------------------------------------------
// CheckoutView
// ----------------------------------------------------------------------------

interface CheckoutViewProps {
  onAdjustPlan: () => void;
}

export default function CheckoutView({ onAdjustPlan }: CheckoutViewProps) {
  const { user } = useUser();
  const { data: usersData } = useUsers({ includeApiKeys: false });

  // Calculate minimum required seats based on current active users
  const acceptedUsers =
    usersData?.accepted?.filter((u) => u.is_active).length ?? 0;
  const slackUsers =
    usersData?.slack_users?.filter((u) => u.is_active).length ?? 0;
  const minRequiredSeats = Math.max(1, acceptedUsers + slackUsers);

  const [billingPeriod, setBillingPeriod] = useState<PlanType>("annual");
  const [seats, setSeats] = useState(minRequiredSeats);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Update seats if minRequiredSeats changes (e.g., after user data loads)
  useEffect(() => {
    if (seats < minRequiredSeats) {
      setSeats(minRequiredSeats);
    }
  }, [minRequiredSeats, seats]);

  const monthlyPrice = 25;
  const annualPrice = 20;
  const annualPriceSelected = billingPeriod === "annual";

  const trialEndDate = useMemo(() => {
    const date = new Date();
    date.setMonth(date.getMonth() + 1);
    return formatDateShort(date.toISOString());
  }, []);

  const handleSubmit = async () => {
    setIsSubmitting(true);
    setError(null);

    try {
      const response = await createCheckoutSession({
        billing_period: billingPeriod,
        seats,
        email: user?.email,
      });

      if (response.stripe_checkout_url) {
        window.location.href = response.stripe_checkout_url;
      } else {
        throw new Error("Invalid response from checkout session");
      }
    } catch (err) {
      console.error("Error creating checkout session:", err);
      setError(
        err instanceof Error ? err.message : "Failed to create checkout session"
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  return (
    <Card padding={0} gap={0} alignItems="stretch">
      {/* Header */}
      <Section
        flexDirection="row"
        justifyContent="between"
        alignItems="start"
        padding={1}
        height="auto"
      >
        <Section
          flexDirection="column"
          alignItems="start"
          gap={0.25}
          height="auto"
          width="fit"
        >
          <SvgUsers size={24} />
          <Text headingH2 text04>
            Business
          </Text>
        </Section>
        <Button prominence="secondary" onClick={onAdjustPlan}>
          Adjust Plan
        </Button>
      </Section>

      {/* Content */}
      <div className="billing-content-area">
        <Section
          flexDirection="column"
          alignItems="stretch"
          gap={1}
          padding={1}
          height="auto"
        >
          {/* Billing Cycle */}
          <InputLayouts.Horizontal
            title="Billing Cycle"
            description="after your 1-month free trial"
          >
            <Section
              flexDirection="row"
              gap={0.25}
              width="fit"
              height="auto"
              justifyContent="start"
            >
              <BillingOption
                selected={billingPeriod === "monthly"}
                onClick={() => setBillingPeriod("monthly")}
                title="Monthly"
                price={monthlyPrice}
              />
              <BillingOption
                selected={billingPeriod === "annual"}
                onClick={() => setBillingPeriod("annual")}
                title="Annual"
                price={annualPrice}
                badge="Save 20%"
              />
            </Section>
          </InputLayouts.Horizontal>

          <Separator noPadding />

          {/* Seats */}
          <InputLayouts.Horizontal
            title="Seats"
            description={`Minimum ${minRequiredSeats} seat${
              minRequiredSeats !== 1 ? "s" : ""
            } required for your current users and Slack accounts.`}
          >
            <InputNumber
              value={seats}
              onChange={(v) => setSeats(v ?? minRequiredSeats)}
              min={minRequiredSeats}
              defaultValue={minRequiredSeats}
              showReset
            />
          </InputLayouts.Horizontal>
        </Section>
      </div>

      {/* Footer */}
      <Section
        flexDirection="row"
        alignItems="center"
        justifyContent="between"
        padding={1}
        height="auto"
      >
        {error ? (
          <Text secondaryBody className="billing-error-text">
            {error}
          </Text>
        ) : !annualPriceSelected ? (
          <Text secondaryBody text03>
            You will be billed on{" "}
            <Text secondaryBody text04>
              {trialEndDate}
            </Text>{" "}
            After your 1-month free trial ends.
          </Text>
        ) : (
          // Empty div to maintain space-between alignment
          <div></div>
        )}
        <Button disabled={isSubmitting} onClick={handleSubmit}>
          {isSubmitting ? "Loading..." : "Continue to Payment"}
        </Button>
      </Section>
    </Card>
  );
}


================================================
FILE: web/src/app/admin/billing/LicenseActivationCard.tsx
================================================
"use client";

import { useState } from "react";
import Card from "@/refresh-components/cards/Card";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import InputFile from "@/refresh-components/inputs/InputFile";
import { Section } from "@/layouts/general-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import { SvgXCircle, SvgCheckCircle, SvgXOctagon } from "@opal/icons";
import { uploadLicense } from "@/lib/billing/svc";
import { LicenseStatus } from "@/lib/billing/interfaces";
import { formatDateShort } from "@/lib/dateUtils";

const BILLING_HELP_URL = "https://docs.onyx.app/more/billing";

interface LicenseActivationCardProps {
  isOpen: boolean;
  onClose: () => void;
  onSuccess: () => void;
  license?: LicenseStatus;
  hideClose?: boolean;
}

export default function LicenseActivationCard({
  isOpen,
  onClose,
  onSuccess,
  license,
  hideClose,
}: LicenseActivationCardProps) {
  const [licenseKey, setLicenseKey] = useState("");
  const [isActivating, setIsActivating] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [success, setSuccess] = useState(false);
  const [showInput, setShowInput] = useState(!license?.has_license);

  const hasLicense = license?.has_license;
  const isDateExpired = license?.expires_at
    ? new Date(license.expires_at) < new Date()
    : false;
  const isExpired =
    license?.status === "expired" ||
    license?.status === "gated_access" ||
    isDateExpired;
  const expirationDate = license?.expires_at
    ? formatDateShort(license.expires_at)
    : null;

  const handleActivate = async () => {
    if (!licenseKey.trim()) {
      setError("Please enter a license key");
      return;
    }

    setIsActivating(true);
    setError(null);

    try {
      await uploadLicense(licenseKey.trim());
      setSuccess(true);
      setTimeout(() => {
        onSuccess();
        handleClose();
      }, 1000);
    } catch (err) {
      console.error("Error activating license:", err);
      setError(
        err instanceof Error ? err.message : "Failed to activate license"
      );
    } finally {
      setIsActivating(false);
    }
  };

  const handleClose = () => {
    setLicenseKey("");
    setError(null);
    setSuccess(false);
    setShowInput(!license?.has_license);
    onClose();
  };

  if (!isOpen) return null;

  // License status view (when license exists and not editing)
  if (hasLicense && !showInput) {
    return (
      <Card padding={1} alignItems="stretch">
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="center"
          height="auto"
        >
          <Section
            flexDirection="column"
            alignItems="start"
            gap={0.5}
            height="auto"
            width="auto"
          >
            {isExpired ? (
              <SvgXOctagon size={16} className="stroke-status-error-05" />
            ) : (
              <SvgCheckCircle size={16} className="stroke-status-success-05" />
            )}
            <Text secondaryBody text03>
              {isExpired ? (
                <>License key expired</>
              ) : (
                <>
                  License key active until{" "}
                  <Text secondaryBody text04>
                    {expirationDate}
                  </Text>
                </>
              )}
            </Text>
          </Section>
          <Section flexDirection="row" gap={0.5} height="auto" width="auto">
            <Button prominence="secondary" onClick={() => setShowInput(true)}>
              Update Key
            </Button>
            {!hideClose && (
              <Button prominence="tertiary" onClick={handleClose}>
                Close
              </Button>
            )}
          </Section>
        </Section>
      </Card>
    );
  }

  // License input form
  return (
    <Card padding={0} alignItems="stretch" gap={0}>
      {/* Header */}
      <Section flexDirection="column" alignItems="stretch" gap={0} padding={1}>
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="center"
        >
          <Text headingH3>
            {hasLicense ? "Update License Key" : "Activate License Key"}
          </Text>
          <Button
            disabled={isActivating}
            prominence="secondary"
            onClick={handleClose}
          >
            Cancel
          </Button>
        </Section>
        <Text secondaryBody text03>
          Manually add and activate a license for this Onyx instance.
        </Text>
      </Section>

      {/* Content */}
      <div className="billing-content-area">
        <Section
          flexDirection="column"
          alignItems="stretch"
          gap={0.5}
          padding={1}
        >
          {success && (
            <div className="billing-success-message">
              <Text secondaryBody>
                License {hasLicense ? "updated" : "activated"} successfully!
              </Text>
            </div>
          )}

          <InputLayouts.Vertical
            title="License Key"
            subDescription={
              error
                ? undefined
                : "Paste or attach your license key file you received from Onyx."
            }
          >
            <InputFile
              placeholder="eyJwYXlsb2FkIjogeyJ2ZXJzaW9..."
              setValue={(value) => {
                setLicenseKey(value);
                setError(null);
              }}
              error={!!error}
              className="billing-license-input"
            />
            {error && (
              <Section
                flexDirection="row"
                alignItems="center"
                justifyContent="start"
                gap={0.25}
                height="auto"
              >
                <div className="billing-error-icon">
                  <SvgXCircle />
                </div>
                <Text secondaryBody text04>
                  {error}.{" "}
                  <a
                    href={BILLING_HELP_URL}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="billing-help-link"
                  >
                    Billing Help
                  </a>
                </Text>
              </Section>
            )}
          </InputLayouts.Vertical>
        </Section>
      </div>

      {/* Footer */}
      <Section flexDirection="row" justifyContent="end" padding={1}>
        <Button
          disabled={isActivating || !licenseKey.trim() || success}
          onClick={handleActivate}
        >
          {isActivating
            ? "Activating..."
            : hasLicense
              ? "Update License"
              : "Activate License"}
        </Button>
      </Section>
    </Card>
  );
}


================================================
FILE: web/src/app/admin/billing/PlansView.tsx
================================================
"use client";

import {
  SvgDashboard,
  SvgHistory,
  SvgFiles,
  SvgGlobe,
  SvgHardDrive,
  SvgHeadsetMic,
  SvgShareWebhook,
  SvgKey,
  SvgLock,
  SvgPaintBrush,
  SvgOrganization,
  SvgServer,
  SvgShield,
  SvgSliders,
  SvgUserManage,
  SvgUsers,
} from "@opal/icons";
import "@/app/admin/billing/billing.css";
import type { IconProps } from "@opal/types";
import Card from "@/refresh-components/cards/Card";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";

const SALES_URL = "https://www.onyx.app/contact-sales";

// ----------------------------------------------------------------------------
// Types
// ----------------------------------------------------------------------------

interface PlanFeature {
  icon: React.FunctionComponent<IconProps>;
  text: string;
}

interface PlanConfig {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  pricing?: string;
  description: string;
  buttonLabel: string;
  buttonVariant: "primary" | "secondary";
  buttonIcon?: React.FunctionComponent<IconProps>;
  onClick?: () => void;
  href?: string;
  features: PlanFeature[];
  featuresPrefix: string;
  isCurrentPlan?: boolean;
}

// ----------------------------------------------------------------------------
// Plan Features
// ----------------------------------------------------------------------------

const BUSINESS_FEATURES: PlanFeature[] = [
  { icon: SvgFiles, text: "Inherit Document Permissions" },
  { icon: SvgHistory, text: "Query History and Usage Dashboard" },
  { icon: SvgShield, text: "Role Based Access Control (RBAC)" },
  { icon: SvgLock, text: "Encryption of Secrets" },
  { icon: SvgKey, text: "Service Account API Keys" },
  { icon: SvgHardDrive, text: "Self-hosting (Optional)" },
  { icon: SvgPaintBrush, text: "Custom Theming" },
];

const ENTERPRISE_FEATURES: PlanFeature[] = [
  { icon: SvgUsers, text: "SCIM / Group Sync" },
  { icon: SvgDashboard, text: "Full White-labeling" },
  { icon: SvgUserManage, text: "Custom Roles and Permissions" },
  { icon: SvgSliders, text: "Configurable Usage Limits" },
  { icon: SvgShareWebhook, text: "Hook Extensions" },
  { icon: SvgServer, text: "Custom Deployments" },
  { icon: SvgGlobe, text: "Region-Specific Data Processing" },
  { icon: SvgHeadsetMic, text: "Enterprise SLAs and Priority Support" },
];

// ----------------------------------------------------------------------------
// PlanCard (inlined)
// ----------------------------------------------------------------------------

function PlanCard({
  icon: Icon,
  title,
  pricing,
  description,
  buttonLabel,
  buttonIcon: ButtonIcon,
  onClick,
  href,
  features,
  featuresPrefix,
  isCurrentPlan,
  hideFeatures,
}: PlanConfig & { hideFeatures?: boolean }) {
  return (
    <Card
      padding={0}
      gap={0}
      alignItems="stretch"
      aria-label={title + " plan card"}
      className="plan-card"
    >
      <Section
        flexDirection="column"
        alignItems="stretch"
        padding={1}
        height="fit"
      >
        {/* Title */}
        <Section
          flexDirection="column"
          alignItems="start"
          gap={0.25}
          width="full"
        >
          <Icon size={24} />
          <Text headingH3 text04>
            {title}
          </Text>
        </Section>

        {/* Pricing */}
        <Section
          flexDirection="row"
          justifyContent="start"
          alignItems="center"
          gap={0.5}
          height="auto"
        >
          {pricing && (
            <Text headingH2 text04>
              {pricing}
            </Text>
          )}
          <Text
            secondaryBody
            text03
            className={
              pricing ? "whitespace-pre-line" : "whitespace-pre-line min-h-9"
            }
          >
            {description}
          </Text>
        </Section>

        {/* Button */}
        <div className="plan-card-button">
          {isCurrentPlan ? (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <Button tertiary transient className="pointer-events-none">
              <Text mainUiAction text03>
                Your Current Plan
              </Text>
            </Button>
          ) : href ? (
            <OpalButton
              prominence="secondary"
              href={href}
              target="_blank"
              rel="noopener noreferrer"
            >
              {buttonLabel}
            </OpalButton>
          ) : onClick ? (
            <OpalButton onClick={onClick} icon={ButtonIcon}>
              {buttonLabel}
            </OpalButton>
          ) : (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <Button tertiary transient className="pointer-events-none">
              <Text mainUiAction text03>
                Included in your plan
              </Text>
            </Button>
          )}
        </div>
      </Section>

      {/* Features */}
      <div
        className="plan-card-features-container"
        data-hidden={hideFeatures ? "true" : "false"}
      >
        <Section
          flexDirection="column"
          alignItems="start"
          justifyContent="start"
          gap={1}
          padding={1}
        >
          <Text mainUiBody text03>
            {featuresPrefix}
          </Text>
          <Section
            flexDirection="column"
            alignItems="start"
            gap={0.5}
            height="auto"
          >
            {features.map((feature) => (
              <Section
                key={feature.text}
                flexDirection="row"
                alignItems="start"
                justifyContent="start"
                gap={0.25}
                width="fit"
                height="auto"
              >
                <div className="plan-card-feature-icon">
                  <feature.icon size={16} className="stroke-text-03" />
                </div>
                <Text mainUiBody text03>
                  {feature.text}
                </Text>
              </Section>
            ))}
          </Section>
        </Section>
      </div>
    </Card>
  );
}

// ----------------------------------------------------------------------------
// PlansView
// ----------------------------------------------------------------------------

interface PlansViewProps {
  hasSubscription?: boolean;
  hasLicense?: boolean;
  onCheckout: () => void;
  hideFeatures?: boolean;
}

export default function PlansView({
  hasSubscription,
  hasLicense,
  onCheckout,
  hideFeatures,
}: PlansViewProps) {
  const plans: PlanConfig[] = [
    {
      icon: SvgUsers,
      title: "Business",
      pricing: "$20",
      description:
        "per seat/month billed annually\nor $25 per seat if billed monthly",
      buttonLabel: "Get Business Plan",
      buttonVariant: "primary",
      onClick: hasLicense ? undefined : onCheckout,
      features: BUSINESS_FEATURES,
      featuresPrefix: "Get more work done with AI for your team.",
      isCurrentPlan: !!hasSubscription,
    },
    {
      icon: SvgOrganization,
      title: "Enterprise",
      description:
        "Flexible pricing & deployment options\nfor large organizations",
      buttonLabel: "Contact Sales",
      buttonVariant: "secondary",
      href: SALES_URL,
      features: ENTERPRISE_FEATURES,
      featuresPrefix: "Everything in Business Plan, plus:",
      isCurrentPlan: !!hasLicense && !hasSubscription,
    },
  ];

  return (
    <Section flexDirection="row" alignItems="stretch" width="full">
      {plans.map((plan) => (
        <PlanCard key={plan.title} {...plan} hideFeatures={hideFeatures} />
      ))}
    </Section>
  );
}


================================================
FILE: web/src/app/admin/billing/billing.css
================================================
/**
 * Billing page styles
 *
 * This file contains custom styles for billing components that cannot be achieved
 * with built-in component props. Prefer using component props over custom CSS.
 */

/* -----------------------------------------------------------------------------
 * Plan Card
 * -------------------------------------------------------------------------- */

.plan-card {
  display: flex;
  flex-direction: column;
  overflow: hidden;
  /* Let parent's align-items: stretch handle the height */
  align-self: stretch;
}

/* Override Card's inner Section to grow and fill the card */
.plan-card > div {
  display: flex;
  flex-direction: column;
  flex: 1;
}

.plan-card-button button,
.plan-card-button a {
  width: 100%;
}

.plan-card-features-container {
  display: flex;
  flex-direction: column;
  overflow: hidden;
  transition:
    max-height 0.25s ease-out,
    opacity 0.2s ease-out;
  background: var(--background-tint-01);
  flex: 1;
}

.plan-card-features-container[data-hidden="true"] {
  max-height: 0;
  opacity: 0;
  flex: 0;
}

.plan-card-features-container[data-hidden="false"] {
  max-height: none;
  opacity: 1;
}

.plan-card-feature-icon {
  display: flex;
  align-items: center;
  justify-content: center;
  width: 1.25rem;
  height: 1.25rem;
  padding: 0.125rem;
  flex-shrink: 0;
}

/* -----------------------------------------------------------------------------
 * Other Billing Components
 * -------------------------------------------------------------------------- */

.billing-payment-card {
  min-width: 17.5rem;
  flex: 1;
}

.billing-license-input {
  font-family: var(--font-mono);
}

.billing-error-icon {
  display: flex;
  align-items: center;
  justify-content: center;
  width: 0.75rem;
  height: 0.75rem;
  flex-shrink: 0;
  color: var(--status-error-05);
}

.billing-content-area {
  background: var(--background-tint-01);
}

.billing-success-message {
  width: 100%;
  padding: 0.75rem;
  background: var(--status-success-01);
  border: 1px solid var(--status-success-02);
  border-radius: 0.5rem;
  color: var(--status-success-05);
}

/* -----------------------------------------------------------------------------
 * Checkout View
 * -------------------------------------------------------------------------- */

.billing-option {
  width: 14rem;
  min-width: 10rem;
  padding: 0.75rem;
  border-radius: 0.5rem;
  border: 1px solid var(--border-01);
  background: var(--background-neutral-00);
  text-align: left;
  transition: border-color 0.15s ease;
  cursor: pointer;
}

.billing-option:hover {
  border-color: var(--border-02);
}

.billing-option[data-selected="true"] {
  border-color: var(--action-link-05);
  background: var(--action-link-01);
}

.billing-option-title {
  color: var(--text-04);
}

.billing-option-price {
  display: flex;
  flex-direction: row;
  align-items: baseline;
  gap: 0.25rem;
}

.billing-option[data-selected="true"] .billing-option-title {
  color: var(--action-link-05);
}

.billing-option-badge {
  color: var(--action-link-05);
}

.billing-option-check {
  width: 1rem;
  height: 1rem;
  stroke: var(--action-link-05);
}

.billing-error-text {
  color: var(--status-error-05);
}

.billing-payment-section {
  padding-top: 1rem;
  width: 100%;
}

/* -----------------------------------------------------------------------------
 * View Transition Animations
 * -------------------------------------------------------------------------- */

@keyframes billing-fade-in {
  from {
    opacity: 0;
    transform: translateY(8px);
  }
  to {
    opacity: 1;
    transform: translateY(0);
  }
}

.billing-view-enter {
  animation: billing-fade-in 0.25s ease-out;
}

.billing-view-expand {
  animation: billing-fade-in 0.25s ease-out;
}

.billing-view-collapse {
  animation: billing-fade-in 0.25s ease-out;
}

.billing-card-enter {
  animation: billing-fade-in 0.25s ease-out;
}


================================================
FILE: web/src/app/admin/billing/page.test.tsx
================================================
/**
 * Tests for BillingPage handleBillingReturn retry logic.
 *
 * The retry logic retries claimLicense up to 3 times with 2s backoff
 * when returning from a Stripe checkout session. This prevents the user
 * from getting stranded when the Stripe webhook fires concurrently with
 * the browser redirect and the license isn't ready yet.
 */
import React from "react";
import { render, screen, waitFor } from "@tests/setup/test-utils";
import { act } from "@testing-library/react";

// ---- Stable mock objects (must be named with mock* prefix for jest hoisting) ----
// useRouter and useSearchParams must return the SAME reference each call, otherwise
// React's useEffect sees them as changed and re-runs the effect on every render.
const mockRouter = {
  replace: jest.fn() as jest.Mock,
  refresh: jest.fn() as jest.Mock,
};
const mockSearchParams = {
  get: jest.fn() as jest.Mock,
};
const mockClaimLicense = jest.fn() as jest.Mock;
const mockRefreshBilling = jest.fn() as jest.Mock;
const mockRefreshLicense = jest.fn() as jest.Mock;

// ---- Mocks ----

jest.mock("next/navigation", () => ({
  useRouter: () => mockRouter,
  useSearchParams: () => mockSearchParams,
}));

jest.mock("@/layouts/settings-layouts", () => ({
  Root: ({ children }: { children: React.ReactNode }) => (
    <div data-testid="settings-root">{children}</div>
  ),
  Header: () => <div data-testid="settings-header" />,
  Body: ({ children }: { children: React.ReactNode }) => (
    <div data-testid="settings-body">{children}</div>
  ),
}));

jest.mock("@/layouts/general-layouts", () => ({
  Section: ({ children }: { children: React.ReactNode }) => (
    <div>{children}</div>
  ),
}));

jest.mock("@opal/icons", () => ({
  SvgArrowUpCircle: () => <svg />,
  SvgWallet: () => <svg />,
}));

jest.mock("./PlansView", () => ({
  __esModule: true,
  default: () => <div data-testid="plans-view" />,
}));
jest.mock("./CheckoutView", () => ({
  __esModule: true,
  default: () => <div data-testid="checkout-view" />,
}));
jest.mock("./BillingDetailsView", () => ({
  __esModule: true,
  default: () => <div data-testid="billing-details-view" />,
}));
jest.mock("./LicenseActivationCard", () => ({
  __esModule: true,
  default: () => <div data-testid="license-activation-card" />,
}));

jest.mock("@/refresh-components/messages/Message", () => ({
  __esModule: true,
  default: ({
    text,
    description,
    onClose,
  }: {
    text: string;
    description?: string;
    onClose?: () => void;
  }) => (
    <div data-testid="activating-banner">
      <span data-testid="activating-banner-text">{text}</span>
      {description && (
        <span data-testid="activating-banner-description">{description}</span>
      )}
      {onClose && (
        <button data-testid="activating-banner-close" onClick={onClose}>
          Close
        </button>
      )}
    </div>
  ),
}));

jest.mock("@/lib/billing", () => ({
  useBillingInformation: jest.fn(),
  useLicense: jest.fn(),
  hasActiveSubscription: jest.fn().mockReturnValue(false),
  claimLicense: (...args: unknown[]) => mockClaimLicense(...args),
}));

jest.mock("@/lib/constants", () => ({
  NEXT_PUBLIC_CLOUD_ENABLED: false,
}));

// ---- Import after mocks ----
import BillingPage from "./page";
import { useBillingInformation, useLicense } from "@/lib/billing";

// ---- Test helpers ----

function setupHooks() {
  (useBillingInformation as jest.Mock).mockReturnValue({
    data: null,
    isLoading: false,
    error: null,
    refresh: mockRefreshBilling,
  });
  (useLicense as jest.Mock).mockReturnValue({
    data: null,
    isLoading: false,
    refresh: mockRefreshLicense,
  });
}

// ---- Tests ----

describe("BillingPage — handleBillingReturn retry logic", () => {
  beforeEach(() => {
    jest.clearAllMocks();
    jest.useFakeTimers();
    setupHooks();
    // Default: no billing-return params
    mockSearchParams.get.mockReturnValue(null);
    // Clear any activating state from prior tests
    sessionStorage.clear();
  });

  afterEach(() => {
    jest.useRealTimers();
    jest.restoreAllMocks();
  });

  test("calls claimLicense once and refreshes on first-attempt success", async () => {
    mockSearchParams.get.mockImplementation((key: string) =>
      key === "session_id" ? "cs_test_123" : null
    );
    mockClaimLicense.mockResolvedValueOnce({ success: true });

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    await waitFor(() => {
      expect(mockClaimLicense).toHaveBeenCalledTimes(1);
      expect(mockClaimLicense).toHaveBeenCalledWith("cs_test_123");
    });
    expect(mockRouter.refresh).toHaveBeenCalled();
    expect(mockRefreshBilling).toHaveBeenCalled();
    // URL cleaned up after checkout return
    expect(mockRouter.replace).toHaveBeenCalledWith("/admin/billing", {
      scroll: false,
    });
  });

  test("retries after first failure and succeeds on second attempt", async () => {
    mockSearchParams.get.mockImplementation((key: string) =>
      key === "session_id" ? "cs_retry_test" : null
    );
    mockClaimLicense
      .mockRejectedValueOnce(new Error("License not ready yet"))
      .mockResolvedValueOnce({ success: true });

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    await waitFor(() => {
      expect(mockClaimLicense).toHaveBeenCalledTimes(2);
    });
    // On eventual success, router and billing should be refreshed
    expect(mockRouter.refresh).toHaveBeenCalled();
    expect(mockRefreshBilling).toHaveBeenCalled();
  });

  test("retries all 3 times then navigates to details even on total failure", async () => {
    mockSearchParams.get.mockImplementation((key: string) =>
      key === "session_id" ? "cs_all_fail" : null
    );
    // All 3 attempts fail
    mockClaimLicense.mockRejectedValue(new Error("Webhook not processed yet"));

    const consoleSpy = jest
      .spyOn(console, "error")
      .mockImplementation(() => {});

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    await waitFor(() => {
      expect(mockClaimLicense).toHaveBeenCalledTimes(3);
    });
    // User stays on plans view with the activating banner
    await waitFor(() => {
      expect(screen.getByTestId("plans-view")).toBeInTheDocument();
    });
    // refreshBilling still fires so billing state is up to date
    expect(mockRefreshBilling).toHaveBeenCalled();
    // Failure is logged
    expect(consoleSpy).toHaveBeenCalledWith(
      expect.stringContaining("Failed to sync license after billing return"),
      expect.any(Error)
    );

    consoleSpy.mockRestore();
  });

  test("calls claimLicense without session_id on portal_return", async () => {
    mockSearchParams.get.mockImplementation((key: string) =>
      key === "portal_return" ? "true" : null
    );
    mockClaimLicense.mockResolvedValueOnce({ success: true });

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    await waitFor(() => {
      expect(mockClaimLicense).toHaveBeenCalledTimes(1);
      // No session_id for portal returns — called with undefined
      expect(mockClaimLicense).toHaveBeenCalledWith(undefined);
    });
    expect(mockRefreshBilling).toHaveBeenCalled();
  });

  test("does not call claimLicense when no billing-return params present", async () => {
    mockSearchParams.get.mockReturnValue(null);

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    expect(mockClaimLicense).not.toHaveBeenCalled();
  });

  test("shows activating banner and sets sessionStorage on 3x retry failure", async () => {
    mockSearchParams.get.mockImplementation((key: string) =>
      key === "session_id" ? "cs_all_fail" : null
    );
    mockClaimLicense.mockRejectedValue(new Error("Webhook not processed yet"));

    const consoleSpy = jest
      .spyOn(console, "error")
      .mockImplementation(() => {});

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    await waitFor(() => {
      expect(screen.getByTestId("activating-banner")).toBeInTheDocument();
    });
    expect(screen.getByTestId("activating-banner-text")).toHaveTextContent(
      "Your license is still activating"
    );
    expect(
      sessionStorage.getItem("billing_license_activating_until")
    ).not.toBeNull();

    consoleSpy.mockRestore();
  });

  test("banner not rendered when no activating state", async () => {
    mockSearchParams.get.mockReturnValue(null);

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    expect(screen.queryByTestId("activating-banner")).not.toBeInTheDocument();
  });

  test("banner shown on mount when sessionStorage key is set and not expired", async () => {
    sessionStorage.setItem(
      "billing_license_activating_until",
      String(Date.now() + 120_000)
    );
    mockSearchParams.get.mockReturnValue(null);

    render(<BillingPage />);

    // Flush React effects — banner is visible from lazy state init, no timer advancement needed
    await act(async () => {});

    expect(screen.getByTestId("activating-banner")).toBeInTheDocument();
  });

  test("banner not shown on mount when sessionStorage key is expired", async () => {
    sessionStorage.setItem(
      "billing_license_activating_until",
      String(Date.now() - 1000)
    );
    mockSearchParams.get.mockReturnValue(null);

    render(<BillingPage />);

    await act(async () => {
      await jest.runAllTimersAsync();
    });

    expect(screen.queryByTestId("activating-banner")).not.toBeInTheDocument();
    expect(
      sessionStorage.getItem("billing_license_activating_until")
    ).toBeNull();
  });

  test("poll calls claimLicense after 15s and clears banner on success", async () => {
    sessionStorage.setItem(
      "billing_license_activating_until",
      String(Date.now() + 120_000)
    );
    mockSearchParams.get.mockReturnValue(null);
    // Poll attempt succeeds
    mockClaimLicense.mockResolvedValueOnce({ success: true });

    render(<BillingPage />);

    // Flush effects — banner visible from lazy state init
    await act(async () => {});
    expect(screen.getByTestId("activating-banner")).toBeInTheDocument();

    // Advance past one poll interval (15s)
    await act(async () => {
      await jest.advanceTimersByTimeAsync(15_000);
    });

    expect(mockClaimLicense).toHaveBeenCalledWith(undefined);
    expect(screen.queryByTestId("activating-banner")).not.toBeInTheDocument();
    expect(
      sessionStorage.getItem("billing_license_activating_until")
    ).toBeNull();
    expect(mockRefreshBilling).toHaveBeenCalled();
    expect(mockRefreshLicense).toHaveBeenCalled();
    expect(mockRouter.refresh).toHaveBeenCalled();
  });

  test("close button removes banner and clears sessionStorage", async () => {
    sessionStorage.setItem(
      "billing_license_activating_until",
      String(Date.now() + 120_000)
    );
    mockSearchParams.get.mockReturnValue(null);

    render(<BillingPage />);

    // Flush effects — banner visible from lazy state init
    await act(async () => {});
    expect(screen.getByTestId("activating-banner")).toBeInTheDocument();

    const closeButton = screen.getByTestId("activating-banner-close");
    await act(async () => {
      closeButton.click();
    });

    expect(screen.queryByTestId("activating-banner")).not.toBeInTheDocument();
    expect(
      sessionStorage.getItem("billing_license_activating_until")
    ).toBeNull();
  });
});


================================================
FILE: web/src/app/admin/billing/page.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { useSearchParams, useRouter } from "next/navigation";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Section } from "@/layouts/general-layouts";
import Button from "@/refresh-components/buttons/Button";
import Text from "@/refresh-components/texts/Text";
import { SvgArrowUpCircle, SvgWallet } from "@opal/icons";
import type { IconProps } from "@opal/types";
import {
  useBillingInformation,
  useLicense,
  BillingInformation,
  hasActiveSubscription,
  claimLicense,
} from "@/lib/billing";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { useUser } from "@/providers/UserProvider";
import Message from "@/refresh-components/messages/Message";

import PlansView from "./PlansView";
import CheckoutView from "./CheckoutView";
import BillingDetailsView from "./BillingDetailsView";
import LicenseActivationCard from "./LicenseActivationCard";
import "./billing.css";

// sessionStorage key: value is a unix-ms expiry timestamp
const BILLING_ACTIVATING_KEY = "billing_license_activating_until";

// ----------------------------------------------------------------------------
// Types
// ----------------------------------------------------------------------------

type BillingView = "plans" | "details" | "checkout" | null;

interface ViewConfig {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  showBackButton: boolean;
}

// ----------------------------------------------------------------------------
// FooterLinks (inlined)
// ----------------------------------------------------------------------------

const SUPPORT_EMAIL = "support@onyx.app";

function FooterLinks({
  hasSubscription,
  onActivateLicense,
  hideLicenseLink,
}: {
  hasSubscription?: boolean;
  onActivateLicense?: () => void;
  hideLicenseLink?: boolean;
}) {
  const { user } = useUser();
  const licenseText = hasSubscription
    ? "Update License Key"
    : "Activate License Key";
  const billingHelpHref = `mailto:${SUPPORT_EMAIL}?subject=${encodeURIComponent(
    `[Billing] support for ${user?.email ?? "unknown"}`
  )}`;

  return (
    <Section flexDirection="row" justifyContent="center" gap={1} height="auto">
      {onActivateLicense && !hideLicenseLink && (
        <>
          <Text secondaryBody text03>
            Have a license key?
          </Text>
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <Button action tertiary onClick={onActivateLicense}>
            <Text secondaryBody text05 className="underline">
              {licenseText}
            </Text>
          </Button>
        </>
      )}
      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
      <Button
        action
        tertiary
        href={billingHelpHref}
        className="billing-text-link"
      >
        <Text secondaryBody text03 className="underline">
          Billing Help
        </Text>
      </Button>
    </Section>
  );
}

// ----------------------------------------------------------------------------
// BillingPage
// ----------------------------------------------------------------------------

export default function BillingPage() {
  const router = useRouter();
  const searchParams = useSearchParams();
  // Start with null view to prevent flash - will be set once data loads
  const [view, setView] = useState<BillingView | null>(null);
  const [showLicenseActivationInput, setShowLicenseActivationInput] =
    useState(false);
  const [licenseCardAutoOpened, setLicenseCardAutoOpened] = useState(false);
  const [viewChangeId, setViewChangeId] = useState(0);
  const [transitionType, setTransitionType] = useState<
    "expand" | "collapse" | "fade"
  >("fade");
  const [isActivating, setIsActivating] = useState<boolean>(false);

  const {
    data: billingData,
    isLoading: billingLoading,
    error: billingError,
    refresh: refreshBilling,
  } = useBillingInformation();
  const {
    data: licenseData,
    isLoading: licenseLoading,
    refresh: refreshLicense,
  } = useLicense();

  const isLoading = billingLoading || licenseLoading;
  const hasSubscription = billingData && hasActiveSubscription(billingData);
  const billing = hasSubscription ? (billingData as BillingInformation) : null;
  const isSelfHosted = !NEXT_PUBLIC_CLOUD_ENABLED;

  const hasManualLicense = licenseData?.source === "manual_upload";

  // Air-gapped: billing endpoint is unreachable (manual license + connectivity error)
  const isAirGapped = !!(hasManualLicense && billingError);

  // Stripe error: auto-fetched license but billing endpoint is unreachable
  const hasStripeError = !!(
    isSelfHosted &&
    licenseData?.has_license &&
    billingError &&
    !hasManualLicense
  );

  // Manual license without active Stripe subscription
  // Stripe-dependent actions (manage plan, update seats) won't work
  const isManualLicenseOnly = !!(hasManualLicense && !hasSubscription);

  // Set initial view based on subscription status (only once when data first loads)
  useEffect(() => {
    if (!isLoading && view === null) {
      const shouldShowDetails =
        hasSubscription || (isSelfHosted && licenseData?.has_license);
      setView(shouldShowDetails ? "details" : "plans");
    }
  }, [
    isLoading,
    hasSubscription,
    isSelfHosted,
    licenseData?.has_license,
    view,
  ]);

  // Read activating state from sessionStorage after mount (avoids SSR hydration mismatch)
  useEffect(() => {
    const raw = sessionStorage.getItem(BILLING_ACTIVATING_KEY);
    if (!raw) return;
    if (Number(raw) > Date.now()) {
      setIsActivating(true);
    } else {
      sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
    }
  }, []);

  // Show license activation card when there's a Stripe error
  useEffect(() => {
    if (hasStripeError && !showLicenseActivationInput) {
      setLicenseCardAutoOpened(true);
      setShowLicenseActivationInput(true);
    }
  }, [hasStripeError, showLicenseActivationInput]);

  // Handle return from checkout or customer portal
  useEffect(() => {
    const sessionId = searchParams.get("session_id");
    const portalReturn = searchParams.get("portal_return");

    if (!sessionId && !portalReturn) return;

    router.replace("/admin/billing", { scroll: false });

    let cancelled = false;

    const handleBillingReturn = async () => {
      if (!NEXT_PUBLIC_CLOUD_ENABLED) {
        // Retry up to 3 times with 2s backoff. The license may not be available
        // immediately if the Stripe webhook hasn't finished processing yet
        // (redirect and webhook fire nearly simultaneously).
        let lastError: Error | null = null;
        for (let attempt = 0; attempt < 3; attempt++) {
          if (cancelled) return;
          try {
            // After checkout, exchange session_id for license; after portal, re-sync license
            await claimLicense(sessionId ?? undefined);
            if (cancelled) return;
            refreshLicense();
            // Refresh the page to update settings (including ee_features_enabled)
            router.refresh();
            // Navigate to billing details now that the license is active
            changeView("details");
            lastError = null;
            break;
          } catch (err) {
            lastError = err instanceof Error ? err : new Error("Unknown error");
            if (attempt < 2) {
              await new Promise((resolve) => setTimeout(resolve, 2000));
            }
          }
        }
        if (cancelled) return;
        if (lastError) {
          console.error(
            "Failed to sync license after billing return:",
            lastError
          );
          // Show an activating banner on the plans view and keep retrying in the background.
          sessionStorage.setItem(
            BILLING_ACTIVATING_KEY,
            String(Date.now() + 120_000)
          );
          setIsActivating(true);
          changeView("plans");
        }
      }
      if (!cancelled) refreshBilling();
    };
    handleBillingReturn();

    return () => {
      cancelled = true;
    };
    // changeView intentionally omitted: it only calls stable state setters and the
    // effect runs at most once (when session_id/portal_return params are present).
  }, [searchParams, router, refreshBilling, refreshLicense]); // eslint-disable-line react-hooks/exhaustive-deps

  // Poll every 15s while activating, up to 2 minutes, to detect when the license arrives.
  useEffect(() => {
    if (!isActivating) return;

    let requestInFlight = false;

    const intervalId = setInterval(async () => {
      if (requestInFlight) return;
      const raw = sessionStorage.getItem(BILLING_ACTIVATING_KEY);
      if (!raw || Number(raw) <= Date.now()) {
        // Expired — stop immediately without waiting for React cleanup
        clearInterval(intervalId);
        sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
        setIsActivating(false);
        return;
      }
      requestInFlight = true;
      try {
        await claimLicense(undefined);
        sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
        setIsActivating(false);
        refreshLicense();
        refreshBilling();
        router.refresh();
        changeView("details");
      } catch (err) {
        // License not ready yet — keep polling. Log so unexpected failures
        // (network errors, 500s) are distinguishable from expected 404s.
        console.debug("License activation poll: will retry", err);
      } finally {
        requestInFlight = false;
      }
    }, 15_000);

    return () => clearInterval(intervalId);
  }, [isActivating]); // eslint-disable-line react-hooks/exhaustive-deps

  const handleRefresh = async () => {
    await Promise.all([
      refreshBilling(),
      isSelfHosted ? refreshLicense() : Promise.resolve(),
    ]);
  };

  // Hide license activation card when Stripe connection is restored (only if auto-opened)
  useEffect(() => {
    if (
      !hasStripeError &&
      !isAirGapped &&
      showLicenseActivationInput &&
      licenseCardAutoOpened &&
      !isLoading
    ) {
      if (billingData && hasActiveSubscription(billingData)) {
        setLicenseCardAutoOpened(false);
        setShowLicenseActivationInput(false);
      }
    }
  }, [
    hasStripeError,
    isAirGapped,
    showLicenseActivationInput,
    licenseCardAutoOpened,
    isLoading,
    billingData,
  ]);

  const handleLicenseActivated = () => {
    refreshLicense();
    refreshBilling();
    // Refresh the page to update settings (including ee_features_enabled)
    router.refresh();
    // Navigate to billing details now that the license is active
    changeView("details");
  };

  // View configuration
  const getViewConfig = (): ViewConfig => {
    if (isLoading || view === null) {
      return {
        icon: SvgWallet,
        title: "Plans & Billing",
        showBackButton: false,
      };
    }
    switch (view) {
      case "checkout":
        return {
          icon: SvgArrowUpCircle,
          title: "Upgrade Plan",
          showBackButton: false,
        };
      case "plans":
        return {
          icon: hasSubscription ? SvgWallet : SvgArrowUpCircle,
          title: hasSubscription ? "View Plans" : "Upgrade Plan",
          showBackButton: !!(
            hasSubscription ||
            (isSelfHosted && licenseData?.has_license)
          ),
        };
      case "details":
        return {
          icon: SvgWallet,
          title: "Plans & Billing",
          showBackButton: false,
        };
    }
  };

  const viewConfig = getViewConfig();

  // Handle view changes with transition
  const changeView = (newView: "plans" | "details" | "checkout") => {
    if (newView === view) return;
    if (newView === "checkout" && view === "plans") {
      setTransitionType("expand");
    } else if (newView === "plans" && view === "checkout") {
      setTransitionType("collapse");
    } else {
      setTransitionType("fade");
    }
    setViewChangeId((id) => id + 1);
    setView(newView);
  };

  const handleBack = () => {
    const hasEntitlement =
      hasSubscription || (isSelfHosted && licenseData?.has_license);
    if (view === "checkout") {
      changeView(hasEntitlement ? "details" : "plans");
    } else if (view === "plans" && hasEntitlement) {
      changeView("details");
    }
  };

  const renderContent = () => {
    if (isLoading || view === null) return null;

    const animationClass =
      transitionType === "expand"
        ? "billing-view-expand"
        : transitionType === "collapse"
          ? "billing-view-collapse"
          : "billing-view-enter";

    const views: Record<typeof view, React.ReactNode> = {
      checkout: <CheckoutView onAdjustPlan={() => changeView("plans")} />,
      plans: (
        <PlansView
          hasSubscription={!!hasSubscription}
          hasLicense={!!licenseData?.has_license}
          onCheckout={() => changeView("checkout")}
          hideFeatures={showLicenseActivationInput}
        />
      ),
      details: (
        <BillingDetailsView
          billing={billing ?? undefined}
          license={licenseData ?? undefined}
          onViewPlans={() => changeView("plans")}
          onRefresh={handleRefresh}
          isAirGapped={isAirGapped}
          isManualLicenseOnly={isManualLicenseOnly}
          hasStripeError={hasStripeError}
          licenseCard={
            isManualLicenseOnly ? (
              <LicenseActivationCard
                isOpen
                onSuccess={handleLicenseActivated}
                license={licenseData ?? undefined}
                onClose={() => {}}
                hideClose
              />
            ) : undefined
          }
        />
      ),
    };

    return (
      <div key={viewChangeId} className={`w-full ${animationClass}`}>
        {views[view]}
      </div>
    );
  };

  // Render footer
  const renderFooter = () => {
    if (isLoading || view === null) return null;
    return (
      <>
        {showLicenseActivationInput && !isManualLicenseOnly && (
          <div className="w-full billing-card-enter">
            <LicenseActivationCard
              isOpen={showLicenseActivationInput}
              onSuccess={handleLicenseActivated}
              license={licenseData ?? undefined}
              onClose={() => {
                setLicenseCardAutoOpened(false);
                setShowLicenseActivationInput(false);
              }}
            />
          </div>
        )}
        <FooterLinks
          hasSubscription={!!hasSubscription || !!licenseData?.has_license}
          onActivateLicense={
            isSelfHosted ? () => setShowLicenseActivationInput(true) : undefined
          }
          hideLicenseLink={
            isManualLicenseOnly ||
            showLicenseActivationInput ||
            (view === "plans" &&
              (!!hasSubscription || !!licenseData?.has_license))
          }
        />
      </>
    );
  };

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={viewConfig.icon}
        title={viewConfig.title}
        backButton={viewConfig.showBackButton}
        onBack={handleBack}
        separator
      />
      <SettingsLayouts.Body>
        <div className="flex flex-col items-center gap-6">
          {isActivating && (
            <Message
              static
              warning
              large
              text="Your license is still activating"
              description="Your license is being processed. You'll be taken to billing details automatically once confirmed."
              icon
              close
              onClose={() => {
                sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
                setIsActivating(false);
              }}
              className="w-full"
            />
          )}
          {renderContent()}
          {renderFooter()}
        </div>
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/bots/SlackBotCreationForm.tsx
================================================
"use client";

import CardSection from "@/components/admin/CardSection";
import { useRouter } from "next/navigation";
import { useState } from "react";
import { SlackTokensForm } from "./SlackTokensForm";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SvgSlack } from "@opal/icons";

export function NewSlackBotForm() {
  const [formValues] = useState({
    name: "",
    enabled: true,
    bot_token: "",
    app_token: "",
    user_token: "",
  });
  const router = useRouter();

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgSlack}
        title="New Slack Bot"
        separator
        backButton
      />
      <SettingsLayouts.Body>
        <CardSection>
          <div className="p-4">
            <SlackTokensForm
              isUpdate={false}
              initialValues={formValues}
              router={router}
            />
          </div>
        </CardSection>
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/bots/SlackBotTable.tsx
================================================
"use client";

import { PageSelector } from "@/components/PageSelector";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import { useEffect, useState } from "react";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { Badge } from "@/components/ui/badge";
import { SlackBot } from "@/lib/types";
import { EditIcon } from "@/components/icons/icons";

const NUM_IN_PAGE = 20;

function ClickableTableRow({
  url,
  children,
  ...props
}: {
  url: string;
  children: React.ReactNode;
  [key: string]: any;
}) {
  const router = useRouter();

  useEffect(() => {
    router.prefetch(url as Route);
  }, [router, url]);

  const navigate = () => {
    router.push(url as Route);
  };

  return (
    <TableRow {...props} onClick={navigate}>
      {children}
    </TableRow>
  );
}

export const SlackBotTable = ({ slackBots }: { slackBots: SlackBot[] }) => {
  const [page, setPage] = useState(1);

  // sort by id for consistent ordering
  slackBots.sort((a, b) => {
    if (a.id < b.id) {
      return -1;
    } else if (a.id > b.id) {
      return 1;
    } else {
      return 0;
    }
  });

  const slackBotsForPage = slackBots.slice(
    NUM_IN_PAGE * (page - 1),
    NUM_IN_PAGE * page
  );

  return (
    <div>
      <Table>
        <TableHeader>
          <TableRow>
            <TableHead>Name</TableHead>
            <TableHead>Status</TableHead>
            <TableHead>Default Config</TableHead>
            <TableHead>Channel Count</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {slackBotsForPage.map((slackBot) => {
            return (
              <ClickableTableRow
                url={`/admin/bots/${slackBot.id}`}
                key={slackBot.id}
                className="hover:bg-muted cursor-pointer"
              >
                <TableCell>
                  <div className="flex items-center">
                    <EditIcon className="mr-4" />
                    {slackBot.name}
                  </div>
                </TableCell>
                <TableCell>
                  {slackBot.enabled ? (
                    <Badge variant="success">Enabled</Badge>
                  ) : (
                    <Badge variant="destructive">Disabled</Badge>
                  )}
                </TableCell>
                <TableCell>
                  <Badge variant="secondary">Default Set</Badge>
                </TableCell>
                <TableCell>{slackBot.configs_count}</TableCell>
                <TableCell>
                  {/* Add any action buttons here if needed */}
                </TableCell>
              </ClickableTableRow>
            );
          })}
          {slackBots.length === 0 && (
            <TableRow>
              <TableCell
                colSpan={5}
                className="text-center text-muted-foreground"
              >
                Please add a New Slack Bot to begin chatting with Danswer!
              </TableCell>
            </TableRow>
          )}
        </TableBody>
      </Table>
      {slackBots.length > NUM_IN_PAGE && (
        <div className="mt-3 flex">
          <div className="mx-auto">
            <PageSelector
              totalPages={Math.ceil(slackBots.length / NUM_IN_PAGE)}
              currentPage={page}
              onPageChange={(newPage) => {
                setPage(newPage);
                window.scrollTo({
                  top: 0,
                  left: 0,
                  behavior: "smooth",
                });
              }}
            />
          </div>
        </div>
      )}
    </div>
  );
};


================================================
FILE: web/src/app/admin/bots/SlackBotUpdateForm.tsx
================================================
"use client";

import { toast } from "@/hooks/useToast";
import { SlackBot } from "@/lib/types";
import { useRouter } from "next/navigation";
import { useState, useEffect, useRef } from "react";
import { updateSlackBotField } from "@/lib/updateSlackBotField";
import { SlackTokensForm } from "./SlackTokensForm";

import { EditableStringFieldDisplay } from "@/components/EditableStringFieldDisplay";
import { deleteSlackBot } from "./new/lib";
import GenericConfirmModal from "@/components/modals/GenericConfirmModal";
import { Button } from "@opal/components";
import { cn } from "@/lib/utils";
import { SvgChevronDownSmall, SvgTrash } from "@opal/icons";

function Checkbox({
  label,
  checked,
  onChange,
}: {
  label: string;
  checked: boolean;
  onChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
}) {
  return (
    <label className="flex text-xs cursor-pointer">
      <input
        checked={checked}
        onChange={onChange}
        type="checkbox"
        className="mr-2 w-3.5 h-3.5 my-auto"
      />
      <span className="block font-medium text-text-700 text-sm">{label}</span>
    </label>
  );
}

export const ExistingSlackBotForm = ({
  existingSlackBot,
  refreshSlackBot,
}: {
  existingSlackBot: SlackBot;
  refreshSlackBot?: () => void;
}) => {
  const [isExpanded, setIsExpanded] = useState(false);
  const [formValues, setFormValues] = useState(existingSlackBot);
  const router = useRouter();
  const dropdownRef = useRef<HTMLDivElement>(null);
  const [showDeleteModal, setShowDeleteModal] = useState(false);

  const handleUpdateField = async (
    field: keyof SlackBot,
    value: string | boolean
  ) => {
    try {
      const response = await updateSlackBotField(
        existingSlackBot,
        field,
        value
      );
      if (!response.ok) {
        throw new Error(await response.text());
      }
      toast.success(`Connector ${field} updated successfully`);
    } catch (error) {
      toast.error(`Failed to update connector ${field}`);
    }
    setFormValues((prev) => ({ ...prev, [field]: value }));
  };

  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        dropdownRef.current &&
        !dropdownRef.current.contains(event.target as Node) &&
        isExpanded
      ) {
        setIsExpanded(false);
      }
    };

    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, [isExpanded]);

  return (
    <div>
      <div className="flex items-center justify-between h-14">
        <div className="flex items-center gap-2">
          <div>
            <EditableStringFieldDisplay
              value={formValues.name}
              isEditable={true}
              onUpdate={(value) => handleUpdateField("name", value)}
              scale={2.1}
            />
          </div>
        </div>

        <div className="flex flex-col" ref={dropdownRef}>
          <div className="flex items-center gap-4">
            <Button
              prominence="secondary"
              icon={({ className }) => (
                <SvgChevronDownSmall
                  className={cn(className, !isExpanded && "-rotate-90")}
                />
              )}
              onClick={() => setIsExpanded(!isExpanded)}
            >
              Update Tokens
            </Button>
            <Button
              variant="danger"
              onClick={() => setShowDeleteModal(true)}
              icon={SvgTrash}
            >
              Delete
            </Button>
          </div>

          {isExpanded && (
            <div className="bg-background border rounded-lg border-background-200 shadow-lg absolute mt-12 right-0 z-10 w-full md:w-3/4 lg:w-1/2">
              <div className="p-4">
                <SlackTokensForm
                  isUpdate={true}
                  initialValues={formValues}
                  existingSlackBotId={existingSlackBot.id}
                  refreshSlackBot={refreshSlackBot}
                  router={router}
                  onValuesChange={(values) => setFormValues(values)}
                />
              </div>
            </div>
          )}
        </div>
      </div>
      <div className="mt-2">
        <div className="inline-block border rounded-lg border-background-200 p-2">
          <Checkbox
            label="Enabled"
            checked={formValues.enabled}
            onChange={(e) => handleUpdateField("enabled", e.target.checked)}
          />
        </div>
        {showDeleteModal && (
          <GenericConfirmModal
            title="Delete Slack Bot"
            message="Are you sure you want to delete this Slack bot? This action cannot be undone."
            confirmText="Delete"
            onClose={() => setShowDeleteModal(false)}
            onConfirm={async () => {
              try {
                const response = await deleteSlackBot(existingSlackBot.id);
                if (!response.ok) {
                  throw new Error(await response.text());
                }
                toast.success("Slack bot deleted successfully");
                router.push("/admin/bots");
              } catch (error) {
                toast.error("Failed to delete Slack bot");
              }
              setShowDeleteModal(false);
            }}
          />
        )}
      </div>
    </div>
  );
};


================================================
FILE: web/src/app/admin/bots/SlackTokensForm.tsx
================================================
"use client";

import { TextFormField } from "@/components/Field";
import { Form, Formik } from "formik";
import * as Yup from "yup";
import { createSlackBot, updateSlackBot } from "./new/lib";
import { Button } from "@opal/components";
import Separator from "@/refresh-components/Separator";
import { useEffect } from "react";
import { DOCS_ADMINS_PATH } from "@/lib/constants";
import { toast } from "@/hooks/useToast";

export const SlackTokensForm = ({
  isUpdate,
  initialValues,
  existingSlackBotId,
  refreshSlackBot,
  router,
  onValuesChange,
}: {
  isUpdate: boolean;
  initialValues: any;
  existingSlackBotId?: number;
  refreshSlackBot?: () => void;
  router: any;
  onValuesChange?: (values: any) => void;
}) => {
  useEffect(() => {
    if (onValuesChange) {
      onValuesChange(initialValues);
    }
  }, [initialValues, onValuesChange]);

  return (
    <Formik
      initialValues={{
        ...initialValues,
      }}
      validationSchema={Yup.object().shape({
        bot_token: Yup.string().required(),
        app_token: Yup.string().required(),
        name: Yup.string().required(),
        user_token: Yup.string().optional(),
      })}
      onSubmit={async (values, formikHelpers) => {
        formikHelpers.setSubmitting(true);

        let response;
        if (isUpdate) {
          response = await updateSlackBot(existingSlackBotId!, values);
        } else {
          response = await createSlackBot(values);
        }
        formikHelpers.setSubmitting(false);
        if (response.ok) {
          if (refreshSlackBot) {
            refreshSlackBot();
          }
          const responseJson = await response.json();
          const botId = isUpdate ? existingSlackBotId : responseJson.id;
          toast.success(
            isUpdate
              ? "Successfully updated Slack Bot!"
              : "Successfully created Slack Bot!"
          );
          router.push(`/admin/bots/${encodeURIComponent(botId)}`);
        } else {
          const responseJson = await response.json();
          let errorMsg = responseJson.detail || responseJson.message;

          if (errorMsg.includes("Invalid bot token:")) {
            errorMsg = "Slack Bot Token is invalid";
          } else if (errorMsg.includes("Invalid app token:")) {
            errorMsg = "Slack App Token is invalid";
          }
          toast.error(
            isUpdate
              ? `Error updating Slack Bot - ${errorMsg}`
              : `Error creating Slack Bot - ${errorMsg}`
          );
        }
      }}
      enableReinitialize={true}
    >
      {({ isSubmitting, setFieldValue, values }) => (
        <Form className="w-full">
          {!isUpdate && (
            <div className="">
              <TextFormField
                name="name"
                label="Name This Slack Bot:"
                type="text"
              />
            </div>
          )}

          {!isUpdate && (
            <div className="mt-4">
              <Separator />
              Please refer to our{" "}
              <a
                className="text-blue-500 hover:underline"
                href={`${DOCS_ADMINS_PATH}/getting_started/slack_bot_setup`}
                target="_blank"
                rel="noopener noreferrer"
              >
                guide
              </a>{" "}
              if you are not sure how to get these tokens!
            </div>
          )}
          <TextFormField
            name="bot_token"
            label="Slack Bot Token"
            type="password"
          />
          <TextFormField
            name="app_token"
            label="Slack App Token"
            type="password"
          />
          <TextFormField
            name="user_token"
            label="Slack User Token (Optional)"
            type="password"
            subtext="Optional: User OAuth token for enhanced private channel access"
          />
          <div className="flex justify-end w-full mt-4">
            <Button
              disabled={
                isSubmitting ||
                !values.bot_token ||
                !values.app_token ||
                !values.name
              }
              type="submit"
            >
              {isUpdate ? "Update" : "Create"}
            </Button>
          </div>
        </Form>
      )}
    </Formik>
  );
};


================================================
FILE: web/src/app/admin/bots/[bot-id]/SlackChannelConfigsTable.tsx
================================================
"use client";

import { PageSelector } from "@/components/PageSelector";
import { toast } from "@/hooks/useToast";
import { EditIcon } from "@/components/icons/icons";
import { SlackChannelConfig } from "@/lib/types";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import Link from "next/link";
import type { Route } from "next";
import { useState } from "react";
import { deleteSlackChannelConfig, isPersonaASlackBotPersona } from "./lib";
import { Card } from "@/components/ui/card";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { Button } from "@opal/components";
import { SvgSettings, SvgTrash } from "@opal/icons";
const numToDisplay = 50;

export interface SlackChannelConfigsTableProps {
  slackBotId: number;
  slackChannelConfigs: SlackChannelConfig[];
  refresh: () => void;
}

export default function SlackChannelConfigsTable({
  slackBotId,
  slackChannelConfigs,
  refresh,
}: SlackChannelConfigsTableProps) {
  const [page, setPage] = useState(1);

  const defaultConfig = slackChannelConfigs.find((config) => config.is_default);
  const channelConfigs = slackChannelConfigs.filter(
    (config) => !config.is_default
  );

  return (
    <div className="space-y-8">
      <div className="flex justify-between items-center mb-6">
        <Button
          prominence="secondary"
          onClick={() => {
            window.location.href = `/admin/bots/${slackBotId}/channels/${defaultConfig?.id}`;
          }}
          icon={SvgSettings}
        >
          Edit Default Configuration
        </Button>
        <CreateButton href={`/admin/bots/${slackBotId}/channels/new`} secondary>
          New Channel Configuration
        </CreateButton>
      </div>

      <div>
        <h2 className="text-2xl font- mb-4">Channel-Specific Configurations</h2>
        <Card>
          <Table>
            <TableHeader>
              <TableRow>
                <TableHead>Channel</TableHead>
                <TableHead>Assistant</TableHead>
                <TableHead>Document Sets</TableHead>
                <TableHead>Actions</TableHead>
              </TableRow>
            </TableHeader>
            <TableBody>
              {channelConfigs
                .slice(numToDisplay * (page - 1), numToDisplay * page)
                .map((slackChannelConfig) => {
                  return (
                    <TableRow
                      key={slackChannelConfig.id}
                      className="cursor-pointer transition-colors"
                      onClick={() => {
                        window.location.href = `/admin/bots/${slackBotId}/channels/${slackChannelConfig.id}`;
                      }}
                    >
                      <TableCell>
                        <div className="flex gap-x-2">
                          <div className="my-auto">
                            <EditIcon className="text-muted-foreground" />
                          </div>
                          <div className="my-auto">
                            {"#" +
                              slackChannelConfig.channel_config.channel_name}
                          </div>
                        </div>
                      </TableCell>
                      <TableCell onClick={(e) => e.stopPropagation()}>
                        {slackChannelConfig.persona &&
                        !isPersonaASlackBotPersona(
                          slackChannelConfig.persona
                        ) ? (
                          <Link
                            href={
                              `/app/agents/edit/${slackChannelConfig.persona.id}` as Route
                            }
                            className="text-primary hover:underline"
                          >
                            {slackChannelConfig.persona.name}
                          </Link>
                        ) : (
                          "-"
                        )}
                      </TableCell>
                      <TableCell>
                        <div>
                          {slackChannelConfig.persona &&
                          slackChannelConfig.persona.document_sets.length > 0
                            ? slackChannelConfig.persona.document_sets
                                .map((documentSet) => documentSet.name)
                                .join(", ")
                            : "-"}
                        </div>
                      </TableCell>
                      <TableCell onClick={(e) => e.stopPropagation()}>
                        <Button
                          onClick={async (e) => {
                            e.stopPropagation();
                            const response = await deleteSlackChannelConfig(
                              slackChannelConfig.id
                            );
                            if (response.ok) {
                              toast.success(
                                `Slack bot config "${slackChannelConfig.id}" deleted`
                              );
                            } else {
                              const errorMsg = await response.text();
                              toast.error(
                                `Failed to delete Slack bot config - ${errorMsg}`
                              );
                            }
                            refresh();
                          }}
                          icon={SvgTrash}
                          prominence="tertiary"
                          size="sm"
                        />
                      </TableCell>
                    </TableRow>
                  );
                })}

              {channelConfigs.length === 0 && (
                <TableRow>
                  <TableCell
                    colSpan={4}
                    className="text-center text-muted-foreground"
                  >
                    No channel-specific configurations. Add a new configuration
                    to customize behavior for specific channels.
                  </TableCell>
                </TableRow>
              )}
            </TableBody>
          </Table>
        </Card>

        {channelConfigs.length > numToDisplay && (
          <div className="mt-4 flex justify-center">
            <PageSelector
              totalPages={Math.ceil(channelConfigs.length / numToDisplay)}
              currentPage={page}
              onPageChange={(newPage) => setPage(newPage)}
            />
          </div>
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm.tsx
================================================
"use client";

import { useMemo } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { toast } from "@/hooks/useToast";
import {
  DocumentSetSummary,
  SlackChannelConfig,
  SlackBotResponseType,
} from "@/lib/types";
import {
  createSlackChannelConfig,
  isPersonaASlackBotPersona,
  updateSlackChannelConfig,
} from "../lib";
import CardSection from "@/components/admin/CardSection";
import { useRouter } from "next/navigation";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { SlackChannelConfigFormFields } from "./SlackChannelConfigFormFields";

export const SlackChannelConfigCreationForm = ({
  slack_bot_id,
  documentSets,
  personas,
  standardAnswerCategoryResponse,
  existingSlackChannelConfig,
}: {
  slack_bot_id: number;
  documentSets: DocumentSetSummary[];
  personas: MinimalPersonaSnapshot[];
  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
  existingSlackChannelConfig?: SlackChannelConfig;
}) => {
  const router = useRouter();
  const isUpdate = Boolean(existingSlackChannelConfig);
  const isDefault = existingSlackChannelConfig?.is_default || false;
  const existingSlackBotUsesPersona = existingSlackChannelConfig?.persona
    ? !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
    : false;
  const existingPersonaHasSearchTool = existingSlackChannelConfig?.persona
    ? existingSlackChannelConfig.persona.tools.some(
        (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID
      )
    : false;

  const [searchEnabledAgents, nonSearchAgents] = useMemo(() => {
    return personas.reduce(
      (acc, persona) => {
        if (
          persona.tools.some((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)
        ) {
          acc[0].push(persona);
        } else {
          acc[1].push(persona);
        }
        return acc;
      },
      [[], []] as [MinimalPersonaSnapshot[], MinimalPersonaSnapshot[]]
    );
  }, [personas]);

  return (
    <CardSection className="!px-12 max-w-4xl">
      <Formik
        initialValues={{
          slack_bot_id: slack_bot_id,
          channel_name: isDefault
            ? ""
            : existingSlackChannelConfig?.channel_config.channel_name || "",
          response_type: "citations" as SlackBotResponseType,
          answer_validity_check_enabled: (
            existingSlackChannelConfig?.channel_config?.answer_filters || []
          ).includes("well_answered_postfilter"),
          questionmark_prefilter_enabled: (
            existingSlackChannelConfig?.channel_config?.answer_filters || []
          ).includes("questionmark_prefilter"),
          respond_tag_only:
            existingSlackChannelConfig?.channel_config?.respond_tag_only ||
            false,
          is_ephemeral:
            existingSlackChannelConfig?.channel_config?.is_ephemeral || false,
          respond_to_bots:
            existingSlackChannelConfig?.channel_config?.respond_to_bots ||
            false,
          show_continue_in_web_ui:
            existingSlackChannelConfig?.channel_config
              ?.show_continue_in_web_ui ?? !isUpdate,
          enable_auto_filters:
            existingSlackChannelConfig?.enable_auto_filters || false,
          respond_member_group_list:
            existingSlackChannelConfig?.channel_config
              ?.respond_member_group_list || [],
          still_need_help_enabled:
            existingSlackChannelConfig?.channel_config?.follow_up_tags !==
            undefined,
          follow_up_tags:
            existingSlackChannelConfig?.channel_config?.follow_up_tags ||
            undefined,
          document_sets:
            existingSlackChannelConfig && existingSlackChannelConfig.persona
              ? existingSlackChannelConfig.persona.document_sets.map(
                  (documentSet) => documentSet.id
                )
              : ([] as number[]),
          persona_id:
            existingSlackChannelConfig?.persona &&
            !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
              ? existingSlackChannelConfig.persona.id
              : null,
          standard_answer_categories:
            existingSlackChannelConfig?.standard_answer_categories || [],
          knowledge_source: existingSlackBotUsesPersona
            ? existingPersonaHasSearchTool
              ? "assistant"
              : "non_search_agent"
            : existingSlackChannelConfig?.persona
              ? "document_sets"
              : "all_public",
          disabled:
            existingSlackChannelConfig?.channel_config?.disabled ?? false,
        }}
        validationSchema={Yup.object().shape({
          slack_bot_id: Yup.number().required(),
          channel_name: isDefault
            ? Yup.string()
            : Yup.string().required("Channel Name is required"),
          response_type: Yup.mixed<SlackBotResponseType>()
            .oneOf(["quotes", "citations"])
            .required(),
          answer_validity_check_enabled: Yup.boolean().required(),
          questionmark_prefilter_enabled: Yup.boolean().required(),
          respond_tag_only: Yup.boolean().required(),
          respond_to_bots: Yup.boolean().required(),
          is_ephemeral: Yup.boolean().required(),
          show_continue_in_web_ui: Yup.boolean().required(),
          enable_auto_filters: Yup.boolean().required(),
          respond_member_group_list: Yup.array().of(Yup.string()).required(),
          still_need_help_enabled: Yup.boolean().required(),
          follow_up_tags: Yup.array().of(Yup.string()),
          document_sets: Yup.array()
            .of(Yup.number())
            .when("knowledge_source", {
              is: "document_sets",
              then: (schema) =>
                schema.min(
                  1,
                  "At least one Document Set is required when using the 'Document Sets' knowledge source"
                ),
            }),
          persona_id: Yup.number()
            .nullable()
            .when("knowledge_source", {
              is: "assistant",
              then: (schema) =>
                schema.required(
                  "An agent is required when using the 'Agent' knowledge source"
                ),
            }),
          standard_answer_categories: Yup.array(),
          knowledge_source: Yup.string()
            .oneOf([
              "all_public",
              "document_sets",
              "assistant",
              "non_search_agent",
            ])
            .required(),
          disabled: Yup.boolean().optional().default(false),
        })}
        onSubmit={async (values, formikHelpers) => {
          formikHelpers.setSubmitting(true);

          const cleanedValues = {
            ...values,
            slack_bot_id,
            channel_name: values.channel_name,
            respond_member_group_list: values.respond_member_group_list,
            usePersona:
              values.knowledge_source === "assistant" ||
              values.knowledge_source === "non_search_agent",
            document_sets:
              values.knowledge_source === "document_sets"
                ? values.document_sets
                : [],
            persona_id:
              values.knowledge_source === "assistant" ||
              values.knowledge_source === "non_search_agent"
                ? values.persona_id
                : null,
            standard_answer_categories: values.standard_answer_categories.map(
              (category: any) => category.id
            ),
            response_type: values.response_type as SlackBotResponseType,
            disabled: values.disabled ?? false,
          };

          if (!cleanedValues.still_need_help_enabled) {
            cleanedValues.follow_up_tags = undefined;
          } else {
            if (!cleanedValues.follow_up_tags) {
              cleanedValues.follow_up_tags = [];
            }
          }

          const response = isUpdate
            ? await updateSlackChannelConfig(
                existingSlackChannelConfig!.id,
                cleanedValues
              )
            : await createSlackChannelConfig(cleanedValues);

          formikHelpers.setSubmitting(false);
          if (response.ok) {
            router.push(`/admin/bots/${slack_bot_id}`);
          } else {
            const responseJson = await response.json();
            const errorMsg = responseJson.detail || responseJson.message;
            toast.error(
              `Error ${
                isUpdate ? "updating" : "creating"
              } OnyxBot config - ${errorMsg}`
            );
          }
        }}
      >
        {({ isSubmitting, values, setFieldValue, ...formikProps }) => (
          <Form>
            <div className="pb-6 w-full">
              <SlackChannelConfigFormFields
                {...values}
                isUpdate={isUpdate}
                isDefault={isDefault}
                documentSets={documentSets}
                searchEnabledAgents={searchEnabledAgents}
                nonSearchAgents={nonSearchAgents}
                standardAnswerCategoryResponse={standardAnswerCategoryResponse}
                slack_bot_id={slack_bot_id}
                formikProps={formikProps}
              />
            </div>
          </Form>
        )}
      </Formik>
    </CardSection>
  );
};


================================================
FILE: web/src/app/admin/bots/[bot-id]/channels/SlackChannelConfigFormFields.tsx
================================================
"use client";

import { useState, useEffect, useMemo } from "react";
import { FieldArray, useFormikContext, ErrorMessage } from "formik";
import { DocumentSetSummary } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import {
  Label,
  SelectorFormField,
  SubLabel,
  TextArrayField,
  TextFormField,
} from "@/components/Field";
import { Button } from "@opal/components";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import DocumentSetCard from "@/sections/cards/DocumentSetCard";
import CollapsibleSection from "@/app/admin/agents/CollapsibleSection";
import { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { StandardAnswerCategoryDropdownField } from "@/components/standardAnswers/StandardAnswerCategoryDropdown";
import { RadioGroup } from "@/components/ui/radio-group";
import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField";
import { AlertCircle } from "lucide-react";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { TooltipProvider } from "@radix-ui/react-tooltip";
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { Badge } from "@/components/ui/badge";
import {
  Accordion,
  AccordionContent,
  AccordionItem,
  AccordionTrigger,
} from "@/components/ui/accordion";
import Separator from "@/refresh-components/Separator";
import { CheckboxField } from "@/refresh-components/form/LabeledCheckboxField";

export interface SlackChannelConfigFormFieldsProps {
  isUpdate: boolean;
  isDefault: boolean;
  documentSets: DocumentSetSummary[];
  searchEnabledAgents: MinimalPersonaSnapshot[];
  nonSearchAgents: MinimalPersonaSnapshot[];
  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
  slack_bot_id: number;
  formikProps: any;
}

export function SlackChannelConfigFormFields({
  isUpdate,
  isDefault,
  documentSets,
  searchEnabledAgents,
  nonSearchAgents,
  standardAnswerCategoryResponse,
  slack_bot_id,
  formikProps,
}: SlackChannelConfigFormFieldsProps) {
  const router = useRouter();
  const { values, setFieldValue } = useFormikContext<any>();
  const [viewUnselectableSets, setViewUnselectableSets] = useState(false);
  const [viewSyncEnabledAgents, setViewSyncEnabledAgents] = useState(false);

  // Helper function to check if a document set contains sync connectors
  const documentSetContainsSync = (documentSet: DocumentSetSummary) => {
    return documentSet.cc_pair_summaries.some(
      (summary) => summary.access_type === "sync"
    );
  };

  // Helper function to check if a document set contains private connectors
  const documentSetContainsPrivate = (documentSet: DocumentSetSummary) => {
    return documentSet.cc_pair_summaries.some(
      (summary) => summary.access_type === "private"
    );
  };

  // Helper function to get cc_pair_summaries from DocumentSetSummary
  const getCcPairSummaries = (documentSet: DocumentSetSummary) => {
    return documentSet.cc_pair_summaries;
  };

  const [syncEnabledAgents, availableAgents] = useMemo(() => {
    const sync: MinimalPersonaSnapshot[] = [];
    const available: MinimalPersonaSnapshot[] = [];

    searchEnabledAgents.forEach((persona) => {
      const hasSyncSet = persona.document_sets.some(documentSetContainsSync);
      if (hasSyncSet) {
        sync.push(persona);
      } else {
        available.push(persona);
      }
    });

    return [sync, available];
  }, [searchEnabledAgents]);

  const unselectableSets = useMemo(() => {
    return documentSets.filter(documentSetContainsSync);
  }, [documentSets]);

  const memoizedPrivateConnectors = useMemo(() => {
    const uniqueDescriptors = new Map();
    documentSets.forEach((ds: DocumentSetSummary) => {
      const ccPairSummaries = getCcPairSummaries(ds);
      ccPairSummaries.forEach((summary: any) => {
        if (
          summary.access_type === "private" &&
          !uniqueDescriptors.has(summary.id)
        ) {
          uniqueDescriptors.set(summary.id, summary);
        }
      });
    });
    return Array.from(uniqueDescriptors.values());
  }, [documentSets]);

  const selectableSets = useMemo(() => {
    return documentSets.filter((ds) => !documentSetContainsSync(ds));
  }, [documentSets]);

  useEffect(() => {
    const invalidSelected = values.document_sets.filter((dsId: number) =>
      unselectableSets.some((us) => us.id === dsId)
    );
    if (invalidSelected.length > 0) {
      setFieldValue(
        "document_sets",
        values.document_sets.filter(
          (dsId: number) => !invalidSelected.includes(dsId)
        )
      );
      toast.warning(
        "We removed one or more document sets from your selection because they are no longer valid. Please review and update your configuration."
      );
    }
  }, [unselectableSets, values.document_sets, setFieldValue]);

  const shouldShowPrivacyAlert = useMemo(() => {
    if (values.knowledge_source === "document_sets") {
      const selectedSets = documentSets.filter((ds) =>
        values.document_sets.includes(ds.id)
      );
      return selectedSets.some((ds) => documentSetContainsPrivate(ds));
    } else if (values.knowledge_source === "assistant") {
      const chosenAgent = searchEnabledAgents.find(
        (p) => p.id == values.persona_id
      );
      return chosenAgent?.document_sets.some((ds) =>
        documentSetContainsPrivate(ds)
      );
    }
    return false;
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [values.knowledge_source, values.document_sets, values.persona_id]);

  return (
    <>
      <div className="w-full">
        {isDefault && (
          <>
            <Badge variant="agent" className="bg-blue-100 text-blue-800">
              Default Configuration
            </Badge>
            <p className="mt-2 text-sm">
              This default configuration will apply to all channels and direct
              messages (DMs) in your Slack workspace.
            </p>
            <div className="mt-4 p-4 bg-background rounded-md border border-neutral-300">
              <CheckboxField
                name="disabled"
                label="Disable Default Configuration"
                labelClassName="text-text"
              />
              <p className="mt-2 text-sm italic">
                Warning: Disabling the default configuration means OnyxBot
                won&apos;t respond in Slack channels unless they are explicitly
                configured. Additionally, OnyxBot will not respond to DMs.
              </p>
            </div>
          </>
        )}
        {!isDefault && (
          <>
            <TextFormField
              name="channel_name"
              label="Slack Channel Name"
              placeholder="Enter channel name (e.g., general, support)"
              subtext="Enter the name of the Slack channel (without the # symbol)"
            />
          </>
        )}
        <div className="space-y-2 mt-4">
          <Label>Knowledge Source</Label>
          <RadioGroup
            className="flex flex-col gap-y-4"
            value={values.knowledge_source}
            onValueChange={(value: string) => {
              setFieldValue("knowledge_source", value);
            }}
          >
            <RadioGroupItemField
              value="all_public"
              id="all_public"
              label="All Public Knowledge"
              sublabel="Let OnyxBot respond based on information from all public connectors"
            />
            {selectableSets.length + unselectableSets.length > 0 && (
              <RadioGroupItemField
                value="document_sets"
                id="document_sets"
                label="Specific Document Sets"
                sublabel="Control which documents to use for answering questions"
              />
            )}
            <RadioGroupItemField
              value="assistant"
              id="assistant"
              label="Search Agent"
              sublabel="Control both the documents and the prompt to use for answering questions"
            />
            <RadioGroupItemField
              value="non_search_agent"
              id="non_search_agent"
              label="Non-Search Agent"
              sublabel="Chat with an agent that does not use documents"
            />
          </RadioGroup>
        </div>
        {values.knowledge_source === "document_sets" &&
          documentSets.length > 0 && (
            <div className="mt-4">
              <SubLabel>
                <>
                  Select the document sets OnyxBot will use while answering
                  questions in Slack.
                  <br />
                  {unselectableSets.length > 0 ? (
                    <span>
                      Some incompatible document sets are{" "}
                      {viewUnselectableSets ? "visible" : "hidden"}.{" "}
                      <button
                        type="button"
                        onClick={() =>
                          setViewUnselectableSets(
                            (viewUnselectableSets) => !viewUnselectableSets
                          )
                        }
                        className="text-sm text-action-link-05"
                      >
                        {viewUnselectableSets
                          ? "Hide un-selectable "
                          : "View all "}
                        document sets
                      </button>
                    </span>
                  ) : (
                    ""
                  )}
                </>
              </SubLabel>
              <FieldArray
                name="document_sets"
                render={(arrayHelpers) => (
                  <>
                    {selectableSets.length > 0 && (
                      <div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
                        {selectableSets.map((documentSet) => {
                          const selectedIndex = values.document_sets.indexOf(
                            documentSet.id
                          );
                          const isSelected = selectedIndex !== -1;

                          return (
                            <DocumentSetCard
                              key={documentSet.id}
                              documentSet={documentSet}
                              isSelected={isSelected}
                              onSelectToggle={(selected) => {
                                if (selected) arrayHelpers.push(documentSet.id);
                                else arrayHelpers.remove(selectedIndex);
                              }}
                            />
                          );
                        })}
                      </div>
                    )}

                    {viewUnselectableSets && unselectableSets.length > 0 && (
                      <div className="mt-4">
                        <p className="text-sm text-text-dark/80">
                          These document sets cannot be attached as they have
                          auto-synced docs:
                        </p>
                        <div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
                          {unselectableSets.map((documentSet) => (
                            <DocumentSetCard
                              key={documentSet.id}
                              documentSet={documentSet}
                              disabled
                              disabledTooltip="Unable to use this document set because it contains a connector with auto-sync permissions. OnyxBot's responses in this channel are visible to all Slack users, so mirroring the asker's permissions could inadvertently expose private information."
                              isSelected={false}
                            />
                          ))}
                        </div>
                      </div>
                    )}
                    <ErrorMessage
                      className="text-red-500 text-sm mt-1"
                      name="document_sets"
                      component="div"
                    />
                  </>
                )}
              />
            </div>
          )}
        {values.knowledge_source === "assistant" && (
          <div className="mt-4">
            <SubLabel>
              <>
                Select the search-enabled agent OnyxBot will use while answering
                questions in Slack.
                {syncEnabledAgents.length > 0 && (
                  <>
                    <br />
                    <span className="text-sm text-text-dark/80">
                      Note: Some of your agents have auto-synced connectors in
                      their document sets. You cannot select these agents as
                      they will not be able to answer questions in Slack.{" "}
                      <button
                        type="button"
                        onClick={() =>
                          setViewSyncEnabledAgents(
                            (viewSyncEnabledAgents) => !viewSyncEnabledAgents
                          )
                        }
                        className="text-sm text-action-link-05"
                      >
                        {viewSyncEnabledAgents
                          ? "Hide un-selectable "
                          : "View all "}
                        agents
                      </button>
                    </span>
                  </>
                )}
              </>
            </SubLabel>

            <SelectorFormField
              name="persona_id"
              options={availableAgents.map((persona) => ({
                name: persona.name,
                value: persona.id,
              }))}
            />
            {viewSyncEnabledAgents && syncEnabledAgents.length > 0 && (
              <div className="mt-4">
                <p className="text-sm text-text-dark/80">
                  Un-selectable agents:
                </p>
                <div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
                  {syncEnabledAgents.map((persona: MinimalPersonaSnapshot) => (
                    <button
                      type="button"
                      onClick={() =>
                        router.push(`/app/agents/edit/${persona.id}` as Route)
                      }
                      key={persona.id}
                      className="p-2 bg-background-100 cursor-pointer rounded-md flex items-center gap-2"
                    >
                      <AgentAvatar agent={persona} size={16} />
                      {persona.name}
                    </button>
                  ))}
                </div>
              </div>
            )}
          </div>
        )}
        {values.knowledge_source === "non_search_agent" && (
          <div className="mt-4">
            <SubLabel>
              <>
                Select the non-search agent OnyxBot will use while answering
                questions in Slack.
                {syncEnabledAgents.length > 0 && (
                  <>
                    <br />
                    <span className="text-sm text-text-dark/80">
                      Note: Some of your agents have auto-synced connectors in
                      their document sets. You cannot select these agents as
                      they will not be able to answer questions in Slack.{" "}
                      <button
                        type="button"
                        onClick={() =>
                          setViewSyncEnabledAgents(
                            (viewSyncEnabledAgents) => !viewSyncEnabledAgents
                          )
                        }
                        className="text-sm text-action-link-05"
                      >
                        {viewSyncEnabledAgents
                          ? "Hide un-selectable "
                          : "View all "}
                        agents
                      </button>
                    </span>
                  </>
                )}
              </>
            </SubLabel>

            <SelectorFormField
              name="persona_id"
              options={nonSearchAgents.map((persona) => ({
                name: persona.name,
                value: persona.id,
              }))}
            />
          </div>
        )}
      </div>
      <Separator className="my-4" />
      <Accordion type="multiple" className="gap-y-2 w-full">
        {values.knowledge_source !== "non_search_agent" && (
          <AccordionItem value="search-options">
            <AccordionTrigger className="text-text">
              Search Configuration
            </AccordionTrigger>
            <AccordionContent>
              <div className="space-y-4 pb-3">
                <div className="w-64">
                  <SelectorFormField
                    name="response_type"
                    label="Answer Type"
                    tooltip="Controls the format of OnyxBot's responses."
                    options={[
                      { name: "Standard", value: "citations" },
                      { name: "Detailed", value: "quotes" },
                    ]}
                  />
                </div>
                <CheckboxField
                  name="answer_validity_check_enabled"
                  label="Only respond if citations found"
                  tooltip="If set, will only answer questions where the model successfully produces citations"
                />
              </div>
            </AccordionContent>
          </AccordionItem>
        )}

        <AccordionItem className="mt-4" value="general-options">
          <AccordionTrigger>General Configuration</AccordionTrigger>
          <AccordionContent className="overflow-visible">
            <div className="space-y-4">
              <CheckboxField
                name="show_continue_in_web_ui"
                label="Show Continue in Web UI button"
                tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
              />

              <CheckboxField
                name="still_need_help_enabled"
                onChange={(checked: boolean) => {
                  setFieldValue("still_need_help_enabled", checked);
                  if (!checked) {
                    setFieldValue("follow_up_tags", []);
                  }
                }}
                label={'Give a "Still need help?" button'}
                tooltip={`OnyxBot's response will include a button at the bottom
                      of the response that asks the user if they still need help.`}
              />
              {values.still_need_help_enabled && (
                <CollapsibleSection prompt="Configure Still Need Help Button">
                  <TextArrayField
                    name="follow_up_tags"
                    label="(Optional) Users / Groups to Tag"
                    values={values}
                    subtext={
                      <div>
                        The Slack users / groups we should tag if the user
                        clicks the &quot;Still need help?&quot; button. If no
                        emails are provided, we will not tag anyone and will
                        just react with a 🆘 emoji to the original message.
                      </div>
                    }
                    placeholder="User email or user group name..."
                  />
                </CollapsibleSection>
              )}

              <CheckboxField
                name="questionmark_prefilter_enabled"
                label="Only respond to questions"
                tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
              />
              <CheckboxField
                name="respond_tag_only"
                label="Respond to @OnyxBot Only"
                tooltip="If set, OnyxBot will only respond when directly tagged"
              />
              <CheckboxField
                name="respond_to_bots"
                label="Respond to Bot messages"
                tooltip="If not set, OnyxBot will always ignore messages from Bots"
              />
              <CheckboxField
                name="is_ephemeral"
                label="Respond to user in a private (ephemeral) message"
                tooltip="If set, OnyxBot will respond only to the user in a private (ephemeral) message. If you also
                chose 'Search' Agent above, selecting this option will make documents that are private to the user
                available for their queries."
              />

              <TextArrayField
                name="respond_member_group_list"
                label="(Optional) Respond to Certain Users / Groups"
                subtext={
                  "If specified, OnyxBot responses will only " +
                  "be visible to the members or groups in this list."
                }
                values={values}
                placeholder="User email or user group name..."
              />

              <StandardAnswerCategoryDropdownField
                standardAnswerCategoryResponse={standardAnswerCategoryResponse}
                categories={values.standard_answer_categories}
                setCategories={(categories: any) =>
                  setFieldValue("standard_answer_categories", categories)
                }
              />
            </div>
          </AccordionContent>
        </AccordionItem>
      </Accordion>

      <div className="flex mt-8 gap-x-2 w-full justify-end">
        {shouldShowPrivacyAlert && (
          <TooltipProvider>
            <Tooltip>
              <TooltipTrigger asChild>
                <div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
                  <AlertCircle className="h-5 w-5 text-alert" />
                </div>
              </TooltipTrigger>
              <TooltipContent side="top" className="bg-background p-4 w-80">
                <Label className="text-text mb-2 font-semibold">
                  Privacy Alert
                </Label>
                <p className="text-sm text-text-darker mb-4">
                  Please note that if the private (ephemeral) response is *not
                  selected*, only public documents within the selected document
                  sets will be accessible for user queries. If the private
                  (ephemeral) response *is selected*, user quries can also
                  leverage documents that the user has already been granted
                  access to. Note that users will be able to share the response
                  with others in the channel, so please ensure that this is
                  aligned with your company sharing policies.
                </p>
                <div className="space-y-2">
                  <h4 className="text-sm text-text font-medium">
                    Relevant Connectors:
                  </h4>
                  <div className="max-h-40 overflow-y-auto border-t border-text-subtle flex-col gap-y-2">
                    {memoizedPrivateConnectors.map((ccpairinfo: any) => (
                      <Link
                        key={ccpairinfo.id}
                        href={`/admin/connector/${ccpairinfo.id}`}
                        className="flex items-center p-2 rounded-md hover:bg-background-100 transition-colors"
                      >
                        <div className="mr-2">
                          <SourceIcon
                            iconSize={16}
                            sourceType={ccpairinfo.source}
                          />
                        </div>
                        <span className="text-sm text-text-darker font-medium">
                          {ccpairinfo.name}
                        </span>
                      </Link>
                    ))}
                  </div>
                </div>
              </TooltipContent>
            </Tooltip>
          </TooltipProvider>
        )}
        <Button type="submit">{isUpdate ? "Update" : "Create"}</Button>
        <Button prominence="secondary" onClick={() => router.back()}>
          Cancel
        </Button>
      </div>
    </>
  );
}


================================================
FILE: web/src/app/admin/bots/[bot-id]/channels/[id]/page.tsx
================================================
"use client";

import { use } from "react";
import { SlackChannelConfigCreationForm } from "@/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm";
import { ErrorCallout } from "@/components/ErrorCallout";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SvgSlack } from "@opal/icons";
import { useSlackChannelConfigs } from "@/app/admin/bots/[bot-id]/hooks";
import { useDocumentSets } from "@/app/admin/documents/sets/hooks";
import { useAgents } from "@/hooks/useAgents";
import { useStandardAnswerCategories } from "@/app/ee/admin/standard-answer/hooks";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import type { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";

function EditSlackChannelConfigContent({ id }: { id: string }) {
  const isPaidEnterprise = usePaidEnterpriseFeaturesEnabled();

  const {
    data: slackChannelConfigs,
    isLoading: isChannelsLoading,
    error: channelsError,
  } = useSlackChannelConfigs();

  const {
    data: documentSets,
    isLoading: isDocSetsLoading,
    error: docSetsError,
  } = useDocumentSets();

  const {
    agents,
    isLoading: isAgentsLoading,
    error: agentsError,
  } = useAgents();

  const {
    data: standardAnswerCategories,
    isLoading: isStdAnswerLoading,
    error: stdAnswerError,
  } = useStandardAnswerCategories();

  const isLoading =
    isChannelsLoading ||
    isDocSetsLoading ||
    isAgentsLoading ||
    (isPaidEnterprise && isStdAnswerLoading);

  const slackChannelConfig = slackChannelConfigs?.find(
    (config) => config.id === Number(id)
  );

  const title = slackChannelConfig?.is_default
    ? "Edit Default Slack Config"
    : "Edit Slack Channel Config";

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgSlack}
        title={title}
        separator
        backButton
      />
      <SettingsLayouts.Body>
        {isLoading ? (
          <SimpleLoader />
        ) : channelsError || !slackChannelConfigs ? (
          <ErrorCallout
            errorTitle="Something went wrong :("
            errorMsg={`Failed to fetch Slack Channels - ${
              channelsError?.message ?? "unknown error"
            }`}
          />
        ) : !slackChannelConfig ? (
          <ErrorCallout
            errorTitle="Something went wrong :("
            errorMsg={`Did not find Slack Channel config with ID: ${id}`}
          />
        ) : docSetsError || !documentSets ? (
          <ErrorCallout
            errorTitle="Something went wrong :("
            errorMsg={`Failed to fetch document sets - ${
              docSetsError?.message ?? "unknown error"
            }`}
          />
        ) : agentsError ? (
          <ErrorCallout
            errorTitle="Something went wrong :("
            errorMsg={`Failed to fetch agents - ${
              agentsError?.message ?? "unknown error"
            }`}
          />
        ) : (
          <SlackChannelConfigCreationForm
            slack_bot_id={slackChannelConfig.slack_bot_id}
            documentSets={documentSets}
            personas={agents}
            standardAnswerCategoryResponse={
              isPaidEnterprise
                ? {
                    paidEnterpriseFeaturesEnabled: true,
                    categories: standardAnswerCategories ?? [],
                    ...(stdAnswerError
                      ? { error: { message: String(stdAnswerError) } }
                      : {}),
                  }
                : { paidEnterpriseFeaturesEnabled: false }
            }
            existingSlackChannelConfig={slackChannelConfig}
          />
        )}
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}

export default function Page(props: { params: Promise<{ id: string }> }) {
  const params = use(props.params);

  return <EditSlackChannelConfigContent id={params.id} />;
}


================================================
FILE: web/src/app/admin/bots/[bot-id]/channels/new/page.tsx
================================================
"use client";

import { use, useEffect } from "react";
import { SlackChannelConfigCreationForm } from "@/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm";
import { ErrorCallout } from "@/components/ErrorCallout";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SvgSlack } from "@opal/icons";
import { useDocumentSets } from "@/app/admin/documents/sets/hooks";
import { useAgents } from "@/hooks/useAgents";
import { useStandardAnswerCategories } from "@/app/ee/admin/standard-answer/hooks";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import type { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { useRouter } from "next/navigation";

function NewChannelConfigContent({ slackBotId }: { slackBotId: number }) {
  const isPaidEnterprise = usePaidEnterpriseFeaturesEnabled();

  const {
    data: documentSets,
    isLoading: isDocSetsLoading,
    error: docSetsError,
  } = useDocumentSets();

  const {
    agents,
    isLoading: isAgentsLoading,
    error: agentsError,
  } = useAgents();

  const {
    data: standardAnswerCategories,
    isLoading: isStdAnswerLoading,
    error: stdAnswerError,
  } = useStandardAnswerCategories();

  if (
    isDocSetsLoading ||
    isAgentsLoading ||
    (isPaidEnterprise && isStdAnswerLoading)
  ) {
    return <SimpleLoader />;
  }

  if (docSetsError || !documentSets) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch document sets - ${
          docSetsError?.message ?? "unknown error"
        }`}
      />
    );
  }

  if (agentsError) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch agents - ${
          agentsError?.message ?? "unknown error"
        }`}
      />
    );
  }

  const standardAnswerCategoryResponse: StandardAnswerCategoryResponse =
    isPaidEnterprise
      ? {
          paidEnterpriseFeaturesEnabled: true,
          categories: standardAnswerCategories ?? [],
          ...(stdAnswerError
            ? { error: { message: String(stdAnswerError) } }
            : {}),
        }
      : { paidEnterpriseFeaturesEnabled: false };

  return (
    <SlackChannelConfigCreationForm
      slack_bot_id={slackBotId}
      documentSets={documentSets}
      personas={agents}
      standardAnswerCategoryResponse={standardAnswerCategoryResponse}
    />
  );
}

export default function Page(props: { params: Promise<{ "bot-id": string }> }) {
  const unwrappedParams = use(props.params);
  const router = useRouter();

  const slack_bot_id_raw = unwrappedParams?.["bot-id"] || null;
  const slack_bot_id = slack_bot_id_raw
    ? parseInt(slack_bot_id_raw as string, 10)
    : null;

  useEffect(() => {
    if (!slack_bot_id || isNaN(slack_bot_id)) {
      router.replace("/admin/bots");
    }
  }, [slack_bot_id, router]);

  if (!slack_bot_id || isNaN(slack_bot_id)) {
    return null;
  }

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgSlack}
        title="Configure OnyxBot for Slack Channel"
        separator
        backButton
      />
      <SettingsLayouts.Body>
        <NewChannelConfigContent slackBotId={slack_bot_id} />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/bots/[bot-id]/hooks.ts
================================================
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SlackBot, SlackChannelConfig } from "@/lib/types";
import useSWR, { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

export const useSlackChannelConfigs = () => {
  const swrResponse = useSWR<SlackChannelConfig[]>(
    SWR_KEYS.slackChannels,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshSlackChannelConfigs: () => mutate(SWR_KEYS.slackChannels),
  };
};

export const useSlackBots = () => {
  const swrResponse = useSWR<SlackBot[]>(
    SWR_KEYS.slackBots,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshSlackBots: () => mutate(SWR_KEYS.slackBots),
  };
};

export const useSlackBot = (botId: number) => {
  const swrResponse = useSWR<SlackBot>(
    SWR_KEYS.slackBot(botId),
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshSlackBot: () => mutate(SWR_KEYS.slackBot(botId)),
  };
};

export const useSlackChannelConfigsByBot = (botId: number) => {
  const swrResponse = useSWR<SlackChannelConfig[]>(
    SWR_KEYS.slackBotConfig(botId),
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshSlackChannelConfigs: () => mutate(SWR_KEYS.slackBotConfig(botId)),
  };
};


================================================
FILE: web/src/app/admin/bots/[bot-id]/lib.ts
================================================
import { SlackBotResponseType } from "@/lib/types";
import { Persona } from "@/app/admin/agents/interfaces";

interface SlackChannelConfigCreationRequest {
  slack_bot_id: number;
  document_sets: number[];
  persona_id: number | null;
  enable_auto_filters: boolean;
  channel_name: string;
  answer_validity_check_enabled: boolean;
  questionmark_prefilter_enabled: boolean;
  respond_tag_only: boolean;
  is_ephemeral: boolean;
  respond_to_bots: boolean;
  show_continue_in_web_ui: boolean;
  respond_member_group_list: string[];
  follow_up_tags?: string[];
  usePersona: boolean;
  response_type: SlackBotResponseType;
  standard_answer_categories: number[];
  disabled: boolean;
}

const buildFiltersFromCreationRequest = (
  creationRequest: SlackChannelConfigCreationRequest
): string[] => {
  const answerFilters = [] as string[];
  if (creationRequest.answer_validity_check_enabled) {
    answerFilters.push("well_answered_postfilter");
  }
  if (creationRequest.questionmark_prefilter_enabled) {
    answerFilters.push("questionmark_prefilter");
  }
  return answerFilters;
};

const buildRequestBodyFromCreationRequest = (
  creationRequest: SlackChannelConfigCreationRequest
) => {
  return JSON.stringify({
    slack_bot_id: creationRequest.slack_bot_id,
    channel_name: creationRequest.channel_name,
    respond_tag_only: creationRequest.respond_tag_only,
    respond_to_bots: creationRequest.respond_to_bots,
    is_ephemeral: creationRequest.is_ephemeral,
    show_continue_in_web_ui: creationRequest.show_continue_in_web_ui,
    enable_auto_filters: creationRequest.enable_auto_filters,
    respond_member_group_list: creationRequest.respond_member_group_list,
    answer_filters: buildFiltersFromCreationRequest(creationRequest),
    follow_up_tags: creationRequest.follow_up_tags?.filter((tag) => tag !== ""),
    ...(creationRequest.usePersona
      ? { persona_id: creationRequest.persona_id }
      : { document_sets: creationRequest.document_sets }),
    response_type: creationRequest.response_type,
    standard_answer_categories: creationRequest.standard_answer_categories,
    disabled: creationRequest.disabled,
  });
};

export const createSlackChannelConfig = async (
  creationRequest: SlackChannelConfigCreationRequest
) => {
  return fetch("/api/manage/admin/slack-app/channel", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromCreationRequest(creationRequest),
  });
};

export const updateSlackChannelConfig = async (
  id: number,
  creationRequest: SlackChannelConfigCreationRequest
) => {
  return fetch(`/api/manage/admin/slack-app/channel/${id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromCreationRequest(creationRequest),
  });
};

export const deleteSlackChannelConfig = async (id: number) => {
  return fetch(`/api/manage/admin/slack-app/channel/${id}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
};

export function isPersonaASlackBotPersona(persona: Persona) {
  return persona.name.startsWith("__slack_bot_persona__");
}


================================================
FILE: web/src/app/admin/bots/[bot-id]/page.tsx
================================================
"use client";

import { use } from "react";
import { ErrorCallout } from "@/components/ErrorCallout";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import SlackChannelConfigsTable from "./SlackChannelConfigsTable";
import { useSlackBot, useSlackChannelConfigsByBot } from "./hooks";
import { ExistingSlackBotForm } from "../SlackBotUpdateForm";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SvgSlack } from "@opal/icons";
import { getErrorMsg } from "@/lib/error";

function SlackBotEditContent({ botId }: { botId: string }) {
  const {
    data: slackBot,
    isLoading: isSlackBotLoading,
    error: slackBotError,
    refreshSlackBot,
  } = useSlackBot(Number(botId));

  const {
    data: slackChannelConfigs,
    isLoading: isSlackChannelConfigsLoading,
    error: slackChannelConfigsError,
    refreshSlackChannelConfigs,
  } = useSlackChannelConfigsByBot(Number(botId));

  if (isSlackBotLoading || isSlackChannelConfigsLoading) {
    return <SimpleLoader />;
  }

  if (slackBotError || !slackBot) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch Slack Bot ${botId}: ${getErrorMsg(
          slackBotError
        )}`}
      />
    );
  }

  if (slackChannelConfigsError || !slackChannelConfigs) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch Slack Bot ${botId}: ${getErrorMsg(
          slackChannelConfigsError
        )}`}
      />
    );
  }

  return (
    <>
      <ExistingSlackBotForm
        existingSlackBot={slackBot}
        refreshSlackBot={refreshSlackBot}
      />

      <div className="mt-8">
        <SlackChannelConfigsTable
          slackBotId={slackBot.id}
          slackChannelConfigs={slackChannelConfigs}
          refresh={refreshSlackChannelConfigs}
        />
      </div>
    </>
  );
}

export default function Page({
  params,
}: {
  params: Promise<{ "bot-id": string }>;
}) {
  const unwrappedParams = use(params);

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgSlack}
        title="Edit Slack Bot"
        backButton
        separator
      />
      <SettingsLayouts.Body>
        <SlackBotEditContent botId={unwrappedParams["bot-id"]} />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/bots/new/lib.ts
================================================
export interface SlackBotCreationRequest {
  name: string;
  enabled: boolean;

  bot_token: string;
  app_token: string;
  user_token?: string;
}

const buildRequestBodyFromCreationRequest = (
  creationRequest: SlackBotCreationRequest
): string => {
  return JSON.stringify({
    name: creationRequest.name,
    enabled: creationRequest.enabled,
    bot_token: creationRequest.bot_token,
    app_token: creationRequest.app_token,
    user_token: creationRequest.user_token,
  });
};

export const createSlackBot = async (
  creationRequest: SlackBotCreationRequest
) => {
  return fetch("/api/manage/admin/slack-app/bots", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromCreationRequest(creationRequest),
  });
};

export const updateSlackBot = async (
  id: number,
  creationRequest: SlackBotCreationRequest
) => {
  return fetch(`/api/manage/admin/slack-app/bots/${id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromCreationRequest(creationRequest),
  });
};

export const deleteSlackBot = async (id: number) => {
  return fetch(`/api/manage/admin/slack-app/bots/${id}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
};


================================================
FILE: web/src/app/admin/bots/new/page.tsx
================================================
"use client";

import { NewSlackBotForm } from "../SlackBotCreationForm";

export default function Page() {
  return <NewSlackBotForm />;
}


================================================
FILE: web/src/app/admin/bots/page.tsx
================================================
"use client";

import { ErrorCallout } from "@/components/ErrorCallout";
import { ThreeDotsLoader } from "@/components/Loading";
import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh";
import { SlackBotTable } from "./SlackBotTable";
import { useSlackBots } from "./[bot-id]/hooks";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { DOCS_ADMINS_PATH } from "@/lib/constants";

const route = ADMIN_ROUTES.SLACK_BOTS;

function Main() {
  const {
    data: slackBots,
    isLoading: isSlackBotsLoading,
    error: slackBotsError,
  } = useSlackBots();

  if (isSlackBotsLoading) {
    return <ThreeDotsLoader />;
  }

  if (slackBotsError || !slackBots) {
    const errorMsg =
      slackBotsError?.info?.message ||
      slackBotsError?.info?.detail ||
      "An unknown error occurred";

    return (
      <ErrorCallout errorTitle="Error loading apps" errorMsg={`${errorMsg}`} />
    );
  }

  return (
    <div className="mb-8">
      <p className="mb-2 text-sm text-muted-foreground">
        Setup Slack bots that connect to Onyx. Once setup, you will be able to
        ask questions to Onyx directly from Slack. Additionally, you can:
      </p>

      <div className="mb-2">
        <ul className="list-disc mt-2 ml-4 text-sm text-muted-foreground">
          <li>
            Setup OnyxBot to automatically answer questions in certain channels.
          </li>
          <li>
            Choose which document sets OnyxBot should answer from, depending on
            the channel the question is being asked.
          </li>
          <li>
            Directly message OnyxBot to search just as you would in the web UI.
          </li>
        </ul>
      </div>

      <p className="mb-6 text-sm text-muted-foreground">
        Follow the{" "}
        <a
          className="text-blue-500 hover:underline"
          href={`${DOCS_ADMINS_PATH}/getting_started/slack_bot_setup`}
          target="_blank"
          rel="noopener noreferrer"
        >
          guide{" "}
        </a>
        found in the Onyx documentation to get started!
      </p>

      <CreateButton href="/admin/bots/new">New Slack Bot</CreateButton>

      <SlackBotTable slackBots={slackBots} />
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <InstantSSRAutoRefresh />
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/configuration/chat-preferences/page.tsx
================================================
"use client";

import ChatPreferencesPage from "@/refresh-pages/admin/ChatPreferencesPage";

export default function Page() {
  return <ChatPreferencesPage />;
}


================================================
FILE: web/src/app/admin/configuration/code-interpreter/page.tsx
================================================
export { default } from "@/refresh-pages/admin/CodeInterpreterPage";


================================================
FILE: web/src/app/admin/configuration/document-processing/page.tsx
================================================
"use client";

import { useState } from "react";
import CardSection from "@/components/admin/CardSection";
import { Button } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ThreeDotsLoader } from "@/components/Loading";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { SvgLock } from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.DOCUMENT_PROCESSING;

function Main() {
  const {
    data: isApiKeySet,
    error,
    mutate,
    isLoading,
  } = useSWR<{
    unstructured_api_key: string | null;
  }>(SWR_KEYS.unstructuredApiKeySet, (url: string) =>
    fetch(url).then((res) => res.json())
  );

  const [apiKey, setApiKey] = useState("");

  const handleSave = async () => {
    try {
      await fetch(
        `/api/search-settings/upsert-unstructured-api-key?unstructured_api_key=${apiKey}`,
        {
          method: "PUT",
        }
      );
    } catch (error) {
      console.error("Failed to save API key:", error);
    }
    mutate();
  };

  const handleDelete = async () => {
    try {
      await fetch("/api/search-settings/delete-unstructured-api-key", {
        method: "DELETE",
      });
      setApiKey("");
    } catch (error) {
      console.error("Failed to delete API key:", error);
    }
    mutate();
  };

  if (isLoading) {
    return <ThreeDotsLoader />;
  }
  return (
    <div className="pb-36">
      <div className="w-full max-w-2xl">
        <CardSection className="flex flex-col gap-2">
          <Text
            as="p"
            headingH3
            text05
            className="border-b border-border-01 pb-2"
          >
            Process with Unstructured API
          </Text>

          <div className="flex flex-col gap-2">
            <Text as="p" mainContentBody text04 className="leading-relaxed">
              Unstructured extracts and transforms complex data from formats
              like .pdf, .docx, .png, .pptx, etc. into clean text for Onyx to
              ingest. Provide an API key to enable Unstructured document
              processing.
            </Text>
            <Text as="p" mainContentMuted text03>
              <span className="font-main-ui-action text-text-03">Note:</span>{" "}
              this will send documents to Unstructured servers for processing.
            </Text>
            <Text as="p" mainContentBody text04 className="leading-relaxed">
              Learn more about Unstructured{" "}
              <a
                href="https://docs.unstructured.io/welcome"
                target="_blank"
                rel="noopener noreferrer"
                className="text-action-link-05 underline-offset-4 hover:underline"
              >
                here
              </a>
              .
            </Text>
            <div className="pt-1.5">
              {isApiKeySet ? (
                <div
                  className={cn(
                    "flex",
                    "items-center",
                    "gap-0.5",
                    "rounded-08",
                    "border",
                    "border-border-01",
                    "bg-background-neutral-01",
                    "px-2",
                    "py-1.5"
                  )}
                >
                  <Text
                    as="p"
                    mainUiMuted
                    text03
                    className="flex-1 tracking-[0.3em] text-text-03"
                  >
                    ••••••••••••••••
                  </Text>
                  <SvgLock className="h-4 w-4 stroke-text-03" aria-hidden />
                </div>
              ) : (
                <InputTypeIn
                  placeholder="Enter API Key"
                  value={apiKey}
                  onChange={(e) => setApiKey(e.target.value)}
                />
              )}
            </div>
            <div className="flex flex-col gap-2 desktop:flex-row desktop:items-center desktop:gap-2">
              {isApiKeySet ? (
                <>
                  <Button variant="danger" onClick={handleDelete}>
                    Delete API Key
                  </Button>
                  <Text as="p" mainContentBody text04 className="desktop:mt-0">
                    Delete the current API key before updating.
                  </Text>
                </>
              ) : (
                <Button variant="action" onClick={handleSave}>
                  Save API Key
                </Button>
              )}
            </div>
          </div>
        </CardSection>
      </div>
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/configuration/image-generation/page.tsx
================================================
export { default } from "@/refresh-pages/admin/ImageGenerationPage";


================================================
FILE: web/src/app/admin/configuration/llm/ModelConfigurationField.tsx
================================================
"use client";

import { ArrayHelpers, FieldArray, FormikProps, useField } from "formik";
import { ModelConfiguration } from "@/interfaces/llm";
import { ManualErrorMessage, TextFormField } from "@/components/Field";
import { useEffect, useState } from "react";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { Button } from "@opal/components";
import { SvgX } from "@opal/icons";
import Text from "@/refresh-components/texts/Text";

function ModelConfigurationRow({
  name,
  index,
  arrayHelpers,
  formikProps,
  setError,
}: {
  name: string;
  index: number;
  arrayHelpers: ArrayHelpers;
  formikProps: FormikProps<{ model_configurations: ModelConfiguration[] }>;
  setError: (value: string | null) => void;
}) {
  const [, input] = useField(`${name}[${index}]`);
  useEffect(() => {
    if (!input.touched) return;
    setError((input.error as { name: string } | undefined)?.name ?? null);
  }, [input.touched, input.error]);

  return (
    <div key={index} className="flex flex-row w-full gap-4">
      <div
        className={`flex flex-[2] ${
          input.touched && input.error ? "border-2 border-error rounded-lg" : ""
        }`}
      >
        <TextFormField
          name={`${name}[${index}].name`}
          label=""
          placeholder={`model-name-${index + 1}`}
          removeLabel
          hideError
        />
      </div>
      <div className="flex flex-[1]">
        <TextFormField
          name={`${name}[${index}].max_input_tokens`}
          label=""
          placeholder="Default"
          removeLabel
          hideError
          type="number"
          min={1}
        />
      </div>
      <div className="flex flex-col justify-center">
        <Button
          disabled={formikProps.values.model_configurations.length <= 1}
          onClick={() => {
            if (formikProps.values.model_configurations.length > 1) {
              setError(null);
              arrayHelpers.remove(index);
            }
          }}
          icon={SvgX}
          prominence="secondary"
        />
      </div>
    </div>
  );
}

export function ModelConfigurationField({
  name,
  formikProps,
}: {
  name: string;
  formikProps: FormikProps<{ model_configurations: ModelConfiguration[] }>;
}) {
  const [errorMap, setErrorMap] = useState<{ [index: number]: string }>({});
  const [finalError, setFinalError] = useState<string | undefined>();

  return (
    <div className="pb-5 flex flex-col w-full">
      <div className="flex flex-col">
        <Text as="p" mainUiAction>
          Model Configurations
        </Text>
        <Text as="p" secondaryBody text03>
          Add models and customize the number of input tokens that they accept.
        </Text>
      </div>
      <FieldArray
        name={name}
        render={(arrayHelpers: ArrayHelpers) => (
          <div className="flex flex-col">
            <div className="flex flex-col gap-4 py-4">
              <div className="flex">
                <Text as="p" secondaryBody className="flex flex-[2]">
                  Model Name
                </Text>
                <Text as="p" secondaryBody className="flex flex-[1]">
                  Max Input Tokens
                </Text>
                <div className="w-10" />
              </div>
              {formikProps.values.model_configurations.map((_, index) => (
                <ModelConfigurationRow
                  key={index}
                  name={name}
                  formikProps={formikProps}
                  arrayHelpers={arrayHelpers}
                  index={index}
                  setError={(message: string | null) => {
                    const newErrors = { ...errorMap };
                    if (message) {
                      newErrors[index] = message;
                    } else {
                      delete newErrors[index];
                      for (const key in newErrors) {
                        const numKey = Number(key);
                        if (numKey > index) {
                          const errorValue = newErrors[key];
                          if (errorValue !== undefined) {
                            // Ensure the value is not undefined
                            newErrors[numKey - 1] = errorValue;
                            delete newErrors[numKey];
                          }
                        }
                      }
                    }
                    setErrorMap(newErrors);
                    setFinalError(
                      Object.values(newErrors).filter((item) => item)[0]
                    );
                  }}
                />
              ))}
            </div>
            {finalError && (
              <ManualErrorMessage>{finalError}</ManualErrorMessage>
            )}
            <div className="mt-3">
              <CreateButton
                onClick={() => {
                  arrayHelpers.push({
                    name: "",
                    is_visible: true,
                    // Use null so Yup.number().nullable() accepts empty inputs
                    max_input_tokens: null,
                  });
                }}
              >
                Add New
              </CreateButton>
            </div>
          </div>
        )}
      />
    </div>
  );
}


================================================
FILE: web/src/app/admin/configuration/llm/ProviderIcon.tsx
================================================
import { defaultTailwindCSS, IconProps } from "@/components/icons/icons";
import { getProviderIcon } from "@/app/admin/configuration/llm/utils";

export interface ProviderIconProps extends IconProps {
  provider: string;
  modelName?: string;
}

export const ProviderIcon = ({
  provider,
  modelName,
  size = 16,
  className = defaultTailwindCSS,
}: ProviderIconProps) => {
  const Icon = getProviderIcon(provider, modelName);
  return <Icon size={size} className={className} />;
};


================================================
FILE: web/src/app/admin/configuration/llm/page.tsx
================================================
export { default } from "@/refresh-pages/admin/LLMConfigurationPage";


================================================
FILE: web/src/app/admin/configuration/llm/utils.ts
================================================
import { JSX } from "react";
import {
  AnthropicIcon,
  AmazonIcon,
  AzureIcon,
  CPUIcon,
  MicrosoftIconSVG,
  MistralIcon,
  MetaIcon,
  GeminiIcon,
  IconProps,
  DeepseekIcon,
  OpenAISVG,
  QwenIcon,
  OllamaIcon,
  LMStudioIcon,
  LiteLLMIcon,
  ZAIIcon,
} from "@/components/icons/icons";
import {
  OllamaModelResponse,
  OpenRouterModelResponse,
  BedrockModelResponse,
  LMStudioModelResponse,
  LiteLLMProxyModelResponse,
  BifrostModelResponse,
  ModelConfiguration,
  LLMProviderName,
  BedrockFetchParams,
  OllamaFetchParams,
  LMStudioFetchParams,
  OpenRouterFetchParams,
  LiteLLMProxyFetchParams,
  BifrostFetchParams,
} from "@/interfaces/llm";
import { SvgAws, SvgBifrost, SvgOpenrouter } from "@opal/icons";

// Aggregator providers that host models from multiple vendors
export const AGGREGATOR_PROVIDERS = new Set([
  "bedrock",
  "bedrock_converse",
  "openrouter",
  "ollama_chat",
  "lm_studio",
  "litellm_proxy",
  "bifrost",
  "vertex_ai",
]);

export const getProviderIcon = (
  providerName: string,
  modelName?: string
): (({ size, className }: IconProps) => JSX.Element) => {
  const iconMap: Record<
    string,
    ({ size, className }: IconProps) => JSX.Element
  > = {
    amazon: AmazonIcon,
    phi: MicrosoftIconSVG,
    mistral: MistralIcon,
    ministral: MistralIcon,
    llama: MetaIcon,
    ollama_chat: OllamaIcon,
    ollama: OllamaIcon,
    lm_studio: LMStudioIcon,
    gemini: GeminiIcon,
    deepseek: DeepseekIcon,
    claude: AnthropicIcon,
    anthropic: AnthropicIcon,
    openai: OpenAISVG,
    // Azure OpenAI should display the Azure logo
    azure: AzureIcon,
    microsoft: MicrosoftIconSVG,
    meta: MetaIcon,
    google: GeminiIcon,
    qwen: QwenIcon,
    qwq: QwenIcon,
    zai: ZAIIcon,
    // Cloud providers - use AWS icon for Bedrock
    bedrock: SvgAws,
    bedrock_converse: SvgAws,
    openrouter: SvgOpenrouter,
    litellm_proxy: LiteLLMIcon,
    bifrost: SvgBifrost,
    vertex_ai: GeminiIcon,
  };

  const lowerProviderName = providerName.toLowerCase();

  // For aggregator providers (bedrock, openrouter, vertex_ai), prioritize showing
  // the vendor icon based on model name (e.g., show Claude icon for Bedrock Claude models)
  if (AGGREGATOR_PROVIDERS.has(lowerProviderName) && modelName) {
    const lowerModelName = modelName.toLowerCase();
    for (const [key, icon] of Object.entries(iconMap)) {
      if (lowerModelName.includes(key)) {
        return icon;
      }
    }
  }

  // Check if provider name directly matches an icon
  if (lowerProviderName in iconMap) {
    const icon = iconMap[lowerProviderName];
    if (icon) {
      return icon;
    }
  }

  // For non-aggregator providers, check if model name contains any of the keys
  if (modelName) {
    const lowerModelName = modelName.toLowerCase();
    for (const [key, icon] of Object.entries(iconMap)) {
      if (lowerModelName.includes(key)) {
        return icon;
      }
    }
  }

  // Fallback to CPU icon if no matches
  return CPUIcon;
};

export const isAnthropic = (provider: string, modelName?: string) =>
  provider === LLMProviderName.ANTHROPIC ||
  !!modelName?.toLowerCase().includes("claude");

/**
 * Fetches Bedrock models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchBedrockModels = async (
  params: BedrockFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  if (!params.aws_region_name) {
    return { models: [], error: "AWS region is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/bedrock/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        aws_region_name: params.aws_region_name,
        aws_access_key_id: params.aws_access_key_id,
        aws_secret_access_key: params.aws_secret_access_key,
        aws_bearer_token_bedrock: params.aws_bearer_token_bedrock,
        provider_name: params.provider_name,
      }),
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch {
        // ignore JSON parsing errors
      }
      return { models: [], error: errorMessage };
    }

    const data: BedrockModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.name,
      display_name: modelData.display_name,
      is_visible: false,
      max_input_tokens: modelData.max_input_tokens,
      supports_image_input: modelData.supports_image_input,
      supports_reasoning: false,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches Ollama models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchOllamaModels = async (
  params: OllamaFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  const apiBase = params.api_base;
  if (!apiBase) {
    return { models: [], error: "API Base is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/ollama/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        api_base: apiBase,
        provider_name: params.provider_name,
      }),
      signal: params.signal,
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch {
        // ignore JSON parsing errors
      }
      return { models: [], error: errorMessage };
    }

    const data: OllamaModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.name,
      display_name: modelData.display_name,
      is_visible: true,
      max_input_tokens: modelData.max_input_tokens,
      supports_image_input: modelData.supports_image_input,
      supports_reasoning: false,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches OpenRouter models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchOpenRouterModels = async (
  params: OpenRouterFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  const apiBase = params.api_base;
  const apiKey = params.api_key;
  if (!apiBase) {
    return { models: [], error: "API Base is required" };
  }
  if (!apiKey) {
    return { models: [], error: "API Key is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/openrouter/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        api_base: apiBase,
        api_key: apiKey,
        provider_name: params.provider_name,
      }),
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch (jsonError) {
        console.warn(
          "Failed to parse OpenRouter model fetch error response",
          jsonError
        );
      }
      return { models: [], error: errorMessage };
    }

    const data: OpenRouterModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.name,
      display_name: modelData.display_name,
      is_visible: true,
      max_input_tokens: modelData.max_input_tokens,
      supports_image_input: modelData.supports_image_input,
      supports_reasoning: false,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches LM Studio models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchLMStudioModels = async (
  params: LMStudioFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  const apiBase = params.api_base;
  if (!apiBase) {
    return { models: [], error: "API Base is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/lm-studio/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        api_base: apiBase,
        api_key: params.api_key,
        api_key_changed: params.api_key_changed ?? false,
        provider_name: params.provider_name,
      }),
      signal: params.signal,
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch (jsonError) {
        console.warn(
          "Failed to parse LM Studio model fetch error response",
          jsonError
        );
      }
      return { models: [], error: errorMessage };
    }

    const data: LMStudioModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.name,
      display_name: modelData.display_name,
      is_visible: true,
      max_input_tokens: modelData.max_input_tokens,
      supports_image_input: modelData.supports_image_input,
      supports_reasoning: modelData.supports_reasoning,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches Bifrost models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchBifrostModels = async (
  params: BifrostFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  const apiBase = params.api_base;
  if (!apiBase) {
    return { models: [], error: "API Base is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/bifrost/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        api_base: apiBase,
        api_key: params.api_key,
        provider_name: params.provider_name,
      }),
      signal: params.signal,
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch (jsonError) {
        console.warn(
          "Failed to parse Bifrost model fetch error response",
          jsonError
        );
      }
      return { models: [], error: errorMessage };
    }

    const data: BifrostModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.name,
      display_name: modelData.display_name,
      is_visible: true,
      max_input_tokens: modelData.max_input_tokens,
      supports_image_input: modelData.supports_image_input,
      supports_reasoning: modelData.supports_reasoning,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches LiteLLM Proxy models directly without any form state dependencies.
 * Uses snake_case params to match API structure.
 */
export const fetchLiteLLMProxyModels = async (
  params: LiteLLMProxyFetchParams
): Promise<{ models: ModelConfiguration[]; error?: string }> => {
  const apiBase = params.api_base;
  const apiKey = params.api_key;
  if (!apiBase) {
    return { models: [], error: "API Base is required" };
  }
  if (!apiKey) {
    return { models: [], error: "API Key is required" };
  }

  try {
    const response = await fetch("/api/admin/llm/litellm/available-models", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        api_base: apiBase,
        api_key: apiKey,
        provider_name: params.provider_name,
      }),
      signal: params.signal,
    });

    if (!response.ok) {
      let errorMessage = "Failed to fetch models";
      try {
        const errorData = await response.json();
        errorMessage = errorData.detail || errorData.message || errorMessage;
      } catch {
        // ignore JSON parsing errors
      }
      return { models: [], error: errorMessage };
    }

    const data: LiteLLMProxyModelResponse[] = await response.json();
    const models: ModelConfiguration[] = data.map((modelData) => ({
      name: modelData.model_name,
      display_name: modelData.model_name,
      is_visible: true,
      max_input_tokens: null,
      supports_image_input: false,
      supports_reasoning: false,
    }));

    return { models };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error";
    return { models: [], error: errorMessage };
  }
};

/**
 * Fetches models for a provider. Accepts form values directly and maps them
 * to the expected fetch params format internally.
 */
export const fetchModels = async (
  providerName: string,
  formValues: {
    api_base?: string;
    api_key?: string;
    api_key_changed?: boolean;
    name?: string;
    custom_config?: Record<string, string>;
    model_configurations?: ModelConfiguration[];
  },
  signal?: AbortSignal
) => {
  const customConfig = formValues.custom_config || {};

  switch (providerName) {
    case LLMProviderName.BEDROCK:
      return fetchBedrockModels({
        aws_region_name: customConfig.AWS_REGION_NAME || "",
        aws_access_key_id: customConfig.AWS_ACCESS_KEY_ID,
        aws_secret_access_key: customConfig.AWS_SECRET_ACCESS_KEY,
        aws_bearer_token_bedrock: customConfig.AWS_BEARER_TOKEN_BEDROCK,
        provider_name: formValues.name,
      });
    case LLMProviderName.OLLAMA_CHAT:
      return fetchOllamaModels({
        api_base: formValues.api_base,
        provider_name: formValues.name,
        signal,
      });
    case LLMProviderName.LM_STUDIO:
      return fetchLMStudioModels({
        api_base: formValues.api_base,
        api_key: formValues.custom_config?.LM_STUDIO_API_KEY,
        api_key_changed: formValues.api_key_changed ?? false,
        provider_name: formValues.name,
        signal,
      });
    case LLMProviderName.OPENROUTER:
      return fetchOpenRouterModels({
        api_base: formValues.api_base,
        api_key: formValues.api_key,
        provider_name: formValues.name,
      });
    case LLMProviderName.LITELLM_PROXY:
      return fetchLiteLLMProxyModels({
        api_base: formValues.api_base,
        api_key: formValues.api_key,
        provider_name: formValues.name,
        signal,
      });
    case LLMProviderName.BIFROST:
      return fetchBifrostModels({
        api_base: formValues.api_base,
        api_key: formValues.api_key,
        provider_name: formValues.name,
        signal,
      });
    default:
      return { models: [], error: `Unknown provider: ${providerName}` };
  }
};

export function canProviderFetchModels(providerName?: string) {
  if (!providerName) return false;
  switch (providerName) {
    case LLMProviderName.BEDROCK:
    case LLMProviderName.OLLAMA_CHAT:
    case LLMProviderName.LM_STUDIO:
    case LLMProviderName.OPENROUTER:
    case LLMProviderName.LITELLM_PROXY:
    case LLMProviderName.BIFROST:
      return true;
    default:
      return false;
  }
}


================================================
FILE: web/src/app/admin/configuration/search/UpgradingPage.tsx
================================================
import { ThreeDotsLoader } from "@/components/Loading";
import Modal from "@/refresh-components/Modal";
import { errorHandlingFetcher } from "@/lib/fetcher";
import {
  ConnectorIndexingStatusLite,
  ConnectorIndexingStatusLiteResponse,
  FailedConnectorIndexingStatus,
  ValidStatuses,
} from "@/lib/types";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Title from "@/components/ui/title";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { useMemo, useState } from "react";
import useSWR, { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ReindexingProgressTable } from "../../../../components/embedding/ReindexingProgressTable";
import { ErrorCallout } from "@/components/ErrorCallout";
import {
  CloudEmbeddingModel,
  HostedEmbeddingModel,
} from "../../../../components/embedding/interfaces";
import { Connector } from "@/lib/connectors/connectors";
import { FailedReIndexAttempts } from "@/components/embedding/FailedReIndexAttempts";
import { useConnectorIndexingStatusWithPagination } from "@/lib/hooks";
import { SvgX } from "@opal/icons";
import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";

export default function UpgradingPage({
  futureEmbeddingModel,
}: {
  futureEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel;
}) {
  const [isCancelling, setIsCancelling] = useState<boolean>(false);
  const vectorDbEnabled = useVectorDbEnabled();

  const { data: connectors, isLoading: isLoadingConnectors } = useSWR<
    Connector<any>[]
  >(vectorDbEnabled ? SWR_KEYS.connector : null, errorHandlingFetcher, {
    refreshInterval: 5000,
  });

  const {
    data: connectorIndexingStatuses,
    isLoading: isLoadingOngoingReIndexingStatus,
  } = useConnectorIndexingStatusWithPagination(
    { secondary_index: true, get_all_connectors: true },
    5000,
    vectorDbEnabled
  ) as {
    data: ConnectorIndexingStatusLiteResponse[];
    isLoading: boolean;
  };

  const { data: failedIndexingStatus } = useSWR<
    FailedConnectorIndexingStatus[]
  >(
    vectorDbEnabled
      ? "/api/manage/admin/connector/failed-indexing-status?secondary_index=true"
      : null,
    errorHandlingFetcher,
    { refreshInterval: 5000 }
  );

  const onCancel = async () => {
    const response = await fetch("/api/search-settings/cancel-new-embedding", {
      method: "POST",
    });
    if (response.ok) {
      mutate(SWR_KEYS.secondarySearchSettings);
    } else {
      alert(
        `Failed to cancel embedding model update - ${await response.text()}`
      );
    }
    setIsCancelling(false);
  };
  const statusOrder: Record<ValidStatuses, number> = useMemo(
    () => ({
      invalid: 0,
      failed: 1,
      canceled: 2,
      completed_with_errors: 3,
      not_started: 4,
      in_progress: 5,
      success: 6,
    }),
    []
  );

  const ongoingReIndexingStatus = useMemo(() => {
    return connectorIndexingStatuses
      .flatMap(
        (status) => status.indexing_statuses as ConnectorIndexingStatusLite[]
      )
      .filter((status) => status.cc_pair_id !== undefined);
  }, [connectorIndexingStatuses]);

  const visibleReindexingStatus = useMemo(() => {
    const statuses = ongoingReIndexingStatus || [];

    if (futureEmbeddingModel.switchover_type === "active_only") {
      return statuses.filter(
        (status) =>
          status.cc_pair_status !== ConnectorCredentialPairStatus.PAUSED
      );
    }

    return statuses;
  }, [futureEmbeddingModel.switchover_type, ongoingReIndexingStatus]);

  const sortedReindexingProgress = useMemo(() => {
    return [...(visibleReindexingStatus || [])].sort((a, b) => {
      const statusComparison =
        statusOrder[a.last_status || "not_started"] -
        statusOrder[b.last_status || "not_started"];

      if (statusComparison !== 0) {
        return statusComparison;
      }

      return (a.cc_pair_id || 0) - (b.cc_pair_id || 0);
    });
  }, [visibleReindexingStatus, statusOrder]);

  const hasVisibleReindexingProgress = sortedReindexingProgress.length > 0;

  if (isLoadingConnectors || isLoadingOngoingReIndexingStatus) {
    return <ThreeDotsLoader />;
  }

  return (
    <>
      {isCancelling && (
        <Modal open onOpenChange={() => setIsCancelling(false)}>
          <Modal.Content width="sm" height="sm">
            <Modal.Header
              icon={SvgX}
              title="Cancel Embedding Model Switch"
              onClose={() => setIsCancelling(false)}
            />
            <Modal.Body>
              <div>
                Are you sure you want to cancel? Cancelling will revert to the
                previous model and all progress will be lost.
              </div>
            </Modal.Body>
            <Modal.Footer>
              <OpalButton onClick={onCancel}>Confirm</OpalButton>
              <OpalButton
                prominence="secondary"
                onClick={() => setIsCancelling(false)}
              >
                Cancel
              </OpalButton>
            </Modal.Footer>
          </Modal.Content>
        </Modal>
      )}

      {futureEmbeddingModel && (
        <div>
          <Title className="mt-8">Current Upgrade Status</Title>
          <div className="mt-4">
            <div className="italic text-lg mb-2">
              Currently in the process of switching to:{" "}
              {futureEmbeddingModel.model_name}
            </div>

            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Button
              danger
              className="mt-4"
              onClick={() => setIsCancelling(true)}
            >
              Cancel
            </Button>

            {connectors && connectors.length > 0 ? (
              futureEmbeddingModel.switchover_type === "instant" ? (
                <div className="mt-8">
                  <h3 className="text-lg font-semibold mb-2">
                    Switching Embedding Models
                  </h3>
                  <p className="mb-4 text-text-800">
                    You&apos;re currently switching embedding models, and
                    you&apos;ve selected the instant switch option. The
                    transition will complete shortly.
                  </p>
                  <p className="text-text-600">
                    The new model will be active soon.
                  </p>
                </div>
              ) : (
                <>
                  {failedIndexingStatus && failedIndexingStatus.length > 0 && (
                    <FailedReIndexAttempts
                      failedIndexingStatuses={failedIndexingStatus}
                    />
                  )}

                  <Spacer rem={1} />
                  <Text as="p">
                    {futureEmbeddingModel.switchover_type === "active_only"
                      ? markdown(
                          "The table below shows the re-indexing progress of active (non-paused) connectors. Once all active connectors have been re-indexed successfully, the new model will be used for all search queries. Paused connectors will continue to be indexed in the background but won't block the switchover. Until then, we will use the old model so that no downtime is necessary during this transition.\nNote: User file re-indexing progress is not shown. You will see this page until all active connectors are re-indexed!"
                        )
                      : markdown(
                          "The table below shows the re-indexing progress of all existing connectors. Once all connectors have been re-indexed successfully, the new model will be used for all search queries. Until then, we will use the old model so that no downtime is necessary during this transition.\nNote: User file re-indexing progress is not shown. You will see this page until all user files are re-indexed!"
                        )}
                  </Text>
                  <Spacer rem={1} />

                  {sortedReindexingProgress ? (
                    <>
                      {futureEmbeddingModel.switchover_type === "active_only" &&
                        !hasVisibleReindexingProgress && (
                          <>
                            <Spacer rem={1} />
                            <Text as="p">
                              All connectors are currently paused, so none are
                              blocking the switchover. Paused connectors will
                              keep re-indexing in the background.
                            </Text>
                          </>
                        )}
                      {hasVisibleReindexingProgress && (
                        <ReindexingProgressTable
                          reindexingProgress={sortedReindexingProgress}
                        />
                      )}
                    </>
                  ) : (
                    <ErrorCallout errorTitle="Failed to fetch re-indexing progress" />
                  )}
                </>
              )
            ) : (
              <div className="mt-8 p-6 bg-background-100 border border-border-strong rounded-lg max-w-2xl">
                <h3 className="text-lg font-semibold mb-2">
                  Switching Embedding Models
                </h3>
                <p className="mb-4 text-text-800">
                  You&apos;re currently switching embedding models, but there
                  are no connectors to reindex. This means the transition will
                  be quick and seamless!
                </p>
                <p className="text-text-600">
                  The new model will be active soon.
                </p>
              </div>
            )}
          </div>
        </div>
      )}
    </>
  );
}


================================================
FILE: web/src/app/admin/configuration/search/page.tsx
================================================
"use client";

import { ThreeDotsLoader } from "@/components/Loading";
import { errorHandlingFetcher } from "@/lib/fetcher";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Text } from "@opal/components";
import Title from "@/components/ui/title";
import { Button } from "@opal/components";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ModelPreview } from "@/components/embedding/ModelSelector";
import {
  HostedEmbeddingModel,
  CloudEmbeddingModel,
} from "@/components/embedding/interfaces";
import { SavedSearchSettings } from "@/app/admin/embeddings/interfaces";
import UpgradingPage from "./UpgradingPage";
import { useContext } from "react";
import { SettingsContext } from "@/providers/SettingsProvider";
import CardSection from "@/components/admin/CardSection";
import { ErrorCallout } from "@/components/ErrorCallout";
import { useToastFromQuery } from "@/hooks/useToast";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.INDEX_SETTINGS;

export interface EmbeddingDetails {
  api_key: string;
  custom_config: any;
  default_model_id?: number;
  name: string;
}

function Main() {
  const settings = useContext(SettingsContext);
  useToastFromQuery({
    "search-settings": {
      message: `Changed search settings successfully`,
      type: "success",
    },
  });
  const {
    data: currentEmeddingModel,
    isLoading: isLoadingCurrentModel,
    error: currentEmeddingModelError,
  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(
    SWR_KEYS.currentSearchSettings,
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  const { data: searchSettings, isLoading: isLoadingSearchSettings } =
    useSWR<SavedSearchSettings | null>(
      SWR_KEYS.currentSearchSettings,
      errorHandlingFetcher,
      { refreshInterval: 5000 } // 5 seconds
    );

  const {
    data: futureEmbeddingModel,
    isLoading: isLoadingFutureModel,
    error: futureEmeddingModelError,
  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(
    SWR_KEYS.secondarySearchSettings,
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  if (
    isLoadingCurrentModel ||
    isLoadingFutureModel ||
    isLoadingSearchSettings
  ) {
    return <ThreeDotsLoader />;
  }

  if (
    currentEmeddingModelError ||
    !currentEmeddingModel ||
    futureEmeddingModelError
  ) {
    return <ErrorCallout errorTitle="Failed to fetch embedding model status" />;
  }

  return (
    <div>
      {!futureEmbeddingModel ? (
        <>
          {settings?.settings.needs_reindexing && (
            <p className="max-w-3xl">
              Your search settings are currently out of date! We recommend
              updating your search settings and re-indexing.
            </p>
          )}
          <Title className="mb-6 mt-8 !text-2xl">Embedding Model</Title>

          {currentEmeddingModel ? (
            <ModelPreview model={currentEmeddingModel} display showDetails />
          ) : (
            <Title className="mt-8 mb-4">Choose your Embedding Model</Title>
          )}

          <Title className="mb-2 mt-8 !text-2xl">Post-processing</Title>

          <CardSection className="!mr-auto mt-8 !w-96 shadow-lg bg-background-tint-00 rounded-16">
            {searchSettings && (
              <>
                <div className="px-1 w-full rounded-lg">
                  <div className="space-y-4">
                    <div>
                      <Text as="p" font="main-ui-action">
                        Multipass Indexing
                      </Text>
                      <Text as="p">
                        {searchSettings.multipass_indexing
                          ? "Enabled"
                          : "Disabled"}
                      </Text>
                    </div>

                    <div>
                      <Text as="p" font="main-ui-action">
                        Contextual RAG
                      </Text>
                      <Text as="p">
                        {searchSettings.enable_contextual_rag
                          ? "Enabled"
                          : "Disabled"}
                      </Text>
                    </div>
                  </div>
                </div>
              </>
            )}
          </CardSection>

          <div className="mt-4">
            <Button variant="action" href="/admin/embeddings">
              Update Index Settings
            </Button>
          </div>
        </>
      ) : (
        <UpgradingPage futureEmbeddingModel={futureEmbeddingModel} />
      )}
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header title={route.title} icon={route.icon} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/configuration/voice/VoiceProviderSetupModal.tsx
================================================
"use client";

import { markdown } from "@opal/utils";
import Image from "next/image";
import { FunctionComponent, useState, useEffect } from "react";
import {
  AzureIcon,
  ElevenLabsIcon,
  OpenAIIcon,
} from "@/components/icons/icons";
import Modal from "@/refresh-components/Modal";
import Button from "@/refresh-components/buttons/Button";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
import { FormField } from "@/refresh-components/form/FormField";
import { Vertical, Horizontal } from "@/layouts/input-layouts";
import { Section } from "@/layouts/general-layouts";
import { SvgArrowExchange, SvgOnyxLogo } from "@opal/icons";
import { Disabled } from "@opal/core";
import type { IconProps } from "@opal/types";
import { VoiceProviderView } from "@/hooks/useVoiceProviders";
import {
  testVoiceProvider,
  upsertVoiceProvider,
  fetchVoicesByType,
  fetchLLMProviders,
} from "@/lib/admin/voice/svc";

interface VoiceOption {
  value: string;
  label: string;
  description?: string;
}

interface LLMProviderView {
  id: number;
  name: string;
  provider: string;
  api_key: string | null;
}

interface ApiKeyOption {
  value: string;
  label: string;
  description?: string;
}

interface VoiceProviderSetupModalProps {
  providerType: string;
  existingProvider: VoiceProviderView | null;
  mode: "stt" | "tts";
  defaultModelId?: string | null;
  onClose: () => void;
  onSuccess: () => void;
}

const PROVIDER_LABELS: Record<string, string> = {
  openai: "OpenAI",
  azure: "Azure Speech Services",
  elevenlabs: "ElevenLabs",
};

const PROVIDER_API_KEY_URLS: Record<string, string> = {
  openai: "https://platform.openai.com/api-keys",
  azure: "https://portal.azure.com/",
  elevenlabs: "https://elevenlabs.io/app/settings/api-keys",
};

const PROVIDER_LOGO_URLS: Record<string, string> = {
  openai: "/Openai.svg",
  azure: "/Azure.png",
  elevenlabs: "/ElevenLabs.svg",
};

const PROVIDER_DOCS_URLS: Record<string, string> = {
  openai: "https://platform.openai.com/docs/guides/text-to-speech",
  azure: "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/",
  elevenlabs: "https://elevenlabs.io/docs",
};

const PROVIDER_VOICE_DOCS_URLS: Record<string, { url: string; label: string }> =
  {
    openai: {
      url: "https://platform.openai.com/docs/guides/text-to-speech#voice-options",
      label: "OpenAI",
    },
    azure: {
      url: "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts",
      label: "Azure",
    },
    elevenlabs: {
      url: "https://elevenlabs.io/docs/voices/premade-voices",
      label: "ElevenLabs",
    },
  };

const OPENAI_STT_MODELS = [{ id: "whisper-1", name: "Whisper v1" }];

const OPENAI_TTS_MODELS = [
  { id: "tts-1", name: "TTS-1" },
  { id: "tts-1-hd", name: "TTS-1 HD" },
];

// Map model IDs from cards to actual API model IDs
const MODEL_ID_MAP: Record<string, string> = {
  "tts-1": "tts-1",
  "tts-1-hd": "tts-1-hd",
  whisper: "whisper-1",
};

type Phase = "idle" | "validating" | "saving";
type MessageState = {
  kind: "status" | "error" | "success";
  text: string;
} | null;

export default function VoiceProviderSetupModal({
  providerType,
  existingProvider,
  mode,
  defaultModelId,
  onClose,
  onSuccess,
}: VoiceProviderSetupModalProps) {
  // Map the card model ID to the actual API model ID
  // Prioritize defaultModelId (from the clicked card) over stored value
  const initialTtsModel = defaultModelId
    ? MODEL_ID_MAP[defaultModelId] ?? "tts-1"
    : existingProvider?.tts_model ?? "tts-1";

  const [apiKey, setApiKey] = useState("");
  const [apiKeyChanged, setApiKeyChanged] = useState(false);
  const [targetUri, setTargetUri] = useState(
    existingProvider?.target_uri ?? ""
  );
  const [selectedLlmProviderId, setSelectedLlmProviderId] = useState<
    number | null
  >(null);
  const [sttModel, setSttModel] = useState(
    existingProvider?.stt_model ?? "whisper-1"
  );
  const [ttsModel, setTtsModel] = useState(initialTtsModel);
  const [defaultVoice, setDefaultVoice] = useState(
    existingProvider?.default_voice ?? ""
  );
  const [phase, setPhase] = useState<Phase>("idle");
  const [message, setMessage] = useState<MessageState>(null);

  // Dynamic voices fetched from backend
  const [voiceOptions, setVoiceOptions] = useState<VoiceOption[]>([]);
  const [isLoadingVoices, setIsLoadingVoices] = useState(false);

  // Existing OpenAI LLM providers for API key reuse
  const [existingApiKeyOptions, setExistingApiKeyOptions] = useState<
    ApiKeyOption[]
  >([]);
  const [llmProviderMap, setLlmProviderMap] = useState<Map<string, number>>(
    new Map()
  );

  // Fetch existing OpenAI LLM providers (for API key reuse)
  useEffect(() => {
    if (providerType !== "openai") return;

    fetchLLMProviders()
      .then((res) => res.json())
      .then((data: { providers: LLMProviderView[] } | LLMProviderView[]) => {
        const providers = Array.isArray(data) ? data : data.providers ?? [];
        const openaiProviders = providers.filter(
          (p) => p.provider === "openai" && p.api_key
        );
        const options: ApiKeyOption[] = openaiProviders.map((p) => ({
          value: p.api_key!,
          label: p.api_key!,
          description: `Used for LLM provider **${p.name}**`,
        }));
        setExistingApiKeyOptions(options);

        // Map masked API keys to provider IDs for lookup on selection
        const providerMap = new Map<string, number>();
        openaiProviders.forEach((p) => {
          if (p.api_key) {
            providerMap.set(p.api_key, p.id);
          }
        });
        setLlmProviderMap(providerMap);
      })
      .catch(() => {
        setExistingApiKeyOptions([]);
      });
  }, [providerType]);

  // Fetch voices on mount (works without API key for ElevenLabs/OpenAI)
  useEffect(() => {
    setIsLoadingVoices(true);
    fetchVoicesByType(providerType)
      .then((res) => res.json())
      .then((data: Array<{ id: string; name: string }>) => {
        const options = data.map((v) => ({
          value: v.id,
          label: v.name,
          description: v.id,
        }));
        setVoiceOptions(options);
        // Set default voice to first option if not already set,
        // or if current value doesn't exist in the new options
        setDefaultVoice((prev) => {
          if (!prev) return options[0]?.value ?? "";
          const existsInOptions = options.some((opt) => opt.value === prev);
          return existsInOptions ? prev : options[0]?.value ?? "";
        });
      })
      .catch(() => {
        setVoiceOptions([]);
      })
      .finally(() => {
        setIsLoadingVoices(false);
      });
  }, [providerType]);

  const isEditing = !!existingProvider;
  const label = PROVIDER_LABELS[providerType] ?? providerType;
  const isProcessing = phase !== "idle";
  const hasNonEmptyApiKey = apiKey.trim().length > 0;
  const shouldSendApiKey =
    !selectedLlmProviderId && apiKeyChanged && hasNonEmptyApiKey;
  const shouldUseStoredKey =
    isEditing && !selectedLlmProviderId && !shouldSendApiKey;

  const canConnect = (() => {
    if (selectedLlmProviderId) return true;
    if (!isEditing && !apiKey) return false;
    if (providerType === "azure" && !isEditing && !targetUri) return false;
    return true;
  })();

  // Logo arrangement component for the modal header
  // No useMemo needed - providerType and label are stable props
  const LogoArrangement: FunctionComponent<IconProps> = () => (
    <div className="flex items-center gap-2">
      <div className="flex items-center justify-center size-7 shrink-0 overflow-clip">
        {providerType === "openai" ? (
          <OpenAIIcon size={24} />
        ) : providerType === "azure" ? (
          <AzureIcon size={24} />
        ) : providerType === "elevenlabs" ? (
          <ElevenLabsIcon size={24} />
        ) : (
          <Image
            src={PROVIDER_LOGO_URLS[providerType] ?? "/Openai.svg"}
            alt={`${label} logo`}
            width={24}
            height={24}
            className="object-contain"
          />
        )}
      </div>
      <div className="flex items-center justify-center size-4 shrink-0">
        <SvgArrowExchange className="size-3 text-text-04" />
      </div>
      <div className="flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip">
        <SvgOnyxLogo size={24} className="shrink-0" />
      </div>
    </div>
  );

  const formFieldState: "idle" | "error" | "success" =
    message?.kind === "error"
      ? "error"
      : message?.kind === "success"
        ? "success"
        : "idle";

  const handleSubmit = async () => {
    if (!canConnect) return;

    setMessage(null);

    try {
      // Test the connection first (skip if reusing LLM provider key - validated on save)
      if (!selectedLlmProviderId) {
        setPhase("validating");
        setMessage({ kind: "status", text: "Validating API key..." });

        const testResponse = await testVoiceProvider({
          provider_type: providerType,
          api_key: shouldSendApiKey ? apiKey : undefined,
          target_uri: targetUri || undefined,
          use_stored_key: shouldUseStoredKey,
        });

        if (!testResponse.ok) {
          const data = await testResponse.json().catch(() => ({}));
          const detail =
            typeof data?.detail === "string"
              ? data.detail
              : "Connection test failed";
          setPhase("idle");
          setMessage({ kind: "error", text: detail });
          return;
        }

        setMessage({
          kind: "status",
          text: "API key validated. Saving provider...",
        });
      }

      // Save the provider
      setPhase("saving");
      const response = await upsertVoiceProvider({
        id: existingProvider?.id,
        name: label,
        provider_type: providerType,
        api_key: shouldSendApiKey ? apiKey : undefined,
        api_key_changed: shouldSendApiKey,
        target_uri: targetUri || undefined,
        llm_provider_id: selectedLlmProviderId,
        stt_model: sttModel,
        tts_model: ttsModel,
        default_voice: defaultVoice,
        activate_stt: mode === "stt",
        activate_tts: mode === "tts",
      });

      if (response.ok) {
        onSuccess();
      } else {
        const data = await response.json().catch(() => ({}));
        const detail =
          typeof data?.detail === "string"
            ? data.detail
            : "Failed to save provider";
        setPhase("idle");
        setMessage({ kind: "error", text: detail });
      }
    } catch {
      setPhase("idle");
      setMessage({ kind: "error", text: "Failed to save provider" });
    }
  };

  return (
    <Modal open onOpenChange={(isOpen) => !isOpen && onClose()}>
      <Modal.Content width="sm">
        <Modal.Header
          icon={LogoArrangement}
          title={isEditing ? `Edit ${label}` : `Set up ${label}`}
          description={`Connect to ${label} and set up your voice models.`}
          onClose={onClose}
        />
        <Modal.Body>
          <Section gap={1} alignItems="stretch">
            <FormField name="api_key" state={formFieldState} className="w-full">
              <FormField.Label>API Key</FormField.Label>
              <FormField.Description>
                {isEditing ? (
                  "Leave blank to keep existing key"
                ) : (
                  <>
                    Paste your{" "}
                    <a
                      href={PROVIDER_API_KEY_URLS[providerType]}
                      target="_blank"
                      rel="noopener noreferrer"
                      className="underline"
                    >
                      API key
                    </a>{" "}
                    from {label} to access your models.
                  </>
                )}
              </FormField.Description>
              <FormField.Control asChild>
                {providerType === "openai" &&
                existingApiKeyOptions.length > 0 ? (
                  <InputComboBox
                    placeholder={isEditing ? "••••••••" : "Enter API key"}
                    value={apiKey}
                    onChange={(e) => {
                      setApiKey(e.target.value);
                      setApiKeyChanged(true);
                      setSelectedLlmProviderId(null);
                      setMessage(null);
                    }}
                    onValueChange={(value) => {
                      setApiKey(value);
                      // Check if this is an existing key
                      const llmProviderId = llmProviderMap.get(value);
                      if (llmProviderId) {
                        setSelectedLlmProviderId(llmProviderId);
                        setApiKeyChanged(false);
                      } else {
                        setSelectedLlmProviderId(null);
                        setApiKeyChanged(true);
                      }
                      setMessage(null);
                    }}
                    options={existingApiKeyOptions}
                    separatorLabel="Reuse OpenAI API Keys"
                    strict={false}
                    showAddPrefix
                  />
                ) : (
                  <PasswordInputTypeIn
                    placeholder={isEditing ? "••••••••" : "Enter API key"}
                    value={apiKey}
                    onChange={(e) => {
                      setApiKey(e.target.value);
                      setApiKeyChanged(true);
                      setMessage(null);
                    }}
                    showClearButton={false}
                  />
                )}
              </FormField.Control>
              {isProcessing ? (
                <FormField.APIMessage
                  state="loading"
                  messages={{
                    loading: message?.text ?? "Validating API key...",
                  }}
                />
              ) : message ? (
                <FormField.Message
                  messages={{
                    idle: "",
                    error: message.kind === "error" ? message.text : "",
                    success: message.kind === "success" ? message.text : "",
                  }}
                />
              ) : null}
            </FormField>

            {providerType === "azure" && (
              <Vertical
                title="Target URI"
                subDescription={markdown(
                  "Paste the endpoint shown in [Azure Portal (Keys and Endpoint)](https://portal.azure.com/). Onyx extracts the speech region from this URL. Examples: https://westus.api.cognitive.microsoft.com/ or https://westus.tts.speech.microsoft.com/."
                )}
                nonInteractive
              >
                <InputTypeIn
                  placeholder={
                    isEditing
                      ? "Leave blank to keep existing"
                      : "https://<region>.api.cognitive.microsoft.com/"
                  }
                  value={targetUri}
                  onChange={(e) => setTargetUri(e.target.value)}
                />
              </Vertical>
            )}

            {providerType === "openai" && mode === "stt" && (
              <Horizontal title="STT Model" center nonInteractive>
                <InputSelect value={sttModel} onValueChange={setSttModel}>
                  <InputSelect.Trigger />
                  <InputSelect.Content>
                    {OPENAI_STT_MODELS.map((model) => (
                      <InputSelect.Item key={model.id} value={model.id}>
                        {model.name}
                      </InputSelect.Item>
                    ))}
                  </InputSelect.Content>
                </InputSelect>
              </Horizontal>
            )}

            {providerType === "openai" && mode === "tts" && (
              <Vertical
                title="Default Model"
                subDescription="This model will be used by Onyx by default for text-to-speech."
                nonInteractive
              >
                <InputSelect value={ttsModel} onValueChange={setTtsModel}>
                  <InputSelect.Trigger />
                  <InputSelect.Content>
                    {OPENAI_TTS_MODELS.map((model) => (
                      <InputSelect.Item key={model.id} value={model.id}>
                        {model.name}
                      </InputSelect.Item>
                    ))}
                  </InputSelect.Content>
                </InputSelect>
              </Vertical>
            )}

            {mode === "tts" && (
              <Vertical
                title="Voice"
                subDescription={markdown(
                  `This voice will be used for spoken responses. See full list of supported languages and voices at [${
                    PROVIDER_VOICE_DOCS_URLS[providerType]?.label ?? label
                  }](${
                    PROVIDER_VOICE_DOCS_URLS[providerType]?.url ??
                    PROVIDER_DOCS_URLS[providerType]
                  }).`
                )}
                nonInteractive
              >
                <InputComboBox
                  value={defaultVoice}
                  onValueChange={setDefaultVoice}
                  options={voiceOptions}
                  placeholder={
                    isLoadingVoices
                      ? "Loading voices..."
                      : "Select a voice or enter voice ID"
                  }
                  disabled={isLoadingVoices}
                  strict={false}
                />
              </Vertical>
            )}
          </Section>
        </Modal.Body>
        <Modal.Footer>
          <Button secondary onClick={onClose}>
            Cancel
          </Button>
          <Disabled disabled={!canConnect || isProcessing}>
            <Button
              onClick={handleSubmit}
              disabled={!canConnect || isProcessing}
            >
              {isProcessing ? "Connecting..." : isEditing ? "Save" : "Connect"}
            </Button>
          </Disabled>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/configuration/voice/page.tsx
================================================
export { default } from "@/refresh-pages/admin/VoiceConfigurationPage";


================================================
FILE: web/src/app/admin/configuration/web-search/page.tsx
================================================
export { default } from "@/refresh-pages/admin/WebSearchPage";


================================================
FILE: web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx
================================================
"use client";

import { useState } from "react";

import { ValidSources } from "@/lib/types";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Separator from "@/refresh-components/Separator";
import { SvgChevronUp, SvgChevronDown, SvgEdit } from "@opal/icons";
import Truncated from "@/refresh-components/texts/Truncated";

function convertObjectToString(obj: any): string | any {
  if (typeof obj === "object" && obj !== null) {
    if (!Array.isArray(obj)) {
      return JSON.stringify(obj);
    } else {
      if (obj.length === 0) {
        return null;
      }
      return obj.map((item) => convertObjectToString(item)).join(", ");
    }
  }
  if (typeof obj === "boolean") {
    return obj.toString();
  }
  return obj;
}

export function buildConfigEntries(
  obj: any,
  sourceType: ValidSources
): { [key: string]: string } {
  if (sourceType === ValidSources.File) {
    return {};
  } else if (sourceType === ValidSources.GoogleSites) {
    return {
      base_url: obj.base_url,
    };
  }
  return obj;
}

interface ConfigItemProps {
  label: string;
  value: any;
  onEdit?: () => void;
}

function ConfigItem({ label, value, onEdit }: ConfigItemProps) {
  const [isExpanded, setIsExpanded] = useState(false);
  const isExpandable = Array.isArray(value) && value.length > 5;

  const renderValue = () => {
    if (Array.isArray(value)) {
      const displayedItems = isExpanded ? value : value.slice(0, 5);
      return (
        <Section
          flexDirection="row"
          gap={0.25}
          justifyContent="end"
          alignItems="center"
          height="fit"
        >
          <Text secondaryBody text03 className="break-words">
            {displayedItems
              .map((item) => convertObjectToString(item))
              .join(", ")}
          </Text>
        </Section>
      );
    } else if (typeof value === "object" && value !== null) {
      return (
        <Section gap={0.25} alignItems="end" height="fit">
          {Object.entries(value).map(([key, val]) => (
            <Text key={key} secondaryBody text03 className="break-words">
              <Text mainContentEmphasis text03>
                {key}:
              </Text>{" "}
              {convertObjectToString(val)}
            </Text>
          ))}
        </Section>
      );
    } else if (typeof value === "boolean") {
      return (
        <Text secondaryBody text03 className="text-right">
          {value ? "True" : "False"}
        </Text>
      );
    }
    return (
      <Truncated secondaryBody text03 className="text-right">
        {convertObjectToString(value) || "-"}
      </Truncated>
    );
  };

  return (
    <Section
      flexDirection="row"
      justifyContent="between"
      alignItems="center"
      gap={1}
    >
      <Section alignItems="start">
        <Text mainUiBody text04>
          {label}
        </Text>
      </Section>
      <Section
        flexDirection="row"
        justifyContent="end"
        alignItems="center"
        gap={0.5}
      >
        {renderValue()}

        {isExpandable && (
          <Button
            prominence="tertiary"
            size="md"
            icon={isExpanded ? SvgChevronUp : SvgChevronDown}
            onClick={() => setIsExpanded(!isExpanded)}
          >
            {isExpanded ? "Show less" : `Show all (${value.length} items)`}
          </Button>
        )}
        {onEdit && (
          <Button
            prominence="tertiary"
            icon={SvgEdit}
            onClick={onEdit}
            tooltip="Edit"
          />
        )}
      </Section>
    </Section>
  );
}

export function AdvancedConfigDisplay({
  pruneFreq,
  refreshFreq,
  indexingStart,
  onRefreshEdit,
  onPruningEdit,
}: {
  pruneFreq: number | null;
  refreshFreq: number | null;
  indexingStart: Date | null;
  onRefreshEdit: () => void;
  onPruningEdit: () => void;
}) {
  const formatRefreshFrequency = (seconds: number | null): string => {
    if (seconds === null) return "-";
    const totalMinutes = seconds / 60;

    // If it's 60 minutes or more and evenly divisible by 60, show in hours
    if (totalMinutes >= 60 && totalMinutes % 60 === 0) {
      const hours = totalMinutes / 60;
      return `${hours} hour${hours !== 1 ? "s" : ""}`;
    }

    // Otherwise show in minutes
    const minutes = Math.round(totalMinutes);
    return `${minutes} minute${minutes !== 1 ? "s" : ""}`;
  };
  const formatPruneFrequency = (seconds: number | null): string => {
    if (seconds === null) return "-";
    const totalHours = seconds / 3600;

    // If less than 1 hour, show in minutes
    if (totalHours < 1) {
      const minutes = Math.round(seconds / 60);
      return `${minutes} minute${minutes !== 1 ? "s" : ""}`;
    }

    const hours = Math.round(totalHours);

    // If it's 24 hours or more and evenly divisible by 24, show in days
    if (hours >= 24 && hours % 24 === 0) {
      const days = hours / 24;
      return `${days} day${days !== 1 ? "s" : ""}`;
    }

    // Otherwise show in hours
    return `${hours} hour${hours !== 1 ? "s" : ""}`;
  };

  const formatDate = (date: Date | null): string => {
    if (date === null) return "-";
    return date.toLocaleString("en-US", {
      year: "numeric",
      month: "long",
      day: "numeric",
      hour: "2-digit",
      minute: "2-digit",
      timeZoneName: "short",
    });
  };

  const items = [
    pruneFreq !== null && {
      label: "Pruning Frequency",
      value: formatPruneFrequency(pruneFreq),
      onEdit: onPruningEdit,
    },
    refreshFreq && {
      label: "Refresh Frequency",
      value: formatRefreshFrequency(refreshFreq),
      onEdit: onRefreshEdit,
    },
    indexingStart && {
      label: "Indexing Start",
      value: formatDate(indexingStart),
    },
  ].filter(Boolean) as ConfigItemProps[];

  return (
    <Section gap={0} height="fit">
      {items.map((item, index) => (
        <div key={item.label} className="w-full">
          <div className="py-4">
            <ConfigItem
              label={item.label}
              value={item.value}
              onEdit={item.onEdit}
            />
          </div>
          {index < items.length - 1 && <Separator noPadding />}
        </div>
      ))}
    </Section>
  );
}

export function ConfigDisplay({
  configEntries,
  onEdit,
}: {
  configEntries: { [key: string]: string };
  onEdit?: (key: string) => void;
}) {
  const entries = Object.entries(configEntries);

  return (
    <Section gap={0} height="fit">
      {entries.map(([key, value], index) => (
        <div key={key} className="w-full">
          <div className="py-4">
            <ConfigItem
              label={key}
              value={value}
              onEdit={onEdit ? () => onEdit(key) : undefined}
            />
          </div>
          {index < entries.length - 1 && <Separator noPadding />}
        </div>
      ))}
    </Section>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/DeletionErrorStatus.tsx
================================================
import { FiInfo } from "react-icons/fi";

export default function DeletionErrorStatus({
  deletion_failure_message,
}: {
  deletion_failure_message: string;
}) {
  return (
    <div className="mt-2 rounded-md border border-error-300 bg-error-50 p-4 text-error-600 max-w-3xl">
      <div className="flex items-center">
        <h3 className="text-base font-medium">Deletion Error</h3>
        <div className="ml-2 relative group">
          <FiInfo className="h-4 w-4 text-error-600 cursor-help" />
          <div className="absolute z-10 w-64 p-2 mt-2 text-sm bg-white rounded-md shadow-lg opacity-0 group-hover:opacity-100 transition-opacity duration-300 border border-background-200">
            This error occurred while attempting to delete the connector. You
            may re-attempt a deletion by clicking the &quot;Delete&quot; button.
          </div>
        </div>
      </div>
      <div className="mt-2 text-sm">
        <p>{deletion_failure_message}</p>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/IndexAttemptErrorsModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { IndexAttemptError } from "./types";
import { localizeAndPrettify } from "@/lib/time";
import Button from "@/refresh-components/buttons/Button";
import Text from "@/refresh-components/texts/Text";
import { PageSelector } from "@/components/PageSelector";
import { useCallback, useEffect, useRef, useState, useMemo } from "react";
import { SvgAlertTriangle } from "@opal/icons";
export interface IndexAttemptErrorsModalProps {
  errors: {
    items: IndexAttemptError[];
    total_items: number;
  };
  onClose: () => void;
  onResolveAll: () => void;
  isResolvingErrors?: boolean;
}

const ROW_HEIGHT = 65; // 4rem + 1px for border

export default function IndexAttemptErrorsModal({
  errors,
  onClose,
  onResolveAll,
  isResolvingErrors = false,
}: IndexAttemptErrorsModalProps) {
  const observerRef = useRef<ResizeObserver | null>(null);
  const [pageSize, setPageSize] = useState(10);
  const [currentPage, setCurrentPage] = useState(1);

  const tableContainerRef = useCallback((container: HTMLDivElement | null) => {
    if (observerRef.current) {
      observerRef.current.disconnect();
      observerRef.current = null;
    }

    if (!container) return;

    const observer = new ResizeObserver(() => {
      const thead = container.querySelector("thead");
      const theadHeight = thead?.getBoundingClientRect().height ?? 0;
      const availableHeight = container.clientHeight - theadHeight;
      const newPageSize = Math.max(3, Math.floor(availableHeight / ROW_HEIGHT));
      setPageSize(newPageSize);
    });

    observer.observe(container);
    observerRef.current = observer;
  }, []);

  // When data changes, reset to page 1.
  // When page size changes (resize), preserve the user's position by
  // finding which new page contains the first item they were looking at.
  const prevPageSizeRef = useRef(pageSize);
  useEffect(() => {
    if (pageSize !== prevPageSizeRef.current) {
      setCurrentPage((prev) => {
        const firstVisibleIndex = (prev - 1) * prevPageSizeRef.current;
        const newPage = Math.floor(firstVisibleIndex / pageSize) + 1;
        const totalPages = Math.ceil(errors.items.length / pageSize);
        return Math.min(newPage, totalPages);
      });
      prevPageSizeRef.current = pageSize;
    } else {
      setCurrentPage(1);
    }
  }, [errors.items.length, pageSize]);

  const paginationData = useMemo(() => {
    const totalPages = Math.ceil(errors.items.length / pageSize);
    const startIndex = (currentPage - 1) * pageSize;
    const currentPageItems = errors.items.slice(
      startIndex,
      startIndex + pageSize
    );
    return { totalPages, currentPageItems };
  }, [errors.items, pageSize, currentPage]);

  const hasUnresolvedErrors = useMemo(
    () => errors.items.some((error) => !error.is_resolved),
    [errors.items]
  );

  const handlePageChange = (page: number) => {
    // Ensure we don't go to an invalid page
    if (page >= 1 && page <= paginationData.totalPages) {
      setCurrentPage(page);
    }
  };

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="full" height="full">
        <Modal.Header
          icon={SvgAlertTriangle}
          title="Indexing Errors"
          description={
            isResolvingErrors
              ? "Currently attempting to resolve all errors by performing a full re-index. This may take some time to complete."
              : undefined
          }
          onClose={onClose}
          height="fit"
        />
        <Modal.Body height="full">
          {!isResolvingErrors && (
            <div className="flex flex-col gap-2 flex-shrink-0">
              <Text as="p">
                Below are the errors encountered during indexing. Each row
                represents a failed document or entity.
              </Text>
              <Text as="p">
                Click the button below to kick off a full re-index to try and
                resolve these errors. This full re-index may take much longer
                than a normal update.
              </Text>
            </div>
          )}

          <div
            ref={tableContainerRef}
            className="flex-1 w-full overflow-hidden min-h-0"
          >
            <Table>
              <TableHeader>
                <TableRow>
                  <TableHead>Time</TableHead>
                  <TableHead>Document ID</TableHead>
                  <TableHead className="w-1/2">Error Message</TableHead>
                  <TableHead>Status</TableHead>
                </TableRow>
              </TableHeader>
              <TableBody>
                {paginationData.currentPageItems.length > 0 ? (
                  paginationData.currentPageItems.map((error) => (
                    <TableRow key={error.id} className="h-[4rem]">
                      <TableCell>
                        {localizeAndPrettify(error.time_created)}
                      </TableCell>
                      <TableCell>
                        {error.document_link ? (
                          <a
                            href={error.document_link}
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-link hover:underline"
                          >
                            {error.document_id || error.entity_id || "Unknown"}
                          </a>
                        ) : (
                          error.document_id || error.entity_id || "Unknown"
                        )}
                      </TableCell>
                      <TableCell>
                        <div className="flex items-center h-[2rem] overflow-y-auto whitespace-normal">
                          {error.failure_message}
                        </div>
                      </TableCell>
                      <TableCell>
                        <span
                          className={`px-2 py-1 rounded text-xs ${
                            error.is_resolved
                              ? "bg-green-100 text-green-800"
                              : "bg-red-100 text-red-800"
                          }`}
                        >
                          {error.is_resolved ? "Resolved" : "Unresolved"}
                        </span>
                      </TableCell>
                    </TableRow>
                  ))
                ) : (
                  <TableRow className="h-[4rem]">
                    <TableCell
                      colSpan={4}
                      className="text-center py-8 text-gray-500"
                    >
                      No errors found on this page
                    </TableCell>
                  </TableRow>
                )}
              </TableBody>
            </Table>
          </div>

          {paginationData.totalPages > 1 && (
            <div className="flex w-full justify-center">
              <PageSelector
                totalPages={paginationData.totalPages}
                currentPage={currentPage}
                onPageChange={handlePageChange}
              />
            </div>
          )}
        </Modal.Body>
        <Modal.Footer>
          {hasUnresolvedErrors && !isResolvingErrors && (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <Button onClick={onResolveAll} className="ml-4 whitespace-nowrap">
              Resolve All
            </Button>
          )}
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/IndexAttemptsTable.tsx
================================================
"use client";

import { useState } from "react";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
  TableHeader,
} from "@/components/ui/table";
import { Text } from "@opal/components";
import { Callout } from "@/components/ui/callout";
import { CCPairFullInfo } from "./types";
import { IndexAttemptSnapshot } from "@/lib/types";
import { IndexAttemptStatus } from "@/components/Status";
import { PageSelector } from "@/components/PageSelector";
import { localizeAndPrettify } from "@/lib/time";
import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
import { InfoIcon } from "@/components/icons/icons";
import ExceptionTraceModal from "@/sections/modals/PreviewModal/ExceptionTraceModal";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SvgClock } from "@opal/icons";
export interface IndexingAttemptsTableProps {
  ccPair: CCPairFullInfo;
  indexAttempts: IndexAttemptSnapshot[];
  currentPage: number;
  totalPages: number;
  onPageChange: (page: number) => void;
}

export function IndexAttemptsTable({
  indexAttempts,
  currentPage,
  totalPages,
  onPageChange,
}: IndexingAttemptsTableProps) {
  const [indexAttemptTracePopupId, setIndexAttemptTracePopupId] = useState<
    number | null
  >(null);

  if (!indexAttempts?.length) {
    return (
      <Callout
        className="mt-4"
        title="No indexing attempts scheduled yet"
        type="notice"
      >
        Index attempts are scheduled in the background, and may take some time
        to appear. Try refreshing the page in ~30 seconds!
      </Callout>
    );
  }

  const indexAttemptToDisplayTraceFor = indexAttempts?.find(
    (indexAttempt) => indexAttempt.id === indexAttemptTracePopupId
  );

  return (
    <>
      {indexAttemptToDisplayTraceFor?.full_exception_trace && (
        <ExceptionTraceModal
          onOutsideClick={() => setIndexAttemptTracePopupId(null)}
          exceptionTrace={indexAttemptToDisplayTraceFor.full_exception_trace}
        />
      )}

      <Table>
        <TableHeader>
          <TableRow>
            <TableHead>Time Started</TableHead>
            <TableHead>Status</TableHead>
            <TableHead className="whitespace-nowrap">New Docs</TableHead>
            <TableHead>
              <SimpleTooltip
                tooltip="Total number of documents replaced in the index during this indexing attempt"
                side="top"
              >
                <span className="flex items-center">
                  Total Docs
                  <InfoIcon className="ml-1 w-4 h-4" />
                </span>
              </SimpleTooltip>
            </TableHead>
            <TableHead>Error Message</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {indexAttempts.map((indexAttempt) => {
            const docsPerMinute =
              getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
            const isReindexInProgress =
              indexAttempt.status === "in_progress" ||
              indexAttempt.status === "not_started";
            const reindexTooltip = `This index attempt ${
              isReindexInProgress ? "is" : "was"
            } a full re-index. All documents from the source ${
              isReindexInProgress ? "are being" : "were"
            } synced into the system.`;
            return (
              <TableRow
                key={indexAttempt.id}
                className={
                  indexAttempt.full_exception_trace
                    ? "hover:bg-accent-background cursor-pointer relative select-none"
                    : undefined
                }
              >
                <TableCell>
                  {indexAttempt.time_started
                    ? localizeAndPrettify(indexAttempt.time_started)
                    : "-"}
                </TableCell>
                <TableCell>
                  <IndexAttemptStatus
                    status={indexAttempt.status || "not_started"}
                  />
                  {docsPerMinute ? (
                    <div className="text-xs mt-1">
                      {docsPerMinute} docs / min
                    </div>
                  ) : (
                    indexAttempt.status === "success" && (
                      <div className="text-xs mt-1">
                        No additional docs processed
                      </div>
                    )
                  )}
                </TableCell>
                <TableCell>
                  <div className="flex">
                    <div className="text-right">
                      <div>{indexAttempt.new_docs_indexed}</div>
                      {indexAttempt.docs_removed_from_index > 0 && (
                        <div className="text-xs w-52 text-wrap flex italic overflow-hidden whitespace-normal px-1">
                          (also removed {indexAttempt.docs_removed_from_index}{" "}
                          docs that were detected as deleted in the source)
                        </div>
                      )}
                    </div>
                  </div>
                </TableCell>
                <TableCell>
                  <div className="flex items-center">
                    {indexAttempt.total_docs_indexed}
                    {indexAttempt.from_beginning && (
                      <SimpleTooltip side="top" tooltip={reindexTooltip}>
                        <span className="cursor-help flex items-center">
                          <SvgClock className="ml-2 h-3.5 w-3.5 stroke-current" />
                        </span>
                      </SimpleTooltip>
                    )}
                  </div>
                </TableCell>
                <TableCell>
                  {indexAttempt.status === "success" && <Text as="p">-</Text>}

                  {indexAttempt.status === "failed" &&
                    indexAttempt.error_msg && (
                      <Text as="p">{indexAttempt.error_msg}</Text>
                    )}
                </TableCell>
                <td className="w-0 p-0">
                  {indexAttempt.full_exception_trace && (
                    <button
                      type="button"
                      aria-label="View full trace"
                      onClick={() =>
                        setIndexAttemptTracePopupId(indexAttempt.id)
                      }
                      className="absolute w-full h-full left-0 top-0"
                    />
                  )}
                </td>
              </TableRow>
            );
          })}
        </TableBody>
      </Table>
      {totalPages > 1 && (
        <div className="flex flex-1 justify-center pt-3">
          <PageSelector
            totalPages={totalPages}
            currentPage={currentPage}
            onPageChange={onPageChange}
          />
        </div>
      )}
    </>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/InlineFileManagement.tsx
================================================
"use client";

import { useState, useRef } from "react";
import { Button } from "@opal/components";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import {
  updateConnectorFiles,
  type ConnectorFileInfo,
} from "@/lib/fileConnector";
import { toast } from "@/hooks/useToast";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ThreeDotsLoader } from "@/components/Loading";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import {
  SvgCheck,
  SvgEdit,
  SvgFolderPlus,
  SvgPlusCircle,
  SvgX,
} from "@opal/icons";
import { formatBytes } from "@/lib/utils";
import { timestampToReadableDate } from "@/lib/dateUtils";

interface InlineFileManagementProps {
  connectorId: number;
  onRefresh: () => void;
}

export default function InlineFileManagement({
  connectorId,
  onRefresh,
}: InlineFileManagementProps) {
  const [isEditing, setIsEditing] = useState(false);
  const [selectedFilesToRemove, setSelectedFilesToRemove] = useState<
    Set<string>
  >(new Set());
  const [filesToAdd, setFilesToAdd] = useState<File[]>([]);
  const [isSaving, setIsSaving] = useState(false);
  const [showSaveConfirm, setShowSaveConfirm] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

  const {
    data: filesResponse,
    isLoading,
    error,
    mutate: refreshFiles,
  } = useSWR<{ files: ConnectorFileInfo[] }>(
    `/api/manage/admin/connector/${connectorId}/files`,
    errorHandlingFetcher,
    { refreshInterval: isEditing ? 0 : 5000 } // Disable auto-refresh while editing
  );

  const files = filesResponse?.files || [];

  const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
    const selectedFiles = event.target.files;
    if (!selectedFiles || selectedFiles.length === 0) return;

    setFilesToAdd((prev) => [...prev, ...Array.from(selectedFiles)]);
    // Reset the input
    if (fileInputRef.current) {
      fileInputRef.current.value = "";
    }
  };

  const handleRemoveNewFile = (index: number) => {
    setFilesToAdd((prev) => prev.filter((_, i) => i !== index));
  };

  const toggleFileForRemoval = (fileId: string) => {
    setSelectedFilesToRemove((prev) => {
      const newSet = new Set(prev);
      if (newSet.has(fileId)) {
        newSet.delete(fileId);
      } else {
        newSet.add(fileId);
      }
      return newSet;
    });
  };

  const handleSaveClick = () => {
    // Validate that we won't remove all files
    const remainingFiles = files.filter(
      (file) => !selectedFilesToRemove.has(file.file_id)
    ).length;

    if (remainingFiles === 0 && filesToAdd.length === 0) {
      toast.error(
        "Cannot remove all files from a connector. Delete the connector if this is desired."
      );
      return;
    }

    // Show confirmation modal
    setShowSaveConfirm(true);
  };

  const handleConfirmSave = async () => {
    setShowSaveConfirm(false);
    setIsSaving(true);
    try {
      await updateConnectorFiles(
        connectorId,
        Array.from(selectedFilesToRemove),
        filesToAdd
      );

      toast.success(
        "Files updated successfully! Document index is being updated in the background. " +
          "New files are being indexed and removed files will be pruned from the search results."
      );

      // Reset editing state
      setIsEditing(false);
      setSelectedFilesToRemove(new Set());
      setFilesToAdd([]);

      // Refresh data
      refreshFiles();
      onRefresh();
    } catch (error) {
      toast.error(
        error instanceof Error ? error.message : "Failed to update files"
      );
    } finally {
      setIsSaving(false);
    }
  };

  const handleCancel = () => {
    setIsEditing(false);
    setSelectedFilesToRemove(new Set());
    setFilesToAdd([]);
  };

  if (isLoading) {
    return (
      <div className="flex justify-center py-12">
        <ThreeDotsLoader />
      </div>
    );
  }

  if (error) {
    return (
      <Text as="p" className="text-error">
        Error loading files: {error.message}
      </Text>
    );
  }

  const currentFiles = files.filter(
    (file) => !selectedFilesToRemove.has(file.file_id)
  );
  const totalFiles = currentFiles.length + filesToAdd.length;

  return (
    <>
      {/* Header with Edit/Save buttons */}
      <div className="flex justify-between items-center mb-4">
        <Text as="p" mainUiBody>
          Files ({totalFiles} file{totalFiles !== 1 ? "s" : ""})
        </Text>
        <div className="flex gap-2">
          {!isEditing ? (
            <Button
              prominence="secondary"
              onClick={() => setIsEditing(true)}
              icon={SvgEdit}
            >
              Edit
            </Button>
          ) : (
            <>
              <Button
                disabled={isSaving}
                prominence="secondary"
                onClick={handleCancel}
                icon={SvgX}
              >
                Cancel
              </Button>
              <Button
                disabled={
                  isSaving ||
                  (selectedFilesToRemove.size === 0 && filesToAdd.length === 0)
                }
                onClick={handleSaveClick}
                icon={SvgCheck}
              >
                {isSaving ? "Saving..." : "Save Changes"}
              </Button>
            </>
          )}
        </div>
      </div>

      {/* File List */}
      {files.length === 0 && filesToAdd.length === 0 ? (
        <Text as="p" mainUiMuted className="text-center py-8">
          No files in this connector
        </Text>
      ) : (
        <div className="border rounded-lg overflow-hidden mb-4">
          {/* Scrollable container with max height */}
          <div className="max-h-[400px] overflow-y-auto">
            <Table>
              <TableHeader className="sticky top-0 bg-background z-10">
                <TableRow>
                  {isEditing && <TableHead className="w-12"></TableHead>}
                  <TableHead>File Name</TableHead>
                  <TableHead>Size</TableHead>
                  <TableHead>Upload Date</TableHead>
                  {isEditing && <TableHead className="w-12"></TableHead>}
                </TableRow>
              </TableHeader>
              <TableBody>
                {/* Existing files */}
                {files.map((file) => {
                  const isMarkedForRemoval = selectedFilesToRemove.has(
                    file.file_id
                  );
                  return (
                    <TableRow
                      key={file.file_id}
                      className={
                        isMarkedForRemoval
                          ? "bg-red-100 dark:bg-red-900/20"
                          : ""
                      }
                    >
                      {isEditing && (
                        <TableCell>
                          <Checkbox
                            checked={isMarkedForRemoval}
                            onCheckedChange={() =>
                              toggleFileForRemoval(file.file_id)
                            }
                          />
                        </TableCell>
                      )}
                      <TableCell className="font-medium">
                        <span
                          className={
                            isMarkedForRemoval ? "line-through opacity-60" : ""
                          }
                        >
                          {file.file_name}
                        </span>
                        {isMarkedForRemoval && (
                          <span className="ml-2 text-xs font-semibold text-red-600 dark:text-red-400">
                            Removing
                          </span>
                        )}
                      </TableCell>
                      <TableCell
                        className={
                          isMarkedForRemoval ? "line-through opacity-60" : ""
                        }
                      >
                        {formatBytes(file.file_size)}
                      </TableCell>
                      <TableCell
                        className={
                          isMarkedForRemoval ? "line-through opacity-60" : ""
                        }
                      >
                        {file.upload_date
                          ? timestampToReadableDate(file.upload_date)
                          : "-"}
                      </TableCell>
                      {isEditing && <TableCell></TableCell>}
                    </TableRow>
                  );
                })}

                {/* New files to be added */}
                {filesToAdd.map((file, index) => (
                  <TableRow
                    key={`new-${index}`}
                    className="bg-green-50 dark:bg-green-900/10"
                  >
                    {isEditing && (
                      <TableCell>
                        <Button
                          icon={SvgX}
                          variant="danger"
                          prominence="tertiary"
                          size="sm"
                          onClick={() => handleRemoveNewFile(index)}
                          tooltip="Remove file"
                          title="Remove file"
                        />
                      </TableCell>
                    )}
                    <TableCell className="font-medium">
                      {file.name}
                      <Text as="p" figureSmallValue>
                        New
                      </Text>
                    </TableCell>
                    <TableCell>{formatBytes(file.size)}</TableCell>
                    <TableCell>-</TableCell>
                    {isEditing && <TableCell></TableCell>}
                  </TableRow>
                ))}
              </TableBody>
            </Table>
          </div>
        </div>
      )}

      {/* Add Files Button (only in edit mode) */}
      {isEditing && (
        <div className="mt-4">
          <input
            ref={fileInputRef}
            type="file"
            multiple
            onChange={handleFileSelect}
            className="hidden"
            id={`file-upload-${connectorId}`}
          />
          <Button
            disabled={isSaving}
            prominence="secondary"
            onClick={() => fileInputRef.current?.click()}
            icon={SvgPlusCircle}
          >
            Add Files
          </Button>
        </div>
      )}

      {/* Confirmation Modal */}
      <Modal open={showSaveConfirm} onOpenChange={setShowSaveConfirm}>
        <Modal.Content width="sm">
          <Modal.Header
            icon={SvgFolderPlus}
            title="Confirm File Changes"
            description="When you save these changes, the following will happen:"
          />

          <Modal.Body>
            {selectedFilesToRemove.size > 0 && (
              <div className="p-3 bg-red-50 dark:bg-red-900/10 rounded-md">
                <Text
                  as="p"
                  mainUiBody
                  className="font-semibold text-red-800 dark:text-red-200"
                >
                  🗑️ {selectedFilesToRemove.size} file(s) will be removed
                </Text>
                <Text
                  as="p"
                  secondaryBody
                  className="text-red-700 dark:text-red-300 mt-1"
                >
                  Documents from these files will be pruned from the Document
                  Index
                </Text>
              </div>
            )}

            {filesToAdd.length > 0 && (
              <div className="p-3 bg-green-50 dark:bg-green-900/10 rounded-md">
                <Text
                  as="p"
                  mainUiBody
                  className="font-semibold text-green-800 dark:text-green-200"
                >
                  {filesToAdd.length} file(s) will be added
                </Text>
                <Text
                  as="p"
                  secondaryBody
                  className="text-green-700 dark:text-green-300 mt-1"
                >
                  New files will be uploaded, chunked, embedded, and indexed in
                  the Document Index
                </Text>
              </div>
            )}
          </Modal.Body>

          <Modal.Footer>
            <Button
              disabled={isSaving}
              prominence="secondary"
              onClick={() => setShowSaveConfirm(false)}
            >
              Cancel
            </Button>
            <Button disabled={isSaving} onClick={handleConfirmSave}>
              {isSaving ? "Saving..." : "Confirm & Save"}
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/ReIndexModal.tsx
================================================
"use client";

import { Button } from "@opal/components";
import { useState } from "react";
import { toast } from "@/hooks/useToast";
import { triggerIndexing } from "@/app/admin/connector/[ccPairId]/lib";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import Separator from "@/refresh-components/Separator";
import { SvgRefreshCw } from "@opal/icons";
// Hook to handle re-indexing functionality
export function useReIndexModal(
  connectorId: number | null,
  credentialId: number | null,
  ccPairId: number | null
) {
  const [reIndexPopupVisible, setReIndexPopupVisible] = useState(false);

  const showReIndexModal = () => {
    if (connectorId == null || credentialId == null || ccPairId == null) {
      return;
    }
    setReIndexPopupVisible(true);
  };

  const hideReIndexModal = () => {
    setReIndexPopupVisible(false);
  };

  const triggerReIndex = async (fromBeginning: boolean) => {
    if (connectorId == null || credentialId == null || ccPairId == null) {
      return;
    }

    try {
      const result = await triggerIndexing(
        fromBeginning,
        connectorId,
        credentialId,
        ccPairId
      );

      // Show appropriate notification based on result
      if (result.success) {
        toast.success(
          `${
            fromBeginning ? "Complete re-indexing" : "Indexing update"
          } started successfully`
        );
      } else {
        toast.error(result.message || "Failed to start indexing");
      }
    } catch (error) {
      console.error("Failed to trigger indexing:", error);
      toast.error(
        "An unexpected error occurred while trying to start indexing"
      );
    }
  };

  const FinalReIndexModal =
    reIndexPopupVisible &&
    connectorId != null &&
    credentialId != null &&
    ccPairId != null ? (
      <ReIndexModal hide={hideReIndexModal} onRunIndex={triggerReIndex} />
    ) : null;

  return {
    showReIndexModal,
    ReIndexModal: FinalReIndexModal,
  };
}

export interface ReIndexModalProps {
  hide: () => void;
  onRunIndex: (fromBeginning: boolean) => Promise<void>;
}

export default function ReIndexModal({ hide, onRunIndex }: ReIndexModalProps) {
  const [isProcessing, setIsProcessing] = useState(false);

  const handleRunIndex = async (fromBeginning: boolean) => {
    if (isProcessing) return;

    setIsProcessing(true);
    try {
      // First show immediate feedback with a toast
      toast.info(
        `Starting ${
          fromBeginning ? "complete re-indexing" : "indexing update"
        }...`
      );

      // Then close the modal
      hide();

      // Then run the indexing operation
      await onRunIndex(fromBeginning);
    } catch (error) {
      console.error("Error starting indexing:", error);
      // Show error in toast if needed
      toast.error("Failed to start indexing process");
    } finally {
      setIsProcessing(false);
    }
  };

  return (
    <Modal open onOpenChange={hide}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header icon={SvgRefreshCw} title="Run Indexing" onClose={hide} />
        <Modal.Body>
          <Text as="p">
            This will pull in and index all documents that have changed and/or
            have been added since the last successful indexing run.
          </Text>
          <Button disabled={isProcessing} onClick={() => handleRunIndex(false)}>
            Run Update
          </Button>

          <Separator />

          <Text as="p">
            This will cause a complete re-indexing of all documents from the
            source.
          </Text>
          <Text as="p">
            <strong>NOTE:</strong> depending on the number of documents stored
            in the source, this may take a long time.
          </Text>

          <Button disabled={isProcessing} onClick={() => handleRunIndex(true)}>
            Run Complete Re-Indexing
          </Button>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/lib.ts
================================================
import { runConnector } from "@/lib/connector";
import { ValidSources } from "@/lib/types";
import { mutate } from "swr";

export function buildCCPairInfoUrl(ccPairId: string | number) {
  return `/api/manage/admin/cc-pair/${ccPairId}`;
}

export function buildSimilarCredentialInfoURL(
  source_type: ValidSources,
  get_editable: boolean = false
) {
  const base = `/api/manage/admin/similar-credentials/${source_type}`;
  return get_editable ? `${base}?get_editable=True` : base;
}

export async function triggerIndexing(
  fromBeginning: boolean,
  connectorId: number,
  credentialId: number,
  ccPairId: number
): Promise<{ success: boolean; message: string }> {
  const errorMsg = await runConnector(
    connectorId,
    [credentialId],
    fromBeginning
  );

  mutate(buildCCPairInfoUrl(ccPairId));

  if (errorMsg) {
    return {
      success: false,
      message: errorMsg,
    };
  } else {
    return {
      success: true,
      message: "Triggered connector run",
    };
  }
}

export function getTooltipMessage(
  isInvalid: boolean,
  isDeleting: boolean,
  isIndexing: boolean,
  isDisabled: boolean
): string | undefined {
  if (isInvalid) {
    return "Connector is in an invalid state. Please update the credentials or configuration before re-indexing.";
  }
  if (isDeleting) {
    return "Cannot index while connector is deleting";
  }
  if (isIndexing) {
    return "Indexing is already in progress";
  }
  if (isDisabled) {
    return "Connector must be re-enabled before indexing";
  }
  return undefined;
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/page.tsx
================================================
"use client";

import BackButton from "@/refresh-components/buttons/BackButton";
import { ErrorCallout } from "@/components/ErrorCallout";
import { ThreeDotsLoader } from "@/components/Loading";
import { SourceIcon } from "@/components/SourceIcon";
import { CCPairStatus, PermissionSyncStatus } from "@/components/Status";
import { toast } from "@/hooks/useToast";
import CredentialSection from "@/components/credentials/CredentialSection";
import Text from "@/refresh-components/texts/Text";
import {
  updateConnectorCredentialPairName,
  updateConnectorCredentialPairProperty,
} from "@/lib/connector";
import { credentialTemplates } from "@/lib/connectors/credentials";
import { errorHandlingFetcher } from "@/lib/fetcher";
import Title from "@/components/ui/title";
import { useRouter } from "next/navigation";
import { useCallback, useEffect, useRef, useState, use } from "react";
import useSWR, { mutate } from "swr";
import {
  AdvancedConfigDisplay,
  buildConfigEntries,
  ConfigDisplay,
} from "./ConfigDisplay";
import DeletionErrorStatus from "./DeletionErrorStatus";
import { IndexAttemptsTable } from "./IndexAttemptsTable";
import InlineFileManagement from "./InlineFileManagement";
import { buildCCPairInfoUrl, triggerIndexing } from "./lib";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import {
  CCPairFullInfo,
  ConnectorCredentialPairStatus,
  IndexAttemptError,
  statusIsNotCurrentlyActive,
} from "./types";
import { EditableStringFieldDisplay } from "@/components/EditableStringFieldDisplay";
import EditPropertyModal from "@/components/modals/EditPropertyModal";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { deleteCCPair } from "@/lib/documentDeletion";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import * as Yup from "yup";
import {
  AlertCircle,
  PlayIcon,
  PauseIcon,
  Trash2Icon,
  RefreshCwIcon,
} from "lucide-react";
import IndexAttemptErrorsModal from "./IndexAttemptErrorsModal";
import usePaginatedFetch from "@/hooks/usePaginatedFetch";
import { IndexAttemptSnapshot } from "@/lib/types";
import { Spinner } from "@/components/Spinner";
import { Callout } from "@/components/ui/callout";
import { Card } from "@/components/ui/card";
import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { DropdownMenuItemWithTooltip } from "@/components/ui/dropdown-menu-with-tooltip";
import { timeAgo } from "@/lib/time";
import { useStatusChange } from "./useStatusChange";
import { useReIndexModal } from "./ReIndexModal";
import { Button } from "@opal/components";
import { SvgSettings } from "@opal/icons";
import { UserRole } from "@/lib/types";
import { useUser } from "@/providers/UserProvider";
// synchronize these validations with the SQLAlchemy connector class until we have a
// centralized schema for both frontend and backend
const RefreshFrequencySchema = Yup.object().shape({
  propertyValue: Yup.number()
    .typeError("Property value must be a valid number")
    .integer("Property value must be an integer")
    .min(1, "Property value must be greater than or equal to 1 minute")
    .required("Property value is required"),
});

const PruneFrequencySchema = Yup.object().shape({
  propertyValue: Yup.number()
    .typeError("Property value must be a valid number")
    .min(
      0.083,
      "Property value must be greater than or equal to 0.083 hours (5 minutes)"
    )
    .required("Property value is required"),
});

const ITEMS_PER_PAGE = 8;
const PAGES_PER_BATCH = 8;

function Main({ ccPairId }: { ccPairId: number }) {
  const router = useRouter();
  const { user } = useUser();

  const {
    data: ccPair,
    isLoading: isLoadingCCPair,
    error: ccPairError,
  } = useSWR<CCPairFullInfo>(
    buildCCPairInfoUrl(ccPairId),
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  const {
    currentPageData: indexAttempts,
    isLoading: isLoadingIndexAttempts,
    currentPage,
    totalPages,
    goToPage,
  } = usePaginatedFetch<IndexAttemptSnapshot>({
    itemsPerPage: ITEMS_PER_PAGE,
    pagesPerBatch: PAGES_PER_BATCH,
    endpoint: `${buildCCPairInfoUrl(ccPairId)}/index-attempts`,
  });

  const { currentPageData: indexAttemptErrorsPage } =
    usePaginatedFetch<IndexAttemptError>({
      itemsPerPage: 10,
      pagesPerBatch: 1,
      endpoint: `/api/manage/admin/cc-pair/${ccPairId}/errors`,
    });

  // Initialize hooks at top level to avoid conditional hook calls
  const { showReIndexModal, ReIndexModal } = useReIndexModal(
    ccPair?.connector?.id ?? null,
    ccPair?.credential?.id ?? null,
    ccPairId
  );

  const {
    handleStatusChange,
    isUpdating: isStatusUpdating,
    ConfirmModal,
  } = useStatusChange(ccPair || null);

  const indexAttemptErrors = indexAttemptErrorsPage
    ? {
        items: indexAttemptErrorsPage,
        total_items: indexAttemptErrorsPage.length,
      }
    : null;

  const [hasLoadedOnce, setHasLoadedOnce] = useState(false);
  const [editingRefreshFrequency, setEditingRefreshFrequency] = useState(false);
  const [editingPruningFrequency, setEditingPruningFrequency] = useState(false);
  const [showIndexAttemptErrors, setShowIndexAttemptErrors] = useState(false);

  const [showIsResolvingKickoffLoader, setShowIsResolvingKickoffLoader] =
    useState(false);
  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
  const [showDeleteConnectorConfirmModal, setShowDeleteConnectorConfirmModal] =
    useState(false);
  const isSchedulingConnectorDeletionRef = useRef(false);

  const refresh = useCallback(() => {
    mutate(buildCCPairInfoUrl(ccPairId));
  }, [ccPairId]);

  const finishConnectorDeletion = useCallback(() => {
    router.push("/admin/indexing/status");
  }, [router]);

  const scheduleConnectorDeletion = useCallback(() => {
    if (!ccPair) return;
    if (isSchedulingConnectorDeletionRef.current) return;
    isSchedulingConnectorDeletionRef.current = true;

    deleteCCPair(ccPair.connector.id, ccPair.credential.id).catch((error) => {
      toast.error(
        "Failed to schedule deletion of connector - " + error.message
      );
    });
    finishConnectorDeletion();
  }, [ccPair, finishConnectorDeletion]);

  const latestIndexAttempt = indexAttempts?.[0];
  const canManageInlineFileConnectorFiles =
    ccPair?.connector.source === "file" &&
    (ccPair.is_editable_for_current_user ||
      (user?.role === UserRole.GLOBAL_CURATOR &&
        ccPair.access_type === "public"));

  const isResolvingErrors =
    (latestIndexAttempt?.status === "in_progress" ||
      latestIndexAttempt?.status === "not_started") &&
    latestIndexAttempt?.from_beginning &&
    // if there are errors in the latest index attempt, we don't want to show the loader
    !indexAttemptErrors?.items?.some(
      (error) => error.index_attempt_id === latestIndexAttempt?.id
    );

  const handleStatusUpdate = async (
    newStatus: ConnectorCredentialPairStatus
  ) => {
    setShowIsResolvingKickoffLoader(true); // Show fullscreen spinner
    await handleStatusChange(newStatus);
    setShowIsResolvingKickoffLoader(false); // Hide fullscreen spinner
  };

  const triggerReIndex = async (fromBeginning: boolean) => {
    if (!ccPair) return;

    setShowIsResolvingKickoffLoader(true);

    try {
      const result = await triggerIndexing(
        fromBeginning,
        ccPair.connector.id,
        ccPair.credential.id,
        ccPair.id
      );

      if (result.success) {
        toast.success(
          `${
            fromBeginning ? "Complete re-indexing" : "Indexing update"
          } started successfully`
        );
      } else {
        toast.error(result.message || "Failed to start indexing");
      }
    } catch (error) {
      console.error("Failed to trigger indexing:", error);
      toast.error(
        "An unexpected error occurred while trying to start indexing"
      );
    } finally {
      setShowIsResolvingKickoffLoader(false);
    }
  };

  useEffect(() => {
    if (isLoadingCCPair) {
      return;
    }
    if (ccPair && !ccPairError) {
      setHasLoadedOnce(true);
    }

    if (
      (hasLoadedOnce && (ccPairError || !ccPair)) ||
      (ccPair?.status === ConnectorCredentialPairStatus.DELETING &&
        !ccPair.connector)
    ) {
      finishConnectorDeletion();
    }
  }, [
    isLoadingCCPair,
    ccPair,
    ccPairError,
    hasLoadedOnce,
    finishConnectorDeletion,
  ]);

  const handleUpdateName = async (newName: string) => {
    try {
      const response = await updateConnectorCredentialPairName(
        ccPair?.id!,
        newName
      );
      if (!response.ok) {
        throw new Error(await response.text());
      }
      mutate(buildCCPairInfoUrl(ccPairId));
      toast.success("Connector name updated successfully");
    } catch (error) {
      toast.error("Failed to update connector name");
    }
  };

  const handleRefreshEdit = async () => {
    setEditingRefreshFrequency(true);
  };

  const handlePruningEdit = async () => {
    setEditingPruningFrequency(true);
  };

  const handleRefreshSubmit = async (
    propertyName: string,
    propertyValue: string
  ) => {
    const parsedRefreshFreqMinutes = parseInt(propertyValue, 10);

    if (isNaN(parsedRefreshFreqMinutes)) {
      toast.error("Invalid refresh frequency: must be an integer");
      return;
    }

    // Convert minutes to seconds
    const parsedRefreshFreqSeconds = parsedRefreshFreqMinutes * 60;

    try {
      const response = await updateConnectorCredentialPairProperty(
        ccPairId,
        propertyName,
        String(parsedRefreshFreqSeconds)
      );
      if (!response.ok) {
        throw new Error(await response.text());
      }
      mutate(buildCCPairInfoUrl(ccPairId));
      toast.success("Connector refresh frequency updated successfully");
    } catch (error) {
      toast.error("Failed to update connector refresh frequency");
    }
  };

  const handlePruningSubmit = async (
    propertyName: string,
    propertyValue: string
  ) => {
    const parsedFreqHours = parseFloat(propertyValue);

    if (isNaN(parsedFreqHours)) {
      toast.error("Invalid pruning frequency: must be a valid number");
      return;
    }

    // Convert hours to seconds
    const parsedFreqSeconds = parsedFreqHours * 3600;

    try {
      const response = await updateConnectorCredentialPairProperty(
        ccPairId,
        propertyName,
        String(parsedFreqSeconds)
      );
      if (!response.ok) {
        throw new Error(await response.text());
      }
      mutate(buildCCPairInfoUrl(ccPairId));
      toast.success("Connector pruning frequency updated successfully");
    } catch (error) {
      toast.error("Failed to update connector pruning frequency");
    }
  };

  if (isLoadingCCPair || isLoadingIndexAttempts) {
    return <ThreeDotsLoader />;
  }

  if (!ccPair || (!hasLoadedOnce && ccPairError)) {
    return (
      <ErrorCallout
        errorTitle={`Failed to fetch info on Connector with ID ${ccPairId}`}
        errorMsg={
          ccPairError?.info?.detail ||
          ccPairError?.toString() ||
          "Unknown error"
        }
      />
    );
  }

  const isDeleting = ccPair.status === ConnectorCredentialPairStatus.DELETING;

  const {
    prune_freq: pruneFreq,
    refresh_freq: refreshFreq,
    indexing_start: indexingStart,
  } = ccPair.connector;

  return (
    <>
      {showIsResolvingKickoffLoader && !isResolvingErrors && <Spinner />}
      {ReIndexModal}
      {ConfirmModal}

      {showDeleteConnectorConfirmModal && (
        <ConfirmEntityModal
          danger
          entityType="connector"
          entityName={ccPair.name}
          additionalDetails="Deleting this connector schedules a deletion job that removes its indexed documents and deletes it for every user."
          onClose={() => {
            setShowDeleteConnectorConfirmModal(false);
          }}
          onSubmit={scheduleConnectorDeletion}
        />
      )}

      {editingRefreshFrequency && (
        <EditPropertyModal
          propertyTitle="Refresh Frequency"
          propertyDetails="How often the connector should refresh (in minutes)"
          propertyName="refresh_frequency"
          propertyValue={String(Math.round((refreshFreq || 0) / 60))}
          validationSchema={RefreshFrequencySchema}
          onSubmit={handleRefreshSubmit}
          onClose={() => setEditingRefreshFrequency(false)}
        />
      )}

      {editingPruningFrequency && (
        <EditPropertyModal
          propertyTitle="Pruning Frequency"
          propertyDetails="How often the connector should be pruned (in hours)"
          propertyName="pruning_frequency"
          propertyValue={String(
            ((pruneFreq || 0) / 3600).toFixed(3).replace(/\.?0+$/, "")
          )}
          validationSchema={PruneFrequencySchema}
          onSubmit={handlePruningSubmit}
          onClose={() => setEditingPruningFrequency(false)}
        />
      )}

      {showIndexAttemptErrors && indexAttemptErrors && (
        <IndexAttemptErrorsModal
          errors={indexAttemptErrors}
          onClose={() => setShowIndexAttemptErrors(false)}
          onResolveAll={async () => {
            setShowIndexAttemptErrors(false);
            setShowIsResolvingKickoffLoader(true);
            await triggerReIndex(true);
          }}
          isResolvingErrors={isResolvingErrors}
        />
      )}

      <BackButton />
      <div
        className="flex
        items-center
        justify-between
        h-16
        pb-2
        border-b
        border-neutral-200
        dark:border-neutral-600"
      >
        <div className="my-auto">
          <SourceIcon iconSize={32} sourceType={ccPair.connector.source} />
        </div>

        <div className="ml-2 overflow-hidden text-ellipsis whitespace-nowrap flex-1 mr-4">
          <EditableStringFieldDisplay
            value={ccPair.name}
            isEditable={ccPair.is_editable_for_current_user}
            onUpdate={handleUpdateName}
            scale={2.1}
          />
        </div>

        <div className="ml-auto flex gap-x-2">
          {ccPair.is_editable_for_current_user && (
            <DropdownMenu>
              <DropdownMenuTrigger asChild>
                <Button prominence="secondary" icon={SvgSettings}>
                  Manage
                </Button>
              </DropdownMenuTrigger>
              <DropdownMenuContent align="end">
                <DropdownMenuItemWithTooltip
                  onClick={() => {
                    if (
                      !ccPair.indexing &&
                      ccPair.status !== ConnectorCredentialPairStatus.PAUSED &&
                      ccPair.status !== ConnectorCredentialPairStatus.INVALID
                    ) {
                      showReIndexModal();
                    }
                  }}
                  disabled={
                    ccPair.indexing ||
                    ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
                    ccPair.status === ConnectorCredentialPairStatus.INVALID
                  }
                  className="flex items-center gap-x-2 cursor-pointer px-3 py-2"
                  tooltip={
                    ccPair.indexing
                      ? "Cannot re-index while indexing is already in progress"
                      : ccPair.status === ConnectorCredentialPairStatus.PAUSED
                        ? "Resume the connector before re-indexing"
                        : ccPair.status ===
                            ConnectorCredentialPairStatus.INVALID
                          ? "Fix the connector configuration before re-indexing"
                          : undefined
                  }
                >
                  <RefreshCwIcon className="h-4 w-4" />
                  <span>Re-Index</span>
                </DropdownMenuItemWithTooltip>
                {!isDeleting && (
                  <DropdownMenuItemWithTooltip
                    onClick={() =>
                      handleStatusUpdate(
                        statusIsNotCurrentlyActive(ccPair.status)
                          ? ConnectorCredentialPairStatus.ACTIVE
                          : ConnectorCredentialPairStatus.PAUSED
                      )
                    }
                    disabled={isStatusUpdating}
                    className="flex items-center gap-x-2 cursor-pointer px-3 py-2"
                    tooltip={
                      isStatusUpdating ? "Status update in progress" : undefined
                    }
                  >
                    {statusIsNotCurrentlyActive(ccPair.status) ? (
                      <PlayIcon className="h-4 w-4" />
                    ) : (
                      <PauseIcon className="h-4 w-4" />
                    )}
                    <span>
                      {statusIsNotCurrentlyActive(ccPair.status)
                        ? "Resume"
                        : "Pause"}
                    </span>
                  </DropdownMenuItemWithTooltip>
                )}
                {!isDeleting && (
                  <DropdownMenuItemWithTooltip
                    onClick={() => {
                      setShowDeleteConnectorConfirmModal(true);
                    }}
                    disabled={!statusIsNotCurrentlyActive(ccPair.status)}
                    className="flex items-center gap-x-2 cursor-pointer px-3 py-2 text-red-600 hover:text-red-700 dark:text-red-400 dark:hover:text-red-300"
                    tooltip={
                      !statusIsNotCurrentlyActive(ccPair.status)
                        ? "Pause the connector before deleting"
                        : undefined
                    }
                  >
                    <Trash2Icon className="h-4 w-4" />
                    <span>Delete</span>
                  </DropdownMenuItemWithTooltip>
                )}
              </DropdownMenuContent>
            </DropdownMenu>
          )}
        </div>
      </div>

      {ccPair.deletion_failure_message &&
        ccPair.status === ConnectorCredentialPairStatus.DELETING && (
          <>
            <div className="mt-6" />
            <DeletionErrorStatus
              deletion_failure_message={ccPair.deletion_failure_message}
            />
          </>
        )}

      {ccPair.status === ConnectorCredentialPairStatus.INVALID && (
        <div className="mt-6">
          <Callout type="warning" title="Invalid Connector State">
            This connector is in an invalid state. Please update your
            credentials or create a new connector before re-indexing.
          </Callout>
        </div>
      )}

      {indexAttemptErrors && indexAttemptErrors.total_items > 0 && (
        <Alert className="border-alert bg-yellow-50 dark:bg-yellow-800 my-2 mt-6">
          <AlertCircle className="h-4 w-4 text-yellow-700 dark:text-yellow-500" />
          <AlertTitle className="text-yellow-950 dark:text-yellow-200 font-semibold">
            Some documents failed to index
          </AlertTitle>
          <AlertDescription className="text-yellow-900 dark:text-yellow-300">
            {isResolvingErrors ? (
              <span>
                <span className="text-sm text-yellow-700 dark:text-yellow-400 da animate-pulse">
                  Resolving failures
                </span>
              </span>
            ) : (
              <>
                We ran into some issues while processing some documents.{" "}
                <b
                  className="text-link cursor-pointer dark:text-blue-300"
                  onClick={() => setShowIndexAttemptErrors(true)}
                >
                  View details.
                </b>
              </>
            )}
          </AlertDescription>
        </Alert>
      )}

      <Title className="mb-2 mt-6" size="md">
        Indexing
      </Title>

      <Card className="px-8 py-12">
        <div className="flex">
          <div className="w-[200px]">
            <div className="text-sm font-medium mb-1">Status</div>
            <CCPairStatus
              ccPairStatus={ccPair.status}
              inRepeatedErrorState={ccPair.in_repeated_error_state}
              lastIndexAttemptStatus={latestIndexAttempt?.status}
            />
          </div>

          <div className="w-[200px]">
            <div className="text-sm font-medium mb-1">Documents Indexed</div>
            <div className="text-sm text-text-default flex items-center gap-x-1">
              {ccPair.num_docs_indexed.toLocaleString()}
              {ccPair.status ===
                ConnectorCredentialPairStatus.INITIAL_INDEXING &&
                ccPair.overall_indexing_speed !== null &&
                ccPair.num_docs_indexed > 0 && (
                  <div className="ml-0.5 text-xs font-medium">
                    ({ccPair.overall_indexing_speed.toFixed(1)} docs / min)
                  </div>
                )}
            </div>
          </div>

          <div className="w-[200px]">
            <div className="text-sm font-medium mb-1">Last Indexed</div>
            <div className="text-sm text-text-default">
              {timeAgo(ccPair?.last_indexed) ?? "-"}
            </div>
          </div>

          {ccPair.access_type === "sync" && (
            <>
              <div className="w-[200px]">
                {/* TODO: Remove className and switch to text03 once Text is fully integrated across this page */}
                <Text as="p" className="text-sm font-medium mb-1">
                  Permission Syncing
                </Text>
                {ccPair.permission_syncing ||
                ccPair.last_permission_sync_attempt_status ? (
                  <PermissionSyncStatus
                    status={ccPair.last_permission_sync_attempt_status}
                    errorMsg={ccPair.last_permission_sync_attempt_error_message}
                  />
                ) : (
                  <PermissionSyncStatus status={null} />
                )}
              </div>

              <div className="w-[200px]">
                {/* TODO: Remove className and switch to text03 once Text is fully integrated across this page */}
                <Text as="p" className="text-sm font-medium mb-1">
                  Last Synced
                </Text>
                <Text as="p" className="text-sm text-text-default">
                  {ccPair.last_permission_sync_attempt_finished
                    ? timeAgo(ccPair.last_permission_sync_attempt_finished)
                    : timeAgo(ccPair.last_full_permission_sync) ?? "-"}
                </Text>
              </div>
            </>
          )}
        </div>
      </Card>

      {credentialTemplates[ccPair.connector.source] &&
        ccPair.is_editable_for_current_user && (
          <>
            <Title size="md" className="mt-10 mb-2">
              Credential
            </Title>

            <div className="mt-2">
              <CredentialSection
                ccPair={ccPair}
                sourceType={ccPair.connector.source}
                refresh={() => refresh()}
              />
            </div>
          </>
        )}

      {ccPair.connector.connector_specific_config &&
        Object.keys(ccPair.connector.connector_specific_config).length > 0 && (
          <>
            <Title size="md" className="mt-10 mb-2">
              Connector Configuration
            </Title>

            <Card className="px-8 py-4">
              <ConfigDisplay
                configEntries={buildConfigEntries(
                  ccPair.connector.connector_specific_config,
                  ccPair.connector.source
                )}
              />

              {/* Inline file management for file connectors */}
              {canManageInlineFileConnectorFiles && (
                <div className="mt-6">
                  <InlineFileManagement
                    connectorId={ccPair.connector.id}
                    onRefresh={refresh}
                  />
                </div>
              )}
            </Card>
          </>
        )}

      <div className="mt-6">
        <div className="flex">
          <AdvancedOptionsToggle
            showAdvancedOptions={showAdvancedOptions}
            setShowAdvancedOptions={setShowAdvancedOptions}
            title="Advanced"
          />
        </div>
        {showAdvancedOptions && (
          <div className="pb-16">
            {(pruneFreq || indexingStart || refreshFreq) && (
              <>
                <Title size="md" className="mt-3 mb-2">
                  Advanced Configuration
                </Title>
                <Card className="px-8 py-4">
                  <div>
                    <AdvancedConfigDisplay
                      pruneFreq={pruneFreq}
                      indexingStart={indexingStart}
                      refreshFreq={refreshFreq}
                      onRefreshEdit={handleRefreshEdit}
                      onPruningEdit={handlePruningEdit}
                    />
                  </div>
                </Card>
              </>
            )}

            <Title size="md" className="mt-6 mb-2">
              Indexing Attempts
            </Title>
            {indexAttempts && (
              <IndexAttemptsTable
                ccPair={ccPair}
                indexAttempts={indexAttempts}
                currentPage={currentPage}
                totalPages={totalPages}
                onPageChange={goToPage}
              />
            )}
          </div>
        )}
      </div>
    </>
  );
}

export default function Page(props: { params: Promise<{ ccPairId: string }> }) {
  const params = use(props.params);
  const ccPairId = parseInt(params.ccPairId);

  return (
    <div className="mx-auto w-[800px]">
      <Main ccPairId={ccPairId} />
    </div>
  );
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/types.ts
================================================
import { Connector } from "@/lib/connectors/connectors";
import { Credential } from "@/lib/connectors/credentials";
import {
  DeletionAttemptSnapshot,
  IndexAttemptSnapshot,
  ValidStatuses,
  AccessType,
} from "@/lib/types";
import { UUID } from "crypto";

export enum ConnectorCredentialPairStatus {
  SCHEDULED = "SCHEDULED",
  INITIAL_INDEXING = "INITIAL_INDEXING",
  ACTIVE = "ACTIVE",
  PAUSED = "PAUSED",
  DELETING = "DELETING",
  INVALID = "INVALID",
}

export enum PermissionSyncStatusEnum {
  CANCELED = "canceled",
  COMPLETED_WITH_ERRORS = "completed_with_errors",
  FAILED = "failed",
  IN_PROGRESS = "in_progress",
  NOT_STARTED = "not_started",
  SUCCESS = "success",
}

/**
 * Returns true if the status is not currently active (i.e. paused or invalid), but not deleting
 */
export function statusIsNotCurrentlyActive(
  status: ConnectorCredentialPairStatus
): boolean {
  return (
    status === ConnectorCredentialPairStatus.PAUSED ||
    status === ConnectorCredentialPairStatus.INVALID
  );
}

export interface CCPairFullInfo {
  id: number;
  name: string;
  status: ConnectorCredentialPairStatus;
  in_repeated_error_state: boolean;
  num_docs_indexed: number;
  connector: Connector<any>;
  credential: Credential<any>;
  number_of_index_attempts: number;
  last_index_attempt_status: ValidStatuses | null;
  latest_deletion_attempt: DeletionAttemptSnapshot | null;
  access_type: AccessType;
  is_editable_for_current_user: boolean;
  deletion_failure_message: string | null;
  indexing: boolean;
  creator: UUID | null;
  creator_email: string | null;

  last_indexed: string | null;
  last_pruned: string | null;
  last_full_permission_sync: string | null;
  overall_indexing_speed: number | null;
  latest_checkpoint_description: string | null;

  // permission sync attempt status
  last_permission_sync_attempt_status: PermissionSyncStatusEnum | null;
  permission_syncing: boolean;
  last_permission_sync_attempt_finished: string | null;
  last_permission_sync_attempt_error_message: string | null;
}

export interface PaginatedIndexAttempts {
  index_attempts: IndexAttemptSnapshot[];
  page: number;
  total_pages: number;
}

export interface IndexAttemptError {
  id: number;
  connector_credential_pair_id: number;

  document_id: string | null;
  document_link: string | null;

  entity_id: string | null;
  failed_time_range_start: string | null;
  failed_time_range_end: string | null;

  failure_message: string;
  is_resolved: boolean;

  time_created: string;

  index_attempt_id: number;
}

export interface PaginatedIndexAttemptErrors {
  items: IndexAttemptError[];
  total_items: number;
}


================================================
FILE: web/src/app/admin/connector/[ccPairId]/useStatusChange.tsx
================================================
"use client";

import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types";
import { mutate } from "swr";
import { buildCCPairInfoUrl } from "./lib";
import { setCCPairStatus } from "@/lib/ccPair";
import { useState } from "react";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";

// Export the status change functionality separately
export function useStatusChange(ccPair: CCPairFullInfo | null) {
  const [isUpdating, setIsUpdating] = useState(false);
  const [showConfirmModal, setShowConfirmModal] = useState(false);

  const updateStatus = async (newStatus: ConnectorCredentialPairStatus) => {
    if (!ccPair) return false;

    setIsUpdating(true);

    try {
      // Call the backend to update the status
      await setCCPairStatus(ccPair.id, newStatus);

      // Use mutate to revalidate the status on the backend
      await mutate(buildCCPairInfoUrl(ccPair.id));
    } catch (error) {
      console.error("Failed to update status", error);
    } finally {
      // Reset local updating state and button text after mutation
      setIsUpdating(false);
    }

    return true;
  };

  const handleStatusChange = async (
    newStatus: ConnectorCredentialPairStatus
  ) => {
    if (isUpdating || !ccPair) return false; // Prevent double-clicks or multiple requests

    if (
      ccPair.status === ConnectorCredentialPairStatus.INVALID &&
      newStatus === ConnectorCredentialPairStatus.ACTIVE
    ) {
      setShowConfirmModal(true);
      return false;
    } else {
      return await updateStatus(newStatus);
    }
  };

  const ConfirmModal =
    showConfirmModal && ccPair ? (
      <ConfirmEntityModal
        entityType="Invalid Connector"
        entityName={ccPair.name}
        onClose={() => setShowConfirmModal(false)}
        onSubmit={() => {
          setShowConfirmModal(false);
          updateStatus(ConnectorCredentialPairStatus.ACTIVE);
        }}
        additionalDetails="This connector was previously marked as invalid. Please verify that your configuration is correct before re-enabling. Are you sure you want to proceed?"
        actionButtonText="Re-Enable"
      />
    ) : null;

  return {
    handleStatusChange,
    isUpdating,
    ConfirmModal,
  };
}


================================================
FILE: web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx
================================================
"use client";

import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR, { mutate } from "swr";
import { AdminPageTitle } from "@/components/admin/Title";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { toast } from "@/hooks/useToast";
import { useFormContext } from "@/components/context/FormContext";
import { getSourceDisplayName, getSourceMetadata } from "@/lib/sources";
import { SourceIcon } from "@/components/SourceIcon";
import { useEffect, useRef, useState } from "react";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { submitFiles } from "@/app/admin/connectors/[connector]/pages/utils/files";
import { submitGoogleSite } from "@/app/admin/connectors/[connector]/pages/utils/google_site";
import AdvancedFormPage from "@/app/admin/connectors/[connector]/pages/Advanced";
import DynamicConnectionForm from "@/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm";
import CreateCredential from "@/components/credentials/actions/CreateCredential";
import ModifyCredential from "@/components/credentials/actions/ModifyCredential";
import {
  ConfigurableSources,
  oauthSupportedSources,
  ValidSources,
} from "@/lib/types";
import { Credential, credentialTemplates } from "@/lib/connectors/credentials";
import {
  ConnectionConfiguration,
  connectorConfigs,
  createConnectorInitialValues,
  createConnectorValidationSchema,
  defaultPruneFreqHours,
  defaultRefreshFreqMinutes,
  isLoadState,
  Connector,
  ConnectorBase,
} from "@/lib/connectors/connectors";
import Modal from "@/refresh-components/Modal";
import { GmailMain } from "@/app/admin/connectors/[connector]/pages/gmail/GmailPage";
import {
  useGmailCredentials,
  useGoogleDriveCredentials,
} from "@/app/admin/connectors/[connector]/pages/utils/hooks";
import { Formik } from "formik";
import NavigationRow from "@/app/admin/connectors/[connector]/NavigationRow";
import { useRouter } from "next/navigation";
import CardSection from "@/components/admin/CardSection";
import { prepareOAuthAuthorizationRequest } from "@/lib/oauth_utils";
import {
  EE_ENABLED,
  NEXT_PUBLIC_CLOUD_ENABLED,
  NEXT_PUBLIC_TEST_ENV,
} from "@/lib/constants";
import {
  getConnectorOauthRedirectUrl,
  useOAuthDetails,
} from "@/lib/connectors/oauth";
import { CreateStdOAuthCredential } from "@/components/credentials/actions/CreateStdOAuthCredential";
import { Spinner } from "@/components/Spinner";
import { Button } from "@opal/components";
import { deleteConnector } from "@/lib/connector";
import ConnectorDocsLink from "@/components/admin/connectors/ConnectorDocsLink";
import Text from "@/refresh-components/texts/Text";
import { SvgKey, SvgAlertCircle } from "@opal/icons";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import Link from "next/link";

export interface AdvancedConfig {
  refreshFreq: number;
  pruneFreq: number;
  indexingStart: string;
}

const BASE_CONNECTOR_URL = "/api/manage/admin/connector";
const CONNECTOR_CREATION_TIMEOUT_MS = 10000; // ~10 seconds is reasonable for longer connector validation

export async function submitConnector<T>(
  connector: ConnectorBase<T>,
  connectorId?: number,
  fakeCredential?: boolean
): Promise<{ message: string; isSuccess: boolean; response?: Connector<T> }> {
  const isUpdate = connectorId !== undefined;
  if (!connector.connector_specific_config) {
    connector.connector_specific_config = {} as T;
  }

  try {
    if (fakeCredential) {
      const response = await fetch(
        "/api/manage/admin/connector-with-mock-credential",
        {
          method: isUpdate ? "PATCH" : "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({ ...connector }),
        }
      );
      if (response.ok) {
        const responseJson = await response.json();
        return { message: "Success!", isSuccess: true, response: responseJson };
      } else {
        const errorData = await response.json();
        return { message: `Error: ${errorData.detail}`, isSuccess: false };
      }
    } else {
      const response = await fetch(
        BASE_CONNECTOR_URL + (isUpdate ? `/${connectorId}` : ""),
        {
          method: isUpdate ? "PATCH" : "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify(connector),
        }
      );

      if (response.ok) {
        const responseJson = await response.json();
        return { message: "Success!", isSuccess: true, response: responseJson };
      } else {
        const errorData = await response.json();
        return { message: `Error: ${errorData.detail}`, isSuccess: false };
      }
    }
  } catch (error) {
    return { message: `Error: ${error}`, isSuccess: false };
  }
}

export default function AddConnector({
  connector,
}: {
  connector: ConfigurableSources;
}) {
  const [currentPageUrl, setCurrentPageUrl] = useState<string | null>(null);
  const [oauthUrl, setOauthUrl] = useState<string | null>(null);
  const [isAuthorizing, setIsAuthorizing] = useState(false);
  const [isAuthorizeVisible, setIsAuthorizeVisible] = useState(false);
  useEffect(() => {
    if (typeof window !== "undefined") {
      setCurrentPageUrl(window.location.href);
    }

    if (EE_ENABLED && (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV)) {
      const sourceMetadata = getSourceMetadata(connector);
      if (sourceMetadata?.oauthSupported == true) {
        setIsAuthorizeVisible(true);
      }
    }
  }, []);

  const router = useRouter();

  // State for managing credentials and files
  const [currentCredential, setCurrentCredential] =
    useState<Credential<any> | null>(null);
  const [createCredentialFormToggle, setCreateCredentialFormToggle] =
    useState(false);

  // Fetch credentials data
  const { data: credentials } = useSWR<Credential<any>[]>(
    buildSimilarCredentialInfoURL(connector),
    errorHandlingFetcher,
    { refreshInterval: 5000 }
  );

  const { data: editableCredentials } = useSWR<Credential<any>[]>(
    buildSimilarCredentialInfoURL(connector, true),
    errorHandlingFetcher,
    { refreshInterval: 5000 }
  );

  const { data: oauthDetails, isLoading: oauthDetailsLoading } =
    useOAuthDetails(connector);

  // Get credential template and configuration
  const credentialTemplate = credentialTemplates[connector];
  const configuration: ConnectionConfiguration = connectorConfigs[connector];

  // Form context and popup management
  const { setFormStep, setAllowCreate, formStep } = useFormContext();
  const [uploading, setUploading] = useState(false);
  const [creatingConnector, setCreatingConnector] = useState(false);

  // Connector creation timeout management
  const timeoutErrorHappenedRef = useRef<boolean>(false);
  const connectorIdRef = useRef<number | null>(null);

  useEffect(() => {
    return () => {
      // Cleanup refs when component unmounts
      timeoutErrorHappenedRef.current = false;
      connectorIdRef.current = null;
    };
  }, []);

  // Hooks for Google Drive and Gmail credentials
  const { liveGDriveCredential } = useGoogleDriveCredentials(connector);
  const { liveGmailCredential } = useGmailCredentials(connector);

  // Check if credential is activated
  const credentialActivated =
    (connector === "google_drive" && liveGDriveCredential) ||
    (connector === "gmail" && liveGmailCredential) ||
    currentCredential;

  // Check if there are no credentials
  const noCredentials = credentialTemplate == null;

  useEffect(() => {
    if (noCredentials && 1 != formStep) {
      setFormStep(Math.max(1, formStep));
    }

    if (!noCredentials && !credentialActivated && formStep != 0) {
      setFormStep(Math.min(formStep, 0));
    }
  }, [noCredentials, formStep, setFormStep]);

  const convertStringToDateTime = (indexingStart: string | null) => {
    return indexingStart ? new Date(indexingStart) : null;
  };

  const displayName = getSourceDisplayName(connector) || connector;
  const sourceMetadata = getSourceMetadata(connector);
  const hasFederatedOption = sourceMetadata.federated === true;

  if (!credentials || !editableCredentials) {
    return <></>;
  }

  // Credential handler functions
  const refresh = () => {
    mutate(buildSimilarCredentialInfoURL(connector));
  };

  const onDeleteCredential = async (credential: Credential<any | null>) => {
    const response = await deleteCredential(credential.id, true);
    if (response.ok) {
      toast.success("Credential deleted successfully!");
    } else {
      const errorData = await response.json();
      toast.error(errorData.detail || errorData.message);
    }
  };

  const onSwap = async (selectedCredential: Credential<any>) => {
    setCurrentCredential(selectedCredential);
    setAllowCreate(true);
    toast.success("Swapped credential successfully!");
    refresh();
  };

  const onSuccess = () => {
    router.push("/admin/indexing/status?message=connector-created");
  };

  const handleAuthorize = async () => {
    // authorize button handler
    // gets an auth url from the server and directs the user to it in a popup

    if (!currentPageUrl) return;

    setIsAuthorizing(true);
    try {
      const response = await prepareOAuthAuthorizationRequest(
        connector,
        currentPageUrl
      );
      if (response.url) {
        setOauthUrl(response.url);
        window.open(response.url, "_blank", "noopener,noreferrer");
      } else {
        toast.error("Failed to fetch OAuth URL");
      }
    } catch (error: unknown) {
      // Narrow the type of error
      if (error instanceof Error) {
        toast.error(`Error: ${error.message}`);
      } else {
        // Handle non-standard errors
        toast.error("An unknown error occurred");
      }
    } finally {
      setIsAuthorizing(false);
    }
  };

  return (
    <Formik
      initialValues={createConnectorInitialValues(connector)}
      validationSchema={createConnectorValidationSchema(connector)}
      onSubmit={async (values) => {
        const {
          name,
          groups,
          access_type,
          pruneFreq,
          indexingStart,
          refreshFreq,
          auto_sync_options,
          ...connector_specific_config
        } = values;

        // Apply special transforms according to application logic
        const transformedConnectorSpecificConfig = Object.entries(
          connector_specific_config
        ).reduce(
          (acc, [key, value]) => {
            // Filter out empty strings from arrays
            if (Array.isArray(value)) {
              value = (value as any[]).filter(
                (item) => typeof item !== "string" || item.trim() !== ""
              );
            }
            const matchingConfigValue = configuration.values.find(
              (configValue) => configValue.name === key
            );
            if (
              matchingConfigValue &&
              "transform" in matchingConfigValue &&
              matchingConfigValue.transform
            ) {
              acc[key] = matchingConfigValue.transform(value as string[]);
            } else {
              acc[key] = value;
            }
            return acc;
          },
          {} as Record<string, any>
        );

        // Apply advanced configuration-specific transforms.
        const advancedConfiguration: any = {
          pruneFreq: (pruneFreq ?? defaultPruneFreqHours) * 3600,
          indexingStart: convertStringToDateTime(indexingStart),
          refreshFreq: (refreshFreq ?? defaultRefreshFreqMinutes) * 60,
        };

        // File-specific handling
        const selectedFiles = Array.isArray(values.file_locations)
          ? values.file_locations
          : values.file_locations
            ? [values.file_locations]
            : [];

        // Google sites-specific handling
        if (connector == "google_sites") {
          const response = await submitGoogleSite(
            selectedFiles,
            values?.base_url,
            advancedConfiguration.refreshFreq,
            advancedConfiguration.pruneFreq,
            advancedConfiguration.indexingStart,
            values.access_type,
            groups,
            name
          );
          if (response) {
            onSuccess();
          }
          return;
        }
        // File-specific handling
        if (connector == "file") {
          setUploading(true);
          try {
            const response = await submitFiles(
              selectedFiles,
              name,
              access_type,
              groups
            );
            if (response) {
              onSuccess();
            }
          } catch (error) {
            toast.error("Error uploading files");
          } finally {
            setUploading(false);
          }

          return;
        }

        setCreatingConnector(true);
        try {
          const timeoutPromise = new Promise<{ isTimeout: true }>((resolve) =>
            setTimeout(
              () => resolve({ isTimeout: true }),
              CONNECTOR_CREATION_TIMEOUT_MS
            )
          );

          const connectorCreationPromise = (async () => {
            const { message, isSuccess, response } = await submitConnector<any>(
              {
                connector_specific_config: transformedConnectorSpecificConfig,
                input_type: isLoadState(connector) ? "load_state" : "poll", // single case
                name: name,
                source: connector,
                access_type: access_type,
                refresh_freq: advancedConfiguration.refreshFreq || null,
                prune_freq: advancedConfiguration.pruneFreq || null,
                indexing_start: advancedConfiguration.indexingStart || null,
                groups: groups,
              },
              undefined,
              credentialActivated ? false : true
            );

            // Store the connector id immediately for potential timeout
            if (response?.id) {
              connectorIdRef.current = response.id;
            }

            // If no credential
            if (!credentialActivated) {
              if (isSuccess) {
                onSuccess();
              } else {
                toast.error(message);
              }
            }

            // With credential
            if (credentialActivated && isSuccess && response) {
              const credential =
                currentCredential ||
                liveGDriveCredential ||
                liveGmailCredential;
              const linkCredentialResponse = await linkCredential(
                response.id,
                credential?.id!,
                name,
                access_type,
                groups,
                auto_sync_options
              );
              if (linkCredentialResponse.ok) {
                onSuccess();
              } else {
                const errorData = await linkCredentialResponse.json();

                if (!timeoutErrorHappenedRef.current) {
                  // Only show error if timeout didn't happen
                  toast.error(errorData.detail || errorData.message);
                }
              }
            } else if (isSuccess) {
              onSuccess();
            } else {
              toast.error(message);
            }

            timeoutErrorHappenedRef.current = false;
            return;
          })();

          const result = (await Promise.race([
            connectorCreationPromise,
            timeoutPromise,
          ])) as {
            isTimeout?: true;
          };

          if (result.isTimeout) {
            timeoutErrorHappenedRef.current = true;
            toast.error(
              `Operation timed out after ${
                CONNECTOR_CREATION_TIMEOUT_MS / 1000
              } seconds. Check your configuration for errors?`
            );

            if (connectorIdRef.current) {
              await deleteConnector(connectorIdRef.current);
              connectorIdRef.current = null;
            }
          }
          return;
        } finally {
          setCreatingConnector(false);
        }
      }}
    >
      {(formikProps) => (
        <div className="mx-auto w-full">
          {uploading && <Spinner />}

          {creatingConnector && <Spinner />}

          <AdminPageTitle
            includeDivider={false}
            icon={<SourceIcon iconSize={32} sourceType={connector} />}
            title={
              hasFederatedOption ? (
                <span className="inline-flex items-center gap-1.5">
                  {displayName}
                  <SimpleTooltip
                    tooltip={
                      <div className="flex flex-col gap-2">
                        <Text as="p" textLight05>
                          A federated search option is available for this
                          connector. It will result in greater latency and
                          reduced search quality.
                        </Text>
                        <Link
                          href={`/admin/connectors/${connector}?mode=federated`}
                          className="text-action-link-04 hover:underline text-sm"
                        >
                          Use federated version instead →
                        </Link>
                      </div>
                    }
                    side="bottom"
                    delayDuration={0}
                  >
                    <SvgAlertCircle size={20} />
                  </SimpleTooltip>
                </span>
              ) : (
                displayName
              )
            }
            farRightElement={undefined}
          />

          {formStep == 0 && (
            <CardSection>
              <Text as="p" headingH3 className="pb-2">
                Select a credential
              </Text>

              {connector == ValidSources.Gmail ? (
                <GmailMain />
              ) : (
                <>
                  <ModifyCredential
                    showIfEmpty
                    accessType={formikProps.values.access_type}
                    defaultedCredential={currentCredential!}
                    credentials={credentials}
                    editableCredentials={editableCredentials}
                    onDeleteCredential={onDeleteCredential}
                    onSwitch={onSwap}
                  />
                  {!createCredentialFormToggle && (
                    <div className="mt-6 flex gap-4">
                      {/* Button to pop up a form to manually enter credentials */}
                      <Button
                        onClick={async () => {
                          if (oauthDetails && oauthDetails.oauth_enabled) {
                            if (oauthDetails.additional_kwargs.length > 0) {
                              setCreateCredentialFormToggle(true);
                            } else {
                              const redirectUrl =
                                await getConnectorOauthRedirectUrl(
                                  connector,
                                  {}
                                );
                              // if redirect is supported, just use it
                              if (redirectUrl) {
                                window.location.href = redirectUrl;
                              } else {
                                setCreateCredentialFormToggle(
                                  (createConnectorToggle) =>
                                    !createConnectorToggle
                                );
                              }
                            }
                          } else {
                            setCreateCredentialFormToggle(
                              (createConnectorToggle) => !createConnectorToggle
                            );
                          }
                        }}
                      >
                        Create New
                      </Button>
                      {/* Button to sign in via OAuth */}
                      {oauthSupportedSources.includes(connector) &&
                        (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV) && (
                          <Button
                            disabled={isAuthorizing}
                            variant="action"
                            onClick={handleAuthorize}
                            hidden={!isAuthorizeVisible}
                          >
                            {isAuthorizing
                              ? "Authorizing..."
                              : `Authorize with ${getSourceDisplayName(
                                  connector
                                )}`}
                          </Button>
                        )}
                    </div>
                  )}

                  {createCredentialFormToggle && (
                    <Modal
                      open
                      onOpenChange={() => setCreateCredentialFormToggle(false)}
                    >
                      <Modal.Content>
                        <Modal.Header
                          icon={SvgKey}
                          title={`Create a ${getSourceDisplayName(
                            connector
                          )} credential`}
                          onClose={() => setCreateCredentialFormToggle(false)}
                        />
                        <Modal.Body>
                          {oauthDetailsLoading ? (
                            <Spinner />
                          ) : (
                            <>
                              {oauthDetails && oauthDetails.oauth_enabled ? (
                                <CreateStdOAuthCredential
                                  sourceType={connector}
                                  additionalFields={
                                    oauthDetails.additional_kwargs
                                  }
                                />
                              ) : (
                                <CreateCredential
                                  close
                                  refresh={refresh}
                                  sourceType={connector}
                                  accessType={formikProps.values.access_type}
                                  onSwitch={onSwap}
                                  onClose={() =>
                                    setCreateCredentialFormToggle(false)
                                  }
                                />
                              )}
                            </>
                          )}
                        </Modal.Body>
                      </Modal.Content>
                    </Modal>
                  )}
                </>
              )}
            </CardSection>
          )}

          {formStep == 1 && (
            <CardSection className="w-full py-8 flex gap-y-6 flex-col max-w-3xl px-12 mx-auto">
              <DynamicConnectionForm
                values={formikProps.values}
                config={configuration}
                connector={connector}
                currentCredential={
                  currentCredential ||
                  liveGDriveCredential ||
                  liveGmailCredential ||
                  null
                }
              />
              <ConnectorDocsLink sourceType={connector} />
            </CardSection>
          )}

          {formStep === 2 && (
            <CardSection>
              <AdvancedFormPage />
            </CardSection>
          )}

          <NavigationRow
            activatedCredential={credentialActivated != null}
            isValid={formikProps.isValid}
            onSubmit={formikProps.handleSubmit}
            noCredentials={noCredentials}
            noAdvanced={connector == "file"}
          />
        </div>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/ConnectorWrapper.tsx
================================================
"use client";

import {
  ConfigurableSources,
  FederatedConnectorDetail,
  federatedSourceToRegularSource,
  ValidSources,
} from "@/lib/types";
import AddConnector from "./AddConnectorPage";
import { FormProvider } from "@/components/context/FormContext";
import Sidebar from "../../../../sections/sidebar/CreateConnectorSidebar";
import { HeaderTitle } from "@/components/header/HeaderTitle";
import Button from "@/refresh-components/buttons/Button";
import { isValidSource, getSourceMetadata } from "@/lib/sources";
import { FederatedConnectorForm } from "@/components/admin/federated/FederatedConnectorForm";
import { useSearchParams } from "next/navigation";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { Credential } from "@/lib/connectors/credentials";
import { useFederatedConnectors } from "@/lib/hooks";
import Text from "@/refresh-components/texts/Text";
import { useToastFromQuery } from "@/hooks/useToast";

export default function ConnectorWrapper({
  connector,
}: {
  connector: ConfigurableSources;
}) {
  const searchParams = useSearchParams();
  const mode = searchParams?.get("mode"); // 'federated' or 'regular'

  useToastFromQuery({
    oauth_failed: {
      message: "OAuth authentication failed. Please try again.",
      type: "error",
    },
  });

  // Check if the connector is valid
  if (!isValidSource(connector)) {
    return (
      <FormProvider connector={connector}>
        <div className="flex justify-center w-full h-full">
          <Sidebar />
          <div className="mt-12 w-full max-w-3xl mx-auto">
            <div className="mx-auto flex flex-col gap-y-2">
              <HeaderTitle>
                <p>&lsquo;{connector}&rsquo; is not a valid Connector Type!</p>
              </HeaderTitle>
              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <Button
                onClick={() => window.open("/admin/indexing/status", "_self")}
                className="mr-auto"
              >
                {" "}
                Go home{" "}
              </Button>
            </div>
          </div>
        </div>
      </FormProvider>
    );
  }

  const sourceMetadata = getSourceMetadata(connector);
  const supportsFederated = sourceMetadata.federated === true;

  // Only show federated form if explicitly requested via URL parameter
  const showFederatedForm = mode === "federated" && supportsFederated;

  // For federated form, use the specialized form without FormProvider
  if (showFederatedForm) {
    return (
      <div className="flex justify-center w-full h-full">
        <div className="mt-12 w-full max-w-4xl mx-auto">
          <FederatedConnectorForm connector={connector} />
        </div>
      </div>
    );
  }

  // For regular connectors, use the existing flow
  return (
    <FormProvider connector={connector}>
      <div className="flex justify-center w-full h-full">
        <Sidebar />
        <div className="mt-12 w-full max-w-3xl mx-auto">
          <AddConnector connector={connector} />
        </div>
      </div>
    </FormProvider>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/NavigationRow.tsx
================================================
import { useFormContext } from "@/components/context/FormContext";
import { Button } from "@opal/components";
import { SvgArrowLeft, SvgArrowRight, SvgPlusCircle } from "@opal/icons";

const NavigationRow = ({
  noAdvanced,
  noCredentials,
  activatedCredential,
  onSubmit,
  isValid,
}: {
  isValid: boolean;
  onSubmit: () => void;
  noAdvanced: boolean;
  noCredentials: boolean;
  activatedCredential: boolean;
}) => {
  const { formStep, prevFormStep, nextFormStep } = useFormContext();

  return (
    <div className="mt-4 w-full grid grid-cols-3">
      <div>
        {((formStep > 0 && !noCredentials) ||
          (formStep > 1 && !noAdvanced)) && (
          <Button
            prominence="secondary"
            onClick={prevFormStep}
            icon={SvgArrowLeft}
          >
            Previous
          </Button>
        )}
      </div>
      <div className="flex justify-center">
        {(formStep > 0 || noCredentials) && (
          <Button
            disabled={!isValid}
            rightIcon={SvgPlusCircle}
            onClick={onSubmit}
          >
            Create Connector
          </Button>
        )}
      </div>
      <div className="flex justify-end">
        {formStep === 0 && (
          <Button
            disabled={!activatedCredential}
            variant="action"
            rightIcon={SvgArrowRight}
            onClick={() => nextFormStep()}
          >
            Continue
          </Button>
        )}
        {!noAdvanced && formStep === 1 && (
          <Button
            disabled={!isValid}
            prominence="secondary"
            rightIcon={SvgArrowRight}
            onClick={() => nextFormStep()}
          >
            Advanced
          </Button>
        )}
      </div>
    </div>
  );
};
export default NavigationRow;


================================================
FILE: web/src/app/admin/connectors/[connector]/auth/callback/route.ts
================================================
import { getDomain } from "@/lib/redirectSS";
import { buildUrl } from "@/lib/utilsSS";
import { NextRequest, NextResponse } from "next/server";
import { cookies } from "next/headers";
import {
  CRAFT_OAUTH_COOKIE_NAME,
  CRAFT_CONFIGURE_PATH,
} from "@/app/craft/v1/constants";
import { processCookies } from "@/lib/userSS";

export const GET = async (request: NextRequest) => {
  const requestCookies = await cookies();
  const connector = request.url.includes("gmail") ? "gmail" : "google-drive";

  const callbackEndpoint = `/manage/connector/${connector}/callback`;
  const url = new URL(buildUrl(callbackEndpoint));
  url.search = request.nextUrl.search;

  const response = await fetch(url.toString(), {
    headers: {
      cookie: processCookies(requestCookies),
    },
  });

  if (!response.ok) {
    return NextResponse.redirect(
      new URL(
        `/admin/connectors/${connector}?message=oauth_failed`,
        getDomain(request)
      )
    );
  }

  // Check for build mode OAuth flag (redirects to build admin panel)
  const isBuildMode =
    requestCookies.get(CRAFT_OAUTH_COOKIE_NAME)?.value === "true";
  if (isBuildMode) {
    const redirectResponse = NextResponse.redirect(
      new URL(CRAFT_CONFIGURE_PATH, getDomain(request))
    );
    redirectResponse.cookies.delete(CRAFT_OAUTH_COOKIE_NAME);
    return redirectResponse;
  }

  return NextResponse.redirect(
    new URL(`/admin/connectors/${connector}`, getDomain(request))
  );
};


================================================
FILE: web/src/app/admin/connectors/[connector]/oauth/callback/page.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { usePathname, useSearchParams } from "next/navigation";
import { AdminPageTitle } from "@/components/admin/Title";
import { getSourceMetadata, isValidSource } from "@/lib/sources";
import { ValidSources } from "@/lib/types";
import CardSection from "@/components/admin/CardSection";
import { handleOAuthAuthorizationResponse } from "@/lib/oauth_utils";
import { SvgKey } from "@opal/icons";
export default function OAuthCallbackPage() {
  const searchParams = useSearchParams();

  const [statusMessage, setStatusMessage] = useState("Processing...");
  const [statusDetails, setStatusDetails] = useState(
    "Please wait while we complete the setup."
  );
  const [redirectUrl, setRedirectUrl] = useState<string | null>(null);
  const [isError, setIsError] = useState(false);
  const [pageTitle, setPageTitle] = useState(
    "Authorize with Third-Party service"
  );

  // Extract query parameters
  const code = searchParams?.get("code");
  const state = searchParams?.get("state");

  const pathname = usePathname();
  const connector = pathname?.split("/")[3];

  useEffect(() => {
    const onFirstLoad = async () => {
      // Examples
      // connector (url segment)= "google-drive"
      // sourceType (for looking up metadata) = "google_drive"

      if (!code || !state) {
        setStatusMessage("Improperly formed OAuth authorization request.");
        setStatusDetails(
          !code ? "Missing authorization code." : "Missing state parameter."
        );
        setIsError(true);
        return;
      }

      if (!connector) {
        setStatusMessage(
          `The specified connector source type ${connector} does not exist.`
        );
        setStatusDetails(`${connector} is not a valid source type.`);
        setIsError(true);
        return;
      }

      const sourceType = connector.replaceAll("-", "_");
      if (!isValidSource(sourceType)) {
        setStatusMessage(
          `The specified connector source type ${sourceType} does not exist.`
        );
        setStatusDetails(`${sourceType} is not a valid source type.`);
        setIsError(true);
        return;
      }

      const sourceMetadata = getSourceMetadata(sourceType as ValidSources);
      setPageTitle(`Authorize with ${sourceMetadata.displayName}`);

      setStatusMessage("Processing...");
      setStatusDetails("Please wait while we complete authorization.");
      setIsError(false); // Ensure no error state during loading

      try {
        const response = await handleOAuthAuthorizationResponse(
          connector,
          code,
          state
        );

        if (!response) {
          throw new Error("Empty response from OAuth server.");
        }

        setStatusMessage("Success!");

        // set the continuation link
        if (response.finalize_url) {
          setRedirectUrl(response.finalize_url);
          setStatusDetails(
            `Your authorization with ${sourceMetadata.displayName} completed successfully. Additional steps are required to complete credential setup.`
          );
        } else {
          setRedirectUrl(response.redirect_on_success);
          setStatusDetails(
            `Your authorization with ${sourceMetadata.displayName} completed successfully.`
          );
        }
        setIsError(false);
      } catch (error) {
        console.error("OAuth error:", error);
        setStatusMessage("Oops, something went wrong!");
        setStatusDetails(
          "An error occurred during the OAuth process. Please try again."
        );
        setIsError(true);
      }
    };

    onFirstLoad();
  }, [code, state, connector]);

  return (
    <div className="mx-auto h-screen flex flex-col">
      <AdminPageTitle title={pageTitle} icon={SvgKey} />

      <div className="flex-1 flex flex-col items-center justify-center">
        <CardSection className="max-w-md w-[500px] h-[250px] p-8">
          <h1 className="text-2xl font-bold mb-4">{statusMessage}</h1>
          <p className="text-text-500">{statusDetails}</p>
          {redirectUrl && !isError && (
            <div className="mt-4">
              <p className="text-sm">
                Click{" "}
                <a href={redirectUrl} className="text-blue-500 underline">
                  here
                </a>{" "}
                to continue.
              </p>
            </div>
          )}
        </CardSection>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/oauth/finalize/page.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import { AdminPageTitle } from "@/components/admin/Title";
import { Button } from "@opal/components";
import { getSourceMetadata, isValidSource } from "@/lib/sources";
import { ConfluenceAccessibleResource, ValidSources } from "@/lib/types";
import CardSection from "@/components/admin/CardSection";
import {
  handleOAuthConfluenceFinalize,
  handleOAuthPrepareFinalization,
} from "@/lib/oauth_utils";
import { SelectorFormField } from "@/components/Field";
import { ErrorMessage, Field, Form, Formik, useFormikContext } from "formik";
import * as Yup from "yup";
import { SvgKey } from "@opal/icons";
// Helper component to keep the effect logic clean:
function UpdateCloudURLOnCloudIdChange({
  accessibleResources,
}: {
  accessibleResources: ConfluenceAccessibleResource[];
}) {
  const { values, setValues, setFieldValue } = useFormikContext<{
    cloud_id: string;
    cloud_name: string;
    cloud_url: string;
  }>();

  useEffect(() => {
    // Whenever cloud_id changes, find the matching resource and update cloud_url
    if (values.cloud_id) {
      const selectedResource = accessibleResources.find(
        (resource) => resource.id === values.cloud_id
      );
      if (selectedResource) {
        // Update multiple fields together ... somehow setting them in sequence
        // doesn't work with the validator
        // it may also be possible to await each setFieldValue call.
        // https://github.com/jaredpalmer/formik/issues/2266
        setValues((prevValues) => ({
          ...prevValues,
          cloud_name: selectedResource.name,
          cloud_url: selectedResource.url,
        }));
      }
    }
  }, [values.cloud_id, accessibleResources, setFieldValue]);

  // This component doesn't render anything visible:
  return null;
}

export default function OAuthFinalizePage() {
  const router = useRouter();
  const searchParams = useSearchParams();

  const [statusMessage, setStatusMessage] = useState("Processing...");
  const [statusDetails, setStatusDetails] = useState(
    "Please wait while we complete the setup."
  );
  const [redirectUrl, setRedirectUrl] = useState<string | null>(null);
  const [isError, setIsError] = useState(false);
  const [isSubmitted, setIsSubmitted] = useState(false); // New state
  const [pageTitle, setPageTitle] = useState(
    "Finalize Authorization with Third-Party service"
  );

  const [accessibleResources, setAccessibleResources] = useState<
    ConfluenceAccessibleResource[]
  >([]);

  // Extract query parameters
  const credentialParam = searchParams?.get("credential");
  const credential = credentialParam ? parseInt(credentialParam, 10) : NaN;
  const pathname = usePathname();
  const connector = pathname?.split("/")[3];

  useEffect(() => {
    const onFirstLoad = async () => {
      // Examples
      // connector (url segment)= "google-drive"
      // sourceType (for looking up metadata) = "google_drive"

      if (isNaN(credential) || !connector) {
        setStatusMessage("Improperly formed OAuth finalization request.");
        setStatusDetails("Invalid or missing credential id.");
        setIsError(true);
        return;
      }

      const sourceType = connector.replaceAll("-", "_");
      if (!isValidSource(sourceType)) {
        setStatusMessage(
          `The specified connector source type ${sourceType} does not exist.`
        );
        setStatusDetails(`${sourceType} is not a valid source type.`);
        setIsError(true);
        return;
      }

      const sourceMetadata = getSourceMetadata(sourceType as ValidSources);
      setPageTitle(`Finalize Authorization with ${sourceMetadata.displayName}`);

      setStatusMessage("Processing...");
      setStatusDetails(
        "Please wait while we retrieve a list of your accessible sites."
      );
      setIsError(false); // Ensure no error state during loading

      try {
        const response = await handleOAuthPrepareFinalization(
          connector,
          credential
        );

        if (!response) {
          throw new Error("Empty response from OAuth server.");
        }

        setAccessibleResources(response.accessible_resources);

        setStatusMessage("Select a Confluence site");
        setStatusDetails("");

        setIsError(false);
      } catch (error) {
        console.error("OAuth finalization error:", error);
        setStatusMessage("Oops, something went wrong!");
        setStatusDetails(
          "An error occurred during the OAuth finalization process. Please try again."
        );
        setIsError(true);
      }
    };

    onFirstLoad();
  }, [credential, connector]);

  useEffect(() => {}, [redirectUrl]);

  return (
    <div className="mx-auto h-screen flex flex-col">
      <AdminPageTitle title={pageTitle} icon={SvgKey} />

      <div className="flex-1 flex flex-col items-center justify-center">
        <CardSection className="max-w-md w-[500px] h-[250px] p-8">
          <h1 className="text-2xl font-bold mb-4">{statusMessage}</h1>
          <p className="text-text-500">{statusDetails}</p>

          <Formik
            initialValues={{
              credential_id: credential,
              cloud_id: "",
              cloud_name: "",
              cloud_url: "",
            }}
            validationSchema={Yup.object().shape({
              credential_id: Yup.number().required(
                "Credential ID is required."
              ),
              cloud_id: Yup.string().required(
                "You must select a Confluence site (id not found)."
              ),
              cloud_name: Yup.string().required(
                "You must select a Confluence site (name not found)."
              ),
              cloud_url: Yup.string().required(
                "You must select a Confluence site (url not found)."
              ),
            })}
            validateOnMount
            onSubmit={async (values, formikHelpers) => {
              formikHelpers.setSubmitting(true);
              try {
                if (!values.cloud_id) {
                  throw new Error("Cloud ID is required.");
                }

                if (!values.cloud_name) {
                  throw new Error("Cloud URL is required.");
                }

                if (!values.cloud_url) {
                  throw new Error("Cloud URL is required.");
                }

                const response = await handleOAuthConfluenceFinalize(
                  values.credential_id,
                  values.cloud_id,
                  values.cloud_name,
                  values.cloud_url
                );
                formikHelpers.setSubmitting(false);

                if (response) {
                  setRedirectUrl(response.redirect_url);
                  setStatusMessage("Confluence authorization finalized.");
                }

                setIsSubmitted(true); // Mark as submitted
              } catch (error) {
                console.error(error);
                setStatusMessage("Error during submission.");
                setStatusDetails(
                  "An error occurred during the submission process. Please try again."
                );
                setIsError(true);
                formikHelpers.setSubmitting(false);
              }
            }}
          >
            {({ isSubmitting, isValid, setFieldValue }) => (
              <Form>
                {/* Debug info
                <div className="mb-4 p-2 bg-gray-100 rounded text-xs">
                  <pre>
                    isValid: {String(isValid)}
                    errors: {JSON.stringify(errors, null, 2)}
                    values: {JSON.stringify(values, null, 2)}
                  </pre>
                </div> */}

                {/* Our helper component that reacts to changes in cloud_id */}
                <UpdateCloudURLOnCloudIdChange
                  accessibleResources={accessibleResources}
                />

                <Field type="hidden" name="cloud_name" />
                <ErrorMessage
                  name="cloud_name"
                  component="div"
                  className="error"
                />

                <Field type="hidden" name="cloud_url" />
                <ErrorMessage
                  name="cloud_url"
                  component="div"
                  className="error"
                />

                {!redirectUrl && accessibleResources.length > 0 && (
                  <SelectorFormField
                    name="cloud_id"
                    options={accessibleResources.map((resource) => ({
                      name: `${resource.name} - ${resource.url}`,
                      value: resource.id,
                    }))}
                    onSelect={(selectedValue) => {
                      const selectedResource = accessibleResources.find(
                        (resource) => resource.id === selectedValue
                      );
                      if (selectedResource) {
                        setFieldValue("cloud_id", selectedResource.id);
                      }
                    }}
                  />
                )}
                <br />
                {!redirectUrl && (
                  <Button disabled={!isValid || isSubmitting} type="submit">
                    {isSubmitting ? "Submitting..." : "Submit"}
                  </Button>
                )}
              </Form>
            )}
          </Formik>

          {redirectUrl && !isError && (
            <div className="mt-4">
              <p className="text-sm">
                Authorization finalized. Click{" "}
                <a href={redirectUrl} className="text-blue-500 underline">
                  here
                </a>{" "}
                to continue.
              </p>
            </div>
          )}
        </CardSection>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/page.tsx
================================================
import { ConfigurableSources } from "@/lib/types";
import ConnectorWrapper from "./ConnectorWrapper";

export default async function Page(props: {
  params: Promise<{ connector: string }>;
}) {
  const params = await props.params;
  return (
    <ConnectorWrapper
      connector={params.connector.replace("-", "_") as ConfigurableSources}
    />
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/Advanced.tsx
================================================
import React from "react";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/Field";
import { Button } from "@opal/components";
import { SvgTrash } from "@opal/icons";
export default function AdvancedFormPage() {
  return (
    <div className="py-4 flex flex-col gap-y-6 rounded-lg max-w-2xl mx-auto">
      <h2 className="text-2xl font-bold mb-4 text-text-800">
        Advanced Configuration
      </h2>

      <NumberInput
        description={`
          Checks all documents against the source to delete those that no longer exist.
          Note: This process checks every document, so be cautious when increasing frequency.
          Default is 720 hours (30 days). Decimal hours are supported (e.g., 0.1 hours = 6 minutes).
          Enter 0 to disable pruning for this connector.
        `}
        label="Prune Frequency (hours)"
        name="pruneFreq"
      />

      <NumberInput
        description="This is how frequently we pull new documents from the source (in minutes). If you input 0, we will never pull new documents for this connector."
        label="Refresh Frequency (minutes)"
        name="refreshFreq"
      />

      <TextFormField
        type="date"
        subtext="Documents prior to this date will not be pulled in"
        optional
        label="Indexing Start Date"
        name="indexingStart"
      />
      <div className="mt-4 flex w-full mx-auto max-w-2xl justify-start">
        <Button variant="danger" icon={SvgTrash} type="submit">
          Reset
        </Button>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput.tsx
================================================
import { useField } from "formik";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import CredentialSubText from "@/components/credentials/CredentialFields";

interface FileInputProps {
  name: string;
  label?: string;
  optional?: boolean;
  description?: string;
  multiple?: boolean;
  isZip?: boolean;
  hideError?: boolean;
}

export default function FileInput({
  name,
  label,
  optional = false,
  description,
  multiple = true,
  isZip = false, // Default to false for multiple file uploads
  hideError = false,
}: FileInputProps) {
  const [field, meta, helpers] = useField(name);

  return (
    <>
      {label && (
        <label
          htmlFor={name}
          className="block text-sm font-medium text-text-700 mb-1"
        >
          {label}
          {optional && <span className="text-text-500 ml-1">(optional)</span>}
        </label>
      )}
      {description && <CredentialSubText>{description}</CredentialSubText>}
      <FileUpload
        selectedFiles={
          Array.isArray(field.value)
            ? field.value
            : field.value
              ? [field.value]
              : []
        }
        setSelectedFiles={(files: File[]) => {
          if (isZip || !multiple) {
            helpers.setValue(files[0] || null);
          } else {
            helpers.setValue(files);
          }
        }}
        multiple={!isZip && multiple} // Allow multiple files if not a zip
        accept={isZip ? ".zip" : undefined} // Only accept zip files if isZip is true
      />
      {!hideError && meta.touched && meta.error && (
        <div className="text-red-500 text-sm mt-1">{meta.error}</div>
      )}
    </>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/ConnectorInput/ListInput.tsx
================================================
import React from "react";
import { TextArrayField } from "@/components/Field";
import { useFormikContext } from "formik";

interface ListInputProps {
  name: string;
  label: string | ((credential: any) => string);
  description: string | ((credential: any) => string);
}

const ListInput: React.FC<ListInputProps> = ({ name, label, description }) => {
  const { values } = useFormikContext<any>();
  return (
    <TextArrayField
      name={name}
      label={typeof label === "function" ? label(null) : label}
      values={values}
      subtext={
        typeof description === "function" ? description(null) : description
      }
      placeholder={`Enter ${
        typeof label === "function" ? label(null) : label.toLowerCase()
      }`}
    />
  );
};

export default ListInput;


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/ConnectorInput/NumberInput.tsx
================================================
import { Label, SubLabel } from "@/components/Field";
import { ErrorMessage, useField } from "formik";

export default function NumberInput({
  label,
  optional,
  description,
  name,
  showNeverIfZero,
}: {
  label: string;
  name: string;
  optional?: boolean;
  description?: string;
  showNeverIfZero?: boolean;
}) {
  const [field, meta, helpers] = useField(name);

  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    // If the input is empty, set the value to undefined or null
    // This prevents the "NaN from empty string" error
    if (e.target.value === "") {
      helpers.setValue(undefined);
    } else {
      helpers.setValue(Number(e.target.value));
    }
  };

  return (
    <div className="w-full flex flex-col">
      <Label>
        <>
          {label}
          {optional && <span className="text-text-500 ml-1">(optional)</span>}
        </>
      </Label>
      {description && <SubLabel>{description}</SubLabel>}

      <input
        {...field}
        type="number"
        min="-1"
        onChange={handleChange}
        value={
          field.value === undefined || field.value === null ? "" : field.value
        }
        className={`mt-2 block w-full px-3 py-2 
                bg-[#fff] dark:bg-transparent border border-background-300 rounded-md 
                text-sm shadow-sm placeholder-text-400
                focus:outline-none focus:border-sky-500 focus:ring-1 focus:ring-sky-500
                disabled:bg-background-50 disabled:text-text-500 disabled:border-background-200 disabled:shadow-none
                invalid:border-pink-500 invalid:text-pink-600
                focus:invalid:border-pink-500 focus:invalid:ring-pink-500`}
      />
      <ErrorMessage
        name={name}
        component="div"
        className="text-error text-sm mt-1"
      />
    </div>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/ConnectorInput/SelectInput.tsx
================================================
import CredentialSubText from "@/components/credentials/CredentialFields";
import { StringWithDescription } from "@/lib/connectors/connectors";
import { Field } from "formik";

export default function SelectInput({
  name,
  optional,
  description,
  options,
  label,
}: {
  name: string;
  optional?: boolean;
  description?: string;
  options: StringWithDescription[];
  label?: string;
}) {
  return (
    <>
      <label
        htmlFor={name}
        className="block text-sm font-medium text-text-700 mb-1"
      >
        {label}
        {optional && <span className="text-text-500 ml-1">(optional)</span>}
      </label>
      {description && <CredentialSubText>{description}</CredentialSubText>}

      <Field
        as="select"
        name={name}
        className="w-full p-2 border border-border-03 rounded-08 bg-transparent text-text-04 focus:ring-2 focus:ring-lighter-agent focus:border-lighter-agent focus:outline-none"
      >
        <option value="">Select an option</option>
        {options?.map((option: any) => (
          <option key={option.name} value={option.name}>
            {option.name}
          </option>
        ))}
      </Field>
    </>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx
================================================
import React, { useEffect, useState } from "react";
import CredentialSubText from "@/components/credentials/CredentialFields";
import { ConnectionConfiguration } from "@/lib/connectors/connectors";
import { TextFormField } from "@/components/Field";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { AccessTypeForm } from "@/components/admin/connectors/AccessTypeForm";
import { AccessTypeGroupSelector } from "@/components/admin/connectors/AccessTypeGroupSelector";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import { RenderField } from "./FieldRendering";
import { useFormikContext } from "formik";

export interface DynamicConnectionFormProps {
  config: ConnectionConfiguration;
  values: any;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

export default function DynamicConnectionForm({
  config,
  values,
  connector,
  currentCredential,
}: DynamicConnectionFormProps) {
  const { setFieldValue } = useFormikContext<any>(); // Get Formik's context functions

  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
  const [connectorNameInitialized, setConnectorNameInitialized] =
    useState(false);

  let initialConnectorName = "";
  if (config.initialConnectorName) {
    initialConnectorName =
      currentCredential?.credential_json?.[config.initialConnectorName] ?? "";
  }

  useEffect(() => {
    const field_value = values["name"];
    if (initialConnectorName && !connectorNameInitialized && !field_value) {
      setFieldValue("name", initialConnectorName);
      setConnectorNameInitialized(true);
    }
  }, [initialConnectorName, setFieldValue, values]);

  return (
    <>
      {config.subtext && (
        <CredentialSubText>{config.subtext}</CredentialSubText>
      )}

      <TextFormField
        subtext="A descriptive name for the connector."
        type={"text"}
        label={"Connector Name"}
        name={"name"}
      />

      {config.values.map(
        (field) =>
          !field.hidden && (
            <RenderField
              key={field.name}
              field={field}
              values={values}
              connector={connector}
              currentCredential={currentCredential}
            />
          )
      )}

      <AccessTypeForm
        connector={connector}
        currentCredential={currentCredential}
      />
      <AccessTypeGroupSelector connector={connector} />

      {config.advanced_values.length > 0 &&
        (!config.advancedValuesVisibleCondition ||
          config.advancedValuesVisibleCondition(values, currentCredential)) && (
          <>
            <AdvancedOptionsToggle
              showAdvancedOptions={showAdvancedOptions}
              setShowAdvancedOptions={setShowAdvancedOptions}
            />
            {showAdvancedOptions &&
              config.advanced_values.map(
                (field) =>
                  !field.hidden && (
                    <RenderField
                      key={field.name}
                      field={field}
                      values={values}
                      connector={connector}
                      currentCredential={currentCredential}
                    />
                  )
              )}
          </>
        )}
    </>
  );
}


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/FieldRendering.tsx
================================================
import React, { FC, useEffect } from "react";
import { TabOption } from "@/lib/connectors/connectors";
import SelectInput from "./ConnectorInput/SelectInput";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField, MultiSelectField } from "@/components/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import CollapsibleSection from "@/app/admin/agents/CollapsibleSection";
import Tabs from "@/refresh-components/Tabs";
import { useFormikContext } from "formik";
import * as GeneralLayouts from "@/layouts/general-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import { Content } from "@opal/layouts";
import CheckboxField from "@/refresh-components/form/LabeledCheckboxField";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";
import Text from "@/refresh-components/texts/Text";

// Define a general type for form values
type FormValues = Record<string, any>;

interface TabsFieldProps {
  tabField: TabOption;
  values: any;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

const TabsField: FC<TabsFieldProps> = ({
  tabField,
  values,
  connector,
  currentCredential,
}) => {
  const { setFieldValue } = useFormikContext<FormValues>();

  const resolvedLabel =
    typeof tabField.label === "function"
      ? tabField.label(currentCredential)
      : tabField.label;
  const resolvedDescription =
    typeof tabField.description === "function"
      ? tabField.description(currentCredential)
      : tabField.description;

  return (
    <GeneralLayouts.Section gap={0.5} alignItems="start">
      {tabField.label && (
        <Content
          title={resolvedLabel ?? ""}
          description={resolvedDescription}
          sizePreset="main-content"
          variant="section"
        />
      )}

      {/* Ensure there's at least one tab before rendering */}
      {tabField.tabs.length === 0 ? (
        <Text text03 secondaryBody>
          No tabs to display.
        </Text>
      ) : (
        <Tabs
          defaultValue={tabField.defaultTab || tabField.tabs[0]?.value}
          onValueChange={(newTab) => {
            // Clear values from other tabs but preserve defaults
            tabField.tabs.forEach((tab) => {
              if (tab.value !== newTab) {
                tab.fields.forEach((field) => {
                  // Only clear if not default value
                  if (values[field.name] !== field.default) {
                    setFieldValue(field.name, field.default);
                  }
                });
              }
            });
          }}
        >
          <Tabs.List>
            {tabField.tabs.map((tab) => (
              <Tabs.Trigger key={tab.value} value={tab.value}>
                {tab.label}
              </Tabs.Trigger>
            ))}
          </Tabs.List>
          {tabField.tabs.map((tab) => (
            <Tabs.Content key={tab.value} value={tab.value}>
              <GeneralLayouts.Section gap={0.75} alignItems="start">
                {tab.fields.map((subField) => {
                  // Check visibility condition first
                  if (
                    subField.visibleCondition &&
                    !subField.visibleCondition(values, currentCredential)
                  ) {
                    return null;
                  }

                  return (
                    <RenderField
                      key={subField.name}
                      field={subField}
                      values={values}
                      connector={connector}
                      currentCredential={currentCredential}
                    />
                  );
                })}
              </GeneralLayouts.Section>
            </Tabs.Content>
          ))}
        </Tabs>
      )}
    </GeneralLayouts.Section>
  );
};

interface RenderFieldProps {
  field: any;
  values: any;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

export const RenderField: FC<RenderFieldProps> = ({
  field,
  values,
  connector,
  currentCredential,
}) => {
  const { setFieldValue } = useFormikContext<FormValues>(); // Get Formik's context functions

  const label =
    typeof field.label === "function"
      ? field.label(currentCredential)
      : field.label;
  const description =
    typeof field.description === "function"
      ? field.description(currentCredential)
      : field.description;
  const disabled =
    typeof field.disabled === "function"
      ? field.disabled(currentCredential)
      : field.disabled ?? false;
  const initialValue =
    typeof field.initial === "function"
      ? field.initial(currentCredential)
      : field.initial ?? "";

  // if initialValue exists, prepopulate the field with it
  useEffect(() => {
    const field_value = values[field.name];
    if (initialValue && field_value === undefined) {
      setFieldValue(field.name, initialValue);
    }
  }, [field.name, initialValue, setFieldValue, values]);

  if (field.type === "tab") {
    return (
      <TabsField
        tabField={field}
        values={values}
        connector={connector}
        currentCredential={currentCredential}
      />
    );
  }

  const fieldContent = (
    <>
      {field.type === "zip" || field.type === "file" ? (
        <FileInput
          name={field.name}
          isZip={field.type === "zip"}
          label={label}
          optional={field.optional}
          description={description}
        />
      ) : field.type === "list" ? (
        <ListInput name={field.name} label={label} description={description} />
      ) : field.type === "select" ? (
        <SelectInput
          name={field.name}
          optional={field.optional}
          description={description}
          options={field.options || []}
          label={label}
        />
      ) : field.type === "multiselect" ? (
        <MultiSelectField
          name={field.name}
          label={label}
          subtext={description}
          options={
            field.options?.map((option: { value: string; name: string }) => ({
              value: option.value,
              label: option.name,
            })) || []
          }
          selectedInitially={values[field.name] || field.default || []}
          onChange={(selected) => setFieldValue(field.name, selected)}
        />
      ) : field.type === "number" ? (
        <NumberInput
          label={label}
          optional={field.optional}
          description={description}
          name={field.name}
        />
      ) : field.type === "checkbox" ? (
        <GeneralLayouts.Section
          flexDirection="row"
          justifyContent="start"
          alignItems="start"
          gap={0.5}
        >
          <CheckboxField
            name={field.name}
            label={label}
            sublabel={description}
            disabled={disabled}
            size="lg"
            onChange={(checked) => setFieldValue(field.name, checked)}
          />
        </GeneralLayouts.Section>
      ) : field.type === "text" ? (
        field.isTextArea ? (
          <InputLayouts.Vertical
            name={field.name}
            title={label}
            description={description}
            suffix={field.optional ? "optional" : undefined}
          >
            <InputTextAreaField
              name={field.name}
              placeholder={field.placeholder}
              variant={disabled ? "disabled" : undefined}
              rows={1}
            />
          </InputLayouts.Vertical>
        ) : (
          <TextFormField
            subtext={description}
            optional={field.optional}
            type={field.type}
            label={label}
            name={field.name}
            isTextArea={false}
            defaultHeight={"h-15"}
            disabled={disabled}
            onChange={(e) => setFieldValue(field.name, e.target.value)}
          />
        )
      ) : field.type === "string_tab" ? (
        <GeneralLayouts.Section>
          <Text text03 secondaryBody>
            {description}
          </Text>
        </GeneralLayouts.Section>
      ) : (
        <>INVALID FIELD TYPE</>
      )}
    </>
  );

  if (field.wrapInCollapsible) {
    return (
      <CollapsibleSection prompt={label} key={field.name}>
        {fieldContent}
      </CollapsibleSection>
    );
  }

  return (
    <GeneralLayouts.Section alignItems="start">
      {fieldContent}
    </GeneralLayouts.Section>
  );
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/gdrive/Credential.tsx
================================================
import { toast } from "@/hooks/useToast";
import React, { useState, useEffect } from "react";
import { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import { adminDeleteCredential } from "@/lib/credential";
import { setupGoogleDriveOAuth } from "@/lib/googleDrive";
import { DOCS_ADMINS_PATH } from "@/lib/constants";
import { TextFormField, SectionHeader } from "@/components/Field";
import { Form, Formik } from "formik";
import { User } from "@/lib/types";
import { Button } from "@opal/components";
import {
  Credential,
  GoogleDriveCredentialJson,
  GoogleDriveServiceAccountCredentialJson,
} from "@/lib/connectors/credentials";
import { refreshAllGoogleData } from "@/lib/googleConnector";
import { ValidSources } from "@/lib/types";
import { SWR_KEYS } from "@/lib/swr-keys";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { FiFile, FiCheck, FiLink, FiAlertTriangle } from "react-icons/fi";
import { cn, truncateString } from "@/lib/utils";

type GoogleDriveCredentialJsonTypes = "authorized_user" | "service_account";

export const DriveJsonUpload = ({ onSuccess }: { onSuccess?: () => void }) => {
  const { mutate } = useSWRConfig();
  const [isUploading, setIsUploading] = useState(false);
  const [fileName, setFileName] = useState<string | undefined>();
  const [isDragging, setIsDragging] = useState(false);

  const handleFileUpload = async (file: File) => {
    setIsUploading(true);
    setFileName(file.name);

    const reader = new FileReader();
    reader.onload = async (loadEvent) => {
      if (!loadEvent?.target?.result) {
        setIsUploading(false);
        return;
      }

      const credentialJsonStr = loadEvent.target.result as string;

      // Check credential type
      let credentialFileType: GoogleDriveCredentialJsonTypes;
      try {
        const appCredentialJson = JSON.parse(credentialJsonStr);
        if (appCredentialJson.web) {
          credentialFileType = "authorized_user";
        } else if (appCredentialJson.type === "service_account") {
          credentialFileType = "service_account";
        } else {
          throw new Error(
            "Unknown credential type, expected one of 'OAuth Web application' or 'Service Account'"
          );
        }
      } catch (e) {
        toast.error(`Invalid file provided - ${e}`);
        setIsUploading(false);
        return;
      }

      if (credentialFileType === "authorized_user") {
        const response = await fetch(
          "/api/manage/admin/connector/google-drive/app-credential",
          {
            method: "PUT",
            headers: {
              "Content-Type": "application/json",
            },
            body: credentialJsonStr,
          }
        );
        if (response.ok) {
          toast.success("Successfully uploaded app credentials");
          mutate(SWR_KEYS.googleConnectorAppCredential("google-drive"));
          if (onSuccess) {
            onSuccess();
          }
        } else {
          const errorMsg = await response.text();
          toast.error(`Failed to upload app credentials - ${errorMsg}`);
        }
      }

      if (credentialFileType === "service_account") {
        const response = await fetch(
          "/api/manage/admin/connector/google-drive/service-account-key",
          {
            method: "PUT",
            headers: {
              "Content-Type": "application/json",
            },
            body: credentialJsonStr,
          }
        );
        if (response.ok) {
          toast.success("Successfully uploaded service account key");
          mutate(SWR_KEYS.googleConnectorServiceAccountKey("google-drive"));
          if (onSuccess) {
            onSuccess();
          }
        } else {
          const errorMsg = await response.text();
          toast.error(`Failed to upload service account key - ${errorMsg}`);
        }
      }
      setIsUploading(false);
    };

    reader.readAsText(file);
  };

  const handleDragEnter = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    if (!isUploading) {
      setIsDragging(true);
    }
  };

  const handleDragLeave = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);
  };

  const handleDragOver = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
  };

  const handleDrop = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);

    if (isUploading) return;

    const files = e.dataTransfer.files;
    if (files.length > 0) {
      const file = files[0];
      if (
        file !== undefined &&
        (file.type === "application/json" || file.name.endsWith(".json"))
      ) {
        handleFileUpload(file);
      } else {
        toast.error("Please upload a JSON file");
      }
    }
  };

  return (
    <div className="flex flex-col mt-4">
      <div className="flex items-center">
        <div className="relative flex flex-1 items-center">
          <label
            className={cn(
              "flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors",
              isUploading
                ? "opacity-70 cursor-not-allowed border-background-400 bg-background-50/30"
                : isDragging
                  ? "bg-background-50/50 border-primary dark:border-primary"
                  : "cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600"
            )}
            onDragEnter={handleDragEnter}
            onDragLeave={handleDragLeave}
            onDragOver={handleDragOver}
            onDrop={handleDrop}
          >
            <div className="flex items-center space-x-2">
              {isUploading ? (
                <div className="h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin"></div>
              ) : (
                <FiFile className="h-4 w-4 text-text-500" />
              )}
              <span className="text-sm text-text-500">
                {isUploading
                  ? `Uploading ${truncateString(fileName || "file", 50)}...`
                  : isDragging
                    ? "Drop JSON file here"
                    : truncateString(
                        fileName || "Select or drag JSON credentials file...",
                        50
                      )}
              </span>
            </div>
            <input
              className="sr-only"
              type="file"
              accept=".json"
              disabled={isUploading}
              onChange={(event) => {
                if (!event.target.files?.length) {
                  return;
                }
                const file = event.target.files[0];
                if (file === undefined) {
                  return;
                }
                handleFileUpload(file);
              }}
            />
          </label>
        </div>
      </div>
    </div>
  );
};

interface DriveJsonUploadSectionProps {
  appCredentialData?: { client_id: string };
  serviceAccountCredentialData?: { service_account_email: string };
  isAdmin: boolean;
  onSuccess?: () => void;
  existingAuthCredential?: boolean;
}

export const DriveJsonUploadSection = ({
  appCredentialData,
  serviceAccountCredentialData,
  isAdmin,
  onSuccess,
  existingAuthCredential,
}: DriveJsonUploadSectionProps) => {
  const { mutate } = useSWRConfig();
  const router = useRouter();
  const [localServiceAccountData, setLocalServiceAccountData] = useState(
    serviceAccountCredentialData
  );
  const [localAppCredentialData, setLocalAppCredentialData] =
    useState(appCredentialData);

  // Update local state when props change
  useEffect(() => {
    setLocalServiceAccountData(serviceAccountCredentialData);
    setLocalAppCredentialData(appCredentialData);
  }, [serviceAccountCredentialData, appCredentialData]);

  const handleSuccess = () => {
    if (onSuccess) {
      onSuccess();
    } else {
      refreshAllGoogleData(ValidSources.GoogleDrive);
    }
  };

  if (!isAdmin) {
    return (
      <div>
        <div className="flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded">
          <FiAlertTriangle className="text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
          <p className="text-sm">
            Curators are unable to set up the Google Drive credentials. To add a
            Google Drive connector, please contact an administrator.
          </p>
        </div>
      </div>
    );
  }

  return (
    <div>
      <p className="text-sm mb-3">
        To connect your Google Drive, create credentials (either OAuth App or
        Service Account), download the JSON file, and upload it below.
      </p>
      <div className="mb-4">
        <a
          className="text-primary hover:text-primary/80 flex items-center gap-1 text-sm"
          target="_blank"
          href={`${DOCS_ADMINS_PATH}/connectors/official/google_drive/overview`}
          rel="noreferrer"
        >
          <FiLink className="h-3 w-3" />
          View detailed setup instructions
        </a>
      </div>

      {(localServiceAccountData?.service_account_email ||
        localAppCredentialData?.client_id) && (
        <div className="mb-4">
          <div className="relative flex flex-1 items-center">
            <label
              className={cn(
                "flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors",
                false
                  ? "opacity-70 cursor-not-allowed border-background-400 bg-background-50/30"
                  : "cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600"
              )}
            >
              <div className="flex items-center space-x-2">
                {false ? (
                  <div className="h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin"></div>
                ) : (
                  <FiFile className="h-4 w-4 text-text-500" />
                )}
                <span className="text-sm text-text-500">
                  {truncateString(
                    localServiceAccountData?.service_account_email ||
                      localAppCredentialData?.client_id ||
                      "",
                    50
                  )}
                </span>
              </div>
            </label>
          </div>
          {isAdmin && !existingAuthCredential && (
            <div className="mt-2">
              <Button
                variant="danger"
                onClick={async () => {
                  const endpoint =
                    localServiceAccountData?.service_account_email
                      ? SWR_KEYS.googleConnectorServiceAccountKey(
                          "google-drive"
                        )
                      : SWR_KEYS.googleConnectorAppCredential("google-drive");

                  const response = await fetch(endpoint, {
                    method: "DELETE",
                  });

                  if (response.ok) {
                    mutate(endpoint);
                    // Also mutate the credential endpoints to ensure Step 2 is reset
                    mutate(
                      buildSimilarCredentialInfoURL(ValidSources.GoogleDrive)
                    );

                    // Add additional mutations to refresh all credential-related endpoints
                    mutate(SWR_KEYS.googleConnectorCredentials("google-drive"));
                    mutate(
                      SWR_KEYS.googleConnectorPublicCredential("google-drive")
                    );
                    mutate(
                      SWR_KEYS.googleConnectorServiceAccountCredential(
                        "google-drive"
                      )
                    );

                    toast.success(
                      `Successfully deleted ${
                        localServiceAccountData
                          ? "service account key"
                          : "app credentials"
                      }`
                    );
                    // Immediately update local state
                    if (localServiceAccountData) {
                      setLocalServiceAccountData(undefined);
                    } else {
                      setLocalAppCredentialData(undefined);
                    }
                    handleSuccess();
                  } else {
                    const errorMsg = await response.text();
                    toast.error(`Failed to delete credentials - ${errorMsg}`);
                  }
                }}
              >
                Delete Credentials
              </Button>
            </div>
          )}
        </div>
      )}

      {!(
        localServiceAccountData?.service_account_email ||
        localAppCredentialData?.client_id
      ) && <DriveJsonUpload onSuccess={handleSuccess} />}
    </div>
  );
};

interface DriveCredentialSectionProps {
  googleDrivePublicUploadedCredential?: Credential<GoogleDriveCredentialJson>;
  googleDriveServiceAccountCredential?: Credential<GoogleDriveServiceAccountCredentialJson>;
  serviceAccountKeyData?: { service_account_email: string };
  appCredentialData?: { client_id: string };
  refreshCredentials: () => void;
  connectorAssociated: boolean;
  user: User | null;
}

async function handleRevokeAccess(
  connectorAssociated: boolean,
  existingCredential:
    | Credential<GoogleDriveCredentialJson>
    | Credential<GoogleDriveServiceAccountCredentialJson>,
  refreshCredentials: () => void
) {
  if (connectorAssociated) {
    const message =
      "Cannot revoke the Google Drive credential while any connector is still associated with the credential. " +
      "Please delete all associated connectors, then try again.";
    toast.error(message);
    return;
  }

  await adminDeleteCredential(existingCredential.id);
  toast.success("Successfully revoked the Google Drive credential!");

  refreshCredentials();
}

export const DriveAuthSection = ({
  googleDrivePublicUploadedCredential,
  googleDriveServiceAccountCredential,
  serviceAccountKeyData,
  appCredentialData,
  refreshCredentials,
  connectorAssociated,
  user,
}: DriveCredentialSectionProps) => {
  const router = useRouter();
  const [isAuthenticating, setIsAuthenticating] = useState(false);
  const [localServiceAccountData, setLocalServiceAccountData] = useState(
    serviceAccountKeyData
  );
  const [localAppCredentialData, setLocalAppCredentialData] =
    useState(appCredentialData);
  const [
    localGoogleDrivePublicCredential,
    setLocalGoogleDrivePublicCredential,
  ] = useState(googleDrivePublicUploadedCredential);
  const [
    localGoogleDriveServiceAccountCredential,
    setLocalGoogleDriveServiceAccountCredential,
  ] = useState(googleDriveServiceAccountCredential);

  // Update local state when props change
  useEffect(() => {
    setLocalServiceAccountData(serviceAccountKeyData);
    setLocalAppCredentialData(appCredentialData);
    setLocalGoogleDrivePublicCredential(googleDrivePublicUploadedCredential);
    setLocalGoogleDriveServiceAccountCredential(
      googleDriveServiceAccountCredential
    );
  }, [
    serviceAccountKeyData,
    appCredentialData,
    googleDrivePublicUploadedCredential,
    googleDriveServiceAccountCredential,
  ]);

  const existingCredential =
    localGoogleDrivePublicCredential ||
    localGoogleDriveServiceAccountCredential;
  if (existingCredential) {
    return (
      <div>
        <div className="mt-4">
          <div className="py-3 px-4 bg-blue-50/30 dark:bg-blue-900/5 rounded mb-4 flex items-start">
            <FiCheck className="text-blue-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
            <div className="flex-1">
              <span className="font-medium block">Authentication Complete</span>
              <p className="text-sm mt-1 text-text-500 dark:text-text-400 break-words">
                Your Google Drive credentials have been successfully uploaded
                and authenticated.
              </p>
            </div>
          </div>
          <Button
            variant="danger"
            onClick={async () => {
              handleRevokeAccess(
                connectorAssociated,
                existingCredential,
                refreshCredentials
              );
            }}
          >
            Revoke Access
          </Button>
        </div>
      </div>
    );
  }

  // If no credentials are uploaded, show message to complete step 1 first
  if (
    !localServiceAccountData?.service_account_email &&
    !localAppCredentialData?.client_id
  ) {
    return (
      <div>
        <SectionHeader>Google Drive Authentication</SectionHeader>
        <div className="mt-4">
          <div className="flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded">
            <FiAlertTriangle className="text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
            <p className="text-sm">
              Please complete Step 1 by uploading either OAuth credentials or a
              Service Account key before proceeding with authentication.
            </p>
          </div>
        </div>
      </div>
    );
  }

  if (localServiceAccountData?.service_account_email) {
    return (
      <div>
        <div className="mt-4">
          <Formik
            initialValues={{
              google_primary_admin: user?.email || "",
            }}
            validationSchema={Yup.object().shape({
              google_primary_admin: Yup.string()
                .email("Must be a valid email")
                .required("Required"),
            })}
            onSubmit={async (values, formikHelpers) => {
              formikHelpers.setSubmitting(true);
              try {
                const response = await fetch(
                  "/api/manage/admin/connector/google-drive/service-account-credential",
                  {
                    method: "PUT",
                    headers: {
                      "Content-Type": "application/json",
                    },
                    body: JSON.stringify({
                      google_primary_admin: values.google_primary_admin,
                    }),
                  }
                );

                if (response.ok) {
                  toast.success(
                    "Successfully created service account credential"
                  );
                  refreshCredentials();
                } else {
                  const errorMsg = await response.text();
                  toast.error(
                    `Failed to create service account credential - ${errorMsg}`
                  );
                }
              } catch (error) {
                toast.error(
                  `Failed to create service account credential - ${error}`
                );
              } finally {
                formikHelpers.setSubmitting(false);
              }
            }}
          >
            {({ isSubmitting }) => (
              <Form>
                <TextFormField
                  name="google_primary_admin"
                  label="Primary Admin Email:"
                  subtext="Enter the email of an admin/owner of the Google Organization that owns the Google Drive(s) you want to index."
                />
                <div className="flex">
                  <Button disabled={isSubmitting} type="submit">
                    {isSubmitting ? "Creating..." : "Create Credential"}
                  </Button>
                </div>
              </Form>
            )}
          </Formik>
        </div>
      </div>
    );
  }

  if (localAppCredentialData?.client_id) {
    return (
      <div>
        <div className="bg-background-50/30 dark:bg-background-900/20 rounded mb-4">
          <p className="text-sm">
            Next, you need to authenticate with Google Drive via OAuth. This
            gives us read access to the documents you have access to in your
            Google Drive account.
          </p>
        </div>
        <Button
          disabled={isAuthenticating}
          onClick={async () => {
            setIsAuthenticating(true);
            try {
              const [authUrl, errorMsg] = await setupGoogleDriveOAuth({
                isAdmin: true,
                name: "OAuth (uploaded)",
              });

              if (authUrl) {
                router.push(authUrl as Route);
              } else {
                toast.error(errorMsg);
                setIsAuthenticating(false);
              }
            } catch (error) {
              toast.error(
                `Failed to authenticate with Google Drive - ${error}`
              );
              setIsAuthenticating(false);
            }
          }}
        >
          {isAuthenticating
            ? "Authenticating..."
            : "Authenticate with Google Drive"}
        </Button>
      </div>
    );
  }

  // This code path should not be reached with the new conditions above
  return null;
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage.tsx
================================================
"use client";

import React from "react";
import { ErrorCallout } from "@/components/ErrorCallout";
import { LoadingAnimation } from "@/components/Loading";
import { ValidSources } from "@/lib/types";
import { usePublicCredentials } from "@/lib/hooks";
import Title from "@/components/ui/title";
import { DriveJsonUploadSection, DriveAuthSection } from "./Credential";
import {
  Credential,
  GoogleDriveCredentialJson,
  GoogleDriveServiceAccountCredentialJson,
} from "@/lib/connectors/credentials";
import { useUser } from "@/providers/UserProvider";
import {
  useGoogleAppCredential,
  useGoogleServiceAccountKey,
  useGoogleCredentials,
  useConnectorsByCredentialId,
  checkCredentialsFetched,
  filterUploadedCredentials,
  checkConnectorsExist,
  refreshAllGoogleData,
} from "@/lib/googleConnector";

const GDriveMain = () => {
  const { isAdmin, user } = useUser();

  // Get app credential and service account key
  const {
    data: appCredentialData,
    isLoading: isAppCredentialLoading,
    error: isAppCredentialError,
  } = useGoogleAppCredential("google_drive");

  const {
    data: serviceAccountKeyData,
    isLoading: isServiceAccountKeyLoading,
    error: isServiceAccountKeyError,
  } = useGoogleServiceAccountKey("google_drive");

  // Get all public credentials
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: credentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Get Google Drive-specific credentials
  const {
    data: googleDriveCredentials,
    isLoading: isGoogleDriveCredentialsLoading,
    error: googleDriveCredentialsError,
  } = useGoogleCredentials(ValidSources.GoogleDrive);

  // Filter uploaded credentials and get credential ID
  const { credential_id, uploadedCredentials } = filterUploadedCredentials(
    googleDriveCredentials
  );

  // Get connectors for the credential ID
  const {
    data: googleDriveConnectors,
    isLoading: isGoogleDriveConnectorsLoading,
    error: googleDriveConnectorsError,
    refreshConnectorsByCredentialId,
  } = useConnectorsByCredentialId(credential_id);

  // Check if credentials were successfully fetched
  const {
    appCredentialSuccessfullyFetched,
    serviceAccountKeySuccessfullyFetched,
  } = checkCredentialsFetched(
    appCredentialData,
    isAppCredentialError,
    serviceAccountKeyData,
    isServiceAccountKeyError
  );

  // Handle refresh of all data
  const handleRefresh = () => {
    refreshCredentials();
    refreshConnectorsByCredentialId();
    refreshAllGoogleData(ValidSources.GoogleDrive);
  };

  // Loading state
  if (
    (!appCredentialSuccessfullyFetched && isAppCredentialLoading) ||
    (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) ||
    (!credentialsData && isCredentialsLoading) ||
    (!googleDriveCredentials && isGoogleDriveCredentialsLoading) ||
    (!googleDriveConnectors && isGoogleDriveConnectorsLoading)
  ) {
    return (
      <div className="mx-auto">
        <LoadingAnimation text="" />
      </div>
    );
  }

  // Error states
  if (credentialsError || !credentialsData) {
    return <ErrorCallout errorTitle="Failed to load credentials." />;
  }

  if (googleDriveCredentialsError || !googleDriveCredentials) {
    return (
      <ErrorCallout errorTitle="Failed to load Google Drive credentials." />
    );
  }

  if (
    !appCredentialSuccessfullyFetched ||
    !serviceAccountKeySuccessfullyFetched
  ) {
    return (
      <ErrorCallout errorTitle="Error loading Google Drive app credentials. Contact an administrator." />
    );
  }

  if (googleDriveConnectorsError) {
    return (
      <ErrorCallout errorTitle="Failed to load Google Drive associated connectors." />
    );
  }

  // Check if connectors exist
  const connectorAssociated = checkConnectorsExist(googleDriveConnectors);

  // Get the uploaded OAuth credential
  const googleDrivePublicUploadedCredential:
    | Credential<GoogleDriveCredentialJson>
    | undefined = credentialsData.find(
    (credential) =>
      credential.credential_json?.google_tokens &&
      credential.admin_public &&
      credential.source === "google_drive" &&
      credential.credential_json.authentication_method !== "oauth_interactive"
  );

  // Get the service account credential
  const googleDriveServiceAccountCredential:
    | Credential<GoogleDriveServiceAccountCredentialJson>
    | undefined = credentialsData.find(
    (credential) =>
      credential.credential_json?.google_service_account_key &&
      credential.source === "google_drive"
  );

  return (
    <>
      <Title className="mb-2 mt-6">Step 1: Provide your Credentials</Title>
      <DriveJsonUploadSection
        appCredentialData={appCredentialData}
        serviceAccountCredentialData={serviceAccountKeyData}
        isAdmin={isAdmin}
        onSuccess={handleRefresh}
        existingAuthCredential={Boolean(
          googleDrivePublicUploadedCredential ||
            googleDriveServiceAccountCredential
        )}
      />

      {isAdmin &&
        (appCredentialData?.client_id ||
          serviceAccountKeyData?.service_account_email) && (
          <>
            <Title className="mb-2 mt-6">Step 2: Authenticate with Onyx</Title>
            <DriveAuthSection
              refreshCredentials={handleRefresh}
              googleDrivePublicUploadedCredential={
                googleDrivePublicUploadedCredential
              }
              googleDriveServiceAccountCredential={
                googleDriveServiceAccountCredential
              }
              appCredentialData={appCredentialData}
              serviceAccountKeyData={serviceAccountKeyData}
              connectorAssociated={connectorAssociated}
              user={user}
            />
          </>
        )}
    </>
  );
};

export default GDriveMain;


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/gmail/Credential.tsx
================================================
import { Button } from "@opal/components";
import { toast } from "@/hooks/useToast";
import React, { useState, useEffect } from "react";
import { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import { adminDeleteCredential } from "@/lib/credential";
import { setupGmailOAuth } from "@/lib/gmail";
import { DOCS_ADMINS_PATH } from "@/lib/constants";
import { CRAFT_OAUTH_COOKIE_NAME } from "@/app/craft/v1/constants";
import Cookies from "js-cookie";
import { TextFormField, SectionHeader } from "@/components/Field";
import { Form, Formik } from "formik";
import { User } from "@/lib/types";
import {
  Credential,
  GmailCredentialJson,
  GmailServiceAccountCredentialJson,
} from "@/lib/connectors/credentials";
import { refreshAllGoogleData } from "@/lib/googleConnector";
import { ValidSources } from "@/lib/types";
import { SWR_KEYS } from "@/lib/swr-keys";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { FiFile, FiCheck, FiLink, FiAlertTriangle } from "react-icons/fi";
import { cn, truncateString } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";

type GmailCredentialJsonTypes = "authorized_user" | "service_account";

const GmailCredentialUpload = ({ onSuccess }: { onSuccess?: () => void }) => {
  const { mutate } = useSWRConfig();
  const [isUploading, setIsUploading] = useState(false);
  const [fileName, setFileName] = useState<string | undefined>();
  const [isDragging, setIsDragging] = useState(false);

  const handleFileUpload = async (file: File) => {
    setIsUploading(true);
    setFileName(file.name);

    const reader = new FileReader();
    reader.onload = async (loadEvent) => {
      if (!loadEvent?.target?.result) {
        setIsUploading(false);
        return;
      }

      const credentialJsonStr = loadEvent.target.result as string;

      // Check credential type
      let credentialFileType: GmailCredentialJsonTypes;
      try {
        const appCredentialJson = JSON.parse(credentialJsonStr);
        if (appCredentialJson.web) {
          credentialFileType = "authorized_user";
        } else if (appCredentialJson.type === "service_account") {
          credentialFileType = "service_account";
        } else {
          throw new Error(
            "Unknown credential type, expected one of 'OAuth Web application' or 'Service Account'"
          );
        }
      } catch (e) {
        toast.error(`Invalid file provided - ${e}`);
        setIsUploading(false);
        return;
      }

      if (credentialFileType === "authorized_user") {
        const response = await fetch(
          "/api/manage/admin/connector/gmail/app-credential",
          {
            method: "PUT",
            headers: {
              "Content-Type": "application/json",
            },
            body: credentialJsonStr,
          }
        );
        if (response.ok) {
          toast.success("Successfully uploaded app credentials");
          mutate(SWR_KEYS.googleConnectorAppCredential("gmail"));
          if (onSuccess) {
            onSuccess();
          }
        } else {
          const errorMsg = await response.text();
          toast.error(`Failed to upload app credentials - ${errorMsg}`);
        }
      }

      if (credentialFileType === "service_account") {
        const response = await fetch(
          "/api/manage/admin/connector/gmail/service-account-key",
          {
            method: "PUT",
            headers: {
              "Content-Type": "application/json",
            },
            body: credentialJsonStr,
          }
        );
        if (response.ok) {
          toast.success("Successfully uploaded service account key");
          mutate(SWR_KEYS.googleConnectorServiceAccountKey("gmail"));
          if (onSuccess) {
            onSuccess();
          }
        } else {
          const errorMsg = await response.text();
          toast.error(`Failed to upload service account key - ${errorMsg}`);
        }
      }
      setIsUploading(false);
    };

    reader.readAsText(file);
  };

  const handleDragEnter = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    if (!isUploading) {
      setIsDragging(true);
    }
  };

  const handleDragLeave = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);
  };

  const handleDragOver = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
  };

  const handleDrop = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);

    if (isUploading) return;

    const files = e.dataTransfer.files;
    if (files.length > 0) {
      const file = files[0];
      if (
        file !== undefined &&
        (file.type === "application/json" || file.name.endsWith(".json"))
      ) {
        handleFileUpload(file);
      } else {
        toast.error("Please upload a JSON file");
      }
    }
  };

  return (
    <div className="flex flex-col mt-4">
      <div className="flex items-center">
        <div className="relative flex flex-1 items-center">
          <label
            className={cn(
              "flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors",
              isUploading
                ? "opacity-70 cursor-not-allowed border-background-400 bg-background-50/30"
                : isDragging
                  ? "bg-background-50/50 border-primary dark:border-primary"
                  : "cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600"
            )}
            onDragEnter={handleDragEnter}
            onDragLeave={handleDragLeave}
            onDragOver={handleDragOver}
            onDrop={handleDrop}
          >
            <div className="flex items-center space-x-2">
              {isUploading ? (
                <div className="h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin"></div>
              ) : (
                <FiFile className="h-4 w-4 text-text-500" />
              )}
              <span className="text-sm text-text-500">
                {isUploading
                  ? `Uploading ${truncateString(fileName || "file", 50)}...`
                  : isDragging
                    ? "Drop JSON file here"
                    : truncateString(
                        fileName || "Select or drag JSON credentials file...",
                        50
                      )}
              </span>
            </div>
            <input
              className="sr-only"
              type="file"
              accept=".json"
              disabled={isUploading}
              onChange={(event) => {
                if (!event.target.files?.length) {
                  return;
                }
                const file = event.target.files[0];
                if (file === undefined) {
                  return;
                }
                handleFileUpload(file);
              }}
            />
          </label>
        </div>
      </div>
    </div>
  );
};

interface GmailJsonUploadSectionProps {
  appCredentialData?: { client_id: string };
  serviceAccountCredentialData?: { service_account_email: string };
  isAdmin: boolean;
  onSuccess?: () => void;
  existingAuthCredential?: boolean;
}

export const GmailJsonUploadSection = ({
  appCredentialData,
  serviceAccountCredentialData,
  isAdmin,
  onSuccess,
  existingAuthCredential,
}: GmailJsonUploadSectionProps) => {
  const { mutate } = useSWRConfig();
  const [localServiceAccountData, setLocalServiceAccountData] = useState(
    serviceAccountCredentialData
  );
  const [localAppCredentialData, setLocalAppCredentialData] =
    useState(appCredentialData);

  // Update local state when props change
  useEffect(() => {
    setLocalServiceAccountData(serviceAccountCredentialData);
    setLocalAppCredentialData(appCredentialData);
  }, [serviceAccountCredentialData, appCredentialData]);

  const handleSuccess = () => {
    if (onSuccess) {
      onSuccess();
    } else {
      refreshAllGoogleData(ValidSources.Gmail);
    }
  };

  if (!isAdmin) {
    return (
      <div>
        <div className="flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded">
          <FiAlertTriangle className="text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
          <p className="text-sm">
            Curators are unable to set up the Gmail credentials. To add a Gmail
            connector, please contact an administrator.
          </p>
        </div>
      </div>
    );
  }

  return (
    <div>
      <p className="text-sm mb-3">
        To connect your Gmail, create credentials (either OAuth App or Service
        Account), download the JSON file, and upload it below.
      </p>
      <div className="mb-4">
        <a
          className="text-primary hover:text-primary/80 flex items-center gap-1 text-sm"
          target="_blank"
          href={`${DOCS_ADMINS_PATH}/connectors/official/gmail/overview`}
          rel="noreferrer"
        >
          <FiLink className="h-3 w-3" />
          View detailed setup instructions
        </a>
      </div>

      {(localServiceAccountData?.service_account_email ||
        localAppCredentialData?.client_id) && (
        <div className="mb-4">
          <div className="relative flex flex-1 items-center">
            <label
              className={cn(
                "flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors",
                false
                  ? "opacity-70 cursor-not-allowed border-background-400 bg-background-50/30"
                  : "cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600"
              )}
            >
              <div className="flex items-center space-x-2">
                {false ? (
                  <div className="h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin"></div>
                ) : (
                  <FiFile className="h-4 w-4 text-text-500" />
                )}
                <span className="text-sm text-text-500">
                  {truncateString(
                    localServiceAccountData?.service_account_email ||
                      localAppCredentialData?.client_id ||
                      "",
                    50
                  )}
                </span>
              </div>
            </label>
          </div>
          {isAdmin && !existingAuthCredential && (
            <div className="mt-2">
              <Button
                variant="danger"
                onClick={async () => {
                  const endpoint =
                    localServiceAccountData?.service_account_email
                      ? SWR_KEYS.googleConnectorServiceAccountKey("gmail")
                      : SWR_KEYS.googleConnectorAppCredential("gmail");

                  const response = await fetch(endpoint, {
                    method: "DELETE",
                  });

                  if (response.ok) {
                    mutate(endpoint);
                    // Also mutate the credential endpoints to ensure Step 2 is reset
                    mutate(buildSimilarCredentialInfoURL(ValidSources.Gmail));

                    // Add additional mutations to refresh all credential-related endpoints
                    mutate(SWR_KEYS.googleConnectorCredentials("gmail"));
                    mutate(SWR_KEYS.googleConnectorPublicCredential("gmail"));
                    mutate(
                      SWR_KEYS.googleConnectorServiceAccountCredential("gmail")
                    );

                    toast.success(
                      `Successfully deleted ${
                        localServiceAccountData
                          ? "service account key"
                          : "app credentials"
                      }`
                    );
                    // Immediately update local state
                    if (localServiceAccountData) {
                      setLocalServiceAccountData(undefined);
                    } else {
                      setLocalAppCredentialData(undefined);
                    }
                    handleSuccess();
                  } else {
                    const errorMsg = await response.text();
                    toast.error(`Failed to delete credentials - ${errorMsg}`);
                  }
                }}
              >
                Delete Credentials
              </Button>
            </div>
          )}
        </div>
      )}

      {!(
        localServiceAccountData?.service_account_email ||
        localAppCredentialData?.client_id
      ) && <GmailCredentialUpload onSuccess={handleSuccess} />}
    </div>
  );
};

interface GmailCredentialSectionProps {
  gmailPublicCredential?: Credential<GmailCredentialJson>;
  gmailServiceAccountCredential?: Credential<GmailServiceAccountCredentialJson>;
  serviceAccountKeyData?: { service_account_email: string };
  appCredentialData?: { client_id: string };
  refreshCredentials: () => void;
  connectorExists: boolean;
  user: User | null;
  buildMode?: boolean;
  onOAuthRedirect?: () => void;
  onCredentialCreated?: (
    credential: Credential<
      GmailCredentialJson | GmailServiceAccountCredentialJson
    >
  ) => void;
}

async function handleRevokeAccess(
  connectorExists: boolean,
  existingCredential:
    | Credential<GmailCredentialJson>
    | Credential<GmailServiceAccountCredentialJson>,
  refreshCredentials: () => void
) {
  if (connectorExists) {
    const message =
      "Cannot revoke the Gmail credential while any connector is still associated with the credential. " +
      "Please delete all associated connectors, then try again.";
    toast.error(message);
    return;
  }

  await adminDeleteCredential(existingCredential.id);
  toast.success("Successfully revoked the Gmail credential!");

  refreshCredentials();
}

export const GmailAuthSection = ({
  gmailPublicCredential,
  gmailServiceAccountCredential,
  serviceAccountKeyData,
  appCredentialData,
  refreshCredentials,
  connectorExists,
  user,
  buildMode = false,
  onOAuthRedirect,
  onCredentialCreated,
}: GmailCredentialSectionProps) => {
  const router = useRouter();
  const [isAuthenticating, setIsAuthenticating] = useState(false);
  const [localServiceAccountData, setLocalServiceAccountData] = useState(
    serviceAccountKeyData
  );
  const [localAppCredentialData, setLocalAppCredentialData] =
    useState(appCredentialData);
  const [localGmailPublicCredential, setLocalGmailPublicCredential] = useState(
    gmailPublicCredential
  );
  const [
    localGmailServiceAccountCredential,
    setLocalGmailServiceAccountCredential,
  ] = useState(gmailServiceAccountCredential);

  // Update local state when props change
  useEffect(() => {
    setLocalServiceAccountData(serviceAccountKeyData);
    setLocalAppCredentialData(appCredentialData);
    setLocalGmailPublicCredential(gmailPublicCredential);
    setLocalGmailServiceAccountCredential(gmailServiceAccountCredential);
  }, [
    serviceAccountKeyData,
    appCredentialData,
    gmailPublicCredential,
    gmailServiceAccountCredential,
  ]);

  const existingCredential =
    localGmailPublicCredential || localGmailServiceAccountCredential;
  if (existingCredential) {
    return (
      <div>
        <div className="mt-4">
          <div className="py-3 px-4 bg-blue-50/30 dark:bg-blue-900/5 rounded mb-4 flex items-start">
            <FiCheck className="text-blue-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
            <div className="flex-1">
              <span className="font-medium block">Authentication Complete</span>
              <p className="text-sm mt-1 text-text-500 dark:text-text-400 break-words">
                Your Gmail credentials have been successfully uploaded and
                authenticated.
              </p>
            </div>
          </div>
          <Section flexDirection="row" justifyContent="between" height="fit">
            <Button
              variant="danger"
              onClick={async () => {
                handleRevokeAccess(
                  connectorExists,
                  existingCredential,
                  refreshCredentials
                );
              }}
            >
              Revoke Access
            </Button>
            {buildMode && onCredentialCreated && (
              <Button onClick={() => onCredentialCreated(existingCredential)}>
                Continue
              </Button>
            )}
          </Section>
        </div>
      </div>
    );
  }

  // If no credentials are uploaded, show message to complete step 1 first
  if (
    !localServiceAccountData?.service_account_email &&
    !localAppCredentialData?.client_id
  ) {
    return (
      <div>
        <SectionHeader>Gmail Authentication</SectionHeader>
        <div className="mt-4">
          <div className="flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded">
            <FiAlertTriangle className="text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0" />
            <p className="text-sm">
              Please complete Step 1 by uploading either OAuth credentials or a
              Service Account key before proceeding with authentication.
            </p>
          </div>
        </div>
      </div>
    );
  }

  if (localServiceAccountData?.service_account_email) {
    return (
      <div>
        <div className="mt-4">
          <Formik
            initialValues={{
              google_primary_admin: user?.email || "",
            }}
            validationSchema={Yup.object().shape({
              google_primary_admin: Yup.string()
                .email("Must be a valid email")
                .required("Required"),
            })}
            onSubmit={async (values, formikHelpers) => {
              formikHelpers.setSubmitting(true);
              try {
                const response = await fetch(
                  "/api/manage/admin/connector/gmail/service-account-credential",
                  {
                    method: "PUT",
                    headers: {
                      "Content-Type": "application/json",
                    },
                    body: JSON.stringify({
                      google_primary_admin: values.google_primary_admin,
                    }),
                  }
                );

                if (response.ok) {
                  toast.success(
                    "Successfully created service account credential"
                  );
                  refreshCredentials();
                } else {
                  const errorMsg = await response.text();
                  toast.error(
                    `Failed to create service account credential - ${errorMsg}`
                  );
                }
              } catch (error) {
                toast.error(
                  `Failed to create service account credential - ${error}`
                );
              } finally {
                formikHelpers.setSubmitting(false);
              }
            }}
          >
            {({ isSubmitting }) => (
              <Form>
                <TextFormField
                  name="google_primary_admin"
                  label="Primary Admin Email:"
                  subtext="Enter the email of an admin/owner of the Google Organization that owns the Gmail account(s) you want to index."
                />
                <div className="flex">
                  <Button disabled={isSubmitting} type="submit">
                    {isSubmitting ? "Creating..." : "Create Credential"}
                  </Button>
                </div>
              </Form>
            )}
          </Formik>
        </div>
      </div>
    );
  }

  if (localAppCredentialData?.client_id) {
    return (
      <div>
        <div className="bg-background-50/30 dark:bg-background-900/20 rounded mb-4">
          <p className="text-sm">
            Next, you need to authenticate with Gmail via OAuth. This gives us
            read access to the emails you have access to in your Gmail account.
          </p>
        </div>
        <Button
          disabled={isAuthenticating}
          onClick={async () => {
            setIsAuthenticating(true);
            try {
              if (buildMode) {
                Cookies.set(CRAFT_OAUTH_COOKIE_NAME, "true", {
                  path: "/",
                });
              }
              const [authUrl, errorMsg] = await setupGmailOAuth({
                isAdmin: true,
              });

              if (authUrl) {
                onOAuthRedirect?.();
                router.push(authUrl as Route);
              } else {
                toast.error(errorMsg);
                setIsAuthenticating(false);
              }
            } catch (error) {
              toast.error(`Failed to authenticate with Gmail - ${error}`);
              setIsAuthenticating(false);
            }
          }}
        >
          {isAuthenticating ? "Authenticating..." : "Authenticate with Gmail"}
        </Button>
      </div>
    );
  }

  // This code path should not be reached with the new conditions above
  return null;
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/gmail/GmailPage.tsx
================================================
"use client";

import React from "react";
import { ErrorCallout } from "@/components/ErrorCallout";
import { LoadingAnimation } from "@/components/Loading";
import { toast } from "@/hooks/useToast";
import { CCPairBasicInfo, ValidSources } from "@/lib/types";
import {
  Credential,
  GmailCredentialJson,
  GmailServiceAccountCredentialJson,
} from "@/lib/connectors/credentials";
import { GmailAuthSection, GmailJsonUploadSection } from "./Credential";
import { usePublicCredentials, useBasicConnectorStatus } from "@/lib/hooks";
import Title from "@/components/ui/title";
import { useUser } from "@/providers/UserProvider";
import {
  useGoogleAppCredential,
  useGoogleServiceAccountKey,
  useGoogleCredentials,
  useConnectorsByCredentialId,
  checkCredentialsFetched,
  filterUploadedCredentials,
  checkConnectorsExist,
  refreshAllGoogleData,
} from "@/lib/googleConnector";

interface GmailMainProps {
  buildMode?: boolean;
  onOAuthRedirect?: () => void;
  onCredentialCreated?: (
    credential: Credential<
      GmailCredentialJson | GmailServiceAccountCredentialJson
    >
  ) => void;
}

export const GmailMain = ({
  buildMode = false,
  onOAuthRedirect,
  onCredentialCreated,
}: GmailMainProps) => {
  const { isAdmin, user } = useUser();

  const {
    data: appCredentialData,
    isLoading: isAppCredentialLoading,
    error: isAppCredentialError,
  } = useGoogleAppCredential("gmail");

  const {
    data: serviceAccountKeyData,
    isLoading: isServiceAccountKeyLoading,
    error: isServiceAccountKeyError,
  } = useGoogleServiceAccountKey("gmail");

  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: connectorIndexingStatusesError,
  } = useBasicConnectorStatus();

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: credentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  const {
    data: gmailCredentials,
    isLoading: isGmailCredentialsLoading,
    error: gmailCredentialsError,
  } = useGoogleCredentials(ValidSources.Gmail);

  const { credential_id, uploadedCredentials } =
    filterUploadedCredentials(gmailCredentials);

  const {
    data: gmailConnectors,
    isLoading: isGmailConnectorsLoading,
    error: gmailConnectorsError,
    refreshConnectorsByCredentialId,
  } = useConnectorsByCredentialId(credential_id);

  const {
    appCredentialSuccessfullyFetched,
    serviceAccountKeySuccessfullyFetched,
  } = checkCredentialsFetched(
    appCredentialData,
    isAppCredentialError,
    serviceAccountKeyData,
    isServiceAccountKeyError
  );

  const handleRefresh = () => {
    refreshCredentials();
    refreshConnectorsByCredentialId();
    refreshAllGoogleData(ValidSources.Gmail);
  };

  if (
    (!appCredentialSuccessfullyFetched && isAppCredentialLoading) ||
    (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) ||
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading) ||
    (!gmailCredentials && isGmailCredentialsLoading) ||
    (!gmailConnectors && isGmailConnectorsLoading)
  ) {
    return (
      <div className="mx-auto">
        <LoadingAnimation text="" />
      </div>
    );
  }

  if (credentialsError || !credentialsData) {
    return <ErrorCallout errorTitle="Failed to load credentials." />;
  }

  if (gmailCredentialsError || !gmailCredentials) {
    return <ErrorCallout errorTitle="Failed to load Gmail credentials." />;
  }

  if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <ErrorCallout errorTitle="Failed to load connectors." />;
  }

  if (
    !appCredentialSuccessfullyFetched ||
    !serviceAccountKeySuccessfullyFetched
  ) {
    return (
      <ErrorCallout errorTitle="Error loading Gmail app credentials. Contact an administrator." />
    );
  }

  if (gmailConnectorsError) {
    return (
      <ErrorCallout errorTitle="Failed to load Gmail associated connectors." />
    );
  }

  const connectorExistsFromCredential = checkConnectorsExist(gmailConnectors);

  const gmailPublicUploadedCredential:
    | Credential<GmailCredentialJson>
    | undefined = credentialsData.find(
    (credential) =>
      credential.credential_json?.google_tokens &&
      credential.admin_public &&
      credential.source === "gmail" &&
      credential.credential_json.authentication_method !== "oauth_interactive"
  );

  const gmailServiceAccountCredential:
    | Credential<GmailServiceAccountCredentialJson>
    | undefined = credentialsData.find(
    (credential) =>
      credential.credential_json?.google_service_account_key &&
      credential.source === "gmail"
  );

  const gmailConnectorIndexingStatuses: CCPairBasicInfo[] =
    connectorIndexingStatuses.filter(
      (connectorIndexingStatus) => connectorIndexingStatus.source === "gmail"
    );

  const connectorExists =
    connectorExistsFromCredential || gmailConnectorIndexingStatuses.length > 0;

  const hasUploadedCredentials =
    Boolean(appCredentialData?.client_id) ||
    Boolean(serviceAccountKeyData?.service_account_email);

  return (
    <>
      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your Credentials
      </Title>
      <GmailJsonUploadSection
        appCredentialData={appCredentialData}
        serviceAccountCredentialData={serviceAccountKeyData}
        isAdmin={isAdmin}
        onSuccess={handleRefresh}
        existingAuthCredential={Boolean(
          gmailPublicUploadedCredential || gmailServiceAccountCredential
        )}
      />

      {isAdmin && hasUploadedCredentials && (
        <>
          <Title className="mb-2 mt-6 ml-auto mr-auto">
            Step 2: Authenticate with Onyx
          </Title>
          <GmailAuthSection
            refreshCredentials={handleRefresh}
            gmailPublicCredential={gmailPublicUploadedCredential}
            gmailServiceAccountCredential={gmailServiceAccountCredential}
            appCredentialData={appCredentialData}
            serviceAccountKeyData={serviceAccountKeyData}
            connectorExists={connectorExists}
            user={user}
            buildMode={buildMode}
            onOAuthRedirect={onOAuthRedirect}
            // Necessary prop drilling for build mode v1.
            // TODO: either integrate gmail into normal flow
            // or create a build-mode specific Gmail flow
            onCredentialCreated={onCredentialCreated}
          />
        </>
      )}
    </>
  );
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/utils/files.ts
================================================
import { toast } from "@/hooks/useToast";
import { createConnector, runConnector } from "@/lib/connector";
import { createCredential, linkCredential } from "@/lib/credential";
import { FileConfig } from "@/lib/connectors/connectors";
import { AccessType, ValidSources } from "@/lib/types";

export const submitFiles = async (
  selectedFiles: File[],
  name: string,
  access_type: string,
  groups?: number[]
) => {
  const formData = new FormData();

  selectedFiles.forEach((file) => {
    formData.append("files", file);
  });

  const response = await fetch("/api/manage/admin/connector/file/upload", {
    method: "POST",
    body: formData,
  });
  const responseJson = await response.json();
  if (!response.ok) {
    toast.error(`Unable to upload files - ${responseJson.detail}`);
    return;
  }

  const filePaths = responseJson.file_paths as string[];
  const fileNames = responseJson.file_names as string[];
  const zipMetadataFileId = responseJson.zip_metadata_file_id as string | null;

  const [connectorErrorMsg, connector] = await createConnector<FileConfig>({
    name: "FileConnector-" + Date.now(),
    source: ValidSources.File,
    input_type: "load_state",
    connector_specific_config: {
      file_locations: filePaths,
      file_names: fileNames,
      zip_metadata_file_id: zipMetadataFileId,
    },
    refresh_freq: null,
    prune_freq: null,
    indexing_start: null,
    access_type: access_type,
    groups: groups,
  });
  if (connectorErrorMsg || !connector) {
    toast.error(`Unable to create connector - ${connectorErrorMsg}`);
    return;
  }

  // Since there is no "real" credential associated with a file connector
  // we create a dummy one here so that we can associate the CC Pair with a
  // user. This is needed since the user for a CC Pair is found via the credential
  // associated with it.
  const createCredentialResponse = await createCredential({
    credential_json: {},
    admin_public: true,
    source: ValidSources.File,
    curator_public: true,
    groups: groups,
    name,
  });
  if (!createCredentialResponse.ok) {
    const errorMsg = await createCredentialResponse.text();
    toast.error(`Error creating credential for CC Pair - ${errorMsg}`);
    return false;
  }
  const credentialId = (await createCredentialResponse.json()).id;

  const credentialResponse = await linkCredential(
    connector.id,
    credentialId,
    name,
    access_type as AccessType,
    groups
  );
  if (!credentialResponse.ok) {
    const credentialResponseJson = await credentialResponse.json();
    toast.error(
      `Unable to link connector to credential - ${credentialResponseJson.detail}`
    );
    return false;
  }

  const runConnectorErrorMsg = await runConnector(connector.id, [0]);
  if (runConnectorErrorMsg) {
    toast.error(`Unable to run connector - ${runConnectorErrorMsg}`);
    return false;
  }

  toast.success("Successfully uploaded files!");
  return true;
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/utils/google_site.ts
================================================
import { toast } from "@/hooks/useToast";
import { createConnector, runConnector } from "@/lib/connector";
import { linkCredential } from "@/lib/credential";
import { GoogleSitesConfig } from "@/lib/connectors/connectors";
import { ValidSources } from "@/lib/types";

export const submitGoogleSite = async (
  selectedFiles: File[],
  base_url: any,
  refreshFreq: number,
  pruneFreq: number,
  indexingStart: Date,
  access_type: string,
  groups: number[],
  name?: string
) => {
  const uploadCreateAndTriggerConnector = async () => {
    const formData = new FormData();

    selectedFiles.forEach((file) => {
      formData.append("files", file);
    });

    const response = await fetch(
      "/api/manage/admin/connector/file/upload?unzip=false",
      {
        method: "POST",
        body: formData,
      }
    );
    const responseJson = await response.json();
    if (!response.ok) {
      toast.error(`Unable to upload files - ${responseJson.detail}`);
      return false;
    }

    const filePaths = responseJson.file_paths as string[];
    if (!filePaths || filePaths.length === 0) {
      toast.error(
        "File upload was successful, but no file path was returned. Cannot create connector."
      );
      return false;
    }

    const filePath = filePaths[0];
    if (filePath === undefined) {
      toast.error(
        "File upload was successful, but file path is undefined. Cannot create connector."
      );
      return false;
    }

    const [connectorErrorMsg, connector] =
      await createConnector<GoogleSitesConfig>({
        name: name ? name : `GoogleSitesConnector-${base_url}`,
        source: ValidSources.GoogleSites,
        input_type: "load_state",
        connector_specific_config: {
          base_url: base_url,
          zip_path: filePath,
        },
        access_type: access_type,
        refresh_freq: refreshFreq,
        prune_freq: pruneFreq,
        indexing_start: indexingStart,
      });
    if (connectorErrorMsg || !connector) {
      toast.error(`Unable to create connector - ${connectorErrorMsg}`);
      return false;
    }

    const credentialResponse = await linkCredential(
      connector.id,
      0,
      base_url,
      undefined,
      groups
    );
    if (!credentialResponse.ok) {
      const credentialResponseJson = await credentialResponse.json();
      toast.error(
        `Unable to link connector to credential - ${credentialResponseJson.detail}`
      );
      return false;
    }

    const runConnectorErrorMsg = await runConnector(connector.id, [0]);
    if (runConnectorErrorMsg) {
      toast.error(`Unable to run connector - ${runConnectorErrorMsg}`);
      return false;
    }
    toast.success("Successfully created Google Site connector!");
    return true;
  };

  try {
    const response = await uploadCreateAndTriggerConnector();
    return response;
  } catch (e) {
    return false;
  }
};


================================================
FILE: web/src/app/admin/connectors/[connector]/pages/utils/hooks.ts
================================================
import { GmailConfig } from "@/lib/connectors/connectors";

export const gmailConnectorNameBuilder = (values: GmailConfig) =>
  "GmailConnector";

import { usePublicCredentials } from "@/lib/hooks";
import {
  Credential,
  GmailCredentialJson,
  GmailServiceAccountCredentialJson,
  GoogleDriveCredentialJson,
  GoogleDriveServiceAccountCredentialJson,
} from "@/lib/connectors/credentials";

export const useGmailCredentials = (connector: string) => {
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: credentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  const gmailPublicCredential: Credential<GmailCredentialJson> | undefined =
    credentialsData?.find(
      (credential) =>
        credential.credential_json?.google_tokens &&
        credential.admin_public &&
        credential.source === connector
    );

  const gmailServiceAccountCredential:
    | Credential<GmailServiceAccountCredentialJson>
    | undefined = credentialsData?.find(
    (credential) =>
      credential.credential_json?.google_service_account_key &&
      credential.admin_public &&
      credential.source === connector
  );

  const liveGmailCredential =
    gmailPublicCredential || gmailServiceAccountCredential;

  return {
    liveGmailCredential: liveGmailCredential,
  };
};

export const useGoogleDriveCredentials = (connector: string) => {
  const { data: credentialsData } = usePublicCredentials();

  const googleDrivePublicCredential:
    | Credential<GoogleDriveCredentialJson>
    | undefined = credentialsData?.find(
    (credential) =>
      credential.credential_json?.google_tokens &&
      credential.admin_public &&
      credential.source === connector
  );

  const googleDriveServiceAccountCredential:
    | Credential<GoogleDriveServiceAccountCredentialJson>
    | undefined = credentialsData?.find(
    (credential) =>
      credential.credential_json?.google_service_account_key &&
      credential.admin_public &&
      credential.source === connector
  );

  const liveGDriveCredential =
    googleDrivePublicCredential || googleDriveServiceAccountCredential;

  return {
    liveGDriveCredential: liveGDriveCredential,
  };
};


================================================
FILE: web/src/app/admin/debug/page.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ThreeDotsLoader } from "@/components/Loading";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { Button, Text } from "@opal/components";
import { Card } from "@/components/ui/card";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import { Spinner } from "@/components/Spinner";
import { SvgDownloadCloud } from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.DEBUG;

function Main() {
  const [categories, setCategories] = useState<string[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [isDownloading, setIsDownloading] = useState(false);

  useEffect(() => {
    const fetchCategories = async () => {
      try {
        const response = await fetch("/api/admin/long-term-logs");
        if (!response.ok) throw new Error("Failed to fetch categories");
        const data = await response.json();
        setCategories(data);
      } catch (error) {
        console.error("Error fetching categories:", error);
      } finally {
        setIsLoading(false);
      }
    };

    fetchCategories();
  }, []);

  const handleDownload = async (category: string) => {
    setIsDownloading(true);
    try {
      const response = await fetch(
        `/api/admin/long-term-logs/${category}/download`
      );
      if (!response.ok) throw new Error("Failed to download logs");

      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);

      const a = document.createElement("a");
      a.href = url;
      a.download = `${category}-logs.zip`;
      document.body.appendChild(a);
      a.click();
      window.URL.revokeObjectURL(url);
      document.body.removeChild(a);
    } catch (error) {
      console.error("Error downloading logs:", error);
    } finally {
      setIsDownloading(false);
    }
  };

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  return (
    <>
      {isDownloading && <Spinner />}
      <div className="mb-8">
        <Text as="p">
          {markdown(
            "**Debug Logs** provide detailed information about system operations and events. You can download logs for each category to analyze system behavior or troubleshoot issues."
          )}
        </Text>
        <Spacer rem={0.75} />

        {categories.length > 0 && (
          <Card className="mt-4">
            <Table>
              <TableHeader>
                <TableRow>
                  <TableHead>Category</TableHead>
                  <TableHead>Actions</TableHead>
                </TableRow>
              </TableHeader>
              <TableBody>
                {categories.map((category) => (
                  <TableRow
                    key={category}
                    className="hover:bg-transparent dark:hover:bg-transparent"
                  >
                    <TableCell className="font-medium">{category}</TableCell>
                    <TableCell>
                      <Button
                        prominence="secondary"
                        onClick={() => handleDownload(category)}
                        icon={SvgDownloadCloud}
                      >
                        Download Logs
                      </Button>
                    </TableCell>
                  </TableRow>
                ))}
              </TableBody>
            </Table>
          </Card>
        )}
      </div>
    </>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/BotConfigCard.tsx
================================================
"use client";

import { useState } from "react";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import Card from "@/refresh-components/cards/Card";
import { Button } from "@opal/components";
import { Badge } from "@/components/ui/badge";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { ThreeDotsLoader } from "@/components/Loading";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import {
  useDiscordBotConfig,
  useDiscordGuilds,
} from "@/app/admin/discord-bot/hooks";
import { createBotConfig, deleteBotConfig } from "@/app/admin/discord-bot/lib";
import { toast } from "@/hooks/useToast";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { getFormattedDateTime } from "@/lib/dateUtils";

export function BotConfigCard() {
  const {
    data: botConfig,
    isLoading,
    isManaged,
    refreshBotConfig,
  } = useDiscordBotConfig();
  const { data: guilds } = useDiscordGuilds();

  const [botToken, setBotToken] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [showDeleteConfirm, setShowDeleteConfirm] = useState(false);

  // Don't render anything if managed externally (Cloud or env var)
  if (isManaged) {
    return null;
  }

  // Show loading while fetching initial state
  if (isLoading) {
    return (
      <Card>
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="center"
        >
          <Text mainContentEmphasis text05>
            Bot Token
          </Text>
        </Section>
        <ThreeDotsLoader />
      </Card>
    );
  }

  const isConfigured = botConfig?.configured ?? false;
  const hasServerConfigs = (guilds?.length ?? 0) > 0;

  const handleSaveToken = async () => {
    if (!botToken.trim()) {
      toast.error("Please enter a bot token");
      return;
    }

    setIsSubmitting(true);
    try {
      await createBotConfig(botToken.trim());
      setBotToken("");
      refreshBotConfig();
      toast.success("Bot token saved successfully");
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to save bot token"
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  const handleDeleteToken = async () => {
    setIsSubmitting(true);
    try {
      await deleteBotConfig();
      refreshBotConfig();
      toast.success("Bot token deleted");
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to delete bot token"
      );
    } finally {
      setIsSubmitting(false);
      setShowDeleteConfirm(false);
    }
  };

  return (
    <>
      {showDeleteConfirm && (
        <ConfirmEntityModal
          danger
          entityType="Discord bot token"
          entityName="Discord Bot Token"
          onClose={() => setShowDeleteConfirm(false)}
          onSubmit={handleDeleteToken}
          additionalDetails="This will disconnect your Discord bot. You will need to re-enter the token to use the bot again."
        />
      )}
      <Card>
        <Section flexDirection="row" justifyContent="between">
          <Section flexDirection="row" gap={0.5} width="fit">
            <Text mainContentEmphasis text05>
              Bot Token
            </Text>
            {isConfigured ? (
              <Badge variant="success">Configured</Badge>
            ) : (
              <Badge variant="secondary">Not Configured</Badge>
            )}
          </Section>
          {isConfigured && (
            <SimpleTooltip
              tooltip={
                hasServerConfigs ? "Delete server configs first" : undefined
              }
              disabled={!hasServerConfigs}
            >
              <Button
                disabled={isSubmitting || hasServerConfigs}
                variant="danger"
                onClick={() => setShowDeleteConfirm(true)}
              >
                Delete Discord Token
              </Button>
            </SimpleTooltip>
          )}
        </Section>

        {isConfigured ? (
          <Section flexDirection="column" alignItems="start" gap={0.5}>
            <Text text03 secondaryBody>
              Your Discord bot token is configured.
              {botConfig?.created_at && (
                <>
                  {" "}
                  Added {getFormattedDateTime(new Date(botConfig.created_at))}.
                </>
              )}
            </Text>
            <Text text03 secondaryBody>
              To change the token, delete the current one and add a new one.
            </Text>
          </Section>
        ) : (
          <Section flexDirection="column" alignItems="start" gap={0.75}>
            <Text text03 secondaryBody>
              Enter your Discord bot token to enable the bot. You can get this
              from the Discord Developer Portal.
            </Text>
            <Section flexDirection="row" alignItems="end" gap={0.5}>
              <PasswordInputTypeIn
                value={botToken}
                onChange={(e) => setBotToken(e.target.value)}
                placeholder="Enter bot token..."
                disabled={isSubmitting}
                className="flex-1"
              />
              <Button
                disabled={isSubmitting || !botToken.trim()}
                onClick={handleSaveToken}
              >
                {isSubmitting ? "Saving..." : "Save Token"}
              </Button>
            </Section>
          </Section>
        )}
      </Card>
    </>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/DiscordGuildsTable.tsx
================================================
"use client";

import { useState } from "react";
import { useRouter } from "next/navigation";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { Badge } from "@/components/ui/badge";
import { DeleteButton } from "@/components/DeleteButton";
import { Button } from "@opal/components";
import Switch from "@/refresh-components/inputs/Switch";
import { SvgEdit, SvgServer } from "@opal/icons";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import { DiscordGuildConfig } from "@/app/admin/discord-bot/types";
import {
  deleteGuildConfig,
  updateGuildConfig,
} from "@/app/admin/discord-bot/lib";
import { toast } from "@/hooks/useToast";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";

interface Props {
  guilds: DiscordGuildConfig[];
  onRefresh: () => void;
}

export function DiscordGuildsTable({ guilds, onRefresh }: Props) {
  const router = useRouter();
  const [guildToDelete, setGuildToDelete] = useState<DiscordGuildConfig | null>(
    null
  );
  const [updatingGuildIds, setUpdatingGuildIds] = useState<Set<number>>(
    new Set()
  );

  const handleDelete = async (guildId: number) => {
    try {
      await deleteGuildConfig(guildId);
      onRefresh();
      toast.success("Server configuration deleted");
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to delete server config"
      );
    } finally {
      setGuildToDelete(null);
    }
  };

  const handleToggleEnabled = async (guild: DiscordGuildConfig) => {
    if (!guild.guild_id) {
      toast.error("Server must be registered before it can be enabled");
      return;
    }

    setUpdatingGuildIds((prev) => new Set(prev).add(guild.id));
    try {
      await updateGuildConfig(guild.id, {
        enabled: !guild.enabled,
        default_persona_id: guild.default_persona_id,
      });
      onRefresh();
      toast.success(`Server ${!guild.enabled ? "enabled" : "disabled"}`);
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to update server"
      );
    } finally {
      setUpdatingGuildIds((prev) => {
        const next = new Set(prev);
        next.delete(guild.id);
        return next;
      });
    }
  };

  if (guilds.length === 0) {
    return (
      <EmptyMessage
        icon={SvgServer}
        title="No Discord servers configured yet"
        description="Create a server configuration to get started."
      />
    );
  }

  return (
    <>
      {guildToDelete && (
        <ConfirmEntityModal
          danger
          entityType="Discord server configuration"
          entityName={guildToDelete.guild_name || `Server #${guildToDelete.id}`}
          onClose={() => setGuildToDelete(null)}
          onSubmit={() => handleDelete(guildToDelete.id)}
          additionalDetails="This will remove all settings for this Discord server."
        />
      )}
      <Table>
        <TableHeader>
          <TableRow>
            <TableHead>Server</TableHead>
            <TableHead>Status</TableHead>
            <TableHead>Registered</TableHead>
            <TableHead>Enabled</TableHead>
            <TableHead>Actions</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {guilds.map((guild) => (
            <TableRow key={guild.id}>
              <TableCell>
                <Button
                  disabled={!guild.guild_id}
                  prominence="internal"
                  onClick={() => router.push(`/admin/discord-bot/${guild.id}`)}
                  icon={SvgEdit}
                >
                  {guild.guild_name || `Server #${guild.id}`}
                </Button>
              </TableCell>
              <TableCell>
                {guild.guild_id ? (
                  <Badge variant="success">Registered</Badge>
                ) : (
                  <Badge variant="secondary">Pending</Badge>
                )}
              </TableCell>
              <TableCell>
                {guild.registered_at
                  ? new Date(guild.registered_at).toLocaleDateString()
                  : "-"}
              </TableCell>
              <TableCell>
                {!guild.guild_id ? (
                  "-"
                ) : (
                  <Switch
                    checked={guild.enabled}
                    onCheckedChange={() => handleToggleEnabled(guild)}
                    disabled={updatingGuildIds.has(guild.id)}
                  />
                )}
              </TableCell>
              <TableCell>
                <DeleteButton onClick={() => setGuildToDelete(guild)} />
              </TableCell>
            </TableRow>
          ))}
        </TableBody>
      </Table>
    </>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/[guild-id]/DiscordChannelsTable.tsx
================================================
"use client";

import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import Switch from "@/refresh-components/inputs/Switch";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import {
  DiscordChannelConfig,
  DiscordChannelType,
} from "@/app/admin/discord-bot/types";
import { SvgHash, SvgBubbleText, SvgLock } from "@opal/icons";
import { IconProps } from "@opal/types";
import { Persona } from "@/app/admin/agents/interfaces";

function getChannelIcon(
  channelType: DiscordChannelType,
  isPrivate: boolean = false
): React.ComponentType<IconProps> {
  // TODO: Need different icon for private channel vs private forum
  if (isPrivate) {
    return SvgLock;
  }
  switch (channelType) {
    case "forum":
      return SvgBubbleText;
    case "text":
    default:
      return SvgHash;
  }
}

interface Props {
  channels: DiscordChannelConfig[];
  personas: Persona[];
  onChannelUpdate: (
    channelId: number,
    field:
      | "enabled"
      | "require_bot_invocation"
      | "thread_only_mode"
      | "persona_override_id",
    value: boolean | number | null
  ) => void;
  disabled?: boolean;
}

export function DiscordChannelsTable({
  channels,
  personas,
  onChannelUpdate,
  disabled = false,
}: Props) {
  if (channels.length === 0) {
    return (
      <EmptyMessage
        title="No channels configured"
        description="Run !sync-channels in Discord to add channels."
      />
    );
  }

  return (
    <Table>
      <TableHeader>
        <TableRow className="[&>th]:whitespace-nowrap">
          <TableHead>Channel</TableHead>
          <TableHead>Enabled</TableHead>
          <TableHead>Require @mention</TableHead>
          <TableHead>Thread Only Mode</TableHead>
          <TableHead>Agent Override</TableHead>
        </TableRow>
      </TableHeader>
      <TableBody>
        {channels.map((channel) => {
          const ChannelIcon = getChannelIcon(
            channel.channel_type,
            channel.is_private
          );
          return (
            <TableRow key={channel.id}>
              <TableCell>
                <Section
                  flexDirection="row"
                  justifyContent="start"
                  gap={0.5}
                  width="fit"
                >
                  <ChannelIcon width={16} height={16} />
                  <Text text04 mainUiBody>
                    {channel.channel_name}
                  </Text>
                </Section>
              </TableCell>
              <TableCell>
                <Switch
                  checked={channel.enabled}
                  onCheckedChange={(checked) =>
                    onChannelUpdate(channel.id, "enabled", checked)
                  }
                  disabled={disabled}
                />
              </TableCell>
              <TableCell>
                <Switch
                  checked={channel.require_bot_invocation}
                  onCheckedChange={(checked) =>
                    onChannelUpdate(
                      channel.id,
                      "require_bot_invocation",
                      checked
                    )
                  }
                  disabled={disabled}
                />
              </TableCell>
              <TableCell>
                {channel.channel_type !== "forum" && (
                  <Switch
                    checked={channel.thread_only_mode}
                    onCheckedChange={(checked) =>
                      onChannelUpdate(channel.id, "thread_only_mode", checked)
                    }
                    disabled={disabled}
                  />
                )}
              </TableCell>
              <TableCell>
                <InputSelect
                  value={channel.persona_override_id?.toString() ?? "default"}
                  onValueChange={(value: string) =>
                    onChannelUpdate(
                      channel.id,
                      "persona_override_id",
                      value === "default" ? null : parseInt(value)
                    )
                  }
                  disabled={disabled}
                >
                  <InputSelect.Trigger placeholder="-" />
                  <InputSelect.Content>
                    <InputSelect.Item value="default">-</InputSelect.Item>
                    {personas.map((persona) => (
                      <InputSelect.Item
                        key={persona.id}
                        value={persona.id.toString()}
                      >
                        {persona.name}
                      </InputSelect.Item>
                    ))}
                  </InputSelect.Content>
                </InputSelect>
              </TableCell>
            </TableRow>
          );
        })}
      </TableBody>
    </Table>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/[guild-id]/page.tsx
================================================
"use client";

import { use, useState, useEffect, useCallback, useMemo } from "react";
import { cn } from "@/lib/utils";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import { toast } from "@/hooks/useToast";
import { Section } from "@/layouts/general-layouts";
import { ContentAction } from "@opal/layouts";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Text from "@/refresh-components/texts/Text";
import Card from "@/refresh-components/cards/Card";
import { Callout } from "@/components/ui/callout";
import Message from "@/refresh-components/messages/Message";
import { Button } from "@opal/components";
import { SvgServer } from "@opal/icons";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import {
  useDiscordGuild,
  useDiscordChannels,
} from "@/app/admin/discord-bot/hooks";
import {
  updateGuildConfig,
  bulkUpdateChannelConfigs,
} from "@/app/admin/discord-bot/lib";
import { DiscordChannelsTable } from "@/app/admin/discord-bot/[guild-id]/DiscordChannelsTable";
import { DiscordChannelConfig } from "@/app/admin/discord-bot/types";
import { useAdminPersonas } from "@/hooks/useAdminPersonas";
import { Persona } from "@/app/admin/agents/interfaces";

interface Props {
  params: Promise<{ "guild-id": string }>;
}

function GuildDetailContent({
  guildId,
  personas,
  localChannels,
  onChannelUpdate,
  handleEnableAll,
  handleDisableAll,
  disabled,
}: {
  guildId: number;
  personas: Persona[];
  localChannels: DiscordChannelConfig[];
  onChannelUpdate: (
    channelId: number,
    field:
      | "enabled"
      | "require_bot_invocation"
      | "thread_only_mode"
      | "persona_override_id",
    value: boolean | number | null
  ) => void;
  handleEnableAll: () => void;
  handleDisableAll: () => void;
  disabled: boolean;
}) {
  const {
    data: guild,
    isLoading: guildLoading,
    error: guildError,
  } = useDiscordGuild(guildId);
  const { isLoading: channelsLoading, error: channelsError } =
    useDiscordChannels(guildId);

  if (guildLoading) {
    return <ThreeDotsLoader />;
  }

  if (guildError || !guild) {
    return (
      <ErrorCallout
        errorTitle="Failed to load server"
        errorMsg={guildError?.info?.detail || "Server not found"}
      />
    );
  }

  const isRegistered = !!guild.guild_id;

  return (
    <>
      {!isRegistered && (
        <Callout type="notice" title="Waiting for Registration">
          Use the !register command in your Discord server with the registration
          key to complete setup.
        </Callout>
      )}

      <Card variant={disabled ? "disabled" : "primary"}>
        <ContentAction
          title="Channel Configuration"
          description="Run !sync-channels in Discord to update the channel list."
          sizePreset="main-content"
          variant="section"
          rightChildren={
            isRegistered && !channelsLoading && !channelsError ? (
              <Section
                flexDirection="row"
                justifyContent="end"
                alignItems="center"
                width="fit"
                gap={0.5}
              >
                <Button
                  disabled={disabled}
                  prominence="secondary"
                  onClick={handleEnableAll}
                >
                  Enable All
                </Button>
                <Button
                  disabled={disabled}
                  prominence="secondary"
                  onClick={handleDisableAll}
                >
                  Disable All
                </Button>
              </Section>
            ) : undefined
          }
        />

        {!isRegistered ? (
          <Text text03 secondaryBody>
            Channel configuration will be available after the server is
            registered.
          </Text>
        ) : channelsLoading ? (
          <ThreeDotsLoader />
        ) : channelsError ? (
          <ErrorCallout
            errorTitle="Failed to load channels"
            errorMsg={channelsError?.info?.detail || "Could not load channels"}
          />
        ) : (
          <DiscordChannelsTable
            channels={localChannels}
            personas={personas}
            onChannelUpdate={onChannelUpdate}
            disabled={disabled}
          />
        )}
      </Card>
    </>
  );
}

export default function Page({ params }: Props) {
  const unwrappedParams = use(params);
  const guildId = Number(unwrappedParams["guild-id"]);
  const { data: guild, refreshGuild } = useDiscordGuild(guildId);
  const {
    data: channels,
    isLoading: channelsLoading,
    error: channelsError,
    refreshChannels,
  } = useDiscordChannels(guildId);
  const { personas, isLoading: personasLoading } = useAdminPersonas({
    includeDefault: true,
  });
  const [isUpdating, setIsUpdating] = useState(false);

  // Local state for channel configurations
  const [localChannels, setLocalChannels] = useState<DiscordChannelConfig[]>(
    []
  );

  // Track the original server state to detect changes
  const [originalChannels, setOriginalChannels] = useState<
    DiscordChannelConfig[]
  >([]);

  // Sync local state with fetched channels
  useEffect(() => {
    if (channels) {
      setLocalChannels(channels);
      setOriginalChannels(channels);
    }
  }, [channels]);

  // Check if there are unsaved changes
  const hasUnsavedChanges = useMemo(() => {
    for (const local of localChannels) {
      const original = originalChannels.find((c) => c.id === local.id);
      if (!original) return true;
      if (
        local.enabled !== original.enabled ||
        local.require_bot_invocation !== original.require_bot_invocation ||
        local.thread_only_mode !== original.thread_only_mode ||
        local.persona_override_id !== original.persona_override_id
      ) {
        return true;
      }
    }
    return false;
  }, [localChannels, originalChannels]);

  // Get list of changed channels for bulk update
  const getChangedChannels = useCallback(() => {
    const changes: {
      channelConfigId: number;
      update: {
        enabled: boolean;
        require_bot_invocation: boolean;
        thread_only_mode: boolean;
        persona_override_id: number | null;
      };
    }[] = [];

    for (const local of localChannels) {
      const original = originalChannels.find((c) => c.id === local.id);
      if (!original) continue;
      if (
        local.enabled !== original.enabled ||
        local.require_bot_invocation !== original.require_bot_invocation ||
        local.thread_only_mode !== original.thread_only_mode ||
        local.persona_override_id !== original.persona_override_id
      ) {
        changes.push({
          channelConfigId: local.id,
          update: {
            enabled: local.enabled,
            require_bot_invocation: local.require_bot_invocation,
            thread_only_mode: local.thread_only_mode,
            persona_override_id: local.persona_override_id,
          },
        });
      }
    }

    return changes;
  }, [localChannels, originalChannels]);

  const handleChannelUpdate = useCallback(
    (
      channelId: number,
      field:
        | "enabled"
        | "require_bot_invocation"
        | "thread_only_mode"
        | "persona_override_id",
      value: boolean | number | null
    ) => {
      setLocalChannels((prev) =>
        prev.map((channel) =>
          channel.id === channelId ? { ...channel, [field]: value } : channel
        )
      );
    },
    []
  );

  const handleEnableAll = useCallback(() => {
    setLocalChannels((prev) =>
      prev.map((channel) => ({ ...channel, enabled: true }))
    );
  }, []);

  const handleDisableAll = useCallback(() => {
    setLocalChannels((prev) =>
      prev.map((channel) => ({ ...channel, enabled: false }))
    );
  }, []);

  const handleSaveChanges = async () => {
    const changes = getChangedChannels();
    if (changes.length === 0) return;

    setIsUpdating(true);
    try {
      const { succeeded, failed } = await bulkUpdateChannelConfigs(
        guildId,
        changes
      );

      if (failed > 0) {
        toast.error(`Updated ${succeeded} channels, but ${failed} failed`);
        // Refresh to get actual server state when some updates failed
        refreshChannels();
      } else {
        toast.success(
          `Updated ${succeeded} channel${succeeded !== 1 ? "s" : ""}`
        );
        // Update original to match local (avoids flash from refresh)
        setOriginalChannels(localChannels);
      }
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to update channels"
      );
    } finally {
      setIsUpdating(false);
    }
  };

  const handleDefaultPersonaChange = async (personaId: number | null) => {
    if (!guild) return;
    setIsUpdating(true);
    try {
      await updateGuildConfig(guildId, {
        enabled: guild.enabled,
        default_persona_id: personaId,
      });
      refreshGuild();
      toast.success(
        personaId ? "Default agent updated" : "Default agent cleared"
      );
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to update agent"
      );
    } finally {
      setIsUpdating(false);
    }
  };

  const registeredText = guild?.registered_at
    ? `Registered: ${new Date(guild.registered_at).toLocaleString()}`
    : "Pending registration";

  const isRegistered = !!guild?.guild_id;
  const isUpdateDisabled =
    !isRegistered ||
    channelsLoading ||
    !!channelsError ||
    !hasUnsavedChanges ||
    !guild?.enabled ||
    isUpdating;

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgServer}
        title={guild?.guild_name || `Server #${guildId}`}
        description={registeredText}
        backButton
        rightChildren={
          <Button disabled={isUpdateDisabled} onClick={handleSaveChanges}>
            Update Configuration
          </Button>
        }
      />
      <SettingsLayouts.Body>
        {/* Default Persona Selector */}
        <Card variant={!guild?.enabled ? "disabled" : "primary"}>
          <ContentAction
            title="Default Agent"
            description="The agent used by the bot in all channels unless overridden."
            sizePreset="main-content"
            variant="section"
            rightChildren={
              <InputSelect
                value={guild?.default_persona_id?.toString() ?? "default"}
                onValueChange={(value: string) =>
                  handleDefaultPersonaChange(
                    value === "default" ? null : parseInt(value)
                  )
                }
                disabled={isUpdating || !guild?.enabled || personasLoading}
              >
                <InputSelect.Trigger placeholder="Select agent" />
                <InputSelect.Content>
                  <InputSelect.Item value="default">
                    Default Agent
                  </InputSelect.Item>
                  {personas.map((persona) => (
                    <InputSelect.Item
                      key={persona.id}
                      value={persona.id.toString()}
                    >
                      {persona.name}
                    </InputSelect.Item>
                  ))}
                </InputSelect.Content>
              </InputSelect>
            }
          />
        </Card>

        <GuildDetailContent
          guildId={guildId}
          personas={personas}
          localChannels={localChannels}
          onChannelUpdate={handleChannelUpdate}
          handleEnableAll={handleEnableAll}
          handleDisableAll={handleDisableAll}
          disabled={!guild?.enabled}
        />

        {/* Unsaved changes indicator - sticky at bottom, centered in content area */}
        <div
          className={cn(
            "sticky z-toast bottom-4 w-fit mx-auto transition-all duration-300 ease-in-out",
            hasUnsavedChanges &&
              isRegistered &&
              !channelsLoading &&
              guild?.enabled
              ? "opacity-100 translate-y-0"
              : "opacity-0 translate-y-4 pointer-events-none"
          )}
        >
          <Message
            warning
            text="You have unsaved changes"
            description="Click Update to save them."
            close={false}
          />
        </div>
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/hooks.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import {
  DiscordBotConfig,
  DiscordGuildConfig,
  DiscordChannelConfig,
} from "@/app/admin/discord-bot/types";

const BASE_URL = "/api/manage/admin/discord-bot";

/**
 * Custom fetcher for bot config that handles 403 specially.
 * 403 means bot config is managed externally (Cloud or env var).
 */
async function botConfigFetcher(url: string): Promise<DiscordBotConfig | null> {
  const res = await fetch(url);

  if (res.status === 403) {
    // Bot config is managed externally - return null to indicate not accessible
    return null;
  }

  if (!res.ok) {
    throw new Error("Failed to fetch bot config");
  }

  return res.json();
}

/**
 * Hook for bot config. Returns null when managed externally (Cloud/env var).
 */
export function useDiscordBotConfig() {
  const url = `${BASE_URL}/config`;
  const swrResponse = useSWR<DiscordBotConfig | null>(url, botConfigFetcher);
  return {
    ...swrResponse,
    // null = managed externally (403), undefined = loading
    isManaged: swrResponse.data === null,
    refreshBotConfig: () => swrResponse.mutate(),
  };
}

export function useDiscordGuilds() {
  const url = `${BASE_URL}/guilds`;
  const swrResponse = useSWR<DiscordGuildConfig[]>(url, errorHandlingFetcher);
  return {
    ...swrResponse,
    refreshGuilds: () => swrResponse.mutate(),
  };
}

export function useDiscordGuild(configId: number) {
  const url = `${BASE_URL}/guilds/${configId}`;
  const swrResponse = useSWR<DiscordGuildConfig>(url, errorHandlingFetcher);
  return {
    ...swrResponse,
    refreshGuild: () => swrResponse.mutate(),
  };
}

export function useDiscordChannels(guildConfigId: number) {
  const url = guildConfigId
    ? `${BASE_URL}/guilds/${guildConfigId}/channels`
    : null;
  const swrResponse = useSWR<DiscordChannelConfig[]>(url, errorHandlingFetcher);
  return {
    ...swrResponse,
    refreshChannels: () => swrResponse.mutate(),
  };
}


================================================
FILE: web/src/app/admin/discord-bot/lib.ts
================================================
import {
  DiscordBotConfig,
  DiscordGuildConfig,
  DiscordGuildConfigCreateResponse,
  DiscordGuildConfigUpdate,
  DiscordChannelConfig,
  DiscordChannelConfigUpdate,
} from "@/app/admin/discord-bot/types";

const BASE_URL = "/api/manage/admin/discord-bot";

// === Bot Config (Self-hosted only) ===

export async function fetchBotConfig(): Promise<DiscordBotConfig> {
  const response = await fetch(`${BASE_URL}/config`);
  if (!response.ok) {
    throw new Error("Failed to fetch bot config");
  }
  return response.json();
}

export async function createBotConfig(
  botToken: string
): Promise<DiscordBotConfig> {
  const response = await fetch(`${BASE_URL}/config`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ bot_token: botToken }),
  });
  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to create bot config");
  }
  return response.json();
}

export async function deleteBotConfig(): Promise<void> {
  const response = await fetch(`${BASE_URL}/config`, { method: "DELETE" });
  if (!response.ok) {
    throw new Error("Failed to delete bot config");
  }
}

// === Guild Config ===

export async function fetchGuildConfigs(): Promise<DiscordGuildConfig[]> {
  const response = await fetch(`${BASE_URL}/guilds`);
  if (!response.ok) {
    throw new Error("Failed to fetch guild configs");
  }
  return response.json();
}

export async function createGuildConfig(): Promise<DiscordGuildConfigCreateResponse> {
  const response = await fetch(`${BASE_URL}/guilds`, { method: "POST" });
  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to create guild config");
  }
  return response.json();
}

export async function fetchGuildConfig(
  configId: number
): Promise<DiscordGuildConfig> {
  const response = await fetch(`${BASE_URL}/guilds/${configId}`);
  if (!response.ok) {
    throw new Error("Failed to fetch guild config");
  }
  return response.json();
}

export async function updateGuildConfig(
  configId: number,
  update: DiscordGuildConfigUpdate
): Promise<DiscordGuildConfig> {
  const response = await fetch(`${BASE_URL}/guilds/${configId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(update),
  });
  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to update guild config");
  }
  return response.json();
}

export async function deleteGuildConfig(configId: number): Promise<void> {
  const response = await fetch(`${BASE_URL}/guilds/${configId}`, {
    method: "DELETE",
  });
  if (!response.ok) {
    throw new Error("Failed to delete guild config");
  }
}

// === Channel Config ===

export async function fetchChannelConfigs(
  guildConfigId: number
): Promise<DiscordChannelConfig[]> {
  const response = await fetch(`${BASE_URL}/guilds/${guildConfigId}/channels`);
  if (!response.ok) {
    throw new Error("Failed to fetch channel configs");
  }
  return response.json();
}

export async function updateChannelConfig(
  guildConfigId: number,
  channelConfigId: number,
  update: DiscordChannelConfigUpdate
): Promise<DiscordChannelConfig> {
  const response = await fetch(
    `${BASE_URL}/guilds/${guildConfigId}/channels/${channelConfigId}`,
    {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(update),
    }
  );
  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to update channel config");
  }
  return response.json();
}

export async function bulkUpdateChannelConfigs(
  guildConfigId: number,
  updates: { channelConfigId: number; update: DiscordChannelConfigUpdate }[]
): Promise<{ succeeded: number; failed: number }> {
  let succeeded = 0;
  let failed = 0;

  for (const { channelConfigId, update } of updates) {
    try {
      await updateChannelConfig(guildConfigId, channelConfigId, update);
      succeeded++;
    } catch {
      failed++;
    }
  }

  return { succeeded, failed };
}


================================================
FILE: web/src/app/admin/discord-bot/page.tsx
================================================
"use client";

import { useState } from "react";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import { toast } from "@/hooks/useToast";
import { Section } from "@/layouts/general-layouts";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Text from "@/refresh-components/texts/Text";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import Modal from "@/refresh-components/Modal";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import Card from "@/refresh-components/cards/Card";
import { SvgKey } from "@opal/icons";
import {
  useDiscordGuilds,
  useDiscordBotConfig,
} from "@/app/admin/discord-bot/hooks";
import { createGuildConfig } from "@/app/admin/discord-bot/lib";
import { DiscordGuildsTable } from "@/app/admin/discord-bot/DiscordGuildsTable";
import { BotConfigCard } from "@/app/admin/discord-bot/BotConfigCard";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.DISCORD_BOTS;

function DiscordBotContent() {
  const { data: guilds, isLoading, error, refreshGuilds } = useDiscordGuilds();
  const { data: botConfig, isManaged } = useDiscordBotConfig();
  const [registrationKey, setRegistrationKey] = useState<string | null>(null);
  const [isCreating, setIsCreating] = useState(false);

  // Bot is available if:
  // - Managed externally (Cloud/env) - assume it's configured
  // - Self-hosted and explicitly configured via UI
  const isBotAvailable = isManaged || botConfig?.configured === true;

  const handleCreateGuild = async () => {
    setIsCreating(true);
    try {
      const result = await createGuildConfig();
      setRegistrationKey(result.registration_key);
      refreshGuilds();
      toast.success("Server configuration created!");
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to create server"
      );
    } finally {
      setIsCreating(false);
    }
  };

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (error || !guilds) {
    return (
      <ErrorCallout
        errorTitle="Failed to load Discord servers"
        errorMsg={error?.info?.detail || "An unknown error occurred"}
      />
    );
  }

  return (
    <>
      <BotConfigCard />

      <Modal open={!!registrationKey}>
        <Modal.Content width="sm">
          <Modal.Header
            title="Registration Key"
            icon={SvgKey}
            onClose={() => setRegistrationKey(null)}
            description="This key will only be shown once!"
          />
          <Modal.Body>
            <Text text04 mainUiBody>
              Copy the command and send it from any text channel in your server!
            </Text>
            <Card variant="secondary">
              <Section
                flexDirection="row"
                justifyContent="between"
                alignItems="center"
              >
                <Text text03 secondaryMono>
                  !register {registrationKey}
                </Text>
                <CopyIconButton
                  getCopyText={() => `!register ${registrationKey}`}
                />
              </Section>
            </Card>
          </Modal.Body>
        </Modal.Content>
      </Modal>

      <Card variant={!isBotAvailable ? "disabled" : "primary"}>
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="center"
        >
          <Text mainContentEmphasis text05>
            Server Configurations
          </Text>
          <CreateButton
            onClick={handleCreateGuild}
            disabled={isCreating || !isBotAvailable}
          >
            {isCreating ? "Creating..." : "Add Server"}
          </CreateButton>
        </Section>
        <DiscordGuildsTable guilds={guilds} onRefresh={refreshGuilds} />
      </Card>
    </>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Connect Onyx to your Discord servers. Users can ask questions directly in Discord channels."
      />
      <SettingsLayouts.Body>
        <DiscordBotContent />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/discord-bot/types.ts
================================================
// Types matching backend Pydantic models

export interface DiscordBotConfig {
  configured: boolean;
  created_at: string | null;
}

export interface DiscordGuildConfig {
  id: number;
  guild_id: number | null;
  guild_name: string | null;
  registered_at: string | null;
  default_persona_id: number | null;
  enabled: boolean;
}

export interface DiscordGuildConfigCreateResponse {
  id: number;
  registration_key: string; // Shown once!
}

export type DiscordChannelType = "text" | "forum";

export interface DiscordChannelConfig {
  id: number;
  channel_id: number;
  channel_name: string;
  channel_type: DiscordChannelType;
  is_private: boolean;
  require_bot_invocation: boolean;
  thread_only_mode: boolean;
  persona_override_id: number | null;
  enabled: boolean;
}

export interface DiscordChannelConfigUpdate {
  require_bot_invocation: boolean;
  thread_only_mode: boolean;
  persona_override_id: number | null;
  enabled: boolean;
}

export interface DiscordGuildConfigUpdate {
  enabled: boolean;
  default_persona_id: number | null;
}


================================================
FILE: web/src/app/admin/document-index-migration/page.tsx
================================================
"use client";

import { useState } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.INDEX_MIGRATION;

import Card from "@/refresh-components/cards/Card";
import { Content, ContentAction } from "@opal/layouts";
import Text from "@/refresh-components/texts/Text";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import Button from "@/refresh-components/buttons/Button";
import { errorHandlingFetcher } from "@/lib/fetcher";

interface MigrationStatus {
  total_chunks_migrated: number;
  created_at: string | null;
  migration_completed_at: string | null;
  approx_chunk_count_in_vespa: number | null;
}

interface RetrievalStatus {
  enable_opensearch_retrieval: boolean;
}

function formatTimestamp(iso: string): string {
  return new Date(iso).toLocaleString();
}

function MigrationStatusSection() {
  const { data, isLoading, error } = useSWR<MigrationStatus>(
    SWR_KEYS.opensearchMigrationStatus,
    errorHandlingFetcher
  );

  if (isLoading) {
    return (
      <Card>
        <Text headingH3>Migration Status</Text>
        <Text mainUiBody text03>
          Loading...
        </Text>
      </Card>
    );
  }

  if (error) {
    return (
      <Card>
        <Text headingH3>Migration Status</Text>
        <Text mainUiBody text03>
          Failed to load migration status.
        </Text>
      </Card>
    );
  }

  const hasStarted = data?.created_at != null;
  const hasCompleted = data?.migration_completed_at != null;
  const isOngoing = hasStarted && !hasCompleted;

  const totalChunksMigrated = data?.total_chunks_migrated ?? 0;
  const approxTotalChunks = data?.approx_chunk_count_in_vespa;

  // Calculate percentage progress if migration is ongoing and we have approx
  // total chunks.
  const shouldShowProgress = isOngoing && approxTotalChunks;
  const progressPercentage = shouldShowProgress
    ? Math.min(99, (totalChunksMigrated / approxTotalChunks) * 100)
    : null;

  return (
    <Card>
      <Text headingH3>Migration Status</Text>

      <ContentAction
        title="Started"
        sizePreset="main-ui"
        variant="section"
        rightChildren={
          <Text mainUiBody>
            {hasStarted ? formatTimestamp(data.created_at!) : "Not started"}
          </Text>
        }
      />

      <ContentAction
        title="Chunks Migrated"
        sizePreset="main-ui"
        variant="section"
        rightChildren={
          <Text mainUiBody>
            {progressPercentage !== null
              ? `${totalChunksMigrated} (approx. progress ${Math.round(
                  progressPercentage
                )}%)`
              : String(totalChunksMigrated)}
          </Text>
        }
      />

      <ContentAction
        title="Completed"
        sizePreset="main-ui"
        variant="section"
        rightChildren={
          <Text mainUiBody>
            {hasCompleted
              ? formatTimestamp(data.migration_completed_at!)
              : hasStarted
                ? "In progress"
                : "Not started"}
          </Text>
        }
      />
    </Card>
  );
}

function RetrievalSourceSection() {
  const { data, isLoading, error, mutate } = useSWR<RetrievalStatus>(
    SWR_KEYS.opensearchMigrationRetrieval,
    errorHandlingFetcher
  );
  const [selectedSource, setSelectedSource] = useState<string | null>(null);
  const [updating, setUpdating] = useState(false);

  const serverValue = data?.enable_opensearch_retrieval
    ? "opensearch"
    : "vespa";
  const currentValue = selectedSource ?? serverValue;
  const hasChanges = selectedSource !== null && selectedSource !== serverValue;

  async function handleUpdate() {
    setUpdating(true);
    try {
      const response = await fetch(SWR_KEYS.opensearchMigrationRetrieval, {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          enable_opensearch_retrieval: currentValue === "opensearch",
        }),
      });
      if (!response.ok) {
        throw new Error("Failed to update retrieval setting");
      }
      await mutate();
      setSelectedSource(null);
    } finally {
      setUpdating(false);
    }
  }

  if (isLoading) {
    return (
      <Card>
        <Text headingH3>Retrieval Source</Text>
        <Text mainUiBody text03>
          Loading...
        </Text>
      </Card>
    );
  }

  if (error) {
    return (
      <Card>
        <Text headingH3>Retrieval Source</Text>
        <Text mainUiBody text03>
          Failed to load retrieval settings.
        </Text>
      </Card>
    );
  }

  return (
    <Card>
      <Content
        title="Retrieval Source"
        description="Controls which document index is used for retrieval."
        sizePreset="main-ui"
        variant="section"
      />

      <InputSelect
        value={currentValue}
        onValueChange={setSelectedSource}
        disabled={updating}
      >
        <InputSelect.Trigger placeholder="Select retrieval source" />
        <InputSelect.Content>
          <InputSelect.Item value="vespa">Vespa</InputSelect.Item>
          <InputSelect.Item value="opensearch">OpenSearch</InputSelect.Item>
        </InputSelect.Content>
      </InputSelect>

      {hasChanges && (
        // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
        <Button
          className="self-center"
          onClick={handleUpdate}
          disabled={updating}
        >
          {updating ? "Updating..." : "Update Settings"}
        </Button>
      )}
    </Card>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Monitor the migration from Vespa to OpenSearch and control the active retrieval source."
        separator
      />
      <SettingsLayouts.Body>
        <MigrationStatusSection />
        <RetrievalSourceSection />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/documents/ScoreEditor.tsx
================================================
import { toast } from "@/hooks/useToast";
import { updateBoost } from "./lib";
import { EditableValue } from "@/components/EditableValue";

export const ScoreSection = ({
  documentId,
  initialScore,
  refresh,
  consistentWidth = true,
}: {
  documentId: string;
  initialScore: number;
  refresh: () => void;
  consistentWidth?: boolean;
}) => {
  const onSubmit = async (value: string) => {
    const numericScore = Number(value);
    if (isNaN(numericScore)) {
      toast.error("Score must be a number");
      return false;
    }

    const errorMsg = await updateBoost(documentId, numericScore);
    if (errorMsg) {
      toast.error(errorMsg);
      return false;
    } else {
      toast.success("Updated score!");
      refresh();
    }

    return true;
  };

  return (
    <EditableValue
      initialValue={initialScore.toString()}
      onSubmit={onSubmit}
      consistentWidth={consistentWidth}
    />
  );
};


================================================
FILE: web/src/app/admin/documents/explorer/DocumentExplorerPage.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Explorer } from "./Explorer";
import { Connector } from "@/lib/connectors/connectors";
import { DocumentSetSummary } from "@/lib/types";

const route = ADMIN_ROUTES.DOCUMENT_EXPLORER;

interface DocumentExplorerPageProps {
  initialSearchValue: string | undefined;
  connectors: Connector<any>[];
  documentSets: DocumentSetSummary[];
}

export default function DocumentExplorerPage({
  initialSearchValue,
  connectors,
  documentSets,
}: DocumentExplorerPageProps) {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />

      <SettingsLayouts.Body>
        <Explorer
          initialSearchValue={initialSearchValue}
          connectors={connectors}
          documentSets={documentSets}
        />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/documents/explorer/Explorer.tsx
================================================
"use client";

import { adminSearch } from "./lib";
import { MagnifyingGlass } from "@phosphor-icons/react";
import { useState, useEffect, useCallback } from "react";
import { OnyxDocument } from "@/lib/search/interfaces";
import { buildDocumentSummaryDisplay } from "@/components/search/DocumentDisplay";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { updateHiddenStatus } from "../lib";
import { toast } from "@/hooks/useToast";
import { getErrorMsg } from "@/lib/fetchUtils";
import { ScoreSection } from "../ScoreEditor";
import { useRouter } from "next/navigation";
import { useFilters } from "@/lib/hooks";
import { buildFilters } from "@/lib/search/utils";
import { DocumentUpdatedAtBadge } from "@/components/search/DocumentUpdatedAtBadge";
import { DocumentSetSummary } from "@/lib/types";
import { SourceIcon } from "@/components/SourceIcon";
import { Connector } from "@/lib/connectors/connectors";
import { HorizontalFilters } from "@/components/filters/SourceSelector";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { ThreeDotsLoader } from "@/components/Loading";

const DocumentDisplay = ({
  document,
  refresh,
}: {
  document: OnyxDocument;
  refresh: () => void;
}) => {
  return (
    <div
      key={document.document_id}
      className="text-sm border-b border-border mb-3"
    >
      <div className="flex relative">
        <a
          className={
            "rounded-lg flex font-bold " +
            (document.link ? "" : "pointer-events-none")
          }
          href={document.link}
          target="_blank"
          rel="noopener noreferrer"
        >
          <SourceIcon sourceType={document.source_type} iconSize={22} />
          <p className="truncate break-all ml-2 my-auto text-base">
            {document.semantic_identifier || document.document_id}
          </p>
        </a>
      </div>
      <div className="flex flex-wrap gap-x-2 mt-1 text-xs">
        <div className="px-1 py-0.5 bg-accent-background-hovered rounded flex">
          <p className="mr-1 my-auto">Boost:</p>
          <ScoreSection
            documentId={document.document_id}
            initialScore={document.boost}
            refresh={refresh}
            consistentWidth={false}
          />
        </div>
        <div
          onClick={async () => {
            const response = await updateHiddenStatus(
              document.document_id,
              !document.hidden
            );
            if (response.ok) {
              refresh();
            } else {
              toast.error(
                `Failed to update document - ${getErrorMsg(response)}`
              );
            }
          }}
          className="px-1 py-0.5 bg-accent-background-hovered hover:bg-accent-background rounded flex cursor-pointer select-none"
        >
          <div className="my-auto">
            {document.hidden ? (
              <div className="text-error">Hidden</div>
            ) : (
              "Visible"
            )}
          </div>
          <div className="ml-1 my-auto">
            <Checkbox checked={!document.hidden} />
          </div>
        </div>
      </div>
      {document.updated_at && (
        <div className="mt-2">
          <DocumentUpdatedAtBadge updatedAt={document.updated_at} />
        </div>
      )}
      <p className="pl-1 pt-2 pb-3 break-words">
        {buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}
      </p>
    </div>
  );
};

export function Explorer({
  initialSearchValue,
  connectors,
  documentSets,
}: {
  initialSearchValue: string | undefined;
  connectors: Connector<any>[];
  documentSets: DocumentSetSummary[];
}) {
  const router = useRouter();

  const [query, setQuery] = useState(initialSearchValue || "");
  const [timeoutId, setTimeoutId] = useState<number | null>(null);
  const [results, setResults] = useState<OnyxDocument[]>([]);
  const [isLoading, setIsLoading] = useState(false);

  const filterManager = useFilters();

  const onSearch = useCallback(
    async (query: string) => {
      setIsLoading(true);
      try {
        const filters = buildFilters(
          filterManager.selectedSources,
          filterManager.selectedDocumentSets,
          filterManager.timeRange,
          filterManager.selectedTags
        );
        const results = await adminSearch(query, filters);
        if (results.ok) {
          setResults((await results.json()).documents);
        }
      } finally {
        setTimeoutId(null);
        setIsLoading(false);
      }
    },
    [
      filterManager.selectedDocumentSets,
      filterManager.selectedSources,
      filterManager.timeRange,
      filterManager.selectedTags,
    ]
  );

  useEffect(() => {
    if (timeoutId !== null) {
      clearTimeout(timeoutId);
    }
    router.replace(
      `/admin/documents/explorer?query=${encodeURIComponent(query)}`
    );

    const newTimeoutId = window.setTimeout(() => onSearch(query), 300);
    setTimeoutId(newTimeoutId);
  }, [
    query,
    filterManager.selectedDocumentSets,
    filterManager.selectedSources,
    filterManager.timeRange,
  ]);

  return (
    <div className="flex flex-col gap-6">
      <div className="flex flex-col justify-center gap-2">
        <InputTypeIn
          placeholder="Find documents based on title / content..."
          value={query}
          onChange={(event) => {
            setQuery(event.target.value);
          }}
          onKeyDown={(event) => {
            if (
              event.key === "Enter" &&
              !event.shiftKey &&
              !(event.nativeEvent as any).isComposing
            ) {
              onSearch(query);
              event.preventDefault();
            }
          }}
          role="textarea"
        />

        <HorizontalFilters
          {...filterManager}
          availableDocumentSets={documentSets}
          existingSources={connectors.map((connector) => connector.source)}
          availableTags={[]}
          toggleFilters={() => {}}
          filtersUntoggled={false}
          tagsOnLeft={true}
        />
        <div className="border-b" />
      </div>
      {results.length > 0 && (
        <div className="mt-3">
          {results.map((document) => {
            return (
              <DocumentDisplay
                key={document.document_id}
                document={document}
                refresh={() => onSearch(query)}
              />
            );
          })}
        </div>
      )}
      {isLoading && <ThreeDotsLoader />}
    </div>
  );
}


================================================
FILE: web/src/app/admin/documents/explorer/lib.ts
================================================
import { Filters } from "@/lib/search/interfaces";

export const adminSearch = async (query: string, filters: Filters) => {
  const response = await fetch("/api/admin/search", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      query,
      filters,
    }),
  });
  return response;
};


================================================
FILE: web/src/app/admin/documents/explorer/page.tsx
================================================
import { fetchValidFilterInfo } from "@/lib/search/utilsSS";
import DocumentExplorerPage from "./DocumentExplorerPage";

export default async function Page(props: {
  searchParams: Promise<{ [key: string]: string }>;
}) {
  const searchParams = await props.searchParams;
  const { connectors, documentSets } = await fetchValidFilterInfo();

  return (
    <DocumentExplorerPage
      initialSearchValue={searchParams.query}
      connectors={connectors}
      documentSets={documentSets}
    />
  );
}


================================================
FILE: web/src/app/admin/documents/feedback/DocumentFeedbackTable.tsx
================================================
import { toast } from "@/hooks/useToast";
import { useState } from "react";
import {
  Table,
  TableHead,
  TableRow,
  TableHeader,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import { PageSelector } from "@/components/PageSelector";
import { DocumentBoostStatus } from "@/lib/types";
import { updateHiddenStatus } from "../lib";
import { numToDisplay } from "./constants";
import { FiEye, FiEyeOff } from "react-icons/fi";
import { getErrorMsg } from "@/lib/fetchUtils";
import { HoverPopup } from "@/components/HoverPopup";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { ScoreSection } from "../ScoreEditor";
import { truncateString } from "@/lib/utils";

const IsVisibleSection = ({
  document,
  onUpdate,
}: {
  document: DocumentBoostStatus;
  onUpdate: (response: Response) => void;
}) => {
  return (
    <HoverPopup
      mainContent={
        document.hidden ? (
          <div
            onClick={async () => {
              const response = await updateHiddenStatus(
                document.document_id,
                false
              );
              onUpdate(response);
            }}
            className="flex text-error cursor-pointer hover:bg-accent-background-hovered py-1 px-2 w-fit rounded-full"
          >
            <div className="select-none">Hidden</div>
            <div className="ml-1 my-auto">
              <Checkbox checked={false} />
            </div>
          </div>
        ) : (
          <div
            onClick={async () => {
              const response = await updateHiddenStatus(
                document.document_id,
                true
              );
              onUpdate(response);
            }}
            className="flex cursor-pointer hover:bg-accent-background-hovered py-1 px-2 w-fit rounded-full"
          >
            <div className="my-auto select-none">Visible</div>
            <div className="ml-1 my-auto">
              <Checkbox checked={true} />
            </div>
          </div>
        )
      }
      popupContent={
        <div className="text-xs">
          {document.hidden ? (
            <div className="flex">
              <FiEye className="my-auto mr-1" /> Unhide
            </div>
          ) : (
            <div className="flex">
              <FiEyeOff className="my-auto mr-1" />
              Hide
            </div>
          )}
        </div>
      }
      direction="left"
    />
  );
};

export const DocumentFeedbackTable = ({
  documents,
  refresh,
}: {
  documents: DocumentBoostStatus[];
  refresh: () => void;
}) => {
  const [page, setPage] = useState(1);

  return (
    <div>
      <Table className="overflow-visible">
        <TableHeader>
          <TableRow>
            <TableHead>Document Name</TableHead>
            <TableHead>Is Searchable?</TableHead>
            <TableHead>Score</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {documents
            .slice((page - 1) * numToDisplay, page * numToDisplay)
            .map((document) => {
              return (
                <TableRow key={document.document_id}>
                  <TableCell className="whitespace-normal break-all">
                    <a
                      className="text-blue-600 dark:text-blue-300"
                      href={document.link}
                      target="_blank"
                      rel="noopener noreferrer"
                    >
                      {truncateString(document.semantic_id, 100)}
                    </a>
                  </TableCell>
                  <TableCell>
                    <IsVisibleSection
                      document={document}
                      onUpdate={async (response) => {
                        if (response.ok) {
                          refresh();
                        } else {
                          toast.error(
                            `Error updating hidden status - ${getErrorMsg(
                              response
                            )}`
                          );
                        }
                      }}
                    />
                  </TableCell>
                  <TableCell>
                    <div className="relative">
                      <div
                        key={document.document_id}
                        className="h-10 ml-auto mr-8"
                      >
                        <ScoreSection
                          documentId={document.document_id}
                          initialScore={document.boost}
                          refresh={refresh}
                        />
                      </div>
                    </div>
                  </TableCell>
                </TableRow>
              );
            })}
        </TableBody>
      </Table>

      <div className="mt-3 flex">
        <div className="mx-auto">
          <PageSelector
            totalPages={Math.ceil(documents.length / numToDisplay)}
            currentPage={page}
            onPageChange={(newPage) => setPage(newPage)}
          />
        </div>
      </div>
    </div>
  );
};


================================================
FILE: web/src/app/admin/documents/feedback/constants.ts
================================================
export const numPages = 8;
export const numToDisplay = 10;


================================================
FILE: web/src/app/admin/documents/feedback/page.tsx
================================================
"use client";

import { LoadingAnimation } from "@/components/Loading";
import { useMostReactedToDocuments } from "@/lib/hooks";
import { DocumentFeedbackTable } from "./DocumentFeedbackTable";
import { numPages, numToDisplay } from "./constants";
import Title from "@/components/ui/title";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.DOCUMENT_FEEDBACK;

function Main() {
  const {
    data: mostLikedDocuments,
    isLoading: isMostLikedDocumentsLoading,
    error: mostLikedDocumentsError,
    refreshDocs: refreshMostLikedDocuments,
  } = useMostReactedToDocuments(false, numToDisplay * numPages);

  const {
    data: mostDislikedDocuments,
    isLoading: isMostLikedDocumentLoading,
    error: mostDislikedDocumentsError,
    refreshDocs: refreshMostDislikedDocuments,
  } = useMostReactedToDocuments(true, numToDisplay * numPages);

  const refresh = () => {
    refreshMostLikedDocuments();
    refreshMostDislikedDocuments();
  };

  if (isMostLikedDocumentsLoading || isMostLikedDocumentLoading) {
    return <LoadingAnimation text="Loading" />;
  }

  if (
    mostLikedDocumentsError ||
    mostDislikedDocumentsError ||
    !mostLikedDocuments ||
    !mostDislikedDocuments
  ) {
    return (
      <div className="text-red-600">
        Error loading documents -{" "}
        {mostDislikedDocumentsError || mostLikedDocumentsError}
      </div>
    );
  }

  return (
    <div>
      <Title className="mb-2">Most Liked Documents</Title>
      <DocumentFeedbackTable documents={mostLikedDocuments} refresh={refresh} />

      <Title className="mb-2 mt-6">Most Disliked Documents</Title>
      <DocumentFeedbackTable
        documents={mostDislikedDocuments}
        refresh={refresh}
      />
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/documents/lib.ts
================================================
export const updateBoost = async (documentId: string, boost: number) => {
  const response = await fetch("/api/manage/admin/doc-boosts", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      document_id: documentId,
      boost,
    }),
  });
  if (response.ok) {
    return null;
  }
  const responseJson = await response.json();
  return responseJson.message || responseJson.detail || "Unknown error";
};

export const updateHiddenStatus = async (
  documentId: string,
  isHidden: boolean
) => {
  const response = await fetch("/api/manage/admin/doc-hidden", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      document_id: documentId,
      hidden: isHidden,
    }),
  });
  return response;
};


================================================
FILE: web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx
================================================
"use client";

import { Form, Formik } from "formik";
import { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import * as Yup from "yup";
import { toast } from "@/hooks/useToast";
import {
  createDocumentSet,
  updateDocumentSet,
  DocumentSetCreationRequest,
} from "./lib";
import {
  ConnectorStatus,
  DocumentSetSummary,
  UserGroup,
  UserRole,
  FederatedConnectorConfig,
} from "@/lib/types";
import { TextFormField } from "@/components/Field";
import Button from "@/refresh-components/buttons/Button";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector";
import React, { useEffect, useState } from "react";
import { useUser } from "@/providers/UserProvider";
import { ConnectorMultiSelect } from "@/components/ConnectorMultiSelect";
import { NonSelectableConnectors } from "@/components/NonSelectableConnectors";
import { FederatedConnectorSelector } from "@/components/FederatedConnectorSelector";
import { useFederatedConnectors } from "@/lib/hooks";

interface SetCreationPopupProps {
  ccPairs: ConnectorStatus<any, any>[];
  userGroups: UserGroup[] | undefined;
  onClose: () => void;
  existingDocumentSet?: DocumentSetSummary;
}

export const DocumentSetCreationForm = ({
  ccPairs,
  userGroups,
  onClose,
  existingDocumentSet,
}: SetCreationPopupProps) => {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const isUpdate = existingDocumentSet !== undefined;
  const [localCcPairs, setLocalCcPairs] = useState(ccPairs);
  const { user } = useUser();
  const { data: federatedConnectors } = useFederatedConnectors();

  useEffect(() => {
    if (existingDocumentSet?.is_public) {
      return;
    }
  }, [existingDocumentSet?.is_public]);

  return (
    <div className="max-w-full mx-auto">
      <Formik<DocumentSetCreationRequest>
        initialValues={{
          name: existingDocumentSet?.name ?? "",
          description: existingDocumentSet?.description ?? "",
          cc_pair_ids:
            existingDocumentSet?.cc_pair_summaries.map(
              (ccPairSummary) => ccPairSummary.id
            ) ?? [],
          is_public: existingDocumentSet?.is_public ?? true,
          users: existingDocumentSet?.users ?? [],
          groups: existingDocumentSet?.groups ?? [],
          federated_connectors:
            existingDocumentSet?.federated_connector_summaries?.map((fc) => ({
              federated_connector_id: fc.id,
              entities: fc.entities,
            })) ?? [],
        }}
        validationSchema={Yup.object()
          .shape({
            name: Yup.string().required("Please enter a name for the set"),
            description: Yup.string().optional(),
            cc_pair_ids: Yup.array().of(Yup.number().required()),
            federated_connectors: Yup.array().of(
              Yup.object().shape({
                federated_connector_id: Yup.number().required(),
                entities: Yup.object().required(),
              })
            ),
          })
          .test(
            "at-least-one-connector",
            "Please select at least one connector (regular or federated)",
            function (values) {
              const hasRegularConnectors =
                values.cc_pair_ids && values.cc_pair_ids.length > 0;
              const hasFederatedConnectors =
                values.federated_connectors &&
                values.federated_connectors.length > 0;
              return hasRegularConnectors || hasFederatedConnectors;
            }
          )}
        onSubmit={async (values, formikHelpers) => {
          formikHelpers.setSubmitting(true);
          // If the document set is public, then we don't want to send any groups
          const processedValues = {
            ...values,
            groups: values.is_public ? [] : values.groups,
          };

          let response;
          if (isUpdate) {
            response = await updateDocumentSet({
              id: existingDocumentSet.id,
              ...processedValues,
              users: processedValues.users,
            });
          } else {
            response = await createDocumentSet(processedValues);
          }
          formikHelpers.setSubmitting(false);
          if (response.ok) {
            toast.success(
              isUpdate
                ? "Successfully updated document set!"
                : "Successfully created document set!"
            );
            await Promise.all([
              mutate(SWR_KEYS.documentSets),
              mutate(SWR_KEYS.documentSetsEditable),
            ]);
            onClose();
          } else {
            const errorMsg = await response.text();
            toast.error(
              isUpdate
                ? `Error updating document set - ${errorMsg}`
                : `Error creating document set - ${errorMsg}`
            );
          }
        }}
      >
        {(props) => {
          // Filter visible cc pairs for curator role
          const visibleCcPairs =
            user?.role === UserRole.CURATOR
              ? localCcPairs.filter(
                  (ccPair) =>
                    ccPair.access_type === "public" ||
                    (ccPair.groups.length > 0 &&
                      props.values.groups.every((group) =>
                        ccPair.groups.includes(group)
                      ))
                )
              : localCcPairs;

          // Filter non-visible cc pairs for curator role
          const nonVisibleCcPairs =
            user?.role === UserRole.CURATOR
              ? localCcPairs.filter(
                  (ccPair) =>
                    !(ccPair.access_type === "public") &&
                    (ccPair.groups.length === 0 ||
                      !props.values.groups.every((group) =>
                        ccPair.groups.includes(group)
                      ))
                )
              : [];

          // Deselect filtered out cc pairs
          if (user?.role === UserRole.CURATOR) {
            const visibleCcPairIds = visibleCcPairs.map(
              (ccPair) => ccPair.cc_pair_id
            );
            props.values.cc_pair_ids = props.values.cc_pair_ids.filter((id) =>
              visibleCcPairIds.includes(id)
            );
          }

          return (
            <Form className="space-y-6 w-full ">
              <div className="space-y-4 w-full">
                <TextFormField
                  name="name"
                  label="Name:"
                  placeholder="A name for the document set"
                />
                <TextFormField
                  name="description"
                  label="Description:"
                  placeholder="Describe what the document set represents"
                  optional={true}
                />

                {isPaidEnterpriseFeaturesEnabled && (
                  <IsPublicGroupSelector
                    formikProps={props}
                    objectName="document set"
                  />
                )}
              </div>

              <div className="my-6 border-t border-border-02" />

              <div className="space-y-6">
                {user?.role === UserRole.CURATOR ? (
                  <>
                    <ConnectorMultiSelect
                      name="cc_pair_ids"
                      label={`Connectors available to ${
                        userGroups && userGroups.length > 1
                          ? "the selected group"
                          : "the group you curate"
                      }`}
                      connectors={visibleCcPairs}
                      selectedIds={props.values.cc_pair_ids}
                      onChange={(selectedIds) => {
                        props.setFieldValue("cc_pair_ids", selectedIds);
                      }}
                      placeholder="Search for connectors..."
                    />

                    <NonSelectableConnectors
                      connectors={nonVisibleCcPairs}
                      title={`Connectors not available to the ${
                        userGroups && userGroups.length > 1
                          ? `group${
                              props.values.groups.length > 1 ? "s" : ""
                            } you have selected`
                          : "group you curate"
                      }`}
                      description="Only connectors that are directly assigned to the group you are trying to add the document set to will be available."
                    />
                  </>
                ) : (
                  <ConnectorMultiSelect
                    name="cc_pair_ids"
                    label="Pick your connectors"
                    connectors={visibleCcPairs}
                    selectedIds={props.values.cc_pair_ids}
                    onChange={(selectedIds) => {
                      props.setFieldValue("cc_pair_ids", selectedIds);
                    }}
                    placeholder="Search for connectors..."
                  />
                )}

                {/* Federated Connectors Section */}
                {federatedConnectors && federatedConnectors.length > 0 && (
                  <>
                    <div className="my-4 border-t border-border-02" />
                    <FederatedConnectorSelector
                      name="federated_connectors"
                      label="Federated Connectors"
                      federatedConnectors={federatedConnectors}
                      selectedConfigs={props.values.federated_connectors}
                      onChange={(selectedConfigs) => {
                        props.setFieldValue(
                          "federated_connectors",
                          selectedConfigs
                        );
                      }}
                      placeholder="Search for federated connectors..."
                    />
                  </>
                )}
              </div>

              <div className="flex mt-6 pt-4 border-t border-border-02">
                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                <Button
                  type="submit"
                  disabled={props.isSubmitting}
                  className="w-56 mx-auto"
                  primary
                >
                  {isUpdate ? "Update Document Set" : "Create Document Set"}
                </Button>
              </div>
            </Form>
          );
        }}
      </Formik>
    </div>
  );
};


================================================
FILE: web/src/app/admin/documents/sets/[documentSetId]/page.tsx
================================================
"use client";
import { use } from "react";

import { ErrorCallout } from "@/components/ErrorCallout";
import { refreshDocumentSets, useDocumentSets } from "../hooks";
import { useConnectorStatus, useUserGroups } from "@/lib/hooks";
import { ThreeDotsLoader } from "@/components/Loading";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import CardSection from "@/components/admin/CardSection";
import { DocumentSetCreationForm } from "../DocumentSetCreationForm";
import { useRouter } from "next/navigation";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";

const route = ADMIN_ROUTES.DOCUMENT_SETS;

function Main({ documentSetId }: { documentSetId: number }) {
  const router = useRouter();
  const vectorDbEnabled = useVectorDbEnabled();

  const {
    data: documentSets,
    isLoading: isDocumentSetsLoading,
    error: documentSetsError,
  } = useDocumentSets();

  const {
    data: ccPairs,
    isLoading: isCCPairsLoading,
    error: ccPairsError,
  } = useConnectorStatus(30000, vectorDbEnabled);

  // EE only
  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();

  if (
    isDocumentSetsLoading ||
    (vectorDbEnabled && isCCPairsLoading) ||
    userGroupsIsLoading
  ) {
    return (
      <div className="flex justify-center items-center min-h-[400px]">
        <ThreeDotsLoader />
      </div>
    );
  }

  if (documentSetsError || !documentSets) {
    return (
      <ErrorCallout
        errorTitle="Failed to fetch document sets"
        errorMsg={documentSetsError}
      />
    );
  }

  if (vectorDbEnabled && (ccPairsError || !ccPairs)) {
    return (
      <ErrorCallout
        errorTitle="Failed to fetch Connectors"
        errorMsg={ccPairsError}
      />
    );
  }

  const documentSet = documentSets.find(
    (documentSet) => documentSet.id === documentSetId
  );
  if (!documentSet) {
    return (
      <ErrorCallout
        errorTitle="Document set not found"
        errorMsg={`Document set with id ${documentSetId} not found`}
      />
    );
  }

  return (
    <CardSection>
      <DocumentSetCreationForm
        ccPairs={ccPairs ?? []}
        userGroups={userGroups}
        onClose={() => {
          refreshDocumentSets();
          router.push("/admin/documents/sets");
        }}
        existingDocumentSet={documentSet}
      />
    </CardSection>
  );
}

export default function Page(props: {
  params: Promise<{ documentSetId: string }>;
}) {
  const params = use(props.params);
  const documentSetId = parseInt(params.documentSetId);

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title="Edit Document Set"
        separator
        backButton
      />
      <SettingsLayouts.Body>
        <Main documentSetId={documentSetId} />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/documents/sets/hooks.tsx
================================================
import { errorHandlingFetcher } from "@/lib/fetcher";
import { DocumentSetSummary } from "@/lib/types";
import useSWR, { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

export function refreshDocumentSets() {
  mutate(SWR_KEYS.documentSets);
}

export function useDocumentSets(getEditable: boolean = false) {
  const url = getEditable
    ? SWR_KEYS.documentSetsEditable
    : SWR_KEYS.documentSets;

  const swrResponse = useSWR<DocumentSetSummary[]>(url, errorHandlingFetcher, {
    refreshInterval: 5000, // 5 seconds
  });

  return {
    ...swrResponse,
    refreshDocumentSets: refreshDocumentSets,
  };
}


================================================
FILE: web/src/app/admin/documents/sets/lib.ts
================================================
import { FederatedConnectorConfig } from "@/lib/types";

export interface DocumentSetCreationRequest {
  name: string;
  description: string;
  cc_pair_ids: number[];
  is_public: boolean;
  users: string[];
  groups: number[];
  federated_connectors: FederatedConnectorConfig[];
}

export const createDocumentSet = async ({
  name,
  description,
  cc_pair_ids,
  is_public,
  users,
  groups,
  federated_connectors,
}: DocumentSetCreationRequest) => {
  return fetch("/api/manage/admin/document-set", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      name,
      description,
      cc_pair_ids,
      is_public,
      users,
      groups,
      federated_connectors,
    }),
  });
};

interface DocumentSetUpdateRequest {
  id: number;
  description: string;
  cc_pair_ids: number[];
  is_public: boolean;
  users: string[];
  groups: number[];
  federated_connectors: FederatedConnectorConfig[];
}

export const updateDocumentSet = async ({
  id,
  description,
  cc_pair_ids,
  is_public,
  users,
  groups,
  federated_connectors,
}: DocumentSetUpdateRequest) => {
  return fetch("/api/manage/admin/document-set", {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      id,
      description,
      cc_pair_ids,
      is_public,
      users,
      groups,
      federated_connectors,
    }),
  });
};

export const deleteDocumentSet = async (id: number) => {
  return fetch(`/api/manage/admin/document-set/${id}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
};


================================================
FILE: web/src/app/admin/documents/sets/new/page.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { DocumentSetCreationForm } from "../DocumentSetCreationForm";
import { useConnectorStatus, useUserGroups } from "@/lib/hooks";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import { useRouter } from "next/navigation";
import { refreshDocumentSets } from "../hooks";
import CardSection from "@/components/admin/CardSection";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";

const route = ADMIN_ROUTES.DOCUMENT_SETS;

function Main() {
  const router = useRouter();
  const vectorDbEnabled = useVectorDbEnabled();

  const {
    data: ccPairs,
    isLoading: isCCPairsLoading,
    error: ccPairsError,
  } = useConnectorStatus(30000, vectorDbEnabled);

  // EE only
  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();

  if ((vectorDbEnabled && isCCPairsLoading) || userGroupsIsLoading) {
    return (
      <div className="flex justify-center items-center min-h-[400px]">
        <ThreeDotsLoader />
      </div>
    );
  }

  if (vectorDbEnabled && (ccPairsError || !ccPairs)) {
    return (
      <ErrorCallout
        errorTitle="Failed to fetch Connectors"
        errorMsg={ccPairsError}
      />
    );
  }

  return (
    <>
      <CardSection>
        <DocumentSetCreationForm
          ccPairs={ccPairs ?? []}
          userGroups={userGroups}
          onClose={() => {
            refreshDocumentSets();
            router.push("/admin/documents/sets");
          }}
        />
      </CardSection>
    </>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title="New Document Set"
        separator
        backButton
      />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/documents/sets/page.tsx
================================================
"use client";

import { ThreeDotsLoader } from "@/components/Loading";
import { PageSelector } from "@/components/PageSelector";
import { InfoIcon } from "@/components/icons/icons";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Title from "@/components/ui/title";
import Separator from "@/refresh-components/Separator";
import { DocumentSetSummary } from "@/lib/types";
import { useState } from "react";
import { useDocumentSets } from "./hooks";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { deleteDocumentSet } from "./lib";
import { toast } from "@/hooks/useToast";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import {
  FiAlertTriangle,
  FiCheckCircle,
  FiClock,
  FiEdit2,
  FiLock,
  FiUnlock,
} from "react-icons/fi";
import { DeleteButton } from "@/components/DeleteButton";
import { useRouter } from "next/navigation";
import { TableHeader } from "@/components/ui/table";
import { Badge } from "@/components/ui/badge";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";

const route = ADMIN_ROUTES.DOCUMENT_SETS;
const numToDisplay = 50;

// Component to display federated connectors with consistent styling
const FederatedConnectorTitle = ({
  federatedConnector,
  showMetadata = true,
  isLink = true,
}: {
  federatedConnector: any;
  showMetadata?: boolean;
  isLink?: boolean;
}) => {
  const sourceType = federatedConnector.source.replace(/^federated_/, "");

  const mainSectionClassName = "text-blue-500 dark:text-blue-100 flex w-fit";
  const mainDisplay = (
    <>
      <SourceIcon sourceType={sourceType as any} iconSize={16} />
      <div className="ml-1 my-auto text-xs font-medium truncate">
        {federatedConnector.name}
      </div>
      <Badge variant="outline" className="text-xs ml-2">
        Federated
      </Badge>
    </>
  );

  return (
    <div className="my-auto max-w-full">
      {isLink ? (
        <Link
          className={mainSectionClassName}
          href={`/admin/federated/${federatedConnector.id}`}
        >
          {mainDisplay}
        </Link>
      ) : (
        <div className={mainSectionClassName}>{mainDisplay}</div>
      )}
      {showMetadata && Object.keys(federatedConnector.entities).length > 0 && (
        <div className="text-[10px] mt-0.5 text-gray-600 dark:text-gray-400">
          {Object.entries(federatedConnector.entities)
            .filter(
              ([_, value]) =>
                value &&
                (Array.isArray(value) ? value.length > 0 : String(value).trim())
            )
            .map(([key, value]) => (
              <div key={key} className="truncate">
                <i>{key}:</i>{" "}
                {Array.isArray(value) ? value.join(", ") : String(value)}
              </div>
            ))}
        </div>
      )}
    </div>
  );
};

const EditRow = ({
  documentSet,
  isEditable,
}: {
  documentSet: DocumentSetSummary;
  isEditable: boolean;
}) => {
  const router = useRouter();

  if (!isEditable) {
    return (
      <div className="text-text-darker font-medium my-auto p-1">
        {documentSet.name}
      </div>
    );
  }

  return (
    <div className="relative flex">
      <TooltipProvider>
        <Tooltip>
          <TooltipTrigger asChild>
            <div
              className={`
              text-text-darker font-medium my-auto p-1 hover:bg-accent-background flex items-center select-none
              ${documentSet.is_up_to_date ? "cursor-pointer" : "cursor-default"}
            `}
              style={{ wordBreak: "normal", overflowWrap: "break-word" }}
              onClick={() => {
                if (documentSet.is_up_to_date) {
                  router.push(`/admin/documents/sets/${documentSet.id}`);
                }
              }}
            >
              <FiEdit2 className="mr-2 flex-shrink-0" />
              <span className="font-medium">{documentSet.name}</span>
            </div>
          </TooltipTrigger>
          {!documentSet.is_up_to_date && (
            <TooltipContent width="max-w-sm">
              <div className="flex break-words break-keep whitespace-pre-wrap items-start">
                <InfoIcon className="mr-2 mt-0.5" />
                Cannot update while syncing! Wait for the sync to finish, then
                try again.
              </div>
            </TooltipContent>
          )}
        </Tooltip>
      </TooltipProvider>
    </div>
  );
};

interface DocumentFeedbackTableProps {
  documentSets: DocumentSetSummary[];
  refresh: () => void;
  refreshEditable: () => void;
  editableDocumentSets: DocumentSetSummary[];
}

const DocumentSetTable = ({
  documentSets,
  editableDocumentSets,
  refresh,
  refreshEditable,
}: DocumentFeedbackTableProps) => {
  const [page, setPage] = useState(1);

  // sort by name for consistent ordering
  documentSets.sort((a, b) => {
    if (a.name < b.name) {
      return -1;
    } else if (a.name > b.name) {
      return 1;
    } else {
      return 0;
    }
  });

  const sortedDocumentSets = [
    ...editableDocumentSets,
    ...documentSets.filter(
      (ds) => !editableDocumentSets.some((eds) => eds.id === ds.id)
    ),
  ];

  return (
    <div>
      <Title>Existing Document Sets</Title>
      <Table className="overflow-visible mt-2">
        <TableHeader>
          <TableRow>
            <TableHead>Name</TableHead>
            <TableHead>Connectors</TableHead>
            <TableHead>Status</TableHead>
            <TableHead>Public</TableHead>
            <TableHead>Delete</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {sortedDocumentSets
            .slice((page - 1) * numToDisplay, page * numToDisplay)
            .map((documentSet) => {
              const isEditable = editableDocumentSets.some(
                (eds) => eds.id === documentSet.id
              );
              return (
                <TableRow key={documentSet.id}>
                  <TableCell className="whitespace-normal break-all">
                    <div className="flex gap-x-1 text-emphasis">
                      <EditRow
                        documentSet={documentSet}
                        isEditable={isEditable}
                      />
                    </div>
                  </TableCell>
                  <TableCell>
                    <div>
                      {/* Regular Connectors */}
                      {documentSet.cc_pair_summaries.map(
                        (ccPairSummary, ind) => {
                          return (
                            <div
                              className={
                                ind !== documentSet.cc_pair_summaries.length - 1
                                  ? "mb-3"
                                  : ""
                              }
                              key={ccPairSummary.id}
                            >
                              <div className="text-blue-500 dark:text-blue-100 flex w-fit">
                                <SourceIcon
                                  sourceType={ccPairSummary.source}
                                  iconSize={16}
                                />
                                <div className="ml-1 my-auto text-xs font-medium truncate">
                                  {ccPairSummary.name || "Unnamed"}
                                </div>
                              </div>
                            </div>
                          );
                        }
                      )}

                      {/* Federated Connectors */}
                      {documentSet.federated_connector_summaries &&
                        documentSet.federated_connector_summaries.length >
                          0 && (
                          <>
                            {documentSet.cc_pair_summaries.length > 0 && (
                              <div className="mb-3" />
                            )}
                            {documentSet.federated_connector_summaries.map(
                              (federatedConnector, ind) => {
                                return (
                                  <div
                                    className={
                                      ind !==
                                      documentSet.federated_connector_summaries
                                        .length -
                                        1
                                        ? "mb-3"
                                        : ""
                                    }
                                    key={`federated-${federatedConnector.id}`}
                                  >
                                    <FederatedConnectorTitle
                                      federatedConnector={federatedConnector}
                                      showMetadata={true}
                                    />
                                  </div>
                                );
                              }
                            )}
                          </>
                        )}
                    </div>
                  </TableCell>
                  <TableCell>
                    {documentSet.is_up_to_date ? (
                      <Badge variant="success" icon={FiCheckCircle}>
                        Up to Date
                      </Badge>
                    ) : documentSet.cc_pair_summaries.length > 0 ||
                      (documentSet.federated_connector_summaries &&
                        documentSet.federated_connector_summaries.length >
                          0) ? (
                      <Badge variant="in_progress" icon={FiClock}>
                        Syncing
                      </Badge>
                    ) : (
                      <Badge variant="destructive" icon={FiAlertTriangle}>
                        Deleting
                      </Badge>
                    )}
                  </TableCell>
                  <TableCell>
                    {documentSet.is_public ? (
                      <Badge
                        variant={isEditable ? "success" : "default"}
                        icon={FiUnlock}
                      >
                        Public
                      </Badge>
                    ) : (
                      <Badge
                        variant={isEditable ? "private" : "default"}
                        icon={FiLock}
                      >
                        Private
                      </Badge>
                    )}
                  </TableCell>
                  <TableCell>
                    {isEditable ? (
                      <DeleteButton
                        onClick={async () => {
                          const response = await deleteDocumentSet(
                            documentSet.id
                          );
                          if (response.ok) {
                            toast.success(
                              `Document set "${documentSet.name}" scheduled for deletion`
                            );
                          } else {
                            const errorMsg = (await response.json()).detail;
                            toast.error(
                              `Failed to schedule document set for deletion - ${errorMsg}`
                            );
                          }
                          refresh();
                          refreshEditable();
                        }}
                      />
                    ) : (
                      "-"
                    )}
                  </TableCell>
                </TableRow>
              );
            })}
        </TableBody>
      </Table>

      <div className="mt-3 flex">
        <div className="mx-auto">
          <PageSelector
            totalPages={Math.ceil(sortedDocumentSets.length / numToDisplay)}
            currentPage={page}
            onPageChange={(newPage) => setPage(newPage)}
          />
        </div>
      </div>
    </div>
  );
};

function Main() {
  const {
    data: documentSets,
    isLoading: isDocumentSetsLoading,
    error: documentSetsError,
    refreshDocumentSets,
  } = useDocumentSets();

  const {
    data: editableDocumentSets,
    isLoading: isEditableDocumentSetsLoading,
    error: editableDocumentSetsError,
    refreshDocumentSets: refreshEditableDocumentSets,
  } = useDocumentSets(true);

  if (isDocumentSetsLoading || isEditableDocumentSetsLoading) {
    return (
      <div className="flex justify-center items-center min-h-[400px]">
        <ThreeDotsLoader />
      </div>
    );
  }

  if (documentSetsError || !documentSets) {
    return <div>Error: {documentSetsError}</div>;
  }

  if (editableDocumentSetsError || !editableDocumentSets) {
    return <div>Error: {editableDocumentSetsError}</div>;
  }

  return (
    <div className="mb-8">
      <Text as="p">
        {markdown(
          "**Document Sets** allow you to group logically connected documents into a single bundle. These can then be used as a filter when performing searches to control the scope of information Onyx searches over."
        )}
      </Text>
      <Spacer rem={0.75} />

      <div className="mb-3"></div>

      <div className="flex mb-6">
        <CreateButton href="/admin/documents/sets/new">
          New Document Set
        </CreateButton>
      </div>

      {documentSets.length > 0 && (
        <>
          <Separator />
          <DocumentSetTable
            documentSets={documentSets}
            editableDocumentSets={editableDocumentSets}
            refresh={refreshDocumentSets}
            refreshEditable={refreshEditableDocumentSets}
          />
        </>
      )}
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/embeddings/EmbeddingModelSelectionForm.tsx
================================================
"use client";

import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR from "swr";
import { Dispatch, SetStateAction, useState } from "react";
import {
  CloudEmbeddingProvider,
  CloudEmbeddingModel,
  AVAILABLE_MODELS,
  AVAILABLE_CLOUD_PROVIDERS,
  LITELLM_CLOUD_PROVIDER,
  AZURE_CLOUD_PROVIDER,
  HostedEmbeddingModel,
  EmbeddingProvider,
} from "@/components/embedding/interfaces";
import OpenEmbeddingPage from "@/app/admin/embeddings/pages/OpenEmbeddingPage";
import CloudEmbeddingPage from "@/app/admin/embeddings/pages/CloudEmbeddingPage";
import ProviderCreationModal from "@/app/admin/embeddings/modals/ProviderCreationModal";
import DeleteCredentialsModal from "@/app/admin/embeddings/modals/DeleteCredentialsModal";
import SelectModelModal from "@/app/admin/embeddings/modals/SelectModelModal";
import ChangeCredentialsModal from "@/app/admin/embeddings/modals/ChangeCredentialsModal";
import ModelSelectionConfirmationModal from "@/app/admin/embeddings/modals/ModelSelectionModal";
import AlreadyPickedModal from "@/app/admin/embeddings/modals/AlreadyPickedModal";
import { ModelOption } from "@/components/embedding/ModelSelector";
import {
  EMBEDDING_MODELS_ADMIN_URL,
  EMBEDDING_PROVIDERS_ADMIN_URL,
} from "@/lib/llmConfig/constants";
import { AdvancedSearchConfiguration } from "@/app/admin/embeddings/interfaces";
import { Button } from "@opal/components";

export interface EmbeddingDetails {
  api_key?: string;
  api_url?: string;
  api_version?: string;
  deployment_name?: string;
  custom_config: any;
  provider_type: EmbeddingProvider;
}

export interface EmbeddingModelSelectionProps {
  modelTab: "open" | "cloud" | null;
  setModelTab: Dispatch<SetStateAction<"open" | "cloud" | null>>;
  currentEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel;
  selectedProvider: CloudEmbeddingModel | HostedEmbeddingModel;
  updateSelectedProvider: (
    model: CloudEmbeddingModel | HostedEmbeddingModel
  ) => void;
  updateCurrentModel: (
    newModel: string,
    provider_type: EmbeddingProvider
  ) => void;
  advancedEmbeddingDetails: AdvancedSearchConfiguration;
}

export default function EmbeddingModelSelection({
  selectedProvider,
  currentEmbeddingModel,
  updateSelectedProvider,
  modelTab,
  setModelTab,
  updateCurrentModel,
  advancedEmbeddingDetails,
}: EmbeddingModelSelectionProps) {
  // Cloud Provider based modals
  const [showTentativeProvider, setShowTentativeProvider] =
    useState<CloudEmbeddingProvider | null>(null);

  const [showUnconfiguredProvider, setShowUnconfiguredProvider] =
    useState<CloudEmbeddingProvider | null>(null);
  const [changeCredentialsProvider, setChangeCredentialsProvider] =
    useState<CloudEmbeddingProvider | null>(null);

  // Cloud Model based modals
  const [alreadySelectedModel, setAlreadySelectedModel] =
    useState<CloudEmbeddingModel | null>(null);
  const [showTentativeModel, setShowTentativeModel] =
    useState<CloudEmbeddingModel | null>(null);

  const [showModelInQueue, setShowModelInQueue] =
    useState<CloudEmbeddingModel | null>(null);

  // Open Model based modals
  const [showTentativeOpenProvider, setShowTentativeOpenProvider] =
    useState<HostedEmbeddingModel | null>(null);

  const [showDeleteCredentialsModal, setShowDeleteCredentialsModal] =
    useState<boolean>(false);

  const [showAddConnectorPopup, setShowAddConnectorPopup] =
    useState<boolean>(false);

  const { data: embeddingModelDetails } = useSWR<CloudEmbeddingModel[]>(
    EMBEDDING_MODELS_ADMIN_URL,
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  const {
    data: embeddingProviderDetails,
    mutate: mutateEmbeddingProviderDetails,
  } = useSWR<EmbeddingDetails[]>(
    EMBEDDING_PROVIDERS_ADMIN_URL,
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  return (
    <div className="p-2">
      {alreadySelectedModel && (
        <AlreadyPickedModal
          model={alreadySelectedModel}
          onClose={() => setAlreadySelectedModel(null)}
        />
      )}

      {showTentativeOpenProvider && (
        <ModelSelectionConfirmationModal
          selectedModel={showTentativeOpenProvider}
          isCustom={
            AVAILABLE_MODELS.find(
              (model) =>
                model.model_name === showTentativeOpenProvider.model_name
            ) === undefined
          }
          onConfirm={() => {
            updateSelectedProvider(showTentativeOpenProvider);
            setShowTentativeOpenProvider(null);
          }}
          onCancel={() => setShowTentativeOpenProvider(null)}
        />
      )}

      {showTentativeProvider && (
        <ProviderCreationModal
          updateCurrentModel={updateCurrentModel}
          isProxy={
            showTentativeProvider.provider_type == EmbeddingProvider.LITELLM
          }
          isAzure={
            showTentativeProvider.provider_type == EmbeddingProvider.AZURE
          }
          selectedProvider={showTentativeProvider}
          onConfirm={() => {
            setShowTentativeProvider(showUnconfiguredProvider);
            if (showModelInQueue) {
              setShowTentativeModel(showModelInQueue);
            }
            mutateEmbeddingProviderDetails();
          }}
          onCancel={() => {
            setShowModelInQueue(null);
            setShowTentativeProvider(null);
          }}
        />
      )}

      {changeCredentialsProvider && (
        <ChangeCredentialsModal
          isProxy={
            changeCredentialsProvider.provider_type == EmbeddingProvider.LITELLM
          }
          isAzure={
            changeCredentialsProvider.provider_type == EmbeddingProvider.AZURE
          }
          useFileUpload={
            changeCredentialsProvider.provider_type == EmbeddingProvider.GOOGLE
          }
          onDeleted={() => {
            setChangeCredentialsProvider(null);
            mutateEmbeddingProviderDetails();
          }}
          provider={changeCredentialsProvider}
          onConfirm={() => setChangeCredentialsProvider(null)}
          onCancel={() => setChangeCredentialsProvider(null)}
        />
      )}

      {showTentativeModel && (
        <SelectModelModal
          model={showTentativeModel}
          onConfirm={() => {
            setShowModelInQueue(null);
            updateSelectedProvider(showTentativeModel);
            setShowTentativeModel(null);
          }}
          onCancel={() => {
            setShowModelInQueue(null);
            setShowTentativeModel(null);
          }}
        />
      )}

      {showDeleteCredentialsModal && (
        <DeleteCredentialsModal
          modelProvider={showTentativeProvider!}
          onConfirm={() => {
            setShowDeleteCredentialsModal(false);
            mutateEmbeddingProviderDetails();
          }}
          onCancel={() => setShowDeleteCredentialsModal(false)}
        />
      )}

      <p className="mb-4">
        Select from cloud, self-hosted models, or continue with your current
        embedding model.
      </p>
      <div className="text-sm mr-auto mb-6 divide-x-2 flex">
        <button
          onClick={() => setModelTab(null)}
          className={`mr-4 p-2 font-bold  ${
            !modelTab
              ? "rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline"
              : " hover:underline bg-neutral-100 dark:bg-neutral-900"
          }`}
        >
          Current
        </button>
        <div className="px-2">
          <button
            onClick={() => setModelTab("cloud")}
            className={`mx-2 p-2 font-bold  ${
              modelTab == "cloud"
                ? "rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline"
                : " hover:underline bg-neutral-100 dark:bg-neutral-900"
            }`}
          >
            Cloud-based
          </button>
        </div>
        <div className="px-2">
          <button
            onClick={() => setModelTab("open")}
            className={` mx-2 p-2 font-bold  ${
              modelTab == "open"
                ? "rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline"
                : "hover:underline bg-neutral-100 dark:bg-neutral-900"
            }`}
          >
            Self-hosted
          </button>
        </div>
      </div>

      {modelTab == "open" && (
        <OpenEmbeddingPage
          selectedProvider={selectedProvider}
          onSelectOpenSource={(model: HostedEmbeddingModel) => {
            setShowTentativeOpenProvider(model);
          }}
        />
      )}

      {modelTab == "cloud" && (
        <CloudEmbeddingPage
          advancedEmbeddingDetails={advancedEmbeddingDetails}
          embeddingModelDetails={embeddingModelDetails}
          setShowModelInQueue={setShowModelInQueue}
          setShowTentativeModel={setShowTentativeModel}
          currentModel={selectedProvider || currentEmbeddingModel}
          setAlreadySelectedModel={setAlreadySelectedModel}
          embeddingProviderDetails={embeddingProviderDetails}
          setShowTentativeProvider={setShowTentativeProvider}
          setChangeCredentialsProvider={setChangeCredentialsProvider}
        />
      )}

      {!modelTab && (
        <>
          <button onClick={() => updateSelectedProvider(currentEmbeddingModel)}>
            <ModelOption
              model={currentEmbeddingModel}
              selected={
                selectedProvider.model_name == currentEmbeddingModel.model_name
              }
            />
          </button>
          {currentEmbeddingModel?.provider_type && (
            <div className="mt-2">
              <Button
                prominence="secondary"
                onClick={() => {
                  const allProviders = [
                    ...AVAILABLE_CLOUD_PROVIDERS,
                    LITELLM_CLOUD_PROVIDER,
                    AZURE_CLOUD_PROVIDER,
                  ];
                  const provider = allProviders.find(
                    (p) =>
                      p.provider_type === currentEmbeddingModel.provider_type
                  );
                  if (!provider) {
                    return;
                  }
                  setChangeCredentialsProvider(provider);
                }}
              >
                Update API key
              </Button>
            </div>
          )}
        </>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/admin/embeddings/RerankingFormPage.tsx
================================================
import React, {
  Dispatch,
  forwardRef,
  SetStateAction,
  useContext,
  useState,
} from "react";
import { Formik, Form, FormikProps } from "formik";
import * as Yup from "yup";
import {
  RerankerProvider,
  RerankingDetails,
  RerankingModel,
  rerankingModels,
} from "./interfaces";
import { FiExternalLink } from "react-icons/fi";
import {
  AmazonIcon,
  CohereIcon,
  LiteLLMIcon,
  MixedBreadIcon,
} from "@/components/icons/icons";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { TextFormField } from "@/components/Field";
import { SettingsContext } from "@/providers/SettingsProvider";
import { SvgAlertTriangle, SvgKey } from "@opal/icons";

interface RerankingDetailsFormProps {
  setRerankingDetails: Dispatch<SetStateAction<RerankingDetails>>;
  currentRerankingDetails: RerankingDetails;
  originalRerankingDetails: RerankingDetails;
  modelTab: "open" | "cloud" | null;
  setModelTab: Dispatch<SetStateAction<"open" | "cloud" | null>>;
  onValidationChange?: (
    isValid: boolean,
    errors: Record<string, string>
  ) => void;
}

const RerankingDetailsForm = forwardRef<
  FormikProps<RerankingDetails>,
  RerankingDetailsFormProps
>(
  (
    {
      setRerankingDetails,
      originalRerankingDetails,
      currentRerankingDetails,
      modelTab,
      setModelTab,
      onValidationChange,
    },
    ref
  ) => {
    const [showGpuWarningModalModel, setShowGpuWarningModalModel] =
      useState<RerankingModel | null>(null);
    const [isApiKeyModalOpen, setIsApiKeyModalOpen] = useState(false);
    const [showLiteLLMConfigurationModal, setShowLiteLLMConfigurationModal] =
      useState(false);

    const combinedSettings = useContext(SettingsContext);
    const gpuEnabled = combinedSettings?.settings.gpu_enabled;

    // Define the validation schema
    const validationSchema = Yup.object().shape({
      rerank_model_name: Yup.string().nullable(),
      rerank_provider_type: Yup.mixed<RerankerProvider>()
        .nullable()
        .oneOf(Object.values(RerankerProvider))
        .optional(),
      rerank_api_key: Yup.string()
        .nullable()
        .test(
          "required-if-cohere",
          "API Key is required for Cohere reranking",
          function (value) {
            const { rerank_provider_type } = this.parent;
            return (
              rerank_provider_type !== RerankerProvider.COHERE ||
              (value !== null && value !== "")
            );
          }
        ),
      rerank_api_url: Yup.string()
        .url("Must be a valid URL")
        .matches(/^https?:\/\//, "URL must start with http:// or https://")
        .nullable()
        .test(
          "required-if-litellm",
          "API URL is required for LiteLLM reranking",
          function (value) {
            const { rerank_provider_type } = this.parent;
            return (
              rerank_provider_type !== RerankerProvider.LITELLM ||
              (value !== null && value !== "")
            );
          }
        ),
    });

    return (
      <Formik
        innerRef={ref}
        initialValues={currentRerankingDetails}
        validationSchema={validationSchema}
        onSubmit={async (_, { setSubmitting }) => {
          setSubmitting(false);
        }}
        validate={(values) => {
          // Update parent component with values
          setRerankingDetails(values);

          // Run validation and report errors
          if (onValidationChange) {
            // We'll return an empty object here since Yup will handle the actual validation
            // But we need to check if there are any validation errors
            const errors: Record<string, string> = {};
            try {
              // Manually validate against the schema
              validationSchema.validateSync(values, { abortEarly: false });
              onValidationChange(true, {});
            } catch (validationError) {
              if (validationError instanceof Yup.ValidationError) {
                validationError.inner.forEach((err) => {
                  if (err.path) {
                    errors[err.path] = err.message;
                  }
                });
                onValidationChange(false, errors);
              }
            }
          }

          return {}; // Return empty object as Formik will handle the errors
        }}
        enableReinitialize={true}
      >
        {({ values, setFieldValue, resetForm }) => {
          const resetRerankingValues = () => {
            setRerankingDetails({
              rerank_api_key: null,
              rerank_provider_type: null,
              rerank_model_name: null,
              rerank_api_url: null,
            });
            resetForm();
          };

          return (
            <div className="p-2 rounded-lg max-w-4xl mx-auto">
              <p className="mb-4">
                Select from cloud, self-hosted models, or use no reranking
                model.
              </p>
              <div className="text-sm mr-auto mb-6 divide-x-2 flex">
                {originalRerankingDetails.rerank_model_name && (
                  <button
                    onClick={() => setModelTab(null)}
                    className={`mx-2 p-2 font-bold  ${
                      !modelTab
                        ? "rounded bg-background-900 text-text-100 underline"
                        : " hover:underline bg-background-100"
                    }`}
                  >
                    Current
                  </button>
                )}
                <div
                  className={`${
                    originalRerankingDetails.rerank_model_name && "px-2 ml-2"
                  }`}
                >
                  <button
                    onClick={() => setModelTab("cloud")}
                    className={`mr-2 p-2 font-bold  ${
                      modelTab == "cloud"
                        ? "rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline"
                        : " hover:underline bg-neutral-100 dark:bg-neutral-900"
                    }`}
                  >
                    Cloud-based
                  </button>
                </div>

                <div className="px-2">
                  <button
                    onClick={() => setModelTab("open")}
                    className={` mx-2 p-2 font-bold  ${
                      modelTab == "open"
                        ? "rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline"
                        : "hover:underline bg-neutral-100 dark:bg-neutral-900"
                    }`}
                  >
                    Self-hosted
                  </button>
                </div>
                {values.rerank_model_name && (
                  <div className="px-2">
                    <button
                      onClick={() => resetRerankingValues()}
                      className={`mx-2 p-2 font-bold rounded bg-neutral-100 dark:bg-neutral-900 text-neutral-900 dark:text-neutral-100 hover:underline`}
                    >
                      Remove Reranking
                    </button>
                  </div>
                )}
              </div>

              <Form>
                <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
                  {(modelTab
                    ? rerankingModels.filter(
                        (model) => model.cloud == (modelTab == "cloud")
                      )
                    : rerankingModels.filter(
                        (modelCard) =>
                          (modelCard.modelName ==
                            originalRerankingDetails.rerank_model_name &&
                            modelCard.rerank_provider_type ==
                              originalRerankingDetails.rerank_provider_type) ||
                          (modelCard.rerank_provider_type ==
                            RerankerProvider.LITELLM &&
                            originalRerankingDetails.rerank_provider_type ==
                              RerankerProvider.LITELLM)
                      )
                  ).map((card) => {
                    const isSelected =
                      values.rerank_provider_type ===
                        card.rerank_provider_type &&
                      (card.modelName == null ||
                        values.rerank_model_name === card.modelName);

                    return (
                      <div
                        key={`${card.rerank_provider_type}-${card.modelName}`}
                        className={`p-4 border rounded-lg cursor-pointer transition-all duration-200 ${
                          isSelected
                            ? "border-blue-800 bg-blue-50 dark:bg-blue-950 dark:border-blue-700 shadow-md"
                            : "border-background-200 hover:border-blue-300 hover:shadow-sm dark:border-neutral-700 dark:hover:border-blue-300"
                        }`}
                        onClick={() => {
                          if (
                            card.rerank_provider_type == RerankerProvider.COHERE
                          ) {
                            setIsApiKeyModalOpen(true);
                          } else if (
                            card.rerank_provider_type ==
                            RerankerProvider.BEDROCK
                          ) {
                            setIsApiKeyModalOpen(true);
                          } else if (
                            card.rerank_provider_type ==
                            RerankerProvider.LITELLM
                          ) {
                            setShowLiteLLMConfigurationModal(true);
                          } else if (
                            !card.rerank_provider_type &&
                            !gpuEnabled
                          ) {
                            setShowGpuWarningModalModel(card);
                          }

                          if (!isSelected) {
                            setRerankingDetails({
                              ...values,
                              rerank_provider_type: card.rerank_provider_type!,
                              rerank_model_name: card.modelName || null,
                              rerank_api_key: null,
                              rerank_api_url: null,
                            });
                            setFieldValue(
                              "rerank_provider_type",
                              card.rerank_provider_type
                            );
                            setFieldValue("rerank_model_name", card.modelName);
                          }
                        }}
                      >
                        <div className="flex items-center justify-between mb-3">
                          <div className="flex items-center">
                            {card.rerank_provider_type ===
                            RerankerProvider.LITELLM ? (
                              <LiteLLMIcon size={24} className="mr-2" />
                            ) : card.rerank_provider_type ===
                              RerankerProvider.COHERE ? (
                              <CohereIcon size={24} className="mr-2" />
                            ) : card.rerank_provider_type ===
                              RerankerProvider.BEDROCK ? (
                              <AmazonIcon size={24} className="mr-2" />
                            ) : (
                              <MixedBreadIcon size={24} className="mr-2" />
                            )}
                            <h3 className="font-bold text-lg">
                              {card.displayName}
                            </h3>
                          </div>
                          {card.link && (
                            <a
                              href={card.link}
                              target="_blank"
                              rel="noopener noreferrer"
                              onClick={(e) => e.stopPropagation()}
                              className="text-blue-500 hover:text-blue-700 transition-colors duration-200"
                            >
                              <FiExternalLink size={18} />
                            </a>
                          )}
                        </div>
                        <p className="text-sm text-text-600 mb-2">
                          {card.description}
                        </p>
                        <div className="text-xs text-text-500">
                          {card.cloud ? "Cloud-based" : "Self-hosted"}
                        </div>
                      </div>
                    );
                  })}
                </div>

                {showGpuWarningModalModel && (
                  <Modal
                    open
                    onOpenChange={() => setShowGpuWarningModalModel(null)}
                  >
                    <Modal.Content width="sm" height="sm">
                      <Modal.Header
                        icon={SvgAlertTriangle}
                        title="GPU Not Enabled"
                        onClose={() => setShowGpuWarningModalModel(null)}
                      />
                      <Modal.Body>
                        <p className="text-error font-semibold">Warning:</p>
                        <p>
                          Local reranking models require significant
                          computational resources and may perform slowly without
                          GPU acceleration. Consider switching to GPU-enabled
                          infrastructure or using a cloud-based alternative for
                          better performance.
                        </p>
                      </Modal.Body>
                      <Modal.Footer>
                        <Button
                          onClick={() => setShowGpuWarningModalModel(null)}
                        >
                          Understood
                        </Button>
                      </Modal.Footer>
                    </Modal.Content>
                  </Modal>
                )}

                {showLiteLLMConfigurationModal && (
                  <Modal
                    open
                    onOpenChange={() => {
                      resetForm();
                      setShowLiteLLMConfigurationModal(false);
                    }}
                  >
                    <Modal.Content>
                      <Modal.Header
                        icon={SvgKey}
                        title="API Key Configuration"
                        onClose={() => {
                          resetForm();
                          setShowLiteLLMConfigurationModal(false);
                        }}
                      />
                      <Modal.Body>
                        <div className="w-full flex flex-col gap-y-4">
                          <TextFormField
                            subtext="Set the URL at which your LiteLLM Proxy is hosted"
                            placeholder={values.rerank_api_url || undefined}
                            onChange={(
                              e: React.ChangeEvent<HTMLInputElement>
                            ) => {
                              const value = e.target.value;
                              setRerankingDetails({
                                ...values,
                                rerank_api_url: value,
                              });
                              setFieldValue("rerank_api_url", value);
                            }}
                            type="text"
                            label="LiteLLM Proxy  URL"
                            name="rerank_api_url"
                          />

                          <TextFormField
                            subtext="Set the key to access your LiteLLM Proxy"
                            placeholder={
                              values.rerank_api_key
                                ? "*".repeat(values.rerank_api_key.length)
                                : undefined
                            }
                            onChange={(
                              e: React.ChangeEvent<HTMLInputElement>
                            ) => {
                              const value = e.target.value;
                              setRerankingDetails({
                                ...values,
                                rerank_api_key: value,
                              });
                              setFieldValue("rerank_api_key", value);
                            }}
                            type="password"
                            label="LiteLLM Proxy Key"
                            name="rerank_api_key"
                            optional
                          />

                          <TextFormField
                            subtext="Set the model name to use for LiteLLM Proxy"
                            placeholder={
                              values.rerank_model_name
                                ? "*".repeat(values.rerank_model_name.length)
                                : undefined
                            }
                            onChange={(
                              e: React.ChangeEvent<HTMLInputElement>
                            ) => {
                              const value = e.target.value;
                              setRerankingDetails({
                                ...values,
                                rerank_model_name: value,
                              });
                              setFieldValue("rerank_model_name", value);
                            }}
                            label="LiteLLM Model Name"
                            name="rerank_model_name"
                            optional
                          />
                        </div>
                      </Modal.Body>
                      <Modal.Footer>
                        <Button
                          onClick={() => {
                            setShowLiteLLMConfigurationModal(false);
                          }}
                        >
                          Update
                        </Button>
                      </Modal.Footer>
                    </Modal.Content>
                  </Modal>
                )}

                {isApiKeyModalOpen && (
                  <Modal
                    open
                    onOpenChange={() => {
                      Object.keys(originalRerankingDetails).forEach((key) => {
                        setFieldValue(
                          key,
                          originalRerankingDetails[
                            key as keyof RerankingDetails
                          ]
                        );
                      });

                      setIsApiKeyModalOpen(false);
                    }}
                  >
                    <Modal.Content>
                      <Modal.Header
                        icon={SvgKey}
                        title="API Key Configuration"
                        onClose={() => {
                          Object.keys(originalRerankingDetails).forEach(
                            (key) => {
                              setFieldValue(
                                key,
                                originalRerankingDetails[
                                  key as keyof RerankingDetails
                                ]
                              );
                            }
                          );

                          setIsApiKeyModalOpen(false);
                        }}
                      />
                      <Modal.Body>
                        <div className="w-full">
                          <TextFormField
                            placeholder={
                              values.rerank_api_key
                                ? "*".repeat(values.rerank_api_key.length)
                                : values.rerank_provider_type ===
                                    RerankerProvider.BEDROCK
                                  ? "aws_ACCESSKEY_SECRETKEY_REGION"
                                  : "Enter your API key"
                            }
                            onChange={(
                              e: React.ChangeEvent<HTMLInputElement>
                            ) => {
                              const value = e.target.value;
                              setRerankingDetails({
                                ...values,
                                rerank_api_key: value,
                              });
                              setFieldValue("rerank_api_key", value);
                            }}
                            type="password"
                            label={
                              values.rerank_provider_type ===
                              RerankerProvider.BEDROCK
                                ? "AWS Credentials in format: aws_ACCESSKEY_SECRETKEY_REGION"
                                : "Cohere API Key"
                            }
                            name="rerank_api_key"
                          />
                        </div>
                      </Modal.Body>
                      <Modal.Footer>
                        <Button onClick={() => setIsApiKeyModalOpen(false)}>
                          Update
                        </Button>
                      </Modal.Footer>
                    </Modal.Content>
                  </Modal>
                )}
              </Form>
            </div>
          );
        }}
      </Formik>
    );
  }
);
RerankingDetailsForm.displayName = "RerankingDetailsForm";

export default RerankingDetailsForm;


================================================
FILE: web/src/app/admin/embeddings/interfaces.ts
================================================
import {
  AVAILABLE_CLOUD_PROVIDERS,
  AVAILABLE_MODELS,
  CloudEmbeddingModel,
  EmbeddingProvider,
  HostedEmbeddingModel,
} from "@/components/embedding/interfaces";

// This is a slightly differnte interface than used in the backend
// but is always used in conjunction with `AdvancedSearchConfiguration`
export interface RerankingDetails {
  rerank_model_name: string | null;
  rerank_provider_type: RerankerProvider | null;
  rerank_api_key: string | null;
  rerank_api_url: string | null;
}

export enum SwitchoverType {
  REINDEX = "reindex",
  ACTIVE_ONLY = "active_only",
  INSTANT = "instant",
}

export enum RerankerProvider {
  COHERE = "cohere",
  LITELLM = "litellm",
  BEDROCK = "bedrock",
}

export enum EmbeddingPrecision {
  FLOAT = "float",
  BFLOAT16 = "bfloat16",
}

export interface LLMContextualCost {
  provider: string;
  model_name: string;
  cost: number;
}

export interface AdvancedSearchConfiguration {
  index_name: string | null;
  multipass_indexing: boolean;
  enable_contextual_rag: boolean;
  contextual_rag_llm_name: string | null;
  contextual_rag_llm_provider: string | null;
  multilingual_expansion: string[];
  disable_rerank_for_streaming: boolean;
  api_url: string | null;
  num_rerank: number;
  embedding_precision: EmbeddingPrecision;
  reduced_dimension: number | null;
}

export interface SavedSearchSettings
  extends RerankingDetails,
    AdvancedSearchConfiguration {
  provider_type: EmbeddingProvider | null;
  switchover_type?: SwitchoverType;
}

export interface RerankingModel {
  rerank_provider_type: RerankerProvider | null;
  modelName?: string;
  displayName: string;
  description: string;
  link: string;
  cloud: boolean;
}

export const rerankingModels: RerankingModel[] = [
  {
    rerank_provider_type: RerankerProvider.LITELLM,
    cloud: true,
    displayName: "LiteLLM",
    description: "Host your own reranker or router with LiteLLM proxy",
    link: "https://docs.litellm.ai/docs/simple_proxy",
  },
  {
    rerank_provider_type: null,
    cloud: false,
    modelName: "mixedbread-ai/mxbai-rerank-xsmall-v1",
    displayName: "MixedBread XSmall",
    description: "Fastest, smallest model for basic reranking tasks.",
    link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1",
  },
  {
    rerank_provider_type: null,
    cloud: false,
    modelName: "mixedbread-ai/mxbai-rerank-base-v1",
    displayName: "MixedBread Base",
    description: "Balanced performance for general reranking needs.",
    link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1",
  },
  {
    rerank_provider_type: null,
    cloud: false,
    modelName: "mixedbread-ai/mxbai-rerank-large-v1",
    displayName: "MixedBread Large",
    description: "Most powerful model for complex reranking tasks.",
    link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1",
  },
  {
    cloud: true,
    rerank_provider_type: RerankerProvider.COHERE,
    modelName: "rerank-english-v3.0",
    displayName: "Cohere English",
    description: "High-performance English-focused reranking model.",
    link: "https://docs.cohere.com/v2/reference/rerank",
  },
  {
    cloud: true,
    rerank_provider_type: RerankerProvider.COHERE,
    modelName: "rerank-multilingual-v3.0",
    displayName: "Cohere Multilingual",
    description: "Powerful multilingual reranking model.",
    link: "https://docs.cohere.com/v2/reference/rerank",
  },
  {
    cloud: true,
    rerank_provider_type: RerankerProvider.BEDROCK,
    modelName: "cohere.rerank-v3-5:0",
    displayName: "Cohere Rerank 3.5",
    description:
      "Powerful multilingual reranking model invoked through AWS Bedrock.",
    link: "https://aws.amazon.com/blogs/machine-learning/cohere-rerank-3-5-is-now-available-in-amazon-bedrock-through-rerank-api",
  },
];

export const getCurrentModelCopy = (
  currentModelName: string
): CloudEmbeddingModel | HostedEmbeddingModel | null => {
  const AVAILABLE_CLOUD_PROVIDERS_FLATTENED = AVAILABLE_CLOUD_PROVIDERS.flatMap(
    (provider) =>
      provider.embedding_models.map((model) => ({
        ...model,
        provider_type: provider.provider_type,
        model_name: model.model_name,
      }))
  );

  return (
    AVAILABLE_MODELS.find((model) => model.model_name === currentModelName) ||
    AVAILABLE_CLOUD_PROVIDERS_FLATTENED.find(
      (model) => model.model_name === currentModelName
    ) ||
    null
  );
};


================================================
FILE: web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { CloudEmbeddingModel } from "../../../../components/embedding/interfaces";
import { SvgCheck } from "@opal/icons";

export interface AlreadyPickedModalProps {
  model: CloudEmbeddingModel;
  onClose: () => void;
}

export default function AlreadyPickedModal({
  model,
  onClose,
}: AlreadyPickedModalProps) {
  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgCheck}
          title={`${model.model_name} already chosen`}
          description="You can select a different one if you want!"
          onClose={onClose}
        />
        <Modal.Footer>
          <Button onClick={onClose}>Close</Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx
================================================
"use client";

import React, { useRef, useState } from "react";
import Modal from "@/refresh-components/Modal";
import { Callout } from "@/components/ui/callout";
import Text from "@/refresh-components/texts/Text";
import Separator from "@/refresh-components/Separator";
import Button from "@/refresh-components/buttons/Button";
import { Label } from "@/components/Field";
import {
  CloudEmbeddingProvider,
  getFormattedProviderName,
} from "@/components/embedding/interfaces";
import { EMBEDDING_PROVIDERS_ADMIN_URL } from "@/lib/llmConfig/constants";
import { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { testEmbedding } from "@/app/admin/embeddings/pages/utils";
import { SvgSettings } from "@opal/icons";

export interface ChangeCredentialsModalProps {
  provider: CloudEmbeddingProvider;
  onConfirm: () => void;
  onCancel: () => void;
  onDeleted: () => void;
  useFileUpload: boolean;
  isProxy?: boolean;
  isAzure?: boolean;
}

export default function ChangeCredentialsModal({
  provider,
  onConfirm,
  onCancel,
  onDeleted,
  useFileUpload,
  isProxy = false,
  isAzure = false,
}: ChangeCredentialsModalProps) {
  const [apiKey, setApiKey] = useState("");
  const [apiUrl, setApiUrl] = useState("");
  const [modelName, setModelName] = useState("");
  const [testError, setTestError] = useState<string>("");
  const [fileName, setFileName] = useState<string>("");
  const fileInputRef = useRef<HTMLInputElement>(null);
  const [deletionError, setDeletionError] = useState<string>("");

  const clearFileInput = () => {
    setFileName("");
    if (fileInputRef.current) {
      fileInputRef.current.value = "";
    }
  };

  const handleFileUpload = async (
    event: React.ChangeEvent<HTMLInputElement>
  ) => {
    const file = event.target.files?.[0];
    setFileName("");

    if (file) {
      setFileName(file.name);
      try {
        setDeletionError("");
        const fileContent = await file.text();
        let jsonContent;
        try {
          jsonContent = JSON.parse(fileContent);
          setApiKey(JSON.stringify(jsonContent));
        } catch (parseError) {
          throw new Error(
            "Failed to parse JSON file. Please ensure it's a valid JSON."
          );
        }
      } catch (error) {
        setTestError(
          error instanceof Error
            ? error.message
            : "An unknown error occurred while processing the file."
        );
        setApiKey("");
        clearFileInput();
      }
    }
  };

  const handleDelete = async () => {
    setDeletionError("");

    try {
      const response = await fetch(
        `${EMBEDDING_PROVIDERS_ADMIN_URL}/${provider.provider_type.toLowerCase()}`,
        {
          method: "DELETE",
        }
      );

      if (!response.ok) {
        const errorData = await response.json();
        setDeletionError(errorData.detail);
        return;
      }

      mutate(SWR_KEYS.adminLlmProviders);
      onDeleted();
    } catch (error) {
      setDeletionError(
        error instanceof Error ? error.message : "An unknown error occurred"
      );
    }
  };

  const handleSubmit = async () => {
    setTestError("");
    const normalizedProviderType = provider.provider_type
      .toLowerCase()
      .split(" ")[0];

    if (!normalizedProviderType) {
      setTestError("Provider type is invalid or missing.");
      return;
    }

    try {
      const testResponse = await testEmbedding({
        provider_type: normalizedProviderType,
        modelName,
        apiKey,
        apiUrl,
        apiVersion: null,
        deploymentName: null,
      });

      if (!testResponse.ok) {
        const errorMsg = (await testResponse.json()).detail;
        throw new Error(errorMsg);
      }

      const updateResponse = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          provider_type: normalizedProviderType,
          api_key: apiKey,
          api_url: apiUrl,
          is_default_provider: false,
          is_configured: true,
        }),
      });

      if (!updateResponse.ok) {
        const errorData = await updateResponse.json();
        throw new Error(
          errorData.detail ||
            `Failed to update provider- check your ${
              isProxy ? "API URL" : "API key"
            }`
        );
      }

      // Refresh cached provider details so the rest of the form sees the new key without forcing a re-index
      await mutate(EMBEDDING_PROVIDERS_ADMIN_URL);

      onConfirm();
    } catch (error) {
      setTestError(
        error instanceof Error ? error.message : "An unknown error occurred"
      );
    }
  };
  return (
    <Modal open onOpenChange={onCancel}>
      <Modal.Content>
        <Modal.Header
          icon={SvgSettings}
          title={`Modify your ${getFormattedProviderName(
            provider.provider_type
          )} ${isProxy ? "Configuration" : "key"}`}
          onClose={onCancel}
        />
        <Modal.Body>
          {!isAzure && (
            <>
              <Text as="p">
                You can modify your configuration by providing a new API key
                {isProxy ? " or API URL." : "."}
              </Text>

              <div className="flex flex-col gap-2">
                <Label className="mt-2">API Key</Label>
                {useFileUpload ? (
                  <>
                    <Label className="mt-2">Upload JSON File</Label>
                    <input
                      ref={fileInputRef}
                      type="file"
                      accept=".json"
                      onChange={handleFileUpload}
                      className="text-lg w-full p-1"
                    />
                    {fileName && <p>Uploaded file: {fileName}</p>}
                  </>
                ) : (
                  <>
                    <input
                      type="password"
                      className="border border-border rounded w-full py-2 px-3 bg-background-emphasis"
                      value={apiKey}
                      onChange={(e: any) => setApiKey(e.target.value)}
                      placeholder="Paste your API key here"
                    />
                  </>
                )}

                {isProxy && (
                  <>
                    <Label className="mt-2">API URL</Label>

                    <input
                      className={`
                          border
                          border-border
                          rounded
                          w-full
                          py-2
                          px-3
                          bg-background-emphasis
                      `}
                      value={apiUrl}
                      onChange={(e: any) => setApiUrl(e.target.value)}
                      placeholder="Paste your API URL here"
                    />

                    {deletionError && (
                      <Callout type="danger" title="Error">
                        {deletionError}
                      </Callout>
                    )}

                    <div>
                      <Label className="mt-2">Test Model</Label>
                      <Text as="p">
                        Since you are using a liteLLM proxy, we&apos;ll need a
                        model name to test the connection with.
                      </Text>
                    </div>
                    <input
                      className={`
                       border
                       border-border
                       rounded
                       w-full
                       py-2
                       px-3
                       bg-background-emphasis
                   `}
                      value={modelName}
                      onChange={(e: any) => setModelName(e.target.value)}
                      placeholder="Paste your model name here"
                    />
                  </>
                )}

                {testError && (
                  <Callout type="danger" title="Error">
                    {testError}
                  </Callout>
                )}

                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                <Button
                  className="mr-auto mt-4"
                  onClick={() => handleSubmit()}
                  disabled={!apiKey}
                >
                  Update Configuration
                </Button>

                <Separator />
              </div>
            </>
          )}

          <Text as="p" className="mt-4 font-bold">
            You can delete your configuration.
          </Text>
          <Text as="p">
            This is only possible if you have already switched to a different
            embedding type!
          </Text>

          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <Button className="mr-auto" onClick={handleDelete} danger>
            Delete Configuration
          </Button>
          {deletionError && (
            <Callout type="danger" title="Error">
              {deletionError}
            </Callout>
          )}
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { Callout } from "@/components/ui/callout";
import {
  CloudEmbeddingProvider,
  getFormattedProviderName,
} from "../../../../components/embedding/interfaces";
import { SvgTrash } from "@opal/icons";

export interface DeleteCredentialsModalProps {
  modelProvider: CloudEmbeddingProvider;
  onConfirm: () => void;
  onCancel: () => void;
}

export default function DeleteCredentialsModal({
  modelProvider,
  onConfirm,
  onCancel,
}: DeleteCredentialsModalProps) {
  return (
    <Modal open onOpenChange={onCancel}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgTrash}
          title={`Delete ${getFormattedProviderName(
            modelProvider.provider_type
          )} Credentials?`}
          onClose={onCancel}
        />
        <Modal.Body>
          <Text as="p">
            You&apos;re about to delete your{" "}
            {getFormattedProviderName(modelProvider.provider_type)} credentials.
            Are you sure?
          </Text>
          <Callout type="danger" title="Point of No Return" />
        </Modal.Body>
        <Modal.Footer>
          <Button prominence="secondary" onClick={onCancel}>
            Keep Credentials
          </Button>
          <Button variant="danger" onClick={onConfirm}>
            Delete Credentials
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/InstantSwitchConfirmModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertTriangle } from "@opal/icons";
export interface InstantSwitchConfirmModalProps {
  onClose: () => void;
  onConfirm: () => void;
}

export default function InstantSwitchConfirmModal({
  onClose,
  onConfirm,
}: InstantSwitchConfirmModalProps) {
  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgAlertTriangle}
          title="Are you sure you want to do an instant switch?"
          onClose={onClose}
        />
        <Modal.Body>
          <Text as="p">
            Instant switching will immediately change the embedding model
            without re-indexing. Searches will be over a partial set of
            documents (starting with 0 documents) until re-indexing is complete.
          </Text>
          <Text as="p">
            <strong>This is not reversible.</strong>
          </Text>
        </Modal.Body>
        <Modal.Footer>
          <Button onClick={onConfirm}>Confirm</Button>
          <Button prominence="secondary" onClick={onClose}>
            Cancel
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { Callout } from "@/components/ui/callout";
import { Button } from "@opal/components";
import { HostedEmbeddingModel } from "@/components/embedding/interfaces";
import { SvgServer } from "@opal/icons";

export interface ModelSelectionConfirmationModalProps {
  selectedModel: HostedEmbeddingModel;
  isCustom: boolean;
  onConfirm: () => void;
  onCancel: () => void;
}

export default function ModelSelectionConfirmationModal({
  selectedModel,
  isCustom,
  onConfirm,
  onCancel,
}: ModelSelectionConfirmationModalProps) {
  return (
    <Modal open onOpenChange={onCancel}>
      <Modal.Content width="sm" height="lg">
        <Modal.Header
          icon={SvgServer}
          title="Update Embedding Model"
          onClose={onCancel}
        />
        <Modal.Body>
          <Text as="p">
            You have selected: <strong>{selectedModel.model_name}</strong>. Are
            you sure you want to update to this new embedding model?
          </Text>
          <Text as="p">
            We will re-index all your documents in the background so you will be
            able to continue to use Onyx as normal with the old model in the
            meantime. Depending on how many documents you have indexed, this may
            take a while.
          </Text>
          <Text as="p">
            <i>NOTE:</i> this re-indexing process will consume more resources
            than normal. If you are self-hosting, we recommend that you allocate
            at least 16GB of RAM to Onyx during this process.
          </Text>

          {isCustom && (
            <Callout type="warning" title="IMPORTANT">
              We&apos;ve detected that this is a custom-specified embedding
              model. Since we have to download the model files before verifying
              the configuration&apos;s correctness, we won&apos;t be able to let
              you know if the configuration is valid until{" "}
              <strong>after</strong> we start re-indexing your documents. If
              there is an issue, it will show up on this page as an indexing
              error on this page after clicking Confirm.
            </Callout>
          )}
        </Modal.Body>
        <Modal.Footer>
          <Button onClick={onConfirm}>Confirm</Button>
          <Button prominence="secondary" onClick={onCancel}>
            Cancel
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx
================================================
import React, { useRef, useState } from "react";
import Text from "@/refresh-components/texts/Text";
import { Callout } from "@/components/ui/callout";
import { Button } from "@opal/components";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { Label, TextFormField } from "@/components/Field";
import {
  CloudEmbeddingProvider,
  EmbeddingProvider,
  getFormattedProviderName,
} from "@/components/embedding/interfaces";
import { EMBEDDING_PROVIDERS_ADMIN_URL } from "@/lib/llmConfig/constants";
import Modal from "@/refresh-components/Modal";
import { SvgSettings } from "@opal/icons";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
export interface ProviderCreationModalProps {
  updateCurrentModel: (
    newModel: string,
    provider_type: EmbeddingProvider
  ) => void;
  selectedProvider: CloudEmbeddingProvider;
  onConfirm: () => void;
  onCancel: () => void;
  existingProvider?: CloudEmbeddingProvider;
  isProxy?: boolean;
  isAzure?: boolean;
}

export default function ProviderCreationModal({
  selectedProvider,
  onConfirm,
  onCancel,
  existingProvider,
  isProxy,
  isAzure,
  updateCurrentModel,
}: ProviderCreationModalProps) {
  const useFileUpload =
    selectedProvider.provider_type == EmbeddingProvider.GOOGLE;

  const [errorMsg, setErrorMsg] = useState<string>("");
  const [fileName, setFileName] = useState<string>("");

  const initialValues = {
    provider_type:
      existingProvider?.provider_type || selectedProvider.provider_type,
    api_key: existingProvider?.api_key || "",
    api_url: existingProvider?.api_url || "",
    custom_config: existingProvider?.custom_config
      ? Object.entries(existingProvider.custom_config)
      : [],
    model_id: 0,
    model_name: null,
  };

  const validationSchema = Yup.object({
    provider_type: Yup.string().required("Provider type is required"),
    api_key:
      isProxy || isAzure
        ? Yup.string()
        : useFileUpload
          ? Yup.string()
          : Yup.string().required("API Key is required"),
    model_name: isProxy
      ? Yup.string().required("Model name is required")
      : Yup.string().nullable(),
    api_url:
      isProxy || isAzure
        ? Yup.string().required("API URL is required")
        : Yup.string(),
    deployment_name: isAzure
      ? Yup.string().required("Deployment name is required")
      : Yup.string(),
    api_version: isAzure
      ? Yup.string().required("API Version is required")
      : Yup.string(),
    custom_config: Yup.array().of(Yup.array().of(Yup.string()).length(2)),
  });

  const fileInputRef = useRef<HTMLInputElement>(null);

  const handleFileUpload = async (
    event: React.ChangeEvent<HTMLInputElement>,
    setFieldValue: (field: string, value: any) => void
  ) => {
    const file = event.target.files?.[0];
    setFileName("");
    if (file) {
      setFileName(file.name);
      try {
        const fileContent = await file.text();
        let jsonContent;
        try {
          jsonContent = JSON.parse(fileContent);
        } catch (parseError) {
          throw new Error(
            "Failed to parse JSON file. Please ensure it's a valid JSON."
          );
        }
        setFieldValue("api_key", JSON.stringify(jsonContent));
      } catch (error) {
        setFieldValue("api_key", "");
      }
    }
  };

  const handleSubmit = async (
    values: any,
    { setSubmitting }: { setSubmitting: (isSubmitting: boolean) => void }
  ) => {
    setErrorMsg("");
    try {
      const customConfig = Object.fromEntries(values.custom_config);
      const providerType = values.provider_type.toLowerCase().split(" ")[0];
      const isOpenAI = providerType === "openai";

      const testModelName =
        isOpenAI || isAzure ? "text-embedding-3-small" : values.model_name;

      const testEmbeddingPayload = {
        provider_type: providerType,
        api_key: values.api_key,
        api_url: values.api_url,
        model_name: testModelName,
        api_version: values.api_version,
        deployment_name: values.deployment_name,
      };

      const initialResponse = await fetch(
        "/api/admin/embedding/test-embedding",
        {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(testEmbeddingPayload),
        }
      );

      if (!initialResponse.ok) {
        const errorMsg = (await initialResponse.json()).detail;
        setErrorMsg(errorMsg);
        setSubmitting(false);
        return;
      }

      const response = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          ...values,
          api_version: values.api_version,
          deployment_name: values.deployment_name,
          provider_type: values.provider_type.toLowerCase().split(" ")[0],
          custom_config: customConfig,
          is_default_provider: false,
          is_configured: true,
        }),
      });

      if (isAzure) {
        updateCurrentModel(values.model_name, EmbeddingProvider.AZURE);
      }

      if (!response.ok) {
        const errorData = await response.json();
        throw new Error(
          errorData.detail || "Failed to update provider- check your API key"
        );
      }

      onConfirm();
    } catch (error: unknown) {
      if (error instanceof Error) {
        setErrorMsg(error.message);
      } else {
        setErrorMsg("An unknown error occurred");
      }
    } finally {
      setSubmitting(false);
    }
  };

  return (
    <Modal open onOpenChange={onCancel}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgSettings}
          title={`Configure ${getFormattedProviderName(
            selectedProvider.provider_type
          )}`}
          onClose={onCancel}
        />
        <Modal.Body>
          <Formik
            initialValues={initialValues}
            validationSchema={validationSchema}
            onSubmit={handleSubmit}
          >
            {({ isSubmitting, handleSubmit, setFieldValue }) => (
              <Form onSubmit={handleSubmit} className="space-y-4">
                <Text as="p">
                  You are setting the credentials for this provider. To access
                  this information, follow the instructions{" "}
                  <a
                    className="cursor-pointer underline"
                    target="_blank"
                    href={selectedProvider.docsLink}
                    rel="noreferrer"
                  >
                    here
                  </a>{" "}
                  and gather your{" "}
                  <a
                    className="cursor-pointer underline"
                    target="_blank"
                    href={selectedProvider.apiLink}
                    rel="noreferrer"
                  >
                    {isProxy || isAzure ? "API URL" : "API KEY"}
                  </a>
                </Text>

                <div className="flex w-full flex-col gap-y-6">
                  {(isProxy || isAzure) && (
                    <TextFormField
                      name="api_url"
                      label="API URL"
                      placeholder="API URL"
                      type="text"
                    />
                  )}

                  {isProxy && (
                    <TextFormField
                      name="model_name"
                      label={`Model Name ${isProxy ? "(for testing)" : ""}`}
                      placeholder="Model Name"
                      type="text"
                    />
                  )}

                  {isAzure && (
                    <TextFormField
                      name="deployment_name"
                      label="Deployment Name"
                      placeholder="Deployment Name"
                      type="text"
                    />
                  )}

                  {isAzure && (
                    <TextFormField
                      name="api_version"
                      label="API Version"
                      placeholder="API Version"
                      type="text"
                    />
                  )}

                  {useFileUpload ? (
                    <>
                      <Label>Upload JSON File</Label>
                      <input
                        ref={fileInputRef}
                        type="file"
                        accept=".json"
                        onChange={(e) => handleFileUpload(e, setFieldValue)}
                        className="text-lg w-full p-1"
                      />
                      {fileName && <p>Uploaded file: {fileName}</p>}
                    </>
                  ) : (
                    <TextFormField
                      name="api_key"
                      label={`API Key ${
                        isProxy ? "(for non-local deployments)" : ""
                      }`}
                      placeholder="API Key"
                      type="password"
                    />
                  )}

                  <a
                    href={selectedProvider.apiLink}
                    target="_blank"
                    className="underline cursor-pointer"
                    rel="noreferrer"
                  >
                    Learn more here
                  </a>
                </div>

                {errorMsg && (
                  <Callout title="Error" type="danger">
                    {errorMsg}
                  </Callout>
                )}

                <Button
                  disabled={isSubmitting}
                  type="submit"
                  width="full"
                  icon={isSubmitting ? SimpleLoader : undefined}
                >
                  {isSubmitting
                    ? "Submitting"
                    : existingProvider
                      ? "Update"
                      : "Create"}
                </Button>
              </Form>
            )}
          </Formik>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/modals/SelectModelModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { CloudEmbeddingModel } from "@/components/embedding/interfaces";
import { SvgServer } from "@opal/icons";

export interface SelectModelModalProps {
  model: CloudEmbeddingModel;
  onConfirm: () => void;
  onCancel: () => void;
}

export default function SelectModelModal({
  model,
  onConfirm,
  onCancel,
}: SelectModelModalProps) {
  return (
    <Modal open onOpenChange={onCancel}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgServer}
          title={`Select ${model.model_name}`}
          onClose={onCancel}
        />
        <Modal.Body>
          <Text as="p">
            You&apos;re selecting a new embedding model,{" "}
            <strong>{model.model_name}</strong>. If you update to this model,
            you will need to undergo a complete re-indexing. Are you sure?
          </Text>
        </Modal.Body>
        <Modal.Footer>
          <Button onClick={onConfirm}>Confirm</Button>
          <Button prominence="secondary" onClick={onCancel}>
            Cancel
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/embeddings/page.tsx
================================================
"use client";

import { EmbeddingFormProvider } from "@/components/context/EmbeddingContext";
import EmbeddingSidebar from "../../../sections/sidebar/UpsertEmbeddingSidebar";
import EmbeddingForm from "./pages/EmbeddingFormPage";

export default function EmbeddingWrapper() {
  return (
    <EmbeddingFormProvider>
      <div className="flex justify-center w-full h-full">
        <EmbeddingSidebar />
        <div className="mt-12 w-full max-w-5xl mx-auto">
          <EmbeddingForm />
        </div>
      </div>
    </EmbeddingFormProvider>
  );
}


================================================
FILE: web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx
================================================
import React, { forwardRef } from "react";
import { Formik, Form, FormikProps, FieldArray, Field } from "formik";
import * as Yup from "yup";
import {
  AdvancedSearchConfiguration,
  EmbeddingPrecision,
  LLMContextualCost,
} from "../interfaces";
import {
  BooleanFormField,
  Label,
  SubLabel,
  SelectorFormField,
} from "@/components/Field";
import NumberInput from "../../connectors/[connector]/pages/ConnectorInput/NumberInput";
import { StringOrNumberOption } from "@/components/Dropdown";
import useSWR from "swr";
import { LLM_CONTEXTUAL_COST_ADMIN_URL } from "@/lib/llmConfig/constants";
import { errorHandlingFetcher } from "@/lib/fetcher";
import Button from "@/refresh-components/buttons/Button";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SvgPlusCircle, SvgTrash } from "@opal/icons";
// Number of tokens to show cost calculation for
const COST_CALCULATION_TOKENS = 1_000_000;

interface AdvancedEmbeddingFormPageProps {
  updateAdvancedEmbeddingDetails: (
    key: keyof AdvancedSearchConfiguration,
    value: any
  ) => void;
  advancedEmbeddingDetails: AdvancedSearchConfiguration;
  embeddingProviderType: string | null;
  onValidationChange?: (
    isValid: boolean,
    errors: Record<string, string>
  ) => void;
}

// Options for embedding precision based on EmbeddingPrecision enum
const embeddingPrecisionOptions: StringOrNumberOption[] = [
  { name: EmbeddingPrecision.BFLOAT16, value: EmbeddingPrecision.BFLOAT16 },
  { name: EmbeddingPrecision.FLOAT, value: EmbeddingPrecision.FLOAT },
];

const AdvancedEmbeddingFormPage = forwardRef<
  FormikProps<any>,
  AdvancedEmbeddingFormPageProps
>(
  (
    {
      updateAdvancedEmbeddingDetails,
      advancedEmbeddingDetails,
      embeddingProviderType,
      onValidationChange,
    },
    ref
  ) => {
    // Fetch contextual costs
    const { data: contextualCosts, error: costError } = useSWR<
      LLMContextualCost[]
    >(LLM_CONTEXTUAL_COST_ADMIN_URL, errorHandlingFetcher);

    const llmOptions: StringOrNumberOption[] = React.useMemo(
      () =>
        (contextualCosts || []).map((cost) => {
          return {
            // Use model_name as display - contextual costs don't have display_name field
            name: cost.model_name,
            value: cost.model_name,
          };
        }),
      [contextualCosts]
    );

    // Helper function to format cost as USD
    const formatCost = (cost: number) => {
      return new Intl.NumberFormat("en-US", {
        style: "currency",
        currency: "USD",
      }).format(cost);
    };

    // Get cost info for selected model
    const getSelectedModelCost = (modelName: string | null) => {
      if (!contextualCosts || !modelName) return null;
      return contextualCosts.find((cost) => cost.model_name === modelName);
    };

    // Get the current value for the selector based on the parent state
    const getCurrentLLMValue = React.useMemo(() => {
      if (!advancedEmbeddingDetails.contextual_rag_llm_name) return null;
      return advancedEmbeddingDetails.contextual_rag_llm_name;
    }, [advancedEmbeddingDetails.contextual_rag_llm_name]);

    return (
      <div className="py-4 rounded-lg max-w-4xl px-4 mx-auto">
        <Formik
          innerRef={ref}
          initialValues={{
            ...advancedEmbeddingDetails,
            contextual_rag_llm: getCurrentLLMValue,
          }}
          validationSchema={Yup.object().shape({
            multilingual_expansion: Yup.array().of(Yup.string()),
            multipass_indexing: Yup.boolean(),
            enable_contextual_rag: Yup.boolean(),
            contextual_rag_llm: Yup.string()
              .nullable()
              .test(
                "required-if-contextual-rag",
                "LLM must be selected when Contextual RAG is enabled",
                function (value) {
                  const enableContextualRag = this.parent.enable_contextual_rag;
                  console.log("enableContextualRag", enableContextualRag);
                  console.log("value", value);
                  return !enableContextualRag || value !== null;
                }
              ),
            embedding_precision: Yup.string().nullable(),
            reduced_dimension: Yup.number()
              .nullable()
              .test(
                "positive",
                "Must be larger than or equal to 256",
                (value) => value === null || value === undefined || value >= 256
              )
              .test(
                "openai",
                "Reduced Dimensions is only supported for OpenAI embedding models",
                (value) => {
                  return embeddingProviderType === "openai" || value === null;
                }
              ),
          })}
          onSubmit={async (_, { setSubmitting }) => {
            setSubmitting(false);
          }}
          validate={(values) => {
            // Call updateAdvancedEmbeddingDetails for each changed field
            Object.entries(values).forEach(([key, value]) => {
              if (key === "contextual_rag_llm") {
                const selectedModel = (contextualCosts || []).find(
                  (cost) => cost.model_name === value
                );
                if (selectedModel) {
                  updateAdvancedEmbeddingDetails(
                    "contextual_rag_llm_provider",
                    selectedModel.provider
                  );
                  updateAdvancedEmbeddingDetails(
                    "contextual_rag_llm_name",
                    selectedModel.model_name
                  );
                }
              } else {
                updateAdvancedEmbeddingDetails(
                  key as keyof AdvancedSearchConfiguration,
                  value
                );
              }
            });

            // Run validation and report errors
            if (onValidationChange) {
              // We'll return an empty object here since Yup will handle the actual validation
              // But we need to check if there are any validation errors
              const errors: Record<string, string> = {};
              try {
                // Manually validate against the schema
                Yup.object()
                  .shape({
                    multilingual_expansion: Yup.array().of(Yup.string()),
                    multipass_indexing: Yup.boolean(),
                    enable_contextual_rag: Yup.boolean(),
                    contextual_rag_llm: Yup.string()
                      .nullable()
                      .test(
                        "required-if-contextual-rag",
                        "LLM must be selected when Contextual RAG is enabled",
                        function (value) {
                          const enableContextualRag =
                            this.parent.enable_contextual_rag;
                          return !enableContextualRag || value !== null;
                        }
                      ),
                    embedding_precision: Yup.string().nullable(),
                    reduced_dimension: Yup.number()
                      .nullable()
                      .test(
                        "positive",
                        "Must be larger than or equal to 256",
                        (value) =>
                          value === null || value === undefined || value >= 256
                      )
                      .test(
                        "openai",
                        "Reduced Dimensions is only supported for OpenAI embedding models",
                        (value) => {
                          return (
                            embeddingProviderType === "openai" || value === null
                          );
                        }
                      ),
                  })
                  .validateSync(values, { abortEarly: false });
                onValidationChange(true, {});
              } catch (validationError) {
                if (validationError instanceof Yup.ValidationError) {
                  validationError.inner.forEach((err) => {
                    if (err.path) {
                      errors[err.path] = err.message;
                    }
                  });
                  onValidationChange(false, errors);
                }
              }
            }

            return {}; // Return empty object as Formik will handle the errors
          }}
          enableReinitialize={true}
        >
          {({ values }) => (
            <Form>
              <BooleanFormField
                subtext="Enable multipass indexing for both mini and large chunks."
                optional
                label="Multipass Indexing"
                name="multipass_indexing"
              />
              <BooleanFormField
                subtext={
                  NEXT_PUBLIC_CLOUD_ENABLED
                    ? "Contextual RAG disabled in Onyx Cloud"
                    : "Enable contextual RAG for all chunk sizes."
                }
                optional
                label="Contextual RAG"
                name="enable_contextual_rag"
                disabled={NEXT_PUBLIC_CLOUD_ENABLED}
              />
              <div>
                <SelectorFormField
                  name="contextual_rag_llm"
                  label="Contextual RAG LLM"
                  subtext={
                    costError
                      ? "Error loading LLM models. Please try again later."
                      : !contextualCosts
                        ? "Loading available LLM models..."
                        : values.enable_contextual_rag
                          ? "Select the LLM model to use for contextual RAG processing."
                          : "Enable Contextual RAG above to select an LLM model."
                  }
                  options={llmOptions}
                  disabled={
                    !values.enable_contextual_rag ||
                    !contextualCosts ||
                    !!costError
                  }
                />
                {values.enable_contextual_rag &&
                  values.contextual_rag_llm &&
                  !costError && (
                    <div className="mt-2 text-sm text-text-600">
                      {contextualCosts ? (
                        <>
                          Estimated cost for processing{" "}
                          {COST_CALCULATION_TOKENS.toLocaleString()} tokens:{" "}
                          <span className="font-medium">
                            {getSelectedModelCost(values.contextual_rag_llm)
                              ? formatCost(
                                  getSelectedModelCost(
                                    values.contextual_rag_llm
                                  )!.cost
                                )
                              : "Cost information not available"}
                          </span>
                        </>
                      ) : (
                        "Loading cost information..."
                      )}
                    </div>
                  )}
              </div>
              <SelectorFormField
                name="embedding_precision"
                label="Embedding Precision"
                options={embeddingPrecisionOptions}
                subtext="Select the precision for embedding vectors. Lower precision uses less storage but may reduce accuracy."
              />

              <NumberInput
                description="Number of dimensions to reduce the embedding to.
              Will reduce memory usage but may reduce accuracy.
              If not specified, will just use the selected model's default dimensionality without any reduction.
              Currently only supported for OpenAI embedding models"
                optional={true}
                label="Reduced Dimension"
                name="reduced_dimension"
              />
            </Form>
          )}
        </Formik>
      </div>
    );
  }
);
export default AdvancedEmbeddingFormPage;

AdvancedEmbeddingFormPage.displayName = "AdvancedEmbeddingFormPage";


================================================
FILE: web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx
================================================
"use client";

import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Title from "@/components/ui/title";
import {
  CloudEmbeddingProvider,
  CloudEmbeddingModel,
  AVAILABLE_CLOUD_PROVIDERS,
  CloudEmbeddingProviderFull,
  EmbeddingModelDescriptor,
  EmbeddingProvider,
  LITELLM_CLOUD_PROVIDER,
  AZURE_CLOUD_PROVIDER,
  getFormattedProviderName,
} from "../../../../components/embedding/interfaces";
import { EmbeddingDetails } from "../EmbeddingModelSelectionForm";
import { FiExternalLink, FiInfo, FiTrash } from "react-icons/fi";
import { HoverPopup } from "@/components/HoverPopup";
import { Dispatch, SetStateAction, useEffect, useState } from "react";
import { CustomEmbeddingModelForm } from "@/components/embedding/CustomEmbeddingModelForm";
import { deleteSearchSettings } from "./utils";
import { toast } from "@/hooks/useToast";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { AdvancedSearchConfiguration } from "../interfaces";
import CardSection from "@/components/admin/CardSection";

export default function CloudEmbeddingPage({
  currentModel,
  embeddingProviderDetails,
  embeddingModelDetails,
  setShowTentativeProvider,
  setChangeCredentialsProvider,
  setAlreadySelectedModel,
  setShowTentativeModel,
  setShowModelInQueue,
  advancedEmbeddingDetails,
}: {
  setShowModelInQueue: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel;
  setAlreadySelectedModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  embeddingModelDetails?: CloudEmbeddingModel[];
  embeddingProviderDetails?: EmbeddingDetails[];
  setShowTentativeProvider: React.Dispatch<
    React.SetStateAction<CloudEmbeddingProvider | null>
  >;
  setChangeCredentialsProvider: React.Dispatch<
    React.SetStateAction<CloudEmbeddingProvider | null>
  >;
  advancedEmbeddingDetails: AdvancedSearchConfiguration;
}) {
  function hasProviderTypeinArray(
    arr: Array<{ provider_type: string }>,
    searchName: string
  ): boolean {
    return arr.some(
      (item) => item.provider_type.toLowerCase() === searchName.toLowerCase()
    );
  }

  const providers: CloudEmbeddingProviderFull[] = AVAILABLE_CLOUD_PROVIDERS.map(
    (model) => ({
      ...model,
      configured:
        embeddingProviderDetails &&
        hasProviderTypeinArray(embeddingProviderDetails, model.provider_type),
    })
  );
  const [liteLLMProvider, setLiteLLMProvider] = useState<
    EmbeddingDetails | undefined
  >(undefined);

  const [azureProvider, setAzureProvider] = useState<
    EmbeddingDetails | undefined
  >(undefined);

  useEffect(() => {
    const liteLLMProvider = embeddingProviderDetails?.find(
      (provider) =>
        provider.provider_type === EmbeddingProvider.LITELLM.toLowerCase()
    );
    setLiteLLMProvider(liteLLMProvider);
    const azureProvider = embeddingProviderDetails?.find(
      (provider) =>
        provider.provider_type === EmbeddingProvider.AZURE.toLowerCase()
    );
    setAzureProvider(azureProvider);
  }, [embeddingProviderDetails]);

  const isAzureConfigured = azureProvider !== undefined;

  // Get details of the configured Azure provider
  const azureProviderDetails = embeddingProviderDetails?.find(
    (provider) => provider.provider_type.toLowerCase() === "azure"
  );

  return (
    <div>
      <Title className="mt-8">
        Here are some cloud-based models to choose from.
      </Title>
      <Text as="p">
        {
          "These models require API keys and run in the clouds of the respective providers."
        }
      </Text>
      <Spacer rem={1} />

      <div className="gap-4 mt-2 pb-10 flex content-start flex-wrap">
        {providers.map((provider) => (
          <div key={provider.provider_type} className="mt-4 w-full">
            <div className="flex items-center mb-2">
              {provider.icon({ size: 40 })}
              <h2 className="ml-2  mt-2 text-xl font-bold">
                {getFormattedProviderName(provider.provider_type)}{" "}
                {provider.provider_type == EmbeddingProvider.COHERE &&
                  "(recommended)"}
              </h2>
              <HoverPopup
                mainContent={
                  <FiInfo className="ml-2 mt-2 cursor-pointer" size={18} />
                }
                popupContent={
                  <div className="text-sm text-text-800 w-52">
                    <div className="my-auto">{provider.description}</div>
                  </div>
                }
                style="dark"
              />
            </div>

            <button
              onClick={() => {
                if (!provider.configured) {
                  setShowTentativeProvider(provider);
                } else {
                  setChangeCredentialsProvider(provider);
                }
              }}
              className="mb-2  hover:underline text-sm cursor-pointer"
            >
              {provider.configured ? "Modify API key" : "Provide API key"}
            </button>
            <div className="flex flex-wrap gap-4">
              {provider.embedding_models.map((model) => (
                <CloudModelCard
                  key={`${provider.provider_type}-${model.model_name}`}
                  model={model}
                  provider={provider}
                  currentModel={currentModel}
                  setAlreadySelectedModel={setAlreadySelectedModel}
                  setShowTentativeModel={setShowTentativeModel}
                  setShowModelInQueue={setShowModelInQueue}
                  setShowTentativeProvider={setShowTentativeProvider}
                />
              ))}
            </div>
          </div>
        ))}

        <Spacer rem={1.5} />
        <Text as="p">
          {markdown(
            "Alternatively, you can use a self-hosted model using the LiteLLM proxy. This allows you to leverage various LLM providers through a unified interface that you control. [Learn more about LiteLLM](https://docs.litellm.ai/)"
          )}
        </Text>

        <div key={LITELLM_CLOUD_PROVIDER.provider_type} className="mt-4 w-full">
          <div className="flex items-center mb-2">
            {LITELLM_CLOUD_PROVIDER.icon({ size: 40 })}
            <h2 className="ml-2  mt-2 text-xl font-bold">
              {getFormattedProviderName(LITELLM_CLOUD_PROVIDER.provider_type)}{" "}
              {LITELLM_CLOUD_PROVIDER.provider_type ==
                EmbeddingProvider.COHERE && "(recommended)"}
            </h2>
            <HoverPopup
              mainContent={
                <FiInfo className="ml-2 mt-2 cursor-pointer" size={18} />
              }
              popupContent={
                <div className="text-sm text-text-800 w-52">
                  <div className="my-auto">
                    {LITELLM_CLOUD_PROVIDER.description}
                  </div>
                </div>
              }
              style="dark"
            />
          </div>
          <div className="w-full flex flex-col items-start">
            {!liteLLMProvider ? (
              <button
                onClick={() => setShowTentativeProvider(LITELLM_CLOUD_PROVIDER)}
                className="mb-2 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm cursor-pointer"
              >
                Set API Configuration
              </button>
            ) : (
              <button
                onClick={() =>
                  setChangeCredentialsProvider(LITELLM_CLOUD_PROVIDER)
                }
                className="mb-2 hover:underline text-sm cursor-pointer"
              >
                Modify API Configuration
              </button>
            )}

            {!liteLLMProvider && (
              <CardSection className="mt-2 w-full max-w-4xl bg-background-50 border border-background-200">
                <div className="p-4">
                  <Text as="p" font="heading-h3">
                    API URL Required
                  </Text>
                  <Spacer rem={0.5} />
                  <Text as="p">
                    {
                      'Before you can add models, you need to provide an API URL for your LiteLLM proxy. Click the "Provide API URL" button above to set up your LiteLLM configuration.'
                    }
                  </Text>
                  <Spacer rem={1} />
                  <div className="flex items-center">
                    <FiInfo className="text-blue-500 mr-2" size={18} />
                    <span className="text-blue-500">
                      <Text as="p">
                        {
                          "Once configured, you'll be able to add and manage your LiteLLM models here."
                        }
                      </Text>
                    </span>
                  </div>
                </div>
              </CardSection>
            )}
            {liteLLMProvider && (
              <>
                <div className="flex mb-4 flex-wrap gap-4">
                  {embeddingModelDetails
                    ?.filter(
                      (model) =>
                        model.provider_type ===
                        EmbeddingProvider.LITELLM.toLowerCase()
                    )
                    .map((model, index) => (
                      <CloudModelCard
                        key={
                          model.id ??
                          `${model.provider_type}-${model.model_name}-${index}`
                        }
                        model={model}
                        provider={LITELLM_CLOUD_PROVIDER}
                        currentModel={currentModel}
                        setAlreadySelectedModel={setAlreadySelectedModel}
                        setShowTentativeModel={setShowTentativeModel}
                        setShowModelInQueue={setShowModelInQueue}
                        setShowTentativeProvider={setShowTentativeProvider}
                      />
                    ))}
                </div>

                <CardSection
                  className={`mt-2 w-full max-w-4xl ${
                    currentModel.provider_type === EmbeddingProvider.LITELLM
                      ? "border-2 border-blue-500"
                      : ""
                  }`}
                >
                  <CustomEmbeddingModelForm
                    embeddingType={EmbeddingProvider.LITELLM}
                    provider={liteLLMProvider}
                    currentValues={
                      currentModel.provider_type === EmbeddingProvider.LITELLM
                        ? (currentModel as CloudEmbeddingModel)
                        : null
                    }
                    setShowTentativeModel={setShowTentativeModel}
                  />
                </CardSection>
              </>
            )}
          </div>
        </div>

        <Spacer rem={1.5} />
        <Text as="p">
          {
            "You can also use Azure OpenAI models for embeddings. Azure requires separate configuration for each model."
          }
        </Text>

        <div key={AZURE_CLOUD_PROVIDER.provider_type} className="mt-4 w-full">
          <div className="flex items-center mb-2">
            {AZURE_CLOUD_PROVIDER.icon({ size: 40 })}
            <h2 className="ml-2  mt-2 text-xl font-bold">
              {getFormattedProviderName(AZURE_CLOUD_PROVIDER.provider_type)}{" "}
            </h2>
            <HoverPopup
              mainContent={
                <FiInfo className="ml-2 mt-2 cursor-pointer" size={18} />
              }
              popupContent={
                <div className="text-sm text-text-800 w-52">
                  <div className="my-auto">
                    {AZURE_CLOUD_PROVIDER.description}
                  </div>
                </div>
              }
              style="dark"
            />
          </div>
        </div>

        <div className="w-full flex flex-col items-start">
          {!isAzureConfigured ? (
            <>
              <button
                onClick={() => setShowTentativeProvider(AZURE_CLOUD_PROVIDER)}
                className="mb-2 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm cursor-pointer"
              >
                Configure Azure OpenAI
              </button>
              <div className="mt-2 w-full max-w-4xl">
                <CardSection className="p-4 border border-background-200 rounded-lg shadow-sm">
                  <Text as="p" font="main-ui-action">
                    Configure Azure OpenAI for Embeddings
                  </Text>
                  <Spacer rem={0.5} />
                  <Text as="p">
                    {
                      'Click "Configure Azure OpenAI" to set up Azure OpenAI for embeddings.'
                    }
                  </Text>
                  <Spacer rem={0.75} />
                  <div className="flex items-center">
                    <FiInfo className="text-neutral-400 mr-2" size={16} />
                    <Text as="p">
                      {
                        "You'll need: API version, base URL, API key, model name, and deployment name."
                      }
                    </Text>
                  </div>
                </CardSection>
              </div>
            </>
          ) : (
            <>
              <div className="mb-6 w-full">
                <Text as="p" font="heading-h3">
                  Current Azure Configuration
                </Text>
                <Spacer rem={0.75} />

                {azureProviderDetails ? (
                  <CardSection className="bg-white shadow-sm border border-background-200 rounded-lg">
                    <div className="p-4 space-y-3">
                      <div className="flex justify-between">
                        <span className="font-medium">API Version:</span>
                        <span>{azureProviderDetails.api_version}</span>
                      </div>
                      <div className="flex justify-between">
                        <span className="font-medium">Base URL:</span>
                        <span>{azureProviderDetails.api_url}</span>
                      </div>
                      <div className="flex justify-between">
                        <span className="font-medium">Deployment Name:</span>
                        <span>{azureProviderDetails.deployment_name}</span>
                      </div>
                    </div>
                    <button
                      onClick={() =>
                        setChangeCredentialsProvider(AZURE_CLOUD_PROVIDER)
                      }
                      className="mt-2 px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600 text-sm"
                    >
                      Delete Current Azure Provider
                    </button>
                  </CardSection>
                ) : (
                  <CardSection className="bg-background-50 border border-background-200 rounded-lg">
                    <div className="p-4 text-text-500 text-center">
                      No Azure provider has been configured yet.
                    </div>
                  </CardSection>
                )}
              </div>

              <CardSection
                className={`mt-2 w-full max-w-4xl ${
                  currentModel.provider_type === EmbeddingProvider.AZURE
                    ? "border-2 border-blue-500"
                    : ""
                }`}
              >
                {azureProvider && (
                  <CustomEmbeddingModelForm
                    embeddingType={EmbeddingProvider.AZURE}
                    provider={azureProvider}
                    currentValues={
                      currentModel.provider_type === EmbeddingProvider.AZURE
                        ? (currentModel as CloudEmbeddingModel)
                        : null
                    }
                    setShowTentativeModel={setShowTentativeModel}
                  />
                )}
              </CardSection>
            </>
          )}
        </div>
      </div>
    </div>
  );
}

export function CloudModelCard({
  model,
  provider,
  currentModel,
  setAlreadySelectedModel,
  setShowTentativeModel,
  setShowModelInQueue,
  setShowTentativeProvider,
}: {
  model: CloudEmbeddingModel;
  provider: CloudEmbeddingProviderFull;
  currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel;
  setAlreadySelectedModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  setShowModelInQueue: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  setShowTentativeProvider: React.Dispatch<
    React.SetStateAction<CloudEmbeddingProvider | null>
  >;
}) {
  const [showDeleteModel, setShowDeleteModel] = useState(false);
  const modelId = typeof model.id === "number" ? model.id : null;
  const currentModelId =
    typeof currentModel.id === "number" ? currentModel.id : null;

  const idsMatch =
    modelId !== null && currentModelId !== null && modelId === currentModelId;

  const shouldCompareNames = modelId === null || currentModelId === null;

  const namesMatch =
    shouldCompareNames &&
    model.model_name === currentModel.model_name &&
    model.provider_type?.toLowerCase() ===
      currentModel.provider_type?.toLowerCase();

  const enabled = idsMatch || namesMatch;

  const deleteModel = async () => {
    if (!model.id) {
      toast.error("Model cannot be deleted");
      return;
    }

    const response = await deleteSearchSettings(model.id);

    if (response.ok) {
      toast.success("Model deleted successfully");
      setShowDeleteModel(false);
    } else {
      toast.error(
        "Failed to delete model. Ensure you are not attempting to delete a curently active model."
      );
    }
  };

  return (
    <div
      className={`p-4 w-96 border rounded-lg transition-all duration-200 ${
        enabled
          ? "border-blue-500 bg-blue-50 dark:bg-blue-950 shadow-md"
          : "border-background-300 hover:border-blue-300 hover:shadow-sm"
      } ${!provider.configured && "opacity-80 hover:opacity-100"}`}
    >
      {showDeleteModel && (
        <ConfirmEntityModal
          entityName={model.model_name}
          entityType="embedding model configuration"
          onSubmit={() => deleteModel()}
          onClose={() => setShowDeleteModel(false)}
        />
      )}

      <div className="flex items-center justify-between mb-3">
        <h3 className="font-bold dark:text-neutral-100 text-lg">
          {model.model_name}
        </h3>
        <div className="flex gap-x-2">
          {model.provider_type == EmbeddingProvider.LITELLM.toLowerCase() && (
            <button
              onClickCapture={() => setShowDeleteModel(true)}
              onClick={(e) => e.stopPropagation()}
              className="text-blue-500 hover:text-blue-700 transition-colors duration-200"
            >
              <FiTrash size={18} />
            </button>
          )}
          <a
            href={provider.website}
            target="_blank"
            rel="noopener noreferrer"
            onClick={(e) => e.stopPropagation()}
            className="text-blue-500 hover:text-blue-700 transition-colors duration-200"
          >
            <FiExternalLink size={18} />
          </a>
        </div>
      </div>
      <p className="text-sm text-text-600 dark:text-neutral-400 mb-2">
        {model.description}
      </p>
      {model?.provider_type?.toLowerCase() !=
        EmbeddingProvider.LITELLM.toLowerCase() && (
        <div className="text-xs text-text-500 mb-2">
          ${model.pricePerMillion}/M tokens
        </div>
      )}
      <div className="mt-3">
        <button
          className={`w-full p-2 rounded-lg text-sm ${
            enabled
              ? "bg-background-125 border border-border cursor-not-allowed"
              : "bg-background border border-border hover:bg-accent-background-hovered cursor-pointer"
          }`}
          onClick={() => {
            if (enabled) {
              setAlreadySelectedModel(model);
            } else if (
              provider.configured ||
              provider.provider_type === EmbeddingProvider.LITELLM
            ) {
              setShowTentativeModel(model);
            } else {
              setShowModelInQueue(model);
              setShowTentativeProvider(provider);
            }
          }}
          disabled={enabled}
        >
          {enabled ? "Selected Model" : "Select Model"}
        </button>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx
================================================
"use client";

import { toast } from "@/hooks/useToast";

import EmbeddingModelSelection from "../EmbeddingModelSelectionForm";
import { useCallback, useEffect, useMemo, useState, useRef } from "react";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { WarningCircle, Warning, CaretDownIcon } from "@phosphor-icons/react";
import {
  CloudEmbeddingModel,
  EmbeddingProvider,
  HostedEmbeddingModel,
} from "@/components/embedding/interfaces";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ErrorCallout } from "@/components/ErrorCallout";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ThreeDotsLoader } from "@/components/Loading";
import AdvancedEmbeddingFormPage from "./AdvancedEmbeddingFormPage";
import {
  AdvancedSearchConfiguration,
  EmbeddingPrecision,
  RerankingDetails,
  SavedSearchSettings,
  SwitchoverType,
} from "../interfaces";
import RerankingDetailsForm from "../RerankingFormPage";
import { useEmbeddingFormContext } from "@/components/context/EmbeddingContext";
import Modal from "@/refresh-components/Modal";
import InstantSwitchConfirmModal from "../modals/InstantSwitchConfirmModal";
import { useRouter } from "next/navigation";
import CardSection from "@/components/admin/CardSection";
import { combineSearchSettings } from "./utils";
import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SvgAlertTriangle, SvgArrowLeft, SvgArrowRight } from "@opal/icons";
export default function EmbeddingForm() {
  const { formStep, nextFormStep, prevFormStep } = useEmbeddingFormContext();
  const router = useRouter();

  const [advancedEmbeddingDetails, setAdvancedEmbeddingDetails] =
    useState<AdvancedSearchConfiguration>({
      index_name: "",
      multipass_indexing: true,
      enable_contextual_rag: false,
      contextual_rag_llm_name: null,
      contextual_rag_llm_provider: null,
      multilingual_expansion: [],
      disable_rerank_for_streaming: false,
      api_url: null,
      num_rerank: 0,
      embedding_precision: EmbeddingPrecision.BFLOAT16,
      reduced_dimension: null,
    });

  const [rerankingDetails, setRerankingDetails] = useState<RerankingDetails>({
    rerank_api_key: "",
    rerank_provider_type: null,
    rerank_model_name: "",
    rerank_api_url: null,
  });

  const [switchoverType, setSwitchoverType] = useState<SwitchoverType>(
    SwitchoverType.REINDEX
  );

  const [formErrors, setFormErrors] = useState<Record<string, string>>({});
  const [isFormValid, setIsFormValid] = useState(true);
  const [rerankFormErrors, setRerankFormErrors] = useState<
    Record<string, string>
  >({});
  const [isRerankFormValid, setIsRerankFormValid] = useState(true);
  const advancedFormRef = useRef(null);
  const rerankFormRef = useRef(null);

  const updateAdvancedEmbeddingDetails = (
    key: keyof AdvancedSearchConfiguration,
    value: any
  ) => {
    setAdvancedEmbeddingDetails((values) => ({ ...values, [key]: value }));
  };

  async function updateSearchSettings(searchSettings: SavedSearchSettings) {
    const response = await fetch(
      "/api/search-settings/update-inference-settings",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          ...searchSettings,
        }),
      }
    );
    return response;
  }

  const updateSelectedProvider = (
    model: CloudEmbeddingModel | HostedEmbeddingModel
  ) => {
    setSelectedProvider(model);
  };
  const [displayPoorModelName, setDisplayPoorModelName] = useState(true);
  const [showPoorModel, setShowPoorModel] = useState(false);
  const [showInstantSwitchConfirm, setShowInstantSwitchConfirm] =
    useState(false);
  const [modelTab, setModelTab] = useState<"open" | "cloud" | null>(null);

  const {
    data: currentEmbeddingModel,
    isLoading: isLoadingCurrentModel,
    error: currentEmbeddingModelError,
  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(
    SWR_KEYS.currentSearchSettings,
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );

  const [selectedProvider, setSelectedProvider] = useState<
    CloudEmbeddingModel | HostedEmbeddingModel | null
  >(currentEmbeddingModel!);

  const { data: searchSettings, isLoading: isLoadingSearchSettings } =
    useSWR<SavedSearchSettings | null>(
      SWR_KEYS.currentSearchSettings,
      errorHandlingFetcher,
      { refreshInterval: 5000 } // 5 seconds
    );

  useEffect(() => {
    if (searchSettings) {
      setAdvancedEmbeddingDetails({
        index_name: searchSettings.index_name,
        multipass_indexing: searchSettings.multipass_indexing,
        enable_contextual_rag: searchSettings.enable_contextual_rag,
        contextual_rag_llm_name: searchSettings.contextual_rag_llm_name,
        contextual_rag_llm_provider: searchSettings.contextual_rag_llm_provider,
        multilingual_expansion: searchSettings.multilingual_expansion,
        disable_rerank_for_streaming:
          searchSettings.disable_rerank_for_streaming,
        num_rerank: searchSettings.num_rerank,
        api_url: null,
        embedding_precision: searchSettings.embedding_precision,
        reduced_dimension: searchSettings.reduced_dimension,
      });

      setRerankingDetails({
        rerank_api_key: searchSettings.rerank_api_key,
        rerank_provider_type: searchSettings.rerank_provider_type,
        rerank_model_name: searchSettings.rerank_model_name,
        rerank_api_url: searchSettings.rerank_api_url,
      });
    }
  }, [searchSettings]);

  const originalRerankingDetails: RerankingDetails = searchSettings
    ? {
        rerank_api_key: searchSettings.rerank_api_key,
        rerank_provider_type: searchSettings.rerank_provider_type,
        rerank_model_name: searchSettings.rerank_model_name,
        rerank_api_url: searchSettings.rerank_api_url,
      }
    : {
        rerank_api_key: "",
        rerank_provider_type: null,
        rerank_model_name: "",
        rerank_api_url: null,
      };

  useEffect(() => {
    if (currentEmbeddingModel) {
      setSelectedProvider(currentEmbeddingModel);
    }
  }, [currentEmbeddingModel]);

  const needsReIndex =
    currentEmbeddingModel != selectedProvider ||
    searchSettings?.multipass_indexing !=
      advancedEmbeddingDetails.multipass_indexing ||
    searchSettings?.embedding_precision !=
      advancedEmbeddingDetails.embedding_precision ||
    searchSettings?.reduced_dimension !=
      advancedEmbeddingDetails.reduced_dimension ||
    searchSettings?.enable_contextual_rag !=
      advancedEmbeddingDetails.enable_contextual_rag;

  const updateSearch = useCallback(async () => {
    if (!selectedProvider) {
      return false;
    }
    const searchSettings = combineSearchSettings(
      selectedProvider,
      advancedEmbeddingDetails,
      rerankingDetails,
      selectedProvider.provider_type?.toLowerCase() as EmbeddingProvider | null,
      switchoverType
    );

    const response = await updateSearchSettings(searchSettings);
    if (response.ok) {
      return true;
    } else {
      toast.error("Failed to update search settings");
      return false;
    }
  }, [
    selectedProvider,
    advancedEmbeddingDetails,
    rerankingDetails,
    switchoverType,
  ]);

  const handleValidationChange = useCallback(
    (isValid: boolean, errors: Record<string, string>) => {
      setIsFormValid(isValid);
      setFormErrors(errors);
    },
    []
  );

  const handleRerankValidationChange = useCallback(
    (isValid: boolean, errors: Record<string, string>) => {
      setIsRerankFormValid(isValid);
      setRerankFormErrors(errors);
    },
    []
  );

  // Combine validation states for both forms
  const isOverallFormValid = isFormValid && isRerankFormValid;
  const combinedFormErrors = useMemo(() => {
    return { ...formErrors, ...rerankFormErrors };
  }, [formErrors, rerankFormErrors]);

  const ReIndexingButton = useMemo(() => {
    const ReIndexingButtonComponent = ({
      needsReIndex,
    }: {
      needsReIndex: boolean;
    }) => {
      return needsReIndex ? (
        <div className="flex mx-auto gap-x-1 ml-auto items-center">
          <div className="flex items-center h-fit">
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Button
              onClick={() => {
                if (switchoverType == SwitchoverType.INSTANT) {
                  setShowInstantSwitchConfirm(true);
                } else {
                  handleReIndex();
                  navigateToEmbeddingPage("search settings");
                }
              }}
              disabled={!isOverallFormValid}
              action
              className="rounded-r-none w-32 h-full"
            >
              {switchoverType == SwitchoverType.REINDEX
                ? "Re-index"
                : switchoverType == SwitchoverType.ACTIVE_ONLY
                  ? "Active Only"
                  : "Instant Switch"}
            </Button>
            <DropdownMenu>
              <DropdownMenuTrigger asChild>
                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                <Button
                  disabled={!isOverallFormValid}
                  action
                  className="rounded-l-none border-l border-white/20 px-1 h-[36px] w-[30px] min-w-[30px]"
                >
                  <CaretDownIcon className="text-text-inverted-05" />
                </Button>
              </DropdownMenuTrigger>
              <DropdownMenuContent>
                <DropdownMenuItem
                  onClick={() => {
                    setSwitchoverType(SwitchoverType.REINDEX);
                  }}
                >
                  <SimpleTooltip tooltip="Re-runs all connectors in the background before switching over. Takes longer but ensures no degredation of search during the switch.">
                    <span className="w-full text-left">
                      (Recommended) Re-index
                    </span>
                  </SimpleTooltip>
                </DropdownMenuItem>
                <DropdownMenuItem
                  onClick={() => {
                    setSwitchoverType(SwitchoverType.ACTIVE_ONLY);
                  }}
                >
                  <SimpleTooltip tooltip="Re-runs only active (non-paused) connectors in the background before switching over. Paused connectors won't block the switchover.">
                    <span className="w-full text-left">
                      Active Connectors Only
                    </span>
                  </SimpleTooltip>
                </DropdownMenuItem>
                <DropdownMenuItem
                  onClick={() => {
                    setSwitchoverType(SwitchoverType.INSTANT);
                  }}
                >
                  <SimpleTooltip tooltip="Immediately switches to new settings without re-indexing. Searches will be degraded until the re-indexing is complete.">
                    <span className="w-full text-left">Instant Switch</span>
                  </SimpleTooltip>
                </DropdownMenuItem>
              </DropdownMenuContent>
            </DropdownMenu>
          </div>
          {isOverallFormValid && (
            <div className="relative group">
              <WarningCircle
                className="text-text-800 cursor-help"
                size={20}
                weight="fill"
              />
              <div className="absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64">
                <p className="font-semibold mb-2">Needs re-indexing due to:</p>
                <ul className="list-disc pl-5">
                  {currentEmbeddingModel != selectedProvider && (
                    <li>Changed embedding provider</li>
                  )}
                  {searchSettings?.multipass_indexing !=
                    advancedEmbeddingDetails.multipass_indexing && (
                    <li>Multipass indexing modification</li>
                  )}
                  {searchSettings?.embedding_precision !=
                    advancedEmbeddingDetails.embedding_precision && (
                    <li>Embedding precision modification</li>
                  )}
                  {searchSettings?.reduced_dimension !=
                    advancedEmbeddingDetails.reduced_dimension && (
                    <li>Reduced dimension modification</li>
                  )}
                  {(searchSettings?.enable_contextual_rag !=
                    advancedEmbeddingDetails.enable_contextual_rag ||
                    searchSettings?.contextual_rag_llm_name !=
                      advancedEmbeddingDetails.contextual_rag_llm_name ||
                    searchSettings?.contextual_rag_llm_provider !=
                      advancedEmbeddingDetails.contextual_rag_llm_provider) && (
                    <li>Contextual RAG modification</li>
                  )}
                </ul>
              </div>
            </div>
          )}
          {!isOverallFormValid &&
            Object.keys(combinedFormErrors).length > 0 && (
              <div className="relative group">
                <Warning
                  className="text-red-500 cursor-help"
                  size={20}
                  weight="fill"
                />
                <div className="absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64">
                  <p className="font-semibold mb-2">Validation Errors:</p>
                  <ul className="list-disc pl-5">
                    {Object.entries(combinedFormErrors).map(
                      ([field, error]) => (
                        <li key={field}>
                          {field}: {error}
                        </li>
                      )
                    )}
                  </ul>
                </div>
              </div>
            )}
        </div>
      ) : (
        <div className="flex mx-auto gap-x-1 ml-auto items-center">
          <OpalButton
            disabled={!isOverallFormValid}
            onClick={() => {
              updateSearch();
              navigateToEmbeddingPage("search settings");
            }}
          >
            Update Search
          </OpalButton>
          {!isOverallFormValid &&
            Object.keys(combinedFormErrors).length > 0 && (
              <div className="relative group">
                <Warning
                  className="text-red-500 cursor-help"
                  size={20}
                  weight="fill"
                />
                <div className="absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64">
                  <p className="font-semibold mb-2 text-red-400">
                    Validation Errors:
                  </p>
                  <ul className="list-disc pl-5">
                    {Object.entries(combinedFormErrors).map(
                      ([field, error]) => (
                        <li key={field}>{error}</li>
                      )
                    )}
                  </ul>
                </div>
              </div>
            )}
        </div>
      );
    };
    ReIndexingButtonComponent.displayName = "ReIndexingButton";
    return ReIndexingButtonComponent;
  }, [needsReIndex, switchoverType, isOverallFormValid, combinedFormErrors]);

  if (!selectedProvider) {
    return <ThreeDotsLoader />;
  }
  if (currentEmbeddingModelError || !currentEmbeddingModel) {
    return <ErrorCallout errorTitle="Failed to fetch embedding model status" />;
  }

  const updateCurrentModel = (newModel: string) => {
    setAdvancedEmbeddingDetails((values) => ({
      ...values,
      model_name: newModel,
    }));
  };

  const navigateToEmbeddingPage = (changedResource: string) => {
    router.push("/admin/configuration/search?message=search-settings");
  };

  const handleReIndex = async () => {
    if (!selectedProvider) {
      return;
    }
    let searchSettings: SavedSearchSettings;

    if (selectedProvider.provider_type != null) {
      // This is a cloud model
      searchSettings = combineSearchSettings(
        selectedProvider,
        advancedEmbeddingDetails,
        rerankingDetails,
        selectedProvider.provider_type
          ?.toLowerCase()
          .split(" ")[0] as EmbeddingProvider | null,
        switchoverType
      );
    } else {
      // This is a locally hosted model
      searchSettings = combineSearchSettings(
        selectedProvider,
        advancedEmbeddingDetails,
        rerankingDetails,
        null,
        switchoverType
      );
    }

    searchSettings.index_name = null;

    const response = await fetch(
      "/api/search-settings/set-new-search-settings",
      {
        method: "POST",
        body: JSON.stringify(searchSettings),
        headers: {
          "Content-Type": "application/json",
        },
      }
    );

    if (response.ok) {
      navigateToEmbeddingPage("embedding model");
    } else {
      toast.error("Failed to update embedding model");

      alert(`Failed to update embedding model - ${await response.text()}`);
    }
  };

  return (
    <div className="mx-auto mb-8 w-full">
      <div className="mx-auto max-w-4xl">
        {formStep == 0 && (
          <>
            <h2 className="text-2xl font-bold mb-4 text-text-800">
              Select an Embedding Model
            </h2>
            <Text as="p" className="mb-4">
              Note that updating the backing model will require a complete
              re-indexing of all documents across every connected source. This
              is taken care of in the background so that the system can continue
              to be used, but depending on the size of the corpus, this could
              take hours or days. You can monitor the progress of the
              re-indexing on this page while the models are being switched.
            </Text>
            <CardSection>
              <EmbeddingModelSelection
                updateCurrentModel={updateCurrentModel}
                setModelTab={setModelTab}
                modelTab={modelTab}
                selectedProvider={selectedProvider}
                currentEmbeddingModel={currentEmbeddingModel}
                updateSelectedProvider={updateSelectedProvider}
                advancedEmbeddingDetails={advancedEmbeddingDetails}
              />
            </CardSection>
            <div className="mt-4 flex w-full justify-end">
              <OpalButton
                variant="action"
                onClick={() => {
                  if (
                    selectedProvider.model_name.includes("e5") &&
                    displayPoorModelName
                  ) {
                    setDisplayPoorModelName(false);
                    setShowPoorModel(true);
                  } else {
                    // Skip reranking step (step 1), go directly to advanced settings (step 2)
                    nextFormStep();
                    nextFormStep();
                  }
                }}
                rightIcon={SvgArrowRight}
              >
                Continue
              </OpalButton>
            </div>
          </>
        )}
        {showPoorModel && (
          <Modal open onOpenChange={() => setShowPoorModel(false)}>
            <Modal.Content>
              <Modal.Header
                icon={SvgAlertTriangle}
                title={`Are you sure you want to select ${selectedProvider.model_name}?`}
                onClose={() => setShowPoorModel(false)}
              />
              <Modal.Body>
                <div className="text-lg">
                  <Text as="p">
                    {`${selectedProvider.model_name} is a lower accuracy model. We recommend the following alternatives:`}
                  </Text>
                  <ul className="list-disc list-inside mt-2 ml-4">
                    <li>
                      <Text as="p">
                        Cohere embed-english-v3.0 for cloud-based
                      </Text>
                    </li>
                    <li>
                      <Text as="p">
                        Nomic nomic-embed-text-v1 for self-hosted
                      </Text>
                    </li>
                  </ul>
                </div>
              </Modal.Body>
              <Modal.Footer>
                <OpalButton
                  prominence="secondary"
                  onClick={() => setShowPoorModel(false)}
                >
                  Cancel update
                </OpalButton>
                <OpalButton
                  onClick={() => {
                    setShowPoorModel(false);
                    // Skip reranking step (step 1), go directly to advanced settings (step 2)
                    nextFormStep();
                    nextFormStep();
                  }}
                >
                  {`Continue with ${selectedProvider.model_name}`}
                </OpalButton>
              </Modal.Footer>
            </Modal.Content>
          </Modal>
        )}

        {showInstantSwitchConfirm && (
          <InstantSwitchConfirmModal
            onClose={() => setShowInstantSwitchConfirm(false)}
            onConfirm={() => {
              setShowInstantSwitchConfirm(false);
              handleReIndex();
              navigateToEmbeddingPage("search settings");
            }}
          />
        )}

        {formStep == 1 && (
          <>
            <h2 className="text-2xl font-bold mb-4 text-text-800">
              Select a Reranking Model
            </h2>
            <Text as="p" className="mb-4">
              Updating the reranking model does not require re-indexing
              documents. The reranker helps improve search quality by reordering
              results after the initial embedding search. Changes will take
              effect immediately for all new searches.
            </Text>

            <CardSection>
              <RerankingDetailsForm
                ref={rerankFormRef}
                setModelTab={setModelTab}
                modelTab={
                  originalRerankingDetails.rerank_model_name
                    ? modelTab
                    : modelTab || "cloud"
                }
                currentRerankingDetails={rerankingDetails}
                originalRerankingDetails={originalRerankingDetails}
                setRerankingDetails={setRerankingDetails}
                onValidationChange={handleRerankValidationChange}
              />
            </CardSection>

            <div className={`mt-4 w-full grid grid-cols-3`}>
              <OpalButton
                prominence="secondary"
                icon={SvgArrowLeft}
                onClick={() => prevFormStep()}
              >
                Previous
              </OpalButton>

              <ReIndexingButton needsReIndex={needsReIndex} />

              <div className="flex w-full justify-end">
                <OpalButton
                  prominence="secondary"
                  onClick={() => {
                    nextFormStep();
                  }}
                  rightIcon={SvgArrowRight}
                >
                  Advanced
                </OpalButton>
              </div>
            </div>
          </>
        )}
        {formStep == 2 && (
          <>
            <h2 className="text-2xl font-bold mb-4 text-text-800">
              Advanced Search Configuration
            </h2>
            <Text as="p" className="mb-4">
              Configure advanced embedding and search settings. Changes will
              require re-indexing documents.
            </Text>

            <CardSection>
              <AdvancedEmbeddingFormPage
                ref={advancedFormRef}
                advancedEmbeddingDetails={advancedEmbeddingDetails}
                updateAdvancedEmbeddingDetails={updateAdvancedEmbeddingDetails}
                embeddingProviderType={selectedProvider.provider_type}
                onValidationChange={handleValidationChange}
              />
            </CardSection>

            <div className={`mt-4 grid  grid-cols-3 w-full `}>
              <OpalButton
                prominence="secondary"
                onClick={() => {
                  // Skip reranking step (step 1), go back to embedding model (step 0)
                  prevFormStep();
                  prevFormStep();
                }}
                icon={SvgArrowLeft}
              >
                Previous
              </OpalButton>

              <ReIndexingButton needsReIndex={needsReIndex} />
            </div>
          </>
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx
================================================
"use client";

import Button from "@/refresh-components/buttons/Button";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Title from "@/components/ui/title";
import { ModelSelector } from "../../../../components/embedding/ModelSelector";
import {
  AVAILABLE_MODELS,
  CloudEmbeddingModel,
  HostedEmbeddingModel,
} from "../../../../components/embedding/interfaces";
import { CustomModelForm } from "../../../../components/embedding/CustomModelForm";
import { useState } from "react";
import CardSection from "@/components/admin/CardSection";
export default function OpenEmbeddingPage({
  onSelectOpenSource,
  selectedProvider,
}: {
  onSelectOpenSource: (model: HostedEmbeddingModel) => void;
  selectedProvider: HostedEmbeddingModel | CloudEmbeddingModel;
}) {
  const [configureModel, setConfigureModel] = useState(false);
  return (
    <div>
      <Title className="mt-8">
        Here are some locally-hosted models to choose from.
      </Title>
      <Text as="p">
        {
          "These models can be used without any API keys, and can leverage a GPU for faster inference."
        }
      </Text>
      <Spacer rem={1} />
      <ModelSelector
        modelOptions={AVAILABLE_MODELS}
        setSelectedModel={onSelectOpenSource}
        currentEmbeddingModel={selectedProvider}
      />

      <Spacer rem={1.5} />
      <Text as="p">
        {markdown(
          "Alternatively, (if you know what you're doing) you can specify a [SentenceTransformers](https://www.sbert.net/)-compatible model of your choice below. The rough list of supported models can be found [here](https://huggingface.co/models?library=sentence-transformers&sort=trending)."
        )}
      </Text>
      <Text as="p">
        {markdown(
          "**NOTE:** not all models listed will work with Onyx, since some have unique interfaces or special requirements. If in doubt, reach out to the Onyx team."
        )}
      </Text>
      {!configureModel && (
        // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
        <Button
          onClick={() => setConfigureModel(true)}
          className="mt-4"
          secondary
        >
          Configure custom model
        </Button>
      )}
      {configureModel && (
        <div className="w-full flex">
          <CardSection className="mt-4 2xl:w-4/6 mx-auto">
            <CustomModelForm onSubmit={onSelectOpenSource} />
          </CardSection>
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/admin/embeddings/pages/utils.ts
================================================
import {
  CloudEmbeddingProvider,
  HostedEmbeddingModel,
} from "@/components/embedding/interfaces";

import {
  AdvancedSearchConfiguration,
  SavedSearchSettings,
  SwitchoverType,
} from "../interfaces";

import { EmbeddingProvider } from "@/components/embedding/interfaces";
import { RerankingDetails } from "../interfaces";

export const deleteSearchSettings = async (search_settings_id: number) => {
  const response = await fetch(`/api/search-settings/delete-search-settings`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ search_settings_id }),
  });
  return response;
};

export const testEmbedding = async ({
  provider_type,
  modelName,
  apiKey,
  apiUrl,
  apiVersion,
  deploymentName,
}: {
  provider_type: string;
  modelName: string;
  apiKey: string | null;
  apiUrl: string | null;
  apiVersion: string | null;
  deploymentName: string | null;
}) => {
  const testModelName =
    provider_type === "openai" ? "text-embedding-3-small" : modelName;

  const testResponse = await fetch("/api/admin/embedding/test-embedding", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      provider_type: provider_type,
      api_key: apiKey,
      api_url: apiUrl,
      model_name: testModelName,
      api_version: apiVersion,
      deployment_name: deploymentName,
    }),
  });

  return testResponse;
};

// We use a spread operation to merge properties from multiple objects into a single object.
// Advanced embedding details may update default values.
// Do NOT modify the order unless you are positive the new hierarchy is correct.
export const combineSearchSettings = (
  selectedProvider: CloudEmbeddingProvider | HostedEmbeddingModel,
  advancedEmbeddingDetails: AdvancedSearchConfiguration,
  rerankingDetails: RerankingDetails,
  provider_type: EmbeddingProvider | null,
  switchover_type?: SwitchoverType
): SavedSearchSettings => {
  return {
    ...selectedProvider,
    ...advancedEmbeddingDetails,
    ...rerankingDetails,
    provider_type: provider_type,
    switchover_type,
  };
};


================================================
FILE: web/src/app/admin/federated/[id]/page.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { notFound } from "next/navigation";
import { Loader2 } from "lucide-react";
import { useFederatedConnector } from "./useFederatedConnector";
import { FederatedConnectorForm } from "@/components/admin/federated/FederatedConnectorForm";

export default function EditFederatedConnectorPage(props: {
  params: Promise<{ id: string }>;
}) {
  const [params, setParams] = useState<{ id: string } | null>(null);

  useEffect(() => {
    props.params.then(setParams);
  }, [props.params]);

  const { sourceType, connectorData, credentialSchema, isLoading, error } =
    useFederatedConnector(params?.id ?? "");

  if (isLoading) {
    return (
      <div className="flex justify-center w-full h-full">
        <div className="mt-12 w-full max-w-4xl mx-auto">
          <div className="flex flex-col items-center justify-center py-16">
            <Loader2 className="h-8 w-8 animate-spin text-blue-500 mb-4" />
            <div className="text-center">
              <p className="text-lg font-medium text-gray-700 mb-2">
                Loading connector configuration...
              </p>
              <p className="text-sm text-gray-500">
                Retrieving connector details and credential schema
              </p>
            </div>
          </div>
        </div>
      </div>
    );
  }

  if (error) {
    return (
      <div className="flex justify-center w-full h-full">
        <div className="mt-12 w-full max-w-4xl mx-auto">
          <div className="text-center">
            <h1 className="text-2xl font-bold text-red-600 mb-4">Error</h1>
            <p className="text-gray-600">{error}</p>
          </div>
        </div>
      </div>
    );
  }

  if (!sourceType || !params) {
    notFound();
  }

  const connectorId = parseInt(params.id);

  return (
    <div className="flex justify-center w-full h-full">
      <div className="mt-12 w-full max-w-4xl mx-auto">
        <FederatedConnectorForm
          connector={sourceType}
          connectorId={connectorId}
          preloadedConnectorData={connectorData ?? undefined}
          preloadedCredentialSchema={credentialSchema ?? undefined}
        />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/federated/[id]/useFederatedConnector.ts
================================================
import { useState, useEffect } from "react";
import {
  ConfigurableSources,
  FederatedConnectorDetail,
  CredentialSchemaResponse,
} from "@/lib/types";

interface UseFederatedConnectorResult {
  sourceType: ConfigurableSources | null;
  connectorData: FederatedConnectorDetail | null;
  credentialSchema: CredentialSchemaResponse | null;
  isLoading: boolean;
  error: string | null;
}

export function useFederatedConnector(
  connectorId: string
): UseFederatedConnectorResult {
  const [sourceType, setSourceType] = useState<ConfigurableSources | null>(
    null
  );
  const [connectorData, setConnectorData] =
    useState<FederatedConnectorDetail | null>(null);
  const [credentialSchema, setCredentialSchema] =
    useState<CredentialSchemaResponse | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    const fetchData = async () => {
      try {
        setIsLoading(true);
        setError(null);

        // First, fetch connector details to get the source type
        const connectorResponse = await fetch(`/api/federated/${connectorId}`);

        if (!connectorResponse.ok) {
          throw new Error(
            `Failed to fetch connector: ${connectorResponse.statusText}`
          );
        }

        const connectorData: FederatedConnectorDetail =
          await connectorResponse.json();

        // Extract source type from the federated source string (remove 'federated_' prefix)
        const extractedSourceType = connectorData.source.replace(
          /^federated_/,
          ""
        ) as ConfigurableSources;

        // Now fetch credential schema and set state in parallel
        const schemaPromise = fetch(
          `/api/federated/sources/federated_${extractedSourceType}/credentials/schema`
        );

        // Set the data we already have
        setConnectorData(connectorData);
        setSourceType(extractedSourceType);

        // Wait for schema fetch to complete
        const schemaResponse = await schemaPromise;

        if (!schemaResponse.ok) {
          throw new Error(
            `Failed to fetch schema: ${schemaResponse.statusText}`
          );
        }

        const schemaData: CredentialSchemaResponse =
          await schemaResponse.json();
        setCredentialSchema(schemaData);
      } catch (error) {
        console.error("Error fetching federated connector data:", error);
        setError(`Failed to load connector: ${error}`);
      } finally {
        setIsLoading(false);
      }
    };

    if (connectorId) {
      fetchData();
    }
  }, [connectorId]);

  return {
    sourceType,
    connectorData,
    credentialSchema,
    isLoading,
    error,
  };
}


================================================
FILE: web/src/app/admin/groups/[id]/page.tsx
================================================
"use client";

import { use } from "react";
import EditGroupPage from "@/refresh-pages/admin/GroupsPage/EditGroupPage";

export default function EditGroupRoute({
  params,
}: {
  params: Promise<{ id: string }>;
}) {
  const { id } = use(params);
  return <EditGroupPage groupId={Number(id)} />;
}


================================================
FILE: web/src/app/admin/groups/create/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage/CreateGroupPage";


================================================
FILE: web/src/app/admin/groups/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage";


================================================
FILE: web/src/app/admin/groups2/[id]/page.tsx
================================================
"use client";

import { use } from "react";
import EditGroupPage from "@/refresh-pages/admin/GroupsPage/EditGroupPage";

export default function EditGroupRoute({
  params,
}: {
  params: Promise<{ id: string }>;
}) {
  const { id } = use(params);
  const groupId = Number(id);
  if (Number.isNaN(groupId)) {
    return null;
  }
  return <EditGroupPage groupId={groupId} />;
}


================================================
FILE: web/src/app/admin/groups2/create/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage/CreateGroupPage";


================================================
FILE: web/src/app/admin/groups2/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage";


================================================
FILE: web/src/app/admin/hooks/page.tsx
================================================
export { default } from "@/ee/refresh-pages/admin/HooksPage";


================================================
FILE: web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx
================================================
import React from "react";
import {
  Table,
  TableRow,
  TableHead,
  TableBody,
  TableCell,
  TableHeader,
} from "@/components/ui/table";
import { Badge } from "@/components/ui/badge";
import { CCPairStatus } from "@/components/Status";
import { timeAgo } from "@/lib/time";
import {
  ValidSources,
  ConnectorIndexingStatusLiteResponse,
  SourceSummary,
  ConnectorIndexingStatusLite,
  FederatedConnectorStatus,
} from "@/lib/types";
import type { Route } from "next";
import { useRouter } from "next/navigation";
import Truncated from "@/refresh-components/texts/Truncated";
import {
  FiChevronDown,
  FiChevronRight,
  FiLock,
  FiUnlock,
  FiRefreshCw,
} from "react-icons/fi";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SourceIcon } from "@/components/SourceIcon";
import { getSourceDisplayName } from "@/lib/sources";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { ConnectorCredentialPairStatus } from "../../connector/[ccPairId]/types";
import { PageSelector } from "@/components/PageSelector";
import { ConnectorStaggeredSkeleton } from "./ConnectorRowSkeleton";
import { Button } from "@opal/components";
import { SvgSettings } from "@opal/icons";

// Helper to handle navigation with cmd/ctrl+click support
// NOTE: using this rather than Next/Link (or similar) since shadcn
// table row components must be direct descendants of the table component
// and putting the <Link> inside the <TableRow> would causes some parts of the
// row to not navigate as expected.
function navigateWithModifier(
  e: React.MouseEvent,
  url: string,
  router: ReturnType<typeof useRouter>
) {
  if (e.metaKey || e.ctrlKey) {
    window.open(url, "_blank");
  } else {
    router.push(url as Route);
  }
}

function isFederatedConnectorStatus(
  status: ConnectorIndexingStatusLite | FederatedConnectorStatus
) {
  return status.name?.toLowerCase().includes("federated");
}

const NUMBER_OF_ROWS_PER_PAGE = 10;
const NUMBER_OF_COLUMNS = 6;

function SummaryRow({
  source,
  summary,
  isOpen,
  onToggle,
}: {
  source: ValidSources;
  summary: SourceSummary;
  isOpen: boolean;
  onToggle: () => void;
}) {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  return (
    <TableRow
      onClick={onToggle}
      className="border-border dark:hover:bg-neutral-800 dark:border-neutral-700 group hover:bg-background-settings-hover/20 bg-background-sidebar py-4 rounded-sm !border cursor-pointer"
    >
      <TableCell>
        <div className="text-xl flex items-center truncate ellipsis gap-x-2 font-semibold">
          <div className="cursor-pointer">
            {isOpen ? (
              <FiChevronDown size={20} />
            ) : (
              <FiChevronRight size={20} />
            )}
          </div>
          <SourceIcon iconSize={20} sourceType={source} />
          {getSourceDisplayName(source)}
        </div>
      </TableCell>

      <TableCell>
        <div className="text-sm text-neutral-500 dark:text-neutral-300">
          Total Connectors
        </div>
        <div className="text-xl font-semibold">{summary.total_connectors}</div>
      </TableCell>

      <TableCell>
        <div className="text-sm text-neutral-500 dark:text-neutral-300">
          Active Connectors
        </div>
        <p className="flex text-xl mx-auto font-semibold items-center text-lg mt-1">
          {summary.active_connectors}/{summary.total_connectors}
        </p>
      </TableCell>

      {isPaidEnterpriseFeaturesEnabled && (
        <TableCell>
          <div className="text-sm text-neutral-500 dark:text-neutral-300">
            Public Connectors
          </div>
          <p className="flex text-xl mx-auto font-semibold items-center text-lg mt-1">
            {summary.public_connectors}/{summary.total_connectors}
          </p>
        </TableCell>
      )}

      <TableCell>
        <div className="text-sm text-neutral-500 dark:text-neutral-300">
          Total Docs Indexed
        </div>
        <div className="text-xl font-semibold">
          {summary.total_docs_indexed.toLocaleString()}
        </div>
      </TableCell>

      <TableCell />
    </TableRow>
  );
}

function ConnectorRow({
  ccPairsIndexingStatus,
  invisible,
  isEditable,
}: {
  ccPairsIndexingStatus: ConnectorIndexingStatusLite;
  invisible?: boolean;
  isEditable: boolean;
}) {
  const router = useRouter();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const connectorUrl = `/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`;

  const handleRowClick = (e: React.MouseEvent) => {
    navigateWithModifier(e, connectorUrl, router);
  };

  return (
    <TableRow
      className={`
  border border-border dark:border-neutral-700
          hover:bg-accent-background ${
            invisible
              ? "invisible !h-0 !-mb-10 !border-none"
              : "!border border-border dark:border-neutral-700"
          }  w-full cursor-pointer relative `}
      onClick={handleRowClick}
    >
      <TableCell className="">
        <Truncated>{ccPairsIndexingStatus.name}</Truncated>
      </TableCell>
      <TableCell>
        {timeAgo(ccPairsIndexingStatus?.last_success) || "-"}
      </TableCell>
      <TableCell>
        <CCPairStatus
          ccPairStatus={
            ccPairsIndexingStatus.last_finished_status !== null
              ? ccPairsIndexingStatus.cc_pair_status
              : ccPairsIndexingStatus.last_status == "not_started"
                ? ConnectorCredentialPairStatus.SCHEDULED
                : ConnectorCredentialPairStatus.INITIAL_INDEXING
          }
          inRepeatedErrorState={ccPairsIndexingStatus.in_repeated_error_state}
          lastIndexAttemptStatus={ccPairsIndexingStatus.last_status}
        />
      </TableCell>
      {isPaidEnterpriseFeaturesEnabled && (
        <TableCell>
          {ccPairsIndexingStatus.access_type === "public" ? (
            <Badge variant={isEditable ? "success" : "default"} icon={FiUnlock}>
              Organization Public
            </Badge>
          ) : ccPairsIndexingStatus.access_type === "sync" ? (
            <Badge
              variant={isEditable ? "auto-sync" : "default"}
              icon={FiRefreshCw}
            >
              Inherited from{" "}
              {getSourceDisplayName(ccPairsIndexingStatus.source)}
            </Badge>
          ) : (
            <Badge variant={isEditable ? "private" : "default"} icon={FiLock}>
              Private
            </Badge>
          )}
        </TableCell>
      )}
      <TableCell>{ccPairsIndexingStatus.docs_indexed}</TableCell>
      <TableCell>
        {isEditable && (
          <SimpleTooltip tooltip="Manage Connector">
            <Button icon={SvgSettings} prominence="tertiary" />
          </SimpleTooltip>
        )}
      </TableCell>
    </TableRow>
  );
}

function FederatedConnectorRow({
  federatedConnector,
  invisible,
}: {
  federatedConnector: FederatedConnectorStatus;
  invisible?: boolean;
}) {
  const router = useRouter();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const federatedUrl = `/admin/federated/${federatedConnector.id}`;

  const handleRowClick = (e: React.MouseEvent) => {
    navigateWithModifier(e, federatedUrl, router);
  };

  return (
    <TableRow
      className={`
  border border-border dark:border-neutral-700
          hover:bg-accent-background ${
            invisible
              ? "invisible !h-0 !-mb-10 !border-none"
              : "!border border-border dark:border-neutral-700"
          }  w-full cursor-pointer relative `}
      onClick={handleRowClick}
    >
      <TableCell className="">
        <Truncated>{federatedConnector.name}</Truncated>
      </TableCell>
      <TableCell>N/A</TableCell>
      <TableCell>
        <Badge variant="success">Indexed</Badge>
      </TableCell>
      {isPaidEnterpriseFeaturesEnabled && (
        <TableCell>
          <Badge variant="secondary" icon={FiRefreshCw}>
            Federated Access
          </Badge>
        </TableCell>
      )}
      <TableCell>N/A</TableCell>
      <TableCell>
        <Button
          icon={SvgSettings}
          prominence="tertiary"
          onClick={(e: React.MouseEvent) => {
            e.stopPropagation();
            navigateWithModifier(e, federatedUrl, router);
          }}
          tooltip="Manage Federated Connector"
        />
      </TableCell>
    </TableRow>
  );
}

export function CCPairIndexingStatusTable({
  ccPairsIndexingStatuses,
  connectorsToggled,
  toggleSource,
  onPageChange,
  sourceLoadingStates = {} as Record<ValidSources, boolean>,
}: {
  ccPairsIndexingStatuses: ConnectorIndexingStatusLiteResponse[];
  connectorsToggled: Record<ValidSources, boolean>;
  toggleSource: (source: ValidSources, toggled?: boolean | null) => void;
  onPageChange: (source: ValidSources, newPage: number) => void;
  sourceLoadingStates?: Record<ValidSources, boolean>;
}) {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  return (
    <Table className="-mt-8 table-fixed">
      <TableHeader>
        <ConnectorRow
          invisible
          ccPairsIndexingStatus={{
            cc_pair_id: 1,
            name: "Sample File Connector",
            cc_pair_status: ConnectorCredentialPairStatus.ACTIVE,
            last_status: "success",
            source: ValidSources.File,
            access_type: "public",
            docs_indexed: 1000,
            last_success: "2023-07-01T12:00:00Z",
            last_finished_status: "success",
            is_editable: false,
            in_repeated_error_state: false,
            in_progress: false,
            latest_index_attempt_docs_indexed: 0,
          }}
          isEditable={false}
        />
      </TableHeader>
      <TableBody>
        {ccPairsIndexingStatuses.map((ccPairStatus) => (
          <React.Fragment key={ccPairStatus.source}>
            <TableRow className="border-none">
              <TableCell
                colSpan={
                  isPaidEnterpriseFeaturesEnabled
                    ? NUMBER_OF_COLUMNS
                    : NUMBER_OF_COLUMNS - 1
                }
                className="h-4 p-0"
              />
            </TableRow>
            <SummaryRow
              source={ccPairStatus.source}
              summary={ccPairStatus.summary}
              isOpen={connectorsToggled[ccPairStatus.source] || false}
              onToggle={() => toggleSource(ccPairStatus.source)}
            />
            {connectorsToggled[ccPairStatus.source] && (
              <>
                {sourceLoadingStates[ccPairStatus.source] && (
                  <ConnectorStaggeredSkeleton rowCount={8} height="h-[79px]" />
                )}
                {!sourceLoadingStates[ccPairStatus.source] && (
                  <>
                    <TableRow className="border border-border dark:border-neutral-700">
                      <TableHead>Name</TableHead>
                      <TableHead>Last Indexed</TableHead>
                      <TableHead>Status</TableHead>
                      {isPaidEnterpriseFeaturesEnabled && (
                        <TableHead>Permissions / Access</TableHead>
                      )}
                      <TableHead>Total Docs</TableHead>
                      <TableHead></TableHead>
                    </TableRow>
                    {ccPairStatus.indexing_statuses.map((indexingStatus) => {
                      if (isFederatedConnectorStatus(indexingStatus)) {
                        const status =
                          indexingStatus as FederatedConnectorStatus;
                        return (
                          <FederatedConnectorRow
                            key={status.id}
                            federatedConnector={status}
                          />
                        );
                      } else {
                        const status =
                          indexingStatus as ConnectorIndexingStatusLite;
                        return (
                          <ConnectorRow
                            key={status.cc_pair_id}
                            ccPairsIndexingStatus={status}
                            isEditable={status.is_editable}
                          />
                        );
                      }
                    })}
                    {/* Add dummy rows to reach 10 total rows for cleaner UI */}
                    {ccPairStatus.indexing_statuses.length <
                      NUMBER_OF_ROWS_PER_PAGE &&
                      ccPairStatus.total_pages > 1 &&
                      Array.from({
                        length:
                          NUMBER_OF_ROWS_PER_PAGE -
                          ccPairStatus.indexing_statuses.length,
                      }).map((_, index) => {
                        const isLastDummyRow =
                          index ===
                          NUMBER_OF_ROWS_PER_PAGE -
                            ccPairStatus.indexing_statuses.length -
                            1;
                        return (
                          <TableRow
                            key={`dummy-${ccPairStatus.source}-${index}`}
                            className={
                              isLastDummyRow
                                ? "border-l border-r border-b border-border dark:border-neutral-700"
                                : "border-l border-r border-t-0 border-b-0 border-border dark:border-neutral-700"
                            }
                            style={
                              isLastDummyRow
                                ? {
                                    borderBottom: "1px solid var(--border)",
                                    borderRight: "1px solid var(--border)",
                                    borderLeft: "1px solid var(--border)",
                                  }
                                : {}
                            }
                          >
                            {isLastDummyRow ? (
                              <TableCell
                                colSpan={
                                  isPaidEnterpriseFeaturesEnabled
                                    ? NUMBER_OF_COLUMNS
                                    : NUMBER_OF_COLUMNS - 1
                                }
                                className="h-[56px] text-center text-sm text-gray-400 dark:text-gray-500 border-b border-r border-l border-border dark:border-neutral-700"
                              >
                                <span className="italic">
                                  All caught up! No more connectors to show
                                </span>
                              </TableCell>
                            ) : (
                              <>
                                <TableCell className="h-[56px]"></TableCell>
                                <TableCell></TableCell>
                                <TableCell></TableCell>
                                {isPaidEnterpriseFeaturesEnabled && (
                                  <TableCell></TableCell>
                                )}
                                <TableCell></TableCell>
                                <TableCell></TableCell>
                              </>
                            )}
                          </TableRow>
                        );
                      })}
                  </>
                )}
                {ccPairStatus.total_pages > 1 && (
                  <TableRow className="border-l border-r border-b border-border dark:border-neutral-700">
                    <TableCell
                      colSpan={
                        isPaidEnterpriseFeaturesEnabled
                          ? NUMBER_OF_COLUMNS
                          : NUMBER_OF_COLUMNS - 1
                      }
                    >
                      <div className="flex justify-center">
                        <PageSelector
                          currentPage={ccPairStatus.current_page}
                          totalPages={ccPairStatus.total_pages}
                          onPageChange={(newPage) =>
                            onPageChange(ccPairStatus.source, newPage)
                          }
                        />
                      </div>
                    </TableCell>
                  </TableRow>
                )}
              </>
            )}
          </React.Fragment>
        ))}
      </TableBody>
    </Table>
  );
}


================================================
FILE: web/src/app/admin/indexing/status/ConnectorRowSkeleton.tsx
================================================
import React from "react";
import {
  Table,
  TableRow,
  TableHead,
  TableBody,
  TableCell,
  TableHeader,
} from "@/components/ui/table";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";

// Staggered loading animation skeleton with proper table column alignment
export function ConnectorStaggeredSkeleton({
  rowCount = 5,
  standalone = false,
  height = "h-20",
}: {
  rowCount?: number;
  standalone?: boolean; // if you want to show skeleton which is not in a table, set this to true
  height?: string;
}) {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const skeletonRows = [...Array(rowCount)].map((_, index) => (
    <TableRow
      key={index}
      className={`border border-border dark:border-neutral-700 hover:bg-accent-background animate-pulse ${height}`}
      style={{
        animationDelay: `${index * 150}ms`,
        animationDuration: "1.5s",
      }}
    >
      {/* Connector Name */}
      <TableCell>
        <div className="flex items-center gap-2">
          <div className="h-5 w-5 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
          <div className="lg:w-[180px] xl:w-[350px] h-5 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
        </div>
      </TableCell>

      {/* Last Success */}
      <TableCell>
        <div className="flex flex-col gap-1">
          <div className="h-3 w-20 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
          <div className="h-4 w-16 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
        </div>
      </TableCell>

      {/* Status */}
      <TableCell>
        <div className="flex items-center gap-2">
          <div className="h-2 w-2 bg-neutral-200 dark:bg-neutral-700 rounded-full"></div>
          <div className="h-6 w-24 bg-neutral-200 dark:bg-neutral-700 rounded-full"></div>
        </div>
      </TableCell>

      {/* Access Type (Enterprise only) */}
      {isPaidEnterpriseFeaturesEnabled && (
        <TableCell>
          <div className="flex items-center gap-2">
            <div className="h-4 w-4 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
            <div className="h-6 w-28 bg-neutral-200 dark:bg-neutral-700 rounded-full"></div>
          </div>
        </TableCell>
      )}

      {/* Docs Indexed */}
      <TableCell>
        <div className="flex flex-col gap-1">
          <div className="h-3 w-8 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
          <div className="h-5 w-16 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
        </div>
      </TableCell>

      {/* Settings Icon */}
      <TableCell>
        <div className="flex items-center justify-center">
          <div className="h-5 w-5 bg-neutral-200 dark:bg-neutral-700 rounded"></div>
        </div>
      </TableCell>
    </TableRow>
  ));

  // If standalone, wrap in complete table structure
  if (standalone) {
    return (
      <div className="w-full">
        <Table className="w-full">
          <TableBody>{skeletonRows}</TableBody>
        </Table>
      </div>
    );
  }

  // If not standalone, just return the rows
  return <>{skeletonRows}</>;
}


================================================
FILE: web/src/app/admin/indexing/status/FilterComponent.tsx
================================================
"use client";

import React, { useState, useImperativeHandle, forwardRef } from "react";
import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuGroup,
  DropdownMenuLabel,
  DropdownMenuSeparator,
  DropdownMenuTrigger,
  DropdownMenuCheckboxItem,
} from "@/components/ui/dropdown-menu";
import { Input } from "@/components/ui/input";
import { Badge } from "@/components/ui/badge";
import { AccessType, ValidStatuses } from "@/lib/types";
import { Button } from "@opal/components";
import { SvgFilter } from "@opal/icons";
export interface FilterOptions {
  accessType: AccessType[] | null;
  docsCountFilter: {
    operator: ">" | "<" | "=" | null;
    value: number | null;
  };
  lastStatus: ValidStatuses[] | null;
}

interface FilterComponentProps {
  onFilterChange: (filters: FilterOptions) => void;
}

export const FilterComponent = forwardRef<
  { resetFilters: () => void },
  FilterComponentProps
>(({ onFilterChange }, ref) => {
  const [isOpen, setIsOpen] = useState(false);
  const [filters, setFilters] = useState<FilterOptions>({
    accessType: null,
    docsCountFilter: {
      operator: null,
      value: null,
    },
    lastStatus: null,
  });

  // Local state for tracking selected filters before applying
  const [docsOperator, setDocsOperator] = useState<">" | "<" | "=" | null>(
    null
  );
  const [docsValue, setDocsValue] = useState<string>("");
  const [selectedAccessTypes, setSelectedAccessTypes] = useState<AccessType[]>(
    []
  );
  const [selectedStatuses, setSelectedStatuses] = useState<ValidStatuses[]>([]);

  // Expose resetFilters method via ref
  useImperativeHandle(ref, () => ({
    resetFilters: () => {
      setDocsOperator(null);
      setDocsValue("");
      setSelectedAccessTypes([]);
      setSelectedStatuses([]);
      setFilters({
        accessType: null,
        docsCountFilter: {
          operator: null,
          value: null,
        },
        lastStatus: null,
      });
    },
  }));

  const handleAccessTypeChange = (accessType: AccessType) => {
    const newAccessTypes = selectedAccessTypes.includes(accessType)
      ? selectedAccessTypes.filter((type) => type !== accessType)
      : [...selectedAccessTypes, accessType];

    setSelectedAccessTypes(newAccessTypes);
  };

  const handleStatusChange = (status: ValidStatuses) => {
    const newStatuses = selectedStatuses.includes(status)
      ? selectedStatuses.filter((s) => s !== status)
      : [...selectedStatuses, status];

    setSelectedStatuses(newStatuses);
  };

  const applyFilters = () => {
    const newFilters = {
      ...filters,
      accessType: selectedAccessTypes.length > 0 ? selectedAccessTypes : null,
      lastStatus: selectedStatuses.length > 0 ? selectedStatuses : null,
      docsCountFilter: {
        operator: docsOperator,
        value: docsValue ? parseInt(docsValue) : null,
      },
    };

    setFilters(newFilters);
    onFilterChange(newFilters);
    setIsOpen(false);
  };

  // Sync local state with filters when dropdown opens
  const handleOpenChange = (open: boolean) => {
    if (open) {
      // When opening, initialize local state from current filters
      setSelectedAccessTypes(filters.accessType || []);
      setSelectedStatuses(filters.lastStatus || []);
      setDocsOperator(filters.docsCountFilter.operator);
      setDocsValue(
        filters.docsCountFilter.value !== null
          ? filters.docsCountFilter.value.toString()
          : ""
      );
    }
    setIsOpen(open);
  };

  const hasActiveFilters =
    (filters.accessType && filters.accessType.length > 0) ||
    (filters.lastStatus && filters.lastStatus.length > 0) ||
    filters.docsCountFilter.operator !== null;

  return (
    <div className="relative">
      <DropdownMenu open={isOpen} onOpenChange={handleOpenChange}>
        <DropdownMenuTrigger asChild>
          <Button
            icon={SvgFilter}
            prominence="secondary"
            interaction={isOpen ? "hover" : "rest"}
          />
        </DropdownMenuTrigger>
        <DropdownMenuContent
          align="end"
          className="w-72"
          onCloseAutoFocus={(e) => e.preventDefault()}
        >
          <div className="flex items-center justify-between px-2 py-1.5">
            <DropdownMenuLabel className="text-base font-medium">
              Filter Connectors
            </DropdownMenuLabel>
          </div>
          <DropdownMenuSeparator />

          <DropdownMenuGroup>
            <DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
              Access Type
            </DropdownMenuLabel>
            <div onClick={(e) => e.stopPropagation()}>
              <DropdownMenuCheckboxItem
                checked={selectedAccessTypes.includes("public")}
                onCheckedChange={() => handleAccessTypeChange("public")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Public
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedAccessTypes.includes("private")}
                onCheckedChange={() => handleAccessTypeChange("private")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Private
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedAccessTypes.includes("sync")}
                onCheckedChange={() => handleAccessTypeChange("sync")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Auto-Sync
              </DropdownMenuCheckboxItem>
            </div>
          </DropdownMenuGroup>

          <DropdownMenuSeparator />

          <DropdownMenuGroup>
            <DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
              Last Status
            </DropdownMenuLabel>
            <div onClick={(e) => e.stopPropagation()}>
              <DropdownMenuCheckboxItem
                checked={selectedStatuses.includes("success")}
                onCheckedChange={() => handleStatusChange("success")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Success
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedStatuses.includes("failed")}
                onCheckedChange={() => handleStatusChange("failed")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Failed
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedStatuses.includes("in_progress")}
                onCheckedChange={() => handleStatusChange("in_progress")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                In Progress
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedStatuses.includes("not_started")}
                onCheckedChange={() => handleStatusChange("not_started")}
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Not Started
              </DropdownMenuCheckboxItem>
              <DropdownMenuCheckboxItem
                checked={selectedStatuses.includes("completed_with_errors")}
                onCheckedChange={() =>
                  handleStatusChange("completed_with_errors")
                }
                className="flex items-center justify-between"
                onSelect={(e) => e.preventDefault()}
              >
                Completed with Errors
              </DropdownMenuCheckboxItem>
            </div>
          </DropdownMenuGroup>

          <DropdownMenuSeparator />

          <DropdownMenuGroup>
            <DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
              Document Count
            </DropdownMenuLabel>
            <div
              className="flex items-center px-2 py-2 gap-2"
              onClick={(e) => e.stopPropagation()}
            >
              <div className="flex gap-2">
                <Button
                  prominence={docsOperator !== ">" ? "secondary" : "primary"}
                  onClick={(e) => {
                    e.preventDefault();
                    e.stopPropagation();
                    setDocsOperator(docsOperator === ">" ? null : ">");
                  }}
                  type="button"
                >
                  &gt;
                </Button>
                <Button
                  prominence={docsOperator !== "<" ? "secondary" : "primary"}
                  onClick={(e) => {
                    e.preventDefault();
                    e.stopPropagation();
                    setDocsOperator(docsOperator === "<" ? null : "<");
                  }}
                  type="button"
                >
                  &lt;
                </Button>
                <Button
                  prominence={docsOperator !== "=" ? "secondary" : "primary"}
                  onClick={(e) => {
                    e.preventDefault();
                    e.stopPropagation();
                    setDocsOperator(docsOperator === "=" ? null : "=");
                  }}
                  type="button"
                >
                  =
                </Button>
              </div>
              <Input
                type="number"
                placeholder="Count"
                value={docsValue}
                onChange={(e) => setDocsValue(e.target.value)}
                className="h-8 w-full"
                onClick={(e) => e.stopPropagation()}
              />
            </div>
            <div className="px-2 py-1.5">
              <Button
                width="full"
                onClick={(e) => {
                  e.preventDefault();
                  e.stopPropagation();
                  applyFilters();
                }}
                type="button"
              >
                Apply
              </Button>
            </div>
          </DropdownMenuGroup>
        </DropdownMenuContent>
      </DropdownMenu>

      {hasActiveFilters && (
        <div className="absolute -top-1 -right-1">
          <Badge className="h-2 !bg-red-400 !border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
        </div>
      )}
    </div>
  );
});

FilterComponent.displayName = "FilterComponent";


================================================
FILE: web/src/app/admin/indexing/status/SearchAndFilterControls.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { Button } from "@opal/components";
import { Badge } from "@/components/ui/badge";
import { FilterComponent, FilterOptions } from "./FilterComponent";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";

interface SearchAndFilterControlsProps {
  searchQuery: string;
  onSearchChange: (query: string) => void;
  hasExpandedSources: boolean;
  onExpandAll: () => void;
  onCollapseAll: () => void;
  filterOptions: FilterOptions;
  onFilterChange: (filterOptions: FilterOptions) => void;
  onClearFilters: () => void;
  hasActiveFilters: boolean;
  filterComponentRef: React.RefObject<{ resetFilters: () => void }>;
  resetPagination: () => void;
}

export function SearchAndFilterControls({
  searchQuery,
  onSearchChange,
  hasExpandedSources,
  onExpandAll,
  onCollapseAll,
  filterOptions,
  onFilterChange,
  onClearFilters,
  hasActiveFilters,
  filterComponentRef,
  resetPagination,
}: SearchAndFilterControlsProps) {
  const [localSearchValue, setLocalSearchValue] = useState(searchQuery);

  // Debounce the search query
  useEffect(() => {
    const timer = setTimeout(() => {
      resetPagination();
      onSearchChange(localSearchValue);
    }, 300);

    return () => clearTimeout(timer);
  }, [localSearchValue, onSearchChange, resetPagination]);

  // Sync with external searchQuery changes (e.g., when filters are cleared)
  useEffect(() => {
    setLocalSearchValue(searchQuery);
  }, [searchQuery]);

  return (
    <div className="flex items-center gap-x-2">
      <InputTypeIn
        placeholder="Search Connectors"
        type="text"
        value={localSearchValue}
        onChange={(event) => setLocalSearchValue(event.target.value)}
        className="w-96"
      />

      <Button onClick={hasExpandedSources ? onCollapseAll : onExpandAll}>
        {hasExpandedSources ? "Collapse All" : "Expand All"}
      </Button>

      <div className="flex items-center gap-2">
        <FilterComponent
          onFilterChange={onFilterChange}
          ref={filterComponentRef}
        />

        {hasActiveFilters && (
          <div className="flex flex-none items-center gap-1 ml-2 max-w-[500px]">
            {filterOptions.accessType &&
              filterOptions.accessType.length > 0 && (
                <Badge variant="secondary" className="px-2 py-0.5 text-xs">
                  Access: {filterOptions.accessType.join(", ")}
                </Badge>
              )}

            {filterOptions.lastStatus &&
              filterOptions.lastStatus.length > 0 && (
                <Badge variant="secondary" className="px-2 py-0.5 text-xs">
                  Status:{" "}
                  {filterOptions.lastStatus
                    .map((s) => s.replace(/_/g, " "))
                    .join(", ")}
                </Badge>
              )}

            {filterOptions.docsCountFilter.operator &&
              filterOptions.docsCountFilter.value !== null && (
                <Badge variant="secondary" className="px-2 py-0.5 text-xs">
                  Docs {filterOptions.docsCountFilter.operator}{" "}
                  {filterOptions.docsCountFilter.value}
                </Badge>
              )}

            {filterOptions.docsCountFilter.operator &&
              filterOptions.docsCountFilter.value === null && (
                <Badge variant="secondary" className="px-2 py-0.5 text-xs">
                  Docs {filterOptions.docsCountFilter.operator} any
                </Badge>
              )}

            <Badge
              variant="outline"
              className="px-2 py-0.5 text-xs border-red-400  bg-red-100 hover:border-red-600 cursor-pointer hover:bg-red-100 dark:hover:bg-red-900"
              onClick={onClearFilters}
            >
              <span className="text-red-500 dark:text-red-400">Clear</span>
            </Badge>
          </div>
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/indexing/status/page.tsx
================================================
"use client";

import { CCPairIndexingStatusTable } from "./CCPairIndexingStatusTable";
import { SearchAndFilterControls } from "./SearchAndFilterControls";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Link from "next/link";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import { useConnectorIndexingStatusWithPagination } from "@/lib/hooks";
import { useToastFromQuery } from "@/hooks/useToast";
import { Button } from "@opal/components";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";
import { useState, useRef, useMemo, RefObject } from "react";
import { FilterOptions } from "./FilterComponent";
import { ValidSources } from "@/lib/types";
import Cookies from "js-cookie";
import { TOGGLED_CONNECTORS_COOKIE_NAME } from "@/lib/constants";
import { ConnectorStaggeredSkeleton } from "./ConnectorRowSkeleton";
import { IndexingStatusRequest } from "@/lib/types";

const route = ADMIN_ROUTES.INDEXING_STATUS;

function Main() {
  const vectorDbEnabled = useVectorDbEnabled();

  // State for filter management
  const [filterOptions, setFilterOptions] = useState<FilterOptions>({
    accessType: null,
    docsCountFilter: {
      operator: null,
      value: null,
    },
    lastStatus: null,
  });

  // State for search
  const [searchQuery, setSearchQuery] = useState<string>("");

  // State for collapse/expand functionality
  const [connectorsToggled, setConnectorsToggled] = useState<
    Record<ValidSources, boolean>
  >(() => {
    const savedState = Cookies.get(TOGGLED_CONNECTORS_COOKIE_NAME);
    return savedState ? JSON.parse(savedState) : {};
  });

  // Reference to the FilterComponent for resetting its state
  const filterComponentRef = useRef<{
    resetFilters: () => void;
  }>(null);

  // Convert filter options to API request format
  const request: IndexingStatusRequest = useMemo(() => {
    return {
      secondary_index: false,
      access_type_filters: filterOptions.accessType || [],
      last_status_filters: filterOptions.lastStatus || [],
      docs_count_operator: filterOptions.docsCountFilter.operator,
      docs_count_value: filterOptions.docsCountFilter.value,
      name_filter: searchQuery,
    };
  }, [filterOptions, searchQuery]);

  // Use the paginated hook with filter request and 30-second refresh
  const {
    data: ccPairsIndexingStatuses,
    isLoading: isLoadingCcPairsIndexingStatuses,
    error: ccPairsIndexingStatusesError,
    handlePageChange,
    sourcePages,
    sourceLoadingStates,
    resetPagination,
  } = useConnectorIndexingStatusWithPagination(request, 30000, vectorDbEnabled);

  // Check if filters are active
  const hasActiveFilters = useMemo(() => {
    return (
      (filterOptions.accessType && filterOptions.accessType.length > 0) ||
      (filterOptions.lastStatus && filterOptions.lastStatus.length > 0) ||
      filterOptions.docsCountFilter.operator !== null
    );
  }, [filterOptions]);

  // Handle filter changes
  const handleFilterChange = (newFilterOptions: FilterOptions) => {
    setFilterOptions(newFilterOptions);
    // Reset pagination when filters change
    resetPagination();
  };

  // Toggle source expand/collapse functions
  const toggleSource = (
    source: ValidSources,
    toggled: boolean | null = null
  ) => {
    const newConnectorsToggled = {
      ...connectorsToggled,
      [source]: toggled == null ? !connectorsToggled[source] : toggled,
    };
    setConnectorsToggled(newConnectorsToggled);
    Cookies.set(
      TOGGLED_CONNECTORS_COOKIE_NAME,
      JSON.stringify(newConnectorsToggled)
    );
  };

  const expandAll = () => {
    if (!ccPairsIndexingStatuses) return;
    const newConnectorsToggled = { ...connectorsToggled };
    ccPairsIndexingStatuses.forEach((ccPairStatus) => {
      newConnectorsToggled[ccPairStatus.source] = true;
    });
    setConnectorsToggled(newConnectorsToggled);
    Cookies.set(
      TOGGLED_CONNECTORS_COOKIE_NAME,
      JSON.stringify(newConnectorsToggled)
    );
  };

  const collapseAll = () => {
    if (!ccPairsIndexingStatuses) return;
    const newConnectorsToggled = { ...connectorsToggled };
    ccPairsIndexingStatuses.forEach((ccPairStatus) => {
      newConnectorsToggled[ccPairStatus.source] = false;
    });
    setConnectorsToggled(newConnectorsToggled);
    Cookies.set(
      TOGGLED_CONNECTORS_COOKIE_NAME,
      JSON.stringify(newConnectorsToggled)
    );
  };

  // Check if any sources are expanded
  const hasExpandedSources =
    ccPairsIndexingStatuses?.some(
      (ccPairStatus) => connectorsToggled[ccPairStatus.source]
    ) || false;

  // Handler functions for the search and filter controls
  const handleClearFilters = () => {
    if (filterComponentRef.current) {
      filterComponentRef.current.resetFilters();
      setFilterOptions({
        accessType: null,
        docsCountFilter: {
          operator: null,
          value: null,
        },
        lastStatus: null,
      });
    }
  };

  if (ccPairsIndexingStatusesError) {
    return (
      <div className="text-error">
        {ccPairsIndexingStatusesError?.info?.detail ||
          "Error loading indexing status."}
      </div>
    );
  }

  return (
    <div>
      {/* Search bar and controls */}
      <SearchAndFilterControls
        searchQuery={searchQuery}
        onSearchChange={setSearchQuery}
        hasExpandedSources={hasExpandedSources}
        onExpandAll={expandAll}
        onCollapseAll={collapseAll}
        filterOptions={filterOptions}
        onFilterChange={handleFilterChange}
        resetPagination={resetPagination}
        onClearFilters={handleClearFilters}
        hasActiveFilters={hasActiveFilters}
        filterComponentRef={
          filterComponentRef as RefObject<{ resetFilters: () => void }>
        }
      />

      {/* Table component */}
      {isLoadingCcPairsIndexingStatuses ? (
        <div className="mt-12">
          <ConnectorStaggeredSkeleton rowCount={8} standalone={true} />
        </div>
      ) : !ccPairsIndexingStatuses || ccPairsIndexingStatuses.length === 0 ? (
        <div>
          <Spacer rem={3} />
          <Text as="p">
            {markdown(
              "It looks like you don't have any connectors setup yet. Visit the [Add Connector](/admin/add-connector) page to get started!"
            )}
          </Text>
        </div>
      ) : (
        <CCPairIndexingStatusTable
          ccPairsIndexingStatuses={ccPairsIndexingStatuses}
          connectorsToggled={connectorsToggled}
          toggleSource={toggleSource}
          onPageChange={handlePageChange}
          sourceLoadingStates={sourceLoadingStates}
        />
      )}
    </div>
  );
}

export default function Status() {
  useToastFromQuery({
    "connector-created": {
      message: "Connector created successfully",
      type: "success",
    },
  });

  return (
    <SettingsLayouts.Root width="full">
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        rightChildren={
          <Button href="/admin/add-connector">Add Connector</Button>
        }
        separator
      />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/kg/KGEntityTypes.tsx
================================================
import { useEffect, useState } from "react";
import { SourceIcon } from "@/components/SourceIcon";
import Switch from "@/refresh-components/inputs/Switch";
import Link from "next/link";
import { EntityType, SourceAndEntityTypeView } from "@/app/admin/kg/interfaces";
import CollapsibleCard from "@/components/CollapsibleCard";
import { ValidSources } from "@/lib/types";
import { FaCircleQuestion } from "react-icons/fa6";
import { CheckmarkIcon } from "@/components/icons/icons";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { cn } from "@/lib/utils";

// Utility: Convert capitalized snake case to human readable case
function snakeToHumanReadable(str: string): string {
  return (
    str
      .toLowerCase()
      .replace(/_/g, " ")
      .replace(/\b\w/g, (match) => match.toUpperCase())
      // # TODO (@raunakab)
      // Special case to replace all instances of "Pr" with "PR".
      // This is a *dumb* implementation. If there exists a string that starts with "Pr" (e.g., "Prompt"),
      // then this line will stupidly convert it to "PRompt".
      // Fix this later (or if this becomes a problem lol).
      .replace("Pr", "PR")
  );
}

// Custom Header Component
function TableHeader() {
  return (
    <div className="grid grid-cols-12 gap-y-4 px-8 p-4 border-b bg-background-tint-00">
      <div className="col-span-1">
        <Text as="p">Entity Name</Text>
      </div>
      <div className="col-span-10">
        <Text as="p">Description</Text>
      </div>
      <div className="col-span-1 flex flex-1 justify-center">
        <Text as="p">Active</Text>
      </div>
    </div>
  );
}

// Custom Row Component
function TableRow({ entityType }: { entityType: EntityType }) {
  const [entityTypeState, setEntityTypeState] = useState(entityType);
  const [descriptionSavingState, setDescriptionSavingState] = useState<
    "saving" | "saved" | "failed" | undefined
  >(undefined);

  const [timer, setTimer] = useState<NodeJS.Timeout | null>(null);
  const [checkmarkVisible, setCheckmarkVisible] = useState(false);
  const [hasMounted, setHasMounted] = useState(false);

  const handleToggle = async (checked: boolean) => {
    const response = await fetch("/api/admin/kg/entity-types", {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify([{ ...entityType, active: checked }]),
    });

    if (!response.ok) return;

    setEntityTypeState({ ...entityTypeState, active: checked });
  };

  const handleDescriptionChange = async (description: string) => {
    try {
      const response = await fetch("/api/admin/kg/entity-types", {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify([{ ...entityType, description }]),
      });
      if (response.ok) {
        setDescriptionSavingState("saved");
        setCheckmarkVisible(true);
        setTimeout(() => setCheckmarkVisible(false), 1000);
      } else {
        setDescriptionSavingState("failed");
        setCheckmarkVisible(false);
      }
    } catch {
      setDescriptionSavingState("failed");
      setCheckmarkVisible(false);
    } finally {
      setTimeout(() => setDescriptionSavingState(undefined), 1000);
    }
  };

  useEffect(() => {
    if (!hasMounted) {
      setHasMounted(true);
      return;
    }
    if (timer) clearTimeout(timer);
    setTimer(
      setTimeout(() => {
        setDescriptionSavingState("saving");
        setCheckmarkVisible(false);
        setTimer(
          setTimeout(
            () => handleDescriptionChange(entityTypeState.description),
            500
          )
        );
      }, 1000)
    );
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [entityTypeState.description]);

  return (
    <div className="bg-background-tint-00">
      <div className="grid grid-cols-12 px-8 py-4">
        <div
          className={cn(
            "grid grid-cols-11 col-span-11 transition-opacity duration-150 ease-in-out",
            !entityTypeState.active && "opacity-60"
          )}
        >
          <div className="col-span-1 flex items-center">
            <Text as="p">{snakeToHumanReadable(entityType.name)}</Text>
          </div>
          <div className="col-span-10 relative">
            <InputTypeIn
              placeholder="Value"
              variant={!entityTypeState.active ? "disabled" : undefined}
              className="w-full px-3 py-2 border"
              defaultValue={entityType.description}
              onChange={(e) =>
                setEntityTypeState({
                  ...entityTypeState,
                  description: e.target.value,
                })
              }
              onKeyDown={async (e) => {
                if (e.key === "Enter") {
                  e.preventDefault();
                  if (timer) {
                    clearTimeout(timer);
                    setTimer(null);
                  }
                  setDescriptionSavingState("saving");
                  setCheckmarkVisible(false);
                  await handleDescriptionChange(
                    (e.target as HTMLInputElement).value
                  );
                }
              }}
            />
            <span
              className="absolute right-3 top-1/2 -translate-y-1/2 w-5 h-5"
              style={{ pointerEvents: "none" }}
            >
              <span
                className={cn(
                  "absolute inset-0 flex items-center justify-center transition-opacity duration-400 ease-in-out",
                  descriptionSavingState === "saving" && hasMounted
                    ? "opacity-100"
                    : "opacity-0"
                )}
                style={{ zIndex: 1 }}
              >
                <span className="inline-block w-4 h-4 align-middle border-2 border-theme-primary-04 border-t-transparent rounded-full animate-spin" />
              </span>
              <span
                className={cn(
                  "absolute inset-0 flex items-center justify-center transition-opacity duration-400 ease-in-out",
                  checkmarkVisible ? "opacity-100" : "opacity-0"
                )}
                style={{ zIndex: 2 }}
              >
                <CheckmarkIcon size={16} className="text-status-success-05" />
              </span>
            </span>
          </div>
        </div>
        <div className="grid col-span-1 items-center justify-center">
          <Switch
            checked={entityTypeState.active}
            onCheckedChange={handleToggle}
          />
        </div>
      </div>
    </div>
  );
}

interface KGEntityTypesProps {
  sourceAndEntityTypes: SourceAndEntityTypeView;
}

export default function KGEntityTypes({
  sourceAndEntityTypes,
}: KGEntityTypesProps) {
  // State to control open/close of all CollapsibleCards
  const [openCards, setOpenCards] = useState<{ [key: string]: boolean }>({});
  // State for search query
  const [search, setSearch] = useState("");

  // Initialize openCards state when data changes
  useEffect(() => {
    const initialState: { [key: string]: boolean } = {};
    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {
      initialState[key] = true;
    });
    setOpenCards(initialState);
  }, [sourceAndEntityTypes]);

  // Handlers for expand/collapse all
  const handleExpandAll = () => {
    const newState: { [key: string]: boolean } = {};
    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {
      newState[key] = true;
    });
    setOpenCards(newState);
  };
  const handleCollapseAll = () => {
    const newState: { [key: string]: boolean } = {};
    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {
      newState[key] = false;
    });
    setOpenCards(newState);
  };

  // Determine if all cards are closed
  const allClosed = Object.values(openCards).every((v) => v === false);

  return (
    <div className="flex flex-col gap-y-4 w-full">
      <div className="flex flex-row items-center gap-x-1.5 mb-2">
        <InputTypeIn
          placeholder="Search source type..."
          value={search}
          onChange={(event) => setSearch(event.target.value)}
        />
        <Button onClick={allClosed ? handleExpandAll : handleCollapseAll}>
          {allClosed ? "Expand All" : "Collapse All"}
        </Button>
      </div>
      <div className="flex flex-col gap-y-4 w-full">
        {Object.entries(sourceAndEntityTypes.entity_types).length === 0 ? (
          <div className="flex flex-col gap-y-4">
            <Text as="p" text02>
              No results available.
            </Text>
            <Text as="p" text02>
              To configure Knowledge Graph, first connect some{" "}
              <Link
                href="/admin/add-connector"
                className="underline text-action-link-01"
              >
                Connectors.
              </Link>
            </Text>
          </div>
        ) : (
          Object.entries(sourceAndEntityTypes.entity_types)
            .filter(([key]) =>
              snakeToHumanReadable(key)
                .toLowerCase()
                .includes(search.toLowerCase())
            )
            .sort(([keyA], [keyB]) => keyA.localeCompare(keyB))
            .map(([key, entityTypesArr]) => {
              const stats = sourceAndEntityTypes.source_statistics[key] ?? {
                source_name: key,
                last_updated: undefined,
                entities_count: 0,
              };
              return (
                <div key={key}>
                  <CollapsibleCard
                    className="focus:outline-none focus-visible:outline-none outline-none"
                    header={
                      <span className="font-semibold text-lg flex flex-row gap-x-4 items-center">
                        {Object.values(ValidSources).includes(
                          key as ValidSources
                        ) ? (
                          <SourceIcon
                            sourceType={key as ValidSources}
                            iconSize={25}
                          />
                        ) : (
                          <FaCircleQuestion size={25} />
                        )}
                        {snakeToHumanReadable(key)}
                        <span className="ml-auto flex flex-row gap-x-16 items-center pr-16">
                          <span className="flex flex-col items-start">
                            <Text as="p" secondaryBody text02>
                              Entities Count
                            </Text>
                            <Text as="p">{stats.entities_count}</Text>
                          </span>
                          <span className="flex flex-col items-start">
                            <Text as="p" secondaryBody text02>
                              Last Updated
                            </Text>
                            <Text as="p">
                              {stats.last_updated
                                ? new Date(stats.last_updated).toLocaleString()
                                : "N/A"}
                            </Text>
                          </span>
                        </span>
                      </span>
                    }
                    // Use a key that changes with openCards[key] to force remount and update defaultOpen
                    key={`${key}-${openCards[key]}`}
                    defaultOpen={
                      openCards[key] !== undefined ? openCards[key] : true
                    }
                  >
                    <div className="w-full">
                      <TableHeader />
                      {entityTypesArr.map(
                        (entityType: EntityType, index: number) => (
                          <TableRow key={index} entityType={entityType} />
                        )
                      )}
                    </div>
                  </CollapsibleCard>
                </div>
              );
            })
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/admin/kg/interfaces.ts
================================================
export type KGConfig = {
  enabled: boolean;
  vendor?: string | null;
  vendor_domains?: string[] | null;
  ignore_domains?: string[] | null;
  coverage_start: Date;
};

export type KGConfigRaw = {
  enabled: boolean;
  vendor?: string | null;
  vendor_domains?: string[] | null;
  ignore_domains?: string[] | null;
  coverage_start: string;
};

export type EntityTypeValues = { [key: string]: EntityType };

export type SourceAndEntityTypeView = {
  source_statistics: Record<string, SourceStatistics>;
  entity_types: Record<string, EntityType[]>;
};

export type SourceStatistics = {
  source_name: string;
  last_updated: string;
  entities_count: number;
};

export type EntityType = {
  name: string;
  description: string;
  active: boolean;
  grounded_source_name: string;
};


================================================
FILE: web/src/app/admin/kg/page.tsx
================================================
"use client";

import CardSection from "@/components/admin/CardSection";
import {
  DatePickerField,
  FieldLabel,
  TextArrayField,
  TextFormField,
} from "@/components/Field";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import SwitchField from "@/refresh-components/form/SwitchField";
import { Form, Formik, FormikState, useFormikContext } from "formik";
import { useState } from "react";
import * as Yup from "yup";
import {
  KGConfig,
  KGConfigRaw,
  SourceAndEntityTypeView,
} from "@/app/admin/kg/interfaces";
import { sanitizeKGConfig } from "@/app/admin/kg/utils";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import { toast } from "@/hooks/useToast";
import Title from "@/components/ui/title";
import { redirect } from "next/navigation";
import { useIsKGExposed } from "@/app/admin/kg/utils";
import KGEntityTypes from "@/app/admin/kg/KGEntityTypes";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { SvgSettings } from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.KNOWLEDGE_GRAPH;

function createDomainField(
  name: string,
  label: string,
  subtext: string,
  placeholder: string,
  minFields?: number
) {
  return function DomainFields({ disabled = false }: { disabled?: boolean }) {
    const { values } = useFormikContext<any>();

    return (
      <TextArrayField
        name={name}
        label={label}
        subtext={subtext}
        placeholder={placeholder}
        minFields={minFields}
        values={values}
        disabled={disabled}
      />
    );
  };
}

const VendorDomains = createDomainField(
  "vendor_domains",
  "Vendor Domains",
  "Domain names of your company. Users with these email domains will be recognized as employees.",
  "Domain",
  1
);

const IgnoreDomains = createDomainField(
  "ignore_domains",
  "Ignore Domains",
  "Domain names to ignore. Users with these email domains will be excluded from the Knowledge Graph.",
  "Domain"
);

function KGConfiguration({
  kgConfig,
  onSubmitSuccess,
  entityTypesMutate,
}: {
  kgConfig: KGConfig;
  onSubmitSuccess?: () => void;
  entityTypesMutate?: () => void;
}) {
  const initialValues: KGConfig = {
    enabled: kgConfig.enabled,
    vendor: kgConfig.vendor ?? "",
    vendor_domains:
      (kgConfig.vendor_domains?.length ?? 0) > 0
        ? kgConfig.vendor_domains
        : [""],
    ignore_domains: kgConfig.ignore_domains ?? [],
    coverage_start: kgConfig.coverage_start,
  };

  const enabledSchema = Yup.object({
    enabled: Yup.boolean().required(),
    vendor: Yup.string().required("Vendor is required."),
    vendor_domains: Yup.array(
      Yup.string().required("Vendor Domain is required.")
    )
      .min(1)
      .required(),
    ignore_domains: Yup.array(
      Yup.string().required("Ignore Domain is required")
    )
      .min(0)
      .required(),
    coverage_start: Yup.date().nullable(),
  });

  const disabledSchema = Yup.object({
    enabled: Yup.boolean().required(),
  });

  const validationSchema = Yup.lazy((values) =>
    values.enabled ? enabledSchema : disabledSchema
  );

  const onSubmit = async (
    values: KGConfig,
    {
      resetForm,
    }: {
      resetForm: (nextState?: Partial<FormikState<KGConfig>>) => void;
    }
  ) => {
    const { enabled, ...enableRequest } = values;
    const body = enabled ? enableRequest : {};

    const response = await fetch("/api/admin/kg/config", {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      const errorMsg = (await response.json()).detail;
      console.warn({ errorMsg });
      toast.error("Failed to configure Knowledge Graph.");
      return;
    }

    toast.success("Successfully configured Knowledge Graph.");
    resetForm({ values });
    onSubmitSuccess?.();

    // Refresh entity types if KG was enabled
    if (enabled && entityTypesMutate) {
      entityTypesMutate();
    }
  };

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      onSubmit={onSubmit}
    >
      {(props) => (
        <Form>
          <div className="flex flex-col gap-y-6 w-full">
            <div className="flex flex-col gap-y-1">
              <FieldLabel
                name="enabled"
                label="Enabled"
                subtext="Enable or disable Knowledge Graph."
              />
              <SwitchField
                name="enabled"
                onCheckedChange={(state) => {
                  if (!state) props.resetForm();
                }}
              />
            </div>
            <div
              className={cn(
                "flex flex-col gap-y-6",
                !props.values.enabled && "opacity-50"
              )}
            >
              <TextFormField
                name="vendor"
                label="Vendor"
                subtext="Your company name."
                className="flex flex-row flex-1 w-full"
                placeholder="My Company Inc."
                disabled={!props.values.enabled}
              />
              <VendorDomains disabled={!props.values.enabled} />
              <IgnoreDomains disabled={!props.values.enabled} />
              <DatePickerField
                name="coverage_start"
                label="Coverage Start"
                subtext="The start date of coverage for Knowledge Graph."
                startYear={2025} // TODO: remove this after public beta
                disabled={!props.values.enabled}
              />
            </div>
            <Button disabled={!props.dirty} type="submit">
              Submit
            </Button>
          </div>
        </Form>
      )}
    </Formik>
  );
}

function Main() {
  // Data:
  const {
    data: configData,
    isLoading: configIsLoading,
    mutate: configMutate,
  } = useSWR<KGConfigRaw>(SWR_KEYS.kgConfig, errorHandlingFetcher);
  const {
    data: sourceAndEntityTypesData,
    isLoading: entityTypesIsLoading,
    mutate: entityTypesMutate,
  } = useSWR<SourceAndEntityTypeView>(
    SWR_KEYS.kgEntityTypes,
    errorHandlingFetcher
  );

  // Local State:
  const [configureModalShown, setConfigureModalShown] = useState(false);

  if (
    configIsLoading ||
    entityTypesIsLoading ||
    !configData ||
    !sourceAndEntityTypesData
  ) {
    return <></>;
  }

  const kgConfig = sanitizeKGConfig(configData);

  return (
    <div className="flex flex-col py-4 gap-y-8">
      <CardSection className="max-w-2xl shadow-01 rounded-08 flex flex-col gap-2">
        <Text as="p" headingH2>
          Knowledge Graph Configuration (Private Beta)
        </Text>
        <div className="flex flex-col gap-y-6">
          <div>
            <Text as="p" text03>
              The Knowledge Graph feature lets you explore your data in new
              ways. Instead of searching through unstructured text, your data is
              organized as entities and their relationships, enabling powerful
              queries like:
            </Text>
            <div className="p-4">
              <Text as="p" text03>
                - &quot;Summarize my last 3 calls with account XYZ&quot;
              </Text>
              <Text as="p" text03>
                - &quot;How many open Jiras are assigned to John Smith, ranked
                by priority&quot;
              </Text>
            </div>
            <Text as="p" text03>
              (To use Knowledge Graph queries, you&apos;ll need a dedicated
              Assistant configured in a specific way. Please contact the Onyx
              team for setup instructions.)
            </Text>
          </div>
          <Text as="p" text03>
            <Title>Getting Started:</Title>
            Begin by configuring some high-level attributes, and then define the
            entities you want to model afterwards.
          </Text>
          <Button
            icon={SvgSettings}
            onClick={() => setConfigureModalShown(true)}
          >
            Configure Knowledge Graph
          </Button>
        </div>
      </CardSection>
      {kgConfig.enabled && (
        <>
          <Text as="p" headingH2>
            Entity Types
          </Text>
          <KGEntityTypes sourceAndEntityTypes={sourceAndEntityTypesData} />
        </>
      )}
      {configureModalShown && (
        <Modal open onOpenChange={() => setConfigureModalShown(false)}>
          <Modal.Content>
            <Modal.Header
              icon={SvgSettings}
              title="Configure Knowledge Graph"
              onClose={() => setConfigureModalShown(false)}
            />
            <Modal.Body>
              <KGConfiguration
                kgConfig={kgConfig}
                onSubmitSuccess={async () => {
                  await configMutate();
                  setConfigureModalShown(false);
                }}
                entityTypesMutate={entityTypesMutate}
              />
            </Modal.Body>
          </Modal.Content>
        </Modal>
      )}
    </div>
  );
}

export default function Page() {
  const { kgExposed, isLoading } = useIsKGExposed();

  if (isLoading) {
    return <></>;
  }

  if (!kgExposed) {
    redirect("/");
  }

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/kg/utils.ts
================================================
import { useUser } from "@/providers/UserProvider";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { KGConfig, KGConfigRaw } from "./interfaces";

export type KgExposedStatus = { kgExposed: boolean; isLoading: boolean };

export function useIsKGExposed(): KgExposedStatus {
  const { isAdmin } = useUser();
  const { data: kgExposedRaw, isLoading } = useSWR<boolean>(
    isAdmin ? SWR_KEYS.kgExposed : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      revalidateOnReconnect: false,
    }
  );
  return { kgExposed: kgExposedRaw ?? false, isLoading };
}

export function sanitizeKGConfig(raw: KGConfigRaw): KGConfig {
  const coverage_start = new Date(raw.coverage_start);

  return {
    ...raw,
    coverage_start,
  };
}


================================================
FILE: web/src/app/admin/layout.tsx
================================================
import Layout from "@/components/admin/Layout";

export interface AdminLayoutProps {
  children: React.ReactNode;
}

export default async function AdminLayout({ children }: AdminLayoutProps) {
  return await Layout({ children });
}


================================================
FILE: web/src/app/admin/scim/ScimModal.tsx
================================================
import { SvgDownload, SvgKey, SvgRefreshCw } from "@opal/icons";
import { Interactive, Hoverable } from "@opal/core";
import { Section } from "@/layouts/general-layouts";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { toast } from "@/hooks/useToast";
import { downloadFile } from "@/lib/download";

import type { ScimModalView } from "./interfaces";

// ---------------------------------------------------------------------------
// Props
// ---------------------------------------------------------------------------

interface ScimModalProps {
  view: ScimModalView;
  isSubmitting: boolean;
  onRegenerate: () => void;
  onClose: () => void;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

async function copyToClipboard(text: string) {
  try {
    await navigator.clipboard.writeText(text);
    toast.success("Token copied to clipboard");
  } catch {
    toast.error("Failed to copy token");
  }
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function ScimModal({
  view,
  isSubmitting,
  onRegenerate,
  onClose,
}: ScimModalProps) {
  switch (view.kind) {
    case "regenerate":
      return (
        <ConfirmationModalLayout
          icon={SvgRefreshCw}
          title="Regenerate SCIM Token"
          onClose={onClose}
          submit={
            <Button
              disabled={isSubmitting}
              variant="danger"
              onClick={onRegenerate}
            >
              Regenerate Token
            </Button>
          }
        >
          <Section alignItems="start" gap={0.5}>
            <Text as="p" text03>
              Your current SCIM token will be revoked and a new token will be
              generated. You will need to update the token on your identity
              provider before SCIM provisioning will resume.
            </Text>
          </Section>
        </ConfirmationModalLayout>
      );

    case "token":
      return (
        <Modal open onOpenChange={(open) => !open && onClose()}>
          <Modal.Content width="sm">
            <Modal.Header
              icon={SvgKey}
              title="SCIM Token"
              description="Save this key before continuing. It won't be shown again."
              onClose={onClose}
            />
            <Modal.Body>
              <Hoverable.Root group="token">
                <Interactive.Stateless
                  onClick={() => copyToClipboard(view.rawToken)}
                >
                  <InputTextArea
                    value={view.rawToken}
                    readOnly
                    autoResize
                    resizable={false}
                    rows={2}
                    className="font-main-ui-mono break-all cursor-pointer [&_textarea]:cursor-pointer"
                    rightSection={
                      <div onClick={(e) => e.stopPropagation()}>
                        <Hoverable.Item
                          group="token"
                          variant="opacity-on-hover"
                        >
                          <CopyIconButton getCopyText={() => view.rawToken} />
                        </Hoverable.Item>
                      </div>
                    }
                  />
                </Interactive.Stateless>
              </Hoverable.Root>
            </Modal.Body>
            <Modal.Footer>
              <BasicModalFooter
                left={
                  <Button
                    prominence="secondary"
                    icon={SvgDownload}
                    onClick={() =>
                      downloadFile(`onyx-scim-token-${Date.now()}.txt`, {
                        content: view.rawToken,
                      })
                    }
                  >
                    Download
                  </Button>
                }
                submit={
                  <Button
                    autoFocus
                    onClick={() => copyToClipboard(view.rawToken)}
                  >
                    Copy Token
                  </Button>
                }
              />
            </Modal.Footer>
          </Modal.Content>
        </Modal>
      );
  }
}


================================================
FILE: web/src/app/admin/scim/ScimSyncCard.tsx
================================================
import { SvgCheckCircle, SvgClock, SvgKey, SvgRefreshCw } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Separator from "@/refresh-components/Separator";
import { timeAgo } from "@/lib/time";

// ---------------------------------------------------------------------------
// Props
// ---------------------------------------------------------------------------

interface ScimSyncCardProps {
  hasToken: boolean;
  isConnected: boolean;
  lastUsedAt: string | null;
  idpDomain: string | null;
  isSubmitting: boolean;
  onGenerate: () => void;
  onRegenerate: () => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function ScimSyncCard({
  hasToken,
  isConnected,
  lastUsedAt,
  idpDomain,
  isSubmitting,
  onGenerate,
  onRegenerate,
}: ScimSyncCardProps) {
  return (
    <Card gap={0.75}>
      <ContentAction
        title="SCIM Sync"
        description="Connect your identity provider to import and sync users and groups."
        sizePreset="main-ui"
        variant="section"
        paddingVariant="fit"
        rightChildren={
          hasToken ? (
            <Button
              variant="danger"
              prominence="secondary"
              onClick={onRegenerate}
              icon={SvgRefreshCw}
            >
              Regenerate Token
            </Button>
          ) : (
            <Button
              disabled={isSubmitting}
              rightIcon={SvgKey}
              onClick={onGenerate}
            >
              Generate SCIM Token
            </Button>
          )
        }
      />

      {hasToken && (
        <>
          <Separator noPadding />

          <Section
            flexDirection="row"
            justifyContent="between"
            alignItems="end"
            gap={1}
          >
            <Section alignItems="start" gap={0} width="fit">
              {isConnected ? (
                <SvgCheckCircle size={15} className="text-status-success-05" />
              ) : (
                <SvgClock size={15} className="text-theme-amber-05" />
              )}
              <Text as="p" mainUiBody text04>
                {isConnected ? "Connected" : "Waiting for Connection"}
              </Text>
            </Section>

            <Section alignItems="end" gap={0} width="fit">
              {isConnected ? (
                <>
                  {idpDomain && (
                    <Text as="p" secondaryAction text03>
                      {idpDomain}
                    </Text>
                  )}
                  <Text as="p" secondaryBody text03>
                    {timeAgo(lastUsedAt)}
                  </Text>
                </>
              ) : (
                <Text
                  as="p"
                  secondaryBody
                  text03
                  className="max-w-[240px] text-right"
                >
                  Provide the SCIM key to your identity provider to begin
                  syncing users and groups.
                </Text>
              )}
            </Section>
          </Section>
        </>
      )}
    </Card>
  );
}


================================================
FILE: web/src/app/admin/scim/interfaces.ts
================================================
export interface ScimTokenResponse {
  id: number;
  name: string;
  token_display: string;
  is_active: boolean;
  created_at: string;
  last_used_at: string | null;
  idp_domain: string | null;
}

export interface ScimTokenCreatedResponse extends ScimTokenResponse {
  raw_token: string;
}

export type ScimModalView =
  | { kind: "regenerate" }
  | { kind: "token"; rawToken: string };


================================================
FILE: web/src/app/admin/scim/page.tsx
================================================
"use client";

import { useState } from "react";

import { SvgUserSync } from "@opal/icons";
import { toast } from "@/hooks/useToast";
import { useScimToken } from "@/hooks/useScimToken";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Text from "@/refresh-components/texts/Text";
import { ThreeDotsLoader } from "@/components/Loading";

import type { ScimTokenCreatedResponse, ScimModalView } from "./interfaces";
import { generateScimToken } from "./svc";
import ScimSyncCard from "./ScimSyncCard";
import ScimModal from "./ScimModal";

// ---------------------------------------------------------------------------
// SCIM Content
// ---------------------------------------------------------------------------

function ScimContent() {
  const { data: token, error: tokenError, isLoading, mutate } = useScimToken();

  const modal = useCreateModal();

  const [modalView, setModalView] = useState<ScimModalView | null>(null);
  const [isSubmitting, setIsSubmitting] = useState(false);

  const hasToken = !!token;
  const isConnected = hasToken && token.last_used_at !== null;

  // 404 means no active token — not an error
  const is404 =
    tokenError &&
    typeof tokenError === "object" &&
    "status" in tokenError &&
    (tokenError as { status: number }).status === 404;

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (tokenError && !is404) {
    return (
      <Text as="p" text03>
        Failed to load SCIM token status.
      </Text>
    );
  }

  // -----------------------------------------------------------------------
  // Handlers
  // -----------------------------------------------------------------------

  function openModal(view: ScimModalView) {
    setModalView(view);
    modal.toggle(true);
  }

  function closeModal() {
    modal.toggle(false);
    setModalView(null);
  }

  async function handleCreateToken() {
    setIsSubmitting(true);
    try {
      const response = await generateScimToken("default");
      if (!response.ok) {
        let detail: string;
        try {
          const body = await response.clone().json();
          detail = body.detail ?? JSON.stringify(body);
        } catch {
          detail = await response.text();
        }
        toast.error(`Failed to generate token: ${detail}`);
        return;
      }
      const created: ScimTokenCreatedResponse = await response.json();
      await mutate();
      openModal({ kind: "token", rawToken: created.raw_token });
      if (hasToken) toast.success("Token regenerated");
    } catch {
      toast.error("Something went wrong. Please try again.");
    } finally {
      setIsSubmitting(false);
    }
  }

  // -----------------------------------------------------------------------
  // Render
  // -----------------------------------------------------------------------

  return (
    <>
      <ScimSyncCard
        hasToken={hasToken}
        isConnected={isConnected}
        lastUsedAt={token?.last_used_at ?? null}
        idpDomain={token?.idp_domain ?? null}
        isSubmitting={isSubmitting}
        onGenerate={handleCreateToken}
        onRegenerate={() => openModal({ kind: "regenerate" })}
      />

      {modal.isOpen && modalView && (
        <modal.Provider>
          <ScimModal
            view={modalView}
            isSubmitting={isSubmitting}
            onRegenerate={handleCreateToken}
            onClose={closeModal}
          />
        </modal.Provider>
      )}
    </>
  );
}

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgUserSync}
        title="SCIM"
        description="Sync users and groups via System for Cross-domain Identity Management (SCIM) protocol."
        separator
      />
      <SettingsLayouts.Body>
        <ScimContent />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/scim/svc.ts
================================================
export async function generateScimToken(name: string) {
  return fetch("/api/admin/enterprise-settings/scim/token", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ name }),
  });
}


================================================
FILE: web/src/app/admin/service-accounts/page.tsx
================================================
export { default } from "@/refresh-pages/admin/ServiceAccountsPage";


================================================
FILE: web/src/app/admin/systeminfo/page.tsx
================================================
import { NotebookIcon } from "@/components/icons/icons";
import { getWebVersion, getBackendVersion } from "@/lib/version";

const Page = async () => {
  let web_version: string | null = null;
  let backend_version: string | null = null;
  try {
    [web_version, backend_version] = await Promise.all([
      getWebVersion(),
      getBackendVersion(),
    ]);
  } catch (e) {
    console.log(`Version info fetch failed for system info page - ${e}`);
  }

  return (
    <div>
      <div className="border-solid border-background-600 border-b pb-2 mb-4 flex">
        <NotebookIcon size={32} />
        <h1 className="text-3xl font-bold pl-2">Version</h1>
      </div>

      <div>
        <div className="flex mb-2">
          <p className="my-auto mr-1">Backend Version: </p>
          <p className="text-base my-auto text-slate-400 italic">
            {backend_version}
          </p>
        </div>
        <div className="flex mb-2">
          <p className="my-auto mr-1">Web Version: </p>
          <p className="text-base my-auto text-slate-400 italic">
            {web_version}
          </p>
        </div>
      </div>
    </div>
  );
};

export default Page;


================================================
FILE: web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx
================================================
"use client";

import * as Yup from "yup";
import { Button } from "@opal/components";
import { useEffect, useState } from "react";
import Modal from "@/refresh-components/Modal";
import { Form, Formik } from "formik";
import { SelectorFormField, TextFormField } from "@/components/Field";
import { UserGroup } from "@/lib/types";
import { Scope } from "./types";
import { toast } from "@/hooks/useToast";
import { SvgSettings } from "@opal/icons";
interface CreateRateLimitModalProps {
  isOpen: boolean;
  setIsOpen: (isOpen: boolean) => void;
  onSubmit: (
    target_scope: Scope,
    period_hours: number,
    token_budget: number,
    group_id: number
  ) => void;
  forSpecificScope?: Scope;
  forSpecificUserGroup?: number;
}

export default function CreateRateLimitModal({
  isOpen,
  setIsOpen,
  onSubmit,
  forSpecificScope,
  forSpecificUserGroup,
}: CreateRateLimitModalProps) {
  const [modalUserGroups, setModalUserGroups] = useState([]);
  const [shouldFetchUserGroups, setShouldFetchUserGroups] = useState(
    forSpecificScope === Scope.USER_GROUP
  );

  useEffect(() => {
    const fetchData = async () => {
      try {
        const response = await fetch("/api/manage/admin/user-group");
        const data = await response.json();
        const options = data.map((userGroup: UserGroup) => ({
          name: userGroup.name,
          value: userGroup.id,
        }));
        setModalUserGroups(options);
        setShouldFetchUserGroups(false);
      } catch (error) {
        toast.error(`Failed to fetch user groups: ${error}`);
      }
    };

    if (shouldFetchUserGroups) {
      fetchData();
    }
  }, [shouldFetchUserGroups]);

  return (
    <Modal open={isOpen} onOpenChange={() => setIsOpen(false)}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgSettings}
          title="Create a Token Rate Limit"
          onClose={() => setIsOpen(false)}
        />
        <Modal.Body>
          <Formik
            initialValues={{
              enabled: true,
              period_hours: "",
              token_budget: "",
              target_scope: forSpecificScope || Scope.GLOBAL,
              user_group_id: forSpecificUserGroup,
            }}
            validationSchema={Yup.object().shape({
              period_hours: Yup.number()
                .required("Time Window is a required field")
                .min(1, "Time Window must be at least 1 hour"),
              token_budget: Yup.number()
                .required("Token Budget is a required field")
                .min(1, "Token Budget must be at least 1"),
              target_scope: Yup.string().required(
                "Target Scope is a required field"
              ),
              user_group_id: Yup.string().test(
                "user_group_id",
                "User Group is a required field",
                (value, context) => {
                  return (
                    context.parent.target_scope !== "user_group" ||
                    (context.parent.target_scope === "user_group" &&
                      value !== undefined)
                  );
                }
              ),
            })}
            onSubmit={async (values, formikHelpers) => {
              formikHelpers.setSubmitting(true);
              onSubmit(
                values.target_scope,
                Number(values.period_hours),
                Number(values.token_budget),
                Number(values.user_group_id)
              );
              return formikHelpers.setSubmitting(false);
            }}
          >
            {({ isSubmitting, values, setFieldValue }) => (
              <Form className="overflow-visible px-2">
                {!forSpecificScope && (
                  <SelectorFormField
                    name="target_scope"
                    label="Target Scope"
                    options={[
                      { name: "Global", value: Scope.GLOBAL },
                      { name: "User", value: Scope.USER },
                      { name: "User Group", value: Scope.USER_GROUP },
                    ]}
                    includeDefault={false}
                    onSelect={(selected) => {
                      setFieldValue("target_scope", selected);
                      if (selected === Scope.USER_GROUP) {
                        setShouldFetchUserGroups(true);
                      }
                    }}
                  />
                )}
                {forSpecificUserGroup === undefined &&
                  values.target_scope === Scope.USER_GROUP && (
                    <SelectorFormField
                      name="user_group_id"
                      label="User Group"
                      options={modalUserGroups}
                      includeDefault={false}
                    />
                  )}
                <TextFormField
                  name="period_hours"
                  label="Time Window (Hours)"
                  type="number"
                  placeholder=""
                />
                <TextFormField
                  name="token_budget"
                  label="Token Budget (Thousands)"
                  type="number"
                  placeholder=""
                />
                <Button disabled={isSubmitting} type="submit">
                  Create
                </Button>
              </Form>
            )}
          </Formik>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx
================================================
"use client";

import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import Title from "@/components/ui/title";
import { DeleteButton } from "@/components/DeleteButton";
import { deleteTokenRateLimit, updateTokenRateLimit } from "./lib";
import { ThreeDotsLoader } from "@/components/Loading";
import { TokenRateLimitDisplay } from "./types";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR, { mutate } from "swr";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { TableHeader } from "@/components/ui/table";
import { Text } from "@opal/components";
import Spacer from "@/refresh-components/Spacer";

type TokenRateLimitTableArgs = {
  tokenRateLimits: TokenRateLimitDisplay[];
  title?: string;
  description?: string;
  fetchUrl: string;
  hideHeading?: boolean;
  isAdmin: boolean;
};

export const TokenRateLimitTable = ({
  tokenRateLimits,
  title,
  description,
  fetchUrl,
  hideHeading,
  isAdmin,
}: TokenRateLimitTableArgs) => {
  const shouldRenderGroupName = () =>
    tokenRateLimits.length > 0 &&
    tokenRateLimits[0] !== undefined &&
    tokenRateLimits[0].group_name !== undefined;

  const handleEnabledChange = (id: number) => {
    const tokenRateLimit = tokenRateLimits.find(
      (tokenRateLimit) => tokenRateLimit.token_id === id
    );

    if (!tokenRateLimit) {
      return;
    }

    updateTokenRateLimit(id, {
      token_budget: tokenRateLimit.token_budget,
      period_hours: tokenRateLimit.period_hours,
      enabled: !tokenRateLimit.enabled,
    }).then(() => {
      mutate(fetchUrl);
    });
  };

  const handleDelete = (id: number) =>
    deleteTokenRateLimit(id).then(() => {
      mutate(fetchUrl);
    });

  if (tokenRateLimits.length === 0) {
    return (
      <div className="w-full">
        {!hideHeading && title && <Title>{title}</Title>}
        {!hideHeading && description && (
          <>
            <Spacer rem={0.5} />
            <Text as="p">{description}</Text>
            <Spacer rem={0.5} />
          </>
        )}
        {!hideHeading && <Spacer rem={2} />}
        <Text as="p">No token rate limits set!</Text>
        {!hideHeading && <Spacer rem={2} />}
      </div>
    );
  }

  return (
    <div className="w-full">
      {!hideHeading && title && <Title>{title}</Title>}
      {!hideHeading && description && (
        <>
          <Spacer rem={0.5} />
          <Text as="p">{description}</Text>
          <Spacer rem={0.5} />
        </>
      )}
      <Table
        className={`overflow-visible ${
          !hideHeading && "my-8"
        } [&_td]:text-center [&_th]:text-center`}
      >
        <TableHeader>
          <TableRow>
            <TableHead>Enabled</TableHead>
            {shouldRenderGroupName() && <TableHead>Group Name</TableHead>}
            <TableHead>Time Window (Hours)</TableHead>
            <TableHead>Token Budget (Thousands)</TableHead>
            {isAdmin && <TableHead>Delete</TableHead>}
          </TableRow>
        </TableHeader>
        <TableBody>
          {tokenRateLimits.map((tokenRateLimit) => {
            return (
              <TableRow key={tokenRateLimit.token_id}>
                <TableCell>
                  <div className="flex justify-center">
                    <div
                      onClick={
                        isAdmin
                          ? () => handleEnabledChange(tokenRateLimit.token_id)
                          : undefined
                      }
                      className={`px-1 py-0.5 rounded select-none w-24 ${
                        isAdmin
                          ? "hover:bg-accent-background cursor-pointer"
                          : "opacity-50"
                      }`}
                    >
                      <div className="flex items-center justify-center">
                        <Checkbox
                          checked={tokenRateLimit.enabled}
                          onCheckedChange={
                            isAdmin
                              ? () =>
                                  handleEnabledChange(tokenRateLimit.token_id)
                              : undefined
                          }
                        />
                        <p className="ml-2">
                          {tokenRateLimit.enabled ? "Enabled" : "Disabled"}
                        </p>
                      </div>
                    </div>
                  </div>
                </TableCell>
                {shouldRenderGroupName() && (
                  <TableCell className="font-bold text-text-darker">
                    {tokenRateLimit.group_name}
                  </TableCell>
                )}
                <TableCell>
                  {tokenRateLimit.period_hours +
                    " hour" +
                    (tokenRateLimit.period_hours > 1 ? "s" : "")}
                </TableCell>
                <TableCell>
                  {tokenRateLimit.token_budget + " thousand tokens"}
                </TableCell>
                {isAdmin && (
                  <TableCell>
                    <div className="flex justify-center">
                      <DeleteButton
                        onClick={() => handleDelete(tokenRateLimit.token_id)}
                      />
                    </div>
                  </TableCell>
                )}
              </TableRow>
            );
          })}
        </TableBody>
      </Table>
    </div>
  );
};

export const GenericTokenRateLimitTable = ({
  fetchUrl,
  title,
  description,
  hideHeading,
  responseMapper,
  isAdmin = true,
}: {
  fetchUrl: string;
  title?: string;
  description?: string;
  hideHeading?: boolean;
  responseMapper?: (data: any) => TokenRateLimitDisplay[];
  isAdmin?: boolean;
}) => {
  const { data, isLoading, error } = useSWR<TokenRateLimitDisplay[]>(
    fetchUrl,
    errorHandlingFetcher
  );

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (!isLoading && error) {
    return <Text as="p">Failed to load token rate limits</Text>;
  }

  let processedData = data;
  if (responseMapper) {
    processedData = responseMapper(data);
  }

  return (
    <TokenRateLimitTable
      tokenRateLimits={processedData ?? []}
      fetchUrl={fetchUrl}
      title={title}
      description={description}
      hideHeading={hideHeading}
      isAdmin={isAdmin}
    />
  );
};


================================================
FILE: web/src/app/admin/token-rate-limits/lib.ts
================================================
import { TokenRateLimitArgs } from "./types";

const API_PREFIX = "/api/admin/token-rate-limits";

// Global Token Limits
export const insertGlobalTokenRateLimit = async (
  tokenRateLimit: TokenRateLimitArgs
) => {
  return await fetch(`${API_PREFIX}/global`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(tokenRateLimit),
  });
};

// User Token Limits
export const insertUserTokenRateLimit = async (
  tokenRateLimit: TokenRateLimitArgs
) => {
  return await fetch(`${API_PREFIX}/users`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(tokenRateLimit),
  });
};

// User Group Token Limits (EE Only)
export const insertGroupTokenRateLimit = async (
  tokenRateLimit: TokenRateLimitArgs,
  group_id: number
) => {
  return await fetch(`${API_PREFIX}/user-group/${group_id}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(tokenRateLimit),
  });
};

// Common Endpoints

export const deleteTokenRateLimit = async (token_rate_limit_id: number) => {
  return await fetch(`${API_PREFIX}/rate-limit/${token_rate_limit_id}`, {
    method: "DELETE",
  });
};

export const updateTokenRateLimit = async (
  token_rate_limit_id: number,
  tokenRateLimit: TokenRateLimitArgs
) => {
  return await fetch(`${API_PREFIX}/rate-limit/${token_rate_limit_id}`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(tokenRateLimit),
  });
};


================================================
FILE: web/src/app/admin/token-rate-limits/page.tsx
================================================
"use client";

import SimpleTabs from "@/refresh-components/SimpleTabs";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Text } from "@opal/components";
import { useState } from "react";
import {
  insertGlobalTokenRateLimit,
  insertGroupTokenRateLimit,
  insertUserTokenRateLimit,
} from "./lib";
import { Scope, TokenRateLimit } from "./types";
import { GenericTokenRateLimitTable } from "./TokenRateLimitTables";
import { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { toast } from "@/hooks/useToast";
import CreateRateLimitModal from "./CreateRateLimitModal";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { SvgGlobe, SvgUser, SvgUsers } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.TOKEN_RATE_LIMITS;
const GLOBAL_TOKEN_FETCH_URL = SWR_KEYS.globalTokenRateLimits;
const USER_TOKEN_FETCH_URL = SWR_KEYS.userTokenRateLimits;
const USER_GROUP_FETCH_URL = SWR_KEYS.userGroupTokenRateLimits;

const GLOBAL_DESCRIPTION =
  "Global rate limits apply to all users, user groups, and API keys. When the global \
  rate limit is reached, no more tokens can be spent.";
const USER_DESCRIPTION =
  "User rate limits apply to individual users. When a user reaches a limit, they will \
  be temporarily blocked from spending tokens.";
const USER_GROUP_DESCRIPTION =
  "User group rate limits apply to all users in a group. When a group reaches a limit, \
  all users in the group will be temporarily blocked from spending tokens, regardless \
  of their individual limits. If a user is in multiple groups, the most lenient limit \
  will apply.";

const handleCreateTokenRateLimit = async (
  target_scope: Scope,
  period_hours: number,
  token_budget: number,
  group_id: number = -1
) => {
  const tokenRateLimitArgs = {
    enabled: true,
    token_budget: token_budget,
    period_hours: period_hours,
  };

  if (target_scope === Scope.GLOBAL) {
    return await insertGlobalTokenRateLimit(tokenRateLimitArgs);
  } else if (target_scope === Scope.USER) {
    return await insertUserTokenRateLimit(tokenRateLimitArgs);
  } else if (target_scope === Scope.USER_GROUP) {
    return await insertGroupTokenRateLimit(tokenRateLimitArgs, group_id);
  } else {
    throw new Error(`Invalid target_scope: ${target_scope}`);
  }
};

function Main() {
  const [tabIndex, setTabIndex] = useState(0);
  const [modalIsOpen, setModalIsOpen] = useState(false);

  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const updateTable = (target_scope: Scope) => {
    if (target_scope === Scope.GLOBAL) {
      mutate(GLOBAL_TOKEN_FETCH_URL);
      setTabIndex(0);
    } else if (target_scope === Scope.USER) {
      mutate(USER_TOKEN_FETCH_URL);
      setTabIndex(1);
    } else if (target_scope === Scope.USER_GROUP) {
      mutate(USER_GROUP_FETCH_URL);
      setTabIndex(2);
    }
  };

  const handleSubmit = (
    target_scope: Scope,
    period_hours: number,
    token_budget: number,
    group_id: number = -1
  ) => {
    handleCreateTokenRateLimit(
      target_scope,
      period_hours,
      token_budget,
      group_id
    )
      .then(() => {
        setModalIsOpen(false);
        toast.success("Token rate limit created!");
        updateTable(target_scope);
      })
      .catch((error) => {
        toast.error(error.message);
      });
  };

  return (
    <Section alignItems="stretch" justifyContent="start" height="auto">
      <Text as="p">
        Token rate limits enable you control how many tokens can be spent in a
        given time period. With token rate limits, you can:
      </Text>

      <ul className="list-disc ml-4">
        <li>
          <Text as="p">
            Set a global rate limit to control your team&apos;s overall token
            spend.
          </Text>
        </li>
        {isPaidEnterpriseFeaturesEnabled && (
          <>
            <li>
              <Text as="p">
                Set rate limits for users to ensure that no single user can
                spend too many tokens.
              </Text>
            </li>
            <li>
              <Text as="p">
                Set rate limits for user groups to control token spend for your
                teams.
              </Text>
            </li>
          </>
        )}
        <li>
          <Text as="p">Enable and disable rate limits on the fly.</Text>
        </li>
      </ul>

      <CreateButton onClick={() => setModalIsOpen(true)}>
        Create a Token Rate Limit
      </CreateButton>

      {isPaidEnterpriseFeaturesEnabled ? (
        <SimpleTabs
          tabs={{
            "0": {
              name: "Global",
              icon: SvgGlobe,
              content: (
                <GenericTokenRateLimitTable
                  fetchUrl={GLOBAL_TOKEN_FETCH_URL}
                  title={"Global Token Rate Limits"}
                  description={GLOBAL_DESCRIPTION}
                />
              ),
            },
            "1": {
              name: "User",
              icon: SvgUser,
              content: (
                <GenericTokenRateLimitTable
                  fetchUrl={USER_TOKEN_FETCH_URL}
                  title={"User Token Rate Limits"}
                  description={USER_DESCRIPTION}
                />
              ),
            },
            "2": {
              name: "User Groups",
              icon: SvgUsers,
              content: (
                <GenericTokenRateLimitTable
                  fetchUrl={USER_GROUP_FETCH_URL}
                  title={"User Group Token Rate Limits"}
                  description={USER_GROUP_DESCRIPTION}
                  responseMapper={(data: Record<string, TokenRateLimit[]>) =>
                    Object.entries(data).flatMap(([group_name, elements]) =>
                      elements.map((element) => ({
                        ...element,
                        group_name,
                      }))
                    )
                  }
                />
              ),
            },
          }}
          value={tabIndex.toString()}
          onValueChange={(val) => setTabIndex(parseInt(val))}
        />
      ) : (
        <GenericTokenRateLimitTable
          fetchUrl={GLOBAL_TOKEN_FETCH_URL}
          title={"Global Token Rate Limits"}
          description={GLOBAL_DESCRIPTION}
        />
      )}

      <CreateRateLimitModal
        isOpen={modalIsOpen}
        setIsOpen={() => setModalIsOpen(false)}
        onSubmit={handleSubmit}
        forSpecificScope={
          isPaidEnterpriseFeaturesEnabled ? undefined : Scope.GLOBAL
        }
      />
    </Section>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header title={route.title} icon={route.icon} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/admin/token-rate-limits/types.ts
================================================
export enum Scope {
  USER = "user",
  USER_GROUP = "user_group",
  GLOBAL = "global",
}

export interface TokenRateLimitArgs {
  enabled: boolean;
  token_budget: number;
  period_hours: number;
}

export interface TokenRateLimit {
  token_id: number;
  enabled: boolean;
  token_budget: number;
  period_hours: number;
}

export interface TokenRateLimitDisplay extends TokenRateLimit {
  group_name?: string;
}


================================================
FILE: web/src/app/admin/users/page.tsx
================================================
export { default } from "@/refresh-pages/admin/UsersPage";


================================================
FILE: web/src/app/anonymous/[id]/AnonymousPage.tsx
================================================
"use client";
import { redirect } from "next/navigation";
import { useEffect } from "react";

export default function AnonymousPage({
  anonymousPath,
}: {
  anonymousPath: string;
}) {
  const loginAsAnonymousUser = async () => {
    try {
      const response = await fetch(
        `/api/tenants/anonymous-user?anonymous_user_path=${encodeURIComponent(
          anonymousPath
        )}`,
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          credentials: "same-origin",
        }
      );

      if (!response.ok) {
        console.error("Failed to login as anonymous user", response);
        throw new Error("Failed to login as anonymous user");
      }
      // Redirect to the chat page and force a refresh
      window.location.href = "/app";
    } catch (error) {
      console.error("Error logging in as anonymous user:", error);
      redirect("/auth/signup?error=Anonymous");
    }
  };

  useEffect(() => {
    loginAsAnonymousUser();
  }, []);

  return (
    <div className="flex flex-col items-center justify-center min-h-screen bg-background-100">
      <div className="bg-white p-8 rounded-lg shadow-md">
        <h1 className="text-2xl font-bold mb-4 text-center">
          Redirecting you to the chat page...
        </h1>
        <div className="flex justify-center">
          <div className="animate-spin rounded-full h-16 w-16 border-t-4 border-b-4 border-background-800"></div>
        </div>
        <p className="mt-4 text-text-600 text-center">
          Please wait while we set up your anonymous session.
        </p>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/anonymous/[id]/page.tsx
================================================
import AnonymousPage from "./AnonymousPage";

export default async function Page(props: { params: Promise<{ id: string }> }) {
  const params = await props.params;

  return <AnonymousPage anonymousPath={params.id} />;
}


================================================
FILE: web/src/app/api/[...path]/route.ts
================================================
import { INTERNAL_URL } from "@/lib/constants";
import { NextRequest, NextResponse } from "next/server";

/* NextJS is annoying and makes use use a separate function for
each request type >:( */

export async function GET(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function POST(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function PUT(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function PATCH(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function DELETE(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function HEAD(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

export async function OPTIONS(
  request: NextRequest,
  props: { params: Promise<{ path: string[] }> }
) {
  const params = await props.params;
  return handleRequest(request, params.path);
}

async function handleRequest(request: NextRequest, path: string[]) {
  if (
    process.env.NODE_ENV !== "development" &&
    // NOTE: Set this environment variable to 'true' for preview environments
    // Where you want finer-grained control over API access
    process.env.OVERRIDE_API_PRODUCTION !== "true"
  ) {
    return NextResponse.json(
      {
        message:
          "This API is only available in development mode. In production, something else (e.g. nginx) should handle this.",
      },
      { status: 404 }
    );
  }

  try {
    const backendUrl = new URL(`${INTERNAL_URL}/${path.join("/")}`);

    // Get the URL parameters from the request
    const urlParams = new URLSearchParams(request.url.split("?")[1]);

    // Append the URL parameters to the backend URL
    urlParams.forEach((value, key) => {
      backendUrl.searchParams.append(key, value);
    });

    // Build headers, optionally injecting debug auth cookie
    const headers = new Headers(request.headers);
    if (
      process.env.DEBUG_AUTH_COOKIE &&
      process.env.NODE_ENV === "development"
    ) {
      // Inject the debug auth cookie for local development against remote backend
      // Get from cloud site: DevTools → Application → Cookies → fastapiusersauth
      const existingCookies = headers.get("cookie") || "";
      const debugCookie = `fastapiusersauth=${process.env.DEBUG_AUTH_COOKIE}`;
      headers.set(
        "cookie",
        existingCookies ? `${existingCookies}; ${debugCookie}` : debugCookie
      );
    }

    const response = await fetch(backendUrl, {
      method: request.method,
      headers: headers,
      body: request.body,
      signal: request.signal,
      redirect: "manual",
      // @ts-ignore
      duplex: "half",
    });

    const setCookies =
      // @ts-ignore - undici provides getSetCookie in Node.
      response.headers.getSetCookie?.() ??
      (response.headers.get("set-cookie")
        ? [response.headers.get("set-cookie")]
        : []);

    const responseHeaders = new Headers(response.headers);
    responseHeaders.delete("set-cookie");

    // Check if the response is a stream
    if (
      response.headers.get("Transfer-Encoding") === "chunked" ||
      response.headers.get("Content-Type")?.includes("stream")
    ) {
      // If it's a stream, create a TransformStream to pass the data through
      const { readable, writable } = new TransformStream();
      response.body?.pipeTo(writable);

      const proxyResponse = new NextResponse(readable, {
        status: response.status,
        headers: responseHeaders,
      });
      for (const cookie of setCookies) {
        if (cookie) {
          proxyResponse.headers.append("set-cookie", cookie);
        }
      }
      return proxyResponse;
    } else {
      const proxyResponse = new NextResponse(response.body, {
        status: response.status,
        headers: responseHeaders,
      });
      for (const cookie of setCookies) {
        if (cookie) {
          proxyResponse.headers.append("set-cookie", cookie);
        }
      }
      return proxyResponse;
    }
  } catch (error: unknown) {
    console.error("Proxy error:", error);
    return NextResponse.json(
      {
        message: "Proxy error",
        error:
          error instanceof Error ? error.message : "An unknown error occurred",
      },
      { status: 500 }
    );
  }
}


================================================
FILE: web/src/app/api/chat/mcp/oauth/callback/route.ts
================================================
import { NextRequest, NextResponse } from "next/server";

// Proxies browser callback to backend OAuth callback endpoint and then
// redirects back to the chat UI.

export async function GET(req: NextRequest) {
  const url = new URL(req.url);
  const code = url.searchParams.get("code");
  const state = url.searchParams.get("state");
  const serverId =
    url.searchParams.get("server_id") || url.searchParams.get("serverId");
  const codeVerifier = url.searchParams.get("code_verifier");

  if (!code || !serverId) {
    return NextResponse.json(
      { error: "Missing code or server_id" },
      { status: 400 }
    );
  }

  try {
    const resp = await fetch(
      `${
        process.env.NEXT_PUBLIC_ONYX_BACKEND_URL || ""
      }/api/mcp/oauth/callback`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          server_id: serverId,
          code,
          state,
          code_verifier: codeVerifier,
          transport: "streamable-http",
        }),
        // Ensure cookies/auth forwarded if needed
        credentials: "include",
      }
    );

    if (!resp.ok) {
      const err = await resp.json().catch(() => ({}) as any);
      return NextResponse.json(
        { error: err.detail || "OAuth callback failed" },
        { status: 400 }
      );
    }

    // Check if this is an admin OAuth flow
    const isAdminFlow = url.searchParams.get("admin") === "true";

    // Redirect back to appropriate page
    let redirectTo = url.searchParams.get("redirect_to");
    if (!redirectTo) {
      if (isAdminFlow) {
        // For admin flow, redirect back to the MCP edit page
        redirectTo = `/admin/actions/edit-mcp?server_id=${serverId}`;
      } else {
        // For user flow, redirect to chat
        redirectTo = "/app";
      }
    }

    return NextResponse.redirect(new URL(redirectTo, req.url));
  } catch (e) {
    return NextResponse.json(
      { error: "OAuth callback error" },
      { status: 500 }
    );
  }
}


================================================
FILE: web/src/app/app/agents/create/page.tsx
================================================
import AgentEditorPage from "@/refresh-pages/AgentEditorPage";
import * as AppLayouts from "@/layouts/app-layouts";

export default async function Page() {
  return (
    <AppLayouts.Root>
      <AgentEditorPage />
    </AppLayouts.Root>
  );
}


================================================
FILE: web/src/app/app/agents/edit/[id]/page.tsx
================================================
"use client";

import { use, useEffect } from "react";
import { useRouter } from "next/navigation";
import { useAgent } from "@/hooks/useAgents";
import AgentEditorPage from "@/refresh-pages/AgentEditorPage";
import * as AppLayouts from "@/layouts/app-layouts";

export interface PageProps {
  params: Promise<{ id: string }>;
}

export default function Page(props: PageProps) {
  const router = useRouter();
  const { id } = use(props.params);
  const agentId = parseInt(id);

  // Call hook unconditionally (passes null when ID is invalid)
  const { agent, isLoading, refresh } = useAgent(
    isNaN(agentId) ? null : agentId
  );

  // Handle invalid ID (NaN)
  useEffect(() => {
    if (isNaN(agentId)) {
      router.push("/app");
    }
  }, [agentId, router]);

  // Redirect to home if agent not found after loading completes
  useEffect(() => {
    if (!isLoading && !agent) {
      router.push("/app");
    }
  }, [isLoading, agent, router]);

  // Show nothing while redirecting or loading
  if (isLoading || !agent) return null;

  return (
    <AppLayouts.Root>
      <AgentEditorPage agent={agent} refreshAgent={refresh} />
    </AppLayouts.Root>
  );
}


================================================
FILE: web/src/app/app/agents/page.tsx
================================================
import AgentsNavigationPage from "@/refresh-pages/AgentsNavigationPage";
import * as AppLayouts from "@/layouts/app-layouts";

export default async function Page() {
  return (
    <AppLayouts.Root>
      <AgentsNavigationPage />
    </AppLayouts.Root>
  );
}


================================================
FILE: web/src/app/app/components/AgentDescription.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";

export interface AgentDescriptionProps {
  agent?: MinimalPersonaSnapshot;
}

export default function AgentDescription({ agent }: AgentDescriptionProps) {
  if (!agent?.description) return null;

  return (
    <Text
      as="p"
      secondaryBody
      text03
      className="w-full min-w-0 text-center break-words"
    >
      {agent.description}
    </Text>
  );
}


================================================
FILE: web/src/app/app/components/AppPopup.tsx
================================================
"use client";

import Modal from "@/refresh-components/Modal";
import { SettingsContext } from "@/providers/SettingsProvider";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { FormField } from "@/refresh-components/form/FormField";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { useContext, useEffect, useState } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { transformLinkUri } from "@/lib/utils";
import { SvgAlertCircle } from "@opal/icons";
import { IconProps, OnyxIcon } from "@/components/icons/icons";

const ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED =
  "allUsersInitialPopupFlowCompleted";

const CustomLogoHeaderIcon = ({ className, size = 24 }: IconProps) => (
  <img
    src="/api/enterprise-settings/logo"
    alt="Logo"
    style={{ width: size, height: size, objectFit: "contain" }}
    className={className}
  />
);

export function AppPopup() {
  const [completedFlow, setCompletedFlow] = useState(true);
  const [showConsentError, setShowConsentError] = useState(false);
  const [consentChecked, setConsentChecked] = useState(false);

  useEffect(() => {
    setCompletedFlow(
      localStorage.getItem(ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED) === "true"
    );
  }, []);

  const settings = useContext(SettingsContext);
  const enterpriseSettings = settings?.enterpriseSettings;
  const isConsentScreen = enterpriseSettings?.enable_consent_screen;

  if (
    !enterpriseSettings?.custom_popup_content ||
    completedFlow ||
    !enterpriseSettings?.show_first_visit_notice
  ) {
    return null;
  }

  const popupTitle = enterpriseSettings?.custom_popup_header;

  const popupContent = enterpriseSettings?.custom_popup_content;

  const hasApplicationName = Boolean(
    enterpriseSettings?.application_name?.trim()
  );
  const hasCustomLogo = Boolean(enterpriseSettings?.use_custom_logo);
  const logoDisplayStyle = enterpriseSettings?.logo_display_style;

  // Header icon rules:
  // - If neither app name nor custom logo exists -> show Onyx icon
  // - If logo display is "name_only" -> show alert icon
  // - Otherwise -> show uploaded custom logo (fallback to Onyx icon)
  const headerIcon =
    !hasApplicationName && !hasCustomLogo
      ? (props: IconProps) => <OnyxIcon size={24} {...props} />
      : logoDisplayStyle === "name_only"
        ? SvgAlertCircle
        : hasCustomLogo
          ? CustomLogoHeaderIcon
          : (props: IconProps) => <OnyxIcon size={24} {...props} />;

  return (
    <Modal open onOpenChange={() => {}}>
      <Modal.Content width="sm" height="lg">
        <Modal.Header
          icon={headerIcon}
          title={popupTitle || "Welcome to Onyx!"}
        />
        <Modal.Body>
          <div className="overflow-y-auto text-left">
            <ReactMarkdown
              className="prose prose-neutral dark:prose-invert max-w-full"
              components={{
                a: ({ node, ...props }) => (
                  <a
                    {...props}
                    className="text-link hover:text-link-hover"
                    target="_blank"
                    rel="noopener noreferrer"
                  />
                ),
                p: ({ node, ...props }) => (
                  <Text as="p" mainUiBody text03 {...props} />
                ),
                strong: ({ node, ...props }) => (
                  <Text mainUiBody text03 {...props} />
                ),
                h1: ({ node, ...props }) => (
                  <Text as="p" headingH1 text03 {...props} />
                ),
                h2: ({ node, ...props }) => (
                  <Text as="p" headingH2 text03 {...props} />
                ),
                h3: ({ node, ...props }) => (
                  <Text as="p" headingH3 text03 {...props} />
                ),
                li: ({ node, ...props }) => (
                  <Text as="li" mainUiBody text03 {...props} />
                ),
              }}
              remarkPlugins={[remarkGfm]}
              urlTransform={transformLinkUri}
            >
              {popupContent}
            </ReactMarkdown>
            {isConsentScreen && enterpriseSettings?.consent_screen_prompt && (
              <FormField
                state={showConsentError ? "error" : "idle"}
                className="mt-6"
              >
                <div className="flex items-center gap-1">
                  <FormField.Control>
                    <Checkbox
                      aria-label="Consent checkbox"
                      checked={consentChecked}
                      onCheckedChange={(checked) => {
                        setConsentChecked(checked);
                        if (checked) {
                          setShowConsentError(false);
                        }
                      }}
                    />
                  </FormField.Control>
                  <FormField.Label>
                    <ReactMarkdown
                      className="prose prose-neutral dark:prose-invert max-w-full"
                      components={{
                        a: ({ node, ...props }) => (
                          <a
                            {...props}
                            className="text-link hover:text-link-hover"
                            target="_blank"
                            rel="noopener noreferrer"
                          />
                        ),
                        p: ({ node, ...props }) => (
                          <Text
                            as="p"
                            mainUiBody
                            text04
                            className="!my-0" //dont remove the !my-0 class, it's important for the markdown to render without any alignment issues
                            {...props}
                          />
                        ),
                        strong: ({ node, ...props }) => (
                          <Text mainUiBody text04 {...props} />
                        ),
                        li: ({ node, ...props }) => (
                          <Text as="li" mainUiBody text04 {...props} />
                        ),
                      }}
                      remarkPlugins={[remarkGfm]}
                      urlTransform={transformLinkUri}
                    >
                      {enterpriseSettings.consent_screen_prompt}
                    </ReactMarkdown>
                  </FormField.Label>
                </div>
                <FormField.Message
                  messages={{
                    error:
                      "You need to agree to the terms to access the application.",
                  }}
                />
              </FormField>
            )}
          </div>
        </Modal.Body>
        <Modal.Footer>
          <Button
            onClick={() => {
              if (isConsentScreen && !consentChecked) {
                setShowConsentError(true);
                return;
              }
              localStorage.setItem(
                ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED,
                "true"
              );
              setCompletedFlow(true);
            }}
          >
            Start
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/app/components/WelcomeMessage.tsx
================================================
"use client";

import Logo from "@/refresh-components/Logo";
import {
  getRandomGreeting,
  GREETING_MESSAGES,
} from "@/lib/chat/greetingMessages";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import Text from "@/refresh-components/texts/Text";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { useState, useEffect } from "react";
import { useSettingsContext } from "@/providers/SettingsProvider";
import FrostedDiv from "@/refresh-components/FrostedDiv";

export interface WelcomeMessageProps {
  agent?: MinimalPersonaSnapshot;
  isDefaultAgent: boolean;
}

export default function WelcomeMessage({
  agent,
  isDefaultAgent,
}: WelcomeMessageProps) {
  const settings = useSettingsContext();
  const enterpriseSettings = settings?.enterpriseSettings;

  // Use a stable default for SSR, then randomize on client after hydration
  const [greeting, setGreeting] = useState(GREETING_MESSAGES[0]);

  useEffect(() => {
    if (enterpriseSettings?.custom_greeting_message) {
      setGreeting(enterpriseSettings.custom_greeting_message);
    } else {
      setGreeting(getRandomGreeting());
    }
  }, [enterpriseSettings?.custom_greeting_message]);

  let content: React.ReactNode = null;

  if (isDefaultAgent) {
    content = (
      <div data-testid="onyx-logo" className="flex flex-row items-center gap-4">
        <Logo folded size={32} />
        <Text as="p" headingH2>
          {greeting}
        </Text>
      </div>
    );
  } else if (agent) {
    content = (
      <>
        <div
          data-testid="agent-name-display"
          className="flex flex-row items-center gap-3"
        >
          <AgentAvatar agent={agent} size={36} />
          <Text as="p" headingH2>
            {agent.name}
          </Text>
        </div>
      </>
    );
  }

  // if we aren't using the default agent, we need to wait for the agent info to load
  // before rendering
  if (!content) return null;

  return (
    <FrostedDiv
      data-testid="chat-intro"
      className="flex flex-col items-center justify-center gap-3 w-full max-w-[var(--app-page-main-content-width)]"
    >
      {content}
    </FrostedDiv>
  );
}


================================================
FILE: web/src/app/app/components/files/InputBarPreview.tsx
================================================
import { useRef, useState } from "react";
import { FileDescriptor } from "@/app/app/interfaces";
import { FiLoader, FiFileText } from "react-icons/fi";
import { InputBarPreviewImage } from "./images/InputBarPreviewImage";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { Button } from "@opal/components";
import { SvgX } from "@opal/icons";
export interface InputBarPreviewImageProviderProps {
  file: FileDescriptor;
  onDelete: () => void;
  isUploading: boolean;
}

export function InputBarPreviewImageProvider({
  file,
  onDelete,
  isUploading,
}: InputBarPreviewImageProviderProps) {
  const [isHovered, setIsHovered] = useState(false);

  return (
    <div
      className="h-6 relative"
      onMouseEnter={() => setIsHovered(true)}
      onMouseLeave={() => setIsHovered(false)}
    >
      {isHovered && (
        <Button
          icon={SvgX}
          onClick={onDelete}
          prominence="tertiary"
          size="sm"
        />
      )}
      {isUploading && (
        <div
          className="
            absolute
            inset-0
            flex
            items-center
            justify-center
            bg-opacity-50
            rounded-lg
            z-0
          "
        >
          <FiLoader className="animate-spin text-white" />
        </div>
      )}
      <InputBarPreviewImage fileId={file.id} />
    </div>
  );
}

export interface InputBarPreviewProps {
  file: FileDescriptor;
  onDelete: () => void;
  isUploading: boolean;
}

export function InputBarPreview({
  file,
  onDelete,
  isUploading,
}: InputBarPreviewProps) {
  const fileNameRef = useRef<HTMLDivElement>(null);

  return (
    <div className="relative">
      {isUploading && (
        <div
          className="
            absolute
            inset-0
            flex
            items-center
            justify-center
            bg-opacity-50
            rounded-lg
            z-0
          "
        >
          <FiLoader size={12} className="animate-spin text-white" />
        </div>
      )}
      <div
        className={`
        flex
        items-center
        px-2
        bg-accent-background-hovered
        border
        gap-x-1.5
        border-border
        rounded-md
        box-border
        h-8
      `}
      >
        <div className="flex-shrink-0">
          <div
            className="
            w-5
            h-5
            bg-document
            flex
            items-center
            justify-center
            rounded-md
          "
          >
            <FiFileText size={12} className="text-white" />
          </div>
        </div>

        <SimpleTooltip tooltip={file.name ?? undefined}>
          <div
            ref={fileNameRef}
            className={`font-medium text-sm line-clamp-1 break-all ellipses max-w-48`}
          >
            {file.name}
          </div>
        </SimpleTooltip>

        <Button
          onClick={onDelete}
          icon={SvgX}
          prominence="tertiary"
          size="sm"
        />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/app/components/files/images/FullImageModal.tsx
================================================
"use client";

import { useEffect } from "react";
import { buildImgUrl } from "@/app/app/components/files/images/utils";
import { cn } from "@/lib/utils";
import * as Dialog from "@radix-ui/react-dialog";

interface FullImageModalProps {
  fileId: string;
  open: boolean;
  onOpenChange: (open: boolean) => void;
}

export function FullImageModal({
  fileId,
  open,
  onOpenChange,
}: FullImageModalProps) {
  // pre-fetch image
  useEffect(() => {
    const img = new Image();
    img.src = buildImgUrl(fileId);
  }, [fileId]);

  return (
    <Dialog.Root open={open} onOpenChange={onOpenChange}>
      <Dialog.Portal>
        <Dialog.Overlay className="fixed inset-0 bg-black bg-opacity-80 z-50 backdrop-blur-xl" />
        <Dialog.Content
          className={cn(
            "fixed inset-0 flex items-center justify-center p-4 z-[100]",
            "max-w-screen-lg h-fit top-1/2 left-1/2 -translate-y-2/4 -translate-x-2/4",
            "focus:outline-none"
          )}
        >
          <img
            src={buildImgUrl(fileId)}
            alt="Uploaded image"
            className="max-w-full max-h-full"
          />
        </Dialog.Content>
      </Dialog.Portal>
    </Dialog.Root>
  );
}


================================================
FILE: web/src/app/app/components/files/images/InMessageImage.tsx
================================================
import { memo, useState } from "react";
import { SvgDownload } from "@opal/icons";
import { ImageShape } from "@/app/app/services/streamingModels";
import { FullImageModal } from "@/app/app/components/files/images/FullImageModal";
import { buildImgUrl } from "@/app/app/components/files/images/utils";
import { Button } from "@opal/components";
import { Hoverable } from "@opal/core";
import { cn } from "@/lib/utils";

const DEFAULT_SHAPE: ImageShape = "square";

const SHAPE_CLASSES: Record<ImageShape, { container: string; image: string }> =
  {
    square: {
      container: "max-w-96 max-h-96",
      image: "max-w-96 max-h-96",
    },
    landscape: {
      container: "max-w-[28rem] max-h-72",
      image: "max-w-[28rem] max-h-72",
    },
    portrait: {
      container: "max-w-72 max-h-[28rem]",
      image: "max-w-72 max-h-[28rem]",
    },
  };

// Used to stop image flashing as images are loaded and response continues
const loadedImages = new Set<string>();

interface InMessageImageProps {
  fileId: string;
  fileName?: string;
  shape?: ImageShape;
}

export const InMessageImage = memo(function InMessageImage({
  fileId,
  fileName,
  shape = DEFAULT_SHAPE,
}: InMessageImageProps) {
  const [fullImageShowing, setFullImageShowing] = useState(false);
  const [imageLoaded, setImageLoaded] = useState(loadedImages.has(fileId));

  const normalizedShape = SHAPE_CLASSES[shape] ? shape : DEFAULT_SHAPE;
  const { container: shapeContainerClasses, image: shapeImageClasses } =
    SHAPE_CLASSES[normalizedShape];

  const handleDownload = async (e: React.MouseEvent) => {
    e.stopPropagation(); // Prevent opening the full image modal

    try {
      const response = await fetch(buildImgUrl(fileId));
      if (!response.ok) {
        console.error("Failed to download image:", response.status);
        return;
      }
      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.href = url;
      a.download = fileName || `image-${fileId}.png`;
      document.body.appendChild(a);
      a.click();
      window.URL.revokeObjectURL(url);
      document.body.removeChild(a);
    } catch (error) {
      console.error("Failed to download image:", error);
    }
  };

  return (
    <>
      <FullImageModal
        fileId={fileId}
        open={fullImageShowing}
        onOpenChange={(open) => setFullImageShowing(open)}
      />

      <Hoverable.Root group="messageImage" widthVariant="fit">
        <div className={cn("relative", shapeContainerClasses)}>
          {!imageLoaded && (
            <div className="absolute inset-0 bg-background-tint-02 animate-pulse rounded-lg" />
          )}

          <img
            width={1200}
            height={1200}
            alt="Chat Message Image"
            onLoad={() => {
              loadedImages.add(fileId);
              setImageLoaded(true);
            }}
            className={cn(
              "object-contain object-left overflow-hidden rounded-lg w-full h-full transition-opacity duration-300 cursor-pointer",
              shapeImageClasses,
              imageLoaded ? "opacity-100" : "opacity-0"
            )}
            onClick={() => setFullImageShowing(true)}
            src={buildImgUrl(fileId)}
            loading="lazy"
          />

          {/* Download button - appears on hover */}
          <div className="absolute bottom-2 right-2 z-10">
            <Hoverable.Item group="messageImage" variant="opacity-on-hover">
              <Button
                icon={SvgDownload}
                tooltip="Download"
                onClick={handleDownload}
              />
            </Hoverable.Item>
          </div>
        </div>
      </Hoverable.Root>
    </>
  );
});


================================================
FILE: web/src/app/app/components/files/images/InputBarPreviewImage.tsx
================================================
"use client";

import { useState } from "react";
import { buildImgUrl } from "./utils";
import { FullImageModal } from "./FullImageModal";

export function InputBarPreviewImage({ fileId }: { fileId: string }) {
  const [fullImageShowing, setFullImageShowing] = useState(false);

  return (
    <>
      <FullImageModal
        fileId={fileId}
        open={fullImageShowing}
        onOpenChange={(open) => setFullImageShowing(open)}
      />
      <div
        className={`
          bg-transparent
          border-none
          flex
          items-center
          bg-accent-background-hovered
          border
          border-border
          rounded-md
          box-border
          h-6
      `}
      >
        <img
          alt="preview"
          onClick={() => setFullImageShowing(true)}
          className="h-6 w-6 object-cover rounded-lg bg-background cursor-pointer"
          src={buildImgUrl(fileId)}
        />
      </div>
    </>
  );
}


================================================
FILE: web/src/app/app/components/files/images/utils.ts
================================================
const CHAT_FILE_URL_REGEX = /\/api\/chat\/file\/([^/?#]+)/;
const IMAGE_EXTENSIONS = /\.(png|jpe?g|gif|webp|svg|bmp|ico|tiff?)$/i;

export function buildImgUrl(fileId: string) {
  return `/api/chat/file/${fileId}`;
}

/**
 * If `href` points to a chat file and `linkText` ends with an image extension,
 * returns the file ID. Otherwise returns null.
 */
export function extractChatImageFileId(
  href: string | undefined,
  linkText: string
): string | null {
  if (!href) return null;
  const match = CHAT_FILE_URL_REGEX.exec(href);
  if (!match?.[1]) return null;
  if (!IMAGE_EXTENSIONS.test(linkText)) return null;
  return match[1];
}


================================================
FILE: web/src/app/app/components/folders/FolderDropdown.tsx
================================================
import React, { useState, ReactNode, forwardRef } from "react";
import { Folder } from "./interfaces";
import { ChatSession } from "@/app/app/interfaces";
import { Caret } from "@/components/icons/icons";
import { cn } from "@/lib/utils";

interface FolderDropdownProps {
  folder: Folder;
  currentChatId?: string;
  showShareModal?: (chatSession: ChatSession) => void;
  closeSidebar?: () => void;
  children?: ReactNode;
  index: number;
}

export const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(
  ({ folder, children, index }: FolderDropdownProps, ref) => {
    const [isOpen, setIsOpen] = useState(true);

    return (
      <div className="overflow-visible pt-2 w-full">
        <div
          className="sticky top-0 bg-background-sidebar dark:bg-transparent z-10"
          style={{ zIndex: 1000 - index }}
        >
          <div
            ref={ref}
            className={cn(
              "flex",
              "overflow-visible",
              "items-center",
              "w-full",
              "text-text-darker",
              "rounded-md",
              "p-1",
              "bg-background-sidebar",
              "dark:bg-[#000]",
              "sticky",
              "top-0"
            )}
            style={{ zIndex: 10 - index }}
          >
            <button
              className="flex overflow-hidden bg-background-sidebar dark:bg-[#000] items-center flex-grow"
              onClick={() => setIsOpen(!isOpen)}
            >
              {isOpen ? (
                <Caret size={16} className="mr-1" />
              ) : (
                <Caret size={16} className="-rotate-90 mr-1" />
              )}
              <div className="flex items-center">
                <span className="text-sm font-[500]">{folder.folder_name}</span>
              </div>
            </button>
          </div>
          {isOpen && (
            <div className="overflow-visible mr-3 ml-1 mt-1">{children}</div>
          )}
        </div>
      </div>
    );
  }
);

FolderDropdown.displayName = "FolderDropdown";


================================================
FILE: web/src/app/app/components/folders/interfaces.ts
================================================
import { ChatSession } from "@/app/app/interfaces";

export interface Folder {
  folder_id?: number;
  folder_name: string;
  display_priority: number;
  chat_sessions: ChatSession[];
}


================================================
FILE: web/src/app/app/components/modifiers/SelectedDocuments.tsx
================================================
import { BasicClickable } from "@/components/BasicClickable";
import { OnyxDocument } from "@/lib/search/interfaces";
import { FiBook } from "react-icons/fi";

export function SelectedDocuments({
  selectedDocuments,
}: {
  selectedDocuments: OnyxDocument[];
}) {
  if (selectedDocuments.length === 0) {
    return null;
  }

  return (
    <BasicClickable>
      <div className="flex text-xs max-w-md overflow-hidden">
        <FiBook className="my-auto mr-1" />{" "}
        <div className="w-fit whitespace-nowrap">
          Chatting with {selectedDocuments.length} Selected Documents
        </div>
      </div>
    </BasicClickable>
  );
}


================================================
FILE: web/src/app/app/components/projects/ProjectChatSessionList.tsx
================================================
"use client";

import React, { useMemo } from "react";
import Link from "next/link";
import { ChatSessionMorePopup } from "@/components/sidebar/ChatSessionMorePopup";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { ChatSession } from "@/app/app/interfaces";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { useAgents } from "@/hooks/useAgents";
import { formatRelativeTime } from "./project_utils";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { UNNAMED_CHAT } from "@/lib/constants";
import ChatSessionSkeleton from "@/refresh-components/skeletons/ChatSessionSkeleton";
import { SvgBubbleText } from "@opal/icons";

export default function ProjectChatSessionList() {
  const {
    currentProjectDetails,
    currentProjectId,
    refreshCurrentProjectDetails,
    isLoadingProjectDetails,
  } = useProjectsContext();
  const { agents } = useAgents();
  const [isRenamingChat, setIsRenamingChat] = React.useState<string | null>(
    null
  );
  const [hoveredChatId, setHoveredChatId] = React.useState<string | null>(null);

  const projectChats: ChatSession[] = useMemo(() => {
    const sessions = currentProjectDetails?.project?.chat_sessions || [];
    return [...sessions].sort(
      (a, b) =>
        new Date(b.time_updated).getTime() - new Date(a.time_updated).getTime()
    );
  }, [currentProjectDetails?.project?.chat_sessions]);

  if (!currentProjectId) return null;

  return (
    <div className="flex flex-col gap-2 px-2 w-full mx-auto mt-4">
      <div className="flex items-center pl-2">
        <Text as="p" text02 secondaryBody>
          Recent Chats
        </Text>
      </div>

      {isLoadingProjectDetails && !currentProjectDetails ? (
        <div className="flex flex-col gap-2">
          <ChatSessionSkeleton />
          <ChatSessionSkeleton />
          <ChatSessionSkeleton />
        </div>
      ) : projectChats.length === 0 ? (
        <Text as="p" text02 secondaryBody className="p-2">
          No chats yet.
        </Text>
      ) : (
        <div className="flex flex-col gap-2">
          {projectChats.map((chat) => (
            <Link
              key={chat.id}
              href={{ pathname: "/app", query: { chatId: chat.id } }}
              className="relative flex w-full"
              onMouseEnter={() => setHoveredChatId(chat.id)}
              onMouseLeave={() => setHoveredChatId(null)}
            >
              <div
                className={cn(
                  "w-full rounded-08 py-2 transition-colors p-1.5",
                  hoveredChatId === chat.id && "bg-background-tint-02"
                )}
              >
                <div className="flex gap-3 min-w-0 w-full">
                  <div className="flex h-full w-fit pt-1 pl-1">
                    {(() => {
                      const personaIdToFeatured =
                        currentProjectDetails?.persona_id_to_is_featured || {};
                      const isFeatured = personaIdToFeatured[chat.persona_id];
                      if (isFeatured === false) {
                        const agent = agents.find(
                          (a) => a.id === chat.persona_id
                        );
                        if (agent) {
                          return (
                            <div className="h-full pt-1">
                              <AgentAvatar agent={agent} size={18} />
                            </div>
                          );
                        }
                      }
                      return (
                        <SvgBubbleText className="h-4 w-4 stroke-text-02" />
                      );
                    })()}
                  </div>
                  <div className="flex flex-col w-full">
                    <div className="flex items-center gap-1 w-full justify-between">
                      <div className="flex items-center gap-1">
                        <Text
                          as="p"
                          text03
                          mainUiBody
                          nowrap
                          className="truncate"
                          title={chat.name}
                        >
                          {chat.name || UNNAMED_CHAT}
                        </Text>
                      </div>
                      <div className="flex items-center">
                        <ChatSessionMorePopup
                          chatSession={chat}
                          projectId={currentProjectId}
                          isRenamingChat={isRenamingChat === chat.id}
                          setIsRenamingChat={(value) =>
                            setIsRenamingChat(value ? chat.id : null)
                          }
                          search={false}
                          afterDelete={() => {
                            refreshCurrentProjectDetails();
                          }}
                          afterMove={() => {
                            refreshCurrentProjectDetails();
                          }}
                          afterRemoveFromProject={() => {
                            refreshCurrentProjectDetails();
                          }}
                          iconSize={20}
                          isVisible={hoveredChatId === chat.id}
                        />
                      </div>
                    </div>
                    <Text
                      as="p"
                      text03
                      secondaryBody
                      nowrap
                      className="truncate"
                    >
                      Last message {formatRelativeTime(chat.time_updated)}
                    </Text>
                  </div>
                </div>
              </div>
            </Link>
          ))}
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/app/components/projects/ProjectContextPanel.tsx
================================================
"use client";

import React, { useCallback, useState } from "react";
import { useDropzone } from "react-dropzone";
import Separator from "@/refresh-components/Separator";
import { useProjectsContext } from "@/providers/ProjectsContext";
import FilePickerPopover from "@/refresh-components/popovers/FilePickerPopover";
import type { ProjectFile } from "../../projects/projectsService";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import { Button } from "@opal/components";

import AddInstructionModal from "@/components/modals/AddInstructionModal";
import UserFilesModal from "@/components/modals/UserFilesModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import Text from "@/refresh-components/texts/Text";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { FileCard, FileCardSkeleton } from "@/sections/cards/FileCard";
import { hasNonImageFiles } from "@/lib/utils";
import IconButton from "@/refresh-components/buttons/IconButton";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import { UserFileStatus } from "../../projects/projectsService";
import { SvgAddLines, SvgEdit, SvgFiles, SvgFolderOpen } from "@opal/icons";
import { Hoverable } from "@opal/core";

export interface ProjectContextPanelProps {
  projectTokenCount?: number;
  availableContextTokens?: number;
  setPresentingDocument?: (document: MinimalOnyxDocument) => void;
}
export default function ProjectContextPanel({
  projectTokenCount = 0,
  availableContextTokens = 128_000,
  setPresentingDocument,
}: ProjectContextPanelProps) {
  const addInstructionModal = useCreateModal();
  const projectFilesModal = useCreateModal();
  // Edit project name state
  const [isEditingName, setIsEditingName] = useState(false);
  // Convert ProjectFile to MinimalOnyxDocument format for viewing
  const handleOnView = useCallback(
    (file: ProjectFile) => {
      if (!setPresentingDocument) return;

      const documentForViewer: MinimalOnyxDocument = {
        document_id: `project_file__${file.file_id}`,
        semantic_identifier: file.name,
      };

      setPresentingDocument(documentForViewer);
    },
    [setPresentingDocument]
  );
  const {
    currentProjectDetails,
    currentProjectId,
    unlinkFileFromProject,
    linkFileToProject,
    allCurrentProjectFiles,
    isLoadingProjectDetails,
    beginUpload,
    projects,
    renameProject,
  } = useProjectsContext();
  const handleUploadFiles = useCallback(
    async (files: File[]) => {
      if (!files || files.length === 0) return;
      beginUpload(Array.from(files), currentProjectId);
    },
    [currentProjectId, beginUpload]
  );

  const totalFiles = allCurrentProjectFiles.length;
  const displayFileCount = totalFiles > 100 ? "100+" : String(totalFiles);

  const handleUploadChange = useCallback(
    async (e: React.ChangeEvent<HTMLInputElement>) => {
      const files = e.target.files;
      if (!files || files.length === 0) return;
      await handleUploadFiles(Array.from(files));
      e.target.value = "";
    },
    [handleUploadFiles]
  );

  // Nested dropzone for drag-and-drop within ProjectContextPanel
  const { getRootProps, getInputProps, isDragActive } = useDropzone({
    noClick: true,
    noKeyboard: true,
    multiple: true,
    noDragEventsBubbling: true,
    onDrop: (acceptedFiles) => {
      void handleUploadFiles(acceptedFiles);
    },
  });

  // Handle project name editing
  const currentProject = projects.find((p) => p.id === currentProjectId);
  const projectName = currentProject?.name || "Loading project...";

  const startEditing = useCallback(() => {
    setIsEditingName(true);
  }, []);

  const cancelEditing = useCallback(() => {
    setIsEditingName(false);
  }, []);

  if (!currentProjectId) return null; // no selection yet

  // Detect if there are any non-image files in the displayed files
  // to determine if images should be compact
  const displayedFiles = allCurrentProjectFiles.slice(0, 4);
  const shouldCompactImages = hasNonImageFiles(displayedFiles);

  return (
    <>
      <addInstructionModal.Provider>
        <AddInstructionModal />
      </addInstructionModal.Provider>

      <projectFilesModal.Provider>
        <UserFilesModal
          title="Project Files"
          description="Sessions in this project can access the files here."
          recentFiles={[...allCurrentProjectFiles]}
          onView={handleOnView}
          handleUploadChange={handleUploadChange}
          onDelete={async (file: ProjectFile) => {
            if (!currentProjectId) return;
            await unlinkFileFromProject(currentProjectId, file.id);
          }}
        />
      </projectFilesModal.Provider>
      <div className="flex flex-col gap-6 w-full max-w-[var(--app-page-main-content-width)] mx-auto p-4 pt-14 pb-6">
        <div className="flex flex-col gap-1 text-text-04">
          <SvgFolderOpen className="h-8 w-8 text-text-04" />
          <Hoverable.Root group="projectName" widthVariant="fit">
            <div className="flex items-center gap-2">
              {isEditingName ? (
                <ButtonRenaming
                  initialName={projectName}
                  onRename={async (newName) => {
                    if (currentProjectId) {
                      await renameProject(currentProjectId, newName);
                    }
                  }}
                  onClose={cancelEditing}
                  className="font-heading-h2 text-text-04"
                />
              ) : (
                <>
                  <Text as="p" headingH2 className="font-heading-h2">
                    {projectName}
                  </Text>
                  {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                  <Hoverable.Item
                    group="projectName"
                    variant="opacity-on-hover"
                  >
                    <IconButton
                      icon={SvgEdit}
                      internal
                      onClick={startEditing}
                      tooltip="Edit project name"
                    />
                  </Hoverable.Item>
                </>
              )}
            </div>
          </Hoverable.Root>
        </div>

        <Separator className="py-0" />
        <div className="flex flex-row gap-2 justify-between">
          <div className="min-w-0 flex-1">
            <Text as="p" headingH3 text04>
              Instructions
            </Text>
            {isLoadingProjectDetails && !currentProjectDetails ? (
              <div className="h-5 w-3/4 rounded bg-background-tint-02 animate-pulse" />
            ) : currentProjectDetails?.project?.instructions ? (
              <Text as="p" text02 secondaryBody className="truncate">
                {currentProjectDetails.project.instructions}
              </Text>
            ) : (
              <Text as="p" text02 secondaryBody className="truncate">
                Add instructions to tailor the response in this project.
              </Text>
            )}
          </div>
          <Button
            prominence="tertiary"
            icon={SvgAddLines}
            onClick={() => addInstructionModal.toggle(true)}
          >
            Set Instructions
          </Button>
        </div>
        <div
          className="flex flex-col gap-2 "
          {...getRootProps({ onClick: (e) => e.stopPropagation() })}
        >
          <div className="flex flex-row gap-2 justify-between">
            <div>
              <Text as="p" headingH3 text04>
                Files
              </Text>
              <Text as="p" text02 secondaryBody>
                Chats in this project can access these files.
              </Text>
            </div>
            <FilePickerPopover
              trigger={(open) => (
                // The `secondary={undefined}` is required here because `CreateButton` sets it to true.
                // Therefore, we need to first remove the truthiness before passing in the other `tertiary` flag.
                <CreateButton secondary={undefined} tertiary transient={open}>
                  Add Files
                </CreateButton>
              )}
              onFileClick={handleOnView}
              onPickRecent={async (file) => {
                if (file.status === UserFileStatus.UPLOADING) return;
                if (file.status === UserFileStatus.DELETING) return;
                if (!currentProjectId) return;
                if (!linkFileToProject) return;
                linkFileToProject(currentProjectId, file);
              }}
              onUnpickRecent={async (file) => {
                if (!currentProjectId) return;
                await unlinkFileFromProject(currentProjectId, file.id);
              }}
              handleUploadChange={handleUploadChange}
              selectedFileIds={(allCurrentProjectFiles || []).map((f) => f.id)}
            />
          </div>
          {/* Hidden input just to satisfy dropzone contract; we rely on FilePicker for clicks */}
          <input {...getInputProps()} />

          {isLoadingProjectDetails && !currentProjectDetails ? (
            <>
              {/* Mobile / small screens: show skeleton */}
              <div className="sm:hidden">
                <div className="w-full h-[68px] rounded-xl bg-background-tint-02 animate-pulse" />
              </div>

              {/* Desktop / larger screens: show skeleton file cards */}
              <div className="hidden sm:flex gap-1">
                <FileCardSkeleton />
                <FileCardSkeleton />
                <FileCardSkeleton />
                <FileCardSkeleton />
              </div>
            </>
          ) : allCurrentProjectFiles.length > 0 ? (
            <>
              {/* Mobile / small screens: just show a button to view files */}
              <div className="sm:hidden">
                <button
                  className="w-full rounded-xl px-3 py-3 text-left bg-transparent hover:bg-accent-background-hovered hover:dark:bg-neutral-800/75 transition-colors"
                  onClick={() => projectFilesModal.toggle(true)}
                >
                  <div className="flex flex-col overflow-hidden">
                    <div className="flex items-center justify-between gap-2 w-full">
                      <Text as="p" text04 secondaryAction>
                        View files
                      </Text>
                      <SvgFiles className="h-5 w-5 stroke-text-02" />
                    </div>
                    <Text as="p" text03 secondaryBody>
                      {displayFileCount} files
                    </Text>
                  </div>
                </button>
              </div>

              {/* Desktop / larger screens: show previews with optional View All */}
              <div className="hidden sm:flex gap-1 relative items-center">
                {(() => {
                  return allCurrentProjectFiles.slice(0, 4).map((f) => (
                    <div key={f.id}>
                      <FileCard
                        file={f}
                        removeFile={async (fileId: string) => {
                          if (!currentProjectId) return;
                          await unlinkFileFromProject(currentProjectId, fileId);
                        }}
                        onFileClick={handleOnView}
                        compactImages={shouldCompactImages}
                      />
                    </div>
                  ));
                })()}
                {totalFiles > 4 && (
                  <button
                    className="rounded-xl px-3 py-1 text-left transition-colors hover:bg-background-tint-02"
                    onClick={() => projectFilesModal.toggle(true)}
                  >
                    <div className="flex flex-col overflow-hidden h-12 p-1">
                      <div className="flex items-center justify-between gap-2 w-full">
                        <Text as="p" text04 secondaryAction>
                          View All
                        </Text>
                        <SvgFiles className="h-5 w-5 stroke-text-02" />
                      </div>
                      <Text as="p" text03 secondaryBody>
                        {displayFileCount} files
                      </Text>
                    </div>
                  </button>
                )}
                {isDragActive && (
                  <div className="pointer-events-none absolute inset-0 rounded-lg border-2 border-dashed border-action-link-05" />
                )}
              </div>
              {projectTokenCount > availableContextTokens && (
                <Text as="p" text02 secondaryBody>
                  This project exceeds the model&apos;s context limits. Sessions
                  will automatically search for relevant files first before
                  generating response.
                </Text>
              )}
            </>
          ) : (
            <div
              className={`h-12 rounded-lg border border-dashed ${
                isDragActive
                  ? "bg-action-link-01 border-action-link-05"
                  : "border-border-01"
              } flex items-center pl-2`}
            >
              <p
                className={`font-secondary-body ${
                  isDragActive ? "text-action-link-05" : "text-text-02 "
                }`}
              >
                {isDragActive
                  ? "Drop files here to add to this project"
                  : "Add documents, texts, or images to use in the project. Drag & drop supported."}
              </p>
            </div>
          )}
        </div>
      </div>
    </>
  );
}


================================================
FILE: web/src/app/app/components/projects/project_utils.ts
================================================
export function formatRelativeTime(isoDate: string): string {
  const date = new Date(isoDate);
  const now = new Date();
  const diffMs = now.getTime() - date.getTime();

  const seconds = Math.floor(diffMs / 1000);
  if (seconds < 45) return "just now";
  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes} min${minutes === 1 ? "" : "s"} ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours} hour${hours === 1 ? "" : "s"} ago`;
  const days = Math.floor(hours / 24);
  if (days < 30) return `${days} day${days === 1 ? "" : "s"} ago`;
  const months = Math.floor(days / 30);
  if (months < 12) return `${months} month${months === 1 ? "" : "s"} ago`;
  const years = Math.floor(months / 12);
  return `${years} year${years === 1 ? "" : "s"} ago`;
}


================================================
FILE: web/src/app/app/components/tools/GeneratingImageDisplay.tsx
================================================
import React, { useState, useEffect, useRef } from "react";

export default function GeneratingImageDisplay({ isCompleted = false }) {
  const [progress, setProgress] = useState(0);
  const progressRef = useRef(0);
  const animationRef = useRef<number | null>(null);
  const startTimeRef = useRef<number>(Date.now());

  useEffect(() => {
    // Animation setup
    let lastUpdateTime = 0;
    const updateInterval = 500;
    const animationDuration = 30000;

    const animate = (timestamp: number) => {
      const elapsedTime = timestamp - startTimeRef.current;

      // Calculate progress using logarithmic curve
      const maxProgress = 99.9;
      const progress =
        maxProgress * (1 - Math.exp(-elapsedTime / animationDuration));

      // Update progress if enough time has passed
      if (timestamp - lastUpdateTime > updateInterval) {
        progressRef.current = progress;
        setProgress(Math.round(progress * 10) / 10);
        lastUpdateTime = timestamp;
      }

      // Continue animation if not completed
      if (!isCompleted && elapsedTime < animationDuration) {
        animationRef.current = requestAnimationFrame(animate);
      }
    };

    // Start animation
    startTimeRef.current = performance.now();
    animationRef.current = requestAnimationFrame(animate);

    // Cleanup function
    return () => {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
      }
    };
  }, [isCompleted]);

  // Handle completion
  useEffect(() => {
    if (isCompleted) {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
      }
      setProgress(100);
    }
  }, [isCompleted]);

  return (
    <div className="object-cover object-center border border-background-200 bg-background-100 items-center justify-center overflow-hidden flex rounded-lg w-96 h-96 transition-opacity duration-300 opacity-100">
      <div className="m-auto relative flex">
        <svg className="w-16 h-16 transform -rotate-90" viewBox="0 0 100 100">
          <circle
            className="text-text-200"
            strokeWidth="8"
            stroke="currentColor"
            fill="transparent"
            r="44"
            cx="50"
            cy="50"
          />
          <circle
            className="text-text-800 transition-all duration-300"
            strokeWidth="8"
            strokeDasharray={276.46}
            strokeDashoffset={276.46 * (1 - progress / 100)}
            strokeLinecap="round"
            stroke="currentColor"
            fill="transparent"
            r="44"
            cx="50"
            cy="50"
          />
        </svg>
        <div className="absolute inset-0 flex items-center justify-center">
          <svg
            className="w-6 h-6 text-text-500 animate-pulse-strong"
            fill="none"
            viewBox="0 0 24 24"
            stroke="currentColor"
          >
            <path
              strokeLinecap="round"
              strokeLinejoin="round"
              strokeWidth="2"
              d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"
            />
          </svg>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/app/components/tools/constants.ts
================================================
import type { IconProps } from "@opal/types";
import { SvgCode, SvgGlobe, SvgImage, SvgLink, SvgSearch } from "@opal/icons";

// Tool names as referenced by tool results / tool calls
export const SEARCH_TOOL_NAME = "run_search";
export const INTERNET_SEARCH_TOOL_NAME = "run_internet_search";
export const IMAGE_GENERATION_TOOL_NAME = "run_image_generation";
export const PYTHON_TOOL_NAME = "run_python";
export const OPEN_URL_TOOL_NAME = "open_url";

// In-code tool IDs that also correspond to the tool's name when associated with a persona
export const SEARCH_TOOL_ID = "SearchTool";
export const IMAGE_GENERATION_TOOL_ID = "ImageGenerationTool";
export const WEB_SEARCH_TOOL_ID = "WebSearchTool";
export const PYTHON_TOOL_ID = "PythonTool";
export const OPEN_URL_TOOL_ID = "OpenURLTool";
export const FILE_READER_TOOL_ID = "FileReaderTool";

// Icon mappings for system tools
export const SYSTEM_TOOL_ICONS: Record<
  string,
  React.FunctionComponent<IconProps>
> = {
  [SEARCH_TOOL_ID]: SvgSearch,
  [WEB_SEARCH_TOOL_ID]: SvgGlobe,
  [IMAGE_GENERATION_TOOL_ID]: SvgImage,
  [PYTHON_TOOL_ID]: SvgCode,
  [OPEN_URL_TOOL_ID]: SvgLink,
};


================================================
FILE: web/src/app/app/interfaces.ts
================================================
import {
  OnyxDocument,
  Filters,
  SearchOnyxDocument,
  StreamStopReason,
} from "@/lib/search/interfaces";
import { Packet } from "./services/streamingModels";

export type FeedbackType = "like" | "dislike";
export type ChatState =
  | "input"
  | "loading"
  | "streaming"
  | "toolBuilding"
  | "uploading";
export interface RegenerationState {
  regenerating: boolean;
  finalMessageIndex: number;
}

export enum RetrievalType {
  None = "none",
  Search = "search",
  SelectedDocs = "selectedDocs",
}

export enum ResearchType {
  LegacyAgentic = "LEGACY_AGENTIC",
  Thoughtful = "THOUGHTFUL",
  Deep = "DEEP",
  Fast = "FAST",
}

export enum ChatSessionSharedStatus {
  Private = "private",
  Public = "public",
}

export interface ChatSessionSummary {
  id: string;
  name: string | null;
  persona_id: number | null;
  time_created: string;
  shared_status: ChatSessionSharedStatus;
  current_alternate_model: string | null;
  current_temperature_override: number | null;
  highlights?: string[];
}

export interface ChatSessionGroup {
  title: string;
  chats: ChatSessionSummary[];
}

export interface ChatSearchResponse {
  groups: ChatSessionGroup[];
  has_more: boolean;
  next_page: number | null;
}

// The number of messages to buffer on the client side.
export const BUFFER_COUNT = 35;

export interface RetrievalDetails {
  run_search: "always" | "never" | "auto";
  real_time: boolean;
  filters?: Filters;
  enable_auto_detect_filters?: boolean | null;
}

// Citation number -> Document ID (allows O(1) lookup when rendering citations)
export type CitationMap = { [citation_num: number]: string };

export enum ChatFileType {
  IMAGE = "image",
  DOCUMENT = "document",
  PLAIN_TEXT = "plain_text",
  TABULAR = "tabular",
  USER_KNOWLEDGE = "user_knowledge",
}

export const isTextFile = (fileType: ChatFileType) =>
  [
    ChatFileType.PLAIN_TEXT,
    ChatFileType.TABULAR,
    ChatFileType.USER_KNOWLEDGE,
    ChatFileType.DOCUMENT,
  ].includes(fileType);

export interface FileDescriptor {
  id: string;
  type: ChatFileType;
  name?: string | null;

  user_file_id?: string | null;
  // FE only
  isUploading?: boolean;
}

export interface FileDescriptorWithHighlights extends FileDescriptor {
  match_highlights: string[];
}

export interface LLMRelevanceFilterPacket {
  relevant_chunk_indices: number[];
}

export interface ToolCallMetadata {
  tool_name: string;
  tool_args: Record<string, any>;
  tool_result?: Record<string, any>;
}

export interface ToolCallFinalResult {
  tool_name: string;
  tool_args: Record<string, any>;
  tool_result: Record<string, any>;
}

export interface ChatSession {
  id: string;
  name: string;
  persona_id: number;
  time_created: string;
  time_updated: string;
  shared_status: ChatSessionSharedStatus;
  project_id: number | null;
  current_alternate_model: string;
  current_temperature_override: number | null;
}

export interface SearchSession {
  search_session_id: string;
  documents: SearchOnyxDocument[];
  messages: BackendMessage[];
  description: string;
}

export interface Message {
  is_generating?: boolean;
  messageId?: number;
  nodeId: number; // Unique identifier for tree structure (can be negative for temp messages)
  message: string;
  type: "user" | "assistant" | "system" | "error"; // TODO: rename "assistant" to "agent" — https://linear.app/onyx-app/issue/ENG-3766
  retrievalType?: RetrievalType;
  researchType?: ResearchType;
  query?: string | null;
  files: FileDescriptor[];
  toolCall: ToolCallMetadata | null;
  // for rebuilding the message tree - these now use nodeId
  parentNodeId: number | null;
  childrenNodeIds?: number[];
  latestChildNodeId?: number | null;
  alternateAgentID?: number | null;
  stackTrace?: string | null;
  errorCode?: string | null;
  isRetryable?: boolean;
  errorDetails?: Record<string, any> | null;
  overridden_model?: string;
  stopReason?: StreamStopReason | null;

  // Multi-model answer generation
  preferredResponseId?: number | null;
  modelDisplayName?: string | null;

  // new gen
  packets: Packet[];
  packetCount?: number; // Tracks packet count for React memo comparison (avoids reading from mutated array)

  // cached values for easy access
  documents?: OnyxDocument[] | null;
  citations?: CitationMap;

  // feedback state
  currentFeedback?: FeedbackType | null;

  // Duration in seconds for processing this message (agent messages only)
  processingDurationSeconds?: number;
}

export interface BackendChatSession {
  chat_session_id: string;
  description: string;
  persona_id: number;
  persona_name: string;
  messages: BackendMessage[];
  time_created: string;
  time_updated: string;
  shared_status: ChatSessionSharedStatus;
  current_temperature_override: number | null;
  current_alternate_model?: string;

  owner_name: string | null;
  packets: Packet[][];
}

export function toChatSession(backend: BackendChatSession): ChatSession {
  return {
    id: backend.chat_session_id,
    name: backend.description,
    persona_id: backend.persona_id,
    time_created: backend.time_created,
    time_updated: backend.time_updated,
    shared_status: backend.shared_status,
    project_id: null,
    current_alternate_model: backend.current_alternate_model ?? "",
    current_temperature_override: backend.current_temperature_override,
  };
}

export interface BackendMessage {
  message_id: number;
  message_type: string;
  research_type: string | null;
  parent_message: number | null;
  latest_child_message: number | null;
  message: string;
  rephrased_query: string | null;
  // Backend sends context_docs as a flat array of documents
  context_docs: OnyxDocument[] | null;
  time_sent: string;
  overridden_model: string;
  alternate_assistant_id: number | null; // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
  chat_session_id: string;
  citations: CitationMap | null;
  files: FileDescriptor[];
  tool_call: ToolCallFinalResult | null;
  current_feedback: string | null;
  // Duration in seconds for processing this message (agent messages only)
  processing_duration_seconds?: number;

  sub_questions: SubQuestionDetail[];
  // Keeping existing properties
  comments: any;
  parentMessageId: number | null;
  refined_answer_improvement: boolean | null;
  is_agentic: boolean | null;
  // Multi-model answer generation
  preferred_response_id: number | null;
  model_display_name: string | null;
}

export interface MessageResponseIDInfo {
  type: "message_id_info";
  user_message_id: number | null;
  reserved_assistant_message_id: number; // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
}

export interface ModelResponseSlot {
  message_id: number;
  model_name: string;
}

export interface MultiModelMessageResponseIDInfo {
  type: "multi_model_message_id_info";
  user_message_id: number | null;
  responses: ModelResponseSlot[];
}

export interface UserKnowledgeFilePacket {
  user_files: FileDescriptor[];
}

export interface DocumentsResponse {
  top_documents: OnyxDocument[];
  rephrased_query: string | null;
  level?: number | null;
  level_question_num?: number | null;
}

export interface FileChatDisplay {
  file_ids: string[];
}

export interface StreamingError {
  error: string;
  stack_trace: string;
  error_code?: string;
  is_retryable?: boolean;
  details?: Record<string, any>;
}

export interface InputPrompt {
  id: number;
  prompt: string;
  content: string;
  active: boolean;
  is_public: boolean;
}

export interface EditPromptModalProps {
  onClose: () => void;

  promptId: number;
  editInputPrompt: (
    promptId: number,
    values: CreateInputPromptRequest
  ) => Promise<void>;
}
export interface CreateInputPromptRequest {
  prompt: string;
  content: string;
}

export interface AddPromptModalProps {
  onClose: () => void;
  onSubmit: (promptData: CreateInputPromptRequest) => void;
}
export interface PromptData {
  id: number;
  prompt: string;
  content: string;
}

/**
 * // Start of Selection
 */

export interface BaseQuestionIdentifier {
  level: number;
  level_question_num: number;
}

export interface SubQuestionDetail extends BaseQuestionIdentifier {
  question: string;
  answer: string;
  sub_queries?: SubQueryDetail[] | null;
  context_docs?: { top_documents: OnyxDocument[] } | null;
  is_complete?: boolean;
  is_stopped?: boolean;
  answer_streaming?: boolean;
}

export interface SubQueryDetail {
  query: string;
  query_id: number;
  doc_ids?: number[] | null;
}


================================================
FILE: web/src/app/app/layout.tsx
================================================
import { redirect } from "next/navigation";
import type { Route } from "next";
import { unstable_noStore as noStore } from "next/cache";
import { requireAuth } from "@/lib/auth/requireAuth";
import { ProjectsProvider } from "@/providers/ProjectsContext";
import { VoiceModeProvider } from "@/providers/VoiceModeProvider";
import AppSidebar from "@/sections/sidebar/AppSidebar";

export interface LayoutProps {
  children: React.ReactNode;
}

export default async function Layout({ children }: LayoutProps) {
  noStore();

  // Only check authentication - data fetching is done client-side via SWR hooks
  const authResult = await requireAuth();

  if (authResult.redirect) {
    redirect(authResult.redirect as Route);
  }

  return (
    <ProjectsProvider>
      {/* VoiceModeProvider wraps the full app layout so TTS playback state
          persists across page navigations (e.g., sidebar clicks during playback).
          It only activates WebSocket connections when TTS is actually triggered. */}
      <VoiceModeProvider>
        <div className="flex flex-row w-full h-full">
          <AppSidebar />
          {children}
        </div>
      </VoiceModeProvider>
    </ProjectsProvider>
  );
}


================================================
FILE: web/src/app/app/message/BlinkingBar.tsx
================================================
import { cn } from "@/lib/utils";

export function BlinkingBar({ addMargin = false }: { addMargin?: boolean }) {
  return (
    <span
      className={cn(
        "animate-pulse flex-none bg-theme-primary-05 relative top-[0.15rem] inline-block w-[0.5rem] h-[1rem]",
        addMargin && "ml-1"
      )}
    ></span>
  );
}


================================================
FILE: web/src/app/app/message/CodeBlock.tsx
================================================
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import React, { useState, ReactNode, useCallback, useMemo, memo } from "react";
import { SvgCheck, SvgCode, SvgCopy } from "@opal/icons";

interface CodeBlockProps {
  className?: string;
  children?: ReactNode;
  codeText: string;
  showHeader?: boolean;
  noPadding?: boolean;
}

const MemoizedCodeLine = memo(({ content }: { content: ReactNode }) => (
  <>{content}</>
));

export const CodeBlock = memo(function CodeBlock({
  className = "",
  children,
  codeText,
  showHeader = true,
  noPadding = false,
}: CodeBlockProps) {
  const [copied, setCopied] = useState(false);

  const language = useMemo(() => {
    return className
      .split(" ")
      .filter((cls) => cls.startsWith("language-"))
      .map((cls) => cls.replace("language-", ""))
      .join(" ");
  }, [className]);

  const handleCopy = useCallback(() => {
    if (!codeText) return;
    navigator.clipboard.writeText(codeText).then(() => {
      setCopied(true);
      setTimeout(() => setCopied(false), 2000);
    });
  }, [codeText]);

  const CopyButton = () => (
    <div
      className="ml-auto cursor-pointer select-none"
      onMouseDown={handleCopy}
    >
      {copied ? (
        <div className="flex items-center space-x-2">
          <SvgCheck height={14} width={14} stroke="currentColor" />
          <Text as="p" secondaryMono>
            Copied!
          </Text>
        </div>
      ) : (
        <div className="flex items-center space-x-2">
          <SvgCopy height={14} width={14} stroke="currentColor" />
          <Text as="p" secondaryMono>
            Copy
          </Text>
        </div>
      )}
    </div>
  );

  if (typeof children === "string" && !language) {
    return (
      <span
        data-testid="code-block"
        className={cn(
          "font-mono",
          "text-text-05",
          "bg-background-tint-00",
          "rounded",
          "text-[0.75em]",
          "inline",
          "whitespace-pre-wrap",
          "break-words",
          "py-0.5",
          "px-1",
          className
        )}
      >
        {children}
      </span>
    );
  }

  const CodeContent = () => {
    if (!language) {
      return (
        <pre className="!p-2 m-0 overflow-x-auto w-0 min-w-full hljs">
          <code className={`text-sm hljs ${className}`}>
            {Array.isArray(children)
              ? children.map((child, index) => (
                  <MemoizedCodeLine key={index} content={child} />
                ))
              : children}
          </code>
        </pre>
      );
    }

    return (
      <pre className="!p-2 m-0 overflow-x-auto w-0 min-w-full hljs">
        <code className="text-xs">
          {Array.isArray(children)
            ? children.map((child, index) => (
                <MemoizedCodeLine key={index} content={child} />
              ))
            : children}
        </code>
      </pre>
    );
  };

  return (
    <>
      {showHeader ? (
        <div
          className={cn(
            "bg-background-tint-00 rounded-12 max-w-full min-w-0",
            !noPadding && "px-1 pb-1"
          )}
        >
          {language && (
            <div className="flex items-center px-2 py-1 text-sm text-text-04 gap-x-2">
              <SvgCode
                height={12}
                width={12}
                stroke="currentColor"
                className="my-auto"
              />
              <Text secondaryMono>{language}</Text>
              {codeText && <CopyButton />}
            </div>
          )}
          <CodeContent />
        </div>
      ) : (
        <CodeContent />
      )}
    </>
  );
});

CodeBlock.displayName = "CodeBlock";
MemoizedCodeLine.displayName = "MemoizedCodeLine";


================================================
FILE: web/src/app/app/message/FileDisplay.tsx
================================================
"use client";

import { ReactNode, useState } from "react";
import { cn } from "@/lib/utils";
import { ChatFileType, FileDescriptor } from "@/app/app/interfaces";
import Attachment from "@/refresh-components/Attachment";
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
import CsvContent from "@/components/tools/CSVContent";
import PreviewModal from "@/sections/modals/PreviewModal";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import ExpandableContentWrapper from "@/components/tools/ExpandableContentWrapper";

interface FileContainerProps {
  children: ReactNode;
  className?: string;
  id?: string;
}

interface FileDisplayProps {
  files: FileDescriptor[];
}

function FileContainer({ children, className, id }: FileContainerProps) {
  return (
    <div
      id={id}
      className={cn("flex w-full flex-col items-end gap-2 py-2", className)}
    >
      {children}
    </div>
  );
}

export default function FileDisplay({ files }: FileDisplayProps) {
  const [close, setClose] = useState(true);
  const [previewingFile, setPreviewingFile] = useState<FileDescriptor | null>(
    null
  );
  const textFiles = files.filter(
    (file) =>
      file.type === ChatFileType.PLAIN_TEXT ||
      file.type === ChatFileType.DOCUMENT
  );
  const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE);
  // TODO(danelegend): XLSX files are binary (OOXML) and will fail to parse in CsvContent.
  // The backend should convert XLSX to CSV text before serving via /api/chat/file,
  // or XLSX should be split into a separate ChatFileType and rendered as an Attachment.
  const tabularFiles = files.filter(
    (file) => file.type === ChatFileType.TABULAR
  );

  const presentingDocument: MinimalOnyxDocument = {
    document_id: previewingFile?.id ?? "",
    semantic_identifier: previewingFile?.name ?? "",
  };

  return (
    <>
      {previewingFile && (
        <PreviewModal
          presentingDocument={presentingDocument}
          onClose={() => setPreviewingFile(null)}
        />
      )}

      {textFiles.length > 0 && (
        <FileContainer id="onyx-file">
          {textFiles.map((file) => (
            <Attachment
              key={file.id}
              fileName={file.name || file.id}
              open={() => setPreviewingFile(file)}
            />
          ))}
        </FileContainer>
      )}

      {imageFiles.length > 0 && (
        <FileContainer id="onyx-image">
          {imageFiles.map((file) => (
            <InMessageImage key={file.id} fileId={file.id} />
          ))}
        </FileContainer>
      )}

      {tabularFiles.length > 0 && (
        <FileContainer className="overflow-auto">
          {tabularFiles.map((file) =>
            close ? (
              <ExpandableContentWrapper
                key={file.id}
                fileDescriptor={file}
                close={() => setClose(false)}
                ContentComponent={CsvContent}
              />
            ) : (
              <Attachment
                key={file.id}
                open={() => setClose(true)}
                fileName={file.name || file.id}
              />
            )
          )}
        </FileContainer>
      )}
    </>
  );
}


================================================
FILE: web/src/app/app/message/HumanMessage.tsx
================================================
"use client";

import React, { useEffect, useMemo, useRef, useState } from "react";
import { FileDescriptor } from "@/app/app/interfaces";
import "katex/dist/katex.min.css";
import MessageSwitcher from "@/app/app/message/MessageSwitcher";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import useScreenSize from "@/hooks/useScreenSize";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { Button } from "@opal/components";
import { SvgEdit } from "@opal/icons";
import { Hoverable } from "@opal/core";
import FileDisplay from "./FileDisplay";

interface MessageEditingProps {
  content: string;
  onSubmitEdit: (editedContent: string) => void;
  onCancelEdit: () => void;
}

function MessageEditing({
  content,
  onSubmitEdit,
  onCancelEdit,
}: MessageEditingProps) {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const [editedContent, setEditedContent] = useState(content);

  useEffect(() => {
    if (!textareaRef.current) return;

    // Focus the textarea
    textareaRef.current.focus();
    textareaRef.current.select();
  }, []);

  function handleSubmit() {
    onSubmitEdit(editedContent);
  }

  function handleCancel() {
    setEditedContent(content);
    onCancelEdit();
  }

  return (
    <div className="w-full">
      <div
        className={cn(
          "w-full h-full border rounded-16 overflow-hidden p-3 flex flex-col gap-2"
        )}
      >
        <textarea
          ref={textareaRef}
          className={cn(
            "w-full h-full resize-none outline-none bg-transparent overflow-y-scroll whitespace-normal break-word"
          )}
          aria-multiline
          role="textarea"
          value={editedContent}
          style={{ scrollbarWidth: "thin" }}
          onChange={(e) => {
            setEditedContent(e.target.value);
            textareaRef.current!.style.height = "auto";
            e.target.style.height = `${e.target.scrollHeight}px`;
          }}
          onKeyDown={(e) => {
            if (e.key === "Escape") {
              e.preventDefault();
              handleCancel();
            }
            // Submit edit if "Command Enter" is pressed, like in ChatGPT
            if (e.key === "Enter" && e.metaKey) handleSubmit();
          }}
        />
        <div className="flex justify-end gap-1">
          <Button onClick={handleSubmit}>Submit</Button>
          <Button prominence="secondary" onClick={handleCancel}>
            Cancel
          </Button>
        </div>
      </div>
    </div>
  );
}

interface HumanMessageProps {
  // Content and display
  content: string;
  files?: FileDescriptor[];

  // Message navigation - nodeId for tree position, messageId for editing
  nodeId: number;
  messageId?: number | null;
  otherMessagesCanSwitchTo?: number[];
  onMessageSelection?: (nodeId: number) => void;

  // Editing functionality - takes (editedContent, messageId) to allow stable callback reference
  onEdit?: (editedContent: string, messageId: number) => void;

  // Streaming and generation
  stopGenerating?: () => void;
  disableSwitchingForStreaming?: boolean;
}

// Memoization comparison - compare by value for primitives, by reference for objects/arrays
function arePropsEqual(
  prev: HumanMessageProps,
  next: HumanMessageProps
): boolean {
  return (
    prev.content === next.content &&
    prev.nodeId === next.nodeId &&
    prev.messageId === next.messageId &&
    prev.files === next.files &&
    prev.disableSwitchingForStreaming === next.disableSwitchingForStreaming &&
    prev.otherMessagesCanSwitchTo === next.otherMessagesCanSwitchTo &&
    prev.onEdit === next.onEdit
    // Skip: stopGenerating, onMessageSelection (inline function props)
  );
}

const HumanMessage = React.memo(function HumanMessage({
  content: initialContent,
  files,
  nodeId,
  messageId,
  otherMessagesCanSwitchTo,
  onEdit,
  onMessageSelection,
  stopGenerating = () => null,
  disableSwitchingForStreaming = false,
}: HumanMessageProps) {
  // TODO (@raunakab):
  //
  // This is some duplicated state that is patching a memoization issue with `HumanMessage`.
  // Fix this later.
  const [content, setContent] = useState(initialContent);

  const [isEditing, setIsEditing] = useState(false);
  const { isMobile } = useScreenSize();

  // Use nodeId for switching (finding position in siblings)
  const indexInSiblings = otherMessagesCanSwitchTo?.indexOf(nodeId);
  // indexOf returns -1 if not found, treat that as undefined
  const currentMessageInd =
    indexInSiblings !== undefined && indexInSiblings !== -1
      ? indexInSiblings
      : undefined;

  const getPreviousMessage = () => {
    if (
      currentMessageInd !== undefined &&
      currentMessageInd > 0 &&
      otherMessagesCanSwitchTo
    ) {
      return otherMessagesCanSwitchTo[currentMessageInd - 1];
    }
    return undefined;
  };

  const getNextMessage = () => {
    if (
      currentMessageInd !== undefined &&
      currentMessageInd < (otherMessagesCanSwitchTo?.length || 0) - 1 &&
      otherMessagesCanSwitchTo
    ) {
      return otherMessagesCanSwitchTo[currentMessageInd + 1];
    }
    return undefined;
  };

  const copyEditButtonContent = useMemo(
    () => (
      <div className="flex flex-row flex-shrink px-1">
        <CopyIconButton
          getCopyText={() => content}
          prominence="tertiary"
          data-testid="HumanMessage/copy-button"
        />
        <Button
          icon={SvgEdit}
          prominence="tertiary"
          tooltip="Edit"
          onClick={() => setIsEditing(true)}
          data-testid="HumanMessage/edit-button"
        />
      </div>
    ),
    [content]
  );

  const copyEditButton = (
    <Hoverable.Item group="humanMessage" variant="opacity-on-hover">
      {copyEditButtonContent}
    </Hoverable.Item>
  );

  return (
    <Hoverable.Root group="humanMessage" widthVariant="full">
      <div
        id="onyx-human-message"
        className="flex flex-col justify-end w-full relative"
      >
        <FileDisplay files={files || []} />
        {isEditing ? (
          <MessageEditing
            content={content}
            onSubmitEdit={(editedContent) => {
              // Don't update UI for edits that can't be persisted
              if (messageId === undefined || messageId === null) {
                setIsEditing(false);
                return;
              }
              onEdit?.(editedContent, messageId);
              setContent(editedContent);
              setIsEditing(false);
            }}
            onCancelEdit={() => setIsEditing(false)}
          />
        ) : (
          <div className="flex justify-end">
            {onEdit && !isMobile && copyEditButton}
            <div className="md:max-w-[37.5rem]">
              <div
                className={
                  "max-w-[30rem] md:max-w-[37.5rem] whitespace-break-spaces break-anywhere rounded-t-16 rounded-bl-16 bg-background-tint-02 py-2 px-3"
                }
                onCopy={(e) => {
                  const selection = window.getSelection();
                  if (selection) {
                    e.preventDefault();
                    const text = selection
                      .toString()
                      .replace(/\n{2,}/g, "\n")
                      .trim();
                    e.clipboardData.setData("text/plain", text);
                  }
                }}
              >
                <Text
                  as="p"
                  className="inline-block align-middle"
                  mainContentBody
                >
                  {content}
                </Text>
              </div>
            </div>
          </div>
        )}
        <div className="flex justify-end pt-1">
          {!isEditing && onEdit && isMobile && copyEditButton}
          {currentMessageInd !== undefined &&
            onMessageSelection &&
            otherMessagesCanSwitchTo &&
            otherMessagesCanSwitchTo.length > 1 && (
              <MessageSwitcher
                disableForStreaming={disableSwitchingForStreaming}
                currentPage={currentMessageInd + 1}
                totalPages={otherMessagesCanSwitchTo.length}
                handlePrevious={() => {
                  stopGenerating();
                  const prevMessage = getPreviousMessage();
                  if (prevMessage !== undefined) {
                    onMessageSelection(prevMessage);
                  }
                }}
                handleNext={() => {
                  stopGenerating();
                  const nextMessage = getNextMessage();
                  if (nextMessage !== undefined) {
                    onMessageSelection(nextMessage);
                  }
                }}
              />
            )}
        </div>
      </div>
    </Hoverable.Root>
  );
}, arePropsEqual);

export default HumanMessage;


================================================
FILE: web/src/app/app/message/MemoizedTextComponents.tsx
================================================
import {
  QuestionCardProps,
  DocumentCardProps,
} from "@/components/search/results/Citation";
import {
  LoadedOnyxDocument,
  MinimalOnyxDocument,
  OnyxDocument,
} from "@/lib/search/interfaces";
import React, { memo, JSX, useMemo, useCallback } from "react";
import { SourceIcon } from "@/components/SourceIcon";
import { WebResultIcon } from "@/components/WebResultIcon";
import { SubQuestionDetail, CitationMap } from "../interfaces";
import { ValidSources } from "@/lib/types";
import { ProjectFile } from "../projects/projectsService";
import { BlinkingBar } from "./BlinkingBar";
import Text from "@/refresh-components/texts/Text";
import SourceTag from "@/refresh-components/buttons/source-tag/SourceTag";
import {
  documentToSourceInfo,
  questionToSourceInfo,
  getDisplayNameForSource,
} from "@/refresh-components/buttons/source-tag/sourceTagUtils";
import { openDocument } from "@/lib/search/utils";
import { ensureHrefProtocol } from "@/lib/utils";

export const MemoizedAnchor = memo(
  ({
    docs,
    subQuestions,
    openQuestion,
    userFiles,
    citations,
    href,
    updatePresentingDocument,
    children,
  }: {
    subQuestions?: SubQuestionDetail[];
    openQuestion?: (question: SubQuestionDetail) => void;
    docs?: OnyxDocument[] | null;
    userFiles?: ProjectFile[] | null;
    citations?: CitationMap;
    updatePresentingDocument: (doc: MinimalOnyxDocument) => void;
    href?: string;
    children: React.ReactNode;
  }): JSX.Element => {
    const value = children?.toString();
    if (value?.startsWith("[") && value?.endsWith("]")) {
      const match = value.match(/\[(D|Q)?(\d+)\]/);

      if (match) {
        const match_item = match[2];
        if (match_item !== undefined) {
          const isSubQuestion = match[1] === "Q";
          const isDocument = !isSubQuestion;

          const citation_num = parseInt(match_item, 10);

          // Use citation map to find the correct document
          // Citations map format: {citation_num: document_id}
          // e.g., {1: "doc_abc", 2: "doc_xyz", 3: "doc_123"}
          let associatedDoc: OnyxDocument | null = null;
          if (isDocument && docs && citations) {
            const document_id = citations[citation_num];
            if (document_id) {
              associatedDoc =
                docs.find((d) => d.document_id === document_id) || null;
            }
          }

          const associatedSubQuestion = isSubQuestion
            ? subQuestions?.[citation_num - 1]
            : undefined;

          if (!associatedDoc && !associatedSubQuestion) {
            return <>{children}</>;
          }

          let icon: React.ReactNode = null;
          if (associatedDoc?.source_type === "web") {
            icon = <WebResultIcon url={associatedDoc.link} />;
          } else {
            icon = (
              <SourceIcon
                sourceType={associatedDoc?.source_type as ValidSources}
                iconSize={18}
              />
            );
          }
          const associatedDocInfo = associatedDoc
            ? {
                ...associatedDoc,
                icon: icon as any,
                link: associatedDoc.link,
              }
            : undefined;

          return (
            <MemoizedLink
              updatePresentingDocument={updatePresentingDocument}
              href={href}
              document={associatedDocInfo}
              question={associatedSubQuestion}
              openQuestion={openQuestion}
            >
              {children}
            </MemoizedLink>
          );
        }
      }
    }
    return (
      <MemoizedLink
        updatePresentingDocument={updatePresentingDocument}
        href={href}
      >
        {children}
      </MemoizedLink>
    );
  }
);

export const MemoizedLink = memo(
  ({
    node,
    document,
    updatePresentingDocument,
    question,
    href,
    openQuestion,
    ...rest
  }: Partial<DocumentCardProps & QuestionCardProps> & {
    node?: any;
    [key: string]: any;
  }) => {
    const value = rest.children;

    // Convert document to SourceInfo for SourceTag
    const documentSourceInfo = useMemo(() => {
      if (!document) return null;
      return documentToSourceInfo(document as OnyxDocument);
    }, [document]);

    // Convert question to SourceInfo for SourceTag
    const questionSourceInfo = useMemo(() => {
      if (!question) return null;
      return questionToSourceInfo(question, question.level_question_num);
    }, [question]);

    // Handle click on SourceTag
    const handleSourceClick = useCallback(() => {
      if (document && updatePresentingDocument) {
        openDocument(document as OnyxDocument, updatePresentingDocument);
      } else if (question && openQuestion) {
        openQuestion(question);
      }
    }, [document, updatePresentingDocument, question, openQuestion]);

    if (value?.toString().startsWith("*")) {
      return <BlinkingBar addMargin />;
    } else if (value?.toString().startsWith("[")) {
      const sourceInfo = documentSourceInfo || questionSourceInfo;
      if (!sourceInfo) {
        return <>{rest.children}</>;
      }

      const displayName = document
        ? getDisplayNameForSource(document as OnyxDocument)
        : question?.question || "Question";

      return (
        <SourceTag
          variant="inlineCitation"
          displayName={displayName}
          sources={[sourceInfo]}
          onSourceClick={handleSourceClick}
          showDetailsCard
          className="mr-0.5"
        />
      );
    }

    const url = ensureHrefProtocol(href);

    // Check if the link is to a file on the backend
    const isChatFile = url?.includes("/api/chat/file/");
    if (isChatFile && updatePresentingDocument) {
      const fileId = url!.split("/api/chat/file/")[1]?.split(/[?#]/)[0] || "";
      const filename = value?.toString() || "download";
      return (
        <a
          href="#"
          onClick={(e) => {
            e.preventDefault();
            updatePresentingDocument({
              document_id: fileId,
              semantic_identifier: filename,
            });
          }}
          className="cursor-pointer text-link hover:text-link-hover"
        >
          {rest.children}
        </a>
      );
    }

    return (
      <a
        href={url}
        target="_blank"
        rel="noopener noreferrer"
        className="cursor-pointer text-link hover:text-link-hover"
      >
        {rest.children}
      </a>
    );
  }
);

interface MemoizedParagraphProps {
  className?: string;
  children?: React.ReactNode;
}

export const MemoizedParagraph = memo(function MemoizedParagraph({
  className,
  children,
}: MemoizedParagraphProps) {
  return (
    <Text as="p" mainContentBody className={className}>
      {children}
    </Text>
  );
});

MemoizedAnchor.displayName = "MemoizedAnchor";
MemoizedLink.displayName = "MemoizedLink";
MemoizedParagraph.displayName = "MemoizedParagraph";


================================================
FILE: web/src/app/app/message/MessageSwitcher.tsx
================================================
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { SvgChevronLeft, SvgChevronRight } from "@opal/icons";
const DISABLED_MESSAGE = "Wait for agent message to complete";

interface MessageSwitcherProps {
  currentPage: number;
  totalPages: number;
  handlePrevious: () => void;
  handleNext: () => void;
  disableForStreaming?: boolean;
}

export default function MessageSwitcher({
  currentPage,
  totalPages,
  handlePrevious,
  handleNext,
  disableForStreaming,
}: MessageSwitcherProps) {
  const handle = (num: number, callback: () => void) =>
    disableForStreaming
      ? undefined
      : currentPage === num
        ? undefined
        : callback;
  const previous = handle(1, handlePrevious);
  const next = handle(totalPages, handleNext);

  return (
    <div
      className="flex flex-row items-center gap-1"
      data-testid="MessageSwitcher/container"
    >
      <Button
        disabled={disableForStreaming}
        icon={SvgChevronLeft}
        onClick={previous}
        prominence="tertiary"
        tooltip={disableForStreaming ? DISABLED_MESSAGE : "Previous"}
      />

      <div className="flex flex-row items-center justify-center">
        <Text as="p" text03 mainUiAction>
          {currentPage}
        </Text>
        <Text as="p" text03 mainUiAction>
          /
        </Text>
        <Text as="p" text03 mainUiAction>
          {totalPages}
        </Text>
      </div>

      <Button
        disabled={disableForStreaming}
        icon={SvgChevronRight}
        onClick={next}
        prominence="tertiary"
        tooltip={disableForStreaming ? DISABLED_MESSAGE : "Next"}
      />
    </div>
  );
}


================================================
FILE: web/src/app/app/message/Resubmit.tsx
================================================
import { useState } from "react";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { SvgChevronDown, SvgChevronRight } from "@opal/icons";
import { Button } from "@opal/components";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { getErrorIcon, getErrorTitle } from "./errorHelpers";

interface ResubmitProps {
  resubmit: () => void;
}

export const Resubmit: React.FC<ResubmitProps> = ({ resubmit }) => {
  return (
    <div className="flex flex-col items-center justify-center gap-y-2 mt-4">
      <p className="text-sm text-neutral-700 dark:text-neutral-300">
        There was an error with the response.
      </p>
      <Button onClick={resubmit}>Regenerate</Button>
    </div>
  );
};

export const ErrorBanner = ({
  error,
  errorCode,
  isRetryable = true,
  details,
  stackTrace,
  resubmit,
}: {
  error: string;
  errorCode?: string;
  isRetryable?: boolean;
  details?: Record<string, any>;
  stackTrace?: string | null;
  resubmit?: () => void;
}) => {
  const [isStackTraceExpanded, setIsStackTraceExpanded] = useState(false);

  return (
    <div className="text-red-700 mt-4 text-sm my-auto">
      <Alert variant="broken">
        {getErrorIcon(errorCode)}
        <AlertTitle>{getErrorTitle(errorCode)}</AlertTitle>
        <AlertDescription className="flex flex-col gap-y-1">
          <span>{error}</span>
          {details?.model && (
            <span className="text-xs text-muted-foreground">
              Model: {details.model}
              {details.provider && ` (${details.provider})`}
            </span>
          )}
          {details?.tool_name && (
            <span className="text-xs text-muted-foreground">
              Tool: {details.tool_name}
            </span>
          )}
          {stackTrace && (
            <div className="mt-2 border-t border-neutral-200 dark:border-neutral-700 pt-2">
              <div className="flex flex-1 items-center justify-between">
                <Button
                  prominence="tertiary"
                  icon={isStackTraceExpanded ? SvgChevronDown : SvgChevronRight}
                  onClick={() => setIsStackTraceExpanded(!isStackTraceExpanded)}
                >
                  Stack trace
                </Button>
                <CopyIconButton
                  prominence="tertiary"
                  getCopyText={() => stackTrace}
                />
              </div>
              {isStackTraceExpanded && (
                <pre className="mt-2 p-3 bg-neutral-100 dark:bg-neutral-800 border border-neutral-200 dark:border-neutral-700 rounded text-xs text-neutral-700 dark:text-neutral-300 overflow-auto max-h-48 whitespace-pre-wrap font-mono">
                  {stackTrace}
                </pre>
              )}
            </div>
          )}
        </AlertDescription>
      </Alert>
      {isRetryable && resubmit && <Resubmit resubmit={resubmit} />}
    </div>
  );
};


================================================
FILE: web/src/app/app/message/codeUtils.test.ts
================================================
import { preprocessLaTeX } from "./codeUtils";

describe("preprocessLaTeX", () => {
  describe("currency formatting", () => {
    it("should properly escape dollar signs in text with amounts", () => {
      const input =
        "Maria wants to buy a new laptop that costs $1,200. She has saved $800 so far. If she saves an additional $100 each month, how many months will it take her to have enough money to buy the laptop?";
      const processed = preprocessLaTeX(input);

      // Should escape all dollar signs in currency amounts
      expect(processed).toContain("costs \\$1,200");
      expect(processed).toContain("saved \\$800");
      expect(processed).toContain("additional \\$100");
      expect(processed).not.toContain("costs $1,200");
    });

    it("should handle dollar signs with backslashes already present", () => {
      const input =
        "Maria wants to buy a new laptop that costs \\$1,200. She has saved \\$800 so far.";
      const processed = preprocessLaTeX(input);

      // Should preserve the existing escaped dollar signs
      expect(processed).toContain("\\$1,200");
      expect(processed).toContain("\\$800");
    });
  });

  describe("code block handling", () => {
    it("should not process dollar signs in code blocks", () => {
      const input = "```plaintext\nThe total cost is $50.\n```";
      const processed = preprocessLaTeX(input);

      // Dollar sign in code block should remain untouched
      expect(processed).toContain("The total cost is $50.");
      expect(processed).not.toContain("The total cost is \\$50.");
    });

    it("should not process dollar signs in inline code", () => {
      const input =
        'Use the `printf "$%.2f" $amount` command to format currency.';
      const processed = preprocessLaTeX(input);

      // Dollar signs in inline code should remain untouched
      expect(processed).toContain('`printf "$%.2f" $amount`');
      expect(processed).not.toContain('`printf "\\$%.2f" \\$amount`');
    });

    it("should handle mixed content with code blocks and currency", () => {
      const input =
        "The cost is $100.\n\n```javascript\nconst price = '$50';\n```\n\nThe remaining balance is $50.";
      const processed = preprocessLaTeX(input);

      // Dollar signs outside code blocks should be escaped
      expect(processed).toContain("The cost is \\$100");
      expect(processed).toContain("The remaining balance is \\$50");

      // Dollar sign in code block should be preserved
      expect(processed).toContain("const price = '$50';");
      expect(processed).not.toContain("const price = '\\$50';");
    });
  });

  describe("LaTeX handling", () => {
    it("should preserve proper LaTeX delimiters", () => {
      const input =
        "The formula $x^2 + y^2 = z^2$ represents the Pythagorean theorem.";
      const processed = preprocessLaTeX(input);

      // LaTeX delimiters should be preserved
      expect(processed).toContain("$x^2 + y^2 = z^2$");
    });

    it("should convert LaTeX block delimiters", () => {
      const input = "Consider the equation: \\[E = mc^2\\]";
      const processed = preprocessLaTeX(input);

      // Block LaTeX delimiters should be converted
      expect(processed).toContain("$$E = mc^2$$");
      expect(processed).not.toContain("\\[E = mc^2\\]");
    });

    it("should convert LaTeX inline delimiters", () => {
      const input =
        "The speed of light \\(c\\) is approximately 299,792,458 m/s.";
      const processed = preprocessLaTeX(input);

      // Inline LaTeX delimiters should be converted
      expect(processed).toContain("$c$");
      expect(processed).not.toContain("\\(c\\)");
    });
  });

  describe("special cases", () => {
    it("should handle shell variables in text", () => {
      const input =
        "In bash, you can access arguments with $1, $2, and use echo $HOME to print the home directory.";
      const processed = preprocessLaTeX(input);

      // Verify current behavior (numeric shell variables are being escaped)
      expect(processed).toContain("\\$1");
      expect(processed).toContain("\\$2");

      // But $HOME is not escaped (non-numeric)
      expect(processed).toContain("$HOME");
    });

    it("should handle shell commands with dollar signs", () => {
      const input = "Use awk '{print $2}' to print the second column.";
      const processed = preprocessLaTeX(input);

      // Dollar sign in awk command should not be escaped
      expect(processed).toContain("{print $2}");
      expect(processed).not.toContain("{print \\$2}");
    });

    it("should handle Einstein's equation with mixed LaTeX and code blocks", () => {
      const input =
        "Sure! The equation for Einstein's mass-energy equivalence, \\(E = mc^2\\), can be written in LaTeX as follows: ```latex\nE = mc^2\n``` When rendered, it looks like this: \\[ E = mc^2 \\]";
      const processed = preprocessLaTeX(input);

      // LaTeX inline delimiters should be converted
      expect(processed).toContain("equivalence, $E = mc^2$,");
      expect(processed).not.toContain("equivalence, \\(E = mc^2\\),");

      // LaTeX block delimiters should be converted
      expect(processed).toContain("it looks like this: $$ E = mc^2 $$");
      expect(processed).not.toContain("it looks like this: \\[ E = mc^2 \\]");

      // LaTeX within code blocks should remain untouched
      expect(processed).toContain("```latex\nE = mc^2\n```");
    });
  });
});


================================================
FILE: web/src/app/app/message/codeUtils.ts
================================================
import React from "react";

export function extractCodeText(
  node: any,
  content: string,
  children: React.ReactNode
): string {
  let codeText: string | null = null;

  if (
    node?.position?.start?.offset != null &&
    node?.position?.end?.offset != null
  ) {
    codeText = content
      .slice(node.position.start.offset, node.position.end.offset)
      .trim();

    // Match code block with optional language declaration
    const codeBlockMatch = codeText.match(/^```[^\n]*\n([\s\S]*?)\n?```$/);
    if (codeBlockMatch) {
      const codeTextMatch = codeBlockMatch[1];
      if (codeTextMatch !== undefined) {
        codeText = codeTextMatch;
      }
    }

    // Normalize indentation
    const codeLines = codeText.split("\n");
    const minIndent = codeLines
      .filter((line) => line.trim().length > 0)
      .reduce((min, line) => {
        const match = line.match(/^\s*/);
        return Math.min(min, match ? match[0].length : min);
      }, Infinity);

    const formattedCodeLines = codeLines.map((line) => line.slice(minIndent));
    codeText = formattedCodeLines.join("\n").trim();
  } else {
    // Fallback if position offsets are not available
    const extractTextFromReactNode = (node: React.ReactNode): string => {
      if (typeof node === "string") return node;
      if (typeof node === "number") return String(node);
      if (!node) return "";

      if (React.isValidElement(node)) {
        const children = (node.props as any).children;
        if (Array.isArray(children)) {
          return children.map(extractTextFromReactNode).join("");
        }
        return extractTextFromReactNode(children);
      }

      if (Array.isArray(node)) {
        return node.map(extractTextFromReactNode).join("");
      }

      return "";
    };

    codeText = extractTextFromReactNode(children);
  }

  return codeText || "";
}
// We must preprocess LaTeX in the LLM output to avoid improper formatting

export const preprocessLaTeX = (content: string) => {
  // First detect if content is within a code block
  const codeBlockRegex = /^```[\s\S]*?```$/;
  const isCodeBlock = codeBlockRegex.test(content.trim());

  // If the entire content is a code block, don't process LaTeX
  if (isCodeBlock) {
    return content;
  }

  // Extract code blocks and replace with placeholders
  const codeBlocks: string[] = [];
  const withCodeBlocksReplaced = content.replace(/```[\s\S]*?```/g, (match) => {
    const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
    codeBlocks.push(match);
    return placeholder;
  });

  // First, protect code-like expressions where $ is used for variables
  const codeProtected = withCodeBlocksReplaced.replace(
    /\b(\w+(?:\s*-\w+)*\s*(?:'[^']*')?)\s*\{[^}]*?\$\d+[^}]*?\}/g,
    (match) => {
      // Replace $ with a temporary placeholder in code contexts
      return match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___");
    }
  );

  // Also protect common shell variable patterns like $1, $2, etc.
  const shellProtected = codeProtected.replace(
    /\b(?:print|echo|awk|sed|grep)\s+.*?\$\d+/g,
    (match) => match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___")
  );

  // Protect inline code blocks with backticks
  const inlineCodeProtected = shellProtected.replace(/`[^`]+`/g, (match) => {
    return match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___");
  });

  // Process LaTeX expressions now that code is protected
  // Valid LaTeX should have matching dollar signs with non-space chars surrounding content
  const processedForLatex = inlineCodeProtected.replace(
    /\$([^\s$][^$]*?[^\s$])\$/g,
    (_, equation) => `$${equation}$`
  );

  // Escape currency mentions
  const currencyEscaped = processedForLatex.replace(
    /\$(\d+(?:\.\d*)?)/g,
    (_, p1) => `\\$${p1}`
  );

  // Replace block-level LaTeX delimiters \[ \] with $$ $$
  const blockProcessed = currencyEscaped.replace(
    /\\\[([\s\S]*?)\\\]/g,
    (_, equation) => `$$${equation}$$`
  );

  // Replace inline LaTeX delimiters \( \) with $ $
  const inlineProcessed = blockProcessed.replace(
    /\\\(([\s\S]*?)\\\)/g,
    (_, equation) => `$${equation}$`
  );

  // Restore original dollar signs in code contexts
  const restoredDollars = inlineProcessed.replace(
    /___DOLLAR_PLACEHOLDER___/g,
    "$"
  );

  // Restore code blocks
  const restoredCodeBlocks = restoredDollars.replace(
    /___CODE_BLOCK_(\d+)___/g,
    (_, index) => codeBlocks[parseInt(index)] ?? ""
  );

  return restoredCodeBlocks;
};


================================================
FILE: web/src/app/app/message/copyingUtils.tsx
================================================
"use client";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import remarkRehype from "remark-rehype";
import rehypeHighlight from "rehype-highlight";
import rehypeKatex from "rehype-katex";
import rehypeSanitize from "rehype-sanitize";
import rehypeStringify from "rehype-stringify";

export function handleCopy(
  event: React.ClipboardEvent,
  markdownRef: React.RefObject<HTMLDivElement>
) {
  // Check if we have a selection
  const selection = window.getSelection();
  if (!selection?.rangeCount) return;

  const range = selection.getRangeAt(0);

  // If selection is within our markdown container
  if (
    markdownRef.current &&
    markdownRef.current.contains(range.commonAncestorContainer)
  ) {
    event.preventDefault();

    // Clone selection to get the HTML
    const fragment = range.cloneContents();
    const tempDiv = document.createElement("div");
    tempDiv.appendChild(fragment);

    // Create clipboard data with both HTML and plain text
    event.clipboardData.setData("text/html", tempDiv.innerHTML);
    event.clipboardData.setData("text/plain", selection.toString());
  }
}

// Convert markdown tables to TSV format for spreadsheet compatibility
export function convertMarkdownTablesToTsv(content: string): string {
  const lines = content.split("\n");
  const result: string[] = [];

  for (const line of lines) {
    // Check if line is a markdown table row (starts and ends with |)
    const trimmed = line.trim();
    if (trimmed.startsWith("|") && trimmed.endsWith("|")) {
      // Check if it's a separator row (contains only |, -, :, and spaces)
      if (/^\|[\s\-:|\s]+\|$/.test(trimmed)) {
        // Skip separator rows
        continue;
      }
      // Convert table row: split by |, trim cells, join with tabs
      const placeholder = "\x00";
      const cells = trimmed
        .slice(1, -1) // Remove leading and trailing |
        .replace(/\\\|/g, placeholder) // Preserve escaped pipes
        .split("|")
        .map((cell) => cell.trim().replace(new RegExp(placeholder, "g"), "|"));
      result.push(cells.join("\t"));
    } else {
      result.push(line);
    }
  }

  return result.join("\n");
}

// For copying the entire content
export function copyAll(content: string) {
  // Convert markdown to HTML using unified ecosystem
  unified()
    .use(remarkParse)
    .use(remarkGfm)
    .use(remarkMath)
    .use(remarkRehype)
    .use(rehypeHighlight)
    .use(rehypeKatex)
    .use(rehypeSanitize)
    .use(rehypeStringify)
    .process(content)
    .then((file: any) => {
      const htmlContent = String(file);

      // Create clipboard data
      const clipboardItem = new ClipboardItem({
        "text/html": new Blob([htmlContent], { type: "text/html" }),
        "text/plain": new Blob([content], { type: "text/plain" }),
      });

      navigator.clipboard.write([clipboardItem]);
    });
}


================================================
FILE: web/src/app/app/message/custom-code-styles.css
================================================
/* Light mode syntax highlighting (Atom One Light) */
.hljs {
  color: #383a42 !important;
  background: var(--background-code-01) !important;
}

.hljs-comment,
.hljs-quote {
  color: #a0a1a7;
  font-style: italic;
}

.hljs-doctag,
.hljs-keyword,
.hljs-formula {
  color: #a626a4;
}

.hljs-section,
.hljs-name,
.hljs-selector-tag,
.hljs-deletion,
.hljs-subst {
  color: #e45649;
}

.hljs-literal {
  color: #0184bb;
}

.hljs-string,
.hljs-regexp,
.hljs-addition,
.hljs-attribute,
.hljs-meta .hljs-string {
  color: #50a14f;
}

.hljs-attr,
.hljs-variable,
.hljs-template-variable,
.hljs-type,
.hljs-selector-class,
.hljs-selector-attr,
.hljs-selector-pseudo,
.hljs-number {
  color: #986801;
}

.hljs-symbol,
.hljs-bullet,
.hljs-link,
.hljs-meta,
.hljs-selector-id,
.hljs-title {
  color: #4078f2;
}

.hljs-built_in,
.hljs-title.class_,
.hljs-class .hljs-title {
  color: #c18401;
}

.hljs-emphasis {
  font-style: italic;
}

.hljs-strong {
  font-weight: bold;
}

.hljs-link {
  text-decoration: underline;
}

/* Dark mode syntax highlighting (Atom One Dark) */
.dark .hljs {
  color: #e2e6eb !important;
  background: var(--background-code-01) !important;
}

.dark .hljs-comment,
.dark .hljs-quote {
  color: #5c6370;
  font-style: italic;
}

.dark .hljs-doctag,
.dark .hljs-keyword,
.dark .hljs-formula {
  color: #c678dd;
}

.dark .hljs-section,
.dark .hljs-name,
.dark .hljs-selector-tag,
.dark .hljs-deletion,
.dark .hljs-subst {
  color: #e06c75;
}

.dark .hljs-literal {
  color: #56b6c2;
}

.dark .hljs-string,
.dark .hljs-regexp,
.dark .hljs-addition,
.dark .hljs-attribute,
.dark .hljs-meta .hljs-string {
  color: #98c379;
}

.dark .hljs-attr,
.dark .hljs-variable,
.dark .hljs-template-variable,
.dark .hljs-type,
.dark .hljs-selector-class,
.dark .hljs-selector-attr,
.dark .hljs-selector-pseudo,
.dark .hljs-number {
  color: #d19a66;
}

.dark .hljs-symbol,
.dark .hljs-bullet,
.dark .hljs-link,
.dark .hljs-meta,
.dark .hljs-selector-id,
.dark .hljs-title {
  color: #61aeee;
}

.dark .hljs-built_in,
.dark .hljs-title.class_,
.dark .hljs-class .hljs-title {
  color: #e6c07b;
}

.dark .hljs-emphasis {
  font-style: italic;
}

.dark .hljs-strong {
  font-weight: bold;
}

.dark .hljs-link {
  text-decoration: underline;
}

pre[class*="language-"] {
  padding: 0px; /* Override padding */
  margin: 0px;
  border: none;
}

.prose :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
  padding: 0px; /* Override padding */
  margin: 0px;

  /* Override scrollbar style to match highlight.js theme */
  ::-webkit-scrollbar {
    width: 8px; /* Vertical scrollbar width */
    height: 8px; /* Horizontal scrollbar height */
  }

  /* Light mode scrollbar */
  ::-webkit-scrollbar-track {
    background: #e5e7eb; /* Light track background color */
  }

  ::-webkit-scrollbar-thumb {
    background: #c9cdd1; /* Light handle color - subtle */
    border-radius: 10px;
    transition: background 0.2s ease;
  }

  ::-webkit-scrollbar-thumb:hover {
    background: #6b7280; /* Light handle color on hover */
  }

  scrollbar-width: thin;
  scrollbar-color: #c9cdd1 #e5e7eb; /* thumb and track colors for light mode */
}

/* Light mode - highlight scrollbar when hovering code block */
.prose
  :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)):hover {
  ::-webkit-scrollbar-thumb {
    background: #9ca3af; /* More visible on code block hover */
  }

  ::-webkit-scrollbar-thumb:hover {
    background: #6b7280;
  }

  scrollbar-color: #9ca3af #e5e7eb;
}

/* Dark mode scrollbar for code blocks */
.dark
  .prose
  :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
  ::-webkit-scrollbar-track {
    background: #1f2937; /* Dark track background color */
  }

  ::-webkit-scrollbar-thumb {
    background: #374151; /* Dark handle color - subtle */
    transition: background 0.2s ease;
  }

  ::-webkit-scrollbar-thumb:hover {
    background: #6b7280; /* Dark handle color on hover */
    box-shadow: 0 0 10px #6b7280; /* Light up effect on hover */
  }

  scrollbar-color: #374151 #1f2937; /* thumb and track colors for dark mode */
}

/* Dark mode - highlight scrollbar when hovering code block */
.dark
  .prose
  :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)):hover {
  ::-webkit-scrollbar-thumb {
    background: #4b5563; /* More visible on code block hover */
  }

  ::-webkit-scrollbar-thumb:hover {
    background: #6b7280;
    box-shadow: 0 0 10px #6b7280;
  }

  scrollbar-color: #4b5563 #1f2937;
}

/*
 * Table breakout container - allows tables to extend beyond their parent's
 * constrained width to use the full container query width (100cqw).
 *
 * Requires an ancestor element with `container-type: inline-size` (@container in Tailwind).
 *
 * How the math works:
 * - width: 100cqw → expand to full container query width
 * - marginLeft: calc((100% - 100cqw) / 2) → negative margin pulls element left
 *   (100% is parent width, 100cqw is larger, so result is negative)
 * - paddingLeft/Right: calc((100cqw - 100%) / 2) → padding keeps content aligned
 *   with original position while allowing scroll area to extend
 */
.markdown-table-breakout {
  overflow-x: auto;
  width: 100cqw;
  margin-left: calc((100% - 100cqw) / 2);
  padding-left: calc((100cqw - 100%) / 2);
  padding-right: calc((100cqw - 100%) / 2);
}


================================================
FILE: web/src/app/app/message/errorHelpers.tsx
================================================
import { AlertCircle, Clock, Lock, Wifi, Server } from "lucide-react";

/**
 * Get the appropriate icon for a given error code
 */
export const getErrorIcon = (errorCode?: string) => {
  switch (errorCode) {
    case "RATE_LIMIT":
      return <Clock className="h-4 w-4" />;
    case "AUTH_ERROR":
    case "PERMISSION_DENIED":
      return <Lock className="h-4 w-4" />;
    case "CONNECTION_ERROR":
      return <Wifi className="h-4 w-4" />;
    case "SERVICE_UNAVAILABLE":
      return <Server className="h-4 w-4" />;
    case "BUDGET_EXCEEDED":
      return <AlertCircle className="h-4 w-4" />;
    default:
      return <AlertCircle className="h-4 w-4" />;
  }
};

/**
 * Get a human-readable title for a given error code
 */
export const getErrorTitle = (errorCode?: string) => {
  switch (errorCode) {
    case "RATE_LIMIT":
      return "Rate Limit Exceeded";
    case "AUTH_ERROR":
      return "Authentication Error";
    case "PERMISSION_DENIED":
      return "Permission Denied";
    case "CONTEXT_TOO_LONG":
      return "Message Too Long";
    case "TOOL_CALL_FAILED":
      return "Tool Error";
    case "CONNECTION_ERROR":
      return "Connection Error";
    case "SERVICE_UNAVAILABLE":
      return "Service Unavailable";
    case "INIT_FAILED":
      return "Initialization Error";
    case "VALIDATION_ERROR":
      return "Validation Error";
    case "BUDGET_EXCEEDED":
      return "Budget Exceeded";
    case "CONTENT_POLICY":
      return "Content Policy Violation";
    case "BAD_REQUEST":
      return "Invalid Request";
    case "NOT_FOUND":
      return "Resource Not Found";
    case "API_ERROR":
      return "API Error";
    default:
      return "Error";
  }
};


================================================
FILE: web/src/app/app/message/hooks.ts
================================================
import { useEffect, useRef, useState } from "react";

export function useMouseTracking() {
  const [isHovering, setIsHovering] = useState<boolean>(false);
  const trackedElementRef = useRef<HTMLDivElement>(null);
  const hoverElementRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const handleMouseMove = (event: MouseEvent) => {
      if (trackedElementRef.current && hoverElementRef.current) {
        const trackedRect = trackedElementRef.current.getBoundingClientRect();
        const hoverRect = hoverElementRef.current.getBoundingClientRect();

        const isOverTracked =
          event.clientX >= trackedRect.left &&
          event.clientX <= trackedRect.right &&
          event.clientY >= trackedRect.top &&
          event.clientY <= trackedRect.bottom;

        const isOverHover =
          event.clientX >= hoverRect.left &&
          event.clientX <= hoverRect.right &&
          event.clientY >= hoverRect.top &&
          event.clientY <= hoverRect.bottom;

        setIsHovering(isOverTracked || isOverHover);
      }
    };

    document.addEventListener("mousemove", handleMouseMove);

    return () => {
      document.removeEventListener("mousemove", handleMouseMove);
    };
  }, []);

  return { isHovering, trackedElementRef, hoverElementRef };
}


================================================
FILE: web/src/app/app/message/messageComponents/AgentMessage.tsx
================================================
"use client";

import React, {
  useRef,
  RefObject,
  useMemo,
  useEffect,
  useLayoutEffect,
} from "react";
import { Packet, StopReason } from "@/app/app/services/streamingModels";
import CustomToolAuthCard from "@/app/app/message/messageComponents/CustomToolAuthCard";
import { FullChatState } from "@/app/app/message/messageComponents/interfaces";
import { FeedbackType } from "@/app/app/interfaces";
import { handleCopy } from "@/app/app/message/copyingUtils";
import { useAuthErrors } from "@/app/app/message/messageComponents/hooks/useAuthErrors";
import { useMessageSwitching } from "@/app/app/message/messageComponents/hooks/useMessageSwitching";
import { RendererComponent } from "@/app/app/message/messageComponents/renderMessageComponent";
import { usePacketProcessor } from "@/app/app/message/messageComponents/timeline/hooks/usePacketProcessor";
import { usePacedTurnGroups } from "@/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups";
import MessageToolbar from "@/app/app/message/messageComponents/MessageToolbar";
import { LlmDescriptor, LlmManager } from "@/lib/hooks";
import { Message } from "@/app/app/interfaces";
import Text from "@/refresh-components/texts/Text";
import { AgentTimeline } from "@/app/app/message/messageComponents/timeline/AgentTimeline";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import { getTextContent } from "@/app/app/services/packetUtils";
import { removeThinkingTokens } from "@/app/app/services/thinkingTokens";

// Type for the regeneration factory function passed from ChatUI
export type RegenerationFactory = (regenerationRequest: {
  messageId: number;
  parentMessage: Message;
  forceSearch?: boolean;
}) => (modelOverride: LlmDescriptor) => Promise<void>;

export interface AgentMessageProps {
  rawPackets: Packet[];
  packetCount?: number; // Tracked separately for React memo comparison (avoids reading from mutated array)
  chatState: FullChatState;
  nodeId: number;
  messageId?: number;
  currentFeedback?: FeedbackType | null;
  llmManager: LlmManager | null;
  otherMessagesCanSwitchTo?: number[];
  onMessageSelection?: (nodeId: number) => void;
  // Stable regeneration callback - takes (parentMessage) and returns a function that takes (modelOverride)
  onRegenerate?: RegenerationFactory;
  // Parent message needed to construct regeneration request
  parentMessage?: Message | null;
  // Duration in seconds for processing this message (agent messages only)
  processingDurationSeconds?: number;
}

// TODO: Consider more robust comparisons:
// - `chatState.docs`, `chatState.citations`, and `otherMessagesCanSwitchTo` use
//   reference equality. Shallow array/object comparison would be more robust if
//   these are recreated with the same values.
function arePropsEqual(
  prev: AgentMessageProps,
  next: AgentMessageProps
): boolean {
  return (
    prev.nodeId === next.nodeId &&
    prev.messageId === next.messageId &&
    prev.currentFeedback === next.currentFeedback &&
    // Compare packetCount (primitive) instead of rawPackets.length
    // The array is mutated in place, so reading .length from prev and next would return same value
    prev.packetCount === next.packetCount &&
    prev.chatState.agent?.id === next.chatState.agent?.id &&
    prev.chatState.docs === next.chatState.docs &&
    prev.chatState.citations === next.chatState.citations &&
    prev.chatState.overriddenModel === next.chatState.overriddenModel &&
    prev.chatState.researchType === next.chatState.researchType &&
    prev.otherMessagesCanSwitchTo === next.otherMessagesCanSwitchTo &&
    prev.onRegenerate === next.onRegenerate &&
    prev.parentMessage?.messageId === next.parentMessage?.messageId &&
    prev.llmManager?.isLoadingProviders ===
      next.llmManager?.isLoadingProviders &&
    prev.processingDurationSeconds === next.processingDurationSeconds
    // Skip: chatState.regenerate, chatState.setPresentingDocument,
    //       most of llmManager, onMessageSelection (function/object props)
  );
}

const AgentMessage = React.memo(function AgentMessage({
  rawPackets,
  packetCount,
  chatState,
  nodeId,
  messageId,
  currentFeedback,
  llmManager,
  otherMessagesCanSwitchTo,
  onMessageSelection,
  onRegenerate,
  parentMessage,
  processingDurationSeconds,
}: AgentMessageProps) {
  const markdownRef = useRef<HTMLDivElement>(null);
  const finalAnswerRef = useRef<HTMLDivElement>(null);

  // Process streaming packets: returns data and callbacks
  // Hook handles all state internally, exposes clean API
  const {
    citations,
    citationMap,
    documentMap,
    toolGroups,
    toolTurnGroups,
    displayGroups,
    hasSteps,
    stopPacketSeen,
    stopReason,
    isGeneratingImage,
    generatedImageCount,
    isComplete,
    onRenderComplete,
    finalAnswerComing,
    toolProcessingDuration,
  } = usePacketProcessor(rawPackets, nodeId);

  // Apply pacing delays between different tool types for smoother visual transitions
  const { pacedTurnGroups, pacedDisplayGroups, pacedFinalAnswerComing } =
    usePacedTurnGroups(
      toolTurnGroups,
      displayGroups,
      stopPacketSeen,
      nodeId,
      finalAnswerComing
    );

  // Memoize merged citations separately to avoid creating new object when neither source changed
  const mergedCitations = useMemo(
    () => ({
      ...chatState.citations,
      ...citationMap,
    }),
    [chatState.citations, citationMap]
  );

  // Create a chatState that uses streaming citations for immediate rendering
  // This merges the prop citations with streaming citations, preferring streaming ones
  // Memoized with granular dependencies to prevent cascading re-renders
  // Note: chatState object is recreated upstream on every render, so we depend on
  // individual fields instead of the whole object for proper memoization
  const effectiveChatState = useMemo<FullChatState>(
    () => ({
      ...chatState,
      citations: mergedCitations,
    }),
    [
      chatState.agent,
      chatState.docs,
      chatState.setPresentingDocument,
      chatState.overriddenModel,
      chatState.researchType,
      mergedCitations,
    ]
  );

  const authErrors = useAuthErrors(rawPackets);

  // Message switching logic
  const {
    currentMessageInd,
    includeMessageSwitcher,
    getPreviousMessage,
    getNextMessage,
  } = useMessageSwitching({
    nodeId,
    otherMessagesCanSwitchTo,
    onMessageSelection,
  });

  // Streaming TTS integration
  const { streamTTS, resetTTS, stopTTS } = useVoiceMode();
  const ttsCompletedRef = useRef(false);
  const hasStreamedIncompleteRef = useRef(false);
  const hasObservedPacketGrowthRef = useRef(false);
  const lastSeenPacketCountRef = useRef(packetCount ?? rawPackets.length);
  const streamTTSRef = useRef(streamTTS);

  // Keep streamTTS ref in sync without triggering effect re-runs
  useEffect(() => {
    streamTTSRef.current = streamTTS;
  }, [streamTTS]);

  // Stream TTS as text content arrives - only for messages still streaming
  // Uses ref for streamTTS to avoid re-triggering when its identity changes
  // Note: packetCount is used instead of rawPackets because the array is mutated in place
  useLayoutEffect(() => {
    const effectivePacketCount = packetCount ?? rawPackets.length;
    if (effectivePacketCount > lastSeenPacketCountRef.current) {
      hasObservedPacketGrowthRef.current = true;
    }
    lastSeenPacketCountRef.current = effectivePacketCount;

    // Skip if we've already finished TTS for this message
    if (ttsCompletedRef.current) return;

    // If user cancelled generation, do not send more text to TTS.
    if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {
      ttsCompletedRef.current = true;
      return;
    }

    const textContent = removeThinkingTokens(getTextContent(rawPackets));
    if (!(typeof textContent === "string" && textContent.length > 0)) return;

    // Only autoplay messages that were observed streaming in this lifecycle.
    // Prevents historical, already-complete chats from re-triggering read-aloud on mount.
    if (!isComplete) {
      if (!hasObservedPacketGrowthRef.current) {
        return;
      }
      hasStreamedIncompleteRef.current = true;
      streamTTSRef.current(textContent, false, nodeId);
      return;
    }

    if (hasStreamedIncompleteRef.current) {
      streamTTSRef.current(textContent, true, nodeId);
      ttsCompletedRef.current = true;
    }
  }, [packetCount, isComplete, rawPackets, nodeId, stopPacketSeen, stopReason]); // packetCount triggers on new packets since rawPackets is mutated in place

  // Stop TTS immediately when user cancels generation.
  useEffect(() => {
    if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {
      stopTTS({ manual: true });
    }
  }, [stopPacketSeen, stopReason, stopTTS]);

  // Reset TTS completed flag when nodeId changes (new message)
  useEffect(() => {
    ttsCompletedRef.current = false;
    hasStreamedIncompleteRef.current = false;
    hasObservedPacketGrowthRef.current = false;
    lastSeenPacketCountRef.current = packetCount ?? rawPackets.length;
  }, [nodeId]);

  // Reset TTS when component unmounts or nodeId changes
  useEffect(() => {
    return () => {
      resetTTS();
    };
  }, [nodeId, resetTTS]);

  return (
    <div
      className="flex flex-col gap-3"
      data-testid={isComplete ? "onyx-ai-message" : undefined}
    >
      {/* Row 1: Two-column layout for tool steps */}

      <AgentTimeline
        turnGroups={pacedTurnGroups}
        chatState={effectiveChatState}
        stopPacketSeen={stopPacketSeen}
        stopReason={stopReason}
        hasDisplayContent={pacedDisplayGroups.length > 0}
        processingDurationSeconds={processingDurationSeconds}
        isGeneratingImage={isGeneratingImage}
        generatedImageCount={generatedImageCount}
        finalAnswerComing={pacedFinalAnswerComing}
        toolProcessingDuration={toolProcessingDuration}
      />

      {/* Row 2: Display content + MessageToolbar */}
      <div
        ref={markdownRef}
        className="overflow-x-visible focus:outline-none select-text cursor-text px-3"
        onCopy={(e) => {
          if (markdownRef.current) {
            handleCopy(e, markdownRef as RefObject<HTMLDivElement>);
          }
        }}
      >
        {pacedDisplayGroups.length > 0 && (
          <div ref={finalAnswerRef} className="flex flex-col gap-3">
            {authErrors.map((authError, i) => (
              <CustomToolAuthCard
                key={`auth-error-${i}`}
                toolName={authError.toolName}
                toolId={authError.toolId}
                tools={effectiveChatState.agent.tools}
                agentId={effectiveChatState.agent.id}
              />
            ))}
            {pacedDisplayGroups.map((displayGroup, index) => (
              <RendererComponent
                key={`${displayGroup.turn_index}-${displayGroup.tab_index}`}
                packets={displayGroup.packets}
                chatState={effectiveChatState}
                messageNodeId={nodeId}
                hasTimelineThinking={pacedTurnGroups.length > 0 || hasSteps}
                onComplete={() => {
                  // Only mark complete on the last display group
                  // Hook handles the finalAnswerComing check internally
                  if (index === pacedDisplayGroups.length - 1) {
                    onRenderComplete();
                  }
                }}
                animate={false}
                stopPacketSeen={stopPacketSeen}
                stopReason={stopReason}
              >
                {(results) => (
                  <>
                    {results.map((r, i) => (
                      <div key={i}>{r.content}</div>
                    ))}
                  </>
                )}
              </RendererComponent>
            ))}
          </div>
        )}
        {/* Show stopped message when user cancelled and no display content */}
        {pacedDisplayGroups.length === 0 &&
          stopReason === StopReason.USER_CANCELLED && (
            <Text as="p" secondaryBody text04>
              User has stopped generation
            </Text>
          )}
      </div>

      {/* Feedback buttons - only show when streaming and rendering complete */}
      {isComplete && (
        <MessageToolbar
          nodeId={nodeId}
          messageId={messageId}
          includeMessageSwitcher={includeMessageSwitcher}
          currentMessageInd={currentMessageInd}
          otherMessagesCanSwitchTo={otherMessagesCanSwitchTo}
          getPreviousMessage={getPreviousMessage}
          getNextMessage={getNextMessage}
          onMessageSelection={onMessageSelection}
          rawPackets={rawPackets}
          finalAnswerRef={finalAnswerRef}
          currentFeedback={currentFeedback}
          onRegenerate={onRegenerate}
          parentMessage={parentMessage}
          llmManager={llmManager}
          currentModelName={chatState.overriddenModel}
          citations={citations}
          documentMap={documentMap}
        />
      )}
    </div>
  );
}, arePropsEqual);

export default AgentMessage;


================================================
FILE: web/src/app/app/message/messageComponents/CustomToolAuthCard.tsx
================================================
"use client";

import { useMemo } from "react";
import Message from "@/refresh-components/messages/Message";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import { initiateOAuthFlow } from "@/lib/oauth/api";
import { useToolOAuthStatus } from "@/lib/hooks/useToolOAuthStatus";
import { SvgArrowExchange } from "@opal/icons";

interface CustomToolAuthCardProps {
  toolName: string;
  toolId: number | null;
  tools: ToolSnapshot[];
  agentId: number;
}

function CustomToolAuthCard({
  toolName,
  toolId,
  tools,
  agentId,
}: CustomToolAuthCardProps) {
  const { getToolAuthStatus } = useToolOAuthStatus(agentId);
  const matchedTool = useMemo(() => {
    if (toolId == null) return null;
    return tools.find((t) => t.id === toolId) ?? null;
  }, [toolId, tools]);

  // Hide the card if the user already has a valid token
  const authStatus = matchedTool ? getToolAuthStatus(matchedTool) : undefined;
  if (authStatus?.hasToken && !authStatus.isTokenExpired) {
    return null;
  }

  const oauthConfigId = matchedTool?.oauth_config_id ?? null;

  // No OAuth config — nothing actionable to show
  if (!oauthConfigId) {
    return null;
  }

  const handleAuthenticate = () => {
    initiateOAuthFlow(
      oauthConfigId,
      window.location.pathname + window.location.search
    );
  };

  return (
    <Message
      static
      large
      icon
      close={false}
      text={`${toolName} not connected`}
      description={`Connect to ${toolName} to enable this tool`}
      actions="Connect"
      actionPrimary
      actionIcon={SvgArrowExchange}
      onAction={handleAuthenticate}
      className="w-full"
    />
  );
}

export default CustomToolAuthCard;


================================================
FILE: web/src/app/app/message/messageComponents/MessageToolbar.tsx
================================================
"use client";

import React, { RefObject, useState, useCallback, useMemo } from "react";
import { Packet, StreamingCitation } from "@/app/app/services/streamingModels";
import { FeedbackType } from "@/app/app/interfaces";
import { OnyxDocument } from "@/lib/search/interfaces";
import { TooltipGroup } from "@/components/tooltip/CustomTooltip";
import {
  useChatSessionStore,
  useDocumentSidebarVisible,
  useSelectedNodeForDocDisplay,
} from "@/app/app/stores/useChatSessionStore";
import { convertMarkdownTablesToTsv } from "@/app/app/message/copyingUtils";
import { getTextContent } from "@/app/app/services/packetUtils";
import { removeThinkingTokens } from "@/app/app/services/thinkingTokens";
import MessageSwitcher from "@/app/app/message/MessageSwitcher";
import SourceTag from "@/refresh-components/buttons/source-tag/SourceTag";
import { citationsToSourceInfoArray } from "@/refresh-components/buttons/source-tag/sourceTagUtils";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import LLMPopover from "@/refresh-components/popovers/LLMPopover";
import { parseLlmDescriptor } from "@/lib/llmConfig/utils";
import { LlmManager } from "@/lib/hooks";
import { Message } from "@/app/app/interfaces";
import { SvgThumbsDown, SvgThumbsUp } from "@opal/icons";
import { RegenerationFactory } from "./AgentMessage";
import useFeedbackController from "@/hooks/useFeedbackController";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import FeedbackModal, {
  FeedbackModalProps,
} from "@/sections/modals/FeedbackModal";
import { Button, SelectButton } from "@opal/components";
import TTSButton from "./TTSButton";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import { useVoiceStatus } from "@/hooks/useVoiceStatus";

// Wrapper component for SourceTag in toolbar to handle memoization
const SourcesTagWrapper = React.memo(function SourcesTagWrapper({
  citations,
  documentMap,
  nodeId,
  selectedMessageForDocDisplay,
  documentSidebarVisible,
  updateCurrentDocumentSidebarVisible,
  updateCurrentSelectedNodeForDocDisplay,
}: {
  citations: StreamingCitation[];
  documentMap: Map<string, OnyxDocument>;
  nodeId: number;
  selectedMessageForDocDisplay: number | null;
  documentSidebarVisible: boolean;
  updateCurrentDocumentSidebarVisible: (visible: boolean) => void;
  updateCurrentSelectedNodeForDocDisplay: (nodeId: number | null) => void;
}) {
  // Convert citations to SourceInfo array
  const sources = useMemo(
    () => citationsToSourceInfoArray(citations, documentMap),
    [citations, documentMap]
  );

  // Handle click to toggle sidebar
  const handleSourceClick = useCallback(() => {
    if (selectedMessageForDocDisplay === nodeId && documentSidebarVisible) {
      updateCurrentDocumentSidebarVisible(false);
      updateCurrentSelectedNodeForDocDisplay(null);
    } else {
      updateCurrentSelectedNodeForDocDisplay(nodeId);
      updateCurrentDocumentSidebarVisible(true);
    }
  }, [
    nodeId,
    selectedMessageForDocDisplay,
    documentSidebarVisible,
    updateCurrentDocumentSidebarVisible,
    updateCurrentSelectedNodeForDocDisplay,
  ]);

  if (sources.length === 0) return null;

  return (
    <SourceTag
      variant="button"
      displayName="Sources"
      sources={sources}
      onSourceClick={handleSourceClick}
      toggleSource
    />
  );
});

export interface MessageToolbarProps {
  // Message identification
  nodeId: number;
  messageId?: number;

  // Message switching
  includeMessageSwitcher: boolean;
  currentMessageInd: number | null | undefined;
  otherMessagesCanSwitchTo?: number[];
  getPreviousMessage: () => number | undefined;
  getNextMessage: () => number | undefined;
  onMessageSelection?: (nodeId: number) => void;

  // Copy functionality
  rawPackets: Packet[];
  finalAnswerRef: RefObject<HTMLDivElement | null>;

  // Feedback
  currentFeedback?: FeedbackType | null;

  // Regeneration
  onRegenerate?: RegenerationFactory;
  parentMessage?: Message | null;
  llmManager: LlmManager | null;
  currentModelName?: string;

  // Citations
  citations: StreamingCitation[];
  documentMap: Map<string, OnyxDocument>;
}

export default function MessageToolbar({
  nodeId,
  messageId,
  includeMessageSwitcher,
  currentMessageInd,
  otherMessagesCanSwitchTo,
  getPreviousMessage,
  getNextMessage,
  onMessageSelection,
  rawPackets,
  finalAnswerRef,
  currentFeedback,
  onRegenerate,
  parentMessage,
  llmManager,
  currentModelName,
  citations,
  documentMap,
}: MessageToolbarProps) {
  // Document sidebar state - managed internally to reduce prop drilling
  const documentSidebarVisible = useDocumentSidebarVisible();
  const selectedMessageForDocDisplay = useSelectedNodeForDocDisplay();
  const updateCurrentDocumentSidebarVisible = useChatSessionStore(
    (state) => state.updateCurrentDocumentSidebarVisible
  );
  const updateCurrentSelectedNodeForDocDisplay = useChatSessionStore(
    (state) => state.updateCurrentSelectedNodeForDocDisplay
  );

  // Voice mode - hide toolbar during TTS playback for this message
  const { isTTSPlaying, activeMessageNodeId, isAwaitingAutoPlaybackStart } =
    useVoiceMode();
  const { ttsEnabled } = useVoiceStatus();
  const isTTSActiveForThisMessage =
    (isTTSPlaying || isAwaitingAutoPlaybackStart) &&
    activeMessageNodeId === nodeId;

  // Feedback modal state and handlers
  const { handleFeedbackChange } = useFeedbackController();
  const modal = useCreateModal();
  const [feedbackModalProps, setFeedbackModalProps] =
    useState<FeedbackModalProps | null>(null);

  // Helper to check if feedback button should be in transient state
  const isFeedbackTransient = useCallback(
    (feedbackType: "like" | "dislike") => {
      const hasCurrentFeedback = currentFeedback === feedbackType;
      if (!modal.isOpen) return hasCurrentFeedback;

      const isModalForThisFeedback =
        feedbackModalProps?.feedbackType === feedbackType;
      const isModalForThisMessage = feedbackModalProps?.messageId === messageId;

      return (
        hasCurrentFeedback || (isModalForThisFeedback && isModalForThisMessage)
      );
    },
    [currentFeedback, modal.isOpen, feedbackModalProps, messageId]
  );

  // Handler for feedback button clicks with toggle logic
  const handleFeedbackClick = useCallback(
    async (clickedFeedback: "like" | "dislike") => {
      if (!messageId) {
        console.error("Cannot provide feedback - message has no messageId");
        return;
      }

      // Toggle logic
      if (currentFeedback === clickedFeedback) {
        // Clicking same button - remove feedback
        await handleFeedbackChange(messageId, null);
      }

      // Clicking like (will automatically clear dislike if it was active).
      // Open modal for positive feedback.
      else if (clickedFeedback === "like") {
        setFeedbackModalProps({
          feedbackType: "like",
          messageId,
        });
        modal.toggle(true);
      }

      // Clicking dislike (will automatically clear like if it was active).
      // Always open modal for dislike.
      else {
        setFeedbackModalProps({
          feedbackType: "dislike",
          messageId,
        });
        modal.toggle(true);
      }
    },
    [messageId, currentFeedback, handleFeedbackChange, modal]
  );

  // Hide toolbar while TTS is playing for this message
  if (isTTSActiveForThisMessage) {
    return null;
  }

  return (
    <>
      <modal.Provider>
        <FeedbackModal {...feedbackModalProps!} />
      </modal.Provider>

      <div
        data-testid="AgentMessage/toolbar"
        className="flex md:flex-row justify-between items-center w-full transition-transform duration-300 ease-in-out transform opacity-100 pl-1"
      >
        <TooltipGroup>
          <div className="flex items-center">
            {includeMessageSwitcher && (
              <div className="-mx-1">
                <MessageSwitcher
                  currentPage={(currentMessageInd ?? 0) + 1}
                  totalPages={otherMessagesCanSwitchTo?.length || 0}
                  handlePrevious={() => {
                    const prevMessage = getPreviousMessage();
                    if (prevMessage !== undefined && onMessageSelection) {
                      onMessageSelection(prevMessage);
                    }
                  }}
                  handleNext={() => {
                    const nextMessage = getNextMessage();
                    if (nextMessage !== undefined && onMessageSelection) {
                      onMessageSelection(nextMessage);
                    }
                  }}
                />
              </div>
            )}

            <CopyIconButton
              getCopyText={() =>
                convertMarkdownTablesToTsv(
                  removeThinkingTokens(getTextContent(rawPackets)) as string
                )
              }
              getHtmlContent={() => finalAnswerRef.current?.innerHTML || ""}
              data-testid="AgentMessage/copy-button"
            />
            <SelectButton
              icon={SvgThumbsUp}
              onClick={() => handleFeedbackClick("like")}
              variant="select-light"
              state={isFeedbackTransient("like") ? "selected" : "empty"}
              tooltip={
                currentFeedback === "like" ? "Remove Like" : "Good Response"
              }
              data-testid="AgentMessage/like-button"
            />
            <SelectButton
              icon={SvgThumbsDown}
              onClick={() => handleFeedbackClick("dislike")}
              variant="select-light"
              state={isFeedbackTransient("dislike") ? "selected" : "empty"}
              tooltip={
                currentFeedback === "dislike"
                  ? "Remove Dislike"
                  : "Bad Response"
              }
              data-testid="AgentMessage/dislike-button"
            />
            {ttsEnabled && (
              <TTSButton
                text={
                  removeThinkingTokens(getTextContent(rawPackets)) as string
                }
              />
            )}

            {onRegenerate &&
              messageId !== undefined &&
              parentMessage &&
              llmManager && (
                <div data-testid="AgentMessage/regenerate">
                  <LLMPopover
                    llmManager={llmManager}
                    currentModelName={currentModelName}
                    onSelect={(modelName) => {
                      const llmDescriptor = parseLlmDescriptor(modelName);
                      const regenerator = onRegenerate({
                        messageId,
                        parentMessage,
                      });
                      regenerator(llmDescriptor);
                    }}
                    foldable
                  />
                </div>
              )}

            {nodeId && (citations.length > 0 || documentMap.size > 0) && (
              <SourcesTagWrapper
                citations={citations}
                documentMap={documentMap}
                nodeId={nodeId}
                selectedMessageForDocDisplay={selectedMessageForDocDisplay}
                documentSidebarVisible={documentSidebarVisible}
                updateCurrentDocumentSidebarVisible={
                  updateCurrentDocumentSidebarVisible
                }
                updateCurrentSelectedNodeForDocDisplay={
                  updateCurrentSelectedNodeForDocDisplay
                }
              />
            )}
          </div>
        </TooltipGroup>
      </div>
    </>
  );
}


================================================
FILE: web/src/app/app/message/messageComponents/TTSButton.tsx
================================================
"use client";

import { useCallback, useEffect } from "react";
import { SvgPlayCircle, SvgStop } from "@opal/icons";
import { Button } from "@opal/components";
import { useVoicePlayback } from "@/hooks/useVoicePlayback";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import { toast } from "@/hooks/useToast";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";

interface TTSButtonProps {
  text: string;
  voice?: string;
  speed?: number;
}

function TTSButton({ text, voice, speed }: TTSButtonProps) {
  const { isPlaying, isLoading, error, play, pause, stop } = useVoicePlayback();
  const { isTTSPlaying, isTTSLoading, isAwaitingAutoPlaybackStart, stopTTS } =
    useVoiceMode();

  const isGlobalTTSActive =
    isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;
  const isButtonPlaying = isGlobalTTSActive || isPlaying;
  const isButtonLoading = !isGlobalTTSActive && isLoading;

  const handleClick = useCallback(async () => {
    if (isGlobalTTSActive) {
      // Stop auto-playback voice mode stream from the toolbar button.
      stopTTS({ manual: true });
      stop();
    } else if (isPlaying) {
      pause();
    } else if (isButtonLoading) {
      stop();
    } else {
      try {
        // Ensure no voice-mode stream is active before starting manual playback.
        stopTTS();
        await play(text, voice, speed);
      } catch (err) {
        console.error("TTS playback failed:", err);
        toast.error("Could not play audio");
      }
    }
  }, [
    isGlobalTTSActive,
    isPlaying,
    isButtonLoading,
    text,
    voice,
    speed,
    play,
    pause,
    stop,
    stopTTS,
  ]);

  // Surface streaming voice playback errors to the user via toast
  useEffect(() => {
    if (error) {
      console.error("Voice playback error:", error);
      toast.error(error);
    }
  }, [error]);

  const icon = isButtonLoading
    ? SimpleLoader
    : isButtonPlaying
      ? SvgStop
      : SvgPlayCircle;

  const tooltip = isButtonPlaying
    ? "Stop playback"
    : isButtonLoading
      ? "Loading..."
      : "Read aloud";

  return (
    <Button
      icon={icon}
      onClick={handleClick}
      prominence="tertiary"
      tooltip={tooltip}
      data-testid="AgentMessage/tts-button"
    />
  );
}

export default TTSButton;


================================================
FILE: web/src/app/app/message/messageComponents/constants.ts
================================================
export const STANDARD_TEXT_COLOR = "text-text-700";


================================================
FILE: web/src/app/app/message/messageComponents/hooks/useAuthErrors.ts
================================================
import { useRef } from "react";
import {
  CustomToolDelta,
  Packet,
  PacketType,
} from "@/app/app/services/streamingModels";

interface AuthError {
  toolName: string;
  toolId: number | null;
}

export function useAuthErrors(rawPackets: Packet[]): AuthError[] {
  const stateRef = useRef<{ processedCount: number; errors: AuthError[] }>({
    processedCount: 0,
    errors: [],
  });

  // Reset if packets shrunk (e.g. new message)
  if (rawPackets.length < stateRef.current.processedCount) {
    stateRef.current = { processedCount: 0, errors: [] };
  }

  // Process only new packets (incremental, like usePacketProcessor)
  if (rawPackets.length > stateRef.current.processedCount) {
    let newErrors = stateRef.current.errors;
    for (let i = stateRef.current.processedCount; i < rawPackets.length; i++) {
      const packet = rawPackets[i]!;
      if (packet.obj.type === PacketType.CUSTOM_TOOL_DELTA) {
        const delta = packet.obj as CustomToolDelta;
        if (delta.error?.is_auth_error) {
          const alreadyPresent = newErrors.some(
            (e) =>
              (delta.tool_id != null && e.toolId === delta.tool_id) ||
              (delta.tool_id == null && e.toolName === delta.tool_name)
          );
          if (!alreadyPresent) {
            newErrors = [
              ...newErrors,
              { toolName: delta.tool_name, toolId: delta.tool_id ?? null },
            ];
          }
        }
      }
    }
    stateRef.current = {
      processedCount: rawPackets.length,
      errors: newErrors,
    };
  }

  return stateRef.current.errors;
}


================================================
FILE: web/src/app/app/message/messageComponents/hooks/useMessageSwitching.ts
================================================
interface UseMessageSwitchingProps {
  nodeId: number;
  otherMessagesCanSwitchTo?: number[];
  onMessageSelection?: (messageId: number) => void;
}

interface UseMessageSwitchingReturn {
  currentMessageInd: number | undefined;
  includeMessageSwitcher: boolean;
  getPreviousMessage: () => number | undefined;
  getNextMessage: () => number | undefined;
}

export function useMessageSwitching({
  nodeId,
  otherMessagesCanSwitchTo,
  onMessageSelection,
}: UseMessageSwitchingProps): UseMessageSwitchingReturn {
  // Calculate message switching state
  const indexInSiblings = nodeId
    ? otherMessagesCanSwitchTo?.indexOf(nodeId)
    : undefined;
  // indexOf returns -1 if not found, treat that as undefined
  const currentMessageInd =
    indexInSiblings !== undefined && indexInSiblings !== -1
      ? indexInSiblings
      : undefined;

  const includeMessageSwitcher =
    currentMessageInd !== undefined &&
    onMessageSelection !== undefined &&
    otherMessagesCanSwitchTo !== undefined &&
    otherMessagesCanSwitchTo.length > 1;

  const getPreviousMessage = () => {
    if (
      currentMessageInd !== undefined &&
      currentMessageInd > 0 &&
      otherMessagesCanSwitchTo
    ) {
      return otherMessagesCanSwitchTo[currentMessageInd - 1];
    }
    return undefined;
  };

  const getNextMessage = () => {
    if (
      currentMessageInd !== undefined &&
      currentMessageInd < (otherMessagesCanSwitchTo?.length || 0) - 1 &&
      otherMessagesCanSwitchTo
    ) {
      return otherMessagesCanSwitchTo[currentMessageInd + 1];
    }
    return undefined;
  };

  return {
    currentMessageInd,
    includeMessageSwitcher,
    getPreviousMessage,
    getNextMessage,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/hooks/usePacketAnimationAndCollapse.ts
================================================
import { useEffect, useState } from "react";
import { Packet } from "@/app/app/services/streamingModels";

// Control the rate of packet streaming (packets per second)
const PACKET_DELAY_MS = 10;

interface UsePacketAnimationAndCollapseOptions {
  /** Array of packets to animate */
  packets: Packet[];
  /** Whether animation is enabled */
  animate: boolean;
  /** Whether the content is complete (has SECTION_END) */
  isComplete: boolean;
  /** Callback to invoke when animation and completion are done */
  onComplete: () => void;
  /** Optional: prevent double-calling onComplete (for renderers that need it) */
  preventDoubleComplete?: boolean;
}

interface UsePacketAnimationAndCollapseReturn {
  /** Number of packets currently displayed (or -1 if showing all) */
  displayedPacketCount: number;
  /** Whether the content is expanded */
  isExpanded: boolean;
  /** Function to toggle expansion state */
  toggleExpanded: () => void;
}

/**
 * Hook that handles packet animation and auto-collapse behavior.
 *
 * Features:
 * - Gradually displays packets with configurable delay
 * - Auto-collapses when content is complete
 * - Calls onComplete when animation finishes and content is complete
 * - Manages expansion state for collapsible content
 */
export function usePacketAnimationAndCollapse({
  packets,
  animate,
  isComplete,
  onComplete,
  preventDoubleComplete = false,
}: UsePacketAnimationAndCollapseOptions): UsePacketAnimationAndCollapseReturn {
  // If we're animating, start with 1 packet, otherwise show all
  const initialPacketCount = animate ? (packets.length > 0 ? 1 : 0) : -1;

  const [displayedPacketCount, setDisplayedPacketCount] =
    useState(initialPacketCount);
  const [isExpanded, setIsExpanded] = useState(true);
  const [hasAutoCollapsed, setHasAutoCollapsed] = useState(false);
  const [hasCalledComplete, setHasCalledComplete] = useState(false);

  // Auto-collapse when content is complete
  useEffect(() => {
    if (isComplete && !hasAutoCollapsed) {
      setIsExpanded(false);
      setHasAutoCollapsed(true);
    }
  }, [isComplete, hasAutoCollapsed]);

  // Animation effect - gradually increase displayed packets
  useEffect(() => {
    if (!animate) {
      setDisplayedPacketCount(-1);
      return;
    }

    if (displayedPacketCount >= 0 && displayedPacketCount < packets.length) {
      const timer = setTimeout(() => {
        setDisplayedPacketCount((prev) => Math.min(prev + 1, packets.length));
      }, PACKET_DELAY_MS);

      return () => clearTimeout(timer);
    }
  }, [animate, displayedPacketCount, packets.length]);

  // Reset displayed count when packet array changes significantly
  useEffect(() => {
    if (animate && packets.length < displayedPacketCount) {
      setDisplayedPacketCount(packets.length > 0 ? 1 : 0);
    }
  }, [animate, packets.length, displayedPacketCount]);

  // Call onComplete when done (animation finished and content complete)
  useEffect(() => {
    if (isComplete) {
      // If animation is still in progress, wait for it to finish
      if (
        animate &&
        displayedPacketCount >= 0 &&
        displayedPacketCount < packets.length
      ) {
        return;
      }

      // Prevent double-calling if requested
      if (preventDoubleComplete && hasCalledComplete) {
        return;
      }

      if (preventDoubleComplete) {
        setHasCalledComplete(true);
      }
      onComplete();
    }
  }, [
    isComplete,
    onComplete,
    animate,
    displayedPacketCount,
    packets.length,
    preventDoubleComplete,
    hasCalledComplete,
  ]);

  const toggleExpanded = () => {
    setIsExpanded((prev) => !prev);
  };

  return {
    displayedPacketCount,
    isExpanded,
    toggleExpanded,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/interfaces.ts
================================================
import { JSX } from "react";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { Packet, StopReason } from "../../services/streamingModels";
import { OnyxDocument, MinimalOnyxDocument } from "@/lib/search/interfaces";
import { ProjectFile } from "../../projects/projectsService";
import { LlmDescriptor } from "@/lib/hooks";
import { IconType } from "react-icons";
import { OnyxIconType } from "@/components/icons/icons";
import { CitationMap } from "../../interfaces";
import { TimelineSurfaceBackground } from "@/app/app/message/messageComponents/timeline/primitives/TimelineSurface";

export enum RenderType {
  HIGHLIGHT = "highlight",
  FULL = "full",
  COMPACT = "compact",
  INLINE = "inline",
}

/**
 * Controls whether a renderer expects to be wrapped by timeline UI.
 * - timeline: parent should render StepContainer around the result.
 * - content: renderer already contains its own layout (headers/containers).
 */
export type TimelineLayout = "timeline" | "content";

export interface FullChatState {
  agent: MinimalPersonaSnapshot;
  // Document-related context for citations
  docs?: OnyxDocument[] | null;
  userFiles?: ProjectFile[];
  citations?: CitationMap;
  setPresentingDocument?: (document: MinimalOnyxDocument) => void;
  // Regenerate functionality
  regenerate?: (modelOverRide: LlmDescriptor) => Promise<void>;
  overriddenModel?: string;
  researchType?: string | null;
}

export interface RendererResult {
  icon: IconType | OnyxIconType | null;
  status: string | JSX.Element | null;
  content: JSX.Element;

  // can be used to override the look on the "expanded" view
  // used for things that should just show text w/o an icon or header
  // e.g. ReasoningRenderer
  expandedText?: JSX.Element;

  // Whether this renderer supports collapsible mode (collapse button shown only when true)
  supportsCollapsible?: boolean;
  /** Whether the step should remain collapsible even in single-step timelines */
  alwaysCollapsible?: boolean;
  /** Whether the result should be wrapped by timeline UI or rendered as-is */
  timelineLayout?: TimelineLayout;
  /** Remove right padding for long-form content (reasoning, deep research, memory). */
  noPaddingRight?: boolean;
  /** Override the surface background (e.g. "error" for auth failures). */
  surfaceBackground?: TimelineSurfaceBackground;
}

// All renderers return an array of results (even single-step renderers return a 1-element array)
export type RendererOutput = RendererResult[];

export type MessageRenderer<
  T extends Packet,
  S extends Partial<FullChatState>,
> = React.ComponentType<{
  packets: T[];
  state: S;
  /** Node id for the message currently being rendered */
  messageNodeId?: number;
  /** True when timeline/thinking UI is already shown above this text block */
  hasTimelineThinking?: boolean;
  onComplete: () => void;
  renderType: RenderType;
  animate: boolean;
  stopPacketSeen: boolean;
  stopReason?: StopReason;
  /** Whether this is the last step in the timeline (for connector line decisions) */
  isLastStep?: boolean;
  /** Hover state from parent */
  isHover?: boolean;
  children: (result: RendererOutput) => JSX.Element;
}>;


================================================
FILE: web/src/app/app/message/messageComponents/markdownUtils.tsx
================================================
import React, { useCallback, useMemo, JSX } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import rehypeHighlight from "rehype-highlight";
import rehypeKatex from "rehype-katex";
import "katex/dist/katex.min.css";
import "@/app/app/message/custom-code-styles.css";
import { FullChatState } from "@/app/app/message/messageComponents/interfaces";
import {
  MemoizedAnchor,
  MemoizedParagraph,
} from "@/app/app/message/MemoizedTextComponents";
import { extractCodeText, preprocessLaTeX } from "@/app/app/message/codeUtils";
import { CodeBlock } from "@/app/app/message/CodeBlock";
import { transformLinkUri, cn } from "@/lib/utils";
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
import { extractChatImageFileId } from "@/app/app/components/files/images/utils";

/**
 * Processes content for markdown rendering by handling code blocks and LaTeX
 */
export const processContent = (content: string): string => {
  const codeBlockRegex = /```(\w*)\n[\s\S]*?```|```[\s\S]*?$/g;
  const matches = content.match(codeBlockRegex);

  if (matches) {
    content = matches.reduce((acc, match) => {
      if (!match.match(/```\w+/)) {
        return acc.replace(match, match.replace("```", "```plaintext"));
      }
      return acc;
    }, content);

    const lastMatch = matches[matches.length - 1];
    if (lastMatch && !lastMatch.endsWith("```")) {
      return preprocessLaTeX(content);
    }
  }

  const processed = preprocessLaTeX(content);
  return processed;
};

/**
 * Hook that provides markdown component callbacks for consistent rendering
 */
export const useMarkdownComponents = (
  state: FullChatState | undefined,
  processedContent: string,
  className?: string
) => {
  const paragraphCallback = useCallback(
    (props: any) => (
      <MemoizedParagraph className={className}>
        {props.children}
      </MemoizedParagraph>
    ),
    [className]
  );

  const anchorCallback = useCallback(
    (props: any) => {
      const imageFileId = extractChatImageFileId(
        props.href,
        String(props.children ?? "")
      );
      if (imageFileId) {
        return (
          <InMessageImage
            fileId={imageFileId}
            fileName={String(props.children ?? "")}
          />
        );
      }
      return (
        <MemoizedAnchor
          updatePresentingDocument={state?.setPresentingDocument || (() => {})}
          docs={state?.docs || []}
          userFiles={state?.userFiles || []}
          citations={state?.citations}
          href={props.href}
        >
          {props.children}
        </MemoizedAnchor>
      );
    },
    [
      state?.docs,
      state?.userFiles,
      state?.citations,
      state?.setPresentingDocument,
    ]
  );

  const markdownComponents = useMemo(
    () => ({
      a: anchorCallback,
      p: paragraphCallback,
      pre: ({ node, className, children }: any) => {
        // Don't render the pre wrapper - CodeBlock handles its own wrapper
        return <>{children}</>;
      },
      b: ({ node, className, children }: any) => {
        return <span className={className}>{children}</span>;
      },
      ul: ({ node, className, children, ...props }: any) => {
        return (
          <ul className={className} {...props}>
            {children}
          </ul>
        );
      },
      ol: ({ node, className, children, ...props }: any) => {
        return (
          <ol className={className} {...props}>
            {children}
          </ol>
        );
      },
      li: ({ node, className, children, ...props }: any) => {
        return (
          <li className={className} {...props}>
            {children}
          </li>
        );
      },
      table: ({ node, className, children, ...props }: any) => {
        return (
          <div className="markdown-table-breakout">
            <table className={cn(className, "min-w-full")} {...props}>
              {children}
            </table>
          </div>
        );
      },
      code: ({ node, className, children }: any) => {
        const codeText = extractCodeText(node, processedContent, children);

        return (
          <CodeBlock className={className} codeText={codeText}>
            {children}
          </CodeBlock>
        );
      },
    }),
    [anchorCallback, paragraphCallback, processedContent]
  );

  return markdownComponents;
};

/**
 * Renders markdown content with consistent configuration
 */
export const renderMarkdown = (
  content: string,
  markdownComponents: any,
  textSize: string = "text-base"
): JSX.Element => {
  return (
    <div dir="auto">
      <ReactMarkdown
        className={`prose dark:prose-invert font-main-content-body max-w-full ${textSize}`}
        components={markdownComponents}
        remarkPlugins={[
          remarkGfm,
          [remarkMath, { singleDollarTextMath: true }],
        ]}
        rehypePlugins={[rehypeHighlight, rehypeKatex]}
        urlTransform={transformLinkUri}
      >
        {content}
      </ReactMarkdown>
    </div>
  );
};

/**
 * Complete markdown processing and rendering utility
 */
export const useMarkdownRenderer = (
  content: string,
  state: FullChatState | undefined,
  textSize: string
) => {
  const processedContent = useMemo(() => processContent(content), [content]);
  const markdownComponents = useMarkdownComponents(
    state,
    processedContent,
    textSize
  );

  const renderedContent = useMemo(
    () => renderMarkdown(processedContent, markdownComponents, textSize),
    [processedContent, markdownComponents, textSize]
  );

  return {
    processedContent,
    markdownComponents,
    renderedContent,
  };
};


================================================
FILE: web/src/app/app/message/messageComponents/renderMessageComponent.tsx
================================================
import React, { JSX, memo } from "react";
import {
  ChatPacket,
  CODE_INTERPRETER_TOOL_TYPES,
  ImageGenerationToolPacket,
  Packet,
  PacketType,
  ReasoningPacket,
  SearchToolStart,
  StopReason,
  ToolCallArgumentDelta,
} from "../../services/streamingModels";
import {
  FullChatState,
  MessageRenderer,
  RenderType,
  RendererResult,
  RendererOutput,
} from "./interfaces";
import { MessageTextRenderer } from "./renderers/MessageTextRenderer";
import { ImageToolRenderer } from "./renderers/ImageToolRenderer";
import { PythonToolRenderer } from "./timeline/renderers/code/PythonToolRenderer";
import { ReasoningRenderer } from "./timeline/renderers/reasoning/ReasoningRenderer";
import CustomToolRenderer from "./renderers/CustomToolRenderer";
import { FileReaderToolRenderer } from "./timeline/renderers/filereader/FileReaderToolRenderer";
import { FetchToolRenderer } from "./timeline/renderers/fetch/FetchToolRenderer";
import { MemoryToolRenderer } from "./timeline/renderers/memory/MemoryToolRenderer";
import { DeepResearchPlanRenderer } from "./timeline/renderers/deepresearch/DeepResearchPlanRenderer";
import { ResearchAgentRenderer } from "./timeline/renderers/deepresearch/ResearchAgentRenderer";
import { WebSearchToolRenderer } from "./timeline/renderers/search/WebSearchToolRenderer";
import { InternalSearchToolRenderer } from "./timeline/renderers/search/InternalSearchToolRenderer";

// Different types of chat packets using discriminated unions
interface GroupedPackets {
  packets: Packet[];
}

function isChatPacket(packet: Packet): packet is ChatPacket {
  return (
    packet.obj.type === PacketType.MESSAGE_START ||
    packet.obj.type === PacketType.MESSAGE_DELTA ||
    packet.obj.type === PacketType.MESSAGE_END
  );
}

function isWebSearchPacket(packet: Packet): boolean {
  if (packet.obj.type !== PacketType.SEARCH_TOOL_START) return false;
  return (packet.obj as SearchToolStart).is_internet_search === true;
}

function isInternalSearchPacket(packet: Packet): boolean {
  if (packet.obj.type !== PacketType.SEARCH_TOOL_START) return false;
  return (packet.obj as SearchToolStart).is_internet_search !== true;
}

function isImageToolPacket(packet: Packet) {
  return packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START;
}

function isPythonToolPacket(packet: Packet) {
  return (
    packet.obj.type === PacketType.PYTHON_TOOL_START ||
    (packet.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&
      (packet.obj as ToolCallArgumentDelta).tool_type ===
        CODE_INTERPRETER_TOOL_TYPES.PYTHON)
  );
}

function isCustomToolPacket(packet: Packet) {
  return packet.obj.type === PacketType.CUSTOM_TOOL_START;
}

function isFileReaderToolPacket(packet: Packet) {
  return packet.obj.type === PacketType.FILE_READER_START;
}

function isFetchToolPacket(packet: Packet) {
  return packet.obj.type === PacketType.FETCH_TOOL_START;
}

function isMemoryToolPacket(packet: Packet) {
  return (
    packet.obj.type === PacketType.MEMORY_TOOL_START ||
    packet.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS
  );
}

function isReasoningPacket(packet: Packet): packet is ReasoningPacket {
  return (
    packet.obj.type === PacketType.REASONING_START ||
    packet.obj.type === PacketType.REASONING_DELTA ||
    packet.obj.type === PacketType.SECTION_END ||
    packet.obj.type === PacketType.ERROR
  );
}

function isDeepResearchPlanPacket(packet: Packet) {
  return (
    packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_START ||
    packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_DELTA
  );
}

function isResearchAgentPacket(packet: Packet) {
  // Check for any packet type that indicates a research agent group
  return (
    packet.obj.type === PacketType.RESEARCH_AGENT_START ||
    packet.obj.type === PacketType.INTERMEDIATE_REPORT_START ||
    packet.obj.type === PacketType.INTERMEDIATE_REPORT_DELTA ||
    packet.obj.type === PacketType.INTERMEDIATE_REPORT_CITED_DOCS
  );
}

export function findRenderer(
  groupedPackets: GroupedPackets
): MessageRenderer<any, any> | null {
  // Check for chat messages first
  if (groupedPackets.packets.some((packet) => isChatPacket(packet))) {
    return MessageTextRenderer;
  }

  // Check for deep research packets EARLY - these have priority over other tools
  // because deep research groups may contain multiple packet types (plan + reasoning + fetch)
  if (
    groupedPackets.packets.some((packet) => isDeepResearchPlanPacket(packet))
  ) {
    return DeepResearchPlanRenderer;
  }
  if (groupedPackets.packets.some((packet) => isResearchAgentPacket(packet))) {
    return ResearchAgentRenderer;
  }

  // Standard tool checks
  if (groupedPackets.packets.some((packet) => isWebSearchPacket(packet))) {
    return WebSearchToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isInternalSearchPacket(packet))) {
    return InternalSearchToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isImageToolPacket(packet))) {
    return ImageToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isPythonToolPacket(packet))) {
    return PythonToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isFileReaderToolPacket(packet))) {
    return FileReaderToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isCustomToolPacket(packet))) {
    return CustomToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isFetchToolPacket(packet))) {
    return FetchToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isMemoryToolPacket(packet))) {
    return MemoryToolRenderer;
  }
  if (groupedPackets.packets.some((packet) => isReasoningPacket(packet))) {
    return ReasoningRenderer;
  }
  return null;
}

// Handles display groups containing both chat text and image generation packets
function MixedContentHandler({
  chatPackets,
  imagePackets,
  chatState,
  messageNodeId,
  hasTimelineThinking,
  onComplete,
  animate,
  stopPacketSeen,
  stopReason,
  children,
}: {
  chatPackets: Packet[];
  imagePackets: Packet[];
  chatState: FullChatState;
  messageNodeId?: number;
  hasTimelineThinking?: boolean;
  onComplete: () => void;
  animate: boolean;
  stopPacketSeen: boolean;
  stopReason?: StopReason;
  children: (result: RendererOutput) => JSX.Element;
}) {
  return (
    <MessageTextRenderer
      packets={chatPackets as ChatPacket[]}
      state={chatState}
      messageNodeId={messageNodeId}
      hasTimelineThinking={hasTimelineThinking}
      onComplete={() => {}}
      animate={animate}
      renderType={RenderType.FULL}
      stopPacketSeen={stopPacketSeen}
      stopReason={stopReason}
    >
      {(textResults) => (
        <ImageToolRenderer
          packets={imagePackets as ImageGenerationToolPacket[]}
          state={chatState}
          onComplete={onComplete}
          animate={animate}
          renderType={RenderType.FULL}
          stopPacketSeen={stopPacketSeen}
          stopReason={stopReason}
        >
          {(imageResults) => children([...textResults, ...imageResults])}
        </ImageToolRenderer>
      )}
    </MessageTextRenderer>
  );
}

// Props interface for RendererComponent
interface RendererComponentProps {
  packets: Packet[];
  chatState: FullChatState;
  messageNodeId?: number;
  hasTimelineThinking?: boolean;
  onComplete: () => void;
  animate: boolean;
  stopPacketSeen: boolean;
  stopReason?: StopReason;
  children: (result: RendererOutput) => JSX.Element;
}

// Custom comparison to prevent unnecessary re-renders
function areRendererPropsEqual(
  prev: RendererComponentProps,
  next: RendererComponentProps
): boolean {
  return (
    prev.packets === next.packets &&
    prev.stopPacketSeen === next.stopPacketSeen &&
    prev.stopReason === next.stopReason &&
    prev.animate === next.animate &&
    prev.chatState.agent?.id === next.chatState.agent?.id &&
    prev.messageNodeId === next.messageNodeId
    // Skip: onComplete, children (function refs), chatState (memoized upstream)
  );
}

// React component wrapper that directly uses renderer components
export const RendererComponent = memo(function RendererComponent({
  packets,
  chatState,
  messageNodeId,
  hasTimelineThinking,
  onComplete,
  animate,
  stopPacketSeen,
  stopReason,
  children,
}: RendererComponentProps) {
  // Detect mixed display groups (both chat text and image generation)
  const hasChatPackets = packets.some((p) => isChatPacket(p));
  const hasImagePackets = packets.some((p) => isImageToolPacket(p));

  if (hasChatPackets && hasImagePackets) {
    const sharedTypes = new Set<string>([
      PacketType.SECTION_END,
      PacketType.ERROR,
    ]);

    const chatPackets = packets.filter(
      (p) =>
        isChatPacket(p) ||
        p.obj.type === PacketType.CITATION_INFO ||
        sharedTypes.has(p.obj.type as string)
    );
    const imagePackets = packets.filter(
      (p) =>
        isImageToolPacket(p) ||
        p.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA ||
        sharedTypes.has(p.obj.type as string)
    );

    return (
      <MixedContentHandler
        chatPackets={chatPackets}
        imagePackets={imagePackets}
        chatState={chatState}
        messageNodeId={messageNodeId}
        hasTimelineThinking={hasTimelineThinking}
        onComplete={onComplete}
        animate={animate}
        stopPacketSeen={stopPacketSeen}
        stopReason={stopReason}
      >
        {children}
      </MixedContentHandler>
    );
  }

  const RendererFn = findRenderer({ packets });

  if (!RendererFn) {
    return children([{ icon: null, status: null, content: <></> }]);
  }

  return (
    <RendererFn
      packets={packets as any}
      state={chatState}
      messageNodeId={messageNodeId}
      hasTimelineThinking={hasTimelineThinking}
      onComplete={onComplete}
      animate={animate}
      renderType={RenderType.FULL}
      stopPacketSeen={stopPacketSeen}
      stopReason={stopReason}
    >
      {(results: RendererOutput) => children(results)}
    </RendererFn>
  );
}, areRendererPropsEqual);


================================================
FILE: web/src/app/app/message/messageComponents/renderers/CustomToolRenderer.tsx
================================================
import React, { useEffect, useMemo } from "react";
import {
  PacketType,
  CustomToolPacket,
  CustomToolStart,
  CustomToolArgs,
  CustomToolDelta,
  CustomToolErrorInfo,
  SectionEnd,
} from "../../../services/streamingModels";
import { MessageRenderer, RenderType } from "../interfaces";
import { buildImgUrl } from "../../../components/files/images/utils";
import Text from "@/refresh-components/texts/Text";
import {
  SvgActions,
  SvgArrowExchange,
  SvgDownload,
  SvgExternalLink,
} from "@opal/icons";
import { CodeBlock } from "@/app/app/message/CodeBlock";
import hljs from "highlight.js/lib/core";
import json from "highlight.js/lib/languages/json";
import FadingEdgeContainer from "@/refresh-components/FadingEdgeContainer";

// Lazy registration for hljs JSON language
function ensureHljsRegistered() {
  if (!hljs.listLanguages().includes("json")) {
    hljs.registerLanguage("json", json);
  }
}

// Component to render syntax-highlighted JSON
interface HighlightedJsonCodeProps {
  code: string;
}
function HighlightedJsonCode({ code }: HighlightedJsonCodeProps) {
  const highlightedHtml = useMemo(() => {
    ensureHljsRegistered();
    try {
      return hljs.highlight(code, { language: "json" }).value;
    } catch {
      return code
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    }
  }, [code]);

  return (
    <span
      dangerouslySetInnerHTML={{ __html: highlightedHtml }}
      className="hljs"
    />
  );
}

function constructCustomToolState(packets: CustomToolPacket[]) {
  const toolStart = packets.find(
    (p) => p.obj.type === PacketType.CUSTOM_TOOL_START
  )?.obj as CustomToolStart | null;
  const toolDeltas = packets
    .filter((p) => p.obj.type === PacketType.CUSTOM_TOOL_DELTA)
    .map((p) => p.obj as CustomToolDelta);
  const toolEnd = packets.find(
    (p) =>
      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR
  )?.obj as SectionEnd | null;

  const toolName = toolStart?.tool_name || toolDeltas[0]?.tool_name || "Tool";
  const toolArgsPacket = packets.find(
    (p) => p.obj.type === PacketType.CUSTOM_TOOL_ARGS
  )?.obj as CustomToolArgs | null;
  const toolArgs = toolArgsPacket?.tool_args ?? null;
  const latestDelta = toolDeltas[toolDeltas.length - 1] || null;
  const responseType = latestDelta?.response_type || null;
  const data = latestDelta?.data;
  const fileIds = latestDelta?.file_ids || null;
  const error = latestDelta?.error || null;

  const isRunning = Boolean(toolStart && !toolEnd);
  const isComplete = Boolean(toolStart && toolEnd);

  return {
    toolName,
    toolArgs,
    responseType,
    data,
    fileIds,
    error,
    isRunning,
    isComplete,
  };
}

export const CustomToolRenderer: MessageRenderer<CustomToolPacket, {}> = ({
  packets,
  onComplete,
  renderType,
  children,
}) => {
  const {
    toolName,
    toolArgs,
    responseType,
    data,
    fileIds,
    error,
    isRunning,
    isComplete,
  } = constructCustomToolState(packets);

  useEffect(() => {
    if (isComplete) {
      onComplete();
    }
  }, [isComplete, onComplete]);

  const status = useMemo(() => {
    if (isComplete) {
      if (error) {
        return error.is_auth_error
          ? `${toolName} authentication failed (HTTP ${error.status_code})`
          : `${toolName} failed (HTTP ${error.status_code})`;
      }
      if (responseType === "image") return `${toolName} returned images`;
      if (responseType === "csv") return `${toolName} returned a file`;
      return `${toolName} completed`;
    }
    if (isRunning) return `${toolName} running...`;
    return null;
  }, [toolName, responseType, error, isComplete, isRunning]);

  const icon = SvgActions;

  const toolArgsJson = useMemo(
    () => (toolArgs ? JSON.stringify(toolArgs, null, 2) : null),
    [toolArgs]
  );
  const dataJson = useMemo(
    () =>
      data !== undefined && data !== null && typeof data === "object"
        ? JSON.stringify(data, null, 2)
        : null,
    [data]
  );

  const content = useMemo(
    () => (
      <div className="flex flex-col gap-3">
        {/* Loading indicator */}
        {isRunning &&
          !error &&
          !fileIds &&
          (data === undefined || data === null) && (
            <div className="flex items-center gap-2 text-sm text-text-03">
              <div className="flex gap-0.5">
                <div className="w-1 h-1 bg-current rounded-full animate-pulse"></div>
                <div
                  className="w-1 h-1 bg-current rounded-full animate-pulse"
                  style={{ animationDelay: "0.1s" }}
                ></div>
                <div
                  className="w-1 h-1 bg-current rounded-full animate-pulse"
                  style={{ animationDelay: "0.2s" }}
                ></div>
              </div>
              <Text text03 secondaryBody>
                Waiting for response...
              </Text>
            </div>
          )}

        {/* Tool arguments */}
        {toolArgsJson && (
          <div>
            <div className="flex items-center gap-1">
              <SvgArrowExchange className="w-3 h-3 text-text-02" />
              <Text text04 secondaryBody>
                Request
              </Text>
            </div>
            <div className="prose max-w-full">
              <CodeBlock
                className="font-secondary-mono"
                codeText={toolArgsJson}
                noPadding
              >
                <HighlightedJsonCode code={toolArgsJson} />
              </CodeBlock>
            </div>
          </div>
        )}

        {/* Error display */}
        {error && (
          <div className="pl-[var(--timeline-common-text-padding)]">
            <Text text03 mainUiMuted>
              {error.message}
            </Text>
          </div>
        )}

        {/* File responses */}
        {!error && fileIds && fileIds.length > 0 && (
          <div className="text-sm text-text-03 flex flex-col gap-2">
            {fileIds.map((fid, idx) => (
              <div key={fid} className="flex items-center gap-2 flex-wrap">
                <Text text03 secondaryBody className="whitespace-nowrap">
                  File {idx + 1}
                </Text>
                <a
                  href={buildImgUrl(fid)}
                  target="_blank"
                  rel="noreferrer"
                  className="inline-flex items-center gap-1 text-xs text-action-link-01 hover:underline whitespace-nowrap"
                >
                  <SvgExternalLink className="w-3 h-3" /> Open
                </a>
                <a
                  href={buildImgUrl(fid)}
                  download
                  className="inline-flex items-center gap-1 text-xs text-action-link-01 hover:underline whitespace-nowrap"
                >
                  <SvgDownload className="w-3 h-3" /> Download
                </a>
              </div>
            ))}
          </div>
        )}

        {/* JSON/Text responses */}
        {!error && data !== undefined && data !== null && (
          <div>
            <div className="flex items-center gap-1">
              <SvgArrowExchange className="w-3 h-3 text-text-02" />
              <Text text04 secondaryBody>
                Response
              </Text>
            </div>
            <div className="prose max-w-full">
              {dataJson ? (
                <CodeBlock
                  className="font-secondary-mono"
                  codeText={dataJson}
                  noPadding
                >
                  <HighlightedJsonCode code={dataJson} />
                </CodeBlock>
              ) : (
                <CodeBlock
                  className="font-secondary-mono"
                  codeText={String(data)}
                  noPadding
                >
                  {String(data)}
                </CodeBlock>
              )}
            </div>
          </div>
        )}
      </div>
    ),
    [toolArgsJson, dataJson, data, fileIds, error, isRunning]
  );

  // Auth error: always render FULL with error surface
  if (error?.is_auth_error) {
    return children([
      {
        icon,
        status,
        supportsCollapsible: false,
        noPaddingRight: true,
        surfaceBackground: "error" as const,
        content,
      },
    ]);
  }

  // FULL mode
  if (renderType === RenderType.FULL) {
    return children([
      {
        icon,
        status,
        supportsCollapsible: true,
        noPaddingRight: true,
        content,
      },
    ]);
  }

  // COMPACT mode: wrap in fading container
  return children([
    {
      icon,
      status,
      supportsCollapsible: true,
      content: (
        <FadingEdgeContainer
          direction="bottom"
          className="max-h-24 overflow-hidden"
        >
          {content}
        </FadingEdgeContainer>
      ),
    },
  ]);
};

export default CustomToolRenderer;


================================================
FILE: web/src/app/app/message/messageComponents/renderers/ImageToolRenderer.tsx
================================================
import React, { useEffect, useMemo } from "react";
import { SvgImage } from "@opal/icons";
import {
  PacketType,
  ImageGenerationToolPacket,
  ImageGenerationToolStart,
  ImageGenerationToolDelta,
  SectionEnd,
} from "../../../services/streamingModels";
import { MessageRenderer, RenderType } from "../interfaces";
import { InMessageImage } from "../../../components/files/images/InMessageImage";
import GeneratingImageDisplay from "../../../components/tools/GeneratingImageDisplay";

// Helper function to construct current image state
function constructCurrentImageState(packets: ImageGenerationToolPacket[]) {
  const imageStart = packets.find(
    (packet) => packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START
  )?.obj as ImageGenerationToolStart | null;
  const imageDeltas = packets
    .filter(
      (packet) => packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA
    )
    .map((packet) => packet.obj as ImageGenerationToolDelta);
  const imageEnd = packets.find(
    (packet) =>
      packet.obj.type === PacketType.SECTION_END ||
      packet.obj.type === PacketType.ERROR
  )?.obj as SectionEnd | null;

  const prompt = ""; // Image generation tools don't have a main description
  const images = imageDeltas.flatMap((delta) => delta?.images || []);
  const isGenerating = imageStart && !imageEnd;
  const isComplete = imageStart && imageEnd;

  return {
    prompt,
    images,
    isGenerating,
    isComplete,
    error: false, // For now, we don't have error state in the packets
  };
}

export const ImageToolRenderer: MessageRenderer<
  ImageGenerationToolPacket,
  {}
> = ({ packets, onComplete, renderType, children }) => {
  const { prompt, images, isGenerating, isComplete, error } =
    constructCurrentImageState(packets);

  useEffect(() => {
    if (isComplete) {
      onComplete();
    }
  }, [isComplete]);

  const status = useMemo(() => {
    if (isComplete) {
      return `Generated ${images.length} image${images.length > 1 ? "s" : ""}`;
    }
    if (isGenerating) {
      return "Generating image...";
    }
    return null;
  }, [isComplete, isGenerating, images.length]);

  // Render based on renderType
  if (renderType === RenderType.FULL) {
    // Full rendering with title header and content below
    // Loading state - when generating
    if (isGenerating) {
      return children([
        {
          icon: SvgImage,
          status: "Generating images...",
          supportsCollapsible: false,
          content: (
            <div className="flex flex-col">
              <div>
                <GeneratingImageDisplay isCompleted={false} />
              </div>
            </div>
          ),
        },
      ]);
    }

    // Complete state - show images
    if (isComplete) {
      return children([
        {
          icon: SvgImage,
          status: `Generated ${images.length} image${
            images.length !== 1 ? "s" : ""
          }`,
          supportsCollapsible: false,
          content: (
            <div className="flex flex-col my-1">
              {images.length > 0 ? (
                <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
                  {images.map((image, index: number) => (
                    <div
                      key={image.file_id || index}
                      className="transition-all group"
                    >
                      {image.file_id && (
                        <InMessageImage
                          fileId={image.file_id}
                          shape={image.shape}
                        />
                      )}
                    </div>
                  ))}
                </div>
              ) : (
                <div className="py-4 text-center text-gray-500 dark:text-gray-400 ml-7">
                  <SvgImage className="w-6 h-6 mx-auto mb-2 opacity-50" />
                  <p className="text-sm">No images generated</p>
                </div>
              )}
            </div>
          ),
        },
      ]);
    }

    // Fallback (shouldn't happen in normal flow)
    return children([
      {
        icon: SvgImage,
        status: status,
        supportsCollapsible: false,
        content: <div></div>,
      },
    ]);
  }

  // Highlight/Short rendering
  if (isGenerating) {
    return children([
      {
        icon: SvgImage,
        status: "Generating image...",
        supportsCollapsible: false,
        content: (
          <div className="flex items-center gap-2 text-sm text-muted-foreground">
            <div className="flex gap-0.5">
              <div className="w-1 h-1 bg-current rounded-full animate-pulse"></div>
              <div
                className="w-1 h-1 bg-current rounded-full animate-pulse"
                style={{ animationDelay: "0.1s" }}
              ></div>
              <div
                className="w-1 h-1 bg-current rounded-full animate-pulse"
                style={{ animationDelay: "0.2s" }}
              ></div>
            </div>
            <span>Generating image...</span>
          </div>
        ),
      },
    ]);
  }

  if (error) {
    return children([
      {
        icon: SvgImage,
        status: "Image generation failed",
        supportsCollapsible: false,
        content: (
          <div className="text-sm text-red-600 dark:text-red-400">
            Image generation failed
          </div>
        ),
      },
    ]);
  }

  if (isComplete && images.length > 0) {
    return children([
      {
        icon: SvgImage,
        status: `Generated ${images.length} image${
          images.length > 1 ? "s" : ""
        }`,
        supportsCollapsible: false,
        content: (
          <div className="text-sm text-muted-foreground">
            Generated {images.length} image
            {images.length > 1 ? "s" : ""}
          </div>
        ),
      },
    ]);
  }

  return children([
    {
      icon: SvgImage,
      status: "Image generation",
      supportsCollapsible: false,
      content: (
        <div className="text-sm text-muted-foreground">Image generation</div>
      ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/renderers/MessageTextRenderer.tsx
================================================
import React, { useEffect, useMemo, useRef, useState } from "react";
import Text from "@/refresh-components/texts/Text";

import {
  ChatPacket,
  PacketType,
  StopReason,
} from "../../../services/streamingModels";
import { MessageRenderer, FullChatState } from "../interfaces";
import { isFinalAnswerComplete } from "../../../services/packetUtils";
import { useMarkdownRenderer } from "../markdownUtils";
import { BlinkingBar } from "../../BlinkingBar";
import { useVoiceMode } from "@/providers/VoiceModeProvider";

/**
 * Maps a cleaned character position to the corresponding position in markdown text.
 * This allows progressive reveal to work with markdown formatting.
 */
function getRevealPosition(markdown: string, cleanChars: number): number {
  // Skip patterns that don't contribute to visible character count
  const skipChars = new Set(["*", "`", "#"]);
  let cleanIndex = 0;
  let mdIndex = 0;

  while (cleanIndex < cleanChars && mdIndex < markdown.length) {
    const char = markdown[mdIndex];

    // Skip markdown formatting characters
    if (char !== undefined && skipChars.has(char)) {
      mdIndex++;
      continue;
    }

    // Handle link syntax [text](url) - skip the (url) part but count the text
    if (
      char === "]" &&
      mdIndex + 1 < markdown.length &&
      markdown[mdIndex + 1] === "("
    ) {
      const closeIdx = markdown.indexOf(")", mdIndex + 2);
      if (closeIdx > 0) {
        mdIndex = closeIdx + 1;
        continue;
      }
    }

    cleanIndex++;
    mdIndex++;
  }

  // Extend to word boundary to avoid cutting mid-word
  while (
    mdIndex < markdown.length &&
    markdown[mdIndex] !== " " &&
    markdown[mdIndex] !== "\n"
  ) {
    mdIndex++;
  }

  return mdIndex;
}

// Control the rate of packet streaming (packets per second)
const PACKET_DELAY_MS = 10;

export const MessageTextRenderer: MessageRenderer<
  ChatPacket,
  FullChatState
> = ({
  packets,
  state,
  messageNodeId,
  hasTimelineThinking,
  onComplete,
  renderType,
  animate,
  stopPacketSeen,
  stopReason,
  children,
}) => {
  // If we're animating and the final answer is already complete, show more packets initially
  const initialPacketCount = animate
    ? packets.length > 0
      ? 1 // Otherwise start with 1 packet
      : 0
    : -1; // Show all if not animating

  const [displayedPacketCount, setDisplayedPacketCount] =
    useState(initialPacketCount);
  const lastStableSyncedContentRef = useRef("");
  const lastVisibleContentRef = useRef("");

  // Get voice mode context for progressive text reveal synced with audio
  const {
    revealedCharCount,
    autoPlayback,
    isAudioSyncActive,
    activeMessageNodeId,
    isAwaitingAutoPlaybackStart,
  } = useVoiceMode();

  // Get the full content from all packets
  const fullContent = packets
    .map((packet) => {
      if (
        packet.obj.type === PacketType.MESSAGE_DELTA ||
        packet.obj.type === PacketType.MESSAGE_START
      ) {
        return packet.obj.content;
      }
      return "";
    })
    .join("");

  const shouldUseAutoPlaybackSync =
    autoPlayback &&
    typeof messageNodeId === "number" &&
    activeMessageNodeId === messageNodeId;

  // Animation effect - gradually increase displayed packets at controlled rate
  useEffect(() => {
    if (!animate) {
      setDisplayedPacketCount(-1); // Show all packets
      return;
    }

    if (displayedPacketCount >= 0 && displayedPacketCount < packets.length) {
      const timer = setTimeout(() => {
        setDisplayedPacketCount((prev) => Math.min(prev + 1, packets.length));
      }, PACKET_DELAY_MS);

      return () => clearTimeout(timer);
    }
  }, [animate, displayedPacketCount, packets.length]);

  // Reset displayed count when packet array changes significantly (e.g., new message)
  useEffect(() => {
    if (animate && packets.length < displayedPacketCount) {
      const resetCount = isFinalAnswerComplete(packets)
        ? Math.min(10, packets.length)
        : packets.length > 0
          ? 1
          : 0;
      setDisplayedPacketCount(resetCount);
    }
  }, [animate, packets.length, displayedPacketCount]);

  // Only mark as complete when all packets are received AND displayed
  useEffect(() => {
    if (isFinalAnswerComplete(packets)) {
      // If animating, wait until all packets are displayed
      if (
        animate &&
        displayedPacketCount >= 0 &&
        displayedPacketCount < packets.length
      ) {
        return;
      }
      onComplete();
    }
  }, [packets, onComplete, animate, displayedPacketCount]);

  // Get content based on displayed packet count or audio progress
  const computedContent = useMemo(() => {
    // Hold response in "thinking" state only while autoplay startup is pending.
    if (shouldUseAutoPlaybackSync && isAwaitingAutoPlaybackStart) {
      return "";
    }

    // Sync text with audio only for the message currently being spoken.
    if (shouldUseAutoPlaybackSync && isAudioSyncActive) {
      const MIN_REVEAL_CHARS = 12;
      if (revealedCharCount < MIN_REVEAL_CHARS) {
        return "";
      }

      // Reveal text progressively based on audio progress
      const revealPos = getRevealPosition(fullContent, revealedCharCount);
      return fullContent.slice(0, Math.max(revealPos, 0));
    }

    // During an active synced turn, if sync temporarily drops, keep current reveal
    // instead of jumping to full content or blanking.
    if (shouldUseAutoPlaybackSync && !stopPacketSeen) {
      return lastStableSyncedContentRef.current;
    }

    // Standard behavior when auto-playback is off
    if (!animate || displayedPacketCount === -1) {
      return fullContent; // Show all content
    }

    // Packet-based reveal (when auto-playback is disabled)
    return packets
      .slice(0, displayedPacketCount)
      .map((packet) => {
        if (
          packet.obj.type === PacketType.MESSAGE_DELTA ||
          packet.obj.type === PacketType.MESSAGE_START
        ) {
          return packet.obj.content;
        }
        return "";
      })
      .join("");
  }, [
    animate,
    displayedPacketCount,
    fullContent,
    packets,
    revealedCharCount,
    autoPlayback,
    isAudioSyncActive,
    activeMessageNodeId,
    isAwaitingAutoPlaybackStart,
    messageNodeId,
    shouldUseAutoPlaybackSync,
    stopPacketSeen,
  ]);

  // Keep synced text monotonic: once visible, never regress or disappear between chunks.
  const content = useMemo(() => {
    const wasUserCancelled = stopReason === StopReason.USER_CANCELLED;

    // On user cancel, freeze at exactly what was already visible.
    if (wasUserCancelled) {
      return lastVisibleContentRef.current;
    }

    if (!shouldUseAutoPlaybackSync) {
      return computedContent;
    }

    if (computedContent.length === 0) {
      return lastStableSyncedContentRef.current;
    }

    const last = lastStableSyncedContentRef.current;
    if (computedContent.startsWith(last)) {
      return computedContent;
    }

    // If content shape changed unexpectedly mid-stream, prefer the stable version
    // to avoid flicker/dumps.
    if (!stopPacketSeen || wasUserCancelled) {
      return last;
    }

    // For normal completed responses, allow final full content.
    return computedContent;
  }, [computedContent, shouldUseAutoPlaybackSync, stopPacketSeen, stopReason]);

  // Sync the stable ref outside of useMemo to avoid side effects during render.
  useEffect(() => {
    if (stopReason === StopReason.USER_CANCELLED) {
      return;
    }
    if (!shouldUseAutoPlaybackSync) {
      lastStableSyncedContentRef.current = "";
    } else if (content.length > 0) {
      lastStableSyncedContentRef.current = content;
    }
  }, [content, shouldUseAutoPlaybackSync, stopReason]);

  // Track last actually rendered content so cancel can freeze without dumping buffered text.
  useEffect(() => {
    if (content.length > 0) {
      lastVisibleContentRef.current = content;
    }
  }, [content]);

  const shouldShowThinkingPlaceholder =
    shouldUseAutoPlaybackSync &&
    isAwaitingAutoPlaybackStart &&
    !hasTimelineThinking &&
    !stopPacketSeen;

  const shouldShowSpeechWarmupIndicator =
    shouldUseAutoPlaybackSync &&
    !isAwaitingAutoPlaybackStart &&
    content.length === 0 &&
    fullContent.length > 0 &&
    !hasTimelineThinking &&
    !stopPacketSeen;

  const shouldShowCursor =
    content.length > 0 &&
    (!stopPacketSeen ||
      (shouldUseAutoPlaybackSync && content.length < fullContent.length));

  const { renderedContent } = useMarkdownRenderer(
    // the [*]() is a hack to show a blinking dot when the packet is not complete
    shouldShowCursor ? content + " [*]() " : content,
    state,
    "font-main-content-body"
  );

  return children([
    {
      icon: null,
      status: null,
      content:
        shouldShowThinkingPlaceholder || shouldShowSpeechWarmupIndicator ? (
          <Text as="span" secondaryBody text04 className="italic">
            Thinking
          </Text>
        ) : content.length > 0 ? (
          <>{renderedContent}</>
        ) : (
          <BlinkingBar addMargin />
        ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/AgentTimeline.tsx
================================================
"use client";

import React, { useMemo, useCallback } from "react";
import { StopReason } from "@/app/app/services/streamingModels";
import { FullChatState, RenderType } from "../interfaces";
import { TurnGroup } from "./transformers";
import { cn } from "@/lib/utils";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import Text from "@/refresh-components/texts/Text";
import { useTimelineExpansion } from "@/app/app/message/messageComponents/timeline/hooks/useTimelineExpansion";
import { useTimelineMetrics } from "@/app/app/message/messageComponents/timeline/hooks/useTimelineMetrics";
import { useTimelineHeader } from "@/app/app/message/messageComponents/timeline/hooks/useTimelineHeader";
import {
  useTimelineUIState,
  TimelineUIState,
} from "@/app/app/message/messageComponents/timeline/hooks/useTimelineUIState";
import {
  isResearchAgentPackets,
  isSearchToolPackets,
  stepSupportsCollapsedStreaming,
  stepHasCollapsedStreamingContent,
} from "@/app/app/message/messageComponents/timeline/packetHelpers";
import { useTimelineStepState } from "@/app/app/message/messageComponents/timeline/hooks/useTimelineStepState";
import { StreamingHeader } from "@/app/app/message/messageComponents/timeline/headers/StreamingHeader";
import { CompletedHeader } from "@/app/app/message/messageComponents/timeline/headers/CompletedHeader";
import { StoppedHeader } from "@/app/app/message/messageComponents/timeline/headers/StoppedHeader";
import { ParallelStreamingHeader } from "@/app/app/message/messageComponents/timeline/headers/ParallelStreamingHeader";
import { useStreamingStartTime } from "@/app/app/stores/useChatSessionStore";
import { ExpandedTimelineContent } from "./ExpandedTimelineContent";
import { CollapsedStreamingContent } from "./CollapsedStreamingContent";
import { TimelineRoot } from "@/app/app/message/messageComponents/timeline/primitives/TimelineRoot";
import { TimelineHeaderRow } from "@/app/app/message/messageComponents/timeline/primitives/TimelineHeaderRow";

// =============================================================================
// Private Wrapper Components
// =============================================================================

interface TimelineContainerProps {
  agent: FullChatState["agent"];
  headerContent?: React.ReactNode;
  children?: React.ReactNode;
}

function TimelineContainer({
  agent,
  headerContent,
  children,
}: TimelineContainerProps) {
  return (
    <TimelineRoot>
      <TimelineHeaderRow left={<AgentAvatar agent={agent} size={24} />}>
        {headerContent}
      </TimelineHeaderRow>
      {children}
    </TimelineRoot>
  );
}

// =============================================================================
// Main Component
// =============================================================================

export interface AgentTimelineProps {
  /** Turn groups from usePacketProcessor */
  turnGroups: TurnGroup[];
  /** Chat state for rendering content */
  chatState: FullChatState;
  /** Whether the stop packet has been seen */
  stopPacketSeen?: boolean;
  /** Reason for stopping (if stopped) */
  stopReason?: StopReason;
  /** Whether final answer is coming (affects last connector) */
  finalAnswerComing?: boolean;
  /** Whether there is display content after timeline */
  hasDisplayContent?: boolean;
  /** Content to render after timeline (final message + toolbar) - slot pattern */
  children?: React.ReactNode;
  /** Whether the timeline is collapsible */
  collapsible?: boolean;
  /** Title of the button to toggle the timeline */
  buttonTitle?: string;
  /** Test ID for e2e testing */
  "data-testid"?: string;
  /** Processing duration in seconds (for completed messages) */
  processingDurationSeconds?: number;
  /** Whether image generation is in progress */
  isGeneratingImage?: boolean;
  /** Number of images generated */
  generatedImageCount?: number;
  /** Tool processing duration from backend (via MESSAGE_START packet) */
  toolProcessingDuration?: number;
}

/**
 * Custom prop comparison for AgentTimeline memoization.
 * Prevents unnecessary re-renders when parent renders but props haven't meaningfully changed.
 */
function areAgentTimelinePropsEqual(
  prev: AgentTimelineProps,
  next: AgentTimelineProps
): boolean {
  return (
    prev.turnGroups === next.turnGroups &&
    prev.stopPacketSeen === next.stopPacketSeen &&
    prev.stopReason === next.stopReason &&
    prev.finalAnswerComing === next.finalAnswerComing &&
    prev.hasDisplayContent === next.hasDisplayContent &&
    prev.processingDurationSeconds === next.processingDurationSeconds &&
    prev.collapsible === next.collapsible &&
    prev.buttonTitle === next.buttonTitle &&
    prev.chatState === next.chatState &&
    prev.isGeneratingImage === next.isGeneratingImage &&
    prev.generatedImageCount === next.generatedImageCount &&
    prev.toolProcessingDuration === next.toolProcessingDuration
  );
}

export const AgentTimeline = React.memo(function AgentTimeline({
  turnGroups,
  chatState,
  stopPacketSeen = false,
  stopReason,
  finalAnswerComing = false,
  hasDisplayContent = false,
  collapsible = true,
  buttonTitle,
  "data-testid": testId,
  processingDurationSeconds,
  isGeneratingImage = false,
  generatedImageCount = 0,
  toolProcessingDuration,
}: AgentTimelineProps) {
  // Header text and state flags
  const { headerText, hasPackets, userStopped } = useTimelineHeader(
    turnGroups,
    stopReason,
    isGeneratingImage
  );

  // Memoized metrics derived from turn groups
  const {
    totalSteps,
    isSingleStep,
    lastTurnGroup,
    lastStep,
    lastStepIsResearchAgent,
    lastStepSupportsCollapsedStreaming,
  } = useTimelineMetrics(turnGroups, userStopped);

  // Extract memory text, operation, and whether this is a memory-only timeline
  const { memoryText, memoryOperation, memoryId, memoryIndex, isMemoryOnly } =
    useTimelineStepState(turnGroups);

  // Check if last step is a search tool for INLINE render type
  const lastStepIsSearchTool = useMemo(
    () => lastStep && isSearchToolPackets(lastStep.packets),
    [lastStep]
  );

  const { isExpanded, handleToggle, parallelActiveTab, setParallelActiveTab } =
    useTimelineExpansion(stopPacketSeen, lastTurnGroup, hasDisplayContent);

  // Streaming duration tracking
  const streamingStartTime = useStreamingStartTime();

  // Parallel step analysis for collapsed streaming view
  const parallelActiveStep = useMemo(() => {
    if (!lastTurnGroup?.isParallel) return null;
    return (
      lastTurnGroup.steps.find((s) => s.key === parallelActiveTab) ??
      lastTurnGroup.steps[0]
    );
  }, [lastTurnGroup, parallelActiveTab]);

  const parallelActiveStepSupportsCollapsedStreaming = useMemo(() => {
    if (!parallelActiveStep) return false;
    return stepSupportsCollapsedStreaming(parallelActiveStep.packets);
  }, [parallelActiveStep]);

  const lastStepHasCollapsedContent = useMemo(() => {
    if (!lastStep) return false;
    return stepHasCollapsedStreamingContent(lastStep.packets);
  }, [lastStep]);

  const parallelActiveStepHasCollapsedContent = useMemo(() => {
    if (!parallelActiveStep) return false;
    return stepHasCollapsedStreamingContent(parallelActiveStep.packets);
  }, [parallelActiveStep]);

  const stoppedStepsCount = useMemo(() => {
    if (!stopPacketSeen || !userStopped) {
      return totalSteps;
    }

    let count = 0;
    for (const turnGroup of turnGroups) {
      for (const step of turnGroup.steps) {
        if (stepHasCollapsedStreamingContent(step.packets)) {
          count += 1;
        }
      }
    }

    return count;
  }, [stopPacketSeen, userStopped, totalSteps, turnGroups]);

  // Derive all UI state from inputs
  const {
    uiState,
    showCollapsedCompact,
    showCollapsedParallel,
    showParallelTabs,
    showDoneStep,
    showStoppedStep,
    hasDoneIndicator,
    showTintedBackground,
    showRoundedBottom,
  } = useTimelineUIState({
    stopPacketSeen,
    hasPackets,
    hasDisplayContent,
    userStopped,
    isExpanded,
    lastTurnGroup,
    lastStep,
    lastStepSupportsCollapsedStreaming,
    lastStepHasCollapsedContent,
    lastStepIsResearchAgent,
    parallelActiveStepSupportsCollapsedStreaming,
    parallelActiveStepHasCollapsedContent,
    isGeneratingImage,
    finalAnswerComing,
  });

  const headerIsInteractive = useMemo(() => {
    if (!collapsible || isMemoryOnly) {
      return false;
    }

    if (uiState === TimelineUIState.STOPPED) {
      return stoppedStepsCount > 0;
    }

    return totalSteps > 0;
  }, [collapsible, isMemoryOnly, uiState, stoppedStepsCount, totalSteps]);

  // Determine render type override for collapsed streaming view
  const collapsedRenderTypeOverride = useMemo(() => {
    if (lastStepIsResearchAgent) return RenderType.HIGHLIGHT;
    if (lastStepIsSearchTool) return RenderType.INLINE;
    return RenderType.COMPACT;
  }, [lastStepIsResearchAgent, lastStepIsSearchTool]);

  // Header selection based on UI state
  const renderHeader = useCallback(() => {
    switch (uiState) {
      case TimelineUIState.STREAMING_PARALLEL:
        // Only show parallel header when collapsed (showParallelTabs includes !isExpanded check)
        if (showParallelTabs && lastTurnGroup) {
          return (
            <ParallelStreamingHeader
              steps={lastTurnGroup.steps}
              activeTab={parallelActiveTab}
              onTabChange={setParallelActiveTab}
              collapsible={collapsible}
              isExpanded={isExpanded}
              onToggle={handleToggle}
            />
          );
        }
      // falls through to sequential header when expanded or no lastTurnGroup
      case TimelineUIState.STREAMING_SEQUENTIAL:
        return (
          <StreamingHeader
            headerText={headerText}
            collapsible={collapsible}
            buttonTitle={buttonTitle}
            isExpanded={isExpanded}
            onToggle={handleToggle}
            streamingStartTime={streamingStartTime}
            toolProcessingDuration={toolProcessingDuration}
          />
        );

      case TimelineUIState.STOPPED:
        return (
          <StoppedHeader
            totalSteps={stoppedStepsCount}
            collapsible={collapsible}
            isExpanded={isExpanded}
            onToggle={handleToggle}
          />
        );

      case TimelineUIState.COMPLETED_COLLAPSED:
      case TimelineUIState.COMPLETED_EXPANDED:
        return (
          <CompletedHeader
            totalSteps={totalSteps}
            collapsible={collapsible}
            isExpanded={isExpanded}
            onToggle={handleToggle}
            processingDurationSeconds={
              toolProcessingDuration ?? processingDurationSeconds
            }
            generatedImageCount={generatedImageCount}
            isMemoryOnly={isMemoryOnly}
            memoryText={memoryText}
            memoryOperation={memoryOperation}
            memoryId={memoryId}
            memoryIndex={memoryIndex}
          />
        );

      default:
        return null;
    }
  }, [
    uiState,
    showParallelTabs,
    lastTurnGroup,
    parallelActiveTab,
    setParallelActiveTab,
    collapsible,
    isExpanded,
    handleToggle,
    headerText,
    buttonTitle,
    streamingStartTime,
    isMemoryOnly,
    memoryText,
    memoryOperation,
    memoryId,
    memoryIndex,
    totalSteps,
    stoppedStepsCount,
    processingDurationSeconds,
    generatedImageCount,
    toolProcessingDuration,
  ]);

  // Empty state: no packets, still streaming, and not stopped
  if (uiState === TimelineUIState.EMPTY) {
    return (
      <TimelineContainer
        agent={chatState.agent}
        headerContent={
          <div className="flex w-full h-full items-center pl-[var(--timeline-header-padding-left)] pr-[var(--timeline-header-padding-right)]">
            <Text
              as="p"
              mainUiAction
              text03
              className="animate-shimmer bg-[length:200%_100%] bg-[linear-gradient(90deg,var(--shimmer-base)_10%,var(--shimmer-highlight)_40%,var(--shimmer-base)_70%)] bg-clip-text text-transparent"
            >
              {headerText}
            </Text>
          </div>
        }
      />
    );
  }

  // Display content only (no timeline steps) - but show header for image generation
  if (uiState === TimelineUIState.DISPLAY_CONTENT_ONLY) {
    return <TimelineContainer agent={chatState.agent} />;
  }

  return (
    <TimelineContainer
      agent={chatState.agent}
      headerContent={
        <div
          className={cn(
            "flex flex-1 min-w-0 h-full items-center justify-between p-1 rounded-t-12 transition-colors duration-300",
            headerIsInteractive && "hover:bg-background-tint-00",
            showTintedBackground && "bg-background-tint-00",
            showRoundedBottom && "rounded-b-12"
          )}
        >
          {renderHeader()}
        </div>
      }
    >
      {/* Collapsed streaming view - single step compact mode */}
      {showCollapsedCompact && lastStep && (
        <CollapsedStreamingContent
          step={lastStep}
          chatState={chatState}
          stopReason={stopReason}
          renderTypeOverride={collapsedRenderTypeOverride}
        />
      )}

      {/* Collapsed streaming view - parallel tools compact mode */}
      {showCollapsedParallel && parallelActiveStep && (
        <CollapsedStreamingContent
          step={parallelActiveStep}
          chatState={chatState}
          stopReason={stopReason}
          renderTypeOverride={RenderType.HIGHLIGHT}
        />
      )}

      {/* Expanded timeline view */}
      {isExpanded && (
        <div className="animate-in fade-in slide-in-from-top-2 duration-300">
          <ExpandedTimelineContent
            turnGroups={turnGroups}
            chatState={chatState}
            stopPacketSeen={stopPacketSeen}
            stopReason={stopReason}
            isSingleStep={isSingleStep}
            userStopped={userStopped}
            showDoneStep={showDoneStep}
            showStoppedStep={showStoppedStep}
            hasDoneIndicator={hasDoneIndicator}
          />
        </div>
      )}
    </TimelineContainer>
  );
}, areAgentTimelinePropsEqual);

export default AgentTimeline;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/CollapsedStreamingContent.tsx
================================================
"use client";

import React, { useCallback } from "react";
import { StopReason } from "@/app/app/services/streamingModels";
import { FullChatState, RenderType } from "../interfaces";
import { TransformedStep } from "./transformers";
import {
  TimelineRendererComponent,
  TimelineRendererOutput,
} from "./TimelineRendererComponent";
import { TimelineRow } from "@/app/app/message/messageComponents/timeline/primitives/TimelineRow";
import { TimelineSurface } from "@/app/app/message/messageComponents/timeline/primitives/TimelineSurface";

export interface CollapsedStreamingContentProps {
  step: TransformedStep;
  chatState: FullChatState;
  stopReason?: StopReason;
  renderTypeOverride?: RenderType;
}

export const CollapsedStreamingContent = React.memo(
  function CollapsedStreamingContent({
    step,
    chatState,
    stopReason,
    renderTypeOverride,
  }: CollapsedStreamingContentProps) {
    const renderContentOnly = useCallback(
      (results: TimelineRendererOutput) => (
        <>
          {results.map((result, index) => (
            <React.Fragment key={index}>{result.content}</React.Fragment>
          ))}
        </>
      ),
      []
    );

    return (
      <TimelineRow railVariant="spacer">
        <TimelineSurface className="px-2 pb-2" roundedBottom>
          <TimelineRendererComponent
            key={`${step.key}-compact`}
            packets={step.packets}
            chatState={chatState}
            animate={true}
            stopPacketSeen={false}
            stopReason={stopReason}
            defaultExpanded={false}
            renderTypeOverride={renderTypeOverride}
            isLastStep={true}
          >
            {renderContentOnly}
          </TimelineRendererComponent>
        </TimelineSurface>
      </TimelineRow>
    );
  }
);

export default CollapsedStreamingContent;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/ExpandedTimelineContent.tsx
================================================
"use client";

import React, { FunctionComponent, useMemo, useCallback } from "react";
import { StopReason } from "@/app/app/services/streamingModels";
import { FullChatState } from "../interfaces";
import { TurnGroup, TransformedStep } from "./transformers";
import { SvgCheckCircle, SvgStopCircle } from "@opal/icons";
import { IconProps } from "@opal/types";
import {
  TimelineRendererComponent,
  TimelineRendererOutput,
  TimelineRendererResult,
} from "./TimelineRendererComponent";
import { ParallelTimelineTabs } from "./ParallelTimelineTabs";
import { StepContainer } from "./StepContainer";
import { TimelineStepComposer } from "./TimelineStepComposer";
import {
  isSearchToolPackets,
  isPythonToolPackets,
} from "@/app/app/message/messageComponents/timeline/packetHelpers";

// =============================================================================
// TimelineStep Component - Memoized to prevent re-renders
// =============================================================================

interface TimelineStepProps {
  step: TransformedStep;
  chatState: FullChatState;
  stopPacketSeen: boolean;
  stopReason?: StopReason;
  isLastStep: boolean;
  isFirstStep: boolean;
  isSingleStep: boolean;
  isStreaming?: boolean;
}

const TimelineStep = React.memo(function TimelineStep({
  step,
  chatState,
  stopPacketSeen,
  stopReason,
  isLastStep,
  isFirstStep,
  isSingleStep,
  isStreaming = false,
}: TimelineStepProps) {
  const isSearchTool = useMemo(
    () => isSearchToolPackets(step.packets),
    [step.packets]
  );
  const isPythonTool = useMemo(
    () => isPythonToolPackets(step.packets),
    [step.packets]
  );
  const getCollapsedIcon = useCallback(
    (result: TimelineRendererResult) =>
      isSearchTool ? (result.icon as FunctionComponent<IconProps>) : undefined,
    [isSearchTool]
  );

  const renderStep = useCallback(
    (results: TimelineRendererOutput) => (
      <TimelineStepComposer
        results={results}
        isLastStep={isLastStep}
        isFirstStep={isFirstStep}
        isSingleStep={isSingleStep}
        collapsible={true}
        getCollapsedIcon={getCollapsedIcon}
      />
    ),
    [isFirstStep, isLastStep, isSingleStep, getCollapsedIcon]
  );

  return (
    <TimelineRendererComponent
      packets={step.packets}
      chatState={chatState}
      animate={!stopPacketSeen}
      stopPacketSeen={stopPacketSeen}
      stopReason={stopReason}
      defaultExpanded={isStreaming || (isSingleStep && !isPythonTool)}
      isLastStep={isLastStep}
    >
      {renderStep}
    </TimelineRendererComponent>
  );
});

// =============================================================================
// ExpandedTimelineContent Component
// =============================================================================

export interface ExpandedTimelineContentProps {
  turnGroups: TurnGroup[];
  chatState: FullChatState;
  stopPacketSeen: boolean;
  stopReason?: StopReason;
  isSingleStep: boolean;
  userStopped: boolean;
  showDoneStep: boolean;
  showStoppedStep: boolean;
  hasDoneIndicator: boolean;
}

export const ExpandedTimelineContent = React.memo(
  function ExpandedTimelineContent({
    turnGroups,
    chatState,
    stopPacketSeen,
    stopReason,
    isSingleStep,
    userStopped,
    showDoneStep,
    showStoppedStep,
    hasDoneIndicator,
  }: ExpandedTimelineContentProps) {
    return (
      <div className="w-full">
        {turnGroups.map((turnGroup, turnIdx) =>
          turnGroup.isParallel ? (
            <ParallelTimelineTabs
              key={turnGroup.turnIndex}
              turnGroup={turnGroup}
              chatState={chatState}
              stopPacketSeen={stopPacketSeen}
              stopReason={stopReason}
              isLastTurnGroup={
                turnIdx === turnGroups.length - 1 &&
                !showDoneStep &&
                !showStoppedStep
              }
              isFirstTurnGroup={turnIdx === 0}
            />
          ) : (
            turnGroup.steps.map((step, stepIdx) => {
              const stepIsLast =
                turnIdx === turnGroups.length - 1 &&
                stepIdx === turnGroup.steps.length - 1 &&
                !hasDoneIndicator &&
                !userStopped;
              const stepIsFirst = turnIdx === 0 && stepIdx === 0;

              return (
                <TimelineStep
                  key={step.key}
                  step={step}
                  chatState={chatState}
                  stopPacketSeen={stopPacketSeen}
                  stopReason={stopReason}
                  isLastStep={stepIsLast}
                  isFirstStep={stepIsFirst}
                  isSingleStep={isSingleStep}
                  isStreaming={!stopPacketSeen && !userStopped}
                />
              );
            })
          )
        )}

        {/* Done indicator */}
        {showDoneStep && (
          <StepContainer
            stepIcon={SvgCheckCircle}
            header="Done"
            isLastStep={true}
            isFirstStep={false}
          >
            {null}
          </StepContainer>
        )}

        {/* Stopped indicator */}
        {showStoppedStep && (
          <StepContainer
            stepIcon={SvgStopCircle}
            header="Stopped"
            isLastStep={true}
            isFirstStep={false}
          >
            {null}
          </StepContainer>
        )}
      </div>
    );
  }
);

export default ExpandedTimelineContent;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/ParallelTimelineTabs.tsx
================================================
"use client";

import React, { useState, useMemo, useCallback } from "react";
import { cn } from "@/lib/utils";
import { StopReason } from "@/app/app/services/streamingModels";
import { FullChatState } from "../interfaces";
import { TurnGroup } from "./transformers";
import {
  getToolName,
  getToolIcon,
  isToolComplete,
} from "../toolDisplayHelpers";
import {
  TimelineRendererComponent,
  TimelineRendererOutput,
} from "./TimelineRendererComponent";
import Tabs from "@/refresh-components/Tabs";
import { SvgBranch, SvgFold, SvgExpand } from "@opal/icons";
import { Button } from "@opal/components";
import { TimelineRow } from "@/app/app/message/messageComponents/timeline/primitives/TimelineRow";
import { TimelineSurface } from "@/app/app/message/messageComponents/timeline/primitives/TimelineSurface";
import { TimelineTopSpacer } from "@/app/app/message/messageComponents/timeline/primitives/TimelineTopSpacer";
import { TimelineStepComposer } from "./TimelineStepComposer";

export interface ParallelTimelineTabsProps {
  /** Turn group containing parallel steps */
  turnGroup: TurnGroup;
  /** Chat state for rendering content */
  chatState: FullChatState;
  /** Whether the stop packet has been seen */
  stopPacketSeen: boolean;
  /** Reason for stopping (if stopped) */
  stopReason?: StopReason;
  /** Whether this is the last turn group (affects connector line) */
  isLastTurnGroup: boolean;
  /** Whether this is the first turn group (affects connector line) */
  isFirstTurnGroup: boolean;
}

export function ParallelTimelineTabs({
  turnGroup,
  chatState,
  stopPacketSeen,
  stopReason,
  isLastTurnGroup,
  isFirstTurnGroup,
}: ParallelTimelineTabsProps) {
  const [activeTab, setActiveTab] = useState(turnGroup.steps[0]?.key ?? "");
  const [isExpanded, setIsExpanded] = useState(true);
  const [isHover, setIsHover] = useState(false);
  const handleToggle = useCallback(() => setIsExpanded((prev) => !prev), []);
  const handleHeaderEnter = useCallback(() => setIsHover(true), []);
  const handleHeaderLeave = useCallback(() => setIsHover(false), []);
  const topSpacerVariant = isFirstTurnGroup ? "first" : "none";
  const shouldShowResults = !(!isExpanded && stopPacketSeen);

  // Find the active step based on selected tab
  const activeStep = useMemo(
    () => turnGroup.steps.find((step) => step.key === activeTab),
    [turnGroup.steps, activeTab]
  );

  // Memoized loading states for each step
  const loadingStates = useMemo(
    () =>
      new Map(
        turnGroup.steps.map((step) => [
          step.key,
          !stopPacketSeen &&
            step.packets.length > 0 &&
            !isToolComplete(step.packets),
        ])
      ),
    [turnGroup.steps, stopPacketSeen]
  );

  const renderTabContent = useCallback(
    (results: TimelineRendererOutput) => (
      <TimelineStepComposer
        results={results}
        isLastStep={isLastTurnGroup}
        isFirstStep={false}
        isSingleStep={false}
        collapsible={true}
      />
    ),
    [isLastTurnGroup]
  );

  const hasActivePackets = Boolean(activeStep && activeStep.packets.length > 0);
  const headerIsLast =
    isLastTurnGroup && (!shouldShowResults || !hasActivePackets);

  return (
    <Tabs value={activeTab} onValueChange={setActiveTab}>
      <div className="flex flex-col w-full">
        <TimelineRow
          railVariant="rail"
          isFirst={isFirstTurnGroup}
          isLast={headerIsLast}
          isHover={isHover}
          disableTopConnectorHover={true}
          icon={
            <div
              className={cn(
                "h-[var(--timeline-branch-icon-wrapper-size)] w-[var(--timeline-branch-icon-wrapper-size)] flex items-center justify-center text-text-02",
                isHover &&
                  "text-text-inverted-05 bg-background-neutral-inverted-00 rounded-full"
              )}
            >
              <SvgBranch className="h-[var(--timeline-branch-icon-size)] w-[var(--timeline-branch-icon-size)]" />
            </div>
          }
        >
          <TimelineSurface
            className="flex-1 flex flex-col"
            isHover={isHover}
            roundedBottom={headerIsLast}
          >
            <TimelineTopSpacer variant={topSpacerVariant} />

            <div
              className="flex items-center min-h-[var(--timeline-step-header-height)] pl-[var(--timeline-header-padding-left)] pr-[var(--timeline-header-padding-right)]"
              onMouseEnter={handleHeaderEnter}
              onMouseLeave={handleHeaderLeave}
            >
              <Tabs.List
                variant="pill"
                enableScrollArrows
                className={cn(
                  isHover && "bg-background-tint-02",
                  "transition-colors duration-200"
                )}
                rightContent={
                  <Button
                    prominence="tertiary"
                    size="sm"
                    onClick={handleToggle}
                    icon={isExpanded ? SvgFold : SvgExpand}
                  />
                }
              >
                {turnGroup.steps.map((step) => (
                  <Tabs.Trigger
                    key={step.key}
                    value={step.key}
                    variant="pill"
                    isLoading={loadingStates.get(step.key)}
                  >
                    <span className="flex items-center gap-1.5">
                      {getToolIcon(step.packets)}
                      {getToolName(step.packets)}
                    </span>
                  </Tabs.Trigger>
                ))}
              </Tabs.List>
            </div>
          </TimelineSurface>
        </TimelineRow>

        {shouldShowResults && activeStep && (
          <TimelineRendererComponent
            key={`${activeTab}-${isExpanded}`}
            packets={activeStep.packets}
            chatState={chatState}
            animate={!stopPacketSeen}
            stopPacketSeen={stopPacketSeen}
            stopReason={stopReason}
            defaultExpanded={isExpanded}
            isLastStep={isLastTurnGroup}
            isHover={isHover}
          >
            {renderTabContent}
          </TimelineRendererComponent>
        )}
      </div>
    </Tabs>
  );
}

export default ParallelTimelineTabs;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/StepContainer.tsx
================================================
import React, { FunctionComponent } from "react";
import { cn } from "@/lib/utils";
import { IconProps } from "@opal/types";
import { TimelineRow } from "@/app/app/message/messageComponents/timeline/primitives/TimelineRow";
import {
  TimelineSurface,
  TimelineSurfaceBackground,
} from "@/app/app/message/messageComponents/timeline/primitives/TimelineSurface";
import { TimelineStepContent } from "@/app/app/message/messageComponents/timeline/primitives/TimelineStepContent";

export interface StepContainerProps {
  /** Main content */
  children?: React.ReactNode;
  /** Step icon component */
  stepIcon?: FunctionComponent<IconProps>;
  /** Header left slot */
  header?: React.ReactNode;
  /** Button title for toggle */
  buttonTitle?: string;
  /** Controlled expanded state */
  isExpanded?: boolean;
  /** Toggle callback */
  onToggle?: () => void;
  /** Whether collapse control is shown */
  collapsible?: boolean;
  /** Collapse button shown only when renderer supports collapsible mode */
  supportsCollapsible?: boolean;
  /** Last step (no bottom connector) */
  isLastStep?: boolean;
  /** First step (top padding instead of connector) */
  isFirstStep?: boolean;
  /** Hide header (single-step timelines) */
  hideHeader?: boolean;
  /** Hover state from parent */
  isHover?: boolean;
  /** Custom icon to show when collapsed (defaults to SvgExpand) */
  collapsedIcon?: FunctionComponent<IconProps>;
  /** Remove right padding (for reasoning content) */
  noPaddingRight?: boolean;
  /** Render without rail (for nested/parallel content) */
  withRail?: boolean;
  /** Override the surface background variant */
  surfaceBackground?: TimelineSurfaceBackground;
}

/** Visual wrapper for timeline steps - icon, connector line, header, and content */
export function StepContainer({
  children,
  stepIcon: StepIconComponent,
  header,
  buttonTitle,
  isExpanded = true,
  onToggle,
  collapsible = true,
  supportsCollapsible = false,
  isLastStep = false,
  isFirstStep = false,
  hideHeader = false,
  isHover = false,
  collapsedIcon: CollapsedIconComponent,
  noPaddingRight = false,
  withRail = true,
  surfaceBackground,
}: StepContainerProps) {
  const iconNode = StepIconComponent ? (
    <StepIconComponent
      className={cn(
        "h-[var(--timeline-icon-size)] w-[var(--timeline-icon-size)] stroke-text-02",
        isHover && "stroke-text-04"
      )}
    />
  ) : null;

  const content = (
    <TimelineSurface
      className="flex-1 flex flex-col"
      isHover={isHover}
      roundedBottom={isLastStep}
      background={surfaceBackground}
    >
      <TimelineStepContent
        header={header}
        buttonTitle={buttonTitle}
        isExpanded={isExpanded}
        onToggle={onToggle}
        collapsible={collapsible}
        supportsCollapsible={supportsCollapsible}
        hideHeader={hideHeader}
        collapsedIcon={CollapsedIconComponent}
        noPaddingRight={noPaddingRight}
        surfaceBackground={surfaceBackground}
      >
        {children}
      </TimelineStepContent>
    </TimelineSurface>
  );

  if (!withRail) {
    return <div className="flex w-full">{content}</div>;
  }

  return (
    <TimelineRow
      railVariant="rail"
      icon={iconNode}
      showIcon={!hideHeader && Boolean(StepIconComponent)}
      iconRowVariant={hideHeader ? "compact" : "default"}
      isFirst={isFirstStep}
      isLast={isLastStep}
      isHover={isHover}
    >
      {content}
    </TimelineRow>
  );
}

export default StepContainer;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/TimelineRendererComponent.tsx
================================================
"use client";

import React, { useState, useCallback, JSX } from "react";
import { Packet, StopReason } from "@/app/app/services/streamingModels";
import {
  FullChatState,
  RenderType,
  RendererResult,
  RendererOutput,
} from "../interfaces";
import { findRenderer } from "../renderMessageComponent";

/** Extended result that includes collapse state */
export interface TimelineRendererResult extends RendererResult {
  /** Current expanded state */
  isExpanded: boolean;
  /** Toggle callback */
  onToggle: () => void;
  /** Current render type */
  renderType: RenderType;
  /** Whether this is the last step (passed through from props) */
  isLastStep: boolean;
  /** Hover state from parent */
  isHover: boolean;
  /** Whether parent should wrap with StepContainer or render raw content */
  timelineLayout: "timeline" | "content";
}

// All renderers return an array of results
export type TimelineRendererOutput = TimelineRendererResult[];

export interface TimelineRendererComponentProps {
  /** Packets to render */
  packets: Packet[];
  /** Chat state for rendering */
  chatState: FullChatState;
  /** Whether to animate streaming */
  animate: boolean;
  /** Whether stop packet has been seen */
  stopPacketSeen: boolean;
  /** Reason for stopping */
  stopReason?: StopReason;
  /** Initial expanded state */
  defaultExpanded?: boolean;
  /** Whether this is the last step in the timeline (for connector line decisions) */
  isLastStep?: boolean;
  /** Hover state from parent component */
  isHover?: boolean;
  /** Override render type (if not set, derives from defaultExpanded) */
  renderTypeOverride?: RenderType;
  /** Children render function - receives extended result with collapse state (single or array) */
  children: (result: TimelineRendererOutput) => JSX.Element;
}

// Custom comparison function to prevent unnecessary re-renders
// Only re-render if meaningful changes occur
function arePropsEqual(
  prev: TimelineRendererComponentProps,
  next: TimelineRendererComponentProps
): boolean {
  return (
    prev.packets === next.packets &&
    prev.stopPacketSeen === next.stopPacketSeen &&
    prev.stopReason === next.stopReason &&
    prev.animate === next.animate &&
    prev.isLastStep === next.isLastStep &&
    prev.isHover === next.isHover &&
    prev.defaultExpanded === next.defaultExpanded &&
    prev.renderTypeOverride === next.renderTypeOverride
    // Skipping chatState (memoized upstream)
  );
}

export const TimelineRendererComponent = React.memo(
  function TimelineRendererComponent({
    packets,
    chatState,
    animate,
    stopPacketSeen,
    stopReason,
    defaultExpanded = true,
    isLastStep,
    isHover = false,
    renderTypeOverride,
    children,
  }: TimelineRendererComponentProps) {
    const [isExpanded, setIsExpanded] = useState(defaultExpanded);
    const handleToggle = useCallback(() => setIsExpanded((prev) => !prev), []);
    const RendererFn = findRenderer({ packets });
    const renderType =
      renderTypeOverride ?? (isExpanded ? RenderType.FULL : RenderType.COMPACT);

    if (!RendererFn) {
      return children([
        {
          icon: null,
          status: null,
          content: <></>,
          supportsCollapsible: false,
          timelineLayout: "timeline",
          isExpanded,
          onToggle: handleToggle,
          renderType,
          isLastStep: isLastStep ?? true,
          isHover,
        },
      ]);
    }

    // Helper to add timeline context to a result
    const enhanceResult = (result: RendererResult): TimelineRendererResult => ({
      ...result,
      isExpanded,
      onToggle: handleToggle,
      renderType,
      isLastStep: isLastStep ?? true,
      isHover,
      timelineLayout: result.timelineLayout ?? "timeline",
    });

    return (
      <RendererFn
        packets={packets as any}
        state={chatState}
        onComplete={() => {}}
        animate={animate}
        renderType={renderType}
        stopPacketSeen={stopPacketSeen}
        stopReason={stopReason}
        isLastStep={isLastStep}
        isHover={isHover}
      >
        {(rendererOutput: RendererOutput) =>
          children(rendererOutput.map((result) => enhanceResult(result)))
        }
      </RendererFn>
    );
  },
  arePropsEqual
);


================================================
FILE: web/src/app/app/message/messageComponents/timeline/TimelineStepComposer.tsx
================================================
import React, { FunctionComponent } from "react";
import { IconProps } from "@opal/types";
import { StepContainer } from "./StepContainer";
import {
  TimelineRendererOutput,
  TimelineRendererResult,
} from "./TimelineRendererComponent";

export interface TimelineStepComposerProps {
  /** Results produced by the active renderer. */
  results: TimelineRendererOutput;
  /** Whether the overall step is the last in the timeline (affects connector). */
  isLastStep: boolean;
  /** Whether the overall step is the first in the timeline (affects connector). */
  isFirstStep: boolean;
  /** Whether the timeline has a single step (used to hide headers). */
  isSingleStep?: boolean;
  /** Whether StepContainer should show collapse controls. */
  collapsible?: boolean;
  /** Optional resolver for custom collapsed icon per result. */
  getCollapsedIcon?: (
    result: TimelineRendererResult
  ) => FunctionComponent<IconProps> | undefined;
}

/**
 * TimelineStepComposer renders renderer results into either raw content blocks
 * or StepContainer-wrapped timeline rows based on the layout contract.
 */
export function TimelineStepComposer({
  results,
  isLastStep,
  isFirstStep,
  isSingleStep = false,
  collapsible = true,
  getCollapsedIcon,
}: TimelineStepComposerProps) {
  return (
    <>
      {results.map((result, index) =>
        result.timelineLayout === "content" ? (
          <React.Fragment key={index}>{result.content}</React.Fragment>
        ) : (
          <StepContainer
            key={index}
            stepIcon={result.icon as FunctionComponent<IconProps> | undefined}
            header={result.status}
            isExpanded={result.isExpanded}
            onToggle={result.onToggle}
            collapsible={
              collapsible && (!isSingleStep || !!result.alwaysCollapsible)
            }
            supportsCollapsible={result.supportsCollapsible}
            isLastStep={index === results.length - 1 && isLastStep}
            isFirstStep={index === 0 && isFirstStep}
            hideHeader={
              results.length === 1 &&
              isSingleStep &&
              !result.supportsCollapsible
            }
            collapsedIcon={
              getCollapsedIcon ? getCollapsedIcon(result) : undefined
            }
            noPaddingRight={result.noPaddingRight ?? false}
            isHover={result.isHover}
            surfaceBackground={result.surfaceBackground}
          >
            {result.content}
          </StepContainer>
        )
      )}
    </>
  );
}

export default TimelineStepComposer;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/headers/CompletedHeader.tsx
================================================
"use client";

import React from "react";
import { SvgFold, SvgExpand, SvgAddLines, SvgMaximize2 } from "@opal/icons";
import { Button } from "@opal/components";
import Tag from "@/refresh-components/buttons/Tag";
import Text from "@/refresh-components/texts/Text";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { Section } from "@/layouts/general-layouts";
import { ContentAction } from "@opal/layouts";
import { formatDurationSeconds } from "@/lib/time";
import { noProp } from "@/lib/utils";
import MemoriesModal from "@/refresh-components/modals/MemoriesModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";

// =============================================================================
// MemoryTagWithTooltip
// =============================================================================

interface MemoryTagWithTooltipProps {
  memoryText: string | null;
  memoryOperation: "add" | "update" | null;
  memoryId: number | null;
  memoryIndex: number | null;
}

function MemoryTagWithTooltip({
  memoryText,
  memoryOperation,
  memoryId,
  memoryIndex,
}: MemoryTagWithTooltipProps) {
  const memoriesModal = useCreateModal();

  const operationLabel =
    memoryOperation === "add" ? "Added to memories" : "Updated memory";

  const tag = <Tag icon={SvgAddLines} label={operationLabel} />;

  if (!memoryText) return tag;

  return (
    <>
      <memoriesModal.Provider>
        <MemoriesModal
          initialTargetMemoryId={memoryId}
          initialTargetIndex={memoryIndex}
          highlightOnOpen
        />
      </memoriesModal.Provider>
      {memoriesModal.isOpen ? (
        <span>{tag}</span>
      ) : (
        <SimpleTooltip
          delayDuration={0}
          side="bottom"
          className="bg-background-neutral-00 text-text-01 shadow-md max-w-[17.5rem] p-1"
          tooltip={
            <Section
              flexDirection="column"
              alignItems="start"
              padding={0.25}
              gap={0.25}
              height="auto"
            >
              <div className="p-1">
                <Text as="p" secondaryBody text03>
                  {memoryText}
                </Text>
              </div>
              <ContentAction
                icon={SvgAddLines}
                title={operationLabel}
                sizePreset="secondary"
                paddingVariant="sm"
                variant="body"
                prominence="muted"
                rightChildren={
                  <Button
                    prominence="tertiary"
                    size="sm"
                    icon={SvgMaximize2}
                    onClick={(e) => {
                      e.stopPropagation();
                      memoriesModal.toggle(true);
                    }}
                  />
                }
              />
            </Section>
          }
        >
          <span>{tag}</span>
        </SimpleTooltip>
      )}
    </>
  );
}

// =============================================================================
// CompletedHeader
// =============================================================================

export interface CompletedHeaderProps {
  totalSteps: number;
  collapsible: boolean;
  isExpanded: boolean;
  onToggle: () => void;
  processingDurationSeconds?: number;
  generatedImageCount?: number;
  isMemoryOnly?: boolean;
  memoryText?: string | null;
  memoryOperation?: "add" | "update" | null;
  memoryId?: number | null;
  memoryIndex?: number | null;
}

/** Header when completed - handles both collapsed and expanded states */
export const CompletedHeader = React.memo(function CompletedHeader({
  totalSteps,
  collapsible,
  isExpanded,
  onToggle,
  processingDurationSeconds = 0,
  generatedImageCount = 0,
  isMemoryOnly = false,
  memoryText = null,
  memoryOperation = null,
  memoryId = null,
  memoryIndex = null,
}: CompletedHeaderProps) {
  if (isMemoryOnly) {
    return (
      <div className="flex w-full justify-between">
        <div className="flex items-center px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]">
          <MemoryTagWithTooltip
            memoryText={memoryText}
            memoryOperation={memoryOperation}
            memoryId={memoryId}
            memoryIndex={memoryIndex}
          />
        </div>
        {collapsible && totalSteps > 0 && isExpanded && (
          <Button
            prominence="tertiary"
            size="md"
            onClick={noProp(onToggle)}
            rightIcon={isExpanded ? SvgFold : SvgExpand}
            aria-label="Expand timeline"
            aria-expanded={isExpanded}
          >
            {`${totalSteps} ${totalSteps === 1 ? "step" : "steps"}`}
          </Button>
        )}
      </div>
    );
  }

  const durationText = processingDurationSeconds
    ? `Thought for ${formatDurationSeconds(processingDurationSeconds)}`
    : "Thought for some time";

  const imageText =
    generatedImageCount > 0
      ? `Generated ${generatedImageCount} ${
          generatedImageCount === 1 ? "image" : "images"
        }`
      : null;

  return (
    <div
      role="button"
      onClick={onToggle}
      className="flex items-center justify-between w-full"
    >
      <div className="flex items-center gap-2 px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]">
        <Text as="p" mainUiAction text03>
          {isExpanded ? durationText : imageText ?? durationText}
        </Text>
        {memoryOperation && !isExpanded && (
          <MemoryTagWithTooltip
            memoryText={memoryText}
            memoryOperation={memoryOperation}
            memoryId={memoryId}
            memoryIndex={memoryIndex}
          />
        )}
      </div>

      {collapsible && totalSteps > 0 && (
        <Button
          prominence="tertiary"
          size="md"
          onClick={noProp(onToggle)}
          rightIcon={isExpanded ? SvgFold : SvgExpand}
          aria-label="Expand timeline"
          aria-expanded={isExpanded}
        >
          {`${totalSteps} ${totalSteps === 1 ? "step" : "steps"}`}
        </Button>
      )}
    </div>
  );
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/headers/ParallelStreamingHeader.tsx
================================================
import React, { useMemo } from "react";
import { SvgFold, SvgExpand } from "@opal/icons";
import Tabs from "@/refresh-components/Tabs";
import { Button } from "@opal/components";
import { TurnGroup } from "../transformers";
import {
  getToolIcon,
  getToolName,
  isToolComplete,
} from "../../toolDisplayHelpers";

export interface ParallelStreamingHeaderProps {
  steps: TurnGroup["steps"];
  activeTab: string;
  onTabChange: (tab: string) => void;
  collapsible: boolean;
  isExpanded: boolean;
  onToggle: () => void;
}

/** Header during streaming with parallel tools - tabs only */
export const ParallelStreamingHeader = React.memo(
  function ParallelStreamingHeader({
    steps,
    activeTab,
    onTabChange,
    collapsible,
    isExpanded,
    onToggle,
  }: ParallelStreamingHeaderProps) {
    // Memoized loading states for each step
    const loadingStates = useMemo(
      () =>
        new Map(
          steps.map((step) => [
            step.key,
            step.packets.length > 0 && !isToolComplete(step.packets),
          ])
        ),
      [steps]
    );

    return (
      <Tabs value={activeTab} onValueChange={onTabChange}>
        <Tabs.List
          variant="pill"
          enableScrollArrows
          rightContent={
            collapsible ? (
              <Button
                prominence="tertiary"
                size="sm"
                onClick={onToggle}
                icon={isExpanded ? SvgFold : SvgExpand}
                aria-label={
                  isExpanded ? "Collapse timeline" : "Expand timeline"
                }
                aria-expanded={isExpanded}
              />
            ) : undefined
          }
          className="bg-transparent"
        >
          {steps.map((step) => (
            <Tabs.Trigger
              key={step.key}
              value={step.key}
              variant="pill"
              isLoading={loadingStates.get(step.key)}
            >
              <span className="flex items-center gap-1.5">
                {getToolIcon(step.packets)}
                {getToolName(step.packets)}
              </span>
            </Tabs.Trigger>
          ))}
        </Tabs.List>
      </Tabs>
    );
  }
);


================================================
FILE: web/src/app/app/message/messageComponents/timeline/headers/StoppedHeader.tsx
================================================
import React from "react";
import { SvgFold, SvgExpand } from "@opal/icons";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { cn, noProp } from "@/lib/utils";

export interface StoppedHeaderProps {
  totalSteps: number;
  collapsible: boolean;
  isExpanded: boolean;
  onToggle: () => void;
}

/** Header when user stopped/cancelled */
export const StoppedHeader = React.memo(function StoppedHeader({
  totalSteps,
  collapsible,
  isExpanded,
  onToggle,
}: StoppedHeaderProps) {
  const isInteractive = collapsible && totalSteps > 0;

  return (
    <div
      role={isInteractive ? "button" : undefined}
      onClick={isInteractive ? onToggle : undefined}
      className={cn(
        "flex items-center justify-between w-full rounded-12",
        isInteractive ? "cursor-pointer" : "cursor-default"
      )}
      aria-disabled={isInteractive ? undefined : true}
    >
      <div className="px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]">
        <Text as="p" mainUiAction text03>
          Interrupted Thinking
        </Text>
      </div>

      {isInteractive && (
        <Button
          prominence="tertiary"
          size="md"
          onClick={noProp(onToggle)}
          rightIcon={isExpanded ? SvgFold : SvgExpand}
          aria-label={isExpanded ? "Collapse timeline" : "Expand timeline"}
          aria-expanded={isExpanded}
        >
          {`${totalSteps} ${totalSteps === 1 ? "step" : "steps"}`}
        </Button>
      )}
    </div>
  );
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/headers/StreamingHeader.tsx
================================================
"use client";

import React from "react";
import { SvgFold, SvgExpand } from "@opal/icons";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { useStreamingDuration } from "../hooks/useStreamingDuration";
import { formatDurationSeconds } from "@/lib/time";

export interface StreamingHeaderProps {
  headerText: string;
  collapsible: boolean;
  buttonTitle?: string;
  isExpanded: boolean;
  onToggle: () => void;
  streamingStartTime?: number;
  /** Tool processing duration from backend (freezes timer when available) */
  toolProcessingDuration?: number;
}

/** Header during streaming - shimmer text with current activity */
export const StreamingHeader = React.memo(function StreamingHeader({
  headerText,
  collapsible,
  buttonTitle,
  isExpanded,
  onToggle,
  streamingStartTime,
  toolProcessingDuration,
}: StreamingHeaderProps) {
  // Use backend duration when available, otherwise continue live timer
  const elapsedSeconds = useStreamingDuration(
    toolProcessingDuration === undefined, // Stop updating when we have backend duration
    streamingStartTime,
    toolProcessingDuration
  );
  const showElapsedTime =
    isExpanded && streamingStartTime && elapsedSeconds > 0;

  return (
    <>
      <div className="px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]">
        <Text
          as="p"
          mainUiAction
          text03
          className="animate-shimmer bg-[length:200%_100%] bg-[linear-gradient(90deg,var(--shimmer-base)_10%,var(--shimmer-highlight)_40%,var(--shimmer-base)_70%)] bg-clip-text text-transparent"
        >
          {headerText}
        </Text>
      </div>

      {collapsible &&
        (buttonTitle ? (
          <Button
            prominence="tertiary"
            size="md"
            onClick={onToggle}
            rightIcon={isExpanded ? SvgFold : SvgExpand}
            aria-expanded={isExpanded}
          >
            {buttonTitle}
          </Button>
        ) : showElapsedTime ? (
          <Button
            prominence="tertiary"
            size="md"
            onClick={onToggle}
            rightIcon={SvgFold}
            aria-label="Collapse timeline"
            aria-expanded={true}
          >
            {formatDurationSeconds(elapsedSeconds)}
          </Button>
        ) : (
          <Button
            prominence="tertiary"
            size="md"
            onClick={onToggle}
            icon={isExpanded ? SvgFold : SvgExpand}
            aria-label={isExpanded ? "Collapse timeline" : "Expand timeline"}
            aria-expanded={isExpanded}
          />
        ))}
    </>
  );
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/__tests__/testHelpers.ts
================================================
/**
 * Shared test helpers for packet processing tests
 */
import {
  Packet,
  PacketType,
  Placement,
  StopReason,
} from "@/app/app/services/streamingModels";
import { OnyxDocument } from "@/lib/search/interfaces";

// Core packet factory
export function createPacket(
  type: PacketType,
  placement: Partial<Placement> = {},
  objOverrides: Record<string, unknown> = {}
): Packet {
  return {
    placement: {
      turn_index: 0,
      tab_index: 0,
      ...placement,
    },
    obj: {
      type,
      ...objOverrides,
    },
  } as Packet;
}

// Stop packet
export function createStopPacket(
  stopReason?: StopReason,
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.STOP, placement, {
    stop_reason: stopReason,
  });
}

// Branching packet
export function createBranchingPacket(
  numBranches: number,
  turnIndex: number
): Packet {
  return createPacket(
    PacketType.TOP_LEVEL_BRANCHING,
    { turn_index: turnIndex },
    { num_parallel_branches: numBranches }
  );
}

// Message packet
export function createMessageStartPacket(
  placement: Partial<Placement> = {},
  preAnswerProcessingSeconds?: number
): Packet {
  return createPacket(PacketType.MESSAGE_START, placement, {
    id: "msg-1",
    content: "",
    final_documents: null,
    ...(preAnswerProcessingSeconds !== undefined && {
      pre_answer_processing_seconds: preAnswerProcessingSeconds,
    }),
  });
}

// Citation packet
export function createCitationPacket(
  citationNumber: number,
  documentId: string,
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.CITATION_INFO, placement, {
    citation_number: citationNumber,
    document_id: documentId,
  });
}

// Image generation packet
export function createImageDeltaPacket(
  imageCount: number,
  placement: Partial<Placement> = {}
): Packet {
  const images = Array.from({ length: imageCount }, (_, i) => ({
    file_id: `file-${i}`,
    url: `https://example.com/image-${i}.png`,
    revised_prompt: `Image ${i}`,
  }));
  return createPacket(PacketType.IMAGE_GENERATION_TOOL_DELTA, placement, {
    images,
  });
}

// Search Tool helpers
export function createSearchToolStartPacket(
  placement: Partial<Placement> = {},
  isInternetSearch?: boolean
): Packet {
  return createPacket(PacketType.SEARCH_TOOL_START, placement, {
    ...(isInternetSearch !== undefined && {
      is_internet_search: isInternetSearch,
    }),
  });
}

export function createSearchToolQueriesPacket(
  queries: string[],
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.SEARCH_TOOL_QUERIES_DELTA, placement, {
    queries,
  });
}

export function createSearchToolDocumentsPacket(
  documents: Partial<OnyxDocument>[],
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA, placement, {
    documents,
  });
}

// Fetch Tool helpers
export function createFetchToolStartPacket(
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.FETCH_TOOL_START, placement);
}

export function createFetchToolUrlsPacket(
  urls: string[],
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.FETCH_TOOL_URLS, placement, {
    urls,
  });
}

export function createFetchToolDocumentsPacket(
  documents: Partial<OnyxDocument>[],
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.FETCH_TOOL_DOCUMENTS, placement, {
    documents,
  });
}

// Python Tool helpers
export function createPythonToolStartPacket(
  code: string,
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.PYTHON_TOOL_START, placement, {
    code,
  });
}

export function createPythonToolDeltaPacket(
  stdout: string,
  stderr: string,
  fileIds: string[],
  placement: Partial<Placement> = {}
): Packet {
  return createPacket(PacketType.PYTHON_TOOL_DELTA, placement, {
    stdout,
    stderr,
    file_ids: fileIds,
  });
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/packetProcessor.test.ts
================================================
/**
 * Unit tests for packetProcessor.ts
 *
 * Tests the pure packet processing functions that handle streaming packet parsing,
 * grouping, and state management. These tests serve as documentation for the
 * packet processing logic and prevent regressions.
 */
import {
  Packet,
  PacketType,
  StopReason,
} from "@/app/app/services/streamingModels";
import { createInitialState, processPackets } from "./packetProcessor";
import {
  createPacket,
  createStopPacket,
  createCitationPacket,
  createBranchingPacket,
  createMessageStartPacket,
  createImageDeltaPacket,
  createSearchToolStartPacket,
  createSearchToolQueriesPacket,
  createSearchToolDocumentsPacket,
  createFetchToolStartPacket,
  createFetchToolUrlsPacket,
  createFetchToolDocumentsPacket,
  createPythonToolStartPacket,
  createPythonToolDeltaPacket,
} from "./__tests__/testHelpers";

// ============================================================================
// Tests
// ============================================================================

describe("packetProcessor", () => {
  describe("createInitialState", () => {
    test("creates state with correct nodeId", () => {
      const state = createInitialState(123);
      expect(state.nodeId).toBe(123);
    });

    test("initializes nextPacketIndex to 0", () => {
      const state = createInitialState(1);
      expect(state.nextPacketIndex).toBe(0);
    });

    test("initializes empty citations array", () => {
      const state = createInitialState(1);
      expect(state.citations).toEqual([]);
    });

    test("initializes empty seenCitationDocIds set", () => {
      const state = createInitialState(1);
      expect(state.seenCitationDocIds.size).toBe(0);
    });

    test("initializes empty citationMap", () => {
      const state = createInitialState(1);
      expect(state.citationMap).toEqual({});
    });

    test("initializes empty documentMap", () => {
      const state = createInitialState(1);
      expect(state.documentMap.size).toBe(0);
    });

    test("initializes empty groupedPacketsMap", () => {
      const state = createInitialState(1);
      expect(state.groupedPacketsMap.size).toBe(0);
    });

    test("initializes finalAnswerComing to false", () => {
      const state = createInitialState(1);
      expect(state.finalAnswerComing).toBe(false);
    });

    test("initializes stopPacketSeen to false", () => {
      const state = createInitialState(1);
      expect(state.stopPacketSeen).toBe(false);
    });

    test("initializes empty toolGroups array", () => {
      const state = createInitialState(1);
      expect(state.toolGroups).toEqual([]);
    });

    test("initializes empty potentialDisplayGroups array", () => {
      const state = createInitialState(1);
      expect(state.potentialDisplayGroups).toEqual([]);
    });
  });

  describe("processPackets - basic behavior", () => {
    test("processes only new packets on subsequent calls", () => {
      const state = createInitialState(1);

      // First call with 2 packets
      const packets1 = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result1 = processPackets(state, packets1);
      expect(result1.nextPacketIndex).toBe(2);

      // Second call with 4 packets (2 new)
      const packets2 = [
        ...packets1,
        createMessageStartPacket({ turn_index: 1 }),
        createStopPacket(),
      ];
      const result2 = processPackets(result1, packets2);
      expect(result2.nextPacketIndex).toBe(4);
    });

    test("skips null packets", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        null as unknown as Packet,
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      // Should process valid packets without error
      expect(result.nextPacketIndex).toBe(3);
      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    test("skips undefined packets", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        undefined as unknown as Packet,
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.nextPacketIndex).toBe(3);
      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    test("does not rebuild result arrays when no new packets", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];

      const result1 = processPackets(state, packets);
      const toolGroups1 = result1.toolGroups;

      // Process same packets again
      const result2 = processPackets(result1, packets);

      // Same reference since no new packets processed
      expect(result2.toolGroups).toBe(toolGroups1);
    });
  });

  describe("processPackets - stream reset detection", () => {
    test("resets state when packets array shrinks", () => {
      const state = createInitialState(1);

      // Process 5 packets
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createSearchToolQueriesPacket(["query1"], { turn_index: 0 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-1" }], {
          turn_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];
      const result1 = processPackets(state, packets);
      expect(result1.nextPacketIndex).toBe(5);
      expect(result1.documentMap.size).toBe(1);

      // Process with shorter array (simulating reset)
      const shorterPackets = [createSearchToolStartPacket({ turn_index: 0 })];
      const result2 = processPackets(result1, shorterPackets);

      // State should be reset
      expect(result2.nextPacketIndex).toBe(1);
      expect(result2.documentMap.size).toBe(0);
    });

    test("preserves nodeId after reset", () => {
      const state = createInitialState(42);

      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result1 = processPackets(state, packets);

      // Shrink array to trigger reset
      const shorterPackets = [createSearchToolStartPacket({ turn_index: 0 })];
      const result2 = processPackets(result1, shorterPackets);

      expect(result2.nodeId).toBe(42);
    });
  });

  describe("packet grouping", () => {
    test("groups packets by turn_index-tab_index key", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolQueriesPacket(["query"], {
          turn_index: 0,
          tab_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.groupedPacketsMap.has("0-0")).toBe(true);
      expect(result.groupedPacketsMap.get("0-0")?.length).toBe(3);
    });

    test("separates packets with different turn_index", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createSearchToolStartPacket({ turn_index: 1 }),
        createPacket(PacketType.SECTION_END, { turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.groupedPacketsMap.has("0-0")).toBe(true);
      expect(result.groupedPacketsMap.has("1-0")).toBe(true);
    });

    test("separates packets with different tab_index (parallel tools)", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.groupedPacketsMap.has("0-0")).toBe(true);
      expect(result.groupedPacketsMap.has("0-1")).toBe(true);
      expect(result.groupedPacketsMap.get("0-0")?.length).toBe(2);
      expect(result.groupedPacketsMap.get("0-1")?.length).toBe(2);
    });
  });

  describe("group categorization", () => {
    test("categorization happens only on first packet of group", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        // These delta packets should not affect categorization
        createSearchToolQueriesPacket(["query"], { turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-0")).toBe(true);
      expect(result.toolGroupKeys.size).toBe(1);
    });

    // Parameterized tests for tool packet types
    test.each([
      [PacketType.SEARCH_TOOL_START, "SEARCH_TOOL_START"],
      [PacketType.PYTHON_TOOL_START, "PYTHON_TOOL_START"],
      [PacketType.FETCH_TOOL_START, "FETCH_TOOL_START"],
      [PacketType.CUSTOM_TOOL_START, "CUSTOM_TOOL_START"],
      [PacketType.FILE_READER_START, "FILE_READER_START"],
      [PacketType.REASONING_START, "REASONING_START"],
      [PacketType.DEEP_RESEARCH_PLAN_START, "DEEP_RESEARCH_PLAN_START"],
      [PacketType.RESEARCH_AGENT_START, "RESEARCH_AGENT_START"],
    ])("%s categorizes as tool group", (packetType) => {
      const state = createInitialState(1);
      const packets = [createPacket(packetType, { turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    // Parameterized tests for display packet types
    test.each([
      [PacketType.MESSAGE_START, "MESSAGE_START"],
      [PacketType.IMAGE_GENERATION_TOOL_START, "IMAGE_GENERATION_TOOL_START"],
    ])("%s categorizes as display group", (packetType) => {
      const state = createInitialState(1);
      const packets = [createPacket(packetType, { turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.displayGroupKeys.has("0-0")).toBe(true);
    });
  });

  describe("SECTION_END and ERROR tracking", () => {
    test("tracks SECTION_END in groupKeysWithSectionEnd", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });

    test("tracks ERROR as completion marker", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(
          PacketType.ERROR,
          { turn_index: 0 },
          { message: "Failed" }
        ),
      ];
      const result = processPackets(state, packets);

      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });
  });

  describe("handleTopLevelBranching", () => {
    test("stores expected branch count in expectedBranches map", () => {
      const state = createInitialState(1);
      const packets = [createBranchingPacket(3, 0)];
      const result = processPackets(state, packets);

      expect(result.expectedBranches.get(0)).toBe(3);
    });

    test("does not add branching packet to any group", () => {
      const state = createInitialState(1);
      const packets = [createBranchingPacket(2, 0)];
      const result = processPackets(state, packets);

      expect(result.groupedPacketsMap.size).toBe(0);
    });

    test("handles multiple branching packets at different turns", () => {
      const state = createInitialState(1);
      const packets = [
        createBranchingPacket(2, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createBranchingPacket(3, 1),
      ];
      const result = processPackets(state, packets);

      expect(result.expectedBranches.get(0)).toBe(2);
      expect(result.expectedBranches.get(1)).toBe(3);
    });
  });

  describe("handleTurnTransition", () => {
    test("injects SECTION_END when turn_index changes", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        // No explicit SECTION_END before turn change
        createMessageStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      // SECTION_END should be injected for turn 0
      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });

    test("does not inject SECTION_END when only tab_index changes", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
      ];
      const result = processPackets(state, packets);

      // No SECTION_END should be injected for parallel tools
      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(false);
      expect(result.groupKeysWithSectionEnd.has("0-1")).toBe(false);
    });

    test("does not inject duplicate SECTION_END", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      const sectionEndCount =
        group?.filter((p) => p.obj.type === PacketType.SECTION_END).length ?? 0;
      expect(sectionEndCount).toBe(1);
    });

    test("injects SECTION_END for all previous groups on turn change", () => {
      const state = createInitialState(1);
      const packets = [
        createBranchingPacket(2, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        // Turn changes
        createMessageStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
      expect(result.groupKeysWithSectionEnd.has("0-1")).toBe(true);
    });
  });

  describe("Search Tool flow", () => {
    test("SEARCH_TOOL_START categorizes group as tool", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    test("SEARCH_TOOL_START with is_internet_search=true", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 }, true)];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect(
        (group?.[0]?.obj as { is_internet_search?: boolean }).is_internet_search
      ).toBe(true);
    });

    test("SEARCH_TOOL_START with is_internet_search=false", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 }, false)];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect(
        (group?.[0]?.obj as { is_internet_search?: boolean }).is_internet_search
      ).toBe(false);
    });

    test("SEARCH_TOOL_QUERIES_DELTA stores queries in packet", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createSearchToolQueriesPacket(["what is AI", "machine learning"], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { queries: string[] }).queries).toEqual([
        "what is AI",
        "machine learning",
      ]);
    });

    test("SEARCH_TOOL_DOCUMENTS_DELTA extracts documents to documentMap", () => {
      const state = createInitialState(1);
      const docs = [
        { document_id: "doc-1", semantic_identifier: "Doc 1" },
        { document_id: "doc-2", semantic_identifier: "Doc 2" },
      ];
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createSearchToolDocumentsPacket(docs, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.documentMap.get("doc-1")).toBeDefined();
      expect(result.documentMap.get("doc-2")).toBeDefined();
    });

    test("full search flow: START -> QUERIES -> DOCUMENTS -> SECTION_END", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }, false),
        createSearchToolQueriesPacket(["test query"], { turn_index: 0 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-1" }], {
          turn_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(1);
      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
      expect(result.documentMap.has("doc-1")).toBe(true);
    });

    test("multiple QUERIES_DELTA packets accumulate", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createSearchToolQueriesPacket(["query 1"], { turn_index: 0 }),
        createSearchToolQueriesPacket(["query 2", "query 3"], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect(group?.length).toBe(3);
    });

    test("multiple DOCUMENTS_DELTA packets accumulate documents", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-1" }], {
          turn_index: 0,
        }),
        createSearchToolDocumentsPacket([{ document_id: "doc-2" }], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      expect(result.documentMap.has("doc-1")).toBe(true);
      expect(result.documentMap.has("doc-2")).toBe(true);
    });

    test("SEARCH_TOOL_START resets finalAnswerComing if after message", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        // Tool comes after message (Claude workaround)
        createSearchToolStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      // finalAnswerComing should be reset since tool follows message
      expect(result.finalAnswerComing).toBe(false);
    });

    test("parallel search tools at same turn_index with different tab_index", () => {
      const state = createInitialState(1);
      const packets = [
        createBranchingPacket(2, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-a" }], {
          turn_index: 0,
          tab_index: 0,
        }),
        createSearchToolDocumentsPacket([{ document_id: "doc-b" }], {
          turn_index: 0,
          tab_index: 1,
        }),
      ];
      const result = processPackets(state, packets);

      expect(result.expectedBranches.get(0)).toBe(2);
      expect(result.toolGroups.length).toBe(2);
      expect(result.documentMap.has("doc-a")).toBe(true);
      expect(result.documentMap.has("doc-b")).toBe(true);
    });
  });

  describe("Fetch Tool flow", () => {
    test("FETCH_TOOL_START categorizes group as tool", () => {
      const state = createInitialState(1);
      const packets = [createFetchToolStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    test("FETCH_TOOL_URLS stores urls in packet", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolUrlsPacket(["https://example.com", "https://test.com"], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { urls: string[] }).urls).toEqual([
        "https://example.com",
        "https://test.com",
      ]);
    });

    test("FETCH_TOOL_DOCUMENTS extracts documents to documentMap", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolDocumentsPacket([{ document_id: "fetched-doc-1" }], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      expect(result.documentMap.has("fetched-doc-1")).toBe(true);
    });

    test("full fetch flow: START -> URLS -> DOCUMENTS -> SECTION_END", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolUrlsPacket(["https://example.com"], { turn_index: 0 }),
        createFetchToolDocumentsPacket([{ document_id: "url-doc" }], {
          turn_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(1);
      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });

    test("multiple URLs in single FETCH_TOOL_URLS packet", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolUrlsPacket(
          ["https://a.com", "https://b.com", "https://c.com"],
          { turn_index: 0 }
        ),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { urls: string[] }).urls.length).toBe(3);
    });

    test("empty urls array handling", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolUrlsPacket([], { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { urls: string[] }).urls).toEqual([]);
    });

    test("FETCH_TOOL_START resets finalAnswerComing if after message", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createFetchToolStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(false);
    });

    test("fetch tool with ERROR instead of SECTION_END", () => {
      const state = createInitialState(1);
      const packets = [
        createFetchToolStartPacket({ turn_index: 0 }),
        createFetchToolUrlsPacket(["https://invalid.com"], { turn_index: 0 }),
        createPacket(
          PacketType.ERROR,
          { turn_index: 0 },
          { error: "Failed to fetch" }
        ),
      ];
      const result = processPackets(state, packets);

      // ERROR counts as section end
      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });
  });

  describe("Python Tool flow", () => {
    test("PYTHON_TOOL_START categorizes group as tool", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("print('hello')", { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-0")).toBe(true);
    });

    test("PYTHON_TOOL_START stores code in packet", () => {
      const state = createInitialState(1);
      const code = "import pandas as pd\ndf = pd.read_csv('data.csv')";
      const packets = [createPythonToolStartPacket(code, { turn_index: 0 })];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[0]?.obj as { code: string }).code).toBe(code);
    });

    test("PYTHON_TOOL_DELTA stores stdout/stderr/file_ids", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("print('test')", { turn_index: 0 }),
        createPythonToolDeltaPacket("test\n", "", [], { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      const delta = group?.[1]?.obj as {
        stdout: string;
        stderr: string;
        file_ids: string[];
      };
      expect(delta.stdout).toBe("test\n");
      expect(delta.stderr).toBe("");
    });

    test("PYTHON_TOOL_DELTA with file_ids (generated files)", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("plt.savefig('chart.png')", {
          turn_index: 0,
        }),
        createPythonToolDeltaPacket("", "", ["file-123", "file-456"], {
          turn_index: 0,
        }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { file_ids: string[] }).file_ids).toEqual([
        "file-123",
        "file-456",
      ]);
    });

    test("multiple DELTA packets (streaming output)", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("for i in range(3): print(i)", {
          turn_index: 0,
        }),
        createPythonToolDeltaPacket("0\n", "", [], { turn_index: 0 }),
        createPythonToolDeltaPacket("1\n", "", [], { turn_index: 0 }),
        createPythonToolDeltaPacket("2\n", "", [], { turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect(group?.length).toBe(5); // START + 3 DELTAs + SECTION_END
    });

    test("python tool with stderr (error output)", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("undefined_var", { turn_index: 0 }),
        createPythonToolDeltaPacket(
          "",
          "NameError: name 'undefined_var' is not defined",
          [],
          { turn_index: 0 }
        ),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      const group = result.groupedPacketsMap.get("0-0");
      expect((group?.[1]?.obj as { stderr: string }).stderr).toContain(
        "NameError"
      );
    });

    test("PYTHON_TOOL_START resets finalAnswerComing if after message", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createPythonToolStartPacket("print(1)", { turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(false);
    });

    test("python tool with ERROR instead of SECTION_END", () => {
      const state = createInitialState(1);
      const packets = [
        createPythonToolStartPacket("crash()", { turn_index: 0 }),
        createPacket(
          PacketType.ERROR,
          { turn_index: 0 },
          { message: "Execution failed" }
        ),
      ];
      const result = processPackets(state, packets);

      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });
  });

  describe("handleStreamingStatusPacket", () => {
    test("sets finalAnswerComing on MESSAGE_START", () => {
      const state = createInitialState(1);
      const packets = [createMessageStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(true);
    });

    test("sets finalAnswerComing on MESSAGE_DELTA", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createPacket(
          PacketType.MESSAGE_DELTA,
          { turn_index: 0 },
          { content: "Hello" }
        ),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(true);
    });

    test("sets finalAnswerComing on IMAGE_GENERATION_TOOL_START", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(true);
    });

    test("captures toolProcessingDuration from MESSAGE_START", () => {
      const state = createInitialState(1);
      const packets = [createMessageStartPacket({ turn_index: 0 }, 2.5)];
      const result = processPackets(state, packets);

      expect(result.toolProcessingDuration).toBe(2.5);
    });
  });

  describe("handleStopPacket", () => {
    test("sets stopPacketSeen to true", () => {
      const state = createInitialState(1);
      const packets = [createStopPacket()];
      const result = processPackets(state, packets);

      expect(result.stopPacketSeen).toBe(true);
    });

    test("stores stop reason", () => {
      const state = createInitialState(1);
      const packets = [createStopPacket(StopReason.FINISHED)];
      const result = processPackets(state, packets);

      expect(result.stopReason).toBe(StopReason.FINISHED);
    });

    test("injects SECTION_END for all incomplete groups", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        // No explicit SECTION_END
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.groupKeysWithSectionEnd.has("0-0")).toBe(true);
    });

    test("does not process duplicate STOP packets", () => {
      const state = createInitialState(1);
      const packets = [
        createStopPacket(StopReason.FINISHED),
        createStopPacket(StopReason.USER_CANCELLED),
      ];
      const result = processPackets(state, packets);

      // First stop reason should be preserved
      expect(result.stopReason).toBe(StopReason.FINISHED);
    });
  });

  describe("handleToolAfterMessagePacket", () => {
    test("resets finalAnswerComing when actual tool follows message", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createSearchToolStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(false);
    });

    test("REASONING_START does NOT reset finalAnswerComing (critical fix)", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createPacket(PacketType.REASONING_START, { turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      // Reasoning is just thinking, not an actual tool call
      expect(result.finalAnswerComing).toBe(true);
    });

    test("REASONING_DELTA does NOT reset finalAnswerComing", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createPacket(
          PacketType.REASONING_DELTA,
          { turn_index: 1 },
          { reasoning: "thinking..." }
        ),
      ];
      const result = processPackets(state, packets);

      expect(result.finalAnswerComing).toBe(true);
    });

    test("does not reset finalAnswerComing if stopPacketSeen", () => {
      const state = createInitialState(1);
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createStopPacket(),
        createSearchToolStartPacket({ turn_index: 1 }),
      ];
      const result = processPackets(state, packets);

      // Stop already seen, so finalAnswerComing should remain true
      expect(result.finalAnswerComing).toBe(true);
    });
  });

  describe("image generation counting", () => {
    test("sets isGeneratingImage on IMAGE_GENERATION_TOOL_START", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.isGeneratingImage).toBe(true);
    });

    test("counts images from IMAGE_GENERATION_TOOL_DELTA", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
        createImageDeltaPacket(2, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.generatedImageCount).toBe(2);
    });

    test("accumulates image count from multiple DELTA packets", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
        createImageDeltaPacket(1, { turn_index: 0 }),
        createImageDeltaPacket(2, { turn_index: 0 }),
        createImageDeltaPacket(1, { turn_index: 0 }),
      ];
      const result = processPackets(state, packets);

      expect(result.generatedImageCount).toBe(4);
    });
  });

  describe("buildGroupsFromKeys", () => {
    test("filters out groups without content packets", () => {
      const state = createInitialState(1);
      // Create a group with only SECTION_END (no content packet)
      const packets = [createPacket(PacketType.SECTION_END, { turn_index: 0 })];
      const result = processPackets(state, packets);

      // Group should exist in map but not in result arrays
      expect(result.groupedPacketsMap.has("0-0")).toBe(true);
      expect(result.toolGroups.length).toBe(0);
      expect(result.potentialDisplayGroups.length).toBe(0);
    });

    test("sorts groups by turn_index then tab_index", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 1, tab_index: 1 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 1, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
      ];
      const result = processPackets(state, packets);

      const keys = result.toolGroups.map(
        (g) => `${g.turn_index}-${g.tab_index}`
      );
      expect(keys).toEqual(["0-0", "0-1", "1-0", "1-1"]);
    });

    test("creates new packet array references (immutability)", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      const mapPackets = result.groupedPacketsMap.get("0-0");
      const resultPackets = result.toolGroups[0]?.packets;

      // Should be different array references
      expect(resultPackets).not.toBe(mapPackets);
      // But same content
      expect(resultPackets).toEqual(mapPackets);
    });

    test("includes groups with MESSAGE_START as content", () => {
      const state = createInitialState(1);
      const packets = [createMessageStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.potentialDisplayGroups.length).toBe(1);
    });

    test("includes groups with SEARCH_TOOL_START as content", () => {
      const state = createInitialState(1);
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(1);
    });
  });

  describe("multi-tool scenarios", () => {
    test("Search + Python + Fetch in same conversation", () => {
      const state = createInitialState(1);
      const packets = [
        // Turn 0: Search
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolDocumentsPacket([{ document_id: "search-doc" }], {
          turn_index: 0,
          tab_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),

        // Turn 1: Python
        createPythonToolStartPacket("analyze()", {
          turn_index: 1,
          tab_index: 0,
        }),
        createPythonToolDeltaPacket("Result: 42", "", [], {
          turn_index: 1,
          tab_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 1, tab_index: 0 }),

        // Turn 2: Fetch
        createFetchToolStartPacket({ turn_index: 2, tab_index: 0 }),
        createFetchToolUrlsPacket(["https://api.example.com"], {
          turn_index: 2,
          tab_index: 0,
        }),
        createFetchToolDocumentsPacket([{ document_id: "fetch-doc" }], {
          turn_index: 2,
          tab_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 2, tab_index: 0 }),

        // Turn 3: Final answer
        createMessageStartPacket({ turn_index: 3, tab_index: 0 }),
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(3);
      expect(result.potentialDisplayGroups.length).toBe(1);
      expect(result.documentMap.has("search-doc")).toBe(true);
      expect(result.documentMap.has("fetch-doc")).toBe(true);
      expect(result.finalAnswerComing).toBe(true);
      expect(result.stopPacketSeen).toBe(true);
    });

    test("parallel search tools then message", () => {
      const state = createInitialState(1);
      const packets = [
        createBranchingPacket(3, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 2 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-0" }], {
          turn_index: 0,
          tab_index: 0,
        }),
        createSearchToolDocumentsPacket([{ document_id: "doc-1" }], {
          turn_index: 0,
          tab_index: 1,
        }),
        createSearchToolDocumentsPacket([{ document_id: "doc-2" }], {
          turn_index: 0,
          tab_index: 2,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 2 }),
        createMessageStartPacket({ turn_index: 1 }),
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(3);
      expect(result.expectedBranches.get(0)).toBe(3);
      expect(result.documentMap.size).toBe(3);
      expect(result.finalAnswerComing).toBe(true);
    });

    test("tool-after-message Claude workaround scenario", () => {
      const state = createInitialState(1);
      const packets = [
        // Claude sends message first
        createMessageStartPacket({ turn_index: 0 }),
        createPacket(
          PacketType.MESSAGE_DELTA,
          { turn_index: 0 },
          { content: "Let me search for that..." }
        ),
        // Then tool is called (this is the workaround case)
        createSearchToolStartPacket({ turn_index: 1 }),
        createSearchToolDocumentsPacket([{ document_id: "doc-1" }], {
          turn_index: 1,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 1 }),
        // Then actual final answer
        createMessageStartPacket({ turn_index: 2 }),
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(1);
      expect(result.potentialDisplayGroups.length).toBe(2);
      expect(result.finalAnswerComing).toBe(true);
    });

    test("image generation flow", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
        createImageDeltaPacket(1, { turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.isGeneratingImage).toBe(true);
      expect(result.generatedImageCount).toBe(1);
      expect(result.finalAnswerComing).toBe(true);
      expect(result.displayGroupKeys.has("0-0")).toBe(true);
    });

    test("deep research with sub-agents", () => {
      const state = createInitialState(1);
      const packets = [
        createPacket(PacketType.DEEP_RESEARCH_PLAN_START, { turn_index: 0 }),
        createPacket(
          PacketType.DEEP_RESEARCH_PLAN_DELTA,
          { turn_index: 0 },
          { content: "Plan..." }
        ),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createPacket(
          PacketType.RESEARCH_AGENT_START,
          { turn_index: 1 },
          { research_task: "Research topic A" }
        ),
        createSearchToolStartPacket({ turn_index: 1, sub_turn_index: 0 }),
        createPacket(PacketType.SECTION_END, {
          turn_index: 1,
          sub_turn_index: 0,
        }),
        createPacket(PacketType.SECTION_END, { turn_index: 1 }),
        createMessageStartPacket({ turn_index: 2 }),
        createStopPacket(),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroups.length).toBe(2); // Plan + Research agent
      expect(result.potentialDisplayGroups.length).toBe(1);
    });
  });

  describe("edge cases", () => {
    test("handles empty packets array", () => {
      const state = createInitialState(1);
      const result = processPackets(state, []);

      expect(result.nextPacketIndex).toBe(0);
      expect(result.toolGroups).toEqual([]);
    });

    test("handles sparse packets array", () => {
      const state = createInitialState(1);
      const packets: Packet[] = [];
      packets[0] = createSearchToolStartPacket({ turn_index: 0 });
      packets[5] = createPacket(PacketType.SECTION_END, { turn_index: 0 });

      const result = processPackets(state, packets);

      // Should handle sparse array
      expect(result.nextPacketIndex).toBe(6);
    });

    test("handles large turn indices", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 9999 }),
        createPacket(PacketType.SECTION_END, { turn_index: 9999 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("9999-0")).toBe(true);
    });

    test("handles large tab indices", () => {
      const state = createInitialState(1);
      const packets = [
        createSearchToolStartPacket({ turn_index: 0, tab_index: 999 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 999 }),
      ];
      const result = processPackets(state, packets);

      expect(result.toolGroupKeys.has("0-999")).toBe(true);
    });
  });
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/packetProcessor.ts
================================================
import {
  Packet,
  PacketType,
  StreamingCitation,
  StopReason,
  CitationInfo,
  SearchToolDocumentsDelta,
  FetchToolDocuments,
  TopLevelBranching,
  Stop,
  ImageGenerationToolDelta,
  MessageStart,
  ToolCallArgumentDelta,
  CODE_INTERPRETER_TOOL_TYPES,
} from "@/app/app/services/streamingModels";
import { CitationMap } from "@/app/app/interfaces";
import { OnyxDocument } from "@/lib/search/interfaces";
import {
  isActualToolCallPacket,
  isToolPacket,
  isDisplayPacket,
} from "@/app/app/services/packetUtils";
import { parseToolKey } from "@/app/app/message/messageComponents/toolDisplayHelpers";

// Re-export parseToolKey for consumers that import from this module
export { parseToolKey };

// ============================================================================
// Types
// ============================================================================

export interface ProcessorState {
  nodeId: number;
  nextPacketIndex: number;

  // Citations
  citations: StreamingCitation[];
  seenCitationDocIds: Set<string>;
  citationMap: CitationMap;

  // Documents
  documentMap: Map<string, OnyxDocument>;

  // Packet grouping
  groupedPacketsMap: Map<string, Packet[]>;
  seenGroupKeys: Set<string>;
  groupKeysWithSectionEnd: Set<string>;
  expectedBranches: Map<number, number>;

  // Pre-categorized groups (populated during packet processing)
  toolGroupKeys: Set<string>;
  displayGroupKeys: Set<string>;

  // Image generation status
  isGeneratingImage: boolean;
  generatedImageCount: number;

  // Streaming status
  finalAnswerComing: boolean;
  stopPacketSeen: boolean;
  stopReason: StopReason | undefined;

  // Tool processing duration from backend (captured when MESSAGE_START arrives)
  toolProcessingDuration: number | undefined;

  // Result arrays (built at end of processPackets)
  toolGroups: GroupedPacket[];
  potentialDisplayGroups: GroupedPacket[];
}

export interface GroupedPacket {
  turn_index: number;
  tab_index: number;
  packets: Packet[];
}

// ============================================================================
// State Creation
// ============================================================================

export function createInitialState(nodeId: number): ProcessorState {
  return {
    nodeId,
    nextPacketIndex: 0,
    citations: [],
    seenCitationDocIds: new Set(),
    citationMap: {},
    documentMap: new Map(),
    groupedPacketsMap: new Map(),
    seenGroupKeys: new Set(),
    groupKeysWithSectionEnd: new Set(),
    expectedBranches: new Map(),
    toolGroupKeys: new Set(),
    displayGroupKeys: new Set(),
    isGeneratingImage: false,
    generatedImageCount: 0,
    finalAnswerComing: false,
    stopPacketSeen: false,
    stopReason: undefined,
    toolProcessingDuration: undefined,
    toolGroups: [],
    potentialDisplayGroups: [],
  };
}

// ============================================================================
// Helper Functions
// ============================================================================

function getGroupKey(packet: Packet): string {
  const turnIndex = packet.placement.turn_index;
  const tabIndex = packet.placement.tab_index ?? 0;
  return `${turnIndex}-${tabIndex}`;
}

function injectSectionEnd(state: ProcessorState, groupKey: string): void {
  if (state.groupKeysWithSectionEnd.has(groupKey)) {
    return; // Already has SECTION_END
  }

  const { turn_index, tab_index } = parseToolKey(groupKey);

  const syntheticPacket: Packet = {
    placement: { turn_index, tab_index },
    obj: { type: PacketType.SECTION_END },
  };

  const existingGroup = state.groupedPacketsMap.get(groupKey);
  if (existingGroup) {
    existingGroup.push(syntheticPacket);
  }
  state.groupKeysWithSectionEnd.add(groupKey);
}

/**
 * Content packet types that indicate a group has meaningful content to display
 */
const CONTENT_PACKET_TYPES_SET = new Set<PacketType>([
  PacketType.MESSAGE_START,
  PacketType.SEARCH_TOOL_START,
  PacketType.IMAGE_GENERATION_TOOL_START,
  PacketType.PYTHON_TOOL_START,
  PacketType.TOOL_CALL_ARGUMENT_DELTA,
  PacketType.CUSTOM_TOOL_START,
  PacketType.FILE_READER_START,
  PacketType.FETCH_TOOL_START,
  PacketType.MEMORY_TOOL_START,
  PacketType.MEMORY_TOOL_NO_ACCESS,
  PacketType.REASONING_START,
  PacketType.DEEP_RESEARCH_PLAN_START,
  PacketType.RESEARCH_AGENT_START,
]);

function hasContentPackets(packets: Packet[]): boolean {
  return packets.some((packet) => {
    const type = packet.obj.type as PacketType;
    if (type === PacketType.TOOL_CALL_ARGUMENT_DELTA) {
      return (
        (packet.obj as ToolCallArgumentDelta).tool_type ===
        CODE_INTERPRETER_TOOL_TYPES.PYTHON
      );
    }
    return CONTENT_PACKET_TYPES_SET.has(type);
  });
}

/**
 * Packet types that indicate final answer content is coming
 */
const FINAL_ANSWER_PACKET_TYPES_SET = new Set<PacketType>([
  PacketType.MESSAGE_START,
  PacketType.MESSAGE_DELTA,
  PacketType.IMAGE_GENERATION_TOOL_START,
  PacketType.IMAGE_GENERATION_TOOL_DELTA,
]);

// ============================================================================
// Packet Handlers
// ============================================================================

function handleTopLevelBranching(state: ProcessorState, packet: Packet): void {
  const branchingPacket = packet.obj as TopLevelBranching;
  state.expectedBranches.set(
    packet.placement.turn_index,
    branchingPacket.num_parallel_branches
  );
}

function handleTurnTransition(state: ProcessorState, packet: Packet): void {
  const currentTurnIndex = packet.placement.turn_index;

  // Get all previous turn indices from seen group keys
  const previousTurnIndices = new Set(
    Array.from(state.seenGroupKeys).map((key) => parseToolKey(key).turn_index)
  );

  const isNewTurnIndex = !previousTurnIndices.has(currentTurnIndex);

  // If we see a new turn_index (not just tab_index), inject SECTION_END for previous groups
  if (isNewTurnIndex && state.seenGroupKeys.size > 0) {
    state.seenGroupKeys.forEach((prevGroupKey) => {
      if (!state.groupKeysWithSectionEnd.has(prevGroupKey)) {
        injectSectionEnd(state, prevGroupKey);
      }
    });
  }
}

function handleCitationPacket(state: ProcessorState, packet: Packet): void {
  if (packet.obj.type !== PacketType.CITATION_INFO) {
    return;
  }

  const citationInfo = packet.obj as CitationInfo;

  // Add to citation map immediately for rendering
  state.citationMap[citationInfo.citation_number] = citationInfo.document_id;

  // Also add to citations array for CitedSourcesToggle (deduplicated)
  if (!state.seenCitationDocIds.has(citationInfo.document_id)) {
    state.seenCitationDocIds.add(citationInfo.document_id);
    state.citations.push({
      citation_num: citationInfo.citation_number,
      document_id: citationInfo.document_id,
    });
  }
}

function handleDocumentPacket(state: ProcessorState, packet: Packet): void {
  if (packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA) {
    const docDelta = packet.obj as SearchToolDocumentsDelta;
    if (docDelta.documents) {
      for (const doc of docDelta.documents) {
        if (doc.document_id) {
          state.documentMap.set(doc.document_id, doc);
        }
      }
    }
  } else if (packet.obj.type === PacketType.FETCH_TOOL_DOCUMENTS) {
    const fetchDocuments = packet.obj as FetchToolDocuments;
    if (fetchDocuments.documents) {
      for (const doc of fetchDocuments.documents) {
        if (doc.document_id) {
          state.documentMap.set(doc.document_id, doc);
        }
      }
    }
  }
}

function handleStreamingStatusPacket(
  state: ProcessorState,
  packet: Packet
): void {
  // Check if final answer is coming
  if (FINAL_ANSWER_PACKET_TYPES_SET.has(packet.obj.type as PacketType)) {
    state.finalAnswerComing = true;
  }

  // Capture pre-answer processing time from MESSAGE_START packet
  if (packet.obj.type === PacketType.MESSAGE_START) {
    const messageStart = packet.obj as MessageStart;
    if (messageStart.pre_answer_processing_seconds !== undefined) {
      state.toolProcessingDuration = messageStart.pre_answer_processing_seconds;
    }
  }
}

function handleStopPacket(state: ProcessorState, packet: Packet): void {
  if (packet.obj.type !== PacketType.STOP || state.stopPacketSeen) {
    return;
  }

  state.stopPacketSeen = true;

  // Extract and store the stop reason
  const stopPacket = packet.obj as Stop;
  state.stopReason = stopPacket.stop_reason;

  // Inject SECTION_END for all group keys that don't have one
  state.seenGroupKeys.forEach((groupKey) => {
    if (!state.groupKeysWithSectionEnd.has(groupKey)) {
      injectSectionEnd(state, groupKey);
    }
  });
}

function handleToolAfterMessagePacket(
  state: ProcessorState,
  packet: Packet
): void {
  // Handles case where we get a Message packet from Claude, and then tool
  // calling packets. We use isActualToolCallPacket instead of isToolPacket
  // to exclude reasoning packets - reasoning is just the model thinking,
  // not an actual tool call that would produce new content.
  if (
    state.finalAnswerComing &&
    !state.stopPacketSeen &&
    isActualToolCallPacket(packet)
  ) {
    state.finalAnswerComing = false;
  }
}

function addPacketToGroup(
  state: ProcessorState,
  packet: Packet,
  groupKey: string
): void {
  const existingGroup = state.groupedPacketsMap.get(groupKey);
  if (existingGroup) {
    existingGroup.push(packet);
  } else {
    state.groupedPacketsMap.set(groupKey, [packet]);
  }
}

// ============================================================================
// Main Processing Function
// ============================================================================

function processPacket(state: ProcessorState, packet: Packet): void {
  if (!packet) return;

  // Handle TopLevelBranching packets - these tell us how many parallel branches to expect
  if (packet.obj.type === PacketType.TOP_LEVEL_BRANCHING) {
    handleTopLevelBranching(state, packet);
    // Don't add this packet to any group, it's just metadata
    return;
  }

  // Handle turn transitions (inject SECTION_END for previous groups)
  handleTurnTransition(state, packet);

  // Track group key
  const groupKey = getGroupKey(packet);
  state.seenGroupKeys.add(groupKey);

  // Track SECTION_END and ERROR packets (both indicate completion)
  if (
    packet.obj.type === PacketType.SECTION_END ||
    packet.obj.type === PacketType.ERROR
  ) {
    state.groupKeysWithSectionEnd.add(groupKey);
  }

  // Check if this is the first packet in the group (before adding)
  const existingGroup = state.groupedPacketsMap.get(groupKey);
  const isFirstPacket = !existingGroup;

  // Add packet to group
  addPacketToGroup(state, packet, groupKey);

  // Categorize on first packet of each group
  if (isFirstPacket) {
    if (isToolPacket(packet, false)) {
      state.toolGroupKeys.add(groupKey);
    }
    if (isDisplayPacket(packet)) {
      state.displayGroupKeys.add(groupKey);
    }
  }

  // Track image generation for header display (regardless of group position)
  if (packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START) {
    state.isGeneratingImage = true;
  }

  // Count generated images from DELTA packets
  if (packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA) {
    const delta = packet.obj as ImageGenerationToolDelta;
    state.generatedImageCount += delta.images?.length ?? 0;
  }

  // Handle specific packet types
  handleCitationPacket(state, packet);
  handleDocumentPacket(state, packet);
  handleStreamingStatusPacket(state, packet);
  handleStopPacket(state, packet);
  handleToolAfterMessagePacket(state, packet);
}

export function processPackets(
  state: ProcessorState,
  rawPackets: Packet[]
): ProcessorState {
  // Handle reset (packets array shrunk - upstream replaced with shorter list)
  if (state.nextPacketIndex > rawPackets.length) {
    state = createInitialState(state.nodeId);
  }

  // Track if we processed any new packets
  const prevProcessedIndex = state.nextPacketIndex;

  // Process only new packets
  for (let i = state.nextPacketIndex; i < rawPackets.length; i++) {
    const packet = rawPackets[i];
    if (packet) {
      processPacket(state, packet);
    }
  }

  state.nextPacketIndex = rawPackets.length;

  // Only rebuild result arrays if we processed new packets
  // This prevents creating new references when nothing changed
  if (prevProcessedIndex !== rawPackets.length) {
    // Build result arrays after processing new packets
    state.toolGroups = buildGroupsFromKeys(state, state.toolGroupKeys);
    state.potentialDisplayGroups = buildGroupsFromKeys(
      state,
      state.displayGroupKeys
    );
  }

  return state;
}

/**
 * Build GroupedPacket array from a set of group keys.
 * Filters to only include groups with meaningful content and sorts by turn/tab index.
 *
 * @example
 * // Input: state.groupedPacketsMap + keys Set
 * // ┌─────────────────────────────────────────────────────┐
 * // │ groupedPacketsMap = {                               │
 * // │   "0-0" → [packet1, packet2]                       │
 * // │   "0-1" → [packet3]                                │
 * // │   "1-0" → [packet4, packet5]                       │
 * // │   "2-0" → [empty_packet]  ← no content packets     │
 * // │ }                                                  │
 * // │ keys = Set{"0-0", "0-1", "1-0", "2-0"}             │
 * // └─────────────────────────────────────────────────────┘
 * //
 * // Step 1: Map keys → GroupedPacket (parse key, lookup packets)
 * // ┌─────────────────────────────────────────────────────┐
 * // │ "0-0" → { turn_index:0, tab_index:0, packets:[...] }│
 * // │ "0-1" → { turn_index:0, tab_index:1, packets:[...] }│
 * // │ "1-0" → { turn_index:1, tab_index:0, packets:[...] }│
 * // │ "2-0" → { turn_index:2, tab_index:0, packets:[...] }│
 * // └─────────────────────────────────────────────────────┘
 * //
 * // Step 2: Filter (hasContentPackets check)
 * // ┌─────────────────────────────────────────────────────┐
 * // │ ✓ "0-0" has MESSAGE_START        → keep            │
 * // │ ✓ "0-1" has SEARCH_TOOL_START    → keep            │
 * // │ ✓ "1-0" has PYTHON_TOOL_START    → keep            │
 * // │ ✗ "2-0" no content packets       → filtered out    │
 * // └─────────────────────────────────────────────────────┘
 * //
 * // Step 3: Sort by turn_index, then tab_index
 * // ┌─────────────────────────────────────────────────────┐
 * // │ Output: GroupedPacket[]                             │
 * // ├─────────────────────────────────────────────────────┤
 * // │ [0] turn_index=0, tab_index=0, packets=[...]       │
 * // │ [1] turn_index=0, tab_index=1, packets=[...]       │
 * // │ [2] turn_index=1, tab_index=0, packets=[...]       │
 * // └─────────────────────────────────────────────────────┘
 */
function buildGroupsFromKeys(
  state: ProcessorState,
  keys: Set<string>
): GroupedPacket[] {
  return Array.from(keys)
    .map((key) => {
      const { turn_index, tab_index } = parseToolKey(key);
      const packets = state.groupedPacketsMap.get(key);
      // Spread to create new array reference - ensures React detects changes for re-renders
      return packets ? { turn_index, tab_index, packets: [...packets] } : null;
    })
    .filter(
      (g): g is GroupedPacket => g !== null && hasContentPackets(g.packets)
    )
    .sort((a, b) => {
      if (a.turn_index !== b.turn_index) {
        return a.turn_index - b.turn_index;
      }
      return a.tab_index - b.tab_index;
    });
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups.test.tsx
================================================
/**
 * Tests for usePacedTurnGroups hook
 *
 * Tests the pacing logic that reveals steps with delays during streaming.
 * Uses @testing-library/react's renderHook with fake timers.
 */
import { renderHook, act } from "@testing-library/react";
import { PacketType, Packet } from "@/app/app/services/streamingModels";
import { TurnGroup, TransformedStep } from "../transformers";
import { GroupedPacket } from "./packetProcessor";
import { usePacedTurnGroups } from "./usePacedTurnGroups";

// ============================================================================
// Test Helpers
// ============================================================================

/**
 * Create a mock TransformedStep with a TOOL_START packet
 */
function createStep(
  turnIndex: number,
  tabIndex: number,
  packetType: PacketType = PacketType.SEARCH_TOOL_START
): TransformedStep {
  return {
    key: `${turnIndex}-${tabIndex}`,
    turnIndex,
    tabIndex,
    packets: [
      {
        placement: { turn_index: turnIndex, tab_index: tabIndex },
        obj: { type: packetType },
      } as Packet,
    ],
  };
}

/**
 * Create a TurnGroup from steps
 */
function createTurnGroup(steps: TransformedStep[]): TurnGroup {
  if (steps.length === 0) throw new Error("TurnGroup needs at least one step");
  return {
    turnIndex: steps[0]!.turnIndex,
    steps,
    isParallel: steps.length > 1,
  };
}

/**
 * Create a mock display group (MESSAGE_START)
 */
function createDisplayGroup(turnIndex: number): GroupedPacket {
  return {
    turn_index: turnIndex,
    tab_index: 0,
    packets: [
      {
        placement: { turn_index: turnIndex, tab_index: 0 },
        obj: {
          type: PacketType.MESSAGE_START,
          id: "msg-1",
          content: "",
          final_documents: null,
        },
      } as Packet,
    ],
  };
}

// ============================================================================
// Tests
// ============================================================================

describe("usePacedTurnGroups", () => {
  beforeEach(() => {
    jest.useFakeTimers();
  });

  afterEach(() => {
    jest.useRealTimers();
  });

  describe("initial state", () => {
    test("returns empty arrays when no turn groups provided", () => {
      const { result } = renderHook(() =>
        usePacedTurnGroups([], [], false, 1, false)
      );

      expect(result.current.pacedTurnGroups).toEqual([]);
      expect(result.current.pacedDisplayGroups).toEqual([]);
      expect(result.current.pacedFinalAnswerComing).toBe(false);
    });
  });

  describe("bypass pacing for completed messages", () => {
    test("returns all turn groups immediately when stopPacketSeen on first render", () => {
      const step1 = createStep(0, 0);
      const step2 = createStep(1, 0);
      const turnGroups = [createTurnGroup([step1]), createTurnGroup([step2])];
      const displayGroups = [createDisplayGroup(2)];

      const { result } = renderHook(() =>
        usePacedTurnGroups(turnGroups, displayGroups, true, 1, true)
      );

      // All steps revealed immediately - no pacing
      expect(result.current.pacedTurnGroups.length).toBe(2);
      expect(result.current.pacedDisplayGroups.length).toBe(1);
      expect(result.current.pacedFinalAnswerComing).toBe(true);
    });
  });

  describe("stop packet handling", () => {
    test("flushes all pending steps when stop packet received", () => {
      const step1 = createStep(0, 0);
      const step2 = createStep(1, 0);
      const step3 = createStep(2, 0);

      // Start with first step
      const { result, rerender } = renderHook(
        ({ turnGroups, stopPacketSeen }) =>
          usePacedTurnGroups(turnGroups, [], stopPacketSeen, 1, false),
        {
          initialProps: {
            turnGroups: [createTurnGroup([step1])],
            stopPacketSeen: false,
          },
        }
      );

      // First step revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add more steps
      rerender({
        turnGroups: [
          createTurnGroup([step1]),
          createTurnGroup([step2]),
          createTurnGroup([step3]),
        ],
        stopPacketSeen: false,
      });

      // Still only first step (others pending)
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // STOP packet arrives - flush all
      rerender({
        turnGroups: [
          createTurnGroup([step1]),
          createTurnGroup([step2]),
          createTurnGroup([step3]),
        ],
        stopPacketSeen: true,
      });

      // All steps revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(3);
    });
  });

  describe("nodeId change reset", () => {
    test("resets pacing state when nodeId changes", () => {
      const step1 = createStep(0, 0);
      const turnGroups = [createTurnGroup([step1])];

      const { result, rerender } = renderHook(
        ({ nodeId }) =>
          usePacedTurnGroups(turnGroups, [], false, nodeId, false),
        { initialProps: { nodeId: 1 } }
      );

      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Change nodeId - should reset state
      rerender({ nodeId: 2 });

      // First step of new message revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);
    });
  });

  describe("step pacing", () => {
    test("first step is revealed immediately", () => {
      const step1 = createStep(0, 0);
      const turnGroups = [createTurnGroup([step1])];

      const { result } = renderHook(() =>
        usePacedTurnGroups(turnGroups, [], false, 1, false)
      );

      // First step revealed immediately without timer
      expect(result.current.pacedTurnGroups.length).toBe(1);
      expect(result.current.pacedTurnGroups[0]?.steps[0]?.key).toBe("0-0");
    });

    test("second step is revealed after 200ms delay", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add second step
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Still only first step
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Advance timer
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Now second step revealed
      expect(result.current.pacedTurnGroups.length).toBe(2);
    });

    test("third step is revealed after 400ms total (200ms after second)", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add second and third steps
      const step2 = createStep(1, 0);
      const step3 = createStep(2, 0);
      rerender({
        turnGroups: [
          createTurnGroup([step1]),
          createTurnGroup([step2]),
          createTurnGroup([step3]),
        ],
      });

      // Still only first step
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // After 200ms - second step
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(2);

      // After another 200ms (400ms total) - third step
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(3);
    });

    test("same-type steps are paced with delay (NOT batched)", () => {
      const step1 = createStep(0, 0, PacketType.SEARCH_TOOL_START);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add two more SEARCH_TOOL steps (same type as first)
      const step2 = createStep(1, 0, PacketType.SEARCH_TOOL_START);
      const step3 = createStep(2, 0, PacketType.SEARCH_TOOL_START);
      rerender({
        turnGroups: [
          createTurnGroup([step1]),
          createTurnGroup([step2]),
          createTurnGroup([step3]),
        ],
      });

      // Still only first step - same-type steps should NOT be batched
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // After 200ms - second step (even though same type)
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(2);

      // After another 200ms - third step (even though same type)
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(3);
    });

    test("different-type steps are paced with delay", () => {
      const step1 = createStep(0, 0, PacketType.SEARCH_TOOL_START);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add step of different type
      const step2 = createStep(1, 0, PacketType.PYTHON_TOOL_START);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Still only first step
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // After 200ms - second step
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(2);
    });
  });

  describe("display groups", () => {
    test("display groups shown only after tool pacing complete", () => {
      const step1 = createStep(0, 0);
      const displayGroup = createDisplayGroup(1);

      const { result, rerender } = renderHook(
        ({ turnGroups }) =>
          usePacedTurnGroups(turnGroups, [displayGroup], false, 1, true),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed, but display groups hidden until pacing complete
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add second step
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Display groups still hidden (pacing not complete)
      expect(result.current.pacedDisplayGroups.length).toBe(0);

      // Complete pacing
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Now pacing is complete, display groups shown
      expect(result.current.pacedTurnGroups.length).toBe(2);
      expect(result.current.pacedDisplayGroups.length).toBe(1);
    });

    test("display groups shown immediately when no tool steps", () => {
      const displayGroup = createDisplayGroup(0);

      const { result } = renderHook(() =>
        usePacedTurnGroups([], [displayGroup], false, 1, true)
      );

      // No tools = pacing complete immediately
      expect(result.current.pacedDisplayGroups.length).toBe(1);
      expect(result.current.pacedFinalAnswerComing).toBe(true);
    });
  });

  describe("pacedFinalAnswerComing", () => {
    test("returns false when tool pacing not complete", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, true),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // Add second step (creates pending)
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Pacing not complete
      expect(result.current.pacedFinalAnswerComing).toBe(false);

      // Complete pacing
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Now pacing complete
      expect(result.current.pacedFinalAnswerComing).toBe(true);
    });

    test("returns true when bypassing pacing", () => {
      const step1 = createStep(0, 0);
      const turnGroups = [createTurnGroup([step1])];

      const { result } = renderHook(() =>
        usePacedTurnGroups(turnGroups, [], true, 1, true)
      );

      // Bypassing pacing, so finalAnswerComing passed through
      expect(result.current.pacedFinalAnswerComing).toBe(true);
    });
  });

  describe("tool-after-message transition", () => {
    test("resets toolPacingComplete when finalAnswerComing goes true → false with new tool step", () => {
      const displayGroup = createDisplayGroup(0);

      // Step 1: Render with finalAnswerComing=true, no tool steps
      // No tools = pacing complete immediately → display groups shown
      const { result, rerender } = renderHook(
        ({ turnGroups, finalAnswerComing }) =>
          usePacedTurnGroups(
            turnGroups,
            [displayGroup],
            false,
            1,
            finalAnswerComing
          ),
        {
          initialProps: {
            turnGroups: [] as TurnGroup[],
            finalAnswerComing: true,
          },
        }
      );

      expect(result.current.pacedDisplayGroups.length).toBe(1);
      expect(result.current.pacedFinalAnswerComing).toBe(true);

      // Step 2: finalAnswerComing goes false + new tool step arrives
      // This simulates the agent switching from message streaming back to tools
      const step1 = createStep(0, 0);
      rerender({
        turnGroups: [createTurnGroup([step1])],
        finalAnswerComing: false,
      });

      // toolPacingComplete was reset, so display groups should be hidden
      // (first tool step is revealed immediately, but pacing just re-started)
      expect(result.current.pacedTurnGroups.length).toBe(1);
      expect(result.current.pacedDisplayGroups.length).toBe(0);

      // Step 3: Add a second tool step so pacing is not yet complete
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
        finalAnswerComing: false,
      });

      // Display groups still hidden (pacing incomplete)
      expect(result.current.pacedDisplayGroups.length).toBe(0);

      // Step 4: Advance timer to complete pacing
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Now pacing is complete → display groups shown again
      expect(result.current.pacedTurnGroups.length).toBe(2);
      expect(result.current.pacedDisplayGroups.length).toBe(1);
    });
  });

  describe("referential stability", () => {
    test("returns same array reference when turn groups have not changed", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // First step revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add second step and reveal it via pacing
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(2);

      const stableRef = result.current.pacedTurnGroups;

      // Re-render with new array containing structurally identical turn groups
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Should be the exact same array reference (nothing changed)
      expect(result.current.pacedTurnGroups).toBe(stableRef);
    });

    test("preserves completed group references when streaming group changes", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups, stopPacketSeen }) =>
          usePacedTurnGroups(turnGroups, [], stopPacketSeen, 1, false),
        {
          initialProps: {
            turnGroups: [createTurnGroup([step1])],
            stopPacketSeen: false,
          },
        }
      );

      // First step revealed immediately
      expect(result.current.pacedTurnGroups.length).toBe(1);

      // Add second step and advance timer to reveal it
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
        stopPacketSeen: false,
      });
      act(() => {
        jest.advanceTimersByTime(200);
      });
      expect(result.current.pacedTurnGroups.length).toBe(2);

      const firstGroupRef = result.current.pacedTurnGroups[0];

      // Simulate streaming: step2 gets more packets (new object with longer packets array)
      const step2Updated: TransformedStep = {
        ...step2,
        packets: [
          ...step2.packets,
          {
            placement: { turn_index: 1, tab_index: 0 },
            obj: { type: PacketType.SEARCH_TOOL_START },
          } as Packet,
        ],
      };
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2Updated])],
        stopPacketSeen: false,
      });

      // First group (completed) should keep the same object reference
      expect(result.current.pacedTurnGroups[0]).toBe(firstGroupRef);
      // Second group changed (packets.length differs) — new reference
      expect(result.current.pacedTurnGroups.length).toBe(2);
    });

    test("returns new array reference when a new step is revealed", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      const firstResult = result.current.pacedTurnGroups;
      expect(firstResult.length).toBe(1);

      // Add second step and reveal it
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Array reference must differ (length changed)
      expect(result.current.pacedTurnGroups).not.toBe(firstResult);
      expect(result.current.pacedTurnGroups.length).toBe(2);
    });
  });

  describe("timer cleanup", () => {
    test("clears timer on unmount", () => {
      const step1 = createStep(0, 0);

      const { result, rerender, unmount } = renderHook(
        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),
        { initialProps: { turnGroups: [createTurnGroup([step1])] } }
      );

      // Add second step to create pending timer
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
      });

      // Unmount before timer fires
      unmount();

      // Advance timer - should not throw
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // No assertion needed - just verifying no errors on timer fire after unmount
    });

    test("clears timer on nodeId change", () => {
      const step1 = createStep(0, 0);

      const { result, rerender } = renderHook(
        ({ turnGroups, nodeId }) =>
          usePacedTurnGroups(turnGroups, [], false, nodeId, false),
        {
          initialProps: {
            turnGroups: [createTurnGroup([step1])],
            nodeId: 1,
          },
        }
      );

      // Add second step to create pending timer
      const step2 = createStep(1, 0);
      rerender({
        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],
        nodeId: 1,
      });

      // Change nodeId - should clear timer
      rerender({
        turnGroups: [createTurnGroup([step1])],
        nodeId: 2,
      });

      // Old timer should not affect new state
      act(() => {
        jest.advanceTimersByTime(200);
      });

      // Only one step for new nodeId
      expect(result.current.pacedTurnGroups.length).toBe(1);
    });
  });
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups.ts
================================================
import { useRef, useState, useEffect, useCallback, useMemo } from "react";
import { PacketType } from "@/app/app/services/streamingModels";
import { GroupedPacket } from "./packetProcessor";
import { TurnGroup, TransformedStep } from "../transformers";

// Delay between steps (ms)
const PACING_DELAY_MS = 200;

/**
 * Tool START packet types used for categorizing steps
 * These determine the "type" of a step for pacing purposes
 */
const TOOL_START_PACKET_TYPES = new Set<PacketType>([
  PacketType.SEARCH_TOOL_START,
  PacketType.FETCH_TOOL_START,
  PacketType.PYTHON_TOOL_START,
  PacketType.CUSTOM_TOOL_START,
  PacketType.FILE_READER_START,
  PacketType.REASONING_START,
  PacketType.IMAGE_GENERATION_TOOL_START,
  PacketType.DEEP_RESEARCH_PLAN_START,
  PacketType.RESEARCH_AGENT_START,
  PacketType.MEMORY_TOOL_START,
  PacketType.MEMORY_TOOL_NO_ACCESS,
]);

/**
 * Get the primary packet type from a step's packets (first START packet)
 * Used to determine if a type transition occurred
 */
function getStepPacketType(step: TransformedStep): PacketType | null {
  for (const packet of step.packets) {
    if (TOOL_START_PACKET_TYPES.has(packet.obj.type as PacketType)) {
      return packet.obj.type as PacketType;
    }
  }
  return null;
}

/**
 * Internal pacing state stored in ref (not triggering re-renders)
 */
interface PacingState {
  // Tracking revealed content
  revealedStepKeys: Set<string>;
  lastRevealedPacketType: PacketType | null;

  // Queued content
  pendingSteps: TransformedStep[];

  // Timer
  pacingTimer: ReturnType<typeof setTimeout> | null;

  // Flags
  toolPacingComplete: boolean;
  stopPacketSeen: boolean;

  // Track nodeId for reset detection
  nodeId: string | null;
}

function createInitialPacingState(): PacingState {
  return {
    revealedStepKeys: new Set(),
    lastRevealedPacketType: null,
    pendingSteps: [],
    pacingTimer: null,
    toolPacingComplete: false,
    stopPacketSeen: false,
    nodeId: null,
  };
}

/**
 * Hook that adds pacing delays between steps during streaming.
 * Creates visual breathing room between agent activities.
 *
 * Architecture:
 * - Pacing state in ref: no re-renders for internal tracking
 * - useState only for revealTrigger: forces re-render when content should update
 * - Timer-based delays: 200ms between all steps
 *
 * @param toolTurnGroups - Turn groups from packet processor
 * @param displayGroups - Display content groups (MESSAGE_START/DELTA)
 * @param stopPacketSeen - Whether STOP packet has been received
 * @param nodeId - Message node ID for reset detection
 * @param finalAnswerComing - Whether message content is streaming
 */
export function usePacedTurnGroups(
  toolTurnGroups: TurnGroup[],
  displayGroups: GroupedPacket[],
  stopPacketSeen: boolean,
  nodeId: number,
  finalAnswerComing: boolean
): {
  pacedTurnGroups: TurnGroup[];
  pacedDisplayGroups: GroupedPacket[];
  pacedFinalAnswerComing: boolean;
} {
  // Ref-based pacing state (no re-renders)
  const stateRef = useRef<PacingState>(createInitialPacingState());

  // Track previous finalAnswerComing to detect tool-after-message transitions
  const prevFinalAnswerComingRef = useRef(finalAnswerComing);

  // Cache previous pacedTurnGroups to preserve referential equality
  // for completed turn groups that haven't changed
  const prevPacedRef = useRef<TurnGroup[]>([]);

  // Trigger re-render when content should update
  // Used in useMemo dependencies since state.revealedStepKeys is stored in a ref
  const [revealTrigger, setRevealTrigger] = useState(0);

  // Stable nodeId string for comparison
  const nodeIdStr = String(nodeId);

  // Reset on nodeId change
  if (stateRef.current.nodeId !== nodeIdStr) {
    if (stateRef.current.pacingTimer) {
      clearTimeout(stateRef.current.pacingTimer);
    }
    stateRef.current = createInitialPacingState();
    stateRef.current.nodeId = nodeIdStr;
    prevPacedRef.current = [];
  }

  const state = stateRef.current;

  // Bypass pacing for completed messages (old messages loaded from history)
  // If stopPacketSeen is true on first render, return everything immediately
  const shouldBypassPacing =
    stopPacketSeen &&
    state.revealedStepKeys.size === 0 &&
    toolTurnGroups.length > 0;

  // Handle revealing the next pending step
  // Reveals ONE step per timer fire, always with delay between steps
  const revealNextPendingStep = useCallback(() => {
    const state = stateRef.current;

    if (state.pendingSteps.length > 0) {
      const stepToReveal = state.pendingSteps.shift()!;
      state.revealedStepKeys.add(stepToReveal.key);
      state.lastRevealedPacketType = getStepPacketType(stepToReveal);

      // Schedule next step if more pending (always delay, regardless of type)
      if (state.pendingSteps.length > 0) {
        state.pacingTimer = setTimeout(revealNextPendingStep, PACING_DELAY_MS);
        setRevealTrigger((t) => t + 1);
        return;
      }
    }

    // No more pending steps - pacing complete
    state.toolPacingComplete = true;
    state.pacingTimer = null;
    setRevealTrigger((t) => t + 1);
  }, []);

  // Process incoming turn groups
  useEffect(() => {
    // Skip processing when bypassing pacing
    if (shouldBypassPacing) return;

    const state = stateRef.current;

    // Detect tool-after-message transition: message was showing, now tools are starting
    // Reset toolPacingComplete to hide display until new tools finish pacing
    if (prevFinalAnswerComingRef.current && !finalAnswerComing) {
      state.toolPacingComplete = false;
    }
    prevFinalAnswerComingRef.current = finalAnswerComing;

    // Handle STOP packet - flush everything immediately
    if (stopPacketSeen && !state.stopPacketSeen) {
      state.stopPacketSeen = true;

      // Clear any pending timer
      if (state.pacingTimer) {
        clearTimeout(state.pacingTimer);
        state.pacingTimer = null;
      }

      // Reveal all pending steps immediately
      for (const step of state.pendingSteps) {
        state.revealedStepKeys.add(step.key);
      }
      state.pendingSteps = [];
      state.toolPacingComplete = true;

      setRevealTrigger((t) => t + 1);
      return;
    }

    // Collect all steps from turn groups
    const allSteps: TransformedStep[] = [];
    for (const turnGroup of toolTurnGroups) {
      for (const step of turnGroup.steps) {
        allSteps.push(step);
      }
    }

    // Find new steps (not yet revealed or pending)
    const newSteps: TransformedStep[] = [];
    const pendingKeys = new Set(state.pendingSteps.map((s) => s.key));

    for (const step of allSteps) {
      if (!state.revealedStepKeys.has(step.key) && !pendingKeys.has(step.key)) {
        newSteps.push(step);
      }
    }

    if (newSteps.length === 0) {
      // If there are no tool steps at all, mark pacing complete immediately
      // This allows tool-less responses to render their displayGroups
      if (allSteps.length === 0 && !state.toolPacingComplete) {
        state.toolPacingComplete = true;
        setRevealTrigger((t) => t + 1);
        return;
      }

      // Check if all steps are revealed (no pending, no new)
      if (
        state.pendingSteps.length === 0 &&
        !state.pacingTimer &&
        allSteps.length > 0
      ) {
        const allRevealed = allSteps.every((s) =>
          state.revealedStepKeys.has(s.key)
        );
        if (allRevealed && !state.toolPacingComplete) {
          state.toolPacingComplete = true;
          setRevealTrigger((t) => t + 1);
        }
      }
      return;
    }

    // Process new steps
    for (const step of newSteps) {
      const stepType = getStepPacketType(step);

      // First step ever - reveal immediately
      if (
        state.revealedStepKeys.size === 0 &&
        state.pendingSteps.length === 0
      ) {
        state.revealedStepKeys.add(step.key);
        state.lastRevealedPacketType = stepType;
        setRevealTrigger((t) => t + 1);
        continue;
      }

      // All subsequent steps - queue for paced reveal
      state.pendingSteps.push(step);

      // Start timer if not already running
      if (!state.pacingTimer && state.pendingSteps.length === 1) {
        state.pacingTimer = setTimeout(revealNextPendingStep, PACING_DELAY_MS);
      }
    }

    // Mark pacing incomplete while we have pending steps or timer
    if (state.pendingSteps.length > 0 || state.pacingTimer) {
      state.toolPacingComplete = false;
    }
  }, [
    toolTurnGroups,
    stopPacketSeen,
    finalAnswerComing,
    revealNextPendingStep,
    shouldBypassPacing,
  ]);

  // Cleanup timer on unmount
  useEffect(() => {
    return () => {
      if (stateRef.current.pacingTimer) {
        clearTimeout(stateRef.current.pacingTimer);
      }
    };
  }, []);

  // Build paced turn groups from revealed step keys
  // Memoized to prevent unnecessary re-renders in downstream components
  // revealTrigger is included because state.revealedStepKeys is stored in a ref
  const pacedTurnGroups = useMemo(() => {
    // Bypass: return all turn groups immediately
    if (shouldBypassPacing) return toolTurnGroups;

    const result: TurnGroup[] = [];
    for (const turnGroup of toolTurnGroups) {
      const revealedSteps = turnGroup.steps.filter((step) =>
        state.revealedStepKeys.has(step.key)
      );
      if (revealedSteps.length > 0) {
        result.push({
          turnIndex: turnGroup.turnIndex,
          steps: revealedSteps,
          isParallel: revealedSteps.length > 1,
        });
      }
    }

    // Stabilize: reuse previous TurnGroup objects when their content hasn't changed.
    // This preserves referential equality for completed groups, preventing
    // unnecessary re-renders in downstream components (e.g. SearchChipList).
    const prev = prevPacedRef.current;
    if (prev.length === result.length) {
      let allMatch = true;
      for (let i = 0; i < result.length; i++) {
        const oldGroup = prev[i]!;
        const newGroup = result[i]!;
        if (
          oldGroup.turnIndex === newGroup.turnIndex &&
          oldGroup.steps.length === newGroup.steps.length &&
          oldGroup.steps.every(
            (s, j) =>
              s.key === newGroup.steps[j]!.key &&
              s.packets.length === newGroup.steps[j]!.packets.length
          )
        ) {
          // Reuse old object reference for this group
          result[i] = oldGroup;
        } else {
          allMatch = false;
        }
      }
      if (allMatch) {
        // Every group matched — return the exact same array reference
        return prev;
      }
    }

    prevPacedRef.current = result;
    return result;
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [toolTurnGroups, revealTrigger, shouldBypassPacing]);

  // Only return display groups when tool pacing is complete (or bypassing)
  const pacedDisplayGroups = useMemo(
    () => (shouldBypassPacing || state.toolPacingComplete ? displayGroups : []),
    // eslint-disable-next-line react-hooks/exhaustive-deps
    [state.toolPacingComplete, displayGroups, revealTrigger, shouldBypassPacing]
  );

  // Paced signals for header state consistency
  // Only signal finalAnswerComing when tool pacing is complete (or bypassing)
  const pacedFinalAnswerComing = useMemo(
    () => (shouldBypassPacing || state.toolPacingComplete) && finalAnswerComing,
    // eslint-disable-next-line react-hooks/exhaustive-deps
    [
      state.toolPacingComplete,
      finalAnswerComing,
      revealTrigger,
      shouldBypassPacing,
    ]
  );

  return {
    pacedTurnGroups,
    pacedDisplayGroups,
    pacedFinalAnswerComing,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/usePacketProcessor.test.tsx
================================================
/**
 * Integration tests for usePacketProcessor hook
 *
 * Tests the React hook that wraps packet processing functions with React state
 * management, memoization, and callbacks. Uses @testing-library/react's renderHook.
 */
import { renderHook, act } from "@testing-library/react";
import {
  Packet,
  PacketType,
  StopReason,
} from "@/app/app/services/streamingModels";
import { usePacketProcessor } from "./usePacketProcessor";
import {
  createPacket,
  createSearchToolStartPacket,
  createMessageStartPacket,
  createStopPacket,
  createBranchingPacket,
} from "./__tests__/testHelpers";

// Mock the transformers module
jest.mock("../transformers", () => ({
  transformPacketGroups: jest.fn((groups) =>
    groups.map(
      (g: { turn_index: number; tab_index: number; packets: Packet[] }) => ({
        key: `${g.turn_index}-${g.tab_index}`,
        turnIndex: g.turn_index,
        tabIndex: g.tab_index,
        packets: g.packets,
      })
    )
  ),
  groupStepsByTurn: jest.fn((steps) => {
    const turnMap = new Map<number, typeof steps>();
    for (const step of steps) {
      const existing = turnMap.get(step.turnIndex);
      if (existing) {
        existing.push(step);
      } else {
        turnMap.set(step.turnIndex, [step]);
      }
    }
    return Array.from(turnMap.entries())
      .sort(([a], [b]) => a - b)
      .map(([turnIndex, stepsForTurn]) => ({
        turnIndex,
        steps: stepsForTurn,
        isParallel: stepsForTurn.length > 1,
      }));
  }),
}));

// ============================================================================
// Tests
// ============================================================================

describe("usePacketProcessor", () => {
  describe("initial state", () => {
    test("returns empty arrays when no packets", () => {
      const { result } = renderHook(() => usePacketProcessor([], 1));

      expect(result.current.toolGroups).toEqual([]);
      expect(result.current.displayGroups).toEqual([]);
      expect(result.current.toolTurnGroups).toEqual([]);
    });

    test("returns empty citations when no packets", () => {
      const { result } = renderHook(() => usePacketProcessor([], 1));

      expect(result.current.citations).toEqual([]);
      expect(result.current.citationMap).toEqual({});
    });

    test("initializes stopPacketSeen to false", () => {
      const { result } = renderHook(() => usePacketProcessor([], 1));

      expect(result.current.stopPacketSeen).toBe(false);
    });

    test("initializes isComplete to false", () => {
      const { result } = renderHook(() => usePacketProcessor([], 1));

      expect(result.current.isComplete).toBe(false);
    });

    test("provides stable callback references", () => {
      const { result, rerender } = renderHook(() => usePacketProcessor([], 1));

      const onRenderComplete1 = result.current.onRenderComplete;
      const markAllToolsDisplayed1 = result.current.markAllToolsDisplayed;

      rerender();

      expect(result.current.onRenderComplete).toBe(onRenderComplete1);
      expect(result.current.markAllToolsDisplayed).toBe(markAllToolsDisplayed1);
    });
  });

  describe("nodeId changes", () => {
    test("resets state when nodeId changes", () => {
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];

      const { result, rerender } = renderHook(
        ({ packets, nodeId }) => usePacketProcessor(packets, nodeId),
        { initialProps: { packets, nodeId: 1 } }
      );

      expect(result.current.toolGroups.length).toBe(1);

      // Change nodeId
      rerender({ packets: [], nodeId: 2 });

      expect(result.current.toolGroups).toEqual([]);
    });

    test("processes new packets after nodeId change", () => {
      const packets1 = [createSearchToolStartPacket({ turn_index: 0 })];
      const packets2 = [createMessageStartPacket({ turn_index: 0 })];

      const { result, rerender } = renderHook(
        ({ packets, nodeId }) => usePacketProcessor(packets, nodeId),
        { initialProps: { packets: packets1, nodeId: 1 } }
      );

      expect(result.current.toolGroups.length).toBe(1);

      rerender({ packets: packets2, nodeId: 2 });

      expect(result.current.toolGroups.length).toBe(0);
      expect(result.current.displayGroups.length).toBe(1);
    });
  });

  describe("stream reset detection", () => {
    test("resets state when packets array shrinks", () => {
      const packets1 = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];
      const packets2 = [createSearchToolStartPacket({ turn_index: 0 })];

      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets: packets1 } }
      );

      expect(result.current.finalAnswerComing).toBe(true);

      // Shrink packets (simulates stream reset)
      rerender({ packets: packets2 });

      expect(result.current.finalAnswerComing).toBe(false);
    });

    test("resets renderComplete on stream reset", () => {
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createStopPacket(),
      ];

      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets } }
      );

      // Trigger render complete
      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.isComplete).toBe(true);

      // Shrink packets
      rerender({ packets: [createMessageStartPacket({ turn_index: 0 })] });

      expect(result.current.isComplete).toBe(false);
    });
  });

  describe("incremental processing", () => {
    test("processes only new packets on update", () => {
      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets: [] as Packet[] } }
      );

      expect(result.current.toolGroups.length).toBe(0);

      // Add packets
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];
      rerender({ packets });

      expect(result.current.toolGroups.length).toBe(1);

      // Add more packets
      const morePackets = [
        ...packets,
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
      ];
      rerender({ packets: morePackets });

      expect(result.current.toolGroups.length).toBe(1);
    });

    test("handles rapid packet updates", () => {
      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets: [] as Packet[] } }
      );

      // Simulate rapid streaming updates
      for (let i = 0; i < 10; i++) {
        const packets = Array.from({ length: i + 1 }, (_, j) =>
          j === 0
            ? createSearchToolStartPacket({ turn_index: 0 })
            : createPacket(
                PacketType.SEARCH_TOOL_QUERIES_DELTA,
                { turn_index: 0 },
                { queries: [`q${j}`] }
              )
        );
        rerender({ packets });
      }

      expect(result.current.toolGroups.length).toBe(1);
    });
  });

  describe("displayGroups derivation", () => {
    test("returns empty when tools exist but finalAnswerComing is false", () => {
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.toolGroups.length).toBe(1);
      expect(result.current.displayGroups.length).toBe(0);
      expect(result.current.finalAnswerComing).toBe(false);
    });

    test("returns potentialDisplayGroups when finalAnswerComing is true", () => {
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.finalAnswerComing).toBe(true);
      expect(result.current.displayGroups.length).toBe(1);
    });

    test("returns potentialDisplayGroups when no tools exist", () => {
      const packets = [createMessageStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.toolGroups.length).toBe(0);
      expect(result.current.displayGroups.length).toBe(1);
    });

    test("returns potentialDisplayGroups when forceShowAnswer triggered", () => {
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      // Initially visible because finalAnswerComing is true
      expect(result.current.displayGroups.length).toBe(1);

      // Add tool after message to reset finalAnswerComing
      const { result: result2 } = renderHook(() =>
        usePacketProcessor(
          [
            createSearchToolStartPacket({ turn_index: 0 }),
            // No message yet, so displayGroups should be empty
          ],
          2
        )
      );

      expect(result2.current.displayGroups.length).toBe(0);

      // Force show answer
      act(() => {
        result2.current.markAllToolsDisplayed();
      });

      expect(result2.current.displayGroups.length).toBe(0); // Still 0 because no message packet
    });
  });

  describe("tool-after-message transition", () => {
    test("resets renderComplete on transition from finalAnswerComing true to false", () => {
      // Start with message (finalAnswerComing=true)
      const initialPackets = [createMessageStartPacket({ turn_index: 0 })];

      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets: initialPackets } }
      );

      expect(result.current.finalAnswerComing).toBe(true);

      // Add a tool after the message - this simulates the Claude workaround scenario
      // where Claude sends a message first, then decides to call a tool
      const packetsWithToolAfter = [
        ...initialPackets,
        createSearchToolStartPacket({ turn_index: 1 }),
      ];
      rerender({ packets: packetsWithToolAfter });

      // The tool should reset finalAnswerComing since it's an actual tool call
      expect(result.current.finalAnswerComing).toBe(false);
    });
  });

  describe("onRenderComplete callback", () => {
    test("sets isComplete when finalAnswerComing and stopPacketSeen", () => {
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createStopPacket(),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.finalAnswerComing).toBe(true);
      expect(result.current.stopPacketSeen).toBe(true);
      expect(result.current.isComplete).toBe(false);

      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.isComplete).toBe(true);
    });

    test("does not set isComplete when finalAnswerComing is false", () => {
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.finalAnswerComing).toBe(false);

      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.isComplete).toBe(false);
    });
  });

  describe("markAllToolsDisplayed callback", () => {
    test("forces displayGroups to show even when finalAnswerComing is false", () => {
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }),
      ];

      const { result, rerender } = renderHook(
        ({ packets }) => usePacketProcessor(packets, 1),
        { initialProps: { packets } }
      );

      // Initially visible since finalAnswerComing is true after MESSAGE_START
      expect(result.current.displayGroups.length).toBe(1);

      // Reset to a state where no message, with forceShow
      const toolOnlyPackets = [createSearchToolStartPacket({ turn_index: 0 })];

      const { result: result2 } = renderHook(() =>
        usePacketProcessor(toolOnlyPackets, 2)
      );

      expect(result2.current.displayGroups.length).toBe(0);

      act(() => {
        result2.current.markAllToolsDisplayed();
      });

      // Now should be ready to show (though still empty because no message in packets)
      // The key thing is forceShowAnswer flag is set
      expect(result2.current.finalAnswerComing).toBe(false);
    });
  });

  describe("isComplete flag", () => {
    test("false when stopPacketSeen is false", () => {
      const packets = [createMessageStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.stopPacketSeen).toBe(false);
      expect(result.current.isComplete).toBe(false);
    });

    test("false when renderComplete is false", () => {
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createStopPacket(),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.stopPacketSeen).toBe(true);
      expect(result.current.isComplete).toBe(false);
    });

    test("true only when BOTH stopPacketSeen and renderComplete are true", () => {
      const packets = [
        createMessageStartPacket({ turn_index: 0 }),
        createStopPacket(),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.stopPacketSeen).toBe(true);
      expect(result.current.isComplete).toBe(false);

      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.isComplete).toBe(true);
    });
  });

  describe("hasSteps flag", () => {
    test("false when no tool groups", () => {
      const packets = [createMessageStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.hasSteps).toBe(false);
    });

    test("true when tool groups exist", () => {
      const packets = [createSearchToolStartPacket({ turn_index: 0 })];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.hasSteps).toBe(true);
    });
  });

  describe("toolTurnGroups transformation", () => {
    test("groups tools by turn index", () => {
      const packets = [
        createBranchingPacket(2, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),
        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.toolTurnGroups.length).toBe(1);
      expect(result.current.toolTurnGroups[0]?.isParallel).toBe(true);
      expect(result.current.toolTurnGroups[0]?.steps.length).toBe(2);
    });
  });

  describe("expectedBranchesPerTurn", () => {
    test("exposes branch metadata from packets", () => {
      const packets = [
        createBranchingPacket(3, 0),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),
        createSearchToolStartPacket({ turn_index: 0, tab_index: 2 }),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.expectedBranchesPerTurn.get(0)).toBe(3);
    });
  });

  describe("complex scenarios", () => {
    test("full flow: tools -> message -> complete", () => {
      const packets = [
        createSearchToolStartPacket({ turn_index: 0 }),
        createPacket(
          PacketType.SEARCH_TOOL_QUERIES_DELTA,
          { turn_index: 0 },
          { queries: ["test"] }
        ),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createMessageStartPacket({ turn_index: 1 }, 1.5),
        createPacket(
          PacketType.MESSAGE_DELTA,
          { turn_index: 1 },
          { content: "Result:" }
        ),
        createStopPacket(StopReason.FINISHED),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.toolGroups.length).toBe(1);
      expect(result.current.displayGroups.length).toBe(1);
      expect(result.current.hasSteps).toBe(true);
      expect(result.current.stopPacketSeen).toBe(true);
      expect(result.current.stopReason).toBe(StopReason.FINISHED);
      expect(result.current.finalAnswerComing).toBe(true);
      expect(result.current.toolProcessingDuration).toBe(1.5);

      act(() => {
        result.current.onRenderComplete();
      });

      expect(result.current.isComplete).toBe(true);
    });

    test("handles image generation flow", () => {
      const packets = [
        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),
        createPacket(
          PacketType.IMAGE_GENERATION_TOOL_DELTA,
          { turn_index: 0 },
          {
            images: [
              {
                file_id: "img1",
                url: "http://example.com/1.png",
                revised_prompt: "test",
              },
            ],
          }
        ),
        createPacket(PacketType.SECTION_END, { turn_index: 0 }),
        createStopPacket(),
      ];

      const { result } = renderHook(() => usePacketProcessor(packets, 1));

      expect(result.current.isGeneratingImage).toBe(true);
      expect(result.current.generatedImageCount).toBe(1);
      expect(result.current.finalAnswerComing).toBe(true);
      expect(result.current.displayGroups.length).toBe(1);
    });
  });
});


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/usePacketProcessor.ts
================================================
import { useRef, useState, useMemo, useCallback } from "react";
import {
  Packet,
  StreamingCitation,
  StopReason,
} from "@/app/app/services/streamingModels";
import { CitationMap } from "@/app/app/interfaces";
import { OnyxDocument } from "@/lib/search/interfaces";
import {
  ProcessorState,
  GroupedPacket,
  createInitialState,
  processPackets,
} from "@/app/app/message/messageComponents/timeline/hooks/packetProcessor";
import {
  transformPacketGroups,
  groupStepsByTurn,
  TurnGroup,
} from "@/app/app/message/messageComponents/timeline/transformers";

export interface UsePacketProcessorResult {
  // Data
  toolGroups: GroupedPacket[];
  displayGroups: GroupedPacket[];
  toolTurnGroups: TurnGroup[];
  citations: StreamingCitation[];
  citationMap: CitationMap;
  documentMap: Map<string, OnyxDocument>;

  // Status (derived from packets)
  stopPacketSeen: boolean;
  stopReason: StopReason | undefined;
  hasSteps: boolean;
  expectedBranchesPerTurn: Map<number, number>;
  isGeneratingImage: boolean;
  generatedImageCount: number;
  // Whether final answer is coming (MESSAGE_START seen)
  finalAnswerComing: boolean;
  // Tool processing duration from backend (via MESSAGE_START packet)
  toolProcessingDuration: number | undefined;

  // Completion: stopPacketSeen && renderComplete
  isComplete: boolean;

  // Callbacks
  onRenderComplete: () => void;
  markAllToolsDisplayed: () => void;
}

/**
 * Hook for processing streaming packets in AgentMessage.
 *
 * Architecture:
 * - Processor state in ref: incremental processing, synchronous, no double render
 * - Only true UI state: renderComplete (set by callback), forceShowAnswer (override)
 * - Everything else derived from packets
 *
 * Key insight: finalAnswerComing and stopPacketSeen are DERIVED from packets,
 * not independent state. Only renderComplete needs useState.
 */
export function usePacketProcessor(
  rawPackets: Packet[],
  nodeId: number
): UsePacketProcessorResult {
  // Processor in ref: incremental, synchronous, no double render
  const stateRef = useRef<ProcessorState>(createInitialState(nodeId));

  // Only TRUE UI state: "has renderer finished?"
  const [renderComplete, setRenderComplete] = useState(false);

  // Optional override to force showing answer
  const [forceShowAnswer, setForceShowAnswer] = useState(false);

  // Reset on nodeId change
  if (stateRef.current.nodeId !== nodeId) {
    stateRef.current = createInitialState(nodeId);
    setRenderComplete(false);
    setForceShowAnswer(false);
  }

  // Track for transition detection
  const prevNextPacketIndex = stateRef.current.nextPacketIndex;
  const prevFinalAnswerComing = stateRef.current.finalAnswerComing;

  // Detect stream reset (packets shrunk)
  if (prevNextPacketIndex > rawPackets.length) {
    stateRef.current = createInitialState(nodeId);
    setRenderComplete(false);
    setForceShowAnswer(false);
  }

  // Process packets synchronously (incremental) - only if new packets arrived
  if (rawPackets.length > stateRef.current.nextPacketIndex) {
    stateRef.current = processPackets(stateRef.current, rawPackets);
  }

  // Reset renderComplete on tool-after-message transition
  if (prevFinalAnswerComing && !stateRef.current.finalAnswerComing) {
    setRenderComplete(false);
  }

  // Access state directly (result arrays are built in processPackets)
  const state = stateRef.current;

  // Derive displayGroups (not state!)
  const effectiveFinalAnswerComing = state.finalAnswerComing || forceShowAnswer;
  const displayGroups = useMemo(() => {
    if (effectiveFinalAnswerComing || state.toolGroups.length === 0) {
      return state.potentialDisplayGroups;
    }
    return [];
  }, [
    effectiveFinalAnswerComing,
    state.toolGroups.length,
    state.potentialDisplayGroups,
  ]);

  // Transform toolGroups to timeline format
  const toolTurnGroups = useMemo(() => {
    const allSteps = transformPacketGroups(state.toolGroups);
    return groupStepsByTurn(allSteps);
  }, [state.toolGroups]);

  // Callback reads from ref: always current value, no ref needed in component
  const onRenderComplete = useCallback(() => {
    if (stateRef.current.finalAnswerComing) {
      setRenderComplete(true);
    }
  }, []);

  const markAllToolsDisplayed = useCallback(() => {
    setForceShowAnswer(true);
  }, []);

  return {
    // Data
    toolGroups: state.toolGroups,
    displayGroups,
    toolTurnGroups,
    citations: state.citations,
    citationMap: state.citationMap,
    documentMap: state.documentMap,

    // Status (derived from packets)
    stopPacketSeen: state.stopPacketSeen,
    stopReason: state.stopReason,
    hasSteps: toolTurnGroups.length > 0,
    expectedBranchesPerTurn: state.expectedBranches,
    isGeneratingImage: state.isGeneratingImage,
    generatedImageCount: state.generatedImageCount,
    finalAnswerComing: state.finalAnswerComing,
    toolProcessingDuration: state.toolProcessingDuration,

    // Completion: stopPacketSeen && renderComplete
    isComplete: state.stopPacketSeen && renderComplete,

    // Callbacks
    onRenderComplete,
    markAllToolsDisplayed,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useStreamingDuration.ts
================================================
import { useState, useEffect, useRef } from "react";

/**
 * Hook to track elapsed streaming duration with efficient updates.
 *
 * Uses requestAnimationFrame for accurate timing but only triggers re-renders
 * when the elapsed seconds value actually changes (once per second).
 *
 * @param isStreaming - Whether streaming is currently active
 * @param startTime - Timestamp when streaming started (from Date.now())
 * @param backendDuration - Duration from backend when available (freezes timer)
 * @returns Elapsed seconds since streaming started
 */
export function useStreamingDuration(
  isStreaming: boolean,
  startTime: number | undefined,
  backendDuration?: number
): number {
  const [elapsedSeconds, setElapsedSeconds] = useState(0);
  const rafRef = useRef<number | null>(null);
  const lastElapsedRef = useRef<number>(0);

  // Determine if we should run the live timer
  // Stop the timer when backend duration is available
  const shouldRunTimer = isStreaming && backendDuration === undefined;

  useEffect(() => {
    if (!shouldRunTimer || !startTime) {
      // Don't reset when stopping - preserve last calculated value
      // Only reset when explicitly given no start time
      if (!startTime) {
        setElapsedSeconds(0);
        lastElapsedRef.current = 0;
      }
      return;
    }

    const updateElapsed = () => {
      const now = Date.now();
      const elapsed = Math.floor((now - startTime) / 1000);

      // Only update state when seconds change to avoid unnecessary re-renders
      if (elapsed !== lastElapsedRef.current) {
        lastElapsedRef.current = elapsed;
        setElapsedSeconds(elapsed);
      }

      rafRef.current = requestAnimationFrame(updateElapsed);
    };

    // Start the animation loop
    rafRef.current = requestAnimationFrame(updateElapsed);

    return () => {
      if (rafRef.current !== null) {
        cancelAnimationFrame(rafRef.current);
        rafRef.current = null;
      }
    };
  }, [shouldRunTimer, startTime]);

  // Return backend duration if provided, otherwise return live elapsed time
  return backendDuration !== undefined ? backendDuration : elapsedSeconds;
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useTimelineExpansion.ts
================================================
import { useState, useEffect, useCallback, useRef } from "react";
import { TurnGroup } from "../transformers";

export interface TimelineExpansionState {
  isExpanded: boolean;
  handleToggle: () => void;
  parallelActiveTab: string;
  setParallelActiveTab: (tab: string) => void;
}

/**
 * Manages expansion state for the timeline.
 * Auto-collapses when streaming completes or message content starts, and syncs parallel tab selection.
 */
export function useTimelineExpansion(
  stopPacketSeen: boolean,
  lastTurnGroup: TurnGroup | undefined,
  hasDisplayContent: boolean = false
): TimelineExpansionState {
  const [isExpanded, setIsExpanded] = useState(false);
  const [parallelActiveTab, setParallelActiveTab] = useState<string>("");
  const userHasToggled = useRef(false);

  const handleToggle = useCallback(() => {
    userHasToggled.current = true;
    setIsExpanded((prev) => !prev);
  }, []);

  // Auto-collapse when streaming completes or message content starts
  // BUT respect user intent - if they've manually toggled, don't auto-collapse
  useEffect(() => {
    if ((stopPacketSeen || hasDisplayContent) && !userHasToggled.current) {
      setIsExpanded(false);
    }
  }, [stopPacketSeen, hasDisplayContent]);

  // Sync active tab when parallel turn group changes
  useEffect(() => {
    if (lastTurnGroup?.isParallel && lastTurnGroup.steps.length > 0) {
      const validTabs = lastTurnGroup.steps.map((s) => s.key);
      const firstStep = lastTurnGroup.steps[0];
      if (firstStep && !validTabs.includes(parallelActiveTab)) {
        setParallelActiveTab(firstStep.key);
      }
    }
  }, [lastTurnGroup, parallelActiveTab]);

  return {
    isExpanded,
    handleToggle,
    parallelActiveTab,
    setParallelActiveTab,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useTimelineHeader.ts
================================================
import { useMemo } from "react";
import { TurnGroup } from "../transformers";
import {
  PacketType,
  SearchToolPacket,
  StopReason,
  CustomToolStart,
} from "@/app/app/services/streamingModels";
import { constructCurrentSearchState } from "@/app/app/message/messageComponents/timeline/renderers/search/searchStateUtils";

export interface TimelineHeaderResult {
  headerText: string;
  hasPackets: boolean;
  userStopped: boolean;
}

/**
 * Hook that determines timeline header state based on current activity.
 * Returns header text, whether there are packets, and whether user stopped.
 */
export function useTimelineHeader(
  turnGroups: TurnGroup[],
  stopReason?: StopReason,
  isGeneratingImage?: boolean
): TimelineHeaderResult {
  return useMemo(() => {
    const hasPackets = turnGroups.length > 0;
    const userStopped = stopReason === StopReason.USER_CANCELLED;

    // If generating image with no tool packets, show image generation header
    if (isGeneratingImage && !hasPackets) {
      return { headerText: "Generating image...", hasPackets, userStopped };
    }

    if (!hasPackets) {
      return { headerText: "Thinking...", hasPackets, userStopped };
    }

    // Get the last (current) turn group
    const currentTurn = turnGroups[turnGroups.length - 1];
    if (!currentTurn) {
      return { headerText: "Thinking...", hasPackets, userStopped };
    }

    const currentStep = currentTurn.steps[0];
    if (!currentStep?.packets?.length) {
      return { headerText: "Thinking...", hasPackets, userStopped };
    }

    const firstPacket = currentStep.packets[0];
    if (!firstPacket) {
      return { headerText: "Thinking...", hasPackets, userStopped };
    }

    const packetType = firstPacket.obj.type;

    // Determine header based on packet type
    if (packetType === PacketType.SEARCH_TOOL_START) {
      const searchState = constructCurrentSearchState(
        currentStep.packets as SearchToolPacket[]
      );
      let headerText: string;
      if (searchState.hasResults && !searchState.isInternetSearch) {
        headerText = "Reading";
      } else {
        headerText = searchState.isInternetSearch
          ? "Searching the web"
          : "Searching internal documents";
      }
      return { headerText, hasPackets, userStopped };
    }

    if (packetType === PacketType.FETCH_TOOL_START) {
      return { headerText: "Reading", hasPackets, userStopped };
    }

    if (packetType === PacketType.PYTHON_TOOL_START) {
      return { headerText: "Executing code", hasPackets, userStopped };
    }

    if (packetType === PacketType.IMAGE_GENERATION_TOOL_START) {
      return { headerText: "Generating images", hasPackets, userStopped };
    }

    if (packetType === PacketType.FILE_READER_START) {
      return { headerText: "Reading file", hasPackets, userStopped };
    }

    if (packetType === PacketType.CUSTOM_TOOL_START) {
      const toolName = (firstPacket.obj as CustomToolStart).tool_name;
      return {
        headerText: toolName ? `Executing ${toolName}` : "Executing tool",
        hasPackets,
        userStopped,
      };
    }

    if (
      packetType === PacketType.MEMORY_TOOL_START ||
      packetType === PacketType.MEMORY_TOOL_NO_ACCESS
    ) {
      return { headerText: "Updating memory...", hasPackets, userStopped };
    }

    if (packetType === PacketType.REASONING_START) {
      return { headerText: "Thinking", hasPackets, userStopped };
    }

    if (packetType === PacketType.DEEP_RESEARCH_PLAN_START) {
      return { headerText: "Generating plan", hasPackets, userStopped };
    }

    if (packetType === PacketType.RESEARCH_AGENT_START) {
      return { headerText: "Researching", hasPackets, userStopped };
    }

    return { headerText: "Thinking...", hasPackets, userStopped };
  }, [turnGroups, stopReason, isGeneratingImage]);
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useTimelineMetrics.ts
================================================
import { useMemo } from "react";
import {
  TurnGroup,
  TransformedStep,
} from "@/app/app/message/messageComponents/timeline/transformers";
import {
  isResearchAgentPackets,
  stepSupportsCollapsedStreaming,
} from "@/app/app/message/messageComponents/timeline/packetHelpers";

export interface TimelineMetrics {
  totalSteps: number;
  isSingleStep: boolean;
  lastTurnGroup: TurnGroup | undefined;
  lastStep: TransformedStep | undefined;
  lastStepIsResearchAgent: boolean;
  lastStepSupportsCollapsedStreaming: boolean;
}

/**
 * Memoizes derived metrics from turn groups to avoid recomputation on every render.
 * Single-pass computation where possible for performance with large packet counts.
 */
export function useTimelineMetrics(
  turnGroups: TurnGroup[],
  userStopped: boolean
): TimelineMetrics {
  return useMemo(() => {
    // Compute in single pass
    let totalSteps = 0;
    for (const tg of turnGroups) {
      totalSteps += tg.steps.length;
    }

    const lastTurnGroup = turnGroups[turnGroups.length - 1];
    const lastStep = lastTurnGroup?.steps[lastTurnGroup.steps.length - 1];

    // Analyze last step packets once
    const lastStepIsResearchAgent = lastStep
      ? isResearchAgentPackets(lastStep.packets)
      : false;
    const lastStepSupportsCollapsedStreaming = lastStep
      ? stepSupportsCollapsedStreaming(lastStep.packets)
      : false;

    return {
      totalSteps,
      isSingleStep: totalSteps === 1 && !userStopped,
      lastTurnGroup,
      lastStep,
      lastStepIsResearchAgent,
      lastStepSupportsCollapsedStreaming,
    };
  }, [turnGroups, userStopped]);
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useTimelineStepState.ts
================================================
import { useMemo } from "react";
import { MemoryToolPacket } from "@/app/app/services/streamingModels";
import { TurnGroup } from "@/app/app/message/messageComponents/timeline/transformers";
import { constructCurrentMemoryState } from "@/app/app/message/messageComponents/timeline/renderers/memory/memoryStateUtils";
import { isMemoryToolPackets } from "@/app/app/message/messageComponents/timeline/packetHelpers";

interface MemoryStepState {
  memoryText: string | null;
  memoryOperation: "add" | "update" | null;
  memoryId: number | null;
  memoryIndex: number | null;
  isMemoryOnly: boolean;
}

/**
 * Extracts memory state from the first memory-tool step in turnGroups
 * and determines whether the timeline contains only memory steps.
 */
export function useTimelineStepState(turnGroups: TurnGroup[]): MemoryStepState {
  return useMemo(() => {
    let memoryText: string | null = null;
    let memoryOperation: "add" | "update" | null = null;
    let memoryId: number | null = null;
    let memoryIndex: number | null = null;
    let foundMemory = false;

    let totalSteps = 0;
    let allMemory = true;

    for (const tg of turnGroups) {
      for (const step of tg.steps) {
        totalSteps++;
        const isMem = isMemoryToolPackets(step.packets);

        if (!isMem) {
          allMemory = false;
        }

        if (!foundMemory && isMem) {
          foundMemory = true;
          const state = constructCurrentMemoryState(
            step.packets as unknown as MemoryToolPacket[]
          );
          memoryText = state.memoryText;
          memoryOperation = state.operation;
          memoryId = state.memoryId;
          memoryIndex = state.index;
        }
      }
    }

    return {
      memoryText,
      memoryOperation,
      memoryId,
      memoryIndex,
      isMemoryOnly: totalSteps > 0 && allMemory,
    };
  }, [turnGroups]);
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/hooks/useTimelineUIState.ts
================================================
import { useMemo } from "react";
import { TurnGroup, TransformedStep } from "../transformers";

// =============================================================================
// Timeline UI State Machine
// =============================================================================

export enum TimelineUIState {
  /** No packets yet, showing shimmer */
  EMPTY = "EMPTY",
  /** Final message only, no timeline */
  DISPLAY_CONTENT_ONLY = "DISPLAY_CONTENT_ONLY",
  /** Active single tool execution */
  STREAMING_SEQUENTIAL = "STREAMING_SEQUENTIAL",
  /** Active parallel tool execution */
  STREAMING_PARALLEL = "STREAMING_PARALLEL",
  /** User cancelled */
  STOPPED = "STOPPED",
  /** Done, timeline collapsed */
  COMPLETED_COLLAPSED = "COMPLETED_COLLAPSED",
  /** Done, timeline expanded */
  COMPLETED_EXPANDED = "COMPLETED_EXPANDED",
}

export interface TimelineUIStateInput {
  /** Whether the stop packet has been seen */
  stopPacketSeen: boolean;
  /** Whether there are any packets in the timeline */
  hasPackets: boolean;
  /** Whether there is display content after timeline */
  hasDisplayContent: boolean;
  /** Whether the user stopped the generation */
  userStopped: boolean;
  /** Whether the timeline is expanded */
  isExpanded: boolean;
  /** The last turn group (for parallel detection) */
  lastTurnGroup: TurnGroup | undefined;
  /** The last step */
  lastStep: TransformedStep | undefined;
  /** Whether the last step supports collapsed streaming rendering */
  lastStepSupportsCollapsedStreaming: boolean;
  /** Whether the last step has renderable collapsed streaming content */
  lastStepHasCollapsedContent: boolean;
  /** Whether the last step is a research agent */
  lastStepIsResearchAgent: boolean;
  /** Whether the parallel active step supports collapsed streaming rendering */
  parallelActiveStepSupportsCollapsedStreaming: boolean;
  /** Whether the parallel active step has renderable collapsed streaming content */
  parallelActiveStepHasCollapsedContent: boolean;
  /** Whether image generation is in progress */
  isGeneratingImage: boolean;
  /** Whether final answer is coming (MESSAGE_START received) */
  finalAnswerComing: boolean;
}

export interface TimelineUIStateResult {
  /** The current UI state */
  uiState: TimelineUIState;

  // Convenience booleans
  /** Whether actively streaming (tool execution in progress) */
  isStreaming: boolean;
  /** Whether completed (stop packet seen) */
  isCompleted: boolean;
  /** Whether actively executing tools (streaming without display content, or generating image) */
  isActivelyExecuting: boolean;

  // Display flags
  /** Show collapsed compact content for single step */
  showCollapsedCompact: boolean;
  /** Show collapsed compact content for parallel tools */
  showCollapsedParallel: boolean;
  /** Show parallel tabs in header */
  showParallelTabs: boolean;
  /** Show the "Done" indicator step in expanded view */
  showDoneStep: boolean;
  /** Show the "Stopped" indicator step in expanded view */
  showStoppedStep: boolean;
  /** For stepIsLast calculation (excludes research agent) */
  hasDoneIndicator: boolean;

  // Styling flags
  /** Show tinted background on header */
  showTintedBackground: boolean;
  /** Show rounded bottom on header */
  showRoundedBottom: boolean;
}

/**
 * Derives the current UI state from timeline inputs.
 * Centralizes all boolean logic for timeline rendering decisions.
 */
export function useTimelineUIState(
  input: TimelineUIStateInput
): TimelineUIStateResult {
  return useMemo(() => {
    const {
      stopPacketSeen,
      hasPackets,
      hasDisplayContent,
      userStopped,
      isExpanded,
      lastTurnGroup,
      lastStep,
      lastStepSupportsCollapsedStreaming,
      lastStepHasCollapsedContent,
      lastStepIsResearchAgent,
      parallelActiveStepSupportsCollapsedStreaming,
      parallelActiveStepHasCollapsedContent,
      isGeneratingImage,
      finalAnswerComing,
    } = input;

    // Derive the primary UI state
    let uiState: TimelineUIState;

    if (!hasPackets && !hasDisplayContent && !stopPacketSeen) {
      uiState = TimelineUIState.EMPTY;
    } else if (hasDisplayContent && !hasPackets && !isGeneratingImage) {
      uiState = TimelineUIState.DISPLAY_CONTENT_ONLY;
    } else if (!stopPacketSeen && (!hasDisplayContent || isGeneratingImage)) {
      // Actively executing tools
      uiState = lastTurnGroup?.isParallel
        ? TimelineUIState.STREAMING_PARALLEL
        : TimelineUIState.STREAMING_SEQUENTIAL;
    } else if (userStopped) {
      uiState = TimelineUIState.STOPPED;
    } else if (isExpanded) {
      uiState = TimelineUIState.COMPLETED_EXPANDED;
    } else {
      uiState = TimelineUIState.COMPLETED_COLLAPSED;
    }

    // Convenience booleans
    const isStreaming =
      uiState === TimelineUIState.STREAMING_SEQUENTIAL ||
      uiState === TimelineUIState.STREAMING_PARALLEL;
    const isCompleted =
      uiState === TimelineUIState.COMPLETED_COLLAPSED ||
      uiState === TimelineUIState.COMPLETED_EXPANDED ||
      uiState === TimelineUIState.STOPPED;
    const isActivelyExecuting =
      !stopPacketSeen && (!hasDisplayContent || isGeneratingImage);

    // Parallel tabs in header only when collapsed during streaming
    const showParallelTabs =
      uiState === TimelineUIState.STREAMING_PARALLEL &&
      !isExpanded &&
      !!lastTurnGroup?.isParallel &&
      (lastTurnGroup?.steps.length ?? 0) > 0;

    // Collapsed streaming: show compact content below header (only during tool execution)
    const showCollapsedCompact =
      uiState === TimelineUIState.STREAMING_SEQUENTIAL &&
      !isExpanded &&
      !!lastStep &&
      !lastTurnGroup?.isParallel &&
      lastStepSupportsCollapsedStreaming &&
      lastStepHasCollapsedContent;

    // Collapsed parallel streaming content
    const showCollapsedParallel =
      showParallelTabs &&
      !isExpanded &&
      parallelActiveStepSupportsCollapsedStreaming &&
      parallelActiveStepHasCollapsedContent;

    // Done step: shown when expanded and completed (either normally or with display content)
    // Also shown when finalAnswerComing is true (MESSAGE_START received)
    const showDoneStep =
      (stopPacketSeen || finalAnswerComing) &&
      isExpanded &&
      (!userStopped || hasDisplayContent);

    // Stopped step: shown when user stopped without display content
    const showStoppedStep =
      stopPacketSeen && isExpanded && userStopped && !hasDisplayContent;

    // For stepIsLast calculation: done indicator present (excludes research agent)
    const hasDoneIndicator =
      (stopPacketSeen || finalAnswerComing) &&
      isExpanded &&
      !userStopped &&
      !lastStepIsResearchAgent;

    // Styling flags
    const showTintedBackground = isActivelyExecuting || isExpanded;
    const showRoundedBottom =
      !isExpanded && !showCollapsedCompact && !showCollapsedParallel;

    return {
      uiState,
      isStreaming,
      isCompleted,
      isActivelyExecuting,
      showCollapsedCompact,
      showCollapsedParallel,
      showParallelTabs,
      showDoneStep,
      showStoppedStep,
      hasDoneIndicator,
      showTintedBackground,
      showRoundedBottom,
    };
  }, [input]);
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/packetHelpers.ts
================================================
import {
  CODE_INTERPRETER_TOOL_TYPES,
  Packet,
  PacketType,
  ToolCallArgumentDelta,
} from "@/app/app/services/streamingModels";

// Packet types with renderers supporting collapsed streaming mode.
// TOOL_CALL_ARGUMENT_DELTA is intentionally excluded here because it requires
// a tool_type check — it's handled separately in stepSupportsCollapsedStreaming.
export const COLLAPSED_STREAMING_PACKET_TYPES = new Set<PacketType>([
  PacketType.SEARCH_TOOL_START,
  PacketType.FETCH_TOOL_START,
  PacketType.PYTHON_TOOL_START,
  PacketType.CUSTOM_TOOL_START,
  PacketType.RESEARCH_AGENT_START,
  PacketType.REASONING_START,
  PacketType.DEEP_RESEARCH_PLAN_START,
]);

// Check if packets belong to a research agent (handles its own Done indicator)
export const isResearchAgentPackets = (packets: Packet[]): boolean =>
  packets.some((p) => p.obj.type === PacketType.RESEARCH_AGENT_START);

// Check if packets belong to a search tool
export const isSearchToolPackets = (packets: Packet[]): boolean =>
  packets.some((p) => p.obj.type === PacketType.SEARCH_TOOL_START);

// Check if packets belong to a python tool
export const isPythonToolPackets = (packets: Packet[]): boolean =>
  packets.some(
    (p) =>
      p.obj.type === PacketType.PYTHON_TOOL_START ||
      (p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&
        (p.obj as ToolCallArgumentDelta).tool_type ===
          CODE_INTERPRETER_TOOL_TYPES.PYTHON)
  );

// Check if packets belong to reasoning
export const isReasoningPackets = (packets: Packet[]): boolean =>
  packets.some((p) => p.obj.type === PacketType.REASONING_START);

// Check if step supports collapsed streaming rendering mode
export const stepSupportsCollapsedStreaming = (packets: Packet[]): boolean =>
  packets.some(
    (p) =>
      COLLAPSED_STREAMING_PACKET_TYPES.has(p.obj.type as PacketType) ||
      (p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&
        (p.obj as ToolCallArgumentDelta).tool_type ===
          CODE_INTERPRETER_TOOL_TYPES.PYTHON)
  );

// Check if packets have content worth rendering in collapsed streaming mode.
// Avoids rendering empty containers when only START packets have arrived.
export const stepHasCollapsedStreamingContent = (
  packets: Packet[]
): boolean => {
  const packetTypes = new Set(
    packets.map((packet) => packet.obj.type as PacketType)
  );

  // Errors should render even if no deltas arrived
  if (packetTypes.has(PacketType.ERROR)) {
    return true;
  }

  // Search tools need actual query/doc deltas before showing content
  if (
    packetTypes.has(PacketType.SEARCH_TOOL_QUERIES_DELTA) ||
    packetTypes.has(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA)
  ) {
    return true;
  }

  // Fetch tool shows a loading indicator once started
  if (
    packetTypes.has(PacketType.FETCH_TOOL_START) ||
    packetTypes.has(PacketType.FETCH_TOOL_URLS) ||
    packetTypes.has(PacketType.FETCH_TOOL_DOCUMENTS)
  ) {
    return true;
  }

  // Python tool renders code/output from the start packet onward
  if (
    packetTypes.has(PacketType.PYTHON_TOOL_START) ||
    packetTypes.has(PacketType.PYTHON_TOOL_DELTA) ||
    packets.some(
      (p) =>
        p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&
        (p.obj as ToolCallArgumentDelta).tool_type ===
          CODE_INTERPRETER_TOOL_TYPES.PYTHON
    )
  ) {
    return true;
  }

  // Custom tool shows running/completed state after start
  if (
    packetTypes.has(PacketType.CUSTOM_TOOL_START) ||
    packetTypes.has(PacketType.CUSTOM_TOOL_DELTA)
  ) {
    return true;
  }

  // Research agent has meaningful content from start (task) or report deltas
  if (
    packetTypes.has(PacketType.RESEARCH_AGENT_START) ||
    packetTypes.has(PacketType.INTERMEDIATE_REPORT_START) ||
    packetTypes.has(PacketType.INTERMEDIATE_REPORT_DELTA) ||
    packetTypes.has(PacketType.INTERMEDIATE_REPORT_CITED_DOCS)
  ) {
    return true;
  }

  // Reasoning content only appears in deltas
  if (packetTypes.has(PacketType.REASONING_DELTA)) {
    return true;
  }

  // Deep research plan content only appears in deltas
  if (packetTypes.has(PacketType.DEEP_RESEARCH_PLAN_DELTA)) {
    return true;
  }

  return false;
};

// Check if packets belong to a deep research plan
export const isDeepResearchPlanPackets = (packets: Packet[]): boolean =>
  packets.some((p) => p.obj.type === PacketType.DEEP_RESEARCH_PLAN_START);

// Check if packets belong to a memory tool
export const isMemoryToolPackets = (packets: Packet[]): boolean =>
  packets.some(
    (p) =>
      p.obj.type === PacketType.MEMORY_TOOL_START ||
      p.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS
  );


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineHeaderRow.tsx
================================================
import React from "react";

export interface TimelineHeaderRowProps {
  left?: React.ReactNode;
  children?: React.ReactNode;
}

/**
 * TimelineHeaderRow aligns the top header (e.g., agent avatar + title row)
 * with the same rail width used by the timeline steps.
 */
export function TimelineHeaderRow({ left, children }: TimelineHeaderRowProps) {
  return (
    <div className="flex w-full h-[var(--timeline-header-row-height)]">
      <div className="flex items-center justify-center w-[var(--timeline-rail-width)] h-[var(--timeline-header-row-height)]">
        {left}
      </div>
      <div className="flex-1 min-w-0 h-full">{children}</div>
    </div>
  );
}

export default TimelineHeaderRow;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineIconColumn.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";

/**
 * TimelineRailVariant controls whether a row shows the rail or only reserves width.
 * - rail: renders icon + connector line.
 * - spacer: keeps column width for alignment, but no rail.
 */
export type TimelineRailVariant = "rail" | "spacer";

export interface TimelineIconColumnProps {
  variant?: TimelineRailVariant;
  isFirst?: boolean;
  isLast?: boolean;
  isHover?: boolean;
  disableTopConnectorHover?: boolean;
  icon?: React.ReactNode;
  showIcon?: boolean;
  /**
   * Controls the vertical height of the icon row.
   * - default: uses step header height for normal rows.
   * - compact: uses first-step spacer height for hidden headers.
   */
  iconRowVariant?: "default" | "compact";
}

/**
 * TimelineIconColumn renders the left rail (connector + icon).
 * For default rows, icon alignment is tied to step text padding:
 * - icon wrapper stays fixed at 1.25rem
 * - remaining top/bottom header space is filled with connector segments
 */
export function TimelineIconColumn({
  variant = "rail",
  isFirst = false,
  isLast = false,
  isHover = false,
  disableTopConnectorHover = false,
  icon,
  showIcon = true,
  iconRowVariant = "default",
}: TimelineIconColumnProps) {
  if (variant === "spacer") {
    return <div className="w-[var(--timeline-rail-width)]" />;
  }

  const connectorColorClass = isHover ? "bg-border-04" : "bg-border-01";
  const topConnectorColorClass = disableTopConnectorHover
    ? "bg-border-01"
    : connectorColorClass;

  return (
    <div className="relative flex flex-col items-center w-[var(--timeline-rail-width)]">
      <div
        className={cn(
          "w-full shrink-0 flex flex-col items-center",
          iconRowVariant === "compact"
            ? "h-[var(--timeline-first-top-spacer-height)]"
            : "h-[var(--timeline-step-header-height)]"
        )}
      >
        {iconRowVariant === "default" ? (
          <>
            <div
              className={cn(
                "w-px h-[calc(var(--timeline-step-top-padding)*2)]",
                !isFirst && topConnectorColorClass
              )}
            />
            <div className="h-[var(--timeline-branch-icon-wrapper-size)] w-[var(--timeline-branch-icon-wrapper-size)] shrink-0 flex items-center justify-center">
              {showIcon && icon}
            </div>
            <div className={cn("w-px flex-1", connectorColorClass)} />
          </>
        ) : (
          <div className={cn("w-px flex-1", !isFirst && connectorColorClass)} />
        )}
      </div>

      {!isLast && <div className={cn("w-px flex-1", connectorColorClass)} />}
    </div>
  );
}

export default TimelineIconColumn;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineRoot.tsx
================================================
import React from "react";
import { getTimelineStyles, TimelineTokens } from "./tokens";

export interface TimelineRootProps {
  children: React.ReactNode;
  tokens?: Partial<TimelineTokens>;
}

/**
 * TimelineRoot provides the shared sizing contract for all timeline primitives.
 * It sets CSS variables derived from TimelineTokens so rail width, header height,
 * and padding stay consistent across the timeline.
 */
export function TimelineRoot({ children, tokens }: TimelineRootProps) {
  return (
    <div
      className="flex flex-col pl-[var(--timeline-agent-message-padding-left)]"
      style={getTimelineStyles(tokens)}
    >
      {children}
    </div>
  );
}

export default TimelineRoot;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineRow.tsx
================================================
import React from "react";
import { TimelineIconColumn, TimelineRailVariant } from "./TimelineIconColumn";

/**
 * TimelineRowRailVariant controls how the left column is rendered.
 * - rail: normal icon + connector column.
 * - spacer: empty column that preserves rail width.
 * - none: no left column at all.
 */
export type TimelineRowRailVariant = TimelineRailVariant | "none";

export interface TimelineRowProps {
  railVariant?: TimelineRowRailVariant;
  icon?: React.ReactNode;
  showIcon?: boolean;
  disableTopConnectorHover?: boolean;
  /**
   * Controls the height of the icon row within the rail.
   * Use compact when the header is hidden to keep alignment stable.
   */
  iconRowVariant?: "default" | "compact";
  isFirst?: boolean;
  isLast?: boolean;
  isHover?: boolean;
  children?: React.ReactNode;
}

/**
 * TimelineRow composes the rail column + content column.
 * It is the base layout primitive for all timeline rows.
 */
export function TimelineRow({
  railVariant = "rail",
  icon,
  showIcon = true,
  disableTopConnectorHover = false,
  iconRowVariant = "default",
  isFirst = false,
  isLast = false,
  isHover = false,
  children,
}: TimelineRowProps) {
  return (
    <div className="flex w-full">
      {railVariant !== "none" && (
        <TimelineIconColumn
          variant={railVariant === "spacer" ? "spacer" : "rail"}
          icon={icon}
          showIcon={showIcon}
          disableTopConnectorHover={disableTopConnectorHover}
          iconRowVariant={iconRowVariant}
          isFirst={isFirst}
          isLast={isLast}
          isHover={isHover}
        />
      )}
      <div className="flex-1 min-w-0">{children}</div>
    </div>
  );
}

export default TimelineRow;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineStepContent.tsx
================================================
import React, { FunctionComponent } from "react";
import { cn } from "@/lib/utils";
import { SvgFold, SvgExpand, SvgXOctagon } from "@opal/icons";
import { IconProps } from "@opal/types";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { TimelineSurfaceBackground } from "@/app/app/message/messageComponents/timeline/primitives/TimelineSurface";

export interface TimelineStepContentProps {
  children?: React.ReactNode;
  header?: React.ReactNode;
  buttonTitle?: string;
  isExpanded?: boolean;
  onToggle?: () => void;
  collapsible?: boolean;
  supportsCollapsible?: boolean;
  hideHeader?: boolean;
  collapsedIcon?: FunctionComponent<IconProps>;
  noPaddingRight?: boolean;
  surfaceBackground?: TimelineSurfaceBackground;
}

/**
 * TimelineStepContent renders the header row + content body for a step.
 * It is used by StepContainer and by parallel tab content to keep layout consistent.
 */
export function TimelineStepContent({
  children,
  header,
  buttonTitle,
  isExpanded = true,
  onToggle,
  collapsible = true,
  supportsCollapsible = false,
  hideHeader = false,
  collapsedIcon: CollapsedIconComponent,
  noPaddingRight = false,
  surfaceBackground,
}: TimelineStepContentProps) {
  const showCollapseControls = collapsible && supportsCollapsible && onToggle;

  return (
    <div className="flex flex-col px-1 pb-1">
      {!hideHeader && header && (
        <div className="flex items-center justify-between h-[var(--timeline-step-header-height)] pl-1">
          <div className="pt-[var(--timeline-step-top-padding)] pl-[var(--timeline-common-text-padding)] w-full">
            <Text as="p" mainUiMuted text04>
              {header}
            </Text>
          </div>

          <div className="h-full w-[var(--timeline-step-header-right-section-width)] flex items-center justify-end">
            {showCollapseControls ? (
              buttonTitle ? (
                <Button
                  prominence="tertiary"
                  size="md"
                  onClick={onToggle}
                  rightIcon={
                    isExpanded ? SvgFold : CollapsedIconComponent || SvgExpand
                  }
                >
                  {buttonTitle}
                </Button>
              ) : (
                <Button
                  prominence="tertiary"
                  size="md"
                  onClick={onToggle}
                  icon={
                    isExpanded ? SvgFold : CollapsedIconComponent || SvgExpand
                  }
                />
              )
            ) : surfaceBackground === "error" ? (
              <div className="p-1.5">
                <SvgXOctagon className="h-4 w-4 text-status-error-05" />
              </div>
            ) : null}
          </div>
        </div>
      )}

      {children && (
        <div
          className={cn(
            "pl-1 pb-1",
            !noPaddingRight &&
              "pr-[var(--timeline-step-header-right-section-width)]",
            hideHeader && "pt-[var(--timeline-step-top-padding)]"
          )}
        >
          {children}
        </div>
      )}
    </div>
  );
}

export default TimelineStepContent;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineSurface.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";

export type TimelineSurfaceBackground = "tint" | "transparent" | "error";

export interface TimelineSurfaceProps {
  children: React.ReactNode;
  className?: string;
  isHover?: boolean;
  roundedTop?: boolean;
  roundedBottom?: boolean;
  background?: TimelineSurfaceBackground;
}

/**
 * TimelineSurface provides the shared background + rounded corners for a row.
 * Use it to keep hover and tint behavior consistent across timeline items.
 */
export function TimelineSurface({
  children,
  className,
  isHover = false,
  roundedTop = false,
  roundedBottom = false,
  background = "tint",
}: TimelineSurfaceProps) {
  if (React.Children.count(children) === 0) {
    return null;
  }

  const baseBackground =
    background === "tint"
      ? "bg-background-tint-00"
      : background === "error"
        ? "bg-status-error-00"
        : "";
  const hoverBackground =
    (background === "tint" || background === "error") && isHover
      ? "bg-background-tint-02"
      : "";

  return (
    <div
      className={cn(
        "transition-colors duration-200",
        baseBackground,
        hoverBackground,
        roundedTop && "rounded-t-12",
        roundedBottom && "rounded-b-12",
        className
      )}
    >
      {children}
    </div>
  );
}

export default TimelineSurface;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/TimelineTopSpacer.tsx
================================================
import React from "react";
import { TimelineTopSpacerVariant } from "./tokens";

export interface TimelineTopSpacerProps {
  variant?: TimelineTopSpacerVariant;
}

/**
 * TimelineTopSpacer creates vertical spacing at the top of a step's content.
 * It mirrors connector spacing when the connector is part of layout flow.
 */
export function TimelineTopSpacer({
  variant = "default",
}: TimelineTopSpacerProps) {
  if (variant === "none") {
    return null;
  }

  if (variant === "first") {
    return <div className="h-[var(--timeline-first-top-spacer-height)]" />;
  }

  return <div className="h-[var(--timeline-top-connector-height)]" />;
}

export default TimelineTopSpacer;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/primitives/tokens.ts
================================================
import React from "react";

/**
 * TimelineTokens define the shared layout contract for timeline primitives.
 * Values are applied as CSS variables via TimelineRoot.
 */
export interface TimelineTokens {
  railWidth: string;
  headerRowHeight: string;
  stepHeaderHeight: string;
  topConnectorHeight: string;
  firstTopSpacerHeight: string;
  iconSize: string;
  branchIconWrapperSize: string;
  branchIconSize: string;
  stepHeaderRightSectionWidth: string;
  headerPaddingLeft: string;
  headerPaddingRight: string;
  headerTextPaddingX: string;
  headerTextPaddingY: string;
  stepTopPadding: string;
  agentMessagePaddingLeft: string;
  timelineCommonTextPadding: string;
}

/**
 * Controls the top spacer inside TimelineStepContent.
 * - default: reserve space equal to the top connector height.
 * - first: smaller spacer used for the first step.
 * - none: no spacer (use when connector is drawn outside layout flow).
 */
export type TimelineTopSpacerVariant = "default" | "first" | "none";

/**
 * Default sizing for the timeline layout. Override in TimelineRoot if needed.
 */
export const timelineTokenDefaults: TimelineTokens = {
  railWidth: "2.25rem",
  headerRowHeight: "2.25rem",
  stepHeaderHeight: "2rem",
  topConnectorHeight: "0.5rem",
  firstTopSpacerHeight: "0.25rem",
  iconSize: "0.75rem",
  branchIconWrapperSize: "1.25rem",
  branchIconSize: "0.75rem",
  stepHeaderRightSectionWidth: "2.125rem",
  headerPaddingLeft: "0.5rem",
  headerPaddingRight: "0.25rem",
  headerTextPaddingX: "0.375rem",
  headerTextPaddingY: "0.125rem",
  stepTopPadding: "0.25rem",
  agentMessagePaddingLeft: "0.12rem",
  timelineCommonTextPadding: "0.12rem",
};

/**
 * Returns CSS variables for timeline layout based on defaults + overrides.
 */
export function getTimelineStyles(
  tokens?: Partial<TimelineTokens>
): React.CSSProperties {
  const merged: TimelineTokens = { ...timelineTokenDefaults, ...tokens };
  return {
    "--timeline-rail-width": merged.railWidth,
    "--timeline-header-row-height": merged.headerRowHeight,
    "--timeline-step-header-height": merged.stepHeaderHeight,
    "--timeline-top-connector-height": merged.topConnectorHeight,
    "--timeline-first-top-spacer-height": merged.firstTopSpacerHeight,
    "--timeline-icon-size": merged.iconSize,
    "--timeline-branch-icon-wrapper-size": merged.branchIconWrapperSize,
    "--timeline-branch-icon-size": merged.branchIconSize,
    "--timeline-step-header-right-section-width":
      merged.stepHeaderRightSectionWidth,
    "--timeline-header-padding-left": merged.headerPaddingLeft,
    "--timeline-header-padding-right": merged.headerPaddingRight,
    "--timeline-header-text-padding-x": merged.headerTextPaddingX,
    "--timeline-header-text-padding-y": merged.headerTextPaddingY,
    "--timeline-step-top-padding": merged.stepTopPadding,
    "--timeline-agent-message-padding-left": merged.agentMessagePaddingLeft,
    "--timeline-common-text-padding": merged.timelineCommonTextPadding,
  } as React.CSSProperties;
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/code/PythonToolRenderer.tsx
================================================
import { useEffect, useMemo } from "react";
import {
  PacketType,
  PythonToolPacket,
  PythonToolStart,
  PythonToolDelta,
  ToolCallArgumentDelta,
  SectionEnd,
  CODE_INTERPRETER_TOOL_TYPES,
} from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { CodeBlock } from "@/app/app/message/CodeBlock";
import hljs from "highlight.js/lib/core";
import python from "highlight.js/lib/languages/python";
import { SvgTerminal } from "@opal/icons";
import FadingEdgeContainer from "@/refresh-components/FadingEdgeContainer";

// Register Python language for highlighting
hljs.registerLanguage("python", python);

// Component to render syntax-highlighted Python code
function HighlightedPythonCode({ code }: { code: string }) {
  const highlightedHtml = useMemo(() => {
    try {
      return hljs.highlight(code, { language: "python" }).value;
    } catch {
      return code;
    }
  }, [code]);

  return (
    <span
      dangerouslySetInnerHTML={{ __html: highlightedHtml }}
      className="hljs"
    />
  );
}

// Helper function to construct current Python execution state
function constructCurrentPythonState(packets: PythonToolPacket[]) {
  // Accumulate streaming code from argument deltas (arrives before PythonToolStart)
  const streamingCode = packets
    .filter(
      (packet) =>
        packet.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&
        (packet.obj as ToolCallArgumentDelta).tool_type ===
          CODE_INTERPRETER_TOOL_TYPES.PYTHON
    )
    .map((packet) =>
      String((packet.obj as ToolCallArgumentDelta).argument_deltas.code ?? "")
    )
    .join("");
  const pythonStart = packets.find(
    (packet) => packet.obj.type === PacketType.PYTHON_TOOL_START
  )?.obj as PythonToolStart | null;
  const pythonDeltas = packets
    .filter((packet) => packet.obj.type === PacketType.PYTHON_TOOL_DELTA)
    .map((packet) => packet.obj as PythonToolDelta);
  const pythonEnd = packets.find(
    (packet) =>
      packet.obj.type === PacketType.SECTION_END ||
      packet.obj.type === PacketType.ERROR
  )?.obj as SectionEnd | null;

  // Use complete code from PythonToolStart if available, else use streamed code.
  const code = pythonStart?.code || streamingCode;
  const stdout = pythonDeltas
    .map((delta) => delta?.stdout || "")
    .filter((s) => s)
    .join("");
  const stderr = pythonDeltas
    .map((delta) => delta?.stderr || "")
    .filter((s) => s)
    .join("");
  const fileIds = pythonDeltas.flatMap((delta) => delta?.file_ids || []);
  const isStreaming = !pythonStart && streamingCode.length > 0;
  const isExecuting = pythonStart && !pythonEnd;
  const isComplete = pythonStart && pythonEnd;
  const hasError = stderr.length > 0;

  return {
    code,
    stdout,
    stderr,
    fileIds,
    isStreaming,
    isExecuting,
    isComplete,
    hasError,
  };
}

export const PythonToolRenderer: MessageRenderer<PythonToolPacket, {}> = ({
  packets,
  onComplete,
  renderType,
  children,
}) => {
  const {
    code,
    stdout,
    stderr,
    fileIds,
    isStreaming,
    isExecuting,
    isComplete,
    hasError,
  } = constructCurrentPythonState(packets);

  useEffect(() => {
    if (isComplete) {
      onComplete();
    }
  }, [isComplete, onComplete]);

  const status = useMemo(() => {
    if (isStreaming) {
      return "Writing code...";
    }
    if (isExecuting) {
      return "Executing Python code...";
    }
    if (hasError) {
      return "Python execution failed";
    }
    if (isComplete) {
      return "Python execution completed";
    }
    return "Python execution";
  }, [isStreaming, isComplete, isExecuting, hasError]);

  // Shared content for all states - used by both FULL and compact modes
  const content = (
    <div className="flex flex-col mb-1 space-y-2">
      {/* Loading indicator when streaming or executing */}
      {(isStreaming || isExecuting) && (
        <div className="flex items-center gap-2 text-sm text-muted-foreground">
          <div className="flex gap-0.5">
            <div className="w-1 h-1 bg-current rounded-full animate-pulse"></div>
            <div
              className="w-1 h-1 bg-current rounded-full animate-pulse"
              style={{ animationDelay: "0.1s" }}
            ></div>
            <div
              className="w-1 h-1 bg-current rounded-full animate-pulse"
              style={{ animationDelay: "0.2s" }}
            ></div>
          </div>
          <span>{isStreaming ? "Writing code..." : "Running code..."}</span>
        </div>
      )}

      {/* Code block */}
      {code && (
        <div className="prose max-w-full">
          <CodeBlock className="language-python" codeText={code.trim()}>
            <HighlightedPythonCode code={code.trim()} />
          </CodeBlock>
        </div>
      )}

      {/* Output */}
      {stdout && (
        <div className="rounded-md bg-background-neutral-02 p-3">
          <div className="text-xs font-semibold mb-1 text-text-03">Output:</div>
          <pre className="text-sm whitespace-pre-wrap font-mono text-text-01 overflow-x-auto">
            {stdout}
          </pre>
        </div>
      )}

      {/* Error */}
      {stderr && (
        <div className="rounded-md bg-status-error-01 p-3 border border-status-error-02">
          <div className="text-xs font-semibold mb-1 text-status-error-05">
            Error:
          </div>
          <pre className="text-sm whitespace-pre-wrap font-mono text-status-error-05 overflow-x-auto">
            {stderr}
          </pre>
        </div>
      )}

      {/* File count */}
      {fileIds.length > 0 && (
        <div className="text-sm text-text-03">
          Generated {fileIds.length} file{fileIds.length !== 1 ? "s" : ""}
        </div>
      )}

      {/* No output fallback - only when complete with no output */}
      {isComplete && !stdout && !stderr && (
        <div className="py-2 text-center text-text-04">
          <SvgTerminal className="w-4 h-4 mx-auto mb-1 opacity-50" />
          <p className="text-xs">No output</p>
        </div>
      )}
    </div>
  );

  // FULL mode: render content directly
  if (renderType === RenderType.FULL) {
    return children([
      {
        icon: SvgTerminal,
        status,
        content,
        supportsCollapsible: true,
        alwaysCollapsible: true,
      },
    ]);
  }

  // Compact mode: wrap content in FadeDiv
  return children([
    {
      icon: SvgTerminal,
      status,
      supportsCollapsible: true,
      alwaysCollapsible: true,
      content: (
        <FadingEdgeContainer
          direction="bottom"
          className="max-h-24 overflow-hidden"
        >
          {content}
        </FadingEdgeContainer>
      ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/deepresearch/DeepResearchPlanRenderer.tsx
================================================
import React, { useCallback, useMemo } from "react";
import { SvgCircle } from "@opal/icons";

import {
  DeepResearchPlanPacket,
  PacketType,
} from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  FullChatState,
} from "@/app/app/message/messageComponents/interfaces";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import ExpandableTextDisplay from "@/refresh-components/texts/ExpandableTextDisplay";
import {
  mutedTextMarkdownComponents,
  collapsedMarkdownComponents,
} from "@/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents";

/**
 * Renderer for deep research plan packets.
 * Streams the research plan content with a list icon.
 */
export const DeepResearchPlanRenderer: MessageRenderer<
  DeepResearchPlanPacket,
  FullChatState
> = ({ packets, stopPacketSeen, children }) => {
  const isComplete = packets.some((p) => p.obj.type === PacketType.SECTION_END);

  const fullContent = useMemo(
    () =>
      packets
        .map((packet) => {
          if (packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_DELTA) {
            return packet.obj.content;
          }
          return "";
        })
        .join(""),
    [packets]
  );

  const statusText = isComplete ? "Generated plan" : "Generating plan";

  // Markdown renderer callback for ExpandableTextDisplay
  // Uses collapsed components (no spacing) in collapsed view, normal spacing in expanded modal
  const renderMarkdown = useCallback(
    (text: string, isExpanded: boolean) => (
      <MinimalMarkdown
        content={text}
        components={
          isExpanded ? mutedTextMarkdownComponents : collapsedMarkdownComponents
        }
      />
    ),
    []
  );

  const planContent = (
    <ExpandableTextDisplay
      title="Research Plan"
      content={fullContent}
      renderContent={renderMarkdown}
      isStreaming={!isComplete}
    />
  );

  return children([
    {
      icon: SvgCircle,
      status: statusText,
      content: planContent,
      noPaddingRight: true,
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/deepresearch/ResearchAgentRenderer.tsx
================================================
import React, { useMemo, useCallback } from "react";
import { SvgCircle, SvgCheckCircle, SvgBookOpen } from "@opal/icons";

import {
  PacketType,
  Packet,
  ResearchAgentPacket,
  ResearchAgentStart,
  IntermediateReportDelta,
} from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  FullChatState,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { getToolName } from "@/app/app/message/messageComponents/toolDisplayHelpers";
import { StepContainer } from "@/app/app/message/messageComponents/timeline/StepContainer";
import {
  TimelineRendererComponent,
  TimelineRendererOutput,
} from "@/app/app/message/messageComponents/timeline/TimelineRendererComponent";
import { TimelineStepComposer } from "@/app/app/message/messageComponents/timeline/TimelineStepComposer";
import ExpandableTextDisplay from "@/refresh-components/texts/ExpandableTextDisplay";
import Text from "@/refresh-components/texts/Text";
import {
  processContent,
  useMarkdownComponents,
  renderMarkdown,
} from "@/app/app/message/messageComponents/markdownUtils";

interface NestedToolGroup {
  sub_turn_index: number;
  toolType: string;
  status: string;
  isComplete: boolean;
  packets: Packet[];
}

/**
 * ResearchAgentRenderer - Renders research agent steps in deep research
 *
 * Segregates packets by tool and uses StepContainer + TimelineRendererComponent.
 *
 * RenderType modes:
 * - FULL: Shows all nested tool groups, research task, and report. Headers passed as `status` prop.
 *         Used when step is expanded in timeline.
 * - COMPACT: Shows only the latest active item (tool or report). Header passed as `status` prop.
 *            Used when step is collapsed in timeline, still wrapped in StepContainer.
 * - HIGHLIGHT: Shows only the latest active item with header embedded directly in content.
 *              No StepContainer wrapper. Used for parallel streaming preview.
 *              Nested tools are rendered with HIGHLIGHT mode recursively.
 */
export const ResearchAgentRenderer: MessageRenderer<
  ResearchAgentPacket,
  FullChatState
> = ({
  packets,
  state,
  onComplete,
  renderType,
  stopPacketSeen,
  isLastStep = true,
  isHover = false,
  children,
}) => {
  // Extract the research task from the start packet
  const startPacket = packets.find(
    (p) => p.obj.type === PacketType.RESEARCH_AGENT_START
  );
  const researchTask = startPacket
    ? (startPacket.obj as ResearchAgentStart).research_task
    : "";

  // Separate parent packets from nested tool packets
  const { parentPackets, nestedToolGroups } = useMemo(() => {
    const parent: Packet[] = [];
    const nestedBySubTurn = new Map<number, Packet[]>();

    packets.forEach((packet) => {
      const subTurnIndex = packet.placement.sub_turn_index;
      if (subTurnIndex === undefined || subTurnIndex === null) {
        parent.push(packet);
      } else {
        if (!nestedBySubTurn.has(subTurnIndex)) {
          nestedBySubTurn.set(subTurnIndex, []);
        }
        nestedBySubTurn.get(subTurnIndex)!.push(packet);
      }
    });

    // Convert nested packets to groups with metadata
    const groups: NestedToolGroup[] = Array.from(nestedBySubTurn.entries())
      .sort(([a], [b]) => a - b)
      .map(([subTurnIndex, toolPackets]) => {
        const name = getToolName(toolPackets);
        const isComplete = toolPackets.some(
          (p) =>
            p.obj.type === PacketType.SECTION_END ||
            p.obj.type === PacketType.REASONING_DONE
        );
        return {
          sub_turn_index: subTurnIndex,
          toolType: name,
          status: isComplete ? "Complete" : "Running",
          isComplete,
          packets: toolPackets,
        };
      });

    return { parentPackets: parent, nestedToolGroups: groups };
  }, [packets]);

  // Filter nested tool groups based on renderType (COMPACT and HIGHLIGHT show only latest)
  const visibleNestedToolGroups = useMemo(() => {
    if (
      (renderType !== RenderType.COMPACT &&
        renderType !== RenderType.HIGHLIGHT) ||
      nestedToolGroups.length === 0
    ) {
      return nestedToolGroups;
    }
    // COMPACT/HIGHLIGHT mode: show only the latest group (last in sorted array)
    const latestGroup = nestedToolGroups[nestedToolGroups.length - 1];
    return latestGroup ? [latestGroup] : [];
  }, [renderType, nestedToolGroups]);

  // Check completion from parent packets
  const isComplete = parentPackets.some(
    (p) => p.obj.type === PacketType.SECTION_END
  );

  // Determine if report is actively streaming
  const isReportStreaming = !isComplete && !stopPacketSeen;

  // Build report content from parent packets
  const fullReportContent = parentPackets
    .map((packet) => {
      if (packet.obj.type === PacketType.INTERMEDIATE_REPORT_DELTA) {
        return (packet.obj as IntermediateReportDelta).content;
      }
      return "";
    })
    .join("");

  // Condensed modes: show only the currently active/streaming section
  const isCompact = renderType === RenderType.COMPACT;
  const isHighlight = renderType === RenderType.HIGHLIGHT;
  const isCondensedMode = isCompact || isHighlight;
  // Report takes priority if it has content (means tools are done, report is streaming)
  const showOnlyReport =
    isCondensedMode && fullReportContent && visibleNestedToolGroups.length > 0;
  const showOnlyTools =
    isCondensedMode && !fullReportContent && visibleNestedToolGroups.length > 0;

  // Process content once for consistent markdown handling
  // This ensures code block extraction uses the same offsets as rendered content
  const processedReportContent = useMemo(
    () => processContent(fullReportContent),
    [fullReportContent]
  );

  // Get markdown components for rendering (stable across renders)
  // Uses processed content so code block extraction offsets match rendered content
  const markdownComponents = useMarkdownComponents(
    state,
    processedReportContent,
    "text-text-03 font-main-ui-body"
  );

  // Stable callbacks to avoid creating new functions on every render
  // renderReport renders the processed content
  // Uses pre-computed processedReportContent since ExpandableTextDisplay
  // passes the same fullReportContent that we processed above
  // Parameters are required by ExpandableTextDisplay interface but we use
  // the pre-processed content to ensure offsets match code block extraction
  const renderReport = useCallback(
    (_content: string, _isExpanded?: boolean) =>
      renderMarkdown(
        processedReportContent,
        markdownComponents,
        "text-text-03 font-main-ui-body"
      ),
    [processedReportContent, markdownComponents]
  );

  // HIGHLIGHT mode: return raw content with header embedded in content
  if (isHighlight) {
    if (showOnlyReport) {
      return children([
        {
          icon: null,
          status: null,
          content: (
            <div className="flex flex-col pl-[var(--timeline-common-text-padding)]">
              <Text as="p" text04 mainUiMuted className="mb-1">
                Research Report
              </Text>
              <ExpandableTextDisplay
                title="Research Report"
                content={fullReportContent}
                maxLines={5}
                renderContent={renderReport}
                isStreaming={isReportStreaming}
              />
            </div>
          ),
          supportsCollapsible: true,
          timelineLayout: "content",
        },
      ]);
    }

    if (showOnlyTools) {
      const latestGroup = visibleNestedToolGroups[0];
      if (latestGroup) {
        return (
          <TimelineRendererComponent
            key={latestGroup.sub_turn_index}
            packets={latestGroup.packets}
            chatState={state}
            animate={!stopPacketSeen && !latestGroup.isComplete}
            stopPacketSeen={stopPacketSeen}
            defaultExpanded={false}
            renderTypeOverride={RenderType.HIGHLIGHT}
            isLastStep={true}
            isHover={isHover}
          >
            {(results: TimelineRendererOutput) =>
              children([
                {
                  icon: null,
                  status: null,
                  content: (
                    <>
                      {results.map((result, index) => (
                        <React.Fragment key={index}>
                          {result.content}
                        </React.Fragment>
                      ))}
                    </>
                  ),
                  supportsCollapsible: true,
                  timelineLayout: "content",
                },
              ])
            }
          </TimelineRendererComponent>
        );
      }
    }

    // Fallback: research task with header embedded
    if (researchTask) {
      return children([
        {
          icon: null,
          status: null,
          content: (
            <div className="flex flex-col pl-[var(--timeline-common-text-padding)]">
              <Text as="p" text04 mainUiMuted>
                Research Task
              </Text>
              <Text as="p" text03 mainUiMuted>
                {researchTask}
              </Text>
            </div>
          ),
          supportsCollapsible: true,
          timelineLayout: "content",
        },
      ]);
    }

    return children([
      {
        icon: null,
        status: null,
        content: <></>,
        supportsCollapsible: true,
        timelineLayout: "content",
      },
    ]);
  }

  // Build content using StepContainer pattern
  const researchAgentContent = (
    <div className="flex flex-col">
      {/* Research Task - hidden in compact mode when tools/report are active */}
      {researchTask && !showOnlyReport && !showOnlyTools && (
        <StepContainer
          stepIcon={SvgCircle}
          header="Research Task"
          collapsible={true}
          isLastStep={
            !stopPacketSeen &&
            nestedToolGroups.length === 0 &&
            !fullReportContent &&
            !isComplete
          }
          isHover={isHover}
        >
          <div className="pl-[var(--timeline-common-text-padding)]">
            <Text as="p" text02 mainUiMuted>
              {researchTask}
            </Text>
          </div>
        </StepContainer>
      )}

      {/* Nested tool calls - hidden when report is streaming in compact mode */}
      {!showOnlyReport &&
        visibleNestedToolGroups.map((group, index) => {
          const isLastNestedStep =
            !stopPacketSeen &&
            index === visibleNestedToolGroups.length - 1 &&
            !fullReportContent &&
            !isComplete;

          return (
            <TimelineRendererComponent
              key={group.sub_turn_index}
              packets={group.packets}
              chatState={state}
              animate={!stopPacketSeen && !group.isComplete}
              stopPacketSeen={stopPacketSeen}
              defaultExpanded={true}
              isLastStep={isLastNestedStep}
              isHover={isHover}
            >
              {(results: TimelineRendererOutput) => (
                <TimelineStepComposer
                  results={results}
                  isLastStep={isLastNestedStep}
                  isFirstStep={!researchTask && index === 0}
                  isSingleStep={false}
                  collapsible={true}
                />
              )}
            </TimelineRendererComponent>
          );
        })}

      {/* Intermediate report - hidden when tools are active in compact mode */}
      {fullReportContent && !showOnlyTools && (
        <StepContainer
          stepIcon={SvgBookOpen}
          header="Research Report"
          isLastStep={!stopPacketSeen && !isComplete}
          isFirstStep={!researchTask && nestedToolGroups.length === 0}
          isHover={isHover}
          noPaddingRight={true}
        >
          <div className="pl-[var(--timeline-common-text-padding)]">
            <ExpandableTextDisplay
              title="Research Report"
              content={fullReportContent}
              renderContent={renderReport}
              isStreaming={isReportStreaming}
            />
          </div>
        </StepContainer>
      )}
    </div>
  );

  // Return simplified result (no icon, no status)
  return children([
    {
      icon: null,
      status: null,
      content: researchAgentContent,
      supportsCollapsible: true,
      timelineLayout: "content",
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/fetch/FetchToolRenderer.tsx
================================================
import { FetchToolPacket } from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { BlinkingBar } from "@/app/app/message/BlinkingBar";
import { OnyxDocument } from "@/lib/search/interfaces";
import { ValidSources } from "@/lib/types";
import { SearchChipList, SourceInfo } from "../search/SearchChipList";
import { getMetadataTags } from "../search/searchStateUtils";
import {
  constructCurrentFetchState,
  INITIAL_URLS_TO_SHOW,
  URLS_PER_EXPANSION,
} from "./fetchStateUtils";
import Text from "@/refresh-components/texts/Text";
import { SvgCircle } from "@opal/icons";

const urlToSourceInfo = (url: string, index: number): SourceInfo => ({
  id: `url-${index}`,
  title: url,
  sourceType: ValidSources.Web,
  sourceUrl: url,
});

const documentToSourceInfo = (doc: OnyxDocument): SourceInfo => ({
  id: doc.document_id,
  title: doc.semantic_identifier || doc.link || "",
  sourceType: doc.source_type || ValidSources.Web,
  sourceUrl: doc.link,
  description: doc.blurb,
  metadata: {
    date: doc.updated_at || undefined,
    tags: getMetadataTags(doc.metadata),
  },
});

/**
 * FetchToolRenderer - Renders URL fetch/open tool execution steps
 *
 * RenderType modes:
 * - FULL: Shows all details (URLs being opened + reading). Header passed as `status` prop.
 *         Used when step is expanded in timeline.
 * - COMPACT: Shows only reading (no URL list). Header passed as `status` prop.
 *            Used when step is collapsed in timeline, still wrapped in StepContainer.
 * - HIGHLIGHT: Shows URL list with header embedded directly in content.
 *              No StepContainer wrapper. Used for parallel streaming preview.
 */
export const FetchToolRenderer: MessageRenderer<FetchToolPacket, {}> = ({
  packets,
  onComplete,
  animate,
  stopPacketSeen,
  renderType,
  children,
}) => {
  const fetchState = constructCurrentFetchState(packets);
  const { urls, documents, hasStarted, isLoading, isComplete } = fetchState;
  const isCompact = renderType === RenderType.COMPACT;
  const isHighlight = renderType === RenderType.HIGHLIGHT;

  if (!hasStarted) {
    return children([
      {
        icon: SvgCircle,
        status: "Reading",
        content: <div />,
        supportsCollapsible: false,
        timelineLayout: "timeline",
      },
    ]);
  }

  const displayDocuments = documents.length > 0;
  const displayUrls = !displayDocuments && isComplete && urls.length > 0;

  // HIGHLIGHT mode: header embedded in content, no StepContainer
  if (isHighlight) {
    return children([
      {
        icon: null,
        status: null,
        supportsCollapsible: false,
        timelineLayout: "content",
        content: (
          <div className="flex flex-col">
            <Text as="p" text02 className="text-sm mb-1">
              Reading
            </Text>
            {displayDocuments ? (
              <SearchChipList
                items={documents}
                initialCount={INITIAL_URLS_TO_SHOW}
                expansionCount={URLS_PER_EXPANSION}
                getKey={(doc: OnyxDocument) => doc.document_id}
                toSourceInfo={(doc: OnyxDocument) => documentToSourceInfo(doc)}
                onClick={(doc: OnyxDocument) => {
                  if (doc.link) window.open(doc.link, "_blank");
                }}
                emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
              />
            ) : displayUrls ? (
              <SearchChipList
                items={urls}
                initialCount={INITIAL_URLS_TO_SHOW}
                expansionCount={URLS_PER_EXPANSION}
                getKey={(url: string) => url}
                toSourceInfo={urlToSourceInfo}
                onClick={(url: string) => window.open(url, "_blank")}
                emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
              />
            ) : (
              !stopPacketSeen && <BlinkingBar />
            )}
          </div>
        ),
      },
    ]);
  }

  return children([
    {
      icon: SvgCircle,
      status: "Reading",
      supportsCollapsible: false,
      timelineLayout: "timeline",
      content: (
        <div className="flex flex-col">
          {displayDocuments ? (
            <SearchChipList
              items={documents}
              initialCount={INITIAL_URLS_TO_SHOW}
              expansionCount={URLS_PER_EXPANSION}
              getKey={(doc: OnyxDocument) => doc.document_id}
              toSourceInfo={(doc: OnyxDocument) => documentToSourceInfo(doc)}
              onClick={(doc: OnyxDocument) => {
                if (doc.link) window.open(doc.link, "_blank");
              }}
              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
            />
          ) : displayUrls ? (
            <SearchChipList
              items={urls}
              initialCount={INITIAL_URLS_TO_SHOW}
              expansionCount={URLS_PER_EXPANSION}
              getKey={(url: string) => url}
              toSourceInfo={urlToSourceInfo}
              onClick={(url: string) => window.open(url, "_blank")}
              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
            />
          ) : (
            <div className="flex flex-wrap gap-x-2 gap-y-2 ml-1">
              {!stopPacketSeen && <BlinkingBar />}
            </div>
          )}
        </div>
      ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/fetch/fetchStateUtils.ts
================================================
import {
  PacketType,
  FetchToolPacket,
  FetchToolUrls,
  FetchToolDocuments,
} from "@/app/app/services/streamingModels";
import { OnyxDocument } from "@/lib/search/interfaces";

export const INITIAL_URLS_TO_SHOW = 3;
export const URLS_PER_EXPANSION = 5;
export const READING_MIN_DURATION_MS = 1000;
export const READ_MIN_DURATION_MS = 1000;

export interface FetchState {
  urls: string[];
  documents: OnyxDocument[];
  hasStarted: boolean;
  isLoading: boolean;
  isComplete: boolean;
}

/** Constructs the current fetch state from fetch tool packets. */
export const constructCurrentFetchState = (
  packets: FetchToolPacket[]
): FetchState => {
  const startPacket = packets.find(
    (packet) => packet.obj.type === PacketType.FETCH_TOOL_START
  );
  const urlsPacket = packets.find(
    (packet) => packet.obj.type === PacketType.FETCH_TOOL_URLS
  )?.obj as FetchToolUrls | undefined;
  const documentsPacket = packets.find(
    (packet) => packet.obj.type === PacketType.FETCH_TOOL_DOCUMENTS
  )?.obj as FetchToolDocuments | undefined;
  const sectionEnd = packets.find(
    (packet) =>
      packet.obj.type === PacketType.SECTION_END ||
      packet.obj.type === PacketType.ERROR
  );

  const urls = urlsPacket?.urls || [];
  const documents = documentsPacket?.documents || [];
  const hasStarted = Boolean(startPacket);
  const isLoading = hasStarted && !documentsPacket;
  const isComplete = Boolean(startPacket && sectionEnd);

  return { urls, documents, hasStarted, isLoading, isComplete };
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/filereader/FileReaderToolRenderer.tsx
================================================
import { useEffect } from "react";
import { SvgFileText } from "@opal/icons";
import {
  PacketType,
  FileReaderToolPacket,
  FileReaderResult,
} from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { BlinkingBar } from "@/app/app/message/BlinkingBar";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import Text from "@/refresh-components/texts/Text";

interface FileReaderState {
  fileName: string | null;
  fileId: string | null;
  startChar: number;
  endChar: number;
  totalChars: number;
  previewStart: string;
  previewEnd: string;
  isReading: boolean;
  isComplete: boolean;
}

function constructFileReaderState(
  packets: FileReaderToolPacket[]
): FileReaderState {
  const result = packets.find(
    (p) => p.obj.type === PacketType.FILE_READER_RESULT
  )?.obj as FileReaderResult | null;

  const hasStart = packets.some(
    (p) => p.obj.type === PacketType.FILE_READER_START
  );
  const hasEnd = packets.some(
    (p) =>
      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR
  );

  return {
    fileName: result?.file_name ?? null,
    fileId: result?.file_id ?? null,
    startChar: result?.start_char ?? 0,
    endChar: result?.end_char ?? 0,
    totalChars: result?.total_chars ?? 0,
    previewStart: result?.preview_start ?? "",
    previewEnd: result?.preview_end ?? "",
    isReading: hasStart && !hasEnd,
    isComplete: hasStart && hasEnd,
  };
}

function formatCharRange(
  startChar: number,
  endChar: number,
  totalChars: number
): string {
  return `chars ${startChar.toLocaleString()}\u2013${endChar.toLocaleString()} of ${totalChars.toLocaleString()}`;
}

export const FileReaderToolRenderer: MessageRenderer<
  FileReaderToolPacket,
  {}
> = ({ packets, onComplete, stopPacketSeen, renderType, children }) => {
  const state = constructFileReaderState(packets);

  useEffect(() => {
    if (state.isComplete) {
      onComplete();
    }
  }, [state.isComplete, onComplete]);

  const statusText = state.fileName
    ? `Read ${state.fileName} (${formatCharRange(
        state.startChar,
        state.endChar,
        state.totalChars
      )})`
    : "Reading file";

  const isCompact = renderType === RenderType.COMPACT;

  if (isCompact) {
    return children([
      {
        icon: SvgFileText,
        status: statusText,
        supportsCollapsible: true,
        timelineLayout: "timeline",
        content: <></>,
      },
    ]);
  }

  const hasPreview = state.previewStart || state.previewEnd;

  return children([
    {
      icon: SvgFileText,
      status: statusText,
      supportsCollapsible: true,
      timelineLayout: "timeline",
      content: (
        <Section gap={0.5} alignItems="start" height="fit">
          {state.fileName ? (
            <>
              <Section
                flexDirection="row"
                alignItems="center"
                justifyContent="start"
                gap={0.5}
                height="fit"
              >
                <Text as="span" mainUiAction text02>
                  {state.fileName}
                </Text>
                <Text as="span" mainUiMuted text04>
                  {formatCharRange(
                    state.startChar,
                    state.endChar,
                    state.totalChars
                  )}
                </Text>
              </Section>
              {hasPreview && (
                <Card variant="secondary" padding={0.5} gap={0.25}>
                  <Text as="span" secondaryMono text04>
                    {state.previewStart}
                    {state.previewEnd && "\u2026"}
                  </Text>
                  {state.previewEnd && (
                    <Text as="span" secondaryMono text04>
                      {"\u2026"}
                      {state.previewEnd}
                    </Text>
                  )}
                </Card>
              )}
            </>
          ) : (
            !stopPacketSeen && <BlinkingBar />
          )}
        </Section>
      ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/memory/MemoryToolRenderer.tsx
================================================
"use client";

import { MemoryToolPacket } from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { BlinkingBar } from "@/app/app/message/BlinkingBar";
import { constructCurrentMemoryState } from "./memoryStateUtils";
import Text from "@/refresh-components/texts/Text";
import { SvgEditBig, SvgMaximize2 } from "@opal/icons";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import MemoriesModal from "@/refresh-components/modals/MemoriesModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";

/**
 * MemoryToolRenderer - Renders memory tool execution steps
 *
 * States:
 * - Loading (start, no delta): "Saving memory..." with BlinkingBar
 * - Delta received: operation label + memory text
 * - Complete (SectionEnd): "Memory saved" / "Memory updated" + memory text
 * - No Access: "Memory tool disabled"
 */
export const MemoryToolRenderer: MessageRenderer<MemoryToolPacket, {}> = ({
  packets,
  stopPacketSeen,
  renderType,
  children,
}) => {
  const memoryState = constructCurrentMemoryState(packets);
  const {
    hasStarted,
    noAccess,
    memoryText,
    operation,
    isComplete,
    memoryId,
    index,
  } = memoryState;
  const memoriesModal = useCreateModal();
  const isHighlight = renderType === RenderType.HIGHLIGHT;

  if (!hasStarted) {
    return children([
      {
        icon: SvgEditBig,
        status: "Memory",
        content: <div />,
        supportsCollapsible: false,
        timelineLayout: "timeline",
        noPaddingRight: true,
      },
    ]);
  }

  // No access case
  if (noAccess) {
    const content = (
      <Text as="p" text03 className="text-sm">
        Memory tool disabled
      </Text>
    );

    if (isHighlight) {
      return children([
        {
          icon: null,
          status: null,
          supportsCollapsible: false,
          timelineLayout: "content",
          content: (
            <div className="flex flex-col">
              <Text as="p" text02 className="text-sm mb-1">
                Memory
              </Text>
              {content}
            </div>
          ),
        },
      ]);
    }

    return children([
      {
        icon: SvgEditBig,
        status: "Memory",
        supportsCollapsible: false,
        timelineLayout: "timeline",
        noPaddingRight: true,
        content,
      },
    ]);
  }

  // Determine status text
  let statusLabel = "Updating memory";

  const memoryContent = (
    <div className="flex flex-col">
      <memoriesModal.Provider>
        <MemoriesModal
          initialTargetMemoryId={memoryId}
          initialTargetIndex={index}
          highlightOnOpen
        />
      </memoriesModal.Provider>
      {memoryText ? (
        <div className={cn("w-full flex")}>
          <div className="flex-1 min-w-0">
            <Text as="p" text02 className="text-sm break-words">
              {memoryText}
            </Text>
          </div>
          {/* Expand button */}
          <div className="flex justify-end items-end mt-1 w-8">
            <Button
              prominence="tertiary"
              size="md"
              icon={SvgMaximize2}
              tooltip="View Memories"
              onClick={(e) => {
                e.stopPropagation();
                memoriesModal.toggle(true);
              }}
            />
          </div>
        </div>
      ) : (
        !stopPacketSeen && <BlinkingBar />
      )}
    </div>
  );

  if (isHighlight) {
    return children([
      {
        icon: null,
        status: null,
        supportsCollapsible: false,
        timelineLayout: "content",
        content: (
          <div className="flex flex-col">
            <Text as="p" text02 className="text-sm mb-1">
              {statusLabel}
            </Text>
            {memoryContent}
          </div>
        ),
      },
    ]);
  }

  return children([
    {
      icon: SvgEditBig,
      status: statusLabel,
      supportsCollapsible: false,
      timelineLayout: "timeline",
      noPaddingRight: true,
      content: memoryContent,
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/memory/memoryStateUtils.ts
================================================
import {
  PacketType,
  MemoryToolPacket,
  MemoryToolDelta,
} from "@/app/app/services/streamingModels";

export interface MemoryState {
  hasStarted: boolean;
  noAccess: boolean;
  memoryText: string | null;
  operation: "add" | "update" | null;
  memoryId: number | null;
  index: number | null;
  isComplete: boolean;
}

/** Constructs the current memory state from memory tool packets. */
export function constructCurrentMemoryState(
  packets: MemoryToolPacket[]
): MemoryState {
  const startPacket = packets.find(
    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_START
  );
  const noAccessPacket = packets.find(
    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS
  );
  const deltaPacket = packets.find(
    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_DELTA
  )?.obj as MemoryToolDelta | undefined;
  const sectionEnd = packets.find(
    (packet) =>
      packet.obj.type === PacketType.SECTION_END ||
      packet.obj.type === PacketType.ERROR
  );

  const hasStarted = Boolean(startPacket || noAccessPacket);
  const noAccess = Boolean(noAccessPacket);
  const memoryText = deltaPacket?.memory_text ?? null;
  const operation = deltaPacket?.operation ?? null;
  const memoryId = deltaPacket?.memory_id ?? null;
  const index = deltaPacket?.index ?? null;
  const isComplete = Boolean(sectionEnd);

  return {
    hasStarted,
    noAccess,
    memoryText,
    operation,
    memoryId,
    index,
    isComplete,
  };
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/reasoning/ReasoningRenderer.tsx
================================================
import React, {
  useCallback,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";

import {
  PacketType,
  ReasoningDelta,
  ReasoningPacket,
} from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  FullChatState,
} from "@/app/app/message/messageComponents/interfaces";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import ExpandableTextDisplay from "@/refresh-components/texts/ExpandableTextDisplay";
import {
  mutedTextMarkdownComponents,
  collapsedMarkdownComponents,
} from "@/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents";
import { SvgCircle } from "@opal/icons";

const THINKING_MIN_DURATION_MS = 500; // 0.5 second minimum for "Thinking" state

const THINKING_STATUS = "Thinking";

function extractFirstParagraph(content: string): {
  title: string | null;
  remainingContent: string;
} {
  if (!content || content.trim().length === 0) {
    return { title: null, remainingContent: content };
  }

  const trimmed = content.trim();

  // Split by double newline (paragraph break) or single newline
  const lines = trimmed.split(/\n\n|\n/);
  const firstLine = lines[0]?.trim();

  if (!firstLine) {
    return { title: null, remainingContent: content };
  }

  // Only treat as title if it's an actual markdown heading (starts with #)
  const isMarkdownHeading = /^#+\s/.test(firstLine);
  if (!isMarkdownHeading) {
    return { title: null, remainingContent: content };
  }

  // Remove markdown heading markers (# ## ### etc.)
  const cleanTitle = firstLine.replace(/^#+\s*/, "").trim();

  // Only use as title if it's reasonably short (under ~60 chars for UI fit)
  if (cleanTitle.length > 60) {
    return { title: null, remainingContent: content };
  }

  // Remove the first line from content
  const remainingContent = trimmed.slice(firstLine.length).replace(/^\n+/, "");

  return { title: cleanTitle, remainingContent };
}

function constructCurrentReasoningState(packets: ReasoningPacket[]) {
  const hasStart = packets.some(
    (p) => p.obj.type === PacketType.REASONING_START
  );
  const hasEnd = packets.some(
    (p) =>
      p.obj.type === PacketType.SECTION_END ||
      p.obj.type === PacketType.ERROR ||
      // Support reasoning_done from backend
      (p.obj as any).type === PacketType.REASONING_DONE
  );
  const deltas = packets
    .filter((p) => p.obj.type === PacketType.REASONING_DELTA)
    .map((p) => p.obj as ReasoningDelta);

  const content = deltas.map((d) => d.reasoning).join("");

  return {
    hasStart,
    hasEnd,
    content,
  };
}

export const ReasoningRenderer: MessageRenderer<
  ReasoningPacket,
  FullChatState
> = ({ packets, onComplete, animate, children }) => {
  const { hasStart, hasEnd, content } = useMemo(
    () => constructCurrentReasoningState(packets),
    [packets]
  );

  const { title, remainingContent } = useMemo(
    () => extractFirstParagraph(content),
    [content]
  );

  // Use extracted title if available, otherwise default
  const displayStatus = title || THINKING_STATUS;
  const displayContent = title ? remainingContent : content;

  // Track reasoning timing for minimum display duration
  const [reasoningStartTime, setReasoningStartTime] = useState<number | null>(
    null
  );
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);
  const completionHandledRef = useRef(false);

  // Track when reasoning starts
  useEffect(() => {
    if ((hasStart || hasEnd) && reasoningStartTime === null) {
      setReasoningStartTime(Date.now());
    }
  }, [hasStart, hasEnd, reasoningStartTime]);

  // Handle reasoning completion with minimum duration
  useEffect(() => {
    if (
      hasEnd &&
      reasoningStartTime !== null &&
      !completionHandledRef.current
    ) {
      completionHandledRef.current = true;
      const elapsedTime = Date.now() - reasoningStartTime;
      const minimumThinkingDuration = animate ? THINKING_MIN_DURATION_MS : 0;

      if (elapsedTime >= minimumThinkingDuration) {
        // Enough time has passed, complete immediately
        onComplete();
      } else {
        // Not enough time has passed, delay completion
        const remainingTime = minimumThinkingDuration - elapsedTime;
        timeoutRef.current = setTimeout(() => {
          onComplete();
        }, remainingTime);
      }
    }
  }, [hasEnd, reasoningStartTime, animate, onComplete]);

  // Cleanup timeout on unmount
  useEffect(() => {
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  // Markdown renderer callback for ExpandableTextDisplay
  // Uses collapsed components (no spacing) in collapsed view, normal spacing in expanded modal
  const renderMarkdown = useCallback(
    (text: string, isExpanded: boolean) => (
      <MinimalMarkdown
        content={text}
        components={
          isExpanded ? mutedTextMarkdownComponents : collapsedMarkdownComponents
        }
      />
    ),
    []
  );

  if (!hasStart && !hasEnd && content.length === 0) {
    return children([
      {
        icon: SvgCircle,
        status: THINKING_STATUS,
        content: <></>,
        noPaddingRight: true,
      },
    ]);
  }

  const reasoningContent = (
    <div className="pl-[var(--timeline-common-text-padding)]">
      <ExpandableTextDisplay
        title="Full text"
        content={content}
        displayContent={displayContent}
        renderContent={renderMarkdown}
        isStreaming={!hasEnd}
      />
    </div>
  );

  return children([
    {
      icon: SvgCircle,
      status: displayStatus,
      content: reasoningContent,
      expandedText: reasoningContent,
      noPaddingRight: true,
    },
  ]);
};

export default ReasoningRenderer;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/search/InternalSearchToolRenderer.tsx
================================================
import { SvgSearch, SvgSearchMenu } from "@opal/icons";
import { SearchToolPacket } from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { BlinkingBar } from "@/app/app/message/BlinkingBar";
import { OnyxDocument } from "@/lib/search/interfaces";
import { ValidSources } from "@/lib/types";
import { SearchChipList, SourceInfo } from "./SearchChipList";
import {
  constructCurrentSearchState,
  INITIAL_QUERIES_TO_SHOW,
  QUERIES_PER_EXPANSION,
  INITIAL_RESULTS_TO_SHOW,
  RESULTS_PER_EXPANSION,
  getMetadataTags,
} from "./searchStateUtils";
import Text from "@/refresh-components/texts/Text";

const queryToSourceInfo = (query: string, index: number): SourceInfo => ({
  id: `query-${index}`,
  title: query,
  sourceType: ValidSources.Web,
  icon: SvgSearch,
});

const resultToSourceInfo = (doc: OnyxDocument): SourceInfo => ({
  id: doc.document_id,
  title: doc.semantic_identifier || "",
  sourceType: doc.source_type,
  sourceUrl: doc.link,
  description: doc.blurb,
  metadata: {
    date: doc.updated_at || undefined,
    tags: getMetadataTags(doc.metadata),
  },
});

/**
 * InternalSearchToolRenderer - Renders internal document search tool execution steps
 *
 * RenderType modes:
 * - FULL: Shows 1 combined timeline step (queries + results together).
 *         Used when step is expanded in timeline.
 * - COMPACT: Shows only results (no queries). Header passed as `status` prop.
 *            Used when step is collapsed in timeline, still wrapped in StepContainer.
 * - HIGHLIGHT: Shows only results with header embedded directly in content.
 *              No StepContainer wrapper. Used for parallel streaming preview.
 * - INLINE: Phase-based (queries -> results) for collapsed streaming view.
 */
export const InternalSearchToolRenderer: MessageRenderer<
  SearchToolPacket,
  {}
> = ({
  packets,
  onComplete,
  animate,
  stopPacketSeen,
  renderType,
  children,
}) => {
  const searchState = constructCurrentSearchState(packets);
  const { queries, results, isComplete } = searchState;

  const isCompact = renderType === RenderType.COMPACT;
  const isHighlight = renderType === RenderType.HIGHLIGHT;
  const isInline = renderType === RenderType.INLINE;

  const hasResults = results.length > 0;

  const queriesHeader = "Searching internal documents";

  if (queries.length === 0) {
    return children([
      {
        icon: SvgSearchMenu,
        status: queriesHeader,
        content: <></>,
        supportsCollapsible: true,
        timelineLayout: "timeline",
      },
    ]);
  }

  // HIGHLIGHT mode: header embedded in content, no StepContainer
  if (isHighlight) {
    return children([
      {
        icon: null,
        status: null,
        supportsCollapsible: true,
        timelineLayout: "content",
        content: (
          <div className="flex flex-col">
            <Text as="p" text04 mainUiMuted className="mb-1">
              {queriesHeader}
            </Text>
            <SearchChipList
              items={results}
              initialCount={INITIAL_RESULTS_TO_SHOW}
              expansionCount={RESULTS_PER_EXPANSION}
              getKey={(doc: OnyxDocument, index: number) =>
                doc.document_id ?? `result-${index}`
              }
              toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}
              onClick={(doc: OnyxDocument) => {
                if (doc.link) {
                  window.open(doc.link, "_blank", "noopener,noreferrer");
                }
              }}
              emptyState={
                !isComplete ? (
                  <BlinkingBar />
                ) : (
                  <Text as="p" text04 mainUiMuted>
                    No results found
                  </Text>
                )
              }
            />
          </div>
        ),
      },
    ]);
  }

  // INLINE mode: dynamic phase-based content for collapsed streaming view
  if (isInline) {
    // Querying phase: show queries
    if (!hasResults) {
      return children([
        {
          icon: null,
          status: queriesHeader,
          supportsCollapsible: true,
          timelineLayout: "content",
          content: (
            <SearchChipList
              items={queries}
              initialCount={INITIAL_QUERIES_TO_SHOW}
              expansionCount={QUERIES_PER_EXPANSION}
              getKey={(_, index) => index}
              toSourceInfo={queryToSourceInfo}
              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
              showDetailsCard={false}
              isQuery={true}
            />
          ),
        },
      ]);
    }

    // Reading phase: show results
    return children([
      {
        icon: null,
        status: "Reading",
        supportsCollapsible: true,
        timelineLayout: "content",
        content: (
          <SearchChipList
            items={results}
            initialCount={INITIAL_RESULTS_TO_SHOW}
            expansionCount={RESULTS_PER_EXPANSION}
            getKey={(doc: OnyxDocument, index: number) =>
              doc.document_id ?? `result-${index}`
            }
            toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}
            onClick={(doc: OnyxDocument) => {
              if (doc.link) {
                window.open(doc.link, "_blank", "noopener,noreferrer");
              }
            }}
            emptyState={
              !isComplete ? (
                <BlinkingBar />
              ) : (
                <Text as="p" text04 mainUiMuted>
                  No results found
                </Text>
              )
            }
          />
        ),
      },
    ]);
  }

  // FULL and COMPACT modes: single combined step (queries + results together)
  return children([
    {
      icon: SvgSearchMenu,
      status: queriesHeader,
      supportsCollapsible: true,
      timelineLayout: "timeline",
      content: (
        <div className="flex flex-col">
          {!isCompact && (
            <SearchChipList
              items={queries}
              initialCount={INITIAL_QUERIES_TO_SHOW}
              expansionCount={QUERIES_PER_EXPANSION}
              getKey={(_, index) => index}
              toSourceInfo={queryToSourceInfo}
              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
              showDetailsCard={false}
              isQuery={true}
            />
          )}

          {(results.length > 0 || queries.length > 0) && (
            <>
              {!isCompact && (
                <Text as="p" mainUiMuted text04>
                  Reading
                </Text>
              )}
              <SearchChipList
                items={results}
                initialCount={INITIAL_RESULTS_TO_SHOW}
                expansionCount={RESULTS_PER_EXPANSION}
                getKey={(doc: OnyxDocument, index: number) =>
                  doc.document_id ?? `result-${index}`
                }
                toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}
                onClick={(doc: OnyxDocument) => {
                  if (doc.link) {
                    window.open(doc.link, "_blank", "noopener,noreferrer");
                  }
                }}
                emptyState={
                  !isComplete ? (
                    <BlinkingBar />
                  ) : (
                    <Text as="p" text03 mainUiMuted>
                      No results found
                    </Text>
                  )
                }
              />
            </>
          )}
        </div>
      ),
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/search/SearchChipList.tsx
================================================
import React, { JSX, useState, useEffect, useRef, useMemo } from "react";
import { SourceTag, SourceInfo } from "@/refresh-components/buttons/source-tag";
import { cn } from "@/lib/utils";

export type { SourceInfo };

const ANIMATION_DELAY_MS = 30;

export interface SearchChipListProps<T> {
  items: T[];
  initialCount: number;
  expansionCount: number;
  getKey: (item: T, index: number) => string | number;
  toSourceInfo: (item: T, index: number) => SourceInfo;
  onClick?: (item: T) => void;
  emptyState?: React.ReactNode;
  className?: string;
  showDetailsCard?: boolean;
  isQuery?: boolean;
}

type DisplayEntry<T> =
  | { type: "chip"; item: T; index: number }
  | { type: "more"; batchId: number };

export function SearchChipList<T>({
  items,
  initialCount,
  expansionCount,
  getKey,
  toSourceInfo,
  onClick,
  emptyState,
  className = "",
  showDetailsCard,
  isQuery,
}: SearchChipListProps<T>): JSX.Element {
  const [visibleCount, setVisibleCount] = useState(initialCount);
  const animatedKeysRef = useRef<Set<string>>(new Set());

  const getEntryKey = (entry: DisplayEntry<T>): string => {
    if (entry.type === "more") return `more-button`;
    return String(getKey(entry.item, entry.index));
  };

  const effectiveCount = Math.min(visibleCount, items.length);

  const displayList: DisplayEntry<T>[] = useMemo(() => {
    const chips: DisplayEntry<T>[] = items
      .slice(0, effectiveCount)
      .map((item, i) => ({ type: "chip" as const, item, index: i }));

    if (effectiveCount < items.length) {
      chips.push({ type: "more", batchId: 0 });
    }
    return chips;
  }, [items, effectiveCount]);

  const chipCount = effectiveCount;
  const remainingCount = items.length - chipCount;
  const remainingItems = items.slice(chipCount);

  const handleShowMore = () => {
    setVisibleCount((prev) => prev + expansionCount);
  };

  useEffect(() => {
    const timer = setTimeout(() => {
      displayList.forEach((entry) =>
        animatedKeysRef.current.add(getEntryKey(entry))
      );
    }, 0);
    return () => clearTimeout(timer);
  }, [displayList]);

  let newItemCounter = 0;

  return (
    <div className={cn("flex flex-wrap gap-x-2 gap-y-2", className)}>
      {displayList.map((entry) => {
        const key = getEntryKey(entry);
        const isNew = !animatedKeysRef.current.has(key);
        const delay = isNew ? newItemCounter++ * ANIMATION_DELAY_MS : 0;

        return (
          <div
            key={key}
            className={cn("text-xs", {
              "animate-in fade-in slide-in-from-left-2 duration-150": isNew,
            })}
            style={
              isNew
                ? {
                    animationDelay: `${delay}ms`,
                    animationFillMode: "backwards",
                  }
                : undefined
            }
          >
            {entry.type === "chip" ? (
              <SourceTag
                displayName={toSourceInfo(entry.item, entry.index).title}
                sources={[toSourceInfo(entry.item, entry.index)]}
                onSourceClick={onClick ? () => onClick(entry.item) : undefined}
                showDetailsCard={showDetailsCard}
                isQuery={isQuery}
                tooltipText={isQuery ? "View Full Search Term" : undefined}
              />
            ) : (
              <SourceTag
                displayName={`+${remainingCount} more`}
                sources={remainingItems.map((item, i) =>
                  toSourceInfo(item, chipCount + i)
                )}
                onSourceClick={() => handleShowMore()}
                showDetailsCard={showDetailsCard}
                isQuery={isQuery}
                isMore={isQuery}
              />
            )}
          </div>
        );
      })}

      {items.length === 0 && emptyState}
    </div>
  );
}


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/search/WebSearchToolRenderer.tsx
================================================
import React from "react";
import { SvgSearch, SvgGlobe } from "@opal/icons";
import { SearchToolPacket } from "@/app/app/services/streamingModels";
import {
  MessageRenderer,
  RenderType,
} from "@/app/app/message/messageComponents/interfaces";
import { BlinkingBar } from "@/app/app/message/BlinkingBar";
import { ValidSources } from "@/lib/types";
import { SearchChipList, SourceInfo } from "./SearchChipList";
import {
  constructCurrentSearchState,
  INITIAL_QUERIES_TO_SHOW,
  QUERIES_PER_EXPANSION,
} from "./searchStateUtils";
import Text from "@/refresh-components/texts/Text";

const queryToSourceInfo = (query: string, index: number): SourceInfo => ({
  id: `query-${index}`,
  title: query,
  sourceType: ValidSources.Web,
  icon: SvgSearch,
});

/**
 * WebSearchToolRenderer - Renders web search tool execution steps
 *
 * Only shows queries - results are handled by the fetch tool.
 *
 * RenderType modes:
 * - FULL: Shows queries timeline step. Used when step is expanded in timeline.
 * - HIGHLIGHT: Shows queries with header embedded directly in content.
 *              No StepContainer wrapper. Used for parallel streaming preview.
 * - INLINE: Shows queries for collapsed streaming view.
 */
export const WebSearchToolRenderer: MessageRenderer<SearchToolPacket, {}> = ({
  packets,
  onComplete,
  animate,
  stopPacketSeen,
  renderType,
  children,
}) => {
  const searchState = constructCurrentSearchState(packets);
  const { queries } = searchState;

  const isHighlight = renderType === RenderType.HIGHLIGHT;
  const isInline = renderType === RenderType.INLINE;

  const queriesHeader = "Searching the web";

  if (queries.length === 0) {
    return children([
      {
        icon: SvgGlobe,
        status: "Searching the web",
        content: <div />,
        supportsCollapsible: false,
        timelineLayout: "timeline",
      },
    ]);
  }

  // HIGHLIGHT mode: header embedded in content, no StepContainer
  if (isHighlight) {
    return children([
      {
        icon: null,
        status: null,
        supportsCollapsible: false,
        timelineLayout: "content",
        content: (
          <div className="flex flex-col">
            <Text as="p" text04 mainUiMuted className="mb-1">
              {queriesHeader}
            </Text>
            <SearchChipList
              items={queries}
              initialCount={INITIAL_QUERIES_TO_SHOW}
              expansionCount={QUERIES_PER_EXPANSION}
              getKey={(_, index) => index}
              toSourceInfo={queryToSourceInfo}
              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
              showDetailsCard={false}
              isQuery={true}
            />
          </div>
        ),
      },
    ]);
  }

  // INLINE mode: show queries for collapsed streaming view
  if (isInline) {
    return children([
      {
        icon: null,
        status: queriesHeader,
        supportsCollapsible: false,
        timelineLayout: "content",
        content: (
          <SearchChipList
            items={queries}
            initialCount={INITIAL_QUERIES_TO_SHOW}
            expansionCount={QUERIES_PER_EXPANSION}
            getKey={(_, index) => index}
            toSourceInfo={queryToSourceInfo}
            emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
            showDetailsCard={false}
            isQuery={true}
          />
        ),
      },
    ]);
  }

  // FULL mode: return queries timeline step
  return children([
    {
      icon: SvgGlobe,
      status: "Searching the web",
      content: (
        <SearchChipList
          items={queries}
          initialCount={INITIAL_QUERIES_TO_SHOW}
          expansionCount={QUERIES_PER_EXPANSION}
          getKey={(_, index) => index}
          toSourceInfo={queryToSourceInfo}
          emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
          showDetailsCard={false}
          isQuery={true}
        />
      ),
      supportsCollapsible: false,
      timelineLayout: "timeline",
    },
  ]);
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/search/searchStateUtils.ts
================================================
import {
  PacketType,
  SearchToolPacket,
  SearchToolStart,
  SearchToolQueriesDelta,
  SearchToolDocumentsDelta,
  SectionEnd,
} from "@/app/app/services/streamingModels";
import { OnyxDocument } from "@/lib/search/interfaces";

export const MAX_TITLE_LENGTH = 25;

export const getMetadataTags = (metadata?: {
  [key: string]: string;
}): string[] | undefined => {
  if (!metadata) return undefined;
  const tags = Object.values(metadata)
    .filter((value) => typeof value === "string" && value.length > 0)
    .slice(0, 2)
    .map((value) => `# ${value}`);
  return tags.length > 0 ? tags : undefined;
};

export const INITIAL_QUERIES_TO_SHOW = 3;
export const QUERIES_PER_EXPANSION = 5;
export const INITIAL_RESULTS_TO_SHOW = 3;
export const RESULTS_PER_EXPANSION = 10;

export interface SearchState {
  queries: string[];
  results: OnyxDocument[];
  isSearching: boolean;
  hasResults: boolean;
  isComplete: boolean;
  isInternetSearch: boolean;
}

/** Constructs the current search state from search tool packets. */
export const constructCurrentSearchState = (
  packets: SearchToolPacket[]
): SearchState => {
  const searchStart = packets.find(
    (packet) => packet.obj.type === PacketType.SEARCH_TOOL_START
  )?.obj as SearchToolStart | null;

  const queryDeltas = packets
    .filter(
      (packet) => packet.obj.type === PacketType.SEARCH_TOOL_QUERIES_DELTA
    )
    .map((packet) => packet.obj as SearchToolQueriesDelta);

  const documentDeltas = packets
    .filter(
      (packet) => packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA
    )
    .map((packet) => packet.obj as SearchToolDocumentsDelta);

  const searchEnd = packets.find(
    (packet) =>
      packet.obj.type === PacketType.SECTION_END ||
      packet.obj.type === PacketType.ERROR
  )?.obj as SectionEnd | null;

  // Deduplicate queries using Set for O(n) instead of indexOf which is O(n²)
  const seenQueries = new Set<string>();
  const queries = queryDeltas
    .flatMap((delta) => delta?.queries || [])
    .filter((query) => {
      if (seenQueries.has(query)) return false;
      seenQueries.add(query);
      return true;
    });

  const seenDocIds = new Set<string>();
  const results = documentDeltas
    .flatMap((delta) => delta?.documents || [])
    .filter((doc) => {
      if (!doc || !doc.document_id) return false;
      if (seenDocIds.has(doc.document_id)) return false;
      seenDocIds.add(doc.document_id);
      return true;
    });

  const isSearching = Boolean(searchStart && !searchEnd);
  const hasResults = results.length > 0;
  const isComplete = Boolean(searchStart && searchEnd);
  const isInternetSearch = searchStart?.is_internet_search || false;

  return {
    queries,
    results,
    isSearching,
    hasResults,
    isComplete,
    isInternetSearch,
  };
};


================================================
FILE: web/src/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents.tsx
================================================
import type { Components } from "react-markdown";
import Text from "@/refresh-components/texts/Text";

// Expanded view: normal spacing between paragraphs/lists
export const mutedTextMarkdownComponents = {
  p: ({ children }: { children?: React.ReactNode }) => (
    <Text as="p" text03 mainUiMuted className="!my-1">
      {children}
    </Text>
  ),
  li: ({ children }: { children?: React.ReactNode }) => (
    <Text as="li" text03 mainUiMuted className="!my-0 !py-0 leading-normal">
      {children}
    </Text>
  ),
  ul: ({ children }: { children?: React.ReactNode }) => (
    <ul className="!pl-0 !ml-0 !my-0.5 list-inside">{children}</ul>
  ),
  ol: ({ children }: { children?: React.ReactNode }) => (
    <ol className="!pl-0 !ml-0 !my-0.5 list-inside">{children}</ol>
  ),
  a: ({ children, href }: { children?: React.ReactNode; href?: string }) => (
    <a
      href={href}
      className="text-text-03 mainUiMuted underline"
      target="_blank"
      rel="noopener noreferrer"
    >
      {children}
    </a>
  ),
} satisfies Partial<Components>;

// Collapsed view: no spacing for compact display
export const collapsedMarkdownComponents = {
  p: ({ children }: { children?: React.ReactNode }) => (
    <Text as="p" text03 mainUiMuted className="!my-0">
      {children}
    </Text>
  ),
  li: ({ children }: { children?: React.ReactNode }) => (
    <Text as="li" text03 mainUiMuted className="!my-0 !py-0 leading-normal">
      {children}
    </Text>
  ),
  ul: ({ children }: { children?: React.ReactNode }) => (
    <ul className="!pl-0 !ml-0 !my-0 list-inside">{children}</ul>
  ),
  ol: ({ children }: { children?: React.ReactNode }) => (
    <ol className="!pl-0 !ml-0 !my-0 list-inside">{children}</ol>
  ),
  a: ({ children, href }: { children?: React.ReactNode; href?: string }) => (
    <a
      href={href}
      className="text-text-03 mainUiMuted underline"
      target="_blank"
      rel="noopener noreferrer"
    >
      {children}
    </a>
  ),
} satisfies Partial<Components>;


================================================
FILE: web/src/app/app/message/messageComponents/timeline/transformers.ts
================================================
import { GroupedPacket } from "./hooks/packetProcessor";

/**
 * Transformed step data ready for rendering
 */
export interface TransformedStep {
  /** Unique key for React rendering */
  key: string;
  /** Turn index from packet placement */
  turnIndex: number;
  /** Tab index for parallel tools */
  tabIndex: number;
  /** Raw packets for content rendering */
  packets: GroupedPacket["packets"];
}

/**
 * Group steps by turn_index for detecting parallel tools
 */
export interface TurnGroup {
  turnIndex: number;
  steps: TransformedStep[];
  /** True if multiple steps have the same turn_index (parallel execution) */
  isParallel: boolean;
}

/**
 * Transform a single GroupedPacket into step data
 */
export function transformPacketGroup(group: GroupedPacket): TransformedStep {
  return {
    key: `${group.turn_index}-${group.tab_index}`,
    turnIndex: group.turn_index,
    tabIndex: group.tab_index,
    packets: group.packets,
  };
}

/**
 * Transform all packet groups into step data
 */
export function transformPacketGroups(
  groups: GroupedPacket[]
): TransformedStep[] {
  return groups.map(transformPacketGroup);
}

/**
 * Group transformed steps by turn_index to detect parallel tools
 *
 * @example
 * // Input: TransformedStep[]
 * // ┌──────────────────────────────────────────┐
 * // │ [0] key="0-0" turnIndex=0 tabIndex=0     │
 * // │ [1] key="0-1" turnIndex=0 tabIndex=1     │
 * // │ [2] key="1-0" turnIndex=1 tabIndex=0     │
 * // └──────────────────────────────────────────┘
 * //
 * // Step 1: Build Map<turnIndex, TransformedStep[]>
 * // ┌─────────────────────────────────────────────┐
 * // │ turnMap = {                                 │
 * // │   0 → [step(0-0), step(0-1)]               │
 * // │   1 → [step(1-0)]                          │
 * // │ }                                          │
 * // └─────────────────────────────────────────────┘
 * //
 * // Step 2: Sort turn indices & steps by tabIndex
 * //
 * // Step 3: Build TurnGroup[] with isParallel flag
 * // ┌─────────────────────────────────────────────┐
 * // │ Output: TurnGroup[]                         │
 * // ├─────────────────────────────────────────────┤
 * // │ [0] turnIndex=0                             │
 * // │     steps=[0-0, 0-1]                        │
 * // │     isParallel=true  ← 2 steps = parallel   │
 * // │                                             │
 * // │ [1] turnIndex=1                             │
 * // │     steps=[1-0]                             │
 * // │     isParallel=false ← 1 step = sequential  │
 * // └─────────────────────────────────────────────┘
 */
export function groupStepsByTurn(steps: TransformedStep[]): TurnGroup[] {
  const turnMap = new Map<number, TransformedStep[]>();

  for (const step of steps) {
    const existing = turnMap.get(step.turnIndex);
    if (existing) {
      existing.push(step);
    } else {
      turnMap.set(step.turnIndex, [step]);
    }
  }

  const result: TurnGroup[] = [];
  const sortedTurnIndices = Array.from(turnMap.keys()).sort((a, b) => a - b);

  for (const turnIndex of sortedTurnIndices) {
    const stepsForTurn = turnMap.get(turnIndex)!;
    stepsForTurn.sort((a, b) => a.tabIndex - b.tabIndex);

    result.push({
      turnIndex,
      steps: stepsForTurn,
      isParallel: stepsForTurn.length > 1,
    });
  }

  return result;
}


================================================
FILE: web/src/app/app/message/messageComponents/timing.ts
================================================
import { MutableRefObject } from "react";

/**
 * Clears multiple timeout refs and optionally resets them to null.
 * Returns true if any timeout was cleared.
 */
export function clearTimeoutRefs(
  timeoutRefs: Array<MutableRefObject<NodeJS.Timeout | null>>,
  resetToNull: boolean = false
): boolean {
  let hadPendingTimeout = false;

  for (const ref of timeoutRefs) {
    if (ref.current) {
      clearTimeout(ref.current);
      hadPendingTimeout = true;
      if (resetToNull) {
        ref.current = null;
      }
    }
  }

  return hadPendingTimeout;
}


================================================
FILE: web/src/app/app/message/messageComponents/toolDisplayHelpers.tsx
================================================
import { JSX } from "react";
import { FiCircle, FiList, FiTool, FiXCircle } from "react-icons/fi";
import { BrainIcon } from "@/components/icons/icons";

import {
  Packet,
  PacketType,
  SearchToolPacket,
} from "@/app/app/services/streamingModels";
import { constructCurrentSearchState } from "./timeline/renderers/search/searchStateUtils";
import {
  SvgGlobe,
  SvgSearchMenu,
  SvgTerminal,
  SvgLink,
  SvgImage,
  SvgUser,
  SvgCircle,
  SvgBookOpen,
} from "@opal/icons";

/**
 * Check if a packet group contains an ERROR packet (tool failed)
 */
export function hasToolError(packets: Packet[]): boolean {
  return packets.some((p) => p.obj.type === PacketType.ERROR);
}

/**
 * Check if a tool group is complete.
 * For research agents, we only look at parent-level SECTION_END packets (sub_turn_index is undefined/null),
 * not the SECTION_END packets from nested tools (which have sub_turn_index as a number).
 */
export function isToolComplete(packets: Packet[]): boolean {
  const firstPacket = packets[0];
  if (!firstPacket) return false;

  // For research agents, only parent-level SECTION_END indicates completion
  // Nested tools (search, fetch, etc.) within the research agent have sub_turn_index set
  if (firstPacket.obj.type === PacketType.RESEARCH_AGENT_START) {
    return packets.some(
      (p) =>
        (p.obj.type === PacketType.SECTION_END ||
          p.obj.type === PacketType.ERROR) &&
        (p.placement.sub_turn_index === undefined ||
          p.placement.sub_turn_index === null)
    );
  }

  // For other tools, any SECTION_END or ERROR indicates completion
  return packets.some(
    (p) =>
      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR
  );
}

/**
 * Get an error icon for failed tools
 */
export function getToolErrorIcon(): JSX.Element {
  return <FiXCircle className="w-3.5 h-3.5 text-error" />;
}

export function getToolKey(turn_index: number, tab_index: number): string {
  return `${turn_index}-${tab_index}`;
}

export function parseToolKey(key: string): {
  turn_index: number;
  tab_index: number;
} {
  const parts = key.split("-");
  return {
    turn_index: parseInt(parts[0] ?? "0", 10),
    tab_index: parseInt(parts[1] ?? "0", 10),
  };
}

export function getToolName(packets: Packet[]): string {
  const firstPacket = packets[0];
  if (!firstPacket) return "Tool";

  switch (firstPacket.obj.type) {
    case PacketType.SEARCH_TOOL_START: {
      const searchState = constructCurrentSearchState(
        packets as SearchToolPacket[]
      );
      return searchState.isInternetSearch ? "Web Search" : "Internal Search";
    }
    case PacketType.PYTHON_TOOL_START:
      return "Code Interpreter";
    case PacketType.FETCH_TOOL_START:
      return "Open URLs";
    case PacketType.CUSTOM_TOOL_START:
      return (
        (firstPacket.obj as { tool_name?: string }).tool_name || "Custom Tool"
      );
    case PacketType.IMAGE_GENERATION_TOOL_START:
      return "Generate Image";
    case PacketType.DEEP_RESEARCH_PLAN_START:
      return "Generate plan";
    case PacketType.RESEARCH_AGENT_START:
      return "Research agent";
    case PacketType.REASONING_START:
      return "Thinking";
    case PacketType.MEMORY_TOOL_START:
    case PacketType.MEMORY_TOOL_NO_ACCESS:
      return "Memory";
    default:
      return "Tool";
  }
}

export function getToolIcon(packets: Packet[]): JSX.Element {
  const firstPacket = packets[0];
  if (!firstPacket) return <FiCircle className="w-3.5 h-3.5" />;

  switch (firstPacket.obj.type) {
    case PacketType.SEARCH_TOOL_START: {
      const searchState = constructCurrentSearchState(
        packets as SearchToolPacket[]
      );
      return searchState.isInternetSearch ? (
        <SvgGlobe className="w-3.5 h-3.5" />
      ) : (
        <SvgSearchMenu className="w-3.5 h-3.5" />
      );
    }
    case PacketType.PYTHON_TOOL_START:
      return <SvgTerminal className="w-3.5 h-3.5" />;
    case PacketType.FETCH_TOOL_START:
      return <SvgLink className="w-3.5 h-3.5" />;
    case PacketType.CUSTOM_TOOL_START:
      return <FiTool className="w-3.5 h-3.5" />;
    case PacketType.IMAGE_GENERATION_TOOL_START:
      return <SvgImage className="w-3.5 h-3.5" />;
    case PacketType.DEEP_RESEARCH_PLAN_START:
      return <FiList className="w-3.5 h-3.5" />;
    case PacketType.RESEARCH_AGENT_START:
      return <SvgUser className="w-3.5 h-3.5" />;
    case PacketType.REASONING_START:
      return <BrainIcon className="w-3.5 h-3.5" />;
    case PacketType.MEMORY_TOOL_START:
    case PacketType.MEMORY_TOOL_NO_ACCESS:
      return <SvgBookOpen className="w-3.5 h-3.5" />;
    default:
      return <SvgCircle className="w-3.5 h-3.5" />;
  }
}


================================================
FILE: web/src/app/app/message/thinkingBox/ThinkingBox.css
================================================
/* ThinkingBox.css */

/* Apply transition to dark mode as well to ensure smooth color changes */
html {
  transition:
    background-color 0.2s ease-in-out,
    color 0.2s ease-in-out;
}

:root {
  --thinking-border-color: rgba(0, 0, 0, 0.1);
  --thinking-bg-color: transparent;
  --thinking-text-color: #6b7280;
  --thinking-title-color: #374151;
  --thinking-fade-start: rgba(249, 250, 251, 1);
  --thinking-fade-end: rgba(249, 250, 251, 0);
  --thinking-fade-start-rgb: 249, 250, 251;
}

.dark {
  --thinking-border-color: rgba(255, 255, 255, 0.1);
  --thinking-bg-color: transparent;
  --thinking-text-color: #9ca3af;
  --thinking-title-color: #e5e7eb;
  --thinking-fade-start: rgba(30, 41, 59, 1);
  --thinking-fade-end: rgba(30, 41, 59, 0);
  --thinking-fade-start-rgb: 30, 41, 59;
}

.thinking-box {
  width: 98%;
  max-width: 100%;
  position: relative;
}

/* Simple direct rule to prevent border flash in dark mode */
.dark .thinking-box * {
  border-color: rgba(255, 255, 255, 0.1);
}

.thinking-box__container {
  border: 1px solid var(--thinking-border-color);
  border-radius: 0.75rem;
  background-color: var(--thinking-bg-color);
  overflow: hidden;
  transition: all 0.2s ease-in-out;
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
  transform: translateZ(0);
  backface-visibility: hidden;
  perspective: 1000px;
}

/* More subtle collapsed state */
.thinking-box__container--collapsed {
  border-color: var(--thinking-border-color);
  opacity: 0.9;
}

/* No preview - make the bottom border curved too */
.thinking-box__container--no-preview {
  border-bottom-left-radius: 0.75rem;
  border-bottom-right-radius: 0.75rem;
}

/* Remove the bottom border when there's no preview */
.thinking-box__container--no-preview .thinking-box__header {
  border-bottom: none;
}

/* Style for the transitioning state to prevent flashing */
.thinking-box__container--transitioning {
  pointer-events: none; /* Prevent interactions during transition */
}

/* Fix for the flashing white border in dark mode */
.dark .thinking-box__container--transitioning {
  border-color: rgba(255, 255, 255, 0.1);
}

.dark .thinking-box__container--transitioning .thinking-box__header {
  border-bottom-color: rgba(255, 255, 255, 0.1);
}

.dark .thinking-box__container--transitioning .thinking-box__content {
  border-top-color: rgba(255, 255, 255, 0.1);
}

.dark .thinking-box__container--transitioning .thinking-box__preview--crawling {
  border-top-color: rgba(255, 255, 255, 0.1);
}

.thinking-box__header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 0.75rem 1rem;
  cursor: pointer;
  transition: background-color 0.2s ease-in-out;
  user-select: none;
  border-bottom: 1px solid var(--thinking-border-color);
}

.thinking-box__header:hover {
  background-color: rgba(0, 0, 0, 0.02);
}

.dark .thinking-box__header:hover {
  background-color: rgba(255, 255, 255, 0.02);
}

.thinking-box__title {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  color: var(--thinking-title-color);
}

.thinking-box__icon {
  color: var(--thinking-text-color);
  margin-right: 0.25rem;
  animation: pulse 1.5s infinite ease-in-out;
}

.thinking-box__title-text {
  font-size: 0.8rem;
  font-weight: 500;
}

.thinking-box__timer {
  font-size: 0.8rem;
  color: var(--thinking-text-color);
}

.thinking-box__collapse-icon {
  color: var(--thinking-text-color);
  display: flex;
  align-items: center;
}

.thinking-box__content {
  border-top: 1px solid var(--thinking-border-color);
  padding: 1.25rem;
  max-height: 400px;
  overflow-y: auto;
  color: var(--thinking-text-color);
  animation: fadeIn 0.3s ease-in-out;
}

.thinking-box__markdown {
  font-size: 0.875rem;
  color: var(--thinking-text-color);
  line-height: 1.5;
  overflow-wrap: break-word;
}

/* Preview container (collapsed state) */
.thinking-box__preview {
  position: relative;
  height: 2.5rem;
  overflow: hidden;
  width: 100%;
  padding: 0.15rem 0;
  transition: all 0.35s cubic-bezier(0.16, 1, 0.3, 1);
}

/* Active animation styling - highlight active thinking */
.thinking-box__preview--crawling {
  height: 5rem;
  transition: all 0.5s cubic-bezier(0.16, 1, 0.3, 1);
  border-top: 1px solid var(--thinking-border-color);
  background-color: rgba(0, 0, 0, 0.01);
}

.dark .thinking-box__preview--crawling {
  background-color: rgba(255, 255, 255, 0.025);
}

.thinking-box__fade-container {
  position: relative;
  height: 100%;
  overflow: hidden;
  transition: all 0.4s cubic-bezier(0.16, 1, 0.3, 1);
}

/* Create fade effect at top and bottom */
.thinking-box__fade-container::before,
.thinking-box__fade-container::after {
  content: "";
  position: absolute;
  left: 0;
  right: 0;
  height: 0.85rem; /* Increased for more visible gradient */
  z-index: 10;
  pointer-events: none;
  transition: all 0.3s cubic-bezier(0.16, 1, 0.3, 1);
  opacity: 0.95;
}

/* Enhanced gradients with smoother transitions */
.thinking-box__fade-container::before {
  top: 0;
  background: linear-gradient(
    to bottom,
    var(--thinking-fade-start),
    rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%,
    var(--thinking-fade-end) 100%
  );
}

.thinking-box__fade-container::after {
  bottom: 0;
  background: linear-gradient(
    to top,
    var(--thinking-fade-start),
    rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%,
    var(--thinking-fade-end) 100%
  );
}

.dark .thinking-box__fade-container::before {
  background: linear-gradient(
    to bottom,
    var(--thinking-fade-start),
    rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%,
    var(--thinking-fade-end) 100%
  );
}

.dark .thinking-box__fade-container::after {
  background: linear-gradient(
    to top,
    var(--thinking-fade-start),
    rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%,
    var(--thinking-fade-end) 100%
  );
}

/* Make gradients more visible during crawling */
.thinking-box__preview--crawling .thinking-box__fade-container::before,
.thinking-box__preview--crawling .thinking-box__fade-container::after {
  height: 1.5rem;
  opacity: 0.95;
}

.thinking-box__scroll-content {
  padding: 0.75rem 1rem;
  height: 100%;
  width: 100%;
  overflow-y: hidden;
  will-change: transform;
  transform: translateZ(0);
  backface-visibility: hidden;
  -webkit-font-smoothing: antialiased;
  -webkit-mask-image: linear-gradient(
    to bottom,
    transparent,
    black 12%,
    black 88%,
    transparent
  );
  mask-image: linear-gradient(
    to bottom,
    transparent,
    black 12%,
    black 88%,
    transparent
  );
}

/* Enhanced text during crawling */
.thinking-box__preview--crawling .thinking-box__preview-text {
  opacity: 0.98;
  font-size: 0.75rem;
  line-height: 1.6;
}

.thinking-box__expand-prompt {
  display: none;
}

/* Animation for thinking indicator */
@keyframes pulse {
  0% {
    opacity: 0.5;
  }
  50% {
    opacity: 1;
  }
  100% {
    opacity: 0.5;
  }
}

/* Fade in animation */
@keyframes fadeIn {
  from {
    opacity: 0;
  }
  to {
    opacity: 1;
  }
}

/* Smooth scrolling effect */
@keyframes scrollText {
  0% {
    transform: translateY(0);
  }
  100% {
    transform: translateY(-100%);
  }
}

.thinking-box__preview-text {
  font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
  font-size: 0.7rem;
  color: var(--thinking-text-color);
  white-space: pre-wrap;
  margin: 0;
  display: block;
  opacity: 0.85;
  line-height: 1.4;
  transition: all 0.3s ease;
}

/* Enhanced masking during crawling */
.thinking-box__preview--crawling .thinking-box__scroll-content {
  -webkit-mask-image: linear-gradient(
    to bottom,
    transparent,
    black 8%,
    black 92%,
    transparent
  );
  mask-image: linear-gradient(
    to bottom,
    transparent,
    black 8%,
    black 92%,
    transparent
  );
  padding: 0.75rem 1rem;
}

/* Make sure the preview adjusts immediately when new content arrives */
.thinking-box__preview--crawling .thinking-box__scroll-content {
  transition: height 0.3s ease-out;
}

/* Enhance visibility of actual content */
.thinking-box__preview--crawling .thinking-box__preview-text {
  opacity: 0.98;
  font-size: 0.75rem;
  line-height: 1.7;
  text-shadow: 0 0 0.1px rgba(0, 0, 0, 0.2);
}

/* Faster transition for expanding preview */
.thinking-box__preview {
  transition: all 0.25s cubic-bezier(0.16, 1, 0.3, 1);
}

/* Enhanced hover feedback for collapsed header */
.thinking-box__container--collapsed .thinking-box__header:hover {
  background-color: rgba(0, 0, 0, 0.03);
}

.dark .thinking-box__container--collapsed .thinking-box__header:hover {
  background-color: rgba(255, 255, 255, 0.03);
}


================================================
FILE: web/src/app/app/page.tsx
================================================
import AppPage from "@/refresh-pages/AppPage";

export interface PageProps {
  searchParams: Promise<{ [key: string]: string }>;
}

export default async function Page(props: PageProps) {
  const searchParams = await props.searchParams;
  const firstMessage = searchParams.firstMessage;

  // Other pages in `web/src/app/chat` are wrapped with `<AppPageLayout>`.
  // `chat/page.tsx` is not because it also needs to handle rendering of the document-sidebar (`web/src/sections/document-sidebar/DocumentsSidebar.tsx`).
  return <AppPage firstMessage={firstMessage} />;
}


================================================
FILE: web/src/app/app/projects/projectsService.ts
================================================
import { ChatFileType, ChatSession } from "../interfaces";

// Generic error handler that avoids exposing server error details
const handleRequestError = (action: string, response: Response) => {
  throw new Error(`${action} failed (Status: ${response.status})`);
};

export interface Project {
  id: number;
  name: string;
  description: string | null;
  created_at: string;
  user_id: string;
  instructions: string | null;
  chat_sessions: ChatSession[];
}

export interface CategorizedFiles {
  user_files: ProjectFile[];
  rejected_files: RejectedFile[];
}

export interface ProjectFile {
  id: string;
  name: string;
  project_id: number | null;
  user_id: string | null;
  file_id: string;
  created_at: string;
  status: UserFileStatus;
  file_type: string;
  last_accessed_at: string;
  chat_file_type: ChatFileType;
  token_count: number | null;
  chunk_count: number | null;
  temp_id?: string | null;
}

export interface RejectedFile {
  file_name: string;
  reason: string;
}

export interface UserFileDeleteResult {
  has_associations: boolean;
  project_names: string[];
  assistant_names: string[];
}

export enum UserFileStatus {
  UPLOADING = "UPLOADING", //UI only
  PROCESSING = "PROCESSING",
  COMPLETED = "COMPLETED",
  SKIPPED = "SKIPPED",
  FAILED = "FAILED",
  CANCELED = "CANCELED",
  DELETING = "DELETING",
}

export type ProjectDetails = {
  project: Project;
  files?: ProjectFile[];
  persona_id_to_is_featured?: Record<number, boolean>;
};

export async function fetchProjects(): Promise<Project[]> {
  const response = await fetch("/api/user/projects");
  if (!response.ok) {
    handleRequestError("Fetch projects", response);
  }
  return response.json();
}

export async function createProject(name: string): Promise<Project> {
  const response = await fetch(
    `/api/user/projects/create?name=${encodeURIComponent(name)}`,
    { method: "POST" }
  );
  if (!response.ok) {
    handleRequestError("Create project", response);
  }
  return response.json();
}

export async function uploadFiles(
  files: File[],
  projectId?: number | null,
  tempIdMap?: Map<string, string>
): Promise<CategorizedFiles> {
  const formData = new FormData();
  files.forEach((file) => formData.append("files", file));
  if (projectId !== undefined && projectId !== null) {
    formData.append("project_id", String(projectId));
  }
  if (tempIdMap !== undefined && tempIdMap !== null) {
    formData.append(
      "temp_id_map",
      JSON.stringify(Object.fromEntries(tempIdMap))
    );
  }

  const response = await fetch("/api/user/projects/file/upload", {
    method: "POST",
    body: formData,
  });

  if (!response.ok) {
    handleRequestError("Upload files", response);
  }

  return response.json();
}

export async function getRecentFiles(): Promise<ProjectFile[]> {
  const response = await fetch(`/api/user/files/recent`);
  if (!response.ok) {
    handleRequestError("Fetch recent files", response);
  }
  return response.json();
}

export async function getFilesInProject(
  projectId: number
): Promise<ProjectFile[]> {
  const response = await fetch(`/api/user/projects/files/${projectId}`);
  if (!response.ok) {
    handleRequestError("Fetch project files", response);
  }
  return response.json();
}

export async function getProject(projectId: number): Promise<Project> {
  const response = await fetch(`/api/user/projects/${projectId}`);
  if (!response.ok) {
    handleRequestError("Fetch project", response);
  }
  return response.json();
}

export async function renameProject(
  projectId: number,
  name: string
): Promise<Project> {
  const response = await fetch(`/api/user/projects/${projectId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ name }),
  });
  if (!response.ok) {
    handleRequestError("Rename project", response);
  }
  return response.json();
}

export async function deleteProject(projectId: number): Promise<void> {
  const response = await fetch(`/api/user/projects/${projectId}`, {
    method: "DELETE",
  });
  if (!response.ok) {
    handleRequestError("Delete project", response);
  }
}

export async function getProjectInstructions(
  projectId: number
): Promise<string | null> {
  const response = await fetch(`/api/user/projects/${projectId}/instructions`);
  if (!response.ok) {
    handleRequestError("Fetch project instructions", response);
  }
  const data = (await response.json()) as { instructions: string | null };
  return data.instructions ?? null;
}

export async function upsertProjectInstructions(
  projectId: number,
  instructions: string
): Promise<string | null> {
  const response = await fetch(`/api/user/projects/${projectId}/instructions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ instructions }),
  });
  if (!response.ok) {
    handleRequestError("Update project instructions", response);
  }
  const data = (await response.json()) as { instructions: string | null };
  return data.instructions ?? null;
}

export async function getProjectDetails(
  projectId: number
): Promise<ProjectDetails> {
  const response = await fetch(`/api/user/projects/${projectId}/details`);
  if (!response.ok) {
    handleRequestError("Fetch project details", response);
  }
  return response.json();
}

export async function unlinkFileFromProject(
  projectId: number,
  fileId: string
): Promise<Response> {
  const response = await fetch(
    `/api/user/projects/${encodeURIComponent(
      projectId
    )}/files/${encodeURIComponent(fileId)}`,
    { method: "DELETE" }
  );
  if (!response.ok) {
    handleRequestError("Unlink file from project", response);
  }
  return response;
}

export async function linkFileToProject(
  projectId: number,
  fileId: string
): Promise<Response> {
  const response = await fetch(
    `/api/user/projects/${encodeURIComponent(
      projectId
    )}/files/${encodeURIComponent(fileId)}`,
    { method: "POST" }
  );
  if (!response.ok) {
    handleRequestError("Link file to project", response);
  }
  return response;
}

export async function deleteUserFile(
  fileId: string
): Promise<UserFileDeleteResult> {
  const response = await fetch(
    `/api/user/projects/file/${encodeURIComponent(fileId)}`,
    {
      method: "DELETE",
    }
  );
  if (!response.ok) {
    handleRequestError("Delete file", response);
  }
  return (await response.json()) as UserFileDeleteResult;
}

export async function getUserFile(fileId: string): Promise<ProjectFile> {
  const response = await fetch(
    `/api/user/projects/file/${encodeURIComponent(fileId)}`
  );
  if (!response.ok) {
    handleRequestError("Fetch file", response);
  }
  return response.json();
}

export async function getUserFileStatuses(
  fileIds: string[]
): Promise<ProjectFile[]> {
  const response = await fetch(`/api/user/projects/file/statuses`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ file_ids: fileIds }),
  });
  if (!response.ok) {
    handleRequestError("Fetch file statuses", response);
  }
  return response.json();
}

export async function getSessionProjectTokenCount(
  chatSessionId: string
): Promise<number> {
  const response = await fetch(
    `/api/user/projects/session/${encodeURIComponent(
      chatSessionId
    )}/token-count`
  );
  if (!response.ok) {
    return 0;
  }
  const data = (await response.json()) as { total_tokens: number };
  return data.total_tokens ?? 0;
}

export async function getProjectFilesForSession(
  chatSessionId: string
): Promise<ProjectFile[]> {
  const response = await fetch(
    `/api/user/projects/session/${encodeURIComponent(chatSessionId)}/files`
  );
  if (!response.ok) {
    return [];
  }
  return response.json();
}

export async function getProjectTokenCount(projectId: number): Promise<number> {
  const response = await fetch(
    `/api/user/projects/${encodeURIComponent(projectId)}/token-count`
  );
  if (!response.ok) {
    return 0;
  }
  const data = (await response.json()) as { total_tokens: number };
  return data.total_tokens ?? 0;
}

export async function getMaxSelectedDocumentTokens(
  personaId: number
): Promise<number | null> {
  const response = await fetch(
    `/api/chat/max-selected-document-tokens?persona_id=${personaId}`
  );
  if (!response.ok) {
    return null;
  }
  const json = await response.json();
  return (json?.max_tokens as number) ?? null;
}

export async function moveChatSession(
  projectId: number,
  chatSessionId: string
): Promise<boolean> {
  const response = await fetch(
    `/api/user/projects/${projectId}/move_chat_session`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ chat_session_id: chatSessionId }),
    }
  );
  if (!response.ok) {
    handleRequestError("Move chat session", response);
  }
  return response.ok;
}

export async function removeChatSessionFromProject(
  chatSessionId: string
): Promise<boolean> {
  const response = await fetch(`/api/user/projects/remove_chat_session`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ chat_session_id: chatSessionId }),
  });
  if (!response.ok) {
    handleRequestError("Remove chat session from project", response);
  }
  return response.ok;
}


================================================
FILE: web/src/app/app/services/actionUtils.ts
================================================
import { JSX } from "react";
import type { IconProps } from "@opal/types";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import {
  SvgCpu,
  SvgGlobe,
  SvgImage,
  SvgLink,
  SvgSearch,
  SvgServer,
} from "@opal/icons";

// Helper functions to identify specific tools
const isSearchTool = (tool: ToolSnapshot): boolean => {
  return (
    tool.in_code_tool_id === "SearchTool" ||
    tool.name === "run_search" ||
    tool.display_name?.toLowerCase().includes("search tool")
  );
};

const isWebSearchTool = (tool: ToolSnapshot): boolean => {
  return (
    tool.in_code_tool_id === "WebSearchTool" ||
    tool.display_name?.toLowerCase().includes("web_search")
  );
};

const isImageGenerationTool = (tool: ToolSnapshot): boolean => {
  return (
    tool.in_code_tool_id === "ImageGenerationTool" ||
    tool.display_name?.toLowerCase().includes("image generation")
  );
};

const isKnowledgeGraphTool = (tool: ToolSnapshot): boolean => {
  return (
    tool.in_code_tool_id === "KnowledgeGraphTool" ||
    tool.display_name?.toLowerCase().includes("knowledge graph")
  );
};

const isOpenUrlTool = (tool: ToolSnapshot): boolean => {
  return (
    tool.in_code_tool_id === "OpenURLTool" ||
    tool.name === "open_url" ||
    tool.display_name?.toLowerCase().includes("open url")
  );
};

export function getIconForAction(
  action: ToolSnapshot
): (props: IconProps) => JSX.Element {
  if (isSearchTool(action)) return SvgSearch;
  if (isWebSearchTool(action)) return SvgGlobe;
  if (isImageGenerationTool(action)) return SvgImage;
  if (isKnowledgeGraphTool(action)) return SvgServer;
  if (isOpenUrlTool(action)) return SvgLink;
  return SvgCpu;
}

// Check if the agent has either search tool or web search tool available
export function hasSearchToolsAvailable(tools: ToolSnapshot[]): boolean {
  return tools.some((tool) => isSearchTool(tool) || isWebSearchTool(tool));
}


================================================
FILE: web/src/app/app/services/currentMessageFIFO.ts
================================================
import { PacketType, sendMessage, SendMessageParams } from "./lib";

export class CurrentMessageFIFO {
  private stack: PacketType[] = [];
  isComplete: boolean = false;
  error: string | null = null;

  push(packetBunch: PacketType) {
    this.stack.push(packetBunch);
  }

  nextPacket(): PacketType | undefined {
    return this.stack.shift();
  }

  isEmpty(): boolean {
    return this.stack.length === 0;
  }
}

export async function updateCurrentMessageFIFO(
  stack: CurrentMessageFIFO,
  params: SendMessageParams
) {
  try {
    for await (const packet of sendMessage(params)) {
      if (params.signal?.aborted) {
        throw new Error("AbortError");
      }
      stack.push(packet);
    }
  } catch (error: unknown) {
    if (error instanceof Error) {
      if (error.name === "AbortError") {
        console.debug("Stream aborted");
      } else {
        stack.error = error.message;
      }
    } else {
      stack.error = String(error);
    }
  } finally {
    stack.isComplete = true;
  }
}


================================================
FILE: web/src/app/app/services/fileUtils.ts
================================================
import { FileDescriptor } from "../interfaces";
import { ProjectFile } from "../projects/projectsService";

export function projectsFileToFileDescriptor(
  file: ProjectFile
): FileDescriptor {
  return {
    id: file.file_id,
    type: file.chat_file_type,
    name: file.name,
    user_file_id: file.id,
  };
}

export function projectFilesToFileDescriptors(
  files: ProjectFile[]
): FileDescriptor[] {
  return files.map(projectsFileToFileDescriptor);
}


================================================
FILE: web/src/app/app/services/lib.tsx
================================================
import {
  Filters,
  DocumentInfoPacket,
  StreamStopInfo,
} from "@/lib/search/interfaces";
import { handleSSEStream } from "@/lib/search/streamingUtils";
import { FeedbackType } from "@/app/app/interfaces";
import {
  BackendMessage,
  DocumentsResponse,
  FileDescriptor,
  FileChatDisplay,
  Message,
  MessageResponseIDInfo,
  MultiModelMessageResponseIDInfo,
  ResearchType,
  RetrievalType,
  StreamingError,
  ToolCallMetadata,
  UserKnowledgeFilePacket,
} from "../interfaces";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { ReadonlyURLSearchParams } from "next/navigation";
import { SEARCH_PARAM_NAMES } from "./searchParams";
import { WEB_SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { Packet } from "./streamingModels";

export async function updateLlmOverrideForChatSession(
  chatSessionId: string,
  newAlternateModel: string
) {
  const response = await fetch("/api/chat/update-chat-session-model", {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      chat_session_id: chatSessionId,
      new_alternate_model: newAlternateModel,
    }),
  });
  return response;
}

export async function updateTemperatureOverrideForChatSession(
  chatSessionId: string,
  newTemperature: number
) {
  const response = await fetch("/api/chat/update-chat-session-temperature", {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      chat_session_id: chatSessionId,
      temperature_override: newTemperature,
    }),
  });
  return response;
}

export async function createChatSession(
  personaId: number,
  description: string | null,
  projectId: number | null
): Promise<string> {
  const createChatSessionResponse = await fetch(
    "/api/chat/create-chat-session",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        persona_id: personaId,
        description,
        project_id: projectId,
      }),
    }
  );
  if (!createChatSessionResponse.ok) {
    console.error(
      `Failed to create chat session - ${createChatSessionResponse.status}`
    );
    throw Error("Failed to create chat session");
  }
  const chatSessionResponseJson = await createChatSessionResponse.json();
  return chatSessionResponseJson.chat_session_id;
}

export type PacketType =
  | ToolCallMetadata
  | BackendMessage
  | DocumentInfoPacket
  | DocumentsResponse
  | FileChatDisplay
  | StreamingError
  | MessageResponseIDInfo
  | MultiModelMessageResponseIDInfo
  | StreamStopInfo
  | UserKnowledgeFilePacket
  | Packet;

// Origin of the message for telemetry tracking.
// Keep in sync with backend: backend/onyx/server/query_and_chat/models.py::MessageOrigin
export type MessageOrigin =
  | "webapp"
  | "chrome_extension"
  | "api"
  | "slackbot"
  | "unknown";

export interface LLMOverride {
  model_provider: string;
  model_version: string;
  temperature?: number;
  display_name?: string;
}

export interface SendMessageParams {
  message: string;
  fileDescriptors?: FileDescriptor[];
  parentMessageId: number | null;
  chatSessionId: string;
  filters: Filters | null;
  signal?: AbortSignal;
  deepResearch?: boolean;
  enabledToolIds?: number[];
  // Single forced tool ID (new API uses singular, not array)
  forcedToolId?: number | null;
  // LLM override parameters
  modelProvider?: string;
  modelVersion?: string;
  temperature?: number;
  // Multi-model: send multiple LLM overrides for parallel generation
  llmOverrides?: LLMOverride[];
  // Origin of the message for telemetry tracking
  origin?: MessageOrigin;
  // Additional context injected into the LLM call but not stored/shown in chat.
  // Used e.g. by Chrome extension "Read this tab" feature.
  additionalContext?: string;
}

export async function* sendMessage({
  message,
  fileDescriptors,
  parentMessageId,
  chatSessionId,
  filters,
  signal,
  deepResearch,
  enabledToolIds,
  forcedToolId,
  modelProvider,
  modelVersion,
  temperature,
  llmOverrides,
  origin,
  additionalContext,
}: SendMessageParams): AsyncGenerator<PacketType, void, unknown> {
  // Build payload for new send-chat-message API
  const payload = {
    message: message,
    chat_session_id: chatSessionId,
    parent_message_id: parentMessageId,
    file_descriptors: fileDescriptors,
    internal_search_filters: filters,
    deep_research: deepResearch ?? false,
    allowed_tool_ids: enabledToolIds,
    forced_tool_id: forcedToolId ?? null,
    llm_override:
      temperature || modelVersion
        ? {
            temperature,
            model_provider: modelProvider,
            model_version: modelVersion,
          }
        : null,
    // Multi-model: list of LLM overrides for parallel generation
    llm_overrides: llmOverrides ?? null,
    // Default to "unknown" for consistency with backend; callers should set explicitly
    origin: origin ?? "unknown",
    additional_context: additionalContext ?? null,
  };

  const body = JSON.stringify(payload);

  const response = await fetch(`/api/chat/send-chat-message`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body,
    signal,
  });

  if (!response.ok) {
    const data = await response.json().catch(() => ({}));
    throw new Error(data.detail ?? `HTTP error! status: ${response.status}`);
  }

  yield* handleSSEStream<PacketType>(response, signal);
}

export async function setPreferredResponse(
  userMessageId: number,
  preferredResponseId: number
): Promise<Response> {
  return fetch("/api/chat/set-preferred-response", {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      user_message_id: userMessageId,
      preferred_response_id: preferredResponseId,
    }),
  });
}

export async function nameChatSession(chatSessionId: string) {
  const response = await fetch("/api/chat/rename-chat-session", {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      chat_session_id: chatSessionId,
      name: null,
    }),
  });
  return response;
}

export async function patchMessageToBeLatest(messageId: number) {
  const response = await fetch("/api/chat/set-message-as-latest", {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      message_id: messageId,
    }),
  });
  return response;
}

export async function handleChatFeedback(
  messageId: number,
  feedback: FeedbackType,
  feedbackDetails: string,
  predefinedFeedback: string | undefined
) {
  const response = await fetch("/api/chat/create-chat-message-feedback", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      chat_message_id: messageId,
      is_positive: feedback === "like",
      feedback_text: feedbackDetails,
      predefined_feedback: predefinedFeedback,
    }),
  });
  return response;
}

export async function removeChatFeedback(messageId: number) {
  const response = await fetch(
    `/api/chat/remove-chat-message-feedback?chat_message_id=${messageId}`,
    {
      method: "DELETE",
      headers: {
        "Content-Type": "application/json",
      },
    }
  );
  return response;
}

export async function renameChatSession(
  chatSessionId: string,
  newName: string
) {
  const response = await fetch(`/api/chat/rename-chat-session`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      chat_session_id: chatSessionId,
      name: newName,
    }),
  });
  return response;
}

export async function deleteChatSession(chatSessionId: string) {
  const response = await fetch(
    `/api/chat/delete-chat-session/${chatSessionId}`,
    {
      method: "DELETE",
    }
  );
  return response;
}

export async function deleteAllChatSessions() {
  const response = await fetch(`/api/chat/delete-all-chat-sessions`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
  return response;
}

export async function getAvailableContextTokens(
  chatSessionId: string
): Promise<number | null> {
  const response = await fetch(
    `/api/chat/available-context-tokens/${chatSessionId}`
  );
  if (!response.ok) {
    return null;
  }
  const data = (await response.json()) as { available_tokens: number };
  return data?.available_tokens ?? null;
}

export function processRawChatHistory(
  rawMessages: BackendMessage[],
  packets: Packet[][]
): Map<number, Message> {
  const messages: Map<number, Message> = new Map();
  const parentMessageChildrenMap: Map<number, number[]> = new Map();

  let agentMessageInd = 0;

  rawMessages.forEach((messageInfo, _ind) => {
    const packetsForMessage = packets[agentMessageInd];
    if (messageInfo.message_type === "assistant") {
      agentMessageInd++;
    }

    const hasContextDocs = (messageInfo?.context_docs || []).length > 0;
    let retrievalType;
    if (hasContextDocs) {
      if (messageInfo.rephrased_query) {
        retrievalType = RetrievalType.Search;
      } else {
        retrievalType = RetrievalType.SelectedDocs;
      }
    } else {
      retrievalType = RetrievalType.None;
    }

    const message: Message = {
      // for existing messages, use the message_id as the nodeId
      // all that matters is that the nodeId is unique for a given chat session
      nodeId: messageInfo.message_id,
      messageId: messageInfo.message_id,
      message: messageInfo.message,
      type: messageInfo.message_type as "user" | "assistant",
      files: messageInfo.files,
      alternateAgentID:
        messageInfo.alternate_assistant_id !== null
          ? Number(messageInfo.alternate_assistant_id)
          : null,
      // only include these fields if this is an agent message so that
      // this is identical to what is computed at streaming time
      ...(messageInfo.message_type === "assistant"
        ? {
            retrievalType: retrievalType,
            researchType: messageInfo.research_type as ResearchType | undefined,
            query: messageInfo.rephrased_query,
            documents: messageInfo?.context_docs || [],
            citations: messageInfo?.citations || {},
            processingDurationSeconds: messageInfo.processing_duration_seconds,
          }
        : {}),
      toolCall: messageInfo.tool_call,
      parentNodeId: messageInfo.parent_message,
      childrenNodeIds: [],
      latestChildNodeId: messageInfo.latest_child_message,
      overridden_model: messageInfo.overridden_model,
      packets: packetsForMessage || [],
      currentFeedback: messageInfo.current_feedback as FeedbackType | null,
      // Multi-model answer generation
      preferredResponseId: messageInfo.preferred_response_id ?? null,
      modelDisplayName: messageInfo.model_display_name ?? null,
    };

    messages.set(messageInfo.message_id, message);

    if (messageInfo.parent_message !== null) {
      if (!parentMessageChildrenMap.has(messageInfo.parent_message)) {
        parentMessageChildrenMap.set(messageInfo.parent_message, []);
      }
      parentMessageChildrenMap
        .get(messageInfo.parent_message)!
        .push(messageInfo.message_id);
    }
  });

  // Populate childrenMessageIds for each message
  parentMessageChildrenMap.forEach((childrenIds, parentId) => {
    childrenIds.sort((a, b) => a - b);
    const parentMesage = messages.get(parentId);
    if (parentMesage) {
      parentMesage.childrenNodeIds = childrenIds;
    }
  });

  return messages;
}

export function personaIncludesRetrieval(
  selectedPersona: MinimalPersonaSnapshot
) {
  return selectedPersona.tools.some(
    (tool) =>
      tool.in_code_tool_id &&
      [SEARCH_TOOL_ID, WEB_SEARCH_TOOL_ID].includes(tool.in_code_tool_id)
  );
}

const PARAMS_TO_SKIP = [
  SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD,
  SEARCH_PARAM_NAMES.USER_PROMPT,
  SEARCH_PARAM_NAMES.TITLE,
  // only use these if explicitly passed in
  SEARCH_PARAM_NAMES.CHAT_ID,
  SEARCH_PARAM_NAMES.PERSONA_ID,
  SEARCH_PARAM_NAMES.PROJECT_ID,
  // do not persist project context in the URL after navigation
  "projectid",
];

export function buildChatUrl(
  existingSearchParams: ReadonlyURLSearchParams | null,
  chatSessionId: string | null,
  personaId: number | null,
  search?: boolean,
  skipReload?: boolean
) {
  const finalSearchParams: string[] = [];
  if (chatSessionId) {
    finalSearchParams.push(
      `${
        search ? SEARCH_PARAM_NAMES.SEARCH_ID : SEARCH_PARAM_NAMES.CHAT_ID
      }=${chatSessionId}`
    );
  }
  if (personaId !== null) {
    finalSearchParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${personaId}`);
  }

  existingSearchParams?.forEach((value, key) => {
    if (!PARAMS_TO_SKIP.includes(key)) {
      finalSearchParams.push(`${key}=${value}`);
    }
  });

  if (skipReload) {
    finalSearchParams.push(`${SEARCH_PARAM_NAMES.SKIP_RELOAD}=true`);
  }

  const finalSearchParamsString = finalSearchParams.join("&");

  if (finalSearchParamsString) {
    return `/${search ? "search" : "chat"}?${finalSearchParamsString}`;
  }

  return `/${search ? "search" : "chat"}`;
}

export async function uploadFilesForChat(
  files: File[]
): Promise<[FileDescriptor[], string | null]> {
  const formData = new FormData();
  files.forEach((file) => {
    formData.append("files", file);
  });

  const response = await fetch("/api/chat/file", {
    method: "POST",
    body: formData,
  });
  if (!response.ok) {
    return [[], `Failed to upload files - ${(await response.json()).detail}`];
  }
  const responseJson = await response.json();

  return [responseJson.files as FileDescriptor[], null];
}


================================================
FILE: web/src/app/app/services/messageTree.ts
================================================
import { FileDescriptor, Message } from "../interfaces";

export const SYSTEM_MESSAGE_ID = -3;
export const SYSTEM_NODE_ID = -3;

export type MessageTreeState = Map<number, Message>; // key is nodeId

export function createInitialMessageTreeState(
  initialMessages?: Map<number, Message> | Message[]
): MessageTreeState {
  if (!initialMessages) {
    return new Map();
  }
  if (initialMessages instanceof Map) {
    return new Map(initialMessages); // Shallow copy
  }
  return new Map(initialMessages.map((msg) => [msg.nodeId, msg]));
}

export function getMessage(
  messages: MessageTreeState,
  nodeId: number
): Message | undefined {
  return messages.get(nodeId);
}

export function getMessageByMessageId(
  messages: MessageTreeState,
  messageId: number
): Message | undefined {
  for (const message of Array.from(messages.values())) {
    if (message.messageId === messageId) {
      return message;
    }
  }
  return undefined;
}

function updateParentInMap(
  map: Map<number, Message>,
  parentNodeId: number,
  childNodeId: number,
  makeLatest: boolean
): void {
  const parent = map.get(parentNodeId);
  if (parent) {
    const parentChildren = parent.childrenNodeIds || [];
    const childrenSet = new Set(parentChildren);
    let updatedChildren = parentChildren;

    if (!childrenSet.has(childNodeId)) {
      updatedChildren = [...parentChildren, childNodeId];
    }

    const updatedParent = {
      ...parent,
      childrenNodeIds: updatedChildren,
      // Update latestChild only if explicitly requested or if it's the only child,
      // or if the child was newly added
      latestChildNodeId:
        makeLatest ||
        updatedChildren.length === 1 ||
        !childrenSet.has(childNodeId)
          ? childNodeId
          : parent.latestChildNodeId,
    };
    if (makeLatest && parent.latestChildNodeId !== childNodeId) {
      updatedParent.latestChildNodeId = childNodeId;
    }

    map.set(parentNodeId, updatedParent);
  } else {
    console.warn(
      `Parent message with nodeId ${parentNodeId} not found when updating for child ${childNodeId}`
    );
  }
}

export function upsertMessages(
  currentMessages: MessageTreeState,
  messagesToAdd: Message[],
  makeLatestChildMessage: boolean = false
): MessageTreeState {
  let newMessages = new Map(currentMessages);
  let messagesToAddClones = messagesToAdd.map((msg) => ({ ...msg })); // Clone all incoming messages

  if (newMessages.size === 0 && messagesToAddClones.length > 0) {
    const firstMessage = messagesToAddClones[0];
    if (!firstMessage) {
      throw new Error("No first message found in the message tree.");
    }
    const systemNodeId =
      firstMessage.parentNodeId !== null
        ? firstMessage.parentNodeId
        : SYSTEM_NODE_ID;
    const firstNodeId = firstMessage.nodeId;

    // Check if system message needs to be added or already exists (e.g., from parentNodeId)
    if (!newMessages.has(systemNodeId)) {
      const dummySystemMessage: Message = {
        messageId: SYSTEM_MESSAGE_ID,
        nodeId: systemNodeId,
        message: "",
        type: "system",
        files: [],
        toolCall: null,
        parentNodeId: null,
        childrenNodeIds: [firstNodeId],
        latestChildNodeId: firstNodeId,
        packets: [],
      };
      newMessages.set(dummySystemMessage.nodeId, dummySystemMessage);
    }
    // Ensure the first message points to the system message if its parent was null
    if (!firstMessage) {
      console.error("No first message found in the message tree.");
      return newMessages;
    }
    if (firstMessage.parentNodeId === null) {
      firstMessage.parentNodeId = systemNodeId;
    }
  }

  messagesToAddClones.forEach((message) => {
    // Add/update the message itself
    newMessages.set(message.nodeId, message);

    // Update parent's children if the message has a parent
    if (message.parentNodeId !== null) {
      // When adding multiple messages, only make the *first* one added potentially the latest,
      // unless `makeLatestChildMessage` is true for all.
      // Let's stick to the original logic: update parent, potentially making this message latest
      // based on makeLatestChildMessage flag OR if it's a new child being added.
      updateParentInMap(
        newMessages,
        message.parentNodeId,
        message.nodeId,
        makeLatestChildMessage
      );
    }
  });

  // Explicitly set the last message of the batch as the latest if requested,
  // overriding previous updates within the loop if necessary.
  if (makeLatestChildMessage && messagesToAddClones.length > 0) {
    const lastMessage = messagesToAddClones[messagesToAddClones.length - 1];
    if (!lastMessage) {
      console.error("No last message found in the message tree.");
      return newMessages;
    }
    if (lastMessage.parentNodeId !== null) {
      const parent = newMessages.get(lastMessage.parentNodeId);
      if (parent && parent.latestChildNodeId !== lastMessage.nodeId) {
        const updatedParent = {
          ...parent,
          latestChildNodeId: lastMessage.nodeId,
        };
        newMessages.set(parent.nodeId, updatedParent);
      }
    }
  }

  return newMessages;
}

export function removeMessage(
  currentMessages: MessageTreeState,
  nodeIdToRemove: number
): MessageTreeState {
  if (!currentMessages.has(nodeIdToRemove)) {
    return currentMessages; // Return original if message doesn't exist
  }

  const newMessages = new Map(currentMessages);
  const messageToRemove = newMessages.get(nodeIdToRemove)!;

  // Collect all descendant IDs to remove
  const idsToRemove = new Set<number>();
  const queue: number[] = [nodeIdToRemove];

  while (queue.length > 0) {
    const currentId = queue.shift()!;
    if (!newMessages.has(currentId) || idsToRemove.has(currentId)) continue;
    idsToRemove.add(currentId);

    const currentMsg = newMessages.get(currentId);
    if (currentMsg?.childrenNodeIds) {
      currentMsg.childrenNodeIds.forEach((childId) => queue.push(childId));
    }
  }

  // Remove all descendants
  idsToRemove.forEach((id) => newMessages.delete(id));

  // Update the parent
  if (messageToRemove.parentNodeId !== null) {
    const parent = newMessages.get(messageToRemove.parentNodeId);
    if (parent) {
      const updatedChildren = (parent.childrenNodeIds || []).filter(
        (id) => id !== nodeIdToRemove
      );
      const updatedParent = {
        ...parent,
        childrenNodeIds: updatedChildren,
        // If the removed message was the latest, find the new latest (last in the updated children list)
        latestChildNodeId:
          parent.latestChildNodeId === nodeIdToRemove
            ? updatedChildren.length > 0
              ? updatedChildren[updatedChildren.length - 1]
              : null
            : parent.latestChildNodeId,
      };
      newMessages.set(parent.nodeId, updatedParent);
    }
  }

  return newMessages;
}

export function setMessageAsLatest(
  currentMessages: MessageTreeState,
  nodeId: number
): MessageTreeState {
  const message = currentMessages.get(nodeId);
  if (!message || message.parentNodeId === null) {
    return currentMessages; // Cannot set root or non-existent message as latest
  }

  const parent = currentMessages.get(message.parentNodeId);
  if (!parent || !(parent.childrenNodeIds || []).includes(nodeId)) {
    console.warn(
      `Cannot set message ${nodeId} as latest, parent ${message.parentNodeId} or child link missing.`
    );
    return currentMessages; // Parent doesn't exist or doesn't list this message as a child
  }

  if (parent.latestChildNodeId === nodeId) {
    return currentMessages; // Already the latest
  }

  const newMessages = new Map(currentMessages);
  const updatedParent = {
    ...parent,
    latestChildNodeId: nodeId,
  };
  newMessages.set(parent.nodeId, updatedParent);

  return newMessages;
}

export function getLatestMessageChain(messages: MessageTreeState): Message[] {
  const chain: Message[] = [];
  if (messages.size === 0) {
    return chain;
  }

  // Find the root message
  let root: Message | undefined;
  if (messages.has(SYSTEM_NODE_ID)) {
    root = messages.get(SYSTEM_NODE_ID);
  } else {
    // Use Array.from to fix linter error
    const potentialRoots = Array.from(messages.values()).filter(
      (message) =>
        message.parentNodeId === null || !messages.has(message.parentNodeId!)
    );
    if (potentialRoots.length > 0) {
      // Prefer non-system message if multiple roots found somehow
      root =
        potentialRoots.find((m) => m.type !== "system") || potentialRoots[0];
    }
  }

  if (!root) {
    console.error("Could not determine the root message.");
    // Fallback: return flat list sorted by nodeId perhaps? Or empty?
    return Array.from(messages.values()).sort((a, b) => a.nodeId - b.nodeId);
  }

  let currentMessage: Message | undefined = root;
  // The root itself (like SYSTEM_MESSAGE) might not be part of the visible chain
  if (root.nodeId !== SYSTEM_NODE_ID && root.type !== "system") {
    // Need to clone message for safety? If MessageTreeState guarantees immutability maybe not.
    // Let's assume Message objects within the map are treated as immutable.
    chain.push(root);
  }

  while (
    currentMessage?.latestChildNodeId !== null &&
    currentMessage?.latestChildNodeId !== undefined
  ) {
    const nextNodeId = currentMessage.latestChildNodeId;
    const nextMessage = messages.get(nextNodeId);
    if (nextMessage) {
      chain.push(nextMessage);
      currentMessage = nextMessage;
    } else {
      console.warn(
        `Chain broken: Message with nodeId ${nextNodeId} not found.`
      );
      break;
    }
  }

  return chain;
}

export function getHumanAndAIMessageFromMessageNumber(
  messages: MessageTreeState,
  messageNumber: number
): { humanMessage: Message | null; aiMessage: Message | null } {
  const latestChain = getLatestMessageChain(messages);
  const messageIndex = latestChain.findIndex(
    (msg) => msg.messageId === messageNumber
  );

  if (messageIndex === -1) {
    // Maybe the message exists but isn't in the latest chain? Search the whole map.
    const message = getMessageByMessageId(messages, messageNumber);
    if (!message) return { humanMessage: null, aiMessage: null };

    if (message.type === "user") {
      // Find its latest child that is an agent
      const potentialAiMessage =
        message.latestChildNodeId !== null &&
        message.latestChildNodeId !== undefined
          ? messages.get(message.latestChildNodeId)
          : undefined;
      const aiMessage =
        potentialAiMessage?.type === "assistant" ? potentialAiMessage : null;
      return { humanMessage: message, aiMessage };
    } else if (message.type === "assistant" || message.type === "error") {
      const humanMessage =
        message.parentNodeId !== null
          ? messages.get(message.parentNodeId)
          : null;
      return {
        humanMessage: humanMessage?.type === "user" ? humanMessage : null,
        aiMessage: message,
      };
    }
    return { humanMessage: null, aiMessage: null };
  }

  // Message is in the latest chain
  const message = latestChain[messageIndex];
  if (!message) {
    console.error(`Message ${messageNumber} not found in the latest chain.`);
    return { humanMessage: null, aiMessage: null };
  }

  if (message.type === "user") {
    const potentialAiMessage = latestChain[messageIndex + 1];
    const aiMessage =
      potentialAiMessage?.type === "assistant" &&
      potentialAiMessage.parentNodeId === message.nodeId
        ? potentialAiMessage
        : null;
    return { humanMessage: message, aiMessage };
  } else if (message.type === "assistant" || message.type === "error") {
    const potentialHumanMessage = latestChain[messageIndex - 1];
    const humanMessage =
      potentialHumanMessage?.type === "user" &&
      message.parentNodeId === potentialHumanMessage.nodeId
        ? potentialHumanMessage
        : null;
    return { humanMessage, aiMessage: message };
  }

  return { humanMessage: null, aiMessage: null };
}

export function getLastSuccessfulMessageId(
  messages: MessageTreeState,
  chain?: Message[]
): number | null {
  const messageChain = chain || getLatestMessageChain(messages);
  for (let i = messageChain.length - 1; i >= 0; i--) {
    const message = messageChain[i];
    if (!message) {
      console.error(`Message ${i} not found in the message chain.`);
      continue;
    }

    // don't include failed / not-completed messages
    if (message.type !== "error" && message.messageId !== undefined) {
      return message.messageId ?? null;
    }
  }

  // If the chain starts with an error or is empty, check for system message
  const systemMessage = messages.get(SYSTEM_NODE_ID);
  if (systemMessage) {
    // Check if the system message itself is considered "successful" (it usually is)
    // Or if it has a successful child
    const childNodeId = systemMessage.latestChildNodeId;
    if (childNodeId !== null && childNodeId !== undefined) {
      const firstRealMessage = messages.get(childNodeId);
      if (firstRealMessage && firstRealMessage.type !== "error") {
        return firstRealMessage.messageId ?? null;
      }
    }
    // If no successful child, return the system message ID itself as the root?
    // This matches the class behavior implicitly returning the root ID if nothing else works.
    return systemMessage.messageId ?? null;
  }

  return null; // No successful message found
}

interface BuildEmptyMessageParams {
  messageType: "user" | "assistant";
  parentNodeId: number;
  message?: string;
  files?: FileDescriptor[];
  nodeIdOffset?: number;
}

export const buildEmptyMessage = (params: BuildEmptyMessageParams): Message => {
  // use negative number to avoid conflicts with messageIds
  const tempNodeId = -1 * Date.now() - (params.nodeIdOffset || 0);
  return {
    nodeId: tempNodeId,
    message: params.message || "",
    type: params.messageType,
    files: params.files || [],
    toolCall: null,
    parentNodeId: params.parentNodeId,
    packets: [],
  };
};

export const buildImmediateMessages = (
  parentNodeId: number,
  userInput: string,
  files: FileDescriptor[],
  messageToResend?: Message
): {
  initialUserNode: Message;
  initialAgentNode: Message;
} => {
  // Always create a NEW message with a new nodeId for proper branching.
  // When editing (messageToResend exists), this creates a sibling to the original
  // message since they share the same parentNodeId.
  const initialUserNode = buildEmptyMessage({
    messageType: "user",
    parentNodeId,
    message: userInput,
    files,
  });
  const initialAgentNode = buildEmptyMessage({
    messageType: "assistant",
    parentNodeId: initialUserNode.nodeId,
    nodeIdOffset: 1,
  });

  initialUserNode.childrenNodeIds = [initialAgentNode.nodeId];
  initialUserNode.latestChildNodeId = initialAgentNode.nodeId;

  return {
    initialUserNode,
    initialAgentNode,
  };
};


================================================
FILE: web/src/app/app/services/packetUtils.test.ts
================================================
/**
 * Unit tests for packetUtils functions
 * Tests packet type classification and utility functions
 */

import { Packet, PacketType, Placement } from "./streamingModels";
import {
  isToolPacket,
  isActualToolCallPacket,
  isDisplayPacket,
  isSearchToolPacket,
  isStreamingComplete,
  isFinalAnswerComing,
} from "./packetUtils";

// Helper to create a mock packet with a specific type
function createPacket(
  type: PacketType,
  placement?: Partial<Placement>
): Packet {
  return {
    placement: { turn_index: 0, tab_index: 0, ...placement },
    obj: { type } as any,
  };
}

describe("packetUtils", () => {
  describe("isToolPacket", () => {
    const toolPacketTypes = [
      PacketType.SEARCH_TOOL_START,
      PacketType.SEARCH_TOOL_QUERIES_DELTA,
      PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,
      PacketType.PYTHON_TOOL_START,
      PacketType.PYTHON_TOOL_DELTA,
      PacketType.CUSTOM_TOOL_START,
      PacketType.CUSTOM_TOOL_DELTA,
      PacketType.REASONING_START,
      PacketType.REASONING_DELTA,
      PacketType.FETCH_TOOL_START,
      PacketType.FETCH_TOOL_URLS,
      PacketType.FETCH_TOOL_DOCUMENTS,
      PacketType.DEEP_RESEARCH_PLAN_START,
      PacketType.DEEP_RESEARCH_PLAN_DELTA,
      PacketType.RESEARCH_AGENT_START,
      PacketType.INTERMEDIATE_REPORT_START,
      PacketType.INTERMEDIATE_REPORT_DELTA,
      PacketType.INTERMEDIATE_REPORT_CITED_DOCS,
    ];

    test.each(toolPacketTypes)(
      "returns true for tool packet type: %s",
      (packetType) => {
        const packet = createPacket(packetType);
        expect(isToolPacket(packet, false)).toBe(true);
      }
    );

    test("returns true for SECTION_END when includeSectionEnd is true", () => {
      const packet = createPacket(PacketType.SECTION_END);
      expect(isToolPacket(packet, true)).toBe(true);
    });

    test("returns false for SECTION_END when includeSectionEnd is false", () => {
      const packet = createPacket(PacketType.SECTION_END);
      expect(isToolPacket(packet, false)).toBe(false);
    });

    test("returns true for ERROR when includeSectionEnd is true", () => {
      const packet = createPacket(PacketType.ERROR);
      expect(isToolPacket(packet, true)).toBe(true);
    });

    test("returns false for ERROR when includeSectionEnd is false", () => {
      const packet = createPacket(PacketType.ERROR);
      expect(isToolPacket(packet, false)).toBe(false);
    });

    test("returns false for MESSAGE_START", () => {
      const packet = createPacket(PacketType.MESSAGE_START);
      expect(isToolPacket(packet)).toBe(false);
    });

    test("returns false for STOP", () => {
      const packet = createPacket(PacketType.STOP);
      expect(isToolPacket(packet)).toBe(false);
    });
  });

  describe("isActualToolCallPacket", () => {
    const actualToolCallTypes = [
      PacketType.SEARCH_TOOL_START,
      PacketType.SEARCH_TOOL_QUERIES_DELTA,
      PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,
      PacketType.PYTHON_TOOL_START,
      PacketType.PYTHON_TOOL_DELTA,
      PacketType.CUSTOM_TOOL_START,
      PacketType.CUSTOM_TOOL_DELTA,
      PacketType.FETCH_TOOL_START,
      PacketType.FETCH_TOOL_URLS,
      PacketType.FETCH_TOOL_DOCUMENTS,
      PacketType.DEEP_RESEARCH_PLAN_START,
      PacketType.DEEP_RESEARCH_PLAN_DELTA,
      PacketType.RESEARCH_AGENT_START,
      PacketType.INTERMEDIATE_REPORT_START,
      PacketType.INTERMEDIATE_REPORT_DELTA,
      PacketType.INTERMEDIATE_REPORT_CITED_DOCS,
    ];

    test.each(actualToolCallTypes)(
      "returns true for actual tool call type: %s",
      (packetType) => {
        const packet = createPacket(packetType);
        expect(isActualToolCallPacket(packet)).toBe(true);
      }
    );

    test("returns false for REASONING_START (this is the key fix)", () => {
      const packet = createPacket(PacketType.REASONING_START);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    test("returns false for REASONING_DELTA (this is the key fix)", () => {
      const packet = createPacket(PacketType.REASONING_DELTA);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    test("returns false for MESSAGE_START", () => {
      const packet = createPacket(PacketType.MESSAGE_START);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    test("returns false for STOP", () => {
      const packet = createPacket(PacketType.STOP);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    test("returns false for SECTION_END", () => {
      const packet = createPacket(PacketType.SECTION_END);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    test("returns false for ERROR", () => {
      const packet = createPacket(PacketType.ERROR);
      expect(isActualToolCallPacket(packet)).toBe(false);
    });

    // Test that isActualToolCallPacket is consistent with isToolPacket
    // (i.e., it's a subset of tool packets minus reasoning)
    test("isActualToolCallPacket is isToolPacket minus reasoning packets", () => {
      // All actual tool call types should also be tool packets
      actualToolCallTypes.forEach((packetType) => {
        const packet = createPacket(packetType);
        expect(isToolPacket(packet, false)).toBe(true);
        expect(isActualToolCallPacket(packet)).toBe(true);
      });

      // Reasoning packets should be tool packets but NOT actual tool calls
      const reasoningPacket1 = createPacket(PacketType.REASONING_START);
      const reasoningPacket2 = createPacket(PacketType.REASONING_DELTA);

      expect(isToolPacket(reasoningPacket1, false)).toBe(true);
      expect(isActualToolCallPacket(reasoningPacket1)).toBe(false);

      expect(isToolPacket(reasoningPacket2, false)).toBe(true);
      expect(isActualToolCallPacket(reasoningPacket2)).toBe(false);
    });
  });

  describe("isDisplayPacket", () => {
    test("returns true for MESSAGE_START", () => {
      const packet = createPacket(PacketType.MESSAGE_START);
      expect(isDisplayPacket(packet)).toBe(true);
    });

    test("returns true for IMAGE_GENERATION_TOOL_START", () => {
      const packet = createPacket(PacketType.IMAGE_GENERATION_TOOL_START);
      expect(isDisplayPacket(packet)).toBe(true);
    });

    test("returns false for other packet types", () => {
      const packet = createPacket(PacketType.SEARCH_TOOL_START);
      expect(isDisplayPacket(packet)).toBe(false);
    });
  });

  describe("isSearchToolPacket", () => {
    test("returns true for SEARCH_TOOL_START", () => {
      const packet = createPacket(PacketType.SEARCH_TOOL_START);
      expect(isSearchToolPacket(packet)).toBe(true);
    });

    test("returns true for SEARCH_TOOL_QUERIES_DELTA", () => {
      const packet = createPacket(PacketType.SEARCH_TOOL_QUERIES_DELTA);
      expect(isSearchToolPacket(packet)).toBe(true);
    });

    test("returns true for SEARCH_TOOL_DOCUMENTS_DELTA", () => {
      const packet = createPacket(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA);
      expect(isSearchToolPacket(packet)).toBe(true);
    });

    test("returns false for other packet types", () => {
      const packet = createPacket(PacketType.PYTHON_TOOL_START);
      expect(isSearchToolPacket(packet)).toBe(false);
    });
  });

  describe("isStreamingComplete", () => {
    test("returns true when packets contain STOP", () => {
      const packets = [
        createPacket(PacketType.MESSAGE_START),
        createPacket(PacketType.MESSAGE_DELTA),
        createPacket(PacketType.STOP),
      ];
      expect(isStreamingComplete(packets)).toBe(true);
    });

    test("returns false when packets do not contain STOP", () => {
      const packets = [
        createPacket(PacketType.MESSAGE_START),
        createPacket(PacketType.MESSAGE_DELTA),
      ];
      expect(isStreamingComplete(packets)).toBe(false);
    });

    test("returns false for empty array", () => {
      expect(isStreamingComplete([])).toBe(false);
    });
  });

  describe("isFinalAnswerComing", () => {
    test("returns true when packets contain MESSAGE_START", () => {
      const packets = [
        createPacket(PacketType.SEARCH_TOOL_START),
        createPacket(PacketType.MESSAGE_START),
      ];
      expect(isFinalAnswerComing(packets)).toBe(true);
    });

    test("returns true when packets contain IMAGE_GENERATION_TOOL_START", () => {
      const packets = [createPacket(PacketType.IMAGE_GENERATION_TOOL_START)];
      expect(isFinalAnswerComing(packets)).toBe(true);
    });

    test("returns false when no display packets present", () => {
      const packets = [
        createPacket(PacketType.SEARCH_TOOL_START),
        createPacket(PacketType.REASONING_START),
      ];
      expect(isFinalAnswerComing(packets)).toBe(false);
    });

    test("returns false for empty array", () => {
      expect(isFinalAnswerComing([])).toBe(false);
    });
  });
});


================================================
FILE: web/src/app/app/services/packetUtils.ts
================================================
import {
  MessageDelta,
  MessageStart,
  PacketType,
  StreamingCitation,
} from "./streamingModels";
import { Packet } from "@/app/app/services/streamingModels";

export function isToolPacket(
  packet: Packet,
  includeSectionEnd: boolean = true
) {
  let toolPacketTypes = [
    PacketType.SEARCH_TOOL_START,
    PacketType.SEARCH_TOOL_QUERIES_DELTA,
    PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,
    PacketType.PYTHON_TOOL_START,
    PacketType.PYTHON_TOOL_DELTA,
    PacketType.TOOL_CALL_ARGUMENT_DELTA,
    PacketType.CUSTOM_TOOL_START,
    PacketType.CUSTOM_TOOL_ARGS,
    PacketType.CUSTOM_TOOL_DELTA,
    PacketType.FILE_READER_START,
    PacketType.FILE_READER_RESULT,
    PacketType.REASONING_START,
    PacketType.REASONING_DELTA,
    PacketType.FETCH_TOOL_START,
    PacketType.FETCH_TOOL_URLS,
    PacketType.FETCH_TOOL_DOCUMENTS,
    PacketType.MEMORY_TOOL_START,
    PacketType.MEMORY_TOOL_DELTA,
    PacketType.MEMORY_TOOL_NO_ACCESS,
    PacketType.DEEP_RESEARCH_PLAN_START,
    PacketType.DEEP_RESEARCH_PLAN_DELTA,
    PacketType.RESEARCH_AGENT_START,
    PacketType.INTERMEDIATE_REPORT_START,
    PacketType.INTERMEDIATE_REPORT_DELTA,
    PacketType.INTERMEDIATE_REPORT_CITED_DOCS,
  ];
  if (includeSectionEnd) {
    toolPacketTypes.push(PacketType.SECTION_END);
    toolPacketTypes.push(PacketType.ERROR);
  }
  return toolPacketTypes.includes(packet.obj.type as PacketType);
}

// Check if a packet is an actual tool call (not reasoning/thinking).
// This is used to determine if we should reset finalAnswerComing state
// when a tool packet arrives after message packets (Claude workaround).
// Reasoning packets should NOT reset finalAnswerComing since they are
// just the model thinking, not actual tool calls that would produce new content.
export function isActualToolCallPacket(packet: Packet): boolean {
  return (
    isToolPacket(packet, false) &&
    packet.obj.type !== PacketType.REASONING_START &&
    packet.obj.type !== PacketType.REASONING_DELTA
  );
}

export function isDisplayPacket(packet: Packet) {
  return (
    packet.obj.type === PacketType.MESSAGE_START ||
    packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START
  );
}

export function isSearchToolPacket(packet: Packet): boolean {
  return (
    packet.obj.type === PacketType.SEARCH_TOOL_START ||
    packet.obj.type === PacketType.SEARCH_TOOL_QUERIES_DELTA ||
    packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA
  );
}

export function isStreamingComplete(packets: Packet[]) {
  return packets.some((packet) => packet.obj.type === PacketType.STOP);
}

export function isFinalAnswerComing(packets: Packet[]) {
  return packets.some(
    (packet) =>
      packet.obj.type === PacketType.MESSAGE_START ||
      packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START
  );
}

export function isFinalAnswerComplete(packets: Packet[]) {
  // Find the first MESSAGE_START packet and get its index
  const messageStartPacket = packets.find(
    (packet) =>
      packet.obj.type === PacketType.MESSAGE_START ||
      packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START
  );

  if (!messageStartPacket) {
    return false;
  }

  // Check if there's a corresponding SECTION_END or ERROR with the same turn_index
  return packets.some(
    (packet) =>
      (packet.obj.type === PacketType.SECTION_END ||
        packet.obj.type === PacketType.ERROR) &&
      packet.placement.turn_index === messageStartPacket.placement.turn_index
  );
}

export function groupPacketsByTurnIndex(
  packets: Packet[]
): { turn_index: number; tab_index: number; packets: Packet[] }[] {
  /*
  Group packets by (turn_index, tab_index). 
  Ordered from lowest turn_index to highest, then by tab_index within each turn.
  This supports parallel tool calls where multiple tools share the same turn_index
  but have different tab_index values.
  */
  const groups = packets.reduce(
    (
      acc: Map<
        string,
        { turn_index: number; tab_index: number; packets: Packet[] }
      >,
      packet
    ) => {
      const turn_index = packet.placement.turn_index;
      const tab_index = packet.placement.tab_index ?? 0;
      const key = `${turn_index}-${tab_index}`;
      if (!acc.has(key)) {
        acc.set(key, { turn_index, tab_index, packets: [] });
      }
      acc.get(key)!.packets.push(packet);
      return acc;
    },
    new Map()
  );

  // Convert to array and sort by turn_index first, then tab_index
  return Array.from(groups.values()).sort((a, b) => {
    if (a.turn_index !== b.turn_index) {
      return a.turn_index - b.turn_index;
    }
    return a.tab_index - b.tab_index;
  });
}

export function getTextContent(packets: Packet[]) {
  return packets
    .map((packet) => {
      if (
        packet.obj.type === PacketType.MESSAGE_START ||
        packet.obj.type === PacketType.MESSAGE_DELTA
      ) {
        return (packet.obj as MessageStart | MessageDelta).content || "";
      }
      return "";
    })
    .join("");
}

export function getCitations(packets: Packet[]): StreamingCitation[] {
  const citations: StreamingCitation[] = [];
  const seenDocIds = new Set<string>();

  packets.forEach((packet) => {
    if (packet.obj.type === PacketType.CITATION_INFO) {
      // Individual citation packet from backend
      const citationInfo = packet.obj as {
        citation_number: number;
        document_id: string;
      };
      if (!seenDocIds.has(citationInfo.document_id)) {
        seenDocIds.add(citationInfo.document_id);
        citations.push({
          citation_num: citationInfo.citation_number,
          document_id: citationInfo.document_id,
        });
      }
    }
  });

  return citations;
}


================================================
FILE: web/src/app/app/services/searchParams.ts
================================================
import { ReadonlyURLSearchParams } from "next/navigation";

// search params
export const SEARCH_PARAM_NAMES = {
  CHAT_ID: "chatId",
  SEARCH_ID: "searchId",
  PERSONA_ID: "agentId",
  PROJECT_ID: "projectId",
  ALL_MY_DOCUMENTS: "allMyDocuments",
  // overrides
  TEMPERATURE: "temperature",
  MODEL_VERSION: "model-version",
  SYSTEM_PROMPT: "system-prompt",
  STRUCTURED_MODEL: "structured-model",
  // user message
  USER_PROMPT: "user-prompt",
  SUBMIT_ON_LOAD: "submit-on-load",
  // chat title
  TITLE: "title",
  FILES: "files",
  // for seeding chats
  SEEDED: "seeded",
  SEND_ON_LOAD: "send-on-load",

  // when sending a message for the first time, we don't want to reload the page
  // and cause a re-render
  SKIP_RELOAD: "skip-reload",
};

export function shouldSubmitOnLoad(
  searchParams: ReadonlyURLSearchParams | null
) {
  const rawSubmitOnLoad = searchParams?.get(SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD);
  if (rawSubmitOnLoad === "true" || rawSubmitOnLoad === "1") {
    return true;
  }
  return false;
}


================================================
FILE: web/src/app/app/services/streamingModels.ts
================================================
import { OnyxDocument } from "@/lib/search/interfaces";

// Base interface for all streaming objects
interface BaseObj {
  type: string;
}

export enum PacketType {
  MESSAGE_START = "message_start",
  MESSAGE_DELTA = "message_delta",
  MESSAGE_END = "message_end",

  STOP = "stop",
  SECTION_END = "section_end",
  TOP_LEVEL_BRANCHING = "top_level_branching",
  ERROR = "error",

  // Specific tool packets
  SEARCH_TOOL_START = "search_tool_start",
  SEARCH_TOOL_QUERIES_DELTA = "search_tool_queries_delta",
  SEARCH_TOOL_DOCUMENTS_DELTA = "search_tool_documents_delta",
  IMAGE_GENERATION_TOOL_START = "image_generation_start",
  IMAGE_GENERATION_TOOL_DELTA = "image_generation_final",
  PYTHON_TOOL_START = "python_tool_start",
  PYTHON_TOOL_DELTA = "python_tool_delta",
  FETCH_TOOL_START = "open_url_start",
  FETCH_TOOL_URLS = "open_url_urls",
  FETCH_TOOL_DOCUMENTS = "open_url_documents",

  // Tool call argument delta (streams tool args before tool executes)
  TOOL_CALL_ARGUMENT_DELTA = "tool_call_argument_delta",

  // Custom tool packets
  CUSTOM_TOOL_START = "custom_tool_start",
  CUSTOM_TOOL_ARGS = "custom_tool_args",
  CUSTOM_TOOL_DELTA = "custom_tool_delta",

  // File reader tool packets
  FILE_READER_START = "file_reader_start",
  FILE_READER_RESULT = "file_reader_result",
  // Memory tool packets
  MEMORY_TOOL_START = "memory_tool_start",
  MEMORY_TOOL_DELTA = "memory_tool_delta",
  MEMORY_TOOL_NO_ACCESS = "memory_tool_no_access",

  // Reasoning packets
  REASONING_START = "reasoning_start",
  REASONING_DELTA = "reasoning_delta",
  REASONING_DONE = "reasoning_done",

  // Citation packets
  CITATION_START = "citation_start",
  CITATION_END = "citation_end",
  // Backend sends individual citation_info packets during streaming
  CITATION_INFO = "citation_info",

  // Deep Research packets
  DEEP_RESEARCH_PLAN_START = "deep_research_plan_start",
  DEEP_RESEARCH_PLAN_DELTA = "deep_research_plan_delta",
  RESEARCH_AGENT_START = "research_agent_start",
  INTERMEDIATE_REPORT_START = "intermediate_report_start",
  INTERMEDIATE_REPORT_DELTA = "intermediate_report_delta",
  INTERMEDIATE_REPORT_CITED_DOCS = "intermediate_report_cited_docs",
}

export const CODE_INTERPRETER_TOOL_TYPES = {
  PYTHON: "python",
} as const;

// Basic Message Packets
export interface MessageStart extends BaseObj {
  id: string;
  type: "message_start";
  content: string;

  final_documents: OnyxDocument[] | null;
  pre_answer_processing_seconds?: number;
}

export interface MessageDelta extends BaseObj {
  content: string;
  type: "message_delta";
}

export interface MessageEnd extends BaseObj {
  type: "message_end";
}

// Control Packets
export enum StopReason {
  FINISHED = "finished",
  USER_CANCELLED = "user_cancelled",
}

export interface Stop extends BaseObj {
  type: "stop";
  stop_reason?: StopReason;
}

export interface SectionEnd extends BaseObj {
  type: "section_end";
}

export interface TopLevelBranching extends BaseObj {
  type: "top_level_branching";
  num_parallel_branches: number;
}

export interface PacketError extends BaseObj {
  type: "error";
  message?: string;
}

// Specific tool packets
export interface SearchToolStart extends BaseObj {
  type: "search_tool_start";
  is_internet_search?: boolean;
}

export interface SearchToolQueriesDelta extends BaseObj {
  type: "search_tool_queries_delta";
  queries: string[];
}

export interface SearchToolDocumentsDelta extends BaseObj {
  type: "search_tool_documents_delta";
  documents: OnyxDocument[];
}

export type ImageShape = "square" | "landscape" | "portrait";

interface GeneratedImage {
  file_id: string;
  url: string;
  revised_prompt: string;
  shape?: ImageShape;
}

export interface ImageGenerationToolStart extends BaseObj {
  type: "image_generation_start";
}

export interface ImageGenerationToolDelta extends BaseObj {
  type: "image_generation_final";
  images: GeneratedImage[];
}

export interface PythonToolStart extends BaseObj {
  type: "python_tool_start";
  code: string;
}

export interface PythonToolDelta extends BaseObj {
  type: "python_tool_delta";
  stdout: string;
  stderr: string;
  file_ids: string[];
}

export interface ToolCallArgumentDelta extends BaseObj {
  type: "tool_call_argument_delta";
  tool_type: string;
  tool_id: string;
  argument_deltas: Record<string, unknown>;
}

export interface FetchToolStart extends BaseObj {
  type: "open_url_start";
}

export interface FetchToolUrls extends BaseObj {
  type: "open_url_urls";
  urls: string[];
}

export interface FetchToolDocuments extends BaseObj {
  type: "open_url_documents";
  documents: OnyxDocument[];
}

// Custom Tool Packets
export interface CustomToolErrorInfo {
  is_auth_error: boolean;
  status_code: number;
  message: string;
}

export interface CustomToolStart extends BaseObj {
  type: "custom_tool_start";
  tool_name: string;
  tool_id?: number | null;
}

export interface CustomToolArgs extends BaseObj {
  type: "custom_tool_args";
  tool_name: string;
  tool_args: Record<string, any>;
}

export interface CustomToolDelta extends BaseObj {
  type: "custom_tool_delta";
  tool_name: string;
  tool_id?: number | null;
  response_type: string;
  data?: any;
  file_ids?: string[] | null;
  error?: CustomToolErrorInfo | null;
}

// File Reader Packets
export interface FileReaderStart extends BaseObj {
  type: "file_reader_start";
}

export interface FileReaderResult extends BaseObj {
  type: "file_reader_result";
  file_name: string;
  file_id: string;
  start_char: number;
  end_char: number;
  total_chars: number;
  preview_start: string;
  preview_end: string;
}
// Memory Tool Packets
export interface MemoryToolStart extends BaseObj {
  type: "memory_tool_start";
}

export interface MemoryToolDelta extends BaseObj {
  type: "memory_tool_delta";
  memory_text: string;
  operation: "add" | "update";
  memory_id: number | null;
  index: number | null;
}

export interface MemoryToolNoAccess extends BaseObj {
  type: "memory_tool_no_access";
}

// Reasoning Packets
export interface ReasoningStart extends BaseObj {
  type: "reasoning_start";
}

export interface ReasoningDelta extends BaseObj {
  type: "reasoning_delta";
  reasoning: string;
}

export interface ReasoningDone extends BaseObj {
  type: "reasoning_done";
}

// Citation Packets
export interface StreamingCitation {
  citation_num: number;
  document_id: string;
}

export interface CitationStart extends BaseObj {
  type: "citation_start";
}

// Individual citation info packet (sent during streaming from backend)
export interface CitationInfo extends BaseObj {
  type: "citation_info";
  citation_number: number;
  document_id: string;
}

// Deep Research Plan Packets
export interface DeepResearchPlanStart extends BaseObj {
  type: "deep_research_plan_start";
}

export interface DeepResearchPlanDelta extends BaseObj {
  type: "deep_research_plan_delta";
  content: string;
}

export interface ResearchAgentStart extends BaseObj {
  type: "research_agent_start";
  research_task: string;
}

export interface IntermediateReportStart extends BaseObj {
  type: "intermediate_report_start";
}

export interface IntermediateReportDelta extends BaseObj {
  type: "intermediate_report_delta";
  content: string;
}

export interface IntermediateReportCitedDocs extends BaseObj {
  type: "intermediate_report_cited_docs";
  cited_docs: OnyxDocument[] | null;
}

export type ChatObj = MessageStart | MessageDelta | MessageEnd;

export type StopObj = Stop;

export type SectionEndObj = SectionEnd;

export type TopLevelBranchingObj = TopLevelBranching;

export type PacketErrorObj = PacketError;

// Specific tool objects
export type SearchToolObj =
  | SearchToolStart
  | SearchToolQueriesDelta
  | SearchToolDocumentsDelta
  | SectionEnd
  | PacketError;
export type ImageGenerationToolObj =
  | ImageGenerationToolStart
  | ImageGenerationToolDelta
  | SectionEnd
  | PacketError;
export type PythonToolObj =
  | PythonToolStart
  | PythonToolDelta
  | ToolCallArgumentDelta
  | SectionEnd
  | PacketError;
export type FetchToolObj =
  | FetchToolStart
  | FetchToolUrls
  | FetchToolDocuments
  | SectionEnd
  | PacketError;
export type CustomToolObj =
  | CustomToolStart
  | CustomToolArgs
  | CustomToolDelta
  | SectionEnd
  | PacketError;
export type FileReaderToolObj =
  | FileReaderStart
  | FileReaderResult
  | SectionEnd
  | PacketError;
export type MemoryToolObj =
  | MemoryToolStart
  | MemoryToolDelta
  | MemoryToolNoAccess
  | SectionEnd
  | PacketError;
export type NewToolObj =
  | SearchToolObj
  | ImageGenerationToolObj
  | PythonToolObj
  | FetchToolObj
  | CustomToolObj
  | FileReaderToolObj
  | MemoryToolObj;

export type ReasoningObj =
  | ReasoningStart
  | ReasoningDelta
  | ReasoningDone
  | SectionEnd
  | PacketError;

export type CitationObj =
  | CitationStart
  | CitationInfo
  | SectionEnd
  | PacketError;

export type DeepResearchPlanObj =
  | DeepResearchPlanStart
  | DeepResearchPlanDelta
  | SectionEnd;

export type ResearchAgentObj =
  | ResearchAgentStart
  | IntermediateReportStart
  | IntermediateReportDelta
  | IntermediateReportCitedDocs
  | SectionEnd;

// Union type for all possible streaming objects
export type ObjTypes =
  | ChatObj
  | NewToolObj
  | ReasoningObj
  | StopObj
  | SectionEndObj
  | TopLevelBranchingObj
  | CitationObj
  | DeepResearchPlanObj
  | ResearchAgentObj
  | PacketErrorObj
  | CitationObj;

// Placement interface for packet positioning
export interface Placement {
  turn_index: number;
  tab_index?: number; // For parallel tool calls - tools with same turn_index but different tab_index run in parallel
  sub_turn_index?: number | null;
  model_index?: number | null; // For multi-model answer generation - identifies which model produced this packet
}

// Packet wrapper for streaming objects
export interface Packet {
  placement: Placement;
  obj: ObjTypes;
}

export interface ChatPacket {
  placement: Placement;
  obj: ChatObj;
}

export interface StopPacket {
  placement: Placement;
  obj: StopObj;
}

export interface CitationPacket {
  placement: Placement;
  obj: CitationObj;
}

// New specific tool packet types
export interface SearchToolPacket {
  placement: Placement;
  obj: SearchToolObj;
}

export interface ImageGenerationToolPacket {
  placement: Placement;
  obj: ImageGenerationToolObj;
}

export interface PythonToolPacket {
  placement: Placement;
  obj: PythonToolObj;
}

export interface FetchToolPacket {
  placement: Placement;
  obj: FetchToolObj;
}

export interface CustomToolPacket {
  placement: Placement;
  obj: CustomToolObj;
}

export interface FileReaderToolPacket {
  placement: Placement;
  obj: FileReaderToolObj;
}
export interface MemoryToolPacket {
  placement: Placement;
  obj: MemoryToolObj;
}

export interface ReasoningPacket {
  placement: Placement;
  obj: ReasoningObj;
}

export interface SectionEndPacket {
  placement: Placement;
  obj: SectionEndObj;
}

export interface TopLevelBranchingPacket {
  placement: Placement;
  obj: TopLevelBranchingObj;
}

export interface DeepResearchPlanPacket {
  placement: Placement;
  obj: DeepResearchPlanObj;
}

export interface ResearchAgentPacket {
  placement: Placement;
  obj: ResearchAgentObj;
}


================================================
FILE: web/src/app/app/services/thinkingTokens.ts
================================================
import { JSX } from "react";

/**
 * Utility functions to handle thinking tokens in AI messages
 */

/**
 * Check if a message contains complete thinking tokens
 */
export function hasCompletedThinkingTokens(
  content: string | JSX.Element
): boolean {
  if (typeof content !== "string") return false;

  return (
    /<think>[\s\S]*?<\/think>/.test(content) ||
    /<thinking>[\s\S]*?<\/thinking>/.test(content)
  );
}

/**
 * Check if a message contains partial thinking tokens (streaming)
 */
export function hasPartialThinkingTokens(
  content: string | JSX.Element
): boolean {
  if (typeof content !== "string") return false;

  // Count opening and closing tags
  const thinkOpenCount = (content.match(/<think>/g) || []).length;
  const thinkCloseCount = (content.match(/<\/think>/g) || []).length;
  const thinkingOpenCount = (content.match(/<thinking>/g) || []).length;
  const thinkingCloseCount = (content.match(/<\/thinking>/g) || []).length;

  // Return true if we have any unmatched tags
  return (
    thinkOpenCount > thinkCloseCount || thinkingOpenCount > thinkingCloseCount
  );
}

/**
 * Extract thinking content from a message
 */
export function extractThinkingContent(content: string | JSX.Element): string {
  if (typeof content !== "string") return "";

  // For complete thinking tags, extract all sections
  const completeThinkRegex = /<think>[\s\S]*?<\/think>/g;
  const completeThinkingRegex = /<thinking>[\s\S]*?<\/thinking>/g;

  const thinkMatches = Array.from(content.matchAll(completeThinkRegex));
  const thinkingMatches = Array.from(content.matchAll(completeThinkingRegex));

  if (thinkMatches.length > 0 || thinkingMatches.length > 0) {
    // Combine all matches and sort by their position in the original string
    const allMatches = [...thinkMatches, ...thinkingMatches].sort(
      (a, b) => (a.index || 0) - (b.index || 0)
    );
    return allMatches.map((match) => match[0]).join("\n");
  }

  // For partial thinking tokens (streaming)
  if (hasPartialThinkingTokens(content)) {
    // Find the last opening tag position
    const lastThinkPos = content.lastIndexOf("<think>");
    const lastThinkingPos = content.lastIndexOf("<thinking>");

    // Use the position of whichever tag appears last
    const startPos = Math.max(lastThinkPos, lastThinkingPos);

    if (startPos >= 0) {
      // Extract everything from the last opening tag to the end
      return content.substring(startPos);
    }
  }

  return "";
}

/**
 * Check if thinking tokens are complete
 */
export function isThinkingComplete(content: string | JSX.Element): boolean {
  if (typeof content !== "string") return false;

  // Count opening and closing tags
  const thinkOpenCount = (content.match(/<think>/g) || []).length;
  const thinkCloseCount = (content.match(/<\/think>/g) || []).length;
  const thinkingOpenCount = (content.match(/<thinking>/g) || []).length;
  const thinkingCloseCount = (content.match(/<\/thinking>/g) || []).length;

  // All tags must be matched
  return (
    thinkOpenCount === thinkCloseCount &&
    thinkingOpenCount === thinkingCloseCount
  );
}

/**
 * Remove thinking tokens from content
 */
export function removeThinkingTokens(
  content: string | JSX.Element
): string | JSX.Element {
  if (typeof content !== "string") return content;

  // First, remove complete thinking blocks
  let result = content.replace(/<think>[\s\S]*?<\/think>/g, "");
  result = result.replace(/<thinking>[\s\S]*?<\/thinking>/g, "");

  // Handle case where there's an incomplete thinking token at the end
  if (hasPartialThinkingTokens(result)) {
    // Find the last opening tag position
    const lastThinkPos = result.lastIndexOf("<think>");
    const lastThinkingPos = result.lastIndexOf("<thinking>");

    // Use the position of whichever tag appears last
    const startPos = Math.max(lastThinkPos, lastThinkingPos);

    if (startPos >= 0) {
      // Only keep content before the last opening tag
      result = result.substring(0, startPos);
    }
  }

  return result.trim();
}

// /**
//  * Clean the extracted thinking content (remove tags)
//  */
export function cleanThinkingContent(thinkingContent: string): string {
  if (!thinkingContent) return "";

  return thinkingContent
    .replace(/<think>|<\/think>|<thinking>|<\/thinking>/g, "")
    .trim();
}


================================================
FILE: web/src/app/app/settings/accounts-access/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import { useUser } from "@/providers/UserProvider";
import { useAuthType } from "@/lib/hooks";
import { AuthType } from "@/lib/constants";
import { AccountsAccessSettings } from "@/refresh-pages/SettingsPage";

export default function AccountsAccessPage() {
  const router = useRouter();
  const { user } = useUser();
  const authType = useAuthType();

  const showPasswordSection = Boolean(user?.password_configured);
  const showTokensSection = authType !== null;
  const hasAccess = showPasswordSection || showTokensSection;

  // Only redirect after authType has loaded to avoid redirecting during loading state
  const isAuthTypeLoaded = authType !== null;

  useEffect(() => {
    if (isAuthTypeLoaded && !hasAccess) {
      router.replace("/app/settings/general");
    }
  }, [isAuthTypeLoaded, hasAccess, router]);

  // Don't render content until authType is loaded and access is determined
  if (!isAuthTypeLoaded || !hasAccess) {
    return null;
  }

  return <AccountsAccessSettings />;
}


================================================
FILE: web/src/app/app/settings/chat-preferences/page.tsx
================================================
import { ChatPreferencesSettings } from "@/refresh-pages/SettingsPage";

export default function ChatPreferencesPage() {
  return <ChatPreferencesSettings />;
}


================================================
FILE: web/src/app/app/settings/connectors/page.tsx
================================================
import { ConnectorsSettings } from "@/refresh-pages/SettingsPage";

export default function ConnectorsPage() {
  return <ConnectorsSettings />;
}


================================================
FILE: web/src/app/app/settings/general/page.tsx
================================================
import { GeneralSettings } from "@/refresh-pages/SettingsPage";

export default function GeneralSettingsPage() {
  return <GeneralSettings />;
}


================================================
FILE: web/src/app/app/settings/layout.tsx
================================================
"use client";

import { usePathname } from "next/navigation";
import * as AppLayouts from "@/layouts/app-layouts";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { SidebarTab } from "@opal/components";
import { SvgSliders } from "@opal/icons";
import { useUser } from "@/providers/UserProvider";
import { useAuthType } from "@/lib/hooks";
import { Section } from "@/layouts/general-layouts";

interface LayoutProps {
  children: React.ReactNode;
}

export default function Layout({ children }: LayoutProps) {
  const pathname = usePathname();
  const { user } = useUser();
  const authType = useAuthType();

  const showPasswordSection = Boolean(user?.password_configured);
  const showTokensSection = authType !== null;
  const showAccountsAccessTab = showPasswordSection || showTokensSection;

  return (
    <AppLayouts.Root>
      <SettingsLayouts.Root width="lg">
        <SettingsLayouts.Header icon={SvgSliders} title="Settings" separator />

        <SettingsLayouts.Body>
          <Section
            flexDirection="row"
            justifyContent="start"
            alignItems="start"
            gap={1.5}
          >
            {/* Left: Tab Navigation */}
            <div
              data-testid="settings-left-tab-navigation"
              className="flex flex-col px-2 min-w-[12.5rem]"
            >
              <SidebarTab
                href="/app/settings/general"
                selected={pathname === "/app/settings/general"}
              >
                General
              </SidebarTab>
              <SidebarTab
                href="/app/settings/chat-preferences"
                selected={pathname === "/app/settings/chat-preferences"}
              >
                Chat Preferences
              </SidebarTab>
              {showAccountsAccessTab && (
                <SidebarTab
                  href="/app/settings/accounts-access"
                  selected={pathname === "/app/settings/accounts-access"}
                >
                  Accounts & Access
                </SidebarTab>
              )}
              <SidebarTab
                href="/app/settings/connectors"
                selected={pathname === "/app/settings/connectors"}
              >
                Connectors
              </SidebarTab>
            </div>

            {/* Right: Tab Content */}
            {children}
          </Section>
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    </AppLayouts.Root>
  );
}


================================================
FILE: web/src/app/app/settings/page.tsx
================================================
import { redirect } from "next/navigation";

export default function SettingsPage() {
  redirect("/app/settings/general");
}


================================================
FILE: web/src/app/app/shared/[chatId]/SharedChatDisplay.tsx
================================================
"use client";

import { useState } from "react";
import { humanReadableFormat } from "@/lib/time";
import { BackendChatSession } from "@/app/app/interfaces";
import { processRawChatHistory } from "@/app/app/services/lib";
import { getLatestMessageChain } from "@/app/app/services/messageTree";
import HumanMessage from "@/app/app/message/HumanMessage";
import AgentMessage from "@/app/app/message/messageComponents/AgentMessage";
import OnyxInitializingLoader from "@/components/OnyxInitializingLoader";
import { Section } from "@/layouts/general-layouts";
import { IllustrationContent } from "@opal/layouts";
import SvgNotFound from "@opal/illustrations/not-found";
import { Button } from "@opal/components";
import { Persona } from "@/app/admin/agents/interfaces";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import PreviewModal from "@/sections/modals/PreviewModal";
import { UNNAMED_CHAT } from "@/lib/constants";
import Text from "@/refresh-components/texts/Text";
import useOnMount from "@/hooks/useOnMount";
import SharedAppInputBar from "@/sections/input/SharedAppInputBar";

export interface SharedChatDisplayProps {
  chatSession: BackendChatSession | null;
  persona: Persona;
}

export default function SharedChatDisplay({
  chatSession,
  persona,
}: SharedChatDisplayProps) {
  const [presentingDocument, setPresentingDocument] =
    useState<MinimalOnyxDocument | null>(null);

  const isMounted = useOnMount();

  if (!chatSession) {
    return (
      <div className="h-full w-full flex flex-col items-center justify-center">
        <Section flexDirection="column" alignItems="center" gap={1}>
          <IllustrationContent
            illustration={SvgNotFound}
            title="Shared chat not found"
            description="Did not find a shared chat with the specified ID."
          />
          <Button href="/app" prominence="secondary">
            Start a new chat
          </Button>
        </Section>
      </div>
    );
  }

  const messages = getLatestMessageChain(
    processRawChatHistory(chatSession.messages, chatSession.packets)
  );

  const firstMessage = messages[0];

  if (firstMessage === undefined) {
    return (
      <div className="h-full w-full flex flex-col items-center justify-center">
        <Section flexDirection="column" alignItems="center" gap={1}>
          <IllustrationContent
            illustration={SvgNotFound}
            title="Shared chat not found"
            description="No messages found in shared chat."
          />
          <Button href="/app" prominence="secondary">
            Start a new chat
          </Button>
        </Section>
      </div>
    );
  }

  return (
    <>
      {presentingDocument && (
        <PreviewModal
          presentingDocument={presentingDocument}
          onClose={() => setPresentingDocument(null)}
        />
      )}

      <div className="flex flex-col h-full w-full overflow-hidden">
        <div className="flex-1 flex flex-col items-center overflow-y-auto">
          <div className="sticky top-0 z-10 flex items-center justify-between w-full bg-background-tint-01 px-8 py-4">
            <Text as="p" text04 headingH2>
              {chatSession.description || UNNAMED_CHAT}
            </Text>
            <div className="flex flex-col items-end">
              <Text as="p" text03 secondaryBody>
                Shared on {humanReadableFormat(chatSession.time_created)}
              </Text>
              {chatSession.owner_name && (
                <Text as="p" text03 secondaryBody>
                  by {chatSession.owner_name}
                </Text>
              )}
            </div>
          </div>

          {isMounted ? (
            <div className="w-[min(50rem,100%)]">
              {messages.map((message, i) => {
                if (message.type === "user") {
                  return (
                    <HumanMessage
                      key={message.messageId}
                      content={message.message}
                      files={message.files}
                      nodeId={message.nodeId}
                    />
                  );
                } else if (message.type === "assistant") {
                  return (
                    <AgentMessage
                      key={message.messageId}
                      rawPackets={message.packets}
                      chatState={{
                        agent: persona,
                        docs: message.documents,
                        citations: message.citations,
                        setPresentingDocument: setPresentingDocument,
                        overriddenModel: message.overridden_model,
                      }}
                      nodeId={message.nodeId}
                      llmManager={null}
                      otherMessagesCanSwitchTo={undefined}
                      onMessageSelection={undefined}
                    />
                  );
                } else {
                  // Error message case
                  return (
                    <div key={message.messageId} className="py-5 ml-4 lg:px-5">
                      <div className="mx-auto w-[90%] max-w-message-max">
                        <p className="text-status-text-error-05 text-sm my-auto">
                          {message.message}
                        </p>
                      </div>
                    </div>
                  );
                }
              })}
            </div>
          ) : (
            <div className="h-full w-full flex items-center justify-center">
              <OnyxInitializingLoader />
            </div>
          )}
        </div>

        <div className="w-full max-w-[50rem] mx-auto px-4 pb-4">
          <SharedAppInputBar />
        </div>
      </div>
    </>
  );
}


================================================
FILE: web/src/app/app/shared/[chatId]/page.tsx
================================================
import { fetchSS } from "@/lib/utilsSS";
import { redirect } from "next/navigation";
import type { Route } from "next";
import { requireAuth } from "@/lib/auth/requireAuth";
import SharedChatDisplay from "@/app/app/shared/[chatId]/SharedChatDisplay";
import * as AppLayouts from "@/layouts/app-layouts";
import { Persona } from "@/app/admin/agents/interfaces";

// This is used for rendering a persona in the shared chat display
export function constructMiniFiedPersona(name: string, id: number): Persona {
  return {
    id,
    name,
    is_listed: true,
    is_public: true,
    display_priority: 0,
    description: "",
    document_sets: [],
    tools: [],
    owner: null,
    starter_messages: null,
    builtin_persona: false,
    is_featured: false,
    users: [],
    groups: [],
    user_file_ids: [],
    system_prompt: null,
    task_prompt: null,
    datetime_aware: true,
    replace_base_system_prompt: false,
  };
}

async function getSharedChat(chatId: string) {
  const response = await fetchSS(
    `/chat/get-chat-session/${chatId}?is_shared=True`
  );
  if (response.ok) {
    return await response.json();
  }
  return null;
}

export interface PageProps {
  params: Promise<{ chatId: string }>;
}

export default async function Page(props: PageProps) {
  const params = await props.params;

  const authResult = await requireAuth();
  if (authResult.redirect) {
    return redirect(authResult.redirect as Route);
  }

  // Catch cases where backend is completely unreachable
  // Allows render instead of throwing an exception and crashing
  const chatSession = await getSharedChat(params.chatId).catch(() => null);

  const persona: Persona = constructMiniFiedPersona(
    chatSession?.persona_name ?? "",
    chatSession?.persona_id ?? 0
  );

  return (
    <AppLayouts.Root>
      <SharedChatDisplay chatSession={chatSession} persona={persona} />
    </AppLayouts.Root>
  );
}


================================================
FILE: web/src/app/app/stores/useChatSessionStore.ts
================================================
import { create } from "zustand";
import {
  ChatState,
  RegenerationState,
  Message,
  ChatSessionSharedStatus,
  BackendChatSession,
  FeedbackType,
} from "../interfaces";
import {
  getLatestMessageChain,
  getMessageByMessageId,
  MessageTreeState,
} from "../services/messageTree";
import { useMemo } from "react";

interface ChatSessionData {
  sessionId: string;
  messageTree: MessageTreeState;
  chatState: ChatState;
  regenerationState: RegenerationState | null;
  canContinue: boolean;
  submittedMessage: string;
  maxTokens: number;
  chatSessionSharedStatus: ChatSessionSharedStatus;
  selectedNodeIdForDocDisplay: number | null; // should be the node ID, not the message ID
  abortController: AbortController;
  hasPerformedInitialScroll: boolean;
  documentSidebarVisible: boolean;
  hasSentLocalUserMessage: boolean;

  // Session-specific state (previously global)
  isFetchingChatMessages: boolean;
  uncaughtError: string | null;
  loadingError: string | null;
  isReady: boolean;

  // Session metadata
  lastAccessed: Date;
  isLoaded: boolean;
  description?: string;
  personaId?: number;

  // Streaming duration tracking
  streamingStartTime?: number;
}

interface ChatSessionStore {
  // Session management
  currentSessionId: string | null;
  sessions: Map<string, ChatSessionData>;

  // Actions - Session Management
  setCurrentSession: (sessionId: string | null) => void;
  createSession: (
    sessionId: string,
    initialData?: Partial<ChatSessionData>
  ) => void;
  updateSessionData: (
    sessionId: string,
    updates: Partial<ChatSessionData>
  ) => void;
  updateSessionMessageTree: (
    sessionId: string,
    messageTree: MessageTreeState
  ) => void;
  updateSessionAndMessageTree: (
    sessionId: string,
    messageTree: MessageTreeState
  ) => void;

  // Actions - Message Management
  updateChatState: (sessionId: string, chatState: ChatState) => void;
  updateRegenerationState: (
    sessionId: string,
    state: RegenerationState | null
  ) => void;
  updateCanContinue: (sessionId: string, canContinue: boolean) => void;
  updateSubmittedMessage: (sessionId: string, message: string) => void;
  updateMessageFeedback: (
    sessionId: string,
    messageId: number,
    feedback: string | null
  ) => void;
  updateCurrentMessageFeedback: (
    messageId: number,
    feedback: string | null
  ) => void;
  updateSelectedNodeForDocDisplay: (
    sessionId: string,
    selectedMessageForDocDisplay: number | null
  ) => void;
  updateHasPerformedInitialScroll: (
    sessionId: string,
    hasPerformedInitialScroll: boolean
  ) => void;
  updateDocumentSidebarVisible: (
    sessionId: string,
    documentSidebarVisible: boolean
  ) => void;
  updateCurrentDocumentSidebarVisible: (
    documentSidebarVisible: boolean
  ) => void;
  updateHasSentLocalUserMessage: (
    sessionId: string,
    hasSentLocalUserMessage: boolean
  ) => void;
  updateCurrentHasSentLocalUserMessage: (
    hasSentLocalUserMessage: boolean
  ) => void;

  // Convenience functions that automatically use current session ID
  updateCurrentSelectedNodeForDocDisplay: (
    selectedNodeForDocDisplay: number | null
  ) => void;
  updateCurrentChatSessionSharedStatus: (
    chatSessionSharedStatus: ChatSessionSharedStatus
  ) => void;
  updateCurrentChatState: (chatState: ChatState) => void;
  updateCurrentRegenerationState: (
    regenerationState: RegenerationState | null
  ) => void;
  updateCurrentCanContinue: (canContinue: boolean) => void;
  updateCurrentSubmittedMessage: (submittedMessage: string) => void;

  // Actions - Session-specific State (previously global)
  setIsFetchingChatMessages: (sessionId: string, fetching: boolean) => void;
  setUncaughtError: (sessionId: string, error: string | null) => void;
  setLoadingError: (sessionId: string, error: string | null) => void;
  setIsReady: (sessionId: string, ready: boolean) => void;

  // Actions - Streaming Duration
  setStreamingStartTime: (sessionId: string, time: number | null) => void;
  getStreamingStartTime: (sessionId: string) => number | undefined;

  // Actions - Abort Controllers
  setAbortController: (sessionId: string, controller: AbortController) => void;
  abortSession: (sessionId: string) => void;
  abortAllSessions: () => void;

  // Utilities
  initializeSession: (
    sessionId: string,
    backendSession?: BackendChatSession
  ) => void;
  cleanupOldSessions: (maxSessions?: number) => void;
}

const createInitialSessionData = (
  sessionId: string,
  initialData?: Partial<ChatSessionData>
): ChatSessionData => ({
  sessionId,
  messageTree: new Map<number, Message>(),
  chatState: "input" as ChatState,
  regenerationState: null,
  canContinue: false,
  submittedMessage: "",
  maxTokens: 128_000,
  chatSessionSharedStatus: ChatSessionSharedStatus.Private,
  selectedNodeIdForDocDisplay: null,
  abortController: new AbortController(),
  hasPerformedInitialScroll: true,
  documentSidebarVisible: false,
  hasSentLocalUserMessage: false,

  // Session-specific state defaults
  isFetchingChatMessages: false,
  uncaughtError: null,
  loadingError: null,
  isReady: true,

  lastAccessed: new Date(),
  isLoaded: false,
  ...initialData,
});

export const useChatSessionStore = create<ChatSessionStore>()((set, get) => ({
  // Initial state
  currentSessionId: null,
  sessions: new Map<string, ChatSessionData>(),

  // Session Management Actions
  setCurrentSession: (sessionId: string | null) => {
    set((state) => {
      if (sessionId && !state.sessions.has(sessionId)) {
        // Create new session if it doesn't exist
        const newSession = createInitialSessionData(sessionId);
        const newSessions = new Map(state.sessions);
        newSessions.set(sessionId, newSession);

        return {
          currentSessionId: sessionId,
          sessions: newSessions,
        };
      }

      // Update last accessed for the new current session
      if (sessionId && state.sessions.has(sessionId)) {
        const session = state.sessions.get(sessionId)!;
        const updatedSession = { ...session, lastAccessed: new Date() };
        const newSessions = new Map(state.sessions);
        newSessions.set(sessionId, updatedSession);

        return {
          currentSessionId: sessionId,
          sessions: newSessions,
        };
      }

      return { currentSessionId: sessionId };
    });
  },

  createSession: (
    sessionId: string,
    initialData?: Partial<ChatSessionData>
  ) => {
    set((state) => {
      const newSession = createInitialSessionData(sessionId, initialData);
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, newSession);

      return { sessions: newSessions };
    });
  },

  updateSessionData: (sessionId: string, updates: Partial<ChatSessionData>) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      const updatedSession = {
        ...(session || createInitialSessionData(sessionId)),
        ...updates,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);

      return { sessions: newSessions };
    });
  },

  updateSessionMessageTree: (
    sessionId: string,
    messageTree: MessageTreeState
  ) => {
    get().updateSessionData(sessionId, { messageTree });
  },

  updateSessionAndMessageTree: (
    sessionId: string,
    messageTree: MessageTreeState
  ) => {
    set((state) => {
      // Ensure session exists
      const existingSession = state.sessions.get(sessionId);
      const session = existingSession || createInitialSessionData(sessionId);

      // Update session with new message tree
      const updatedSession = {
        ...session,
        messageTree,
        lastAccessed: new Date(),
      };

      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);

      // Return both updates in a single state change
      return {
        currentSessionId: sessionId,
        sessions: newSessions,
      };
    });
  },

  // Message Management Actions
  updateChatState: (sessionId: string, chatState: ChatState) => {
    get().updateSessionData(sessionId, { chatState });
  },

  updateRegenerationState: (
    sessionId: string,
    regenerationState: RegenerationState | null
  ) => {
    get().updateSessionData(sessionId, { regenerationState });
  },

  updateCanContinue: (sessionId: string, canContinue: boolean) => {
    get().updateSessionData(sessionId, { canContinue });
  },

  updateSubmittedMessage: (sessionId: string, submittedMessage: string) => {
    get().updateSessionData(sessionId, { submittedMessage });
  },

  updateMessageFeedback: (
    sessionId: string,
    messageId: number,
    feedback: string | null
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) {
        console.warn(`Session ${sessionId} not found`);
        return state;
      }

      const message = getMessageByMessageId(session.messageTree, messageId);
      if (!message) {
        console.warn(`Message ${messageId} not found in session ${sessionId}`);
        return state;
      }

      // Create new message object with updated feedback (immutable update)
      const updatedMessage = {
        ...message,
        currentFeedback: feedback as FeedbackType | null,
      };

      // Create new messageTree Map with updated message
      const newMessageTree = new Map(session.messageTree);
      newMessageTree.set(message.nodeId, updatedMessage);

      // Create new session object with new messageTree
      const updatedSession = {
        ...session,
        messageTree: newMessageTree,
        lastAccessed: new Date(),
      };

      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);

      return { sessions: newSessions };
    });
  },

  updateCurrentMessageFeedback: (
    messageId: number,
    feedback: string | null
  ) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateMessageFeedback(currentSessionId, messageId, feedback);
    }
  },

  updateSelectedNodeForDocDisplay: (
    sessionId: string,
    selectedMessageForDocDisplay: number | null
  ) => {
    get().updateSessionData(sessionId, {
      selectedNodeIdForDocDisplay: selectedMessageForDocDisplay,
    });
  },

  updateHasPerformedInitialScroll: (
    sessionId: string,
    hasPerformedInitialScroll: boolean
  ) => {
    get().updateSessionData(sessionId, { hasPerformedInitialScroll });
  },

  updateDocumentSidebarVisible: (
    sessionId: string,
    documentSidebarVisible: boolean
  ) => {
    get().updateSessionData(sessionId, { documentSidebarVisible });
  },

  updateCurrentDocumentSidebarVisible: (documentSidebarVisible: boolean) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateDocumentSidebarVisible(
        currentSessionId,
        documentSidebarVisible
      );
    }
  },

  updateHasSentLocalUserMessage: (
    sessionId: string,
    hasSentLocalUserMessage: boolean
  ) => {
    get().updateSessionData(sessionId, { hasSentLocalUserMessage });
  },

  updateCurrentHasSentLocalUserMessage: (hasSentLocalUserMessage: boolean) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateHasSentLocalUserMessage(
        currentSessionId,
        hasSentLocalUserMessage
      );
    }
  },

  // Convenience functions that automatically use current session ID
  updateCurrentSelectedNodeForDocDisplay: (
    selectedNodeForDocDisplay: number | null
  ) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateSelectedNodeForDocDisplay(
        currentSessionId,
        selectedNodeForDocDisplay
      );
    }
  },

  updateCurrentChatSessionSharedStatus: (
    chatSessionSharedStatus: ChatSessionSharedStatus
  ) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateSessionData(currentSessionId, { chatSessionSharedStatus });
    }
  },

  updateCurrentChatState: (chatState: ChatState) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateChatState(currentSessionId, chatState);
    }
  },

  updateCurrentRegenerationState: (
    regenerationState: RegenerationState | null
  ) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateRegenerationState(currentSessionId, regenerationState);
    }
  },

  updateCurrentCanContinue: (canContinue: boolean) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateCanContinue(currentSessionId, canContinue);
    }
  },

  updateCurrentSubmittedMessage: (submittedMessage: string) => {
    const { currentSessionId } = get();
    if (currentSessionId) {
      get().updateSubmittedMessage(currentSessionId, submittedMessage);
    }
  },

  // Session-specific State Actions (previously global)
  setIsFetchingChatMessages: (
    sessionId: string,
    isFetchingChatMessages: boolean
  ) => {
    get().updateSessionData(sessionId, { isFetchingChatMessages });
  },

  setUncaughtError: (sessionId: string, uncaughtError: string | null) => {
    get().updateSessionData(sessionId, { uncaughtError });
  },

  setLoadingError: (sessionId: string, loadingError: string | null) => {
    get().updateSessionData(sessionId, { loadingError });
  },

  setIsReady: (sessionId: string, isReady: boolean) => {
    get().updateSessionData(sessionId, { isReady });
  },

  // Streaming Duration Actions
  setStreamingStartTime: (sessionId: string, time: number | null) => {
    get().updateSessionData(sessionId, {
      streamingStartTime: time ?? undefined,
    });
  },

  getStreamingStartTime: (sessionId: string) => {
    return get().sessions.get(sessionId)?.streamingStartTime;
  },

  // Abort Controller Actions
  setAbortController: (sessionId: string, controller: AbortController) => {
    get().updateSessionData(sessionId, { abortController: controller });
  },

  abortSession: (sessionId: string) => {
    const session = get().sessions.get(sessionId);
    if (session?.abortController) {
      session.abortController.abort();
      get().updateSessionData(sessionId, {
        abortController: new AbortController(),
      });
    }
  },

  abortAllSessions: () => {
    const { sessions } = get();
    sessions.forEach((session, sessionId) => {
      if (session.abortController) {
        session.abortController.abort();
        get().updateSessionData(sessionId, {
          abortController: new AbortController(),
        });
      }
    });
  },

  // Utilities
  initializeSession: (
    sessionId: string,
    backendSession?: BackendChatSession
  ) => {
    const initialData: Partial<ChatSessionData> = {
      isLoaded: true,
      description: backendSession?.description,
      personaId: backendSession?.persona_id,
    };

    const existingSession = get().sessions.get(sessionId);
    if (existingSession) {
      get().updateSessionData(sessionId, initialData);
    } else {
      get().createSession(sessionId, initialData);
    }
  },

  cleanupOldSessions: (maxSessions: number = 10) => {
    set((state) => {
      const sortedSessions = Array.from(state.sessions.entries()).sort(
        ([, a], [, b]) => b.lastAccessed.getTime() - a.lastAccessed.getTime()
      );

      if (sortedSessions.length <= maxSessions) {
        return state;
      }

      const sessionsToKeep = sortedSessions.slice(0, maxSessions);
      const sessionsToRemove = sortedSessions.slice(maxSessions);

      // Abort controllers for sessions being removed
      sessionsToRemove.forEach(([, session]) => {
        if (session.abortController) {
          session.abortController.abort();
        }
      });

      const newSessions = new Map(sessionsToKeep);

      return {
        sessions: newSessions,
      };
    });
  },
}));

export const useCurrentMessageTree = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.messageTree;
  });

export const useCurrentMessageHistory = () => {
  const messageTree = useCurrentMessageTree();
  return useMemo(() => {
    if (!messageTree) {
      return [];
    }
    return getLatestMessageChain(messageTree);
  }, [messageTree]);
};

export const useCurrentChatState = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.chatState || "input";
  });

export const useUncaughtError = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.uncaughtError || null;
  });

export const useLoadingError = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.loadingError || null;
  });

export const useIsReady = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.isReady ?? true;
  });

export const useDocumentSidebarVisible = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.documentSidebarVisible || false;
  });

export const useSelectedNodeForDocDisplay = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.selectedNodeIdForDocDisplay || null;
  });

export const useHasSentLocalUserMessage = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.hasSentLocalUserMessage || false;
  });

export const useStreamingStartTime = () =>
  useChatSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    const currentSession = currentSessionId
      ? sessions.get(currentSessionId)
      : null;
    return currentSession?.streamingStartTime;
  });


================================================
FILE: web/src/app/auth/create-account/page.tsx
================================================
"use client";

import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import { REGISTRATION_URL } from "@/lib/constants";
import { Button } from "@opal/components";
import Link from "next/link";
import { SvgImport } from "@opal/icons";

export default function Page() {
  return (
    <AuthFlowContainer>
      <div className="flex flex-col space-y-6">
        <h2 className="text-2xl font-bold text-text-900 text-center">
          Account Not Found
        </h2>
        <p className="text-text-700 max-w-md text-center">
          We couldn&apos;t find your account in our records. To access Onyx, you
          need to either:
        </p>
        <ul className="list-disc text-left text-text-600 w-full pl-6 mx-auto">
          <li>Be invited to an existing Onyx team</li>
          <li>Create a new Onyx team</li>
        </ul>
        <div className="flex justify-center">
          <Button
            href={`${REGISTRATION_URL}/register`}
            width="full"
            icon={SvgImport}
          >
            Create New Organization
          </Button>
        </div>
        <p className="text-sm text-text-500 text-center">
          Have an account with a different email?{" "}
          <Link
            href="/auth/login"
            className="text-action-link-05 hover:underline"
          >
            Sign in
          </Link>
        </p>
      </div>
    </AuthFlowContainer>
  );
}


================================================
FILE: web/src/app/auth/error/AuthErrorContent.tsx
================================================
"use client";

import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";

import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";

// Maps raw IdP/OAuth error codes to user-friendly messages.
// If the message is a known code, we replace it; otherwise show it as-is.
const ERROR_CODE_MESSAGES: Record<string, string> = {
  access_denied: "Access was denied by your identity provider.",
  login_required: "You need to log in with your identity provider first.",
  consent_required:
    "Your identity provider requires consent before continuing.",
  interaction_required:
    "Additional interaction with your identity provider is required.",
  invalid_scope: "The requested permissions are not available.",
  server_error:
    "Your identity provider encountered an error. Please try again.",
  temporarily_unavailable:
    "Your identity provider is temporarily unavailable. Please try again later.",
};

function resolveMessage(raw: string | null): string | null {
  if (!raw) return null;
  return ERROR_CODE_MESSAGES[raw] ?? raw;
}

interface AuthErrorContentProps {
  message: string | null;
}

function AuthErrorContent({ message: rawMessage }: AuthErrorContentProps) {
  const message = resolveMessage(rawMessage);
  return (
    <AuthFlowContainer>
      <div className="flex flex-col items-center gap-4">
        <Text headingH2 text05>
          Authentication Error
        </Text>
        <Text mainContentBody text03>
          There was a problem with your login attempt.
        </Text>
        {/* TODO: Error card component */}
        <div className="w-full rounded-12 border border-status-error-05 bg-status-error-00 p-4">
          {message ? (
            <Text mainContentBody className="text-status-error-05">
              {message}
            </Text>
          ) : (
            <div className="flex flex-col gap-2 px-4">
              <Text mainContentEmphasis className="text-status-error-05">
                Possible Issues:
              </Text>
              <Text as="li" mainContentBody className="text-status-error-05">
                Incorrect or expired login credentials
              </Text>
              <Text as="li" mainContentBody className="text-status-error-05">
                Temporary authentication system disruption
              </Text>
              <Text as="li" mainContentBody className="text-status-error-05">
                Account access restrictions or permissions
              </Text>
            </div>
          )}
        </div>

        <Button href="/auth/login" width="full">
          Return to Login Page
        </Button>

        <Text mainContentBody text04>
          {NEXT_PUBLIC_CLOUD_ENABLED ? (
            <>
              If you continue to experience problems, please reach out to the
              Onyx team at{" "}
              <a href="mailto:support@onyx.app" className="text-action-link-05">
                support@onyx.app
              </a>
            </>
          ) : (
            "If you continue to experience problems, please reach out to your system administrator for assistance."
          )}
        </Text>
      </div>
    </AuthFlowContainer>
  );
}

export default AuthErrorContent;


================================================
FILE: web/src/app/auth/error/layout.tsx
================================================
export default function AuthErrorLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  // In a production environment, you might want to send this to your error tracking service
  // For example, if using a service like Sentry:
  // captureException(new Error("Authentication error page was accessed unexpectedly"));

  return <>{children}</>;
}


================================================
FILE: web/src/app/auth/error/page.tsx
================================================
"use client";

import AuthErrorContent from "./AuthErrorContent";
import { useSearchParams } from "next/navigation";

function Page() {
  const searchParams = useSearchParams();
  const error = searchParams?.get("error") || null;

  return <AuthErrorContent message={error} />;
}

export default Page;


================================================
FILE: web/src/app/auth/forgot-password/page.tsx
================================================
"use client";
import React, { useState } from "react";
import { forgotPassword } from "./utils";
import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import Title from "@/components/ui/title";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Link from "next/link";
import { Button } from "@opal/components";
import { Form, Formik } from "formik";
import * as Yup from "yup";
import { TextFormField } from "@/components/Field";
import { toast } from "@/hooks/useToast";
import { Spinner } from "@/components/Spinner";
import { redirect } from "next/navigation";
import { NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED } from "@/lib/constants";

const ForgotPasswordPage: React.FC = () => {
  const [isWorking, setIsWorking] = useState(false);

  if (!NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED) {
    redirect("/auth/login");
  }

  return (
    <AuthFlowContainer>
      <div className="flex flex-col w-full justify-center">
        <div className="flex">
          <Title className="mb-2 mx-auto font-bold">Forgot Password</Title>
        </div>
        {isWorking && <Spinner />}
        <Formik
          initialValues={{
            email: "",
          }}
          validationSchema={Yup.object().shape({
            email: Yup.string().email().required(),
          })}
          onSubmit={async (values) => {
            setIsWorking(true);
            try {
              await forgotPassword(values.email);
              toast.success(
                "Password reset email sent. Please check your inbox."
              );
            } catch (error) {
              const errorMessage =
                error instanceof Error
                  ? error.message
                  : "An error occurred. Please try again.";
              toast.error(errorMessage);
            } finally {
              setIsWorking(false);
            }
          }}
        >
          {({ isSubmitting }) => (
            <Form className="w-full flex flex-col items-stretch mt-2">
              <TextFormField
                name="email"
                label="Email"
                type="email"
                placeholder="email@yourcompany.com"
              />

              <div className="flex">
                <Button disabled={isSubmitting} type="submit" width="full">
                  Reset Password
                </Button>
              </div>
            </Form>
          )}
        </Formik>
        <Spacer rem={1} />
        <div className="flex">
          <div className="mx-auto">
            <Text as="p">{markdown("[Back to Login](/auth/login)")}</Text>
          </div>
        </div>
      </div>
    </AuthFlowContainer>
  );
};

export default ForgotPasswordPage;


================================================
FILE: web/src/app/auth/forgot-password/utils.ts
================================================
export const forgotPassword = async (email: string): Promise<void> => {
  const response = await fetch(`/api/auth/forgot-password`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ email }),
  });

  if (!response.ok) {
    const error = await response.json();
    const errorMessage =
      error?.detail || "An error occurred during password reset.";
    throw new Error(errorMessage);
  }
};

export const resetPassword = async (
  token: string,
  password: string
): Promise<void> => {
  const response = await fetch(`/api/auth/reset-password`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ token, password }),
  });

  if (!response.ok) {
    const error = await response.json();
    if (error?.detail?.code === "RESET_PASSWORD_INVALID_PASSWORD") {
      throw new Error(error.detail.reason || "Invalid password");
    }
    const errorMessage =
      error?.detail || "An error occurred during password reset.";
    throw new Error(errorMessage);
  }
};


================================================
FILE: web/src/app/auth/impersonate/page.tsx
================================================
"use client";

import AuthFlowContainer from "@/components/auth/AuthFlowContainer";

import { useUser } from "@/providers/UserProvider";
import { redirect, useRouter } from "next/navigation";
import type { Route } from "next";
import { Formik, Form, FormikHelpers } from "formik";
import * as Yup from "yup";
import { toast } from "@/hooks/useToast";
import { TextFormField } from "@/components/Field";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";

const ImpersonateSchema = Yup.object().shape({
  email: Yup.string().email("Invalid email").required("Required"),
  apiKey: Yup.string().required("Required"),
});

export default function ImpersonatePage() {
  const router = useRouter();
  const { user, isCloudSuperuser } = useUser();
  if (!user) {
    redirect("/auth/login");
  }

  if (!isCloudSuperuser) {
    redirect("/app" as Route);
  }

  const handleImpersonate = async (
    values: { email: string; apiKey: string },
    helpers: FormikHelpers<{ email: string; apiKey: string }>
  ) => {
    try {
      const response = await fetch("/api/tenants/impersonate", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${values.apiKey}`,
        },
        body: JSON.stringify({ email: values.email }),
        credentials: "same-origin",
      });

      if (!response.ok) {
        const errorData = await response.json();
        toast.error(errorData.detail || "Failed to impersonate user");
        helpers.setSubmitting(false);
      } else {
        helpers.setSubmitting(false);
        router.push("/app" as Route);
      }
    } catch (error) {
      toast.error(
        error instanceof Error ? error.message : "Failed to impersonate user"
      );
      helpers.setSubmitting(false);
    }
  };

  return (
    <AuthFlowContainer>
      <div className="flex flex-col w-full justify-center">
        <div className="w-full flex flex-col items-center justify-center">
          <Text as="p" headingH3 className="mb-6 text-center">
            Impersonate User
          </Text>
        </div>

        <Formik
          initialValues={{ email: "", apiKey: "" }}
          validationSchema={ImpersonateSchema}
          onSubmit={(values, helpers) => handleImpersonate(values, helpers)}
        >
          {({ isSubmitting }) => (
            <Form className="flex flex-col gap-4">
              <TextFormField
                name="email"
                type="email"
                label="Email"
                placeholder="email@yourcompany.com"
              />

              <TextFormField
                name="apiKey"
                type="password"
                label="API Key"
                placeholder="Enter API Key"
              />

              <Button disabled={isSubmitting} type="submit" width="full">
                Impersonate User
              </Button>
            </Form>
          )}
        </Formik>

        <Text
          as="p"
          mainUiMuted
          text03
          className="mt-4 text-center px-4"
        >{`Note: This feature is only available for @onyx.app administrators`}</Text>
      </div>
    </AuthFlowContainer>
  );
}


================================================
FILE: web/src/app/auth/join/page.tsx
================================================
import { User } from "@/lib/types";
import {
  getCurrentUserSS,
  getAuthTypeMetadataSS,
  AuthTypeMetadata,
  getAuthUrlSS,
} from "@/lib/userSS";
import { redirect } from "next/navigation";
import EmailPasswordForm from "../login/EmailPasswordForm";
import SignInButton from "@/app/auth/login/SignInButton";
import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import AuthErrorDisplay from "@/components/auth/AuthErrorDisplay";
import { AuthType } from "@/lib/constants";

const Page = async (props: {
  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;
}) => {
  const searchParams = await props.searchParams;
  const nextUrl = Array.isArray(searchParams?.next)
    ? searchParams?.next[0]
    : searchParams?.next || null;

  const defaultEmail = Array.isArray(searchParams?.email)
    ? searchParams?.email[0]
    : searchParams?.email || null;

  const teamName = Array.isArray(searchParams?.team)
    ? searchParams?.team[0]
    : searchParams?.team || "your team";

  // catch cases where the backend is completely unreachable here
  // without try / catch, will just raise an exception and the page
  // will not render
  let authTypeMetadata: AuthTypeMetadata | null = null;
  let currentUser: User | null = null;
  try {
    [authTypeMetadata, currentUser] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Some fetch failed for the login page - ${e}`);
  }

  // if user is already logged in, take them to the main app page
  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {
    if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {
      return redirect("/app");
    }
    return redirect("/auth/waiting-on-verification");
  }
  const cloud = authTypeMetadata?.authType === AuthType.CLOUD;

  // only enable this page if basic login is enabled
  if (authTypeMetadata?.authType !== AuthType.BASIC && !cloud) {
    return redirect("/app");
  }

  let authUrl: string | null = null;
  if (cloud && authTypeMetadata) {
    authUrl = await getAuthUrlSS(authTypeMetadata.authType, null);
  }
  const emailDomain = defaultEmail?.split("@")[1];

  return (
    <AuthFlowContainer authState="join">
      <AuthErrorDisplay searchParams={searchParams} />

      <>
        <div className="absolute top-10x w-full"></div>
        <div className="flex w-full flex-col justify-center">
          <h2 className="text-center text-xl text-strong font-bold">
            Re-authenticate to join team
          </h2>

          {cloud && authUrl && (
            <div className="w-full justify-center">
              <SignInButton authorizeUrl={authUrl} authType={AuthType.CLOUD} />
              <div className="flex items-center w-full my-4">
                <div className="flex-grow border-t border-background-300"></div>
                <span className="px-4 text-text-500">or</span>
                <div className="flex-grow border-t border-background-300"></div>
              </div>
            </div>
          )}

          <EmailPasswordForm
            isSignup
            isJoin
            shouldVerify={authTypeMetadata?.requiresVerification}
            nextUrl={nextUrl}
            defaultEmail={defaultEmail}
          />
        </div>
      </>
    </AuthFlowContainer>
  );
};

export default Page;


================================================
FILE: web/src/app/auth/lib.ts
================================================
export async function requestEmailVerification(email: string) {
  return await fetch("/api/auth/request-verify-token", {
    headers: {
      "Content-Type": "application/json",
    },
    method: "POST",
    body: JSON.stringify({
      email: email,
    }),
  });
}


================================================
FILE: web/src/app/auth/libSS.ts
================================================
import "server-only";

import { getDomain } from "@/lib/redirectSS";
import { NextRequest, NextResponse } from "next/server";

export async function authErrorRedirect(
  request: NextRequest,
  response: Response,
  redirectStatus?: number
): Promise<NextResponse> {
  const errorUrl = new URL("/auth/error", getDomain(request));
  try {
    const body = await response.json();
    const detail = body?.detail;
    if (typeof detail === "string" && detail) {
      errorUrl.searchParams.set("error", detail);
    }
  } catch {
    // response may not be JSON
  }
  return NextResponse.redirect(errorUrl, redirectStatus);
}


================================================
FILE: web/src/app/auth/login/EmailPasswordForm.test.tsx
================================================
/**
 * Integration Test: Email/Password Authentication Workflow
 *
 * Tests the complete user journey for logging in.
 * This tests the full workflow: form → validation → API call → redirect
 */
import React from "react";
import { render, screen, waitFor, setupUser } from "@tests/setup/test-utils";
import EmailPasswordForm from "./EmailPasswordForm";

// Mock next/navigation (not used by this component, but required by dependencies)
jest.mock("next/navigation", () => ({
  useRouter: () => ({
    push: jest.fn(),
    refresh: jest.fn(),
  }),
}));

describe("Email/Password Login Workflow", () => {
  let fetchSpy: jest.SpyInstance;

  beforeEach(() => {
    jest.clearAllMocks();
    fetchSpy = jest.spyOn(global, "fetch");
  });

  afterEach(() => {
    fetchSpy.mockRestore();
  });

  test("allows user to login with valid credentials", async () => {
    const user = setupUser();

    // Mock POST /api/auth/login
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    render(<EmailPasswordForm isSignup={false} />);

    // User fills out the form using placeholder text
    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);
    const passwordInput = screen.getByPlaceholderText(/∗/);

    await user.type(emailInput, "test@example.com");
    await user.type(passwordInput, "password123");

    // User submits the form
    const loginButton = screen.getByRole("button", { name: /sign in/i });
    await user.click(loginButton);

    // Verify success message is shown after login
    await waitFor(() => {
      expect(screen.getByText(/signed in successfully\./i)).toBeInTheDocument();
    });

    // Verify API was called with correct credentials
    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/auth/login",
      expect.objectContaining({
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded",
        },
      })
    );

    // Verify the request body contains email and password
    const callArgs = fetchSpy.mock.calls[0];
    const body = callArgs[1].body;
    expect(body.toString()).toContain("username=test%40example.com");
    expect(body.toString()).toContain("password=password123");
  });

  test("shows error message when login fails", async () => {
    const user = setupUser();

    // Mock POST /api/auth/login (failure)
    fetchSpy.mockResolvedValueOnce({
      ok: false,
      status: 401,
      json: async () => ({ detail: "LOGIN_BAD_CREDENTIALS" }),
    } as Response);

    render(<EmailPasswordForm isSignup={false} />);

    // User fills out form with invalid credentials
    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);
    const passwordInput = screen.getByPlaceholderText(/∗/);

    await user.type(emailInput, "wrong@example.com");
    await user.type(passwordInput, "wrongpassword");

    // User submits
    const loginButton = screen.getByRole("button", { name: /sign in/i });
    await user.click(loginButton);

    // Verify field-level error message is displayed (not the toast)
    await waitFor(() => {
      expect(
        screen.getByText(/^Invalid email or password$/i)
      ).toBeInTheDocument();
    });
  });
});

describe("Email/Password Signup Workflow", () => {
  let fetchSpy: jest.SpyInstance;

  beforeEach(() => {
    jest.clearAllMocks();
    fetchSpy = jest.spyOn(global, "fetch");
  });

  afterEach(() => {
    fetchSpy.mockRestore();
  });

  test("allows user to sign up and login with valid credentials", async () => {
    const user = setupUser();

    // Mock POST /api/auth/register
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock POST /api/auth/login (after successful signup)
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    render(<EmailPasswordForm isSignup={true} />);

    // User fills out the signup form
    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);
    const passwordInput = screen.getByPlaceholderText(/∗/);

    await user.type(emailInput, "newuser@example.com");
    await user.type(passwordInput, "securepassword123");

    // User submits the signup form
    const signupButton = screen.getByRole("button", {
      name: /create account/i,
    });
    await user.click(signupButton);

    // Verify signup API was called
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/auth/register",
        expect.objectContaining({
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
        })
      );
    });

    // Verify signup request body
    const signupCallArgs = fetchSpy.mock.calls[0];
    const signupBody = JSON.parse(signupCallArgs[1].body);
    expect(signupBody).toEqual({
      email: "newuser@example.com",
      username: "newuser@example.com",
      password: "securepassword123",
      referral_source: undefined,
    });

    // Verify login API was called after successful signup
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/auth/login",
        expect.objectContaining({
          method: "POST",
        })
      );
    });

    // Verify success message is shown
    await waitFor(() => {
      expect(
        screen.getByText(/account created\. signing in/i)
      ).toBeInTheDocument();
    });
  });

  test("shows error when email already exists", async () => {
    const user = setupUser();

    // Mock POST /api/auth/register (failure - user exists)
    fetchSpy.mockResolvedValueOnce({
      ok: false,
      status: 400,
      json: async () => ({ detail: "REGISTER_USER_ALREADY_EXISTS" }),
    } as Response);

    render(<EmailPasswordForm isSignup={true} />);

    // User fills out form with existing email
    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);
    const passwordInput = screen.getByPlaceholderText(/∗/);

    await user.type(emailInput, "existing@example.com");
    await user.type(passwordInput, "password123");

    // User submits
    const signupButton = screen.getByRole("button", {
      name: /create account/i,
    });
    await user.click(signupButton);

    // Verify field-level error message is displayed (not the toast)
    await waitFor(() => {
      expect(
        screen.getByText(
          /^An account already exists with the specified email\.$/i
        )
      ).toBeInTheDocument();
    });
  });

  test("shows rate limit error when too many requests", async () => {
    const user = setupUser();

    // Mock POST /api/auth/register (failure - rate limit)
    fetchSpy.mockResolvedValueOnce({
      ok: false,
      status: 429,
      json: async () => ({ detail: "Too many requests" }),
    } as Response);

    render(<EmailPasswordForm isSignup={true} />);

    // User fills out form
    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);
    const passwordInput = screen.getByPlaceholderText(/∗/);

    await user.type(emailInput, "user@example.com");
    await user.type(passwordInput, "password123");

    // User submits
    const signupButton = screen.getByRole("button", {
      name: /create account/i,
    });
    await user.click(signupButton);

    // Verify field-level rate limit message is displayed (not the toast)
    await waitFor(() => {
      expect(
        screen.getByText(/^Too many requests\. Please try again later\.$/i)
      ).toBeInTheDocument();
    });
  });
});


================================================
FILE: web/src/app/auth/login/EmailPasswordForm.tsx
================================================
"use client";

import { toast } from "@/hooks/useToast";
import { basicLogin, basicSignup } from "@/lib/user";
import { Button } from "@opal/components";
import { Form, Formik } from "formik";
import * as Yup from "yup";
import { requestEmailVerification } from "../lib";
import { useMemo, useState } from "react";
import { Spinner } from "@/components/Spinner";
import Link from "next/link";
import { useUser } from "@/providers/UserProvider";
import { FormikField } from "@/refresh-components/form/FormikField";
import { FormField } from "@/refresh-components/form/FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { validateInternalRedirect } from "@/lib/auth/redirectValidation";
import { APIFormFieldState } from "@/refresh-components/form/types";
import { SvgArrowRightCircle } from "@opal/icons";
import { useCaptcha } from "@/lib/hooks/useCaptcha";
import Spacer from "@/refresh-components/Spacer";

interface EmailPasswordFormProps {
  isSignup?: boolean;
  shouldVerify?: boolean;
  referralSource?: string;
  nextUrl?: string | null;
  defaultEmail?: string | null;
  isJoin?: boolean;
}

export default function EmailPasswordForm({
  isSignup = false,
  shouldVerify,
  referralSource,
  nextUrl,
  defaultEmail,
  isJoin = false,
}: EmailPasswordFormProps) {
  const { user, authTypeMetadata } = useUser();
  const passwordMinLength = authTypeMetadata?.passwordMinLength ?? 8;
  const [isWorking, setIsWorking] = useState<boolean>(false);
  const [apiStatus, setApiStatus] = useState<APIFormFieldState>("loading");
  const [showApiMessage, setShowApiMessage] = useState(false);
  const [errorMessage, setErrorMessage] = useState<string>("");
  const { getCaptchaToken } = useCaptcha();

  const apiMessages = useMemo(
    () => ({
      loading: isSignup
        ? isJoin
          ? "Joining..."
          : "Creating account..."
        : "Signing in...",
      success: isSignup
        ? "Account created. Signing in..."
        : "Signed in successfully.",
      error: errorMessage,
    }),
    [isSignup, isJoin, errorMessage]
  );

  return (
    <>
      {isWorking && <Spinner />}

      <Formik
        initialValues={{
          email: defaultEmail ? defaultEmail.toLowerCase() : "",
          password: "",
        }}
        validateOnChange={true}
        validateOnBlur={true}
        validationSchema={Yup.object().shape({
          email: Yup.string()
            .email()
            .required()
            .transform((value) => value.toLowerCase()),
          password: Yup.string()
            .min(
              passwordMinLength,
              `Password must be at least ${passwordMinLength} characters`
            )
            .required(),
        })}
        onSubmit={async (values: { email: string; password: string }) => {
          // Ensure email is lowercase
          const email: string = values.email.toLowerCase();
          setShowApiMessage(true);
          setApiStatus("loading");
          setErrorMessage("");

          if (isSignup) {
            // login is fast, no need to show a spinner
            setIsWorking(true);

            // Get captcha token for signup (if captcha is enabled)
            const captchaToken = await getCaptchaToken("signup");

            const response = await basicSignup(
              email,
              values.password,
              referralSource,
              captchaToken
            );

            if (!response.ok) {
              setIsWorking(false);

              const errorBody: any = await response.json();
              const errorDetail = errorBody.detail;
              let errorMsg: string = "Unknown error";
              if (errorDetail === "REGISTER_USER_ALREADY_EXISTS") {
                errorMsg =
                  "An account already exists with the specified email.";
              } else if (typeof errorDetail === "string" && errorDetail) {
                errorMsg = errorDetail;
              }
              if (response.status === 429) {
                errorMsg = "Too many requests. Please try again later.";
              }
              setErrorMessage(errorMsg);
              setApiStatus("error");
              toast.error(`Failed to sign up - ${errorMsg}`);
              setIsWorking(false);
              return;
            } else {
              setApiStatus("success");
              toast.success("Account created successfully. Please log in.");
            }
          }

          const loginResponse = await basicLogin(email, values.password);
          if (loginResponse.ok) {
            setApiStatus("success");
            if (isSignup && shouldVerify) {
              await requestEmailVerification(email);
              // Use window.location.href to force a full page reload,
              // ensuring app re-initializes with the new state (including
              // server-side provider values)
              window.location.href = "/auth/waiting-on-verification";
            } else {
              // The searchparam is purely for multi tenant developement purposes.
              // It replicates the behavior of the case where a user
              // has signed up with email / password as the only user to an instance
              // and has just completed verification
              const validatedNextUrl = validateInternalRedirect(nextUrl);
              window.location.href = validatedNextUrl
                ? validatedNextUrl
                : `/app${isSignup && !isJoin ? "?new_team=true" : ""}`;
            }
          } else {
            setIsWorking(false);
            const errorDetail: any = (await loginResponse.json()).detail;
            let errorMsg: string = "Unknown error";
            if (errorDetail === "LOGIN_BAD_CREDENTIALS") {
              errorMsg = "Invalid email or password";
            } else if (errorDetail === "NO_WEB_LOGIN_AND_HAS_NO_PASSWORD") {
              errorMsg = "Create an account to set a password";
            } else if (typeof errorDetail === "string") {
              errorMsg = errorDetail;
            }
            if (loginResponse.status === 429) {
              errorMsg = "Too many requests. Please try again later.";
            }
            setErrorMessage(errorMsg);
            setApiStatus("error");
            toast.error(`Failed to login - ${errorMsg}`);
          }
        }}
      >
        {({ isSubmitting, isValid, dirty, values }) => {
          return (
            <Form className="gap-y-3">
              <FormikField<string>
                name="email"
                render={(field, helper, meta, state) => (
                  <FormField name="email" state={state} className="w-full">
                    <FormField.Label>Email Address</FormField.Label>
                    <FormField.Control>
                      <InputTypeIn
                        {...field}
                        onChange={(e) => {
                          if (showApiMessage && apiStatus === "error") {
                            setShowApiMessage(false);
                            setErrorMessage("");
                            setApiStatus("loading");
                          }
                          field.onChange(e);
                        }}
                        placeholder="email@yourcompany.com"
                        onClear={() => helper.setValue("")}
                        data-testid="email"
                        variant={apiStatus === "error" ? "error" : undefined}
                        showClearButton={false}
                      />
                    </FormField.Control>
                  </FormField>
                )}
              />

              <FormikField<string>
                name="password"
                render={(field, helper, meta, state) => (
                  <FormField name="password" state={state} className="w-full">
                    <FormField.Label>Password</FormField.Label>
                    <FormField.Control>
                      <PasswordInputTypeIn
                        {...field}
                        onChange={(e) => {
                          if (showApiMessage && apiStatus === "error") {
                            setShowApiMessage(false);
                            setErrorMessage("");
                            setApiStatus("loading");
                          }
                          field.onChange(e);
                        }}
                        placeholder="∗∗∗∗∗∗∗∗∗∗∗∗∗∗"
                        onClear={() => helper.setValue("")}
                        data-testid="password"
                        error={apiStatus === "error"}
                        showClearButton={false}
                      />
                    </FormField.Control>
                    {isSignup && !showApiMessage && (
                      <FormField.Message
                        messages={{
                          idle: `Password must be at least ${passwordMinLength} characters`,
                          error: meta.error,
                          success: `Password must be at least ${passwordMinLength} characters`,
                        }}
                      />
                    )}
                    {showApiMessage && (
                      <FormField.APIMessage
                        state={apiStatus}
                        messages={apiMessages}
                      />
                    )}
                  </FormField>
                )}
              />

              <Spacer rem={0.25} />
              <Button
                disabled={isSubmitting || !isValid || !dirty}
                type="submit"
                width="full"
                rightIcon={SvgArrowRightCircle}
              >
                {isJoin ? "Join" : isSignup ? "Create Account" : "Sign In"}
              </Button>
              {user?.is_anonymous_user && (
                <Link
                  href="/app"
                  className="text-xs text-action-link-05 cursor-pointer text-center w-full font-medium mx-auto"
                >
                  <span className="hover:border-b hover:border-dotted hover:border-action-link-05">
                    or continue as guest
                  </span>
                </Link>
              )}
            </Form>
          );
        }}
      </Formik>
    </>
  );
}


================================================
FILE: web/src/app/auth/login/LoginPage.tsx
================================================
"use client";

import { AuthTypeMetadata } from "@/hooks/useAuthTypeMetadata";
import LoginText from "@/app/auth/login/LoginText";
import SignInButton from "@/app/auth/login/SignInButton";
import EmailPasswordForm from "./EmailPasswordForm";
import { AuthType, NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED } from "@/lib/constants";
import { useSendAuthRequiredMessage } from "@/lib/extension/utils";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Message from "@/refresh-components/messages/Message";

interface LoginPageProps {
  authUrl: string | null;
  authTypeMetadata: AuthTypeMetadata | null;
  nextUrl: string | null;
  hidePageRedirect?: boolean;
  verified?: boolean;
  isFirstUser?: boolean;
}

export default function LoginPage({
  authUrl,
  authTypeMetadata,
  nextUrl,
  hidePageRedirect,
  verified,
  isFirstUser,
}: LoginPageProps) {
  useSendAuthRequiredMessage();

  // Honor any existing nextUrl; only default to new team flow for first users with no nextUrl
  const effectiveNextUrl =
    nextUrl ?? (isFirstUser ? "/app?new_team=true" : null);

  return (
    <div className="flex flex-col w-full justify-center">
      {verified && (
        <Message
          success
          close={false}
          text="Your email has been verified! Please sign in to continue."
          className="w-full mb-4"
        />
      )}
      {authUrl &&
        authTypeMetadata &&
        authTypeMetadata.authType !== AuthType.CLOUD &&
        // basic auth is handled below w/ the EmailPasswordForm
        authTypeMetadata.authType !== AuthType.BASIC && (
          <div className="flex flex-col w-full gap-4">
            <LoginText />
            <SignInButton
              authorizeUrl={authUrl}
              authType={authTypeMetadata?.authType}
            />
          </div>
        )}

      {authTypeMetadata?.authType === AuthType.CLOUD && (
        <div className="w-full justify-center flex flex-col gap-6">
          <LoginText />
          {authUrl && authTypeMetadata && (
            <>
              <SignInButton
                authorizeUrl={authUrl}
                authType={authTypeMetadata?.authType}
              />
              <div className="flex flex-row items-center w-full gap-2">
                <div className="flex-1 border-t border-text-01" />
                <Text as="p" text03 mainUiMuted>
                  or
                </Text>
                <div className="flex-1 border-t border-text-01" />
              </div>
            </>
          )}
          <EmailPasswordForm shouldVerify={true} nextUrl={effectiveNextUrl} />
          {NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED && (
            <Button href="/auth/forgot-password">Reset Password</Button>
          )}
        </div>
      )}

      {authTypeMetadata?.authType === AuthType.BASIC && (
        <div className="flex flex-col w-full gap-6">
          <LoginText />
          <EmailPasswordForm nextUrl={effectiveNextUrl} />
        </div>
      )}

      {!hidePageRedirect && (
        <p className="text-center mt-4">
          Don&apos;t have an account?{" "}
          <span
            onClick={() => {
              if (typeof window !== "undefined" && window.top) {
                window.top.location.href = "/auth/signup";
              } else {
                window.location.href = "/auth/signup";
              }
            }}
            className="text-link font-medium cursor-pointer"
          >
            Create an account
          </span>
        </p>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/auth/login/LoginText.tsx
================================================
"use client";

import React, { useContext } from "react";
import { SettingsContext } from "@/providers/SettingsProvider";
import Text from "@/refresh-components/texts/Text";

export default function LoginText() {
  const settings = useContext(SettingsContext);
  return (
    <div className="w-full flex flex-col ">
      <Text as="p" headingH2 text05>
        Welcome to{" "}
        {(settings && settings?.enterpriseSettings?.application_name) || "Onyx"}
      </Text>
      <Text as="p" text03 mainUiMuted>
        Your open source AI platform for work
      </Text>
    </div>
  );
}


================================================
FILE: web/src/app/auth/login/SignInButton.tsx
================================================
/**
 * SignInButton — renders the SSO / OAuth sign-in button on the login page.
 *
 * IMPORTANT: This component is rendered as part of the /auth/login page, which
 * is used in healthcheck and monitoring flows that issue headless (non-browser)
 * requests (e.g. `curl`). During server-side rendering of those requests,
 * browser-only globals like `window`, `document`, `navigator`, etc. are NOT
 * available. Even though this file is marked "use client", Next.js still
 * executes the component body on the server during SSR — only hooks like
 * `useEffect` are skipped.
 *
 * Do NOT reference `window` or other browser APIs in the render path of this
 * component. If you need browser globals, gate them behind `useEffect` or
 * `typeof window !== "undefined"` checks inside callbacks/effects — but be
 * aware that Turbopack may optimise away bare `typeof window` guards in the
 * SSR bundle, so prefer `useEffect` for safety.
 */

"use client";

import { Button } from "@opal/components";
import { AuthType } from "@/lib/constants";
import { FcGoogle } from "react-icons/fc";
import type { IconProps } from "@opal/types";

interface SignInButtonProps {
  authorizeUrl: string;
  authType: AuthType;
}

export default function SignInButton({
  authorizeUrl,
  authType,
}: SignInButtonProps) {
  let button: string | undefined;
  let icon: React.FunctionComponent<IconProps> | undefined;

  if (authType === AuthType.GOOGLE_OAUTH || authType === AuthType.CLOUD) {
    button = "Continue with Google";
    icon = FcGoogle;
  } else if (authType === AuthType.OIDC) {
    button = "Continue with OIDC SSO";
  } else if (authType === AuthType.SAML) {
    button = "Continue with SAML SSO";
  }

  if (!button) {
    throw new Error(`Unhandled authType: ${authType}`);
  }

  return (
    <Button
      prominence={
        authType === AuthType.GOOGLE_OAUTH || authType === AuthType.CLOUD
          ? "secondary"
          : "primary"
      }
      width="full"
      icon={icon}
      href={authorizeUrl}
    >
      {button}
    </Button>
  );
}


================================================
FILE: web/src/app/auth/login/page.tsx
================================================
import { User } from "@/lib/types";
import {
  getCurrentUserSS,
  getAuthUrlSS,
  getAuthTypeMetadataSS,
  AuthTypeMetadata,
} from "@/lib/userSS";
import { redirect } from "next/navigation";
import type { Route } from "next";
import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import LoginPage from "./LoginPage";
import { AuthType } from "@/lib/constants";

export interface PageProps {
  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;
}

export default async function Page(props: PageProps) {
  const searchParams = await props.searchParams;
  const autoRedirectDisabled = searchParams?.disableAutoRedirect === "true";
  const autoRedirectToSignupDisabled =
    searchParams?.autoRedirectToSignup === "false";
  const nextUrl: string | null = Array.isArray(searchParams?.next)
    ? searchParams?.next[0] ?? null
    : searchParams?.next ?? null;
  const verified = searchParams?.verified === "true";
  const isFirstUser = searchParams?.first_user === "true";

  // catch cases where the backend is completely unreachable here
  // without try / catch, will just raise an exception and the page
  // will not render
  let authTypeMetadata: AuthTypeMetadata | null = null;
  let currentUser: User | null = null;
  try {
    [authTypeMetadata, currentUser] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Some fetch failed for the login page - ${e}`);
  }

  // if there are no users, redirect to signup page for initial setup
  // (only for auth types that support self-service signup)
  if (
    authTypeMetadata &&
    !authTypeMetadata.hasUsers &&
    !autoRedirectToSignupDisabled &&
    authTypeMetadata.authType === AuthType.BASIC
  ) {
    return redirect("/auth/signup");
  }

  // if user is already logged in, take them to the main app page
  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {
    console.log("Login page: User is logged in, redirecting to chat", {
      userId: currentUser.id,
      is_active: currentUser.is_active,
      is_anonymous: currentUser.is_anonymous_user,
    });

    if (authTypeMetadata?.requiresVerification && !currentUser.is_verified) {
      return redirect("/auth/waiting-on-verification");
    }

    // Add a query parameter to indicate this is a redirect from login
    // This will help prevent redirect loops
    return redirect("/app?from=login");
  }

  // get where to send the user to authenticate
  let authUrl: string | null = null;
  if (authTypeMetadata) {
    try {
      authUrl = await getAuthUrlSS(authTypeMetadata.authType, nextUrl);
    } catch (e) {
      console.log(`Some fetch failed for the login page - ${e}`);
    }
  }

  if (authTypeMetadata?.autoRedirect && authUrl && !autoRedirectDisabled) {
    return redirect(authUrl as Route);
  }

  const ssoLoginFooterContent =
    authTypeMetadata &&
    (authTypeMetadata.authType === AuthType.GOOGLE_OAUTH ||
      authTypeMetadata.authType === AuthType.OIDC ||
      authTypeMetadata.authType === AuthType.SAML) ? (
      <>Need access? Reach out to your IT admin to get access.</>
    ) : undefined;

  return (
    <div className="flex flex-col ">
      <AuthFlowContainer
        authState="login"
        footerContent={ssoLoginFooterContent}
      >
        <LoginPage
          authUrl={authUrl}
          authTypeMetadata={authTypeMetadata}
          nextUrl={nextUrl}
          hidePageRedirect={true}
          verified={verified}
          isFirstUser={isFirstUser}
        />
      </AuthFlowContainer>
    </div>
  );
}


================================================
FILE: web/src/app/auth/logout/route.ts
================================================
import { getAuthTypeMetadataSS, logoutSS } from "@/lib/userSS";
import { NextRequest } from "next/server";

export const POST = async (request: NextRequest) => {
  // Directs the logout request to the appropriate FastAPI endpoint.
  // Needed since env variables don't work well on the client-side
  const authTypeMetadata = await getAuthTypeMetadataSS();
  const response = await logoutSS(authTypeMetadata.authType, request.headers);

  if (response && !response.ok) {
    return new Response(response.body, { status: response?.status });
  }

  // Always clear the auth cookie on logout. This is critical for the JWT
  // auth backend where destroy_token is a no-op (stateless), but is also
  // the correct thing to do for Redis/Postgres backends — the server-side
  // Set-Cookie from FastAPI never reaches the browser since logoutSS is a
  // server-to-server fetch.
  const cookiesToDelete = ["fastapiusersauth"];
  const cookieOptions = {
    path: "/",
    secure: process.env.NODE_ENV === "production",
    httpOnly: true,
    sameSite: "lax" as const,
  };

  const headers = new Headers();

  cookiesToDelete.forEach((cookieName) => {
    headers.append(
      "Set-Cookie",
      `${cookieName}=; Max-Age=0; ${Object.entries(cookieOptions)
        .map(([key, value]) => `${key}=${value}`)
        .join("; ")}`
    );
  });

  return new Response(null, {
    status: 204,
    headers: headers,
  });
};


================================================
FILE: web/src/app/auth/oauth/callback/route.ts
================================================
import { authErrorRedirect } from "@/app/auth/libSS";
import { getDomain } from "@/lib/redirectSS";
import { buildUrl } from "@/lib/utilsSS";
import { NextRequest, NextResponse } from "next/server";

export const GET = async (request: NextRequest) => {
  // Wrapper around the FastAPI endpoint /auth/oauth/callback,
  // which adds back a redirect to the main app.
  const url = new URL(buildUrl("/auth/oauth/callback"));
  url.search = request.nextUrl.search;
  const cookieHeader = request.headers.get("cookie") || "";

  // Set 'redirect' to 'manual' to prevent automatic redirection
  const response = await fetch(url.toString(), {
    redirect: "manual",
    headers: cookieHeader ? { cookie: cookieHeader } : undefined,
  });
  const setCookieHeader = response.headers.get("set-cookie");

  if (response.status === 401) {
    return NextResponse.redirect(
      new URL("/auth/create-account", getDomain(request))
    );
  }

  if (!setCookieHeader) {
    return authErrorRedirect(request, response);
  }

  // Get the redirect URL from the backend's 'Location' header, or default to '/'
  const redirectUrl = response.headers.get("location") || "/";

  const redirectResponse = NextResponse.redirect(
    new URL(redirectUrl, getDomain(request))
  );

  redirectResponse.headers.set("set-cookie", setCookieHeader);
  return redirectResponse;
};


================================================
FILE: web/src/app/auth/oidc/callback/route.ts
================================================
import { authErrorRedirect } from "@/app/auth/libSS";
import { getDomain } from "@/lib/redirectSS";
import { buildUrl } from "@/lib/utilsSS";
import { NextRequest, NextResponse } from "next/server";

export const GET = async (request: NextRequest) => {
  // Wrapper around the FastAPI endpoint /auth/oidc/callback,
  // which adds back a redirect to the main app.
  const url = new URL(buildUrl("/auth/oidc/callback"));
  url.search = request.nextUrl.search;
  const cookieHeader = request.headers.get("cookie") || "";

  // Set 'redirect' to 'manual' to prevent automatic redirection
  const response = await fetch(url.toString(), {
    redirect: "manual",
    headers: cookieHeader ? { cookie: cookieHeader } : undefined,
  });
  const setCookieHeader = response.headers.get("set-cookie");

  if (response.status === 401) {
    return NextResponse.redirect(
      new URL("/auth/create-account", getDomain(request))
    );
  }

  if (!setCookieHeader) {
    return authErrorRedirect(request, response);
  }

  // Get the redirect URL from the backend's 'Location' header, or default to '/'
  const redirectUrl = response.headers.get("location") || "/";

  const redirectResponse = NextResponse.redirect(
    new URL(redirectUrl, getDomain(request))
  );

  redirectResponse.headers.set("set-cookie", setCookieHeader);
  return redirectResponse;
};


================================================
FILE: web/src/app/auth/reset-password/page.tsx
================================================
"use client";
import React, { useState, useEffect } from "react";
import { resetPassword } from "../forgot-password/utils";
import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import Title from "@/components/ui/title";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import Link from "next/link";
import { Button } from "@opal/components";
import { Form, Formik } from "formik";
import * as Yup from "yup";
import { TextFormField } from "@/components/Field";
import { toast } from "@/hooks/useToast";
import { Spinner } from "@/components/Spinner";
import { redirect, useSearchParams } from "next/navigation";
import {
  NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED,
  TENANT_ID_COOKIE_NAME,
} from "@/lib/constants";
import Cookies from "js-cookie";

const ResetPasswordPage: React.FC = () => {
  const [isWorking, setIsWorking] = useState(false);
  const searchParams = useSearchParams();
  const token = searchParams?.get("token");
  const tenantId = searchParams?.get(TENANT_ID_COOKIE_NAME);
  // Keep search param same name as cookie for simplicity

  useEffect(() => {
    if (tenantId) {
      Cookies.set(TENANT_ID_COOKIE_NAME, tenantId, {
        path: "/",
        expires: 1 / 24,
      }); // Expires in 1 hour
    }
  }, [tenantId]);

  if (!NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED) {
    redirect("/auth/login");
  }

  return (
    <AuthFlowContainer>
      <div className="flex flex-col w-full justify-center">
        <div className="flex">
          <Title className="mb-2 mx-auto font-bold">Reset Password</Title>
        </div>
        {isWorking && <Spinner />}
        <Formik
          initialValues={{
            password: "",
            confirmPassword: "",
          }}
          validationSchema={Yup.object().shape({
            password: Yup.string().required("Password is required"),
            confirmPassword: Yup.string()
              .oneOf([Yup.ref("password"), undefined], "Passwords must match")
              .required("Confirm Password is required"),
          })}
          onSubmit={async (values) => {
            if (!token) {
              toast.error("Invalid or missing reset token.");
              return;
            }
            setIsWorking(true);
            try {
              await resetPassword(token, values.password);
              toast.success(
                "Password reset successfully. Redirecting to login..."
              );
              setTimeout(() => {
                redirect("/auth/login");
              }, 1000);
            } catch (error) {
              if (error instanceof Error) {
                toast.error(
                  error.message || "An error occurred during password reset."
                );
              } else {
                toast.error("An unexpected error occurred. Please try again.");
              }
            } finally {
              setIsWorking(false);
            }
          }}
        >
          {({ isSubmitting }) => (
            <Form className="w-full flex flex-col items-stretch mt-2">
              <TextFormField
                name="password"
                label="New Password"
                type="password"
                placeholder="Enter your new password"
              />
              <TextFormField
                name="confirmPassword"
                label="Confirm New Password"
                type="password"
                placeholder="Confirm your new password"
              />

              <div className="flex">
                <Button disabled={isSubmitting} type="submit" width="full">
                  Reset Password
                </Button>
              </div>
            </Form>
          )}
        </Formik>
        <Spacer rem={1} />
        <div className="flex">
          <div className="mx-auto">
            <Text as="p">{markdown("[Back to Login](/auth/login)")}</Text>
          </div>
        </div>
      </div>
    </AuthFlowContainer>
  );
};

export default ResetPasswordPage;


================================================
FILE: web/src/app/auth/saml/callback/route.ts
================================================
import { authErrorRedirect } from "@/app/auth/libSS";
import { validateInternalRedirect } from "@/lib/auth/redirectValidation";
import { getDomain } from "@/lib/redirectSS";
import { buildUrl } from "@/lib/utilsSS";
import { NextRequest, NextResponse } from "next/server";

// have to use this so we don't hit the redirect URL with a `POST` request
const SEE_OTHER_REDIRECT_STATUS = 303;

async function handleSamlCallback(
  request: NextRequest,
  method: "GET" | "POST"
) {
  // Wrapper around the FastAPI endpoint /auth/saml/callback,
  // which adds back a redirect to the main app.
  const url = new URL(buildUrl("/auth/saml/callback"));
  url.search = request.nextUrl.search;

  const fetchOptions: RequestInit = {
    method,
    headers: {},
  };

  let relayState: string | null = null;

  // For POST requests, include form data
  if (method === "POST") {
    const formData = await request.formData();
    const relayStateValue = formData.get("RelayState");
    relayState = typeof relayStateValue === "string" ? relayStateValue : null;
    fetchOptions.body = formData;
  }

  // OneLogin python toolkit only supports HTTP-POST binding for SAMLResponse.
  // If the IdP returned SAMLResponse via query parameters (GET), convert to POST.
  if (method === "GET") {
    const samlResponse = request.nextUrl.searchParams.get("SAMLResponse");
    relayState = request.nextUrl.searchParams.get("RelayState");
    if (samlResponse) {
      const formData = new FormData();
      formData.set("SAMLResponse", samlResponse);
      if (relayState) {
        formData.set("RelayState", relayState);
      }
      // Clear query on backend URL and send as POST with form body
      url.search = "";
      fetchOptions.method = "POST";
      fetchOptions.body = formData;
    }
  }

  const response = await fetch(url.toString(), fetchOptions);
  const setCookieHeader = response.headers.get("set-cookie");

  if (!setCookieHeader) {
    return authErrorRedirect(request, response, SEE_OTHER_REDIRECT_STATUS);
  }

  const validatedRelayState = validateInternalRedirect(relayState);
  const redirectDestination = validatedRelayState ?? "/";

  const redirectResponse = NextResponse.redirect(
    new URL(redirectDestination, getDomain(request)),
    SEE_OTHER_REDIRECT_STATUS
  );
  redirectResponse.headers.set("set-cookie", setCookieHeader);
  return redirectResponse;
}

export const GET = async (request: NextRequest) => {
  return handleSamlCallback(request, "GET");
};

export const POST = async (request: NextRequest) => {
  return handleSamlCallback(request, "POST");
};


================================================
FILE: web/src/app/auth/signup/ReferralSourceSelector.tsx
================================================
"use client";

import { useState } from "react";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { Label } from "@/components/Field";

interface ReferralSourceSelectorProps {
  defaultValue?: string;
}

export default function ReferralSourceSelector({
  defaultValue,
}: ReferralSourceSelectorProps) {
  const [referralSource, setReferralSource] = useState(defaultValue);

  const referralOptions = [
    { value: "search", label: "Search Engine (Google/Bing)" },
    { value: "friend", label: "Friend/Colleague" },
    { value: "linkedin", label: "LinkedIn" },
    { value: "twitter", label: "Twitter" },
    { value: "hackernews", label: "HackerNews" },
    { value: "reddit", label: "Reddit" },
    { value: "youtube", label: "YouTube" },
    { value: "podcast", label: "Podcast" },
    { value: "blog", label: "Article/Blog" },
    { value: "ads", label: "Advertisements" },
    { value: "other", label: "Other" },
  ];

  const handleChange = (value: string) => {
    setReferralSource(value);
    const cookies = require("js-cookie");
    cookies.set("referral_source", value, {
      expires: 365,
      path: "/",
      sameSite: "strict",
    });
  };

  return (
    <div className="w-full gap-y-2 flex flex-col">
      <Label className="text-text-950" small={false}>
        How did you hear about us?
      </Label>
      <InputSelect value={referralSource} onValueChange={handleChange}>
        <InputSelect.Trigger placeholder="Select an option" />

        <InputSelect.Content>
          {referralOptions.map((option) => (
            <InputSelect.Item key={option.value} value={option.value}>
              {option.label}
            </InputSelect.Item>
          ))}
        </InputSelect.Content>
      </InputSelect>
    </div>
  );
}


================================================
FILE: web/src/app/auth/signup/page.tsx
================================================
import { User } from "@/lib/types";
import {
  getCurrentUserSS,
  getAuthTypeMetadataSS,
  AuthTypeMetadata,
  getAuthUrlSS,
} from "@/lib/userSS";
import { redirect } from "next/navigation";
import EmailPasswordForm from "../login/EmailPasswordForm";
import SignInButton from "@/app/auth/login/SignInButton";
import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
import ReferralSourceSelector from "./ReferralSourceSelector";
import AuthErrorDisplay from "@/components/auth/AuthErrorDisplay";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { AuthType } from "@/lib/constants";

const Page = async (props: {
  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;
}) => {
  const searchParams = await props.searchParams;
  const nextUrl = Array.isArray(searchParams?.next)
    ? searchParams?.next[0]
    : searchParams?.next || null;

  const defaultEmail = Array.isArray(searchParams?.email)
    ? searchParams?.email[0]
    : searchParams?.email || null;

  // catch cases where the backend is completely unreachable here
  // without try / catch, will just raise an exception and the page
  // will not render
  let authTypeMetadata: AuthTypeMetadata | null = null;
  let currentUser: User | null = null;
  try {
    [authTypeMetadata, currentUser] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Some fetch failed for the login page - ${e}`);
  }

  // if user is already logged in, take them to the main app page
  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {
    if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {
      return redirect("/app");
    }
    return redirect("/auth/waiting-on-verification");
  }
  const cloud = authTypeMetadata?.authType === AuthType.CLOUD;

  // only enable this page if basic login is enabled
  if (authTypeMetadata?.authType !== AuthType.BASIC && !cloud) {
    return redirect("/app");
  }

  let authUrl: string | null = null;
  if (cloud && authTypeMetadata) {
    authUrl = await getAuthUrlSS(authTypeMetadata.authType, null);
  }

  return (
    <AuthFlowContainer authState="signup">
      <AuthErrorDisplay searchParams={searchParams} />

      <>
        <div className="absolute top-10x w-full"></div>
        <div
          className={cn(
            "flex w-full flex-col justify-start",
            cloud ? "" : "gap-6"
          )}
        >
          <div className="w-full">
            <Text as="p" headingH2 text05>
              {cloud ? "Complete your sign up" : "Create account"}
            </Text>
            <Text as="p" text03>
              Get started with Onyx
            </Text>
          </div>
          {cloud && authUrl && (
            <div className="w-full justify-center mt-6">
              <SignInButton authorizeUrl={authUrl} authType={AuthType.CLOUD} />
              <div className="flex items-center w-full my-4">
                <div className="flex-grow border-t border-border-01" />
                <Text as="p" mainUiMuted text03 className="mx-2">
                  or
                </Text>
                <div className="flex-grow border-t border-border-01" />
              </div>
            </div>
          )}

          {cloud && (
            <>
              <div className="w-full flex flex-col mb-3">
                <ReferralSourceSelector />
              </div>
            </>
          )}

          <EmailPasswordForm
            isSignup
            shouldVerify={authTypeMetadata?.requiresVerification}
            nextUrl={nextUrl}
            defaultEmail={defaultEmail}
          />
        </div>
      </>
    </AuthFlowContainer>
  );
};

export default Page;


================================================
FILE: web/src/app/auth/verify-email/Verify.tsx
================================================
"use client";

import { useSearchParams } from "next/navigation";
import { useCallback, useEffect, useState } from "react";
import { Text } from "@opal/components";
import Spacer from "@/refresh-components/Spacer";
import { RequestNewVerificationEmail } from "../waiting-on-verification/RequestNewVerificationEmail";
import { User } from "@/lib/types";
import Logo from "@/refresh-components/Logo";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";

export interface VerifyProps {
  user: User | null;
}

export default function Verify({ user }: VerifyProps) {
  const searchParams = useSearchParams();

  const [error, setError] = useState("");

  const verify = useCallback(async () => {
    const token = searchParams?.get("token");
    const firstUser =
      searchParams?.get("first_user") === "true" && NEXT_PUBLIC_CLOUD_ENABLED;
    if (!token) {
      setError(
        "Missing verification token. Try requesting a new verification email."
      );
      return;
    }

    const response = await fetch("/api/auth/verify", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ token }),
    });

    if (response.ok) {
      // Redirect to login page instead of /app so user can log in
      // from any browser (not dependent on the original signup session)
      const loginUrl = firstUser
        ? "/auth/login?verified=true&first_user=true"
        : "/auth/login?verified=true";
      window.location.href = loginUrl;
    } else {
      let errorDetail = "unknown error";
      try {
        errorDetail = (await response.json()).detail;
      } catch (e) {
        console.error("Failed to parse verification error response:", e);
      }
      setError(
        `Failed to verify your email - ${errorDetail}. Please try requesting a new verification email.`
      );
    }
  }, [searchParams]);

  useEffect(() => {
    verify();
  }, [verify]);

  return (
    <main>
      <div className="min-h-screen flex flex-col items-center justify-center py-12 px-4 sm:px-6 lg:px-8">
        <Logo folded size={64} className="mx-auto w-fit animate-pulse" />
        {!error ? (
          <>
            <Spacer rem={0.5} />
            <Text as="p">Verifying your email...</Text>
          </>
        ) : (
          <div>
            <Spacer rem={0.5} />
            <Text as="p">{error}</Text>

            {user && (
              <div className="text-center">
                <RequestNewVerificationEmail email={user.email}>
                  {/* TODO(@raunakab): migrate to @opal/components Text */}
                  <p className="text-sm mt-2 text-link">
                    Get new verification email
                  </p>
                </RequestNewVerificationEmail>
              </div>
            )}
          </div>
        )}
      </div>
    </main>
  );
}


================================================
FILE: web/src/app/auth/verify-email/page.tsx
================================================
import {
  AuthTypeMetadata,
  getAuthTypeMetadataSS,
  getCurrentUserSS,
} from "@/lib/userSS";
import Verify from "./Verify";
import { User } from "@/lib/types";
import { redirect } from "next/navigation";

export default async function Page() {
  // catch cases where the backend is completely unreachable here
  // without try / catch, will just raise an exception and the page
  // will not render
  let authTypeMetadata: AuthTypeMetadata | null = null;
  let currentUser: User | null = null;
  try {
    [authTypeMetadata, currentUser] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Some fetch failed for the login page - ${e}`);
  }

  if (!authTypeMetadata?.requiresVerification || currentUser?.is_verified) {
    return redirect("/app");
  }

  return <Verify user={currentUser} />;
}


================================================
FILE: web/src/app/auth/waiting-on-verification/RequestNewVerificationEmail.tsx
================================================
"use client";

import { toast } from "@/hooks/useToast";
import { requestEmailVerification } from "../lib";
import { Spinner } from "@/components/Spinner";
import { useState, JSX } from "react";

export function RequestNewVerificationEmail({
  children,
  email,
}: {
  children: JSX.Element | string;
  email: string;
}) {
  const [isRequestingVerification, setIsRequestingVerification] =
    useState(false);

  return (
    <button
      className="text-link"
      onClick={async () => {
        setIsRequestingVerification(true);
        const response = await requestEmailVerification(email);
        setIsRequestingVerification(false);

        if (response.ok) {
          toast.success("A new verification email has been sent!");
        } else {
          const errorDetail = (await response.json()).detail;
          toast.error(
            `Failed to send a new verification email - ${errorDetail}`
          );
        }
      }}
    >
      {isRequestingVerification && <Spinner />}
      {children}
    </button>
  );
}


================================================
FILE: web/src/app/auth/waiting-on-verification/page.tsx
================================================
import {
  AuthTypeMetadata,
  getAuthTypeMetadataSS,
  getCurrentUserSS,
} from "@/lib/userSS";
import { redirect } from "next/navigation";
import { User } from "@/lib/types";
import { RequestNewVerificationEmail } from "./RequestNewVerificationEmail";
import Logo from "@/refresh-components/Logo";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";

export default async function Page() {
  // catch cases where the backend is completely unreachable here
  // without try / catch, will just raise an exception and the page
  // will not render
  let authTypeMetadata: AuthTypeMetadata | null = null;
  let currentUser: User | null = null;
  try {
    [authTypeMetadata, currentUser] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Some fetch failed for the login page - ${e}`);
  }

  if (!currentUser) {
    return redirect("/auth/login");
  }

  if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {
    return redirect("/app");
  }

  return (
    <main>
      <div className="min-h-screen flex flex-col items-center justify-center py-12 px-4 sm:px-6 lg:px-8 gap-4">
        <Logo folded size={64} className="mx-auto w-fit" />
        <div className="flex flex-col gap-2">
          <Text as="span">
            {markdown(
              `Hey, *${currentUser.email}*, it looks like you haven't verified your email yet.\nCheck your inbox for an email from us to get started!`
            )}
          </Text>
          <div className="flex flex-row items-center gap-1">
            <Text as="span">If you don't see anything, click</Text>
            <RequestNewVerificationEmail email={currentUser.email}>
              <Text as="span">here</Text>
            </RequestNewVerificationEmail>
            <Text as="span">to request a new email.</Text>
          </div>
        </div>
      </div>
    </main>
  );
}


================================================
FILE: web/src/app/components/nrf/SettingsPanel.tsx
================================================
"use client";

import Switch from "@/refresh-components/inputs/Switch";
import { useNRFPreferences } from "@/components/context/NRFPreferencesContext";
import Text from "@/refresh-components/texts/Text";
import { SvgX, SvgSettings, SvgSun, SvgMoon, SvgCheck } from "@opal/icons";
import { Button } from "@opal/components";
import { cn } from "@/lib/utils";
import { useUser } from "@/providers/UserProvider";
import { useTheme } from "next-themes";
import {
  CHAT_BACKGROUND_OPTIONS,
  CHAT_BACKGROUND_NONE,
} from "@/lib/constants/chatBackgrounds";

interface SettingRowProps {
  label: string;
  description?: string;
  children: React.ReactNode;
}

const SettingRow = ({ label, description, children }: SettingRowProps) => (
  <div className="flex justify-between items-center py-3">
    <div className="flex flex-col gap-0.5">
      <Text mainUiBody text04>
        {label}
      </Text>
      {description && (
        <Text secondaryBody text03>
          {description}
        </Text>
      )}
    </div>
    {children}
  </div>
);

interface BackgroundThumbnailProps {
  thumbnailUrl: string;
  label: string;
  isNone?: boolean;
  isSelected: boolean;
  onClick: () => void;
}

const BackgroundThumbnail = ({
  thumbnailUrl,
  label,
  isNone = false,
  isSelected,
  onClick,
}: BackgroundThumbnailProps) => (
  <button
    onClick={onClick}
    className="relative overflow-hidden rounded-xl transition-all aspect-video cursor-pointer border-none p-0 bg-transparent group"
    title={label}
    aria-label={`${label} background${isSelected ? " (selected)" : ""}`}
  >
    {isNone ? (
      <div className="absolute inset-0 bg-background flex items-center justify-center">
        <Text secondaryBody text03>
          None
        </Text>
      </div>
    ) : (
      <div
        className="absolute inset-0 bg-cover bg-center transition-transform duration-300 group-hover:scale-105"
        style={{ backgroundImage: `url(${thumbnailUrl})` }}
      />
    )}
    <div
      className={cn(
        "absolute inset-0 transition-all rounded-xl",
        isSelected
          ? "ring-2 ring-inset ring-theme-primary-05"
          : "ring-1 ring-inset ring-border-02 group-hover:ring-border-03"
      )}
    />
    {isSelected && (
      <div className="absolute top-2 right-2 w-5 h-5 rounded-full bg-theme-primary-05 flex items-center justify-center">
        <SvgCheck className="w-3 h-3 stroke-text-inverted-05" />
      </div>
    )}
  </button>
);

export const SettingsPanel = ({
  settingsOpen,
  toggleSettings,
  handleUseOnyxToggle,
}: {
  settingsOpen: boolean;
  toggleSettings: () => void;
  handleUseOnyxToggle: (checked: boolean) => void;
}) => {
  const { useOnyxAsNewTab } = useNRFPreferences();
  const { theme, setTheme } = useTheme();
  const { user, updateUserChatBackground } = useUser();

  const currentBackgroundId = user?.preferences?.chat_background ?? "none";
  const isDark = theme === "dark";

  const toggleTheme = () => {
    setTheme(isDark ? "light" : "dark");
  };

  const handleBackgroundChange = (backgroundId: string) => {
    updateUserChatBackground(
      backgroundId === CHAT_BACKGROUND_NONE ? null : backgroundId
    );
  };

  return (
    <>
      {/* Backdrop overlay */}
      <div
        className={cn(
          "fixed inset-0 bg-mask-03 backdrop-blur-sm z-40 transition-opacity duration-300",
          settingsOpen
            ? "opacity-100 pointer-events-auto"
            : "opacity-0 pointer-events-none"
        )}
        onClick={toggleSettings}
      />

      {/* Settings panel */}
      <div
        className={cn(
          "fixed top-0 right-0 w-[25rem] h-full z-50",
          "bg-gradient-to-b from-background-tint-02 to-background-tint-01",
          "backdrop-blur-[24px] border-l border-border-01 overflow-y-auto",
          "transition-transform duration-300 ease-out",
          settingsOpen ? "translate-x-0" : "translate-x-full"
        )}
      >
        {/* Header */}
        <div className="sticky top-0 z-10 bg-gradient-to-b from-background-tint-02 to-transparent pb-4">
          <div className="flex items-center justify-between px-6 pt-6 pb-2">
            <div className="flex items-center gap-3">
              <div className="flex items-center justify-center w-10 h-10 rounded-xl bg-background-tint-02">
                <SvgSettings className="w-5 h-5 stroke-text-03" />
              </div>
              <Text headingH3 text04>
                Settings
              </Text>
            </div>
            <div className="flex items-center gap-3">
              {/* Theme Toggle */}
              <Button
                icon={isDark ? SvgMoon : SvgSun}
                onClick={toggleTheme}
                prominence="tertiary"
                tooltip={`Switch to ${isDark ? "light" : "dark"} theme`}
              />
              <Button
                icon={SvgX}
                onClick={toggleSettings}
                prominence="tertiary"
                tooltip="Close settings"
              />
            </div>
          </div>
        </div>

        <div className="px-6 pb-8 flex flex-col gap-8">
          {/* General Section */}
          <section className="flex flex-col gap-3">
            <Text secondaryAction text03 className="uppercase tracking-wider">
              General
            </Text>
            <div className="flex flex-col gap-1 bg-background-tint-01 rounded-2xl px-4">
              <SettingRow label="Use Onyx as new tab page">
                <Switch
                  checked={useOnyxAsNewTab}
                  onCheckedChange={handleUseOnyxToggle}
                />
              </SettingRow>
            </div>
          </section>

          {/* Background Section */}
          <section className="flex flex-col gap-3">
            <Text secondaryAction text03 className="uppercase tracking-wider">
              Background
            </Text>
            <div className="grid grid-cols-3 gap-2">
              {CHAT_BACKGROUND_OPTIONS.map((bg) => (
                <BackgroundThumbnail
                  key={bg.id}
                  thumbnailUrl={bg.thumbnail}
                  label={bg.label}
                  isNone={bg.src === CHAT_BACKGROUND_NONE}
                  isSelected={currentBackgroundId === bg.id}
                  onClick={() => handleBackgroundChange(bg.id)}
                />
              ))}
            </div>
          </section>
        </div>
      </div>
    </>
  );
};


================================================
FILE: web/src/app/config/timeRange.tsx
================================================
import { getXDaysAgo, getXYearsAgo } from "@/lib/dateUtils";

export const timeRangeValues = [
  { label: "Last 2 years", value: getXYearsAgo(2) },
  { label: "Last year", value: getXYearsAgo(1) },
  { label: "Last 30 days", value: getXDaysAgo(30) },
  { label: "Last 7 days", value: getXDaysAgo(7) },
  { label: "Today", value: getXDaysAgo(1) },
];


================================================
FILE: web/src/app/connector/oauth/callback/[source]/route.tsx
================================================
import { INTERNAL_URL } from "@/lib/constants";
import { NextRequest, NextResponse } from "next/server";

// TODO: deprecate this and just go directly to the backend via /api/...
// For some reason Egnyte doesn't work when using /api, so leaving this as is for now
// If we do try and remove this, make sure we test the Egnyte connector oauth flow
export async function GET(request: NextRequest) {
  try {
    const backendUrl = new URL(INTERNAL_URL);
    // Copy path and query parameters from incoming request
    backendUrl.pathname = request.nextUrl.pathname;
    backendUrl.search = request.nextUrl.search;

    const response = await fetch(backendUrl, {
      method: "GET",
      headers: request.headers,
      body: request.body,
      signal: request.signal,
      // @ts-ignore
      duplex: "half",
    });

    const responseData = await response.json();
    if (responseData.redirect_url) {
      return NextResponse.redirect(responseData.redirect_url);
    }

    return new NextResponse(JSON.stringify(responseData), {
      status: response.status,
      headers: response.headers,
    });
  } catch (error: unknown) {
    console.error("Proxy error:", error);
    return NextResponse.json(
      {
        message: "Proxy error",
        error:
          error instanceof Error ? error.message : "An unknown error occurred",
      },
      { status: 500 }
    );
  }
}


================================================
FILE: web/src/app/craft/README.md
================================================
<h2 align="center">
    <a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
</h2>

<h1 align="center">Onyx Craft</h1>

<p align="center">
  <strong>Build apps, documents, and presentations from your company knowledge</strong>
</p>

<p align="center">
  <a href="https://docs.onyx.app/overview/core_features/craft"><img alt="Documentation" src="https://img.shields.io/badge/docs-onyx.app-blue?style=flat-square" /></a>
  <a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-green?style=flat-square" /></a>
    <a href="https://discord.gg/TDJ59cGV2X" target="_blank" rel="noopener noreferrer">
        <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
    </a>
  <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/onyx-dot-app/onyx" />
</p>

---

<p align="center">
  <a href="https://www.youtube.com/watch?v=Hvjn76YSIRY">
    <img src="https://img.youtube.com/vi/Hvjn76YSIRY/hqdefault.jpg" alt="Watch the video" />
  </a>
</p>

---

## Overview

Onyx Craft is an AI coding agent that creates web applications, documents, presentations, and more using your company's indexed knowledge. Users describe what they want in natural language, and the agent builds artifacts in an isolated sandbox environment with access to documents from connected sources like Linear, Slack, Google Drive, Confluence, and more.

For detailed documentation, visit [our docs](https://docs.onyx.app/overview/core_features/craft).

## Key Features

- **Web Applications** — Build Next.js applications with React, shadcn/ui, and Recharts for interactive dashboards and tools
- **Documents & Reports** — Generate polished markdown documents with DOCX export
- **Knowledge Integration** — Access indexed documents from your connectors (Linear, Slack, Google Drive, Confluence, etc.)
- **Real-time Preview** — Watch the agent build with live output streaming and tool call visibility
- **Session Management** — Pre-provisioned sandboxes, automatic snapshots, and session restore

## Quick Start

### Requirements

- Onyx deployment with an LLM provider configured (Anthropic, OpenAI, etc.)

### New Installations

You can install Onyx Craft using our [quickstart script](https://docs.onyx.app/deployment/getting_started/quickstart):

```bash
curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh \
  && chmod +x install.sh \
  && ./install.sh --include-craft
```

This will:

- Set `ENABLE_CRAFT=true` in the `.env` file
- Set `IMAGE_TAG=craft-latest` to use Craft-enabled images
- Run template setup on container startup

### Existing Deployments

Enable Craft on an existing deployment:

```bash
ENABLE_CRAFT=true IMAGE_TAG=craft-latest docker compose up -d
```

## How It Works

1. **User visits `/craft/v1`** — A sandbox is pre-provisioned in the background
2. **User describes what they want** — Message is sent to the OpenCode agent
3. **Agent builds artifacts** — Uses company knowledge and uploaded files
4. **Live preview shows output** — Next.js app, markdown, or other artifacts
5. **User iterates or downloads** — Request changes or export finished work

## Technical Architecture

### Sandbox Backends

Craft supports two sandbox backends controlled by `SANDBOX_BACKEND`:

**Self-Hosted**

- Filesystem-based sandboxes under `SANDBOX_BASE_PATH` (default: `/tmp/onyx-sandboxes`)
- No container isolation (process-level only)
- No automatic cleanup or snapshots
- Direct file access via symlinks to user's knowledge files

**Cloud** (Production)

- Pod-based isolation with ClusterIP services
- S3-based snapshots for session persistence
- Automatic cleanup of idle sandboxes (default: 1 hour timeout)
- Two containers per pod:
  - `sandbox` — Runs OpenCode agent and Next.js preview server
  - `file-sync` — Sidecar for S3 file synchronization

### Session Lifecycle

Sessions go through these states:

| State            | Description                                                     |
| ---------------- | --------------------------------------------------------------- |
| **Provisioning** | Sandbox being created when user visits /craft                   |
| **Ready**        | Sandbox ready, waiting for first message                        |
| **Running**      | Active session with agent processing                            |
| **Idle**         | No recent activity                                              |
| **Sleeping**     | Idle timeout reached, pod terminated (K8s only), snapshot saved |
| **Restored**     | User returns, snapshot loaded, session continues                |

### Sandbox Workspace Structure

Each session gets an isolated workspace:

```
$SANDBOX_ROOT/
├── files/                     # Symlink to user's knowledge files
└── sessions/
    └── {session_id}/
        ├── outputs/web/       # Next.js application
        ├── .venv/             # Python environment
        ├── .opencode/skills/  # Agent skills
        ├── attachments/       # User uploads
        ├── AGENTS.md          # Agent instructions
        └── opencode.json      # LLM configuration
```

### Sandbox Cleanup

Idle sandboxes are cleaned up by a Celery background task:

- **Trigger**: Sandbox idle longer than `SANDBOX_IDLE_TIMEOUT_SECONDS` (default: 1 hour)
- **Kubernetes**: Creates snapshots of all sessions, terminates the pod, marks sandbox as "sleeping"
- **Local**: No automatic cleanup (sandboxes persist until manually removed)

## Configuration

Key configuration categories (see source for full reference):

- **Core** — `ENABLE_CRAFT`, `SANDBOX_BACKEND` (local vs kubernetes)
- **Lifecycle** — Idle timeout (default 1 hour), max concurrent sandboxes per org (default 10)
- **Kubernetes** — Namespace, container image, S3 bucket for snapshots
- **File uploads** — Size limits (50MB per file, 20 files per session, 200MB total)
- **Rate limits** — Free users: 5 messages total; Paid users: 25 messages/week

## Tech Stack

**Frontend**

- Next.js, React, TypeScript
- Zustand for state management
- shadcn/ui components

**Backend**

- FastAPI, SQLAlchemy, Celery
- PostgreSQL for session/sandbox metadata
- S3-compatible storage for snapshots

**Agent**

- OpenCode CLI with ACP (Agent Communication Protocol)
- JSON-RPC 2.0 over stdin/stdout

**Sandbox Environment**

- Next.js 16, React 19
- shadcn/ui, Tailwind CSS, Recharts
- Python 3.11 with numpy, pandas, matplotlib

## Coming Soon

- **Presentations** — Create slide decks with AI-generated visuals using nanobanana
- **Spreadsheets**
- **HTML Dashboards**

## Contributing

See the main [CONTRIBUTING.md](../../../../CONTRIBUTING.md) for guidelines.

For Craft-specific development:

1. Set `ENABLE_CRAFT=true` in your environment
2. Ensure templates are available at `/templates/outputs` and `/templates/venv`
3. For local development, sandboxes are created under `/tmp/onyx-sandboxes`

## License

MIT — see [LICENSE](../../../../LICENSE)


================================================
FILE: web/src/app/craft/components/BigButton.tsx
================================================
"use client";

import { forwardRef, type ButtonHTMLAttributes } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";

export interface BigButtonProps
  extends ButtonHTMLAttributes<HTMLButtonElement> {
  // Subvariants
  primary?: boolean;
  secondary?: boolean;

  // Inverted mode for dark backgrounds
  inverted?: boolean;
}

const BigButton = forwardRef<HTMLButtonElement, BigButtonProps>(
  (
    { primary, secondary, inverted, disabled, children, className, ...props },
    ref
  ) => {
    const subvariant = primary
      ? "primary"
      : secondary
        ? "secondary"
        : "primary";

    const baseStyles =
      "px-6 py-3 rounded-xl w-fit flex flex-row items-center justify-center transition-colors";

    const variantStyles = {
      primary: {
        normal:
          "bg-theme-primary-05 hover:bg-theme-primary-04 active:bg-theme-primary-06",
        inverted: "bg-white hover:bg-gray-200 active:bg-gray-300",
        disabled: "bg-background-neutral-04",
      },
      secondary: {
        normal:
          "bg-transparent border border-border-01 hover:bg-background-tint-02 active:bg-background-tint-00",
        inverted:
          "bg-transparent border border-text-inverted-05 hover:bg-background-tint-inverted-02 active:bg-background-tint-inverted-01",
        disabled: "bg-background-neutral-03 border border-border-01",
      },
    };

    const textStyles = {
      primary: {
        normal: "text-text-inverted-05",
        inverted: "text-gray-900",
        disabled: "text-text-inverted-04",
      },
      secondary: {
        normal:
          "text-text-03 group-hover:text-text-04 group-active:text-text-05",
        inverted: "text-text-inverted-05",
        disabled: "text-text-01",
      },
    };

    const getVariantStyle = () => {
      if (disabled) return variantStyles[subvariant].disabled;
      return inverted
        ? variantStyles[subvariant].inverted
        : variantStyles[subvariant].normal;
    };

    const getTextStyle = () => {
      if (disabled) return textStyles[subvariant].disabled;
      return inverted
        ? textStyles[subvariant].inverted
        : textStyles[subvariant].normal;
    };

    // Check if className contains text color override
    const hasTextWhiteOverride =
      className?.includes("!text-white") || className?.includes("text-white");
    const hasTextBlackOverride =
      className?.includes("!text-black") || className?.includes("text-black");

    const getTextOverride = () => {
      if (hasTextWhiteOverride) return "!text-white";
      if (hasTextBlackOverride) return "!text-black";
      return getTextStyle();
    };

    return (
      <button
        ref={ref}
        className={cn("group", baseStyles, getVariantStyle(), className)}
        disabled={disabled}
        type="button"
        {...props}
      >
        <Text
          mainContentEmphasis
          className={cn("whitespace-nowrap", getTextOverride())}
          as="span"
        >
          {children}
        </Text>
      </button>
    );
  }
);
BigButton.displayName = "BigButton";

export default BigButton;


================================================
FILE: web/src/app/craft/components/BuildLLMPopover.tsx
================================================
"use client";

import { useState, useCallback, useRef, useEffect, useMemo } from "react";
import {
  SvgCheck,
  SvgChevronDown,
  SvgChevronRight,
  SvgPlug,
} from "@opal/icons";
import Text from "@/refresh-components/texts/Text";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import Switch from "@/refresh-components/inputs/Switch";
import LineItem from "@/refresh-components/buttons/LineItem";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import {
  BuildLlmSelection,
  BUILD_MODE_PROVIDERS,
  isRecommendedModel,
} from "@/app/craft/onboarding/constants";
import { ToggleWarningModal } from "./ToggleWarningModal";
import { getProviderIcon } from "@/app/admin/configuration/llm/utils";
import { Section } from "@/layouts/general-layouts";
import {
  Accordion,
  AccordionContent,
  AccordionItem,
  AccordionTrigger,
} from "@/components/ui/accordion";

interface BuildLLMPopoverProps {
  currentSelection: BuildLlmSelection | null;
  onSelectionChange: (selection: BuildLlmSelection) => void;
  llmProviders: LLMProviderDescriptor[] | undefined;
  onOpenOnboarding: (providerKey: string) => void;
  children: React.ReactNode;
  disabled?: boolean;
}

interface ModelOption {
  providerKey: string;
  providerName: string;
  providerDisplayName: string;
  modelName: string;
  displayName: string;
  isRecommended: boolean;
  isConfigured: boolean;
}

export function BuildLLMPopover({
  currentSelection,
  onSelectionChange,
  llmProviders,
  onOpenOnboarding,
  children,
  disabled = false,
}: BuildLLMPopoverProps) {
  const [showRecommendedOnly, setShowRecommendedOnly] = useState(true);
  const [showToggleWarning, setShowToggleWarning] = useState(false);
  const [isOpen, setIsOpen] = useState(false);
  const isClosingModalRef = useRef(false);
  const scrollContainerRef = useRef<HTMLDivElement>(null);
  const selectedItemRef = useRef<HTMLDivElement>(null);

  // Check which providers are configured (exact match on provider field)
  const isProviderConfigured = useCallback(
    (providerKey: string) => {
      return llmProviders?.some((p) => p.provider === providerKey);
    },
    [llmProviders]
  );

  // Get the actual provider descriptor for a configured provider
  const getProviderDescriptor = useCallback(
    (providerKey: string) => {
      return llmProviders?.find((p) => p.provider === providerKey);
    },
    [llmProviders]
  );

  // Build model options based on mode
  const modelOptions = useMemo((): ModelOption[] => {
    const options: ModelOption[] = [];

    if (showRecommendedOnly) {
      // Show curated list from BUILD_MODE_PROVIDERS
      BUILD_MODE_PROVIDERS.forEach((provider) => {
        const isConfigured = isProviderConfigured(provider.providerName);
        const descriptor = getProviderDescriptor(provider.providerName);
        const modelsToShow = provider.models.filter((m) => m.recommended);

        modelsToShow.forEach((model) => {
          // Get display name from backend if available
          const backendConfig = descriptor?.model_configurations.find(
            (mc) => mc.name === model.name
          );
          options.push({
            providerKey: provider.providerName,
            providerName: descriptor?.name || provider.label,
            providerDisplayName: provider.label,
            modelName: model.name,
            displayName: backendConfig?.display_name || model.label,
            isRecommended: true,
            isConfigured: isConfigured ?? false,
          });
        });
      });
    } else {
      // Show ALL configured providers and their visible models
      llmProviders?.forEach((provider) => {
        const visibleModels = provider.model_configurations.filter(
          (m) => m.is_visible
        );

        visibleModels.forEach((model) => {
          options.push({
            providerKey: provider.provider,
            providerName: provider.name,
            providerDisplayName:
              provider.provider_display_name || provider.provider,
            modelName: model.name,
            displayName: model.display_name || model.name,
            isRecommended: isRecommendedModel(provider.provider, model.name),
            isConfigured: true,
          });
        });
      });
    }

    return options;
  }, [
    showRecommendedOnly,
    llmProviders,
    isProviderConfigured,
    getProviderDescriptor,
  ]);

  // Group options by provider
  const groupedOptions = useMemo(() => {
    const groups = new Map<
      string,
      {
        providerKey: string;
        displayName: string;
        options: ModelOption[];
        isConfigured: boolean;
      }
    >();

    modelOptions.forEach((option) => {
      const groupKey = option.providerKey;

      if (!groups.has(groupKey)) {
        groups.set(groupKey, {
          providerKey: option.providerKey,
          displayName: option.providerDisplayName,
          options: [],
          isConfigured: option.isConfigured,
        });
      }

      groups.get(groupKey)!.options.push(option);
    });

    // Sort groups alphabetically
    const sortedKeys = Array.from(groups.keys()).sort((a, b) =>
      groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
    );

    return sortedKeys.map((key) => groups.get(key)!);
  }, [modelOptions]);

  // Determine current group for auto-expand
  const currentGroupKey = useMemo(() => {
    if (!currentSelection) return "";
    return currentSelection.provider;
  }, [currentSelection]);

  // Track expanded groups
  const [expandedGroups, setExpandedGroups] = useState<string[]>([
    currentGroupKey,
  ]);

  // Reset expanded groups when popover opens
  useEffect(() => {
    if (isOpen) {
      setExpandedGroups([currentGroupKey]);
    }
  }, [isOpen, currentGroupKey]);

  // Auto-scroll to selected model
  useEffect(() => {
    if (isOpen) {
      const timer = setTimeout(() => {
        selectedItemRef.current?.scrollIntoView({
          behavior: "instant",
          block: "center",
        });
      }, 50);
      return () => clearTimeout(timer);
    }
  }, [isOpen]);

  const handleAccordionChange = (value: string[]) => {
    setExpandedGroups(value);
  };

  const applySelection = useCallback(
    (option: ModelOption) => {
      if (!option.isConfigured) return;

      onSelectionChange({
        providerName: option.providerName,
        provider: option.providerKey,
        modelName: option.modelName,
      });
      setIsOpen(false);
    },
    [onSelectionChange]
  );

  // Handle toggle change - show warning when turning OFF
  const handleToggleChange = (checked: boolean) => {
    if (!checked && showRecommendedOnly) {
      setShowToggleWarning(true);
    } else {
      setShowRecommendedOnly(checked);
    }
  };

  // Reset closing flag after modal close transition
  useEffect(() => {
    if (!showToggleWarning && isClosingModalRef.current) {
      const timeoutId = setTimeout(() => {
        isClosingModalRef.current = false;
      }, 100);
      return () => clearTimeout(timeoutId);
    }
  }, [showToggleWarning]);

  const handleConnectClick = (providerKey: string) => {
    setIsOpen(false);
    onOpenOnboarding(providerKey);
  };

  const handlePopoverOpenChange = (open: boolean) => {
    if (disabled && open) {
      return;
    }
    if (!open && (showToggleWarning || isClosingModalRef.current)) {
      return;
    }
    setIsOpen(open);
  };

  const renderModelItem = (option: ModelOption) => {
    const isSelected =
      currentSelection?.modelName === option.modelName &&
      currentSelection?.provider === option.providerKey;

    // Build description with recommendation badge
    const description = option.isRecommended ? "Recommended" : undefined;

    return (
      <div
        key={`${option.providerKey}-${option.modelName}`}
        ref={isSelected ? selectedItemRef : undefined}
      >
        <LineItem
          selected={isSelected}
          description={description}
          onClick={() => applySelection(option)}
          rightChildren={
            isSelected ? (
              <SvgCheck className="h-4 w-4 stroke-action-link-05 shrink-0" />
            ) : null
          }
        >
          {option.displayName}
        </LineItem>
      </div>
    );
  };

  return (
    <>
      <Popover open={isOpen} onOpenChange={handlePopoverOpenChange}>
        <Popover.Trigger asChild>{children}</Popover.Trigger>
        <Popover.Content
          side="bottom"
          align="start"
          width="lg"
          onInteractOutside={(e) => {
            if (showToggleWarning || isClosingModalRef.current) {
              e.preventDefault();
            }
          }}
          onPointerDownOutside={(e) => {
            if (showToggleWarning || isClosingModalRef.current) {
              e.preventDefault();
            }
          }}
        >
          <div className="px-3">
            <Section gap={0.5}>
              {/* Toggle for recommended only */}
              <div className="flex items-center justify-between py-3 gap-3 border-b border-border-01 px-1">
                <Text secondaryBody text03>
                  Recommended Models Only
                </Text>
                <Switch
                  checked={showRecommendedOnly}
                  onCheckedChange={handleToggleChange}
                />
              </div>

              {/* Model List */}
              <PopoverMenu scrollContainerRef={scrollContainerRef}>
                {groupedOptions.length === 0
                  ? [
                      <div key="empty" className="py-3 px-2">
                        <Text secondaryBody text03>
                          No models found
                        </Text>
                      </div>,
                    ]
                  : groupedOptions.length === 1
                    ? // Single provider - show models directly
                      [
                        <div
                          key="single-provider"
                          className="flex flex-col gap-1"
                        >
                          {groupedOptions[0]!.isConfigured ? (
                            groupedOptions[0]!.options.map(renderModelItem)
                          ) : (
                            <div className="flex items-center justify-between px-2 py-2">
                              <Text secondaryBody text03>
                                Not configured
                              </Text>
                              <button
                                onClick={() =>
                                  handleConnectClick(
                                    groupedOptions[0]!.providerKey
                                  )
                                }
                                className="flex items-center gap-1 px-2 py-1 text-xs rounded-08 bg-background-02 hover:bg-background-03 transition-colors"
                              >
                                <SvgPlug className="w-3 h-3" />
                                <span>Connect</span>
                              </button>
                            </div>
                          )}
                        </div>,
                      ]
                    : // Multiple providers - show accordion
                      [
                        <Accordion
                          key="accordion"
                          type="multiple"
                          value={expandedGroups}
                          onValueChange={handleAccordionChange}
                          className="w-full flex flex-col"
                        >
                          {groupedOptions.map((group) => {
                            const isExpanded = expandedGroups.includes(
                              group.providerKey
                            );
                            const ProviderIcon = getProviderIcon(
                              group.providerKey
                            );

                            return (
                              <AccordionItem
                                key={group.providerKey}
                                value={group.providerKey}
                                className="border-none pt-1"
                              >
                                {/* Group Header */}
                                <AccordionTrigger className="flex items-center rounded-08 hover:no-underline hover:bg-background-tint-02 group [&>svg]:hidden w-full py-1">
                                  <div className="flex items-center gap-1 shrink-0">
                                    <div className="flex items-center justify-center size-5 shrink-0">
                                      <ProviderIcon size={16} />
                                    </div>
                                    <Text
                                      secondaryBody
                                      text03
                                      nowrap
                                      className="px-0.5"
                                    >
                                      {group.displayName}
                                    </Text>
                                  </div>
                                  <div className="flex-1" />
                                  {!group.isConfigured && (
                                    <button
                                      onClick={(e) => {
                                        e.stopPropagation();
                                        handleConnectClick(group.providerKey);
                                      }}
                                      className="flex items-center gap-1 px-2 py-0.5 mr-1 text-xs rounded-08 bg-background-02 hover:bg-background-03 transition-colors"
                                    >
                                      <SvgPlug className="w-3 h-3" />
                                      <span>Connect</span>
                                    </button>
                                  )}
                                  <div className="flex items-center justify-center size-6 shrink-0">
                                    {isExpanded ? (
                                      <SvgChevronDown className="h-4 w-4 stroke-text-04 shrink-0" />
                                    ) : (
                                      <SvgChevronRight className="h-4 w-4 stroke-text-04 shrink-0" />
                                    )}
                                  </div>
                                </AccordionTrigger>

                                {/* Model Items */}
                                <AccordionContent className="pb-0 pt-0">
                                  <div className="flex flex-col gap-1">
                                    {group.isConfigured ? (
                                      group.options.map(renderModelItem)
                                    ) : (
                                      <div className="py-1.5 px-3">
                                        <Text secondaryBody text03>
                                          Not configured
                                        </Text>
                                      </div>
                                    )}
                                  </div>
                                </AccordionContent>
                              </AccordionItem>
                            );
                          })}
                        </Accordion>,
                      ]}
              </PopoverMenu>
            </Section>
          </div>
        </Popover.Content>
      </Popover>

      {/* Warning modal when turning OFF "Recommended Models Only" */}
      <ToggleWarningModal
        open={showToggleWarning}
        onConfirm={() => {
          setShowRecommendedOnly(false);
          isClosingModalRef.current = true;
          setShowToggleWarning(false);
        }}
        onCancel={() => {
          isClosingModalRef.current = true;
          setShowToggleWarning(false);
        }}
      />
    </>
  );
}


================================================
FILE: web/src/app/craft/components/BuildMessageList.tsx
================================================
"use client";

import { useRef, useEffect } from "react";
import Logo from "@/refresh-components/Logo";
import TextChunk from "@/app/craft/components/TextChunk";
import ThinkingCard from "@/app/craft/components/ThinkingCard";
import ToolCallPill from "@/app/craft/components/ToolCallPill";
import TodoListCard from "@/app/craft/components/TodoListCard";
import WorkingPill from "@/app/craft/components/WorkingPill";
import UserMessage from "@/app/craft/components/UserMessage";
import { BuildMessage } from "@/app/craft/types/streamingTypes";
import {
  StreamItem,
  GroupedStreamItem,
  ToolCallState,
} from "@/app/craft/types/displayTypes";
import { isWorkingToolCall } from "@/app/craft/utils/streamItemHelpers";

/**
 * BlinkingDot - Pulsing gray circle for loading state
 * Matches the main chat UI's loading indicator
 */
function BlinkingDot() {
  return (
    <span className="animate-pulse flex-none bg-theme-primary-05 inline-block rounded-full h-3 w-3 ml-2 mt-2" />
  );
}

/**
 * Group consecutive working tool calls into WorkingGroup items.
 * Keeps text, thinking, todo_list, and task tool_calls as individual items.
 */
function groupStreamItems(items: StreamItem[]): GroupedStreamItem[] {
  const grouped: GroupedStreamItem[] = [];
  let currentWorkingGroup: ToolCallState[] = [];

  const flushWorkingGroup = () => {
    const firstToolCall = currentWorkingGroup[0];
    if (firstToolCall) {
      grouped.push({
        type: "working_group",
        id: `working-${firstToolCall.id}`,
        toolCalls: [...currentWorkingGroup],
      });
      currentWorkingGroup = [];
    }
  };

  for (const item of items) {
    if (item.type === "tool_call" && isWorkingToolCall(item.toolCall)) {
      // Add to current working group
      currentWorkingGroup.push(item.toolCall);
    } else {
      // Flush any accumulated working group before adding non-working item
      flushWorkingGroup();
      // Add the item as-is (text, thinking, todo_list, or task tool_call)
      grouped.push(item as GroupedStreamItem);
    }
  }

  // Don't forget to flush any remaining working group
  flushWorkingGroup();

  return grouped;
}

interface BuildMessageListProps {
  messages: BuildMessage[];
  streamItems: StreamItem[];
  isStreaming?: boolean;
  /** Whether auto-scroll is enabled (user is at bottom) */
  autoScrollEnabled?: boolean;
  /** Ref to the end marker div for scroll detection */
  messagesEndRef?: React.RefObject<HTMLDivElement>;
}

/**
 * BuildMessageList - Displays the conversation history with FIFO rendering
 *
 * User messages are shown as right-aligned bubbles.
 * Agent responses render streamItems in exact chronological order:
 * text, thinking, and tool calls appear exactly as they arrived.
 */
export default function BuildMessageList({
  messages,
  streamItems,
  isStreaming = false,
  autoScrollEnabled = true,
  messagesEndRef: externalMessagesEndRef,
}: BuildMessageListProps) {
  const internalMessagesEndRef = useRef<HTMLDivElement>(null);
  // Use external ref if provided, otherwise use internal ref
  const messagesEndRef = externalMessagesEndRef ?? internalMessagesEndRef;

  // Auto-scroll to bottom when new content arrives (only if auto-scroll is enabled)
  useEffect(() => {
    if (autoScrollEnabled && messagesEndRef.current) {
      messagesEndRef.current.scrollIntoView({ behavior: "smooth" });
    }
  }, [messages.length, streamItems.length, autoScrollEnabled, messagesEndRef]);

  // Determine if we should show streaming response area (for current in-progress response)
  const hasStreamItems = streamItems.length > 0;
  const lastMessage = messages[messages.length - 1];
  const lastMessageIsUser = lastMessage?.type === "user";
  // Show streaming area if we have stream items OR if we're waiting for a response to the latest user message
  const showStreamingArea =
    hasStreamItems || (isStreaming && lastMessageIsUser);

  // Check for active tools (for "Working..." state)
  const hasActiveTools = streamItems.some(
    (item) =>
      item.type === "tool_call" &&
      (item.toolCall.status === "in_progress" ||
        item.toolCall.status === "pending")
  );

  // Helper to render stream items with grouping (used for both saved messages and current streaming)
  const renderStreamItems = (items: StreamItem[], isCurrentStream = false) => {
    const grouped = groupStreamItems(items);

    // Find the index of the last working_group (only relevant for current stream)
    const lastWorkingGroupIndex = isCurrentStream
      ? grouped.findLastIndex((item) => item.type === "working_group")
      : -1;

    return grouped.map((item, index) => {
      switch (item.type) {
        case "text":
          return <TextChunk key={item.id} content={item.content} />;
        case "thinking":
          return (
            <ThinkingCard
              key={item.id}
              content={item.content}
              isStreaming={item.isStreaming}
            />
          );
        case "tool_call":
          // Only task/subagent tools reach here (non-working tools)
          return <ToolCallPill key={item.id} toolCall={item.toolCall} />;
        case "todo_list":
          return (
            <TodoListCard
              key={item.id}
              todoList={item.todoList}
              defaultOpen={item.todoList.isOpen}
            />
          );
        case "working_group":
          return (
            <WorkingPill
              key={item.id}
              toolCalls={item.toolCalls}
              isLatest={index === lastWorkingGroupIndex}
            />
          );
        default:
          return null;
      }
    });
  };

  // Helper to render an agent message
  const renderAgentMessage = (message: BuildMessage) => {
    // Check if we have saved stream items in message_metadata
    const savedStreamItems = message.message_metadata?.streamItems as
      | StreamItem[]
      | undefined;

    return (
      <div key={message.id} className="flex items-start gap-3 py-4">
        <div className="shrink-0 mt-0.5">
          <Logo folded size={24} />
        </div>
        <div className="flex-1 flex flex-col gap-3 min-w-0">
          {savedStreamItems && savedStreamItems.length > 0 ? (
            // Render full stream items (includes tool calls, thinking, etc.)
            renderStreamItems(savedStreamItems)
          ) : (
            // Fallback to text content only
            <TextChunk content={message.content} />
          )}
        </div>
      </div>
    );
  };

  return (
    <div className="flex flex-col items-center px-4 pb-4">
      <div className="w-full max-w-2xl backdrop-blur-md rounded-16 p-4">
        {/* Render messages in order (user and agent interleaved) */}
        {messages.map((message) =>
          message.type === "user" ? (
            <UserMessage key={message.id} content={message.content} />
          ) : message.type === "assistant" ? (
            renderAgentMessage(message)
          ) : null
        )}

        {/* Render current streaming response (for in-progress response) */}
        {showStreamingArea && (
          <div className="flex items-start gap-3 py-4">
            <div className="shrink-0 mt-0.5">
              <Logo folded size={24} />
            </div>
            <div className="flex-1 flex flex-col gap-3 min-w-0">
              {!hasStreamItems ? (
                // Loading state - no content yet, show blinking dot like main chat
                <BlinkingDot />
              ) : (
                <>
                  {/* Render stream items in FIFO order */}
                  {renderStreamItems(streamItems, true)}

                  {/* Streaming indicator when actively streaming text */}
                  {isStreaming && hasStreamItems && !hasActiveTools && (
                    <BlinkingDot />
                  )}
                </>
              )}
            </div>
          </div>
        )}

        {/* Scroll anchor */}
        <div ref={messagesEndRef} />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/BuildWelcome.tsx
================================================
"use client";

import { useRef } from "react";
import { BuildFile } from "@/app/craft/contexts/UploadFilesContext";
import Text from "@/refresh-components/texts/Text";
import Logo from "@/refresh-components/Logo";
import InputBar, { InputBarHandle } from "@/app/craft/components/InputBar";
import SuggestedPrompts from "@/app/craft/components/SuggestedPrompts";
import ConnectDataBanner from "@/app/craft/components/ConnectDataBanner";
import { getBuildUserPersona } from "@/app/craft/onboarding/constants";
import { workAreaToPersona } from "@/app/craft/constants/exampleBuildPrompts";

interface BuildWelcomeProps {
  onSubmit: (
    message: string,
    files: BuildFile[],
    demoDataEnabled: boolean
  ) => void;
  isRunning: boolean;
  /** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
  sandboxInitializing?: boolean;
}

/**
 * BuildWelcome - Welcome screen shown when no session exists
 *
 * Displays a centered welcome message and input bar to start a new build.
 */
export default function BuildWelcome({
  onSubmit,
  isRunning,
  sandboxInitializing = false,
}: BuildWelcomeProps) {
  const inputBarRef = useRef<InputBarHandle>(null);
  const userPersona = getBuildUserPersona();
  const persona = workAreaToPersona(userPersona?.workArea);

  const handlePromptClick = (promptText: string) => {
    inputBarRef.current?.setMessage(promptText);
  };

  return (
    <div className="h-full flex flex-col items-center justify-center px-4">
      <div className="flex flex-col items-center gap-4 mb-6">
        <Logo folded size={48} />
        <Text headingH2 text05>
          What shall we craft today?
        </Text>
      </div>
      <div className="w-full max-w-2xl">
        <InputBar
          ref={inputBarRef}
          onSubmit={onSubmit}
          isRunning={isRunning}
          placeholder="Analyze my data and create a dashboard..."
          sandboxInitializing={sandboxInitializing}
          isWelcomePage
        />
        <ConnectDataBanner />
        <SuggestedPrompts persona={persona} onPromptClick={handlePromptClick} />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/ChatPanel.tsx
================================================
"use client";

import { useCallback, useState, useEffect, useRef, useMemo } from "react";
import { useRouter } from "next/navigation";
import { track, AnalyticsEvent } from "@/lib/analytics";
import {
  useSession,
  useSessionId,
  useHasSession,
  useIsRunning,
  useOutputPanelOpen,
  useToggleOutputPanel,
  useBuildSessionStore,
  useIsPreProvisioning,
  useIsPreProvisioningFailed,
  usePreProvisionedSessionId,
  useFollowupSuggestions,
  useSuggestionsLoading,
} from "@/app/craft/hooks/useBuildSessionStore";
import { useBuildStreaming } from "@/app/craft/hooks/useBuildStreaming";
import { useUsageLimits } from "@/app/craft/hooks/useUsageLimits";
import { SessionErrorCode } from "@/app/craft/types/streamingTypes";
import {
  BuildFile,
  UploadFileStatus,
  useUploadFilesContext,
} from "@/app/craft/contexts/UploadFilesContext";
import { CRAFT_SEARCH_PARAM_NAMES } from "@/app/craft/services/searchParams";
import { CRAFT_PATH } from "@/app/craft/v1/constants";
import { toast } from "@/hooks/useToast";
import InputBar, { InputBarHandle } from "@/app/craft/components/InputBar";
import BuildWelcome from "@/app/craft/components/BuildWelcome";
import BuildMessageList from "@/app/craft/components/BuildMessageList";
import SuggestionBubbles from "@/app/craft/components/SuggestionBubbles";
import ConnectorBannersRow from "@/app/craft/components/ConnectorBannersRow";
import SandboxStatusIndicator from "@/app/craft/components/SandboxStatusIndicator";
import UpgradePlanModal from "@/app/craft/components/UpgradePlanModal";
import IconButton from "@/refresh-components/buttons/IconButton";
import { SvgSidebar, SvgChevronDown } from "@opal/icons";
import { Button as OpalButton } from "@opal/components";
import { useBuildContext } from "@/app/craft/contexts/BuildContext";
import useScreenSize from "@/hooks/useScreenSize";
import { cn } from "@/lib/utils";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";

interface BuildChatPanelProps {
  /** Session ID from URL - used to prevent welcome flash while loading */
  existingSessionId?: string | null;
}

/**
 * BuildChatPanel - Center panel containing the chat interface
 *
 * Handles:
 * - Welcome state (no session)
 * - Message list (when session exists)
 * - Input bar at bottom
 * - Header with output panel toggle
 */
export default function BuildChatPanel({
  existingSessionId,
}: BuildChatPanelProps) {
  const router = useRouter();
  const outputPanelOpen = useOutputPanelOpen();
  const session = useSession();
  const sessionId = useSessionId();
  const hasSession = useHasSession();
  const isRunning = useIsRunning();
  const { setLeftSidebarFolded, leftSidebarFolded } = useBuildContext();
  const { isMobile } = useScreenSize();
  const toggleOutputPanel = useToggleOutputPanel();

  // Track when output panel is fully closed (after animation completes)
  // This prevents the "open panel" button from appearing during the close animation
  const [isOutputPanelFullyClosed, setIsOutputPanelFullyClosed] =
    useState(!outputPanelOpen);

  const { limits, refreshLimits } = useUsageLimits();
  const [showUpgradeModal, setShowUpgradeModal] = useState(false);
  const setCurrentError = useBuildSessionStore(
    (state) => state.setCurrentError
  );

  useEffect(() => {
    if (session?.error === SessionErrorCode.RATE_LIMIT_EXCEEDED) {
      setShowUpgradeModal(true);
      setCurrentError(null);
      refreshLimits();
    }
  }, [session?.error, refreshLimits, setCurrentError]);

  useEffect(() => {
    if (outputPanelOpen) {
      // Panel opening - immediately mark as not fully closed
      setIsOutputPanelFullyClosed(false);
    } else {
      // Panel closing - wait for 300ms animation to complete
      const timer = setTimeout(() => setIsOutputPanelFullyClosed(true), 300);
      return () => clearTimeout(timer);
    }
  }, [outputPanelOpen]);

  // Access actions directly like chat does - these don't cause re-renders
  const consumePreProvisionedSession = useBuildSessionStore(
    (state) => state.consumePreProvisionedSession
  );
  const createSession = useBuildSessionStore((state) => state.createSession);
  const appendMessageToCurrent = useBuildSessionStore(
    (state) => state.appendMessageToCurrent
  );
  const nameBuildSession = useBuildSessionStore(
    (state) => state.nameBuildSession
  );
  const { streamMessage } = useBuildStreaming();
  const isPreProvisioning = useIsPreProvisioning();
  const isPreProvisioningFailed = useIsPreProvisioningFailed();
  const preProvisionedSessionId = usePreProvisionedSessionId();

  // Disable input when pre-provisioning is in progress or failed (waiting for retry)
  const sandboxNotReady = isPreProvisioning || isPreProvisioningFailed;
  const { currentMessageFiles, hasUploadingFiles, setActiveSession } =
    useUploadFilesContext();
  const followupSuggestions = useFollowupSuggestions();
  const suggestionsLoading = useSuggestionsLoading();
  const clearFollowupSuggestions = useBuildSessionStore(
    (state) => state.clearFollowupSuggestions
  );

  // Ref to access current file state in async callbacks
  const currentFilesRef = useRef(currentMessageFiles);
  useEffect(() => {
    currentFilesRef.current = currentMessageFiles;
  }, [currentMessageFiles]);

  /**
   * Keep the upload context in sync with the active session.
   * The context handles all session change logic internally (fetching attachments,
   * clearing files, auto-uploading pending files).
   */
  useEffect(() => {
    const activeSession = existingSessionId ?? preProvisionedSessionId ?? null;
    setActiveSession(activeSession);
  }, [existingSessionId, preProvisionedSessionId, setActiveSession]);

  // Ref to access InputBar methods
  const inputBarRef = useRef<InputBarHandle>(null);

  // Scroll detection for auto-scroll "magnet"
  const scrollContainerRef = useRef<HTMLDivElement>(null);
  const [isAtBottom, setIsAtBottom] = useState(true);
  const [showScrollButton, setShowScrollButton] = useState(false);
  const prevScrollTopRef = useRef(0);

  // Check if user is at bottom of scroll container
  const checkIfAtBottom = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return true;

    const scrollTop = container.scrollTop;
    const scrollHeight = container.scrollHeight;
    const clientHeight = container.clientHeight;
    const distanceFromBottom = scrollHeight - scrollTop - clientHeight;
    const threshold = 32; // 2rem threshold

    return distanceFromBottom <= threshold;
  }, []);

  // Handle scroll events - only update state on user-initiated scrolling
  const handleScroll = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return;

    const currentScrollTop = container.scrollTop;
    const prevScrollTop = prevScrollTopRef.current;
    const wasAtBottom = checkIfAtBottom();

    // Detect if user scrolled up (scrollTop decreased)
    // This distinguishes user scrolling from content growth
    const scrolledUp = currentScrollTop < prevScrollTop - 5; // 5px threshold

    // Only update state if user scrolled up (definitely user action)
    // If content grows and we're still at bottom, don't change state
    if (scrolledUp) {
      // User scrolled up - release auto-scroll magnet
      setIsAtBottom(wasAtBottom);
      setShowScrollButton(!wasAtBottom);
    } else if (wasAtBottom) {
      // We're at bottom - ensure button stays hidden (handles content growth)
      setIsAtBottom(true);
      setShowScrollButton(false);
    }
    // If scrollTop increased but we're still at bottom, it's content growth - do nothing

    prevScrollTopRef.current = currentScrollTop;
  }, [checkIfAtBottom]);

  // Scroll to bottom and resume auto-scroll
  const scrollToBottom = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return;

    // Use requestAnimationFrame to ensure we scroll after any layout changes
    requestAnimationFrame(() => {
      if (!container) return;

      // Scroll to a value larger than scrollHeight - browsers will clamp to max
      // This ensures we always reach the absolute bottom
      const targetScroll = container.scrollHeight + 1000; // Add buffer to ensure we go all the way
      container.scrollTo({ top: targetScroll, behavior: "smooth" });

      // Update state immediately
      setIsAtBottom(true);
      setShowScrollButton(false);

      // Update prevScrollTopRef after scroll completes
      setTimeout(() => {
        if (container) {
          prevScrollTopRef.current = container.scrollTop;
        }
      }, 600); // Smooth scroll animation duration
    });
  }, []);

  // Reset scroll state when session changes
  useEffect(() => {
    setIsAtBottom(true);
    setShowScrollButton(false);
  }, [sessionId]);

  // Handle suggestion bubble click - populate InputBar with the suggestion
  const handleSuggestionSelect = useCallback((text: string) => {
    inputBarRef.current?.setMessage(text);
  }, []);

  // Check if agent has finished streaming at least one message
  // Show banner only after first agent message completes streaming
  const shouldShowConnectorBanner = useMemo(() => {
    // Don't show if currently streaming
    if (isRunning) {
      return false;
    }
    // Check if there's at least one agent message in the session
    const hasAgentMessage = session?.messages?.some(
      (msg) => msg.type === "assistant"
    );
    return hasAgentMessage ?? false;
  }, [isRunning, session?.messages]);

  const handleSubmit = useCallback(
    async (message: string, files: BuildFile[], demoDataEnabled: boolean) => {
      if (limits?.isLimited) {
        setShowUpgradeModal(true);
        return;
      }

      track(AnalyticsEvent.SENT_CRAFT_MESSAGE);

      if (hasSession && sessionId) {
        // Existing session flow
        // Check if response is still streaming - show toast like main chat does
        if (isRunning) {
          toast.error("Please wait for the current operation to complete.");
          return;
        }

        // Clear follow-up suggestions when user sends a new message
        clearFollowupSuggestions(sessionId);

        // Add user message to state
        appendMessageToCurrent({
          id: `msg-${Date.now()}`,
          type: "user",
          content: message,
          timestamp: new Date(),
        });
        // Stream the response
        await streamMessage(sessionId, message);
        refreshLimits();
      } else {
        // New session flow - ALWAYS use pre-provisioned session
        const newSessionId = await consumePreProvisionedSession();

        if (!newSessionId) {
          // This should not happen if UI properly disables input until ready
          console.error("[ChatPanel] No pre-provisioned session available");
          toast.error("Please wait for sandbox to initialize");
          return;
        }

        // Pre-provisioned session flow:
        // The backend session already exists (created during pre-provisioning).
        // Files were already uploaded immediately when attached to the pre-provisioned session.
        // Here we initialize the LOCAL Zustand store entry with the right state.
        const userMessage = {
          id: `msg-${Date.now()}`,
          type: "user" as const,
          content: message,
          timestamp: new Date(),
        };
        // Initialize local state (NOT an API call - backend session already exists)
        // - status: "running" disables input immediately
        // - isLoaded: false allows loadSession to fetch sandbox info while preserving messages
        createSession(newSessionId, {
          messages: [userMessage],
          status: "running",
        });

        // Handle files that weren't successfully uploaded yet
        // This handles edge cases where:
        // 1. File is still uploading when user sends message - wait for it
        // 2. File upload failed and needs retry
        // 3. File was attached but upload hasn't started yet

        // Wait for any in-flight uploads to complete (max 5 seconds)
        // Use ref to check current state during polling
        if (hasUploadingFiles) {
          const maxWaitMs = 5000;
          const checkIntervalMs = 100;
          let waited = 0;

          await new Promise<void>((resolve) => {
            const checkUploads = () => {
              // Check current state via ref (updates with each render)
              const stillUploading = currentFilesRef.current.some(
                (f) => f.status === UploadFileStatus.UPLOADING
              );
              if (!stillUploading || waited >= maxWaitMs) {
                resolve();
              } else {
                waited += checkIntervalMs;
                setTimeout(checkUploads, checkIntervalMs);
              }
            };
            checkUploads();
          });
        }

        // Note: PENDING files are auto-uploaded by the context when session becomes available

        // Navigate to URL - session controller will set currentSessionId
        router.push(
          `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${newSessionId}`
        );

        // Schedule naming after delay (message will be saved by then)
        // Note: Don't call refreshSessionHistory() here - it would overwrite the
        // optimistic update from consumePreProvisionedSession() before the message is saved
        setTimeout(() => nameBuildSession(newSessionId), 1000);

        // Stream the response (uses session ID directly, not currentSessionId)
        await streamMessage(newSessionId, message);
        refreshLimits();
      }
    },
    [
      hasSession,
      sessionId,
      isRunning,
      appendMessageToCurrent,
      streamMessage,
      consumePreProvisionedSession,
      createSession,
      nameBuildSession,
      router,
      clearFollowupSuggestions,
      hasUploadingFiles,
      limits,
      refreshLimits,
    ]
  );

  return (
    <div className="h-full w-full">
      <UpgradePlanModal
        open={showUpgradeModal}
        onClose={() => setShowUpgradeModal(false)}
        limits={limits}
      />
      {/* Content wrapper - shrinks when output panel opens */}
      <div
        className={cn(
          "flex flex-col h-full transition-all duration-300 ease-in-out",
          outputPanelOpen ? "w-1/2 pl-4" : "w-full"
        )}
      >
        {/* Chat header */}
        <div className="flex flex-row items-center justify-between pl-4 pr-4 py-3 relative overflow-visible">
          <div className="flex flex-row items-center gap-2 max-w-[75%]">
            {/* Mobile sidebar toggle - only show on mobile when sidebar is folded */}
            {isMobile && leftSidebarFolded && (
              <OpalButton
                icon={SvgSidebar}
                onClick={() => setLeftSidebarFolded(false)}
                prominence="tertiary"
                size="sm"
              />
            )}
            <SandboxStatusIndicator />
          </div>
          {/* Output panel toggle - only show when panel is fully closed (after animation) */}
          {isOutputPanelFullyClosed && (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <IconButton
              icon={SvgSidebar}
              onClick={toggleOutputPanel}
              tooltip="Open output panel"
              tertiary
              className="!bg-background-tint-00 border rounded-full"
              iconClassName="!stroke-text-04"
            />
          )}
          {/* Soft fade border at bottom */}
          <div className="absolute bottom-0 left-0 right-0 h-10 bg-gradient-to-b from-background-neutral-01 to-transparent pointer-events-none translate-y-full z-10" />
        </div>

        {/* Main content area */}
        <div
          ref={scrollContainerRef}
          onScroll={handleScroll}
          className="flex-1 overflow-auto"
        >
          {!hasSession && !existingSessionId ? (
            <BuildWelcome
              onSubmit={handleSubmit}
              isRunning={isRunning}
              sandboxInitializing={sandboxNotReady}
            />
          ) : (
            <BuildMessageList
              messages={session?.messages ?? []}
              streamItems={session?.streamItems ?? []}
              isStreaming={isRunning}
              autoScrollEnabled={isAtBottom}
            />
          )}
        </div>

        {/* Input bar at bottom when session exists */}
        {(hasSession || existingSessionId) && (
          <div className="px-4 pb-8 pt-4 relative">
            {/* Soft fade border at top */}
            <div className="absolute top-0 left-0 right-0 h-12 bg-gradient-to-t from-background-neutral-01 to-transparent pointer-events-none -translate-y-full" />
            <div className="max-w-2xl mx-auto">
              {/* Scroll to bottom button - shown when user has scrolled away */}
              {showScrollButton && (
                <div className="absolute -top-12 left-1/2 -translate-x-1/2 z-10">
                  <SimpleTooltip tooltip="Scroll to bottom" delayDuration={200}>
                    <button
                      onClick={scrollToBottom}
                      className={cn(
                        "flex items-center justify-center",
                        "w-8 h-8 rounded-full",
                        "bg-background-neutral-inverted-00 border border-border-01",
                        "shadow-01 hover:shadow-02",
                        "transition-all duration-200",
                        "hover:bg-background-tint-inverted-01"
                      )}
                      aria-label="Scroll to bottom"
                    >
                      <SvgChevronDown
                        size={20}
                        className="stroke-background-neutral-00"
                      />
                    </button>
                  </SimpleTooltip>
                </div>
              )}
              {/* Follow-up suggestion bubbles - show after first agent message */}
              {(followupSuggestions || suggestionsLoading) && (
                <div className="mb-3">
                  <SuggestionBubbles
                    suggestions={followupSuggestions ?? []}
                    loading={suggestionsLoading}
                    onSelect={handleSuggestionSelect}
                  />
                </div>
              )}
              {/* Connector banners - show after first agent message finishes streaming */}
              {shouldShowConnectorBanner && (
                <ConnectorBannersRow className="" />
              )}
              <InputBar
                ref={inputBarRef}
                onSubmit={handleSubmit}
                isRunning={isRunning}
                placeholder="Continue the conversation..."
              />
            </div>
          </div>
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/ConnectDataBanner.tsx
================================================
"use client";

import { useRouter } from "next/navigation";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import {
  ConfluenceIcon,
  GoogleDriveIcon,
  GithubIcon,
  NotionIcon,
  ColorSlackIcon,
  HubSpotIcon,
} from "@/components/icons/icons";
import { SvgChevronRight } from "@opal/icons";
import { useBuildConnectors } from "@/app/craft/hooks/useBuildConnectors";
import { CRAFT_CONFIGURE_PATH } from "@/app/craft/v1/constants";

interface ConnectDataBannerProps {
  className?: string;
}

function IconWrapper({ children }: { children: React.ReactNode }) {
  return (
    <div className="w-6 h-6 rounded-full bg-background-neutral-00 border border-border-01 flex items-center justify-center overflow-hidden">
      {children}
    </div>
  );
}

export default function ConnectDataBanner({
  className,
}: ConnectDataBannerProps) {
  const router = useRouter();
  const { hasConnectorEverSucceeded, isLoading } = useBuildConnectors();

  const handleClick = () => {
    router.push(CRAFT_CONFIGURE_PATH);
  };

  // Only show banner if user hasn't successfully synced any connectors (and not loading)
  if (isLoading || hasConnectorEverSucceeded) {
    return null;
  }

  return (
    <div className="relative">
      <button
        onClick={handleClick}
        className={cn(
          // Layout
          "flex items-center justify-between gap-2",
          "mx-auto px-4 py-2",
          // Sizing - thin and full width to match InputBar
          "h-9 w-[50%]",
          // Appearance - slightly different color, rounded bottom
          "bg-background-neutral-01 hover:bg-background-neutral-02",
          "rounded-b-12 rounded-t-none",
          // Border for definition
          "border border-t-0 border-border-01",
          // Transition
          "transition-colors duration-200",
          // Cursor
          "cursor-pointer",
          // Group for hover effects
          "group",
          className
        )}
      >
        {/* Left side: 3 icons */}
        <div className="flex items-center -space-x-2">
          {/* Outermost - no movement */}
          <div>
            <IconWrapper>
              <ColorSlackIcon size={16} />
            </IconWrapper>
          </div>
          {/* Middle - slight movement */}
          <div className="transition-transform duration-200 group-hover:translate-x-2">
            <IconWrapper>
              <GoogleDriveIcon size={16} />
            </IconWrapper>
          </div>
          {/* Innermost - moves towards center */}
          <div className="transition-transform duration-200 group-hover:translate-x-4">
            <IconWrapper>
              <ConfluenceIcon size={16} />
            </IconWrapper>
          </div>
        </div>

        {/* Center: Text and Arrow */}
        <div className="flex items-center justify-center gap-1">
          <Text secondaryBody text03>
            Connect your data
          </Text>
          <SvgChevronRight className="h-4 w-4 text-text-03" />
        </div>

        {/* Right side: 3 icons */}
        <div className="flex items-center -space-x-2">
          {/* Innermost - moves towards center */}
          <div className="transition-transform duration-200 group-hover:-translate-x-4">
            <IconWrapper>
              <GithubIcon size={16} />
            </IconWrapper>
          </div>
          {/* Middle - slight movement */}
          <div className="transition-transform duration-200 group-hover:-translate-x-2">
            <IconWrapper>
              <NotionIcon size={16} />
            </IconWrapper>
          </div>
          {/* Outermost - no movement */}
          <div>
            <IconWrapper>
              <HubSpotIcon size={16} />
            </IconWrapper>
          </div>
        </div>
      </button>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/ConnectorBannersRow.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import {
  ConfluenceIcon,
  GoogleDriveIcon,
  GithubIcon,
  NotionIcon,
  ColorSlackIcon,
  HubSpotIcon,
} from "@/components/icons/icons";
import { SvgChevronRight, SvgCalendar } from "@opal/icons";
import { useBuildConnectors } from "@/app/craft/hooks/useBuildConnectors";
import {
  CRAFT_CONFIGURE_PATH,
  ONYX_CRAFT_CALENDAR_URL,
} from "@/app/craft/v1/constants";

interface ConnectorBannersRowProps {
  className?: string;
}

function IconWrapper({ children }: { children: React.ReactNode }) {
  return (
    <div className="w-6 h-6 rounded-full bg-background-neutral-00 border border-border-01 flex items-center justify-center overflow-hidden">
      {children}
    </div>
  );
}

/**
 * Row of two banners that appear above the InputBar after first agent response.
 * - Left: "Connect your data" - exact same look as welcome page banner, but flipped
 * - Right: "Get help setting up connectors" - links to cal.com booking
 *
 * Only shows if user has no connectors configured.
 * Slides up from the input bar with animation.
 */
export default function ConnectorBannersRow({
  className,
}: ConnectorBannersRowProps) {
  const { hasConnectorEverSucceeded } = useBuildConnectors();

  // Hide if user has successfully synced at least one connector
  if (hasConnectorEverSucceeded) {
    return null;
  }

  const handleConnectClick = () => {
    window.location.href = CRAFT_CONFIGURE_PATH;
  };

  const handleHelpClick = () => {
    window.open(ONYX_CRAFT_CALENDAR_URL, "_blank");
  };

  return (
    <div
      className={cn(
        "flex justify-center animate-in slide-in-from-bottom-2 fade-in duration-300",
        className
      )}
    >
      {/* Left banner: Connect your data - exact same as welcome page but flipped */}
      <button
        onClick={handleConnectClick}
        className={cn(
          // Layout
          "flex items-center justify-between gap-2",
          "px-4 py-2",
          // Sizing - thin and slightly narrower than 50% width
          "h-9 w-[calc(48%-4px)]",
          // Appearance - rounded top left only
          "bg-background-neutral-01 hover:bg-background-neutral-02",
          "rounded-tl-12 rounded-tr-none rounded-bl-none rounded-br-none",
          // Border - flipped: no bottom border instead of no top
          "border border-b-0 border-border-01",
          // Transition
          "transition-colors duration-200",
          // Cursor
          "cursor-pointer",
          // Group for hover effects
          "group"
        )}
      >
        {/* Left side: 3 icons */}
        <div className="flex items-center -space-x-2">
          {/* Outermost - no movement */}
          <div>
            <IconWrapper>
              <ColorSlackIcon size={16} />
            </IconWrapper>
          </div>
          {/* Middle - slight movement */}
          <div className="transition-transform duration-200 group-hover:translate-x-2">
            <IconWrapper>
              <GoogleDriveIcon size={16} />
            </IconWrapper>
          </div>
          {/* Innermost - moves towards center */}
          <div className="transition-transform duration-200 group-hover:translate-x-4">
            <IconWrapper>
              <ConfluenceIcon size={16} />
            </IconWrapper>
          </div>
        </div>

        {/* Center: Text and Arrow */}
        <div className="flex items-center justify-center gap-1">
          <Text secondaryBody text03>
            Connect your data
          </Text>
          <SvgChevronRight className="h-4 w-4 text-text-03" />
        </div>

        {/* Right side: 3 icons */}
        <div className="flex items-center -space-x-2">
          {/* Innermost - moves towards center */}
          <div className="transition-transform duration-200 group-hover:-translate-x-4">
            <IconWrapper>
              <GithubIcon size={16} />
            </IconWrapper>
          </div>
          {/* Middle - slight movement */}
          <div className="transition-transform duration-200 group-hover:-translate-x-2">
            <IconWrapper>
              <NotionIcon size={16} />
            </IconWrapper>
          </div>
          {/* Outermost - no movement */}
          <div>
            <IconWrapper>
              <HubSpotIcon size={16} />
            </IconWrapper>
          </div>
        </div>
      </button>

      {/* Right banner: Get help setting up connectors */}
      <button
        onClick={handleHelpClick}
        className={cn(
          // Layout
          "flex items-center justify-center gap-2",
          "px-4 py-2",
          // Sizing - same as left banner
          "h-9 w-[calc(49%)]",
          // Appearance - rounded top right only
          "bg-background-neutral-01 hover:bg-background-neutral-02",
          "rounded-tr-12 rounded-tl-none rounded-bl-none rounded-br-none",
          // Border - flipped: no bottom border
          "border border-b-0 border-border-01",
          // Transition
          "transition-colors duration-200",
          // Cursor
          "cursor-pointer"
        )}
      >
        {/* Calendar icon */}
        <SvgCalendar className="h-4 w-4 text-text-03" />

        {/* Text */}
        <Text secondaryBody text03>
          Get help setting up connectors
        </Text>

        {/* Arrow indicator */}
        <SvgChevronRight className="h-4 w-4 text-text-03" />
      </button>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/CraftingLoader.tsx
================================================
"use client";

import { useState, useEffect, useRef } from "react";

const messages = [
  "Punching wood...",
  "Gathering resources...",
  "Placing blocks...",
  "Crafting your workspace...",
  "Mining for dependencies...",
  "Smelting the code...",
  "Enchanting with magic...",
  "World generation complete...",
  "/gamemode 1",
];

const MESSAGE_COUNT = messages.length;
const TYPE_DELAY = 40;
const LINE_PAUSE = 800;
const RESET_DELAY = 2000;

export default function CraftingLoader() {
  const [display, setDisplay] = useState({
    lines: [] as string[],
    currentText: "",
  });

  const lineIndexRef = useRef(0);
  const charIndexRef = useRef(0);
  const lastUpdateRef = useRef(0);
  const timeoutRef = useRef<NodeJS.Timeout | undefined>(undefined);
  const rafRef = useRef<number | undefined>(undefined);

  useEffect(() => {
    let isActive = true;

    const update = (now: number) => {
      if (!isActive) return;

      const lineIdx = lineIndexRef.current;
      const charIdx = charIndexRef.current;

      if (lineIdx >= MESSAGE_COUNT) {
        timeoutRef.current = setTimeout(() => {
          if (!isActive) return;
          lineIndexRef.current = 0;
          charIndexRef.current = 0;
          setDisplay({ lines: [], currentText: "" });
          lastUpdateRef.current = performance.now();
          rafRef.current = requestAnimationFrame(update);
        }, RESET_DELAY);
        return;
      }

      const msg = messages[lineIdx];
      if (!msg) return;

      const elapsed = now - lastUpdateRef.current;

      if (charIdx < msg.length) {
        if (elapsed >= TYPE_DELAY) {
          charIndexRef.current = charIdx + 1;
          setDisplay((prev) => ({
            lines: prev.lines,
            currentText: msg.substring(0, charIdx + 1),
          }));
          lastUpdateRef.current = now;
        }
      } else if (elapsed >= LINE_PAUSE) {
        setDisplay((prev) => ({
          lines: [...prev.lines, msg],
          currentText: "",
        }));
        lineIndexRef.current = lineIdx + 1;
        charIndexRef.current = 0;
        lastUpdateRef.current = now;
      }

      rafRef.current = requestAnimationFrame(update);
    };

    lastUpdateRef.current = performance.now();
    rafRef.current = requestAnimationFrame(update);

    return () => {
      isActive = false;
      if (rafRef.current !== undefined) cancelAnimationFrame(rafRef.current);
      if (timeoutRef.current !== undefined) clearTimeout(timeoutRef.current);
    };
  }, []);

  const { lines, currentText } = display;
  const hasCurrentText = currentText.length > 0;

  return (
    <div className="h-full bg-gradient-to-br from-neutral-950 via-neutral-900 to-neutral-950 flex flex-col items-center justify-center p-4">
      <div className="w-full max-w-md rounded-sm overflow-hidden shadow-2xl border-2 border-neutral-700">
        <div className="bg-neutral-800 px-4 py-3 flex items-center gap-2 border-b-2 border-neutral-700">
          <div className="w-3 h-3 rounded-none bg-red-500" />
          <div className="w-3 h-3 rounded-none bg-yellow-500" />
          <div className="w-3 h-3 rounded-none bg-green-500" />
          <span className="ml-4 text-neutral-500 text-sm font-mono">
            crafting_table
          </span>
        </div>

        <div className="bg-neutral-900 p-6 min-h-[250px] font-mono text-sm">
          {lines.map((line, i) => (
            <div key={i} className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span>{line}</span>
            </div>
          ))}
          {hasCurrentText ? (
            <div className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span>{currentText}</span>
              <span className="w-2 h-5 bg-emerald-500 animate-pulse ml-0.5" />
            </div>
          ) : (
            <div className="flex items-center text-neutral-300">
              <span className="text-emerald-500 mr-2">/&gt;</span>
              <span className="w-2 h-5 bg-emerald-500 animate-pulse" />
            </div>
          )}
        </div>
      </div>

      <p className="mt-6 text-neutral-500 text-sm font-mono">
        Crafting your next great idea...
      </p>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/DiffView.tsx
================================================
"use client";

import { useMemo } from "react";
import { cn } from "@/lib/utils";

interface DiffViewProps {
  oldContent: string;
  newContent: string;
  maxHeight?: string;
  /** File path for context (displayed in header) */
  filePath?: string;
}

interface DiffLine {
  type: "added" | "removed" | "unchanged" | "header";
  content: string;
  oldLineNum?: number;
  newLineNum?: number;
}

/**
 * Compute a simple line-by-line diff between old and new content.
 * Uses a basic LCS-like approach for reasonable diff output.
 */
function computeDiff(oldText: string, newText: string): DiffLine[] {
  const oldLines = oldText.split("\n");
  const newLines = newText.split("\n");

  const result: DiffLine[] = [];

  let oldIdx = 0;
  let newIdx = 0;
  let oldLineNum = 1;
  let newLineNum = 1;

  while (oldIdx < oldLines.length || newIdx < newLines.length) {
    const oldLine: string | undefined = oldLines[oldIdx];
    const newLine: string | undefined = newLines[newIdx];

    if (oldIdx >= oldLines.length || oldLine === undefined) {
      // All remaining new lines are additions
      result.push({
        type: "added",
        content: newLine ?? "",
        newLineNum: newLineNum++,
      });
      newIdx++;
    } else if (newIdx >= newLines.length || newLine === undefined) {
      // All remaining old lines are deletions
      result.push({
        type: "removed",
        content: oldLine,
        oldLineNum: oldLineNum++,
      });
      oldIdx++;
    } else if (oldLine === newLine) {
      // Lines match - unchanged
      result.push({
        type: "unchanged",
        content: oldLine,
        oldLineNum: oldLineNum++,
        newLineNum: newLineNum++,
      });
      oldIdx++;
      newIdx++;
    } else {
      // Lines differ - check if old line exists later in new, or vice versa
      const oldExistsLaterInNew = newLines.slice(newIdx + 1).includes(oldLine);
      const newExistsLaterInOld = oldLines.slice(oldIdx + 1).includes(newLine);

      if (!oldExistsLaterInNew && newExistsLaterInOld) {
        // Old line was removed
        result.push({
          type: "removed",
          content: oldLine,
          oldLineNum: oldLineNum++,
        });
        oldIdx++;
      } else if (oldExistsLaterInNew && !newExistsLaterInOld) {
        // New line was added
        result.push({
          type: "added",
          content: newLine,
          newLineNum: newLineNum++,
        });
        newIdx++;
      } else {
        // Both differ - show as removal then addition (replacement)
        result.push({
          type: "removed",
          content: oldLine,
          oldLineNum: oldLineNum++,
        });
        result.push({
          type: "added",
          content: newLine,
          newLineNum: newLineNum++,
        });
        oldIdx++;
        newIdx++;
      }
    }
  }

  return result;
}

/**
 * Collapse unchanged lines in the middle of the diff.
 * Shows context lines around changes.
 */
function collapseUnchanged(
  lines: DiffLine[],
  contextLines: number = 3
): DiffLine[] {
  const result: DiffLine[] = [];
  const changeIndices: number[] = [];

  // Find all indices with changes
  lines.forEach((line, idx) => {
    if (line.type === "added" || line.type === "removed") {
      changeIndices.push(idx);
    }
  });

  if (changeIndices.length === 0) {
    // No changes, show a summary
    if (lines.length > 10) {
      return [{ type: "header", content: `(${lines.length} unchanged lines)` }];
    }
    return lines;
  }

  // Create a set of indices to show
  const showIndices = new Set<number>();
  changeIndices.forEach((idx) => {
    for (
      let i = Math.max(0, idx - contextLines);
      i <= Math.min(lines.length - 1, idx + contextLines);
      i++
    ) {
      showIndices.add(i);
    }
  });

  let lastShownIdx = -1;
  lines.forEach((line, idx) => {
    if (showIndices.has(idx)) {
      if (lastShownIdx !== -1 && idx - lastShownIdx > 1) {
        // Add collapse marker
        const skipped = idx - lastShownIdx - 1;
        result.push({
          type: "header",
          content: `... ${skipped} unchanged line${skipped > 1 ? "s" : ""} ...`,
        });
      }
      result.push(line);
      lastShownIdx = idx;
    }
  });

  return result;
}

/**
 * DiffView - Displays a diff between old and new content
 *
 * Shows added lines in green with + prefix
 * Shows removed lines in red with - prefix
 * Collapses long unchanged sections
 */
export default function DiffView({
  oldContent,
  newContent,
  maxHeight = "300px",
  filePath,
}: DiffViewProps) {
  const diffLines = useMemo(() => {
    const rawDiff = computeDiff(oldContent, newContent);
    return collapseUnchanged(rawDiff);
  }, [oldContent, newContent]);

  // Count changes for summary
  const stats = useMemo(() => {
    const added = diffLines.filter((l) => l.type === "added").length;
    const removed = diffLines.filter((l) => l.type === "removed").length;
    return { added, removed };
  }, [diffLines]);

  return (
    <div
      className={cn(
        "rounded-08 border overflow-hidden",
        "bg-[#fafafa] border-[#e5e5e5] dark:bg-[#151617] dark:border-[#2a2a2a]"
      )}
    >
      {/* Header with stats */}
      <div
        className={cn(
          "px-3 py-2 border-b text-xs flex items-center gap-3",
          "bg-[#f5f5f5] border-[#e5e5e5] dark:bg-[#1a1a1a] dark:border-[#2a2a2a]"
        )}
        style={{ fontFamily: "var(--font-dm-mono)" }}
      >
        {filePath && (
          <span className="text-text-03 truncate flex-1">{filePath}</span>
        )}
        <div className="flex items-center gap-2 shrink-0">
          {stats.added > 0 && (
            <span className="text-green-600 dark:text-green-400">
              +{stats.added}
            </span>
          )}
          {stats.removed > 0 && (
            <span className="text-red-600 dark:text-red-400">
              -{stats.removed}
            </span>
          )}
        </div>
      </div>

      {/* Diff content */}
      <div
        className="overflow-auto text-xs"
        style={{
          fontFamily: "var(--font-dm-mono)",
          maxHeight,
        }}
      >
        {diffLines.map((line, idx) => (
          <div
            key={idx}
            className={cn(
              "px-3 py-0.5 whitespace-pre-wrap break-words",
              line.type === "added" &&
                "bg-green-100 dark:bg-green-950/40 text-green-800 dark:text-green-300",
              line.type === "removed" &&
                "bg-red-100 dark:bg-red-950/40 text-red-800 dark:text-red-300",
              line.type === "unchanged" && "text-text-03",
              line.type === "header" &&
                "text-text-04 bg-[#f0f0f0] dark:bg-[#1d1d1d] text-center italic py-1"
            )}
          >
            {line.type === "added" && (
              <span className="select-none text-green-600 dark:text-green-500 mr-2">
                +
              </span>
            )}
            {line.type === "removed" && (
              <span className="select-none text-red-600 dark:text-red-500 mr-2">
                -
              </span>
            )}
            {line.type === "unchanged" && (
              <span className="select-none text-text-04 mr-2">&nbsp;</span>
            )}
            {line.content || (line.type !== "header" ? " " : "")}
          </div>
        ))}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/FileBrowser.tsx
================================================
"use client";

import { useState, useCallback, useEffect } from "react";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
  SvgFolder,
  SvgFolderOpen,
  SvgFileSmall,
  SvgChevronRight,
  SvgChevronDown,
  SvgDownloadCloud,
  SvgEye,
  SvgHardDrive,
  SvgLoader,
} from "@opal/icons";
import {
  listDirectory,
  getArtifactUrl,
  FileSystemEntry,
} from "@/lib/build/client";
import FilePreviewModal from "@/app/craft/components/FilePreviewModal";

interface FileBrowserProps {
  sessionId: string;
}

interface DirectoryNodeProps {
  entry: FileSystemEntry;
  sessionId: string;
  depth: number;
  onPreview: (entry: FileSystemEntry) => void;
}

function DirectoryNode({
  entry,
  sessionId,
  depth,
  onPreview,
}: DirectoryNodeProps) {
  const [isOpen, setIsOpen] = useState(false);
  const [children, setChildren] = useState<FileSystemEntry[] | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const loadChildren = useCallback(async () => {
    if (children !== null) return;

    setIsLoading(true);
    setError(null);
    try {
      const listing = await listDirectory(sessionId, entry.path);
      setChildren(listing.entries);
    } catch (err) {
      setError(err instanceof Error ? err.message : "Failed to load directory");
    } finally {
      setIsLoading(false);
    }
  }, [sessionId, entry.path, children]);

  const handleToggle = async (open: boolean) => {
    setIsOpen(open);
    if (open) {
      await loadChildren();
    }
  };

  const paddingLeft = depth * 1.25;

  return (
    <Collapsible open={isOpen} onOpenChange={handleToggle}>
      <CollapsibleTrigger asChild>
        <button
          className="w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors"
          style={{ paddingLeft: `${paddingLeft}rem` }}
        >
          {isLoading ? (
            <SvgLoader className="size-4 stroke-text-03 animate-spin" />
          ) : isOpen ? (
            <SvgChevronDown className="size-4 stroke-text-03" />
          ) : (
            <SvgChevronRight className="size-4 stroke-text-03" />
          )}
          {isOpen ? (
            <SvgFolderOpen className="size-4 stroke-text-03" />
          ) : (
            <SvgFolder className="size-4 stroke-text-03" />
          )}
          <Text mainContentMono text04 className="truncate">
            {entry.name}
          </Text>
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        {error && (
          <div style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}>
            <Text secondaryBody className="text-status-error-01">
              {error}
            </Text>
          </div>
        )}
        {children?.map((child) =>
          child.is_directory ? (
            <DirectoryNode
              key={child.path}
              entry={child}
              sessionId={sessionId}
              depth={depth + 1}
              onPreview={onPreview}
            />
          ) : (
            <FileNode
              key={child.path}
              entry={child}
              sessionId={sessionId}
              depth={depth + 1}
              onPreview={onPreview}
            />
          )
        )}
      </CollapsibleContent>
    </Collapsible>
  );
}

interface FileNodeProps {
  entry: FileSystemEntry;
  sessionId: string;
  depth: number;
  onPreview: (entry: FileSystemEntry) => void;
}

function FileNode({ entry, sessionId, depth, onPreview }: FileNodeProps) {
  const paddingLeft = depth * 1.25;
  const downloadUrl = getArtifactUrl(sessionId, entry.path);

  const canPreview =
    entry.mime_type?.startsWith("text/") ||
    entry.mime_type?.startsWith("image/") ||
    entry.mime_type === "application/json" ||
    entry.name.endsWith(".md") ||
    entry.name.endsWith(".txt") ||
    entry.name.endsWith(".json") ||
    entry.name.endsWith(".js") ||
    entry.name.endsWith(".ts") ||
    entry.name.endsWith(".tsx") ||
    entry.name.endsWith(".jsx") ||
    entry.name.endsWith(".css") ||
    entry.name.endsWith(".html") ||
    entry.name.endsWith(".py") ||
    entry.name.endsWith(".yaml") ||
    entry.name.endsWith(".yml");

  const formatSize = (bytes: number | null) => {
    if (bytes === null) return "";
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  };

  return (
    <div
      className="w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors group"
      style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}
    >
      <SvgFileSmall className="size-4 stroke-text-03 shrink-0" />
      <Text mainContentMono text04 className="truncate flex-1">
        {entry.name}
      </Text>
      {entry.size !== null && (
        <Text secondaryBody text03 className="shrink-0">
          {formatSize(entry.size)}
        </Text>
      )}
      <div className="flex flex-row gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
        {canPreview && (
          <Button
            variant="action"
            prominence="tertiary"
            icon={SvgEye}
            onClick={(e) => {
              e.stopPropagation();
              onPreview(entry);
            }}
          >
            Preview
          </Button>
        )}
        <a
          href={downloadUrl}
          download={entry.name}
          onClick={(e) => e.stopPropagation()}
        >
          <Button
            variant="action"
            prominence="tertiary"
            icon={SvgDownloadCloud}
          >
            Download
          </Button>
        </a>
      </div>
    </div>
  );
}

export default function FileBrowser({ sessionId }: FileBrowserProps) {
  const [rootEntries, setRootEntries] = useState<FileSystemEntry[] | null>(
    null
  );
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [previewFile, setPreviewFile] = useState<FileSystemEntry | null>(null);
  const [isOpen, setIsOpen] = useState(true);

  const loadRoot = useCallback(async () => {
    if (rootEntries !== null) return;

    setIsLoading(true);
    setError(null);
    try {
      const listing = await listDirectory(sessionId);
      setRootEntries(listing.entries);
    } catch (err) {
      setError(
        err instanceof Error ? err.message : "Failed to load file system"
      );
    } finally {
      setIsLoading(false);
    }
  }, [sessionId, rootEntries]);

  const handleToggleRoot = async (open: boolean) => {
    setIsOpen(open);
    if (open) {
      await loadRoot();
    }
  };

  const handlePreview = (entry: FileSystemEntry) => {
    setPreviewFile(entry);
  };

  const handleClosePreview = () => {
    setPreviewFile(null);
  };

  // Auto-load on mount
  useEffect(() => {
    loadRoot();
  }, []);

  return (
    <>
      <div className="border border-border-01 rounded-08 overflow-hidden">
        <Collapsible open={isOpen} onOpenChange={handleToggleRoot}>
          <CollapsibleTrigger asChild>
            <button className="w-full flex flex-row items-center gap-2 p-2 bg-background-neutral-01 hover:bg-background-neutral-02 transition-colors">
              {isLoading ? (
                <SvgLoader className="size-4 stroke-text-03 animate-spin" />
              ) : isOpen ? (
                <SvgChevronDown className="size-4 stroke-text-03" />
              ) : (
                <SvgChevronRight className="size-4 stroke-text-03" />
              )}
              <SvgHardDrive className="size-4 stroke-text-03" />
              <Text mainUiAction text03>
                Workspace Files
              </Text>
            </button>
          </CollapsibleTrigger>
          <CollapsibleContent>
            <div className="p-1 max-h-[50vh] overflow-auto">
              {error && (
                <Text secondaryBody className="text-status-error-01 p-2">
                  {error}
                </Text>
              )}
              {rootEntries?.length === 0 && (
                <Text secondaryBody text03 className="p-2 text-center">
                  No files yet
                </Text>
              )}
              {rootEntries?.map((entry) =>
                entry.is_directory ? (
                  <DirectoryNode
                    key={entry.path}
                    entry={entry}
                    sessionId={sessionId}
                    depth={0}
                    onPreview={handlePreview}
                  />
                ) : (
                  <FileNode
                    key={entry.path}
                    entry={entry}
                    sessionId={sessionId}
                    depth={0}
                    onPreview={handlePreview}
                  />
                )
              )}
            </div>
          </CollapsibleContent>
        </Collapsible>
      </div>

      {previewFile && (
        <FilePreviewModal
          sessionId={sessionId}
          entry={previewFile}
          onClose={handleClosePreview}
        />
      )}
    </>
  );
}


================================================
FILE: web/src/app/craft/components/FilePreviewModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { SvgFileText, SvgDownloadCloud, SvgImage } from "@opal/icons";
import { getArtifactUrl, FileSystemEntry } from "@/lib/build/client";

interface FilePreviewModalProps {
  sessionId: string;
  entry: FileSystemEntry;
  onClose: () => void;
}

export default function FilePreviewModal({
  sessionId,
  entry,
  onClose,
}: FilePreviewModalProps) {
  const [content, setContent] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const downloadUrl = getArtifactUrl(sessionId, entry.path);
  const isImage = entry.mime_type?.startsWith("image/");

  useEffect(() => {
    if (isImage) {
      setIsLoading(false);
      return;
    }

    const fetchContent = async () => {
      setIsLoading(true);
      setError(null);
      try {
        const response = await fetch(downloadUrl);
        if (!response.ok) {
          throw new Error(`Failed to fetch file: ${response.statusText}`);
        }
        const text = await response.text();
        setContent(text);
      } catch (err) {
        setError(err instanceof Error ? err.message : "Failed to load file");
      } finally {
        setIsLoading(false);
      }
    };

    fetchContent();
  }, [downloadUrl, isImage]);

  return (
    <Modal open onOpenChange={(open) => !open && onClose()}>
      <Modal.Content>
        <Modal.Header
          icon={isImage ? SvgImage : SvgFileText}
          title={entry.name}
          description={entry.path}
          onClose={onClose}
        />
        <Modal.Body>
          {isLoading ? (
            <div className="flex items-center justify-center p-8">
              <SimpleLoader />
            </div>
          ) : error ? (
            <Text secondaryBody className="text-status-error-01">
              {error}
            </Text>
          ) : isImage ? (
            <div className="flex items-center justify-center p-4">
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img
                src={downloadUrl}
                alt={entry.name}
                className="max-w-full max-h-[60vh] object-contain rounded-08"
              />
            </div>
          ) : (
            <div className="w-full overflow-auto max-h-[60vh] rounded-08 bg-background-neutral-02 border border-border-01">
              <pre className="p-4 text-sm font-mono whitespace-pre-wrap break-words text-text-04">
                {content}
              </pre>
            </div>
          )}
        </Modal.Body>
        <Modal.Footer>
          <a href={downloadUrl} download={entry.name}>
            <Button
              variant="action"
              prominence="secondary"
              icon={SvgDownloadCloud}
            >
              Download
            </Button>
          </a>
          <Button variant="action" onClick={onClose}>
            Close
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/app/craft/components/InputBar.tsx
================================================
"use client";

import {
  memo,
  forwardRef,
  useImperativeHandle,
  useCallback,
  useEffect,
  useRef,
  useState,
  type ChangeEvent,
  type ClipboardEvent,
  type KeyboardEvent,
} from "react";
import { useRouter } from "next/navigation";
import { getPastedFilesIfNoText } from "@/lib/clipboard";
import { cn, isImageFile } from "@/lib/utils";
import { Disabled } from "@opal/core";
import {
  useUploadFilesContext,
  BuildFile,
  UploadFileStatus,
} from "@/app/craft/contexts/UploadFilesContext";
import { useDemoDataEnabled } from "@/app/craft/hooks/useBuildSessionStore";
import { CRAFT_CONFIGURE_PATH } from "@/app/craft/v1/constants";
import IconButton from "@/refresh-components/buttons/IconButton";
import SelectButton from "@/refresh-components/buttons/SelectButton";
import { Button } from "@opal/components";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import {
  SvgArrowUp,
  SvgClock,
  SvgFileText,
  SvgImage,
  SvgLoader,
  SvgX,
  SvgPaperclip,
  SvgOrganization,
  SvgAlertCircle,
} from "@opal/icons";

const MAX_INPUT_HEIGHT = 200;

export interface InputBarHandle {
  reset: () => void;
  focus: () => void;
  setMessage: (message: string) => void;
}

export interface InputBarProps {
  onSubmit: (
    message: string,
    files: BuildFile[],
    demoDataEnabled: boolean
  ) => void;
  isRunning: boolean;
  disabled?: boolean;
  placeholder?: string;
  /** When true, shows spinner on send button with "Initializing sandbox..." tooltip */
  sandboxInitializing?: boolean;
  /** When true, removes bottom rounding to allow seamless connection with components below */
  noBottomRounding?: boolean;
  /** Whether this is the welcome page (no existing session in URL). Used for Demo Data pill. */
  isWelcomePage?: boolean;
}

/**
 * Simple file card for displaying attached files
 */
function BuildFileCard({
  file,
  onRemove,
}: {
  file: BuildFile;
  onRemove: (id: string) => void;
}) {
  const isImage = isImageFile(file.name);
  const isUploading = file.status === UploadFileStatus.UPLOADING;
  const isPending = file.status === UploadFileStatus.PENDING;
  const isFailed = file.status === UploadFileStatus.FAILED;

  const cardContent = (
    <div
      className={cn(
        "flex items-center gap-1.5 px-2 py-1 rounded-08",
        "bg-background-neutral-01 border",
        "text-sm text-text-04",
        isFailed ? "border-status-error-02" : "border-border-01"
      )}
    >
      {isUploading ? (
        <SvgLoader className="h-4 w-4 animate-spin text-text-03" />
      ) : isPending ? (
        <SvgClock className="h-4 w-4 text-text-03" />
      ) : isFailed ? (
        <SvgAlertCircle className="h-4 w-4 text-status-error-02" />
      ) : isImage ? (
        <SvgImage className="h-4 w-4 text-text-03" />
      ) : (
        <SvgFileText className="h-4 w-4 text-text-03" />
      )}
      <span
        className={cn(
          "max-w-[120px] truncate",
          isFailed && "text-status-error-02"
        )}
      >
        {file.name}
      </span>
      <button
        onClick={() => onRemove(file.id)}
        className="ml-1 p-0.5 hover:bg-background-neutral-02 rounded"
      >
        <SvgX className="h-3 w-3 text-text-03" />
      </button>
    </div>
  );

  // Wrap in tooltip for error or pending status
  if (isFailed && file.error) {
    return (
      <SimpleTooltip tooltip={file.error} side="top">
        {cardContent}
      </SimpleTooltip>
    );
  }

  if (isPending) {
    return (
      <SimpleTooltip tooltip="Waiting for session to be ready..." side="top">
        {cardContent}
      </SimpleTooltip>
    );
  }

  return cardContent;
}

/**
 * InputBar - Text input with file attachment support
 *
 * File upload state is managed by UploadFilesContext. This component just:
 * - Triggers file selection/paste
 * - Displays attached files
 * - Handles message submission
 *
 * The context handles:
 * - Session binding (which session to upload to)
 * - Auto-upload when session becomes available
 * - Fetching existing attachments on session change
 */
const InputBar = memo(
  forwardRef<InputBarHandle, InputBarProps>(
    (
      {
        onSubmit,
        isRunning,
        disabled = false,
        placeholder = "Describe your task...",
        sandboxInitializing = false,
        noBottomRounding = false,
        isWelcomePage = false,
      },
      ref
    ) => {
      const router = useRouter();
      const demoDataEnabled = useDemoDataEnabled();
      const [message, setMessage] = useState("");

      const textAreaRef = useRef<HTMLTextAreaElement>(null);
      const containerRef = useRef<HTMLDivElement>(null);
      const fileInputRef = useRef<HTMLInputElement>(null);

      const {
        currentMessageFiles,
        uploadFiles,
        removeFile,
        clearFiles,
        hasUploadingFiles,
      } = useUploadFilesContext();

      // Expose reset, focus, and setMessage methods to parent via ref
      useImperativeHandle(ref, () => ({
        reset: () => {
          setMessage("");
          clearFiles();
        },
        focus: () => {
          textAreaRef.current?.focus();
        },
        setMessage: (msg: string) => {
          setMessage(msg);
          // Move cursor to end after setting message
          setTimeout(() => {
            const textarea = textAreaRef.current;
            if (textarea) {
              textarea.focus();
              textarea.setSelectionRange(msg.length, msg.length);
            }
          }, 0);
        },
      }));

      // Auto-resize textarea based on content
      useEffect(() => {
        const textarea = textAreaRef.current;
        if (textarea) {
          textarea.style.height = "0px";
          textarea.style.height = `${Math.min(
            textarea.scrollHeight,
            MAX_INPUT_HEIGHT
          )}px`;
        }
      }, [message]);

      // Auto-focus on mount
      useEffect(() => {
        textAreaRef.current?.focus();
      }, []);

      const handleFileSelect = useCallback(
        async (e: ChangeEvent<HTMLInputElement>) => {
          const files = e.target.files;
          if (!files || files.length === 0) return;
          // Context handles session binding internally
          uploadFiles(Array.from(files));
          e.target.value = "";
        },
        [uploadFiles]
      );

      const handlePaste = useCallback(
        (event: ClipboardEvent) => {
          const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
          if (pastedFiles.length > 0) {
            event.preventDefault();
            // Context handles session binding internally
            uploadFiles(pastedFiles);
          }
        },
        [uploadFiles]
      );

      const handleInputChange = useCallback(
        (event: ChangeEvent<HTMLTextAreaElement>) => {
          setMessage(event.target.value);
        },
        []
      );

      const handleSubmit = useCallback(() => {
        if (disabled || isRunning || hasUploadingFiles || sandboxInitializing)
          return;

        const hasMessage = message.trim().length > 0;
        const hasFiles = currentMessageFiles.length > 0;

        if (hasMessage) {
          onSubmit(message.trim(), currentMessageFiles, demoDataEnabled);
          setMessage("");
          clearFiles({ suppressRefetch: true });
        } else if (hasFiles) {
          // User hit Enter with only files attached: remove files from input bar
          // (File stays in session; no way to delete from session for now)
          clearFiles({ suppressRefetch: true });
        }
      }, [
        message,
        disabled,
        isRunning,
        hasUploadingFiles,
        sandboxInitializing,
        onSubmit,
        currentMessageFiles,
        clearFiles,
        demoDataEnabled,
      ]);

      const handleKeyDown = useCallback(
        (event: KeyboardEvent<HTMLTextAreaElement>) => {
          if (
            event.key === "Enter" &&
            !event.shiftKey &&
            !(event.nativeEvent as any).isComposing
          ) {
            event.preventDefault();
            handleSubmit();
          }
        },
        [handleSubmit]
      );

      const canSubmit =
        message.trim().length > 0 &&
        !disabled &&
        !isRunning &&
        !hasUploadingFiles &&
        !sandboxInitializing;

      return (
        <Disabled disabled={disabled}>
          <div
            ref={containerRef}
            className={cn(
              "w-full flex flex-col shadow-01 bg-background-neutral-00",
              noBottomRounding ? "rounded-t-16 rounded-b-none" : "rounded-16"
            )}
          >
            {/* Hidden file input */}
            <input
              ref={fileInputRef}
              type="file"
              className="hidden"
              multiple
              onChange={handleFileSelect}
              accept="*/*"
            />

            {/* Attached Files */}
            {currentMessageFiles.length > 0 && (
              <div className="p-2 rounded-t-16 flex flex-wrap gap-1">
                {currentMessageFiles.map((file) => (
                  <BuildFileCard
                    key={file.id}
                    file={file}
                    onRemove={removeFile}
                  />
                ))}
              </div>
            )}

            {/* Input area */}
            <textarea
              onPaste={handlePaste}
              onChange={handleInputChange}
              onKeyDown={handleKeyDown}
              ref={textAreaRef}
              className={cn(
                "w-full",
                "h-[44px]",
                "outline-none",
                "bg-transparent",
                "resize-none",
                "placeholder:text-text-03",
                "whitespace-pre-wrap",
                "break-word",
                "overscroll-contain",
                "overflow-y-auto",
                "px-3",
                "pb-2",
                "pt-3"
              )}
              autoFocus
              style={{ scrollbarWidth: "thin" }}
              role="textarea"
              aria-multiline
              placeholder={placeholder}
              value={message}
              disabled={disabled}
            />

            {/* Bottom controls */}
            <div className="flex justify-between items-center w-full p-1 min-h-[40px]">
              {/* Bottom left controls */}
              <div className="flex flex-row items-center gap-1">
                {/* (+) button for file upload */}
                <Button
                  disabled={disabled}
                  icon={SvgPaperclip}
                  tooltip="Attach Files"
                  prominence="tertiary"
                  onClick={() => fileInputRef.current?.click()}
                />
                {/* Demo Data indicator pill - only show on welcome page (no session) when demo data is enabled */}
                {demoDataEnabled && isWelcomePage && (
                  <SimpleTooltip
                    tooltip="Switch to your data in the Configure panel!"
                    side="top"
                  >
                    <span>
                      <SelectButton
                        disabled={disabled}
                        leftIcon={SvgOrganization}
                        engaged={demoDataEnabled}
                        action
                        folded
                        onClick={() => router.push(CRAFT_CONFIGURE_PATH)}
                        className="bg-action-link-01"
                      >
                        Demo Data Active
                      </SelectButton>
                    </span>
                  </SimpleTooltip>
                )}
              </div>

              {/* Bottom right controls */}
              <div className="flex flex-row items-center gap-1">
                {/* Submit button */}
                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                <IconButton
                  icon={sandboxInitializing ? SvgLoader : SvgArrowUp}
                  onClick={handleSubmit}
                  disabled={!canSubmit}
                  tooltip={
                    sandboxInitializing ? "Initializing sandbox..." : "Send"
                  }
                  iconClassName={
                    sandboxInitializing ? "animate-spin" : undefined
                  }
                />
              </div>
            </div>
          </div>
        </Disabled>
      );
    }
  )
);

InputBar.displayName = "InputBar";

export default InputBar;


================================================
FILE: web/src/app/craft/components/IntroBackground.tsx
================================================
// Floating dust particles background with mouse interaction
import { useEffect, useRef, useState, useCallback } from "react";

interface Particle {
  x: number;
  y: number;
  vx: number;
  vy: number;
  size: number;
  opacity: number;
  baseOpacity: number;
  mass: number;
  id: number;
  glowMultiplier?: number;
  glowVelocity?: number;
}

interface BuildModeIntroBackgroundProps {
  particleCount?: number;
  particleSize?: number;
  particleOpacity?: number;
  glowIntensity?: number;
  movementSpeed?: number;
  mouseInfluence?: number;
  backgroundColor?: string;
  particleColor?: string;
  mouseGravity?: "none" | "attract" | "repel";
  gravityStrength?: number;
  glowAnimation?: "instant" | "ease" | "spring";
  particleInteraction?: boolean;
  interactionType?: "bounce" | "merge";
}

/**
 * @framerSupportedLayoutWidth any
 * @framerSupportedLayoutHeight any
 */
export default function BuildModeIntroBackground(
  props: BuildModeIntroBackgroundProps
) {
  const {
    particleCount = 400,
    particleSize = 2,
    particleOpacity = 1,
    glowIntensity = 20,
    movementSpeed = 0.75,
    mouseInfluence = 100,
    backgroundColor = "#000000",
    particleColor = "#FFFFFF",
    mouseGravity = "attract",
    gravityStrength = 50,
    glowAnimation = "ease",
    particleInteraction = true,
    interactionType = "bounce",
  } = props;

  const canvasRef = useRef<HTMLCanvasElement>(null);
  const animationRef = useRef<number | undefined>(undefined);
  const mouseRef = useRef({ x: 0, y: 0 });
  const particlesRef = useRef<Particle[]>([]);
  const [canvasSize, setCanvasSize] = useState({ width: 800, height: 600 });
  const containerRef = useRef<HTMLDivElement>(null);

  const initializeParticles = useCallback(
    (width: number, height: number) => {
      return Array.from({ length: particleCount }, (_, index) => ({
        x: Math.random() * width,
        y: Math.random() * height,
        vx: (Math.random() - 0.5) * movementSpeed,
        vy: (Math.random() - 0.5) * movementSpeed,
        size: Math.random() * particleSize + 1,
        opacity: particleOpacity,
        baseOpacity: particleOpacity,
        mass: Math.random() * 0.5 + 0.5,
        id: index,
      }));
    },
    [particleCount, particleSize, particleOpacity, movementSpeed]
  );

  const redistributeParticles = useCallback((width: number, height: number) => {
    particlesRef.current.forEach((particle) => {
      // Redistribute particles proportionally across the new dimensions
      particle.x = Math.random() * width;
      particle.y = Math.random() * height;
    });
  }, []);

  const updateParticles = useCallback(
    (canvas: HTMLCanvasElement) => {
      const rect = canvas.getBoundingClientRect();
      const mouse = mouseRef.current;

      particlesRef.current.forEach((particle, index) => {
        // Calculate distance to mouse
        const dx = mouse.x - particle.x;
        const dy = mouse.y - particle.y;
        const distance = Math.sqrt(dx * dx + dy * dy);

        // Mouse influence and gravity
        if (distance < mouseInfluence && distance > 0) {
          const force = (mouseInfluence - distance) / mouseInfluence;
          const normalizedDx = dx / distance;
          const normalizedDy = dy / distance;
          const gravityForce = force * (gravityStrength * 0.001);

          // Apply gravity effect based on mouseGravity setting
          if (mouseGravity === "attract") {
            particle.vx += normalizedDx * gravityForce;
            particle.vy += normalizedDy * gravityForce;
          } else if (mouseGravity === "repel") {
            particle.vx -= normalizedDx * gravityForce;
            particle.vy -= normalizedDy * gravityForce;
          }

          particle.opacity = Math.min(1, particle.baseOpacity + force * 0.4);

          // Apply glow animation based on type
          const targetGlow = 1 + force * 2;
          const currentGlow = particle.glowMultiplier || 1;

          if (glowAnimation === "instant") {
            particle.glowMultiplier = targetGlow;
          } else if (glowAnimation === "ease") {
            // Ease in-out animation
            const easeSpeed = 0.15;
            particle.glowMultiplier =
              currentGlow + (targetGlow - currentGlow) * easeSpeed;
          } else if (glowAnimation === "spring") {
            // Spring animation with overshoot
            const springForce = (targetGlow - currentGlow) * 0.2;
            const damping = 0.85;
            particle.glowVelocity =
              (particle.glowVelocity || 0) * damping + springForce;
            particle.glowMultiplier = currentGlow + particle.glowVelocity;
          }
        } else {
          particle.opacity = Math.max(
            particle.baseOpacity * 0.3,
            particle.opacity - 0.02
          );

          // Return glow to normal based on animation type
          const targetGlow = 1;
          const currentGlow = particle.glowMultiplier || 1;

          if (glowAnimation === "instant") {
            particle.glowMultiplier = targetGlow;
          } else if (glowAnimation === "ease") {
            const easeSpeed = 0.08;
            particle.glowMultiplier = Math.max(
              1,
              currentGlow + (targetGlow - currentGlow) * easeSpeed
            );
          } else if (glowAnimation === "spring") {
            const springForce = (targetGlow - currentGlow) * 0.15;
            const damping = 0.9;
            particle.glowVelocity =
              (particle.glowVelocity || 0) * damping + springForce;
            particle.glowMultiplier = Math.max(
              1,
              currentGlow + particle.glowVelocity
            );
          }
        }

        // Particle interaction
        if (particleInteraction) {
          for (let j = index + 1; j < particlesRef.current.length; j++) {
            const other = particlesRef.current[j];
            if (!other) continue;
            const dx = other.x - particle.x;
            const dy = other.y - particle.y;
            const distance = Math.sqrt(dx * dx + dy * dy);
            const minDistance = particle.size + other.size + 5;

            if (distance < minDistance && distance > 0) {
              if (interactionType === "bounce") {
                // Elastic collision
                const normalX = dx / distance;
                const normalY = dy / distance;

                // Relative velocity
                const relativeVx = particle.vx - other.vx;
                const relativeVy = particle.vy - other.vy;

                // Relative velocity in collision normal direction
                const speed = relativeVx * normalX + relativeVy * normalY;

                // Only resolve if velocities are separating
                if (speed < 0) return;

                // Collision impulse
                const impulse = (2 * speed) / (particle.mass + other.mass);

                // Update velocities
                particle.vx -= impulse * other.mass * normalX;
                particle.vy -= impulse * other.mass * normalY;
                other.vx += impulse * particle.mass * normalX;
                other.vy += impulse * particle.mass * normalY;

                // Separate particles to prevent overlap
                const overlap = minDistance - distance;
                const separationX = normalX * overlap * 0.5;
                const separationY = normalY * overlap * 0.5;

                particle.x -= separationX;
                particle.y -= separationY;
                other.x += separationX;
                other.y += separationY;
              } else if (interactionType === "merge") {
                // Temporary merge effect - increase glow and size
                const mergeForce = (minDistance - distance) / minDistance;
                particle.glowMultiplier =
                  (particle.glowMultiplier || 1) + mergeForce * 0.5;
                other.glowMultiplier =
                  (other.glowMultiplier || 1) + mergeForce * 0.5;

                // Attract particles slightly
                const attractForce = mergeForce * 0.01;
                particle.vx += dx * attractForce;
                particle.vy += dy * attractForce;
                other.vx -= dx * attractForce;
                other.vy -= dy * attractForce;
              }
            }
          }
        }

        // Update position
        particle.x += particle.vx;
        particle.y += particle.vy;

        // Add subtle random movement
        particle.vx += (Math.random() - 0.5) * 0.001;
        particle.vy += (Math.random() - 0.5) * 0.001;

        // Damping
        particle.vx *= 0.999;
        particle.vy *= 0.999;

        // Boundary wrapping
        if (particle.x < 0) particle.x = rect.width;
        if (particle.x > rect.width) particle.x = 0;
        if (particle.y < 0) particle.y = rect.height;
        if (particle.y > rect.height) particle.y = 0;
      });
    },
    [
      mouseInfluence,
      mouseGravity,
      gravityStrength,
      glowAnimation,
      particleInteraction,
      interactionType,
    ]
  );

  const drawParticles = useCallback(
    (ctx: CanvasRenderingContext2D) => {
      ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);

      particlesRef.current.forEach((particle) => {
        ctx.save();

        // Create glow effect with enhanced blur based on interaction
        const currentGlowMultiplier = particle.glowMultiplier || 1;
        ctx.shadowColor = particleColor;
        ctx.shadowBlur = glowIntensity * currentGlowMultiplier * 2;
        ctx.globalAlpha = particle.opacity;

        ctx.fillStyle = particleColor;
        ctx.beginPath();
        ctx.arc(particle.x, particle.y, particle.size, 0, Math.PI * 2);
        ctx.fill();

        ctx.restore();
      });
    },
    [particleColor, glowIntensity]
  );

  const animate = useCallback(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    updateParticles(canvas);
    drawParticles(ctx);

    animationRef.current = requestAnimationFrame(animate);
  }, [updateParticles, drawParticles]);

  const handleMouseMove = useCallback((e: MouseEvent) => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const rect = canvas.getBoundingClientRect();
    mouseRef.current = {
      x: e.clientX - rect.left,
      y: e.clientY - rect.top,
    };
  }, []);

  const resizeCanvas = useCallback(() => {
    const canvas = canvasRef.current;
    const container = containerRef.current;
    if (!canvas || !container) return;

    const rect = container.getBoundingClientRect();
    const newWidth = rect.width;
    const newHeight = rect.height;

    canvas.width = newWidth;
    canvas.height = newHeight;

    // Update canvas size state and redistribute particles
    setCanvasSize({ width: newWidth, height: newHeight });

    // Only redistribute if particles exist and size changed significantly
    if (particlesRef.current.length > 0) {
      redistributeParticles(newWidth, newHeight);
    }
  }, [redistributeParticles]);

  // Effect to reinitialize particles when particle count changes
  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    particlesRef.current = initializeParticles(
      canvas.width || canvasSize.width,
      canvas.height || canvasSize.height
    );
  }, [particleCount, initializeParticles, canvasSize]);

  // Effect to update particle properties when they change
  useEffect(() => {
    particlesRef.current.forEach((particle) => {
      particle.baseOpacity = particleOpacity;
      particle.opacity = particleOpacity;
      // Update velocity based on new movement speed
      const currentSpeed = Math.sqrt(
        particle.vx * particle.vx + particle.vy * particle.vy
      );
      if (currentSpeed > 0) {
        const ratio = movementSpeed / currentSpeed;
        particle.vx *= ratio;
        particle.vy *= ratio;
      }
    });
  }, [particleOpacity, movementSpeed]);

  useEffect(() => {
    resizeCanvas();

    if (typeof window !== "undefined") {
      window.addEventListener("mousemove", handleMouseMove);
      window.addEventListener("resize", resizeCanvas);
    }

    // Set up ResizeObserver for container
    if (containerRef.current && typeof ResizeObserver !== "undefined") {
      const resizeObserver = new ResizeObserver(() => {
        resizeCanvas();
      });
      resizeObserver.observe(containerRef.current);

      return () => {
        resizeObserver.disconnect();
        if (typeof window !== "undefined") {
          window.removeEventListener("mousemove", handleMouseMove);
          window.removeEventListener("resize", resizeCanvas);
        }
      };
    }

    return () => {
      if (typeof window !== "undefined") {
        window.removeEventListener("mousemove", handleMouseMove);
        window.removeEventListener("resize", resizeCanvas);
      }
    };
  }, [handleMouseMove, resizeCanvas]);

  useEffect(() => {
    animate();

    return () => {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
      }
    };
  }, [animate]);

  return (
    <div
      ref={containerRef}
      style={{
        width: "100%",
        height: "100%",
        backgroundColor,
        position: "relative",
        overflow: "hidden",
      }}
    >
      <canvas
        ref={canvasRef}
        style={{
          width: "100%",
          height: "100%",
          display: "block",
        }}
      />
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/IntroContent.tsx
================================================
"use client";

import { useEffect } from "react";
import { motion } from "motion/react";
import { track, AnalyticsEvent } from "@/lib/analytics";
import { OnyxLogoTypeIcon } from "@/components/icons/icons";
import Text from "@/refresh-components/texts/Text";
import BigButton from "@/app/craft/components/BigButton";

interface BuildModeIntroContentProps {
  onClose: () => void;
  onTryBuildMode: () => void;
}

export default function BuildModeIntroContent({
  onClose,
  onTryBuildMode,
}: BuildModeIntroContentProps) {
  // Track when user sees the craft intro
  useEffect(() => {
    track(AnalyticsEvent.SAW_CRAFT_INTRO);
  }, []);

  return (
    <div className="absolute inset-0 flex flex-col items-center justify-center pointer-events-none">
      <div className="flex flex-col items-center gap-7 w-full">
        <motion.div
          initial={{ opacity: 0, y: 20 }}
          animate={{ opacity: 1, y: 0 }}
          transition={{ duration: 0.8, delay: 0.5 }}
          className="w-full relative"
        >
          <div className="grid grid-cols-[1fr_auto_1fr] items-end">
            <div className="flex justify-end">
              <OnyxLogoTypeIcon size={385} className="text-white" />
            </div>
            <div className="w-8"></div>
            <div className="flex justify-start">
              <div
                className="relative inline-flex overflow-visible"
                style={{ transform: "translateX(-0.6em)" }}
              >
                <span className="relative inline-block leading-[3.5]">
                  <Text
                    headingH1
                    className="!text-9xl !text-white relative inline-block"
                    style={{
                      fontFamily: "var(--font-kh-teka)",
                      fontWeight: 500,
                    }}
                  >
                    Craft
                  </Text>
                </span>
                <span
                  className="pointer-events-none absolute top-3 -right-14 text-[1em] uppercase tracking-[0.2em] !text-white"
                  style={{ fontFamily: "var(--font-kh-teka)", fontWeight: 500 }}
                >
                  BETA
                </span>
              </div>
            </div>
          </div>
        </motion.div>
        <motion.div
          className="flex gap-5 pointer-events-auto justify-center"
          initial={{ opacity: 0, y: 20 }}
          animate={{ opacity: 1, y: 0 }}
          transition={{ duration: 0.8, delay: 1.3 }}
        >
          <BigButton
            secondary
            className="!border-white !text-white hover:!bg-white/10 active:!bg-white/20 !w-[160px]"
            onClick={(e) => {
              e.stopPropagation();
              track(AnalyticsEvent.CLICKED_GO_HOME);
              onClose();
            }}
          >
            Return Home
          </BigButton>
          <BigButton
            primary
            className="!bg-white !text-black hover:!bg-gray-200 active:!bg-gray-300 !w-[160px]"
            onClick={(e) => {
              e.stopPropagation();
              track(AnalyticsEvent.CLICKED_TRY_CRAFT);
              onTryBuildMode();
            }}
          >
            Start Crafting
          </BigButton>
        </motion.div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/OutputPanel.tsx
================================================
"use client";

import { memo, useState, useEffect, useCallback } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  useSession,
  useWebappNeedsRefresh,
  useBuildSessionStore,
  useFilePreviewTabs,
  useActiveOutputTab,
  useActiveFilePreviewPath,
  usePreProvisionedSessionId,
  useIsPreProvisioning,
  useTabHistory,
  OutputTabType,
} from "@/app/craft/hooks/useBuildSessionStore";
import {
  fetchWebappInfo,
  fetchArtifacts,
  exportDocx,
} from "@/app/craft/services/apiServices";
import { cn, getFileIcon } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import {
  SvgGlobe,
  SvgHardDrive,
  SvgFiles,
  SvgX,
  SvgMinus,
  SvgMaximize2,
} from "@opal/icons";
import { IconProps } from "@opal/types";
import CraftingLoader from "@/app/craft/components/CraftingLoader";

// Output panel sub-components
import UrlBar from "@/app/craft/components/output-panel/UrlBar";
import PreviewTab from "@/app/craft/components/output-panel/PreviewTab";
import { FilePreviewContent } from "@/app/craft/components/output-panel/FilePreviewContent";
import FilesTab from "@/app/craft/components/output-panel/FilesTab";
import ArtifactsTab from "@/app/craft/components/output-panel/ArtifactsTab";

type TabValue = OutputTabType;

const tabs: { value: TabValue; label: string; icon: React.FC<IconProps> }[] = [
  { value: "preview", label: "Preview", icon: SvgGlobe },
  { value: "files", label: "Files", icon: SvgHardDrive },
  { value: "artifacts", label: "Artifacts", icon: SvgFiles },
];

interface BuildOutputPanelProps {
  onClose: () => void;
  isOpen: boolean;
}

/**
 * BuildOutputPanel - Right panel showing preview, files, and artifacts
 *
 * Features:
 * - Tabbed interface (Preview, Files, Artifacts)
 * - Live preview iframe for webapp artifacts
 * - File browser for exploring sandbox filesystem
 * - Artifact list with download/view options
 */
const BuildOutputPanel = memo(({ onClose, isOpen }: BuildOutputPanelProps) => {
  const session = useSession();
  const preProvisionedSessionId = usePreProvisionedSessionId();
  const isPreProvisioning = useIsPreProvisioning();

  // Get active tab state from store
  const activeOutputTab = useActiveOutputTab();
  const activeFilePreviewPath = useActiveFilePreviewPath();
  const filePreviewTabs = useFilePreviewTabs();

  // Store actions
  const setActiveOutputTab = useBuildSessionStore(
    (state) => state.setActiveOutputTab
  );
  const setNoSessionActiveOutputTab = useBuildSessionStore(
    (state) => state.setNoSessionActiveOutputTab
  );
  const openFilePreview = useBuildSessionStore(
    (state) => state.openFilePreview
  );
  const closeFilePreview = useBuildSessionStore(
    (state) => state.closeFilePreview
  );
  const setActiveFilePreviewPath = useBuildSessionStore(
    (state) => state.setActiveFilePreviewPath
  );

  // Store actions for refresh
  const triggerFilesRefresh = useBuildSessionStore(
    (state) => state.triggerFilesRefresh
  );

  // Counters to force-reload previews
  const [previewRefreshKey, setPreviewRefreshKey] = useState(0);
  const [filePreviewRefreshKey, setFilePreviewRefreshKey] = useState(0);

  // Determine which tab is visually active
  const isFilePreviewActive = activeFilePreviewPath !== null;
  const activeTab = isFilePreviewActive ? null : activeOutputTab;

  const handlePinnedTabClick = (tab: TabValue) => {
    if (session?.id) {
      setActiveOutputTab(session.id, tab);
    } else {
      // No session - use temporary state for tab switching
      setNoSessionActiveOutputTab(tab);
    }
  };

  const handlePreviewTabClick = (path: string) => {
    if (session?.id) {
      setActiveFilePreviewPath(session.id, path);
    }
  };

  const handlePreviewTabClose = (e: React.MouseEvent, path: string) => {
    e.stopPropagation(); // Don't trigger tab click
    if (session?.id) {
      closeFilePreview(session.id, path);
    }
  };

  const handleFileClick = (path: string, fileName: string) => {
    if (session?.id) {
      openFilePreview(session.id, path, fileName);
    }
  };

  const handleMaximize = () => {
    setIsMaximized((prev) => !prev);
  };

  // Track when panel animation completes (defer fetch until fully open)
  const [isFullyOpen, setIsFullyOpen] = useState(false);
  // Track when content should unmount (delayed on close for animation)
  const [shouldRenderContent, setShouldRenderContent] = useState(false);
  // Track if panel is maximized
  const [isMaximized, setIsMaximized] = useState(false);

  useEffect(() => {
    if (isOpen) {
      // Render content immediately on open
      setShouldRenderContent(true);
      // Wait for 300ms CSS transition to complete before fetching
      const timer = setTimeout(() => setIsFullyOpen(true), 300);
      return () => clearTimeout(timer);
    } else {
      // Stop fetching immediately
      setIsFullyOpen(false);
      // Delay unmount until close animation completes
      const timer = setTimeout(() => setShouldRenderContent(false), 300);
      return () => clearTimeout(timer);
    }
  }, [isOpen]);

  // Session-scoped URL caching
  const [cachedWebappUrl, setCachedWebappUrl] = useState<string | null>(null);
  const [cachedForSessionId, setCachedForSessionId] = useState<string | null>(
    null
  );

  // Clear cache when session changes
  useEffect(() => {
    if (session?.id !== cachedForSessionId) {
      setCachedWebappUrl(null);
      setCachedForSessionId(session?.id ?? null);
    }
  }, [session?.id, cachedForSessionId]);

  // Webapp refresh trigger from streaming / restore
  const webappNeedsRefresh = useWebappNeedsRefresh();

  // Track polling window: poll for up to 30s after a restore/refresh trigger
  const [pollingDeadline, setPollingDeadline] = useState<number | null>(null);
  const [isWebappReady, setIsWebappReady] = useState(false);

  // When webappNeedsRefresh bumps (restore or file edit), start a 30s polling window
  // and reset readiness so we poll until the server is back up
  useEffect(() => {
    if (webappNeedsRefresh > 0) {
      setPollingDeadline(Date.now() + 30_000);
      setIsWebappReady(false);

      // Force a re-render after 30s to stop polling even if server never responded
      const timer = setTimeout(() => setPollingDeadline(null), 30_000);
      return () => clearTimeout(timer);
    }
  }, [webappNeedsRefresh]);

  // Fetch webapp info from dedicated endpoint
  // Only fetch for real sessions when panel is fully open
  const shouldFetchWebapp =
    isFullyOpen &&
    session?.id &&
    !session.id.startsWith("temp-") &&
    session.status !== "creating";

  // Poll every 2s while NextJS is starting up (capped at 30s), then stop
  const shouldPoll =
    !isWebappReady && pollingDeadline !== null && Date.now() < pollingDeadline;

  const { data: webappInfo, mutate } = useSWR(
    shouldFetchWebapp ? SWR_KEYS.buildSessionWebappInfo(session.id) : null,
    () => (session?.id ? fetchWebappInfo(session.id) : null),
    {
      refreshInterval: shouldPoll ? 2000 : 0,
      revalidateOnFocus: true,
      keepPreviousData: true,
    }
  );

  // Update readiness from SWR response and clear polling deadline
  useEffect(() => {
    if (webappInfo?.ready) {
      setIsWebappReady(true);
      setPollingDeadline(null);
    }
  }, [webappInfo?.ready]);

  // Update cache when SWR returns data for current session
  useEffect(() => {
    if (webappInfo?.webapp_url && session?.id === cachedForSessionId) {
      setCachedWebappUrl(webappInfo.webapp_url);
    }
  }, [webappInfo?.webapp_url, session?.id, cachedForSessionId]);

  // Refresh when web/ file changes or after restore
  // webappNeedsRefresh is a counter that increments on each edit/restore,
  // ensuring each triggers a new refresh even if the panel is already open
  useEffect(() => {
    if (webappNeedsRefresh > 0 && isFullyOpen && session?.id) {
      mutate();
    }
  }, [webappNeedsRefresh, isFullyOpen, mutate, session?.id]);

  const webappUrl = webappInfo?.webapp_url ?? null;

  // Use cache only if it belongs to current session
  const validCachedUrl =
    cachedForSessionId === session?.id ? cachedWebappUrl : null;
  const displayUrl = webappUrl ?? validCachedUrl;

  // Tab navigation history
  const tabHistory = useTabHistory();
  const navigateTabBack = useBuildSessionStore(
    (state) => state.navigateTabBack
  );
  const navigateTabForward = useBuildSessionStore(
    (state) => state.navigateTabForward
  );

  const canGoBack = tabHistory.currentIndex > 0;
  const canGoForward = tabHistory.currentIndex < tabHistory.entries.length - 1;

  const handleBack = useCallback(() => {
    if (session?.id) {
      navigateTabBack(session.id);
    }
  }, [session?.id, navigateTabBack]);

  const handleForward = useCallback(() => {
    if (session?.id) {
      navigateTabForward(session.id);
    }
  }, [session?.id, navigateTabForward]);

  // Determine if the active file preview is a markdown or pptx file (for download buttons)
  const isMarkdownPreview =
    isFilePreviewActive &&
    activeFilePreviewPath &&
    /\.md$/i.test(activeFilePreviewPath);

  const isPptxPreview =
    isFilePreviewActive &&
    activeFilePreviewPath &&
    /\.pptx$/i.test(activeFilePreviewPath);

  const isPdfPreview =
    isFilePreviewActive &&
    activeFilePreviewPath &&
    /\.pdf$/i.test(activeFilePreviewPath);

  const [isExportingDocx, setIsExportingDocx] = useState(false);

  const handleDocxDownload = useCallback(async () => {
    if (!session?.id || !activeFilePreviewPath) return;
    setIsExportingDocx(true);
    try {
      const blob = await exportDocx(session.id, activeFilePreviewPath);
      const fileName =
        activeFilePreviewPath.split("/").pop() || activeFilePreviewPath;
      const url = URL.createObjectURL(blob);
      const link = document.createElement("a");
      link.href = url;
      link.download = fileName.replace(/\.md$/i, ".docx");
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      URL.revokeObjectURL(url);
    } catch (err) {
      console.error("Failed to export as DOCX:", err);
    } finally {
      setIsExportingDocx(false);
    }
  }, [session?.id, activeFilePreviewPath]);

  const handleRawFileDownload = useCallback(() => {
    if (!session?.id || !activeFilePreviewPath) return;
    const encodedPath = activeFilePreviewPath
      .split("/")
      .map((s) => encodeURIComponent(s))
      .join("/");
    const link = document.createElement("a");
    link.href = `/api/build/sessions/${session.id}/artifacts/${encodedPath}`;
    link.download =
      activeFilePreviewPath.split("/").pop() || activeFilePreviewPath;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
  }, [session?.id, activeFilePreviewPath]);

  // Unified refresh handler — dispatches based on the active tab/preview
  const handleRefresh = useCallback(() => {
    if (isFilePreviewActive && activeFilePreviewPath) {
      // File preview tab: bump key to reload standalone + content previews
      setFilePreviewRefreshKey((k) => k + 1);
    } else if (activeOutputTab === "preview") {
      // Web preview tab: remount the iframe
      setPreviewRefreshKey((k) => k + 1);
    } else if (activeOutputTab === "files" && session?.id) {
      // Files tab: clear cache and re-fetch directory listing
      triggerFilesRefresh(session.id);
    }
  }, [
    isFilePreviewActive,
    activeFilePreviewPath,
    activeOutputTab,
    session?.id,
    triggerFilesRefresh,
  ]);

  // Fetch artifacts - poll every 5 seconds when on artifacts tab
  const shouldFetchArtifacts =
    session?.id &&
    !session.id.startsWith("temp-") &&
    session.status !== "creating" &&
    activeTab === "artifacts";

  const { data: polledArtifacts } = useSWR(
    shouldFetchArtifacts ? SWR_KEYS.buildSessionArtifacts(session.id) : null,
    () => (session?.id ? fetchArtifacts(session.id) : null),
    {
      refreshInterval: 5000, // Refresh every 5 seconds to catch new artifacts
      revalidateOnFocus: true,
    }
  );

  // Use polled artifacts if available, otherwise fall back to session store
  const artifacts = polledArtifacts ?? session?.artifacts ?? [];

  return (
    <div
      className={cn(
        "absolute z-20 flex flex-col border rounded-12 border-border-01 bg-background-neutral-00 overflow-hidden transition-all duration-300 ease-in-out",
        isMaximized
          ? "top-4 right-16 bottom-4 w-[calc(100%-8rem)]"
          : "top-4 right-4 bottom-4 w-[calc(50%-2rem)]",
        isOpen
          ? "opacity-100 translate-x-0"
          : "opacity-0 translate-x-full pointer-events-none"
      )}
      style={{
        boxShadow: "0 8px 60px 30px rgba(0, 0, 0, 0.07)",
      }}
    >
      {/* Tab List - Chrome-style tabs */}
      <div className="flex flex-col w-full">
        {/* Tabs row */}
        <div className="flex items-end w-full pt-1.5 bg-background-tint-03">
          {/* macOS-style window controls - sticky on left */}
          <div className="group flex items-center gap-2.5 pl-4 pr-2 py-3 flex-shrink-0">
            <button
              onClick={onClose}
              className="relative w-3.5 h-3.5 rounded-full bg-[#ff5f57] hover:bg-[#ff3b30] transition-colors flex-shrink-0 flex items-center justify-center"
              aria-label="No action"
            >
              <SvgX
                size={12}
                strokeWidth={4}
                className="opacity-0 group-hover:opacity-100 transition-opacity"
                style={{ stroke: "#8a2e2a" }}
              />
            </button>
            <button
              onClick={onClose}
              className="relative w-3.5 h-3.5 rounded-full bg-[#ffbd2e] hover:bg-[#ffa000] transition-colors flex-shrink-0 flex items-center justify-center"
              aria-label="Close panel"
            >
              <SvgMinus
                size={12}
                strokeWidth={3}
                className="opacity-0 group-hover:opacity-100 transition-opacity"
                style={{ stroke: "#8a6618" }}
              />
            </button>
            <button
              onClick={handleMaximize}
              className="relative w-3.5 h-3.5 rounded-full bg-[#28ca42] hover:bg-[#1fb832] transition-colors flex-shrink-0 flex items-center justify-center"
              aria-label="Maximize panel"
            >
              <SvgMaximize2
                size={8}
                strokeWidth={2.5}
                className="opacity-0 group-hover:opacity-90 rotate-90 transition-opacity"
                style={{ stroke: "#155c24" }}
              />
            </button>
          </div>
          {/* Scrollable tabs container */}
          <div className="flex items-end gap-1.5 flex-1 pl-3 pr-2 overflow-x-auto [&::-webkit-scrollbar]:hidden [-ms-overflow-style:none] [scrollbar-width:none]">
            {/* Pinned tabs */}
            {tabs.map((tab) => {
              const Icon = tab.icon;
              const isActive = activeTab === tab.value;
              // Disable artifacts tab when no session
              const isDisabled = tab.value === "artifacts" && !session;
              return (
                <button
                  key={tab.value}
                  onClick={() => !isDisabled && handlePinnedTabClick(tab.value)}
                  disabled={isDisabled}
                  title={
                    isDisabled
                      ? "Start building something to see artifacts!"
                      : undefined
                  }
                  className={cn(
                    "relative inline-flex items-center justify-center gap-2 px-5",
                    "max-w-[15%] min-w-fit",
                    isDisabled
                      ? "text-text-02 bg-transparent cursor-not-allowed py-1 mb-1"
                      : isActive
                        ? "bg-background-neutral-00 text-text-04 rounded-t-lg py-2"
                        : "text-text-03 bg-transparent hover:bg-background-tint-02 rounded-full py-1 mb-1"
                  )}
                >
                  {/* Left curved joint */}
                  {isActive && (
                    <div
                      className="absolute -left-3 bottom-0 w-3 h-3 bg-background-neutral-00"
                      style={{
                        maskImage:
                          "radial-gradient(circle at 0 0, transparent 12px, black 12px)",
                        WebkitMaskImage:
                          "radial-gradient(circle at 0 0, transparent 12px, black 12px)",
                      }}
                    />
                  )}
                  <Icon
                    size={16}
                    className={cn(
                      "stroke-current flex-shrink-0",
                      isDisabled
                        ? "stroke-text-02"
                        : isActive
                          ? "stroke-text-04"
                          : "stroke-text-03"
                    )}
                  />
                  <Text
                    className={cn("truncate", isDisabled && "text-text-02")}
                  >
                    {tab.label}
                  </Text>
                  {/* Right curved joint */}
                  {isActive && (
                    <div
                      className="absolute -right-3 bottom-0 w-3 h-3 bg-background-neutral-00"
                      style={{
                        maskImage:
                          "radial-gradient(circle at 100% 0, transparent 12px, black 12px)",
                        WebkitMaskImage:
                          "radial-gradient(circle at 100% 0, transparent 12px, black 12px)",
                      }}
                    />
                  )}
                </button>
              );
            })}

            {/* Separator between pinned and preview tabs */}
            {filePreviewTabs.length > 0 && (
              <div className="w-px h-5 bg-border-02 mx-2 mb-1 self-center" />
            )}

            {/* Preview tabs */}
            {filePreviewTabs.map((previewTab) => {
              const isActive = activeFilePreviewPath === previewTab.path;
              const TabIcon = getFileIcon(previewTab.fileName);
              return (
                <button
                  key={previewTab.path}
                  onClick={() => handlePreviewTabClick(previewTab.path)}
                  className={cn(
                    "group relative inline-flex items-center justify-center gap-1.5 px-3 pr-2",
                    "max-w-[150px] min-w-fit",
                    isActive
                      ? "bg-background-neutral-00 text-text-04 rounded-t-lg py-2"
                      : "text-text-03 bg-transparent hover:bg-background-tint-02 rounded-full py-1 mb-1"
                  )}
                >
                  {/* Left curved joint */}
                  {isActive && (
                    <div
                      className="absolute -left-3 bottom-0 w-3 h-3 bg-background-neutral-00"
                      style={{
                        maskImage:
                          "radial-gradient(circle at 0 0, transparent 12px, black 12px)",
                        WebkitMaskImage:
                          "radial-gradient(circle at 0 0, transparent 12px, black 12px)",
                      }}
                    />
                  )}
                  <TabIcon
                    size={14}
                    className={cn(
                      "stroke-current flex-shrink-0",
                      isActive ? "stroke-text-04" : "stroke-text-03"
                    )}
                  />
                  <Text className="truncate text-sm">
                    {previewTab.fileName}
                  </Text>
                  {/* Close button */}
                  <button
                    onClick={(e) => handlePreviewTabClose(e, previewTab.path)}
                    className={cn(
                      "flex-shrink-0 p-0.5 rounded hover:bg-background-tint-03 transition-colors",
                      isActive
                        ? "opacity-100"
                        : "opacity-0 group-hover:opacity-100"
                    )}
                    aria-label={`Close ${previewTab.fileName}`}
                  >
                    <SvgX size={12} className="stroke-text-03" />
                  </button>
                  {/* Right curved joint */}
                  {isActive && (
                    <div
                      className="absolute -right-3 bottom-0 w-3 h-3 bg-background-neutral-00"
                      style={{
                        maskImage:
                          "radial-gradient(circle at 100% 0, transparent 12px, black 12px)",
                        WebkitMaskImage:
                          "radial-gradient(circle at 100% 0, transparent 12px, black 12px)",
                      }}
                    />
                  )}
                </button>
              );
            })}
          </div>
        </div>
        {/* White bar connecting tabs to content */}
        <div className="h-2 w-full bg-background-neutral-00" />
      </div>

      {/* URL Bar - Chrome-style */}
      <UrlBar
        displayUrl={
          isFilePreviewActive && activeFilePreviewPath
            ? `sandbox://${activeFilePreviewPath}`
            : activeOutputTab === "preview"
              ? session
                ? displayUrl || "Loading..."
                : "no-active-sandbox://"
              : activeOutputTab === "files"
                ? session
                  ? "sandbox://"
                  : preProvisionedSessionId
                    ? "pre-provisioned-sandbox://"
                    : isPreProvisioning
                      ? "provisioning-sandbox://..."
                      : "no-sandbox://"
                : "artifacts://"
        }
        showNavigation={true}
        canGoBack={canGoBack}
        canGoForward={canGoForward}
        onBack={handleBack}
        onForward={handleForward}
        previewUrl={
          !isFilePreviewActive &&
          activeOutputTab === "preview" &&
          displayUrl &&
          displayUrl.startsWith("http")
            ? displayUrl
            : null
        }
        onDownloadRaw={
          isMarkdownPreview || isPptxPreview || isPdfPreview
            ? handleRawFileDownload
            : undefined
        }
        downloadRawTooltip={
          isPdfPreview
            ? "Download PDF"
            : isPptxPreview
              ? "Download PPTX"
              : "Download MD file"
        }
        onDownload={isMarkdownPreview ? handleDocxDownload : undefined}
        isDownloading={isExportingDocx}
        onRefresh={handleRefresh}
        sessionId={
          !isFilePreviewActive &&
          activeOutputTab === "preview" &&
          session?.id &&
          displayUrl?.startsWith("http")
            ? session.id
            : undefined
        }
        sharingScope={webappInfo?.sharing_scope ?? "private"}
        onScopeChange={mutate}
      />

      {/* Tab Content */}
      <div className="flex-1 overflow-hidden rounded-b-08">
        {/* File preview content - shown when a preview tab is active */}
        {isFilePreviewActive && activeFilePreviewPath && session?.id && (
          <FilePreviewContent
            sessionId={session.id}
            filePath={activeFilePreviewPath}
            refreshKey={filePreviewRefreshKey}
          />
        )}
        {/* Pinned tab content - only show when no file preview is active */}
        {!isFilePreviewActive && (
          <>
            {activeOutputTab === "preview" &&
              shouldRenderContent &&
              // Show crafting loader only when no session exists (welcome state)
              // Otherwise, PreviewTab handles the loading/iframe display
              (!session ? (
                <CraftingLoader />
              ) : (
                <PreviewTab
                  webappUrl={displayUrl}
                  refreshKey={previewRefreshKey}
                />
              ))}
            {activeOutputTab === "files" && (
              <FilesTab
                sessionId={session?.id ?? preProvisionedSessionId}
                onFileClick={session ? handleFileClick : undefined}
                isPreProvisioned={!session && !!preProvisionedSessionId}
                isProvisioning={!session && isPreProvisioning}
              />
            )}
            {activeOutputTab === "artifacts" && (
              <ArtifactsTab
                artifacts={artifacts}
                sessionId={session?.id ?? null}
              />
            )}
          </>
        )}
      </div>
    </div>
  );
});
BuildOutputPanel.displayName = "BuildOutputPanel";
export default BuildOutputPanel;


================================================
FILE: web/src/app/craft/components/RawOutputBlock.tsx
================================================
"use client";

import { useMemo } from "react";
import { cn } from "@/lib/utils";
import hljs from "highlight.js/lib/core";

// Import highlight.js theme styles (dark mode Atom One Dark)
import "@/app/app/message/custom-code-styles.css";

// Register common languages
import javascript from "highlight.js/lib/languages/javascript";
import typescript from "highlight.js/lib/languages/typescript";
import python from "highlight.js/lib/languages/python";
import json from "highlight.js/lib/languages/json";
import css from "highlight.js/lib/languages/css";
import xml from "highlight.js/lib/languages/xml"; // includes HTML
import bash from "highlight.js/lib/languages/bash";
import yaml from "highlight.js/lib/languages/yaml";
import markdown from "highlight.js/lib/languages/markdown";
import sql from "highlight.js/lib/languages/sql";

hljs.registerLanguage("javascript", javascript);
hljs.registerLanguage("js", javascript);
hljs.registerLanguage("jsx", javascript);
hljs.registerLanguage("typescript", typescript);
hljs.registerLanguage("ts", typescript);
hljs.registerLanguage("tsx", typescript);
hljs.registerLanguage("python", python);
hljs.registerLanguage("py", python);
hljs.registerLanguage("json", json);
hljs.registerLanguage("css", css);
hljs.registerLanguage("html", xml);
hljs.registerLanguage("xml", xml);
hljs.registerLanguage("bash", bash);
hljs.registerLanguage("sh", bash);
hljs.registerLanguage("shell", bash);
hljs.registerLanguage("yaml", yaml);
hljs.registerLanguage("yml", yaml);
hljs.registerLanguage("markdown", markdown);
hljs.registerLanguage("md", markdown);
hljs.registerLanguage("sql", sql);

/**
 * Get language from file extension
 */
function getLanguageFromPath(filePath: string | undefined): string | undefined {
  if (!filePath) return undefined;
  const ext = filePath.split(".").pop()?.toLowerCase();
  if (!ext) return undefined;

  const langMap: Record<string, string> = {
    js: "javascript",
    jsx: "javascript",
    ts: "typescript",
    tsx: "typescript",
    py: "python",
    json: "json",
    css: "css",
    html: "html",
    xml: "xml",
    sh: "bash",
    bash: "bash",
    yaml: "yaml",
    yml: "yaml",
    md: "markdown",
    sql: "sql",
  };

  return langMap[ext];
}

interface RawOutputBlockProps {
  content: string;
  maxHeight?: string;
  /** File path to derive language from, or explicit language name */
  language?: string;
}

/**
 * RawOutputBlock - Scrollable code block for tool output
 *
 * Displays raw output in a dark monospace container with
 * horizontal and vertical scrolling. Applies syntax highlighting
 * when a language can be determined.
 */
export default function RawOutputBlock({
  content,
  maxHeight = "300px",
  language,
}: RawOutputBlockProps) {
  const highlightedHtml = useMemo(() => {
    if (!content) return null;

    // Try to determine language from file path or explicit language
    const lang = language?.includes(".")
      ? getLanguageFromPath(language)
      : language;

    try {
      if (lang && hljs.getLanguage(lang)) {
        return hljs.highlight(content, { language: lang }).value;
      }
      // Don't auto-detect for plain output (like command results)
      return null;
    } catch {
      return null;
    }
  }, [content, language]);

  if (!content) {
    return (
      <div
        className={cn(
          "p-3 rounded-08 border",
          // Match hljs theme: light=#fafafa, dark=#151617
          "bg-[#fafafa] border-[#fafafa] dark:bg-[#151617] dark:border-[#151617]",
          "text-text-03 text-xs"
        )}
        style={{ fontFamily: "var(--font-dm-mono)" }}
      >
        No output yet...
      </div>
    );
  }

  return (
    <div
      className={cn(
        "p-3 rounded-08 border",
        // Match hljs theme: light=#fafafa, dark=#151617
        "bg-[#fafafa] border-[#fafafa] dark:bg-[#151617] dark:border-[#151617]",
        "text-xs overflow-auto"
      )}
      style={{
        fontFamily: "var(--font-dm-mono)",
        maxHeight,
      }}
    >
      {highlightedHtml ? (
        <pre
          className="whitespace-pre-wrap break-words m-0 hljs"
          dangerouslySetInnerHTML={{ __html: highlightedHtml }}
        />
      ) : (
        <pre className="whitespace-pre-wrap break-words m-0">{content}</pre>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/SandboxStatusIndicator.tsx
================================================
"use client";

import { motion, AnimatePresence } from "motion/react";

import {
  useSession,
  useIsPreProvisioning,
  useIsPreProvisioningReady,
  useIsPreProvisioningFailed,
} from "@/app/craft/hooks/useBuildSessionStore";
import { Card } from "@/components/ui/card";
import Text from "@/refresh-components/texts/Text";

const STATUS_CONFIG = {
  provisioning: {
    color: "bg-status-warning-05",
    pulse: true,
    label: "Initializing sandbox...",
  },
  running: {
    color: "bg-status-success-05",
    pulse: false,
    label: "Sandbox running",
  },
  idle: { color: "bg-status-warning-05", pulse: false, label: "Sandbox idle" },
  sleeping: {
    color: "bg-status-info-05",
    pulse: false,
    label: "Sandbox sleeping",
  },
  restoring: {
    color: "bg-status-warning-05",
    pulse: true,
    label: "Restoring sandbox...",
  },
  terminated: {
    color: "bg-status-error-05",
    pulse: false,
    label: "Sandbox terminated",
  },
  failed: {
    color: "bg-status-error-05",
    pulse: false,
    label: "Failed to provision sandbox",
  },
  ready: {
    color: "bg-status-success-05",
    pulse: false,
    label: "Sandbox ready",
  },
  loading: {
    color: "bg-text-03",
    pulse: true,
    label: "Finding sandbox...",
  },
} as const;

type Status = keyof typeof STATUS_CONFIG;

interface SandboxStatusIndicatorProps {}

/**
 * Derives the current sandbox status from session state or pre-provisioning state.
 *
 * Priority:
 * 1. Actual sandbox status from backend (if session has sandbox info)
 * 2. Session exists but no sandbox info → "running" (optimistic for consumed pre-provisioned sessions)
 * 3. Pre-provisioning failed → "failed"
 * 4. Pre-provisioning in progress → "provisioning" (only when no session - welcome page)
 * 5. Pre-provisioning ready (not yet consumed) → "ready"
 * 6. Default → "loading" (gray, finding sandbox)
 *
 * IMPORTANT: Pre-provisioning state is checked AFTER session existence because
 * pre-provisioning is for NEW sessions. When viewing an existing session, we
 * should show that session's status, not the background pre-provisioning state.
 */
function deriveSandboxStatus(
  session: ReturnType<typeof useSession>,
  isPreProvisioning: boolean,
  isReady: boolean,
  isFailed: boolean
): Status {
  // 1. Backend is source of truth when available
  if (session?.sandbox) {
    return session.sandbox.status as Status;
  }
  // 2. Session exists but no sandbox info - assume running
  // (This handles consumed pre-provisioned sessions before sandbox loads)
  if (session) {
    return "running";
  }
  // 3. Pre-provisioning failed
  if (isFailed) {
    return "failed";
  }
  // 4. No session - check pre-provisioning state (welcome page)
  if (isPreProvisioning) {
    return "provisioning";
  }
  // 5. Pre-provisioning ready but not consumed
  if (isReady) {
    return "ready";
  }
  // 6. No session, no pre-provisioning state - loading
  return "loading";
}

/**
 * Displays the current sandbox status with a colored indicator dot.
 *
 * Shows actual sandbox state when a session exists, otherwise shows
 * pre-provisioning state (provisioning/ready).
 */
export default function SandboxStatusIndicator(
  _props: SandboxStatusIndicatorProps = {}
) {
  const session = useSession();
  const isPreProvisioning = useIsPreProvisioning();
  const isReady = useIsPreProvisioningReady();
  const isFailed = useIsPreProvisioningFailed();

  const status = deriveSandboxStatus(
    session,
    isPreProvisioning,
    isReady,
    isFailed
  );
  const { color, pulse, label } = STATUS_CONFIG[status];

  return (
    <motion.div layout transition={{ duration: 0.3, ease: "easeInOut" }}>
      <Card className="flex items-center gap-2 p-2 overflow-hidden">
        <div
          className={`w-2 h-2 rounded-full shrink-0 ${color} ${
            pulse ? "animate-pulse" : ""
          }`}
        />
        <AnimatePresence mode="wait">
          <motion.span
            key={status}
            initial={{ opacity: 0, y: 5 }}
            animate={{ opacity: 1, y: 0 }}
            exit={{ opacity: 0, y: -5 }}
            transition={{ duration: 0.2 }}
          >
            <Text text05>{label}</Text>
          </motion.span>
        </AnimatePresence>
      </Card>
    </motion.div>
  );
}


================================================
FILE: web/src/app/craft/components/ShareButton.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { SvgLink, SvgCopy, SvgCheck, SvgX } from "@opal/icons";
import { setSessionSharing } from "@/app/craft/services/apiServices";
import type { SharingScope } from "@/app/craft/types/streamingTypes";
import { cn } from "@/lib/utils";
import Popover from "@/refresh-components/Popover";
import Truncated from "@/refresh-components/texts/Truncated";
import { Section } from "@/layouts/general-layouts";
import { ContentAction } from "@opal/layouts";

interface ShareButtonProps {
  sessionId: string;
  webappUrl: string;
  sharingScope: SharingScope;
  onScopeChange?: () => void;
}

const SCOPE_OPTIONS: {
  value: SharingScope;
  label: string;
  description: string;
}[] = [
  {
    value: "private",
    label: "Private",
    description: "Only you can view this app.",
  },
  {
    value: "public_org",
    label: "Organization",
    description: "Anyone logged into your Onyx can view this app.",
  },
];

export default function ShareButton({
  sessionId,
  webappUrl,
  sharingScope: initialScope,
  onScopeChange,
}: ShareButtonProps) {
  const [isOpen, setIsOpen] = useState(false);
  const [sharingScope, setSharingScope] = useState<SharingScope>(initialScope);
  const [copyState, setCopyState] = useState<"idle" | "copied" | "error">(
    "idle"
  );
  const [isLoading, setIsLoading] = useState(false);

  const isShared = sharingScope !== "private";

  const shareUrl =
    typeof window !== "undefined"
      ? webappUrl.startsWith("http")
        ? webappUrl
        : `${window.location.origin}${webappUrl}`
      : webappUrl;

  const handleSelect = async (scope: SharingScope) => {
    if (scope === sharingScope || isLoading) return;
    setIsLoading(true);
    try {
      await setSessionSharing(sessionId, scope);
      setSharingScope(scope);
      onScopeChange?.();
    } catch (err) {
      console.error("Failed to update sharing:", err);
    } finally {
      setIsLoading(false);
    }
  };

  const handleCopy = async () => {
    let success = false;
    try {
      await navigator.clipboard.writeText(shareUrl);
      success = true;
    } catch {
      try {
        const el = document.createElement("textarea");
        el.value = shareUrl;
        el.style.cssText = "position:fixed;opacity:0";
        document.body.appendChild(el);
        el.focus();
        el.select();
        success = document.execCommand("copy");
        document.body.removeChild(el);
      } catch {}
    }
    setCopyState(success ? "copied" : "error");
    setTimeout(() => setCopyState("idle"), 2000);
  };

  return (
    <Section width="fit" height="fit">
      <Popover open={isOpen} onOpenChange={setIsOpen}>
        <Popover.Trigger asChild>
          <Button
            variant="action"
            prominence={isShared ? "primary" : "tertiary"}
            icon={SvgLink}
            aria-label="Share webapp"
          >
            {isShared ? "Shared" : "Share"}
          </Button>
        </Popover.Trigger>
        <Popover.Content side="bottom" align="end" width="lg" sideOffset={4}>
          <Section
            alignItems="stretch"
            gap={0.25}
            padding={0.25}
            width="full"
            height="fit"
          >
            {/* Scope options */}
            <Section alignItems="stretch" gap={0.25} width="full">
              {SCOPE_OPTIONS.map((opt) => (
                <div
                  key={opt.value}
                  role="button"
                  tabIndex={0}
                  onClick={() => handleSelect(opt.value)}
                  onKeyDown={(e) =>
                    e.key === "Enter" && handleSelect(opt.value)
                  }
                  aria-disabled={isLoading}
                  className={cn(
                    "cursor-pointer rounded-08 transition-colors",
                    sharingScope === opt.value
                      ? "bg-background-tint-03"
                      : "hover:bg-background-tint-02"
                  )}
                >
                  <ContentAction
                    title={opt.label}
                    description={opt.description}
                    sizePreset="main-ui"
                    variant="section"
                    paddingVariant="sm"
                  />
                </div>
              ))}
            </Section>

            {/* Copy link — shown when not private */}
            {isShared && (
              <div className="rounded-08 bg-background-tint-02">
                <Section
                  flexDirection="row"
                  alignItems="center"
                  gap={0.25}
                  padding={0.25}
                  width="full"
                  height="fit"
                >
                  <div className="min-w-0 flex-1 overflow-hidden">
                    <Truncated secondaryBody text03>
                      {shareUrl}
                    </Truncated>
                  </div>
                  <Button
                    variant="action"
                    prominence="tertiary"
                    size="md"
                    icon={
                      copyState === "copied"
                        ? SvgCheck
                        : copyState === "error"
                          ? SvgX
                          : SvgCopy
                    }
                    onClick={handleCopy}
                    aria-label="Copy link"
                  />
                </Section>
              </div>
            )}
          </Section>
        </Popover.Content>
      </Popover>
    </Section>
  );
}


================================================
FILE: web/src/app/craft/components/SideBar.tsx
================================================
"use client";

import { memo, useMemo, useCallback, useState, useEffect, useRef } from "react";
import { useRouter, usePathname } from "next/navigation";
import { useBuildContext } from "@/app/craft/contexts/BuildContext";
import {
  useSession,
  useSessionHistory,
  useBuildSessionStore,
  SessionHistoryItem,
} from "@/app/craft/hooks/useBuildSessionStore";
import { useUsageLimits } from "@/app/craft/hooks/useUsageLimits";
import { CRAFT_SEARCH_PARAM_NAMES } from "@/app/craft/services/searchParams";
import { SidebarTab } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import SidebarWrapper from "@/sections/sidebar/SidebarWrapper";
import SidebarBody from "@/sections/sidebar/SidebarBody";
import SidebarSection from "@/sections/sidebar/SidebarSection";
import UserAvatarPopover from "@/sections/sidebar/UserAvatarPopover";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import IconButton from "@/refresh-components/buttons/IconButton";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import LineItem from "@/refresh-components/buttons/LineItem";
import { cn, noProp } from "@/lib/utils";
import useScreenSize from "@/hooks/useScreenSize";
import {
  SvgEditBig,
  SvgArrowLeft,
  SvgSettings,
  SvgMoreHorizontal,
  SvgEdit,
  SvgTrash,
  SvgCheckCircle,
} from "@opal/icons";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Button } from "@opal/components";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import TypewriterText from "@/app/craft/components/TypewriterText";
import {
  DELETE_SUCCESS_DISPLAY_DURATION_MS,
  DELETE_MESSAGE_ROTATION_INTERVAL_MS,
} from "@/app/craft/constants";
import { CRAFT_PATH, CRAFT_CONFIGURE_PATH } from "@/app/craft/v1/constants";

// ============================================================================
// Fun Deleting Messages
// ============================================================================

const DELETING_MESSAGES = [
  "Mining away your blocks...",
  "Returning diamonds to the caves...",
  "Creeper blew up your save file...",
  "Throwing items into lava...",
  "Despawning your entities...",
  "Breaking bedrock illegally...",
  "Enderman teleported your data away...",
  "Falling into the void...",
  "Your build ran out of hearts...",
  "Respawning at world spawn...",
  "Feeding your code to the Ender Dragon...",
  "Activating TNT chain reaction...",
  "Zombie horde consumed your bytes...",
  "Wither withering your session...",
  "Herobrine deleted your world...",
];

function DeletingMessage() {
  const [messageIndex, setMessageIndex] = useState(() =>
    Math.floor(Math.random() * DELETING_MESSAGES.length)
  );

  useEffect(() => {
    const interval = setInterval(() => {
      setMessageIndex((prev) => {
        let next = Math.floor(Math.random() * DELETING_MESSAGES.length);
        while (next === prev && DELETING_MESSAGES.length > 1) {
          next = Math.floor(Math.random() * DELETING_MESSAGES.length);
        }
        return next;
      });
    }, DELETE_MESSAGE_ROTATION_INTERVAL_MS);
    return () => clearInterval(interval);
  }, []);

  return (
    <Text as="p" text03 className="animate-subtle-pulse">
      {DELETING_MESSAGES[messageIndex]}
    </Text>
  );
}

// ============================================================================
// Build Session Button
// ============================================================================

interface BuildSessionButtonProps {
  historyItem: SessionHistoryItem;
  isActive: boolean;
  onLoad: () => void;
  onRename: (newName: string) => Promise<void>;
  onDelete: () => Promise<void>;
  onDeleteActiveSession?: () => void;
}

function BuildSessionButton({
  historyItem,
  isActive,
  onLoad,
  onRename,
  onDelete,
  onDeleteActiveSession,
}: BuildSessionButtonProps) {
  const [renaming, setRenaming] = useState(false);
  const [popoverOpen, setPopoverOpen] = useState(false);
  const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
  const [isDeleting, setIsDeleting] = useState(false);
  const [deleteSuccess, setDeleteSuccess] = useState(false);
  const [deleteError, setDeleteError] = useState<string | null>(null);
  const deleteTimeoutRef = useRef<NodeJS.Timeout | null>(null);

  // Track title changes for typewriter animation (only for auto-naming, not manual rename)
  const prevTitleRef = useRef(historyItem.title);
  const [shouldAnimate, setShouldAnimate] = useState(false);

  // Detect when title changes from "Fresh Craft" to a real name (auto-naming)
  useEffect(() => {
    const prevTitle = prevTitleRef.current;
    if (
      prevTitle !== historyItem.title &&
      prevTitle === "Fresh Craft" &&
      !renaming
    ) {
      setShouldAnimate(true);
    }
    prevTitleRef.current = historyItem.title;
  }, [historyItem.title, renaming]);

  const closeModal = useCallback(() => {
    if (deleteTimeoutRef.current) {
      clearTimeout(deleteTimeoutRef.current);
      deleteTimeoutRef.current = null;
    }
    setIsDeleteModalOpen(false);
    setPopoverOpen(false);
    setDeleteSuccess(false);
    setDeleteError(null);
    setIsDeleting(false);
  }, []);

  const handleConfirmDelete = useCallback(
    async (e: React.MouseEvent<HTMLButtonElement>) => {
      e.stopPropagation();
      setIsDeleting(true);
      setDeleteError(null);

      try {
        await onDelete();
        setIsDeleting(false);
        setDeleteSuccess(true);
        // Show success briefly, then close and redirect if needed
        deleteTimeoutRef.current = setTimeout(() => {
          closeModal();
          if (isActive && onDeleteActiveSession) {
            onDeleteActiveSession();
          }
        }, DELETE_SUCCESS_DISPLAY_DURATION_MS);
      } catch (err) {
        setIsDeleting(false);
        setDeleteError(
          err instanceof Error ? err.message : "Failed to delete session"
        );
      }
    },
    [onDelete, closeModal, isActive, onDeleteActiveSession]
  );

  const rightMenu = (
    <>
      <Popover.Trigger asChild onClick={noProp()}>
        <div>
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <IconButton
            icon={SvgMoreHorizontal}
            className={cn(
              !popoverOpen && "hidden",
              !renaming && "group-hover/SidebarTab:flex"
            )}
            transient={popoverOpen}
            internal
          />
        </div>
      </Popover.Trigger>
      <Popover.Content side="right" align="start">
        <PopoverMenu>
          {[
            <LineItem
              key="rename"
              icon={SvgEdit}
              onClick={noProp(() => setRenaming(true))}
            >
              Rename
            </LineItem>,
            null,
            <LineItem
              key="delete"
              icon={SvgTrash}
              onClick={noProp(() => setIsDeleteModalOpen(true))}
              danger
            >
              Delete
            </LineItem>,
          ]}
        </PopoverMenu>
      </Popover.Content>
    </>
  );

  return (
    <>
      <Popover
        onOpenChange={(state) => {
          setPopoverOpen(state);
        }}
      >
        <Popover.Anchor>
          <SidebarTab
            onClick={onLoad}
            selected={isActive}
            rightChildren={rightMenu}
          >
            {renaming ? (
              <ButtonRenaming
                initialName={historyItem.title}
                onRename={onRename}
                onClose={() => setRenaming(false)}
              />
            ) : shouldAnimate ? (
              <Text
                as="p"
                data-state={isActive ? "active" : "inactive"}
                className="line-clamp-1 break-all text-left"
                mainUiBody
              >
                <TypewriterText
                  text={historyItem.title}
                  charSpeed={25}
                  animateOnMount={true}
                  onAnimationComplete={() => setShouldAnimate(false)}
                />
              </Text>
            ) : (
              historyItem.title
            )}
          </SidebarTab>
        </Popover.Anchor>
      </Popover>
      {isDeleteModalOpen && (
        <ConfirmationModalLayout
          title={
            deleteSuccess
              ? "Deleted"
              : deleteError
                ? "Delete Failed"
                : "Delete Craft"
          }
          icon={deleteSuccess ? SvgCheckCircle : SvgTrash}
          onClose={isDeleting || deleteSuccess ? undefined : closeModal}
          hideCancel={isDeleting || deleteSuccess}
          twoTone={!isDeleting && !deleteSuccess && !deleteError}
          submit={
            deleteSuccess ? (
              <Button disabled variant="action" icon={SvgCheckCircle}>
                Done
              </Button>
            ) : deleteError ? (
              <Button variant="danger" onClick={closeModal}>
                Close
              </Button>
            ) : (
              <Button
                disabled={isDeleting}
                variant="danger"
                onClick={handleConfirmDelete}
                icon={isDeleting ? SimpleLoader : undefined}
              >
                {isDeleting ? "Deleting..." : "Delete"}
              </Button>
            )
          }
        >
          {deleteSuccess ? (
            <Text as="p" text03>
              Build deleted successfully.
            </Text>
          ) : deleteError ? (
            <Text as="p" text03 className="text-status-error-02">
              {deleteError}
            </Text>
          ) : isDeleting ? (
            <DeletingMessage />
          ) : (
            "Are you sure you want to delete this craft? This action cannot be undone."
          )}
        </ConfirmationModalLayout>
      )}
    </>
  );
}

// ============================================================================
// Build Sidebar Inner
// ============================================================================

interface BuildSidebarInnerProps {
  folded: boolean;
  onFoldClick: () => void;
}

const MemoizedBuildSidebarInner = memo(
  ({ folded, onFoldClick }: BuildSidebarInnerProps) => {
    const router = useRouter();
    const pathname = usePathname();
    const session = useSession();
    const sessionHistory = useSessionHistory();
    // Access actions directly like chat does - these don't cause re-renders
    const renameBuildSession = useBuildSessionStore(
      (state) => state.renameBuildSession
    );
    const deleteBuildSession = useBuildSessionStore(
      (state) => state.deleteBuildSession
    );
    const refreshSessionHistory = useBuildSessionStore(
      (state) => state.refreshSessionHistory
    );
    const { limits, isEnabled } = useUsageLimits();

    // Fetch session history on mount
    useEffect(() => {
      refreshSessionHistory();
    }, [refreshSessionHistory]);

    // Build section title with usage if cloud is enabled
    // limit=0 indicates unlimited (local/self-hosted mode), so hide the count
    const sessionsTitle = useMemo(() => {
      if (isEnabled && limits && limits.limit > 0) {
        return `Total Messages (${limits.messagesUsed}/${limits.limit})`;
      }
      return "Sessions";
    }, [isEnabled, limits]);

    // Navigate to new build - session controller handles setCurrentSession and pre-provisioning
    const handleNewBuild = useCallback(() => {
      router.push(CRAFT_PATH);
    }, [router]);

    const handleLoadSession = useCallback(
      (sessionId: string) => {
        router.push(
          `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${sessionId}`
        );
      },
      [router]
    );

    const newBuildButton = useMemo(
      () => (
        <SidebarTab icon={SvgEditBig} folded={folded} onClick={handleNewBuild}>
          Start Crafting
        </SidebarTab>
      ),
      [folded, handleNewBuild]
    );

    const buildConfigurePanel = useMemo(
      () => (
        <SidebarTab
          icon={SvgSettings}
          folded={folded}
          href={CRAFT_CONFIGURE_PATH}
          selected={pathname.startsWith(CRAFT_CONFIGURE_PATH)}
        >
          Configure
        </SidebarTab>
      ),
      [folded, pathname]
    );

    const backToChatButton = useMemo(
      () => (
        <SidebarTab icon={SvgArrowLeft} folded={folded} href="/app">
          Back to Chat
        </SidebarTab>
      ),
      [folded]
    );

    const footer = useMemo(
      () => (
        <div>
          {backToChatButton}
          <UserAvatarPopover folded={folded} />
        </div>
      ),
      [folded, backToChatButton]
    );

    return (
      <SidebarWrapper folded={folded} onFoldClick={onFoldClick}>
        <SidebarBody
          pinnedContent={
            <div className="flex flex-col gap-0.5">
              {newBuildButton}
              {buildConfigurePanel}
            </div>
          }
          footer={footer}
          scrollKey="build-sidebar"
        >
          {!folded && (
            <SidebarSection title={sessionsTitle}>
              {sessionHistory.length === 0 ? (
                <div className="pl-2 pr-1.5 py-1">
                  <Text text01>
                    Start building! Session history will appear here.
                  </Text>
                </div>
              ) : (
                sessionHistory.map((historyItem) => (
                  <BuildSessionButton
                    key={historyItem.id}
                    historyItem={historyItem}
                    isActive={
                      !pathname.startsWith(CRAFT_CONFIGURE_PATH) &&
                      session?.id === historyItem.id
                    }
                    onLoad={() => handleLoadSession(historyItem.id)}
                    onRename={(newName) =>
                      renameBuildSession(historyItem.id, newName)
                    }
                    onDelete={() => deleteBuildSession(historyItem.id)}
                    onDeleteActiveSession={
                      session?.id === historyItem.id
                        ? () => router.push(CRAFT_PATH)
                        : undefined
                    }
                  />
                ))
              )}
            </SidebarSection>
          )}
        </SidebarBody>
      </SidebarWrapper>
    );
  }
);

MemoizedBuildSidebarInner.displayName = "BuildSidebarInner";

// ============================================================================
// Build Sidebar (Main Export)
// ============================================================================

export default function BuildSidebar() {
  const { leftSidebarFolded, setLeftSidebarFolded } = useBuildContext();
  const { isMobile } = useScreenSize();

  if (!isMobile)
    return (
      <MemoizedBuildSidebarInner
        folded={leftSidebarFolded}
        onFoldClick={() => setLeftSidebarFolded((prev) => !prev)}
      />
    );

  return (
    <>
      <div
        className={cn(
          "fixed inset-y-0 left-0 z-50 transition-transform duration-200",
          leftSidebarFolded ? "-translate-x-full" : "translate-x-0"
        )}
      >
        <MemoizedBuildSidebarInner
          folded={false}
          onFoldClick={() => setLeftSidebarFolded(true)}
        />
      </div>

      {/* Hitbox to close the sidebar if anything outside of it is touched */}
      <div
        className={cn(
          "fixed inset-0 z-40 bg-mask-03 backdrop-blur-03 transition-opacity duration-200",
          leftSidebarFolded
            ? "opacity-0 pointer-events-none"
            : "opacity-100 pointer-events-auto"
        )}
        onClick={() => setLeftSidebarFolded(true)}
      />
    </>
  );
}


================================================
FILE: web/src/app/craft/components/SuggestedPrompts.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { cn } from "@/lib/utils";
import {
  getPromptsForPersona,
  UserPersona,
  BuildPrompt,
} from "@/app/craft/constants/exampleBuildPrompts";

interface SuggestedPromptsProps {
  persona?: UserPersona;
  onPromptClick: (promptText: string) => void;
}

/**
 * Shuffles an array using Fisher-Yates algorithm
 */
function shuffleArray<T>(array: T[]): T[] {
  const shuffled = [...array];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    const temp = shuffled[i]!;
    shuffled[i] = shuffled[j]!;
    shuffled[j] = temp;
  }
  return shuffled;
}

/**
 * Randomly selects 4 prompts from the available prompts
 */
function selectRandomPrompts(prompts: BuildPrompt[]): BuildPrompt[] {
  const shuffled = shuffleArray(prompts);
  return shuffled.slice(0, 4);
}

/**
 * SuggestedPrompts - Displays clickable prompt suggestions in a 2x2 grid
 *
 * Shows a 2x2 grid of example prompts based on user persona.
 * Each prompt has summary text on top and a cropped image below it.
 * Clicking a prompt triggers the onPromptClick callback.
 * Randomly selects 4 prompts from the available prompts for the persona.
 * Shuffles on every component mount (when user returns) and when persona changes.
 */
export default function SuggestedPrompts({
  persona = "default",
  onPromptClick,
}: SuggestedPromptsProps) {
  // Randomly select 4 prompts - shuffles on mount and when persona changes
  const [gridPrompts, setGridPrompts] = useState<BuildPrompt[]>(() => {
    const prompts = getPromptsForPersona(persona);
    return selectRandomPrompts(prompts);
  });

  // Reshuffle when persona changes
  useEffect(() => {
    const prompts = getPromptsForPersona(persona);
    setGridPrompts(selectRandomPrompts(prompts));
  }, [persona]);

  return (
    <div className="mt-4 w-full grid grid-cols-2 gap-4">
      {gridPrompts.map((prompt) => (
        <button
          key={prompt.id}
          onClick={() => onPromptClick(prompt.fullText)}
          className={cn(
            "flex flex-col items-center gap-2",
            "p-4 rounded-12",
            "bg-background-neutral-00 border border-border-01",
            "hover:bg-background-neutral-01 hover:border-border-02",
            "transition-all duration-200",
            "cursor-pointer",
            "focus:outline-none focus:ring-2 focus:ring-action-link-01 focus:ring-offset-2"
          )}
        >
          {/* Summary text */}
          <span className="text-sm text-text-04 text-center leading-tight">
            {prompt.summary}
          </span>
          {/* Image resized to cut in half height (4:1 aspect ratio) */}
          {prompt.image && (
            <div className="w-full aspect-[3/1] rounded-08 overflow-hidden bg-background-neutral-01">
              <img
                src={prompt.image}
                alt={prompt.summary}
                className="w-full h-full object-cover object-top"
              />
            </div>
          )}
        </button>
      ))}
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/SuggestionBubbles.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import { SuggestionBubble } from "@/app/craft/hooks/useBuildSessionStore";

interface SuggestionBubblesProps {
  suggestions: SuggestionBubble[];
  loading?: boolean;
  onSelect: (text: string) => void;
}

/**
 * Get theme-specific styles for suggestion bubbles
 */
function getThemeStyles(theme: string): string {
  // Match user message styling - same gray background
  switch (theme) {
    case "add":
    case "question":
    default:
      // Same gray as user messages
      return "bg-background-tint-02 hover:bg-background-tint-03";
  }
}

/**
 * Displays follow-up suggestion bubbles after the first agent message.
 * Styled like user chat messages - stacked vertically and right-aligned.
 * Each bubble is clickable and populates the input bar with the suggestion text.
 */
export default function SuggestionBubbles({
  suggestions,
  loading,
  onSelect,
}: SuggestionBubblesProps) {
  if (loading) {
    return (
      <div className="flex flex-col items-end gap-2">
        {/* Loading skeleton bubbles - right aligned */}
        {[1, 2].map((i) => (
          <div
            key={i}
            className="h-10 w-48 bg-background-neutral-01 rounded-16 animate-pulse"
          />
        ))}
      </div>
    );
  }

  if (!suggestions || suggestions.length === 0) return null;

  return (
    <div className="flex flex-col items-end gap-3">
      {suggestions.map((suggestion, idx) => (
        <button
          key={idx}
          onClick={() => onSelect(suggestion.text)}
          className={cn(
            "px-4 py-3 rounded-t-16 rounded-bl-16 text-sm text-left",
            "text-text-03 transition-colors cursor-pointer",
            "max-w-[95%] shadow-01",
            "animate-in fade-in duration-500",
            getThemeStyles(suggestion.theme)
          )}
          style={{
            animationDelay: `${idx * 100}ms`,
            animationFillMode: "both",
          }}
        >
          {suggestion.text}
        </button>
      ))}
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/TextChunk.tsx
================================================
"use client";

import MinimalMarkdown from "@/components/chat/MinimalMarkdown";

interface TextChunkProps {
  content: string;
}

/**
 * TextChunk - Renders markdown text content
 *
 * Uses MinimalMarkdown for consistent rendering with the main chat.
 */
export default function TextChunk({ content }: TextChunkProps) {
  if (!content) return null;

  return (
    <div className="py-1">
      <MinimalMarkdown content={content} className="text-text-05" />
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/ThinkingCard.tsx
================================================
"use client";

import { useState } from "react";
import { cn } from "@/lib/utils";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import { SvgChevronDown, SvgBubbleText } from "@opal/icons";

interface ThinkingCardProps {
  content: string;
  isStreaming: boolean;
}

/**
 * ThinkingCard - Expandable card for agent thinking content
 *
 * Starts open and stays open. User can manually toggle.
 */
export default function ThinkingCard({
  content,
  isStreaming,
}: ThinkingCardProps) {
  const [isOpen, setIsOpen] = useState(true);

  if (!content) return null;

  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <div
        className={cn(
          "w-full border-[0.5px] rounded-lg overflow-hidden transition-colors",
          "hover:bg-background-tint-02",
          isStreaming
            ? "border-theme-blue-02 bg-theme-blue-01"
            : "border-border-01 bg-background-neutral-01"
        )}
      >
        <CollapsibleTrigger asChild>
          <button
            className={cn(
              "w-full flex items-center justify-between gap-2 px-3 py-2",
              "transition-colors text-left"
            )}
          >
            <div className="flex items-center gap-2">
              <SvgBubbleText
                className={cn(
                  "size-4",
                  isStreaming ? "stroke-theme-blue-05" : "stroke-text-03"
                )}
              />
              <span
                className={cn(
                  "text-sm font-medium",
                  isStreaming ? "text-theme-blue-05" : "text-text-04"
                )}
              >
                Thinking
              </span>
              {isStreaming && (
                <span className="text-xs text-theme-blue-04 animate-pulse">
                  ...
                </span>
              )}
            </div>
            <SvgChevronDown
              className={cn(
                "size-4 stroke-text-03 transition-transform duration-150",
                !isOpen && "rotate-[-90deg]"
              )}
            />
          </button>
        </CollapsibleTrigger>

        <CollapsibleContent>
          <div className="px-3 pb-3 pt-0">
            <div
              className={cn(
                "p-3 rounded-08 text-sm",
                "bg-background-neutral-02 text-text-03",
                "max-h-48 overflow-y-auto",
                "italic"
              )}
            >
              <p className="whitespace-pre-wrap break-words m-0">{content}</p>
            </div>
          </div>
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}


================================================
FILE: web/src/app/craft/components/TodoListCard.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { cn } from "@/lib/utils";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import { SvgChevronDown, SvgCheckCircle } from "@opal/icons";
import {
  TodoListState,
  TodoItem,
  TodoStatus,
} from "@/app/craft/types/displayTypes";

interface TodoListCardProps {
  todoList: TodoListState;
  /** Whether this card should be open by default */
  defaultOpen?: boolean;
}

/**
 * Get status icon for a todo item
 */
function getStatusIcon(status: TodoStatus) {
  switch (status) {
    case "completed":
      return (
        <SvgCheckCircle className="size-4 stroke-status-success-05 mt-0.5 shrink-0" />
      );
    case "in_progress":
      // Gray circle with inset filled circle to indicate work in progress
      return (
        <div className="size-4 rounded-full border-2 border-text-03 mt-0.5 shrink-0 flex items-center justify-center">
          <div className="size-2 bg-text-03 rounded-full" />
        </div>
      );
    case "pending":
    default:
      return (
        <div className="size-4 rounded-full border-2 border-text-03 mt-0.5 shrink-0" />
      );
  }
}

/**
 * Single todo item row
 */
function TodoItemRow({ todo }: { todo: TodoItem }) {
  return (
    <div className="flex items-start gap-2 py-1">
      {/* Status indicator */}
      {getStatusIcon(todo.status)}

      {/* Task text - show activeForm when in_progress, otherwise content */}
      <span
        className={cn(
          "text-sm",
          todo.status === "completed"
            ? "text-text-03 line-through"
            : "text-text-04"
        )}
      >
        {todo.status === "in_progress" ? todo.activeForm : todo.content}
      </span>
    </div>
  );
}

/**
 * TodoListCard - Collapsible card showing a list of todo items
 *
 * Features:
 * - Shows progress count (e.g., "3/5 completed")
 * - Spinner in header when any item is in_progress
 * - Auto-collapses when new todo list appears (controlled by parent)
 * - Items show different states: pending (empty circle), in_progress (spinner), completed (checkmark)
 */
export default function TodoListCard({
  todoList,
  defaultOpen = true,
}: TodoListCardProps) {
  const [isOpen, setIsOpen] = useState(defaultOpen);

  // Update isOpen when defaultOpen changes (for auto-collapse behavior)
  useEffect(() => {
    setIsOpen(defaultOpen);
  }, [defaultOpen]);

  // Calculate progress stats
  const total = todoList.todos.length;
  const completed = todoList.todos.filter(
    (t) => t.status === "completed"
  ).length;

  // Determine background color based on state
  // Only two states: gray (default) and green (completed)
  const allCompleted = completed === total && total > 0;

  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <div
        className={cn(
          "w-full border-[0.5px] rounded-lg overflow-hidden",
          allCompleted
            ? "bg-status-success-01 border-status-success-01"
            : "bg-background-neutral-01 border-border-01"
        )}
      >
        <CollapsibleTrigger asChild>
          <button
            className={cn(
              "w-full flex items-center justify-between px-3 py-2",
              "hover:bg-background-tint-02 transition-colors text-left"
            )}
          >
            <div className="flex items-center gap-2 min-w-0 flex-1">
              {/* Status indicator in header - no spinner, only static icons */}
              {allCompleted ? (
                <SvgCheckCircle className="size-4 stroke-status-success-05 shrink-0" />
              ) : (
                <div className="size-4 rounded border-2 border-text-03 shrink-0 flex items-center justify-center">
                  <div className="size-2 bg-text-03 rounded-sm" />
                </div>
              )}

              {/* Title */}
              <span className="text-sm font-medium text-text-04">Tasks</span>

              {/* Progress count */}
              <span className="text-xs text-text-03">
                {completed}/{total} completed
              </span>
            </div>

            {/* Expand arrow */}
            <SvgChevronDown
              className={cn(
                "size-4 stroke-text-03 transition-transform duration-150 shrink-0",
                !isOpen && "rotate-[-90deg]"
              )}
            />
          </button>
        </CollapsibleTrigger>

        <CollapsibleContent>
          <div className="px-3 pb-3 pt-0 space-y-0.5">
            {todoList.todos.map((todo, index) => (
              <TodoItemRow key={`${todoList.id}-${index}`} todo={todo} />
            ))}
            {todoList.todos.length === 0 && (
              <span className="text-sm text-text-03 italic">No tasks</span>
            )}
          </div>
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}


================================================
FILE: web/src/app/craft/components/ToggleWarningModal.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";

interface ToggleWarningModalProps {
  open: boolean;
  onConfirm: () => void;
  onCancel: () => void;
}

export function ToggleWarningModal({
  open,
  onConfirm,
  onCancel,
}: ToggleWarningModalProps) {
  if (!open) return null;

  return (
    <div className="fixed inset-0 z-[1400] flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black/50 backdrop-blur-sm"
        onClick={(e) => {
          e.stopPropagation();
          onCancel();
        }}
      />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6">
          {/* Header */}
          <div className="flex items-center justify-center">
            <Text headingH2 text05>
              Show all models?
            </Text>
          </div>

          {/* Message */}
          <div className="flex justify-center">
            <Text mainUiBody text04 className="text-center">
              We recommend using <strong>Claude Opus 4.6</strong> for Crafting.
              <br />
              Other models may have reduced capabilities for code creation,
              <br />
              data analysis, and artifact creation.
            </Text>
          </div>

          {/* Action buttons */}
          <div className="flex items-center justify-center gap-3">
            <button
              type="button"
              onClick={(e) => {
                e.stopPropagation();
                onConfirm();
              }}
              className="px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors"
            >
              <Text mainUiBody text05>
                Show All Models
              </Text>
            </button>
            <button
              type="button"
              onClick={(e) => {
                e.stopPropagation();
                onCancel();
              }}
              className="px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors"
            >
              <Text
                mainUiAction
                className="text-text-light-05 dark:text-text-dark-05"
              >
                Keep Recommended
              </Text>
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/ToolCallPill.tsx
================================================
"use client";

import { useState } from "react";
import { cn } from "@/lib/utils";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
  SvgChevronDown,
  SvgTerminalSmall,
  SvgFileText,
  SvgEdit,
  SvgLoader,
  SvgCheckSquare,
  SvgAlertCircle,
  SvgBubbleText,
} from "@opal/icons";
import RawOutputBlock from "@/app/craft/components/RawOutputBlock";
import DiffView from "@/app/craft/components/DiffView";
import { ToolCallState, ToolCallKind } from "@/app/craft/types/displayTypes";

interface ToolCallPillProps {
  toolCall: ToolCallState;
}

/**
 * Get icon based on tool kind
 */
function getToolIcon(kind: ToolCallKind) {
  switch (kind) {
    case "execute":
      return SvgTerminalSmall;
    case "read":
      return SvgFileText;
    case "task":
      return SvgBubbleText;
    case "other":
      return SvgEdit;
    default:
      return SvgTerminalSmall;
  }
}

/**
 * Get status icon and color
 */
function getStatusDisplay(status: string) {
  switch (status) {
    case "pending":
      return {
        icon: null,
        iconClass: "stroke-status-info-05",
        bgClass: "bg-status-info-01 border-status-info-01",
        showSpinner: true,
      };
    case "in_progress":
      return {
        icon: null,
        iconClass: "stroke-status-info-05",
        bgClass: "bg-status-info-01 border-status-info-01",
        showSpinner: true,
      };
    case "completed":
      return {
        icon: SvgCheckSquare,
        iconClass: "stroke-status-success-05",
        bgClass: "bg-background-neutral-01 border-border-01",
        showSpinner: false,
      };
    case "failed":
      return {
        icon: SvgAlertCircle,
        iconClass: "stroke-status-error-05",
        bgClass: "bg-status-error-01 border-status-error-01",
        showSpinner: false,
      };
    default:
      return {
        icon: null,
        iconClass: "stroke-text-03",
        bgClass: "bg-background-neutral-01 border-border-01",
        showSpinner: false,
      };
  }
}

/**
 * Get language hint for syntax highlighting based on tool kind and title
 */
function getLanguageHint(toolCall: ToolCallState): string | undefined {
  // Search results (glob/grep) - no highlighting for file lists
  if (
    toolCall.title === "Searching files" ||
    toolCall.title === "Searching content" ||
    toolCall.title === "Searching"
  ) {
    return undefined;
  }

  switch (toolCall.kind) {
    case "execute":
      return "bash";
    case "task":
      return "markdown";
    case "read":
    case "other":
      // Use description (file path) for syntax detection
      return toolCall.description;
    default:
      return undefined;
  }
}

/**
 * ToolCallPill - Expandable pill for tool calls
 *
 * Shows description and command in collapsed state.
 * Expands to show raw output.
 *
 * Status icons:
 * - pending: gray circle
 * - in_progress: blue spinner
 * - completed: green checkmark
 * - failed: red X
 */
export default function ToolCallPill({ toolCall }: ToolCallPillProps) {
  const [isOpen, setIsOpen] = useState(false);

  const Icon = getToolIcon(toolCall.kind);
  const statusDisplay = getStatusDisplay(toolCall.status);
  const StatusIcon = statusDisplay.icon;

  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <div
        className={cn(
          "w-full border-[0.5px] rounded-lg overflow-hidden transition-colors",
          "hover:bg-background-tint-02",
          statusDisplay.bgClass
        )}
      >
        <CollapsibleTrigger asChild>
          <button
            className={cn(
              "w-full flex flex-col gap-1 px-3 py-2",
              "transition-colors text-left"
            )}
          >
            {/* Top row: status icon + title + description + expand arrow */}
            <div className="flex items-center justify-between gap-2 w-full">
              <div className="flex items-center gap-2 min-w-0 flex-1">
                {/* Status indicator */}
                {statusDisplay.showSpinner ? (
                  <SvgLoader className="size-4 stroke-status-info-05 animate-spin shrink-0" />
                ) : StatusIcon ? (
                  <StatusIcon
                    className={cn("size-4 shrink-0", statusDisplay.iconClass)}
                  />
                ) : (
                  <Icon className="size-4 stroke-text-03 shrink-0" />
                )}

                {/* Title (action) */}
                <span className="text-sm font-medium text-text-04 shrink-0">
                  {toolCall.title}
                </span>

                {/* Description (target) */}
                {toolCall.description && (
                  <span className="text-sm text-text-03 truncate">
                    {toolCall.description}
                  </span>
                )}
              </div>

              {/* Expand arrow */}
              <SvgChevronDown
                className={cn(
                  "size-4 stroke-text-03 transition-transform duration-150 shrink-0",
                  !isOpen && "rotate-[-90deg]"
                )}
              />
            </div>

            {/* Bottom row: command in monospace (for execute tools) */}
            {toolCall.kind === "execute" && toolCall.command && (
              <div
                className="text-xs text-text-03 truncate pl-6"
                style={{ fontFamily: "var(--font-dm-mono)" }}
              >
                {toolCall.command}
              </div>
            )}
          </button>
        </CollapsibleTrigger>

        <CollapsibleContent>
          <div className="px-3 pb-3 pt-0">
            {/* Show diff view for edit operations (not new files) */}
            {toolCall.title === "Editing file" &&
            toolCall.oldContent !== undefined &&
            toolCall.newContent !== undefined ? (
              <DiffView
                oldContent={toolCall.oldContent}
                newContent={toolCall.newContent}
                maxHeight="300px"
                filePath={toolCall.description}
              />
            ) : (
              <RawOutputBlock
                content={toolCall.rawOutput}
                maxHeight="300px"
                language={getLanguageHint(toolCall)}
              />
            )}
          </div>
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}


================================================
FILE: web/src/app/craft/components/TypewriterText.tsx
================================================
"use client";

import { useState, useEffect, useRef, memo } from "react";

interface TypewriterTextProps {
  /** The text to display with typewriter animation */
  text: string;
  /** Speed of each character animation in ms (default: 30) */
  charSpeed?: number;
  /** Whether to animate on initial render (default: false) */
  animateOnMount?: boolean;
  /** Class name for the text container */
  className?: string;
  /** Callback when animation completes */
  onAnimationComplete?: () => void;
}

/**
 * TypewriterText - Animates text changes with a delete-then-type effect.
 *
 * When text changes:
 * 1. Old text is deleted character by character (from end to start)
 * 2. New text is typed character by character (from start to end)
 *
 * This creates a smooth "rename" animation effect for session titles.
 */
function TypewriterText({
  text,
  charSpeed = 30,
  animateOnMount = false,
  className = "",
  onAnimationComplete,
}: TypewriterTextProps) {
  // Track the currently displayed text
  const [displayedText, setDisplayedText] = useState(
    animateOnMount ? "" : text
  );
  // Track whether we're in the "deleting" or "typing" phase
  const [isDeleting, setIsDeleting] = useState(false);
  // Store the target text we're animating towards
  const targetTextRef = useRef(text);
  // Store the previous text for comparison
  // When animateOnMount is true, initialize to empty so animation triggers
  const prevTextRef = useRef(animateOnMount ? "" : text);
  // Track if this is the first render
  const isFirstRender = useRef(true);
  // Animation frame ID for cleanup
  const animationRef = useRef<NodeJS.Timeout | null>(null);

  useEffect(() => {
    // Skip animation on first render unless animateOnMount is true
    if (isFirstRender.current) {
      isFirstRender.current = false;
      if (!animateOnMount) {
        setDisplayedText(text);
        prevTextRef.current = text;
        targetTextRef.current = text;
        return;
      }
      // When animateOnMount is true, we want to animate from empty to text
      // So we skip the delete phase and go straight to typing
      // Set prevTextRef to empty so subsequent renders don't trigger delete phase
      prevTextRef.current = "";
      targetTextRef.current = text;
      setIsDeleting(false); // Start in typing phase
      return;
    }

    // If text hasn't changed, no animation needed
    if (text === prevTextRef.current) {
      return;
    }

    // If we're currently animating from empty (animateOnMount case), don't restart
    // This happens when prevTextRef is "" (from animateOnMount) and we're typing
    if (
      prevTextRef.current === "" &&
      displayedText.length < targetTextRef.current.length &&
      !isDeleting
    ) {
      // We're in the middle of typing from animateOnMount, don't interrupt
      return;
    }

    // Clear any existing animation
    if (animationRef.current) {
      clearTimeout(animationRef.current);
    }

    // Update target and start deleting phase
    targetTextRef.current = text;
    setIsDeleting(true);

    return () => {
      if (animationRef.current) {
        clearTimeout(animationRef.current);
      }
    };
  }, [text, animateOnMount]);

  useEffect(() => {
    // Handle the animation loop
    if (isDeleting) {
      // Deleting phase: remove characters from the end
      if (displayedText.length > 0) {
        animationRef.current = setTimeout(() => {
          setDisplayedText((prev) => prev.slice(0, -1));
        }, charSpeed);
      } else {
        // Done deleting, switch to typing phase
        setIsDeleting(false);
        prevTextRef.current = targetTextRef.current;
      }
    } else {
      // Typing phase: add characters from the target
      const target = targetTextRef.current;
      if (displayedText.length < target.length) {
        animationRef.current = setTimeout(() => {
          setDisplayedText(target.slice(0, displayedText.length + 1));
        }, charSpeed);
      } else if (
        displayedText.length === target.length &&
        displayedText === target
      ) {
        // Animation complete - update prevTextRef so future changes are detected
        prevTextRef.current = target;
        onAnimationComplete?.();
      }
    }

    return () => {
      if (animationRef.current) {
        clearTimeout(animationRef.current);
      }
    };
  }, [displayedText, isDeleting, charSpeed, onAnimationComplete]);

  return <span className={className}>{displayedText}</span>;
}

export default memo(TypewriterText);


================================================
FILE: web/src/app/craft/components/UpgradePlanModal.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";
import { SvgAlertTriangle } from "@opal/icons";
import { UsageLimits } from "@/app/craft/types/streamingTypes";

interface UpgradePlanModalProps {
  open: boolean;
  onClose: () => void;
  limits: UsageLimits | null;
}

/**
 * Modal shown when users hit their message limit.
 * Shows different messaging for free (total limit) vs paid (weekly limit) users.
 */
export default function UpgradePlanModal({
  open,
  onClose,
  limits,
}: UpgradePlanModalProps) {
  if (!open) return null;

  const isPaidUser = limits?.limitType === "weekly";

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      <div
        className="absolute inset-0 bg-black/50 backdrop-blur-sm"
        onClick={onClose}
      />

      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6 min-h-[300px]">
          <div className="flex-1 flex flex-col items-center justify-center gap-6">
            <SvgAlertTriangle className="w-16 h-16 text-status-warning-02" />

            <div className="flex flex-col items-center gap-2 text-center">
              <Text headingH2 text05>
                You've reached your message limit
              </Text>
              <Text mainUiBody text03 className="max-w-sm">
                {isPaidUser ? (
                  <>
                    You've used all {limits?.limit ?? 25} messages for this
                    week. Your message limit will automatically reset at the
                    start of each week, allowing you to continue crafting with
                    Onyx.
                  </>
                ) : (
                  <>
                    You've used all {limits?.limit ?? 5} free messages available
                    in your trial. You've reached the limit for your free
                    account.
                  </>
                )}
              </Text>
            </div>
          </div>

          <div className="flex justify-center pt-2">
            <button
              type="button"
              onClick={onClose}
              className="flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors"
            >
              <Text mainUiAction>Got it</Text>
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/UserMessage.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";

interface UserMessageProps {
  content: string;
}

export default function UserMessage({ content }: UserMessageProps) {
  return (
    <div className="flex justify-end py-4">
      <div className="max-w-[80%] whitespace-break-spaces rounded-t-16 rounded-bl-16 bg-background-tint-02 py-3 px-4">
        <Text as="p" mainContentBody>
          {content}
        </Text>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/WorkingLine.tsx
================================================
"use client";

import { useState } from "react";
import { cn } from "@/lib/utils";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import {
  SvgChevronDown,
  SvgTerminalSmall,
  SvgFileText,
  SvgEdit,
  SvgLoader,
  SvgCheckSquare,
  SvgAlertCircle,
  SvgSearch,
} from "@opal/icons";
import RawOutputBlock from "@/app/craft/components/RawOutputBlock";
import DiffView from "@/app/craft/components/DiffView";
import { ToolCallState, ToolCallKind } from "@/app/craft/types/displayTypes";

interface WorkingLineProps {
  toolCall: ToolCallState;
}

/**
 * Get icon based on tool kind
 */
function getToolIcon(kind: ToolCallKind) {
  switch (kind) {
    case "search":
      return SvgSearch;
    case "execute":
      return SvgTerminalSmall;
    case "read":
      return SvgFileText;
    case "edit":
      return SvgEdit;
    case "other":
      return SvgEdit;
    default:
      return SvgTerminalSmall;
  }
}

/**
 * Get status icon and styling
 */
function getStatusDisplay(status: string) {
  switch (status) {
    case "pending":
    case "in_progress":
      return {
        icon: SvgLoader,
        iconClass: "stroke-status-info-05 animate-spin",
      };
    case "completed":
      return {
        icon: SvgCheckSquare,
        iconClass: "stroke-status-success-05",
      };
    case "failed":
      return {
        icon: SvgAlertCircle,
        iconClass: "stroke-status-error-05",
      };
    default:
      return {
        icon: null,
        iconClass: "stroke-text-03",
      };
  }
}

/**
 * Get language hint for syntax highlighting
 */
function getLanguageHint(toolCall: ToolCallState): string | undefined {
  switch (toolCall.kind) {
    case "search":
      // Search results - no highlighting for file lists
      return undefined;
    case "execute":
      return "bash";
    case "read":
    case "edit":
    case "other":
      // Use description (file path) for syntax detection
      return toolCall.description;
    default:
      return undefined;
  }
}

/**
 * WorkingLine - A single expandable line within the Working pill.
 *
 * Shows: [status icon] [action text] [expand arrow]
 * Expands to show detailed content (diff view or raw output)
 */
export default function WorkingLine({ toolCall }: WorkingLineProps) {
  const [isOpen, setIsOpen] = useState(false);

  const statusDisplay = getStatusDisplay(toolCall.status);
  const StatusIcon = statusDisplay.icon;
  const ToolIcon = getToolIcon(toolCall.kind);

  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <div className="rounded-md overflow-hidden">
        <CollapsibleTrigger asChild>
          <button
            className={cn(
              "w-full flex gap-2 py-1.5 pl-2 pr-4 rounded-md",
              "hover:bg-background-tint-02 transition-colors text-left",
              "items-start"
            )}
          >
            {/* Status indicator */}
            {StatusIcon ? (
              <StatusIcon
                className={cn(
                  "size-3.5 shrink-0 mt-0.5",
                  statusDisplay.iconClass
                )}
              />
            ) : (
              <ToolIcon
                className={cn("size-3.5 stroke-text-03 shrink-0 mt-0.5")}
              />
            )}

            {/* Action text */}
            <span className="text-sm flex-1 min-w-0 text-left">
              <span
                className={cn(
                  "block",
                  isOpen
                    ? toolCall.kind === "execute"
                      ? "break-words whitespace-pre-wrap"
                      : "break-words whitespace-normal"
                    : "truncate"
                )}
              >
                {toolCall.kind === "execute" && toolCall.description ? (
                  <>
                    {/* For execute: show description as primary, command as secondary */}
                    <span className="text-text-04">
                      {toolCall.description.charAt(0).toUpperCase() +
                        toolCall.description.slice(1)}
                    </span>
                    {toolCall.command && (
                      <span className="text-text-02"> {toolCall.command}</span>
                    )}
                  </>
                ) : (
                  <span className="text-text-04">
                    {toolCall.title}
                    {toolCall.description &&
                      toolCall.description !== toolCall.title && (
                        <>
                          {" "}
                          <span className="text-text-02">
                            {toolCall.description}
                          </span>
                        </>
                      )}
                  </span>
                )}
              </span>
            </span>

            {/* Expand arrow */}
            <SvgChevronDown
              className={cn(
                "size-3.5 stroke-text-03 transition-transform duration-150 shrink-0 mt-0.5",
                !isOpen && "rotate-[-90deg]"
              )}
            />
          </button>
        </CollapsibleTrigger>

        <CollapsibleContent>
          <div className="pl-6 pr-2 pb-2">
            {/* Show diff view for edit operations with actual diff data */}
            {toolCall.kind === "edit" &&
            !toolCall.isNewFile &&
            toolCall.oldContent &&
            toolCall.newContent ? (
              <DiffView
                oldContent={toolCall.oldContent}
                newContent={toolCall.newContent}
                maxHeight="200px"
                filePath={toolCall.description}
              />
            ) : (
              <RawOutputBlock
                content={toolCall.rawOutput}
                maxHeight="200px"
                language={getLanguageHint(toolCall)}
              />
            )}
          </div>
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}


================================================
FILE: web/src/app/craft/components/WorkingPill.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { cn } from "@/lib/utils";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import { SvgChevronDown, SvgPencilRuler } from "@opal/icons";
import { ToolCallState } from "@/app/craft/types/displayTypes";
import WorkingLine from "@/app/craft/components/WorkingLine";

interface WorkingPillProps {
  toolCalls: ToolCallState[];
  /** Whether this is the latest/active working group - auto-collapses when false */
  isLatest?: boolean;
}

/**
 * WorkingPill - Consolidates multiple tool calls into a single expandable container.
 *
 * Features:
 * - Auto-expanded by default when isLatest
 * - Auto-collapses when a newer Working pill appears (isLatest becomes false)
 * - Each action renders as an expandable WorkingLine
 */
export default function WorkingPill({
  toolCalls,
  isLatest = true,
}: WorkingPillProps) {
  const [isOpen, setIsOpen] = useState(true); // Auto-expanded by default

  // Auto-collapse when this is no longer the latest working group
  useEffect(() => {
    if (!isLatest) {
      setIsOpen(false);
    }
  }, [isLatest]);

  // Check if any tool is in progress (for background color)
  const hasInProgress = toolCalls.some(
    (tc) => tc.status === "pending" || tc.status === "in_progress"
  );

  return (
    <Collapsible open={isOpen} onOpenChange={setIsOpen}>
      <div
        className={cn(
          "w-full border-[0.5px] rounded-lg overflow-hidden transition-colors",
          hasInProgress
            ? "bg-status-info-01 border-status-info-01"
            : "bg-background-neutral-01 border-border-01"
        )}
      >
        <CollapsibleTrigger asChild>
          <button
            className={cn(
              "w-full flex items-center justify-between gap-2 px-3 py-2",
              "transition-colors text-left rounded-t-lg",
              "hover:bg-background-tint-02"
            )}
          >
            <div className="flex items-center gap-2 min-w-0 flex-1">
              {/* Static icon */}
              <SvgPencilRuler className="size-4 stroke-text-03 shrink-0" />

              {/* Title */}
              <span className="text-sm font-medium text-text-04">Working</span>
            </div>

            {/* Expand arrow */}
            <SvgChevronDown
              className={cn(
                "size-4 stroke-text-03 transition-transform duration-150 shrink-0",
                !isOpen && "rotate-[-90deg]"
              )}
            />
          </button>
        </CollapsibleTrigger>

        <CollapsibleContent>
          <div className="pl-5 pr-3 pb-3 pt-0 space-y-1">
            {toolCalls.map((toolCall) => (
              <WorkingLine key={toolCall.id} toolCall={toolCall} />
            ))}
          </div>
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/ArtifactsTab.tsx
================================================
"use client";

import { useCallback, useEffect, useState } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import {
  SvgGlobe,
  SvgDownloadCloud,
  SvgFolder,
  SvgFiles,
  SvgChevronDown,
  SvgChevronRight,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { Artifact } from "@/app/craft/hooks/useBuildSessionStore";
import { useFilesNeedsRefresh } from "@/app/craft/hooks/useBuildSessionStore";
import {
  fetchDirectoryListing,
  downloadArtifactFile,
  downloadDirectory,
} from "@/app/craft/services/apiServices";
import { FileSystemEntry } from "@/app/craft/types/streamingTypes";
import { getFileIcon } from "@/lib/utils";
import { cn } from "@/lib/utils";

interface ArtifactsTabProps {
  artifacts: Artifact[];
  sessionId: string | null;
}

export default function ArtifactsTab({
  artifacts,
  sessionId,
}: ArtifactsTabProps) {
  const webappArtifacts = artifacts.filter(
    (a) => a.type === "nextjs_app" || a.type === "web_app"
  );

  const filesNeedsRefresh = useFilesNeedsRefresh();
  const { data: outputsListing } = useSWR(
    sessionId
      ? [SWR_KEYS.buildSessionOutputFiles(sessionId), filesNeedsRefresh]
      : null,
    () => (sessionId ? fetchDirectoryListing(sessionId, "outputs") : null),
    {
      revalidateOnFocus: false,
      dedupingInterval: 2000,
    }
  );

  // Filter out "web" directory (shown as webapp artifact)
  const rawEntries = (outputsListing?.entries ?? []).filter(
    (entry) => entry.name !== "web"
  );

  // Filter out empty directories
  const [outputEntries, setOutputEntries] = useState<FileSystemEntry[]>([]);

  useEffect(() => {
    if (!sessionId || rawEntries.length === 0) {
      setOutputEntries([]);
      return;
    }

    let cancelled = false;

    async function filterEmptyDirs() {
      const results = await Promise.all(
        rawEntries.map(async (entry) => {
          if (!entry.is_directory) return entry;
          try {
            const listing = await fetchDirectoryListing(sessionId!, entry.path);
            if (listing && listing.entries.length > 0) return entry;
          } catch {
            return entry;
          }
          return null;
        })
      );
      if (!cancelled) {
        setOutputEntries(
          results.filter((e): e is FileSystemEntry => e !== null)
        );
      }
    }

    filterEmptyDirs();
    return () => {
      cancelled = true;
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId, JSON.stringify(rawEntries.map((e) => e.path))]);

  const handleWebappDownload = () => {
    if (!sessionId) return;
    const link = document.createElement("a");
    link.href = `/api/build/sessions/${sessionId}/webapp-download`;
    link.download = "";
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
  };

  const handleOutputDownload = useCallback(
    (path: string, isDirectory: boolean) => {
      if (!sessionId) return;
      if (isDirectory) {
        downloadDirectory(sessionId, path);
      } else {
        downloadArtifactFile(sessionId, path);
      }
    },
    [sessionId]
  );

  const hasWebapps = webappArtifacts.length > 0;
  const hasOutputFiles = outputEntries.length > 0;

  if (!sessionId || (!hasWebapps && !hasOutputFiles)) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgFiles size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          No artifacts yet
        </Text>
        <Text secondaryBody text02>
          Output files and web apps will appear here
        </Text>
      </Section>
    );
  }

  return (
    <div className="flex flex-col h-full">
      <div className="flex-1 overflow-auto overlay-scrollbar">
        <div className="divide-y divide-border-01">
          {/* Webapp Artifacts */}
          {webappArtifacts.map((artifact) => (
            <div
              key={artifact.id}
              className="flex items-center gap-3 p-3 hover:bg-background-tint-01 transition-colors"
            >
              <SvgGlobe size={24} className="stroke-text-02 flex-shrink-0" />

              <div className="flex-1 min-w-0 flex items-center gap-2">
                <Text secondaryBody text04 className="truncate">
                  {artifact.name}
                </Text>
                <Text secondaryBody text02>
                  Next.js Application
                </Text>
              </div>

              <div className="flex items-center gap-2">
                <Button
                  variant="action"
                  prominence="tertiary"
                  icon={SvgDownloadCloud}
                  onClick={handleWebappDownload}
                >
                  Download
                </Button>
              </div>
            </div>
          ))}

          {/* Output Files & Folders */}
          {outputEntries.map((entry) => (
            <OutputEntryRow
              key={entry.path}
              entry={entry}
              sessionId={sessionId!}
              depth={0}
              onDownload={handleOutputDownload}
            />
          ))}
        </div>
      </div>
    </div>
  );
}

interface OutputEntryRowProps {
  entry: FileSystemEntry;
  sessionId: string;
  depth: number;
  onDownload: (path: string, isDirectory: boolean) => void;
}

function OutputEntryRow({
  entry,
  sessionId,
  depth,
  onDownload,
}: OutputEntryRowProps) {
  const [expanded, setExpanded] = useState(false);
  const [children, setChildren] = useState<FileSystemEntry[]>([]);
  const [loaded, setLoaded] = useState(false);

  const toggleExpand = useCallback(async () => {
    if (!entry.is_directory) return;

    if (!loaded) {
      const listing = await fetchDirectoryListing(sessionId, entry.path);
      if (listing) {
        setChildren(listing.entries);
      }
      setLoaded(true);
    }
    setExpanded((prev) => !prev);
  }, [entry.is_directory, entry.path, sessionId, loaded]);

  const FileIcon = entry.is_directory ? SvgFolder : getFileIcon(entry.name);
  const paddingLeft = depth * 20;

  return (
    <>
      <div
        className={cn(
          "flex items-center gap-3 p-3 hover:bg-background-tint-01 transition-colors",
          entry.is_directory && "cursor-pointer"
        )}
        style={{ paddingLeft: 12 + paddingLeft }}
        onClick={entry.is_directory ? toggleExpand : undefined}
      >
        {entry.is_directory ? (
          expanded ? (
            <SvgChevronDown
              size={16}
              className="stroke-text-03 flex-shrink-0"
            />
          ) : (
            <SvgChevronRight
              size={16}
              className="stroke-text-03 flex-shrink-0"
            />
          )
        ) : (
          <div className="w-4 flex-shrink-0" />
        )}

        <FileIcon size={20} className="stroke-text-02 flex-shrink-0" />

        <div className="flex-1 min-w-0 flex items-center gap-2">
          <Text secondaryBody text04 className="truncate">
            {entry.name}
          </Text>
          {!entry.is_directory && entry.size !== null ? (
            <Text secondaryBody text02>
              {formatFileSize(entry.size)}
            </Text>
          ) : null}
        </div>

        <div className="flex items-center gap-2">
          <Button
            variant="action"
            prominence="tertiary"
            icon={SvgDownloadCloud}
            onClick={(e) => {
              e.stopPropagation();
              onDownload(entry.path, entry.is_directory);
            }}
          >
            Download
          </Button>
        </div>
      </div>

      {expanded &&
        children.map((child) => (
          <OutputEntryRow
            key={child.path}
            entry={child}
            sessionId={sessionId}
            depth={depth + 1}
            onDownload={onDownload}
          />
        ))}
    </>
  );
}

function formatFileSize(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}


================================================
FILE: web/src/app/craft/components/output-panel/FilePreviewContent.tsx
================================================
"use client";

import { useEffect } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { fetchFileContent } from "@/app/craft/services/apiServices";
import Text from "@/refresh-components/texts/Text";
import { SvgFileText } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import ImagePreview from "@/app/craft/components/output-panel/ImagePreview";
import MarkdownFilePreview, {
  type FileRendererProps,
} from "@/app/craft/components/output-panel/MarkdownFilePreview";
import PptxPreview from "@/app/craft/components/output-panel/PptxPreview";
import PdfPreview from "@/app/craft/components/output-panel/PdfPreview";

// ── Preview registry ─────────────────────────────────────────────────────
// Unified registry for all file preview types. First match wins.
//
// "standalone" — binary formats that handle their own data fetching.
// "content"    — text-based formats that receive already-fetched content.

interface StandaloneEntry {
  type: "standalone";
  matches: (filePath: string) => boolean;
  component: React.FC<{
    sessionId: string;
    filePath: string;
    refreshKey?: number;
  }>;
}

interface ContentEntry {
  type: "content";
  matches: (filePath: string, mimeType: string, isImage: boolean) => boolean;
  component: React.FC<FileRendererProps>;
}

type PreviewEntry = StandaloneEntry | ContentEntry;

function ImageRendererWrapper({ content, fileName }: FileRendererProps) {
  return <ImagePreview src={content} fileName={fileName} />;
}

const PREVIEW_REGISTRY: PreviewEntry[] = [
  {
    type: "standalone",
    matches: (path) => /\.pptx$/i.test(path),
    component: PptxPreview,
  },
  {
    type: "standalone",
    matches: (path) => /\.pdf$/i.test(path),
    component: PdfPreview,
  },
  {
    type: "content",
    matches: (_, __, isImage) => isImage,
    component: ImageRendererWrapper,
  },
  {
    type: "content",
    matches: (path) => /\.md$/i.test(path),
    component: MarkdownFilePreview,
  },
];

function findStandalonePreview(filePath: string): StandaloneEntry | undefined {
  return PREVIEW_REGISTRY.find(
    (e): e is StandaloneEntry => e.type === "standalone" && e.matches(filePath)
  );
}

function findContentPreview(
  filePath: string,
  mimeType: string,
  isImage: boolean
): ContentEntry | undefined {
  return PREVIEW_REGISTRY.find(
    (e): e is ContentEntry =>
      e.type === "content" && e.matches(filePath, mimeType, isImage)
  );
}

// ── Public components ────────────────────────────────────────────────────

interface FilePreviewContentProps {
  sessionId: string;
  filePath: string;
  /** Changing this value forces the preview to reload its data */
  refreshKey?: number;
}

/**
 * FilePreviewContent — full-height file preview for the main output panel.
 * Routes to the appropriate preview component based on file type.
 */
export function FilePreviewContent({
  sessionId,
  filePath,
  refreshKey,
}: FilePreviewContentProps) {
  const standalone = findStandalonePreview(filePath);
  if (standalone) {
    const Comp = standalone.component;
    return (
      <Comp sessionId={sessionId} filePath={filePath} refreshKey={refreshKey} />
    );
  }

  return (
    <FetchedFilePreview
      sessionId={sessionId}
      filePath={filePath}
      fullHeight
      refreshKey={refreshKey}
    />
  );
}

/**
 * InlineFilePreview — compact file preview for pre-provisioned mode.
 * Same routing logic, without full-height layout.
 */
export function InlineFilePreview({
  sessionId,
  filePath,
}: FilePreviewContentProps) {
  const standalone = findStandalonePreview(filePath);
  if (standalone) {
    const Comp = standalone.component;
    return <Comp sessionId={sessionId} filePath={filePath} />;
  }

  return <FetchedFilePreview sessionId={sessionId} filePath={filePath} />;
}

// ── FetchedFilePreview (inner) ───────────────────────────────────────────

interface FetchedFilePreviewProps {
  sessionId: string;
  filePath: string;
  fullHeight?: boolean;
  refreshKey?: number;
}

/**
 * Fetches file content via SWR, then delegates to the first matching
 * "content" entry in the registry (or falls back to raw monospace text).
 */
function FetchedFilePreview({
  sessionId,
  filePath,
  fullHeight,
  refreshKey,
}: FetchedFilePreviewProps) {
  const { data, error, isLoading, mutate } = useSWR(
    SWR_KEYS.buildSessionArtifactFile(sessionId, filePath),
    () => fetchFileContent(sessionId, filePath),
    {
      revalidateOnFocus: false,
      dedupingInterval: 5000,
    }
  );

  // Re-fetch when refreshKey changes
  useEffect(() => {
    if (refreshKey && refreshKey > 0) {
      mutate();
    }
  }, [refreshKey, mutate]);

  if (isLoading) {
    if (fullHeight) {
      return (
        <Section
          height="full"
          alignItems="center"
          justifyContent="center"
          padding={2}
        >
          <Text secondaryBody text03>
            Loading file...
          </Text>
        </Section>
      );
    }
    return (
      <div className="p-4">
        <Text secondaryBody text03>
          Loading file...
        </Text>
      </div>
    );
  }

  if (error) {
    if (fullHeight) {
      return (
        <Section
          height="full"
          alignItems="center"
          justifyContent="center"
          padding={2}
        >
          <SvgFileText size={48} className="stroke-text-02" />
          <Text headingH3 text03>
            Error loading file
          </Text>
          <Text secondaryBody text02>
            {error.message}
          </Text>
        </Section>
      );
    }
    return (
      <div className="p-4">
        <Text secondaryBody text02>
          Error: {error.message}
        </Text>
      </div>
    );
  }

  if (!data) {
    if (fullHeight) {
      return (
        <Section
          height="full"
          alignItems="center"
          justifyContent="center"
          padding={2}
        >
          <Text secondaryBody text03>
            No content
          </Text>
        </Section>
      );
    }
    return (
      <div className="p-4">
        <Text secondaryBody text03>
          No content
        </Text>
      </div>
    );
  }

  if (data.error) {
    if (fullHeight) {
      return (
        <Section
          height="full"
          alignItems="center"
          justifyContent="center"
          padding={2}
        >
          <SvgFileText size={48} className="stroke-text-02" />
          <Text headingH3 text03>
            Cannot preview file
          </Text>
          <Text secondaryBody text02 className="text-center max-w-md">
            {data.error}
          </Text>
        </Section>
      );
    }
    return (
      <div className="p-4">
        <Text secondaryBody text02 className="text-center">
          {data.error}
        </Text>
      </div>
    );
  }

  // Match against content-based renderers
  const fileName = filePath.split("/").pop() || filePath;
  const mimeType = data.mimeType ?? "text/plain";
  const isImage = !!data.isImage;

  const contentPreview = findContentPreview(filePath, mimeType, isImage);
  if (contentPreview) {
    const Comp = contentPreview.component;
    return (
      <Comp
        content={data.content}
        fileName={fileName}
        filePath={filePath}
        mimeType={mimeType}
        isImage={isImage}
      />
    );
  }

  // Default fallback: raw text
  if (fullHeight) {
    return (
      <div className="h-full flex flex-col">
        <div className="flex-1 overflow-auto p-4">
          <pre className="font-mono text-sm text-text-04 whitespace-pre-wrap break-words">
            {data.content}
          </pre>
        </div>
      </div>
    );
  }

  return (
    <div className="p-4">
      <pre className="font-mono text-sm text-text-04 whitespace-pre-wrap break-words">
        {data.content}
      </pre>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/FilesTab.tsx
================================================
"use client";

import { useState, useEffect, useMemo, useRef, useCallback } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  useBuildSessionStore,
  useFilesTabState,
  useFilesNeedsRefresh,
} from "@/app/craft/hooks/useBuildSessionStore";
import { fetchDirectoryListing } from "@/app/craft/services/apiServices";
import { FileSystemEntry } from "@/app/craft/types/streamingTypes";
import { cn, getFileIcon } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import {
  SvgHardDrive,
  SvgFolder,
  SvgFolderOpen,
  SvgChevronRight,
  SvgArrowLeft,
  SvgImage,
  SvgFileText,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { InlineFilePreview } from "@/app/craft/components/output-panel/FilePreviewContent";

interface FilesTabProps {
  sessionId: string | null;
  onFileClick?: (path: string, fileName: string) => void;
  /** True when showing pre-provisioned sandbox (read-only, no file clicks) */
  isPreProvisioned?: boolean;
  /** True when sandbox is still being provisioned */
  isProvisioning?: boolean;
}

export default function FilesTab({
  sessionId,
  onFileClick,
  isPreProvisioned = false,
  isProvisioning = false,
}: FilesTabProps) {
  // Get persisted state from store (only used when not pre-provisioned)
  const filesTabState = useFilesTabState();
  const updateFilesTabState = useBuildSessionStore(
    (state) => state.updateFilesTabState
  );

  // Local state for pre-provisioned mode (no persistence needed)
  const [localExpandedPaths, setLocalExpandedPaths] = useState<Set<string>>(
    new Set()
  );
  const [localDirectoryCache, setLocalDirectoryCache] = useState<
    Map<string, FileSystemEntry[]>
  >(new Map());
  const [previewingFile, setPreviewingFile] = useState<{
    path: string;
    fileName: string;
    mimeType: string | null;
  } | null>(null);

  // Use local state for pre-provisioned, store state otherwise
  const expandedPaths = useMemo(
    () =>
      isPreProvisioned
        ? localExpandedPaths
        : new Set(filesTabState.expandedPaths),
    [isPreProvisioned, localExpandedPaths, filesTabState.expandedPaths]
  );

  const directoryCache = useMemo(
    () =>
      isPreProvisioned
        ? localDirectoryCache
        : (new Map(Object.entries(filesTabState.directoryCache)) as Map<
            string,
            FileSystemEntry[]
          >),
    [isPreProvisioned, localDirectoryCache, filesTabState.directoryCache]
  );

  // Scroll container ref for position tracking
  const scrollContainerRef = useRef<HTMLDivElement>(null);

  // Fetch root directory
  const {
    data: rootListing,
    error,
    mutate,
  } = useSWR(
    sessionId ? SWR_KEYS.buildSessionFiles(sessionId) : null,
    () => (sessionId ? fetchDirectoryListing(sessionId, "") : null),
    {
      revalidateOnFocus: false,
      dedupingInterval: 2000,
    }
  );

  // Refresh files list when outputs/ directory changes
  const filesNeedsRefresh = useFilesNeedsRefresh();

  // Snapshot of currently expanded paths — avoids putting both local and store
  // versions in the dependency array (only one is used per mode).
  const currentExpandedPaths = isPreProvisioned
    ? Array.from(localExpandedPaths)
    : filesTabState.expandedPaths;

  useEffect(() => {
    if (filesNeedsRefresh > 0 && sessionId && mutate) {
      // Clear directory cache to ensure all directories are refreshed
      if (isPreProvisioned) {
        setLocalDirectoryCache(new Map());
      } else {
        updateFilesTabState(sessionId, { directoryCache: {} });
      }
      // Refresh root directory listing
      mutate();

      // Re-fetch all currently expanded subdirectories so they don't get
      // stuck on "Loading..." after the cache was cleared
      if (currentExpandedPaths.length > 0) {
        Promise.allSettled(
          currentExpandedPaths.map((p) => fetchDirectoryListing(sessionId, p))
        ).then((settled) => {
          // Collect only the successful fetches into a path → entries map
          const fetched = new Map<string, FileSystemEntry[]>();
          settled.forEach((r, i) => {
            const p = currentExpandedPaths[i];
            if (p && r.status === "fulfilled" && r.value) {
              fetched.set(p, r.value.entries);
            }
          });

          if (isPreProvisioned) {
            setLocalDirectoryCache((prev) => {
              const next = new Map(prev);
              fetched.forEach((entries, p) => next.set(p, entries));
              return next;
            });
          } else {
            const obj: Record<string, FileSystemEntry[]> = {};
            fetched.forEach((entries, p) => {
              obj[p] = entries;
            });
            updateFilesTabState(sessionId, { directoryCache: obj });
          }
        });
      }
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [
    filesNeedsRefresh,
    sessionId,
    mutate,
    isPreProvisioned,
    updateFilesTabState,
  ]);

  // Update cache when root listing changes
  useEffect(() => {
    if (rootListing && sessionId) {
      if (isPreProvisioned) {
        setLocalDirectoryCache((prev) => {
          const newCache = new Map(prev);
          newCache.set("", rootListing.entries);
          return newCache;
        });
      } else {
        const newCache = {
          ...filesTabState.directoryCache,
          "": rootListing.entries,
        };
        updateFilesTabState(sessionId, { directoryCache: newCache });
      }
    }
  }, [rootListing, sessionId, isPreProvisioned]);

  const toggleFolder = useCallback(
    async (path: string) => {
      if (!sessionId) return;

      if (isPreProvisioned) {
        // Use local state for pre-provisioned mode
        const newExpanded = new Set(localExpandedPaths);
        if (newExpanded.has(path)) {
          newExpanded.delete(path);
          setLocalExpandedPaths(newExpanded);
        } else {
          newExpanded.add(path);
          if (!localDirectoryCache.has(path)) {
            const listing = await fetchDirectoryListing(sessionId, path);
            if (listing) {
              setLocalDirectoryCache((prev) => {
                const newCache = new Map(prev);
                newCache.set(path, listing.entries);
                return newCache;
              });
            }
          }
          setLocalExpandedPaths(newExpanded);
        }
      } else {
        // Use store state for active sessions
        const newExpanded = new Set(expandedPaths);
        if (newExpanded.has(path)) {
          newExpanded.delete(path);
          updateFilesTabState(sessionId, {
            expandedPaths: Array.from(newExpanded),
          });
        } else {
          newExpanded.add(path);
          if (!directoryCache.has(path)) {
            const listing = await fetchDirectoryListing(sessionId, path);
            if (listing) {
              const newCache = {
                ...filesTabState.directoryCache,
                [path]: listing.entries,
              };
              updateFilesTabState(sessionId, {
                expandedPaths: Array.from(newExpanded),
                directoryCache: newCache,
              });
              return;
            }
          }
          updateFilesTabState(sessionId, {
            expandedPaths: Array.from(newExpanded),
          });
        }
      }
    },
    [
      sessionId,
      isPreProvisioned,
      localExpandedPaths,
      localDirectoryCache,
      expandedPaths,
      directoryCache,
      filesTabState.directoryCache,
      updateFilesTabState,
    ]
  );

  // Handle file click for pre-provisioned mode (inline preview)
  const handleLocalFileClick = useCallback(
    (path: string, fileName: string, mimeType: string | null) => {
      if (isPreProvisioned) {
        setPreviewingFile({ path, fileName, mimeType });
      } else if (onFileClick) {
        onFileClick(path, fileName);
      }
    },
    [isPreProvisioned, onFileClick]
  );

  // Restore scroll position when component mounts or tab becomes active
  useEffect(() => {
    if (
      scrollContainerRef.current &&
      filesTabState.scrollTop > 0 &&
      !isPreProvisioned
    ) {
      scrollContainerRef.current.scrollTop = filesTabState.scrollTop;
    }
  }, []); // Only on mount

  // Save scroll position on scroll (debounced via passive listener)
  const handleScroll = useCallback(() => {
    if (scrollContainerRef.current && sessionId && !isPreProvisioned) {
      const scrollTop = scrollContainerRef.current.scrollTop;
      updateFilesTabState(sessionId, { scrollTop });
    }
  }, [sessionId, isPreProvisioned, updateFilesTabState]);

  const formatFileSize = (bytes: number | null): string => {
    if (bytes === null) return "";
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  };

  if (!sessionId) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgHardDrive size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          {isProvisioning ? "Preparing sandbox..." : "No files yet"}
        </Text>
        <Text secondaryBody text02>
          {isProvisioning
            ? "Setting up your development environment"
            : "Files created during the build will appear here"}
        </Text>
      </Section>
    );
  }

  if (error) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgHardDrive size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          Error loading files
        </Text>
        <Text secondaryBody text02>
          {error.message}
        </Text>
      </Section>
    );
  }

  if (!rootListing) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <Text secondaryBody text03>
          Loading files...
        </Text>
      </Section>
    );
  }

  // Show inline file preview for pre-provisioned mode
  if (isPreProvisioned && previewingFile && sessionId) {
    const isImage = previewingFile.mimeType?.startsWith("image/");

    return (
      <div className="flex flex-col h-full">
        {/* Header with back button */}
        <div className="flex items-center gap-2 px-3 py-2 border-b border-border-01">
          <button
            onClick={() => setPreviewingFile(null)}
            className="p-1 rounded hover:bg-background-tint-02 transition-colors"
          >
            <SvgArrowLeft size={16} className="stroke-text-03" />
          </button>
          {isImage ? (
            <SvgImage size={16} className="stroke-text-03" />
          ) : (
            <SvgFileText size={16} className="stroke-text-03" />
          )}
          <Text secondaryBody text04 className="truncate">
            {previewingFile.fileName}
          </Text>
        </div>
        {/* File content */}
        <div className="flex-1 overflow-auto">
          <InlineFilePreview
            sessionId={sessionId}
            filePath={previewingFile.path}
          />
        </div>
      </div>
    );
  }

  return (
    <div className="flex flex-col h-full">
      <div
        ref={scrollContainerRef}
        onScroll={handleScroll}
        className="flex-1 overflow-auto px-2 pb-2 relative"
      >
        {/* Background to prevent content showing through sticky gap */}
        <div className="sticky top-0 left-0 right-0 h-2 bg-background-neutral-00 -mx-2 z-[101]" />
        {rootListing.entries.length === 0 ? (
          <Section
            height="full"
            alignItems="center"
            justifyContent="center"
            padding={2}
          >
            <Text secondaryBody text03>
              No files in this directory
            </Text>
          </Section>
        ) : (
          <div className="font-mono text-sm">
            <FileTreeNode
              entries={rootListing.entries}
              depth={0}
              expandedPaths={expandedPaths}
              directoryCache={directoryCache}
              onToggleFolder={toggleFolder}
              onFileClick={handleLocalFileClick}
              formatFileSize={formatFileSize}
            />
          </div>
        )}
      </div>
    </div>
  );
}

// ── FileTreeNode (internal) ──────────────────────────────────────────────

interface FileTreeNodeProps {
  entries: FileSystemEntry[];
  depth: number;
  expandedPaths: Set<string>;
  directoryCache: Map<string, FileSystemEntry[]>;
  onToggleFolder: (path: string) => void;
  onFileClick?: (
    path: string,
    fileName: string,
    mimeType: string | null
  ) => void;
  formatFileSize: (bytes: number | null) => string;
  parentIsLast?: boolean[];
}

function FileTreeNode({
  entries,
  depth,
  expandedPaths,
  directoryCache,
  onToggleFolder,
  onFileClick,
  formatFileSize,
  parentIsLast = [],
}: FileTreeNodeProps) {
  // Sort entries: directories first, then alphabetically
  const sortedEntries = [...entries].sort((a, b) => {
    if (a.is_directory && !b.is_directory) return -1;
    if (!a.is_directory && b.is_directory) return 1;
    return a.name.localeCompare(b.name);
  });

  return (
    <>
      {sortedEntries.map((entry, index) => {
        const isExpanded = expandedPaths.has(entry.path);
        const isLast = index === sortedEntries.length - 1;
        const childEntries = directoryCache.get(entry.path) || [];
        const FileIcon = getFileIcon(entry.name);

        // Row height for sticky offset calculation
        const rowHeight = 28;
        // Account for the 8px (h-2) spacer at top of scroll container
        const stickyTopOffset = 8;

        return (
          <div key={entry.path} className="relative">
            {/* Tree item row */}
            <button
              onClick={() => {
                if (entry.is_directory) {
                  onToggleFolder(entry.path);
                } else if (onFileClick) {
                  onFileClick(entry.path, entry.name, entry.mime_type);
                }
              }}
              className={cn(
                "w-full flex items-center py-1.5 hover:bg-background-tint-02 rounded transition-colors relative",
                !entry.is_directory && onFileClick && "cursor-pointer",
                !entry.is_directory && !onFileClick && "cursor-default",
                // Make expanded folders sticky
                entry.is_directory &&
                  isExpanded &&
                  "sticky bg-background-neutral-00"
              )}
              style={
                entry.is_directory && isExpanded
                  ? {
                      top: stickyTopOffset + depth * rowHeight,
                      zIndex: 100 - depth, // Higher z-index for parent folders
                    }
                  : undefined
              }
            >
              {/* Tree lines for depth */}
              {parentIsLast.map((isParentLast, i) => (
                <span
                  key={i}
                  className="inline-flex w-5 justify-center flex-shrink-0 self-stretch relative"
                >
                  {!isParentLast && (
                    <span className="absolute left-1/2 -translate-x-1/2 -top-1.5 -bottom-1.5 w-px bg-border-02" />
                  )}
                </span>
              ))}

              {/* Branch connector */}
              {depth > 0 && (
                <span className="inline-flex w-5 flex-shrink-0 self-stretch relative">
                  {/* Vertical line */}
                  <span
                    className={cn(
                      "absolute left-1/2 -translate-x-1/2 w-px bg-border-02",
                      isLast ? "-top-1.5 bottom-1/2" : "-top-1.5 -bottom-1.5"
                    )}
                  />
                  {/* Horizontal line */}
                  <span className="absolute top-1/2 left-1/2 w-2 h-px bg-border-02" />
                </span>
              )}

              {/* Expand/collapse chevron for directories */}
              {entry.is_directory ? (
                <span className="inline-flex w-4 h-4 items-center justify-center flex-shrink-0">
                  <SvgChevronRight
                    size={12}
                    className={cn(
                      "stroke-text-03 transition-transform duration-150",
                      isExpanded && "rotate-90"
                    )}
                  />
                </span>
              ) : (
                <span className="w-4 flex-shrink-0" />
              )}

              {/* Icon */}
              {entry.is_directory ? (
                isExpanded ? (
                  <SvgFolderOpen
                    size={16}
                    className="stroke-text-03 flex-shrink-0 mx-1"
                  />
                ) : (
                  <SvgFolder
                    size={16}
                    className="stroke-text-03 flex-shrink-0 mx-1"
                  />
                )
              ) : (
                <FileIcon
                  size={16}
                  className="stroke-text-03 flex-shrink-0 mx-1"
                />
              )}

              {/* Name */}
              <Text
                secondaryBody
                text04
                className="truncate flex-1 text-left ml-1"
              >
                {entry.name}
              </Text>

              {/* File size */}
              {!entry.is_directory && entry.size !== null && (
                <Text text02 className="ml-2 mr-2 flex-shrink-0">
                  {formatFileSize(entry.size)}
                </Text>
              )}
            </button>

            {/* Render children if expanded */}
            {entry.is_directory && isExpanded && childEntries.length > 0 && (
              <FileTreeNode
                entries={childEntries}
                depth={depth + 1}
                expandedPaths={expandedPaths}
                directoryCache={directoryCache}
                onToggleFolder={onToggleFolder}
                onFileClick={onFileClick}
                formatFileSize={formatFileSize}
                parentIsLast={[...parentIsLast, isLast]}
              />
            )}

            {/* Loading indicator for expanded but not-yet-loaded directories */}
            {entry.is_directory &&
              isExpanded &&
              !directoryCache.has(entry.path) && (
                <div
                  className="flex items-center py-1"
                  style={{ paddingLeft: `${(depth + 1) * 20 + 24}px` }}
                >
                  <Text secondaryBody text02>
                    Loading...
                  </Text>
                </div>
              )}
          </div>
        );
      })}
    </>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/ImagePreview.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { SvgImage } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";

interface ImagePreviewProps {
  src: string;
  fileName: string;
}

/**
 * ImagePreview - Displays images with loading and error states
 * Includes proper accessibility attributes
 */
export default function ImagePreview({ src, fileName }: ImagePreviewProps) {
  const [imageLoading, setImageLoading] = useState(true);
  const [imageError, setImageError] = useState(false);

  // Extract just the filename from path for better alt text
  const displayName = fileName.split("/").pop() || fileName;

  // Reset loading state when src changes
  useEffect(() => {
    setImageLoading(true);
    setImageError(false);
  }, [src]);

  if (imageError) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgImage size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          Failed to load image
        </Text>
        <Text secondaryBody text02>
          The image could not be displayed
        </Text>
      </Section>
    );
  }

  return (
    <div className="h-full flex flex-col overflow-hidden">
      <div className="flex-1 flex items-center justify-center p-4">
        {imageLoading && (
          <div className="absolute">
            <Text secondaryBody text03>
              Loading image...
            </Text>
          </div>
        )}
        <img
          src={src}
          alt={displayName}
          role="img"
          aria-label={`Preview of ${displayName}`}
          className={cn(
            "max-w-full max-h-full object-contain transition-opacity",
            imageLoading ? "opacity-0" : "opacity-100"
          )}
          onLoad={() => setImageLoading(false)}
          onError={() => {
            setImageLoading(false);
            setImageError(true);
          }}
        />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/MarkdownFilePreview.tsx
================================================
"use client";

import MinimalMarkdown from "@/components/chat/MinimalMarkdown";

/** Shared interface for the file renderer registry */
export interface FileRendererProps {
  content: string;
  fileName: string;
  filePath: string;
  mimeType: string;
  isImage: boolean;
}

export default function MarkdownFilePreview({ content }: FileRendererProps) {
  return (
    <div className="relative h-full">
      <div className="absolute inset-0 overflow-auto default-scrollbar p-6">
        <MinimalMarkdown
          content={content}
          className="max-w-3xl mx-auto"
          components={{
            a: ({ href, children }: any) => (
              <a
                href={href}
                target="_blank"
                rel="noopener noreferrer"
                className="text-link hover:text-link-hover underline"
              >
                {children}
              </a>
            ),
          }}
        />
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/PdfPreview.tsx
================================================
"use client";

import { useState, useEffect, useRef } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { SvgFileText } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { getArtifactUrl } from "@/lib/build/client";

interface PdfPreviewProps {
  sessionId: string;
  filePath: string;
  refreshKey?: number;
}

/**
 * PdfPreview - Renders PDF files using the browser's built-in PDF viewer.
 * Fetches the PDF as a blob and creates an object URL so the iframe renders
 * it inline (the backend serves artifacts with Content-Disposition: attachment,
 * which would otherwise force a download).
 */
export default function PdfPreview({
  sessionId,
  filePath,
  refreshKey,
}: PdfPreviewProps) {
  const [blobUrl, setBlobUrl] = useState<string | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(false);
  const blobUrlRef = useRef<string | null>(null);

  useEffect(() => {
    const controller = new AbortController();

    // Revoke the previous blob URL before starting a new fetch
    if (blobUrlRef.current) {
      URL.revokeObjectURL(blobUrlRef.current);
      blobUrlRef.current = null;
    }
    setBlobUrl(null);
    setLoading(true);
    setError(false);

    const encodedPath = filePath
      .split("/")
      .map((segment) => encodeURIComponent(segment))
      .join("/");
    const artifactUrl = getArtifactUrl(sessionId, encodedPath);

    fetch(artifactUrl, { signal: controller.signal })
      .then((res) => {
        if (!res.ok) throw new Error(`Failed to fetch PDF: ${res.status}`);
        return res.blob();
      })
      .then((blob) => {
        const url = URL.createObjectURL(blob);
        blobUrlRef.current = url;
        setBlobUrl(url);
        setLoading(false);
      })
      .catch((err) => {
        if (err instanceof DOMException && err.name === "AbortError") return;
        setError(true);
        setLoading(false);
      });

    return () => {
      controller.abort();
      if (blobUrlRef.current) {
        URL.revokeObjectURL(blobUrlRef.current);
        blobUrlRef.current = null;
      }
    };
  }, [sessionId, filePath, refreshKey]);

  if (error) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgFileText size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          Cannot preview PDF
        </Text>
        <Text secondaryBody text02 className="text-center max-w-md">
          The PDF file could not be loaded.
        </Text>
      </Section>
    );
  }

  if (loading || !blobUrl) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <Text secondaryBody text03>
          Loading PDF...
        </Text>
      </Section>
    );
  }

  return (
    <iframe
      src={blobUrl}
      title={filePath.split("/").pop() || "PDF Preview"}
      className={cn("w-full h-full border-none")}
    />
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/PptxPreview.tsx
================================================
"use client";

import { useState, useEffect, useCallback } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { SvgChevronLeft, SvgChevronRight, SvgFileText } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { fetchPptxPreview } from "@/app/craft/services/apiServices";
import { getArtifactUrl } from "@/lib/build/client";

interface PptxPreviewProps {
  sessionId: string;
  filePath: string;
  refreshKey?: number;
}

/**
 * PptxPreview - Displays PPTX files as navigable slide images.
 * Triggers on-demand conversion via the backend, then renders
 * individual slide JPEGs in a carousel with keyboard navigation.
 */
export default function PptxPreview({
  sessionId,
  filePath,
  refreshKey,
}: PptxPreviewProps) {
  const [currentSlide, setCurrentSlide] = useState(0);
  const [imageLoading, setImageLoading] = useState(true);

  const { data, error, isLoading, mutate } = useSWR(
    SWR_KEYS.buildSessionPptxPreview(sessionId, filePath),
    () => fetchPptxPreview(sessionId, filePath),
    {
      revalidateOnFocus: false,
      dedupingInterval: 10000,
    }
  );

  const slideCount = data?.slide_count ?? 0;

  const goToPrev = useCallback(() => {
    setCurrentSlide((prev) => Math.max(0, prev - 1));
  }, []);

  const goToNext = useCallback(() => {
    setCurrentSlide((prev) => Math.min(slideCount - 1, prev + 1));
  }, [slideCount]);

  // Reset slide index when file changes
  useEffect(() => {
    setCurrentSlide(0);
  }, [filePath]);

  // Reset image loading state when slide changes
  useEffect(() => {
    setImageLoading(true);
  }, [currentSlide, data]);

  // Re-fetch when refreshKey changes
  useEffect(() => {
    if (refreshKey && refreshKey > 0) {
      mutate();
    }
  }, [refreshKey, mutate]);

  // Keyboard navigation
  useEffect(() => {
    function handleKeyDown(e: KeyboardEvent) {
      if (e.key === "ArrowLeft") {
        goToPrev();
      } else if (e.key === "ArrowRight") {
        goToNext();
      }
    }
    window.addEventListener("keydown", handleKeyDown);
    return () => window.removeEventListener("keydown", handleKeyDown);
  }, [goToPrev, goToNext]);

  if (isLoading) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <Text secondaryBody text03>
          Converting presentation...
        </Text>
      </Section>
    );
  }

  if (error) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgFileText size={48} className="stroke-text-02" />
        <Text headingH3 text03>
          Cannot preview presentation
        </Text>
        <Text secondaryBody text02 className="text-center max-w-md">
          {error.message}
        </Text>
      </Section>
    );
  }

  if (!data || slideCount === 0) {
    return (
      <Section
        height="full"
        alignItems="center"
        justifyContent="center"
        padding={2}
      >
        <SvgFileText size={48} className="stroke-text-02" />
        <Text secondaryBody text03>
          No slides in this presentation
        </Text>
      </Section>
    );
  }

  const slidePath = data.slide_paths[currentSlide] ?? "";
  const slideUrl = getArtifactUrl(sessionId, slidePath);

  return (
    <div className="h-full flex flex-col overflow-hidden">
      {/* Slide image */}
      <div className="flex-1 flex items-center justify-center p-4 overflow-hidden">
        {imageLoading && (
          <div className="absolute">
            <Text secondaryBody text03>
              Loading slide...
            </Text>
          </div>
        )}
        <img
          src={slideUrl}
          alt={`Slide ${currentSlide + 1} of ${slideCount}`}
          className={cn(
            "max-w-full max-h-full object-contain transition-opacity",
            imageLoading ? "opacity-0" : "opacity-100"
          )}
          onLoad={() => setImageLoading(false)}
          onError={() => setImageLoading(false)}
        />
      </div>

      {/* Navigation bar */}
      {slideCount > 1 && (
        <div className="flex items-center justify-center gap-3 p-2 border-t border-border-02">
          <button
            onClick={goToPrev}
            disabled={currentSlide === 0}
            className={cn(
              "p-1 rounded",
              currentSlide === 0
                ? "opacity-30 cursor-not-allowed"
                : "hover:bg-background-neutral-03 cursor-pointer"
            )}
          >
            <SvgChevronLeft size={16} className="stroke-text-02" />
          </button>
          <Text secondaryBody text03>
            Slide {currentSlide + 1} of {slideCount}
          </Text>
          <button
            onClick={goToNext}
            disabled={currentSlide === slideCount - 1}
            className={cn(
              "p-1 rounded",
              currentSlide === slideCount - 1
                ? "opacity-30 cursor-not-allowed"
                : "hover:bg-background-neutral-03 cursor-pointer"
            )}
          >
            <SvgChevronRight size={16} className="stroke-text-02" />
          </button>
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/PreviewTab.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { cn } from "@/lib/utils";

interface PreviewTabProps {
  webappUrl: string | null;
  /** Changing this value forces the iframe to fully remount / reload */
  refreshKey?: number;
}

/**
 * PreviewTab - Shows the webapp iframe preview
 *
 * States:
 * - No webapp URL yet: Shows blank dark background while SWR fetches
 * - Has webapp URL: Shows iframe with crossfade from blank background
 */
export default function PreviewTab({ webappUrl, refreshKey }: PreviewTabProps) {
  const [iframeLoaded, setIframeLoaded] = useState(false);

  // Reset loaded state when URL or refreshKey changes
  useEffect(() => {
    setIframeLoaded(false);
  }, [webappUrl, refreshKey]);

  // Base background shown while loading or when no webapp exists yet
  return (
    <div className="h-full flex flex-col">
      <div className="flex-1 p-3 relative">
        {/* Base dark background - always present, visible when no iframe or iframe loading */}
        <div
          className={cn(
            "absolute inset-0 rounded-b-08 bg-neutral-950",
            "transition-opacity duration-300",
            iframeLoaded ? "opacity-0 pointer-events-none" : "opacity-100"
          )}
        />

        {/* Iframe - fades in when loaded */}
        {webappUrl && (
          <iframe
            key={refreshKey}
            src={webappUrl}
            onLoad={() => setIframeLoaded(true)}
            className={cn(
              "absolute inset-0 w-full h-full rounded-b-08 bg-neutral-950",
              "transition-opacity duration-300",
              iframeLoaded ? "opacity-100" : "opacity-0"
            )}
            sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-popups-to-escape-sandbox allow-top-navigation-by-user-activation"
            title="Web App Preview"
          />
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/components/output-panel/UrlBar.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import {
  SvgDownloadCloud,
  SvgLoader,
  SvgArrowLeft,
  SvgArrowRight,
  SvgExternalLink,
  SvgRevert,
} from "@opal/icons";
import { IconProps } from "@opal/types";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import ShareButton from "@/app/craft/components/ShareButton";
import type { SharingScope } from "@/app/craft/types/streamingTypes";

/** SvgLoader wrapped with animate-spin so it can be passed as a Button leftIcon */
const SpinningLoader: React.FunctionComponent<IconProps> = (props) => (
  <SvgLoader {...props} className={cn(props.className, "animate-spin")} />
);

export interface UrlBarProps {
  displayUrl: string;
  showNavigation?: boolean;
  canGoBack?: boolean;
  canGoForward?: boolean;
  onBack?: () => void;
  onForward?: () => void;
  previewUrl?: string | null;
  /** Optional callback to download the raw file — shows a cloud-download icon inside the URL pill */
  onDownloadRaw?: () => void;
  /** Tooltip text for the raw download button */
  downloadRawTooltip?: string;
  /** Optional download callback — shows an export button in the URL bar when provided */
  onDownload?: () => void;
  /** Whether a download/export is currently in progress */
  isDownloading?: boolean;
  /** Optional refresh callback — shows a refresh icon at the right edge of the URL pill */
  onRefresh?: () => void;
  /** Session ID — when present with previewUrl, shows share button for webapp */
  sessionId?: string;
  /** Sharing scope for the webapp (used when sessionId + previewUrl) */
  sharingScope?: SharingScope;
  /** Callback when sharing scope changes (revalidate webapp info) */
  onScopeChange?: () => void;
}

/**
 * UrlBar - Chrome-style URL/status bar below tabs
 * Shows the current URL/path based on active tab or file preview
 * Optionally shows back/forward navigation buttons
 * For Preview tab, shows a button to open the URL in a new browser tab
 * For downloadable files, shows a download icon
 */
export default function UrlBar({
  displayUrl,
  showNavigation = false,
  canGoBack = false,
  canGoForward = false,
  onBack,
  onForward,
  previewUrl,
  onDownloadRaw,
  downloadRawTooltip = "Download file",
  onDownload,
  isDownloading = false,
  onRefresh,
  sessionId,
  sharingScope = "private",
  onScopeChange,
}: UrlBarProps) {
  const handleOpenInNewTab = () => {
    if (previewUrl) {
      window.open(previewUrl, "_blank", "noopener,noreferrer");
    }
  };

  return (
    <div className="px-3 pb-2">
      <div className="flex items-center gap-1">
        {/* Navigation buttons + refresh */}
        {showNavigation && (
          <div className="flex items-center gap-0.5">
            <button
              onClick={onBack}
              disabled={!canGoBack}
              className={cn(
                "p-1.5 rounded-full transition-colors",
                canGoBack
                  ? "hover:bg-background-tint-03 text-text-03"
                  : "text-text-02 cursor-not-allowed"
              )}
              aria-label="Go back"
            >
              <SvgArrowLeft size={16} />
            </button>
            <button
              onClick={onForward}
              disabled={!canGoForward}
              className={cn(
                "p-1.5 rounded-full transition-colors",
                canGoForward
                  ? "hover:bg-background-tint-03 text-text-03"
                  : "text-text-02 cursor-not-allowed"
              )}
              aria-label="Go forward"
            >
              <SvgArrowRight size={16} />
            </button>
            {onRefresh && (
              <button
                onClick={onRefresh}
                className="p-1.5 rounded-full transition-colors hover:bg-background-tint-03 text-text-03"
                aria-label="Refresh"
              >
                <SvgRevert size={14} className="-scale-x-100" />
              </button>
            )}
          </div>
        )}
        {/* URL display */}
        <div className="flex-1 min-w-0 flex items-center px-3 py-1.5 bg-background-tint-02 rounded-full gap-2 min-h-[2.25rem]">
          {/* Download raw file button */}
          {onDownloadRaw && (
            <SimpleTooltip tooltip={downloadRawTooltip} delayDuration={200}>
              <button
                onClick={onDownloadRaw}
                className="flex-shrink-0 p-0.5 rounded transition-colors hover:bg-background-tint-03 text-text-03"
                aria-label={downloadRawTooltip}
              >
                <SvgDownloadCloud size={14} />
              </button>
            </SimpleTooltip>
          )}
          {/* Open in new tab button - only shown for Preview tab with valid URL */}
          {previewUrl && (
            <SimpleTooltip tooltip="open in a new tab" delayDuration={200}>
              <button
                onClick={handleOpenInNewTab}
                className="flex-shrink-0 p-0.5 rounded transition-colors hover:bg-background-tint-03 text-text-03"
                aria-label="open in a new tab"
              >
                <SvgExternalLink size={14} />
              </button>
            </SimpleTooltip>
          )}
          <Text secondaryBody text03 className="min-w-0 flex-1 truncate">
            {displayUrl}
          </Text>
        </div>
        {/* Export button — shown for downloadable file previews (e.g. markdown → docx) */}
        {onDownload && (
          <Button
            disabled={isDownloading}
            variant="action"
            prominence="tertiary"
            icon={isDownloading ? SpinningLoader : SvgExternalLink}
            onClick={onDownload}
          >
            {isDownloading ? "Exporting..." : "Export to .docx"}
          </Button>
        )}
        {/* Share button — shown when webapp preview is active */}
        {previewUrl && sessionId && (
          <ShareButton
            key={sessionId}
            sessionId={sessionId}
            webappUrl={previewUrl}
            sharingScope={sharingScope}
            onScopeChange={onScopeChange}
          />
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/constants/exampleBuildPrompts.ts
================================================
/**
 * Example prompts for the Build Mode welcome screen.
 * Organized by user persona to allow different prompts for different user types.
 */

export interface BuildPrompt {
  id: string;
  /** Short summary shown on the button */
  summary: string;
  /** Full prompt text inserted into the input bar */
  fullText: string;
  /** Optional image URL/path for visual display */
  image?: string;
}

export type UserPersona = "default" | "engineering" | "sales" | "product";

/**
 * Example prompts organized by user persona.
 * Each persona has a set of prompts tailored to their typical use cases.
 */
export const exampleBuildPrompts: Record<UserPersona, BuildPrompt[]> = {
  default: [
    {
      id: "default-1",
      summary: "Analyze team productivity by month across my company",
      fullText:
        "Create a dashboard with the number of closed tickets per month. Split by priority and compare teams.",
      image: "/craft_suggested_image_1.png",
    },
    {
      id: "default-2",
      summary:
        "Visualize what my team did this month with interactive drill-downs",
      fullText:
        "What did my team work on this month? Create a dashboard that 1) shows the number of actions per activity, 2) shows the individual work items when I select something in the dashboard.",
      image: "/craft_suggested_image_2.png",
    },
    {
      id: "default-3",
      summary: "Connect my backlog to recent customer conversations",
      fullText:
        "For each of my open Linear tickets, find at least 2 customers that have discussed related issues. Present the results in a dashboard table.",
      image: "/craft_suggested_image_3.png",
    },
    {
      id: "default-4",
      summary:
        "Surface the top pain points from this week's customer success calls",
      fullText:
        "Based on the customer calls this week, what are the 5 most important challenges? Create a table in a dashboard that shows the challenge and the customers that complained about it.",
      image: "/craft_suggested_image_4.png",
    },
    {
      id: "default-5",
      summary:
        "Compare and contrast which messaging resonates the most with our prospects",
      fullText:
        "If you look at the customer calls over the last 30 days, which part of our messaging seems to resonate the best, and appears to drive the most customer value? Generate a slide that effectively tells the story.",
      image: "/craft_suggested_image_5.png",
    },
  ],
  engineering: [
    {
      id: "eng-1",
      summary: "Enrich my open PRs with customer insights and feedback",
      fullText:
        "Look at my open PRs and find information from customer discussions regarding these PRs that could help to implement better. Also find for each PR the design doc I wrote and create a new one that is appropriately updated.",
      image: "/craft_suggested_image_1.png",
    },
    {
      id: "eng-2",
      summary: "Track engineering velocity from ticket to merged PR",
      fullText:
        "What is the average time it takes the engineers to merge PRs after my team created a Linear ticket? Create a dashboard that shows the average time by engineering team.",
      image: "/craft_suggested_image_2.png",
    },
    {
      id: "eng-3",
      summary: "Build a visual roadmap story from my quarterly contributions",
      fullText:
        "Create an image (slide) that groups my PRs by quarter, finds the common thread, and presents a coherent story. This will later go into a historical roadmap.",
      image: "/craft_suggested_image_3.png",
    },
    {
      id: "eng-4",
      summary:
        "Find churned customers who would have benefited from our releases",
      fullText:
        "Look at the PRs that my team merged this month. Then look at the customers we lost over the last 2 months and tell me which of the customers would have likely benefitted from the merged PRs. Rank the customers by importance. Present in a dashboard.",
      image: "/craft_suggested_image_4.png",
    },
    {
      id: "eng-5",
      summary: "Build a Linear dashboard to track my team's progress",
      fullText: "Create a Linear dashboard for my team.",
      image: "/craft_suggested_image_5.png",
    },
  ],
  sales: [
    {
      id: "sales-1",
      summary: "Identify sales blockers and quantify their revenue impact",
      fullText:
        "Look at the customer calls that my team had last month and identify the 3 most important sales blockers. Those could be product-related, messaging-related, or persona-chemistry. Create a dashboard showing how much revenue seems to be associated with each blocker.",
      image: "/craft_suggested_image_1.png",
    },
    {
      id: "sales-2",
      summary: "Prepare winning talking points for my upcoming meeting",
      fullText:
        "I have a meeting with a prospect next week. Please go through the objections they raised and suggest good talking points based on other customer situations, upcoming product changes, etc.",
      image: "/craft_suggested_image_2.png",
    },
    {
      id: "sales-3",
      summary: "Learn how my teammates overcame similar deal objections",
      fullText:
        "I don't want to give up on this opportunity. Find customer discussions from other members of my team where similar issues came up and were overcome. Provide some recommendations.",
      image: "/craft_suggested_image_3.png",
    },
    {
      id: "sales-4",
      summary: "Discover which pitch messaging resonates most with customers",
      fullText:
        "If you look at the customer calls over the last 30 days, which part of our messaging seems to resonate the best, and appears to drive the most customer value? Generate a slide that effectively tells the story.",
      image: "/craft_suggested_image_4.png",
    },
    {
      id: "sales-5",
      summary: "Surface the top product challenges from customer calls",
      fullText:
        "Based on the customer calls this week, what are the 5 most important challenges with the product? Create a table in a dashboard that shows the challenge and the customers that complained about it.",
      image: "/craft_suggested_image_5.png",
    },
  ],
  product: [
    {
      id: "product-1",
      summary: "Summarize what I did this month for my manager",
      fullText:
        "I need to explain to my manager what I did last month, and how it matters for customer impact.",
      image: "/craft_suggested_image_1.png",
    },
    {
      id: "product-2",
      summary: "Connect my backlog to recent customer conversations",
      fullText:
        "For each of my open Linear tickets, find at least 2 customers that have discussed related issues. Present the results in a dashboard table.",
      image: "/craft_suggested_image_2.png",
    },
    {
      id: "product-3",
      summary:
        "Visualize what my team did this month with interactive drill-downs",
      fullText:
        "What did my team work on this month? Create a dashboard that 1) shows the number of actions per activity, 2) shows the individual work items when I select something in the dashboard.",
      image: "/craft_suggested_image_4.png",
    },
    {
      id: "product-4",
      summary:
        "Find churned customers who would have benefited from the releases this month",
      fullText:
        "Look at the PRs that my team merged this month. Then look at the customers we lost over the last 2 months and tell me which of the customers would have likely benefitted from the merged PRs. Rank the customers by importance. Present in a dashboard.",
      image: "/craft_suggested_image_3.png",
    },
    {
      id: "product-5",
      summary: "Analyze team productivity by month across my company",
      fullText:
        "Create a dashboard with the number of closed tickets per month. Split by priority and compare teams.",
      image: "/craft_suggested_image_5.png",
    },
  ],
};

/**
 * Get prompts for a specific user persona.
 * Falls back to default prompts if persona is not found.
 */
export function getPromptsForPersona(persona: UserPersona): BuildPrompt[] {
  return exampleBuildPrompts[persona] ?? exampleBuildPrompts.default;
}

/**
 * Maps a workArea value from the build_user_persona cookie to a UserPersona.
 * Work areas that don't have dedicated prompts (executive, marketing, other) fall back to default.
 */
export function workAreaToPersona(workArea: string | undefined): UserPersona {
  switch (workArea) {
    case "engineering":
      return "engineering";
    case "sales":
      return "sales";
    case "product":
      return "product";
    default:
      return "default";
  }
}


================================================
FILE: web/src/app/craft/constants.ts
================================================
// ============================================================================
// Build Session Constants
// ============================================================================

/** Duration to display success state after session deletion (ms) */
export const DELETE_SUCCESS_DISPLAY_DURATION_MS = 800;

/** Interval for rotating delete messages during session deletion (ms) */
export const DELETE_MESSAGE_ROTATION_INTERVAL_MS = 3000;


================================================
FILE: web/src/app/craft/contexts/BuildContext.tsx
================================================
"use client";

import {
  createContext,
  useContext,
  useState,
  useMemo,
  type ReactNode,
} from "react";

/**
 * Build UI Context
 *
 * This context manages UI state (sidebar visibility).
 * Output panel state is stored per-session in useBuildSessionStore.
 */
interface BuildContextValue {
  // UI state - left sidebar
  leftSidebarFolded: boolean;
  setLeftSidebarFolded: React.Dispatch<React.SetStateAction<boolean>>;
}

const BuildContext = createContext<BuildContextValue | null>(null);

export interface BuildProviderProps {
  children: ReactNode;
}

export function BuildProvider({ children }: BuildProviderProps) {
  const [leftSidebarFolded, setLeftSidebarFolded] = useState(false);

  const value = useMemo<BuildContextValue>(
    () => ({
      leftSidebarFolded,
      setLeftSidebarFolded,
    }),
    [leftSidebarFolded]
  );

  return (
    <BuildContext.Provider value={value}>{children}</BuildContext.Provider>
  );
}

export function useBuildContext() {
  const context = useContext(BuildContext);
  if (!context) {
    throw new Error("useBuildContext must be used within a BuildProvider");
  }
  return context;
}


================================================
FILE: web/src/app/craft/contexts/UploadFilesContext.tsx
================================================
"use client";

import {
  createContext,
  useContext,
  useState,
  useCallback,
  useMemo,
  useRef,
  useEffect,
  type ReactNode,
} from "react";
import {
  uploadFile as uploadFileApi,
  deleteFile as deleteFileApi,
  fetchDirectoryListing,
} from "@/app/craft/services/apiServices";
import { useBuildSessionStore } from "@/app/craft/hooks/useBuildSessionStore";

/**
 * Upload File Status - tracks the state of files being uploaded
 */
export enum UploadFileStatus {
  /** File is currently being uploaded to the sandbox */
  UPLOADING = "UPLOADING",
  /** File is being processed after upload */
  PROCESSING = "PROCESSING",
  /** File has been successfully uploaded and has a path */
  COMPLETED = "COMPLETED",
  /** File upload failed */
  FAILED = "FAILED",
  /** File is waiting for a session to be created before uploading */
  PENDING = "PENDING",
}

/**
 * Build File - represents a file attached to a build session
 */
export interface BuildFile {
  id: string;
  name: string;
  status: UploadFileStatus;
  file_type: string;
  size: number;
  created_at: string;
  // Original File object for upload
  file?: File;
  // Path in sandbox after upload (e.g., "attachments/doc.pdf")
  path?: string;
  // Error message if upload failed
  error?: string;
}

// Helper to generate unique temp IDs
const generateTempId = () => {
  try {
    return `temp_${crypto.randomUUID()}`;
  } catch {
    return `temp_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
};

// =============================================================================
// File Validation (matches backend: build/configs.py and build/utils.py)
// =============================================================================

/** Maximum individual file size - matches BUILD_MAX_UPLOAD_FILE_SIZE_MB (50MB) */
const MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024;

/** Maximum total attachment size per session - matches BUILD_MAX_TOTAL_UPLOAD_SIZE_MB (200MB) */
const MAX_TOTAL_SIZE_BYTES = 200 * 1024 * 1024;

/** Maximum files per session - matches BUILD_MAX_UPLOAD_FILES_PER_SESSION */
const MAX_FILES_PER_SESSION = 20;

/** Blocked file extensions (executables/dangerous) - matches backend BLOCKED_EXTENSIONS */
const BLOCKED_EXTENSIONS = new Set([
  // Windows executables
  ".exe",
  ".dll",
  ".msi",
  ".scr",
  ".com",
  ".bat",
  ".cmd",
  ".ps1",
  // macOS
  ".app",
  ".dmg",
  ".pkg",
  // Linux
  ".deb",
  ".rpm",
  ".so",
  // Cross-platform
  ".jar",
  ".war",
  ".ear",
  // Other potentially dangerous
  ".vbs",
  ".vbe",
  ".wsf",
  ".wsh",
  ".hta",
  ".cpl",
  ".reg",
  ".lnk",
  ".pif",
]);

/** Format bytes to human-readable string */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}

/** Get file extension (lowercase, including dot) */
function getFileExtension(filename: string): string {
  const lastDot = filename.lastIndexOf(".");
  if (lastDot === -1) return "";
  return filename.slice(lastDot).toLowerCase();
}

/** Validation result for a single file */
interface FileValidationResult {
  valid: boolean;
  error?: string;
}

/** Validate a single file before upload */
function validateFile(file: File): FileValidationResult {
  // Check file size
  if (file.size > MAX_FILE_SIZE_BYTES) {
    return {
      valid: false,
      error: `File too large (${formatBytes(
        file.size
      )}). Maximum is ${formatBytes(MAX_FILE_SIZE_BYTES)}.`,
    };
  }

  // Check blocked extensions
  const ext = getFileExtension(file.name);
  if (ext && BLOCKED_EXTENSIONS.has(ext)) {
    return {
      valid: false,
      error: `File type '${ext}' is not allowed for security reasons.`,
    };
  }

  // Check for missing extension
  if (!ext) {
    return {
      valid: false,
      error: "File must have an extension.",
    };
  }

  return { valid: true };
}

/** Validate total files and size constraints */
function validateBatch(
  newFiles: File[],
  existingFiles: BuildFile[]
): FileValidationResult {
  const totalCount = existingFiles.length + newFiles.length;
  if (totalCount > MAX_FILES_PER_SESSION) {
    return {
      valid: false,
      error: `Too many files. Maximum is ${MAX_FILES_PER_SESSION} files per session.`,
    };
  }

  const existingSize = existingFiles.reduce((sum, f) => sum + f.size, 0);
  const newSize = newFiles.reduce((sum, f) => sum + f.size, 0);
  const totalSize = existingSize + newSize;

  if (totalSize > MAX_TOTAL_SIZE_BYTES) {
    return {
      valid: false,
      error: `Total size exceeds limit. Maximum is ${formatBytes(
        MAX_TOTAL_SIZE_BYTES
      )} per session.`,
    };
  }

  return { valid: true };
}

/** Create a failed BuildFile for validation errors */
function createFailedFile(file: File, error: string): BuildFile {
  return {
    id: generateTempId(),
    name: file.name,
    status: UploadFileStatus.FAILED,
    file_type: file.type,
    size: file.size,
    created_at: new Date().toISOString(),
    error,
  };
}

// Create optimistic file from File object
const createOptimisticFile = (file: File): BuildFile => {
  const tempId = generateTempId();
  return {
    id: tempId,
    name: file.name,
    status: UploadFileStatus.UPLOADING,
    file_type: file.type,
    size: file.size,
    created_at: new Date().toISOString(),
    file,
  };
};

/**
 * Error types for better error handling
 */
export enum UploadErrorType {
  NETWORK = "NETWORK",
  AUTH = "AUTH",
  NOT_FOUND = "NOT_FOUND",
  SERVER = "SERVER",
  UNKNOWN = "UNKNOWN",
}

function classifyError(error: unknown): {
  type: UploadErrorType;
  message: string;
} {
  if (error instanceof Error) {
    const message = error.message.toLowerCase();
    if (message.includes("401") || message.includes("unauthorized")) {
      return { type: UploadErrorType.AUTH, message: "Session expired" };
    }
    if (message.includes("404") || message.includes("not found")) {
      return { type: UploadErrorType.NOT_FOUND, message: "Resource not found" };
    }
    if (message.includes("500") || message.includes("server")) {
      return { type: UploadErrorType.SERVER, message: "Server error" };
    }
    if (message.includes("network") || message.includes("fetch")) {
      return { type: UploadErrorType.NETWORK, message: "Network error" };
    }
    return { type: UploadErrorType.UNKNOWN, message: error.message };
  }
  return { type: UploadErrorType.UNKNOWN, message: "Upload failed" };
}

/**
 * UploadFilesContext - Centralized file upload state management
 *
 * This context manages:
 * - File attachment state (current files attached to input)
 * - Active session binding (which session files are associated with)
 * - Automatic upload of pending files when session becomes available
 * - Automatic fetch of existing attachments when session changes
 * - File upload, removal, and clearing operations
 *
 * Components should:
 * - Call `setActiveSession(sessionId)` when session changes
 * - Call `uploadFiles(files)` to attach files (uses active session internally)
 * - Call `removeFile(fileId)` to remove files (uses active session internally)
 * - Read `currentMessageFiles` to display attached files
 */
interface UploadFilesContextValue {
  // Current message files (attached to the input bar)
  currentMessageFiles: BuildFile[];

  // Active session ID (set by parent components)
  activeSessionId: string | null;

  /**
   * Set the active session ID. This triggers:
   * - Fetching existing attachments from the new session (if different)
   * - Clearing files if navigating to no session
   * - Auto-uploading any pending files
   *
   * Call this when:
   * - Session ID changes in URL
   * - Pre-provisioned session becomes available
   */
  setActiveSession: (sessionId: string | null) => void;

  /**
   * Upload files to the active session.
   * - If session is available: uploads immediately
   * - If no session: marks as PENDING (auto-uploads when session available)
   */
  uploadFiles: (files: File[]) => Promise<BuildFile[]>;

  /**
   * Remove a file from the input bar.
   * If the file was uploaded, also deletes from the sandbox.
   */
  removeFile: (fileId: string) => void;

  /**
   * Clear all attached files from the input bar.
   * Does NOT delete from sandbox (use for form reset).
   * @param options.suppressRefetch - When true, skips the refetch that would
   *   normally restore session attachments (e.g. when user hits Enter to dismiss
   *   a file from the input bar).
   */
  clearFiles: (options?: { suppressRefetch?: boolean }) => void;

  // Check if any files are uploading
  hasUploadingFiles: boolean;

  // Check if any files are pending upload
  hasPendingFiles: boolean;
}

const UploadFilesContext = createContext<UploadFilesContextValue | null>(null);

export interface UploadFilesProviderProps {
  children: ReactNode;
}

export function UploadFilesProvider({ children }: UploadFilesProviderProps) {
  // =========================================================================
  // State
  // =========================================================================

  const [currentMessageFiles, setCurrentMessageFiles] = useState<BuildFile[]>(
    []
  );
  const [activeSessionId, setActiveSessionId] = useState<string | null>(null);

  // Get triggerFilesRefresh from the store to refresh the file explorer
  const triggerFilesRefresh = useBuildSessionStore(
    (state) => state.triggerFilesRefresh
  );

  // =========================================================================
  // Refs for race condition protection
  // =========================================================================

  const isUploadingPendingRef = useRef(false);
  const fetchingSessionRef = useRef<string | null>(null);
  const prevSessionRef = useRef<string | null>(null);
  // Track active deletions to prevent refetch race condition
  const activeDeletionsRef = useRef<Set<string>>(new Set());
  // When true, skip the refetch that runs after clearFiles (e.g. Enter to dismiss file)
  const suppressRefetchRef = useRef(false);

  // =========================================================================
  // Derived state
  // =========================================================================

  const hasUploadingFiles = useMemo(() => {
    return currentMessageFiles.some(
      (file) => file.status === UploadFileStatus.UPLOADING
    );
  }, [currentMessageFiles]);

  const hasPendingFiles = useMemo(() => {
    return currentMessageFiles.some(
      (file) => file.status === UploadFileStatus.PENDING
    );
  }, [currentMessageFiles]);

  // =========================================================================
  // Internal operations (not exposed to consumers)
  // =========================================================================

  /**
   * Upload pending files to the given session.
   * Internal function - called automatically by effects.
   * Reads current files from state internally to avoid stale closures.
   */
  const uploadPendingFilesInternal = useCallback(
    async (sessionId: string): Promise<void> => {
      if (isUploadingPendingRef.current) return;

      // Read current files and find pending ones atomically
      let pendingFiles: BuildFile[] = [];
      setCurrentMessageFiles((prev) => {
        pendingFiles = prev.filter(
          (f) => f.status === UploadFileStatus.PENDING && f.file
        );
        // Mark as uploading in the same state update to avoid race conditions
        if (pendingFiles.length > 0) {
          return prev.map((f) =>
            pendingFiles.some((pf) => pf.id === f.id)
              ? { ...f, status: UploadFileStatus.UPLOADING }
              : f
          );
        }
        return prev;
      });

      if (pendingFiles.length === 0) return;

      isUploadingPendingRef.current = true;

      try {
        // Upload in parallel
        const results = await Promise.all(
          pendingFiles.map(async (file) => {
            try {
              const result = await uploadFileApi(sessionId, file.file!);
              return { id: file.id, success: true as const, result };
            } catch (error) {
              const { message } = classifyError(error);
              return {
                id: file.id,
                success: false as const,
                errorMessage: message,
              };
            }
          })
        );

        // Update statuses
        setCurrentMessageFiles((prev) =>
          prev.map((f) => {
            const result = results.find((r) => r.id === f.id);
            if (!result) return f;
            return result.success
              ? {
                  ...f,
                  status: UploadFileStatus.COMPLETED,
                  path: result.result.path,
                  name: result.result.filename,
                  file: undefined, // Clear blob to free memory
                }
              : {
                  ...f,
                  status: UploadFileStatus.FAILED,
                  error: result.errorMessage,
                };
          })
        );

        // Refresh file explorer if any uploads succeeded
        const anySucceeded = results.some((r) => r.success);
        if (anySucceeded) {
          triggerFilesRefresh(sessionId);
        }
      } finally {
        isUploadingPendingRef.current = false;
      }
    },
    [triggerFilesRefresh]
  );

  /**
   * Fetch existing attachments from the backend.
   * Internal function - called automatically by effects.
   */
  const fetchExistingAttachmentsInternal = useCallback(
    async (sessionId: string, replace: boolean): Promise<void> => {
      // Request deduplication
      if (fetchingSessionRef.current === sessionId) return;

      fetchingSessionRef.current = sessionId;

      try {
        const listing = await fetchDirectoryListing(sessionId, "attachments");

        // Use deterministic IDs based on session and path for stable React keys
        const attachments: BuildFile[] = listing.entries
          .filter((entry) => !entry.is_directory)
          .map((entry) => ({
            id: `existing_${sessionId}_${entry.path}`,
            name: entry.name,
            status: UploadFileStatus.COMPLETED,
            file_type: entry.mime_type || "application/octet-stream",
            size: entry.size || 0,
            created_at: new Date().toISOString(),
            path: entry.path,
          }));

        if (replace) {
          // When replacing, preserve any files that are still being processed locally
          // (uploading, pending, or recently completed uploads that might not be in
          // backend listing yet due to race conditions)
          setCurrentMessageFiles((prev) => {
            // Keep files that are still in-flight or don't have a path yet
            const localOnlyFiles = prev.filter(
              (f) =>
                f.status === UploadFileStatus.UPLOADING ||
                f.status === UploadFileStatus.PENDING ||
                f.status === UploadFileStatus.PROCESSING ||
                // Keep recently uploaded files (have temp ID, not fetched from backend)
                f.id.startsWith("temp_")
            );

            // Merge: backend attachments + local-only files (avoiding duplicates by path)
            const backendPaths = new Set(attachments.map((f) => f.path));
            const nonDuplicateLocalFiles = localOnlyFiles.filter(
              (f) => !f.path || !backendPaths.has(f.path)
            );

            return [...attachments, ...nonDuplicateLocalFiles];
          });
        } else if (attachments.length > 0) {
          setCurrentMessageFiles((prev) => {
            const existingPaths = new Set(prev.map((f) => f.path));
            const newFiles = attachments.filter(
              (f) => !existingPaths.has(f.path)
            );
            return [...prev, ...newFiles];
          });
        }
      } catch (error) {
        const { type } = classifyError(error);
        if (type !== UploadErrorType.NOT_FOUND) {
          console.error(
            "[UploadFilesContext] fetchExistingAttachments error:",
            error
          );
        }
        if (replace) {
          // On error, only clear files that aren't being processed locally
          setCurrentMessageFiles((prev) =>
            prev.filter(
              (f) =>
                f.status === UploadFileStatus.UPLOADING ||
                f.status === UploadFileStatus.PENDING ||
                f.status === UploadFileStatus.PROCESSING ||
                f.id.startsWith("temp_")
            )
          );
        }
      } finally {
        fetchingSessionRef.current = null;
      }
    },
    []
  );

  // =========================================================================
  // Effects - Automatic state machine transitions
  // =========================================================================

  /**
   * Effect: Handle session changes
   *
   * When activeSessionId changes:
   * - If changed to a DIFFERENT non-null session: fetch attachments (replace mode)
   * - If changed to null: do nothing (don't clear - session might be temporarily null during revalidation)
   *
   * This prevents unnecessary fetches/clears when the focus handler temporarily
   * resets the pre-provisioned session state.
   */
  useEffect(() => {
    const prevSession = prevSessionRef.current;
    const currentSession = activeSessionId;

    // Only update ref when we have a non-null session (ignore temporary nulls)
    if (currentSession) {
      // Session changed to a different non-null value
      if (currentSession !== prevSession) {
        prevSessionRef.current = currentSession;
        fetchExistingAttachmentsInternal(currentSession, true);
      }
    }
    // When session becomes null, don't clear files or update ref.
    // This handles the case where pre-provisioning temporarily resets on focus.
    // Files will be cleared when user actually navigates away or logs out.
  }, [activeSessionId, fetchExistingAttachmentsInternal]);

  /**
   * Effect: Auto-upload pending files when session becomes available
   *
   * This handles the case where user attaches files before session is ready.
   */
  useEffect(() => {
    if (activeSessionId && hasPendingFiles) {
      uploadPendingFilesInternal(activeSessionId);
    }
  }, [activeSessionId, hasPendingFiles, uploadPendingFilesInternal]);

  /**
   * Effect: Refetch attachments after files are cleared
   *
   * When files are cleared (e.g., after sending a message) but we're still
   * on the same session, refetch to restore any backend attachments.
   *
   * IMPORTANT: Skip refetch if files went to 0 due to active deletions.
   * This prevents a race condition where refetch returns the file before
   * backend deletion completes, causing the file pill to persist.
   */
  const prevFilesLengthRef = useRef(currentMessageFiles.length);
  useEffect(() => {
    const prevLength = prevFilesLengthRef.current;
    const currentLength = currentMessageFiles.length;
    prevFilesLengthRef.current = currentLength;

    // Files were just cleared (went from >0 to 0)
    const filesWereCleared = prevLength > 0 && currentLength === 0;

    // Skip refetch if there are active deletions in progress
    // This prevents the deleted file from being re-added before backend deletion completes
    const hasActiveDeletions = activeDeletionsRef.current.size > 0;
    // Skip refetch if caller explicitly suppressed (e.g. user hit Enter to dismiss file)
    const shouldSuppressRefetch = suppressRefetchRef.current;
    if (shouldSuppressRefetch) {
      suppressRefetchRef.current = false;
    }

    // Refetch if on same session and files were cleared (not deleted)
    if (
      filesWereCleared &&
      activeSessionId &&
      prevSessionRef.current === activeSessionId &&
      !hasActiveDeletions &&
      !shouldSuppressRefetch
    ) {
      fetchExistingAttachmentsInternal(activeSessionId, false);
    }
  }, [
    currentMessageFiles.length,
    activeSessionId,
    fetchExistingAttachmentsInternal,
  ]);

  // =========================================================================
  // Public API
  // =========================================================================

  /**
   * Set the active session. Triggers fetching/clearing as needed.
   */
  const setActiveSession = useCallback((sessionId: string | null) => {
    setActiveSessionId(sessionId);
  }, []);

  /**
   * Upload files. Uses activeSessionId internally.
   * Validates files before upload (size, extension, batch limits).
   */
  const uploadFiles = useCallback(
    async (files: File[]): Promise<BuildFile[]> => {
      // Get current files for batch validation
      const existingFiles = currentMessageFiles;

      // Validate batch constraints first
      const batchValidation = validateBatch(files, existingFiles);
      if (!batchValidation.valid) {
        // Create failed files for all with the batch error
        const failedFiles = files.map((f) =>
          createFailedFile(f, batchValidation.error!)
        );
        setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);
        return failedFiles;
      }

      // Validate each file individually and separate valid from invalid
      const validFiles: File[] = [];
      const failedFiles: BuildFile[] = [];

      for (const file of files) {
        const validation = validateFile(file);
        if (validation.valid) {
          validFiles.push(file);
        } else {
          failedFiles.push(createFailedFile(file, validation.error!));
        }
      }

      // Add failed files immediately
      if (failedFiles.length > 0) {
        setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);
      }

      // If no valid files, return early
      if (validFiles.length === 0) {
        return failedFiles;
      }

      // Create optimistic files for valid files
      const optimisticFiles = validFiles.map(createOptimisticFile);

      // Add to current message files immediately
      setCurrentMessageFiles((prev) => [...prev, ...optimisticFiles]);

      const sessionId = activeSessionId;

      if (sessionId) {
        // Session available - upload immediately
        const uploadPromises = optimisticFiles.map(async (optimisticFile) => {
          try {
            const result = await uploadFileApi(sessionId, optimisticFile.file!);
            return {
              id: optimisticFile.id,
              success: true as const,
              result,
            };
          } catch (error) {
            const { message } = classifyError(error);
            return {
              id: optimisticFile.id,
              success: false as const,
              errorMessage: message,
            };
          }
        });

        const results = await Promise.all(uploadPromises);

        // Batch update all file statuses
        setCurrentMessageFiles((prev) =>
          prev.map((f) => {
            const uploadResult = results.find((r) => r.id === f.id);
            if (!uploadResult) return f;

            if (uploadResult.success) {
              return {
                ...f,
                status: UploadFileStatus.COMPLETED,
                path: uploadResult.result.path,
                name: uploadResult.result.filename,
                file: undefined, // Clear blob to free memory
              };
            } else {
              return {
                ...f,
                status: UploadFileStatus.FAILED,
                error: uploadResult.errorMessage,
              };
            }
          })
        );

        // Refresh file explorer if any uploads succeeded
        const anySucceeded = results.some((r) => r.success);
        if (anySucceeded) {
          triggerFilesRefresh(sessionId);
        }
      } else {
        // No session yet - mark as PENDING (effect will auto-upload when session available)
        setCurrentMessageFiles((prev) =>
          prev.map((f) =>
            optimisticFiles.some((of) => of.id === f.id)
              ? { ...f, status: UploadFileStatus.PENDING }
              : f
          )
        );
      }

      return [...failedFiles, ...optimisticFiles];
    },
    [activeSessionId, currentMessageFiles, triggerFilesRefresh]
  );

  /**
   * Remove a file. Uses activeSessionId internally for sandbox deletion.
   */
  const removeFile = useCallback(
    (fileId: string) => {
      // Track this deletion to prevent refetch race condition
      activeDeletionsRef.current.add(fileId);

      // Use functional update to get current state and avoid stale closures
      let removedFile: BuildFile | null = null;
      let removedIndex = -1;

      setCurrentMessageFiles((prev) => {
        const index = prev.findIndex((f) => f.id === fileId);
        if (index === -1) return prev;

        // Capture file info for potential rollback and backend deletion
        const file = prev[index];
        if (!file) return prev;
        removedFile = file;
        removedIndex = index;

        // Return filtered array (optimistic removal)
        return prev.filter((f) => f.id !== fileId);
      });

      // After state update, trigger backend deletion if needed
      // Use setTimeout to ensure state update has completed
      setTimeout(() => {
        if (removedFile?.path && activeSessionId) {
          const filePath = removedFile.path;
          const fileToRestore = removedFile;
          const indexToRestore = removedIndex;

          deleteFileApi(activeSessionId, filePath)
            .then(() => {
              // Deletion succeeded - remove from active deletions
              activeDeletionsRef.current.delete(fileId);
              // Refresh file explorer
              triggerFilesRefresh(activeSessionId);
            })
            .catch((error) => {
              console.error(
                "[UploadFilesContext] Failed to delete file from sandbox:",
                error
              );
              // Remove from active deletions
              activeDeletionsRef.current.delete(fileId);
              // Rollback: restore the file at its original position
              setCurrentMessageFiles((prev) => {
                // Check if file was already re-added (e.g., by another operation)
                if (prev.some((f) => f.id === fileToRestore.id)) return prev;

                const newFiles = [...prev];
                const insertIndex = Math.min(indexToRestore, newFiles.length);
                newFiles.splice(insertIndex, 0, fileToRestore);
                return newFiles;
              });
            });
        } else {
          // No backend deletion needed - remove from active deletions immediately
          activeDeletionsRef.current.delete(fileId);
        }
      }, 0);
    },
    [activeSessionId, triggerFilesRefresh]
  );

  /**
   * Clear all files from the input bar.
   */
  const clearFiles = useCallback((options?: { suppressRefetch?: boolean }) => {
    if (options?.suppressRefetch) {
      suppressRefetchRef.current = true;
    }
    setCurrentMessageFiles([]);
  }, []);

  // =========================================================================
  // Context value
  // =========================================================================

  const value = useMemo<UploadFilesContextValue>(
    () => ({
      currentMessageFiles,
      activeSessionId,
      setActiveSession,
      uploadFiles,
      removeFile,
      clearFiles,
      hasUploadingFiles,
      hasPendingFiles,
    }),
    [
      currentMessageFiles,
      activeSessionId,
      setActiveSession,
      uploadFiles,
      removeFile,
      clearFiles,
      hasUploadingFiles,
      hasPendingFiles,
    ]
  );

  return (
    <UploadFilesContext.Provider value={value}>
      {children}
    </UploadFilesContext.Provider>
  );
}

export function useUploadFilesContext() {
  const context = useContext(UploadFilesContext);
  if (!context) {
    throw new Error(
      "useUploadFilesContext must be used within an UploadFilesProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/app/craft/hooks/useBuildConnectors.ts
================================================
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  BuildConnectorConfig,
  ConnectorStatus,
} from "@/app/craft/v1/configure/components/ConnectorCard";

interface BuildConnectorListResponse {
  connectors: BuildConnectorConfig[];
}

/**
 * Hook to fetch and manage build mode connectors.
 *
 * @returns Object containing:
 * - `connectors`: Array of connector configurations
 * - `hasActiveConnector`: True if at least one connector has status "connected" (currently synced)
 * - `hasConnectorEverSucceeded`: True if any connector has ever succeeded (has last_indexed timestamp).
 *   Use this to determine if demo data can be disabled or if banners should be hidden.
 * - `hasAnyConnector`: True if any connectors exist (regardless of status). Useful for general checks.
 * - `isLoading`: True while fetching
 * - `mutate`: Function to refetch connectors
 */
export function useBuildConnectors() {
  const { data, isLoading, mutate } = useSWR<BuildConnectorListResponse>(
    SWR_KEYS.buildConnectors,
    errorHandlingFetcher,
    { refreshInterval: 30000 } // 30 seconds - matches configure page
  );

  const connectors = data?.connectors ?? [];

  // At least one connector with status "connected" (actively synced)
  const hasActiveConnector = connectors.some((c) => c.status === "connected");

  // Check if any connector has ever succeeded (has last_indexed timestamp)
  // This allows demo data to be turned off even if connectors currently have errors
  const hasConnectorEverSucceeded = connectors.some(
    (c) => c.last_indexed !== null
  );

  // Any connector exists (regardless of status)
  const hasAnyConnector = connectors.length > 0;

  return {
    connectors,
    hasActiveConnector,
    hasConnectorEverSucceeded,
    hasAnyConnector,
    isLoading,
    mutate,
  };
}


================================================
FILE: web/src/app/craft/hooks/useBuildLlmSelection.ts
================================================
import { useMemo, useState, useCallback } from "react";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import {
  BuildLlmSelection,
  getBuildLlmSelection,
  setBuildLlmSelection,
  clearBuildLlmSelection,
  getDefaultLlmSelection,
} from "@/app/craft/onboarding/constants";

/**
 * Hook for managing Build mode LLM selection.
 *
 * Resolution priority:
 * 1. Cookie - User's explicit selection (via onboarding or configure page)
 * 2. Smart default - via getDefaultLlmSelection()
 */
export function useBuildLlmSelection(
  llmProviders: LLMProviderDescriptor[] | undefined
) {
  const [selection, setSelectionState] = useState<BuildLlmSelection | null>(
    () => getBuildLlmSelection()
  );

  // Validate that a selection is still valid against current providers.
  // Only checks that the provider exists
  const isSelectionValid = useCallback(
    (sel: BuildLlmSelection | null): boolean => {
      if (!sel || !llmProviders) return false;
      return llmProviders.some(
        (p) => p.provider === sel.provider || p.name === sel.providerName
      );
    },
    [llmProviders]
  );

  // Compute effective selection: cookie > smart default
  const effectiveSelection = useMemo((): BuildLlmSelection | null => {
    // Use cookie if valid
    if (selection && isSelectionValid(selection)) {
      return selection;
    }

    // Fall back to smart default
    return getDefaultLlmSelection(llmProviders);
  }, [selection, llmProviders, isSelectionValid]);

  // Update selection and persist to cookie
  const updateSelection = useCallback((newSelection: BuildLlmSelection) => {
    setBuildLlmSelection(newSelection);
    setSelectionState(newSelection);
  }, []);

  // Clear selection (removes cookie)
  const clearSelection = useCallback(() => {
    clearBuildLlmSelection();
    setSelectionState(null);
  }, []);

  return {
    selection: effectiveSelection,
    updateSelection,
    clearSelection,
    isFromCookie: selection !== null && isSelectionValid(selection),
  };
}


================================================
FILE: web/src/app/craft/hooks/useBuildSessionController.ts
================================================
"use client";

import { useEffect, useRef, useCallback } from "react";
import { useRouter } from "next/navigation";
import { useBuildSessionStore } from "@/app/craft/hooks/useBuildSessionStore";
import { usePreProvisionPolling } from "@/app/craft/hooks/usePreProvisionPolling";
import { CRAFT_SEARCH_PARAM_NAMES } from "@/app/craft/services/searchParams";
import { CRAFT_PATH } from "@/app/craft/v1/constants";
import { getBuildUserPersona } from "@/app/craft/onboarding/constants";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { checkPreProvisionedSession } from "@/app/craft/services/apiServices";

interface UseBuildSessionControllerProps {
  /** Session ID from search params, or null for new session */
  existingSessionId: string | null;
}

/**
 * Controller hook for managing build session lifecycle based on URL.
 * Mirrors useChatSessionController pattern.
 *
 * Responsibilities:
 * - Load session from API when URL changes
 * - Switch current session based on URL (single source of truth)
 * - Trigger pre-provisioning when on new build page
 * - Track session loading state
 * - Re-validate pre-provisioned session on tab focus (multi-tab support)
 *
 * IMPORTANT: This is the ONLY place that should call setCurrentSession.
 * Other components should navigate to URLs and let this controller handle state.
 */
export function useBuildSessionController({
  existingSessionId,
}: UseBuildSessionControllerProps) {
  const router = useRouter();

  // Check LLM provider availability
  const { llmProviders } = useLLMProviders();
  const hasAnyProvider = !!(llmProviders && llmProviders.length > 0);

  // Check if user has completed onboarding (persona cookie is set)
  // Read directly from cookie on every render - cookie reads are cheap and this
  // ensures we always have the current value, especially important after onboarding
  // completes when the cookie is set synchronously but other state updates are async
  const hasCompletedOnboarding = getBuildUserPersona() !== null;

  // Track previous existingSessionId to detect navigation transitions
  const prevExistingSessionIdRef = useRef<string | null>(existingSessionId);

  // Access store state and actions individually like chat does
  const currentSessionId = useBuildSessionStore(
    (state) => state.currentSessionId
  );
  const setCurrentSession = useBuildSessionStore(
    (state) => state.setCurrentSession
  );
  const loadSession = useBuildSessionStore((state) => state.loadSession);

  // Controller state from Zustand (replaces refs for better race condition handling)
  const controllerState = useBuildSessionStore(
    (state) => state.controllerState
  );
  const setControllerTriggered = useBuildSessionStore(
    (state) => state.setControllerTriggered
  );
  const setControllerLoaded = useBuildSessionStore(
    (state) => state.setControllerLoaded
  );

  // Pre-provisioning state (discriminated union)
  const preProvisioning = useBuildSessionStore(
    (state) => state.preProvisioning
  );
  const ensurePreProvisionedSession = useBuildSessionStore(
    (state) => state.ensurePreProvisionedSession
  );

  // Compute derived state directly in selectors for efficiency
  const isLoading = useBuildSessionStore((state) => {
    if (!state.currentSessionId) return false;
    const session = state.sessions.get(state.currentSessionId);
    return session ? !session.isLoaded : false;
  });

  const isStreaming = useBuildSessionStore((state) => {
    if (!state.currentSessionId) return false;
    const session = state.sessions.get(state.currentSessionId);
    return session?.status === "running" || session?.status === "creating";
  });

  // Pre-provisioning derived state
  const isPreProvisioning = preProvisioning.status === "provisioning";
  const isPreProvisioningReady = preProvisioning.status === "ready";

  // Effect: Handle session changes based on URL
  useEffect(() => {
    const prevExistingSessionId = prevExistingSessionIdRef.current;
    prevExistingSessionIdRef.current = existingSessionId;

    // Handle navigation to "new build" (no session ID in URL)
    if (existingSessionId === null) {
      // Clear current session
      if (currentSessionId !== null) {
        setCurrentSession(null);
      }

      // Reset state when transitioning FROM a session TO new build
      // This ensures we fetch fresh pre-provisioned status from backend
      if (prevExistingSessionId !== null) {
        setControllerTriggered(null);
        // Clear pre-provisioned state to force a fresh check from backend
        useBuildSessionStore.setState({ preProvisioning: { status: "idle" } });
      }

      // Trigger pre-provisioning if conditions are met
      const canTrigger =
        controllerState.lastTriggeredForUrl !== "new-build" &&
        (preProvisioning.status === "idle" ||
          preProvisioning.status === "failed") &&
        hasCompletedOnboarding &&
        hasAnyProvider;

      // Also trigger retry if failed and retry time has passed
      const shouldRetry =
        preProvisioning.status === "failed" &&
        Date.now() >= preProvisioning.retryAt &&
        hasCompletedOnboarding &&
        hasAnyProvider;

      if (canTrigger || shouldRetry) {
        setControllerTriggered("new-build");
        ensurePreProvisionedSession();
      }
      return;
    }

    // Navigating to a session - reset the trigger state for next new build visit
    if (controllerState.lastTriggeredForUrl === "new-build") {
      setControllerTriggered(null);
    }

    // Handle navigation to existing session
    async function fetchSession() {
      if (!existingSessionId) return;

      // Mark as loaded BEFORE any async work to prevent duplicate calls
      setControllerLoaded(existingSessionId);

      // Access sessions via getState() to avoid dependency on Map reference
      const currentState = useBuildSessionStore.getState();
      const cachedSession = currentState.sessions.get(existingSessionId);

      if (cachedSession?.isLoaded) {
        // Just switch to it
        setCurrentSession(existingSessionId);
        return;
      }

      // Need to load from API
      await loadSession(existingSessionId);
    }

    // Only fetch if we haven't already loaded this session
    const currentState = useBuildSessionStore.getState();
    const currentSessionData = currentState.currentSessionId
      ? currentState.sessions.get(currentState.currentSessionId)
      : null;
    // Only block loading during active LLM streaming ("running").
    // "creating" means sandbox restore, which should not prevent
    // navigating to and loading a different session.
    const isCurrentlyStreaming = currentSessionData?.status === "running";

    if (
      controllerState.loadedSessionId !== existingSessionId &&
      !isCurrentlyStreaming
    ) {
      fetchSession();
    } else if (currentSessionId !== existingSessionId) {
      // Session is cached, just switch to it
      setCurrentSession(existingSessionId);
    }
  }, [
    existingSessionId,
    currentSessionId,
    setCurrentSession,
    loadSession,
    preProvisioning,
    ensurePreProvisionedSession,
    hasCompletedOnboarding,
    hasAnyProvider,
    controllerState.lastTriggeredForUrl,
    controllerState.loadedSessionId,
    setControllerTriggered,
    setControllerLoaded,
  ]);

  // Effect: Auto-retry provisioning after backoff period
  // When provisioning fails, we set a retryAt timestamp. This effect schedules
  // a timer to retry after the backoff period elapses.
  useEffect(() => {
    // Only set up timer if in failed state and on new-build page
    if (
      preProvisioning.status !== "failed" ||
      existingSessionId !== null ||
      !hasCompletedOnboarding ||
      !hasAnyProvider
    ) {
      return;
    }

    const msUntilRetry = preProvisioning.retryAt - Date.now();

    // If retry time has already passed, trigger immediately
    if (msUntilRetry <= 0) {
      console.info("[PreProvision] Retry time passed, retrying now...");
      ensurePreProvisionedSession();
      return;
    }

    // Schedule retry after backoff period
    console.info(
      `[PreProvision] Scheduling retry in ${Math.round(msUntilRetry / 1000)}s`
    );
    const timerId = setTimeout(() => {
      console.info("[PreProvision] Backoff elapsed, retrying...");
      ensurePreProvisionedSession();
    }, msUntilRetry);

    return () => clearTimeout(timerId);
  }, [
    preProvisioning,
    existingSessionId,
    hasCompletedOnboarding,
    hasAnyProvider,
    ensurePreProvisionedSession,
  ]);

  // Effect: Re-validate pre-provisioned session on tab focus (multi-tab support)
  // Uses checkPreProvisionedSession API to validate without resetting state,
  // which prevents unnecessary cascading effects when session is still valid.
  useEffect(() => {
    const handleFocus = async () => {
      const { preProvisioning } = useBuildSessionStore.getState();

      // Only re-validate if we have a "ready" pre-provisioned session
      if (preProvisioning.status === "ready") {
        const cachedSessionId = preProvisioning.sessionId;

        try {
          // Check if session is still valid WITHOUT resetting state
          const { valid } = await checkPreProvisionedSession(cachedSessionId);

          if (!valid) {
            // Session was consumed by another tab - now reset and re-provision
            console.info(
              `[PreProvision] Session ${cachedSessionId.slice(
                0,
                8
              )} invalidated on focus, re-provisioning...`
            );
            useBuildSessionStore.setState({
              preProvisioning: { status: "idle" },
            });
            const newSessionId = await useBuildSessionStore
              .getState()
              .ensurePreProvisionedSession();

            if (newSessionId) {
              console.info(
                `[PreProvision] Session changed on focus: ${cachedSessionId.slice(
                  0,
                  8
                )} -> ${newSessionId.slice(0, 8)}`
              );
            }
          }
          // If valid, do nothing - keep the current session
        } catch (error) {
          // On error, log but don't reset - better to keep potentially stale session
          // than to cause UI flicker on network blip
          console.warn(
            "[PreProvision] Failed to validate session on focus:",
            error
          );
        }
      }
    };

    window.addEventListener("focus", handleFocus);
    return () => window.removeEventListener("focus", handleFocus);
  }, []);

  /**
   * Navigate to a specific session
   */
  const navigateToSession = useCallback(
    (sessionId: string) => {
      router.push(
        `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${sessionId}`
      );
    },
    [router]
  );

  /**
   * Navigate to new build (clear session)
   * Note: We intentionally don't abort the current session's stream,
   * allowing it to continue in the background.
   */
  const navigateToNewBuild = useCallback(() => {
    router.push(CRAFT_PATH);
  }, [router]);

  // Poll to verify pre-provisioned session is still valid (multi-tab support)
  // Only poll on welcome page (existingSessionId === null) - no point polling on session pages
  usePreProvisionPolling({ enabled: existingSessionId === null });

  return {
    currentSessionId,
    isLoading,
    isStreaming,
    navigateToSession,
    navigateToNewBuild,
    // Pre-provisioning state
    isPreProvisioning,
    isPreProvisioningReady,
    preProvisioning,
  };
}


================================================
FILE: web/src/app/craft/hooks/useBuildSessionStore.ts
================================================
"use client";

import { create } from "zustand";
import { getDemoDataEnabled } from "@/app/craft/v1/constants";
import {
  getBuildUserPersona,
  getBuildLlmSelection,
} from "@/app/craft/onboarding/constants";
import { DELETE_SUCCESS_DISPLAY_DURATION_MS } from "@/app/craft/constants";

import {
  ApiSandboxResponse,
  Artifact,
  ArtifactType,
  BuildMessage,
  Session,
  SessionHistoryItem,
  SessionStatus,
  ToolCall,
  ToolCallStatus,
} from "@/app/craft/types/streamingTypes";

import {
  StreamItem,
  ToolCallState,
  TodoListState,
} from "@/app/craft/types/displayTypes";

import {
  createSession as apiCreateSession,
  fetchSession,
  fetchSessionHistory,
  generateSessionName,
  updateSessionName,
  deleteSession as apiDeleteSession,
  fetchMessages,
  fetchArtifacts,
  restoreSession,
} from "@/app/craft/services/apiServices";

import { genId } from "@/app/craft/utils/streamItemHelpers";
import { parsePacket } from "@/app/craft/utils/parsePacket";

/**
 * Convert loaded messages (with message_metadata) to StreamItem[] format.
 *
 * The backend stores messages with these packet types in message_metadata:
 * - user_message: {type: "user_message", content: {type: "text", text: "..."}}
 * - agent_message: {type: "agent_message", content: {type: "text", text: "..."}}
 * - agent_thought: {type: "agent_thought", content: {type: "text", text: "..."}}
 * - tool_call_progress: Full tool call data with status="completed"
 * - agent_plan_update: Plan entries (not rendered as stream items)
 *
 * This function converts agent messages to StreamItem[] for rendering.
 */
function convertMessagesToStreamItems(messages: BuildMessage[]): StreamItem[] {
  const items: StreamItem[] = [];

  for (const message of messages) {
    if (message.type === "user") continue;

    const metadata = message.message_metadata;
    if (!metadata || typeof metadata !== "object") continue;

    // SAME parsePacket — identical classification for both code paths
    const packet = parsePacket(metadata);

    switch (packet.type) {
      case "text_chunk":
        if (packet.text) {
          items.push({
            type: "text",
            id: message.id || genId("text"),
            content: packet.text,
            isStreaming: false,
          });
        }
        break;

      case "thinking_chunk":
        if (packet.text) {
          items.push({
            type: "thinking",
            id: message.id || genId("thinking"),
            content: packet.text,
            isStreaming: false,
          });
        }
        break;

      case "tool_call_progress":
        if (packet.isTodo) {
          // Upsert: update existing todo_list or create new one
          const existingIdx = items.findIndex(
            (item) =>
              item.type === "todo_list" &&
              item.todoList.id === packet.toolCallId
          );
          if (existingIdx >= 0) {
            const existing = items[existingIdx];
            if (existing && existing.type === "todo_list") {
              items[existingIdx] = {
                ...existing,
                todoList: { ...existing.todoList, todos: packet.todos },
              };
            }
          } else {
            items.push({
              type: "todo_list",
              id: packet.toolCallId,
              todoList: {
                id: packet.toolCallId,
                todos: packet.todos,
                isOpen: false,
              },
            });
          }
        } else {
          items.push({
            type: "tool_call",
            id: packet.toolCallId,
            toolCall: {
              id: packet.toolCallId,
              kind: packet.kind,
              title: packet.title,
              description: packet.description,
              command: packet.command,
              status: packet.status,
              rawOutput: packet.rawOutput,
              subagentType: packet.subagentType ?? undefined,
              isNewFile: packet.isNewFile,
              oldContent: packet.oldContent,
              newContent: packet.newContent,
            },
          });
        }
        break;

      // agent_plan_update and other packet types are not rendered as stream items
      default:
        break;
    }
  }

  return items;
}

/**
 * Consolidate raw backend messages into proper conversation turns.
 *
 * The backend stores each streaming packet as a separate message. This function:
 * 1. Groups consecutive agent messages (between user messages) into turns
 * 2. Converts each group's packets to streamItems
 * 3. Creates consolidated messages with streamItems in message_metadata
 *
 * Returns: Array of consolidated messages (user messages + one agent message per turn)
 */
function consolidateMessagesIntoTurns(
  rawMessages: BuildMessage[]
): BuildMessage[] {
  const consolidated: BuildMessage[] = [];
  let currentAgentPackets: BuildMessage[] = [];

  for (const message of rawMessages) {
    if (message.type === "user") {
      // If we have accumulated agent packets, consolidate them into one message
      if (currentAgentPackets.length > 0) {
        const streamItems = convertMessagesToStreamItems(currentAgentPackets);
        const textContent = streamItems
          .filter((item) => item.type === "text")
          .map((item) => item.content)
          .join("");

        consolidated.push({
          id: currentAgentPackets[0]?.id || genId("agent-msg"),
          type: "assistant",
          content: textContent,
          timestamp: currentAgentPackets[0]?.timestamp || new Date(),
          message_metadata: {
            streamItems,
          },
        });
        currentAgentPackets = [];
      }
      // Add the user message as-is
      consolidated.push(message);
    } else if (message.type === "assistant") {
      // Check if this message already has consolidated streamItems (from new format)
      if (message.message_metadata?.streamItems) {
        // Already consolidated, add as-is
        if (currentAgentPackets.length > 0) {
          // Flush any pending packets first
          const streamItems = convertMessagesToStreamItems(currentAgentPackets);
          const textContent = streamItems
            .filter((item) => item.type === "text")
            .map((item) => item.content)
            .join("");

          consolidated.push({
            id: currentAgentPackets[0]?.id || genId("agent-msg"),
            type: "assistant",
            content: textContent,
            timestamp: currentAgentPackets[0]?.timestamp || new Date(),
            message_metadata: {
              streamItems,
            },
          });
          currentAgentPackets = [];
        }
        consolidated.push(message);
      } else {
        // Old format - accumulate for consolidation
        currentAgentPackets.push(message);
      }
    }
  }

  // Don't forget any trailing agent packets
  if (currentAgentPackets.length > 0) {
    const streamItems = convertMessagesToStreamItems(currentAgentPackets);
    const textContent = streamItems
      .filter((item) => item.type === "text")
      .map((item) => item.content)
      .join("");

    consolidated.push({
      id: currentAgentPackets[0]?.id || genId("agent-msg"),
      type: "assistant",
      content: textContent,
      timestamp: currentAgentPackets[0]?.timestamp || new Date(),
      message_metadata: {
        streamItems,
      },
    });
  }

  return consolidated;
}

// Re-export types for consumers
export type {
  Artifact,
  ArtifactType,
  BuildMessage,
  Session,
  SessionHistoryItem,
  SessionStatus,
  ToolCall,
  ToolCallStatus,
};

// =============================================================================
// Store Types (mirrors chat's useChatSessionStore pattern)
// =============================================================================

/** Pre-provisioning state machine - exactly one of these states at a time */
export type PreProvisioningState =
  | { status: "idle" }
  | { status: "provisioning"; demoDataEnabled: boolean }
  | { status: "ready"; sessionId: string; demoDataEnabled: boolean }
  | { status: "failed"; error: string; retryCount: number; retryAt: number };

// Module-level variable to store the provisioning promise (not in Zustand state for serializability)
let provisioningPromise: Promise<string | null> | null = null;

/** File preview tab data */
export interface FilePreviewTab {
  path: string;
  fileName: string;
}

/** Files tab state - persisted across tab switches */
export interface FilesTabState {
  expandedPaths: string[];
  scrollTop: number;
  /** Cached directory listings by path - avoids refetch on tab switch */
  directoryCache: Record<string, unknown[]>;
}

/** Tab history entry - can be a pinned tab or a file preview */
export type TabHistoryEntry =
  | { type: "pinned"; tab: OutputTabType }
  | { type: "file"; path: string };

/** Browser-style tab navigation history */
export interface TabNavigationHistory {
  entries: TabHistoryEntry[];
  currentIndex: number;
}

/** Follow-up suggestion bubble */
export interface SuggestionBubble {
  theme: "add" | "question";
  text: string;
}

/** Output panel tab types */
export type OutputTabType = "preview" | "files" | "artifacts";

export interface BuildSessionData {
  id: string;
  status: SessionStatus;
  messages: BuildMessage[];
  artifacts: Artifact[];
  /** Active tool calls for the current response */
  toolCalls: ToolCall[];
  /**
   * FIFO stream items for the current agent turn.
   * Items are stored in chronological order as they arrive.
   * Rendered directly without transformation.
   */
  streamItems: StreamItem[];
  error: string | null;
  webappUrl: string | null;
  /** Sandbox info from backend */
  sandbox: ApiSandboxResponse | null;
  abortController: AbortController;
  lastAccessed: Date;
  isLoaded: boolean;
  outputPanelOpen: boolean;
  /** Counter to trigger webapp refresh when web/ files change (increments on each edit) */
  webappNeedsRefresh: number;
  /** Counter to trigger files list refresh when outputs/ directory changes (increments on each write/edit) */
  filesNeedsRefresh: number;
  /** File preview tabs open in this session */
  filePreviewTabs: FilePreviewTab[];
  /** Active pinned tab in output panel */
  activeOutputTab: OutputTabType;
  /** Active file preview path (when set, this is the active tab instead of pinned tab) */
  activeFilePreviewPath: string | null;
  /** Files tab state - expanded folders and scroll position */
  filesTabState: FilesTabState;
  /** Browser-style tab navigation history for back/forward */
  tabHistory: TabNavigationHistory;
  /** Follow-up suggestions after first agent message */
  followupSuggestions: SuggestionBubble[] | null;
  /** Whether suggestions are currently being generated */
  suggestionsLoading: boolean;
}

interface BuildSessionStore {
  // Session management (mirrors chat)
  currentSessionId: string | null;
  sessions: Map<string, BuildSessionData>;
  sessionHistory: SessionHistoryItem[];

  // Pre-provisioning state (discriminated union - see PreProvisioningState type)
  preProvisioning: PreProvisioningState;

  // Controller state (replaces refs in useBuildSessionController for better race condition handling)
  controllerState: {
    /** Tracks which URL we've triggered provisioning for (prevents re-triggering) */
    lastTriggeredForUrl: string | null;
    /** Tracks which session ID has been loaded (prevents duplicate API calls) */
    loadedSessionId: string | null;
  };

  // Temporary output panel state when no session exists (resets when session is created/cleared)
  noSessionOutputPanelOpen: boolean;

  // Temporary active tab when no session exists (resets when session is created/cleared)
  noSessionActiveOutputTab: OutputTabType;

  // Actions - Session Management
  setCurrentSession: (sessionId: string | null) => void;
  createSession: (
    sessionId: string,
    initialData?: Partial<BuildSessionData>
  ) => void;
  updateSessionData: (
    sessionId: string,
    updates: Partial<BuildSessionData>
  ) => void;

  // Actions - Current Session Shortcuts
  setCurrentSessionStatus: (status: SessionStatus) => void;
  appendMessageToCurrent: (message: BuildMessage) => void;
  updateLastMessageInCurrent: (content: string) => void;
  addArtifactToCurrent: (artifact: Artifact) => void;
  setCurrentError: (error: string | null) => void;
  setCurrentOutputPanelOpen: (open: boolean) => void;
  toggleCurrentOutputPanel: () => void;

  // Actions - Session-specific operations (for streaming - immune to currentSessionId changes)
  appendMessageToSession: (sessionId: string, message: BuildMessage) => void;
  updateLastMessageInSession: (sessionId: string, content: string) => void;
  updateMessageByIdInSession: (
    sessionId: string,
    messageId: string,
    content: string
  ) => void;
  addArtifactToSession: (sessionId: string, artifact: Artifact) => void;

  // Actions - Tool Call Management
  addToolCallToSession: (sessionId: string, toolCall: ToolCall) => void;
  updateToolCallInSession: (
    sessionId: string,
    toolCallId: string,
    updates: Partial<ToolCall>
  ) => void;
  clearToolCallsInSession: (sessionId: string) => void;

  // Actions - Stream Items (FIFO rendering)
  appendStreamItem: (sessionId: string, item: StreamItem) => void;
  updateStreamItem: (
    sessionId: string,
    itemId: string,
    updates: Partial<StreamItem>
  ) => void;
  updateLastStreamingText: (sessionId: string, content: string) => void;
  updateLastStreamingThinking: (sessionId: string, content: string) => void;
  updateToolCallStreamItem: (
    sessionId: string,
    toolCallId: string,
    updates: Partial<ToolCallState>
  ) => void;
  updateTodoListStreamItem: (
    sessionId: string,
    todoListId: string,
    updates: Partial<TodoListState>
  ) => void;
  upsertTodoListStreamItem: (
    sessionId: string,
    todoListId: string,
    todoList: TodoListState
  ) => void;
  clearStreamItems: (sessionId: string) => void;

  // Actions - Abort Control
  setAbortController: (sessionId: string, controller: AbortController) => void;
  abortSession: (sessionId: string) => void;
  abortCurrentSession: () => void;

  // Actions - Session Lifecycle
  createNewSession: (prompt: string) => Promise<string | null>;
  loadSession: (sessionId: string) => Promise<void>;

  // Actions - Session History
  refreshSessionHistory: () => Promise<void>;
  nameBuildSession: (sessionId: string) => Promise<void>;
  renameBuildSession: (sessionId: string, newName: string) => Promise<void>;
  deleteBuildSession: (sessionId: string) => Promise<void>;

  // Utilities
  cleanupOldSessions: (maxSessions?: number) => void;

  // Pre-provisioning Actions
  ensurePreProvisionedSession: () => Promise<string | null>;
  consumePreProvisionedSession: () => Promise<string | null>;
  /** Clear and delete any pre-provisioned session (used when settings change) */
  clearPreProvisionedSession: () => Promise<void>;

  // Controller State Actions (for useBuildSessionController - replaces refs)
  setControllerTriggered: (url: string | null) => void;
  setControllerLoaded: (sessionId: string | null) => void;
  resetControllerState: () => void;

  // Webapp Refresh Actions
  triggerWebappRefresh: (sessionId: string) => void;
  // Files Refresh Actions
  triggerFilesRefresh: (sessionId: string) => void;

  // File Preview Actions
  openFilePreview: (sessionId: string, path: string, fileName: string) => void;
  /** Atomically open panel + create file tab + set active for a markdown file detected during streaming */
  openMarkdownPreview: (sessionId: string, filePath: string) => void;
  closeFilePreview: (sessionId: string, path: string) => void;
  setActiveOutputTab: (sessionId: string, tab: OutputTabType) => void;
  setActiveFilePreviewPath: (sessionId: string, path: string | null) => void;
  /** Set active tab when no session exists (for pre-provisioned sandbox viewing) */
  setNoSessionActiveOutputTab: (tab: OutputTabType) => void;

  // Files Tab State Actions
  updateFilesTabState: (
    sessionId: string,
    updates: Partial<FilesTabState>
  ) => void;

  // Tab Navigation History Actions
  navigateTabBack: (sessionId: string) => void;
  navigateTabForward: (sessionId: string) => void;

  // Follow-up Suggestion Actions
  setFollowupSuggestions: (
    sessionId: string,
    suggestions: SuggestionBubble[] | null
  ) => void;
  setSuggestionsLoading: (sessionId: string, loading: boolean) => void;
  clearFollowupSuggestions: (sessionId: string) => void;
}

// =============================================================================
// Initial State Factory
// =============================================================================

const createInitialSessionData = (
  sessionId: string,
  initialData?: Partial<BuildSessionData>
): BuildSessionData => ({
  id: sessionId,
  status: "idle",
  messages: [],
  artifacts: [],
  toolCalls: [],
  streamItems: [],
  error: null,
  webappUrl: null,
  sandbox: null,
  abortController: new AbortController(),
  lastAccessed: new Date(),
  isLoaded: false,
  outputPanelOpen: false,
  webappNeedsRefresh: 0,
  filesNeedsRefresh: 0,
  filePreviewTabs: [],
  activeOutputTab: "preview",
  activeFilePreviewPath: null,
  filesTabState: { expandedPaths: [], scrollTop: 0, directoryCache: {} },
  tabHistory: {
    entries: [{ type: "pinned", tab: "preview" }],
    currentIndex: 0,
  },
  followupSuggestions: null,
  suggestionsLoading: false,
  ...initialData,
});

// =============================================================================
// Store
// =============================================================================

export const useBuildSessionStore = create<BuildSessionStore>()((set, get) => ({
  currentSessionId: null,
  sessions: new Map<string, BuildSessionData>(),
  sessionHistory: [],

  // Pre-provisioning state
  preProvisioning: { status: "idle" },

  // Controller state (replaces refs in useBuildSessionController)
  controllerState: {
    lastTriggeredForUrl: null,
    loadedSessionId: null,
  },

  // Temporary output panel state when no session exists (resets when session is created/cleared)
  noSessionOutputPanelOpen: false,

  // Temporary active tab when no session exists
  noSessionActiveOutputTab: "preview" as OutputTabType,

  // ===========================================================================
  // Session Management (mirrors chat's pattern)
  // ===========================================================================

  setCurrentSession: (sessionId: string | null) => {
    set((state) => {
      // If setting to null, clear current session and reset no-session panel state
      if (sessionId === null) {
        return { currentSessionId: null, noSessionOutputPanelOpen: false };
      }

      // If session doesn't exist, create it and inherit output panel state
      if (!state.sessions.has(sessionId)) {
        const newSession = createInitialSessionData(sessionId, {
          outputPanelOpen: state.noSessionOutputPanelOpen,
        });
        const newSessions = new Map(state.sessions);
        newSessions.set(sessionId, newSession);
        return {
          currentSessionId: sessionId,
          sessions: newSessions,
          noSessionOutputPanelOpen: false,
        };
      }

      // Update last accessed for existing session and reset no-session panel state
      const session = state.sessions.get(sessionId)!;
      const updatedSession = { ...session, lastAccessed: new Date() };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);

      return {
        currentSessionId: sessionId,
        sessions: newSessions,
        noSessionOutputPanelOpen: false,
      };
    });
  },

  // Initialize local session state (does NOT create backend session - use apiCreateSession for that)
  createSession: (
    sessionId: string,
    initialData?: Partial<BuildSessionData>
  ) => {
    set((state) => {
      // Inherit output panel state from no-session state if not explicitly set
      const outputPanelOpen =
        initialData?.outputPanelOpen ?? state.noSessionOutputPanelOpen;
      const newSession = createInitialSessionData(sessionId, {
        ...initialData,
        outputPanelOpen,
      });
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, newSession);
      return { sessions: newSessions };
    });
  },

  updateSessionData: (
    sessionId: string,
    updates: Partial<BuildSessionData>
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        ...updates,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Current Session Shortcuts
  // ===========================================================================

  setCurrentSessionStatus: (status: SessionStatus) => {
    const { currentSessionId, updateSessionData } = get();
    if (currentSessionId) {
      updateSessionData(currentSessionId, { status });
    }
  },

  appendMessageToCurrent: (message: BuildMessage) => {
    const { currentSessionId } = get();
    if (!currentSessionId) return;

    set((state) => {
      const currentSession = state.sessions.get(currentSessionId);
      if (!currentSession) return state;

      const updatedSession: BuildSessionData = {
        ...currentSession,
        messages: [...currentSession.messages, message],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(currentSessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateLastMessageInCurrent: (content: string) => {
    const { currentSessionId } = get();
    if (!currentSessionId) return;

    set((state) => {
      const session = state.sessions.get(currentSessionId);
      if (!session || session.messages.length === 0) return state;

      const messages = session.messages.map((msg, idx) =>
        idx === session.messages.length - 1 ? { ...msg, content } : msg
      );
      const updatedSession: BuildSessionData = {
        ...session,
        messages,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(currentSessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  addArtifactToCurrent: (artifact: Artifact) => {
    const { currentSessionId } = get();
    if (!currentSessionId) return;

    set((state) => {
      const session = state.sessions.get(currentSessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        artifacts: [...session.artifacts, artifact],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(currentSessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  setCurrentError: (error: string | null) => {
    const { currentSessionId, updateSessionData } = get();
    if (currentSessionId) {
      updateSessionData(currentSessionId, { error });
    }
  },

  setCurrentOutputPanelOpen: (open: boolean) => {
    const { currentSessionId, updateSessionData } = get();
    if (currentSessionId) {
      updateSessionData(currentSessionId, { outputPanelOpen: open });
    } else {
      // No session - update temporary state
      set({ noSessionOutputPanelOpen: open });
    }
  },

  toggleCurrentOutputPanel: () => {
    const {
      currentSessionId,
      sessions,
      updateSessionData,
      noSessionOutputPanelOpen,
    } = get();
    if (currentSessionId) {
      const session = sessions.get(currentSessionId);
      if (session) {
        updateSessionData(currentSessionId, {
          outputPanelOpen: !session.outputPanelOpen,
        });
      }
    } else {
      // No session - toggle temporary state
      set({ noSessionOutputPanelOpen: !noSessionOutputPanelOpen });
    }
  },

  // ===========================================================================
  // Session-specific operations (for streaming - immune to currentSessionId changes)
  // ===========================================================================

  appendMessageToSession: (sessionId: string, message: BuildMessage) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        messages: [...session.messages, message],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateLastMessageInSession: (sessionId: string, content: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session || session.messages.length === 0) return state;

      const messages = session.messages.map((msg, idx) =>
        idx === session.messages.length - 1 ? { ...msg, content } : msg
      );
      const updatedSession: BuildSessionData = {
        ...session,
        messages,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateMessageByIdInSession: (
    sessionId: string,
    messageId: string,
    content: string
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const messages = session.messages.map((msg) =>
        msg.id === messageId ? { ...msg, content } : msg
      );
      const updatedSession: BuildSessionData = {
        ...session,
        messages,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  addArtifactToSession: (sessionId: string, artifact: Artifact) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        artifacts: [...session.artifacts, artifact],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Tool Call Management
  // ===========================================================================

  addToolCallToSession: (sessionId: string, toolCall: ToolCall) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        toolCalls: [...session.toolCalls, toolCall],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateToolCallInSession: (
    sessionId: string,
    toolCallId: string,
    updates: Partial<ToolCall>
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const toolCalls = session.toolCalls.map((tc) =>
        tc.id === toolCallId ? { ...tc, ...updates } : tc
      );
      const updatedSession: BuildSessionData = {
        ...session,
        toolCalls,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  clearToolCallsInSession: (sessionId: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        toolCalls: [],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Stream Items (FIFO rendering)
  // ===========================================================================

  appendStreamItem: (sessionId: string, item: StreamItem) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems: [...session.streamItems, item],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateStreamItem: (
    sessionId: string,
    itemId: string,
    updates: Partial<StreamItem>
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const streamItems = session.streamItems.map((item) =>
        item.id === itemId ? { ...item, ...updates } : item
      ) as StreamItem[];
      const updatedSession: BuildSessionData = {
        ...session,
        streamItems,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateLastStreamingText: (sessionId: string, content: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Find the last text item that is streaming
      const items = [...session.streamItems];
      for (let i = items.length - 1; i >= 0; i--) {
        const item = items[i];
        if (item && item.type === "text" && item.isStreaming) {
          items[i] = { ...item, content };
          break;
        }
      }

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems: items,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateLastStreamingThinking: (sessionId: string, content: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Find the last thinking item that is streaming
      const items = [...session.streamItems];
      for (let i = items.length - 1; i >= 0; i--) {
        const item = items[i];
        if (item && item.type === "thinking" && item.isStreaming) {
          items[i] = { ...item, content };
          break;
        }
      }

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems: items,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateToolCallStreamItem: (
    sessionId: string,
    toolCallId: string,
    updates: Partial<ToolCallState>
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const streamItems = session.streamItems.map((item) => {
        if (item.type === "tool_call" && item.toolCall.id === toolCallId) {
          return {
            ...item,
            toolCall: { ...item.toolCall, ...updates },
          };
        }
        return item;
      }) as StreamItem[];

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  updateTodoListStreamItem: (
    sessionId: string,
    todoListId: string,
    updates: Partial<TodoListState>
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const streamItems = session.streamItems.map((item) => {
        if (item.type === "todo_list" && item.todoList.id === todoListId) {
          return {
            ...item,
            todoList: { ...item.todoList, ...updates },
          };
        }
        return item;
      }) as StreamItem[];

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  upsertTodoListStreamItem: (
    sessionId: string,
    todoListId: string,
    todoList: TodoListState
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Check if a todo_list with this ID already exists
      const existingIndex = session.streamItems.findIndex(
        (item) => item.type === "todo_list" && item.todoList.id === todoListId
      );

      let streamItems: StreamItem[];
      if (existingIndex >= 0) {
        // Update existing todo_list
        streamItems = session.streamItems.map((item, index) => {
          if (index === existingIndex && item.type === "todo_list") {
            return {
              ...item,
              todoList: { ...item.todoList, ...todoList },
            };
          }
          return item;
        }) as StreamItem[];
      } else {
        // Create new todo_list item
        streamItems = [
          ...session.streamItems,
          {
            type: "todo_list" as const,
            id: todoListId,
            todoList,
          },
        ];
      }

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  clearStreamItems: (sessionId: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        streamItems: [],
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Abort Control (mirrors chat's per-session pattern)
  // ===========================================================================

  setAbortController: (sessionId: string, controller: AbortController) => {
    get().updateSessionData(sessionId, { abortController: controller });
  },

  abortSession: (sessionId: string) => {
    const session = get().sessions.get(sessionId);
    if (session?.abortController) {
      session.abortController.abort();
      get().updateSessionData(sessionId, {
        abortController: new AbortController(),
      });
    }
  },

  abortCurrentSession: () => {
    const { currentSessionId, abortSession } = get();
    if (currentSessionId) {
      abortSession(currentSessionId);
    }
  },

  // ===========================================================================
  // Session Lifecycle
  // ===========================================================================

  createNewSession: async (prompt: string) => {
    const {
      setCurrentSession,
      updateSessionData,
      refreshSessionHistory,
      nameBuildSession,
    } = get();
    // Read from cookie - single source of truth
    const demoDataEnabled = getDemoDataEnabled();

    // Create a temporary session ID for optimistic UI
    const tempId = `temp-${Date.now()}`;
    setCurrentSession(tempId);
    updateSessionData(tempId, { status: "creating" });

    try {
      // Get LLM selection from cookie
      const llmSelection = getBuildLlmSelection();
      const sessionData = await apiCreateSession({
        name: prompt.slice(0, 50),
        demoDataEnabled,
        llmProviderType: llmSelection?.provider || null,
        llmModelName: llmSelection?.modelName || null,
      });
      const realSessionId = sessionData.id;

      // Remove temp session and create real one
      set((state) => {
        const newSessions = new Map(state.sessions);
        newSessions.delete(tempId);
        newSessions.set(
          realSessionId,
          createInitialSessionData(realSessionId, {
            status: "idle",
            messages: [
              {
                id: `msg-${Date.now()}`,
                type: "user",
                content: prompt,
                timestamp: new Date(),
              },
            ],
            isLoaded: true,
            // Inherit output panel state from no-session state
            outputPanelOpen: state.noSessionOutputPanelOpen,
          })
        );
        return {
          currentSessionId: realSessionId,
          sessions: newSessions,
        };
      });

      // Auto-name the session after a short delay
      setTimeout(() => {
        nameBuildSession(realSessionId);
      }, 200);

      await refreshSessionHistory();
      return realSessionId;
    } catch (err) {
      console.error("Failed to create session:", err);
      updateSessionData(tempId, {
        status: "failed",
        error: (err as Error).message,
      });
      return null;
    }
  },

  loadSession: async (sessionId: string) => {
    const { setCurrentSession, updateSessionData, sessions } = get();

    // Check if already loaded in cache
    const existingSession = sessions.get(sessionId);
    if (existingSession?.isLoaded) {
      setCurrentSession(sessionId);
      return;
    }

    // Set as current and mark as loading
    setCurrentSession(sessionId);

    try {
      // First fetch session to check sandbox status
      let sessionData = await fetchSession(sessionId);

      // Check if session needs to be restored:
      // - Sandbox is sleeping or terminated
      // - Sandbox is running but session workspace is not loaded
      const needsRestore =
        sessionData.sandbox?.status === "sleeping" ||
        sessionData.sandbox?.status === "terminated" ||
        (sessionData.sandbox?.status === "running" &&
          !sessionData.session_loaded_in_sandbox);

      if (needsRestore) {
        // Show sandbox as "restoring" while we load messages + restore
        updateSessionData(sessionId, {
          status: "creating",
          sandbox: sessionData.sandbox
            ? { ...sessionData.sandbox, status: "restoring" }
            : null,
        });
      }

      // Messages come from DB and don't need the sandbox running.
      // Artifacts need sandbox filesystem, so skip during restore.
      const messages = await fetchMessages(sessionId);
      const artifacts = needsRestore ? [] : await fetchArtifacts(sessionId);

      // Preserve optimistic messages if actively streaming (pre-provisioned flow).
      const currentSession = get().sessions.get(sessionId);
      const isStreaming =
        (currentSession?.messages?.length ?? 0) > 0 &&
        (currentSession?.status === "running" ||
          currentSession?.status === "creating");

      // Construct webapp URL
      let webappUrl: string | null = null;
      const hasWebapp = artifacts.some(
        (a) => a.type === "nextjs_app" || a.type === "web_app"
      );
      if (hasWebapp && sessionData.sandbox?.nextjs_port) {
        webappUrl = `http://localhost:${sessionData.sandbox.nextjs_port}`;
      }

      const status = isStreaming
        ? currentSession!.status
        : needsRestore
          ? "creating"
          : sessionData.status === "active"
            ? "active"
            : "idle";
      const resolvedMessages = isStreaming
        ? currentSession!.messages
        : consolidateMessagesIntoTurns(messages);
      const streamItems = isStreaming ? currentSession!.streamItems : [];
      const sandbox =
        needsRestore && sessionData.sandbox
          ? { ...sessionData.sandbox, status: "restoring" as const }
          : sessionData.sandbox;

      updateSessionData(sessionId, {
        status,
        messages: resolvedMessages,
        streamItems,
        artifacts,
        webappUrl,
        sandbox,
        error: null,
        isLoaded: true,
      });

      // Now restore the sandbox if needed (messages are already visible).
      // The backend enforces a timeout and returns an error if restore
      // takes too long, so no frontend timeout needed here.
      if (needsRestore) {
        try {
          sessionData = await restoreSession(sessionId);

          // Sandbox is now running - fetch artifacts
          const restoredArtifacts = await fetchArtifacts(sessionId);

          updateSessionData(sessionId, {
            status: sessionData.status === "active" ? "active" : "idle",
            artifacts: restoredArtifacts,
            sandbox: sessionData.sandbox,
            // Bump so OutputPanel's SWR refetches webapp-info (which
            // derives the actual webappUrl from the backend).
            webappNeedsRefresh:
              (get().sessions.get(sessionId)?.webappNeedsRefresh || 0) + 1,
          });
        } catch (restoreErr) {
          console.error("Sandbox restore failed:", restoreErr);
          updateSessionData(sessionId, {
            status: "idle",
            sandbox: sessionData.sandbox
              ? { ...sessionData.sandbox, status: "failed" }
              : null,
          });
        }
      }
    } catch (err) {
      console.error("Failed to load session:", err);
      updateSessionData(sessionId, {
        error: (err as Error).message,
      });
    }
  },

  // ===========================================================================
  // Session History
  // ===========================================================================

  refreshSessionHistory: async () => {
    try {
      const history = await fetchSessionHistory();
      set({ sessionHistory: history });
    } catch (err) {
      console.error("Failed to fetch session history:", err);
    }
  },

  nameBuildSession: async (sessionId: string) => {
    try {
      // Generate name using LLM based on first user message
      const generatedName = await generateSessionName(sessionId);

      // Optimistically update the session title in sessionHistory immediately
      // This triggers the typewriter animation in the sidebar
      set((state) => ({
        sessionHistory: state.sessionHistory.map((item) =>
          item.id === sessionId ? { ...item, title: generatedName } : item
        ),
      }));

      // Persist the name to backend (fire and forget - error handling below)
      await updateSessionName(sessionId, generatedName);
    } catch (err) {
      console.error("Failed to auto-name session:", err);
      // On error, refresh to get the actual state from backend
      await get().refreshSessionHistory();
    }
  },

  renameBuildSession: async (sessionId: string, newName: string) => {
    try {
      await updateSessionName(sessionId, newName);
      set((state) => ({
        sessionHistory: state.sessionHistory.map((item) =>
          item.id === sessionId ? { ...item, title: newName } : item
        ),
      }));
    } catch (err) {
      console.error("Failed to rename session:", err);
      await get().refreshSessionHistory();
      throw err;
    }
  },

  deleteBuildSession: async (sessionId: string) => {
    const { currentSessionId, abortSession, refreshSessionHistory } = get();

    try {
      abortSession(sessionId);
      await apiDeleteSession(sessionId);

      // Remove session from local state
      set((state) => {
        const newSessions = new Map(state.sessions);
        newSessions.delete(sessionId);
        return {
          sessions: newSessions,
          currentSessionId:
            currentSessionId === sessionId ? null : state.currentSessionId,
        };
      });

      // Refresh history after UI has shown success state
      setTimeout(
        () => refreshSessionHistory(),
        DELETE_SUCCESS_DISPLAY_DURATION_MS
      );
    } catch (err) {
      console.error("Failed to delete session:", err);
      throw err;
    }
  },

  // ===========================================================================
  // Utilities (mirrors chat's cleanup pattern)
  // ===========================================================================

  cleanupOldSessions: (maxSessions: number = 10) => {
    set((state) => {
      const sortedSessions = Array.from(state.sessions.entries()).sort(
        ([, a], [, b]) => b.lastAccessed.getTime() - a.lastAccessed.getTime()
      );

      if (sortedSessions.length <= maxSessions) {
        return state;
      }

      const sessionsToKeep = sortedSessions.slice(0, maxSessions);
      const sessionsToRemove = sortedSessions.slice(maxSessions);

      // Abort controllers for sessions being removed
      sessionsToRemove.forEach(([, session]) => {
        if (session.abortController) {
          session.abortController.abort();
        }
      });

      return {
        sessions: new Map(sessionsToKeep),
      };
    });
  },

  // ===========================================================================
  // Pre-provisioning Actions
  // ===========================================================================

  ensurePreProvisionedSession: async () => {
    const { preProvisioning } = get();
    // Read from cookie - single source of truth
    const demoDataEnabled = getDemoDataEnabled();

    // Already have a pre-provisioned session ready
    if (preProvisioning.status === "ready") {
      // If demoDataEnabled matches, return the existing session
      if (preProvisioning.demoDataEnabled === demoDataEnabled) {
        return preProvisioning.sessionId;
      }
      // demoDataEnabled changed - invalidate and re-provision
      const sessionIdToDelete = preProvisioning.sessionId;
      set({ preProvisioning: { status: "idle" } });
      apiDeleteSession(sessionIdToDelete).catch((err) => {
        console.error(
          "[PreProvision] Failed to delete invalidated session:",
          err
        );
      });
      // Fall through to create a new session with the current setting
    }

    // Already provisioning - return existing promise
    if (preProvisioning.status === "provisioning") {
      return provisioningPromise;
    }

    // Handle failed state with retry
    // Capture retryCount BEFORE resetting to idle (so we can increment it on next failure)
    let currentRetryCount = 0;
    if (preProvisioning.status === "failed") {
      currentRetryCount = preProvisioning.retryCount;
      if (Date.now() < preProvisioning.retryAt) {
        // Not yet time to retry
        return null;
      }
      // Time to retry - reset to idle and continue
      set({ preProvisioning: { status: "idle" } });
    }

    // Start new provisioning with current demoDataEnabled value

    const promise = (async (): Promise<string | null> => {
      try {
        // Parse user persona and LLM selection from cookies
        const persona = getBuildUserPersona();
        const llmSelection = getBuildLlmSelection();

        const sessionData = await apiCreateSession({
          demoDataEnabled,
          userWorkArea: persona?.workArea || null,
          userLevel: persona?.level || null,
          llmProviderType: llmSelection?.provider || null,
          llmModelName: llmSelection?.modelName || null,
        });

        provisioningPromise = null; // Clear promise on success
        set({
          preProvisioning: {
            status: "ready",
            sessionId: sessionData.id,
            demoDataEnabled,
          },
        });
        return sessionData.id;
      } catch (err) {
        console.error("[PreProvision] Failed to pre-provision session:", err);
        const errorMessage =
          err instanceof Error ? err.message : "Unknown error";

        // Exponential backoff: 1s, 2s, 4s, 8s, ... max 30s
        const newRetryCount = currentRetryCount + 1;
        const backoffMs = Math.min(
          1000 * Math.pow(2, newRetryCount - 1),
          30000
        );

        provisioningPromise = null; // Clear promise on failure
        set({
          preProvisioning: {
            status: "failed",
            error: errorMessage,
            retryCount: newRetryCount,
            retryAt: Date.now() + backoffMs,
          },
        });
        return null;
      }
    })();

    provisioningPromise = promise;
    set({
      preProvisioning: { status: "provisioning", demoDataEnabled },
    });
    return promise;
  },

  consumePreProvisionedSession: async () => {
    const { preProvisioning } = get();

    // Wait for provisioning to complete if in progress
    if (preProvisioning.status === "provisioning") {
      await provisioningPromise;
    }

    // Re-check state after awaiting (may have changed)
    const { preProvisioning: currentState, sessionHistory } = get();

    if (currentState.status === "ready") {
      const { sessionId } = currentState;

      // Optimistically add to session history so it appears in sidebar immediately
      // (Backend excludes empty sessions, but we're about to send a message)
      const alreadyInHistory = sessionHistory.some(
        (item) => item.id === sessionId
      );
      if (!alreadyInHistory) {
        set({
          sessionHistory: [
            {
              id: sessionId,
              title: "Fresh Craft",
              createdAt: new Date(),
            },
            ...sessionHistory,
          ],
        });
      }

      // Reset to idle and return the session ID
      set({ preProvisioning: { status: "idle" } });
      return sessionId;
    }

    // No session available
    return null;
  },

  clearPreProvisionedSession: async () => {
    const { preProvisioning } = get();

    // If provisioning is in progress, wait for it to complete
    if (preProvisioning.status === "provisioning") {
      await provisioningPromise;
    }

    // Re-check state after awaiting
    const { preProvisioning: currentState } = get();

    if (currentState.status === "ready") {
      const { sessionId } = currentState;

      // Reset to idle first
      set({ preProvisioning: { status: "idle" } });

      // Delete the session and wait for completion
      try {
        await apiDeleteSession(sessionId);
      } catch (err) {
        console.error(
          "[PreProvision] Failed to delete pre-provisioned session:",
          err
        );
      }
    } else {
      // Just reset to idle if not ready
      set({ preProvisioning: { status: "idle" } });
    }
  },

  // ===========================================================================
  // Controller State Actions (replaces refs in useBuildSessionController)
  // ===========================================================================

  setControllerTriggered: (url: string | null) => {
    set((state) => ({
      controllerState: {
        ...state.controllerState,
        lastTriggeredForUrl: url,
      },
    }));
  },

  setControllerLoaded: (sessionId: string | null) => {
    set((state) => ({
      controllerState: {
        ...state.controllerState,
        loadedSessionId: sessionId,
      },
    }));
  },

  resetControllerState: () => {
    set({
      controllerState: {
        lastTriggeredForUrl: null,
        loadedSessionId: null,
      },
    });
  },

  // ===========================================================================
  // Webapp Refresh Actions
  // ===========================================================================

  triggerWebappRefresh: (sessionId: string) => {
    const session = get().sessions.get(sessionId);
    if (session) {
      // Increment refresh counter and open panel if not already open
      // Using a counter ensures each edit triggers a new refresh
      get().updateSessionData(sessionId, {
        webappNeedsRefresh: (session.webappNeedsRefresh || 0) + 1,
        ...(session.outputPanelOpen ? {} : { outputPanelOpen: true }),
      });
    }
  },

  triggerFilesRefresh: (sessionId: string) => {
    const session = get().sessions.get(sessionId);
    if (session) {
      // Increment refresh counter to trigger files list refresh
      // Using a counter ensures each write/edit triggers a new refresh
      // Also collapse the attachments directory to show fresh state
      const collapsedExpandedPaths = session.filesTabState.expandedPaths.filter(
        (path) => path !== "attachments" && !path.startsWith("attachments/")
      );
      get().updateSessionData(sessionId, {
        filesNeedsRefresh: (session.filesNeedsRefresh || 0) + 1,
        filesTabState: {
          ...session.filesTabState,
          expandedPaths: collapsedExpandedPaths,
        },
      });
    }
  },

  // ===========================================================================
  // File Preview Actions
  // ===========================================================================

  openFilePreview: (sessionId: string, path: string, fileName: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Check if tab already exists
      const existingTab = session.filePreviewTabs.find(
        (tab) => tab.path === path
      );

      let filePreviewTabs = session.filePreviewTabs;
      if (!existingTab) {
        // Add new tab
        filePreviewTabs = [...session.filePreviewTabs, { path, fileName }];
      }

      // Push to history (truncate forward history if navigating from middle)
      const { tabHistory } = session;
      const newEntry: TabHistoryEntry = { type: "file", path };
      const newEntries = [
        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),
        newEntry,
      ];

      const updatedSession: BuildSessionData = {
        ...session,
        filePreviewTabs,
        activeFilePreviewPath: path, // Always switch to this tab
        tabHistory: {
          entries: newEntries,
          currentIndex: newEntries.length - 1,
        },
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  openMarkdownPreview: (sessionId: string, filePath: string) => {
    const fileName = filePath.split("/").pop() || filePath;
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const existingTab = session.filePreviewTabs.find(
        (t) => t.path === filePath
      );
      let filePreviewTabs = session.filePreviewTabs;
      if (!existingTab) {
        filePreviewTabs = [
          ...session.filePreviewTabs,
          { path: filePath, fileName },
        ];
      }

      // Push to history (truncate forward history if navigating from middle)
      const { tabHistory } = session;
      const newEntry: TabHistoryEntry = { type: "file", path: filePath };
      const newEntries = [
        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),
        newEntry,
      ];

      const updatedSession: BuildSessionData = {
        ...session,
        outputPanelOpen: true,
        filePreviewTabs,
        activeFilePreviewPath: filePath,
        tabHistory: {
          entries: newEntries,
          currentIndex: newEntries.length - 1,
        },
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  closeFilePreview: (sessionId: string, path: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Remove the tab
      const filePreviewTabs = session.filePreviewTabs.filter(
        (tab) => tab.path !== path
      );

      // If closing the active preview tab, switch to Files tab
      const activeFilePreviewPath =
        session.activeFilePreviewPath === path
          ? null
          : session.activeFilePreviewPath;

      // If we closed the active tab, set activeOutputTab to "files"
      const activeOutputTab =
        session.activeFilePreviewPath === path
          ? "files"
          : session.activeOutputTab;

      const updatedSession: BuildSessionData = {
        ...session,
        filePreviewTabs,
        activeFilePreviewPath,
        activeOutputTab,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  setActiveOutputTab: (sessionId: string, tab: OutputTabType) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Push to history (truncate forward history if navigating from middle)
      const { tabHistory } = session;
      const newEntry: TabHistoryEntry = { type: "pinned", tab };
      const newEntries = [
        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),
        newEntry,
      ];

      const updatedSession: BuildSessionData = {
        ...session,
        activeOutputTab: tab,
        activeFilePreviewPath: null, // Clear file preview when selecting pinned tab
        tabHistory: {
          entries: newEntries,
          currentIndex: newEntries.length - 1,
        },
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  setActiveFilePreviewPath: (sessionId: string, path: string | null) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      // Push to history if switching to a file (truncate forward history)
      const { tabHistory } = session;
      let newTabHistory = tabHistory;
      if (path !== null) {
        const newEntry: TabHistoryEntry = { type: "file", path };
        const newEntries = [
          ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),
          newEntry,
        ];
        newTabHistory = {
          entries: newEntries,
          currentIndex: newEntries.length - 1,
        };
      }

      const updatedSession: BuildSessionData = {
        ...session,
        activeFilePreviewPath: path,
        tabHistory: newTabHistory,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  setNoSessionActiveOutputTab: (tab: OutputTabType) => {
    set({ noSessionActiveOutputTab: tab });
  },

  // ===========================================================================
  // Files Tab State Actions
  // ===========================================================================

  updateFilesTabState: (sessionId: string, updates: Partial<FilesTabState>) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        filesTabState: { ...session.filesTabState, ...updates },
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Tab Navigation History Actions
  // ===========================================================================

  navigateTabBack: (sessionId: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const { tabHistory } = session;
      if (tabHistory.currentIndex <= 0) return state;

      const newIndex = tabHistory.currentIndex - 1;
      const entry = tabHistory.entries[newIndex];
      if (!entry) return state;

      // Re-open file tab if it was closed
      let filePreviewTabs = session.filePreviewTabs;
      if (entry.type === "file") {
        const tabExists = filePreviewTabs.some(
          (tab) => tab.path === entry.path
        );
        if (!tabExists) {
          // Extract filename from path
          const fileName = entry.path.split("/").pop() || entry.path;
          filePreviewTabs = [
            ...filePreviewTabs,
            { path: entry.path, fileName },
          ];
        }
      }

      const updatedSession: BuildSessionData = {
        ...session,
        tabHistory: { ...tabHistory, currentIndex: newIndex },
        activeOutputTab:
          entry.type === "pinned" ? entry.tab : session.activeOutputTab,
        activeFilePreviewPath: entry.type === "file" ? entry.path : null,
        filePreviewTabs,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  navigateTabForward: (sessionId: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const { tabHistory } = session;
      if (tabHistory.currentIndex >= tabHistory.entries.length - 1)
        return state;

      const newIndex = tabHistory.currentIndex + 1;
      const entry = tabHistory.entries[newIndex];
      if (!entry) return state;

      // Re-open file tab if it was closed
      let filePreviewTabs = session.filePreviewTabs;
      if (entry.type === "file") {
        const tabExists = filePreviewTabs.some(
          (tab) => tab.path === entry.path
        );
        if (!tabExists) {
          // Extract filename from path
          const fileName = entry.path.split("/").pop() || entry.path;
          filePreviewTabs = [
            ...filePreviewTabs,
            { path: entry.path, fileName },
          ];
        }
      }

      const updatedSession: BuildSessionData = {
        ...session,
        tabHistory: { ...tabHistory, currentIndex: newIndex },
        activeOutputTab:
          entry.type === "pinned" ? entry.tab : session.activeOutputTab,
        activeFilePreviewPath: entry.type === "file" ? entry.path : null,
        filePreviewTabs,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  // ===========================================================================
  // Follow-up Suggestion Actions
  // ===========================================================================

  setFollowupSuggestions: (
    sessionId: string,
    suggestions: SuggestionBubble[] | null
  ) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        followupSuggestions: suggestions,
        suggestionsLoading: false,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  setSuggestionsLoading: (sessionId: string, loading: boolean) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        suggestionsLoading: loading,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },

  clearFollowupSuggestions: (sessionId: string) => {
    set((state) => {
      const session = state.sessions.get(sessionId);
      if (!session) return state;

      const updatedSession: BuildSessionData = {
        ...session,
        followupSuggestions: null,
        suggestionsLoading: false,
        lastAccessed: new Date(),
      };
      const newSessions = new Map(state.sessions);
      newSessions.set(sessionId, updatedSession);
      return { sessions: newSessions };
    });
  },
}));

// =============================================================================
// Selector Hooks (mirrors chat's pattern)
// =============================================================================

// Stable empty references for SSR hydration (prevents infinite loop)
const EMPTY_ARRAY: never[] = [];
const EMPTY_FILE_PREVIEW_TABS: FilePreviewTab[] = [];
const EMPTY_FILES_TAB_STATE: FilesTabState = {
  expandedPaths: [],
  scrollTop: 0,
  directoryCache: {},
};
const EMPTY_TAB_HISTORY: TabNavigationHistory = {
  entries: [],
  currentIndex: 0,
};

export const useCurrentSession = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    return currentSessionId ? sessions.get(currentSessionId) : null;
  });

/**
 * Returns the current session data with stable reference.
 * Returns null when no session exists.
 */
export const useSession = (): BuildSessionData | null =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return null;
    return sessions.get(currentSessionId) ?? null;
  });

export const useSessionId = () =>
  useBuildSessionStore((state) => state.currentSessionId);

export const useHasSession = () =>
  useBuildSessionStore((state) => state.currentSessionId !== null);

export const useIsRunning = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return false;
    const session = sessions.get(currentSessionId);
    return session?.status === "running" || session?.status === "creating";
  });

export const useMessages = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_ARRAY;
    return sessions.get(currentSessionId)?.messages ?? EMPTY_ARRAY;
  });

export const useArtifacts = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_ARRAY;
    return sessions.get(currentSessionId)?.artifacts ?? EMPTY_ARRAY;
  });

export const useToolCalls = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_ARRAY;
    return sessions.get(currentSessionId)?.toolCalls ?? EMPTY_ARRAY;
  });

export const useSessionHistory = () =>
  useBuildSessionStore((state) => state.sessionHistory);

/**
 * Returns the output panel open state for the current session.
 * Falls back to temporary state when no session exists (welcome page).
 * This temporary state resets to false when a session is created or cleared.
 */
export const useOutputPanelOpen = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions, noSessionOutputPanelOpen } = state;
    if (!currentSessionId) return noSessionOutputPanelOpen;
    return sessions.get(currentSessionId)?.outputPanelOpen ?? false;
  });

export const useToggleOutputPanel = () =>
  useBuildSessionStore((state) => state.toggleCurrentOutputPanel);

// Pre-provisioning selectors
export const useIsPreProvisioning = () =>
  useBuildSessionStore(
    (state) => state.preProvisioning.status === "provisioning"
  );

export const useIsPreProvisioningReady = () =>
  useBuildSessionStore((state) => state.preProvisioning.status === "ready");

export const useIsPreProvisioningFailed = () =>
  useBuildSessionStore((state) => state.preProvisioning.status === "failed");

export const usePreProvisionedSessionId = () =>
  useBuildSessionStore((state) =>
    state.preProvisioning.status === "ready"
      ? state.preProvisioning.sessionId
      : null
  );

// Demo data selector - reads directly from cookie (single source of truth)
// Note: This returns the current cookie value but doesn't trigger re-renders on change.
// Components that need reactive updates should manage their own local state.
export const useDemoDataEnabled = () => getDemoDataEnabled();

// Controller state selectors (for useBuildSessionController)
export const useControllerState = () =>
  useBuildSessionStore((state) => state.controllerState);

export const useSetControllerTriggered = () =>
  useBuildSessionStore((state) => state.setControllerTriggered);

export const useSetControllerLoaded = () =>
  useBuildSessionStore((state) => state.setControllerLoaded);

export const useResetControllerState = () =>
  useBuildSessionStore((state) => state.resetControllerState);

// Stream items selector
export const useStreamItems = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_ARRAY;
    return sessions.get(currentSessionId)?.streamItems ?? EMPTY_ARRAY;
  });

// Webapp refresh selector
export const useWebappNeedsRefresh = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return 0;
    return sessions.get(currentSessionId)?.webappNeedsRefresh ?? 0;
  });

// Files refresh selector
export const useFilesNeedsRefresh = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return 0;
    return sessions.get(currentSessionId)?.filesNeedsRefresh ?? 0;
  });

// File preview selectors
export const useFilePreviewTabs = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_FILE_PREVIEW_TABS;
    return (
      sessions.get(currentSessionId)?.filePreviewTabs ?? EMPTY_FILE_PREVIEW_TABS
    );
  });

export const useActiveOutputTab = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions, noSessionActiveOutputTab } = state;
    if (!currentSessionId) return noSessionActiveOutputTab;
    return sessions.get(currentSessionId)?.activeOutputTab ?? "preview";
  });

export const useActiveFilePreviewPath = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return null;
    return sessions.get(currentSessionId)?.activeFilePreviewPath ?? null;
  });

export const useFilesTabState = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_FILES_TAB_STATE;
    return (
      sessions.get(currentSessionId)?.filesTabState ?? EMPTY_FILES_TAB_STATE
    );
  });

export const useTabHistory = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return EMPTY_TAB_HISTORY;
    return sessions.get(currentSessionId)?.tabHistory ?? EMPTY_TAB_HISTORY;
  });

// Follow-up suggestion selectors
export const useFollowupSuggestions = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return null;
    return sessions.get(currentSessionId)?.followupSuggestions ?? null;
  });

export const useSuggestionsLoading = () =>
  useBuildSessionStore((state) => {
    const { currentSessionId, sessions } = state;
    if (!currentSessionId) return false;
    return sessions.get(currentSessionId)?.suggestionsLoading ?? false;
  });


================================================
FILE: web/src/app/craft/hooks/useBuildStreaming.ts
================================================
"use client";

import { useCallback, useMemo } from "react";

import {
  Artifact,
  ArtifactType,
  SessionErrorCode,
} from "@/app/craft/types/streamingTypes";

import {
  sendMessageStream,
  processSSEStream,
  fetchSession,
  generateFollowupSuggestions,
  RateLimitError,
} from "@/app/craft/services/apiServices";

import { useBuildSessionStore } from "@/app/craft/hooks/useBuildSessionStore";
import { StreamItem } from "@/app/craft/types/displayTypes";

import { genId } from "@/app/craft/utils/streamItemHelpers";
import { parsePacket } from "@/app/craft/utils/parsePacket";

/**
 * Hook for handling message streaming in build sessions.
 *
 * Uses a simple FIFO approach:
 * - Stream items are appended in chronological order as packets arrive
 * - Text/thinking chunks are merged when consecutive
 * - Tool calls are interleaved with text in the exact order they arrive
 */
export function useBuildStreaming() {
  const appendMessageToSession = useBuildSessionStore(
    (state) => state.appendMessageToSession
  );
  const addArtifactToSession = useBuildSessionStore(
    (state) => state.addArtifactToSession
  );
  const setAbortController = useBuildSessionStore(
    (state) => state.setAbortController
  );
  const abortCurrentSession = useBuildSessionStore(
    (state) => state.abortCurrentSession
  );
  const updateSessionData = useBuildSessionStore(
    (state) => state.updateSessionData
  );

  // Stream item actions
  const appendStreamItem = useBuildSessionStore(
    (state) => state.appendStreamItem
  );
  const updateLastStreamingText = useBuildSessionStore(
    (state) => state.updateLastStreamingText
  );
  const updateLastStreamingThinking = useBuildSessionStore(
    (state) => state.updateLastStreamingThinking
  );
  const updateToolCallStreamItem = useBuildSessionStore(
    (state) => state.updateToolCallStreamItem
  );
  const upsertTodoListStreamItem = useBuildSessionStore(
    (state) => state.upsertTodoListStreamItem
  );
  const clearStreamItems = useBuildSessionStore(
    (state) => state.clearStreamItems
  );
  const triggerWebappRefresh = useBuildSessionStore(
    (state) => state.triggerWebappRefresh
  );
  const triggerFilesRefresh = useBuildSessionStore(
    (state) => state.triggerFilesRefresh
  );
  const openMarkdownPreview = useBuildSessionStore(
    (state) => state.openMarkdownPreview
  );
  const setFollowupSuggestions = useBuildSessionStore(
    (state) => state.setFollowupSuggestions
  );
  const setSuggestionsLoading = useBuildSessionStore(
    (state) => state.setSuggestionsLoading
  );

  // ── Output file detector registry ──────────────────────────────────────
  // Ordered by priority — first match wins.
  // To add a new output type, add an entry here + a store action.
  const OUTPUT_FILE_DETECTORS = useMemo(
    () => [
      {
        match: (fp: string, k: string) =>
          (k === "edit" || k === "write") &&
          (fp.includes("/web/") || fp.startsWith("web/")),
        onDetect: (sid: string) => triggerWebappRefresh(sid),
      },
      {
        match: (fp: string, k: string) =>
          (k === "edit" || k === "write") &&
          fp.endsWith(".md") &&
          (fp.includes("/outputs/") || fp.startsWith("outputs/")),
        onDetect: (sid: string, fp: string) => {
          openMarkdownPreview(sid, fp);
          triggerFilesRefresh(sid);
        },
      },
      {
        match: (fp: string, k: string) =>
          (k === "edit" || k === "write") &&
          (fp.includes("/outputs/") || fp.startsWith("outputs/")),
        onDetect: (sid: string) => triggerFilesRefresh(sid),
      },
    ],
    [triggerWebappRefresh, triggerFilesRefresh, openMarkdownPreview]
  );

  /**
   * Stream a message to the given session and process the SSE response.
   * Populates streamItems in FIFO order as packets arrive.
   */
  const streamMessage = useCallback(
    async (sessionId: string, content: string): Promise<void> => {
      const currentState = useBuildSessionStore.getState();
      const existingSession = currentState.sessions.get(sessionId);

      if (existingSession?.abortController) {
        existingSession.abortController.abort();
      }

      const controller = new AbortController();
      setAbortController(sessionId, controller);

      // Set status to running and clear previous stream items
      updateSessionData(sessionId, { status: "running" });
      clearStreamItems(sessionId);

      // Track accumulated content for streaming text/thinking
      let accumulatedText = "";
      let accumulatedThinking = "";
      let lastItemType: "text" | "thinking" | "tool" | null = null;

      // Helper to finalize any streaming item before switching types
      const finalizeStreaming = () => {
        const session = useBuildSessionStore.getState().sessions.get(sessionId);
        if (!session) return;

        const items = session.streamItems;
        const lastItem = items[items.length - 1];
        if (lastItem) {
          if (lastItem.type === "text" && lastItem.isStreaming) {
            useBuildSessionStore
              .getState()
              .updateStreamItem(sessionId, lastItem.id, { isStreaming: false });
          } else if (lastItem.type === "thinking" && lastItem.isStreaming) {
            useBuildSessionStore
              .getState()
              .updateStreamItem(sessionId, lastItem.id, { isStreaming: false });
          }
        }
      };

      try {
        const response = await sendMessageStream(
          sessionId,
          content,
          controller.signal
        );

        await processSSEStream(response, (rawPacket) => {
          const parsed = parsePacket(rawPacket);

          switch (parsed.type) {
            // Agent message content - accumulate and update/create text item
            case "text_chunk": {
              if (!parsed.text) break;

              accumulatedText += parsed.text;

              if (lastItemType === "text") {
                updateLastStreamingText(sessionId, accumulatedText);
              } else {
                finalizeStreaming();
                accumulatedText = parsed.text;
                const item: StreamItem = {
                  type: "text",
                  id: genId("text"),
                  content: parsed.text,
                  isStreaming: true,
                };
                appendStreamItem(sessionId, item);
                lastItemType = "text";
              }
              break;
            }

            // Agent thinking - accumulate and update/create thinking item
            case "thinking_chunk": {
              if (!parsed.text) break;

              accumulatedThinking += parsed.text;

              if (lastItemType === "thinking") {
                updateLastStreamingThinking(sessionId, accumulatedThinking);
              } else {
                finalizeStreaming();
                accumulatedThinking = parsed.text;
                const item: StreamItem = {
                  type: "thinking",
                  id: genId("thinking"),
                  content: parsed.text,
                  isStreaming: true,
                };
                appendStreamItem(sessionId, item);
                lastItemType = "thinking";
              }
              break;
            }

            // Tool call started
            case "tool_call_start": {
              finalizeStreaming();
              accumulatedText = "";
              accumulatedThinking = "";

              // Skip tool_call_start for TodoWrite — pill created on first progress
              if (parsed.isTodo) {
                lastItemType = "tool";
                break;
              }

              appendStreamItem(sessionId, {
                type: "tool_call",
                id: parsed.toolCallId,
                toolCall: {
                  id: parsed.toolCallId,
                  kind: parsed.kind,
                  title: "",
                  status: "pending",
                  description: "",
                  command: "",
                  rawOutput: "",
                  subagentType: undefined,
                  isNewFile: true,
                  oldContent: "",
                  newContent: "",
                },
              });
              lastItemType = "tool";
              break;
            }

            // Tool call progress
            case "tool_call_progress": {
              if (parsed.isTodo) {
                upsertTodoListStreamItem(sessionId, parsed.toolCallId, {
                  id: parsed.toolCallId,
                  todos: parsed.todos,
                  isOpen: true,
                });
                break;
              }

              updateToolCallStreamItem(sessionId, parsed.toolCallId, {
                status: parsed.status,
                title: parsed.title,
                description: parsed.description,
                command: parsed.command,
                rawOutput: parsed.rawOutput,
                subagentType: parsed.subagentType ?? undefined,
                ...(parsed.kind === "edit" && {
                  isNewFile: parsed.isNewFile,
                  oldContent: parsed.oldContent,
                  newContent: parsed.newContent,
                }),
              });

              // Run output file detectors (filePath is pre-sanitized)
              if (parsed.filePath && parsed.kind) {
                for (const detector of OUTPUT_FILE_DETECTORS) {
                  if (detector.match(parsed.filePath, parsed.kind)) {
                    detector.onDetect(sessionId, parsed.filePath);
                    break;
                  }
                }
              }

              // Task completion → emit text StreamItem
              if (parsed.taskOutput) {
                appendStreamItem(sessionId, {
                  type: "text",
                  id: genId("task-output"),
                  content: parsed.taskOutput,
                  isStreaming: false,
                });
                lastItemType = "text";
                accumulatedText = "";
              }
              break;
            }

            // Artifacts
            case "artifact_created": {
              const newArtifact: Artifact = {
                id: parsed.artifact.id,
                session_id: sessionId,
                type: parsed.artifact.type as ArtifactType,
                name: parsed.artifact.name,
                path: parsed.artifact.path,
                preview_url: parsed.artifact.preview_url || null,
                created_at: new Date(),
                updated_at: new Date(),
              };
              addArtifactToSession(sessionId, newArtifact);

              // If webapp, fetch session to get sandbox port
              const isWebapp =
                newArtifact.type === "nextjs_app" ||
                newArtifact.type === "web_app";
              if (isWebapp) {
                fetchSession(sessionId)
                  .then((sessionData) => {
                    if (sessionData.sandbox?.nextjs_port) {
                      const webappUrl = `http://localhost:${sessionData.sandbox.nextjs_port}`;
                      updateSessionData(sessionId, { webappUrl });
                    }
                  })
                  .catch((err) =>
                    console.error(
                      "Failed to fetch session for webapp URL:",
                      err
                    )
                  );
              }
              break;
            }

            // Agent finished
            case "prompt_response": {
              finalizeStreaming();

              const session = useBuildSessionStore
                .getState()
                .sessions.get(sessionId);

              if (session && session.streamItems.length > 0) {
                const textContent = session.streamItems
                  .filter((item) => item.type === "text")
                  .map((item) => item.content)
                  .join("");

                const isFirstAgentMessage =
                  session.messages.filter((m) => m.type === "assistant")
                    .length === 0;

                const firstUserMessage = session.messages.find(
                  (m) => m.type === "user"
                );

                if (isFirstAgentMessage && firstUserMessage && textContent) {
                  (async () => {
                    try {
                      setSuggestionsLoading(sessionId, true);
                      const suggestions = await generateFollowupSuggestions(
                        sessionId,
                        firstUserMessage.content,
                        textContent
                      );
                      setFollowupSuggestions(sessionId, suggestions);
                    } catch (err) {
                      console.error("Failed to generate suggestions:", err);
                      setFollowupSuggestions(sessionId, null);
                    }
                  })();
                }

                appendMessageToSession(sessionId, {
                  id: genId("agent-msg"),
                  type: "assistant",
                  content: textContent,
                  timestamp: new Date(),
                  message_metadata: {
                    streamItems: session.streamItems.map((item) => ({
                      ...item,
                      ...(item.type === "text" || item.type === "thinking"
                        ? { isStreaming: false }
                        : {}),
                    })),
                  },
                });
              }

              updateSessionData(sessionId, {
                status: "active",
                streamItems: [],
              });
              break;
            }

            // Error
            case "error": {
              updateSessionData(sessionId, {
                status: "failed",
                error: parsed.message,
              });
              break;
            }

            default:
              break;
          }
        });
      } catch (err) {
        if ((err as Error).name === "AbortError") {
          // User cancelled - no error handling needed
        } else if (err instanceof RateLimitError) {
          console.warn("[Streaming] Rate limit exceeded");
          updateSessionData(sessionId, {
            status: "active",
            error: SessionErrorCode.RATE_LIMIT_EXCEEDED,
          });
        } else {
          console.error("[Streaming] Stream error:", err);
          updateSessionData(sessionId, {
            status: "failed",
            error: (err as Error).message,
          });
        }
      } finally {
        setAbortController(sessionId, new AbortController());
      }
    },
    [
      setAbortController,
      updateSessionData,
      appendStreamItem,
      updateLastStreamingText,
      updateLastStreamingThinking,
      updateToolCallStreamItem,
      upsertTodoListStreamItem,
      clearStreamItems,
      addArtifactToSession,
      appendMessageToSession,
      OUTPUT_FILE_DETECTORS,
      setFollowupSuggestions,
      setSuggestionsLoading,
    ]
  );

  return useMemo(
    () => ({
      streamMessage,
      abortStream: abortCurrentSession,
    }),
    [streamMessage, abortCurrentSession]
  );
}


================================================
FILE: web/src/app/craft/hooks/usePreProvisionPolling.ts
================================================
"use client";

import { useEffect, useRef } from "react";
import { useBuildSessionStore } from "./useBuildSessionStore";
import { checkPreProvisionedSession } from "../services/apiServices";

/** Polling interval in milliseconds (5 seconds) */
const POLLING_INTERVAL_MS = 5000;

interface UsePreProvisionPollingOptions {
  /** Only poll when enabled (should be true only on welcome page) */
  enabled: boolean;
}

/**
 * Hook that polls to verify the pre-provisioned session is still valid.
 *
 * When multiple browser tabs have the same pre-provisioned session,
 * one tab may claim it by sending a message. This hook detects when
 * that happens and triggers re-provisioning so the current tab gets
 * a fresh session.
 *
 * Usage: Call this hook on the welcome page where pre-provisioned
 * sessions are used. Pass enabled=true only on the welcome page.
 */
export function usePreProvisionPolling({
  enabled,
}: UsePreProvisionPollingOptions) {
  const preProvisioning = useBuildSessionStore(
    (state) => state.preProvisioning
  );
  const ensurePreProvisionedSession = useBuildSessionStore(
    (state) => state.ensurePreProvisionedSession
  );

  // Extract sessionId only when status is "ready" (handles discriminated union)
  const sessionId =
    preProvisioning.status === "ready" ? preProvisioning.sessionId : null;

  // Use ref to track if we're currently checking (prevents overlapping requests)
  const isCheckingRef = useRef(false);

  useEffect(() => {
    // Only poll when enabled (welcome page) and we have a ready session
    if (!enabled || !sessionId) {
      return;
    }

    const checkValidity = async () => {
      if (isCheckingRef.current) return;
      isCheckingRef.current = true;

      try {
        const result = await checkPreProvisionedSession(sessionId);

        if (!result.valid) {
          console.log(
            `[PreProvisionPolling] Session ${sessionId.slice(
              0,
              8
            )} was used, re-provisioning...`
          );
          // Session was used by another tab - reset state and re-provision.
          // Zustand setState is synchronous, so ensurePreProvisionedSession
          // will immediately see the idle status (no setTimeout needed).
          useBuildSessionStore.setState({
            preProvisioning: { status: "idle" },
          });
          ensurePreProvisionedSession();
        }
      } catch (err) {
        console.error("[PreProvisionPolling] Failed to check session:", err);
        // On error, don't re-provision - might be a network issue
      } finally {
        isCheckingRef.current = false;
      }
    };

    // Start polling
    const intervalId = setInterval(checkValidity, POLLING_INTERVAL_MS);

    // Also check immediately on mount (in case session was used while tab was inactive)
    checkValidity();

    return () => {
      clearInterval(intervalId);
    };
  }, [enabled, sessionId, ensurePreProvisionedSession]);
}


================================================
FILE: web/src/app/craft/hooks/useUsageLimits.ts
================================================
"use client";

import useSWR from "swr";

import { UsageLimits, LimitType } from "@/app/craft/types/streamingTypes";

import {
  USAGE_LIMITS_ENDPOINT,
  fetchUsageLimits,
} from "@/app/craft/services/apiServices";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";

// Re-export types for consumers
export type { UsageLimits, LimitType };

// =============================================================================
// Hook Return Type
// =============================================================================

export interface UseUsageLimitsReturn {
  // Limits state
  limits: UsageLimits | null;
  isLoading: boolean;
  error: Error | null;
  /** Whether limits are enabled (cloud mode) */
  isEnabled: boolean;

  // Actions
  refreshLimits: () => void;
}

// =============================================================================
// Hook Implementation
// =============================================================================

/**
 * useUsageLimits - Hook for managing build mode usage limits
 *
 * Rate limits from API:
 * - Free/unpaid users: 5 messages total (limitType: "total")
 * - Paid users: 25 messages per week by default (limitType: "weekly")
 *   (configurable via CRAFT_PAID_USER_RATE_LIMIT env var)
 *
 * Only fetches when NEXT_PUBLIC_CLOUD_ENABLED is true.
 * Automatically fetches limits on mount and provides refresh capability.
 */
export function useUsageLimits(): UseUsageLimitsReturn {
  const isEnabled = NEXT_PUBLIC_CLOUD_ENABLED;

  const { data, error, isLoading, mutate } = useSWR<UsageLimits>(
    // Only fetch if cloud is enabled
    isEnabled ? USAGE_LIMITS_ENDPOINT : null,
    fetchUsageLimits,
    {
      // Revalidate on focus (when user returns to tab)
      revalidateOnFocus: true,
      // Revalidate on reconnect
      revalidateOnReconnect: true,
      // No caching - usage changes with every message sent
      // Callers should call refreshLimits() after sending messages
      dedupingInterval: 0,
    }
  );

  return {
    limits: data ?? null,
    isLoading,
    error: error ?? null,
    isEnabled,
    refreshLimits: () => mutate(),
  };
}


================================================
FILE: web/src/app/craft/layout.tsx
================================================
import { redirect } from "next/navigation";
import type { Route } from "next";
import { unstable_noStore as noStore } from "next/cache";
import { requireAuth } from "@/lib/auth/requireAuth";
import { fetchSettingsSS } from "@/components/settings/lib";

export interface LayoutProps {
  children: React.ReactNode;
}

/**
 * Build Layout - Minimal wrapper that handles authentication and feature flag check
 *
 * Child routes (/craft and /craft/v1) handle their own UI structure.
 * Redirects to /app if Onyx Craft is disabled via feature flag.
 */
export default async function Layout({ children }: LayoutProps) {
  noStore();

  // Only check authentication - data fetching is done client-side
  const authResult = await requireAuth();

  if (authResult.redirect) {
    redirect(authResult.redirect as Route);
  }

  // Check if Onyx Craft is enabled via feature flag
  // Only explicit true enables the feature; false or undefined = disabled
  const settings = await fetchSettingsSS();
  if (settings?.settings?.onyx_craft_enabled !== true) {
    redirect("/app" as Route);
  }

  return <>{children}</>;
}


================================================
FILE: web/src/app/craft/onboarding/BuildOnboardingProvider.tsx
================================================
"use client";

import { createContext, useContext } from "react";
import { useRouter } from "next/navigation";
import { useOnboardingModal } from "@/app/craft/onboarding/hooks/useOnboardingModal";
import BuildOnboardingModal from "@/app/craft/onboarding/components/BuildOnboardingModal";
import NoLlmProvidersModal from "@/app/craft/onboarding/components/NoLlmProvidersModal";
import { OnboardingModalController } from "@/app/craft/onboarding/types";
import { useUser } from "@/providers/UserProvider";

// Context for accessing onboarding modal controls
const OnboardingContext = createContext<OnboardingModalController | null>(null);

export function useOnboarding(): OnboardingModalController {
  const ctx = useContext(OnboardingContext);
  if (!ctx) {
    throw new Error(
      "useOnboarding must be used within BuildOnboardingProvider"
    );
  }
  return ctx;
}

interface BuildOnboardingProviderProps {
  children: React.ReactNode;
}

export function BuildOnboardingProvider({
  children,
}: BuildOnboardingProviderProps) {
  const router = useRouter();
  const { user } = useUser();
  const controller = useOnboardingModal();

  // Show loading state while user data is loading
  if (!user) {
    return (
      <div className="flex items-center justify-center w-full h-full">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-text-01" />
      </div>
    );
  }

  // Non-admin users with no LLM providers cannot use Craft
  // Don't show modal while loading to prevent flash
  const showNoProvidersModal =
    !controller.isLoading && !controller.isAdmin && !controller.hasAnyProvider;

  return (
    <OnboardingContext.Provider value={controller}>
      {/* Block non-admin users when no LLM providers are configured */}
      <NoLlmProvidersModal
        open={showNoProvidersModal}
        onClose={() => router.push("/app")}
      />

      {/* Unified onboarding modal - only show if not blocked by no providers */}
      {!showNoProvidersModal && (
        <BuildOnboardingModal
          mode={controller.mode}
          llmProviders={controller.llmProviders}
          initialValues={controller.initialValues}
          isAdmin={controller.isAdmin}
          hasUserInfo={controller.hasUserInfo}
          allProvidersConfigured={controller.allProvidersConfigured}
          hasAnyProvider={controller.hasAnyProvider}
          onComplete={controller.completeUserInfo}
          onLlmComplete={controller.completeLlmSetup}
          onClose={controller.close}
        />
      )}

      {/* Build content - always rendered, modals overlay it */}
      {children}
    </OnboardingContext.Provider>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/BuildOnboardingModal.tsx
================================================
"use client";

import { useState, useEffect, useMemo } from "react";
import {
  track,
  AnalyticsEvent,
  LLMProviderConfiguredSource,
} from "@/lib/analytics";
import { SvgArrowRight, SvgArrowLeft, SvgX } from "@opal/icons";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import {
  BuildUserInfo,
  OnboardingModalMode,
  OnboardingStep,
} from "@/app/craft/onboarding/types";
import {
  WorkArea,
  Level,
  WORK_AREAS_REQUIRING_LEVEL,
  setBuildLlmSelection,
  getBuildLlmSelection,
  getDefaultLlmSelection,
} from "@/app/craft/onboarding/constants";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import { LLM_PROVIDERS_ADMIN_URL } from "@/lib/llmConfig/constants";
import { buildOnboardingInitialValues as buildInitialValues } from "@/sections/modals/llmConfig/utils";
import { testApiKeyHelper } from "@/sections/modals/llmConfig/svc";
import OnboardingInfoPages from "@/app/craft/onboarding/components/OnboardingInfoPages";
import OnboardingUserInfo from "@/app/craft/onboarding/components/OnboardingUserInfo";
import OnboardingLlmSetup, {
  PROVIDERS,
  type ProviderKey,
} from "@/app/craft/onboarding/components/OnboardingLlmSetup";

/**
 * Auto-select the best available LLM based on priority order.
 * Used when user completes onboarding without going through LLM setup step.
 */
function autoSelectBestLlm(
  llmProviders: LLMProviderDescriptor[] | undefined
): void {
  // Don't override if user already has a selection
  if (getBuildLlmSelection()) return;

  const selection = getDefaultLlmSelection(llmProviders);
  if (selection) {
    setBuildLlmSelection(selection);
  }
}

interface InitialValues {
  firstName: string;
  lastName: string;
  workArea: WorkArea | undefined;
  level: Level | undefined;
}

interface BuildOnboardingModalProps {
  mode: OnboardingModalMode;
  llmProviders?: LLMProviderDescriptor[];
  initialValues: InitialValues;
  isAdmin: boolean;
  hasUserInfo: boolean;
  allProvidersConfigured: boolean;
  hasAnyProvider: boolean;
  onComplete: (info: BuildUserInfo) => Promise<void>;
  onLlmComplete: () => Promise<void>;
  onClose: () => void;
}

// Helper to compute steps for mode
function getStepsForMode(
  mode: OnboardingModalMode,
  isAdmin: boolean,
  allProvidersConfigured: boolean,
  hasUserInfo: boolean
): OnboardingStep[] {
  switch (mode.type) {
    case "initial-onboarding":
      // Full flow: page1 → llm-setup (if admin + not all configured) → user-info
      const steps: OnboardingStep[] = ["page1"];

      if (isAdmin && !allProvidersConfigured) {
        steps.push("llm-setup");
      }

      if (!hasUserInfo) {
        steps.push("user-info");
      }

      return steps;

    case "edit-persona":
      return ["user-info"];

    case "add-llm":
      return ["llm-setup"];

    case "closed":
      return [];
  }
}

export default function BuildOnboardingModal({
  mode,
  llmProviders,
  initialValues,
  isAdmin,
  hasUserInfo,
  allProvidersConfigured,
  hasAnyProvider,
  onComplete,
  onLlmComplete,
  onClose,
}: BuildOnboardingModalProps) {
  // Compute steps based on mode
  const steps = useMemo(
    () => getStepsForMode(mode, isAdmin, allProvidersConfigured, hasUserInfo),
    [mode, isAdmin, allProvidersConfigured, hasUserInfo]
  );

  // Determine initial step based on mode
  const initialStep = useMemo((): OnboardingStep => {
    if (mode.type === "add-llm") return "llm-setup";
    return steps[0] || "user-info";
  }, [mode.type, steps]);

  // Navigation state
  const [currentStep, setCurrentStep] = useState<OnboardingStep>(initialStep);

  // Reset step when mode changes
  useEffect(() => {
    if (mode.type !== "closed") {
      setCurrentStep(initialStep);
    }
  }, [mode.type, initialStep]);

  // User info state - pre-fill from initialValues
  const [firstName, setFirstName] = useState(initialValues.firstName);
  const [lastName, setLastName] = useState(initialValues.lastName);
  const [workArea, setWorkArea] = useState(initialValues.workArea);
  const [level, setLevel] = useState(initialValues.level);

  // Update form values when initialValues changes
  useEffect(() => {
    setFirstName(initialValues.firstName);
    setLastName(initialValues.lastName);
    setWorkArea(initialValues.workArea);
    setLevel(initialValues.level);
  }, [initialValues]);

  // Determine initial provider for add-llm mode
  const initialProvider = mode.type === "add-llm" ? mode.provider : undefined;

  // LLM setup state
  const [selectedProvider, setSelectedProvider] = useState<ProviderKey>(
    (initialProvider as ProviderKey) || "anthropic"
  );
  const [selectedModel, setSelectedModel] = useState<string>(
    PROVIDERS.find((p) => p.key === (initialProvider || "anthropic"))?.models[0]
      ?.name || ""
  );
  const [apiKey, setApiKey] = useState("");
  const [connectionStatus, setConnectionStatus] = useState<
    "idle" | "testing" | "success" | "error"
  >("idle");
  const [errorMessage, setErrorMessage] = useState("");

  // Reset LLM state when mode changes to add-llm with a specific provider
  useEffect(() => {
    if (mode.type === "add-llm" && mode.provider) {
      const providerConfig = PROVIDERS.find(
        (p) => p.key === (mode.provider as ProviderKey)
      );
      if (providerConfig) {
        setSelectedProvider(providerConfig.key);
        setSelectedModel(providerConfig.models[0]?.name || "");
        setApiKey("");
        setConnectionStatus("idle");
        setErrorMessage("");
      }
    }
  }, [mode]);

  // Submission state
  const [isSubmitting, setIsSubmitting] = useState(false);

  const requiresLevel =
    workArea !== undefined && WORK_AREAS_REQUIRING_LEVEL.includes(workArea);
  const isUserInfoValid = workArea && (!requiresLevel || level);

  const currentProviderConfig = PROVIDERS.find(
    (p) => p.key === selectedProvider
  )!;
  const isLlmValid = apiKey.trim() && selectedModel;

  // Calculate step navigation
  const currentStepIndex = steps.indexOf(currentStep);
  const totalSteps = steps.length;

  const handleNext = () => {
    setErrorMessage("");
    const nextIndex = currentStepIndex + 1;
    if (nextIndex < steps.length) {
      setCurrentStep(steps[nextIndex]!);
    }
  };

  const handleBack = () => {
    setErrorMessage("");
    const prevIndex = currentStepIndex - 1;
    if (prevIndex >= 0) {
      setCurrentStep(steps[prevIndex]!);
    }
  };

  const handleConnect = async () => {
    if (!apiKey.trim()) return;

    setConnectionStatus("testing");
    setErrorMessage("");

    const baseValues = buildInitialValues();
    const providerName = `build-mode-${currentProviderConfig.providerName}`;
    const payload = {
      ...baseValues,
      name: providerName,
      provider: currentProviderConfig.providerName,
      api_key: apiKey,
      default_model_name: selectedModel,
      model_configurations: currentProviderConfig.models.map((m) => ({
        name: m.name,
        is_visible: true,
        max_input_tokens: null,
        supports_image_input: true,
      })),
    };

    const testResult = await testApiKeyHelper(
      currentProviderConfig.providerName,
      payload
    );

    if (!testResult.ok) {
      setErrorMessage(
        "There was an issue with this provider and model, please try a different one."
      );
      setConnectionStatus("error");
      return;
    }

    try {
      const response = await fetch(
        `${LLM_PROVIDERS_ADMIN_URL}?is_creation=true`,
        {
          method: "PUT",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(payload),
        }
      );

      if (!response.ok) {
        setErrorMessage(
          "There was an issue creating the provider. Please try again."
        );
        setConnectionStatus("error");
        return;
      }

      if (!llmProviders || llmProviders.length === 0) {
        const newProvider = await response.json();
        if (newProvider?.id) {
          await fetch(`${LLM_PROVIDERS_ADMIN_URL}/${newProvider.id}/default`, {
            method: "POST",
          });
        }
      }

      setBuildLlmSelection({
        providerName: providerName,
        provider: currentProviderConfig.providerName,
        modelName: selectedModel,
      });

      track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {
        provider: currentProviderConfig.providerName,
        is_creation: true,
        source: LLMProviderConfiguredSource.CRAFT_ONBOARDING,
      });

      setConnectionStatus("success");
    } catch (error) {
      console.error("Error connecting LLM provider:", error);
      setErrorMessage(
        "There was an issue connecting the provider. Please try again."
      );
      setConnectionStatus("error");
    }
  };

  const handleSubmit = async () => {
    // For add-llm mode, just close after successful connection
    if (mode.type === "add-llm") {
      if (connectionStatus === "success") {
        await onLlmComplete();
        onClose();
      }
      return;
    }

    if (!isUserInfoValid) return;
    // If LLM setup was part of the flow and user has no providers (can't skip), require completion
    if (
      steps.includes("llm-setup") &&
      !hasAnyProvider &&
      connectionStatus !== "success"
    )
      return;

    setIsSubmitting(true);

    try {
      // Refresh LLM providers if LLM was set up
      if (steps.includes("llm-setup") && connectionStatus === "success") {
        await onLlmComplete();
      }

      // Auto-select best available LLM if user didn't go through LLM setup
      // (e.g., non-admin users or when all providers already configured)
      autoSelectBestLlm(llmProviders);

      // Validate workArea is provided before submission
      if (!workArea) {
        setErrorMessage("Please select a work area.");
        setIsSubmitting(false);
        return;
      }

      const requiresLevel = WORK_AREAS_REQUIRING_LEVEL.includes(workArea);

      // Validate level if required
      if (requiresLevel && !level) {
        setErrorMessage("Please select a level.");
        setIsSubmitting(false);
        return;
      }

      await onComplete({
        firstName: firstName.trim(),
        lastName: lastName.trim() || undefined,
        workArea,
        level: level || undefined,
      });

      track(AnalyticsEvent.COMPLETED_CRAFT_ONBOARDING);
      onClose();
    } catch (error) {
      console.error("Error completing onboarding:", error);
      setErrorMessage(
        "There was an issue completing onboarding. Please try again."
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  if (mode.type === "closed") return null;

  const canProceedUserInfo = isUserInfoValid;
  const isConnecting = connectionStatus === "testing";
  const canTestConnection = isLlmValid && !isConnecting;
  const isLastStep = currentStepIndex === steps.length - 1;
  const isFirstStep = currentStepIndex === 0;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div className="absolute inset-0 bg-black/50 backdrop-blur-sm" />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        {/* Close button for add-llm mode */}
        {mode.type === "add-llm" && (
          <button
            type="button"
            onClick={onClose}
            className="absolute top-4 right-4 z-10 p-1 rounded-08 text-text-03 hover:text-text-05 hover:bg-background-tint-02 transition-colors"
          >
            <SvgX className="w-5 h-5" />
          </button>
        )}
        <div className="p-6 flex flex-col gap-6 min-h-[600px]">
          {/* User Info Step */}
          {currentStep === "user-info" && (
            <OnboardingUserInfo
              firstName={firstName}
              lastName={lastName}
              workArea={workArea}
              level={level}
              onFirstNameChange={setFirstName}
              onLastNameChange={setLastName}
              onWorkAreaChange={setWorkArea}
              onLevelChange={setLevel}
            />
          )}

          {/* LLM Setup Step */}
          {currentStep === "llm-setup" && (
            <OnboardingLlmSetup
              selectedProvider={selectedProvider}
              selectedModel={selectedModel}
              apiKey={apiKey}
              connectionStatus={connectionStatus}
              errorMessage={errorMessage}
              llmProviders={llmProviders}
              onProviderChange={setSelectedProvider}
              onModelChange={setSelectedModel}
              onApiKeyChange={setApiKey}
              onConnectionStatusChange={setConnectionStatus}
              onErrorMessageChange={setErrorMessage}
            />
          )}

          {/* Page 1 - What is Onyx Craft? */}
          {currentStep === "page1" && (
            <OnboardingInfoPages
              step="page1"
              workArea={workArea}
              level={level}
            />
          )}

          {/* Navigation buttons */}
          <div className="relative flex justify-between items-center pt-2">
            {/* Back button */}
            <div>
              {!isFirstStep && (
                <button
                  type="button"
                  onClick={handleBack}
                  className="flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors"
                >
                  <SvgArrowLeft className="w-4 h-4" />
                  <Text mainUiAction>Back</Text>
                </button>
              )}
            </div>

            {/* Step indicator */}
            {totalSteps > 1 && (
              <div className="absolute left-1/2 -translate-x-1/2 flex items-center justify-center gap-2">
                {Array.from({ length: totalSteps }).map((_, i) => (
                  <div
                    key={i}
                    className={cn(
                      "w-2 h-2 rounded-full transition-colors",
                      i === currentStepIndex
                        ? "bg-text-05"
                        : i < currentStepIndex
                          ? "bg-text-03"
                          : "bg-border-01"
                    )}
                  />
                ))}
              </div>
            )}

            {/* Action buttons */}
            {currentStep === "user-info" && (
              <button
                type="button"
                onClick={() => {
                  track(AnalyticsEvent.COMPLETED_CRAFT_USER_INFO, {
                    first_name: firstName.trim(),
                    last_name: lastName.trim() || undefined,
                    work_area: workArea,
                    level: level,
                  });
                  if (isLastStep) {
                    handleSubmit();
                  } else {
                    handleNext();
                  }
                }}
                disabled={!canProceedUserInfo || isSubmitting}
                className={cn(
                  "flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors",
                  canProceedUserInfo && !isSubmitting
                    ? "bg-black dark:bg-white text-white dark:text-black hover:opacity-90"
                    : "bg-background-neutral-01 text-text-02 cursor-not-allowed"
                )}
              >
                <Text
                  mainUiAction
                  className={cn(
                    canProceedUserInfo && !isSubmitting
                      ? "text-white dark:text-black"
                      : "text-text-02"
                  )}
                >
                  {isLastStep
                    ? isSubmitting
                      ? "Saving..."
                      : "Get Started!"
                    : "Continue"}
                </Text>
                {!isLastStep && (
                  <SvgArrowRight
                    className={cn(
                      "w-4 h-4",
                      canProceedUserInfo && !isSubmitting
                        ? "text-white dark:text-black"
                        : "text-text-02"
                    )}
                  />
                )}
              </button>
            )}

            {currentStep === "page1" && (
              <button
                type="button"
                onClick={handleNext}
                className="flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors bg-black dark:bg-white text-white dark:text-black hover:opacity-90"
              >
                <Text mainUiAction className="text-white dark:text-black">
                  Continue
                </Text>
                <SvgArrowRight className="w-4 h-4 text-white dark:text-black" />
              </button>
            )}

            {currentStep === "llm-setup" && connectionStatus !== "success" && (
              <div className="flex items-center gap-2">
                {/* Skip button - only shown if user has at least one provider */}
                {hasAnyProvider && !isLastStep && (
                  <button
                    type="button"
                    onClick={handleNext}
                    disabled={isConnecting}
                    className="flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors"
                  >
                    <Text mainUiAction>Skip</Text>
                    <SvgArrowRight className="w-4 h-4" />
                  </button>
                )}
                {/* Connect button */}
                <button
                  type="button"
                  onClick={handleConnect}
                  disabled={!canTestConnection || isConnecting}
                  className={cn(
                    "flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors",
                    canTestConnection && !isConnecting
                      ? "bg-black dark:bg-white text-white dark:text-black hover:opacity-90"
                      : "bg-background-neutral-01 text-text-02 cursor-not-allowed"
                  )}
                >
                  <Text
                    mainUiAction
                    className={cn(
                      canTestConnection && !isConnecting
                        ? "text-white dark:text-black"
                        : "text-text-02"
                    )}
                  >
                    {isConnecting ? "Connecting..." : "Connect"}
                  </Text>
                </button>
              </div>
            )}

            {currentStep === "llm-setup" && connectionStatus === "success" && (
              <button
                type="button"
                onClick={isLastStep ? handleSubmit : handleNext}
                className="flex items-center gap-1.5 px-4 py-2 rounded-12 bg-black dark:bg-white text-white dark:text-black hover:opacity-90 transition-colors"
              >
                <Text mainUiAction className="text-white dark:text-black">
                  {isLastStep ? "Done" : "Continue"}
                </Text>
                {!isLastStep && (
                  <SvgArrowRight className="w-4 h-4 text-white dark:text-black" />
                )}
              </button>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/NoLlmProvidersModal.tsx
================================================
"use client";

import { useState } from "react";
import { useRouter } from "next/navigation";
import Text from "@/refresh-components/texts/Text";
import { SvgLock, SvgArrowRight } from "@opal/icons";
import { logout } from "@/lib/user";
import { cn } from "@/lib/utils";

interface NoLlmProvidersModalProps {
  open: boolean;
  onClose: () => void;
}

/**
 * Modal shown to non-admin users when no LLM providers are configured.
 * Explains that an admin needs to configure providers before they can use Craft.
 */
export default function NoLlmProvidersModal({
  open,
  onClose,
}: NoLlmProvidersModalProps) {
  const router = useRouter();
  const [isLoading, setIsLoading] = useState(false);

  const handleCreateNewAccount = async () => {
    setIsLoading(true);
    try {
      await logout();
      router.push("/auth/signup");
    } finally {
      setIsLoading(false);
    }
  };

  if (!open) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div className="absolute inset-0 bg-black/50 backdrop-blur-sm" />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6 min-h-[400px]">
          {/* Content */}
          <div className="flex-1 flex flex-col items-center justify-center gap-6">
            {/* Icon */}
            <div className="w-16 h-16 rounded-full bg-background-tint-02 flex items-center justify-center">
              <SvgLock className="w-8 h-8 text-text-03" />
            </div>

            {/* Header */}
            <div className="flex flex-col items-center gap-2 text-center">
              <Text headingH2 text05>
                LLM Provider Required
              </Text>
              <Text mainUiBody text03 className="max-w-sm">
                Onyx Craft requires an LLM provider to be configured, but only
                admins can set this up.
                <br />
                <br />
                Please ask your admin to configure an LLM provider, or create a
                new Onyx account to become an admin yourself!
              </Text>
            </div>
          </div>

          {/* Footer buttons */}
          <div className="flex justify-center gap-3 pt-2">
            <button
              type="button"
              onClick={onClose}
              className="flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors"
            >
              <Text mainUiAction>Go Back</Text>
            </button>
            <button
              type="button"
              onClick={handleCreateNewAccount}
              disabled={isLoading}
              className={cn(
                "flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors",
                !isLoading
                  ? "bg-black dark:bg-white text-white dark:text-black hover:opacity-90"
                  : "bg-background-neutral-01 text-text-02 cursor-not-allowed"
              )}
            >
              <Text
                mainUiAction
                className={cn(
                  !isLoading ? "text-white dark:text-black" : "text-text-02"
                )}
              >
                {isLoading ? "Signing out..." : "Create a new account"}
              </Text>
              {!isLoading && (
                <SvgArrowRight className="w-4 h-4 text-white dark:text-black" />
              )}
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/NotAllowedModal.tsx
================================================
"use client";

import { useState } from "react";
import { useRouter } from "next/navigation";
import Text from "@/refresh-components/texts/Text";
import { SvgLock, SvgArrowRight } from "@opal/icons";
import { logout } from "@/lib/user";
import { cn } from "@/lib/utils";

interface NotAllowedModalProps {
  open: boolean;
  onClose: () => void;
}

export default function NotAllowedModal({
  open,
  onClose,
}: NotAllowedModalProps) {
  const router = useRouter();
  const [isLoading, setIsLoading] = useState(false);

  const handleCreateNewAccount = async () => {
    setIsLoading(true);
    try {
      await logout();
      router.push("/auth/signup");
    } finally {
      setIsLoading(false);
    }
  };

  if (!open) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div className="absolute inset-0 bg-black/50 backdrop-blur-sm" />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6 min-h-[400px]">
          {/* Content */}
          <div className="flex-1 flex flex-col items-center justify-center gap-6">
            {/* Icon */}
            <div className="w-16 h-16 rounded-full bg-background-tint-02 flex items-center justify-center">
              <SvgLock className="w-8 h-8 text-text-03" />
            </div>

            {/* Header */}
            <div className="flex flex-col items-center gap-2 text-center">
              <Text headingH2 text05>
                Custom Crafting Restricted
              </Text>
              <Text mainUiBody text03 className="max-w-sm">
                Unfortunately, connecting your own data to Craft requires admin
                permissions.
                <br />
                <br />
                Luckily, you can create a new Onyx account to become an admin
                and craft with your own data!
              </Text>
            </div>
          </div>

          {/* Footer buttons */}
          <div className="flex justify-center gap-3 pt-2">
            <button
              type="button"
              onClick={onClose}
              className="flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors"
            >
              <Text mainUiAction>Go Back</Text>
            </button>
            <button
              type="button"
              onClick={handleCreateNewAccount}
              disabled={isLoading}
              className={cn(
                "flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors",
                !isLoading
                  ? "bg-black dark:bg-white text-white dark:text-black hover:opacity-90"
                  : "bg-background-neutral-01 text-text-02 cursor-not-allowed"
              )}
            >
              <Text
                mainUiAction
                className={cn(
                  !isLoading ? "text-white dark:text-black" : "text-text-02"
                )}
              >
                {isLoading ? "Signing out..." : "Create a new account"}
              </Text>
              {!isLoading && (
                <SvgArrowRight className="w-4 h-4 text-white dark:text-black" />
              )}
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/OnboardingInfoPages.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import {
  WorkArea,
  Level,
  getPersonaInfo,
  getPositionText,
  DEMO_COMPANY_NAME,
} from "@/app/craft/onboarding/constants";
import {
  GoogleDriveIcon,
  GithubIcon,
  HubSpotIcon,
  LinearIcon,
  FirefliesIcon,
  GmailIcon,
  ColorSlackIcon,
} from "@/components/icons/icons";

interface OnboardingInfoPagesProps {
  step: "page1" | "page2";
  workArea: WorkArea | undefined;
  level: Level | undefined;
}

export default function OnboardingInfoPages({
  step,
  workArea,
  level,
}: OnboardingInfoPagesProps) {
  // Get persona info from mapping (only if both are valid enum values)
  const personaInfo =
    workArea && level ? getPersonaInfo(workArea, level) : undefined;

  // Helper function to determine article (a/an) based on first letter
  const getArticle = (word: string | undefined): string => {
    if (!word || word.length === 0) return "a";
    const firstLetter = word.toLowerCase()[0];
    if (!firstLetter) return "a";
    const vowels = ["a", "e", "i", "o", "u"];
    return vowels.includes(firstLetter) ? "an" : "a";
  };

  // Get position text using shared helper (only if workArea is valid enum)
  const positionText = workArea ? getPositionText(workArea, level) : "Not set";

  // Determine article based on position text
  const article = getArticle(positionText);

  if (step === "page1") {
    return (
      <div className="flex-1 flex flex-col gap-6 items-center justify-center">
        <Text headingH2 text05>
          What is Onyx Craft?
        </Text>
        <img
          src="/craft_demo_image_1.png"
          alt="Onyx Craft"
          className="max-w-full h-auto rounded-12"
        />
        <Text mainContentBody text04 className="text-center">
          Beautiful dashboards, slides, and reports.
          <br />
          Built by AI agents that know your world. Privately and securely.
        </Text>
      </div>
    );
  }

  // Page 2
  return (
    <div className="flex-1 flex flex-col gap-6 items-center justify-center">
      <Text headingH2 text05>
        Let's get started!
      </Text>
      <img
        src="/craft_demo_image_2.png"
        alt="Onyx Craft"
        className="max-w-full h-auto rounded-12"
      />
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/OnboardingLlmSetup.tsx
================================================
"use client";

import { SvgCheckCircle } from "@opal/icons";
import { cn } from "@/lib/utils";
import { Disabled } from "@opal/core";
import Text from "@/refresh-components/texts/Text";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { LLMProviderName, LLMProviderDescriptor } from "@/interfaces/llm";

// Provider configurations
export type ProviderKey = "anthropic" | "openai" | "openrouter";

interface ModelOption {
  name: string;
  label: string;
  recommended?: boolean;
}

export interface ProviderConfig {
  key: ProviderKey;
  label: string;
  providerName: LLMProviderName;
  recommended?: boolean;
  models: ModelOption[];
  apiKeyPlaceholder: string;
  apiKeyUrl: string;
  apiKeyLabel: string;
}

export const PROVIDERS: ProviderConfig[] = [
  {
    key: "anthropic",
    label: "Anthropic",
    providerName: LLMProviderName.ANTHROPIC,
    recommended: true,
    models: [
      { name: "claude-opus-4-6", label: "Claude Opus 4.6", recommended: true },
      { name: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
    ],
    apiKeyPlaceholder: "sk-ant-...",
    apiKeyUrl: "https://console.anthropic.com/dashboard",
    apiKeyLabel: "Anthropic Console",
  },
  {
    key: "openai",
    label: "OpenAI",
    providerName: LLMProviderName.OPENAI,
    models: [
      { name: "gpt-5.2", label: "GPT-5.2", recommended: true },
      { name: "gpt-5.1", label: "GPT-5.1" },
    ],
    apiKeyPlaceholder: "sk-...",
    apiKeyUrl: "https://platform.openai.com/api-keys",
    apiKeyLabel: "OpenAI Dashboard",
  },
  {
    key: "openrouter",
    label: "OpenRouter",
    providerName: LLMProviderName.OPENROUTER,
    models: [
      {
        name: "moonshotai/kimi-k2-thinking",
        label: "Kimi K2 Thinking",
        recommended: true,
      },
      { name: "google/gemini-3-pro-preview", label: "Gemini 3 Pro" },
      { name: "qwen/qwen3-235b-a22b-thinking-2507", label: "Qwen3 235B" },
    ],
    apiKeyPlaceholder: "sk-or-...",
    apiKeyUrl: "https://openrouter.ai/keys",
    apiKeyLabel: "OpenRouter Dashboard",
  },
];

interface SelectableButtonProps {
  selected: boolean;
  onClick: () => void;
  children: React.ReactNode;
  subtext?: string;
  disabled?: boolean;
  tooltip?: string;
}

function SelectableButton({
  selected,
  onClick,
  children,
  subtext,
  disabled,
  tooltip,
}: SelectableButtonProps) {
  const button = (
    <div className="flex flex-col items-center gap-1">
      <Disabled disabled={disabled} allowClick>
        <button
          type="button"
          onClick={onClick}
          disabled={disabled}
          className={cn(
            "w-full px-6 py-3 rounded-12 border transition-colors",
            selected
              ? "border-action-link-05 bg-action-link-01 text-action-text-link-05"
              : "border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01"
          )}
        >
          <Text mainUiAction>{children}</Text>
        </button>
      </Disabled>
      {subtext && (
        <Text figureSmallLabel text02>
          {subtext}
        </Text>
      )}
    </div>
  );

  if (tooltip) {
    return <SimpleTooltip tooltip={tooltip}>{button}</SimpleTooltip>;
  }

  return button;
}

interface ModelSelectButtonProps {
  selected: boolean;
  onClick: () => void;
  label: string;
  recommended?: boolean;
  disabled?: boolean;
}

function ModelSelectButton({
  selected,
  onClick,
  label,
  recommended,
  disabled,
}: ModelSelectButtonProps) {
  return (
    <div className="flex flex-col items-center gap-1 w-full">
      <Disabled disabled={disabled} allowClick>
        <button
          type="button"
          onClick={onClick}
          disabled={disabled}
          className={cn(
            "w-full px-4 py-2.5 rounded-12 border transition-colors",
            selected
              ? "border-action-link-05 bg-action-link-01 text-action-text-link-05"
              : "border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01"
          )}
        >
          <Text mainUiAction>{label}</Text>
        </button>
      </Disabled>
      {recommended && (
        <Text figureSmallLabel text02>
          Recommended
        </Text>
      )}
    </div>
  );
}

interface OnboardingLlmSetupProps {
  selectedProvider: ProviderKey;
  selectedModel: string;
  apiKey: string;
  connectionStatus: "idle" | "testing" | "success" | "error";
  errorMessage: string;
  llmProviders?: LLMProviderDescriptor[];
  onProviderChange: (provider: ProviderKey) => void;
  onModelChange: (model: string) => void;
  onApiKeyChange: (apiKey: string) => void;
  onConnectionStatusChange: (
    status: "idle" | "testing" | "success" | "error"
  ) => void;
  onErrorMessageChange: (message: string) => void;
}

export default function OnboardingLlmSetup({
  selectedProvider,
  selectedModel,
  apiKey,
  connectionStatus,
  errorMessage,
  llmProviders,
  onProviderChange,
  onModelChange,
  onApiKeyChange,
  onConnectionStatusChange,
  onErrorMessageChange,
}: OnboardingLlmSetupProps) {
  const currentProviderConfig = PROVIDERS.find(
    (p) => p.key === selectedProvider
  )!;

  const isProviderConfigured = (providerName: string) => {
    return llmProviders?.some((p) => p.provider === providerName) ?? false;
  };

  const handleProviderChange = (provider: ProviderKey) => {
    const providerConfig = PROVIDERS.find((p) => p.key === provider)!;
    // Don't allow selecting already-configured providers
    if (isProviderConfigured(providerConfig.providerName)) return;

    onProviderChange(provider);
    onModelChange(providerConfig.models[0]?.name || "");
    onConnectionStatusChange("idle");
    onErrorMessageChange("");
  };

  const handleModelChange = (model: string) => {
    onModelChange(model);
    onConnectionStatusChange("idle");
    onErrorMessageChange("");
  };

  const handleApiKeyChange = (value: string) => {
    onApiKeyChange(value);
    onConnectionStatusChange("idle");
    onErrorMessageChange("");
  };

  return (
    <div className="flex-1 flex flex-col gap-6 justify-between">
      {/* Header */}
      <div className="flex items-center justify-center">
        <Text headingH2 text05>
          Connect your LLM
        </Text>
      </div>

      {/* Provider selection */}
      <div className="flex flex-col gap-3 items-center">
        <Text mainUiBody text04>
          Provider
        </Text>
        <div className="flex justify-center gap-3 w-full max-w-md">
          {PROVIDERS.map((provider) => {
            const isConfigured = isProviderConfigured(provider.providerName);
            return (
              <div key={provider.key} className="flex-1">
                <SelectableButton
                  selected={selectedProvider === provider.key}
                  onClick={() => handleProviderChange(provider.key)}
                  subtext={
                    isConfigured
                      ? "Already configured"
                      : provider.recommended
                        ? "Recommended"
                        : undefined
                  }
                  disabled={connectionStatus === "testing" || isConfigured}
                  tooltip={
                    isConfigured
                      ? "This provider is already configured"
                      : undefined
                  }
                >
                  {provider.label}
                </SelectableButton>
              </div>
            );
          })}
        </div>
      </div>

      {/* Model selection */}
      <div className="flex flex-col gap-3 items-center">
        <Text mainUiBody text04>
          Default Model
        </Text>
        <div className="flex justify-center gap-3 flex-wrap w-full max-w-md">
          {currentProviderConfig.models.map((model) => (
            <div key={model.name} className="flex-1 min-w-0">
              <ModelSelectButton
                selected={selectedModel === model.name}
                onClick={() => handleModelChange(model.name)}
                label={model.label}
                recommended={model.recommended}
                disabled={connectionStatus === "testing"}
              />
            </div>
          ))}
        </div>
      </div>

      {/* API Key input */}
      <div className="flex flex-col gap-3 items-center">
        <Text mainUiBody text04>
          API Key
        </Text>
        <div className="w-full max-w-md">
          <Disabled disabled={connectionStatus === "testing"} allowClick>
            <input
              type="password"
              value={apiKey}
              onChange={(e) => handleApiKeyChange(e.target.value)}
              placeholder={currentProviderConfig.apiKeyPlaceholder}
              disabled={connectionStatus === "testing"}
              className="w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none"
            />
          </Disabled>
          {/* Message area */}
          <div className="min-h-[2rem] flex justify-center pt-4">
            {connectionStatus === "error" && (
              <Text secondaryBody className="text-red-500">
                {errorMessage}
              </Text>
            )}
            <div
              className={cn(
                "flex items-center gap-2 px-3 py-2 rounded-08 bg-status-success-00 border border-status-success-02 w-fit",
                connectionStatus !== "success" && "hidden"
              )}
            >
              <SvgCheckCircle className="w-4 h-4 stroke-status-success-05 shrink-0" />
              <Text secondaryBody className="text-status-success-05">
                Success!
              </Text>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/components/OnboardingUserInfo.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import { Disabled } from "@opal/core";
import Text from "@/refresh-components/texts/Text";
import {
  WorkArea,
  Level,
  WORK_AREA_OPTIONS,
  LEVEL_OPTIONS,
  WORK_AREAS_REQUIRING_LEVEL,
  PERSONA_MAPPING,
  DEMO_COMPANY_NAME,
  getPositionText,
} from "@/app/craft/onboarding/constants";

interface SelectableButtonProps {
  selected: boolean;
  onClick: () => void;
  children: React.ReactNode;
  subtext?: string;
  disabled?: boolean;
}

function SelectableButton({
  selected,
  onClick,
  children,
  subtext,
  disabled,
}: SelectableButtonProps) {
  return (
    <div className="flex flex-col items-center gap-1">
      <Disabled disabled={disabled} allowClick>
        <button
          type="button"
          onClick={onClick}
          disabled={disabled}
          className={cn(
            "w-full px-6 py-3 rounded-12 border transition-colors",
            selected
              ? "border-action-link-05 bg-action-link-01 text-action-text-link-05"
              : "border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01"
          )}
        >
          <Text mainUiAction>{children}</Text>
        </button>
      </Disabled>
      {subtext && (
        <Text figureSmallLabel text02>
          {subtext}
        </Text>
      )}
    </div>
  );
}

interface OnboardingUserInfoProps {
  firstName: string;
  lastName: string;
  workArea: WorkArea | undefined;
  level: Level | undefined;
  onFirstNameChange: (value: string) => void;
  onLastNameChange: (value: string) => void;
  onWorkAreaChange: (value: WorkArea | undefined) => void;
  onLevelChange: (value: Level | undefined) => void;
}

export default function OnboardingUserInfo({
  firstName: _firstName,
  lastName: _lastName,
  workArea,
  level,
  onFirstNameChange: _onFirstNameChange,
  onLastNameChange: _onLastNameChange,
  onWorkAreaChange,
  onLevelChange,
}: OnboardingUserInfoProps) {
  const requiresLevel =
    workArea !== undefined && WORK_AREAS_REQUIRING_LEVEL.includes(workArea);

  // Get persona info for preview
  const selectedLevel = level ?? Level.IC;
  const personaInfo =
    workArea !== undefined ? PERSONA_MAPPING[workArea]?.[selectedLevel] : null;
  const positionText =
    workArea !== undefined ? getPositionText(workArea, level) : null;

  return (
    <div className="flex-1 flex flex-col gap-6">
      {/* Header */}
      <div className="flex flex-col items-center gap-3">
        <Text headingH2 text05>
          Demo Data Configuration
        </Text>
      </div>

      <div className="flex-1 flex flex-col gap-8 justify-center">
        {/* Name inputs - commented out for now, can be re-enabled later
        <div className="flex justify-center">
          <div className="grid grid-cols-2 gap-4 w-full max-w-md">
            <div className="flex flex-col gap-1.5">
              <Text secondaryBody text03>
                First name
              </Text>
              <input
                type="text"
                value={firstName}
                onChange={(e) => onFirstNameChange(e.target.value)}
                placeholder="Steven"
                className="w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none"
              />
            </div>
            <div className="flex flex-col gap-1.5">
              <Text secondaryBody text03>
                Last name
              </Text>
              <input
                type="text"
                value={lastName}
                onChange={(e) => onLastNameChange(e.target.value)}
                placeholder="Alexson"
                className="w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none"
              />
            </div>
          </div>
        </div>
        */}

        <Text mainUiBody text04 className="text-center">
          While you wait for your data to sync, try out our simulated demo
          dataset! <br />
          The simulated data will adapt to your role and level choices below.
        </Text>

        {/* Work area */}
        <div className="flex flex-col gap-3 items-center">
          <Text mainUiBody text04>
            Select your role:
          </Text>
          <div className="grid grid-cols-3 gap-3 w-full">
            {WORK_AREA_OPTIONS.map((option) => (
              <SelectableButton
                key={option.value}
                selected={workArea === option.value}
                onClick={() => onWorkAreaChange(option.value)}
              >
                {option.label}
              </SelectableButton>
            ))}
          </div>
        </div>

        {/* Level */}
        <div className="flex flex-col gap-3 items-center">
          <Text mainUiBody text04>
            Level{" "}
            {requiresLevel && <span className="text-status-error-05">*</span>}
          </Text>
          <div className="flex justify-center gap-3 w-full">
            <div className="grid grid-cols-2 gap-3 w-2/3">
              {LEVEL_OPTIONS.map((option) => (
                <SelectableButton
                  key={option.value}
                  selected={level === option.value}
                  onClick={() =>
                    onLevelChange(
                      level === option.value ? undefined : option.value
                    )
                  }
                >
                  {option.label}
                </SelectableButton>
              ))}
            </div>
          </div>
        </div>

        {/* Persona preview - always reserve space to prevent layout shift */}
        <div className="flex justify-center min-h-[1.5rem]">
          {personaInfo && positionText && (
            <Text mainContentBody text03 className="text-center">
              You will play the role of {positionText} named {personaInfo.name}{" "}
              working at <br />
              {DEMO_COMPANY_NAME}
            </Text>
          )}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/onboarding/constants.ts
================================================
// =============================================================================
// LLM Selection Types and Utilities
// =============================================================================

export interface BuildLlmSelection {
  providerName: string; // e.g., "build-mode-anthropic" (LLMProviderDescriptor.name)
  provider: string; // e.g., "anthropic"
  modelName: string; // e.g., "claude-opus-4-6"
}

// Priority order for smart default LLM selection
const LLM_SELECTION_PRIORITY = [
  { provider: "anthropic", modelName: "claude-opus-4-6" },
  { provider: "openai", modelName: "gpt-5.2" },
  { provider: "openrouter", modelName: "minimax/minimax-m2.1" },
] as const;

// Minimal provider interface for selection logic
interface MinimalLlmProvider {
  name: string;
  provider: string;
  model_configurations: { name: string; is_visible: boolean }[];
}

/**
 * Get the best default LLM selection based on available providers.
 * Priority: Anthropic > OpenAI > OpenRouter > first available
 */
export function getDefaultLlmSelection(
  llmProviders: MinimalLlmProvider[] | undefined
): BuildLlmSelection | null {
  if (!llmProviders || llmProviders.length === 0) return null;

  // Try each priority provider in order
  for (const { provider, modelName } of LLM_SELECTION_PRIORITY) {
    const matchingProvider = llmProviders.find((p) => p.provider === provider);
    if (matchingProvider) {
      return {
        providerName: matchingProvider.name,
        provider: matchingProvider.provider,
        modelName,
      };
    }
  }

  // Fallback: first available provider, use its first visible model
  const firstProvider = llmProviders[0];
  if (firstProvider) {
    const firstModel = firstProvider.model_configurations.find(
      (m) => m.is_visible
    );
    return {
      providerName: firstProvider.name,
      provider: firstProvider.provider,
      modelName: firstModel?.name ?? "",
    };
  }

  return null;
}

// Recommended models config (for UI display)
export const RECOMMENDED_BUILD_MODELS = {
  preferred: {
    provider: "anthropic",
    modelName: "claude-opus-4-6",
    displayName: "Claude Opus 4.6",
  },
  alternatives: [
    { provider: "anthropic", modelName: "claude-sonnet-4-6" },
    { provider: "openai", modelName: "gpt-5.2" },
    { provider: "openai", modelName: "gpt-5.1-codex" },
    { provider: "openrouter", modelName: "minimax/minimax-m2.1" },
  ],
} as const;

// Cookie utilities
const BUILD_LLM_COOKIE_KEY = "build_llm_selection";

export function getBuildLlmSelection(): BuildLlmSelection | null {
  if (typeof document === "undefined") return null;
  const cookie = document.cookie
    .split("; ")
    .find((row) => row.startsWith(`${BUILD_LLM_COOKIE_KEY}=`));
  if (!cookie) return null;
  try {
    const value = cookie.split("=")[1];
    if (!value) return null;
    return JSON.parse(decodeURIComponent(value));
  } catch {
    return null;
  }
}

export function setBuildLlmSelection(selection: BuildLlmSelection): void {
  if (typeof document === "undefined") return;
  const value = encodeURIComponent(JSON.stringify(selection));
  // Cookie expires in 1 year
  const expires = new Date(
    Date.now() + 365 * 24 * 60 * 60 * 1000
  ).toUTCString();
  document.cookie = `${BUILD_LLM_COOKIE_KEY}=${value}; path=/; expires=${expires}; SameSite=Lax`;
}

export function clearBuildLlmSelection(): void {
  if (typeof document === "undefined") return;
  document.cookie = `${BUILD_LLM_COOKIE_KEY}=; path=/; expires=Thu, 01 Jan 1970 00:00:00 GMT`;
}

export function isRecommendedModel(
  provider: string,
  modelName: string
): boolean {
  const { preferred, alternatives } = RECOMMENDED_BUILD_MODELS;
  // Exact match for preferred model
  if (preferred.provider === provider && modelName === preferred.modelName) {
    return true;
  }
  // Exact match for alternatives
  return alternatives.some(
    (alt) => alt.provider === provider && modelName === alt.modelName
  );
}

// Curated providers for Build mode (shared between BuildOnboardingModal and BuildLLMPopover)
export interface BuildModeModel {
  name: string;
  label: string;
  recommended?: boolean;
}

export interface BuildModeProvider {
  key: string;
  label: string;
  providerName: string;
  recommended?: boolean;
  models: BuildModeModel[];
  // API-related fields (optional, only needed for onboarding modal)
  apiKeyPlaceholder?: string;
  apiKeyUrl?: string;
  apiKeyLabel?: string;
}

export const BUILD_MODE_PROVIDERS: BuildModeProvider[] = [
  {
    key: "anthropic",
    label: "Anthropic",
    providerName: "anthropic",
    recommended: true,
    models: [
      { name: "claude-opus-4-6", label: "Claude Opus 4.6", recommended: true },
      { name: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
    ],
    apiKeyPlaceholder: "sk-ant-...",
    apiKeyUrl: "https://console.anthropic.com/dashboard",
    apiKeyLabel: "Anthropic Console",
  },
  {
    key: "openai",
    label: "OpenAI",
    providerName: "openai",
    models: [
      { name: "gpt-5.2", label: "GPT-5.2", recommended: true },
      { name: "gpt-5.1-codex", label: "GPT-5.1 Codex" },
    ],
    apiKeyPlaceholder: "sk-...",
    apiKeyUrl: "https://platform.openai.com/api-keys",
    apiKeyLabel: "OpenAI Dashboard",
  },
  {
    key: "openrouter",
    label: "OpenRouter",
    providerName: "openrouter",
    models: [
      {
        name: "minimax/minimax-m2.1",
        label: "MiniMax M2.1",
        recommended: true,
      },
    ],
    apiKeyPlaceholder: "sk-or-...",
    apiKeyUrl: "https://openrouter.ai/keys",
    apiKeyLabel: "OpenRouter Dashboard",
  },
];

// =============================================================================
// User Info/Persona Constants
// =============================================================================

export interface PersonaInfo {
  name: string;
  email: string;
}

// Work area enum - derived from PERSONA_MAPPING keys
export enum WorkArea {
  ENGINEERING = "engineering",
  PRODUCT = "product",
  EXECUTIVE = "executive",
  SALES = "sales",
  MARKETING = "marketing",
  OTHER = "other",
}

// Level enum - derived from PERSONA_MAPPING structure
export enum Level {
  IC = "ic",
  MANAGER = "manager",
}

// Persona mapping: work_area -> level -> PersonaInfo
// Matches backend/onyx/server/features/build/sandbox/util/persona_mapping.py
// This is the source of truth for work areas and levels
export const PERSONA_MAPPING: Record<WorkArea, Record<Level, PersonaInfo>> = {
  [WorkArea.ENGINEERING]: {
    [Level.IC]: {
      name: "Jiwon Kang",
      email: "jiwon_kang@netherite-extraction.onyx.app",
    },
    [Level.MANAGER]: {
      name: "Javier Morales",
      email: "javier_morales@netherite-extraction.onyx.app",
    },
  },
  [WorkArea.SALES]: {
    [Level.IC]: {
      name: "Megan Foster",
      email: "megan_foster@netherite-extraction.onyx.app",
    },
    [Level.MANAGER]: {
      name: "Valeria Cruz",
      email: "valeria_cruz@netherite-extraction.onyx.app",
    },
  },
  [WorkArea.PRODUCT]: {
    [Level.IC]: {
      name: "Michael Anderson",
      email: "michael_anderson@netherite-extraction.onyx.app",
    },
    [Level.MANAGER]: {
      name: "David Liu",
      email: "david_liu@netherite-extraction.onyx.app",
    },
  },
  [WorkArea.MARKETING]: {
    [Level.IC]: {
      name: "Rahul Patel",
      email: "rahul_patel@netherite-extraction.onyx.app",
    },
    [Level.MANAGER]: {
      name: "Olivia Reed",
      email: "olivia_reed@netherite-extraction.onyx.app",
    },
  },
  [WorkArea.EXECUTIVE]: {
    [Level.IC]: {
      name: "Sarah Mitchell",
      email: "sarah_mitchell@netherite-extraction.onyx.app",
    },
    [Level.MANAGER]: {
      name: "Sarah Mitchell",
      email: "sarah_mitchell@netherite-extraction.onyx.app",
    },
  },
  [WorkArea.OTHER]: {
    [Level.MANAGER]: {
      name: "Ralf Schroeder",
      email: "ralf_schroeder@netherite-extraction.onyx.app",
    },
    [Level.IC]: {
      name: "John Carpenter",
      email: "john_carpenter@netherite-extraction.onyx.app",
    },
  },
};

// Helper to capitalize first letter
const capitalize = (str: string): string => {
  return str.charAt(0).toUpperCase() + str.slice(1);
};

// Derive WORK_AREA_OPTIONS from WorkArea enum
export const WORK_AREA_OPTIONS = Object.values(WorkArea).map((value) => ({
  value,
  label: capitalize(value),
}));

// Derive LEVEL_OPTIONS from Level enum
export const LEVEL_OPTIONS = Object.values(Level).map((value) => ({
  value,
  label: value === Level.IC ? "IC" : capitalize(value),
}));

// Work areas where level selection is required
// Executive has the same persona for both levels, so level is optional
export const WORK_AREAS_REQUIRING_LEVEL: WorkArea[] = [
  WorkArea.ENGINEERING,
  WorkArea.PRODUCT,
  WorkArea.SALES,
  WorkArea.MARKETING,
  WorkArea.OTHER,
];

// Helper function to get persona info
export function getPersonaInfo(
  workArea: WorkArea,
  level: Level
): PersonaInfo | undefined {
  return PERSONA_MAPPING[workArea]?.[level];
}

// Company name for demo personas
export const DEMO_COMPANY_NAME = "Netherite Extraction Inc.";

// Helper function to get position text from work area and level
// Executive: "Executive" (no level), Other: "employee", Everything else: show level if available
export function getPositionText(
  workArea: WorkArea,
  level: Level | undefined
): string {
  const workAreaLabel =
    WORK_AREA_OPTIONS.find((opt) => opt.value === workArea)?.label || workArea;

  if (workArea === WorkArea.OTHER) {
    return "Employee";
  }

  if (workArea === WorkArea.EXECUTIVE) {
    return "Executive";
  }

  if (level) {
    const levelLabel =
      LEVEL_OPTIONS.find((opt) => opt.value === level)?.label || level;
    return `${workAreaLabel} ${levelLabel}`;
  }

  return workAreaLabel;
}

export const BUILD_USER_PERSONA_COOKIE_NAME = "build_user_persona";

// Helper type for the consolidated cookie
export interface BuildUserPersona {
  workArea: WorkArea;
  level?: Level;
}

// Helper functions for getting/setting the consolidated cookie
export function getBuildUserPersona(): BuildUserPersona | null {
  if (typeof window === "undefined") return null;

  const cookieValue = document.cookie
    .split("; ")
    .find((row) => row.startsWith(`${BUILD_USER_PERSONA_COOKIE_NAME}=`))
    ?.split("=")[1];

  if (!cookieValue) return null;

  try {
    const parsed = JSON.parse(decodeURIComponent(cookieValue));
    // Validate and cast to enum types
    if (
      parsed.workArea &&
      Object.values(WorkArea).includes(parsed.workArea as WorkArea)
    ) {
      return {
        workArea: parsed.workArea as WorkArea,
        level:
          parsed.level && Object.values(Level).includes(parsed.level as Level)
            ? (parsed.level as Level)
            : undefined,
      };
    }
    return null;
  } catch {
    return null;
  }
}

export function setBuildUserPersona(persona: BuildUserPersona): void {
  const cookieValue = encodeURIComponent(JSON.stringify(persona));
  const expires = new Date();
  expires.setFullYear(expires.getFullYear() + 1);
  document.cookie = `${BUILD_USER_PERSONA_COOKIE_NAME}=${cookieValue}; path=/; expires=${expires.toUTCString()}`;
}


================================================
FILE: web/src/app/craft/onboarding/hooks/useOnboardingModal.ts
================================================
"use client";

import { useCallback, useState, useMemo, useEffect } from "react";
import { useUser } from "@/providers/UserProvider";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { LLMProviderName } from "@/interfaces/llm";
import {
  OnboardingModalMode,
  OnboardingModalController,
  BuildUserInfo,
} from "@/app/craft/onboarding/types";
import {
  getBuildUserPersona,
  setBuildUserPersona,
} from "@/app/craft/onboarding/constants";
import { updateUserPersonalization } from "@/lib/userSettings";
import { useBuildSessionStore } from "@/app/craft/hooks/useBuildSessionStore";

// Check if all 3 build mode providers are configured (anthropic, openai, openrouter)
function checkAllProvidersConfigured(
  llmProviders: import("@/interfaces/llm").LLMProviderDescriptor[] | undefined
): boolean {
  if (!llmProviders || llmProviders.length === 0) {
    return false;
  }
  const configuredProviders = new Set(llmProviders.map((p) => p.provider));
  return (
    configuredProviders.has(LLMProviderName.ANTHROPIC) &&
    configuredProviders.has(LLMProviderName.OPENAI) &&
    configuredProviders.has(LLMProviderName.OPENROUTER)
  );
}

// Check if at least one provider is configured
function checkHasAnyProvider(
  llmProviders: import("@/interfaces/llm").LLMProviderDescriptor[] | undefined
): boolean {
  return !!(llmProviders && llmProviders.length > 0);
}

export function useOnboardingModal(): OnboardingModalController {
  const { user, isAdmin, refreshUser } = useUser();
  const {
    llmProviders,
    isLoading: isLoadingLlm,
    refetch: refetchLlmProviders,
  } = useLLMProviders();

  // Get ensurePreProvisionedSession from the session store
  const ensurePreProvisionedSession = useBuildSessionStore(
    (state) => state.ensurePreProvisionedSession
  );

  // Modal mode state
  const [mode, setMode] = useState<OnboardingModalMode>({ type: "closed" });
  const [hasInitialized, setHasInitialized] = useState(false);

  // Compute initial values for the form (read fresh on every render)
  const existingPersona = getBuildUserPersona();
  const existingName = user?.personalization?.name || "";
  const spaceIndex = existingName.indexOf(" ");
  const initialFirstName =
    spaceIndex > 0 ? existingName.slice(0, spaceIndex) : existingName;
  const initialLastName =
    spaceIndex > 0 ? existingName.slice(spaceIndex + 1) : "";

  const initialValues = {
    firstName: initialFirstName,
    lastName: initialLastName,
    workArea: existingPersona?.workArea,
    level: existingPersona?.level,
  };

  // Check if user has completed initial onboarding (only role required, not name)
  const hasUserInfo = useMemo(() => {
    return !!getBuildUserPersona()?.workArea;
  }, [user]);

  // Check if all providers are configured (skip LLM step entirely if so)
  const allProvidersConfigured = useMemo(
    () => checkAllProvidersConfigured(llmProviders),
    [llmProviders]
  );

  // Check if at least one provider is configured (allow skipping LLM step)
  const hasAnyProvider = useMemo(
    () => checkHasAnyProvider(llmProviders),
    [llmProviders]
  );

  // Auto-open initial onboarding modal on first load
  // Shows if: user info (role) missing OR (admin AND no providers configured)
  useEffect(() => {
    if (hasInitialized || isLoadingLlm || !user) return;

    const needsUserInfo = !hasUserInfo;
    const needsLlmSetup = isAdmin && !hasAnyProvider;

    if (needsUserInfo || needsLlmSetup) {
      setMode({ type: "initial-onboarding" });
    }

    setHasInitialized(true);
  }, [
    hasInitialized,
    isLoadingLlm,
    user,
    hasUserInfo,
    isAdmin,
    hasAnyProvider,
  ]);

  // Complete user info callback
  const completeUserInfo = useCallback(
    async (info: BuildUserInfo) => {
      // Save name via API (handle optional lastName)
      const fullName = info.lastName
        ? `${info.firstName} ${info.lastName}`.trim()
        : info.firstName.trim();
      await updateUserPersonalization({ name: fullName });

      // Save persona to cookie
      setBuildUserPersona({
        workArea: info.workArea,
        level: info.level,
      });

      // Refresh user to update state
      await refreshUser();

      // Trigger pre-provisioning now that onboarding is complete
      // This ensures the sandbox starts provisioning immediately rather than
      // waiting for the controller effect to detect the cookie change
      ensurePreProvisionedSession();
    },
    [refreshUser, ensurePreProvisionedSession]
  );

  // Complete LLM setup callback
  const completeLlmSetup = useCallback(async () => {
    await refetchLlmProviders();
  }, [refetchLlmProviders]);

  // Actions
  const openPersonaEditor = useCallback(() => {
    setMode({ type: "edit-persona" });
  }, []);

  const openLlmSetup = useCallback((provider?: string) => {
    setMode({ type: "add-llm", provider });
  }, []);

  const close = useCallback(() => {
    setMode({ type: "closed" });
  }, []);

  const isOpen = mode.type !== "closed";

  return {
    mode,
    isOpen,
    openPersonaEditor,
    openLlmSetup,
    close,
    llmProviders,
    initialValues,
    completeUserInfo,
    completeLlmSetup,
    refetchLlmProviders,
    isAdmin,
    hasUserInfo,
    allProvidersConfigured,
    hasAnyProvider,
    isLoading: isLoadingLlm,
  };
}


================================================
FILE: web/src/app/craft/onboarding/types.ts
================================================
import { WorkArea, Level } from "./constants";
import type {
  LLMProviderDescriptor,
  LLMProviderResponse,
} from "@/interfaces/llm";

export interface BuildUserInfo {
  firstName: string;
  lastName?: string;
  workArea: WorkArea;
  level?: Level;
}

// Legacy flow interface (kept for backwards compatibility during migration)
export interface BuildOnboardingFlow {
  showNotAllowedModal: boolean;
  showUserInfoModal: boolean;
  showLlmModal: boolean;
}

// New mode-based modal types
export type OnboardingModalMode =
  | { type: "initial-onboarding" } // Full flow: user-info → llm? → content
  | { type: "edit-persona" } // Just user-info step
  | { type: "add-llm"; provider?: string } // Just llm-setup step
  | { type: "closed" }; // Modal not visible

export type OnboardingStep = "user-info" | "llm-setup" | "page1" | "page2";

export interface OnboardingModalController {
  mode: OnboardingModalMode;
  isOpen: boolean;

  // Actions
  openPersonaEditor: () => void;
  openLlmSetup: (provider?: string) => void;
  close: () => void;

  // Data needed for modal
  llmProviders: LLMProviderDescriptor[] | undefined;
  initialValues: {
    firstName: string;
    lastName: string;
    workArea: WorkArea | undefined;
    level: Level | undefined;
  };

  // State
  isAdmin: boolean;
  hasUserInfo: boolean; // User has completed user-info (name + workArea)
  allProvidersConfigured: boolean; // All 3 providers (anthropic, openai, openrouter) are configured
  hasAnyProvider: boolean; // At least 1 provider is configured (allows skipping)
  isLoading: boolean; // True while LLM providers are loading

  // Callbacks
  completeUserInfo: (info: BuildUserInfo) => Promise<void>;
  completeLlmSetup: () => Promise<void>;
  refetchLlmProviders: () => Promise<
    LLMProviderResponse<LLMProviderDescriptor> | undefined
  >;
}


================================================
FILE: web/src/app/craft/page.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import { CRAFT_PATH } from "@/app/craft/v1/constants";

/**
 * Build Page - Redirects to the new Build V1 page
 *
 * The new Build experience is at /craft/v1
 * This page exists for backwards compatibility.
 */
export default function BuildPage() {
  const router = useRouter();

  useEffect(() => {
    router.replace(CRAFT_PATH);
  }, [router]);

  return (
    <div className="flex items-center justify-center h-screen">
      <div className="animate-pulse text-text-03">Redirecting...</div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/services/apiServices.ts
================================================
import {
  ApiSessionResponse,
  ApiDetailedSessionResponse,
  ApiMessageResponse,
  ApiArtifactResponse,
  ApiUsageLimitsResponse,
  ApiWebappInfoResponse,
  SessionHistoryItem,
  Artifact,
  BuildMessage,
  StreamPacket,
  UsageLimits,
  DirectoryListing,
  SharingScope,
} from "@/app/craft/types/streamingTypes";

// =============================================================================
// API Configuration
// =============================================================================

const API_BASE = "/api/build";
export const USAGE_LIMITS_ENDPOINT = `${API_BASE}/limit`;

// =============================================================================
// SSE Stream Processing
// =============================================================================

export async function processSSEStream(
  response: Response,
  onPacket: (packet: StreamPacket) => void
): Promise<void> {
  const reader = response.body?.getReader();
  if (!reader) throw new Error("No response body");

  const decoder = new TextDecoder();
  let buffer = "";
  let currentEventType = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    buffer = lines.pop() || "";

    for (const line of lines) {
      if (line.startsWith("event: ") || line.startsWith("event:")) {
        // Capture the event type from the SSE event line
        currentEventType = line.slice(line.indexOf(":") + 1).trim();
      } else if (line.startsWith("data: ") || line.startsWith("data:")) {
        const dataStr = line.slice(line.indexOf(":") + 1).trim();
        if (dataStr) {
          try {
            const data = JSON.parse(dataStr);
            // The backend sends `event: message` for all events and puts the
            // actual type in data.type. Only use SSE event type as fallback
            // if data.type is not present and SSE event is not "message".
            if (
              !data.type &&
              currentEventType &&
              currentEventType !== "message"
            ) {
              onPacket({ ...data, type: currentEventType });
            } else {
              onPacket(data);
            }
          } catch (e) {
            console.error("[SSE] Parse error:", e, "Raw data:", dataStr);
          }
        }
        // Reset event type for next event
        currentEventType = "";
      }
    }
  }
}

// =============================================================================
// Session API
// =============================================================================

export interface CreateSessionOptions {
  name?: string | null;
  demoDataEnabled?: boolean;
  userWorkArea?: string | null;
  userLevel?: string | null;
  // LLM selection from user's cookie
  llmProviderType?: string | null; // Provider type (e.g., "anthropic", "openai")
  llmModelName?: string | null;
}

export async function createSession(
  options?: CreateSessionOptions
): Promise<ApiDetailedSessionResponse> {
  const res = await fetch(`${API_BASE}/sessions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      name: options?.name || null,
      demo_data_enabled: options?.demoDataEnabled ?? true,
      user_work_area: options?.userWorkArea || null,
      user_level: options?.userLevel || null,
      llm_provider_type: options?.llmProviderType || null,
      llm_model_name: options?.llmModelName || null,
    }),
  });

  if (!res.ok) {
    throw new Error(`Failed to create session: ${res.status}`);
  }

  return res.json();
}

export async function fetchSession(
  sessionId: string
): Promise<ApiDetailedSessionResponse> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}`);

  if (!res.ok) {
    throw new Error(`Failed to load session: ${res.status}`);
  }

  return res.json();
}

export async function fetchSessionHistory(): Promise<SessionHistoryItem[]> {
  const res = await fetch(`${API_BASE}/sessions`);

  if (!res.ok) {
    throw new Error(`Failed to fetch session history: ${res.status}`);
  }

  const data = await res.json();
  return data.sessions.map((s: ApiSessionResponse) => ({
    id: s.id,
    title: s.name || `Session ${s.id.slice(0, 8)}...`,
    createdAt: new Date(s.created_at),
  }));
}

export async function generateSessionName(sessionId: string): Promise<string> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/generate-name`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  });

  if (!res.ok) {
    throw new Error(`Failed to generate session name: ${res.status}`);
  }

  const data = await res.json();
  return data.name;
}

export interface SuggestionBubble {
  theme: "add" | "question";
  text: string;
}

export async function generateFollowupSuggestions(
  sessionId: string,
  userMessage: string,
  agentMessage: string
): Promise<SuggestionBubble[]> {
  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/generate-suggestions`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        user_message: userMessage,
        assistant_message: agentMessage,
      }),
    }
  );

  if (!res.ok) {
    throw new Error(`Failed to generate suggestions: ${res.status}`);
  }

  const data = await res.json();
  return data.suggestions;
}

export async function updateSessionName(
  sessionId: string,
  name: string | null
): Promise<void> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/name`, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ name }),
  });

  if (!res.ok) {
    throw new Error(`Failed to update session name: ${res.status}`);
  }
}

export async function setSessionSharing(
  sessionId: string,
  sharingScope: SharingScope
): Promise<{ session_id: string; sharing_scope: SharingScope }> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/public`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ sharing_scope: sharingScope }),
  });

  if (!res.ok) {
    throw new Error(`Failed to update session sharing: ${res.status}`);
  }

  return res.json();
}

export async function deleteSession(sessionId: string): Promise<void> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}`, {
    method: "DELETE",
  });

  if (!res.ok) {
    throw new Error(`Failed to delete session: ${res.status}`);
  }
}

/**
 * Restore a sleeping sandbox and load the session's snapshot.
 * This is a blocking call that waits until the restore is complete.
 *
 * Handles two cases:
 * 1. Sandbox is SLEEPING: Re-provisions pod, then loads session snapshot
 * 2. Sandbox is RUNNING but session not loaded: Just loads session snapshot
 *
 * Returns immediately if session workspace already exists in pod.
 */
export async function restoreSession(
  sessionId: string
): Promise<ApiDetailedSessionResponse> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/restore`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  });

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to restore session: ${res.status}`
    );
  }

  return res.json();
}

/**
 * Check if a pre-provisioned session is still valid (empty).
 * Used for polling to detect when another tab has used the session.
 *
 * @returns { valid: true, session_id: string } if session is still empty
 * @returns { valid: false, session_id: null } if session has messages or doesn't exist
 */
export async function checkPreProvisionedSession(
  sessionId: string
): Promise<{ valid: boolean; session_id: string | null }> {
  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/pre-provisioned-check`
  );

  if (!res.ok) {
    // Treat errors as invalid session
    return { valid: false, session_id: null };
  }

  return res.json();
}

// =============================================================================
// Messages API
// =============================================================================

/**
 * Extract text content from message_metadata.
 * For user_message: {type: "user_message", content: {type: "text", text: "..."}}
 */
function extractContentFromMetadata(
  metadata: Record<string, any> | null | undefined
): string {
  if (!metadata) return "";
  const content = metadata.content;
  if (!content) return "";
  if (typeof content === "string") return content;
  if (typeof content === "object" && content.type === "text" && content.text) {
    return content.text;
  }
  return "";
}

export async function fetchMessages(
  sessionId: string
): Promise<BuildMessage[]> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/messages`);

  if (!res.ok) {
    throw new Error(`Failed to fetch messages: ${res.status}`);
  }

  const data = await res.json();
  return data.messages.map((m: ApiMessageResponse) => ({
    id: m.id,
    type: m.type,
    // Content is stored in message_metadata, not as a separate field
    content: m.content || extractContentFromMetadata(m.message_metadata),
    message_metadata: m.message_metadata,
    timestamp: new Date(m.created_at),
  }));
}

/**
 * Custom error class for rate limit (429) errors.
 * Used to distinguish rate limit errors from other API errors
 * so the UI can show an upsell modal instead of a generic error.
 */
export class RateLimitError extends Error {
  public readonly statusCode: number = 429;

  constructor() {
    super("Rate limit exceeded");
    this.name = "RateLimitError";
  }
}

/**
 * Send a message and return the streaming response.
 * The caller is responsible for processing the SSE stream.
 */
export async function sendMessageStream(
  sessionId: string,
  content: string,
  signal?: AbortSignal
): Promise<Response> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/send-message`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ content }),
    signal,
  });

  if (!res.ok) {
    // Handle rate limit errors specifically so UI can show upsell modal
    if (res.status === 429) {
      throw new RateLimitError();
    }
    throw new Error(`Failed to send message: ${res.status}`);
  }

  return res;
}

// =============================================================================
// Artifacts API
// =============================================================================

export async function fetchArtifacts(sessionId: string): Promise<Artifact[]> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/artifacts`);

  if (!res.ok) {
    throw new Error(`Failed to fetch artifacts: ${res.status}`);
  }

  const data = await res.json();
  // Backend returns a direct array, not wrapped in an object
  return data.map((a: ApiArtifactResponse) => ({
    id: a.id,
    session_id: a.session_id,
    type: a.type,
    name: a.name,
    path: a.path,
    preview_url: a.preview_url,
    created_at: new Date(a.created_at),
    updated_at: new Date(a.updated_at),
  }));
}

// =============================================================================
// Webapp API
// =============================================================================

export async function fetchWebappInfo(
  sessionId: string
): Promise<ApiWebappInfoResponse> {
  const res = await fetch(`${API_BASE}/sessions/${sessionId}/webapp-info`);

  if (!res.ok) {
    throw new Error(`Failed to fetch webapp info: ${res.status}`);
  }

  return res.json();
}

// =============================================================================
// Files API
// =============================================================================

export async function fetchDirectoryListing(
  sessionId: string,
  path: string = ""
): Promise<DirectoryListing> {
  const url = new URL(
    `${API_BASE}/sessions/${sessionId}/files`,
    window.location.origin
  );
  if (path) {
    url.searchParams.set("path", path);
  }

  const res = await fetch(url.toString());

  if (!res.ok) {
    throw new Error(`Failed to fetch directory listing: ${res.status}`);
  }

  return res.json();
}

/**
 * Trigger a browser download for a single file from the sandbox.
 */
export function downloadArtifactFile(sessionId: string, path: string): void {
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  const link = document.createElement("a");
  link.href = `${API_BASE}/sessions/${sessionId}/artifacts/${encodedPath}`;
  link.download = path.split("/").pop() || path;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
}

/**
 * Trigger a browser download for a directory as a zip file.
 */
export function downloadDirectory(sessionId: string, path: string): void {
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  const link = document.createElement("a");
  link.href = `${API_BASE}/sessions/${sessionId}/download-directory/${encodedPath}`;
  link.download = "";
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
}

export interface FileContentResponse {
  content: string; // For text files: text content. For images: data URL (base64-encoded)
  mimeType: string;
  isImage?: boolean; // True if the content is an image data URL
  error?: string; // Error message if file can't be previewed
}

// Maximum file size for image preview (10MB)
const MAX_IMAGE_SIZE = 10 * 1024 * 1024;

/**
 * Fetch file content from the sandbox for preview.
 * Reuses the artifacts download endpoint but reads content as text.
 */
export async function fetchFileContent(
  sessionId: string,
  path: string
): Promise<FileContentResponse> {
  // Encode each path segment individually (spaces, special chars) but preserve slashes
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");

  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/artifacts/${encodedPath}`
  );

  if (!res.ok) {
    throw new Error(`Failed to fetch file content: ${res.status}`);
  }

  const mimeType = res.headers.get("Content-Type") || "text/plain";

  // For images, convert to data URL instead of blob URL (no cleanup needed)
  if (mimeType.startsWith("image/")) {
    const blob = await res.blob();

    // Check file size limit for images
    if (blob.size > MAX_IMAGE_SIZE) {
      return {
        content: "",
        mimeType,
        isImage: false,
        error: `Image too large to preview (${(
          blob.size /
          (1024 * 1024)
        ).toFixed(1)}MB). Maximum size is ${MAX_IMAGE_SIZE / (1024 * 1024)}MB.`,
      };
    }

    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onloadend = () => {
        // Verify result is a string
        if (typeof reader.result !== "string") {
          reject(new Error("FileReader returned unexpected type"));
          return;
        }
        resolve({
          content: reader.result,
          mimeType,
          isImage: true,
        });
      };
      reader.onerror = () => {
        reject(new Error(reader.error?.message || "Failed to read image file"));
      };
      reader.readAsDataURL(blob);
    });
  }

  const content = await res.text();
  return { content, mimeType, isImage: false };
}

// =============================================================================
// Usage Limits API
// =============================================================================

/** Transform API response to frontend types */
function transformUsageLimitsResponse(
  data: ApiUsageLimitsResponse
): UsageLimits {
  return {
    isLimited: data.is_limited,
    limitType: data.limit_type,
    messagesUsed: data.messages_used,
    limit: data.limit,
    resetTimestamp: data.reset_timestamp
      ? new Date(data.reset_timestamp)
      : null,
  };
}

export async function fetchUsageLimits(): Promise<UsageLimits> {
  const res = await fetch(USAGE_LIMITS_ENDPOINT);

  if (!res.ok) {
    throw new Error(`Failed to fetch usage limits: ${res.status}`);
  }

  const data: ApiUsageLimitsResponse = await res.json();
  return transformUsageLimitsResponse(data);
}

// =============================================================================
// File Upload API
// =============================================================================

export interface UploadFileResponse {
  filename: string;
  path: string;
  size_bytes: number;
}

/**
 * Upload a file to the session's sandbox.
 * The file will be placed in the sandbox's user_uploaded_files directory.
 */
export async function uploadFile(
  sessionId: string,
  file: File
): Promise<UploadFileResponse> {
  const formData = new FormData();
  formData.append("file", file);

  const res = await fetch(`${API_BASE}/sessions/${sessionId}/upload`, {
    method: "POST",
    body: formData,
  });

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to upload file: ${res.status}`);
  }

  return res.json();
}

/**
 * Delete a file from the session's sandbox.
 */
export async function deleteFile(
  sessionId: string,
  path: string
): Promise<void> {
  // Encode each path segment individually (spaces, special chars) but preserve slashes
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");

  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/files/${encodedPath}`,
    {
      method: "DELETE",
    }
  );

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to delete file: ${res.status}`);
  }
}

/**
 * Export a markdown file as DOCX.
 * Returns a Blob of the converted document.
 */
export async function exportDocx(
  sessionId: string,
  path: string
): Promise<Blob> {
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");

  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/export-docx/${encodedPath}`
  );

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to export as DOCX: ${res.status}`
    );
  }

  return res.blob();
}

// =============================================================================
// PPTX Preview API
// =============================================================================

export interface PptxPreviewResponse {
  slide_count: number;
  slide_paths: string[];
  cached: boolean;
}

/**
 * Fetch PPTX slide preview images.
 * Triggers on-demand conversion (soffice → pdftoppm) with disk caching.
 */
export async function fetchPptxPreview(
  sessionId: string,
  path: string
): Promise<PptxPreviewResponse> {
  const encodedPath = path
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");

  const res = await fetch(
    `${API_BASE}/sessions/${sessionId}/pptx-preview/${encodedPath}`
  );

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to generate PPTX preview: ${res.status}`
    );
  }

  return res.json();
}

// =============================================================================
// Connector Management API
// =============================================================================

export async function deleteConnector(
  connectorId: number,
  credentialId: number
): Promise<void> {
  const res = await fetch("/api/manage/admin/deletion-attempt", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      connector_id: connectorId,
      credential_id: credentialId,
    }),
  });

  if (!res.ok) {
    const errorData = await res.json();
    throw new Error(
      errorData.detail || `Failed to delete connector: ${res.status}`
    );
  }
}

// =============================================================================
// User Library API
// =============================================================================

import {
  LibraryEntry,
  CreateDirectoryRequest,
  UploadResponse,
} from "@/app/craft/types/user-library";

const USER_LIBRARY_BASE = `${API_BASE}/user-library`;

/**
 * Fetch the user's library tree (uploaded files).
 */
export async function fetchLibraryTree(): Promise<LibraryEntry[]> {
  const res = await fetch(`${USER_LIBRARY_BASE}/tree`);

  if (!res.ok) {
    throw new Error(`Failed to fetch library tree: ${res.status}`);
  }

  return res.json();
}

/**
 * Upload files to the user library.
 */
export async function uploadLibraryFiles(
  path: string,
  files: File[]
): Promise<UploadResponse> {
  const formData = new FormData();
  formData.append("path", path);
  for (const file of files) {
    formData.append("files", file);
  }

  const res = await fetch(`${USER_LIBRARY_BASE}/upload`, {
    method: "POST",
    body: formData,
  });

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to upload files: ${res.status}`
    );
  }

  return res.json();
}

/**
 * Upload and extract a zip file to the user library.
 */
export async function uploadLibraryZip(
  path: string,
  file: File
): Promise<UploadResponse> {
  const formData = new FormData();
  formData.append("path", path);
  formData.append("file", file);

  const res = await fetch(`${USER_LIBRARY_BASE}/upload-zip`, {
    method: "POST",
    body: formData,
  });

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to upload zip: ${res.status}`);
  }

  return res.json();
}

/**
 * Create a directory in the user library.
 */
export async function createLibraryDirectory(
  request: CreateDirectoryRequest
): Promise<LibraryEntry> {
  const res = await fetch(`${USER_LIBRARY_BASE}/directories`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to create directory: ${res.status}`
    );
  }

  return res.json();
}

/**
 * Toggle sync status for a file/directory in the user library.
 */
export async function toggleLibraryFileSync(
  documentId: string,
  enabled: boolean
): Promise<void> {
  const res = await fetch(
    `${USER_LIBRARY_BASE}/files/${encodeURIComponent(
      documentId
    )}/toggle?enabled=${enabled}`,
    {
      method: "PATCH",
    }
  );

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to toggle sync: ${res.status}`);
  }
}

/**
 * Delete a file/directory from the user library.
 */
export async function deleteLibraryFile(documentId: string): Promise<void> {
  const res = await fetch(
    `${USER_LIBRARY_BASE}/files/${encodeURIComponent(documentId)}`,
    {
      method: "DELETE",
    }
  );

  if (!res.ok) {
    const errorData = await res.json().catch(() => ({}));
    throw new Error(errorData.detail || `Failed to delete file: ${res.status}`);
  }
}


================================================
FILE: web/src/app/craft/services/searchParams.ts
================================================
import { ReadonlyURLSearchParams } from "next/navigation";

// search params for build pages
export const CRAFT_SEARCH_PARAM_NAMES = {
  SESSION_ID: "sessionId",
};

export function getSessionIdFromSearchParams(
  searchParams: ReadonlyURLSearchParams | null
): string | null {
  return searchParams?.get(CRAFT_SEARCH_PARAM_NAMES.SESSION_ID) ?? null;
}


================================================
FILE: web/src/app/craft/types/displayTypes.ts
================================================
/**
 * Display Types
 *
 * Simple FIFO types for rendering streaming content.
 * Items are stored and rendered in chronological order as they arrive.
 */

export type ToolCallKind =
  | "search"
  | "read"
  | "execute"
  | "edit"
  | "task"
  | "other";

// =============================================================================
// Todo List Types (for TodoWrite tool)
// =============================================================================

export type TodoStatus = "pending" | "in_progress" | "completed";

export interface TodoItem {
  /** The task description */
  content: string;
  /** Current status */
  status: TodoStatus;
  /** Present tense form shown during execution (e.g., "Creating API endpoint") */
  activeForm: string;
}

export interface TodoListState {
  /** Tool call ID */
  id: string;
  /** Array of todo items */
  todos: TodoItem[];
  /** Whether the card is expanded (UI state only) */
  isOpen: boolean;
}
export type ToolCallStatus =
  | "pending"
  | "in_progress"
  | "completed"
  | "failed"
  | "cancelled";

export interface ToolCallState {
  id: string;
  kind: ToolCallKind;
  title: string;
  description: string; // "Listing output directory" or task description
  command: string; // "ls outputs/" or task prompt for task kind
  status: ToolCallStatus;
  rawOutput: string; // Full output for expanded view
  /** For task tool calls: the subagent type (e.g., "explore", "plan") */
  subagentType?: string;
  /** For edit operations: whether this is a new file (write) or edit of existing */
  isNewFile?: boolean;
  /** For edit operations: the old content before the edit (empty for new files) */
  oldContent?: string;
  /** For edit operations: the new content after the edit */
  newContent?: string;
}

/**
 * StreamItem - A single item in the FIFO stream.
 * These are stored in chronological order and rendered directly.
 */
export type StreamItem =
  | { type: "text"; id: string; content: string; isStreaming: boolean }
  | { type: "thinking"; id: string; content: string; isStreaming: boolean }
  | { type: "tool_call"; id: string; toolCall: ToolCallState }
  | { type: "todo_list"; id: string; todoList: TodoListState };

/**
 * GroupedStreamItem - StreamItem after grouping transformation for rendering.
 * Consecutive working tool calls are grouped into a single "working_group" item.
 * Used by BuildMessageList to render consolidated Working pills.
 */
export type GroupedStreamItem =
  | { type: "text"; id: string; content: string; isStreaming: boolean }
  | { type: "thinking"; id: string; content: string; isStreaming: boolean }
  | { type: "tool_call"; id: string; toolCall: ToolCallState }
  | { type: "todo_list"; id: string; todoList: TodoListState }
  | { type: "working_group"; id: string; toolCalls: ToolCallState[] };


================================================
FILE: web/src/app/craft/types/streamingTypes.ts
================================================
// =============================================================================
// Sharing Types
// =============================================================================

export type SharingScope = "private" | "public_org" | "public_global";

// =============================================================================
// Session Error Constants
// =============================================================================

export const SessionErrorCode = {
  RATE_LIMIT_EXCEEDED: "RATE_LIMIT_EXCEEDED",
} as const;

export type SessionErrorCode =
  (typeof SessionErrorCode)[keyof typeof SessionErrorCode];

// =============================================================================
// Usage Limits Types
// =============================================================================

export type LimitType = "weekly" | "total";

export interface UsageLimits {
  /** Whether the user has reached their limit */
  isLimited: boolean;
  /** Type of limit period: "weekly" for paid, "total" for free */
  limitType: LimitType;
  /** Number of messages used in current period */
  messagesUsed: number;
  /** Maximum messages allowed in the period */
  limit: number;
  /** For weekly limits: timestamp when the limit resets (null for total limits) */
  resetTimestamp: Date | null;
}

// API response shape (snake_case from backend)
export interface ApiUsageLimitsResponse {
  is_limited: boolean;
  limit_type: LimitType;
  messages_used: number;
  limit: number;
  reset_timestamp: string | null;
}

// =============================================================================
// Artifact & Message Types
// =============================================================================

export type ArtifactType =
  | "nextjs_app"
  | "web_app" // Backend sends this
  | "pptx"
  | "xlsx"
  | "docx"
  | "markdown"
  | "chart"
  | "csv"
  | "image";

export interface Artifact {
  id: string;
  session_id: string;
  type: ArtifactType;
  name: string;
  path: string;
  preview_url?: string | null;
  created_at: Date;
  updated_at: Date;
}

export interface BuildMessage {
  id: string;
  type: "user" | "assistant" | "system";
  content: string;
  timestamp: Date;
  /** Structured ACP event data (tool calls, thinking, plans) */
  message_metadata?: Record<string, any> | null;
  /** Tool calls associated with this message (for agent messages) */
  toolCalls?: ToolCall[];
}

// =============================================================================
// Tool Call Types (for tracking agent tool usage)
// =============================================================================

export type ToolCallStatus =
  | "pending"
  | "in_progress"
  | "completed"
  | "failed"
  | "cancelled";

export interface ToolCall {
  /** Unique ID for this tool call */
  id: string;
  /** Tool kind/category (e.g., "edit", "execute", "other") */
  kind: string;
  /** Tool name (e.g., "write", "bash", "ls") */
  name: string;
  /** Human-readable title */
  title: string;
  /** Current status */
  status: ToolCallStatus;
  /** Tool input parameters */
  input?: Record<string, unknown>;
  /** Raw input from ACP (complete command/parameters) */
  raw_input?: Record<string, any> | null;
  /** Raw output from ACP (complete result) */
  raw_output?: Record<string, any> | null;
  /** Content block from ACP (description text) */
  content?: any | null;
  /** Result content (when completed) */
  result?: string;
  /** Error message (when failed) */
  error?: string;
  /** When the tool call started */
  startedAt: Date;
  /** When the tool call finished */
  finishedAt?: Date;
}

export type SessionStatus =
  | "idle"
  | "creating"
  | "running"
  | "active"
  | "failed";

export interface Session {
  id: string | null;
  status: SessionStatus;
  artifacts: Artifact[];
  messages: BuildMessage[];
  error: string | null;
  webappUrl: string | null;
}

export interface SessionHistoryItem {
  id: string;
  title: string;
  createdAt: Date;
}

// =============================================================================
// API Response Types
// =============================================================================

export interface ApiSandboxResponse {
  id: string;
  status:
    | "provisioning"
    | "running"
    | "idle"
    | "sleeping"
    | "terminated"
    | "failed"
    | "restoring"; // Frontend-only: set during snapshot restore
  container_id: string | null;
  created_at: string;
  last_heartbeat: string | null;
  nextjs_port: number | null;
}

export interface ApiSessionResponse {
  id: string;
  user_id: string | null;
  name: string | null;
  status: "active" | "idle" | "archived";
  created_at: string;
  last_activity_at: string;
  sandbox: ApiSandboxResponse | null;
  artifacts: ApiArtifactResponse[];
  sharing_scope: SharingScope;
}

export interface ApiDetailedSessionResponse extends ApiSessionResponse {
  session_loaded_in_sandbox: boolean;
}

export interface ApiMessageResponse {
  id: string;
  session_id: string;
  type: "user" | "assistant";
  content: string;
  message_metadata?: Record<string, any> | null;
  created_at: string;
}

export interface ApiArtifactResponse {
  id: string;
  session_id: string;
  type: ArtifactType;
  path: string;
  name: string;
  created_at: string;
  updated_at: string;
  preview_url?: string | null;
}

export interface ApiWebappInfoResponse {
  has_webapp: boolean;
  webapp_url: string | null;
  status: string;
  ready: boolean;
  sharing_scope: SharingScope;
}

export interface FileSystemEntry {
  name: string;
  path: string;
  is_directory: boolean;
  size: number | null;
  mime_type: string | null;
}

export interface DirectoryListing {
  path: string;
  entries: FileSystemEntry[];
}

// =============================================================================
// SSE Packet Types (matching backend build_packet_types.py)
// =============================================================================

// Step/Thinking Packets
export interface StepStartPacket {
  type: "step_start";
  step_id: string;
  step_name?: string;
  timestamp: string;
}

export interface StepDeltaPacket {
  type: "step_delta";
  step_id: string;
  content: string;
  timestamp: string;
}

export interface StepEndPacket {
  type: "step_end";
  step_id: string;
  status: "completed" | "failed" | "cancelled";
  timestamp: string;
}

// Tool Call Packets
export interface ToolStartPacket {
  type: "tool_start";
  tool_call_id: string;
  tool_name: string;
  tool_input: Record<string, any>;
  title?: string;
  timestamp: string;
}

export interface ToolProgressPacket {
  type: "tool_progress";
  tool_call_id: string;
  tool_name: string;
  status: "pending" | "in_progress" | "completed" | "failed" | "cancelled";
  progress?: number;
  message?: string;
  timestamp: string;
}

export interface ToolEndPacket {
  type: "tool_end";
  tool_call_id: string;
  tool_name: string;
  status: "success" | "error" | "cancelled";
  result?: string | Record<string, any>;
  error?: string;
  timestamp: string;
}

// Agent Output Packets
export interface OutputStartPacket {
  type: "output_start";
  timestamp: string;
}

export interface OutputDeltaPacket {
  type: "output_delta";
  content: string;
  timestamp: string;
}

export interface OutputEndPacket {
  type: "output_end";
  timestamp: string;
}

// Plan Packets
export interface PlanEntry {
  id: string;
  description: string;
  status: "pending" | "in_progress" | "completed" | "cancelled";
  priority?: number;
}

export interface PlanPacket {
  type: "plan";
  plan?: string;
  entries?: PlanEntry[];
  timestamp: string;
}

// Mode Update Packets
export interface ModeUpdatePacket {
  type: "mode_update";
  mode: string;
  description?: string;
  timestamp: string;
}

// Completion Packets
export interface DonePacket {
  type: "done";
  summary: string;
  stop_reason?:
    | "end_turn"
    | "max_tokens"
    | "max_turn_requests"
    | "refusal"
    | "cancelled";
  usage?: Record<string, any>;
  timestamp: string;
}

// Error Packets
export interface ErrorPacket {
  type: "error";
  message: string;
  code?: number;
  details?: Record<string, any>;
  timestamp: string;
}

// File Write Packets
export interface FileWritePacket {
  type: "file_write";
  path: string;
  size_bytes?: number;
  operation: "create" | "update" | "delete";
  timestamp: string;
}

// Artifact Packets
export type BackendArtifactType =
  | "web_app"
  | "markdown"
  | "image"
  | "csv"
  | "excel"
  | "pptx"
  | "docx"
  | "pdf"
  | "code"
  | "other";

export interface ArtifactCreatedPacket {
  type: "artifact_created";
  artifact: {
    id: string;
    type: BackendArtifactType;
    name: string;
    path: string;
    preview_url?: string;
    download_url?: string;
    mime_type?: string;
    size_bytes?: number;
  };
  timestamp: string;
}

// Permission Packets (for future use)
export interface PermissionRequestPacket {
  type: "permission_request";
  request_id: string;
  operation: string;
  description: string;
  auto_approve: boolean;
  timestamp: string;
}

export interface PermissionResponsePacket {
  type: "permission_response";
  request_id: string;
  approved: boolean;
  reason?: string;
  timestamp: string;
}

// =============================================================================
// Raw ACP Packets (sent directly from backend with ALL ACP fields)
// =============================================================================

// Content block types from ACP
export interface TextContentBlock {
  type: "text";
  text: string;
}

export interface ImageContentBlock {
  type: "image";
  data: string;
  mimeType: string;
}

export type ContentBlock =
  | TextContentBlock
  | ImageContentBlock
  | Record<string, any>;

// Base ACP event fields
export interface ACPBaseEvent {
  field_meta?: Record<string, any> | null; // _meta field for extensibility
  timestamp: string;
}

// ACP: agent_message_chunk - Agent's text/content output
export interface AgentMessageChunkPacket extends ACPBaseEvent {
  type: "agent_message_chunk";
  content: ContentBlock;
  session_update?: string;
}

// ACP: agent_thought_chunk - Agent's internal reasoning
export interface AgentThoughtChunkPacket extends ACPBaseEvent {
  type: "agent_thought_chunk";
  content: ContentBlock;
  session_update?: string;
}

// ACP: tool_call_start - Tool invocation started
export interface ToolCallStartPacket extends ACPBaseEvent {
  type: "tool_call_start";
  tool_call_id: string;
  kind: string | null;
  title: string | null;
  content: ContentBlock | null;
  locations: string[] | null;
  raw_input: Record<string, any> | null;
  raw_output: Record<string, any> | null;
  status: string | null;
  session_update?: string;
}

// ACP: tool_call_progress - Tool execution progress/completion
export interface ToolCallProgressPacket extends ACPBaseEvent {
  type: "tool_call_progress";
  tool_call_id: string;
  kind: string | null;
  title: string | null;
  content: ContentBlock | null;
  locations: string[] | null;
  raw_input: Record<string, any> | null;
  raw_output: Record<string, any> | null;
  status: string | null;
  session_update?: string;
}

// ACP: agent_plan_update - Agent's execution plan
export interface AgentPlanUpdatePacket extends ACPBaseEvent {
  type: "agent_plan_update";
  entries: Array<{
    id: string;
    description: string;
    status: string;
    priority: string | number | null;
  }> | null;
  session_update?: string;
}

// ACP: current_mode_update - Agent mode change
export interface CurrentModeUpdatePacket extends ACPBaseEvent {
  type: "current_mode_update";
  current_mode_id: string | null;
  session_update?: string;
}

// ACP: prompt_response - Agent finished processing
export interface PromptResponsePacket extends ACPBaseEvent {
  type: "prompt_response";
  stop_reason: string | null;
}

// ACP: error - Error from ACP
export interface ACPErrorPacket {
  type: "error";
  code: string | null;
  message: string;
  data: Record<string, any> | null;
  timestamp: string;
}

// Union type for all packets (including raw ACP packets)
export type StreamPacket =
  // Raw ACP packets with ALL fields
  | AgentMessageChunkPacket
  | AgentThoughtChunkPacket
  | ToolCallStartPacket
  | ToolCallProgressPacket
  | AgentPlanUpdatePacket
  | CurrentModeUpdatePacket
  | PromptResponsePacket
  | ACPErrorPacket
  // Custom Onyx packets
  | StepStartPacket
  | StepDeltaPacket
  | StepEndPacket
  | ToolStartPacket
  | ToolProgressPacket
  | ToolEndPacket
  | OutputStartPacket
  | OutputDeltaPacket
  | OutputEndPacket
  | PlanPacket
  | ModeUpdatePacket
  | DonePacket
  | ErrorPacket
  | FileWritePacket
  | ArtifactCreatedPacket
  | PermissionRequestPacket
  | PermissionResponsePacket
  | { type: string; timestamp?: string }; // catch-all for unknown packet types


================================================
FILE: web/src/app/craft/types/user-library.ts
================================================
/**
 * Types for User Library - raw binary file uploads in Craft.
 */

export interface LibraryEntry {
  id: string; // document_id
  name: string;
  path: string;
  is_directory: boolean;
  file_size: number | null;
  mime_type: string | null;
  sync_enabled: boolean;
  created_at: string;
  children?: LibraryEntry[];
}

export interface CreateDirectoryRequest {
  name: string;
  parent_path: string;
}

export interface UploadResponse {
  entries: LibraryEntry[];
  total_uploaded: number;
  total_size_bytes: number;
}


================================================
FILE: web/src/app/craft/utils/packetTypes.ts
================================================
/**
 * Packet Types
 *
 * Type definitions for raw and parsed ACP packets.
 * Centralizes all snake_case / camelCase field resolution.
 * Defines the ParsedPacket discriminated union consumed by both
 * useBuildStreaming (live SSE) and useBuildSessionStore (DB reload).
 */

import type { TodoItem } from "../types/displayTypes";

// Re-export from displayTypes — single source of truth
export type {
  ToolCallKind as ToolKind,
  ToolCallStatus as ToolStatus,
} from "../types/displayTypes";

// ─── Raw Packet Field Access ─────────────────────────────────────────
// Every backend field name variant is listed ONCE here.

export function getRawInput(
  p: Record<string, unknown>
): Record<string, unknown> | null {
  return (p.raw_input ?? p.rawInput ?? null) as Record<string, unknown> | null;
}

export function getRawOutput(
  p: Record<string, unknown>
): Record<string, unknown> | null {
  return (p.raw_output ?? p.rawOutput ?? null) as Record<
    string,
    unknown
  > | null;
}

export function getToolCallId(p: Record<string, unknown>): string {
  return (p.tool_call_id ?? p.toolCallId ?? "") as string;
}

export function getToolNameRaw(p: Record<string, unknown>): string {
  // Prefer explicit tool_name fields
  const explicit = (p.tool_name ?? p.toolName ?? "") as string;
  if (explicit) return explicit.toLowerCase();

  // Fall back to title only if it looks like a simple tool name
  // (no spaces or newlines — otherwise it's a human-readable description)
  const title = (p.title ?? "") as string;
  if (title && !title.includes(" ") && !title.includes("\n")) {
    return title.toLowerCase();
  }

  return "";
}

// ─── Parsed Packet Types (Discriminated Union) ──────────────────────

export type ToolName =
  | "glob"
  | "grep"
  | "read"
  | "write"
  | "edit"
  | "bash"
  | "task"
  | "todowrite"
  | "webfetch"
  | "websearch"
  | "unknown";

export interface ParsedTextChunk {
  type: "text_chunk";
  text: string;
}

export interface ParsedThinkingChunk {
  type: "thinking_chunk";
  text: string;
}

export interface ParsedToolCallStart {
  type: "tool_call_start";
  toolCallId: string;
  toolName: ToolName;
  kind: import("../types/displayTypes").ToolCallKind;
  isTodo: boolean;
}

export interface ParsedToolCallProgress {
  type: "tool_call_progress";
  toolCallId: string;
  toolName: ToolName;
  kind: import("../types/displayTypes").ToolCallKind;
  status: import("../types/displayTypes").ToolCallStatus;
  isTodo: boolean;
  // Pre-extracted, pre-sanitized fields (ready for display)
  title: string;
  description: string;
  command: string;
  rawOutput: string;
  filePath: string; // Session-relative
  subagentType: string | null;
  // Edit-specific
  isNewFile: boolean;
  oldContent: string;
  newContent: string;
  // Todo-specific
  todos: TodoItem[];
  // Task-specific
  taskOutput: string | null;
}

export interface ParsedPromptResponse {
  type: "prompt_response";
}

export interface ParsedArtifact {
  type: "artifact_created";
  artifact: {
    id: string;
    type: string;
    name: string;
    path: string;
    preview_url: string | null;
  };
}

export interface ParsedError {
  type: "error";
  message: string;
}

export interface ParsedUnknown {
  type: "unknown";
}

export type ParsedPacket =
  | ParsedTextChunk
  | ParsedThinkingChunk
  | ParsedToolCallStart
  | ParsedToolCallProgress
  | ParsedPromptResponse
  | ParsedArtifact
  | ParsedError
  | ParsedUnknown;


================================================
FILE: web/src/app/craft/utils/parsePacket.ts
================================================
/**
 * Parse Packet
 *
 * Single entry point for converting raw ACP packets into strongly-typed
 * ParsedPacket values. All field resolution, tool detection, and path
 * sanitization happen here. Consumers never touch Record<string, unknown>.
 */

import { stripSessionPrefix, sanitizePathsInText } from "./pathSanitizer";
import {
  getRawInput,
  getRawOutput,
  getToolCallId,
  getToolNameRaw,
  type ParsedPacket,
  type ParsedToolCallStart,
  type ParsedToolCallProgress,
  type ParsedArtifact,
  type ToolName,
  type ToolKind,
  type ToolStatus,
} from "./packetTypes";
import type { TodoItem, TodoStatus } from "../types/displayTypes";

export function parsePacket(raw: unknown): ParsedPacket {
  if (!raw || typeof raw !== "object") return { type: "unknown" };
  const p = raw as Record<string, unknown>;
  const packetType = p.type as string | undefined;

  switch (packetType) {
    case "agent_message_chunk": // Live SSE
    case "agent_message": // DB-stored format
      return { type: "text_chunk", text: extractText(p.content) };

    case "agent_thought_chunk": // Live SSE
    case "agent_thought": // DB-stored format
      return { type: "thinking_chunk", text: extractText(p.content) };

    case "tool_call_start":
      return parseToolCallStart(p);

    case "tool_call_progress":
      return parseToolCallProgress(p);

    case "prompt_response":
      return { type: "prompt_response" };

    case "artifact_created":
      return parseArtifact(p);

    case "error":
      return { type: "error", message: (p.message ?? "") as string };

    default:
      return { type: "unknown" };
  }
}

// ─── Tool Name Resolution ─────────────────────────────────────────

const NAME_MAP: Record<string, ToolName> = {
  glob: "glob",
  grep: "grep",
  read: "read",
  write: "write",
  edit: "edit",
  bash: "bash",
  task: "task",
  todowrite: "todowrite",
  todo_write: "todowrite",
  webfetch: "webfetch",
  websearch: "websearch",
};

function resolveToolName(p: Record<string, unknown>): ToolName {
  const rawName = getToolNameRaw(p);

  if (NAME_MAP[rawName]) return NAME_MAP[rawName];

  // Fallback: detect by rawInput shape (handles title changes on completion)
  const ri = getRawInput(p);
  if (ri?.subagent_type || ri?.subagentType) return "task";
  if (ri?.todos && Array.isArray(ri.todos)) return "todowrite";

  // Detect tools by rawInput fields (opencode agent uses different field names)
  if (ri?.patchText && typeof ri.patchText === "string") return "edit";
  if (ri?.command && typeof ri.command === "string") return "bash";

  // Fallback: use backend-provided kind to infer tool name
  const rawKind = (p.kind as string) ?? null;
  if (rawKind === "execute") return "bash";
  if (rawKind === "read") return "read";
  if (rawKind === "edit" || rawKind === "delete" || rawKind === "move")
    return "edit";
  if (rawKind === "search") return "glob";
  if (rawKind === "fetch") return "webfetch";

  return "unknown";
}

const TOOL_KIND_MAP: Record<ToolName, ToolKind> = {
  glob: "search",
  grep: "search",
  read: "read",
  write: "edit",
  edit: "edit",
  bash: "execute",
  task: "task",
  todowrite: "other",
  webfetch: "other",
  websearch: "search",
  unknown: "other",
};

function resolveKind(toolName: ToolName, rawKind: string | null): ToolKind {
  const fromName = TOOL_KIND_MAP[toolName];
  if (fromName !== "other") return fromName;

  // Fall back to backend-provided kind
  if (
    rawKind === "search" ||
    rawKind === "read" ||
    rawKind === "execute" ||
    rawKind === "edit" ||
    rawKind === "task"
  ) {
    return rawKind;
  }
  return "other";
}

// ─── Shared Helpers ───────────────────────────────────────────────

/** Extract text from ACP content structure (string, {type,text}, or array) */
function extractText(content: unknown): string {
  if (!content) return "";
  if (typeof content === "string") return content;
  if (typeof content === "object" && content !== null) {
    const obj = content as Record<string, unknown>;
    if (obj.type === "text" && typeof obj.text === "string") return obj.text;
    if (Array.isArray(content)) {
      return content
        .filter(
          (c: Record<string, unknown>) =>
            c?.type === "text" && typeof c.text === "string"
        )
        .map((c: Record<string, unknown>) => c.text)
        .join("");
    }
    if (typeof obj.text === "string") return obj.text;
  }
  return "";
}

function normalizeStatus(status: string | null | undefined): ToolStatus {
  if (
    status === "pending" ||
    status === "in_progress" ||
    status === "completed" ||
    status === "failed" ||
    status === "cancelled"
  ) {
    return status;
  }
  return "pending";
}

// ─── Edit / Diff Extraction ──────────────────────────────────────

/** Extract oldText and newText from content[].type==="diff" items */
function extractDiffData(content: unknown): {
  oldText: string;
  newText: string;
  isNewFile: boolean;
} {
  if (!Array.isArray(content))
    return { oldText: "", newText: "", isNewFile: true };
  let oldText = "";
  let newText = "";
  for (const item of content) {
    if (item?.type === "diff") {
      if (typeof item.oldText === "string") oldText = item.oldText;
      if (typeof item.newText === "string") newText = item.newText;
    }
  }
  return { oldText, newText, isNewFile: oldText === "" };
}

/** Extract file path from content[].type==="diff" items (fallback when rawInput has no path) */
function extractDiffPath(p: Record<string, unknown>): string {
  const content = p.content as unknown[] | undefined;
  if (!Array.isArray(content)) return "";
  for (const item of content) {
    if (
      item &&
      typeof item === "object" &&
      (item as Record<string, unknown>).type === "diff"
    ) {
      const diffPath = (item as Record<string, unknown>).path as
        | string
        | undefined;
      if (diffPath) return stripSessionPrefix(diffPath);
    }
  }
  // Final fallback: title field may contain a file path
  const title = p.title as string | undefined;
  if (title && title.includes("/")) return stripSessionPrefix(title);
  return "";
}

// ─── Patch Text Extraction (opencode agent) ─────────────────────

/** Extract file path and new-file flag from opencode's patch format.
 *  Format: "*** Update File: path" or "*** Add File: path" */
function extractPatchInfo(
  patchText: string
): { path: string; isNew: boolean } | null {
  const match = patchText.match(
    /\*\*\*\s+(Update|Add|Delete)\s+File:\s*(.+?)(?:\n|$)/
  );
  if (match?.[2]) {
    return {
      path: stripSessionPrefix(match[2].trim()),
      isNew: match[1] === "Add",
    };
  }
  return null;
}

// ─── Description Builder ─────────────────────────────────────────

function buildDescription(
  toolName: ToolName,
  kind: ToolKind,
  filePath: string,
  ri: Record<string, unknown> | null,
  rawDescription: string
): string {
  // Task tool: use description from rawInput
  if (toolName === "task") {
    return rawDescription || "Running subagent";
  }
  // Read/edit: show file path
  if (kind === "read" || kind === "edit") {
    if (filePath) return filePath;
  }
  // Execute: use backend description
  if (kind === "execute") {
    return sanitizePathsInText(rawDescription) || "Running command";
  }
  // Search: show pattern
  if (
    (toolName === "glob" || toolName === "grep" || kind === "search") &&
    ri?.pattern &&
    typeof ri.pattern === "string"
  ) {
    return ri.pattern as string;
  }
  return buildTitle(toolName, kind, true);
}

// ─── Title Builder ───────────────────────────────────────────────

function buildTitle(
  toolName: ToolName,
  kind: ToolKind,
  isNewFile: boolean
): string {
  // Edit/write: distinguish "Writing" (new file) vs "Editing" (existing)
  if (kind === "edit") return isNewFile ? "Writing" : "Editing";

  const TITLES: Record<ToolName, string> = {
    glob: "Searching files",
    grep: "Searching content",
    read: "Reading",
    write: "Writing",
    edit: "Editing",
    bash: "Running command",
    task: "Running task",
    todowrite: "Updating todos",
    webfetch: "Fetching web content",
    websearch: "Searching web",
    unknown: "Running tool",
  };

  // When toolName is unknown, use kind for a more specific title
  if (toolName === "unknown") {
    const KIND_TITLES: Partial<Record<ToolKind, string>> = {
      search: "Searching",
      read: "Reading",
      execute: "Running command",
      task: "Running task",
    };
    return KIND_TITLES[kind] || TITLES.unknown;
  }

  return TITLES[toolName];
}

// ─── Raw Output Extraction ───────────────────────────────────────

/** Extract the appropriate output text based on tool kind.
 *  Returns raw unsanitized text — caller applies sanitizePathsInText. */
function extractRawOutputText(
  toolName: ToolName,
  kind: ToolKind,
  p: Record<string, unknown>,
  ro: Record<string, unknown> | null
): string {
  // Task tool: show the prompt (not the output JSON)
  if (toolName === "task") {
    const ri = getRawInput(p);
    if (ri?.prompt && typeof ri.prompt === "string") return ri.prompt as string;
    return "";
  }
  // Execute: prefer metadata.output, then output
  if (kind === "execute") {
    if (!ro) return "";
    const metadata = ro.metadata as Record<string, unknown> | null;
    return (metadata?.output || ro.output || "") as string;
  }
  // Read: extract file content from <file>...</file> wrapper
  if (kind === "read") {
    const fileContent = extractFileContent(p.content);
    if (fileContent) return fileContent;
    if (!ro) return "";
    if (typeof ro.content === "string") return ro.content;
    return JSON.stringify(ro, null, 2);
  }
  // Edit: show new text from diff
  if (kind === "edit") {
    const content = p.content as unknown[] | undefined;
    if (Array.isArray(content)) {
      for (const item of content) {
        const rec = item as Record<string, unknown> | null;
        if (rec?.type === "diff" && typeof rec.newText === "string")
          return rec.newText as string;
      }
    }
    // Fallback: show patchText from rawInput (opencode agent)
    const ri = getRawInput(p);
    if (ri?.patchText && typeof ri.patchText === "string")
      return ri.patchText as string;
    if (!ro) return "";
    // Prefer output string over JSON dump
    if (typeof ro.output === "string") return ro.output;
    return JSON.stringify(ro, null, 2);
  }
  // Search: files list or output string
  if (toolName === "glob" || toolName === "grep" || kind === "search") {
    if (!ro) return "";
    if (typeof ro.output === "string") return ro.output;
    if (ro.files && Array.isArray(ro.files))
      return (ro.files as string[]).join("\n");
    return JSON.stringify(ro, null, 2);
  }
  // Fallback
  if (!ro) return "";
  return JSON.stringify(ro, null, 2);
}

/** Extract file content from content[].type==="content" items, stripping line numbers */
function extractFileContent(content: unknown): string {
  if (!Array.isArray(content)) return "";
  for (const item of content) {
    if (item?.type === "content" && item?.content?.type === "text") {
      const text = item.content.text as string;
      const fileMatch = text.match(
        /<file>\n?([\s\S]*?)\n?\(End of file[^)]*\)\n?<\/file>/
      );
      if (fileMatch?.[1]) {
        return fileMatch[1].replace(/^\d+\| /gm, "");
      }
      return text;
    }
  }
  return "";
}

// ─── Todo Extraction ─────────────────────────────────────────────

function extractTodos(ri: Record<string, unknown> | null): TodoItem[] {
  if (!ri?.todos || !Array.isArray(ri.todos)) return [];
  return ri.todos.map((t: Record<string, unknown>) => ({
    content: (t.content as string) || "",
    status: normalizeTodoStatus(t.status),
    activeForm: (t.activeForm as string) || (t.content as string) || "",
  }));
}

function normalizeTodoStatus(status: unknown): TodoStatus {
  if (
    status === "pending" ||
    status === "in_progress" ||
    status === "completed"
  )
    return status;
  return "pending";
}

// ─── Task Output Extraction ──────────────────────────────────────

function extractTaskOutput(ro: Record<string, unknown> | null): string | null {
  if (!ro?.output || typeof ro.output !== "string") return null;
  return (
    ro.output.replace(/<task_metadata>[\s\S]*?<\/task_metadata>/g, "").trim() ||
    null
  );
}

// ─── Artifact Parsing ─────────────────────────────────────────────

function parseArtifact(p: Record<string, unknown>): ParsedArtifact {
  const artifact = p.artifact as Record<string, unknown> | undefined;
  return {
    type: "artifact_created",
    artifact: {
      id: (artifact?.id ?? "") as string,
      type: (artifact?.type ?? "") as string,
      name: (artifact?.name ?? "") as string,
      path: (artifact?.path ?? "") as string,
      preview_url: (artifact?.preview_url as string) || null,
    },
  };
}

// ─── Tool Call Parsing ────────────────────────────────────────────

function parseToolCallStart(p: Record<string, unknown>): ParsedToolCallStart {
  const toolName = resolveToolName(p);
  const rawKind = p.kind as string | null;
  return {
    type: "tool_call_start",
    toolCallId: getToolCallId(p),
    toolName,
    kind: resolveKind(toolName, rawKind),
    isTodo: toolName === "todowrite",
  };
}

function parseToolCallProgress(
  p: Record<string, unknown>
): ParsedToolCallProgress {
  const toolName = resolveToolName(p);
  const rawKind = p.kind as string | null;
  const kind = resolveKind(toolName, rawKind);
  const ri = getRawInput(p);
  const ro = getRawOutput(p);
  const isTodo = toolName === "todowrite";

  // ── Edit-specific (extracted first — isNewFile needed by buildTitle) ──
  const diffData =
    kind === "edit"
      ? extractDiffData(p.content)
      : { oldText: "", newText: "", isNewFile: true };

  // ── Patch info (opencode agent uses patchText instead of file_path) ──
  const patchInfo =
    kind === "edit" && ri?.patchText && typeof ri.patchText === "string"
      ? extractPatchInfo(ri.patchText as string)
      : null;

  // ── File path (structured field → stripSessionPrefix) ──────────
  const rawFilePath = (ri?.file_path ??
    ri?.filePath ??
    ri?.path ??
    "") as string;
  let filePath = rawFilePath
    ? stripSessionPrefix(rawFilePath)
    : extractDiffPath(p);

  // Fallback: extract from patchText
  if (!filePath && patchInfo) {
    filePath = patchInfo.path;
  }

  // ── Command (freeform → sanitizePathsInText) ──────────────────
  const rawCommand = (ri?.command ?? "") as string;
  const command = sanitizePathsInText(rawCommand);

  // ── Description ───────────────────────────────────────────────
  const rawDescription = (ri?.description ?? "") as string;
  const description = buildDescription(
    toolName,
    kind,
    filePath,
    ri,
    rawDescription
  );

  // ── Output (freeform → sanitizePathsInText) ───────────────────
  const rawOutputText = extractRawOutputText(toolName, kind, p, ro);
  const rawOutput = sanitizePathsInText(rawOutputText);

  // ── Title ─────────────────────────────────────────────────────
  const title = buildTitle(toolName, kind, diffData.isNewFile);

  // ── Status ────────────────────────────────────────────────────
  const status = normalizeStatus(p.status as string | null);

  // ── Todo-specific ─────────────────────────────────────────────
  const todos = isTodo ? extractTodos(ri) : [];

  // ── Task-specific ─────────────────────────────────────────────
  const subagentType = (ri?.subagent_type ?? ri?.subagentType ?? null) as
    | string
    | null;
  const taskOutput =
    toolName === "task" && status === "completed"
      ? extractTaskOutput(ro)
      : null;

  return {
    type: "tool_call_progress",
    toolCallId: getToolCallId(p),
    toolName,
    kind,
    status,
    isTodo,
    title,
    description,
    command,
    rawOutput,
    filePath,
    subagentType,
    isNewFile:
      diffData.oldText || diffData.newText
        ? diffData.isNewFile
        : patchInfo?.isNew ?? diffData.isNewFile,
    oldContent: diffData.oldText,
    newContent: diffData.newText,
    todos,
    taskOutput,
  };
}


================================================
FILE: web/src/app/craft/utils/pathSanitizer.test.ts
================================================
import { stripSessionPrefix, sanitizePathsInText } from "./pathSanitizer";

// =============================================================================
// stripSessionPrefix
// =============================================================================

describe("stripSessionPrefix", () => {
  it("returns empty string for empty input", () => {
    expect(stripSessionPrefix("")).toBe("");
  });

  // ── Local dev (sandboxes + sessions) ────────────────────────────────

  it("strips local sandboxes/sessions prefix", () => {
    expect(
      stripSessionPrefix(
        "/Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/AGENTS.md"
      )
    ).toBe("outputs/web/AGENTS.md");
  });

  it("strips local sandboxes/sessions prefix for files/ directory", () => {
    expect(
      stripSessionPrefix(
        "/Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/files/linear/Engineering/ticket.json"
      )
    ).toBe("files/linear/Engineering/ticket.json");
  });

  it("strips sandboxes/sessions even with non-standard prefix", () => {
    expect(
      stripSessionPrefix(
        "/data/sandboxes/abcdef1234567890abcdef1234567890ab/sessions/abcdef1234567890abcdef1234567890ab/file.txt"
      )
    ).toBe("file.txt");
  });

  // ── Kubernetes (sessions only) ──────────────────────────────────────

  it("strips kubernetes sessions prefix", () => {
    expect(
      stripSessionPrefix(
        "/workspace/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/page.tsx"
      )
    ).toBe("outputs/web/page.tsx");
  });

  it("strips kubernetes sessions with short prefix", () => {
    expect(
      stripSessionPrefix("/some/path/sessions/def-456/files/data.json")
    ).toBe("files/data.json");
  });

  // ── Already relative ────────────────────────────────────────────────

  it("returns already-relative paths unchanged", () => {
    expect(stripSessionPrefix("outputs/web/page.tsx")).toBe(
      "outputs/web/page.tsx"
    );
  });

  it("strips leading slash from short paths", () => {
    expect(stripSessionPrefix("/file.txt")).toBe("file.txt");
  });

  // ── Title field (no leading /) ──────────────────────────────────────

  it("handles title field without leading slash (sandboxes path)", () => {
    expect(
      stripSessionPrefix(
        "Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/page.tsx"
      )
    ).toBe("outputs/web/page.tsx");
  });

  // ── Fallback (unknown format, >3 segments) ──────────────────────────

  it("falls back to last 3 segments for unknown deep paths", () => {
    expect(stripSessionPrefix("/some/unknown/deep/path/to/file.tsx")).toBe(
      "path/to/file.tsx"
    );
  });

  // ── Short paths ─────────────────────────────────────────────────────

  it("returns short relative path as-is", () => {
    expect(stripSessionPrefix("file.txt")).toBe("file.txt");
  });

  it("returns 3-segment path as-is", () => {
    expect(stripSessionPrefix("a/b/c")).toBe("a/b/c");
  });
});

// =============================================================================
// sanitizePathsInText
// =============================================================================

describe("sanitizePathsInText", () => {
  it("returns empty string for empty input", () => {
    expect(sanitizePathsInText("")).toBe("");
  });

  // ── Bash commands ───────────────────────────────────────────────────

  it("strips local sandboxes path from cd command", () => {
    expect(
      sanitizePathsInText(
        "cd /Users/wenxi-onyx/data/sandboxes/abc-123/sessions/def-456/outputs/web && python3 prepare.py"
      )
    ).toBe("cd outputs/web && python3 prepare.py");
  });

  it("strips multiple paths in a single command", () => {
    expect(
      sanitizePathsInText(
        "chmod +x /Users/wenxi/data/sandboxes/abc/sessions/def/outputs/web/prepare.sh && /Users/wenxi/data/sandboxes/abc/sessions/def/outputs/web/prepare.sh"
      )
    ).toBe("chmod +x outputs/web/prepare.sh && outputs/web/prepare.sh");
  });

  // ── Output listings ─────────────────────────────────────────────────

  it("strips kubernetes paths from ls output", () => {
    expect(
      sanitizePathsInText(
        "/workspace/sessions/def-456/outputs/web/page.tsx\n/workspace/sessions/def-456/outputs/web/globals.css"
      )
    ).toBe("outputs/web/page.tsx\noutputs/web/globals.css");
  });

  it("strips local paths from find output", () => {
    expect(
      sanitizePathsInText(
        "find /Users/wenxi/data/sandboxes/abc/sessions/def/files/linear -type d"
      )
    ).toBe("find files/linear -type d");
  });

  // ── No paths — passthrough ──────────────────────────────────────────

  it("returns text without sandbox/session paths unchanged", () => {
    const text =
      "total 0\ndrwxr-xr-x@ 3 wenxi-onyx  staff  96 Jan 21 15:18 .\n";
    expect(sanitizePathsInText(text)).toBe(text);
  });

  // ── Error messages ──────────────────────────────────────────────────

  it("strips paths from error messages", () => {
    expect(
      sanitizePathsInText(
        "Error: ENOENT: no such file or directory, open '/workspace/sessions/abc-123/outputs/web/missing.tsx'"
      )
    ).toBe(
      "Error: ENOENT: no such file or directory, open 'outputs/web/missing.tsx'"
    );
  });
});


================================================
FILE: web/src/app/craft/utils/pathSanitizer.ts
================================================
/**
 * Path Sanitizer
 *
 * Pure string functions for stripping sandbox/session path prefixes.
 * All paths displayed in the UI must be relative to the session root.
 *
 * Two deployment shapes exist (both always include the sessions layer):
 *   Local:  /Users/.../sandboxes/{uuid}/sessions/{uuid}/outputs/web/page.tsx
 *   Kube:   /workspace/sessions/{uuid}/outputs/web/page.tsx
 */

/**
 * Strip sandbox/session path prefixes to produce a session-relative path.
 *
 * Returns the path relative to the session root (the directory that
 * contains outputs/, files/, etc.)
 */
export function stripSessionPrefix(fullPath: string): string {
  if (!fullPath) return "";

  // 1. .../sandboxes/{uuid}/sessions/{uuid}/REST  →  REST
  //    Matches local dev (always sandboxes + sessions)
  const sbSession = fullPath.match(
    /\/sandboxes\/[0-9a-f-]+\/sessions\/[0-9a-f-]+\/(.+)$/
  );
  if (sbSession?.[1]) return sbSession[1];

  // 2. .../sessions/{uuid}/REST  →  REST
  //    Matches kubernetes (e.g. /workspace/sessions/...)
  const session = fullPath.match(/\/sessions\/[0-9a-f-]+\/(.+)$/);
  if (session?.[1]) return session[1];

  // 3. Fallback: keep last 3 path segments for context
  //    /some/unknown/deep/path/to/file.tsx  →  path/to/file.tsx
  const segments = fullPath.split("/").filter(Boolean);
  if (segments.length > 3) return segments.slice(-3).join("/");

  // 4. Already relative or short — return as-is
  return fullPath.startsWith("/") ? fullPath.slice(1) : fullPath;
}

/**
 * Replace all absolute sandbox/session paths in freeform text with
 * session-relative paths.
 *
 * Handles paths embedded in commands, output listings, error messages, etc.
 * Matches both local and kubernetes path formats.
 */

// Pre-compiled regexes (module-level, not per-call)
// Order matters: most specific first to avoid partial matches
const SESSION_PATH_PATTERNS = [
  // Local: .../sandboxes/uuid/sessions/uuid/REST
  /(?:\/[\w._-]+)*\/sandboxes\/[0-9a-f-]+\/sessions\/[0-9a-f-]+\//g,
  // Kubernetes: .../sessions/uuid/REST  (no sandboxes prefix)
  /(?:\/[\w._-]+)*\/sessions\/[0-9a-f-]+\//g,
];

export function sanitizePathsInText(text: string): string {
  if (!text) return "";

  let result = text;
  for (const pattern of SESSION_PATH_PATTERNS) {
    // Reset lastIndex since we reuse the regex
    pattern.lastIndex = 0;
    result = result.replace(pattern, "");
  }
  return result;
}


================================================
FILE: web/src/app/craft/utils/streamItemHelpers.ts
================================================
/**
 * Stream Item Helpers
 *
 * Reduced to only utility functions that are NOT packet-processing concerns.
 * All packet parsing, tool detection, and path sanitization now live in parsePacket.ts.
 */

/**
 * Generate a unique ID for stream items
 */
export function genId(prefix: string): string {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}

/**
 * Check if a tool call should be included in a "Working" pill.
 * Returns true for all tool calls except task/subagent tools.
 * Working tools: glob, grep, read, edit, write, bash, webfetch, websearch, etc.
 */
export function isWorkingToolCall(toolCall: {
  kind: string;
  subagentType?: string;
}): boolean {
  // Task tools (subagents) are kept as separate pills
  if (toolCall.kind === "task") return false;
  if (toolCall.subagentType) return false;
  return true;
}


================================================
FILE: web/src/app/craft/v1/configure/components/ComingSoonConnectors.tsx
================================================
"use client";

import { useState } from "react";
import Card from "@/refresh-components/cards/Card";
import Text from "@/refresh-components/texts/Text";
import { Content } from "@opal/layouts";
import Separator from "@/refresh-components/Separator";
import { ValidSources } from "@/lib/types";
import { getSourceMetadata } from "@/lib/sources";
import RequestConnectorModal from "@/app/craft/v1/configure/components/RequestConnectorModal";
import {
  OutlookIcon,
  OneDriveIcon,
  BoxIcon,
  TrelloIcon,
  ServiceNowIcon,
} from "@/components/icons/icons";

// Coming soon connectors - organized by ecosystem
const COMING_SOON_CONNECTORS: ValidSources[] = [
  // Microsoft
  ValidSources.Sharepoint,
  ValidSources.Teams,
  ValidSources.Imap, // Outlook via IMAP
  // Atlassian
  ValidSources.Confluence,
  ValidSources.Jira,
  ValidSources.Bitbucket,
  // Git/GitLab
  ValidSources.GitLab,
  // Cloud Storage
  ValidSources.Dropbox,
  // Salesforce
  ValidSources.Salesforce,
  ValidSources.Gong,
  // Knowledge Base/Wiki
  ValidSources.Bookstack,
  ValidSources.Discord,
  ValidSources.Zendesk,
  ValidSources.Freshdesk,
  ValidSources.Egnyte,
  // Project Management
  ValidSources.Asana,
  ValidSources.Clickup,
  ValidSources.Productboard,
  // Knowledge Base/Wiki
  ValidSources.Outline,
  ValidSources.Slab,
  ValidSources.Coda,
  ValidSources.Guru,
  ValidSources.Document360,
  ValidSources.Gitbook,
  ValidSources.Highspot,
  ValidSources.DrupalWiki,
  ValidSources.Discourse,
  ValidSources.Axero,
  // Messaging/Collaboration
  ValidSources.Zulip,
  // Other
  ValidSources.Loopio,
  ValidSources.Xenforo,
];

export default function ComingSoonConnectors() {
  const [showRequestModal, setShowRequestModal] = useState(false);

  return (
    <>
      <Separator />
      <div className="w-full flex items-center justify-between pb-2">
        <div className="flex flex-col gap-0.25">
          <Text mainContentEmphasis text04>
            Coming Soon
          </Text>
          <Text secondaryBody text03>
            Don't see what you're looking for? Submit a connector request!
          </Text>
        </div>
        <button
          type="button"
          onClick={() => setShowRequestModal(true)}
          className="px-4 py-2 rounded-12 bg-white dark:bg-black hover:opacity-90 transition-colors whitespace-nowrap"
        >
          <Text
            mainUiAction
            className="text-text-dark-05 dark:text-text-light-05"
          >
            Submit a request
          </Text>
        </button>
      </div>
      <div className="w-full grid grid-cols-1 md:grid-cols-4 gap-2">
        {COMING_SOON_CONNECTORS.flatMap((type) => {
          const sourceMetadata = getSourceMetadata(type);
          // Special case: IMAP should display as "Outlook" with custom icon
          const displayName =
            type === ValidSources.Imap ? "Outlook" : sourceMetadata.displayName;

          const card = (
            <div key={type} className="opacity-60">
              <Card variant="secondary">
                <Content
                  icon={
                    type === ValidSources.Imap
                      ? OutlookIcon
                      : sourceMetadata.icon
                  }
                  title={displayName}
                  sizePreset="main-ui"
                  variant="body"
                />
              </Card>
            </div>
          );

          // Insert OneDrive right after Outlook
          if (type === ValidSources.Imap) {
            return [
              card,
              <div key="onedrive" className="opacity-60">
                <Card variant="secondary">
                  <Content
                    icon={OneDriveIcon}
                    title="OneDrive"
                    sizePreset="main-ui"
                    variant="body"
                  />
                </Card>
              </div>,
            ];
          }

          // Insert Box right after Discord
          if (type === ValidSources.Discord) {
            return [
              card,
              <div key="box" className="opacity-60">
                <Card variant="secondary">
                  <Content
                    icon={BoxIcon}
                    title="Box"
                    sizePreset="main-ui"
                    variant="body"
                  />
                </Card>
              </div>,
            ];
          }

          return [card];
        })}
        {/* Enterprise/ERP */}
        <div className="opacity-60">
          <Card variant="secondary">
            <Content
              icon={ServiceNowIcon}
              title="ServiceNow"
              sizePreset="main-ui"
              variant="body"
            />
          </Card>
        </div>
        {/* Project Management */}
        <div className="opacity-60">
          <Card variant="secondary">
            <Content
              icon={TrelloIcon}
              title="Trello"
              sizePreset="main-ui"
              variant="body"
            />
          </Card>
        </div>
      </div>
      <RequestConnectorModal
        open={showRequestModal}
        onClose={() => setShowRequestModal(false)}
      />
    </>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/ConfigureConnectorModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import useSWR from "swr";
import Modal from "@/refresh-components/Modal";
import { ValidSources, ConfigurableSources } from "@/lib/types";
import { getSourceMetadata, getSourceDocLink } from "@/lib/sources";
import { SvgPlug, SvgExternalLink } from "@opal/icons";
import { Credential, credentialTemplates } from "@/lib/connectors/credentials";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import CredentialStep from "@/app/craft/v1/configure/components/CredentialStep";
import ConnectorConfigStep from "@/app/craft/v1/configure/components/ConnectorConfigStep";
import { OAUTH_STATE_KEY } from "@/app/craft/v1/constants";
import { connectorConfigs } from "@/lib/connectors/connectors";
import { Button } from "@opal/components";
import { Section } from "@/layouts/general-layouts";

type ModalStep = "credential" | "configure";

function connectorNeedsCredentials(connectorType: ValidSources): boolean {
  return credentialTemplates[connectorType] != null;
}

function connectorNeedsConfigStep(connectorType: ValidSources): boolean {
  const config = connectorConfigs[connectorType as ConfigurableSources];
  if (!config) return false;

  // Only check main values, not advanced_values
  // Advanced values are optional configuration and shouldn't force a 2-step flow
  const hasVisibleValues = config.values.some(
    (field) => !("hidden" in field && field.hidden)
  );

  return hasVisibleValues;
}

interface ConfigureConnectorModalProps {
  connectorType: ValidSources | null;
  existingConfig: unknown | null;
  open: boolean;
  onClose: () => void;
  onSuccess: () => void;
}

export default function ConfigureConnectorModal({
  connectorType,
  existingConfig,
  open,
  onClose,
  onSuccess,
}: ConfigureConnectorModalProps) {
  const [step, setStep] = useState<ModalStep>("credential");
  const [selectedCredential, setSelectedCredential] =
    useState<Credential<any> | null>(null);

  const sourceMetadata = connectorType
    ? getSourceMetadata(connectorType)
    : null;
  const isConfigured = !!existingConfig;

  const needsCredentials = connectorType
    ? connectorNeedsCredentials(connectorType)
    : true;
  const needsConfigStep = connectorType
    ? connectorNeedsConfigStep(connectorType)
    : false;
  const isSingleStep = needsCredentials && !needsConfigStep;

  // Fetch credentials for this connector type
  const { data: credentials, mutate: refreshCredentials } = useSWR<
    Credential<any>[]
  >(
    connectorType && open && !isConfigured
      ? buildSimilarCredentialInfoURL(connectorType)
      : null,
    errorHandlingFetcher
  );

  useEffect(() => {
    if (open && !isConfigured) {
      setStep("credential");
      setSelectedCredential(null);
    }
  }, [open, connectorType, isConfigured]);

  // Auto-select credential if there's only one
  useEffect(() => {
    if (credentials?.length === 1 && !selectedCredential && credentials[0]) {
      setSelectedCredential(credentials[0]);
    }
  }, [credentials, selectedCredential]);

  if (!connectorType || !sourceMetadata) return null;

  // Don't render for configured connectors (handled by popover in ConnectorCard)
  if (isConfigured) return null;

  const handleCredentialCreated = (cred: Credential<any>) => {
    setSelectedCredential(cred);
    refreshCredentials();
  };

  const handleCredentialDeleted = (credId: number) => {
    if (selectedCredential?.id === credId) {
      setSelectedCredential(null);
    }
    refreshCredentials();
  };

  const handleOAuthRedirect = () => {
    // Save state before OAuth redirect
    sessionStorage.setItem(
      OAUTH_STATE_KEY,
      JSON.stringify({
        connectorType,
        timestamp: Date.now(),
      })
    );
  };

  const handleContinue = () => {
    if (selectedCredential) {
      setStep("configure");
    }
  };

  const handleBack = () => {
    setStep("credential");
  };

  // Dynamic title and description based on flow type
  const getStepTitle = () => {
    if (isSingleStep) {
      return `Connect ${sourceMetadata.displayName}`;
    }
    return step === "credential"
      ? `Connect ${sourceMetadata.displayName}`
      : `Configure ${sourceMetadata.displayName}`;
  };

  const getStepDescription = () => {
    if (isSingleStep) {
      return "Select or create a credential to connect";
    }
    return step === "credential"
      ? "Step 1: Select or create a credential"
      : "Step 2: Configure your connector";
  };

  return (
    <>
      <Modal open={open} onOpenChange={onClose}>
        <Modal.Content width="xl" height="fit">
          <Modal.Header
            icon={SvgPlug}
            title={getStepTitle()}
            description={getStepDescription()}
            onClose={onClose}
          />
          <Modal.Body>
            {getSourceDocLink(connectorType) && (
              <Section flexDirection="row" justifyContent="end" width="full">
                <div className="pr-10">
                  <Button
                    variant="action"
                    prominence="tertiary"
                    rightIcon={SvgExternalLink}
                    href={getSourceDocLink(connectorType)!}
                    target="_blank"
                  >
                    View setup documentation
                  </Button>
                </div>
              </Section>
            )}
            {step === "credential" ? (
              <CredentialStep
                connectorType={connectorType}
                credentials={credentials || []}
                selectedCredential={selectedCredential}
                onSelectCredential={setSelectedCredential}
                onCredentialCreated={handleCredentialCreated}
                onCredentialDeleted={handleCredentialDeleted}
                onContinue={handleContinue}
                onOAuthRedirect={handleOAuthRedirect}
                refresh={refreshCredentials}
                isSingleStep={isSingleStep}
                onConnectorSuccess={onSuccess}
              />
            ) : selectedCredential ? (
              <ConnectorConfigStep
                connectorType={connectorType}
                credential={selectedCredential}
                onSuccess={onSuccess}
                onBack={handleBack}
              />
            ) : null}
          </Modal.Body>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/ConfigureOverlays.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import Message from "@/refresh-components/messages/Message";

interface ConnectorInfoOverlayProps {
  visible: boolean;
}

export function ConnectorInfoOverlay({ visible }: ConnectorInfoOverlayProps) {
  return (
    <div
      className={cn(
        "fixed bottom-16 left-1/2 -translate-x-1/2 z-toast transition-all duration-300 ease-in-out",
        visible
          ? "opacity-100 translate-y-0"
          : "opacity-0 translate-y-4 pointer-events-none"
      )}
    >
      <Message
        info
        text="Existing sessions won't have access to this data"
        description="Once synced, documents from this connector will be available in your new sessions!"
        close={false}
      />
    </div>
  );
}

interface ReprovisionWarningOverlayProps {
  visible: boolean;
  onUpdate?: () => void;
  isUpdating?: boolean;
}

export function ReprovisionWarningOverlay({
  visible,
  onUpdate,
  isUpdating,
}: ReprovisionWarningOverlayProps) {
  return (
    <div
      className={cn(
        "fixed bottom-16 left-1/2 -translate-x-1/2 z-toast transition-all duration-300 ease-in-out",
        visible
          ? "opacity-100 translate-y-0"
          : "opacity-0 translate-y-4 pointer-events-none"
      )}
    >
      <Message
        warning
        text={isUpdating ? "Updating..." : "Click Update to apply your changes"}
        description="Your sandbox will be recreated with your new settings. Previously running sessions will not be affected by your changes."
        close={false}
        actions={isUpdating ? false : "Update"}
        onAction={isUpdating ? undefined : onUpdate}
      />
    </div>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/ConnectorCard.tsx
================================================
"use client";

import { useState } from "react";
import Card from "@/refresh-components/cards/Card";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import { ContentAction } from "@opal/layouts";
import { ValidSources } from "@/lib/types";
import { getSourceMetadata } from "@/lib/sources";
import { SvgMoreHorizontal, SvgPlug, SvgSettings, SvgTrash } from "@opal/icons";
import { Button } from "@opal/components";
import { useRouter } from "next/navigation";
import { cn } from "@/lib/utils";

export type ConnectorStatus =
  | "not_connected"
  | "connected"
  | "connected_with_errors"
  | "indexing"
  | "error"
  | "deleting";

export interface BuildConnectorConfig {
  cc_pair_id: number;
  connector_id: number;
  credential_id: number;
  source: string;
  name: string;
  status: ConnectorStatus;
  docs_indexed: number;
  last_indexed: string | null;
  error_message?: string | null;
}

interface ConnectorCardProps {
  connectorType: ValidSources;
  config: BuildConnectorConfig | null;
  onConfigure: () => void;
  onDelete: () => void;
}

function getStatusText(status: ConnectorStatus, docsIndexed: number): string {
  switch (status) {
    case "connected":
      return docsIndexed > 0
        ? `${docsIndexed.toLocaleString()} docs`
        : "Connected";
    case "connected_with_errors":
      return docsIndexed > 0
        ? `${docsIndexed.toLocaleString()} docs`
        : "Connected, has errors";
    case "indexing":
      return "Syncing...";
    case "error":
      return "Error";
    case "deleting":
      return "Deleting...";
    case "not_connected":
    default:
      return "Not connected";
  }
}

export default function ConnectorCard({
  connectorType,
  config,
  onConfigure,
  onDelete,
}: ConnectorCardProps) {
  const [popoverOpen, setPopoverOpen] = useState(false);
  const router = useRouter();
  const sourceMetadata = getSourceMetadata(connectorType);
  const status: ConnectorStatus = config?.status || "not_connected";
  const isConnected = status !== "not_connected" && status !== "deleting";
  const isDeleting = status === "deleting";

  // Check if this connector type is always available (doesn't need connection setup)
  const isAlwaysConnected = sourceMetadata.alwaysConnected ?? false;
  const customDescription = sourceMetadata.customDescription;

  const handleCardClick = () => {
    if (isDeleting) {
      return; // No action while deleting
    }
    // Always-connected connectors always go to onConfigure
    if (isAlwaysConnected) {
      onConfigure();
      return;
    }
    if (isConnected) {
      setPopoverOpen(true);
    } else {
      onConfigure();
    }
  };

  // Always-connected connectors show a settings icon
  // Regular connectors show popover menu when connected, plug icon when not
  const rightContent = isDeleting ? null : isAlwaysConnected ? (
    <Button prominence="internal" icon={SvgSettings} />
  ) : isConnected ? (
    <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
      <Popover.Trigger asChild>
        <Button
          icon={SvgMoreHorizontal}
          prominence="tertiary"
          onClick={(e) => {
            e.stopPropagation();
            setPopoverOpen(!popoverOpen);
          }}
        />
      </Popover.Trigger>
      <Popover.Content side="right" align="start" sideOffset={4}>
        <Popover.Menu>
          <LineItem
            key="manage"
            icon={SvgSettings}
            onClick={(e) => {
              e.stopPropagation();
              setPopoverOpen(false);
              router.push(`/admin/connector/${config?.cc_pair_id}`);
            }}
          >
            Manage connector
          </LineItem>
          <LineItem
            key="delete"
            danger
            icon={SvgTrash}
            onClick={(e) => {
              e.stopPropagation();
              setPopoverOpen(false);
              onDelete();
            }}
          >
            Disconnect
          </LineItem>
        </Popover.Menu>
      </Popover.Content>
    </Popover>
  ) : (
    <Button icon={SvgPlug} prominence="tertiary" size="sm" />
  );

  // Always-connected connectors show as "primary" variant
  const cardVariant =
    isAlwaysConnected || isConnected ? "primary" : "secondary";

  const descriptionText =
    customDescription ?? getStatusText(status, config?.docs_indexed || 0);

  return (
    <div
      className={cn(!isDeleting && "cursor-pointer")}
      onClick={handleCardClick}
    >
      <Card variant={cardVariant}>
        <ContentAction
          icon={sourceMetadata.icon}
          title={sourceMetadata.displayName}
          description={descriptionText}
          sizePreset="main-content"
          variant="section"
          rightChildren={rightContent}
        />
      </Card>
    </div>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/ConnectorConfigStep.tsx
================================================
"use client";

import { useState } from "react";
import { Formik, Form, useFormikContext } from "formik";
import { Section } from "@/layouts/general-layouts";
import { Button } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { ValidSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import Separator from "@/refresh-components/Separator";
import {
  connectorConfigs,
  createConnectorInitialValues,
} from "@/lib/connectors/connectors";
import CardSection from "@/components/admin/CardSection";
import { RenderField } from "@/app/admin/connectors/[connector]/pages/FieldRendering";
import { createBuildConnector } from "@/app/craft/v1/configure/utils/createBuildConnector";
import { useUser } from "@/providers/UserProvider";

interface ConnectorConfigStepProps {
  connectorType: ValidSources;
  credential: Credential<any>;
  onSuccess: () => void;
  onBack: () => void;
}

function ConnectorConfigForm({
  connectorType,
  credential,
  onSuccess,
  onBack,
}: ConnectorConfigStepProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);
  const { values } = useFormikContext<Record<string, any>>();
  const { user } = useUser();

  const config =
    connectorConfigs[connectorType as keyof typeof connectorConfigs];

  const handleSubmit = async () => {
    setIsSubmitting(true);

    try {
      // Extract connector_name and exclude access_type/groups (these are top-level fields)
      const { connector_name, access_type, groups, ...connectorConfig } =
        values;

      const result = await createBuildConnector({
        connectorType,
        credential,
        connectorSpecificConfig: connectorConfig,
        connectorName: connector_name,
        userEmail: user?.email,
      });

      if (!result.success) {
        throw new Error(result.error);
      }

      onSuccess();
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to create connector"
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  const hasConfigFields = config?.values && config.values.length > 0;

  return (
    <Form className="w-full flex flex-col items-center">
      <CardSection className="flex flex-col gap-y-4">
        {hasConfigFields &&
          config.values.map((field) => (
            <RenderField
              key={field.name}
              field={field}
              values={values}
              connector={connectorType as any}
              currentCredential={credential}
            />
          ))}
        <Separator />
        {config?.advanced_values &&
          config.advanced_values.length > 0 &&
          config.advanced_values.map((field) => (
            <RenderField
              key={field.name}
              field={field}
              values={values}
              connector={connectorType as any}
              currentCredential={credential}
            />
          ))}
        <Section flexDirection="row" justifyContent="between" height="fit">
          <Button
            disabled={isSubmitting}
            prominence="secondary"
            onClick={onBack}
          >
            Back
          </Button>
          <Button disabled={isSubmitting} type="button" onClick={handleSubmit}>
            {isSubmitting ? "Creating..." : "Create Connector"}
          </Button>
        </Section>
      </CardSection>
    </Form>
  );
}

function getUserIdentifier(email?: string): string {
  if (!email) return "";
  const prefix = email.split("@")[0] || email;
  return `-${prefix.replace(/[^a-zA-Z0-9]/g, "-")}`;
}

export default function ConnectorConfigStep({
  connectorType,
  credential,
  onSuccess,
  onBack,
}: ConnectorConfigStepProps) {
  const { user } = useUser();
  const baseInitialValues = createConnectorInitialValues(connectorType as any);
  const userIdentifier = getUserIdentifier(user?.email);
  const initialValues: Record<string, any> = {
    ...baseInitialValues,
    connector_name: `build-mode-${connectorType}${userIdentifier}`,
  };

  return (
    <Formik
      initialValues={initialValues}
      onSubmit={() => {}}
      enableReinitialize
    >
      <ConnectorConfigForm
        connectorType={connectorType}
        credential={credential}
        onSuccess={onSuccess}
        onBack={onBack}
      />
    </Formik>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/CreateCredentialInline.tsx
================================================
"use client";

import { useState } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { TextFormField } from "@/components/Field";
import { ValidSources } from "@/lib/types";
import {
  Credential,
  credentialTemplates,
  getDisplayNameForCredentialKey,
} from "@/lib/connectors/credentials";
import { createCredential } from "@/lib/credential";
import { getSourceMetadata } from "@/lib/sources";

interface CreateCredentialInlineProps {
  connectorType: ValidSources;
  onSuccess: (credential: Credential<any>) => void;
  onCancel: () => void;
}

export default function CreateCredentialInline({
  connectorType,
  onSuccess,
  onCancel,
}: CreateCredentialInlineProps) {
  const [error, setError] = useState<string | null>(null);
  const [isSubmitting, setIsSubmitting] = useState(false);

  const sourceMetadata = getSourceMetadata(connectorType);
  const credentialTemplate = credentialTemplates[connectorType];

  if (!credentialTemplate) {
    return (
      <Section gap={0.5} alignItems="center" height="fit">
        <Text secondaryBody text03>
          No credential configuration available for {sourceMetadata.displayName}
          .
        </Text>
        <Button variant="action" prominence="secondary" onClick={onCancel}>
          Cancel
        </Button>
      </Section>
    );
  }

  // Build initial values and validation schema from template
  const initialValues: Record<string, string> = {};
  const schemaFields: Record<string, Yup.StringSchema> = {};

  // Filter out metadata fields and build form config
  Object.entries(credentialTemplate).forEach(([key, value]) => {
    if (key === "authentication_method" || key === "authMethods") {
      return;
    }
    initialValues[key] = typeof value === "string" ? value : "";
    schemaFields[key] = Yup.string().required(
      `${getDisplayNameForCredentialKey(key)} is required`
    );
  });

  // Add credential name field
  initialValues["credential_name"] = "";

  const validationSchema = Yup.object().shape(schemaFields);

  const handleSubmit = async (values: Record<string, string>) => {
    setIsSubmitting(true);
    setError(null);

    try {
      // Extract credential name and build credential_json
      const { credential_name, ...credentialFields } = values;

      const response = await createCredential({
        credential_json: credentialFields,
        admin_public: false,
        source: connectorType,
        name: credential_name || `${sourceMetadata.displayName} Credential`,
      });

      if (!response.ok) {
        const errorData = await response.json();
        throw new Error(errorData.detail || "Failed to create credential");
      }

      const credential = await response.json();
      onSuccess(credential);
    } catch (err) {
      setError(
        err instanceof Error ? err.message : "Failed to create credential"
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      onSubmit={handleSubmit}
    >
      {({ isValid, dirty }) => (
        <Form>
          <Section gap={1} alignItems="stretch" height="fit">
            <TextFormField
              name="credential_name"
              label="Credential Name"
              placeholder={`My ${sourceMetadata.displayName} Credential`}
              type="text"
            />

            {Object.entries(credentialTemplate).map(([key, value]) => {
              // Skip metadata fields
              if (key === "authentication_method" || key === "authMethods") {
                return null;
              }

              const isSecret =
                key.toLowerCase().includes("token") ||
                key.toLowerCase().includes("password") ||
                key.toLowerCase().includes("secret") ||
                key.toLowerCase().includes("key");

              return (
                <TextFormField
                  key={key}
                  name={key}
                  label={getDisplayNameForCredentialKey(key)}
                  placeholder={typeof value === "string" ? value : ""}
                  type={isSecret ? "password" : "text"}
                />
              );
            })}

            {error && (
              <Text secondaryBody className="text-status-error-05">
                {error}
              </Text>
            )}

            <Section
              flexDirection="row"
              justifyContent="end"
              gap={0.5}
              height="fit"
            >
              <Button
                disabled={isSubmitting}
                variant="action"
                prominence="secondary"
                onClick={onCancel}
              >
                Cancel
              </Button>
              <Button
                disabled={!isValid || !dirty || isSubmitting}
                variant="action"
                type="submit"
              >
                {isSubmitting ? "Creating..." : "Create Credential"}
              </Button>
            </Section>
          </Section>
        </Form>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/CredentialStep.tsx
================================================
"use client";

import { useState } from "react";
import { Section } from "@/layouts/general-layouts";
import { Button } from "@opal/components";
import Modal from "@/refresh-components/Modal";
import { SvgKey } from "@opal/icons";
import {
  ConfigurableSources,
  ValidSources,
  oauthSupportedSources,
} from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import { getSourceDisplayName } from "@/lib/sources";
import {
  useOAuthDetails,
  getConnectorOauthRedirectUrl,
} from "@/lib/connectors/oauth";
import { deleteCredential } from "@/lib/credential";
import ModifyCredential from "@/components/credentials/actions/ModifyCredential";
import CreateCredential from "@/components/credentials/actions/CreateCredential";
import { CreateStdOAuthCredential } from "@/components/credentials/actions/CreateStdOAuthCredential";
import { GmailMain } from "@/app/admin/connectors/[connector]/pages/gmail/GmailPage";
import CardSection from "@/components/admin/CardSection";
import { Spinner } from "@/components/Spinner";
import {
  NEXT_PUBLIC_CLOUD_ENABLED,
  NEXT_PUBLIC_TEST_ENV,
} from "@/lib/constants";
import {
  CRAFT_CONFIGURE_PATH,
  CRAFT_OAUTH_COOKIE_NAME,
} from "@/app/craft/v1/constants";
import Cookies from "js-cookie";
import { toast } from "@/hooks/useToast";
import { createBuildConnector } from "@/app/craft/v1/configure/utils/createBuildConnector";
import { useUser } from "@/providers/UserProvider";

interface CredentialStepProps {
  connectorType: ValidSources;
  credentials: Credential<any>[];
  selectedCredential: Credential<any> | null;
  onSelectCredential: (cred: Credential<any>) => void;
  onCredentialCreated: (cred: Credential<any>) => void;
  onCredentialDeleted: (credId: number) => void;
  onContinue: () => void;
  onOAuthRedirect: () => void;
  refresh?: () => void;
  isSingleStep?: boolean;
  onConnectorSuccess?: () => void;
}

export default function CredentialStep({
  connectorType,
  credentials,
  selectedCredential,
  onSelectCredential,
  onCredentialCreated,
  onCredentialDeleted,
  onContinue,
  onOAuthRedirect,
  refresh = () => {},
  isSingleStep = false,
  onConnectorSuccess,
}: CredentialStepProps) {
  const [createCredentialFormToggle, setCreateCredentialFormToggle] =
    useState(false);
  const [isAuthorizing, setIsAuthorizing] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const { user } = useUser();

  const { data: oauthDetails, isLoading: oauthDetailsLoading } =
    useOAuthDetails(connectorType);

  const isAuthorizeVisible =
    oauthDetails?.oauth_enabled !== true ||
    (oauthDetails?.additional_kwargs?.length ?? 0) === 0;

  const handleAuthorize = async () => {
    setIsAuthorizing(true);
    onOAuthRedirect();

    const redirectUrl = await getConnectorOauthRedirectUrl(connectorType, {
      desired_return_url: `${window.location.origin}${CRAFT_CONFIGURE_PATH}`,
    });
    if (redirectUrl) {
      window.location.href = redirectUrl;
    } else {
      setIsAuthorizing(false);
      console.error("Failed to get OAuth redirect URL");
    }
  };

  const handleConnect = async () => {
    if (!selectedCredential || !isSingleStep) return;

    setIsConnecting(true);

    try {
      const result = await createBuildConnector({
        connectorType,
        credential: selectedCredential,
        userEmail: user?.email,
      });

      if (!result.success) {
        throw new Error(result.error);
      }

      onConnectorSuccess?.();
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to create connector"
      );
    } finally {
      setIsConnecting(false);
    }
  };

  const handleDeleteCredential = async (credential: Credential<any>) => {
    try {
      const response = await deleteCredential(credential.id);
      if (response.ok) {
        onCredentialDeleted(credential.id);
      } else {
        console.error("Failed to delete credential");
      }
    } catch (error) {
      console.error("Error deleting credential:", error);
    }
  };

  const handleSwap = (newCredential: Credential<any>) => {
    onSelectCredential(newCredential);
  };

  const hasCredentials = credentials.length > 0;

  return (
    <Section flexDirection="column" alignItems="center" height="fit">
      <CardSection>
        {connectorType === ValidSources.Gmail ? (
          <GmailMain
            buildMode
            onOAuthRedirect={onOAuthRedirect}
            onCredentialCreated={async (credential) => {
              onSelectCredential(credential);
              // For single-step connectors (like Gmail), create connector immediately
              // For multi-step connectors, continue to config step
              if (isSingleStep && onConnectorSuccess) {
                // Create connector immediately for single-step flow
                setIsConnecting(true);
                try {
                  const result = await createBuildConnector({
                    connectorType,
                    credential: credential,
                    userEmail: user?.email,
                  });

                  if (!result.success) {
                    throw new Error(result.error);
                  }

                  onConnectorSuccess();
                } catch (err) {
                  toast.error(
                    err instanceof Error
                      ? err.message
                      : "Failed to create connector"
                  );
                } finally {
                  setIsConnecting(false);
                }
              } else {
                onContinue();
              }
            }}
          />
        ) : (
          <>
            <ModifyCredential
              showIfEmpty
              accessType="public"
              defaultedCredential={selectedCredential!}
              credentials={credentials}
              editableCredentials={credentials}
              onDeleteCredential={handleDeleteCredential}
              onSwitch={handleSwap}
            />
            {!createCredentialFormToggle && (
              <div className="mt-6 flex gap-4 justify-between items-center">
                <div className="flex gap-4">
                  <Button
                    onClick={async () => {
                      if (oauthDetails && oauthDetails.oauth_enabled) {
                        if (oauthDetails.additional_kwargs.length > 0) {
                          setCreateCredentialFormToggle(true);
                        } else {
                          const redirectUrl =
                            await getConnectorOauthRedirectUrl(connectorType, {
                              desired_return_url: `${window.location.origin}${CRAFT_CONFIGURE_PATH}`,
                            });
                          if (redirectUrl) {
                            onOAuthRedirect();
                            window.location.href = redirectUrl;
                          } else {
                            setCreateCredentialFormToggle(
                              (createConnectorToggle) => !createConnectorToggle
                            );
                          }
                        }
                      } else {
                        if (connectorType === ValidSources.GoogleDrive) {
                          Cookies.set(CRAFT_OAUTH_COOKIE_NAME, "true", {
                            path: "/",
                          });
                          onOAuthRedirect();
                        }
                        setCreateCredentialFormToggle(
                          (createConnectorToggle) => !createConnectorToggle
                        );
                      }
                    }}
                  >
                    Create New
                  </Button>
                  {oauthSupportedSources.includes(
                    connectorType as ConfigurableSources
                  ) &&
                    (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV) && (
                      <Button
                        disabled={isAuthorizing}
                        variant="action"
                        onClick={handleAuthorize}
                        hidden={!isAuthorizeVisible}
                      >
                        {isAuthorizing
                          ? "Authorizing..."
                          : `Authorize with ${getSourceDisplayName(
                              connectorType
                            )}`}
                      </Button>
                    )}
                </div>
                {hasCredentials && (
                  <Button
                    disabled={!selectedCredential || isConnecting}
                    onClick={isSingleStep ? handleConnect : onContinue}
                  >
                    {isSingleStep
                      ? isConnecting
                        ? "Connecting..."
                        : "Connect"
                      : "Continue"}
                  </Button>
                )}
              </div>
            )}

            {createCredentialFormToggle && (
              <Modal
                open
                onOpenChange={() => setCreateCredentialFormToggle(false)}
              >
                <Modal.Content width="xl" height="fit">
                  <Modal.Header
                    icon={SvgKey}
                    title={`Create a ${getSourceDisplayName(
                      connectorType
                    )} credential`}
                    onClose={() => setCreateCredentialFormToggle(false)}
                  />
                  <Modal.Body>
                    {oauthDetailsLoading ? (
                      <Spinner />
                    ) : (
                      <>
                        {oauthDetails && oauthDetails.oauth_enabled ? (
                          <CreateStdOAuthCredential
                            sourceType={connectorType}
                            additionalFields={oauthDetails.additional_kwargs}
                          />
                        ) : (
                          <CreateCredential
                            close
                            refresh={refresh}
                            sourceType={connectorType}
                            accessType="public"
                            onSwitch={async (cred) => {
                              onCredentialCreated(cred);
                              setCreateCredentialFormToggle(false);
                            }}
                            onClose={() => setCreateCredentialFormToggle(false)}
                          />
                        )}
                      </>
                    )}
                  </Modal.Body>
                </Modal.Content>
              </Modal>
            )}
          </>
        )}
      </CardSection>
    </Section>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/DemoDataConfirmModal.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";

interface DemoDataConfirmModalProps {
  open: boolean;
  onClose: () => void;
  pendingDemoDataEnabled: boolean | null;
  onConfirm: () => void;
}

export default function DemoDataConfirmModal({
  open,
  onClose,
  pendingDemoDataEnabled,
  onConfirm,
}: DemoDataConfirmModalProps) {
  if (!open) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black/50 backdrop-blur-sm"
        onClick={onClose}
      />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6">
          {/* Header */}
          <div className="flex items-center justify-center">
            <Text headingH2 text05>
              Confirm Demo Data Change
            </Text>
          </div>

          {/* Message */}
          <div className="flex justify-center">
            <Text mainUiBody text04 className="text-center">
              Are you sure you want to{" "}
              {pendingDemoDataEnabled ? "enable" : "disable"} demo data?
              <br />
              Your sandbox will be re-initialized with your new data set
            </Text>
          </div>

          {/* Action buttons */}
          <div className="flex items-center justify-center gap-3">
            <button
              type="button"
              onClick={onClose}
              className="px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors"
            >
              <Text mainUiBody text05>
                Cancel
              </Text>
            </button>
            <button
              type="button"
              onClick={onConfirm}
              className="px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors"
            >
              <Text
                mainUiAction
                className="text-text-light-05 dark:text-text-dark-05"
              >
                Confirm
              </Text>
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/RequestConnectorModal.tsx
================================================
"use client";

import { useState } from "react";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";

interface RequestConnectorModalProps {
  open: boolean;
  onClose: () => void;
}

export default function RequestConnectorModal({
  open,
  onClose,
}: RequestConnectorModalProps) {
  const [connectorName, setConnectorName] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [successMessage, setSuccessMessage] = useState<string | null>(null);

  const handleClose = () => {
    setConnectorName("");
    setErrorMessage(null);
    setSuccessMessage(null);
    onClose();
  };

  const handleSubmit = async (e?: React.FormEvent) => {
    e?.preventDefault();
    if (!connectorName.trim()) return;

    setIsSubmitting(true);
    setErrorMessage(null);
    setSuccessMessage(null);

    try {
      const response = await fetch("/api/manage/connector-request", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          connector_name: connectorName.trim(),
        }),
      });

      const data = await response.json();

      if (!response.ok) {
        throw new Error(data.detail || "Failed to submit connector request");
      }

      setSuccessMessage(
        data.message ||
          "Connector request submitted successfully. We'll prioritize popular requests!"
      );

      setTimeout(() => {
        handleClose();
      }, 2000);
    } catch (error) {
      console.error("Failed to submit connector request:", error);
      setErrorMessage(
        error instanceof Error
          ? error.message
          : "Failed to submit connector request. Please try again."
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  if (!open) return null;

  const isCloud = NEXT_PUBLIC_CLOUD_ENABLED;
  const DISCORD_URL = "https://discord.gg/4NA5SbzrWb";

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black/50 backdrop-blur-sm"
        onClick={handleClose}
      />

      {/* Modal */}
      <div className="relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01">
        <div className="p-6 flex flex-col gap-6">
          <div className="flex items-center justify-center">
            <Text headingH2 text05>
              Request a Connector
            </Text>
          </div>

          <div className="flex flex-col gap-3">
            <Text mainUiBody text04 className="text-center">
              Let us know which connectors you'd like to craft with
              <br />
              We'll prioritize popular requests!
            </Text>

            {successMessage && (
              <div className="px-4 py-3 rounded-12 bg-status-success-00 border border-status-success-02">
                <Text mainUiBody text05 className="text-status-success-05">
                  {successMessage}
                </Text>
              </div>
            )}

            {errorMessage && (
              <div className="px-4 py-3 rounded-12 bg-status-error-00 border border-status-error-02">
                <Text mainUiBody text05 className="text-status-error-05">
                  {errorMessage}
                </Text>
              </div>
            )}

            {isCloud ? (
              // Cloud: Show form with text input
              <>
                <form
                  onSubmit={handleSubmit}
                  className="flex flex-col gap-4 items-center"
                >
                  <input
                    id="connector-name"
                    type="text"
                    value={connectorName}
                    onChange={(e) => {
                      setConnectorName(e.target.value);
                      if (errorMessage) setErrorMessage(null);
                    }}
                    placeholder="e.g., ServiceNow, Workday, etc."
                    className="px-4 py-2 rounded-12 bg-background-tint-00 border border-border-01 text-text-05 placeholder:text-text-02 focus:outline-none focus:ring-2 focus:ring-border-01 text-center max-w-md w-full"
                    disabled={isSubmitting || !!successMessage}
                  />
                </form>

                <div className="flex items-center justify-center gap-3 pt-2 max-w-md w-full mx-auto">
                  <button
                    type="button"
                    onClick={handleClose}
                    disabled={isSubmitting}
                    className="flex-1 px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                  >
                    <Text mainUiBody text05>
                      {successMessage ? "Close" : "Cancel"}
                    </Text>
                  </button>
                  {!successMessage && (
                    <button
                      type="button"
                      onClick={handleSubmit}
                      disabled={!connectorName.trim() || isSubmitting}
                      className={cn(
                        "flex-1 px-4 py-2 rounded-12 transition-colors",
                        !connectorName.trim() || isSubmitting
                          ? "bg-background-neutral-01 text-text-02 cursor-not-allowed"
                          : "bg-black dark:bg-white hover:opacity-90"
                      )}
                    >
                      <Text
                        mainUiAction
                        className={
                          !connectorName.trim() || isSubmitting
                            ? "text-text-02"
                            : "text-text-light-05 dark:text-text-dark-05"
                        }
                      >
                        {isSubmitting ? "Submitting..." : "Submit Request"}
                      </Text>
                    </button>
                  )}
                </div>
              </>
            ) : (
              // Self-hosted: Show email link and Discord button
              <>
                <div className="flex flex-col gap-4 items-center">
                  <Text mainUiBody text04 className="text-center">
                    Email your request to{" "}
                    <a
                      href="mailto:hello@onyx.app?subject=Onyx Craft Connector Request"
                      className="text-blue-600 dark:text-blue-400 hover:underline"
                    >
                      hello@onyx.app
                    </a>
                  </Text>
                </div>

                <div className="flex items-center justify-center gap-3 pt-2 max-w-md w-full mx-auto">
                  <button
                    type="button"
                    onClick={handleClose}
                    className="flex-1 px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors"
                  >
                    <Text mainUiBody text05>
                      Close
                    </Text>
                  </button>
                  <a
                    href={DISCORD_URL}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="flex-1 px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors text-center"
                  >
                    <Text
                      mainUiAction
                      className="text-text-light-05 dark:text-text-dark-05"
                    >
                      Join Onyx Discord
                    </Text>
                  </a>
                </div>
              </>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
================================================
"use client";

import { useState, useCallback, useRef, useMemo } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  fetchLibraryTree,
  uploadLibraryFiles,
  uploadLibraryZip,
  createLibraryDirectory,
  toggleLibraryFileSync,
  deleteLibraryFile,
} from "@/app/craft/services/apiServices";
import { LibraryEntry } from "@/app/craft/types/user-library";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Modal from "@/refresh-components/Modal";
import ShadowDiv from "@/refresh-components/ShadowDiv";
import { Section } from "@/layouts/general-layouts";
import {
  SvgFolder,
  SvgFolderOpen,
  SvgChevronRight,
  SvgUploadCloud,
  SvgTrash,
  SvgFileText,
  SvgFolderPlus,
} from "@opal/icons";
import Switch from "@/refresh-components/inputs/Switch";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import IconButton from "@/refresh-components/buttons/IconButton";

/**
 * Build a hierarchical tree from a flat list of library entries.
 * Entries have paths like "user_library/test" or "user_library/test/file.pdf"
 */
function buildTreeFromFlatList(flatList: LibraryEntry[]): LibraryEntry[] {
  // Create a map of path -> entry (with children array initialized)
  const pathToEntry = new Map<string, LibraryEntry>();

  // First pass: create entries with empty children arrays
  for (const entry of flatList) {
    pathToEntry.set(entry.path, { ...entry, children: [] });
  }

  // Second pass: build parent-child relationships
  const rootEntries: LibraryEntry[] = [];

  for (const entry of flatList) {
    const entryWithChildren = pathToEntry.get(entry.path)!;

    // Find parent path by removing the last segment
    const pathParts = entry.path.split("/");
    pathParts.pop(); // Remove last segment (filename or folder name)
    const parentPath = pathParts.join("/");

    const parent = pathToEntry.get(parentPath);
    if (parent && parent.children) {
      parent.children.push(entryWithChildren);
    } else {
      // No parent found, this is a root-level entry
      rootEntries.push(entryWithChildren);
    }
  }

  return rootEntries;
}

interface UserLibraryModalProps {
  open: boolean;
  onClose: () => void;
  onChanges?: () => void; // Called when files are uploaded, deleted, or sync toggled
}

export default function UserLibraryModal({
  open,
  onClose,
  onChanges,
}: UserLibraryModalProps) {
  const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());
  const [isUploading, setIsUploading] = useState(false);
  const [uploadError, setUploadError] = useState<string | null>(null);
  const [entryToDelete, setEntryToDelete] = useState<LibraryEntry | null>(null);
  const [showNewFolderModal, setShowNewFolderModal] = useState(false);
  const [newFolderName, setNewFolderName] = useState("");
  const fileInputRef = useRef<HTMLInputElement>(null);
  const uploadTargetPathRef = useRef<string>("/");

  // Fetch library tree
  const {
    data: tree,
    error,
    isLoading,
    mutate,
  } = useSWR(open ? SWR_KEYS.buildUserLibraryTree : null, fetchLibraryTree, {
    revalidateOnFocus: false,
  });

  // Build hierarchical tree from flat list
  const hierarchicalTree = useMemo(() => {
    if (!tree) return [];
    return buildTreeFromFlatList(tree);
  }, [tree]);

  const toggleFolder = useCallback((path: string) => {
    setExpandedPaths((prev) => {
      const newSet = new Set(prev);
      if (newSet.has(path)) {
        newSet.delete(path);
      } else {
        newSet.add(path);
      }
      return newSet;
    });
  }, []);

  const handleFileUpload = useCallback(
    async (event: React.ChangeEvent<HTMLInputElement>) => {
      const files = event.target.files;
      if (!files || files.length === 0) return;

      setIsUploading(true);
      setUploadError(null);

      const targetPath = uploadTargetPathRef.current;

      try {
        const fileArray = Array.from(files);
        // Check if it's a single zip file
        const firstFile = fileArray[0];
        if (
          fileArray.length === 1 &&
          firstFile &&
          firstFile.name.endsWith(".zip")
        ) {
          await uploadLibraryZip(targetPath, firstFile);
        } else {
          await uploadLibraryFiles(targetPath, fileArray);
        }
        mutate();
        onChanges?.(); // Notify parent that changes were made
      } catch (err) {
        setUploadError(err instanceof Error ? err.message : "Upload failed");
      } finally {
        setIsUploading(false);
        uploadTargetPathRef.current = "/";
        // Reset input
        event.target.value = "";
      }
    },
    [mutate, onChanges]
  );

  const handleUploadToFolder = useCallback((folderPath: string) => {
    uploadTargetPathRef.current = folderPath;
    fileInputRef.current?.click();
  }, []);

  const handleToggleSync = useCallback(
    async (entry: LibraryEntry, enabled: boolean) => {
      try {
        await toggleLibraryFileSync(entry.id, enabled);
        mutate();
        onChanges?.(); // Notify parent that changes were made
      } catch (err) {
        console.error("Failed to toggle sync:", err);
      }
    },
    [mutate, onChanges]
  );

  const handleDeleteConfirm = useCallback(async () => {
    if (!entryToDelete) return;

    try {
      await deleteLibraryFile(entryToDelete.id);
      mutate();
      onChanges?.(); // Notify parent that changes were made
    } catch (err) {
      console.error("Failed to delete:", err);
    } finally {
      setEntryToDelete(null);
    }
  }, [entryToDelete, mutate, onChanges]);

  const handleCreateDirectory = useCallback(async () => {
    const name = newFolderName.trim();
    if (!name) return;

    try {
      await createLibraryDirectory({ name, parent_path: "/" });
      mutate();
    } catch (err) {
      console.error("Failed to create directory:", err);
      setUploadError(
        err instanceof Error ? err.message : "Failed to create folder"
      );
    } finally {
      setShowNewFolderModal(false);
      setNewFolderName("");
    }
  }, [mutate, newFolderName]);

  const formatFileSize = (bytes: number | null): string => {
    if (bytes === null) return "";
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  };

  const fileCount = hierarchicalTree.length;

  return (
    <>
      <Modal open={open} onOpenChange={(isOpen) => !isOpen && onClose()}>
        <Modal.Content width="xl" height="fit">
          <Modal.Header
            icon={SvgFileText}
            title="Your Files"
            description="Upload files for your agent to read (Excel, Word, PowerPoint, etc.)"
            onClose={onClose}
          />
          <Modal.Body>
            <Section flexDirection="column" gap={1} alignItems="stretch">
              {/* Upload error */}
              {uploadError && (
                <Section
                  flexDirection="row"
                  alignItems="center"
                  justifyContent="start"
                  padding={0.5}
                  height="fit"
                >
                  <Text secondaryBody>{uploadError}</Text>
                </Section>
              )}

              {/* File explorer */}
              <Section flexDirection="column" alignItems="stretch">
                {/* Action buttons */}
                <Section
                  flexDirection="row"
                  justifyContent="end"
                  gap={0.5}
                  padding={0.5}
                >
                  <Button
                    prominence="secondary"
                    icon={SvgFolderPlus}
                    onClick={() => setShowNewFolderModal(true)}
                    tooltip="New Folder"
                  />
                  <input
                    ref={fileInputRef}
                    type="file"
                    multiple
                    style={{ display: "none" }}
                    onChange={handleFileUpload}
                    disabled={isUploading}
                    accept=".xlsx,.xls,.docx,.doc,.pptx,.ppt,.csv,.json,.txt,.pdf,.zip"
                  />
                  <Button
                    disabled={isUploading}
                    prominence="secondary"
                    icon={SvgUploadCloud}
                    onClick={() => handleUploadToFolder("/")}
                    tooltip={isUploading ? "Uploading..." : "Upload"}
                    aria-label={isUploading ? "Uploading..." : "Upload"}
                  />
                </Section>

                {isLoading ? (
                  <Section padding={2} height="fit">
                    <Text secondaryBody text03>
                      Loading files...
                    </Text>
                  </Section>
                ) : error ? (
                  <Section padding={2} height="fit">
                    <Text secondaryBody text03>
                      Failed to load files
                    </Text>
                  </Section>
                ) : fileCount === 0 ? (
                  <Section padding={2} height="fit" gap={0.5}>
                    <SvgFileText size={32} className="stroke-text-02" />
                    <Text secondaryBody text03>
                      No files uploaded yet
                    </Text>
                    <Text secondaryBody text02>
                      Upload Excel, Word, PowerPoint, or other files for your
                      agent to work with
                    </Text>
                  </Section>
                ) : (
                  <ShadowDiv style={{ maxHeight: "400px", padding: "0.5rem" }}>
                    <LibraryTreeView
                      entries={hierarchicalTree}
                      expandedPaths={expandedPaths}
                      onToggleFolder={toggleFolder}
                      onToggleSync={handleToggleSync}
                      onDelete={setEntryToDelete}
                      onUploadToFolder={handleUploadToFolder}
                      formatFileSize={formatFileSize}
                    />
                  </ShadowDiv>
                )}
              </Section>
            </Section>
          </Modal.Body>

          <Modal.Footer>
            <Button onClick={onClose}>Done</Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>

      {/* Delete confirmation modal */}
      {entryToDelete && (
        <ConfirmEntityModal
          danger
          entityType={entryToDelete.is_directory ? "folder" : "file"}
          entityName={entryToDelete.name}
          action="delete"
          actionButtonText="Delete"
          additionalDetails={
            entryToDelete.is_directory
              ? "This will delete the folder and all its contents."
              : "This file will be removed from your library."
          }
          onClose={() => setEntryToDelete(null)}
          onSubmit={handleDeleteConfirm}
        />
      )}

      {/* New folder modal */}
      <Modal
        open={showNewFolderModal}
        onOpenChange={(isOpen) => {
          if (!isOpen) {
            setShowNewFolderModal(false);
            setNewFolderName("");
          }
        }}
      >
        <Modal.Content width="sm" height="fit">
          <Modal.Header
            icon={SvgFolder}
            title="New Folder"
            onClose={() => {
              setShowNewFolderModal(false);
              setNewFolderName("");
            }}
          />
          <Modal.Body>
            <Section flexDirection="column" gap={0.5} alignItems="stretch">
              <Text secondaryBody text03>
                Folder name
              </Text>
              <InputTypeIn
                value={newFolderName}
                onChange={(e) => setNewFolderName(e.target.value)}
                placeholder="Enter folder name"
                onKeyDown={(e) => {
                  if (e.key === "Enter" && newFolderName.trim()) {
                    handleCreateDirectory();
                  }
                }}
                autoFocus
              />
            </Section>
          </Modal.Body>
          <Modal.Footer>
            <Button
              prominence="secondary"
              onClick={() => {
                setShowNewFolderModal(false);
                setNewFolderName("");
              }}
            >
              Cancel
            </Button>
            <Button
              disabled={!newFolderName.trim()}
              onClick={handleCreateDirectory}
            >
              Create
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </>
  );
}

interface LibraryTreeViewProps {
  entries: LibraryEntry[];
  expandedPaths: Set<string>;
  onToggleFolder: (path: string) => void;
  onToggleSync: (entry: LibraryEntry, enabled: boolean) => void;
  onDelete: (entry: LibraryEntry) => void;
  onUploadToFolder: (folderPath: string) => void;
  formatFileSize: (bytes: number | null) => string;
  depth?: number;
}

function LibraryTreeView({
  entries,
  expandedPaths,
  onToggleFolder,
  onToggleSync,
  onDelete,
  onUploadToFolder,
  formatFileSize,
  depth = 0,
}: LibraryTreeViewProps) {
  // Sort entries: directories first, then alphabetically
  const sortedEntries = [...entries].sort((a, b) => {
    if (a.is_directory && !b.is_directory) return -1;
    if (!a.is_directory && b.is_directory) return 1;
    return a.name.localeCompare(b.name);
  });

  return (
    <>
      {sortedEntries.map((entry) => {
        const isExpanded = expandedPaths.has(entry.path);

        return (
          <Section
            key={entry.id}
            flexDirection="column"
            alignItems="stretch"
            gap={0}
            height="fit"
          >
            <Section
              flexDirection="row"
              alignItems="center"
              justifyContent="start"
              gap={0.25}
              height="fit"
              padding={0.5}
            >
              {/* Indent spacer - inline style needed for dynamic depth */}
              {depth > 0 && (
                <span
                  aria-hidden
                  style={{
                    display: "inline-block",
                    width: `${depth * 1.25}rem`,
                    flexShrink: 0,
                  }}
                />
              )}

              {/* Expand/collapse for directories */}
              {entry.is_directory ? (
                // TODO(@raunakab): migrate to opal Button once it supports style prop
                <IconButton
                  icon={SvgChevronRight}
                  onClick={() => onToggleFolder(entry.path)}
                  small
                  tooltip={isExpanded ? "Collapse" : "Expand"}
                  style={{
                    transform: isExpanded ? "rotate(90deg)" : undefined,
                    transition: "transform 150ms ease",
                  }}
                />
              ) : (
                <Section width="fit" height="fit" gap={0} padding={0}>
                  <SvgChevronRight size={12} style={{ visibility: "hidden" }} />
                </Section>
              )}

              {/* Icon */}
              {entry.is_directory ? (
                isExpanded ? (
                  <SvgFolderOpen size={16} className="stroke-text-03" />
                ) : (
                  <SvgFolder size={16} className="stroke-text-03" />
                )
              ) : (
                <SvgFileText size={16} className="stroke-text-03" />
              )}

              {/* Name */}
              <Section
                flexDirection="row"
                alignItems="center"
                justifyContent="start"
                gap={0}
                height="fit"
              >
                <Text secondaryBody text04 className="truncate">
                  {entry.name}
                </Text>
              </Section>

              {/* File size */}
              {!entry.is_directory && entry.file_size !== null && (
                <Section width="fit" height="fit" gap={0} padding={0}>
                  <Text secondaryBody text02 style={{ whiteSpace: "nowrap" }}>
                    {formatFileSize(entry.file_size)}
                  </Text>
                </Section>
              )}

              {/* Actions */}
              <Section
                flexDirection="row"
                alignItems="center"
                justifyContent="end"
                gap={0.25}
                width="fit"
                height="fit"
              >
                {entry.is_directory && (
                  <Button
                    size="sm"
                    icon={SvgUploadCloud}
                    onClick={(e) => {
                      e.stopPropagation();
                      const uploadPath =
                        entry.path.replace(/^user_library/, "") || "/";
                      onUploadToFolder(uploadPath);
                    }}
                    tooltip="Upload to this folder"
                  />
                )}
                <Button
                  variant="danger"
                  size="sm"
                  icon={SvgTrash}
                  onClick={() => onDelete(entry)}
                  tooltip="Delete"
                />
              </Section>

              {/* Sync toggle */}
              <SimpleTooltip
                tooltip={
                  entry.sync_enabled
                    ? "Synced to sandbox - click to disable"
                    : "Not synced - click to enable"
                }
              >
                <Switch
                  checked={entry.sync_enabled}
                  onCheckedChange={(checked) => onToggleSync(entry, checked)}
                />
              </SimpleTooltip>
            </Section>

            {/* Children */}
            {entry.is_directory && isExpanded && entry.children && (
              <LibraryTreeView
                entries={entry.children}
                expandedPaths={expandedPaths}
                onToggleFolder={onToggleFolder}
                onToggleSync={onToggleSync}
                onDelete={onDelete}
                onUploadToFolder={onUploadToFolder}
                formatFileSize={formatFileSize}
                depth={depth + 1}
              />
            )}
          </Section>
        );
      })}
    </>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/page.tsx
================================================
"use client";

import { useState, useEffect, useCallback, useMemo } from "react";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Section } from "@/layouts/general-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import {
  useBuildSessionStore,
  useIsPreProvisioning,
} from "@/app/craft/hooks/useBuildSessionStore";
import SandboxStatusIndicator from "@/app/craft/components/SandboxStatusIndicator";
import { useBuildLlmSelection } from "@/app/craft/hooks/useBuildLlmSelection";
import { useBuildConnectors } from "@/app/craft/hooks/useBuildConnectors";
import { BuildLLMPopover } from "@/app/craft/components/BuildLLMPopover";
import Text from "@/refresh-components/texts/Text";
import Card from "@/refresh-components/cards/Card";
import {
  SvgPlug,
  SvgSettings,
  SvgChevronDown,
  SvgInfoSmall,
} from "@opal/icons";
import { ValidSources } from "@/lib/types";
import ConnectorCard, {
  BuildConnectorConfig,
} from "@/app/craft/v1/configure/components/ConnectorCard";
import ConfigureConnectorModal from "@/app/craft/v1/configure/components/ConfigureConnectorModal";
import ComingSoonConnectors from "@/app/craft/v1/configure/components/ComingSoonConnectors";
import DemoDataConfirmModal from "@/app/craft/v1/configure/components/DemoDataConfirmModal";
import UserLibraryModal from "@/app/craft/v1/configure/components/UserLibraryModal";
import {
  ConnectorInfoOverlay,
  ReprovisionWarningOverlay,
} from "@/app/craft/v1/configure/components/ConfigureOverlays";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { getSourceMetadata } from "@/lib/sources";
import { deleteConnector } from "@/app/craft/services/apiServices";
import { Button } from "@opal/components";
import {
  OAUTH_STATE_KEY,
  getDemoDataEnabled,
  setDemoDataCookie,
} from "@/app/craft/v1/constants";
import Separator from "@/refresh-components/Separator";
import Switch from "@/refresh-components/inputs/Switch";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import NotAllowedModal from "@/app/craft/onboarding/components/NotAllowedModal";
import { useOnboarding } from "@/app/craft/onboarding/BuildOnboardingProvider";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { useUser } from "@/providers/UserProvider";
import { getProviderIcon } from "@/app/admin/configuration/llm/utils";
import {
  getBuildUserPersona,
  getPersonaInfo,
  getPositionText,
  DEMO_COMPANY_NAME,
  BuildLlmSelection,
  BUILD_MODE_PROVIDERS,
} from "@/app/craft/onboarding/constants";

// Build mode connectors
const BUILD_CONNECTORS: ValidSources[] = [
  ValidSources.GoogleDrive,
  ValidSources.Gmail,
  ValidSources.Notion,
  ValidSources.GitHub,
  ValidSources.Slack,
  ValidSources.Linear,
  ValidSources.Fireflies,
  ValidSources.Hubspot,
  ValidSources.Airtable,
  ValidSources.CraftFile, // User's uploaded files
];

interface SelectedConnectorState {
  type: ValidSources;
  config: BuildConnectorConfig | null;
}

/**
 * Build Admin Panel - Connector configuration page
 *
 * Renders in the center panel area (replacing ChatPanel + OutputPanel).
 * Uses SettingsLayouts like AgentEditorPage does.
 */
export default function BuildConfigPage() {
  const { isAdmin, isCurator } = useUser();
  const { llmProviders } = useLLMProviders();
  const { openPersonaEditor, openLlmSetup } = useOnboarding();
  const [selectedConnector, setSelectedConnector] =
    useState<SelectedConnectorState | null>(null);
  const [connectorToDelete, setConnectorToDelete] =
    useState<BuildConnectorConfig | null>(null);
  const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
  const [showDemoDataConfirmModal, setShowDemoDataConfirmModal] =
    useState(false);
  const [showUserLibraryModal, setShowUserLibraryModal] = useState(false);
  const [pendingDemoDataEnabled, setPendingDemoDataEnabled] = useState<
    boolean | null
  >(null);

  // Pending state for tracking unsaved changes
  const [pendingLlmSelection, setPendingLlmSelection] =
    useState<BuildLlmSelection | null>(null);
  const [pendingDemoData, setPendingDemoData] = useState<boolean | null>(null);
  const [userLibraryChanged, setUserLibraryChanged] = useState(false);
  const [isUpdating, setIsUpdating] = useState(false);

  // Track original values (set on mount and after Update)
  const [originalLlmSelection, setOriginalLlmSelection] =
    useState<BuildLlmSelection | null>(null);
  const [originalDemoData, setOriginalDemoData] = useState<boolean | null>(
    null
  );

  const isBasicUser = !isAdmin && !isCurator;
  const isPreProvisioning = useIsPreProvisioning();

  // Build mode LLM selection (cookie-based)
  const { selection: llmSelection, updateSelection: updateLlmSelection } =
    useBuildLlmSelection(llmProviders);

  // Read demo data from cookie (single source of truth)
  const [demoDataEnabled, setDemoDataEnabledLocal] = useState(() =>
    getDemoDataEnabled()
  );

  // Get store values
  const clearPreProvisionedSession = useBuildSessionStore(
    (state) => state.clearPreProvisionedSession
  );
  const ensurePreProvisionedSession = useBuildSessionStore(
    (state) => state.ensurePreProvisionedSession
  );

  // Initialize pending state from current values on mount
  useEffect(() => {
    if (llmSelection && pendingLlmSelection === null) {
      setPendingLlmSelection(llmSelection);
      setOriginalLlmSelection(llmSelection);
    }
  }, [llmSelection, pendingLlmSelection]);

  useEffect(() => {
    if (pendingDemoData === null) {
      setPendingDemoData(demoDataEnabled);
      setOriginalDemoData(demoDataEnabled);
    }
  }, [demoDataEnabled, pendingDemoData]);

  // Compute whether there are unsaved changes
  const hasChanges = useMemo(() => {
    const llmChanged =
      pendingLlmSelection !== null &&
      originalLlmSelection !== null &&
      (pendingLlmSelection.provider !== originalLlmSelection.provider ||
        pendingLlmSelection.modelName !== originalLlmSelection.modelName);

    const demoDataChanged =
      pendingDemoData !== null &&
      originalDemoData !== null &&
      pendingDemoData !== originalDemoData;

    return llmChanged || demoDataChanged || userLibraryChanged;
  }, [
    pendingLlmSelection,
    pendingDemoData,
    originalLlmSelection,
    originalDemoData,
    userLibraryChanged,
  ]);

  // Compute display name for the pending LLM selection
  const pendingLlmDisplayName = useMemo(() => {
    if (!pendingLlmSelection) return "Select model";

    // 1. Try to get display name from backend llmProviders
    if (llmProviders) {
      for (const provider of llmProviders) {
        const config = provider.model_configurations.find(
          (m) => m.name === pendingLlmSelection.modelName
        );
        if (config) {
          return config.display_name || config.name;
        }
      }
    }

    // 2. Fall back to BUILD_MODE_PROVIDERS labels (for unconfigured providers)
    for (const provider of BUILD_MODE_PROVIDERS) {
      const model = provider.models.find(
        (m) => m.name === pendingLlmSelection.modelName
      );
      if (model) {
        return model.label;
      }
    }

    // 3. Fall back to raw model name
    return pendingLlmSelection.modelName;
  }, [pendingLlmSelection, llmProviders]);

  // Handle LLM selection change - only update pending state
  const handleLlmSelectionChange = useCallback(
    (newSelection: BuildLlmSelection) => {
      setPendingLlmSelection(newSelection);
    },
    []
  );

  // Handle demo data toggle change - only update pending state (after confirmation)
  const handleDemoDataConfirm = useCallback(() => {
    if (pendingDemoDataEnabled !== null) {
      setPendingDemoData(pendingDemoDataEnabled);
    }
    setShowDemoDataConfirmModal(false);
    setPendingDemoDataEnabled(null);
  }, [pendingDemoDataEnabled]);

  // Restore changes - revert pending state to original values
  // Note: User Library changes cannot be reverted (files already uploaded/deleted/toggled)
  // so we just reset the flag - user needs to manually undo file changes if desired
  const handleRestoreChanges = useCallback(() => {
    setPendingLlmSelection(originalLlmSelection);
    setPendingDemoData(originalDemoData);
    setUserLibraryChanged(false);
  }, [originalLlmSelection, originalDemoData]);

  // Update - apply pending changes and re-provision sandbox
  const handleUpdate = useCallback(async () => {
    setIsUpdating(true);
    try {
      // 1. Apply cookies FIRST (synchronous) - these are the user's preferences
      // This ensures settings are persisted even if user navigates away during async operations
      if (pendingLlmSelection) {
        updateLlmSelection(pendingLlmSelection);
        setOriginalLlmSelection(pendingLlmSelection);
      }
      if (pendingDemoData !== null) {
        // Update cookie (single source of truth)
        setDemoDataCookie(pendingDemoData);
        // Update local state for UI reactivity
        setDemoDataEnabledLocal(pendingDemoData);
        setOriginalDemoData(pendingDemoData);
      }

      // 2. Clear pre-provisioned session (may wait if provisioning in progress)
      await clearPreProvisionedSession();

      // 3. Start provisioning a new session with updated settings
      ensurePreProvisionedSession();

      // 4. Reset User Library change flag (sandbox now has the updated files)
      setUserLibraryChanged(false);
    } catch (error) {
      console.error("Failed to update settings:", error);
    } finally {
      setIsUpdating(false);
    }
  }, [
    pendingLlmSelection,
    pendingDemoData,
    updateLlmSelection,
    clearPreProvisionedSession,
    ensurePreProvisionedSession,
  ]);

  // Read persona from cookies
  const existingPersona = getBuildUserPersona();
  const workAreaValue = existingPersona?.workArea;
  const levelValue = existingPersona?.level;

  // Get persona info from mapping
  // If workAreaValue and levelValue exist, personaInfo will always be defined
  // (all combinations are mapped in PERSONA_MAPPING)
  const personaInfo =
    workAreaValue && levelValue
      ? getPersonaInfo(workAreaValue, levelValue)
      : undefined;

  // Get persona name (split into first and last)
  const personaName = personaInfo?.name;
  const [firstName, ...lastNameParts] = personaName?.split(" ") || [];
  const lastName = lastNameParts.join(" ") || "";

  // Get position text using shared helper
  const positionText = workAreaValue
    ? getPositionText(workAreaValue, levelValue)
    : "Not set";

  const hasLlmProvider = (llmProviders?.length ?? 0) > 0;

  const { connectors, hasConnectorEverSucceeded, isLoading, mutate } =
    useBuildConnectors();

  // Check for OAuth return state on mount
  useEffect(() => {
    const savedState = sessionStorage.getItem(OAUTH_STATE_KEY);
    if (savedState) {
      try {
        const { connectorType, timestamp } = JSON.parse(savedState);
        // Only restore if < 10 minutes old
        if (Date.now() - timestamp < 600000) {
          setSelectedConnector({
            type: connectorType as ValidSources,
            config: null,
          });
        }
      } catch (e) {
        console.error("Failed to parse OAuth state:", e);
      }
      sessionStorage.removeItem(OAUTH_STATE_KEY);
    }
  }, []);

  // Merge configured status with all available build connectors
  const connectorStates = BUILD_CONNECTORS.map((type) => ({
    type,
    config: connectors.find((c) => c.source === type) || null,
  }));

  // Auto-enable demo data when no connectors have ever succeeded.
  // Guard against loading state to avoid a race condition: before the
  // connector fetch completes, hasConnectorEverSucceeded is false (empty
  // array fallback), which would incorrectly re-enable demo data.
  useEffect(() => {
    if (isLoading) return;
    if (!hasConnectorEverSucceeded && !demoDataEnabled) {
      // Update cookie (single source of truth)
      setDemoDataCookie(true);
      // Update local state for UI reactivity
      setDemoDataEnabledLocal(true);
      // Also sync pending state so UI stays consistent
      setPendingDemoData(true);
      setOriginalDemoData(true);
      // Clear and re-provision with new setting
      clearPreProvisionedSession().then(() => {
        ensurePreProvisionedSession();
      });
    }
  }, [
    isLoading,
    hasConnectorEverSucceeded,
    demoDataEnabled,
    clearPreProvisionedSession,
    ensurePreProvisionedSession,
  ]);

  const handleDeleteConfirm = async () => {
    if (!connectorToDelete) return;

    try {
      await deleteConnector(
        connectorToDelete.connector_id,
        connectorToDelete.credential_id
      );
      mutate();
    } catch (error) {
      console.error("Failed to delete connector:", error);
    } finally {
      setConnectorToDelete(null);
    }
  };

  return (
    <div className="relative w-full h-full">
      {/* Sandbox status indicator - positioned in top-left corner like ChatPanel */}
      <div className="absolute top-3 left-4 z-20">
        <SandboxStatusIndicator />
      </div>

      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={SvgPlug}
          title="Configure Onyx Craft"
          description="Select data sources and your default LLM"
          rightChildren={
            <div className="flex items-center gap-2">
              <Button
                disabled={!hasChanges || isUpdating}
                prominence="secondary"
                onClick={handleRestoreChanges}
              >
                Restore Changes
              </Button>
              <Button
                disabled={!hasChanges || isUpdating || isPreProvisioning}
                onClick={handleUpdate}
              >
                {isUpdating || isPreProvisioning ? "Updating..." : "Update"}
              </Button>
            </div>
          }
        />
        <SettingsLayouts.Body>
          {isLoading ? (
            <Card variant="tertiary">
              <Section alignItems="center" gap={0.5} height="fit">
                <Text mainContentBody>Loading...</Text>
              </Section>
            </Card>
          ) : (
            <Section flexDirection="column" gap={2}>
              <Section
                flexDirection="column"
                alignItems="start"
                gap={0.5}
                height="fit"
              >
                <Card>
                  <InputLayouts.Horizontal
                    title="Your Demo Persona"
                    description={
                      firstName && lastName && positionText
                        ? `${firstName} ${lastName}, ${positionText} at ${DEMO_COMPANY_NAME}`
                        : positionText
                          ? `${positionText} at ${DEMO_COMPANY_NAME}`
                          : "Not set"
                    }
                    center
                  >
                    <SimpleTooltip
                      tooltip={
                        !hasLlmProvider
                          ? "Configure an LLM provider first"
                          : undefined
                      }
                      disabled={hasLlmProvider}
                    >
                      <button
                        type="button"
                        onClick={() => openPersonaEditor()}
                        disabled={!hasLlmProvider}
                        className="p-2 rounded-08 text-text-03 hover:bg-background-tint-02 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                      >
                        <SvgSettings className="w-5 h-5" />
                      </button>
                    </SimpleTooltip>
                  </InputLayouts.Horizontal>
                </Card>
                <Card
                  className={
                    isUpdating || isPreProvisioning ? "opacity-50" : ""
                  }
                  title={
                    isUpdating || isPreProvisioning
                      ? "Please wait while your session is being provisioned"
                      : undefined
                  }
                >
                  <div
                    className={`w-full ${
                      isUpdating || isPreProvisioning
                        ? "pointer-events-none"
                        : ""
                    }`}
                  >
                    <InputLayouts.Horizontal
                      title="Default LLM"
                      description="Select the language model to craft with"
                      center
                    >
                      <BuildLLMPopover
                        currentSelection={pendingLlmSelection}
                        onSelectionChange={handleLlmSelectionChange}
                        llmProviders={llmProviders}
                        onOpenOnboarding={(providerKey) =>
                          openLlmSetup(providerKey)
                        }
                        disabled={isUpdating || isPreProvisioning}
                      >
                        <button
                          type="button"
                          className="flex items-center gap-2 px-3 py-1.5 rounded-08 border border-border-01 bg-background-tint-00 hover:bg-background-tint-01 transition-colors"
                        >
                          {pendingLlmSelection?.provider &&
                            (() => {
                              const ProviderIcon = getProviderIcon(
                                pendingLlmSelection.provider
                              );
                              return <ProviderIcon className="w-4 h-4" />;
                            })()}
                          <Text mainUiAction>{pendingLlmDisplayName}</Text>
                          <SvgChevronDown className="w-4 h-4 text-text-03" />
                        </button>
                      </BuildLLMPopover>
                    </InputLayouts.Horizontal>
                  </div>
                </Card>
                <Separator />
                <div className="w-full flex items-center justify-between">
                  <div className="flex flex-col gap-0.25">
                    <Text mainContentEmphasis text04>
                      Connectors
                    </Text>
                    <Text secondaryBody text03>
                      Connect your own data sources
                    </Text>
                  </div>
                  <div className="w-fit flex-shrink-0">
                    <SimpleTooltip
                      tooltip={
                        isUpdating || isPreProvisioning
                          ? "Please wait while your session is being provisioned"
                          : !hasConnectorEverSucceeded
                            ? "Connect and sync a data source to disable demo data"
                            : undefined
                      }
                      disabled={
                        hasConnectorEverSucceeded &&
                        !isUpdating &&
                        !isPreProvisioning
                      }
                    >
                      <Card
                        padding={0.75}
                        className={
                          !hasConnectorEverSucceeded ||
                          isUpdating ||
                          isPreProvisioning
                            ? "opacity-50"
                            : ""
                        }
                      >
                        <div
                          className={`flex items-center gap-3 ${
                            !hasConnectorEverSucceeded ||
                            isUpdating ||
                            isPreProvisioning
                              ? "pointer-events-none"
                              : ""
                          }`}
                        >
                          <div className="flex items-center gap-2">
                            <SimpleTooltip tooltip="The demo dataset contains 1000 files across various connectors">
                              <span className="inline-flex items-center cursor-help">
                                <SvgInfoSmall
                                  size={16}
                                  className="text-text-03"
                                />
                              </span>
                            </SimpleTooltip>
                            <Text mainUiAction>Use Demo Dataset</Text>
                          </div>
                          <Switch
                            checked={pendingDemoData ?? demoDataEnabled}
                            disabled={
                              isUpdating ||
                              isPreProvisioning ||
                              !hasConnectorEverSucceeded
                            }
                            onCheckedChange={(newValue) => {
                              setPendingDemoDataEnabled(newValue);
                              setShowDemoDataConfirmModal(true);
                            }}
                          />
                        </div>
                      </Card>
                    </SimpleTooltip>
                  </div>
                </div>
                <div className="w-full grid grid-cols-1 md:grid-cols-2 gap-2 pt-2">
                  {connectorStates.map(({ type, config }) => {
                    const metadata = getSourceMetadata(type);
                    return (
                      <ConnectorCard
                        key={type}
                        connectorType={type}
                        config={config}
                        onConfigure={() => {
                          // Connectors marked as alwaysConnected open their custom modal
                          if (metadata.alwaysConnected) {
                            setShowUserLibraryModal(true);
                            return;
                          }
                          // Only open modal for unconfigured connectors
                          if (!config) {
                            if (isBasicUser) {
                              setShowNotAllowedModal(true);
                            } else {
                              setSelectedConnector({ type, config });
                            }
                          }
                        }}
                        onDelete={() => config && setConnectorToDelete(config)}
                      />
                    );
                  })}
                </div>
                <ComingSoonConnectors />
              </Section>
            </Section>
          )}

          {/* Sticky overlay for reprovision warning */}
          <div className="sticky z-toast bottom-10 w-fit mx-auto">
            <ReprovisionWarningOverlay
              visible={hasChanges && !isLoading}
              onUpdate={handleUpdate}
              isUpdating={isUpdating || isPreProvisioning}
            />
          </div>

          {/* Fixed overlay for connector info - centered on screen like the modal */}
          <ConnectorInfoOverlay visible={!!selectedConnector} />
        </SettingsLayouts.Body>

        <ConfigureConnectorModal
          connectorType={selectedConnector?.type || null}
          existingConfig={selectedConnector?.config || null}
          open={!!selectedConnector}
          onClose={() => setSelectedConnector(null)}
          onSuccess={() => {
            setSelectedConnector(null);
            mutate();
          }}
        />

        {connectorToDelete && (
          <ConfirmEntityModal
            danger
            entityType="connector"
            entityName={
              getSourceMetadata(connectorToDelete.source as ValidSources)
                .displayName
            }
            action="disconnect"
            actionButtonText="Disconnect"
            additionalDetails="This will remove access to this data source. You can reconnect it later."
            onClose={() => setConnectorToDelete(null)}
            onSubmit={handleDeleteConfirm}
          />
        )}

        <NotAllowedModal
          open={showNotAllowedModal}
          onClose={() => setShowNotAllowedModal(false)}
        />

        <DemoDataConfirmModal
          open={showDemoDataConfirmModal}
          onClose={() => {
            setShowDemoDataConfirmModal(false);
            setPendingDemoDataEnabled(null);
          }}
          pendingDemoDataEnabled={pendingDemoDataEnabled}
          onConfirm={handleDemoDataConfirm}
        />

        <UserLibraryModal
          open={showUserLibraryModal}
          onClose={() => setShowUserLibraryModal(false)}
          onChanges={() => setUserLibraryChanged(true)}
        />
      </SettingsLayouts.Root>
    </div>
  );
}


================================================
FILE: web/src/app/craft/v1/configure/utils/createBuildConnector.ts
================================================
import { ValidSources, ProcessingMode } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import { createConnector } from "@/lib/connector";
import { linkCredential } from "@/lib/credential";
import { connectorConfigs, isLoadState } from "@/lib/connectors/connectors";

export interface CreateBuildConnectorParams {
  connectorType: ValidSources;
  credential: Credential<any>;
  connectorSpecificConfig?: Record<string, any>;
  connectorName?: string;
  userEmail?: string;
}

export interface CreateBuildConnectorResult {
  success: boolean;
  error?: string;
  connectorId?: number;
}

function getUserIdentifier(email?: string): string {
  if (!email) return "";
  // Extract the part before @ and sanitize it
  const prefix = email.split("@")[0] || email;
  // Replace any non-alphanumeric characters with dashes
  return `-${prefix.replace(/[^a-zA-Z0-9]/g, "-")}`;
}

export async function createBuildConnector({
  connectorType,
  credential,
  connectorSpecificConfig = {},
  connectorName,
  userEmail,
}: CreateBuildConnectorParams): Promise<CreateBuildConnectorResult> {
  const config =
    connectorConfigs[connectorType as keyof typeof connectorConfigs];
  const userIdentifier = getUserIdentifier(userEmail);
  const name = connectorName || `build-mode-${connectorType}${userIdentifier}`;

  const filteredConfig: Record<string, any> = {};
  Object.entries(connectorSpecificConfig).forEach(([key, value]) => {
    if (value !== "" && value !== null && value !== undefined) {
      if (Array.isArray(value) && value.length === 0) {
        return;
      }
      filteredConfig[key] = value;
    }
  });

  try {
    const [connectorError, connector] = await createConnector({
      name,
      source: connectorType,
      input_type: isLoadState(connectorType) ? "load_state" : "poll",
      connector_specific_config: filteredConfig,
      refresh_freq: config?.overrideDefaultFreq || 1800,
      prune_freq: 2592000,
      indexing_start: null,
      access_type: "private",
      groups: [],
    });

    if (connectorError || !connector) {
      return {
        success: false,
        error: connectorError || "Failed to create connector",
      };
    }

    const linkResponse = await linkCredential(
      connector.id,
      credential.id,
      name,
      "private",
      [],
      undefined,
      "FILE_SYSTEM"
    );

    if (!linkResponse.ok) {
      const linkError = await linkResponse.json();
      return {
        success: false,
        error: linkError.detail || "Failed to link credential",
      };
    }

    return {
      success: true,
      connectorId: connector.id,
    };
  } catch (err) {
    return {
      success: false,
      error: err instanceof Error ? err.message : "Failed to create connector",
    };
  }
}


================================================
FILE: web/src/app/craft/v1/constants.ts
================================================
import Cookies from "js-cookie";

export const CRAFT_PATH = "/craft/v1";
export const CRAFT_CONFIGURE_PATH = `${CRAFT_PATH}/configure`;
export const CRAFT_OAUTH_COOKIE_NAME = "build_mode_oauth";
export const OAUTH_STATE_KEY = "build_oauth_state";
export const CRAFT_DEMO_DATA_COOKIE_NAME = "build_demo_data_enabled";
export const ONYX_CRAFT_CALENDAR_URL = "https://cal.com/team/onyx/onyx-craft";

/**
 * Read demo data enabled setting from cookie.
 * This is the single source of truth for the demo data setting.
 * Defaults to true if cookie doesn't exist or is invalid.
 */
export function getDemoDataEnabled(): boolean {
  if (typeof window === "undefined") return true; // SSR fallback
  const cookieValue = Cookies.get(CRAFT_DEMO_DATA_COOKIE_NAME);
  if (cookieValue === "false") return false;
  return true; // Default to true
}

/**
 * Write demo data enabled setting to cookie.
 */
export function setDemoDataCookie(enabled: boolean): void {
  Cookies.set(CRAFT_DEMO_DATA_COOKIE_NAME, String(enabled), {
    path: "/",
    expires: 365, // 1 year
  });
}


================================================
FILE: web/src/app/craft/v1/layout.tsx
================================================
"use client";

import { BuildProvider } from "@/app/craft/contexts/BuildContext";
import { UploadFilesProvider } from "@/app/craft/contexts/UploadFilesContext";
import { BuildOnboardingProvider } from "@/app/craft/onboarding/BuildOnboardingProvider";
import BuildSidebar from "@/app/craft/components/SideBar";

/**
 * Build V1 Layout - Skeleton pattern with 3-panel layout
 *
 * Wraps with BuildProvider and UploadFilesProvider (for file uploads).
 * Includes BuildSidebar on the left.
 * Pre-provisioning is handled by useBuildSessionController.
 * The page component provides the center (chat) and right (output) panels.
 */
export default function Layout({ children }: { children: React.ReactNode }) {
  return (
    <UploadFilesProvider>
      <BuildProvider>
        <BuildOnboardingProvider>
          <div className="flex flex-row w-full h-full">
            <BuildSidebar />
            {children}
          </div>
        </BuildOnboardingProvider>
      </BuildProvider>
    </UploadFilesProvider>
  );
}


================================================
FILE: web/src/app/craft/v1/page.tsx
================================================
"use client";

import { useSearchParams } from "next/navigation";
import { useBuildSessionController } from "@/app/craft/hooks/useBuildSessionController";
import {
  useOutputPanelOpen,
  useToggleOutputPanel,
} from "@/app/craft/hooks/useBuildSessionStore";
import { getSessionIdFromSearchParams } from "@/app/craft/services/searchParams";
import BuildChatPanel from "@/app/craft/components/ChatPanel";
import BuildOutputPanel from "@/app/craft/components/OutputPanel";

/**
 * Build V1 Page - Entry point for builds
 *
 * URL: /craft/v1 (new build)
 * URL: /craft/v1?sessionId=xxx (existing session)
 *
 * Renders the 2-panel layout (chat + output) and handles session controller setup.
 */
export default function BuildV1Page() {
  const searchParams = useSearchParams();
  const sessionId = getSessionIdFromSearchParams(searchParams);

  const outputPanelOpen = useOutputPanelOpen();
  const toggleOutputPanel = useToggleOutputPanel();
  useBuildSessionController({ existingSessionId: sessionId });

  return (
    <div className="relative flex-1 h-full overflow-hidden">
      {/* Chat panel - always full width for background */}
      <BuildChatPanel existingSessionId={sessionId} />

      {/* Output panel - floats over as a card */}
      <BuildOutputPanel onClose={toggleOutputPanel} isOpen={outputPanelOpen} />
    </div>
  );
}


================================================
FILE: web/src/app/css/attachment-button.css
================================================
/* AttachmentButton styles */

.attachment-button {
  display: flex;
  flex-direction: row;
  width: 100%;
  padding: 0.25rem;
  background-color: var(--background-tint-00);
  border-radius: var(--border-radius-12);
  gap: 0.5rem;
}

.attachment-button:hover {
  background-color: var(--background-tint-02);
}

.attachment-button[data-state="selected"] {
  background-color: var(--action-link-01);
}

.attachment-button__content {
  flex: 1;
  display: flex;
  flex-direction: row;
  gap: 0.5rem;
  min-width: 0;
}

.attachment-button__icon-wrapper {
  height: 100%;
  aspect-ratio: 1;
  background-color: var(--background-tint-01);
  border-radius: var(--border-radius-08);
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  flex-shrink: 0;
}

.attachment-button__icon {
  height: 1rem;
  width: 1rem;
  stroke: var(--text-02);
}

.attachment-button[data-state="processing"] .attachment-button__icon {
  stroke: var(--text-01);
}

.attachment-button__text-container {
  display: flex;
  flex-direction: column;
  align-items: flex-start;
  justify-content: center;
  min-width: 0;
  flex: 1;
}

.attachment-button__title-row {
  display: flex;
  flex-direction: row;
  align-items: center;
  gap: 0.5rem;
  width: 100%;
  min-width: 0;
}

.attachment-button__title-wrapper {
  max-width: 70%;
  min-width: 0;
  flex-shrink: 1;
  overflow: hidden;
}

.attachment-button__view-button {
  flex-shrink: 0;
  visibility: hidden;
}

.attachment-button:hover .attachment-button__view-button {
  visibility: visible;
}

.attachment-button__actions {
  display: flex;
  flex-direction: row;
  align-self: stretch;
  justify-content: flex-end;
  align-items: center;
  gap: 0.5rem;
  padding: 0.25rem;
  flex-shrink: 0;
}

.attachment-button__action-button {
  visibility: hidden;
}

.attachment-button:hover .attachment-button__action-button {
  visibility: visible;
}


================================================
FILE: web/src/app/css/button.css
================================================
/* ============================================================================
   Main Variant - Primary
   ============================================================================ */

.button-main-primary {
  background-color: var(--theme-primary-05);
}
.button-main-primary:hover {
  background-color: var(--theme-primary-04);
}
.button-main-primary[data-state="transient"] {
  background-color: var(--theme-primary-06);
}
.button-main-primary:active {
  background-color: var(--theme-primary-06);
}
.button-main-primary:disabled {
  background-color: var(--background-neutral-04);
}

.button-main-primary-text {
  color: var(--text-inverted-05) !important;
}
.button-main-primary:disabled .button-main-primary-text {
  color: var(--text-inverted-04) !important;
}

.button-main-primary-icon {
  stroke: var(--text-inverted-05);
}
.button-main-primary:disabled .button-main-primary-icon {
  stroke: var(--text-inverted-04);
}

/* ============================================================================
   Main Variant - Secondary
   ============================================================================ */

.button-main-secondary {
  background-color: var(--background-tint-01);
  border: 1px solid var(--border-01);
}
.button-main-secondary:hover {
  background-color: var(--background-tint-02);
}
.button-main-secondary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-main-secondary:active {
  background-color: var(--background-tint-00);
}
.button-main-secondary:disabled {
  background-color: var(--background-neutral-03);
  border: 1px solid var(--border-01);
}

.button-main-secondary-text {
  color: var(--text-03) !important;
}
.button-main-secondary:hover .button-main-secondary-text {
  color: var(--text-04) !important;
}
.button-main-secondary[data-state="transient"] .button-main-secondary-text {
  color: var(--text-05) !important;
}
.button-main-secondary:active .button-main-secondary-text {
  color: var(--text-05) !important;
}
.button-main-secondary:disabled .button-main-secondary-text {
  color: var(--text-01) !important;
}

.button-main-secondary-icon {
  stroke: var(--text-03);
}
.button-main-secondary:hover .button-main-secondary-icon {
  stroke: var(--text-04);
}
.button-main-secondary[data-state="transient"] .button-main-secondary-icon {
  stroke: var(--text-05);
}
.button-main-secondary:active .button-main-secondary-icon {
  stroke: var(--text-05);
}
.button-main-secondary:disabled .button-main-secondary-icon {
  stroke: var(--text-01);
}

/* ============================================================================
   Main Variant - Tertiary
   ============================================================================ */

.button-main-tertiary {
  background-color: transparent;
}
.button-main-tertiary:hover {
  background-color: var(--background-tint-02);
}
.button-main-tertiary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-main-tertiary:active {
  background-color: var(--background-tint-00);
}
.button-main-tertiary:disabled {
  background-color: transparent;
}

.button-main-tertiary-text {
  color: var(--text-03) !important;
}
.button-main-tertiary:hover .button-main-tertiary-text {
  color: var(--text-04) !important;
}
.button-main-tertiary[data-state="transient"] .button-main-tertiary-text {
  color: var(--text-05) !important;
}
.button-main-tertiary:active .button-main-tertiary-text {
  color: var(--text-05) !important;
}
.button-main-tertiary:disabled .button-main-tertiary-text {
  color: var(--text-01) !important;
}

.button-main-tertiary-icon {
  stroke: var(--text-03);
}
.button-main-tertiary:hover .button-main-tertiary-icon {
  stroke: var(--text-04);
}
.button-main-tertiary[data-state="transient"] .button-main-tertiary-icon {
  stroke: var(--text-05);
}
.button-main-tertiary:active .button-main-tertiary-icon {
  stroke: var(--text-05);
}
.button-main-tertiary:disabled .button-main-tertiary-icon {
  stroke: var(--text-01);
}

/* ============================================================================
   Main Variant - Internal
   ============================================================================ */

.button-main-internal {
  background-color: transparent;
}
.button-main-internal:hover {
  background-color: var(--background-tint-02);
}
.button-main-internal[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-main-internal:active {
  background-color: var(--background-tint-00);
}
.button-main-internal:disabled {
  background-color: transparent;
}

.button-main-internal-text {
  color: var(--text-03) !important;
}
.button-main-internal:hover .button-main-internal-text {
  color: var(--text-04) !important;
}
.button-main-internal[data-state="transient"] .button-main-internal-text {
  color: var(--text-05) !important;
}
.button-main-internal:active .button-main-internal-text {
  color: var(--text-05) !important;
}
.button-main-internal:disabled .button-main-internal-text {
  color: var(--text-01) !important;
}

.button-main-internal-icon {
  stroke: var(--text-03);
}
.button-main-internal:hover .button-main-internal-icon {
  stroke: var(--text-04);
}
.button-main-internal[data-state="transient"] .button-main-internal-icon {
  stroke: var(--text-05);
}
.button-main-internal:active .button-main-internal-icon {
  stroke: var(--text-05);
}
.button-main-internal:disabled .button-main-internal-icon {
  stroke: var(--text-01);
}

/* ============================================================================
   Action Variant - Primary
   ============================================================================ */

.button-action-primary {
  background-color: var(--action-link-05);
}
.button-action-primary:hover {
  background-color: var(--action-link-04);
}
.button-action-primary[data-state="transient"] {
  background-color: var(--action-link-06);
}
.button-action-primary:active {
  background-color: var(--action-link-06);
}
.button-action-primary:disabled {
  background-color: var(--action-link-02);
}

.button-action-primary-text {
  color: var(--text-light-05) !important;
}
.button-action-primary:disabled .button-action-primary-text {
  color: var(--text-01) !important;
}

.button-action-primary-icon {
  stroke: var(--text-light-05);
}
.button-action-primary:disabled .button-action-primary-icon {
  stroke: var(--text-01);
}

/* ============================================================================
   Action Variant - Secondary
   ============================================================================ */

.button-action-secondary {
  background-color: var(--background-tint-01);
  border: 1px solid var(--border-01);
}
.button-action-secondary:hover {
  background-color: var(--background-tint-02);
}
.button-action-secondary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-action-secondary:active {
  background-color: var(--background-tint-00);
}
.button-action-secondary:disabled {
  background-color: var(--background-neutral-02);
  border: 1px solid var(--border-01);
}

.button-action-secondary-text {
  color: var(--action-text-link-05) !important;
}
.button-action-secondary:disabled .button-action-secondary-text {
  color: var(--action-link-03) !important;
}

.button-action-secondary-icon {
  stroke: var(--action-text-link-05);
}
.button-action-secondary:disabled .button-action-secondary-icon {
  stroke: var(--action-link-03);
}

/* ============================================================================
   Action Variant - Tertiary
   ============================================================================ */

.button-action-tertiary {
  background-color: transparent;
}
.button-action-tertiary:hover {
  background-color: var(--background-tint-02);
}
.button-action-tertiary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-action-tertiary:active {
  background-color: var(--background-tint-00);
}
.button-action-tertiary:disabled {
  background-color: transparent;
}

.button-action-tertiary-text {
  color: var(--action-text-link-05) !important;
}
.button-action-tertiary:disabled .button-action-tertiary-text {
  color: var(--action-link-03) !important;
}

.button-action-tertiary-icon {
  stroke: var(--action-text-link-05);
}
.button-action-tertiary:disabled .button-action-tertiary-icon {
  stroke: var(--action-link-03);
}

/* ============================================================================
   Danger Variant - Primary
   ============================================================================ */

.button-danger-primary {
  background-color: var(--action-danger-05);
}
.button-danger-primary:hover {
  background-color: var(--action-danger-04);
}
.button-danger-primary[data-state="transient"] {
  background-color: var(--action-danger-06);
}
.button-danger-primary:active {
  background-color: var(--action-danger-06);
}
.button-danger-primary:disabled {
  background-color: var(--action-danger-02);
}

.button-danger-primary-text {
  color: var(--text-light-05) !important;
}
.button-danger-primary:disabled .button-danger-primary-text {
  color: var(--text-01) !important;
}

.button-danger-primary-icon {
  stroke: var(--text-light-05);
}
.button-danger-primary:disabled .button-danger-primary-icon {
  stroke: var(--text-01);
}

/* ============================================================================
   Danger Variant - Secondary
   ============================================================================ */

.button-danger-secondary {
  background-color: var(--background-tint-01);
  border: 1px solid var(--border-01);
}
.button-danger-secondary:hover {
  background-color: var(--background-tint-02);
}
.button-danger-secondary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-danger-secondary:active {
  background-color: var(--background-tint-00);
}
.button-danger-secondary:disabled {
  background-color: var(--background-neutral-02);
  border: 1px solid var(--border-01);
}

.button-danger-secondary-text {
  color: var(--action-text-danger-05) !important;
}
.button-danger-secondary:disabled .button-danger-secondary-text {
  color: var(--action-danger-03) !important;
}

.button-danger-secondary-icon {
  stroke: var(--action-text-danger-05);
}
.button-danger-secondary:disabled .button-danger-secondary-icon {
  stroke: var(--action-danger-03);
}

/* ============================================================================
   Danger Variant - Tertiary
   ============================================================================ */

.button-danger-tertiary {
  background-color: transparent;
}
.button-danger-tertiary:hover {
  background-color: var(--background-tint-02);
}
.button-danger-tertiary[data-state="transient"] {
  background-color: var(--background-tint-00);
}
.button-danger-tertiary:active {
  background-color: var(--background-tint-00);
}
.button-danger-tertiary:disabled {
  background-color: transparent;
}

.button-danger-tertiary-text {
  color: var(--action-text-danger-05) !important;
}
.button-danger-tertiary:disabled .button-danger-tertiary-text {
  color: var(--action-danger-03) !important;
}

.button-danger-tertiary-icon {
  stroke: var(--action-text-danger-05);
}
.button-danger-tertiary:disabled .button-danger-tertiary-icon {
  stroke: var(--action-danger-03);
}


================================================
FILE: web/src/app/css/card.css
================================================
.card {
  @apply rounded-16 w-full overflow-clip;
}

.card[data-variant="primary"] {
  @apply bg-background-tint-00 border;
}

.card[data-variant="secondary"] {
  @apply bg-transparent border;
}

.card[data-variant="tertiary"] {
  @apply bg-transparent border border-dashed;
}

.card[data-variant="disabled"] {
  @apply cursor-not-allowed bg-background-tint-00 border opacity-50;
}

.card[data-variant="borderless"] {
  @apply bg-background-tint-00;
}


================================================
FILE: web/src/app/css/code.css
================================================
.code-block {
  display: block;
  padding: 0.5rem;
  background-color: var(--background-tint-00);
  border: 1px solid var(--border-01);
  border-radius: var(--border-radius-12);
  word-break: break-all;
  font-family: var(--font-mono);
  font-size: 0.75rem;
  line-height: 1rem;
  color: var(--text-03);
}

.code-copy-button {
  position: absolute;
  top: 0.5rem;
  right: 0.5rem;
  opacity: 0;
  transition: opacity 150ms ease-in-out;
}

.code-wrapper:hover .code-copy-button {
  opacity: 1;
}


================================================
FILE: web/src/app/css/color-swatch.css
================================================
.color-swatch {
  /* Base styles */
  display: inline-flex;
  align-items: center;
  justify-content: center;
  border: 1.5px solid var(--border-01);
  border-radius: var(--border-radius-08);
  padding: 0.12rem 0.25rem;
  background-color: var(--background-tint-light-01);
  gap: 0.1rem;
}

.color-swatch[data-state="dark"] {
  background-color: var(--background-tint-dark-01);
}

.color-swatch__text {
  font-size: 0.5rem;
  font-weight: 500;
  color: var(--text-dark-05);
}

.color-swatch[data-state="dark"] .color-swatch__text {
  color: var(--text-light-05);
}


================================================
FILE: web/src/app/css/colors.css
================================================
/* Base Colors */
:root {
  /* Grey Scale */
  --grey-100: #000000;
  --grey-98: #050505;
  --grey-96: #0a0a0a;
  --grey-94: #0f0f0f;
  --grey-92: #141414;
  --grey-90: #1a1a1a;
  --grey-85: #262626;
  --grey-80: #333333;
  --grey-75: #404040;
  --grey-70: #4d4d4d;
  --grey-60: #555555;
  --grey-50: #808080;
  --grey-40: #a4a4a4;
  --grey-30: #b2b2b2;
  --grey-20: #cccccc;
  --grey-10: #e6e6e6;
  --grey-08: #ebebeb;
  --grey-06: #f0f0f0;
  --grey-04: #f5f5f5;
  --grey-02: #fafafa;
  --grey-00: #ffffff;

  /* Alpha Grey 100 (Black with opacity) */
  --alpha-grey-100-95: #000000f2;
  --alpha-grey-100-90: #000000e5;
  --alpha-grey-100-85: #000000d9;
  --alpha-grey-100-80: #000000cc;
  --alpha-grey-100-75: #000000bf;
  --alpha-grey-100-70: #000000b2;
  --alpha-grey-100-65: #000000a6;
  --alpha-grey-100-60: #00000099;
  --alpha-grey-100-55: #0000008c;
  --alpha-grey-100-50: #00000080;
  --alpha-grey-100-45: #00000073;
  --alpha-grey-100-40: #00000066;
  --alpha-grey-100-35: #00000059;
  --alpha-grey-100-30: #0000004d;
  --alpha-grey-100-25: #00000040;
  --alpha-grey-100-20: #00000033;
  --alpha-grey-100-15: #00000026;
  --alpha-grey-100-10: #0000001a;
  --alpha-grey-100-05: #0000000d;
  --alpha-grey-100-00: #00000000;

  /* Alpha Grey 00 (White with opacity) */
  --alpha-grey-00-95: #fffffff2;
  --alpha-grey-00-90: #ffffffe5;
  --alpha-grey-00-85: #ffffffd9;
  --alpha-grey-00-80: #ffffffcc;
  --alpha-grey-00-75: #ffffffbf;
  --alpha-grey-00-70: #ffffffb2;
  --alpha-grey-00-65: #ffffffa6;
  --alpha-grey-00-60: #ffffff99;
  --alpha-grey-00-55: #ffffff8c;
  --alpha-grey-00-50: #ffffff80;
  --alpha-grey-00-45: #ffffff73;
  --alpha-grey-00-40: #ffffff66;
  --alpha-grey-00-35: #ffffff59;
  --alpha-grey-00-30: #ffffff4d;
  --alpha-grey-00-25: #ffffff40;
  --alpha-grey-00-20: #ffffff33;
  --alpha-grey-00-15: #ffffff26;
  --alpha-grey-00-10: #ffffff1a;
  --alpha-grey-00-05: #ffffff0d;
  --alpha-grey-00-00: #ffffff00;

  /* Blue Scale */
  --blue-95: #040e25;
  --blue-90: #091938;
  --blue-85: #11254e;
  --blue-80: #173268;
  --blue-60: #3363c3;
  --blue-50: #286df8;
  --blue-45: #397bff;
  --blue-40: #508afb;
  --blue-20: #9bbeff;
  --blue-10: #cddfff;
  --blue-05: #e7effc;
  --blue-01: #f8fafe;

  /* Green Scale */
  --green-95: #001503;
  --green-90: #002207;
  --green-85: #00320d;
  --green-80: #004214;
  --green-60: #008933;
  --green-50: #00a43f;
  --green-40: #2eaa4d;
  --green-20: #91d099;
  --green-10: #c9e8cc;
  --green-05: #e6f2e7;
  --green-01: #f8fbf8;

  /* Red Scale */
  --red-95: #210504;
  --red-90: #330b09;
  --red-85: #481310;
  --red-80: #5f1a16;
  --red-60: #b02b27;
  --red-50: #dc2626;
  --red-45: #f23a36;
  --red-40: #e8594e;
  --red-20: #f8a59b;
  --red-10: #fed2cc;
  --red-05: #fceae7;
  --red-01: #fef7f6;

  /* Orange Scale */
  --orange-95: #200600;
  --orange-90: #320d01;
  --orange-85: #471602;
  --orange-80: #5d1e01;
  --orange-60: #b44105;
  --orange-55: #ce4b05;
  --orange-50: #ec5b13;
  --orange-40: #e1642f;
  --orange-20: #f5a88b;
  --orange-10: #fcd4c5;
  --orange-05: #fbeae4;
  --orange-01: #fef9f7;

  /* Purple Scale */
  --purple-95: #140921;
  --purple-90: #211132;
  --purple-85: #301b47;
  --purple-80: #41255e;
  --purple-60: #7e4bb2;
  --purple-50: #9948e3;
  --purple-45: #a361e6;
  --purple-40: #a96fe8;
  --purple-20: #ccaef2;
  --purple-10: #e5d6fa;
  --purple-05: #f1ebfa;
  --purple-01: #f9f7fd;

  /* Neon Scale
     Base vars (--neon-X) are the /40 level.
     Alpha variants use -aXX suffix (e.g. -a60 = 40 at 60% opacity).
     Numeric suffixes are Figma scale levels (e.g. -50 = Neon/X/50). */
  --neon-yellow-90: #5a581d;
  --neon-yellow-80: #979430;
  --neon-yellow-50: #ece600;
  --neon-yellow: #fef800;
  --neon-yellow-a60: #fef80099;
  --neon-yellow-a30: #fef8004d;
  --neon-yellow-20: #fcfa8f;
  --neon-yellow-05: #f9faeb;

  --neon-amber-90: #625025;
  --neon-amber-80: #a68018;
  --neon-amber-60: #d9a500;
  --neon-amber-50: #ecb400;
  --neon-amber: #ffc733;
  --neon-amber-a60: #ffc73399;
  --neon-amber-a30: #ffc7334d;
  --neon-amber-20: #ffd985;
  --neon-amber-05: #fef8ea;

  --neon-sky-90: #204f67;
  --neon-sky-80: #3989b3;
  --neon-sky-50: #1ebcff;
  --neon-sky: #4dc3ff;
  --neon-sky-a60: #4dc3ff99;
  --neon-sky-a30: #4dc3ff4d;
  --neon-sky-20: #93d8ff;
  --neon-sky-05: #f2faff;

  --neon-cyan-90: #1a5e5d;
  --neon-cyan-80: #009a99;
  --neon-cyan-50: #00ebea;
  --neon-cyan: #00f9f9;
  --neon-cyan-a60: #00f9f999;
  --neon-cyan-a30: #00f9f94d;
  --neon-cyan-20: #62fefd;
  --neon-cyan-05: #eafdfc;

  --neon-lime-90: #3f5b39;
  --neon-lime-80: #639e56;
  --neon-lime-60: #53cd32;
  --neon-lime: #6dff46;
  --neon-lime-a60: #6dff4699;
  --neon-lime-a30: #6dff464d;
  --neon-lime-20: #a8ff94;
  --neon-lime-05: #f2fcf0;

  --neon-magenta-90: #654666;
  --neon-magenta-80: #ab6bac;
  --neon-magenta-50: #f198f2;
  --neon-magenta: #fea1ff;
  --neon-magenta-a60: #fea1ff99;
  --neon-magenta-a30: #fea1ff4d;
  --neon-magenta-20: #fec4fe;
  --neon-magenta-05: #fff5ff;

  /* Stone Scale */
  --stone-98: #0b0b0f;
  --stone-95: #19191e;
  --stone-90: #26262b;
  --stone-85: #323239;
  --stone-80: #3f3f46;
  --stone-60: #54545d;
  --stone-50: #7c7c83;
  --stone-40: #a4a4ab;
  --stone-20: #cccccf;
  --stone-10: #e6e6e9;
  --stone-05: #f0f0f1;
  --stone-02: #fafafa;

  /* Chalk Scale */
  --chalk-98: #150702;
  --chalk-95: #1c1917;
  --chalk-90: #292524;
  --chalk-85: #373230;
  --chalk-80: #433f3b;
  --chalk-60: #595550;
  --chalk-50: #827c78;
  --chalk-40: #a9a3a0;
  --chalk-20: #cfcbc9;
  --chalk-10: #e8e6e5;
  --chalk-05: #f0f0ee;
  --chalk-02: #fafaf9;

  /* Slate Scale */
  --slate-98: #050b17;
  --slate-95: #161a21;
  --slate-90: #202730;
  --slate-85: #2b333f;
  --slate-80: #38404d;
  --slate-60: #4d5663;
  --slate-50: #777d8a;
  --slate-40: #9fa5ae;
  --slate-20: #c7ccd4;
  --slate-10: #e4e6ea;
  --slate-05: #eef0f3;
  --slate-02: #f9fafb;
}

/* Brand Colors */
:root {
  /* Onyx / Ink */
  --onyx-ink-100: #000000;
  --onyx-ink-95: #1c1c1c;
  --onyx-ink-90: #333333;

  /* Onyx / Chrome */
  --onyx-chrome-20: #cacaca;
  --onyx-chrome-10: #e9e9e9;
  --onyx-chrome-00: #ffffff;

  /* Tint (referencing Stone variables) */
  --tint-98: var(--stone-98);
  --tint-95: var(--stone-95);
  --tint-90: var(--stone-90);
  --tint-85: var(--stone-85);
  --tint-80: var(--stone-80);
  --tint-60: var(--stone-60);
  --tint-50: var(--stone-50);
  --tint-40: var(--stone-40);
  --tint-20: var(--stone-20);
  --tint-10: var(--stone-10);
  --tint-05: var(--stone-05);
  --tint-02: var(--stone-02);
}

/* Light Colors */
:root {
  /* Shimmer colors for loading animations */
  --shimmer-base: #a3a3a3;
  --shimmer-highlight: #000000;

  /* Text */
  --text-05: var(--alpha-grey-100-90);
  --text-04: var(--alpha-grey-100-75);
  --text-03: var(--alpha-grey-100-55);
  --text-02: var(--alpha-grey-100-45);
  --text-01: var(--alpha-grey-100-20);
  --text-inverted-01: var(--alpha-grey-00-20);
  --text-inverted-02: var(--alpha-grey-00-45);
  --text-inverted-03: var(--alpha-grey-00-60);
  --text-inverted-04: var(--alpha-grey-00-85);
  --text-inverted-05: var(--alpha-grey-00-95);
  --text-light-03: var(--alpha-grey-00-60);
  --text-light-05: var(--grey-00);
  --text-dark-03: var(--alpha-grey-100-55);
  --text-dark-05: var(--grey-100);

  /* Background / Neutral */
  --background-neutral-00: var(--grey-00);
  --background-neutral-01: var(--grey-02);
  --background-neutral-02: var(--grey-06);
  --background-neutral-03: var(--grey-10);
  --background-neutral-04: var(--grey-20);
  --background-neutral-inverted-04: var(--grey-60);
  --background-neutral-inverted-03: var(--grey-75);
  --background-neutral-inverted-02: var(--grey-85);
  --background-neutral-inverted-01: var(--grey-90);
  --background-neutral-inverted-00: var(--grey-100);
  --background-neutral-light-00: var(--grey-00);
  --background-neutral-light-03: var(--grey-10);
  --background-neutral-dark-03: var(--grey-80);

  /* Background / Tint */
  --background-tint-00: var(--grey-00);
  --background-tint-01: var(--tint-02);
  --background-tint-02: var(--tint-05);
  --background-tint-03: var(--tint-10);
  --background-tint-04: var(--tint-20);
  --background-tint-inverted-04: var(--tint-60);
  --background-tint-inverted-03: var(--tint-85);
  --background-tint-inverted-02: var(--tint-90);
  --background-tint-inverted-01: var(--tint-95);
  --background-tint-inverted-00: var(--grey-100);
  --background-tint-light-01: var(--tint-02);
  --background-tint-dark-01: var(--tint-95);

  /* Border */
  --border-01: var(--grey-10);
  --border-02: var(--grey-20);
  --border-03: var(--grey-40);
  --border-04: var(--grey-50);
  --border-05: var(--grey-100);
  --border-inverted-05: var(--grey-00);
  --border-inverted-04: var(--grey-30);
  --border-inverted-03: var(--grey-50);
  --border-inverted-02: var(--grey-60);
  --border-inverted-01: var(--grey-80);

  /* Theme */
  --theme-primary-06: var(--onyx-ink-100);
  --theme-primary-05: var(--onyx-ink-95);
  --theme-primary-04: var(--onyx-ink-90);

  /* Theme / Gradient */
  --theme-gradient-05: var(--tint-50);
  --theme-gradient-00: var(--grey-100);

  /* Theme / Red */
  --theme-red-05: var(--red-50);
  --theme-red-04: var(--red-50);
  --theme-red-02: var(--red-20);
  --theme-red-01: var(--red-05);

  /* Theme / Orange */
  --theme-orange-05: var(--orange-55);
  --theme-orange-04: var(--orange-50);
  --theme-orange-02: var(--orange-20);
  --theme-orange-01: var(--orange-05);

  /* Theme / Amber */
  --theme-amber-05: var(--neon-amber-50);
  --theme-amber-04: var(--neon-amber);
  --theme-amber-02: var(--neon-amber-20);
  --theme-amber-01: var(--neon-amber-05);

  /* Theme / Yellow */
  --theme-yellow-05: var(--neon-yellow-50);
  --theme-yellow-02: var(--neon-yellow-20);
  --theme-yellow-01: var(--neon-yellow-05);

  /* Theme / Green */
  --theme-green-05: var(--green-60);
  --theme-green-02: var(--green-20);
  --theme-green-01: var(--green-05);

  /* Theme / Lime */
  --theme-lime-05: var(--neon-lime-60);
  --theme-lime-02: var(--neon-lime-20);
  --theme-lime-01: var(--neon-lime-05);

  /* Theme / Cyan */
  --theme-cyan-05: var(--neon-cyan-50);
  --theme-cyan-02: var(--neon-cyan-20);
  --theme-cyan-01: var(--neon-cyan-05);

  /* Theme / Sky */
  --theme-sky-05: var(--neon-sky-50);
  --theme-sky-02: var(--neon-sky-20);
  --theme-sky-01: var(--neon-sky-05);

  /* Theme / Blue */
  --theme-blue-05: var(--blue-50);
  --theme-blue-02: var(--blue-20);
  --theme-blue-01: var(--blue-05);

  /* Theme / Purple */
  --theme-purple-05: var(--purple-50);
  --theme-purple-02: var(--purple-20);
  --theme-purple-01: var(--purple-05);

  /* Theme / Magenta */
  --theme-magenta-05: var(--neon-magenta-50);
  --theme-magenta-02: var(--neon-magenta-20);
  --theme-magenta-01: var(--neon-magenta-05);

  /* Status */
  --status-success-05: var(--green-50);
  --status-success-02: var(--green-20);
  --status-success-01: var(--green-05);
  --status-success-00: var(--green-01);
  --status-info-05: var(--blue-50);
  --status-info-02: var(--blue-20);
  --status-info-01: var(--blue-05);
  --status-info-00: var(--blue-01);
  --status-warning-05: var(--orange-50);
  --status-warning-02: var(--orange-20);
  --status-warning-01: var(--orange-05);
  --status-warning-00: var(--orange-01);
  --status-error-05: var(--red-50);
  --status-error-02: var(--red-20);
  --status-error-01: var(--red-05);
  --status-error-00: var(--red-01);

  /* Status / Text */
  --status-text-success-05: var(--green-60);
  --status-text-info-05: var(--blue-50);
  --status-text-warning-05: var(--orange-55);
  --status-text-error-05: var(--red-50);

  /* Action */
  --action-link-06: var(--blue-60);
  --action-link-05: var(--blue-50);
  --action-link-04: var(--blue-40);
  --action-link-03: var(--blue-20);
  --action-link-02: var(--blue-10);
  --action-link-01: var(--blue-05);
  --action-link-00: var(--blue-01);
  --action-danger-06: var(--red-60);
  --action-danger-05: var(--red-50);
  --action-danger-04: var(--red-40);
  --action-danger-03: var(--red-20);
  --action-danger-02: var(--red-10);
  --action-danger-01: var(--red-05);

  /* Action / Text */
  --action-text-link-05: var(--blue-50);
  --action-text-danger-05: var(--red-50);

  /* Background / Code */
  --background-code-01: var(--grey-02);

  /* Code */
  --code-code: var(--alpha-grey-100-85);
  --code-comment: var(--alpha-grey-100-35);
  --code-keyword: var(--purple-50);
  --code-string: var(--green-60);
  --code-number: var(--blue-50);
  --code-definition: var(--orange-55);

  /* Highlight */
  --highlight-match: var(--neon-yellow-a30);
  --highlight-selection: var(--neon-sky-a30);
  --highlight-active: var(--neon-amber-a60);
  --highlight-accent: var(--neon-magenta-a60);

  /* Shadow */
  --shadow-01: var(--alpha-grey-100-05);
  --shadow-02: var(--alpha-grey-100-10);
  --shadow-03: var(--alpha-grey-100-20);

  /* Mask */
  --mask-01: var(--alpha-grey-00-10);
  --mask-02: var(--alpha-grey-100-20);
  --mask-03: var(--alpha-grey-100-40);

  /* Frost Overlay (for FrostedDiv component) - lighter in light mode */
  --frost-overlay: var(--alpha-grey-00-10);

  /* Scrollbar */
  --scrollbar-track: transparent;
  --scrollbar-thumb: var(--alpha-grey-100-20);
}

/* Dark Colors */
.dark {
  /* Shimmer colors for loading animations */
  --shimmer-base: #5c5c5c;
  --shimmer-highlight: #ffffff;

  /* Text */
  --text-05: var(--alpha-grey-00-95);
  --text-04: var(--alpha-grey-00-85);
  --text-03: var(--alpha-grey-00-60);
  --text-02: var(--alpha-grey-00-45);
  --text-01: var(--alpha-grey-00-20);
  --text-inverted-01: var(--alpha-grey-100-20);
  --text-inverted-02: var(--alpha-grey-100-45);
  --text-inverted-03: var(--alpha-grey-100-55);
  --text-inverted-04: var(--alpha-grey-100-75);
  --text-inverted-05: var(--alpha-grey-100-90);
  --text-light-03: var(--alpha-grey-00-60);
  --text-light-05: var(--grey-00);
  --text-dark-03: var(--alpha-grey-100-55);
  --text-dark-05: var(--grey-100);

  /* Background / Neutral */
  --background-neutral-00: var(--grey-100);
  --background-neutral-01: var(--grey-90);
  --background-neutral-02: var(--grey-85);
  --background-neutral-03: var(--grey-80);
  --background-neutral-04: var(--grey-75);
  --background-neutral-inverted-04: var(--grey-20);
  --background-neutral-inverted-03: var(--grey-10);
  --background-neutral-inverted-02: var(--grey-06);
  --background-neutral-inverted-01: var(--grey-02);
  --background-neutral-inverted-00: var(--grey-00);
  --background-neutral-light-00: var(--grey-00);
  --background-neutral-light-03: var(--grey-10);
  --background-neutral-dark-03: var(--grey-80);

  /* Background / Tint */
  --background-tint-00: var(--grey-100);
  --background-tint-01: var(--tint-95);
  --background-tint-02: var(--tint-90);
  --background-tint-03: var(--tint-85);
  --background-tint-04: var(--tint-80);
  --background-tint-inverted-04: var(--tint-20);
  --background-tint-inverted-03: var(--tint-10);
  --background-tint-inverted-02: var(--tint-05);
  --background-tint-inverted-01: var(--tint-02);
  --background-tint-inverted-00: var(--grey-00);
  --background-tint-light-01: var(--tint-02);
  --background-tint-dark-01: var(--tint-95);

  /* Border */
  --border-01: var(--grey-80);
  --border-02: var(--grey-60);
  --border-03: var(--grey-50);
  --border-04: var(--grey-30);
  --border-05: var(--grey-00);
  --border-inverted-05: var(--grey-100);
  --border-inverted-04: var(--grey-50);
  --border-inverted-03: var(--grey-40);
  --border-inverted-02: var(--grey-20);
  --border-inverted-01: var(--grey-10);

  /* Theme */
  --theme-primary-06: var(--onyx-chrome-00);
  --theme-primary-05: var(--onyx-chrome-10);
  --theme-primary-04: var(--onyx-chrome-20);

  /* Theme / Gradient */
  --theme-gradient-05: var(--grey-100);
  --theme-gradient-00: var(--grey-00);

  /* Theme / Red */
  --theme-red-05: var(--red-45);
  --theme-red-04: var(--red-50);
  --theme-red-02: var(--red-80);
  --theme-red-01: var(--red-90);

  /* Theme / Orange */
  --theme-orange-05: var(--orange-40);
  --theme-orange-04: var(--orange-50);
  --theme-orange-02: var(--orange-80);
  --theme-orange-01: var(--orange-90);

  /* Theme / Amber */
  --theme-amber-05: var(--neon-amber);
  --theme-amber-04: var(--neon-amber-60);
  --theme-amber-02: var(--neon-amber-80);
  --theme-amber-01: var(--neon-amber-90);

  /* Theme / Yellow */
  --theme-yellow-05: var(--neon-yellow);
  --theme-yellow-02: var(--neon-yellow-80);
  --theme-yellow-01: var(--neon-yellow-90);

  /* Theme / Green */
  --theme-green-05: var(--green-50);
  --theme-green-02: var(--green-80);
  --theme-green-01: var(--green-90);

  /* Theme / Lime */
  --theme-lime-05: var(--neon-lime);
  --theme-lime-02: var(--neon-lime-80);
  --theme-lime-01: var(--neon-lime-90);

  /* Theme / Cyan */
  --theme-cyan-05: var(--neon-cyan);
  --theme-cyan-02: var(--neon-cyan-80);
  --theme-cyan-01: var(--neon-cyan-90);

  /* Theme / Sky */
  --theme-sky-05: var(--neon-sky);
  --theme-sky-02: var(--neon-sky-80);
  --theme-sky-01: var(--neon-sky-90);

  /* Theme / Blue */
  --theme-blue-05: var(--blue-45);
  --theme-blue-02: var(--blue-80);
  --theme-blue-01: var(--blue-90);

  /* Theme / Purple */
  --theme-purple-05: var(--purple-45);
  --theme-purple-02: var(--purple-80);
  --theme-purple-01: var(--purple-90);

  /* Theme / Magenta */
  --theme-magenta-05: var(--neon-magenta);
  --theme-magenta-02: var(--neon-magenta-80);
  --theme-magenta-01: var(--neon-magenta-90);

  /* Status */
  --status-success-05: var(--green-50);
  --status-success-02: var(--green-80);
  --status-success-01: var(--green-90);
  --status-success-00: var(--green-95);
  --status-info-05: var(--blue-50);
  --status-info-02: var(--blue-80);
  --status-info-01: var(--blue-90);
  --status-info-00: var(--blue-95);
  --status-warning-05: var(--orange-50);
  --status-warning-02: var(--orange-80);
  --status-warning-01: var(--orange-90);
  --status-warning-00: var(--orange-95);
  --status-error-05: var(--red-50);
  --status-error-02: var(--red-80);
  --status-error-01: var(--red-90);
  --status-error-00: var(--red-95);

  /* Status / Text */
  --status-text-success-05: var(--green-50);
  --status-text-info-05: var(--blue-45);
  --status-text-warning-05: var(--orange-50);
  --status-text-error-05: var(--red-45);

  /* Action */
  --action-link-06: var(--blue-40);
  --action-link-05: var(--blue-50);
  --action-link-04: var(--blue-60);
  --action-link-03: var(--blue-80);
  --action-link-02: var(--blue-85);
  --action-link-01: var(--blue-90);
  --action-link-00: var(--blue-95);
  --action-danger-06: var(--red-40);
  --action-danger-05: var(--red-50);
  --action-danger-04: var(--red-60);
  --action-danger-03: var(--red-80);
  --action-danger-02: var(--red-85);
  --action-danger-01: var(--red-90);

  /* Action / Text */
  --action-text-link-05: var(--blue-45);
  --action-text-danger-05: var(--red-45);

  /* Background / Code */
  --background-code-01: #151617;

  /* Code */
  --code-code: var(--alpha-grey-00-85);
  --code-comment: var(--alpha-grey-00-45);
  --code-keyword: var(--purple-45);
  --code-string: var(--green-50);
  --code-number: var(--blue-45);
  --code-definition: var(--orange-50);

  /* Highlight */
  --highlight-match: var(--neon-yellow-a30);
  --highlight-selection: var(--neon-sky-a30);
  --highlight-active: var(--neon-amber-a60);
  --highlight-accent: var(--neon-magenta-a60);

  /* Shadow */
  --shadow-01: var(--alpha-grey-00-05);
  --shadow-02: var(--alpha-grey-00-10);
  --shadow-03: var(--alpha-grey-00-20);

  /* Mask */
  --mask-01: var(--alpha-grey-00-10);
  --mask-02: var(--alpha-grey-100-20);
  --mask-03: var(--alpha-grey-100-40);

  /* Frost Overlay (for FrostedDiv component) - darker in dark mode */
  --frost-overlay: var(--alpha-grey-100-10);

  /* Scrollbar */
  --scrollbar-track: transparent;
  --scrollbar-thumb: var(--alpha-grey-00-20);
}


================================================
FILE: web/src/app/css/divider.css
================================================
/* =============================================================================
   Divider Keyboard Navigation Overrides
   Disable hover effects when keyboard navigation is active
   ============================================================================= */
[data-keyboard-nav="true"] .group\/divider:hover {
  background-color: transparent !important;
}

[data-keyboard-nav="true"] .group\/divider[data-selected="true"] {
  background-color: var(--background-tint-02) !important;
}


================================================
FILE: web/src/app/css/general-layouts.css
================================================
/* LineItemLayout */
.line-item-layout {
  @apply grid;
  column-gap: 0.5rem;
  grid-template-columns: 1fr;
}

.line-item-layout[data-reduced-padding="true"] {
  @apply p-2;
}

.line-item-layout[data-has-icon="true"] {
  grid-template-columns: auto 1fr;
}

.line-item-layout[data-loading="true"] {
  row-gap: 0.25rem;
}

/* LineItemLayout Icon */
.line-item-layout-icon {
  @apply self-center stroke-text-04;
}

.line-item-layout[data-variant="tertiary-muted"] .line-item-layout-icon,
.line-item-layout[data-variant="mini"] .line-item-layout-icon {
  @apply stroke-text-03;
}

.line-item-layout-title {
  @apply text-left;
}

.line-item-layout[data-strikethrough="true"] .line-item-layout-title {
  @apply line-through;
}

/* LineItemLayout Description */
.line-item-layout-description {
  @apply leading-none text-left;
}

.line-item-layout[data-has-icon="true"] .line-item-layout-description {
  @apply col-start-2;
}

/* LineItemLayout Skeleton */
.line-item-layout-skeleton-title {
  @apply h-4 bg-background-neutral-01 rounded-08 w-1/3 animate-pulse;
}

.line-item-layout-skeleton-description {
  @apply h-6 bg-background-neutral-01 rounded-08 w-2/3 animate-pulse;
}

.line-item-layout-skeleton-right {
  @apply h-5 w-10 bg-background-neutral-01 rounded-full animate-pulse;
}


================================================
FILE: web/src/app/css/inputs.css
================================================
/* Input styling */
.input-normal {
  background-color: var(--background-neutral-00);
  border: 1px solid var(--border-01);
}
.input-normal:hover {
  border-color: var(--border-02);
}
.input-normal:active {
  border-color: var(--border-05);
}
.input-normal:focus:not(:active),
.input-normal:focus-within:not(:active) {
  border-color: var(--border-05);
  box-shadow: inset 0px 0px 0px 2px var(--background-tint-04);
}

.input-error {
  background-color: var(--background-neutral-00);
  border: 1px solid var(--status-error-05);
}
.input-error:focus:not(:active),
.input-error:focus-within:not(:active) {
  box-shadow: inset 0px 0px 0px 2px var(--background-tint-04);
}

.input-disabled {
  background-color: var(--background-neutral-03);
  border: 1px solid transparent;
  cursor: not-allowed;
}


================================================
FILE: web/src/app/css/knowledge-table.css
================================================
/* ============================================================================
   Table Layout Components
   Based on Figma: Table/Cell component specs
   ============================================================================ */

/* Table Row Layout
   Figma specs:
   - Regular size: min-height 36px (2.25rem)
   - Padding: 8px (0.5rem) vertical
   - Gap: 4px (0.25rem) between items
   - Border radius: 8px on hover
*/
.table-row-layout {
  display: flex;
  flex-direction: row;
  align-items: center;
  gap: 0.25rem;
  min-height: 2.25rem;
  padding: 0.5rem 0;
  border-radius: var(--border-radius-08);
  width: 100%;
  min-width: 0;
}

.table-row-layout.cursor-pointer {
  cursor: pointer;
}

.table-row-layout:hover {
  background-color: var(--background-tint-01);
}

.table-row-layout[data-selected="true"] {
  background-color: var(--action-link-01);
}

/* Table Cell Layout
   Figma specs:
   - Gap: 4px (0.25rem) internal
   - Text padding: 2px (0.125rem) horizontal
   - min-width: 1px to allow truncation
*/
.table-cell-layout {
  display: flex;
  flex: 0 0 auto;
  gap: 0.25rem;
  align-items: center;
  min-width: 1px;
  min-height: 1px;
  overflow: hidden;
}

.table-cell-layout[data-flex="true"] {
  flex: 1 0 0;
}

.table-cell-layout[data-fixed="true"] {
  flex-shrink: 0;
}

/* Sidebar Layout
   Fixed-width navigation sidebar
   Figma specs:
   - Width: 200px (12.5rem)
   - Gap: 4px (0.25rem)
*/
.sidebar-layout {
  display: flex;
  flex-direction: column;
  width: 12.5rem;
  flex-shrink: 0;
  gap: 0.25rem;
}

/* Fix for Truncated text containers in sidebar to prevent vertical clipping
   The Truncated component wrapper has overflow-hidden which clips text
   if the container doesn't have sufficient height for the line-height.
   font-secondary-body uses line-height: 16px (1rem), but we add slightly more
   (1.125rem = 18px) to accommodate text descenders (like 'p', 'g', 'd').
*/
.sidebar-layout .flex-grow.overflow-hidden {
  min-height: 1.125rem;
}

/* Two Column Layout
   Container for sidebar + content pattern
*/
.two-column-layout {
  display: flex;
  flex-direction: row;
  gap: 0.5rem;
  min-width: 0;
  overflow: hidden;
}

/* Content Column Layout
   Main content area that fills remaining space
   Content should align to the top (start)
*/
.content-column-layout {
  display: flex;
  flex-direction: column;
  flex: 1 0 0;
  min-width: 1px;
  min-height: 1px;
  overflow: hidden;
  justify-content: flex-start;
}

/* Hidden Input
   For file uploads and other hidden inputs
*/
.hidden-input {
  display: none;
}

/* Checkbox Cell
   Figma specs:
   - Width: 24px (1.5rem)
   - Centered content
*/
.checkbox-cell-layout {
  display: flex;
  align-items: center;
  justify-content: center;
  width: 1.5rem;
  min-width: 1.5rem;
  flex-shrink: 0;
}

/* Source Icons Row
   Row of source type icons
   Figma specs:
   - Gap: 4px (0.25rem)
   - Icon size: 16px (1rem)
*/
.source-icons-layout {
  display: flex;
  align-items: center;
  gap: 0.25rem;
}

.source-icons-layout > svg {
  width: 1rem;
  height: 1rem;
  flex-shrink: 0;
}


================================================
FILE: web/src/app/css/line-item.css
================================================
/* LineItem Button Variants */
/* Hover styles are disabled when keyboard navigation is active (data-keyboard-nav on parent) */
.line-item-button-main {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-main-emphasized {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }

  &[data-selected="true"] {
    @apply bg-action-link-01;
  }

  /* Ensure selected wins over keyboard-nav hover override */
  [data-keyboard-nav="true"] &[data-selected="true"] {
    @apply bg-action-link-01;
  }
}

.line-item-button-strikethrough {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-strikethrough-emphasized {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-disabled {
  @apply bg-transparent cursor-not-allowed;
}

.line-item-button-disabled-emphasized {
  @apply bg-transparent cursor-not-allowed;
}

.line-item-button-danger {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-danger-emphasized {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }

  &[data-selected="true"] {
    @apply bg-status-error-01;
  }

  /* Ensure selected wins over keyboard-nav hover override */
  [data-keyboard-nav="true"] &[data-selected="true"] {
    @apply bg-status-error-01;
  }
}

/* Action Variant - same background behavior as main */
.line-item-button-action {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-action-emphasized {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }

  &[data-selected="true"] {
    @apply bg-background-tint-02;
  }

  /* Ensure selected wins over keyboard-nav hover override */
  [data-keyboard-nav="true"] &[data-selected="true"] {
    @apply bg-background-tint-02;
  }
}

/* Muted Variant - subdued styling for less prominent items */
.line-item-button-muted {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-muted-emphasized {
  @apply bg-transparent hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }

  &[data-selected="true"] {
    @apply bg-background-tint-02;
  }

  /* Ensure selected wins over keyboard-nav hover override */
  [data-keyboard-nav="true"] &[data-selected="true"] {
    @apply bg-background-tint-02;
  }
}

/* Skeleton Variant - dashed border placeholder style */
.line-item-button-skeleton {
  @apply bg-transparent border border-dashed border-border-01 hover:bg-background-tint-01;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }
}

.line-item-button-skeleton-emphasized {
  @apply bg-transparent border border-dashed border-border-01 hover:bg-background-tint-02;

  [data-keyboard-nav="true"] &:hover {
    @apply bg-transparent;
  }

  &[data-selected="true"] {
    @apply bg-background-tint-02;
  }

  /* Ensure selected wins over keyboard-nav hover override */
  [data-keyboard-nav="true"] &[data-selected="true"] {
    @apply bg-background-tint-02;
  }
}

/* LineItem Text Variants */
.line-item-text-main {
  color: var(--text-04) !important;

  .group\/LineItem[data-selected="true"] & {
    color: var(--action-link-05) !important;
  }
}

.line-item-text-disabled {
  color: var(--text-01) !important;
}

.line-item-text-strikethrough {
  color: var(--text-02) !important;
  @apply line-through decoration-2 !important;
}

.line-item-text-danger {
  color: var(--status-error-05) !important;
}

.line-item-text-action {
  font-family: var(--font-hanken-grotesk);
  font-size: 14px;
  font-weight: 600;
  line-height: 20px;
  letter-spacing: 0px;
  color: var(--text-04) !important;
}

.line-item-text-muted {
  font-family: var(--font-hanken-grotesk);
  font-size: 14px;
  font-weight: 500;
  line-height: 20px;
  letter-spacing: 0px;
  color: var(--text-03) !important;

  .group\/LineItem[data-selected="true"] & {
    color: var(--text-03) !important;
  }
}

.line-item-text-skeleton {
  font-family: var(--font-hanken-grotesk);
  font-size: 12px;
  font-weight: 400;
  line-height: 16px;
  letter-spacing: 0px;
  color: var(--text-03) !important;
}

/* LineItem Icon Variants */
.line-item-icon-main {
  @apply stroke-text-03;

  .group\/LineItem[data-selected="true"] & {
    @apply stroke-action-link-05;
  }
}

.line-item-icon-strikethrough {
  @apply stroke-text-03;
}

.line-item-icon-disabled {
  @apply stroke-text-01;
}

.line-item-icon-danger {
  @apply stroke-status-error-05;
}

.line-item-icon-action {
  @apply stroke-text-03;
}

.line-item-icon-muted {
  @apply stroke-text-02;

  .group\/LineItem[data-selected="true"] & {
    @apply stroke-text-02;
  }
}

.line-item-icon-skeleton {
  @apply stroke-text-02;
}


================================================
FILE: web/src/app/css/sizes.css
================================================
:root {
  --app-page-main-content-width: 52.5rem;
  --block-width-form-input-min: 10rem;

  --container-sm: 42rem;
  --container-sm-md: 47rem;
  --container-md: 54.5rem;
  --container-lg: 62rem;
  --container-full: 100%;
}


================================================
FILE: web/src/app/css/square-button.css
================================================
.square-button {
  /* Base styles */
  position: relative;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  aspect-ratio: 1 / 1;
  border-radius: var(--radius-08);
  padding: 0.5rem;
  background-color: var(--background-tint-01);
}

.square-button:hover {
  background-color: var(--background-tint-02);
}

.square-button:active {
  background-color: var(--background-tint-03);
}

.square-button:disabled {
  cursor: not-allowed;
  opacity: 0.5;
}

/* Transient state */
.square-button[data-state="transient"] {
  border: 1px solid var(--action-link-05);
  background-color: var(--action-link-00);
}

.square-button[data-state="transient"]:hover {
  background-color: var(--action-link-01);
}

.square-button[data-state="transient"]:active {
  background-color: var(--action-link-02);
}


================================================
FILE: web/src/app/css/switch.css
================================================
.switch-normal {
  background-color: var(--background-tint-03);
  border: 1px solid transparent;
}
.switch-normal:hover {
  background-color: var(--background-tint-04);
}
.switch-normal:focus,
.switch-normal:focus-within {
  border-color: var(--background-tint-04);
}
.switch-normal:focus:hover,
.switch-normal:focus-within:hover {
  border-color: var(--border-01);
}

.switch-normal-checked {
  background-color: var(--action-link-05);
  border: 1px solid transparent;
}
.switch-normal-checked:hover {
  background-color: var(--action-link-04);
}
.switch-normal-checked:focus,
.switch-normal-checked:focus-within {
  border-color: var(--action-link-04);
}
.switch-normal-checked:focus:hover,
.switch-normal-checked:focus-within:hover {
  border-color: var(--border-01);
}

.switch-disabled {
  background-color: var(--background-neutral-04);
  border: 1px solid transparent;
  cursor: not-allowed !important;
}
.switch-disabled-checked {
  background-color: var(--action-link-03);
  border: 1px solid transparent;
  cursor: not-allowed !important;
}

.switch-thumb {
  background-color: var(--background-neutral-light-00);
}
.switch-thumb-disabled {
  background-color: var(--background-neutral-03);
}


================================================
FILE: web/src/app/css/z-index.css
================================================
:root {
  /* Base layers */
  --z-base: 0;
  --z-content: 1;
  /* Settings header must sit above sticky table headers (--z-sticky: 10) so
     the page header scrolls over pinned columns without being obscured. */
  --z-settings-header: 11;
  --z-app-layout: 9;
  --z-sticky: 10;

  /* Interactive overlays */
  --z-modal-overlay: 900;
  --z-modal: 1000;
  --z-toast: 1100;
  --z-popover: 1200;
  --z-tooltip: 1300;
}

/* Base layers */
.z-base {
  z-index: var(--z-base);
}
.z-content {
  z-index: var(--z-content);
}
.z-settings-header {
  z-index: var(--z-settings-header);
}
.z-app-layout {
  z-index: var(--z-app-layout);
}
.z-sticky {
  z-index: var(--z-sticky);
}

/* Interactive overlays */
.z-modal-overlay {
  z-index: var(--z-modal-overlay);
}
.z-modal {
  z-index: var(--z-modal);
}
.z-toast {
  z-index: var(--z-toast);
}
.z-popover {
  z-index: var(--z-popover);
}
.z-tooltip {
  z-index: var(--z-tooltip);
}


================================================
FILE: web/src/app/ee/EEFeatureRedirect.tsx
================================================
"use client";

import { useEffect } from "react";
import { useRouter } from "next/navigation";
import { toast } from "@/hooks/useToast";

export default function EEFeatureRedirect() {
  const router = useRouter();

  useEffect(() => {
    toast.error(
      "This feature requires a license. Please upgrade your plan to access."
    );
    router.replace("/app");
  }, [router]);

  return null;
}


================================================
FILE: web/src/app/ee/LICENSE
================================================
The Onyx Enterprise License (the "Enterprise License")
Copyright (c) 2023-present DanswerAI, Inc.

With regard to the Onyx Software:

This software and associated documentation files (the "Software") may only be
used in production, if you (and any entity that you represent) have agreed to,
and are in compliance with, the Onyx Subscription Terms of Service, available
at https://www.onyx.app/legal/self-host (the "Enterprise Terms"), or other
agreement governing the use of the Software, as agreed by you and DanswerAI,
and otherwise have a valid Onyx Enterprise License for the
correct number of user seats. Subject to the foregoing sentence, you are free to
modify this Software and publish patches to the Software. You agree that DanswerAI
and/or its licensors (as applicable) retain all right, title and interest in and
to all such modifications and/or patches, and all such modifications and/or
patches may only be used, copied, modified, displayed, distributed, or otherwise
exploited with a valid Onyx Enterprise License for the correct
number of user seats. Notwithstanding the foregoing, you may copy and modify
the Software for development and testing purposes, without requiring a
subscription. You agree that DanswerAI and/or its licensors (as applicable) retain
all right, title and interest in and to all such modifications. You are not
granted any other rights beyond what is expressly stated herein. Subject to the
foregoing, it is forbidden to copy, merge, publish, distribute, sublicense,
and/or sell the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

For all third party components incorporated into the Onyx Software, those
components are licensed under the original license provided by the owner of the
applicable component.


================================================
FILE: web/src/app/ee/admin/billing/BillingAlerts.tsx
================================================
import React from "react";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { CircleAlert, Info } from "lucide-react";
import { BillingInformation, BillingStatus } from "@/lib/billing/interfaces";

export function BillingAlerts({
  billingInformation,
}: {
  billingInformation: BillingInformation;
}) {
  const isTrialing = billingInformation.status === BillingStatus.TRIALING;
  const isCancelled = billingInformation.cancel_at_period_end;
  const isExpired = billingInformation.current_period_end
    ? new Date(billingInformation.current_period_end) < new Date()
    : false;
  const noPaymentMethod = !billingInformation.payment_method_enabled;

  const messages: string[] = [];

  if (isExpired) {
    messages.push(
      "Your subscription has expired. Please resubscribe to continue using the service."
    );
  }
  if (isCancelled && !isExpired && billingInformation.current_period_end) {
    messages.push(
      `Your subscription will cancel on ${new Date(
        billingInformation.current_period_end
      ).toLocaleDateString()}. You can resubscribe before this date to remain uninterrupted.`
    );
  }
  if (isTrialing) {
    messages.push(
      `You're currently on a trial. Your trial ends on ${
        billingInformation.trial_end
          ? new Date(billingInformation.trial_end).toLocaleDateString()
          : "N/A"
      }.`
    );
  }
  if (noPaymentMethod) {
    messages.push(
      "You currently have no payment method on file. Please add one to avoid service interruption."
    );
  }

  const variant = isExpired || noPaymentMethod ? "destructive" : "default";

  if (messages.length === 0) return null;

  return (
    <Alert variant={variant}>
      <AlertTitle className="flex items-center space-x-2">
        {variant === "destructive" ? (
          <CircleAlert className="h-4 w-4" />
        ) : (
          <Info className="h-4 w-4" />
        )}
        <span>
          {variant === "destructive"
            ? "Important Subscription Notice"
            : "Subscription Notice"}
        </span>
      </AlertTitle>
      <AlertDescription>
        <ul className="list-disc list-inside space-y-1 mt-2">
          {messages.map((msg, idx) => (
            <li key={idx}>{msg}</li>
          ))}
        </ul>
      </AlertDescription>
    </Alert>
  );
}


================================================
FILE: web/src/app/ee/admin/billing/BillingInformationPage.tsx
================================================
"use client";

import { useEffect } from "react";
import { toast } from "@/hooks/useToast";
import {
  createCustomerPortalSession,
  useBillingInformation,
  hasActiveSubscription,
} from "@/lib/billing";

import {
  Card,
  CardContent,
  CardDescription,
  CardHeader,
  CardTitle,
} from "@/components/ui/card";
import { Button } from "@opal/components";
import { SubscriptionSummary } from "./SubscriptionSummary";
import { BillingAlerts } from "./BillingAlerts";
import { SvgClipboard, SvgWallet } from "@opal/icons";
export default function BillingInformationPage() {
  const {
    data: billingInformation,
    error,
    isLoading,
  } = useBillingInformation();

  useEffect(() => {
    const url = new URL(window.location.href);
    if (url.searchParams.has("session_id")) {
      toast.success(
        "Congratulations! Your subscription has been updated successfully."
      );
      url.searchParams.delete("session_id");
      window.history.replaceState({}, "", url.toString());
    }
  }, []);

  if (isLoading) {
    return <div className="text-center py-8">Loading...</div>;
  }

  if (error) {
    console.error("Failed to fetch billing information:", error);
    return (
      <div className="text-center py-8 text-red-500">
        Error loading billing information. Please try again later.
      </div>
    );
  }

  if (!billingInformation || !hasActiveSubscription(billingInformation)) {
    return (
      <div className="text-center py-8">No billing information available.</div>
    );
  }

  const handleManageSubscription = async () => {
    try {
      const response = await createCustomerPortalSession();
      console.log("response", response);
      if (!response.stripe_customer_portal_url) {
        throw new Error("No portal URL returned from the server");
      }
      window.location.href = response.stripe_customer_portal_url;
    } catch (error) {
      console.error("Error creating customer portal session:", error);
      toast.error("Error creating customer portal session");
    }
  };

  return (
    <div className="space-y-8">
      <Card className="shadow-md">
        <CardHeader>
          <CardTitle className="text-2xl font-bold flex items-center">
            <SvgWallet className="mr-4 text-muted-foreground h-6 w-6" />
            Subscription Details
          </CardTitle>
        </CardHeader>
        <CardContent className="space-y-6">
          <SubscriptionSummary billingInformation={billingInformation} />
          <BillingAlerts billingInformation={billingInformation} />
        </CardContent>
      </Card>

      <Card className="shadow-md">
        <CardHeader>
          <CardTitle className="text-xl font-semibold">
            Manage Subscription
          </CardTitle>
          <CardDescription>
            View your plan, update payment, or change subscription
          </CardDescription>
        </CardHeader>
        <CardContent>
          <Button
            onClick={handleManageSubscription}
            width="full"
            icon={SvgClipboard}
          >
            Manage Subscription
          </Button>
        </CardContent>
      </Card>
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/billing/InfoItem.tsx
================================================
import React from "react";

interface InfoItemProps {
  title: string;
  value: string;
}

export function InfoItem({ title, value }: InfoItemProps) {
  return (
    <div className="bg-muted p-4 rounded-lg">
      <p className="text-sm font-medium text-muted-foreground mb-1">{title}</p>
      <p className="text-lg font-semibold text-foreground dark:text-neutral-100">
        {value}
      </p>
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/billing/SubscriptionSummary.tsx
================================================
import React from "react";
import { InfoItem } from "./InfoItem";
import { statusToDisplay, BillingInformation } from "@/lib/billing";
import { formatDateShort } from "@/lib/dateUtils";

interface SubscriptionSummaryProps {
  billingInformation: BillingInformation;
}

export function SubscriptionSummary({
  billingInformation,
}: SubscriptionSummaryProps) {
  return (
    <div className="grid grid-cols-2 gap-4">
      <InfoItem
        title="Subscription Status"
        value={statusToDisplay(billingInformation.status)}
      />
      <InfoItem
        title="Seats"
        value={billingInformation.seats?.toString() ?? "—"}
      />
      <InfoItem
        title="Billing Start"
        value={formatDateShort(billingInformation.current_period_start)}
      />
      <InfoItem
        title="Billing End"
        value={formatDateShort(billingInformation.current_period_end)}
      />
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/billing/page.tsx
================================================
import * as SettingsLayouts from "@/layouts/settings-layouts";
import BillingInformationPage from "./BillingInformationPage";
import { SvgCreditCard } from "@opal/icons";

export interface BillingInformation {
  stripe_subscription_id: string;
  status: string;
  current_period_start: Date;
  current_period_end: Date;
  number_of_seats: number;
  cancel_at_period_end: boolean;
  canceled_at: Date | null;
  trial_start: Date | null;
  trial_end: Date | null;
  seats: number;
  payment_method_enabled: boolean;
}

export default function page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgCreditCard}
        title="Billing Information"
        separator
      />
      <SettingsLayouts.Body>
        <BillingInformationPage />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/groups/[id]/page.tsx
================================================
"use client";

import { use } from "react";
import EditGroupPage from "@/refresh-pages/admin/GroupsPage/EditGroupPage";

export default function EditGroupRoute({
  params,
}: {
  params: Promise<{ id: string }>;
}) {
  const { id } = use(params);
  return <EditGroupPage groupId={Number(id)} />;
}


================================================
FILE: web/src/app/ee/admin/groups/create/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage/CreateGroupPage";


================================================
FILE: web/src/app/ee/admin/groups/page.tsx
================================================
export { default } from "@/refresh-pages/admin/GroupsPage";


================================================
FILE: web/src/app/ee/admin/layout.tsx
================================================
import Layout from "@/components/admin/Layout";

export interface AdminLayoutProps {
  children: React.ReactNode;
}

export default async function AdminLayout({ children }: AdminLayoutProps) {
  return await Layout({ children });
}


================================================
FILE: web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx
================================================
"use client";

import { Label, SubLabel } from "@/components/Field";
import { toast } from "@/hooks/useToast";
import { SettingsContext } from "@/providers/SettingsProvider";
import { Button, Text } from "@opal/components";
import { markdown } from "@opal/utils";
import { Callout } from "@/components/ui/callout";
import { useContext, useState } from "react";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import Spacer from "@/refresh-components/Spacer";

export function CustomAnalyticsUpdateForm() {
  const settings = useContext(SettingsContext);
  const customAnalyticsScript = settings?.customAnalyticsScript;

  const [newCustomAnalyticsScript, setNewCustomAnalyticsScript] =
    useState<string>(customAnalyticsScript || "");
  const [secretKey, setSecretKey] = useState<string>("");

  if (!settings) {
    return <Callout type="danger" title="Failed to fetch settings"></Callout>;
  }

  return (
    <div>
      <form
        onSubmit={async (e) => {
          e.preventDefault();

          const response = await fetch(
            "/api/admin/enterprise-settings/custom-analytics-script",
            {
              method: "PUT",
              headers: {
                "Content-Type": "application/json",
              },
              body: JSON.stringify({
                script: newCustomAnalyticsScript.trim(),
                secret_key: secretKey,
              }),
            }
          );
          if (response.ok) {
            toast.success("Custom analytics script updated successfully!");
          } else {
            const errorMsg = (await response.json()).detail;
            toast.error(
              `Failed to update custom analytics script: "${errorMsg}"`
            );
          }
          setSecretKey("");
        }}
      >
        <div className="mb-4">
          <Label>Script</Label>
          <Text as="p">
            Specify the Javascript that should run on page load in order to
            initialize your custom tracking/analytics.
          </Text>
          <Spacer rem={0.75} />
          <Text as="p">
            {markdown(
              "Do not include the `<script></script>` tags. If you upload a script below but you are not receiving any events in your analytics platform, try removing all extra whitespace before each line of JavaScript."
            )}
          </Text>
          <Spacer rem={0.5} />
          <InputTextArea
            value={newCustomAnalyticsScript}
            onChange={(event) =>
              setNewCustomAnalyticsScript(event.target.value)
            }
          />
        </div>

        <Label>Secret Key</Label>
        <SubLabel>
          <>
            For security reasons, you must provide a secret key to update this
            script. This should be the value of the{" "}
            <i>CUSTOM_ANALYTICS_SECRET_KEY</i> environment variable set when
            initially setting up Onyx.
          </>
        </SubLabel>
        <input
          className={`
            border
            border-border
            rounded
            w-full
            py-2
            px-3
            mt-1`}
          type="password"
          value={secretKey}
          onChange={(e) => setSecretKey(e.target.value)}
        />
        <Spacer rem={1} />
        <Button type="submit">Update</Button>
      </form>
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/custom-analytics/page.tsx
================================================
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { CUSTOM_ANALYTICS_ENABLED } from "@/lib/constants";
import { Callout } from "@/components/ui/callout";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Text } from "@opal/components";
import Spacer from "@/refresh-components/Spacer";
import { CustomAnalyticsUpdateForm } from "./CustomAnalyticsUpdateForm";

const route = ADMIN_ROUTES.CUSTOM_ANALYTICS;

function Main() {
  if (!CUSTOM_ANALYTICS_ENABLED) {
    return (
      <div>
        <div className="mt-4">
          <Callout type="danger" title="Custom Analytics is not enabled.">
            To set up custom analytics scripts, please work with the team who
            setup Onyx in your team to set the{" "}
            <i>CUSTOM_ANALYTICS_SECRET_KEY</i> environment variable.
          </Callout>
        </div>
      </div>
    );
  }

  return (
    <div>
      <Text as="p">
        {
          "This allows you to bring your own analytics tool to Onyx! Copy the Web snippet from your analytics provider into the box below, and we'll start sending usage events."
        }
      </Text>
      <Spacer rem={2} />

      <CustomAnalyticsUpdateForm />
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/lib.ts
================================================
import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR, { mutate } from "swr";
import { OnyxBotAnalytics, QueryAnalytics, UserAnalytics } from "./usage/types";
import { useState } from "react";
import { buildApiPath } from "@/lib/urlBuilder";

import {
  convertDateToEndOfDay,
  convertDateToStartOfDay,
  getXDaysAgo,
} from "../../../../components/dateRangeSelectors/dateUtils";
import { THIRTY_DAYS } from "../../../../components/dateRangeSelectors/AdminDateRangeSelector";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";

export const useTimeRange = () => {
  return useState<DateRangePickerValue>({
    to: new Date(),
    from: getXDaysAgo(30),
    selectValue: THIRTY_DAYS,
  });
};

export const useQueryAnalytics = (timeRange: DateRangePickerValue) => {
  const url = buildApiPath("/api/analytics/admin/query", {
    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),
    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),
  });
  const swrResponse = useSWR<QueryAnalytics[]>(url, errorHandlingFetcher);

  return {
    ...swrResponse,
    refreshQueryAnalytics: () => mutate(url),
  };
};

export const useUserAnalytics = (timeRange: DateRangePickerValue) => {
  const url = buildApiPath("/api/analytics/admin/user", {
    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),
    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),
  });
  const swrResponse = useSWR<UserAnalytics[]>(url, errorHandlingFetcher);

  return {
    ...swrResponse,
    refreshUserAnalytics: () => mutate(url),
  };
};

export const useOnyxBotAnalytics = (timeRange: DateRangePickerValue) => {
  const url = buildApiPath("/api/analytics/admin/onyxbot", {
    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),
    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),
  });
  const swrResponse = useSWR<OnyxBotAnalytics[]>(url, errorHandlingFetcher); // TODO

  return {
    ...swrResponse,
    refreshOnyxBotAnalytics: () => mutate(url),
  };
};

export function getDatesList(startDate: Date): string[] {
  const datesList: string[] = [];
  const endDate = new Date(); // current date

  for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) {
    const dateStr = d.toISOString().split("T")[0]; // convert date object to 'YYYY-MM-DD' format
    if (dateStr !== undefined) {
      datesList.push(dateStr);
    }
  }

  return datesList;
}

export interface PersonaMessageAnalytics {
  total_messages: number;
  date: string;
  persona_id: number;
}

export interface PersonaSnapshot {
  id: number;
  name: string;
  description: string;
  is_listed: boolean;
  is_public: boolean;
}

export const usePersonaMessages = (
  personaId: number | undefined,
  timeRange: DateRangePickerValue
) => {
  const url = buildApiPath(`/api/analytics/admin/persona/messages`, {
    persona_id: personaId?.toString(),
    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),
    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),
  });

  const { data, error, isLoading } = useSWR<PersonaMessageAnalytics[]>(
    personaId !== undefined ? url : null,
    errorHandlingFetcher
  );

  return {
    data,
    error,
    isLoading,
    refreshPersonaMessages: () => mutate(url),
  };
};

export interface PersonaUniqueUserAnalytics {
  unique_users: number;
  date: string;
  persona_id: number;
}

export const usePersonaUniqueUsers = (
  personaId: number | undefined,
  timeRange: DateRangePickerValue
) => {
  const url = buildApiPath(`/api/analytics/admin/persona/unique-users`, {
    persona_id: personaId?.toString(),
    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),
    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),
  });

  const { data, error, isLoading } = useSWR<PersonaUniqueUserAnalytics[]>(
    personaId !== undefined ? url : null,
    errorHandlingFetcher
  );

  return {
    data,
    error,
    isLoading,
    refreshPersonaUniqueUsers: () => mutate(url),
  };
};


================================================
FILE: web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx
================================================
import { Badge } from "@/components/ui/badge";
import { Feedback } from "@/lib/types";

export function FeedbackBadge({
  feedback,
}: {
  feedback?: Feedback | "mixed" | null;
}) {
  let feedbackBadge;
  switch (feedback) {
    case "like":
      feedbackBadge = (
        <Badge variant="success" className="text-sm">
          Like
        </Badge>
      );
      break;
    case "dislike":
      feedbackBadge = (
        <Badge variant="destructive" className="text-sm">
          Dislike
        </Badge>
      );
      break;
    case "mixed":
      feedbackBadge = (
        <Badge variant="purple" className="text-sm">
          Mixed
        </Badge>
      );
      break;
    default:
      feedbackBadge = (
        <Badge variant="outline" className="text-sm">
          N/A
        </Badge>
      );
      break;
  }
  return feedbackBadge;
}


================================================
FILE: web/src/app/ee/admin/performance/query-history/KickoffCSVExport.tsx
================================================
import { toast } from "@/hooks/useToast";
import Button from "@/refresh-components/buttons/Button";
import { useRef, useState } from "react";
import { DateRange } from "../../../../../components/dateRangeSelectors/AdminDateRangeSelector";
import { withRequestId, withDateRange } from "./utils";
import {
  CHECK_QUERY_HISTORY_EXPORT_STATUS_URL,
  DOWNLOAD_QUERY_HISTORY_URL,
  MAX_RETRIES,
  PREVIOUS_CSV_TASK_BUTTON_NAME,
  RETRY_COOLDOWN_MILLISECONDS,
} from "./constants";
import {
  CheckQueryHistoryExportStatusResponse,
  SpinnerStatus,
  StartQueryHistoryExportResponse,
} from "./types";
import { cn } from "@/lib/utils";
import { SvgLoader, SvgPlayCircle } from "@opal/icons";
export default function KickoffCSVExport({
  dateRange,
}: {
  dateRange: DateRange;
}) {
  const timerIdRef = useRef<null | number>(null);
  const retryCount = useRef<number>(0);
  const [, rerender] = useState<void>();
  const [spinnerStatus, setSpinnerStatus] = useState<SpinnerStatus>("static");

  const reset = (failure: boolean = false) => {
    setSpinnerStatus("static");
    if (timerIdRef.current) {
      clearInterval(timerIdRef.current);
      timerIdRef.current = null;
    }
    retryCount.current = 0;

    if (failure) {
      toast.error("Failed to download the query-history.");
    }

    rerender();
  };

  const startExport = async () => {
    // If the button is pressed again while we're spinning, then we reset and cancel the request.
    if (spinnerStatus === "spinning") {
      reset();
      return;
    }

    setSpinnerStatus("spinning");
    toast.info(
      `Generating CSV report. Click the '${PREVIOUS_CSV_TASK_BUTTON_NAME}' button to see all jobs.`
    );
    const response = await fetch(withDateRange(dateRange), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      reset(true);
      return;
    }

    const { request_id } =
      (await response.json()) as StartQueryHistoryExportResponse;
    const timer = setInterval(
      () => checkStatus(request_id),
      RETRY_COOLDOWN_MILLISECONDS
    ) as unknown as number;
    timerIdRef.current = timer;
    rerender();
  };

  const checkStatus = async (requestId: string) => {
    if (retryCount.current >= MAX_RETRIES) {
      reset();
      return;
    }
    retryCount.current += 1;
    rerender();

    const response = await fetch(
      withRequestId(CHECK_QUERY_HISTORY_EXPORT_STATUS_URL, requestId),
      {
        method: "GET",
      }
    );

    if (!response.ok) {
      reset(true);
      return;
    }

    const { status } =
      (await response.json()) as CheckQueryHistoryExportStatusResponse;

    if (status === "SUCCESS") {
      reset();
      window.location.href = withRequestId(
        DOWNLOAD_QUERY_HISTORY_URL,
        requestId
      );
    } else if (status === "FAILURE") {
      reset(true);
    }
  };

  return (
    <div className="flex flex-1 flex-col w-full justify-center">
      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
      <Button
        className="ml-auto"
        onClick={startExport}
        danger={spinnerStatus === "spinning"}
        leftIcon={
          spinnerStatus === "spinning"
            ? ({ className }) => (
                <SvgLoader className={cn(className, "animate-spin")} />
              )
            : SvgPlayCircle
        }
      >
        {spinnerStatus === "spinning" ? "Cancel" : "Kickoff Export"}
      </Button>
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx
================================================
import Separator from "@/refresh-components/Separator";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
  TableHeader,
} from "@/components/ui/table";
import Text from "@/refresh-components/texts/Text";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { ThreeDotsLoader } from "@/components/Loading";
import { ChatSessionMinimal } from "@/app/ee/admin/performance/usage/types";
import { Section } from "@/layouts/general-layouts";
import { timestampToReadableDate } from "@/lib/dateUtils";
import { Dispatch, SetStateAction, useCallback, useState } from "react";
import { Feedback, TaskStatus } from "@/lib/types";
import {
  DateRange,
  AdminDateRangeSelector,
} from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { PageSelector } from "@/components/PageSelector";
import Link from "next/link";
import type { Route } from "next";
import { FeedbackBadge } from "@/app/ee/admin/performance/query-history/FeedbackBadge";
import KickoffCSVExport from "@/app/ee/admin/performance/query-history/KickoffCSVExport";
import CardSection from "@/components/admin/CardSection";
import usePaginatedFetch from "@/hooks/usePaginatedFetch";
import { ErrorCallout } from "@/components/ErrorCallout";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR from "swr";
import { TaskQueueState } from "@/app/ee/admin/performance/query-history/types";
import { withRequestId } from "@/app/ee/admin/performance/query-history/utils";
import {
  DOWNLOAD_QUERY_HISTORY_URL,
  LIST_QUERY_HISTORY_URL,
  NUM_IN_PAGE,
  ITEMS_PER_PAGE,
  PAGES_PER_BATCH,
  PREVIOUS_CSV_TASK_BUTTON_NAME,
} from "@/app/ee/admin/performance/query-history/constants";
import { humanReadableFormatWithTime } from "@/lib/time";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { Badge } from "@/components/ui/badge";
import {
  SvgDownloadCloud,
  SvgFileText,
  SvgMinus,
  SvgMinusCircle,
  SvgThumbsDown,
  SvgThumbsUp,
} from "@opal/icons";
function QueryHistoryTableRow({
  chatSessionMinimal,
}: {
  chatSessionMinimal: ChatSessionMinimal;
}) {
  return (
    <TableRow
      key={chatSessionMinimal.id}
      className="hover:bg-accent-background cursor-pointer relative select-none"
    >
      <TableCell>
        <Text className="whitespace-normal line-clamp-5">
          {chatSessionMinimal.first_user_message ||
            chatSessionMinimal.name ||
            "-"}
        </Text>
      </TableCell>
      <TableCell>
        <Text className="whitespace-normal line-clamp-5">
          {chatSessionMinimal.first_ai_message || "-"}
        </Text>
      </TableCell>
      <TableCell>
        <FeedbackBadge feedback={chatSessionMinimal.feedback_type} />
      </TableCell>
      <TableCell>{chatSessionMinimal.user_email || "-"}</TableCell>
      <TableCell>{chatSessionMinimal.assistant_name || "Unknown"}</TableCell>
      <TableCell>
        {timestampToReadableDate(chatSessionMinimal.time_created)}
      </TableCell>
      {/* Wrapping in <td> to avoid console warnings */}
      <td className="w-0 p-0">
        <Link
          href={
            `/ee/admin/performance/query-history/${chatSessionMinimal.id}` as Route
          }
          className="absolute w-full h-full left-0 top-0"
        ></Link>
      </td>
    </TableRow>
  );
}

function SelectFeedbackType({
  value,
  onValueChange,
}: {
  value: Feedback | "all";
  onValueChange: (value: Feedback | "all") => void;
}) {
  return (
    <Section alignItems="start" gap={0.25}>
      <Text as="p" className="font-medium">
        Feedback Type
      </Text>
      <InputSelect
        value={value}
        onValueChange={onValueChange as (value: string) => void}
      >
        <InputSelect.Trigger />

        <InputSelect.Content>
          <InputSelect.Item value="all" icon={SvgMinusCircle}>
            Any
          </InputSelect.Item>
          <InputSelect.Item value="like" icon={SvgThumbsUp}>
            Like
          </InputSelect.Item>
          <InputSelect.Item value="dislike" icon={SvgThumbsDown}>
            Dislike
          </InputSelect.Item>
          <InputSelect.Item value="mixed" icon={SvgMinus}>
            Mixed
          </InputSelect.Item>
        </InputSelect.Content>
      </InputSelect>
    </Section>
  );
}

function ExportBadge({ status }: { status: TaskStatus }) {
  if (status === "SUCCESS") return <Badge variant="success">Success</Badge>;
  else if (status === "FAILURE")
    return <Badge variant="destructive">Failure</Badge>;
  else if (status === "PENDING" || status === "STARTED")
    return <Badge variant="in_progress">Pending</Badge>;
  else return <></>;
}

function PreviousQueryHistoryExportsModal({
  setShowModal,
}: {
  setShowModal: Dispatch<SetStateAction<boolean>>;
}) {
  const { data: queryHistoryTasks } = useSWR<TaskQueueState[]>(
    LIST_QUERY_HISTORY_URL,
    errorHandlingFetcher,
    {
      refreshInterval: 3000,
    }
  );

  const tasks = (queryHistoryTasks ?? []).map((queryHistory) => ({
    taskId: queryHistory.task_id,
    start: new Date(queryHistory.start),
    end: new Date(queryHistory.end),
    status: queryHistory.status,
    startTime: queryHistory.start_time,
  }));

  // sort based off of "most-recently-exported" CSV file.
  tasks.sort((task_a, task_b) => {
    if (task_a.startTime < task_b.startTime) return 1;
    else if (task_a.startTime > task_b.startTime) return -1;
    else return 0;
  });

  const [taskPage, setTaskPage] = useState(1);
  const totalTaskPages = Math.ceil(tasks.length / NUM_IN_PAGE);
  const paginatedTasks = tasks.slice(
    NUM_IN_PAGE * (taskPage - 1),
    NUM_IN_PAGE * taskPage
  );

  return (
    <Modal open onOpenChange={() => setShowModal(false)}>
      <Modal.Content width="full" height="full">
        <Modal.Header
          icon={SvgFileText}
          title="Previous Query History Exports"
          onClose={() => setShowModal(false)}
        />
        <Modal.Body>
          <Table>
            <TableHeader>
              <TableRow>
                <TableHead>Generated At</TableHead>
                <TableHead>Start Range</TableHead>
                <TableHead>End Range</TableHead>
                <TableHead>Status</TableHead>
                <TableHead>Download</TableHead>
              </TableRow>
            </TableHeader>
            <TableBody>
              {paginatedTasks.map((task, index) => (
                <TableRow key={index}>
                  <TableCell>
                    {humanReadableFormatWithTime(task.startTime)}
                  </TableCell>
                  <TableCell>{task.start.toDateString()}</TableCell>
                  <TableCell>{task.end.toDateString()}</TableCell>
                  <TableCell>
                    <ExportBadge status={task.status} />
                  </TableCell>
                  <TableCell>
                    <Button
                      variant="default"
                      prominence="tertiary"
                      icon={SvgDownloadCloud}
                      size="sm"
                      disabled={task.status !== "SUCCESS"}
                      tooltip={
                        task.status !== "SUCCESS"
                          ? "Export is not yet ready"
                          : undefined
                      }
                      href={
                        task.status === "SUCCESS"
                          ? withRequestId(
                              DOWNLOAD_QUERY_HISTORY_URL,
                              task.taskId
                            )
                          : undefined
                      }
                    />
                  </TableCell>
                </TableRow>
              ))}
            </TableBody>
          </Table>

          <Section>
            <PageSelector
              currentPage={taskPage}
              totalPages={totalTaskPages}
              onPageChange={setTaskPage}
            />
          </Section>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}

export function QueryHistoryTable() {
  const [dateRange, setDateRange] = useState<DateRange>(undefined);
  const [filters, setFilters] = useState<{
    feedback_type?: Feedback | "all";
    start_time?: string;
    end_time?: string;
  }>({});

  const [showModal, setShowModal] = useState(false);

  const {
    currentPageData: chatSessionData,
    isLoading,
    error,
    currentPage,
    totalPages,
    goToPage,
  } = usePaginatedFetch<ChatSessionMinimal>({
    itemsPerPage: ITEMS_PER_PAGE,
    pagesPerBatch: PAGES_PER_BATCH,
    endpoint: "/api/admin/chat-session-history",
    filter: filters,
  });

  const onTimeRangeChange = useCallback((value: DateRange) => {
    setDateRange(value);

    if (value?.from && value?.to) {
      setFilters((prev) => ({
        ...prev,
        start_time: value.from.toISOString(),
        end_time: value.to.toISOString(),
      }));
    } else {
      setFilters((prev) => {
        const newFilters = { ...prev };
        delete newFilters.start_time;
        delete newFilters.end_time;
        return newFilters;
      });
    }
  }, []);

  if (error) {
    return (
      <ErrorCallout
        errorTitle="Error fetching query history"
        errorMsg={error?.message}
      />
    );
  }

  return (
    <>
      <CardSection className="mt-8">
        <div className="flex">
          <div className="gap-y-3 flex flex-col">
            <SelectFeedbackType
              value={filters.feedback_type || "all"}
              onValueChange={(value) => {
                setFilters((prev) => {
                  const newFilters = { ...prev };
                  if (value === "all") {
                    delete newFilters.feedback_type;
                  } else {
                    newFilters.feedback_type = value;
                  }
                  return newFilters;
                });
              }}
            />

            <AdminDateRangeSelector
              value={dateRange}
              onValueChange={onTimeRangeChange}
            />
          </div>
          <div className="flex flex-row w-full items-center gap-x-2">
            <KickoffCSVExport dateRange={dateRange} />
            <Button prominence="secondary" onClick={() => setShowModal(true)}>
              {PREVIOUS_CSV_TASK_BUTTON_NAME}
            </Button>
          </div>
        </div>
        <Separator />
        <Section>
          <Table className="mt-5">
            <TableHeader>
              <TableRow>
                <TableHead>First User Message</TableHead>
                <TableHead>First AI Response</TableHead>
                <TableHead>Feedback</TableHead>
                <TableHead>User</TableHead>
                <TableHead>Persona</TableHead>
                <TableHead>Date</TableHead>
              </TableRow>
            </TableHeader>
            {isLoading ? (
              <TableBody>
                <TableRow>
                  <TableCell colSpan={6} className="text-center">
                    <ThreeDotsLoader />
                  </TableCell>
                </TableRow>
              </TableBody>
            ) : (
              <TableBody>
                {chatSessionData?.map((chatSessionMinimal) => (
                  <QueryHistoryTableRow
                    key={chatSessionMinimal.id}
                    chatSessionMinimal={chatSessionMinimal}
                  />
                ))}
              </TableBody>
            )}
          </Table>

          {chatSessionData && (
            <Section>
              <PageSelector
                totalPages={totalPages}
                currentPage={currentPage}
                onPageChange={goToPage}
              />
            </Section>
          )}
        </Section>
      </CardSection>

      {showModal && (
        <PreviousQueryHistoryExportsModal setShowModal={setShowModal} />
      )}
    </>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/query-history/[id]/page.tsx
================================================
"use client";
import { use } from "react";

import { Text } from "@opal/components";
import Title from "@/components/ui/title";
import Separator from "@/refresh-components/Separator";
import Spacer from "@/refresh-components/Spacer";
import { ChatSessionSnapshot, MessageSnapshot } from "../../usage/types";
import { FiBook } from "react-icons/fi";
import { timestampToReadableDate } from "@/lib/dateUtils";
import BackButton from "@/refresh-components/buttons/BackButton";
import { FeedbackBadge } from "../FeedbackBadge";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ErrorCallout } from "@/components/ErrorCallout";
import { ThreeDotsLoader } from "@/components/Loading";
import CardSection from "@/components/admin/CardSection";

function MessageDisplay({ message }: { message: MessageSnapshot }) {
  return (
    <div>
      <p className="text-xs font-bold mb-1">
        {message.message_type === "user" ? "User" : "AI"}
      </p>
      <Text as="p">{message.message}</Text>
      {message.documents.length > 0 && (
        <div className="flex flex-col gap-y-2 mt-2">
          <p className="font-bold text-xs">Reference Documents</p>
          {message.documents.slice(0, 5).map((document) => {
            return (
              <div className="text-sm flex" key={document.document_id}>
                <FiBook
                  className={
                    "my-auto mr-1" + (document.link ? " text-link" : " ")
                  }
                />
                {document.link ? (
                  <a
                    href={document.link}
                    target="_blank"
                    className="text-link"
                    rel="noreferrer"
                  >
                    {document.semantic_identifier}
                  </a>
                ) : (
                  document.semantic_identifier
                )}
              </div>
            );
          })}
        </div>
      )}
      {message.feedback_type && (
        <div className="mt-2">
          <p className="font-bold text-xs">Feedback</p>
          {message.feedback_text && <Text as="p">{message.feedback_text}</Text>}
          <div className="mt-1">
            <FeedbackBadge feedback={message.feedback_type} />
          </div>
        </div>
      )}
      <Separator />
    </div>
  );
}

export default function QueryPage(props: { params: Promise<{ id: string }> }) {
  const params = use(props.params);
  const {
    data: chatSessionSnapshot,
    isLoading,
    error,
  } = useSWR<ChatSessionSnapshot>(
    SWR_KEYS.adminChatSession(params.id),
    errorHandlingFetcher
  );

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (!chatSessionSnapshot || error) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch chat session - ${error}`}
      />
    );
  }

  return (
    <main className="pt-4 mx-auto container">
      <BackButton />

      <CardSection className="mt-4">
        <Title>Chat Session Details</Title>

        <Spacer rem={0.25} />
        {chatSessionSnapshot.assistant_name && (
          <Text as="p">{chatSessionSnapshot.assistant_name}</Text>
        )}
        <Spacer rem={0.25} />
        <Text as="p">
          {`${
            chatSessionSnapshot.user_email
              ? `${chatSessionSnapshot.user_email}, `
              : ""
          }${timestampToReadableDate(chatSessionSnapshot.time_created)}, ${
            chatSessionSnapshot.flow_type
          }`}
        </Text>

        <Separator />

        <div className="flex flex-col">
          {chatSessionSnapshot.messages.map((message) => {
            return <MessageDisplay key={message.id} message={message} />;
          })}
        </div>
      </CardSection>
    </main>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/query-history/constants.ts
================================================
export const LIST_QUERY_HISTORY_URL = "/api/admin/query-history/list";
export const START_QUERY_HISTORY_EXPORT_URL =
  "/api/admin/query-history/start-export";
export const CHECK_QUERY_HISTORY_EXPORT_STATUS_URL =
  "/api/admin/query-history/export-status";
export const DOWNLOAD_QUERY_HISTORY_URL = "/api/admin/query-history/download";
export const MAX_RETRIES = 10;
export const RETRY_COOLDOWN_MILLISECONDS = 200;

export const ITEMS_PER_PAGE = 20;
export const PAGES_PER_BATCH = 2;
export const NUM_IN_PAGE = 10;

export const PREVIOUS_CSV_TASK_BUTTON_NAME = "View Exports";


================================================
FILE: web/src/app/ee/admin/performance/query-history/page.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import { QueryHistoryTable } from "@/app/ee/admin/performance/query-history/QueryHistoryTable";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.QUERY_HISTORY;

export default function QueryHistoryPage() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />

      <SettingsLayouts.Body>
        <QueryHistoryTable />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/query-history/types.ts
================================================
import { TaskStatus } from "@/lib/types";

export interface TaskQueueState {
  task_id: string;
  start: string;
  end: string;
  status: TaskStatus;
  start_time: string;
}

export type StartQueryHistoryExportResponse = { request_id: string };

export type CheckQueryHistoryExportStatusResponse = {
  status: TaskStatus;
};

// The status of the spinner.
// If it's "static", then no spinning animation should be shown.
// Otherwise, the spinning animation should be shown.
export type SpinnerStatus = "static" | "spinning";


================================================
FILE: web/src/app/ee/admin/performance/query-history/utils.ts
================================================
import { DateRange } from "../../../../../components/dateRangeSelectors/AdminDateRangeSelector";
import { START_QUERY_HISTORY_EXPORT_URL } from "./constants";

export const withRequestId = (url: string, requestId: string): string =>
  `${url}?request_id=${requestId}`;

export const withDateRange = (dateRange: DateRange): string => {
  if (!dateRange) {
    return START_QUERY_HISTORY_EXPORT_URL;
  }

  const { from, to } = dateRange;

  const fromString = from.toISOString();
  const toString = to.toISOString();

  return `${START_QUERY_HISTORY_EXPORT_URL}?start=${fromString}&end=${toString}`;
};


================================================
FILE: web/src/app/ee/admin/performance/usage/FeedbackChart.tsx
================================================
import { ThreeDotsLoader } from "@/components/Loading";
import { getDatesList, useQueryAnalytics } from "../lib";
import { Text } from "@opal/components";
import Title from "@/components/ui/title";

import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import CardSection from "@/components/admin/CardSection";
import { AreaChartDisplay } from "@/components/ui/areaChart";

export function FeedbackChart({
  timeRange,
}: {
  timeRange: DateRangePickerValue;
}) {
  const {
    data: queryAnalyticsData,
    isLoading: isQueryAnalyticsLoading,
    error: queryAnalyticsError,
  } = useQueryAnalytics(timeRange);

  let chart;
  if (isQueryAnalyticsLoading) {
    chart = (
      <div className="h-80 flex flex-col">
        <ThreeDotsLoader />
      </div>
    );
  } else if (
    !queryAnalyticsData ||
    queryAnalyticsData[0] === undefined ||
    queryAnalyticsError
  ) {
    chart = (
      <div className="h-80 text-red-600 text-bold flex flex-col">
        <p className="m-auto">Failed to fetch feedback data...</p>
      </div>
    );
  } else {
    const initialDate = timeRange.from || new Date(queryAnalyticsData[0].date);
    const dateRange = getDatesList(initialDate);

    const dateToQueryAnalytics = new Map(
      queryAnalyticsData.map((queryAnalyticsEntry) => [
        queryAnalyticsEntry.date,
        queryAnalyticsEntry,
      ])
    );

    chart = (
      <AreaChartDisplay
        className="mt-4"
        data={dateRange.map((dateStr) => {
          const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr);
          return {
            Day: dateStr,
            "Positive Feedback": queryAnalyticsForDate?.total_likes || 0,
            "Negative Feedback": queryAnalyticsForDate?.total_dislikes || 0,
          };
        })}
        categories={["Positive Feedback", "Negative Feedback"]}
        index="Day"
        colors={["indigo", "fuchsia"]}
        yAxisWidth={60}
      />
    );
  }

  return (
    <CardSection className="mt-8">
      <Title>Feedback</Title>
      <Text as="p">Thumbs Up / Thumbs Down over time</Text>
      {chart}
    </CardSection>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/OnyxBotChart.tsx
================================================
import { ThreeDotsLoader } from "@/components/Loading";
import { getDatesList, useOnyxBotAnalytics } from "../lib";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { Text } from "@opal/components";
import Title from "@/components/ui/title";
import CardSection from "@/components/admin/CardSection";
import { AreaChartDisplay } from "@/components/ui/areaChart";

export function OnyxBotChart({
  timeRange,
}: {
  timeRange: DateRangePickerValue;
}) {
  const {
    data: onyxBotAnalyticsData,
    isLoading: isOnyxBotAnalyticsLoading,
    error: onyxBotAnalyticsError,
  } = useOnyxBotAnalytics(timeRange);

  let chart;
  if (isOnyxBotAnalyticsLoading) {
    chart = (
      <div className="h-80 flex flex-col">
        <ThreeDotsLoader />
      </div>
    );
  } else if (
    !onyxBotAnalyticsData ||
    onyxBotAnalyticsData[0] == undefined ||
    onyxBotAnalyticsError
  ) {
    chart = (
      <div className="h-80 text-red-600 text-bold flex flex-col">
        <p className="m-auto">Failed to fetch feedback data...</p>
      </div>
    );
  } else {
    const initialDate =
      timeRange.from || new Date(onyxBotAnalyticsData[0].date);
    const dateRange = getDatesList(initialDate);

    const dateToOnyxBotAnalytics = new Map(
      onyxBotAnalyticsData.map((onyxBotAnalyticsEntry) => [
        onyxBotAnalyticsEntry.date,
        onyxBotAnalyticsEntry,
      ])
    );

    chart = (
      <AreaChartDisplay
        className="mt-4"
        data={dateRange.map((dateStr) => {
          const onyxBotAnalyticsForDate = dateToOnyxBotAnalytics.get(dateStr);
          return {
            Day: dateStr,
            "Total Queries": onyxBotAnalyticsForDate?.total_queries || 0,
            "Automatically Resolved":
              onyxBotAnalyticsForDate?.auto_resolved || 0,
          };
        })}
        categories={["Total Queries", "Automatically Resolved"]}
        index="Day"
        colors={["indigo", "fuchsia"]}
        yAxisWidth={60}
      />
    );
  }

  return (
    <CardSection className="mt-8">
      <Title>Slack Channel</Title>
      <Text as="p">Total Queries vs Auto Resolved</Text>
      {chart}
    </CardSection>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/PersonaMessagesChart.tsx
================================================
import { ThreeDotsLoader } from "@/components/Loading";
import { X, Search } from "lucide-react";
import {
  getDatesList,
  usePersonaMessages,
  usePersonaUniqueUsers,
} from "../lib";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { Text } from "@opal/components";
import Title from "@/components/ui/title";
import CardSection from "@/components/admin/CardSection";
import { AreaChartDisplay } from "@/components/ui/areaChart";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { useState, useMemo, useEffect } from "react";
import { Persona } from "@/app/admin/agents/interfaces";

export function PersonaMessagesChart({
  availablePersonas,
  timeRange,
}: {
  availablePersonas: Persona[];
  timeRange: DateRangePickerValue;
}) {
  const [selectedPersonaId, setSelectedPersonaId] = useState<
    number | undefined
  >(undefined);
  const [searchQuery, setSearchQuery] = useState("");
  const [highlightedIndex, setHighlightedIndex] = useState(-1);

  const {
    data: personaMessagesData,
    isLoading: isPersonaMessagesLoading,
    error: personaMessagesError,
  } = usePersonaMessages(selectedPersonaId, timeRange);

  const {
    data: personaUniqueUsersData,
    isLoading: isPersonaUniqueUsersLoading,
    error: personaUniqueUsersError,
  } = usePersonaUniqueUsers(selectedPersonaId, timeRange);

  const isLoading = isPersonaMessagesLoading || isPersonaUniqueUsersLoading;
  const hasError = personaMessagesError || personaUniqueUsersError;

  const filteredPersonaList = useMemo(() => {
    if (!availablePersonas) return [];
    return availablePersonas.filter((persona) =>
      persona.name.toLowerCase().includes(searchQuery.toLowerCase())
    );
  }, [availablePersonas, searchQuery]);

  const handleKeyDown = (e: React.KeyboardEvent) => {
    e.stopPropagation();

    switch (e.key) {
      case "ArrowDown":
        e.preventDefault();
        setHighlightedIndex((prev) =>
          prev < filteredPersonaList.length - 1 ? prev + 1 : prev
        );
        break;
      case "ArrowUp":
        e.preventDefault();
        setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : prev));
        break;
      case "Enter":
        if (
          highlightedIndex >= 0 &&
          highlightedIndex < filteredPersonaList.length
        ) {
          const filteredPersona = filteredPersonaList[highlightedIndex];
          if (filteredPersona !== undefined) {
            setSelectedPersonaId(filteredPersona.id);
            setSearchQuery("");
            setHighlightedIndex(-1);
          }
        }
        break;
      case "Escape":
        setSearchQuery("");
        setHighlightedIndex(-1);
        break;
    }
  };

  // Reset highlight when search query changes
  useEffect(() => {
    setHighlightedIndex(-1);
  }, [searchQuery]);

  const chartData = useMemo(() => {
    if (
      !personaMessagesData?.length ||
      !personaUniqueUsersData?.length ||
      selectedPersonaId === undefined
    ) {
      return null;
    }

    const initialDate =
      timeRange.from ||
      new Date(
        Math.min(
          ...personaMessagesData.map((entry) => new Date(entry.date).getTime())
        )
      );
    const dateRange = getDatesList(initialDate);

    // Create maps for messages and unique users data
    const messagesMap = new Map(
      personaMessagesData.map((entry) => [entry.date, entry])
    );
    const uniqueUsersMap = new Map(
      personaUniqueUsersData.map((entry) => [entry.date, entry])
    );

    return dateRange.map((dateStr) => {
      const messageData = messagesMap.get(dateStr);
      const uniqueUserData = uniqueUsersMap.get(dateStr);
      return {
        Day: dateStr,
        Messages: messageData?.total_messages || 0,
        "Unique Users": uniqueUserData?.unique_users || 0,
      };
    });
  }, [
    personaMessagesData,
    personaUniqueUsersData,
    timeRange.from,
    selectedPersonaId,
  ]);

  let content;
  if (isLoading) {
    content = (
      <div className="h-80 flex flex-col">
        <ThreeDotsLoader />
      </div>
    );
  } else if (!availablePersonas || hasError) {
    content = (
      <div className="h-80 text-red-600 text-bold flex flex-col">
        <p className="m-auto">Failed to fetch data...</p>
      </div>
    );
  } else if (selectedPersonaId === undefined) {
    content = (
      <div className="h-80 text-text-500 flex flex-col">
        <p className="m-auto">Select an agent to view analytics</p>
      </div>
    );
  } else if (!personaMessagesData?.length) {
    content = (
      <div className="h-80 text-text-500 flex flex-col">
        <p className="m-auto">
          No data found for selected agent in the specified time range
        </p>
      </div>
    );
  } else if (chartData) {
    content = (
      <AreaChartDisplay
        className="mt-4"
        data={chartData}
        categories={["Messages", "Unique Users"]}
        index="Day"
        colors={["indigo", "fuchsia"]}
        yAxisWidth={60}
      />
    );
  }

  return (
    <CardSection className="mt-8">
      <Title>Agent Analytics</Title>
      <div className="flex flex-col gap-4">
        <Text as="p">
          Messages and unique users per day for the selected agent
        </Text>
        <div className="flex items-center gap-4">
          <Select
            value={selectedPersonaId?.toString() ?? ""}
            onValueChange={(value) => {
              setSelectedPersonaId(parseInt(value));
            }}
          >
            <SelectTrigger className="flex w-full max-w-xs">
              <SelectValue placeholder="Select an agent to display" />
            </SelectTrigger>
            <SelectContent>
              <div className="flex items-center px-2 pb-2 sticky top-0 bg-background border-b">
                <Search className="h-4 w-4 mr-2 shrink-0 opacity-50" />
                <input
                  className="flex h-8 w-full rounded-sm bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50"
                  placeholder="Search agents..."
                  value={searchQuery}
                  onChange={(e) => setSearchQuery(e.target.value)}
                  onClick={(e) => e.stopPropagation()}
                  onMouseDown={(e) => e.stopPropagation()}
                  onKeyDown={handleKeyDown}
                />
                {searchQuery && (
                  <X
                    className="h-4 w-4 shrink-0 opacity-50 cursor-pointer hover:opacity-100"
                    onClick={() => {
                      setSearchQuery("");
                      setHighlightedIndex(-1);
                    }}
                  />
                )}
              </div>
              {filteredPersonaList.map((persona, index) => (
                <SelectItem
                  key={persona.id}
                  value={persona.id.toString()}
                  className={`${highlightedIndex === index ? "hover" : ""}`}
                  onMouseEnter={() => setHighlightedIndex(index)}
                >
                  {persona.name}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
      </div>
      {content}
    </CardSection>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx
================================================
"use client";

import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { getDatesList, useQueryAnalytics, useUserAnalytics } from "../lib";
import { ThreeDotsLoader } from "@/components/Loading";
import { AreaChartDisplay } from "@/components/ui/areaChart";
import Title from "@/components/ui/title";
import { Text } from "@opal/components";
import CardSection from "@/components/admin/CardSection";

export function QueryPerformanceChart({
  timeRange,
}: {
  timeRange: DateRangePickerValue;
}) {
  const {
    data: queryAnalyticsData,
    isLoading: isQueryAnalyticsLoading,
    error: queryAnalyticsError,
  } = useQueryAnalytics(timeRange);
  const {
    data: userAnalyticsData,
    isLoading: isUserAnalyticsLoading,
    error: userAnalyticsError,
  } = useUserAnalytics(timeRange);

  let chart;
  if (isQueryAnalyticsLoading || isUserAnalyticsLoading) {
    chart = (
      <div className="h-80 flex flex-col">
        <ThreeDotsLoader />
      </div>
    );
  } else if (
    !queryAnalyticsData ||
    queryAnalyticsData[0] === undefined ||
    !userAnalyticsData ||
    queryAnalyticsError ||
    userAnalyticsError
  ) {
    chart = (
      <div className="h-80 text-red-600 text-bold flex flex-col">
        <p className="m-auto">Failed to fetch query data...</p>
      </div>
    );
  } else {
    const initialDate = timeRange.from || new Date(queryAnalyticsData[0].date);
    const dateRange = getDatesList(initialDate);

    const dateToQueryAnalytics = new Map(
      queryAnalyticsData.map((queryAnalyticsEntry) => [
        queryAnalyticsEntry.date,
        queryAnalyticsEntry,
      ])
    );
    const dateToUserAnalytics = new Map(
      userAnalyticsData.map((userAnalyticsEntry) => [
        userAnalyticsEntry.date,
        userAnalyticsEntry,
      ])
    );

    chart = (
      <AreaChartDisplay
        className="mt-4"
        stacked={false}
        data={dateRange.map((dateStr) => {
          const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr);
          const userAnalyticsForDate = dateToUserAnalytics.get(dateStr);
          return {
            Day: dateStr,
            Queries: queryAnalyticsForDate?.total_queries || 0,
            "Unique Users": userAnalyticsForDate?.total_active_users || 0,
          };
        })}
        categories={["Queries", "Unique Users"]}
        index="Day"
        colors={["indigo", "fuchsia"]}
        yAxisFormatter={(number: number) =>
          new Intl.NumberFormat("en-US", {
            notation: "standard",
            maximumFractionDigits: 0,
          }).format(number)
        }
        xAxisFormatter={(dateStr: string) => {
          const date = new Date(dateStr);
          return date.toLocaleDateString("en-US", {
            month: "short",
            day: "numeric",
          });
        }}
        yAxisWidth={60}
        allowDecimals={false}
      />
    );
  }

  return (
    <CardSection className="mt-8">
      <Title>Usage</Title>
      <Text as="p">Usage over time</Text>
      {chart}
    </CardSection>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/UsageReports.tsx
================================================
"use client";

import { format } from "date-fns";
import { errorHandlingFetcher } from "@/lib/fetcher";

import { FiDownload } from "react-icons/fi";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { Text } from "@opal/components";
import Title from "@/components/ui/title";
import Spacer from "@/refresh-components/Spacer";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import React, { useState } from "react";
import { UsageReport } from "./types";
import { ThreeDotsLoader } from "@/components/Loading";
import Link from "next/link";
import { humanReadableFormat, humanReadableFormatWithTime } from "@/lib/time";
import { ErrorCallout } from "@/components/ErrorCallout";
import { PageSelector } from "@/components/PageSelector";
import Separator from "@/refresh-components/Separator";
import { DateRangePickerValue } from "../../../../../components/dateRangeSelectors/AdminDateRangeSelector";
import Popover from "@/refresh-components/Popover";
import Calendar from "@/refresh-components/Calendar";
import { cn } from "@/lib/utils";
import { Spinner } from "@/components/Spinner";
import { SvgCalendar, SvgDownloadCloud } from "@opal/icons";

function GenerateReportInput({
  onReportGenerated,
  isWaitingForReport,
}: {
  onReportGenerated: () => void;
  isWaitingForReport: boolean;
}) {
  const [dateRange, setDateRange] = useState<DateRangePickerValue | undefined>(
    undefined
  );
  const [isLoading, setIsLoading] = useState(false);

  const [errorOccurred, setErrorOccurred] = useState<Error | null>(null);

  const requestReport = async () => {
    setIsLoading(true);
    setErrorOccurred(null);
    try {
      let period_from: string | null = null;
      let period_to: string | null = null;

      if (dateRange?.selectValue != "allTime" && dateRange?.from) {
        period_from = dateRange?.from?.toISOString();
        period_to = dateRange?.to?.toISOString() ?? new Date().toISOString();
      }

      const res = await fetch("/api/admin/usage-report", {
        method: "POST",
        credentials: "include",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          period_from: period_from,
          period_to: period_to,
        }),
      });

      if (!res.ok) {
        throw Error(`Received an error: ${res.statusText}`);
      }

      // Trigger refresh of the reports list
      onReportGenerated();
    } catch (e) {
      setErrorOccurred(e as Error);
    } finally {
      setIsLoading(false);
    }
  };

  const today = new Date();

  const lastWeek = new Date();
  lastWeek.setDate(today.getDate() - 7);

  const lastMonth = new Date();
  lastMonth.setMonth(today.getMonth() - 1);

  const lastYear = new Date();
  lastYear.setFullYear(today.getFullYear() - 1);

  return (
    <div className="mb-8">
      <Title className="mb-2">Generate Usage Reports</Title>
      <Text as="p">Generate usage statistics for users in the workspace.</Text>
      <Spacer rem={2} />
      <div className="grid gap-2 mb-3">
        <Popover>
          <Popover.Trigger asChild>
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Button
              secondary
              className={cn(
                "w-[300px] justify-start text-left font-normal",
                !dateRange && "text-muted-foreground"
              )}
              leftIcon={SvgCalendar}
            >
              {dateRange?.from ? (
                dateRange.to ? (
                  <>
                    {format(dateRange.from, "LLL dd, y")} -{" "}
                    {format(dateRange.to, "LLL dd, y")}
                  </>
                ) : (
                  format(dateRange.from, "LLL dd, y")
                )
              ) : (
                <span>Pick a date range</span>
              )}
            </Button>
          </Popover.Trigger>
          <Popover.Content align="start">
            <Calendar
              initialFocus
              mode="range"
              defaultMonth={dateRange?.from}
              selected={dateRange}
              onSelect={(range) =>
                range?.from &&
                setDateRange({
                  from: range.from,
                  to: range.to ?? range.from,
                  selectValue: "custom",
                })
              }
              numberOfMonths={2}
              disabled={(date) => date > new Date()}
            />
            <div className="border-t p-3">
              <OpalButton
                prominence="tertiary"
                width="full"
                onClick={() => {
                  setDateRange({
                    from: lastWeek,
                    to: new Date(),
                    selectValue: "lastWeek",
                  });
                }}
              >
                Last 7 days
              </OpalButton>
              <OpalButton
                prominence="tertiary"
                width="full"
                onClick={() => {
                  setDateRange({
                    from: lastMonth,
                    to: new Date(),
                    selectValue: "lastMonth",
                  });
                }}
              >
                Last 30 days
              </OpalButton>
              <OpalButton
                prominence="tertiary"
                width="full"
                onClick={() => {
                  setDateRange({
                    from: lastYear,
                    to: new Date(),
                    selectValue: "lastYear",
                  });
                }}
              >
                Last year
              </OpalButton>
              <OpalButton
                prominence="tertiary"
                width="full"
                onClick={() => {
                  setDateRange({
                    from: new Date(1970, 0, 1),
                    to: new Date(),
                    selectValue: "allTime",
                  });
                }}
              >
                All time
              </OpalButton>
            </div>
          </Popover.Content>
        </Popover>
      </div>
      <OpalButton
        disabled={isLoading || isWaitingForReport}
        color={"blue"}
        icon={SvgDownloadCloud}
        onClick={() => requestReport()}
      >
        {isWaitingForReport ? "Generating..." : "Generate Report"}
      </OpalButton>
      <p className="mt-1 text-xs">
        {isWaitingForReport
          ? "A report is currently being generated. Please wait..."
          : 'Report generation runs in the background. Check the "Previous Reports" section below to download when ready.'}
      </p>
      {errorOccurred && (
        <ErrorCallout
          errorTitle="Something went wrong."
          errorMsg={errorOccurred?.toString()}
        />
      )}
    </div>
  );
}

const USAGE_REPORT_URL = SWR_KEYS.usageReport;

function UsageReportsTable({
  refreshTrigger,
  isWaitingForReport,
  onNewReportDetected,
}: {
  refreshTrigger: number;
  isWaitingForReport: boolean;
  onNewReportDetected: () => void;
}) {
  const [page, setPage] = useState(1);
  const NUM_IN_PAGE = 10;
  const [previousReportCount, setPreviousReportCount] = useState<number | null>(
    null
  );

  const {
    data: usageReportsMetadata,
    error: usageReportsError,
    isLoading: usageReportsIsLoading,
    mutate,
  } = useSWR<UsageReport[]>(USAGE_REPORT_URL, errorHandlingFetcher, {
    refreshInterval: isWaitingForReport ? 3000 : 0, // Poll every 3 seconds when waiting
  });

  // Refresh when refreshTrigger changes
  React.useEffect(() => {
    if (refreshTrigger > 0) {
      mutate();
    }
  }, [refreshTrigger, mutate]);

  // Detect when a new report appears
  React.useEffect(() => {
    if (usageReportsMetadata && previousReportCount !== null) {
      if (usageReportsMetadata.length > previousReportCount) {
        onNewReportDetected();
      }
    }
    if (usageReportsMetadata) {
      setPreviousReportCount(usageReportsMetadata.length);
    }
  }, [usageReportsMetadata, previousReportCount, onNewReportDetected]);

  const paginatedReports = usageReportsMetadata
    ? usageReportsMetadata
        .slice(0)
        .reverse()
        .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)
    : [];

  const totalPages = usageReportsMetadata
    ? Math.ceil(usageReportsMetadata.length / NUM_IN_PAGE)
    : 0;

  return (
    <div>
      <Title className="mb-2 mt-6 mx-auto"> Previous Reports </Title>
      {usageReportsIsLoading && !isWaitingForReport ? (
        <div className="flex justify-center w-full">
          <ThreeDotsLoader />
        </div>
      ) : usageReportsError ? (
        <ErrorCallout
          errorTitle="Something went wrong."
          errorMsg={(usageReportsError as Error).toString()}
        />
      ) : (
        <>
          <Table>
            <TableHeader>
              <TableRow>
                <TableHead>Report</TableHead>
                <TableHead>Period</TableHead>
                <TableHead>Generated By</TableHead>
                <TableHead>Time Generated</TableHead>
                <TableHead>Download</TableHead>
              </TableRow>
            </TableHeader>

            <TableBody>
              {paginatedReports.map((r) => (
                <TableRow key={r.report_name}>
                  <TableCell>
                    {r.report_name.split("_")[1]?.substring(0, 8) ||
                      r.report_name.substring(0, 8)}
                  </TableCell>
                  <TableCell>
                    {r.period_from
                      ? `${humanReadableFormat(
                          r.period_from
                        )} - ${humanReadableFormat(r.period_to!)}`
                      : "All time"}
                  </TableCell>
                  <TableCell>{r.requestor ?? "Auto generated"}</TableCell>
                  <TableCell>
                    {humanReadableFormatWithTime(r.time_created)}
                  </TableCell>
                  <TableCell>
                    <Link
                      href={`/api/admin/usage-report/${r.report_name}`}
                      className="flex justify-center"
                    >
                      <FiDownload color="primary" />
                    </Link>
                  </TableCell>
                </TableRow>
              ))}
            </TableBody>
          </Table>
          <div className="mt-3 flex">
            <div className="mx-auto">
              <PageSelector
                totalPages={totalPages}
                currentPage={page}
                onPageChange={(newPage) => {
                  setPage(newPage);
                  window.scrollTo({
                    top: 0,
                    left: 0,
                    behavior: "smooth",
                  });
                }}
              />
            </div>
          </div>
        </>
      )}
    </div>
  );
}

export default function UsageReports() {
  const [refreshTrigger, setRefreshTrigger] = useState(0);
  const [isWaitingForReport, setIsWaitingForReport] = useState(false);
  const [timeoutMessage, setTimeoutMessage] = useState<string | null>(null);
  const timeoutRef = React.useRef<NodeJS.Timeout | null>(null);

  const handleReportGenerated = () => {
    setRefreshTrigger((prev) => prev + 1);
    setIsWaitingForReport(true);
    setTimeoutMessage(null);

    // Clear any existing timeout
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
    }

    // Set a 15 second timeout
    timeoutRef.current = setTimeout(() => {
      setIsWaitingForReport(false);
      setTimeoutMessage(
        "Report generation is taking longer than expected. The report will continue generating in the background. Please check back in a few minutes."
      );
      timeoutRef.current = null;
    }, 15000);
  };

  const handleNewReportDetected = () => {
    setIsWaitingForReport(false);
    setTimeoutMessage(null);
    // Clear the timeout if report completed before timeout
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  };

  // Cleanup on unmount
  React.useEffect(() => {
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  return (
    <>
      {isWaitingForReport && <Spinner />}
      <>
        <GenerateReportInput
          onReportGenerated={handleReportGenerated}
          isWaitingForReport={isWaitingForReport}
        />
        {timeoutMessage && (
          <div className="mb-4 p-4 bg-status-warning-00 border border-status-warning-02 rounded-regular">
            <div className="flex items-start gap-2">
              <div className="text-status-warning-05 mt-0.5">
                <svg
                  className="w-5 h-5"
                  fill="none"
                  stroke="currentColor"
                  viewBox="0 0 24 24"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={2}
                    d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
                  />
                </svg>
              </div>
              <div className="flex-1">
                <div className="text-status-warning-05">
                  <Text as="p" font="main-ui-action">
                    Report Generation In Progress
                  </Text>
                </div>
                <Spacer rem={0.25} />
                <div className="text-status-warning-05">
                  <Text as="p">{timeoutMessage}</Text>
                </div>
              </div>
            </div>
          </div>
        )}
        <Separator />
        <UsageReportsTable
          refreshTrigger={refreshTrigger}
          isWaitingForReport={isWaitingForReport}
          onNewReportDetected={handleNewReportDetected}
        />
      </>
    </>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/page.tsx
================================================
"use client";

import { AdminDateRangeSelector } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { OnyxBotChart } from "@/app/ee/admin/performance/usage/OnyxBotChart";
import { FeedbackChart } from "@/app/ee/admin/performance/usage/FeedbackChart";
import { QueryPerformanceChart } from "@/app/ee/admin/performance/usage/QueryPerformanceChart";
import { PersonaMessagesChart } from "@/app/ee/admin/performance/usage/PersonaMessagesChart";
import { useTimeRange } from "@/app/ee/admin/performance/lib";
import UsageReports from "@/app/ee/admin/performance/usage/UsageReports";
import Separator from "@/refresh-components/Separator";
import { useAdminPersonas } from "@/hooks/useAdminPersonas";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import * as SettingsLayouts from "@/layouts/settings-layouts";

const route = ADMIN_ROUTES.USAGE;

export default function AnalyticsPage() {
  const [timeRange, setTimeRange] = useTimeRange();
  const { personas } = useAdminPersonas();

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <AdminDateRangeSelector
          value={timeRange}
          onValueChange={(value) => setTimeRange(value as any)}
        />
        <QueryPerformanceChart timeRange={timeRange} />
        <FeedbackChart timeRange={timeRange} />
        <OnyxBotChart timeRange={timeRange} />
        <PersonaMessagesChart
          availablePersonas={personas}
          timeRange={timeRange}
        />
        <Separator />
        <UsageReports />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/performance/usage/types.ts
================================================
import { Feedback, SessionType } from "@/lib/types";

export interface QueryAnalytics {
  total_queries: number;
  total_likes: number;
  total_dislikes: number;
  date: string;
}

export interface UserAnalytics {
  total_active_users: number;
  date: string;
}

export interface OnyxBotAnalytics {
  total_queries: number;
  auto_resolved: number;
  date: string;
}

export interface AbridgedSearchDoc {
  document_id: string;
  semantic_identifier: string;
  link: string | null;
}

export interface MessageSnapshot {
  id: number;
  message: string;
  message_type: "user" | "assistant";
  documents: AbridgedSearchDoc[];
  feedback_type: Feedback | null;
  feedback_text: string | null;
  time_created: string;
}

export interface ChatSessionSnapshot {
  id: number;
  user_email: string | null;
  name: string | null;
  messages: MessageSnapshot[];
  assistant_id: number | null;
  assistant_name: string | null;
  time_created: string;
  flow_type: SessionType;
}

export interface ChatSessionMinimal {
  id: number;
  user_email: string | null;
  name: string | null;
  first_user_message: string;
  first_ai_message: string;
  assistant_id: number | null;
  assistant_name: string | null;
  time_created: string;
  feedback_type: Feedback | "mixed" | null;
  flow_type: SessionType;
  conversation_length: number;
}

export interface UsageReport {
  report_name: string;
  requestor: string | null;
  time_created: string;
  period_from: string | null;
  period_to: string | null;
}


================================================
FILE: web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx
================================================
"use client";

import { toast } from "@/hooks/useToast";
import { StandardAnswerCategory, StandardAnswer } from "@/lib/types";
import CardSection from "@/components/admin/CardSection";
import Button from "@/refresh-components/buttons/Button";
import { Form, Formik } from "formik";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import * as Yup from "yup";
import {
  createStandardAnswer,
  createStandardAnswerCategory,
  StandardAnswerCreationRequest,
  updateStandardAnswer,
} from "./lib";
import {
  TextFormField,
  MarkdownFormField,
  BooleanFormField,
  SelectorFormField,
} from "@/components/Field";
import MultiSelectDropdown from "@/components/MultiSelectDropdown";

function mapKeywordSelectToMatchAny(keywordSelect: "any" | "all"): boolean {
  return keywordSelect == "any";
}

function mapMatchAnyToKeywordSelect(matchAny: boolean): "any" | "all" {
  return matchAny ? "any" : "all";
}

export const StandardAnswerCreationForm = ({
  standardAnswerCategories,
  existingStandardAnswer,
}: {
  standardAnswerCategories: StandardAnswerCategory[];
  existingStandardAnswer?: StandardAnswer;
}) => {
  const isUpdate = existingStandardAnswer !== undefined;
  const router = useRouter();

  return (
    <div>
      <CardSection>
        <Formik
          initialValues={{
            keyword: existingStandardAnswer
              ? existingStandardAnswer.keyword
              : "",
            answer: existingStandardAnswer ? existingStandardAnswer.answer : "",
            categories: existingStandardAnswer
              ? existingStandardAnswer.categories
              : [],
            matchRegex: existingStandardAnswer
              ? existingStandardAnswer.match_regex
              : false,
            matchAnyKeywords: existingStandardAnswer
              ? mapMatchAnyToKeywordSelect(
                  existingStandardAnswer.match_any_keywords
                )
              : "all",
          }}
          validationSchema={Yup.object().shape({
            keyword: Yup.string()
              .required("Keywords or pattern is required")
              .max(255)
              .min(1),
            answer: Yup.string().required("Answer is required").min(1),
            categories: Yup.array()
              .required()
              .min(1, "At least one category is required"),
          })}
          onSubmit={async (values, formikHelpers) => {
            formikHelpers.setSubmitting(true);

            const cleanedValues: StandardAnswerCreationRequest = {
              ...values,
              matchAnyKeywords: mapKeywordSelectToMatchAny(
                values.matchAnyKeywords
              ),
              categories: values.categories.map((category) => category.id),
            };

            let response;
            if (isUpdate) {
              response = await updateStandardAnswer(
                existingStandardAnswer.id,
                cleanedValues
              );
            } else {
              response = await createStandardAnswer(cleanedValues);
            }
            formikHelpers.setSubmitting(false);
            if (response.ok) {
              router.push(`/ee/admin/standard-answer?u=${Date.now()}` as Route);
            } else {
              const responseJson = await response.json();
              const errorMsg = responseJson.detail || responseJson.message;
              toast.error(
                isUpdate
                  ? `Error updating Standard Answer - ${errorMsg}`
                  : `Error creating Standard Answer - ${errorMsg}`
              );
            }
          }}
        >
          {({ isSubmitting, values, setFieldValue }) => (
            <Form>
              {values.matchRegex ? (
                <TextFormField
                  name="keyword"
                  label="Regex pattern"
                  isCode
                  tooltip="Triggers if the question matches this regex pattern (using Python `re.search()`)"
                  placeholder="(?:it|support)\s*ticket"
                />
              ) : values.matchAnyKeywords == "any" ? (
                <TextFormField
                  name="keyword"
                  label="Any of these keywords, separated by spaces"
                  tooltip="A question must match these keywords in order to trigger the answer."
                  placeholder="ticket problem issue"
                />
              ) : (
                <TextFormField
                  name="keyword"
                  label="All of these keywords, in any order, separated by spaces"
                  tooltip="A question must match these keywords in order to trigger the answer."
                  placeholder="it ticket"
                />
              )}
              <BooleanFormField
                subtext="Match a regex pattern instead of an exact keyword"
                optional
                label="Match regex"
                name="matchRegex"
              />
              {values.matchRegex ? null : (
                <SelectorFormField
                  defaultValue={`all`}
                  label="Keyword detection strategy"
                  subtext="Choose whether to require the user's question to contain any or all of the keywords above to show this answer."
                  name="matchAnyKeywords"
                  options={[
                    {
                      name: "All keywords",
                      value: "all",
                    },
                    {
                      name: "Any keywords",
                      value: "any",
                    },
                  ]}
                  onSelect={(selected) => {
                    setFieldValue("matchAnyKeywords", selected);
                  }}
                />
              )}
              <div className="w-full">
                <MarkdownFormField
                  name="answer"
                  label="Answer"
                  placeholder="The answer in Markdown. Example: If you need any help from the IT team, please email internalsupport@company.com"
                />
              </div>
              <div className="w-4/12">
                <MultiSelectDropdown
                  name="categories"
                  label="Categories:"
                  onChange={(selected_options) => {
                    const selected_categories = selected_options.map(
                      (option) => {
                        return { id: Number(option.value), name: option.label };
                      }
                    );
                    setFieldValue("categories", selected_categories);
                  }}
                  creatable={true}
                  onCreate={async (created_name) => {
                    const response = await createStandardAnswerCategory({
                      name: created_name,
                    });
                    const newCategory = await response.json();
                    return {
                      label: newCategory.name,
                      value: newCategory.id.toString(),
                    };
                  }}
                  options={standardAnswerCategories.map((category) => ({
                    label: category.name,
                    value: category.id.toString(),
                  }))}
                  initialSelectedOptions={values.categories.map((category) => ({
                    label: category.name,
                    value: category.id.toString(),
                  }))}
                />
              </div>
              <div className="py-4 flex">
                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                <Button
                  type="submit"
                  disabled={isSubmitting}
                  className="mx-auto w-64"
                >
                  {isUpdate ? "Update!" : "Create!"}
                </Button>
              </div>
            </Form>
          )}
        </Formik>
      </CardSection>
    </div>
  );
};


================================================
FILE: web/src/app/ee/admin/standard-answer/[id]/page.tsx
================================================
import { StandardAnswerCreationForm } from "@/app/ee/admin/standard-answer/StandardAnswerCreationForm";
import { fetchSS } from "@/lib/utilsSS";
import { ErrorCallout } from "@/components/ErrorCallout";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { StandardAnswer, StandardAnswerCategory } from "@/lib/types";

const route = ADMIN_ROUTES.STANDARD_ANSWERS;

async function Main({ id }: { id: string }) {
  const tasks = [
    fetchSS("/manage/admin/standard-answer"),
    fetchSS(`/manage/admin/standard-answer/category`),
  ];
  const [standardAnswersResponse, standardAnswerCategoriesResponse] =
    await Promise.all(tasks);

  if (standardAnswersResponse === undefined) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answers.`}
      />
    );
  }

  if (!standardAnswersResponse.ok) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answers - ${await standardAnswersResponse.text()}`}
      />
    );
  }
  const allStandardAnswers =
    (await standardAnswersResponse.json()) as StandardAnswer[];
  const standardAnswer = allStandardAnswers.find(
    (answer) => answer.id.toString() === id
  );

  if (!standardAnswer) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Did not find standard answer with ID: ${id}`}
      />
    );
  }

  if (standardAnswerCategoriesResponse === undefined) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answer categories.`}
      />
    );
  }

  if (!standardAnswerCategoriesResponse.ok) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answer categories - ${await standardAnswerCategoriesResponse.text()}`}
      />
    );
  }

  const standardAnswerCategories =
    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];

  return (
    <StandardAnswerCreationForm
      standardAnswerCategories={standardAnswerCategories}
      existingStandardAnswer={standardAnswer}
    />
  );
}

export default async function Page(props: { params: Promise<{ id: string }> }) {
  const params = await props.params;

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title="Edit Standard Answer"
        backButton
        separator
      />
      <SettingsLayouts.Body>
        <Main id={params.id} />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/standard-answer/hooks.ts
================================================
import { errorHandlingFetcher } from "@/lib/fetcher";
import { StandardAnswerCategory, StandardAnswer } from "@/lib/types";
import useSWR, { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

export const useStandardAnswerCategories = () => {
  const swrResponse = useSWR<StandardAnswerCategory[]>(
    SWR_KEYS.standardAnswerCategories,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshStandardAnswerCategories: () =>
      mutate(SWR_KEYS.standardAnswerCategories),
  };
};

export const useStandardAnswers = () => {
  const swrResponse = useSWR<StandardAnswer[]>(
    SWR_KEYS.standardAnswers,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshStandardAnswers: () => mutate(SWR_KEYS.standardAnswers),
  };
};


================================================
FILE: web/src/app/ee/admin/standard-answer/lib.ts
================================================
export interface StandardAnswerCategoryCreationRequest {
  name: string;
}

export interface StandardAnswerCreationRequest {
  keyword: string;
  answer: string;
  categories: number[];
  matchRegex: boolean;
  matchAnyKeywords: boolean;
}

const buildRequestBodyFromStandardAnswerCategoryCreationRequest = (
  request: StandardAnswerCategoryCreationRequest
) => {
  return JSON.stringify({
    name: request.name,
  });
};

export const createStandardAnswerCategory = async (
  request: StandardAnswerCategoryCreationRequest
) => {
  return fetch("/api/manage/admin/standard-answer/category", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromStandardAnswerCategoryCreationRequest(request),
  });
};

export const updateStandardAnswerCategory = async (
  id: number,
  request: StandardAnswerCategoryCreationRequest
) => {
  return fetch(`/api/manage/admin/standard-answer/category/${id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromStandardAnswerCategoryCreationRequest(request),
  });
};

const buildRequestBodyFromStandardAnswerCreationRequest = (
  request: StandardAnswerCreationRequest
) => {
  return JSON.stringify({
    keyword: request.keyword,
    answer: request.answer,
    categories: request.categories,
    match_regex: request.matchRegex,
    match_any_keywords: request.matchAnyKeywords,
  });
};

export const createStandardAnswer = async (
  request: StandardAnswerCreationRequest
) => {
  return fetch("/api/manage/admin/standard-answer", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromStandardAnswerCreationRequest(request),
  });
};

export const updateStandardAnswer = async (
  id: number,
  request: StandardAnswerCreationRequest
) => {
  return fetch(`/api/manage/admin/standard-answer/${id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: buildRequestBodyFromStandardAnswerCreationRequest(request),
  });
};

export const deleteStandardAnswer = async (id: number) => {
  return fetch(`/api/manage/admin/standard-answer/${id}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
};


================================================
FILE: web/src/app/ee/admin/standard-answer/new/page.tsx
================================================
import { StandardAnswerCreationForm } from "@/app/ee/admin/standard-answer/StandardAnswerCreationForm";
import { fetchSS } from "@/lib/utilsSS";
import { ErrorCallout } from "@/components/ErrorCallout";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { StandardAnswerCategory } from "@/lib/types";

const route = ADMIN_ROUTES.STANDARD_ANSWERS;

async function Page() {
  const standardAnswerCategoriesResponse = await fetchSS(
    "/manage/admin/standard-answer/category"
  );

  if (!standardAnswerCategoriesResponse.ok) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answer categories - ${await standardAnswerCategoriesResponse.text()}`}
      />
    );
  }
  const standardAnswerCategories =
    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title="New Standard Answer"
        backButton
        separator
      />
      <SettingsLayouts.Body>
        <StandardAnswerCreationForm
          standardAnswerCategories={standardAnswerCategories}
        />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}

export default Page;


================================================
FILE: web/src/app/ee/admin/standard-answer/page.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import { toast } from "@/hooks/useToast";
import { useStandardAnswers, useStandardAnswerCategories } from "./hooks";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import Separator from "@/refresh-components/Separator";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";

import Link from "next/link";
import type { Route } from "next";
import { StandardAnswer, StandardAnswerCategory } from "@/lib/types";
import { MagnifyingGlass } from "@phosphor-icons/react";
import { useState, JSX } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { deleteStandardAnswer } from "./lib";
import { FilterDropdown } from "@/components/search/filtering/FilterDropdown";
import { FiTag } from "react-icons/fi";
import { PageSelector } from "@/components/PageSelector";
import { Text } from "@opal/components";
import { markdown } from "@opal/utils";
import Spacer from "@/refresh-components/Spacer";
import { TableHeader } from "@/components/ui/table";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { SvgEdit, SvgTrash } from "@opal/icons";
import { Button } from "@opal/components";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
const NUM_RESULTS_PER_PAGE = 10;

const route = ADMIN_ROUTES.STANDARD_ANSWERS;

type Displayable = JSX.Element | string;

const RowTemplate = ({
  id,
  entries,
}: {
  id: number;
  entries: [
    Displayable,
    Displayable,
    Displayable,
    Displayable,
    Displayable,
    Displayable,
  ];
}) => {
  return (
    <TableRow key={id}>
      <TableCell className="w-1/24">{entries[0]}</TableCell>
      <TableCell className="w-2/12">{entries[1]}</TableCell>
      <TableCell className="w-2/12">{entries[2]}</TableCell>
      <TableCell className="w-1/24">{entries[3]}</TableCell>
      <TableCell className="w-7/12 overflow-auto">{entries[4]}</TableCell>
      <TableCell className="w-1/24">{entries[5]}</TableCell>
    </TableRow>
  );
};

const CategoryBubble = ({
  name,
  onDelete,
}: {
  name: string;
  onDelete?: () => void;
}) => (
  <span
    className={`
      inline-block
      px-2
      py-1
      mr-1
      mb-1
      text-xs
      font-semibold
      text-emphasis
      bg-accent-background-hovered
      rounded-full
      items-center
      w-fit
      ${onDelete ? "cursor-pointer" : ""}
    `}
    onClick={onDelete}
  >
    {name}
    {onDelete && (
      <button
        className="ml-1 text-subtle hover:text-emphasis"
        aria-label="Remove category"
      >
        &times;
      </button>
    )}
  </span>
);

const StandardAnswersTableRow = ({
  standardAnswer,
  handleDelete,
}: {
  standardAnswer: StandardAnswer;
  handleDelete: (id: number) => void;
}) => {
  return (
    <RowTemplate
      id={standardAnswer.id}
      entries={[
        <Link
          key={`edit-${standardAnswer.id}`}
          href={`/ee/admin/standard-answer/${standardAnswer.id}` as Route}
        >
          <SvgEdit size={16} />
        </Link>,
        <div key={`categories-${standardAnswer.id}`}>
          {standardAnswer.categories.map((category) => (
            <CategoryBubble key={category.id} name={category.name} />
          ))}
        </div>,
        <ReactMarkdown key={`keyword-${standardAnswer.id}`}>
          {standardAnswer.match_regex
            ? `\`${standardAnswer.keyword}\``
            : standardAnswer.keyword}
        </ReactMarkdown>,
        <div
          key={`match_regex-${standardAnswer.id}`}
          className="flex items-center"
        >
          {standardAnswer.match_regex ? (
            <span className="text-green-500 font-medium">Yes</span>
          ) : (
            <span className="text-gray-500">No</span>
          )}
        </div>,
        <ReactMarkdown
          key={`answer-${standardAnswer.id}`}
          className="prose dark:prose-invert"
          remarkPlugins={[remarkGfm]}
        >
          {standardAnswer.answer}
        </ReactMarkdown>,
        <Button
          key={`delete-${standardAnswer.id}`}
          icon={SvgTrash}
          onClick={() => handleDelete(standardAnswer.id)}
        />,
      ]}
    />
  );
};

const StandardAnswersTable = ({
  standardAnswers,
  standardAnswerCategories,
  refresh,
}: {
  standardAnswers: StandardAnswer[];
  standardAnswerCategories: StandardAnswerCategory[];
  refresh: () => void;
}) => {
  const [query, setQuery] = useState("");
  const [currentPage, setCurrentPage] = useState(1);
  const [selectedCategories, setSelectedCategories] = useState<
    StandardAnswerCategory[]
  >([]);
  const columns = [
    { name: "", key: "edit" },
    { name: "Categories", key: "category" },
    { name: "Keywords/Pattern", key: "keyword" },
    { name: "Match regex?", key: "match_regex" },
    { name: "Answer", key: "answer" },
    { name: "", key: "delete" },
  ];

  const filteredStandardAnswers = standardAnswers.filter((standardAnswer) => {
    const {
      answer,
      id,
      categories,
      match_regex,
      match_any_keywords,
      ...fieldsToSearch
    } = standardAnswer;
    const cleanedQuery = query.toLowerCase();
    const searchMatch = Object.values(fieldsToSearch).some((value) => {
      return value.toLowerCase().includes(cleanedQuery);
    });
    const categoryMatch =
      selectedCategories.length == 0 ||
      selectedCategories.some((category) =>
        categories.map((c) => c.id).includes(category.id)
      );
    return searchMatch && categoryMatch;
  });

  const totalPages = Math.ceil(
    filteredStandardAnswers.length / NUM_RESULTS_PER_PAGE
  );
  const startIndex = (currentPage - 1) * NUM_RESULTS_PER_PAGE;
  const endIndex = startIndex + NUM_RESULTS_PER_PAGE;
  const paginatedStandardAnswers = filteredStandardAnswers.slice(
    startIndex,
    endIndex
  );

  const handlePageChange = (page: number) => {
    setCurrentPage(page);
  };

  const handleDelete = async (id: number) => {
    const response = await deleteStandardAnswer(id);
    if (response.ok) {
      toast.success(`Standard answer ${id} deleted`);
    } else {
      const errorMsg = await response.text();
      toast.error(`Failed to delete standard answer - ${errorMsg}`);
    }
    refresh();
  };

  const handleCategorySelect = (category: StandardAnswerCategory) => {
    setSelectedCategories((prev: StandardAnswerCategory[]) => {
      const prevCategoryIds = prev.map((category) => category.id);
      if (prevCategoryIds.includes(category.id)) {
        return prev.filter((c) => c.id !== category.id);
      }
      return [...prev, category];
    });
  };

  return (
    <div className="justify-center py-2">
      <div className="flex items-center w-full border-2 border-border rounded-lg px-4 py-2 focus-within:border-accent">
        <MagnifyingGlass />
        <textarea
          autoFocus
          className="flex-grow ml-2 h-6 bg-transparent outline-none placeholder-subtle overflow-hidden whitespace-normal resize-none"
          role="textarea"
          aria-multiline
          placeholder="Find standard answers by keyword/phrase..."
          value={query}
          onChange={(event) => {
            setQuery(event.target.value);
            setCurrentPage(1);
          }}
          onKeyDown={(event) => {
            if (event.key === "Enter") {
              event.preventDefault();
            }
          }}
          suppressContentEditableWarning={true}
        />
      </div>
      <div className="my-4 border-b border-border">
        <FilterDropdown
          options={standardAnswerCategories.map((category) => {
            return {
              key: category.name,
              display: category.name,
            };
          })}
          selected={selectedCategories.map((category) => category.name)}
          handleSelect={(option) => {
            handleCategorySelect(
              standardAnswerCategories.find(
                (category) => category.name === option.key
              )!
            );
          }}
          icon={
            <div className="my-auto mr-2 w-[16px] h-[16px]">
              <FiTag size={16} />
            </div>
          }
          defaultDisplay="All Categories"
        />
        <div className="flex flex-wrap pb-4 mt-3">
          {selectedCategories.map((category) => (
            <CategoryBubble
              key={category.id}
              name={category.name}
              onDelete={() => handleCategorySelect(category)}
            />
          ))}
        </div>
      </div>
      <div className="flex flex-col w-full mx-auto">
        <Table className="w-full">
          <TableHeader>
            <TableRow>
              {columns.map((column) => (
                <TableHead key={column.key}>{column.name}</TableHead>
              ))}
            </TableRow>
          </TableHeader>

          <TableBody>
            {paginatedStandardAnswers.length > 0 ? (
              paginatedStandardAnswers.map((item) => (
                <StandardAnswersTableRow
                  key={item.id}
                  standardAnswer={item}
                  handleDelete={handleDelete}
                />
              ))
            ) : (
              <RowTemplate id={0} entries={["", "", "", "", "", ""]} />
            )}
          </TableBody>
        </Table>
        <div>
          {paginatedStandardAnswers.length === 0 && (
            <div className="flex justify-center">
              <Text as="p">No matching standard answers found...</Text>
            </div>
          )}
        </div>
        {paginatedStandardAnswers.length > 0 && (
          <>
            <div className="mt-4">
              <Text as="p">
                {markdown(
                  "Ensure that you have added the category to the relevant [Slack Bot](/admin/bots)."
                )}
              </Text>
            </div>
            <div className="mt-4 flex justify-center">
              <PageSelector
                currentPage={currentPage}
                totalPages={totalPages}
                onPageChange={handlePageChange}
                shouldScroll={true}
              />
            </div>
          </>
        )}
      </div>
    </div>
  );
};

function Main() {
  const {
    data: standardAnswers,
    error: standardAnswersError,
    isLoading: standardAnswersIsLoading,
    refreshStandardAnswers,
  } = useStandardAnswers();
  const {
    data: standardAnswerCategories,
    error: standardAnswerCategoriesError,
    isLoading: standardAnswerCategoriesIsLoading,
  } = useStandardAnswerCategories();

  if (standardAnswersIsLoading || standardAnswerCategoriesIsLoading) {
    return <ThreeDotsLoader />;
  }

  if (standardAnswersError || !standardAnswers) {
    return (
      <ErrorCallout
        errorTitle="Error loading standard answers"
        errorMsg={
          standardAnswersError.info?.detail ||
          standardAnswersError.info?.message
        }
      />
    );
  }

  if (standardAnswerCategoriesError || !standardAnswerCategories) {
    return (
      <ErrorCallout
        errorTitle="Error loading standard answer categories"
        errorMsg={
          standardAnswerCategoriesError.info?.detail ||
          standardAnswerCategoriesError.info?.message
        }
      />
    );
  }

  return (
    <div className="mb-8">
      <Text as="p">
        {markdown(
          "Manage the standard answers for pre-defined questions.\nNote: Currently, only questions asked from Slack can receive standard answers."
        )}
      </Text>
      <Spacer rem={0.5} />
      {standardAnswers.length == 0 && (
        <>
          <Text as="p">Add your first standard answer below!</Text>
          <Spacer rem={0.5} />
        </>
      )}
      <div className="mb-2"></div>

      <CreateButton href="/admin/standard-answer/new">
        New Standard Answer
      </CreateButton>

      <Separator />

      <div>
        <StandardAnswersTable
          standardAnswers={standardAnswers}
          standardAnswerCategories={standardAnswerCategories}
          refresh={refreshStandardAnswers}
        />
      </div>
    </div>
  );
}

export default function Page() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />
      <SettingsLayouts.Body>
        <Main />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/app/ee/admin/theme/AppearanceThemeSettings.tsx
================================================
"use client";

import { FormField } from "@/refresh-components/form/FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Tabs from "@/refresh-components/Tabs";
import Separator from "@/refresh-components/Separator";
import { Preview } from "./Preview";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import Switch from "@/refresh-components/inputs/Switch";
import CharacterCount from "@/refresh-components/CharacterCount";
import InputImage from "@/refresh-components/inputs/InputImage";
import { Button } from "@opal/components";
import { useFormikContext } from "formik";
import {
  forwardRef,
  useEffect,
  useImperativeHandle,
  useMemo,
  useRef,
  useState,
} from "react";
import type { PreviewHighlightTarget } from "./Preview";
import { SvgEdit } from "@opal/icons";

interface AppearanceThemeSettingsProps {
  selectedLogo: File | null;
  setSelectedLogo: (file: File | null) => void;
  logoVersion: number;
  charLimits: {
    application_name: number;
    custom_greeting_message: number;
    custom_header_content: number;
    custom_lower_disclaimer_content: number;
    custom_popup_header: number;
    custom_popup_content: number;
    consent_screen_prompt: number;
  };
}

export interface AppearanceThemeSettingsRef {
  focusFirstError: (errors: Record<string, any>) => void;
}

export const AppearanceThemeSettings = forwardRef<
  AppearanceThemeSettingsRef,
  AppearanceThemeSettingsProps
>(function AppearanceThemeSettings(
  { selectedLogo, setSelectedLogo, logoVersion, charLimits },
  ref
) {
  const { values, errors, setFieldValue } = useFormikContext<any>();
  const fileInputRef = useRef<HTMLInputElement>(null);
  const applicationNameInputRef = useRef<HTMLInputElement>(null);
  const greetingMessageInputRef = useRef<HTMLInputElement>(null);
  const headerContentInputRef = useRef<HTMLInputElement>(null);
  const lowerDisclaimerInputRef = useRef<HTMLTextAreaElement>(null);
  const noticeHeaderInputRef = useRef<HTMLInputElement>(null);
  const noticeContentInputRef = useRef<HTMLTextAreaElement>(null);
  const consentPromptTextAreaRef = useRef<HTMLTextAreaElement>(null);
  const prevShowFirstVisitNoticeRef = useRef<boolean>(
    Boolean(values.show_first_visit_notice)
  );
  const prevEnableConsentScreenRef = useRef<boolean>(
    Boolean(values.enable_consent_screen)
  );
  const [focusedPreviewTarget, setFocusedPreviewTarget] =
    useState<PreviewHighlightTarget | null>(null);
  const [hoveredPreviewTarget, setHoveredPreviewTarget] =
    useState<PreviewHighlightTarget | null>(null);

  const highlightTarget = useMemo(
    () => focusedPreviewTarget ?? hoveredPreviewTarget,
    [focusedPreviewTarget, hoveredPreviewTarget]
  );

  const getPreviewHandlers = (target: PreviewHighlightTarget) => ({
    onFocus: () => setFocusedPreviewTarget(target),
    onBlur: () =>
      setFocusedPreviewTarget((cur) => (cur === target ? null : cur)),
    onMouseEnter: () => setHoveredPreviewTarget(target),
    onMouseLeave: () =>
      setHoveredPreviewTarget((cur) => (cur === target ? null : cur)),
  });

  // Expose focusFirstError method to parent component
  useImperativeHandle(ref, () => ({
    focusFirstError: (errors: Record<string, any>) => {
      // Focus on the first field with an error, in priority order
      const fieldRefs = [
        { name: "application_name", ref: applicationNameInputRef },
        { name: "custom_greeting_message", ref: greetingMessageInputRef },
        { name: "custom_header_content", ref: headerContentInputRef },
        {
          name: "custom_lower_disclaimer_content",
          ref: lowerDisclaimerInputRef,
        },
        { name: "custom_popup_header", ref: noticeHeaderInputRef },
        { name: "custom_popup_content", ref: noticeContentInputRef },
        { name: "consent_screen_prompt", ref: consentPromptTextAreaRef },
      ];
      for (const field of fieldRefs) {
        if (errors[field.name] && field.ref.current) {
          field.ref.current.focus();
          // Scroll into view if needed
          field.ref.current.scrollIntoView({
            behavior: "smooth",
            block: "center",
          });
          break;
        }
      }
    },
  }));

  useEffect(() => {
    const prev = prevShowFirstVisitNoticeRef.current;
    const next = Boolean(values.show_first_visit_notice);

    // When enabling the toggle, autofocus the "Notice Header" input.
    if (!prev && next) {
      requestAnimationFrame(() => {
        noticeHeaderInputRef.current?.focus();
      });
    }

    prevShowFirstVisitNoticeRef.current = next;
  }, [values.show_first_visit_notice]);

  useEffect(() => {
    const prev = prevEnableConsentScreenRef.current;
    const next = Boolean(values.enable_consent_screen);

    // When enabling the toggle, autofocus the "Notice Consent Prompt" input.
    if (!prev && next) {
      requestAnimationFrame(() => {
        consentPromptTextAreaRef.current?.focus();
      });
    }

    prevEnableConsentScreenRef.current = next;
  }, [values.enable_consent_screen]);

  const handleLogoEdit = () => {
    fileInputRef.current?.click();
  };

  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    if (file) {
      setSelectedLogo(file);
      setFieldValue("use_custom_logo", true);
    }
  };

  const handleLogoRemove = async () => {
    setFieldValue("use_custom_logo", false);
    setSelectedLogo(null);
  };

  // Memoize the blob URL to prevent creating new URLs on every render
  const logoObjectUrl = useMemo(() => {
    if (selectedLogo) {
      return URL.createObjectURL(selectedLogo);
    }
    return null;
  }, [selectedLogo]);

  // Clean up the blob URL when selectedLogo changes or component unmounts
  useEffect(() => {
    return () => {
      if (logoObjectUrl) {
        URL.revokeObjectURL(logoObjectUrl);
      }
    };
  }, [logoObjectUrl]);

  const logoSrc = useMemo(() => {
    if (logoObjectUrl) {
      return logoObjectUrl;
    }
    if (values.use_custom_logo) {
      return `/api/enterprise-settings/logo?v=${logoVersion}`;
    }
    return undefined;
  }, [logoObjectUrl, values.use_custom_logo, logoVersion]);

  // Determine which tabs should be enabled
  const hasLogo = Boolean(selectedLogo || values.use_custom_logo);
  const hasApplicationName = Boolean(values.application_name?.trim());

  // Auto-switch to logo_and_name if current selection becomes invalid
  useEffect(() => {
    if (values.logo_display_style === "logo_only" && !hasLogo) {
      setFieldValue("logo_display_style", "logo_and_name");
    } else if (
      values.logo_display_style === "name_only" &&
      !hasApplicationName
    ) {
      setFieldValue("logo_display_style", "logo_and_name");
    }
  }, [hasLogo, hasApplicationName, values.logo_display_style, setFieldValue]);

  return (
    <div className="flex flex-col gap-4 w-full">
      <input
        type="file"
        ref={fileInputRef}
        onChange={handleFileChange}
        accept="image/png,image/jpeg,image/jpg"
        style={{ display: "none" }}
      />

      <div className="flex gap-10 items-center">
        <div className="flex flex-col gap-4 w-full">
          <FormField state={errors.application_name ? "error" : "idle"}>
            <FormField.Label
              rightAction={
                <CharacterCount
                  value={values.application_name}
                  limit={charLimits.application_name}
                />
              }
            >
              Application Display Name
            </FormField.Label>
            <FormField.Control asChild>
              <InputTypeIn
                ref={applicationNameInputRef}
                data-label="application-name-input"
                showClearButton
                variant={errors.application_name ? "error" : undefined}
                value={values.application_name}
                {...getPreviewHandlers("sidebar")}
                onChange={(e) =>
                  setFieldValue("application_name", e.target.value)
                }
              />
            </FormField.Control>
            <FormField.Description>
              This name will show across the app and replace "Onyx" in the UI.
            </FormField.Description>
            <FormField.Message
              messages={{ error: errors.application_name as string }}
            />
          </FormField>

          <FormField state="idle">
            <FormField.Label>Logo Display Style</FormField.Label>
            <FormField.Control>
              <Tabs
                value={values.logo_display_style}
                onValueChange={(value) =>
                  setFieldValue("logo_display_style", value)
                }
              >
                <Tabs.List>
                  <Tabs.Trigger
                    value="logo_and_name"
                    tooltip="Show both your application logo and name."
                    tooltipSide="top"
                    {...getPreviewHandlers("sidebar")}
                  >
                    Logo & Name
                  </Tabs.Trigger>
                  <Tabs.Trigger
                    value="logo_only"
                    disabled={!hasLogo}
                    tooltip={
                      hasLogo
                        ? "Show only your application logo."
                        : "Upload a logo to enable this option."
                    }
                    tooltipSide="top"
                    {...getPreviewHandlers("sidebar")}
                  >
                    Logo Only
                  </Tabs.Trigger>
                  <Tabs.Trigger
                    value="name_only"
                    disabled={!hasApplicationName}
                    tooltip={
                      hasApplicationName
                        ? "Show only your application name."
                        : "Enter an application name to enable this option."
                    }
                    tooltipSide="top"
                    {...getPreviewHandlers("sidebar")}
                  >
                    Name Only
                  </Tabs.Trigger>
                </Tabs.List>
              </Tabs>
            </FormField.Control>
            <FormField.Description>
              Choose what to display at the top of the sidebar. Options become
              available once you add a logo or application name.
            </FormField.Description>
          </FormField>
        </div>

        <FormField state="idle">
          <FormField.Label>Application Logo</FormField.Label>
          <FormField.Control>
            <InputImage
              src={logoSrc}
              onEdit={handleLogoEdit}
              onDrop={(file) => {
                setSelectedLogo(file);
                setFieldValue("use_custom_logo", true);
              }}
              onRemove={handleLogoRemove}
              showEditOverlay={false}
            />
          </FormField.Control>
          <div className="mt-2 w-full justify-center items-center flex">
            <Button
              disabled={!hasLogo}
              prominence="secondary"
              onClick={handleLogoEdit}
              icon={SvgEdit}
            >
              Update
            </Button>
          </div>
        </FormField>
      </div>

      <Separator className="my-4" />

      <Preview
        className="mb-8"
        logoDisplayStyle={values.logo_display_style}
        applicationDisplayName={values.application_name ?? ""}
        chat_footer_content={
          values.custom_lower_disclaimer_content || "Chat Footer Content"
        }
        chat_header_content={
          values.custom_header_content || "Chat Header Content"
        }
        greeting_message={
          values.custom_greeting_message || "Welcome to Acme Chat"
        }
        logoSrc={logoSrc}
        highlightTarget={highlightTarget}
      />

      <FormField state={errors.custom_greeting_message ? "error" : "idle"}>
        <FormField.Label
          rightAction={
            <CharacterCount
              value={values.custom_greeting_message}
              limit={charLimits.custom_greeting_message}
            />
          }
        >
          Greeting Message
        </FormField.Label>
        <FormField.Control asChild>
          <InputTypeIn
            ref={greetingMessageInputRef}
            data-label="greeting-message-input"
            showClearButton
            variant={errors.custom_greeting_message ? "error" : undefined}
            value={values.custom_greeting_message}
            {...getPreviewHandlers("greeting")}
            onChange={(e) =>
              setFieldValue("custom_greeting_message", e.target.value)
            }
          />
        </FormField.Control>
        <FormField.Description>
          Add a short message to the home page.
        </FormField.Description>
        <FormField.Message
          messages={{ error: errors.custom_greeting_message as string }}
        />
      </FormField>

      <FormField state={errors.custom_header_content ? "error" : "idle"}>
        <FormField.Label
          rightAction={
            <CharacterCount
              value={values.custom_header_content}
              limit={charLimits.custom_header_content}
            />
          }
        >
          Chat Header Text
        </FormField.Label>
        <FormField.Control asChild>
          <InputTypeIn
            ref={headerContentInputRef}
            data-label="chat-header-input"
            showClearButton
            variant={errors.custom_header_content ? "error" : undefined}
            value={values.custom_header_content}
            {...getPreviewHandlers("chat_header")}
            onChange={(e) =>
              setFieldValue("custom_header_content", e.target.value)
            }
          />
        </FormField.Control>
        <FormField.Message
          messages={{ error: errors.custom_header_content as string }}
        />
      </FormField>

      <FormField
        state={errors.custom_lower_disclaimer_content ? "error" : "idle"}
      >
        <FormField.Label
          rightAction={
            <CharacterCount
              value={values.custom_lower_disclaimer_content}
              limit={charLimits.custom_lower_disclaimer_content}
            />
          }
        >
          Chat Footer Text
        </FormField.Label>
        <FormField.Control asChild>
          <InputTextArea
            ref={lowerDisclaimerInputRef}
            data-label="chat-footer-textarea"
            rows={3}
            placeholder="Add markdown content"
            variant={
              errors.custom_lower_disclaimer_content ? "error" : undefined
            }
            value={values.custom_lower_disclaimer_content}
            {...getPreviewHandlers("chat_footer")}
            onChange={(e) =>
              setFieldValue("custom_lower_disclaimer_content", e.target.value)
            }
          />
        </FormField.Control>
        <FormField.Description>
          Add markdown content for disclaimers or additional information.
        </FormField.Description>
        <FormField.Message
          messages={{ error: errors.custom_lower_disclaimer_content as string }}
        />
      </FormField>

      <Separator className="my-4" />

      <div className="flex flex-col gap-4 p-4 bg-background-tint-00 rounded-16">
        <FormField state="idle" className="gap-0">
          <div className="flex justify-between items-center">
            <FormField.Label>Show First Visit Notice</FormField.Label>
            <FormField.Control>
              <Switch
                aria-label="Show First Visit Notice"
                data-label="first-visit-notice-toggle"
                checked={values.show_first_visit_notice}
                onCheckedChange={(checked) =>
                  setFieldValue("show_first_visit_notice", checked)
                }
              />
            </FormField.Control>
          </div>
          <FormField.Description>
            Show a one-time pop-up for new users at their first visit.
          </FormField.Description>
        </FormField>

        {values.show_first_visit_notice && (
          <>
            <FormField state={errors.custom_popup_header ? "error" : "idle"}>
              <FormField.Label
                required
                rightAction={
                  <CharacterCount
                    value={values.custom_popup_header}
                    limit={charLimits.custom_popup_header}
                  />
                }
              >
                Notice Header
              </FormField.Label>
              <FormField.Control asChild>
                <InputTypeIn
                  ref={noticeHeaderInputRef}
                  data-label="notice-header-input"
                  showClearButton
                  variant={errors.custom_popup_header ? "error" : undefined}
                  value={values.custom_popup_header}
                  onChange={(e) =>
                    setFieldValue("custom_popup_header", e.target.value)
                  }
                />
              </FormField.Control>
              <FormField.Message
                messages={{ error: errors.custom_popup_header as string }}
              />
            </FormField>

            <FormField state={errors.custom_popup_content ? "error" : "idle"}>
              <FormField.Label
                required
                rightAction={
                  <CharacterCount
                    value={values.custom_popup_content}
                    limit={charLimits.custom_popup_content}
                  />
                }
              >
                Notice Content
              </FormField.Label>
              <FormField.Control asChild>
                <InputTextArea
                  ref={noticeContentInputRef}
                  data-label="notice-content-textarea"
                  rows={3}
                  placeholder="Add markdown content"
                  variant={errors.custom_popup_content ? "error" : undefined}
                  value={values.custom_popup_content}
                  onChange={(e) =>
                    setFieldValue("custom_popup_content", e.target.value)
                  }
                />
              </FormField.Control>
              <FormField.Message
                messages={{ error: errors.custom_popup_content as string }}
              />
            </FormField>

            <FormField state="idle" className="gap-0">
              <div className="flex justify-between items-center">
                <FormField.Label>Require Consent to Notice</FormField.Label>
                <FormField.Control>
                  <Switch
                    aria-label="Require Consent to Notice"
                    data-label="require-consent-toggle"
                    checked={values.enable_consent_screen}
                    onCheckedChange={(checked) =>
                      setFieldValue("enable_consent_screen", checked)
                    }
                  />
                </FormField.Control>
              </div>
              <FormField.Description>
                Require the user to read and agree to the notice before
                accessing the application.
              </FormField.Description>
            </FormField>

            {values.enable_consent_screen && (
              <FormField
                state={errors.consent_screen_prompt ? "error" : "idle"}
              >
                <FormField.Label
                  required
                  rightAction={
                    <CharacterCount
                      value={values.consent_screen_prompt}
                      limit={charLimits.consent_screen_prompt}
                    />
                  }
                >
                  Notice Consent Prompt
                </FormField.Label>
                <FormField.Control asChild>
                  <InputTextArea
                    ref={consentPromptTextAreaRef}
                    data-label="consent-prompt-textarea"
                    rows={3}
                    placeholder="Add markdown content"
                    variant={errors.consent_screen_prompt ? "error" : undefined}
                    value={values.consent_screen_prompt}
                    onChange={(e) => {
                      setFieldValue("consent_screen_prompt", e.target.value);
                    }}
                  />
                </FormField.Control>
                <FormField.Message
                  messages={{ error: errors.consent_screen_prompt as string }}
                />
              </FormField>
            )}
          </>
        )}
      </div>
    </div>
  );
});


================================================
FILE: web/src/app/ee/admin/theme/Preview.tsx
================================================
"use client";

import React from "react";
import type { Components } from "react-markdown";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import { cn, ensureHrefProtocol } from "@/lib/utils";
import { OnyxIcon } from "@/components/icons/icons";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";

const previewMarkdownComponents = {
  p: ({ children }) => (
    <Text as="p" text03 figureSmallValue className="!my-0 text-center">
      {children}
    </Text>
  ),
  a: ({ node, href, className, children, ...rest }) => {
    const fullHref = ensureHrefProtocol(href);
    return (
      <a
        href={fullHref}
        target="_blank"
        rel="noopener noreferrer"
        {...rest}
        className={cn(className, "underline underline-offset-2")}
      >
        <Text text03 figureSmallValue>
          {children}
        </Text>
      </a>
    );
  },
} satisfies Partial<Components>;

const PreviewMinimalMarkdown = React.memo(function PreviewMinimalMarkdown({
  content,
  className,
}: {
  content: string;
  className?: string;
}) {
  return (
    <MinimalMarkdown
      content={content}
      className={className}
      components={previewMarkdownComponents}
    />
  );
});

export type PreviewHighlightTarget =
  | "sidebar"
  | "greeting"
  | "chat_header"
  | "chat_footer";

export interface PreviewProps {
  logoDisplayStyle: "logo_and_name" | "logo_only" | "name_only";
  applicationDisplayName: string;
  chat_footer_content: string;
  chat_header_content: string;
  greeting_message: string;
  className?: string;
  logoSrc?: string;
  highlightTarget?: PreviewHighlightTarget | null;
}

function PreviewLogo({
  logoSrc,
  forceOnyxIcon,
  size,
  className,
}: {
  logoSrc?: string;
  forceOnyxIcon?: boolean;
  size: number;
  className?: string;
}) {
  return logoSrc && !forceOnyxIcon ? (
    <img
      src={logoSrc}
      alt="Logo"
      style={{
        objectFit: "cover",
        height: `${size}px`,
        width: `${size}px`,
      }}
      className={cn("flex-shrink-0 rounded-full", className)}
    />
  ) : (
    <OnyxIcon size={size} className={cn("flex-shrink-0", className)} />
  );
}

export function InputPreview() {
  return (
    <div className="bg-background-neutral-00 border border-border-01 flex flex-col gap-1.5 items-end pb-1 pl-2.5 pr-1 pt-2.5 rounded-08 w-full h-14">
      <div className="h-5 w-5 bg-theme-primary-05 mt-auto rounded-[0.25rem]"></div>
    </div>
  );
}

function PreviewStart({
  logoDisplayStyle,
  applicationDisplayName,
  chat_footer_content,
  chat_header_content,
  greeting_message,
  logoSrc,
  highlightTarget,
}: PreviewProps) {
  return (
    <div className="flex h-60 rounded-12 shadow-00 bg-background-tint-01 relative">
      {/* Sidebar */}
      <div className="flex w-[6rem] h-full bg-background-tint-02 rounded-l-12 p-1 justify-start">
        <div className="flex flex-col h-fit w-full justify-start">
          <div
            className={cn(
              "inline-flex max-w-full items-center justify-start gap-1 rounded-08 p-0.5 overflow-hidden",
              highlightTarget === "sidebar" && "bg-highlight-match"
            )}
          >
            {logoDisplayStyle !== "name_only" && (
              <PreviewLogo
                logoSrc={logoSrc}
                size={16}
                forceOnyxIcon={
                  logoDisplayStyle === "logo_and_name" &&
                  !applicationDisplayName
                }
              />
            )}
            {(logoDisplayStyle === "logo_and_name" ||
              logoDisplayStyle === "name_only") && (
              <Truncated mainUiAction text04 nowrap>
                {applicationDisplayName || "Onyx"}
              </Truncated>
            )}
          </div>
        </div>
      </div>
      {/* Chat */}
      <div className="flex flex-col flex-1 h-full">
        {/* Chat Body */}
        <div className="flex flex-col flex-1 h-full items-center justify-center px-3">
          <div className="flex w-full max-w-[300px] flex-col items-center justify-center">
            <div
              className={cn(
                "inline-flex max-w-full items-center justify-center gap-1 mb-2 rounded-08 border border-transparent p-0.5 text-center",
                highlightTarget === "greeting" && "bg-highlight-match"
              )}
            >
              <PreviewLogo logoSrc={logoSrc} size={18} />
              <Text
                text04
                headingH3
                className="max-w-[260px] whitespace-normal break-words text-center"
              >
                {greeting_message}
              </Text>
            </div>
            <InputPreview />
          </div>
        </div>
        {/* Chat Footer */}
        <div className="flex flex-col items-center justify-end w-full">
          <div className="flex w-full max-w-[300px] justify-center">
            <div
              className={cn(
                "inline-flex max-w-full items-start justify-center rounded-04 border border-transparent p-0.5 text-center",
                highlightTarget === "chat_footer" && "bg-highlight-match"
              )}
            >
              <PreviewMinimalMarkdown
                content={chat_footer_content}
                className={cn("max-w-full text-center origin-center")}
              />
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}

function PreviewChat({
  chat_header_content,
  chat_footer_content,
  highlightTarget,
}: {
  chat_header_content: string;
  chat_footer_content: string;
  highlightTarget?: PreviewHighlightTarget | null;
}) {
  return (
    <div className="flex flex-col h-60 relative bg-background-tint-01 rounded-12 shadow-00">
      {/* Header */}
      <div className="flex justify-center w-full">
        <div className="flex w-full max-w-[300px] justify-center">
          <div
            className={cn(
              "inline-flex max-w-full items-center justify-center rounded-08 border border-transparent p-0.5 text-center",
              highlightTarget === "chat_header" && "bg-highlight-match"
            )}
          >
            <Text
              figureSmallLabel
              text03
              className="max-w-full whitespace-normal break-words text-center"
            >
              {chat_header_content}
            </Text>
          </div>
        </div>
      </div>

      {/* Main Content */}
      <div className="flex flex-1 flex-col gap-2 items-center justify-end max-w-[300px] w-full px-3 py-0 mx-auto">
        {/* User message bubble (right side) */}
        <div className="flex flex-col items-end w-full">
          <div className="bg-background-tint-02 flex flex-col items-start px-2.5 py-2 rounded-bl-[10px] rounded-tl-[10px] rounded-tr-[10px]">
            <div className="bg-background-neutral-03 h-1.5 rounded-04 w-20" />
          </div>
        </div>

        {/* AI response bubble (left side) */}
        <div className="flex flex-col gap-1.5 items-start pl-2 pr-16 py-2 w-full">
          <div className="bg-background-neutral-03 h-1.5 rounded-04 w-full" />
          <div className="bg-background-neutral-03 h-1.5 rounded-04 w-full" />
          <div className="bg-background-neutral-03 h-1.5 rounded-04 w-12" />
        </div>

        {/* Input field */}
        <InputPreview />
      </div>

      {/* Footer */}
      <div className="flex flex-col items-center justify-end w-full">
        <div className="flex w-full max-w-[300px] justify-center">
          <div
            className={cn(
              "inline-flex max-w-full items-start justify-center rounded-04 border border-transparent p-0.5 text-center",
              highlightTarget === "chat_footer" && "bg-highlight-match"
            )}
          >
            <PreviewMinimalMarkdown
              content={chat_footer_content}
              className={cn("max-w-full text-center origin-center")}
            />
          </div>
        </div>
      </div>
    </div>
  );
}
export function Preview({
  logoDisplayStyle,
  applicationDisplayName,
  chat_footer_content,
  chat_header_content,
  greeting_message,
  logoSrc,
  className,
  highlightTarget,
}: PreviewProps) {
  return (
    <div className={cn("grid grid-cols-2 gap-2", className)}>
      <PreviewStart
        logoDisplayStyle={logoDisplayStyle}
        applicationDisplayName={applicationDisplayName}
        chat_footer_content={chat_footer_content}
        chat_header_content={chat_header_content}
        greeting_message={greeting_message}
        logoSrc={logoSrc}
        highlightTarget={highlightTarget}
      />
      <PreviewChat
        chat_header_content={chat_header_content}
        chat_footer_content={chat_footer_content}
        highlightTarget={highlightTarget}
      />
    </div>
  );
}


================================================
FILE: web/src/app/ee/admin/theme/page.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Button } from "@opal/components";
import {
  AppearanceThemeSettings,
  AppearanceThemeSettingsRef,
} from "./AppearanceThemeSettings";
import { useContext, useRef, useState } from "react";
import { SettingsContext } from "@/providers/SettingsProvider";
import { toast } from "@/hooks/useToast";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { EnterpriseSettings } from "@/interfaces/settings";
import { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

const route = ADMIN_ROUTES.THEME;

const CHAR_LIMITS = {
  application_name: 50,
  custom_greeting_message: 50,
  custom_header_content: 100,
  custom_lower_disclaimer_content: 200,
  custom_popup_header: 100,
  custom_popup_content: 500,
  consent_screen_prompt: 200,
};

export default function ThemePage() {
  const settings = useContext(SettingsContext);
  const [selectedLogo, setSelectedLogo] = useState<File | null>(null);
  const [logoVersion, setLogoVersion] = useState(0);
  const appearanceSettingsRef = useRef<AppearanceThemeSettingsRef>(null);

  if (!settings) {
    return null;
  }

  const enterpriseSettings = settings.enterpriseSettings;

  async function updateEnterpriseSettings(
    newValues: EnterpriseSettings
  ): Promise<boolean> {
    const response = await fetch("/api/admin/enterprise-settings", {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        ...(enterpriseSettings || {}),
        ...newValues,
      }),
    });
    if (response.ok) {
      await mutate(SWR_KEYS.enterpriseSettings);
      return true;
    } else {
      const errorMsg = (await response.json()).detail;
      alert(`Failed to update settings. ${errorMsg}`);
      return false;
    }
  }

  const validationSchema = Yup.object().shape({
    application_name: Yup.string()
      .trim()
      .max(
        CHAR_LIMITS.application_name,
        `Maximum ${CHAR_LIMITS.application_name} characters`
      )
      .nullable(),
    logo_display_style: Yup.string()
      .oneOf(["logo_and_name", "logo_only", "name_only"])
      .required(),
    use_custom_logo: Yup.boolean().required(),
    custom_greeting_message: Yup.string()
      .max(
        CHAR_LIMITS.custom_greeting_message,
        `Maximum ${CHAR_LIMITS.custom_greeting_message} characters`
      )
      .nullable(),
    custom_header_content: Yup.string()
      .max(
        CHAR_LIMITS.custom_header_content,
        `Maximum ${CHAR_LIMITS.custom_header_content} characters`
      )
      .nullable(),
    custom_lower_disclaimer_content: Yup.string()
      .max(
        CHAR_LIMITS.custom_lower_disclaimer_content,
        `Maximum ${CHAR_LIMITS.custom_lower_disclaimer_content} characters`
      )
      .nullable(),
    show_first_visit_notice: Yup.boolean().nullable(),
    custom_popup_header: Yup.string()
      .max(
        CHAR_LIMITS.custom_popup_header,
        `Maximum ${CHAR_LIMITS.custom_popup_header} characters`
      )
      .when("show_first_visit_notice", {
        is: true,
        then: (schema) => schema.required("Notice Header is required"),
        otherwise: (schema) => schema.nullable(),
      }),
    custom_popup_content: Yup.string()
      .max(
        CHAR_LIMITS.custom_popup_content,
        `Maximum ${CHAR_LIMITS.custom_popup_content} characters`
      )
      .when("show_first_visit_notice", {
        is: true,
        then: (schema) => schema.required("Notice Content is required"),
        otherwise: (schema) => schema.nullable(),
      }),
    enable_consent_screen: Yup.boolean().nullable(),
    consent_screen_prompt: Yup.string()
      .max(
        CHAR_LIMITS.consent_screen_prompt,
        `Maximum ${CHAR_LIMITS.consent_screen_prompt} characters`
      )
      .when("enable_consent_screen", {
        is: true,
        then: (schema) => schema.required("Notice Consent Prompt is required"),
        otherwise: (schema) => schema.nullable(),
      }),
  });

  return (
    <Formik
      initialValues={{
        application_name: enterpriseSettings?.application_name || "",
        logo_display_style:
          enterpriseSettings?.logo_display_style || "logo_and_name",
        use_custom_logo: enterpriseSettings?.use_custom_logo || false,
        custom_greeting_message:
          enterpriseSettings?.custom_greeting_message || "",
        custom_header_content: enterpriseSettings?.custom_header_content || "",
        custom_lower_disclaimer_content:
          enterpriseSettings?.custom_lower_disclaimer_content || "",
        show_first_visit_notice:
          enterpriseSettings?.show_first_visit_notice || false,
        custom_popup_header: enterpriseSettings?.custom_popup_header || "",
        custom_popup_content: enterpriseSettings?.custom_popup_content || "",
        enable_consent_screen:
          enterpriseSettings?.enable_consent_screen || false,
        consent_screen_prompt: enterpriseSettings?.consent_screen_prompt || "",
      }}
      validationSchema={validationSchema}
      validateOnChange={false}
      onSubmit={async (values, formikHelpers) => {
        let logoUploaded = false;

        // Handle logo upload if a new logo was selected
        if (selectedLogo) {
          const formData = new FormData();
          formData.append("file", selectedLogo);
          const response = await fetch("/api/admin/enterprise-settings/logo", {
            method: "PUT",
            body: formData,
          });
          if (!response.ok) {
            const errorMsg = (await response.json()).detail;
            alert(`Failed to upload logo. ${errorMsg}`);
            formikHelpers.setSubmitting(false);
            return;
          }
          // Only clear the selected logo after a successful upload
          setSelectedLogo(null);
          logoUploaded = true;
          values.use_custom_logo = true;
        }

        // Update enterprise settings
        const success = await updateEnterpriseSettings({
          application_name: values.application_name || null,
          use_custom_logo: values.use_custom_logo,
          use_custom_logotype: enterpriseSettings?.use_custom_logotype || false,
          logo_display_style: values.logo_display_style || null,
          custom_nav_items: enterpriseSettings?.custom_nav_items || [],
          custom_greeting_message: values.custom_greeting_message || null,
          custom_header_content: values.custom_header_content || null,
          custom_lower_disclaimer_content:
            values.custom_lower_disclaimer_content || null,
          two_lines_for_chat_header:
            enterpriseSettings?.two_lines_for_chat_header || null,
          custom_popup_header: values.custom_popup_header || null,
          custom_popup_content: values.custom_popup_content || null,
          show_first_visit_notice: values.show_first_visit_notice || null,
          enable_consent_screen: values.enable_consent_screen || null,
          consent_screen_prompt: values.consent_screen_prompt || null,
        });

        // Important: after a successful save, reset Formik's "baseline" so
        // dirty comparisons reflect the newly-saved values.
        if (success) {
          formikHelpers.resetForm({ values });
          if (logoUploaded) {
            setLogoVersion((v) => v + 1);
          }
          toast.success("Appearance settings saved successfully!");
        }

        formikHelpers.setSubmitting(false);
      }}
    >
      {({
        isSubmitting,
        dirty,
        values,
        validateForm,
        setErrors,
        setTouched,
        submitForm,
      }) => {
        const hasLogoChange = !!selectedLogo;

        return (
          <Form className="w-full h-full">
            <SettingsLayouts.Root>
              <SettingsLayouts.Header
                title={route.title}
                description="Customize how the application appears to users across your organization."
                icon={route.icon}
                rightChildren={
                  <Button
                    disabled={isSubmitting || (!dirty && !hasLogoChange)}
                    type="button"
                    onClick={async () => {
                      const errors = await validateForm();
                      if (Object.keys(errors).length > 0) {
                        setErrors(errors);
                        appearanceSettingsRef.current?.focusFirstError(errors);
                        return;
                      }
                      await submitForm();
                    }}
                  >
                    {isSubmitting ? "Applying..." : "Apply Changes"}
                  </Button>
                }
              />
              <SettingsLayouts.Body>
                <AppearanceThemeSettings
                  ref={appearanceSettingsRef}
                  selectedLogo={selectedLogo}
                  setSelectedLogo={setSelectedLogo}
                  logoVersion={logoVersion}
                  charLimits={CHAR_LIMITS}
                />
              </SettingsLayouts.Body>
            </SettingsLayouts.Root>
          </Form>
        );
      }}
    </Formik>
  );
}


================================================
FILE: web/src/app/ee/agents/stats/[id]/AgentStats.tsx
================================================
"use client";

import { ThreeDotsLoader } from "@/components/Loading";
import { getDatesList } from "@/app/ee/admin/performance/lib";
import { useEffect, useState, useMemo } from "react";
import {
  AdminDateRangeSelector,
  DateRange,
} from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { useAgents } from "@/hooks/useAgents";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { Card, CardContent, CardHeader } from "@/components/ui/card";
import { AreaChartDisplay } from "@/components/ui/areaChart";

type AgentDailyUsageEntry = {
  date: string;
  total_messages: number;
  total_unique_users: number;
};

type AgentStatsResponse = {
  daily_stats: AgentDailyUsageEntry[];
  total_messages: number;
  total_unique_users: number;
};

export function AgentStats({ agentId }: { agentId: number }) {
  const [agentStats, setAgentStats] = useState<AgentStatsResponse | null>(null);
  const { agents } = useAgents();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [dateRange, setDateRange] = useState<DateRange>({
    from: new Date(new Date().setDate(new Date().getDate() - 30)),
    to: new Date(),
  });

  const agent = useMemo(() => {
    return agents.find((a) => a.id === agentId);
  }, [agents, agentId]);

  useEffect(() => {
    async function fetchStats() {
      try {
        setIsLoading(true);
        setError(null);

        const res = await fetch(
          `/api/analytics/assistant/${agentId}/stats?start=${
            dateRange?.from?.toISOString() || ""
          }&end=${dateRange?.to?.toISOString() || ""}`
        );

        if (!res.ok) {
          if (res.status === 403) {
            throw new Error("You don't have permission to view these stats.");
          }
          throw new Error("Failed to fetch agent stats");
        }

        const data = (await res.json()) as AgentStatsResponse;
        setAgentStats(data);
      } catch (err) {
        setError(
          err instanceof Error ? err.message : "An unknown error occurred"
        );
      } finally {
        setIsLoading(false);
      }
    }

    fetchStats();
  }, [agentId, dateRange]);

  const chartData = useMemo(() => {
    if (!agentStats?.daily_stats?.length || !dateRange) {
      return null;
    }

    const initialDate =
      dateRange.from ||
      new Date(
        Math.min(
          ...agentStats.daily_stats.map((entry) =>
            new Date(entry.date).getTime()
          )
        )
      );
    const endDate = dateRange.to || new Date();

    const dateRangeList = getDatesList(initialDate);

    const statsMap = new Map(
      agentStats.daily_stats.map((entry) => [entry.date, entry])
    );

    return dateRangeList
      .filter((date) => new Date(date) <= endDate)
      .map((dateStr) => {
        const dayData = statsMap.get(dateStr);
        return {
          Day: dateStr,
          Messages: dayData?.total_messages || 0,
          "Unique Users": dayData?.total_unique_users || 0,
        };
      });
  }, [agentStats, dateRange]);

  const totalMessages = agentStats?.total_messages ?? 0;
  const totalUniqueUsers = agentStats?.total_unique_users ?? 0;

  let content;
  if (isLoading || !agent) {
    content = (
      <div className="h-80 flex flex-col">
        <ThreeDotsLoader />
      </div>
    );
  } else if (error) {
    content = (
      <div className="h-80 text-red-600 font-bold flex flex-col">
        <p className="m-auto">{error}</p>
      </div>
    );
  } else if (!agentStats?.daily_stats?.length) {
    content = (
      <div className="h-80 text-text-500 flex flex-col">
        <p className="m-auto">
          No data found for this agent in the selected date range
        </p>
      </div>
    );
  } else if (chartData) {
    content = (
      <AreaChartDisplay
        className="mt-4"
        data={chartData}
        categories={["Messages", "Unique Users"]}
        index="Day"
        colors={["#4A4A4A", "#A0A0A0"]}
        yAxisWidth={60}
      />
    );
  }

  return (
    <Card className="w-full">
      <CardHeader className="flex flex-row items-center justify-between space-y-0 pb-2">
        <p className="text-base font-normal text-2xl">Agent Analytics</p>
        <AdminDateRangeSelector
          value={dateRange}
          onValueChange={setDateRange}
        />
      </CardHeader>
      <CardContent>
        <div className="grid grid-cols-1 md:grid-cols-2 gap-4 mb-6">
          <Card>
            <CardContent className="pt-6">
              <div className="flex items-center space-x-4">
                {agent && <AgentAvatar agent={agent} />}
                <div>
                  <h3 className="text-lg font-normal">{agent?.name}</h3>
                  <p className="text-sm text-text-500">{agent?.description}</p>
                </div>
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardContent className="pt-6">
              <div className="grid grid-cols-2 gap-4">
                <div>
                  <p className="text-sm font-medium text-text-500">
                    Total Messages
                  </p>
                  <p className="text-2xl font-normal">{totalMessages}</p>
                </div>
                <div>
                  <p className="text-sm font-medium text-text-500">
                    Total Unique Users
                  </p>
                  <p className="text-2xl font-normal">{totalUniqueUsers}</p>
                </div>
              </div>
            </CardContent>
          </Card>
        </div>
        {content}
      </CardContent>
    </Card>
  );
}


================================================
FILE: web/src/app/ee/agents/stats/[id]/page.tsx
================================================
import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh";
import { unstable_noStore as noStore } from "next/cache";
import { redirect } from "next/navigation";
import type { Route } from "next";
import { requireAuth } from "@/lib/auth/requireAuth";
import { AgentStats } from "./AgentStats";
import BackButton from "@/refresh-components/buttons/BackButton";

export default async function GalleryPage(props: {
  params: Promise<{ id: string }>;
}) {
  const params = await props.params;
  noStore();

  // Only check authentication - data fetching is done client-side via SWR hooks
  const authResult = await requireAuth();

  if (authResult.redirect) {
    redirect(authResult.redirect as Route);
  }

  return (
    <>
      <div className="absolute top-4 left-4">
        <BackButton />
      </div>

      <div className="w-full py-8">
        <div className="px-32">
          <InstantSSRAutoRefresh />
          <div className="max-w-4xl mx-auto !border-none !bg-transparent !ring-none">
            <AgentStats agentId={parseInt(params.id)} />
          </div>
        </div>
      </div>
    </>
  );
}


================================================
FILE: web/src/app/ee/layout.tsx
================================================
import { SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED } from "@/lib/constants";
import { fetchStandardSettingsSS } from "@/components/settings/lib";
import EEFeatureRedirect from "@/app/ee/EEFeatureRedirect";

export default async function AdminLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  // First check build-time constant (fast path)
  if (!SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {
    return <EEFeatureRedirect />;
  }

  // Then check runtime license status (for license enforcement mode)
  // This allows gating EE features when user doesn't have a valid license
  try {
    const settingsResponse = await fetchStandardSettingsSS();
    if (settingsResponse?.ok) {
      const settings = await settingsResponse.json();
      if (settings.ee_features_enabled === false) {
        // When the app is in GATED_ACCESS (expired or missing license), defer
        // to the root layout's GatedContentWrapper which handles path-based
        // exemptions (e.g. allowing /admin/billing for license management).
        if (settings.application_status === "gated_access") {
          return children;
        }

        return <EEFeatureRedirect />;
      }
    }
  } catch (error) {
    // If settings fetch fails, allow access (fail open for better UX)
    console.error("Failed to fetch settings for EE check:", error);
  }

  return children;
}


================================================
FILE: web/src/app/federated/oauth/callback/page.tsx
================================================
"use client";

import OAuthCallbackPage from "@/components/oauth/OAuthCallbackPage";
import { getSourceDisplayName } from "@/lib/sources";

export default function FederatedOAuthCallbackPage() {
  const federatedConfig = {
    processingMessage: "Processing...",
    processingDetails: "Please wait while we complete the setup.",
    successMessage: "Success!",
    successDetailsTemplate:
      "Your {serviceName} authorization completed successfully. You can now use this connector for search.",
    errorMessage: "Something Went Wrong",
    backButtonText: "Back to Chat",
    redirectingMessage: "Redirecting to chat in 2 seconds...",
    autoRedirectDelay: 2000,
    defaultRedirectPath: "/app",
    callbackApiUrl: "/api/federated/callback",
    errorMessageMap: {
      "validation errors":
        "Configuration error - please check your connector settings",
      client_secret: "Authentication credentials are missing or invalid",
      oauth: "OAuth authorization failed",
    },
  };

  return <OAuthCallbackPage config={federatedConfig} />;
}


================================================
FILE: web/src/app/global-error.tsx
================================================
"use client";

import * as Sentry from "@sentry/nextjs";
import NextError from "next/error";
import { useEffect } from "react";

// This global error page is necessary to capture errors that occur in the app.
export default function GlobalError({
  error,
}: {
  error: Error & { digest?: string };
}) {
  useEffect(() => {
    if (process.env.NEXT_PUBLIC_SENTRY_DSN) {
      Sentry.captureException(error);
    }
  }, [error]);

  return (
    <html>
      <body>
        {/* NextError require  a `statusCode` prop. However, since the App Router
        does not expose status codes for errors, we simply pass 0 to render a
        generic error message. */}
        <NextError statusCode={0} />
      </body>
    </html>
  );
}


================================================
FILE: web/src/app/globals.css
================================================
@import "css/attachment-button.css";
@import "css/button.css";
@import "css/card.css";
@import "css/code.css";
@import "css/color-swatch.css";
@import "css/colors.css";
@import "css/divider.css";
@import "css/general-layouts.css";
@import "css/inputs.css";
@import "css/knowledge-table.css";
@import "css/line-item.css";
@import "css/sizes.css";
@import "css/square-button.css";
@import "css/switch.css";
@import "css/z-index.css";

/* KH Teka Font */

@font-face {
  font-family: "KH Teka";
  src: url("/fonts/KHTeka-Medium.otf") format("opentype");
  font-weight: 500;
  font-style: normal;
  font-display: swap;
}

@tailwind base;
@tailwind components;
@tailwind utilities;

@layer base {
  /* BORDER RADII */
  :root {
    --border-radius-02: 0.125rem;
    --border-radius-04: 0.25rem;
    --border-radius-08: 0.5rem;
    --border-radius-12: 0.75rem;
    --border-radius-16: 1rem;
    --border-radius-full: 64rem;
  }

  /* BACKDROP BLUR */
  :root {
    --backdrop-blur-01: 2px;
    --backdrop-blur-02: 1px;
    --backdrop-blur-03: 1px;
  }

  * {
    @apply border-border;
  }

  body {
    @apply bg-background text-foreground;
    overscroll-behavior-y: none;
    overflow-anchor: none;
  }

  form {
    display: flex;
    flex-direction: column;
    align-items: start;
  }

  ol > li > p,
  ul > li > p {
    margin-top: 0;
    margin-bottom: 0;
    display: inline;
    /* Make paragraphs inline to reduce vertical space */
  }

  /* Reduce spacing for markdown elements in chat */
  .prose h1,
  .prose h2,
  .prose h3,
  .prose h4,
  .prose h5,
  .prose h6 {
    margin-top: 0.75em;
    margin-bottom: 0.5em;
  }

  .prose ul,
  .prose ol {
    margin-top: 0.5em;
    margin-bottom: 0.5em;
  }

  .prose ol {
    list-style-type: decimal;
    padding-left: 1.5rem;
    margin-left: 0;
  }

  .prose ul {
    list-style-type: disc;
    padding-left: 1.5rem;
    margin-left: 0;
  }

  .prose li {
    margin-top: 0.25em;
    margin-bottom: 0.25em;
    display: list-item;
  }

  .prose hr {
    margin-top: 1.25em;
    margin-bottom: 1em;
  }

  .prose p {
    margin-top: 0.5em;
    margin-bottom: 0.5em;
  }

  /* Remove top margin from first child to align with icon */
  .prose > :first-child {
    margin-top: 0;
  }

  /* Remove bottom margin from last child to avoid extra space */
  .prose > :last-child {
    margin-bottom: 0;
  }
}

@layer utilities {
  .no-scrollbar {
    scrollbar-width: none;
  }

  /* SHADOWS */
  .shadow-00 {
    box-shadow: 0px 0px 2px 1px var(--shadow-01);
  }
  .shadow-01 {
    box-shadow:
      0px 2px 12px 0px var(--shadow-02),
      0px 0px 4px 1px var(--shadow-02);
  }
  .shadow-02 {
    box-shadow:
      0px 2px 24px 0px var(--shadow-03),
      0px 0px 12px 1px var(--shadow-03);
  }

  /* RADIAL GRADIENTS */
  .radial-00 {
    background: radial-gradient(
      236.31% 141.42% at 0% 0%,
      var(--background-tint-00) 0%,
      var(--background-tint-01) 100%
    );
  }

  /* DEBUGGING UTILITIES
  If you ever want to highlight a component for debugging purposes, just type in `className="dbg-red ..."`, and a red box should appear around it.
  This helps with placing things properly on the screen and seeing how they look during active development.
  */

  .dbg-red {
    border: 1px solid red;
  }
  .dbg-blue {
    border: 1px solid blue;
  }
  .dbg-green {
    border: 1px solid green;
  }
}

/* TYPOGRAPHY SYSTEM - Imported from Figma Design System */

/* Font Imports */

/* Font Family Variables */
:root {
  --font-hanken-grotesk: "Hanken Grotesk", -apple-system, BlinkMacSystemFont,
    "Segoe UI", Roboto, sans-serif;
  --font-dm-mono: "DM Mono", "SF Mono", Monaco, "Cascadia Code", "Roboto Mono",
    Consolas, "Courier New", monospace;
  --font-kh-teka: "KH Teka", -apple-system, BlinkMacSystemFont, "Segoe UI",
    Roboto, sans-serif;
}

/* HEADING STYLES */

.font-heading-h1 {
  font-family: var(--font-hanken-grotesk);
  font-size: 48px;
  font-weight: 600;
  line-height: 64px;
  letter-spacing: -0.48px;
}

.font-heading-h2 {
  font-family: var(--font-hanken-grotesk);
  font-size: 24px;
  font-weight: 600;
  line-height: 36px;
  letter-spacing: -0.24px;
}

.font-heading-h3 {
  font-family: var(--font-hanken-grotesk);
  font-size: 18px;
  font-weight: 600;
  line-height: 28px;
  letter-spacing: -0.18px;
}

.font-heading-h3-muted {
  font-family: var(--font-hanken-grotesk);
  font-size: 18px;
  font-weight: 500;
  line-height: 28px;
  letter-spacing: -0.18px;
}

/* MAIN CONTENT STYLES */

.font-main-content-body {
  font-family: var(--font-hanken-grotesk);
  font-size: 16px;
  font-weight: 450;
  line-height: 24px;
  letter-spacing: 0px;
}

.font-main-content-body strong {
  font-weight: 700;
}

.font-main-content-emphasis {
  font-family: var(--font-hanken-grotesk);
  font-size: 16px;
  font-weight: 700;
  line-height: 24px;
  letter-spacing: 0px;
}

.font-main-content-muted {
  font-family: var(--font-hanken-grotesk);
  font-size: 16px;
  font-weight: 400;
  line-height: 24px;
  letter-spacing: 0px;
}

.font-main-content-mono {
  font-family: var(--font-dm-mono);
  font-size: 16px;
  font-weight: 400;
  line-height: 23px;
  letter-spacing: 0px;
}

/* MAIN UI STYLES */

.font-main-ui-body {
  font-family: var(--font-hanken-grotesk);
  font-size: 14px;
  font-weight: 500;
  line-height: 20px;
  letter-spacing: 0px;
}

.font-main-ui-muted {
  font-family: var(--font-hanken-grotesk);
  font-size: 14px;
  font-weight: 400;
  line-height: 20px;
  letter-spacing: 0px;
}

.font-main-ui-action {
  font-family: var(--font-hanken-grotesk);
  font-size: 14px;
  font-weight: 600;
  line-height: 20px;
  letter-spacing: 0px;
}

.font-main-ui-mono {
  font-family: var(--font-dm-mono);
  font-size: 14px;
  font-weight: 400;
  line-height: 20px;
  letter-spacing: 0px;
}

/* SECONDARY STYLES */

.font-secondary-body {
  font-family: var(--font-hanken-grotesk);
  font-size: 12px;
  font-weight: 400;
  line-height: 16px;
  letter-spacing: 0px;
}

.font-secondary-action {
  font-family: var(--font-hanken-grotesk);
  font-size: 12px;
  font-weight: 600;
  line-height: 16px;
  letter-spacing: 0px;
}

.font-secondary-mono {
  font-family: var(--font-dm-mono);
  font-size: 12px;
  font-weight: 400;
  line-height: 16px;
  letter-spacing: 0px;
}

.font-secondary-mono-label {
  font-family: var(--font-dm-mono);
  font-size: 12px;
  font-weight: 500;
  line-height: 16px;
  letter-spacing: 0px;
}

/* FIGURE STYLES */

.font-figure-small-label {
  font-family: var(--font-hanken-grotesk);
  font-size: 10px;
  font-weight: 700;
  line-height: 12px;
  letter-spacing: 0px;
}

.font-figure-small-value {
  font-family: var(--font-hanken-grotesk);
  font-size: 10px;
  font-weight: 400;
  line-height: 12px;
  letter-spacing: 0px;
}

.font-figure-keystroke {
  font-family: var(--font-hanken-grotesk);
  font-size: 12px;
  font-weight: 400;
  line-height: 16px;
  letter-spacing: -0.6px;
}

/* SCROLL BAR */

.default-scrollbar {
  scrollbar-width: thin;
  scrollbar-color: #888 transparent;
  overflow-y: scroll;
  overflow-x: hidden;
}

.scrollbar {
  width: 100%;
  height: 100%;
}

.inputscroll {
  scrollbar-width: none;
}

/* Ensure native scrollbars are visible */
@layer base {
  * {
    scrollbar-width: auto;
  }
}

/* TEXTAREA */

textarea {
  resize: vertical;
  scrollbar-width: thin;
  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
}

.nextjs-portal {
  display: none !important;
  visibility: hidden !important;
  opacity: 0 !important;
  width: 0 !important;
  height: 0 !important;
  overflow: hidden !important;
  position: absolute !important;
  pointer-events: none !important;
  clip: rect(0, 0, 0, 0) !important;
}

.nextjs-portal * {
  display: none !important;
}

/* Used to create alternatie to React Markdown */
.preserve-lines {
  white-space: pre-wrap;
  /* Preserves whitespace and wraps text */
}

.loading-text {
  color: #e5e5e5;

  background: linear-gradient(
    -90deg,
    #a3a3a3 0%,
    #000000 5%,
    #a3a3a3 10%,
    #a3a3a3 100%
  );
  background-size: 200% 100%;
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  animation: shimmerTransition 1.8s ease-out infinite;
}

.dark .loading-text {
  color: #1a1a1a;

  background: linear-gradient(
    -90deg,
    #5c5c5c 0%,
    #ffffff 5%,
    #5c5c5c 10%,
    #5c5c5c 100%
  );
  background-size: 200% 100%;
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
}

@keyframes shimmerTransition {
  0% {
    background-position: 100% 0;
  }

  100% {
    background-position: -100% 0;
  }
}

.collapsible {
  max-height: 300px;
  transition:
    max-height 0.5s ease-in-out,
    opacity 0.5s ease-in-out;
  opacity: 1;
}

.collapsible-closed {
  max-height: 0;
  opacity: 0;
  overflow: hidden;
}

.prevent-scroll {
  overscroll-behavior-y: none;
}

/* CUSTOM ANIMATIONS */

@keyframes fadeIn {
  from {
    opacity: 0;
    transform: scale(0.95);
  }

  to {
    opacity: 1;
    transform: scale(1);
  }
}

.animate-fadeIn {
  animation: fadeIn 0.2s ease-out forwards;
}

/* Recording waveform animation */
@keyframes waveform {
  0%,
  100% {
    transform: scaleY(0.3);
  }
  50% {
    transform: scaleY(1);
  }
}

.animate-waveform {
  animation: waveform 0.8s ease-in-out infinite;
}

.container {
  margin-bottom: 1rem;
}


================================================
FILE: web/src/app/layout.tsx
================================================
import "./globals.css";

import { GTM_ENABLED, MODAL_ROOT_ID } from "@/lib/constants";
import { Metadata } from "next";

import AppProvider from "@/providers/AppProvider";
import DynamicMetadata from "@/providers/DynamicMetadata";
import { PHProvider } from "./providers";
import { Suspense } from "react";
import PostHogPageView from "./PostHogPageView";
import Script from "next/script";
import { DM_Mono, Hanken_Grotesk } from "next/font/google";
import { WebVitals } from "./web-vitals";
import { ThemeProvider } from "next-themes";
import { TooltipProvider } from "@/components/ui/tooltip";
import StatsOverlayLoader from "@/components/dev/StatsOverlayLoader";
import { cn } from "@/lib/utils";
import AppHealthBanner from "@/sections/AppHealthBanner";
import CustomAnalyticsScript from "@/providers/CustomAnalyticsScript";
import ProductGatingWrapper from "@/providers/ProductGatingWrapper";
import SWRConfigProvider from "@/providers/SWRConfigProvider";

const hankenGrotesk = Hanken_Grotesk({
  subsets: ["latin"],
  variable: "--font-hanken-grotesk",
  display: "swap",
  fallback: [
    "-apple-system",
    "BlinkMacSystemFont",
    "Segoe UI",
    "Roboto",
    "sans-serif",
  ],
});

const dmMono = DM_Mono({
  weight: "400",
  subsets: ["latin"],
  variable: "--font-dm-mono",
  display: "swap",
  fallback: [
    "SF Mono",
    "Monaco",
    "Cascadia Code",
    "Roboto Mono",
    "Consolas",
    "Courier New",
    "monospace",
  ],
});

export const metadata: Metadata = {
  title: "Onyx",
  description: "Question answering for your documents",
};

// force-dynamic prevents Next.js from statically prerendering pages at build
// time — many child routes use cookies() which requires dynamic rendering.
// This is safe because the layout itself has no server-side data fetching;
// all data is fetched client-side via SWR in the provider tree.
export const dynamic = "force-dynamic";

export default function RootLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  return (
    <html
      lang="en"
      className={cn(hankenGrotesk.variable, dmMono.variable)}
      suppressHydrationWarning
    >
      <head>
        <meta
          name="viewport"
          content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0, interactive-widget=resizes-content"
        />

        {GTM_ENABLED && (
          <Script
            id="google-tag-manager"
            strategy="afterInteractive"
            dangerouslySetInnerHTML={{
              __html: `
               (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
               new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
               j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
               'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
               })(window,document,'script','dataLayer','GTM-PZXS36NG');
             `,
            }}
          />
        )}
      </head>

      <body className={`relative font-hanken`}>
        <ThemeProvider
          attribute="class"
          defaultTheme="system"
          enableSystem
          disableTransitionOnChange
        >
          <div className="text-text min-h-screen bg-background">
            <TooltipProvider>
              <PHProvider>
                <SWRConfigProvider>
                  <AppHealthBanner />
                  <AppProvider>
                    <DynamicMetadata />
                    <CustomAnalyticsScript />
                    <Suspense fallback={null}>
                      <PostHogPageView />
                    </Suspense>
                    <div id={MODAL_ROOT_ID} className="h-screen w-screen">
                      <ProductGatingWrapper>{children}</ProductGatingWrapper>
                    </div>
                    {process.env.NEXT_PUBLIC_POSTHOG_KEY && <WebVitals />}
                    {process.env.NEXT_PUBLIC_ENABLE_STATS === "true" && (
                      <StatsOverlayLoader />
                    )}
                  </AppProvider>
                </SWRConfigProvider>
              </PHProvider>
            </TooltipProvider>
          </div>
        </ThemeProvider>
      </body>
    </html>
  );
}


================================================
FILE: web/src/app/mcp/[[...path]]/route.ts
================================================
import { MCP_INTERNAL_URL } from "@/lib/constants";
import { NextRequest, NextResponse } from "next/server";

type RouteContext = {
  params?: Promise<{
    path?: string[];
  }>;
};

const proxyHandler = async (
  request: NextRequest,
  context: RouteContext
): Promise<Response> => {
  if (!isProxyEnabled()) {
    return NextResponse.json(
      {
        message:
          "This MCP proxy is only available in development mode. In production, something else (e.g. nginx) should handle this.",
      },
      { status: 404 }
    );
  }

  try {
    const resolvedParams = context.params ? await context.params : undefined;
    const targetUrl = buildTargetUrl(
      resolvedParams?.path,
      request.nextUrl.searchParams
    );
    const headers = buildForwardHeaders(request.headers);
    const fetchOptions: RequestInit & { duplex?: "half" } = {
      method: request.method,
      headers,
      signal: request.signal,
    };

    if (supportsRequestBody(request) && request.body) {
      fetchOptions.body = request.body;
      fetchOptions.duplex = "half";
    }

    const response = await fetch(targetUrl, fetchOptions);
    return response;
  } catch (error: unknown) {
    console.error("MCP Proxy error:", error);
    return NextResponse.json(
      {
        message: "MCP Proxy error",
        error:
          error instanceof Error ? error.message : "An unknown error occurred",
      },
      { status: 500 }
    );
  }
};

const isProxyEnabled = (): boolean => {
  if (process.env.OVERRIDE_API_PRODUCTION === "true") {
    return true;
  }
  return process.env.NODE_ENV === "development";
};

const buildForwardHeaders = (requestHeaders: Headers): Headers => {
  const headers = new Headers(requestHeaders);
  headers.delete("host");
  headers.delete("connection");
  headers.delete("content-length");
  return headers;
};

const supportsRequestBody = (request: NextRequest): boolean => {
  const method = request.method.toUpperCase();
  return method !== "GET" && method !== "HEAD";
};

const trimSlashes = (value: string): string => value.replace(/^\/+|\/+$/g, "");

const sanitizePathSegments = (segments: string[] | undefined): string[] =>
  segments?.filter(Boolean).map((segment) => encodeURIComponent(segment)) ?? [];

const buildTargetUrl = (
  pathSegments: string[] | undefined,
  searchParams: URLSearchParams
): string => {
  const target = new URL(MCP_INTERNAL_URL);
  const forwardedPath = sanitizePathSegments(pathSegments).join("/");

  const basePath = trimSlashes(target.pathname);
  const combinedPath = [basePath, trimSlashes(forwardedPath)]
    .filter(Boolean)
    .join("/");

  target.pathname = combinedPath ? `/${combinedPath}` : "/";
  const queryString = searchParams.toString();
  target.search = queryString;

  return target.toString();
};

type Handler = (
  request: NextRequest,
  context: RouteContext
) => Promise<Response>;

const handler: Handler = proxyHandler;

export const GET = handler;
export const POST = handler;
export const PUT = handler;
export const PATCH = handler;
export const DELETE = handler;
export const HEAD = handler;
export const OPTIONS = handler;


================================================
FILE: web/src/app/mcp/oauth/callback/page.tsx
================================================
"use client";

import OAuthCallbackPage from "@/components/oauth/OAuthCallbackPage";

export default function MCPOAuthCallbackPage() {
  const mcpConfig = {
    processingMessage: "Processing...",
    processingDetails: "Please wait while we complete the MCP server setup.",
    successMessage: "Success!",
    successDetailsTemplate:
      "Your {serviceName} authorization completed successfully. You can now use this server's tools in chat.",
    errorMessage: "Something Went Wrong",
    backButtonText: "Back to Chat",
    redirectingMessage: "Redirecting back in 2 seconds...",
    autoRedirectDelay: 2000,
    defaultRedirectPath: "/app",
    callbackApiUrl: "/api/mcp/oauth/callback",
    errorMessageMap: {
      "server not found": "MCP server configuration not found",
      credentials: "Authentication credentials are invalid",
      oauth: "OAuth authorization failed",
      validation: "Could not validate connection to MCP server",
    },
  };

  return <OAuthCallbackPage config={mcpConfig} />;
}


================================================
FILE: web/src/app/not-found.tsx
================================================
import { redirect } from "next/navigation";

export default function NotFound() {
  redirect("/auth/login");
}


================================================
FILE: web/src/app/nrf/(main)/layout.tsx
================================================
import { unstable_noStore as noStore } from "next/cache";
import AppSidebar from "@/sections/sidebar/AppSidebar";
import { getCurrentUserSS } from "@/lib/userSS";

export interface LayoutProps {
  children: React.ReactNode;
}

/**
 * NRF Main (New Tab) Layout
 *
 * Shows the app sidebar when the user is authenticated.
 * This layout is NOT used by the side-panel route.
 */
export default async function Layout({ children }: LayoutProps) {
  noStore();

  const user = await getCurrentUserSS();

  return (
    <div className="flex flex-row w-full h-full">
      {user && <AppSidebar />}
      {children}
    </div>
  );
}


================================================
FILE: web/src/app/nrf/(main)/page.tsx
================================================
import { unstable_noStore as noStore } from "next/cache";
import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh";
import NRFPage from "@/app/nrf/NRFPage";
import { NRFPreferencesProvider } from "@/components/context/NRFPreferencesContext";
import NRFChrome from "../NRFChrome";

/**
 * NRF (New Tab Page) Route - No Auth Required
 *
 * This route is placed outside /app/app/ to bypass the authentication
 * requirement in /app/app/layout.tsx. The NRFPage component handles
 * unauthenticated users gracefully by showing a login modal instead of
 * redirecting, which is better UX for the Chrome extension.
 *
 * Instead of AppLayouts.Root (which pulls in heavy Header state management),
 * we use NRFChrome — a lightweight overlay that renders only the search/chat
 * mode toggle and footer, floating transparently over NRFPage's background.
 */
export default async function Page() {
  noStore();

  return (
    <div className="relative w-full h-full">
      <InstantSSRAutoRefresh />
      <NRFPreferencesProvider>
        <NRFPage />
      </NRFPreferencesProvider>
      <NRFChrome />
    </div>
  );
}


================================================
FILE: web/src/app/nrf/NRFChrome.tsx
================================================
"use client";

import { useState } from "react";
import { cn, ensureHrefProtocol, noProp } from "@/lib/utils";
import type { Components } from "react-markdown";
import Text from "@/refresh-components/texts/Text";
import Popover from "@/refresh-components/Popover";
import { OpenButton } from "@opal/components";
import LineItem from "@/refresh-components/buttons/LineItem";
import { Button } from "@opal/components";
import { SvgBubbleText, SvgSearchMenu, SvgSidebar } from "@opal/icons";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import { useSettingsContext } from "@/providers/SettingsProvider";
import type { AppMode } from "@/providers/QueryControllerProvider";
import useAppFocus from "@/hooks/useAppFocus";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useAppSidebarContext } from "@/providers/AppSidebarProvider";
import useScreenSize from "@/hooks/useScreenSize";

const footerMarkdownComponents = {
  p: ({ children }: { children?: React.ReactNode }) => (
    <Text as="p" text03 secondaryAction className="!my-0 text-center">
      {children}
    </Text>
  ),
  a: ({
    href,
    className,
    children,
    ...rest
  }: React.AnchorHTMLAttributes<HTMLAnchorElement>) => {
    const fullHref = ensureHrefProtocol(href);
    return (
      <a
        href={fullHref}
        target="_blank"
        rel="noopener noreferrer"
        {...rest}
        className={cn(className, "underline underline-offset-2")}
      >
        <Text text03 secondaryAction>
          {children}
        </Text>
      </a>
    );
  },
} satisfies Partial<Components>;

/**
 * Lightweight chrome overlay for the NRF page.
 *
 * Renders only the search/chat mode toggle (top-left) and footer (bottom),
 * absolutely positioned so they float transparently over NRFPage's own
 * background. This avoids pulling in the full AppLayouts.Root Header which
 * carries heavy state management (share/delete/move modals) that the
 * extension doesn't need.
 */
export default function NRFChrome() {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const { state, setAppMode } = useQueryController();
  const settings = useSettingsContext();
  const { isMobile } = useScreenSize();
  const { setFolded } = useAppSidebarContext();
  const appFocus = useAppFocus();
  const [modePopoverOpen, setModePopoverOpen] = useState(false);

  const effectiveMode: AppMode =
    appFocus.isNewSession() && state.phase === "idle" ? state.appMode : "chat";

  const customFooterContent =
    settings?.enterpriseSettings?.custom_lower_disclaimer_content ||
    `[Onyx ${
      settings?.webVersion || "dev"
    }](https://www.onyx.app/) - Open Source AI Platform`;

  const showModeToggle =
    isPaidEnterpriseFeaturesEnabled &&
    settings.isSearchModeAvailable &&
    appFocus.isNewSession() &&
    state.phase === "idle";

  const showHeader = isMobile || showModeToggle;

  return (
    <>
      {/* Header chrome — top-left, mirrors position of settings button at top-right */}
      {showHeader && (
        <div className="absolute top-0 left-0 p-4 z-10 flex flex-row items-center gap-2">
          {isMobile && (
            <Button
              prominence="internal"
              icon={SvgSidebar}
              onClick={() => setFolded(false)}
            />
          )}
          {showModeToggle && (
            <Popover open={modePopoverOpen} onOpenChange={setModePopoverOpen}>
              <Popover.Trigger asChild>
                <OpenButton
                  icon={
                    effectiveMode === "search" ? SvgSearchMenu : SvgBubbleText
                  }
                >
                  {effectiveMode === "search" ? "Search" : "Chat"}
                </OpenButton>
              </Popover.Trigger>
              <Popover.Content align="start" width="lg">
                <Popover.Menu>
                  <LineItem
                    icon={SvgSearchMenu}
                    selected={effectiveMode === "search"}
                    description="Quick search for documents"
                    onClick={noProp(() => {
                      setAppMode("search");
                      setModePopoverOpen(false);
                    })}
                  >
                    Search
                  </LineItem>
                  <LineItem
                    icon={SvgBubbleText}
                    selected={effectiveMode === "chat"}
                    description="Conversation and research"
                    onClick={noProp(() => {
                      setAppMode("chat");
                      setModePopoverOpen(false);
                    })}
                  >
                    Chat
                  </LineItem>
                </Popover.Menu>
              </Popover.Content>
            </Popover>
          )}
        </div>
      )}

      {/* Footer — bottom-center, transparent background */}
      <footer className="absolute bottom-0 left-0 w-full z-10 flex flex-row justify-center items-center gap-2 px-2 pb-2 pointer-events-auto">
        <MinimalMarkdown
          content={customFooterContent}
          className="max-w-full text-center"
          components={footerMarkdownComponents}
        />
      </footer>
    </>
  );
}


================================================
FILE: web/src/app/nrf/NRFPage.tsx
================================================
"use client";

import { useState, useEffect, useRef, useCallback, useMemo } from "react";
import { useSearchParams } from "next/navigation";
import { useUser } from "@/providers/UserProvider";
import { toast } from "@/hooks/useToast";
import { AuthType } from "@/lib/constants";
import AppInputBar, { AppInputBarHandle } from "@/sections/input/AppInputBar";
import { Button } from "@opal/components";
import Modal from "@/refresh-components/Modal";
import { useFilters, useLlmManager } from "@/lib/hooks";
import Dropzone from "react-dropzone";
import { useSendMessageToParent, getPanelOrigin } from "@/lib/extension/utils";
import { useNRFPreferences } from "@/components/context/NRFPreferencesContext";
import SidePanelHeader from "@/app/nrf/side-panel/SidePanelHeader";
import { CHROME_MESSAGE } from "@/lib/extension/constants";
import { SettingsPanel } from "@/app/components/nrf/SettingsPanel";
import LoginPage from "@/app/auth/login/LoginPage";
import { sendSetDefaultNewTabMessage } from "@/lib/extension/utils";
import { useAgents } from "@/hooks/useAgents";
import { useProjectsContext } from "@/providers/ProjectsContext";
import useDeepResearchToggle from "@/hooks/useDeepResearchToggle";
import useChatController from "@/hooks/useChatController";
import useChatSessionController from "@/hooks/useChatSessionController";
import useAgentController from "@/hooks/useAgentController";
import {
  useCurrentChatState,
  useCurrentMessageHistory,
  useChatSessionStore,
  useDocumentSidebarVisible,
} from "@/app/app/stores/useChatSessionStore";
import ChatUI from "@/sections/chat/ChatUI";
import ChatScrollContainer from "@/sections/chat/ChatScrollContainer";
import WelcomeMessage from "@/app/app/components/WelcomeMessage";
import useChatSessions from "@/hooks/useChatSessions";
import { cn } from "@/lib/utils";
import Spacer from "@/refresh-components/Spacer";
import { DEFAULT_CONTEXT_TOKENS } from "@/lib/constants";
import { SvgUser, SvgMenu, SvgAlertTriangle } from "@opal/icons";
import { useAppBackground } from "@/providers/AppBackgroundProvider";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import DocumentsSidebar from "@/sections/document-sidebar/DocumentsSidebar";
import PreviewModal from "@/sections/modals/PreviewModal";
import { personaIncludesRetrieval } from "@/app/app/services/lib";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { eeGated } from "@/ce";
import EESearchUI from "@/ee/sections/SearchUI";

const SearchUI = eeGated(EESearchUI);

interface NRFPageProps {
  isSidePanel?: boolean;
}

// Reserve half of the context window for the model's response output
const AVAILABLE_CONTEXT_TOKENS = Number(DEFAULT_CONTEXT_TOKENS) * 0.5;

export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
  const { setUseOnyxAsNewTab } = useNRFPreferences();

  const searchParams = useSearchParams();
  const filterManager = useFilters();
  const { user, authTypeMetadata } = useUser();

  // Chat sessions
  const { refreshChatSessions } = useChatSessions();
  const existingChatSessionId = null; // NRF always starts new chats

  // Get agents for agent selection
  const { agents: availableAgents } = useAgents();

  // Projects context for file handling
  const {
    currentMessageFiles,
    setCurrentMessageFiles,
    lastFailedFiles,
    clearLastFailedFiles,
  } = useProjectsContext();

  // Show toast if any files failed
  useEffect(() => {
    if (lastFailedFiles && lastFailedFiles.length > 0) {
      const names = lastFailedFiles.map((f) => f.name).join(", ");
      toast.error(
        lastFailedFiles.length === 1
          ? `File failed and was removed: ${names}`
          : `Files failed and were removed: ${names}`
      );
      clearLastFailedFiles();
    }
  }, [lastFailedFiles, clearLastFailedFiles]);

  // Assistant controller
  const { selectedAgent, setSelectedAgentFromId, liveAgent } =
    useAgentController({
      selectedChatSession: undefined,
      onAgentSelect: () => {},
    });

  // LLM manager for model selection.
  // - currentChatSession: undefined because NRF always starts new chats
  // - liveAgent: uses the selected assistant, or undefined to fall back
  //   to system-wide default LLM provider.
  //
  // If no LLM provider is configured (e.g., fresh signup), the input bar is
  // disabled and a "Set up an LLM" button is shown (see bottom of component).
  const llmManager = useLlmManager(undefined, liveAgent ?? undefined);

  // Deep research toggle
  const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({
    chatSessionId: existingChatSessionId,
    agentId: selectedAgent?.id,
  });

  // State
  const [message, setMessage] = useState("");
  const [settingsOpen, setSettingsOpen] = useState<boolean>(false);
  const [tabReadingEnabled, setTabReadingEnabled] = useState<boolean>(false);
  const [currentTabUrl, setCurrentTabUrl] = useState<string | null>(null);
  const [presentingDocument, setPresentingDocument] =
    useState<MinimalOnyxDocument | null>(null);

  // Document sidebar state (from store)
  const documentSidebarVisible = useDocumentSidebarVisible();
  const updateCurrentDocumentSidebarVisible = useChatSessionStore(
    (state) => state.updateCurrentDocumentSidebarVisible
  );
  const setCurrentSession = useChatSessionStore(
    (state) => state.setCurrentSession
  );
  const currentSessionId = useChatSessionStore(
    (state) => state.currentSessionId
  );

  // Memoized callback for closing document sidebar
  const handleDocumentSidebarClose = useCallback(() => {
    updateCurrentDocumentSidebarVisible(false);
  }, [updateCurrentDocumentSidebarVisible]);

  // Initialize message from URL input parameter (for Chrome extension)
  const initializedRef = useRef(false);
  useEffect(() => {
    if (initializedRef.current) return;
    initializedRef.current = true;
    const urlParams = new URLSearchParams(window.location.search);
    const userPrompt = urlParams.get("user-prompt");
    if (userPrompt) {
      setMessage(userPrompt);
    }
  }, []);

  // Chat background from context
  const { hasBackground, appBackgroundUrl } = useAppBackground();

  // Modals
  const [showTurnOffModal, setShowTurnOffModal] = useState<boolean>(false);

  // Refs
  const inputRef = useRef<HTMLDivElement>(null);
  const chatInputBarRef = useRef<AppInputBarHandle | null>(null);
  const submitOnLoadPerformed = useRef<boolean>(false);

  // Access chat state from store
  const currentChatState = useCurrentChatState();
  const messageHistory = useCurrentMessageHistory();

  // Determine if we should show centered welcome or messages
  const hasMessages = messageHistory.length > 0;

  // Resolved assistant to use throughout the component
  const resolvedAgent = liveAgent ?? undefined;

  // Auto-scroll preference from user settings (matches ChatPage pattern)
  const autoScrollEnabled = user?.preferences?.auto_scroll !== false;
  const isStreaming = currentChatState === "streaming";

  // Query controller for search/chat classification (EE feature)
  const { submit: submitQuery, state } = useQueryController();

  // Determine if retrieval (search) is enabled based on the agent
  const retrievalEnabled = useMemo(() => {
    if (liveAgent) {
      return personaIncludesRetrieval(liveAgent);
    }
    return false;
  }, [liveAgent]);

  // Check if we're in search mode
  const isSearch =
    state.phase === "searching" || state.phase === "search-results";

  // Anchor for scroll positioning (matches ChatPage pattern)
  const anchorMessage = messageHistory.at(-2) ?? messageHistory[0];
  const anchorNodeId = anchorMessage?.nodeId;
  const anchorSelector = anchorNodeId ? `#message-${anchorNodeId}` : undefined;

  useSendMessageToParent();

  // Listen for tab URL updates from the Chrome extension
  useEffect(() => {
    if (!isSidePanel) return;

    function handleExtensionMessage(event: MessageEvent) {
      // Only trust messages from the Chrome extension parent.
      // Checking the origin (chrome-extension://) prevents a non-extension
      // page that embeds NRFPage as an iframe from injecting arbitrary URLs
      // into the prompt context via TAB_URL_UPDATED.
      if (!event.origin.startsWith("chrome-extension://")) return;
      if (event.source !== window.parent) return;
      if (event.data?.type === CHROME_MESSAGE.TAB_URL_UPDATED) {
        setCurrentTabUrl(event.data.url as string);
      }
    }

    window.addEventListener("message", handleExtensionMessage);
    return () => window.removeEventListener("message", handleExtensionMessage);
  }, [isSidePanel]);

  const toggleSettings = () => {
    setSettingsOpen((prev) => !prev);
  };

  // If user toggles the "Use Onyx" switch to off, prompt a modal
  const handleUseOnyxToggle = (checked: boolean) => {
    if (!checked) {
      setShowTurnOffModal(true);
    } else {
      setUseOnyxAsNewTab(true);
      sendSetDefaultNewTabMessage(true);
    }
  };

  const confirmTurnOff = () => {
    setUseOnyxAsNewTab(false);
    setShowTurnOffModal(false);
    sendSetDefaultNewTabMessage(false);
  };

  // Reset input bar after sending
  const resetInputBar = useCallback(() => {
    setMessage("");
    setCurrentMessageFiles([]);
    chatInputBarRef.current?.reset();
  }, [setMessage, setCurrentMessageFiles]);

  // Chat controller for submitting messages
  const { onSubmit, stopGenerating, handleMessageSpecificFileUpload } =
    useChatController({
      filterManager,
      llmManager,
      availableAgents: availableAgents || [],
      liveAgent,
      existingChatSessionId,
      selectedDocuments: [],
      searchParams: searchParams!,
      resetInputBar,
      setSelectedAgentFromId,
    });

  // Chat session controller for loading sessions
  const { currentSessionFileTokenCount } = useChatSessionController({
    existingChatSessionId,
    searchParams: searchParams!,
    filterManager,
    firstMessage: undefined,
    setSelectedAgentFromId,
    setSelectedDocuments: () => {}, // No-op: NRF doesn't support document selection
    setCurrentMessageFiles,
    chatSessionIdRef: { current: null },
    loadedIdSessionRef: { current: null },
    chatInputBarRef,
    isInitialLoad: { current: false },
    submitOnLoadPerformed,
    refreshChatSessions,
    onSubmit,
  });

  // Handle file upload
  const handleFileUpload = useCallback(
    async (acceptedFiles: File[]) => {
      handleMessageSpecificFileUpload(acceptedFiles);
    },
    [handleMessageSpecificFileUpload]
  );

  // Handle submit from AppInputBar - routes through query controller for search/chat classification
  const handleChatInputSubmit = useCallback(
    async (submittedMessage: string) => {
      if (!submittedMessage.trim()) return;

      const additionalContext =
        tabReadingEnabled && currentTabUrl
          ? `The user is currently viewing: ${currentTabUrl}. Use the open_url tool to read this page and use its content as additional context for your response.`
          : undefined;

      // If we already have messages (chat session started), always use chat mode
      // (matches AppPage behavior where existing sessions bypass classification)
      if (hasMessages) {
        onSubmit({
          message: submittedMessage,
          currentMessageFiles: currentMessageFiles,
          deepResearch: deepResearchEnabled,
          additionalContext,
        });
        return;
      }

      // Build an onChat closure that captures additionalContext for this submission
      const onChat = (chatMessage: string) => {
        onSubmit({
          message: chatMessage,
          currentMessageFiles: currentMessageFiles,
          deepResearch: deepResearchEnabled,
          additionalContext,
        });
      };

      // Use submitQuery which will classify the query and either:
      // - Route to search (sets phase to "searching"/"search-results" and shows SearchUI)
      // - Route to chat (calls onChat callback)
      await submitQuery(submittedMessage, onChat);
    },
    [
      hasMessages,
      onSubmit,
      currentMessageFiles,
      deepResearchEnabled,
      submitQuery,
      tabReadingEnabled,
      currentTabUrl,
    ]
  );

  // Handle resubmit last message on error
  const handleResubmitLastMessage = useCallback(() => {
    const lastUserMsg = messageHistory
      .slice()
      .reverse()
      .find((m) => m.type === "user");
    if (!lastUserMsg) {
      toast.error("No previously-submitted user message found.");
      return;
    }

    onSubmit({
      message: lastUserMsg.message,
      currentMessageFiles: currentMessageFiles,
      deepResearch: deepResearchEnabled,
      messageIdToResend: lastUserMsg.messageId,
    });
  }, [messageHistory, onSubmit, currentMessageFiles, deepResearchEnabled]);

  // Start a new chat session in the side panel
  const handleNewChat = useCallback(() => {
    setCurrentSession(null);
    setTabReadingEnabled(false);
    setCurrentTabUrl(null);
    resetInputBar();
    // Notify the service worker so it stops sending tab URL updates
    window.parent.postMessage(
      { type: CHROME_MESSAGE.TAB_READING_DISABLED },
      getPanelOrigin()
    );
  }, [setCurrentSession, resetInputBar]);

  const handleToggleTabReading = useCallback(() => {
    const next = !tabReadingEnabled;
    setTabReadingEnabled(next);
    if (!next) {
      setCurrentTabUrl(null);
    }
    window.parent.postMessage(
      {
        type: next
          ? CHROME_MESSAGE.TAB_READING_ENABLED
          : CHROME_MESSAGE.TAB_READING_DISABLED,
      },
      getPanelOrigin()
    );
  }, [tabReadingEnabled]);

  // Handle search result document click
  const handleSearchDocumentClick = useCallback(
    (doc: MinimalOnyxDocument) => setPresentingDocument(doc),
    []
  );

  return (
    <div
      className={cn(
        "relative w-full h-full flex flex-col overflow-hidden",
        isSidePanel
          ? "bg-background"
          : hasBackground && "bg-cover bg-center bg-fixed"
      )}
      style={
        !isSidePanel && hasBackground
          ? { backgroundImage: `url(${appBackgroundUrl})` }
          : undefined
      }
    >
      {/* Semi-transparent overlay for readability when background is set */}
      {!isSidePanel && hasBackground && (
        <div className="absolute inset-0 bg-background/80 pointer-events-none" />
      )}

      {/* Side panel header */}
      {isSidePanel && (
        <SidePanelHeader
          onNewChat={handleNewChat}
          chatSessionId={currentSessionId}
        />
      )}

      {/* Settings button */}
      {!isSidePanel && (
        <div className="absolute top-0 right-0 p-4 z-10">
          <Button
            prominence="secondary"
            icon={SvgMenu}
            onClick={toggleSettings}
            tooltip="Open settings"
          />
        </div>
      )}

      <Dropzone onDrop={handleFileUpload} noClick>
        {({ getRootProps }) => (
          <div
            {...getRootProps()}
            className={cn(
              "flex-1 min-h-0 w-full flex flex-col items-center outline-none",
              isSidePanel && "px-3"
            )}
          >
            {/* Chat area with messages */}
            {hasMessages && resolvedAgent && (
              <>
                {/* Fake header - pushes content below absolute settings button (non-side-panel only) */}
                {!isSidePanel && <Spacer rem={2} />}
                <ChatScrollContainer
                  sessionId="nrf-session"
                  anchorSelector={anchorSelector}
                  autoScroll={autoScrollEnabled}
                  isStreaming={isStreaming}
                  hideScrollbar={isSidePanel}
                >
                  <ChatUI
                    liveAgent={resolvedAgent}
                    llmManager={llmManager}
                    currentMessageFiles={currentMessageFiles}
                    setPresentingDocument={setPresentingDocument}
                    onSubmit={onSubmit}
                    onMessageSelection={() => {}}
                    stopGenerating={stopGenerating}
                    onResubmit={handleResubmitLastMessage}
                    deepResearchEnabled={deepResearchEnabled}
                    anchorNodeId={anchorNodeId}
                  />
                </ChatScrollContainer>
              </>
            )}

            {/* Welcome message - centered when no messages and not in search mode */}
            {!hasMessages && !isSearch && (
              <div className="relative w-full flex-1 flex flex-col items-center justify-end">
                <WelcomeMessage isDefaultAgent />
                <Spacer rem={1.5} />
              </div>
            )}

            {/* AppInputBar container - in normal flex flow like AppPage */}
            <div
              ref={inputRef}
              className={cn(
                "w-full flex flex-col",
                !isSidePanel &&
                  "max-w-[var(--app-page-main-content-width)] px-4"
              )}
            >
              <AppInputBar
                ref={chatInputBarRef}
                deepResearchEnabled={deepResearchEnabled}
                toggleDeepResearch={toggleDeepResearch}
                filterManager={filterManager}
                llmManager={llmManager}
                initialMessage={message}
                stopGenerating={stopGenerating}
                onSubmit={handleChatInputSubmit}
                chatState={currentChatState}
                currentSessionFileTokenCount={currentSessionFileTokenCount}
                availableContextTokens={AVAILABLE_CONTEXT_TOKENS}
                selectedAgent={liveAgent ?? undefined}
                handleFileUpload={handleFileUpload}
                disabled={
                  !llmManager.isLoadingProviders && !llmManager.hasAnyProvider
                }
                {...(isSidePanel && {
                  tabReadingEnabled,
                  currentTabUrl,
                  onToggleTabReading: handleToggleTabReading,
                })}
              />
              <Spacer rem={isSidePanel ? 1 : 0.5} />
            </div>

            {/* Search results - shown when query is classified as search */}
            {isSearch && (
              <div className="flex-1 w-full max-w-[var(--app-page-main-content-width)] px-4 min-h-0 overflow-auto">
                <Spacer rem={0.75} />
                <SearchUI onDocumentClick={handleSearchDocumentClick} />
              </div>
            )}

            {/* Spacer to push content up when showing welcome message */}
            {!hasMessages && !isSearch && <div className="flex-1 w-full" />}
          </div>
        )}
      </Dropzone>

      {/* Document sidebar - shown when sources are clicked */}
      <div
        className={cn(
          "absolute right-0 top-0 h-full z-20 overflow-hidden transition-all duration-300",
          documentSidebarVisible ? "w-[25rem]" : "w-0"
        )}
      >
        <DocumentsSidebar
          setPresentingDocument={setPresentingDocument}
          modal={false}
          closeSidebar={handleDocumentSidebarClose}
          selectedDocuments={[]}
        />
      </div>

      {/* Text/document preview modal */}
      {presentingDocument && (
        <PreviewModal
          presentingDocument={presentingDocument}
          onClose={() => setPresentingDocument(null)}
        />
      )}

      {/* Modals - only show when not in side panel mode */}
      {!isSidePanel && (
        <>
          <SettingsPanel
            settingsOpen={settingsOpen}
            toggleSettings={toggleSettings}
            handleUseOnyxToggle={handleUseOnyxToggle}
          />

          <Modal open={showTurnOffModal} onOpenChange={setShowTurnOffModal}>
            <Modal.Content width="sm">
              <Modal.Header
                icon={SvgAlertTriangle}
                title="Turn off Onyx new tab page?"
                description="You'll see your browser's default new tab page instead. You can turn it back on anytime in your Onyx settings."
                onClose={() => setShowTurnOffModal(false)}
              />
              <Modal.Footer>
                <Button
                  prominence="secondary"
                  onClick={() => setShowTurnOffModal(false)}
                >
                  Cancel
                </Button>
                <Button variant="danger" onClick={confirmTurnOff}>
                  Turn off
                </Button>
              </Modal.Footer>
            </Modal.Content>
          </Modal>
        </>
      )}

      {!user && (
        <Modal open onOpenChange={() => {}}>
          <Modal.Content width="sm" height="sm">
            <Modal.Header icon={SvgUser} title="Welcome to Onyx" />
            <Modal.Body>
              {authTypeMetadata.authType === AuthType.BASIC ? (
                <LoginPage
                  authUrl={null}
                  authTypeMetadata={authTypeMetadata}
                  nextUrl="/nrf"
                />
              ) : (
                <div className="flex flex-col items-center">
                  <Button
                    width="full"
                    prominence="secondary"
                    onClick={() => {
                      if (window.top) {
                        window.top.location.href = "/auth/login";
                      } else {
                        window.location.href = "/auth/login";
                      }
                    }}
                  >
                    Log in
                  </Button>
                </div>
              )}
            </Modal.Body>
          </Modal.Content>
        </Modal>
      )}

      {user && !llmManager.isLoadingProviders && !llmManager.hasAnyProvider && (
        <Button
          width="full"
          prominence="secondary"
          onClick={() => {
            window.location.href = "/admin/configuration/llm";
          }}
        >
          Set up an LLM.
        </Button>
      )}
    </div>
  );
}


================================================
FILE: web/src/app/nrf/layout.tsx
================================================
import { ProjectsProvider } from "@/providers/ProjectsContext";
import { VoiceModeProvider } from "@/providers/VoiceModeProvider";

export interface LayoutProps {
  children: React.ReactNode;
}

/**
 * NRF Root Layout - Shared by all NRF routes
 *
 * Provides ProjectsProvider (needed by NRFPage) without auth redirect.
 * Sidebar and chrome are handled by sub-layouts / individual pages.
 */
export default function Layout({ children }: LayoutProps) {
  return (
    <ProjectsProvider>
      <VoiceModeProvider>{children}</VoiceModeProvider>
    </ProjectsProvider>
  );
}


================================================
FILE: web/src/app/nrf/side-panel/SidePanelHeader.tsx
================================================
"use client";

import Logo from "@/refresh-components/Logo";
import { Button } from "@opal/components";
import { SvgEditBig, SvgExternalLink } from "@opal/icons";

interface SidePanelHeaderProps {
  onNewChat: () => void;
  chatSessionId?: string | null;
}

export default function SidePanelHeader({
  onNewChat,
  chatSessionId,
}: SidePanelHeaderProps) {
  const handleOpenInOnyx = () => {
    const path = chatSessionId ? `/app?chatId=${chatSessionId}` : "/app";
    window.open(`${window.location.origin}${path}`, "_blank");
  };

  return (
    <header className="flex items-center justify-between px-4 py-3 border-b border-border-01 bg-background">
      <Logo />
      <div className="flex items-center gap-1">
        <Button
          prominence="tertiary"
          icon={SvgEditBig}
          onClick={onNewChat}
          tooltip="New chat"
        />
        <Button
          prominence="tertiary"
          icon={SvgExternalLink}
          onClick={handleOpenInOnyx}
          tooltip="Open in Onyx"
        />
      </div>
    </header>
  );
}


================================================
FILE: web/src/app/nrf/side-panel/page.tsx
================================================
import { unstable_noStore as noStore } from "next/cache";
import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh";
import NRFPage from "@/app/nrf/NRFPage";
import { NRFPreferencesProvider } from "@/components/context/NRFPreferencesContext";

/**
 * NRF Side Panel Route - No Auth Required
 *
 * Side panel variant — no NRFChrome overlay needed since the side panel
 * has its own header (logo + "Open in Onyx" button) and doesn't show
 * the mode toggle or footer.
 */
export default async function Page() {
  noStore();

  return (
    <>
      <InstantSSRAutoRefresh />
      <NRFPreferencesProvider>
        <NRFPage isSidePanel />
      </NRFPreferencesProvider>
    </>
  );
}


================================================
FILE: web/src/app/oauth-config/callback/page.tsx
================================================
import OAuthCallbackPage from "@/components/oauth/OAuthCallbackPage";

export default function OAuthConfigCallbackPage() {
  return (
    <OAuthCallbackPage
      config={{
        callbackApiUrl: "/api/oauth-config/callback",
        defaultRedirectPath: "/app",
        processingMessage: "Completing Authorization...",
        processingDetails:
          "Please wait while we securely store your credentials.",
        successMessage: "Authorization Successful!",
        successDetailsTemplate:
          "You have successfully authorized the tool to access your {serviceName} account.",
        errorMessage: "Authorization Failed",
        backButtonText: "Back to Chat",
        autoRedirectDelay: 2000,
      }}
    />
  );
}


================================================
FILE: web/src/app/page.tsx
================================================
import { redirect } from "next/navigation";

export default async function Page() {
  redirect("/app");
}


================================================
FILE: web/src/app/providers.tsx
================================================
"use client";
import posthog from "posthog-js";
import { PostHogProvider } from "posthog-js/react";
import { useEffect } from "react";

const isPostHogEnabled = !!process.env.NEXT_PUBLIC_POSTHOG_KEY;

type PHProviderProps = { children: React.ReactNode };

export function PHProvider({ children }: PHProviderProps) {
  useEffect(() => {
    if (isPostHogEnabled) {
      posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY!, {
        api_host: "/ph_ingest",
        ui_host:
          process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.posthog.com",
        person_profiles: "identified_only",
        capture_pageview: false,
        session_recording: {
          // Sensitive inputs should use data-ph-no-capture attribute
          maskAllInputs: false,
        },
      });
    }
  }, []);

  if (!isPostHogEnabled) {
    return <>{children}</>;
  }

  return <PostHogProvider client={posthog}>{children}</PostHogProvider>;
}


================================================
FILE: web/src/app/web-vitals.tsx
================================================
"use client";
import { useReportWebVitals } from "next/web-vitals";
import { usePostHog } from "posthog-js/react";

export function WebVitals() {
  const posthog = usePostHog();

  useReportWebVitals((metric) => {
    posthog.capture(metric.name, metric);
  });
  return <></>;
}


================================================
FILE: web/src/ce.tsx
================================================
"use client";

import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { ComponentType, ReactNode, createElement } from "react";

/**
 * Passthrough component — renders children as-is, effectively a no-op wrapper.
 * <A><Invisible><B/></Invisible></A> === <A><B/></A>
 */
function Invisible({ children }: { children?: ReactNode }) {
  return <>{children}</>;
}

/**
 * Gates a component behind Enterprise. Returns the real component for EE,
 * or Invisible (passthrough) for CE.
 *
 * For providers: Community renders Invisible, so children pass through
 * and downstream hooks fall back to their context defaults.
 *
 * For leaf components: Community renders Invisible with no children,
 * so nothing is rendered.
 */
export function eeGated<P extends {}>(
  EEComponent: ComponentType<P>
): ComponentType<P> {
  function EEGatedWrapper(props: P) {
    const isEnterprise = usePaidEnterpriseFeaturesEnabled();
    if (!isEnterprise)
      return (
        <Invisible>{(props as { children?: ReactNode }).children}</Invisible>
      );
    return createElement(EEComponent, props);
  }
  EEGatedWrapper.displayName = `eeGated(${
    EEComponent.displayName || EEComponent.name || "Component"
  })`;
  return EEGatedWrapper;
}


================================================
FILE: web/src/components/AdvancedOptionsToggle.tsx
================================================
import Button from "@/refresh-components/buttons/Button";
import { cn } from "@/lib/utils";
import { SvgChevronRight } from "@opal/icons";
interface AdvancedOptionsToggleProps {
  showAdvancedOptions: boolean;
  setShowAdvancedOptions: (show: boolean) => void;
  title?: string;
}

export function AdvancedOptionsToggle({
  showAdvancedOptions,
  setShowAdvancedOptions,
  title,
}: AdvancedOptionsToggleProps) {
  return (
    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
    <Button
      internal
      leftIcon={({ className }) => (
        <SvgChevronRight
          className={cn(className, showAdvancedOptions && "rotate-90")}
        />
      )}
      onClick={() => setShowAdvancedOptions(!showAdvancedOptions)}
      className="mr-auto"
    >
      {title || "Advanced Options"}
    </Button>
  );
}


================================================
FILE: web/src/components/AgentsMultiSelect.tsx
================================================
import { FormikProps } from "formik";
import { GenericMultiSelect } from "@/components/GenericMultiSelect";

export type AgentsMultiSelectFormType = {
  personas: number[];
};

interface Agent {
  id: number;
  name: string;
  description: string;
}

interface AgentsMultiSelectProps<T extends AgentsMultiSelectFormType> {
  formikProps: FormikProps<T>;
  agents: Agent[] | undefined;
  isLoading?: boolean;
  error?: any;
  label?: string;
  subtext?: string;
  disabled?: boolean;
  disabledMessage?: string;
}

export function AgentsMultiSelect<T extends AgentsMultiSelectFormType>({
  formikProps,
  agents,
  isLoading = false,
  error,
  label = "Agents",
  subtext = "",
  disabled = false,
  disabledMessage,
}: AgentsMultiSelectProps<T>) {
  return (
    <GenericMultiSelect
      formikProps={formikProps}
      fieldName="personas"
      label={label}
      subtext={subtext}
      items={agents}
      isLoading={isLoading}
      error={error}
      emptyMessage="No agents available. Please create an agent first from the Agents page."
      disabled={disabled}
      disabledMessage={disabledMessage}
    />
  );
}


================================================
FILE: web/src/components/BasicClickable.tsx
================================================
import { JSX } from "react";

export function BasicClickable({
  children,
  onClick,
  fullWidth = false,
  inset,
  className,
}: {
  children: string | JSX.Element;
  onClick?: () => void;
  inset?: boolean;
  fullWidth?: boolean;
  className?: string;
}) {
  return (
    <button
      onClick={onClick}
      className={`
        border 
        border-border
        rounded
        font-medium 
        text-text-darker 
        text-sm
        relative
        px-1 py-1.5
        h-full
        bg-background
        select-none
        overflow-hidden
        hover:bg-accent-background
        ${fullWidth ? "w-full" : ""}
        ${className ? className : ""}
        `}
    >
      {children}
    </button>
  );
}

export function EmphasizedClickable({
  children,
  onClick,
  fullWidth = false,
  size = "md",
}: {
  children: string | JSX.Element;
  onClick?: () => void;
  fullWidth?: boolean;
  size?: "sm" | "md" | "lg";
}) {
  return (
    <button
      className={`
        inline-flex 
        items-center 
        justify-center 
        flex-shrink-0 
        font-medium 
        ${
          size === "sm"
            ? `p-1`
            : size === "md"
              ? `min-h-[38px]  py-1 px-3`
              : `min-h-[42px] py-2 px-4`
        }
        w-fit 
        bg-accent-background-hovered
        border-1 border-border-medium border bg-background-100 
        text-sm
        rounded-lg
        hover:bg-background-125
    `}
      onClick={onClick}
    >
      {children}
    </button>
  );
}

export function BasicSelectable({
  children,
  selected,
  hasBorder,
  fullWidth = false,
  padding = "normal",
  removeColors = false,
  isDragging = false,
  isHovered,
}: {
  children: string | JSX.Element;
  selected: boolean;
  hasBorder?: boolean;
  fullWidth?: boolean;
  removeColors?: boolean;
  padding?: "none" | "normal" | "extra";
  isDragging?: boolean;
  isHovered?: boolean;
}) {
  return (
    <div
      className={`
        rounded
        font-medium 
        text-sm
        truncate
        px-2
        ${padding == "normal" && "p-1"}
        ${padding == "extra" && "p-1.5"}
        select-none
        ${hasBorder ? "border border-border" : ""}
        ${
          !removeColors
            ? isDragging
              ? "bg-background-chat-hover"
              : selected
                ? "bg-background-chat-selected"
                : isHovered
                  ? "bg-background-chat-hover"
                  : "hover:bg-background-chat-hover"
            : ""
        }
        ${fullWidth ? "w-full" : ""}`}
    >
      {children}
    </div>
  );
}


================================================
FILE: web/src/components/Bubble.tsx
================================================
import { JSX } from "react";
import Checkbox from "@/refresh-components/inputs/Checkbox";

export function Bubble({
  isSelected,
  onClick,
  children,
  showCheckbox = false,
  notSelectable = false,
}: {
  isSelected: boolean;
  onClick?: () => void;
  children: string | JSX.Element;
  showCheckbox?: boolean;
  notSelectable?: boolean;
}) {
  return (
    <div
      className={
        `
      px-1.5
      py-1
      rounded-lg
      border
      border-border
      w-fit
      flex` +
        (notSelectable
          ? " bg-background cursor-default"
          : isSelected
            ? " bg-accent-background-hovered cursor-pointer"
            : " bg-background hover:bg-accent-background cursor-pointer")
      }
      onClick={onClick}
    >
      <div className="my-auto">{children}</div>
      {showCheckbox && (
        <div className="pl-2 my-auto">
          <Checkbox checked={isSelected} />
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/components/CollapsibleCard.tsx
================================================
import { ChevronDown } from "lucide-react";
import React, {
  useState,
  ReactNode,
  useRef,
  useLayoutEffect,
  JSX,
} from "react";

interface CollapsibleCardProps {
  header: JSX.Element;
  children: ReactNode;
  defaultOpen?: boolean;
  className?: string;
}

/**
 * Renders a "collapsible" card which, when collapsed, is meant to showcase very "high-level" information (e.g., the name), but when expanded, can show a list of sub-items which are all related to one another.
 */
export default function CollapsibleCard({
  header,
  children,
  defaultOpen = false,
  className = "",
}: CollapsibleCardProps) {
  const [open, setOpen] = useState(defaultOpen);
  const [maxHeight, setMaxHeight] = useState<string | undefined>(undefined);
  const contentRef = useRef<HTMLDivElement>(null);

  // Update maxHeight for animation when open/close
  useLayoutEffect(() => {
    if (open && contentRef.current) {
      setMaxHeight(contentRef.current.scrollHeight + "px");
    } else {
      setMaxHeight("0px");
    }
  }, [open, children]);

  // If content changes size while open, update maxHeight
  useLayoutEffect(() => {
    if (open && contentRef.current) {
      const handleResize = () => {
        setMaxHeight(contentRef.current!.scrollHeight + "px");
      };
      handleResize();
      window.addEventListener("resize", handleResize);
      return () => window.removeEventListener("resize", handleResize);
    }
  }, [open, children]);

  return (
    <div
      className={`rounded-lg border border-border bg-background shadow-md transition-all ${className}`}
    >
      <button
        type="button"
        className="w-full flex items-center px-8 py-6 text-left focus:outline-none focus:ring-2 focus:ring-accent rounded-t-lg bg-accent-background hover:bg-accent-background-hovered transition-colors"
        onClick={() => setOpen((prev) => !prev)}
        aria-expanded={open}
      >
        <div className="flex-1">{header}</div>
        <span
          className="ml-3 transition-transform flex-shrink-0"
          style={{ transform: open ? "rotate(0deg)" : "rotate(-90deg)" }}
        >
          <ChevronDown size={20} />
        </span>
      </button>
      <div
        ref={contentRef}
        style={{
          maxHeight,
          opacity: open ? 1 : 0,
          overflow: "hidden",
          transition:
            "max-height 0.35s cubic-bezier(0.4, 0, 0.2, 1), opacity 0.25s cubic-bezier(0.4, 0, 0.2, 1)",
        }}
        aria-hidden={!open}
      >
        <div className="border-t border-border bg-background rounded-b-lg">
          {children}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/ConnectorMultiSelect.tsx
================================================
"use client";

import React, { useState, useRef, useEffect } from "react";
import { ConnectorStatus } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import Label from "@/refresh-components/form/Label";
import { ErrorMessage } from "formik";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { SvgX } from "@opal/icons";
import { Button } from "@opal/components";

interface ConnectorMultiSelectProps {
  name: string;
  label: string;
  connectors: ConnectorStatus<any, any>[];
  selectedIds: number[];
  onChange: (selectedIds: number[]) => void;
  disabled?: boolean;
  placeholder?: string;
  showError?: boolean;
}

export const ConnectorMultiSelect = ({
  name,
  label,
  connectors,
  selectedIds,
  onChange,
  disabled = false,
  placeholder = "Search connectors...",
  showError = false,
}: ConnectorMultiSelectProps) => {
  const [open, setOpen] = useState(false);
  const [searchQuery, setSearchQuery] = useState("");
  const dropdownRef = useRef<HTMLDivElement>(null);
  const inputRef = useRef<HTMLInputElement>(null);

  const selectedConnectors = connectors.filter((connector) =>
    selectedIds.includes(connector.cc_pair_id)
  );

  const unselectedConnectors = connectors.filter(
    (connector) => !selectedIds.includes(connector.cc_pair_id)
  );

  const allConnectorsSelected =
    connectors.length > 0 && unselectedConnectors.length === 0;

  const filteredUnselectedConnectors = unselectedConnectors.filter(
    (connector) => {
      const connectorName = connector.name || connector.connector.source;
      return connectorName.toLowerCase().includes(searchQuery.toLowerCase());
    }
  );

  useEffect(() => {
    if (allConnectorsSelected) {
      setSearchQuery("");
    }
  }, [allConnectorsSelected, selectedIds]);

  const selectConnector = (connectorId: number) => {
    const newSelectedIds = [...selectedIds, connectorId];
    onChange(newSelectedIds);
    setSearchQuery("");

    const willAllBeSelected = connectors.length === newSelectedIds.length;

    if (!willAllBeSelected) {
      setTimeout(() => {
        inputRef.current?.focus();
      }, 0);
    }
  };

  const removeConnector = (connectorId: number) => {
    onChange(selectedIds.filter((id) => id !== connectorId));
  };

  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        dropdownRef.current &&
        !dropdownRef.current.contains(event.target as Node) &&
        inputRef.current !== event.target &&
        !inputRef.current?.contains(event.target as Node)
      ) {
        setOpen(false);
      }
    };

    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, []);

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === "Escape") {
      setOpen(false);
    }
  };

  const effectivePlaceholder = allConnectorsSelected
    ? "All connectors selected"
    : placeholder;

  const isInputDisabled = disabled;

  return (
    <div className="flex flex-col w-full space-y-2 mb-4">
      {label && (
        <Label>
          <Text>{label}</Text>
        </Label>
      )}

      <Text as="p" mainUiMuted text03>
        All documents indexed by the selected connectors will be part of this
        document set.
      </Text>
      <div className="relative">
        <InputTypeIn
          ref={inputRef}
          leftSearchIcon
          placeholder={effectivePlaceholder}
          value={searchQuery}
          variant={isInputDisabled ? "disabled" : undefined}
          onChange={(e) => {
            if (!allConnectorsSelected) {
              setSearchQuery(e.target.value);
              setOpen(true);
            }
          }}
          onFocus={() => {
            setOpen(true);
          }}
          onKeyDown={handleKeyDown}
          className="rounded-12"
        />

        {open && (
          <div
            ref={dropdownRef}
            className="absolute z-50 w-full mt-1 rounded-12 border border-border-02 bg-background-neutral-00 shadow-md default-scrollbar max-h-[300px] overflow-auto"
          >
            {allConnectorsSelected ? (
              <div className="py-4 px-3">
                <Text as="p" text03 className="text-center text-xs">
                  All available connectors have been selected. Remove connectors
                  below to add different ones.
                </Text>
              </div>
            ) : filteredUnselectedConnectors.length === 0 ? (
              <div className="py-4 px-3">
                <Text as="p" text03 className="text-center text-xs">
                  {searchQuery
                    ? "No matching connectors found"
                    : connectors.length === 0
                      ? "No private connectors available. Create a private connector first."
                      : "No more connectors available"}
                </Text>
              </div>
            ) : (
              <div>
                {filteredUnselectedConnectors.map((connector) => (
                  <div
                    key={connector.cc_pair_id}
                    className="flex items-center justify-between py-2 px-3 cursor-pointer hover:bg-background-neutral-01 text-xs"
                    onClick={() => selectConnector(connector.cc_pair_id)}
                  >
                    <div className="flex items-center truncate mr-2">
                      <ConnectorTitle
                        connector={connector.connector}
                        ccPairId={connector.cc_pair_id}
                        ccPairName={connector.name}
                        isLink={false}
                        showMetadata={false}
                      />
                    </div>
                  </div>
                ))}
              </div>
            )}
          </div>
        )}
      </div>

      {selectedConnectors.length > 0 ? (
        <div className="mt-3">
          <div className="flex flex-wrap gap-1.5">
            {selectedConnectors.map((connector) => (
              <div
                key={connector.cc_pair_id}
                className="flex items-center bg-background-neutral-00 rounded-12 border border-border-02 transition-all px-2 py-1 max-w-full group text-xs"
              >
                <div className="flex items-center overflow-hidden">
                  <div className="flex-shrink-0 text-xs">
                    <ConnectorTitle
                      connector={connector.connector}
                      ccPairId={connector.cc_pair_id}
                      ccPairName={connector.name}
                      isLink={false}
                      showMetadata={false}
                    />
                  </div>
                </div>
                <Button
                  prominence="tertiary"
                  size="sm"
                  type="button"
                  aria-label="Remove connector"
                  tooltip="Remove connector"
                  onClick={() => removeConnector(connector.cc_pair_id)}
                  icon={SvgX}
                />
              </div>
            ))}
          </div>
        </div>
      ) : (
        <div className="mt-3 p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01 text-text-03 text-xs">
          No connectors selected. Search and select connectors above.
        </div>
      )}

      {showError && (
        <ErrorMessage
          name={name}
          component="div"
          className="text-action-danger-05 text-xs mt-1"
        />
      )}
    </div>
  );
};


================================================
FILE: web/src/components/DeleteButton.tsx
================================================
import { SvgTrash } from "@opal/icons";
import { Button } from "@opal/components";

export interface DeleteButtonProps {
  onClick?: (event: React.MouseEvent<HTMLElement>) => void | Promise<void>;
  disabled?: boolean;
}

export function DeleteButton({ onClick, disabled }: DeleteButtonProps) {
  return (
    <Button
      disabled={disabled}
      onClick={onClick}
      icon={SvgTrash}
      tooltip="Delete"
      prominence="tertiary"
      size="sm"
    />
  );
}


================================================
FILE: web/src/components/Dropdown.tsx
================================================
"use client";

import { forwardRef, useEffect, useRef, useState, JSX } from "react";
import { FiCheck, FiChevronDown, FiInfo } from "react-icons/fi";
import Popover from "@/refresh-components/Popover";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
export interface Option<T> {
  name: string;
  value: T;
  description?: string;
  icon?: (props: { size?: number; className?: string }) => JSX.Element;
  // Domain-specific flag: when false, render as disabled (used by AccessTypeForm)
  disabled?: boolean;
  disabledReason?: string;
}

export type StringOrNumberOption = Option<string | number>;

export const CustomDropdown = ({
  children,
  dropdown,
  direction = "down",
}: {
  children: JSX.Element | string;
  dropdown: JSX.Element | string;
  direction?: "up" | "down";
}) => {
  const [isOpen, setIsOpen] = useState(false);
  const dropdownRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        dropdownRef.current &&
        !dropdownRef.current.contains(event.target as Node)
      ) {
        setIsOpen(false);
      }
    };

    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, []);

  return (
    <div className="relative inline-block text-left w-full" ref={dropdownRef}>
      <div onClick={() => setIsOpen(!isOpen)}>{children}</div>

      {isOpen && (
        <div
          onClick={() => setIsOpen(!isOpen)}
          className={`absolute ${
            direction === "up" ? "bottom-full pb-2" : "pt-2"
          } w-full z-30 box-shadow`}
        >
          {dropdown}
        </div>
      )}
    </div>
  );
};

export function DefaultDropdownElement({
  name,
  icon,
  description,
  onSelect,
  isSelected,
  includeCheckbox = false,
  disabled = false,
  disabledReason,
}: {
  name: string | JSX.Element;
  icon?: (props: { size?: number; className?: string }) => JSX.Element;
  description?: string;
  onSelect?: () => void;
  isSelected?: boolean;
  includeCheckbox?: boolean;
  disabled?: boolean;
  disabledReason?: string;
}) {
  return (
    <div
      className={`
        flex
        mx-1
        px-2
        text-sm
        py-1.5
        my-1
        select-none
        ${disabled ? "cursor-not-allowed opacity-60" : "cursor-pointer"}
        bg-transparent
        rounded
        text-text-dark
        ${disabled ? "" : "hover:bg-accent-background-hovered"}
      `}
      onClick={disabled ? undefined : onSelect}
    >
      <div>
        <div className="flex">
          {includeCheckbox && (
            <input
              type="checkbox"
              className="mr-2"
              checked={isSelected}
              onChange={() => null}
            />
          )}
          {icon && icon({ size: 16, className: "mr-2 h-4 w-4 my-auto" })}
          {name}
          {disabled && disabledReason && (
            <SimpleTooltip tooltip={disabledReason}>
              <span className="ml-2 my-auto p-1 rounded hover:bg-background-100 text-warning transition-colors cursor-default">
                <FiInfo size={14} className="text-warning" />
              </span>
            </SimpleTooltip>
          )}
        </div>
        {description && <div className="text-xs">{description}</div>}
      </div>
      {isSelected && (
        <div className="ml-auto mr-1 my-auto">
          <FiCheck />
        </div>
      )}
    </div>
  );
}

type DefaultDropdownProps = {
  options: StringOrNumberOption[];
  selected: string | null;
  onSelect: (value: string | number | null) => void;
  includeDefault?: boolean;
  defaultValue?: string;
  side?: "top" | "right" | "bottom" | "left";
  maxHeight?: string;
};

export const DefaultDropdown = forwardRef<HTMLDivElement, DefaultDropdownProps>(
  (
    {
      options,
      selected,
      onSelect,
      includeDefault,
      defaultValue,
      side,
      maxHeight,
    },
    ref
  ) => {
    const selectedOption = options.find((option) => option.value === selected);
    const [isOpen, setIsOpen] = useState(false);

    const handleSelect = (value: any) => {
      onSelect(value);
      setIsOpen(false);
    };

    return (
      <Popover open={isOpen} onOpenChange={setIsOpen}>
        <Popover.Trigger asChild>
          <div
            className={`
              flex
              text-sm
              bg-background
              px-3
              py-1.5
              rounded-lg
              border
              border-border
              cursor-pointer
              w-full`}
          >
            <p className="line-clamp-1">
              {selectedOption?.name ||
                (includeDefault
                  ? defaultValue || "Default"
                  : "Select an option...")}
            </p>
            <FiChevronDown className="my-auto ml-auto" />
          </div>
        </Popover.Trigger>
        <Popover.Content
          align="start"
          side={side}
          sideOffset={5}
          width="trigger"
        >
          <div
            ref={ref}
            className={`
              rounded-lg
              flex
              flex-col
              bg-background
              ${maxHeight || "max-h-96"}
              overflow-y-auto
              overscroll-contain`}
          >
            {includeDefault && (
              <DefaultDropdownElement
                key={-1}
                name="Default"
                onSelect={() => handleSelect(null)}
                isSelected={selected === null}
              />
            )}
            {options.map((option, ind) => {
              const isSelected = option.value === selected;
              return (
                <DefaultDropdownElement
                  key={option.value}
                  name={option.name}
                  description={option.description}
                  onSelect={() => handleSelect(option.value)}
                  isSelected={isSelected}
                  icon={option.icon}
                  disabled={option.disabled}
                  disabledReason={option.disabledReason}
                />
              );
            })}
          </div>
        </Popover.Content>
      </Popover>
    );
  }
);


================================================
FILE: web/src/components/EditableStringFieldDisplay.tsx
================================================
import { EditIcon } from "@/components/icons/icons";
import { useEffect, useRef, useState } from "react";
import { Input } from "@/components/ui/input";
import { cn } from "@/lib/utils";
import IconButton from "@/refresh-components/buttons/IconButton";
import { SvgCheck, SvgX } from "@opal/icons";
interface EditableStringFieldDisplayProps {
  value: string;
  isEditable: boolean;
  onUpdate: (newValue: string) => Promise<void>;
  textClassName?: string;
  scale?: number;
}

export function EditableStringFieldDisplay({
  value,
  isEditable,
  onUpdate,
  textClassName,
  scale = 1,
}: EditableStringFieldDisplayProps) {
  const [isEditing, setIsEditing] = useState(false);
  const [editableValue, setEditableValue] = useState(value);
  const inputRef = useRef<HTMLInputElement | HTMLTextAreaElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    if (isEditing && inputRef.current) {
      inputRef.current.focus();
    }
  }, [isEditing]);

  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        containerRef.current &&
        !containerRef.current.contains(event.target as Node) &&
        isEditing
      ) {
        resetEditing();
      }
    };

    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, [isEditing]);

  const handleValueChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setEditableValue(e.target.value);
  };

  const handleUpdate = async () => {
    await onUpdate(editableValue);
    setIsEditing(false);
  };

  const resetEditing = () => {
    setIsEditing(false);
    setEditableValue(value);
  };

  const handleKeyDown = (
    e: React.KeyboardEvent<HTMLInputElement | HTMLTextAreaElement>
  ) => {
    if (e.key === "Enter") {
      handleUpdate();
    }
  };

  return (
    <div ref={containerRef} className={"flex items-center"}>
      <Input
        ref={inputRef as React.RefObject<HTMLInputElement>}
        type="text"
        value={editableValue}
        onChange={handleValueChange}
        onKeyDown={handleKeyDown}
        className={cn(
          textClassName,
          "text-3xl font-bold text-text-800",
          "user-text",
          isEditing ? "block" : "hidden"
        )}
        style={{ fontSize: `${scale}rem` }}
      />
      {!isEditing && (
        <span
          onClick={() => isEditable && setIsEditing(true)}
          className={cn(
            textClassName,
            "text-3xl font-bold text-text-800",
            "cursor-pointer user-text"
          )}
          style={{ fontSize: `${scale}rem` }}
        >
          {value}
        </span>
      )}
      {isEditing && isEditable ? (
        <>
          <div className={cn("flex", "flex-row")}>
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <IconButton
              onClick={handleUpdate}
              internal
              className="ml-2"
              icon={SvgCheck}
            />
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <IconButton
              onClick={resetEditing}
              internal
              className="ml-2"
              icon={SvgX}
            />
          </div>
        </>
      ) : (
        <h1
          onClick={() => isEditable && setIsEditing(true)}
          className={`group flex ${isEditable ? "cursor-pointer" : ""} ${""}`}
          style={{ fontSize: `${scale}rem` }}
        >
          {isEditable && (
            <EditIcon className={`visible ml-2`} size={12 * scale} />
          )}
        </h1>
      )}
    </div>
  );
}


================================================
FILE: web/src/components/EditableValue.tsx
================================================
"use client";

import { useState } from "react";
import { FiEdit2 } from "react-icons/fi";
import { CheckmarkIcon } from "./icons/icons";

export function EditableValue({
  initialValue,
  onSubmit,
  emptyDisplay,
  consistentWidth = true,
}: {
  initialValue: string;
  onSubmit: (value: string) => Promise<boolean>;
  emptyDisplay?: string;
  consistentWidth?: boolean;
}) {
  const [isOpen, setIsOpen] = useState(false);
  const [editedValue, setEditedValue] = useState(initialValue);

  if (isOpen) {
    return (
      <div className="my-auto h-full flex">
        <input
          value={editedValue}
          onChange={(e) => {
            setEditedValue(e.target.value);
          }}
          onKeyDown={async (e) => {
            if (e.key === "Enter") {
              const success = await onSubmit(editedValue);
              if (success) {
                setIsOpen(false);
              }
            }
            if (e.key === "Escape") {
              setIsOpen(false);
              onSubmit(initialValue);
            }
          }}
          className="border bg-background-200 border-background-300 rounded py-1 px-1 w-12 h-4 my-auto"
        />
        <div
          onClick={async () => {
            const success = await onSubmit(editedValue);
            if (success) {
              setIsOpen(false);
            }
          }}
          className="cursor-pointer my-auto ml-2"
        >
          <CheckmarkIcon size={16} className="text-green-700" />
        </div>
      </div>
    );
  }

  return (
    <div className="h-full flex flex-col">
      <div
        className="flex my-auto cursor-pointer hover:bg-accent-background-hovered rounded"
        onClick={() => setIsOpen(true)}
      >
        <div className={"flex " + (consistentWidth && " w-6")}>
          <div className="ml-auto my-auto">{initialValue || emptyDisplay}</div>
        </div>
        <div className="cursor-pointer ml-2 my-auto h-4">
          <FiEdit2 size={16} />
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/ErrorCallout.tsx
================================================
import { Callout } from "@/components/ui/callout";
import { FiAlertTriangle } from "react-icons/fi";

export function ErrorCallout({
  errorTitle,
  errorMsg,
}: {
  errorTitle?: string;
  errorMsg?: string;
}) {
  return (
    <div>
      <Callout
        className="mt-4"
        title={errorTitle || "Page not found"}
        icon={<FiAlertTriangle className="text-red-500 h-5 w-5" />}
        type="danger"
      >
        {errorMsg}
      </Callout>
    </div>
  );
}


================================================
FILE: web/src/components/FederatedConnectorSelector.tsx
================================================
import React, { useState, useRef, useEffect } from "react";
import {
  FederatedConnectorDetail,
  FederatedConnectorConfig,
  federatedSourceToRegularSource,
} from "@/lib/types";
import { SourceIcon } from "@/components/SourceIcon";
import Label from "@/refresh-components/form/Label";
import { ErrorMessage } from "formik";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { SvgX } from "@opal/icons";
import { Button } from "@opal/components";

interface FederatedConnectorSelectorProps {
  name: string;
  label: string;
  federatedConnectors: FederatedConnectorDetail[];
  selectedConfigs: FederatedConnectorConfig[];
  onChange: (selectedConfigs: FederatedConnectorConfig[]) => void;
  disabled?: boolean;
  placeholder?: string;
  showError?: boolean;
}

export const FederatedConnectorSelector = ({
  name,
  label,
  federatedConnectors,
  selectedConfigs,
  onChange,
  disabled = false,
  placeholder = "Search federated connectors...",
  showError = false,
}: FederatedConnectorSelectorProps) => {
  const [open, setOpen] = useState(false);
  const [searchQuery, setSearchQuery] = useState("");
  const dropdownRef = useRef<HTMLDivElement>(null);
  const inputRef = useRef<HTMLInputElement>(null);

  const selectedConnectorIds = selectedConfigs.map(
    (config) => config.federated_connector_id
  );

  const selectedConnectors = federatedConnectors.filter((connector) =>
    selectedConnectorIds.includes(connector.id)
  );

  const unselectedConnectors = federatedConnectors.filter(
    (connector) => !selectedConnectorIds.includes(connector.id)
  );

  const allConnectorsSelected = unselectedConnectors.length === 0;

  const filteredUnselectedConnectors = unselectedConnectors.filter(
    (connector) => {
      const connectorName = connector.name;
      return connectorName.toLowerCase().includes(searchQuery.toLowerCase());
    }
  );

  useEffect(() => {
    if (allConnectorsSelected && open) {
      setOpen(false);
      inputRef.current?.blur();
      setSearchQuery("");
    }
  }, [allConnectorsSelected, open]);

  const selectConnector = (connectorId: number) => {
    // Add connector with empty entities configuration
    const newConfig: FederatedConnectorConfig = {
      federated_connector_id: connectorId,
      entities: {},
    };

    const newSelectedConfigs = [...selectedConfigs, newConfig];
    onChange(newSelectedConfigs);
    setSearchQuery("");

    const willAllBeSelected =
      federatedConnectors.length === newSelectedConfigs.length;

    if (!willAllBeSelected) {
      setTimeout(() => {
        inputRef.current?.focus();
      }, 0);
    }
  };

  const removeConnector = (connectorId: number) => {
    onChange(
      selectedConfigs.filter(
        (config) => config.federated_connector_id !== connectorId
      )
    );
  };

  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        dropdownRef.current &&
        !dropdownRef.current.contains(event.target as Node) &&
        inputRef.current !== event.target &&
        !inputRef.current?.contains(event.target as Node)
      ) {
        setOpen(false);
      }
    };

    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, []);

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === "Escape") {
      setOpen(false);
    }
  };

  const effectivePlaceholder = allConnectorsSelected
    ? "All federated connectors selected"
    : placeholder;

  const isInputDisabled = disabled || allConnectorsSelected;

  return (
    <div className="flex flex-col w-full space-y-2 mb-4">
      {label && (
        <Label>
          <Text>{label}</Text>
        </Label>
      )}

      <Text as="p" mainUiMuted text03>
        Documents from selected federated connectors will be searched in
        real-time during queries.
      </Text>
      <div className="relative">
        <InputTypeIn
          ref={inputRef}
          leftSearchIcon
          placeholder={effectivePlaceholder}
          value={searchQuery}
          variant={isInputDisabled ? "disabled" : undefined}
          onChange={(e) => {
            setSearchQuery(e.target.value);
            setOpen(true);
          }}
          onKeyDown={handleKeyDown}
          onFocus={() => {
            if (!allConnectorsSelected) {
              setOpen(true);
            }
          }}
          className={
            allConnectorsSelected
              ? "rounded-12 bg-background-neutral-01"
              : "rounded-12"
          }
        />

        {open && !allConnectorsSelected && (
          <div
            ref={dropdownRef}
            className="absolute z-50 w-full mt-1 rounded-12 border border-border-02 bg-background-neutral-00 shadow-md default-scrollbar max-h-[300px] overflow-auto"
          >
            {filteredUnselectedConnectors.length === 0 ? (
              <div className="py-4 text-center text-xs text-text-03">
                {searchQuery
                  ? "No matching federated connectors found"
                  : "No more federated connectors available"}
              </div>
            ) : (
              <div>
                {filteredUnselectedConnectors.map((connector) => (
                  <div
                    key={connector.id}
                    className="flex items-center justify-between py-2 px-3 cursor-pointer hover:bg-background-neutral-01 text-xs"
                    onClick={() => selectConnector(connector.id)}
                  >
                    <div className="flex items-center truncate mr-2">
                      <div className="mr-2">
                        <SourceIcon
                          sourceType={federatedSourceToRegularSource(
                            connector.source
                          )}
                          iconSize={16}
                        />
                      </div>
                      <span className="font-medium">{connector.name}</span>
                    </div>
                  </div>
                ))}
              </div>
            )}
          </div>
        )}
      </div>

      {selectedConnectors.length > 0 ? (
        <div className="mt-3">
          <div className="flex flex-wrap gap-1.5">
            {selectedConnectors.map((connector) => {
              const config = selectedConfigs.find(
                (c) => c.federated_connector_id === connector.id
              );
              const hasEntitiesConfigured =
                config && Object.keys(config.entities).length > 0;

              return (
                <div
                  key={connector.id}
                  className="flex items-center bg-background-neutral-00 rounded-12 border border-border-02 transition-all px-2 py-1 max-w-full group text-xs"
                >
                  <div className="flex items-center overflow-hidden">
                    <div className="mr-1 flex-shrink-0">
                      <SourceIcon
                        sourceType={federatedSourceToRegularSource(
                          connector.source
                        )}
                        iconSize={14}
                      />
                    </div>
                    <span className="font-medium truncate">
                      {connector.name}
                    </span>
                    {hasEntitiesConfigured && (
                      <div
                        className="ml-1 w-2 h-2 bg-green-500 rounded-full flex-shrink-0"
                        title="Entities configured"
                      />
                    )}
                  </div>
                  <div className="flex items-center ml-2 gap-1">
                    <Button
                      prominence="tertiary"
                      size="sm"
                      type="button"
                      aria-label="Remove connector"
                      tooltip="Remove connector"
                      onClick={() => removeConnector(connector.id)}
                      icon={SvgX}
                    />
                  </div>
                </div>
              );
            })}
          </div>
        </div>
      ) : (
        <div className="mt-3 p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01 text-text-03 text-xs">
          No federated connectors selected. Search and select connectors above.
        </div>
      )}

      {showError && (
        <ErrorMessage
          name={name}
          component="div"
          className="text-action-danger-05 text-xs mt-1"
        />
      )}
    </div>
  );
};


================================================
FILE: web/src/components/Field.tsx
================================================
"use client";

import {
  ArrayHelpers,
  ErrorMessage,
  Field,
  FieldArray,
  FastField,
  useField,
  useFormikContext,
} from "formik";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import * as Yup from "yup";
import { FormBodyBuilder } from "./admin/connectors/types";
import { StringOrNumberOption } from "@/components/Dropdown";
import {
  Select,
  SelectItem,
  SelectContent,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { FiInfo, FiX } from "react-icons/fi";
import ReactMarkdown from "react-markdown";
import { FaMarkdown } from "react-icons/fa";
import { useState, useEffect, memo, JSX } from "react";
import remarkGfm from "remark-gfm";
import Checkbox from "@/refresh-components/inputs/Checkbox";

import { Section } from "@/layouts/general-layouts";
import { cn, transformLinkUri } from "@/lib/utils";
import FileInput from "@/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput";
import InputDatePicker from "@/refresh-components/inputs/InputDatePicker";
import { RichTextSubtext } from "./RichTextSubtext";
import {
  TypedFile,
  createTypedFile,
  getFileTypeDefinitionForField,
  FILE_TYPE_DEFINITIONS,
} from "@/lib/connectors/fileTypes";
import Text from "@/refresh-components/texts/Text";
import CreateButton from "@/refresh-components/buttons/CreateButton";

import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import InputTextArea, {
  InputTextAreaProps,
} from "@/refresh-components/inputs/InputTextArea";
import { SvgEye, SvgEyeClosed } from "@opal/icons";

export function SectionHeader({
  children,
}: {
  children: string | JSX.Element;
}) {
  return <div className="mb-4 font-bold text-lg">{children}</div>;
}

export function Label({
  children,
  small,
  className,
  htmlFor,
}: {
  children: string | JSX.Element;
  small?: boolean;
  className?: string;
  htmlFor?: string;
}) {
  return (
    <label
      {...(htmlFor ? { htmlFor } : {})}
      className={`block font-medium ${className} ${
        small ? "text-sm" : "text-base"
      }`}
    >
      {children}
    </label>
  );
}

export function LabelWithTooltip({
  children,
  tooltip,
}: {
  children: string | JSX.Element;
  tooltip: string;
}) {
  return (
    <div className="flex items-center gap-x-2">
      <Label>{children}</Label>
      <ToolTipDetails>{tooltip}</ToolTipDetails>
    </div>
  );
}

export function SubLabel({ children }: { children: string | JSX.Element }) {
  // Add whitespace-pre-wrap for multiline descriptions (when children is a string with newlines)
  const hasNewlines = typeof children === "string" && children.includes("\n");

  // If children is a string, use RichTextSubtext to parse and render links
  if (typeof children === "string") {
    return (
      <span className="block text-sm text-text-03 mb-2">
        <RichTextSubtext
          text={children}
          className={hasNewlines ? "whitespace-pre-wrap" : ""}
        />
      </span>
    );
  }

  return (
    <span
      className={`block text-sm text-text-03 mb-2 ${
        hasNewlines ? "whitespace-pre-wrap" : ""
      }`}
    >
      {children}
    </span>
  );
}

export function ManualErrorMessage({ children }: { children: string }) {
  return <div className="text-action-danger-05 text-sm">{children}</div>;
}

export function ExplanationText({
  text,
  link,
}: {
  text: string;
  link?: string;
}) {
  return link ? (
    <a
      className="underline text-text-500 cursor-pointer text-xs font-medium"
      target="_blank"
      href={link}
    >
      {text}
    </a>
  ) : (
    <Text as="p" text03 secondaryBody>
      {text}
    </Text>
  );
}

export function ToolTipDetails({ children }: { children: string }) {
  return (
    <SimpleTooltip tooltip={children} side="top" align="center">
      <FiInfo size={12} />
    </SimpleTooltip>
  );
}

export const FieldLabel = ({
  subtext,
  error,
  name,
  tooltip,
  optional,
  hideError,
  label,
  removeLabel,
  vertical,
}: {
  subtext?: string | JSX.Element;
  error?: string;
  name: string;
  tooltip?: string;
  optional?: boolean;
  hideError?: boolean;
  label: string;
  removeLabel?: boolean;
  vertical?: boolean;
}) => (
  <>
    <div
      className={`flex ${
        vertical ? "flex-col" : "flex-row"
      } gap-x-2 items-start`}
    >
      <div className="flex gap-x-2 items-center">
        {!removeLabel && (
          <Label small={false} htmlFor={name}>
            {label}
          </Label>
        )}
        {optional ? <span>(optional) </span> : ""}
        {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}
      </div>
      {error ? (
        <ManualErrorMessage>{error}</ManualErrorMessage>
      ) : (
        !hideError && (
          <ErrorMessage
            name={name}
            component="div"
            className="text-action-danger-05 my-auto text-sm"
          />
        )
      )}
    </div>
    {subtext && <SubLabel>{subtext}</SubLabel>}
  </>
);

export function TextFormField({
  name,
  label,
  subtext,
  placeholder,
  type = "text",
  optional,
  includeRevert,
  isTextArea = false,
  disabled = false,
  autoCompleteEnabled = false,
  error,
  defaultHeight,
  isCode = false,
  fontSize,
  hideError,
  tooltip,
  explanationText,
  explanationLink,
  small,
  maxWidth,
  removeLabel,
  min,
  onChange,
  width,
  vertical,
  className,
  showPasswordToggle = false,
}: {
  name: string;
  removeLabel?: boolean;
  label: string;
  subtext?: string | JSX.Element;
  placeholder?: string;
  includeRevert?: boolean;
  optional?: boolean;
  type?: string;
  isTextArea?: boolean;
  disabled?: boolean;
  autoCompleteEnabled?: boolean;
  error?: string;
  defaultHeight?: string;
  isCode?: boolean;
  fontSize?: "sm" | "md" | "lg";
  maxWidth?: string;
  hideError?: boolean;
  tooltip?: string;
  explanationText?: string;
  explanationLink?: string;
  small?: boolean;
  min?: number;
  onChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;
  width?: string;
  vertical?: boolean;
  className?: string;
  showPasswordToggle?: boolean;
}) {
  let heightString = defaultHeight || "";
  if (isTextArea && !heightString) {
    heightString = "h-28";
  }

  const [, , { setValue }] = useField(name);

  const handleChange = (
    e: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>
  ) => {
    if (onChange) {
      onChange(e as React.ChangeEvent<HTMLInputElement>);
    } else {
      setValue(e.target.value);
    }
  };
  const textSizeClasses = {
    sm: {
      label: "text-sm",
      input: "text-sm",
      placeholder: "text-sm",
    },
    md: {
      label: "text-base",
      input: "text-base",
      placeholder: "text-base",
    },
    lg: {
      label: "text-lg",
      input: "text-lg",
      placeholder: "text-lg",
    },
  };

  const sizeClass = textSizeClasses[fontSize || "sm"];
  const isPasswordField = type === "password";
  const [isPasswordVisible, setIsPasswordVisible] = useState(false);
  const effectiveType = isPasswordField && isPasswordVisible ? "text" : type;

  return (
    <div className={`w-full ${maxWidth} ${width}`}>
      <FieldLabel
        key={name}
        subtext={subtext}
        error={error}
        name={name}
        tooltip={tooltip}
        optional={optional}
        hideError={hideError}
        label={label}
        removeLabel={removeLabel}
        vertical={vertical}
      />
      <div className={`w-full flex ${includeRevert && "gap-x-2"} relative`}>
        <Field
          onChange={handleChange}
          min={min}
          as={isTextArea ? "textarea" : "input"}
          type={effectiveType}
          data-testid={name}
          name={name}
          id={name}
          className={`
            ${small && sizeClass.input}
            flex
            h-10
            w-full
            rounded-md
            border
            px-3
            py-2
            mt-1
            file:border-0
            file:bg-transparent
            file:text-sm
            file:font-medium
            file:text-text-05
            placeholder:text-text-02
            placeholder:font-description
            placeholder:${sizeClass.placeholder}
            caret-accent
            focus-visible:outline-none
            focus-visible:ring-1
            focus-visible:ring-lighter-agent
            focus-visible:ring-offset-1
            disabled:cursor-not-allowed
            disabled:opacity-50
            md:text-sm
            border-border-03
            ring-offset-background-neutral-00
            file:text-text-inverted-05
            text-text-04

            ${heightString}
            ${sizeClass.input}
            ${disabled ? "bg-background-neutral-02" : ""}
            ${isCode ? "font-mono" : ""}
            ${className}
            bg-background-neutral-00
            ${isPasswordField && showPasswordToggle ? "pr-10" : ""}
          `}
          disabled={disabled}
          placeholder={placeholder}
          autoComplete={autoCompleteEnabled ? undefined : "off"}
        />
        {!isTextArea && isPasswordField && showPasswordToggle && (
          <button
            type="button"
            aria-label={isPasswordVisible ? "Hide password" : "Show password"}
            className="absolute right-3 top-1/2 -translate-y-1/2 stroke-text-02 hover:stroke-text-03 mt-0.5"
            onClick={() => setIsPasswordVisible((v) => !v)}
            tabIndex={0}
          >
            {isPasswordVisible ? (
              <SvgEye className="h-4 w-4" />
            ) : (
              <SvgEyeClosed className="h-4 w-4" />
            )}
          </button>
        )}
      </div>

      {explanationText && (
        <ExplanationText link={explanationLink} text={explanationText} />
      )}
    </div>
  );
}

export function FileUploadFormField({
  name,
  label,
  subtext,
}: {
  name: string;
  label: string;
  subtext?: string | JSX.Element;
}) {
  // We create a *temporary* field inside of `Formik` to throw the `File` object into.
  // The actual *contents* of the file will be thrown into the field called `name`.
  const fileName = `temporary.filename-${name}`;
  const [fileField] = useField<File>(fileName);
  const [, , contentsHelper] = useField<string>(name);

  useEffect(() => {
    const reader = new FileReader();
    reader.onload = (e) => {
      contentsHelper.setValue(e.target?.result as string);
    };
    if (fileField.value instanceof File) {
      reader.readAsText(fileField.value);
    }
  }, [contentsHelper, fileField.value]);

  return (
    <div className="w-full">
      <FieldLabel name={name} label={label} subtext={subtext} />
      <FileInput name={fileName} multiple={false} hideError />
    </div>
  );
}

export function TypedFileUploadFormField({
  name,
  label,
  subtext,
}: {
  name: string;
  label: string;
  subtext?: string | JSX.Element;
}) {
  const [field, , helpers] = useField<TypedFile | null>(name);
  const [customError, setCustomError] = useState<string>("");
  const [isValidating, setIsValidating] = useState(false);
  const [description, setDescription] = useState<string>("");

  useEffect(() => {
    const typeDefinitionKey = getFileTypeDefinitionForField(name);
    if (typeDefinitionKey) {
      setDescription(
        FILE_TYPE_DEFINITIONS[typeDefinitionKey].description || ""
      );
    }
  }, [name]);

  useEffect(() => {
    const validateFile = async () => {
      if (!field.value) {
        setIsValidating(false);
        return;
      }

      setIsValidating(true);

      try {
        const validation = await field.value.validate();
        if (validation?.isValid) {
          setCustomError("");
        } else {
          setCustomError(validation?.errors.join(", ") || "Unknown error");
          helpers.setValue(null);
        }
      } catch (error) {
        setCustomError(
          error instanceof Error ? error.message : "Validation error"
        );
        helpers.setValue(null);
      } finally {
        setIsValidating(false);
      }
    };

    validateFile();
  }, [field.value, helpers]);

  const handleFileSelection = async (files: File[]) => {
    if (files.length === 0) {
      helpers.setValue(null);
      setCustomError("");
      return;
    }

    const file = files[0];
    if (!file) {
      setCustomError("File selection error");
      return;
    }

    const typeDefinitionKey = getFileTypeDefinitionForField(name);

    if (!typeDefinitionKey) {
      setCustomError(`No file type definition found for field: ${name}`);
      return;
    }

    try {
      const typedFile = createTypedFile(file, name, typeDefinitionKey);
      helpers.setValue(typedFile);
      setCustomError("");
    } catch (error) {
      setCustomError(error instanceof Error ? error.message : "Unknown error");
      helpers.setValue(null);
    } finally {
      setIsValidating(false);
    }
  };

  return (
    <div className="w-full">
      <FieldLabel name={name} label={label} subtext={subtext} />
      {description && (
        <div className="text-sm text-text-03 mb-2">{description}</div>
      )}
      <FileUpload
        selectedFiles={field.value ? [field.value.file] : []}
        setSelectedFiles={handleFileSelection}
        multiple={false}
      />
      {/* Validation feedback */}
      {isValidating && (
        <div className="text-status-info-05 text-sm mt-1">
          Validating file...
        </div>
      )}

      {customError ? (
        <div className="text-action-danger-05 text-sm mt-1">{customError}</div>
      ) : (
        <ErrorMessage
          name={name}
          component="div"
          className="text-action-danger-05 text-sm mt-1"
        />
      )}
    </div>
  );
}

export function MultiSelectField({
  name,
  label,
  subtext,
  options,
  onChange,
  error,
  hideError,
  small,
  selectedInitially,
}: {
  selectedInitially: string[];
  name: string;
  label: string;
  subtext?: string | JSX.Element;
  options: { value: string; label: string }[];
  onChange?: (selected: string[]) => void;
  error?: string;
  hideError?: boolean;
  small?: boolean;
}) {
  const [selectedOptions, setSelectedOptions] =
    useState<string[]>(selectedInitially);

  const handleCheckboxChange = (value: string) => {
    const newSelectedOptions = selectedOptions.includes(value)
      ? selectedOptions.filter((option) => option !== value)
      : [...selectedOptions, value];

    setSelectedOptions(newSelectedOptions);
    if (onChange) {
      onChange(newSelectedOptions);
    }
  };

  return (
    <div className="mb-6">
      <div className="flex gap-x-2 items-center">
        <Label small={small}>{label}</Label>
        {error ? (
          <ManualErrorMessage>{error}</ManualErrorMessage>
        ) : (
          !hideError && (
            <ErrorMessage
              name={name}
              component="div"
              className="text-action-danger-05 my-auto text-sm"
            />
          )
        )}
      </div>

      {subtext && <SubLabel>{subtext}</SubLabel>}
      <div className="mt-2">
        {options.map((option) => (
          <label key={option.value} className="flex items-center mb-2">
            <input
              type="checkbox"
              name={name}
              value={option.value}
              checked={selectedOptions.includes(option.value)}
              onChange={() => handleCheckboxChange(option.value)}
              className="mr-2"
            />
            {option.label}
          </label>
        ))}
      </div>
    </div>
  );
}
interface MarkdownPreviewProps {
  name: string;
  label: string;
  placeholder?: string;
  error?: string;
}

export const MarkdownFormField = ({
  name,
  label,
  error,
  placeholder = "Enter your markdown here...",
}: MarkdownPreviewProps) => {
  const [field] = useField(name);
  const [isPreviewOpen, setIsPreviewOpen] = useState(false);

  const togglePreview = () => {
    setIsPreviewOpen(!isPreviewOpen);
  };

  return (
    <div className="flex flex-col space-y-4 mb-4">
      <Label>{label}</Label>
      <div className="border border-border-02 rounded-md">
        <div className="flex items-center justify-between px-4 py-2 bg-background-neutral-02 rounded-t-md">
          <div className="flex items-center space-x-2">
            <FaMarkdown className="text-text-03" />
            <span className="text-sm font-semibold text-text-04">Markdown</span>
          </div>
          <button
            type="button"
            onClick={togglePreview}
            className="text-sm font-semibold text-text-04 hover:text-text-05 focus:outline-none"
          >
            {isPreviewOpen ? "Write" : "Preview"}
          </button>
        </div>
        {isPreviewOpen ? (
          <div className="p-4 border-t border-border-02">
            <ReactMarkdown
              className="prose dark:prose-invert"
              remarkPlugins={[remarkGfm]}
              urlTransform={transformLinkUri}
            >
              {field.value}
            </ReactMarkdown>
          </div>
        ) : (
          <div className="pt-2 px-2">
            <textarea
              {...field}
              rows={2}
              placeholder={placeholder}
              className={`w-full p-2 border border-border-02 rounded-md`}
            />
          </div>
        )}
      </div>
      {error ? (
        <ManualErrorMessage>{error}</ManualErrorMessage>
      ) : (
        <ErrorMessage
          name={name}
          component="div"
          className="text-action-danger-05 text-sm mt-1"
        />
      )}
    </div>
  );
};

interface BooleanFormFieldProps {
  name: string;
  label: string;
  subtext?: string | JSX.Element;
  removeIndent?: boolean;
  small?: boolean;
  noLabel?: boolean;
  disabled?: boolean;
  optional?: boolean;
  tooltip?: string;
  disabledTooltip?: string;
  disabledTooltipSide?: "top" | "bottom" | "left" | "right";
  onChange?: (checked: boolean) => void;
}

export const BooleanFormField = memo(function BooleanFormField({
  name,
  label,
  subtext,
  removeIndent,
  noLabel,
  optional,
  small,
  disabled,
  tooltip,
  disabledTooltip,
  disabledTooltipSide,
  onChange,
}: BooleanFormFieldProps) {
  // Generate a stable, valid id from the field name for label association
  const checkboxId = `checkbox-${name.replace(/[^a-zA-Z0-9_-]/g, "_")}`;

  return (
    <div>
      <FastField
        name={name}
        type="checkbox"
        disabled={disabled}
        shouldUpdate={(next: any, prev: any) =>
          next.disabled !== prev.disabled ||
          next.formik.values !== prev.formik.values
        }
      >
        {({ field, form }: any) => {
          const toggle = () => {
            if (!disabled) {
              const newValue = !field.value;
              form.setFieldValue(name, newValue);
              if (onChange) onChange(newValue);
            }
          };

          return (
            <SimpleTooltip
              // This may seem confusing, but we only want to show the `disabledTooltip` if and only if the `BooleanFormField` is disabled.
              // If it disabled, then we "enable" the showing of the tooltip. Thus, `disabled={!disabled}` is not a mistake.
              disabled={!disabled}
              tooltip={disabledTooltip}
              side={disabledTooltipSide}
            >
              <Section flexDirection="row" width="fit" height="fit" gap={0}>
                <Checkbox
                  aria-label={`${label
                    .toLowerCase()
                    .replace(" ", "-")}-checkbox`}
                  id={checkboxId}
                  className={cn(
                    disabled && "opacity-50",
                    removeIndent ? "mr-2" : "mx-3"
                  )}
                  checked={Boolean(field.value)}
                  onCheckedChange={(checked) => {
                    if (!disabled) {
                      form.setFieldValue(name, checked === true);
                      if (onChange) onChange(checked === true);
                    }
                  }}
                />
                {!noLabel && (
                  <div
                    className={disabled ? "" : "cursor-pointer"}
                    onClick={toggle}
                  >
                    <div className="flex items-center gap-x-2">
                      <Label small={small}>{`${label}${
                        optional ? " (Optional)" : ""
                      }`}</Label>
                      {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}
                    </div>
                    {subtext && <SubLabel>{subtext}</SubLabel>}
                  </div>
                )}
              </Section>
            </SimpleTooltip>
          );
        }}
      </FastField>

      <ErrorMessage
        name={name}
        component="div"
        className="text-action-danger-05 text-sm mt-1"
      />
    </div>
  );
});

interface TextArrayFieldProps<T extends Yup.AnyObject> {
  name: string;
  label: string | JSX.Element;
  values: T;
  subtext?: string | JSX.Element;
  type?: string;
  tooltip?: string;
  minFields?: number;
  placeholder?: string;
  disabled?: boolean;
}

export function TextArrayField<T extends Yup.AnyObject>({
  name,
  label,
  values,
  subtext,
  type,
  tooltip,
  minFields = 0,
  placeholder = "",
  disabled = false,
}: TextArrayFieldProps<T>) {
  return (
    <div className="mb-4">
      <div className="flex gap-x-2 items-center">
        <Label>{label}</Label>
        {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}
      </div>
      {subtext && <SubLabel>{subtext}</SubLabel>}

      <FieldArray
        name={name}
        render={(arrayHelpers: ArrayHelpers) => (
          <div>
            {values[name] &&
              values[name].length > 0 &&
              (values[name] as string[]).map((_, index) => (
                <div key={index} className="mt-2">
                  <div className="flex">
                    <Field
                      type={type}
                      name={`${name}.${index}`}
                      id={name}
                      className={`
                      border
                      border-border
                      bg-background
                      rounded
                      w-full
                      py-2
                      px-3
                      mr-4
                      disabled:cursor-not-allowed
                      `}
                      // Disable autocomplete since the browser doesn't know how to handle an array of text fields
                      autoComplete="off"
                      placeholder={placeholder}
                      disabled={disabled}
                    />
                    <div className="my-auto">
                      {index >= minFields ? (
                        <FiX
                          className="my-auto w-10 h-10 cursor-pointer hover:bg-background-neutral-02 rounded p-2"
                          onClick={() => {
                            if (!disabled) {
                              arrayHelpers.remove(index);
                            }
                          }}
                        />
                      ) : (
                        <div className="w-10 h-10" />
                      )}
                    </div>
                  </div>
                  <ErrorMessage
                    name={`${name}.${index}`}
                    component="div"
                    className="text-action-danger-05 text-sm mt-1"
                  />
                </div>
              ))}

            <CreateButton
              onClick={() => {
                if (!disabled) {
                  arrayHelpers.push("");
                }
              }}
              type="button"
              disabled={disabled}
            >
              Add New
            </CreateButton>
          </div>
        )}
      />
    </div>
  );
}

interface TextArrayFieldBuilderProps<T extends Yup.AnyObject> {
  name: string;
  label: string;
  subtext?: string | JSX.Element;
  type?: string;
  tooltip?: string;
}

export function TextArrayFieldBuilder<T extends Yup.AnyObject>(
  props: TextArrayFieldBuilderProps<T>
): FormBodyBuilder<T> {
  const _TextArrayField: FormBodyBuilder<T> = (values) => (
    <TextArrayField {...props} values={values} />
  );
  return _TextArrayField;
}

interface SelectorFormFieldProps {
  name: string;
  label?: string;
  options: StringOrNumberOption[];
  subtext?: string | JSX.Element;
  includeDefault?: boolean;
  side?: "top" | "right" | "bottom" | "left";
  maxHeight?: string;
  onSelect?: (selected: string | number | null) => void;
  defaultValue?: string;
  tooltip?: string;
  includeReset?: boolean;
  fontSize?: "sm" | "md" | "lg";
  small?: boolean;
  disabled?: boolean;
}

export function SelectorFormField({
  name,
  label,
  options,
  subtext,
  side = "bottom",
  maxHeight,
  onSelect,
  defaultValue,
  tooltip,
  includeReset = false,
  fontSize = "md",
  small = false,
  disabled = false,
}: SelectorFormFieldProps) {
  const [field] = useField<string>(name);
  const { setFieldValue } = useFormikContext();
  const [container, setContainer] = useState<HTMLDivElement | null>(null);

  const currentlySelected = options.find(
    (option) => option.value?.toString() === field.value?.toString()
  );

  const textSizeClasses = {
    sm: {
      label: "text-sm",
      input: "text-sm",
      placeholder: "text-sm",
    },
    md: {
      label: "text-base",
      input: "text-base",
      placeholder: "text-base",
    },
    lg: {
      label: "text-lg",
      input: "text-lg",
      placeholder: "text-lg",
    },
  };

  const sizeClass = textSizeClasses[fontSize];

  return (
    <div>
      {label && (
        <div className="flex gap-x-2 items-center">
          <Label className={sizeClass.label} small={small}>
            {label}
          </Label>
          {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}
        </div>
      )}
      {subtext && <SubLabel>{subtext}</SubLabel>}
      <div className="mt-2" ref={setContainer}>
        <Select
          value={field.value || defaultValue}
          onValueChange={
            onSelect ||
            ((selected) =>
              selected == "__none__"
                ? setFieldValue(name, null)
                : setFieldValue(name, selected))
          }
          defaultValue={defaultValue}
          disabled={disabled}
        >
          <SelectTrigger className={sizeClass.input} disabled={disabled}>
            <SelectValue placeholder="Select...">
              {currentlySelected?.name || defaultValue || ""}
            </SelectValue>
          </SelectTrigger>

          {container && (
            <SelectContent
              side={side}
              className={`
               ${maxHeight ? `${maxHeight}` : "max-h-72"}
               overflow-y-scroll
               ${sizeClass.input}
              `}
              container={container}
            >
              {options.length === 0 ? (
                <SelectItem value="default">Select...</SelectItem>
              ) : (
                options.map((option) => (
                  <SelectItem
                    hideCheck
                    icon={option.icon}
                    key={option.value}
                    value={String(option.value)}
                    selected={field.value === option.value}
                  >
                    {option.name}
                  </SelectItem>
                ))
              )}
              {includeReset && (
                <SelectItem
                  value={"__none__"}
                  onSelect={() => setFieldValue(name, null)}
                >
                  None
                </SelectItem>
              )}
            </SelectContent>
          )}
        </Select>
      </div>

      <ErrorMessage
        name={name}
        component="div"
        className="text-action-danger-05 text-sm mt-1"
      />
    </div>
  );
}

export interface DatePickerFieldProps {
  label: string;
  name: string;
  subtext?: string;
  startYear?: number;
  disabled?: boolean;
}

export function DatePickerField({
  label,
  name,
  subtext,
  startYear = 1970,
  disabled = false,
}: DatePickerFieldProps) {
  const [field, _, helper] = useField<Date | null>(name);

  return (
    <div>
      <FieldLabel label={label} name={name} subtext={subtext} />
      <InputDatePicker
        selectedDate={field.value}
        setSelectedDate={helper.setValue}
        startYear={startYear}
        disabled={disabled}
      />
    </div>
  );
}

export interface TextAreaFieldProps extends InputTextAreaProps {
  name: string;
}

export function TextAreaField(props: TextAreaFieldProps) {
  const [field, _, helper] = useField<string>(props.name);

  return (
    <InputTextArea
      value={field.value}
      onChange={(event) => {
        helper.setValue(event.target.value);
      }}
      {...props}
    />
  );
}


================================================
FILE: web/src/components/FormErrorHelpers.tsx
================================================
"use client";

import { useEffect, useRef } from "react";
import { useFormikContext } from "formik";

// After a submit with errors, scroll + focus the first invalid field
export function FormErrorFocus() {
  const { submitCount, errors, isSubmitting } = useFormikContext<any>();
  const lastHandled = useRef(0);

  useEffect(() => {
    if (isSubmitting) return;
    if (submitCount <= 0 || submitCount === lastHandled.current) return;

    const keys = Object.keys(errors || {});
    if (keys.length === 0) return;

    const timer = setTimeout(() => {
      try {
        let target: HTMLElement | null = null;

        for (const key of keys) {
          target = document.getElementById(key) as HTMLElement | null;
          if (target) break;
        }

        // 2) Fallback: first element with matching name
        if (!target) {
          for (const key of keys) {
            const byName = document.getElementsByName(key);
            if (byName && byName.length > 0) {
              target = byName[0] as HTMLElement;
              break;
            }
          }
        }

        if (target) {
          target.scrollIntoView({ behavior: "smooth", block: "center" });
          if (typeof (target as any).focus === "function") {
            (target as any).focus({ preventScroll: true });
          }
        }
      } finally {
        lastHandled.current = submitCount;
      }
    }, 0);

    return () => clearTimeout(timer);
  }, [submitCount, errors, isSubmitting]);

  return null;
}


================================================
FILE: web/src/components/GatedContentWrapper.tsx
================================================
"use client";

import { usePathname } from "next/navigation";
import AccessRestrictedPage from "@/components/errorPages/AccessRestrictedPage";

// Paths accessible even when gated - allows users to manage billing updates and seat counts
const ALLOWED_GATED_PATHS = ["/admin/billing", "/admin/users"];

/**
 * Check if pathname matches an allowed path exactly or is a subpath.
 * Uses strict matching to prevent bypasses like "/admin/billing-foo".
 */
function isPathAllowed(pathname: string): boolean {
  return ALLOWED_GATED_PATHS.some(
    (allowedPath) =>
      pathname === allowedPath || pathname.startsWith(allowedPath + "/")
  );
}

export default function GatedContentWrapper({
  children,
}: {
  children: React.ReactNode;
}) {
  const pathname = usePathname();

  if (isPathAllowed(pathname)) {
    return <>{children}</>;
  }

  return <AccessRestrictedPage />;
}


================================================
FILE: web/src/components/GenericMultiSelect.tsx
================================================
import { FormikProps, ErrorMessage } from "formik";
import Text from "@/refresh-components/texts/Text";
import Button from "@/refresh-components/buttons/Button";
import InputComboBox from "@/refresh-components/inputs/InputComboBox/InputComboBox";
import { Disabled } from "@opal/core";
import { SvgX } from "@opal/icons";
export type GenericMultiSelectFormType<T extends string> = {
  [K in T]: number[];
};

interface GenericItem {
  id: number;
  name: string;
}

interface GenericMultiSelectProps<
  T extends string,
  F extends GenericMultiSelectFormType<T>,
> {
  formikProps: FormikProps<F>;
  fieldName: T;
  label: string;
  subtext?: string;
  items: GenericItem[] | undefined;
  isLoading: boolean;
  error: any;
  emptyMessage: string;
  disabled?: boolean;
  disabledMessage?: string;
}

export function GenericMultiSelect<
  T extends string,
  F extends GenericMultiSelectFormType<T>,
>({
  formikProps,
  fieldName,
  label,
  subtext,
  items,
  isLoading,
  error,
  emptyMessage,
  disabled = false,
  disabledMessage,
}: GenericMultiSelectProps<T, F>) {
  if (isLoading) {
    return (
      <div className="flex flex-col gap-2 w-full">
        <Text as="p" mainUiAction>
          {label}
        </Text>
        <div className="animate-pulse bg-background-neutral-02 h-10 w-full rounded-08" />
      </div>
    );
  }

  if (error) {
    return (
      <div className="flex flex-col gap-2 w-full">
        <Text as="p" mainUiAction>
          {label}
        </Text>
        <Text as="p" text03 className="text-action-danger-05">
          Failed to load {label.toLowerCase()}. Please try again.
        </Text>
      </div>
    );
  }

  if (!items || items.length === 0) {
    return (
      <div className="flex flex-col gap-2 w-full">
        <Text as="p" mainUiAction>
          {label}
        </Text>
        <Text as="p" text03>
          {emptyMessage}
        </Text>
      </div>
    );
  }

  const selectedIds = (formikProps.values[fieldName] as number[]) || [];
  const selectedItems = items.filter((item) => selectedIds.includes(item.id));

  const handleSelect = (itemId: number) => {
    if (disabled) return;
    const currentIds = (formikProps.values[fieldName] as number[]) || [];
    if (!currentIds.includes(itemId)) {
      formikProps.setFieldValue(fieldName, [...currentIds, itemId]);
    }
  };

  const handleRemove = (itemId: number) => {
    if (disabled) return;
    const currentIds = (formikProps.values[fieldName] as number[]) || [];
    formikProps.setFieldValue(
      fieldName,
      currentIds.filter((id) => id !== itemId)
    );
  };

  return (
    <div className="flex flex-col gap-2 w-full">
      <Text as="p" mainUiAction>
        {label}
      </Text>

      {subtext && (
        <Text as="p" text03>
          {disabled ? disabledMessage : subtext}
        </Text>
      )}

      <Disabled disabled={disabled}>
        <div>
          <InputComboBox
            placeholder="Search..."
            value=""
            onChange={() => {}}
            onValueChange={(selectedValue) => {
              const numValue = parseInt(selectedValue, 10);
              if (!isNaN(numValue)) {
                handleSelect(numValue);
              }
            }}
            options={items
              .filter((item) => !selectedIds.includes(item.id))
              .map((item) => ({
                label: item.name,
                value: String(item.id),
              }))}
            strict
            leftSearchIcon
          />
        </div>
      </Disabled>

      {selectedItems.length > 0 && (
        <div className="flex flex-wrap gap-2">
          {selectedItems.map((item) => (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <Button
              key={item.id}
              secondary
              disabled={disabled}
              rightIcon={SvgX}
              onClick={() => handleRemove(item.id)}
              className="!px-2 !py-1"
            >
              {item.name}
            </Button>
          ))}
        </div>
      )}

      <ErrorMessage name={fieldName} component="div">
        {(msg) => (
          <Text as="p" text03 className="text-action-danger-05">
            {msg}
          </Text>
        )}
      </ErrorMessage>
    </div>
  );
}


================================================
FILE: web/src/components/GroupsMultiSelect.tsx
================================================
import { FormikProps } from "formik";
import { Label } from "@/components/Field";
import { useUserGroups } from "@/lib/hooks";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { GenericMultiSelect } from "@/components/GenericMultiSelect";

export type GroupsMultiSelectFormType = {
  groups: number[];
};

interface GroupsMultiSelectProps<T extends GroupsMultiSelectFormType> {
  formikProps: FormikProps<T>;
  label?: string;
  subtext?: string;
  disabled?: boolean;
  disabledMessage?: string;
}

export function GroupsMultiSelect<T extends GroupsMultiSelectFormType>({
  formikProps,
  label = "User Groups",
  subtext = "Select which user groups can access this resource",
  disabled = false,
  disabledMessage,
}: GroupsMultiSelectProps<T>) {
  const {
    data: userGroups,
    isLoading: userGroupsIsLoading,
    error,
  } = useUserGroups();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  // Show loading state while checking enterprise features or loading groups
  if (userGroupsIsLoading || isPaidEnterpriseFeaturesEnabled === undefined) {
    return (
      <div className="mb-4">
        <Label>{label}</Label>
        <div className="animate-pulse bg-background-200 h-10 w-full rounded-lg mt-2"></div>
      </div>
    );
  }

  if (!isPaidEnterpriseFeaturesEnabled) {
    return null;
  }

  return (
    <GenericMultiSelect
      formikProps={formikProps}
      fieldName="groups"
      label={label}
      subtext={subtext}
      items={userGroups}
      isLoading={false}
      error={error}
      emptyMessage="No user groups available. Please create a user group first."
      disabled={disabled}
      disabledMessage={disabledMessage}
    />
  );
}


================================================
FILE: web/src/components/HoverPopup.tsx
================================================
import { JSX } from "react";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";

interface HoverPopupProps {
  mainContent: string | JSX.Element;
  popupContent: string | JSX.Element;
  classNameModifications?: string;
  direction?: "left" | "left-top" | "bottom" | "top";
  style?: "basic" | "dark";
}

export const HoverPopup = ({
  mainContent,
  popupContent,
  classNameModifications,
  direction = "bottom",
}: HoverPopupProps) => {
  return (
    <TooltipProvider>
      <Tooltip>
        <TooltipTrigger asChild>
          <div>{mainContent}</div>
        </TooltipTrigger>
        <TooltipContent
          side={direction === "left-top" ? "left" : direction}
          className={classNameModifications}
        >
          {popupContent}
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
};


================================================
FILE: web/src/components/IsPublicGroupSelector.tsx
================================================
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import React, { useState, useEffect } from "react";
import { FormikProps } from "formik";
import { UserRole } from "@/lib/types";
import { useUserGroups } from "@/lib/hooks";
import { BooleanFormField } from "@/components/Field";
import { useUser } from "@/providers/UserProvider";
import { GroupsMultiSelect } from "./GroupsMultiSelect";

export type IsPublicGroupSelectorFormType = {
  is_public: boolean;
  groups: number[];
};

// This should be included for all forms that require groups / public access
// to be set, and access to this / permissioning should be handled within this component itself.
export const IsPublicGroupSelector = <T extends IsPublicGroupSelectorFormType>({
  formikProps,
  objectName,
  publicToWhom = "Users",
  removeIndent = false,
  enforceGroupSelection = true,
  smallLabels = false,
}: {
  formikProps: FormikProps<T>;
  objectName: string;
  publicToWhom?: string;
  removeIndent?: boolean;
  enforceGroupSelection?: boolean;
  smallLabels?: boolean;
}) => {
  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();
  const { isAdmin, user, isCurator } = useUser();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const [shouldHideContent, setShouldHideContent] = useState(false);

  useEffect(() => {
    if (user && userGroups && isPaidEnterpriseFeaturesEnabled) {
      const isUserAdmin = user.role === UserRole.ADMIN;
      if (!isUserAdmin && userGroups.length > 0) {
        formikProps.setFieldValue("is_public", false);
      }
      if (
        userGroups.length === 1 &&
        userGroups[0] !== undefined &&
        !isUserAdmin
      ) {
        formikProps.setFieldValue("groups", [userGroups[0].id]);
        setShouldHideContent(true);
      } else if (formikProps.values.is_public) {
        formikProps.setFieldValue("groups", []);
        setShouldHideContent(false);
      } else {
        setShouldHideContent(false);
      }
    }
  }, [user, userGroups, isPaidEnterpriseFeaturesEnabled]);

  if (userGroupsIsLoading) {
    return <div>Loading...</div>;
  }
  if (!isPaidEnterpriseFeaturesEnabled) {
    return null;
  }

  let firstUserGroupName = "Unknown";
  if (userGroups) {
    const userGroup = userGroups[0];
    if (userGroup) {
      firstUserGroupName = userGroup.name;
    }
  }

  if (shouldHideContent && enforceGroupSelection) {
    return (
      <>
        {userGroups && (
          <div className="mb-1 font-medium text-base">
            This {objectName} will be assigned to group{" "}
            <b>{firstUserGroupName}</b>.
          </div>
        )}
      </>
    );
  }

  return (
    <div>
      {isAdmin && (
        <>
          <BooleanFormField
            name="is_public"
            removeIndent={removeIndent}
            small={smallLabels}
            label={
              publicToWhom === "Curators"
                ? `Make this ${objectName} Curator Accessible?`
                : `Make this ${objectName} Public?`
            }
            disabled={!isAdmin}
            subtext={
              <span className="block mt-2 text-sm text-text-600 dark:text-neutral-400">
                If set, then this {objectName} will be usable by{" "}
                <b>All {publicToWhom}</b>. Otherwise, only <b>Admins</b> and{" "}
                <b>{publicToWhom}</b> who have explicitly been given access to
                this {objectName} (e.g. via a User Group) will have access.
              </span>
            }
          />
        </>
      )}

      <GroupsMultiSelect
        formikProps={formikProps}
        label={`Assign group access for this ${objectName}`}
        subtext={
          isAdmin || !enforceGroupSelection
            ? `This ${objectName} will be visible/accessible by the groups selected below`
            : `Curators must select one or more groups to give access to this ${objectName}`
        }
        disabled={formikProps.values.is_public && !isCurator}
        disabledMessage={`This ${objectName} is public and available to all users.`}
      />
    </div>
  );
};


================================================
FILE: web/src/components/Loading.tsx
================================================
"use client";

import React, { useState, useEffect } from "react";
import "./loading.css";
import { ThreeDots } from "react-loader-spinner";
import { cn } from "@/lib/utils";

interface LoadingAnimationProps {
  text?: string;
  size?: "text-sm" | "text-md";
}

export const LoadingAnimation: React.FC<LoadingAnimationProps> = ({
  text,
  size,
}) => {
  const [dots, setDots] = useState("...");

  useEffect(() => {
    const interval = setInterval(() => {
      setDots((prevDots) => {
        switch (prevDots) {
          case ".":
            return "..";
          case "..":
            return "...";
          case "...":
            return ".";
          default:
            return "...";
        }
      });
    }, 500);

    return () => clearInterval(interval);
  }, []);

  return (
    <span className="loading-animation inline-flex">
      <span className={cn("mx-auto inline-flex", size)}>
        {text === undefined ? "Thinking" : text}
        <span className="dots">{dots}</span>
      </span>
    </span>
  );
};

export const ThreeDotsLoader = () => {
  return (
    <div className="flex my-auto">
      <div className="mx-auto">
        <ThreeDots
          height="30"
          width="50"
          color="#3b82f6"
          ariaLabel="grid-loading"
          radius="12.5"
          wrapperStyle={{}}
          wrapperClass=""
          visible={true}
        />
      </div>
    </div>
  );
};


================================================
FILE: web/src/components/MetadataBadge.tsx
================================================
import { JSX } from "react";

export function MetadataBadge({
  icon,
  value,
  flexNone,
}: {
  icon?: (props: { size?: number; className?: string }) => JSX.Element;
  value: string | JSX.Element;
  flexNone?: boolean;
}) {
  return (
    <div
      className={`
      text-xs 
      text-strong
      flex
      bg-accent-background-hovered 
      rounded-full 
      px-1
      py-0.5 
      w-fit 
      my-auto 
      select-none 
      ${flexNone ? "flex-none" : ""}`}
    >
      {icon &&
        icon({
          size: 12,
          className: flexNone ? "flex-none" : "mr-0.5 my-auto",
        })}
      <p className="max-w-[6rem] text-ellipsis overflow-hidden truncate whitespace-nowrap">
        {value}
      </p>
    </div>
  );
}


================================================
FILE: web/src/components/MultiSelectDropdown.tsx
================================================
import { useState } from "react";
import { Label, ManualErrorMessage } from "@/components/Field";
import CreatableSelect from "react-select/creatable";
import Select from "react-select";
import { ErrorMessage } from "formik";

interface Option {
  value: string;
  label: string;
}

interface MultiSelectDropdownProps {
  name: string;
  label: string;
  options: Option[];
  creatable: boolean;
  initialSelectedOptions?: Option[];
  direction?: "top" | "bottom";
  onChange: (selected: Option[]) => void;
  onCreate?: (created_name: string) => Promise<Option>;
  error?: string;
}

const getReactSelectStyles = () => ({
  control: (base: any) => ({
    ...base,
    backgroundColor: "var(--background-neutral-00)",
    borderColor: "var(--border-03)",
    color: "var(--text-04)",
  }),
  menu: (base: any) => ({
    ...base,
    backgroundColor: "var(--background-neutral-00)",
    border: "1px solid var(--border-03)",
    borderRadius: "4px",
    overflow: "hidden",
  }),
  menuList: (base: any) => ({
    ...base,
    backgroundColor: "var(--background-neutral-00)",
  }),
  option: (base: any, state: any) => ({
    ...base,
    backgroundColor: state.isSelected
      ? "var(--background-150)"
      : state.isFocused
        ? "var(--background-100)"
        : "transparent",
    color: "var(--text-04)",
  }),
  multiValue: (base: any) => ({
    ...base,
    backgroundColor: "var(--background-150)",
  }),
  multiValueLabel: (base: any) => ({
    ...base,
    color: "var(--text-04)",
  }),
  multiValueRemove: (base: any) => ({
    ...base,
    color: "var(--text-04)",
    ":hover": {
      backgroundColor: "var(--background-200)",
      color: "var(--text-04)",
    },
  }),
  input: (base: any) => ({
    ...base,
    color: "var(--text-04)",
  }),
  placeholder: (base: any) => ({
    ...base,
    color: "var(--text-02)",
  }),
  singleValue: (base: any) => ({
    ...base,
    color: "var(--text-04)",
  }),
});

const MultiSelectDropdown = ({
  name,
  label,
  options,
  creatable,
  onChange,
  onCreate,
  error,
  direction = "bottom",
  initialSelectedOptions = [],
}: MultiSelectDropdownProps) => {
  const [selectedOptions, setSelectedOptions] = useState<Option[]>(
    initialSelectedOptions
  );
  const [allOptions, setAllOptions] = useState<Option[]>(options);
  const [inputValue, setInputValue] = useState("");

  const handleInputChange = (input: string) => {
    setInputValue(input);
  };

  const handleChange = (selected: any) => {
    setSelectedOptions(selected || []);
    onChange(selected || []);
  };

  const handleCreateOption = async (inputValue: string) => {
    if (creatable) {
      if (!onCreate) {
        console.error("onCreate is required for creatable");
        return;
      }
      try {
        const newOption = await onCreate(inputValue);
        if (newOption) {
          setAllOptions([...options, newOption]);
          setSelectedOptions([...selectedOptions, newOption]);
          onChange([...selectedOptions, newOption]);
        }
      } catch (error) {
        console.error("Error creating option:", error);
      }
    } else {
      return;
    }
  };

  return (
    <div className="flex flex-col text-white space-y-4 mb-4">
      <Label>{label}</Label>
      {creatable ? (
        <CreatableSelect
          isMulti
          options={allOptions}
          value={selectedOptions}
          onChange={handleChange}
          onCreateOption={handleCreateOption}
          onInputChange={handleInputChange}
          inputValue={inputValue}
          menuPlacement={direction}
          styles={getReactSelectStyles()}
        />
      ) : (
        <Select
          isMulti
          options={allOptions}
          value={selectedOptions}
          onChange={handleChange}
          onInputChange={handleInputChange}
          inputValue={inputValue}
          menuPlacement={direction}
          styles={getReactSelectStyles()}
        />
      )}
      {error ? (
        <ManualErrorMessage>{error}</ManualErrorMessage>
      ) : (
        <ErrorMessage
          name={name}
          component="div"
          className="text-red-500 text-sm mt-1"
        />
      )}
    </div>
  );
};

export default MultiSelectDropdown;


================================================
FILE: web/src/components/NonSelectableConnectors.tsx
================================================
import { ConnectorStatus } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { Content } from "@opal/layouts";
import Text from "@/refresh-components/texts/Text";
import { SvgLock } from "@opal/icons";
interface NonSelectableConnectorsProps {
  connectors: ConnectorStatus<any, any>[];
  title: string;
  description: string;
}

export const NonSelectableConnectors = ({
  connectors,
  title,
  description,
}: NonSelectableConnectorsProps) => {
  if (connectors.length === 0) {
    return null;
  }

  return (
    <div className="mt-6 mb-4">
      <Content
        title={title}
        description={description}
        sizePreset="main-content"
        variant="section"
      />

      <div className="p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01">
        <div className="mb-2 flex items-center gap-1.5">
          <SvgLock className="h-3.5 w-3.5 stroke-text-03" />
          <Text as="p" figureSmallLabel text04 className="!mb-0">
            Unavailable connectors:
          </Text>
        </div>
        <div className="flex flex-wrap gap-1.5">
          {connectors.map((connector) => (
            <div
              key={`${connector.connector.id}-${connector.credential.id}`}
              className="flex items-center px-2 py-1 cursor-not-allowed opacity-80 bg-background-neutral-00 border border-border-02 rounded-12 text-xs"
            >
              <div className="flex items-center max-w-[200px] text-xs">
                <ConnectorTitle
                  connector={connector.connector}
                  ccPairId={connector.cc_pair_id}
                  ccPairName={connector.name}
                  isLink={false}
                  showMetadata={false}
                />
              </div>
            </div>
          ))}
        </div>
      </div>
    </div>
  );
};


================================================
FILE: web/src/components/OnyxInitializingLoader.tsx
================================================
"use client";

import { useContext } from "react";
import Logo from "@/refresh-components/Logo";
import { SettingsContext } from "@/providers/SettingsProvider";

export default function OnyxInitializingLoader() {
  const settings = useContext(SettingsContext);

  return (
    <div className="mx-auto my-auto animate-pulse">
      <Logo folded size={96} className="mx-auto mb-3" />
      <p className="text-lg text-text font-semibold">
        Initializing {settings?.enterpriseSettings?.application_name ?? "Onyx"}
      </p>
    </div>
  );
}


================================================
FILE: web/src/components/PageSelector.tsx
================================================
import React from "react";

const PAGINATION_OPTIONS_ON_EACH_SIDE = 2;

const getPaginationOptions = (
  currentPage: number,
  pageCount: number
): number[] => {
  const paginationOptions = [currentPage];
  // if (currentPage !== 1) {
  //   paginationOptions.push(currentPage)
  // }

  let offset = 1;

  // Add one because currentPage is included
  const maxPaginationOptions = PAGINATION_OPTIONS_ON_EACH_SIDE * 2 + 1;
  while (paginationOptions.length < maxPaginationOptions) {
    let added = false;
    if (currentPage + offset <= pageCount) {
      paginationOptions.push(currentPage + offset);
      added = true;
    }
    if (currentPage - offset >= 1) {
      paginationOptions.unshift(currentPage - offset);
      added = true;
    }
    if (!added) {
      break;
    }
    offset++;
  }

  return paginationOptions;
};

const scrollUp = () => {
  setTimeout(() => window.scrollTo({ top: 0 }), 50);
};

type PageLinkProps = {
  linkText: string | number;
  pageChangeHandler?: () => void;
  active?: boolean;
  unclickable?: boolean;
};

const PageLink = ({
  linkText,
  pageChangeHandler,
  active,
  unclickable,
}: PageLinkProps) => (
  <div
    className={`
    select-none
    inline-block
    text-sm
    border
    px-3
    py-1
    leading-5
    -ml-px
    border-border
    ${unclickable ? "text-text-200" : ""}
    ${!unclickable ? "hover:bg-accent-background-hovered" : ""}
    ${!unclickable ? "cursor-pointer" : ""}
    first:ml-0
    first:rounded-l-md
    last:rounded-r-md
    ${active ? "bg-background-200" : ""}
  `}
    onClick={() => {
      if (pageChangeHandler) {
        pageChangeHandler();
      }
    }}
  >
    {linkText}
  </div>
);

export interface PageSelectorProps {
  currentPage: number;
  totalPages: number;
  onPageChange: (newPage: number) => void;
  shouldScroll?: boolean;
}

export const PageSelector = ({
  currentPage,
  totalPages,
  onPageChange,
  shouldScroll = false,
}: PageSelectorProps) => {
  const paginationOptions = getPaginationOptions(currentPage, totalPages);
  const modifiedScrollUp = () => {
    if (shouldScroll) {
      scrollUp();
    }
  };

  return (
    <div style={{ display: "inline-block" }}>
      <PageLink
        linkText="‹"
        unclickable={currentPage === 1}
        pageChangeHandler={() => {
          onPageChange(Math.max(currentPage - 1, 1));
          modifiedScrollUp();
        }}
      />
      {!paginationOptions.includes(1) && (
        <>
          <PageLink
            linkText="1"
            active={currentPage === 1}
            pageChangeHandler={() => {
              onPageChange(1);
              modifiedScrollUp();
            }}
          />
          <PageLink linkText="..." unclickable={true} />
        </>
      )}
      {(!paginationOptions.includes(1)
        ? paginationOptions.slice(2)
        : paginationOptions
      ).map((page) => {
        return (
          <PageLink
            key={page}
            active={page === currentPage}
            linkText={page}
            pageChangeHandler={() => {
              onPageChange(page);
              modifiedScrollUp();
            }}
          />
        );
      })}
      <PageLink
        linkText="›"
        unclickable={currentPage === totalPages}
        pageChangeHandler={() => {
          onPageChange(Math.min(currentPage + 1, totalPages));
          modifiedScrollUp();
        }}
      />
    </div>
  );
};


================================================
FILE: web/src/components/RichTextSubtext.tsx
================================================
import React from "react";

interface RichTextSubtextProps {
  text: string;
  className?: string;
}

/**
 * Component that renders text with clickable links.
 * Detects URLs in the text and converts them to clickable links.
 * Also supports markdown-style links like [text](url).
 * NOTE: we should be careful not to use this component in a way that displays text from external sources
 * because it could be used to create links to malicious sites. Right now it's just used to make links
 * to our docs in connector setup pages
 */
export function RichTextSubtext({
  text,
  className = "",
}: RichTextSubtextProps) {
  // Function to parse text and create React elements
  const parseText = (input: string): React.ReactNode[] => {
    const elements: React.ReactNode[] = [];

    // Regex to match markdown links [text](url) and plain URLs
    const combinedRegex = /(\[([^\]]+)\]\(([^)]+)\))|(https?:\/\/[^\s]+)/g;

    let lastIndex = 0;
    let match;
    let key = 0;

    while ((match = combinedRegex.exec(input)) !== null) {
      // Add text before the match
      if (match.index > lastIndex) {
        elements.push(
          <span key={`text-${key++}`}>
            {input.slice(lastIndex, match.index)}
          </span>
        );
      }

      if (match[1]) {
        // Markdown-style link [text](url)
        const linkText = match[2];
        const url = match[3];
        elements.push(
          <a
            key={`link-${key++}`}
            href={url}
            target="_blank"
            rel="noopener noreferrer"
            className="text-link hover:text-link-hover underline"
            onClick={(e) => e.stopPropagation()}
          >
            {linkText}
          </a>
        );
      } else if (match[4]) {
        // Plain URL
        const url = match[4];
        elements.push(
          <a
            key={`link-${key++}`}
            href={url}
            target="_blank"
            rel="noopener noreferrer"
            className="text-link hover:text-link-hover underline"
            onClick={(e) => e.stopPropagation()}
          >
            {url}
          </a>
        );
      }

      lastIndex = match.index + match[0].length;
    }

    // Add remaining text after the last match
    if (lastIndex < input.length) {
      elements.push(
        <span key={`text-${key++}`}>{input.slice(lastIndex)}</span>
      );
    }

    return elements;
  };

  return <div className={className}>{parseText(text)}</div>;
}


================================================
FILE: web/src/components/SSRAutoRefresh.tsx
================================================
"use client";

import { useRouter } from "next/navigation";
import { useEffect } from "react";

// NOTE: this is causing crashes due to `ECONNRESET` and `UND_ERR_SOCKET`
// during the server-side fetch. Should not be used until this is resolved.
// export function SSRAutoRefresh({ refreshFreq = 5 }: { refreshFreq?: number }) {
//   // Helper which automatically refreshes a SSR page X seconds
//   const router = useRouter();

//   useEffect(() => {
//     const interval = setInterval(() => {
//       router.refresh();
//     }, refreshFreq * 1000);

//     return () => clearInterval(interval);
//   }, []);

//   return <></>;
// }

export function InstantSSRAutoRefresh() {
  const router = useRouter();

  useEffect(() => {
    router.refresh();
  }, [router]);

  return <></>;
}


================================================
FILE: web/src/components/SearchResultIcon.tsx
================================================
"use client";
import { useState, useEffect } from "react";
import faviconFetch from "favicon-fetch";
import { SourceIcon } from "./SourceIcon";
import { ValidSources } from "@/lib/types";
import { OnyxIcon } from "./icons/icons";

const CACHE_DURATION = 24 * 60 * 60 * 1000;

export async function getFaviconUrl(url: string): Promise<string | null> {
  const getCachedFavicon = () => {
    const cachedData = localStorage.getItem(`favicon_${url}`);
    if (cachedData) {
      const { favicon, timestamp } = JSON.parse(cachedData);
      if (Date.now() - timestamp < CACHE_DURATION) {
        return favicon;
      }
    }
    return null;
  };

  const cachedFavicon = getCachedFavicon();
  if (cachedFavicon) {
    return cachedFavicon;
  }

  const newFaviconUrl = await faviconFetch({ uri: url });
  if (newFaviconUrl) {
    localStorage.setItem(
      `favicon_${url}`,
      JSON.stringify({ favicon: newFaviconUrl, timestamp: Date.now() })
    );
    return newFaviconUrl;
  }

  return null;
}

export function SearchResultIcon({ url }: { url: string }) {
  const [faviconUrl, setFaviconUrl] = useState<string | null>(null);

  useEffect(() => {
    getFaviconUrl(url).then((favicon) => {
      if (favicon) {
        setFaviconUrl(favicon);
      }
    });
  }, [url]);

  if (!faviconUrl) {
    return <SourceIcon sourceType={ValidSources.Web} iconSize={18} />;
  }
  if (url.includes("onyx.app")) {
    return <OnyxIcon size={18} className="dark:text-[#fff] text-[#000]" />;
  }

  return (
    <div className="rounded-full w-[18px] h-[18px] overflow-hidden bg-background-200">
      <img
        height={18}
        width={18}
        className="rounded-full w-full h-full object-cover"
        src={faviconUrl}
        alt="favicon"
        onError={(e) => {
          e.currentTarget.onerror = null;
        }}
      />
    </div>
  );
}


================================================
FILE: web/src/components/SourceIcon.tsx
================================================
"use client";

import { getSourceMetadata } from "@/lib/sources";
import { ValidSources } from "@/lib/types";

export interface SourceIconProps {
  sourceType: ValidSources;
  iconSize: number;
}

export function SourceIcon({ sourceType, iconSize }: SourceIconProps) {
  return getSourceMetadata(sourceType).icon({
    size: iconSize,
    className: "text-text-04",
  });
}


================================================
FILE: web/src/components/SourceTile.tsx
================================================
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";
import type { Route } from "next";
import { SourceMetadata } from "@/lib/search/interfaces";
import React from "react";
import Text from "@/refresh-components/texts/Text";

interface SourceTileProps {
  sourceMetadata: SourceMetadata;
  preSelect?: boolean;
  navigationUrl: string;
  hasExistingSlackCredentials: boolean;
}

export default function SourceTile({
  sourceMetadata,
  preSelect,
  navigationUrl,
}: SourceTileProps) {
  return (
    <Link
      className={`flex
              flex-col
              items-center
              justify-center
              p-4
              rounded-lg
              w-40
              cursor-pointer
              shadow-md
              bg-background-tint-00
              hover:bg-background-tint-02
              relative
              ${preSelect ? "subtle-pulse" : ""}
            `}
      href={navigationUrl as Route}
    >
      <SourceIcon sourceType={sourceMetadata.internalName} iconSize={24} />
      <Text as="p" className="pt-2">
        {sourceMetadata.displayName}
      </Text>
    </Link>
  );
}


================================================
FILE: web/src/components/Spinner.tsx
================================================
import "./spinner.css";

export const Spinner = () => {
  return (
    <div className="fixed top-0 left-0 z-50 w-screen h-screen bg-[#000] bg-opacity-50 flex items-center justify-center">
      <div className="loader ease-linear rounded-full border-8 border-t-8 border-background-200 h-8 w-8"></div>
    </div>
  );
};


================================================
FILE: web/src/components/Status.tsx
================================================
"use client";

import { ValidStatuses } from "@/lib/types";
import { Badge } from "@/components/ui/badge";
import { timeAgo } from "@/lib/time";
import {
  FiAlertTriangle,
  FiCheckCircle,
  FiClock,
  FiMinus,
  FiPauseCircle,
} from "react-icons/fi";
import {
  ConnectorCredentialPairStatus,
  PermissionSyncStatusEnum,
} from "@/app/admin/connector/[ccPairId]/types";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";

export function IndexAttemptStatus({
  status,
  errorMsg,
}: {
  status: ValidStatuses | null;
  errorMsg?: string | null;
}) {
  let badge;

  if (status === "failed") {
    const icon = (
      <Badge variant="destructive" icon={FiAlertTriangle}>
        Failed
      </Badge>
    );
    if (errorMsg) {
      badge = (
        <SimpleTooltip tooltip={errorMsg}>
          <div className="cursor-pointer">{icon}</div>
        </SimpleTooltip>
      );
    } else {
      badge = icon;
    }
  } else if (status === "completed_with_errors") {
    badge = (
      <Badge variant="secondary" icon={FiAlertTriangle}>
        Completed with errors
      </Badge>
    );
  } else if (status === "success") {
    badge = (
      <Badge variant="success" icon={FiCheckCircle}>
        Succeeded
      </Badge>
    );
  } else if (status === "in_progress") {
    badge = (
      <Badge variant="in_progress" icon={FiClock}>
        In Progress
      </Badge>
    );
  } else if (status === "not_started") {
    badge = (
      <Badge variant="not_started" icon={FiClock}>
        Scheduled
      </Badge>
    );
  } else if (status === "canceled") {
    badge = (
      <Badge variant="canceled" icon={FiClock}>
        Canceled
      </Badge>
    );
  } else if (status === "invalid") {
    badge = (
      <Badge variant="invalid" icon={FiAlertTriangle}>
        Invalid
      </Badge>
    );
  } else {
    badge = (
      <Badge variant="outline" icon={FiMinus}>
        None
      </Badge>
    );
  }

  return <div>{badge}</div>;
}

export function PermissionSyncStatus({
  status,
  errorMsg,
}: {
  status: PermissionSyncStatusEnum | null;
  errorMsg?: string | null;
}) {
  let badge;

  if (status === PermissionSyncStatusEnum.FAILED) {
    const icon = (
      <Badge variant="destructive" icon={FiAlertTriangle}>
        Failed
      </Badge>
    );
    if (errorMsg) {
      badge = (
        <SimpleTooltip tooltip={errorMsg} side="bottom">
          <div className="cursor-pointer">{icon}</div>
        </SimpleTooltip>
      );
    } else {
      badge = icon;
    }
  } else if (status === PermissionSyncStatusEnum.COMPLETED_WITH_ERRORS) {
    badge = (
      <Badge variant="secondary" icon={FiAlertTriangle}>
        Completed with errors
      </Badge>
    );
  } else if (status === PermissionSyncStatusEnum.SUCCESS) {
    badge = (
      <Badge variant="success" icon={FiCheckCircle}>
        Succeeded
      </Badge>
    );
  } else if (status === PermissionSyncStatusEnum.IN_PROGRESS) {
    badge = (
      <Badge variant="in_progress" icon={FiClock}>
        In Progress
      </Badge>
    );
  } else if (status === PermissionSyncStatusEnum.NOT_STARTED) {
    badge = (
      <Badge variant="not_started" icon={FiClock}>
        Scheduled
      </Badge>
    );
  } else {
    badge = (
      <Badge variant="secondary" icon={FiClock}>
        Not Started
      </Badge>
    );
  }

  return <div>{badge}</div>;
}

export function CCPairStatus({
  ccPairStatus,
  inRepeatedErrorState,
  lastIndexAttemptStatus,
  size = "md",
}: {
  ccPairStatus: ConnectorCredentialPairStatus;
  inRepeatedErrorState: boolean;
  lastIndexAttemptStatus: ValidStatuses | undefined | null;
  size?: "xs" | "sm" | "md" | "lg";
}) {
  let badge;

  if (ccPairStatus == ConnectorCredentialPairStatus.DELETING) {
    badge = (
      <Badge variant="destructive" icon={FiAlertTriangle}>
        Deleting
      </Badge>
    );
  } else if (ccPairStatus == ConnectorCredentialPairStatus.PAUSED) {
    badge = (
      <Badge variant="paused" icon={FiPauseCircle}>
        Paused
      </Badge>
    );
  } else if (inRepeatedErrorState) {
    badge = (
      <Badge variant="destructive" icon={FiAlertTriangle}>
        Error
      </Badge>
    );
  } else if (ccPairStatus == ConnectorCredentialPairStatus.SCHEDULED) {
    badge = (
      <Badge variant="not_started" icon={FiClock}>
        Scheduled
      </Badge>
    );
  } else if (ccPairStatus == ConnectorCredentialPairStatus.INITIAL_INDEXING) {
    badge = (
      <Badge variant="in_progress" icon={FiClock}>
        Initial Indexing
      </Badge>
    );
  } else if (ccPairStatus == ConnectorCredentialPairStatus.INVALID) {
    badge = (
      <Badge
        tooltip="Connector is in an invalid state. Please update the credentials or create a new connector."
        circle
        variant="invalid"
      >
        Invalid
      </Badge>
    );
  } else {
    if (lastIndexAttemptStatus && lastIndexAttemptStatus === "in_progress") {
      badge = (
        <Badge variant="in_progress" icon={FiClock}>
          Indexing
        </Badge>
      );
    } else if (
      lastIndexAttemptStatus &&
      lastIndexAttemptStatus === "not_started"
    ) {
      badge = (
        <Badge variant="not_started" icon={FiClock}>
          Scheduled
        </Badge>
      );
    } else if (
      lastIndexAttemptStatus &&
      lastIndexAttemptStatus === "canceled"
    ) {
      badge = (
        <Badge variant="canceled" icon={FiClock}>
          Canceled
        </Badge>
      );
    } else {
      badge = (
        <Badge variant="success" icon={FiCheckCircle}>
          Indexed
        </Badge>
      );
    }
  }

  return <div>{badge}</div>;
}


================================================
FILE: web/src/components/WebResultIcon.tsx
================================================
"use client";

import { ValidSources } from "@/lib/types";
import { SourceIcon } from "./SourceIcon";
import { useState } from "react";
import { OnyxIcon } from "./icons/icons";

export function WebResultIcon({
  url,
  size = 18,
}: {
  url: string;
  size?: number;
}) {
  const [error, setError] = useState(false);
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch (e) {
    hostname = "onyx.app";
  }
  return (
    <>
      {hostname.includes("onyx.app") ? (
        <OnyxIcon size={size} className="dark:text-[#fff] text-[#000]" />
      ) : !error ? (
        <img
          className="my-0 rounded-full py-0"
          src={`https://t3.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://${hostname}&size=128`}
          alt="favicon"
          height={size}
          onError={() => setError(true)}
          width={size}
          style={{
            height: `${size}px`,
            width: `${size}px`,
            background: "transparent",
          }}
        />
      ) : (
        <SourceIcon sourceType={ValidSources.Web} iconSize={size} />
      )}
    </>
  );
}


================================================
FILE: web/src/components/admin/CardSection.tsx
================================================
import { cn } from "@/lib/utils";

export interface CardSectionProps {
  className?: string;
  children?: React.ReactNode;
}

// Used for all admin page sections
export default function CardSection({ children, className }: CardSectionProps) {
  return (
    <div
      className={cn(
        "p-6 bg-background-neutral-00 rounded-16 border",
        className
      )}
    >
      {children}
    </div>
  );
}


================================================
FILE: web/src/components/admin/ClientLayout.tsx
================================================
"use client";

import AdminSidebar from "@/sections/sidebar/AdminSidebar";
import { usePathname } from "next/navigation";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { ApplicationStatus } from "@/interfaces/settings";
import { Button } from "@opal/components";
import { cn } from "@/lib/utils";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

export interface ClientLayoutProps {
  children: React.ReactNode;
  enableCloud: boolean;
}

// TODO (@raunakab): Migrate ALL admin pages to use SettingsLayouts from
// `@/layouts/settings-layouts`. Once every page manages its own layout,
// the `py-10 px-4 md:px-12` padding below can be removed entirely and
// this prefix list can be deleted.
const SETTINGS_LAYOUT_PREFIXES = [
  ADMIN_ROUTES.CHAT_PREFERENCES.path,
  ADMIN_ROUTES.IMAGE_GENERATION.path,
  ADMIN_ROUTES.WEB_SEARCH.path,
  ADMIN_ROUTES.MCP_ACTIONS.path,
  ADMIN_ROUTES.OPENAPI_ACTIONS.path,
  ADMIN_ROUTES.BILLING.path,
  ADMIN_ROUTES.INDEX_MIGRATION.path,
  ADMIN_ROUTES.DISCORD_BOTS.path,
  ADMIN_ROUTES.THEME.path,
  ADMIN_ROUTES.LLM_MODELS.path,
  ADMIN_ROUTES.AGENTS.path,
  ADMIN_ROUTES.USERS.path,
  ADMIN_ROUTES.TOKEN_RATE_LIMITS.path,
  ADMIN_ROUTES.INDEX_SETTINGS.path,
  ADMIN_ROUTES.DOCUMENT_PROCESSING.path,
  ADMIN_ROUTES.CODE_INTERPRETER.path,
  ADMIN_ROUTES.API_KEYS.path,
  ADMIN_ROUTES.ADD_CONNECTOR.path,
  ADMIN_ROUTES.INDEXING_STATUS.path,
  ADMIN_ROUTES.DOCUMENTS.path,
  ADMIN_ROUTES.DEBUG.path,
  ADMIN_ROUTES.KNOWLEDGE_GRAPH.path,
  ADMIN_ROUTES.SLACK_BOTS.path,
  ADMIN_ROUTES.STANDARD_ANSWERS.path,
  ADMIN_ROUTES.GROUPS.path,
  ADMIN_ROUTES.PERFORMANCE.path,
  ADMIN_ROUTES.SCIM.path,
  ADMIN_ROUTES.VOICE.path,
];

export function ClientLayout({ children, enableCloud }: ClientLayoutProps) {
  const pathname = usePathname();
  const settings = useSettingsContext();

  // Certain admin panels have their own custom sidebar.
  // For those pages, we skip rendering the default `AdminSidebar` and let those individual pages render their own.
  const hasCustomSidebar =
    pathname.startsWith("/admin/connectors") ||
    pathname.startsWith("/admin/embeddings");

  // Pages using SettingsLayouts handle their own padding/centering.
  const hasOwnLayout = SETTINGS_LAYOUT_PREFIXES.some((prefix) =>
    pathname.startsWith(prefix)
  );

  return (
    <div className="h-screen w-screen flex overflow-hidden">
      {settings.settings.application_status ===
        ApplicationStatus.PAYMENT_REMINDER && (
        <div className="fixed top-2 left-1/2 transform -translate-x-1/2 bg-amber-400 dark:bg-amber-500 text-gray-900 dark:text-gray-100 p-4 rounded-lg shadow-lg z-50 max-w-md text-center">
          <strong className="font-bold">Warning:</strong> Your trial ends in
          less than 5 days and no payment method has been added.
          <div className="mt-2">
            <Button width="full" href="/admin/billing">
              Update Billing Information
            </Button>
          </div>
        </div>
      )}

      {hasCustomSidebar ? (
        <div className="flex-1 min-w-0 min-h-0 overflow-y-auto">{children}</div>
      ) : (
        <>
          <AdminSidebar enableCloudSS={enableCloud} />
          <div
            data-main-container
            className={cn(
              "flex flex-1 flex-col min-w-0 min-h-0 overflow-y-auto",
              !hasOwnLayout && "py-10 px-4 md:px-12"
            )}
          >
            {children}
          </div>
        </>
      )}
    </div>
  );
}


================================================
FILE: web/src/components/admin/Layout.tsx
================================================
import { redirect } from "next/navigation";
import type { Route } from "next";
import { requireAdminAuth } from "@/lib/auth/requireAuth";
import { ClientLayout } from "./ClientLayout";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { AnnouncementBanner } from "../header/AnnouncementBanner";

export interface LayoutProps {
  children: React.ReactNode;
}

export default async function Layout({ children }: LayoutProps) {
  // Check authentication and admin role - data fetching is done client-side via SWR hooks
  const authResult = await requireAdminAuth();

  // If auth check returned a redirect, redirect immediately
  if (authResult.redirect) {
    return redirect(authResult.redirect as Route);
  }

  return (
    <ClientLayout enableCloud={NEXT_PUBLIC_CLOUD_ENABLED}>
      <AnnouncementBanner />
      {children}
    </ClientLayout>
  );
}


================================================
FILE: web/src/components/admin/Title.tsx
================================================
"use client";

import { JSX } from "react";
import Separator from "@/refresh-components/Separator";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";

export interface AdminPageTitleProps {
  icon: React.FunctionComponent<IconProps> | React.ReactNode;
  title: string | JSX.Element;
  farRightElement?: JSX.Element;
  includeDivider?: boolean;
}

export function AdminPageTitle({
  icon: Icon,
  title,
  farRightElement,
  includeDivider = true,
}: AdminPageTitleProps) {
  return (
    <div className="w-full">
      <div className="w-full flex flex-row justify-between">
        <div className="flex flex-row gap-2">
          {typeof Icon === "function" ? (
            <Icon className="stroke-text-04 h-8 w-8" />
          ) : (
            Icon
          )}
          <Text headingH2 aria-label="admin-page-title">
            {title}
          </Text>
        </div>
        {farRightElement}
      </div>
      {includeDivider ? <Separator /> : <div className="mb-6" />}
    </div>
  );
}


================================================
FILE: web/src/components/admin/connectors/AccessTypeForm.tsx
================================================
import { DefaultDropdown } from "@/components/Dropdown";
import {
  AccessType,
  ValidAutoSyncSource,
  ConfigurableSources,
  validAutoSyncSources,
} from "@/lib/types";
import { useField } from "formik";
import { AutoSyncOptions } from "./AutoSyncOptions";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useEffect, useMemo } from "react";
import { Credential } from "@/lib/connectors/credentials";
import { credentialTemplates } from "@/lib/connectors/credentials";

function isValidAutoSyncSource(
  value: ConfigurableSources
): value is ValidAutoSyncSource {
  return validAutoSyncSources.includes(value as ValidAutoSyncSource);
}

export function AccessTypeForm({
  connector,
  currentCredential,
}: {
  connector: ConfigurableSources;
  currentCredential?: Credential<any> | null;
}) {
  const [access_type, meta, access_type_helpers] =
    useField<AccessType>("access_type");

  const isPaidEnterpriseEnabled = usePaidEnterpriseFeaturesEnabled();
  const isAutoSyncSupported = isValidAutoSyncSource(connector);

  const selectedAuthMethod = currentCredential?.credential_json?.[
    "authentication_method"
  ] as string | undefined;

  // If the selected auth method is one that disables sync, return true
  const isSyncDisabledByAuth = useMemo(() => {
    const template = (credentialTemplates as any)[connector];
    const authMethods = template?.authMethods as
      | { value: string; disablePermSync?: boolean }[]
      | undefined; // auth methods are returned as an array of objects with a value and disablePermSync property
    if (!authMethods || !selectedAuthMethod) return false;
    const method = authMethods.find((m) => m.value === selectedAuthMethod);
    return method?.disablePermSync === true;
  }, [connector, selectedAuthMethod]);

  useEffect(
    () => {
      // Only set default value if access_type.value is not already set
      if (!access_type.value) {
        if (!isPaidEnterpriseEnabled) {
          access_type_helpers.setValue("public");
        } else if (isAutoSyncSupported) {
          access_type_helpers.setValue("sync");
        } else {
          access_type_helpers.setValue("private");
        }
      }
    },
    [
      // Only run this effect once when the component mounts
      // eslint-disable-next-line react-hooks/exhaustive-deps
    ]
  );

  const options = [
    {
      name: "Private",
      value: "private",
      description:
        "Only users who have explicitly been given access to this connector (through the User Groups page) can access the documents pulled in by this connector",
      disabled: false,
      disabledReason: "",
    },
    {
      name: "Public",
      value: "public",
      description:
        "Everyone with an account on Onyx can access the documents pulled in by this connector",
      disabled: false,
      disabledReason: "",
    },
  ];

  if (isAutoSyncSupported && isPaidEnterpriseEnabled) {
    options.push({
      name: "Auto Sync Permissions",
      value: "sync",
      description:
        "We will automatically sync permissions from the source. A document will be searchable in Onyx if and only if the user performing the search has permission to access the document in the source.",
      disabled: isSyncDisabledByAuth,
      disabledReason:
        "Current credential auth method doesn't support Auto Sync Permissions. Please change the credential auth method to a supported one.",
    });
  }

  return (
    <>
      {isPaidEnterpriseEnabled && (
        <>
          <div>
            <label className="text-text-950 font-medium">Document Access</label>
            <p className="text-sm text-text-500">
              Control who has access to the documents indexed by this connector.
            </p>
          </div>
          <DefaultDropdown
            options={options}
            selected={access_type.value}
            onSelect={(selected) => {
              access_type_helpers.setValue(selected as AccessType);
            }}
            includeDefault={false}
          />
          {access_type.value === "sync" && isAutoSyncSupported && (
            <AutoSyncOptions connectorType={connector as ValidAutoSyncSource} />
          )}
        </>
      )}
    </>
  );
}


================================================
FILE: web/src/components/admin/connectors/AccessTypeGroupSelector.tsx
================================================
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import React, { useState, useEffect } from "react";
import { FieldArray, ArrayHelpers, ErrorMessage, useField } from "formik";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Separator from "@/refresh-components/Separator";
import { UserGroup, UserRole } from "@/lib/types";
import { useUserGroups } from "@/lib/hooks";
import {
  AccessType,
  ValidAutoSyncSource,
  ConfigurableSources,
  validAutoSyncSources,
} from "@/lib/types";
import { useUser } from "@/providers/UserProvider";
import { SvgUsers } from "@opal/icons";
function isValidAutoSyncSource(
  value: ConfigurableSources
): value is ValidAutoSyncSource {
  return validAutoSyncSources.includes(value as ValidAutoSyncSource);
}

// This should be included for all forms that require groups / public access
// to be set, and access to this / permissioning should be handled within this component itself.

export type AccessTypeGroupSelectorFormType = {
  access_type: AccessType;
  groups: number[];
};

export function AccessTypeGroupSelector({
  connector,
}: {
  connector: ConfigurableSources;
}) {
  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();
  const { isAdmin, user, isCurator } = useUser();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const [shouldHideContent, setShouldHideContent] = useState(false);
  const isAutoSyncSupported = isValidAutoSyncSource(connector);

  const [access_type, meta, access_type_helpers] =
    useField<AccessType>("access_type");
  const [groups, groups_meta, groups_helpers] = useField<number[]>("groups");

  useEffect(() => {
    if (user && userGroups && isPaidEnterpriseFeaturesEnabled) {
      const isUserAdmin = user.role === UserRole.ADMIN;
      if (!isPaidEnterpriseFeaturesEnabled) {
        access_type_helpers.setValue("public");
        return;
      }

      // Only set default access type if it's not already set, to avoid overriding user selections
      if (!access_type.value && !isUserAdmin && !isAutoSyncSupported) {
        access_type_helpers.setValue("private");
      }

      if (
        access_type.value === "private" &&
        userGroups.length === 1 &&
        userGroups[0] !== undefined &&
        !isUserAdmin
      ) {
        groups_helpers.setValue([userGroups[0].id]);
        setShouldHideContent(true);
      } else if (access_type.value !== "private") {
        // If the access type is public or sync, empty the groups selection
        groups_helpers.setValue([]);
        setShouldHideContent(false);
      } else {
        setShouldHideContent(false);
      }
    }
  }, [
    user,
    userGroups,
    access_type.value,
    access_type_helpers,
    groups_helpers,
    isPaidEnterpriseFeaturesEnabled,
    isAutoSyncSupported,
  ]);

  if (userGroupsIsLoading) {
    return <div>Loading...</div>;
  }
  if (!isPaidEnterpriseFeaturesEnabled) {
    return null;
  }

  if (shouldHideContent) {
    return (
      <>
        {userGroups && userGroups[0] !== undefined && (
          <div className="mb-1 font-medium text-base">
            This Connector will be assigned to group <b>{userGroups[0].name}</b>
            .
          </div>
        )}
      </>
    );
  }

  return (
    <div>
      {(access_type.value === "private" || isCurator) &&
        userGroups &&
        userGroups?.length > 0 && (
          <>
            <Separator />
            <div className="flex flex-col gap-3 pt-4">
              <Text as="p" mainUiAction text05>
                Assign group access for this Connector
              </Text>
              {userGroupsIsLoading ? (
                <div className="animate-pulse bg-background-200 h-8 w-32 rounded" />
              ) : (
                <Text as="p" mainUiMuted text03>
                  {isAdmin
                    ? "This Connector will be visible/accessible by the groups selected below"
                    : "Curators must select one or more groups to give access to this Connector"}
                </Text>
              )}
            </div>
            <FieldArray
              name="groups"
              render={(arrayHelpers: ArrayHelpers) => (
                <div className="flex flex-wrap gap-2 py-4">
                  {userGroupsIsLoading ? (
                    <div className="animate-pulse bg-background-200 h-8 w-32 rounded"></div>
                  ) : (
                    userGroups &&
                    userGroups.map((userGroup: UserGroup) => {
                      const ind = groups.value.indexOf(userGroup.id);
                      let isSelected = ind !== -1;
                      return (
                        <Button
                          variant={isSelected ? "action" : "default"}
                          key={userGroup.id}
                          icon={SvgUsers}
                          onClick={() => {
                            if (isSelected) {
                              arrayHelpers.remove(ind);
                            } else {
                              arrayHelpers.push(userGroup.id);
                            }
                          }}
                        >
                          {userGroup.name}
                        </Button>
                      );
                    })
                  )}
                </div>
              )}
            />
            <ErrorMessage
              name="groups"
              component="div"
              className="text-error text-sm mt-1"
            />
          </>
        )}
    </div>
  );
}


================================================
FILE: web/src/components/admin/connectors/AutoSyncOptions.tsx
================================================
import { TextFormField } from "@/components/Field";
import { ValidAutoSyncSource } from "@/lib/types";
import Separator from "@/refresh-components/Separator";
import { autoSyncConfigBySource } from "@/lib/connectors/AutoSyncOptionFields";

export function AutoSyncOptions({
  connectorType,
}: {
  connectorType: ValidAutoSyncSource;
}) {
  const autoSyncConfig = autoSyncConfigBySource[connectorType];

  if (Object.keys(autoSyncConfig).length === 0) {
    return null;
  }

  return (
    <div>
      <Separator />
      {Object.entries(autoSyncConfig).map(([key, config]) => (
        <div key={key} className="mb-4">
          <TextFormField
            name={`auto_sync_options.${key}`}
            label={config.label}
            subtext={config.subtext}
          />
        </div>
      ))}
    </div>
  );
}


================================================
FILE: web/src/components/admin/connectors/BasicTable.tsx
================================================
import React, { FC, JSX } from "react";

type Column = {
  header: string;
  key: string;
  width?: number | string;
  alignment?: "left" | "right";
};

type TableData = {
  [key: string]: string | number | JSX.Element;
};

interface BasicTableProps {
  columns: Column[];
  data: TableData[];
  onSelect?: (row: TableData) => void;
}

export const BasicTable: FC<BasicTableProps> = ({
  columns,
  data,
  onSelect,
}) => {
  return (
    <div>
      <table className="w-full table-auto">
        <thead>
          <tr className="text-left bg-background-700">
            {columns.map((column, index) => {
              const isRightAligned = column?.alignment === "right";
              return (
                <th
                  key={index}
                  className={
                    (column.width ? `w-${column.width} ` : "") +
                    "px-4 py-2 font-bold" +
                    (index === 0 ? " rounded-tl-sm" : "") +
                    (index === columns.length - 1 ? " rounded-tr-sm" : "")
                  }
                >
                  <div
                    className={isRightAligned ? "flex flex-row-reverse" : ""}
                  >
                    {column.header}
                  </div>
                </th>
              );
            })}
          </tr>
        </thead>
        <tbody>
          {data.map((row, rowIndex) => (
            <tr
              key={rowIndex}
              className={
                "text-sm" +
                (onSelect ? " hover:bg-background-800 cursor-pointer" : "")
              }
              onClick={() => onSelect && onSelect(row)}
            >
              {columns.map((column, colIndex) => {
                const isRightAligned = column?.alignment === "right";
                return (
                  <td
                    key={colIndex}
                    className={
                      (column.width ? `w-${column.width} ` : "") +
                      (isRightAligned ? "flex" : "") +
                      "py-2 px-4 border-b border-background-800"
                    }
                  >
                    <div>{row[column.key]}</div>
                  </td>
                );
              })}
            </tr>
          ))}
        </tbody>
      </table>
    </div>
  );
};


================================================
FILE: web/src/components/admin/connectors/ConnectorDocsLink.tsx
================================================
import { ValidSources } from "@/lib/types";
import { getSourceDocLink } from "@/lib/sources";

export default function ConnectorDocsLink({
  sourceType,
  className,
}: {
  sourceType: ValidSources;
  className?: string;
}) {
  const docsLink = getSourceDocLink(sourceType);

  if (!docsLink) {
    return null;
  }

  const paragraphClass = ["text-sm", className].filter(Boolean).join(" ");

  return (
    <p className={paragraphClass}>
      Check out
      <a
        className="text-blue-600 hover:underline"
        target="_blank"
        rel="noopener"
        href={docsLink}
      >
        {" "}
        our docs{" "}
      </a>
      for more info on configuring this connector.
    </p>
  );
}


================================================
FILE: web/src/components/admin/connectors/ConnectorTitle.tsx
================================================
import {
  ConfluenceConfig,
  Connector,
  GithubConfig,
  GitlabConfig,
  JiraConfig,
  SlackConfig,
  ZulipConfig,
} from "@/lib/connectors/connectors";
import { getSourceMetadata } from "@/lib/sources";

import Link from "next/link";

interface ConnectorTitleProps {
  connector: Connector<any>;
  ccPairId: number;
  ccPairName: string;
  isPublic?: boolean;
  owner?: string;
  isLink?: boolean;
  showMetadata?: boolean;
  className?: string;
}

export const ConnectorTitle = ({
  connector,
  ccPairId,
  ccPairName,
  owner,
  isPublic = true,
  isLink = true,
  showMetadata = true,
  className = "",
}: ConnectorTitleProps) => {
  const sourceMetadata = getSourceMetadata(connector.source);

  let additionalMetadata = new Map<string, string>();
  if (connector.source === "github") {
    const typedConnector = connector as Connector<GithubConfig>;
    additionalMetadata.set(
      "Repo",
      typedConnector.connector_specific_config.repositories
        ? `${typedConnector.connector_specific_config.repo_owner}/${
            typedConnector.connector_specific_config.repositories.includes(",")
              ? "multiple repos"
              : typedConnector.connector_specific_config.repositories
          }`
        : `${typedConnector.connector_specific_config.repo_owner}/*`
    );
  } else if (connector.source === "gitlab") {
    const typedConnector = connector as Connector<GitlabConfig>;
    additionalMetadata.set(
      "Repo",
      `${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}`
    );
  } else if (connector.source === "confluence") {
    const typedConnector = connector as Connector<ConfluenceConfig>;
    const wikiUrl = typedConnector.connector_specific_config.is_cloud
      ? `${typedConnector.connector_specific_config.wiki_base}/wiki/spaces/${typedConnector.connector_specific_config.space}`
      : `${typedConnector.connector_specific_config.wiki_base}/spaces/${typedConnector.connector_specific_config.space}`;
    additionalMetadata.set("Wiki URL", wikiUrl);
    if (typedConnector.connector_specific_config.page_id) {
      additionalMetadata.set(
        "Page ID",
        typedConnector.connector_specific_config.page_id
      );
    }
  } else if (connector.source === "jira") {
    const typedConnector = connector as Connector<JiraConfig>;
    additionalMetadata.set(
      "Jira Project URL",
      typedConnector.connector_specific_config.jira_project_url
    );
  } else if (connector.source === "slack") {
    const typedConnector = connector as Connector<SlackConfig>;
    if (
      typedConnector.connector_specific_config?.channels &&
      typedConnector.connector_specific_config?.channels.length > 0
    ) {
      additionalMetadata.set(
        "Channels",
        typedConnector.connector_specific_config.channels.join(", ")
      );
    }
    if (typedConnector.connector_specific_config.channel_regex_enabled) {
      additionalMetadata.set("Channel Regex Enabled", "True");
    }
    if (typedConnector.connector_specific_config.include_bot_messages) {
      additionalMetadata.set("Include Bot Messages", "True");
    }
  } else if (connector.source === "zulip") {
    const typedConnector = connector as Connector<ZulipConfig>;
    additionalMetadata.set(
      "Realm",
      typedConnector.connector_specific_config.realm_name
    );
  }

  const mainSectionClassName = `text-blue-500 dark:text-blue-100 flex w-fit ${className}`;
  const mainDisplay = (
    <>
      {sourceMetadata.icon({ size: 16 })}
      <div className="ml-1 my-auto text-xs font-medium truncate">
        {ccPairName || sourceMetadata.displayName}
      </div>
    </>
  );
  return (
    <div className="my-auto max-w-full">
      {isLink ? (
        <Link
          className={mainSectionClassName}
          href={`/admin/connector/${ccPairId}`}
        >
          {mainDisplay}
        </Link>
      ) : (
        <div className={mainSectionClassName}>{mainDisplay}</div>
      )}
      {showMetadata && additionalMetadata.size > 0 && (
        <div className="text-[10px] mt-0.5 text-gray-600 dark:text-gray-400">
          {Array.from(additionalMetadata.entries()).map(([key, value]) => {
            return (
              <div key={key} className="truncate">
                <i>{key}:</i> {value}
              </div>
            );
          })}
        </div>
      )}
    </div>
  );
};


================================================
FILE: web/src/components/admin/connectors/CredentialForm.tsx
================================================
import React, { JSX } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { toast } from "@/hooks/useToast";
import { ValidSources } from "@/lib/types";

import {
  createCredential,
  createCredentialWithPrivateKey,
} from "@/lib/credential";
import {
  CredentialBase,
  Credential,
  CredentialWithPrivateKey,
} from "@/lib/connectors/credentials";

const PRIVATE_KEY_FIELD_KEY = "private_key";

export async function submitCredential<T>(
  credential: CredentialBase<T> | CredentialWithPrivateKey<T>
): Promise<{
  credential?: Credential<any>;
  message: string;
  isSuccess: boolean;
}> {
  let isSuccess = false;
  try {
    let response: Response;
    if (PRIVATE_KEY_FIELD_KEY in credential && credential.private_key) {
      response = await createCredentialWithPrivateKey(
        credential as CredentialWithPrivateKey<T>
      );
    } else {
      response = await createCredential(credential as CredentialBase<T>);
    }
    if (response.ok) {
      const parsed_response = await response.json();
      const credential = parsed_response.credential;
      isSuccess = true;
      return { credential, message: "Success!", isSuccess: true };
    } else {
      const errorData = await response.json();
      return { message: `Error: ${errorData.detail}`, isSuccess: false };
    }
  } catch (error) {
    return { message: `Error: ${error}`, isSuccess: false };
  }
}

interface Props<YupObjectType extends Yup.AnyObject> {
  formBody: JSX.Element | null;
  validationSchema: Yup.ObjectSchema<YupObjectType>;
  initialValues: YupObjectType;
  onSubmit: (isSuccess: boolean) => void;
  source: ValidSources;
}

export function CredentialForm<T extends Yup.AnyObject>({
  formBody,
  validationSchema,
  initialValues,
  source,
  onSubmit,
}: Props<T>): JSX.Element {
  return (
    <>
      <Formik
        initialValues={initialValues}
        validationSchema={validationSchema}
        onSubmit={(values, formikHelpers) => {
          formikHelpers.setSubmitting(true);
          submitCredential<T>({
            credential_json: values,
            admin_public: true,
            curator_public: false,
            groups: [],
            source: source,
          }).then(({ message, isSuccess }) => {
            if (isSuccess) {
              toast.success(message);
            } else {
              toast.error(message);
            }
            formikHelpers.setSubmitting(false);
            onSubmit(isSuccess);
          });
        }}
      >
        {({ isSubmitting }) => (
          <Form>
            {formBody}
            <div className="flex">
              <button
                type="submit"
                color="green"
                disabled={isSubmitting}
                className="mx-auto w-64 inline-flex items-center 
                justify-center whitespace-nowrap rounded-md text-sm 
                font-medium transition-colors  bg-background-200 text-primary-foreground
                focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring 
                disabled:pointer-events-none disabled:opacity-50 
                shadow hover:bg-primary/90 h-9 px-4 py-2"
              >
                Update
              </button>
            </div>
          </Form>
        )}
      </Formik>
    </>
  );
}


================================================
FILE: web/src/components/admin/connectors/FileUpload.tsx
================================================
import { useFormikContext } from "formik";
import { FC, useState } from "react";
import React from "react";
import Dropzone from "react-dropzone";

interface FileUploadProps {
  selectedFiles: File[];
  setSelectedFiles: (files: File[]) => void;
  message?: string;
  name?: string;
  multiple?: boolean;
  accept?: string;
}

export const FileUpload: FC<FileUploadProps> = ({
  name,
  selectedFiles,
  setSelectedFiles,
  message,
  multiple = true,
  accept,
}) => {
  const [dragActive, setDragActive] = useState(false);
  const { setFieldValue } = useFormikContext();

  return (
    <div>
      <Dropzone
        onDrop={(acceptedFiles) => {
          let filesToSet: File[] = [];
          if (multiple) {
            filesToSet = acceptedFiles;
          } else {
            const acceptedFile = acceptedFiles[0];
            if (acceptedFile !== undefined) {
              filesToSet = [acceptedFile];
            }
          }

          if (filesToSet !== undefined) {
            setSelectedFiles(filesToSet);
          }

          setDragActive(false);
          if (name) {
            setFieldValue(name, multiple ? filesToSet : filesToSet[0]);
          }
        }}
        onDragLeave={() => setDragActive(false)}
        onDragEnter={() => setDragActive(true)}
        multiple={multiple}
        accept={accept ? { [accept]: [] } : undefined}
      >
        {({ getRootProps, getInputProps }) => (
          <section>
            <div
              {...getRootProps()}
              className={
                "flex flex-col items-center w-full px-4 py-12 rounded " +
                "shadow-lg tracking-wide border border-border cursor-pointer" +
                (dragActive ? " border-accent" : "")
              }
            >
              <input {...getInputProps()} />
              <b className="text-text-darker">
                {message ||
                  `Drag and drop ${
                    multiple ? "some files" : "a file"
                  } here, or click to select ${multiple ? "files" : "a file"}`}
              </b>
            </div>
          </section>
        )}
      </Dropzone>

      {selectedFiles.length > 0 && (
        <div className="mt-4">
          <h2 className="text-sm font-bold">
            Selected File{multiple ? "s" : ""}
          </h2>
          <ul>
            {selectedFiles.map((file) => (
              <div key={file.name} className="flex">
                <p className="text-sm mr-2">{file.name}</p>
              </div>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
};


================================================
FILE: web/src/components/admin/connectors/types.ts
================================================
import { JSX } from "react";
import * as Yup from "yup";

export type FormBodyBuilder<T extends Yup.AnyObject> = (
  values: T
) => JSX.Element;

export type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<
  T,
  Exclude<keyof T, Keys>
> &
  {
    [K in Keys]-?: Required<Pick<T, K>> & Partial<Pick<T, Exclude<Keys, K>>>;
  }[Keys];


================================================
FILE: web/src/components/admin/federated/FederatedConnectorForm.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import {
  ConfigurableSources,
  CredentialFieldSpec,
  ConfigurationFieldSpec,
  FederatedConnectorCreateRequest,
  FederatedConnectorDetail,
  CredentialSchemaResponse,
} from "@/lib/types";
import { getSourceMetadata } from "@/lib/sources";
import { SourceIcon } from "@/components/SourceIcon";
import { Card, CardContent } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { useRouter } from "next/navigation";
import Text from "@/refresh-components/texts/Text";
import { AlertTriangle, Check, Loader2, Trash2Icon, Info } from "lucide-react";
import BackButton from "@/refresh-components/buttons/BackButton";
import Title from "@/components/ui/title";
import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { DropdownMenuItemWithTooltip } from "@/components/ui/dropdown-menu-with-tooltip";
import { toast } from "@/hooks/useToast";

import { Badge } from "@/components/ui/badge";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { ListFieldInput } from "@/refresh-components/inputs/ListFieldInput";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import Separator from "@/refresh-components/Separator";
import { SvgSettings } from "@opal/icons";

export interface FederatedConnectorFormProps {
  connector: ConfigurableSources;
  connectorId?: number; // Optional ID for editing existing connector
  preloadedConnectorData?: FederatedConnectorDetail;
  preloadedCredentialSchema?: CredentialSchemaResponse;
}

interface CredentialForm {
  [key: string]: string;
}

interface ConfigForm {
  [key: string]: string | boolean | string[] | number | undefined;
}

interface FormState {
  credentials: CredentialForm;
  config: ConfigForm;
  schema: Record<string, CredentialFieldSpec> | null;
  configurationSchema: Record<string, ConfigurationFieldSpec> | null;
  schemaError: string | null;
  configurationSchemaError: string | null;
  connectorError: string | null;
}

async function validateCredentials(
  source: string,
  credentials: CredentialForm
): Promise<{ success: boolean; message: string }> {
  try {
    const response = await fetch(
      `/api/federated/sources/federated_${source}/credentials/validate`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(credentials),
      }
    );

    if (!response.ok) {
      const errorData = await response.json().catch(() => ({}));
      return {
        success: false,
        message:
          errorData.detail || `Validation failed: ${response.statusText}`,
      };
    }

    const result = await response.json();
    return {
      success: result,
      message: result ? "Credentials are valid" : "Credentials are invalid",
    };
  } catch (error) {
    return { success: false, message: `Validation error: ${error}` };
  }
}

async function createFederatedConnector(
  source: string,
  credentials: CredentialForm,
  config?: ConfigForm
): Promise<{ success: boolean; message: string }> {
  try {
    const response = await fetch("/api/federated", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        source: `federated_${source}`,
        credentials,
        config: config || {},
      } as FederatedConnectorCreateRequest),
    });

    if (response.ok) {
      return {
        success: true,
        message: "Federated connector created successfully!",
      };
    } else {
      const errorData = await response.json();
      return {
        success: false,
        message: errorData.detail || "Failed to create federated connector",
      };
    }
  } catch (error) {
    return { success: false, message: `Error: ${error}` };
  }
}

async function updateFederatedConnector(
  id: number,
  credentials: CredentialForm,
  config?: ConfigForm
): Promise<{ success: boolean; message: string }> {
  try {
    const response = await fetch(`/api/federated/${id}`, {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        credentials,
        config: config || {},
      }),
    });

    if (response.ok) {
      return {
        success: true,
        message: "Federated connector updated successfully!",
      };
    } else {
      const errorData = await response.json();
      return {
        success: false,
        message: errorData.detail || "Failed to update federated connector",
      };
    }
  } catch (error) {
    return { success: false, message: `Error: ${error}` };
  }
}

async function deleteFederatedConnector(
  id: number
): Promise<{ success: boolean; message: string }> {
  try {
    const response = await fetch(`/api/federated/${id}`, {
      method: "DELETE",
    });

    if (response.ok) {
      return {
        success: true,
        message: "Federated connector deleted successfully!",
      };
    } else {
      const errorData = await response.json();
      return {
        success: false,
        message: errorData.detail || "Failed to delete federated connector",
      };
    }
  } catch (error) {
    return { success: false, message: `Error: ${error}` };
  }
}

export function FederatedConnectorForm({
  connector,
  connectorId,
  preloadedConnectorData,
  preloadedCredentialSchema,
}: FederatedConnectorFormProps) {
  const router = useRouter();
  const sourceMetadata = getSourceMetadata(connector);
  const isEditMode = connectorId !== undefined;

  const [formState, setFormState] = useState<FormState>({
    credentials: preloadedConnectorData?.credentials || {},
    config: preloadedConnectorData?.config || {},
    schema: preloadedCredentialSchema?.credentials || null,
    configurationSchema: null,
    schemaError: null,
    configurationSchemaError: null,
    connectorError: null,
  });
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [submitMessage, setSubmitMessage] = useState<string | null>(null);
  const [submitSuccess, setSubmitSuccess] = useState<boolean | null>(null);
  const [isValidating, setIsValidating] = useState(false);
  const [isDeleting, setIsDeleting] = useState(false);
  const [isLoadingSchema, setIsLoadingSchema] = useState(
    !preloadedCredentialSchema
  );
  const [configValidationErrors, setConfigValidationErrors] = useState<
    Record<string, string>
  >({});

  // Fetch credential schema if not preloaded
  useEffect(() => {
    const fetchCredentialSchema = async () => {
      if (!preloadedCredentialSchema) {
        setIsLoadingSchema(true);
        try {
          const response = await fetch(
            `/api/federated/sources/federated_${connector}/credentials/schema`
          );

          if (!response.ok) {
            throw new Error(
              `Failed to fetch credential schema: ${response.statusText}`
            );
          }

          const responseData = await response.json();
          setFormState((prev) => ({
            ...prev,
            schema: responseData.credentials,
            schemaError: null,
          }));
        } catch (error) {
          console.error("Error fetching credential schema:", error);
          setFormState((prev) => ({
            ...prev,
            schemaError: `Failed to load credential schema: ${error}`,
          }));
        } finally {
          setIsLoadingSchema(false);
        }
      }
    };

    fetchCredentialSchema();
  }, [connector, preloadedCredentialSchema]);

  // Fetch configuration schema for connector configuration
  useEffect(() => {
    const fetchConfigurationSchema = async () => {
      try {
        const response = await fetch(
          `/api/federated/sources/federated_${connector}/configuration/schema`
        );

        if (!response.ok) {
          throw new Error(
            `Failed to fetch configuration schema: ${response.statusText}`
          );
        }

        const responseData = await response.json();
        const configurationSchema = responseData.configuration;

        // Initialize config with defaults - merge with existing config
        // This ensures boolean fields like search_all_channels have explicit values for UI state
        if (configurationSchema) {
          const configWithDefaults: Record<string, any> = {};
          (Object.entries(configurationSchema) as [string, any][]).forEach(
            ([key, field]) => {
              if (field.default !== undefined) {
                configWithDefaults[key] = field.default;
              }
            }
          );

          setFormState((prev) => ({
            ...prev,
            // Merge defaults first, then overlay saved config values
            config: { ...configWithDefaults, ...prev.config },
            configurationSchema,
            configurationSchemaError: null,
          }));
        } else {
          setFormState((prev) => ({
            ...prev,
            configurationSchema,
            configurationSchemaError: null,
          }));
        }
      } catch (error) {
        console.error("Error fetching configuration schema:", error);
        setFormState((prev) => ({
          ...prev,
          configurationSchemaError: `Failed to load configuration schema: ${error}`,
        }));
      }
    };

    fetchConfigurationSchema();
  }, [connector, isEditMode]);

  // Show loading state at the top level if schema is loading
  if (isLoadingSchema) {
    return (
      <div className="mx-auto w-[800px]">
        <div className="flex flex-col items-center justify-center py-16">
          <Loader2 className="h-8 w-8 animate-spin text-blue-500 mb-4" />
          <div className="text-center">
            <p className="text-lg font-medium text-gray-700 mb-2">
              Loading credential schema...
            </p>
            <p className="text-sm text-gray-500">
              Retrieving required fields for this connector type
            </p>
          </div>
        </div>
      </div>
    );
  }

  const handleCredentialChange = (key: string, value: string) => {
    setFormState((prev) => ({
      ...prev,
      credentials: {
        ...prev.credentials,
        [key]: value,
      },
    }));
  };

  const handleConfigChange = (key: string, value: any) => {
    setFormState((prev) => ({
      ...prev,
      config: {
        ...prev.config,
        [key]: value,
      },
    }));
  };

  const handleValidateCredentials = async () => {
    if (!formState.schema) return;

    setIsValidating(true);
    setSubmitMessage(null);
    setSubmitSuccess(null);

    try {
      const result = await validateCredentials(
        connector,
        formState.credentials
      );
      setSubmitMessage(result.message);
      setSubmitSuccess(result.success);
    } catch (error) {
      setSubmitMessage(`Validation error: ${error}`);
      setSubmitSuccess(false);
    } finally {
      setIsValidating(false);
    }
  };

  const handleDeleteConnector = async () => {
    if (!connectorId) return;

    const confirmed = window.confirm(
      "Are you sure you want to delete this federated connector? This action cannot be undone."
    );

    if (!confirmed) return;

    setIsDeleting(true);

    try {
      const result = await deleteFederatedConnector(connectorId);

      if (result.success) {
        toast.success(result.message);
        // Redirect after a short delay
        setTimeout(() => {
          router.push("/admin/indexing/status");
        }, 500);
      } else {
        toast.error(result.message);
      }
    } catch (error) {
      toast.error(`Error deleting connector: ${error}`);
    } finally {
      setIsDeleting(false);
    }
  };

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setIsSubmitting(true);
    setSubmitMessage(null);
    setSubmitSuccess(null);

    try {
      // Validate required fields
      if (formState.schema) {
        const missingRequired = Object.entries(formState.schema)
          .filter(
            ([key, field]) => field.required && !formState.credentials[key]
          )
          .map(([key]) => key);

        if (missingRequired.length > 0) {
          setSubmitMessage(
            `Missing required fields: ${missingRequired.join(", ")}`
          );
          setSubmitSuccess(false);
          setIsSubmitting(false);
          return;
        }
      }

      // Validate configuration fields (Slack-specific validation)
      const configErrors = getConfigValidationErrors();
      if (Object.keys(configErrors).length > 0) {
        setConfigValidationErrors(configErrors);
        // Show the first error message
        const firstError = Object.values(configErrors)[0] as string;
        setSubmitMessage(firstError);
        setSubmitSuccess(false);
        setIsSubmitting(false);
        return;
      }
      setConfigValidationErrors({});

      // Validate credentials before creating/updating
      const validation = await validateCredentials(
        connector,
        formState.credentials
      );
      if (!validation.success) {
        setSubmitMessage(`Credential validation failed: ${validation.message}`);
        setSubmitSuccess(false);
        setIsSubmitting(false);
        return;
      }

      // Create or update the connector
      const result =
        isEditMode && connectorId
          ? await updateFederatedConnector(
              connectorId,
              formState.credentials,
              formState.config
            )
          : await createFederatedConnector(
              connector,
              formState.credentials,
              formState.config
            );

      setSubmitMessage(result.message);
      setSubmitSuccess(result.success);
      setIsSubmitting(false);

      if (result.success) {
        // Redirect after a short delay
        setTimeout(() => {
          router.push("/admin/indexing/status");
        }, 500);
      }
    } catch (error) {
      setSubmitMessage(`Error: ${error}`);
      setSubmitSuccess(false);
      setIsSubmitting(false);
    }
  };

  const renderCredentialFields = () => {
    if (formState.schemaError) {
      return (
        <div className="flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200">
          <AlertTriangle size={16} />
          <span className="text-sm">{formState.schemaError}</span>
        </div>
      );
    }

    if (formState.connectorError) {
      return (
        <div className="flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200">
          <AlertTriangle size={16} />
          <span className="text-sm">{formState.connectorError}</span>
        </div>
      );
    }

    if (!formState.schema) {
      return (
        <div className="text-sm text-gray-500">
          No credential schema available for this connector type.
        </div>
      );
    }

    return (
      <>
        {Object.entries(formState.schema).map(([fieldKey, fieldSpec]) => (
          <div
            key={fieldKey}
            className="flex items-center justify-between gap-4 py-2"
          >
            <div className="flex-1">
              <Text as="p" mainUiAction text04 className="mb-1">
                {fieldKey
                  .replace(/_/g, " ")
                  .replace(/\b\w/g, (l) => l.toUpperCase())}
                {fieldSpec.required && (
                  <span className="text-red-500 ml-1">*</span>
                )}
              </Text>
              {fieldSpec.description && (
                <Text as="p" mainUiMuted text03>
                  {fieldSpec.description}
                </Text>
              )}
            </div>
            <Input
              id={fieldKey}
              type={fieldSpec.secret ? "password" : "text"}
              placeholder={
                fieldSpec.example
                  ? String(fieldSpec.example)
                  : fieldSpec.description
              }
              value={formState.credentials[fieldKey] || ""}
              onChange={(e) => handleCredentialChange(fieldKey, e.target.value)}
              className="w-96"
              required={fieldSpec.required}
            />
          </div>
        ))}
      </>
    );
  };

  // Helper to determine if channels input should be disabled for Slack
  const disableSlackChannelInput = (fieldKey: string): boolean => {
    if (connector !== "slack" || fieldKey !== "channels") {
      return false;
    }
    // Disable channels field when search_all_channels is true
    return formState.config.search_all_channels === true;
  };

  // Helper to determine if channels field is required for Slack
  const isSlackChannelsRequired = (): boolean => {
    if (connector !== "slack") {
      return false;
    }
    // Channels are required when search_all_channels is false
    return formState.config.search_all_channels === false;
  };

  // Get validation errors for configuration fields (Slack-specific)
  const getConfigValidationErrors = (): Record<string, string> => {
    const errors: Record<string, string> = {};

    if (connector === "slack") {
      // Check if channels are required but not provided
      if (
        formState.config.search_all_channels === false &&
        (!formState.config.channels ||
          !Array.isArray(formState.config.channels) ||
          formState.config.channels.length === 0)
      ) {
        errors.channels =
          "At least one channel is required when 'Search All Channels' is disabled";
      }
    }

    return errors;
  };

  const renderConfigFields = () => {
    if (formState.configurationSchemaError) {
      return (
        <div className="flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200">
          <AlertTriangle size={16} />
          <span className="text-sm">{formState.configurationSchemaError}</span>
        </div>
      );
    }

    if (!formState.configurationSchema) {
      return (
        <div className="text-sm text-gray-500">
          No search configuration available for this connector type.
        </div>
      );
    }

    const channelInputPlaceholder =
      "Type channel name or regex pattern and press Enter";

    return (
      <>
        {Object.entries(formState.configurationSchema).map(
          ([fieldKey, fieldSpec]) => {
            const isBoolType = fieldSpec.type === "bool";
            const isListType = fieldSpec.type.startsWith("list[");

            return (
              <div key={fieldKey} className="space-y-2 w-full">
                {isBoolType ? (
                  <div className="flex items-center gap-3 py-2">
                    <Checkbox
                      checked={
                        formState.config[fieldKey] !== undefined
                          ? Boolean(formState.config[fieldKey])
                          : Boolean(fieldSpec.default)
                      }
                      onCheckedChange={(checked) =>
                        handleConfigChange(fieldKey, checked)
                      }
                    />
                    <div className="flex-1">
                      <Text as="p" mainUiAction text04>
                        {fieldKey
                          .replace(/_/g, " ")
                          .replace(/\b\w/g, (l) => l.toUpperCase())}
                      </Text>
                      {fieldSpec.description && (
                        <Text as="p" mainUiMuted text03>
                          {fieldSpec.description}
                        </Text>
                      )}
                    </div>
                  </div>
                ) : (
                  <>
                    {isListType ? (
                      <>
                        <Text as="p" mainUiAction text04>
                          {fieldSpec.description ||
                            fieldKey
                              .replace(/_/g, " ")
                              .replace(/\b\w/g, (l) => l.toUpperCase())}
                          {(fieldSpec.required ||
                            (fieldKey === "channels" &&
                              isSlackChannelsRequired())) && (
                            <span className="text-red-500 ml-1">*</span>
                          )}
                        </Text>
                        <ListFieldInput
                          values={
                            Array.isArray(formState.config[fieldKey])
                              ? (formState.config[fieldKey] as string[])
                              : []
                          }
                          onChange={(values) => {
                            handleConfigChange(fieldKey, values);
                            // Clear validation error when user adds channels
                            if (
                              fieldKey === "channels" &&
                              configValidationErrors.channels
                            ) {
                              setConfigValidationErrors((prev) => {
                                const { channels, ...rest } = prev;
                                return rest;
                              });
                            }
                          }}
                          placeholder={
                            fieldKey === "channels" ||
                            fieldKey === "exclude_channels"
                              ? channelInputPlaceholder
                              : "Type and press Enter to add an item"
                          }
                          disabled={disableSlackChannelInput(fieldKey)}
                          error={!!configValidationErrors[fieldKey]}
                        />
                        {configValidationErrors[fieldKey] && (
                          <Text as="p" className="text-red-500 text-sm mt-1">
                            {configValidationErrors[fieldKey]}
                          </Text>
                        )}
                      </>
                    ) : (
                      <div className="flex items-center justify-between gap-4 py-2">
                        <div className="flex-1">
                          <Text as="p" mainUiAction text04 className="mb-1">
                            {fieldKey
                              .replace(/_/g, " ")
                              .replace(/\b\w/g, (l) => l.toUpperCase())}
                            {fieldSpec.required && (
                              <span className="text-red-500 ml-1">*</span>
                            )}
                          </Text>
                          {fieldSpec.description && (
                            <Text as="p" mainUiMuted text03>
                              {fieldSpec.description}
                            </Text>
                          )}
                        </div>
                        <Input
                          id={fieldKey}
                          type={fieldSpec.type === "int" ? "number" : "text"}
                          placeholder={
                            fieldSpec.example
                              ? String(fieldSpec.example)
                              : fieldSpec.description
                          }
                          value={
                            formState.config[fieldKey] !== undefined
                              ? String(formState.config[fieldKey])
                              : ""
                          }
                          onChange={(e) => {
                            const value =
                              fieldSpec.type === "int"
                                ? parseInt(e.target.value, 10)
                                : e.target.value;
                            handleConfigChange(fieldKey, value);
                          }}
                          className="w-96"
                          required={fieldSpec.required}
                        />
                      </div>
                    )}
                  </>
                )}
              </div>
            );
          }
        )}
      </>
    );
  };

  return (
    <div className="mx-auto w-[800px] pb-8">
      <BackButton routerOverride="/admin/indexing/status" />

      <div className="flex items-center justify-between h-16 pb-2 border-b border-neutral-200 dark:border-neutral-600">
        <div className="my-auto">
          <SourceIcon iconSize={32} sourceType={connector} />
        </div>

        <div className="ml-2 overflow-hidden text-ellipsis whitespace-nowrap flex-1 mr-4">
          <div className="text-2xl font-bold text-text-default flex items-center gap-2">
            <span>
              {isEditMode ? "Edit" : "Setup"} {sourceMetadata.displayName}
            </span>
            <Badge variant="outline" className="text-xs">
              Federated
            </Badge>
            <SimpleTooltip
              tooltip={
                sourceMetadata.federatedTooltip ||
                "This is a federated connector. It will result in greater latency and lower search quality compared to regular connectors."
              }
              side="bottom"
            >
              <Info className="cursor-help" size={16} />
            </SimpleTooltip>
          </div>
        </div>

        {isEditMode && (
          <div className="ml-auto flex gap-x-2">
            <DropdownMenu>
              <DropdownMenuTrigger asChild>
                <div>
                  <OpalButton prominence="secondary" icon={SvgSettings}>
                    Manage
                  </OpalButton>
                </div>
              </DropdownMenuTrigger>
              <DropdownMenuContent align="end">
                <DropdownMenuItemWithTooltip
                  onClick={handleDeleteConnector}
                  disabled={isDeleting}
                  className="flex items-center gap-x-2 cursor-pointer px-3 py-2 text-red-600 hover:text-red-700 dark:text-red-400 dark:hover:text-red-300"
                  tooltip={isDeleting ? "Deletion in progress" : undefined}
                >
                  <Trash2Icon className="h-4 w-4" />
                  <span>{isDeleting ? "Deleting..." : "Delete"}</span>
                </DropdownMenuItemWithTooltip>
              </DropdownMenuContent>
            </DropdownMenu>
          </div>
        )}
      </div>

      <Title className="mb-2 mt-6" size="md">
        Federated Connector Configuration
      </Title>

      <Card className="px-8 py-4">
        <CardContent className="p-0">
          <form onSubmit={handleSubmit}>
            <Text as="p" headingH3>
              Credentials
            </Text>
            <Text as="p" mainUiMuted>
              Enter the credentials for this connector.
            </Text>
            <div className="space-y-4">{renderCredentialFields()}</div>
            <Separator />
            <Text as="p" headingH3>
              Configuration
            </Text>
            <div className="space-y-4">{renderConfigFields()}</div>

            <div className="flex gap-2 pt-4 w-full justify-end">
              {submitMessage && (
                <div
                  className={`flex items-center gap-2 p-2 rounded-md ${
                    submitSuccess
                      ? "bg-green-50 text-green-700 border border-green-200"
                      : "bg-red-50 text-red-700 border border-red-200"
                  }`}
                >
                  {submitSuccess ? (
                    <Check size={16} />
                  ) : (
                    <AlertTriangle size={16} />
                  )}
                  <span className="text-sm">{submitMessage}</span>
                </div>
              )}

              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <Button
                type="button"
                secondary
                onClick={handleValidateCredentials}
                disabled={isValidating || !formState.schema}
                className="flex ml-auto"
              >
                {isValidating ? "Validating..." : "Validate"}
              </Button>
              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <Button
                type="submit"
                disabled={isSubmitting || !formState.schema}
                className="flex"
                leftIcon={isSubmitting ? SimpleLoader : undefined}
              >
                {isSubmitting
                  ? isEditMode
                    ? "Updating..."
                    : "Creating..."
                  : isEditMode
                    ? "Update"
                    : "Create"}
              </Button>
            </div>
          </form>
        </CardContent>
      </Card>
    </div>
  );
}


================================================
FILE: web/src/components/admin/users/BulkAdd.tsx
================================================
"use client";

import { withFormik, FormikProps, FormikErrors, Form, Field } from "formik";
import Button from "@/refresh-components/buttons/Button";

const WHITESPACE_SPLIT = /\s+/;
const EMAIL_REGEX = /[^@]+@[^.]+\.[^.]/;

const addUsers = async (url: string, { arg }: { arg: Array<string> }) => {
  return await fetch(url, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ emails: arg }),
  });
};

export type EmailInviteStatus =
  | "SENT"
  | "NOT_CONFIGURED"
  | "SEND_FAILED"
  | "DISABLED";

interface FormProps {
  onSuccess: (emailInviteStatus: EmailInviteStatus) => void;
  onFailure: (res: Response) => void;
}

interface FormValues {
  emails: string;
}

const normalizeEmails = (emails: string) =>
  emails
    .trim()
    .split(WHITESPACE_SPLIT)
    .filter(Boolean)
    .map((email) => email.toLowerCase());

const AddUserFormRenderer = ({
  touched,
  errors,
  isSubmitting,
  handleSubmit,
}: FormikProps<FormValues>) => (
  <Form className="w-full" onSubmit={handleSubmit}>
    <Field
      id="emails"
      name="emails"
      as="textarea"
      className="w-full p-4"
      onKeyDown={(e: React.KeyboardEvent<HTMLTextAreaElement>) => {
        if (e.key === "Enter") {
          e.preventDefault();
          handleSubmit();
        }
      }}
    />
    {touched.emails && errors.emails && (
      <div className="text-error text-sm">{errors.emails}</div>
    )}
    {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
    <Button type="submit" disabled={isSubmitting} className="self-end">
      Add
    </Button>
  </Form>
);

const AddUserForm = withFormik<FormProps, FormValues>({
  mapPropsToValues: (props) => {
    return {
      emails: "",
    };
  },
  validate: (values: FormValues): FormikErrors<FormValues> => {
    const emails = normalizeEmails(values.emails);
    if (!emails.length) {
      return { emails: "Required" };
    }
    for (let email of emails) {
      if (!email.match(EMAIL_REGEX)) {
        return { emails: `${email} is not a valid email` };
      }
    }
    return {};
  },
  handleSubmit: async (values: FormValues, formikBag) => {
    const emails = normalizeEmails(values.emails);
    formikBag.setSubmitting(true);
    await addUsers("/api/manage/admin/users", { arg: emails })
      .then(async (res) => {
        if (res.ok) {
          const data = await res.json();
          formikBag.props.onSuccess(data.email_invite_status);
        } else {
          formikBag.props.onFailure(res);
        }
      })
      .finally(() => {
        formikBag.setSubmitting(false);
      });
  },
})(AddUserFormRenderer);

const BulkAdd = ({ onSuccess, onFailure }: FormProps) => {
  return <AddUserForm onSuccess={onSuccess} onFailure={onFailure} />;
};

export default BulkAdd;


================================================
FILE: web/src/components/admin/users/CenteredPageSelector.tsx
================================================
import {
  PageSelector,
  type PageSelectorProps as Props,
} from "@/components/PageSelector";

const CenteredPageSelector = ({
  currentPage,
  totalPages,
  onPageChange,
}: Props) => (
  <div className="mx-auto text-center">
    <PageSelector
      currentPage={currentPage}
      totalPages={totalPages}
      onPageChange={onPageChange}
    />
  </div>
);

export default CenteredPageSelector;


================================================
FILE: web/src/components/admin/users/InvitedUserTable.tsx
================================================
import { useState } from "react";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import CenteredPageSelector from "./CenteredPageSelector";
import { ThreeDotsLoader } from "@/components/Loading";
import { InvitedUserSnapshot } from "@/lib/types";
import { TableHeader } from "@/components/ui/table";
import { InviteUserButton } from "./buttons/InviteUserButton";
import { ErrorCallout } from "@/components/ErrorCallout";
import { FetchError } from "@/lib/fetcher";

const USERS_PER_PAGE = 10;

interface Props {
  users: InvitedUserSnapshot[];
  mutate: () => void;
  error: FetchError | null;
  isLoading: boolean;
  q: string;
}

const InvitedUserTable = ({ users, mutate, error, isLoading, q }: Props) => {
  const [currentPageNum, setCurrentPageNum] = useState<number>(1);

  if (!users.length)
    return <p>Users that have been invited will show up here</p>;

  const totalPages = Math.ceil(users.length / USERS_PER_PAGE);

  // Filter users based on the search query
  const filteredUsers = q
    ? users.filter((user) => user.email.includes(q))
    : users;

  // Get the current page of users
  const currentPageOfUsers = filteredUsers.slice(
    (currentPageNum - 1) * USERS_PER_PAGE,
    currentPageNum * USERS_PER_PAGE
  );

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (error) {
    return (
      <ErrorCallout
        errorTitle="Error loading users"
        errorMsg={error?.info?.detail}
      />
    );
  }

  return (
    <>
      <Table className="overflow-visible">
        <TableHeader>
          <TableRow>
            <TableHead>Email</TableHead>
            <TableHead>
              <div className="flex justify-end">Actions</div>
            </TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {currentPageOfUsers.length ? (
            currentPageOfUsers.map((user) => (
              <TableRow key={user.email}>
                <TableCell>{user.email}</TableCell>
                <TableCell>
                  <div className="flex justify-end">
                    <InviteUserButton
                      user={user}
                      invited={true}
                      mutate={mutate}
                    />
                  </div>
                </TableCell>
              </TableRow>
            ))
          ) : (
            <TableRow>
              <TableCell colSpan={2} className="h-24 text-center">
                {`No users found matching "${q}"`}
              </TableCell>
            </TableRow>
          )}
        </TableBody>
      </Table>
      {totalPages > 1 ? (
        <CenteredPageSelector
          currentPage={currentPageNum}
          totalPages={totalPages}
          onPageChange={setCurrentPageNum}
        />
      ) : null}
    </>
  );
};

export default InvitedUserTable;


================================================
FILE: web/src/components/admin/users/PendingUsersTable.tsx
================================================
import { useState } from "react";
import { toast } from "@/hooks/useToast";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import CenteredPageSelector from "./CenteredPageSelector";
import { ThreeDotsLoader } from "@/components/Loading";
import { InvitedUserSnapshot } from "@/lib/types";
import { TableHeader } from "@/components/ui/table";
import { Button } from "@opal/components";
import { ErrorCallout } from "@/components/ErrorCallout";
import { FetchError } from "@/lib/fetcher";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { SvgCheck } from "@opal/icons";
const USERS_PER_PAGE = 10;

interface Props {
  users: InvitedUserSnapshot[];
  mutate: () => void;
  error: FetchError | null;
  isLoading: boolean;
  q: string;
}

const PendingUsersTable = ({ users, mutate, error, isLoading, q }: Props) => {
  const [currentPageNum, setCurrentPageNum] = useState<number>(1);
  const [userToApprove, setUserToApprove] = useState<string | null>(null);

  if (!users.length)
    return <p>Users that have requested to join will show up here</p>;

  const totalPages = Math.ceil(users.length / USERS_PER_PAGE);

  // Filter users based on the search query
  const filteredUsers = q
    ? users.filter((user) => user.email.includes(q))
    : users;

  // Get the current page of users
  const currentPageOfUsers = filteredUsers.slice(
    (currentPageNum - 1) * USERS_PER_PAGE,
    currentPageNum * USERS_PER_PAGE
  );

  if (isLoading) {
    return <ThreeDotsLoader />;
  }

  if (error) {
    return (
      <ErrorCallout
        errorTitle="Error loading pending users"
        errorMsg={error?.info?.detail}
      />
    );
  }

  const handleAcceptRequest = async (email: string) => {
    const normalizedEmail = email.toLowerCase();
    try {
      await fetch("/api/tenants/users/invite/approve", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ email: normalizedEmail }),
      });
      mutate();
      setUserToApprove(null);
    } catch (error) {
      toast.error("Failed to approve user request");
    }
  };

  return (
    <>
      {userToApprove && (
        <ConfirmEntityModal
          entityType="Join Request"
          entityName={userToApprove}
          onClose={() => setUserToApprove(null)}
          onSubmit={() => handleAcceptRequest(userToApprove)}
          actionButtonText="Approve"
          action="approve the join request of"
          additionalDetails={`${userToApprove} has requested to join the team. Approving will add them as a user in this team.`}
          removeConfirmationText
        />
      )}
      <Table className="overflow-visible">
        <TableHeader>
          <TableRow>
            <TableHead>Email</TableHead>
            <TableHead>
              <div className="flex justify-end">Actions</div>
            </TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {currentPageOfUsers.length ? (
            currentPageOfUsers.map((user) => (
              <TableRow key={user.email}>
                <TableCell>{user.email}</TableCell>
                <TableCell>
                  <div className="flex justify-end">
                    <Button
                      prominence="secondary"
                      onClick={() => setUserToApprove(user.email.toLowerCase())}
                      icon={SvgCheck}
                    >
                      Accept Join Request
                    </Button>
                  </div>
                </TableCell>
              </TableRow>
            ))
          ) : (
            <TableRow>
              <TableCell colSpan={2} className="h-24 text-center">
                {`No pending users found matching "${q}"`}
              </TableCell>
            </TableRow>
          )}
        </TableBody>
      </Table>
      {totalPages > 1 ? (
        <CenteredPageSelector
          currentPage={currentPageNum}
          totalPages={totalPages}
          onPageChange={setCurrentPageNum}
        />
      ) : null}
    </>
  );
};

export default PendingUsersTable;


================================================
FILE: web/src/components/admin/users/ResetPasswordModal.tsx
================================================
import { useState } from "react";
import Modal from "@/refresh-components/Modal";
import Button from "@/refresh-components/buttons/Button";
import { User } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import Text from "@/refresh-components/texts/Text";
import { LoadingAnimation } from "@/components/Loading";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { SvgKey, SvgRefreshCw } from "@opal/icons";

export interface ResetPasswordModalProps {
  user: User;
  onClose: () => void;
}

export default function ResetPasswordModal({
  user,
  onClose,
}: ResetPasswordModalProps) {
  const [newPassword, setNewPassword] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);

  const handleResetPassword = async () => {
    setIsLoading(true);
    try {
      const response = await fetch("/api/password/reset_password", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ user_email: user.email }),
      });

      if (response.ok) {
        const data = await response.json();
        setNewPassword(data.new_password);
        toast.success("Password reset successfully");
      } else {
        const errorData = await response.json();
        toast.error(errorData.detail || "Failed to reset password");
      }
    } catch (error) {
      toast.error("An error occurred while resetting the password");
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgKey}
          title="Reset Password"
          onClose={onClose}
          description={
            newPassword
              ? undefined
              : `Are you sure you want to reset the password for ${user.email}?`
          }
        />
        <Modal.Body>
          {newPassword ? (
            <div>
              <Text as="p">New Password:</Text>
              <div className="flex items-center bg-background-tint-03 p-2 rounded gap-2">
                <Text as="p" data-testid="new-password" className="flex-grow">
                  {newPassword}
                </Text>
                <CopyIconButton getCopyText={() => newPassword} />
              </div>
              <Text as="p" text02>
                Please securely communicate this password to the user.
              </Text>
            </div>
          ) : (
            // TODO(@raunakab): migrate to opal Button once it supports ReactNode children
            <Button
              onClick={handleResetPassword}
              disabled={isLoading}
              leftIcon={SvgRefreshCw}
            >
              {isLoading ? (
                <Text as="p">
                  <LoadingAnimation text="Resetting" />
                </Text>
              ) : (
                "Reset Password"
              )}
            </Button>
          )}
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/admin/users/SignedUpUserTable.tsx
================================================
"use client";

import {
  type User,
  UserRole,
  InvitedUserSnapshot,
  USER_ROLE_LABELS,
} from "@/lib/types";
import { ReactNode, useEffect, useState } from "react";
import CenteredPageSelector from "./CenteredPageSelector";
import { toast } from "@/hooks/useToast";
import {
  Table,
  TableHead,
  TableRow,
  TableBody,
  TableCell,
} from "@/components/ui/table";
import { TableHeader } from "@/components/ui/table";
import UserRoleDropdown from "./buttons/UserRoleDropdown";
import DeleteUserButton from "./buttons/DeleteUserButton";
import DeactivateUserButton from "./buttons/DeactivateUserButton";
import usePaginatedFetch from "@/hooks/usePaginatedFetch";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import { InviteUserButton } from "./buttons/InviteUserButton";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import {
  Select,
  SelectContent,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import Button from "@/refresh-components/buttons/Button";
import { useUser } from "@/providers/UserProvider";
import { LeaveOrganizationButton } from "./buttons/LeaveOrganizationButton";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import ResetPasswordModal from "./ResetPasswordModal";
import { LogOut, UserMinus } from "lucide-react";
import Popover from "@/refresh-components/Popover";
import { SvgKey, SvgMoreHorizontal } from "@opal/icons";
import { Button as OpalButton } from "@opal/components";
const ITEMS_PER_PAGE = 10;
const PAGES_PER_BATCH = 2;

interface ActionMenuProps {
  user: User;
  currentUser: User | null;
  refresh: () => void;
  invitedUsersMutate: () => void;
  handleResetPassword: (user: User) => void;
}

export interface SignedUpUserTableProps {
  invitedUsers: InvitedUserSnapshot[];
  q: string;
  invitedUsersMutate: () => void;
  countDisplay?: ReactNode;
  onTotalItemsChange?: (count: number) => void;
  onLoadingChange?: (isLoading: boolean) => void;
}

export default function SignedUpUserTable({
  invitedUsers,
  q = "",
  invitedUsersMutate,
  countDisplay,
  onTotalItemsChange,
  onLoadingChange,
}: SignedUpUserTableProps) {
  const [filters, setFilters] = useState<{
    is_active?: boolean;
    roles?: UserRole[];
  }>({});

  const [selectedRoles, setSelectedRoles] = useState<UserRole[]>([]);
  const [resetPasswordUser, setResetPasswordUser] = useState<User | null>(null);
  const invitedEmails = invitedUsers.map((user) => user.email.toLowerCase());

  const {
    currentPageData: pageOfUsers,
    isLoading,
    error,
    currentPage,
    totalPages,
    goToPage,
    refresh,
    totalItems,
  } = usePaginatedFetch<User>({
    itemsPerPage: ITEMS_PER_PAGE,
    pagesPerBatch: PAGES_PER_BATCH,
    endpoint: "/api/manage/users/accepted",
    query: q,
    filter: filters,
  });

  const { user: currentUser } = useUser();

  useEffect(() => {
    onLoadingChange?.(isLoading);
  }, [isLoading, onLoadingChange]);

  useEffect(() => {
    if (pageOfUsers !== null) {
      onTotalItemsChange?.(totalItems);
    }
  }, [pageOfUsers, totalItems, onTotalItemsChange]);

  if (error) {
    return (
      <ErrorCallout
        errorTitle="Error loading users"
        errorMsg={error?.message}
      />
    );
  }

  const handlePopup = (message: string, type: "success" | "error") => {
    if (type === "success") refresh();
    if (type === "success") {
      toast.success(message);
    } else {
      toast.error(message);
    }
  };

  const onRoleChangeSuccess = () =>
    handlePopup("User role updated successfully!", "success");
  const onRoleChangeError = (errorMsg: string) =>
    handlePopup(`Unable to update user role - ${errorMsg}`, "error");

  const toggleRole = (roleEnum: UserRole) => {
    setFilters((prev) => {
      const currentRoles = prev.roles || [];
      const newRoles = currentRoles.includes(roleEnum)
        ? currentRoles.filter((r) => r !== roleEnum) // Remove role if already selected
        : [...currentRoles, roleEnum]; // Add role if not selected

      setSelectedRoles(newRoles); // Update selected roles state
      return {
        ...prev,
        roles: newRoles,
      };
    });
  };

  const removeRole = (roleEnum: UserRole) => {
    setSelectedRoles((prev) => prev.filter((role) => role !== roleEnum)); // Remove role from selected roles
    toggleRole(roleEnum); // Deselect the role in filters
  };

  const handleResetPassword = (user: User) => {
    setResetPasswordUser(user);
  };

  // --------------
  // Render Functions
  // --------------

  const renderFilters = () => (
    <>
      <div className="flex flex-wrap items-center justify-between gap-4 py-4">
        <div className="flex flex-wrap items-center gap-4">
          <InputSelect
            value={filters.is_active?.toString() || "all"}
            onValueChange={(selectedStatus) =>
              setFilters((prev) => {
                if (selectedStatus === "all") {
                  const { is_active, ...rest } = prev;
                  return rest;
                }
                return {
                  ...prev,
                  is_active: selectedStatus === "true",
                };
              })
            }
          >
            <InputSelect.Trigger />

            <InputSelect.Content>
              <InputSelect.Item value="all">All Status</InputSelect.Item>
              <InputSelect.Item value="true">Active</InputSelect.Item>
              <InputSelect.Item value="false">Inactive</InputSelect.Item>
            </InputSelect.Content>
          </InputSelect>

          <Select value="roles">
            <SelectTrigger className="w-[260px] h-[34px] bg-neutral">
              <SelectValue>
                {filters.roles?.length
                  ? `${filters.roles.length} role(s) selected`
                  : "All Roles"}
              </SelectValue>
            </SelectTrigger>
            <SelectContent className="bg-background-tint-00">
              {Object.entries(USER_ROLE_LABELS)
                .filter(([role]) => role !== UserRole.EXT_PERM_USER)
                .map(([role, label]) => (
                  <div
                    key={role}
                    className="flex items-center space-x-2 px-2 py-1.5 cursor-pointer hover:bg-background-200"
                    onClick={() => toggleRole(role as UserRole)}
                  >
                    <input
                      type="checkbox"
                      checked={
                        filters.roles?.includes(role as UserRole) || false
                      }
                      onChange={(e) => e.stopPropagation()}
                    />
                    <label className="text-sm font-normal">{label}</label>
                  </div>
                ))}
            </SelectContent>
          </Select>
        </div>
        {countDisplay}
      </div>
      <div className="flex gap-2 py-1">
        {selectedRoles.map((role) => (
          <button
            key={role}
            className="border border-background-300 bg-neutral p-1 rounded text-sm hover:bg-background-200"
            onClick={() => removeRole(role)}
            style={{ padding: "2px 8px" }}
          >
            <span>{USER_ROLE_LABELS[role]}</span>
            <span className="ml-3">&times;</span>
          </button>
        ))}
      </div>
    </>
  );

  const renderUserRoleDropdown = (user: User) => {
    if (user.role === UserRole.SLACK_USER) {
      return <p className="ml-2">Slack User</p>;
    }
    return (
      <UserRoleDropdown
        user={user}
        onSuccess={onRoleChangeSuccess}
        onError={onRoleChangeError}
      />
    );
  };

  const ActionMenu: React.FC<ActionMenuProps> = ({
    user,
    currentUser,
    refresh,
    invitedUsersMutate,
    handleResetPassword,
  }) => {
    const buttonClassName = "w-full";

    return (
      <Popover>
        <Popover.Trigger asChild>
          <OpalButton prominence="secondary" icon={SvgMoreHorizontal} />
        </Popover.Trigger>
        <Popover.Content>
          <div className="grid gap-1">
            {NEXT_PUBLIC_CLOUD_ENABLED && user.id === currentUser?.id ? (
              <LeaveOrganizationButton
                user={user}
                mutate={refresh}
                className={buttonClassName}
              >
                <LogOut className="mr-2 h-4 w-4" />
                <span>Leave Organization</span>
              </LeaveOrganizationButton>
            ) : (
              <>
                {!user.is_active && (
                  <DeleteUserButton
                    user={user}
                    mutate={refresh}
                    className={buttonClassName}
                  >
                    <UserMinus className="mr-2 h-4 w-4" />
                    <span>Delete User</span>
                  </DeleteUserButton>
                )}
                <DeactivateUserButton
                  user={user}
                  deactivate={user.is_active}
                  mutate={refresh}
                  className={buttonClassName}
                >
                  {/*<UserX className="mr-2 h-4 w-4" />*/}
                  {user.is_active ? "Deactivate User" : "Activate User"}
                </DeactivateUserButton>
              </>
            )}
            {user.password_configured && (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <Button
                className={buttonClassName}
                onClick={() => handleResetPassword(user)}
                leftIcon={SvgKey}
              >
                Reset Password
              </Button>
            )}
          </div>
        </Popover.Content>
      </Popover>
    );
  };

  const renderActionButtons = (user: User) => {
    return (
      <div className="flex items-center justify-end gap-2">
        {user.role === UserRole.SLACK_USER && (
          <InviteUserButton
            user={user}
            invited={invitedEmails.includes(user.email.toLowerCase())}
            mutate={[refresh, invitedUsersMutate]}
          />
        )}
        <ActionMenu
          user={user}
          currentUser={currentUser}
          refresh={refresh}
          invitedUsersMutate={invitedUsersMutate}
          handleResetPassword={handleResetPassword}
        />
      </div>
    );
  };

  return (
    <>
      {renderFilters()}
      <Table className="overflow-visible">
        <TableHeader>
          <TableRow>
            <TableHead>Email</TableHead>
            <TableHead className="text-center">Role</TableHead>
            <TableHead className="text-center">Status</TableHead>
            <TableHead>
              <div className="flex">
                <div className="ml-auto">Actions</div>
              </div>
            </TableHead>
          </TableRow>
        </TableHeader>
        {isLoading ? (
          <TableBody>
            <TableRow>
              <TableCell colSpan={4} className="text-center">
                <ThreeDotsLoader />
              </TableCell>
            </TableRow>
          </TableBody>
        ) : (
          <TableBody>
            {!pageOfUsers?.length ? (
              <TableRow>
                <TableCell colSpan={4} className="text-center">
                  <p className="pt-4 pb-4">
                    {filters.roles?.length || filters.is_active !== undefined
                      ? "No users found matching your filters"
                      : `No users found matching "${q}"`}
                  </p>
                </TableCell>
              </TableRow>
            ) : (
              pageOfUsers.map((user) => (
                <TableRow key={user.id}>
                  <TableCell>{user.email}</TableCell>
                  <TableCell className="w-[180px]">
                    {renderUserRoleDropdown(user)}
                  </TableCell>
                  <TableCell className="text-center w-[140px]">
                    <i>{user.is_active ? "Active" : "Inactive"}</i>
                  </TableCell>
                  <TableCell className="text-right  w-[300px] ">
                    {renderActionButtons(user)}
                  </TableCell>
                </TableRow>
              ))
            )}
          </TableBody>
        )}
      </Table>
      {totalPages > 1 && (
        <CenteredPageSelector
          currentPage={currentPage}
          totalPages={totalPages}
          onPageChange={goToPage}
        />
      )}
      {resetPasswordUser && (
        <ResetPasswordModal
          user={resetPasswordUser}
          onClose={() => setResetPasswordUser(null)}
        />
      )}
    </>
  );
}


================================================
FILE: web/src/components/admin/users/buttons/DeactivateUserButton.tsx
================================================
import { type User } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import Button from "@/refresh-components/buttons/Button";
import useSWRMutation from "swr/mutation";
import userMutationFetcher from "@/lib/admin/users/userMutationFetcher";
import { SvgXCircle } from "@opal/icons";
const DeactivateUserButton = ({
  user,
  deactivate,
  mutate,
  className,
  children,
}: {
  user: User;
  deactivate: boolean;
  mutate: () => void;
  className?: string;
  children?: string;
}) => {
  const { trigger, isMutating } = useSWRMutation(
    deactivate
      ? "/api/manage/admin/deactivate-user"
      : "/api/manage/admin/activate-user",
    userMutationFetcher,
    {
      onSuccess: () => {
        mutate();
        toast.success(`User ${deactivate ? "deactivated" : "activated"}!`);
      },
      onError: (errorMsg) => toast.error(errorMsg.message),
    }
  );
  return (
    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
    <Button
      className={className}
      onClick={() => trigger({ user_email: user.email })}
      disabled={isMutating}
      leftIcon={SvgXCircle}
      tertiary
    >
      {children}
    </Button>
  );
};

export default DeactivateUserButton;


================================================
FILE: web/src/components/admin/users/buttons/DeleteUserButton.tsx
================================================
import { type User } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import userMutationFetcher from "@/lib/admin/users/userMutationFetcher";
import useSWRMutation from "swr/mutation";
import Button from "@/refresh-components/buttons/Button";
import { useState } from "react";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";

const DeleteUserButton = ({
  user,
  mutate,
  className,
  children,
}: {
  user: User;
  mutate: () => void;
  className?: string;
  children?: React.ReactNode;
}) => {
  const { trigger, isMutating } = useSWRMutation(
    "/api/manage/admin/delete-user",
    userMutationFetcher,
    {
      onSuccess: () => {
        mutate();
        toast.success("User deleted successfully!");
      },
      onError: (errorMsg) =>
        toast.error(`Unable to delete user - ${errorMsg.message}`),
    }
  );

  const [showDeleteModal, setShowDeleteModal] = useState(false);
  return (
    <>
      {showDeleteModal && (
        <ConfirmEntityModal
          entityType="user"
          entityName={user.email}
          onClose={() => setShowDeleteModal(false)}
          onSubmit={() => trigger({ user_email: user.email, method: "DELETE" })}
          additionalDetails="All data associated with this user will be deleted (including personas, tools and chat sessions)."
        />
      )}

      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
      <Button
        className={className}
        onClick={() => setShowDeleteModal(true)}
        disabled={isMutating}
        danger
      >
        {children}
      </Button>
    </>
  );
};

export default DeleteUserButton;


================================================
FILE: web/src/components/admin/users/buttons/InviteUserButton.tsx
================================================
import {
  type InvitedUserSnapshot,
  type AcceptedUserSnapshot,
} from "@/lib/types";

import { toast } from "@/hooks/useToast";
import useSWRMutation from "swr/mutation";
import { Button } from "@opal/components";
import GenericConfirmModal from "@/components/modals/GenericConfirmModal";
import { useState } from "react";

export const InviteUserButton = ({
  user,
  invited,
  mutate,
}: {
  user: AcceptedUserSnapshot | InvitedUserSnapshot;
  invited: boolean;
  mutate: (() => void) | (() => void)[];
}) => {
  const { trigger: inviteTrigger, isMutating: isInviting } = useSWRMutation(
    "/api/manage/admin/users",
    async (url, { arg }: { arg: { emails: string[] } }) => {
      const response = await fetch(url, {
        method: "PUT",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(arg),
      });
      if (!response.ok) {
        throw new Error(await response.text());
      }
      return response.json();
    },
    {
      onSuccess: () => {
        setShowInviteModal(false);
        if (typeof mutate === "function") {
          mutate();
        } else {
          mutate.forEach((fn) => fn());
        }
        toast.success("User invited successfully!");
      },
      onError: (errorMsg) => {
        setShowInviteModal(false);
        toast.error(`Unable to invite user - ${errorMsg}`);
      },
    }
  );

  const { trigger: uninviteTrigger, isMutating: isUninviting } = useSWRMutation(
    "/api/manage/admin/remove-invited-user",
    async (url, { arg }: { arg: { user_email: string } }) => {
      const response = await fetch(url, {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(arg),
      });
      if (!response.ok) {
        throw new Error(await response.text());
      }
      return response.json();
    },
    {
      onSuccess: () => {
        setShowInviteModal(false);
        if (typeof mutate === "function") {
          mutate();
        } else {
          mutate.forEach((fn) => fn());
        }
        toast.success("User uninvited successfully!");
      },
      onError: (errorMsg) => {
        setShowInviteModal(false);
        toast.error(`Unable to uninvite user - ${errorMsg}`);
      },
    }
  );

  const [showInviteModal, setShowInviteModal] = useState(false);

  const handleConfirm = () => {
    const normalizedEmail = user.email.toLowerCase();
    if (invited) {
      uninviteTrigger({ user_email: normalizedEmail });
    } else {
      inviteTrigger({ emails: [normalizedEmail] });
    }
  };

  const isMutating = isInviting || isUninviting;

  return (
    <>
      {showInviteModal && (
        <GenericConfirmModal
          title={`${invited ? "Uninvite" : "Invite"} User`}
          message={`Are you sure you want to ${
            invited ? "uninvite" : "invite"
          } ${user.email}?`}
          onClose={() => setShowInviteModal(false)}
          onConfirm={handleConfirm}
        />
      )}

      <Button disabled={isMutating} onClick={() => setShowInviteModal(true)}>
        {invited ? "Uninvite" : "Invite"}
      </Button>
    </>
  );
};


================================================
FILE: web/src/components/admin/users/buttons/LeaveOrganizationButton.tsx
================================================
import { type User } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import userMutationFetcher from "@/lib/admin/users/userMutationFetcher";
import useSWRMutation from "swr/mutation";
import Button from "@/refresh-components/buttons/Button";
import { useState } from "react";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { useRouter } from "next/navigation";

export const LeaveOrganizationButton = ({
  user,
  mutate,
  className,
  children,
}: {
  user: User;
  mutate: () => void;
  className?: string;
  children?: React.ReactNode;
}) => {
  const router = useRouter();
  const { trigger, isMutating } = useSWRMutation(
    "/api/tenants/leave-team",
    userMutationFetcher,
    {
      onSuccess: () => {
        mutate();
        toast.success("Successfully left the team!");
      },
      onError: (errorMsg) => toast.error(`Unable to leave team - ${errorMsg}`),
    }
  );

  const [showLeaveModal, setShowLeaveModal] = useState(false);

  const handleLeaveOrganization = async () => {
    await trigger({ user_email: user.email, method: "POST" });
    router.push("/");
  };

  return (
    <>
      {showLeaveModal && (
        <ConfirmEntityModal
          actionButtonText="Leave"
          entityType="team"
          entityName="your team"
          onClose={() => setShowLeaveModal(false)}
          onSubmit={handleLeaveOrganization}
          additionalDetails="You will lose access to all team data and resources."
        />
      )}

      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
      <Button
        className={className}
        onClick={() => setShowLeaveModal(true)}
        disabled={isMutating}
        internal
      >
        {children}
      </Button>
    </>
  );
};


================================================
FILE: web/src/components/admin/users/buttons/UserRoleDropdown.tsx
================================================
import {
  type User,
  UserRole,
  USER_ROLE_LABELS,
  INVALID_ROLE_HOVER_TEXT,
} from "@/lib/types";
import userMutationFetcher from "@/lib/admin/users/userMutationFetcher";
import useSWRMutation from "swr/mutation";

import InputSelect from "@/refresh-components/inputs/InputSelect";
import GenericConfirmModal from "@/components/modals/GenericConfirmModal";
import { useState } from "react";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";

export interface UserRoleDropdownProps {
  user: User;
  onSuccess: () => void;
  onError: (message: string) => void;
}

export default function UserRoleDropdown({
  user,
  onSuccess,
  onError,
}: UserRoleDropdownProps) {
  const [showConfirmModal, setShowConfirmModal] = useState(false);
  const [pendingRole, setPendingRole] = useState<string | null>(null);

  const { trigger: setUserRole, isMutating: isSettingRole } = useSWRMutation(
    "/api/manage/set-user-role",
    userMutationFetcher,
    { onSuccess, onError }
  );
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const handleChange = (value: string) => {
    if (value === user.role) return;
    if (user.role === UserRole.CURATOR) {
      setShowConfirmModal(true);
      setPendingRole(value);
    } else {
      setUserRole({
        user_email: user.email,
        new_role: value,
      });
    }
  };

  const handleConfirm = () => {
    if (pendingRole) {
      setUserRole({
        user_email: user.email,
        new_role: pendingRole,
      });
    }
    setShowConfirmModal(false);
    setPendingRole(null);
  };

  return (
    <>
      {showConfirmModal && (
        <GenericConfirmModal
          title="Change Curator Role"
          message={`Warning: Switching roles from Curator to ${
            USER_ROLE_LABELS[pendingRole as UserRole] ??
            USER_ROLE_LABELS[user.role]
          } will remove their status as individual curators from all groups.`}
          confirmText={`Switch Role to ${
            USER_ROLE_LABELS[pendingRole as UserRole] ??
            USER_ROLE_LABELS[user.role]
          }`}
          onClose={() => setShowConfirmModal(false)}
          onConfirm={handleConfirm}
        />
      )}

      <InputSelect
        value={user.role}
        onValueChange={handleChange}
        disabled={isSettingRole}
      >
        <InputSelect.Trigger />

        <InputSelect.Content>
          {(Object.entries(USER_ROLE_LABELS) as [UserRole, string][]).map(
            ([role, label]) => {
              // Don't want to ever show external permissioned users because it's scary
              if (role === UserRole.EXT_PERM_USER) return null;

              // Only want to show limited users if paid enterprise features are enabled
              // Also, dont want to show these other roles in general
              const isNotVisibleRole =
                (!isPaidEnterpriseFeaturesEnabled &&
                  role === UserRole.GLOBAL_CURATOR) ||
                role === UserRole.CURATOR ||
                role === UserRole.LIMITED ||
                role === UserRole.SLACK_USER;

              // Always show the current role
              const isCurrentRole = user.role === role;

              return isNotVisibleRole && !isCurrentRole ? null : (
                <InputSelect.Item
                  key={role}
                  value={role}
                  data-testid={`user-role-dropdown-${role}`}
                  title={INVALID_ROLE_HOVER_TEXT[role] ?? ""}
                  data-tooltip-delay="0"
                >
                  {label}
                </InputSelect.Item>
              );
            }
          )}
        </InputSelect.Content>
      </InputSelect>
    </>
  );
}


================================================
FILE: web/src/components/auth/AuthErrorDisplay.tsx
================================================
"use client";

import { useEffect } from "react";
import { toast } from "@/hooks/useToast";

const ERROR_MESSAGES = {
  Anonymous: "Your team does not have anonymous access enabled.",
};

export default function AuthErrorDisplay({
  searchParams,
}: {
  searchParams: any;
}) {
  const error = searchParams?.error;

  useEffect(() => {
    if (error) {
      toast.error(
        ERROR_MESSAGES[error as keyof typeof ERROR_MESSAGES] ||
          "An error occurred."
      );
    }
  }, [error]);

  return null;
}


================================================
FILE: web/src/components/auth/AuthFlowContainer.tsx
================================================
import Link from "next/link";
import { OnyxIcon } from "../icons/icons";

export default function AuthFlowContainer({
  children,
  authState,
  footerContent,
}: {
  children: React.ReactNode;
  authState?: "signup" | "login" | "join";
  footerContent?: React.ReactNode;
}) {
  return (
    <div className="p-4 flex flex-col items-center justify-center min-h-screen bg-background">
      <div className="w-full max-w-md flex items-start flex-col bg-background-tint-00 rounded-16 shadow-lg shadow-02 p-6">
        <OnyxIcon size={44} className="text-theme-primary-05" />
        <div className="w-full mt-3">{children}</div>
      </div>
      {authState === "login" && (
        <div className="text-sm mt-6 text-center w-full text-text-03 mainUiBody mx-auto">
          {footerContent ?? (
            <>
              New to Onyx?{" "}
              <Link
                href="/auth/signup"
                className="text-text-05 mainUiAction underline transition-colors duration-200"
              >
                Create an Account
              </Link>
            </>
          )}
        </div>
      )}
      {authState === "signup" && (
        <div className="text-sm mt-6 text-center w-full text-text-03 mainUiBody mx-auto">
          Already have an account?{" "}
          <Link
            href="/auth/login?autoRedirectToSignup=false"
            className="text-text-05 mainUiAction underline transition-colors duration-200"
          >
            Sign In
          </Link>
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/components/chat/DynamicBottomSpacer.tsx
================================================
"use client";

import React, { useEffect, useRef, useCallback } from "react";
import { useCurrentChatState } from "@/app/app/stores/useChatSessionStore";
import { useScrollContainer } from "@/components/chat/ScrollContainerContext";

// Small offset from the top of the scroll viewport where the anchor should appear.
// The header is outside the scroll container, so when scrolled to the bottom
// during the push-up effect, we only need minimal padding.
const ANCHOR_TOP_OFFSET_PX = 16;

// Duration of smooth scroll animation (browser default is ~400-500ms, we add buffer)
const SMOOTH_SCROLL_DURATION_MS = 600;

// How long to wait after content stops changing before deactivating
const CONTENT_SETTLED_DEBOUNCE_MS = 500;

export interface DynamicBottomSpacerProps {
  /**
   * Node ID of the anchor message (the new user message)
   */
  anchorNodeId?: number;
}

/**
 * DynamicBottomSpacer creates a "fresh chat" effect by filling the space
 * below messages to push content up when a new round starts.
 * Uses ResizeObserver to efficiently detect content changes instead of polling.
 */
const DynamicBottomSpacer = React.memo(
  ({ anchorNodeId }: DynamicBottomSpacerProps) => {
    const spacerRef = useRef<HTMLDivElement>(null);
    const chatState = useCurrentChatState();
    const isStreaming = chatState === "streaming" || chatState === "loading";

    // Get scroll container refs from context (provided by ChatScrollContainer)
    const { scrollContainerRef, contentWrapperRef, spacerHeightRef } =
      useScrollContainer();

    // Track state with refs to avoid re-renders
    const isActiveRef = useRef(false);
    const initialSpacerHeightRef = useRef(0);
    const initialContentHeightRef = useRef(0);
    const currentSpacerHeightRef = useRef(0);
    const prevAnchorNodeIdRef = useRef<number | undefined>(undefined);
    const wasStreamingRef = useRef(false);
    const resizeObserverRef = useRef<ResizeObserver | null>(null);
    const settledTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
      null
    );

    /**
     * Set spacer height directly on DOM (no re-renders)
     */
    const setHeight = useCallback(
      (height: number) => {
        const h = Math.max(0, Math.round(height));
        currentSpacerHeightRef.current = h;
        spacerHeightRef.current = h;
        if (spacerRef.current) {
          spacerRef.current.style.height = `${h}px`;
        }
      },
      [spacerHeightRef]
    );

    /**
     * Get the scroll container element from context ref
     */
    const getScrollContainer = useCallback(() => {
      return scrollContainerRef.current;
    }, [scrollContainerRef]);

    /**
     * Get content height (total scrollHeight minus current spacer height)
     */
    const getContentHeight = useCallback(() => {
      const scrollContainer = getScrollContainer();
      if (!scrollContainer) return 0;
      return scrollContainer.scrollHeight - currentSpacerHeightRef.current;
    }, [getScrollContainer]);

    /**
     * Update spacer height based on content growth
     */
    const updateSpacerHeight = useCallback(() => {
      if (!isActiveRef.current) return;

      const currentContentHeight = getContentHeight();
      const contentGrowth =
        currentContentHeight - initialContentHeightRef.current;

      // New spacer height = initial spacer - content growth
      const newHeight = initialSpacerHeightRef.current - contentGrowth;

      if (newHeight <= 0) {
        setHeight(0);
        isActiveRef.current = false;
      } else {
        setHeight(newHeight);
      }
    }, [setHeight, getContentHeight]);

    /**
     * Stop observing and clean up
     */
    const stopObserving = useCallback(() => {
      if (resizeObserverRef.current) {
        resizeObserverRef.current.disconnect();
        resizeObserverRef.current = null;
      }
      if (settledTimeoutRef.current) {
        clearTimeout(settledTimeoutRef.current);
        settledTimeoutRef.current = null;
      }
    }, []);

    /**
     * Start observing content changes with ResizeObserver
     */
    const startObserving = useCallback(() => {
      const scrollContainer = getScrollContainer();
      if (!scrollContainer || resizeObserverRef.current) return;

      resizeObserverRef.current = new ResizeObserver(() => {
        // Content size changed - update spacer
        updateSpacerHeight();

        // Reset the "settled" timeout - content is still changing
        if (settledTimeoutRef.current) {
          clearTimeout(settledTimeoutRef.current);
        }

        // After content stops changing for CONTENT_SETTLED_DEBOUNCE_MS, deactivate
        settledTimeoutRef.current = setTimeout(() => {
          // Only deactivate if streaming has ended
          if (!wasStreamingRef.current) {
            isActiveRef.current = false;
            stopObserving();
          }
        }, CONTENT_SETTLED_DEBOUNCE_MS);
      });

      // Observe the content wrapper using context ref
      if (contentWrapperRef.current) {
        resizeObserverRef.current.observe(contentWrapperRef.current);
      }
    }, [
      getScrollContainer,
      updateSpacerHeight,
      stopObserving,
      contentWrapperRef,
    ]);

    /**
     * Activate the spacer - calculate initial height and scroll to bottom
     */
    const activate = useCallback(() => {
      if (!anchorNodeId) return;

      // If already active, stop the current observation to restart fresh
      if (isActiveRef.current) {
        stopObserving();
        isActiveRef.current = false;
      }

      const anchor = document.getElementById(`message-${anchorNodeId}`);
      if (!anchor) return;

      const scrollContainer = getScrollContainer();
      if (!scrollContainer) return;

      // Get measurements first (before modifying spacer)
      const viewportHeight = scrollContainer.clientHeight;
      const currentSpacerHeight = currentSpacerHeightRef.current;

      // Calculate content height (scrollHeight minus current spacer)
      const contentHeight = scrollContainer.scrollHeight - currentSpacerHeight;

      // Calculate anchor's position using getBoundingClientRect for accuracy
      const containerRect = scrollContainer.getBoundingClientRect();
      const anchorRect = anchor.getBoundingClientRect();

      // Anchor's visual offset from the scroll container's top edge
      const anchorVisualOffset = anchorRect.top - containerRect.top;

      // Anchor's absolute position in the scrollable content
      const anchorOffsetInContent =
        anchorVisualOffset + scrollContainer.scrollTop;

      // Calculate spacer height needed to position anchor just below the top offset
      // when scrolled to the absolute bottom.
      const spacerHeight =
        anchorOffsetInContent -
        contentHeight +
        viewportHeight -
        ANCHOR_TOP_OFFSET_PX;

      // If spacer height is <= 0, no push-up effect is needed.
      // This naturally handles new chats and short conversations where
      // the anchor is already positioned appropriately.
      if (spacerHeight <= 0) return;

      // Store initial content height for tracking content growth during streaming
      initialContentHeightRef.current = contentHeight;
      initialSpacerHeightRef.current = spacerHeight;
      isActiveRef.current = true;

      // Set the spacer height
      setHeight(spacerHeight);

      // Tell ChatScrollContainer to not do instant auto-scroll during smooth scroll
      scrollContainer.dataset.smoothScrollActive = "true";

      // Start observing content changes
      startObserving();

      // Scroll to bottom smoothly (after spacer height is applied)
      requestAnimationFrame(() => {
        requestAnimationFrame(() => {
          scrollContainer.scrollTo({
            top: scrollContainer.scrollHeight - scrollContainer.clientHeight,
            behavior: "smooth",
          });

          // Clear the flag after smooth scroll completes and force
          // ChatScrollContainer to refresh scroll state (button visibility,
          // fades). The MutationObserver doesn't observe attribute changes,
          // so we dispatch a synthetic scroll event.
          setTimeout(() => {
            scrollContainer.dataset.smoothScrollActive = "false";
            scrollContainer.dispatchEvent(new Event("scroll"));
          }, SMOOTH_SCROLL_DURATION_MS);
        });
      });
    }, [
      anchorNodeId,
      setHeight,
      getScrollContainer,
      startObserving,
      stopObserving,
    ]);

    /**
     * Main effect: detect streaming start/stop and anchor changes
     */
    useEffect(() => {
      const anchorChanged = prevAnchorNodeIdRef.current !== anchorNodeId;
      const streamingStarted = isStreaming && !wasStreamingRef.current;

      prevAnchorNodeIdRef.current = anchorNodeId;
      wasStreamingRef.current = isStreaming;

      // Activate when: new anchor appears while streaming, or streaming starts with anchor
      if (
        (anchorChanged && anchorNodeId && isStreaming) ||
        (streamingStarted && anchorNodeId)
      ) {
        requestAnimationFrame(() => {
          activate();
        });
      }

      // Note: smoothScrollActive is cleared by the 600ms timeout inside activate().
      // We intentionally do NOT clear it when streaming ends — for fast responses,
      // streaming can end before the smooth scroll animation completes, which would
      // remove the suppression too early and flash the scroll-to-bottom button.
    }, [anchorNodeId, isStreaming, activate]);

    /**
     * Reset when anchor is cleared
     */
    useEffect(() => {
      if (!anchorNodeId) {
        setHeight(0);
        isActiveRef.current = false;
        initialSpacerHeightRef.current = 0;
        initialContentHeightRef.current = 0;
        stopObserving();
      }
    }, [anchorNodeId, setHeight, stopObserving]);

    /**
     * Cleanup on unmount
     */
    useEffect(() => {
      return () => {
        stopObserving();
        const scrollContainer = getScrollContainer();
        if (scrollContainer) {
          scrollContainer.dataset.smoothScrollActive = "false";
        }
      };
    }, [getScrollContainer, stopObserving]);

    return (
      <div
        ref={spacerRef}
        data-dynamic-spacer="true"
        aria-hidden="true"
        className="w-full"
        style={{
          height: "0px",
          flexShrink: 0,
        }}
      />
    );
  }
);

DynamicBottomSpacer.displayName = "DynamicBottomSpacer";

export default DynamicBottomSpacer;


================================================
FILE: web/src/components/chat/FederatedOAuthModal.tsx
================================================
"use client";

import { useContext, useState } from "react";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { ValidSources } from "@/lib/types";
import { SettingsContext } from "@/providers/SettingsProvider";
import { getSourceMetadata } from "@/lib/sources";
import useFederatedOAuthStatus from "@/hooks/useFederatedOAuthStatus";
import { SvgLink } from "@opal/icons";
import { Card } from "@/refresh-components/cards";
import { ContentAction } from "@opal/layouts";

export interface FederatedConnectorOAuthStatus {
  federated_connector_id: number;
  source: string;
  name: string;
  has_oauth_token: boolean;
  oauth_token_expires_at?: string;
  authorize_url?: string;
}

const MAX_SKIP_COUNT = 2;

function useFederatedOauthModal() {
  // Check localStorage for previous skip preference and count
  const [oAuthModalState, setOAuthModalState] = useState<{
    hidden: boolean;
    skipCount: number;
  }>(() => {
    if (typeof window !== "undefined") {
      const skipData = localStorage.getItem("federatedOAuthModalSkipData");
      if (skipData) {
        try {
          const parsed = JSON.parse(skipData);
          // Check if we're still within the hide duration (1 hour)
          const now = Date.now();
          const hideUntil = parsed.hideUntil || 0;
          const isWithinHideDuration = now < hideUntil;

          return {
            hidden: parsed.permanentlyHidden || isWithinHideDuration,
            skipCount: parsed.skipCount || 0,
          };
        } catch {
          return { hidden: false, skipCount: 0 };
        }
      }
    }
    return { hidden: false, skipCount: 0 };
  });

  const handleOAuthModalSkip = () => {
    if (typeof window !== "undefined") {
      const newSkipCount = oAuthModalState.skipCount + 1;

      if (newSkipCount >= MAX_SKIP_COUNT) {
        // Permanently hide the modal after max skips
        const skipData = {
          skipCount: newSkipCount,
          hideUntil: 0,
          permanentlyHidden: true,
        };

        localStorage.setItem(
          "federatedOAuthModalSkipData",
          JSON.stringify(skipData)
        );

        setOAuthModalState({
          hidden: true,
          skipCount: newSkipCount,
        });
      } else {
        // Hide for 1 hour after first skip
        const oneHourFromNow = Date.now() + 60 * 60 * 1000;

        const skipData = {
          skipCount: newSkipCount,
          hideUntil: oneHourFromNow,
          permanentlyHidden: false,
        };

        localStorage.setItem(
          "federatedOAuthModalSkipData",
          JSON.stringify(skipData)
        );

        setOAuthModalState({
          hidden: true,
          skipCount: newSkipCount,
        });
      }
    }
  };

  return {
    oAuthModalState,
    handleOAuthModalSkip,
  };
}

export default function FederatedOAuthModal() {
  const settings = useContext(SettingsContext);

  const {
    oAuthModalState: { hidden },
    handleOAuthModalSkip,
  } = useFederatedOauthModal();

  const { connectors: federatedConnectors, hasUnauthenticatedConnectors } =
    useFederatedOAuthStatus();

  const needsAuth = federatedConnectors.filter((c) => !c.has_oauth_token);

  if (needsAuth.length === 0 || hidden || !hasUnauthenticatedConnectors) {
    return null;
  }

  const applicationName =
    settings?.enterpriseSettings?.application_name || "Onyx";

  return (
    <Modal open>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgLink}
          title="Connect Your Apps"
          description={`Improve answer quality by letting ${applicationName} search all your connected data.`}
        />
        <Modal.Body>
          {needsAuth.map((connector) => {
            const sourceMetadata = getSourceMetadata(
              connector.source as ValidSources
            );

            return (
              <Card key={connector.federated_connector_id}>
                <ContentAction
                  icon={sourceMetadata.icon}
                  title={sourceMetadata.displayName}
                  description={sourceMetadata.category}
                  sizePreset="main-content"
                  variant="section"
                  rightChildren={
                    <Button
                      prominence="secondary"
                      target="_blank"
                      href={connector.authorize_url}
                    >
                      Connect
                    </Button>
                  }
                />
              </Card>
            );
          })}
        </Modal.Body>
        <Modal.Footer>
          <Button onClick={handleOAuthModalSkip}>Skip for now</Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/chat/MCPApiKeyModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { Input } from "@/components/ui/input";
import Label from "@/refresh-components/form/Label";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertCircle, SvgEye, SvgEyeClosed, SvgKey } from "@opal/icons";
interface MCPAuthTemplate {
  headers: Array<{ name: string; value: string }>;
  request_body_params: Array<{ path: string; value: string }>;
  required_fields: string[];
}

interface MCPApiKeyModalProps {
  isOpen: boolean;
  onClose: () => void;
  serverName: string;
  serverId: number;
  authTemplate?: MCPAuthTemplate;
  onSubmit: (serverId: number, apiKey: string) => void;
  onSubmitCredentials?: (
    serverId: number,
    credentials: Record<string, string>
  ) => void;
  onSuccess?: () => void;
  isAuthenticated?: boolean;
  existingCredentials?: Record<string, string>;
}

export default function MCPApiKeyModal({
  isOpen,
  onClose,
  serverName,
  serverId,
  authTemplate,
  onSubmit,
  onSubmitCredentials,
  onSuccess,
  isAuthenticated = false,
  existingCredentials,
}: MCPApiKeyModalProps) {
  const [apiKey, setApiKey] = useState("");
  const [showApiKey, setShowApiKey] = useState(false);
  const [credentials, setCredentials] = useState<Record<string, string>>({});
  const [showCredentials, setShowCredentials] = useState<
    Record<string, boolean>
  >({});
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const isTemplateMode =
    authTemplate && authTemplate.required_fields.length > 0;

  // Initialize form with existing credentials when modal opens
  useEffect(() => {
    if (isOpen && existingCredentials) {
      if (isTemplateMode) {
        // For template mode, set the credentials object
        setCredentials(existingCredentials);
      } else {
        // For legacy API key mode, set the api_key field
        const apiKeyValue = existingCredentials.api_key || "";
        setApiKey(apiKeyValue);
      }
    }
  }, [isOpen, existingCredentials, isTemplateMode]);

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError(null); // Clear any previous errors

    if (isTemplateMode) {
      // Check all required fields are filled
      const hasAllFields = authTemplate!.required_fields.every(
        (field) => credentials[field]?.trim()
      );
      if (!hasAllFields) return;

      setIsSubmitting(true);
      try {
        if (onSubmitCredentials) {
          await onSubmitCredentials(serverId, credentials);
        }
        setCredentials({});
        if (onSuccess) {
          onSuccess();
        }
        onClose();
      } catch (error) {
        console.error("Error submitting credentials:", error);
        let errorMessage = "Failed to save credentials";
        if (error instanceof Error) {
          errorMessage = error.message;
        } else if (typeof error === "string") {
          errorMessage = error;
        }
        setError(errorMessage);
      } finally {
        setIsSubmitting(false);
      }
    } else {
      // Legacy API key mode
      if (!apiKey.trim()) return;

      setIsSubmitting(true);
      try {
        await onSubmit(serverId, apiKey);
        setApiKey("");
        if (onSuccess) {
          onSuccess();
        }
        onClose();
      } catch (error) {
        console.error("Error submitting API key:", error);
        let errorMessage = "Failed to save API key";
        if (error instanceof Error) {
          errorMessage = error.message;
        } else if (typeof error === "string") {
          errorMessage = error;
        }
        setError(errorMessage);
      } finally {
        setIsSubmitting(false);
      }
    }
  };

  const handleClose = () => {
    setApiKey("");
    setShowApiKey(false);
    setCredentials({});
    setShowCredentials({});
    setError(null);
    onClose();
  };

  const toggleCredentialVisibility = (field: string) => {
    setShowCredentials((prev) => ({
      ...prev,
      [field]: !prev[field],
    }));
  };

  const updateCredential = (field: string, value: string) => {
    setCredentials((prev) => ({
      ...prev,
      [field]: value,
    }));
  };

  const credsType = isTemplateMode ? "Credentials" : "API Key";
  return (
    <Modal open={isOpen} onOpenChange={handleClose}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header
          icon={SvgKey}
          title={isAuthenticated ? `Manage ${credsType}` : `Enter ${credsType}`}
          onClose={handleClose}
        />
        <Modal.Body>
          <Text as="p">
            {isAuthenticated
              ? `Update your ${credsType} for ${serverName}.`
              : `Enter your ${credsType} for ${serverName} to enable authentication.`}
          </Text>
          <Text as="p" text02>
            {isAuthenticated
              ? "Changes will be validated against the server before being saved."
              : `Your ${credsType} will be validated against the server and stored securely.`}
          </Text>

          {error && (
            <div className="flex items-center space-x-2 p-3 bg-red-50 border border-red-200 rounded-md text-red-800 text-sm">
              <SvgAlertCircle className="h-4 w-4 flex-shrink-0" />
              <span>{error}</span>
            </div>
          )}

          <form onSubmit={handleSubmit} className="space-y-4">
            {isTemplateMode ? (
              // Template-based credential fields
              <div className="space-y-4">
                {authTemplate!.required_fields.map((field) => (
                  <div key={field} className="space-y-2">
                    <Label name={field}>
                      <Text>
                        {field
                          .replace(/_/g, " ")
                          .replace(/\b\w/g, (l) => l.toUpperCase())}
                      </Text>
                    </Label>
                    <div className="relative">
                      <Input
                        id={field}
                        type={showCredentials[field] ? "text" : "password"}
                        value={credentials[field] || ""}
                        onChange={(e) =>
                          updateCredential(field, e.target.value)
                        }
                        placeholder={`Enter your ${field.replace(/_/g, " ")}`}
                        className="pr-10"
                        required
                      />
                      <button
                        type="button"
                        onClick={() => toggleCredentialVisibility(field)}
                        className="absolute right-3 top-1/2 -translate-y-1/2 text-subtle hover:text-emphasis"
                      >
                        {showCredentials[field] ? (
                          <SvgEyeClosed className="h-4 w-4" />
                        ) : (
                          <SvgEye className="h-4 w-4" />
                        )}
                      </button>
                    </div>
                  </div>
                ))}
              </div>
            ) : (
              // Legacy API key field
              <div className="space-y-2">
                <Label name="apiKey">
                  <Text>{credsType}</Text>
                </Label>
                <div className="relative">
                  <Input
                    id="apiKey"
                    type={showApiKey ? "text" : "password"}
                    value={apiKey}
                    onChange={(e) => setApiKey(e.target.value)}
                    placeholder={`Enter your ${credsType}`}
                    className="pr-10"
                    required
                  />
                  <button
                    type="button"
                    onClick={() => setShowApiKey(!showApiKey)}
                    className="absolute right-3 top-1/2 -translate-y-1/2 text-subtle hover:text-emphasis"
                  >
                    {showApiKey ? (
                      <SvgEyeClosed className="h-4 w-4" />
                    ) : (
                      <SvgEye className="h-4 w-4" />
                    )}
                  </button>
                </div>
              </div>
            )}

            <div className="flex justify-end space-x-2 pt-4">
              <Button
                disabled={isSubmitting}
                prominence="secondary"
                onClick={handleClose}
              >
                Cancel
              </Button>
              <Button
                disabled={
                  isSubmitting ||
                  (isTemplateMode
                    ? !authTemplate!.required_fields.every(
                        (field) => credentials[field]?.trim()
                      )
                    : !apiKey.trim())
                }
                type="submit"
              >
                {isSubmitting
                  ? "Saving..."
                  : isAuthenticated
                    ? `Update ${credsType}`
                    : `Save ${credsType}`}
              </Button>
            </div>
          </form>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/chat/MinimalMarkdown.test.tsx
================================================
import { render, screen } from "@testing-library/react";
import "@testing-library/jest-dom";
import MinimalMarkdown from "./MinimalMarkdown";

describe("MinimalMarkdown", () => {
  describe("Link handling", () => {
    test("converts bare email markdown links to mailto links", () => {
      render(
        <MinimalMarkdown content="[support@anthropic.com](support@anthropic.com)" />
      );

      const link = screen.getByText("support@anthropic.com").closest("a");
      expect(link).toHaveAttribute("href", "mailto:support@anthropic.com");
    });

    test("preserves explicit mailto links", () => {
      render(
        <MinimalMarkdown content="[support@anthropic.com](mailto:support@anthropic.com)" />
      );

      const link = screen.getByText("support@anthropic.com").closest("a");
      expect(link).toHaveAttribute("href", "mailto:support@anthropic.com");
    });

    test("does not restore hrefs removed by url sanitization", () => {
      render(<MinimalMarkdown content="[click](javascript:alert(1))" />);

      const link = screen.getByText("click").closest("a");
      expect(link).not.toHaveAttribute("href");
    });
  });
});


================================================
FILE: web/src/components/chat/MinimalMarkdown.tsx
================================================
import { CodeBlock } from "@/app/app/message/CodeBlock";
import { extractCodeText } from "@/app/app/message/codeUtils";
import {
  MemoizedLink,
  MemoizedParagraph,
} from "@/app/app/message/MemoizedTextComponents";
import { useMemo, CSSProperties } from "react";
import ReactMarkdown, { type Components } from "react-markdown";
import remarkGfm from "remark-gfm";
import rehypeHighlight from "rehype-highlight";
import remarkMath from "remark-math";
import rehypeKatex from "rehype-katex";
import "katex/dist/katex.min.css";
import { cn, transformLinkUri } from "@/lib/utils";

type MinimalMarkdownComponentOverrides = Partial<Components>;

interface MinimalMarkdownProps {
  content: string;
  className?: string;
  showHeader?: boolean;
  /**
   * Override specific markdown renderers.
   * Any renderer not provided will fall back to this component's defaults.
   */
  components?: MinimalMarkdownComponentOverrides;
}

export default function MinimalMarkdown({
  content,
  className = "",
  showHeader = true,
  components,
}: MinimalMarkdownProps) {
  const markdownComponents = useMemo(() => {
    const defaults: Components = {
      a: MemoizedLink,
      p: MemoizedParagraph,
      pre: ({ node, className, children }: any) => {
        // Don't render the pre wrapper - CodeBlock handles its own wrapper
        return <>{children}</>;
      },
      code: ({ node, inline, className, children, ...props }: any) => {
        const codeText = extractCodeText(node, content, children);
        return (
          <CodeBlock
            className={className}
            codeText={codeText}
            showHeader={showHeader}
          >
            {children}
          </CodeBlock>
        );
      },
    };

    return {
      ...defaults,
      ...(components ?? {}),
    } satisfies Components;
  }, [content, components, showHeader]);

  return (
    <ReactMarkdown
      className={cn(
        "prose dark:prose-invert max-w-full text-sm break-words",
        className
      )}
      components={markdownComponents}
      rehypePlugins={[rehypeHighlight, rehypeKatex]}
      remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
      urlTransform={transformLinkUri}
    >
      {content}
    </ReactMarkdown>
  );
}


================================================
FILE: web/src/components/chat/ProviderContext.tsx
================================================
"use client";
import {
  WellKnownLLMProviderDescriptor,
  LLMProviderDescriptor,
} from "@/interfaces/llm";
import React, {
  createContext,
  useContext,
  useState,
  useEffect,
  useCallback,
} from "react";
import { useUser } from "@/providers/UserProvider";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { useLLMProviderOptions } from "@/lib/hooks/useLLMProviderOptions";
import { testDefaultProvider as testDefaultProviderSvc } from "@/lib/llmConfig/svc";

interface ProviderContextType {
  shouldShowConfigurationNeeded: boolean;
  providerOptions: WellKnownLLMProviderDescriptor[];
  refreshProviderInfo: () => Promise<void>;
  // Expose configured provider instances for components that need it (e.g., onboarding)
  llmProviders: LLMProviderDescriptor[] | undefined;
  isLoadingProviders: boolean;
  hasProviders: boolean;
}

const ProviderContext = createContext<ProviderContextType | undefined>(
  undefined
);

const DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY = "defaultLlmProviderTestComplete";

function checkDefaultLLMProviderTestComplete() {
  if (typeof window === "undefined") return true;
  return (
    localStorage.getItem(DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY) === "true"
  );
}

function setDefaultLLMProviderTestComplete() {
  if (typeof window === "undefined") return;
  localStorage.setItem(DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY, "true");
}

export function ProviderContextProvider({
  children,
}: {
  children: React.ReactNode;
}) {
  const { user } = useUser();

  // Use SWR hooks instead of raw fetch
  const {
    llmProviders,
    isLoading: isLoadingProviders,
    refetch: refetchProviders,
  } = useLLMProviders();
  const { llmProviderOptions: providerOptions, refetch: refetchOptions } =
    useLLMProviderOptions();

  const [defaultCheckSuccessful, setDefaultCheckSuccessful] =
    useState<boolean>(true);

  // Test the default provider - only runs if test hasn't passed yet
  const testDefaultProvider = useCallback(async () => {
    const shouldCheck =
      !checkDefaultLLMProviderTestComplete() &&
      (!user || user.role === "admin");

    if (shouldCheck) {
      const success = await testDefaultProviderSvc();
      setDefaultCheckSuccessful(success);
      if (success) {
        setDefaultLLMProviderTestComplete();
      }
    }
  }, [user]);

  // Test default provider on mount
  useEffect(() => {
    testDefaultProvider();
  }, [testDefaultProvider]);

  const hasProviders = (llmProviders?.length ?? 0) > 0;
  const validProviderExists = hasProviders && defaultCheckSuccessful;

  const shouldShowConfigurationNeeded =
    !validProviderExists && (providerOptions?.length ?? 0) > 0;

  const refreshProviderInfo = useCallback(async () => {
    // Refetch provider lists and re-test default provider if needed
    await Promise.all([
      refetchProviders(),
      refetchOptions(),
      testDefaultProvider(),
    ]);
  }, [refetchProviders, refetchOptions, testDefaultProvider]);

  return (
    <ProviderContext.Provider
      value={{
        shouldShowConfigurationNeeded,
        providerOptions: providerOptions ?? [],
        refreshProviderInfo,
        llmProviders,
        isLoadingProviders,
        hasProviders,
      }}
    >
      {children}
    </ProviderContext.Provider>
  );
}

export function useProviderStatus() {
  const context = useContext(ProviderContext);
  if (context === undefined) {
    throw new Error(
      "useProviderStatus must be used within a ProviderContextProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/components/chat/ScrollContainerContext.tsx
================================================
"use client";

import React, {
  createContext,
  useContext,
  useMemo,
  RefObject,
  MutableRefObject,
} from "react";

interface ScrollContainerContextType {
  scrollContainerRef: RefObject<HTMLDivElement | null>;
  contentWrapperRef: RefObject<HTMLDivElement | null>;
  /** Shared ref for the DynamicBottomSpacer's current height (written by spacer, read by scroll container). */
  spacerHeightRef: MutableRefObject<number>;
}

const ScrollContainerContext = createContext<
  ScrollContainerContextType | undefined
>(undefined);

export function ScrollContainerProvider({
  children,
  scrollContainerRef,
  contentWrapperRef,
  spacerHeightRef,
}: {
  children: React.ReactNode;
  scrollContainerRef: RefObject<HTMLDivElement | null>;
  contentWrapperRef: RefObject<HTMLDivElement | null>;
  spacerHeightRef: MutableRefObject<number>;
}) {
  // Memoize context value to prevent unnecessary re-renders of consumers.
  // The refs themselves are stable, but without memoization, a new object
  // would be created on every parent re-render.
  const value = useMemo(
    () => ({ scrollContainerRef, contentWrapperRef, spacerHeightRef }),
    [scrollContainerRef, contentWrapperRef, spacerHeightRef]
  );

  return (
    <ScrollContainerContext.Provider value={value}>
      {children}
    </ScrollContainerContext.Provider>
  );
}

/**
 * Hook to access the scroll container and content wrapper refs.
 * Must be used within a ScrollContainerProvider (inside ChatScrollContainer).
 */
export function useScrollContainer() {
  const context = useContext(ScrollContainerContext);
  if (context === undefined) {
    throw new Error(
      "useScrollContainer must be used within a ScrollContainerProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/components/context/EmbeddingContext.tsx
================================================
import React, {
  createContext,
  useState,
  useContext,
  ReactNode,
  useEffect,
} from "react";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";

interface EmbeddingFormContextType {
  formStep: number;
  formValues: Record<string, any>;
  setFormValues: (values: Record<string, any>) => void;
  nextFormStep: (contract?: string) => void;
  prevFormStep: () => void;
  formStepToLast: () => void;
  setFormStep: React.Dispatch<React.SetStateAction<number>>;
  allowAdvanced: boolean;
  setAllowAdvanced: React.Dispatch<React.SetStateAction<boolean>>;
  allowCreate: boolean;
  setAllowCreate: React.Dispatch<React.SetStateAction<boolean>>;
}

const EmbeddingFormContext = createContext<
  EmbeddingFormContextType | undefined
>(undefined);

export const EmbeddingFormProvider: React.FC<{
  children: ReactNode;
}> = ({ children }) => {
  const router = useRouter();
  const searchParams = useSearchParams();
  const pathname = usePathname();

  // Initialize formStep based on the URL parameter
  const stepFromUrl = parseInt(searchParams?.get("step") || "0", 10);
  const [formStep, setFormStep] = useState(stepFromUrl);
  const [formValues, setFormValues] = useState<Record<string, any>>({});

  const [allowAdvanced, setAllowAdvanced] = useState(false);
  const [allowCreate, setAllowCreate] = useState(false);

  const nextFormStep = (values = "") => {
    setFormStep((prevStep) => prevStep + 1);
    setFormValues((prevValues) => ({ ...prevValues, values }));
  };

  const prevFormStep = () => {
    setFormStep((currentStep) => Math.max(currentStep - 1, 0));
  };

  const formStepToLast = () => {
    setFormStep(2);
  };

  useEffect(() => {
    // Update URL when formStep changes
    const updatedSearchParams = new URLSearchParams(
      searchParams?.toString() || ""
    );
    const existingStep = updatedSearchParams?.get("step");
    updatedSearchParams.set("step", formStep.toString());
    const newUrl = `${pathname}?${updatedSearchParams.toString()}`;

    if (!existingStep) {
      router.replace(newUrl as Route);
    } else if (newUrl !== pathname) {
      router.push(newUrl as Route);
    }
  }, [formStep, router, pathname]);

  // Update formStep when URL changes
  useEffect(() => {
    if (stepFromUrl !== formStep) {
      setFormStep(stepFromUrl);
    }
  }, [stepFromUrl]);

  const contextValue: EmbeddingFormContextType = {
    formStep,
    formValues,
    setFormValues: (values) =>
      setFormValues((prevValues) => ({ ...prevValues, ...values })),
    nextFormStep,
    prevFormStep,
    formStepToLast,
    setFormStep,
    allowAdvanced,
    setAllowAdvanced,
    allowCreate,
    setAllowCreate: setAllowCreate,
  };

  return (
    <EmbeddingFormContext.Provider value={contextValue}>
      {children}
    </EmbeddingFormContext.Provider>
  );
};

export const useEmbeddingFormContext = () => {
  const context = useContext(EmbeddingFormContext);
  if (context === undefined) {
    throw new Error(
      "useEmbeddingFormContext must be used within a FormProvider"
    );
  }
  return context;
};


================================================
FILE: web/src/components/context/FormContext.tsx
================================================
import React, {
  createContext,
  useState,
  useContext,
  ReactNode,
  useEffect,
} from "react";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";
import { ValidSources } from "@/lib/types";

interface FormContextType {
  formStep: number;
  formValues: Record<string, any>;
  setFormValues: (values: Record<string, any>) => void;
  nextFormStep: (contract?: string) => void;
  prevFormStep: () => void;
  formStepToLast: () => void;
  connector: ValidSources;
  setFormStep: React.Dispatch<React.SetStateAction<number>>;
  allowAdvanced: boolean;
  setAllowAdvanced: React.Dispatch<React.SetStateAction<boolean>>;
  allowCreate: boolean;
  setAllowCreate: React.Dispatch<React.SetStateAction<boolean>>;
}

const FormContext = createContext<FormContextType | undefined>(undefined);

// TODO: deprecate this
export const FormProvider: React.FC<{
  children: ReactNode;
  connector: ValidSources;
}> = ({ children, connector }) => {
  const router = useRouter();
  const searchParams = useSearchParams();
  const pathname = usePathname();

  // Initialize formStep based on the URL parameter
  const formStepFromUrlParams = parseInt(searchParams?.get("step") || "0", 10);
  const [formStep, setFormStep] = useState(formStepFromUrlParams);
  const [formValues, setFormValues] = useState<Record<string, any>>({});

  const [allowAdvanced, setAllowAdvanced] = useState(false);
  const [allowCreate, setAllowCreate] = useState(false);

  const nextFormStep = (values = "") => {
    setFormStep((prevStep) => prevStep + 1);
    setFormValues((prevValues) => ({ ...prevValues, values }));
  };

  const prevFormStep = () => {
    setFormStep((currentStep) => Math.max(currentStep - 1, 0));
  };

  const formStepToLast = () => {
    setFormStep(2);
  };

  useEffect(() => {
    // Update URL when formStep changes
    const updatedSearchParams = new URLSearchParams(
      searchParams?.toString() || ""
    );
    updatedSearchParams.set("step", formStep.toString());
    const newUrl = `${pathname}?${updatedSearchParams.toString()}`;

    if (!formStepFromUrlParams) {
      router.replace(newUrl as Route);
    } else if (newUrl !== pathname) {
      router.push(newUrl as Route);
    }
  }, [formStep, router, pathname, formStepFromUrlParams]);

  useEffect(() => {
    if (formStepFromUrlParams !== formStep) {
      setFormStep(formStepFromUrlParams);
    }
  }, [formStepFromUrlParams]);

  const contextValue: FormContextType = {
    formStep,
    formValues,
    setFormValues: (values) =>
      setFormValues((prevValues) => ({ ...prevValues, ...values })),
    nextFormStep,
    prevFormStep,
    formStepToLast,
    setFormStep,
    connector,
    allowAdvanced,
    setAllowAdvanced,
    allowCreate,
    setAllowCreate,
  };

  return (
    <FormContext.Provider value={contextValue}>{children}</FormContext.Provider>
  );
};

export const useFormContext = () => {
  const context = useContext(FormContext);
  if (context === undefined) {
    throw new Error("useFormContext must be used within a FormProvider");
  }
  return context;
};


================================================
FILE: web/src/components/context/ModalContext.tsx
================================================
"use client";

import React, { createContext, useContext, useEffect, useState } from "react";
import NewTeamModal from "@/components/modals/NewTeamModal";
import NewTenantModal from "@/sections/modals/NewTenantModal";
import { NewTenantInfo } from "@/lib/types";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { useUser } from "@/providers/UserProvider";

type ModalContextType = {
  showNewTeamModal: boolean;
  setShowNewTeamModal: (show: boolean) => void;
  newTenantInfo: NewTenantInfo | null;
  setNewTenantInfo: (info: NewTenantInfo | null) => void;
  invitationInfo: NewTenantInfo | null;
  setInvitationInfo: (info: NewTenantInfo | null) => void;
};

const ModalContext = createContext<ModalContextType | undefined>(undefined);

export const useModalContext = () => {
  const context = useContext(ModalContext);
  if (context === undefined) {
    throw new Error("useModalContext must be used within a ModalProvider");
  }
  return context;
};

export const ModalProvider: React.FC<{
  children: React.ReactNode;
}> = ({ children }) => {
  const { user } = useUser();
  const [showNewTeamModal, setShowNewTeamModal] = useState(false);
  const [newTenantInfo, setNewTenantInfo] = useState<NewTenantInfo | null>(
    null
  );
  const [invitationInfo, setInvitationInfo] = useState<NewTenantInfo | null>(
    null
  );

  // Sync modal states with user info — clear when backend no longer has the data
  useEffect(() => {
    if (user?.tenant_info?.new_tenant) {
      setNewTenantInfo(user.tenant_info.new_tenant);
    } else {
      setNewTenantInfo(null);
    }
    if (user?.tenant_info?.invitation) {
      setInvitationInfo(user.tenant_info.invitation);
    } else {
      setInvitationInfo(null);
    }
  }, [user?.tenant_info]);

  // Render all application-wide modals
  const renderModals = () => {
    if (!user || !NEXT_PUBLIC_CLOUD_ENABLED) return <></>;

    return (
      <>
        {/* Modal for users to request to join an existing team */}
        <NewTeamModal />

        {/* Modal for users who've been accepted to a new team */}
        {newTenantInfo && (
          <NewTenantModal
            tenantInfo={newTenantInfo}
            // Close function to clear the modal state
            onClose={() => setNewTenantInfo(null)}
          />
        )}

        {/* Modal for users who've been invited to join a team */}
        {invitationInfo && (
          <NewTenantModal
            isInvite={true}
            tenantInfo={invitationInfo}
            // Close function to clear the modal state
            onClose={() => setInvitationInfo(null)}
          />
        )}
      </>
    );
  };

  return (
    <ModalContext.Provider
      value={{
        showNewTeamModal,
        setShowNewTeamModal,
        newTenantInfo,
        setNewTenantInfo,
        invitationInfo,
        setInvitationInfo,
      }}
    >
      {children}
      {renderModals()}
    </ModalContext.Provider>
  );
};


================================================
FILE: web/src/components/context/NRFPreferencesContext.tsx
================================================
"use client";

import React, { createContext, useContext, useState } from "react";
import { LocalStorageKeys } from "@/lib/extension/constants";

interface NRFPreferencesContextValue {
  useOnyxAsNewTab: boolean;
  setUseOnyxAsNewTab: (v: boolean) => void;
}

const NRFPreferencesContext = createContext<
  NRFPreferencesContextValue | undefined
>(undefined);

function useLocalStorageState<T>(
  key: string,
  defaultValue: T
): [T, (value: T) => void] {
  const [state, setState] = useState<T>(() => {
    if (typeof window !== "undefined") {
      const storedValue = localStorage.getItem(key);
      return storedValue ? JSON.parse(storedValue) : defaultValue;
    }
    return defaultValue;
  });

  const setValue = (value: T) => {
    setState(value);
    if (typeof window !== "undefined") {
      localStorage.setItem(key, JSON.stringify(value));
    }
  };

  return [state, setValue];
}

export function NRFPreferencesProvider({
  children,
}: {
  children: React.ReactNode;
}) {
  const [useOnyxAsNewTab, setUseOnyxAsNewTab] = useLocalStorageState<boolean>(
    LocalStorageKeys.USE_ONYX_AS_NEW_TAB,
    true
  );

  return (
    <NRFPreferencesContext.Provider
      value={{
        useOnyxAsNewTab,
        setUseOnyxAsNewTab,
      }}
    >
      {children}
    </NRFPreferencesContext.Provider>
  );
}

export function useNRFPreferences() {
  const context = useContext(NRFPreferencesContext);
  if (!context) {
    throw new Error(
      "useNRFPreferences must be used within an NRFPreferencesProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/components/credentials/CredentialFields.tsx
================================================
import { JSX } from "react";

export default function CredentialSubText({
  children,
}: {
  children: JSX.Element | string;
}) {
  return (
    <p className="text-sm mb-2 whitespace-break-spaces text-text-500">
      {children}
    </p>
  );
}


================================================
FILE: web/src/components/credentials/CredentialSection.tsx
================================================
"use client";

import { AccessType, ValidSources } from "@/lib/types";
import useSWR, { mutate } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { FaKey } from "react-icons/fa";
import { useState } from "react";
import { FiEdit2 } from "react-icons/fi";
import {
  deleteCredential,
  swapCredential,
  updateCredential,
  updateCredentialWithPrivateKey,
} from "@/lib/credential";
import { toast } from "@/hooks/useToast";
import CreateCredential from "./actions/CreateCredential";
import { CCPairFullInfo } from "@/app/admin/connector/[ccPairId]/types";
import ModifyCredential from "./actions/ModifyCredential";
import { Text } from "@opal/components";
import {
  buildCCPairInfoUrl,
  buildSimilarCredentialInfoURL,
} from "@/app/admin/connector/[ccPairId]/lib";
import Modal from "@/refresh-components/Modal";
import EditCredential from "./actions/EditCredential";
import { getSourceDisplayName } from "@/lib/sources";
import {
  ConfluenceCredentialJson,
  Credential,
} from "@/lib/connectors/credentials";
import {
  getConnectorOauthRedirectUrl,
  useOAuthDetails,
} from "@/lib/connectors/oauth";
import { Spinner } from "@/components/Spinner";
import { CreateStdOAuthCredential } from "@/components/credentials/actions/CreateStdOAuthCredential";
import { Card } from "../ui/card";
import { isTypedFileField, TypedFile } from "@/lib/connectors/fileTypes";
import { SvgEdit, SvgKey } from "@opal/icons";

export interface CredentialSectionProps {
  ccPair: CCPairFullInfo;
  sourceType: ValidSources;
  refresh: () => void;
}

export default function CredentialSection({
  ccPair,
  sourceType,
  refresh,
}: CredentialSectionProps) {
  const { data: credentials } = useSWR<Credential<ConfluenceCredentialJson>[]>(
    buildSimilarCredentialInfoURL(sourceType),
    errorHandlingFetcher,
    { refreshInterval: 5000 } // 5 seconds
  );
  const { data: editableCredentials } = useSWR<Credential<any>[]>(
    buildSimilarCredentialInfoURL(sourceType, true),
    errorHandlingFetcher,
    { refreshInterval: 5000 }
  );
  const { data: oauthDetails, isLoading: oauthDetailsLoading } =
    useOAuthDetails(sourceType);

  const makeShowCreateCredential = async () => {
    if (oauthDetailsLoading || !oauthDetails) {
      return;
    }

    if (oauthDetails.oauth_enabled) {
      if (oauthDetails.additional_kwargs.length > 0) {
        setShowCreateCredential(true);
      } else {
        const redirectUrl = await getConnectorOauthRedirectUrl(sourceType, {});
        if (redirectUrl) {
          window.location.href = redirectUrl;
        }
      }
    } else {
      setShowModifyCredential(false);
      setShowCreateCredential(true);
    }
  };

  const onSwap = async (
    selectedCredential: Credential<any>,
    connectorId: number,
    accessType: AccessType
  ) => {
    const response = await swapCredential(
      selectedCredential.id,
      connectorId,
      accessType
    );
    if (response.ok) {
      mutate(buildSimilarCredentialInfoURL(sourceType));
      refresh();

      toast.success("Swapped credential successfully!");
    } else {
      const errorData = await response.json();
      toast.error(
        `Issue swapping credential: ${
          errorData.detail || errorData.message || "Unknown error"
        }`
      );
    }
  };

  const onUpdateCredential = async (
    selectedCredential: Credential<any | null>,
    details: any,
    onSucces: () => void
  ) => {
    let privateKey: TypedFile | null = null;
    Object.entries(details).forEach(([key, value]) => {
      if (isTypedFileField(key)) {
        privateKey = value as TypedFile;
        delete details[key];
      }
    });
    let response;
    if (privateKey) {
      response = await updateCredentialWithPrivateKey(
        selectedCredential.id,
        details,
        privateKey
      );
    } else {
      response = await updateCredential(selectedCredential.id, details);
    }
    if (response.ok) {
      toast.success("Updated credential");
      onSucces();
    } else {
      toast.error("Issue updating credential");
    }
  };

  const onEditCredential = (credential: Credential<any>) => {
    closeModifyCredential();
    setEditingCredential(credential);
  };

  const onDeleteCredential = async (credential: Credential<any | null>) => {
    await deleteCredential(credential.id, true);
    mutate(buildCCPairInfoUrl(ccPair.id));
  };
  const defaultedCredential = ccPair.credential;

  const [showModifyCredential, setShowModifyCredential] = useState(false);
  const [showCreateCredential, setShowCreateCredential] = useState(false);
  const [editingCredential, setEditingCredential] =
    useState<Credential<any> | null>(null);

  const closeModifyCredential = () => {
    setShowModifyCredential(false);
  };

  const closeCreateCredential = () => {
    setShowCreateCredential(false);
  };

  const closeEditingCredential = () => {
    setEditingCredential(null);
    setShowModifyCredential(true);
  };
  if (!credentials || !editableCredentials) {
    return <></>;
  }

  return (
    <div
      className="flex
      flex-col
      gap-y-4
      rounded-lg
      bg-background"
    >
      <Card className="p-6">
        <div className="flex items-center">
          <div className="flex-shrink-0 mr-3">
            <FaKey className="h-4 w-4 text-muted-foreground" />
          </div>
          <div className="flex-grow flex flex-col justify-center">
            <div className="flex items-center justify-between">
              <div>
                <Text as="p">
                  {ccPair.credential.name ||
                    `Credential #${ccPair.credential.id}`}
                </Text>
                <div className="text-xs text-muted-foreground/70">
                  Created{" "}
                  <i>
                    {new Date(
                      ccPair.credential.time_created
                    ).toLocaleDateString(undefined, {
                      year: "numeric",
                      month: "short",
                      day: "numeric",
                    })}
                  </i>
                  {ccPair.credential.user_email && (
                    <>
                      {" "}
                      by <i>{ccPair.credential.user_email}</i>
                    </>
                  )}
                </div>
              </div>
              <button
                onClick={() => setShowModifyCredential(true)}
                className="inline-flex
                  items-center
                  justify-center
                  p-2
                  rounded-md
                  text-muted-foreground
                  hover:bg-accent
                  hover:text-accent-foreground
                  transition-colors"
              >
                <FiEdit2 className="h-4 w-4" />
                <span className="sr-only">Update Credentials</span>
              </button>
            </div>
          </div>
        </div>
      </Card>

      {showModifyCredential && (
        <Modal open onOpenChange={closeModifyCredential}>
          <Modal.Content>
            <Modal.Header
              icon={SvgEdit}
              title="Update Credentials"
              onClose={closeModifyCredential}
            />
            <Modal.Body>
              <ModifyCredential
                close={closeModifyCredential}
                accessType={ccPair.access_type}
                attachedConnector={ccPair.connector}
                defaultedCredential={defaultedCredential}
                credentials={credentials}
                editableCredentials={editableCredentials}
                onDeleteCredential={onDeleteCredential}
                onEditCredential={(credential: Credential<any>) =>
                  onEditCredential(credential)
                }
                onSwap={onSwap}
                onCreateNew={() => makeShowCreateCredential()}
              />
            </Modal.Body>
          </Modal.Content>
        </Modal>
      )}

      {editingCredential && (
        <Modal open onOpenChange={closeEditingCredential}>
          <Modal.Content>
            <Modal.Header
              icon={SvgEdit}
              title="Edit Credential"
              onClose={closeEditingCredential}
            />
            <Modal.Body>
              <EditCredential
                onUpdate={onUpdateCredential}
                credential={editingCredential}
                onClose={closeEditingCredential}
              />
            </Modal.Body>
          </Modal.Content>
        </Modal>
      )}

      {showCreateCredential && (
        <Modal open onOpenChange={closeCreateCredential}>
          <Modal.Content>
            <Modal.Header
              icon={SvgKey}
              title={`Create ${getSourceDisplayName(sourceType)} Credential`}
              onClose={closeCreateCredential}
            />
            <Modal.Body>
              {oauthDetailsLoading ? (
                <Spinner />
              ) : (
                <>
                  {oauthDetails && oauthDetails.oauth_enabled ? (
                    <CreateStdOAuthCredential
                      sourceType={sourceType}
                      additionalFields={oauthDetails.additional_kwargs}
                    />
                  ) : (
                    <CreateCredential
                      sourceType={sourceType}
                      accessType={ccPair.access_type}
                      swapConnector={ccPair.connector}
                      onSwap={onSwap}
                      onClose={closeCreateCredential}
                    />
                  )}
                </>
              )}
            </Modal.Body>
          </Modal.Content>
        </Modal>
      )}
    </div>
  );
}


================================================
FILE: web/src/components/credentials/actions/CreateCredential.tsx
================================================
import { useState } from "react";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { ValidSources, AccessType } from "@/lib/types";
import { FaAccusoft } from "react-icons/fa";
import { submitCredential } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/Field";
import { Form, Formik, FormikHelpers } from "formik";
import { toast } from "@/hooks/useToast";
import GDriveMain from "@/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage";
import { Connector } from "@/lib/connectors/connectors";
import { Credential, credentialTemplates } from "@/lib/connectors/credentials";
import { GmailMain } from "@/app/admin/connectors/[connector]/pages/gmail/GmailPage";
import { ActionType, dictionaryType } from "../types";
import { createValidationSchema } from "../lib";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import {
  IsPublicGroupSelectorFormType,
  IsPublicGroupSelector,
} from "@/components/IsPublicGroupSelector";
import { useUser } from "@/providers/UserProvider";
import CardSection from "@/components/admin/CardSection";
import { CredentialFieldsRenderer } from "./CredentialFieldsRenderer";
import { TypedFile } from "@/lib/connectors/fileTypes";
import ConnectorDocsLink from "@/components/admin/connectors/ConnectorDocsLink";
import { SvgPlusCircle } from "@opal/icons";
const CreateButton = ({
  onClick,
  isSubmitting,
  isAdmin,
  groups,
}: {
  onClick: () => void;
  isSubmitting: boolean;
  isAdmin: boolean;
  groups: number[];
}) => (
  <OpalButton
    disabled={isSubmitting || (!isAdmin && groups.length === 0)}
    onClick={onClick}
    icon={SvgPlusCircle}
  >
    Create
  </OpalButton>
);

type formType = IsPublicGroupSelectorFormType & {
  name: string;
  [key: string]: any; // For additional credential fields
};

export default function CreateCredential({
  hideSource,
  sourceType,
  accessType,
  close,
  onClose = () => null,
  onSwitch,
  onSwap = async () => null,
  swapConnector,
  refresh = () => null,
}: {
  // Source information
  hideSource?: boolean; // hides docs link
  sourceType: ValidSources;
  accessType: AccessType;

  // Optional toggle- close section after selection?
  close?: boolean;

  // Special handlers
  onClose?: () => void;
  // Switch currently selected credential
  onSwitch?: (selectedCredential: Credential<any>) => Promise<void>;
  // Switch currently selected credential + link with connector
  onSwap?: (
    selectedCredential: Credential<any>,
    connectorId: number,
    accessType: AccessType
  ) => void;

  // For swapping credentials on selection
  swapConnector?: Connector<any>;

  // Mutating parent state
  refresh?: () => void;
}) {
  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
  const [authMethod, setAuthMethod] = useState<string>();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const { isAdmin } = useUser();

  const handleSubmit = async (
    values: formType,
    formikHelpers: FormikHelpers<formType>,
    action: ActionType
  ) => {
    const { setSubmitting, validateForm } = formikHelpers;

    const errors = await validateForm(values);
    if (Object.keys(errors).length > 0) {
      formikHelpers.setErrors(errors);
      return;
    }

    setSubmitting(true);
    formikHelpers.setSubmitting(true);

    const { name, is_public, groups, ...credentialValues } = values;

    let privateKey: TypedFile | null = null;
    const filteredCredentialValues = Object.fromEntries(
      Object.entries(credentialValues).filter(([key, value]) => {
        if (value instanceof TypedFile) {
          privateKey = value;
          return false;
        }
        return value !== null && value !== "";
      })
    );

    try {
      const response = await submitCredential({
        credential_json: filteredCredentialValues,
        admin_public: true,
        curator_public: is_public,
        groups: groups,
        name: name,
        source: sourceType,
        private_key: privateKey || undefined,
      });

      const { message, isSuccess, credential } = response;

      if (!credential) {
        throw new Error("No credential returned");
      }

      if (isSuccess && swapConnector) {
        if (action === "createAndSwap") {
          onSwap(credential, swapConnector.id, accessType);
        } else {
          toast.success("Created new credential!");
        }
        onClose();
      } else {
        if (isSuccess) {
          toast.success(message);
        } else {
          toast.error(message);
        }
      }

      if (close) {
        onClose();
      }
      await refresh();

      if (onSwitch) {
        onSwitch(response?.credential!);
      }
    } catch (error) {
      console.error("Error submitting credential:", error);
      toast.error("Error submitting credential");
    } finally {
      formikHelpers.setSubmitting(false);
    }
  };

  if (sourceType == "gmail") {
    return <GmailMain />;
  }

  if (sourceType == "google_drive") {
    return <GDriveMain />;
  }

  const credentialTemplate: dictionaryType = credentialTemplates[sourceType];
  const validationSchema = createValidationSchema(credentialTemplate);

  // Set initial auth method for templates with multiple auth methods
  const templateWithAuth = credentialTemplate as any;
  const initialAuthMethod =
    templateWithAuth?.authMethods?.[0]?.value || undefined;

  return (
    <Formik
      initialValues={
        {
          name: "",
          is_public: isAdmin || !isPaidEnterpriseFeaturesEnabled,
          groups: [],
          ...(initialAuthMethod && {
            authentication_method: initialAuthMethod,
          }),
        } as formType
      }
      validationSchema={validationSchema}
      onSubmit={() => {}} // This will be overridden by our custom submit handlers
    >
      {(formikProps) => {
        // Update authentication_method in formik when authMethod changes
        if (
          authMethod &&
          formikProps.values.authentication_method !== authMethod
        ) {
          formikProps.setFieldValue("authentication_method", authMethod);
        }

        return (
          <Form className="w-full flex items-stretch">
            {!hideSource && <ConnectorDocsLink sourceType={sourceType} />}
            <CardSection className="w-full items-start dark:bg-neutral-900 mt-4 flex flex-col gap-y-6">
              <TextFormField
                name="name"
                placeholder="(Optional) credential name.."
                label="Name:"
              />

              <CredentialFieldsRenderer
                credentialTemplate={credentialTemplate}
                authMethod={authMethod || initialAuthMethod}
                setAuthMethod={setAuthMethod}
              />

              {!swapConnector && (
                <div className="mt-4 flex w-full flex-col sm:flex-row justify-between items-end">
                  <div className="w-full sm:w-3/4 mb-4 sm:mb-0">
                    {isPaidEnterpriseFeaturesEnabled && (
                      <div className="flex flex-col items-start">
                        {isAdmin && (
                          <AdvancedOptionsToggle
                            showAdvancedOptions={showAdvancedOptions}
                            setShowAdvancedOptions={setShowAdvancedOptions}
                          />
                        )}
                        {(showAdvancedOptions || !isAdmin) && (
                          <IsPublicGroupSelector
                            formikProps={formikProps}
                            objectName="credential"
                            publicToWhom="Curators"
                          />
                        )}
                      </div>
                    )}
                  </div>
                  <CreateButton
                    onClick={() =>
                      handleSubmit(formikProps.values, formikProps, "create")
                    }
                    isSubmitting={formikProps.isSubmitting}
                    isAdmin={isAdmin}
                    groups={formikProps.values.groups}
                  />
                </div>
              )}
            </CardSection>
            {swapConnector && (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <Button
                className="bg-rose-500 hover:bg-rose-400"
                onClick={() =>
                  handleSubmit(formikProps.values, formikProps, "createAndSwap")
                }
                disabled={formikProps.isSubmitting}
                leftIcon={() => (
                  <FaAccusoft className="fill-text-inverted-05" />
                )}
              >
                Create
              </Button>
            )}
          </Form>
        );
      }}
    </Formik>
  );
}


================================================
FILE: web/src/components/credentials/actions/CreateStdOAuthCredential.tsx
================================================
import * as Yup from "yup";

import { Button } from "@opal/components";
import { ValidSources } from "@/lib/types";
import { TextFormField } from "@/components/Field";
import { Form, Formik, FormikHelpers } from "formik";
import CardSection from "@/components/admin/CardSection";
import { getConnectorOauthRedirectUrl } from "@/lib/connectors/oauth";
import { OAuthAdditionalKwargDescription } from "@/lib/connectors/credentials";

type formType = {
  [key: string]: any; // For additional credential fields
};

export function CreateStdOAuthCredential({
  sourceType,
  additionalFields,
}: {
  // Source information
  sourceType: ValidSources;

  additionalFields: OAuthAdditionalKwargDescription[];
}) {
  const handleSubmit = async (
    values: formType,
    formikHelpers: FormikHelpers<formType>
  ) => {
    const { setSubmitting, validateForm } = formikHelpers;

    const errors = await validateForm(values);
    if (Object.keys(errors).length > 0) {
      formikHelpers.setErrors(errors);
      return;
    }

    setSubmitting(true);
    formikHelpers.setSubmitting(true);

    const redirectUrl = await getConnectorOauthRedirectUrl(sourceType, values);

    if (!redirectUrl) {
      throw new Error("No redirect URL found for OAuth connector");
    }

    window.location.href = redirectUrl;
  };

  return (
    <Formik
      initialValues={
        {
          ...Object.fromEntries(additionalFields.map((field) => [field, ""])),
        } as formType
      }
      validationSchema={Yup.object().shape({
        ...Object.fromEntries(
          additionalFields.map((field) => [field.name, Yup.string().required()])
        ),
      })}
      onSubmit={(values, formikHelpers) => {
        handleSubmit(values, formikHelpers);
      }}
    >
      {() => (
        <Form className="w-full flex items-stretch">
          <CardSection className="w-full !border-0 mt-4 flex flex-col gap-y-6">
            {additionalFields.map((field) => (
              <TextFormField
                key={field.name}
                name={field.name}
                label={field.display_name}
                subtext={field.description}
                type="text"
              />
            ))}

            <div className="flex w-full">
              <Button type="submit">Create</Button>
            </div>
          </CardSection>
        </Form>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/components/credentials/actions/CredentialFieldsRenderer.tsx
================================================
"use client";

import Tabs from "@/refresh-components/Tabs";
import { useFormikContext } from "formik";
import {
  BooleanFormField,
  TextFormField,
  TypedFileUploadFormField,
} from "@/components/Field";
import {
  getDisplayNameForCredentialKey,
  CredentialTemplateWithAuth,
} from "@/lib/connectors/credentials";
import { dictionaryType } from "../types";
import { isTypedFileField } from "@/lib/connectors/fileTypes";

interface CredentialFieldsRendererProps {
  credentialTemplate: dictionaryType;
  authMethod?: string;
  setAuthMethod?: (method: string) => void;
}

export function CredentialFieldsRenderer({
  credentialTemplate,
  authMethod,
  setAuthMethod,
}: CredentialFieldsRendererProps) {
  const templateWithAuth =
    credentialTemplate as CredentialTemplateWithAuth<any>;
  const { values, setValues } = useFormikContext<any>();

  // remove other auth‐method fields when switching
  const handleAuthMethodChange = (newMethod: string) => {
    // start from current form values
    const cleaned = { ...values, authentication_method: newMethod };
    // delete every field not in the selected auth method
    templateWithAuth.authMethods?.forEach((m) => {
      if (m.value !== newMethod) {
        Object.keys(m.fields).forEach((fieldKey) => {
          delete cleaned[fieldKey];
        });
      }
    });
    setValues(cleaned);
    setAuthMethod?.(newMethod);
  };

  // Check if this credential template has multiple auth methods
  const hasMultipleAuthMethods =
    templateWithAuth.authMethods && templateWithAuth.authMethods.length > 1;

  if (hasMultipleAuthMethods && templateWithAuth.authMethods) {
    return (
      <div className="w-full space-y-4">
        {/* Render authentication_method as a hidden field */}
        <input
          type="hidden"
          name="authentication_method"
          value={authMethod || (templateWithAuth.authMethods?.[0]?.value ?? "")}
        />

        <Tabs
          value={authMethod || templateWithAuth.authMethods?.[0]?.value || ""}
          onValueChange={handleAuthMethodChange}
        >
          <Tabs.List>
            {templateWithAuth.authMethods.map((method) => (
              <Tabs.Trigger key={method.value} value={method.value}>
                {method.label}
              </Tabs.Trigger>
            ))}
          </Tabs.List>

          {templateWithAuth.authMethods.map((method) => (
            <Tabs.Content
              key={method.value}
              value={method.value}
              alignItems="stretch"
            >
              {/* Show description if method has no fields but has a description */}
              {Object.keys(method.fields).length === 0 &&
                method.description && (
                  <div className="p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-700 rounded-md">
                    <p className="text-sm text-blue-800 dark:text-blue-200">
                      {method.description}
                    </p>
                  </div>
                )}

              {Object.entries(method.fields).map(([key, val]) => {
                if (isTypedFileField(key)) {
                  return (
                    <TypedFileUploadFormField
                      key={key}
                      name={key}
                      label={getDisplayNameForCredentialKey(key)}
                    />
                  );
                }

                if (typeof val === "boolean") {
                  return (
                    <BooleanFormField
                      key={key}
                      name={key}
                      label={getDisplayNameForCredentialKey(key)}
                    />
                  );
                }
                return (
                  <TextFormField
                    key={key}
                    name={key}
                    placeholder={val}
                    label={getDisplayNameForCredentialKey(key)}
                    type={
                      key.toLowerCase().includes("token") ||
                      key.toLowerCase().includes("password") ||
                      key.toLowerCase().includes("secret")
                        ? "password"
                        : "text"
                    }
                  />
                );
              })}
            </Tabs.Content>
          ))}
        </Tabs>
      </div>
    );
  }

  // Render single auth method fields (existing behavior)
  return (
    <>
      {Object.entries(credentialTemplate).map(([key, val]) => {
        // Skip auth method metadata fields
        if (key === "authentication_method" || key === "authMethods") {
          return null;
        }
        if (isTypedFileField(key)) {
          return (
            <TypedFileUploadFormField
              key={key}
              name={key}
              label={getDisplayNameForCredentialKey(key)}
            />
          );
        }

        if (typeof val === "boolean") {
          return (
            <BooleanFormField
              key={key}
              name={key}
              label={getDisplayNameForCredentialKey(key)}
            />
          );
        }
        return (
          <TextFormField
            key={key}
            name={key}
            placeholder={val as string}
            label={getDisplayNameForCredentialKey(key)}
            type={
              key.toLowerCase().includes("token") ||
              key.toLowerCase().includes("password") ||
              key.toLowerCase().includes("secret")
                ? "password"
                : "text"
            }
          />
        );
      })}
    </>
  );
}


================================================
FILE: web/src/components/credentials/actions/EditCredential.tsx
================================================
import { Button } from "@opal/components";
import { Text } from "@opal/components";

import { FaNewspaper, FaTrash } from "react-icons/fa";
import { TextFormField, TypedFileUploadFormField } from "@/components/Field";
import { Form, Formik, FormikHelpers } from "formik";
import { toast } from "@/hooks/useToast";
import {
  Credential,
  getDisplayNameForCredentialKey,
} from "@/lib/connectors/credentials";
import { createEditingValidationSchema, createInitialValues } from "../lib";
import { dictionaryType, formType } from "../types";
import { isTypedFileField } from "@/lib/connectors/fileTypes";
import { SvgTrash } from "@opal/icons";
export interface EditCredentialProps {
  credential: Credential<dictionaryType>;
  onClose: () => void;
  onUpdate: (
    selectedCredentialId: Credential<any>,
    details: any,
    onSuccess: () => void
  ) => Promise<void>;
}

export default function EditCredential({
  credential,
  onClose,
  onUpdate,
}: EditCredentialProps) {
  const validationSchema = createEditingValidationSchema(
    credential.credential_json
  );
  const initialValues = createInitialValues(credential);

  const handleSubmit = async (
    values: formType,
    formikHelpers: FormikHelpers<formType>
  ) => {
    formikHelpers.setSubmitting(true);
    try {
      await onUpdate(credential, values, onClose);
    } catch (error) {
      console.error("Error updating credential:", error);
      toast.error("Error updating credential");
    } finally {
      formikHelpers.setSubmitting(false);
    }
  };

  return (
    <div className="flex flex-col gap-y-6">
      <Text as="p">
        Ensure that you update to a credential with the proper permissions!
      </Text>

      <Formik
        initialValues={initialValues}
        validationSchema={validationSchema}
        onSubmit={handleSubmit}
      >
        {({ isSubmitting, resetForm }) => (
          <Form>
            <TextFormField
              includeRevert
              name="name"
              placeholder={credential.name || ""}
              label="Name (optional):"
            />

            {Object.entries(credential.credential_json).map(([key, value]) =>
              isTypedFileField(key) ? (
                <TypedFileUploadFormField
                  key={key}
                  name={key}
                  label={getDisplayNameForCredentialKey(key)}
                />
              ) : (
                <TextFormField
                  includeRevert
                  key={key}
                  name={key}
                  placeholder={value as string}
                  label={getDisplayNameForCredentialKey(key)}
                  type={
                    key.toLowerCase().includes("token") ||
                    key.toLowerCase().includes("password")
                      ? "password"
                      : "text"
                  }
                  disabled={key === "authentication_method"}
                />
              )
            )}
            <div className="flex justify-between w-full">
              <Button onClick={() => resetForm()} icon={SvgTrash}>
                Reset Changes
              </Button>
              <Button disabled={isSubmitting} type="submit" icon={FaNewspaper}>
                Update
              </Button>
            </div>
          </Form>
        )}
      </Formik>
    </div>
  );
}


================================================
FILE: web/src/components/credentials/actions/ModifyCredential.tsx
================================================
import React, { useState } from "react";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { Badge } from "@/components/ui/badge";
import { AccessType } from "@/lib/types";
import { EditIcon, NewChatIcon, SwapIcon } from "@/components/icons/icons";
import {
  ConfluenceCredentialJson,
  Credential,
} from "@/lib/connectors/credentials";
import { Connector } from "@/lib/connectors/connectors";
import {
  SvgArrowExchange,
  SvgAlertTriangle,
  SvgBubbleText,
  SvgTrash,
} from "@opal/icons";
import { Button } from "@opal/components";
interface CredentialSelectionTableProps {
  credentials: Credential<any>[];
  editableCredentials: Credential<any>[];
  onSelectCredential: (credential: Credential<any> | null) => void;
  currentCredentialId?: number;
  onDeleteCredential: (credential: Credential<any>) => void;
  onEditCredential?: (credential: Credential<any>) => void;
}

function CredentialSelectionTable({
  credentials,
  editableCredentials,
  onEditCredential,
  onSelectCredential,
  currentCredentialId,
  onDeleteCredential,
}: CredentialSelectionTableProps) {
  const [selectedCredentialId, setSelectedCredentialId] = useState<
    number | null
  >(null);

  // rkuo: this appears to merge editableCredentials into credentials so we get a single list
  // of credentials to display
  // Pretty sure this merging should be done outside of this UI component
  const allCredentials = React.useMemo(() => {
    const credMap = new Map(editableCredentials.map((cred) => [cred.id, cred]));
    credentials.forEach((cred) => {
      if (!credMap.has(cred.id)) {
        credMap.set(cred.id, cred);
      }
    });
    return Array.from(credMap.values());
  }, [credentials, editableCredentials]);

  const handleSelectCredential = (credentialId: number) => {
    const newSelectedId =
      selectedCredentialId === credentialId ? null : credentialId;
    setSelectedCredentialId(newSelectedId);

    const selectedCredential =
      allCredentials.find((cred) => cred.id === newSelectedId) || null;
    onSelectCredential(selectedCredential);
  };

  return (
    <div className="w-full max-h-[50vh] overflow-auto">
      <table className="w-full text-sm border-collapse">
        <thead className="sticky top-0 w-full">
          <tr className="bg-neutral-100 dark:bg-neutral-900">
            <th className="p-2 text-left font-medium text-neutral-600 dark:text-neutral-400"></th>
            <th className="p-2 text-left font-medium text-neutral-600 dark:text-neutral-400">
              ID
            </th>
            <th className="p-2 text-left font-medium text-neutral-600 dark:text-neutral-400">
              Name
            </th>
            <th className="p-2 text-left font-medium text-neutral-600 dark:text-neutral-400">
              Created
            </th>
            <th className="p-2 text-left font-medium text-neutral-600 dark:text-neutral-400">
              Last Updated
            </th>
            <th />
          </tr>
        </thead>

        {allCredentials.length > 0 && (
          <tbody className="w-full">
            {allCredentials.map((credential, ind) => {
              const selected = currentCredentialId
                ? credential.id == (selectedCredentialId || currentCredentialId)
                : false;
              const editable = editableCredentials.some(
                (editableCredential) => editableCredential.id === credential.id
              );
              return (
                <tr
                  key={credential.id}
                  className="border-b hover:bg-background-50"
                >
                  <td className="min-w-[60px] p-2">
                    {!selected ? (
                      <input
                        type="radio"
                        name="credentialSelection"
                        onChange={() => handleSelectCredential(credential.id)}
                        className="form-radio ml-4 h-4 w-4 text-blue-600 transition duration-150 ease-in-out"
                      />
                    ) : (
                      <Badge>selected</Badge>
                    )}
                  </td>
                  <td className="p-2">{credential.id}</td>
                  <td className="p-2">
                    <p>{credential.name ?? "Untitled"}</p>
                  </td>
                  <td className="p-2">
                    {new Date(credential.time_created).toLocaleString()}
                  </td>
                  <td className="p-2">
                    {new Date(credential.time_updated).toLocaleString()}
                  </td>
                  <td className="p-2 flex gap-x-2 content-center mt-auto">
                    <Button
                      disabled={selected || !editable}
                      onClick={async () => {
                        onDeleteCredential(credential);
                      }}
                      icon={SvgTrash}
                    />
                    {onEditCredential && (
                      <button
                        disabled={!editable}
                        onClick={() => onEditCredential(credential)}
                        className="cursor-pointer my-auto"
                      >
                        <EditIcon />
                      </button>
                    )}
                  </td>
                </tr>
              );
            })}
          </tbody>
        )}
      </table>

      {allCredentials.length == 0 && (
        <p className="mt-4"> No credentials exist for this connector!</p>
      )}
    </div>
  );
}

export interface ModifyCredentialProps {
  close?: () => void;
  showIfEmpty?: boolean;
  attachedConnector?: Connector<any>;
  credentials: Credential<any>[];
  editableCredentials: Credential<any>[];
  defaultedCredential?: Credential<any>;
  accessType: AccessType;
  onSwap?: (
    newCredential: Credential<any>,
    connectorId: number,
    accessType: AccessType
  ) => void;
  onSwitch?: (newCredential: Credential<any>) => void;
  onEditCredential?: (credential: Credential<ConfluenceCredentialJson>) => void;
  onDeleteCredential: (credential: Credential<any | null>) => void;
  onCreateNew?: () => void;
}

export default function ModifyCredential({
  close,
  showIfEmpty,
  attachedConnector,
  credentials,
  editableCredentials,
  defaultedCredential,
  accessType,
  onSwap,
  onSwitch,
  onEditCredential,
  onDeleteCredential,
  onCreateNew,
}: ModifyCredentialProps) {
  const [selectedCredential, setSelectedCredential] =
    useState<Credential<any> | null>(null);
  const [confirmDeletionCredential, setConfirmDeletionCredential] =
    useState<null | Credential<any>>(null);

  if (!credentials || !editableCredentials) return null;

  return (
    <>
      {confirmDeletionCredential != null && (
        <Modal open onOpenChange={() => setConfirmDeletionCredential(null)}>
          <Modal.Content width="sm" height="sm">
            <Modal.Header
              icon={SvgAlertTriangle}
              title="Confirm Deletion"
              onClose={() => setConfirmDeletionCredential(null)}
            />
            <Modal.Body>
              <Text as="p">
                Are you sure you want to delete this credential? You cannot
                delete credentials that are linked to live connectors.
              </Text>
            </Modal.Body>
            <Modal.Footer>
              <Button
                onClick={async () => {
                  onDeleteCredential(confirmDeletionCredential);
                  setConfirmDeletionCredential(null);
                }}
              >
                Confirm
              </Button>
              <Button
                prominence="secondary"
                onClick={() => setConfirmDeletionCredential(null)}
              >
                Cancel
              </Button>
            </Modal.Footer>
          </Modal.Content>
        </Modal>
      )}

      <div className="mb-0">
        <Text as="p" className="mb-4">
          Select a credential as needed! Ensure that you have selected a
          credential with the proper permissions for this connector!
        </Text>

        <CredentialSelectionTable
          onDeleteCredential={async (credential: Credential<any | null>) => {
            setConfirmDeletionCredential(credential);
          }}
          onEditCredential={
            onEditCredential
              ? (credential: Credential<ConfluenceCredentialJson>) =>
                  onEditCredential(credential)
              : undefined
          }
          currentCredentialId={
            defaultedCredential ? defaultedCredential.id : undefined
          }
          credentials={credentials}
          editableCredentials={editableCredentials}
          onSelectCredential={(credential: Credential<any> | null) => {
            if (credential && onSwitch) {
              onSwitch(credential);
            } else {
              setSelectedCredential(credential);
            }
          }}
        />

        {!showIfEmpty && (
          <div className="flex mt-8 justify-between">
            {onCreateNew ? (
              <Button onClick={onCreateNew} icon={SvgBubbleText}>
                Create
              </Button>
            ) : (
              <div />
            )}

            <Button
              disabled={selectedCredential == null}
              onClick={() => {
                if (onSwap && attachedConnector) {
                  onSwap(selectedCredential!, attachedConnector.id, accessType);
                  if (close) {
                    close();
                  }
                }
                if (onSwitch) {
                  onSwitch(selectedCredential!);
                }
              }}
              icon={SvgArrowExchange}
            >
              Select
            </Button>
          </div>
        )}
      </div>
    </>
  );
}


================================================
FILE: web/src/components/credentials/lib.ts
================================================
import * as Yup from "yup";

import { dictionaryType, formType } from "./types";
import {
  Credential,
  getDisplayNameForCredentialKey,
  CredentialTemplateWithAuth,
} from "@/lib/connectors/credentials";
import { isTypedFileField } from "@/lib/connectors/fileTypes";

export function createValidationSchema(json_values: Record<string, any>) {
  const schemaFields: Record<string, Yup.AnySchema> = {};
  const template = json_values as CredentialTemplateWithAuth<any>;
  // multi‐auth templates
  if (template.authMethods && template.authMethods.length > 1) {
    // auth method selector
    schemaFields["authentication_method"] = Yup.string().required(
      "Please select an authentication method"
    );
    // conditional rules per authMethod
    template.authMethods.forEach((method) => {
      Object.entries(method.fields).forEach(([key, def]) => {
        const displayName = getDisplayNameForCredentialKey(key);
        if (typeof def === "boolean") {
          schemaFields[key] = Yup.boolean()
            .nullable()
            .default(false)
            .transform((v, o) => (o === undefined ? false : v));
        } else if (isTypedFileField(key)) {
          //TypedFile fields - use mixed schema instead of string (check before null check)
          schemaFields[key] = Yup.mixed().when("authentication_method", {
            is: method.value,
            then: () =>
              Yup.mixed().required(`Please select a ${displayName} file`),
            otherwise: () => Yup.mixed().notRequired(),
          });
        } else if (def === null) {
          schemaFields[key] = Yup.string()
            .trim()
            .transform((v) => (v === "" ? null : v))
            .nullable()
            .notRequired();
        } else {
          schemaFields[key] = Yup.string()
            .trim()
            .when("authentication_method", {
              is: method.value,
              then: (s) =>
                s
                  .min(1, `${displayName} cannot be empty`)
                  .required(`Please enter your ${displayName}`),
              otherwise: (s) => s.notRequired(),
            });
        }
      });
    });
  }
  // single‐auth templates and other fields
  for (const key in json_values) {
    if (!Object.prototype.hasOwnProperty.call(json_values, key)) continue;
    if (key === "authentication_method" || key === "authMethods") continue;
    const displayName = getDisplayNameForCredentialKey(key);
    const def = json_values[key];
    if (typeof def === "boolean") {
      schemaFields[key] = Yup.boolean()
        .nullable()
        .default(false)
        .transform((v, o) => (o === undefined ? false : v));
    } else if (isTypedFileField(key)) {
      // TypedFile fields - use mixed schema instead of string (check before null check)
      schemaFields[key] = Yup.mixed().required(
        `Please select a ${displayName} file`
      );
    } else if (def === null) {
      schemaFields[key] = Yup.string()
        .trim()
        .transform((v) => (v === "" ? null : v))
        .nullable()
        .notRequired();
    } else {
      schemaFields[key] = Yup.string()
        .trim()
        .min(1, `${displayName} cannot be empty`)
        .required(`Please enter your ${displayName}`);
    }
  }

  schemaFields["name"] = Yup.string().optional();
  return Yup.object().shape(schemaFields);
}

export function createEditingValidationSchema(json_values: dictionaryType) {
  const schemaFields: { [key: string]: Yup.AnySchema } = {};

  for (const key in json_values) {
    if (Object.prototype.hasOwnProperty.call(json_values, key)) {
      if (isTypedFileField(key)) {
        // TypedFile fields - use mixed schema for optional file uploads during editing
        schemaFields[key] = Yup.mixed().optional();
      } else {
        schemaFields[key] = Yup.string().optional();
      }
    }
  }

  schemaFields["name"] = Yup.string().optional();
  return Yup.object().shape(schemaFields);
}

export function createInitialValues(credential: Credential<any>): formType {
  const initialValues: formType = {
    name: credential.name || "",
  };

  for (const key in credential.credential_json) {
    // Initialize TypedFile fields as null, other fields as empty strings
    if (isTypedFileField(key)) {
      initialValues[key] = null as any; // TypedFile fields start as null
    } else {
      initialValues[key] = "";
    }
  }

  return initialValues;
}


================================================
FILE: web/src/components/credentials/types.ts
================================================
import { TypedFile } from "@/lib/connectors/fileTypes";

export interface dictionaryType {
  [key: string]: string | TypedFile;
}
export interface formType extends dictionaryType {
  name: string;
}

export type ActionType = "create" | "createAndSwap";


================================================
FILE: web/src/components/dateRangeSelectors/AdminDateRangeSelector.tsx
================================================
import React, { memo, useState } from "react";
import Calendar from "@/refresh-components/Calendar";
import Popover from "@/refresh-components/Popover";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { cn } from "@/lib/utils";
import { format } from "date-fns";
import { getXDaysAgo } from "./dateUtils";
import { SvgCalendar } from "@opal/icons";
export const THIRTY_DAYS = "30d";

export type DateRangePickerValue = DateRange & {
  selectValue: string;
};

export type DateRange =
  | {
      from: Date;
      to: Date;
    }
  | undefined;

export const AdminDateRangeSelector = memo(function AdminDateRangeSelector({
  value,
  onValueChange,
}: {
  value: DateRange;
  onValueChange: (value: DateRange) => void;
}) {
  const [isOpen, setIsOpen] = useState(false);

  const presets = [
    {
      label: "Last 30 days",
      value: {
        from: getXDaysAgo(30),
        to: getXDaysAgo(0),
      },
    },
    {
      label: "Today",
      value: {
        from: getXDaysAgo(1),
        to: getXDaysAgo(0),
      },
    },
  ];

  return (
    <div className="grid gap-2">
      <Popover open={isOpen} onOpenChange={setIsOpen}>
        <Popover.Trigger asChild>
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <Button
            data-testid="admin-date-range-selector-button"
            secondary
            className={cn("justify-start", !value && "text-muted-foreground")}
            leftIcon={SvgCalendar}
          >
            {value?.from
              ? value.to
                ? `${format(value.from, "LLL dd, y")} - ${format(
                    value.to,
                    "LLL dd, y"
                  )}`
                : format(value.from, "LLL dd, y")
              : "Pick a date range"}
          </Button>
        </Popover.Trigger>
        <Popover.Content align="start">
          <Calendar
            initialFocus
            mode="range"
            defaultMonth={value?.from}
            selected={value}
            onSelect={(range) => {
              if (range?.from) {
                if (range.to) {
                  // Normal range selection when initialized with a range
                  onValueChange({ from: range.from, to: range.to });
                } else {
                  // Single date selection when initilized without a range
                  const to = new Date(range.from);
                  const from = new Date(to.setDate(to.getDate() - 1));
                  onValueChange({ from, to });
                }
              }
            }}
            numberOfMonths={2}
          />
          <div className="border-t p-3">
            {presets.map((preset) => (
              <OpalButton
                key={preset.label}
                prominence="internal"
                width="full"
                onClick={() => {
                  onValueChange(preset.value);
                }}
              >
                {preset.label}
              </OpalButton>
            ))}
          </div>
        </Popover.Content>
      </Popover>
    </div>
  );
});


================================================
FILE: web/src/components/dateRangeSelectors/SearchDateRangeSelector.tsx
================================================
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { FiCalendar, FiChevronDown, FiXCircle } from "react-icons/fi";
import { CustomDropdown } from "../Dropdown";
import { timeRangeValues } from "@/app/config/timeRange";
import { TimeRangeSelector } from "@/components/filters/TimeRangeSelector";
import { cn } from "@/lib/utils";

export function SearchDateRangeSelector({
  value,
  onValueChange,
  isHorizontal,
  className,
}: {
  value: DateRangePickerValue | null;
  onValueChange: (value: DateRangePickerValue | null) => void;
  isHorizontal?: boolean;
  className?: string;
}) {
  return (
    <div>
      <CustomDropdown
        dropdown={
          <TimeRangeSelector
            value={value}
            className={cn(
              "border border-border bg-background rounded-lg flex flex-col w-64 max-h-96 overflow-y-auto flex overscroll-contain",
              className
            )}
            timeRangeValues={timeRangeValues}
            onValueChange={onValueChange}
          />
        }
      >
        <div
          className={`
            flex
            text-sm
            px-3
            line-clamp-1
            py-1.5
            rounded-lg
            border
            border-border
            cursor-pointer
            hover:bg-accent-background-hovered`}
        >
          <FiCalendar className="flex-none my-auto mr-2" />{" "}
          <p className="line-clamp-1">
            {isHorizontal ? (
              "Date"
            ) : value?.selectValue ? (
              <div className="text-text-darker">{value.selectValue}</div>
            ) : (
              "Any time..."
            )}
          </p>
          {value?.selectValue ? (
            <div
              className="my-auto ml-auto p-0.5 rounded-full w-fit"
              onClick={(e) => {
                onValueChange(null);
                e.stopPropagation();
              }}
            >
              <FiXCircle />
            </div>
          ) : (
            <FiChevronDown className="my-auto ml-auto" />
          )}
        </div>
      </CustomDropdown>
    </div>
  );
}


================================================
FILE: web/src/components/dateRangeSelectors/dateUtils.ts
================================================
export function getXDaysAgo(daysAgo: number) {
  const today = new Date();
  const daysAgoDate = new Date(today);
  daysAgoDate.setDate(today.getDate() - daysAgo);
  return daysAgoDate;
}

export function convertDateToEndOfDay(date?: Date | null) {
  if (!date) {
    return date;
  }

  const dateCopy = new Date(date);
  dateCopy.setHours(23, 59, 59, 999);
  return dateCopy;
}

export function convertDateToStartOfDay(date?: Date | null) {
  if (!date) {
    return date;
  }

  const dateCopy = new Date(date);
  dateCopy.setHours(0, 0, 0, 0);
  return dateCopy;
}


================================================
FILE: web/src/components/dev/StatsOverlay.tsx
================================================
"use client";

import { useEffect } from "react";

/**
 * Development-only stats.js overlay showing FPS, MS, and memory usage.
 * Enable by running `npm run dev:profile` or setting NEXT_PUBLIC_ENABLE_STATS=true.
 * Shows FPS and MB panels (memory is Chrome only).
 *
 * Uses dynamic import to prevent stats.js from being bundled in production.
 */
export default function StatsOverlay() {
  useEffect(() => {
    let animationFrameId: number | undefined;
    let container: HTMLDivElement | null = null;
    let isMounted = true;

    // Dynamic import to avoid bundling in production
    import("stats.js").then((StatsModule) => {
      // Guard against unmount during async import
      if (!isMounted) return;

      const Stats = StatsModule.default;

      // Create Stats instances for FPS and MB
      const panels = [0, 2].map((panel) => {
        // 0=FPS, 2=MB (memory)
        const stats = new Stats();
        stats.showPanel(panel);
        return stats;
      });

      // Create container for all panels
      container = document.createElement("div");
      container.style.position = "fixed";
      container.style.top = "0";
      container.style.left = "50%";
      container.style.transform = "translateX(-50%)";
      container.style.zIndex = "99999";
      container.style.display = "flex";

      panels.forEach((stats) => {
        stats.dom.style.position = "relative";
        container!.appendChild(stats.dom);
      });

      document.body.appendChild(container);

      const animate = () => {
        panels.forEach((stats) => {
          stats.begin();
          stats.end();
        });
        animationFrameId = requestAnimationFrame(animate);
      };

      animationFrameId = requestAnimationFrame(animate);
    });

    return () => {
      isMounted = false;
      if (animationFrameId !== undefined)
        cancelAnimationFrame(animationFrameId);
      if (container?.parentNode) {
        container.parentNode.removeChild(container);
      }
    };
  }, []);

  return null;
}


================================================
FILE: web/src/components/dev/StatsOverlayLoader.tsx
================================================
"use client";

import dynamic from "next/dynamic";

const StatsOverlay = dynamic(() => import("@/components/dev/StatsOverlay"), {
  ssr: false,
});

export default function StatsOverlayLoader() {
  return <StatsOverlay />;
}


================================================
FILE: web/src/components/embedding/CustomEmbeddingModelForm.tsx
================================================
import {
  CloudEmbeddingModel,
  EmbeddingProvider,
  getFormattedProviderName,
} from "./interfaces";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { TextFormField, BooleanFormField } from "@/components/Field";
import { Dispatch, SetStateAction } from "react";
import { Text } from "@opal/components";
import Spacer from "@/refresh-components/Spacer";
import Button from "@/refresh-components/buttons/Button";
import { EmbeddingDetails } from "@/app/admin/embeddings/EmbeddingModelSelectionForm";

export function CustomEmbeddingModelForm({
  setShowTentativeModel,
  currentValues,
  provider,
  embeddingType,
}: {
  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;
  currentValues: CloudEmbeddingModel | null;
  provider: EmbeddingDetails;
  embeddingType: EmbeddingProvider;
}) {
  return (
    <div>
      <Formik
        initialValues={
          currentValues || {
            model_name: "",
            model_dim: 768,
            normalize: false,
            query_prefix: "",
            passage_prefix: "",
            provider_type: embeddingType,
            api_key: "",
            enabled: true,
            api_url: provider.api_url,
            description: "",
            index_name: "",
          }
        }
        validationSchema={Yup.object().shape({
          model_name: Yup.string().required("Model name is required"),
          model_dim: Yup.number().required("Model dimension is required"),
          normalize: Yup.boolean().required(),
          query_prefix: Yup.string(),
          passage_prefix: Yup.string(),
          provider_type: Yup.string().required("Provider type is required"),
          api_key: Yup.string().optional(),
          enabled: Yup.boolean(),
          api_url: Yup.string().required("API base URL is required"),
          description: Yup.string(),
          index_name: Yup.string().nullable(),
        })}
        onSubmit={async (values) => {
          setShowTentativeModel(values as CloudEmbeddingModel);
        }}
      >
        {({ isSubmitting, submitForm, errors }) => (
          <Form>
            <Text as="p" font="heading-h3">
              {`Specify details for your ${getFormattedProviderName(
                embeddingType
              )} Provider's model`}
            </Text>
            <Spacer rem={1} />
            <TextFormField
              name="model_name"
              label="Model Name:"
              subtext={`The name of the ${getFormattedProviderName(
                embeddingType
              )} model`}
              placeholder="e.g. 'all-MiniLM-L6-v2'"
            />

            <TextFormField
              name="model_dim"
              label="Model Dimension:"
              subtext="The dimension of the model's embeddings"
              placeholder="e.g. '1536'"
              type="number"
            />

            <BooleanFormField
              removeIndent
              name="normalize"
              label="Normalize"
              subtext="Whether to normalize the embeddings"
            />

            <TextFormField
              name="query_prefix"
              label="Query Prefix:"
              subtext="Prefix for query embeddings"
            />

            <TextFormField
              name="passage_prefix"
              label="Passage Prefix:"
              subtext="Prefix for passage embeddings"
            />

            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Button
              type="submit"
              disabled={isSubmitting}
              className="w-64 mx-auto"
            >
              Configure {getFormattedProviderName(embeddingType)} Model
            </Button>
          </Form>
        )}
      </Formik>
    </div>
  );
}


================================================
FILE: web/src/components/embedding/CustomModelForm.tsx
================================================
import { BooleanFormField, TextFormField } from "@/components/Field";
import Button from "@/refresh-components/buttons/Button";
import { Form, Formik } from "formik";
import * as Yup from "yup";
import { HostedEmbeddingModel } from "./interfaces";

export function CustomModelForm({
  onSubmit,
}: {
  onSubmit: (model: HostedEmbeddingModel) => void;
}) {
  return (
    <div>
      <Formik
        initialValues={{
          model_name: "",
          model_dim: "",
          query_prefix: "",
          passage_prefix: "",
          description: "",
          normalize: true,
        }}
        validationSchema={Yup.object().shape({
          model_name: Yup.string().required(
            "Please enter the name of the Embedding Model"
          ),
          model_dim: Yup.number().required(
            "Please enter the dimensionality of the embeddings generated by the model"
          ),
          query_prefix: Yup.string(),
          passage_prefix: Yup.string(),
          normalize: Yup.boolean().required(),
        })}
        onSubmit={async (values, formikHelpers) => {
          onSubmit({
            ...values,
            model_dim: parseInt(values.model_dim),
            api_key: null,
            provider_type: null,
            index_name: null,
            api_url: null,
          });
        }}
      >
        {({ isSubmitting }) => (
          <Form>
            <TextFormField
              name="model_name"
              label="Name:"
              subtext="The name of the model on Hugging Face"
              placeholder="E.g. 'nomic-ai/nomic-embed-text-v1'"
            />

            <TextFormField
              name="model_dim"
              label="Model Dimension:"
              subtext="The dimensionality of the embeddings generated by the model"
              placeholder="E.g. '768'"
              type="number"
            />
            <TextFormField
              min={-1}
              name="description"
              label="Description:"
              subtext="Description of  your model"
              placeholder=""
            />

            <TextFormField
              name="query_prefix"
              label="[Optional] Query Prefix:"
              subtext={
                <>
                  The prefix specified by the model creators which should be
                  prepended to <i>queries</i> before passing them to the model.
                  Many models do not have this, in which case this should be
                  left empty.
                </>
              }
              placeholder="E.g. 'query: '"
            />
            <TextFormField
              name="passage_prefix"
              label="[Optional] Passage Prefix:"
              subtext={
                <>
                  The prefix specified by the model creators which should be
                  prepended to <i>passages</i> before passing them to the model.
                  Many models do not have this, in which case this should be
                  left empty.
                </>
              }
              placeholder="E.g. 'passage: '"
            />

            <BooleanFormField
              removeIndent
              name="normalize"
              label="Normalize Embeddings"
              subtext="Whether or not to normalize the embeddings generated by the model. When in doubt, leave this checked."
            />

            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Button
              type="submit"
              disabled={isSubmitting}
              className="w-64 mx-auto"
            >
              Choose
            </Button>
          </Form>
        )}
      </Formik>
    </div>
  );
}


================================================
FILE: web/src/components/embedding/FailedReIndexAttempts.tsx
================================================
import { buildCCPairInfoUrl } from "@/app/admin/connector/[ccPairId]/lib";
import { PageSelector } from "@/components/PageSelector";
import { IndexAttemptStatus } from "@/components/Status";
import { deleteCCPair } from "@/lib/documentDeletion";
import { FailedConnectorIndexingStatus } from "@/lib/types";
import { Button } from "@opal/components";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { Text } from "@opal/components";
import Spacer from "@/refresh-components/Spacer";
import Link from "next/link";
import { useState } from "react";
import { FiLink, FiMaximize2, FiTrash } from "react-icons/fi";
import { mutate } from "swr";
import { toast } from "@/hooks/useToast";
import { SvgTrash } from "@opal/icons";
export function FailedReIndexAttempts({
  failedIndexingStatuses,
}: {
  failedIndexingStatuses: FailedConnectorIndexingStatus[];
}) {
  const numToDisplay = 10;
  const [page, setPage] = useState(1);
  const [pendingConnectorDeletion, setPendingConnectorDeletion] = useState<{
    connectorId: number;
    credentialId: number;
    ccPairId: number;
    name: string;
  } | null>(null);

  const shouldConfirmConnectorDeletion = true;

  const anyDeletable = failedIndexingStatuses.some(
    (status) => status.is_deletable
  );

  return (
    <div className="mt-6 mb-8 p-4 border border-status-error-02 bg-status-error-00 rounded-lg">
      {pendingConnectorDeletion && (
        <ConfirmEntityModal
          danger
          entityType="connector"
          entityName={pendingConnectorDeletion.name}
          additionalDetails="Deleting this connector schedules a deletion job that removes its indexed documents and deletes it for every user."
          onClose={() => setPendingConnectorDeletion(null)}
          onSubmit={async () => {
            try {
              await deleteCCPair(
                pendingConnectorDeletion.connectorId,
                pendingConnectorDeletion.credentialId,
                () =>
                  mutate(buildCCPairInfoUrl(pendingConnectorDeletion.ccPairId))
              );
            } catch (error) {
              console.error("Error deleting connector:", error);
              toast.error("Failed to delete connector. Please try again.");
            } finally {
              setPendingConnectorDeletion(null);
            }
          }}
        />
      )}

      <div className="text-status-error-05">
        <Text as="p" font="main-ui-action">
          Failed Re-indexing Attempts
        </Text>
      </div>
      <Spacer rem={0.5} />
      <div className="text-status-error-05">
        <Text as="p">
          The table below shows only the failed re-indexing attempts for
          existing connectors. These failures require immediate attention. Once
          all connectors have been re-indexed successfully, the new model will
          be used for all search queries.
        </Text>
      </div>
      <Spacer rem={1} />

      <div>
        <Table>
          <TableHeader>
            <TableRow>
              <TableHead className="w-1/8 sm:w-1/6">Connector Name</TableHead>
              <TableHead className="w-1/8 sm:w-1/6">Status</TableHead>
              <TableHead className="w-4/8 sm:w-2/6">Error Message</TableHead>
              <TableHead className="w-1/8 sm:w-1/6">Visit Connector</TableHead>
              {anyDeletable && (
                <TableHead className="w-1/8 sm:w-2/6">
                  Delete Connector
                </TableHead>
              )}
            </TableRow>
          </TableHeader>
          <TableBody>
            {failedIndexingStatuses
              .slice(numToDisplay * (page - 1), numToDisplay * page)
              .map((reindexingProgress) => {
                return (
                  <TableRow key={reindexingProgress.name}>
                    <TableCell>
                      <Link
                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}
                        className="text-link cursor-pointer flex"
                      >
                        <FiMaximize2 className="my-auto mr-1" />
                        {reindexingProgress.name}
                      </Link>
                    </TableCell>
                    <TableCell>
                      <IndexAttemptStatus status="failed" />
                    </TableCell>

                    <TableCell>
                      <div>
                        <Text as="p">
                          {reindexingProgress.error_msg || "-"}
                        </Text>
                      </div>
                    </TableCell>
                    <TableCell>
                      <Link
                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}
                        className="ctext-link cursor-pointer flex"
                      >
                        <FiLink className="my-auto mr-1" />
                        Visit Connector
                      </Link>
                    </TableCell>
                    <TableCell>
                      <Button
                        disabled={!reindexingProgress.is_deletable}
                        variant="danger"
                        onClick={async () => {
                          if (shouldConfirmConnectorDeletion) {
                            setPendingConnectorDeletion({
                              connectorId: reindexingProgress.connector_id,
                              credentialId: reindexingProgress.credential_id,
                              ccPairId: reindexingProgress.cc_pair_id,
                              name: reindexingProgress.name ?? "this connector",
                            });
                            return;
                          }

                          try {
                            await deleteCCPair(
                              reindexingProgress.connector_id,
                              reindexingProgress.credential_id,
                              () =>
                                mutate(
                                  buildCCPairInfoUrl(
                                    reindexingProgress.cc_pair_id
                                  )
                                )
                            );
                          } catch (error) {
                            console.error("Error deleting connector:", error);
                            toast.error(
                              "Failed to delete connector. Please try again."
                            );
                          }
                        }}
                        icon={SvgTrash}
                      >
                        Delete
                      </Button>
                    </TableCell>
                  </TableRow>
                );
              })}
          </TableBody>
        </Table>

        <div className="mt-3 flex">
          <div className="mx-auto">
            <PageSelector
              totalPages={Math.ceil(
                failedIndexingStatuses.length / numToDisplay
              )}
              currentPage={page}
              onPageChange={(newPage) => setPage(newPage)}
            />
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/embedding/ModelSelector.tsx
================================================
import { getCurrentModelCopy } from "@/app/admin/embeddings/interfaces";
import {
  EmbeddingModelDescriptor,
  getIconForRerankType,
  getTitleForRerankType,
  getFormattedProviderName,
  HostedEmbeddingModel,
  CloudEmbeddingModel,
} from "./interfaces";
import { FiExternalLink } from "react-icons/fi";
import CardSection from "../admin/CardSection";

export function ModelPreview({
  model,
  display,
  showDetails = false,
}: {
  model: EmbeddingModelDescriptor;
  display?: boolean;
  showDetails?: boolean;
}) {
  const currentModelCopy = getCurrentModelCopy(model.model_name);

  return (
    <CardSection
      className={`shadow-lg rounded-16 bg-background-tint-00 ${
        display ? "p-4" : "p-2"
      } w-96 flex flex-col`}
    >
      <div className="font-bold text-lg flex">{model.model_name}</div>

      <div className="text-sm mt-1 mx-1 mb-3">
        {model.description ||
          currentModelCopy?.description ||
          "Custom model—no description is available."}
      </div>

      {showDetails && (
        <div className="pt-4 border-t border-border space-y-3">
          <div className="grid grid-cols-2 gap-4 text-sm">
            <div>
              <span className="font-semibold text-text-700">Dimensions:</span>
              <div className="text-text-600">
                {model.model_dim.toLocaleString()}
              </div>
            </div>

            <div>
              <span className="font-semibold text-text-700">Provider:</span>
              <div className="text-text-600">
                {getFormattedProviderName(model.provider_type)}
              </div>
            </div>

            <div>
              <span className="font-semibold text-text-700">Normalized:</span>
              <div className="text-text-600">
                {model.normalize ? "Yes" : "No"}
              </div>
            </div>

            {"embedding_precision" in model &&
              (model as any).embedding_precision && (
                <div>
                  <span className="font-semibold text-text-700">
                    Precision:
                  </span>
                  <div className="text-text-600">
                    {(model as any).embedding_precision}
                  </div>
                </div>
              )}

            {"isDefault" in model &&
              (model as HostedEmbeddingModel).isDefault && (
                <div>
                  <span className="font-semibold text-text-700">Type:</span>
                  <div className="text-text-600">Default</div>
                </div>
              )}

            {"pricePerMillion" in model && (
              <div>
                <span className="font-semibold text-text-700">
                  Price/Million:
                </span>
                <div className="text-text-600">
                  ${(model as CloudEmbeddingModel).pricePerMillion}
                </div>
              </div>
            )}
          </div>

          {(model.query_prefix || model.passage_prefix) && (
            <div className="space-y-2">
              {model.query_prefix && (
                <div>
                  <span className="font-semibold text-text-700">
                    Query Prefix:
                  </span>
                  <div className="text-text-600 font-mono text-xs p-2 rounded">
                    &quot;{model.query_prefix}&quot;
                  </div>
                </div>
              )}

              {model.passage_prefix && (
                <div>
                  <span className="font-semibold text-text-700">
                    Passage Prefix:
                  </span>
                  <div className="text-text-600 font-mono text-xs p-2 rounded">
                    &quot;{model.passage_prefix}&quot;
                  </div>
                </div>
              )}
            </div>
          )}

          {model.api_url && (
            <div>
              <span className="font-semibold text-text-700">API URL:</span>
              <div className="text-text-600 font-mono text-xs bg-background p-2 rounded break-all">
                {model.api_url}
              </div>
            </div>
          )}

          {model.api_version && (
            <div>
              <span className="font-semibold text-text-700">API Version:</span>
              <div className="text-text-600">{model.api_version}</div>
            </div>
          )}

          {model.deployment_name && (
            <div>
              <span className="font-semibold text-text-700">Deployment:</span>
              <div className="text-text-600">{model.deployment_name}</div>
            </div>
          )}

          {"link" in model && (model as HostedEmbeddingModel).link && (
            <div className="pt-2">
              <a
                href={(model as HostedEmbeddingModel).link}
                target="_blank"
                rel="noopener noreferrer"
                className="inline-flex items-center text-blue-500 hover:text-blue-700 transition-colors duration-200 text-sm"
              >
                <span>View Documentation</span>
                <FiExternalLink className="ml-1" size={14} />
              </a>
            </div>
          )}
        </div>
      )}
    </CardSection>
  );
}

export function ModelOption({
  model,
  onSelect,
  selected,
}: {
  model: HostedEmbeddingModel;
  onSelect?: (model: HostedEmbeddingModel) => void;
  selected: boolean;
}) {
  const currentModelCopy = getCurrentModelCopy(model.model_name);

  return (
    <div
      className={`p-4 w-96 border rounded-lg transition-all duration-200 ${
        selected
          ? "border-blue-800 bg-blue-50 dark:bg-blue-950 dark:border-blue-700 shadow-md"
          : "border-background-200 hover:border-blue-300 hover:shadow-sm"
      }`}
    >
      <div className="flex items-center justify-between mb-3">
        <h3 className="font-bold text-lg">{model.model_name}</h3>

        {model.link && (
          <a
            href={model.link}
            target="_blank"
            rel="noopener noreferrer"
            onClick={(e) => e.stopPropagation()}
            className="text-blue-500 hover:text-blue-700 transition-colors duration-200"
          >
            <FiExternalLink size={18} />
          </a>
        )}
      </div>
      <p className="text-sm k text-text-600 dark:text-neutral-400 text-left mb-2">
        {model.description ||
          currentModelCopy?.description ||
          "Custom model—no description is available."}
      </p>
      <div className="text-xs text-text-500">
        {model.isDefault ? "Default" : "Self-hosted"}
      </div>
      {onSelect && (
        <div className="mt-3">
          <button
            className={`w-full p-2 rounded-lg text-sm ${
              selected
                ? "bg-background-125 border border-border cursor-not-allowed"
                : "bg-background border border-border hover:bg-accent-background-hovered cursor-pointer"
            }`}
            onClick={(e) => {
              e.stopPropagation();
              if (!selected) onSelect(model);
            }}
            disabled={selected}
          >
            {selected ? "Selected Model" : "Select Model"}
          </button>
        </div>
      )}
    </div>
  );
}
export function ModelSelector({
  modelOptions,
  setSelectedModel,
  currentEmbeddingModel,
}: {
  currentEmbeddingModel: HostedEmbeddingModel;
  modelOptions: HostedEmbeddingModel[];
  setSelectedModel: (model: HostedEmbeddingModel) => void;
}) {
  const groupedModelOptions = modelOptions.reduce(
    (acc, model) => {
      const [type] = model.model_name.split("/");
      if (type !== undefined) {
        if (!acc[type]) {
          acc[type] = [];
        }

        const acc_by_type = acc[type];
        if (acc_by_type !== undefined) {
          acc_by_type.push(model);
        }
      }

      return acc;
    },
    {} as Record<string, HostedEmbeddingModel[]>
  );

  return (
    <div>
      <div className="flex flex-col gap-y-6 gap-6">
        {Object.entries(groupedModelOptions).map(([type, models]) => (
          <div key={type}>
            <div className="flex items-center mb-2">
              {getIconForRerankType(type)}
              <h2 className="ml-2 mt-2 text-xl font-bold">
                {getTitleForRerankType(type)}
              </h2>
            </div>

            <div className="flex mt-4 flex-wrap gap-4">
              {models.map((modelOption) => (
                <ModelOption
                  key={modelOption.model_name}
                  model={modelOption}
                  onSelect={setSelectedModel}
                  selected={currentEmbeddingModel === modelOption}
                />
              ))}
            </div>
          </div>
        ))}
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/embedding/ReindexingProgressTable.tsx
================================================
import { PageSelector } from "@/components/PageSelector";
import { IndexAttemptStatus } from "@/components/Status";
import {
  ConnectorIndexingStatus,
  ConnectorIndexingStatusLite,
} from "@/lib/types";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import Link from "next/link";
import { useState } from "react";
import { FiMaximize2 } from "react-icons/fi";

export function ReindexingProgressTable({
  reindexingProgress,
}: {
  reindexingProgress: ConnectorIndexingStatusLite[];
}) {
  const numToDisplay = 10;
  const [page, setPage] = useState(1);

  return (
    <div>
      <Table>
        <TableHeader>
          <TableRow>
            <TableHead className="w-1/7 sm:w-1/5">Connector Name</TableHead>
            <TableHead className="w-3/7 sm:w-1/5">Status</TableHead>
            <TableHead className="w-3/7 sm:w-1/5">Docs Re-Indexed</TableHead>
            <TableHead className="w-3/7 sm:w-1/5"></TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>
          {reindexingProgress
            .slice(numToDisplay * (page - 1), numToDisplay * page)
            .map((reindexingProgress) => {
              return (
                <TableRow key={reindexingProgress.name}>
                  <TableCell>
                    <Link
                      href={`/admin/connector/${reindexingProgress.cc_pair_id}`}
                      className="text-link cursor-pointer flex"
                    >
                      <FiMaximize2 className="my-auto mr-1" />
                      {reindexingProgress.name}
                    </Link>
                  </TableCell>
                  <TableCell>
                    {reindexingProgress.last_status && (
                      <IndexAttemptStatus
                        status={reindexingProgress.last_status}
                      />
                    )}
                  </TableCell>
                  <TableCell>
                    {reindexingProgress?.latest_index_attempt_docs_indexed ||
                      "-"}
                  </TableCell>
                </TableRow>
              );
            })}
        </TableBody>
      </Table>

      <div className="mt-3 flex">
        <div className="mx-auto">
          <PageSelector
            totalPages={Math.ceil(reindexingProgress.length / numToDisplay)}
            currentPage={page}
            onPageChange={(newPage) => setPage(newPage)}
          />
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/embedding/interfaces.tsx
================================================
import { JSX } from "react";
import {
  AzureIcon,
  CohereIcon,
  GoogleIcon,
  IconProps,
  LiteLLMIcon,
  MicrosoftIcon,
  NomicIcon,
  OpenAIISVG,
  OpenSourceIcon,
  VoyageIconSVG,
} from "@/components/icons/icons";
import { SwitchoverType } from "@/app/admin/embeddings/interfaces";
import { DOCS_ADMINS_PATH } from "@/lib/constants";

export enum EmbeddingProvider {
  OPENAI = "openai",
  COHERE = "cohere",
  VOYAGE = "voyage",
  GOOGLE = "google",
  LITELLM = "litellm",
  AZURE = "azure",
}

export interface CloudEmbeddingProvider {
  provider_type: EmbeddingProvider;
  api_key?: string;
  api_url?: string;
  custom_config?: Record<string, string>;
  docsLink?: string;

  // Frontend-specific properties
  website: string;
  icon: ({ size, className }: IconProps) => JSX.Element;
  description: string;
  apiLink: string;
  costslink?: string;

  // Relationships
  embedding_models: CloudEmbeddingModel[];
  default_model?: CloudEmbeddingModel;
}

// Embedding Models
export interface EmbeddingModelDescriptor {
  id?: number;
  model_name: string;
  model_dim: number;
  normalize: boolean;
  query_prefix: string;
  passage_prefix: string;
  provider_type: EmbeddingProvider | null;
  description: string;
  api_key: string | null;
  api_url: string | null;
  api_version?: string | null;
  deployment_name?: string | null;
  index_name: string | null;
  switchover_type?: SwitchoverType;
}

export interface CloudEmbeddingModel extends EmbeddingModelDescriptor {
  pricePerMillion: number;
}

export interface HostedEmbeddingModel extends EmbeddingModelDescriptor {
  link?: string;
  isDefault?: boolean;
}

// Responses
export interface FullEmbeddingModelResponse {
  current_model_name: string;
  secondary_model_name: string | null;
}

export interface CloudEmbeddingProviderFull extends CloudEmbeddingProvider {
  configured?: boolean;
}

export const AVAILABLE_MODELS: HostedEmbeddingModel[] = [
  {
    model_name: "nomic-ai/nomic-embed-text-v1",
    model_dim: 768,
    normalize: true,
    description:
      "The recommended default for most situations. If you aren't sure which model to use, this is probably the one.",
    isDefault: true,
    link: "https://huggingface.co/nomic-ai/nomic-embed-text-v1",
    query_prefix: "search_query: ",
    passage_prefix: "search_document: ",
    index_name: "",
    provider_type: null,
    api_key: null,
    api_url: null,
  },
  {
    model_name: "intfloat/e5-base-v2",
    model_dim: 768,
    normalize: true,
    description:
      "A smaller and faster model than the default. It is around 2x faster than the default model at the cost of lower search quality.",
    link: "https://huggingface.co/intfloat/e5-base-v2",
    query_prefix: "query: ",
    passage_prefix: "passage: ",
    index_name: "",
    provider_type: null,
    api_url: null,
    api_key: null,
  },
  {
    model_name: "intfloat/e5-small-v2",
    model_dim: 384,
    normalize: true,
    description:
      "The smallest and fastest version of the E5 line of models. If you're running Onyx on a resource constrained system, then this may be a good choice.",
    link: "https://huggingface.co/intfloat/e5-small-v2",
    query_prefix: "query: ",
    passage_prefix: "passage: ",
    index_name: "",
    provider_type: null,
    api_key: null,
    api_url: null,
  },
  {
    model_name: "intfloat/multilingual-e5-base",
    model_dim: 768,
    normalize: true,
    description:
      "For corpora in other languages besides English, this is the one to choose.",
    link: "https://huggingface.co/intfloat/multilingual-e5-base",
    query_prefix: "query: ",
    passage_prefix: "passage: ",
    index_name: "",
    provider_type: null,
    api_key: null,
    api_url: null,
  },
  {
    model_name: "intfloat/multilingual-e5-small",
    model_dim: 384,
    normalize: true,
    description:
      "For corpora in other languages besides English, as well as being on a resource constrained system, this is the one to choose.",
    link: "https://huggingface.co/intfloat/multilingual-e5-base",
    query_prefix: "query: ",
    passage_prefix: "passage: ",
    index_name: "",
    provider_type: null,
    api_key: null,
    api_url: null,
  },
];

export const LITELLM_CLOUD_PROVIDER: CloudEmbeddingProvider = {
  provider_type: EmbeddingProvider.LITELLM,
  website: "https://github.com/BerriAI/litellm",
  icon: LiteLLMIcon,
  description: "Open-source library to call LLM APIs using OpenAI format",
  apiLink: "https://docs.litellm.ai/docs/proxy/quick_start",
  embedding_models: [], // No default embedding models
};

export const AZURE_CLOUD_PROVIDER: CloudEmbeddingProvider = {
  provider_type: EmbeddingProvider.AZURE,
  website:
    "https://azure.microsoft.com/en-us/products/cognitive-services/openai/",
  icon: AzureIcon,
  description:
    "Azure OpenAI is a cloud-based AI service that provides access to OpenAI models.",
  apiLink:
    "https://docs.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource",
  costslink:
    "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai/",
  embedding_models: [], // No default embedding models
};

export const AVAILABLE_CLOUD_PROVIDERS: CloudEmbeddingProvider[] = [
  {
    provider_type: EmbeddingProvider.COHERE,
    website: "https://cohere.ai",
    icon: CohereIcon,
    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,
    description:
      "AI company specializing in NLP models for various text-based tasks",
    apiLink: "https://dashboard.cohere.ai/api-keys",
    costslink: "https://cohere.com/pricing",
    embedding_models: [
      {
        provider_type: EmbeddingProvider.COHERE,
        model_name: "embed-english-v3.0",
        description:
          "Cohere's English embedding model. Good performance for English-language tasks.",
        pricePerMillion: 0.1,
        model_dim: 1024,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
      {
        model_name: "embed-english-light-v3.0",
        provider_type: EmbeddingProvider.COHERE,
        description:
          "Cohere's lightweight English embedding model. Faster and more efficient for simpler tasks.",
        pricePerMillion: 0.1,
        model_dim: 384,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
    ],
  },
  {
    provider_type: EmbeddingProvider.OPENAI,
    website: "https://openai.com",
    icon: OpenAIISVG,
    description: "AI industry leader known for ChatGPT and DALL-E",
    apiLink: "https://platform.openai.com/api-keys",
    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,
    costslink: "https://openai.com/pricing",
    embedding_models: [
      {
        provider_type: EmbeddingProvider.OPENAI,
        model_name: "text-embedding-3-large",
        description:
          "OpenAI's large embedding model. Best performance, but more expensive.",
        pricePerMillion: 0.13,
        model_dim: 3072,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
      {
        provider_type: EmbeddingProvider.OPENAI,
        model_name: "text-embedding-3-small",
        model_dim: 1536,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        description:
          "OpenAI's newer, more efficient embedding model. Good balance of performance and cost.",
        pricePerMillion: 0.02,
        index_name: "",
        api_key: null,
        api_url: null,
      },
    ],
  },

  {
    provider_type: EmbeddingProvider.GOOGLE,
    website: "https://ai.google",
    icon: GoogleIcon,
    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,
    description:
      "Offers a wide range of AI services including language and vision models",
    apiLink: "https://console.cloud.google.com/apis/credentials",
    costslink: "https://cloud.google.com/vertex-ai/pricing",
    embedding_models: [
      {
        provider_type: EmbeddingProvider.GOOGLE,
        model_name: "gemini-embedding-001",
        description: "Google's Gemini embedding model. Powerful and efficient.",
        pricePerMillion: 0.025,
        model_dim: 3072,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
      {
        provider_type: EmbeddingProvider.GOOGLE,
        model_name: "text-embedding-005",
        description: "Smaller, lighter-weight embedding model from Google.",
        pricePerMillion: 0.025,
        model_dim: 768,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
    ],
  },
  {
    provider_type: EmbeddingProvider.VOYAGE,
    website: "https://www.voyageai.com",
    icon: VoyageIconSVG,
    description: "Advanced NLP research startup born from Stanford AI Labs",
    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,
    apiLink: "https://www.voyageai.com/dashboard",
    costslink: "https://www.voyageai.com/pricing",
    embedding_models: [
      {
        provider_type: EmbeddingProvider.VOYAGE,
        model_name: "voyage-large-2-instruct",
        description:
          "Voyage's large embedding model. High performance with instruction fine-tuning.",
        pricePerMillion: 0.12,
        model_dim: 1024,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
      {
        provider_type: EmbeddingProvider.VOYAGE,
        model_name: "voyage-light-2-instruct",
        description:
          "Voyage's lightweight embedding model. Good balance of performance and efficiency.",
        pricePerMillion: 0.12,
        model_dim: 1024,
        normalize: false,
        query_prefix: "",
        passage_prefix: "",
        index_name: "",
        api_key: null,
        api_url: null,
      },
    ],
  },
];

export const getFormattedProviderName = (providerType: string | null) => {
  if (!providerType) return "Self-hosted";

  switch (providerType) {
    case "openai":
      return "OpenAI";
    case "cohere":
      return "Cohere";
    case "voyage":
      return "Voyage AI";
    case "google":
      return "Google";
    case "litellm":
      return "LiteLLM";
    case "azure":
      return "Azure";
    default:
      return providerType.charAt(0).toUpperCase() + providerType.slice(1);
  }
};

export const getTitleForRerankType = (type: string) => {
  switch (type) {
    case "nomic-ai":
      return "Nomic (recommended)";
    case "intfloat":
      return "Microsoft";
    default:
      return "Open Source";
  }
};

export const getIconForRerankType = (type: string) => {
  switch (type) {
    case "nomic-ai":
      return <NomicIcon size={40} />;
    case "intfloat":
      return <MicrosoftIcon size={40} />;
    default:
      return <OpenSourceIcon size={40} />;
  }
};

export const INVALID_OLD_MODEL = "thenlper/gte-small";

export function checkModelNameIsValid(
  modelName: string | undefined | null
): boolean {
  return !!modelName && modelName !== INVALID_OLD_MODEL;
}


================================================
FILE: web/src/components/errorPages/AccessRestrictedPage.tsx
================================================
"use client";

import { useState } from "react";
import Link from "next/link";
import ErrorPageLayout from "@/components/errorPages/ErrorPageLayout";
import { Button } from "@opal/components";
import InlineExternalLink from "@/refresh-components/InlineExternalLink";
import { logout } from "@/lib/user";
import { loadStripe } from "@stripe/stripe-js";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { useLicense } from "@/hooks/useLicense";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { ApplicationStatus } from "@/interfaces/settings";
import Text from "@/refresh-components/texts/Text";
import { SvgLock } from "@opal/icons";

const linkClassName = "text-action-link-05 hover:text-action-link-06 underline";

const fetchStripePublishableKey = async (): Promise<string> => {
  const response = await fetch("/api/tenants/stripe-publishable-key");
  if (!response.ok) {
    throw new Error("Failed to fetch Stripe publishable key");
  }
  const data = await response.json();
  return data.publishable_key;
};

const fetchResubscriptionSession = async () => {
  const response = await fetch("/api/tenants/create-subscription-session", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
  });
  if (!response.ok) {
    throw new Error("Failed to create resubscription session");
  }
  return response.json();
};

export default function AccessRestricted() {
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const { data: license } = useLicense();
  const settings = useSettingsContext();

  const isSeatLimitExceeded =
    settings.settings.application_status ===
    ApplicationStatus.SEAT_LIMIT_EXCEEDED;
  const hadPreviousLicense = license?.has_license === true;
  const showRenewalMessage = NEXT_PUBLIC_CLOUD_ENABLED || hadPreviousLicense;

  function getSeatLimitMessage() {
    const { used_seats, seat_count } = settings.settings;
    const counts =
      used_seats != null && seat_count != null
        ? ` (${used_seats} users / ${seat_count} seats)`
        : "";
    return `Your organization has exceeded its licensed seat count${counts}. Access is restricted until the number of users is reduced or your license is upgraded.`;
  }

  const initialModalMessage = isSeatLimitExceeded
    ? getSeatLimitMessage()
    : showRenewalMessage
      ? NEXT_PUBLIC_CLOUD_ENABLED
        ? "Your access to Onyx has been temporarily suspended due to a lapse in your subscription."
        : "Your access to Onyx has been temporarily suspended due to a lapse in your license."
      : "An Enterprise license is required to use Onyx. Your data is protected and will be available once a license is activated.";

  const handleResubscribe = async () => {
    setIsLoading(true);
    setError(null);
    try {
      const publishableKey = await fetchStripePublishableKey();
      const { sessionId } = await fetchResubscriptionSession();
      const stripe = await loadStripe(publishableKey);

      if (stripe) {
        await stripe.redirectToCheckout({ sessionId });
      } else {
        throw new Error("Stripe failed to load");
      }
    } catch (error) {
      console.error("Error creating resubscription session:", error);
      setError("Error opening resubscription page. Please try again later.");
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <ErrorPageLayout>
      <div className="flex items-center gap-2">
        <Text headingH2>Access Restricted</Text>
        <SvgLock className="stroke-status-error-05 w-[1.5rem] h-[1.5rem]" />
      </div>

      <Text text03>{initialModalMessage}</Text>

      {isSeatLimitExceeded ? (
        <>
          <Text text03>
            If you are an administrator, you can manage users on the{" "}
            <Link className={linkClassName} href="/admin/users">
              User Management
            </Link>{" "}
            page or upgrade your license on the{" "}
            <Link className={linkClassName} href="/admin/billing">
              Admin Billing
            </Link>{" "}
            page.
          </Text>

          <div className="flex flex-row gap-2">
            <Button
              onClick={async () => {
                await logout();
                window.location.reload();
              }}
            >
              Log out
            </Button>
          </div>
        </>
      ) : NEXT_PUBLIC_CLOUD_ENABLED ? (
        <>
          <Text text03>
            To reinstate your access and continue benefiting from Onyx&apos;s
            powerful features, please update your payment information.
          </Text>

          <Text text03>
            If you&apos;re an admin, you can manage your subscription by
            clicking the button below. For other users, please reach out to your
            administrator to address this matter.
          </Text>

          <div className="flex flex-row gap-2">
            <Button disabled={isLoading} onClick={handleResubscribe}>
              {isLoading ? "Loading..." : "Resubscribe"}
            </Button>
            <Button
              prominence="secondary"
              onClick={async () => {
                await logout();
                window.location.reload();
              }}
            >
              Log out
            </Button>
          </div>

          {error && <Text className="text-status-error-05">{error}</Text>}
        </>
      ) : (
        <>
          <Text text03>
            {hadPreviousLicense
              ? "To reinstate your access and continue using Onyx, please contact your system administrator to renew your license."
              : "To get started, please contact your system administrator to obtain an Enterprise license."}
          </Text>

          <Text text03>
            If you are the administrator, please visit the{" "}
            <Link className={linkClassName} href="/admin/billing">
              Admin Billing
            </Link>{" "}
            page to {hadPreviousLicense ? "renew" : "activate"} your license,
            sign up through Stripe or reach out to{" "}
            <a className={linkClassName} href="mailto:support@onyx.app">
              support@onyx.app
            </a>{" "}
            for billing assistance.
          </Text>

          <div className="flex flex-row gap-2">
            <Button
              onClick={async () => {
                await logout();
                window.location.reload();
              }}
            >
              Log out
            </Button>
          </div>
        </>
      )}

      <Text text03>
        Need help? Join our{" "}
        <InlineExternalLink
          className={linkClassName}
          href="https://discord.gg/4NA5SbzrWb"
        >
          Discord community
        </InlineExternalLink>{" "}
        for support.
      </Text>
    </ErrorPageLayout>
  );
}


================================================
FILE: web/src/components/errorPages/CloudErrorPage.tsx
================================================
import Text from "@/refresh-components/texts/Text";
import ErrorPageLayout from "@/components/errorPages/ErrorPageLayout";

export default function CloudError() {
  return (
    <ErrorPageLayout>
      <Text as="p" headingH2>
        Maintenance in Progress
      </Text>

      <Text as="p" text03>
        Onyx is currently in a maintenance window. Please check back in a couple
        of minutes.
      </Text>

      <Text as="p" text03>
        We apologize for any inconvenience this may cause and appreciate your
        patience.
      </Text>
    </ErrorPageLayout>
  );
}


================================================
FILE: web/src/components/errorPages/ErrorPage.tsx
================================================
import ErrorPageLayout from "@/components/errorPages/ErrorPageLayout";
import Text from "@/refresh-components/texts/Text";
import { DOCS_BASE_URL } from "@/lib/constants";
import { SvgAlertCircle } from "@opal/icons";

export default function Error() {
  return (
    <ErrorPageLayout>
      <div className="flex flex-row items-center gap-2">
        <Text as="p" headingH2>
          We encountered an issue
        </Text>
        <SvgAlertCircle className="w-[1.5rem] h-[1.5rem] stroke-text-04" />
      </div>

      <Text as="p" text03>
        It seems there was a problem loading your Onyx settings. This could be
        due to a configuration issue or incomplete setup.
      </Text>

      <Text as="p" text03>
        If you&apos;re an admin, please review our{" "}
        <a
          className="text-action-link-05"
          href={`${DOCS_BASE_URL}?utm_source=app&utm_medium=error_page&utm_campaign=config_error`}
          target="_blank"
          rel="noopener noreferrer"
        >
          documentation
        </a>{" "}
        for proper configuration steps. If you&apos;re a user, please contact
        your admin for assistance.
      </Text>

      <Text as="p" text03>
        Need help? Join our{" "}
        <a
          className="text-action-link-05"
          href="https://discord.gg/4NA5SbzrWb"
          target="_blank"
          rel="noopener noreferrer"
        >
          Discord community
        </a>{" "}
        for support.
      </Text>
    </ErrorPageLayout>
  );
}


================================================
FILE: web/src/components/errorPages/ErrorPageLayout.tsx
================================================
import React from "react";
import { OnyxLogoTypeIcon } from "@/components/icons/icons";

interface ErrorPageLayoutProps {
  children: React.ReactNode;
}

export default function ErrorPageLayout({ children }: ErrorPageLayoutProps) {
  return (
    <div className="flex flex-col items-center justify-center w-full h-screen gap-4">
      <OnyxLogoTypeIcon size={120} className="" />
      <div className="max-w-[40rem] w-full border bg-background-neutral-00 shadow-02 rounded-16 p-6 flex flex-col gap-4">
        {children}
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/filters/SourceSelector.tsx
================================================
import React, { JSX } from "react";
import { DocumentSetSummary, Tag, ValidSources } from "@/lib/types";
import { SourceMetadata } from "@/lib/search/interfaces";
import { FiBook, FiBookmark, FiMap, FiX } from "react-icons/fi";
import { SearchDateRangeSelector } from "@/components/dateRangeSelectors/SearchDateRangeSelector";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { listSourceMetadata } from "@/lib/sources";
import { SourceIcon } from "@/components/SourceIcon";
import { FilterDropdown } from "@/components/search/filtering/FilterDropdown";

export interface SourceSelectorProps {
  timeRange: DateRangePickerValue | null;
  setTimeRange: React.Dispatch<
    React.SetStateAction<DateRangePickerValue | null>
  >;
  showDocSidebar?: boolean;
  selectedSources: SourceMetadata[];
  setSelectedSources: React.Dispatch<React.SetStateAction<SourceMetadata[]>>;
  selectedDocumentSets: string[];
  setSelectedDocumentSets: React.Dispatch<React.SetStateAction<string[]>>;
  selectedTags: Tag[];
  setSelectedTags: React.Dispatch<React.SetStateAction<Tag[]>>;
  availableDocumentSets: DocumentSetSummary[];
  existingSources: ValidSources[];
  availableTags: Tag[];
  toggleFilters: () => void;
  filtersUntoggled: boolean;
  tagsOnLeft: boolean;
}

export function SelectedBubble({
  children,
  onClick,
}: {
  children: string | JSX.Element;
  onClick: () => void;
}) {
  return (
    <div
      className={
        "flex cursor-pointer items-center border border-border " +
        "py-1 my-1.5 rounded-lg px-2 w-fit hover:bg-accent-background-hovered"
      }
      onClick={onClick}
    >
      {children}
      <FiX className="ml-2" size={14} />
    </div>
  );
}

export function HorizontalFilters({
  timeRange,
  setTimeRange,
  selectedSources,
  setSelectedSources,
  selectedDocumentSets,
  setSelectedDocumentSets,
  availableDocumentSets,
  existingSources,
}: SourceSelectorProps) {
  const handleSourceSelect = (source: SourceMetadata) => {
    setSelectedSources((prev: SourceMetadata[]) => {
      const prevSourceNames = prev.map((source) => source.internalName);
      if (prevSourceNames.includes(source.internalName)) {
        return prev.filter((s) => s.internalName !== source.internalName);
      } else {
        return [...prev, source];
      }
    });
  };

  const handleDocumentSetSelect = (documentSetName: string) => {
    setSelectedDocumentSets((prev: string[]) => {
      if (prev.includes(documentSetName)) {
        return prev.filter((s) => s !== documentSetName);
      } else {
        return [...prev, documentSetName];
      }
    });
  };

  const allSources = listSourceMetadata();
  const availableSources = allSources.filter((source) =>
    existingSources.includes(source.internalName)
  );

  return (
    <div className="b">
      <div className="flex gap-x-3">
        <div className="w-52">
          <SearchDateRangeSelector
            value={timeRange}
            onValueChange={setTimeRange}
          />
        </div>

        <FilterDropdown
          width="w-52"
          options={availableSources.map((source) => {
            return {
              key: source.displayName,
              display: (
                <>
                  <SourceIcon
                    sourceType={source.baseSourceType || source.internalName}
                    iconSize={16}
                  />
                  <span className="ml-2 text-sm">{source.displayName}</span>
                </>
              ),
            };
          })}
          selected={selectedSources.map((source) => source.displayName)}
          handleSelect={(option) =>
            handleSourceSelect(
              allSources.find((source) => source.displayName === option.key)!
            )
          }
          icon={
            <div className="my-auto mr-2 w-[16px] h-[16px]">
              <FiMap size={16} />
            </div>
          }
          defaultDisplay="All Sources"
        />
        {availableDocumentSets.length > 0 && (
          <FilterDropdown
            width="w-52"
            options={availableDocumentSets.map((documentSet) => {
              return {
                key: documentSet.name,
                display: (
                  <>
                    <div className="my-auto">
                      <FiBookmark />
                    </div>
                    <span className="ml-2 text-sm">{documentSet.name}</span>
                  </>
                ),
              };
            })}
            selected={selectedDocumentSets}
            handleSelect={(option) => handleDocumentSetSelect(option.key)}
            icon={
              <div className="my-auto mr-2 w-[16px] h-[16px]">
                <FiBook size={16} />
              </div>
            }
            defaultDisplay="All Document Sets"
          />
        )}
      </div>

      <div className="flex  mt-2">
        <div className="flex flex-wrap gap-x-2">
          {timeRange && timeRange.selectValue && (
            <SelectedBubble onClick={() => setTimeRange(null)}>
              <div className="text-sm flex">{timeRange.selectValue}</div>
            </SelectedBubble>
          )}
          {existingSources.length > 0 &&
            selectedSources.map((source) => (
              <SelectedBubble
                key={source.internalName}
                onClick={() => handleSourceSelect(source)}
              >
                <>
                  <SourceIcon
                    sourceType={source.baseSourceType || source.internalName}
                    iconSize={16}
                  />
                  <span className="ml-2 text-sm">{source.displayName}</span>
                </>
              </SelectedBubble>
            ))}
          {selectedDocumentSets.length > 0 &&
            selectedDocumentSets.map((documentSetName) => (
              <SelectedBubble
                key={documentSetName}
                onClick={() => handleDocumentSetSelect(documentSetName)}
              >
                <>
                  <div>
                    <FiBookmark />
                  </div>
                  <span className="ml-2 text-sm">{documentSetName}</span>
                </>
              </SelectedBubble>
            ))}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/filters/TimeRangeSelector.tsx
================================================
import { DefaultDropdownElement } from "../Dropdown";

export function TimeRangeSelector({
  value,
  onValueChange,
  className,
  timeRangeValues,
}: {
  value: any;
  onValueChange: any;
  className: any;

  timeRangeValues: { label: string; value: Date }[];
}) {
  return (
    <div className={className}>
      {timeRangeValues.map((timeRangeValue) => (
        <DefaultDropdownElement
          key={timeRangeValue.label}
          name={timeRangeValue.label}
          onSelect={() =>
            onValueChange({
              to: new Date(),
              from: timeRangeValue.value,
              selectValue: timeRangeValue.label,
            })
          }
          isSelected={value?.selectValue === timeRangeValue.label}
        />
      ))}
    </div>
  );
}


================================================
FILE: web/src/components/header/AnnouncementBanner.tsx
================================================
"use client";
import { useState, useEffect, useContext } from "react";
import { CustomTooltip } from "../tooltip/CustomTooltip";
import { SettingsContext } from "@/providers/SettingsProvider";
import Link from "next/link";
import type { Route } from "next";
import Cookies from "js-cookie";
import { SvgX } from "@opal/icons";
const DISMISSED_NOTIFICATION_COOKIE_PREFIX = "dismissed_notification_";
const COOKIE_EXPIRY_DAYS = 1;

export function AnnouncementBanner() {
  const settings = useContext(SettingsContext);
  const [localNotifications, setLocalNotifications] = useState(
    settings?.settings.notifications || []
  );

  useEffect(() => {
    const filteredNotifications = (
      settings?.settings.notifications || []
    ).filter(
      (notification) =>
        !Cookies.get(
          `${DISMISSED_NOTIFICATION_COOKIE_PREFIX}${notification.id}`
        )
    );
    setLocalNotifications(filteredNotifications);
  }, [settings?.settings.notifications]);

  if (!localNotifications || localNotifications.length === 0) return null;

  const handleDismiss = async (notificationId: number) => {
    try {
      const response = await fetch(
        `/api/notifications/${notificationId}/dismiss`,
        {
          method: "POST",
        }
      );
      if (response.ok) {
        Cookies.set(
          `${DISMISSED_NOTIFICATION_COOKIE_PREFIX}${notificationId}`,
          "true",
          { expires: COOKIE_EXPIRY_DAYS }
        );
        setLocalNotifications((prevNotifications) =>
          prevNotifications.filter(
            (notification) => notification.id !== notificationId
          )
        );
      } else {
        console.error("Failed to dismiss notification");
      }
    } catch (error) {
      console.error("Error dismissing notification:", error);
    }
  };

  return (
    <>
      {localNotifications
        .filter((notification) => !notification.dismissed)
        .map((notification) => {
          return (
            <div
              key={notification.id}
              className="absolute top-0 left-1/2 transform -translate-x-1/2 bg-blue-600 rounded-sm text-white px-4 pr-8 py-3 mx-auto"
            >
              {notification.notif_type == "reindex" ? (
                <p className="text-center">
                  Your index is out of date - we strongly recommend updating
                  your search settings.{" "}
                  <Link
                    href={"/admin/configuration/search"}
                    className="ml-2 underline cursor-pointer"
                  >
                    Update here
                  </Link>
                </p>
              ) : notification.notif_type == "two_day_trial_ending" ? (
                <p className="text-center">
                  Your trial is ending soon - submit your billing information to
                  continue using Onyx.{" "}
                  <Link
                    href={"/admin/billing" as Route}
                    className="ml-2 underline cursor-pointer"
                  >
                    Update here
                  </Link>
                </p>
              ) : null}
              <button
                onClick={() => handleDismiss(notification.id)}
                className="absolute top-0 right-0 mt-2 mr-2"
                aria-label="Dismiss"
              >
                <CustomTooltip showTick citation delay={100} content="Dismiss">
                  <SvgX className="stroke-text-04 h-5 w-5" />
                </CustomTooltip>
              </button>
            </div>
          );
        })}
    </>
  );
}


================================================
FILE: web/src/components/header/HeaderTitle.tsx
================================================
"use client";

import React, { JSX } from "react";

export function HeaderTitle({
  children,
  backgroundToggled,
}: {
  children: JSX.Element | string;
  backgroundToggled?: boolean;
}) {
  const isString = typeof children === "string";
  const textSize =
    isString && children.length > 10
      ? "text-lg pb-[4px] "
      : "pb-[2px] text-2xl";

  return (
    <h1
      className={`${textSize} ${
        backgroundToggled
          ? "text-text-sidebar-toggled-header"
          : "text-text-sidebar-header"
      } break-words dark:text-[#fff] text-left line-clamp-2 ellipsis text-strong overflow-hidden leading-none font-bold`}
    >
      {children}
    </h1>
  );
}


================================================
FILE: web/src/components/icons/DynamicFaIcon.tsx
================================================
import React from "react";
import { IconBaseProps, IconType } from "react-icons";
import { FaQuestion } from "react-icons/fa";

interface DynamicIconProps extends IconBaseProps {
  name: string;
}

// Renders a FontAwesome icon dynamically based on the provided name
const DynamicFaIcon: React.FC<DynamicIconProps> = ({ name, ...props }) => {
  const IconComponent = getPreloadedIcon(name);
  return IconComponent ? (
    <IconComponent className="h-4 w-4" {...props} />
  ) : (
    <FaQuestion className="h-4 w-4" {...props} />
  );
};

// Cache for storing preloaded icons
const iconCache: Record<string, IconType> = {};

// Preloads icons asynchronously and stores them in the cache
export async function preloadIcons(iconNames: string[]): Promise<void> {
  const promises = iconNames.map(async (name) => {
    try {
      const iconModule = await import("react-icons/fa");
      const iconName = `Fa${
        name.charAt(0).toUpperCase() + name.slice(1)
      }` as keyof typeof iconModule;
      iconCache[name] = (iconModule[iconName] as IconType) || FaQuestion;
    } catch (error) {
      console.error(`Failed to load icon: ${name}`, error);
      iconCache[name] = FaQuestion;
    }
  });

  await Promise.all(promises);
}

// Retrieves a preloaded icon from the cache
export function getPreloadedIcon(name: string): IconType | undefined {
  return iconCache[name] || FaQuestion;
}

export default DynamicFaIcon;


================================================
FILE: web/src/components/icons/icons.test.tsx
================================================
/**
 * Icon Component Tests
 *
 * Tests logo icons to ensure they render correctly with proper accessibility
 * and support various display sizes.
 */
import React from "react";
import { SvgBifrost } from "@opal/icons";
import { render } from "@tests/setup/test-utils";
import { GithubIcon, GitbookIcon, ConfluenceIcon } from "./icons";

describe("Logo Icons", () => {
  test("renders with alt text", () => {
    const { container } = render(<GithubIcon />);
    const image = container.querySelector("img");

    expect(image).toBeInTheDocument();
    expect(image).toHaveAttribute("alt");
  });

  test("applies custom size", () => {
    const { container } = render(<GithubIcon size={48} />);
    const image = container.querySelector("img");

    expect(image).toHaveStyle({ width: "48px", height: "48px" });
  });

  test("applies size adjustments", () => {
    // ConfluenceIcon has a +4px size adjustment
    const { container } = render(<ConfluenceIcon size={16} />);
    const image = container.querySelector("img");

    // Base 16 + adjustment 4 = 20
    expect(image).toHaveStyle({ width: "20px", height: "20px" });
  });

  // This test is for icons that have light and dark variants (e.g. GitbookIcon)
  // Both exist in the DOM, one is hidden via CSS.
  test("renders both light and dark variants", () => {
    const { container } = render(<GitbookIcon />);
    const images = container.querySelectorAll("img");

    // Should render both light and dark variants in the DOM (one hidden via CSS)
    expect(images).toHaveLength(2);
    images.forEach((img) => {
      expect(img).toHaveAttribute("alt");
    });
  });

  test("accepts className and size props", () => {
    expect(() => {
      render(<GithubIcon size={100} className="custom-class" />);
    }).not.toThrow();
  });

  test("renders the Bifrost icon with theme-aware colors", () => {
    const { container } = render(
      <SvgBifrost size={32} className="custom text-red-500 dark:text-black" />
    );
    const icon = container.querySelector("svg");

    expect(icon).toBeInTheDocument();
    expect(icon).toHaveClass("custom", "text-[#33C19E]", "dark:text-white");
    expect(icon).not.toHaveClass("text-red-500", "dark:text-black");
  });
});


================================================
FILE: web/src/components/icons/icons.tsx
================================================
"use client";

import { JSX } from "react";
import Image from "next/image";
import { StaticImageData } from "next/image";
import { BrainIcon as Brain } from "@phosphor-icons/react";
import {
  FiAlertCircle,
  FiAlertTriangle,
  FiChevronDown,
  FiChevronsDown,
  FiChevronsUp,
  FiClipboard,
  FiCpu,
  FiDatabase,
  FiEdit2,
  FiFile,
  FiGlobe,
  FiInfo,
  FiMail,
} from "react-icons/fi";
import { FaRobot } from "react-icons/fa";
import { SiBookstack } from "react-icons/si";
import axeroImage from "@public/Axero.jpeg";
import airtableIcon from "@public/Airtable.svg";
import amazonSVG from "@public/Amazon.svg";
import anthropicSVG from "@public/Anthropic.svg";
import asanaIcon from "@public/Asana.png";
import azureIcon from "@public/Azure.png";
import bitbucketIcon from "@public/Bitbucket.svg";
import clickupIcon from "@public/Clickup.svg";
import codaIcon from "@public/Coda.png";
import cohereIcon from "@public/Cohere.svg";
import confluenceSVG from "@public/Confluence.svg";
import deepseekSVG from "@public/Deepseek.svg";
import discordIcon from "@public/discord.png";
import discourseIcon from "@public/Discourse.png";
import document360Icon from "@public/Document360.png";
import dropboxIcon from "@public/Dropbox.png";
import drupalwikiIcon from "@public/DrupalWiki.png";
import egnyteIcon from "@public/Egnyte.png";
import elevenLabsDarkSVG from "@public/ElevenLabsDark.svg";
import elevenLabsSVG from "@public/ElevenLabs.svg";
import firefliesIcon from "@public/Fireflies.png";
import freshdeskIcon from "@public/Freshdesk.png";
import geminiSVG from "@public/Gemini.svg";
import gitbookDarkIcon from "@public/GitBookDark.png";
import gitbookLightIcon from "@public/GitBookLight.png";
import githubLightIcon from "@public/Github.png";
import gongIcon from "@public/Gong.png";
import googleIcon from "@public/Google.png";
import googleCloudStorageIcon from "@public/GoogleCloudStorage.png";
import googleSitesIcon from "@public/GoogleSites.png";
import guruIcon from "@public/Guru.svg";
import highspotIcon from "@public/Highspot.png";
import hubSpotIcon from "@public/HubSpot.png";
import jiraSVG from "@public/Jira.svg";
import kimiIcon from "@public/Kimi.png";
import linearIcon from "@public/Linear.png";
import litellmIcon from "@public/litellm.png";
import lmStudioIcon from "@public/lm_studio.png";
import mediawikiIcon from "@public/MediaWiki.svg";
import metaSVG from "@public/Meta.svg";
import microsoftIcon from "@public/microsoft.png";
import microsoftSVG from "@public/Microsoft.svg";
import mistralSVG from "@public/Mistral.svg";
import mixedBreadSVG from "@public/Mixedbread.png";
import nomicSVG from "@public/nomic.svg";
import OCIStorageSVG from "@public/OCI.svg";
import ollamaIcon from "@public/Ollama.png";
import openAISVG from "@public/Openai.svg";
import openSourceIcon from "@public/OpenSource.png";
import outlinePNG from "@public/Outline.png";
import qwenSVG from "@public/Qwen.svg";
import r2Icon from "@public/r2.png";
import s3Icon from "@public/S3.png";
import salesforceIcon from "@public/Salesforce.png";
import sharepointIcon from "@public/Sharepoint.png";
import slackIcon from "@public/Slack.png";
import teamsIcon from "@public/Teams.png";
import outlookIcon from "@public/Outlook.png";
import oneDriveIcon from "@public/OneDrive.png";
import boxIcon from "@public/Box.png";
import trelloIcon from "@public/Trello.png";
import serviceNowIcon from "@public/Servicenow.png";
import wikipediaIcon from "@public/Wikipedia.png";
import xenforoIcon from "@public/Xenforo.svg";
import zAIIcon from "@public/Z_AI.png";
import zendeskIcon from "@public/Zendesk.svg";
import zulipIcon from "@public/Zulip.png";
import testrailSVG from "@public/Testrail.svg";
import gitlabIcon from "@public/Gitlab.png";
import gmailIcon from "@public/Gmail.png";
import googleDriveIcon from "@public/GoogleDrive.png";
import loopioIcon from "@public/Loopio.png";
import notionIcon from "@public/Notion.png";
import productboardIcon from "@public/Productboard.png";
import slabLogoIcon from "@public/SlabLogo.png";

export interface IconProps {
  size?: number;
  className?: string;
}
export interface LogoIconProps extends IconProps {
  src: string | StaticImageData;
}
export type OnyxIconType = (props: IconProps) => JSX.Element;

export const defaultTailwindCSS = "my-auto flex flex-shrink-0 text-default";
export const defaultTailwindCSSBlue = "my-auto flex flex-shrink-0 text-link";

export const LogoIcon = ({
  size = 16,
  className = defaultTailwindCSS,
  src,
}: LogoIconProps) => (
  <Image
    style={{ width: `${size}px`, height: `${size}px` }}
    className={`w-[${size}px] h-[${size}px] object-contain ` + className}
    src={src}
    alt="Logo"
    width="96"
    height="96"
  />
);

// Helper to create simple icon components from react-icon libraries
export function createIcon(
  IconComponent: React.ComponentType<{ size?: number; className?: string }>
) {
  function IconWrapper({
    size = 16,
    className = defaultTailwindCSS,
  }: IconProps) {
    return <IconComponent size={size} className={className} />;
  }

  IconWrapper.displayName = `Icon(${
    IconComponent.displayName || IconComponent.name || "Component"
  })`;
  return IconWrapper;
}

/**
 * Creates a logo icon component that automatically supports dark mode adaptations.
 *
 * Depending on the options provided, the returned component handles:
 * 1. Light/Dark variants: If both `src` and `darkSrc` are provided, displays the
 *    appropriate image based on the current color theme.
 * 2. Monochromatic inversion: If `monochromatic` is true, applies a CSS color inversion
 *    in dark mode for a monochrome icon appearance.
 * 3. Static icon: If only `src` is provided, renders the image without dark mode adaptation.
 *
 * @param src - The image or SVG source used for the icon (light/default mode).
 * @param options - Optional settings:
 *   - darkSrc: The image or SVG source used specifically for dark mode.
 *   - monochromatic: If true, applies a CSS inversion in dark mode for monochrome logos.
 *   - sizeAdjustment: Number to add to the icon size (e.g., 4 to make icon larger).
 *   - classNameAddition: Additional CSS classes to apply (e.g., '-m-0.5' for margin).
 * @returns A React functional component that accepts {@link IconProps} and renders
 *          the logo with dark mode handling as needed.
 */
const createLogoIcon = (
  src: string | StaticImageData,
  options?: {
    darkSrc?: string | StaticImageData;
    monochromatic?: boolean;
    sizeAdjustment?: number;
    classNameAddition?: string;
  }
) => {
  const {
    darkSrc,
    monochromatic,
    sizeAdjustment = 0,
    classNameAddition = "",
  } = options || {};

  const LogoIconWrapper = ({
    size = 16,
    className = defaultTailwindCSS,
  }: IconProps) => {
    const adjustedSize = size + sizeAdjustment;

    // Build className dynamically (only apply monochromatic if no darkSrc)
    const monochromaticClass = !darkSrc && monochromatic ? "dark:invert" : "";
    const finalClassName = [className, classNameAddition, monochromaticClass]
      .filter(Boolean)
      .join(" ");

    // If darkSrc is provided, use CSS-based dark mode switching
    // This avoids hydration issues and content flashing since next-themes
    // sets the .dark class before React hydrates
    if (darkSrc) {
      return (
        <>
          <LogoIcon
            size={adjustedSize}
            className={`${finalClassName} dark:hidden`}
            src={src}
          />
          <LogoIcon
            size={adjustedSize}
            className={`${finalClassName} hidden dark:block`}
            src={darkSrc}
          />
        </>
      );
    }

    return (
      <LogoIcon size={adjustedSize} className={finalClassName} src={src} />
    );
  };

  LogoIconWrapper.displayName = "LogoIconWrapper";
  return LogoIconWrapper;
};

// ============================================================================
// GENERIC SVG COMPONENTS (sorted alphabetically)
// ============================================================================
export const AlertIcon = createIcon(FiAlertCircle);
export const ArtAsistantIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 24 24"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M12 1.5C9.98656 1.4999 8.01555 2.07871 6.32185 3.16743C4.62815 4.25616 3.28318 5.8089 2.44724 7.6406C1.6113 9.47231 1.31963 11.5057 1.60699 13.4986C1.89435 15.4914 2.74862 17.3596 4.068 18.8805L10.422 12.6285C10.8429 12.2144 11.4096 11.9824 12 11.9824C12.5904 11.9824 13.1571 12.2144 13.578 12.6285L19.932 18.8805C21.2514 17.3596 22.1056 15.4914 22.393 13.4986C22.6804 11.5057 22.3887 9.47231 21.5528 7.6406C20.7168 5.8089 19.3719 4.25616 17.6782 3.16743C15.9845 2.07871 14.0134 1.4999 12 1.5ZM12 22.5C14.5238 22.5042 16.9639 21.5952 18.87 19.941L12.525 13.6965C12.3848 13.5591 12.1963 13.4821 12 13.4821C11.8037 13.4821 11.6152 13.5591 11.475 13.6965L5.13 19.941C7.03607 21.5952 9.47619 22.5042 12 22.5ZM0 12C0 8.8174 1.26428 5.76516 3.51472 3.51472C5.76516 1.26428 8.8174 0 12 0C15.1826 0 18.2348 1.26428 20.4853 3.51472C22.7357 5.76516 24 8.8174 24 12C24 15.1826 22.7357 18.2348 20.4853 20.4853C18.2348 22.7357 15.1826 24 12 24C8.8174 24 5.76516 22.7357 3.51472 20.4853C1.26428 18.2348 0 15.1826 0 12ZM16.5 8.25C16.5 8.05109 16.421 7.86032 16.2803 7.71967C16.1397 7.57902 15.9489 7.5 15.75 7.5C15.5511 7.5 15.3603 7.57902 15.2197 7.71967C15.079 7.86032 15 8.05109 15 8.25C15 8.44891 15.079 8.63968 15.2197 8.78033C15.3603 8.92098 15.5511 9 15.75 9C15.9489 9 16.1397 8.92098 16.2803 8.78033C16.421 8.63968 16.5 8.44891 16.5 8.25ZM18 8.25C18 8.54547 17.9418 8.83806 17.8287 9.11104C17.7157 9.38402 17.5499 9.63206 17.341 9.84099C17.1321 10.0499 16.884 10.2157 16.611 10.3287C16.3381 10.4418 16.0455 10.5 15.75 10.5C15.4545 10.5 15.1619 10.4418 14.889 10.3287C14.616 10.2157 14.3679 10.0499 14.159 9.84099C13.9501 9.63206 13.7843 9.38402 13.6713 9.11104C13.5582 8.83806 13.5 8.54547 13.5 8.25C13.5 7.65326 13.7371 7.08097 14.159 6.65901C14.581 6.23705 15.1533 6 15.75 6C16.3467 6 16.919 6.23705 17.341 6.65901C17.7629 7.08097 18 7.65326 18 8.25Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const BookmarkIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 16 16"
    >
      <path
        fill="currentColor"
        d="M3.75 2a.75.75 0 0 0-.75.75v10.5a.75.75 0 0 0 1.28.53L8 10.06l3.72 3.72a.75.75 0 0 0 1.28-.53V2.75a.75.75 0 0 0-.75-.75z"
      />
    </svg>
  );
};
export const BrainIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return <Brain size={size} className={className} />;
};
export const CPUIcon = createIcon(FiCpu);
export const DatabaseIcon = createIcon(FiDatabase);
export const CameraIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <g
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
      >
        <path d="M13.5 5a1 1 0 0 0-1-1h-2L9 2H5L3.5 4h-2a1 1 0 0 0-1 1v6a1 1 0 0 0 1 1h11a1 1 0 0 0 1-1z" />
        <path d="M7 9.75a2.25 2.25 0 1 0 0-4.5a2.25 2.25 0 0 0 0 4.5" />
      </g>
    </svg>
  );
};
export const Caret = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="currentColor"
        d="m12.37 15.835l6.43-6.63C19.201 8.79 18.958 8 18.43 8H5.57c-.528 0-.771.79-.37 1.205l6.43 6.63c.213.22.527.22.74 0Z"
      />
    </svg>
  );
};
export const CheckmarkIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth="2"
        d="M20 6L9 17l-5-5"
      />
    </svg>
  );
};
export const ChevronDownIcon = createIcon(FiChevronDown);
export const ChevronsDownIcon = createIcon(FiChevronsDown);
export const ChevronsUpIcon = createIcon(FiChevronsUp);
export const ClipboardIcon = createIcon(FiClipboard);
export const DexpandTwoIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        d="m.5 13.5l5-5m-4 0h4v4m8-12l-5 5m4 0h-4v-4"
      />
    </svg>
  );
};
export const DocumentIcon2 = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth="1.5"
        d="M19.5 14.25v-2.625a3.375 3.375 0 0 0-3.375-3.375h-1.5A1.125 1.125 0 0 1 13.5 7.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H8.25m0 12.75h7.5m-7.5 3H12M10.5 2.25H5.625c-.621 0-1.125.504-1.125 1.125v17.25c0 .621.504 1.125 1.125 1.125h12.75c.621 0 1.125-.504 1.125-1.125V11.25a9 9 0 0 0-9-9Z"
      />
    </svg>
  );
};
export const DownloadCSVIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        d="M.5 10.5v1a2 2 0 0 0 2 2h9a2 2 0 0 0 2-2v-1M4 6l3 3.5L10 6M7 9.5v-9"
      />
    </svg>
  );
};
export const EditIcon = createIcon(FiEdit2);
export const EmailIcon = createIcon(FiMail);

//  COMPANY LOGOS
export const ExpandTwoIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        d="m8.5 5.5l5-5m-4 0h4v4m-8 4l-5 5m4 0h-4v-4"
      />
    </svg>
  );
};
export const FileIcon = createIcon(FiFile);
export const FileOptionIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M20.6801 7.02928C20.458 6.5654 20.1451 6.15072 19.76 5.80973L16.76 3.09074C16.0939 2.47491 15.2435 2.09552 14.3401 2.01115C14.2776 1.99628 14.2125 1.99628 14.15 2.01115H8.21008C7.54764 1.98307 6.88617 2.08698 6.26428 2.31683C5.64239 2.54667 5.07249 2.89785 4.58765 3.34995C4.10281 3.80205 3.71274 4.34605 3.44019 4.95025C3.16763 5.55445 3.01797 6.20679 3 6.86934V17.1655C3.03538 18.1647 3.36978 19.1303 3.95984 19.9375C4.5499 20.7448 5.36855 21.3566 6.31006 21.6939C6.92247 21.9253 7.57613 22.0274 8.22998 21.9937H15.79C16.4525 22.0218 17.1138 21.9179 17.7357 21.6881C18.3576 21.4582 18.9276 21.107 19.4125 20.6549C19.8973 20.2028 20.2874 19.6588 20.5599 19.0546C20.8325 18.4504 20.982 17.7981 21 17.1355V8.56872C21.0034 8.03873 20.8944 7.51404 20.6801 7.02928ZM16.0601 7.41915C15.9174 7.42047 15.7759 7.39353 15.6437 7.33986C15.5115 7.2862 15.3913 7.20687 15.2899 7.10649C15.1886 7.00611 15.1081 6.88664 15.0532 6.755C14.9983 6.62336 14.97 6.48215 14.97 6.33953V3.69052C15.63 3.85046 18.2 6.48947 18.76 6.92931C18.9256 7.06878 19.0675 7.23423 19.1801 7.41915H16.0601Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const GlobeIcon = createIcon(FiGlobe);
export const GroupsIconSkeleton = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <g fill="none" stroke="currentColor" strokeWidth="1.5">
        <circle cx="9" cy="6" r="4" />
        <path strokeLinecap="round" d="M15 9a3 3 0 1 0 0-6" />
        <ellipse cx="9" cy="17" rx="7" ry="4" />
        <path
          strokeLinecap="round"
          d="M18 14c1.754.385 3 1.359 3 2.5c0 1.03-1.014 1.923-2.5 2.37"
        />
      </g>
    </svg>
  );
};
export const InfoIcon = createIcon(FiInfo);
export const MacIcon = ({
  size = 16,
  className = "my-auto flex flex-shrink-0 ",
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="currentColor"
        d="M6.5 4.5a2 2 0 0 1 2 2v2h-2a2 2 0 1 1 0-4Zm4 4v-2a4 4 0 1 0-4 4h2v3h-2a4 4 0 1 0 4 4v-2h3v2a4 4 0 1 0 4-4h-2v-3h2a4 4 0 1 0-4-4v2h-3Zm0 2h3v3h-3v-3Zm5-2v-2a2 2 0 1 1 2 2h-2Zm0 7h2a2 2 0 1 1-2 2v-2Zm-7 0v2a2 2 0 1 1-2-2h2Z"
      />
    </svg>
  );
};
export const NewChatIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 20 20"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M12.5 1.99982H6C3.79086 1.99982 2 3.79068 2 5.99982V13.9998C2 16.209 3.79086 17.9998 6 17.9998H14C16.2091 17.9998 18 16.209 18 13.9998V8.49982"
        stroke="currentColor"
        strokeLinecap="round"
      />
      <path
        d="M17.1471 5.13076C17.4492 4.82871 17.6189 4.41901 17.619 3.9918C17.6191 3.56458 17.4494 3.15484 17.1474 2.85271C16.8453 2.55058 16.4356 2.38082 16.0084 2.38077C15.5812 2.38071 15.1715 2.55037 14.8693 2.85242L11.0562 6.66651L7.24297 10.4806C7.1103 10.6129 7.01218 10.7758 6.95726 10.9549L6.20239 13.4418C6.18762 13.4912 6.18651 13.5437 6.19916 13.5937C6.21182 13.6437 6.23778 13.6894 6.27428 13.7258C6.31078 13.7623 6.35646 13.7881 6.40648 13.8007C6.45651 13.8133 6.509 13.8121 6.5584 13.7972L9.04585 13.0429C9.2248 12.9885 9.38766 12.891 9.52014 12.7589L17.1471 5.13076Z"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </svg>
  );
};
export const NotebookIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="currentColor"
        d="M11.25 4.533A9.707 9.707 0 0 0 6 3a9.735 9.735 0 0 0-3.25.555a.75.75 0 0 0-.5.707v14.25a.75.75 0 0 0 1 .707A8.237 8.237 0 0 1 6 18.75c1.995 0 3.823.707 5.25 1.886V4.533Zm1.5 16.103A8.214 8.214 0 0 1 18 18.75c.966 0 1.89.166 2.75.47a.75.75 0 0 0 1-.708V4.262a.75.75 0 0 0-.5-.707A9.735 9.735 0 0 0 18 3a9.707 9.707 0 0 0-5.25 1.533v16.103Z"
      />
    </svg>
  );
};
export const NotebookIconSkeleton = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth="1.5"
        d="M12 6.042A8.967 8.967 0 0 0 6 3.75c-1.052 0-2.062.18-3 .512v14.25A8.987 8.987 0 0 1 6 18c2.305 0 4.408.867 6 2.292m0-14.25a8.966 8.966 0 0 1 6-2.292c1.052 0 2.062.18 3 .512v14.25A8.987 8.987 0 0 0 18 18a8.967 8.967 0 0 0-6 2.292m0-14.25v14.25"
      />
    </svg>
  );
};
export const OnyxIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 56 56"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        fillRule="evenodd"
        clipRule="evenodd"
        d="M27.9998 0L10.8691 7.76944L27.9998 15.5389L45.1305 7.76944L27.9998 0ZM27.9998 40.4611L10.8691 48.2306L27.9998 56L45.1305 48.2306L27.9998 40.4611ZM48.2309 10.8691L56.0001 28.0003L48.2309 45.1314L40.4617 28.0003L48.2309 10.8691ZM15.5385 28.0001L7.76923 10.869L0 28.0001L7.76923 45.1313L15.5385 28.0001Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const OnyxLogoTypeIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const aspectRatio = 2640 / 733; // Calculate the aspect ratio of the original SVG
  const height = size / aspectRatio; // Calculate the height based on the aspect ratio

  return (
    <svg
      version="1.1"
      xmlns="http://www.w3.org/2000/svg"
      width={size}
      height={height}
      viewBox="0 0 2640 733"
      style={{ width: `${size}px`, height: `${height}px` }}
      className={`w-[${size}px] h-[${height}px] ` + className}
    >
      <path
        d="M0 0 C33.33 0 66.66 0 101 0 C116.0557783 38.21851415 116.0557783 38.21851415 122.9375 56 C123.73628808 58.06186447 124.53511763 60.12371288 125.33398438 62.18554688 C132.03825266 79.49433582 138.7196772 96.811945 145.40026855 114.12988281 C154.13201714 136.7646467 162.89604763 159.38677068 171.67333984 182.00390625 C175.22064578 191.14500491 178.76491946 200.28727941 182.30947876 209.42944336 C184.28953371 214.53634164 186.26977615 219.64316721 188.25 224.75 C189.04166757 226.79166632 189.83333424 228.83333298 190.625 230.875 C191.2128125 232.3909375 191.2128125 232.3909375 191.8125 233.9375 C193 237 194.1875 240.0625 195.375 243.125 C195.96267654 244.6405825 195.96267654 244.6405825 196.56222534 246.18678284 C197.35481547 248.23083697 198.14739765 250.27489418 198.93997192 252.31895447 C200.90494473 257.38661809 202.8700505 262.45423006 204.83544922 267.52172852 C208.56750336 277.14472477 212.29807286 286.76829035 216.02435303 296.39352417 C217.79675153 300.97152352 219.56985562 305.54924943 221.34301758 310.12695312 C222.19314439 312.32229107 223.04292035 314.51776492 223.89233398 316.71337891 C225.06537285 319.74545515 226.2395878 322.77707344 227.4140625 325.80859375 C227.93916916 327.16729836 227.93916916 327.16729836 228.47488403 328.55345154 C228.79464203 329.37813431 229.11440002 330.20281708 229.44384766 331.05249023 C229.72185211 331.77073471 229.99985657 332.48897919 230.2862854 333.22898865 C230.99698266 335.06102535 230.99698266 335.06102535 232 337 C246.56988259 292.81493181 261.13820056 248.62935087 275.6875 204.4375 C275.96156803 203.60505013 276.23563606 202.77260025 276.51800919 201.91492462 C279.01707729 194.32423703 281.51604488 186.73351636 284.01490784 179.14276123 C289.17792065 163.45917323 294.3418852 147.77589867 299.50601101 132.09267712 C302.59609693 122.70820373 305.68610876 113.32370595 308.77612305 103.93920898 C309.06425106 103.06415274 309.35237907 102.18909649 309.64923823 101.28752339 C311.15291036 96.72081077 312.65658203 92.154098 314.16025352 87.58738518 C316.88233858 79.32030053 319.60442781 71.05321725 322.32651675 62.78613389 C323.81240457 58.27343827 325.29829211 53.76074257 326.78417969 49.24804688 C327.37414551 47.45629883 327.96411133 45.66455078 328.55407715 43.87280273 C328.84746979 42.98175842 329.14086243 42.09071411 329.44314575 41.17266846 C333.96209717 27.44844564 338.48104858 13.72422282 343 0 C367.99937294 -0.58538238 392.99771191 -1.02550978 418.00257015 -1.2953043 C429.6146466 -1.42393963 441.22245313 -1.59910139 452.83178711 -1.88598633 C462.95725523 -2.13608217 473.07963139 -2.29665662 483.20810229 -2.35221237 C488.56749218 -2.38467268 493.91804247 -2.46022203 499.27458 -2.64325142 C532.79129476 -3.74490236 532.79129476 -3.74490236 540.42525101 2.4351387 C546.11597516 7.94072522 549.54345055 14.7896396 552.64213753 22.00372505 C554.56104941 26.23801807 557.12001289 29.84116876 559.875 33.5625 C560.74382604 34.81477537 561.60901068 36.06959387 562.46875 37.328125 C563.09330078 38.23401367 563.09330078 38.23401367 563.73046875 39.15820312 C565.94759045 42.37473507 568.16143251 45.59352145 570.375 48.8125 C571.04861572 49.79198608 571.04861572 49.79198608 571.73583984 50.79125977 C575.88812976 56.8318215 580.02612448 62.88212951 584.16357422 68.93286133 C587.60197218 73.96013108 591.04732974 78.98252305 594.5 84 C598.50958746 89.82679816 602.50756794 95.66143452 606.5 101.5 C618.66861604 119.30630862 618.66861604 119.30630862 631 137 C633.93322556 134.22886427 636.18623177 131.43481587 638.37109375 128.046875 C639.0165918 127.05220215 639.66208984 126.0575293 640.32714844 125.03271484 C641.37177246 123.407771 641.37177246 123.407771 642.4375 121.75 C643.18594341 120.594188 643.93498196 119.43876119 644.68457031 118.28369141 C645.87691047 116.4461905 647.06885448 114.60844385 648.25906372 112.76956177 C652.45578077 106.28625376 656.69365977 99.83021026 660.93508911 93.37609863 C662.7114261 90.67277816 664.48642899 87.96858282 666.26145935 85.2644043 C667.61103107 83.20841458 668.96077303 81.15253671 670.31054688 79.09667969 C672.96271903 75.05695522 675.61427434 71.01682617 678.265625 66.9765625 C678.69839752 66.31709122 679.13117004 65.65761993 679.57705688 64.97816467 C681.74925116 61.66806158 683.92132765 58.35788126 686.09326172 55.04760742 C690.15895391 48.85129621 694.22565727 42.65565607 698.29595947 36.46237183 C700.1986006 33.5672177 702.10066371 30.67168405 704.00268555 27.77612305 C704.90132401 26.4084212 705.80023926 25.04090117 706.69946289 23.67358398 C707.95401163 21.76593684 709.20756431 19.8576384 710.4609375 17.94921875 C711.55293981 16.28790204 712.64697455 14.62713955 713.79142761 13.00151062 C715.12629684 10.93344915 715.12629684 10.93344915 716.04707336 8.15730286 C717.64187545 4.52163394 718.9067278 2.5270255 722 0 C729.20177109 -2.08840248 737.15310869 -1.27358726 744.55859375 -1.07421875 C746.79057478 -1.05369084 749.02260553 -1.0380382 751.25465393 -1.02705383 C757.11679012 -0.98514255 762.97590648 -0.87717399 768.83685303 -0.75531006 C774.82246038 -0.64258465 780.808558 -0.59250224 786.79492188 -0.53710938 C798.53127155 -0.41929536 810.26538425 -0.23167118 822 0 C820.553704 3.75941534 818.91642122 6.80235597 816.5859375 10.09765625 C815.91151611 11.05768555 815.23709473 12.01771484 814.54223633 13.00683594 C813.82704834 14.01520508 813.11186035 15.02357422 812.375 16.0625 C810.9067251 18.15176282 809.43926664 20.24159965 807.97265625 22.33203125 C807.27543457 23.32412598 806.57821289 24.3162207 805.85986328 25.33837891 C803.18180282 29.17090911 800.59870181 33.0586811 798.0234375 36.9609375 C795.36316557 40.95648105 792.65120576 44.91561926 789.9375 48.875 C789.38537842 49.6805835 788.83325684 50.48616699 788.2644043 51.31616211 C787.41616089 52.55378296 787.41616089 52.55378296 786.55078125 53.81640625 C782.9003326 59.14263677 779.26155213 64.47677492 775.625 69.8125 C774.68829468 71.18668091 774.68829468 71.18668091 773.73266602 72.58862305 C771.82164232 75.39232337 769.91079192 78.19614172 768 81 C760.31372146 92.27869313 752.62415374 103.55504107 744.91357422 114.81713867 C740.76838789 120.87310547 736.63288078 126.93563003 732.5 133 C727.8399646 139.83788867 723.17469524 146.67212488 718.5 153.5 C713.24267506 161.1793747 707.99177778 168.86311662 702.75732422 176.55810547 C699.43988066 181.43315548 696.10700963 186.29662341 692.75 191.14453125 C691.71431885 192.64632935 691.71431885 192.64632935 690.65771484 194.1784668 C689.32948516 196.10380959 687.99666621 198.02599664 686.65869141 199.94458008 C686.06233887 200.81107178 685.46598633 201.67756348 684.8515625 202.5703125 C684.05999756 203.71137451 684.05999756 203.71137451 683.25244141 204.87548828 C681.95654959 207.07370477 681.15461584 208.44868613 681 211 C682.21715668 213.50960961 682.21715668 213.50960961 684.16796875 216.109375 C684.91224121 217.17607422 685.65651367 218.24277344 686.42333984 219.34179688 C686.83807556 219.92791748 687.25281128 220.51403809 687.68011475 221.11791992 C690.08556959 224.54795949 692.40329624 228.03742521 694.73828125 231.515625 C695.80171139 233.09175087 696.86569251 234.66750512 697.93017578 236.24291992 C698.48030334 237.05731537 699.03043091 237.87171082 699.597229 238.71078491 C700.44628815 239.96769638 700.44628815 239.96769638 701.3125 241.25 C702.50508189 243.01600738 703.69762671 244.78203979 704.89013672 246.5480957 C707.36330739 250.21070426 709.83663631 253.87320589 712.31005859 257.53564453 C715.83911575 262.76143727 719.36703433 267.98799773 722.89453125 273.21484375 C734.27251271 290.07392081 745.65406902 306.93057002 757.0425415 323.78256226 C765.76190859 336.68512283 774.4756335 349.59148371 783.18685913 362.49954224 C789.4720778 371.81275642 795.75900144 381.1248105 802.05078125 390.43359375 C802.62283295 391.2799942 803.19488464 392.12639465 803.78427124 392.9984436 C806.50812915 397.0285268 809.23231545 401.0583874 811.95703125 405.08789062 C813.83882414 407.87084107 815.7184406 410.65523891 817.59619141 413.44091797 C819.06398292 415.61794403 820.54021307 417.78797899 822.02490234 419.95361328 C822.57742676 420.76427246 823.12995117 421.57493164 823.69921875 422.41015625 C824.15514404 423.07281494 824.61106934 423.73547363 825.08081055 424.41821289 C826 426 826 426 826 428 C812.7186023 428.55849244 799.44003393 428.98365536 786.14924622 429.24349213 C779.97546122 429.36828224 773.80975625 429.5371114 767.640625 429.81054688 C724.95337975 431.65373323 724.95337975 431.65373323 716.75460434 424.41446114 C712.04037734 419.35447539 709.3798801 413.28122189 707.07299805 406.84484863 C705.66007111 403.09874371 703.78648606 400.42885338 701.359375 397.2578125 C700.75101807 396.39043457 700.14266113 395.52305664 699.51586914 394.62939453 C698.93313232 393.80294434 698.35039551 392.97649414 697.75 392.125 C692.61911083 384.79623086 687.57133526 377.42388622 682.63208008 369.96484375 C678.44807504 363.6523053 674.20014438 357.38466408 669.9375 351.125 C669.22142578 350.073125 668.50535156 349.02125 667.76757812 347.9375 C666.27717369 345.74859181 664.78664067 343.55977184 663.29589844 341.37109375 C650.09138869 321.98362059 636.97651467 302.54099407 624 283 C620.63667007 286.25172952 618.2102807 289.63210501 615.8125 293.625 C611.27686126 301.02497708 606.57303832 308.30539272 601.8125 315.5625 C601.07558406 316.68651154 600.33868638 317.81053504 599.60180664 318.93457031 C598.4901004 320.62998761 597.37820842 322.32528213 596.2658844 324.02029419 C592.58359576 329.63180255 588.91534424 335.25241454 585.25 340.875 C584.63568115 341.81714355 584.0213623 342.75928711 583.38842773 343.72998047 C577.781703 352.33079027 572.18575556 360.93858921 566.59277344 369.54833984 C564.98145814 372.02854037 563.36971782 374.50846414 561.7578125 376.98828125 C561.22019287 377.81537598 560.68257324 378.6424707 560.12866211 379.49462891 C559.04623436 381.15972763 557.96371539 382.82476707 556.88110352 384.48974609 C554.13288848 388.7166856 551.38679308 392.94497671 548.64453125 397.17578125 C543.54752725 405.03601526 538.4393602 412.88782053 533.25 420.6875 C532.83145752 421.31760986 532.41291504 421.94771973 531.98168945 422.59692383 C529.12379488 426.87620512 529.12379488 426.87620512 528 428 C524.8949275 428.09533653 521.81213542 428.12551126 518.70678711 428.11352539 C517.73128403 428.11367142 516.75578094 428.11381744 515.75071716 428.1139679 C512.50957047 428.11326833 509.26848046 428.10547391 506.02734375 428.09765625 C503.78674857 428.09579222 501.54615298 428.09436825 499.30555725 428.09336853 C493.3954049 428.08954213 487.48527695 428.0797108 481.57513428 428.06866455 C475.54944183 428.05845306 469.52374547 428.05387147 463.49804688 428.04882812 C451.66535384 428.03808864 439.83267894 428.02101076 428 428 C429.38759602 424.40646837 430.89676703 421.46843574 433.1328125 418.32421875 C433.76622559 417.42566162 434.39963867 416.52710449 435.05224609 415.60131836 C435.73625488 414.6397583 436.42026367 413.67819824 437.125 412.6875 C442.99768374 404.34860964 448.74795589 395.9440765 454.375 387.4375 C459.22154648 380.12019918 464.15866216 372.91643864 469.3125 365.8125 C473.01095468 360.63466345 476.4858559 355.30363446 480 350 C484.97233364 342.4956293 490.02639754 335.09874934 495.3125 327.8125 C499.01095468 322.63466345 502.4858559 317.30363446 506 312 C510.97233364 304.4956293 516.02639754 297.09874934 521.3125 289.8125 C525.01095468 284.63466345 528.4858559 279.30363446 532 274 C536.96973115 266.49955704 542.02030355 259.1059207 547.3046875 251.82421875 C551.75049226 245.60043968 555.91137952 239.17416483 560.1171875 232.78710938 C560.76171875 231.81193359 561.40625 230.83675781 562.0703125 229.83203125 C562.70614258 228.86990723 563.34197266 227.9077832 563.99707031 226.91650391 C566.71903045 222.95300613 569.65142849 219.15949055 572.62451172 215.3815918 C574.17664131 213.1750609 574.17664131 213.1750609 573.94384766 211.0390625 C572.66324387 208.27248134 571.07159907 205.87694269 569.34375 203.36328125 C568.7827359 202.54166252 568.7827359 202.54166252 568.21038818 201.70344543 C566.97900566 199.90268667 565.73951427 198.10766881 564.5 196.3125 C563.63580164 195.05167482 562.77220246 193.79043875 561.90917969 192.52880859 C560.13808681 189.94102732 558.36412106 187.35525348 556.58789062 184.77099609 C552.78670296 179.23720439 549.02026713 173.6798895 545.25 168.125 C537.51068925 156.74271199 529.75657459 145.37052708 522 134 C513.43731085 121.4477725 504.88308804 108.88986605 496.34082031 96.32373047 C489.2374038 85.87553272 482.11978682 75.43704638 475 65 C472.54146006 61.3959742 470.08312899 57.7918061 467.625 54.1875 C466.71524414 52.85380371 466.71524414 52.85380371 465.78710938 51.49316406 C462.33719761 46.43389556 458.89251854 41.37111171 455.453125 36.3046875 C454.68278621 35.17032452 453.91244119 34.03596578 453.14208984 32.90161133 C451.66751494 30.73020867 450.19405483 28.55806342 448.72119141 26.38549805 C445.20779569 21.21120144 441.66428168 16.06868536 438 11 C435.08364827 18.08863373 432.46200954 25.24489145 429.98046875 32.49609375 C429.60532639 33.58542709 429.23018402 34.67476044 428.84367371 35.79710388 C428.04037885 38.13038493 427.23816706 40.46403908 426.43695068 42.79803467 C424.3028893 49.01424033 422.16062809 55.22762113 420.01953125 61.44140625 C419.58991089 62.68893677 419.16029053 63.93646729 418.71765137 65.22180176 C414.96449345 76.11593334 411.17746429 86.99799684 407.375 97.875 C402.06285691 113.07626901 396.77421708 128.28553048 391.5 143.5 C386.28337043 158.54605188 381.05918969 173.58940169 375.8125 188.625 C375.51695221 189.4720047 375.22140442 190.3190094 374.91690063 191.19168091 C371.55933516 200.81292478 368.19842817 210.43299977 364.83585358 220.05249405 C362.28588913 227.3473167 359.73645342 234.64232401 357.1875 241.9375 C356.95257339 242.60984428 356.71764679 243.28218857 356.4756012 243.97490692 C349.06020798 265.19824903 341.68382863 286.43477977 334.33886719 307.68261719 C329.1266949 322.75869322 323.8879652 337.82528061 318.62036133 352.88208008 C315.27876751 362.43837041 311.96295893 372.00296953 308.67578125 381.578125 C286.91538109 444.91097327 286.91538109 444.91097327 276 463 C275.62584961 463.62052246 275.25169922 464.24104492 274.86621094 464.88037109 C264.62809287 481.55301222 251.42718504 495.89386772 237.94189453 509.96533203 C234.91302457 513.13890536 231.92528159 516.35029604 228.9375 519.5625 C224.13395104 524.71676262 219.32059266 529.86167518 214.5 535 C209.06399618 540.79503813 203.63494056 546.5964599 198.21679688 552.40820312 C195.22343867 555.61901301 192.22641742 558.82626083 189.22265625 562.02734375 C188.5994751 562.69241943 187.97629395 563.35749512 187.33422852 564.04272461 C186.15490895 565.30115732 184.974497 566.55856768 183.79272461 567.81469727 C183.25268799 568.39147217 182.71265137 568.96824707 182.15625 569.5625 C181.69025391 570.05878906 181.22425781 570.55507813 180.74414062 571.06640625 C179.01041791 572.88440751 179.01041791 572.88440751 177.90902519 574.69213963 C175.8242579 577.65667667 174.21232765 579.67328349 170.63210678 580.63813591 C167.68494552 580.86195595 164.80898166 580.86588305 161.85400391 580.79467773 C160.7412207 580.79569992 159.6284375 580.79672211 158.48193359 580.79777527 C154.81249657 580.79290759 151.14556352 580.73849841 147.4765625 580.68359375 C144.92924356 580.67053481 142.38190702 580.66057204 139.83456421 580.65357971 C133.82063293 580.62960552 127.80777254 580.57446934 121.79426992 580.50413328 C114.27317501 580.41808165 106.75193337 580.3798694 99.23046875 580.34179688 C85.81961581 580.27370262 72.41071531 580.13347157 59 580 C59 550.3 59 520.6 59 490 C102.6875 489.625 102.6875 489.625 116.40893555 489.52197266 C120.51884962 489.48199967 120.51884962 489.48199967 124.6287384 489.43954468 C126.43897042 489.42202512 128.2492402 489.40814891 130.05952454 489.39730835 C149.0629029 489.27840054 166.5526309 489.00809099 181 475 C194.03833167 460.33187687 198.69268726 438.59464263 204.69360352 420.32080078 C205.61021258 417.53517626 206.53373079 414.75186488 207.45752525 411.96861649 C209.36609119 406.21746858 211.26957235 400.46460988 213.16162109 394.70800781 C213.4935849 393.69925598 213.82554871 392.69050415 214.16757202 391.65118408 C214.85646855 389.4571306 215.44224984 387.23100064 216 385 C194.88 385 173.76 385 152 385 C141.07422789 357.73568794 130.22141622 330.44397546 119.42333984 303.12890625 C115.84670474 294.08207047 112.26544029 285.03706728 108.68447876 275.99194336 C106.70612518 270.9947057 104.72807827 265.99734665 102.75 261 C101.9583348 258.99999942 101.16666813 256.99999942 100.375 255 C79 201 79 201 57.625 147 C57.23311493 146.00997482 56.84122986 145.01994965 56.43746948 143.99992371 C55.64590381 142.00017831 54.85433887 140.00043262 54.06277466 138.00068665 C52.08206882 132.99678884 50.1013502 127.99289609 48.12060547 122.98901367 C44.57655061 114.03576268 41.03256326 105.08248502 37.48910522 96.1289978 C36.31684935 93.16703084 35.14450913 90.20509727 33.97216797 87.24316406 C32.41610715 83.31168029 30.86049218 79.38002051 29.30517578 75.44824219 C25.18391305 65.03169504 21.05482122 54.61830871 16.91510391 44.20908165 C15.209152 39.91857313 13.50497312 35.62735984 11.80047607 31.33627319 C10.855479 28.95880946 9.90956649 26.58170939 8.96270752 24.20498657 C7.64735757 20.90317604 6.33508189 17.60015896 5.0234375 14.296875 C4.62960876 13.31069687 4.23578003 12.32451874 3.83001709 11.30845642 C3.47022766 10.40067947 3.11043823 9.49290253 2.73974609 8.55761719 C2.42621185 7.76973816 2.11267761 6.98185913 1.78964233 6.17010498 C1 4 1 4 0 0 Z "
        fill="currentColor"
        transform="translate(1814,153)"
      />
      <path
        d="M0 0 C0.83810211 -0.00701431 1.67620422 -0.01402863 2.53970337 -0.02125549 C5.23095126 -0.03866647 7.92197909 -0.04510809 10.61328125 -0.046875 C11.53273529 -0.04754974 12.45218933 -0.04822449 13.39950562 -0.04891968 C27.10769148 -0.03292448 40.46897884 0.34679399 53.92578125 3.203125 C54.75158691 3.37473145 55.57739258 3.54633789 56.42822266 3.72314453 C104.10137996 13.9446933 148.92015889 40.48134523 178.92578125 79.203125 C179.34972168 79.74533691 179.77366211 80.28754883 180.21044922 80.84619141 C214.41902295 124.87863761 228.47978789 178.81199691 222.328125 234.04736328 C216.03894851 283.37618327 192.43364033 325.71878914 158.61669922 361.41503906 C156.70289786 363.4388163 154.80039938 365.47290577 152.8984375 367.5078125 C151.4911795 369.01069209 150.08362987 370.51329867 148.67578125 372.015625 C148.02633545 372.70906006 147.37688965 373.40249512 146.70776367 374.11694336 C142.79169615 378.27866642 138.80498502 382.35314517 134.73291016 386.36254883 C132.92273746 388.20622512 131.22818343 390.1135503 129.55078125 392.078125 C126.80956676 395.26995009 123.91892237 398.24615477 120.92578125 401.203125 C117.99459934 404.09904458 115.16174951 407.00555071 112.48828125 410.140625 C108.70337756 414.57591787 104.54438343 418.61510611 100.37890625 422.6875 C97.94357318 424.99986008 97.94357318 424.99986008 96.62890625 427.31640625 C93.86386929 430.37950912 90.26049646 431.30794193 86.42578125 432.640625 C85.58136475 432.9406543 84.73694824 433.24068359 83.86694336 433.54980469 C62.3934153 441.0136629 40.93860537 444.75407348 18.19897461 444.59423828 C15.49216208 444.57814807 12.7868463 444.59421089 10.08007812 444.61328125 C-6.01970762 444.63290793 -21.99241242 442.8557798 -37.63671875 438.953125 C-38.9196582 438.63734497 -38.9196582 438.63734497 -40.22851562 438.31518555 C-73.32229062 429.88088614 -103.90200102 413.51357452 -128.91796875 390.20703125 C-130.92123799 388.34529733 -132.97995172 386.5858134 -135.07421875 384.828125 C-154.24034916 368.0002625 -168.35310867 345.10704354 -179.07421875 322.203125 C-179.59628906 321.11128906 -180.11835937 320.01945312 -180.65625 318.89453125 C-202.21048168 271.52071861 -202.79727805 213.17787182 -185.1484375 164.46484375 C-173.82008821 134.91287097 -155.84930804 107.17499298 -133.02246094 85.15234375 C-131.20497442 83.33394621 -129.53527624 81.46208081 -127.87109375 79.50390625 C-123.31051112 74.2512437 -118.35061936 69.42985833 -113.390625 64.55859375 C-110.00540165 61.21009977 -106.70976617 57.84651831 -103.60546875 54.234375 C-98.95475676 48.82891889 -93.82732379 43.910037 -88.74609375 38.9140625 C-83.96514754 34.18190146 -79.43948442 29.32013896 -75.07421875 24.203125 C-58.15046977 4.62939398 -24.77048816 0.13949758 0 0 Z M-81.671875 121.2109375 C-94.10751856 136.69988541 -102.42216287 153.91873058 -107.07421875 173.203125 C-107.39390625 174.45867187 -107.71359375 175.71421875 -108.04296875 177.0078125 C-112.75673366 198.0535558 -112.41571877 224.33468929 -107.07421875 245.203125 C-106.88617676 246.00073242 -106.69813477 246.79833984 -106.50439453 247.62011719 C-100.71613874 271.9059255 -89.23935154 293.26409368 -73.07421875 312.203125 C-72.04748047 313.41355469 -72.04748047 313.41355469 -71 314.6484375 C-47.9494122 341.05108564 -16.13936948 358.84287521 19.16259766 361.36279297 C46.59081946 362.79636518 74.17716739 354.47585384 94.92578125 336.203125 C95.74433594 335.52765625 96.56289062 334.8521875 97.40625 334.15625 C122.65186519 312.70726221 135.23054033 279.47024623 137.92578125 247.203125 C140.27625709 205.810847 128.67037622 165.12304253 100.8671875 133.6796875 C97.94939935 130.45922612 94.94396318 127.329408 91.92578125 124.203125 C91.04019531 123.22279297 91.04019531 123.22279297 90.13671875 122.22265625 C68.61718618 98.99259674 35.0082485 85.21932478 4.04296875 82.9140625 C-29.14695206 82.12553306 -59.95156963 96.35755729 -81.671875 121.2109375 Z "
        fill="currentColor"
        transform="translate(1161.07421875,144.796875)"
      />
      <path
        d="M0 0 C0.74490189 -0.00988449 1.48980377 -0.01976898 2.25727844 -0.029953 C50.02685431 -0.53245128 90.51113055 12.36122444 125.65625 45.61328125 C146.70645987 67.4971628 159.37409264 96.78597762 165.71875 126.11328125 C165.91766846 127.02053955 166.11658691 127.92779785 166.3215332 128.86254883 C170.15662585 147.76819 170.25182679 166.81619532 170.19580078 186.02832031 C170.1941145 188.77015288 170.195468 191.51198208 170.19668579 194.2538147 C170.19772564 200.1150116 170.19154441 205.97616338 170.18021011 211.83734894 C170.16383422 220.31123842 170.15873487 228.78510646 170.15623413 237.25901005 C170.1518758 251.01318047 170.13861771 264.76732693 170.11962891 278.52148438 C170.10122157 291.86848838 170.08708894 305.21548584 170.07861328 318.5625 C170.07808783 319.3876303 170.07756239 320.2127606 170.07702102 321.06289485 C170.07441061 325.20305393 170.07188327 329.34321306 170.06939721 333.48337221 C170.0487228 367.7350298 170.01188474 401.98664275 169.96875 436.23828125 C140.59875 436.23828125 111.22875 436.23828125 80.96875 436.23828125 C80.95416748 428.65384033 80.93958496 421.06939941 80.92456055 413.25512695 C80.87567801 388.91549748 80.8122347 364.5759265 80.73976372 340.23635652 C80.72849718 336.4480173 80.71735847 332.65967772 80.70629883 328.87133789 C80.7029933 327.74017644 80.7029933 327.74017644 80.699621 326.58616325 C80.66436498 314.41742491 80.6394351 302.24868662 80.61840298 290.07991634 C80.59654727 277.56940716 80.56340542 265.05896355 80.52004844 252.54851025 C80.49369883 244.84508894 80.47588106 237.14174914 80.46940396 229.43828508 C80.46346619 223.5053007 80.44173435 217.57243578 80.41447449 211.6395092 C80.4057624 209.22466949 80.4019573 206.80980564 80.40374184 204.39495087 C80.4208244 170.57795098 75.59013237 132.77364554 50.84375 107.36328125 C34.74804984 91.64278331 13.31965625 84.3749888 -8.90625 83.86328125 C-34.71741699 84.44989868 -58.6296742 92.74617622 -77.03125 111.23828125 C-97.237198 132.83991951 -104.16021891 163.32200101 -104.18946838 192.12161255 C-104.19440636 193.17146265 -104.19934434 194.22131275 -104.20443195 195.30297649 C-104.21970325 198.7958956 -104.22813927 202.28879648 -104.23657227 205.78173828 C-104.24615895 208.29549823 -104.25616949 210.8092566 -104.26657104 213.32301331 C-104.28796255 218.72218414 -104.30642312 224.12135215 -104.32217598 229.52054214 C-104.34495666 237.32671637 -104.37308777 245.13286423 -104.40249526 252.93901597 C-104.45007113 265.60614219 -104.49336218 278.27327924 -104.53393555 290.94042969 C-104.57334155 303.24024294 -104.61477987 315.54004659 -104.65893555 327.83984375 C-104.66165982 328.59890039 -104.66438409 329.35795702 -104.66719092 330.14001538 C-104.68087124 333.94824732 -104.69459134 337.75647912 -104.70833123 341.56471086 C-104.82213871 373.12254876 -104.928189 404.68040597 -105.03125 436.23828125 C-134.73125 436.23828125 -164.43125 436.23828125 -195.03125 436.23828125 C-195.03125 294.99828125 -195.03125 153.75828125 -195.03125 8.23828125 C-165.33125 8.23828125 -135.63125 8.23828125 -105.03125 8.23828125 C-104.70125 27.37828125 -104.37125 46.51828125 -104.03125 66.23828125 C-100.56625 62.27828125 -100.56625 62.27828125 -97.03125 58.23828125 C-95.71682638 56.88630267 -94.3877664 55.54802123 -93.03125 54.23828125 C-90.43142817 51.67837991 -87.96856602 49.07048585 -85.59375 46.30078125 C-83.02459729 43.31026409 -80.33807744 40.50711441 -77.53125 37.73828125 C-74.12798063 34.381002 -70.9630553 30.91038954 -67.85546875 27.28125 C-65.09061865 24.1848547 -62.12088088 21.30582671 -59.14453125 18.4140625 C-56.95404037 16.28178654 -56.95404037 16.28178654 -55.265625 13.66015625 C-52.22091621 10.35994742 -49.19424393 9.4613871 -44.96875 8.17578125 C-43.7923999 7.80126831 -43.7923999 7.80126831 -42.59228516 7.41918945 C-28.8051804 3.17101218 -14.455506 0.15855111 0 0 Z "
        fill="currentColor"
        transform="translate(1630.03125,144.76171875)"
      />
      <path
        d="M0 0 C2.38143646 0.95204965 4.66474801 1.93938584 6.9765625 3.03515625 C7.64632111 3.34503983 8.31607971 3.6549234 9.00613403 3.97419739 C10.45502737 4.64529835 11.90244088 5.31960167 13.34851074 5.99676514 C17.28525304 7.83974485 21.23314298 9.65859946 25.1796875 11.48046875 C25.99570435 11.85790222 26.81172119 12.23533569 27.65246582 12.62420654 C35.24684457 16.13041351 42.88818173 19.52557216 50.54144287 22.90084839 C64.74910513 29.16757199 78.87098464 35.60271852 92.95608521 42.1399231 C105.56993927 47.99156219 118.24093862 53.69850855 130.96044922 59.31640625 C141.80271707 64.10701193 152.57447648 69.02171692 163.30908203 74.05004883 C176.83057762 80.36079749 190.48865925 86.36485514 204.13589478 92.39733887 C209.77491735 94.89295962 215.39430951 97.4303158 221 100 C218.28771101 103.3023264 214.68401289 104.73818118 210.87890625 106.44921875 C210.15992157 106.77874039 209.44093689 107.10826202 208.70016479 107.44776917 C206.32395617 108.53442542 203.94335827 109.61107376 201.5625 110.6875 C199.8937747 111.44783214 198.22523897 112.20858047 196.55688477 112.96972656 C184.46077963 118.47742824 172.31861067 123.88115312 160.16287231 129.25570679 C149.25806702 134.07726202 138.42334852 139.02302429 127.62548828 144.08056641 C118.40661512 148.38323273 109.11928332 152.51689675 99.8125 156.625 C85.61505373 162.89310403 71.51530598 169.35473837 57.44631958 175.90567017 C43.06084311 182.6030892 28.62983404 189.18383697 14.11132812 195.58837891 C11.89012859 196.59614536 9.68730279 197.64481766 7.49804688 198.72021484 C6.34884766 199.28047363 5.19964844 199.84073242 4.015625 200.41796875 C2.98211914 200.93512451 1.94861328 201.45228027 0.88378906 201.98510742 C-4.62279823 203.92304201 -8.86034932 201.32394333 -13.87890625 198.96484375 C-14.5500798 198.65496017 -15.22125336 198.3450766 -15.9127655 198.02580261 C-17.36723373 197.35384474 -18.82063255 196.6795683 -20.27307129 196.00323486 C-24.22484546 194.16367398 -28.18489354 192.34208919 -32.14453125 190.51953125 C-33.37319412 189.95338104 -33.37319412 189.95338104 -34.62667847 189.37579346 C-42.23342215 185.87754887 -49.88260613 182.47916104 -57.54244995 179.09915161 C-71.74878884 172.83015312 -85.87122674 166.39716911 -99.95608521 159.8600769 C-112.56993927 154.00843781 -125.24093862 148.30149145 -137.96044922 142.68359375 C-148.80271707 137.89298807 -159.57447648 132.97828308 -170.30908203 127.94995117 C-183.83057762 121.63920251 -197.48865925 115.63514486 -211.13589478 109.60266113 C-216.77491735 107.10704038 -222.39430951 104.5696842 -228 102 C-228 101.34 -228 100.68 -228 100 C-208.96990129 91.17964828 -189.93885123 82.36818675 -170.78076172 73.82861328 C-161.47189338 69.67450975 -152.20210363 65.43917651 -142.9375 61.1875 C-131.07933174 55.74614873 -119.18362405 50.39867978 -107.25 45.125 C-93.78985391 39.17645246 -80.41172146 33.06564376 -67.0640564 26.86914062 C-53.2803913 20.47294082 -39.42766418 14.24567147 -25.51953125 8.125 C-24.451689 7.65401886 -24.451689 7.65401886 -23.36227417 7.17352295 C-22.00264694 6.57412276 -20.64226143 5.97643894 -19.28103638 5.38067627 C-15.88988516 3.88548315 -12.61399874 2.28385617 -9.37109375 0.4921875 C-5.40397836 -1.26382418 -4.14424383 -1.00602452 0 0 Z "
        fill="currentColor"
        transform="translate(370,530)"
      />
      <path
        d="M0 0 C0.66 0 1.32 0 2 0 C2.49177734 1.08901611 2.98355469 2.17803223 3.49023438 3.30004883 C5.82583784 8.47131121 8.16271888 13.64199569 10.5 18.8125 C11.1440831 20.23746796 11.1440831 20.23746796 11.80117798 21.69122314 C17.56255614 34.43493377 23.35557963 47.16343967 29.1875 59.875 C34.84426524 72.20531301 40.38209701 84.58434871 45.86605835 96.99237061 C51.491098 109.7096263 57.28584746 122.34285838 63.1399231 134.95608521 C68.98729551 147.56074197 74.69461887 160.22071051 80.30331421 172.93325806 C83.73255225 180.70451144 87.18470715 188.46070308 90.78125 196.15625 C91.08258347 196.80116898 91.38391693 197.44608795 91.69438171 198.11054993 C93.09709069 201.10689714 94.50811603 204.09885205 95.92895508 207.08666992 C96.89529608 209.13646028 97.84393722 211.19473257 98.77075195 213.26269531 C99.53558928 214.96584448 100.340312 216.65232793 101.20385742 218.30761719 C102.82112021 221.85443625 103.25171734 224.02496081 102.2644043 227.84936523 C100.89490649 231.28525401 99.33501246 234.56337031 97.6875 237.875 C97.08394375 239.14365358 96.48351527 240.41379921 95.88598633 241.68530273 C94.6079002 244.39657917 93.31518067 247.1001516 92.01171875 249.79931641 C88.97291652 256.11391361 86.08523772 262.49773801 83.18356323 268.87615967 C82.17658153 271.08646584 81.16519854 273.29470891 80.15258789 275.50244141 C75.39486895 285.8826618 70.74044648 296.30586887 66.125 306.75 C60.17346804 320.2168991 54.05840043 333.6011398 47.8600769 346.95608521 C42.01090383 359.56462351 36.30648975 372.23032779 30.69064331 384.94433594 C25.73533892 396.1584162 20.66458506 407.30938882 15.46435547 418.41210938 C13.01949874 423.63461434 10.60006476 428.86378165 8.265625 434.13671875 C7.84788818 435.07966797 7.43015137 436.02261719 6.99975586 436.99414062 C6.2317571 438.73639073 5.46917521 440.48104306 4.71313477 442.22851562 C2.21791802 447.89104099 2.21791802 447.89104099 0 449 C-0.49177734 447.91098389 -0.98355469 446.82196777 -1.49023438 445.69995117 C-3.82583784 440.52868879 -6.16271888 435.35800431 -8.5 430.1875 C-9.1440831 428.76253204 -9.1440831 428.76253204 -9.80117798 427.30877686 C-15.56255614 414.56506623 -21.35557963 401.83656033 -27.1875 389.125 C-32.84426524 376.79468699 -38.38209701 364.41565129 -43.86605835 352.00762939 C-49.491098 339.2903737 -55.28584746 326.65714162 -61.1399231 314.04391479 C-66.98706511 301.43975468 -72.69064335 288.77890485 -78.30331421 276.0687561 C-83.86081736 263.48797197 -89.5903795 250.99709236 -95.42578125 238.54296875 C-95.87518066 237.57149902 -96.32458008 236.6000293 -96.78759766 235.59912109 C-97.52203805 234.02464417 -98.27253158 232.45733968 -99.05224609 230.90478516 C-101.06073153 226.74847773 -101.058315 224.44564743 -100 220 C-98.9051726 217.32807666 -97.76859972 214.74954149 -96.52734375 212.1484375 C-96.17302277 211.38980408 -95.81870178 210.63117065 -95.4536438 209.84954834 C-94.68832412 208.21224451 -93.91982052 206.57642592 -93.14846802 204.94195557 C-91.07013421 200.536397 -89.01636484 196.11940553 -86.9609375 191.703125 C-86.54068802 190.80147614 -86.12043854 189.89982727 -85.68745422 188.97085571 C-81.70296337 180.4032542 -77.86973 171.77244911 -74.0625 163.125 C-67.89901873 149.14644535 -61.53024284 135.27083115 -55.08288574 121.42114258 C-49.55431376 109.54347648 -44.10418211 97.63853569 -38.77392578 85.67041016 C-34.6402172 76.39728479 -30.42243936 67.16472421 -26.1875 57.9375 C-19.69862154 43.79766493 -13.34130735 29.60226131 -7.05151367 15.37280273 C-6.56479942 14.27269363 -6.56479942 14.27269363 -6.06825256 13.15036011 C-5.46374124 11.78344765 -4.86011273 10.41614428 -4.2575531 9.04837036 C-2.90739365 5.99434024 -1.53001919 2.9685345 0 0 Z "
        fill="currentColor"
        transform="translate(630,142)"
      />
      <path
        d="M0 0 C1.13596367 0.53154945 1.13596367 0.53154945 2.2948761 1.07383728 C3.92670528 1.8383391 5.55663215 2.60691215 7.18481445 3.37915039 C11.57252393 5.45952627 15.97461317 7.50911219 20.375 9.5625 C21.27314423 9.98254303 22.17128845 10.40258606 23.09664917 10.83535767 C31.6971507 14.84948123 40.36487536 18.70248537 49.05078125 22.52734375 C62.43733488 28.43202972 75.73694697 34.50823224 89.00796509 40.66781616 C101.62145533 46.51926381 114.29209559 52.22601825 127.01123047 57.84375 C137.84988029 62.63275707 148.61877392 67.54432958 159.34887695 72.57299805 C170.87702047 77.95536258 182.50845376 83.10132797 194.1484375 88.23632812 C195.26818466 88.73107132 195.26818466 88.73107132 196.41055298 89.23580933 C197.80543467 89.85186781 199.20088087 90.46665049 200.59701538 91.0798645 C204.47199906 92.79105126 208.27717754 94.60307504 212.05078125 96.52734375 C209.59313167 99.35932021 207.05995942 100.60773538 203.64453125 102.1015625 C202.5268335 102.59567627 201.40913574 103.08979004 200.25756836 103.59887695 C199.65637177 103.86092926 199.05517517 104.12298157 198.4357605 104.39297485 C195.14623395 105.82723974 191.8669883 107.28475893 188.5859375 108.73828125 C187.89133926 109.04520401 187.19674103 109.35212677 186.48109436 109.66835022 C179.34665426 112.82575529 172.25712358 116.078926 165.1685791 119.33773804 C152.58690391 125.12051151 139.96611591 130.8053081 127.30078125 136.40234375 C113.83388215 142.35387571 100.44964145 148.46894332 87.09469604 154.66726685 C74.48659661 160.51623633 61.82221311 166.22268356 49.10745239 171.83569336 C40.21719767 175.76149695 31.35526206 179.73409599 22.55737305 183.86328125 C20.21016739 184.95764843 17.85494954 186.03502065 15.49145508 187.09375 C10.51434084 189.33336075 5.85003191 191.54497084 1.3671875 194.671875 C-3.30730917 197.72084874 -7.25844068 199.72627585 -12.94921875 199.52734375 C-23.91430744 196.67263963 -33.99692197 191.14492923 -44.03320312 186.00878906 C-51.25110541 182.33103456 -58.63040917 179.00552317 -66.00054932 175.64694214 C-68.46494274 174.52015431 -70.92435712 173.38316395 -73.38232422 172.24243164 C-84.14816407 167.24638345 -94.96523227 162.37963223 -105.82421875 157.58984375 C-119.8016631 151.42376281 -133.67832456 145.05761594 -147.52807617 138.61022949 C-159.38552399 133.0910683 -171.27090755 127.65184616 -183.21728516 122.32763672 C-192.30244078 118.2755355 -201.36266404 114.17084064 -210.41143799 110.03817749 C-211.95754069 109.33209836 -213.50388028 108.62653771 -215.05047607 107.92153931 C-229.86361928 101.16739681 -229.86361928 101.16739681 -236.94921875 97.52734375 C-235.40297635 94.43485896 -232.39497667 93.77224326 -229.34765625 92.46484375 C-228.65664825 92.16175293 -227.96564026 91.85866211 -227.25369263 91.54638672 C-225.74393793 90.88455358 -224.2329626 90.22549976 -222.72094727 89.56884766 C-218.60037978 87.77824555 -214.48968429 85.96528311 -210.37890625 84.15234375 C-209.52000793 83.77392334 -208.66110962 83.39550293 -207.77618408 83.00561523 C-198.61515962 78.9581325 -189.53116958 74.74850321 -180.44921875 70.52734375 C-167.01842822 64.29113497 -153.53427189 58.19365562 -139.98876953 52.2109375 C-129.12430747 47.41052541 -118.33093631 42.48485397 -107.57421875 37.4465332 C-97.61688065 32.79925504 -87.5769861 28.35132497 -77.52438354 23.91497803 C-62.64355788 17.34143391 -47.88025245 10.52383118 -33.13793945 3.64599609 C-32.29394592 3.25265076 -31.44995239 2.85930542 -30.5803833 2.45404053 C-29.00215581 1.71812923 -27.42451866 0.98095001 -25.84759521 0.24224854 C-24.47254039 -0.39926056 -23.09459654 -1.03463223 -21.71316528 -1.66229248 C-20.25193276 -2.33358828 -18.81033767 -3.04744289 -17.37475586 -3.77197266 C-11.1460286 -5.57131309 -5.51697816 -2.65206409 0 0 Z "
        fill="currentColor"
        transform="translate(378.94921875,4.47265625)"
      />
      <path
        d="M0 0 C3.16996538 2.64292957 4.58615167 5.99846112 6.23046875 9.68359375 C6.53747711 10.35750656 6.84448547 11.03141937 7.16079712 11.72575378 C8.17637287 13.96004264 9.18196727 16.19867546 10.1875 18.4375 C10.9047181 20.02088535 11.62241858 21.60405227 12.34057617 23.18701172 C17.557798 34.70815066 22.67921543 46.2717554 27.79125977 57.83984375 C33.68037773 71.1576613 39.77125553 84.37649769 45.91656494 97.57775879 C51.44532587 109.45578979 56.8956508 121.36108914 62.22607422 133.32958984 C66.02968777 141.86221489 69.90147058 150.36145132 73.79812622 158.85189819 C79.85693235 172.05571719 85.84366894 185.28973626 91.75 198.5625 C92.08842926 199.32144562 92.42685852 200.08039124 92.77554321 200.86233521 C93.10460663 201.60071625 93.43367004 202.33909729 93.77270508 203.09985352 C94.09215591 203.81593277 94.41160675 204.53201202 94.74073792 205.26979065 C95.35400396 206.65889757 95.95876734 208.05179432 96.55430603 209.44859314 C97.7667374 212.29076343 98.99862324 214.99771075 100.59277344 217.6484375 C102.08152337 220.2828054 102.78334997 221.96992389 103 225 C101.30365246 230.98647292 98.55822083 236.34364945 95.75 241.875 C94.95333102 243.50968153 94.16075039 245.14636126 93.37207031 246.78491211 C92.14947833 249.32297049 90.92414575 251.85931053 89.68548584 254.38957214 C85.82877172 262.27058347 82.25905167 270.2638989 78.74108887 278.30059814 C73.29272578 290.73506591 67.67777702 303.08043961 61.91943359 315.37451172 C57.61676727 324.59338488 53.48310325 333.88071668 49.375 343.1875 C43.49553804 356.50466014 37.45637596 369.74030384 31.3125 382.9375 C25.72169773 394.94841853 20.15935153 406.96859091 14.75 419.0625 C13.96882813 420.80793091 13.96882813 420.80793091 13.171875 422.58862305 C10.99165126 427.47731698 8.8303365 432.37253116 6.72265625 437.29296875 C6.22717285 438.44623657 6.22717285 438.44623657 5.72167969 439.62280273 C4.89684505 441.55265885 4.0825803 443.48702409 3.26953125 445.421875 C2 448 2 448 0 449 C-7.04441853 434.01047326 -13.93744829 418.96464382 -20.625 403.8125 C-26.50431543 390.49528108 -32.5434519 377.25960921 -38.6875 364.0625 C-44.62674464 351.30464185 -50.50063799 338.5258406 -56.22607422 325.67041016 C-60.35978255 316.39728534 -64.57752914 307.16470924 -68.8125 297.9375 C-75.12666444 284.17885184 -81.31788127 270.36887768 -87.42114258 256.51538086 C-87.73030106 255.81563644 -88.03945953 255.11589203 -88.35798645 254.39494324 C-88.9687589 253.01216297 -89.57766625 251.62855725 -90.18461609 250.24409485 C-91.79991779 246.58830932 -93.48746332 242.98697332 -95.28125 239.41503906 C-95.65664917 238.65667755 -96.03204834 237.89831604 -96.41882324 237.11697388 C-97.13902339 235.67054754 -97.87361421 234.23116094 -98.62512207 232.80075073 C-100.93690087 228.13205351 -101.37203683 225.20640287 -100 220 C-99.02397272 217.48373378 -98.01543578 215.10715429 -96.859375 212.6796875 C-96.54119812 211.99202301 -96.22302124 211.30435852 -95.89520264 210.59585571 C-94.85494485 208.35168255 -93.8023705 206.11351491 -92.75 203.875 C-91.6452649 201.49798118 -90.54383244 199.11944845 -89.44238281 196.74090576 C-88.69802847 195.13432195 -87.952581 193.52824417 -87.20605469 191.92266846 C-83.90135901 184.81214026 -80.68880328 177.66331233 -77.5 170.5 C-72.45966554 159.20686063 -67.32682016 147.95859258 -62.16937256 136.71862793 C-57.07024954 125.59897477 -52.0696608 114.4390986 -47.125 103.25 C-41.17615106 89.78917193 -35.06527632 76.41023562 -28.86813354 63.06204224 C-22.49936999 49.33854294 -16.29910784 35.54638709 -10.21289062 21.6953125 C-6.98775083 14.36937414 -3.69282015 7.10586496 0 0 Z "
        fill="currentColor"
        transform="translate(101,142)"
      />
    </svg>
  );
};
export const OpenIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <path
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        d="M7 13.5a9.26 9.26 0 0 0-5.61-2.95a1 1 0 0 1-.89-1V1.5A1 1 0 0 1 1.64.51A9.3 9.3 0 0 1 7 3.43zm0 0a9.26 9.26 0 0 1 5.61-2.95a1 1 0 0 0 .89-1V1.5a1 1 0 0 0-1.14-.99A9.3 9.3 0 0 0 7 3.43z"
      />
    </svg>
  );
};
export const PaintingIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 36 36"
    >
      <path
        fill="currentColor"
        d="M32 4H4a2 2 0 0 0-2 2v24a2 2 0 0 0 2 2h28a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2ZM8.92 8a3 3 0 1 1-3 3a3 3 0 0 1 3-3ZM6 27v-4.1l6-6.08a1 1 0 0 1 1.41 0L16 19.35L8.32 27Zm24 0H11.15l6.23-6.23l5.4-5.4a1 1 0 0 1 1.41 0L30 21.18Z"
      />
      <path fill="none" d="M0 0h36v36H0z" />
    </svg>
  );
};
export const PaintingIconSkeleton = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <g
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
      >
        <path d="M1.5 12h11a1 1 0 0 0 1-1V3a1 1 0 0 0-1-1h-11a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1" />
        <path d="M9.502 6.212a1.245 1.245 0 1 0 0-2.49a1.245 1.245 0 0 0 0 2.49M9.083 12a7.098 7.098 0 0 0-7.136-5.786A7.6 7.6 0 0 0 .5 6.349" />
        <path d="M13.5 8.94a7.716 7.716 0 0 0-5.506.225" />
      </g>
    </svg>
  );
};
export const QuestionMarkIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
    >
      <circle cx="12" cy="12" r="10" />
      <path d="M9.09 9a3 3 0 0 1 5.83 1c0 2-3 3-3 3" />
      <line x1="12" y1="17" x2="12.01" y2="17" />
    </svg>
  );
};
export const RobotIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return <FaRobot size={size} className={className} />;
};
export const SwapIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <g
        fill="none"
        stroke="currentColor"
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth="1.5"
      >
        <path d="M3.53 11.47v2.118a4.235 4.235 0 0 0 4.235 4.236H20.47M3.53 6.176h12.705a4.235 4.235 0 0 1 4.236 4.236v2.117" />
        <path d="m17.294 14.647l3.177 3.176L17.294 21M6.706 9.353L3.529 6.176L6.706 3" />
      </g>
    </svg>
  );
};
export const TriangleAlertIcon = createIcon(FiAlertTriangle);
export const UsersIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 16 16"
    >
      <path
        fill="currentColor"
        d="M8 8a3 3 0 1 0 0-6a3 3 0 0 0 0 6m4.735 6c.618 0 1.093-.561.872-1.139a6.002 6.002 0 0 0-11.215 0c-.22.578.254 1.139.872 1.139z"
      />
    </svg>
  );
  // return <FiUser size={size} className={className} />;
};
export const WindowsIcon = ({
  size = 16,
  className = "my-auto flex flex-shrink-0 ",
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 24 24"
      width="24"
      height="24"
    >
      <path
        fill="currentColor"
        d="M3 3h8v8H3V3zm10 0h8v8h-8V3zm-10 10h8v8H3v-8zm10 0h8v8h-8v-8z"
      />
    </svg>
  );
};

// ============================================================================
// THIRD-PARTY / COMPANY ICONS (Alphabetically)
// ============================================================================
export const AirtableIcon = createLogoIcon(airtableIcon);
export const AmazonIcon = createLogoIcon(amazonSVG);
export const AnthropicIcon = createLogoIcon(anthropicSVG);
export const AsanaIcon = createLogoIcon(asanaIcon);
export const AxeroIcon = createLogoIcon(axeroImage);
export const AzureIcon = createLogoIcon(azureIcon);
export const BitbucketIcon = createLogoIcon(bitbucketIcon);
export const BookstackIcon = createIcon(SiBookstack);
export const ClickupIcon = createLogoIcon(clickupIcon);
export const CohereIcon = createLogoIcon(cohereIcon);
export const ColorDiscordIcon = createLogoIcon(discordIcon);
export const ColorSlackIcon = createLogoIcon(slackIcon);
export const ConfluenceIcon = createLogoIcon(confluenceSVG, {
  sizeAdjustment: 4,
  classNameAddition: "-m-0.5",
});
export const DeepseekIcon = createLogoIcon(deepseekSVG);
export const DiscourseIcon = createLogoIcon(discourseIcon);
export const Document360Icon = createLogoIcon(document360Icon);
export const DropboxIcon = createLogoIcon(dropboxIcon);
export const DrupalWikiIcon = createLogoIcon(drupalwikiIcon);
export const EgnyteIcon = createLogoIcon(egnyteIcon);
export const ElevenLabsIcon = createLogoIcon(elevenLabsSVG, {
  darkSrc: elevenLabsDarkSVG,
});
export const FirefliesIcon = createLogoIcon(firefliesIcon);
export const FreshdeskIcon = createLogoIcon(freshdeskIcon);
export const GeminiIcon = createLogoIcon(geminiSVG);
export const GitbookIcon = createLogoIcon(gitbookDarkIcon, {
  darkSrc: gitbookLightIcon,
});
export const GithubIcon = createLogoIcon(githubLightIcon, {
  monochromatic: true,
});
export const GitlabIcon = createLogoIcon(gitlabIcon);
export const GmailIcon = createLogoIcon(gmailIcon);
export const GongIcon = createLogoIcon(gongIcon);
export const GoogleDriveIcon = createLogoIcon(googleDriveIcon);
export const GoogleIcon = createLogoIcon(googleIcon);
export const GoogleSitesIcon = createLogoIcon(googleSitesIcon);
export const GoogleStorageIcon = createLogoIcon(googleCloudStorageIcon, {
  sizeAdjustment: 4,
  classNameAddition: "-m-0.5",
});
export const GuruIcon = createLogoIcon(guruIcon, { monochromatic: true });
export const HighspotIcon = createLogoIcon(highspotIcon);
export const HubSpotIcon = createLogoIcon(hubSpotIcon);
export const JiraIcon = createLogoIcon(jiraSVG);
export const KimiIcon = createLogoIcon(kimiIcon);
export const LinearIcon = createLogoIcon(linearIcon);
export const LiteLLMIcon = createLogoIcon(litellmIcon);
export const LoopioIcon = createLogoIcon(loopioIcon, { monochromatic: true });
export const MediaWikiIcon = createLogoIcon(mediawikiIcon);
export const MetaIcon = createLogoIcon(metaSVG);
export const MicrosoftIcon = createLogoIcon(microsoftIcon);
export const MicrosoftIconSVG = createLogoIcon(microsoftSVG);
export const MistralIcon = createLogoIcon(mistralSVG);
export const MixedBreadIcon = createLogoIcon(mixedBreadSVG);
export const NomicIcon = createLogoIcon(nomicSVG);
export const CodaIcon = createLogoIcon(codaIcon);
export const NotionIcon = createLogoIcon(notionIcon, { monochromatic: true });
export const OCIStorageIcon = createLogoIcon(OCIStorageSVG);
export const OllamaIcon = createLogoIcon(ollamaIcon);
export const LMStudioIcon = createLogoIcon(lmStudioIcon);
export const TestRailIcon = createLogoIcon(testrailSVG);
export const OpenAIISVG = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => (
  <svg
    fill="currentColor"
    width={size}
    style={{ width: `${size}px`, height: `${size}px` }}
    height={size}
    className={`w-[${size}px] h-[${size}px] ` + className}
    viewBox="0 0 24 24"
    role="img"
    xmlns="http://www.w3.org/2000/svg"
  >
    <path
      fill="currentColor"
      d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"
    />
  </svg>
);
export const OpenAIIcon = createLogoIcon(openAISVG, { monochromatic: true });
export const OpenAISVG = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 50 50"
    >
      <path
        fill="currentColor"
        d="M45.403,25.562c-0.506-1.89-1.518-3.553-2.906-4.862c1.134-2.665,0.963-5.724-0.487-8.237	c-1.391-2.408-3.636-4.131-6.322-4.851c-1.891-0.506-3.839-0.462-5.669,0.088C28.276,5.382,25.562,4,22.647,4	c-4.906,0-9.021,3.416-10.116,7.991c-0.01,0.001-0.019-0.003-0.029-0.002c-2.902,0.36-5.404,2.019-6.865,4.549	c-1.391,2.408-1.76,5.214-1.04,7.9c0.507,1.891,1.519,3.556,2.909,4.865c-1.134,2.666-0.97,5.714,0.484,8.234	c1.391,2.408,3.636,4.131,6.322,4.851c0.896,0.24,1.807,0.359,2.711,0.359c1.003,0,1.995-0.161,2.957-0.45	C21.722,44.619,24.425,46,27.353,46c4.911,0,9.028-3.422,10.12-8.003c2.88-0.35,5.431-2.006,6.891-4.535	C45.754,31.054,46.123,28.248,45.403,25.562z M35.17,9.543c2.171,0.581,3.984,1.974,5.107,3.919c1.049,1.817,1.243,4,0.569,5.967	c-0.099-0.062-0.193-0.131-0.294-0.19l-9.169-5.294c-0.312-0.179-0.698-0.177-1.01,0.006l-10.198,6.041l-0.052-4.607l8.663-5.001	C30.733,9.26,33,8.963,35.17,9.543z M29.737,22.195l0.062,5.504l-4.736,2.805l-4.799-2.699l-0.062-5.504l4.736-2.805L29.737,22.195z M14.235,14.412C14.235,9.773,18.009,6,22.647,6c2.109,0,4.092,0.916,5.458,2.488C28,8.544,27.891,8.591,27.787,8.651l-9.17,5.294	c-0.312,0.181-0.504,0.517-0.5,0.877l0.133,11.851l-4.015-2.258V14.412z M6.528,23.921c-0.581-2.17-0.282-4.438,0.841-6.383	c1.06-1.836,2.823-3.074,4.884-3.474c-0.004,0.116-0.018,0.23-0.018,0.348V25c0,0.361,0.195,0.694,0.51,0.872l10.329,5.81	L19.11,34.03l-8.662-5.002C8.502,27.905,7.11,26.092,6.528,23.921z M14.83,40.457c-2.171-0.581-3.984-1.974-5.107-3.919	c-1.053-1.824-1.249-4.001-0.573-5.97c0.101,0.063,0.196,0.133,0.299,0.193l9.169,5.294c0.154,0.089,0.327,0.134,0.5,0.134	c0.177,0,0.353-0.047,0.51-0.14l10.198-6.041l0.052,4.607l-8.663,5.001C19.269,40.741,17.001,41.04,14.83,40.457z M35.765,35.588	c0,4.639-3.773,8.412-8.412,8.412c-2.119,0-4.094-0.919-5.459-2.494c0.105-0.056,0.216-0.098,0.32-0.158l9.17-5.294	c0.312-0.181,0.504-0.517,0.5-0.877L31.75,23.327l4.015,2.258V35.588z M42.631,32.462c-1.056,1.83-2.84,3.086-4.884,3.483	c0.004-0.12,0.018-0.237,0.018-0.357V25c0-0.361-0.195-0.694-0.51-0.872l-10.329-5.81l3.964-2.348l8.662,5.002	c1.946,1.123,3.338,2.937,3.92,5.107C44.053,28.249,43.754,30.517,42.631,32.462z"
      />
    </svg>
  );
};
export const OpenSourceIcon = createLogoIcon(openSourceIcon);
export const OutlineIcon = createLogoIcon(outlinePNG, {
  sizeAdjustment: 4,
  classNameAddition: "-m-0.5",
});
export const ProductboardIcon = createLogoIcon(productboardIcon);
export const QwenIcon = createLogoIcon(qwenSVG);
export const R2Icon = createLogoIcon(r2Icon);
export const S3Icon = createLogoIcon(s3Icon);
export const SalesforceIcon = createLogoIcon(salesforceIcon);
export const SharepointIcon = createLogoIcon(sharepointIcon);
export const SlabIcon = createLogoIcon(slabLogoIcon);
export const OutlookIcon = createLogoIcon(outlookIcon);
export const OneDriveIcon = createLogoIcon(oneDriveIcon);
export const BoxIcon = createLogoIcon(boxIcon);
export const TrelloIcon = createLogoIcon(trelloIcon);
export const ServiceNowIcon = createLogoIcon(serviceNowIcon);
export const SlackIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="currentColor"
        d="M16.923 16.52h-2.39a1.984 1.984 0 0 1-1.973-1.195a2.006 2.006 0 0 1 .47-2.263a1.99 1.99 0 0 1 1.502-.53h4.858a1.978 1.978 0 0 1 1.969 1.63a1.951 1.951 0 0 1-1.147 2.173a2.21 2.21 0 0 1-.876.174c-.8.022-1.601.01-2.413.01m-9.435.501v-2.477a2.003 2.003 0 0 1 .56-1.402a1.987 1.987 0 0 1 1.377-.608a1.942 1.942 0 0 1 1.393.522c.377.352.6.84.62 1.357c.043 1.738.043 3.477 0 5.215A1.94 1.94 0 0 1 10.805 21a1.922 1.922 0 0 1-1.423.495a1.954 1.954 0 0 1-1.359-.614a1.97 1.97 0 0 1-.535-1.395c-.01-.815 0-1.64 0-2.466m8.938-9.963v2.434a1.996 1.996 0 0 1-.524 1.5a1.98 1.98 0 0 1-2.242.469a1.981 1.981 0 0 1-1.078-1.165a1.996 1.996 0 0 1-.106-.804V4.46a1.963 1.963 0 0 1 .605-1.386a1.947 1.947 0 0 1 1.408-.537a1.962 1.962 0 0 1 1.383.602a1.979 1.979 0 0 1 .553 1.408c.011.836 0 1.673 0 2.51M6.97 11.511H4.545a1.962 1.962 0 0 1-1.393-.579a1.978 1.978 0 0 1-.427-2.155a1.978 1.978 0 0 1 1.066-1.07a1.97 1.97 0 0 1 .754-.15h4.923a1.962 1.962 0 0 1 1.392.579a1.98 1.98 0 0 1-1.392 3.375zm4.478-6.171v.902c0 .18-.06.261-.216.261H9.165A1.916 1.916 0 0 1 7.9 5.787a1.929 1.929 0 0 1-.4-1.402c.022-.492.227-.958.574-1.306a1.965 1.965 0 0 1 3.342 1.12c.032.38.032.487.032.832v.214zm-5.009 7.204c.06.813.06 1.63 0 2.444a1.902 1.902 0 0 1-.754 1.18a1.887 1.887 0 0 1-1.356.34a1.988 1.988 0 0 1-1.293-.627a2.003 2.003 0 0 1-.536-1.338a1.96 1.96 0 0 1 .497-1.346c.33-.369.786-.599 1.278-.643c.736-.065 1.471-.01 2.164-.01M17.443 11.5V9.329c.052-.509.299-.977.689-1.305c.39-.329.891-.492 1.399-.455c.522 0 1.023.208 1.392.579a1.981 1.981 0 0 1 0 2.796c-.37.371-.87.58-1.392.58c-.671 0-1.363-.022-2.088-.022m-4.967 6.072c.8-.055 1.603-.055 2.402 0c.488.09.92.367 1.208.773c.286.406.405.908.329 1.4a1.99 1.99 0 0 1-.67 1.264a1.98 1.98 0 0 1-1.343.485a1.922 1.922 0 0 1-1.314-.528a1.937 1.937 0 0 1-.6-1.287c-.044-.695-.012-1.401-.012-2.107"
      />
    </svg>
  );
};
export const SlackIconSkeleton = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 14 14"
    >
      <g fill="none" stroke="currentColor">
        <path d="M5.5 2a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m6 4a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m-4 6a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m-6-4a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0" />
        <path
          strokeLinecap="round"
          strokeLinejoin="round"
          d="M8.793 1.219v4.937m-3.59 1.692v4.937M1.215 5.207h4.937m1.692 3.59h4.937"
        />
      </g>
    </svg>
  );
};
export const TeamsIcon = createLogoIcon(teamsIcon);
export const VoyageIconSVG = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => (
  <svg
    style={{ width: `${size}px`, height: `${size}px` }}
    className={`w-[${size}px] h-[${size}px] ` + className}
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 200 200"
    width="200"
    height="200"
  >
    <path
      d="M0 0 C18.56364691 14.8685395 31.52865476 35.60458591 34.68359375 59.39453125 C36.85790415 84.17093249 31.86661083 108.64738046 15.83569336 128.38696289 C-0.18749615 147.32766215 -21.13158775 159.50726579 -46 162 C-70.46026633 163.68595557 -94.53744209 157.16585411 -113.375 141.1875 C-131.5680983 125.12913912 -143.31327081 103.12304227 -145.16845703 78.79052734 C-146.52072106 52.74671426 -138.40787353 29.42123969 -121 10 C-120.39929688 9.30519531 -119.79859375 8.61039063 -119.1796875 7.89453125 C-88.7732111 -25.07872563 -34.66251161 -26.29920259 0 0 Z M-111 6 C-111.96292969 6.76441406 -112.92585938 7.52882813 -113.91796875 8.31640625 C-129.12066 21.0326872 -138.48510826 41.64930525 -141 61 C-142.57102569 86.19086606 -137.40498471 109.10013392 -120.54980469 128.68505859 C-106.05757815 144.84161953 -85.8110604 156.92053779 -63.68798828 158.12597656 C-39.72189393 158.83868932 -17.08757891 154.40601729 1.1875 137.6875 C3.15800523 135.82115685 5.07881363 133.91852176 7 132 C8.22396484 130.7934375 8.22396484 130.7934375 9.47265625 129.5625 C26.2681901 112.046746 31.70691205 89.639394 31.3125 66 C30.4579168 43.32505919 19.07700136 22.58412979 3 7 C-29.27431062 -21.68827611 -78.26536136 -21.67509486 -111 6 Z "
      fill="currentColor"
      transform="translate(155,29)"
    />
    <path
      d="M0 0 C2.62278901 2.33427271 3.96735488 4.64596813 5.4453125 7.81640625 C6.10080078 9.20956055 6.10080078 9.20956055 6.76953125 10.63085938 C7.21683594 11.59830078 7.66414063 12.56574219 8.125 13.5625 C8.58003906 14.53380859 9.03507812 15.50511719 9.50390625 16.50585938 C10.34430119 18.30011504 11.18198346 20.09564546 12.01611328 21.89282227 C12.65935931 23.27045415 13.32005367 24.64010734 14 26 C12.02 26 10.04 26 8 26 C6.515 22.535 6.515 22.535 5 19 C1.7 19 -1.6 19 -5 19 C-5.99 21.31 -6.98 23.62 -8 26 C-9.32 26 -10.64 26 -12 26 C-10.34176227 20.46347949 -7.92776074 15.38439485 -5.4375 10.1875 C-5.02564453 9.31673828 -4.61378906 8.44597656 -4.18945312 7.54882812 C-1.13502139 1.13502139 -1.13502139 1.13502139 0 0 Z M-1 8 C-3.2013866 11.80427492 -3.2013866 11.80427492 -4 16 C-1.69 16 0.62 16 3 16 C2.43260132 11.87026372 2.43260132 11.87026372 1 8 C0.34 8 -0.32 8 -1 8 Z "
      fill="currentColor"
      transform="translate(158,86)"
    />
    <path
      d="M0 0 C2.64453125 1.0234375 2.64453125 1.0234375 4.4453125 4.296875 C4.96971298 5.65633346 5.47294966 7.0241056 5.95703125 8.3984375 C6.22064453 9.08421875 6.48425781 9.77 6.75585938 10.4765625 C7.8687821 13.4482107 8.64453125 15.82826389 8.64453125 19.0234375 C9.30453125 19.0234375 9.96453125 19.0234375 10.64453125 19.0234375 C10.75667969 18.34925781 10.86882813 17.67507812 10.984375 16.98046875 C11.77373626 13.44469078 12.95952974 10.10400184 14.20703125 6.7109375 C14.44099609 6.06576172 14.67496094 5.42058594 14.91601562 4.75585938 C15.48900132 3.17722531 16.06632589 1.60016724 16.64453125 0.0234375 C17.96453125 0.0234375 19.28453125 0.0234375 20.64453125 0.0234375 C20.11164835 5.93359329 17.66052325 10.65458241 15.08203125 15.8984375 C14.65728516 16.77757813 14.23253906 17.65671875 13.79492188 18.5625 C12.75156566 20.71955106 11.70131241 22.87294038 10.64453125 25.0234375 C9.65453125 25.0234375 8.66453125 25.0234375 7.64453125 25.0234375 C6.36851794 22.52596727 5.09866954 20.02565814 3.83203125 17.5234375 C3.29739258 16.47929688 3.29739258 16.47929688 2.75195312 15.4140625 C0.37742917 10.70858383 -1.58321849 5.98797449 -3.35546875 1.0234375 C-2.35546875 0.0234375 -2.35546875 0.0234375 0 0 Z "
      fill="currentColor"
      transform="translate(23.35546875,86.9765625)"
    />
    <path
      d="M0 0 C4.56944444 2.13888889 4.56944444 2.13888889 6 5 C6.58094684 9.76376411 6.98189835 13.6696861 4.0625 17.625 C-0.08290736 19.4862033 -3.52913433 19.80184004 -8 19 C-11.18487773 17.20850628 -12.56721386 16.06753914 -13.9375 12.6875 C-14.04047475 8.25958558 -13.25966827 4.50191217 -10.375 1.0625 C-6.92547207 -0.48070986 -3.67744273 -0.55453501 0 0 Z M-7.66796875 3.21484375 C-9.3387892 5.45403713 -9.40271257 6.72874309 -9.375 9.5 C-9.38273437 10.2734375 -9.39046875 11.046875 -9.3984375 11.84375 C-8.90844456 14.49547648 -8.12507645 15.38331504 -6 17 C-3.17884512 17.42317323 -1.66049093 17.38718434 0.8125 15.9375 C2.65621741 12.92932949 2.30257262 10.44932782 2 7 C1.54910181 4.59436406 1.54910181 4.59436406 0 3 C-4.00690889 1.63330935 -4.00690889 1.63330935 -7.66796875 3.21484375 Z "
      fill="currentColor"
      transform="translate(58,93)"
    />
    <path
      d="M0 0 C0.91007812 0.00902344 1.82015625 0.01804687 2.7578125 0.02734375 C3.45648438 0.03894531 4.15515625 0.05054687 4.875 0.0625 C5.205 1.3825 5.535 2.7025 5.875 4.0625 C4.6375 3.815 3.4 3.5675 2.125 3.3125 C-1.0391959 2.93032359 -1.83705309 2.89394571 -4.6875 4.5625 C-6.71059726 8.08093001 -6.12332701 10.21181009 -5.125 14.0625 C-3.22744856 16.41223818 -3.22744856 16.41223818 0 16.1875 C0.94875 16.14625 1.8975 16.105 2.875 16.0625 C2.875 14.4125 2.875 12.7625 2.875 11.0625 C4.525 11.3925 6.175 11.7225 7.875 12.0625 C8.1875 14.375 8.1875 14.375 7.875 17.0625 C5.25185816 19.29988569 3.33979578 19.9932751 -0.0625 20.5 C-3.96030088 19.9431713 -6.06489651 18.49667323 -9.125 16.0625 C-11.6165904 12.3251144 -11.58293285 10.48918417 -11.125 6.0625 C-7.83836921 1.02299945 -5.86190884 -0.07515268 0 0 Z "
      fill="currentColor"
      transform="translate(113.125,92.9375)"
    />
    <path
      d="M0 0 C4.28705043 1.42901681 5.23208702 4.57025431 7.1875 8.375 C7.55552734 9.06078125 7.92355469 9.7465625 8.30273438 10.453125 C11 15.59744608 11 15.59744608 11 19 C9.35 19 7.7 19 6 19 C5.67 17.68 5.34 16.36 5 15 C2.03 14.67 -0.94 14.34 -4 14 C-4.33 15.65 -4.66 17.3 -5 19 C-5.99 19 -6.98 19 -8 19 C-7.38188466 14.44684052 -5.53234107 10.71540233 -3.4375 6.6875 C-2.9434668 5.71973633 -2.9434668 5.71973633 -2.43945312 4.73242188 C-1.63175745 3.15214772 -0.81662387 1.57567895 0 0 Z M0 6 C-0.33 7.65 -0.66 9.3 -1 11 C0.32 11 1.64 11 3 11 C2.34 9.35 1.68 7.7 1 6 C0.67 6 0.34 6 0 6 Z "
      fill="currentColor"
      transform="translate(90,93)"
    />
    <path
      d="M0 0 C3.63 0 7.26 0 11 0 C11 0.66 11 1.32 11 2 C8.69 2 6.38 2 4 2 C4 3.98 4 5.96 4 8 C5.98 8 7.96 8 10 8 C9.67 8.99 9.34 9.98 9 11 C7.68 11 6.36 11 5 11 C4.67 12.98 4.34 14.96 4 17 C7.465 16.505 7.465 16.505 11 16 C11 16.99 11 17.98 11 19 C7.37 19 3.74 19 0 19 C0 12.73 0 6.46 0 0 Z "
      fill="currentColor"
      transform="translate(124,93)"
    />
    <path
      d="M0 0 C2.25 -0.3125 2.25 -0.3125 5 0 C9 4.10810811 9 4.10810811 9 7 C9.78375 6.21625 10.5675 5.4325 11.375 4.625 C12.91666667 3.08333333 14.45833333 1.54166667 16 0 C16.99 0 17.98 0 19 0 C17.84356383 2.5056117 16.63134741 4.4803655 14.9375 6.6875 C12.52118995 10.81861073 12.20924288 14.29203528 12 19 C10.68 19 9.36 19 8 19 C8.00902344 18.443125 8.01804687 17.88625 8.02734375 17.3125 C7.78294047 11.0217722 5.92390505 8.0388994 1.49609375 3.62890625 C0 2 0 2 0 0 Z "
      fill="currentColor"
      transform="translate(64,93)"
    />
    <path
      d="M0 0 C1.32 0 2.64 0 4 0 C4 8.25 4 16.5 4 25 C2.68 25 1.36 25 0 25 C0 16.75 0 8.5 0 0 Z "
      fill="currentColor"
      transform="translate(173,87)"
    />
    <path
      d="M0 0 C0.66 0.33 1.32 0.66 2 1 C1.125 5.75 1.125 5.75 0 8 C1.093125 7.95875 2.18625 7.9175 3.3125 7.875 C7 8 7 8 10 10 C4.555 10.495 4.555 10.495 -1 11 C-1.99 13.31 -2.98 15.62 -4 18 C-5.32 18 -6.64 18 -8 18 C-6.65150163 13.64029169 -4.95092154 9.68658562 -2.875 5.625 C-2.33617187 4.56539063 -1.79734375 3.50578125 -1.2421875 2.4140625 C-0.83226562 1.61742188 -0.42234375 0.82078125 0 0 Z "
      fill="currentColor"
      transform="translate(154,94)"
    />
    <path
      d="M0 0 C0.66 0.33 1.32 0.66 2 1 C2 1.66 2 2.32 2 3 C1.34 3 0.68 3 0 3 C-0.05429959 4.74965358 -0.09292823 6.49979787 -0.125 8.25 C-0.14820313 9.22453125 -0.17140625 10.1990625 -0.1953125 11.203125 C0.00137219 14.0196498 0.55431084 15.60949036 2 18 C1.34 18.33 0.68 18.66 0 19 C-4.69653179 15.74855491 -4.69653179 15.74855491 -5.9375 12.6875 C-6.02161912 9.07037805 -5.30970069 6.36780178 -4 3 C-1.875 1.0625 -1.875 1.0625 0 0 Z "
      fill="currentColor"
      transform="translate(50,93)"
    />
    <path
      d="M0 0 C2.79192205 -0.05380578 5.5828141 -0.09357669 8.375 -0.125 C9.1690625 -0.14175781 9.963125 -0.15851563 10.78125 -0.17578125 C12.85492015 -0.19335473 14.92883241 -0.10335168 17 0 C17.66 0.66 18.32 1.32 19 2 C17 4 17 4 13.0859375 4.1953125 C11.51550649 4.18200376 9.94513779 4.15813602 8.375 4.125 C7.57320312 4.11597656 6.77140625 4.10695312 5.9453125 4.09765625 C3.96341477 4.07406223 1.98167019 4.03819065 0 4 C0 2.68 0 1.36 0 0 Z "
      fill="currentColor"
      transform="translate(92,187)"
    />
    <path
      d="M0 0 C0.99 0.33 1.98 0.66 3 1 C1.66666667 4.33333333 0.33333333 7.66666667 -1 11 C0.65 11 2.3 11 4 11 C4 11.33 4 11.66 4 12 C1.36 12.33 -1.28 12.66 -4 13 C-4.33 14.98 -4.66 16.96 -5 19 C-5.99 19 -6.98 19 -8 19 C-7.38188466 14.44684052 -5.53234107 10.71540233 -3.4375 6.6875 C-2.9434668 5.71973633 -2.9434668 5.71973633 -2.43945312 4.73242188 C-1.63175745 3.15214772 -0.81662387 1.57567895 0 0 Z "
      fill="currentColor"
      transform="translate(90,93)"
    />
    <path
      d="M0 0 C0.99 0 1.98 0 3 0 C2.43454163 3.95820859 1.19097652 6.6659053 -1 10 C-1.66 9.67 -2.32 9.34 -3 9 C-2.44271087 5.65626525 -1.64826111 2.96687001 0 0 Z "
      fill="currentColor"
      transform="translate(37,97)"
    />
    <path
      d="M0 0 C4.92127034 -0.16682272 8.50343896 -0.24828052 13 2 C9.60268371 4.09065618 6.95730595 4.42098999 3 4 C1.125 2.5625 1.125 2.5625 0 1 C0 0.67 0 0.34 0 0 Z "
      fill="currentColor"
      transform="translate(110,12)"
    />
    <path
      d="M0 0 C0 0.99 0 1.98 0 3 C-3.08888522 5.05925681 -3.70935927 5.2390374 -7.1875 5.125 C-9.0746875 5.063125 -9.0746875 5.063125 -11 5 C-10.67 4.34 -10.34 3.68 -10 3 C-7.96875 2.40234375 -7.96875 2.40234375 -5.5 1.9375 C-2.46226779 1.54135157 -2.46226779 1.54135157 0 0 Z "
      fill="currentColor"
      transform="translate(62,107)"
    />
    <path
      d="M0 0 C0.66 0.33 1.32 0.66 2 1 C1.25 5.75 1.25 5.75 -1 8 C-1.66 8 -2.32 8 -3 8 C-1.125 1.125 -1.125 1.125 0 0 Z "
      fill="currentColor"
      transform="translate(154,94)"
    />
    <path
      d="M0 0 C2.64 0 5.28 0 8 0 C8.33 1.32 8.66 2.64 9 4 C6.03 3.01 3.06 2.02 0 1 C0 0.67 0 0.34 0 0 Z "
      fill="currentColor"
      transform="translate(110,93)"
    />
    <path
      d="M0 0 C1.67542976 0.28604898 3.34385343 0.61781233 5 1 C4.67 2.32 4.34 3.64 4 5 C2.0625 4.6875 2.0625 4.6875 0 4 C-0.33 3.01 -0.66 2.02 -1 1 C-0.67 0.67 -0.34 0.34 0 0 Z "
      fill="currentColor"
      transform="translate(21,87)"
    />
  </svg>
);
export const WikipediaIcon = createLogoIcon(wikipediaIcon);
export const XenforoIcon = createLogoIcon(xenforoIcon);
export const ZAIIcon = createLogoIcon(zAIIcon);
export const ZendeskIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => (
  <div
    className="rounded-full overflow-visible dark:overflow-hidden flex items-center justify-center dark:bg-[#fff]/90"
    style={{ width: size, height: size }}
  >
    <LogoIcon
      size={
        typeof window !== "undefined" &&
        window.matchMedia("(prefers-color-scheme: dark)").matches
          ? size * 0.8
          : size
      }
      className={`${className}`}
      src={zendeskIcon}
    />
  </div>
);
export const ZulipIcon = createLogoIcon(zulipIcon);

// ============================================================================
// FILE TYPE ICONS (Alphabetically)
// ============================================================================
export const DOCIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 24 24"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M15.5,17H14L12,9.5L10,17H8.5L6.1,7H7.8L9.34,14.5L11.3,7H12.7L14.67,14.5L16.2,7H17.9M19,3H5C3.89,3 3,3.89 3,5V19A2,2 0 0,0 5,21H19A2,2 0 0,0 21,19V5C21,3.89 20.1,3 19,3Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const HTMLIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-orange-600 w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
    >
      <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8l-6-6zm-1 2 5 5h-5V4zM8.531 18h-.76v-1.411H6.515V18h-.767v-3.373h.767v1.296h1.257v-1.296h.76V18zm3-2.732h-.921V18h-.766v-2.732h-.905v-.641h2.592v.641zM14.818 18l-.05-1.291c-.017-.405-.03-.896-.03-1.387h-.016c-.104.431-.245.911-.375 1.307l-.41 1.316h-.597l-.359-1.307a15.154 15.154 0 0 1-.306-1.316h-.011c-.021.456-.034.976-.059 1.396L12.545 18h-.705l.216-3.373h1.015l.331 1.126c.104.391.21.811.284 1.206h.017c.095-.391.209-.836.32-1.211l.359-1.121h.996L15.563 18h-.745zm3.434 0h-2.108v-3.373h.767v2.732h1.342V18z"></path>
    </svg>
  );
};
export const ImagesIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 24 24"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M19 3H5C3.9 3 3 3.9 3 5V19C3 20.1 3.9 21 5 21H19C20.1 21 21 20.1 21 19V5C21 3.9 20.1 3 19 3M9 11.5C9 12.3 8.3 13 7.5 13H6.5V15H5V9H7.5C8.3 9 9 9.7 9 10.5V11.5M14 15H12.5L11.5 12.5V15H10V9H11.5L12.5 11.5V9H14V15M19 10.5H16.5V13.5H17.5V12H19V13.7C19 14.4 18.5 15 17.7 15H16.4C15.6 15 15.1 14.3 15.1 13.7V10.4C15 9.7 15.5 9 16.3 9H17.6C18.4 9 18.9 9.7 18.9 10.3V10.5H19M6.5 10.5H7.5V11.5H6.5V10.5Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const JSONIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-yellow-500 w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="200"
      height="200"
      viewBox="0 0 24 24"
    >
      <path
        fill="currentColor"
        d="M5 3h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2m3.25 8a1.25 1.25 0 1 0-2.5 0v2a1.25 1.25 0 1 0 2.5 0v-2m4.25-1.25a1.25 1.25 0 0 0-1.25 1.25v2a1.25 1.25 0 1 0 2.5 0v-2a1.25 1.25 0 0 0-1.25-1.25m4.25 1.25a1.25 1.25 0 1 0-2.5 0v2a1.25 1.25 0 1 0 2.5 0v-2z"
      />
    </svg>
  );
};
export const PDFIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-red-500 w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 24 24"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M19 3H5C3.9 3 3 3.9 3 5V19C3 20.1 3.9 21 5 21H19C20.1 21 21 20.1 21 19V5C21 3.9 20.1 3 19 3M9.5 11.5C9.5 12.3 8.8 13 8 13H7V15H5.5V9H8C8.8 9 9.5 9.7 9.5 10.5V11.5M14.5 13.5C14.5 14.3 13.8 15 13 15H10.5V9H13C13.8 9 14.5 9.7 14.5 10.5V13.5M18.5 10.5H17V11.5H18.5V13H17V15H15.5V9H18.5V10.5M12 10.5H13V13.5H12V10.5M7 10.5H8V11.5H7V10.5Z"
        fill="currentColor"
      />
    </svg>
  );
};
export const TXTIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      fill="currentColor"
      viewBox="0 0 24 24"
    >
      <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8l-6-6zM9.998 14.768H8.895v3.274h-.917v-3.274H6.893V14h3.105v.768zm2.725 3.274-.365-.731c-.15-.282-.246-.492-.359-.726h-.013c-.083.233-.185.443-.312.726l-.335.731h-1.045l1.171-2.045L10.336 14h1.05l.354.738c.121.245.21.443.306.671h.013c.096-.258.174-.438.276-.671l.341-.738h1.043l-1.139 1.973 1.198 2.069h-1.055zm4.384-3.274h-1.104v3.274h-.917v-3.274h-1.085V14h3.105v.768zM14 9h-1V4l5 5h-4z"></path>
    </svg>
  );
};
export const XMLIcon = ({
  size = 24,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <svg
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`text-teal-500 w-[${size}px] h-[${size}px] ` + className}
      viewBox="0 0 24 24"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        d="M19 3H5C3.89 3 3 3.89 3 5V19C3 20.11 3.89 21 5 21H19C20.11 21 21 20.11 21 19V5C21 3.89 20.11 3 19 3M8 15H6.5L6 13L5.5 15H4L4.75 12L4 9H5.5L6 11L6.5 9H8L7.25 12L8 15M15.5 15H14V10.5H13V14H11.5V10.5H10.5V15H9V11C9 9.9 9.9 9 11 9H13.5C14.61 9 15.5 9.9 15.5 11V15M20 15H17V9H18.5V13.5H20V15Z"
        fill="currentColor"
      />
    </svg>
  );
};


================================================
FILE: web/src/components/llm/LLMSelector.tsx
================================================
"use client";

import { useMemo } from "react";
import { parseLlmDescriptor, structureValue } from "@/lib/llmConfig/utils";
import { DefaultModel, LLMProviderDescriptor } from "@/interfaces/llm";
import { getProviderIcon } from "@/app/admin/configuration/llm/utils";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { createIcon } from "@/components/icons/icons";

interface LLMOption {
  name: string;
  value: string;
  icon: ReturnType<typeof getProviderIcon>;
  modelName: string;
  providerName: string;
  provider: string;
  providerDisplayName: string;
  supportsImageInput: boolean;
  vendor: string | null;
}

export interface LLMSelectorProps {
  name?: string;
  userSettings?: boolean;
  llmProviders: LLMProviderDescriptor[];
  defaultText?: DefaultModel | null;
  currentLlm: string | null;
  onSelect: (value: string | null) => void;
  requiresImageGeneration?: boolean;
  excludePublicProviders?: boolean;
}

export default function LLMSelector({
  name,
  userSettings,
  llmProviders,
  defaultText,
  currentLlm,
  onSelect,
  requiresImageGeneration,
  excludePublicProviders = false,
}: LLMSelectorProps) {
  const currentDescriptor = useMemo(
    () => (currentLlm ? parseLlmDescriptor(currentLlm) : null),
    [currentLlm]
  );

  const llmOptions = useMemo(() => {
    const seenKeys = new Set<string>();
    const options: LLMOption[] = [];

    llmProviders.forEach((provider) => {
      provider.model_configurations.forEach((modelConfiguration) => {
        // Use the display name if it is available, otherwise use the model name
        const displayName =
          modelConfiguration.display_name || modelConfiguration.name;

        const matchesCurrentSelection =
          currentDescriptor?.modelName === modelConfiguration.name &&
          (currentDescriptor?.provider === provider.provider ||
            currentDescriptor?.name === provider.name);

        if (!modelConfiguration.is_visible && !matchesCurrentSelection) {
          return;
        }

        const key = `${provider.provider}:${modelConfiguration.name}`;
        if (seenKeys.has(key)) {
          return; // Skip exact duplicate
        }
        seenKeys.add(key);

        const supportsImageInput =
          modelConfiguration.supports_image_input || false;

        // If the model does not support image input and we require image generation, skip it
        if (requiresImageGeneration && !supportsImageInput) {
          return;
        }

        const option: LLMOption = {
          name: displayName,
          value: structureValue(
            provider.name,
            provider.provider,
            modelConfiguration.name
          ),
          icon: getProviderIcon(provider.provider, modelConfiguration.name),
          modelName: modelConfiguration.name,
          providerName: provider.name,
          provider: provider.provider,
          providerDisplayName:
            provider.provider_display_name || provider.provider,
          supportsImageInput,
          vendor: modelConfiguration.vendor || null,
        };

        options.push(option);
      });
    });

    return options;
  }, [
    llmProviders,
    currentDescriptor?.modelName,
    currentDescriptor?.provider,
    currentDescriptor?.name,
    requiresImageGeneration,
  ]);

  // Group options by provider using backend-provided display names
  const groupedOptions = useMemo(() => {
    const groups = new Map<
      string,
      { displayName: string; options: LLMOption[] }
    >();

    llmOptions.forEach((option) => {
      const provider = option.provider.toLowerCase();
      if (!groups.has(provider)) {
        groups.set(provider, {
          displayName: option.providerDisplayName,
          options: [],
        });
      }
      groups.get(provider)!.options.push(option);
    });

    // Sort groups alphabetically by display name
    const sortedProviders = Array.from(groups.keys()).sort((a, b) =>
      groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
    );

    return sortedProviders.map((provider) => {
      const group = groups.get(provider)!;
      return {
        provider,
        displayName: group.displayName,
        options: group.options,
      };
    });
  }, [llmOptions]);

  const defaultProvider = defaultText
    ? llmProviders.find((p) => p.id === defaultText.provider_id)
    : undefined;

  const defaultModelName = defaultText?.model_name;
  const defaultModelConfig = defaultProvider?.model_configurations.find(
    (m) => m.name === defaultModelName
  );
  const defaultModelDisplayName = defaultModelConfig
    ? defaultModelConfig.display_name || defaultModelConfig.name
    : defaultModelName || null;
  const defaultLabel = userSettings ? "System Default" : "User Default";

  // Determine if we should show grouped view (only if we have multiple vendors)
  const showGrouped = groupedOptions.length > 1;

  return (
    <InputSelect
      value={currentLlm ? currentLlm : "default"}
      onValueChange={(value) => onSelect(value === "default" ? null : value)}
    >
      <InputSelect.Trigger id={name} name={name} placeholder={defaultLabel} />

      <InputSelect.Content>
        {!excludePublicProviders && (
          <InputSelect.Item
            value="default"
            description={
              userSettings && defaultModelDisplayName
                ? `(${defaultModelDisplayName})`
                : undefined
            }
          >
            {defaultLabel}
          </InputSelect.Item>
        )}
        {showGrouped
          ? groupedOptions.map((group) => (
              <InputSelect.Group key={group.provider}>
                <InputSelect.Label>{group.displayName}</InputSelect.Label>
                {group.options.map((option) => (
                  <InputSelect.Item
                    key={option.value}
                    value={option.value}
                    icon={createIcon(option.icon)}
                  >
                    {option.name}
                  </InputSelect.Item>
                ))}
              </InputSelect.Group>
            ))
          : llmOptions.map((option) => (
              <InputSelect.Item
                key={option.value}
                value={option.value}
                icon={createIcon(option.icon)}
              >
                {option.name}
              </InputSelect.Item>
            ))}
      </InputSelect.Content>
    </InputSelect>
  );
}


================================================
FILE: web/src/components/loading.css
================================================
.loading {
  font-size: 1.5rem;
  font-weight: bold;
}

.dots {
  animation: blink 1s linear infinite;
}

@keyframes blink {
  0%,
  100% {
    opacity: 1;
  }
  50% {
    opacity: 0.5;
  }
}


================================================
FILE: web/src/components/modals/AddInstructionModal.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { Button } from "@opal/components";
import { useProjectsContext } from "@/providers/ProjectsContext";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import { SvgAddLines } from "@opal/icons";
import Modal from "@/refresh-components/Modal";

export default function AddInstructionModal() {
  const modal = useModal();
  const { currentProjectDetails, upsertInstructions } = useProjectsContext();
  const [instructionText, setInstructionText] = useState("");

  useEffect(() => {
    if (!modal.isOpen) return;
    const preset = currentProjectDetails?.project?.instructions ?? "";
    setInstructionText(preset);
  }, [modal.isOpen, currentProjectDetails?.project?.instructions]);

  async function handleSubmit() {
    const value = instructionText.trim();
    try {
      await upsertInstructions(value);
    } catch (e) {
      console.error("Failed to save instructions", e);
    }
    modal.toggle(false);
  }

  return (
    <Modal open={modal.isOpen} onOpenChange={modal.toggle}>
      <Modal.Content width="sm">
        <Modal.Header
          icon={SvgAddLines}
          title="Set Project Instructions"
          description="Specify the behaviors or tone for the chat sessions in this project."
          onClose={() => modal.toggle(false)}
        />
        <Modal.Body>
          <InputTextArea
            value={instructionText}
            onChange={(event) => setInstructionText(event.target.value)}
            placeholder="My goal with is to... be sure to... in your responses."
          />
        </Modal.Body>
        <Modal.Footer>
          <Button prominence="secondary" onClick={() => modal.toggle(false)}>
            Cancel
          </Button>
          <Button onClick={handleSubmit}>Save Instructions</Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/ConfirmEntityModal.tsx
================================================
import Modal from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertCircle } from "@opal/icons";
import type { IconProps } from "@opal/types";

export interface ConfirmEntityModalProps {
  danger?: boolean;

  onClose: () => void;
  onSubmit: () => void;

  icon?: React.FunctionComponent<IconProps>;

  entityType: string;
  entityName: string;

  additionalDetails?: string;

  action?: string;
  actionButtonText?: string;

  removeConfirmationText?: boolean;
}

export function ConfirmEntityModal({
  danger,

  onClose,
  onSubmit,

  icon: Icon,

  entityType,
  entityName,

  additionalDetails,

  action,
  actionButtonText,

  removeConfirmationText = false,
}: ConfirmEntityModalProps) {
  const buttonText = actionButtonText
    ? actionButtonText
    : danger
      ? "Delete"
      : "Confirm";
  const actionText = action ? action : danger ? "delete" : "modify";

  return (
    <Modal
      icon={Icon || SvgAlertCircle}
      title={`${buttonText} ${entityType}`}
      onClose={onClose}
      submit={
        <Button variant={danger ? "danger" : "default"} onClick={onSubmit}>
          {buttonText}
        </Button>
      }
    >
      <div className="flex flex-col gap-4">
        {!removeConfirmationText && (
          <Text as="p">
            Are you sure you want to {actionText} <b>{entityName}</b>?
          </Text>
        )}

        {additionalDetails && (
          <Text as="p" text03>
            {additionalDetails}
          </Text>
        )}
      </div>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/CreateProjectModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { Button } from "@opal/components";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { useKeyPress } from "@/hooks/useKeyPress";
import * as InputLayouts from "@/layouts/input-layouts";
import { useAppRouter } from "@/hooks/appNavigation";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import { SvgFolderPlus } from "@opal/icons";
import Modal from "@/refresh-components/Modal";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { toast } from "@/hooks/useToast";

interface CreateProjectModalProps {
  initialProjectName?: string;
}

export default function CreateProjectModal({
  initialProjectName,
}: CreateProjectModalProps) {
  const { createProject } = useProjectsContext();
  const modal = useModal();
  const route = useAppRouter();
  const [projectName, setProjectName] = useState(initialProjectName ?? "");

  // Reset when prop changes (modal reopens with different value)
  useEffect(() => {
    setProjectName(initialProjectName ?? "");
  }, [initialProjectName]);

  async function handleSubmit() {
    const name = projectName.trim();
    if (!name) return;

    try {
      const newProject = await createProject(name);
      route({ projectId: newProject.id });
      modal.toggle(false);
    } catch (e) {
      toast.error(`Failed to create the project ${name}`);
    }
  }

  useKeyPress(handleSubmit, "Enter");

  return (
    <>
      <Modal open={modal.isOpen} onOpenChange={modal.toggle}>
        <Modal.Content width="sm">
          <Modal.Header
            icon={SvgFolderPlus}
            title="Create New Project"
            description="Use projects to organize your files and chats in one place, and add custom instructions for ongoing work."
            onClose={() => modal.toggle(false)}
          />
          <Modal.Body>
            <InputLayouts.Vertical title="Project Name">
              <InputTypeIn
                value={projectName}
                onChange={(e) => setProjectName(e.target.value)}
                placeholder="What are you working on?"
                showClearButton
              />
            </InputLayouts.Vertical>
          </Modal.Body>
          <Modal.Footer>
            <Button prominence="secondary" onClick={() => modal.toggle(false)}>
              Cancel
            </Button>
            <Button disabled={!projectName.trim()} onClick={handleSubmit}>
              Create Project
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/components/modals/EditPropertyModal.tsx
================================================
import { Formik, Form } from "formik";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { TextFormField } from "@/components/Field";
import { SvgEdit } from "@opal/icons";
export interface EditPropertyModalProps {
  propertyTitle: string;
  propertyDetails?: string;
  propertyName: string;
  propertyValue: string;
  validationSchema: any;
  onClose: () => void;
  onSubmit: (propertyName: string, propertyValue: string) => Promise<void>;
}

export default function EditPropertyModal({
  propertyTitle, // A friendly title to be displayed for the property
  propertyDetails, // a helpful description of the property to be displayed, (Valid ranges, units, etc)
  propertyName, // the programmatic property name
  propertyValue, // the programmatic property value (current)
  validationSchema, // Allow custom Yup schemas ... set on "propertyValue"
  onClose,
  onSubmit,
}: EditPropertyModalProps) {
  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm">
        <Modal.Header
          icon={SvgEdit}
          title={`Edit ${propertyTitle}`}
          onClose={onClose}
        />
        <Modal.Body>
          <Formik
            initialValues={{
              propertyName: propertyName,
              propertyValue: propertyValue,
            }}
            validationSchema={validationSchema}
            onSubmit={(values) => {
              onSubmit(values.propertyName, values.propertyValue);
              onClose();
            }}
          >
            {({ isSubmitting, isValid, values }) => (
              <Form className="w-full">
                <TextFormField
                  vertical
                  label={propertyDetails || ""}
                  name="propertyValue"
                  placeholder="Property value"
                />

                <Modal.Footer>
                  <Button
                    disabled={
                      isSubmitting ||
                      !isValid ||
                      values.propertyValue === propertyValue
                    }
                    type="submit"
                  >
                    {isSubmitting ? "Updating..." : "Update property"}
                  </Button>
                </Modal.Footer>
              </Form>
            )}
          </Formik>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/GenericConfirmModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { SvgCheck } from "@opal/icons";
export interface GenericConfirmModalProps {
  title: string;
  message: string;
  confirmText?: string;
  onClose: () => void;
  onConfirm: () => void;
}

export default function GenericConfirmModal({
  title,
  message,
  confirmText = "Confirm",
  onClose,
  onConfirm,
}: GenericConfirmModalProps) {
  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm" height="sm">
        <Modal.Header icon={SvgCheck} title={title} onClose={onClose} />
        <Modal.Body>
          <Text as="p">{message}</Text>
        </Modal.Body>
        <Modal.Footer>
          <Button onClick={onConfirm}>{confirmText}</Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/MoveCustomAgentChatModal.tsx
================================================
"use client";

import { useState } from "react";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Button } from "@opal/components";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertCircle } from "@opal/icons";
interface MoveCustomAgentChatModalProps {
  onCancel: () => void;
  onConfirm: (doNotShowAgain: boolean) => void;
}

export default function MoveCustomAgentChatModal({
  onCancel,
  onConfirm,
}: MoveCustomAgentChatModalProps) {
  const [doNotShowAgain, setDoNotShowAgain] = useState(false);

  return (
    <ConfirmationModalLayout
      icon={SvgAlertCircle}
      title="Move Custom Agent Chat"
      onClose={onCancel}
      submit={
        <Button onClick={() => onConfirm(doNotShowAgain)}>Confirm Move</Button>
      }
    >
      <div className="flex flex-col gap-4">
        <Text as="p" text03>
          This chat uses a <b>custom agent</b> and moving it to a <b>project</b>{" "}
          will not override the agent&apos;s prompt or knowledge configurations.
          This should only be used for organization purposes.
        </Text>
        <div className="flex items-center gap-1">
          <Checkbox
            id="move-custom-agent-do-not-show"
            checked={doNotShowAgain}
            onCheckedChange={(checked) => setDoNotShowAgain(Boolean(checked))}
          />
          <label
            htmlFor="move-custom-agent-do-not-show"
            className="text-text-03 text-sm"
          >
            Do not show this again
          </label>
        </div>
      </div>
    </ConfirmationModalLayout>
  );
}


================================================
FILE: web/src/components/modals/NewTeamModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";
import { Dialog } from "@headlessui/react";
import { Button } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { useUser } from "@/providers/UserProvider";
import { useModalContext } from "../context/ModalContext";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import {
  SvgArrowRight,
  SvgArrowUp,
  SvgCheckCircle,
  SvgOrganization,
  SvgPlus,
} from "@opal/icons";
export interface TenantByDomainResponse {
  tenant_id: string;
  number_of_users: number;
  creator_email: string;
}

export default function NewTeamModal() {
  const { showNewTeamModal, setShowNewTeamModal } = useModalContext();
  const [existingTenant, setExistingTenant] =
    useState<TenantByDomainResponse | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [hasRequestedInvite, setHasRequestedInvite] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const { user } = useUser();
  const appDomain = user?.email.split("@")[1];
  const router = useRouter();
  const searchParams = useSearchParams();

  useEffect(() => {
    const hasNewTeamParam = searchParams?.has("new_team");
    if (hasNewTeamParam) {
      setShowNewTeamModal(true);
      fetchTenantInfo();

      // Remove the new_team parameter from the URL without page reload
      const newParams = new URLSearchParams(searchParams?.toString() || "");
      newParams.delete("new_team");
      const newUrl =
        window.location.pathname +
        (newParams.toString() ? `?${newParams.toString()}` : "");
      window.history.replaceState({}, "", newUrl);
    }
  }, [searchParams, setShowNewTeamModal]);

  const fetchTenantInfo = async () => {
    setIsLoading(true);
    setError(null);

    try {
      const response = await fetch("/api/tenants/existing-team-by-domain");
      if (!response.ok) {
        throw new Error(`Failed to fetch team info: ${response.status}`);
      }
      const responseJson = await response.json();
      if (!responseJson) {
        setShowNewTeamModal(false);
        setExistingTenant(null);
        return;
      }

      const data = responseJson as TenantByDomainResponse;
      setExistingTenant(data);
    } catch (error) {
      console.error("Failed to fetch tenant info:", error);
      setError("Could not retrieve team information. Please try again later.");
    } finally {
      setIsLoading(false);
    }
  };

  const handleRequestInvite = async () => {
    if (!existingTenant) return;

    setIsSubmitting(true);
    setError(null);

    try {
      const response = await fetch("/api/tenants/users/invite/request", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ tenant_id: existingTenant.tenant_id }),
      });

      if (!response.ok) {
        const errorData = await response.json().catch(() => ({}));
        throw new Error(
          errorData.detail || errorData.message || "Failed to request invite"
        );
      }

      setHasRequestedInvite(true);
      toast.success("Your invite request has been sent to the team admin.");
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Failed to request an invite";
      setError(message);
      toast.error(message);
    } finally {
      setIsSubmitting(false);
    }
  };

  const handleContinueToNewOrg = () => {
    const newUrl = window.location.pathname;
    router.replace(newUrl as Route);
    setShowNewTeamModal(false);
  };

  // Update the close handler to use the context
  const handleClose = () => {
    setShowNewTeamModal(false);
  };

  // Only render if showNewTeamModal is true
  if (!showNewTeamModal || isLoading) return null;

  return (
    <Dialog
      open={showNewTeamModal}
      onClose={handleClose}
      className="relative z-[1000]"
    >
      {/* Modal backdrop */}
      <div className="fixed inset-0 bg-mask-03" aria-hidden="true" />

      <div className="fixed inset-0 flex items-center justify-center p-4">
        <Dialog.Panel className="mx-auto w-full max-w-md rounded-lg bg-background-neutral-00 p-6 shadow-xl border">
          <Dialog.Title className="text-xl font-semibold mb-4 flex items-center">
            {hasRequestedInvite ? (
              <>
                <SvgCheckCircle className="mr-2 h-5 w-5 stroke-text-05" />
                Join Request Sent
              </>
            ) : (
              <>
                <SvgOrganization className="mr-2 h-5 w-5 stroke-text-04" />
                We found an existing team for {appDomain}
              </>
            )}
          </Dialog.Title>

          {isLoading ? (
            <div className="py-8 text-center">
              <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-border-05 mx-auto mb-4"></div>
              <p>Loading team information...</p>
            </div>
          ) : error ? (
            <div className="space-y-4">
              <p className="text-status-text-error-05">{error}</p>
              <div className="flex w-full pt-2">
                <Button
                  onClick={handleContinueToNewOrg}
                  width="full"
                  rightIcon={SvgArrowRight}
                >
                  Continue with new team
                </Button>
              </div>
            </div>
          ) : hasRequestedInvite ? (
            <div className="space-y-4">
              <p className="text-text-04">
                Your join request has been sent. You can explore as your own
                team while waiting for an admin of {appDomain} to approve your
                request.
              </p>
              <div className="flex w-full pt-2">
                <Button
                  onClick={handleContinueToNewOrg}
                  width="full"
                  rightIcon={SvgArrowRight}
                >
                  Try Onyx while waiting
                </Button>
              </div>
            </div>
          ) : (
            <div className="space-y-4">
              <p className="text-text-03 text-sm mb-2">
                Your join request can be approved by any admin of {appDomain}.
              </p>
              <div className="flex flex-col items-center justify-center gap-4 mt-4">
                <Button
                  disabled={isSubmitting}
                  onClick={handleRequestInvite}
                  width="full"
                  icon={isSubmitting ? SimpleLoader : SvgArrowUp}
                >
                  {isSubmitting
                    ? "Sending request..."
                    : "Request to join your team"}
                </Button>
              </div>
              <Button
                onClick={handleContinueToNewOrg}
                width="full"
                icon={SvgPlus}
                prominence="secondary"
              >
                Continue with new team
              </Button>
            </div>
          )}
        </Dialog.Panel>
      </div>
    </Dialog>
  );
}


================================================
FILE: web/src/components/modals/NoAgentModal.tsx
================================================
"use client";

import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { useUser } from "@/providers/UserProvider";
import { SvgUser } from "@opal/icons";

export default function NoAgentModal() {
  const { isAdmin } = useUser();

  return (
    <Modal open>
      <Modal.Content width="sm" height="sm">
        <Modal.Header icon={SvgUser} title="No Agent Available" />
        <Modal.Body>
          <Text as="p">
            You currently have no agent configured. To use this feature, you
            need to take action.
          </Text>
          {isAdmin ? (
            <>
              <Text as="p">
                As an administrator, you can create a new agent by visiting the
                admin panel.
              </Text>
              <Button width="full" href="/admin/agents">
                Go to Admin Panel
              </Button>
            </>
          ) : (
            <Text as="p">
              Please contact your administrator to configure an agent for you.
            </Text>
          )}
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/ProviderModal.tsx
================================================
import React from "react";
import { Button } from "@opal/components";
import type { IconProps } from "@opal/types";
import Modal from "@/refresh-components/Modal";
import { SvgLoader } from "@opal/icons";
export interface ProviderModalProps {
  // Modal configurations
  clickOutsideToClose?: boolean;

  // Base modal props
  open: boolean;
  onOpenChange: (open: boolean) => void;
  icon: React.FunctionComponent<IconProps>;
  title: string;
  description?: string;
  className?: string;
  children?: React.ReactNode;

  // Footer props
  onSubmit?: () => void;
  submitDisabled?: boolean;
  isSubmitting?: boolean;
  submitLabel?: string;
  cancelLabel?: string;
}

export default function ProviderModal({
  open,
  onOpenChange,
  icon: icon,
  title,
  description,
  children,
  onSubmit,
  submitDisabled = false,
  isSubmitting = false,
  submitLabel = "Connect",
  cancelLabel = "Cancel",
}: ProviderModalProps) {
  const SpinningLoader: React.FunctionComponent<IconProps> = (props) => (
    <SvgLoader
      {...props}
      className={`${
        props.className ?? ""
      } h-3 w-3 stroke-text-inverted-04 animate-spin`}
    />
  );

  const handleOpenChange = (isOpen: boolean) => {
    if (!isOpen) {
      onOpenChange(false);
    }
  };

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === "Enter" && onSubmit && !submitDisabled && !isSubmitting) {
      // Check if the target is not a textarea (allow Enter in textareas)
      if ((e.target as HTMLElement).tagName !== "TEXTAREA") {
        e.preventDefault();
        onSubmit();
      }
    }
  };

  return (
    <Modal open={open} onOpenChange={handleOpenChange}>
      <Modal.Content width="sm" height="lg" onKeyDown={handleKeyDown}>
        <Modal.Header
          icon={icon}
          title={title}
          description={description}
          onClose={() => onOpenChange(false)}
        />

        <Modal.Body>{children}</Modal.Body>

        {onSubmit && (
          <Modal.Footer>
            <Button
              prominence="secondary"
              type="button"
              onClick={() => onOpenChange(false)}
            >
              {cancelLabel}
            </Button>
            <Button
              disabled={submitDisabled || isSubmitting}
              type="button"
              onClick={onSubmit}
              icon={isSubmitting ? SpinningLoader : undefined}
            >
              {submitLabel}
            </Button>
          </Modal.Footer>
        )}
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/components/modals/UserFilesModal.tsx
================================================
"use client";

import React, { useRef, useState, useEffect, useMemo } from "react";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { ProjectFile } from "@/providers/ProjectsContext";
import { formatRelativeTime } from "@/app/app/components/projects/project_utils";
import Text from "@/refresh-components/texts/Text";
import type { IconProps } from "@opal/types";
import { getFileExtension, isImageExtension } from "@/lib/utils";
import { UserFileStatus } from "@/app/app/projects/projectsService";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import AttachmentButton from "@/refresh-components/buttons/AttachmentButton";
import Modal from "@/refresh-components/Modal";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import TextSeparator from "@/refresh-components/TextSeparator";
import {
  SvgEye,
  SvgFiles,
  SvgFileText,
  SvgImage,
  SvgTrash,
  SvgXCircle,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import useFilter from "@/hooks/useFilter";
import { Button } from "@opal/components";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";

function getIcon(
  file: ProjectFile,
  isProcessing: boolean
): React.FunctionComponent<IconProps> {
  if (isProcessing) return SimpleLoader;
  const ext = getFileExtension(file.name).toLowerCase();
  if (isImageExtension(ext)) return SvgImage;
  return SvgFileText;
}

function getDescription(file: ProjectFile): string {
  const s = String(file.status || "");
  const typeLabel = getFileExtension(file.name);
  if (s === UserFileStatus.PROCESSING) return "Processing...";
  if (s === UserFileStatus.UPLOADING) return "Uploading...";
  if (s === UserFileStatus.DELETING) return "Deleting...";
  if (s === UserFileStatus.COMPLETED) return typeLabel;
  return file.status ?? typeLabel;
}

interface FileAttachmentProps {
  file: ProjectFile;
  isSelected: boolean;
  onClick?: () => void;
  onView?: () => void;
  onDelete?: () => void;
}

function FileAttachment({
  file,
  isSelected,
  onClick,
  onView,
  onDelete,
}: FileAttachmentProps) {
  const isProcessing =
    String(file.status) === UserFileStatus.PROCESSING ||
    String(file.status) === UserFileStatus.UPLOADING ||
    String(file.status) === UserFileStatus.DELETING;

  const Icon = getIcon(file, isProcessing);
  const description = getDescription(file);
  const rightText = file.last_accessed_at
    ? formatRelativeTime(file.last_accessed_at)
    : "";

  return (
    <AttachmentButton
      onClick={onClick}
      icon={Icon}
      description={description}
      rightText={rightText}
      selected={isSelected}
      processing={isProcessing}
      onView={onView}
      actionIcon={SvgTrash}
      onAction={onDelete}
    >
      {file.name}
    </AttachmentButton>
  );
}

export interface UserFilesModalProps {
  // Modal content
  title: string;
  description: string;
  recentFiles: ProjectFile[];
  handleUploadChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;
  selectedFileIds?: string[];

  // FileAttachment related
  onView?: (file: ProjectFile) => void;
  onDelete?: (file: ProjectFile) => void;
  onPickRecent?: (file: ProjectFile) => void;
  onUnpickRecent?: (file: ProjectFile) => void;
}

export default function UserFilesModal({
  title,
  description,
  recentFiles,
  handleUploadChange,
  selectedFileIds,

  onView,
  onDelete,
  onPickRecent,
  onUnpickRecent,
}: UserFilesModalProps) {
  const { isOpen, toggle } = useModal();
  const [selectedIds, setSelectedIds] = useState<Set<string>>(
    () => new Set(selectedFileIds || [])
  );
  const [showOnlySelected, setShowOnlySelected] = useState(false);
  const fileInputRef = useRef<HTMLInputElement | null>(null);
  const searchInputRef = useRef<HTMLInputElement | null>(null);
  const triggerUploadPicker = () => fileInputRef.current?.click();

  useEffect(() => {
    if (selectedFileIds) setSelectedIds(new Set(selectedFileIds));
    else setSelectedIds(new Set());
  }, [selectedFileIds]);

  const selectedCount = selectedIds.size;

  function handleDeselectAll() {
    selectedIds.forEach((id) => {
      const file = recentFiles.find((f) => f.id === id);
      if (file) {
        onUnpickRecent?.(file);
      }
    });
    setSelectedIds(new Set());
  }

  const files = useMemo(
    () =>
      showOnlySelected
        ? recentFiles.filter((projectFile) => selectedIds.has(projectFile.id))
        : recentFiles,
    [showOnlySelected, recentFiles, selectedIds]
  );

  const { query, setQuery, filtered } = useFilter(files, (file) => file.name);

  return (
    <>
      {/* Hidden file input */}
      {handleUploadChange && (
        <input
          ref={fileInputRef}
          type="file"
          multiple
          className="hidden"
          onChange={handleUploadChange}
        />
      )}

      <Modal open={isOpen} onOpenChange={toggle}>
        <Modal.Content
          width="sm"
          height="lg"
          onOpenAutoFocus={(e) => {
            e.preventDefault();
            searchInputRef.current?.focus();
          }}
          preventAccidentalClose={false}
        >
          <Modal.Header icon={SvgFiles} title={title} description={description}>
            {/* Search bar section */}
            <Section flexDirection="row" gap={0.5}>
              <InputTypeIn
                ref={searchInputRef}
                placeholder="Search files..."
                value={query}
                onChange={(e) => setQuery(e.target.value)}
                leftSearchIcon
                autoComplete="off"
                tabIndex={0}
                onFocus={(e) => {
                  e.target.select();
                }}
              />
              {handleUploadChange && (
                <CreateButton
                  onClick={triggerUploadPicker}
                  secondary={false}
                  internal
                >
                  Add Files
                </CreateButton>
              )}
            </Section>
          </Modal.Header>

          <Modal.Body
            padding={filtered.length === 0 ? 0.5 : 0}
            gap={0.5}
            alignItems="center"
          >
            {/* File display section */}
            {filtered.length === 0 ? (
              <Text text03>No files found</Text>
            ) : (
              <ScrollIndicatorDiv className="p-2 gap-2 max-h-[70vh]">
                {filtered.map((projectFle) => {
                  const isSelected = selectedIds.has(projectFle.id);
                  return (
                    <FileAttachment
                      key={projectFle.id}
                      file={projectFle}
                      isSelected={isSelected}
                      onClick={
                        onPickRecent
                          ? () => {
                              if (isSelected) {
                                onUnpickRecent?.(projectFle);
                                setSelectedIds((prev) => {
                                  const next = new Set(prev);
                                  next.delete(projectFle.id);
                                  return next;
                                });
                              } else {
                                onPickRecent(projectFle);
                                setSelectedIds((prev) => {
                                  const next = new Set(prev);
                                  next.add(projectFle.id);
                                  return next;
                                });
                              }
                            }
                          : undefined
                      }
                      onView={onView ? () => onView(projectFle) : undefined}
                      onDelete={
                        onDelete ? () => onDelete(projectFle) : undefined
                      }
                    />
                  );
                })}

                {/* File count divider - only show when not searching or filtering */}
                {!query.trim() && !showOnlySelected && (
                  <TextSeparator
                    count={recentFiles.length}
                    text={recentFiles.length === 1 ? "File" : "Files"}
                  />
                )}
              </ScrollIndicatorDiv>
            )}
          </Modal.Body>

          <Modal.Footer>
            {/* Left side: file count and controls */}
            {onPickRecent && (
              <Section flexDirection="row" justifyContent="start" gap={0.5}>
                <Text as="p" text03>
                  {selectedCount} {selectedCount === 1 ? "file" : "files"}{" "}
                  selected
                </Text>
                <Button
                  icon={SvgEye}
                  prominence="tertiary"
                  size="sm"
                  onClick={() => setShowOnlySelected(!showOnlySelected)}
                  interaction={showOnlySelected ? "hover" : "rest"}
                />
                <Button
                  disabled={selectedCount === 0}
                  icon={SvgXCircle}
                  prominence="tertiary"
                  size="sm"
                  onClick={handleDeselectAll}
                />
              </Section>
            )}

            {/* Right side: Done button */}
            <Button prominence="secondary" onClick={() => toggle(false)}>
              Done
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/components/oauth/OAuthCallbackPage.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";
import { CheckmarkIcon, TriangleAlertIcon } from "@/components/icons/icons";
import CardSection from "@/components/admin/CardSection";
import { Button } from "@opal/components";

interface OAuthCallbackConfig {
  // UI customization
  processingMessage?: string;
  processingDetails?: string;
  successMessage?: string;
  successDetailsTemplate?: string; // Template with {serviceName} placeholder
  errorMessage?: string;
  backButtonText?: string;
  redirectingMessage?: string;

  // Behavior
  autoRedirectDelay?: number; // milliseconds
  defaultRedirectPath?: string;

  // API integration - all flows now use the same pattern
  callbackApiUrl: string; // Required - API endpoint to call

  // Error message mapping
  errorMessageMap?: Record<string, string>;
}

interface OAuthCallbackPageProps {
  config: OAuthCallbackConfig;
}

export default function OAuthCallbackPage({ config }: OAuthCallbackPageProps) {
  const router = useRouter();
  const searchParams = useSearchParams();

  const [statusMessage, setStatusMessage] = useState(
    config.processingMessage || "Processing..."
  );
  const [statusDetails, setStatusDetails] = useState(
    config.processingDetails || "Please wait while we complete the setup."
  );
  const [isError, setIsError] = useState(false);
  const [isSuccess, setIsSuccess] = useState(false);
  const [isLoading, setIsLoading] = useState(true);
  const [serviceName, setServiceName] = useState<string>("");
  const [redirectPath, setRedirectPath] = useState<string | undefined>(
    undefined
  );
  const [secondsLeft, setSecondsLeft] = useState<number | null>(null);

  // Extract query parameters
  const code = searchParams?.get("code");
  const state = searchParams?.get("state");
  const error = searchParams?.get("error");
  const errorDescription = searchParams?.get("error_description");

  // Auto-redirect for success cases (with countdown)
  useEffect(() => {
    if (!isSuccess) return;

    const delayMs = config.autoRedirectDelay ?? 2000;
    setSecondsLeft(Math.ceil(delayMs / 1000));

    const interval = setInterval(() => {
      setSecondsLeft((prev) => (prev !== null && prev > 0 ? prev - 1 : prev));
    }, 1000);

    const timer = setTimeout(() => {
      const target = redirectPath || config.defaultRedirectPath || "/app";
      router.push(target as Route);
    }, delayMs);

    return () => {
      clearInterval(interval);
      clearTimeout(timer);
    };
  }, [
    isSuccess,
    redirectPath,
    router,
    config.autoRedirectDelay,
    config.defaultRedirectPath,
  ]);

  useEffect(() => {
    const controller = new AbortController();

    const handleOAuthCallback = async () => {
      // Handle OAuth error from provider
      if (error) {
        setStatusMessage(config.errorMessage || "Authorization Failed");
        setStatusDetails(
          errorDescription ||
            "The authorization was cancelled or failed. Please try again."
        );
        setIsError(true);
        setIsLoading(false);
        return;
      }

      // Validate required parameters
      if (!code || !state) {
        setStatusMessage("Invalid Request");
        setStatusDetails(
          "The authorization request was incomplete. Please try again."
        );
        setIsError(true);
        setIsLoading(false);
        return;
      }

      try {
        // Make API call to process callback - all flows use this pattern now
        const url = `${config.callbackApiUrl}?code=${encodeURIComponent(
          code
        )}&state=${encodeURIComponent(state)}`;

        const response = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          credentials: "include",
          signal: controller.signal,
        });

        if (!response.ok) {
          let errorMessage = "Failed to complete authorization";
          try {
            const errorData = await response.json();
            if (errorData.detail && config.errorMessageMap) {
              // Use custom error mapping
              for (const [pattern, message] of Object.entries(
                config.errorMessageMap
              )) {
                if (errorData.detail.includes(pattern)) {
                  errorMessage = message;
                  break;
                }
              }
            } else if (errorData.error) {
              errorMessage = errorData.error;
            }
          } catch (parseError) {
            console.error("Error parsing response:", parseError);
          }
          throw new Error(errorMessage);
        }

        // Parse the response to get service and redirect information
        const responseData = await response.json();
        const result = {
          success: true,
          serviceName:
            responseData.source ||
            responseData.server_name ||
            responseData.service_name,
        };

        setServiceName(result.serviceName || "");
        // Respect backend-provided redirect path (from state.return_path)
        // Sanitize to prevent open redirects (e.g. "//evil.com")
        const rawPath =
          responseData.redirect_url ||
          searchParams?.get("return_path") ||
          config.defaultRedirectPath ||
          "/app";
        const sanitizedPath =
          rawPath.startsWith("http://") || rawPath.startsWith("https://")
            ? "/app"
            : "/" + rawPath.replace(/^\/+/, "");
        const redirectUrl = new URL(sanitizedPath, window.location.origin);
        redirectUrl.searchParams.set("message", "oauth_connected");
        setRedirectPath(redirectUrl.pathname + redirectUrl.search);
        setStatusMessage(config.successMessage || "Success!");

        const successDetails = config.successDetailsTemplate
          ? config.successDetailsTemplate.replace(
              "{serviceName}",
              result.serviceName || "service"
            )
          : `Your ${
              result.serviceName || "service"
            } authorization completed successfully.`;

        setStatusDetails(successDetails);
        setIsSuccess(true);
        setIsError(false);
        setIsLoading(false);
      } catch (error) {
        if (controller.signal.aborted) return;
        console.error("OAuth callback error:", error);
        setStatusMessage(config.errorMessage || "Something Went Wrong");
        setStatusDetails(
          error instanceof Error
            ? error.message
            : "An error occurred during the OAuth process. Please try again."
        );
        setIsError(true);
        setIsLoading(false);
      }
    };

    handleOAuthCallback();
    return () => controller.abort();
  }, [code, state, error, errorDescription, searchParams, config]);

  const getStatusIcon = () => {
    if (isLoading) {
      return (
        <div className="w-16 h-16 border-4 border-blue-200 dark:border-blue-800 border-t-blue-600 dark:border-t-blue-400 rounded-full animate-spin mx-auto mb-4"></div>
      );
    }
    if (isSuccess) {
      return (
        <CheckmarkIcon
          size={64}
          className="text-green-500 dark:text-green-400 mx-auto mb-4"
        />
      );
    }
    if (isError) {
      return (
        <TriangleAlertIcon
          size={64}
          className="text-red-500 dark:text-red-400 mx-auto mb-4"
        />
      );
    }
    return null;
  };

  const getStatusColor = () => {
    if (isSuccess) return "text-green-600 dark:text-green-400";
    if (isError) return "text-red-600 dark:text-red-400";
    return "text-gray-600 dark:text-gray-300";
  };

  return (
    <div className="min-h-screen flex flex-col">
      <div className="flex-1 flex flex-col items-center justify-center p-4">
        <CardSection className="max-w-md w-full mx-auto p-8 shadow-lg bg-white dark:bg-gray-800 rounded-lg">
          <div className="text-center">
            {getStatusIcon()}

            <h1 className={`text-2xl font-bold mb-4 ${getStatusColor()}`}>
              {statusMessage}
            </h1>

            <p className="text-gray-600 dark:text-gray-300 mb-6 leading-relaxed">
              {statusDetails}
            </p>

            {isSuccess && secondsLeft !== null && (
              <div className="bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-lg p-4 mb-6">
                <p className="text-green-800 dark:text-green-200 text-sm">
                  Redirecting in {secondsLeft}{" "}
                  {secondsLeft === 1 ? "second" : "seconds"}...
                </p>
              </div>
            )}

            <div className="flex flex-col space-y-3">
              {isError && (
                <div className="flex flex-col space-y-2">
                  <Button
                    onClick={() => {
                      const target =
                        redirectPath || config.defaultRedirectPath || "/app";
                      router.push(target as Route);
                    }}
                    width="full"
                  >
                    {config.backButtonText || "Back to Chat"}
                  </Button>
                </div>
              )}

              {isLoading && (
                <p className="text-sm text-gray-500 dark:text-gray-400">
                  This may take a few moments...
                </p>
              )}
            </div>
          </div>
        </CardSection>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/resizable/constants.ts
================================================
export const DOCUMENT_SIDEBAR_WIDTH_COOKIE_NAME = "documentSidebarWidth";
export const SIDEBAR_TOGGLED_COOKIE_NAME = "sidebarIsToggled";
export const PRO_SEARCH_TOGGLED_COOKIE_NAME = "proSearchIsToggled";


================================================
FILE: web/src/components/search/DocumentDisplay.tsx
================================================
"use client";
import React, { JSX } from "react";
import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces";
import { SourceIcon } from "../SourceIcon";
import { WebResultIcon } from "../WebResultIcon";
import Text from "@/refresh-components/texts/Text";
import { openDocument } from "@/lib/search/utils";
import { SubQuestionDetail } from "@/app/app/interfaces";
import { ValidSources } from "@/lib/types";
import { Card } from "@/components/ui/card";

export const buildDocumentSummaryDisplay = (
  matchHighlights: string[],
  blurb: string
) => {
  // if there are no match highlights, or if it's really short, just use the blurb
  // this is to prevent the UI from showing something like `...` for the summary
  const MIN_MATCH_HIGHLIGHT_LENGTH = 5;
  if (
    !matchHighlights ||
    matchHighlights.length <= MIN_MATCH_HIGHLIGHT_LENGTH
  ) {
    return blurb;
  }

  // content, isBold, isContinuation
  let sections = [] as [string, boolean, boolean][];
  matchHighlights.forEach((matchHighlight, matchHighlightIndex) => {
    if (!matchHighlight) {
      return;
    }

    const words = matchHighlight.split(new RegExp("\\s"));
    words.forEach((word) => {
      if (!word) {
        return;
      }

      let isContinuation = false;
      while (word.includes("<hi>") && word.includes("</hi>")) {
        const start = word.indexOf("<hi>");
        const end = word.indexOf("</hi>");
        const before = word.slice(0, start);
        const highlight = word.slice(start + 4, end);
        const after = word.slice(end + 5);

        if (before) {
          sections.push([before, false, isContinuation]);
          isContinuation = true;
        }
        sections.push([highlight, true, isContinuation]);
        isContinuation = true;
        word = after;
      }

      if (word) {
        sections.push([word, false, isContinuation]);
      }
    });
    if (matchHighlightIndex != matchHighlights.length - 1) {
      sections.push(["...", false, false]);
    }
  });

  if (sections.length == 0) {
    return;
  }

  const firstSection = sections[0];
  if (firstSection === undefined) {
    return;
  }

  let previousIsContinuation = firstSection[2];
  let previousIsBold = firstSection[1];
  let currentText = "";
  const finalJSX = [] as (JSX.Element | string)[];
  sections.forEach(([word, shouldBeBold, isContinuation], index) => {
    if (shouldBeBold != previousIsBold) {
      if (currentText) {
        if (previousIsBold) {
          // remove leading space so that we don't bold the whitespace
          // in front of the matching keywords
          currentText = currentText.trim();
          if (!previousIsContinuation) {
            finalJSX[finalJSX.length - 1] = finalJSX[finalJSX.length - 1] + " ";
          }
          finalJSX.push(
            <b key={index} className="text-text font-bold">
              {currentText}
            </b>
          );
        } else {
          finalJSX.push(currentText);
        }
      }
      currentText = "";
    }
    previousIsBold = shouldBeBold;
    previousIsContinuation = isContinuation;
    if (!isContinuation || index === 0) {
      currentText += " ";
    }
    currentText += word;
  });
  if (currentText) {
    if (previousIsBold) {
      currentText = currentText.trim();
      if (!previousIsContinuation) {
        finalJSX[finalJSX.length - 1] = finalJSX[finalJSX.length - 1] + " ";
      }
      finalJSX.push(
        <b key={sections.length} className="text-default bg-highlight-text">
          {currentText}
        </b>
      );
    } else {
      finalJSX.push(currentText);
    }
  }
  return finalJSX;
};

interface CompactDocumentCardProps {
  document: OnyxDocument;
  updatePresentingDocument: (document: MinimalOnyxDocument) => void;
}

export function CompactDocumentCard({
  document,
  updatePresentingDocument,
}: CompactDocumentCardProps) {
  const isWebSource =
    document.is_internet || document.source_type === ValidSources.Web;

  return (
    <Card className="shadow-00 w-[20rem]">
      <button
        onClick={() => {
          openDocument(document, updatePresentingDocument);
        }}
        className="max-w-[20rem] p-3 flex flex-col gap-1"
      >
        <div className="flex flex-row gap-2 items-center w-full">
          {isWebSource && document.link ? (
            <WebResultIcon url={document.link} size={18} />
          ) : (
            <SourceIcon sourceType={document.source_type} iconSize={18} />
          )}
          <Text as="p" text04 className="truncate !m-0">
            {document.semantic_identifier ?? document.document_id}
          </Text>
        </div>

        {document.blurb && (
          <Text
            as="p"
            text03
            secondaryBody
            className="line-clamp-2 text-left !m-0"
          >
            {document.blurb}
          </Text>
        )}

        {document.updated_at &&
          !isNaN(new Date(document.updated_at).getTime()) && (
            <Text
              as="p"
              text03
              figureSmallLabel
              className="line-clamp-2 text-left !m-0"
            >
              Updated {new Date(document.updated_at).toLocaleDateString()}
            </Text>
          )}
      </button>
    </Card>
  );
}

interface CompactQuestionCardProps {
  question: SubQuestionDetail;
  openQuestion: (question: SubQuestionDetail) => void;
}

export function CompactQuestionCard({
  question,
  openQuestion,
}: CompactQuestionCardProps) {
  return (
    <div
      onClick={() => openQuestion(question)}
      className="max-w-[350px] gap-y-1 cursor-pointer pb-0 pt-0 mt-0 flex gap-y-0 flex-col content-start items-start gap-0"
    >
      <div className="text-sm !pb-0 !mb-0 font-semibold flex items-center gap-x-1 text-text-900 pt-0 mt-0 truncate w-full">
        Question
      </div>
      <div className="text-xs mb-0 text-text-600 line-clamp-2">
        {question.question}
      </div>
      <div className="flex mt-0 pt-0 items-center justify-between w-full">
        <span className="text-xs text-text-500">
          {question.context_docs?.top_documents.length || 0} context docs
        </span>
        {question.sub_queries && (
          <span className="text-xs text-text-500">
            {question.sub_queries.length} subqueries
          </span>
        )}
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/search/DocumentFeedbackBlock.tsx
================================================
import { toast } from "@/hooks/useToast";
import { ChevronsDownIcon, ChevronsUpIcon } from "../icons/icons";
import { CustomTooltip } from "../tooltip/CustomTooltip";

type DocumentFeedbackType = "endorse" | "reject" | "hide" | "unhide";

const giveDocumentFeedback = async (
  documentId: string,
  messageId: number,
  documentRank: number,
  searchFeedback: DocumentFeedbackType
): Promise<string | null> => {
  const response = await fetch("/api/chat/document-search-feedback", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      message_id: messageId,
      document_id: documentId,
      document_rank: documentRank,
      click: false,
      search_feedback: searchFeedback,
    }),
  });
  return response.ok
    ? null
    : response.statusText || (await response.json()).message;
};

interface DocumentFeedbackIconProps {
  documentId: string;
  messageId: number;
  documentRank: number;
  feedbackType: DocumentFeedbackType;
}

const DocumentFeedback = ({
  documentId,
  messageId,
  documentRank,
  feedbackType,
}: DocumentFeedbackIconProps) => {
  let icon = null;
  const size = 20;
  if (feedbackType === "endorse") {
    icon = (
      <ChevronsUpIcon
        size={size}
        className="my-auto flex flex-shrink-0 text-blue-400"
      />
    );
  }
  if (feedbackType === "reject") {
    icon = (
      <ChevronsDownIcon
        size={size}
        className="my-auto flex flex-shrink-0 text-blue-400"
      />
    );
  }
  if (!icon) {
    // TODO: support other types of feedback
    return null;
  }

  return (
    <div
      onClick={async () => {
        const errorMsg = await giveDocumentFeedback(
          documentId,
          messageId,
          documentRank,
          feedbackType
        );
        if (!errorMsg) {
          toast.success("Thanks for your feedback!");
        } else {
          toast.error(`Error giving feedback - ${errorMsg}`);
        }
      }}
      className="cursor-pointer"
    >
      {icon}
    </div>
  );
};

interface DocumentFeedbackBlockProps {
  documentId: string;
  messageId: number;
  documentRank: number;
}

export const DocumentFeedbackBlock = ({
  documentId,
  messageId,
  documentRank,
}: DocumentFeedbackBlockProps) => {
  return (
    <div className="flex items-center gap-x-2">
      <CustomTooltip showTick line content="Good response">
        <DocumentFeedback
          documentId={documentId}
          messageId={messageId}
          documentRank={documentRank}
          feedbackType="endorse"
        />
      </CustomTooltip>
      <CustomTooltip showTick line content="Bad response">
        <DocumentFeedback
          documentId={documentId}
          messageId={messageId}
          documentRank={documentRank}
          feedbackType="reject"
        />
      </CustomTooltip>
    </div>
  );
};


================================================
FILE: web/src/components/search/DocumentUpdatedAtBadge.tsx
================================================
import { timeAgo } from "@/lib/time";
import { MetadataBadge } from "../MetadataBadge";

export function DocumentUpdatedAtBadge({
  updatedAt,
  modal,
}: {
  updatedAt: string;
  modal?: boolean;
}) {
  return (
    <MetadataBadge
      flexNone={modal}
      value={(modal ? "" : "Updated ") + timeAgo(updatedAt)}
    />
  );
}


================================================
FILE: web/src/components/search/filtering/FilterDropdown.tsx
================================================
import { JSX } from "react";
import { FiCheck, FiChevronDown, FiXCircle } from "react-icons/fi";
import { CustomDropdown } from "../../Dropdown";

interface Option {
  key: string;
  display: string | JSX.Element;
  displayName?: string;
  icon?: JSX.Element;
}
export function FilterDropdown({
  options,
  selected,
  handleSelect,
  icon,
  defaultDisplay,
  width = "w-64",
  dropdownWidth,
  optionClassName,
  resetValues,
  backgroundColor,
  dropdownColor,
}: {
  options: Option[];
  selected: string[];
  handleSelect: (option: Option) => void;
  icon: JSX.Element;
  defaultDisplay: string | JSX.Element;
  width?: string;
  dropdownWidth?: string;
  optionClassName?: string;
  resetValues?: () => void;
  backgroundColor?: string;
  dropdownColor?: string;
}) {
  return (
    <div>
      <CustomDropdown
        dropdown={
          <div
            className={`
              border 
              border-border 
              rounded-lg 
              ${backgroundColor || "bg-background"}
              flex 
              flex-col 
              ${dropdownWidth || width}
              max-h-96 
              overflow-y-scroll
              overscroll-contain
              `}
          >
            {options.map((option, ind) => {
              const isSelected = selected.includes(option.key);
              return (
                <div
                  key={`${option.key}-1`}
                  className={`
                      ${optionClassName}
                      flex
                      px-3
                      text-sm
                      py-2.5
                      select-none
                      cursor-pointer
                      flex-none
                      w-full
                      text-text-darker
                      items-center
                      gap-x-1
                      ${dropdownColor || "bg-background"}
                      hover:bg-accent-background-hovered
                      ${
                        ind === options.length - 1
                          ? ""
                          : "border-b border-border"
                      } 
                    `}
                  onClick={(event) => {
                    handleSelect(option);
                    event.preventDefault();
                    event.stopPropagation();
                  }}
                >
                  {option.icon}
                  {option.display}
                  {isSelected && (
                    <div className="ml-auto my-auto mr-1">
                      <FiCheck />
                    </div>
                  )}
                </div>
              );
            })}
          </div>
        }
      >
        <div
          className={`
            flex
            ${width}
            text-sm
            px-3
            py-1.5
            rounded-lg 
            border 
            gap-x-2
            border-border
            cursor-pointer 
            ${backgroundColor || "bg-background"}
            hover:bg-accent-background`}
        >
          <div className="flex-none my-auto">{icon}</div>
          {selected.length === 0 || resetValues ? (
            defaultDisplay
          ) : (
            <p className="line-clamp-1">{selected.join(", ")}</p>
          )}
          {resetValues && selected.length !== 0 ? (
            <div
              className="my-auto ml-auto p-0.5 rounded-full w-fit"
              onClick={(e) => {
                resetValues();
                e.stopPropagation();
              }}
            >
              <FiXCircle />
            </div>
          ) : (
            <FiChevronDown className="my-auto ml-auto" />
          )}
        </div>
      </CustomDropdown>
    </div>
  );
}


================================================
FILE: web/src/components/search/results/Citation.tsx
================================================
import { ReactNode, JSX } from "react";
import { CompactDocumentCard, CompactQuestionCard } from "../DocumentDisplay";
import {
  LoadedOnyxDocument,
  MinimalOnyxDocument,
  OnyxDocument,
} from "@/lib/search/interfaces";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { openDocument } from "@/lib/search/utils";
import { SubQuestionDetail } from "@/app/app/interfaces";
import { getSourceDisplayName } from "@/lib/sources";
import { ValidSources } from "@/lib/types";
import Text from "@/refresh-components/texts/Text";

const MAX_CITATION_TEXT_LENGTH = 40;

export interface DocumentCardProps {
  document: LoadedOnyxDocument;
  updatePresentingDocument: (document: MinimalOnyxDocument) => void;
  url?: string;
}
export interface QuestionCardProps {
  question: SubQuestionDetail;
  openQuestion: (question: SubQuestionDetail) => void;
}

function truncateText(str: string, maxLength: number) {
  if (str.length <= maxLength) return str;
  return str.slice(0, maxLength) + "...";
}

export function Citation({
  children,
  document_info,
  question_info,
  index,
}: {
  document_info?: DocumentCardProps;
  question_info?: QuestionCardProps;
  children?: JSX.Element | string | null | ReactNode;
  index?: number;
}) {
  let innerText = "";
  if (index !== undefined) {
    innerText = index.toString();
  }

  if (children) {
    const childrenString = children.toString();
    const childrenSegment1 = childrenString.split("[")[1];
    if (childrenSegment1 !== undefined) {
      const childrenSegment1_0 = childrenSegment1.split("]")[0];
      if (childrenSegment1_0 !== undefined) {
        innerText = childrenSegment1_0;
      }
    }
  }

  if (!document_info && !question_info) {
    return <>{children}</>;
  }
  const sourceType = document_info?.document?.source_type;
  const title = document_info?.document?.semantic_identifier;
  const citationText =
    (sourceType && sourceType != ValidSources.Web
      ? getSourceDisplayName(sourceType)
      : truncateText(title || "", MAX_CITATION_TEXT_LENGTH)) || "Unknown";

  return (
    <TooltipProvider>
      <Tooltip>
        <TooltipTrigger asChild>
          <span
            onClick={() => {
              document_info?.document
                ? openDocument(
                    document_info.document,
                    document_info.updatePresentingDocument
                  )
                : question_info?.question
                  ? question_info.openQuestion(question_info.question)
                  : null;
            }}
            className="inline-flex items-center cursor-pointer transition-all duration-200 ease-in-out ml-1"
          >
            <span
              className="flex items-center justify-center p-1 h-4 max-w-[200px]
                         bg-background-tint-03 rounded-04
                         hover:bg-background-tint-04 shadow-sm"
              style={{ transform: "translateY(-10%)", lineHeight: "1" }}
            >
              <Text figureSmallValue className="truncate">
                {citationText}
              </Text>
            </span>
          </span>
        </TooltipTrigger>
        <TooltipContent
          className="bg-transparent p-0 shadow-none"
          side="bottom"
          align="start"
        >
          {document_info?.document ? (
            <CompactDocumentCard
              updatePresentingDocument={document_info.updatePresentingDocument}
              document={document_info.document}
            />
          ) : (
            <CompactQuestionCard
              question={question_info?.question!}
              openQuestion={question_info?.openQuestion!}
            />
          )}
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
}


================================================
FILE: web/src/components/search/results/ResponseSection.tsx
================================================
import { AlertIcon, TriangleAlertIcon } from "@/components/icons/icons";
import { useState, JSX } from "react";

export type StatusOptions = "in-progress" | "failed" | "warning" | "success";

interface ResponseSectionProps {
  header: JSX.Element | string;
  body: JSX.Element | string;
  status: StatusOptions;
  desiredOpenStatus: boolean;
  setDesiredOpenStatus?: (isOpen: boolean) => void;
  isNotControllable?: boolean;
}

export const ResponseSection = ({
  header,
  body,
  status,
  desiredOpenStatus,
  setDesiredOpenStatus,
  isNotControllable,
}: ResponseSectionProps) => {
  const [isOpen, setIsOpen] = useState<boolean | null>(null);

  let icon = null;
  if (status === "in-progress") {
    icon = <></>;
  }
  if (status === "failed") {
    icon = <AlertIcon size={16} className="text-red-500" />;
  }
  if (status === "success") {
    icon = <></>;
  }
  if (status === "warning") {
    icon = <TriangleAlertIcon size={16} className="text-yellow-600" />;
  }

  // use `desiredOpenStatus` if user has not clicked to open/close, otherwise use
  // `isOpen` state
  const finalIsOpen = isOpen !== null ? isOpen : desiredOpenStatus;
  return (
    <div>
      <div
        className={`
        flex 
        my-1 
        p-1 
        rounded  
        select-none 
        ${isNotControllable ? "" : "hover:bg-background-800 cursor-pointer"}`}
        onClick={() => {
          if (!isNotControllable) {
            if (isOpen === null) {
              setIsOpen(!desiredOpenStatus);
            } else {
              setIsOpen(!isOpen);
            }
          }
          if (setDesiredOpenStatus) {
            setDesiredOpenStatus(!desiredOpenStatus);
          }
        }}
      >
        <div className="my-auto">{icon}</div>
        <div className="my-auto text-sm text-text-200">{header}</div>
      </div>
      {finalIsOpen && <div className="pb-1 mx-2 text-sm mb-1">{body}</div>}
    </div>
  );
};


================================================
FILE: web/src/components/settings/lib.ts
================================================
import {
  CombinedSettings,
  EnterpriseSettings,
  ApplicationStatus,
  Settings,
  QueryHistoryType,
} from "@/interfaces/settings";
import {
  CUSTOM_ANALYTICS_ENABLED,
  HOST_URL,
  SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED,
} from "@/lib/constants";
import { fetchSS } from "@/lib/utilsSS";
import { getWebVersion } from "@/lib/version";

export enum SettingsError {
  OTHER = "OTHER",
}

export async function fetchStandardSettingsSS() {
  return fetchSS("/settings");
}

export async function fetchEnterpriseSettingsSS() {
  return fetchSS("/enterprise-settings");
}

export async function fetchCustomAnalyticsScriptSS() {
  return fetchSS("/enterprise-settings/custom-analytics-script");
}

export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
  const tasks = [fetchStandardSettingsSS()];
  if (SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {
    tasks.push(fetchEnterpriseSettingsSS());
    if (CUSTOM_ANALYTICS_ENABLED) {
      tasks.push(fetchCustomAnalyticsScriptSS());
    }
  }

  try {
    const results = await Promise.all(tasks);

    let settings: Settings;

    const result_0 = results[0];
    if (!result_0) {
      throw new Error("Standard settings fetch failed.");
    }

    if (!result_0.ok) {
      if (result_0.status === 403 || result_0.status === 401) {
        settings = {
          auto_scroll: true,
          application_status: ApplicationStatus.ACTIVE,
          gpu_enabled: false,
          maximum_chat_retention_days: null,
          notifications: [],
          needs_reindexing: false,
          anonymous_user_enabled: false,
          invite_only_enabled: false,
          deep_research_enabled: true,
          temperature_override_enabled: true,
          query_history_type: QueryHistoryType.NORMAL,
        };
      } else {
        throw new Error(
          `fetchStandardSettingsSS failed: status=${
            result_0.status
          } body=${await result_0.text()}`
        );
      }
    } else {
      settings = await result_0.json();
    }

    let enterpriseSettings: EnterpriseSettings | null = null;
    if (tasks.length > 1) {
      const result_1 = results[1];
      if (!result_1) {
        throw new Error("fetchEnterpriseSettingsSS failed.");
      }

      if (!result_1.ok) {
        if (result_1.status !== 403 && result_1.status !== 401) {
          throw new Error(
            `fetchEnterpriseSettingsSS failed: status=${
              result_1.status
            } body=${await result_1.text()}`
          );
        }
      } else {
        enterpriseSettings = await result_1.json();
      }
    }

    let customAnalyticsScript: string | null = null;
    if (tasks.length > 2) {
      const result_2 = results[2];
      if (!result_2) {
        throw new Error("fetchCustomAnalyticsScriptSS failed.");
      }

      if (!result_2.ok) {
        if (result_2.status !== 403) {
          throw new Error(
            `fetchCustomAnalyticsScriptSS failed: status=${
              result_2.status
            } body=${await result_2.text()}`
          );
        }
      } else {
        customAnalyticsScript = await result_2.json();
      }
    }

    if (settings.deep_research_enabled == null) {
      settings.deep_research_enabled = true;
    }

    const combinedSettings: CombinedSettings = {
      settings,
      enterpriseSettings,
      customAnalyticsScript,
      webVersion: settings.version ?? getWebVersion(),
      webDomain: HOST_URL,
      // Server-side default; the real value is computed client-side in
      // SettingsProvider where connector data is available via useCCPairs.
      isSearchModeAvailable: settings.search_ui_enabled !== false,
      settingsLoading: false,
    };

    return combinedSettings;
  } catch (error) {
    console.error("fetchSettingsSS exception: ", error);
    return null;
  }
}


================================================
FILE: web/src/components/settings/usePaidEnterpriseFeaturesEnabled.ts
================================================
"use client";

import { useSettingsContext } from "@/providers/SettingsProvider";

/**
 * Hook to check if enterprise features should be enabled in the UI.
 *
 * When LICENSE_ENFORCEMENT_ENABLED=true on the backend:
 * - Returns true if user has a valid license (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER)
 * - Returns false if user has no license (community edition) or expired license (GATED_ACCESS)
 *
 * When LICENSE_ENFORCEMENT_ENABLED=false (legacy behavior):
 * - Returns true if enterpriseSettings exists (build-time constant)
 *
 * This determines whether EE-only UI features like user groups, RBAC, etc. are shown.
 */
export function usePaidEnterpriseFeaturesEnabled(): boolean {
  const combinedSettings = useSettingsContext();

  // Check the runtime license-based flag first
  // This is set by the backend based on actual license status
  if (combinedSettings.settings.ee_features_enabled !== undefined) {
    return combinedSettings.settings.ee_features_enabled;
  }

  // Fallback to legacy behavior: check if enterprise settings exist
  // This handles the case where LICENSE_ENFORCEMENT_ENABLED=false
  return combinedSettings.enterpriseSettings !== null;
}


================================================
FILE: web/src/components/sidebar/ChatSessionMorePopup.tsx
================================================
"use client";

import { ChatSession } from "@/app/app/interfaces";
import { deleteChatSession } from "@/app/app/services/lib";
import { useProjectsContext } from "@/providers/ProjectsContext";
import {
  moveChatSession as moveChatSessionService,
  removeChatSessionFromProject as removeChatSessionFromProjectService,
} from "@/app/app/projects/projectsService";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { FiMoreHorizontal } from "react-icons/fi";
import useChatSessions from "@/hooks/useChatSessions";
import { useCallback, useState, useMemo } from "react";
import MoveCustomAgentChatModal from "@/components/modals/MoveCustomAgentChatModal";
// PopoverMenu already imported above
import { cn, noProp } from "@/lib/utils";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Button } from "@opal/components";
import { PopoverSearchInput } from "@/sections/sidebar/ChatButton";
import LineItem from "@/refresh-components/buttons/LineItem";
import { SvgFolder, SvgFolderIn, SvgShare, SvgTrash } from "@opal/icons";
// Constants
const DEFAULT_PERSONA_ID = 0;
const LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY = "onyx:hideMoveCustomAgentModal";

interface ChatSessionMorePopupProps {
  chatSession: ChatSession;
  projectId?: number;
  isRenamingChat: boolean;
  setIsRenamingChat: (value: boolean) => void;
  showShareModal?: (chatSession: ChatSession) => void;
  afterDelete?: () => void;
  afterMove?: () => void;
  afterRemoveFromProject?: () => void;
  search?: boolean;
  iconSize?: number;
  isVisible?: boolean;
}

export function ChatSessionMorePopup({
  chatSession,
  projectId,
  isRenamingChat: _isRenamingChat,
  setIsRenamingChat: _setIsRenamingChat,
  showShareModal,
  afterDelete,
  afterMove,
  afterRemoveFromProject,
  search,
  iconSize = 16,
  isVisible = false,
}: ChatSessionMorePopupProps) {
  const [popoverOpen, setPopoverOpen] = useState(false);
  const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
  const { refreshChatSessions, removeSession } = useChatSessions();
  const { fetchProjects, projects } = useProjectsContext();

  const [pendingMoveProjectId, setPendingMoveProjectId] = useState<
    number | null
  >(null);
  const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =
    useState(false);

  const isChatUsingDefaultAgent = chatSession.persona_id === DEFAULT_PERSONA_ID;

  const [showMoveOptions, setShowMoveOptions] = useState(false);
  const [searchTerm, setSearchTerm] = useState("");

  const filteredProjects = projects.filter((project) =>
    project.name.toLowerCase().includes(searchTerm.toLowerCase())
  );

  const handlePopoverOpenChange = useCallback((open: boolean) => {
    setPopoverOpen(open);
  }, []);

  const handleConfirmDelete = useCallback(
    async (e: React.MouseEvent<HTMLButtonElement>) => {
      e.stopPropagation();
      await deleteChatSession(chatSession.id);
      removeSession(chatSession.id);
      await refreshChatSessions();
      await fetchProjects();
      setIsDeleteModalOpen(false);
      setPopoverOpen(false);
      afterDelete?.();
    },
    [
      chatSession,
      refreshChatSessions,
      removeSession,
      fetchProjects,
      afterDelete,
    ]
  );

  const performMove = useCallback(
    async (targetProjectId: number) => {
      await moveChatSessionService(targetProjectId, chatSession.id);
      await fetchProjects();
      await refreshChatSessions();
      setPopoverOpen(false);
      afterMove?.();
    },
    [chatSession.id, fetchProjects, refreshChatSessions, afterMove]
  );

  const handleMoveChatSession = useCallback(
    async (item: { id: number; label: string }) => {
      const targetProjectId = item.id;
      const hideModal =
        typeof window !== "undefined" &&
        window.localStorage.getItem(LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY) ===
          "true";

      if (!isChatUsingDefaultAgent && !hideModal) {
        setPendingMoveProjectId(targetProjectId);
        setShowMoveCustomAgentModal(true);
        return;
      }

      await performMove(targetProjectId);
    },
    [isChatUsingDefaultAgent, performMove]
  );

  const handleRemoveChatSessionFromProject = useCallback(async () => {
    await removeChatSessionFromProjectService(chatSession.id);
    await fetchProjects();
    await refreshChatSessions();
    afterRemoveFromProject?.();
    setPopoverOpen(false);
  }, [
    chatSession.id,
    fetchProjects,
    refreshChatSessions,
    removeChatSessionFromProjectService,
    afterRemoveFromProject,
  ]);

  // Build popover items similar to AppSidebar (no rename here)
  const popoverItems = useMemo(() => {
    if (!showMoveOptions) {
      return [
        showShareModal && (
          <LineItem
            key="share"
            icon={SvgShare}
            onClick={noProp(() => showShareModal(chatSession))}
          >
            Share
          </LineItem>
        ),
        <LineItem
          key="move"
          icon={SvgFolderIn}
          onClick={noProp(() => setShowMoveOptions(true))}
        >
          Move to Project
        </LineItem>,
        projectId && (
          <LineItem
            key="remove"
            icon={SvgFolder}
            onClick={noProp(() => handleRemoveChatSessionFromProject())}
          >
            {`Remove from ${
              projects.find((p) => p.id === projectId)?.name ?? "Project"
            }`}
          </LineItem>
        ),
        null,
        <LineItem
          key="delete"
          icon={SvgTrash}
          onClick={noProp(() => setIsDeleteModalOpen(true))}
          danger
        >
          Delete
        </LineItem>,
      ];
    }
    return [
      <PopoverSearchInput
        key="search"
        setShowMoveOptions={setShowMoveOptions}
        onSearch={setSearchTerm}
      />,
      ...filteredProjects
        .filter((candidate) => candidate.id !== projectId)
        .map((target) => (
          <LineItem
            key={target.id}
            icon={SvgFolder}
            onClick={noProp(() =>
              handleMoveChatSession({ id: target.id, label: target.name })
            )}
          >
            {target.name}
          </LineItem>
        )),
    ];
  }, [
    showMoveOptions,
    showShareModal,
    projects,
    projectId,
    filteredProjects,
    chatSession,
    setShowMoveOptions,
    setSearchTerm,
    handleMoveChatSession,
    handleRemoveChatSessionFromProject,
  ]);

  return (
    <div>
      <div className="-my-1">
        <Popover open={popoverOpen} onOpenChange={handlePopoverOpenChange}>
          <Popover.Trigger
            asChild
            onClick={(event) => {
              event.preventDefault();
              event.stopPropagation();
              handlePopoverOpenChange(!popoverOpen);
            }}
          >
            <div
              className={cn(
                "p-1 rounded cursor-pointer select-none transition-opacity duration-150",
                isVisible || popoverOpen
                  ? "opacity-100 pointer-events-auto"
                  : "opacity-0 pointer-events-none"
              )}
            >
              <FiMoreHorizontal size={iconSize} />
            </div>
          </Popover.Trigger>
          <Popover.Content
            align="end"
            side="right"
            avoidCollisions
            sideOffset={8}
          >
            <PopoverMenu>{popoverItems}</PopoverMenu>
          </Popover.Content>
        </Popover>
      </div>
      {isDeleteModalOpen && (
        <ConfirmationModalLayout
          title="Delete Chat"
          icon={SvgTrash}
          onClose={() => setIsDeleteModalOpen(false)}
          submit={
            <Button variant="danger" onClick={handleConfirmDelete}>
              Delete
            </Button>
          }
        >
          Are you sure you want to delete this chat? This action cannot be
          undone.
        </ConfirmationModalLayout>
      )}

      {showMoveCustomAgentModal && (
        <MoveCustomAgentChatModal
          onCancel={() => {
            setShowMoveCustomAgentModal(false);
            setPendingMoveProjectId(null);
          }}
          onConfirm={async (doNotShowAgain: boolean) => {
            if (doNotShowAgain && typeof window !== "undefined") {
              window.localStorage.setItem(
                LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY,
                "true"
              );
            }
            const target = pendingMoveProjectId;
            setShowMoveCustomAgentModal(false);
            setPendingMoveProjectId(null);
            if (target != null) {
              await performMove(target);
            }
          }}
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/components/sidebar/types.ts
================================================
export type pageType = "search" | "chat" | "agents" | "admin" | "shared";


================================================
FILE: web/src/components/spinner.css
================================================
.loader {
  border-top-color: #2876aa;
  -webkit-animation: spinner 1.5s linear infinite;
  animation: spinner 1.5s linear infinite;
}

@-webkit-keyframes spinner {
  0% {
    -webkit-transform: rotate(0deg);
  }
  100% {
    -webkit-transform: rotate(360deg);
  }
}

@keyframes spinner {
  0% {
    transform: rotate(0deg);
  }
  100% {
    transform: rotate(360deg);
  }
}


================================================
FILE: web/src/components/standardAnswers/StandardAnswerCategoryDropdown.tsx
================================================
import { FC } from "react";
import { StandardAnswerCategoryResponse } from "./getStandardAnswerCategoriesIfEE";
import { Label } from "@/components/Field";
import MultiSelectDropdown from "../MultiSelectDropdown";
import { StandardAnswerCategory } from "@/lib/types";
import { ErrorCallout } from "../ErrorCallout";
import { LoadingAnimation } from "../Loading";

interface StandardAnswerCategoryDropdownFieldProps {
  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
  categories: StandardAnswerCategory[];
  setCategories: (categories: StandardAnswerCategory[]) => void;
}

export const StandardAnswerCategoryDropdownField: FC<
  StandardAnswerCategoryDropdownFieldProps
> = ({ standardAnswerCategoryResponse, categories, setCategories }) => {
  if (!standardAnswerCategoryResponse.paidEnterpriseFeaturesEnabled) {
    return null;
  }

  if (standardAnswerCategoryResponse.error != null) {
    return (
      <ErrorCallout
        errorTitle="Something went wrong :("
        errorMsg={`Failed to fetch standard answer categories - ${standardAnswerCategoryResponse.error.message}`}
      />
    );
  }

  if (standardAnswerCategoryResponse.categories == null) {
    return <LoadingAnimation />;
  }

  return (
    <>
      <div>
        <Label>Standard Answer Categories</Label>
        <div className="w-64">
          <MultiSelectDropdown
            name="standard_answer_categories"
            label=""
            onChange={(selectedOptions) => {
              const selectedCategories = selectedOptions.map((option) => {
                return {
                  id: Number(option.value),
                  name: option.label,
                };
              });
              setCategories(selectedCategories);
            }}
            creatable={false}
            options={standardAnswerCategoryResponse.categories.map(
              (category) => ({
                label: category.name,
                value: category.id.toString(),
              })
            )}
            initialSelectedOptions={categories.map((category) => ({
              label: category.name,
              value: category.id.toString(),
            }))}
          />
        </div>
      </div>
    </>
  );
};


================================================
FILE: web/src/components/standardAnswers/getStandardAnswerCategoriesIfEE.tsx
================================================
import { SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED } from "@/lib/constants";
import { StandardAnswerCategory } from "@/lib/types";
import { fetchSS } from "@/lib/utilsSS";

export type StandardAnswerCategoryResponse =
  | EEStandardAnswerCategoryResponse
  | NoEEAvailable;

interface NoEEAvailable {
  paidEnterpriseFeaturesEnabled: false;
}

interface EEStandardAnswerCategoryResponse {
  paidEnterpriseFeaturesEnabled: true;
  error?: {
    message: string;
  };
  categories?: StandardAnswerCategory[];
}

export async function getStandardAnswerCategoriesIfEE(): Promise<StandardAnswerCategoryResponse> {
  if (!SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {
    return {
      paidEnterpriseFeaturesEnabled: false,
    };
  }

  const standardAnswerCategoriesResponse = await fetchSS(
    "/manage/admin/standard-answer/category"
  );
  if (!standardAnswerCategoriesResponse.ok) {
    return {
      paidEnterpriseFeaturesEnabled: true,
      error: {
        message: await standardAnswerCategoriesResponse.text(),
      },
    };
  }

  const categories =
    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];

  return {
    paidEnterpriseFeaturesEnabled: true,
    categories,
  };
}


================================================
FILE: web/src/components/table/DragHandle.tsx
================================================
import React from "react";
import { MdDragIndicator } from "react-icons/md";

interface DragHandleProps extends React.HTMLAttributes<HTMLDivElement> {
  isDragging?: boolean;
  size?: number;
}

export const DragHandle: React.FC<DragHandleProps> = ({
  isDragging,
  size = 16,
  ...props
}) => {
  return (
    <div
      className={`flex items-center justify-center ${
        isDragging ? "cursor-grabbing" : "cursor-grab"
      }`}
      {...props}
    >
      <MdDragIndicator size={size} />
    </div>
  );
};


================================================
FILE: web/src/components/table/DraggableRow.tsx
================================================
import { useSortable } from "@dnd-kit/sortable";
import { TableCell, TableRow } from "@/components/ui/table";
import { CSS } from "@dnd-kit/utilities";
import { DragHandle } from "./DragHandle";
import { Row } from "./interfaces";

export function DraggableRow({
  row,
  isAdmin = true,
  isDragOverlay = false,
}: {
  row: Row;
  isAdmin?: boolean;
  isDragOverlay?: boolean;
}) {
  const {
    attributes,
    listeners,
    transform,
    transition,
    setNodeRef,
    isDragging,
  } = useSortable({
    id: row.id,
    disabled: isDragOverlay,
  });

  const style = {
    transform: CSS.Transform.toString(transform),
    transition,
  };

  return (
    <TableRow
      ref={setNodeRef}
      style={isDragOverlay ? undefined : style}
      className={isDragging && !isDragOverlay ? "opacity-0" : ""}
    >
      <TableCell>
        {isAdmin && <DragHandle isDragging={isDragging} {...listeners} />}
      </TableCell>
      {row.cells.map((cell, index) => (
        <TableCell key={index}>{cell}</TableCell>
      ))}
    </TableRow>
  );
}


================================================
FILE: web/src/components/table/DraggableTable.tsx
================================================
import {
  Table,
  TableHead,
  TableRow,
  TableHeader,
  TableBody,
} from "@/components/ui/table";
import React, { useMemo, useState, JSX } from "react";
import {
  closestCenter,
  DndContext,
  DragEndEvent,
  DragOverlay,
  DragStartEvent,
  KeyboardSensor,
  MouseSensor,
  TouchSensor,
  UniqueIdentifier,
  useSensor,
  useSensors,
} from "@dnd-kit/core";
import { restrictToVerticalAxis } from "@dnd-kit/modifiers";
import {
  arrayMove,
  SortableContext,
  verticalListSortingStrategy,
} from "@dnd-kit/sortable";
import { DraggableRow } from "./DraggableRow";
import { Row } from "./interfaces";

export function DraggableTable({
  headers,
  rows,
  setRows,
  isAdmin,
}: {
  headers: (string | JSX.Element | null)[];
  rows: Row[];
  setRows: (newRows: UniqueIdentifier[]) => void | Promise<void>;
  isAdmin: boolean;
}) {
  const [activeId, setActiveId] = useState<UniqueIdentifier | null>();
  const items = useMemo(() => rows?.map(({ id }) => id), [rows]);
  const sensors = useSensors(
    useSensor(MouseSensor, {
      activationConstraint: {
        distance: 5,
      },
    }),
    useSensor(TouchSensor, {
      activationConstraint: {
        delay: 250,
        tolerance: 5,
      },
    }),
    useSensor(KeyboardSensor, {})
  );

  function handleDragStart(event: DragStartEvent) {
    if (isAdmin) {
      setActiveId(event.active.id);
    }
  }

  function handleDragEnd(event: DragEndEvent) {
    if (isAdmin) {
      const { active, over } = event;
      if (over !== null && active.id !== over.id) {
        const oldIndex = items.indexOf(active.id);
        const newIndex = items.indexOf(over.id);
        setRows(arrayMove(rows, oldIndex, newIndex).map((row) => row.id));
      }
    }
    setActiveId(null);
  }

  function handleDragCancel() {
    setActiveId(null);
  }

  const selectedRow = useMemo(() => {
    if (activeId === null || activeId === undefined) {
      return null;
    }
    const row = rows.find(({ id }) => id === activeId);
    return row;
  }, [activeId, rows]);

  return (
    <DndContext
      sensors={sensors}
      onDragEnd={handleDragEnd}
      onDragStart={handleDragStart}
      onDragCancel={handleDragCancel}
      collisionDetection={closestCenter}
      modifiers={[restrictToVerticalAxis]}
    >
      <Table>
        <TableHeader>
          <TableRow>
            <TableHead></TableHead>
            {headers.map((header, ind) => (
              <TableHead key={ind}>{header}</TableHead>
            ))}
          </TableRow>
        </TableHeader>

        <TableBody>
          <SortableContext items={items} strategy={verticalListSortingStrategy}>
            {rows.map((row) => (
              <DraggableRow key={row.id} row={row} isAdmin={isAdmin} />
            ))}
          </SortableContext>
        </TableBody>
      </Table>

      {isAdmin && (
        <DragOverlay>
          {selectedRow && (
            <Table>
              <TableBody>
                <DraggableRow
                  row={selectedRow}
                  isAdmin={isAdmin}
                  isDragOverlay
                />
              </TableBody>
            </Table>
          )}
        </DragOverlay>
      )}
    </DndContext>
  );
}


================================================
FILE: web/src/components/table/interfaces.ts
================================================
import { JSX } from "react";
import { UniqueIdentifier } from "@dnd-kit/core";

export interface Row {
  id: UniqueIdentifier;
  cells: (JSX.Element | string)[];
  staticModifiers?: [number, string][];
}


================================================
FILE: web/src/components/theme/ThemeProvider.tsx
================================================
"use client";

import * as React from "react";
import { ThemeProvider as NextThemesProvider } from "next-themes";

export function ThemeProvider({
  children,
  ...props
}: React.ComponentProps<typeof NextThemesProvider>) {
  return <NextThemesProvider {...props}>{children}</NextThemesProvider>;
}


================================================
FILE: web/src/components/tools/CSVContent.tsx
================================================
// CsvContent
import React, { useState, useEffect } from "react";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { ContentComponentProps } from "./ExpandableContentWrapper";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { SvgAlertCircle } from "@opal/icons";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";

const CsvContent: React.FC<ContentComponentProps> = ({
  fileDescriptor,
  expanded = false,
}) => {
  const [data, setData] = useState<Record<string, string>[]>([]);
  const [headers, setHeaders] = useState<string[]>([]);
  const [isFetching, setIsFetching] = useState(true);

  // Cache parsed CSV across mounts so closing other modals doesn't force a refetch.
  // Keyed by file id; safe because chat file ids are unique.
  const cacheKey = fileDescriptor.id;
  const cached = csvCache.get(cacheKey);

  useEffect(() => {
    if (cached) {
      setHeaders(cached.headers);
      setData(cached.data);
      setIsFetching(false);
      return;
    }

    fetchCSV(fileDescriptor.id);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [fileDescriptor.id]);

  const fetchCSV = async (id: string) => {
    setIsFetching(true);
    try {
      const response = await fetch(`/api/chat/file/${id}`, {
        cache: "force-cache",
      });
      if (!response.ok) {
        throw new Error("Failed to fetch CSV file");
      }

      const contentLength = response.headers.get("Content-Length");
      const fileSizeInMB = contentLength
        ? parseInt(contentLength) / (1024 * 1024)
        : 0;
      const MAX_FILE_SIZE_MB = 5;

      if (fileSizeInMB > MAX_FILE_SIZE_MB) {
        throw new Error("File size exceeds the maximum limit of 5MB");
      }

      const csvData = await response.text();
      const rows = parseCSV(csvData.trim());
      const firstRow = rows[0];
      if (!firstRow) {
        throw new Error("CSV file is empty");
      }
      const parsedHeaders = firstRow;
      setHeaders(parsedHeaders);

      const parsedData: Record<string, string>[] = rows
        .slice(1)
        .map((fields) => {
          return parsedHeaders.reduce<Record<string, string>>(
            (obj, header, index) => {
              const val = fields[index];
              if (val !== undefined) {
                obj[header] = val;
              }
              return obj;
            },
            {}
          );
        });
      setData(parsedData);
      csvCache.set(id, { headers: parsedHeaders, data: parsedData });
    } catch (error) {
      console.error("Error fetching CSV file:", error);
      setData([]);
      setHeaders([]);
    } finally {
      setIsFetching(false);
    }
  };

  if (isFetching) {
    return (
      <div className="flex items-center justify-center h-[300px]">
        <SimpleLoader />
      </div>
    );
  }

  return (
    <div
      className={cn(
        "flex relative overflow-auto",
        expanded ? "max-h-[600px]" : "max-h-[300px]"
      )}
    >
      <Table>
        <TableHeader className="sticky top-0 z-sticky">
          <TableRow className="bg-background-tint-01">
            {headers.map((header, index) => (
              <TableHead key={index}>
                <Text
                  as="p"
                  className="line-clamp-2 font-medium"
                  text03
                  mainUiBody
                >
                  {header}
                </Text>
              </TableHead>
            ))}
          </TableRow>
        </TableHeader>

        <TableBody>
          {data.length > 0 ? (
            data.map((row, rowIndex) => (
              <TableRow key={rowIndex}>
                {headers.map((header, cellIndex) => (
                  <TableCell
                    className={cn(
                      cellIndex === 0 && "sticky left-0 bg-background-tint-01",
                      "py-0 px-4"
                    )}
                    key={cellIndex}
                  >
                    {row[header]}
                  </TableCell>
                ))}
              </TableRow>
            ))
          ) : (
            <TableRow>
              <TableCell colSpan={headers.length} className="text-center py-8">
                <div className="flex flex-col items-center justify-center space-y-2">
                  <SvgAlertCircle className="w-8 h-8 stroke-error" />
                  <Text as="p" text03 mainUiBody>
                    {headers.length === 0
                      ? "Error loading CSV"
                      : "No data available"}
                  </Text>
                  <Text as="p" text04 mainUiBody>
                    {headers.length === 0
                      ? "The CSV file may be too large or couldn't be loaded properly."
                      : ""}
                  </Text>
                </div>
              </TableCell>
            </TableRow>
          )}
        </TableBody>
      </Table>
    </div>
  );
};

export default CsvContent;

const csvCache = new Map<
  string,
  { headers: string[]; data: Record<string, string>[] }
>();

export function parseCSV(text: string): string[][] {
  const rows: string[][] = [];
  let field = "";
  let fields: string[] = [];
  let inQuotes = false;

  for (let i = 0; i < text.length; i++) {
    const char = text[i];

    if (inQuotes) {
      if (char === '"') {
        if (i + 1 < text.length && text[i + 1] === '"') {
          field += '"';
          i++;
        } else {
          inQuotes = false;
        }
      } else {
        field += char;
      }
    } else if (char === '"') {
      inQuotes = true;
    } else if (char === ",") {
      fields.push(field);
      field = "";
    } else if (char === "\n" || char === "\r") {
      if (char === "\r" && i + 1 < text.length && text[i + 1] === "\n") {
        i++;
      }
      fields.push(field);
      field = "";
      rows.push(fields);
      fields = [];
    } else {
      field += char;
    }
  }

  if (inQuotes) {
    throw new Error("Malformed CSV: unterminated quoted field");
  }

  if (field.length > 0 || fields.length > 0) {
    fields.push(field);
    rows.push(fields);
  }

  return rows;
}


================================================
FILE: web/src/components/tools/ExpandableContentWrapper.tsx
================================================
// ExpandableContentWrapper
import React, { useState } from "react";
import { SvgDownloadCloud, SvgFold, SvgMaximize2, SvgX } from "@opal/icons";
import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { FileDescriptor } from "@/app/app/interfaces";
import { cn } from "@/lib/utils";
import PreviewModal from "@/sections/modals/PreviewModal";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";

export interface ExpandableContentWrapperProps {
  fileDescriptor: FileDescriptor;
  close: () => void;
  ContentComponent: React.ComponentType<ContentComponentProps>;
}

export interface ContentComponentProps {
  fileDescriptor: FileDescriptor;
  expanded?: boolean;
}

export default function ExpandableContentWrapper({
  fileDescriptor,
  close,
  ContentComponent,
}: ExpandableContentWrapperProps) {
  const [expanded, setExpanded] = useState(false);

  const toggleExpand = () => setExpanded((prev) => !prev);

  const downloadFile = () => {
    const a = document.createElement("a");
    a.href = `api/chat/file/${fileDescriptor.id}`;
    a.download = fileDescriptor.name || "download.csv";
    a.setAttribute("download", fileDescriptor.name || "download.csv");
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
  };

  const Content = (
    <div className="w-message-default max-w-full !rounded-lg overflow-y-hidden h-full">
      <CardHeader className="w-full bg-background-tint-02 top-0 p-3">
        <div className="flex justify-between items-center">
          <Text className="text-ellipsis line-clamp-1" text03 mainUiAction>
            {fileDescriptor.name || "Untitled"}
          </Text>
          <div className="flex flex-row items-center justify-end gap-1">
            <Button
              prominence="tertiary"
              size="sm"
              onClick={downloadFile}
              icon={SvgDownloadCloud}
              tooltip="Download file"
            />
            <Button
              prominence="tertiary"
              size="sm"
              onClick={toggleExpand}
              icon={expanded ? SvgFold : SvgMaximize2}
              tooltip={expanded ? "Minimize" : "Full screen"}
            />
            <Button
              prominence="tertiary"
              size="sm"
              onClick={close}
              icon={SvgX}
              tooltip="Hide"
            />
          </div>
        </div>
      </CardHeader>
      <Card
        className={cn(
          "!rounded-none p-0 relative mx-auto w-full",
          expanded ? "max-h-[600px]" : "max-h-[300px] h-full"
        )}
      >
        <CardContent className="p-0">
          <ContentComponent
            fileDescriptor={fileDescriptor}
            expanded={expanded}
          />
        </CardContent>
      </Card>
    </div>
  );

  const presentingDocument: MinimalOnyxDocument = {
    document_id: fileDescriptor.id,
    semantic_identifier: fileDescriptor.name ?? null,
  };

  return (
    <>
      {expanded && (
        <PreviewModal
          presentingDocument={presentingDocument}
          onClose={() => setExpanded(false)}
        />
      )}
      {!expanded && Content}
    </>
  );
}


================================================
FILE: web/src/components/tools/parseCSV.test.ts
================================================
import { parseCSV } from "./CSVContent";

describe("parseCSV", () => {
  it("parses simple comma-separated rows", () => {
    expect(parseCSV("a,b,c\n1,2,3")).toEqual([
      ["a", "b", "c"],
      ["1", "2", "3"],
    ]);
  });

  it("preserves commas inside quoted fields", () => {
    expect(parseCSV('name,address\nAlice,"123 Main St, Apt 4"')).toEqual([
      ["name", "address"],
      ["Alice", "123 Main St, Apt 4"],
    ]);
  });

  it("handles escaped double quotes inside quoted fields", () => {
    expect(parseCSV('a,b\n"say ""hello""",world')).toEqual([
      ["a", "b"],
      ['say "hello"', "world"],
    ]);
  });

  it("handles newlines inside quoted fields", () => {
    expect(parseCSV('a,b\n"line1\nline2",val')).toEqual([
      ["a", "b"],
      ["line1\nline2", "val"],
    ]);
  });

  it("handles CRLF line endings", () => {
    expect(parseCSV("a,b\r\n1,2\r\n3,4")).toEqual([
      ["a", "b"],
      ["1", "2"],
      ["3", "4"],
    ]);
  });

  it("handles empty fields", () => {
    expect(parseCSV("a,b,c\n1,,3")).toEqual([
      ["a", "b", "c"],
      ["1", "", "3"],
    ]);
  });

  it("handles a single element", () => {
    expect(parseCSV("a")).toEqual([["a"]]);
  });

  it("handles a single row with no newline", () => {
    expect(parseCSV("a,b,c")).toEqual([["a", "b", "c"]]);
  });

  it("handles quoted fields that are entirely empty", () => {
    expect(parseCSV('a,b\n"",val')).toEqual([
      ["a", "b"],
      ["", "val"],
    ]);
  });

  it("handles multiple quoted fields with commas", () => {
    expect(parseCSV('"foo, bar","baz, qux"\n"1, 2","3, 4"')).toEqual([
      ["foo, bar", "baz, qux"],
      ["1, 2", "3, 4"],
    ]);
  });

  it("throws on unterminated quoted field", () => {
    expect(() => parseCSV('a,b\n"foo,bar')).toThrow(
      "Malformed CSV: unterminated quoted field"
    );
  });

  it("throws on unterminated quote at end of input", () => {
    expect(() => parseCSV('"unterminated')).toThrow(
      "Malformed CSV: unterminated quoted field"
    );
  });

  it("returns empty array for empty input", () => {
    expect(parseCSV("")).toEqual([]);
  });
});


================================================
FILE: web/src/components/tooltip/CustomTooltip.tsx
================================================
import React, {
  ReactNode,
  useState,
  useEffect,
  useRef,
  createContext,
  useContext,
  JSX,
} from "react";
import { createPortal } from "react-dom";
import { cn } from "@/lib/utils";

// Create a context for the tooltip group
const TooltipGroupContext = createContext<{
  setGroupHovered: React.Dispatch<React.SetStateAction<boolean>>;
  groupHovered: boolean;
  hoverCountRef: React.MutableRefObject<boolean>;
}>({
  setGroupHovered: () => {},
  groupHovered: false,
  hoverCountRef: { current: false },
});

export const TooltipGroup: React.FC<{
  children: React.ReactNode;
  gap?: string;
}> = ({ children, gap }) => {
  const [groupHovered, setGroupHovered] = useState(false);
  const hoverCountRef = useRef(false);

  return (
    <TooltipGroupContext.Provider
      value={{ groupHovered, setGroupHovered, hoverCountRef }}
    >
      <div className={cn("inline-flex", gap)}>{children}</div>
    </TooltipGroupContext.Provider>
  );
};

export const CustomTooltip = ({
  content,
  children,
  large,
  light,
  citation,
  line,
  medium,
  wrap,
  showTick = false,
  delay = 300,
  position = "bottom",
  disabled = false,
  className,
}: {
  medium?: boolean;
  content: string | ReactNode;
  children: JSX.Element;
  large?: boolean;
  line?: boolean;
  light?: boolean;
  showTick?: boolean;
  delay?: number;
  wrap?: boolean;
  citation?: boolean;
  position?: "top" | "bottom";
  disabled?: boolean;
  className?: string;
}) => {
  const [isVisible, setIsVisible] = useState(false);
  const [tooltipPosition, setTooltipPosition] = useState({ top: 0, left: 0 });
  const timeoutRef = useRef<NodeJS.Timeout | null>(null);
  const triggerRef = useRef<HTMLSpanElement>(null);

  const { groupHovered, setGroupHovered, hoverCountRef } =
    useContext(TooltipGroupContext);

  const showTooltip = () => {
    hoverCountRef.current = true;

    const showDelay = groupHovered ? 0 : delay;
    timeoutRef.current = setTimeout(() => {
      setIsVisible(true);
      setGroupHovered(true);
      updateTooltipPosition();
    }, showDelay);
  };

  const hideTooltip = () => {
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
    }
    hoverCountRef.current = false;
    setIsVisible(false);
    setTimeout(() => {
      if (!hoverCountRef.current) {
        setGroupHovered(false);
      }
    }, 100);
  };

  const updateTooltipPosition = () => {
    if (triggerRef.current) {
      const rect = triggerRef.current.getBoundingClientRect();
      const scrollX = window.scrollX || window.pageXOffset;
      const scrollY = window.scrollY || window.pageYOffset;

      setTooltipPosition({
        top: (position === "top" ? rect.top - 10 : rect.bottom + 10) + scrollY,
        left: rect.left + rect.width / 2 + scrollX,
      });
    }
  };

  useEffect(() => {
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  return (
    <>
      <span
        ref={triggerRef}
        className={cn("relative inline-block", className)}
        onMouseEnter={showTooltip}
        onMouseLeave={hideTooltip}
        onMouseDown={hideTooltip}
        onClick={hideTooltip}
      >
        {children}
      </span>
      {isVisible &&
        !disabled &&
        createPortal(
          <div
            className={cn(
              "fixed z-[1000] overflow-hidden rounded-md text-neutral-50",
              "transform -translate-x-1/2 text-xs",
              "px-2 py-1.5 shadow-md animate-in fade-in-0 zoom-in-95",
              "data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95",
              "data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2",
              "data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
              citation ? "max-w-[350px]" : "max-w-40",
              large ? (medium ? "w-88" : "w-96") : line && "max-w-64 w-auto",
              light
                ? "bg-neutral-200 dark:bg-neutral-800 text-neutral-900 dark:text-neutral-50"
                : "bg-neutral-900 dark:bg-neutral-200 text-neutral-50 dark:text-neutral-900",
              className
            )}
            style={{
              top: `${tooltipPosition.top}px`,
              left: `${tooltipPosition.left}px`,
            }}
          >
            {showTick && (
              <div
                className={cn(
                  "absolute w-2 h-2 left-1/2 transform -translate-x-1/2 rotate-45",
                  position === "top" ? "bottom-1" : "-top-1",
                  light
                    ? "bg-neutral-200 dark:bg-neutral-800"
                    : "bg-neutral-900 dark:bg-neutral-200"
                )}
              />
            )}
            <div
              className={cn(
                "flex-wrap relative p-0",
                wrap && "w-full",
                !line && "flex"
              )}
              style={
                line || wrap
                  ? {
                      whiteSpace: wrap ? "normal" : "nowrap",
                      overflow: "hidden",
                      textOverflow: "ellipsis",
                    }
                  : {}
              }
            >
              {content}
            </div>
          </div>,
          document.body
        )}
    </>
  );
};


================================================
FILE: web/src/components/ui/RadioGroupItemField.tsx
================================================
import React from "react";
import { RadioGroupItem } from "@/components/ui/radio-group";

interface RadioGroupItemFieldProps {
  value: string;
  id: string;
  label: string;
  sublabel?: string;
}

export const RadioGroupItemField: React.FC<RadioGroupItemFieldProps> = ({
  value,
  id,
  label,
  sublabel,
}) => {
  const handleClick = () => {
    const radio = document.getElementById(id) as HTMLInputElement;
    if (radio) {
      radio.checked = true;
      radio.dispatchEvent(new Event("change", { bubbles: true }));
    }
  };

  return (
    <div className="flex items-start space-x-2">
      <RadioGroupItem value={value} id={id} className="mt-1" />
      <div className="flex flex-col">
        <label
          htmlFor={id}
          className="flex flex-col cursor-pointer"
          onClick={handleClick}
        >
          <span className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70">
            {label}
          </span>
          {sublabel && (
            <span className="text-sm text-muted-foreground mt-1">
              {sublabel}
            </span>
          )}
        </label>
      </div>
    </div>
  );
};


================================================
FILE: web/src/components/ui/accordion.tsx
================================================
"use client";

import * as React from "react";
import * as AccordionPrimitive from "@radix-ui/react-accordion";
import { ChevronDown } from "lucide-react";

import { cn } from "@/lib/utils";

const Accordion = AccordionPrimitive.Root;

const AccordionItem = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Item>
>(({ className, ...props }, ref) => (
  <AccordionPrimitive.Item
    ref={ref}
    className={cn(
      "border-b border-neutral-200 dark:border-neutral-600",
      className
    )}
    {...props}
  />
));
AccordionItem.displayName = "AccordionItem";

const AccordionTrigger = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Trigger>
>(({ className, children, ...props }, ref) => (
  <AccordionPrimitive.Header className="flex">
    <AccordionPrimitive.Trigger
      ref={ref}
      className={cn(
        "flex flex-1 text-base items-center text-text justify-between pb-4 font-medium transition-all hover:underline [&[data-state=open]>svg]:rotate-180",
        className
      )}
      {...props}
    >
      {children}
      <ChevronDown className="h-4 w-4 shrink-0 transition-transform duration-200" />
    </AccordionPrimitive.Trigger>
  </AccordionPrimitive.Header>
));
AccordionTrigger.displayName = "AccordionTrigger";

const AccordionContent = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Content>
>(({ className, children, ...props }, ref) => (
  <AccordionPrimitive.Content
    ref={ref}
    className={cn(
      "overflow-hidden text-sm transition-all data-[state=closed]:animate-accordion-up data-[state=open]:animate-accordion-down",
      className
    )}
    {...props}
  >
    <div className="pb-1 pt-0">{children}</div>
  </AccordionPrimitive.Content>
));
AccordionContent.displayName = "AccordionContent";

export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };


================================================
FILE: web/src/components/ui/alert.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";

import { cn } from "@/lib/utils";

const alertVariants = cva(
  "relative w-full rounded-lg border border-background-200 p-4 [&>svg~*]:pl-7 [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-neutral-950 dark:border-background-800 dark:[&>svg]:text-neutral-50",
  {
    variants: {
      variant: {
        broken:
          "border-red-500/50 text-red-500 dark:border-red-500 [&>svg]:text-red-500 dark:border-red-900/50 dark:text-red-100 dark:dark:border-red-900 dark:[&>svg]:text-red-700 bg-red-50 dark:bg-red-950",
        ark: "border-amber-500/50 text-amber-500 dark:border-amber-500 [&>svg]:text-amber-500 dark:border-amber-900/50 dark:text-amber-900 dark:dark:border-amber-900 dark:[&>svg]:text-amber-900 bg-amber-50 dark:bg-amber-950",
        info: "border-[#fff]/50 dark:border-[#fff] dark:border-[#fff]/50 dark:dark:border-[#fff]",
        default:
          "bg-neutral-50 text-neutral-darker dark:bg-neutral-950 dark:text-text",
        destructive:
          "border-red-500/50 text-red-500 dark:border-red-500 [&>svg]:text-red-500 dark:border-red-900/50 dark:text-red-600 dark:dark:border-red-900 dark:[&>svg]:text-red-900",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  }
);

const Alert = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement> & VariantProps<typeof alertVariants>
>(({ className, variant, ...props }, ref) => (
  <div
    ref={ref}
    role="alert"
    className={cn(alertVariants({ variant }), className)}
    {...props}
  />
));
Alert.displayName = "Alert";

const AlertTitle = React.forwardRef<
  HTMLParagraphElement,
  React.HTMLAttributes<HTMLHeadingElement>
>(({ className, ...props }, ref) => (
  <h5
    ref={ref}
    className={cn("mb-1 font-medium leading-none tracking-tight", className)}
    {...props}
  />
));
AlertTitle.displayName = "AlertTitle";

const AlertDescription = React.forwardRef<
  HTMLParagraphElement,
  React.HTMLAttributes<HTMLParagraphElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("text-sm [&_p]:leading-relaxed", className)}
    {...props}
  />
));
AlertDescription.displayName = "AlertDescription";

export { Alert, AlertTitle, AlertDescription };


================================================
FILE: web/src/components/ui/areaChart.tsx
================================================
"use client";

import React from "react";
import {
  Area,
  AreaChart as ReChartsAreaChart,
  CartesianGrid,
  ResponsiveContainer,
  Tooltip,
  XAxis,
  YAxis,
} from "recharts";

import {
  Card,
  CardContent,
  CardDescription,
  CardHeader,
  CardTitle,
} from "@/components/ui/card";

interface AreaChartProps {
  data?: any[];
  categories?: string[];
  index?: string;
  colors?: string[];
  showXAxis?: boolean;
  showYAxis?: boolean;
  yAxisWidth?: number;
  showAnimation?: boolean;
  showTooltip?: boolean;
  showGridLines?: boolean;
  connectNulls?: boolean;
  allowDecimals?: boolean;
  className?: string;
  title?: string;
  description?: string;
  xAxisFormatter?: (value: any) => string;
  yAxisFormatter?: (value: any) => string;
  stacked?: boolean;
}

export function AreaChartDisplay({
  data = [],
  categories = [],
  index,
  colors = ["indigo", "fuchsia"],
  showXAxis = true,
  showYAxis = true,
  yAxisWidth = 56,
  showAnimation = true,
  showTooltip = true,
  showGridLines = true,
  connectNulls = false,
  allowDecimals = true,
  className,
  title,
  description,
  xAxisFormatter = (dateStr: string) => dateStr,
  yAxisFormatter = (number: number) => number.toString(),
  stacked = false,
}: AreaChartProps) {
  return (
    <Card className={className}>
      <CardHeader>
        {title && <CardTitle>{title}</CardTitle>}
        {description && <CardDescription>{description}</CardDescription>}
      </CardHeader>
      <CardContent>
        <div className="h-[350px] w-full">
          <ResponsiveContainer width="100%" height="100%">
            <ReChartsAreaChart
              data={data}
              margin={{
                top: 10,
                right: 30,
                left: 0,
                bottom: 0,
              }}
            >
              {showGridLines && <CartesianGrid strokeDasharray="3 3" />}
              {showXAxis && (
                <XAxis
                  dataKey={index}
                  tickLine={false}
                  axisLine={false}
                  tickMargin={8}
                  tickFormatter={(value) => xAxisFormatter(value)}
                />
              )}
              {showYAxis && (
                <YAxis
                  width={yAxisWidth}
                  tickLine={false}
                  axisLine={false}
                  tickFormatter={(value) => yAxisFormatter(value)}
                  allowDecimals={allowDecimals}
                />
              )}
              {showTooltip && <Tooltip />}
              {categories.map((category, ind) => (
                <Area
                  key={category}
                  type="monotone"
                  dataKey={category}
                  stackId={stacked ? "1" : category}
                  stroke={colors[ind % colors.length]}
                  fill={colors[ind % colors.length]}
                  fillOpacity={0.3}
                  isAnimationActive={showAnimation}
                  connectNulls={connectNulls}
                />
              ))}
            </ReChartsAreaChart>
          </ResponsiveContainer>
        </div>
      </CardContent>
    </Card>
  );
}


================================================
FILE: web/src/components/ui/badge.tsx
================================================
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { cn } from "@/lib/utils";

const badgeVariants = cva(
  "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-normal transition-colors focus:outline-none focus:ring-2 focus:ring-neutral-950 focus:ring-offset-2 dark:focus:ring-neutral-300",
  {
    variants: {
      variant: {
        invalid:
          "border-orange-200 bg-orange-50 text-orange-600 dark:border-orange-700 dark:bg-orange-900 dark:text-orange-50",
        outline:
          "border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50",
        purple:
          "border-purple-200 bg-purple-50 text-purple-700 dark:border-purple-700 dark:bg-purple-900 dark:text-purple-100",
        public:
          "border-green-200 bg-green-50 text-green-700 dark:border-green-700 dark:bg-green-900 dark:text-green-100",
        private:
          "border-yellow-200 bg-yellow-50 text-yellow-700 dark:border-yellow-600 dark:bg-yellow-700 dark:text-yellow-100",
        "auto-sync":
          "border-blue-200 bg-blue-50 text-blue-700 dark:border-blue-700 dark:bg-blue-900 dark:text-blue-100",
        agent:
          "border-orange-200 bg-orange-50 text-orange-600 dark:border-orange-800 dark:bg-orange-600/20 dark:text-neutral-200",
        "agent-faded":
          "border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-800 dark:text-neutral-50",
        canceled:
          "border-gray-200 bg-gray-50 text-gray-600 dark:border-gray-700 dark:bg-gray-900 dark:text-neutral-50",
        paused:
          "border-yellow-200 bg-yellow-50 text-yellow-700 dark:border-yellow-600 dark:bg-yellow-700 dark:text-yellow-100",
        in_progress:
          "border-blue-200 bg-blue-50 text-blue-600 dark:border-blue-700 dark:bg-blue-900 dark:text-neutral-50",
        success:
          "border-green-200 bg-emerald-50 text-green-600 dark:border-green-600 dark:bg-green-900 dark:text-green-50",
        default:
          "border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50",
        secondary:
          "border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50",
        destructive:
          "border-red-200 bg-red-50 text-red-600 dark:border-red-700 dark:bg-red-900 dark:text-neutral-50",
        not_started:
          "border-purple-200 bg-purple-50 text-purple-700 dark:border-purple-700 dark:bg-purple-900 dark:text-purple-100",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  }
);

export interface BadgeProps
  extends React.HTMLAttributes<HTMLDivElement>,
    VariantProps<typeof badgeVariants> {}

function Badge({
  className,
  variant,
  color,
  icon: Icon,
  size = "sm",
  circle,
  tooltip,
  ...props
}: BadgeProps & {
  icon?: React.ElementType;
  size?: "sm" | "md" | "xs";
  circle?: boolean;
  tooltip?: string;
}) {
  const sizeClasses = {
    sm: "px-2.5 py-0.5 text-xs",
    md: "px-3 py-1 text-sm",
    xs: "px-1.5 py-0.25 text-[.5rem]",
  };

  const BadgeContent = (
    <div
      className={cn(
        "flex-none inline-flex items-center whitespace-nowrap overflow-hidden",
        badgeVariants({ variant }),
        sizeClasses[size],
        className
      )}
      {...props}
    >
      {Icon && (
        <Icon
          className={cn(
            "mr-1 flex-shrink-0",
            size === "sm" ? "h-3 w-3" : size === "xs" ? "h-2 w-2" : "h-4 w-4"
          )}
        />
      )}
      {circle && (
        <div
          className={cn(
            "mr-2 rounded-full bg-current opacity-80 flex-shrink-0",
            size === "xs" ? "h-2 w-2" : "h-2.5 w-2.5"
          )}
        />
      )}
      <span className="truncate">{props.children}</span>
    </div>
  );

  return <SimpleTooltip tooltip={tooltip}>{BadgeContent}</SimpleTooltip>;
}

export { Badge, badgeVariants };


================================================
FILE: web/src/components/ui/callout.tsx
================================================
import { cn } from "@/lib/utils";

interface CalloutProps {
  icon?: React.ReactNode;
  children?: React.ReactNode;
  type?: "default" | "warning" | "danger" | "notice";
  className?: string;
}
export function Callout({
  children,
  icon,
  type = "default",
  title,
  className,
  ...props
}: CalloutProps & { title?: string }) {
  return (
    <div
      className={cn(
        "my-6 flex items-start rounded-md border border-l-4 p-4",
        className,
        {
          "border-rose-300 bg-rose-50 dark:border-rose-500 dark:bg-rose-950/50":
            type === "danger",
          "border-amber-300 bg-amber-50 dark:border-amber-700 dark:bg-amber-900/30":
            type === "warning",
          "border-sky-300 bg-sky-50 dark:border-sky-500 dark:bg-sky-950/50":
            type === "notice",
        }
      )}
      {...props}
    >
      {icon && <span className="mr-4 text-2xl">{icon}</span>}
      <div className="flex-1">
        {title && (
          <div className="font-medium mb-1 flex items-center dark:text-[#fff]">
            {title}
          </div>
        )}
        <div className="dark:text-gray-300">{children}</div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/components/ui/card.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

const Card = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn(
      "rounded-16 border bg-background-tint-00 overflow-hidden",
      className
    )}
    {...props}
  />
));
Card.displayName = "Card";

const CardHeader = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("flex flex-col space-y-1.5 p-6", className)}
    {...props}
  />
));
CardHeader.displayName = "CardHeader";

const CardTitle = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn(
      "text-2xl font-semibold leading-none tracking-tight",
      className
    )}
    {...props}
  />
));
CardTitle.displayName = "CardTitle";

const CardDescription = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("text-sm text-neutral-500", className)}
    {...props}
  />
));
CardDescription.displayName = "CardDescription";

const CardContent = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div ref={ref} className={cn("p-6 pt-0", className)} {...props} />
));
CardContent.displayName = "CardContent";

const CardFooter = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("flex items-center p-6 pt-0", className)}
    {...props}
  />
));
CardFooter.displayName = "CardFooter";

export {
  Card,
  CardHeader,
  CardFooter,
  CardTitle,
  CardDescription,
  CardContent,
};


================================================
FILE: web/src/components/ui/dialog.tsx
================================================
"use client";

import * as React from "react";
import * as DialogPrimitive from "@radix-ui/react-dialog";
import { X } from "lucide-react";

import { cn } from "@/lib/utils";

const Dialog = DialogPrimitive.Root;

const DialogTrigger = DialogPrimitive.Trigger;

const DialogPortal = DialogPrimitive.Portal;

const DialogClose = DialogPrimitive.Close;

const DialogOverlay = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Overlay>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay> & {
    backgroundColor?: string;
    overlayClassName?: string;
  }
>(({ className, backgroundColor, overlayClassName, ...props }, ref) => (
  <DialogPrimitive.Overlay
    ref={ref}
    className={cn(
      backgroundColor || "bg-neutral-950/60",
      "fixed inset-0 z-50   data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
      overlayClassName,
      className
    )}
    {...props}
  />
));
DialogOverlay.displayName = DialogPrimitive.Overlay.displayName;

const DialogContent = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content> & {
    hideCloseIcon?: boolean;
    backgroundColor?: string;
    overlayClassName?: string;
  }
>(
  (
    {
      className,
      children,
      hideCloseIcon,
      backgroundColor,
      overlayClassName,
      ...props
    },
    ref
  ) => (
    <DialogPortal>
      <DialogOverlay
        backgroundColor={backgroundColor}
        overlayClassName={overlayClassName}
      />
      <DialogPrimitive.Content
        ref={ref}
        className={cn(
          "fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-neutral-200 bg-neutral-50 p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg dark:border-neutral-800 dark:bg-neutral-900",
          className
        )}
        {...props}
      >
        {children}
        {!hideCloseIcon && (
          <DialogPrimitive.Close className="absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-neutral-950 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-neutral-100 data-[state=open]:text-neutral-500 dark:ring-offset-neutral-950 dark:focus:ring-neutral-300 dark:data-[state=open]:bg-neutral-800 dark:data-[state=open]:text-neutral-400">
            <X className="h-4 w-4" />
            <span className="sr-only">Close</span>
          </DialogPrimitive.Close>
        )}
      </DialogPrimitive.Content>
    </DialogPortal>
  )
);
DialogContent.displayName = DialogPrimitive.Content.displayName;

const DialogHeader = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLDivElement>) => (
  <div
    className={cn(
      "flex flex-col space-y-1.5 text-center sm:text-left",
      className
    )}
    {...props}
  />
);
DialogHeader.displayName = "DialogHeader";

const DialogFooter = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLDivElement>) => (
  <div
    className={cn(
      "flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",
      className
    )}
    {...props}
  />
);
DialogFooter.displayName = "DialogFooter";

const DialogTitle = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Title>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>
>(({ className, ...props }, ref) => (
  <DialogPrimitive.Title
    ref={ref}
    className={cn(
      "text-lg font-semibold leading-none tracking-tight",
      className
    )}
    {...props}
  />
));
DialogTitle.displayName = DialogPrimitive.Title.displayName;

const DialogDescription = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Description>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>
>(({ className, ...props }, ref) => (
  <DialogPrimitive.Description
    ref={ref}
    className={cn("text-sm text-neutral-500 dark:text-neutral-400", className)}
    {...props}
  />
));
DialogDescription.displayName = DialogPrimitive.Description.displayName;

export {
  Dialog,
  DialogPortal,
  DialogOverlay,
  DialogClose,
  DialogTrigger,
  DialogContent,
  DialogHeader,
  DialogFooter,
  DialogTitle,
  DialogDescription,
};


================================================
FILE: web/src/components/ui/dropdown-menu-with-tooltip.tsx
================================================
"use client";

import * as React from "react";
import { DropdownMenuItem } from "./dropdown-menu";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { cn } from "@/lib/utils";

interface DropdownMenuItemWithTooltipProps
  extends React.ComponentPropsWithoutRef<typeof DropdownMenuItem> {
  tooltip?: string;
}

const DropdownMenuItemWithTooltip = React.forwardRef<
  React.ElementRef<typeof DropdownMenuItem>,
  DropdownMenuItemWithTooltipProps
>(({ className, tooltip, disabled, ...props }, ref) => {
  // Only show tooltip if the item is disabled and a tooltip is provided
  if (!tooltip || !disabled) {
    return (
      <DropdownMenuItem
        ref={ref}
        className={className}
        disabled={disabled}
        {...props}
      />
    );
  }

  return (
    <SimpleTooltip tooltip={tooltip}>
      <div className="cursor-not-allowed">
        <DropdownMenuItem
          ref={ref}
          className={cn(className)}
          disabled={disabled}
          {...props}
        />
      </div>
    </SimpleTooltip>
  );
});

DropdownMenuItemWithTooltip.displayName = "DropdownMenuItemWithTooltip";

export { DropdownMenuItemWithTooltip };


================================================
FILE: web/src/components/ui/dropdown-menu.tsx
================================================
"use client";

import * as React from "react";
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu";
import { Check, ChevronRight, Circle } from "lucide-react";

import { cn } from "@/lib/utils";

const DropdownMenu = DropdownMenuPrimitive.Root;

const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger;

const DropdownMenuGroup = DropdownMenuPrimitive.Group;

const DropdownMenuPortal = DropdownMenuPrimitive.Portal;

const DropdownMenuSub = DropdownMenuPrimitive.Sub;

const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup;

const DropdownMenuSubTrigger = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  }
>(({ className, inset, children, ...props }, ref) => (
  <DropdownMenuPrimitive.SubTrigger
    ref={ref}
    className={cn(
      "flex cursor-default gap-2 select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-neutral-100 data-[state=open]:bg-neutral-100 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 dark:focus:bg-neutral-800 dark:data-[state=open]:bg-neutral-800",
      inset && "pl-8",
      className
    )}
    {...props}
  >
    {children}
    <ChevronRight className="ml-auto" />
  </DropdownMenuPrimitive.SubTrigger>
));
DropdownMenuSubTrigger.displayName =
  DropdownMenuPrimitive.SubTrigger.displayName;

const DropdownMenuSubContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>
>(({ className, ...props }, ref) => (
  <DropdownMenuPrimitive.SubContent
    ref={ref}
    className={cn(
      "z-50 min-w-[8rem] overflow-hidden rounded-md border border-neutral-200 bg-white p-1 text-neutral-950 shadow-lg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-neutral-800 dark:bg-neutral-950 dark:text-neutral-50",
      className
    )}
    {...props}
  />
));
DropdownMenuSubContent.displayName =
  DropdownMenuPrimitive.SubContent.displayName;

const DropdownMenuContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
>(({ className, sideOffset = 4, ...props }, ref) => (
  <DropdownMenuPrimitive.Portal>
    <DropdownMenuPrimitive.Content
      ref={ref}
      sideOffset={sideOffset}
      className={cn(
        "z-50 min-w-[8rem] overflow-hidden rounded-md border border-neutral-200 bg-white p-1 text-neutral-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-neutral-800 dark:bg-neutral-950 dark:text-neutral-50",
        className
      )}
      {...props}
    />
  </DropdownMenuPrimitive.Portal>
));
DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName;

const DropdownMenuItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean;
  }
>(({ className, inset, ...props }, ref) => (
  <DropdownMenuPrimitive.Item
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 dark:focus:bg-neutral-800 dark:focus:text-neutral-50",
      inset && "pl-8",
      className
    )}
    {...props}
  />
));
DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName;

const DropdownMenuCheckboxItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
>(({ className, children, checked, ...props }, ref) => (
  <DropdownMenuPrimitive.CheckboxItem
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-neutral-800 dark:focus:text-neutral-50",
      className
    )}
    checked={checked}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <DropdownMenuPrimitive.ItemIndicator>
        <Check className="h-4 w-4" />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
    {children}
  </DropdownMenuPrimitive.CheckboxItem>
));
DropdownMenuCheckboxItem.displayName =
  DropdownMenuPrimitive.CheckboxItem.displayName;

const DropdownMenuRadioItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>
>(({ className, children, ...props }, ref) => (
  <DropdownMenuPrimitive.RadioItem
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-neutral-800 dark:focus:text-neutral-50",
      className
    )}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <DropdownMenuPrimitive.ItemIndicator>
        <Circle className="h-2 w-2 fill-current" />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
    {children}
  </DropdownMenuPrimitive.RadioItem>
));
DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName;

const DropdownMenuLabel = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean;
  }
>(({ className, inset, ...props }, ref) => (
  <DropdownMenuPrimitive.Label
    ref={ref}
    className={cn(
      "px-2 py-1.5 text-sm font-semibold",
      inset && "pl-8",
      className
    )}
    {...props}
  />
));
DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName;

const DropdownMenuSeparator = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
>(({ className, ...props }, ref) => (
  <DropdownMenuPrimitive.Separator
    ref={ref}
    className={cn(
      "-mx-1 my-1 h-px bg-neutral-100 dark:bg-neutral-800",
      className
    )}
    {...props}
  />
));
DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName;

const DropdownMenuShortcut = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLSpanElement>) => {
  return (
    <span
      className={cn("ml-auto text-xs tracking-widest opacity-60", className)}
      {...props}
    />
  );
};
DropdownMenuShortcut.displayName = "DropdownMenuShortcut";

export {
  DropdownMenu,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuCheckboxItem,
  DropdownMenuRadioItem,
  DropdownMenuLabel,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuGroup,
  DropdownMenuPortal,
  DropdownMenuSub,
  DropdownMenuSubContent,
  DropdownMenuSubTrigger,
  DropdownMenuRadioGroup,
};


================================================
FILE: web/src/components/ui/input.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

interface InputProps extends React.ComponentProps<"input"> {
  removeFocusRing?: boolean;
}

const Input = React.forwardRef<HTMLInputElement, InputProps>(
  ({ className, type, removeFocusRing, ...props }, ref) => {
    return (
      <input
        type={type}
        className={cn(
          "flex h-10 w-full rounded-md border border-neutral-200 bg-white px-3 py-2 text-base ring-offset-white file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-neutral-950 placeholder:text-neutral-500",
          removeFocusRing
            ? ""
            : "focus-visible:outline-none focus-visible:ring-2  focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm dark:border-neutral-800 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:file:text-neutral-50 dark:placeholder:text-neutral-400 dark:focus-visible:ring-neutral-300",
          "!focus:ring-0 !focus-visible:ring-transparent  !focus-visible:ring-0 !focus:outline-none",
          "flex h-10 w-full rounded-md border border-border bg-background/75 focus:border-border-dark focus:ring-none focus:outline-none px-3 py-2 text-base ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
          className
        )}
        ref={ref}
        {...props}
      />
    );
  }
);
Input.displayName = "Input";

export { Input };


================================================
FILE: web/src/components/ui/radio-group.tsx
================================================
"use client";

import * as React from "react";
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group";
import { Circle } from "lucide-react";

import { cn } from "@/lib/utils";

const RadioGroup = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
>(({ className, ...props }, ref) => {
  return (
    <RadioGroupPrimitive.Root
      className={cn("grid gap-2", className)}
      {...props}
      ref={ref}
    />
  );
});
RadioGroup.displayName = RadioGroupPrimitive.Root.displayName;

const RadioGroupItem = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
>(({ className, ...props }, ref) => {
  return (
    <RadioGroupPrimitive.Item
      ref={ref}
      className={cn(
        "aspect-square h-3.5 w-3.5 rounded-full border border-background-900 text-neutral-900 ring-offset-white focus:outline-none focus-visible:ring-2 focus-visible:ring-neutral-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-background-800 dark:text-neutral-50 dark:ring-offset-neutral-950 dark:focus-visible:ring-neutral-300",
        className
      )}
      {...props}
    >
      <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
        <Circle className="h-2.5 w-2.5 fill-current text-current" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  );
});
RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName;

export { RadioGroup, RadioGroupItem };


================================================
FILE: web/src/components/ui/scroll-area.tsx
================================================
"use client";

import * as React from "react";
import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";

import { cn } from "@/lib/utils";

const ScrollArea = React.forwardRef<
  React.ElementRef<typeof ScrollAreaPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.Root>
>(({ className, children, ...props }, ref) => (
  <ScrollAreaPrimitive.Root
    ref={ref}
    className={cn("relative overflow-hidden", className)}
    {...props}
  >
    <ScrollAreaPrimitive.Viewport className="h-full w-full rounded-[inherit]">
      {children}
    </ScrollAreaPrimitive.Viewport>
    <ScrollBar />
    <ScrollAreaPrimitive.Corner />
  </ScrollAreaPrimitive.Root>
));
ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName;

const ScrollBar = React.forwardRef<
  React.ElementRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,
  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>
>(({ className, orientation = "vertical", ...props }, ref) => (
  <ScrollAreaPrimitive.ScrollAreaScrollbar
    ref={ref}
    orientation={orientation}
    className={cn(
      "flex touch-none select-none transition-colors",
      orientation === "vertical" &&
        "h-full w-2.5 border-l border-l-transparent p-[1px]",
      orientation === "horizontal" &&
        "h-2.5 flex-col border-t border-t-transparent p-[1px]",
      className
    )}
    {...props}
  >
    <ScrollAreaPrimitive.ScrollAreaThumb className="relative flex-1 rounded-full bg-neutral-200 dark:bg-neutral-800" />
  </ScrollAreaPrimitive.ScrollAreaScrollbar>
));
ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName;

export { ScrollArea, ScrollBar };


================================================
FILE: web/src/components/ui/select.tsx
================================================
"use client";

import * as React from "react";
import * as SelectPrimitive from "@radix-ui/react-select";
import { Check, ChevronDown, ChevronUp } from "lucide-react";

import { cn } from "@/lib/utils";

const Select = SelectPrimitive.Root;

const SelectValue = SelectPrimitive.Value;

const SelectTrigger = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>
>(({ className, children, ...props }, ref) => (
  <SelectPrimitive.Trigger
    ref={ref}
    className={cn(
      `border flex h-10 w-full items-center justify-between rounded-md border bg-background-tint-01 p-3 ring-offset-background-neutral-00 placeholder:text-text-03 disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1`,
      className
    )}
    {...props}
  >
    {children}
    <SelectPrimitive.Icon asChild>
      <ChevronDown className="h-4 w-4 opacity-50" />
    </SelectPrimitive.Icon>
  </SelectPrimitive.Trigger>
));
SelectTrigger.displayName = SelectPrimitive.Trigger.displayName;

const SelectScrollUpButton = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.ScrollUpButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollUpButton>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollUpButton
    ref={ref}
    className={cn(
      "flex cursor-default items-center justify-center py-1",
      className
    )}
    {...props}
  >
    <ChevronUp className="h-4 w-4" />
  </SelectPrimitive.ScrollUpButton>
));
SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName;

const SelectScrollDownButton = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.ScrollDownButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollDownButton>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollDownButton
    ref={ref}
    className={cn(
      "flex cursor-default items-center justify-center py-1",
      className
    )}
    {...props}
  >
    <ChevronDown className="h-4 w-4" />
  </SelectPrimitive.ScrollDownButton>
));
SelectScrollDownButton.displayName =
  SelectPrimitive.ScrollDownButton.displayName;

const SelectContent = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content> & {
    container?: HTMLElement | null;
  }
>(({ className, children, position = "popper", container, ...props }, ref) => (
  <SelectPrimitive.Portal container={container}>
    <SelectPrimitive.Content
      ref={ref}
      className={cn(
        "relative z-[2000] max-h-96 min-w-[8rem] overflow-hidden rounded-08 border bg-background-tint-01 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
        position === "popper" &&
          "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
        className
      )}
      position={position}
      {...props}
    >
      <SelectScrollUpButton />
      <SelectPrimitive.Viewport
        className={cn(
          "p-1",
          position === "popper" &&
            "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)]"
        )}
      >
        {children}
      </SelectPrimitive.Viewport>
      <SelectScrollDownButton />
    </SelectPrimitive.Content>
  </SelectPrimitive.Portal>
));
SelectContent.displayName = SelectPrimitive.Content.displayName;

const SelectItem = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item> & {
    hideCheck?: boolean;
    icon?:
      | React.ReactNode
      | ((props: {
          size?: number | undefined;
          className?: string | undefined;
        }) => React.JSX.Element);
    selected?: boolean;
  }
>(({ className, children, hideCheck, icon, selected, ...props }, ref) => (
  <SelectPrimitive.Item
    ref={ref}
    className={cn(
      "relative flex w-full cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 outline-none focus:bg-background-tint-02 data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
      className
    )}
    {...props}
  >
    {icon ? (
      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
        {typeof icon === "function" ? icon({ size: 16, className: "" }) : icon}
      </span>
    ) : (
      !hideCheck &&
      selected && (
        <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
          <SelectPrimitive.ItemIndicator>
            <Check className="h-4 w-4" />
          </SelectPrimitive.ItemIndicator>
        </span>
      )
    )}

    <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
  </SelectPrimitive.Item>
));
SelectItem.displayName = SelectPrimitive.Item.displayName;

export {
  Select,
  SelectValue,
  SelectTrigger,
  SelectContent,
  SelectItem,
  SelectScrollUpButton,
  SelectScrollDownButton,
};


================================================
FILE: web/src/components/ui/slider.tsx
================================================
"use client";

import * as React from "react";
import * as SliderPrimitive from "@radix-ui/react-slider";

import { cn } from "@/lib/utils";

const Slider = React.forwardRef<
  React.ElementRef<typeof SliderPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof SliderPrimitive.Root>
>(({ className, ...props }, ref) => (
  <SliderPrimitive.Root
    ref={ref}
    className={cn(
      "relative flex w-full touch-none select-none items-center",
      className
    )}
    {...props}
  >
    <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-neutral-100 dark:bg-neutral-800">
      <SliderPrimitive.Range className="absolute h-full bg-neutral-900 dark:bg-neutral-50" />
    </SliderPrimitive.Track>
    <SliderPrimitive.Thumb className="block h-3 w-3 rounded-full border border-background-900 bg-white ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-neutral-950 focus-visible:ring-offset disabled:pointer-events-none disabled:opacity-50 dark:border-background-50 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:focus-visible:ring-neutral-300" />
  </SliderPrimitive.Root>
));
Slider.displayName = SliderPrimitive.Root.displayName;

export { Slider };


================================================
FILE: web/src/components/ui/table.tsx
================================================
import * as React from "react";

import { cn } from "@/lib/utils";

const Table = React.forwardRef<
  HTMLTableElement,
  React.HTMLAttributes<HTMLTableElement>
>(({ className, ...props }, ref) => (
  <div className="relative w-full overflow-auto">
    <table
      ref={ref}
      className={cn("w-full caption-bottom text-sm", className)}
      {...props}
    />
  </div>
));
Table.displayName = "Table";

const TableHeader = React.forwardRef<
  HTMLTableSectionElement,
  React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
  <thead ref={ref} className={cn("[&_tr]:border-b", className)} {...props} />
));
TableHeader.displayName = "TableHeader";

const TableBody = React.forwardRef<
  HTMLTableSectionElement,
  React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
  <tbody
    ref={ref}
    className={cn("[&_tr:last-child]:border-0", className)}
    {...props}
  />
));
TableBody.displayName = "TableBody";

const TableFooter = React.forwardRef<
  HTMLTableSectionElement,
  React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
  <tfoot
    ref={ref}
    className={cn(
      "border-t bg-neutral-100/50 font-medium [&>tr]:last:border-b-0 dark:bg-neutral-800/50",
      className
    )}
    {...props}
  />
));
TableFooter.displayName = "TableFooter";

const TableRow = React.forwardRef<
  HTMLTableRowElement,
  React.HTMLAttributes<HTMLTableRowElement> & { noHover?: boolean }
>(({ className, noHover, ...props }, ref) => (
  <tr
    ref={ref}
    className={cn(
      `border-b transition-colors  data-[state=selected]:bg-neutral-100 dark:data-[state=selected]:bg-neutral-800 ${
        noHover ? "" : "dark:hover:bg-neutral-800/80 hover:bg-neutral-100/50"
      }`,
      className
    )}
    {...props}
  />
));
TableRow.displayName = "TableRow";

const TableHead = React.forwardRef<
  HTMLTableCellElement,
  React.ThHTMLAttributes<HTMLTableCellElement>
>(({ className, ...props }, ref) => (
  <th
    ref={ref}
    className={cn(
      "h-12 px-4 text-left align-middle font-medium text-neutral-500 [&:has([role=checkbox])]:pr-0 dark:text-neutral-400",
      className
    )}
    {...props}
  />
));
TableHead.displayName = "TableHead";

const TableCell = React.forwardRef<
  HTMLTableCellElement,
  React.TdHTMLAttributes<HTMLTableCellElement>
>(({ className, ...props }, ref) => (
  <td
    ref={ref}
    className={cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)}
    {...props}
  />
));
TableCell.displayName = "TableCell";

const TableCaption = React.forwardRef<
  HTMLTableCaptionElement,
  React.HTMLAttributes<HTMLTableCaptionElement>
>(({ className, ...props }, ref) => (
  <caption
    ref={ref}
    className={cn(
      "mt-4 text-sm text-neutral-500 dark:text-neutral-400",
      className
    )}
    {...props}
  />
));
TableCaption.displayName = "TableCaption";

export {
  Table,
  TableHeader,
  TableBody,
  TableFooter,
  TableHead,
  TableRow,
  TableCell,
  TableCaption,
};


================================================
FILE: web/src/components/ui/title.tsx
================================================
import { cn } from "@/lib/utils";

export default function Title({
  children,
  className,
  size = "sm",
}: {
  children: React.ReactNode;
  className?: string;
  size?: "lg" | "md" | "sm";
}) {
  return (
    <h1
      className={cn(
        "text-lg text-neutral-800 dark:text-neutral-200 font-medium",
        size === "lg" && "text-2xl",
        size === "md" && "text-xl",
        size === "sm" && "text-lg",
        className
      )}
    >
      {children}
    </h1>
  );
}


================================================
FILE: web/src/components/ui/tooltip.tsx
================================================
"use client";

import * as React from "react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@/lib/utils";

// Default the provider delay to a snappier, consistent value
const TooltipProvider: React.FC<
  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Provider>
> = ({ delayDuration = 400, skipDelayDuration = 200, ...props }) => (
  <TooltipPrimitive.Provider
    delayDuration={delayDuration}
    skipDelayDuration={skipDelayDuration}
    {...props}
  />
);

const Tooltip = TooltipPrimitive.Root;

const TooltipTrigger = React.forwardRef<
  React.ElementRef<typeof TooltipPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Trigger>
>(({ type = "button", ...props }, ref) => (
  <TooltipPrimitive.Trigger ref={ref} type={type} {...props} />
));
TooltipTrigger.displayName = TooltipPrimitive.Trigger.displayName;

type TooltipSize = "sm" | "md" | "lg";

const tooltipSizeClasses: Record<TooltipSize, string> = {
  sm: "px-2 py-1 max-w-[12rem]",
  md: "px-3 py-2 max-w-[20rem]",
  lg: "px-3 py-2 max-w-[30rem]",
};

const TooltipContent = React.forwardRef<
  React.ElementRef<typeof TooltipPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Content> & {
    width?: string;
    showTick?: boolean;
    tickSide?: "top" | "bottom" | "left" | "right";
    side?: "top" | "bottom" | "left" | "right";
    size?: TooltipSize;
  }
>(
  (
    {
      className,
      sideOffset = 4,
      width,
      showTick = false,
      tickSide = "bottom",
      side = "top",
      size = "lg",
      ...props
    },
    ref
  ) => (
    <TooltipPrimitive.Portal>
      <TooltipPrimitive.Content
        ref={ref}
        sideOffset={sideOffset}
        side={side}
        className={cn(
          "z-tooltip rounded-08 text-text-light-05 animate-in fade-in-0 zoom-in-95 bg-background-neutral-dark-03 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
          tooltipSizeClasses[size],
          width,
          className
        )}
        {...props}
      >
        {showTick && (
          <div
            className={cn(
              "absolute w-2 h-2 bg-inherit rotate-45",
              tickSide === "top" && "-top-1 left-1/2 -translate-x-1/2",
              tickSide === "bottom" && "-bottom-1 left-1/2 -translate-x-1/2",
              tickSide === "left" && "-left-1 top-1/2 -translate-y-1/2",
              tickSide === "right" && "-right-1 top-1/2 -translate-y-1/2"
            )}
          />
        )}
        {props.children}
      </TooltipPrimitive.Content>
    </TooltipPrimitive.Portal>
  )
);
TooltipContent.displayName = TooltipPrimitive.Content.displayName;

export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider };


================================================
FILE: web/src/components/voice/Waveform.tsx
================================================
"use client";

import { useEffect, useState, useMemo, useRef } from "react";
import { cn } from "@/lib/utils";
import { formatElapsedTime } from "@/lib/dateUtils";
import { Button } from "@opal/components";
import {
  SvgMicrophone,
  SvgMicrophoneOff,
  SvgVolume,
  SvgVolumeOff,
} from "@opal/icons";

// Recording waveform constants
const RECORDING_BAR_COUNT = 120;
const MIN_BAR_HEIGHT = 2;
const MAX_BAR_HEIGHT = 16;

// Speaking waveform constants
const SPEAKING_BAR_COUNT = 28;

interface WaveformProps {
  /** Visual style and behavior variant */
  variant: "speaking" | "recording";
  /** Whether the waveform is actively animating */
  isActive: boolean;
  /** Whether audio is muted */
  isMuted?: boolean;
  /** Current microphone audio level (0-1), only used for recording variant */
  audioLevel?: number;
  /** Callback when mute button is clicked */
  onMuteToggle?: () => void;
}

function Waveform({
  variant,
  isActive,
  isMuted = false,
  audioLevel = 0,
  onMuteToggle,
}: WaveformProps) {
  // ─── Recording variant state ───────────────────────────────────────────────
  const [elapsedSeconds, setElapsedSeconds] = useState(0);
  const [barHeights, setBarHeights] = useState<number[]>(
    () => new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]
  );
  const animationRef = useRef<number | null>(null);
  const lastPushTimeRef = useRef(0);
  const audioLevelRef = useRef(audioLevel);
  audioLevelRef.current = audioLevel;

  // ─── Speaking variant bars ─────────────────────────────────────────────────
  const speakingBars = useMemo(() => {
    return Array.from({ length: SPEAKING_BAR_COUNT }, (_, i) => ({
      id: i,
      // Create a natural wave pattern with height variation
      baseHeight: Math.sin(i * 0.4) * 5 + 8,
      delay: i * 0.025,
    }));
  }, []);

  // ─── Recording: Timer effect ───────────────────────────────────────────────
  useEffect(() => {
    if (variant !== "recording") return;

    if (!isActive) {
      setElapsedSeconds(0);
      return;
    }

    const interval = setInterval(() => {
      setElapsedSeconds((prev) => prev + 1);
    }, 1000);

    return () => clearInterval(interval);
  }, [variant, isActive]);

  // ─── Recording: Audio level visualization effect ───────────────────────────
  useEffect(() => {
    if (variant !== "recording") return;

    if (!isActive) {
      setBarHeights(
        new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]
      );
      lastPushTimeRef.current = 0;
      return;
    }

    const updateBars = (timestamp: number) => {
      // Push a new bar roughly every 50ms (~20fps scrolling)
      if (timestamp - lastPushTimeRef.current >= 50) {
        lastPushTimeRef.current = timestamp;
        const level = isMuted ? 0 : audioLevelRef.current;
        const height =
          MIN_BAR_HEIGHT + level * (MAX_BAR_HEIGHT - MIN_BAR_HEIGHT);

        setBarHeights((prev) => {
          const next = prev.slice(1);
          next.push(height);
          return next;
        });
      }

      animationRef.current = requestAnimationFrame(updateBars);
    };

    animationRef.current = requestAnimationFrame(updateBars);

    return () => {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
        animationRef.current = null;
      }
    };
  }, [variant, isActive, isMuted]);

  const formattedTime = useMemo(
    () => formatElapsedTime(elapsedSeconds),
    [elapsedSeconds]
  );

  if (!isActive) {
    return null;
  }

  // ─── Speaking variant render ───────────────────────────────────────────────
  if (variant === "speaking") {
    return (
      <div className="flex items-center gap-0.5 p-1.5 bg-background-tint-00 rounded-16 shadow-01">
        {/* Waveform container */}
        <div className="flex items-center p-1 bg-background-tint-00 rounded-12 max-w-[144px] min-h-[32px]">
          <div className="flex items-center p-1">
            {/* Waveform bars */}
            <div className="flex items-center justify-center gap-[2px] h-4 w-[120px] overflow-hidden">
              {speakingBars.map((bar) => (
                <div
                  key={bar.id}
                  className={cn(
                    "w-[3px] rounded-full",
                    isMuted ? "bg-text-03" : "bg-theme-blue-05",
                    !isMuted && "animate-waveform"
                  )}
                  style={{
                    height: isMuted ? "2px" : `${bar.baseHeight}px`,
                    animationDelay: isMuted ? undefined : `${bar.delay}s`,
                  }}
                />
              ))}
            </div>
          </div>
        </div>

        {/* Divider */}
        <div className="w-0.5 self-stretch bg-border-02" />

        {/* Volume button */}
        {onMuteToggle && (
          <div className="flex items-center p-1 bg-background-tint-00 rounded-12">
            <Button
              icon={isMuted ? SvgVolumeOff : SvgVolume}
              onClick={onMuteToggle}
              prominence="tertiary"
              size="sm"
              tooltip={isMuted ? "Unmute" : "Mute"}
            />
          </div>
        )}
      </div>
    );
  }

  // ─── Recording variant render ──────────────────────────────────────────────
  return (
    <div className="flex items-center gap-3 px-3 py-2 bg-background-tint-00 rounded-12 min-h-[32px]">
      {/* Waveform visualization driven by real audio levels */}
      <div className="flex-1 flex items-center justify-between h-4 overflow-hidden">
        {barHeights.map((height, i) => (
          <div
            key={i}
            className="w-[1.5px] bg-text-03 rounded-full shrink-0 transition-[height] duration-75"
            style={{ height: `${height}px` }}
          />
        ))}
      </div>

      {/* Timer */}
      <span className="font-mono text-xs text-text-03 tabular-nums shrink-0">
        {formattedTime}
      </span>

      {/* Mute button */}
      {onMuteToggle && (
        <Button
          icon={isMuted ? SvgMicrophoneOff : SvgMicrophone}
          onClick={onMuteToggle}
          prominence="tertiary"
          size="sm"
          aria-label={isMuted ? "Unmute microphone" : "Mute microphone"}
        />
      )}
    </div>
  );
}

export default Waveform;


================================================
FILE: web/src/ee/LICENSE
================================================
The Onyx Enterprise License (the "Enterprise License")
Copyright (c) 2023-present DanswerAI, Inc.

With regard to the Onyx Software:

This software and associated documentation files (the "Software") may only be
used in production, if you (and any entity that you represent) have agreed to,
and are in compliance with, the Onyx Subscription Terms of Service, available
at https://www.onyx.app/legal/self-host (the "Enterprise Terms"), or other
agreement governing the use of the Software, as agreed by you and DanswerAI,
and otherwise have a valid Onyx Enterprise License for the
correct number of user seats. Subject to the foregoing sentence, you are free to
modify this Software and publish patches to the Software. You agree that DanswerAI
and/or its licensors (as applicable) retain all right, title and interest in and
to all such modifications and/or patches, and all such modifications and/or
patches may only be used, copied, modified, displayed, distributed, or otherwise
exploited with a valid Onyx Enterprise License for the correct
number of user seats. Notwithstanding the foregoing, you may copy and modify
the Software for development and testing purposes, without requiring a
subscription. You agree that DanswerAI and/or its licensors (as applicable) retain
all right, title and interest in and to all such modifications. You are not
granted any other rights beyond what is expressly stated herein. Subject to the
foregoing, it is forbidden to copy, merge, publish, distribute, sublicense,
and/or sell the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

For all third party components incorporated into the Onyx Software, those
components are licensed under the original license provided by the owner of the
applicable component.


================================================
FILE: web/src/ee/hooks/useHookExecutionLogs.ts
================================================
import useSWR from "swr";
import { fetchExecutionLogs } from "@/ee/refresh-pages/admin/HooksPage/svc";
import type { HookExecutionRecord } from "@/ee/refresh-pages/admin/HooksPage/interfaces";

const ONE_HOUR_MS = 60 * 60 * 1000;
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;

interface UseHookExecutionLogsResult {
  isLoading: boolean;
  error: Error | undefined;
  hasRecentErrors: boolean;
  recentErrors: HookExecutionRecord[];
  olderErrors: HookExecutionRecord[];
}

export function useHookExecutionLogs(
  hookId: number,
  limit = 10
): UseHookExecutionLogsResult {
  const { data, isLoading, error } = useSWR(
    ["hook-execution-logs", hookId, limit],
    () => fetchExecutionLogs(hookId, limit),
    { refreshInterval: 60_000 }
  );

  const now = Date.now();

  const recentErrors =
    data?.filter(
      (log) => now - new Date(log.created_at).getTime() < ONE_HOUR_MS
    ) ?? [];

  const olderErrors =
    data?.filter((log) => {
      const age = now - new Date(log.created_at).getTime();
      return age >= ONE_HOUR_MS && age < THIRTY_DAYS_MS;
    }) ?? [];

  const hasRecentErrors = recentErrors.length > 0;

  return { isLoading, error, hasRecentErrors, recentErrors, olderErrors };
}


================================================
FILE: web/src/ee/hooks/useHookSpecs.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { HookPointMeta } from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useHookSpecs() {
  const { data, isLoading, error } = useSWR<HookPointMeta[]>(
    SWR_KEYS.hookSpecs,
    errorHandlingFetcher,
    { revalidateOnFocus: false }
  );

  return { specs: data, isLoading, error };
}


================================================
FILE: web/src/ee/hooks/useHooks.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { HookResponse } from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useHooks() {
  const { data, isLoading, error, mutate } = useSWR<HookResponse[]>(
    SWR_KEYS.hooks,
    errorHandlingFetcher,
    { revalidateOnFocus: false }
  );

  return { hooks: data, isLoading, error, mutate };
}


================================================
FILE: web/src/ee/lib/search/svc.ts
================================================
/**
 * Search API Helper Functions
 */

import type {
  BaseFilters,
  SearchFlowClassificationRequest,
  SearchFlowClassificationResponse,
  SearchFullResponse,
  SearchHistoryResponse,
  SendSearchQueryRequest,
} from "@/lib/search/interfaces";

/**
 * Classify a query as search or chat flow
 */
export async function classifyQuery(
  query: string,
  signal?: AbortSignal
): Promise<SearchFlowClassificationResponse> {
  const response = await fetch("/api/search/search-flow-classification", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      user_query: query,
    } as SearchFlowClassificationRequest),
    signal,
  });

  if (!response.ok) {
    throw new Error(`Classification failed: ${response.statusText}`);
  }

  return response.json();
}

/**
 * Perform a document search
 */
export async function searchDocuments(
  query: string,
  options?: {
    filters?: BaseFilters;
    numHits?: number;
    includeContent?: boolean;
    signal?: AbortSignal;
  }
): Promise<SearchFullResponse> {
  const request: SendSearchQueryRequest = {
    search_query: query,
    filters: options?.filters,
    num_hits: options?.numHits ?? 30,
    include_content: options?.includeContent ?? false,
    stream: false,
  };

  const response = await fetch("/api/search/send-search-message", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
    signal: options?.signal,
  });

  if (!response.ok) {
    throw new Error(`Search failed: ${response.statusText}`);
  }

  return response.json();
}

/**
 * Fetch search history for the current user
 */
export async function fetchSearchHistory(options?: {
  limit?: number;
  filterDays?: number;
  signal?: AbortSignal;
}): Promise<SearchHistoryResponse> {
  const params = new URLSearchParams();
  if (options?.limit) params.set("limit", options.limit.toString());
  if (options?.filterDays)
    params.set("filter_days", options.filterDays.toString());

  const response = await fetch(
    `/api/search/search-history?${params.toString()}`,
    {
      signal: options?.signal,
    }
  );

  if (!response.ok) {
    throw new Error(`Failed to fetch search history: ${response.statusText}`);
  }

  return response.json();
}


================================================
FILE: web/src/ee/providers/QueryControllerProvider.tsx
================================================
"use client";

import { useCallback, useEffect, useRef, useState, useMemo } from "react";
import {
  BaseFilters,
  SearchDocWithContent,
  SearchFlowClassificationResponse,
  SearchFullResponse,
} from "@/lib/search/interfaces";
import { classifyQuery, searchDocuments } from "@/ee/lib/search/svc";
import useAppFocus from "@/hooks/useAppFocus";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { useUser } from "@/providers/UserProvider";
import {
  QueryControllerContext,
  QueryControllerValue,
  QueryState,
  AppMode,
} from "@/providers/QueryControllerProvider";

interface QueryControllerProviderProps {
  children: React.ReactNode;
}

export function QueryControllerProvider({
  children,
}: QueryControllerProviderProps) {
  const appFocus = useAppFocus();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const settings = useSettingsContext();
  const { isSearchModeAvailable: searchUiEnabled } = settings;
  const { user } = useUser();

  // ── Merged query state (discriminated union) ──────────────────────────
  const [state, setState] = useState<QueryState>({
    phase: "idle",
    appMode: "chat",
  });

  // Persistent app-mode preference — survives phase transitions and is
  // used to restore the correct mode when resetting back to idle.
  const appModeRef = useRef<AppMode>("chat");

  // ── App mode sync from user preferences ───────────────────────────────
  const persistedMode = user?.preferences?.default_app_mode;

  useEffect(() => {
    let mode: AppMode = "chat";
    if (isPaidEnterpriseFeaturesEnabled && searchUiEnabled && persistedMode) {
      const lower = persistedMode.toLowerCase();
      mode = (["auto", "search", "chat"] as const).includes(lower as AppMode)
        ? (lower as AppMode)
        : "chat";
    }
    appModeRef.current = mode;
    setState((prev) =>
      prev.phase === "idle" ? { phase: "idle", appMode: mode } : prev
    );
  }, [isPaidEnterpriseFeaturesEnabled, searchUiEnabled, persistedMode]);

  const setAppMode = useCallback(
    (mode: AppMode) => {
      if (!isPaidEnterpriseFeaturesEnabled || !searchUiEnabled) return;
      setState((prev) => {
        if (prev.phase !== "idle") return prev;
        appModeRef.current = mode;
        return { phase: "idle", appMode: mode };
      });
    },
    [isPaidEnterpriseFeaturesEnabled, searchUiEnabled]
  );

  // ── Ancillary state ───────────────────────────────────────────────────
  const [query, setQuery] = useState<string | null>(null);
  const [searchResults, setSearchResults] = useState<SearchDocWithContent[]>(
    []
  );
  const [llmSelectedDocIds, setLlmSelectedDocIds] = useState<string[] | null>(
    null
  );
  const [error, setError] = useState<string | null>(null);

  // Abort controllers for in-flight requests
  const classifyAbortRef = useRef<AbortController | null>(null);
  const searchAbortRef = useRef<AbortController | null>(null);

  /**
   * Perform document search (pure data-fetching, no phase side effects)
   */
  const performSearch = useCallback(
    async (searchQuery: string, filters?: BaseFilters): Promise<void> => {
      if (searchAbortRef.current) {
        searchAbortRef.current.abort();
      }

      const controller = new AbortController();
      searchAbortRef.current = controller;

      try {
        const response: SearchFullResponse = await searchDocuments(
          searchQuery,
          {
            filters,
            numHits: 30,
            includeContent: false,
            signal: controller.signal,
          }
        );

        if (response.error) {
          setError(response.error);
          setSearchResults([]);
          setLlmSelectedDocIds(null);
          return;
        }

        setError(null);
        setSearchResults(response.search_docs);
        setLlmSelectedDocIds(response.llm_selected_doc_ids ?? null);
      } catch (err) {
        if (err instanceof Error && err.name === "AbortError") {
          throw err;
        }

        setError("Document search failed. Please try again.");
        setSearchResults([]);
        setLlmSelectedDocIds(null);
      }
    },
    []
  );

  /**
   * Classify a query as search or chat
   */
  const performClassification = useCallback(
    async (classifyQueryText: string): Promise<"search" | "chat"> => {
      if (classifyAbortRef.current) {
        classifyAbortRef.current.abort();
      }

      const controller = new AbortController();
      classifyAbortRef.current = controller;

      try {
        const response: SearchFlowClassificationResponse = await classifyQuery(
          classifyQueryText,
          controller.signal
        );

        const result = response.is_search_flow ? "search" : "chat";
        return result;
      } catch (error) {
        if (error instanceof Error && error.name === "AbortError") {
          throw error;
        }

        setError("Query classification failed. Falling back to chat.");
        return "chat";
      }
    },
    []
  );

  /**
   * Submit a query - routes based on app mode
   */
  const submit = useCallback(
    async (
      submitQuery: string,
      onChat: (query: string) => void,
      filters?: BaseFilters
    ): Promise<void> => {
      setQuery(submitQuery);
      setError(null);

      const currentAppMode = appModeRef.current;

      // Always route through chat if:
      // 1. Not Enterprise Enabled
      // 2. Admin has disabled the Search UI
      // 3. Not in the "New Session" tab
      // 4. In "New Session" tab but app-mode is "Chat"
      if (
        !isPaidEnterpriseFeaturesEnabled ||
        !searchUiEnabled ||
        !appFocus.isNewSession() ||
        currentAppMode === "chat"
      ) {
        setState({ phase: "chat" });
        setSearchResults([]);
        setLlmSelectedDocIds(null);
        onChat(submitQuery);
        return;
      }

      // Search mode: immediately show SearchUI with loading state
      if (currentAppMode === "search") {
        setState({ phase: "searching" });
        try {
          await performSearch(submitQuery, filters);
        } catch (err) {
          if (err instanceof Error && err.name === "AbortError") return;
          throw err;
        }
        setState({ phase: "search-results" });
        return;
      }

      // Auto mode: classify first, then route
      setState({ phase: "classifying" });
      try {
        const result = await performClassification(submitQuery);

        if (result === "search") {
          setState({ phase: "searching" });
          await performSearch(submitQuery, filters);
          setState({ phase: "search-results" });
          appModeRef.current = "search";
        } else {
          setState({ phase: "chat" });
          setSearchResults([]);
          setLlmSelectedDocIds(null);
          onChat(submitQuery);
        }
      } catch (error) {
        if (error instanceof Error && error.name === "AbortError") {
          return;
        }

        setState({ phase: "chat" });
        setSearchResults([]);
        setLlmSelectedDocIds(null);
        onChat(submitQuery);
      }
    },
    [
      appFocus,
      performClassification,
      performSearch,
      isPaidEnterpriseFeaturesEnabled,
      searchUiEnabled,
    ]
  );

  /**
   * Re-run the current search query with updated server-side filters
   */
  const refineSearch = useCallback(
    async (filters: BaseFilters): Promise<void> => {
      if (!query) return;
      setState({ phase: "searching" });
      try {
        await performSearch(query, filters);
      } catch (err) {
        if (err instanceof Error && err.name === "AbortError") return;
        throw err;
      }
      setState({ phase: "search-results" });
    },
    [query, performSearch]
  );

  /**
   * Reset all state to initial values
   */
  const reset = useCallback(() => {
    if (classifyAbortRef.current) {
      classifyAbortRef.current.abort();
      classifyAbortRef.current = null;
    }
    if (searchAbortRef.current) {
      searchAbortRef.current.abort();
      searchAbortRef.current = null;
    }

    setQuery(null);
    setState({ phase: "idle", appMode: appModeRef.current });
    setSearchResults([]);
    setLlmSelectedDocIds(null);
    setError(null);
  }, []);

  const value: QueryControllerValue = useMemo(
    () => ({
      state,
      setAppMode,
      searchResults,
      llmSelectedDocIds,
      error,
      submit,
      refineSearch,
      reset,
    }),
    [
      state,
      setAppMode,
      searchResults,
      llmSelectedDocIds,
      error,
      submit,
      refineSearch,
      reset,
    ]
  );

  // Sync state with navigation context
  useEffect(reset, [appFocus, reset]);

  return (
    <QueryControllerContext.Provider value={value}>
      {children}
    </QueryControllerContext.Provider>
  );
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/HookFormModal.tsx
================================================
"use client";

import { useState } from "react";
import { Formik, Form, useFormikContext } from "formik";
import * as Yup from "yup";
import { Button, Text } from "@opal/components";
import {
  SvgCheckCircle,
  SvgShareWebhook,
  SvgLoader,
  SvgRevert,
} from "@opal/icons";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import { Section } from "@/layouts/general-layouts";
import { Content, ContentAction } from "@opal/layouts";
import { toast } from "@/hooks/useToast";
import {
  createHook,
  updateHook,
  HookAuthError,
  HookTimeoutError,
  HookConnectError,
} from "@/ee/refresh-pages/admin/HooksPage/svc";
import type {
  HookFailStrategy,
  HookFormState,
  HookPointMeta,
  HookResponse,
  HookUpdateRequest,
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface HookFormModalProps {
  onOpenChange: (open: boolean) => void;
  /** When provided, the modal is in edit mode for this hook. */
  hook?: HookResponse;
  /** When provided (create mode), the hook point is pre-selected and locked. */
  spec?: HookPointMeta;
  onSuccess: (hook: HookResponse) => void;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

const MAX_TIMEOUT_SECONDS = 600;

const SOFT_DESCRIPTION =
  "If the endpoint returns an error, Onyx logs it and continues the pipeline as normal, ignoring the hook result.";

function buildInitialValues(
  hook: HookResponse | undefined,
  spec: HookPointMeta | undefined
): HookFormState {
  if (hook) {
    return {
      name: hook.name,
      endpoint_url: hook.endpoint_url ?? "",
      api_key: "",
      fail_strategy: hook.fail_strategy,
      timeout_seconds: String(hook.timeout_seconds),
    };
  }
  return {
    name: "",
    endpoint_url: "",
    api_key: "",
    fail_strategy: spec?.default_fail_strategy ?? "hard",
    timeout_seconds: spec ? String(spec.default_timeout_seconds) : "30",
  };
}

function buildValidationSchema(isEdit: boolean) {
  return Yup.object().shape({
    name: Yup.string().trim().required("Display name cannot be empty."),
    endpoint_url: Yup.string().trim().required("Endpoint URL cannot be empty."),
    api_key: isEdit
      ? Yup.string()
      : Yup.string().trim().required("API key cannot be empty."),
    timeout_seconds: Yup.string()
      .required("Timeout is required.")
      .test(
        "valid-timeout",
        `Must be greater than 0 and at most ${MAX_TIMEOUT_SECONDS} seconds.`,
        (val) => {
          const num = parseFloat(val ?? "");
          return !isNaN(num) && num > 0 && num <= MAX_TIMEOUT_SECONDS;
        }
      ),
  });
}

// ---------------------------------------------------------------------------
// Timeout field (needs access to spec for revert button)
// ---------------------------------------------------------------------------

interface TimeoutFieldProps {
  spec: HookPointMeta | undefined;
}

function TimeoutField({ spec }: TimeoutFieldProps) {
  const { values, setFieldValue, isSubmitting } =
    useFormikContext<HookFormState>();

  return (
    <InputLayouts.Vertical
      name="timeout_seconds"
      title="Timeout"
      suffix="(seconds)"
      subDescription={`Maximum time Onyx will wait for the endpoint to respond before applying the fail strategy. Must be greater than 0 and at most ${MAX_TIMEOUT_SECONDS} seconds.`}
    >
      <div className="[&_input]:!font-main-ui-mono [&_input::placeholder]:!font-main-ui-mono [&_input]:![appearance:textfield] [&_input::-webkit-outer-spin-button]:!appearance-none [&_input::-webkit-inner-spin-button]:!appearance-none w-full">
        <InputTypeInField
          name="timeout_seconds"
          type="number"
          placeholder={spec ? String(spec.default_timeout_seconds) : undefined}
          variant={isSubmitting ? "disabled" : undefined}
          showClearButton={false}
          rightSection={
            spec?.default_timeout_seconds !== undefined &&
            values.timeout_seconds !== String(spec.default_timeout_seconds) ? (
              <Button
                prominence="tertiary"
                size="xs"
                icon={SvgRevert}
                tooltip="Revert to Default"
                onClick={() =>
                  setFieldValue(
                    "timeout_seconds",
                    String(spec.default_timeout_seconds)
                  )
                }
                disabled={isSubmitting}
              />
            ) : undefined
          }
        />
      </div>
    </InputLayouts.Vertical>
  );
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function HookFormModal({
  onOpenChange,
  hook,
  spec,
  onSuccess,
}: HookFormModalProps) {
  const isEdit = !!hook;
  const [isConnected, setIsConnected] = useState(false);
  const [apiKeyCleared, setApiKeyCleared] = useState(false);

  const initialValues = buildInitialValues(hook, spec);
  const validationSchema = buildValidationSchema(isEdit);

  function handleClose() {
    onOpenChange(false);
  }

  const hookPointDisplayName =
    spec?.display_name ?? spec?.hook_point ?? hook?.hook_point ?? "";
  const hookPointDescription = spec?.description;
  const docsUrl = spec?.docs_url;

  return (
    <Modal open onOpenChange={(open) => !open && handleClose()}>
      <Modal.Content width="md" height="fit">
        <Formik
          initialValues={initialValues}
          validationSchema={validationSchema}
          validateOnMount
          onSubmit={async (values, helpers) => {
            try {
              let result: HookResponse;
              if (isEdit && hook) {
                const req: HookUpdateRequest = {};
                if (values.name !== hook.name) req.name = values.name;
                if (values.endpoint_url !== (hook.endpoint_url ?? ""))
                  req.endpoint_url = values.endpoint_url;
                if (values.fail_strategy !== hook.fail_strategy)
                  req.fail_strategy = values.fail_strategy;
                const timeoutNum = parseFloat(values.timeout_seconds);
                if (timeoutNum !== hook.timeout_seconds)
                  req.timeout_seconds = timeoutNum;
                if (values.api_key.trim().length > 0) {
                  req.api_key = values.api_key;
                } else if (apiKeyCleared) {
                  req.api_key = null;
                }
                if (Object.keys(req).length === 0) {
                  handleClose();
                  return;
                }
                result = await updateHook(hook.id, req);
              } else {
                if (!spec) {
                  toast.error("No hook point specified.");
                  return;
                }
                result = await createHook({
                  name: values.name,
                  hook_point: spec.hook_point,
                  endpoint_url: values.endpoint_url,
                  ...(values.api_key ? { api_key: values.api_key } : {}),
                  fail_strategy: values.fail_strategy,
                  timeout_seconds: parseFloat(values.timeout_seconds),
                });
              }
              toast.success(isEdit ? "Hook updated." : "Hook created.");
              onSuccess(result);
              if (!isEdit) {
                setIsConnected(true);
                await new Promise((resolve) => setTimeout(resolve, 500));
              }
              handleClose();
            } catch (err) {
              if (err instanceof HookAuthError) {
                helpers.setFieldError("api_key", "Invalid API key.");
              } else if (err instanceof HookTimeoutError) {
                helpers.setFieldError(
                  "timeout_seconds",
                  "Connection timed out. Try increasing the timeout."
                );
              } else if (err instanceof HookConnectError) {
                helpers.setFieldError(
                  "endpoint_url",
                  err.message || "Could not connect to endpoint."
                );
              } else {
                toast.error(
                  err instanceof Error ? err.message : "Something went wrong."
                );
              }
            } finally {
              helpers.setSubmitting(false);
            }
          }}
        >
          {({ values, setFieldValue, isSubmitting, isValid, dirty }) => {
            const failStrategyDescription =
              values.fail_strategy === "soft"
                ? SOFT_DESCRIPTION
                : spec?.fail_hard_description;

            return (
              <Form className="w-full overflow-visible">
                <Modal.Header
                  icon={SvgShareWebhook}
                  title={
                    isEdit ? "Manage Hook Extension" : "Set Up Hook Extension"
                  }
                  description={
                    isEdit
                      ? undefined
                      : "Connect an external API endpoint to extend the hook point."
                  }
                  onClose={handleClose}
                />

                <Modal.Body>
                  {/* Hook point section header */}
                  <ContentAction
                    sizePreset="main-ui"
                    variant="section"
                    paddingVariant="fit"
                    title={hookPointDisplayName}
                    description={hookPointDescription}
                    rightChildren={
                      <div className="flex flex-col items-end gap-1">
                        <Content
                          sizePreset="secondary"
                          variant="body"
                          icon={SvgShareWebhook}
                          title="Hook Point"
                          prominence="muted"
                          widthVariant="fit"
                        />
                        {docsUrl && (
                          <a
                            href={docsUrl}
                            target="_blank"
                            rel="noopener noreferrer"
                            className="underline leading-none"
                          >
                            <Text font="secondary-body" color="text-03">
                              Documentation
                            </Text>
                          </a>
                        )}
                      </div>
                    }
                  />

                  <InputLayouts.Vertical name="name" title="Display Name">
                    <div className="[&_input::placeholder]:!font-main-ui-muted w-full">
                      <InputTypeInField
                        name="name"
                        placeholder="Name your extension at this hook point"
                        variant={isSubmitting ? "disabled" : undefined}
                      />
                    </div>
                  </InputLayouts.Vertical>

                  <InputLayouts.Vertical
                    name="fail_strategy"
                    title="Fail Strategy"
                    nonInteractive
                    subDescription={failStrategyDescription}
                  >
                    <InputSelect
                      value={values.fail_strategy}
                      onValueChange={(v) =>
                        setFieldValue("fail_strategy", v as HookFailStrategy)
                      }
                      disabled={isSubmitting}
                    >
                      <InputSelect.Trigger placeholder="Select strategy" />
                      <InputSelect.Content>
                        <InputSelect.Item value="soft">
                          Log Error and Continue
                          {spec?.default_fail_strategy === "soft" && (
                            <>
                              {" "}
                              <Text color="text-03">(Default)</Text>
                            </>
                          )}
                        </InputSelect.Item>
                        <InputSelect.Item value="hard">
                          Block Pipeline on Failure
                          {spec?.default_fail_strategy === "hard" && (
                            <>
                              {" "}
                              <Text color="text-03">(Default)</Text>
                            </>
                          )}
                        </InputSelect.Item>
                      </InputSelect.Content>
                    </InputSelect>
                  </InputLayouts.Vertical>

                  <TimeoutField spec={spec} />

                  <InputLayouts.Vertical
                    name="endpoint_url"
                    title="External API Endpoint URL"
                    subDescription="Only connect to servers you trust. You are responsible for actions taken and data shared with this connection."
                  >
                    <div className="[&_input::placeholder]:!font-main-ui-muted w-full">
                      <InputTypeInField
                        name="endpoint_url"
                        placeholder="https://your-api-endpoint.com"
                        variant={isSubmitting ? "disabled" : undefined}
                      />
                    </div>
                  </InputLayouts.Vertical>

                  <InputLayouts.Vertical
                    name="api_key"
                    title="API Key"
                    subDescription="Onyx will use this key to authenticate with your API endpoint."
                  >
                    <PasswordInputTypeInField
                      name="api_key"
                      placeholder={
                        isEdit
                          ? hook?.api_key_masked ??
                            "Leave blank to keep current key"
                          : undefined
                      }
                      disabled={isSubmitting}
                      onChange={(e) => {
                        if (isEdit && hook?.api_key_masked) {
                          setApiKeyCleared(e.target.value === "");
                        }
                      }}
                    />
                  </InputLayouts.Vertical>

                  {!isEdit && (isSubmitting || isConnected) && (
                    <Section
                      flexDirection="row"
                      alignItems="center"
                      justifyContent="start"
                      height="fit"
                      gap={1}
                      className="px-0.5"
                    >
                      <div className="p-0.5 shrink-0">
                        {isConnected ? (
                          <SvgCheckCircle
                            size={16}
                            className="text-status-success-05"
                          />
                        ) : (
                          <SvgLoader
                            size={16}
                            className="animate-spin text-text-03"
                          />
                        )}
                      </div>
                      <Text font="secondary-body" color="text-03">
                        {isConnected
                          ? "Connection valid."
                          : "Verifying connection…"}
                      </Text>
                    </Section>
                  )}
                </Modal.Body>

                <Modal.Footer>
                  <BasicModalFooter
                    cancel={
                      <Button
                        disabled={isSubmitting}
                        prominence="secondary"
                        onClick={handleClose}
                      >
                        Cancel
                      </Button>
                    }
                    submit={
                      <Button
                        disabled={
                          isSubmitting ||
                          !isValid ||
                          (!dirty && !apiKeyCleared && isEdit)
                        }
                        type="submit"
                        icon={
                          isSubmitting && !isEdit
                            ? () => (
                                <SvgLoader size={16} className="animate-spin" />
                              )
                            : undefined
                        }
                      >
                        {isEdit ? "Save Changes" : "Connect"}
                      </Button>
                    }
                  />
                </Modal.Footer>
              </Form>
            );
          }}
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/HookLogsModal.tsx
================================================
"use client";

import { Button, Text } from "@opal/components";
import { SvgDownload, SvgTextLines } from "@opal/icons";
import Modal from "@/refresh-components/Modal";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
import { formatDateTimeLog } from "@/lib/dateUtils";
import { downloadFile } from "@/lib/download";
import { Section } from "@/layouts/general-layouts";
import type {
  HookExecutionRecord,
  HookPointMeta,
  HookResponse,
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { useModalClose } from "@/refresh-components/contexts/ModalContext";

interface HookLogsModalProps {
  hook: HookResponse;
  spec: HookPointMeta | undefined;
}

// Section header: "Past Hour ————" or "Older ————"
//
// TODO(@raunakab): replace this with a proper, opalified `Separator` component (when it lands).
function SectionHeader({ label }: { label: string }) {
  return (
    <Section
      flexDirection="row"
      alignItems="center"
      height="fit"
      className="py-1"
    >
      <Text font="secondary-body" color="text-03">
        {label}
      </Text>
      <div className="flex-1 ml-2 border-t border-border-02" />
    </Section>
  );
}

function LogRow({ log }: { log: HookExecutionRecord }) {
  return (
    <Section
      flexDirection="row"
      justifyContent="start"
      alignItems="start"
      gap={0.5}
      height="fit"
      className="py-2"
    >
      {/* 1. Timestamp */}
      <span className="shrink-0 text-code-code">
        <Text font="secondary-mono-label" color="inherit" nowrap>
          {formatDateTimeLog(log.created_at)}
        </Text>
      </span>
      {/* 2. Error message */}
      <span className="flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code">
        <Text font="secondary-mono" color="inherit">
          {log.error_message ?? "Unknown error"}
        </Text>
      </span>
      {/* 3. Copy button */}
      <Section width="fit" height="fit" alignItems="center">
        <CopyIconButton size="xs" getCopyText={() => log.error_message ?? ""} />
      </Section>
    </Section>
  );
}

export default function HookLogsModal({ hook, spec }: HookLogsModalProps) {
  const onClose = useModalClose();

  const { recentErrors, olderErrors, isLoading, error } = useHookExecutionLogs(
    hook.id,
    10
  );

  const totalLines = recentErrors.length + olderErrors.length;
  const allLogs = [...recentErrors, ...olderErrors];

  function getLogsText() {
    return allLogs
      .map(
        (log) =>
          `${formatDateTimeLog(log.created_at)} ${
            log.error_message ?? "Unknown error"
          }`
      )
      .join("\n");
  }

  function handleDownload() {
    downloadFile(`${hook.name}-errors.txt`, { content: getLogsText() });
  }

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="md" height="fit">
        <Modal.Header
          icon={(props) => <SvgTextLines {...props} />}
          title="Recent Errors"
          description={`Hook: ${hook.name} • Hook Point: ${
            spec?.display_name ?? hook.hook_point
          }`}
          onClose={onClose}
        />
        <Modal.Body>
          {isLoading ? (
            <Section justifyContent="center" height="fit" className="py-6">
              <SimpleLoader />
            </Section>
          ) : error ? (
            <Text font="main-ui-body" color="text-03">
              Failed to load logs.
            </Text>
          ) : totalLines === 0 ? (
            <Text font="main-ui-body" color="text-03">
              No errors in the past 30 days.
            </Text>
          ) : (
            <>
              {recentErrors.length > 0 && (
                <>
                  <SectionHeader label="Past Hour" />
                  {recentErrors.map((log, idx) => (
                    <LogRow key={log.created_at + String(idx)} log={log} />
                  ))}
                </>
              )}
              {olderErrors.length > 0 && (
                <>
                  <SectionHeader label="Older" />
                  {olderErrors.map((log, idx) => (
                    <LogRow key={log.created_at + String(idx)} log={log} />
                  ))}
                </>
              )}
            </>
          )}
        </Modal.Body>
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="center"
          padding={0.5}
          className="bg-background-tint-01"
        >
          <Text font="main-ui-body" color="text-03">
            {`${totalLines} ${totalLines === 1 ? "line" : "lines"}`}
          </Text>
          <Section
            flexDirection="row"
            alignItems="center"
            width="fit"
            gap={0.25}
            padding={0.25}
            className="rounded-xl bg-background-tint-00"
          >
            <CopyIconButton
              size="sm"
              tooltip="Copy"
              getCopyText={getLogsText}
            />
            <Button
              prominence="tertiary"
              size="sm"
              icon={SvgDownload}
              tooltip="Download"
              onClick={handleDownload}
            />
          </Section>
        </Section>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/HookStatusPopover.tsx
================================================
"use client";

import { useEffect, useRef, useState } from "react";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { noProp } from "@/lib/utils";
import { formatTimeOnly } from "@/lib/dateUtils";
import { Button, Text } from "@opal/components";
import { Content } from "@opal/layouts";
import LineItem from "@/refresh-components/buttons/LineItem";
import Popover from "@/refresh-components/Popover";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import Separator from "@/refresh-components/Separator";
import { Section } from "@/layouts/general-layouts";
import {
  SvgAlertTriangle,
  SvgCheckCircle,
  SvgMaximize2,
  SvgXOctagon,
} from "@opal/icons";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
import HookLogsModal from "@/ee/refresh-pages/admin/HooksPage/HookLogsModal";
import type {
  HookPointMeta,
  HookResponse,
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { cn } from "@opal/utils";

interface HookStatusPopoverProps {
  hook: HookResponse;
  spec: HookPointMeta | undefined;
  isBusy: boolean;
}

export default function HookStatusPopover({
  hook,
  spec,
  isBusy,
}: HookStatusPopoverProps) {
  const logsModal = useCreateModal();
  const [open, setOpen] = useState(false);
  // true = opened by click (stays until dismissed); false = opened by hover (closes after 1s)
  const [clickOpened, setClickOpened] = useState(false);
  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const { hasRecentErrors, recentErrors, isLoading, error } =
    useHookExecutionLogs(hook.id);

  useEffect(() => {
    return () => {
      if (closeTimerRef.current) clearTimeout(closeTimerRef.current);
    };
  }, []);

  useEffect(() => {
    if (error) {
      console.error(
        "HookStatusPopover: failed to fetch execution logs:",
        error
      );
    }
  }, [error]);

  function clearCloseTimer() {
    if (closeTimerRef.current) {
      clearTimeout(closeTimerRef.current);
      closeTimerRef.current = null;
    }
  }

  function scheduleClose() {
    clearCloseTimer();
    closeTimerRef.current = setTimeout(() => {
      setOpen(false);
      setClickOpened(false);
    }, 1000);
  }

  function handleTriggerMouseEnter() {
    clearCloseTimer();
    setOpen(true);
  }

  function handleTriggerMouseLeave() {
    if (!clickOpened) scheduleClose();
  }

  function handleTriggerClick() {
    clearCloseTimer();
    if (open && clickOpened) {
      // Click while click-opened → close
      setOpen(false);
      setClickOpened(false);
    } else {
      // Any click → open and pin
      setOpen(true);
      setClickOpened(true);
    }
  }

  function handleContentMouseEnter() {
    clearCloseTimer();
  }

  function handleContentMouseLeave() {
    if (!clickOpened) scheduleClose();
  }

  function handleOpenChange(newOpen: boolean) {
    if (!newOpen) {
      setOpen(false);
      setClickOpened(false);
      clearCloseTimer();
    }
  }

  return (
    <>
      <logsModal.Provider>
        <HookLogsModal hook={hook} spec={spec} />
      </logsModal.Provider>

      <Popover open={open} onOpenChange={handleOpenChange}>
        <Popover.Anchor asChild>
          <Button
            prominence="tertiary"
            rightIcon={({ className, ...props }) =>
              hook.is_reachable === false ? (
                <SvgXOctagon
                  {...props}
                  className={cn("text-status-error-05", className)}
                />
              ) : hasRecentErrors ? (
                <SvgAlertTriangle
                  {...props}
                  className={cn("text-status-warning-05", className)}
                />
              ) : (
                <SvgCheckCircle
                  {...props}
                  className={cn("text-status-success-05", className)}
                />
              )
            }
            onMouseEnter={handleTriggerMouseEnter}
            onMouseLeave={handleTriggerMouseLeave}
            onClick={noProp(handleTriggerClick)}
            disabled={isBusy}
          >
            {hook.is_reachable === false ? "Connection Lost" : "Connected"}
          </Button>
        </Popover.Anchor>

        <Popover.Content
          align="end"
          sideOffset={4}
          onMouseEnter={handleContentMouseEnter}
          onMouseLeave={handleContentMouseLeave}
        >
          <Section
            flexDirection="column"
            justifyContent="start"
            alignItems="start"
            height="fit"
            width={hasRecentErrors ? 20 : 12.5}
            padding={0.125}
            gap={0.25}
          >
            {isLoading ? (
              <Section justifyContent="center">
                <SimpleLoader />
              </Section>
            ) : error ? (
              <Text font="secondary-body" color="text-03">
                Failed to load logs.
              </Text>
            ) : hasRecentErrors ? (
              <>
                <div className="p-1">
                  <Content
                    sizePreset="secondary"
                    variant="section"
                    icon={SvgXOctagon}
                    title={
                      recentErrors.length <= 3
                        ? `${recentErrors.length} ${
                            recentErrors.length === 1 ? "Error" : "Errors"
                          }`
                        : "Most Recent Errors"
                    }
                    description="in the past hour"
                  />
                </div>

                <Separator noPadding className="px-2" />

                {/* Log rows — at most 3, timestamp first then error message */}
                <Section
                  flexDirection="column"
                  justifyContent="start"
                  alignItems="start"
                  gap={0.25}
                  padding={0.25}
                  height="fit"
                >
                  {recentErrors.slice(0, 3).map((log, idx) => (
                    <Section
                      key={log.created_at + String(idx)}
                      flexDirection="column"
                      justifyContent="start"
                      alignItems="start"
                      gap={0.25}
                      padding={0.25}
                      height="fit"
                    >
                      <Section
                        flexDirection="row"
                        justifyContent="between"
                        alignItems="center"
                        gap={0}
                        height="fit"
                      >
                        <span className="text-code-code">
                          <Text font="secondary-mono-label" color="inherit">
                            {formatTimeOnly(log.created_at)}
                          </Text>
                        </span>
                        <CopyIconButton
                          size="xs"
                          getCopyText={() => log.error_message ?? ""}
                        />
                      </Section>
                      <span className="break-all">
                        <Text font="secondary-mono" color="text-03">
                          {log.error_message ?? "Unknown error"}
                        </Text>
                      </span>
                    </Section>
                  ))}
                </Section>

                {/* View More Lines */}
                <LineItem
                  muted
                  icon={SvgMaximize2}
                  onClick={noProp(() => {
                    handleOpenChange(false);
                    logsModal.toggle(true);
                  })}
                >
                  View More Lines
                </LineItem>
              </>
            ) : (
              // No errors state
              <>
                <div className="p-1">
                  <Content
                    sizePreset="secondary"
                    variant="section"
                    icon={SvgCheckCircle}
                    title="No Error"
                    description="in the past hour"
                  />
                </div>

                <Separator noPadding className="px-2" />

                {/* View Older Errors */}
                <LineItem
                  muted
                  icon={SvgMaximize2}
                  onClick={noProp(() => {
                    handleOpenChange(false);
                    logsModal.toggle(true);
                  })}
                >
                  View Older Errors
                </LineItem>
              </>
            )}
          </Section>
        </Popover.Content>
      </Popover>
    </>
  );
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/index.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useState } from "react";
import { useRouter } from "next/navigation";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useHookSpecs } from "@/ee/hooks/useHookSpecs";
import { useHooks } from "@/ee/hooks/useHooks";
import useFilter from "@/hooks/useFilter";
import { toast } from "@/hooks/useToast";
import {
  useCreateModal,
  useModalClose,
} from "@/refresh-components/contexts/ModalContext";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { Button, SelectCard, Text } from "@opal/components";
import { Disabled, Hoverable } from "@opal/core";
import { markdown } from "@opal/utils";
import { Content, IllustrationContent } from "@opal/layouts";
import Modal from "@/refresh-components/Modal";
import {
  SvgArrowExchange,
  SvgBubbleText,
  SvgExternalLink,
  SvgFileBroadcast,
  SvgShareWebhook,
  SvgPlug,
  SvgRefreshCw,
  SvgSettings,
  SvgTrash,
  SvgUnplug,
} from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import { SvgNoResult, SvgEmpty } from "@opal/illustrations";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import HookFormModal from "@/ee/refresh-pages/admin/HooksPage/HookFormModal";
import HookStatusPopover from "@/ee/refresh-pages/admin/HooksPage/HookStatusPopover";
import {
  activateHook,
  deactivateHook,
  deleteHook,
  validateHook,
} from "@/ee/refresh-pages/admin/HooksPage/svc";
import type {
  HookPointMeta,
  HookResponse,
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { noProp } from "@/lib/utils";

const route = ADMIN_ROUTES.HOOKS;

const HOOK_POINT_ICONS: Record<string, IconFunctionComponent> = {
  document_ingestion: SvgFileBroadcast,
  query_processing: SvgBubbleText,
};

function getHookPointIcon(hookPoint: string): IconFunctionComponent {
  return HOOK_POINT_ICONS[hookPoint] ?? SvgShareWebhook;
}

// ---------------------------------------------------------------------------
// Disconnect confirmation modal
// ---------------------------------------------------------------------------

interface DisconnectConfirmModalProps {
  hook: HookResponse;
  onDisconnect: () => void;
  onDisconnectAndDelete: () => void;
}

function DisconnectConfirmModal({
  hook,
  onDisconnect,
  onDisconnectAndDelete,
}: DisconnectConfirmModalProps) {
  const onClose = useModalClose();

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="md" height="fit">
        <Modal.Header
          // TODO(@raunakab): replace the colour of this SVG with red.
          icon={SvgUnplug}
          title={markdown(`Disconnect *${hook.name}*`)}
          onClose={onClose}
        />
        <Modal.Body>
          <div className="flex flex-col gap-2">
            <Text font="main-ui-body" color="text-03">
              {markdown(
                `Onyx will stop calling this endpoint for hook ***${hook.name}***. In-flight requests will continue to run. The external endpoint may still retain data previously sent to it. You can reconnect this hook later if needed.`
              )}
            </Text>
            <Text font="main-ui-body" color="text-03">
              You can also delete this hook. Deletion cannot be undone.
            </Text>
          </div>
        </Modal.Body>
        <Modal.Footer>
          <Button prominence="secondary" onClick={onClose}>
            Cancel
          </Button>
          <Button
            variant="danger"
            prominence="secondary"
            onClick={onDisconnectAndDelete}
          >
            Disconnect &amp; Delete
          </Button>
          <Button variant="danger" prominence="primary" onClick={onDisconnect}>
            Disconnect
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}

// ---------------------------------------------------------------------------
// Delete confirmation modal
// ---------------------------------------------------------------------------

interface DeleteConfirmModalProps {
  hook: HookResponse;
  onDelete: () => void;
}

function DeleteConfirmModal({ hook, onDelete }: DeleteConfirmModalProps) {
  const onClose = useModalClose();

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="md" height="fit">
        <Modal.Header
          // TODO(@raunakab): replace the colour of this SVG with red.
          icon={SvgTrash}
          title={`Delete ${hook.name}`}
          onClose={onClose}
        />
        <Modal.Body>
          <div className="flex flex-col gap-2">
            <Text font="main-ui-body" color="text-03">
              {markdown(
                `Hook ***${hook.name}*** will be permanently removed from this hook point. The external endpoint may still retain data previously sent to it.`
              )}
            </Text>
            <Text font="main-ui-body" color="text-03">
              Deletion cannot be undone.
            </Text>
          </div>
        </Modal.Body>
        <Modal.Footer>
          <Button prominence="secondary" onClick={onClose}>
            Cancel
          </Button>
          <Button variant="danger" prominence="primary" onClick={onDelete}>
            Delete
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}

// ---------------------------------------------------------------------------
// Unconnected hook card
// ---------------------------------------------------------------------------

interface UnconnectedHookCardProps {
  spec: HookPointMeta;
  onConnect: () => void;
}

function UnconnectedHookCard({ spec, onConnect }: UnconnectedHookCardProps) {
  const Icon = getHookPointIcon(spec.hook_point);

  return (
    <SelectCard state="empty" padding="sm" rounding="lg" onClick={onConnect}>
      <div className="w-full flex flex-row">
        <div className="flex-1 p-2">
          <Content
            sizePreset="main-ui"
            variant="section"
            icon={Icon}
            title={spec.display_name}
            description={spec.description}
          />

          {spec.docs_url && (
            <a
              href={spec.docs_url}
              target="_blank"
              rel="noopener noreferrer"
              className="ml-6 flex items-center gap-1 w-min"
            >
              <span className="underline font-secondary-body text-text-03">
                Documentation
              </span>
              <SvgExternalLink size={12} className="shrink-0" />
            </a>
          )}
        </div>

        <Button
          prominence="tertiary"
          rightIcon={SvgArrowExchange}
          onClick={noProp(onConnect)}
        >
          Connect
        </Button>
      </div>
    </SelectCard>
  );
}

// ---------------------------------------------------------------------------
// Connected hook card
// ---------------------------------------------------------------------------

interface ConnectedHookCardProps {
  hook: HookResponse;
  spec: HookPointMeta | undefined;
  onEdit: () => void;
  onDeleted: () => void;
  onToggled: (updated: HookResponse) => void;
}

function ConnectedHookCard({
  hook,
  spec,
  onEdit,
  onDeleted,
  onToggled,
}: ConnectedHookCardProps) {
  const [isBusy, setIsBusy] = useState(false);
  const disconnectModal = useCreateModal();
  const deleteModal = useCreateModal();

  async function handleDelete() {
    deleteModal.toggle(false);
    setIsBusy(true);
    try {
      await deleteHook(hook.id);
      onDeleted();
    } catch (err) {
      console.error("Failed to delete hook:", err);
      toast.error(
        err instanceof Error ? err.message : "Failed to delete hook."
      );
    } finally {
      setIsBusy(false);
    }
  }

  async function handleActivate() {
    setIsBusy(true);
    try {
      const updated = await activateHook(hook.id);
      onToggled(updated);
    } catch (err) {
      console.error("Failed to reconnect hook:", err);
      toast.error(
        err instanceof Error ? err.message : "Failed to reconnect hook."
      );
    } finally {
      setIsBusy(false);
    }
  }

  async function handleDeactivate() {
    disconnectModal.toggle(false);
    setIsBusy(true);
    try {
      const updated = await deactivateHook(hook.id);
      onToggled(updated);
    } catch (err) {
      console.error("Failed to deactivate hook:", err);
      toast.error(
        err instanceof Error ? err.message : "Failed to deactivate hook."
      );
    } finally {
      setIsBusy(false);
    }
  }

  async function handleDisconnectAndDelete() {
    disconnectModal.toggle(false);
    setIsBusy(true);
    try {
      const deactivated = await deactivateHook(hook.id);
      onToggled(deactivated);
      await deleteHook(hook.id);
      onDeleted();
    } catch (err) {
      console.error("Failed to disconnect hook:", err);
      toast.error(
        err instanceof Error ? err.message : "Failed to disconnect hook."
      );
    } finally {
      setIsBusy(false);
    }
  }

  async function handleValidate() {
    setIsBusy(true);
    try {
      const result = await validateHook(hook.id);
      if (result.status === "passed") {
        toast.success("Hook validated successfully.");
      } else {
        toast.error(
          result.error_message ?? `Validation failed: ${result.status}`
        );
      }
    } catch (err) {
      console.error("Failed to validate hook:", err);
      toast.error(
        err instanceof Error ? err.message : "Failed to validate hook."
      );
    } finally {
      setIsBusy(false);
    }
  }

  const HookIcon = getHookPointIcon(hook.hook_point);

  return (
    <>
      <disconnectModal.Provider>
        <DisconnectConfirmModal
          hook={hook}
          onDisconnect={handleDeactivate}
          onDisconnectAndDelete={handleDisconnectAndDelete}
        />
      </disconnectModal.Provider>

      <deleteModal.Provider>
        <DeleteConfirmModal hook={hook} onDelete={handleDelete} />
      </deleteModal.Provider>

      <Hoverable.Root group="connected-hook-card">
        {/* TODO(@raunakab): Modify the background colour (by using `SelectCard disabled={...}` [when it lands]) to indicate when the card is "disconnected". */}
        <SelectCard state="filled" padding="sm" rounding="lg" onClick={onEdit}>
          <div className="w-full flex flex-row">
            <div className="flex-1 p-2">
              <Content
                sizePreset="main-ui"
                variant="section"
                icon={HookIcon}
                title={
                  !hook.is_active || hook.is_reachable === false
                    ? markdown(`~~${hook.name}~~`)
                    : hook.name
                }
                suffix={!hook.is_active ? "(Disconnected)" : undefined}
                description={`Hook Point: ${
                  spec?.display_name ?? hook.hook_point
                }`}
              />

              {spec?.docs_url && (
                <a
                  href={spec.docs_url}
                  target="_blank"
                  rel="noopener noreferrer"
                  className="ml-6 flex items-center gap-1 w-min"
                >
                  <span className="underline font-secondary-body text-text-03">
                    Documentation
                  </span>
                  <SvgExternalLink size={12} className="shrink-0" />
                </a>
              )}
            </div>

            <div className="flex flex-col items-end shrink-0">
              <div className="flex items-center gap-1">
                {hook.is_active ? (
                  <HookStatusPopover hook={hook} spec={spec} isBusy={isBusy} />
                ) : (
                  <Button
                    prominence="tertiary"
                    rightIcon={SvgPlug}
                    onClick={noProp(handleActivate)}
                    disabled={isBusy}
                  >
                    Reconnect
                  </Button>
                )}
              </div>

              <Disabled disabled={isBusy}>
                <div className="flex items-center pb-1 px-1 gap-1">
                  {hook.is_active ? (
                    <>
                      <Hoverable.Item
                        group="connected-hook-card"
                        variant="opacity-on-hover"
                      >
                        <Button
                          prominence="tertiary"
                          size="md"
                          icon={SvgUnplug}
                          onClick={noProp(() => disconnectModal.toggle(true))}
                          tooltip="Disconnect Hook"
                          aria-label="Deactivate hook"
                        />
                      </Hoverable.Item>
                      <Button
                        prominence="tertiary"
                        size="md"
                        icon={SvgRefreshCw}
                        onClick={noProp(handleValidate)}
                        tooltip="Test Connection"
                        aria-label="Re-validate hook"
                      />
                    </>
                  ) : (
                    <Button
                      prominence="tertiary"
                      size="md"
                      icon={SvgTrash}
                      onClick={noProp(() => deleteModal.toggle(true))}
                      tooltip="Delete"
                      aria-label="Delete hook"
                    />
                  )}
                  <Button
                    prominence="tertiary"
                    size="md"
                    icon={SvgSettings}
                    onClick={noProp(onEdit)}
                    tooltip="Manage"
                    aria-label="Configure hook"
                  />
                </div>
              </Disabled>
            </div>
          </div>
        </SelectCard>
      </Hoverable.Root>
    </>
  );
}

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function HooksPage() {
  const router = useRouter();
  const { settings, settingsLoading } = useSettingsContext();
  const isEE = usePaidEnterpriseFeaturesEnabled();

  const [connectSpec, setConnectSpec] = useState<HookPointMeta | null>(null);
  const [editHook, setEditHook] = useState<HookResponse | null>(null);

  const { specs, isLoading: specsLoading, error: specsError } = useHookSpecs();
  const {
    hooks,
    isLoading: hooksLoading,
    error: hooksError,
    mutate,
  } = useHooks();

  const hookExtractor = useCallback(
    (hook: HookResponse) =>
      `${hook.name} ${
        specs?.find((s: HookPointMeta) => s.hook_point === hook.hook_point)
          ?.display_name ?? ""
      }`,
    [specs]
  );

  const sortedHooks = useMemo(
    () => [...(hooks ?? [])].sort((a, b) => a.name.localeCompare(b.name)),
    [hooks]
  );

  const {
    query: search,
    setQuery: setSearch,
    filtered: connectedHooks,
  } = useFilter(sortedHooks, hookExtractor);

  const hooksByPoint = useMemo(() => {
    const map: Record<string, HookResponse[]> = {};
    for (const hook of hooks ?? []) {
      (map[hook.hook_point] ??= []).push(hook);
    }
    return map;
  }, [hooks]);

  const unconnectedSpecs = useMemo(() => {
    const searchLower = search.toLowerCase();
    return (specs ?? [])
      .filter(
        (spec: HookPointMeta) =>
          (hooksByPoint[spec.hook_point]?.length ?? 0) === 0 &&
          (!searchLower ||
            spec.display_name.toLowerCase().includes(searchLower) ||
            spec.description.toLowerCase().includes(searchLower))
      )
      .sort((a: HookPointMeta, b: HookPointMeta) =>
        a.display_name.localeCompare(b.display_name)
      );
  }, [specs, hooksByPoint, search]);

  useEffect(() => {
    if (settingsLoading) return;
    if (!isEE) {
      toast.info("Hook Extensions require an Enterprise license.");
      router.replace("/");
    } else if (!settings.hooks_enabled) {
      toast.info("Hook Extensions are not enabled for this deployment.");
      router.replace("/");
    }
  }, [settingsLoading, isEE, settings.hooks_enabled, router]);

  if (settingsLoading || !isEE || !settings.hooks_enabled) {
    return <SimpleLoader />;
  }

  const isLoading = specsLoading || hooksLoading;

  function handleHookSuccess(updated: HookResponse) {
    mutate((prev: HookResponse[] | undefined) => {
      if (!prev) return [updated];
      const idx = prev.findIndex((h: HookResponse) => h.id === updated.id);
      if (idx >= 0) {
        const next = [...prev];
        next[idx] = updated;
        return next;
      }
      return [...prev, updated];
    });
  }

  function handleHookDeleted(id: number) {
    mutate(
      (prev: HookResponse[] | undefined) =>
        prev?.filter((h: HookResponse) => h.id !== id)
    );
  }

  const connectSpec_ =
    connectSpec ??
    (editHook
      ? specs?.find((s: HookPointMeta) => s.hook_point === editHook.hook_point)
      : undefined);

  return (
    <>
      {/* Create modal */}
      {!!connectSpec && (
        <HookFormModal
          key={connectSpec?.hook_point ?? "create"}
          onOpenChange={(open: boolean) => {
            if (!open) setConnectSpec(null);
          }}
          spec={connectSpec ?? undefined}
          onSuccess={handleHookSuccess}
        />
      )}

      {/* Edit modal */}
      {!!editHook && (
        <HookFormModal
          key={editHook?.id ?? "edit"}
          onOpenChange={(open: boolean) => {
            if (!open) setEditHook(null);
          }}
          hook={editHook ?? undefined}
          spec={connectSpec_ ?? undefined}
          onSuccess={handleHookSuccess}
        />
      )}

      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description="Extend Onyx pipelines by registering external API endpoints as callbacks at predefined hook points."
          separator
        />
        <SettingsLayouts.Body>
          {isLoading ? (
            <SimpleLoader />
          ) : specsError || hooksError ? (
            <Text font="secondary-body" color="text-03">
              {`Failed to load${
                specsError ? " hook specifications" : " hooks"
              }. Please refresh the page.`}
            </Text>
          ) : (
            <div className="flex flex-col gap-3 h-full">
              <div className="pb-3">
                <InputTypeIn
                  placeholder="Search hooks..."
                  value={search}
                  variant="internal"
                  leftSearchIcon
                  onChange={(e) => setSearch(e.target.value)}
                />
              </div>

              {connectedHooks.length === 0 && unconnectedSpecs.length === 0 ? (
                <div>
                  <IllustrationContent
                    title={
                      search ? "No results found" : "No hook points available"
                    }
                    description={
                      search ? "Try using a different search term." : undefined
                    }
                    illustration={search ? SvgNoResult : SvgEmpty}
                  />
                </div>
              ) : (
                <div className="flex flex-col gap-2">
                  {connectedHooks.map((hook) => {
                    const spec = specs?.find(
                      (s: HookPointMeta) => s.hook_point === hook.hook_point
                    );
                    return (
                      <ConnectedHookCard
                        key={hook.id}
                        hook={hook}
                        spec={spec}
                        onEdit={() => setEditHook(hook)}
                        onDeleted={() => handleHookDeleted(hook.id)}
                        onToggled={handleHookSuccess}
                      />
                    );
                  })}

                  {unconnectedSpecs.map((spec: HookPointMeta) => (
                    <UnconnectedHookCard
                      key={spec.hook_point}
                      spec={spec}
                      onConnect={() => setConnectSpec(spec)}
                    />
                  ))}
                </div>
              )}
            </div>
          )}
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    </>
  );
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/interfaces.ts
================================================
export type HookPoint = string;
export type HookFailStrategy = "hard" | "soft";

export interface HookPointMeta {
  hook_point: HookPoint;
  display_name: string;
  description: string;
  docs_url: string | null;
  input_schema: Record<string, unknown>;
  output_schema: Record<string, unknown>;
  default_timeout_seconds: number;
  default_fail_strategy: HookFailStrategy;
  fail_hard_description: string;
}

export interface HookResponse {
  id: number;
  name: string;
  hook_point: HookPoint;
  endpoint_url: string | null;
  /** Partially-masked API key (e.g. "abcd••••••••wxyz"), or null if no key is set. */
  api_key_masked: string | null;
  fail_strategy: HookFailStrategy;
  timeout_seconds: number;
  is_active: boolean;
  is_reachable: boolean | null;
  creator_email: string | null;
  created_at: string;
  updated_at: string;
}

export interface HookFormState {
  name: string;
  endpoint_url: string;
  api_key: string;
  fail_strategy: HookFailStrategy;
  timeout_seconds: string;
}

export interface HookCreateRequest {
  name: string;
  hook_point: HookPoint;
  endpoint_url: string;
  api_key?: string;
  fail_strategy?: HookFailStrategy;
  timeout_seconds?: number;
}

export interface HookUpdateRequest {
  name?: string;
  endpoint_url?: string;
  api_key?: string | null;
  fail_strategy?: HookFailStrategy;
  timeout_seconds?: number;
}

export interface HookExecutionRecord {
  error_message: string | null;
  status_code: number | null;
  duration_ms: number | null;
  created_at: string;
}

export type HookValidateStatus =
  | "passed"
  | "auth_failed"
  | "timeout"
  | "cannot_connect";

export interface HookValidateResponse {
  status: HookValidateStatus;
  error_message: string | null;
}


================================================
FILE: web/src/ee/refresh-pages/admin/HooksPage/svc.ts
================================================
import {
  HookCreateRequest,
  HookExecutionRecord,
  HookResponse,
  HookUpdateRequest,
  HookValidateResponse,
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";

export class HookAuthError extends Error {}
export class HookTimeoutError extends Error {}
export class HookConnectError extends Error {}

async function parseError(res: Response, fallback: string): Promise<Error> {
  try {
    const body = await res.json();
    if (body?.error_code === "CREDENTIAL_INVALID") {
      return new HookAuthError(body?.detail ?? "Invalid API key.");
    }
    if (body?.error_code === "GATEWAY_TIMEOUT") {
      return new HookTimeoutError(body?.detail ?? "Connection timed out.");
    }
    if (body?.error_code === "BAD_GATEWAY") {
      return new HookConnectError(
        body?.detail ?? "Could not connect to endpoint."
      );
    }
    return new Error(body?.detail ?? fallback);
  } catch (err) {
    console.error("parseError: failed to parse error response body:", err);
    return new Error(fallback);
  }
}

export async function createHook(
  req: HookCreateRequest
): Promise<HookResponse> {
  const res = await fetch("/api/admin/hooks", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(req),
  });
  if (!res.ok) {
    throw await parseError(res, "Failed to create hook");
  }
  return res.json();
}

export async function updateHook(
  id: number,
  req: HookUpdateRequest
): Promise<HookResponse> {
  const res = await fetch(`/api/admin/hooks/${id}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(req),
  });
  if (!res.ok) {
    throw await parseError(res, "Failed to update hook");
  }
  return res.json();
}

export async function deleteHook(id: number): Promise<void> {
  const res = await fetch(`/api/admin/hooks/${id}`, { method: "DELETE" });
  if (!res.ok) {
    throw await parseError(res, "Failed to delete hook");
  }
}

export async function activateHook(id: number): Promise<HookResponse> {
  const res = await fetch(`/api/admin/hooks/${id}/activate`, {
    method: "POST",
  });
  if (!res.ok) {
    throw await parseError(res, "Failed to activate hook");
  }
  return res.json();
}

export async function deactivateHook(id: number): Promise<HookResponse> {
  const res = await fetch(`/api/admin/hooks/${id}/deactivate`, {
    method: "POST",
  });
  if (!res.ok) {
    throw await parseError(res, "Failed to deactivate hook");
  }
  return res.json();
}

export async function validateHook(id: number): Promise<HookValidateResponse> {
  const res = await fetch(`/api/admin/hooks/${id}/validate`, {
    method: "POST",
  });
  if (!res.ok) {
    throw await parseError(res, "Failed to validate hook");
  }
  return res.json();
}

export async function fetchExecutionLogs(
  id: number,
  limit = 20
): Promise<HookExecutionRecord[]> {
  const res = await fetch(
    `/api/admin/hooks/${id}/execution-logs?limit=${limit}`
  );
  if (!res.ok) {
    throw await parseError(res, "Failed to fetch execution logs");
  }
  return res.json();
}


================================================
FILE: web/src/ee/sections/SearchCard.tsx
================================================
"use client";

import { SearchDocWithContent } from "@/lib/search/interfaces";
import { SourceIcon } from "@/components/SourceIcon";
import { WebResultIcon } from "@/components/WebResultIcon";
import Text from "@/refresh-components/texts/Text";
import Chip from "@/refresh-components/Chip";
import { buildDocumentSummaryDisplay } from "@/components/search/DocumentDisplay";
import { ValidSources } from "@/lib/types";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import { Section } from "@/layouts/general-layouts";
import { Interactive } from "@opal/core";
import Truncated from "@/refresh-components/texts/Truncated";
import { timeAgo } from "@/lib/time";
import { useMemo } from "react";

export interface SearchResultCardProps {
  /** The search result document to display */
  document: SearchDocWithContent;
  /** Whether this result was selected by the LLM as relevant */
  isLlmSelected?: boolean;
  /** Callback when the document is clicked */
  onDocumentClick: (doc: MinimalOnyxDocument) => void;
}

/**
 * Card component for displaying a single search result.
 *
 * Shows the document title, source icon, blurb/highlights, and metadata.
 * Clicking the card opens the document preview.
 */
export default function SearchCard({
  document,
  onDocumentClick,
}: SearchResultCardProps) {
  const isWebSource =
    document.is_internet || document.source_type === ValidSources.Web;

  function handleClick() {
    if (document.link) {
      window.open(document.link, "_blank", "noopener,noreferrer");
      return;
    }
    onDocumentClick({
      document_id: document.document_id,
      semantic_identifier: document.semantic_identifier,
    });
  }

  const content = useMemo(
    () =>
      buildDocumentSummaryDisplay(document.match_highlights, document.blurb) ||
      document.blurb,
    [document.match_highlights, document.blurb]
  );

  return (
    <Interactive.Stateless onClick={handleClick} prominence="secondary">
      <Interactive.Container heightVariant="fit" widthVariant="full">
        <Section alignItems="start" gap={0} padding={0.25}>
          {/* Title Row */}
          <Section
            flexDirection="row"
            justifyContent="start"
            gap={0.25}
            padding={0.25}
          >
            {isWebSource && document.link ? (
              <WebResultIcon url={document.link} size={18} />
            ) : (
              <SourceIcon sourceType={document.source_type} iconSize={16} />
            )}

            <Truncated mainUiAction className="text-left">
              {document.semantic_identifier}
            </Truncated>
          </Section>

          {/* Body Row */}
          <div className="px-1 pb-1">
            <Section alignItems="start" gap={0.25}>
              {/* Metadata */}
              <Section flexDirection="row" justifyContent="start" gap={0.25}>
                {(document.primary_owners ?? []).map((owner, index) => (
                  <Chip key={index}>{owner}</Chip>
                ))}
                {document.metadata?.tags &&
                  (Array.isArray(document.metadata.tags)
                    ? document.metadata.tags
                    : [document.metadata.tags]
                  ).map((tag, index) => <Chip key={index}>{tag}</Chip>)}
                {document.updated_at &&
                  !isNaN(new Date(document.updated_at).getTime()) && (
                    <Text secondaryBody text02>
                      {timeAgo(document.updated_at)}
                    </Text>
                  )}
              </Section>

              {/* Blurb */}
              {content && (
                <Text secondaryBody text03 className="text-left">
                  {content}
                </Text>
              )}
            </Section>
          </div>
        </Section>
      </Interactive.Container>
    </Interactive.Stateless>
  );
}


================================================
FILE: web/src/ee/sections/SearchUI.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import {
  BaseFilters,
  MinimalOnyxDocument,
  SourceMetadata,
} from "@/lib/search/interfaces";
import SearchCard from "@/ee/sections/SearchCard";
import { Pagination } from "@opal/components";
import Separator from "@/refresh-components/Separator";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import { IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";
import { getSourceMetadata } from "@/lib/sources";
import { Tag, ValidSources } from "@/lib/types";
import { getTimeFilterDate, TimeFilter } from "@/lib/time";
import useTags from "@/hooks/useTags";
import { SourceIcon } from "@/components/SourceIcon";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { SvgCheck, SvgClock, SvgTag } from "@opal/icons";
import { FilterButton } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import useFilter from "@/hooks/useFilter";
import { LineItemButton } from "@opal/components";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { cn } from "@/lib/utils";
import { toast } from "@/hooks/useToast";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";

// ============================================================================
// Types
// ============================================================================

export interface SearchResultsProps {
  /** Callback when a document is clicked */
  onDocumentClick: (doc: MinimalOnyxDocument) => void;
}

// ============================================================================
// Constants
// ============================================================================

const RESULTS_PER_PAGE = 20;

const TIME_FILTER_OPTIONS: { value: TimeFilter; label: string }[] = [
  { value: "day", label: "Past 24 hours" },
  { value: "week", label: "Past week" },
  { value: "month", label: "Past month" },
  { value: "year", label: "Past year" },
];

export default function SearchUI({ onDocumentClick }: SearchResultsProps) {
  // Available tags from backend
  const { tags: availableTags } = useTags();
  const {
    state,
    searchResults: results,
    llmSelectedDocIds,
    error,
    refineSearch: onRefineSearch,
  } = useQueryController();

  const prevErrorRef = useRef<string | null>(null);

  // Show a toast notification when a new error occurs
  useEffect(() => {
    if (error && error !== prevErrorRef.current) {
      toast.error(error);
    }
    prevErrorRef.current = error;
  }, [error]);

  // Filter state
  const [selectedSources, setSelectedSources] = useState<string[]>([]);
  const [timeFilter, setTimeFilter] = useState<TimeFilter | null>(null);
  const [timeFilterOpen, setTimeFilterOpen] = useState(false);
  const [selectedTags, setSelectedTags] = useState<Tag[]>([]);
  const [tagFilterOpen, setTagFilterOpen] = useState(false);

  // Pagination state
  const [currentPage, setCurrentPage] = useState(1);

  const tagExtractor = useCallback(
    (tag: Tag) => `${tag.tag_key} ${tag.tag_value}`,
    []
  );
  const {
    query: tagQuery,
    setQuery: setTagQuery,
    filtered: filteredTags,
  } = useFilter(availableTags, tagExtractor);

  // Build the combined server-side filters from current state
  const buildFilters = (
    overrides: { time?: TimeFilter | null; tags?: Tag[] } = {}
  ): BaseFilters => {
    const time = overrides.time !== undefined ? overrides.time : timeFilter;
    const tags = overrides.tags !== undefined ? overrides.tags : selectedTags;
    const cutoff = time ? getTimeFilterDate(time) : null;
    return {
      time_cutoff: cutoff?.toISOString() ?? null,
      tags:
        tags.length > 0
          ? tags.map((t) => ({ tag_key: t.tag_key, tag_value: t.tag_value }))
          : null,
    };
  };

  // Reset source filter and pagination when results change
  useEffect(() => {
    setSelectedSources([]);
    setCurrentPage(1);
  }, [results]);

  // Create a set for fast lookup of LLM-selected docs
  const llmSelectedSet = new Set(llmSelectedDocIds ?? []);

  // Filter and sort results
  const filteredAndSortedResults = useMemo(() => {
    const filtered = results.filter((doc) => {
      // Source filter (client-side)
      if (selectedSources.length > 0) {
        if (!doc.source_type || !selectedSources.includes(doc.source_type)) {
          return false;
        }
      }

      return true;
    });

    // Sort: LLM-selected first, then by score
    return filtered.sort((a, b) => {
      const aSelected = llmSelectedSet.has(a.document_id);
      const bSelected = llmSelectedSet.has(b.document_id);

      if (aSelected && !bSelected) return -1;
      if (!aSelected && bSelected) return 1;

      return (b.score ?? 0) - (a.score ?? 0);
    });
  }, [results, selectedSources, llmSelectedSet]);

  // Pagination
  const totalPages = Math.max(
    1,
    Math.ceil(filteredAndSortedResults.length / RESULTS_PER_PAGE)
  );
  const paginatedResults = useMemo(() => {
    const start = (currentPage - 1) * RESULTS_PER_PAGE;
    return filteredAndSortedResults.slice(start, start + RESULTS_PER_PAGE);
  }, [filteredAndSortedResults, currentPage]);

  // Extract unique sources with metadata for the source filter
  const sourcesWithMeta = useMemo(() => {
    const sourceMap = new Map<
      string,
      { meta: SourceMetadata; count: number }
    >();

    for (const doc of results) {
      if (doc.source_type) {
        const existing = sourceMap.get(doc.source_type);
        if (existing) {
          existing.count++;
        } else {
          sourceMap.set(doc.source_type, {
            meta: getSourceMetadata(doc.source_type as ValidSources),
            count: 1,
          });
        }
      }
    }

    return Array.from(sourceMap.entries())
      .map(([source, data]) => ({
        source,
        ...data,
      }))
      .sort((a, b) => b.count - a.count);
  }, [results]);

  const handleSourceToggle = (source: string) => {
    setCurrentPage(1);
    if (selectedSources.includes(source)) {
      setSelectedSources(selectedSources.filter((s) => s !== source));
    } else {
      setSelectedSources([...selectedSources, source]);
    }
  };

  const showEmpty = !error && results.length === 0;

  // Show a centered spinner while search is in-flight (after all hooks)
  if (state.phase === "searching") {
    return (
      <div className="flex-1 min-h-0 w-full flex items-center justify-center">
        <SimpleLoader />
      </div>
    );
  }

  return (
    <div className="flex-1 min-h-0 w-full flex flex-col gap-3">
      {/* ── Top row: Filters + Result count ── */}
      <div className="flex-shrink-0 flex flex-row gap-x-4">
        <div
          className={cn(
            "flex flex-col justify-end gap-3",
            showEmpty ? "flex-1" : "flex-[3]"
          )}
        >
          <div className="flex flex-row gap-2">
            {/* Time filter */}
            <Popover open={timeFilterOpen} onOpenChange={setTimeFilterOpen}>
              <Popover.Trigger asChild>
                <FilterButton
                  icon={SvgClock}
                  active={!!timeFilter}
                  onClear={() => {
                    setTimeFilter(null);
                    onRefineSearch(buildFilters({ time: null }));
                  }}
                >
                  {TIME_FILTER_OPTIONS.find((o) => o.value === timeFilter)
                    ?.label ?? "All Time"}
                </FilterButton>
              </Popover.Trigger>
              <Popover.Content align="start" width="md">
                <PopoverMenu>
                  {TIME_FILTER_OPTIONS.map((opt) => (
                    <LineItemButton
                      key={opt.value}
                      onClick={() => {
                        setTimeFilter(opt.value);
                        setTimeFilterOpen(false);
                        onRefineSearch(buildFilters({ time: opt.value }));
                      }}
                      state={timeFilter === opt.value ? "selected" : "empty"}
                      icon={timeFilter === opt.value ? SvgCheck : SvgClock}
                      title={opt.label}
                      sizePreset="main-ui"
                      variant="section"
                    />
                  ))}
                </PopoverMenu>
              </Popover.Content>
            </Popover>

            {/* Tag filter */}
            <Popover open={tagFilterOpen} onOpenChange={setTagFilterOpen}>
              <Popover.Trigger asChild>
                <FilterButton
                  icon={SvgTag}
                  active={selectedTags.length > 0}
                  onClear={() => {
                    setSelectedTags([]);
                    onRefineSearch(buildFilters({ tags: [] }));
                  }}
                >
                  {selectedTags.length > 0
                    ? `${selectedTags.length} Tag${
                        selectedTags.length > 1 ? "s" : ""
                      }`
                    : "Tags"}
                </FilterButton>
              </Popover.Trigger>
              <Popover.Content align="start" width="lg">
                <PopoverMenu>
                  <InputTypeIn
                    leftSearchIcon
                    placeholder="Filter tags..."
                    value={tagQuery}
                    onChange={(e) => setTagQuery(e.target.value)}
                    onClear={() => setTagQuery("")}
                    variant="internal"
                  />
                  {filteredTags.map((tag) => {
                    const isSelected = selectedTags.some(
                      (t) =>
                        t.tag_key === tag.tag_key &&
                        t.tag_value === tag.tag_value
                    );
                    return (
                      <LineItemButton
                        key={`${tag.tag_key}=${tag.tag_value}`}
                        onClick={() => {
                          const next = isSelected
                            ? selectedTags.filter(
                                (t) =>
                                  t.tag_key !== tag.tag_key ||
                                  t.tag_value !== tag.tag_value
                              )
                            : [...selectedTags, tag];
                          setSelectedTags(next);
                          onRefineSearch(buildFilters({ tags: next }));
                        }}
                        state={isSelected ? "selected" : "empty"}
                        icon={isSelected ? SvgCheck : SvgTag}
                        title={tag.tag_value}
                        sizePreset="main-ui"
                        variant="section"
                      />
                    );
                  })}
                </PopoverMenu>
              </Popover.Content>
            </Popover>
          </div>

          <Separator noPadding />
        </div>

        {!showEmpty && (
          <div className="flex-1 flex flex-col justify-end gap-3">
            <Section alignItems="start">
              <Text text03 mainUiMuted>
                {results.length} Results
              </Text>
            </Section>

            <Separator noPadding />
          </div>
        )}
      </div>

      {/* ── Middle row: Results + Source filter ── */}
      <div className="flex-1 min-h-0 flex flex-row gap-x-4">
        <div
          className={cn(
            "min-h-0 overflow-y-scroll flex flex-col gap-2",
            showEmpty ? "flex-1 justify-center" : "flex-[3]"
          )}
        >
          {error ? (
            <EmptyMessage title="Search failed" description={error} />
          ) : paginatedResults.length > 0 ? (
            <>
              {paginatedResults.map((doc) => (
                <div
                  key={`${doc.document_id}-${doc.chunk_ind}`}
                  className="flex-shrink-0"
                >
                  <SearchCard
                    document={doc}
                    isLlmSelected={llmSelectedSet.has(doc.document_id)}
                    onDocumentClick={onDocumentClick}
                  />
                </div>
              ))}
            </>
          ) : (
            <IllustrationContent
              illustration={SvgNoResult}
              title="No results found"
              description="Check your connectors/filters or try a different search term."
            />
          )}
        </div>

        {!showEmpty && (
          <div className="flex-1 min-h-0 overflow-y-auto flex flex-col gap-4 px-1">
            <Section gap={0.25} height="fit">
              {sourcesWithMeta.map(({ source, meta, count }) => (
                <LineItemButton
                  key={source}
                  icon={(props) => (
                    <SourceIcon
                      sourceType={source as ValidSources}
                      iconSize={16}
                      {...props}
                    />
                  )}
                  onClick={() => handleSourceToggle(source)}
                  state={
                    selectedSources.includes(source) ? "selected" : "empty"
                  }
                  title={meta.displayName}
                  selectVariant="select-heavy"
                  sizePreset="main-ui"
                  variant="section"
                  rightChildren={<Text text03>{count}</Text>}
                />
              ))}
            </Section>
          </div>
        )}
      </div>

      {/* ── Bottom row: Pagination ── */}
      {!showEmpty && (
        <Section height="fit">
          <Pagination
            currentPage={currentPage}
            totalPages={totalPages}
            onChange={setCurrentPage}
          />
        </Section>
      )}
    </div>
  );
}


================================================
FILE: web/src/hooks/__tests__/useShowOnboarding.test.tsx
================================================
import React from "react";
import { renderHook, act } from "@testing-library/react";
import "@testing-library/jest-dom";
import { useShowOnboarding } from "@/hooks/useShowOnboarding";
import { OnboardingStep } from "@/interfaces/onboarding";

// Mock underlying dependencies used by the inlined useOnboardingState
jest.mock("@/providers/UserProvider", () => ({
  useUser: () => ({
    user: null,
    refreshUser: jest.fn(),
  }),
}));

// Configurable mock for useProviderStatus
const mockProviderStatus = {
  llmProviders: [] as unknown[],
  isLoadingProviders: false,
  hasProviders: false,
  providerOptions: [],
  refreshProviderInfo: jest.fn(),
};

jest.mock("@/components/chat/ProviderContext", () => ({
  useProviderStatus: () => mockProviderStatus,
}));

jest.mock("@/hooks/useLLMProviders", () => ({
  useLLMProviders: () => ({
    refetch: jest.fn(),
  }),
}));

jest.mock("@/lib/userSettings", () => ({
  updateUserPersonalization: jest.fn(),
}));

function renderUseShowOnboarding(
  overrides: {
    isLoadingProviders?: boolean;
    hasAnyProvider?: boolean;
    isLoadingChatSessions?: boolean;
    chatSessionsCount?: number;
    userId?: string;
  } = {}
) {
  // Configure the provider mock based on overrides
  mockProviderStatus.isLoadingProviders = overrides.isLoadingProviders ?? false;
  mockProviderStatus.hasProviders = overrides.hasAnyProvider ?? false;
  mockProviderStatus.llmProviders = overrides.hasAnyProvider
    ? [{ provider: "openai" }]
    : [];

  const defaultParams = {
    liveAgent: undefined as undefined,
    isLoadingChatSessions: overrides.isLoadingChatSessions ?? false,
    chatSessionsCount: overrides.chatSessionsCount ?? 0,
    userId: "userId" in overrides ? overrides.userId : "user-1",
  };

  return renderHook((props) => useShowOnboarding(props), {
    initialProps: defaultParams,
  });
}

describe("useShowOnboarding", () => {
  beforeEach(() => {
    jest.clearAllMocks();
    localStorage.clear();
    // Reset mock to defaults
    mockProviderStatus.llmProviders = [];
    mockProviderStatus.isLoadingProviders = false;
    mockProviderStatus.hasProviders = false;
    mockProviderStatus.providerOptions = [];
  });

  it("returns showOnboarding=false while providers are loading", () => {
    const { result } = renderUseShowOnboarding({
      isLoadingProviders: true,
    });
    expect(result.current.showOnboarding).toBe(false);
  });

  it("returns showOnboarding=false while chat sessions are loading", () => {
    const { result } = renderUseShowOnboarding({
      isLoadingChatSessions: true,
    });
    expect(result.current.showOnboarding).toBe(false);
  });

  it("returns showOnboarding=false when userId is undefined", () => {
    const { result } = renderUseShowOnboarding({
      userId: undefined,
    });
    expect(result.current.showOnboarding).toBe(false);
  });

  it("returns showOnboarding=true when no providers and no chat sessions", () => {
    const { result } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 0,
    });
    expect(result.current.showOnboarding).toBe(true);
  });

  it("returns showOnboarding=false when providers exist", () => {
    const { result } = renderUseShowOnboarding({
      hasAnyProvider: true,
      chatSessionsCount: 0,
    });
    expect(result.current.showOnboarding).toBe(false);
  });

  it("returns showOnboarding=false when chatSessionsCount > 0", () => {
    const { result } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 5,
    });
    expect(result.current.showOnboarding).toBe(false);
  });

  it("self-corrects showOnboarding to false when providers arrive late", () => {
    const { result, rerender } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 0,
      userId: "user-1",
    });
    expect(result.current.showOnboarding).toBe(true);

    // Simulate providers arriving — update the mock
    mockProviderStatus.hasProviders = true;
    mockProviderStatus.llmProviders = [{ provider: "openai" }];

    rerender({
      liveAgent: undefined,
      isLoadingChatSessions: false,
      chatSessionsCount: 0,
      userId: "user-1",
    });

    // Should correct to false — providers exist, no need for LLM setup flow
    expect(result.current.showOnboarding).toBe(false);
  });

  it("re-evaluates when userId changes", () => {
    const { result, rerender } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 0,
      userId: "user-1",
    });
    expect(result.current.showOnboarding).toBe(true);

    // Change to a new userId with providers available — update the mock
    mockProviderStatus.hasProviders = true;
    mockProviderStatus.llmProviders = [{ provider: "openai" }];

    rerender({
      liveAgent: undefined,
      isLoadingChatSessions: false,
      chatSessionsCount: 0,
      userId: "user-2",
    });

    expect(result.current.showOnboarding).toBe(false);
  });

  it("hideOnboarding sets showOnboarding to false", () => {
    const { result } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 0,
    });
    expect(result.current.showOnboarding).toBe(true);

    act(() => {
      result.current.hideOnboarding();
    });

    expect(result.current.showOnboarding).toBe(false);
  });

  it("finishOnboarding sets showOnboarding to false", () => {
    const { result } = renderUseShowOnboarding({
      hasAnyProvider: false,
      chatSessionsCount: 0,
    });
    expect(result.current.showOnboarding).toBe(true);

    act(() => {
      result.current.finishOnboarding();
    });

    expect(result.current.showOnboarding).toBe(false);
  });

  it("returns onboardingState and actions", () => {
    const { result } = renderUseShowOnboarding();
    expect(result.current.onboardingState.currentStep).toBe(
      OnboardingStep.Welcome
    );
    expect(result.current.onboardingActions).toBeDefined();
    expect(result.current.llmDescriptors).toEqual([]);
  });

  describe("localStorage persistence", () => {
    it("finishOnboarding sets localStorage flag and onboardingDismissed", () => {
      const { result } = renderUseShowOnboarding({
        hasAnyProvider: false,
        chatSessionsCount: 0,
      });
      expect(result.current.showOnboarding).toBe(true);
      expect(result.current.onboardingDismissed).toBe(false);

      act(() => {
        result.current.finishOnboarding();
      });

      expect(result.current.showOnboarding).toBe(false);
      expect(result.current.onboardingDismissed).toBe(true);
      expect(localStorage.getItem("onyx:onboardingCompleted:user-1")).toBe(
        "true"
      );
    });

    it("hideOnboarding sets localStorage flag and onboardingDismissed", () => {
      const { result } = renderUseShowOnboarding({
        hasAnyProvider: false,
        chatSessionsCount: 0,
      });

      act(() => {
        result.current.hideOnboarding();
      });

      expect(result.current.onboardingDismissed).toBe(true);
      expect(localStorage.getItem("onyx:onboardingCompleted:user-1")).toBe(
        "true"
      );
    });

    it("showOnboarding stays false when localStorage flag is set", () => {
      localStorage.setItem("onyx:onboardingCompleted:user-1", "true");

      const { result } = renderUseShowOnboarding({
        hasAnyProvider: false,
        chatSessionsCount: 0,
      });

      expect(result.current.showOnboarding).toBe(false);
      expect(result.current.onboardingDismissed).toBe(true);
    });

    it("onboardingDismissed is false when localStorage flag is not set", () => {
      const { result } = renderUseShowOnboarding();
      expect(result.current.onboardingDismissed).toBe(false);
    });

    it("dismissal for user-1 does not suppress onboarding for user-2", () => {
      const { result: result1 } = renderUseShowOnboarding({
        hasAnyProvider: false,
        chatSessionsCount: 0,
        userId: "1",
      });
      expect(result1.current.showOnboarding).toBe(true);

      act(() => {
        result1.current.finishOnboarding();
      });
      expect(result1.current.onboardingDismissed).toBe(true);
      expect(localStorage.getItem("onyx:onboardingCompleted:1")).toBe("true");

      // user-2 should still see onboarding
      const { result: result2 } = renderUseShowOnboarding({
        hasAnyProvider: false,
        chatSessionsCount: 0,
        userId: "2",
      });
      expect(result2.current.showOnboarding).toBe(true);
      expect(result2.current.onboardingDismissed).toBe(false);
      expect(localStorage.getItem("onyx:onboardingCompleted:2")).toBeNull();
    });
  });
});


================================================
FILE: web/src/hooks/appNavigation.ts
================================================
"use client";

import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";
import { useCallback } from "react";

interface UseAppRouterProps {
  chatSessionId?: string;
  agentId?: number;
  projectId?: number;
}

export function useAppRouter() {
  const router = useRouter();
  return useCallback(
    ({ chatSessionId, agentId, projectId }: UseAppRouterProps = {}) => {
      const finalParams = [];

      if (chatSessionId)
        finalParams.push(`${SEARCH_PARAM_NAMES.CHAT_ID}=${chatSessionId}`);
      else if (agentId)
        finalParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${agentId}`);
      else if (projectId)
        finalParams.push(`${SEARCH_PARAM_NAMES.PROJECT_ID}=${projectId}`);

      const finalString = finalParams.join("&");
      const finalUrl = `/app?${finalString}`;

      router.push(finalUrl as Route);
    },
    [router]
  );
}

export function useAppParams() {
  const searchParams = useSearchParams();
  return useCallback((name: string) => searchParams.get(name), [searchParams]);
}


================================================
FILE: web/src/hooks/formHooks.ts
================================================
"use client";

import { useField } from "formik";

/**
 * Custom hook for handling form input changes in Formik forms.
 *
 * This hook automatically sets the field as "touched" when its value changes,
 * enabling immediate validation feedback after the first user interaction.
 *
 * @example
 * ```tsx
 * function MyField({ name }: { name: string }) {
 *   const [field] = useField(name);
 *   const onChange = useFormInputCallback(name);
 *
 *   return (
 *     <input
 *       name={name}
 *       value={field.value}
 *       onChange={onChange}
 *     />
 *   );
 * }
 * ```
 *
 * @example
 * ```tsx
 * // With callback
 * function MySelect({ name, onValueChange }: Props) {
 *   const [field] = useField(name);
 *   const onChange = useFormInputCallback(name, onValueChange);
 *
 *   return (
 *     <Select value={field.value} onValueChange={onChange} />
 *   );
 * }
 * ```
 */
export function useOnChangeEvent<T = any>(
  name: string,
  f?: (event: T) => void
) {
  const [field, , helpers] = useField<T>(name);
  return (event: T) => {
    helpers.setTouched(true);
    f?.(event);
    field.onChange(event);
  };
}

/**
 * Custom hook for handling form value changes in Formik forms.
 *
 * This hook automatically sets the field as "touched" when its value changes,
 * enabling immediate validation feedback after the first user interaction.
 * Use this for components that pass values directly (not events).
 *
 * @example
 * ```tsx
 * function MySelect({ name, onValueChange }: Props) {
 *   const [field] = useField(name);
 *   const onChange = useOnChangeValue(name, onValueChange);
 *
 *   return (
 *     <Select value={field.value} onValueChange={onChange} />
 *   );
 * }
 * ```
 *
 * @example
 * ```tsx
 * function MyDatePicker({ name }: Props) {
 *   const [field] = useField(name);
 *   const onChange = useOnChangeValue(name);
 *
 *   return (
 *     <DatePicker selectedDate={field.value} setSelectedDate={onChange} />
 *   );
 * }
 * ```
 */
export function useOnChangeValue<T = any>(
  name: string,
  f?: (value: T) => void
) {
  const [, , helpers] = useField<T>(name);
  return (value: T) => {
    helpers.setTouched(true);
    f?.(value);
    helpers.setValue(value);
  };
}

/**
 * Custom hook for handling form input blur events in Formik forms.
 *
 * This hook chains the consumer's onBlur callback with Formik's blur handler,
 * ensuring both effects run when the field loses focus.
 *
 * @example
 * ```tsx
 * function MyField({ name, onBlur }: Props) {
 *   const [field] = useField(name);
 *   const handleBlur = useOnBlurEvent(name, onBlur);
 *
 *   return (
 *     <input
 *       name={name}
 *       value={field.value}
 *       onBlur={handleBlur}
 *     />
 *   );
 * }
 * ```
 */
export function useOnBlurEvent<T = any>(name: string, f?: (event: T) => void) {
  const [field] = useField<T>(name);
  return (event: T) => {
    f?.(event);
    field.onBlur(event);
  };
}


================================================
FILE: web/src/hooks/useAdminPersonas.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { buildApiPath } from "@/lib/urlBuilder";
import { Persona } from "@/app/admin/agents/interfaces";

interface UseAdminPersonasOptions {
  includeDeleted?: boolean;
  getEditable?: boolean;
  includeDefault?: boolean;
  pageNum?: number;
  pageSize?: number;
}

interface PaginatedPersonasResponse {
  items: Persona[];
  total_items: number;
}

export const useAdminPersonas = (options?: UseAdminPersonasOptions) => {
  const {
    includeDeleted = false,
    getEditable = false,
    includeDefault = false,
    pageNum,
    pageSize,
  } = options || {};

  // If pageNum and pageSize are provided, use paginated endpoint.
  const usePagination = pageNum !== undefined && pageSize !== undefined;

  const url = usePagination
    ? buildApiPath("/api/admin/agents", {
        include_deleted: includeDeleted,
        get_editable: getEditable,
        include_default: includeDefault,
        page_num: pageNum,
        page_size: pageSize,
      })
    : buildApiPath("/api/admin/persona", {
        include_deleted: includeDeleted,
        get_editable: getEditable,
      });

  const { data, error, isLoading, mutate } = useSWR<
    Persona[] | PaginatedPersonasResponse
  >(url, errorHandlingFetcher);

  // Handle both paginated and non-paginated responses
  const personas = usePagination
    ? (data as PaginatedPersonasResponse)?.items || []
    : (data as Persona[]) || [];

  const totalItems = usePagination
    ? (data as PaginatedPersonasResponse)?.total_items || 0
    : personas.length;

  return {
    personas,
    totalItems,
    error,
    isLoading,
    refresh: mutate,
  };
};


================================================
FILE: web/src/hooks/useAdminUsers.ts
================================================
"use client";

import { useCallback } from "react";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SWR_KEYS } from "@/lib/swr-keys";
import { AccountType, UserStatus } from "@/lib/types";
import type { UserRole, InvitedUserSnapshot } from "@/lib/types";
import type {
  UserRow,
  UserGroupInfo,
} from "@/refresh-pages/admin/UsersPage/interfaces";

// ---------------------------------------------------------------------------
// Backend response shape (GET /manage/users/accepted/all)
// ---------------------------------------------------------------------------

interface FullUserSnapshot {
  id: string;
  email: string;
  role: UserRole;
  account_type: AccountType;
  is_active: boolean;
  password_configured: boolean;
  personal_name: string | null;
  created_at: string;
  updated_at: string;
  groups: UserGroupInfo[];
  is_scim_synced: boolean;
}

// ---------------------------------------------------------------------------
// Converters
// ---------------------------------------------------------------------------

function toUserRow(snapshot: FullUserSnapshot): UserRow {
  return {
    id: snapshot.id,
    email: snapshot.email,
    role: snapshot.role,
    status: snapshot.is_active ? UserStatus.ACTIVE : UserStatus.INACTIVE,
    is_active: snapshot.is_active,
    is_scim_synced: snapshot.is_scim_synced,
    personal_name: snapshot.personal_name,
    created_at: snapshot.created_at,
    updated_at: snapshot.updated_at,
    groups: snapshot.groups,
  };
}

function emailToUserRow(
  email: string,
  status: UserStatus.INVITED | UserStatus.REQUESTED
): UserRow {
  return {
    id: null,
    email,
    role: null,
    status,
    is_active: false,
    is_scim_synced: false,
    personal_name: null,
    created_at: null,
    updated_at: null,
    groups: [],
  };
}

// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------

export default function useAdminUsers() {
  const {
    data: acceptedData,
    isLoading: acceptedLoading,
    error: acceptedError,
    mutate: acceptedMutate,
  } = useSWR<FullUserSnapshot[]>(SWR_KEYS.acceptedUsers, errorHandlingFetcher);

  const {
    data: invitedData,
    isLoading: invitedLoading,
    error: invitedError,
    mutate: invitedMutate,
  } = useSWR<InvitedUserSnapshot[]>(
    SWR_KEYS.invitedUsers,
    errorHandlingFetcher
  );

  const {
    data: requestedData,
    isLoading: requestedLoading,
    error: requestedError,
    mutate: requestedMutate,
  } = useSWR<InvitedUserSnapshot[]>(
    NEXT_PUBLIC_CLOUD_ENABLED ? SWR_KEYS.pendingTenantUsers : null,
    errorHandlingFetcher
  );

  const acceptedRows = (acceptedData ?? []).map(toUserRow);
  const invitedRows = (invitedData ?? []).map((u) =>
    emailToUserRow(u.email, UserStatus.INVITED)
  );
  const requestedRows = (requestedData ?? []).map((u) =>
    emailToUserRow(u.email, UserStatus.REQUESTED)
  );

  const users = [...invitedRows, ...requestedRows, ...acceptedRows];

  const isLoading = acceptedLoading || invitedLoading || requestedLoading;
  const error = acceptedError ?? invitedError ?? requestedError;

  const refresh = useCallback(() => {
    acceptedMutate();
    invitedMutate();
    requestedMutate();
  }, [acceptedMutate, invitedMutate, requestedMutate]);

  return { users, isLoading, error, refresh };
}


================================================
FILE: web/src/hooks/useAgentController.ts
================================================
"use client";

import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { useCallback, useMemo, useState } from "react";
import { ChatSession } from "@/app/app/interfaces";
import { useAgents, usePinnedAgents } from "@/hooks/useAgents";
import { useSearchParams } from "next/navigation";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { useSettingsContext } from "@/providers/SettingsProvider";

export default function useAgentController({
  selectedChatSession,
  onAgentSelect,
}: {
  selectedChatSession: ChatSession | null | undefined;
  onAgentSelect?: () => void;
}) {
  const searchParams = useSearchParams();
  const { agents: availableAgents } = useAgents();
  const { pinnedAgents: pinnedAgents } = usePinnedAgents();
  const combinedSettings = useSettingsContext();

  const defaultAgentIdRaw = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);
  const defaultAgentId = defaultAgentIdRaw
    ? parseInt(defaultAgentIdRaw)
    : undefined;

  const existingChatSessionAgentId = selectedChatSession?.persona_id;
  const [selectedAgent, setSelectedAssistant] = useState<
    MinimalPersonaSnapshot | undefined
  >(
    // NOTE: look through available assistants here, so that even if the user
    // has hidden this agent it still shows the correct assistant when
    // going back to an old chat session
    existingChatSessionAgentId !== undefined
      ? availableAgents.find(
          (assistant) => assistant.id === existingChatSessionAgentId
        )
      : defaultAgentId !== undefined
        ? availableAgents.find((assistant) => assistant.id === defaultAgentId)
        : undefined
  );

  // Current assistant is decided based on this ordering
  // 1. Alternative assistant (assistant selected explicitly by user)
  // 2. Selected assistant (assistant default in this chat session)
  // 3. Unified assistant (ID 0) if available (unless disabled)
  // 4. First pinned assistants (ordered list of pinned assistants)
  // 5. Available assistants (ordered list of available assistants)
  // Relevant test: `live_assistant.spec.ts`
  const liveAgent: MinimalPersonaSnapshot | undefined = useMemo(() => {
    if (selectedAgent) return selectedAgent;

    const disableDefaultAssistant =
      combinedSettings?.settings?.disable_default_assistant ?? false;

    if (disableDefaultAssistant) {
      // Skip unified assistant (ID 0), go straight to pinned/available
      // Filter out ID 0 from both pinned and available assistants
      const nonDefaultPinned = pinnedAgents.filter((a) => a.id !== 0);
      const nonDefaultAvailable = availableAgents.filter((a) => a.id !== 0);

      return (
        nonDefaultPinned[0] || nonDefaultAvailable[0] || availableAgents[0] // Last resort fallback
      );
    }

    // Try to use the unified assistant (ID 0) as default
    const unifiedAgent = availableAgents.find((a) => a.id === 0);
    if (unifiedAgent) return unifiedAgent;

    // Fall back to pinned or available assistants
    return pinnedAgents[0] || availableAgents[0];
  }, [selectedAgent, pinnedAgents, availableAgents, combinedSettings]);

  const setSelectedAgentFromId = useCallback(
    (agentId: number | null | undefined) => {
      // NOTE: also intentionally look through available assistants here, so that
      // even if the user has hidden an agent they can still go back to it
      // for old chats
      let newAssistant =
        agentId !== null
          ? availableAgents.find((assistant) => assistant.id === agentId)
          : undefined;

      // if no assistant was passed in / found, use the default agent
      if (!newAssistant && defaultAgentId !== undefined) {
        newAssistant = availableAgents.find(
          (assistant) => assistant.id === defaultAgentId
        );
      }

      setSelectedAssistant(newAssistant);
      onAgentSelect?.();
    },
    [availableAgents, defaultAgentId, onAgentSelect]
  );

  return {
    // main assistant selection
    selectedAgent,
    setSelectedAgentFromId,

    // final computed assistant
    liveAgent,
  };
}


================================================
FILE: web/src/hooks/useAgentPreferences.ts
================================================
"use client";

import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  UserSpecificAgentPreference,
  UserSpecificAgentPreferences,
} from "@/lib/types";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { useCallback } from "react";

// TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766

// TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
const buildUpdateAgentPreferenceUrl = (agentId: number) =>
  `/api/user/assistant/${agentId}/preferences`;

/**
 * Hook for managing user-specific agent preferences using SWR.
 * Provides automatic caching, deduplication, and revalidation.
 */
export default function useAgentPreferences() {
  const { data, mutate } = useSWR<UserSpecificAgentPreferences>(
    SWR_KEYS.agentPreferences,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  const setSpecificAgentPreferences = useCallback(
    async (
      agentId: number,
      newAgentPreference: UserSpecificAgentPreference
    ) => {
      // Optimistic update
      mutate(
        {
          ...data,
          [agentId]: newAgentPreference,
        },
        false
      );

      try {
        const response = await fetch(buildUpdateAgentPreferenceUrl(agentId), {
          method: "PATCH",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify(newAgentPreference),
        });

        if (!response.ok) {
          console.error(
            `Failed to update agent preferences: ${response.status}`
          );
        }
      } catch (error) {
        console.error("Error updating agent preferences:", error);
      }

      // Revalidate after update
      mutate();
    },
    [data, mutate]
  );

  return {
    agentPreferences: data ?? null,
    setSpecificAgentPreferences,
  };
}


================================================
FILE: web/src/hooks/useAgents.ts
================================================
"use client";

import useSWR from "swr";
import { useState, useEffect, useMemo, useCallback } from "react";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  MinimalPersonaSnapshot,
  FullPersona,
} from "@/app/admin/agents/interfaces";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { pinAgents } from "@/lib/agents";
import { useUser } from "@/providers/UserProvider";
import { useSearchParams } from "next/navigation";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import useChatSessions from "./useChatSessions";

/**
 * Fetches all agents (personas) available to the current user.
 *
 * Returns minimal agent snapshots containing basic information like name, description,
 * tools, and display settings. Use this for listing agents in UI components like
 * sidebars, dropdowns, or agent selection interfaces.
 *
 * For full agent details including user_file_ids, groups, and advanced settings,
 * use `useAgent(personaId)` instead.
 *
 * @returns Object containing:
 *   - agents: Array of MinimalPersonaSnapshot objects (empty array while loading)
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Any error that occurred during fetch
 *   - refresh: Function to manually revalidate the data
 *
 * @example
 * const { agents, isLoading } = useAgents();
 * if (isLoading) return <Spinner />;
 * return <AgentList agents={agents} />;
 */
export function useAgents() {
  const { data, error, mutate } = useSWR<MinimalPersonaSnapshot[]>(
    SWR_KEYS.personas,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    agents: data ?? [],
    isLoading: !error && !data,
    error,
    refresh: mutate,
  };
}

/**
 * Fetches a single agent (persona) by ID with full details.
 *
 * Returns complete agent information including user_file_ids, groups, system prompts,
 * and all configuration settings. Use this when you need detailed agent data for
 * editing, configuration, or displaying full agent details.
 *
 * For listing multiple agents with basic information, use `useAgents()` instead.
 *
 * @param agentId - The ID of the agent to fetch, or null to skip fetching
 * @returns Object containing:
 *   - agent: FullPersona object with complete agent details, or null if not loaded/not found
 *   - isLoading: Boolean indicating if data is being fetched (false when personaId is null)
 *   - error: Any error that occurred during fetch
 *   - refresh: Function to manually revalidate the data
 *
 * @example
 * const { agent, isLoading } = useAgent(selectedAgentId);
 * if (isLoading) return <Spinner />;
 * if (!agent) return <NotFound />;
 * return <AgentEditor agent={agent} />;
 */
export function useAgent(agentId: number | null) {
  const { data, error, isLoading, mutate } = useSWR<FullPersona>(
    agentId ? SWR_KEYS.persona(agentId) : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    agent: data ?? null,
    isLoading,
    error,
    refresh: mutate,
  };
}

/**
 * Hook that combines useAgents and usePinnedAgents to return full agent objects
 * with local state for optimistic drag-and-drop updates.
 */
export function usePinnedAgents() {
  const { user, refreshUser } = useUser();
  const { agents, isLoading: isLoadingAgents } = useAgents();

  // Local state for optimistic updates during drag-and-drop
  const [localPinnedAgents, setLocalPinnedAgents] = useState<
    MinimalPersonaSnapshot[]
  >([]);

  // Derive pinned agents from server data
  const serverPinnedAgents = useMemo(() => {
    if (agents.length === 0) return [];

    // If pinned_assistants is null/undefined (never set), show featured personas
    // If it's an empty array (user explicitly unpinned all), show nothing
    const pinnedIds = user?.preferences.pinned_assistants;
    if (pinnedIds === null || pinnedIds === undefined) {
      return agents.filter((agent) => agent.is_featured && agent.id !== 0);
    }

    return pinnedIds
      .map((id) => agents.find((agent) => agent.id === id))
      .filter((agent): agent is MinimalPersonaSnapshot => !!agent);
  }, [agents, user?.preferences.pinned_assistants]);

  // Sync server data → local state when server data changes
  // Only sync when agents have loaded (to avoid syncing empty during initial load)
  useEffect(() => {
    if (agents.length > 0) {
      setLocalPinnedAgents(serverPinnedAgents);
    }
  }, [serverPinnedAgents, agents.length]);

  // Toggle pin status - updates local state AND persists to server
  const togglePinnedAgent = useCallback(
    async (agent: MinimalPersonaSnapshot, shouldPin: boolean) => {
      const newPinned = shouldPin
        ? [...localPinnedAgents, agent]
        : localPinnedAgents.filter((a) => a.id !== agent.id);

      // Optimistic update
      setLocalPinnedAgents(newPinned);

      // Persist to server
      await pinAgents(newPinned.map((a) => a.id));
      refreshUser(); // Refresh user to sync pinned_assistants
    },
    [localPinnedAgents, refreshUser]
  );

  // Update pinned agents order (for drag-and-drop) - updates AND persists
  const updatePinnedAgents = useCallback(
    async (newPinnedAgents: MinimalPersonaSnapshot[]) => {
      // Optimistic update
      setLocalPinnedAgents(newPinnedAgents);

      // Persist to server
      await pinAgents(newPinnedAgents.map((a) => a.id));
      refreshUser();
    },
    [refreshUser]
  );

  return {
    pinnedAgents: localPinnedAgents,
    togglePinnedAgent,
    updatePinnedAgents, // Use this instead of setPinnedAgents for drag-and-drop
    isLoading: isLoadingAgents,
  };
}

/**
 * Hook to determine the currently active agent based on:
 * 1. URL param `agentId`
 * 2. Chat session's `persona_id`
 * 3. Falls back to null if neither is present
 */
export function useCurrentAgent(): MinimalPersonaSnapshot | null {
  const { agents } = useAgents();
  const searchParams = useSearchParams();

  const agentIdRaw = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);
  const { currentChatSession } = useChatSessions();

  const currentAgent = useMemo(() => {
    if (agents.length === 0) return null;

    // Priority: URL param > chat session persona > null
    const agentId = agentIdRaw
      ? parseInt(agentIdRaw)
      : currentChatSession?.persona_id;

    if (!agentId) return null;

    return agents.find((a) => a.id === agentId) ?? null;
  }, [agents, agentIdRaw, currentChatSession?.persona_id]);

  return currentAgent;
}


================================================
FILE: web/src/hooks/useAppFocus.ts
================================================
"use client";

// "AppFocus" is the current part of the main application which is active / focused on.
// Namely, if the URL is pointing towards a "chat", then a `{ type: "chat", id: "..." }` is returned.
//
// This is useful in determining what `SidebarTab` should be active, for example.

import { useMemo } from "react";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { usePathname, useSearchParams } from "next/navigation";

export type AppFocusType =
  | { type: "agent" | "project" | "chat"; id: string }
  | "new-session"
  | "more-agents"
  | "user-settings"
  | "shared-chat";

export class AppFocus {
  constructor(public value: AppFocusType) {}

  isAgent(): boolean {
    return typeof this.value === "object" && this.value.type === "agent";
  }

  isProject(): boolean {
    return typeof this.value === "object" && this.value.type === "project";
  }

  isChat(): boolean {
    return typeof this.value === "object" && this.value.type === "chat";
  }

  isSharedChat(): boolean {
    return this.value === "shared-chat";
  }

  isNewSession(): boolean {
    return this.value === "new-session";
  }

  isMoreAgents(): boolean {
    return this.value === "more-agents";
  }

  isUserSettings(): boolean {
    return this.value === "user-settings";
  }

  getId(): string | null {
    return typeof this.value === "object" ? this.value.id : null;
  }

  getType():
    | "agent"
    | "project"
    | "chat"
    | "shared-chat"
    | "new-session"
    | "more-agents"
    | "user-settings" {
    return typeof this.value === "object" ? this.value.type : this.value;
  }
}

export default function useAppFocus(): AppFocus {
  const pathname = usePathname();
  const searchParams = useSearchParams();

  const chatId = searchParams.get(SEARCH_PARAM_NAMES.CHAT_ID);
  const agentId = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID);
  const projectId = searchParams.get(SEARCH_PARAM_NAMES.PROJECT_ID);

  // Memoize on the values that determine which AppFocus is constructed.
  // AppFocus is immutable, so same inputs → same instance.
  return useMemo(() => {
    if (pathname.startsWith("/app/shared/")) {
      return new AppFocus("shared-chat");
    }
    if (pathname.startsWith("/app/settings")) {
      return new AppFocus("user-settings");
    }
    if (pathname.startsWith("/app/agents")) {
      return new AppFocus("more-agents");
    }
    if (chatId) return new AppFocus({ type: "chat", id: chatId });
    if (agentId) return new AppFocus({ type: "agent", id: agentId });
    if (projectId) return new AppFocus({ type: "project", id: projectId });
    return new AppFocus("new-session");
  }, [pathname, chatId, agentId, projectId]);
}


================================================
FILE: web/src/hooks/useAuthTypeMetadata.ts
================================================
import useSWR from "swr";
import { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SWR_KEYS } from "@/lib/swr-keys";

interface AuthTypeAPIResponse {
  auth_type: string;
  requires_verification: boolean;
  anonymous_user_enabled: boolean | null;
  password_min_length: number;
  has_users: boolean;
  oauth_enabled: boolean;
}

export interface AuthTypeMetadata {
  authType: AuthType;
  autoRedirect: boolean;
  requiresVerification: boolean;
  anonymousUserEnabled: boolean | null;
  passwordMinLength: number;
  hasUsers: boolean;
  oauthEnabled: boolean;
}

const DEFAULT_AUTH_TYPE_METADATA: AuthTypeMetadata = {
  authType: NEXT_PUBLIC_CLOUD_ENABLED ? AuthType.CLOUD : AuthType.BASIC,
  autoRedirect: false,
  requiresVerification: false,
  anonymousUserEnabled: null,
  passwordMinLength: 0,
  hasUsers: false,
  oauthEnabled: false,
};

async function fetchAuthTypeMetadata(url: string): Promise<AuthTypeMetadata> {
  const res = await fetch(url);
  if (!res.ok) throw new Error("Failed to fetch auth type metadata");
  const data: AuthTypeAPIResponse = await res.json();
  const authType = NEXT_PUBLIC_CLOUD_ENABLED
    ? AuthType.CLOUD
    : (data.auth_type as AuthType);
  return {
    authType,
    autoRedirect: authType === AuthType.OIDC || authType === AuthType.SAML,
    requiresVerification: data.requires_verification,
    anonymousUserEnabled: data.anonymous_user_enabled,
    passwordMinLength: data.password_min_length,
    hasUsers: data.has_users,
    oauthEnabled: data.oauth_enabled,
  };
}

export function useAuthTypeMetadata(): {
  authTypeMetadata: AuthTypeMetadata;
  isLoading: boolean;
  error: Error | undefined;
} {
  const { data, error, isLoading } = useSWR<AuthTypeMetadata>(
    SWR_KEYS.authType,
    fetchAuthTypeMetadata,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 30_000,
    }
  );

  return {
    authTypeMetadata: data ?? DEFAULT_AUTH_TYPE_METADATA,
    isLoading,
    error,
  };
}


================================================
FILE: web/src/hooks/useAvailableTools.ts
================================================
"use client";

import useSWR from "swr";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Hook to fetch all available tools from the backend.
 *
 * This hook fetches the complete list of tools that can be used with agents,
 * including built-in tools (SearchTool, ImageGenerationTool, WebSearchTool, PythonTool)
 * and any dynamically configured tools (MCP servers, OpenAPI tools).
 *
 * @example
 * ```tsx
 * const { tools, isLoading, error, refresh } = useAvailableTools();
 *
 * if (isLoading) return <Loading />;
 * if (error) return <Error />;
 *
 * const imageGenTool = tools.find(t => t.in_code_tool_id === "ImageGenerationTool");
 * const isImageGenAvailable = !!imageGenTool;
 * ```
 */
export function useAvailableTools() {
  const { data, error, mutate } = useSWR<ToolSnapshot[]>(
    SWR_KEYS.tools,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    tools: data ?? [],
    isLoading: !error && !data,
    error,
    refresh: mutate,
  };
}


================================================
FILE: web/src/hooks/useBillingInformation.ts
================================================
import useSWR from "swr";

import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  BillingInformation,
  SubscriptionStatus,
} from "@/lib/billing/interfaces";

/**
 * Hook to fetch billing information from Stripe.
 *
 * Works for both cloud and self-hosted deployments:
 * - Cloud: fetches from /api/tenants/billing-information
 * - Self-hosted: fetches from /api/admin/billing/billing-information
 */
export function useBillingInformation() {
  const url = NEXT_PUBLIC_CLOUD_ENABLED
    ? SWR_KEYS.billingInformationCloud
    : SWR_KEYS.billingInformationSelfHosted;

  const { data, error, mutate, isLoading } = useSWR<
    BillingInformation | SubscriptionStatus
  >(url, errorHandlingFetcher, {
    revalidateOnFocus: false,
    revalidateOnReconnect: false,
    revalidateIfStale: false,
    dedupingInterval: 30000,
    shouldRetryOnError: false,
    keepPreviousData: true,
  });

  return { data, isLoading, error, refresh: mutate };
}


================================================
FILE: web/src/hooks/useBoundingBox.ts
================================================
"use client";

import { useRef, useEffect, useCallback, useState } from "react";

export function useBoundingBox() {
  const ref = useRef<HTMLDivElement>(null);
  const [inside, setInside] = useState(false);

  const checkMousePosition = useCallback((event: MouseEvent) => {
    if (!ref.current) return;

    const rect = ref.current.getBoundingClientRect();
    const isInside =
      event.clientX >= rect.left &&
      event.clientX <= rect.right &&
      event.clientY >= rect.top &&
      event.clientY <= rect.bottom;

    setInside(isInside);
  }, []);

  useEffect(() => {
    // Set up event listeners for mouse movement
    const handleMouseMove = (event: MouseEvent) => checkMousePosition(event);

    document.addEventListener("mousemove", handleMouseMove);

    return () => {
      document.removeEventListener("mousemove", handleMouseMove);
    };
  }, [checkMousePosition]);

  return { ref, inside };
}


================================================
FILE: web/src/hooks/useBrowserInfo.ts
================================================
"use client";

import { useEffect, useState } from "react";

export interface BrowserInfo {
  isSafari: boolean;
  isFirefox: boolean;
  isChrome: boolean;
  isChromium: boolean;
  isEdge: boolean;
  isOpera: boolean;
  isIOS: boolean;
  isMac: boolean;
  isWindows: boolean;
}

const DEFAULT_BROWSER_INFO: BrowserInfo = {
  isSafari: false,
  isFirefox: false,
  isChrome: false,
  isChromium: false,
  isEdge: false,
  isOpera: false,
  isIOS: false,
  isMac: false,
  isWindows: false,
};

export default function useBrowserInfo(): BrowserInfo {
  const [browserInfo, setBrowserInfo] =
    useState<BrowserInfo>(DEFAULT_BROWSER_INFO);
  useEffect(() => {
    const userAgent = window.navigator.userAgent;

    const isEdge = /Edg/i.test(userAgent);
    const isOpera = /OPR|Opera/i.test(userAgent);
    const isFirefox = /Firefox|FxiOS/i.test(userAgent);
    const isChrome = /Chrome|CriOS/i.test(userAgent) && !isEdge && !isOpera;
    const isChromium = /Chromium/i.test(userAgent) || isChrome;
    const isSafari =
      /Safari/i.test(userAgent) &&
      !isChromium &&
      !isEdge &&
      !isOpera &&
      !isFirefox;
    const isIOS = /iPhone|iPad|iPod/i.test(userAgent);
    const isMac = /Macintosh|Mac OS X/i.test(userAgent);
    const isWindows = /Win/i.test(userAgent);

    setBrowserInfo({
      isSafari,
      isFirefox,
      isChrome,
      isChromium,
      isEdge,
      isOpera,
      isIOS,
      isMac,
      isWindows,
    });
  }, []);

  return browserInfo;
}


================================================
FILE: web/src/hooks/useCCPairs.ts
================================================
"use client";

import useSWR from "swr";
import { CCPairBasicInfo } from "@/lib/types";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Hook for fetching connector-credential pairs (CC Pairs).
 *
 * Retrieves all connector-credential pairs configured in the system. CC Pairs
 * represent connections between data sources (connectors) and their authentication
 * credentials, used for indexing content from various sources like Confluence,
 * Slack, Google Drive, etc. Uses SWR for caching and automatic revalidation.
 *
 * @returns Object containing:
 *   - ccPairs: Array of CCPairBasicInfo objects
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Error object if the fetch failed
 *   - refetch: Function to manually reload CC pairs
 *
 * @example
 * ```tsx
 * // Display list of connected data sources
 * const ConnectorList = () => {
 *   const { ccPairs, isLoading, error } = useCCPairs();
 *
 *   if (isLoading) return <Spinner />;
 *   if (error) return <Error message="Failed to load connectors" />;
 *
 *   return (
 *     <ul>
 *       {ccPairs.map(pair => (
 *         <li key={pair.id}>
 *           {pair.name} - {pair.source}
 *         </li>
 *       ))}
 *     </ul>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Filter connectors by source type
 * const SlackConnectors = () => {
 *   const { ccPairs } = useCCPairs();
 *
 *   const slackPairs = ccPairs.filter(pair => pair.source === 'slack');
 *
 *   return <ConnectorGrid connectors={slackPairs} />;
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Refresh list after connecting a new source
 * const ConnectSourceButton = () => {
 *   const { refetch } = useCCPairs();
 *
 *   const handleConnect = async () => {
 *     await connectNewSource();
 *     refetch(); // Refresh the list
 *   };
 *
 *   return <Button onClick={handleConnect}>Connect Source</Button>;
 * };
 * ```
 */
export default function useCCPairs(enabled: boolean = true) {
  const { data, error, isLoading, mutate } = useSWR<CCPairBasicInfo[]>(
    enabled ? SWR_KEYS.connectorStatus : null,
    errorHandlingFetcher
  );

  return {
    ccPairs: data ?? [],
    isLoading: enabled && isLoading,
    error,
    refetch: mutate,
  };
}


================================================
FILE: web/src/hooks/useChatController.ts
================================================
"use client";

import {
  buildChatUrl,
  getAvailableContextTokens,
  nameChatSession,
  updateLlmOverrideForChatSession,
} from "@/app/app/services/lib";
import { getMaxSelectedDocumentTokens } from "@/app/app/projects/projectsService";
import { DEFAULT_CONTEXT_TOKENS } from "@/lib/constants";
import { StreamStopInfo } from "@/lib/search/interfaces";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import type { Route } from "next";
import {
  getLastSuccessfulMessageId,
  getLatestMessageChain,
  MessageTreeState,
  upsertMessages,
  SYSTEM_NODE_ID,
  buildImmediateMessages,
  buildEmptyMessage,
} from "@/app/app/services/messageTree";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { OnyxDocument } from "@/lib/search/interfaces";
import { FilterManager, LlmDescriptor, LlmManager } from "@/lib/hooks";
import {
  BackendMessage,
  ChatFileType,
  CitationMap,
  FileChatDisplay,
  FileDescriptor,
  Message,
  MessageResponseIDInfo,
  RegenerationState,
  RetrievalType,
  StreamingError,
  ToolCallMetadata,
  UserKnowledgeFilePacket,
} from "@/app/app/interfaces";
import { StreamStopReason } from "@/lib/search/interfaces";
import { createChatSession } from "@/app/app/services/lib";
import {
  getFinalLLM,
  modelSupportsImageInput,
  structureValue,
} from "@/lib/llmConfig/utils";
import {
  CurrentMessageFIFO,
  updateCurrentMessageFIFO,
} from "@/app/app/services/currentMessageFIFO";
import { buildFilters } from "@/lib/search/utils";
import { toast } from "@/hooks/useToast";
import {
  ReadonlyURLSearchParams,
  usePathname,
  useRouter,
  useSearchParams,
} from "next/navigation";
import { track, AnalyticsEvent } from "@/lib/analytics";
import { getExtensionContext } from "@/lib/extension/utils";
import useChatSessions from "@/hooks/useChatSessions";
import { usePinnedAgents } from "@/hooks/useAgents";
import {
  useChatSessionStore,
  useCurrentMessageTree,
  useCurrentChatState,
  useCurrentMessageHistory,
} from "@/app/app/stores/useChatSessionStore";
import { Packet, MessageStart } from "@/app/app/services/streamingModels";
import useAgentPreferences from "@/hooks/useAgentPreferences";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import { ProjectFile, useProjectsContext } from "@/providers/ProjectsContext";
import { useAppParams } from "@/hooks/appNavigation";
import { projectFilesToFileDescriptors } from "@/app/app/services/fileUtils";

const SYSTEM_MESSAGE_ID = -3;

export interface OnSubmitProps {
  message: string;
  //from chat input bar
  currentMessageFiles: ProjectFile[];
  // from the chat bar???

  deepResearch: boolean;

  // optional params
  messageIdToResend?: number;
  queryOverride?: string;
  forceSearch?: boolean;
  isSeededChat?: boolean;
  modelOverride?: LlmDescriptor;
  regenerationRequest?: RegenerationRequest | null;
  // Additional context injected into the LLM call but not stored/shown in chat.
  additionalContext?: string;
}

interface RegenerationRequest {
  messageId: number;
  parentMessage: Message;
  forceSearch?: boolean;
}

interface UseChatControllerProps {
  filterManager: FilterManager;
  llmManager: LlmManager;
  liveAgent: MinimalPersonaSnapshot | undefined;
  availableAgents: MinimalPersonaSnapshot[];
  existingChatSessionId: string | null;
  selectedDocuments: OnyxDocument[];
  searchParams: ReadonlyURLSearchParams;
  resetInputBar: () => void;
  setSelectedAgentFromId: (agentId: number | null) => void;
}

async function stopChatSession(chatSessionId: string): Promise<void> {
  const response = await fetch(`/api/chat/stop-chat-session/${chatSessionId}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    throw new Error(`Failed to stop chat session: ${response.statusText}`);
  }
}

export default function useChatController({
  filterManager,
  llmManager,
  availableAgents,
  liveAgent,
  existingChatSessionId,
  selectedDocuments,
  resetInputBar,
  setSelectedAgentFromId,
}: UseChatControllerProps) {
  const pathname = usePathname();
  const router = useRouter();
  const searchParams = useSearchParams();
  const params = useAppParams();
  const { refreshChatSessions, addPendingChatSession } = useChatSessions();
  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();
  const { agentPreferences } = useAgentPreferences();
  const { forcedToolIds } = useForcedTools();
  const { fetchProjects, setCurrentMessageFiles, beginUpload } =
    useProjectsContext();

  // Use selectors to access only the specific fields we need
  const currentSessionId = useChatSessionStore(
    (state) => state.currentSessionId
  );
  const sessions = useChatSessionStore((state) => state.sessions);

  // Store actions - these don't cause re-renders
  const updateChatStateAction = useChatSessionStore(
    (state) => state.updateChatState
  );
  const updateRegenerationStateAction = useChatSessionStore(
    (state) => state.updateRegenerationState
  );
  const updateCanContinueAction = useChatSessionStore(
    (state) => state.updateCanContinue
  );
  const createSession = useChatSessionStore((state) => state.createSession);
  const setCurrentSession = useChatSessionStore(
    (state) => state.setCurrentSession
  );
  const updateSessionMessageTree = useChatSessionStore(
    (state) => state.updateSessionMessageTree
  );
  const updateSubmittedMessage = useChatSessionStore(
    (state) => state.updateSubmittedMessage
  );
  const updateSelectedNodeForDocDisplay = useChatSessionStore(
    (state) => state.updateSelectedNodeForDocDisplay
  );
  const setUncaughtError = useChatSessionStore(
    (state) => state.setUncaughtError
  );
  const setLoadingError = useChatSessionStore((state) => state.setLoadingError);
  const setAbortController = useChatSessionStore(
    (state) => state.setAbortController
  );
  const setIsReady = useChatSessionStore((state) => state.setIsReady);
  const setStreamingStartTime = useChatSessionStore(
    (state) => state.setStreamingStartTime
  );

  // Use custom hooks for accessing store data
  const currentMessageTree = useCurrentMessageTree();
  const currentMessageHistory = useCurrentMessageHistory();
  const currentChatState = useCurrentChatState();

  const navigatingAway = useRef(false);

  // Sync store state changes
  useEffect(() => {
    if (currentSessionId) {
      // Keep track of current session ID for internal use
    }
  }, [currentSessionId]);

  const getCurrentSessionId = (): string => {
    return currentSessionId || existingChatSessionId || "";
  };

  const updateRegenerationState = (
    newState: RegenerationState | null,
    sessionId?: string | null
  ) => {
    const targetSessionId = sessionId || getCurrentSessionId();
    if (targetSessionId) {
      updateRegenerationStateAction(targetSessionId, newState);
    }
  };

  const resetRegenerationState = (sessionId?: string | null) => {
    updateRegenerationState(null, sessionId);
  };

  const updateCanContinue = (newState: boolean, sessionId?: string | null) => {
    const targetSessionId = sessionId || getCurrentSessionId();
    if (targetSessionId) {
      updateCanContinueAction(targetSessionId, newState);
    }
  };

  const updateStatesWithNewSessionId = (newSessionId: string) => {
    // Create new session in store if it doesn't exist
    const existingSession = sessions.get(newSessionId);
    if (!existingSession) {
      createSession(newSessionId);
    }

    // Set as current session
    setCurrentSession(newSessionId);
  };

  const handleNewSessionNavigation = (chatSessionId: string) => {
    // Build URL with skip-reload parameter
    const newUrl = buildChatUrl(
      searchParams,
      chatSessionId,
      null,
      false,
      true // skipReload
    );

    // Navigate immediately if still on chat page
    // For NRF pages (/chat/nrf, /chat/nrf/side-panel), don't navigate immediately
    // Let the streaming complete inline, then the user can continue chatting there
    const isOnChatPage = pathname === "/app";

    if (isOnChatPage && !navigatingAway.current) {
      router.push(newUrl as Route, { scroll: false });
    }

    // Refresh sidebar - the chat was already optimistically added via addPendingChatSession
    // so it will show as "New Chat". This refresh ensures we get the latest server state
    // and will be called again after naming completes.
    refreshChatSessions();
    fetchProjects();
  };

  const handleNewSessionNaming = async (chatSessionId: string) => {
    // Wait 200ms before naming (gives backend time to process)
    // There is some delay here since we might get a "finished" response from the backend
    // before the ChatSession is written to the database.
    // TODO: remove this delay once we have a way to know when the ChatSession
    // is written to the database.
    await new Promise((resolve) => setTimeout(resolve, 200));

    try {
      // Name chat based on AI response
      const response = await nameChatSession(chatSessionId);

      if (!response.ok) {
        console.error("Failed to name chat session, status:", response.status);
        // Still refresh to show the unnamed chat in sidebar
        refreshChatSessions();
        fetchProjects();
        return;
      }
    } catch (error) {
      console.error("Failed to name chat session:", error);
    } finally {
      // Refresh sidebar to show new name
      await refreshChatSessions();
      await fetchProjects();
    }
  };

  const upsertToCompleteMessageTree = ({
    messages,
    chatSessionId,
    completeMessageTreeOverride,
    makeLatestChildMessage = false,
  }: {
    messages: Message[];
    chatSessionId: string;
    // if calling this function repeatedly with short delay, stay may not update in time
    // and result in weird behavipr
    completeMessageTreeOverride?: MessageTreeState | null;
    oldIds?: number[] | null;
    makeLatestChildMessage?: boolean;
  }) => {
    let currentMessageTreeToUse =
      completeMessageTreeOverride ||
      (chatSessionId !== undefined &&
        sessions.get(chatSessionId)?.messageTree) ||
      currentMessageTree ||
      new Map<number, Message>();

    const newCompleteMessageTree = upsertMessages(
      currentMessageTreeToUse,
      messages,
      makeLatestChildMessage
    );

    updateSessionMessageTree(chatSessionId, newCompleteMessageTree);

    return newCompleteMessageTree;
  };

  const stopGenerating = useCallback(async () => {
    const currentSession = getCurrentSessionId();
    const lastMessage = currentMessageHistory[currentMessageHistory.length - 1];

    // Call the backend stop endpoint to set the Redis fence
    // This signals the backend to stop processing as soon as possible
    // The backend will emit a STOP packet when it detects the fence
    try {
      await stopChatSession(currentSession);
    } catch (error) {
      console.error("Failed to stop chat session:", error);
      // Continue with UI cleanup even if backend call fails
    }

    // Clean up incomplete tool calls for immediate UI feedback
    if (
      lastMessage &&
      lastMessage.type === "assistant" &&
      lastMessage.toolCall &&
      lastMessage.toolCall.tool_result === undefined
    ) {
      const newMessageTree = new Map(currentMessageTree);
      const updatedMessage = { ...lastMessage, toolCall: null };
      newMessageTree.set(lastMessage.nodeId, updatedMessage);
      updateSessionMessageTree(currentSession, newMessageTree);
    }

    // Update chat state to input immediately for good UX
    // The stream will close naturally when the backend sends the STOP packet
    setStreamingStartTime(currentSession, null);
    updateChatStateAction(currentSession, "input");
  }, [currentMessageHistory, currentMessageTree]);

  const onSubmit = useCallback(
    async ({
      message,
      currentMessageFiles,
      deepResearch,
      messageIdToResend,
      queryOverride,
      forceSearch,
      isSeededChat,
      modelOverride,
      regenerationRequest,
      additionalContext,
    }: OnSubmitProps) => {
      const projectId = params(SEARCH_PARAM_NAMES.PROJECT_ID);
      {
        const params = new URLSearchParams(searchParams?.toString() || "");
        if (params.has(SEARCH_PARAM_NAMES.PROJECT_ID)) {
          params.delete(SEARCH_PARAM_NAMES.PROJECT_ID);
          const newUrl = params.toString()
            ? `${pathname}?${params.toString()}`
            : pathname;
          router.replace(newUrl as Route, { scroll: false });
        }
      }

      updateSubmittedMessage(getCurrentSessionId(), message);

      navigatingAway.current = false;
      let frozenSessionId = getCurrentSessionId();
      updateCanContinue(false, frozenSessionId);
      setUncaughtError(frozenSessionId, null);
      setLoadingError(frozenSessionId, null);

      // Check if the last message was an error and remove it before proceeding with a new message
      // Ensure this isn't a regeneration or resend, as those operations should preserve the history leading up to the point of regeneration/resend.
      let currentMessageTreeLocal =
        currentMessageTree || new Map<number, Message>();
      let currentHistory = getLatestMessageChain(currentMessageTreeLocal);
      let lastMessage = currentHistory[currentHistory.length - 1];

      if (
        lastMessage &&
        lastMessage.type === "error" &&
        !messageIdToResend &&
        !regenerationRequest
      ) {
        const newMessageTree = new Map(currentMessageTreeLocal);
        const parentNodeId = lastMessage.parentNodeId;

        // Remove the error message itself
        newMessageTree.delete(lastMessage.nodeId);

        // Remove the parent message + update the parent of the parent to no longer
        // link to the parent
        if (parentNodeId !== null && parentNodeId !== undefined) {
          const parentOfError = newMessageTree.get(parentNodeId);
          if (parentOfError) {
            const grandparentNodeId = parentOfError.parentNodeId;
            if (grandparentNodeId !== null && grandparentNodeId !== undefined) {
              const grandparent = newMessageTree.get(grandparentNodeId);
              if (grandparent) {
                // Update grandparent to no longer link to parent
                const updatedGrandparent = {
                  ...grandparent,
                  childrenNodeIds: (grandparent.childrenNodeIds || []).filter(
                    (id: number) => id !== parentNodeId
                  ),
                  latestChildNodeId:
                    grandparent.latestChildNodeId === parentNodeId
                      ? null
                      : grandparent.latestChildNodeId,
                };
                newMessageTree.set(grandparentNodeId, updatedGrandparent);
              }
            }
            // Remove the parent message
            newMessageTree.delete(parentNodeId);
          }
        }
        // Update the state immediately so subsequent logic uses the cleaned map
        updateSessionMessageTree(frozenSessionId, newMessageTree);
        console.log(
          "Removed previous error message ID:",
          lastMessage.messageId
        );

        // update state for the new world (with the error message removed)
        currentHistory = getLatestMessageChain(newMessageTree);
        currentMessageTreeLocal = newMessageTree;
        lastMessage = currentHistory[currentHistory.length - 1];
      }

      if (currentChatState != "input") {
        if (currentChatState == "uploading") {
          toast.error("Please wait for the content to upload");
        } else {
          toast.error("Please wait for the response to complete");
        }

        return;
      }

      // Auto-pin the agent to sidebar when sending a message if not already pinned
      if (liveAgent) {
        const isAlreadyPinned = pinnedAgents.some(
          (agent) => agent.id === liveAgent.id
        );
        if (!isAlreadyPinned) {
          togglePinnedAgent(liveAgent, true).catch((err) => {
            console.error("Failed to auto-pin agent:", err);
          });
        }
      }

      let currChatSessionId: string;
      // Check both the prop and the store's currentSessionId to determine if this is a new session
      // For pages like NRF where existingChatSessionId is always null, we need to check if
      // we already have a session from a previous message
      const isNewSession = existingChatSessionId === null && !currentSessionId;

      const searchParamBasedChatSessionName =
        searchParams?.get(SEARCH_PARAM_NAMES.TITLE) || null;
      // Auto-name only once, after the first agent response, and only when the chat isn't
      // already explicitly named (e.g. `?title=...`).
      const hadAnyUserMessagesBeforeSubmit = currentHistory.some(
        (m) => m.type === "user"
      );
      if (isNewSession) {
        currChatSessionId = await createChatSession(
          liveAgent?.id || 0,
          searchParamBasedChatSessionName,
          projectId ? parseInt(projectId) : null
        );

        // Optimistically add the new chat session to the sidebar cache
        // This ensures "New Chat" appears immediately, even before any messages are saved
        addPendingChatSession({
          chatSessionId: currChatSessionId,
          personaId: liveAgent?.id || 0,
          projectId: projectId ? parseInt(projectId) : null,
        });
      } else {
        // Use the existing session ID from props or from the store
        currChatSessionId =
          existingChatSessionId || (currentSessionId as string);
      }
      frozenSessionId = currChatSessionId;
      // update the selected model for the chat session if one is specified so that
      // it persists across page reloads. Do not `await` here so that the message
      // request can continue and this will just happen in the background.
      // NOTE: only set the model override for the chat session once we send a
      // message with it. If the user switches models and then starts a new
      // chat session, it is unexpected for that model to be used when they
      // return to this session the next day.
      let finalLLM = modelOverride || llmManager.currentLlm;
      updateLlmOverrideForChatSession(
        currChatSessionId,
        structureValue(
          finalLLM.name || "",
          finalLLM.provider || "",
          finalLLM.modelName || ""
        )
      );

      // mark the session as the current session
      updateStatesWithNewSessionId(currChatSessionId);

      // Navigate immediately for new sessions (before streaming starts)
      if (isNewSession) {
        handleNewSessionNavigation(currChatSessionId);
      }

      const shouldAutoNameChatSessionAfterResponse =
        !searchParamBasedChatSessionName &&
        !hadAnyUserMessagesBeforeSubmit &&
        !sessions.get(currChatSessionId)?.description;

      // set the ability to cancel the request
      const controller = new AbortController();
      setAbortController(currChatSessionId, controller);

      const messageToResend = currentHistory.find(
        (message) => message.messageId === messageIdToResend
      );
      if (messageIdToResend && regenerationRequest) {
        updateRegenerationState(
          { regenerating: true, finalMessageIndex: messageIdToResend + 1 },
          frozenSessionId
        );
      }
      const messageToResendParent =
        messageToResend?.parentNodeId !== null &&
        messageToResend?.parentNodeId !== undefined
          ? currentMessageTreeLocal.get(messageToResend.parentNodeId)
          : null;
      const messageToResendIndex = messageToResend
        ? currentHistory.indexOf(messageToResend)
        : null;

      if (!messageToResend && messageIdToResend !== undefined) {
        toast.error(
          "Failed to re-send message - please refresh the page and try again."
        );
        resetRegenerationState(frozenSessionId);
        updateChatStateAction(frozenSessionId, "input");
        return;
      }

      // When editing (messageIdToResend exists but no regenerationRequest), use the new message
      // When regenerating (regenerationRequest exists), use the original message
      let currMessage = regenerationRequest
        ? messageToResend?.message || message
        : message;

      // When editing a message that had files attached, preserve the original files.
      // Skip for regeneration — the regeneration path reuses the existing user node
      // (and its files), so merging here would send duplicates.
      const effectiveFileDescriptors = [
        ...projectFilesToFileDescriptors(currentMessageFiles),
        ...(!regenerationRequest ? messageToResend?.files ?? [] : []),
      ];

      updateChatStateAction(frozenSessionId, "loading");

      // find the parent
      const currMessageHistory =
        messageToResendIndex !== null
          ? currentHistory.slice(0, messageToResendIndex)
          : currentHistory;

      let parentMessage =
        messageToResendParent ||
        (currMessageHistory.length > 0
          ? currMessageHistory[currMessageHistory.length - 1]
          : null) ||
        (currentMessageTreeLocal.size === 1
          ? Array.from(currentMessageTreeLocal.values())[0]
          : null);

      // Add user message immediately to the message tree so that the chat
      // immediately reflects the user message
      let initialUserNode: Message;
      let initialAgentNode: Message;

      if (regenerationRequest) {
        // For regeneration: keep the existing user message, only create new agent
        initialUserNode = regenerationRequest.parentMessage;
        initialAgentNode = buildEmptyMessage({
          messageType: "assistant",
          parentNodeId: initialUserNode.nodeId,
          nodeIdOffset: 1,
        });
      } else {
        // For new messages or editing: create/update user message and assistant
        const parentNodeIdForMessage = messageToResend
          ? messageToResend.parentNodeId || SYSTEM_NODE_ID
          : parentMessage?.nodeId || SYSTEM_NODE_ID;
        const result = buildImmediateMessages(
          parentNodeIdForMessage,
          currMessage,
          effectiveFileDescriptors,
          messageToResend
        );
        initialUserNode = result.initialUserNode;
        initialAgentNode = result.initialAgentNode;
      }

      // make messages appear + clear input bar
      const messagesToUpsert = regenerationRequest
        ? [initialAgentNode] // Only upsert the new agent for regeneration
        : [initialUserNode, initialAgentNode]; // Upsert both for normal/edit flow
      currentMessageTreeLocal = upsertToCompleteMessageTree({
        messages: messagesToUpsert,
        completeMessageTreeOverride: currentMessageTreeLocal,
        chatSessionId: frozenSessionId,
      });
      resetInputBar();

      let answer = "";

      const stopReason: StreamStopReason | null = null;
      let query: string | null = null;
      let retrievalType: RetrievalType =
        selectedDocuments.length > 0
          ? RetrievalType.SelectedDocs
          : RetrievalType.None;
      let documents: OnyxDocument[] = selectedDocuments;
      let citations: CitationMap | null = null;
      let aiMessageImages: FileDescriptor[] | null = null;
      let error: string | null = null;
      let stackTrace: string | null = null;
      let errorCode: string | null = null;
      let isRetryable: boolean = true;
      let errorDetails: Record<string, any> | null = null;

      let finalMessage: BackendMessage | null = null;
      let toolCall: ToolCallMetadata | null = null;
      let files = effectiveFileDescriptors;
      let packets: Packet[] = [];
      let packetsVersion = 0;

      let newUserMessageId: number | null = null;
      let newAgentMessageId: number | null = null;

      try {
        const lastSuccessfulMessageId = getLastSuccessfulMessageId(
          currentMessageTreeLocal
        );
        const disabledToolIds = liveAgent
          ? agentPreferences?.[liveAgent?.id]?.disabled_tool_ids
          : undefined;

        // Find the search tool's numeric ID for forceSearch
        const searchToolNumericId = liveAgent?.tools.find(
          (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID
        )?.id;

        // Determine the forced tool ID:
        // 1. If forceSearch is true, use the search tool's numeric ID
        // 2. Otherwise, use the first forced tool ID from the forcedToolIds array
        const effectiveForcedToolId = forceSearch
          ? searchToolNumericId ?? null
          : forcedToolIds.length > 0
            ? forcedToolIds[0]
            : null;

        // Determine origin for telemetry tracking (also used for frontend PostHog tracking below)
        const { isExtension, context: extensionContext } =
          getExtensionContext();
        const messageOrigin = isExtension ? "chrome_extension" : "webapp";

        const stack = new CurrentMessageFIFO();
        updateCurrentMessageFIFO(stack, {
          signal: controller.signal,
          message: currMessage,
          fileDescriptors: effectiveFileDescriptors,
          parentMessageId: (() => {
            const parentId =
              regenerationRequest?.parentMessage.messageId ||
              messageToResendParent?.messageId ||
              lastSuccessfulMessageId;
            // Don't send SYSTEM_MESSAGE_ID (-3) as parent, use null instead
            // The backend expects null for "the first message in the chat"
            return parentId === SYSTEM_MESSAGE_ID ? null : parentId;
          })(),
          chatSessionId: currChatSessionId,
          filters: buildFilters(
            filterManager.selectedSources,
            filterManager.selectedDocumentSets,
            filterManager.timeRange,
            filterManager.selectedTags
          ),
          modelProvider:
            modelOverride?.name || llmManager.currentLlm.name || undefined,
          modelVersion:
            modelOverride?.modelName ||
            llmManager.currentLlm.modelName ||
            searchParams?.get(SEARCH_PARAM_NAMES.MODEL_VERSION) ||
            undefined,
          temperature: llmManager.temperature || undefined,
          deepResearch,
          enabledToolIds:
            disabledToolIds && liveAgent
              ? liveAgent.tools
                  .filter((tool) => !disabledToolIds?.includes(tool.id))
                  .map((tool) => tool.id)
              : undefined,
          forcedToolId: effectiveForcedToolId,
          origin: messageOrigin,
          additionalContext,
        });

        const delay = (ms: number) => {
          return new Promise((resolve) => setTimeout(resolve, ms));
        };

        await delay(50);
        while (!stack.isComplete || !stack.isEmpty()) {
          if (stack.isEmpty()) {
            await delay(0.5);
          }

          if (!stack.isEmpty() && !controller.signal.aborted) {
            const packet = stack.nextPacket();
            if (!packet) {
              continue;
            }

            // We've processed initial packets and are starting to stream content.
            // Transition from 'loading' to 'streaming'.
            updateChatStateAction(frozenSessionId, "streaming");
            // Only set start time once (guard prevents reset on each packet)
            // Use getState() to avoid stale closure - sessions captured at render time becomes stale in async loop
            if (
              !useChatSessionStore.getState().sessions.get(frozenSessionId)
                ?.streamingStartTime
            ) {
              setStreamingStartTime(frozenSessionId, Date.now());
            }

            if ((packet as MessageResponseIDInfo).user_message_id) {
              newUserMessageId = (packet as MessageResponseIDInfo)
                .user_message_id;

              // Track extension queries in PostHog (reuses isExtension/extensionContext from above)
              if (isExtension) {
                track(AnalyticsEvent.EXTENSION_CHAT_QUERY, {
                  extension_context: extensionContext,
                  assistant_id: liveAgent?.id,
                  has_files: effectiveFileDescriptors.length > 0,
                  deep_research: deepResearch,
                });
              }
            }

            if (
              (packet as MessageResponseIDInfo).reserved_assistant_message_id
            ) {
              newAgentMessageId = (packet as MessageResponseIDInfo)
                .reserved_assistant_message_id;
            }

            if (Object.hasOwn(packet, "user_files")) {
              const userFiles = (packet as UserKnowledgeFilePacket).user_files;
              // Ensure files are unique by id
              const newUserFiles = userFiles.filter(
                (newFile) =>
                  !files.some((existingFile) => existingFile.id === newFile.id)
              );
              files = files.concat(newUserFiles);
            }

            if (Object.hasOwn(packet, "file_ids")) {
              aiMessageImages = (packet as FileChatDisplay).file_ids.map(
                (fileId) => {
                  return {
                    id: fileId,
                    type: ChatFileType.IMAGE,
                  };
                }
              );
            } else if (
              Object.hasOwn(packet, "error") &&
              (packet as any).error != null
            ) {
              const streamingError = packet as StreamingError;
              error = streamingError.error;
              stackTrace = streamingError.stack_trace || null;
              errorCode = streamingError.error_code || null;
              isRetryable = streamingError.is_retryable ?? true;
              errorDetails = streamingError.details || null;

              setUncaughtError(frozenSessionId, streamingError.error);
              updateChatStateAction(frozenSessionId, "input");
              updateSubmittedMessage(getCurrentSessionId(), "");

              throw new Error(streamingError.error);
            } else if (Object.hasOwn(packet, "message_id")) {
              finalMessage = packet as BackendMessage;
            } else if (Object.hasOwn(packet, "stop_reason")) {
              const stop_reason = (packet as StreamStopInfo).stop_reason;
              if (stop_reason === StreamStopReason.CONTEXT_LENGTH) {
                updateCanContinue(true, frozenSessionId);
              }
            } else if (Object.hasOwn(packet, "obj")) {
              packets.push(packet as Packet);
              packetsVersion++;

              // Check if the packet contains document information
              const packetObj = (packet as Packet).obj;

              if (packetObj.type === "citation_info") {
                // Individual citation packet from backend streaming
                const citationInfo = packetObj as {
                  type: "citation_info";
                  citation_number: number;
                  document_id: string;
                };
                // Incrementally build citations map
                citations = {
                  ...(citations || {}),
                  [citationInfo.citation_number]: citationInfo.document_id,
                };
              } else if (packetObj.type === "message_start") {
                const messageStart = packetObj as MessageStart;
                if (messageStart.final_documents) {
                  documents = messageStart.final_documents;
                  updateSelectedNodeForDocDisplay(
                    frozenSessionId,
                    initialAgentNode.nodeId
                  );
                }
              }
            } else {
              console.warn("Unknown packet:", JSON.stringify(packet));
            }

            // on initial message send, we insert a dummy system message
            // set this as the parent here if no parent is set
            parentMessage =
              parentMessage || currentMessageTreeLocal?.get(SYSTEM_NODE_ID)!;

            currentMessageTreeLocal = upsertToCompleteMessageTree({
              messages: [
                {
                  ...initialUserNode,
                  messageId: newUserMessageId ?? undefined,
                  files: files,
                },
                {
                  ...initialAgentNode,
                  messageId: newAgentMessageId ?? undefined,
                  message: error || answer,
                  type: error ? "error" : "assistant",
                  retrievalType,
                  query: finalMessage?.rephrased_query || query,
                  documents: documents,
                  citations: finalMessage?.citations || citations || {},
                  files: finalMessage?.files || aiMessageImages || [],
                  toolCall: finalMessage?.tool_call || toolCall,
                  stackTrace: stackTrace,
                  overridden_model: finalMessage?.overridden_model,
                  stopReason: stopReason,
                  packets: packets,
                  packetCount: packets.length,
                  processingDurationSeconds:
                    finalMessage?.processing_duration_seconds ??
                    (() => {
                      const startTime = useChatSessionStore
                        .getState()
                        .getStreamingStartTime(frozenSessionId);
                      return startTime
                        ? Math.floor((Date.now() - startTime) / 1000)
                        : undefined;
                    })(),
                },
              ],
              // Pass the latest map state
              completeMessageTreeOverride: currentMessageTreeLocal,
              chatSessionId: frozenSessionId!,
            });
          }
        }
        // Surface FIFO errors (e.g. 429 before any packets arrive) so the
        // catch block replaces the thinking placeholder with an error message.
        if (stack.error) {
          throw new Error(stack.error);
        }
      } catch (e: any) {
        console.log("Error:", e);
        const errorMsg = e.message;
        currentMessageTreeLocal = upsertToCompleteMessageTree({
          messages: [
            {
              nodeId: initialUserNode.nodeId,
              message: currMessage,
              type: "user",
              files: effectiveFileDescriptors,
              toolCall: null,
              parentNodeId: parentMessage?.nodeId || SYSTEM_NODE_ID,
              packets: [],
              packetCount: 0,
            },
            {
              nodeId: initialAgentNode.nodeId,
              message: errorMsg,
              type: "error",
              files: aiMessageImages || [],
              toolCall: null,
              parentNodeId: initialUserNode.nodeId,
              packets: [],
              packetCount: 0,
              stackTrace: stackTrace,
              errorCode: errorCode,
              isRetryable: isRetryable,
              errorDetails: errorDetails,
            },
          ],
          completeMessageTreeOverride: currentMessageTreeLocal,
          chatSessionId: frozenSessionId,
        });
      }

      resetRegenerationState(frozenSessionId);
      setStreamingStartTime(frozenSessionId, null);
      updateChatStateAction(frozenSessionId, "input");

      // Name the chat now that we have the first AI response (navigation already happened before streaming)
      if (shouldAutoNameChatSessionAfterResponse) {
        handleNewSessionNaming(currChatSessionId);
      }
    },
    [
      // Narrow to stable fields from managers to avoid re-creation
      filterManager.selectedSources,
      filterManager.selectedDocumentSets,
      filterManager.selectedTags,
      filterManager.timeRange,
      llmManager.currentLlm,
      llmManager.temperature,
      // Others that affect logic
      liveAgent,
      availableAgents,
      existingChatSessionId,
      selectedDocuments,
      searchParams,
      resetInputBar,
      setSelectedAgentFromId,
      updateSelectedNodeForDocDisplay,
      currentMessageTree,
      currentChatState,
      // Ensure latest forced tools are used when submitting
      forcedToolIds,
      // Keep tool preference-derived values fresh
      agentPreferences,
      fetchProjects,
      // For auto-pinning agents
      pinnedAgents,
      togglePinnedAgent,
    ]
  );

  const handleMessageSpecificFileUpload = useCallback(
    async (acceptedFiles: File[]) => {
      const [_, llmModel] = getFinalLLM(
        llmManager.llmProviders || [],
        liveAgent || null,
        llmManager.currentLlm
      );
      const llmAcceptsImages = modelSupportsImageInput(
        llmManager.llmProviders || [],
        llmModel
      );

      const imageFiles = acceptedFiles.filter((file) =>
        file.type.startsWith("image/")
      );

      if (imageFiles.length > 0 && !llmAcceptsImages) {
        toast.error(
          "The current model does not support image input. Please select a model with Vision support."
        );
        return;
      }
      updateChatStateAction(getCurrentSessionId(), "uploading");
      const uploadedMessageFiles = await beginUpload(
        Array.from(acceptedFiles),
        null
      );
      setCurrentMessageFiles((prev) => [...prev, ...uploadedMessageFiles]);
      updateChatStateAction(getCurrentSessionId(), "input");
    },
    [liveAgent, llmManager, forcedToolIds]
  );

  useEffect(() => {
    return () => {
      // Cleanup which only runs when the component unmounts (i.e. when you navigate away).
      const currentSession = getCurrentSessionId();
      const abortController = sessions.get(currentSession)?.abortController;
      if (abortController) {
        abortController.abort();
        setAbortController(currentSession, new AbortController());
      }
    };
  }, [pathname]);

  // update chosen assistant if we navigate between pages
  useEffect(() => {
    if (currentMessageHistory.length === 0 && existingChatSessionId === null) {
      // Select from available assistants so shared assistants appear.
      setSelectedAgentFromId(null);
    }
  }, [existingChatSessionId, availableAgents, currentMessageHistory.length]);

  useEffect(() => {
    const handleSlackChatRedirect = async () => {
      const slackChatId = searchParams.get("slackChatId");
      if (!slackChatId) return;

      // Set isReady to false before starting retrieval to display loading text
      const currentSessionId = getCurrentSessionId();
      if (currentSessionId) {
        setIsReady(currentSessionId, false);
      }

      try {
        const response = await fetch("/api/chat/seed-chat-session-from-slack", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            chat_session_id: slackChatId,
          }),
        });

        if (!response.ok) {
          throw new Error("Failed to seed chat from Slack");
        }

        const data = await response.json();

        router.push(data.redirect_url);
      } catch (error) {
        console.error("Error seeding chat from Slack:", error);
        toast.error("Failed to load chat from Slack");
      }
    };

    handleSlackChatRedirect();
  }, [searchParams, router]);

  // Available context tokens: if a chat session exists, fetch from the session
  // API (dynamic per session/model). Otherwise derive from the persona's max
  // document tokens. The backend already accounts for system prompt, tools,
  // and user-message reservations.
  const [availableContextTokens, setAvailableContextTokens] = useState<number>(
    DEFAULT_CONTEXT_TOKENS
  );

  useEffect(() => {
    if (!llmManager.hasAnyProvider) return;

    let cancelled = false;

    const setIfActive = (tokens: number) => {
      if (!cancelled) setAvailableContextTokens(tokens);
    };

    // Prefer the Zustand session ID, but fall back to the URL-derived prop
    // so we don't incorrectly take the persona path while the store is
    // still initialising on navigation to an existing chat.
    const sessionId = currentSessionId || existingChatSessionId;

    (async () => {
      try {
        if (sessionId) {
          const available = await getAvailableContextTokens(sessionId);
          setIfActive(available ?? DEFAULT_CONTEXT_TOKENS);
          return;
        }

        const personaId = liveAgent?.id;
        if (personaId == null) {
          setIfActive(DEFAULT_CONTEXT_TOKENS);
          return;
        }

        const maxTokens = await getMaxSelectedDocumentTokens(personaId);
        setIfActive(maxTokens ?? DEFAULT_CONTEXT_TOKENS);
      } catch (e) {
        console.error("Failed to fetch available context tokens:", e);
        setIfActive(DEFAULT_CONTEXT_TOKENS);
      }
    })();

    return () => {
      cancelled = true;
    };
  }, [
    currentSessionId,
    existingChatSessionId,
    liveAgent?.id,
    llmManager.hasAnyProvider,
  ]);

  // check if there's an image file in the message history so that we know
  // which LLMs are available to use
  const imageFileInMessageHistory = useMemo(() => {
    return currentMessageHistory
      .filter((message) => message.type === "user")
      .some((message) =>
        message.files.some((file) => file.type === ChatFileType.IMAGE)
      );
  }, [currentMessageHistory]);

  useEffect(() => {
    llmManager.updateImageFilesPresent(imageFileInMessageHistory);
  }, [imageFileInMessageHistory]);

  // set isReady once component is mounted
  useEffect(() => {
    const currentSessionId = getCurrentSessionId();
    if (currentSessionId) {
      setIsReady(currentSessionId, true);
    }
  }, []);

  return {
    // actions
    onSubmit,
    stopGenerating,
    handleMessageSpecificFileUpload,
    // data
    availableContextTokens,
  };
}


================================================
FILE: web/src/hooks/useChatSessionController.ts
================================================
"use client";

import { useEffect, useCallback, useState } from "react";
import { ReadonlyURLSearchParams } from "next/navigation";
import {
  nameChatSession,
  processRawChatHistory,
  patchMessageToBeLatest,
} from "@/app/app/services/lib";
import {
  getLatestMessageChain,
  setMessageAsLatest,
} from "@/app/app/services/messageTree";
import {
  BackendChatSession,
  ChatSessionSharedStatus,
} from "@/app/app/interfaces";
import {
  SEARCH_PARAM_NAMES,
  shouldSubmitOnLoad,
} from "@/app/app/services/searchParams";
import { FilterManager } from "@/lib/hooks";
import { OnyxDocument } from "@/lib/search/interfaces";
import {
  useChatSessionStore,
  useCurrentMessageHistory,
} from "@/app/app/stores/useChatSessionStore";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import { ProjectFile } from "@/app/app/projects/projectsService";
import { getSessionProjectTokenCount } from "@/app/app/projects/projectsService";
import { getProjectFilesForSession } from "@/app/app/projects/projectsService";
import { AppInputBarHandle } from "@/sections/input/AppInputBar";

interface UseChatSessionControllerProps {
  existingChatSessionId: string | null;
  searchParams: ReadonlyURLSearchParams;
  filterManager: FilterManager;
  firstMessage?: string;

  // UI state setters
  setSelectedAgentFromId: (agentId: number | null) => void;
  setSelectedDocuments: (documents: OnyxDocument[]) => void;
  setCurrentMessageFiles: (
    files: ProjectFile[] | ((prev: ProjectFile[]) => ProjectFile[])
  ) => void;

  // Refs
  chatSessionIdRef: React.RefObject<string | null>;
  loadedIdSessionRef: React.RefObject<string | null>;
  chatInputBarRef: React.RefObject<AppInputBarHandle | null>;
  isInitialLoad: React.RefObject<boolean>;
  submitOnLoadPerformed: React.RefObject<boolean>;

  // Actions
  refreshChatSessions: () => void;
  onSubmit: (params: {
    message: string;
    currentMessageFiles: ProjectFile[];
    deepResearch: boolean;
    isSeededChat?: boolean;
  }) => Promise<void>;
}

export type SessionFetchError = {
  type: "not_found" | "access_denied" | "unknown";
  detail: string;
} | null;

export default function useChatSessionController({
  existingChatSessionId,
  searchParams,
  filterManager,
  firstMessage,
  setSelectedAgentFromId,
  setSelectedDocuments,
  setCurrentMessageFiles,
  chatSessionIdRef,
  loadedIdSessionRef,
  chatInputBarRef,
  isInitialLoad,
  submitOnLoadPerformed,
  refreshChatSessions,
  onSubmit,
}: UseChatSessionControllerProps) {
  const [currentSessionFileTokenCount, setCurrentSessionFileTokenCount] =
    useState<number>(0);
  const [projectFiles, setProjectFiles] = useState<ProjectFile[]>([]);
  const [sessionFetchError, setSessionFetchError] =
    useState<SessionFetchError>(null);
  // Store actions
  const updateSessionAndMessageTree = useChatSessionStore(
    (state) => state.updateSessionAndMessageTree
  );
  const updateSessionMessageTree = useChatSessionStore(
    (state) => state.updateSessionMessageTree
  );
  const setIsFetchingChatMessages = useChatSessionStore(
    (state) => state.setIsFetchingChatMessages
  );
  const setCurrentSession = useChatSessionStore(
    (state) => state.setCurrentSession
  );
  const initializeSession = useChatSessionStore(
    (state) => state.initializeSession
  );
  const updateCurrentChatSessionSharedStatus = useChatSessionStore(
    (state) => state.updateCurrentChatSessionSharedStatus
  );
  const updateCurrentSelectedNodeForDocDisplay = useChatSessionStore(
    (state) => state.updateCurrentSelectedNodeForDocDisplay
  );
  const currentChatState = useChatSessionStore(
    (state) =>
      state.sessions.get(state.currentSessionId || "")?.chatState || "input"
  );
  const currentChatHistory = useCurrentMessageHistory();
  const chatSessions = useChatSessionStore((state) => state.sessions);
  const { setForcedToolIds } = useForcedTools();

  // Fetch chat messages for the chat session
  useEffect(() => {
    const priorChatSessionId = chatSessionIdRef.current;
    const loadedSessionId = loadedIdSessionRef.current;
    chatSessionIdRef.current = existingChatSessionId;
    loadedIdSessionRef.current = existingChatSessionId;

    chatInputBarRef.current?.focus();

    const isCreatingNewSession =
      priorChatSessionId === null && existingChatSessionId !== null;
    const isSwitchingBetweenSessions =
      priorChatSessionId !== null &&
      existingChatSessionId !== priorChatSessionId;

    // Clear uploaded files on any session change (they're already in context)
    if (isCreatingNewSession || isSwitchingBetweenSessions) {
      setCurrentMessageFiles([]);
    }

    // Only reset filters/selections when switching between existing sessions
    if (isSwitchingBetweenSessions) {
      setSelectedDocuments([]);
      filterManager.setSelectedDocumentSets([]);
      filterManager.setSelectedTags([]);
      filterManager.setTimeRange(null);

      // Remove uploaded files
      setCurrentMessageFiles([]);

      // If switching from one chat to another, then need to scroll again
      // If we're creating a brand new chat, then don't need to scroll
      if (priorChatSessionId !== null) {
        setSelectedDocuments([]);

        // Clear forced tool ids if and only if we're switching to a new chat session
        setForcedToolIds([]);
      }
    }

    async function initialSessionFetch() {
      setSessionFetchError(null);

      if (existingChatSessionId === null) {
        // Clear the current session in the store to show intro messages
        setCurrentSession(null);

        // Reset the selected agent back to default
        setSelectedAgentFromId(null);
        updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Private);

        // If we're supposed to submit on initial load, then do that here
        if (
          shouldSubmitOnLoad(searchParams) &&
          !submitOnLoadPerformed.current
        ) {
          submitOnLoadPerformed.current = true;
          await onSubmit({
            message: firstMessage || "",
            currentMessageFiles: [],
            deepResearch: false,
          });
        }
        return;
      }

      // Set the current session first, then set fetching state to prevent intro flash
      setCurrentSession(existingChatSessionId);
      setIsFetchingChatMessages(existingChatSessionId, true);

      let response: Response;
      try {
        response = await fetch(
          `/api/chat/get-chat-session/${existingChatSessionId}`
        );
      } catch (error) {
        setIsFetchingChatMessages(existingChatSessionId, false);
        console.error("Failed to fetch chat session", {
          chatSessionId: existingChatSessionId,
          error,
        });
        setSessionFetchError({
          type: "unknown",
          detail: "Failed to load chat session. Please check your connection.",
        });
        return;
      }

      if (!response.ok) {
        setIsFetchingChatMessages(existingChatSessionId, false);
        let detail = "An unexpected error occurred.";
        try {
          const errorBody = await response.json();
          detail = errorBody.detail || detail;
        } catch {
          // ignore parse errors
        }
        const type =
          response.status === 404
            ? "not_found"
            : response.status === 403
              ? "access_denied"
              : "unknown";
        setSessionFetchError({ type, detail });
        return;
      }

      const session = await response.json();
      const chatSession = session as BackendChatSession;
      setSelectedAgentFromId(chatSession.persona_id);

      // Ensure the current session is set to the actual session ID from the response
      setCurrentSession(chatSession.chat_session_id);

      // Initialize session data including personaId
      initializeSession(chatSession.chat_session_id, chatSession);

      const newMessageMap = processRawChatHistory(
        chatSession.messages,
        chatSession.packets
      );
      const newMessageHistory = getLatestMessageChain(newMessageMap);

      // Update message history except for edge where where
      // last message is an error and we're on a new chat.
      // This corresponds to a "renaming" of chat, which occurs after first message
      // stream
      if (
        (newMessageHistory[newMessageHistory.length - 1]?.type !== "error" ||
          loadedSessionId != null) &&
        !(
          currentChatState == "toolBuilding" ||
          currentChatState == "streaming" ||
          currentChatState == "loading"
        )
      ) {
        updateCurrentSelectedNodeForDocDisplay(
          newMessageHistory[newMessageHistory.length - 1]?.nodeId ?? null
        );

        updateSessionAndMessageTree(chatSession.chat_session_id, newMessageMap);
        chatSessionIdRef.current = chatSession.chat_session_id;
      }

      setIsFetchingChatMessages(chatSession.chat_session_id, false);

      // Fetch token count for this chat session's project (if any)
      try {
        if (chatSession.chat_session_id) {
          const total = await getSessionProjectTokenCount(
            chatSession.chat_session_id
          );
          setCurrentSessionFileTokenCount(total || 0);
        } else {
          setCurrentSessionFileTokenCount(0);
        }
      } catch (e) {
        setCurrentSessionFileTokenCount(0);
      }

      // Fetch project files for this chat session (if any)
      try {
        if (chatSession.chat_session_id) {
          const files = await getProjectFilesForSession(
            chatSession.chat_session_id
          );
          setProjectFiles(files || []);
        } else {
          setProjectFiles([]);
        }
      } catch (e) {
        setProjectFiles([]);
      }

      // If this is a seeded chat, then kick off the AI message generation
      if (
        newMessageHistory.length === 1 &&
        !submitOnLoadPerformed.current &&
        searchParams?.get(SEARCH_PARAM_NAMES.SEEDED) === "true"
      ) {
        submitOnLoadPerformed.current = true;

        const seededMessage = newMessageHistory[0]?.message;
        if (!seededMessage) {
          return;
        }

        await onSubmit({
          message: seededMessage,
          isSeededChat: true,
          currentMessageFiles: [],
          deepResearch: false,
        });
        // Force re-name if the chat session doesn't have one
        if (!chatSession.description) {
          await nameChatSession(existingChatSessionId);
          refreshChatSessions();
        }
      } else if (newMessageHistory.length >= 2 && !chatSession.description) {
        await nameChatSession(existingChatSessionId);
        refreshChatSessions();
      }
    }

    // SKIP_RELOAD is used after completing the first message in a new session.
    // We don't need to re-fetch at that point, we have everything we need.
    // For safety, we should always re-fetch if there are no messages in the chat history.
    if (
      !searchParams?.get(SEARCH_PARAM_NAMES.SKIP_RELOAD) ||
      currentChatHistory.length === 0
    ) {
      const existingChatSession = existingChatSessionId
        ? chatSessions.get(existingChatSessionId)
        : null;

      if (
        !existingChatSession?.chatState ||
        existingChatSession.chatState === "input"
      ) {
        initialSessionFetch();
      } else {
        // no need to fetch if the chat session is currently streaming (it would be )
        // out of date).
        // this means that the user kicked off a message, switched to a different
        // chat, and then switched back.
        setCurrentSession(existingChatSessionId);
      }
    } else {
      // Remove SKIP_RELOAD param without triggering a page reload
      const currentSearchParams = new URLSearchParams(searchParams?.toString());
      if (currentSearchParams.has(SEARCH_PARAM_NAMES.SKIP_RELOAD)) {
        currentSearchParams.delete(SEARCH_PARAM_NAMES.SKIP_RELOAD);
        const newUrl = `${window.location.pathname}${
          currentSearchParams.toString()
            ? "?" + currentSearchParams.toString()
            : ""
        }`;
        window.history.replaceState({}, "", newUrl);
      }
    }
  }, [
    existingChatSessionId,
    searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID),
    // Note: We're intentionally not including all dependencies to avoid infinite loops
    // This effect should only run when existingChatSessionId or persona ID changes
  ]);

  const onMessageSelection = useCallback(
    (nodeId: number) => {
      updateCurrentSelectedNodeForDocDisplay(nodeId);
      const currentMessageTree = useChatSessionStore
        .getState()
        .sessions.get(useChatSessionStore.getState().currentSessionId || "")
        ?.messageTree;

      if (currentMessageTree) {
        const newMessageTree = setMessageAsLatest(currentMessageTree, nodeId);
        const currentSessionId =
          useChatSessionStore.getState().currentSessionId;
        if (currentSessionId) {
          updateSessionMessageTree(currentSessionId, newMessageTree);
        }

        const message = currentMessageTree.get(nodeId);

        if (message?.messageId) {
          // Makes actual API call to set message as latest in the DB so we can
          // edit this message and so it sticks around on page reload
          patchMessageToBeLatest(message.messageId);
        } else {
          console.error("Message has no messageId", nodeId);
        }
      }
    },
    [updateCurrentSelectedNodeForDocDisplay, updateSessionMessageTree]
  );

  return {
    currentSessionFileTokenCount,
    onMessageSelection,
    projectFiles,
    sessionFetchError,
  };
}


================================================
FILE: web/src/hooks/useChatSessions.ts
================================================
"use client";

import {
  useCallback,
  useEffect,
  useMemo,
  useState,
  useSyncExternalStore,
} from "react";
import useSWRInfinite from "swr/infinite";
import { ChatSession, ChatSessionSharedStatus } from "@/app/app/interfaces";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import useAppFocus from "./useAppFocus";
import { useAgents } from "./useAgents";
import { DEFAULT_AGENT_ID } from "@/lib/constants";

const PAGE_SIZE = 50;
const MIN_LOADING_DURATION_MS = 500;

interface ChatSessionsResponse {
  sessions: ChatSession[];
  has_more: boolean;
}

export interface PendingChatSessionParams {
  chatSessionId: string;
  personaId: number;
  projectId?: number | null;
}

interface UseChatSessionsOutput {
  chatSessions: ChatSession[];
  currentChatSessionId: string | null;
  currentChatSession: ChatSession | null;
  agentForCurrentChatSession: MinimalPersonaSnapshot | null;
  isLoading: boolean;
  error: any;
  refreshChatSessions: () => Promise<ChatSessionsResponse[] | undefined>;
  addPendingChatSession: (params: PendingChatSessionParams) => void;
  removeSession: (sessionId: string) => void;
  hasMore: boolean;
  isLoadingMore: boolean;
  loadMore: () => void;
}

// ---------------------------------------------------------------------------
// Shared module-level store for pending chat sessions
// ---------------------------------------------------------------------------
// Pending sessions are optimistic new sessions shown in the sidebar before
// the server returns them. This must be module-level so all hook instances
// (sidebar, ChatButton, etc.) share the same state.

const pendingSessionsStore = {
  sessions: new Map<string, ChatSession>(),
  listeners: new Set<() => void>(),
  cachedSnapshot: [] as ChatSession[],

  add(session: ChatSession) {
    this.sessions.set(session.id, session);
    this.updateSnapshot();
    this.notify();
  },

  remove(sessionId: string) {
    if (this.sessions.delete(sessionId)) {
      this.updateSnapshot();
      this.notify();
    }
  },

  has(sessionId: string): boolean {
    return this.sessions.has(sessionId);
  },

  subscribe(listener: () => void) {
    this.listeners.add(listener);
    return () => this.listeners.delete(listener);
  },

  notify() {
    this.listeners.forEach((listener) => listener());
  },

  updateSnapshot() {
    this.cachedSnapshot = Array.from(this.sessions.values());
  },

  getSnapshot(): ChatSession[] {
    return this.cachedSnapshot;
  },
};

// Stable empty array for SSR
const EMPTY_SESSIONS: ChatSession[] = [];

function usePendingSessions(): ChatSession[] {
  return useSyncExternalStore(
    (callback) => pendingSessionsStore.subscribe(callback),
    () => pendingSessionsStore.getSnapshot(),
    () => EMPTY_SESSIONS
  );
}

// ---------------------------------------------------------------------------
// Helper hooks
// ---------------------------------------------------------------------------

function useFindAgentForCurrentChatSession(
  currentChatSession: ChatSession | null
): MinimalPersonaSnapshot | null {
  const { agents } = useAgents();
  const appFocus = useAppFocus();

  let agentIdToFind: number;

  // This could be an alreaady existing chat session.
  if (currentChatSession) {
    agentIdToFind = currentChatSession.persona_id;
  }

  // This could be a new chat-session. Therefore, `currentChatSession` is false, but there could still be some agent.
  else if (appFocus.isNewSession()) {
    agentIdToFind = DEFAULT_AGENT_ID;
  }

  // Or this could be a new chat-session with an agent.
  else if (appFocus.isAgent()) {
    agentIdToFind = Number.parseInt(appFocus.getId()!);
  }

  return agents.find((agent) => agent.id === agentIdToFind) ?? null;
}

// ---------------------------------------------------------------------------
// Main hook
// ---------------------------------------------------------------------------

export default function useChatSessions(): UseChatSessionsOutput {
  const getKey = (
    pageIndex: number,
    previousPageData: ChatSessionsResponse | null
  ): string | null => {
    // No more pages
    if (previousPageData && !previousPageData.has_more) return null;

    // First page — no cursor
    if (pageIndex === 0) {
      return `${SWR_KEYS.chatSessions}?page_size=${PAGE_SIZE}`;
    }

    // Subsequent pages — cursor from the last session of the previous page
    const lastSession =
      previousPageData!.sessions[previousPageData!.sessions.length - 1];
    if (!lastSession) return null;

    const params = new URLSearchParams({
      page_size: PAGE_SIZE.toString(),
      before: lastSession.time_updated,
    });
    return `${SWR_KEYS.chatSessions}?${params.toString()}`;
  };

  const { data, error, setSize, mutate } = useSWRInfinite<ChatSessionsResponse>(
    getKey,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      revalidateFirstPage: true,
      revalidateAll: false,
      dedupingInterval: 30000,
    }
  );

  const appFocus = useAppFocus();
  const pendingSessions = usePendingSessions();

  // Flatten all pages into a single session list
  const allFetchedSessions = useMemo(
    () => (data ? data.flatMap((page) => page.sessions) : []),
    [data]
  );

  // hasMore: check the last loaded page
  const hasMore = useMemo(() => {
    if (!data || data.length === 0) return false;
    const lastPage = data[data.length - 1];
    return lastPage ? lastPage.has_more : false;
  }, [data]);

  const [isLoadingMore, setIsLoadingMore] = useState(false);

  const loadMore = useCallback(async () => {
    if (isLoadingMore || !hasMore) return;

    setIsLoadingMore(true);
    const loadStart = Date.now();

    try {
      await setSize((s) => s + 1);

      // Enforce minimum loading duration to avoid skeleton flash
      const elapsed = Date.now() - loadStart;
      if (elapsed < MIN_LOADING_DURATION_MS) {
        await new Promise((r) =>
          setTimeout(r, MIN_LOADING_DURATION_MS - elapsed)
        );
      }
    } catch (err) {
      console.error("Failed to load more chat sessions:", err);
    } finally {
      setIsLoadingMore(false);
    }
  }, [isLoadingMore, hasMore, setSize]);

  // Clean up pending sessions that now appear in fetched data
  // (they now have messages and the server returns them)
  useEffect(() => {
    const fetchedIds = new Set(allFetchedSessions.map((s) => s.id));
    pendingSessions.forEach((pending) => {
      if (fetchedIds.has(pending.id)) {
        pendingSessionsStore.remove(pending.id);
      }
    });
  }, [allFetchedSessions, pendingSessions]);

  // Merge fetched sessions with pending sessions.
  // This ensures pending sessions persist across SWR revalidations.
  const chatSessions = useMemo(() => {
    const fetchedIds = new Set(allFetchedSessions.map((s) => s.id));

    // Get pending sessions that are not yet in fetched data
    const remainingPending = pendingSessions.filter(
      (pending) => !fetchedIds.has(pending.id)
    );

    // Pending sessions go first (most recent), then fetched sessions
    return [...remainingPending, ...allFetchedSessions];
  }, [allFetchedSessions, pendingSessions]);

  const currentChatSessionId = appFocus.isChat() ? appFocus.getId() : null;
  const currentChatSession =
    chatSessions.find(
      (chatSession) => chatSession.id === currentChatSessionId
    ) ?? null;

  const agentForCurrentChatSession =
    useFindAgentForCurrentChatSession(currentChatSession);

  // Add a pending chat session that will persist across SWR revalidations.
  // The session will be automatically removed once it appears in the server response.
  const addPendingChatSession = useCallback(
    ({ chatSessionId, personaId, projectId }: PendingChatSessionParams) => {
      // Don't add sessions that belong to a project
      if (projectId != null) return;

      // Don't add if already in pending store (duplicates are also filtered during merge)
      if (pendingSessionsStore.has(chatSessionId)) return;

      const now = new Date().toISOString();
      pendingSessionsStore.add({
        id: chatSessionId,
        name: "", // Empty name will display as "New Chat" via UNNAMED_CHAT constant
        persona_id: personaId,
        time_created: now,
        time_updated: now,
        shared_status: ChatSessionSharedStatus.Private,
        project_id: projectId ?? null,
        current_alternate_model: "",
        current_temperature_override: null,
      });
    },
    []
  );

  const removeSession = useCallback(
    (sessionId: string) => {
      pendingSessionsStore.remove(sessionId);
      // Optimistically remove from all loaded pages
      mutate(
        (pages) =>
          pages?.map((page) => ({
            ...page,
            sessions: page.sessions.filter((s) => s.id !== sessionId),
          })),
        { revalidate: false }
      );
    },
    [mutate]
  );

  const refreshChatSessions = useCallback(() => mutate(), [mutate]);

  return {
    chatSessions,
    currentChatSessionId,
    currentChatSession,
    agentForCurrentChatSession,
    isLoading: !error && !data,
    error,
    refreshChatSessions,
    addPendingChatSession,
    removeSession,
    hasMore,
    isLoadingMore,
    loadMore,
  };
}


================================================
FILE: web/src/hooks/useClickOutside.ts
================================================
"use client";

import { useEffect, RefObject } from "react";

/**
 * A generic hook that detects clicks outside of referenced element(s).
 *
 * @param ref - A ref or array of refs to monitor for outside clicks
 * @param callback - Function to call when a click outside is detected
 * @param enabled - Whether the hook is enabled. Defaults to true.
 *
 * @example
 * ```tsx
 * // Single ref example
 * const MyComponent = () => {
 *   const ref = useRef<HTMLDivElement>(null);
 *   const [isOpen, setIsOpen] = useState(false);
 *
 *   useClickOutside(ref, () => setIsOpen(false), isOpen);
 *
 *   return (
 *     <div ref={ref}>
 *       {isOpen && <div>Content</div>}
 *     </div>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Single ref example with dropdown
 * const Dropdown = () => {
 *   const dropdownRef = useRef<HTMLDivElement>(null);
 *   const [isOpen, setIsOpen] = useState(false);
 *
 *   useClickOutside(dropdownRef, () => setIsOpen(false), isOpen);
 *
 *   return (
 *     <div>
 *       {isOpen && <div ref={dropdownRef}>Dropdown content</div>}
 *     </div>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Multiple refs example - useful for combobox/dropdown with separate input and menu
 * const ComboBox = () => {
 *   const inputRef = useRef<HTMLInputElement>(null);
 *   const dropdownRef = useRef<HTMLDivElement>(null);
 *   const [isOpen, setIsOpen] = useState(false);
 *
 *   // Close dropdown only if click is outside BOTH input and dropdown
 *   useClickOutside([inputRef, dropdownRef], () => setIsOpen(false), isOpen);
 *
 *   return (
 *     <div>
 *       <input ref={inputRef} onClick={() => setIsOpen(true)} />
 *       {isOpen && (
 *         <div ref={dropdownRef}>
 *           <div>Option 1</div>
 *           <div>Option 2</div>
 *         </div>
 *       )}
 *     </div>
 *   );
 * };
 * ```
 */
export function useClickOutside<T extends HTMLElement>(
  ref: RefObject<T> | RefObject<T>[] | null,
  callback: () => void,
  enabled: boolean = true
): void {
  useEffect(() => {
    if (!enabled) {
      return;
    }

    const handleClickOutside = (event: Event) => {
      const target = event.target as Node;

      // Normalize to array for consistent handling
      const refs = Array.isArray(ref) ? ref : [ref];

      // Check if click is outside all provided refs
      const isOutside = refs.every(
        (r) => !r?.current || !r.current.contains(target)
      );

      if (isOutside) {
        callback();
      }
    };

    document.addEventListener("mousedown", handleClickOutside);

    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, [ref, callback, enabled]);
}


================================================
FILE: web/src/hooks/useCloudSubscription.ts
================================================
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { hasPaidSubscription } from "@/lib/billing/interfaces";
import { useBillingInformation } from "@/hooks/useBillingInformation";

/**
 * Returns whether the current tenant has an active paid subscription on cloud.
 *
 * Self-hosted deployments always return true (no billing gate).
 * Cloud deployments check billing status via the billing API.
 * Returns true while loading to avoid flashing the upgrade prompt.
 */
export function useCloudSubscription(): boolean {
  const { data: billingData, isLoading } = useBillingInformation();

  if (!NEXT_PUBLIC_CLOUD_ENABLED) {
    return true;
  }

  // Treat loading as subscribed to avoid UI flash
  if (isLoading || billingData == null) {
    return true;
  }

  return hasPaidSubscription(billingData);
}


================================================
FILE: web/src/hooks/useCodeInterpreter.ts
================================================
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";

const HEALTH_ENDPOINT = "/api/admin/code-interpreter/health";
const STATUS_ENDPOINT = "/api/admin/code-interpreter";

interface CodeInterpreterHealth {
  healthy: boolean;
}

interface CodeInterpreterStatus {
  enabled: boolean;
}

export default function useCodeInterpreter() {
  const {
    data: healthData,
    error: healthError,
    isLoading: isHealthLoading,
    mutate: refetchHealth,
  } = useSWR<CodeInterpreterHealth>(HEALTH_ENDPOINT, errorHandlingFetcher, {
    refreshInterval: 30000,
  });

  const {
    data: statusData,
    error: statusError,
    isLoading: isStatusLoading,
    mutate: refetchStatus,
  } = useSWR<CodeInterpreterStatus>(STATUS_ENDPOINT, errorHandlingFetcher);

  function refetch() {
    refetchHealth();
    refetchStatus();
  }

  return {
    isHealthy: healthData?.healthy ?? false,
    isEnabled: statusData?.enabled ?? false,
    isLoading: isHealthLoading || isStatusLoading,
    error: healthError || statusError,
    refetch,
  };
}


================================================
FILE: web/src/hooks/useContainerCenter.ts
================================================
"use client";

import { useState, useEffect } from "react";
import { usePathname } from "next/navigation";
import useScreenSize from "@/hooks/useScreenSize";

const SELECTOR = "[data-main-container]";

interface ContainerCenter {
  centerX: number | null;
  centerY: number | null;
  hasContainerCenter: boolean;
}

const NULL_CENTER = { x: null, y: null } as const;

function measure(el: HTMLElement): { x: number; y: number } | null {
  if (!el.isConnected) return null;
  const rect = el.getBoundingClientRect();
  if (rect.width === 0 && rect.height === 0) return null;
  return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };
}

/**
 * Tracks the center point of the `[data-main-container]` element so that
 * portaled overlays (modals, command menus) can center relative to the main
 * content area rather than the full viewport.
 *
 * Returns `{ centerX, centerY, hasContainerCenter }`.
 * When the container is not present (e.g. pages without `AppLayouts.Root`),
 * both center values are `null` and `hasContainerCenter` is `false`, allowing
 * callers to fall back to standard viewport centering.
 *
 * Uses a lazy `useState` initializer so the first render already has the
 * correct values (no flash), and a `ResizeObserver` to stay reactive when
 * the sidebar folds/unfolds. Re-subscribes on route changes because each
 * page renders its own `AppLayouts.Root`, replacing the DOM element.
 */
export default function useContainerCenter(): ContainerCenter {
  const pathname = usePathname();
  const { isMediumScreen } = useScreenSize();
  const [center, setCenter] = useState<{ x: number | null; y: number | null }>(
    () => {
      if (typeof document === "undefined") return NULL_CENTER;
      const el = document.querySelector<HTMLElement>(SELECTOR);
      if (!el) return NULL_CENTER;
      const m = measure(el);
      return m ?? NULL_CENTER;
    }
  );

  useEffect(() => {
    const container = document.querySelector<HTMLElement>(SELECTOR);
    if (!container) {
      setCenter(NULL_CENTER);
      return;
    }

    const update = () => {
      const m = measure(container);
      setCenter(m ?? NULL_CENTER);
    };

    update();
    const observer = new ResizeObserver(update);
    observer.observe(container);
    return () => observer.disconnect();
  }, [pathname]);

  return {
    centerX: isMediumScreen ? null : center.x,
    centerY: isMediumScreen ? null : center.y,
    hasContainerCenter: isMediumScreen
      ? false
      : center.x !== null && center.y !== null,
  };
}


================================================
FILE: web/src/hooks/useContentSize.ts
================================================
"use client";

import { useRef, useEffect, useState } from "react";

interface ContentSize {
  width: number;
  height: number;
}

/**
 * A hook that measures the content size (scrollWidth/scrollHeight) of a DOM element.
 *
 * This hook measures the natural content size of an element including overflow,
 * which is useful for determining how much space content needs before wrapping
 * or being cut off. It can automatically track size changes via ResizeObserver
 * and/or re-measure when dependencies change.
 *
 * @param dependencies - Optional dependency array to trigger re-measurement when values change
 * @param observeResize - Whether to continuously observe size changes via ResizeObserver. Defaults to true.
 *
 * @returns A tuple containing:
 *   - `ref`: A ref object to attach to the element you want to measure
 *   - `size`: An object with `width` and `height` properties (in pixels)
 *
 * @example
 * ```tsx
 * // Basic usage - measure button content to determine if it needs to wrap
 * const MyButton = ({ children }) => {
 *   const [ref, { width }] = useContentSize();
 *
 *   return (
 *     <button ref={ref}>
 *       Content is {width}px wide
 *     </button>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Measure content when it changes
 * const DynamicContent = ({ text }) => {
 *   const [ref, { width, height }] = useContentSize([text]);
 *
 *   return (
 *     <div ref={ref}>
 *       {text}
 *       <p>Size: {width}x{height}</p>
 *     </div>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Measure once without observing resize (better performance)
 * const SelectButton = ({ children }) => {
 *   const content = useMemo(() => <span>{children}</span>, [children]);
 *   const [measureRef, { width: contentWidth }] = useContentSize([content], false);
 *
 *   return (
 *     <div>
 *       // Hidden element for measurement
 *       <div ref={measureRef} style={{ position: 'absolute', visibility: 'hidden' }}>
 *         {content}
 *       </div>
 *       // Actual button with calculated width
 *       <button style={{ width: contentWidth }}>
 *         {content}
 *       </button>
 *     </div>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Auto-expanding textarea
 * const AutoExpandingTextarea = () => {
 *   const [value, setValue] = useState('');
 *   const [ref, { height }] = useContentSize([value]);
 *
 *   return (
 *     <textarea
 *       ref={ref}
 *       value={value}
 *       onChange={(e) => setValue(e.target.value)}
 *       style={{ height: `${height}px` }}
 *     />
 *   );
 * };
 * ```
 */
export function useContentSize(
  dependencies?: React.DependencyList,
  observeResize: boolean = true
): [React.RefObject<HTMLDivElement | null>, ContentSize] {
  const ref = useRef<HTMLDivElement>(null);
  const [size, setSize] = useState<ContentSize>({ width: 0, height: 0 });

  const measureSize = () => {
    if (ref.current) {
      const newSize: ContentSize = {
        width: ref.current.scrollWidth,
        height: ref.current.scrollHeight,
      };
      setSize(newSize);
    }
  };

  // Measure on dependencies change
  // We intentionally use the `dependencies` parameter directly as the dependency array.
  // The exhaustive-deps rule is disabled because:
  // 1. `measureSize` is stable (doesn't change) and doesn't need to be in the deps
  // 2. We want to re-measure ONLY when the caller's dependencies change, not when measureSize changes
  // 3. The caller passes their own dependency array to control when measurement happens
  useEffect(() => {
    measureSize();
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, dependencies);

  // Observe resize if enabled
  useEffect(() => {
    if (!observeResize || !ref.current) return;

    const resizeObserver = new ResizeObserver(() => {
      // Use requestAnimationFrame to ensure measurements happen after the resize is complete
      requestAnimationFrame(() => {
        measureSize();
      });
    });

    // Observe the container itself
    resizeObserver.observe(ref.current);

    // Also observe all descendant elements (like textareas)
    const descendants = ref.current.querySelectorAll("*");
    descendants.forEach((el) => {
      resizeObserver.observe(el);
    });

    return () => {
      resizeObserver.disconnect();
    };
  }, [observeResize]);

  return [ref, size];
}


================================================
FILE: web/src/hooks/useCurrentUser.ts
================================================
import useSWR, { type KeyedMutator } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { User } from "@/lib/types";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Fetches the current authenticated user via SWR (`/api/me`).
 *
 * This hook is intentionally configured with conservative revalidation
 * settings to avoid hammering the backend on every focus/reconnect event:
 *
 * - `revalidateOnFocus: false`      — tab switches won't trigger a refetch
 * - `revalidateOnReconnect: false`   — network recovery won't trigger a refetch
 * - `dedupingInterval: 30_000`       — duplicate requests within 30 s are deduped
 *
 * The returned `mutateUser` handle lets callers imperatively refetch (e.g.
 * after a token refresh) without changing the global SWR config.
 *
 * @example
 * ```ts
 * const { user, mutateUser, userError } = useCurrentUser();
 * ```
 */
export function useCurrentUser(): {
  /** The authenticated user, or `undefined` while loading. */
  user: User | undefined;
  /** Imperatively revalidate / update the cached user. */
  mutateUser: KeyedMutator<User>;
  /** The error thrown by the fetcher, if any. */
  userError: (Error & { status?: number }) | undefined;
} {
  const { data, mutate, error } = useSWR<User>(
    SWR_KEYS.me,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 30_000,
    }
  );

  return { user: data, mutateUser: mutate, userError: error };
}


================================================
FILE: web/src/hooks/useDeepResearchToggle.ts
================================================
"use client";

import { useState, useEffect, useRef, useCallback } from "react";

interface UseDeepResearchToggleProps {
  chatSessionId: string | null;
  agentId: number | undefined;
}

/**
 * Custom hook for managing the agent search (deep research) toggle state.
 * Automatically resets the toggle to false when:
 * - Switching between existing chat sessions
 * - The assistant changes
 * - The page is reloaded (since state initializes to false)
 *
 * The toggle is preserved when transitioning from no chat session to a new session.
 *
 * @param chatSessionId - The current chat session ID
 * @param agentId - The current agent ID
 * @returns An object containing the toggle state and toggle function
 */
export default function useDeepResearchToggle({
  chatSessionId,
  agentId,
}: UseDeepResearchToggleProps) {
  const [deepResearchEnabled, setDeepResearchEnabled] = useState(false);
  const previousChatSessionId = useRef<string | null>(chatSessionId);

  // Reset when switching chat sessions, but preserve when going from null to a new session
  useEffect(() => {
    const previousId = previousChatSessionId.current;
    previousChatSessionId.current = chatSessionId;

    // Only reset if we're switching between actual sessions (not from null to a new session)
    if (previousId !== null && previousId !== chatSessionId) {
      setDeepResearchEnabled(false);
    }
  }, [chatSessionId]);

  // Reset when switching assistants
  useEffect(() => {
    setDeepResearchEnabled(false);
  }, [agentId]);

  const toggleDeepResearch = useCallback(() => {
    setDeepResearchEnabled(!deepResearchEnabled);
  }, [deepResearchEnabled]);

  return {
    deepResearchEnabled,
    toggleDeepResearch,
  };
}


================================================
FILE: web/src/hooks/useFederatedOAuthStatus.ts
================================================
"use client";

import { useMemo } from "react";
import useSWR from "swr";
import { FederatedConnectorOAuthStatus } from "@/components/chat/FederatedOAuthModal";
import { errorHandlingFetcher } from "@/lib/fetcher";

/**
 * Hook for fetching federated OAuth connector authentication status.
 *
 * Retrieves the authentication status for all federated connectors (e.g., Gmail,
 * Google Drive, Slack) and provides utilities to identify which connectors need
 * OAuth authentication. Uses SWR for caching and automatic revalidation.
 *
 * @returns Object containing:
 *   - connectors: Array of all federated connector statuses
 *   - needsAuth: Array of connectors that lack OAuth tokens
 *   - hasUnauthenticatedConnectors: Boolean indicating if any connectors need auth
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Error object if the fetch failed
 *   - refetch: Function to manually reload connector statuses
 *
 * @example
 * ```tsx
 * // Display connectors requiring authentication
 * const OAuthPrompt = () => {
 *   const { needsAuth, isLoading } = useFederatedOAuthStatus();
 *
 *   if (isLoading) return <Spinner />;
 *   if (needsAuth.length === 0) return null;
 *
 *   return (
 *     <div>
 *       <h3>Connect your accounts:</h3>
 *       {needsAuth.map(connector => (
 *         <ConnectButton key={connector.source} connector={connector} />
 *       ))}
 *     </div>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // Show warning banner if any connectors need authentication
 * const AuthWarningBanner = () => {
 *   const { hasUnauthenticatedConnectors } = useFederatedOAuthStatus();
 *
 *   if (!hasUnauthenticatedConnectors) return null;
 *
 *   return (
 *     <Banner variant="warning">
 *       Some connectors need authentication to access your data.
 *     </Banner>
 *   );
 * };
 * ```
 *
 * @example
 * ```tsx
 * // List all connectors with their auth status
 * const ConnectorList = () => {
 *   const { connectors, refetch } = useFederatedOAuthStatus();
 *
 *   return (
 *     <div>
 *       {connectors.map(connector => (
 *         <ConnectorRow
 *           key={connector.source}
 *           connector={connector}
 *           authenticated={connector.has_oauth_token}
 *           onReconnect={refetch}
 *         />
 *       ))}
 *     </div>
 *   );
 * };
 * ```
 */
export default function useFederatedOAuthStatus() {
  const { data, error, isLoading, mutate } = useSWR<
    FederatedConnectorOAuthStatus[]
  >("/api/federated/oauth-status", errorHandlingFetcher);

  const connectors = data ?? [];
  const needsAuth = useMemo(
    () => (data ?? []).filter((c) => !c.has_oauth_token),
    [data]
  );
  const hasUnauthenticatedConnectors = needsAuth.length > 0;

  return {
    connectors,
    needsAuth,
    hasUnauthenticatedConnectors,
    isLoading,
    error,
    refetch: mutate,
  };
}


================================================
FILE: web/src/hooks/useFeedbackController.ts
================================================
"use client";

import { useCallback } from "react";
import { useChatSessionStore } from "@/app/app/stores/useChatSessionStore";
import { FeedbackType } from "@/app/app/interfaces";
import { handleChatFeedback, removeChatFeedback } from "@/app/app/services/lib";
import { getMessageByMessageId } from "@/app/app/services/messageTree";
import { toast } from "@/hooks/useToast";

/**
 * Hook for managing chat message feedback (like/dislike)
 *
 * Provides optimistic UI updates with automatic rollback on errors.
 * Handles both adding/updating feedback and removing existing feedback.
 *
 * @returns Object containing:
 *   - handleFeedbackChange: Function to submit feedback changes
 *
 * @example
 * ```tsx
 * const { handleFeedbackChange } = useFeedbackController();
 *
 * // Add positive feedback
 * await handleFeedbackChange(messageId, "like", "Great response!");
 *
 * // Remove feedback
 * await handleFeedbackChange(messageId, null);
 * ```
 */
export default function useFeedbackController() {
  const updateCurrentMessageFeedback = useChatSessionStore(
    (state) => state.updateCurrentMessageFeedback
  );

  const handleFeedbackChange = useCallback(
    async (
      messageId: number,
      newFeedback: FeedbackType | null,
      feedbackText?: string,
      predefinedFeedback?: string
    ): Promise<boolean> => {
      // Get current feedback state for rollback on error
      const { currentSessionId, sessions } = useChatSessionStore.getState();
      const messageTree = currentSessionId
        ? sessions.get(currentSessionId)?.messageTree
        : undefined;
      const previousFeedback = messageTree
        ? getMessageByMessageId(messageTree, messageId)?.currentFeedback ?? null
        : null;

      // Optimistically update the UI
      updateCurrentMessageFeedback(messageId, newFeedback);

      try {
        if (newFeedback === null) {
          // Remove feedback
          const response = await removeChatFeedback(messageId);
          if (!response.ok) {
            // Rollback on error
            updateCurrentMessageFeedback(messageId, previousFeedback);
            const errorData = await response.json();
            toast.error(
              `Failed to remove feedback - ${
                errorData.detail || errorData.message
              }`
            );
            return false;
          }
        } else {
          // Add/update feedback
          const response = await handleChatFeedback(
            messageId,
            newFeedback,
            feedbackText || "",
            predefinedFeedback
          );
          if (!response.ok) {
            // Rollback on error
            updateCurrentMessageFeedback(messageId, previousFeedback);
            const errorData = await response.json();
            toast.error(
              `Failed to submit feedback - ${
                errorData.detail || errorData.message
              }`
            );
            return false;
          }
        }
        return true;
      } catch (error) {
        // Rollback on network error
        updateCurrentMessageFeedback(messageId, previousFeedback);
        toast.error("Failed to submit feedback - network error");
        return false;
      }
    },
    [updateCurrentMessageFeedback]
  );

  return { handleFeedbackChange };
}


================================================
FILE: web/src/hooks/useFilter.ts
================================================
"use client";

import { useMemo, useState } from "react";

/**
 * A generic filtering hook that filters an array of items based on a query string.
 *
 * The hook manages its own query state and uses an extractor function to convert
 * each item into a searchable string, then performs a case-insensitive substring
 * match against the query.
 *
 * @template T - The type of items being filtered
 * @param items - The array of items to filter
 * @param extractor - A function that extracts a searchable string from each item
 * @returns An object containing the query, setQuery function, and filtered items
 *
 * @example
 * ```tsx
 * function MyComponent() {
 *   const tools = [
 *     { name: "File Reader", description: "Read files" },
 *     { name: "Web Search", description: "Search the web" }
 *   ];
 *
 *   const { query, setQuery, filtered } = useFilter(
 *     tools,
 *     (tool) => `${tool.name} ${tool.description}`
 *   );
 *
 *   return (
 *     <>
 *       <input value={query} onChange={(e) => setQuery(e.target.value)} />
 *       {filtered.map(tool => <div key={tool.name}>{tool.name}</div>)}
 *     </>
 *   );
 * }
 * ```
 *
 * @remarks
 * - Returns all items if the query is empty or whitespace-only
 * - Performs case-insensitive matching
 * - Uses substring matching (includes)
 * - The extractor function is included in dependencies to prevent stale closures.
 *   For optimal performance, memoize the extractor with useCallback if it's expensive.
 */
export default function useFilter<T>(
  items: T[],
  extractor: (item: T) => string
) {
  const [query, setQuery] = useState("");

  const filtered = useMemo(() => {
    const trimmedQuery = query.trim();

    // Return all items if query is empty
    if (!trimmedQuery) {
      return items;
    }

    const lowerQuery = trimmedQuery.toLowerCase();

    return items.filter((item) => {
      const searchableText = extractor(item).toLowerCase();
      return searchableText.includes(lowerQuery);
    });
  }, [query, items, extractor]);

  return { query, setQuery, filtered };
}


================================================
FILE: web/src/hooks/useGroups.ts
================================================
"use client";

import useSWR, { mutate } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { UserGroup } from "@/lib/types";
import { useContext } from "react";
import { SettingsContext } from "@/providers/SettingsProvider";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Fetches all user groups in the organization.
 *
 * Returns group information including group members, curators, and associated resources.
 * Use this for displaying group lists in sharing dialogs, admin panels, or permission
 * management interfaces.
 *
 * Note: This hook only returns data if enterprise features are enabled. In non-enterprise
 * environments, it returns an empty array.
 *
 * @returns Object containing:
 *   - data: Array of UserGroup objects, or undefined while loading
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Any error that occurred during fetch
 *   - refreshGroups: Function to manually revalidate the data
 *
 * @example
 * // Fetch groups for sharing dialogs
 * const { data: groupsData, isLoading } = useGroups();
 * if (isLoading) return <Spinner />;
 * return <GroupList groups={groupsData ?? []} />;
 *
 * @example
 * // Fetch groups with manual refresh
 * const { data: groupsData, refreshGroups } = useGroups();
 * // Later...
 * await createNewGroup(...);
 * refreshGroups(); // Refresh the group list
 */
export default function useGroups() {
  const combinedSettings = useContext(SettingsContext);
  const settingsLoading = combinedSettings?.settingsLoading ?? false;
  const isPaidEnterpriseFeaturesEnabled =
    !settingsLoading &&
    combinedSettings &&
    combinedSettings.enterpriseSettings !== null;

  const { data, error, isLoading } = useSWR<UserGroup[]>(
    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.adminUserGroups : null,
    errorHandlingFetcher
  );

  const refreshGroups = () => mutate(SWR_KEYS.adminUserGroups);

  if (settingsLoading) {
    return {
      data: undefined,
      isLoading: true,
      error: undefined,
      refreshGroups,
    };
  }

  if (!isPaidEnterpriseFeaturesEnabled) {
    return {
      data: [],
      isLoading: false,
      error: undefined,
      refreshGroups,
    };
  }

  return {
    data,
    isLoading,
    error,
    refreshGroups,
  };
}


================================================
FILE: web/src/hooks/useImageDropzone.ts
================================================
"use client";

import { useCallback } from "react";
import { useDropzone, DropzoneOptions, FileRejection } from "react-dropzone";

const ACCEPTED_IMAGE_TYPES = {
  "image/png": [".png"],
  "image/jpeg": [".jpeg", ".jpg"],
};

export interface UseImageDropzoneOptions {
  /** Callback when a valid image file is dropped/selected */
  onImageAccepted: (file: File) => void;
  /** Callback when file is rejected (wrong type, too many files, etc.) */
  onImageRejected?: (rejections: FileRejection[]) => void;
  /** Whether dropzone is disabled */
  disabled?: boolean;
  /** Custom accepted file types - defaults to png, jpeg, jpg */
  accept?: DropzoneOptions["accept"];
}

export interface UseImageDropzoneReturn {
  /** Whether user is actively dragging files over the drop zone */
  isDragActive: boolean;
  /** Props to spread onto the drop zone container element */
  getRootProps: ReturnType<typeof useDropzone>["getRootProps"];
  /** Props to spread onto a hidden input element */
  getInputProps: ReturnType<typeof useDropzone>["getInputProps"];
  /** Programmatically open the file picker (for click-to-edit) */
  openFilePicker: () => void;
}

export function useImageDropzone({
  onImageAccepted,
  onImageRejected,
  disabled = false,
  accept = ACCEPTED_IMAGE_TYPES,
}: UseImageDropzoneOptions): UseImageDropzoneReturn {
  const onDrop = useCallback(
    (acceptedFiles: File[], rejections: FileRejection[]) => {
      if (rejections.length > 0) {
        onImageRejected?.(rejections);
        return;
      }

      const file = acceptedFiles[0];
      if (file) {
        onImageAccepted(file);
      }
    },
    [onImageAccepted, onImageRejected]
  );

  const { getRootProps, getInputProps, open, isDragActive } = useDropzone({
    onDrop,
    accept,
    multiple: false,
    disabled,
    noClick: true,
    noKeyboard: true,
  });

  return {
    isDragActive,
    getRootProps,
    getInputProps,
    openFilePicker: open,
  };
}


================================================
FILE: web/src/hooks/useIsDefaultAgent.ts
================================================
"use client";

import { useMemo } from "react";
import { useSearchParams } from "next/navigation";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { CombinedSettings } from "@/interfaces/settings";
import { ChatSession } from "@/app/app/interfaces";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { DEFAULT_AGENT_ID } from "@/lib/constants";

/**
 * Determines if the current assistant is the default agent based on:
 * 1. Whether default agent is disabled in settings
 * 2. If URL has an agentId specified
 * 3. Based on the current chat session
 */
export default function useIsDefaultAgent({
  liveAgent,
  existingChatSessionId,
  selectedChatSession,
  settings,
}: {
  liveAgent: MinimalPersonaSnapshot | undefined;
  existingChatSessionId: string | null;
  selectedChatSession: ChatSession | undefined;
  settings: CombinedSettings | null;
}) {
  const searchParams = useSearchParams();
  const urlAssistantId = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);

  return useMemo(() => {
    // If default agent is disabled, it can never be the default agent
    if (settings?.settings?.disable_default_assistant) {
      return false;
    }

    // If URL has an agentId, it's explicitly selected, not default
    if (
      urlAssistantId !== null &&
      urlAssistantId !== DEFAULT_AGENT_ID.toString()
    ) {
      return false;
    }

    // If there's an existing chat session with a persona_id, it's not default
    if (
      existingChatSessionId &&
      selectedChatSession?.persona_id !== DEFAULT_AGENT_ID
    ) {
      return false;
    }

    // If just on `/chat` page, it's the default agent
    return true;
  }, [
    settings?.settings?.disable_default_assistant,
    urlAssistantId,
    existingChatSessionId,
    selectedChatSession?.persona_id,
    liveAgent?.id,
  ]);
}


================================================
FILE: web/src/hooks/useKeyPress.ts
================================================
"use client";

import { useEffect } from "react";

export function useKeyPress(
  callback: () => void,
  key: string,
  enabled: boolean = true
) {
  useEffect(() => {
    if (!enabled) return;
    function handleKeyDown(event: KeyboardEvent) {
      if (event.key !== key) return;
      event.preventDefault();
      callback();
    }
    document.addEventListener("keydown", handleKeyDown);
    return () => {
      document.removeEventListener("keydown", handleKeyDown);
    };
  }, [callback, enabled, key]);
}

/**
 * Custom hook that listens for the "Escape" key and calls the provided callback.
 *
 * @param callback - Function to call when the Escape key is pressed
 * @param enabled - Optional boolean to enable/disable the hook (defaults to true)
 */
export function useEscape(callback: () => void, enabled: boolean = true) {
  useKeyPress(callback, "Escape", enabled);
}

/**
 * Custom hook that listens for the "Enter" key and calls the provided callback.
 *
 * @param callback - Function to call when the Enter key is pressed
 * @param enabled - Optional boolean to enable/disable the hook (defaults to true)
 */
export function useEnter(callback: () => void, enabled: boolean = true) {
  useKeyPress(callback, "Enter", enabled);
}


================================================
FILE: web/src/hooks/useLLMProviders.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  LLMProviderDescriptor,
  LLMProviderResponse,
  LLMProviderView,
  WellKnownLLMProviderDescriptor,
} from "@/interfaces/llm";

/**
 * Fetches configured LLM providers accessible to the current user.
 *
 * Hits the **non-admin** endpoints which return `LLMProviderDescriptor`
 * (no `id` or sensitive fields like `api_key`). Use this hook in
 * user-facing UI (chat, popovers, onboarding) where you need the list
 * of providers and their visible models but don't need admin-level details.
 *
 * The backend wraps the provider list in an `LLMProviderResponse` envelope
 * that also carries the global default text and vision models. This hook
 * unwraps `.providers` for convenience while still exposing the defaults.
 *
 * **Endpoints:**
 * - No `personaId` → `GET /api/llm/provider`
 *   Returns all public providers plus restricted providers the user can
 *   access via group membership.
 * - With `personaId` → `GET /api/llm/persona/{personaId}/providers`
 *   Returns providers scoped to a specific persona, respecting RBAC
 *   restrictions. Use this when displaying model options for a particular
 *   assistant.
 *
 * @param personaId - Optional persona ID for RBAC-scoped providers.
 *
 * @returns
 * - `llmProviders` — The array of provider descriptors, or `undefined`
 *    while loading.
 * - `defaultText` — The global (or persona-overridden) default text model.
 * - `defaultVision` — The global (or persona-overridden) default vision model.
 * - `isLoading` — `true` until the first successful response or error.
 * - `error` — The SWR error object, if any.
 * - `refetch` — SWR `mutate` function to trigger a revalidation.
 */
export function useLLMProviders(personaId?: number) {
  const url =
    personaId !== undefined
      ? SWR_KEYS.llmProvidersForPersona(personaId)
      : SWR_KEYS.llmProviders;

  const { data, error, mutate } = useSWR<
    LLMProviderResponse<LLMProviderDescriptor>
  >(url, errorHandlingFetcher, {
    revalidateOnFocus: false,
    revalidateIfStale: false,
    dedupingInterval: 60000,
  });

  return {
    llmProviders: data?.providers,
    defaultText: data?.default_text ?? null,
    defaultVision: data?.default_vision ?? null,
    isLoading: !error && !data,
    error,
    refetch: mutate,
  };
}

/**
 * Fetches configured LLM providers via the **admin** endpoint.
 *
 * Hits `GET /api/admin/llm/provider` which returns `LLMProviderView` —
 * the full provider object including `id`, `api_key` (masked),
 * group/persona assignments, and all other admin-visible fields.
 *
 * Use this hook on admin pages (e.g. the LLM Configuration page) where
 * you need provider IDs for mutations (setting defaults, editing, deleting)
 * or need to display admin-only metadata. **Do not use in user-facing UI**
 * — use `useLLMProviders` instead.
 *
 * @returns
 * - `llmProviders` — The array of full provider views, or `undefined`
 *    while loading.
 * - `defaultText` — The global default text model.
 * - `defaultVision` — The global default vision model.
 * - `isLoading` — `true` until the first successful response or error.
 * - `error` — The SWR error object, if any.
 * - `refetch` — SWR `mutate` function to trigger a revalidation.
 */
export function useAdminLLMProviders() {
  const { data, error, mutate } = useSWR<LLMProviderResponse<LLMProviderView>>(
    SWR_KEYS.adminLlmProviders,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    llmProviders: data?.providers,
    defaultText: data?.default_text ?? null,
    defaultVision: data?.default_vision ?? null,
    isLoading: !error && !data,
    error,
    refetch: mutate,
  };
}

/**
 * Fetches the catalog of well-known (built-in) LLM providers.
 *
 * Hits `GET /api/admin/llm/built-in/options` which returns the static
 * list of provider descriptors that Onyx ships with out of the box
 * (OpenAI, Anthropic, Vertex AI, Bedrock, Azure, Ollama, OpenRouter,
 * etc.). Each descriptor includes the provider's known models and the
 * recommended default model.
 *
 * Used primarily on the LLM Configuration page and onboarding flows
 * to show which providers are available to set up, and to pre-populate
 * model lists before the user has entered credentials.
 *
 * @returns
 * - `wellKnownLLMProviders` — The array of built-in provider descriptors,
 *    or `null` while loading.
 * - `isLoading` — `true` until the first successful response or error.
 * - `error` — The SWR error object, if any.
 * - `mutate` — SWR `mutate` function to trigger a revalidation.
 */
/**
 * Fetches the descriptor for a single well-known (built-in) LLM provider.
 *
 * Hits `GET /api/admin/llm/built-in/options/{providerEndpoint}` which returns
 * the provider descriptor including its known models and the recommended
 * default model.
 *
 * Used inside individual provider modals to pre-populate model lists
 * before the user has entered credentials.
 *
 * @param providerEndpoint - The provider's API endpoint name (e.g. "openai", "anthropic").
 *   Pass `null` to suppress the request.
 */
export function useWellKnownLLMProvider(providerEndpoint: string | null) {
  const { data, error, isLoading } = useSWR<WellKnownLLMProviderDescriptor>(
    providerEndpoint ? SWR_KEYS.wellKnownLlmProvider(providerEndpoint) : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    wellKnownLLMProvider: data ?? null,
    isLoading,
    error,
  };
}

export function useWellKnownLLMProviders() {
  const {
    data: wellKnownLLMProviders,
    error,
    isLoading,
    mutate,
  } = useSWR<WellKnownLLMProviderDescriptor[]>(
    SWR_KEYS.wellKnownLlmProviders,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    wellKnownLLMProviders: wellKnownLLMProviders ?? null,
    isLoading,
    error,
    mutate,
  };
}


================================================
FILE: web/src/hooks/useLicense.ts
================================================
import useSWR from "swr";

import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { LicenseStatus } from "@/lib/billing/interfaces";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Hook to fetch license status for self-hosted deployments.
 *
 * Skips the fetch on cloud deployments (uses tenant auth instead).
 */
export function useLicense() {
  const url = NEXT_PUBLIC_CLOUD_ENABLED ? null : SWR_KEYS.license;

  const { data, error, mutate, isLoading } = useSWR<LicenseStatus>(
    url,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 30000,
      shouldRetryOnError: false,
      keepPreviousData: true,
    }
  );

  if (!url) {
    return {
      data: undefined,
      isLoading: false,
      error: undefined,
      refresh: () => Promise.resolve(undefined),
    };
  }

  return { data, isLoading, error, refresh: mutate };
}


================================================
FILE: web/src/hooks/useMcpServers.ts
================================================
"use client";

import { errorHandlingFetcher } from "@/lib/fetcher";
import { MCPServersResponse } from "@/lib/tools/interfaces";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Fetches MCP (Model Context Protocol) servers configuration.
 *
 * MCP servers provide additional tools and capabilities to agents through
 * the Model Context Protocol.
 *
 * @returns Object containing:
 *   - mcpData: MCPServersResponse data or null if not loaded
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Any error that occurred during fetch
 *   - mutateMcpServers: Function to manually revalidate the data
 *
 * @example
 * const { mcpData, isLoading } = useMcpServers();
 * if (isLoading) return <Spinner />;
 * return <MCPServersList servers={mcpData} />;
 */
export default function useMcpServers() {
  const {
    data: mcpData,
    error,
    isLoading: isMcpLoading,
    mutate: mutateMcpServers,
  } = useSWR<MCPServersResponse>(
    SWR_KEYS.adminMcpServers,
    errorHandlingFetcher
  );

  return {
    mcpData: mcpData ?? null,
    isLoading: isMcpLoading,
    error,
    mutateMcpServers,
  };
}


================================================
FILE: web/src/hooks/useMcpServersForAgentEditor.ts
================================================
"use client";

import { errorHandlingFetcher } from "@/lib/fetcher";
import { MCPServersResponse } from "@/lib/tools/interfaces";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Fetch MCP servers for non-admin UIs (e.g. agent editor).
 *
 * This endpoint is available to all authenticated users so basic users can
 * attach MCP actions to assistants.
 */
export default function useMcpServersForAgentEditor() {
  const {
    data: mcpData,
    error,
    isLoading: isMcpLoading,
    mutate: mutateMcpServers,
  } = useSWR<MCPServersResponse>(SWR_KEYS.mcpServers, errorHandlingFetcher);

  return {
    mcpData: mcpData ?? null,
    isLoading: isMcpLoading,
    error,
    mutateMcpServers,
  };
}


================================================
FILE: web/src/hooks/useMemoryManager.ts
================================================
import { useRef, useCallback, useEffect, useState } from "react";
import { MemoryItem } from "@/lib/types";

export interface LocalMemory {
  id: number;
  content: string;
  isNew: boolean;
}

export const MAX_MEMORY_LENGTH = 200;
export const MAX_MEMORY_COUNT = 10;

interface UseMemoryManagerArgs {
  memories: MemoryItem[];
  onSaveMemories: (memories: MemoryItem[]) => Promise<boolean>;
  onNotify: (message: string, type: "success" | "error") => void;
}

export function useMemoryManager({
  memories,
  onSaveMemories,
  onNotify,
}: UseMemoryManagerArgs) {
  const [localMemories, setLocalMemories] = useState<LocalMemory[]>([]);
  const [searchQuery, setSearchQuery] = useState("");
  const initialMemoriesRef = useRef<MemoryItem[]>([]);
  const isSavingRef = useRef(false);

  // Initialize local memories from props
  useEffect(() => {
    const existingMemories: LocalMemory[] = memories.map((mem, index) => ({
      id: mem.id ?? -(index + 1),
      content: mem.content,
      isNew: mem.id === null,
    }));

    setLocalMemories((prev) => {
      const emptyNewItems = prev.filter((m) => m.isNew && !m.content.trim());
      const availableSlots = MAX_MEMORY_COUNT - existingMemories.length;
      return [
        ...emptyNewItems.slice(0, Math.max(0, availableSlots)),
        ...existingMemories,
      ];
    });
    initialMemoriesRef.current = memories;
  }, [memories]);

  const canAddMemory = localMemories.length < MAX_MEMORY_COUNT;

  const handleAddMemory = useCallback((): number | null => {
    if (localMemories.length >= MAX_MEMORY_COUNT) {
      return null;
    }

    const existingEmpty = localMemories.find(
      (m) => m.isNew && !m.content.trim()
    );
    if (existingEmpty) {
      return existingEmpty.id;
    }

    // Save any unsaved new item with content before creating a new one
    const unsavedNewItem = localMemories.find(
      (m) => m.isNew && m.content.trim()
    );
    if (unsavedNewItem && !isSavingRef.current) {
      const newMemories: MemoryItem[] = localMemories
        .filter((m) => m.content.trim())
        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));

      const memoriesChanged =
        JSON.stringify(newMemories) !==
        JSON.stringify(initialMemoriesRef.current);

      if (memoriesChanged) {
        isSavingRef.current = true;
        onSaveMemories(newMemories).then((success) => {
          isSavingRef.current = false;
          if (success) {
            initialMemoriesRef.current = newMemories;
            onNotify("Memory saved", "success");
          } else {
            onNotify("Failed to save memory", "error");
          }
        });
      }
    }

    const newId = Date.now();
    setLocalMemories((prev) => [
      { id: newId, content: "", isNew: true },
      ...prev,
    ]);
    return newId;
  }, [localMemories, onSaveMemories, onNotify]);

  const handleUpdateMemory = useCallback((index: number, value: string) => {
    setLocalMemories((prev) =>
      prev.map((memory, i) =>
        i === index ? { ...memory, content: value } : memory
      )
    );
  }, []);

  const handleRemoveMemory = useCallback(
    async (index: number) => {
      const memory = localMemories[index];
      if (!memory) return;

      if (memory.isNew) {
        setLocalMemories((prev) => prev.filter((_, i) => i !== index));
        return;
      }

      const newMemories: MemoryItem[] = localMemories
        .filter((_, i) => i !== index)
        .filter((m) => !m.isNew || m.content.trim())
        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));

      const success = await onSaveMemories(newMemories);
      if (success) {
        onNotify("Memory deleted", "success");
      } else {
        onNotify("Failed to delete memory", "error");
      }
    },
    [localMemories, onSaveMemories, onNotify]
  );

  const handleBlurMemory = useCallback(
    async (index: number) => {
      const memory = localMemories[index];
      if (!memory || !memory.content.trim()) return;
      if (isSavingRef.current) return;

      const newMemories: MemoryItem[] = localMemories
        .filter((m) => m.content.trim())
        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));

      const memoriesChanged =
        JSON.stringify(newMemories) !==
        JSON.stringify(initialMemoriesRef.current);

      if (!memoriesChanged) return;

      isSavingRef.current = true;
      const success = await onSaveMemories(newMemories);
      isSavingRef.current = false;
      if (success) {
        initialMemoriesRef.current = newMemories;
        onNotify("Memory saved", "success");
      } else {
        onNotify("Failed to save memory", "error");
      }
    },
    [localMemories, onSaveMemories, onNotify]
  );

  const filteredMemories = localMemories
    .map((memory, originalIndex) => ({ memory, originalIndex }))
    .filter(({ memory }) => {
      if (!searchQuery.trim()) return true;
      return memory.content
        .toLowerCase()
        .includes(searchQuery.trim().toLowerCase());
    });

  const totalLineCount = localMemories.filter(
    (m) => m.content.trim() || m.isNew
  ).length;

  return {
    localMemories,
    searchQuery,
    setSearchQuery,
    filteredMemories,
    totalLineCount,
    canAddMemory,
    handleAddMemory,
    handleUpdateMemory,
    handleRemoveMemory,
    handleBlurMemory,
  };
}


================================================
FILE: web/src/hooks/useOnMount.ts
================================================
"use client";

import { useEffect, useState } from "react";

/**
 * Hook that tracks whether the component has mounted on the client.
 *
 * Useful for avoiding hydration mismatches in SSR/SSG environments where
 * certain browser-only APIs (e.g., `window`, `localStorage`) are unavailable
 * on the server. By checking `isMounted`, you can defer rendering of
 * client-only content until after hydration.
 *
 * @param f - Optional callback to execute once on mount. This allows you to
 *            run initialization logic (e.g., setting up event listeners,
 *            fetching initial data) without needing a separate `useEffect`
 *            in the consuming component.
 * @returns `true` after the component has mounted, `false` during SSR and
 *          initial render.
 *
 * @example
 * ```tsx
 * function MyComponent() {
 *   const isMounted = useOnMount(() => {
 *     console.log("Component mounted!");
 *   });
 *
 *   if (!isMounted) return null; // or a loading skeleton
 *
 *   return <div>Client-only content using window.innerWidth</div>;
 * }
 * ```
 */
export default function useOnMount(f?: React.EffectCallback): boolean {
  const [mounted, setMounted] = useState(false);

  useEffect(() => {
    setMounted(true);
    return f?.();
  }, []);

  return mounted;
}


================================================
FILE: web/src/hooks/useOpenApiTools.ts
================================================
"use client";

import { errorHandlingFetcher } from "@/lib/fetcher";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

/**
 * Fetches OpenAPI tools configuration.
 *
 * OpenAPI tools provide custom actions and integrations to agents through
 * OpenAPI specifications.
 *
 * @returns Object containing:
 *   - openApiTools: ToolSnapshot[] data or null if not loaded
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Any error that occurred during fetch
 *   - mutateOpenApiTools: Function to manually revalidate the data
 *
 * @example
 * const { openApiTools, isLoading } = useOpenApiTools();
 * if (isLoading) return <Spinner />;
 * return <OpenApiToolsList tools={openApiTools} />;
 */
export default function useOpenApiTools() {
  const {
    data: openApiTools,
    error,
    isLoading: isOpenApiLoading,
    mutate: mutateOpenApiTools,
  } = useSWR<ToolSnapshot[]>(SWR_KEYS.openApiTools, errorHandlingFetcher);

  return {
    openApiTools: openApiTools ?? null,
    isLoading: isOpenApiLoading,
    error,
    mutateOpenApiTools,
  };
}


================================================
FILE: web/src/hooks/usePaginatedFetch.ts
================================================
"use client";

import { useCallback, useEffect, useState, useRef, useMemo } from "react";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import type { Route } from "next";

import { errorHandlingFetcher } from "@/lib/fetcher";

// Any type that has an id property
type PaginatedType = {
  id: number | string;
  [key: string]: any;
};

interface PaginatedApiResponse<T extends PaginatedType> {
  items: T[];
  total_items: number;
}

interface PaginationConfig {
  itemsPerPage: number;
  pagesPerBatch: number;
  endpoint: string;
  query?: string;
  filter?: Record<string, string | boolean | number | string[] | Date>;
  refreshIntervalInMs?: number;
}

interface PaginatedHookReturnData<T extends PaginatedType> {
  currentPageData: T[] | null;
  isLoading: boolean;
  error: Error | null;
  currentPage: number;
  totalPages: number;
  totalItems: number;
  goToPage: (page: number) => void;
  refresh: () => Promise<void>;
}

function usePaginatedFetch<T extends PaginatedType>({
  itemsPerPage,
  pagesPerBatch,
  endpoint,
  query,
  filter,
  refreshIntervalInMs = 5000,
}: PaginationConfig): PaginatedHookReturnData<T> {
  const router = useRouter();
  const currentPath = usePathname();
  const searchParams = useSearchParams();

  // State to initialize and hold the current page number
  const [currentPage, setCurrentPage] = useState(() =>
    parseInt(searchParams?.get("page") || "1", 10)
  );
  const [currentPageData, setCurrentPageData] = useState<T[] | null>(null);
  const [error, setError] = useState<Error | null>(null);
  const [isLoading, setIsLoading] = useState<boolean>(false);
  const [totalItems, setTotalItems] = useState<number>(0);
  const [cachedBatches, setCachedBatches] = useState<{ [key: number]: T[][] }>(
    {}
  );

  // Tracks ongoing requests to avoid duplicate requests, uses ref to persist across renders
  const ongoingRequestsRef = useRef<Set<number>>(new Set());

  const totalPages = useMemo(() => {
    if (totalItems === 0) return 1;
    return Math.ceil(totalItems / itemsPerPage);
  }, [totalItems, itemsPerPage]);

  // Calculates which batch we're in, and which page within that batch
  const batchAndPageIndices = useMemo(() => {
    const batchNum = Math.floor((currentPage - 1) / pagesPerBatch);
    const batchPageNum = (currentPage - 1) % pagesPerBatch;
    return { batchNum, batchPageNum };
  }, [currentPage, pagesPerBatch]);

  // Fetches a batch of data and stores it in the cache
  const fetchBatchData = useCallback(
    async (batchNum: number) => {
      // Prevents duplicate requests
      if (ongoingRequestsRef.current.has(batchNum)) {
        return;
      }
      ongoingRequestsRef.current.add(batchNum);

      try {
        // Build query params
        const params = new URLSearchParams({
          page_num: batchNum.toString(),
          page_size: (pagesPerBatch * itemsPerPage).toString(),
        });

        if (query) params.set("q", query);

        if (filter) {
          for (const [key, value] of Object.entries(filter)) {
            if (Array.isArray(value)) {
              value.forEach((str) => params.append(key, str));
            } else {
              params.set(key, value.toString());
            }
          }
        }

        const url = `${endpoint}?${params.toString()}`;
        const responseData =
          await errorHandlingFetcher<PaginatedApiResponse<T>>(url);

        // Validate response data structure
        if (
          !Array.isArray(
            responseData.items || typeof responseData.total_items !== "number"
          )
        ) {
          throw new Error(
            "Sorry, we encountered an issue with the data format. Please try again or contact support if the problem persists."
          );
        }

        setTotalItems(responseData.total_items);

        // Splits a batch into pages
        const pagesInBatch = Array.from({ length: pagesPerBatch }, (_, i) => {
          const startIndex = i * itemsPerPage;
          return responseData.items.slice(
            startIndex,
            startIndex + itemsPerPage
          );
        });

        setCachedBatches((prev) => ({
          ...prev,
          [batchNum]: pagesInBatch,
        }));
      } catch (error) {
        setError(error instanceof Error ? error : new Error(String(error)));
      } finally {
        ongoingRequestsRef.current.delete(batchNum);
      }
    },
    [endpoint, pagesPerBatch, itemsPerPage, query, filter]
  );

  // Updates the URL with the current page number
  const updatePageUrl = useCallback(
    (page: number) => {
      if (currentPath && searchParams) {
        const params = new URLSearchParams(searchParams);
        params.set("page", page.toString());
        router.replace(`${currentPath}?${params.toString()}` as Route, {
          scroll: false,
        });
      }
    },
    [currentPath, router, searchParams]
  );

  // Updates the current page
  const goToPage = useCallback(
    (newPage: number) => {
      setCurrentPage(newPage);
      updatePageUrl(newPage);
    },
    [updatePageUrl]
  );

  // Loads the current and adjacent batches
  useEffect(() => {
    const { batchNum } = batchAndPageIndices;
    const nextBatchNum = batchNum + 1;
    const prevBatchNum = Math.max(batchNum - 1, 0);

    if (!cachedBatches[batchNum]) {
      setIsLoading(true);
      fetchBatchData(batchNum);
    }

    // Possible total number of items including the next batch
    const totalItemsIncludingNextBatch =
      nextBatchNum * pagesPerBatch * itemsPerPage;
    // Preload next batch if we're not on the last batch
    if (
      totalItemsIncludingNextBatch <= totalItems &&
      !cachedBatches[nextBatchNum]
    ) {
      fetchBatchData(nextBatchNum);
    }

    // Load previous batch if missing
    if (!cachedBatches[prevBatchNum]) {
      fetchBatchData(prevBatchNum);
    }

    // Ensure first batch is always loaded
    if (!cachedBatches[0]) {
      fetchBatchData(0);
    }
  }, [currentPage, cachedBatches, totalPages, pagesPerBatch, fetchBatchData]);

  // Updates current page data from the cache
  useEffect(() => {
    const { batchNum, batchPageNum } = batchAndPageIndices;

    const cachedBatch = cachedBatches[batchNum];
    if (cachedBatch !== undefined) {
      const cachedBatchPage = cachedBatch[batchPageNum];
      if (cachedBatchPage !== undefined) {
        setCurrentPageData(cachedBatchPage);
        setIsLoading(false);
      }
    }
  }, [currentPage, cachedBatches, pagesPerBatch]);

  // Implements periodic refresh
  useEffect(() => {
    if (!refreshIntervalInMs) return;

    const interval = setInterval(() => {
      const { batchNum } = batchAndPageIndices;
      fetchBatchData(batchNum);
    }, refreshIntervalInMs);

    return () => clearInterval(interval);
  }, [currentPage, pagesPerBatch, refreshIntervalInMs, fetchBatchData]);

  // Manually refreshes the current batch
  const refresh = useCallback(async () => {
    const { batchNum } = batchAndPageIndices;
    await fetchBatchData(batchNum);
  }, [currentPage, pagesPerBatch, fetchBatchData]);

  // Cache invalidation
  useEffect(() => {
    setCachedBatches({});
    setTotalItems(0);
    goToPage(1);
    setError(null);
  }, [currentPath, query, filter]);

  return {
    currentPage,
    currentPageData,
    totalPages,
    totalItems,
    goToPage,
    refresh,
    isLoading,
    error,
  };
}

export default usePaginatedFetch;


================================================
FILE: web/src/hooks/usePromptShortcuts.ts
================================================
"use client";

import useSWR from "swr";
import { InputPrompt } from "@/app/app/interfaces";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

export default function usePromptShortcuts() {
  const { data, error, isLoading, mutate } = useSWR<InputPrompt[]>(
    SWR_KEYS.promptShortcuts,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  const promptShortcuts = data ?? [];
  const userPromptShortcuts = promptShortcuts.filter((p) => !p.is_public);
  const activePromptShortcuts = promptShortcuts.filter((p) => p.active);

  return {
    promptShortcuts,
    userPromptShortcuts,
    activePromptShortcuts,
    isLoading,
    error,
    refresh: mutate,
  };
}


================================================
FILE: web/src/hooks/useScimToken.ts
================================================
import useSWR from "swr";

import { errorHandlingFetcher } from "@/lib/fetcher";
import type { ScimTokenResponse } from "@/app/admin/scim/interfaces";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useScimToken() {
  const { data, error, isLoading, mutate } = useSWR<ScimTokenResponse>(
    SWR_KEYS.scimToken,
    errorHandlingFetcher,
    { shouldRetryOnError: false }
  );

  return { data, error, isLoading, mutate };
}


================================================
FILE: web/src/hooks/useScreenSize.ts
================================================
"use client";

import {
  DESKTOP_SMALL_BREAKPOINT_PX,
  DESKTOP_MEDIUM_BREAKPOINT_PX,
  MOBILE_SIDEBAR_BREAKPOINT_PX,
} from "@/lib/constants";
import { useState, useCallback } from "react";
import useOnMount from "@/hooks/useOnMount";

export interface ScreenSize {
  height: number;
  width: number;
  isMobile: boolean;
  isSmallScreen: boolean;
  isMediumScreen: boolean;
}

export default function useScreenSize(): ScreenSize {
  const [sizes, setSizes] = useState(() => ({
    width: typeof window !== "undefined" ? window.innerWidth : 0,
    height: typeof window !== "undefined" ? window.innerHeight : 0,
  }));

  const handleResize = useCallback(() => {
    setSizes({
      width: window.innerWidth,
      height: window.innerHeight,
    });
  }, []);

  const isMounted = useOnMount(() => {
    window.addEventListener("resize", handleResize);
    return () => window.removeEventListener("resize", handleResize);
  });

  const isMobile = sizes.width <= MOBILE_SIDEBAR_BREAKPOINT_PX;
  const isSmall = sizes.width <= DESKTOP_SMALL_BREAKPOINT_PX;
  const isMedium = sizes.width <= DESKTOP_MEDIUM_BREAKPOINT_PX;

  return {
    height: sizes.height,
    width: sizes.width,
    isMobile: isMounted && isMobile,
    isSmallScreen: isMounted && isSmall,
    isMediumScreen: isMounted && isMedium,
  };
}


================================================
FILE: web/src/hooks/useServerTools.ts
================================================
"use client";

import useSWR, { KeyedMutator } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { MCPServer, MCPTool, ToolSnapshot } from "@/lib/tools/interfaces";

/**
 * Return type for the useServerTools hook
 */
interface UseServerToolsReturn {
  /** Array of tools available for the MCP server, formatted for UI display */
  tools: MCPTool[];

  /** Loading state - true when fetching tools from the API */
  isLoading: boolean;

  /** Error object if the fetch failed, undefined otherwise */
  error: Error | undefined;

  /** SWR mutate function for manually revalidating or updating the tools cache */
  mutate: KeyedMutator<ToolSnapshot[]>;
}

/**
 * useServerTools
 *
 * A custom hook for lazily loading and managing tools for a specific MCP server.
 * This hook only fetches tools when the server is expanded, reducing unnecessary
 * API calls and improving performance.
 *
 * @param server - The MCP server object containing server metadata (id, url, name)
 * @param isExpanded - Boolean flag indicating whether the server card is expanded.
 *                     Tools are only fetched when this is true.
 *
 * @returns An object containing:
 *   - tools: Array of MCPTool objects formatted for UI display
 *   - isLoading: Boolean indicating if tools are currently being fetched
 *   - error: Error object if fetch failed
 *   - mutate: Function to manually revalidate or update the tools cache
 *
 * @example
 * ```tsx
 * function ServerCard({ server }) {
 *   const [isExpanded, setIsExpanded] = useState(false);
 *   const { tools, isLoading, error, mutate } = useServerTools(server, isExpanded);
 *
 *   if (isLoading) return <div>Loading tools...</div>;
 *   if (error) return <div>Failed to load tools</div>;
 *
 *   return (
 *     <div>
 *       <button onClick={() => setIsExpanded(!isExpanded)}>
 *         {isExpanded ? 'Collapse' : 'Expand'}
 *       </button>
 *       {isExpanded && tools.map(tool => (
 *         <ToolItem key={tool.id} {...tool} />
 *       ))}
 *     </div>
 *   );
 * }
 * ```
 *
 * @remarks
 * - Uses SWR for caching and automatic revalidation
 * - Automatically converts ToolSnapshot[] from API to MCPTool[] for UI
 * - Revalidation on focus and reconnect are disabled to reduce API calls
 * - The hook will not fetch if isExpanded is false (lazy loading)
 */
export default function useServerTools(
  server: MCPServer,
  isExpanded: boolean
): UseServerToolsReturn {
  const shouldFetch = isExpanded;

  const {
    data: toolsData,
    isLoading,
    error,
    mutate,
  } = useSWR<ToolSnapshot[]>(
    shouldFetch
      ? `/api/admin/mcp/server/${server.id}/tools/snapshots?source=db`
      : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
    }
  );

  // Convert ToolSnapshot[] to MCPTool[] format for UI consumption
  const tools: MCPTool[] = toolsData
    ? toolsData.map((tool) => ({
        id: tool.id.toString(),
        icon: getActionIcon(server.server_url, server.name),
        name: tool.display_name || tool.name,
        description: tool.description,
        isAvailable: true,
        isEnabled: tool.enabled,
      }))
    : [];

  return {
    tools,
    isLoading: isLoading && shouldFetch,
    error,
    mutate,
  };
}


================================================
FILE: web/src/hooks/useSettings.test.ts
================================================
import useSWR from "swr";
import {
  useSettings,
  useEnterpriseSettings,
  useCustomAnalyticsScript,
} from "@/hooks/useSettings";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ApplicationStatus, QueryHistoryType } from "@/interfaces/settings";

jest.mock("swr", () => ({
  __esModule: true,
  default: jest.fn(),
}));

jest.mock("@/lib/fetcher", () => ({
  errorHandlingFetcher: jest.fn(),
}));

jest.mock("@/lib/constants", () => ({
  EE_ENABLED: false,
}));

const mockUseSWR = useSWR as jest.MockedFunction<typeof useSWR>;

describe("useSettings", () => {
  beforeEach(() => {
    mockUseSWR.mockReset();
  });

  test("returns DEFAULT_SETTINGS when SWR data is undefined", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: true,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useSettings();

    expect(result.settings).toEqual({
      auto_scroll: true,
      application_status: ApplicationStatus.ACTIVE,
      gpu_enabled: false,
      maximum_chat_retention_days: null,
      notifications: [],
      needs_reindexing: false,
      anonymous_user_enabled: false,
      invite_only_enabled: false,
      deep_research_enabled: true,
      temperature_override_enabled: true,
      query_history_type: QueryHistoryType.NORMAL,
    });
    expect(result.isLoading).toBe(true);
  });

  test("returns fetched settings when SWR has data", () => {
    const mockSettings = {
      auto_scroll: false,
      application_status: ApplicationStatus.ACTIVE,
      gpu_enabled: true,
      maximum_chat_retention_days: 30,
      notifications: [],
      needs_reindexing: false,
      anonymous_user_enabled: false,
      invite_only_enabled: false,
      deep_research_enabled: true,
      temperature_override_enabled: true,
      query_history_type: QueryHistoryType.NORMAL,
    };

    mockUseSWR.mockReturnValue({
      data: mockSettings,
      error: undefined,
      isLoading: false,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useSettings();

    expect(result.settings).toBe(mockSettings);
    expect(result.isLoading).toBe(false);
    expect(result.error).toBeUndefined();
  });

  test("fetches from /api/settings with correct SWR config", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: true,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    useSettings();

    expect(mockUseSWR).toHaveBeenCalledWith(
      "/api/settings",
      errorHandlingFetcher,
      expect.objectContaining({
        revalidateOnFocus: false,
        revalidateOnReconnect: false,
        dedupingInterval: 30_000,
        errorRetryInterval: 5_000,
      })
    );
  });
});

describe("useEnterpriseSettings", () => {
  beforeEach(() => {
    mockUseSWR.mockReset();
  });

  test("passes null key when EE is disabled at both build and runtime", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: false,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useEnterpriseSettings(false);

    expect(mockUseSWR).toHaveBeenCalledWith(
      null,
      errorHandlingFetcher,
      expect.any(Object)
    );
    expect(result.enterpriseSettings).toBeNull();
    expect(result.isLoading).toBe(false);
  });

  test("fetches from /api/enterprise-settings when runtime EE is enabled", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: true,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    useEnterpriseSettings(true);

    expect(mockUseSWR).toHaveBeenCalledWith(
      "/api/enterprise-settings",
      errorHandlingFetcher,
      expect.any(Object)
    );
  });

  test("uses referential equality for compare to ensure logo cache-busters update", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: true,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    useEnterpriseSettings(true);

    const swrConfig = mockUseSWR.mock.calls[0]![2] as any;
    expect(swrConfig.compare).toBeDefined();

    // Same reference should be equal
    const obj = { use_custom_logo: true };
    expect(swrConfig.compare(obj, obj)).toBe(true);

    // Different references with same values should NOT be equal
    // (this is the key behavior — SWR's default deep compare would return true)
    const a = { use_custom_logo: true };
    const b = { use_custom_logo: true };
    expect(swrConfig.compare(a, b)).toBe(false);
  });

  test("returns enterprise settings when SWR has data", () => {
    const mockEnterprise = {
      application_name: "Acme Corp",
      use_custom_logo: true,
    };

    mockUseSWR.mockReturnValue({
      data: mockEnterprise,
      error: undefined,
      isLoading: false,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useEnterpriseSettings(true);

    expect(result.enterpriseSettings).toBe(mockEnterprise);
    expect(result.isLoading).toBe(false);
  });
});

describe("useCustomAnalyticsScript", () => {
  beforeEach(() => {
    mockUseSWR.mockReset();
  });

  test("returns null when EE is disabled", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      isLoading: false,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useCustomAnalyticsScript(false);

    expect(mockUseSWR).toHaveBeenCalledWith(
      null,
      errorHandlingFetcher,
      expect.any(Object)
    );
    expect(result).toBeNull();
  });

  test("returns script content when available", () => {
    const script = "console.log('analytics');";
    mockUseSWR.mockReturnValue({
      data: script,
      error: undefined,
      isLoading: false,
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useCustomAnalyticsScript(true);

    expect(result).toBe(script);
  });
});


================================================
FILE: web/src/hooks/useSettings.ts
================================================
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import {
  Settings,
  EnterpriseSettings,
  ApplicationStatus,
  QueryHistoryType,
} from "@/interfaces/settings";
import { EE_ENABLED } from "@/lib/constants";

// Longer retry delay for critical settings fetches — avoids rapid error→success
// flicker in the SettingsProvider error boundary when there's a transient blip.
const SETTINGS_ERROR_RETRY_INTERVAL = 5_000;

const DEFAULT_SETTINGS = {
  auto_scroll: true,
  application_status: ApplicationStatus.ACTIVE,
  gpu_enabled: false,
  maximum_chat_retention_days: null,
  notifications: [],
  needs_reindexing: false,
  anonymous_user_enabled: false,
  invite_only_enabled: false,
  deep_research_enabled: true,
  temperature_override_enabled: true,
  query_history_type: QueryHistoryType.NORMAL,
} satisfies Settings;

export function useSettings(): {
  settings: Settings;
  isLoading: boolean;
  error: Error | undefined;
} {
  const { data, error, isLoading } = useSWR<Settings>(
    SWR_KEYS.settings,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 30_000,
      errorRetryInterval: SETTINGS_ERROR_RETRY_INTERVAL,
    }
  );

  return {
    settings: data ?? DEFAULT_SETTINGS,
    isLoading,
    error,
  };
}

export function useEnterpriseSettings(eeEnabledRuntime: boolean): {
  enterpriseSettings: EnterpriseSettings | null;
  isLoading: boolean;
  error: Error | undefined;
} {
  // Gate on the build-time flag OR the runtime ee_features_enabled from
  // /api/settings. The build-time flag (NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES)
  // may be unset even when the server enables EE via LICENSE_ENFORCEMENT_ENABLED,
  // so the runtime check is needed as a fallback.
  const shouldFetch = EE_ENABLED || eeEnabledRuntime;

  const { data, error, isLoading } = useSWR<EnterpriseSettings>(
    shouldFetch ? SWR_KEYS.enterpriseSettings : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 30_000,
      errorRetryInterval: SETTINGS_ERROR_RETRY_INTERVAL,
      // Referential equality instead of SWR's default deep comparison.
      // The logo image can change without the settings JSON changing
      // (same use_custom_logo: true), so we need every mutate() call
      // to propagate a new reference so cache-busters recalculate.
      compare: (a, b) => a === b,
    }
  );

  return {
    enterpriseSettings: data ?? null,
    isLoading: shouldFetch ? isLoading : false,
    error,
  };
}

export function useCustomAnalyticsScript(
  eeEnabledRuntime: boolean
): string | null {
  const shouldFetch = EE_ENABLED || eeEnabledRuntime;

  const { data } = useSWR<string>(
    shouldFetch ? SWR_KEYS.customAnalyticsScript : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateOnReconnect: false,
      revalidateIfStale: false,
      dedupingInterval: 60_000,
    }
  );

  return data ?? null;
}


================================================
FILE: web/src/hooks/useShareableGroups.ts
================================================
"use client";

import useSWR, { mutate } from "swr";
import { useContext } from "react";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SettingsContext } from "@/providers/SettingsProvider";
import { SWR_KEYS } from "@/lib/swr-keys";

export interface MinimalUserGroupSnapshot {
  id: number;
  name: string;
}

// TODO (@raunakab):
// Refactor this hook to live inside of a special `ee` directory.

export default function useShareableGroups() {
  const combinedSettings = useContext(SettingsContext);
  const settingsLoading = combinedSettings?.settingsLoading ?? false;
  const isPaidEnterpriseFeaturesEnabled =
    !settingsLoading &&
    combinedSettings &&
    combinedSettings.enterpriseSettings !== null;

  const { data, error, isLoading } = useSWR<MinimalUserGroupSnapshot[]>(
    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.shareableGroups : null,
    errorHandlingFetcher
  );

  const refreshShareableGroups = () => mutate(SWR_KEYS.shareableGroups);

  if (settingsLoading) {
    return {
      data: undefined,
      isLoading: true,
      error: undefined,
      refreshShareableGroups,
    };
  }

  if (!isPaidEnterpriseFeaturesEnabled) {
    return {
      data: [],
      isLoading: false,
      error: undefined,
      refreshShareableGroups,
    };
  }

  return {
    data,
    isLoading,
    error,
    refreshShareableGroups,
  };
}


================================================
FILE: web/src/hooks/useShareableUsers.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { MinimalUserSnapshot } from "@/lib/types";

export interface UseShareableUsersParams {
  includeApiKeys: boolean;
}

export default function useShareableUsers({
  includeApiKeys,
}: UseShareableUsersParams) {
  const { data, error, mutate, isLoading } = useSWR<MinimalUserSnapshot[]>(
    `/api/users?include_api_keys=${includeApiKeys}`,
    errorHandlingFetcher
  );

  return {
    data,
    isLoading,
    error,
    refreshShareableUsers: mutate,
  };
}


================================================
FILE: web/src/hooks/useShowOnboarding.ts
================================================
"use client";

import { useReducer, useCallback, useEffect, useRef, useState } from "react";
import { onboardingReducer, initialState } from "@/sections/onboarding/reducer";
import {
  OnboardingActions,
  OnboardingActionType,
  OnboardingData,
  OnboardingState,
  OnboardingStep,
} from "@/interfaces/onboarding";
import { WellKnownLLMProviderDescriptor } from "@/interfaces/llm";
import { updateUserPersonalization } from "@/lib/userSettings";
import { useUser } from "@/providers/UserProvider";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { useProviderStatus } from "@/components/chat/ProviderContext";

function getOnboardingCompletedKey(userId: string): string {
  return `onyx:onboardingCompleted:${userId}`;
}

function useOnboardingState(liveAgent?: MinimalPersonaSnapshot): {
  state: OnboardingState;
  llmDescriptors: WellKnownLLMProviderDescriptor[];
  actions: OnboardingActions;
  isLoading: boolean;
  hasProviders: boolean;
} {
  const [state, dispatch] = useReducer(onboardingReducer, initialState);
  const { user, refreshUser } = useUser();

  // Get provider data from ProviderContext instead of duplicating the call
  const {
    llmProviders,
    isLoadingProviders,
    hasProviders: hasLlmProviders,
    providerOptions,
    refreshProviderInfo,
  } = useProviderStatus();

  // Only fetch persona-specific providers (different endpoint)
  const { refetch: refreshPersonaProviders } = useLLMProviders(liveAgent?.id);

  const userName = user?.personalization?.name;
  const llmDescriptors = providerOptions;

  const nameUpdateTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
    null
  );
  const hasInitializedForUserRef = useRef<string | undefined>(undefined);

  // Initialize onboarding to the earliest incomplete step — runs once per user
  // after both user data and provider data have loaded.  After initialization,
  // user actions (Next / Prev / goToStep) drive navigation; the effect never
  // re-runs so it cannot override user-driven state (e.g. button active).
  useEffect(() => {
    if (
      isLoadingProviders ||
      !user ||
      hasInitializedForUserRef.current === user.id
    ) {
      return;
    }
    hasInitializedForUserRef.current = user.id;

    // Pre-populate state with existing data
    if (userName) {
      dispatch({
        type: OnboardingActionType.UPDATE_DATA,
        payload: { userName },
      });
    }
    dispatch({
      type: OnboardingActionType.UPDATE_DATA,
      payload: { llmProviders: (llmProviders ?? []).map((p) => p.provider) },
    });

    // Determine the earliest incomplete step
    // Name step is incomplete if userName is not set
    if (!userName) {
      // Stay at Welcome/Name step (no dispatch needed, this is the initial state)
      return;
    }

    // LlmSetup step is incomplete if no LLM providers are configured
    if (!hasLlmProviders) {
      dispatch({
        type: OnboardingActionType.SET_BUTTON_ACTIVE,
        isButtonActive: false,
      });
      dispatch({
        type: OnboardingActionType.GO_TO_STEP,
        step: OnboardingStep.LlmSetup,
      });
      return;
    }

    // All steps complete - go to Complete step
    dispatch({
      type: OnboardingActionType.SET_BUTTON_ACTIVE,
      isButtonActive: true,
    });
    dispatch({
      type: OnboardingActionType.GO_TO_STEP,
      step: OnboardingStep.Complete,
    });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [llmProviders, isLoadingProviders, userName, hasLlmProviders, user]);

  const nextStep = useCallback(() => {
    dispatch({
      type: OnboardingActionType.SET_BUTTON_ACTIVE,
      isButtonActive: false,
    });

    if (state.currentStep === OnboardingStep.Name) {
      const hasProviders = (state.data.llmProviders?.length ?? 0) > 0;
      if (hasProviders) {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: true,
        });
      } else {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: false,
        });
      }
    }

    if (state.currentStep === OnboardingStep.LlmSetup) {
      refreshProviderInfo();
      if (liveAgent) {
        refreshPersonaProviders();
      }
    }
    dispatch({ type: OnboardingActionType.NEXT_STEP });
  }, [state, refreshProviderInfo, refreshPersonaProviders, liveAgent]);

  const prevStep = useCallback(() => {
    dispatch({ type: OnboardingActionType.PREV_STEP });
  }, []);

  const goToStep = useCallback(
    (step: OnboardingStep) => {
      const hasProviders = (state.data.llmProviders?.length ?? 0) > 0;
      if (step === OnboardingStep.LlmSetup && hasProviders) {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: true,
        });
      } else if (step === OnboardingStep.LlmSetup) {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: false,
        });
      }
      dispatch({ type: OnboardingActionType.GO_TO_STEP, step });
    },
    [state]
  );

  const updateName = useCallback(
    (name: string) => {
      dispatch({
        type: OnboardingActionType.UPDATE_DATA,
        payload: { userName: name },
      });

      if (nameUpdateTimeoutRef.current) {
        clearTimeout(nameUpdateTimeoutRef.current);
      }

      if (name === "") {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: false,
        });
      } else {
        dispatch({
          type: OnboardingActionType.SET_BUTTON_ACTIVE,
          isButtonActive: true,
        });
      }

      nameUpdateTimeoutRef.current = setTimeout(async () => {
        try {
          await updateUserPersonalization({ name });
          await refreshUser();
        } catch (_e) {
          dispatch({
            type: OnboardingActionType.SET_BUTTON_ACTIVE,
            isButtonActive: false,
          });
          console.error("Error updating user name:", _e);
        } finally {
          nameUpdateTimeoutRef.current = null;
        }
      }, 500);
    },
    [refreshUser]
  );

  const updateData = useCallback((data: Partial<OnboardingData>) => {
    dispatch({ type: OnboardingActionType.UPDATE_DATA, payload: data });
  }, []);

  const setLoading = useCallback((isLoading: boolean) => {
    dispatch({ type: OnboardingActionType.SET_LOADING, isLoading });
  }, []);

  const setButtonActive = useCallback((active: boolean) => {
    dispatch({
      type: OnboardingActionType.SET_BUTTON_ACTIVE,
      isButtonActive: active,
    });
  }, []);

  const setError = useCallback((error: string | undefined) => {
    dispatch({ type: OnboardingActionType.SET_ERROR, error });
  }, []);

  const reset = useCallback(() => {
    dispatch({ type: OnboardingActionType.RESET });
  }, []);

  useEffect(() => {
    return () => {
      if (nameUpdateTimeoutRef.current) {
        clearTimeout(nameUpdateTimeoutRef.current);
      }
    };
  }, []);

  return {
    state,
    llmDescriptors,
    actions: {
      nextStep,
      prevStep,
      goToStep,
      setButtonActive,
      updateName,
      updateData,
      setLoading,
      setError,
      reset,
    },
    isLoading: isLoadingProviders,
    hasProviders: hasLlmProviders,
  };
}

interface UseShowOnboardingParams {
  liveAgent: MinimalPersonaSnapshot | undefined;
  isLoadingChatSessions: boolean;
  chatSessionsCount: number;
  userId: string | undefined;
}

export function useShowOnboarding({
  liveAgent,
  isLoadingChatSessions,
  chatSessionsCount,
  userId,
}: UseShowOnboardingParams) {
  const [showOnboarding, setShowOnboarding] = useState(false);
  const [onboardingDismissed, setOnboardingDismissed] = useState(false);

  // Read localStorage once userId is available to check if onboarding was dismissed
  useEffect(() => {
    if (userId === undefined) return;
    const dismissed =
      localStorage.getItem(getOnboardingCompletedKey(userId)) === "true";
    setOnboardingDismissed(dismissed);
  }, [userId]);

  // Initialize onboarding state — single source of truth for provider data
  const {
    state: onboardingState,
    actions: onboardingActions,
    llmDescriptors,
    isLoading: isLoadingOnboarding,
    hasProviders: hasAnyProvider,
  } = useOnboardingState(liveAgent);

  const isLoadingProviders = isLoadingOnboarding;

  // Track which user we've already evaluated onboarding for.
  // Re-check when userId changes (logout/login, account switching without full reload).
  const hasCheckedOnboardingForUserId = useRef<string | undefined>(undefined);

  // Evaluate onboarding once per user after data loads.
  // Show onboarding only if no LLM providers are configured.
  // Skip entirely if user has existing chat sessions.
  useEffect(() => {
    // If onboarding was previously dismissed, never show it again
    if (onboardingDismissed) {
      setShowOnboarding(false);
      return;
    }

    // Wait for data to load
    if (isLoadingProviders || isLoadingChatSessions || userId === undefined) {
      return;
    }

    // Only check once per user — but allow self-correction from true→false
    // when provider data arrives (e.g. after a transient fetch error).
    if (hasCheckedOnboardingForUserId.current === userId) {
      if (showOnboarding && hasAnyProvider && onboardingState.stepIndex === 0) {
        setShowOnboarding(false);
      }
      return;
    }
    hasCheckedOnboardingForUserId.current = userId;

    // Skip onboarding if user has any chat sessions
    if (chatSessionsCount > 0) {
      setShowOnboarding(false);
      return;
    }

    // Show onboarding if no LLM providers are configured.
    setShowOnboarding(hasAnyProvider === false);
  }, [
    isLoadingProviders,
    isLoadingChatSessions,
    hasAnyProvider,
    chatSessionsCount,
    userId,
    showOnboarding,
    onboardingDismissed,
    onboardingState.stepIndex,
  ]);

  const dismissOnboarding = useCallback(() => {
    if (userId === undefined) return;
    setShowOnboarding(false);
    setOnboardingDismissed(true);
    localStorage.setItem(getOnboardingCompletedKey(userId), "true");
  }, [userId]);

  const hideOnboarding = dismissOnboarding;
  const finishOnboarding = dismissOnboarding;

  return {
    showOnboarding,
    onboardingDismissed,
    onboardingState,
    onboardingActions,
    llmDescriptors,
    isLoadingOnboarding,
    hideOnboarding,
    finishOnboarding,
  };
}


================================================
FILE: web/src/hooks/useTags.ts
================================================
import useSWR from "swr";
import { Tag } from "@/lib/types";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

interface TagsResponse {
  tags: Tag[];
}

/**
 * Fetches the set of valid tags from the server.
 *
 * Tags are deduplicated for 60 s and not re-fetched on window focus.
 *
 * @returns tags - The array of available {@link Tag} objects (empty while loading).
 * @returns isLoading - `true` until the first successful fetch or an error.
 * @returns error - The error object if the request failed.
 * @returns refresh - SWR mutate function to manually re-fetch.
 */
export default function useTags() {
  const { data, error, mutate } = useSWR<TagsResponse>(
    SWR_KEYS.tags,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    tags: data?.tags ?? [],
    isLoading: !error && !data,
    error,
    refresh: mutate,
  };
}


================================================
FILE: web/src/hooks/useToast.ts
================================================
import { useEffect, useSyncExternalStore } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

export type ToastLevel = "success" | "error" | "warning" | "info" | "default";

export interface ToastOptions {
  message: string;
  level?: ToastLevel;
  description?: string;
  duration?: number; // ms – default 4000, Infinity = persistent
  dismissible?: boolean; // default true (shows close button)
  actionLabel?: string;
  onAction?: () => void;
}

export interface Toast extends ToastOptions {
  id: string;
  createdAt: number;
  leaving?: boolean; // true while exit‑animation plays
}

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

export const MAX_VISIBLE_TOASTS = 3;
const DEFAULT_DURATION = 4000;

// ---------------------------------------------------------------------------
// Module‑level store (external to React)
// ---------------------------------------------------------------------------

let toasts: Toast[] = [];
const subscribers = new Set<() => void>();
const timers = new Map<string, ReturnType<typeof setTimeout>>();

let nextId = 0;

function notify() {
  subscribers.forEach((cb) => cb());
}

function addToast(options: ToastOptions): string {
  const id = `toast-${++nextId}-${Date.now()}`;
  const duration = options.duration ?? DEFAULT_DURATION;

  const entry: Toast = {
    ...options,
    id,
    level: options.level ?? "info",
    dismissible: options.dismissible ?? true,
    createdAt: Date.now(),
  };

  toasts = [...toasts, entry];
  notify();

  if (duration !== Infinity) {
    const timer = setTimeout(() => {
      removeToast(id);
    }, duration);
    timers.set(id, timer);
  }

  return id;
}

function removeToast(id: string): void {
  const timer = timers.get(id);
  if (timer) {
    clearTimeout(timer);
    timers.delete(id);
  }
  toasts = toasts.filter((t) => t.id !== id);
  notify();
}

function markLeaving(id: string): void {
  toasts = toasts.map((t) => (t.id === id ? { ...t, leaving: true } : t));
  notify();
}

function clearAll(): void {
  timers.forEach((timer) => clearTimeout(timer));
  timers.clear();
  toasts = [];
  notify();
}

function subscribe(cb: () => void): () => void {
  subscribers.add(cb);
  return () => {
    subscribers.delete(cb);
  };
}

function getSnapshot(): Toast[] {
  return toasts;
}

// ---------------------------------------------------------------------------
// Imperative API (works anywhere – components, hooks, plain .ts files)
// ---------------------------------------------------------------------------

interface ToastFn {
  (options: ToastOptions): string;
  success: (
    message: string,
    opts?: Omit<ToastOptions, "message" | "level">
  ) => string;
  error: (
    message: string,
    opts?: Omit<ToastOptions, "message" | "level">
  ) => string;
  warning: (
    message: string,
    opts?: Omit<ToastOptions, "message" | "level">
  ) => string;
  info: (
    message: string,
    opts?: Omit<ToastOptions, "message" | "level">
  ) => string;
  dismiss: (id: string) => void;
  clearAll: () => void;
  /** @internal – used by ToastContainer for exit animation */
  _markLeaving: (id: string) => void;
}

function toastBase(options: ToastOptions): string {
  return addToast(options);
}

export const toast: ToastFn = Object.assign(toastBase, {
  success: (message: string, opts?: Omit<ToastOptions, "message" | "level">) =>
    addToast({ ...opts, message, level: "success" }),
  error: (message: string, opts?: Omit<ToastOptions, "message" | "level">) =>
    addToast({ ...opts, message, level: "error" }),
  warning: (message: string, opts?: Omit<ToastOptions, "message" | "level">) =>
    addToast({ ...opts, message, level: "warning" }),
  info: (message: string, opts?: Omit<ToastOptions, "message" | "level">) =>
    addToast({ ...opts, message, level: "info" }),
  dismiss: removeToast,
  clearAll,
  _markLeaving: markLeaving,
});

// ---------------------------------------------------------------------------
// React hook (convenience wrapper)
// ---------------------------------------------------------------------------

export function useToast() {
  useSyncExternalStore(subscribe, getSnapshot, getSnapshot);
  return { toast, dismiss: toast.dismiss, clearAll: toast.clearAll };
}

// ---------------------------------------------------------------------------
// Query-param toast hook
// ---------------------------------------------------------------------------

interface ToastFromQueryMessages {
  [key: string]: {
    message: string;
    type?: ToastLevel | null;
  };
}

/**
 * Reads a `?message=<key>` query param on mount, fires the matching toast,
 * and strips the param from the URL.
 */
export function useToastFromQuery(messages: ToastFromQueryMessages) {
  useEffect(() => {
    const searchParams = new URLSearchParams(window.location.search);
    const messageValue = searchParams?.get("message");

    if (messageValue && messageValue in messages) {
      searchParams.delete("message");
      const newSearch = searchParams.toString()
        ? "?" + searchParams.toString()
        : "";
      window.history.replaceState(
        null,
        "",
        window.location.pathname + newSearch
      );
      const spec = messages[messageValue];
      if (spec !== undefined) {
        toast({
          message: spec.message,
          level: spec.type ?? "info",
        });
      }
    }
  }, []);
}

// ---------------------------------------------------------------------------
// Store accessors (used by ToastContainer)
// ---------------------------------------------------------------------------

export const toastStore = {
  subscribe,
  getSnapshot,
};


================================================
FILE: web/src/hooks/useTokenRefresh.ts
================================================
"use client";

import { useState, useEffect, useRef } from "react";
import { User } from "@/lib/types";
import { NO_AUTH_USER_ID } from "@/lib/extension/constants";
import { AuthTypeMetadata } from "@/hooks/useAuthTypeMetadata";
import { AuthType } from "@/lib/constants";

// Refresh token every 10 minutes (600000ms)
// This is shorter than the session expiry time to ensure tokens stay valid
const REFRESH_INTERVAL = 600000;

//  Custom hook for handling JWT token refresh for current user
export function useTokenRefresh(
  user: User | null,
  authTypeMetadata: AuthTypeMetadata,
  onRefreshFail: () => Promise<void>
) {
  // Track last refresh time to avoid unnecessary calls
  const [lastTokenRefresh, setLastTokenRefresh] = useState<number>(Date.now());

  // Use a ref to track first load
  const isFirstLoad = useRef(true);

  useEffect(() => {
    if (
      !user ||
      user.id === NO_AUTH_USER_ID ||
      authTypeMetadata.authType === AuthType.OIDC ||
      authTypeMetadata.authType === AuthType.SAML
    )
      return;

    const refreshTokenPeriodically = async () => {
      try {
        // Skip time check if this is first load - we always refresh on first load
        const isTimeToRefresh =
          isFirstLoad.current ||
          Date.now() - lastTokenRefresh > REFRESH_INTERVAL - 60000;

        if (!isTimeToRefresh) {
          return;
        }

        // Reset first load flag
        if (isFirstLoad.current) {
          isFirstLoad.current = false;
        }

        const response = await fetch("/api/auth/refresh", {
          method: "POST",
          credentials: "include",
        });

        if (response.ok) {
          // Update last refresh time on success
          setLastTokenRefresh(Date.now());
          console.debug("Auth token refreshed successfully");
        } else {
          console.warn("Failed to refresh auth token:", response.status);
          // If token refresh fails, try to get current user info
          await onRefreshFail();
        }
      } catch (error) {
        console.error("Error refreshing auth token:", error);
      }
    };

    // Always attempt to refresh on first component mount
    // This helps ensure tokens are fresh, especially after browser refresh
    refreshTokenPeriodically();

    // Set up interval for periodic refreshes
    const intervalId = setInterval(refreshTokenPeriodically, REFRESH_INTERVAL);

    // Also refresh token on window focus, but no more than once per minute
    const handleVisibilityChange = () => {
      if (
        document.visibilityState === "visible" &&
        Date.now() - lastTokenRefresh > 60000
      ) {
        refreshTokenPeriodically();
      }
    };

    document.addEventListener("visibilitychange", handleVisibilityChange);

    return () => {
      clearInterval(intervalId);
      document.removeEventListener("visibilitychange", handleVisibilityChange);
    };
  }, [user, lastTokenRefresh, onRefreshFail]);

  return { lastTokenRefresh };
}


================================================
FILE: web/src/hooks/useUserCounts.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import type { InvitedUserSnapshot } from "@/lib/types";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SWR_KEYS } from "@/lib/swr-keys";
import type { StatusCountMap } from "@/refresh-pages/admin/UsersPage/interfaces";

type UserCountsResponse = {
  role_counts: Record<string, number>;
  status_counts: Record<string, number>;
};

type UserCounts = {
  activeCount: number | null;
  invitedCount: number | null;
  pendingCount: number | null;
  roleCounts: Record<string, number>;
  statusCounts: StatusCountMap;
  refreshCounts: () => void;
};

export default function useUserCounts(): UserCounts {
  const { data: countsData, mutate: refreshCounts } =
    useSWR<UserCountsResponse>(SWR_KEYS.userCounts, errorHandlingFetcher);

  const { data: invitedUsers } = useSWR<InvitedUserSnapshot[]>(
    SWR_KEYS.invitedUsers,
    errorHandlingFetcher
  );

  const { data: pendingUsers } = useSWR<InvitedUserSnapshot[]>(
    NEXT_PUBLIC_CLOUD_ENABLED ? SWR_KEYS.pendingTenantUsers : null,
    errorHandlingFetcher
  );

  const activeCount = countsData?.status_counts?.active ?? null;
  const inactiveCount = countsData?.status_counts?.inactive ?? null;

  return {
    activeCount,
    invitedCount: invitedUsers?.length ?? null,
    pendingCount: pendingUsers?.length ?? null,
    roleCounts: countsData?.role_counts ?? {},
    statusCounts: {
      ...(activeCount !== null ? { active: activeCount } : {}),
      ...(inactiveCount !== null ? { inactive: inactiveCount } : {}),
      ...(invitedUsers ? { invited: invitedUsers.length } : {}),
      ...(pendingUsers ? { requested: pendingUsers.length } : {}),
    } satisfies StatusCountMap,
    refreshCounts,
  };
}


================================================
FILE: web/src/hooks/useUserPersonalization.ts
================================================
"use client";

import { useCallback, useEffect, useMemo, useState } from "react";
import { MemoryItem, User, UserPersonalization } from "@/lib/types";

const DEFAULT_PERSONALIZATION: UserPersonalization = {
  name: "",
  role: "",
  memories: [],
  use_memories: true,
  enable_memory_tool: true,
  user_preferences: "",
};

function derivePersonalizationFromUser(user: User | null): UserPersonalization {
  if (!user?.personalization) {
    return { ...DEFAULT_PERSONALIZATION, memories: [] };
  }

  return {
    name: user.personalization.name ?? "",
    role: user.personalization.role ?? "",
    memories: [...(user.personalization.memories ?? [])],
    use_memories:
      user.personalization.use_memories ?? DEFAULT_PERSONALIZATION.use_memories,
    enable_memory_tool:
      user.personalization.enable_memory_tool ??
      DEFAULT_PERSONALIZATION.enable_memory_tool,
    user_preferences: user.personalization.user_preferences ?? "",
  };
}

interface UseUserPersonalizationOptions {
  onSuccess?: (personalization: UserPersonalization) => void;
  onError?: (error: unknown) => void;
}

/**
 * Hook for managing user personalization settings
 *
 * Handles user personalization data including name, role, and memories.
 * Provides state management and persistence for personalization fields with
 * optimistic updates and error handling.
 *
 * @param user - The current user object containing personalization data
 * @param persistPersonalization - Async function to persist personalization changes to the server
 * @param options - Optional callbacks for success and error handling
 * @param options.onSuccess - Callback invoked when personalization is successfully saved
 * @param options.onError - Callback invoked when personalization save fails
 * @returns Object containing personalization state and handler functions
 *
 * @example
 * ```tsx
 * import useUserPersonalization from "@/hooks/useUserPersonalization";
 * import { useUser } from "@/providers/UserProvider";
 *
 * function PersonalizationSettings() {
 *   const { user, updateUserPersonalization } = useUser();
 *   const {
 *     personalizationValues,
 *     updatePersonalizationField,
 *     toggleUseMemories,
 *     updateMemoryAtIndex,
 *     addMemory,
 *     handleSavePersonalization,
 *     isSavingPersonalization
 *   } = useUserPersonalization(user, updateUserPersonalization, {
 *     onSuccess: () => console.log("Saved!"),
 *     onError: () => console.log("Failed!")
 *   });
 *
 *   return (
 *     <div>
 *       <input
 *         value={personalizationValues.name}
 *         onChange={(e) => updatePersonalizationField("name", e.target.value)}
 *       />
 *       <button
 *         onClick={handleSavePersonalization}
 *         disabled={isSavingPersonalization}
 *       >
 *         Save
 *       </button>
 *     </div>
 *   );
 * }
 * ```
 *
 * @remarks
 * - Changes are optimistic - UI updates immediately before server persistence
 * - On error, state reverts to the last known good value from the user object
 * - Memories are automatically trimmed and filtered (empty strings removed) on save
 * - The hook synchronizes with user prop changes to stay in sync with external updates
 */
export default function useUserPersonalization(
  user: User | null,
  persistPersonalization: (
    personalization: UserPersonalization
  ) => Promise<void>,
  options?: UseUserPersonalizationOptions
) {
  const [personalizationValues, setPersonalizationValues] =
    useState<UserPersonalization>(() => derivePersonalizationFromUser(user));
  const [isSavingPersonalization, setIsSavingPersonalization] = useState(false);

  const onSuccess = options?.onSuccess;
  const onError = options?.onError;

  const basePersonalization = useMemo(
    () => derivePersonalizationFromUser(user),
    [user]
  );

  useEffect(() => {
    setPersonalizationValues(basePersonalization);
  }, [basePersonalization]);

  const updatePersonalizationField = useCallback(
    (field: "name" | "role", value: string) => {
      setPersonalizationValues((prev) => ({
        ...prev,
        [field]: value,
      }));
    },
    []
  );

  const toggleUseMemories = useCallback((useMemories: boolean) => {
    setPersonalizationValues((prev) => ({
      ...prev,
      use_memories: useMemories,
    }));
  }, []);

  const toggleEnableMemoryTool = useCallback((enabled: boolean) => {
    setPersonalizationValues((prev) => ({
      ...prev,
      enable_memory_tool: enabled,
    }));
  }, []);

  const updateUserPreferences = useCallback((value: string) => {
    setPersonalizationValues((prev) => ({
      ...prev,
      user_preferences: value,
    }));
  }, []);

  const updateMemoryAtIndex = useCallback((index: number, value: string) => {
    setPersonalizationValues((prev) => {
      const updatedMemories = [...prev.memories];
      const existing = updatedMemories[index];
      if (existing) {
        updatedMemories[index] = { ...existing, content: value };
      }
      return {
        ...prev,
        memories: updatedMemories,
      };
    });
  }, []);

  const addMemory = useCallback(() => {
    setPersonalizationValues((prev) => ({
      ...prev,
      memories: [...prev.memories, { id: null, content: "" }],
    }));
  }, []);

  const setMemories = useCallback((memories: MemoryItem[]) => {
    setPersonalizationValues((prev) => ({
      ...prev,
      memories,
    }));
  }, []);

  const handleSavePersonalization = useCallback(
    async (overrides?: Partial<UserPersonalization>, silent?: boolean) => {
      setIsSavingPersonalization(true);

      const valuesToSave = { ...personalizationValues, ...overrides };
      const trimmedMemories = valuesToSave.memories
        .map((memory) => ({ ...memory, content: memory.content.trim() }))
        .filter((memory) => memory.content.length > 0);

      const updatedPersonalization: UserPersonalization = {
        ...valuesToSave,
        memories: trimmedMemories,
      };

      try {
        await persistPersonalization(updatedPersonalization);
        setPersonalizationValues(updatedPersonalization);
        if (!silent) {
          onSuccess?.(updatedPersonalization);
        }
        return updatedPersonalization;
      } catch (error) {
        setPersonalizationValues(basePersonalization);
        if (!silent) {
          onError?.(error);
        }
        return null;
      } finally {
        setIsSavingPersonalization(false);
      }
    },
    [
      basePersonalization,
      onError,
      onSuccess,
      persistPersonalization,
      personalizationValues,
    ]
  );

  return {
    personalizationValues,
    updatePersonalizationField,
    toggleUseMemories,
    toggleEnableMemoryTool,
    updateUserPreferences,
    updateMemoryAtIndex,
    addMemory,
    setMemories,
    handleSavePersonalization,
    isSavingPersonalization,
  };
}


================================================
FILE: web/src/hooks/useUsers.ts
================================================
"use client";

import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { AllUsersResponse } from "@/lib/types";

export interface UseUsersParams {
  includeApiKeys: boolean;
}

/**
 * Fetches all users in the organization.
 *
 * Returns user information including accepted users, invited users, and optionally
 * API key users. Use this for displaying user lists in sharing dialogs, admin panels,
 * or permission management interfaces.
 *
 * @param params - Configuration object
 * @param params.includeApiKeys - Whether to include API key users in the response
 *
 * @returns Object containing:
 *   - data: AllUsersResponse containing accepted, invited, and API key users, or undefined while loading
 *   - isLoading: Boolean indicating if data is being fetched
 *   - error: Any error that occurred during fetch
 *   - refreshUsers: Function to manually revalidate the data
 *
 * @example
 * // Fetch users without API keys (for sharing dialogs)
 * const { data: usersData, isLoading } = useUsers({ includeApiKeys: false });
 * if (isLoading) return <Spinner />;
 * return <UserList users={usersData?.accepted ?? []} />;
 *
 * @example
 * // Fetch all users including API keys (for admin panel)
 * const { data: usersData, refreshUsers } = useUsers({ includeApiKeys: true });
 * // Later...
 * await createNewUser(...);
 * refreshUsers(); // Refresh the user list
 */
export default function useUsers({ includeApiKeys }: UseUsersParams) {
  const { data, error, mutate, isLoading } = useSWR<AllUsersResponse>(
    `/api/manage/users?include_api_keys=${includeApiKeys}`,
    errorHandlingFetcher
  );

  return {
    data,
    isLoading,
    error,
    refreshUsers: mutate,
  };
}


================================================
FILE: web/src/hooks/useVoicePlayback.ts
================================================
import { useState, useRef, useCallback, useEffect } from "react";
import { StreamingTTSPlayer } from "@/lib/streamingTTS";
import { useVoiceMode } from "@/providers/VoiceModeProvider";

export interface UseVoicePlaybackReturn {
  isPlaying: boolean;
  isLoading: boolean;
  error: string | null;
  play: (text: string, voice?: string, speed?: number) => Promise<void>;
  pause: () => void;
  stop: () => void;
}

export function useVoicePlayback(): UseVoicePlaybackReturn {
  const [isPlaying, setIsPlaying] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const playerRef = useRef<StreamingTTSPlayer | null>(null);
  const suppressPlayerErrorsRef = useRef(false);
  const { setManualTTSPlaying, isTTSMuted, registerManualTTSMuteHandler } =
    useVoiceMode();

  useEffect(() => {
    registerManualTTSMuteHandler((muted) => {
      playerRef.current?.setMuted(muted);
    });
    return () => {
      registerManualTTSMuteHandler(null);
    };
  }, [registerManualTTSMuteHandler]);

  const stop = useCallback(() => {
    suppressPlayerErrorsRef.current = true;
    if (playerRef.current) {
      playerRef.current.stop();
      playerRef.current = null;
    }
    setManualTTSPlaying(false);
    setError(null);
    setIsPlaying(false);
    setIsLoading(false);
  }, [setManualTTSPlaying]);

  const pause = useCallback(() => {
    // Streaming player currently supports stop/resume via restart, not true pause.
    stop();
  }, [stop]);

  const play = useCallback(
    async (text: string, voice?: string, speed?: number) => {
      // Stop any existing playback
      stop();
      suppressPlayerErrorsRef.current = false;
      setError(null);
      setIsLoading(true);

      try {
        const player = new StreamingTTSPlayer({
          onPlayingChange: (playing) => {
            setIsPlaying(playing);
            setManualTTSPlaying(playing);
            if (playing) {
              setIsLoading(false);
            }
          },
          onError: (playbackError) => {
            if (suppressPlayerErrorsRef.current) {
              return;
            }
            console.error("Voice playback error:", playbackError);
            setManualTTSPlaying(false);
            setError(playbackError);
            setIsLoading(false);
            setIsPlaying(false);
          },
        });
        playerRef.current = player;
        player.setMuted(isTTSMuted);

        await player.speak(text, voice, speed);
        setIsLoading(false);
      } catch (err) {
        if (err instanceof Error && err.name === "AbortError") {
          // Request was cancelled, not an error
          return;
        }
        const message =
          err instanceof Error ? err.message : "Speech synthesis failed";
        setError(message);
        setIsLoading(false);
        setIsPlaying(false);
        setManualTTSPlaying(false);
      }
    },
    [isTTSMuted, setManualTTSPlaying, stop]
  );

  return {
    isPlaying,
    isLoading,
    error,
    play,
    pause,
    stop,
  };
}


================================================
FILE: web/src/hooks/useVoiceProviders.ts
================================================
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

export interface VoiceProviderView {
  id: number;
  name: string;
  provider_type: string;
  is_default_stt: boolean;
  is_default_tts: boolean;
  stt_model: string | null;
  tts_model: string | null;
  default_voice: string | null;
  has_api_key: boolean;
  target_uri: string | null;
}

export function useVoiceProviders() {
  const { data, error, isLoading, mutate } = useSWR<VoiceProviderView[]>(
    SWR_KEYS.voiceProviders,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    providers: data ?? [],
    isLoading,
    error,
    refresh: mutate,
  };
}


================================================
FILE: web/src/hooks/useVoiceRecorder.ts
================================================
import { useState, useRef, useCallback, useEffect } from "react";

import { INTERNAL_URL, IS_DEV } from "@/lib/constants";

// Target format for OpenAI Realtime API
const TARGET_SAMPLE_RATE = 24000;
const CHUNK_INTERVAL_MS = 250;
const DUPLICATE_FINAL_TRANSCRIPT_WINDOW_MS = 1500;
// When VAD-based auto-stop is disabled, force-stop after this much silence as a fallback
const SILENCE_FALLBACK_TIMEOUT_MS = 10000;

interface TranscriptMessage {
  type: "transcript" | "error";
  text?: string;
  message?: string;
  is_final?: boolean;
}

export interface UseVoiceRecorderOptions {
  /** Called when VAD detects silence and final transcript is received */
  onFinalTranscript?: (text: string) => void;
  /** If true, automatically stop recording when VAD detects silence */
  autoStopOnSilence?: boolean;
}

export interface UseVoiceRecorderReturn {
  isRecording: boolean;
  isProcessing: boolean;
  isMuted: boolean;
  error: string | null;
  liveTranscript: string;
  /** Current microphone audio level (0-1, RMS-based) */
  audioLevel: number;
  startRecording: () => Promise<void>;
  stopRecording: () => Promise<string | null>;
  setMuted: (muted: boolean) => void;
}

/**
 * Encapsulates all browser resources for a voice recording session.
 * Manages WebSocket, Web Audio API, and audio buffering.
 */
class VoiceRecorderSession {
  // Browser resources
  private websocket: WebSocket | null = null;
  private audioContext: AudioContext | null = null;
  private scriptNode: ScriptProcessorNode | null = null;
  private sourceNode: MediaStreamAudioSourceNode | null = null;
  private mediaStream: MediaStream | null = null;
  private sendInterval: NodeJS.Timeout | null = null;

  // State
  private audioBuffer: Float32Array[] = [];
  private transcript = "";
  private stopResolver: ((text: string | null) => void) | null = null;
  private isActive = false;
  // Guard: true once onFinalTranscript has fired for the current utterance.
  // Prevents the same transcript from being delivered twice when VAD-triggered
  // stop causes the server to echo the final transcript a second time.
  private finalTranscriptDelivered = false;
  private lastDeliveredFinalText: string | null = null;
  private lastDeliveredFinalAtMs = 0;
  // Fallback timer: force-stop after extended silence when VAD auto-stop is disabled
  private silenceFallbackTimer: NodeJS.Timeout | null = null;

  // Callbacks to update React state
  private onTranscriptChange: (text: string) => void;
  private onFinalTranscript: ((text: string) => void) | null;
  private onError: (error: string) => void;
  private onAudioLevel: (level: number) => void;
  private onSilenceTimeout: (() => void) | null;
  private onVADStop: (() => void) | null;
  private autoStopOnSilence: boolean;

  constructor(
    onTranscriptChange: (text: string) => void,
    onFinalTranscript: ((text: string) => void) | null,
    onError: (error: string) => void,
    onAudioLevel: (level: number) => void,
    onSilenceTimeout?: () => void,
    autoStopOnSilence?: boolean,
    onVADStop?: () => void
  ) {
    this.onTranscriptChange = onTranscriptChange;
    this.onFinalTranscript = onFinalTranscript;
    this.onError = onError;
    this.onAudioLevel = onAudioLevel;
    this.onSilenceTimeout = onSilenceTimeout || null;
    this.autoStopOnSilence = autoStopOnSilence ?? false;
    this.onVADStop = onVADStop || null;
  }

  get recording(): boolean {
    return this.isActive;
  }

  get currentTranscript(): string {
    return this.transcript;
  }

  setMuted(muted: boolean): void {
    if (this.mediaStream) {
      this.mediaStream.getAudioTracks().forEach((track) => {
        track.enabled = !muted;
      });
    }
  }

  async start(): Promise<void> {
    if (this.isActive) return;

    this.cleanup();
    this.transcript = "";
    this.audioBuffer = [];
    this.finalTranscriptDelivered = false;
    this.lastDeliveredFinalText = null;
    this.lastDeliveredFinalAtMs = 0;

    // Get microphone
    this.mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        sampleRate: { ideal: TARGET_SAMPLE_RATE },
        echoCancellation: true,
        noiseSuppression: true,
      },
    });

    // Get WS token and connect WebSocket
    const wsUrl = await this.getWebSocketUrl();
    this.websocket = new WebSocket(wsUrl);
    this.websocket.onmessage = this.handleMessage;
    this.websocket.onerror = () => this.onError("Connection failed");
    this.websocket.onclose = () => {
      if (this.stopResolver) {
        this.stopResolver(this.transcript || null);
        this.stopResolver = null;
      }
    };

    await this.waitForConnection();

    // Restore error handler after connection (waitForConnection overwrites it)
    this.websocket.onerror = () => this.onError("Connection failed");

    // Set up audio capture
    this.audioContext = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE });
    this.sourceNode = this.audioContext.createMediaStreamSource(
      this.mediaStream
    );
    this.scriptNode = this.audioContext.createScriptProcessor(4096, 1, 1);

    this.scriptNode.onaudioprocess = (event) => {
      const inputData = event.inputBuffer.getChannelData(0);
      this.audioBuffer.push(new Float32Array(inputData));

      // Compute RMS audio level (0-1) for waveform visualization
      let sum = 0;
      for (let i = 0; i < inputData.length; i++) {
        sum += inputData[i]! * inputData[i]!;
      }
      const rms = Math.sqrt(sum / inputData.length);
      // Scale RMS to a more visible range (raw RMS is usually very small)
      this.onAudioLevel(Math.min(1, rms * 5));
    };

    this.sourceNode.connect(this.scriptNode);
    this.scriptNode.connect(this.audioContext.destination);

    // Start sending audio chunks
    this.sendInterval = setInterval(
      () => this.sendAudioBuffer(),
      CHUNK_INTERVAL_MS
    );
    this.isActive = true;
  }

  async stop(): Promise<string | null> {
    if (!this.isActive) return this.transcript || null;

    this.resetSilenceFallbackTimer();

    // Stop audio capture
    if (this.sendInterval) {
      clearInterval(this.sendInterval);
      this.sendInterval = null;
    }
    if (this.scriptNode) {
      this.scriptNode.disconnect();
      this.scriptNode = null;
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
      this.sourceNode = null;
    }
    if (this.audioContext) {
      this.audioContext.close();
      this.audioContext = null;
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }

    this.audioBuffer = [];
    this.isActive = false;

    // Get final transcript from server
    if (this.websocket?.readyState === WebSocket.OPEN) {
      return new Promise((resolve) => {
        this.stopResolver = resolve;
        this.websocket!.send(JSON.stringify({ type: "end" }));

        // Timeout fallback
        setTimeout(() => {
          if (this.stopResolver) {
            this.stopResolver(this.transcript || null);
            this.stopResolver = null;
          }
        }, 3000);
      });
    }

    return this.transcript || null;
  }

  cleanup(): void {
    this.resetSilenceFallbackTimer();
    if (this.sendInterval) clearInterval(this.sendInterval);
    if (this.scriptNode) this.scriptNode.disconnect();
    if (this.sourceNode) this.sourceNode.disconnect();
    if (this.audioContext) this.audioContext.close();
    if (this.mediaStream) this.mediaStream.getTracks().forEach((t) => t.stop());
    if (this.websocket) this.websocket.close();

    this.sendInterval = null;
    this.scriptNode = null;
    this.sourceNode = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.websocket = null;
    this.isActive = false;
  }

  private async getWebSocketUrl(): Promise<string> {
    // Fetch short-lived WS token
    const tokenResponse = await fetch("/api/voice/ws-token", {
      method: "POST",
      credentials: "include",
    });
    if (!tokenResponse.ok) {
      throw new Error("Failed to get WebSocket authentication token");
    }
    const { token } = await tokenResponse.json();

    const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;
    const path = IS_DEV
      ? "/voice/transcribe/stream"
      : "/api/voice/transcribe/stream";
    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;
  }

  private waitForConnection(): Promise<void> {
    return new Promise((resolve, reject) => {
      if (!this.websocket) return reject(new Error("No WebSocket"));

      const timeout = setTimeout(
        () => reject(new Error("Connection timeout")),
        5000
      );

      this.websocket.onopen = () => {
        clearTimeout(timeout);
        resolve();
      };
      this.websocket.onerror = () => {
        clearTimeout(timeout);
        reject(new Error("Connection failed"));
      };
    });
  }

  private resetSilenceFallbackTimer(): void {
    if (this.silenceFallbackTimer) {
      clearTimeout(this.silenceFallbackTimer);
      this.silenceFallbackTimer = null;
    }
  }

  private startSilenceFallbackTimer(): void {
    this.resetSilenceFallbackTimer();
    this.silenceFallbackTimer = setTimeout(() => {
      // 10s of silence with no new speech — force-stop as a safety fallback
      if (this.isActive && this.onVADStop) {
        this.onVADStop();
      }
    }, SILENCE_FALLBACK_TIMEOUT_MS);
  }

  private handleMessage = (event: MessageEvent): void => {
    try {
      const data: TranscriptMessage = JSON.parse(event.data);

      if (data.type === "transcript") {
        if (data.text) {
          this.transcript = data.text;
          // Only push live updates to React while actively recording.
          // After stop(), the final transcript is returned via stopResolver
          // instead — this prevents stale text from reappearing in the
          // input box when the user clears it and starts a new recording.
          if (this.isActive) {
            this.onTranscriptChange(data.text);
          }
        }

        if (data.is_final && data.text) {
          // Resolve stop promise if waiting — must run even after stop()
          // so the caller receives the final transcript.
          if (this.stopResolver) {
            this.stopResolver(data.text);
            this.stopResolver = null;
          }

          // Skip VAD logic if session is no longer active
          if (!this.isActive) return;

          if (this.autoStopOnSilence) {
            // VAD detected silence — auto-stop and trigger callback
            const now = Date.now();
            const isLikelyDuplicateFinal =
              this.lastDeliveredFinalText === data.text &&
              now - this.lastDeliveredFinalAtMs <
                DUPLICATE_FINAL_TRANSCRIPT_WINDOW_MS;

            if (
              this.onFinalTranscript &&
              !this.finalTranscriptDelivered &&
              !isLikelyDuplicateFinal
            ) {
              this.finalTranscriptDelivered = true;
              this.lastDeliveredFinalText = data.text;
              this.lastDeliveredFinalAtMs = now;
              this.onFinalTranscript(data.text);
            }

            if (this.onVADStop) {
              this.onVADStop();
            }
          } else {
            // Auto-stop disabled (push-to-talk): ignore VAD, keep recording.
            // Start/reset a 10s fallback timer — if no new speech arrives,
            // force-stop to avoid recording silence indefinitely.
            this.startSilenceFallbackTimer();
          }
        }
      } else if (data.type === "error") {
        this.onError(data.message || "Transcription error");
      }
    } catch (e) {
      console.error("Failed to parse transcript message:", e);
    }
  };

  private resetBackendTranscript(): void {
    if (this.websocket?.readyState === WebSocket.OPEN) {
      this.websocket.send(JSON.stringify({ type: "reset" }));
    }
  }

  private sendAudioBuffer(): void {
    if (
      !this.websocket ||
      this.websocket.readyState !== WebSocket.OPEN ||
      !this.audioContext ||
      this.audioBuffer.length === 0
    ) {
      return;
    }

    // Concatenate buffered chunks
    const totalLength = this.audioBuffer.reduce(
      (sum, chunk) => sum + chunk.length,
      0
    );

    // Prevent buffer overflow
    if (totalLength > this.audioContext.sampleRate * 0.5 * 2) {
      this.audioBuffer = this.audioBuffer.slice(-10);
      return;
    }

    const concatenated = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of this.audioBuffer) {
      concatenated.set(chunk, offset);
      offset += chunk.length;
    }
    this.audioBuffer = [];

    // Resample and convert to PCM16
    const resampled = this.resampleAudio(
      concatenated,
      this.audioContext.sampleRate
    );
    const pcm16 = this.float32ToInt16(resampled);

    this.websocket.send(pcm16.buffer);
  }

  private resampleAudio(input: Float32Array, inputRate: number): Float32Array {
    if (inputRate === TARGET_SAMPLE_RATE) return input;

    const ratio = inputRate / TARGET_SAMPLE_RATE;
    const outputLength = Math.round(input.length / ratio);
    const output = new Float32Array(outputLength);

    for (let i = 0; i < outputLength; i++) {
      const srcIndex = i * ratio;
      const floor = Math.floor(srcIndex);
      const ceil = Math.min(floor + 1, input.length - 1);
      const fraction = srcIndex - floor;
      output[i] = input[floor]! * (1 - fraction) + input[ceil]! * fraction;
    }

    return output;
  }

  private float32ToInt16(float32: Float32Array): Int16Array {
    const int16 = new Int16Array(float32.length);
    for (let i = 0; i < float32.length; i++) {
      const s = Math.max(-1, Math.min(1, float32[i]!));
      int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
    return int16;
  }
}

/**
 * Hook for voice recording with streaming transcription.
 */
export function useVoiceRecorder(
  options?: UseVoiceRecorderOptions
): UseVoiceRecorderReturn {
  const [isRecording, setIsRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [isMuted, setIsMutedState] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [liveTranscript, setLiveTranscript] = useState("");
  const [audioLevel, setAudioLevel] = useState(0);

  const sessionRef = useRef<VoiceRecorderSession | null>(null);
  const onFinalTranscriptRef = useRef(options?.onFinalTranscript);
  const autoStopOnSilenceRef = useRef(options?.autoStopOnSilence ?? true); // Default to true

  // Keep callback ref in sync
  useEffect(() => {
    onFinalTranscriptRef.current = options?.onFinalTranscript;
    autoStopOnSilenceRef.current = options?.autoStopOnSilence ?? true;
  }, [options?.onFinalTranscript, options?.autoStopOnSilence]);

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      sessionRef.current?.cleanup();
    };
  }, []);

  const startRecording = useCallback(async () => {
    if (sessionRef.current?.recording) return;

    setError(null);
    setLiveTranscript("");

    // Clear any stale, inactive session before starting a new one.
    if (sessionRef.current && !sessionRef.current.recording) {
      sessionRef.current.cleanup();
      sessionRef.current = null;
    }

    // Create VAD stop handler that will stop the session
    const currentSession = new VoiceRecorderSession(
      setLiveTranscript,
      (text) => onFinalTranscriptRef.current?.(text),
      setError,
      setAudioLevel,
      undefined, // onSilenceTimeout
      autoStopOnSilenceRef.current,
      () => {
        // Stop only this session instance, and only clear recording state if it
        // is still the active session when stop resolves.
        currentSession.stop().then(() => {
          if (sessionRef.current === currentSession) {
            setIsRecording(false);
            setIsMutedState(false);
            sessionRef.current = null;
          }
        });
      }
    );
    sessionRef.current = currentSession;

    try {
      await currentSession.start();
      if (sessionRef.current === currentSession) {
        setIsRecording(true);
      }
    } catch (err) {
      currentSession.cleanup();
      setError(
        err instanceof Error ? err.message : "Failed to start recording"
      );
      if (sessionRef.current === currentSession) {
        sessionRef.current = null;
      }
      throw err;
    }
  }, []);

  const stopRecording = useCallback(async (): Promise<string | null> => {
    if (!sessionRef.current) return null;
    const currentSession = sessionRef.current;

    setIsProcessing(true);

    try {
      const transcript = await currentSession.stop();
      return transcript;
    } finally {
      // Only clear state if this is still the active session.
      if (sessionRef.current === currentSession) {
        setIsRecording(false);
        setIsMutedState(false); // Reset mute state when recording stops
        sessionRef.current = null;
      }
      setIsProcessing(false);
    }
  }, []);

  const setMuted = useCallback((muted: boolean) => {
    setIsMutedState(muted);
    sessionRef.current?.setMuted(muted);
  }, []);

  return {
    isRecording,
    isProcessing,
    isMuted,
    error,
    liveTranscript,
    audioLevel,
    startRecording,
    stopRecording,
    setMuted,
  };
}


================================================
FILE: web/src/hooks/useVoiceStatus.ts
================================================
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

interface VoiceStatus {
  stt_enabled: boolean;
  tts_enabled: boolean;
}

export function useVoiceStatus() {
  const { data, error, isLoading } = useSWR<VoiceStatus>(
    SWR_KEYS.voiceStatus,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    sttEnabled: data?.stt_enabled ?? false,
    ttsEnabled: data?.tts_enabled ?? false,
    isLoading,
    error,
  };
}


================================================
FILE: web/src/hooks/useWebSocket.ts
================================================
import { useState, useRef, useCallback, useEffect } from "react";

export type WebSocketStatus =
  | "connecting"
  | "connected"
  | "disconnected"
  | "error";

export interface UseWebSocketOptions<T> {
  /** URL to connect to */
  url: string;
  /** Called when a message is received */
  onMessage?: (data: T) => void;
  /** Called when connection opens */
  onOpen?: () => void;
  /** Called when connection closes */
  onClose?: () => void;
  /** Called on error */
  onError?: (error: Event) => void;
  /** Auto-connect on mount */
  autoConnect?: boolean;
}

export interface UseWebSocketReturn<T> {
  /** Current connection status */
  status: WebSocketStatus;
  /** Send JSON data */
  sendJson: (data: T) => void;
  /** Send binary data */
  sendBinary: (data: Blob | ArrayBuffer) => void;
  /** Connect to WebSocket */
  connect: () => Promise<void>;
  /** Disconnect from WebSocket */
  disconnect: () => void;
}

export function useWebSocket<TReceive = unknown, TSend = unknown>({
  url,
  onMessage,
  onOpen,
  onClose,
  onError,
  autoConnect = false,
}: UseWebSocketOptions<TReceive>): UseWebSocketReturn<TSend> {
  const [status, setStatus] = useState<WebSocketStatus>("disconnected");
  const wsRef = useRef<WebSocket | null>(null);
  const onMessageRef = useRef(onMessage);
  const onOpenRef = useRef(onOpen);
  const onCloseRef = useRef(onClose);
  const onErrorRef = useRef(onError);

  // Keep refs updated
  useEffect(() => {
    onMessageRef.current = onMessage;
    onOpenRef.current = onOpen;
    onCloseRef.current = onClose;
    onErrorRef.current = onError;
  }, [onMessage, onOpen, onClose, onError]);

  const connect = useCallback(async (): Promise<void> => {
    if (
      wsRef.current?.readyState === WebSocket.OPEN ||
      wsRef.current?.readyState === WebSocket.CONNECTING
    ) {
      return;
    }

    setStatus("connecting");

    return new Promise((resolve, reject) => {
      const ws = new WebSocket(url);
      wsRef.current = ws;

      const timeout = setTimeout(() => {
        ws.close();
        reject(new Error("WebSocket connection timeout"));
      }, 10000);

      ws.onopen = () => {
        clearTimeout(timeout);
        setStatus("connected");
        onOpenRef.current?.();
        resolve();
      };

      ws.onmessage = (event) => {
        try {
          const data = JSON.parse(event.data) as TReceive;
          onMessageRef.current?.(data);
        } catch {
          // Non-JSON message, ignore or handle differently
        }
      };

      ws.onclose = () => {
        clearTimeout(timeout);
        setStatus("disconnected");
        onCloseRef.current?.();
        wsRef.current = null;
      };

      ws.onerror = (error) => {
        clearTimeout(timeout);
        setStatus("error");
        onErrorRef.current?.(error);
        reject(new Error("WebSocket connection failed"));
      };
    });
  }, [url]);

  const disconnect = useCallback(() => {
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    setStatus("disconnected");
  }, []);

  const sendJson = useCallback((data: TSend) => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify(data));
    }
  }, []);

  const sendBinary = useCallback((data: Blob | ArrayBuffer) => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(data);
    }
  }, []);

  // Auto-connect if enabled
  useEffect(() => {
    if (autoConnect) {
      connect().catch(() => {
        // Error handled via onError callback
      });
    }
    return () => {
      disconnect();
    };
  }, [autoConnect, connect, disconnect]);

  return {
    status,
    sendJson,
    sendBinary,
    connect,
    disconnect,
  };
}


================================================
FILE: web/src/instrumentation-client.ts
================================================
// This file configures the initialization of Sentry on the client.
// The added config here will be used whenever a users loads a page in their browser.
// https://docs.sentry.io/platforms/javascript/guides/nextjs/

import * as Sentry from "@sentry/nextjs";

if (process.env.NEXT_PUBLIC_SENTRY_DSN) {
  Sentry.init({
    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
    release: process.env.SENTRY_RELEASE,

    // Setting this option to true will print useful information to the console while you're setting up Sentry.
    debug: false,

    integrations: [],

    tracesSampleRate: 0.0,
    profilesSampleRate: 0.0,
  });
}

// This export will instrument router navigations, and is only relevant if you enable tracing.
// `captureRouterTransitionStart` is available from SDK version 9.12.0 onwards
export const onRouterTransitionStart = Sentry.captureRouterTransitionStart;


================================================
FILE: web/src/instrumentation.ts
================================================
import * as Sentry from "@sentry/nextjs";

export async function register() {
  if (process.env.NEXT_RUNTIME === "nodejs") {
    await import("../sentry.server.config");
  }

  if (process.env.NEXT_RUNTIME === "edge") {
    await import("../sentry.edge.config");
  }
}

export const onRequestError = Sentry.captureRequestError;


================================================
FILE: web/src/interfaces/llm.ts
================================================
import type {
  OnboardingState,
  OnboardingActions,
} from "@/interfaces/onboarding";

export enum LLMProviderName {
  OPENAI = "openai",
  ANTHROPIC = "anthropic",
  OLLAMA_CHAT = "ollama_chat",
  LM_STUDIO = "lm_studio",
  AZURE = "azure",
  OPENROUTER = "openrouter",
  VERTEX_AI = "vertex_ai",
  BEDROCK = "bedrock",
  LITELLM = "litellm",
  LITELLM_PROXY = "litellm_proxy",
  BIFROST = "bifrost",
  CUSTOM = "custom",
}

export interface ModelConfiguration {
  name: string;
  is_visible: boolean;
  max_input_tokens: number | null;
  supports_image_input: boolean;
  supports_reasoning: boolean;
  display_name?: string;
  provider_display_name?: string;
  vendor?: string;
  version?: string;
  region?: string;
}

export interface SimpleKnownModel {
  name: string;
  display_name: string | null;
}

export interface WellKnownLLMProviderDescriptor {
  name: string;
  known_models: ModelConfiguration[];
  recommended_default_model: SimpleKnownModel | null;
}

export interface LLMModelDescriptor {
  modelName: string;
  provider: string;
  maxTokens: number;
}

export interface LLMProviderView {
  id: number;
  name: string;
  provider: string;
  api_key: string | null;
  api_base: string | null;
  api_version: string | null;
  custom_config: { [key: string]: string } | null;
  is_public: boolean;
  is_auto_mode: boolean;
  groups: number[];
  personas: number[];
  deployment_name: string | null;
  model_configurations: ModelConfiguration[];
}

export interface VisionProvider extends LLMProviderView {
  vision_models: string[];
}

export interface LLMProviderDescriptor {
  id: number;
  name: string;
  provider: string;
  provider_display_name: string;
  model_configurations: ModelConfiguration[];
}

export interface OllamaModelResponse {
  name: string;
  display_name: string;
  max_input_tokens: number | null;
  supports_image_input: boolean;
}

export interface OpenRouterModelResponse {
  name: string;
  display_name: string;
  max_input_tokens: number | null;
  supports_image_input: boolean;
}

export interface BedrockModelResponse {
  name: string;
  display_name: string;
  max_input_tokens: number;
  supports_image_input: boolean;
}

export interface LMStudioModelResponse {
  name: string;
  display_name: string;
  max_input_tokens: number | null;
  supports_image_input: boolean;
  supports_reasoning: boolean;
}

export interface DefaultModel {
  provider_id: number;
  model_name: string;
}

export interface LLMProviderResponse<T> {
  providers: T[];
  default_text: DefaultModel | null;
  default_vision: DefaultModel | null;
}

export type LLMModalVariant = "onboarding" | "llm-configuration";

export interface LLMProviderFormProps {
  variant?: LLMModalVariant;
  existingLlmProvider?: LLMProviderView;
  shouldMarkAsDefault?: boolean;
  open?: boolean;
  onOpenChange?: (open: boolean) => void;

  /** The current default model name for this provider (from the global default). */
  defaultModelName?: string;

  // Onboarding-specific (only when variant === "onboarding")
  onboardingState?: OnboardingState;
  onboardingActions?: OnboardingActions;
  llmDescriptor?: WellKnownLLMProviderDescriptor;
}

// Param types for model fetching functions - use snake_case to match API structure
export interface BedrockFetchParams {
  aws_region_name: string;
  aws_access_key_id?: string;
  aws_secret_access_key?: string;
  aws_bearer_token_bedrock?: string;
  provider_name?: string;
}

export interface OllamaFetchParams {
  api_base?: string;
  provider_name?: string;
  signal?: AbortSignal;
}

export interface OpenRouterFetchParams {
  api_base?: string;
  api_key?: string;
  provider_name?: string;
}

export interface LiteLLMProxyFetchParams {
  api_base?: string;
  api_key?: string;
  provider_name?: string;
  signal?: AbortSignal;
}

export interface LiteLLMProxyModelResponse {
  provider_name: string;
  model_name: string;
}

export interface BifrostFetchParams {
  api_base?: string;
  api_key?: string;
  provider_name?: string;
  signal?: AbortSignal;
}

export interface BifrostModelResponse {
  name: string;
  display_name: string;
  max_input_tokens: number | null;
  supports_image_input: boolean;
  supports_reasoning: boolean;
}

export interface VertexAIFetchParams {
  model_configurations?: ModelConfiguration[];
}

export interface LMStudioFetchParams {
  api_base?: string;
  api_key?: string;
  api_key_changed?: boolean;
  provider_name?: string;
  signal?: AbortSignal;
}

export type FetchModelsParams =
  | BedrockFetchParams
  | OllamaFetchParams
  | OpenRouterFetchParams
  | LiteLLMProxyFetchParams
  | BifrostFetchParams
  | VertexAIFetchParams
  | LMStudioFetchParams;


================================================
FILE: web/src/interfaces/onboarding.ts
================================================
import type { IconProps } from "@opal/types";

export enum OnboardingStep {
  Welcome = "welcome",
  Name = "name",
  LlmSetup = "llm-setup",
  Complete = "complete",
}

export interface OnboardingData {
  userName?: string;
  llmProviders?: string[];
  llmApiKey?: string;
}

export interface OnboardingState {
  currentStep: OnboardingStep;
  stepIndex: number;
  totalSteps: number;
  data: OnboardingData;
  isButtonActive: boolean;
  isLoading?: boolean;
  error?: string;
}

export enum OnboardingActionType {
  NEXT_STEP = "NEXT_STEP",
  PREV_STEP = "PREV_STEP",
  GO_TO_STEP = "GO_TO_STEP",
  UPDATE_DATA = "UPDATE_DATA",
  SET_BUTTON_ACTIVE = "SET_BUTTON_ACTIVE",
  SET_LOADING = "SET_LOADING",
  SET_ERROR = "SET_ERROR",
  RESET = "RESET",
}

export type OnboardingAction =
  | { type: OnboardingActionType.NEXT_STEP }
  | { type: OnboardingActionType.PREV_STEP }
  | { type: OnboardingActionType.GO_TO_STEP; step: OnboardingStep }
  | { type: OnboardingActionType.UPDATE_DATA; payload: Partial<OnboardingData> }
  | { type: OnboardingActionType.SET_BUTTON_ACTIVE; isButtonActive: boolean }
  | { type: OnboardingActionType.SET_LOADING; isLoading: boolean }
  | { type: OnboardingActionType.SET_ERROR; error: string | undefined }
  | { type: OnboardingActionType.RESET };

export type FinalStepItemProps = {
  title: string;
  description: string;
  icon: React.FunctionComponent<IconProps>;
  buttonText: string;
  buttonHref: string;
};

export type OnboardingActions = {
  nextStep: () => void;
  prevStep: () => void;
  goToStep: (step: OnboardingStep) => void;
  setButtonActive: (active: boolean) => void;
  updateName: (name: string) => void;
  updateData: (data: Partial<OnboardingData>) => void;
  setLoading: (isLoading: boolean) => void;
  setError: (error: string | undefined) => void;
  reset: () => void;
};


================================================
FILE: web/src/interfaces/settings.ts
================================================
export enum ApplicationStatus {
  PAYMENT_REMINDER = "payment_reminder",
  GATED_ACCESS = "gated_access",
  ACTIVE = "active",
  SEAT_LIMIT_EXCEEDED = "seat_limit_exceeded",
}

export enum QueryHistoryType {
  DISABLED = "disabled",
  ANONYMIZED = "anonymized",
  NORMAL = "normal",
}

export interface Settings {
  anonymous_user_enabled: boolean;
  invite_only_enabled: boolean;
  anonymous_user_path?: string;
  maximum_chat_retention_days?: number | null;
  company_name?: string | null;
  company_description?: string | null;
  notifications: Notification[];
  needs_reindexing: boolean;
  gpu_enabled: boolean;
  application_status: ApplicationStatus;
  auto_scroll: boolean;
  temperature_override_enabled: boolean;
  query_history_type: QueryHistoryType;

  deep_research_enabled?: boolean;
  search_ui_enabled?: boolean;

  // Image processing settings
  image_extraction_and_analysis_enabled?: boolean;
  search_time_image_analysis_enabled?: boolean;
  image_analysis_max_size_mb?: number | null;

  // User Knowledge settings
  user_knowledge_enabled?: boolean;
  user_file_max_upload_size_mb?: number | null;
  file_token_count_threshold_k?: number | null;

  // Connector settings
  show_extra_connectors?: boolean;

  // Default Assistant settings
  disable_default_assistant?: boolean;

  // Onyx Craft (Build Mode) feature flag
  onyx_craft_enabled?: boolean;

  // Whether EE features are unlocked (user has a valid enterprise license).
  // Controls UI visibility of EE features like user groups, analytics, RBAC.
  ee_features_enabled?: boolean;

  // Seat usage - populated when seat limit is exceeded
  seat_count?: number | null;
  used_seats?: number | null;

  // OpenSearch migration
  opensearch_indexing_enabled?: boolean;

  // Vector DB availability flag - false when DISABLE_VECTOR_DB is set.
  // When false, connectors, RAG search, document sets, and related features
  // are unavailable.
  vector_db_enabled?: boolean;

  // True when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
  hooks_enabled?: boolean;

  // Application version from the ONYX_VERSION env var on the server.
  version?: string | null;
  // Hard ceiling for user_file_max_upload_size_mb, derived from env var.
  max_allowed_upload_size_mb?: number;

  // Factory defaults for the restore button.
  default_user_file_max_upload_size_mb?: number;
  default_file_token_count_threshold_k?: number;
}

export enum NotificationType {
  PERSONA_SHARED = "persona_shared",
  REINDEX = "reindex",
  TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending",
  ASSISTANT_FILES_READY = "assistant_files_ready",
  RELEASE_NOTES = "release_notes",
  FEATURE_ANNOUNCEMENT = "feature_announcement",
}

export interface Notification {
  id: number;
  notif_type: string;
  title: string;
  description: string | null;
  dismissed: boolean;
  first_shown: string;
  last_shown: string;
  additional_data?: {
    persona_id?: number;
    link?: string;
    version?: string; // For release notes notifications
    [key: string]: any;
  };
}

export interface NavigationItem {
  link: string;
  icon?: string;
  svg_logo?: string;
  title: string;
}

export interface EnterpriseSettings {
  application_name: string | null;
  use_custom_logo: boolean;
  use_custom_logotype: boolean;
  logo_display_style: "logo_and_name" | "logo_only" | "name_only" | null;

  // custom navigation
  custom_nav_items: NavigationItem[];

  // custom Chat components
  custom_lower_disclaimer_content: string | null;
  custom_header_content: string | null;
  two_lines_for_chat_header: boolean | null;
  custom_popup_header: string | null;
  custom_popup_content: string | null;
  enable_consent_screen: boolean | null;
  consent_screen_prompt: string | null;
  show_first_visit_notice: boolean | null;
  custom_greeting_message: string | null;
}

export interface CombinedSettings {
  settings: Settings;
  enterpriseSettings: EnterpriseSettings | null;
  customAnalyticsScript: string | null;
  isMobile?: boolean;
  webVersion: string | null;
  webDomain: string | null;

  /**
   * NOTE (@raunakab):
   * Whether search mode is actually available to users.
   *
   * Prefer this over reading `settings.search_ui_enabled` directly.
   * `search_ui_enabled` only reflects the admin's *preference* — it does not
   * account for prerequisites like connectors being configured. This derived
   * flag combines the admin setting with runtime checks (e.g. connectors
   * exist) so consumers get a single, accurate boolean.
   */
  isSearchModeAvailable: boolean;
  settingsLoading: boolean;
}


================================================
FILE: web/src/layouts/actions-layouts.tsx
================================================
/**
 * Actions Layout Components
 *
 * A namespaced collection of components for building consistent action cards
 * (MCP servers, OpenAPI tools, etc.). These components provide a standardized
 * layout that separates presentation from business logic, making it easier to
 * build and maintain action-related UIs.
 *
 * Built on top of ExpandableCard layouts for the underlying card structure.
 *
 * @example
 * ```tsx
 * import * as ActionsLayouts from "@/layouts/actions-layouts";
 * import * as ExpandableCard from "@/layouts/expandable-card-layouts";
 * import { SvgServer } from "@opal/icons";
 * import Switch from "@/components/ui/switch";
 *
 * function MyActionCard() {
 *   return (
 *     <ExpandableCard.Root>
 *       <ActionsLayouts.Header
 *         title="My MCP Server"
 *         description="A powerful MCP server for automation"
 *         icon={SvgServer}
 *         rightChildren={
 *           <Button onClick={handleDisconnect}>Disconnect</Button>
 *         }
 *       />
 *       <ActionsLayouts.Content>
 *         <ActionsLayouts.Tool
 *           title="File Reader"
 *           description="Read files from the filesystem"
 *           icon={SvgFile}
 *           rightChildren={
 *             <Switch checked={enabled} onCheckedChange={setEnabled} />
 *           }
 *         />
 *         <ActionsLayouts.Tool
 *           title="Web Search"
 *           description="Search the web"
 *           icon={SvgGlobe}
 *           disabled={true}
 *           rightChildren={
 *             <Switch checked={false} disabled />
 *           }
 *         />
 *       </ActionsLayouts.Content>
 *     </ExpandableCard.Root>
 *   );
 * }
 * ```
 */

"use client";

import React, { HtmlHTMLAttributes } from "react";
import type { IconProps } from "@opal/types";
import { WithoutStyles } from "@/types";
import { ContentAction } from "@opal/layouts";
import * as ExpandableCard from "@/layouts/expandable-card-layouts";
import { Card } from "@/refresh-components/cards";
import Label from "@/refresh-components/form/Label";

/**
 * Actions Header Component
 *
 * The header section of an action card. Displays icon, title, description,
 * and optional right-aligned actions.
 *
 * Features:
 * - Icon, title, and description display
 * - Custom right-aligned actions via rightChildren
 * - Responsive layout with truncated text
 *
 * @example
 * ```tsx
 * // Basic header
 * <ActionsLayouts.Header
 *   title="File Server"
 *   description="Manage local files"
 *   icon={SvgFolder}
 * />
 *
 * // With actions
 * <ActionsLayouts.Header
 *   title="API Server"
 *   description="RESTful API integration"
 *   icon={SvgCloud}
 *   rightChildren={
 *     <div className="flex gap-2">
 *       <Button onClick={handleEdit}>Edit</Button>
 *       <Button onClick={handleDelete}>Delete</Button>
 *     </div>
 *   }
 * />
 * ```
 */
export interface ActionsHeaderProps
  extends WithoutStyles<HtmlHTMLAttributes<HTMLDivElement>> {
  // Core content
  name?: string;
  title: string;
  description?: string;
  icon: React.FunctionComponent<IconProps>;

  // Custom content
  rightChildren?: React.ReactNode;
}
function ActionsHeader({
  name,
  title,
  description,
  icon: Icon,
  rightChildren,
  ...props
}: ActionsHeaderProps) {
  return (
    <ExpandableCard.Header>
      <div className="flex flex-col gap-2 pt-4 pb-2">
        <div className="px-4">
          <Label name={name}>
            <ContentAction
              icon={Icon}
              title={title}
              description={description}
              sizePreset="section"
              variant="section"
              rightChildren={rightChildren}
              paddingVariant="fit"
            />
          </Label>
        </div>
        <div {...props} className="px-2" />
      </div>
    </ExpandableCard.Header>
  );
}

/**
 * Actions Content Component
 *
 * A container for the content area of an action card.
 * Use this to wrap tools, settings, or other expandable content.
 * Features a maximum height with scrollable overflow.
 *
 * IMPORTANT: Only ONE ActionsContent should be used within a single ExpandableCard.Root.
 * This component self-registers with the ActionsLayout context to inform
 * ActionsHeader whether content exists (for border-radius styling). Using
 * multiple ActionsContent components will cause incorrect unmount behavior -
 * when any one unmounts, it will incorrectly signal that no content exists,
 * even if other ActionsContent components remain mounted.
 *
 * @example
 * ```tsx
 * <ActionsLayouts.Content>
 *   <ActionsLayouts.Tool {...} />
 *   <ActionsLayouts.Tool {...} />
 * </ActionsLayouts.Content>
 * ```
 */
function ActionsContent({
  children,
  ...props
}: WithoutStyles<React.HTMLAttributes<HTMLDivElement>>) {
  return (
    <ExpandableCard.Content {...props}>
      <div className="flex flex-col gap-2 p-2">{children}</div>
    </ExpandableCard.Content>
  );
}

/**
 * Actions Tool Component
 *
 * Represents a single tool within an actions content area. Displays the tool's
 * title, description, and icon. The component provides a label wrapper for
 * custom right-aligned controls (like toggle switches).
 *
 * Features:
 * - Tool title and description
 * - Custom icon
 * - Disabled state (applies strikethrough to title)
 * - Custom right-aligned content via rightChildren
 * - Responsive layout with truncated text
 *
 * @example
 * ```tsx
 * // Basic tool with switch
 * <ActionsLayouts.Tool
 *   title="File Reader"
 *   description="Read files from the filesystem"
 *   icon={SvgFile}
 *   rightChildren={
 *     <Switch checked={enabled} onCheckedChange={setEnabled} />
 *   }
 * />
 *
 * // Disabled tool
 * <ActionsLayouts.Tool
 *   title="Premium Feature"
 *   description="This feature requires a premium subscription"
 *   icon={SvgLock}
 *   disabled={true}
 *   rightChildren={
 *     <Switch checked={false} disabled />
 *   }
 * />
 *
 * // Tool with custom action
 * <ActionsLayouts.Tool
 *   name="config_tool"
 *   title="Configuration"
 *   description="Configure system settings"
 *   icon={SvgSettings}
 *   rightChildren={
 *     <Button onClick={openSettings}>Configure</Button>
 *   }
 * />
 * ```
 */
export type ActionsToolProps = WithoutStyles<{
  // Core content
  name?: string;
  title: string;
  description: string;
  icon?: React.FunctionComponent<IconProps>;

  // State
  disabled?: boolean;
  rightChildren?: React.ReactNode;
}>;
function ActionsTool({
  name,
  title,
  description,
  icon,
  disabled,
  rightChildren,
}: ActionsToolProps) {
  return (
    <Card padding={0.75} variant={disabled ? "disabled" : undefined}>
      <Label name={name} disabled={disabled}>
        <ContentAction
          icon={icon}
          title={title}
          description={description}
          sizePreset="main-ui"
          variant="section"
          rightChildren={rightChildren}
          paddingVariant="fit"
        />
      </Label>
    </Card>
  );
}

export {
  ActionsHeader as Header,
  ActionsContent as Content,
  ActionsTool as Tool,
};


================================================
FILE: web/src/layouts/app-layouts.tsx
================================================
/**
 * App Page Layout Components
 *
 * Provides the root layout, header, and footer for app pages.
 * AppRoot renders AppHeader and Footer by default (both can be disabled via props).
 *
 * @example
 * ```tsx
 * import * as AppLayouts from "@/layouts/app-layouts";
 *
 * export default function ChatPage() {
 *   return (
 *     <AppLayouts.Root>
 *       <ChatInterface />
 *     </AppLayouts.Root>
 *   );
 * }
 * ```
 */

"use client";

import {
  cn,
  ensureHrefProtocol,
  INTERACTIVE_SELECTOR,
  noProp,
} from "@/lib/utils";
import type { Components } from "react-markdown";
import Text from "@/refresh-components/texts/Text";
import { useCallback, useMemo, useRef, useState, useEffect } from "react";
import { useAppBackground } from "@/providers/AppBackgroundProvider";
import { useTheme } from "next-themes";
import ShareChatSessionModal from "@/sections/modals/ShareChatSessionModal";
import IconButton from "@/refresh-components/buttons/IconButton";
import LineItem from "@/refresh-components/buttons/LineItem";
import { useProjectsContext } from "@/providers/ProjectsContext";
import useChatSessions from "@/hooks/useChatSessions";
import {
  handleMoveOperation,
  shouldShowMoveModal,
  showErrorNotification,
} from "@/sections/sidebar/sidebarUtils";
import { LOCAL_STORAGE_KEYS } from "@/sections/sidebar/constants";
import { deleteChatSession } from "@/app/app/services/lib";
import { useRouter } from "next/navigation";
import MoveCustomAgentChatModal from "@/components/modals/MoveCustomAgentChatModal";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import FrostedDiv from "@/refresh-components/FrostedDiv";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { PopoverSearchInput } from "@/sections/sidebar/ChatButton";
import SimplePopover from "@/refresh-components/SimplePopover";
import { Interactive } from "@opal/core";
import { Button, OpenButton } from "@opal/components";
import { useAppSidebarContext } from "@/providers/AppSidebarProvider";
import useScreenSize from "@/hooks/useScreenSize";
import {
  SvgBubbleText,
  SvgFolderIn,
  SvgMoreHorizontal,
  SvgSearchMenu,
  SvgShare,
  SvgSidebar,
  SvgTrash,
} from "@opal/icons";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import { useSettingsContext } from "@/providers/SettingsProvider";
import type { AppMode } from "@/providers/QueryControllerProvider";
import useAppFocus from "@/hooks/useAppFocus";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import useBrowserInfo from "@/hooks/useBrowserInfo";
import { APP_SLOGAN } from "@/lib/constants";

/**
 * App Header Component
 *
 * Renders the header for chat sessions with share, move, and delete actions.
 * Designed to be rendered inside ChatScrollContainer with sticky positioning.
 *
 * Features:
 * - Share chat functionality
 * - Move chat to project (with confirmation for custom agents)
 * - Delete chat with confirmation
 * - Mobile-responsive sidebar toggle
 * - Custom header content from enterprise settings
 * - App-Mode toggle (EE gated)
 */
function Header() {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const { state, setAppMode } = useQueryController();
  const settings = useSettingsContext();
  const { isMobile } = useScreenSize();
  const { setFolded } = useAppSidebarContext();
  const [showShareModal, setShowShareModal] = useState(false);
  const [deleteModalOpen, setDeleteModalOpen] = useState(false);
  const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =
    useState(false);
  const [pendingMoveProjectId, setPendingMoveProjectId] = useState<
    number | null
  >(null);
  const [showMoveOptions, setShowMoveOptions] = useState(false);
  const [searchTerm, setSearchTerm] = useState("");
  const [popoverOpen, setPopoverOpen] = useState(false);
  const [popoverItems, setPopoverItems] = useState<React.ReactNode[]>([]);
  const [modePopoverOpen, setModePopoverOpen] = useState(false);
  const {
    projects,
    fetchProjects,
    refreshCurrentProjectDetails,
    currentProjectId,
  } = useProjectsContext();
  const { currentChatSession, refreshChatSessions, removeSession } =
    useChatSessions();
  const router = useRouter();
  const appFocus = useAppFocus();

  const customHeaderContent =
    settings?.enterpriseSettings?.custom_header_content;
  // Some pages don't want the custom header content, namely every page except Chat, Search, and
  // NewSession. The header provides features such as the open sidebar button on mobile which pages
  // without this content still use.
  const pageWithHeaderContent = appFocus.isChat() || appFocus.isNewSession();

  const effectiveMode: AppMode =
    appFocus.isNewSession() && state.phase === "idle" ? state.appMode : "chat";

  const availableProjects = useMemo(() => {
    if (!projects) return [];
    return projects.filter((project) => project.id !== currentProjectId);
  }, [projects, currentProjectId]);

  const filteredProjects = useMemo(() => {
    if (!searchTerm) return availableProjects;
    const term = searchTerm.toLowerCase();
    return availableProjects.filter((project) =>
      project.name.toLowerCase().includes(term)
    );
  }, [availableProjects, searchTerm]);

  const resetMoveState = useCallback(() => {
    setShowMoveOptions(false);
    setSearchTerm("");
    setPendingMoveProjectId(null);
    setShowMoveCustomAgentModal(false);
  }, []);

  const performMove = useCallback(
    async (targetProjectId: number) => {
      if (!currentChatSession) return;
      try {
        await handleMoveOperation({
          chatSession: currentChatSession,
          targetProjectId,
          refreshChatSessions,
          refreshCurrentProjectDetails,
          fetchProjects,
          currentProjectId,
        });
        resetMoveState();
        setPopoverOpen(false);
      } catch (error) {
        console.error("Failed to move chat session:", error);
      }
    },
    [
      currentChatSession,
      refreshChatSessions,
      refreshCurrentProjectDetails,
      fetchProjects,
      currentProjectId,
      resetMoveState,
    ]
  );

  const handleMoveClick = useCallback(
    (projectId: number) => {
      if (!currentChatSession) return;
      if (shouldShowMoveModal(currentChatSession)) {
        setPendingMoveProjectId(projectId);
        setShowMoveCustomAgentModal(true);
        return;
      }
      void performMove(projectId);
    },
    [currentChatSession, performMove]
  );

  const handleDeleteChat = useCallback(async () => {
    if (!currentChatSession) return;
    try {
      const response = await deleteChatSession(currentChatSession.id);
      if (!response.ok) {
        throw new Error("Failed to delete chat session");
      }
      removeSession(currentChatSession.id);
      await Promise.all([refreshChatSessions(), fetchProjects()]);
      router.replace("/app");
      setDeleteModalOpen(false);
    } catch (error) {
      console.error("Failed to delete chat:", error);
      showErrorNotification("Failed to delete chat. Please try again.");
    }
  }, [
    currentChatSession,
    refreshChatSessions,
    removeSession,
    fetchProjects,
    router,
  ]);

  const setDeleteConfirmationModalOpen = useCallback((open: boolean) => {
    setDeleteModalOpen(open);
    if (open) {
      setPopoverOpen(false);
    }
  }, []);

  useEffect(() => {
    const items = showMoveOptions
      ? [
          <PopoverSearchInput
            key="search"
            setShowMoveOptions={setShowMoveOptions}
            onSearch={setSearchTerm}
          />,
          ...filteredProjects.map((project) => (
            <LineItem
              key={project.id}
              icon={SvgFolderIn}
              onClick={noProp(() => handleMoveClick(project.id))}
            >
              {project.name}
            </LineItem>
          )),
        ]
      : [
          <LineItem
            key="move"
            icon={SvgFolderIn}
            onClick={noProp(() => setShowMoveOptions(true))}
          >
            Move to Project
          </LineItem>,
          <LineItem
            key="delete"
            icon={SvgTrash}
            onClick={noProp(() => setDeleteConfirmationModalOpen(true))}
            danger
          >
            Delete
          </LineItem>,
        ];

    setPopoverItems(items);
  }, [
    showMoveOptions,
    filteredProjects,
    currentChatSession,
    setDeleteConfirmationModalOpen,
    handleMoveClick,
  ]);

  return (
    <>
      {showShareModal && currentChatSession && (
        <ShareChatSessionModal
          chatSession={currentChatSession}
          onClose={() => setShowShareModal(false)}
        />
      )}

      {showMoveCustomAgentModal && (
        <MoveCustomAgentChatModal
          onCancel={resetMoveState}
          onConfirm={async (doNotShowAgain: boolean) => {
            if (doNotShowAgain && typeof window !== "undefined") {
              window.localStorage.setItem(
                LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,
                "true"
              );
            }
            if (pendingMoveProjectId != null) {
              await performMove(pendingMoveProjectId);
            }
          }}
        />
      )}

      {deleteModalOpen && (
        <ConfirmationModalLayout
          title="Delete Chat"
          icon={SvgTrash}
          onClose={() => setDeleteModalOpen(false)}
          submit={
            <Button variant="danger" onClick={handleDeleteChat}>
              Delete
            </Button>
          }
        >
          Are you sure you want to delete this chat? This action cannot be
          undone.
        </ConfirmationModalLayout>
      )}

      <div
        className={cn(
          "w-full flex flex-row flex-wrap justify-center items-center px-4",
          // # Note (@raunakab):
          //
          // We add an additional top margin to align this header with the `LogoSection` inside of the App-Sidebar.
          // For more information, check out `SidebarWrapper.tsx`.
          "mt-2"
        )}
      >
        {/*
          Left:
          - (mobile) sidebar toggle
          - app-mode (for Unified S+C [EE gated])
        */}
        <div className="flex-1 flex flex-row items-center gap-2 h-[3.3rem]">
          {isMobile && (
            <Button
              prominence="internal"
              icon={SvgSidebar}
              onClick={() => setFolded(false)}
            />
          )}
          {isPaidEnterpriseFeaturesEnabled &&
            settings.isSearchModeAvailable &&
            appFocus.isNewSession() &&
            state.phase === "idle" && (
              <Popover open={modePopoverOpen} onOpenChange={setModePopoverOpen}>
                <Popover.Trigger asChild>
                  <OpenButton
                    aria-label="Change app mode"
                    icon={
                      effectiveMode === "search" ? SvgSearchMenu : SvgBubbleText
                    }
                  >
                    {effectiveMode === "search" ? "Search" : "Chat"}
                  </OpenButton>
                </Popover.Trigger>
                <Popover.Content align="start" width="lg">
                  <Popover.Menu>
                    <LineItem
                      icon={SvgSearchMenu}
                      selected={effectiveMode === "search"}
                      description="Quick search for documents"
                      onClick={noProp(() => {
                        setAppMode("search");
                        setModePopoverOpen(false);
                      })}
                    >
                      Search
                    </LineItem>
                    <LineItem
                      icon={SvgBubbleText}
                      selected={effectiveMode === "chat"}
                      description="Conversation and research"
                      onClick={noProp(() => {
                        setAppMode("chat");
                        setModePopoverOpen(false);
                      })}
                    >
                      Chat
                    </LineItem>
                  </Popover.Menu>
                </Popover.Content>
              </Popover>
            )}
        </div>

        {/*
          Center:
          - custom-header-content
          - Wraps to its own row below left/right on mobile when content is present
        */}
        <div
          className={cn(
            "flex flex-col items-center overflow-hidden",
            pageWithHeaderContent && customHeaderContent
              ? "order-last basis-full py-2 sm:py-0 sm:order-none sm:basis-auto sm:flex-1"
              : "flex-1"
          )}
        >
          <Text text03 className="text-center w-full">
            {pageWithHeaderContent && customHeaderContent}
          </Text>
        </div>

        {/*
          Right:
          - share button
          - more-options buttons
        */}
        <div className="flex flex-1 justify-end items-center h-[3.3rem]">
          {appFocus.isChat() && currentChatSession && (
            <FrostedDiv className="flex shrink flex-row items-center">
              <Button
                icon={SvgShare}
                prominence="tertiary"
                interaction={showShareModal ? "hover" : "rest"}
                responsiveHideText
                onClick={() => setShowShareModal(true)}
                aria-label="share-chat-button"
              >
                Share
              </Button>
              <SimplePopover
                trigger={
                  /* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */
                  <IconButton
                    icon={SvgMoreHorizontal}
                    className="ml-2"
                    transient={popoverOpen}
                    tertiary
                  />
                }
                onOpenChange={(state) => {
                  setPopoverOpen(state);
                  if (!state) setShowMoveOptions(false);
                }}
                side="bottom"
                align="end"
              >
                <PopoverMenu>{popoverItems}</PopoverMenu>
              </SimplePopover>
            </FrostedDiv>
          )}
        </div>
      </div>
    </>
  );
}

const footerMarkdownComponents = {
  p: ({ children }) => (
    //dont remove the !my-0 class, it's important for the markdown to render without any alignment issues
    <Text as="p" text03 secondaryAction className="!my-0 text-center">
      {children}
    </Text>
  ),
  a: ({ node, href, className, children, ...rest }) => {
    const fullHref = ensureHrefProtocol(href);
    return (
      <a
        href={fullHref}
        target="_blank"
        rel="noopener noreferrer"
        {...rest}
        className={cn(className, "underline underline-offset-2")}
      >
        <Text text03 secondaryAction>
          {children}
        </Text>
      </a>
    );
  },
} satisfies Partial<Components>;

function Footer() {
  const settings = useSettingsContext();
  const appFocus = useAppFocus();

  const customFooterContent =
    settings?.enterpriseSettings?.custom_lower_disclaimer_content ||
    `[Onyx ${
      settings?.webVersion || "dev"
    }](https://www.onyx.app/) - ${APP_SLOGAN}`;

  return (
    <footer
      className={cn(
        "relative w-full flex flex-row justify-center items-center gap-2 px-2 mt-auto",
        // # Note (from @raunakab):
        //
        // The conditional rendering of vertical padding based on the current page is intentional.
        // The `AppInputBar` has `shadow-01` applied, which extends ~14px below it.
        // Because the content area in `Root` uses `overflow-auto`, the shadow would be
        // clipped at the container boundary — causing a visible rendering artefact.
        //
        // To fix this, `AppPage.tsx` uses animated spacer divs around `AppInputBar` to
        // give the shadow breathing room. However, that extra space adds visible gap
        // between the input and the Footer. To compensate, we remove the Footer's top
        // padding when `appFocus.isChat()`.
        //
        // There is a corresponding note inside `AppInputBar.tsx` and `AppPage.tsx`
        // explaining this. Please refer to those notes as well.
        appFocus.isChat() ? "pb-2" : "py-2"
      )}
    >
      <MinimalMarkdown
        content={customFooterContent}
        className={cn("max-w-full text-center")}
        components={footerMarkdownComponents}
      />
    </footer>
  );
}

/**
 * App Root Component
 *
 * Wraps app pages with header (AppHeader) and footer chrome.
 *
 * Layout Structure:
 * ```
 * ┌──────────────────────────────────┐
 * │ AppHeader                        │
 * ├──────────────────────────────────┤
 * │                                  │
 * │ Content Area (children)          │
 * │                                  │
 * ├──────────────────────────────────┤
 * │ Footer (custom disclaimer)       │
 * └──────────────────────────────────┘
 * ```
 *
 * @example
 * ```tsx
 * <AppLayouts.Root>
 *   <ChatInterface />
 * </AppLayouts.Root>
 * ```
 */
export interface AppRootProps {
  /** Opt-in to render the user's custom background image */
  enableBackground?: boolean;
  children?: React.ReactNode;
}

function Root({ children, enableBackground }: AppRootProps) {
  const { hasBackground, appBackgroundUrl } = useAppBackground();
  const { resolvedTheme } = useTheme();
  const appFocus = useAppFocus();
  const { isSafari } = useBrowserInfo();
  const isLightMode = resolvedTheme === "light";
  const showBackground = hasBackground && enableBackground;

  // Track whether the chat input was focused before a mousedown, so we can
  // restore focus on mouseup if no text was selected. This preserves
  // click-drag text selection while keeping the input focused on plain clicks.
  const inputWasFocused = useRef(false);

  const handleMouseDown = useCallback(
    (event: React.MouseEvent<HTMLDivElement>) => {
      const activeEl = document.activeElement;
      const isFocused =
        activeEl instanceof HTMLElement &&
        activeEl.id === "onyx-chat-input-textarea";
      const target = event.target;
      const isInteractive =
        target instanceof HTMLElement && !!target.closest(INTERACTIVE_SELECTOR);
      inputWasFocused.current = isFocused && !isInteractive;
    },
    []
  );

  const handleMouseUp = useCallback(() => {
    if (!inputWasFocused.current) return;
    inputWasFocused.current = false;
    const sel = window.getSelection();
    if (sel && !sel.isCollapsed) return;
    const textarea = document.getElementById("onyx-chat-input-textarea");
    // Only restore focus if no other element has grabbed it since mousedown.
    if (textarea && document.activeElement !== textarea) {
      textarea.focus();
    }
  }, []);
  const horizontalBlurMask = `linear-gradient(
    to right,
    transparent 0%,
    black max(0%, calc(50% - 25rem)),
    black min(100%, calc(50% + 25rem)),
    transparent 100%
  )`;

  return (
    /* NOTE: Some elements, markdown tables in particular, refer to this `@container` in order to
      breakout of their immediate containers using cqw units.
      The `data-main-container` attribute is used by portaled elements (e.g. CommandMenu) to
      render inside this container so they can be centered relative to the main content area
      rather than the full viewport (which would include the sidebar).
    */
    <div
      data-main-container
      onMouseDown={handleMouseDown}
      onMouseUp={handleMouseUp}
      className={cn(
        "@container flex flex-col h-full w-full relative overflow-hidden",
        showBackground && "bg-cover bg-center bg-fixed"
      )}
      style={
        showBackground
          ? { backgroundImage: `url(${appBackgroundUrl})` }
          : undefined
      }
    >
      {/* Effect 1 */}
      {/* Vignette overlay for custom backgrounds (disabled in light mode) */}
      {showBackground && !isLightMode && (
        <div
          className="absolute z-0 inset-0 pointer-events-none"
          style={{
            background: `
              linear-gradient(to bottom, rgba(0, 0, 0, 0.4) 0%, transparent 4rem),
              linear-gradient(to top, rgba(0, 0, 0, 0.4) 0%, transparent 4rem)
            `,
          }}
        />
      )}

      {/* Effect 2 */}
      {/* Semi-transparent overlay for readability when background is set */}
      {showBackground && appFocus.isChat() && (
        <>
          <div className="absolute inset-0 backdrop-blur-[1px] pointer-events-none" />
          {isSafari ? (
            <div
              className="absolute z-0 inset-0 bg-cover bg-center bg-fixed pointer-events-none"
              style={{
                backgroundImage: `url(${appBackgroundUrl})`,
                filter: "blur(16px)",
                maskImage: horizontalBlurMask,
                WebkitMaskImage: horizontalBlurMask,
              }}
            />
          ) : (
            <div
              className="absolute z-0 inset-0 backdrop-blur-md transition-all duration-600 pointer-events-none"
              style={{
                maskImage: horizontalBlurMask,
                WebkitMaskImage: horizontalBlurMask,
              }}
            />
          )}
        </>
      )}

      <div className="z-app-layout">
        {!appFocus.isSharedChat() && <Header />}
      </div>
      <div className="z-app-layout flex-1 overflow-auto h-full w-full">
        {children}
      </div>
      <div className="z-app-layout">
        <Footer />
      </div>
    </div>
  );
}

export { Root };


================================================
FILE: web/src/layouts/expandable-card-layouts.tsx
================================================
/**
 * Expandable Card Layout Components
 *
 * A namespaced collection of components for building expandable cards with
 * collapsible content sections. These provide the structural foundation
 * without opinionated content styling - just pure containers.
 *
 * Use these components when you need:
 * - A card with a header that can have expandable content below it
 * - Automatic border-radius handling based on whether content exists/is folded
 * - Controlled or uncontrolled folding state
 *
 * @example
 * ```tsx
 * import * as ExpandableCard from "@/layouts/expandable-card-layouts";
 *
 * // Uncontrolled — Root manages its own state
 * function MyCard() {
 *   return (
 *     <ExpandableCard.Root>
 *       <ExpandableCard.Header>
 *         <div className="p-4">
 *           <h3>My Header</h3>
 *         </div>
 *       </ExpandableCard.Header>
 *       <ExpandableCard.Content>
 *         <div className="p-4">
 *           <p>Expandable content goes here</p>
 *         </div>
 *       </ExpandableCard.Content>
 *     </ExpandableCard.Root>
 *   );
 * }
 *
 * // Controlled — consumer owns the state
 * function MyControlledCard() {
 *   const [isFolded, setIsFolded] = useState(false);
 *
 *   return (
 *     <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
 *       <ExpandableCard.Header>
 *         <button onClick={() => setIsFolded(!isFolded)}>Toggle</button>
 *       </ExpandableCard.Header>
 *       <ExpandableCard.Content>
 *         <p>Content here</p>
 *       </ExpandableCard.Content>
 *     </ExpandableCard.Root>
 *   );
 * }
 * ```
 */

"use client";

import React, {
  createContext,
  useContext,
  useState,
  useMemo,
  useLayoutEffect,
  Dispatch,
  SetStateAction,
} from "react";
import { cn } from "@/lib/utils";
import { WithoutStyles } from "@/types";
import ShadowDiv from "@/refresh-components/ShadowDiv";
import { Section, SectionProps } from "@/layouts/general-layouts";
import {
  Collapsible,
  CollapsibleContent,
} from "@/refresh-components/Collapsible";

/**
 * Expandable Card Context
 *
 * Provides folding state management for expandable cards without prop drilling.
 * Also tracks whether content is present via self-registration.
 */
interface ExpandableCardContextValue {
  isFolded: boolean;
  setIsFolded: Dispatch<SetStateAction<boolean>>;
  hasContent: boolean;
  registerContent: () => () => void;
}

const ExpandableCardContext = createContext<
  ExpandableCardContextValue | undefined
>(undefined);

function useExpandableCardContext() {
  const context = useContext(ExpandableCardContext);
  if (!context) {
    throw new Error(
      "ExpandableCard components must be used within an ExpandableCard.Root"
    );
  }
  return context;
}

/**
 * Expandable Card Root Component
 *
 * The root container and context provider for an expandable card. Provides a
 * flex column layout with no gap or padding by default.
 *
 * Supports both controlled and uncontrolled folding state:
 * - **Uncontrolled**: Manages its own state. Use `defaultFolded` to set the
 *   initial folding state (defaults to `false`, i.e. expanded).
 * - **Controlled**: Pass `isFolded` and `onFoldedChange` to manage folding
 *   state externally.
 *
 * @example
 * ```tsx
 * // Uncontrolled
 * <ExpandableCard.Root>
 *   <ExpandableCard.Header>...</ExpandableCard.Header>
 *   <ExpandableCard.Content>...</ExpandableCard.Content>
 * </ExpandableCard.Root>
 *
 * // Uncontrolled, starts folded
 * <ExpandableCard.Root defaultFolded>
 *   ...
 * </ExpandableCard.Root>
 *
 * // Controlled
 * const [isFolded, setIsFolded] = useState(false);
 * <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
 *   ...
 * </ExpandableCard.Root>
 * ```
 */
export interface ExpandableCardRootProps extends SectionProps {
  /** Controlled folding state. When provided, the component is controlled. */
  isFolded?: boolean;
  /** Callback when folding state changes. Required for controlled usage. */
  onFoldedChange?: Dispatch<SetStateAction<boolean>>;
  /** Initial folding state for uncontrolled usage. Defaults to `false`. */
  defaultFolded?: boolean;
}

function ExpandableCardRoot({
  isFolded: controlledFolded,
  onFoldedChange,
  defaultFolded = false,
  ...props
}: ExpandableCardRootProps) {
  const [uncontrolledFolded, setUncontrolledFolded] = useState(defaultFolded);
  const isControlled = controlledFolded !== undefined;
  const isFolded = isControlled ? controlledFolded : uncontrolledFolded;
  const setIsFolded = isControlled
    ? onFoldedChange ?? (() => {})
    : setUncontrolledFolded;

  const [hasContent, setHasContent] = useState(false);

  // Registration function for Content to announce its presence
  const registerContent = useMemo(
    () => () => {
      setHasContent(true);
      return () => setHasContent(false);
    },
    []
  );

  const contextValue = useMemo(
    () => ({ isFolded, setIsFolded, hasContent, registerContent }),
    [isFolded, setIsFolded, hasContent, registerContent]
  );

  return (
    <ExpandableCardContext.Provider value={contextValue}>
      <Section gap={0} padding={0} {...props} />
    </ExpandableCardContext.Provider>
  );
}

/**
 * Expandable Card Header Component
 *
 * The header section of an expandable card. This is a pure container that:
 * - Has a border and neutral background
 * - Automatically handles border-radius based on content state:
 *   - Fully rounded when no content exists or when content is folded
 *   - Only top-rounded when content is visible
 *
 * You are responsible for adding your own padding, layout, and content inside.
 *
 * @example
 * ```tsx
 * <ExpandableCard.Header>
 *   <div className="flex items-center justify-between p-4">
 *     <h3>My Title</h3>
 *     <button>Action</button>
 *   </div>
 * </ExpandableCard.Header>
 * ```
 */
export interface ExpandableCardHeaderProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  children?: React.ReactNode;
}

function ExpandableCardHeader({
  children,
  ...props
}: ExpandableCardHeaderProps) {
  const { isFolded, hasContent } = useExpandableCardContext();

  // Round all corners if there's no content, or if content exists but is folded
  const shouldFullyRound = !hasContent || isFolded;

  return (
    <div
      {...props}
      className={cn(
        "border bg-background-neutral-00 w-full transition-[border-radius] duration-200 ease-out",
        shouldFullyRound ? "rounded-16" : "rounded-t-16"
      )}
    >
      {children}
    </div>
  );
}

/**
 * Expandable Card Content Component
 *
 * The expandable content section of the card. This is a pure container that:
 * - Self-registers with context to inform Header about its presence
 * - Animates open/closed using Radix Collapsible (slide down/up)
 * - Has side and bottom borders that connect to the header
 * - Has a max-height with scrollable overflow via ShadowDiv
 *
 * You are responsible for adding your own content inside.
 *
 * IMPORTANT: Only ONE Content component should be used within a single Root.
 * This component self-registers with the context to inform Header whether
 * content exists (for border-radius styling). Using multiple Content components
 * will cause incorrect unmount behavior.
 *
 * @example
 * ```tsx
 * <ExpandableCard.Content>
 *   <div className="p-4">
 *     <p>Your expandable content here</p>
 *   </div>
 * </ExpandableCard.Content>
 * ```
 */
export interface ExpandableCardContentProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  children?: React.ReactNode;
}

function ExpandableCardContent({
  children,
  ...props
}: ExpandableCardContentProps) {
  const { isFolded, registerContent } = useExpandableCardContext();

  // Self-register with context to inform Header that content exists
  useLayoutEffect(() => {
    return registerContent();
  }, [registerContent]);

  return (
    <Collapsible open={!isFolded} className="w-full">
      <CollapsibleContent>
        <div
          className={cn(
            "border-x border-b rounded-b-16 overflow-hidden w-full transition-opacity duration-200 ease-out",
            isFolded ? "opacity-0" : "opacity-100"
          )}
        >
          <ShadowDiv
            className="flex flex-col rounded-b-16 max-h-[20rem]"
            {...props}
          >
            {children}
          </ShadowDiv>
        </div>
      </CollapsibleContent>
    </Collapsible>
  );
}

export {
  ExpandableCardRoot as Root,
  ExpandableCardHeader as Header,
  ExpandableCardContent as Content,
};


================================================
FILE: web/src/layouts/general-layouts.tsx
================================================
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import { WithoutStyles } from "@/types";
import { Content } from "@opal/layouts";
import { IconProps } from "@opal/types";
import React from "react";

export type FlexDirection = "row" | "column";
export type JustifyContent = "start" | "center" | "end" | "between";
export type AlignItems = "start" | "center" | "end" | "stretch";
export type Length = "auto" | "fit" | "full" | number;

const flexDirectionClassMap: Record<FlexDirection, string> = {
  row: "flex-row",
  column: "flex-col",
};
const justifyClassMap: Record<JustifyContent, string> = {
  start: "justify-start",
  center: "justify-center",
  end: "justify-end",
  between: "justify-between",
};
const alignClassMap: Record<AlignItems, string> = {
  start: "items-start",
  center: "items-center",
  end: "items-end",
  stretch: "items-stretch",
};
export const widthClassmap: Record<Length, string> = {
  auto: "w-auto flex-shrink-0",
  fit: "w-fit flex-shrink-0",
  full: "w-full",
};
export const heightClassmap: Record<Length, string> = {
  auto: "h-auto",
  fit: "h-fit",
  full: "h-full min-h-0",
};

/**
 * Section - A flexible container component for grouping related content
 *
 * Provides a standardized layout container with configurable direction and spacing.
 * Uses flexbox layout with customizable gap between children. Defaults to column layout.
 *
 * @param flexDirection - Flex direction. Default: "column".
 * @param justifyContent - Justify content along the main axis. Default: "center".
 * @param alignItems - Align items along the cross axis. Default: "center".
 * @param width - Width of the container: "auto", "fit", or "full". Default: "full".
 * @param height - Height of the container: "auto", "fit", or "full". Default: "full".
 * @param gap - Gap in REM units between children. Default: 1 (translates to gap-4 in Tailwind)
 * @param padding - Padding in REM units. Default: 0
 * @param wrap - If true, enables flex-wrap. Default: false
 * @param dbg - If true, adds a debug red border for visual debugging. Default: false
 *
 * @example
 * ```tsx
 * import * as GeneralLayouts from "@/layouts/general-layouts";
 *
 * // Column section with default gap - centered
 * <GeneralLayouts.Section>
 *   <Card>First item</Card>
 *   <Card>Second item</Card>
 * </GeneralLayouts.Section>
 *
 * // Row section aligned to the left and vertically centered
 * <GeneralLayouts.Section flexDirection="row" justifyContent="start" alignItems="center">
 *   <Button>Cancel</Button>
 *   <Button>Save</Button>
 * </GeneralLayouts.Section>
 *
 * // Column section with items aligned to the right
 * <GeneralLayouts.Section alignItems="end" gap={2}>
 *   <InputTypeIn label="Name" />
 *   <InputTypeIn label="Email" />
 * </GeneralLayouts.Section>
 *
 * // Row section centered both ways
 * <GeneralLayouts.Section flexDirection="row" justifyContent="center" alignItems="center">
 *   <Text>Centered content</Text>
 * </GeneralLayouts.Section>
 *
 * // Section with fit width
 * <GeneralLayouts.Section width="fit">
 *   <Button>Fit to content</Button>
 * </GeneralLayouts.Section>
 * ```
 *
 * @remarks
 * - The component defaults to column layout when no direction is specified
 * - Full width and height by default
 * - Accepts className for additional styling; style prop is not available
 * - Import using namespace import for consistent usage: `import * as GeneralLayouts from "@/layouts/general-layouts"`
 */
export interface SectionProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {
  className?: string;
  flexDirection?: FlexDirection;
  justifyContent?: JustifyContent;
  alignItems?: AlignItems;
  width?: Length;
  height?: Length;

  gap?: number;
  padding?: number;
  wrap?: boolean;

  // Debugging utilities
  dbg?: boolean;

  ref?: React.Ref<HTMLDivElement>;
}

/**
 * `<Disabled>` from `@opal/core` uses `display: contents` — it can safely
 * wrap a `Section` without affecting layout.
 */
function Section({
  className,
  flexDirection = "column",
  justifyContent = "center",
  alignItems = "center",
  width = "full",
  height = "full",
  gap = 1,
  padding = 0,
  wrap,
  dbg,
  ref,
  ...rest
}: SectionProps) {
  return (
    <div
      ref={ref}
      className={cn(
        "flex",

        flexDirectionClassMap[flexDirection],
        justifyClassMap[justifyContent],
        alignClassMap[alignItems],
        typeof width === "string" && widthClassmap[width],
        typeof height === "string" && heightClassmap[height],
        typeof height === "number" && "overflow-hidden",

        wrap && "flex-wrap",
        dbg && "dbg-red",
        className
      )}
      style={{
        gap: `${gap}rem`,
        padding: `${padding}rem`,
        ...(typeof width === "number" && { width: `${width}rem` }),
        ...(typeof height === "number" && { height: `${height}rem` }),
      }}
      {...rest}
    />
  );
}

export interface AttachmentItemLayoutProps {
  title: string;
  description: string;
  icon: React.FunctionComponent<IconProps>;
  middleText?: string;
  rightChildren?: React.ReactNode;
}
function AttachmentItemLayout({
  title,
  description,
  icon: Icon,
  middleText,
  rightChildren,
}: AttachmentItemLayoutProps) {
  return (
    <Section flexDirection="row" gap={0.25} padding={0.25}>
      <div className={cn("h-[2.25rem] aspect-square rounded-08")}>
        <Section>
          <div
            className="attachment-button__icon-wrapper"
            data-testid="attachment-item-icon-wrapper"
          >
            <Icon className="attachment-button__icon" />
          </div>
        </Section>
      </div>
      <Section
        flexDirection="row"
        justifyContent="between"
        alignItems="center"
        gap={1.5}
      >
        <div data-testid="attachment-item-title" className="flex-1 min-w-0">
          <Content
            title={title}
            description={description}
            sizePreset="main-ui"
            variant="section"
            widthVariant="full"
          />
        </div>
        {middleText && (
          <div className="flex-1 min-w-0">
            <Truncated text03 secondaryBody>
              {middleText}
            </Truncated>
          </div>
        )}
        {rightChildren && (
          <div className="flex-shrink-0 px-1">{rightChildren}</div>
        )}
      </Section>
    </Section>
  );
}

/**
 * CardItemLayout - A layout for card headers with icon, title, description, and actions
 *
 * Structure:
 *   Column [
 *     Row [
 *       Row [ Icon (18px), Title ],
 *       rightChildren (action buttons)
 *     ],
 *     Description (optional, 2-line clamp)
 *   ]
 *
 * Used for card components that display an entity with:
 * - An icon on the left (18px, controlled by this layout)
 * - A title next to the icon
 * - Optional action buttons on the right
 * - Optional description below (2-line max)
 *
 * @param icon - Icon component to render on the left. Receives `size` prop from layout.
 *               Use a callback for custom components: `(props) => <AgentAvatar {...props} />`
 * @param title - The main title text
 * @param description - Optional description text below the title row (clamped to 2 lines)
 * @param rightChildren - Optional content on the right (typically action buttons)
 */
export interface CardItemLayoutProps {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  description?: string;
  rightChildren?: React.ReactNode;
}
function CardItemLayout({
  icon: Icon,
  title,
  description,
  rightChildren,
}: CardItemLayoutProps) {
  return (
    <div className="flex flex-col flex-1 self-stretch items-center gap-1 p-1">
      <div className="flex flex-row self-stretch items-center justify-between gap-1">
        <div className="flex flex-row items-center self-stretch p-1.5 gap-1.5">
          <div className="px-0.5">
            <Icon size={18} />
          </div>
          <Truncated mainContentBody>{title}</Truncated>
        </div>

        {rightChildren && (
          <div className={cn("flex flex-row p-0.5 items-center")}>
            {rightChildren}
          </div>
        )}
      </div>

      {description && (
        <div className="pb-1 px-2 flex self-stretch">
          <Text
            as="p"
            secondaryBody
            text03
            className="line-clamp-2 truncate whitespace-normal h-[2.2rem] break-words"
          >
            {description}
          </Text>
        </div>
      )}
    </div>
  );
}
export { Section, CardItemLayout, AttachmentItemLayout };


================================================
FILE: web/src/layouts/input-layouts.tsx
================================================
"use client";

import type { RichStr } from "@opal/types";
import { resolveStr } from "@opal/components/text/InlineMarkdown";
import Text from "@/refresh-components/texts/Text";
import { SvgXOctagon, SvgAlertCircle } from "@opal/icons";
import { useField, useFormikContext } from "formik";
import { Section } from "@/layouts/general-layouts";
import { Content } from "@opal/layouts";
import Label from "@/refresh-components/form/Label";

interface OrientationLayoutProps {
  name?: string;
  disabled?: boolean;
  nonInteractive?: boolean;
  children?: React.ReactNode;
  title: string | RichStr;
  description?: string | RichStr;
  suffix?: "optional" | (string & {});
  sizePreset?: "main-content" | "main-ui";
}

/**
 * VerticalInputLayout - A layout component for form fields with vertical label arrangement
 *
 * Use this layout when you want the label, input, and error message stacked vertically.
 * Common for most form inputs where the label appears above the input field.
 *
 * Exported as `Vertical` for convenient usage.
 *
 * @example
 * ```tsx
 * import { Vertical } from "@/layouts/input-layouts";
 *
 * <Vertical
 *   name="email"
 *   title="Email Address"
 *   description="We'll never share your email"
 *   optional
 * >
 *   <InputTypeIn name="email" type="email" />
 * </Vertical>
 * ```
 */
export interface VerticalLayoutProps extends OrientationLayoutProps {
  subDescription?: string | RichStr;
}
function VerticalInputLayout({
  name,
  disabled,
  nonInteractive,
  children,
  subDescription,
  title,
  description,
  suffix,
  sizePreset = "main-content",
}: VerticalLayoutProps) {
  const content = (
    <Section gap={0.25} alignItems="start">
      <Content
        title={title}
        description={description}
        suffix={suffix}
        sizePreset={sizePreset}
        variant="section"
      />
      {children}
      {name && <ErrorLayout name={name} />}
      {subDescription && (
        <Text secondaryBody text03>
          {resolveStr(subDescription)}
        </Text>
      )}
    </Section>
  );

  if (nonInteractive) return content;
  return (
    <Label name={name} disabled={disabled}>
      {content}
    </Label>
  );
}

/**
 * HorizontalInputLayout - A layout component for form fields with horizontal label arrangement
 *
 * Use this layout when you want the label on the left and the input control on the right.
 * Commonly used for toggles, switches, and checkboxes where the label and control
 * should be side-by-side.
 *
 * Exported as `Horizontal` for convenient usage.
 *
 * @example
 * ```tsx
 * import { Horizontal } from "@/layouts/input-layouts";
 *
 * // Default behavior (top-aligned)
 * <Horizontal
 *   name="notifications"
 *   title="Enable Notifications"
 *   description="Receive updates about your account"
 * >
 *   <Switch name="notifications" />
 * </Horizontal>
 *
 * // Force center alignment (vertically centers input with label)
 * <Horizontal
 *   name="notifications"
 *   title="Enable Notifications"
 *   description="Receive updates about your account"
 *   center
 * >
 *   <Switch name="notifications" />
 * </Horizontal>
 * ```
 */
export interface HorizontalLayoutProps extends OrientationLayoutProps {
  /** Align input to the center (middle) of the label/description */
  center?: boolean;
}
function HorizontalInputLayout({
  name,
  disabled,
  nonInteractive,
  children,
  center,
  title,
  description,
  suffix,
  sizePreset = "main-content",
}: HorizontalLayoutProps) {
  const content = (
    <Section gap={0.25} alignItems="start">
      <Section
        flexDirection="row"
        justifyContent="between"
        alignItems={center ? "center" : "start"}
      >
        <div className="flex flex-col flex-1 min-w-0 self-stretch">
          <Content
            title={title}
            description={description}
            suffix={suffix}
            sizePreset={sizePreset}
            variant="section"
            widthVariant="full"
          />
        </div>
        <div className="flex flex-col items-end">{children}</div>
      </Section>
      {name && <ErrorLayout name={name} />}
    </Section>
  );

  if (nonInteractive) return content;
  return (
    <Label name={name} disabled={disabled}>
      {content}
    </Label>
  );
}

/**
 * ErrorLayout - Displays Formik field validation errors
 *
 * Automatically shows error messages from Formik's validation state.
 * Only displays when the field has been touched and has an error.
 *
 * Exported as `Error` for convenient usage.
 *
 * @param name - The Formik field name to display errors for
 *
 * @example
 * ```tsx
 * import { Error } from "@/layouts/input-layouts";
 *
 * <InputTypeIn name="email" />
 * <Error name="email" />
 * ```
 *
 * @remarks
 * This component uses Formik's `useField` hook internally and requires
 * the component to be rendered within a Formik context.
 */
interface ErrorLayoutProps {
  name: string;
}
function ErrorLayout({ name }: ErrorLayoutProps) {
  const [, meta] = useField(name);
  const { status } = useFormikContext();
  const warning = status?.warnings?.[name];
  if (warning && typeof warning !== "string")
    throw new Error("The warning that is set must ALWAYS be a string");

  const hasError = meta.touched && meta.error;
  const hasWarning = warning; // Don't require touched for warnings

  // If `hasError` and `hasWarning` are both true at the same time, the error is prioritized and returned first.
  if (hasError)
    return <ErrorTextLayout type="error">{meta.error}</ErrorTextLayout>;
  else if (hasWarning)
    return <ErrorTextLayout type="warning">{warning}</ErrorTextLayout>;
  else return null;
}

export type ErrorTextType = "error" | "warning";
interface ErrorTextLayoutProps {
  children?: React.ReactNode;
  type?: ErrorTextType;
}
function ErrorTextLayout({ children, type = "error" }: ErrorTextLayoutProps) {
  const Icon = type === "error" ? SvgXOctagon : SvgAlertCircle;
  const colorClass =
    type === "error" ? "text-status-error-05" : "text-status-warning-05";
  const strokeClass =
    type === "error" ? "stroke-status-error-05" : "stroke-status-warning-05";

  return (
    <div className="px-1">
      <Section flexDirection="row" justifyContent="start" gap={0.25}>
        <Icon size={12} className={strokeClass} />
        <Text secondaryBody className={colorClass} role="alert">
          {children}
        </Text>
      </Section>
    </div>
  );
}

export {
  VerticalInputLayout as Vertical,
  HorizontalInputLayout as Horizontal,
  ErrorLayout as Error,
  ErrorTextLayout,
};


================================================
FILE: web/src/layouts/settings-layouts.tsx
================================================
"use client";

/**
 * Settings Page Layout Components
 *
 * A namespaced collection of components for building consistent settings pages.
 * These components provide a standardized layout with scroll-aware headers,
 * centered content containers, and automatic responsive behavior.
 *
 * @example
 * ```tsx
 * import SettingsLayouts from "@/layouts/settings-layouts";
 * import { SvgSettings } from "@opal/icons";
 *
 * function MySettingsPage() {
 *   return (
 *     <SettingsLayouts.Root>
 *       <SettingsLayouts.Header
 *         icon={SvgSettings}
 *         title="Account Settings"
 *         description="Manage your account preferences and settings"
 *         rightChildren={<Button>Save</Button>}
 *       >
 *         <InputTypeIn placeholder="Search settings..." />
 *       </SettingsLayouts.Header>
 *
 *       <SettingsLayouts.Body>
 *         <Card>Settings content here</Card>
 *       </SettingsLayouts.Body>
 *     </SettingsLayouts.Root>
 *   );
 * }
 * ```
 */

import BackButton from "@/refresh-components/buttons/BackButton";
import { cn } from "@/lib/utils";
import Separator from "@/refresh-components/Separator";
import { WithoutStyles } from "@/types";
import { IconFunctionComponent } from "@opal/types";
import { HtmlHTMLAttributes, useEffect, useRef, useState } from "react";
import { Content } from "@opal/layouts";
import Spacer from "@/refresh-components/Spacer";

const widthClasses = {
  sm: "w-[min(var(--container-sm),100%)]",
  "sm-md": "w-[min(var(--container-sm-md),100%)]",
  md: "w-[min(var(--container-md),100%)]",
  lg: "w-[min(var(--container-lg),100%)]",
  full: "w-[var(--container-full)]",
};

/**
 * Settings Root Component
 *
 * Wrapper component that provides the base structure for settings pages.
 * Creates a centered, scrollable container with configurable width.
 *
 * Features:
 * - Full height container with centered content
 * - Automatic overflow-y scrolling
 * - Contains the scroll container ID that Settings.Header uses for shadow detection
 * - Configurable width via CSS variables defined in sizes.css:
 *   "sm" (672px), "sm-md" (752px), "md" (872px, default), "lg" (992px), "full" (100%)
 *
 * @example
 * ```tsx
 * // Default medium width (872px max)
 * <SettingsLayouts.Root>
 *   <SettingsLayouts.Header {...} />
 *   <SettingsLayouts.Body>...</SettingsLayouts.Body>
 * </SettingsLayouts.Root>
 *
 * // Large width (992px max)
 * <SettingsLayouts.Root width="lg">
 *   <SettingsLayouts.Header {...} />
 *   <SettingsLayouts.Body>...</SettingsLayouts.Body>
 * </SettingsLayouts.Root>
 * ```
 */
interface SettingsRootProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {
  width?: keyof typeof widthClasses;
}
function SettingsRoot({ width = "md", ...props }: SettingsRootProps) {
  return (
    <div
      id="page-wrapper-scroll-container"
      className="w-full h-full flex flex-col items-center overflow-y-auto"
    >
      {/* WARNING: The id="page-wrapper-scroll-container" above is used by SettingsHeader
          to detect scroll position and show/hide the scroll shadow.
          DO NOT REMOVE this ID without updating SettingsHeader accordingly. */}
      <div className={cn("h-full", widthClasses[width])}>
        <div {...props} />
      </div>
    </div>
  );
}

/**
 * Settings Header Component
 *
 * Sticky header component for settings pages with icon, title, description,
 * and optional actions. Automatically shows a scroll shadow when the page
 * has been scrolled down.
 *
 * Features:
 * - Sticky positioning at the top of the page
 * - Icon display (1.75rem size)
 * - Title (headingH2 style)
 * - Optional description (string)
 * - Optional right-aligned action buttons via rightChildren
 * - Optional children content below title/description
 * - Optional back button
 * - Optional bottom separator
 * - Automatic scroll shadow effect
 *
 * @example
 * ```tsx
 * // Basic header
 * <SettingsLayouts.Header
 *   icon={SvgUser}
 *   title="Profile Settings"
 *   description="Update your profile information"
 * />
 *
 * // Without description
 * <SettingsLayouts.Header
 *   icon={SvgUser}
 *   title="Profile Settings"
 * />
 *
 * // With action buttons
 * <SettingsLayouts.Header
 *   icon={SvgSettings}
 *   title="General Settings"
 *   description="Configure your preferences"
 *   rightChildren={
 *     <Button onClick={handleSave}>Save Changes</Button>
 *   }
 * />
 *
 * // With search/filter below and bottom separator
 * <SettingsLayouts.Header
 *   icon={SvgDatabase}
 *   title="Data Sources"
 *   description="Manage your connected data sources"
 *   separator
 * >
 *   <InputTypeIn placeholder="Search data sources..." />
 * </SettingsLayouts.Header>
 *
 * // With back button
 * <SettingsLayouts.Header
 *   icon={SvgArrow}
 *   title="Advanced Settings"
 *   description="Expert configuration options"
 *   backButton
 * />
 *
 * // With string description
 * <SettingsLayouts.Header
 *   icon={SvgDatabase}
 *   title="API Keys"
 *   description="Manage your API keys"
 * />
 * ```
 */
export interface SettingsHeaderProps {
  icon: IconFunctionComponent;
  title: string;
  description?: string;
  children?: React.ReactNode;
  rightChildren?: React.ReactNode;
  backButton?: boolean;
  onBack?: () => void;
  separator?: boolean;
}
function SettingsHeader({
  icon: Icon,
  title,
  description,
  children,
  rightChildren,
  backButton,
  onBack,
  separator,
}: SettingsHeaderProps) {
  const [showShadow, setShowShadow] = useState(false);
  const headerRef = useRef<HTMLDivElement>(null);

  // # NOTE (@Subash-Mohan)
  // Headers with actions are always sticky, others are not.
  const isSticky = !!rightChildren;

  useEffect(() => {
    if (!isSticky) return;

    // IMPORTANT: This component relies on SettingsRoot having the ID "page-wrapper-scroll-container"
    // on its scrollable container. If that ID is removed or changed, the scroll shadow will not work.
    const scrollContainer = document.getElementById(
      "page-wrapper-scroll-container"
    );
    if (!scrollContainer) return;

    const handleScroll = () => {
      // Show shadow if the scroll container has been scrolled down
      setShowShadow(scrollContainer.scrollTop > 0);
    };

    scrollContainer.addEventListener("scroll", handleScroll);
    handleScroll(); // Check initial state

    return () => scrollContainer.removeEventListener("scroll", handleScroll);
  }, [isSticky]);

  return (
    <div
      ref={headerRef}
      className={cn(
        "w-full bg-background-tint-01",
        isSticky && "sticky top-0 z-settings-header",
        backButton && "md:pt-4"
      )}
    >
      {backButton && (
        <div className="px-2">
          <BackButton behaviorOverride={onBack} />
        </div>
      )}

      <Spacer vertical rem={2.5} />

      <div className="flex flex-col gap-6 px-4">
        <div className="flex w-full justify-between">
          <div aria-label="admin-page-title">
            <Content
              icon={Icon}
              title={title}
              description={description}
              sizePreset="headline"
              variant="heading"
            />
          </div>
          {rightChildren}
        </div>

        {children}
      </div>

      {separator ? (
        <>
          <Spacer vertical rem={1.5} />
          <Separator noPadding className="px-4" />
        </>
      ) : (
        <Spacer vertical rem={0.5} />
      )}

      {isSticky && (
        <div
          className={cn(
            "absolute left-0 right-0 h-[0.5rem] pointer-events-none transition-opacity duration-300 rounded-b-08 opacity-0",
            showShadow && "opacity-100"
          )}
          style={{
            background:
              "linear-gradient(to bottom, var(--mask-02), transparent)",
          }}
        />
      )}
    </div>
  );
}

/**
 * Settings Body Component
 *
 * Content container for settings page body. Provides consistent padding
 * and vertical spacing for content sections.
 *
 * Features:
 * - Top padding: 1.5rem (pt-6)
 * - Bottom padding: 4.5rem (pb-[4.5rem])
 * - Horizontal padding: 1rem (px-4)
 * - Flex column layout with 2rem gap (gap-8)
 * - Full width container
 *
 * @example
 * ```tsx
 * <SettingsLayouts.Body>
 *   <Card>
 *     <h3>Section 1</h3>
 *     <p>Content here</p>
 *   </Card>
 *   <Card>
 *     <h3>Section 2</h3>
 *     <p>More content</p>
 *   </Card>
 * </SettingsLayouts.Body>
 * ```
 */
function SettingsBody(
  props: WithoutStyles<HtmlHTMLAttributes<HTMLDivElement>>
) {
  return (
    <div
      className="pt-6 pb-[4.5rem] px-4 flex flex-col gap-8 w-full"
      {...props}
    />
  );
}

export { SettingsRoot as Root, SettingsHeader as Header, SettingsBody as Body };


================================================
FILE: web/src/layouts/table-layouts.tsx
================================================
import { cn } from "@/lib/utils";
import { WithoutStyles } from "@/types";
import React from "react";

// ============================================================================
// TABLE LAYOUTS - For building table-like structures without raw divs
// ============================================================================

/**
 * TableRow - A horizontal row layout for tables/lists
 *
 * @param selected - If true, applies selected background styling
 * @param onClick - Click handler for the row
 * @param children - Row content
 */
interface TableRowProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {
  selected?: boolean;
}
function TableRow({ selected, children, onClick, ...rest }: TableRowProps) {
  return (
    <div
      className={cn("table-row-layout", onClick && "cursor-pointer")}
      data-selected={selected ? "true" : undefined}
      onClick={onClick}
      {...rest}
    >
      {children}
    </div>
  );
}

/**
 * TableCell - A cell within a table row
 *
 * @param flex - If true, cell takes remaining space (flex: 1)
 * @param fixed - If true, cell has fixed width (doesn't shrink)
 * @param width - Optional fixed width in rem
 * @param children - Cell content
 */
interface TableCellProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {
  flex?: boolean;
  width?: number;
}
function TableCell({ flex, width, children, ...rest }: TableCellProps) {
  return (
    <div
      className="table-cell-layout"
      data-flex={flex ? "true" : undefined}
      data-fixed={width ? "true" : undefined}
      style={width ? { width: `${width}rem` } : undefined}
      {...rest}
    >
      {children}
    </div>
  );
}

/**
 * SidebarLayout - A fixed-width sidebar container
 *
 * @param children - Sidebar content
 */
interface SidebarLayoutProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}
function SidebarLayout({ children, ...rest }: SidebarLayoutProps) {
  return (
    <div className="sidebar-layout" {...rest}>
      {children}
    </div>
  );
}

/**
 * TwoColumnLayout - A two-column layout with sidebar and content
 *
 * @param children - Should contain sidebar and content sections
 */
interface TwoColumnLayoutProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {
  minHeight?: number;
}
function TwoColumnLayout({
  minHeight,
  children,
  ...rest
}: TwoColumnLayoutProps) {
  return (
    <div
      className="two-column-layout"
      style={minHeight ? { minHeight: `${minHeight}rem` } : undefined}
      {...rest}
    >
      {children}
    </div>
  );
}

/**
 * ContentColumn - The main content area in a two-column layout
 */
interface ContentColumnProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}
function ContentColumn({ children, ...rest }: ContentColumnProps) {
  return (
    <div className="content-column-layout" {...rest}>
      {children}
    </div>
  );
}

/**
 * HiddenInput - A hidden input element (for file uploads, etc.)
 */
interface HiddenInputProps extends React.InputHTMLAttributes<HTMLInputElement> {
  inputRef?: React.Ref<HTMLInputElement>;
}
function HiddenInput({ inputRef, ...rest }: HiddenInputProps) {
  return <input ref={inputRef} className="hidden-input" {...rest} />;
}

/**
 * CheckboxCell - A fixed-width cell for checkboxes in tables
 */
interface CheckboxCellProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}
function CheckboxCell({ children, ...rest }: CheckboxCellProps) {
  return (
    <div className="checkbox-cell-layout" {...rest}>
      {children}
    </div>
  );
}

/**
 * SourceIconsRow - A row of source icons
 */
interface SourceIconsRowProps
  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}
function SourceIconsRow({ children, ...rest }: SourceIconsRowProps) {
  return (
    <div className="source-icons-layout" {...rest}>
      {children}
    </div>
  );
}

export {
  TableRow,
  TableCell,
  SidebarLayout,
  TwoColumnLayout,
  ContentColumn,
  HiddenInput,
  CheckboxCell,
  SourceIconsRow,
};


================================================
FILE: web/src/lib/admin/users/userMutationFetcher.ts
================================================
const userMutationFetcher = async (
  url: string,
  { arg }: { arg: { user_email: string; new_role?: string; method?: string } }
) => {
  const { method = "PATCH", ...body } = arg;
  return fetch(url, {
    method,
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(body),
  }).then(async (res) => {
    if (res.ok) return res.json();

    const errorDetail = (await res.json()).detail;
    throw Error(errorDetail);
  });
};

export default userMutationFetcher;


================================================
FILE: web/src/lib/admin/voice/svc.ts
================================================
const VOICE_PROVIDERS_URL = "/api/admin/voice/providers";

export async function activateVoiceProvider(
  providerId: number,
  mode: "stt" | "tts",
  ttsModel?: string
): Promise<Response> {
  const url = new URL(
    `${VOICE_PROVIDERS_URL}/${providerId}/activate-${mode}`,
    window.location.origin
  );
  if (mode === "tts" && ttsModel) {
    url.searchParams.set("tts_model", ttsModel);
  }
  return fetch(url.toString(), { method: "POST" });
}

export async function deactivateVoiceProvider(
  providerId: number,
  mode: "stt" | "tts"
): Promise<Response> {
  return fetch(`${VOICE_PROVIDERS_URL}/${providerId}/deactivate-${mode}`, {
    method: "POST",
  });
}

export async function testVoiceProvider(request: {
  provider_type: string;
  api_key?: string;
  target_uri?: string;
  use_stored_key?: boolean;
}): Promise<Response> {
  return fetch(`${VOICE_PROVIDERS_URL}/test`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
}

export async function upsertVoiceProvider(
  request: Record<string, unknown>
): Promise<Response> {
  return fetch(VOICE_PROVIDERS_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
}

export async function fetchVoicesByType(
  providerType: string
): Promise<Response> {
  return fetch(`/api/admin/voice/voices?provider_type=${providerType}`);
}

export async function deleteVoiceProvider(
  providerId: number
): Promise<Response> {
  return fetch(`${VOICE_PROVIDERS_URL}/${providerId}`, { method: "DELETE" });
}

export async function fetchLLMProviders(): Promise<Response> {
  return fetch("/api/admin/llm/provider");
}


================================================
FILE: web/src/lib/admin-routes.ts
================================================
import { IconFunctionComponent } from "@opal/types";
import {
  SvgActions,
  SvgActivity,
  SvgArrowExchange,
  SvgAudio,
  SvgShareWebhook,
  SvgBarChart,
  SvgBookOpen,
  SvgBubbleText,
  SvgClipboard,
  SvgCpu,
  SvgDiscordMono,
  SvgDownload,
  SvgEmpty,
  SvgFileText,
  SvgFiles,
  SvgGlobe,
  SvgHistory,
  SvgImage,
  SvgMcp,
  SvgNetworkGraph,
  SvgOnyxOctagon,
  SvgPaintBrush,
  SvgProgressBars,
  SvgSearchMenu,
  SvgSlack,
  SvgTerminal,
  SvgThumbsUp,
  SvgUploadCloud,
  SvgUser,
  SvgUserKey,
  SvgUserSync,
  SvgUsers,
  SvgWallet,
  SvgZoomIn,
} from "@opal/icons";

export interface AdminRouteEntry {
  path: string;
  icon: IconFunctionComponent;
  title: string;
  sidebarLabel: string;
}

/**
 * Single source of truth for every admin route: path, icon, page-header
 * title, and sidebar label.
 */
export const ADMIN_ROUTES = {
  INDEXING_STATUS: {
    path: "/admin/indexing/status",
    icon: SvgBookOpen,
    title: "Existing Connectors",
    sidebarLabel: "Existing Connectors",
  },
  ADD_CONNECTOR: {
    path: "/admin/add-connector",
    icon: SvgUploadCloud,
    title: "Add Connector",
    sidebarLabel: "Add Connector",
  },
  DOCUMENT_SETS: {
    path: "/admin/documents/sets",
    icon: SvgFiles,
    title: "Document Sets",
    sidebarLabel: "Document Sets",
  },
  DOCUMENT_EXPLORER: {
    path: "/admin/documents/explorer",
    icon: SvgZoomIn,
    title: "Document Explorer",
    sidebarLabel: "Explorer",
  },
  DOCUMENT_FEEDBACK: {
    path: "/admin/documents/feedback",
    icon: SvgThumbsUp,
    title: "Document Feedback",
    sidebarLabel: "Feedback",
  },
  AGENTS: {
    path: "/admin/agents",
    icon: SvgOnyxOctagon,
    title: "Agents",
    sidebarLabel: "Agents",
  },
  SLACK_BOTS: {
    path: "/admin/bots",
    icon: SvgSlack,
    title: "Slack Integration",
    sidebarLabel: "Slack Integration",
  },
  DISCORD_BOTS: {
    path: "/admin/discord-bot",
    icon: SvgDiscordMono,
    title: "Discord Integration",
    sidebarLabel: "Discord Integration",
  },
  MCP_ACTIONS: {
    path: "/admin/actions/mcp",
    icon: SvgMcp,
    title: "MCP Actions",
    sidebarLabel: "MCP Actions",
  },
  OPENAPI_ACTIONS: {
    path: "/admin/actions/open-api",
    icon: SvgActions,
    title: "OpenAPI Actions",
    sidebarLabel: "OpenAPI Actions",
  },
  STANDARD_ANSWERS: {
    path: "/admin/standard-answer",
    icon: SvgClipboard,
    title: "Standard Answers",
    sidebarLabel: "Standard Answers",
  },
  GROUPS: {
    path: "/admin/groups",
    icon: SvgUsers,
    title: "Manage User Groups",
    sidebarLabel: "Groups",
  },
  CHAT_PREFERENCES: {
    path: "/admin/configuration/chat-preferences",
    icon: SvgBubbleText,
    title: "Chat Preferences",
    sidebarLabel: "Chat Preferences",
  },
  LLM_MODELS: {
    path: "/admin/configuration/llm",
    icon: SvgCpu,
    title: "Language Models",
    sidebarLabel: "Language Models",
  },
  WEB_SEARCH: {
    path: "/admin/configuration/web-search",
    icon: SvgGlobe,
    title: "Web Search",
    sidebarLabel: "Web Search",
  },
  IMAGE_GENERATION: {
    path: "/admin/configuration/image-generation",
    icon: SvgImage,
    title: "Image Generation",
    sidebarLabel: "Image Generation",
  },
  VOICE: {
    path: "/admin/configuration/voice",
    icon: SvgAudio,
    title: "Voice",
    sidebarLabel: "Voice",
  },
  CODE_INTERPRETER: {
    path: "/admin/configuration/code-interpreter",
    icon: SvgTerminal,
    title: "Code Interpreter",
    sidebarLabel: "Code Interpreter",
  },
  INDEX_SETTINGS: {
    path: "/admin/configuration/search",
    icon: SvgSearchMenu,
    title: "Index Settings",
    sidebarLabel: "Index Settings",
  },
  DOCUMENT_PROCESSING: {
    path: "/admin/configuration/document-processing",
    icon: SvgFileText,
    title: "Document Processing",
    sidebarLabel: "Document Processing",
  },
  KNOWLEDGE_GRAPH: {
    path: "/admin/kg",
    icon: SvgNetworkGraph,
    title: "Knowledge Graph",
    sidebarLabel: "Knowledge Graph",
  },
  USERS: {
    path: "/admin/users",
    icon: SvgUser,
    title: "Users & Requests",
    sidebarLabel: "Users",
  },
  API_KEYS: {
    path: "/admin/service-accounts",
    icon: SvgUserKey,
    title: "Service Accounts",
    sidebarLabel: "Service Accounts",
  },
  TOKEN_RATE_LIMITS: {
    path: "/admin/token-rate-limits",
    icon: SvgProgressBars,
    title: "Spending Limits",
    sidebarLabel: "Spending Limits",
  },
  USAGE: {
    path: "/admin/performance/usage",
    icon: SvgActivity,
    title: "Usage Statistics",
    sidebarLabel: "Usage Statistics",
  },
  QUERY_HISTORY: {
    path: "/admin/performance/query-history",
    icon: SvgHistory,
    title: "Query History",
    sidebarLabel: "Query History",
  },
  CUSTOM_ANALYTICS: {
    path: "/admin/performance/custom-analytics",
    icon: SvgBarChart,
    title: "Custom Analytics",
    sidebarLabel: "Custom Analytics",
  },
  THEME: {
    path: "/admin/theme",
    icon: SvgPaintBrush,
    title: "Appearance & Theming",
    sidebarLabel: "Appearance & Theming",
  },
  BILLING: {
    path: "/admin/billing",
    icon: SvgWallet,
    title: "Plans & Billing",
    sidebarLabel: "Plans & Billing",
  },
  INDEX_MIGRATION: {
    path: "/admin/document-index-migration",
    icon: SvgArrowExchange,
    title: "Document Index Migration",
    sidebarLabel: "Document Index Migration",
  },
  HOOKS: {
    path: "/admin/hooks",
    icon: SvgShareWebhook,
    title: "Hook Extensions",
    sidebarLabel: "Hook Extensions",
  },
  SCIM: {
    path: "/admin/scim",
    icon: SvgUserSync,
    title: "SCIM",
    sidebarLabel: "SCIM",
  },
  DEBUG: {
    path: "/admin/debug",
    icon: SvgDownload,
    title: "Debug Logs",
    sidebarLabel: "Debug Logs",
  },
  // Prefix-only entries used for layout matching — not rendered as sidebar
  // items or page headers.
  DOCUMENTS: {
    path: "/admin/documents",
    icon: SvgEmpty,
    title: "",
    sidebarLabel: "",
  },
  PERFORMANCE: {
    path: "/admin/performance",
    icon: SvgEmpty,
    title: "",
    sidebarLabel: "",
  },
} as const satisfies Record<string, AdminRouteEntry>;

/**
 * Helper that converts a route entry into the `{ name, icon, link }`
 * shape expected by the sidebar.
 */
export function sidebarItem(route: AdminRouteEntry) {
  return { name: route.sidebarLabel, icon: route.icon, link: route.path };
}


================================================
FILE: web/src/lib/agents.ts
================================================
import { MinimalPersonaSnapshot, Persona } from "@/app/admin/agents/interfaces";
import { User } from "./types";
import { checkUserIsNoAuthUser } from "./user";
import { personaComparator } from "@/app/admin/agents/lib";

/**
 * Checks if the given user owns the specified assistant.
 *
 * @param user - The user to check ownership for, or null if no user is logged in
 * @param assistant - The assistant to check ownership of
 * @returns true if the user owns the agent (or no auth is required), false otherwise
 */
export function checkUserOwnsAgent(
  user: User | null,
  agent: MinimalPersonaSnapshot | Persona
) {
  return checkUserIdOwnsAgent(user?.id, agent);
}

/**
 * Checks if the given user ID owns the specified assistant.
 *
 * Returns true if a valid user ID is provided and any of the following conditions
 * are met (and the agent is not built-in):
 * - The user is a no-auth user (authentication is disabled)
 * - The user ID matches the agent owner's ID
 *
 * Returns false if userId is undefined (e.g., user is loading or unauthenticated)
 * to prevent granting ownership access prematurely.
 *
 * @param userId - The user ID to check ownership for
 * @param assistant - The assistant to check ownership of
 * @returns true if the user owns the agent, false otherwise
 */
export function checkUserIdOwnsAgent(
  userId: string | undefined,
  agent: MinimalPersonaSnapshot | Persona
) {
  return (
    !!userId &&
    (checkUserIsNoAuthUser(userId) || agent.owner?.id === userId) &&
    !agent.builtin_persona
  );
}

/**
 * Updates the user's pinned assistants with the given ordered list of agent IDs.
 *
 * @param pinnedAgentIds - Array of agent IDs in the desired pinned order
 * @throws Error if the API request fails
 */
export async function pinAgents(pinnedAgentIds: number[]) {
  // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
  const response = await fetch(`/api/user/pinned-assistants`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      ordered_assistant_ids: pinnedAgentIds, // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
    }),
  });
  if (!response.ok) {
    throw new Error("Failed to update pinned assistants");
  }
}

/**
 * Filters and sorts assistants based on visibility.
 *
 * Only returns assistants that are marked as visible, sorted using the persona comparator.
 *
 * @param assistants - Array of assistants to filter
 * @returns Filtered and sorted array of visible assistants
 */
export function filterAgents(
  assistants: MinimalPersonaSnapshot[]
): MinimalPersonaSnapshot[] {
  let filteredAgents = assistants.filter((assistant) => assistant.is_listed);
  return filteredAgents.sort(personaComparator);
}

/**
 * Deletes an agent by its ID.
 *
 * @param agentId - The ID of the agent to delete
 * @returns null on success, or an error message string on failure
 */
export async function deleteAgent(agentId: number): Promise<string | null> {
  try {
    const response = await fetch(`/api/persona/${agentId}`, {
      method: "DELETE",
    });

    if (response.ok) {
      return null;
    }

    const errorMessage = (await response.json()).detail || "Unknown error";
    return errorMessage;
  } catch (error) {
    console.error("deleteAgent: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}

/**
 * Updates agent sharing settings.
 *
 * For MIT versions, group_ids should not be sent since group-based sharing
 * is an EE-only feature.
 *
 * @param agentId - The ID of the agent to update
 * @param userIds - Array of user IDs to share with
 * @param groupIds - Array of group IDs to share with (ignored when isPaidEnterpriseFeaturesEnabled is false)
 * @param isPublic - Whether the agent should be public
 * @param isPaidEnterpriseFeaturesEnabled - Whether enterprise features are enabled
 * @returns null on success, or an error message string on failure
 *
 * @example
 * const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
 * const error = await updateAgentSharedStatus(agentId, userIds, groupIds, isPublic, isPaidEnterpriseFeaturesEnabled);
 * if (error) console.error(error);
 */
export async function updateAgentSharedStatus(
  agentId: number,
  userIds: string[],
  groupIds: number[],
  isPublic: boolean | undefined,
  isPaidEnterpriseFeaturesEnabled: boolean,
  labelIds?: number[]
): Promise<null | string> {
  // MIT versions should not send group_ids - warn if caller provided non-empty groups
  if (!isPaidEnterpriseFeaturesEnabled && groupIds.length > 0) {
    console.error(
      "updateAgentSharedStatus: groupIds provided but enterprise features are disabled. " +
        "Group sharing is an EE-only feature. Discarding groupIds."
    );
  }

  try {
    const response = await fetch(`/api/persona/${agentId}/share`, {
      method: "PATCH",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        user_ids: userIds,
        // Only include group_ids for enterprise versions
        group_ids: isPaidEnterpriseFeaturesEnabled ? groupIds : undefined,
        is_public: isPublic,
        label_ids: labelIds,
      }),
    });

    if (response.ok) {
      return null;
    }

    const errorMessage = (await response.json()).detail || "Unknown error";
    return errorMessage;
  } catch (error) {
    console.error("updateAgentSharedStatus: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}

/**
 * Updates the labels assigned to an agent via the share endpoint.
 *
 * @param agentId - The ID of the agent to update
 * @param labelIds - Array of label IDs to assign to the agent
 * @returns null on success, or an error message string on failure
 */
export async function updateAgentLabels(
  agentId: number,
  labelIds: number[]
): Promise<string | null> {
  try {
    const response = await fetch(`/api/persona/${agentId}/share`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ label_ids: labelIds }),
    });

    if (response.ok) {
      return null;
    }

    const errorMessage = (await response.json()).detail || "Unknown error";
    return errorMessage;
  } catch (error) {
    console.error("updateAgentLabels: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}

/**
 * Updates the featured (default) status of an agent.
 *
 * @param agentId - The ID of the agent to update
 * @param isFeatured - Whether the agent should be featured
 * @returns null on success, or an error message string on failure
 */
export async function updateAgentFeaturedStatus(
  agentId: number,
  isFeatured: boolean
): Promise<string | null> {
  try {
    const response = await fetch(`/api/admin/persona/${agentId}/featured`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ is_featured: isFeatured }),
    });

    if (response.ok) {
      return null;
    }

    const errorMessage = (await response.json()).detail || "Unknown error";
    return errorMessage;
  } catch (error) {
    console.error("updateAgentFeaturedStatus: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}


================================================
FILE: web/src/lib/agentsSS.ts
================================================
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { fetchSS } from "./utilsSS";

export type FetchAgentsResponse = [MinimalPersonaSnapshot[], string | null];

// Fetch agents server-side
export async function fetchAgentsSS(): Promise<FetchAgentsResponse> {
  const response = await fetchSS("/persona");
  if (response.ok) {
    return [(await response.json()) as MinimalPersonaSnapshot[], null];
  }
  return [[], (await response.json()).detail || "Unknown Error"];
}


================================================
FILE: web/src/lib/analytics.ts
================================================
import posthog from "posthog-js";

// ─── Event Registry ────────────────────────────────────────────────────────
// All tracked event names. Add new events here to get type-safe tracking.

export enum AnalyticsEvent {
  CONFIGURED_LLM_PROVIDER = "configured_llm_provider",
  COMPLETED_CRAFT_ONBOARDING = "completed_craft_onboarding",
  COMPLETED_CRAFT_USER_INFO = "completed_craft_user_info",
  SENT_CRAFT_MESSAGE = "sent_craft_message",
  SAW_CRAFT_INTRO = "saw_craft_intro",
  CLICKED_GO_HOME = "clicked_go_home",
  CLICKED_TRY_CRAFT = "clicked_try_craft",
  CLICKED_CRAFT_IN_SIDEBAR = "clicked_craft_in_sidebar",
  RELEASE_NOTIFICATION_CLICKED = "release_notification_clicked",
  EXTENSION_CHAT_QUERY = "extension_chat_query",
}

// ─── Shared Enums ──────────────────────────────────────────────────────────

export enum LLMProviderConfiguredSource {
  ADMIN_PAGE = "admin_page",
  CHAT_ONBOARDING = "chat_onboarding",
  CRAFT_ONBOARDING = "craft_onboarding",
}

// ─── Event Property Types ──────────────────────────────────────────────────
// Maps each event to its required properties. Use `void` for events with no
// properties — this makes the second argument to `track()` optional for those
// events while requiring it for events that carry data.

interface AnalyticsEventProperties {
  [AnalyticsEvent.CONFIGURED_LLM_PROVIDER]: {
    provider: string;
    is_creation: boolean;
    source: LLMProviderConfiguredSource;
  };
  [AnalyticsEvent.COMPLETED_CRAFT_ONBOARDING]: void;
  [AnalyticsEvent.COMPLETED_CRAFT_USER_INFO]: {
    first_name: string;
    last_name: string | undefined;
    work_area: string | undefined;
    level: string | undefined;
  };
  [AnalyticsEvent.SENT_CRAFT_MESSAGE]: void;
  [AnalyticsEvent.SAW_CRAFT_INTRO]: void;
  [AnalyticsEvent.CLICKED_GO_HOME]: void;
  [AnalyticsEvent.CLICKED_TRY_CRAFT]: void;
  [AnalyticsEvent.CLICKED_CRAFT_IN_SIDEBAR]: void;
  [AnalyticsEvent.RELEASE_NOTIFICATION_CLICKED]: {
    version: string | undefined;
  };
  [AnalyticsEvent.EXTENSION_CHAT_QUERY]: {
    extension_context: string | null | undefined;
    assistant_id: number | undefined;
    has_files: boolean;
    deep_research: boolean;
  };
}

// ─── Typed Track Function ──────────────────────────────────────────────────

export function track<E extends AnalyticsEvent>(
  ...args: AnalyticsEventProperties[E] extends void
    ? [event: E]
    : [event: E, properties: AnalyticsEventProperties[E]]
): void {
  const [event, properties] = args as [E, Record<string, unknown>?];
  posthog.capture(event, properties ?? {});
}


================================================
FILE: web/src/lib/appSidebarSS.ts
================================================
import { cookies } from "next/headers";
import { SIDEBAR_TOGGLED_COOKIE_NAME } from "@/components/resizable/constants";
import { User } from "@/lib/types";

export interface AppSidebarMetadata {
  folded: boolean;
}

export async function fetchAppSidebarMetadata(
  user?: User | null
): Promise<AppSidebarMetadata> {
  const requestCookies = await cookies();
  const sidebarToggled = requestCookies.get(SIDEBAR_TOGGLED_COOKIE_NAME);

  const folded = !user?.is_anonymous_user && sidebarToggled?.value === "true";

  return {
    folded,
  };
}


================================================
FILE: web/src/lib/auth/redirectValidation.ts
================================================
/**
 * Validates a redirect URL to prevent Open Redirect vulnerabilities.
 * Only allows internal paths (relative URLs starting with /).
 *
 * @param url - The URL to validate (typically from query params like ?next=...)
 * @returns The validated URL if safe, otherwise null
 *
 * Security: Rejects:
 * - External URLs (https://evil.com)
 * - Protocol-relative URLs (//evil.com)
 * - JavaScript URLs (javascript:alert(1))
 * - Data URLs (data:text/html,...)
 * - Absolute URLs with protocols
 */
export function validateInternalRedirect(
  url: string | null | undefined
): string | null {
  if (!url) {
    return null;
  }

  // Trim whitespace
  const trimmedUrl = url.trim();

  // Must start with / (internal path)
  if (!trimmedUrl.startsWith("/")) {
    return null;
  }

  // Reject protocol-relative URLs (//evil.com)
  if (trimmedUrl.startsWith("//")) {
    return null;
  }

  // Reject URLs with protocol schemes in the path (before query/hash)
  //
  // Regex breakdown: /^[^?#]*:/
  //   ^        - Start of string
  //   [^?#]*   - Match any characters EXCEPT ? and # (zero or more times)
  //              This matches everything before the query string or hash
  //   :        - Match a literal colon
  //
  // This rejects: /javascript:alert(1), /http://evil.com, /data:text/html
  // But allows:   /chat?time=12:30:00, /admin#section:1
  //               (colons after ? or # are safe)
  if (trimmedUrl.match(/^[^?#]*:/)) {
    return null;
  }

  // Additional safety: check for backslash sequences that could bypass validation
  if (trimmedUrl.includes("\\")) {
    return null;
  }

  return trimmedUrl;
}


================================================
FILE: web/src/lib/auth/requireAuth.ts
================================================
import { User, UserRole } from "@/lib/types";
import {
  AuthTypeMetadata,
  getAuthTypeMetadataSS,
  getCurrentUserSS,
} from "@/lib/userSS";
import { AuthType } from "@/lib/constants";

/**
 * Result of an authentication check.
 * If redirect is set, the caller should redirect immediately.
 */
export interface AuthCheckResult {
  user: User | null;
  authTypeMetadata: AuthTypeMetadata | null;
  redirect?: string;
}

/**
 * Requires that the user is authenticated.
 * If not authenticated and auth is enabled, returns a redirect to login.
 * Also checks email verification if required.
 *
 * @returns AuthCheckResult with user, auth metadata, and optional redirect
 *
 * @example
 * ```typescript
 * const authResult = await requireAuth();
 * if (authResult.redirect) {
 *   return redirect(authResult.redirect);
 * }
 * // User is authenticated, proceed with logic
 * const { user } = authResult;
 * ```
 */
export async function requireAuth(): Promise<AuthCheckResult> {
  // Fetch auth information
  let user: User | null = null;
  let authTypeMetadata: AuthTypeMetadata | null = null;

  try {
    [authTypeMetadata, user] = await Promise.all([
      getAuthTypeMetadataSS(),
      getCurrentUserSS(),
    ]);
  } catch (e) {
    console.log(`Failed to fetch auth information - ${e}`);
  }

  // If user is not logged in, redirect to login
  if (!user) {
    return {
      user,
      authTypeMetadata,
      redirect: "/auth/login",
    };
  }

  // Check email verification if required
  if (user && !user.is_verified && authTypeMetadata?.requiresVerification) {
    return {
      user,
      authTypeMetadata,
      redirect: "/auth/waiting-on-verification",
    };
  }

  return {
    user,
    authTypeMetadata,
  };
}

// Allowlist of roles that can access admin pages (all roles except BASIC)
const ADMIN_ALLOWED_ROLES = [
  UserRole.ADMIN,
  UserRole.CURATOR,
  UserRole.GLOBAL_CURATOR,
];

/**
 * Requires that the user is authenticated AND has admin role.
 * If not authenticated, redirects to login.
 * If authenticated but not admin, redirects to /chat.
 * Also checks email verification if required.
 *
 * @returns AuthCheckResult with user, auth metadata, and optional redirect
 *
 * @example
 * ```typescript
 * const authResult = await requireAdminAuth();
 * if (authResult.redirect) {
 *   return redirect(authResult.redirect);
 * }
 * // User is authenticated admin, proceed with admin logic
 * const { user } = authResult;
 * ```
 */
export async function requireAdminAuth(): Promise<AuthCheckResult> {
  const authResult = await requireAuth();

  // If already has a redirect (not authenticated or not verified), return it
  if (authResult.redirect) {
    return authResult;
  }

  const { user, authTypeMetadata } = authResult;

  // Check if user has an allowed role
  if (user && !ADMIN_ALLOWED_ROLES.includes(user.role)) {
    return {
      user,
      authTypeMetadata,
      redirect: "/app",
    };
  }

  return authResult;
}


================================================
FILE: web/src/lib/azureTargetUri.ts
================================================
const getApiVersionParam = (url: URL): string => {
  const directApiVersion = url.searchParams.get("api-version");
  if (directApiVersion?.trim()) {
    return directApiVersion.trim();
  }

  let normalized: string | null = null;
  url.searchParams.forEach((value, key) => {
    if (normalized) {
      return;
    }
    if (key.toLowerCase() === "api-version" && value?.trim()) {
      normalized = value.trim();
    }
  });

  return normalized ?? "";
};

const getDeploymentNameParam = (url: URL): string => {
  const match = url.pathname.match(/\/openai\/deployments\/([^/]+)/i);
  const deployment = match?.[1] ?? "";
  return deployment ? deployment.toLowerCase() : "";
};

const isResponsesPath = (url: URL): boolean =>
  /\/openai\/responses/i.test(url.pathname);

export const parseAzureTargetUri = (
  rawUri: string
): {
  url: URL;
  apiVersion: string;
  deploymentName: string;
  isResponsesPath: boolean;
} => {
  const url = new URL(rawUri);
  return {
    url,
    apiVersion: getApiVersionParam(url),
    deploymentName: getDeploymentNameParam(url),
    isResponsesPath: isResponsesPath(url),
  };
};

export const isValidAzureTargetUri = (rawUri: string): boolean => {
  try {
    const { apiVersion, deploymentName, isResponsesPath } =
      parseAzureTargetUri(rawUri);

    return Boolean(apiVersion) && (Boolean(deploymentName) || isResponsesPath);
  } catch {
    return false;
  }
};


================================================
FILE: web/src/lib/billing/index.ts
================================================
/**
 * Billing module - re-exports for convenience.
 */

// Types and interfaces
export * from "./interfaces";

// Service functions
export * from "./svc";

// Hooks
export { useBillingInformation } from "@/hooks/useBillingInformation";
export { useLicense } from "@/hooks/useLicense";


================================================
FILE: web/src/lib/billing/interfaces.ts
================================================
/**
 * Billing and License interfaces.
 *
 * These types match the backend Pydantic models:
 * - LicenseStatusResponse (backend/ee/onyx/server/license/models.py)
 * - BillingInformationResponse (backend/ee/onyx/server/billing/models.py)
 */

// ----------------------------------------------------------------------------
// License Types (Self-hosted only)
// ----------------------------------------------------------------------------

export type PlanType = "monthly" | "annual";

export type LicenseSource = "auto_fetch" | "manual_upload";

export type ApplicationStatus =
  | "active"
  | "payment_reminder"
  | "gated_access"
  | "expired"
  | "seat_limit_exceeded";

/**
 * Billing status from Stripe subscription.
 */
export enum BillingStatus {
  TRIALING = "trialing",
  ACTIVE = "active",
  CANCELLED = "cancelled",
  EXPIRED = "expired",
  PAST_DUE = "past_due",
  UNPAID = "unpaid",
}

/**
 * License status response from /api/license endpoint.
 * Only relevant for self-hosted deployments.
 */
export interface LicenseStatus {
  has_license: boolean;
  seats: number;
  used_seats: number;
  plan_type: PlanType | null;
  issued_at: string | null;
  expires_at: string | null;
  grace_period_end: string | null;
  status: ApplicationStatus | null;
  source: LicenseSource | null;
}

// ----------------------------------------------------------------------------
// Billing Types (Cloud and Self-hosted)
// ----------------------------------------------------------------------------

/**
 * Billing information from Stripe subscription.
 * Available for both cloud and self-hosted with active subscription.
 */
export interface BillingInformation {
  tenant_id: string;
  status: string | null;
  plan_type: string | null;
  seats: number | null;
  billing_period: string | null;
  current_period_start: string | null;
  current_period_end: string | null;
  cancel_at_period_end: boolean;
  canceled_at: string | null;
  trial_start: string | null;
  trial_end: string | null;
  payment_method_enabled: boolean;
}

/**
 * Response when no subscription exists.
 */
export interface SubscriptionStatus {
  subscribed: boolean;
}

// ----------------------------------------------------------------------------
// Checkout & Portal Types
// ----------------------------------------------------------------------------

export interface CreateCheckoutSessionRequest {
  billing_period?: "monthly" | "annual";
  seats?: number;
  email?: string;
}

export interface CreateCheckoutSessionResponse {
  stripe_checkout_url: string;
}

export interface CreateCustomerPortalSessionRequest {
  return_url?: string;
}

export interface CreateCustomerPortalSessionResponse {
  stripe_customer_portal_url: string;
}

// ----------------------------------------------------------------------------
// Seat Management Types
// ----------------------------------------------------------------------------

export interface SeatUpdateRequest {
  new_seat_count: number;
}

export interface SeatUpdateResponse {
  success: boolean;
  current_seats: number;
  used_seats: number;
  message: string | null;
}

// ----------------------------------------------------------------------------
// Type Guards
// ----------------------------------------------------------------------------

/**
 * Check if the response indicates an active subscription.
 * Returns true only if the data is BillingInformation with a non-null status.
 */
export function hasActiveSubscription(
  data: BillingInformation | SubscriptionStatus
): data is BillingInformation {
  // SubscriptionStatus (bare { subscribed: boolean }) is never BillingInformation
  if ("subscribed" in data) {
    return false;
  }
  return data.status !== null;
}

/**
 * Check if the response indicates an active *paid* subscription.
 * Returns true only for status === "active" (excludes trialing, past_due, etc.).
 */
export function hasPaidSubscription(
  data: BillingInformation | SubscriptionStatus
): data is BillingInformation {
  if ("subscribed" in data) {
    return false;
  }
  return data.status === BillingStatus.ACTIVE;
}

/**
 * Check if a license is valid and active.
 */
export function isLicenseValid(license: LicenseStatus): boolean {
  return license.has_license && license.status === "active";
}

// ----------------------------------------------------------------------------
// Display Utilities
// ----------------------------------------------------------------------------

/**
 * Convert status string to human-readable display format.
 */
export function statusToDisplay(status: string | null): string {
  if (!status) return "Unknown";

  switch (status) {
    case "trialing":
      return "Trialing";
    case "active":
      return "Active";
    case "canceled":
    case "cancelled":
      return "Canceled";
    case "past_due":
      return "Past Due";
    case "unpaid":
      return "Unpaid";
    case "expired":
      return "Expired";
    default:
      return status.charAt(0).toUpperCase() + status.slice(1);
  }
}


================================================
FILE: web/src/lib/billing/svc.test.ts
================================================
/**
 * Tests for billing action functions.
 */

import {
  createCheckoutSession,
  createCustomerPortalSession,
  updateSeatCount,
  refreshLicenseCache,
  uploadLicense,
} from "./svc";

// Mock NEXT_PUBLIC_CLOUD_ENABLED
jest.mock("@/lib/constants", () => ({
  NEXT_PUBLIC_CLOUD_ENABLED: false,
}));

describe("billing actions", () => {
  let fetchSpy: jest.SpyInstance;

  beforeEach(() => {
    fetchSpy = jest.spyOn(global, "fetch");
  });

  afterEach(() => {
    fetchSpy.mockRestore();
  });

  describe("createCheckoutSession", () => {
    test("calls correct endpoint with request body", async () => {
      // Mock POST /api/admin/billing/create-checkout-session
      fetchSpy.mockResolvedValueOnce({
        ok: true,
        json: async () => ({ url: "https://checkout.stripe.com/session123" }),
      } as Response);

      const result = await createCheckoutSession({
        billing_period: "monthly",
        email: "test@example.com",
      });

      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/billing/create-checkout-session",
        expect.objectContaining({
          method: "POST",
          headers: { "Content-Type": "application/json" },
        })
      );

      const callArgs = fetchSpy.mock.calls[0];
      const requestBody = JSON.parse(callArgs[1].body);
      expect(requestBody).toEqual({
        billing_period: "monthly",
        email: "test@example.com",
      });

      expect(result).toEqual({ url: "https://checkout.stripe.com/session123" });
    });

    test("throws error on failed response", async () => {
      // Mock POST /api/admin/billing/create-checkout-session (error)
      fetchSpy.mockResolvedValueOnce({
        ok: false,
        json: async () => ({ detail: "Invalid request" }),
      } as Response);

      await expect(createCheckoutSession()).rejects.toThrow("Invalid request");
    });

    test("throws default error when no detail provided", async () => {
      // Mock POST /api/admin/billing/create-checkout-session (error, no detail)
      fetchSpy.mockResolvedValueOnce({
        ok: false,
        json: async () => ({}),
      } as Response);

      await expect(createCheckoutSession()).rejects.toThrow(
        "Billing request failed"
      );
    });
  });

  describe("createCustomerPortalSession", () => {
    test("calls correct endpoint and returns portal URL", async () => {
      // Mock POST /api/admin/billing/create-customer-portal-session
      fetchSpy.mockResolvedValueOnce({
        ok: true,
        json: async () => ({ url: "https://billing.stripe.com/portal123" }),
      } as Response);

      const result = await createCustomerPortalSession({
        return_url: "https://example.com/billing",
      });

      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/billing/create-customer-portal-session",
        expect.objectContaining({ method: "POST" })
      );

      expect(result).toEqual({ url: "https://billing.stripe.com/portal123" });
    });
  });

  describe("updateSeatCount", () => {
    test("calls correct endpoint with seat count", async () => {
      // Mock POST /api/admin/billing/seats/update
      fetchSpy.mockResolvedValueOnce({
        ok: true,
        json: async () => ({
          success: true,
          current_seats: 10,
          used_seats: 5,
          message: null,
        }),
      } as Response);

      const result = await updateSeatCount({ new_seat_count: 10 });

      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/billing/seats/update",
        expect.objectContaining({ method: "POST" })
      );

      const callArgs = fetchSpy.mock.calls[0];
      const requestBody = JSON.parse(callArgs[1].body);
      expect(requestBody).toEqual({ new_seat_count: 10 });

      expect(result.current_seats).toBe(10);
    });
  });

  describe("refreshLicenseCache (self-hosted only)", () => {
    test("calls license refresh endpoint", async () => {
      // Mock POST /api/license/refresh
      fetchSpy.mockResolvedValueOnce({
        ok: true,
        json: async () => ({ success: true, message: "Cache refreshed" }),
      } as Response);

      const result = await refreshLicenseCache();

      expect(fetchSpy).toHaveBeenCalledWith("/api/license/refresh", {
        method: "POST",
      });

      expect(result).toEqual({ success: true, message: "Cache refreshed" });
    });
  });

  describe("uploadLicense (self-hosted only)", () => {
    test("calls license upload endpoint with FormData", async () => {
      // Mock POST /api/license/upload
      fetchSpy.mockResolvedValueOnce({
        ok: true,
        json: async () => ({
          success: true,
          message:
            "License uploaded successfully. 10 seats, expires 2025-12-31",
        }),
      } as Response);

      const licenseKey = "test-license-key-12345";
      const result = await uploadLicense(licenseKey);

      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/license/upload",
        expect.objectContaining({
          method: "POST",
        })
      );

      // Verify FormData was used
      const callArgs = fetchSpy.mock.calls[0];
      expect(callArgs[1].body).toBeInstanceOf(FormData);

      expect(result).toEqual({
        success: true,
        message: "License uploaded successfully. 10 seats, expires 2025-12-31",
      });
    });

    test("throws error on failed upload", async () => {
      // Mock POST /api/license/upload (error)
      fetchSpy.mockResolvedValueOnce({
        ok: false,
        json: async () => ({ detail: "Invalid license signature" }),
      } as Response);

      await expect(uploadLicense("invalid-key")).rejects.toThrow(
        "Invalid license signature"
      );
    });
  });
});

describe("billing actions (cloud mode)", () => {
  let fetchSpy: jest.SpyInstance;

  beforeEach(() => {
    fetchSpy = jest.spyOn(global, "fetch");
    // Override to cloud mode
    jest.resetModules();
    jest.doMock("@/lib/constants", () => ({
      NEXT_PUBLIC_CLOUD_ENABLED: true,
    }));
  });

  afterEach(() => {
    fetchSpy.mockRestore();
    jest.resetModules();
  });

  test("uses cloud endpoint for checkout session", async () => {
    // Re-import with cloud mode
    const { createCheckoutSession: cloudCheckout } = await import("./svc");

    // Mock POST /api/tenants/create-checkout-session
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ url: "https://checkout.stripe.com/cloud123" }),
    } as Response);

    await cloudCheckout();

    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/tenants/create-checkout-session",
      expect.any(Object)
    );
  });

  test("uploadLicense throws error in cloud mode", async () => {
    // Re-import with cloud mode
    const { uploadLicense: cloudUploadLicense } = await import("./svc");

    await expect(cloudUploadLicense("test-key")).rejects.toThrow(
      "only available for self-hosted"
    );
  });
});


================================================
FILE: web/src/lib/billing/svc.ts
================================================
/**
 * Billing action functions for mutations.
 *
 * These are async functions for one-off actions like creating
 * checkout sessions or portal sessions. They don't need SWR caching.
 *
 * Endpoints:
 * - Cloud: /api/tenants/* (legacy, will migrate to /api/admin/billing/*)
 * - Self-hosted: /api/admin/billing/* (unified billing API)
 *
 * License actions (self-hosted only):
 * - /api/license/fetch - Fetch license from control plane after checkout
 * - /api/license/refresh - Refresh cached license data
 * - /api/license/upload - Upload license key manually (air-gapped deployments)
 */

import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import {
  CreateCheckoutSessionRequest,
  CreateCheckoutSessionResponse,
  CreateCustomerPortalSessionRequest,
  CreateCustomerPortalSessionResponse,
  SeatUpdateRequest,
  SeatUpdateResponse,
} from "@/lib/billing/interfaces";

function getBillingBaseUrl(): string {
  return NEXT_PUBLIC_CLOUD_ENABLED ? "/api/tenants" : "/api/admin/billing";
}

async function billingPost<T>(endpoint: string, body?: unknown): Promise<T> {
  const response = await fetch(`${getBillingBaseUrl()}${endpoint}`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body ?? {}),
  });

  if (!response.ok) {
    const error = await response.json().catch(() => ({}));
    throw new Error(error.detail || "Billing request failed");
  }

  return response.json();
}

export const createCheckoutSession = (request?: CreateCheckoutSessionRequest) =>
  billingPost<CreateCheckoutSessionResponse>(
    "/create-checkout-session",
    request
  );

export const createCustomerPortalSession = (
  request?: CreateCustomerPortalSessionRequest
) =>
  billingPost<CreateCustomerPortalSessionResponse>(
    "/create-customer-portal-session",
    request
  );

export const updateSeatCount = (request: SeatUpdateRequest) =>
  billingPost<SeatUpdateResponse>("/seats/update", request);

/**
 * Reset the Stripe connection circuit breaker (self-hosted only).
 * Called when user clicks "Connect to Stripe" to retry after a previous failure.
 */
export const resetStripeConnection = () =>
  billingPost<{ success: boolean; message: string }>("/reset-connection");

// Self-hosted only actions
async function selfHostedPost<T>(endpoint: string): Promise<T> {
  if (NEXT_PUBLIC_CLOUD_ENABLED) {
    throw new Error(`${endpoint} is only available for self-hosted`);
  }

  const response = await fetch(`/api/license${endpoint}`, {
    method: "POST",
  });

  if (!response.ok) {
    const error = await response.json().catch(() => ({}));
    throw new Error(error.detail || "License request failed");
  }

  return response.json();
}

/**
 * Claim a license from the control plane (self-hosted only).
 *
 * Two modes:
 * - With sessionId: After Stripe checkout, exchange session_id for license
 * - Without sessionId: Re-claim using existing license for auth
 */
export const claimLicense = (sessionId?: string) =>
  selfHostedPost<{ success: boolean; license?: unknown }>(
    sessionId ? `/claim?session_id=${encodeURIComponent(sessionId)}` : "/claim"
  );

/**
 * Refresh the cached license data (self-hosted only).
 * Forces a re-read of the license and updates the cache.
 */
export const refreshLicenseCache = () =>
  selfHostedPost<{ success: boolean; message?: string }>("/refresh");

/**
 * Upload a license key string (self-hosted only).
 * Used for air-gapped deployments where users paste license keys manually.
 */
export async function uploadLicense(
  licenseKey: string
): Promise<{ success: boolean; message?: string }> {
  if (NEXT_PUBLIC_CLOUD_ENABLED) {
    throw new Error("License upload is only available for self-hosted");
  }

  // Create a file from the license key string
  const blob = new Blob([licenseKey], { type: "text/plain" });
  const formData = new FormData();
  formData.append("license_file", blob, "license.txt");

  const response = await fetch("/api/license/upload", {
    method: "POST",
    body: formData,
  });

  if (!response.ok) {
    const error = await response.json().catch(() => ({}));
    throw new Error(error.detail || "License upload failed");
  }

  return response.json();
}


================================================
FILE: web/src/lib/browserUtilities.tsx
================================================
"use client";

import { MacIcon, WindowsIcon } from "@/components/icons/icons";
import { useState, useEffect } from "react";

export enum OperatingSystem {
  Windows = "Windows",
  Mac = "Mac",
  Other = "Other",
}

export const useOperatingSystem = (): OperatingSystem => {
  const [os, setOS] = useState<OperatingSystem>(OperatingSystem.Other);

  useEffect(() => {
    const userAgent = window.navigator.userAgent.toLowerCase();
    if (userAgent.includes("win")) {
      setOS(OperatingSystem.Windows);
    } else if (userAgent.includes("mac")) {
      setOS(OperatingSystem.Mac);
    }
  }, []);

  return os;
};

// Use this to handle the sidebar shortcut for the chat page
// The shortcut is Ctrl+E on Windows/Linux and Cmd+E on Mac
// This hook handles the keyboard event and toggles the sidebar
export const useSidebarShortcut = (router: any, toggleSidebar: () => void) => {
  const os = useOperatingSystem();

  useEffect(() => {
    const handleKeyDown = (event: KeyboardEvent) => {
      const isMac = os === OperatingSystem.Mac;
      const modifierKey = isMac ? event.metaKey : event.ctrlKey;

      if (modifierKey && event.key.toLowerCase() === "e") {
        event.preventDefault();
        toggleSidebar();
      }
    };

    window.addEventListener("keydown", handleKeyDown);
    return () => {
      window.removeEventListener("keydown", handleKeyDown);
    };
  }, [router, toggleSidebar, os]);
};

const KeyboardSymbol = () => {
  const os = useOperatingSystem();

  if (os === OperatingSystem.Windows) {
    return <WindowsIcon size={12} />;
  } else {
    return <MacIcon size={12} />;
  }
};

export default KeyboardSymbol;


================================================
FILE: web/src/lib/build/client.ts
================================================
export interface CreateSessionRequest {
  task: string;
  available_sources?: string[];
}

export interface CreateSessionResponse {
  session_id: string;
}

export interface ArtifactInfo {
  artifact_type: "webapp" | "file" | "markdown" | "image";
  path: string;
  filename: string;
  mime_type?: string;
}

// =============================================================================
// ACP Event Types (from Agent Client Protocol)
// =============================================================================

/** Text or image content from the agent */
export interface AgentMessageChunkEvent {
  sessionUpdate: "agent_message_chunk";
  content: Array<{
    type: "text" | "image";
    text?: string;
    image?: string;
    mimeType?: string;
  }>;
}

/** Agent's internal reasoning/thinking */
export interface AgentThoughtChunkEvent {
  sessionUpdate: "agent_thought_chunk";
  thought: string;
}

/** Tool invocation started */
export interface ToolCallStartEvent {
  sessionUpdate: "tool_call";
  toolCallId: string;
  toolName: string;
  toolInput?: Record<string, unknown>;
}

/** Tool execution progress/result */
export interface ToolCallProgressEvent {
  sessionUpdate: "tool_call_update";
  toolCallId: string;
  content?: Array<{
    type: "text" | "image";
    text?: string;
    image?: string;
    mimeType?: string;
  }>;
  error?: string;
  isComplete?: boolean;
}

/** Agent's execution plan */
export interface AgentPlanUpdateEvent {
  sessionUpdate: "plan";
  plan: Array<{
    id: string;
    description: string;
    status: "pending" | "in_progress" | "completed" | "failed";
  }>;
}

/** Agent mode change */
export interface CurrentModeUpdateEvent {
  sessionUpdate: "current_mode_update";
  mode: string;
}

/** Agent finished processing prompt */
export interface PromptResponseEvent {
  stopReason?: string;
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
  };
}

/** ACP error event */
export interface ACPErrorEvent {
  code: number;
  message: string;
}

/** File write event (custom Onyx extension) */
export interface FileWriteEvent {
  path: string;
  size_bytes?: number;
  operation?: "create" | "update" | "delete";
}

// =============================================================================
// Legacy Event Types (kept for backwards compatibility)
// =============================================================================

export interface OutputEvent {
  stream: "stdout" | "stderr";
  data: string;
}

export interface StatusEvent {
  status: "running" | "completed" | "failed";
  message?: string;
}

export interface ArtifactEvent {
  artifact_type: string;
  path: string;
  filename: string;
}

export interface ErrorEvent {
  message: string;
}

export interface FileSystemEntry {
  name: string;
  path: string;
  is_directory: boolean;
  size: number | null;
  mime_type: string | null;
}

export interface DirectoryListing {
  path: string;
  entries: FileSystemEntry[];
}

// =============================================================================
// Union Types
// =============================================================================

/** All possible ACP events from the agent */
export type ACPEvent =
  | { type: "agent_message_chunk"; data: AgentMessageChunkEvent }
  | { type: "agent_thought_chunk"; data: AgentThoughtChunkEvent }
  | { type: "tool_call"; data: ToolCallStartEvent }
  | { type: "tool_call_update"; data: ToolCallProgressEvent }
  | { type: "plan"; data: AgentPlanUpdateEvent }
  | { type: "current_mode_update"; data: CurrentModeUpdateEvent }
  | { type: "prompt_response"; data: PromptResponseEvent }
  | { type: "error"; data: ACPErrorEvent }
  | { type: "status"; data: StatusEvent }
  | { type: "artifact"; data: ArtifactEvent }
  | { type: "file_write"; data: FileWriteEvent };

/** Legacy BuildEvent type - alias for ACPEvent */
export type BuildEvent = ACPEvent;

export async function createSession(
  request: CreateSessionRequest
): Promise<CreateSessionResponse> {
  const response = await fetch("/api/build/sessions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    throw new Error(`Failed to create session: ${response.statusText}`);
  }
  return response.json();
}

export async function deleteSession(sessionId: string): Promise<void> {
  const response = await fetch(`/api/build/sessions/${sessionId}`, {
    method: "DELETE",
  });
  if (!response.ok) {
    throw new Error(`Failed to delete session: ${response.statusText}`);
  }
}

export async function executeTask(
  sessionId: string,
  task: string,
  context: string | undefined,
  onEvent: (event: BuildEvent) => void,
  onError: (error: Error) => void,
  onComplete: () => void
): Promise<void> {
  try {
    const response = await fetch(`/api/build/sessions/${sessionId}/execute`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Accept: "text/event-stream",
      },
      body: JSON.stringify({ task, context }),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error("No response body");
    }

    const decoder = new TextDecoder();
    let buffer = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";

      let currentEventType = "output";
      for (const line of lines) {
        if (line.startsWith("event:")) {
          currentEventType = line.slice(6).trim();
        } else if (line.startsWith("data:")) {
          const data = line.slice(5).trim();
          if (data) {
            try {
              const parsed = JSON.parse(data);
              onEvent({ type: currentEventType, data: parsed } as BuildEvent);
            } catch {
              // Skip malformed JSON
            }
          }
        }
      }
    }

    onComplete();
  } catch (error) {
    onError(error instanceof Error ? error : new Error(String(error)));
  }
}

/**
 * Send a message to the build session using the new messages API endpoint.
 * This endpoint streams SSE events with message-prefixed packet types.
 */
export async function sendMessage(
  sessionId: string,
  message: string,
  onEvent: (event: BuildEvent) => void,
  onError: (error: Error) => void,
  onComplete: () => void
): Promise<void> {
  try {
    const response = await fetch(`/api/build/sessions/${sessionId}/messages`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Accept: "text/event-stream",
      },
      body: JSON.stringify({ content: message }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `HTTP ${response.status}: ${errorText || response.statusText}`
      );
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error("No response body");
    }

    const decoder = new TextDecoder();
    let buffer = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";

      for (const line of lines) {
        if (line.startsWith("event:")) {
          // Skip event type line (all events are "message")
          continue;
        } else if (line.startsWith("data:")) {
          const data = line.slice(5).trim();
          if (data) {
            try {
              const parsed = JSON.parse(data);
              // Map frontend packet types to BuildEvent types
              const eventType = mapMessagePacketToEventType(parsed.type);
              if (eventType) {
                onEvent({ type: eventType, data: parsed } as BuildEvent);
              }
            } catch (err) {
              console.error("Failed to parse SSE data:", err);
            }
          }
        }
      }
    }

    onComplete();
  } catch (error) {
    onError(error instanceof Error ? error : new Error(String(error)));
  }
}

/**
 * Map message API packet types to BuildEvent types.
 * Uses direct ACP event names from the backend, plus custom Onyx packet types.
 */
function mapMessagePacketToEventType(packetType: string): string | null {
  const mapping: Record<string, string> = {
    // Direct ACP event types
    agent_message_chunk: "agent_message_chunk",
    agent_thought_chunk: "agent_thought_chunk",
    tool_call_start: "tool_call",
    tool_call_progress: "tool_call_update",
    agent_plan_update: "plan",
    current_mode_update: "current_mode_update",
    prompt_response: "prompt_response",
    error: "error",
    // Custom Onyx packet types (extensions to ACP)
    artifact_created: "artifact",
    file_write: "file_write",
  };
  return mapping[packetType] || null;
}

export async function listArtifacts(
  sessionId: string
): Promise<ArtifactInfo[]> {
  const response = await fetch(`/api/build/sessions/${sessionId}/artifacts`);
  if (!response.ok) {
    throw new Error(`Failed to list artifacts: ${response.statusText}`);
  }
  return response.json();
}

export function getArtifactUrl(sessionId: string, path: string): string {
  return `/api/build/sessions/${sessionId}/artifacts/${path}`;
}

export async function listDirectory(
  sessionId: string,
  path: string = ""
): Promise<DirectoryListing> {
  const url = path
    ? `/api/build/sessions/${sessionId}/files?path=${encodeURIComponent(path)}`
    : `/api/build/sessions/${sessionId}/files`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to list directory: ${response.statusText}`);
  }
  return response.json();
}

export function getWebappUrl(sessionId: string, path: string = ""): string {
  return `/api/build/sessions/${sessionId}/webapp${path ? `/${path}` : ""}`;
}


================================================
FILE: web/src/lib/ccPair.ts
================================================
import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";
import { toast } from "@/hooks/useToast";

export async function setCCPairStatus(
  ccPairId: number,
  ccPairStatus: ConnectorCredentialPairStatus,
  onUpdate?: () => void
) {
  try {
    const response = await fetch(
      `/api/manage/admin/cc-pair/${ccPairId}/status`,
      {
        method: "PUT",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ status: ccPairStatus }),
      }
    );

    if (!response.ok) {
      const { detail } = await response.json();
      toast.error(`Failed to update connector status - ${detail}`);
      return;
    }

    toast.success(
      ccPairStatus === ConnectorCredentialPairStatus.ACTIVE
        ? "Enabled connector!"
        : "Paused connector!"
    );

    onUpdate && onUpdate();
  } catch (error) {
    console.error("Error updating CC pair status:", error);
    toast.error("Failed to update connector status");
  }
}

export const getCCPairStatusMessage = (
  isDisabled: boolean,
  isIndexing: boolean,
  ccPairStatus: ConnectorCredentialPairStatus
) => {
  if (ccPairStatus === ConnectorCredentialPairStatus.INVALID) {
    return "Connector is in an invalid state. Please update the credentials or configuration before re-indexing.";
  }
  if (ccPairStatus === ConnectorCredentialPairStatus.DELETING) {
    return "Cannot index while connector is deleting";
  }
  if (isIndexing) {
    return "Indexing is already in progress";
  }
  if (isDisabled) {
    return "Connector must be re-enabled before indexing";
  }
  return undefined;
};


================================================
FILE: web/src/lib/chat/fetchAgentData.ts
================================================
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { filterAgents } from "@/lib/agents";
import { fetchAgentsSS } from "@/lib/agentsSS";

export async function fetchAgentData(): Promise<MinimalPersonaSnapshot[]> {
  try {
    // Fetch core assistants data
    const [assistants, agentsFetchError] = await fetchAgentsSS();
    if (agentsFetchError) {
      // This is not a critical error and occurs when the user is not logged in
      console.warn(`Failed to fetch agents - ${agentsFetchError}`);
      return [];
    }

    return filterAgents(assistants);
  } catch (error) {
    console.error("Unexpected error in fetchAgentData:", error);
    return [];
  }
}


================================================
FILE: web/src/lib/chat/fetchBackendChatSessionSS.ts
================================================
import { BackendChatSession } from "@/app/app/interfaces";
import { fetchSS } from "@/lib/utilsSS";

export async function fetchBackendChatSessionSS(
  chatId: string
): Promise<BackendChatSession | null> {
  const response = await fetchSS(`/chat/get-chat-session/${chatId}`);
  if (!response.ok) return null;
  return (await response.json()) as BackendChatSession;
}


================================================
FILE: web/src/lib/chat/greetingMessages.ts
================================================
export const GREETING_MESSAGES = ["How can I help?", "Let's get started."];

export function getRandomGreeting(): string {
  return GREETING_MESSAGES[
    Math.floor(Math.random() * GREETING_MESSAGES.length)
  ] as string;
}


================================================
FILE: web/src/lib/chat/svc.ts
================================================
const CHAT_FILE_PREFIX = "/api/chat/file";

/**
 * Fetch a chat file by its ID, returning the raw Response.
 *
 * The caller is responsible for consuming the body (e.g. `.blob()`,
 * `.text()`) since different consumers need different formats.
 */
export async function fetchChatFile(fileId: string): Promise<Response> {
  const response = await fetch(
    `${CHAT_FILE_PREFIX}/${encodeURIComponent(fileId)}`,
    {
      method: "GET",
      cache: "force-cache",
    }
  );

  if (!response.ok) {
    throw new Error("Failed to load document.");
  }

  return response;
}


================================================
FILE: web/src/lib/clipboard.test.ts
================================================
import { getPastedFilesIfNoText } from "./clipboard";

type MockClipboardData = Parameters<typeof getPastedFilesIfNoText>[0];

function makeClipboardData({
  textPlain = "",
  text = "",
  files = [],
}: {
  textPlain?: string;
  text?: string;
  files?: File[];
}): MockClipboardData {
  return {
    items: files.map((file) => ({
      kind: "file",
      getAsFile: () => file,
    })),
    getData: (format: string) => {
      if (format === "text/plain") {
        return textPlain;
      }

      if (format === "text") {
        return text;
      }

      return "";
    },
  };
}

describe("getPastedFilesIfNoText", () => {
  it("prefers plain text over pasted files when both are present", () => {
    const imageFile = new File(["slide preview"], "slide.png", {
      type: "image/png",
    });

    expect(
      getPastedFilesIfNoText(
        makeClipboardData({
          textPlain: "Welcome to PowerPoint for Mac",
          files: [imageFile],
        })
      )
    ).toEqual([]);
  });

  it("falls back to text data when text/plain is empty", () => {
    const imageFile = new File(["slide preview"], "slide.png", {
      type: "image/png",
    });

    expect(
      getPastedFilesIfNoText(
        makeClipboardData({
          text: "Welcome to PowerPoint for Mac",
          files: [imageFile],
        })
      )
    ).toEqual([]);
  });

  it("still returns files for image-only pastes", () => {
    const imageFile = new File(["slide preview"], "slide.png", {
      type: "image/png",
    });

    expect(
      getPastedFilesIfNoText(makeClipboardData({ files: [imageFile] }))
    ).toEqual([imageFile]);
  });

  it("ignores whitespace-only text and keeps file pastes working", () => {
    const imageFile = new File(["slide preview"], "slide.png", {
      type: "image/png",
    });

    expect(
      getPastedFilesIfNoText(
        makeClipboardData({
          textPlain: "   ",
          text: "\n",
          files: [imageFile],
        })
      )
    ).toEqual([imageFile]);
  });
});


================================================
FILE: web/src/lib/clipboard.ts
================================================
type ClipboardFileItem = {
  kind: string;
  getAsFile: () => File | null;
};

type ClipboardDataLike = {
  items?: ArrayLike<ClipboardFileItem> | null;
  getData: (format: string) => string;
};

function getClipboardText(
  clipboardData: ClipboardDataLike,
  format: "text/plain" | "text"
): string {
  try {
    return clipboardData.getData(format);
  } catch {
    return "";
  }
}

export function getPastedFilesIfNoText(
  clipboardData?: ClipboardDataLike | null
): File[] {
  if (!clipboardData) {
    return [];
  }

  const plainText = getClipboardText(clipboardData, "text/plain").trim();
  const fallbackText = getClipboardText(clipboardData, "text").trim();

  // Apps like PowerPoint on macOS can place both rendered image data and the
  // original text on the clipboard. Prefer letting the textarea consume text.
  if (plainText || fallbackText || !clipboardData.items) {
    return [];
  }

  const pastedFiles: File[] = [];
  for (let i = 0; i < clipboardData.items.length; i++) {
    const item = clipboardData.items[i];
    if (item?.kind !== "file") {
      continue;
    }

    const file = item.getAsFile();
    if (file) {
      pastedFiles.push(file);
    }
  }

  return pastedFiles;
}


================================================
FILE: web/src/lib/connector.ts
================================================
import { ValidSources } from "./types";
import {
  Connector,
  ConnectorBase,
  ConnectorSnapshot,
} from "./connectors/connectors";
async function handleResponse(
  response: Response
): Promise<[string | null, any]> {
  const responseJson = await response.json();
  if (response.ok) {
    return [null, responseJson];
  }
  return [responseJson.detail, null];
}

export async function fetchConnectors(
  credential_id: number
): Promise<ConnectorSnapshot[]> {
  const url = `/api/manage/admin/connector?credential=${credential_id}`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch connectors: ${await response.text()}`);
  }
  const connectors: ConnectorSnapshot[] = await response.json();
  return connectors;
}

export async function createConnector<T>(
  connector: ConnectorBase<T>
): Promise<[string | null, Connector<T> | null]> {
  const response = await fetch(`/api/manage/admin/connector`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(connector),
  });
  return handleResponse(response);
}

export async function updateConnectorCredentialPairName(
  ccPairId: number,
  newName: string
): Promise<Response> {
  return fetch(
    `/api/manage/admin/cc-pair/${ccPairId}/name?new_name=${encodeURIComponent(
      newName
    )}`,
    {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
    }
  );
}

export async function updateConnectorCredentialPairProperty(
  ccPairId: number,
  name: string,
  value: string
): Promise<Response> {
  return fetch(`/api/manage/admin/cc-pair/${ccPairId}/property`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      name: name,
      value: value,
    }),
  });
}

export async function updateConnector<T>(
  connector: Connector<T>
): Promise<Connector<T>> {
  const response = await fetch(`/api/manage/admin/connector/${connector.id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(connector),
  });
  return await response.json();
}

export async function deleteConnector(
  connectorId: number
): Promise<string | null> {
  const response = await fetch(`/api/manage/admin/connector/${connectorId}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
  if (response.ok) {
    return null;
  }
  return (await response.json()).detail;
}

export async function runConnector(
  connectorId: number,
  credentialIds: number[],
  fromBeginning: boolean = false
): Promise<string | null> {
  const response = await fetch("/api/manage/admin/connector/run-once", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      connector_id: connectorId,
      credentialIds,
      from_beginning: fromBeginning,
    }),
  });
  if (!response.ok) {
    return (await response.json()).detail;
  }
  return null;
}

export async function deleteConnectorIfExistsAndIsUnlinked({
  source,
  name,
}: {
  source: ValidSources;
  name?: string;
}): Promise<string | null> {
  const connectorsResponse = await fetch("/api/manage/connector");
  if (connectorsResponse.ok) {
    const connectors = (await connectorsResponse.json()) as Connector<any>[];
    const matchingConnectors = connectors.filter(
      (connector) =>
        connector.source === source && (!name || connector.name === name)
    );
    if (
      matchingConnectors.length > 0 &&
      matchingConnectors[0] &&
      matchingConnectors[0].credential_ids.length === 0
    ) {
      const errorMsg = await deleteConnector(matchingConnectors[0].id);
      if (errorMsg) {
        return errorMsg;
      }
    }
  }
  return null;
}


================================================
FILE: web/src/lib/connectors/AutoSyncOptionFields.tsx
================================================
import { JSX } from "react";
import { ValidAutoSyncSource } from "@/lib/types";

// The first key is the connector type, and the second key is the field name
export const autoSyncConfigBySource: Record<
  ValidAutoSyncSource,
  Record<
    string,
    {
      label: string;
      subtext: JSX.Element;
    }
  >
> = {
  confluence: {},
  jira: {},
  google_drive: {},
  gmail: {},
  github: {},
  slack: {},
  salesforce: {},
  sharepoint: {},
  teams: {},
};


================================================
FILE: web/src/lib/connectors/connectors.tsx
================================================
import * as Yup from "yup";
import { ConfigurableSources, ValidInputTypes, ValidSources } from "../types";
import { AccessTypeGroupSelectorFormType } from "@/components/admin/connectors/AccessTypeGroupSelector";
import { Credential } from "@/lib/connectors/credentials"; // Import Credential type
import { DOCS_ADMINS_PATH } from "@/lib/constants";

export function isLoadState(connector_name: string): boolean {
  // TODO: centralize connector metadata like this somewhere instead of hardcoding it here
  const loadStateConnectors = ["web", "xenforo", "file", "airtable"];
  if (loadStateConnectors.includes(connector_name)) {
    return true;
  }

  return false;
}

export type InputType =
  | "list"
  | "text"
  | "select"
  | "multiselect"
  | "boolean"
  | "number"
  | "file";

export type StringWithDescription = {
  value: string;
  name: string;
  description?: string;
};

export interface Option {
  label: string | ((currentCredential: Credential<any> | null) => string);
  name: string;
  description?:
    | string
    | ((currentCredential: Credential<any> | null) => string);
  query?: string;
  optional?: boolean;
  hidden?: boolean;
  visibleCondition?: (
    values: any,
    currentCredential: Credential<any> | null
  ) => boolean;
  wrapInCollapsible?: boolean;
  disabled?: boolean | ((currentCredential: Credential<any> | null) => boolean);
}

export interface SelectOption extends Option {
  type: "select";
  options?: StringWithDescription[];
  default?: string;
}

export interface MultiSelectOption extends Option {
  type: "multiselect";
  options?: StringWithDescription[];
  default?: string[];
}

export interface ListOption extends Option {
  type: "list";
  default?: string[];
  transform?: (values: string[]) => string[];
}

export interface TextOption extends Option {
  type: "text";
  default?: string;
  initial?: string | ((currentCredential: Credential<any> | null) => string);
  isTextArea?: boolean;
}

export interface NumberOption extends Option {
  type: "number";
  default?: number;
}

export interface BooleanOption extends Option {
  type: "checkbox";
  default?: boolean;
}

export interface FileOption extends Option {
  type: "file";
  default?: string;
}

export interface StringTabOption extends Option {
  type: "string_tab";
  default?: string;
}

export interface TabOption extends Option {
  type: "tab";
  defaultTab?: string;
  tabs: {
    label: string;
    value: string;
    fields: (
      | BooleanOption
      | ListOption
      | TextOption
      | NumberOption
      | SelectOption
      | MultiSelectOption
      | FileOption
      | StringTabOption
    )[];
  }[];
  default?: [];
}

export interface ConnectionConfiguration {
  description: string;
  subtext?: string;
  initialConnectorName?: string; // a key in the credential to prepopulate the connector name field
  values: (
    | BooleanOption
    | ListOption
    | TextOption
    | NumberOption
    | SelectOption
    | MultiSelectOption
    | FileOption
    | TabOption
  )[];
  advanced_values: (
    | BooleanOption
    | ListOption
    | TextOption
    | NumberOption
    | SelectOption
    | MultiSelectOption
    | FileOption
    | TabOption
  )[];
  overrideDefaultFreq?: number;
  advancedValuesVisibleCondition?: (
    values: any,
    currentCredential: Credential<any> | null
  ) => boolean;
}

export const connectorConfigs: Record<
  ConfigurableSources,
  ConnectionConfiguration
> = {
  web: {
    description: "Configure Web connector",
    values: [
      {
        type: "text",
        query: "Enter the website URL to scrape e.g. https://docs.onyx.app/:",
        label: "Base URL",
        name: "base_url",
        optional: false,
      },
      {
        type: "select",
        query: "Select the web connector type:",
        label: "Scrape Method",
        name: "web_connector_type",
        options: [
          { name: "recursive", value: "recursive" },
          { name: "single", value: "single" },
          { name: "sitemap", value: "sitemap" },
        ],
      },
    ],
    advanced_values: [
      {
        type: "checkbox",
        query: "Scroll before scraping:",
        label: "Scroll before scraping",
        description:
          "Enable if the website requires scrolling for the desired content to load",
        name: "scroll_before_scraping",
        optional: true,
      },
    ],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  github: {
    description: "Configure GitHub connector",
    values: [
      {
        type: "text",
        query: "Enter the GitHub username or organization:",
        label: "Repository Owner",
        name: "repo_owner",
        optional: false,
      },
      {
        type: "tab",
        name: "github_mode",
        label: "What should we index from GitHub?",
        optional: true,
        tabs: [
          {
            value: "repo",
            label: "Specific Repository",
            fields: [
              {
                type: "text",
                query: "Enter the repository name(s):",
                label: "Repository Name(s)",
                name: "repositories",
                optional: false,
                description:
                  "For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
              },
            ],
          },
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything",
                description:
                  "This connector will index all repositories the provided credentials have access to!",
              },
            ],
          },
        ],
      },
      {
        type: "checkbox",
        query: "Include pull requests?",
        label: "Include pull requests?",
        description: "Index pull requests from repositories",
        name: "include_prs",
        optional: true,
      },
      {
        type: "checkbox",
        query: "Include issues?",
        label: "Include Issues?",
        name: "include_issues",
        description: "Index issues from repositories",
        optional: true,
      },
    ],
    advanced_values: [],
  },
  testrail: {
    description: "Configure TestRail connector",
    values: [
      {
        type: "text",
        label: "Project IDs",
        name: "project_ids",
        optional: true,
        description:
          "Comma-separated list of TestRail project IDs to index (e.g., 1 or 1,2,3). Leave empty to index all projects.",
      },
    ],
    advanced_values: [
      {
        type: "number",
        label: "Cases Page Size",
        name: "cases_page_size",
        optional: true,
        description:
          "Number of test cases to fetch per page from the TestRail API (default: 250)",
      },
      {
        type: "number",
        label: "Max Pages",
        name: "max_pages",
        optional: true,
        description:
          "Maximum number of pages to fetch to prevent infinite loops (default: 10000)",
      },
      {
        type: "number",
        label: "Skip Document Character Limit",
        name: "skip_doc_absolute_chars",
        optional: true,
        description:
          "Skip indexing test cases that exceed this character limit (default: 200000)",
      },
    ],
  },
  gitlab: {
    description: "Configure GitLab connector",
    values: [
      {
        type: "text",
        query: "Enter the project owner:",
        label: "Project Owner",
        name: "project_owner",
        optional: false,
      },
      {
        type: "text",
        query: "Enter the project name:",
        label: "Project Name",
        name: "project_name",
        optional: false,
      },
    ],
    advanced_values: [
      {
        type: "checkbox",
        query: "Include merge requests?",
        label: "Include MRs",
        name: "include_mrs",
        description: "Index merge requests from repositories",
        default: true,
      },
      {
        type: "checkbox",
        query: "Include issues?",
        label: "Include Issues",
        name: "include_issues",
        description: "Index issues from repositories",
        default: true,
      },
    ],
  },
  bitbucket: {
    description: "Configure Bitbucket connector",
    subtext:
      "Configure Bitbucket connector (Cloud only). You can index a workspace, specific projects or repositories.",
    values: [
      {
        type: "text",
        label: "Workspace",
        name: "workspace",
        optional: false,
        description: `The Bitbucket workspace to index (e.g., "atlassian" from https://bitbucket.org/atlassian/workspace ).`,
      },
      {
        type: "tab",
        name: "bitbucket_mode",
        label: "What should be indexed from Bitbucket?",
        optional: true,
        tabs: [
          {
            value: "repo",
            label: "Specific Repositories",
            fields: [
              {
                type: "text",
                label: "Repository Slugs",
                name: "repositories",
                optional: false,
                description:
                  "For multiple repositories, enter comma-separated slugs (e.g., repo1,repo2,repo3)",
              },
            ],
          },
          {
            value: "project",
            label: "Project(s)",
            fields: [
              {
                type: "text",
                label: "Project Key(s)",
                name: "projects",
                optional: false,
                description:
                  "One or more Bitbucket Project Keys (comma-separated) to index all repositories in those projects (e.g., PROJ1,PROJ2)",
              },
            ],
          },
          {
            value: "workspace",
            label: "Workspace",
            fields: [
              {
                type: "string_tab",
                label: "Workspace",
                name: "workspace_tab",
                description:
                  "This connector will index all repositories in the workspace.",
              },
            ],
          },
        ],
      },
    ],
    advanced_values: [],
  },
  gitbook: {
    description: "Configure GitBook connector",
    values: [
      {
        type: "text",
        query: "Enter the space ID:",
        label: "Space ID",
        name: "space_id",
        optional: false,
        description:
          "The ID of the GitBook space to index. This can be found in the URL " +
          "of a page in the space. For example, if your URL looks like " +
          "`https://app.gitbook.com/o/ccLx08XZ5wZ54LwdP9QU/s/8JkzVx8QCIGRrmxhGHU8/`, " +
          "then your space ID is `8JkzVx8QCIGRrmxhGHU8`.",
      },
    ],
    advanced_values: [],
  },
  google_drive: {
    description: "Configure Google Drive connector",
    values: [
      {
        type: "tab",
        name: "indexing_scope",
        label: "How should we index your Google Drive?",
        optional: true,
        tabs: [
          {
            value: "general",
            label: "General",
            fields: [
              {
                type: "checkbox",
                label: "Include shared drives?",
                description: (currentCredential) => {
                  return currentCredential?.credential_json?.google_tokens
                    ? "This will allow Onyx to index everything in the shared drives you have access to."
                    : "This will allow Onyx to index everything in your Organization's shared drives.";
                },
                name: "include_shared_drives",
                default: false,
              },
              {
                type: "checkbox",
                label: (currentCredential) => {
                  return currentCredential?.credential_json?.google_tokens
                    ? "Include My Drive?"
                    : "Include Everyone's My Drive?";
                },
                description: (currentCredential) => {
                  return currentCredential?.credential_json?.google_tokens
                    ? "This will allow Onyx to index everything in your My Drive."
                    : "This will allow Onyx to index everything in everyone's My Drives.";
                },
                name: "include_my_drives",
                default: false,
              },
              {
                type: "checkbox",
                description:
                  "This will allow Onyx to index all files shared with you.",
                label: "Include All Files Shared With You?",
                name: "include_files_shared_with_me",
                visibleCondition: (values, currentCredential) =>
                  currentCredential?.credential_json?.google_tokens,
                default: false,
              },
            ],
          },
          {
            value: "specific",
            label: "Specific",
            fields: [
              {
                type: "text",
                description: (currentCredential) => {
                  return currentCredential?.credential_json?.google_tokens
                    ? "Enter a comma separated list of the URLs for the shared drive you would like to index. You must have access to these shared drives."
                    : "Enter a comma separated list of the URLs for the shared drive you would like to index.";
                },
                label: "Shared Drive URLs",
                name: "shared_drive_urls",
                default: "",
                isTextArea: true,
              },
              {
                type: "text",
                description:
                  "Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed.",
                label: "Folder URLs",
                name: "shared_folder_urls",
                default: "",
                isTextArea: true,
              },
              {
                type: "text",
                description:
                  "Enter a comma separated list of the emails of the users whose MyDrive you want to index.",
                label: "My Drive Emails",
                name: "my_drive_emails",
                visibleCondition: (values, currentCredential) =>
                  !currentCredential?.credential_json?.google_tokens,
                default: "",
                isTextArea: true,
              },
            ],
          },
        ],
        defaultTab: "general",
      },
    ],
    advanced_values: [
      {
        type: "text",
        description:
          "Enter a comma separated list of specific user emails to index. This will only index files accessible to these users.",
        label: "Specific User Emails",
        name: "specific_user_emails",
        optional: true,
        default: "",
        isTextArea: true,
        visibleCondition: (values, currentCredential) =>
          !currentCredential?.credential_json?.google_tokens,
      },
      {
        type: "checkbox",
        label: "Hide domain link-only files?",
        description:
          "When enabled, Onyx skips files that are shared broadly (domain or public) but require the link to access.",
        name: "exclude_domain_link_only",
        optional: true,
        default: false,
      },
    ],
  },
  gmail: {
    description: "Configure Gmail connector",
    values: [],
    advanced_values: [],
  },
  bookstack: {
    description: "Configure Bookstack connector",
    values: [],
    advanced_values: [],
  },
  outline: {
    description: "Configure Outline connector",
    values: [],
    advanced_values: [],
  },
  confluence: {
    description: "Configure Confluence connector",
    initialConnectorName: "cloud_name",
    values: [
      {
        type: "checkbox",
        query: "Is this a Confluence Cloud instance?",
        label: "Is Cloud",
        name: "is_cloud",
        optional: false,
        default: true,
        description:
          "Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
        disabled: (currentCredential) => {
          if (currentCredential?.credential_json?.confluence_refresh_token) {
            return true;
          }
          return false;
        },
      },
      {
        type: "text",
        query: "Enter the wiki base URL:",
        label: "Wiki Base URL",
        name: "wiki_base",
        optional: false,
        initial: (currentCredential) => {
          return currentCredential?.credential_json?.wiki_base ?? "";
        },
        disabled: (currentCredential) => {
          if (currentCredential?.credential_json?.confluence_refresh_token) {
            return true;
          }
          return false;
        },
        description:
          "The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)",
      },
      {
        type: "checkbox",
        query: "Using scoped token?",
        label: "Using scoped token",
        name: "scoped_token",
        optional: true,
        default: false,
      },
      {
        type: "tab",
        name: "indexing_scope",
        label: "How Should We Index Your Confluence?",
        optional: true,
        tabs: [
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything",
                description:
                  "This connector will index all pages the provided credentials have access to!",
              },
            ],
          },
          {
            value: "space",
            label: "Space",
            fields: [
              {
                type: "text",
                query: "Enter the space:",
                label: "Space Key",
                name: "space",
                default: "",
                description: "The Confluence space key to index (e.g. `KB`).",
              },
            ],
          },
          {
            value: "page",
            label: "Page",
            fields: [
              {
                type: "text",
                query: "Enter the page ID:",
                label: "Page ID",
                name: "page_id",
                default: "",
                description: "Specific page ID to index (e.g. `131368`)",
              },
              {
                type: "checkbox",
                query: "Should index pages recursively?",
                label: "Index Recursively",
                name: "index_recursively",
                description:
                  "If this is set, we will index the page indicated by the Page ID as well as all of its children.",
                optional: false,
                default: true,
              },
            ],
          },
          {
            value: "cql",
            label: "CQL Query",
            fields: [
              {
                type: "text",
                query: "Enter the CQL query (optional):",
                label: "CQL Query",
                name: "cql_query",
                default: "",
                description:
                  "IMPORTANT: We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See Atlassian's [CQL documentation](https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/) for more details.",
              },
            ],
          },
        ],
        defaultTab: "space",
      },
    ],
    advanced_values: [],
  },
  jira: {
    description: "Configure Jira connector",
    subtext: `Configure which Jira content to index. You can index everything or specify a particular project.`,
    values: [
      {
        type: "text",
        query: "Enter the Jira base URL:",
        label: "Jira Base URL",
        name: "jira_base_url",
        optional: false,
        description:
          "The base URL of your Jira instance (e.g., https://your-domain.atlassian.net)",
      },
      {
        type: "checkbox",
        query: "Using scoped token?",
        label: "Using scoped token",
        name: "scoped_token",
        optional: true,
        default: false,
      },
      {
        type: "tab",
        name: "indexing_scope",
        label: "How Should We Index Your Jira?",
        optional: true,
        tabs: [
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything",
                description:
                  "This connector will index all issues the provided credentials have access to!",
              },
            ],
          },
          {
            value: "project",
            label: "Project",
            fields: [
              {
                type: "text",
                query: "Enter the project key:",
                label: "Project Key",
                name: "project_key",
                description:
                  "The key of a specific project to index (e.g., 'PROJ').",
              },
            ],
          },
          {
            value: "jql",
            label: "JQL Query",
            fields: [
              {
                type: "text",
                query: "Enter the JQL query:",
                label: "JQL Query",
                name: "jql_query",
                description:
                  "A custom JQL query to filter Jira issues." +
                  "\n\nIMPORTANT: Do not include any time-based filters in the JQL query as that will conflict with the connector's logic. Additionally, do not include ORDER BY clauses." +
                  "\n\nSee Atlassian's [JQL documentation](https://support.atlassian.com/jira-software-cloud/docs/advanced-search-reference-jql-fields/) for more details on syntax.",
              },
            ],
          },
        ],
        defaultTab: "everything",
      },
      {
        type: "list",
        query: "Enter email addresses to blacklist from comments:",
        label: "Comment Email Blacklist",
        name: "comment_email_blacklist",
        description:
          "This is generally useful to ignore certain bots. Add user emails which comments should NOT be indexed.",
        optional: true,
      },
    ],
    advanced_values: [],
  },
  salesforce: {
    description: "Configure Salesforce connector",
    values: [
      {
        type: "tab",
        name: "salesforce_config_type",
        label: "Configuration Type",
        optional: true,
        tabs: [
          {
            value: "simple",
            label: "Simple",
            fields: [
              {
                type: "list",
                query: "Enter requested objects:",
                label: "Requested Objects",
                name: "requested_objects",
                optional: true,
                description:
                  "Specify the Salesforce object types you want us to index. If unsure, don't specify any objects and Onyx will default to indexing by 'Account'." +
                  "\n\nHint: Use the singular form of the object name (e.g., 'Opportunity' instead of 'Opportunities').",
              },
            ],
          },
          {
            value: "advanced",
            label: "Advanced",
            fields: [
              {
                type: "text",
                query: "Enter custom query config:",
                label: "Custom Query Config",
                name: "custom_query_config",
                optional: true,
                isTextArea: true,
                description:
                  "Enter a JSON configuration that precisely defines which fields and child objects to index. This gives you complete control over the data structure." +
                  "\n\nExample:" +
                  "\n{" +
                  '\n  "Account": {' +
                  '\n    "fields": ["Id", "Name", "Industry"],' +
                  '\n    "associations": {' +
                  '\n      "Contact": ["Id", "FirstName", "LastName", "Email"]' +
                  "\n    }" +
                  "\n  }" +
                  "\n}" +
                  `\n\n[See our docs](${DOCS_ADMINS_PATH}/connectors/official/salesforce) for more details.`,
              },
            ],
          },
        ],
        defaultTab: "simple",
      },
    ],
    advanced_values: [],
  },
  sharepoint: {
    description: "Configure SharePoint connector",
    values: [
      {
        type: "list",
        query: "Enter SharePoint sites:",
        label: "Sites",
        name: "sites",
        optional: true,
        description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
• Specifying 'https://onyxai.sharepoint.com/sites/support' for example only indexes this site.
• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example only indexes this folder.
• Specifying sites currently works for SharePoint instances using English, Spanish, or German. Contact the Onyx team if you need another language supported.
`,
      },
    ],
    advanced_values: [
      {
        type: "checkbox",
        query: "Index Documents:",
        label: "Index Documents",
        name: "include_site_documents",
        optional: true,
        default: true,
        description:
          "Index documents of all SharePoint libraries or folders defined above.",
      },
      {
        type: "checkbox",
        query: "Index ASPX Sites:",
        label: "Index ASPX Sites",
        name: "include_site_pages",
        optional: true,
        default: true,
        description:
          "Index aspx-pages of all SharePoint sites defined above, even if a library or folder is specified.",
      },
      {
        type: "checkbox",
        label: "Treat sharing links as public?",
        description:
          "When enabled, documents with a sharing link (anonymous or organization-wide) " +
          "are treated as public (visible to all Onyx users). " +
          "When disabled, only users and groups with explicit role assignments can see the document.",
        name: "treat_sharing_link_as_public",
        optional: true,
        default: false,
      },
      {
        type: "list",
        query: "Enter site URLs to exclude:",
        label: "Excluded Sites",
        name: "excluded_sites",
        optional: true,
        description:
          "Site URLs or glob patterns to exclude from indexing. " +
          "Matched sites will never be indexed, even if they appear in the sites list above. " +
          "Examples: 'https://contoso.sharepoint.com/sites/archive' (exact), " +
          "'*://*/sites/archive-*' (glob pattern).",
      },
      {
        type: "list",
        query: "Enter file path patterns to exclude:",
        label: "Excluded Paths",
        name: "excluded_paths",
        optional: true,
        description:
          "Glob patterns for file paths to exclude from indexing within document libraries. " +
          "Patterns are matched against both the full relative path and the filename. " +
          "Examples: '*.tmp' (temp files), '~$*' (Office lock files), 'Archive/*' (folder).",
      },
      {
        type: "text",
        query: "Microsoft Authority Host:",
        label: "Authority Host",
        name: "authority_host",
        optional: true,
        default: "https://login.microsoftonline.com",
        description:
          "The Microsoft identity authority host used for authentication. " +
          "For most deployments, leave as default. " +
          "For GCC High / DoD, use https://login.microsoftonline.us",
      },
      {
        type: "text",
        query: "Microsoft Graph API Host:",
        label: "Graph API Host",
        name: "graph_api_host",
        optional: true,
        default: "https://graph.microsoft.com",
        description:
          "The Microsoft Graph API host. " +
          "For most deployments, leave as default. " +
          "For GCC High / DoD, use https://graph.microsoft.us",
      },
      {
        type: "text",
        query: "SharePoint Domain Suffix:",
        label: "SharePoint Domain Suffix",
        name: "sharepoint_domain_suffix",
        optional: true,
        default: "sharepoint.com",
        description:
          "The domain suffix for SharePoint sites (e.g. sharepoint.com). " +
          "For most deployments, leave as default. " +
          "For GCC High, use sharepoint.us",
      },
    ],
  },
  teams: {
    description: "Configure Teams connector",
    values: [
      {
        type: "list",
        query: "Enter Teams to include:",
        label: "Teams",
        name: "teams",
        optional: true,
        description: `Specify 0 or more Teams to index. For example, specifying the Team 'Support' for the 'onyxai' Org will cause us to only index messages sent in channels belonging to the 'Support' Team. If no Teams are specified, all Teams in your organization will be indexed.`,
      },
    ],
    advanced_values: [
      {
        type: "text",
        query: "Microsoft Authority Host:",
        label: "Authority Host",
        name: "authority_host",
        optional: true,
        default: "https://login.microsoftonline.com",
        description:
          "The Microsoft identity authority host used for authentication. " +
          "For most deployments, leave as default. " +
          "For GCC High / DoD, use https://login.microsoftonline.us",
      },
      {
        type: "text",
        query: "Microsoft Graph API Host:",
        label: "Graph API Host",
        name: "graph_api_host",
        optional: true,
        default: "https://graph.microsoft.com",
        description:
          "The Microsoft Graph API host. " +
          "For most deployments, leave as default. " +
          "For GCC High / DoD, use https://graph.microsoft.us",
      },
    ],
  },
  discourse: {
    description: "Configure Discourse connector",
    values: [
      {
        type: "text",
        query: "Enter the base URL:",
        label: "Base URL",
        name: "base_url",
        optional: false,
      },
      {
        type: "list",
        query: "Enter categories to include:",
        label: "Categories",
        name: "categories",
        optional: true,
      },
    ],
    advanced_values: [],
  },
  drupal_wiki: {
    description: "Configure Drupal Wiki connector",
    values: [
      {
        type: "text",
        query: "Enter the base URL of the Drupal Wiki instance:",
        label: "Base URL",
        name: "base_url",
        optional: false,
        description:
          "The base URL of your Drupal Wiki instance (e.g., https://help.drupal-wiki.com )",
      },
      {
        type: "tab",
        name: "indexing_scope",
        label: "What should we index from Drupal Wiki?",
        optional: true,
        tabs: [
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything_description",
                description:
                  "This connector will index all spaces the provided credentials have access to!",
              },
            ],
          },
          {
            value: "specific",
            label: "Specific Spaces/Pages",
            fields: [
              {
                type: "list",
                query: "Enter space IDs to include:",
                label: "Space IDs",
                name: "spaces",
                description:
                  "Specify one or more space IDs to index. Only numeric values are allowed.",
                optional: true,
                transform: (values: string[]) =>
                  values.filter((value) => /^\d+$/.test(value.trim())),
              },
              {
                type: "list",
                query: "Enter page IDs to include:",
                label: "Page IDs",
                name: "pages",
                description:
                  "Specify one or more page IDs to index. Only numeric values are allowed.",
                optional: true,
                transform: (values: string[]) =>
                  values.filter((value) => /^\d+$/.test(value.trim())),
              },
            ],
          },
        ],
      },
      {
        type: "checkbox",
        query: "Include attachments?",
        label: "Include Attachments",
        name: "include_attachments",
        description:
          "Enable processing of page attachments including images and documents",
        default: false,
      },
    ],
    advanced_values: [],
  },
  axero: {
    description: "Configure Axero connector",
    values: [
      {
        type: "list",
        query: "Enter spaces to include:",
        label: "Spaces",
        name: "spaces",
        optional: true,
        description:
          "Specify zero or more Spaces to index (by the Space IDs). If no Space IDs are specified, all Spaces will be indexed.",
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  productboard: {
    description: "Configure Productboard connector",
    values: [],
    advanced_values: [],
  },
  slack: {
    description: "Configure Slack connector",
    values: [],
    advanced_values: [
      {
        type: "list",
        query: "Enter channels to include:",
        label: "Channels",
        name: "channels",
        description: `Specify 0 or more channels to index. For example, specifying the channel "support" will cause us to only index all content within the "#support" channel. If no channels are specified, all channels in your workspace will be indexed.`,
        optional: true,
        // Slack Channels can only be lowercase
        transform: (values) => values.map((value) => value.toLowerCase()),
      },
      {
        type: "checkbox",
        query: "Enable channel regex?",
        label: "Enable Channel Regex",
        name: "channel_regex_enabled",
        description: `If enabled, we will treat the "channels" specified above as regular expressions. A channel's messages will be pulled in by the connector if the name of the channel fully matches any of the specified regular expressions.
For example, specifying .*-support.* as a "channel" will cause the connector to include any channels with "-support" in the name.`,
        optional: true,
      },
      {
        type: "checkbox",
        query: "Include bot messages?",
        label: "Include Bot Messages",
        name: "include_bot_messages",
        description:
          "If enabled, messages from bots and apps will be indexed. Useful for channels that are primarily bot-driven feeds (e.g. CRM updates, automated notes).",
        optional: true,
      },
    ],
  },
  slab: {
    description: "Configure Slab connector",
    values: [
      {
        type: "text",
        query: "Enter the base URL:",
        label: "Base URL",
        name: "base_url",
        optional: false,
        description: `Specify the base URL for your Slab team. This will look something like: https://onyx.slab.com/`,
      },
    ],
    advanced_values: [],
  },
  guru: {
    description: "Configure Guru connector",
    values: [],
    advanced_values: [],
  },
  gong: {
    description: "Configure Gong connector",
    values: [
      {
        type: "list",
        query: "Enter workspaces to include:",
        label: "Workspaces",
        name: "workspaces",
        optional: true,
        description:
          "Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace name from Gong. If no workspaces are specified, transcripts from all workspaces will be indexed.",
      },
    ],
    advanced_values: [],
  },
  loopio: {
    description: "Configure Loopio connector",
    values: [
      {
        type: "text",
        query: "Enter the Loopio stack name",
        label: "Loopio Stack Name",
        name: "loopio_stack_name",
        description:
          "Must be exact match to the name in Library Management, leave this blank if you want to index all Stacks",
        optional: true,
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  file: {
    description: "Configure File connector",
    values: [
      {
        type: "file",
        query: "Enter file locations:",
        label: "Files",
        name: "file_locations",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  zulip: {
    description: "Configure Zulip connector",
    values: [
      {
        type: "text",
        query: "Enter the realm name",
        label: "Realm Name",
        name: "realm_name",
        optional: false,
      },
      {
        type: "text",
        query: "Enter the realm URL",
        label: "Realm URL",
        name: "realm_url",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  coda: {
    description: "Configure Coda connector",
    values: [],
    advanced_values: [],
  },
  notion: {
    description: "Configure Notion connector",
    values: [
      {
        type: "text",
        query: "Enter the root page ID",
        label: "Root Page ID",
        name: "root_page_id",
        optional: true,
        description:
          "If specified, will only index the specified page + all of its child pages. If left blank, will index all pages the integration has been given access to.",
      },
    ],
    advanced_values: [],
  },
  hubspot: {
    description: "Configure HubSpot connector",
    values: [
      {
        type: "multiselect",
        query: "Select which HubSpot objects to index:",
        label: "Object Types",
        name: "object_types",
        options: [
          { name: "Tickets", value: "tickets" },
          { name: "Companies", value: "companies" },
          { name: "Deals", value: "deals" },
          { name: "Contacts", value: "contacts" },
        ],
        default: ["tickets", "companies", "deals", "contacts"],
        description:
          "Choose which HubSpot object types to index. All types are selected by default.",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  document360: {
    description: "Configure Document360 connector",
    values: [
      {
        type: "text",
        query: "Enter the workspace",
        label: "Workspace",
        name: "workspace",
        optional: false,
      },
      {
        type: "list",
        query: "Enter categories to include",
        label: "Categories",
        name: "categories",
        optional: true,
        description:
          "Specify 0 or more categories to index. For instance, specifying the category 'Help' will cause us to only index all content within the 'Help' category. If no categories are specified, all categories in your workspace will be indexed.",
      },
    ],
    advanced_values: [],
  },
  clickup: {
    description: "Configure ClickUp connector",
    values: [
      {
        type: "select",
        query: "Select the connector type:",
        label: "Connector Type",
        name: "connector_type",
        optional: false,
        options: [
          { name: "list", value: "list" },
          { name: "folder", value: "folder" },
          { name: "space", value: "space" },
          { name: "workspace", value: "workspace" },
        ],
      },
      {
        type: "list",
        query: "Enter connector IDs:",
        label: "Connector IDs",
        name: "connector_ids",
        description: "Specify 0 or more id(s) to index from.",
        optional: true,
      },
      {
        type: "checkbox",
        query: "Retrieve task comments?",
        label: "Retrieve Task Comments",
        name: "retrieve_task_comments",
        description:
          "If checked, then all the comments for each task will also be retrieved and indexed.",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  google_sites: {
    description: "Configure Google Sites connector",
    values: [
      {
        type: "file",
        query: "Enter the zip path:",
        label: "File Locations",
        name: "file_locations",
        optional: false,
        description:
          "Upload a zip file containing the HTML of your Google Site",
      },
      {
        type: "text",
        query: "Enter the base URL:",
        label: "Base URL",
        name: "base_url",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  zendesk: {
    description: "Configure Zendesk connector",
    values: [
      {
        type: "select",
        query: "Select the what content this connector will index:",
        label: "Content Type",
        name: "content_type",
        optional: false,
        options: [
          { name: "articles", value: "articles" },
          { name: "tickets", value: "tickets" },
        ],
        default: "articles",
      },
    ],
    advanced_values: [
      {
        type: "number",
        label: "API Calls per Minute",
        name: "calls_per_minute",
        optional: true,
        description:
          "Restricts how many Zendesk API calls this connector can make per minute (applies only to this connector). See defaults: https://developer.zendesk.com/api-reference/introduction/rate-limits/",
      },
    ],
  },
  linear: {
    description: "Configure Linear connector",
    values: [],
    advanced_values: [],
  },
  dropbox: {
    description: "Configure Dropbox connector",
    values: [],
    advanced_values: [],
  },
  s3: {
    description: "Configure S3 connector",
    values: [
      {
        type: "text",
        query: "Enter the bucket name:",
        label: "Bucket Name",
        name: "bucket_name",
        optional: false,
      },
      {
        type: "text",
        query: "Enter the prefix:",
        label: "Prefix",
        name: "prefix",
        optional: true,
      },
      {
        type: "text",
        label: "Bucket Type",
        name: "bucket_type",
        optional: false,
        default: "s3",
        hidden: true,
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  r2: {
    description: "Configure R2 connector",
    values: [
      {
        type: "text",
        query: "Enter the bucket name:",
        label: "Bucket Name",
        name: "bucket_name",
        optional: false,
      },
      {
        type: "text",
        query: "Enter the prefix:",
        label: "Prefix",
        name: "prefix",
        optional: true,
      },
      {
        type: "checkbox",
        label: "EU Data Residency",
        name: "european_residency",
        description:
          "Check this box if your bucket has EU data residency enabled.",
        optional: true,
        default: false,
      },
      {
        type: "text",
        label: "Bucket Type",
        name: "bucket_type",
        optional: false,
        default: "r2",
        hidden: true,
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  google_cloud_storage: {
    description: "Configure Google Cloud Storage connector",
    values: [
      {
        type: "text",
        query: "Enter the bucket name:",
        label: "Bucket Name",
        name: "bucket_name",
        optional: false,
        description: "Name of the GCS bucket to index, e.g. my-gcs-bucket",
      },
      {
        type: "text",
        query: "Enter the prefix:",
        label: "Path Prefix",
        name: "prefix",
        optional: true,
      },
      {
        type: "text",
        label: "Bucket Type",
        name: "bucket_type",
        optional: false,
        default: "google_cloud_storage",
        hidden: true,
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  oci_storage: {
    description: "Configure OCI Storage connector",
    values: [
      {
        type: "text",
        query: "Enter the bucket name:",
        label: "Bucket Name",
        name: "bucket_name",
        optional: false,
      },
      {
        type: "text",
        query: "Enter the prefix:",
        label: "Prefix",
        name: "prefix",
        optional: true,
      },
      {
        type: "text",
        label: "Bucket Type",
        name: "bucket_type",
        optional: false,
        default: "oci_storage",
        hidden: true,
      },
    ],
    advanced_values: [],
  },
  wikipedia: {
    description: "Configure Wikipedia connector",
    values: [
      {
        type: "text",
        query: "Enter the language code:",
        label: "Language Code",
        name: "language_code",
        optional: false,
        description: "Input a valid Wikipedia language code (e.g. 'en', 'es')",
      },
      {
        type: "list",
        query: "Enter categories to include:",
        label: "Categories to index",
        name: "categories",
        description:
          "Specify 0 or more names of categories to index. For most Wikipedia sites, these are pages with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only specify the name of the category, not its url.",
        optional: true,
      },
      {
        type: "list",
        query: "Enter pages to include:",
        label: "Pages",
        name: "pages",
        optional: true,
        description: "Specify 0 or more names of pages to index.",
      },
      {
        type: "number",
        query: "Enter the recursion depth:",
        label: "Recursion Depth",
        name: "recurse_depth",
        description:
          "When indexing categories that have sub-categories, this will determine how may levels to index. Specify 0 to only index the category itself (i.e. no recursion). Specify -1 for unlimited recursion depth. Note, that in some rare instances, a category might contain itself in its dependencies, which will cause an infinite loop. Only use -1 if you confident that this will not happen.",
        optional: false,
      },
    ],
    advanced_values: [],
  },
  xenforo: {
    description: "Configure Xenforo connector",
    values: [
      {
        type: "text",
        query: "Enter forum or thread URL:",
        label: "URL",
        name: "base_url",
        optional: false,
        description:
          "The XenForo v2.2 forum URL to index. Can be board or thread.",
      },
    ],
    advanced_values: [],
  },
  asana: {
    description: "Configure Asana connector",
    values: [
      {
        type: "text",
        query: "Enter your Asana workspace ID:",
        label: "Workspace ID",
        name: "asana_workspace_id",
        optional: false,
        description:
          "The ID of the Asana workspace to index. You can find this at https://app.asana.com/api/1.0/workspaces. It's a number that looks like 1234567890123456.",
      },
      {
        type: "text",
        query: "Enter project IDs to index (optional):",
        label: "Project IDs",
        name: "asana_project_ids",
        description:
          "IDs of specific Asana projects to index, separated by commas. Leave empty to index all projects in the workspace. Example: 1234567890123456,2345678901234567",
        optional: true,
      },
      {
        type: "text",
        query: "Enter the Team ID (optional):",
        label: "Team ID",
        name: "asana_team_id",
        optional: true,
        description:
          "ID of a team to use for accessing team-visible tasks. This allows indexing of team-visible tasks in addition to public tasks. Leave empty if you don't want to use this feature.",
      },
    ],
    advanced_values: [],
  },
  mediawiki: {
    description: "Configure MediaWiki connector",
    values: [
      {
        type: "text",
        query: "Enter the language code:",
        label: "Language Code",
        name: "language_code",
        optional: false,
        description: "Input a valid MediaWiki language code (e.g. 'en', 'es')",
      },
      {
        type: "text",
        query: "Enter the MediaWiki Site URL",
        label: "MediaWiki Site URL",
        name: "hostname",
        optional: false,
      },
      {
        type: "list",
        query: "Enter categories to include:",
        label: "Categories to index",
        name: "categories",
        description:
          "Specify 0 or more names of categories to index. For most MediaWiki sites, these are pages with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only specify the name of the category, not its url.",
        optional: true,
      },
      {
        type: "list",
        query: "Enter pages to include:",
        label: "Pages",
        name: "pages",
        optional: true,
        description:
          "Specify 0 or more names of pages to index. Only specify the name of the page, not its url.",
      },
      {
        type: "number",
        query: "Enter the recursion depth:",
        label: "Recursion Depth",
        name: "recurse_depth",
        description:
          "When indexing categories that have sub-categories, this will determine how may levels to index. Specify 0 to only index the category itself (i.e. no recursion). Specify -1 for unlimited recursion depth. Note, that in some rare instances, a category might contain itself in its dependencies, which will cause an infinite loop. Only use -1 if you confident that this will not happen.",
        optional: true,
      },
    ],
    advanced_values: [],
  },
  discord: {
    description: "Configure Discord connector",
    values: [],
    advanced_values: [
      {
        type: "list",
        query: "Enter Server IDs to include:",
        label: "Server IDs",
        name: "server_ids",
        description: `Specify 0 or more server ids to include. Only channels inside them will be used for indexing`,
        optional: true,
      },
      {
        type: "list",
        query: "Enter channel names to include:",
        label: "Channels",
        name: "channel_names",
        description: `Specify 0 or more channels to index. For example, specifying the channel "support" will cause us to only index all content within the "#support" channel. If no channels are specified, all channels the bot has access to will be indexed.`,
        optional: true,
      },
      {
        type: "text",
        query: "Enter the Start Date:",
        label: "Start Date",
        name: "start_date",
        description: `Only messages after this date will be indexed. Format: YYYY-MM-DD`,
        optional: true,
      },
    ],
  },
  freshdesk: {
    description: "Configure Freshdesk connector",
    values: [],
    advanced_values: [],
  },
  fireflies: {
    description: "Configure Fireflies connector",
    values: [],
    advanced_values: [],
  },
  egnyte: {
    description: "Configure Egnyte connector",
    values: [
      {
        type: "text",
        query: "Enter folder path to index:",
        label: "Folder Path",
        name: "folder_path",
        optional: true,
        description:
          "The folder path to index (e.g., '/Shared/Documents'). Leave empty to index everything.",
      },
    ],
    advanced_values: [],
  },
  airtable: {
    description: "Configure Airtable connector",
    values: [
      {
        type: "tab",
        name: "airtable_scope",
        label: "What should we index from Airtable?",
        optional: true,
        tabs: [
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything_description",
                description:
                  "This connector will automatically discover and index all bases and tables accessible by your API token.",
              },
            ],
          },
          {
            value: "specific",
            label: "Specific Table",
            fields: [
              {
                type: "text",
                query: "Paste the Airtable URL:",
                label: "Airtable URL",
                name: "airtable_url",
                optional: false,
                description:
                  "Paste the URL from your browser when viewing the table, e.g. https://airtable.com/appXXX/tblYYY/viwZZZ",
              },
              {
                type: "text",
                label: "Share ID",
                name: "share_id",
                optional: true,
                description:
                  "Optional. If you want record links to use a shared view URL, put the share ID here e.g. shrkfjEzDmLaDtK83.",
              },
            ],
          },
        ],
      },
      {
        type: "checkbox",
        label: "Treat all fields except attachments as metadata",
        name: "treat_all_non_attachment_fields_as_metadata",
        description:
          "Choose this if the primary content to index are attachments and all other columns are metadata for these attachments.",
        optional: false,
      },
    ],
    advanced_values: [],
    overrideDefaultFreq: 60 * 60 * 24,
  },
  highspot: {
    description: "Configure Highspot connector",
    values: [
      {
        type: "tab",
        name: "highspot_scope",
        label: "What should we index from Highspot?",
        optional: true,
        tabs: [
          {
            value: "spots",
            label: "Specific Spots",
            fields: [
              {
                type: "list",
                query: "Enter the spot name(s):",
                label: "Spot Name(s)",
                name: "spot_names",
                optional: false,
                description: "For multiple spots, enter your spot one by one.",
              },
            ],
          },
          {
            value: "everything",
            label: "Everything",
            fields: [
              {
                type: "string_tab",
                label: "Everything",
                name: "everything",
                description:
                  "This connector will index all spots the provided credentials have access to!",
              },
            ],
          },
        ],
      },
    ],
    advanced_values: [],
  },
  imap: {
    description: "Configure Email connector",
    values: [
      {
        type: "text",
        query: "Enter the IMAP server host:",
        label: "IMAP Server Host",
        name: "host",
        optional: false,
        description:
          "The IMAP server hostname (e.g., imap.gmail.com, outlook.office365.com)",
      },
      {
        type: "number",
        query: "Enter the IMAP server port:",
        label: "IMAP Server Port",
        name: "port",
        optional: true,
        default: 993,
        description: "The IMAP server port (default: 993 for SSL)",
      },
      {
        type: "list",
        query: "Enter mailboxes to include:",
        label: "Mailboxes",
        name: "mailboxes",
        optional: true,
        description:
          "Specify mailboxes to index (e.g., INBOX, Sent, Drafts). Leave empty to index all mailboxes.",
      },
    ],
    advanced_values: [],
  },
};
type ConnectorField = ConnectionConfiguration["values"][number];

const buildInitialValuesForFields = (
  fields: ConnectorField[]
): Record<string, any> =>
  fields.reduce(
    (acc, field) => {
      if (field.type === "select") {
        acc[field.name] = null;
      } else if (field.type === "list") {
        acc[field.name] = field.default || [];
      } else if (field.type === "multiselect") {
        acc[field.name] = field.default || [];
      } else if (field.type === "checkbox") {
        acc[field.name] = field.default ?? false;
      } else if (field.default !== undefined) {
        acc[field.name] = field.default;
      }
      return acc;
    },
    {} as Record<string, any>
  );

export function createConnectorInitialValues(
  connector: ConfigurableSources
): Record<string, any> & AccessTypeGroupSelectorFormType {
  const configuration = connectorConfigs[connector];

  return {
    name: "",
    groups: [],
    access_type: "public",
    ...buildInitialValuesForFields(configuration.values),
    ...buildInitialValuesForFields(configuration.advanced_values),
  };
}

export function createConnectorValidationSchema(
  connector: ConfigurableSources
): Yup.ObjectSchema<Record<string, any>> {
  const configuration = connectorConfigs[connector];

  const object = Yup.object().shape({
    access_type: Yup.string().required("Access Type is required"),
    name: Yup.string().required("Connector Name is required"),
    ...[...configuration.values, ...configuration.advanced_values].reduce(
      (acc, field) => {
        let schema: any =
          field.type === "select"
            ? Yup.string()
            : field.type === "list"
              ? Yup.array().of(Yup.string())
              : field.type === "multiselect"
                ? Yup.array().of(Yup.string())
                : field.type === "checkbox"
                  ? Yup.boolean()
                  : field.type === "file"
                    ? Yup.mixed()
                    : Yup.string();

        if (!field.optional) {
          schema = schema.required(`${field.label} is required`);
        }

        acc[field.name] = schema;
        return acc;
      },
      {} as Record<string, any>
    ),
    // These are advanced settings
    indexingStart: Yup.string().nullable(),
    pruneFreq: Yup.number().min(
      0.083,
      "Prune frequency must be at least 0.083 hours (5 minutes)"
    ),
    refreshFreq: Yup.number().min(
      1,
      "Refresh frequency must be at least 1 minute"
    ),
  });

  return object;
}

export const defaultPruneFreqHours = 720; // 30 days in hours
export const defaultRefreshFreqMinutes = 30; // 30 minutes

// CONNECTORS
export interface ConnectorBase<T> {
  name: string;
  source: ValidSources;
  input_type: ValidInputTypes;
  connector_specific_config: T;
  refresh_freq: number | null;
  prune_freq: number | null;
  indexing_start: Date | null;
  access_type: string;
  groups?: number[];
  from_beginning?: boolean;
}

export interface Connector<T> extends ConnectorBase<T> {
  id: number;
  credential_ids: number[];
  time_created: string;
  time_updated: string;
}

export interface ConnectorSnapshot {
  id: number;
  name: string;
  source: ValidSources;
  input_type: ValidInputTypes;
  // connector_specific_config
  refresh_freq: number | null;
  prune_freq: number | null;
  credential_ids: number[];
  indexing_start: number | null;
  time_created: string;
  time_updated: string;
  from_beginning?: boolean;
}

export interface WebConfig {
  base_url: string;
  web_connector_type?: "recursive" | "single" | "sitemap";
}

export interface GithubConfig {
  repo_owner: string;
  repositories: string; // Comma-separated list of repository names
  include_prs: boolean;
  include_issues: boolean;
}

export interface GitlabConfig {
  project_owner: string;
  project_name: string;
  include_mrs: boolean;
  include_issues: boolean;
}

export interface BitbucketConfig {
  workspace: string;
  repositories?: string;
  projects?: string;
}

export interface GoogleDriveConfig {
  include_shared_drives?: boolean;
  shared_drive_urls?: string;
  include_my_drives?: boolean;
  my_drive_emails?: string;
  shared_folder_urls?: string;
}

export interface GmailConfig {}

export interface BookstackConfig {}

export interface OutlineConfig {}

export interface ConfluenceConfig {
  wiki_base: string;
  space?: string;
  page_id?: string;
  is_cloud?: boolean;
  index_recursively?: boolean;
  cql_query?: string;
}

export interface JiraConfig {
  jira_project_url: string;
  project_key?: string;
  comment_email_blacklist?: string[];
  jql_query?: string;
}

export interface SalesforceConfig {
  requested_objects?: string[];
}

export interface SharepointConfig {
  sites?: string[];
  include_site_pages?: boolean;
  treat_sharing_link_as_public?: boolean;
  include_site_documents?: boolean;
  authority_host?: string;
  graph_api_host?: string;
  sharepoint_domain_suffix?: string;
}

export interface TeamsConfig {
  teams?: string[];
  authority_host?: string;
  graph_api_host?: string;
}

export interface DiscourseConfig {
  base_url: string;
  categories?: string[];
}

export interface AxeroConfig {
  spaces?: string[];
}

export interface DrupalWikiConfig {
  base_url: string;
  spaces?: string[];
  pages?: string[];
  include_attachments?: boolean;
}

export interface ProductboardConfig {}

export interface SlackConfig {
  workspace: string;
  channels?: string[];
  channel_regex_enabled?: boolean;
  include_bot_messages?: boolean;
}

export interface SlabConfig {
  base_url: string;
}

export interface GuruConfig {}

export interface GongConfig {
  workspaces?: string[];
}

export interface LoopioConfig {
  loopio_stack_name?: string;
}

export interface FileConfig {
  file_locations: string[];
  file_names: string[];
  zip_metadata_file_id: string | null;
}

export interface ZulipConfig {
  realm_name: string;
  realm_url: string;
}

export interface CodaConfig {
  workspace_id?: string;
}

export interface NotionConfig {
  root_page_id?: string;
}

export interface HubSpotConfig {
  object_types?: string[];
}

export interface Document360Config {
  workspace: string;
  categories?: string[];
}

export interface ClickupConfig {
  connector_type: "list" | "folder" | "space" | "workspace";
  connector_ids?: string[];
  retrieve_task_comments: boolean;
}

export interface GoogleSitesConfig {
  zip_path: string;
  base_url: string;
}

export interface XenforoConfig {
  base_url: string;
}

export interface ZendeskConfig {
  content_type?: "articles" | "tickets";
  calls_per_minute?: number;
}

export interface DropboxConfig {}

export interface S3Config {
  bucket_type: "s3";
  bucket_name: string;
  prefix: string;
}

export interface R2Config {
  bucket_type: "r2";
  bucket_name: string;
  prefix: string;
  european_residency?: boolean;
}

export interface GCSConfig {
  bucket_type: "google_cloud_storage";
  bucket_name: string;
  prefix: string;
}

export interface OCIConfig {
  bucket_type: "oci_storage";
  bucket_name: string;
  prefix: string;
}

export interface MediaWikiBaseConfig {
  connector_name: string;
  language_code: string;
  categories?: string[];
  pages?: string[];
  recurse_depth?: number;
}

export interface AsanaConfig {
  asana_workspace_id: string;
  asana_project_ids?: string;
  asana_team_id?: string;
}

export interface FreshdeskConfig {}

export interface FirefliesConfig {}

export interface MediaWikiConfig extends MediaWikiBaseConfig {
  hostname: string;
}

export interface WikipediaConfig extends MediaWikiBaseConfig {}

export interface ImapConfig {
  host: string;
  port?: number;
  mailboxes?: string[];
}


================================================
FILE: web/src/lib/connectors/credentials.ts
================================================
import { ValidSources } from "../types";
import { TypedFile } from "./fileTypes";

export interface OAuthAdditionalKwargDescription {
  name: string;
  display_name: string;
  description: string;
}

export interface OAuthDetails {
  oauth_enabled: boolean;
  additional_kwargs: OAuthAdditionalKwargDescription[];
}
export interface AuthMethodOption<TFields> {
  value: string;
  label: string;
  fields: TFields;
  description?: string;
  // UI-only: if true, hide/disable the "Auto Sync Permissions" access type when this auth is used
  disablePermSync?: boolean;
}
export interface CredentialTemplateWithAuth<TFields> {
  authentication_method?: string;
  authMethods?: AuthMethodOption<Partial<TFields>>[];
}

export interface CredentialBase<T> {
  credential_json: T;
  admin_public: boolean;
  source: ValidSources;
  name?: string;
  curator_public?: boolean;
  groups?: number[];
}

export interface CredentialWithPrivateKey<T> extends CredentialBase<T> {
  private_key: TypedFile;
}

export interface Credential<T> extends CredentialBase<T> {
  id: number;
  user_id: string | null;
  user_email: string | null;
  time_created: string;
  time_updated: string;
}
export interface GithubCredentialJson {
  github_access_token: string;
}

export interface GitbookCredentialJson {
  gitbook_api_key: string;
}

export interface GitlabCredentialJson {
  gitlab_url: string;
  gitlab_access_token: string;
}

export interface BitbucketCredentialJson {
  bitbucket_email: string;
  bitbucket_api_token: string;
}

export interface BookstackCredentialJson {
  bookstack_base_url: string;
  bookstack_api_token_id: string;
  bookstack_api_token_secret: string;
}

export interface OutlineCredentialJson {
  outline_base_url: string;
  outline_api_token: string;
}

export interface ConfluenceCredentialJson {
  confluence_username: string;
  confluence_access_token: string;
}

export interface JiraCredentialJson {
  jira_user_email: string | null;
  jira_api_token: string;
}

export interface JiraServerCredentialJson {
  jira_api_token: string;
}

export interface ProductboardCredentialJson {
  productboard_access_token: string;
}

export interface SlackCredentialJson {
  slack_bot_token: string;
}

export interface GmailCredentialJson {
  google_tokens: string;
  google_primary_admin: string;
}

export interface GoogleDriveCredentialJson {
  google_tokens: string;
  google_primary_admin: string;
  authentication_method?: string;
}

export interface GmailServiceAccountCredentialJson {
  google_service_account_key: string;
  google_primary_admin: string;
}

export interface GoogleDriveServiceAccountCredentialJson {
  google_service_account_key: string;
  google_primary_admin: string;
  authentication_method?: string;
}

export interface SlabCredentialJson {
  slab_bot_token: string;
}

export interface CodaCredentialJson {
  coda_bearer_token: string;
}

export interface NotionCredentialJson {
  notion_integration_token: string;
}

export interface ZulipCredentialJson {
  zuliprc_content: string;
}

export interface GuruCredentialJson {
  guru_user: string;
  guru_user_token: string;
}

export interface GongCredentialJson {
  gong_access_key: string;
  gong_access_key_secret: string;
}

export interface LoopioCredentialJson {
  loopio_subdomain: string;
  loopio_client_id: string;
  loopio_client_token: string;
}

export interface LinearCredentialJson {
  linear_access_token: string;
}

export interface HubSpotCredentialJson {
  hubspot_access_token: string;
}

export interface Document360CredentialJson {
  portal_id: string;
  document360_api_token: string;
}

export interface ClickupCredentialJson {
  clickup_api_token: string;
  clickup_team_id: string;
}

export interface ZendeskCredentialJson {
  zendesk_subdomain: string;
  zendesk_email: string;
  zendesk_token: string;
}

export interface DropboxCredentialJson {
  dropbox_access_token: string;
}

export interface R2CredentialJson {
  account_id: string;
  r2_access_key_id: string;
  r2_secret_access_key: string;
}

export interface S3CredentialJson {
  aws_access_key_id?: string;
  aws_secret_access_key?: string;
  aws_role_arn?: string;
}

export interface GCSCredentialJson {
  access_key_id: string;
  secret_access_key: string;
}

export interface OCICredentialJson {
  namespace: string;
  region: string;
  access_key_id: string;
  secret_access_key: string;
}
export interface SalesforceCredentialJson {
  sf_username: string;
  sf_password: string;
  sf_security_token: string;
  is_sandbox: boolean;
}

export interface SharepointCredentialJson {
  sp_client_id: string;
  sp_client_secret?: string;
  sp_directory_id: string;
  sp_certificate_password?: string;
  sp_private_key?: TypedFile;
}

export interface AsanaCredentialJson {
  asana_api_token_secret: string;
}

export interface TeamsCredentialJson {
  teams_client_id: string;
  teams_client_secret: string;
  teams_directory_id: string;
}

export interface DiscourseCredentialJson {
  discourse_api_key: string;
  discourse_api_username: string;
}

export interface AxeroCredentialJson {
  base_url: string;
  axero_api_token: string;
}

export interface DiscordCredentialJson {
  discord_bot_token: string;
}

export interface FreshdeskCredentialJson {
  freshdesk_domain: string;
  freshdesk_api_key: string;
}

export interface FirefliesCredentialJson {
  fireflies_api_key: string;
}

export interface MediaWikiCredentialJson {}
export interface WikipediaCredentialJson extends MediaWikiCredentialJson {}

export interface EgnyteCredentialJson {
  domain: string;
  access_token: string;
}

export interface AirtableCredentialJson {
  airtable_access_token: string;
}

export interface HighspotCredentialJson {
  highspot_url: string;
  highspot_key: string;
  highspot_secret: string;
}

export interface DrupalWikiCredentialJson {
  drupal_wiki_api_token: string;
}

export interface ImapCredentialJson {
  imap_username: string;
  imap_password: string;
}

export interface TestRailCredentialJson {
  testrail_base_url: string;
  testrail_username: string;
  testrail_api_key: string;
}

export const credentialTemplates: Record<ValidSources, any> = {
  github: { github_access_token: "" } as GithubCredentialJson,
  gitlab: {
    gitlab_url: "",
    gitlab_access_token: "",
  } as GitlabCredentialJson,
  bitbucket: {
    bitbucket_email: "",
    bitbucket_api_token: "",
  } as BitbucketCredentialJson,
  slack: { slack_bot_token: "" } as SlackCredentialJson,
  bookstack: {
    bookstack_base_url: "",
    bookstack_api_token_id: "",
    bookstack_api_token_secret: "",
  } as BookstackCredentialJson,
  outline: {
    outline_base_url: "",
    outline_api_token: "",
  } as OutlineCredentialJson,
  confluence: {
    confluence_username: "",
    confluence_access_token: "",
  } as ConfluenceCredentialJson,
  jira: {
    jira_user_email: null,
    jira_api_token: "",
  } as JiraCredentialJson,
  productboard: { productboard_access_token: "" } as ProductboardCredentialJson,
  slab: { slab_bot_token: "" } as SlabCredentialJson,
  coda: { coda_bearer_token: "" } as CodaCredentialJson,
  notion: { notion_integration_token: "" } as NotionCredentialJson,
  guru: { guru_user: "", guru_user_token: "" } as GuruCredentialJson,
  gong: {
    gong_access_key: "",
    gong_access_key_secret: "",
  } as GongCredentialJson,
  zulip: { zuliprc_content: "" } as ZulipCredentialJson,
  linear: { linear_access_token: "" } as LinearCredentialJson,
  hubspot: { hubspot_access_token: "" } as HubSpotCredentialJson,
  document360: {
    portal_id: "",
    document360_api_token: "",
  } as Document360CredentialJson,
  loopio: {
    loopio_subdomain: "",
    loopio_client_id: "",
    loopio_client_token: "",
  } as LoopioCredentialJson,
  dropbox: { dropbox_access_token: "" } as DropboxCredentialJson,
  salesforce: {
    sf_username: "",
    sf_password: "",
    sf_security_token: "",
    is_sandbox: false,
  } as SalesforceCredentialJson,
  sharepoint: {
    authentication_method: "client_credentials",
    authMethods: [
      {
        value: "client_secret",
        label: "Client Secret",
        fields: {
          sp_client_id: "",
          sp_client_secret: "",
          sp_directory_id: "",
        },
        description:
          "If you select this mode, the SharePoint connector will use a client secret to authenticate. You will need to provide the client ID and client secret.",
        disablePermSync: true,
      },
      {
        value: "certificate",
        label: "Certificate Authentication",
        fields: {
          sp_client_id: "",
          sp_directory_id: "",
          sp_certificate_password: "",
          sp_private_key: null,
        },
        description:
          "If you select this mode, the SharePoint connector will use a certificate to authenticate. You will need to provide the client ID, directory ID, certificate password, and PFX data.",
        disablePermSync: false,
      },
    ],
  } as CredentialTemplateWithAuth<SharepointCredentialJson>,
  asana: {
    asana_api_token_secret: "",
  } as AsanaCredentialJson,
  teams: {
    teams_client_id: "",
    teams_client_secret: "",
    teams_directory_id: "",
  } as TeamsCredentialJson,
  zendesk: {
    zendesk_subdomain: "",
    zendesk_email: "",
    zendesk_token: "",
  } as ZendeskCredentialJson,
  discourse: {
    discourse_api_key: "",
    discourse_api_username: "",
  } as DiscourseCredentialJson,
  axero: {
    base_url: "",
    axero_api_token: "",
  } as AxeroCredentialJson,
  clickup: {
    clickup_api_token: "",
    clickup_team_id: "",
  } as ClickupCredentialJson,

  s3: {
    authentication_method: "access_key",
    authMethods: [
      {
        value: "access_key",
        label: "Access Key and Secret",
        fields: {
          aws_access_key_id: "",
          aws_secret_access_key: "",
        },
        disablePermSync: false,
      },
      {
        value: "iam_role",
        label: "IAM Role",
        fields: {
          aws_role_arn: "",
        },
        disablePermSync: false,
      },
      {
        value: "assume_role",
        label: "Assume Role",
        fields: {},
        description:
          "If you select this mode, the Amazon EC2 instance will assume its existing role to access S3. No additional credentials are required.",
        disablePermSync: false,
      },
    ],
  } as CredentialTemplateWithAuth<S3CredentialJson>,
  r2: {
    account_id: "",
    r2_access_key_id: "",
    r2_secret_access_key: "",
  } as R2CredentialJson,
  google_cloud_storage: {
    access_key_id: "",
    secret_access_key: "",
  } as GCSCredentialJson,
  oci_storage: {
    namespace: "",
    region: "",
    access_key_id: "",
    secret_access_key: "",
  } as OCICredentialJson,
  freshdesk: {
    freshdesk_domain: "",
    freshdesk_api_key: "",
  } as FreshdeskCredentialJson,
  fireflies: {
    fireflies_api_key: "",
  } as FirefliesCredentialJson,
  egnyte: {
    domain: "",
    access_token: "",
  } as EgnyteCredentialJson,
  airtable: {
    airtable_access_token: "",
  } as AirtableCredentialJson,
  drupal_wiki: {
    drupal_wiki_api_token: "",
  } as DrupalWikiCredentialJson,
  xenforo: null,
  google_sites: null,
  file: null,
  user_file: null,
  craft_file: null, // User Library - managed through dedicated UI
  wikipedia: null,
  mediawiki: null,
  web: null,
  not_applicable: null,
  ingestion_api: null,
  federated_slack: null,
  discord: { discord_bot_token: "" } as DiscordCredentialJson,

  // NOTE: These are Special Cases
  google_drive: { google_tokens: "" } as GoogleDriveCredentialJson,
  gmail: { google_tokens: "" } as GmailCredentialJson,
  gitbook: {
    gitbook_api_key: "",
  } as GitbookCredentialJson,
  highspot: {
    highspot_url: "",
    highspot_key: "",
    highspot_secret: "",
  } as HighspotCredentialJson,
  imap: {
    imap_username: "",
    imap_password: "",
  } as ImapCredentialJson,
  testrail: {
    testrail_base_url: "",
    testrail_username: "",
    testrail_api_key: "",
  } as TestRailCredentialJson,
};

export const credentialDisplayNames: Record<string, string> = {
  // Github
  github_access_token: "GitHub Access Token",

  // Gitlab
  gitlab_url: "GitLab URL",
  gitlab_access_token: "GitLab Access Token",

  // Bookstack
  bookstack_base_url: "Bookstack Base URL",
  bookstack_api_token_id: "Bookstack API Token ID",
  bookstack_api_token_secret: "Bookstack API Token Secret",

  // Outline
  outline_base_url:
    "Outline Base URL (e.g. https://app.getoutline.com or your self-hosted URL)",
  outline_api_token: "Outline API Token",

  // Confluence
  confluence_username: "Confluence Username",
  confluence_access_token: "Confluence Access Token",

  // Jira
  jira_user_email: "Jira User Email (required for Jira Cloud)",
  jira_api_token: "API or Personal Access Token",

  // Productboard
  productboard_access_token: "Productboard Access Token",

  // Slack
  slack_bot_token: "Slack Bot Token",

  // Discord
  discord_bot_token: "Discord Bot Token",

  // Gmail and Google Drive
  google_tokens: "Google Oauth Tokens",
  google_service_account_key: "Google Service Account Key",
  google_primary_admin: "Primary Admin Email",

  // Slab
  slab_bot_token: "Slab Bot Token",

  // Coda
  coda_bearer_token: "Coda Bearer Token",

  // Notion
  notion_integration_token: "Notion Integration Token",

  // Zulip
  zuliprc_content: "Zuliprc Content",

  // Guru
  guru_user: "Guru User",
  guru_user_token: "Guru User Token",

  // Gong
  gong_access_key: "Gong Access Key",
  gong_access_key_secret: "Gong Access Key Secret",

  // Loopio
  loopio_subdomain: "Loopio Subdomain",
  loopio_client_id: "Loopio Client ID",
  loopio_client_token: "Loopio Client Token",

  // Linear
  linear_access_token: "Linear Access Token",

  // HubSpot
  hubspot_access_token: "HubSpot Access Token",
  // Document360
  portal_id: "Document360 Portal ID",
  document360_api_token: "Document360 API Token",

  // Clickup
  clickup_api_token: "ClickUp API Token",
  clickup_team_id: "ClickUp Team ID",

  // Zendesk
  zendesk_subdomain: "Zendesk Subdomain",
  zendesk_email: "Zendesk Email",
  zendesk_token: "Zendesk Token",

  // Dropbox
  dropbox_access_token: "Dropbox API Key",

  // R2
  account_id: "R2 Account ID",
  r2_access_key_id: "R2 Access Key ID",
  r2_secret_access_key: "R2 Secret Access Key",

  // IMAP
  imap_username: "IMAP Username",
  imap_password: "IMAP Password",

  // TestRail
  testrail_base_url: "TestRail Base URL (e.g. https://yourcompany.testrail.io)",
  testrail_username: "TestRail Username or Email",
  testrail_api_key: "TestRail API Key",

  // S3
  aws_access_key_id: "AWS Access Key ID",
  aws_secret_access_key: "AWS Secret Access Key",
  aws_role_arn: "AWS Role ARN",
  authentication_method: "Authentication Method",

  // GCS
  access_key_id: "GCS Access Key ID",
  secret_access_key: "GCS Secret Access Key",

  // OCI
  namespace: "OCI Namespace",
  region: "OCI Region",

  // Salesforce
  sf_username: "Salesforce Username",
  sf_password: "Salesforce Password",
  sf_security_token: "Salesforce Security Token",
  is_sandbox: "Is Sandbox Environment",

  // Sharepoint
  sp_client_id: "SharePoint Client ID",
  sp_client_secret: "SharePoint Client Secret",
  sp_directory_id: "SharePoint Directory ID",
  sp_certificate_password: "SharePoint Certificate Password",
  sp_private_key: "SharePoint Private Key",

  // Asana
  asana_api_token_secret: "Asana API Token",

  // Teams
  teams_client_id: "Microsoft Teams Client ID",
  teams_client_secret: "Microsoft Teams Client Secret",
  teams_directory_id: "Microsoft Teams Directory ID",

  // Discourse
  discourse_api_key: "Discourse API Key",
  discourse_api_username: "Discourse API Username",

  // Axero
  base_url: "Axero Base URL",
  axero_api_token: "Axero API Token",

  // Freshdesk
  freshdesk_domain: "Freshdesk Domain",
  freshdesk_api_key: "Freshdesk API Key",

  // Fireflies
  fireflies_api_key: "Fireflies API Key",

  // GitBook
  gitbook_space_id: "GitBook Space ID",
  gitbook_api_key: "GitBook API Key",

  //Highspot
  highspot_url: "Highspot URL",
  highspot_key: "Highspot Key",
  highspot_secret: "Highspot Secret",

  // Drupal Wiki
  drupal_wiki_api_token: "Drupal Wiki Personal Access Token",

  // Bitbucket
  bitbucket_email: "Bitbucket Account Email",
  bitbucket_api_token: "Bitbucket API Token",
};

export function getDisplayNameForCredentialKey(key: string): string {
  return credentialDisplayNames[key] || key;
}


================================================
FILE: web/src/lib/connectors/fileTypes.ts
================================================
export enum FileTypeCategory {
  SHAREPOINT_PFX_FILE = "sharepoint_pfx_file",
}

export interface FileValidationRule {
  maxSizeKB?: number;
  allowedExtensions?: string[];
  contentValidation?: (file: File) => Promise<boolean>;
}

export interface FileTypeDefinition {
  category: FileTypeCategory;
  validation?: FileValidationRule;
  description?: string;
}

export const FILE_TYPE_DEFINITIONS: Record<
  FileTypeCategory,
  FileTypeDefinition
> = {
  [FileTypeCategory.SHAREPOINT_PFX_FILE]: {
    category: FileTypeCategory.SHAREPOINT_PFX_FILE,
    validation: {
      maxSizeKB: 10,
      allowedExtensions: [".pfx"],
    },
    description:
      "Please upload a .pfx file containing the private key for SharePoint. The file size must be under 10KB.",
  },
};

export class TypedFile {
  constructor(
    public readonly file: File,
    public readonly typeDefinition: FileTypeDefinition,
    public readonly fieldKey: string
  ) {}

  async validate(): Promise<{ isValid: boolean; errors: string[] }> {
    const errors: string[] = [];
    const { validation } = this.typeDefinition;

    if (!validation) {
      return {
        isValid: true,
        errors: [],
      };
    }

    // Size validation
    if (validation.maxSizeKB && this.file.size > validation.maxSizeKB * 1024) {
      errors.push(`File size must not exceed ${validation.maxSizeKB}KB`);
    }

    // Extension validation
    if (validation.allowedExtensions) {
      const extension = this.file.name.toLowerCase().split(".").pop();
      if (
        !extension ||
        !validation.allowedExtensions.includes(`.${extension}`)
      ) {
        errors.push(
          `File must have one of these extensions: ${validation.allowedExtensions.join(
            ", "
          )}`
        );
      }
    }

    // Content validation
    if (validation.contentValidation) {
      try {
        const isContentValid = await validation.contentValidation(this.file);
        if (!isContentValid) {
          errors.push(`File content validation failed`);
        }
      } catch (error) {
        errors.push(
          `Content validation error: ${
            error instanceof Error ? error.message : "Unknown error"
          }`
        );
      }
    }

    return {
      isValid: errors.length === 0,
      errors,
    };
  }
}

export function createTypedFile(
  file: File,
  fieldKey: string,
  typeDefinitionKey: FileTypeCategory
): TypedFile {
  const typeDefinition = FILE_TYPE_DEFINITIONS[typeDefinitionKey];
  if (!typeDefinition) {
    throw new Error(`Unknown file type definition: ${typeDefinitionKey}`);
  }

  return new TypedFile(file, typeDefinition, fieldKey);
}

export function isTypedFileField(fieldKey: string): boolean {
  // Define which fields should be typed files
  const typedFileFields = new Set(["sp_private_key"]);
  return typedFileFields.has(fieldKey);
}

// Get the appropriate file type definition for a field
export function getFileTypeDefinitionForField(
  fieldKey: string
): FileTypeCategory | null {
  const fieldToTypeMap: Record<string, FileTypeCategory> = {
    sp_private_key: FileTypeCategory.SHAREPOINT_PFX_FILE,
  };

  return fieldToTypeMap[fieldKey] || null;
}


================================================
FILE: web/src/lib/connectors/oauth.ts
================================================
import useSWR from "swr";
import { ValidSources } from "../types";
import { OAuthDetails } from "./credentials";
import { errorHandlingFetcher } from "../fetcher";

export async function getConnectorOauthRedirectUrl(
  connector: ValidSources,
  additional_kwargs: Record<string, string>
): Promise<string | null> {
  const queryParams = new URLSearchParams({
    desired_return_url: window.location.href,
    ...additional_kwargs,
  });
  const response = await fetch(
    `/api/connector/oauth/authorize/${connector}?${queryParams.toString()}`
  );

  if (!response.ok) {
    console.error(`Failed to fetch OAuth redirect URL for ${connector}`);
    return null;
  }

  const data = await response.json();
  return data.redirect_url as string;
}

export function useOAuthDetails(sourceType: ValidSources) {
  return useSWR<OAuthDetails>(
    `/api/connector/oauth/details/${sourceType}`,
    errorHandlingFetcher,
    {
      shouldRetryOnError: false,
    }
  );
}


================================================
FILE: web/src/lib/constants/chatBackgrounds.ts
================================================
// Default chat background images

export const CHAT_BACKGROUND_NONE = "none";

export interface ChatBackgroundOption {
  id: string;
  src: string;
  thumbnail: string;
  label: string;
}

// Curated collection of scenic backgrounds that work well as chat backgrounds
export const CHAT_BACKGROUND_OPTIONS: ChatBackgroundOption[] = [
  {
    id: "none",
    src: CHAT_BACKGROUND_NONE,
    thumbnail: CHAT_BACKGROUND_NONE,
    label: "None",
  },
  {
    id: "clouds",
    src: "/chat-backgrounds/clouds.jpg",
    thumbnail: "/chat-backgrounds/thumbnails/clouds.jpg",
    label: "Clouds",
  },
  {
    id: "hills",
    src: "/chat-backgrounds/hills.jpg",
    thumbnail: "/chat-backgrounds/thumbnails/hills.jpg",
    label: "Hills",
  },
  {
    id: "plant",
    src: "/chat-backgrounds/plant.jpg",
    thumbnail: "/chat-backgrounds/thumbnails/plant.jpg",
    label: "Plants",
  },
  {
    id: "mountains",
    src: "/chat-backgrounds/mountains.jpg",
    thumbnail: "/chat-backgrounds/thumbnails/mountains.jpg",
    label: "Mountains",
  },
  {
    id: "night",
    src: "/chat-backgrounds/night.jpg",
    thumbnail: "/chat-backgrounds/thumbnails/night.jpg",
    label: "Night",
  },
];

export const getBackgroundById = (
  id: string | null
): ChatBackgroundOption | undefined => {
  if (!id || id === CHAT_BACKGROUND_NONE) {
    return CHAT_BACKGROUND_OPTIONS[0];
  }
  return CHAT_BACKGROUND_OPTIONS.find((bg) => bg.id === id);
};


================================================
FILE: web/src/lib/constants.ts
================================================
export const IS_DEV = process.env.NODE_ENV === "development";

export enum AuthType {
  BASIC = "basic",
  GOOGLE_OAUTH = "google_oauth",
  OIDC = "oidc",
  SAML = "saml",
  CLOUD = "cloud",
}

export const HOST_URL = process.env.WEB_DOMAIN || "http://localhost:3000";

export const INTERNAL_URL = process.env.INTERNAL_URL || "http://localhost:8080";

// Documentation URLs
export const DOCS_BASE_URL = "https://docs.onyx.app";
export const DOCS_ADMINS_PATH = `${DOCS_BASE_URL}/admins`;

export const MCP_INTERNAL_URL =
  process.env.MCP_INTERNAL_URL || "http://127.0.0.1:8090";

// NOTE: this should ONLY be used on the server-side (including middleware).
// The AUTH_TYPE environment variable is set in the backend and shared with Next.js
export const SERVER_SIDE_ONLY__AUTH_TYPE = (process.env.AUTH_TYPE ||
  AuthType.BASIC) as AuthType;

export const NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED =
  process.env.NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED?.toLowerCase() ===
  "true";

export const TENANT_ID_COOKIE_NAME = "onyx_tid";

export const SEARCH_TYPE_COOKIE_NAME = "search_type";
export const AGENTIC_SEARCH_TYPE_COOKIE_NAME = "agentic_type";

export const LOGOUT_DISABLED =
  process.env.NEXT_PUBLIC_DISABLE_LOGOUT?.toLowerCase() === "true";

export const TOGGLED_CONNECTORS_COOKIE_NAME = "toggled_connectors";

/* Enterprise-only settings */
export const NEXT_PUBLIC_CUSTOM_REFRESH_URL =
  process.env.NEXT_PUBLIC_CUSTOM_REFRESH_URL;

// NOTE: this should ONLY be used on the server-side. If used client side,
// it will not be accurate (will always be false).
// Mirrors backend logic: EE is enabled if EITHER the legacy flag OR license
// enforcement is active. LICENSE_ENFORCEMENT_ENABLED defaults to true on the
// backend, so we treat undefined as enabled here to match.
export const SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED =
  process.env.ENABLE_PAID_ENTERPRISE_EDITION_FEATURES?.toLowerCase() ===
    "true" ||
  process.env.LICENSE_ENFORCEMENT_ENABLED?.toLowerCase() !== "false";
// NOTE: since this is a `NEXT_PUBLIC_` variable, it will be set at
// build-time
// TODO: consider moving this to an API call so that the api_server
// can be the single source of truth
export const EE_ENABLED =
  process.env.NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES?.toLowerCase() === "true";

export const CUSTOM_ANALYTICS_ENABLED = process.env.CUSTOM_ANALYTICS_SECRET_KEY
  ? true
  : false;

export const GTM_ENABLED =
  process.env.NEXT_PUBLIC_GTM_ENABLED?.toLowerCase() === "true";

export const NEXT_PUBLIC_CLOUD_ENABLED =
  process.env.NEXT_PUBLIC_CLOUD_ENABLED?.toLowerCase() === "true";

export const REGISTRATION_URL =
  process.env.INTERNAL_URL || "http://127.0.0.1:3001";

export const SERVER_SIDE_ONLY__CLOUD_ENABLED =
  process.env.NEXT_PUBLIC_CLOUD_ENABLED?.toLowerCase() === "true";

export const NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED =
  process.env.NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED?.toLowerCase() === "true" &&
  !NEXT_PUBLIC_CLOUD_ENABLED;

export const NEXT_PUBLIC_TEST_ENV =
  process.env.NEXT_PUBLIC_TEST_ENV?.toLowerCase() === "true";

export const NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK =
  process.env.NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK?.toLowerCase() ===
  "true";

// Restrict markdown links to safe protocols
export const ALLOWED_URL_PROTOCOLS = ["http:", "https:", "mailto:"] as const;

// Agent/Persona related constants
export const MAX_CHARACTERS_PERSONA_DESCRIPTION = 5000000;
export const MAX_CHARACTERS_AGENT_DESCRIPTION = 500;
export const MAX_STARTER_MESSAGES = 4;
export const MAX_CHARACTERS_STARTER_MESSAGE = 200;
export const STARTER_MESSAGES_EXAMPLES = [
  "Give me an overview of some documents.",
  "Find the latest sales report.",
  "Compile a list of our engineering goals for this quarter.",
  "Summarize my goals for today.",
];

//Credential form data key constants
export const CREDENTIAL_NAME = "name";
export const CREDENTIAL_SOURCE = "source";
export const CREDENTIAL_UPLOADED_FILE = "uploaded_file";
export const CREDENTIAL_FIELD_KEY = "field_key";
export const CREDENTIAL_TYPE_DEFINITION_KEY = "type_definition_key";
export const CREDENTIAL_JSON = "credential_json";

export const MODAL_ROOT_ID = "modal-root";

export const UNNAMED_CHAT = "New Chat";

export const DEFAULT_AGENT_ID = 0;
export const GENERAL_ASSISTANT_ID = -1;
export const IMAGE_ASSISTANT_ID = -2;
export const ART_ASSISTANT_ID = -3;

// Used in the File Picker to show a max number of files.
// The rest will be hidden behind an "All Recent Files" button.
export const MAX_FILES_TO_SHOW = 3;

// SIZES
export const MOBILE_SIDEBAR_BREAKPOINT_PX = 640;
export const DESKTOP_SMALL_BREAKPOINT_PX = 912;
export const DESKTOP_MEDIUM_BREAKPOINT_PX = 1232;
export const DEFAULT_AVATAR_SIZE_PX = 18;
export const HORIZON_DISTANCE_PX = 800;
export const DEFAULT_LOGO_SIZE_PX = 24;

export const DEFAULT_CONTEXT_TOKENS = 120_000;
export const MAX_CHUNKS_FED_TO_CHAT = 25;

export const APP_SLOGAN = "Open Source AI Platform";


================================================
FILE: web/src/lib/contains.ts
================================================
import { RefObject } from "react";

interface SomeNonNestedObject {
  [key: string]: any;
}

export function objectsAreEquivalent(
  a: SomeNonNestedObject,
  b: SomeNonNestedObject
): boolean {
  // NOTE: only works for non-nested objects
  const aProps = Object.getOwnPropertyNames(a);
  const bProps = Object.getOwnPropertyNames(b);

  if (aProps.length !== bProps.length) {
    return false;
  }

  for (let i = 0; i < aProps.length; i++) {
    const propName = aProps[i];
    if (propName === undefined) {
      continue;
    }

    if (a[propName] !== b[propName]) {
      return false;
    }
  }

  return true;
}

export function containsObject(
  list: SomeNonNestedObject[],
  obj: SomeNonNestedObject
): boolean {
  // NOTE: only works for non-nested objects
  return list.some((item) => objectsAreEquivalent(item, obj));
}

export function isEventWithinRef(
  event: MouseEvent | TouchEvent,
  ref: RefObject<HTMLElement>
): boolean {
  if (!ref.current) return false;

  const rect = ref.current.getBoundingClientRect();

  let clientX: number;
  let clientY: number;
  if (event instanceof TouchEvent) {
    const touches_0 = event.touches[0];
    if (touches_0 === undefined) {
      throw new Error("Touch event must exist!");
    }
    clientX = touches_0.clientX;
    clientY = touches_0.clientY;
  } else {
    clientX = event.clientX;
    clientY = event.clientY;
  }

  return (
    clientX >= rect.left &&
    clientX <= rect.right &&
    clientY >= rect.top &&
    clientY <= rect.bottom
  );
}


================================================
FILE: web/src/lib/credential.ts
================================================
import {
  CredentialBase,
  CredentialWithPrivateKey,
} from "./connectors/credentials";
import { AccessType, ProcessingMode } from "@/lib/types";
import { TypedFile } from "./connectors/fileTypes";
import {
  CREDENTIAL_NAME,
  CREDENTIAL_SOURCE,
  CREDENTIAL_UPLOADED_FILE,
  CREDENTIAL_FIELD_KEY,
  CREDENTIAL_TYPE_DEFINITION_KEY,
  CREDENTIAL_JSON,
} from "./constants";

export async function createCredential(credential: CredentialBase<any>) {
  return await fetch(`/api/manage/credential`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(credential),
  });
}

export async function createCredentialWithPrivateKey(
  credential: CredentialWithPrivateKey<any>
) {
  const formData = new FormData();
  formData.append(CREDENTIAL_JSON, JSON.stringify(credential.credential_json));
  formData.append("admin_public", credential.admin_public.toString());
  formData.append(
    "curator_public",
    credential.curator_public?.toString() || "false"
  );
  if (credential.groups && credential.groups.length > 0) {
    credential.groups.forEach((group) => {
      formData.append("groups", String(group));
    });
  }
  formData.append(CREDENTIAL_NAME, credential.name || "");
  formData.append(CREDENTIAL_SOURCE, credential.source);
  if (credential.private_key) {
    formData.append(CREDENTIAL_UPLOADED_FILE, credential.private_key.file);
    formData.append(CREDENTIAL_FIELD_KEY, credential.private_key.fieldKey);
    formData.append(
      CREDENTIAL_TYPE_DEFINITION_KEY,
      credential.private_key.typeDefinition.category
    );
  }
  return await fetch(`/api/manage/credential/private-key`, {
    method: "POST",
    body: formData,
  });
}

export async function adminDeleteCredential<T>(credentialId: number) {
  return await fetch(`/api/manage/admin/credential/${credentialId}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
}

export async function deleteCredential<T>(
  credentialId: number,
  force?: boolean
) {
  return await fetch(`/api/manage/credential/${credentialId}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
}

export async function forceDeleteCredential<T>(credentialId: number) {
  return await fetch(`/api/manage/credential/force/${credentialId}`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
    },
  });
}

export function linkCredential(
  connectorId: number,
  credentialId: number,
  name: string,
  accessType?: AccessType,
  groups?: number[],
  autoSyncOptions?: Record<string, any>,
  processingMode?: ProcessingMode
) {
  return fetch(
    `/api/manage/connector/${connectorId}/credential/${credentialId}`,
    {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        name,
        access_type: accessType !== undefined ? accessType : "public",
        groups: groups || null,
        auto_sync_options: autoSyncOptions || null,
        processing_mode: processingMode || "REGULAR",
      }),
    }
  );
}

export function updateCredential(credentialId: number, newDetails: any) {
  const name = newDetails.name;
  const details = Object.fromEntries(
    Object.entries(newDetails).filter(
      ([key, value]) => key !== CREDENTIAL_NAME && value !== ""
    )
  );
  return fetch(`/api/manage/admin/credential/${credentialId}`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      name: name,
      credential_json: details,
    }),
  });
}

export function updateCredentialWithPrivateKey(
  credentialId: number,
  newDetails: any,
  privateKey: TypedFile
) {
  const name = newDetails.name;
  const details = Object.fromEntries(
    Object.entries(newDetails).filter(
      ([key, value]) => key !== CREDENTIAL_NAME && value !== ""
    )
  );
  const formData = new FormData();
  formData.append(CREDENTIAL_NAME, name);
  formData.append(CREDENTIAL_JSON, JSON.stringify(details));
  formData.append(CREDENTIAL_UPLOADED_FILE, privateKey.file);
  formData.append(CREDENTIAL_FIELD_KEY, privateKey.fieldKey);
  formData.append(
    CREDENTIAL_TYPE_DEFINITION_KEY,
    privateKey.typeDefinition.category
  );
  return fetch(`/api/manage/admin/credential/private-key/${credentialId}`, {
    method: "PUT",
    body: formData,
  });
}

export function swapCredential(
  newCredentialId: number,
  connectorId: number,
  accessType: AccessType
) {
  return fetch(`/api/manage/admin/credential/swap`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      new_credential_id: newCredentialId,
      connector_id: connectorId,
      access_type: accessType,
    }),
  });
}


================================================
FILE: web/src/lib/dateUtils.ts
================================================
"use client";

import { useEffect } from "react";
import { useState } from "react";

export const useNightTime = () => {
  const [isNight, setIsNight] = useState(false);

  useEffect(() => {
    const checkNightTime = () => {
      const currentHour = new Date().getHours();
      setIsNight(currentHour >= 18 || currentHour < 6);
    };

    checkNightTime();
    const interval = setInterval(checkNightTime, 60000); // Check every minute

    return () => clearInterval(interval);
  }, []);

  return { isNight };
};

export function getXDaysAgo(daysAgo: number) {
  const today = new Date();
  const daysAgoDate = new Date(today);
  daysAgoDate.setDate(today.getDate() - daysAgo);
  return daysAgoDate;
}

export function getXYearsAgo(yearsAgo: number) {
  const today = new Date();
  const yearsAgoDate = new Date(today);
  yearsAgoDate.setFullYear(yearsAgoDate.getFullYear() - yearsAgo);
  return yearsAgoDate;
}

export function normalizeDate(date: Date): Date {
  const normalizedDate = new Date(date);
  normalizedDate.setHours(0, 0, 0, 0);
  return normalizedDate;
}

export function isAfterDate(date: Date, maxDate: Date): boolean {
  return normalizeDate(date).getTime() > normalizeDate(maxDate).getTime();
}

export function isDateInFuture(date: Date): boolean {
  return isAfterDate(date, new Date());
}

export const timestampToDateString = (timestamp: string) => {
  const date = new Date(timestamp);
  const year = date.getFullYear();
  const month = date.getMonth() + 1; // getMonth() is zero-based
  const day = date.getDate();

  const formattedDate = `${year}-${month.toString().padStart(2, "0")}-${day
    .toString()
    .padStart(2, "0")}`;
  return formattedDate;
};

// Options for formatting the date
const dateOptions: Intl.DateTimeFormatOptions = {
  year: "numeric",
  month: "2-digit",
  day: "2-digit",
};

// Options for formatting the time
const timeOptions: Intl.DateTimeFormatOptions = {
  hour: "numeric",
  minute: "2-digit",
  hour12: true, // Use 12-hour format with AM/PM
};

export const timestampToReadableDate = (timestamp: string) => {
  const date = new Date(timestamp);
  return (
    date.toLocaleDateString(undefined, dateOptions) +
    ", " +
    date.toLocaleTimeString(undefined, timeOptions)
  );
};

export const buildDateString = (date: Date | null) => {
  return date
    ? `${Math.round(
        (new Date().getTime() - date.getTime()) / (1000 * 60 * 60 * 24)
      )} days ago`
    : "Select a time range";
};

export const getFormattedDateRangeString = (
  from: Date | null,
  to: Date | null
) => {
  if (!from || !to) return null;

  const options: Intl.DateTimeFormatOptions = {
    month: "short",
    day: "numeric",
    year: "numeric",
  };
  const fromString = from.toLocaleDateString("en-US", options);
  const toString = to.toLocaleDateString("en-US", options);

  return `${fromString} - ${toString}`;
};

export const getDateRangeString = (from: Date | null, to: Date | null) => {
  if (!from || !to) return null;

  const now = new Date();
  const fromDiffMs = now.getTime() - from.getTime();
  const toDiffMs = now.getTime() - to.getTime();

  const fromDiffDays = Math.floor(fromDiffMs / (1000 * 60 * 60 * 24));
  const toDiffDays = Math.floor(toDiffMs / (1000 * 60 * 60 * 24));

  const fromString = getTimeAgoString(from);
  const toString = getTimeAgoString(to);

  if (fromString === toString) return fromString;

  if (toDiffDays === 0) {
    return `${fromString} - Today`;
  }

  return `${fromString} - ${toString}`;
};

export const getTimeAgoString = (date: Date | null) => {
  if (!date) return null;

  const now = new Date();
  const diffMs = now.getTime() - date.getTime();
  const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
  const diffWeeks = Math.floor(diffDays / 7);
  const diffMonths = Math.floor(diffDays / 30);

  if (now.toDateString() === date.toDateString()) return "Today";
  if (diffDays === 1) return "Yesterday";
  if (diffDays < 7) return `${diffDays}d ago`;
  if (diffDays < 30) return `${diffWeeks}w ago`;
  return `${diffMonths}mo ago`;
};

/**
 * Format a date to short format like "Jan 27, 2026".
 * Always shows date, never time.
 */
export const formatDateShort = (dateStr: string | null | undefined): string => {
  if (!dateStr) return "—";
  return new Date(dateStr).toLocaleDateString("en-US", {
    month: "short",
    day: "numeric",
    year: "numeric",
  });
};

/**
 * Format an ISO timestamp as "YYYY/MM/DD HH:MM:SS" (24-hour, local time).
 * Intended for log displays where full precision is needed.
 */
export function formatDateTimeLog(iso: string): string {
  const d = new Date(iso);
  const pad = (n: number) => String(n).padStart(2, "0");
  return `${d.getFullYear()}/${pad(d.getMonth() + 1)}/${pad(d.getDate())} ${pad(
    d.getHours()
  )}:${pad(d.getMinutes())}:${pad(d.getSeconds())}`;
}

/**
 * Format an ISO timestamp as "HH:MM:SS" (24-hour, local time).
 * Intended for compact time-only displays.
 */
export function formatTimeOnly(iso: string): string {
  return new Date(iso).toLocaleTimeString(undefined, {
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    hour12: false,
  });
}

export function formatMmDdYyyy(d: string): string {
  const date = new Date(d);
  return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`;
}

/**
 * Format a duration in seconds as MM:SS (e.g. 65 → "01:05").
 */
export function formatElapsedTime(totalSeconds: number): string {
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes.toString().padStart(2, "0")}:${seconds
    .toString()
    .padStart(2, "0")}`;
}

export const getFormattedDateTime = (date: Date | null) => {
  if (!date) return null;

  const now = new Date();
  const isToday = now.toDateString() === date.toDateString();

  if (isToday) {
    // If it's today, return the time in format like "3:45 PM"
    return date.toLocaleTimeString("en-US", {
      hour: "numeric",
      minute: "2-digit",
      hour12: true,
    });
  } else {
    // Otherwise return the date in format like "Jan 15, 2023"
    return date.toLocaleDateString("en-US", {
      month: "short",
      day: "numeric",
      year: "numeric",
    });
  }
};


================================================
FILE: web/src/lib/documentDeletion.ts
================================================
import { toast } from "@/hooks/useToast";
import { DeletionAttemptSnapshot } from "./types";

export async function scheduleDeletionJobForConnector(
  connectorId: number,
  credentialId: number
) {
  // Will schedule a background job which will:
  // 1. Remove all documents indexed by the connector / credential pair
  // 2. Remove the connector (if this is the only pair using the connector)
  const response = await fetch(`/api/manage/admin/deletion-attempt`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      connector_id: connectorId,
      credential_id: credentialId,
    }),
  });
  if (response.ok) {
    return null;
  }
  return (await response.json()).detail;
}

export async function deleteCCPair(
  connectorId: number,
  credentialId: number,
  onCompletion?: () => void
) {
  const deletionScheduleError = await scheduleDeletionJobForConnector(
    connectorId,
    credentialId
  );
  if (deletionScheduleError) {
    throw new Error(deletionScheduleError);
  }
  toast.success("Scheduled deletion of connector!");
  onCompletion?.();
}

export function isCurrentlyDeleting(
  deletionAttempt: DeletionAttemptSnapshot | null
) {
  if (!deletionAttempt) {
    return false;
  }

  return (
    deletionAttempt.status === "PENDING" || deletionAttempt.status === "STARTED"
  );
}


================================================
FILE: web/src/lib/documentUtils.ts
================================================
import { OnyxDocument } from "./search/interfaces";

export function removeDuplicateDocs(
  documents: OnyxDocument[],
  agentic?: boolean,
  relevance?: any
) {
  const seen = new Set<string>();
  const output: OnyxDocument[] = [];
  documents.forEach((document) => {
    if (
      document.document_id &&
      !seen.has(document.document_id) &&
      (!agentic || (agentic && relevance && relevance[document.document_id]))
    ) {
      output.push(document);
      seen.add(document.document_id);
    }
  });
  return output;
}


================================================
FILE: web/src/lib/download.ts
================================================
/**
 * Trigger a browser file download.
 *
 * Supports two modes:
 *  1. **From content** — pass `content` (string) and optional `mimeType`.
 *     A Blob is created, downloaded, and the object URL is revoked.
 *  2. **From URL** — pass `url` (string). The browser navigates to the
 *     URL with the `download` attribute set.
 */
export function downloadFile(
  filename: string,
  opts: { content: string; mimeType?: string } | { url: string }
): void {
  const a = document.createElement("a");

  if ("content" in opts) {
    const blob = new Blob([opts.content], {
      type: opts.mimeType ?? "text/plain",
    });
    const url = URL.createObjectURL(blob);
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    setTimeout(() => URL.revokeObjectURL(url), 0);
  } else {
    a.href = opts.url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
  }
}


================================================
FILE: web/src/lib/drag/constants.ts
================================================
export const CHAT_SESSION_ID_KEY = "chatSessionId";


================================================
FILE: web/src/lib/error.ts
================================================
/**
 * Extract a human-readable error message from an SWR error object.
 * SWR errors from `errorHandlingFetcher` attach `info.message` or `info.detail`.
 */
export function getErrorMsg(
  error: { info?: { message?: string; detail?: string } } | null | undefined,
  fallback = "An unknown error occurred"
): string {
  return error?.info?.message || error?.info?.detail || fallback;
}


================================================
FILE: web/src/lib/extension/constants.ts
================================================
export const darkExtensionImages = [
  "https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
  "https://images.unsplash.com/photo-1520330461350-508fab483d6a?q=80&w=2723&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
];

export const lightExtensionImages = [
  "https://images.unsplash.com/photo-1473830439578-14e9a9e61d55?q=80&w=2670&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
  "https://images.unsplash.com/photo-1500964757637-c85e8a162699?q=80&w=2703&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
  "https://images.unsplash.com/photo-1475924156734-496f6cac6ec1?q=80&w=2670&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
];

// Chrome message types
export const CHROME_MESSAGE = {
  PREFERENCES_UPDATED: "PREFERENCES_UPDATED",
  ONYX_APP_LOADED: "ONYX_APP_LOADED",
  SET_DEFAULT_NEW_TAB: "SET_DEFAULT_NEW_TAB",
  LOAD_NEW_CHAT_PAGE: "LOAD_NEW_CHAT_PAGE",
  LOAD_NEW_PAGE: "LOAD_NEW_PAGE",
  AUTH_REQUIRED: "AUTH_REQUIRED",
  TAB_READING_ENABLED: "TAB_READING_ENABLED",
  TAB_READING_DISABLED: "TAB_READING_DISABLED",
  TAB_URL_UPDATED: "TAB_URL_UPDATED",
};

export const SUBMIT_MESSAGE_TYPES = {
  PAGE_CHANGE: "PAGE_CHANGE",
};

export const LocalStorageKeys = {
  THEME: "onyxTheme",
  LIGHT_BG_URL: "lightBgUrl",
  DARK_BG_URL: "darkBgUrl",
  SHORTCUTS: "shortCuts",
  SHOW_SHORTCUTS: "showShortcuts",
  USE_ONYX_AS_NEW_TAB: "useOnyxAsDefaultNewTab",
};

export const SEARCH_PARAMS = {
  DEFAULT_SIDEBAR_OFF: "defaultSidebarOff",
};

export const NO_AUTH_USER_ID = "__no_auth_user__";


================================================
FILE: web/src/lib/extension/utils.ts
================================================
import { useEffect } from "react";
import { CHROME_MESSAGE } from "./constants";

export type ExtensionContext = "new_tab" | "side_panel" | null;

// Returns the origin of the Chrome extension panel (our parent frame).
// window.location.ancestorOrigins is Chrome-specific and only populated
// when the page is loaded inside an iframe (e.g. the Chrome extension panel).
// Falls back to "*" in regular browser contexts (no parent frame).
export function getPanelOrigin(): string {
  return window.location.ancestorOrigins?.[0] ?? "*";
}

export function getExtensionContext(): {
  isExtension: boolean;
  context: ExtensionContext;
} {
  if (typeof window === "undefined")
    return { isExtension: false, context: null };

  const pathname = window.location.pathname;
  if (pathname.includes("/nrf/side-panel")) {
    return { isExtension: true, context: "side_panel" };
  }
  if (pathname.includes("/nrf")) {
    return { isExtension: true, context: "new_tab" };
  }
  return { isExtension: false, context: null };
}
export function sendSetDefaultNewTabMessage(value: boolean) {
  if (typeof window !== "undefined" && window.parent !== window) {
    window.parent.postMessage(
      { type: CHROME_MESSAGE.SET_DEFAULT_NEW_TAB, value },
      getPanelOrigin()
    );
  }
}

export const sendAuthRequiredMessage = () => {
  if (typeof window !== "undefined" && window.parent !== window) {
    window.parent.postMessage(
      { type: CHROME_MESSAGE.AUTH_REQUIRED },
      getPanelOrigin()
    );
  }
};

export const useSendAuthRequiredMessage = () => {
  useEffect(() => {
    sendAuthRequiredMessage();
  }, []);
};

export const sendMessageToParent = () => {
  if (typeof window !== "undefined" && window.parent !== window) {
    window.parent.postMessage(
      { type: CHROME_MESSAGE.ONYX_APP_LOADED },
      getPanelOrigin()
    );
  }
};
export const useSendMessageToParent = () => {
  useEffect(() => {
    sendMessageToParent();
  }, []);
};


================================================
FILE: web/src/lib/fetchUtils.ts
================================================
export const getErrorMsg = async (response: Response) => {
  if (response.ok) {
    return null;
  }
  const responseJson = await response.json();
  return responseJson.message || responseJson.detail || "Unknown error";
};


================================================
FILE: web/src/lib/fetcher.ts
================================================
export class FetchError extends Error {
  status: number;
  info: any;
  constructor(message: string, status: number, info: any) {
    super(message);
    this.status = status;
    this.info = info;
  }
}

export class RedirectError extends FetchError {
  constructor(message: string, status: number, info: any) {
    super(message, status, info);
  }
}

const DEFAULT_AUTH_ERROR_MSG =
  "An error occurred while fetching the data, related to the user's authentication status.";

const DEFAULT_ERROR_MSG = "An error occurred while fetching the data.";

/**
 * SWR `onErrorRetry` callback that suppresses automatic retries for
 * authentication errors (401/403). Pass this to any SWR hook whose endpoint
 * requires auth so that unauthenticated pages don't spam the backend.
 */
export const skipRetryOnAuthError: NonNullable<
  import("swr").SWRConfiguration["onErrorRetry"]
> = (error, _key, _config, revalidate, { retryCount }) => {
  if (
    error instanceof FetchError &&
    (error.status === 401 || error.status === 403)
  )
    return;
  // For non-auth errors, retry with exponential backoff
  if (
    _config.errorRetryCount !== undefined &&
    retryCount >= _config.errorRetryCount
  )
    return;
  const delay = Math.min(2000 * 2 ** retryCount, 30000);
  setTimeout(() => revalidate({ retryCount }), delay);
};

export const errorHandlingFetcher = async <T>(url: string): Promise<T> => {
  const res = await fetch(url);

  if (res.status === 403) {
    const redirect = new RedirectError(
      DEFAULT_AUTH_ERROR_MSG,
      res.status,
      await res.json()
    );
    throw redirect;
  }

  if (!res.ok) {
    const error = new FetchError(
      DEFAULT_ERROR_MSG,
      res.status,
      await res.json()
    );
    throw error;
  }

  return res.json();
};


================================================
FILE: web/src/lib/fileConnector.ts
================================================
export interface ConnectorFileInfo {
  file_id: string;
  file_name: string;
  file_size?: number;
  upload_date?: string;
}

export interface ConnectorFilesResponse {
  files: ConnectorFileInfo[];
}

export interface FileUploadResponse {
  file_paths: string[];
  file_names: string[];
  zip_metadata_file_id: string | null;
}

export async function updateConnectorFiles(
  connectorId: number,
  fileIdsToRemove: string[],
  filesToAdd: File[]
): Promise<void> {
  const formData = new FormData();

  // Add files to remove as JSON
  formData.append("file_ids_to_remove", JSON.stringify(fileIdsToRemove));

  // Add new files
  filesToAdd.forEach((file) => {
    formData.append("files", file);
  });

  const response = await fetch(
    `/api/manage/admin/connector/${connectorId}/files/update`,
    {
      method: "POST",
      body: formData,
    }
  );

  if (!response.ok) {
    const error = await response.json();
    throw new Error(
      `Failed to update connector files (${response.status}): ${
        error.detail || "Unknown error"
      }`
    );
  }
}


================================================
FILE: web/src/lib/filters.ts
================================================
import { Persona } from "@/app/admin/agents/interfaces";
import { DocumentSetSummary, ValidSources } from "./types";
import { getSourcesForPersona } from "./sources";

export function computeAvailableFilters({
  selectedPersona,
  availableSources,
  availableDocumentSets,
}: {
  selectedPersona: Persona | undefined | null;
  availableSources: ValidSources[];
  availableDocumentSets: DocumentSetSummary[];
}): [ValidSources[], DocumentSetSummary[]] {
  const finalAvailableSources =
    selectedPersona && selectedPersona.document_sets.length
      ? getSourcesForPersona(selectedPersona)
      : availableSources;

  // only display document sets that are available to the persona
  // in filters
  const personaDocumentSetIds =
    selectedPersona && selectedPersona.document_sets.length
      ? selectedPersona.document_sets.map((documentSet) => documentSet.id)
      : null;
  const finalAvailableDocumentSets = personaDocumentSetIds
    ? availableDocumentSets.filter((documentSet) =>
        personaDocumentSetIds.includes(documentSet.id)
      )
    : availableDocumentSets;

  return [finalAvailableSources, finalAvailableDocumentSets];
}


================================================
FILE: web/src/lib/generated/README.md
================================================
- Generated Files
* Generated files live here. This directory should be git ignored.


================================================
FILE: web/src/lib/gmail.ts
================================================
import { Credential } from "./connectors/credentials";

export const setupGmailOAuth = async ({
  isAdmin,
}: {
  isAdmin: boolean;
}): Promise<[string | null, string]> => {
  const credentialCreationResponse = await fetch("/api/manage/credential", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      admin_public: isAdmin,
      credential_json: {},
      source: "gmail",
    }),
  });
  if (!credentialCreationResponse.ok) {
    return [
      null,
      `Failed to create credential - ${credentialCreationResponse.status}`,
    ];
  }
  const credential =
    (await credentialCreationResponse.json()) as Credential<{}>;

  const authorizationUrlResponse = await fetch(
    `/api/manage/connector/gmail/authorize/${credential.id}`
  );
  if (!authorizationUrlResponse.ok) {
    return [
      null,
      `Failed to create credential - ${authorizationUrlResponse.status}`,
    ];
  }
  const authorizationUrlJson = (await authorizationUrlResponse.json()) as {
    auth_url: string;
  };

  return [authorizationUrlJson.auth_url, ""];
};


================================================
FILE: web/src/lib/googleConnector.ts
================================================
import useSWR, { mutate } from "swr";
import { FetchError, errorHandlingFetcher } from "@/lib/fetcher";
import { Credential } from "@/lib/connectors/credentials";
import { ConnectorSnapshot } from "@/lib/connectors/connectors";
import { ValidSources } from "@/lib/types";
import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib";
import { SWR_KEYS } from "@/lib/swr-keys";

// Constants for service names to avoid typos
export const GOOGLE_SERVICES = {
  GMAIL: "gmail",
  GOOGLE_DRIVE: "google-drive",
} as const;

export const useGoogleAppCredential = (service: "gmail" | "google_drive") => {
  const endpoint = `/api/manage/admin/connector/${
    service === "gmail" ? GOOGLE_SERVICES.GMAIL : GOOGLE_SERVICES.GOOGLE_DRIVE
  }/app-credential`;

  return useSWR<{ client_id: string }, FetchError>(
    endpoint,
    errorHandlingFetcher
  );
};

export const useGoogleServiceAccountKey = (
  service: "gmail" | "google_drive"
) => {
  const endpoint = `/api/manage/admin/connector/${
    service === "gmail" ? GOOGLE_SERVICES.GMAIL : GOOGLE_SERVICES.GOOGLE_DRIVE
  }/service-account-key`;

  return useSWR<{ service_account_email: string }, FetchError>(
    endpoint,
    errorHandlingFetcher
  );
};

export const useGoogleCredentials = (
  source: ValidSources.Gmail | ValidSources.GoogleDrive
) => {
  return useSWR<Credential<any>[]>(
    buildSimilarCredentialInfoURL(source),
    errorHandlingFetcher,
    { refreshInterval: 5000 }
  );
};

export const useConnectorsByCredentialId = (credential_id: number | null) => {
  let url: string | null = null;
  if (credential_id !== null) {
    url = `/api/manage/admin/connector?credential=${credential_id}`;
  }
  const swrResponse = useSWR<ConnectorSnapshot[]>(url, errorHandlingFetcher);

  return {
    ...swrResponse,
    refreshConnectorsByCredentialId: () => mutate(url),
  };
};

export const checkCredentialsFetched = (
  appCredentialData: any,
  appCredentialError: FetchError | undefined,
  serviceAccountKeyData: any,
  serviceAccountKeyError: FetchError | undefined
) => {
  const appCredentialSuccessfullyFetched =
    appCredentialData ||
    (appCredentialError && appCredentialError.status === 404);

  const serviceAccountKeySuccessfullyFetched =
    serviceAccountKeyData ||
    (serviceAccountKeyError && serviceAccountKeyError.status === 404);

  return {
    appCredentialSuccessfullyFetched,
    serviceAccountKeySuccessfullyFetched,
  };
};

export const filterUploadedCredentials = <
  T extends { authentication_method?: string },
>(
  credentials: Credential<T>[] | undefined
): { credential_id: number | null; uploadedCredentials: Credential<T>[] } => {
  let credential_id = null;
  let uploadedCredentials: Credential<T>[] = [];

  if (credentials) {
    uploadedCredentials = credentials.filter(
      (credential) =>
        credential.credential_json.authentication_method !== "oauth_interactive"
    );

    if (uploadedCredentials.length > 0 && uploadedCredentials[0]) {
      credential_id = uploadedCredentials[0].id;
    }
  }

  return { credential_id, uploadedCredentials };
};

export const checkConnectorsExist = (
  connectors: ConnectorSnapshot[] | undefined
): boolean => {
  return !!connectors && connectors.length > 0;
};

export const refreshAllGoogleData = (
  source: ValidSources.Gmail | ValidSources.GoogleDrive
) => {
  mutate(buildSimilarCredentialInfoURL(source));

  const service =
    source === ValidSources.Gmail
      ? GOOGLE_SERVICES.GMAIL
      : GOOGLE_SERVICES.GOOGLE_DRIVE;
  mutate(SWR_KEYS.googleConnectorAppCredential(service));
  mutate(SWR_KEYS.googleConnectorServiceAccountKey(service));
};


================================================
FILE: web/src/lib/googleDrive.ts
================================================
import { Credential } from "./connectors/credentials";

export const setupGoogleDriveOAuth = async ({
  isAdmin,
  name,
}: {
  isAdmin: boolean;
  name: string;
}): Promise<[string | null, string]> => {
  const credentialCreationResponse = await fetch("/api/manage/credential", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      admin_public: isAdmin,
      credential_json: {},
      source: "google_drive",
      name: name,
    }),
  });

  if (!credentialCreationResponse.ok) {
    return [
      null,
      `Failed to create credential - ${credentialCreationResponse.status}`,
    ];
  }
  const credential =
    (await credentialCreationResponse.json()) as Credential<{}>;

  const authorizationUrlResponse = await fetch(
    `/api/manage/connector/google-drive/authorize/${credential.id}`
  );
  if (!authorizationUrlResponse.ok) {
    return [
      null,
      `Failed to create credential - ${authorizationUrlResponse.status}`,
    ];
  }

  const authorizationUrlJson = (await authorizationUrlResponse.json()) as {
    auth_url: string;
  };

  return [authorizationUrlJson.auth_url, ""];
};


================================================
FILE: web/src/lib/headers/fetchHeaderDataSS.ts
================================================
import { CombinedSettings } from "@/interfaces/settings";
import { ChatSession, toChatSession } from "@/app/app/interfaces";
import { fetchSettingsSS } from "@/components/settings/lib";
import { fetchBackendChatSessionSS } from "@/lib/chat/fetchBackendChatSessionSS";

export interface HeaderData {
  settings: CombinedSettings | null;
  chatSession: ChatSession | null;
}

export async function fetchHeaderDataSS(
  chatSessionId?: string
): Promise<HeaderData> {
  const [settings, backendChatSession] = await Promise.all([
    fetchSettingsSS(),
    chatSessionId
      ? fetchBackendChatSessionSS(chatSessionId)
      : Promise.resolve(null),
  ]);
  const chatSession = backendChatSession
    ? toChatSession(backendChatSession)
    : null;

  return {
    settings,
    chatSession,
  };
}


================================================
FILE: web/src/lib/hierarchy/interfaces.ts
================================================
import { ValidSources } from "@/lib/types";

// Sort options for document pagination
export type DocumentSortField = "name" | "last_updated";
export type DocumentSortDirection = "asc" | "desc";
export type FolderPosition = "on_top" | "mixed";

// Hierarchy Node types matching backend models
export interface HierarchyNodeSummary {
  id: number;
  title: string;
  link: string | null;
  parent_id: number | null;
}

export interface HierarchyNodesRequest {
  source: ValidSources;
}

export interface HierarchyNodesResponse {
  nodes: HierarchyNodeSummary[];
}

// Document types for hierarchy
export interface DocumentPageCursor {
  // Fields for last_updated sorting
  last_modified?: string | null;
  last_synced?: string | null;
  // Field for name sorting
  name?: string | null;
  // Document ID for tie-breaking (always required)
  document_id: string;
}

export interface HierarchyNodeDocumentsRequest {
  parent_hierarchy_node_id: number;
  cursor?: DocumentPageCursor | null;
  sort_field?: DocumentSortField;
  sort_direction?: DocumentSortDirection;
  folder_position?: FolderPosition;
}

export interface DocumentSummary {
  id: string;
  title: string;
  link: string | null;
  parent_id: number | null;
  last_modified: string | null;
  last_synced: string | null;
}

export interface HierarchyNodeDocumentsResponse {
  documents: DocumentSummary[];
  next_cursor: DocumentPageCursor | null;
  page_size: number;
  sort_field: DocumentSortField;
  sort_direction: DocumentSortDirection;
  folder_position: FolderPosition;
}

// Connected source type for display
export interface ConnectedSource {
  source: ValidSources;
  connectorCount: number;
}

// Union type for folders and documents in hierarchy tables
export type HierarchyItem =
  | { type: "folder"; data: HierarchyNodeSummary }
  | { type: "document"; data: DocumentSummary };

// Props for hierarchy breadcrumb navigation
export interface HierarchyBreadcrumbProps {
  source: ValidSources;
  path: HierarchyNodeSummary[];
  onNavigateToRoot: () => void;
  onNavigateToNode: (node: HierarchyNodeSummary, index: number) => void;
}


================================================
FILE: web/src/lib/hierarchy/svc.ts
================================================
import { ValidSources } from "@/lib/types";
import {
  HierarchyNodesResponse,
  HierarchyNodeDocumentsRequest,
  HierarchyNodeDocumentsResponse,
} from "./interfaces";

const HIERARCHY_NODES_PREFIX = "/api/hierarchy-nodes";

async function extractErrorDetail(
  response: Response,
  fallback: string
): Promise<string> {
  try {
    const body = await response.json();
    if (body.detail) return body.detail;
  } catch {
    // JSON parsing failed — fall through to fallback
  }
  return fallback;
}

export async function fetchHierarchyNodes(
  source: ValidSources
): Promise<HierarchyNodesResponse> {
  const response = await fetch(
    `${HIERARCHY_NODES_PREFIX}?source=${encodeURIComponent(source)}`
  );

  if (!response.ok) {
    const detail = await extractErrorDetail(
      response,
      `Failed to fetch hierarchy nodes: ${response.statusText}`
    );
    throw new Error(detail);
  }

  return response.json();
}

export async function fetchHierarchyNodeDocuments(
  request: HierarchyNodeDocumentsRequest
): Promise<HierarchyNodeDocumentsResponse> {
  const response = await fetch(`${HIERARCHY_NODES_PREFIX}/documents`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(request),
  });

  if (!response.ok) {
    const detail = await extractErrorDetail(
      response,
      `Failed to fetch hierarchy node documents: ${response.statusText}`
    );
    throw new Error(detail);
  }

  return response.json();
}


================================================
FILE: web/src/lib/hooks/useCaptcha.ts
================================================
/**
 * Hook for Google reCAPTCHA v3 integration.
 *
 * Usage:
 * 1. Add NEXT_PUBLIC_RECAPTCHA_SITE_KEY to your environment
 * 2. Include the reCAPTCHA script in your page/layout
 * 3. Use the hook to get a captcha token before form submission
 *
 * Example:
 * ```tsx
 * const { getCaptchaToken, isCaptchaEnabled } = useCaptcha();
 *
 * const handleSubmit = async () => {
 *   const token = await getCaptchaToken('signup');
 *   await basicSignup(email, password, referralSource, token);
 * };
 * ```
 */

import { useCallback, useEffect, useState } from "react";

// Declare the global grecaptcha object
declare global {
  interface Window {
    grecaptcha?: {
      ready: (callback: () => void) => void;
      execute: (
        siteKey: string,
        options: { action: string }
      ) => Promise<string>;
    };
  }
}

const RECAPTCHA_SITE_KEY = process.env.NEXT_PUBLIC_RECAPTCHA_SITE_KEY || "";

export function useCaptcha() {
  const [isLoaded, setIsLoaded] = useState(false);

  const isCaptchaEnabled = Boolean(RECAPTCHA_SITE_KEY);

  useEffect(() => {
    if (!isCaptchaEnabled) {
      return;
    }

    const scriptSrc = `https://www.google.com/recaptcha/api.js?render=${RECAPTCHA_SITE_KEY}`;

    // Check if the script is already loaded
    if (window.grecaptcha) {
      window.grecaptcha.ready(() => {
        setIsLoaded(true);
      });
      return;
    }

    // Check if the script is already in the DOM (loading but not yet executed)
    const existingScript = document.querySelector(`script[src="${scriptSrc}"]`);
    if (existingScript) {
      // Script exists but hasn't loaded yet, wait for it
      existingScript.addEventListener("load", () => {
        if (window.grecaptcha) {
          window.grecaptcha.ready(() => {
            setIsLoaded(true);
          });
        }
      });
      return;
    }

    // Load the reCAPTCHA script
    const script = document.createElement("script");
    script.src = scriptSrc;
    script.async = true;
    script.defer = true;

    script.onload = () => {
      if (window.grecaptcha) {
        window.grecaptcha.ready(() => {
          setIsLoaded(true);
        });
      }
    };

    document.head.appendChild(script);

    return () => {
      // Cleanup is tricky with reCAPTCHA, so we leave the script in place
    };
  }, [isCaptchaEnabled]);

  const getCaptchaToken = useCallback(
    async (action: string = "submit"): Promise<string | undefined> => {
      if (!isCaptchaEnabled) {
        return undefined;
      }

      if (!isLoaded || !window.grecaptcha) {
        console.warn("reCAPTCHA not loaded yet");
        return undefined;
      }

      try {
        const token = await window.grecaptcha.execute(RECAPTCHA_SITE_KEY, {
          action,
        });
        return token;
      } catch (error) {
        console.error("Failed to execute reCAPTCHA:", error);
        return undefined;
      }
    },
    [isCaptchaEnabled, isLoaded]
  );

  return {
    getCaptchaToken,
    isCaptchaEnabled,
    isLoaded,
  };
}


================================================
FILE: web/src/lib/hooks/useCustomAnalyticsEnabled.ts
================================================
import { CUSTOM_ANALYTICS_ENABLED } from "@/lib/constants";

export type CustomAnalyticsStatus = {
  customAnalyticsEnabled: boolean;
  isLoading: boolean;
};

/**
 * Hook to check if custom analytics is enabled.
 * Returns the status and loading state for consistency with other hooks.
 * Since this is based on an environment variable, there's no actual loading state.
 */
export function useCustomAnalyticsEnabled(): CustomAnalyticsStatus {
  return {
    customAnalyticsEnabled: CUSTOM_ANALYTICS_ENABLED,
    isLoading: false,
  };
}


================================================
FILE: web/src/lib/hooks/useDocumentSets.ts
================================================
import useSWR from "swr";
import { DocumentSetSummary } from "@/lib/types";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useDocumentSets() {
  const { data, error, mutate } = useSWR<DocumentSetSummary[]>(
    SWR_KEYS.documentSets,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 60000,
    }
  );

  return {
    documentSets: data ?? [],
    isLoading: !error && !data,
    error,
    refresh: mutate,
  };
}


================================================
FILE: web/src/lib/hooks/useForcedTools.ts
================================================
import { create } from "zustand";

interface ForcedToolsState {
  forcedToolIds: number[];
  setForcedToolIds: (ids: number[]) => void;
  toggleForcedTool: (id: number) => void;
  clearForcedTools: () => void;
}

/**
 * Zustand store for managing forced tool IDs.
 * This is local UI state - tools that are forced to be used in the next message.
 *
 * When a tool is "forced", it will be included in the next chat request
 * regardless of whether the LLM would normally choose to use it.
 */
export const useForcedTools = create<ForcedToolsState>((set, get) => ({
  forcedToolIds: [],

  setForcedToolIds: (ids) => set({ forcedToolIds: ids }),

  toggleForcedTool: (id) => {
    const { forcedToolIds } = get();
    if (forcedToolIds.includes(id)) {
      // If clicking already forced tool, clear all forced tools
      set({ forcedToolIds: [] });
    } else {
      // Replace with single forced tool
      set({ forcedToolIds: [id] });
    }
  },

  clearForcedTools: () => set({ forcedToolIds: [] }),
}));


================================================
FILE: web/src/lib/hooks/useLLMProviderOptions.ts
================================================
import useSWR from "swr";
import { WellKnownLLMProviderDescriptor } from "@/interfaces/llm";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useLLMProviderOptions() {
  const { data, error, mutate } = useSWR<
    WellKnownLLMProviderDescriptor[] | undefined
  >(SWR_KEYS.wellKnownLlmProviders, errorHandlingFetcher, {
    revalidateOnFocus: false,
    revalidateIfStale: false,
    dedupingInterval: 60000,
  });

  return {
    llmProviderOptions: data,
    isLoading: !error && !data,
    error,
    refetch: mutate,
  };
}


================================================
FILE: web/src/lib/hooks/useLLMProviders.test.ts
================================================
import useSWR from "swr";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { errorHandlingFetcher } from "@/lib/fetcher";

jest.mock("swr", () => ({
  __esModule: true,
  default: jest.fn(),
}));

jest.mock("@/lib/fetcher", () => ({
  errorHandlingFetcher: jest.fn(),
}));

const mockUseSWR = useSWR as jest.MockedFunction<typeof useSWR>;

describe("useLLMProviders", () => {
  beforeEach(() => {
    mockUseSWR.mockReset();
  });

  test("uses public providers endpoint when personaId is not provided", () => {
    const mockMutate = jest.fn();
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: undefined,
      mutate: mockMutate,
      isValidating: false,
    } as any);

    const result = useLLMProviders();

    expect(mockUseSWR).toHaveBeenCalledWith(
      "/api/llm/provider",
      errorHandlingFetcher,
      expect.objectContaining({
        revalidateOnFocus: false,
        dedupingInterval: 60000,
      })
    );
    expect(result.isLoading).toBe(true);
    expect(result.refetch).toBe(mockMutate);
  });

  test("uses persona-specific providers endpoint when personaId is provided", () => {
    const mockMutate = jest.fn();
    const providers = [{ name: "Persona Provider" }];
    mockUseSWR.mockReturnValue({
      data: { providers, default_text: null, default_vision: null },
      error: undefined,
      mutate: mockMutate,
      isValidating: false,
    } as any);

    const result = useLLMProviders(42);

    expect(mockUseSWR).toHaveBeenCalledWith(
      "/api/llm/persona/42/providers",
      errorHandlingFetcher,
      expect.objectContaining({
        revalidateOnFocus: false,
        dedupingInterval: 60000,
      })
    );
    expect(result.llmProviders).toBe(providers);
    expect(result.isLoading).toBe(false);
    expect(result.refetch).toBe(mockMutate);
  });

  test("reports not loading when SWR returns an error", () => {
    mockUseSWR.mockReturnValue({
      data: undefined,
      error: new Error("request failed"),
      mutate: jest.fn(),
      isValidating: false,
    } as any);

    const result = useLLMProviders();

    expect(result.isLoading).toBe(false);
    expect(result.error).toBeInstanceOf(Error);
  });
});


================================================
FILE: web/src/lib/hooks/useProjects.ts
================================================
import useSWR from "swr";
import { Project } from "@/app/app/projects/projectsService";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";

export function useProjects() {
  const { data, error, mutate } = useSWR<Project[]>(
    SWR_KEYS.userProjects,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      revalidateIfStale: false,
      dedupingInterval: 30000,
    }
  );

  return {
    projects: data ?? [],
    isLoading: !error && !data,
    error,
    refreshProjects: mutate,
  };
}


================================================
FILE: web/src/lib/hooks/useToolOAuthStatus.ts
================================================
import { useCallback, useEffect, useRef } from "react";
import useSWR from "swr";
import { errorHandlingFetcher, skipRetryOnAuthError } from "@/lib/fetcher";
import { initiateOAuthFlow } from "@/lib/oauth/api";
import { OAuthTokenStatus, ToolSnapshot } from "@/lib/tools/interfaces";
import { SWR_KEYS } from "@/lib/swr-keys";

export interface ToolAuthStatus {
  // whether or not the user has EVER auth'd
  hasToken: boolean;
  // whether or not the user's current token is expired
  isTokenExpired: boolean;
}

export function useToolOAuthStatus(agentId?: number) {
  const {
    data: oauthTokenStatuses = [],
    isLoading: loading,
    error: swrError,
    mutate,
  } = useSWR<OAuthTokenStatus[]>(
    SWR_KEYS.oauthTokenStatus,
    errorHandlingFetcher,
    {
      revalidateOnFocus: false,
      dedupingInterval: 60_000,
      onErrorRetry: skipRetryOnAuthError,
      onError: (err) =>
        console.error("[useToolOAuthStatus] fetch failed:", err),
    }
  );

  const error: string | null = swrError
    ? swrError instanceof Error
      ? swrError.message
      : "An error occurred"
    : null;

  // Re-validate when the active agent changes so the UI reflects fresh token
  // state for the new agent's tools without waiting for the dedup interval.
  const prevAgentIdRef = useRef(agentId);
  useEffect(() => {
    if (prevAgentIdRef.current !== agentId) {
      prevAgentIdRef.current = agentId;
      mutate();
    }
  }, [agentId, mutate]);

  /**
   * Get OAuth status for a specific tool
   */
  const getToolAuthStatus = useCallback(
    (tool: ToolSnapshot): ToolAuthStatus | undefined => {
      if (!tool.oauth_config_id) return undefined;

      const status = oauthTokenStatuses.find(
        (s) => s.oauth_config_id === tool.oauth_config_id
      );

      if (!status)
        return {
          hasToken: false,
          isTokenExpired: false,
        };

      return {
        hasToken: true,
        isTokenExpired: status.is_expired,
      };
    },
    [oauthTokenStatuses]
  );

  /**
   * Initiate OAuth authentication flow for a tool
   */
  const authenticateTool = useCallback(
    async (tool: ToolSnapshot): Promise<void> => {
      if (!tool.oauth_config_id) {
        throw new Error("Tool does not have OAuth configuration");
      }

      try {
        await initiateOAuthFlow(
          tool.oauth_config_id,
          window.location.pathname + window.location.search
        );
      } catch (err) {
        console.error("Error initiating OAuth flow:", err);
        throw err;
      }
    },
    []
  );

  /**
   * Get all tools that need authentication from a list
   */
  const getToolsNeedingAuth = useCallback(
    (tools: ToolSnapshot[]): ToolSnapshot[] => {
      return tools.filter((tool) => !getToolAuthStatus(tool));
    },
    [getToolAuthStatus]
  );

  return {
    oauthTokenStatuses,
    loading,
    error,
    getToolAuthStatus,
    authenticateTool,
    getToolsNeedingAuth,
    refetch: () => mutate(),
  };
}


================================================
FILE: web/src/lib/hooks.llmResolver.test.ts
================================================
import {
  getDefaultLlmDescriptor,
  getValidLlmDescriptorForProviders,
} from "@/lib/hooks";
import { structureValue } from "@/lib/llmConfig/utils";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import { makeProvider } from "@tests/setup/llmProviderTestUtils";

describe("LLM resolver helpers", () => {
  test("chooses provider-specific descriptor when model names collide", () => {
    const sharedModel = "shared-runtime-model";
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        id: 1,
        name: "OpenAI Provider",
        provider: "openai",
        model_configurations: [
          {
            name: sharedModel,
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        id: 2,
        name: "Anthropic Provider",
        provider: "anthropic",
        model_configurations: [
          {
            name: sharedModel,
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    const descriptor = getValidLlmDescriptorForProviders(
      structureValue("Anthropic Provider", "anthropic", sharedModel),
      providers
    );

    expect(descriptor).toEqual({
      name: "Anthropic Provider",
      provider: "anthropic",
      modelName: sharedModel,
    });
  });

  test("falls back to default provider when model is unavailable", () => {
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        id: 10,
        name: "Default OpenAI",
        provider: "openai",
        model_configurations: [
          {
            name: "gpt-4o-mini",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: true,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        id: 20,
        name: "Anthropic Backup",
        provider: "anthropic",
        model_configurations: [
          {
            name: "claude-3-5-sonnet",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: true,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    const descriptor = getValidLlmDescriptorForProviders(
      "unknown-model-name",
      providers
    );

    expect(descriptor).toEqual({
      name: "Default OpenAI",
      provider: "openai",
      modelName: "gpt-4o-mini",
    });
  });

  test("prefers provider by name when multiple share the same type", () => {
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        id: 1,
        name: "Anthropic",
        provider: "anthropic",
        model_configurations: [
          {
            name: "claude-sonnet-4-5",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        id: 2,
        name: "PersonalAnthropicToken",
        provider: "anthropic",
        model_configurations: [
          {
            name: "claude-sonnet-4-5",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    const descriptor = getValidLlmDescriptorForProviders(
      structureValue(
        "PersonalAnthropicToken",
        "anthropic",
        "claude-sonnet-4-5"
      ),
      providers
    );

    expect(descriptor).toEqual({
      name: "PersonalAnthropicToken",
      provider: "anthropic",
      modelName: "claude-sonnet-4-5",
    });
  });

  test("uses first provider with models when no explicit default exists", () => {
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        id: 30,
        name: "First Provider",
        provider: "openai",
        model_configurations: [
          {
            name: "gpt-first",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        id: 40,
        name: "Second Provider",
        provider: "anthropic",
        model_configurations: [
          {
            name: "claude-second",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    expect(getDefaultLlmDescriptor(providers)).toEqual({
      name: "First Provider",
      provider: "openai",
      modelName: "gpt-first",
    });
  });
});


================================================
FILE: web/src/lib/hooks.ts
================================================
"use client";

import {
  DocumentBoostStatus,
  Tag,
  UserGroup,
  ConnectorStatus,
  CCPairBasicInfo,
  FederatedConnectorDetail,
  ValidSources,
  ConnectorIndexingStatusLiteResponse,
  IndexingStatusRequest,
} from "@/lib/types";
import useSWR, { mutate, useSWRConfig } from "swr";
import { errorHandlingFetcher } from "./fetcher";
import {
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { SourceMetadata } from "./search/interfaces";
import { parseLlmDescriptor } from "./llmConfig/utils";
import { ChatSession } from "@/app/app/interfaces";
import { Credential } from "./connectors/credentials";
import { SettingsContext } from "@/providers/SettingsProvider";
import {
  MinimalPersonaSnapshot,
  PersonaLabel,
} from "@/app/admin/agents/interfaces";
import { DefaultModel, LLMProviderDescriptor } from "@/interfaces/llm";
import { isAnthropic } from "@/app/admin/configuration/llm/utils";
import { getSourceMetadataForSources } from "./sources";
import { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from "./constants";
import { useUser } from "@/providers/UserProvider";
import { SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { updateTemperatureOverrideForChatSession } from "@/app/app/services/lib";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { SWR_KEYS } from "@/lib/swr-keys";

export const usePublicCredentials = () => {
  const { mutate } = useSWRConfig();
  const swrResponse = useSWR<Credential<any>[]>(
    SWR_KEYS.adminCredentials,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshCredentials: () => mutate(SWR_KEYS.adminCredentials),
  };
};

const buildReactedDocsUrl = (ascending: boolean, limit: number) => {
  return `/api/manage/admin/doc-boosts?ascending=${ascending}&limit=${limit}`;
};

export const useMostReactedToDocuments = (
  ascending: boolean,
  limit: number
) => {
  const url = buildReactedDocsUrl(ascending, limit);
  const swrResponse = useSWR<DocumentBoostStatus[]>(url, errorHandlingFetcher);

  return {
    ...swrResponse,
    refreshDocs: () => mutate(url),
  };
};

export const useObjectState = <T>(
  initialValue: T
): [T, (update: Partial<T>) => void] => {
  const [state, setState] = useState<T>(initialValue);
  const set = (update: Partial<T>) => {
    setState((prevState) => {
      return {
        ...prevState,
        ...update,
      };
    });
  };
  return [state, set];
};

export const useConnectorIndexingStatusWithPagination = (
  filters: Omit<IndexingStatusRequest, "source" | "source_to_page"> = {},
  refreshInterval = 30000,
  enabled: boolean = true
) => {
  const { mutate } = useSWRConfig();
  //maintains the current page for each source
  const [sourcePages, setSourcePages] = useState<Record<ValidSources, number>>(
    {} as Record<ValidSources, number>
  );
  const [mergedData, setMergedData] = useState<
    ConnectorIndexingStatusLiteResponse[]
  >([]);
  //maintains the loading state for each source
  const [sourceLoadingStates, setSourceLoadingStates] = useState<
    Record<ValidSources, boolean>
  >({} as Record<ValidSources, boolean>);

  //ref to maintain the current source pages for the main request
  const sourcePagesRef = useRef(sourcePages);
  sourcePagesRef.current = sourcePages;

  // Main request that includes current pagination state
  const mainRequest: IndexingStatusRequest = useMemo(
    () => ({
      secondary_index: false,
      access_type_filters: [],
      last_status_filters: [],
      docs_count_operator: null,
      docs_count_value: null,
      ...filters,
    }),
    [filters]
  );

  const swrKey = enabled
    ? [SWR_KEYS.indexingStatus, JSON.stringify(mainRequest)]
    : null;

  // Main data fetch with auto-refresh
  const { data, isLoading, error } = useSWR<
    ConnectorIndexingStatusLiteResponse[]
  >(
    swrKey,
    () => fetchConnectorIndexingStatus(mainRequest, sourcePagesRef.current),
    {
      refreshInterval,
    }
  );

  // Update merged data when main data changes
  useEffect(() => {
    if (data) {
      setMergedData(data);
    }
  }, [data]);

  // Function to handle page changes for a specific source
  const handlePageChange = useCallback(
    async (source: ValidSources, page: number) => {
      // Update the source page state
      setSourcePages((prev) => ({ ...prev, [source]: page }));

      const sourceRequest: IndexingStatusRequest = {
        ...filters,
        source: source,
        source_to_page: { [source]: page } as Record<ValidSources, number>,
      };
      setSourceLoadingStates((prev) => ({ ...prev, [source]: true }));

      try {
        const sourceData = await fetchConnectorIndexingStatus(sourceRequest);
        if (sourceData && sourceData.length > 0) {
          setMergedData((prevData) =>
            prevData
              .map((existingSource) =>
                existingSource.source === source
                  ? sourceData[0]
                  : existingSource
              )
              .filter(
                (item): item is ConnectorIndexingStatusLiteResponse =>
                  item !== undefined
              )
          );
        }
      } catch (error) {
        console.error(
          `Failed to fetch page ${page} for source ${source}:`,
          error
        );
      } finally {
        setSourceLoadingStates((prev) => ({ ...prev, [source]: false }));
      }
    },
    [filters]
  );

  // Function to refresh all data (maintains current pagination)
  const refreshAllData = useCallback(() => {
    if (swrKey) mutate(swrKey);
  }, [mutate, swrKey]);

  // Reset pagination when filters change (but not search)
  const resetPagination = useCallback(() => {
    setSourcePages({} as Record<ValidSources, number>);
  }, []);

  return {
    data: mergedData,
    isLoading,
    error,
    handlePageChange,
    sourcePages,
    sourceLoadingStates,
    refreshAllData,
    resetPagination,
  };
};

export const useConnectorStatus = (
  refreshInterval = 30000,
  enabled: boolean = true
) => {
  const { mutate } = useSWRConfig();
  const url = SWR_KEYS.adminConnectorStatus;
  const swrResponse = useSWR<ConnectorStatus<any, any>[]>(
    enabled ? url : null,
    errorHandlingFetcher,
    { refreshInterval: refreshInterval }
  );

  return {
    ...swrResponse,
    refreshIndexingStatus: enabled ? () => mutate(url) : () => {},
  };
};

export const useBasicConnectorStatus = (enabled: boolean = true) => {
  const url = SWR_KEYS.connectorStatus;
  const swrResponse = useSWR<CCPairBasicInfo[]>(
    enabled ? url : null,
    errorHandlingFetcher
  );
  return {
    ...swrResponse,
    refreshIndexingStatus: enabled ? () => mutate(url) : () => {},
  };
};

export const useFederatedConnectors = () => {
  const { mutate } = useSWRConfig();
  const url = SWR_KEYS.federatedConnectors;
  const swrResponse = useSWR<FederatedConnectorDetail[]>(
    url,
    errorHandlingFetcher
  );

  return {
    ...swrResponse,
    refreshFederatedConnectors: () => mutate(url),
  };
};

export const useLabels = () => {
  const { mutate } = useSWRConfig();
  const { data: labels, error } = useSWR<PersonaLabel[]>(
    SWR_KEYS.personaLabels,
    errorHandlingFetcher
  );

  const refreshLabels = async () => {
    return mutate(SWR_KEYS.personaLabels);
  };

  const createLabel = async (name: string): Promise<PersonaLabel | null> => {
    const response = await fetch(SWR_KEYS.personaLabels, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name }),
    });

    if (!response.ok) {
      return null;
    }

    const newLabel: PersonaLabel = await response.json();
    mutate(
      SWR_KEYS.personaLabels,
      (currentLabels: PersonaLabel[] | undefined) => [
        ...(currentLabels || []),
        newLabel,
      ],
      false
    );
    return newLabel;
  };

  const updateLabel = async (id: number, name: string) => {
    const response = await fetch(`/api/admin/persona/label/${id}`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ label_name: name }),
    });

    if (response.ok) {
      mutate(
        SWR_KEYS.personaLabels,
        labels?.map((label) => (label.id === id ? { ...label, name } : label)),
        false
      );
    }

    return response;
  };

  const deleteLabel = async (id: number) => {
    const response = await fetch(`/api/admin/persona/label/${id}`, {
      method: "DELETE",
      headers: { "Content-Type": "application/json" },
    });

    if (response.ok) {
      mutate(
        SWR_KEYS.personaLabels,
        labels?.filter((label) => label.id !== id),
        false
      );
    }

    return response;
  };

  return {
    labels,
    error,
    refreshLabels,
    createLabel,
    updateLabel,
    deleteLabel,
  };
};

export const useTimeRange = (initialValue?: DateRangePickerValue) => {
  return useState<DateRangePickerValue | null>(null);
};

export interface FilterManager {
  timeRange: DateRangePickerValue | null;
  setTimeRange: React.Dispatch<
    React.SetStateAction<DateRangePickerValue | null>
  >;
  selectedSources: SourceMetadata[];
  setSelectedSources: React.Dispatch<React.SetStateAction<SourceMetadata[]>>;
  selectedDocumentSets: string[];
  setSelectedDocumentSets: React.Dispatch<React.SetStateAction<string[]>>;
  selectedTags: Tag[];
  setSelectedTags: React.Dispatch<React.SetStateAction<Tag[]>>;
  getFilterString: () => string;
  buildFiltersFromQueryString: (
    filterString: string,
    availableSources: SourceMetadata[],
    availableDocumentSets: string[],
    availableTags: Tag[]
  ) => void;
  clearFilters: () => void;
}

export function useFilters(): FilterManager {
  const [timeRange, setTimeRange] = useTimeRange();
  const [selectedSources, setSelectedSources] = useState<SourceMetadata[]>([]);
  const [selectedDocumentSets, setSelectedDocumentSets] = useState<string[]>(
    []
  );
  const [selectedTags, setSelectedTags] = useState<Tag[]>([]);

  function getFilterString() {
    const params = new URLSearchParams();

    if (timeRange) {
      params.set("from", timeRange.from.toISOString());
      params.set("to", timeRange.to.toISOString());
    }

    if (selectedSources.length > 0) {
      const sourcesParam = selectedSources
        .map((source) => encodeURIComponent(source.internalName))
        .join(",");
      params.set("sources", sourcesParam);
    }

    if (selectedDocumentSets.length > 0) {
      const docSetsParam = selectedDocumentSets
        .map((ds) => encodeURIComponent(ds))
        .join(",");
      params.set("documentSets", docSetsParam);
    }

    if (selectedTags.length > 0) {
      const tagsParam = selectedTags
        .map((tag) => encodeURIComponent(tag.tag_value))
        .join(",");
      params.set("tags", tagsParam);
    }

    const queryString = params.toString();
    return queryString ? `&${queryString}` : "";
  }

  function clearFilters() {
    setTimeRange(null);
    setSelectedSources([]);
    setSelectedDocumentSets([]);
    setSelectedTags([]);
  }

  function buildFiltersFromQueryString(
    filterString: string,
    availableSources: SourceMetadata[],
    availableDocumentSets: string[],
    availableTags: Tag[]
  ): void {
    const params = new URLSearchParams(filterString);

    // Parse the "from" parameter as a DateRangePickerValue
    let newTimeRange: DateRangePickerValue | null = null;
    const fromParam = params.get("from");
    const toParam = params.get("to");
    if (fromParam && toParam) {
      const fromDate = new Date(fromParam);
      const toDate = new Date(toParam);
      if (!isNaN(fromDate.getTime()) && !isNaN(toDate.getTime())) {
        newTimeRange = { from: fromDate, to: toDate, selectValue: "" };
      }
    }

    // Parse sources
    let newSelectedSources: SourceMetadata[] = [];
    const sourcesParam = params.get("sources");
    if (sourcesParam) {
      const sourceNames = sourcesParam.split(",").map(decodeURIComponent);
      newSelectedSources = availableSources.filter((source) =>
        sourceNames.includes(source.internalName)
      );
    }

    // Parse document sets
    let newSelectedDocSets: string[] = [];
    const docSetsParam = params.get("documentSets");
    if (docSetsParam) {
      const docSetNames = docSetsParam.split(",").map(decodeURIComponent);
      newSelectedDocSets = availableDocumentSets.filter((ds) =>
        docSetNames.includes(ds)
      );
    }

    // Parse tags
    let newSelectedTags: Tag[] = [];
    const tagsParam = params.get("tags");
    if (tagsParam) {
      const tagValues = tagsParam.split(",").map(decodeURIComponent);
      newSelectedTags = availableTags.filter((tag) =>
        tagValues.includes(tag.tag_value)
      );
    }

    // Update filter manager's values instead of returning
    setTimeRange(newTimeRange);
    setSelectedSources(newSelectedSources);
    setSelectedDocumentSets(newSelectedDocSets);
    setSelectedTags(newSelectedTags);
  }

  return {
    clearFilters,
    timeRange,
    setTimeRange,
    selectedSources,
    setSelectedSources,
    selectedDocumentSets,
    setSelectedDocumentSets,
    selectedTags,
    setSelectedTags,
    getFilterString,
    buildFiltersFromQueryString,
  };
}

export interface LlmDescriptor {
  name: string;
  provider: string;
  modelName: string;
}

export interface LlmManager {
  currentLlm: LlmDescriptor;
  updateCurrentLlm: (newOverride: LlmDescriptor) => void;
  temperature: number;
  updateTemperature: (temperature: number) => void;
  updateModelOverrideBasedOnChatSession: (chatSession?: ChatSession) => void;
  imageFilesPresent: boolean;
  updateImageFilesPresent: (present: boolean) => void;
  liveAgent: MinimalPersonaSnapshot | null;
  maxTemperature: number;
  llmProviders: LLMProviderDescriptor[] | undefined;
  isLoadingProviders: boolean;
  hasAnyProvider: boolean;
}

// Things to test
// 1. User override
// 2. User preference (defaults to system wide default if no preference set)
// 3. Current assistant
// 4. Current chat session
// 5. Live assistant

/*
LLM Override is as follows (i.e. this order)
- User override (explicitly set in the chat input bar)
- User preference (defaults to system wide default if no preference set)

On switching to an existing or new chat session or a different assistant:
- If we have a live assistant after any switch with a model override, use that- otherwise use the above hierarchy

Thus, the input should be
- User preference
- LLM Providers (which contain the system wide default)
- Current assistant

Changes take place as
- liveAgent or currentChatSession changes (and the associated model override is set)
- (updateCurrentLlm) User explicitly setting a model override (and we explicitly override and set the userSpecifiedOverride which we'll use in place of the user preferences unless overridden by an agent)

If we have a live assistant, we should use that model override

Relevant test: `llm_ordering.spec.ts`.

Temperature override is set as follows:
- For existing chat sessions:
  - If the user has previously overridden the temperature for a specific chat session,
    that value is persisted and used when the user returns to that chat.
  - This persistence applies even if the temperature was set before sending the first message in the chat.
- For new chat sessions:
  - If the search tool is available, the default temperature is set to 0.
  - If the search tool is not available, the default temperature is set to 0.5.

This approach ensures that user preferences are maintained for existing chats while
providing appropriate defaults for new conversations based on the available tools.
*/

export function getDefaultLlmDescriptor(
  llmProviders: LLMProviderDescriptor[],
  defaultText?: DefaultModel | null
): LlmDescriptor | null {
  if (defaultText) {
    const provider = llmProviders.find((p) => p.id === defaultText.provider_id);
    if (provider) {
      return {
        name: provider.name,
        provider: provider.provider,
        modelName: defaultText.model_name,
      };
    }
  }
  // Fallback: first provider with visible models
  const firstLlmProvider = llmProviders.find(
    (provider) => provider.model_configurations.length > 0
  );
  if (firstLlmProvider) {
    const firstModel = firstLlmProvider.model_configurations.find(
      (m) => m.is_visible
    );
    return {
      name: firstLlmProvider.name,
      provider: firstLlmProvider.provider,
      modelName: firstModel?.name ?? "",
    };
  }
  return null;
}

export function getValidLlmDescriptorForProviders(
  modelName: string | null | undefined,
  llmProviders: LLMProviderDescriptor[] | undefined | null
): LlmDescriptor {
  // Return early if providers haven't loaded yet (undefined/null)
  // Empty arrays are valid (user has no provider access for this assistant)
  if (llmProviders === undefined || llmProviders === null) {
    return { name: "", provider: "", modelName: "" };
  }

  if (modelName) {
    const model = parseLlmDescriptor(modelName);
    // If we have no parsed modelName, try to find the provider by the raw modelName string
    if (!(model.modelName && model.modelName.length > 0)) {
      const provider = llmProviders.find((p) =>
        p.model_configurations
          .map((modelConfiguration) => modelConfiguration.name)
          .includes(modelName)
      );
      if (provider) {
        return {
          modelName: modelName,
          name: provider.name,
          provider: provider.provider,
        };
      }
    }

    // If we have parsed provider info, try to find that specific provider.
    // This ensures we don't incorrectly match a model to the wrong provider
    // when the same model name exists across multiple providers (e.g., gpt-5 in Azure and OpenAI)
    if (model.provider && model.provider.length > 0) {
      const hasModel = (p: LLMProviderDescriptor) =>
        p.model_configurations.some((mc) => mc.name === model.modelName);
      const typeMatches = llmProviders.filter(
        (p) => p.provider === model.provider && hasModel(p)
      );
      // When multiple providers share the same type (e.g., two "anthropic"
      // providers with different API keys), prefer the one whose name matches
      // the user's explicit selection to avoid silently switching providers.
      const matchingProvider =
        typeMatches.find((p) => p.name === model.name) ?? typeMatches[0];
      if (matchingProvider) {
        return {
          ...model,
          name: matchingProvider.name,
          provider: matchingProvider.provider,
        };
      }
      // Provider info was present but not found - fall through to default
    } else {
      // Only search by model name when no provider info was parsed
      const provider = llmProviders.find((p) =>
        p.model_configurations
          .map((modelConfiguration) => modelConfiguration.name)
          .includes(model.modelName)
      );

      if (provider) {
        return { ...model, provider: provider.provider, name: provider.name };
      }
    }
  }

  // Model not found in available providers - fall back to default model
  return (
    getDefaultLlmDescriptor(llmProviders) ?? {
      name: "",
      provider: "",
      modelName: "",
    }
  );
}

export function useLlmManager(
  currentChatSession?: ChatSession,
  liveAgent?: MinimalPersonaSnapshot
): LlmManager {
  const { user } = useUser();

  // Get all user-accessible providers via SWR (general providers - no persona filter)
  // This includes public + all restricted providers user can access via groups
  const {
    llmProviders: allUserProviders,
    defaultText: allUserDefaultText,
    isLoading: isLoadingAllProviders,
  } = useLLMProviders();
  // Fetch persona-specific providers to enforce RBAC restrictions per assistant
  // Only fetch if we have an agent selected
  const personaId = liveAgent?.id !== undefined ? liveAgent.id : undefined;
  const {
    llmProviders: personaProviders,
    defaultText: personaDefaultText,
    isLoading: isLoadingPersonaProviders,
  } = useLLMProviders(personaId);

  const llmProviders =
    personaProviders !== undefined ? personaProviders : allUserProviders;
  const defaultText =
    personaProviders !== undefined ? personaDefaultText : allUserDefaultText;

  const [userHasManuallyOverriddenLLM, setUserHasManuallyOverriddenLLM] =
    useState(false);
  const [chatSession, setChatSession] = useState<ChatSession | null>(null);
  const [currentLlm, setCurrentLlm] = useState<LlmDescriptor>({
    name: "",
    provider: "",
    modelName: "",
  });

  // Track the previous assistant ID to detect when it changes
  const prevAgentIdRef = useRef<number | undefined>(undefined);

  // Reset manual override when switching to a different assistant
  useEffect(() => {
    if (
      liveAgent?.id !== undefined &&
      prevAgentIdRef.current !== undefined &&
      liveAgent.id !== prevAgentIdRef.current
    ) {
      // User switched to a different assistant - reset manual override
      setUserHasManuallyOverriddenLLM(false);
    }
    prevAgentIdRef.current = liveAgent?.id;
  }, [liveAgent?.id]);

  const llmUpdate = () => {
    /* Should be called when the live assistant or current chat session changes */

    // Don't update if providers haven't loaded yet (undefined/null)
    // Empty arrays are valid (user has no provider access for this assistant)
    if (llmProviders === undefined || llmProviders === null) {
      return;
    }

    // separate function so we can `return` to break out
    const _llmUpdate = () => {
      // if the user has overridden in this session and just switched to a brand
      // new session, use their manually specified model
      if (userHasManuallyOverriddenLLM && !currentChatSession) {
        return;
      }

      if (currentChatSession?.current_alternate_model) {
        setCurrentLlm(
          getValidLlmDescriptor(currentChatSession.current_alternate_model)
        );
      } else if (liveAgent?.llm_model_version_override) {
        setCurrentLlm(
          getValidLlmDescriptor(liveAgent.llm_model_version_override)
        );
      } else if (userHasManuallyOverriddenLLM) {
        // if the user has an override and there's nothing special about the
        // current chat session, use the override
        return;
      } else if (user?.preferences?.default_model) {
        setCurrentLlm(getValidLlmDescriptor(user.preferences.default_model));
      } else {
        const defaultLlm = getDefaultLlmDescriptor(llmProviders, defaultText);
        if (defaultLlm) {
          setCurrentLlm(defaultLlm);
        }
      }
    };

    _llmUpdate();
    setChatSession(currentChatSession || null);
  };

  function getValidLlmDescriptor(
    modelName: string | null | undefined
  ): LlmDescriptor {
    return getValidLlmDescriptorForProviders(modelName, llmProviders);
  }

  const [imageFilesPresent, setImageFilesPresent] = useState(false);

  const updateImageFilesPresent = (present: boolean) => {
    setImageFilesPresent(present);
  };

  // Manually set the LLM
  const updateCurrentLlm = (newLlm: LlmDescriptor) => {
    setCurrentLlm(newLlm);
    setUserHasManuallyOverriddenLLM(true);
  };

  const updateCurrentLlmToModelName = (modelName: string) => {
    setCurrentLlm(getValidLlmDescriptor(modelName));
    setUserHasManuallyOverriddenLLM(true);
  };

  const updateModelOverrideBasedOnChatSession = (chatSession?: ChatSession) => {
    if (chatSession && chatSession.current_alternate_model?.length > 0) {
      setCurrentLlm(getValidLlmDescriptor(chatSession.current_alternate_model));
    }
  };

  const [temperature, setTemperature] = useState<number>(() => {
    if (currentChatSession?.current_temperature_override != null) {
      // Derive Anthropic check from chat session since currentLlm isn't populated yet
      const sessionModel = currentChatSession.current_alternate_model
        ? parseLlmDescriptor(currentChatSession.current_alternate_model)
        : null;
      const isAnthropicModel = sessionModel
        ? isAnthropic(sessionModel.provider, sessionModel.modelName)
        : false;
      return Math.min(
        currentChatSession.current_temperature_override,
        isAnthropicModel ? 1.0 : 2.0
      );
    } else if (liveAgent?.tools.some((tool) => tool.name === SEARCH_TOOL_ID)) {
      return 0;
    }
    return 0.5;
  });

  const maxTemperature = useMemo(() => {
    // Check currentLlm first, fall back to chat session model if currentLlm isn't populated
    if (currentLlm.provider) {
      return isAnthropic(currentLlm.provider, currentLlm.modelName) ? 1.0 : 2.0;
    }
    const sessionModel = currentChatSession?.current_alternate_model
      ? parseLlmDescriptor(currentChatSession.current_alternate_model)
      : null;
    if (sessionModel?.provider) {
      return isAnthropic(sessionModel.provider, sessionModel.modelName)
        ? 1.0
        : 2.0;
    }
    return 2.0; // Default max when no model info available
  }, [currentLlm, currentChatSession]);

  useEffect(() => {
    if (isAnthropic(currentLlm.provider, currentLlm.modelName)) {
      const newTemperature = Math.min(temperature, 1.0);
      setTemperature(newTemperature);
      if (chatSession?.id) {
        updateTemperatureOverrideForChatSession(chatSession.id, newTemperature);
      }
    }
  }, [currentLlm]);

  useEffect(() => {
    llmUpdate();

    if (!chatSession && currentChatSession) {
      if (temperature) {
        updateTemperatureOverrideForChatSession(
          currentChatSession.id,
          temperature
        );
      }
      return;
    }

    if (currentChatSession?.current_temperature_override) {
      setTemperature(currentChatSession.current_temperature_override);
    } else if (liveAgent?.tools.some((tool) => tool.name === SEARCH_TOOL_ID)) {
      setTemperature(0);
    } else {
      setTemperature(0.5);
    }
  }, [
    liveAgent,
    currentChatSession,
    llmProviders,
    user?.preferences?.default_model,
  ]);

  const updateTemperature = (temperature: number) => {
    const clampedTemp = isAnthropic(currentLlm.provider, currentLlm.modelName)
      ? Math.min(temperature, 1.0)
      : temperature;
    setTemperature(clampedTemp);
    if (chatSession) {
      updateTemperatureOverrideForChatSession(chatSession.id, clampedTemp);
    }
  };

  // Track if any provider exists for the current persona context.
  // Uses the persona-aware list so chat input reflects actual access,
  // falling back to the global list when no persona is selected.
  const hasAnyProvider = (llmProviders?.length ?? 0) > 0;

  return {
    updateModelOverrideBasedOnChatSession,
    currentLlm,
    updateCurrentLlm,
    temperature,
    updateTemperature,
    imageFilesPresent,
    updateImageFilesPresent,
    liveAgent: liveAgent ?? null,
    maxTemperature,
    llmProviders,
    isLoadingProviders:
      isLoadingAllProviders ||
      (personaId !== undefined && isLoadingPersonaProviders),
    hasAnyProvider,
  };
}

export function useAuthType(): AuthType | null {
  const { data, error } = useSWR<{ auth_type: AuthType }>(
    SWR_KEYS.authType,
    errorHandlingFetcher
  );

  if (NEXT_PUBLIC_CLOUD_ENABLED) {
    return AuthType.CLOUD;
  }

  if (error || !data) {
    return null;
  }

  return data.auth_type;
}

/*
EE Only APIs
*/

export const useUserGroups = (): {
  data: UserGroup[] | undefined;
  isLoading: boolean;
  error: string;
  refreshUserGroups: () => void;
} => {
  const combinedSettings = useContext(SettingsContext);
  const isLoading = combinedSettings?.settingsLoading ?? false;
  const isPaidEnterpriseFeaturesEnabled =
    !isLoading &&
    combinedSettings &&
    combinedSettings.enterpriseSettings !== null;

  const swrResponse = useSWR<UserGroup[]>(
    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.adminUserGroups : null,
    errorHandlingFetcher
  );

  const refreshUserGroups = () => mutate(SWR_KEYS.adminUserGroups);

  if (isLoading) {
    return {
      data: undefined,
      isLoading: true,
      error: "",
      refreshUserGroups,
    };
  }

  if (!isPaidEnterpriseFeaturesEnabled) {
    return {
      data: [],
      isLoading: false,
      error: "",
      refreshUserGroups,
    };
  }

  return {
    ...swrResponse,
    refreshUserGroups,
  };
};

export const fetchConnectorIndexingStatus = async (
  request: IndexingStatusRequest = {},
  sourcePages: Record<ValidSources, number> | null = null
): Promise<ConnectorIndexingStatusLiteResponse[]> => {
  const response = await fetch(SWR_KEYS.indexingStatus, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      secondary_index: false,
      access_type_filters: [],
      last_status_filters: [],
      docs_count_operator: null,
      docs_count_value: null,
      source_to_page: sourcePages || {}, // Use current pagination state
      ...request,
    }),
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  return response.json();
};

// Get source metadata for configured sources - deduplicated by source type
function getConfiguredSources(
  availableSources: ValidSources[]
): Array<SourceMetadata & { originalName: string; uniqueKey: string }> {
  const allSources = getSourceMetadataForSources(availableSources);

  const seenSources = new Set<string>();
  const configuredSources: Array<
    SourceMetadata & { originalName: string; uniqueKey: string }
  > = [];

  availableSources.forEach((sourceName) => {
    // Handle federated connectors by removing the federated_ prefix
    const cleanName = sourceName.replace("federated_", "");
    // Skip if we've already seen this source type
    if (seenSources.has(cleanName)) return;
    seenSources.add(cleanName);
    const source = allSources.find(
      (source) => source.internalName === cleanName
    );
    if (source) {
      configuredSources.push({
        ...source,
        originalName: sourceName,
        uniqueKey: cleanName,
      });
    }
  });
  return configuredSources;
}

interface UseSourcePreferencesProps {
  availableSources: ValidSources[];
  selectedSources: SourceMetadata[];
  setSelectedSources: (sources: SourceMetadata[]) => void;
}

interface SourcePreferencesSnapshot {
  sourcePreferences: Record<string, boolean>; // uniqueKey -> enabled status
}

const LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY = "selectedInternalSearchSources";

export function useSourcePreferences({
  availableSources,
  selectedSources,
  setSelectedSources,
}: UseSourcePreferencesProps) {
  const [sourcesInitialized, setSourcesInitialized] = useState(false);

  const configuredSources = useMemo(
    () => getConfiguredSources(availableSources),
    [availableSources]
  );

  // Load saved source preferences from localStorage
  const loadSavedSourcePreferences = (): SourcePreferencesSnapshot | null => {
    if (typeof window === "undefined") return null;
    const saved = localStorage.getItem(LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY);
    if (!saved) return null;
    try {
      const res = JSON.parse(saved);

      // Validate the snapshot structure
      if (
        typeof res !== "object" ||
        res === null ||
        typeof res.sourcePreferences !== "object" ||
        res.sourcePreferences === null ||
        Array.isArray(res.sourcePreferences)
      ) {
        return null;
      }

      // Validate that all values in sourcePreferences are booleans
      for (const value of Object.values(res.sourcePreferences)) {
        if (typeof value !== "boolean") {
          return null;
        }
      }

      return res as SourcePreferencesSnapshot;
    } catch {
      return null;
    }
  };

  const persistSourcePreferencesState = (
    enabledSources: SourceMetadata[],
    allKnownSources: SourceMetadata[]
  ) => {
    if (typeof window === "undefined") return;

    const enabledKeys = new Set(enabledSources.map((s) => s.uniqueKey));

    const snapshot: SourcePreferencesSnapshot = {
      sourcePreferences: Object.fromEntries(
        allKnownSources
          .filter((src) => src.uniqueKey !== undefined)
          .map((src) => [src.uniqueKey, enabledKeys.has(src.uniqueKey)])
      ),
    };

    localStorage.setItem(
      LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY,
      JSON.stringify(snapshot)
    );
  };

  // Initialize sources - load from localStorage or enable all by default
  useEffect(() => {
    if (!sourcesInitialized && availableSources.length > 0) {
      const savedSources = loadSavedSourcePreferences();

      if (savedSources !== null) {
        // Filter out saved sources that no longer exist
        const { sourcePreferences } = savedSources;

        // Helper to check if there is a preference for a key
        const hasPref = (key: string) =>
          Object.prototype.hasOwnProperty.call(sourcePreferences, key);

        // Get sources with no preference
        const newSources = configuredSources.filter((source) => {
          return !hasPref(source.uniqueKey);
        });

        const enabledSources = configuredSources.filter((source) => {
          return (
            hasPref(source.uniqueKey) && sourcePreferences[source.uniqueKey]
          );
        });

        // Merge valid saved sources with new sources (enable new sources by default)
        const mergedSources = [...enabledSources, ...newSources];
        setSelectedSources(mergedSources);

        // Persist the merged state
        persistSourcePreferencesState(mergedSources, configuredSources);
      } else {
        // First time user or invalid data - enable all sources by default
        setSelectedSources(configuredSources);
        persistSourcePreferencesState(configuredSources, configuredSources);
      }
      setSourcesInitialized(true);
    }
  }, [
    availableSources,
    configuredSources,
    sourcesInitialized,
    setSelectedSources,
  ]);

  const enableSources = (sources: SourceMetadata[]) => {
    setSelectedSources([...sources]);
    persistSourcePreferencesState(sources, configuredSources);
  };

  const enableAllSources = () => {
    enableSources(configuredSources);
  };

  const disableAllSources = () => {
    setSelectedSources([]);
    persistSourcePreferencesState([], configuredSources);
  };

  const toggleSource = (sourceUniqueKey: string) => {
    const configuredSource = configuredSources.find(
      (s) => s.uniqueKey === sourceUniqueKey
    );
    if (!configuredSource) return;

    const isCurrentlySelected = selectedSources.some(
      (s) => s.uniqueKey === configuredSource.uniqueKey
    );

    let newSources: SourceMetadata[];
    if (isCurrentlySelected) {
      newSources = selectedSources.filter(
        (s) => s.uniqueKey !== configuredSource.uniqueKey
      );
    } else {
      newSources = [...selectedSources, configuredSource];
    }

    setSelectedSources(newSources);
    persistSourcePreferencesState(newSources, configuredSources);
  };

  const isSourceEnabled = (sourceUniqueKey: string) => {
    const configuredSource = configuredSources.find(
      (s) => s.uniqueKey === sourceUniqueKey
    );
    if (!configuredSource) return false;
    return selectedSources.some(
      (s: SourceMetadata) => s.uniqueKey === configuredSource.uniqueKey
    );
  };

  return {
    sourcesInitialized,
    enableSources,
    enableAllSources,
    disableAllSources,
    toggleSource,
    isSourceEnabled,
  };
}


================================================
FILE: web/src/lib/indexAttempt.ts
================================================
import { IndexAttemptSnapshot } from "./types";

export const getDocsProcessedPerMinute = (
  indexAttempt: IndexAttemptSnapshot | null
): number | null => {
  if (
    !indexAttempt ||
    !indexAttempt.time_started ||
    !indexAttempt.time_updated ||
    indexAttempt.total_docs_indexed === 0
  ) {
    return null;
  }

  const timeStarted = new Date(indexAttempt.time_started);
  const timeUpdated = new Date(indexAttempt.time_updated);
  const timeDiff = timeUpdated.getTime() - timeStarted.getTime();
  const seconds = timeDiff / 1000;
  return (indexAttempt.total_docs_indexed / seconds) * 60;
};


================================================
FILE: web/src/lib/languages.test.ts
================================================
import {
  getCodeLanguage,
  getDataLanguage,
  getLanguageByMime,
  isMarkdownFile,
} from "./languages";

describe("getCodeLanguage", () => {
  it.each([
    ["app.py", "python"],
    ["index.ts", "typescript"],
    ["main.go", "go"],
    ["style.css", "css"],
    ["page.html", "html"],
    ["App.vue", "vue"],
    ["lib.rs", "rust"],
    ["main.cpp", "c++"],
    ["util.c", "c"],
    ["script.js", "javascript"],
  ])("%s → %s", (filename, expected) => {
    expect(getCodeLanguage(filename)).toBe(expected);
  });

  it.each([
    [".h", "c"],
    [".inc", "php"],
    [".m", "objective-c"],
    [".re", "reason"],
  ])("override: %s → %s", (ext, expected) => {
    expect(getCodeLanguage(`file${ext}`)).toBe(expected);
  });

  it("resolves by exact filename when there is no extension", () => {
    expect(getCodeLanguage("Dockerfile")).toBe("dockerfile");
    expect(getCodeLanguage("Makefile")).toBe("makefile");
  });

  it("is case-insensitive for filenames", () => {
    expect(getCodeLanguage("INDEX.JS")).toBe("javascript");
    expect(getCodeLanguage("dockerfile")).toBe("dockerfile");
  });

  it("returns null for unknown extensions", () => {
    expect(getCodeLanguage("file.xyz123")).toBeNull();
  });

  it("excludes markdown extensions", () => {
    expect(getCodeLanguage("README.md")).toBeNull();
    expect(getCodeLanguage("notes.markdown")).toBeNull();
  });
});

describe("getDataLanguage", () => {
  it.each([
    ["config.json", "json"],
    ["config.yaml", "yaml"],
    ["config.yml", "yaml"],
    ["config.toml", "toml"],
    ["data.xml", "xml"],
    ["data.csv", "csv"],
  ])("%s → %s", (filename, expected) => {
    expect(getDataLanguage(filename)).toBe(expected);
  });

  it("returns null for code files", () => {
    expect(getDataLanguage("app.py")).toBeNull();
    expect(getDataLanguage("header.h")).toBeNull();
    expect(getDataLanguage("view.m")).toBeNull();
    expect(getDataLanguage("component.re")).toBeNull();
  });
});

describe("isMarkdownFile", () => {
  it("recognises markdown extensions", () => {
    expect(isMarkdownFile("README.md")).toBe(true);
    expect(isMarkdownFile("doc.markdown")).toBe(true);
  });

  it("is case-insensitive", () => {
    expect(isMarkdownFile("NOTES.MD")).toBe(true);
  });

  it("rejects non-markdown files", () => {
    expect(isMarkdownFile("app.py")).toBe(false);
    expect(isMarkdownFile("data.json")).toBe(false);
  });
});

describe("getLanguageByMime", () => {
  it("resolves known MIME types", () => {
    expect(getLanguageByMime("text/x-python")).toBe("python");
    expect(getLanguageByMime("text/javascript")).toBe("javascript");
  });

  it("strips parameters before matching", () => {
    expect(getLanguageByMime("text/x-python; charset=utf-8")).toBe("python");
  });

  it("returns null for unknown MIME types", () => {
    expect(getLanguageByMime("application/x-unknown-thing")).toBeNull();
  });
});


================================================
FILE: web/src/lib/languages.ts
================================================
import * as languages from "linguist-languages";

const LANGUAGE_EXT_PATTERN = /\.[^.]+$/;

interface LinguistLanguage {
  name: string;
  type: string;
  extensions?: string[];
  filenames?: string[];
  codemirrorMimeType?: string;
}

interface LanguageMaps {
  extensions: Map<string, string>;
  filenames: Map<string, string>;
}

// Explicit winners for extensions claimed by multiple linguist-languages entries
// where the "most extensions" heuristic below picks the wrong language.
const EXTENSION_OVERRIDES: Record<string, string> = {
  ".h": "c",
  ".inc": "php",
  ".m": "objective-c",
  ".re": "reason",
  ".rs": "rust",
};

// Sort so that languages with more extensions (i.e. more general-purpose) win
// when multiple languages claim the same extension (e.g. Ecmarkup vs HTML both
// claim .html — HTML should win because it's the canonical language for that
// extension). Known mis-rankings are patched by EXTENSION_OVERRIDES above.
const allLanguages = (Object.values(languages) as LinguistLanguage[]).sort(
  (a, b) => (b.extensions?.length ?? 0) - (a.extensions?.length ?? 0)
);

// Collect extensions that linguist-languages assigns to "Markdown" so we can
// exclude them from the code-language map
const markdownExtensions = new Set(
  allLanguages
    .find((lang) => lang.name === "Markdown")
    ?.extensions?.map((ext) => ext.toLowerCase()) ?? []
);

function buildLanguageMaps(
  types: string[],
  excludedExtensions?: Set<string>
): LanguageMaps {
  const typeSet = new Set(types);
  const extensions = new Map<string, string>();
  const filenames = new Map<string, string>();

  if (typeSet.has("programming") || typeSet.has("markup")) {
    for (const [ext, lang] of Object.entries(EXTENSION_OVERRIDES)) {
      if (excludedExtensions?.has(ext.toLowerCase())) continue;
      extensions.set(ext, lang);
    }
  }

  for (const lang of allLanguages) {
    if (!typeSet.has(lang.type)) continue;

    const name = lang.name.toLowerCase();
    for (const ext of lang.extensions ?? []) {
      if (excludedExtensions?.has(ext.toLowerCase())) continue;
      if (!extensions.has(ext)) {
        extensions.set(ext, name);
      }
    }
    for (const filename of lang.filenames ?? []) {
      if (!filenames.has(filename.toLowerCase())) {
        filenames.set(filename.toLowerCase(), name);
      }
    }
  }

  return { extensions, filenames };
}

function lookupLanguage(name: string, maps: LanguageMaps): string | null {
  const lower = name.toLowerCase();
  const ext = lower.match(LANGUAGE_EXT_PATTERN)?.[0];
  return (ext && maps.extensions.get(ext)) ?? maps.filenames.get(lower) ?? null;
}

const codeMaps = buildLanguageMaps(
  ["programming", "markup"],
  markdownExtensions
);
const dataMaps = buildLanguageMaps(["data"]);

/**
 * Returns the language name for a given file name, or null if it's not a
 * recognised code or markup file (programming + markup types from
 * linguist-languages, e.g. Python, HTML, CSS, Vue). Looks up by extension
 * first, then by exact filename (e.g. "Dockerfile", "Makefile"). Runs in O(1).
 */
export function getCodeLanguage(name: string): string | null {
  return lookupLanguage(name, codeMaps);
}

/**
 * Returns the language name for a given file name if it's a recognised
 * "data" type in linguist-languages (e.g. JSON, YAML, TOML, XML).
 * Returns null otherwise. Runs in O(1).
 */
export function getDataLanguage(name: string): string | null {
  return lookupLanguage(name, dataMaps);
}

/**
 * Returns true if the file name has a Markdown extension (as defined by
 * linguist-languages) and should be rendered as rich text rather than code.
 */
export function isMarkdownFile(name: string): boolean {
  const ext = name.toLowerCase().match(LANGUAGE_EXT_PATTERN)?.[0];
  return !!ext && markdownExtensions.has(ext);
}

const mimeToLanguage = new Map<string, string>();
for (const lang of allLanguages) {
  if (lang.codemirrorMimeType && !mimeToLanguage.has(lang.codemirrorMimeType)) {
    mimeToLanguage.set(lang.codemirrorMimeType, lang.name.toLowerCase());
  }
}

/**
 * Returns the language name for a given MIME type using the codemirrorMimeType
 * field from linguist-languages (~297 entries). Returns null if unrecognised.
 */
export function getLanguageByMime(mime: string): string | null {
  const base = mime.split(";")[0];
  if (!base) return null;
  return mimeToLanguage.get(base.trim().toLowerCase()) ?? null;
}


================================================
FILE: web/src/lib/llmConfig/cache.ts
================================================
import { ScopedMutator } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

const PERSONA_PROVIDER_ENDPOINT_PATTERN =
  /^\/api\/llm\/persona\/\d+\/providers$/;

export async function refreshLlmProviderCaches(
  mutate: ScopedMutator
): Promise<void> {
  await Promise.all([
    mutate(SWR_KEYS.adminLlmProviders),
    mutate(SWR_KEYS.llmProviders),
    mutate(
      (key) =>
        typeof key === "string" && PERSONA_PROVIDER_ENDPOINT_PATTERN.test(key)
    ),
  ]);
}


================================================
FILE: web/src/lib/llmConfig/constants.ts
================================================
export const LLM_ADMIN_URL = "/api/admin/llm";
export const LLM_PROVIDERS_ADMIN_URL = `${LLM_ADMIN_URL}/provider`;
export const LLM_CHAT_PROVIDERS_URL = "/api/llm/provider";

export const LLM_CONTEXTUAL_COST_ADMIN_URL =
  "/api/admin/llm/provider-contextual-cost";

export const EMBEDDING_PROVIDERS_ADMIN_URL =
  "/api/admin/embedding/embedding-provider";

export const EMBEDDING_MODELS_ADMIN_URL = "/api/admin/embedding";


================================================
FILE: web/src/lib/llmConfig/providers.ts
================================================
import type { IconFunctionComponent } from "@opal/types";
import {
  SvgBifrost,
  SvgCpu,
  SvgOpenai,
  SvgClaude,
  SvgOllama,
  SvgAws,
  SvgOpenrouter,
  SvgServer,
  SvgAzure,
  SvgGemini,
  SvgLitellm,
  SvgLmStudio,
} from "@opal/icons";
import { LLMProviderName } from "@/interfaces/llm";

const PROVIDER_ICONS: Record<string, IconFunctionComponent> = {
  [LLMProviderName.OPENAI]: SvgOpenai,
  [LLMProviderName.ANTHROPIC]: SvgClaude,
  [LLMProviderName.VERTEX_AI]: SvgGemini,
  [LLMProviderName.BEDROCK]: SvgAws,
  [LLMProviderName.AZURE]: SvgAzure,
  [LLMProviderName.LITELLM]: SvgLitellm,
  [LLMProviderName.LITELLM_PROXY]: SvgLitellm,
  [LLMProviderName.OLLAMA_CHAT]: SvgOllama,
  [LLMProviderName.OPENROUTER]: SvgOpenrouter,
  [LLMProviderName.LM_STUDIO]: SvgLmStudio,
  [LLMProviderName.BIFROST]: SvgBifrost,

  // fallback
  [LLMProviderName.CUSTOM]: SvgServer,
};

const PROVIDER_PRODUCT_NAMES: Record<string, string> = {
  [LLMProviderName.OPENAI]: "GPT",
  [LLMProviderName.ANTHROPIC]: "Claude",
  [LLMProviderName.VERTEX_AI]: "Gemini",
  [LLMProviderName.BEDROCK]: "Amazon Bedrock",
  [LLMProviderName.AZURE]: "Azure OpenAI",
  [LLMProviderName.LITELLM]: "LiteLLM",
  [LLMProviderName.LITELLM_PROXY]: "LiteLLM Proxy",
  [LLMProviderName.OLLAMA_CHAT]: "Ollama",
  [LLMProviderName.OPENROUTER]: "OpenRouter",
  [LLMProviderName.LM_STUDIO]: "LM Studio",
  [LLMProviderName.BIFROST]: "Bifrost",

  // fallback
  [LLMProviderName.CUSTOM]: "Custom Models",
};

const PROVIDER_DISPLAY_NAMES: Record<string, string> = {
  [LLMProviderName.OPENAI]: "OpenAI",
  [LLMProviderName.ANTHROPIC]: "Anthropic",
  [LLMProviderName.VERTEX_AI]: "Google Cloud Vertex AI",
  [LLMProviderName.BEDROCK]: "AWS",
  [LLMProviderName.AZURE]: "Microsoft Azure",
  [LLMProviderName.LITELLM]: "LiteLLM",
  [LLMProviderName.LITELLM_PROXY]: "LiteLLM Proxy",
  [LLMProviderName.OLLAMA_CHAT]: "Ollama",
  [LLMProviderName.OPENROUTER]: "OpenRouter",
  [LLMProviderName.LM_STUDIO]: "LM Studio",
  [LLMProviderName.BIFROST]: "Bifrost",

  // fallback
  [LLMProviderName.CUSTOM]: "Other providers or self-hosted",
};

export function getProviderProductName(providerName: string): string {
  return PROVIDER_PRODUCT_NAMES[providerName] ?? providerName;
}

export function getProviderDisplayName(providerName: string): string {
  return PROVIDER_DISPLAY_NAMES[providerName] ?? providerName;
}

export function getProviderIcon(providerName: string): IconFunctionComponent {
  return PROVIDER_ICONS[providerName] ?? SvgCpu;
}


================================================
FILE: web/src/lib/llmConfig/svc.ts
================================================
/**
 * LLM action functions for mutations.
 *
 * These are async functions for one-off actions that don't need SWR caching.
 *
 * Endpoints:
 * - /api/admin/llm/test/default - Test the default LLM provider connection
 * - /api/admin/llm/default - Set the default LLM model
 * - /api/admin/llm/provider/{id} - Delete an LLM provider
 */

import {
  LLM_ADMIN_URL,
  LLM_PROVIDERS_ADMIN_URL,
} from "@/lib/llmConfig/constants";

/**
 * Test the default LLM provider.
 * Returns true if the default provider is configured and working, false otherwise.
 */
export async function testDefaultProvider(): Promise<boolean> {
  try {
    const response = await fetch(`${LLM_ADMIN_URL}/test/default`, {
      method: "POST",
    });
    return response?.ok || false;
  } catch {
    return false;
  }
}

/**
 * Set the default LLM model.
 * @param providerId - The provider ID
 * @param modelName - The model name within that provider
 * @throws Error with the detail message from the API on failure
 */
export async function setDefaultLlmModel(
  providerId: number,
  modelName: string
): Promise<void> {
  const response = await fetch(`${LLM_ADMIN_URL}/default`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      provider_id: providerId,
      model_name: modelName,
    }),
  });

  if (!response.ok) {
    const errorMsg = (await response.json()).detail;
    throw new Error(errorMsg);
  }
}

/**
 * Delete an LLM provider.
 * @param providerId - The provider ID to delete
 * @throws Error with the detail message from the API on failure
 */
export async function deleteLlmProvider(providerId: number): Promise<void> {
  const response = await fetch(`${LLM_PROVIDERS_ADMIN_URL}/${providerId}`, {
    method: "DELETE",
  });

  if (!response.ok) {
    const errorMsg = (await response.json()).detail;
    throw new Error(errorMsg);
  }
}


================================================
FILE: web/src/lib/llmConfig/utils.ts
================================================
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import {
  DefaultModel,
  LLMProviderDescriptor,
  ModelConfiguration,
} from "@/interfaces/llm";
import { LlmDescriptor } from "@/lib/hooks";

export function getFinalLLM(
  llmProviders: LLMProviderDescriptor[],
  persona: MinimalPersonaSnapshot | null,
  currentLlm: LlmDescriptor | null,
  defaultText?: DefaultModel | null
): [string, string] {
  const defaultProvider = defaultText
    ? llmProviders.find((p) => p.id === defaultText.provider_id)
    : llmProviders.find((p) =>
        p.model_configurations.some((m) => m.is_visible)
      );

  let provider = defaultProvider?.provider || "";
  let model =
    defaultText?.model_name ||
    defaultProvider?.model_configurations.find((m) => m.is_visible)?.name ||
    "";

  if (persona) {
    // Map "provider override" to actual LLLMProvider
    if (persona.llm_model_provider_override) {
      const underlyingProvider = llmProviders.find(
        (item: LLMProviderDescriptor) =>
          item.name === persona.llm_model_provider_override
      );
      provider = underlyingProvider?.provider || provider;
    }
    model = persona.llm_model_version_override || model;
  }

  if (currentLlm) {
    provider = currentLlm.provider || provider;
    model = currentLlm.modelName || model;
  }

  return [provider, model];
}

export function getLLMProviderOverrideForPersona(
  liveAgent: MinimalPersonaSnapshot,
  llmProviders: LLMProviderDescriptor[]
): LlmDescriptor | null {
  const overrideProvider = liveAgent.llm_model_provider_override;
  const overrideModel = liveAgent.llm_model_version_override;

  if (!overrideModel) {
    return null;
  }

  const matchingProvider = llmProviders.find(
    (provider) =>
      (overrideProvider ? provider.name === overrideProvider : true) &&
      provider.model_configurations
        .map((modelConfiguration) => modelConfiguration.name)
        .includes(overrideModel)
  );

  if (matchingProvider) {
    return {
      name: matchingProvider.name,
      provider: matchingProvider.provider,
      modelName: overrideModel,
    };
  }

  return null;
}

export const structureValue = (
  name: string,
  provider: string,
  modelName: string
) => {
  return `${name}__${provider}__${modelName}`;
};

export const parseLlmDescriptor = (value: string): LlmDescriptor => {
  const [displayName, provider, modelName] = value.split("__");
  if (displayName === undefined) {
    return { name: "Unknown", provider: "", modelName: "" };
  }

  return {
    name: displayName,
    provider: provider ?? "",
    modelName: modelName ?? "",
  };
};

export const findModelInModelConfigurations = (
  modelConfigurations: ModelConfiguration[],
  modelName: string
): ModelConfiguration | null => {
  return modelConfigurations.find((m) => m.name === modelName) || null;
};

export const findModelConfiguration = (
  llmProviders: LLMProviderDescriptor[],
  modelName: string,
  providerName: string | null = null
): ModelConfiguration | null => {
  if (providerName) {
    const provider = llmProviders.find((p) => p.name === providerName);
    return provider
      ? findModelInModelConfigurations(provider.model_configurations, modelName)
      : null;
  }

  for (const provider of llmProviders) {
    const modelConfiguration = findModelInModelConfigurations(
      provider.model_configurations,
      modelName
    );
    if (modelConfiguration) {
      return modelConfiguration;
    }
  }

  return null;
};

export const modelSupportsImageInput = (
  llmProviders: LLMProviderDescriptor[],
  modelName: string,
  providerName: string | null = null
): boolean => {
  const modelConfiguration = findModelConfiguration(
    llmProviders,
    modelName,
    providerName
  );
  return modelConfiguration?.supports_image_input || false;
};

export function getDisplayName(
  agent: MinimalPersonaSnapshot,
  llmProviders: LLMProviderDescriptor[]
): string | undefined {
  const llmDescriptor = getLLMProviderOverrideForPersona(
    agent,
    llmProviders ?? []
  );
  const llmProvider = llmProviders?.find(
    (llmProvider) => llmProvider.name === agent.llm_model_provider_override
  );
  const modelConfig = llmProvider?.model_configurations.find(
    (modelConfig) => modelConfig.name === llmDescriptor?.modelName
  );
  return modelConfig?.display_name;
}


================================================
FILE: web/src/lib/llmConfig/visionLLM.ts
================================================
import { LLMProviderResponse, VisionProvider } from "@/interfaces/llm";
import { LLM_ADMIN_URL } from "@/lib/llmConfig/constants";

export async function fetchVisionProviders(): Promise<VisionProvider[]> {
  const response = await fetch(`${LLM_ADMIN_URL}/vision-providers`, {
    headers: {
      "Content-Type": "application/json",
    },
  });
  if (!response.ok) {
    throw new Error(
      `Failed to fetch vision providers: ${await response.text()}`
    );
  }
  const data = (await response.json()) as LLMProviderResponse<VisionProvider>;
  return data.providers;
}

export async function setDefaultVisionProvider(
  providerId: number,
  visionModel: string
): Promise<void> {
  const response = await fetch(`${LLM_ADMIN_URL}/default-vision`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      provider_id: providerId,
      model_name: visionModel,
    }),
  });

  if (!response.ok) {
    const errorMsg = await response.text();
    throw new Error(errorMsg);
  }
}


================================================
FILE: web/src/lib/oauth/api.ts
================================================
import {
  OAuthConfig,
  OAuthConfigCreate,
  OAuthConfigUpdate,
  OAuthTokenStatus,
} from "@/lib/tools/interfaces";

// Admin OAuth Config Management

export async function createOAuthConfig(
  config: OAuthConfigCreate
): Promise<OAuthConfig> {
  const response = await fetch("/api/admin/oauth-config/create", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(config),
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail ||
        `Failed to create OAuth config: ${response.statusText}`
    );
  }

  return await response.json();
}

export async function getOAuthConfigs(): Promise<OAuthConfig[]> {
  const response = await fetch("/api/admin/oauth-config");

  if (!response.ok) {
    throw new Error(`Failed to fetch OAuth configs: ${response.statusText}`);
  }

  return await response.json();
}

export async function getOAuthConfig(id: number): Promise<OAuthConfig> {
  const response = await fetch(`/api/admin/oauth-config/${id}`);

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to fetch OAuth config: ${response.statusText}`
    );
  }

  return await response.json();
}

export async function updateOAuthConfig(
  id: number,
  updates: OAuthConfigUpdate
): Promise<OAuthConfig> {
  const response = await fetch(`/api/admin/oauth-config/${id}`, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(updates),
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail ||
        `Failed to update OAuth config: ${response.statusText}`
    );
  }

  return await response.json();
}

export async function deleteOAuthConfig(id: number): Promise<void> {
  const response = await fetch(`/api/admin/oauth-config/${id}`, {
    method: "DELETE",
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail ||
        `Failed to delete OAuth config: ${response.statusText}`
    );
  }
}

// User OAuth Flow

export async function initiateOAuthFlow(
  oauthConfigId: number,
  returnPath: string = "/app"
): Promise<void> {
  const response = await fetch("/api/oauth-config/initiate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      oauth_config_id: oauthConfigId,
      return_path: returnPath,
    }),
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail ||
        `Failed to initiate OAuth flow: ${response.statusText}`
    );
  }

  const data = await response.json();
  // Redirect to authorization URL
  window.location.href = data.authorization_url;
}

export async function handleOAuthCallback(
  code: string,
  state: string,
  oauthConfigId: number
): Promise<{ success: boolean; redirect_url: string; error?: string }> {
  const response = await fetch("/api/oauth-config/callback", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      code,
      state,
      oauth_config_id: oauthConfigId,
    }),
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `OAuth callback failed: ${response.statusText}`
    );
  }

  return await response.json();
}

export async function getUserOAuthTokenStatus(): Promise<OAuthTokenStatus[]> {
  const response = await fetch("/api/user-oauth-token/status");

  if (!response.ok) {
    throw new Error(
      `Failed to fetch OAuth token status: ${response.statusText}`
    );
  }

  return await response.json();
}

export async function revokeOAuthToken(oauthConfigId: number): Promise<void> {
  const response = await fetch(`/api/oauth-config/${oauthConfigId}/token`, {
    method: "DELETE",
  });

  if (!response.ok) {
    const errorData = await response.json().catch(() => ({}));
    throw new Error(
      errorData.detail || `Failed to revoke OAuth token: ${response.statusText}`
    );
  }
}


================================================
FILE: web/src/lib/oauth_utils.ts
================================================
import {
  OAuthBaseCallbackResponse,
  OAuthConfluenceFinalizeResponse,
  OAuthConfluencePrepareFinalizationResponse,
  OAuthPrepareAuthorizationResponse,
  OAuthSlackCallbackResponse,
} from "./types";

// server side handler to help initiate the oauth authorization request
export async function prepareOAuthAuthorizationRequest(
  connector: string,
  finalRedirect: string | null // a redirect (not the oauth redirect) for the user to return to after oauth is complete)
): Promise<OAuthPrepareAuthorizationResponse> {
  let url = `/api/oauth/prepare-authorization-request?connector=${encodeURIComponent(
    connector
  )}`;

  // Conditionally append the `redirect_on_success` parameter
  if (finalRedirect) {
    url += `&redirect_on_success=${encodeURIComponent(finalRedirect)}`;
  }

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      connector: connector,
      redirect_on_success: finalRedirect,
    }),
  });

  if (!response.ok) {
    throw new Error(
      `Failed to prepare OAuth authorization request: ${response.status}`
    );
  }

  // Parse the JSON response
  const data = (await response.json()) as OAuthPrepareAuthorizationResponse;
  return data;
}

export async function handleOAuthAuthorizationResponse(
  connector: string,
  code: string,
  state: string
) {
  if (connector === "slack") {
    return handleOAuthSlackAuthorizationResponse(code, state);
  }

  if (connector === "google-drive") {
    return handleOAuthGoogleDriveAuthorizationResponse(code, state);
  }

  if (connector === "confluence") {
    return handleOAuthConfluenceAuthorizationResponse(code, state);
  }

  return;
}

// Handler for federated connector OAuth callbacks
export async function handleFederatedOAuthCallback(
  federatedConnectorId: string,
  code: string,
  state: string
): Promise<OAuthBaseCallbackResponse> {
  // Use the generic callback endpoint - the connector ID will be extracted from the state parameter
  const url = `/api/federated/callback?code=${encodeURIComponent(
    code
  )}&state=${encodeURIComponent(state)}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle federated OAuth callback: ${response.status}`;

    try {
      const responseBody = await response.text();
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response and extract the data field
  const result = await response.json();

  if (!result.success) {
    throw new Error(result.message || "OAuth callback failed");
  }

  return {
    success: true,
    message: result.message || "OAuth authorization successful",
    redirect_on_success: `/admin/federated/${federatedConnectorId}`,
    finalize_url: null,
  };
}

// server side handler to process the oauth redirect callback
// https://api.slack.com/authentication/oauth-v2#exchanging
export async function handleOAuthSlackAuthorizationResponse(
  code: string,
  state: string
): Promise<OAuthSlackCallbackResponse> {
  const url = `/api/oauth/connector/slack/callback?code=${encodeURIComponent(
    code
  )}&state=${encodeURIComponent(state)}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ code, state }),
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle OAuth Slack authorization response: ${response.status}`;

    try {
      const responseBody = await response.text(); // Read the body as text
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response
  const data = (await response.json()) as OAuthSlackCallbackResponse;
  return data;
}

export async function handleOAuthGoogleDriveAuthorizationResponse(
  code: string,
  state: string
): Promise<OAuthBaseCallbackResponse> {
  const url = `/api/oauth/connector/google-drive/callback?code=${encodeURIComponent(
    code
  )}&state=${encodeURIComponent(state)}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ code, state }),
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle OAuth Google Drive authorization response: ${response.status}`;

    try {
      const responseBody = await response.text(); // Read the body as text
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response
  const data = (await response.json()) as OAuthBaseCallbackResponse;
  return data;
}

// call server side helper
// https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps
export async function handleOAuthConfluenceAuthorizationResponse(
  code: string,
  state: string
): Promise<OAuthBaseCallbackResponse> {
  const url = `/api/oauth/connector/confluence/callback?code=${encodeURIComponent(
    code
  )}&state=${encodeURIComponent(state)}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ code, state }),
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle OAuth Confluence authorization response: ${response.status}`;

    try {
      const responseBody = await response.text(); // Read the body as text
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response
  const data = (await response.json()) as OAuthBaseCallbackResponse;
  return data;
}

export async function handleOAuthPrepareFinalization(
  connector: string,
  credential: number
) {
  if (connector === "confluence") {
    return handleOAuthConfluencePrepareFinalization(credential);
  }

  return;
}

// call server side helper
// https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps
export async function handleOAuthConfluencePrepareFinalization(
  credential: number
): Promise<OAuthConfluencePrepareFinalizationResponse> {
  const url = `/api/oauth/connector/confluence/accessible-resources?credential_id=${encodeURIComponent(
    credential
  )}`;

  const response = await fetch(url, {
    method: "GET",
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle OAuth Confluence prepare finalization response: ${response.status}`;

    try {
      const responseBody = await response.text(); // Read the body as text
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response
  const data =
    (await response.json()) as OAuthConfluencePrepareFinalizationResponse;
  return data;
}

export async function handleOAuthConfluenceFinalize(
  credential_id: number,
  cloud_id: string,
  cloud_name: string,
  cloud_url: string
): Promise<OAuthConfluenceFinalizeResponse> {
  const url = `/api/oauth/connector/confluence/finalize?credential_id=${encodeURIComponent(
    credential_id
  )}&cloud_id=${encodeURIComponent(cloud_id)}&cloud_name=${encodeURIComponent(
    cloud_name
  )}&cloud_url=${encodeURIComponent(cloud_url)}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    let errorDetails = `Failed to handle OAuth Confluence finalization response: ${response.status}`;

    try {
      const responseBody = await response.text(); // Read the body as text
      errorDetails += `\nResponse Body: ${responseBody}`;
    } catch (err) {
      if (err instanceof Error) {
        errorDetails += `\nUnable to read response body: ${err.message}`;
      } else {
        errorDetails += `\nUnable to read response body: Unknown error type`;
      }
    }

    throw new Error(errorDetails);
  }

  // Parse the JSON response
  const data = (await response.json()) as OAuthConfluenceFinalizeResponse;
  return data;
}


================================================
FILE: web/src/lib/redirectSS.ts
================================================
import { NextRequest } from "next/server";

export const getDomain = (request: NextRequest) => {
  // Use the WEB_DOMAIN env variable if set (required in production).
  // Never trust X-Forwarded-* headers from the request — they can be
  // spoofed by an attacker to poison redirect URLs (host header poisoning).
  if (process.env.WEB_DOMAIN) {
    return process.env.WEB_DOMAIN;
  }

  // Fallback for local development: use Next.js's own origin.
  return request.nextUrl.origin;
};


================================================
FILE: web/src/lib/search/interfaces.ts
================================================
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";
import { Tag, ValidSources } from "../types";
import { Persona } from "@/app/admin/agents/interfaces";

export const FlowType = {
  SEARCH: "search",
  QUESTION_ANSWER: "question-answer",
};
export type FlowType = (typeof FlowType)[keyof typeof FlowType];
export const SearchType = {
  SEMANTIC: "semantic",
  KEYWORD: "keyword",
  AUTOMATIC: "automatic",
  INTERNET: "internet",
};
export type SearchType = (typeof SearchType)[keyof typeof SearchType];

export interface ToolResponse {
  id?: string | null;
  response?: any;
}
export interface ExtendedToolResponse extends ToolResponse {
  level: number;
  level_question_num: number;
}

export enum StreamStopReason {
  CONTEXT_LENGTH = "CONTEXT_LENGTH",
  CANCELLED = "CANCELLED",
}

export interface StreamStopInfo {
  stop_reason: StreamStopReason;
  level?: number;
  level_question_num?: number;
  stream_type?: "sub_answer" | "sub_questions" | "main_answer";
}

export interface ErrorMessagePacket {
  error: string;
}

export interface Quote {
  quote: string;
  document_id: string;
  link: string | null;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string;
}

export interface QuotesInfoPacket {
  quotes: Quote[];
}
export interface MinimalOnyxDocument {
  document_id: string;
  semantic_identifier: string | null;
}

export interface OnyxDocument extends MinimalOnyxDocument {
  link: string;
  source_type: ValidSources;
  blurb: string;
  boost: number;
  hidden: boolean;
  score: number;
  chunk_ind: number;
  match_highlights: string[];
  metadata: { [key: string]: string };
  updated_at: string | null;
  db_doc_id?: number;
  is_internet: boolean;
  validationState?: null | "good" | "bad";
}

export interface LoadedOnyxDocument extends OnyxDocument {
  icon: React.FC<{ size?: number; className?: string }>;
}

export interface SearchOnyxDocument extends OnyxDocument {
  is_relevant: boolean;
  relevance_explanation: string;
}

export interface FilteredOnyxDocument extends OnyxDocument {
  included: boolean;
}
export interface DocumentInfoPacket {
  top_documents: OnyxDocument[];
  predicted_flow: FlowType | null;
  predicted_search: SearchType | null;
  time_cutoff: string | null;
  favor_recent: boolean;
}

export interface DocumentRelevance {
  relevant: boolean;
  content: string;
}

export interface Relevance {
  [url: string]: DocumentRelevance;
}

export interface RelevanceChunk {
  relevance_summaries: Relevance;
}

export interface SearchResponse {
  suggestedSearchType: SearchType | null;
  suggestedFlowType: FlowType | null;
  answer: string | null;
  quotes: Quote[] | null;
  documents: SearchOnyxDocument[] | null;
  selectedDocIndices: number[] | null;
  error: string | null;
  messageId: number | null;
  additional_relevance?: Relevance;
}

export enum SourceCategory {
  Wiki = "Knowledge Base & Wikis",
  Storage = "Cloud Storage",
  TicketingAndTaskManagement = "Ticketing & Task Management",
  Messaging = "Messaging",
  Sales = "Sales",
  CodeRepository = "Code Repository",
  Other = "Others",
}

export interface SourceMetadata {
  icon: React.FC<{ size?: number; className?: string }>;
  displayName: string;
  category: SourceCategory;
  shortDescription?: string;
  internalName: ValidSources;
  adminUrl: string;
  isPopular?: boolean;
  oauthSupported?: boolean;
  federated?: boolean;
  federatedTooltip?: string;
  uniqueKey?: string;
  // For federated connectors, this stores the base source type for the icon
  baseSourceType?: ValidSources;
  // For connectors that are always available (don't need connection setup)
  // e.g., User Library (CraftFile) where users just upload files
  alwaysConnected?: boolean;
  // Custom description to show instead of status (e.g., "Manage your uploaded files")
  customDescription?: string;
}

export interface SearchDefaultOverrides {
  forceDisplayQA: boolean;
  offset: number;
}

export interface Filters {
  source_type: string[] | null;
  document_set: string[] | null;
  time_cutoff: Date | null;
}

export interface SearchRequestArgs {
  query: string;
  agentic?: boolean;
  sources: SourceMetadata[];
  documentSets: string[];
  timeRange: DateRangePickerValue | null;
  tags: Tag[];
  persona: Persona;
  updateDocumentRelevance: (relevance: any) => void;
  updateCurrentAnswer: (val: string) => void;
  updateQuotes: (quotes: Quote[]) => void;
  updateDocs: (documents: OnyxDocument[]) => void;
  updateSelectedDocIndices: (docIndices: number[]) => void;
  updateSuggestedSearchType: (searchType: SearchType) => void;
  updateSuggestedFlowType: (flowType: FlowType) => void;
  updateError: (error: string) => void;
  updateMessageAndThreadId: (
    messageId: number,
    chat_session_id: string
  ) => void;
  finishedSearching: () => void;
  updateComments: (comments: any) => void;
  selectedSearchType: SearchType | null;
}

export interface SearchRequestOverrides {
  searchType?: SearchType;
  offset?: number;
  overrideMessage?: string;
  agentic?: boolean;
}

export interface ValidQuestionResponse {
  reasoning: string | null;
  error: string | null;
}

// ============================================================================
// Classification API
// ============================================================================

/**
 * Request to classify a query as search or chat flow
 * POST /api/search/search-flow-classification
 */
export interface SearchFlowClassificationRequest {
  user_query: string;
}

/**
 * Response from query classification
 */
export interface SearchFlowClassificationResponse {
  is_search_flow: boolean;
}

// ============================================================================
// Search API (Unified Search + Chat)
// ============================================================================

/**
 * Base filters for search queries
 * Matches backend/onyx/context/search/models.py BaseFilters
 */
export interface BaseFilters {
  source_type?: ValidSources[] | null;
  document_set?: string[] | null;
  time_cutoff?: string | null; // ISO date string
  tags?: Array<{ tag_key: string; tag_value: string }> | null;
}

/**
 * Request to perform a document search
 * POST /api/search/send-search-message
 */
export interface SendSearchQueryRequest {
  search_query: string;
  filters?: BaseFilters | null;
  num_docs_fed_to_llm_selection?: number | null;
  run_query_expansion?: boolean;
  num_hits?: number; // default 30
  include_content?: boolean;
  stream?: boolean;
}

/**
 * Search document with optional content
 * Matches backend SearchDocWithContent
 */
export interface SearchDocWithContent {
  document_id: string;
  chunk_ind: number;
  semantic_identifier: string;
  link: string | null;
  blurb: string;
  source_type: ValidSources;
  boost: number;
  hidden: boolean;
  metadata: Record<string, string | string[]>;
  score: number | null;
  is_relevant?: boolean | null;
  relevance_explanation?: string | null;
  match_highlights: string[];
  updated_at: string | null; // ISO date string
  primary_owners?: string[] | null;
  secondary_owners?: string[] | null;
  is_internet: boolean;
  content?: string | null;
}

/**
 * Full response from a search query (non-streaming)
 */
export interface SearchFullResponse {
  all_executed_queries: string[];
  search_docs: SearchDocWithContent[];
  doc_selection_reasoning?: string | null;
  llm_selected_doc_ids?: string[] | null;
  error?: string | null;
}

// ============================================================================
// Search History API
// ============================================================================

/**
 * Single search query in history
 */
export interface SearchQueryResponse {
  query: string;
  query_expansions: string[] | null;
  created_at: string; // ISO date string
}

/**
 * Response from search history endpoint
 * GET /api/search/search-history
 */
export interface SearchHistoryResponse {
  search_queries: SearchQueryResponse[];
}

// ============================================================================
// Streaming Packets (for stream=true)
// ============================================================================

export interface SearchDocsPacket {
  type: "search_docs";
  search_docs: SearchDocWithContent[];
}

export interface SearchErrorPacket {
  type: "search_error";
  error: string;
}

export interface LLMSelectedDocsPacket {
  type: "llm_selected_docs";
  llm_selected_doc_ids: string[] | null;
}

export interface QueryExpansionsPacket {
  type: "query_expansions";
  executed_queries: string[];
}

export interface DocSelectionReasoningPacket {
  type: "doc_selection_reasoning";
  reasoning: string;
}

export type SearchStreamPacket =
  | SearchDocsPacket
  | SearchErrorPacket
  | LLMSelectedDocsPacket
  | QueryExpansionsPacket
  | DocSelectionReasoningPacket;


================================================
FILE: web/src/lib/search/streamingUtils.ts
================================================
import { PacketType } from "@/app/app/services/lib";

export async function* handleSSEStream<T extends PacketType>(
  streamingResponse: Response,
  signal?: AbortSignal
): AsyncGenerator<T, void, unknown> {
  const reader = streamingResponse.body?.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  if (signal) {
    signal.addEventListener("abort", () => {
      console.log("aborting");
      reader?.cancel();
    });
  }
  while (true) {
    const rawChunk = await reader?.read();
    if (!rawChunk) {
      throw new Error("Unable to process chunk");
    }
    const { done, value } = rawChunk;
    if (done) {
      break;
    }

    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    buffer = lines.pop() || "";

    for (const line of lines) {
      if (line.trim() === "") continue;

      try {
        const data = JSON.parse(line) as T;
        yield data;
      } catch (error) {
        console.error("Error parsing SSE data:", error);

        // Detect JSON objects (ie. check if parseable json has been accumulated)
        const jsonObjects = line.match(/\{[^{}]*\}/g);
        if (jsonObjects) {
          for (const jsonObj of jsonObjects) {
            try {
              const data = JSON.parse(jsonObj) as T;
              yield data;
            } catch (innerError) {
              console.error("Error parsing extracted JSON:", innerError);
            }
          }
        }
      }
    }
  }

  // Process any remaining data in the buffer
  if (buffer.trim() !== "") {
    try {
      const data = JSON.parse(buffer) as T;
      yield data;
    } catch (error) {
      console.error("Error parsing remaining buffer:", error);
    }
  }
}


================================================
FILE: web/src/lib/search/utils.ts
================================================
import { Tag, ValidSources } from "../types";
import {
  Filters,
  MinimalOnyxDocument,
  OnyxDocument,
  SourceMetadata,
} from "./interfaces";
import { DateRangePickerValue } from "@/components/dateRangeSelectors/AdminDateRangeSelector";

export const buildFilters = (
  sources: SourceMetadata[],
  documentSets: string[],
  timeRange: DateRangePickerValue | null,
  tags: Tag[]
): Filters => {
  const filters = {
    source_type:
      sources.length > 0 ? sources.map((source) => source.internalName) : null,
    document_set: documentSets.length > 0 ? documentSets : null,
    time_cutoff: timeRange?.from ? timeRange.from : null,
    tags: tags,
  };

  return filters;
};

// If we have a link, open it in a new tab (including if it's a file)
// If above fails and we have a file, update the presenting document
export const openDocument = (
  document: OnyxDocument,
  updatePresentingDocument?: (document: MinimalOnyxDocument) => void
) => {
  if (document.link) {
    window.open(document.link, "_blank");
  } else if (document.source_type === ValidSources.File) {
    updatePresentingDocument?.(document);
  }
};


================================================
FILE: web/src/lib/search/utilsSS.ts
================================================
import { DocumentSetSummary } from "../types";
import { fetchSS } from "../utilsSS";
import { Connector } from "../connectors/connectors";

export async function fetchValidFilterInfo() {
  const [connectorsResponse, documentSetResponse] = await Promise.all([
    fetchSS("/manage/connector"),
    fetchSS("/manage/document-set"),
  ]);

  let connectors = [] as Connector<any>[];
  if (connectorsResponse.ok) {
    connectors = (await connectorsResponse.json()) as Connector<any>[];
  } else {
    console.log(
      `Failed to fetch connectors - ${connectorsResponse.status} - ${connectorsResponse.statusText}`
    );
  }

  let documentSets = [] as DocumentSetSummary[];
  if (documentSetResponse.ok) {
    documentSets = (await documentSetResponse.json()) as DocumentSetSummary[];
  } else {
    console.log(
      `Failed to fetch document sets - ${documentSetResponse.status} - ${documentSetResponse.statusText}`
    );
  }

  return { connectors, documentSets };
}


================================================
FILE: web/src/lib/sources.ts
================================================
import {
  AxeroIcon,
  BookstackIcon,
  OutlineIcon,
  ClickupIcon,
  ConfluenceIcon,
  DiscourseIcon,
  Document360Icon,
  DropboxIcon,
  GithubIcon,
  GitlabIcon,
  BitbucketIcon,
  GmailIcon,
  GongIcon,
  GoogleDriveIcon,
  GoogleSitesIcon,
  GuruIcon,
  HubSpotIcon,
  JiraIcon,
  LinearIcon,
  LoopioIcon,
  CodaIcon,
  NotionIcon,
  ProductboardIcon,
  R2Icon,
  SalesforceIcon,
  SharepointIcon,
  TeamsIcon,
  SlabIcon,
  ZendeskIcon,
  ZulipIcon,
  MediaWikiIcon,
  WikipediaIcon,
  AsanaIcon,
  S3Icon,
  OCIStorageIcon,
  GoogleStorageIcon,
  ColorSlackIcon,
  XenforoIcon,
  ColorDiscordIcon,
  FreshdeskIcon,
  FirefliesIcon,
  EgnyteIcon,
  AirtableIcon,
  GitbookIcon,
  HighspotIcon,
  DrupalWikiIcon,
  EmailIcon,
  TestRailIcon,
} from "@/components/icons/icons";
import { ValidSources } from "./types";
import { SourceCategory, SourceMetadata } from "./search/interfaces";
import { Persona } from "@/app/admin/agents/interfaces";
import React from "react";
import { DOCS_ADMINS_PATH } from "./constants";
import { SvgFileText, SvgGlobe } from "@opal/icons";

interface PartialSourceMetadata {
  icon: React.FC<{ size?: number; className?: string }>;
  displayName: string;
  category: SourceCategory;
  isPopular?: boolean;
  docs?: string;
  oauthSupported?: boolean;
  federated?: boolean;
  federatedTooltip?: string;
  // federated connectors store the base source type if it's a source
  // that has both indexed connectors and federated connectors
  baseSourceType?: ValidSources;
  // For connectors that are always available (don't need connection setup)
  // e.g., User Library (CraftFile) where users just upload files
  alwaysConnected?: boolean;
  // Custom description to show instead of status (e.g., "Manage your uploaded files")
  customDescription?: string;
}

type SourceMap = {
  [K in ValidSources | "federated_slack"]: PartialSourceMetadata;
};

const slackMetadata = {
  icon: ColorSlackIcon,
  displayName: "Slack",
  category: SourceCategory.Messaging,
  isPopular: true,
  docs: `${DOCS_ADMINS_PATH}/connectors/official/slack`,
  oauthSupported: true,
  // Federated Slack is available as an option but not the default
  federated: true,
  federatedTooltip:
    "⚠️ WARNING: Federated Slack results in significantly greater latency and lower search quality.",
  baseSourceType: "slack",
};

export const SOURCE_METADATA_MAP: SourceMap = {
  // Knowledge Base & Wikis
  confluence: {
    icon: ConfluenceIcon,
    displayName: "Confluence",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/confluence`,
    oauthSupported: true,
    isPopular: true,
  },
  sharepoint: {
    icon: SharepointIcon,
    displayName: "Sharepoint",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/sharepoint`,
    isPopular: true,
  },
  coda: {
    icon: CodaIcon,
    displayName: "Coda",
    category: SourceCategory.Wiki,
    docs: "https://docs.onyx.app/connectors/coda",
  },
  notion: {
    icon: NotionIcon,
    displayName: "Notion",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/notion`,
  },
  bookstack: {
    icon: BookstackIcon,
    displayName: "BookStack",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/bookstack`,
  },
  document360: {
    icon: Document360Icon,
    displayName: "Document360",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/document360`,
  },
  discourse: {
    icon: DiscourseIcon,
    displayName: "Discourse",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/discourse`,
  },
  gitbook: {
    icon: GitbookIcon,
    displayName: "GitBook",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/gitbook`,
  },
  slab: {
    icon: SlabIcon,
    displayName: "Slab",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/slab`,
  },
  outline: {
    icon: OutlineIcon,
    displayName: "Outline",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/outline`,
  },
  google_sites: {
    icon: GoogleSitesIcon,
    displayName: "Google Sites",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_sites`,
  },
  guru: {
    icon: GuruIcon,
    displayName: "Guru",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/guru`,
  },
  mediawiki: {
    icon: MediaWikiIcon,
    displayName: "MediaWiki",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/mediawiki`,
  },
  axero: {
    icon: AxeroIcon,
    displayName: "Axero",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/axero`,
  },
  wikipedia: {
    icon: WikipediaIcon,
    displayName: "Wikipedia",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/wikipedia`,
  },

  // Cloud Storage
  google_drive: {
    icon: GoogleDriveIcon,
    displayName: "Google Drive",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_drive/overview`,
    oauthSupported: true,
    isPopular: true,
  },
  dropbox: {
    icon: DropboxIcon,
    displayName: "Dropbox",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/dropbox`,
  },
  s3: {
    icon: S3Icon,
    displayName: "S3",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/s3`,
  },
  google_cloud_storage: {
    icon: GoogleStorageIcon,
    displayName: "Google Storage",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_storage`,
  },
  egnyte: {
    icon: EgnyteIcon,
    displayName: "Egnyte",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/egnyte`,
  },
  oci_storage: {
    icon: OCIStorageIcon,
    displayName: "Oracle Storage",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/oci_storage`,
  },
  r2: {
    icon: R2Icon,
    displayName: "R2",
    category: SourceCategory.Storage,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/r2`,
  },

  // Ticketing & Task Management
  jira: {
    icon: JiraIcon,
    displayName: "Jira",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/jira`,
    isPopular: true,
  },
  zendesk: {
    icon: ZendeskIcon,
    displayName: "Zendesk",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/zendesk`,
    isPopular: true,
  },
  airtable: {
    icon: AirtableIcon,
    displayName: "Airtable",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/airtable`,
  },
  linear: {
    icon: LinearIcon,
    displayName: "Linear",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/linear`,
  },
  freshdesk: {
    icon: FreshdeskIcon,
    displayName: "Freshdesk",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/freshdesk`,
  },
  asana: {
    icon: AsanaIcon,
    displayName: "Asana",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/asana`,
  },
  clickup: {
    icon: ClickupIcon,
    displayName: "Clickup",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/clickup`,
  },
  productboard: {
    icon: ProductboardIcon,
    displayName: "Productboard",
    category: SourceCategory.TicketingAndTaskManagement,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/productboard`,
  },
  testrail: {
    icon: TestRailIcon,
    displayName: "TestRail",
    category: SourceCategory.TicketingAndTaskManagement,
  },

  // Messaging
  slack: slackMetadata,
  federated_slack: slackMetadata,
  teams: {
    icon: TeamsIcon,
    displayName: "Teams",
    category: SourceCategory.Messaging,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/teams`,
  },
  gmail: {
    icon: GmailIcon,
    displayName: "Gmail",
    category: SourceCategory.Messaging,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/gmail/overview`,
  },
  drupal_wiki: {
    icon: DrupalWikiIcon,
    displayName: "Drupal Wiki",
    category: SourceCategory.Wiki,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/drupal_wiki`,
  },
  imap: {
    icon: EmailIcon,
    displayName: "Email",
    category: SourceCategory.Messaging,
  },
  discord: {
    icon: ColorDiscordIcon,
    displayName: "Discord",
    category: SourceCategory.Messaging,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/discord`,
  },
  xenforo: {
    icon: XenforoIcon,
    displayName: "Xenforo",
    category: SourceCategory.Messaging,
  },
  zulip: {
    icon: ZulipIcon,
    displayName: "Zulip",
    category: SourceCategory.Messaging,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/zulip`,
  },

  // Sales
  salesforce: {
    icon: SalesforceIcon,
    displayName: "Salesforce",
    category: SourceCategory.Sales,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/salesforce`,
    isPopular: true,
  },
  hubspot: {
    icon: HubSpotIcon,
    displayName: "HubSpot",
    category: SourceCategory.Sales,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/hubspot`,
    isPopular: true,
  },
  gong: {
    icon: GongIcon,
    displayName: "Gong",
    category: SourceCategory.Sales,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/gong`,
    isPopular: true,
  },
  fireflies: {
    icon: FirefliesIcon,
    displayName: "Fireflies",
    category: SourceCategory.Sales,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/fireflies`,
  },
  highspot: {
    icon: HighspotIcon,
    displayName: "Highspot",
    category: SourceCategory.Sales,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/highspot`,
  },
  loopio: {
    icon: LoopioIcon,
    displayName: "Loopio",
    category: SourceCategory.Sales,
  },

  // Code Repository
  github: {
    icon: GithubIcon,
    displayName: "Github",
    category: SourceCategory.CodeRepository,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/github`,
    isPopular: true,
  },
  gitlab: {
    icon: GitlabIcon,
    displayName: "Gitlab",
    category: SourceCategory.CodeRepository,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/gitlab`,
  },
  bitbucket: {
    icon: BitbucketIcon,
    displayName: "Bitbucket",
    category: SourceCategory.CodeRepository,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/bitbucket`,
  },

  // Others
  web: {
    icon: SvgGlobe,
    displayName: "Web",
    category: SourceCategory.Other,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/web`,
    isPopular: true,
  },
  file: {
    icon: SvgFileText,
    displayName: "File",
    category: SourceCategory.Other,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/file`,
    isPopular: true,
  },
  user_file: {
    // TODO: write docs for projects and link them here
    icon: SvgFileText,
    displayName: "File",
    category: SourceCategory.Other,
    docs: `${DOCS_ADMINS_PATH}/connectors/official/file`,
    isPopular: false, // Needs to be false to hide from the Add Connector page
  },

  // Other
  ingestion_api: {
    icon: SvgGlobe,
    displayName: "Ingestion",
    category: SourceCategory.Other,
  },

  // Craft-specific sources
  craft_file: {
    icon: SvgFileText,
    displayName: "Your Files",
    category: SourceCategory.Other,
    isPopular: false, // Hidden from standard Add Connector page
    alwaysConnected: true, // No setup required, just upload files
    customDescription: "Manage your uploaded files",
  },

  // Placeholder (non-null default)
  not_applicable: {
    icon: SvgGlobe,
    displayName: "Not Applicable",
    category: SourceCategory.Other,
  },
  mock_connector: {
    icon: SvgGlobe,
    displayName: "Mock Connector",
    category: SourceCategory.Other,
  },
} as SourceMap;

function fillSourceMetadata(
  partialMetadata: PartialSourceMetadata,
  internalName: ValidSources
): SourceMetadata {
  return {
    internalName: partialMetadata.baseSourceType || internalName,
    ...partialMetadata,
    adminUrl: `/admin/connectors/${internalName}`,
  };
}

export function getSourceMetadata(sourceType: ValidSources): SourceMetadata {
  const partialMetadata = SOURCE_METADATA_MAP[sourceType];

  // Fallback to not_applicable if sourceType not found in map
  if (!partialMetadata) {
    return fillSourceMetadata(
      SOURCE_METADATA_MAP[ValidSources.NotApplicable],
      ValidSources.NotApplicable
    );
  }

  return fillSourceMetadata(partialMetadata, sourceType);
}

export function listSourceMetadata(): SourceMetadata[] {
  /* This gives back all the viewable / common sources, primarily for
  display in the Add Connector page */
  const entries = Object.entries(SOURCE_METADATA_MAP)
    .filter(
      ([source, _]) =>
        source !== "not_applicable" &&
        source !== "ingestion_api" &&
        source !== "mock_connector" &&
        // use the "regular" slack connector when listing
        source !== "federated_slack" &&
        // user_file is for internal use (projects), not the Add Connector page
        source !== "user_file"
    )
    .map(([source, metadata]) => {
      return fillSourceMetadata(metadata, source as ValidSources);
    });
  return entries;
}

export function getSourceDocLink(sourceType: ValidSources): string | null {
  return SOURCE_METADATA_MAP[sourceType].docs || null;
}

export const isValidSource = (sourceType: string) => {
  return Object.keys(SOURCE_METADATA_MAP).includes(sourceType);
};

export function getSourceDisplayName(sourceType: ValidSources): string | null {
  return getSourceMetadata(sourceType).displayName;
}

export function getSourceMetadataForSources(sources: ValidSources[]) {
  return sources.map((source) => getSourceMetadata(source));
}

export function getSourcesForPersona(persona: Persona): ValidSources[] {
  const personaSources: ValidSources[] = [];
  persona.document_sets.forEach((documentSet) => {
    documentSet.cc_pair_summaries.forEach((ccPair) => {
      if (!personaSources.includes(ccPair.source)) {
        personaSources.push(ccPair.source);
      }
    });
  });
  return personaSources;
}

export async function fetchTitleFromUrl(url: string): Promise<string | null> {
  try {
    const response = await fetch(url, {
      method: "GET",
      // If the remote site has no CORS header, this may fail in the browser
      mode: "cors",
    });
    if (!response.ok) {
      // Non-200 response, treat as a failure
      return null;
    }
    const html = await response.text();
    const parser = new DOMParser();
    const doc = parser.parseFromString(html, "text/html");
    // If the site has <title>My Demo Page</title>, we retrieve "My Demo Page"
    const pageTitle = doc.querySelector("title")?.innerText.trim() ?? null;
    return pageTitle;
  } catch (error) {
    console.error("Error fetching page title:", error);
    return null;
  }
}


================================================
FILE: web/src/lib/streamingTTS.ts
================================================
/**
 * Real-time streaming TTS using HTTP streaming with MediaSource Extensions.
 * Plays audio chunks as they arrive for smooth, low-latency playback.
 */

import { INTERNAL_URL, IS_DEV } from "@/lib/constants";

/**
 * HTTPStreamingTTSPlayer - Uses HTTP streaming with MediaSource Extensions
 * for smooth, gapless audio playback. This is the recommended approach for
 * real-time TTS as it properly handles MP3 frame boundaries.
 */
export class HTTPStreamingTTSPlayer {
  private mediaSource: MediaSource | null = null;
  private mediaSourceUrl: string | null = null;
  private sourceBuffer: SourceBuffer | null = null;
  private audioElement: HTMLAudioElement | null = null;
  private pendingChunks: Uint8Array[] = [];
  private isAppending: boolean = false;
  private isPlaying: boolean = false;
  private streamComplete: boolean = false;
  private onPlayingChange?: (playing: boolean) => void;
  private onError?: (error: string) => void;
  private abortController: AbortController | null = null;
  private isMuted: boolean = false;

  constructor(options?: {
    onPlayingChange?: (playing: boolean) => void;
    onError?: (error: string) => void;
  }) {
    this.onPlayingChange = options?.onPlayingChange;
    this.onError = options?.onError;
  }

  private getAPIUrl(): string {
    // Always go through the frontend proxy to ensure cookies are sent correctly
    // The Next.js proxy at /api/* forwards to the backend
    return "/api/voice/synthesize";
  }

  /**
   * Speak text using HTTP streaming with real-time playback.
   * Audio begins playing as soon as the first chunks arrive.
   */
  async speak(
    text: string,
    voice?: string,
    speed: number = 1.0
  ): Promise<void> {
    // Cleanup any previous playback
    this.cleanup();

    // Create abort controller for this request
    this.abortController = new AbortController();

    // Build URL with query params
    const params = new URLSearchParams();
    params.set("text", text);
    if (voice) params.set("voice", voice);
    params.set("speed", speed.toString());

    const url = `${this.getAPIUrl()}?${params}`;

    // Check if MediaSource is supported
    if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
      // Fallback to simple buffered playback
      return this.fallbackSpeak(url);
    }

    // Create MediaSource and audio element
    this.mediaSource = new MediaSource();
    this.audioElement = new Audio();
    this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);
    this.audioElement.src = this.mediaSourceUrl;
    this.audioElement.muted = this.isMuted;

    // Set up audio element event handlers
    this.audioElement.onplay = () => {
      if (!this.isPlaying) {
        this.isPlaying = true;
        this.onPlayingChange?.(true);
      }
    };

    this.audioElement.onended = () => {
      this.isPlaying = false;
      this.onPlayingChange?.(false);
    };

    this.audioElement.onerror = () => {
      this.onError?.("Audio playback error");
      this.isPlaying = false;
      this.onPlayingChange?.(false);
    };

    // Wait for MediaSource to be ready
    await new Promise<void>((resolve, reject) => {
      if (!this.mediaSource) {
        reject(new Error("MediaSource not initialized"));
        return;
      }

      this.mediaSource.onsourceopen = () => {
        try {
          // Create SourceBuffer for MP3
          this.sourceBuffer = this.mediaSource!.addSourceBuffer("audio/mpeg");
          this.sourceBuffer.mode = "sequence";

          this.sourceBuffer.onupdateend = () => {
            this.isAppending = false;
            this.processNextChunk();
          };

          resolve();
        } catch (err) {
          reject(err);
        }
      };

      // MediaSource doesn't have onerror in all browsers, use onsourceclose as fallback
      this.mediaSource.onsourceclose = () => {
        if (this.mediaSource?.readyState === "closed") {
          reject(new Error("MediaSource closed unexpectedly"));
        }
      };
    });

    // Start fetching and streaming audio
    try {
      const response = await fetch(url, {
        method: "POST",
        signal: this.abortController.signal,
        credentials: "include", // Include cookies for authentication
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(
          `TTS request failed: ${response.status} - ${errorText}`
        );
      }

      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error("No response body");
      }

      // Start playback as soon as we have some data
      let firstChunk = true;

      while (true) {
        const { done, value } = await reader.read();

        if (done) {
          this.streamComplete = true;
          // End the stream when all chunks are appended
          this.finalizeStream();
          break;
        }

        if (value) {
          this.pendingChunks.push(value);
          this.processNextChunk();

          // Start playback after first chunk
          if (firstChunk && this.audioElement) {
            firstChunk = false;
            // Small delay to buffer a bit before starting
            setTimeout(() => {
              this.audioElement?.play().catch(() => {
                // Ignore playback start errors
              });
            }, 100);
          }
        }
      }
    } catch (err) {
      if (err instanceof Error && err.name === "AbortError") {
        return;
      }
      this.onError?.(err instanceof Error ? err.message : "TTS error");
      throw err;
    }
  }

  /**
   * Process next chunk from the queue.
   */
  private processNextChunk(): void {
    if (
      this.isAppending ||
      this.pendingChunks.length === 0 ||
      !this.sourceBuffer ||
      this.sourceBuffer.updating
    ) {
      return;
    }

    const chunk = this.pendingChunks.shift();
    if (chunk) {
      this.isAppending = true;
      try {
        // Use ArrayBuffer directly for better TypeScript compatibility
        const buffer = chunk.buffer.slice(
          chunk.byteOffset,
          chunk.byteOffset + chunk.byteLength
        ) as ArrayBuffer;
        this.sourceBuffer.appendBuffer(buffer);
      } catch {
        this.isAppending = false;
        // Try next chunk
        this.processNextChunk();
      }
    }
  }

  /**
   * Finalize the stream when all data has been received.
   */
  private finalizeStream(): void {
    if (this.pendingChunks.length > 0 || this.isAppending) {
      // Wait for remaining chunks to be appended
      setTimeout(() => this.finalizeStream(), 50);
      return;
    }

    if (
      this.mediaSource &&
      this.mediaSource.readyState === "open" &&
      this.sourceBuffer &&
      !this.sourceBuffer.updating
    ) {
      try {
        this.mediaSource.endOfStream();
      } catch {
        // Ignore errors when ending stream
      }
    }
  }

  /**
   * Fallback for browsers that don't support MediaSource Extensions.
   * Buffers all audio before playing.
   */
  private async fallbackSpeak(url: string): Promise<void> {
    const response = await fetch(url, {
      method: "POST",
      signal: this.abortController?.signal,
      credentials: "include", // Include cookies for authentication
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`TTS request failed: ${response.status} - ${errorText}`);
    }

    const audioData = await response.arrayBuffer();

    const blob = new Blob([audioData], { type: "audio/mpeg" });
    const audioUrl = URL.createObjectURL(blob);

    this.audioElement = new Audio(audioUrl);
    this.audioElement.muted = this.isMuted;

    this.audioElement.onplay = () => {
      this.isPlaying = true;
      this.onPlayingChange?.(true);
    };

    this.audioElement.onended = () => {
      this.isPlaying = false;
      this.onPlayingChange?.(false);
      URL.revokeObjectURL(audioUrl);
    };

    this.audioElement.onerror = () => {
      this.onError?.("Audio playback error");
    };

    await this.audioElement.play();
  }

  /**
   * Stop playback and cleanup resources.
   */
  stop(): void {
    // Abort any ongoing request
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }

    this.cleanup();
  }

  setMuted(muted: boolean): void {
    this.isMuted = muted;
    if (this.audioElement) {
      this.audioElement.muted = muted;
    }
  }

  /**
   * Cleanup all resources.
   */
  private cleanup(): void {
    // Revoke Object URL to prevent memory leak
    if (this.mediaSourceUrl) {
      URL.revokeObjectURL(this.mediaSourceUrl);
      this.mediaSourceUrl = null;
    }

    // Stop and cleanup audio element
    if (this.audioElement) {
      this.audioElement.pause();
      this.audioElement.src = "";
      this.audioElement = null;
    }

    // Cleanup MediaSource
    if (this.mediaSource && this.mediaSource.readyState === "open") {
      try {
        if (this.sourceBuffer) {
          this.mediaSource.removeSourceBuffer(this.sourceBuffer);
        }
        this.mediaSource.endOfStream();
      } catch {
        // Ignore cleanup errors
      }
    }

    this.mediaSource = null;
    this.sourceBuffer = null;
    this.pendingChunks = [];
    this.isAppending = false;
    this.streamComplete = false;

    if (this.isPlaying) {
      this.isPlaying = false;
      this.onPlayingChange?.(false);
    }
  }

  get playing(): boolean {
    return this.isPlaying;
  }
}

/**
 * WebSocketStreamingTTSPlayer - Uses WebSocket for bidirectional streaming.
 * Useful for scenarios where you want to stream text in and get audio out
 * incrementally (e.g., as LLM generates text).
 */
export class WebSocketStreamingTTSPlayer {
  private websocket: WebSocket | null = null;
  private mediaSource: MediaSource | null = null;
  private mediaSourceUrl: string | null = null;
  private sourceBuffer: SourceBuffer | null = null;
  private audioElement: HTMLAudioElement | null = null;
  private pendingChunks: Uint8Array[] = [];
  private isAppending: boolean = false;
  private isPlaying: boolean = false;
  private onPlayingChange?: (playing: boolean) => void;
  private onError?: (error: string) => void;
  private hasStartedPlayback: boolean = false;

  constructor(options?: {
    onPlayingChange?: (playing: boolean) => void;
    onError?: (error: string) => void;
  }) {
    this.onPlayingChange = options?.onPlayingChange;
    this.onError = options?.onError;
  }

  private async getWebSocketUrl(): Promise<string> {
    // Fetch short-lived WS token
    const tokenResponse = await fetch("/api/voice/ws-token", {
      method: "POST",
      credentials: "include",
    });
    if (!tokenResponse.ok) {
      throw new Error("Failed to get WebSocket authentication token");
    }
    const { token } = await tokenResponse.json();

    const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;
    const path = IS_DEV
      ? "/voice/synthesize/stream"
      : "/api/voice/synthesize/stream";
    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;
  }

  async connect(voice?: string, speed?: number): Promise<void> {
    // Cleanup any previous connection
    this.cleanup();

    // Check MediaSource support
    if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
      throw new Error("MediaSource Extensions not supported");
    }

    // Create MediaSource and audio element
    this.mediaSource = new MediaSource();
    this.audioElement = new Audio();
    this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);
    this.audioElement.src = this.mediaSourceUrl;

    this.audioElement.onplay = () => {
      if (!this.isPlaying) {
        this.isPlaying = true;
        this.onPlayingChange?.(true);
      }
    };

    this.audioElement.onended = () => {
      this.isPlaying = false;
      this.onPlayingChange?.(false);
    };

    // Wait for MediaSource to be ready
    await new Promise<void>((resolve, reject) => {
      this.mediaSource!.onsourceopen = () => {
        try {
          this.sourceBuffer = this.mediaSource!.addSourceBuffer("audio/mpeg");
          this.sourceBuffer.mode = "sequence";
          this.sourceBuffer.onupdateend = () => {
            this.isAppending = false;
            this.processNextChunk();
          };
          resolve();
        } catch (err) {
          reject(err);
        }
      };
    });

    // Connect WebSocket
    const url = await this.getWebSocketUrl();
    return new Promise((resolve, reject) => {
      this.websocket = new WebSocket(url);

      this.websocket.onopen = () => {
        // Send initial config
        this.websocket?.send(
          JSON.stringify({
            type: "config",
            voice: voice,
            speed: speed || 1.0,
          })
        );
        resolve();
      };

      this.websocket.onerror = () => {
        reject(new Error("WebSocket connection failed"));
      };

      this.websocket.onmessage = async (event) => {
        if (event.data instanceof Blob) {
          // Audio chunk received
          const arrayBuffer = await event.data.arrayBuffer();
          this.pendingChunks.push(new Uint8Array(arrayBuffer));
          this.processNextChunk();

          // Start playback after first chunk
          if (!this.hasStartedPlayback && this.audioElement) {
            this.hasStartedPlayback = true;
            setTimeout(() => {
              this.audioElement?.play().catch(() => {
                // Ignore playback errors
              });
            }, 100);
          }
        } else {
          // JSON message
          try {
            const data = JSON.parse(event.data);
            if (data.type === "audio_done") {
              this.finalizeStream();
            } else if (data.type === "error") {
              this.onError?.(data.message);
            }
          } catch {
            // Ignore parse errors
          }
        }
      };

      this.websocket.onclose = () => {
        this.finalizeStream();
      };
    });
  }

  private processNextChunk(): void {
    if (
      this.isAppending ||
      this.pendingChunks.length === 0 ||
      !this.sourceBuffer ||
      this.sourceBuffer.updating
    ) {
      return;
    }

    const chunk = this.pendingChunks.shift();
    if (chunk) {
      this.isAppending = true;
      try {
        // Use ArrayBuffer directly for better TypeScript compatibility
        const buffer = chunk.buffer.slice(
          chunk.byteOffset,
          chunk.byteOffset + chunk.byteLength
        ) as ArrayBuffer;
        this.sourceBuffer.appendBuffer(buffer);
      } catch {
        this.isAppending = false;
        this.processNextChunk();
      }
    }
  }

  private finalizeStream(): void {
    if (this.pendingChunks.length > 0 || this.isAppending) {
      setTimeout(() => this.finalizeStream(), 50);
      return;
    }

    if (
      this.mediaSource &&
      this.mediaSource.readyState === "open" &&
      this.sourceBuffer &&
      !this.sourceBuffer.updating
    ) {
      try {
        this.mediaSource.endOfStream();
      } catch {
        // Ignore
      }
    }
  }

  async speak(text: string): Promise<void> {
    if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
      throw new Error("WebSocket not connected");
    }

    this.websocket.send(
      JSON.stringify({
        type: "synthesize",
        text: text,
      })
    );
  }

  stop(): void {
    this.cleanup();
  }

  disconnect(): void {
    if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
      this.websocket.send(JSON.stringify({ type: "end" }));
      this.websocket.close();
    }
    this.cleanup();
  }

  private cleanup(): void {
    if (this.websocket) {
      this.websocket.close();
      this.websocket = null;
    }

    // Revoke Object URL to prevent memory leak
    if (this.mediaSourceUrl) {
      URL.revokeObjectURL(this.mediaSourceUrl);
      this.mediaSourceUrl = null;
    }

    if (this.audioElement) {
      this.audioElement.pause();
      this.audioElement.src = "";
      this.audioElement = null;
    }

    if (this.mediaSource && this.mediaSource.readyState === "open") {
      try {
        if (this.sourceBuffer) {
          this.mediaSource.removeSourceBuffer(this.sourceBuffer);
        }
        this.mediaSource.endOfStream();
      } catch {
        // Ignore
      }
    }

    this.mediaSource = null;
    this.sourceBuffer = null;
    this.pendingChunks = [];
    this.isAppending = false;
    this.hasStartedPlayback = false;

    if (this.isPlaying) {
      this.isPlaying = false;
      this.onPlayingChange?.(false);
    }
  }

  get playing(): boolean {
    return this.isPlaying;
  }
}

// Export the HTTP player as the default/recommended option
export { HTTPStreamingTTSPlayer as StreamingTTSPlayer };


================================================
FILE: web/src/lib/swr-keys.ts
================================================
/**
 * Centralized SWR cache key registry.
 *
 * All useSWR calls and mutate() calls should reference these constants
 * instead of inline strings to prevent typos and make key usage greppable.
 *
 * For dynamic keys (e.g. per-ID endpoints), use the builder functions.
 */
export const SWR_KEYS = {
  // ── User ──────────────────────────────────────────────────────────────────
  me: "/api/me",

  // ── Health ────────────────────────────────────────────────────────────────
  health: "/api/health",

  // ── Settings ──────────────────────────────────────────────────────────────
  settings: "/api/settings",
  enterpriseSettings: "/api/enterprise-settings",
  customAnalyticsScript: "/api/enterprise-settings/custom-analytics-script",
  authType: "/api/auth/type",

  // ── Agents / Personas ─────────────────────────────────────────────────────
  personas: "/api/persona",
  persona: (id: number) => `/api/persona/${id}`,
  agentPreferences: "/api/user/assistant/preferences",
  defaultAssistantConfig: "/api/admin/default-assistant/configuration",
  personaLabels: "/api/persona/labels",

  // ── LLM Providers ─────────────────────────────────────────────────────────
  llmProviders: "/api/llm/provider",
  llmProvidersForPersona: (personaId: number) =>
    `/api/llm/persona/${personaId}/providers`,
  adminLlmProviders: "/api/admin/llm/provider",
  llmProvidersWithImageGen: "/api/admin/llm/provider?include_image_gen=true",
  wellKnownLlmProviders: "/api/admin/llm/built-in/options",
  wellKnownLlmProvider: (providerEndpoint: string) =>
    `/api/admin/llm/built-in/options/${providerEndpoint}`,

  // ── Image Generation ──────────────────────────────────────────────────────
  imageGenConfig: "/api/admin/image-generation/config",

  // ── Documents ─────────────────────────────────────────────────────────────
  documentSets: "/api/manage/document-set",
  documentSetsEditable: "/api/manage/document-set?get_editable=true",
  tags: "/api/query/valid-tags",
  connectorStatus: "/api/manage/connector-status",

  // ── Credentials & Connectors ──────────────────────────────────────────────
  adminCredentials: "/api/manage/admin/credential",
  indexingStatus: "/api/manage/admin/connector/indexing-status",
  adminConnectorStatus: "/api/manage/admin/connector/status",
  federatedConnectors: "/api/federated",

  // ── Google Connectors ─────────────────────────────────────────────────────
  googleConnectorAppCredential: (service: "gmail" | "google-drive") =>
    `/api/manage/admin/connector/${service}/app-credential`,
  googleConnectorServiceAccountKey: (service: "gmail" | "google-drive") =>
    `/api/manage/admin/connector/${service}/service-account-key`,
  googleConnectorCredentials: (service: "gmail" | "google-drive") =>
    `/api/manage/admin/connector/${service}/credentials`,
  googleConnectorPublicCredential: (service: "gmail" | "google-drive") =>
    `/api/manage/admin/connector/${service}/public-credential`,
  googleConnectorServiceAccountCredential: (
    service: "gmail" | "google-drive"
  ) => `/api/manage/admin/connector/${service}/service-account-credential`,

  // ── Search Settings ───────────────────────────────────────────────────────
  currentSearchSettings: "/api/search-settings/get-current-search-settings",
  secondarySearchSettings: "/api/search-settings/get-secondary-search-settings",

  // ── Chat Sessions ─────────────────────────────────────────────────────────
  chatSessions: "/api/chat/get-user-chat-sessions",

  // ── Projects & Files ──────────────────────────────────────────────────────
  userProjects: "/api/user/projects",
  recentFiles: "/api/user/files/recent",
  userPats: "/api/user/pats",
  notifications: "/api/notifications",

  // ── Users ─────────────────────────────────────────────────────────────────
  acceptedUsers: "/api/manage/users/accepted/all",
  invitedUsers: "/api/manage/users/invited",
  pendingTenantUsers: "/api/tenants/users/pending",
  userCounts: "/api/manage/users/counts",

  // ── API Keys ──────────────────────────────────────────────────────────────
  adminApiKeys: "/api/admin/api-key",

  // ── Groups ────────────────────────────────────────────────────────────────
  adminUserGroups: "/api/manage/admin/user-group",
  shareableGroups: "/api/manage/user-groups/minimal",
  scimToken: "/api/admin/enterprise-settings/scim/token",

  // ── MCP Servers ───────────────────────────────────────────────────────────
  adminMcpServers: "/api/admin/mcp/servers",
  mcpServers: "/api/mcp/servers",

  // ── Tools ─────────────────────────────────────────────────────────────────
  tools: "/api/tool",
  openApiTools: "/api/tool/openapi",
  oauthTokenStatus: "/api/user-oauth-token/status",

  // ── Voice ─────────────────────────────────────────────────────────────────
  voiceProviders: "/api/admin/voice/providers",
  voiceStatus: "/api/voice/status",

  // ── Build (Craft) ─────────────────────────────────────────────────────────
  buildConnectors: "/api/build/connectors",
  buildUserLibraryTree: "/api/build/user-library/tree",
  buildSessionFiles: (sessionId: string) =>
    `/api/build/sessions/${sessionId}/files?path=`,
  buildSessionOutputFiles: (sessionId: string) =>
    `/api/build/sessions/${sessionId}/files?path=outputs`,
  buildSessionWebappInfo: (sessionId: string) =>
    `/api/build/sessions/${sessionId}/webapp-info`,
  buildSessionArtifacts: (sessionId: string) =>
    `/api/build/sessions/${sessionId}/artifacts`,
  buildSessionArtifactFile: (sessionId: string, filePath: string) =>
    `/api/build/sessions/${sessionId}/artifacts/${filePath}`,
  buildSessionPptxPreview: (sessionId: string, filePath: string) =>
    `/api/build/sessions/${sessionId}/pptx-preview/${filePath}`,

  // ── Knowledge Graph ───────────────────────────────────────────────────────
  kgConfig: "/api/admin/kg/config",
  kgEntityTypes: "/api/admin/kg/entity-types",
  kgExposed: "/api/admin/kg/exposed",

  // ── OpenSearch Migration ──────────────────────────────────────────────────
  opensearchMigrationStatus: "/api/admin/opensearch-migration/status",
  opensearchMigrationRetrieval: "/api/admin/opensearch-migration/retrieval",

  // ── Token Rate Limits ─────────────────────────────────────────────────────
  globalTokenRateLimits: "/api/admin/token-rate-limits/global",
  userTokenRateLimits: "/api/admin/token-rate-limits/users",
  userGroupTokenRateLimits: "/api/admin/token-rate-limits/user-groups",
  userGroupTokenRateLimit: (groupId: number) =>
    `/api/admin/token-rate-limits/user-group/${groupId}`,

  // ── Usage Reports ─────────────────────────────────────────────────────────
  usageReport: "/api/admin/usage-report",

  // ── Web Search ────────────────────────────────────────────────────────────
  webSearchContentProviders: "/api/admin/web-search/content-providers",
  webSearchSearchProviders: "/api/admin/web-search/search-providers",

  // ── Prompt shortcuts ──────────────────────────────────────────────────────
  promptShortcuts: "/api/input_prompt",

  // ── License & Billing ─────────────────────────────────────────────────────
  license: "/api/license",
  billingInformationCloud: "/api/tenants/billing-information",
  billingInformationSelfHosted: "/api/admin/billing/billing-information",

  // ── Admin ─────────────────────────────────────────────────────────────────
  hooks: "/api/admin/hooks",
  hookSpecs: "/api/admin/hooks/specs",

  // ── Slack Bots ────────────────────────────────────────────────────────────
  slackChannels: "/api/manage/admin/slack-app/channel",
  slackBots: "/api/manage/admin/slack-app/bots",
  slackBot: (botId: number) => `/api/manage/admin/slack-app/bots/${botId}`,
  slackBotConfig: (botId: number) =>
    `/api/manage/admin/slack-app/bots/${botId}/config`,

  // ── Standard Answers (EE) ─────────────────────────────────────────────────
  standardAnswerCategories: "/api/manage/admin/standard-answer/category",
  standardAnswers: "/api/manage/admin/standard-answer",

  // ── Query History (EE) ────────────────────────────────────────────────────
  adminChatSessionHistory: "/api/admin/chat-session-history",
  adminChatSession: (id: string) => `/api/admin/chat-session-history/${id}`,

  // ── MCP Server (per-ID) ───────────────────────────────────────────────────
  adminMcpServer: (id: number) => `/api/admin/mcp/servers/${id}`,

  // ── Document Processing ───────────────────────────────────────────────────
  unstructuredApiKeySet: "/api/search-settings/unstructured-api-key-set",

  // ── Connectors ────────────────────────────────────────────────────────────
  connector: "/api/manage/connector",
} as const;


================================================
FILE: web/src/lib/time.ts
================================================
import { User } from "@/lib/types";

const conditionallyAddPlural = (noun: string, cnt: number) => {
  if (cnt > 1) {
    return `${noun}s`;
  }
  return noun;
};

export const timeAgo = (
  dateString: string | undefined | null
): string | null => {
  if (!dateString) {
    return null;
  }

  const date = new Date(dateString);
  const now = new Date();
  const secondsDiff = Math.floor((now.getTime() - date.getTime()) / 1000);

  if (secondsDiff < 60) {
    return `${secondsDiff} ${conditionallyAddPlural(
      "second",
      secondsDiff
    )} ago`;
  }

  const minutesDiff = Math.floor(secondsDiff / 60);
  if (minutesDiff < 60) {
    return `${minutesDiff} ${conditionallyAddPlural(
      "minute",
      secondsDiff
    )} ago`;
  }

  const hoursDiff = Math.floor(minutesDiff / 60);
  if (hoursDiff < 24) {
    return `${hoursDiff} ${conditionallyAddPlural("hour", hoursDiff)} ago`;
  }

  const daysDiff = Math.floor(hoursDiff / 24);
  if (daysDiff < 30) {
    return `${daysDiff} ${conditionallyAddPlural("day", daysDiff)} ago`;
  }

  const weeksDiff = Math.floor(daysDiff / 7);
  if (weeksDiff < 4) {
    return `${weeksDiff} ${conditionallyAddPlural("week", weeksDiff)} ago`;
  }

  const monthsDiff = Math.floor(daysDiff / 30);
  if (monthsDiff < 12) {
    return `${monthsDiff} ${conditionallyAddPlural("month", monthsDiff)} ago`;
  }

  const yearsDiff = Math.floor(monthsDiff / 12);
  return `${yearsDiff} ${conditionallyAddPlural("year", yearsDiff)} ago`;
};

export function localizeAndPrettify(dateString: string) {
  const date = new Date(dateString);
  return date.toLocaleString();
}

export function humanReadableFormat(dateString: string): string {
  // Create a Date object from the dateString
  const date = new Date(dateString);

  // Use Intl.DateTimeFormat to format the date
  // Specify the locale as 'en-US' and options for month, day, and year
  const formatter = new Intl.DateTimeFormat("en-US", {
    month: "long", // full month name
    day: "numeric", // numeric day
    year: "numeric", // numeric year
  });

  // Format the date and return it
  return formatter.format(date);
}

/**
 * Format a date as "Jan 15, 2025" (short month name).
 */
export function humanReadableFormatShort(date: string | Date | null): string {
  if (!date) return "";
  const d = typeof date === "string" ? new Date(date) : date;
  const formatter = new Intl.DateTimeFormat("en-US", {
    month: "short",
    day: "numeric",
    year: "numeric",
  });
  return formatter.format(d);
}

export function humanReadableFormatWithTime(datetimeString: string): string {
  // Create a Date object from the dateString
  const date = new Date(datetimeString);

  // Use Intl.DateTimeFormat to format the date
  // Specify the locale as 'en-US' and options for month, day, and year
  const formatter = new Intl.DateTimeFormat("en-US", {
    month: "long", // full month name
    day: "numeric", // numeric day
    year: "numeric", // numeric year
    hour: "numeric",
    minute: "numeric",
  });
  // Format the date and return it
  return formatter.format(date);
}

export function getSecondsUntilExpiration(
  userInfo: User | null
): number | null {
  if (!userInfo) {
    return null;
  }

  const { oidc_expiry, current_token_created_at, current_token_expiry_length } =
    userInfo;

  const now = new Date();

  let secondsUntilTokenExpiration: number | null = null;
  let secondsUntilOIDCExpiration: number | null = null;

  if (current_token_created_at && current_token_expiry_length !== undefined) {
    const createdAt = new Date(current_token_created_at);
    const expiresAt = new Date(
      createdAt.getTime() + current_token_expiry_length * 1000
    );
    secondsUntilTokenExpiration = Math.floor(
      (expiresAt.getTime() - now.getTime()) / 1000
    );
  }

  if (oidc_expiry) {
    const expiresAtFromOIDC = new Date(oidc_expiry);
    secondsUntilOIDCExpiration = Math.floor(
      (expiresAtFromOIDC.getTime() - now.getTime()) / 1000
    );
  }

  if (
    secondsUntilTokenExpiration === null &&
    secondsUntilOIDCExpiration === null
  ) {
    return null;
  }

  return Math.max(
    0,
    Math.min(
      secondsUntilTokenExpiration ?? Infinity,
      secondsUntilOIDCExpiration ?? Infinity
    )
  );
}

export type TimeFilter = "day" | "week" | "month" | "year";

export function getTimeFilterDate(filter: TimeFilter): Date | null {
  const now = new Date();
  switch (filter) {
    case "day":
      return new Date(now.getTime() - 24 * 60 * 60 * 1000);
    case "week":
      return new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);
    case "month":
      return new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
    case "year":
      return new Date(now.getTime() - 365 * 24 * 60 * 60 * 1000);
    default:
      return null;
  }
}

export function formatDurationSeconds(seconds: number): string {
  const totalSeconds = Math.ceil(seconds);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
}


================================================
FILE: web/src/lib/tools/fetchTools.ts
================================================
import { ToolSnapshot } from "./interfaces";
import { fetchSS } from "../utilsSS";

export async function fetchToolsSS(): Promise<ToolSnapshot[] | null> {
  try {
    const response = await fetchSS("/tool");
    if (!response.ok) {
      throw new Error(`Failed to fetch tools: ${await response.text()}`);
    }
    const tools: ToolSnapshot[] = await response.json();
    return tools;
  } catch (error) {
    console.error("Error fetching tools:", error);
    return null;
  }
}

export async function fetchToolByIdSS(
  toolId: string
): Promise<ToolSnapshot | null> {
  try {
    const response = await fetchSS(`/tool/${toolId}`);
    if (!response.ok) {
      throw new Error(
        `Failed to fetch tool with ID ${toolId}: ${await response.text()}`
      );
    }
    const tool: ToolSnapshot = await response.json();
    return tool;
  } catch (error) {
    console.error(`Error fetching tool with ID ${toolId}:`, error);
    return null;
  }
}


================================================
FILE: web/src/lib/tools/interfaces.ts
================================================
import type React from "react";
import type { IconProps } from "@opal/types";

// Generic action status for UI components
export enum ActionStatus {
  CONNECTED = "connected",
  PENDING = "pending",
  DISCONNECTED = "disconnected",
  FETCHING = "fetching",
}

export enum MCPServerStatus {
  CREATED = "CREATED",
  AWAITING_AUTH = "AWAITING_AUTH",
  FETCHING_TOOLS = "FETCHING_TOOLS",
  CONNECTED = "CONNECTED",
  DISCONNECTED = "DISCONNECTED",
}

export interface MCPServer {
  id: number;
  name: string;
  description?: string;
  server_url: string;
  owner: string;
  transport?: MCPTransportType;
  auth_type?: MCPAuthenticationType;
  auth_performer?: MCPAuthenticationPerformer;
  is_authenticated: boolean;
  user_authenticated?: boolean;
  auth_template?: any;
  admin_credentials?: Record<string, string>;
  user_credentials?: Record<string, string>;
  status: MCPServerStatus;
  last_refreshed_at?: string;
  tool_count: number;
}

export interface MCPServersResponse {
  assistant_id?: string | null;
  mcp_servers: MCPServer[];
}

export interface MCPServerCreateRequest {
  name: string;
  description?: string;
  server_url: string;
}

export interface MCPServerUpdateRequest {
  name?: string;
  description?: string;
  server_url?: string;
}

export interface MCPTool {
  id: string;
  name: string;
  description: string;
  icon?: React.FunctionComponent<IconProps>;
  isAvailable: boolean;
  isEnabled: boolean;
}

export interface MethodSpec {
  /* Defines a single method that is part of a custom tool. Each method maps to a single
  action that the LLM can choose to take. */
  name: string;
  summary: string;
  path: string;
  method: string;
  spec: Record<string, any>;
  custom_headers: { key: string; value: string }[];
}

export interface ToolSnapshot {
  id: number;
  name: string;
  display_name: string;
  description: string;

  // only specified for Custom Tools. OpenAPI schema which represents
  // the tool's API.
  definition: Record<string, any> | null;

  // only specified for Custom Tools. Custom headers to add to the tool's API requests.
  custom_headers: { key: string; value: string }[];

  // only specified for Custom Tools. ID of the tool in the codebase.
  in_code_tool_id: string | null;

  // whether to pass through the user's OAuth token as Authorization header
  passthrough_auth: boolean;

  // OAuth configuration for this tool
  oauth_config_id?: number | null;
  oauth_config_name?: string | null;

  // If this is an MCP tool, which server it belongs to
  mcp_server_id?: number | null;
  user_id?: string | null;

  // Whether the tool is enabled
  enabled: boolean;

  // Visibility settings from backend TOOL_VISIBILITY_CONFIG
  chat_selectable: boolean;
  agent_creation_selectable: boolean;
  default_enabled: boolean;
}

export enum MCPAuthenticationType {
  NONE = "NONE",
  API_TOKEN = "API_TOKEN",
  OAUTH = "OAUTH",
  PT_OAUTH = "PT_OAUTH", // Pass-Through OAuth
}

export enum MCPAuthenticationPerformer {
  ADMIN = "ADMIN",
  PER_USER = "PER_USER",
}

export interface ApiResponse<T> {
  data: T | null;
  error: string | null;
}

export interface OAuthConfig {
  id: number;
  name: string;
  authorization_url: string;
  token_url: string;
  scopes: string[] | null;
  has_client_credentials: boolean;
  tool_count: number;
  created_at: string;
  updated_at: string;
}

export enum MCPTransportType {
  STDIO = "STDIO",
  STREAMABLE_HTTP = "STREAMABLE_HTTP",
  SSE = "SSE",
}

export interface OAuthConfigCreate {
  name: string;
  authorization_url: string;
  token_url: string;
  client_id: string;
  client_secret: string;
  scopes?: string[];
  additional_params?: Record<string, any>;
}

export interface OAuthConfigUpdate {
  name?: string;
  authorization_url?: string;
  token_url?: string;
  client_id?: string;
  client_secret?: string;
  scopes?: string[];
  additional_params?: Record<string, any>;
}

export interface OAuthTokenStatus {
  oauth_config_id: number;
  expires_at: number | null;
  is_expired: boolean;
}


================================================
FILE: web/src/lib/tools/mcpService.ts
================================================
/**
 * Service layer for MCP (Model Context Protocol) related API calls
 */

import {
  MCPServer,
  MCPServerCreateRequest,
  MCPServerUpdateRequest,
  MCPServerStatus,
  ApiResponse,
  ToolSnapshot,
  MCPAuthenticationType,
  MCPAuthenticationPerformer,
} from "@/lib/tools/interfaces";
export interface ToolStatusUpdateRequest {
  tool_ids: number[];
  enabled: boolean;
}

export interface ToolStatusUpdateResponse {
  updated_count: number;
  tool_ids: number[];
}

/**
 * Delete an MCP server
 */
export async function deleteMCPServer(serverId: number): Promise<void> {
  const response = await fetch(`/api/admin/mcp/server/${serverId}`, {
    method: "DELETE",
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to delete MCP server");
  }
}

/**
 * This performs actual discovery from the MCP server and syncs to DB
 */
export async function refreshMCPServerTools(
  serverId: number
): Promise<ToolSnapshot[]> {
  // Discovers tools from MCP server, upserts to DB, and returns ToolSnapshot format
  const response = await fetch(
    `/api/admin/mcp/server/${serverId}/tools/snapshots?source=mcp`
  );
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to refresh tools");
  }

  return await response.json();
}

/**
 * Update status (enable/disable) for one or more tools
 */
export async function updateToolsStatus(
  toolIds: number[],
  enabled: boolean
): Promise<ToolStatusUpdateResponse> {
  const response = await fetch("/api/admin/tool/status", {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      tool_ids: toolIds,
      enabled: enabled,
    } as ToolStatusUpdateRequest),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to update tool status");
  }

  return await response.json();
}

/**
 * Update status for a single tool
 */
export async function updateToolStatus(
  toolId: number,
  enabled: boolean
): Promise<ToolStatusUpdateResponse> {
  return updateToolsStatus([toolId], enabled);
}

/**
 * Disable all tools for a specific MCP server
 */
export async function disableAllServerTools(
  toolIds: number[]
): Promise<ToolStatusUpdateResponse> {
  return updateToolsStatus(toolIds, false);
}

/**
 * Create a new MCP server with basic information
 */
export async function createMCPServer(
  data: MCPServerCreateRequest
): Promise<MCPServer> {
  const response = await fetch("/api/admin/mcp/server", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(data),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to create MCP server");
  }

  return await response.json();
}

/**
 * Update an existing MCP server
 */
export async function updateMCPServer(
  serverId: number,
  data: MCPServerUpdateRequest
): Promise<MCPServer> {
  const response = await fetch(`/api/admin/mcp/server/${serverId}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(data),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to update MCP server");
  }

  return await response.json();
}

/**
 * Update the status of an MCP server
 */
export async function updateMCPServerStatus(
  serverId: number,
  status: MCPServerStatus
): Promise<void> {
  const response = await fetch(
    `/api/admin/mcp/server/${serverId}/status?status=${status}`,
    {
      method: "PATCH",
    }
  );

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(errorText || "Failed to update MCP server status");
  }
}

interface UpsertMCPServerResponse {
  server_id: number;
  server_name: string;
  server_url: string;
  auth_type: string;
  auth_performer: string;
  is_authenticated: boolean;
}

export async function upsertMCPServer(serverData: {
  name: string;
  description?: string;
  server_url: string;
  transport: string;
  auth_type: MCPAuthenticationType;
  auth_performer: MCPAuthenticationPerformer;
  api_token?: string;
  oauth_client_id?: string;
  oauth_client_secret?: string;
  auth_template?: any;
  admin_credentials?: Record<string, string>;
  existing_server_id?: number;
}): Promise<ApiResponse<UpsertMCPServerResponse>> {
  try {
    const response = await fetch("/api/admin/mcp/servers/create", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(serverData),
    });

    if (!response.ok) {
      const errorDetail = (await response.json()).detail;
      return {
        data: null,
        error: `Failed to create MCP server: ${errorDetail}`,
      };
    }

    const result: UpsertMCPServerResponse = await response.json();
    return { data: result, error: null };
  } catch (error) {
    console.error("Error creating MCP server:", error);
    return { data: null, error: `Error creating MCP server: ${error}` };
  }
}


================================================
FILE: web/src/lib/tools/mcpUtils.tsx
================================================
import { SOURCE_METADATA_MAP } from "../sources";
import { MCPServer } from "./interfaces";
import { DatabaseIcon, FileIcon } from "@/components/icons/icons";
import type { IconProps } from "@opal/types";
import { SvgServer } from "@opal/icons";

/**
 * Get an appropriate icon for an MCP server based on its URL and name.
 * Leverages the existing SOURCE_METADATA_MAP for connector icons.
 */
export function getActionIcon(
  serverUrl: string,
  serverName: string
): React.FunctionComponent<IconProps> {
  const url = serverUrl.toLowerCase();
  const name = serverName.toLowerCase();

  for (const [sourceKey, metadata] of Object.entries(SOURCE_METADATA_MAP)) {
    const keyword = sourceKey.toLowerCase();

    if (url.includes(keyword) || name.includes(keyword)) {
      const Icon = metadata.icon;
      return Icon;
    }
  }

  if (
    url.includes("postgres") ||
    url.includes("mysql") ||
    url.includes("mongodb") ||
    url.includes("redis")
  ) {
    return DatabaseIcon;
  }
  if (url.includes("filesystem") || name.includes("file system")) {
    return FileIcon;
  }

  return SvgServer;
}


================================================
FILE: web/src/lib/tools/openApiService.ts
================================================
import { MethodSpec, ApiResponse, ToolSnapshot } from "@/lib/tools/interfaces";

const SUPPORTED_HTTP_METHODS = new Set([
  "get",
  "post",
  "put",
  "patch",
  "delete",
  "options",
  "head",
]);

const isPlainRecord = (value: unknown): value is Record<string, any> =>
  Boolean(value) && typeof value === "object" && !Array.isArray(value);

export function extractMethodSpecsFromDefinition(
  definition?: Record<string, any> | null
): MethodSpec[] {
  if (!isPlainRecord(definition) || !isPlainRecord(definition.paths)) {
    return [];
  }

  const pathEntries = Object.entries(definition.paths as Record<string, any>);
  const methods: MethodSpec[] = [];

  for (const [path, operations] of pathEntries) {
    if (!isPlainRecord(operations)) {
      continue;
    }

    for (const [methodName, spec] of Object.entries(operations)) {
      if (!isPlainRecord(spec)) {
        continue;
      }

      if (!SUPPORTED_HTTP_METHODS.has(methodName.toLowerCase())) {
        continue;
      }

      const name = spec.operationId ?? spec.operationID;
      const summary = spec.summary ?? spec.description;

      if (!name || !summary) {
        continue;
      }

      methods.push({
        name,
        summary,
        path,
        method: methodName.toUpperCase(),
        spec,
        custom_headers: [],
      });
    }
  }

  return methods;
}

export async function validateToolDefinition(toolData: {
  definition: Record<string, any>;
}): Promise<ApiResponse<MethodSpec[]>> {
  try {
    const response = await fetch(`/api/admin/tool/custom/validate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(toolData),
    });

    if (!response.ok) {
      const errorDetail = (await response.json()).detail;
      return { data: null, error: errorDetail };
    }

    const responseJson = await response.json();
    return { data: responseJson.methods, error: null };
  } catch (error) {
    console.error("Error validating tool:", error);
    return { data: null, error: "Unexpected error validating tool definition" };
  }
}

export async function createCustomTool(toolData: {
  name: string;
  description?: string;
  definition: Record<string, any>;
  custom_headers: { key: string; value: string }[];
  passthrough_auth: boolean;
}): Promise<ApiResponse<ToolSnapshot>> {
  try {
    const response = await fetch("/api/admin/tool/custom", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(toolData),
    });

    if (!response.ok) {
      const errorDetail = (await response.json()).detail;
      return { data: null, error: `Failed to create tool: ${errorDetail}` };
    }

    const tool: ToolSnapshot = await response.json();
    return { data: tool, error: null };
  } catch (error) {
    console.error("Error creating tool:", error);
    return { data: null, error: "Error creating tool" };
  }
}

type ToolUpdatePayload = {
  name?: string;
  description?: string;
  definition?: Record<string, any>;
  custom_headers?: { key: string; value: string }[] | null;
  passthrough_auth?: boolean;
  oauth_config_id?: number | null;
};

export async function updateCustomTool(
  toolId: number,
  toolData: ToolUpdatePayload
): Promise<ApiResponse<ToolSnapshot>> {
  try {
    const response = await fetch(`/api/admin/tool/custom/${toolId}`, {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(toolData),
    });

    if (!response.ok) {
      const errorDetail = (await response.json()).detail;
      return { data: null, error: `Failed to update tool: ${errorDetail}` };
    }

    const updatedTool: ToolSnapshot = await response.json();
    return { data: updatedTool, error: null };
  } catch (error) {
    console.error("Error updating tool:", error);
    return { data: null, error: "Error updating tool" };
  }
}

export async function deleteCustomTool(
  toolId: number
): Promise<ApiResponse<boolean>> {
  try {
    const response = await fetch(`/api/admin/tool/custom/${toolId}`, {
      method: "DELETE",
      headers: {
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorDetail = (await response.json()).detail;
      return { data: false, error: `Failed to delete tool: ${errorDetail}` };
    }

    return { data: true, error: null };
  } catch (error) {
    console.error("Error deleting tool:", error);
    return { data: false, error: "Error deleting tool" };
  }
}


================================================
FILE: web/src/lib/types.ts
================================================
import { Persona } from "@/app/admin/agents/interfaces";
import { Credential } from "./connectors/credentials";
import { Connector } from "./connectors/connectors";
import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";

export interface UserSpecificAgentPreference {
  disabled_tool_ids?: number[];
}

export type UserSpecificAgentPreferences = Record<
  number,
  UserSpecificAgentPreference
>;

export enum ThemePreference {
  LIGHT = "light",
  DARK = "dark",
  SYSTEM = "system",
}

interface UserPreferences {
  // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766
  chosen_assistants: number[] | null;
  visible_assistants: number[];
  hidden_assistants: number[];
  pinned_assistants?: number[];
  default_model: string | null;
  recent_assistants: number[];
  auto_scroll: boolean;
  shortcut_enabled: boolean;
  temperature_override_enabled: boolean;
  theme_preference: ThemePreference | null;
  chat_background: string | null;
  default_app_mode: "AUTO" | "CHAT" | "SEARCH";
  // Voice preferences
  voice_auto_send?: boolean;
  voice_auto_playback?: boolean;
  voice_playback_speed?: number;
}

export interface MemoryItem {
  id: number | null;
  content: string;
}

export interface UserPersonalization {
  name: string;
  role: string;
  memories: MemoryItem[];
  use_memories: boolean;
  enable_memory_tool: boolean;
  user_preferences: string;
}

export enum AccountType {
  STANDARD = "STANDARD",
  BOT = "BOT",
  EXT_PERM_USER = "EXT_PERM_USER",
  SERVICE_ACCOUNT = "SERVICE_ACCOUNT",
  ANONYMOUS = "ANONYMOUS",
}

export enum UserRole {
  LIMITED = "limited",
  BASIC = "basic",
  ADMIN = "admin",
  CURATOR = "curator",
  GLOBAL_CURATOR = "global_curator",
  EXT_PERM_USER = "ext_perm_user",
  SLACK_USER = "slack_user",
}

export const USER_ROLE_LABELS: Record<UserRole, string> = {
  [UserRole.BASIC]: "Basic",
  [UserRole.ADMIN]: "Admin",
  [UserRole.GLOBAL_CURATOR]: "Global Curator",
  [UserRole.CURATOR]: "Curator",
  [UserRole.LIMITED]: "Limited",
  [UserRole.EXT_PERM_USER]: "External Permissioned User",
  [UserRole.SLACK_USER]: "Slack User",
};

export enum UserStatus {
  ACTIVE = "active",
  INACTIVE = "inactive",
  INVITED = "invited",
  REQUESTED = "requested",
}

export const USER_STATUS_LABELS: Record<UserStatus, string> = {
  [UserStatus.ACTIVE]: "Active",
  [UserStatus.INACTIVE]: "Inactive",
  [UserStatus.INVITED]: "Invite Pending",
  [UserStatus.REQUESTED]: "Request to Join",
};

export const INVALID_ROLE_HOVER_TEXT: Partial<Record<UserRole, string>> = {
  [UserRole.BASIC]: "Basic users can't perform any admin actions",
  [UserRole.ADMIN]: "Admin users can perform all admin actions",
  [UserRole.GLOBAL_CURATOR]:
    "Global Curator users can perform admin actions for all groups they are a member of",
  [UserRole.CURATOR]: "Curator role must be assigned in the Groups tab",
  [UserRole.SLACK_USER]:
    "This role is automatically assigned to users who only use Onyx via Slack",
};

export interface User {
  id: string;
  email: string;
  is_active: boolean;
  is_superuser: boolean;
  is_verified: boolean;
  role: UserRole;
  preferences: UserPreferences;
  current_token_created_at?: Date;
  current_token_expiry_length?: number;
  oidc_expiry?: Date;
  is_cloud_superuser?: boolean;
  team_name: string | null;
  is_anonymous_user?: boolean;
  // If user does not have a configured password
  // (i.e.) they are using an oauth flow
  // or are in a no-auth situation
  // we don't want to show them things like the reset password
  // functionality
  password_configured?: boolean;
  tenant_info?: TenantInfo | null;
  personalization?: UserPersonalization;
}

export interface TenantInfo {
  new_tenant?: NewTenantInfo | null;
  invitation?: NewTenantInfo | null;
}

export interface NewTenantInfo {
  tenant_id: string;
  number_of_users: number;
}

export interface AllUsersResponse {
  accepted: User[];
  invited: User[];
  slack_users: User[];
  accepted_pages: number;
  invited_pages: number;
  slack_users_pages: number;
}

export interface AcceptedUserSnapshot {
  id: string;
  email: string;
  role: UserRole;
  is_active: boolean;
}

export interface InvitedUserSnapshot {
  email: string;
}

export interface MinimalUserSnapshot {
  id: string;
  email: string;
}

export type ValidInputTypes =
  | "load_state"
  | "poll"
  | "event"
  | "slim_retrieval";
export type ValidStatuses =
  | "invalid"
  | "success"
  | "completed_with_errors"
  | "canceled"
  | "failed"
  | "in_progress"
  | "not_started";
export type TaskStatus = "PENDING" | "STARTED" | "SUCCESS" | "FAILURE";
export type Feedback = "like" | "dislike" | "mixed";
export type AccessType = "public" | "private" | "sync";
export type ProcessingMode = "REGULAR" | "FILE_SYSTEM";
export type SessionType = "Chat" | "Search" | "Slack";

export interface DocumentBoostStatus {
  document_id: string;
  semantic_id: string;
  link: string;
  boost: number;
  hidden: boolean;
}

export interface FailedConnectorIndexingStatus {
  cc_pair_id: number;
  name: string;
  error_msg: string | null;
  is_deletable: boolean;
  connector_id: number;
  credential_id: number;
}

export interface IndexAttemptSnapshot {
  id: number;
  status: ValidStatuses | null;
  from_beginning: boolean;
  new_docs_indexed: number;
  docs_removed_from_index: number;
  total_docs_indexed: number;
  error_msg: string | null;
  error_count: number;
  full_exception_trace: string | null;
  time_started: string | null;
  time_updated: string;
}

export interface ConnectorStatus<ConnectorConfigType, ConnectorCredentialType> {
  cc_pair_id: number;
  name: string;
  connector: Connector<ConnectorConfigType>;
  credential: Credential<ConnectorCredentialType>;
  access_type: AccessType;
  groups: number[];
}

export interface ConnectorIndexingStatus<
  ConnectorConfigType,
  ConnectorCredentialType,
> extends ConnectorStatus<ConnectorConfigType, ConnectorCredentialType> {
  // Inlcude data only necessary for indexing statuses in admin page
  last_success: string | null;
  last_status: ValidStatuses | null;
  last_finished_status: ValidStatuses | null;
  cc_pair_status: ConnectorCredentialPairStatus;
  in_repeated_error_state: boolean;
  latest_index_attempt: IndexAttemptSnapshot | null;
  docs_indexed: number;
}

export interface ConnectorIndexingStatusLite {
  cc_pair_id: number;
  name: string;
  source: ValidSources;
  access_type: AccessType;
  in_progress: boolean;
  cc_pair_status: ConnectorCredentialPairStatus;
  last_finished_status: ValidStatuses | null;
  last_status: ValidStatuses | null;
  last_success: string | null;
  is_editable: boolean;
  docs_indexed: number;
  in_repeated_error_state: boolean;
  latest_index_attempt_docs_indexed: number | null;
}

export interface FederatedConnectorStatus {
  id: number;
  source: ValidSources;
  name: string;
}

export interface SourceSummary {
  total_connectors: number;
  active_connectors: number;
  public_connectors: number;
  total_docs_indexed: number;
}

export interface ConnectorIndexingStatusLiteResponse {
  source: ValidSources;
  summary: SourceSummary;
  current_page: number;
  total_pages: number;
  indexing_statuses: (ConnectorIndexingStatusLite | FederatedConnectorStatus)[];
}

export interface FederatedConnectorDetail {
  id: number;
  source: ValidSources.FederatedSlack;
  name: string;
  credentials: Record<string, any>;
  config: Record<string, any>;
  oauth_token_exists: boolean;
  oauth_token_expires_at: string | null;
  document_sets: Array<{
    id: number;
    name: string;
    entities: Record<string, any>;
  }>;
}

export interface OAuthPrepareAuthorizationResponse {
  url: string;
}

export interface OAuthBaseCallbackResponse {
  success: boolean;
  message: string;
  finalize_url: string | null;
  redirect_on_success: string;
}

export interface OAuthSlackCallbackResponse extends OAuthBaseCallbackResponse {
  team_id: string;
  authed_user_id: string;
}

export interface ConfluenceAccessibleResource {
  id: string;
  name: string;
  url: string;
  scopes: string[];
  avatarUrl: string;
}

export interface OAuthConfluencePrepareFinalizationResponse {
  success: boolean;
  message: string;
  accessible_resources: ConfluenceAccessibleResource[];
}

export interface OAuthConfluenceFinalizeResponse {
  success: boolean;
  message: string;
  redirect_url: string;
}

export interface CCPairBasicInfo {
  has_successful_run: boolean;
  source: ValidSources;
  status: ConnectorCredentialPairStatus;
}

export type ConnectorSummary = {
  count: number;
  active: number;
  public: number;
  totalDocsIndexed: number;
  errors: number; // New field for error count
};

export type GroupedConnectorSummaries = Record<ValidSources, ConnectorSummary>;

// DELETION

export interface DeletionAttemptSnapshot {
  connector_id: number;
  credential_id: number;
  status: TaskStatus;
}

// DOCUMENT SETS
export interface CCPairDescriptor<ConnectorType, CredentialType> {
  id: number;
  name: string;
  connector: Connector<ConnectorType>;
  credential: Credential<CredentialType>;
  access_type: AccessType;
}

export interface FederatedConnectorConfig {
  federated_connector_id: number;
  entities: Record<string, any>;
}

export interface FederatedConnectorDescriptor {
  id: number;
  name: string;
  source: string;
  entities: Record<string, any>;
}

// Simplified interfaces with minimal data
export interface CCPairSummary {
  id: number;
  name: string;
  source: ValidSources;
  access_type: AccessType;
}

export interface FederatedConnectorSummary {
  id: number;
  name: string;
  source: string;
  entities: Record<string, any>;
}

export interface DocumentSetSummary {
  id: number;
  name: string;
  description: string;
  cc_pair_summaries: CCPairSummary[];
  is_up_to_date: boolean;
  is_public: boolean;
  users: string[];
  groups: number[];
  federated_connector_summaries: FederatedConnectorSummary[];
}

export interface Tag {
  tag_key: string;
  tag_value: string;
  source: ValidSources;
}

// STANDARD ANSWERS
export interface StandardAnswerCategory {
  id: number;
  name: string;
}

export interface StandardAnswer {
  id: number;
  keyword: string;
  answer: string;
  match_regex: boolean;
  match_any_keywords: boolean;
  categories: StandardAnswerCategory[];
}

// SLACK BOT CONFIGS

export type AnswerFilterOption =
  | "well_answered_postfilter"
  | "questionmark_prefilter";

export interface ChannelConfig {
  channel_name: string;
  respond_tag_only?: boolean;
  respond_to_bots?: boolean;
  is_ephemeral?: boolean;
  show_continue_in_web_ui?: boolean;
  respond_member_group_list?: string[];
  answer_filters?: AnswerFilterOption[];
  follow_up_tags?: string[];
  disabled?: boolean;
}

export type SlackBotResponseType = "quotes" | "citations";

export interface SlackChannelConfig {
  id: number;
  slack_bot_id: number;
  persona_id: number | null;
  persona: Persona | null;
  channel_config: ChannelConfig;
  enable_auto_filters: boolean;
  standard_answer_categories: StandardAnswerCategory[];
  is_default: boolean;
}

export interface SlackChannelDescriptor {
  id: string;
  name: string;
}

export type SlackBot = {
  id: number;
  name: string;
  enabled: boolean;
  configs_count: number;
  slack_channel_configs: Array<{
    id: number;
    is_default: boolean;
    channel_config: {
      channel_name: string;
    };
  }>;
  bot_token: string;
  app_token: string;
  user_token?: string;
};

export interface SlackBotTokens {
  bot_token: string;
  app_token: string;
  user_token?: string;
}

/* EE Only Types */
export interface UserGroup {
  id: number;
  name: string;
  users: User[];
  curator_ids: string[];
  cc_pairs: CCPairDescriptor<any, any>[];
  document_sets: DocumentSetSummary[];
  personas: Persona[];
  is_up_to_date: boolean;
  is_up_for_deletion: boolean;
  is_default: boolean;
}

export enum ValidSources {
  Web = "web",
  GitHub = "github",
  GitLab = "gitlab",
  Slack = "slack",
  GoogleDrive = "google_drive",
  Gmail = "gmail",
  Bookstack = "bookstack",
  Outline = "outline",
  Confluence = "confluence",
  Jira = "jira",
  Productboard = "productboard",
  Slab = "slab",
  Coda = "coda",
  Notion = "notion",
  Guru = "guru",
  Gong = "gong",
  Zulip = "zulip",
  Linear = "linear",
  Hubspot = "hubspot",
  Document360 = "document360",
  File = "file",
  UserFile = "user_file",
  GoogleSites = "google_sites",
  Loopio = "loopio",
  Dropbox = "dropbox",
  Discord = "discord",
  Salesforce = "salesforce",
  Sharepoint = "sharepoint",
  Teams = "teams",
  Zendesk = "zendesk",
  Discourse = "discourse",
  Axero = "axero",
  Clickup = "clickup",
  Wikipedia = "wikipedia",
  Mediawiki = "mediawiki",
  Asana = "asana",
  S3 = "s3",
  R2 = "r2",
  GoogleCloudStorage = "google_cloud_storage",
  Xenforo = "xenforo",
  OciStorage = "oci_storage",
  NotApplicable = "not_applicable",
  IngestionApi = "ingestion_api",
  Freshdesk = "freshdesk",
  Fireflies = "fireflies",
  Egnyte = "egnyte",
  Airtable = "airtable",
  Gitbook = "gitbook",
  Highspot = "highspot",
  DrupalWiki = "drupal_wiki",
  Imap = "imap",
  Bitbucket = "bitbucket",
  TestRail = "testrail",

  // Craft-specific sources
  CraftFile = "craft_file",

  // Federated Connectors
  FederatedSlack = "federated_slack",
}

export const federatedSourceToRegularSource = (
  maybeFederatedSource: ValidSources
): ValidSources => {
  if (maybeFederatedSource === ValidSources.FederatedSlack) {
    return ValidSources.Slack;
  }
  return maybeFederatedSource;
};

export const validAutoSyncSources = [
  ValidSources.Confluence,
  ValidSources.Jira,
  ValidSources.GoogleDrive,
  ValidSources.Gmail,
  ValidSources.Slack,
  ValidSources.Salesforce,
  ValidSources.GitHub,
  ValidSources.Sharepoint,
  ValidSources.Teams,
] as const;

// Create a type from the array elements
export type ValidAutoSyncSource = (typeof validAutoSyncSources)[number];

export type ConfigurableSources = Exclude<
  ValidSources,
  | ValidSources.NotApplicable
  | ValidSources.IngestionApi
  | ValidSources.FederatedSlack // is part of ValiedSources.Slack
  | ValidSources.UserFile
  | ValidSources.CraftFile // User Library - managed through dedicated UI
>;

export const oauthSupportedSources: ConfigurableSources[] = [
  ValidSources.Slack,
  // NOTE: temporarily disabled until our GDrive App is approved
  // ValidSources.GoogleDrive,
  ValidSources.Confluence,
];

export type OAuthSupportedSource = (typeof oauthSupportedSources)[number];

// Federated Connector Types
export interface CredentialFieldSpec {
  type: string;
  description: string;
  required: boolean;
  default?: any;
  example?: any;
  secret: boolean;
}

export interface ConfigurationFieldSpec {
  type: string;
  description: string;
  required: boolean;
  default?: any;
  example?: any;
  secret: boolean;
  hidden_when?: Record<string, any>;
}

export interface CredentialSchemaResponse {
  credentials: Record<string, CredentialFieldSpec>;
}

export interface ConfigurationSchemaResponse {
  configuration: Record<string, ConfigurationFieldSpec>;
}

export interface FederatedConnectorCreateRequest {
  source: string;
  credentials: Record<string, any>;
  config?: Record<string, any>;
}

export interface FederatedConnectorCreateResponse {
  id: number;
  source: string;
}

export interface IndexingStatusRequest {
  secondary_index?: boolean;
  access_type_filters?: string[];
  last_status_filters?: string[];
  docs_count_operator?: ">" | "<" | "=" | null;
  docs_count_value?: number | null;
  source_to_page?: Record<ValidSources, number>;
  source?: ValidSources;
  get_all_connectors?: boolean;
}


================================================
FILE: web/src/lib/typingUtils.ts
================================================
import { useEffect } from "react";

type Handler = (event: React.KeyboardEvent) => void;

export function handleKeyPress(
  requestedKey: string,
  callback?: Handler,
  passthrough?: Handler
): Handler {
  return (event) => {
    const func = event.key === requestedKey ? callback : passthrough;
    func?.(event);
  };
}

export function handleEnterPress(
  callback?: Handler,
  passthrough?: Handler
): Handler {
  return handleKeyPress("Enter", callback, passthrough);
}

export function useEscapePress(callback: () => void, enabled?: boolean) {
  useEffect(() => {
    if (!enabled) return;

    const handleEscape = (event: KeyboardEvent) => {
      if (event.key === "Escape") {
        callback();
      }
    };

    document.addEventListener("keydown", handleEscape);
    return () => {
      document.removeEventListener("keydown", handleEscape);
    };
  }, [callback, enabled]);
}


================================================
FILE: web/src/lib/updateSlackBotField.ts
================================================
import { SlackBot } from "@/lib/types";

export async function updateSlackBotField(
  slackBot: SlackBot,
  field: keyof SlackBot,
  value: any
): Promise<Response> {
  return fetch(`/api/manage/admin/slack-app/bots/${slackBot.id}`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      ...slackBot,
      [field]: value,
    }),
  });
}


================================================
FILE: web/src/lib/urlBuilder.ts
================================================
type QueryParams = {
  [key: string]: string | number | boolean | null | undefined;
};

export function buildApiPath(base: string, params?: QueryParams): string {
  let queryString = "";
  if (params) {
    const entries = Object.entries(params)
      .filter(([key, value]) => value !== null && value !== undefined)
      .map(
        ([key, value]) =>
          `${encodeURIComponent(key)}=${encodeURIComponent(value!.toString())}`
      );

    if (entries.length > 0) {
      queryString = `?${entries.join("&")}`;
    }
  }

  return `${base}${queryString}`;
}


================================================
FILE: web/src/lib/user.test.ts
================================================
import { getUserInitials } from "@/lib/user";

describe("getUserInitials", () => {
  it("returns first letters of first two name parts", () => {
    expect(getUserInitials("Alice Smith", "alice@example.com")).toBe("AS");
  });

  it("returns first two chars of a single-word name", () => {
    expect(getUserInitials("Alice", "alice@example.com")).toBe("AL");
  });

  it("handles three-word names (uses first two)", () => {
    expect(getUserInitials("Alice B. Smith", "alice@example.com")).toBe("AB");
  });

  it("falls back to email local part with dot separator", () => {
    expect(getUserInitials(null, "alice.smith@example.com")).toBe("AS");
  });

  it("falls back to email local part with underscore separator", () => {
    expect(getUserInitials(null, "alice_smith@example.com")).toBe("AS");
  });

  it("falls back to email local part with hyphen separator", () => {
    expect(getUserInitials(null, "alice-smith@example.com")).toBe("AS");
  });

  it("uses first two chars of email local if no separator", () => {
    expect(getUserInitials(null, "alice@example.com")).toBe("AL");
  });

  it("returns null for empty email local part", () => {
    expect(getUserInitials(null, "@example.com")).toBeNull();
  });

  it("uppercases the result", () => {
    expect(getUserInitials("john doe", "jd@test.com")).toBe("JD");
  });

  it("trims whitespace from name", () => {
    expect(getUserInitials("  Alice Smith  ", "a@test.com")).toBe("AS");
  });

  it("returns null for numeric name parts", () => {
    expect(getUserInitials("Alice 1st", "x@test.com")).toBeNull();
  });

  it("returns null for numeric email", () => {
    expect(getUserInitials(null, "42@domain.com")).toBeNull();
  });

  it("falls back to email when name has non-alpha chars", () => {
    expect(getUserInitials("A1", "alice@example.com")).toBe("AL");
  });
});


================================================
FILE: web/src/lib/user.ts
================================================
import { User } from "@/lib/types";

export const checkUserIsNoAuthUser = (userId: string) => {
  return userId === "__no_auth_user__";
};

export const getCurrentUser = async (): Promise<User | null> => {
  const response = await fetch("/api/me", {
    credentials: "include",
  });
  if (!response.ok) {
    return null;
  }
  const user = await response.json();
  return user;
};

export const logout = async (): Promise<Response> => {
  const response = await fetch("/auth/logout", {
    method: "POST",
    credentials: "include",
  });
  return response;
};

export const basicLogin = async (
  email: string,
  password: string
): Promise<Response> => {
  const params = new URLSearchParams([
    ["username", email],
    ["password", password],
  ]);

  const response = await fetch("/api/auth/login", {
    method: "POST",
    credentials: "include",
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
    },
    body: params,
  });
  return response;
};

export const basicSignup = async (
  email: string,
  password: string,
  referralSource?: string,
  captchaToken?: string
) => {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };

  // Add captcha token to headers if provided
  if (captchaToken) {
    headers["X-Captcha-Token"] = captchaToken;
  }

  const response = await fetch("/api/auth/register", {
    method: "POST",
    credentials: "include",
    headers,
    body: JSON.stringify({
      email,
      username: email,
      password,
      referral_source: referralSource,
      captcha_token: captchaToken,
    }),
  });
  return response;
};

export interface CustomRefreshTokenResponse {
  access_token: string;
  refresh_token: string;
  session: {
    exp: number;
  };
  userinfo: {
    sub: string;
    familyName: string;
    givenName: string;
    fullName: string;
    userId: string;
    email: string;
  };
}

export async function refreshToken(
  customRefreshUrl: string
): Promise<CustomRefreshTokenResponse | null> {
  try {
    console.debug("Sending request to custom refresh URL");
    // support both absolute and relative
    const url = customRefreshUrl.startsWith("http")
      ? new URL(customRefreshUrl)
      : new URL(customRefreshUrl, window.location.origin);
    url.searchParams.append("info", "json");
    url.searchParams.append("access_token_refresh_interval", "3600");

    const response = await fetch(url.toString());
    if (!response.ok) {
      console.error(`Failed to refresh token: ${await response.text()}`);
      return null;
    }

    return await response.json();
  } catch (error) {
    console.error("Error refreshing token:", error);
    throw error;
  }
}

export function getUserDisplayName(user: User | null): string {
  // Prioritize custom personal name, if set.
  if (!!user?.personalization?.name) return user.personalization.name;

  // Then, prioritize personal email.
  if (!!user?.email) {
    const atIndex = user.email.indexOf("@");
    if (atIndex > 0) {
      return user.email.substring(0, atIndex);
    }
  }

  // If nothing works, then fall back to anonymous user name
  return "Anonymous";
}

export function getUserEmail(user: User | null): string {
  // Prioritize personal email.
  if (!!user?.email) return user.email;

  // If nothing works, then fall back to anonymous email.
  return "anonymous@email.com";
}

/**
 * Derive display initials from a user's name or email.
 *
 * - If a name is provided, uses the first letter of the first two words.
 * - Falls back to the email local part, splitting on `.`, `_`, or `-`.
 * - Returns `null` when no valid alpha initials can be derived.
 */
export function getUserInitials(
  name: string | null,
  email: string
): string | null {
  if (name) {
    const words = name.trim().split(/\s+/);
    if (words.length >= 2) {
      const first = words[0]?.[0];
      const second = words[1]?.[0];
      if (first && second) {
        const result = (first + second).toUpperCase();
        if (/^[A-Z]{2}$/.test(result)) return result;
      }
      return null;
    }
    if (name.trim().length >= 1) {
      const result = name.trim().slice(0, 2).toUpperCase();
      if (/^[A-Z]{1,2}$/.test(result)) return result;
    }
  }

  const local = email.split("@")[0];
  if (!local || local.length === 0) return null;
  const parts = local.split(/[._-]/);
  if (parts.length >= 2) {
    const first = parts[0]?.[0];
    const second = parts[1]?.[0];
    if (first && second) {
      const result = (first + second).toUpperCase();
      if (/^[A-Z]{2}$/.test(result)) return result;
    }
    return null;
  }
  if (local.length >= 2) {
    const result = local.slice(0, 2).toUpperCase();
    if (/^[A-Z]{2}$/.test(result)) return result;
  }
  if (local.length === 1) {
    const result = local.toUpperCase();
    if (/^[A-Z]$/.test(result)) return result;
  }
  return null;
}


================================================
FILE: web/src/lib/userSS.ts
================================================
import { cookies } from "next/headers";
import { User } from "./types";
import { buildUrl, UrlBuilder } from "./utilsSS";
import { ReadonlyRequestCookies } from "next/dist/server/web/spec-extension/adapters/request-cookies";
import { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from "./constants";

export interface AuthTypeMetadata {
  authType: AuthType;
  autoRedirect: boolean;
  requiresVerification: boolean;
  anonymousUserEnabled: boolean | null;
  passwordMinLength: number;
  hasUsers: boolean;
  oauthEnabled: boolean;
}

export const getAuthTypeMetadataSS = async (): Promise<AuthTypeMetadata> => {
  const res = await fetch(buildUrl("/auth/type"));
  if (!res.ok) {
    throw new Error("Failed to fetch data");
  }

  const data: {
    auth_type: string;
    requires_verification: boolean;
    anonymous_user_enabled: boolean | null;
    password_min_length: number;
    has_users: boolean;
    oauth_enabled: boolean;
  } = await res.json();

  let authType: AuthType;

  // Override fastapi users auth so we can use both
  if (NEXT_PUBLIC_CLOUD_ENABLED) {
    authType = AuthType.CLOUD;
  } else {
    authType = data.auth_type as AuthType;
  }

  // for SAML / OIDC, we auto-redirect the user to the IdP when the user visits
  // Onyx in an un-authenticated state
  if (authType === AuthType.OIDC || authType === AuthType.SAML) {
    return {
      authType,
      autoRedirect: true,
      requiresVerification: data.requires_verification,
      anonymousUserEnabled: data.anonymous_user_enabled,
      passwordMinLength: data.password_min_length,
      hasUsers: data.has_users,
      oauthEnabled: data.oauth_enabled,
    };
  }
  return {
    authType,
    autoRedirect: false,
    requiresVerification: data.requires_verification,
    anonymousUserEnabled: data.anonymous_user_enabled,
    passwordMinLength: data.password_min_length,
    hasUsers: data.has_users,
    oauthEnabled: data.oauth_enabled,
  };
};

const getOIDCAuthUrlSS = async (nextUrl: string | null): Promise<string> => {
  const url = UrlBuilder.fromClientUrl("/api/auth/oidc/authorize");
  if (nextUrl) {
    url.addParam("next", nextUrl);
  }
  url.addParam("redirect", true);

  return url.toString();
};

const getGoogleOAuthUrlSS = async (nextUrl: string | null): Promise<string> => {
  const url = UrlBuilder.fromClientUrl("/api/auth/oauth/authorize");
  if (nextUrl) {
    url.addParam("next", nextUrl);
  }
  url.addParam("redirect", true);

  return url.toString();
};

const getSAMLAuthUrlSS = async (nextUrl: string | null): Promise<string> => {
  const url = UrlBuilder.fromInternalUrl("/auth/saml/authorize");
  if (nextUrl) {
    url.addParam("next", nextUrl);
  }

  const res = await fetch(url.toString());
  if (!res.ok) {
    throw new Error("Failed to fetch data");
  }

  const data: { authorization_url: string } = await res.json();
  return data.authorization_url;
};

export const getAuthUrlSS = async (
  authType: AuthType,
  nextUrl: string | null
): Promise<string> => {
  // Returns the auth url for the given auth type

  switch (authType) {
    case AuthType.BASIC:
      return "";
    case AuthType.GOOGLE_OAUTH: {
      return await getGoogleOAuthUrlSS(nextUrl);
    }
    case AuthType.CLOUD: {
      return await getGoogleOAuthUrlSS(nextUrl);
    }
    case AuthType.SAML: {
      return await getSAMLAuthUrlSS(nextUrl);
    }
    case AuthType.OIDC: {
      return await getOIDCAuthUrlSS(nextUrl);
    }
  }
};

const logoutStandardSS = async (headers: Headers): Promise<Response> => {
  return await fetch(buildUrl("/auth/logout"), {
    method: "POST",
    headers: headers,
  });
};

const logoutSAMLSS = async (headers: Headers): Promise<Response> => {
  return await fetch(buildUrl("/auth/saml/logout"), {
    method: "POST",
    headers: headers,
  });
};

export const logoutSS = async (
  authType: AuthType,
  headers: Headers
): Promise<Response | null> => {
  switch (authType) {
    case AuthType.SAML: {
      return await logoutSAMLSS(headers);
    }
    default: {
      return await logoutStandardSS(headers);
    }
  }
};

export const getCurrentUserSS = async (): Promise<User | null> => {
  try {
    const cookieString = processCookies(await cookies());

    const response = await fetch(buildUrl("/me"), {
      credentials: "include",
      next: { revalidate: 0 },
      headers: {
        cookie: cookieString,
      },
    });

    if (!response.ok) {
      return null;
    }

    const user = await response.json();
    return user;
  } catch (e) {
    console.log(`Error fetching user: ${e}`);
    return null;
  }
};

export const processCookies = (cookies: ReadonlyRequestCookies): string => {
  let cookieString = cookies
    .getAll()
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");

  // Inject debug auth cookie for local development against remote backend (only if not already present)
  if (process.env.DEBUG_AUTH_COOKIE && process.env.NODE_ENV === "development") {
    const hasAuthCookie = cookieString
      .split(/;\s*/)
      .some((c) => c.startsWith("fastapiusersauth="));
    if (!hasAuthCookie) {
      const debugCookie = `fastapiusersauth=${process.env.DEBUG_AUTH_COOKIE}`;
      cookieString = cookieString
        ? `${cookieString}; ${debugCookie}`
        : debugCookie;
    }
  }

  return cookieString;
};


================================================
FILE: web/src/lib/userSettings.ts
================================================
import { UserPersonalization } from "@/lib/types";

export async function setUserDefaultModel(
  model: string | null
): Promise<Response> {
  const response = await fetch(`/api/user/default-model`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ default_model: model }),
  });

  return response;
}

/**
 * Update the current user's personalization settings.
 */
export async function updateUserPersonalization(
  personalization: Partial<UserPersonalization>
): Promise<Response> {
  return fetch(`/api/user/personalization`, {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(personalization),
  });
}


================================================
FILE: web/src/lib/utils.test.ts
================================================
import { ensureHrefProtocol, transformLinkUri } from "./utils";

describe("ensureHrefProtocol", () => {
  it("adds https protocol to bare domains", () => {
    expect(ensureHrefProtocol("anthropic.com")).toBe("https://anthropic.com");
  });

  it("preserves links that already include a protocol", () => {
    expect(ensureHrefProtocol("https://anthropic.com")).toBe(
      "https://anthropic.com"
    );
    expect(ensureHrefProtocol("mailto:support@anthropic.com")).toBe(
      "mailto:support@anthropic.com"
    );
  });

  it("converts bare email addresses to mailto links", () => {
    expect(ensureHrefProtocol("support@anthropic.com")).toBe(
      "mailto:support@anthropic.com"
    );
  });
});

describe("transformLinkUri", () => {
  it("allows safe protocols", () => {
    expect(transformLinkUri("https://anthropic.com")).toBe(
      "https://anthropic.com"
    );
    expect(transformLinkUri("mailto:support@anthropic.com")).toBe(
      "mailto:support@anthropic.com"
    );
  });

  it("converts bare email addresses to mailto links", () => {
    expect(transformLinkUri("support@anthropic.com")).toBe(
      "mailto:support@anthropic.com"
    );
  });

  it("blocks unsafe protocols", () => {
    expect(transformLinkUri("javascript:alert(1)")).toBeNull();
  });
});


================================================
FILE: web/src/lib/utils.ts
================================================
import type { ComponentType } from "react";
import { clsx, type ClassValue } from "clsx";
import { twMerge } from "tailwind-merge";
import type { IconProps } from "@opal/types";
import {
  SvgImage,
  SvgFileChartPie,
  SvgFileBraces,
  SvgFileText,
} from "@opal/icons";
import { ALLOWED_URL_PROTOCOLS } from "./constants";

const URI_SCHEME_REGEX = /^[a-zA-Z][a-zA-Z\d+.-]*:/;
const BARE_EMAIL_REGEX = /^[^\s@/]+@[^\s@/:]+\.[^\s@/:]+$/;

export const INTERACTIVE_SELECTOR =
  "a, button, input, textarea, select, label, [role='button'], [tabindex]:not([tabindex='-1']), [contenteditable]:not([contenteditable='false'])";

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}

export const truncateString = (str: string, maxLength: number) => {
  return str.length > maxLength ? str.slice(0, maxLength - 1) + "..." : str;
};

/**
 * Ensures an href has a protocol, adding https:// only to bare domains.
 * Converts bare email addresses to mailto: links.
 * Preserves existing protocols, relative paths, and anchors.
 */
export function ensureHrefProtocol(
  href: string | undefined
): string | undefined {
  if (!href) return href;
  const trimmedHref = href.trim();
  if (!trimmedHref) return href;

  const needsProtocol =
    !URI_SCHEME_REGEX.test(trimmedHref) &&
    !trimmedHref.startsWith("/") &&
    !trimmedHref.startsWith("#");
  if (!needsProtocol) {
    return trimmedHref;
  }

  if (BARE_EMAIL_REGEX.test(trimmedHref)) {
    return `mailto:${trimmedHref}`;
  }

  return `https://${trimmedHref}`;
}

/**
 * Custom URL transformer function for ReactMarkdown.
 * Only allows a small, safe set of protocols and strips everything else.
 * Bare email addresses are normalized to mailto: links.
 * Returning null removes the href attribute entirely.
 */
export function transformLinkUri(href: string): string | null {
  if (!href) return null;

  const trimmedHref = href.trim();
  if (!trimmedHref) return null;

  try {
    const parsedUrl = new URL(trimmedHref);
    const protocol = parsedUrl.protocol.toLowerCase();

    if (ALLOWED_URL_PROTOCOLS.some((allowed) => allowed === protocol)) {
      return trimmedHref;
    }

    return null;
  } catch {
    if (BARE_EMAIL_REGEX.test(trimmedHref)) {
      return `mailto:${trimmedHref}`;
    }

    // Allow relative URLs, but drop anything that looks like a protocol-prefixed link
    if (URI_SCHEME_REGEX.test(trimmedHref)) {
      return null;
    }

    return trimmedHref;
  }
}

export function isSubset(parent: string[], child: string[]): boolean {
  const parentSet = new Set(parent);
  return Array.from(new Set(child)).every((item) => parentSet.has(item));
}

export function trinaryLogic<T>(
  a: boolean | undefined,
  b: boolean,
  ifTrue: T,
  ifFalse: T
): T {
  const condition = a !== undefined ? a : b;
  return condition ? ifTrue : ifFalse;
}

// A convenience function to prevent propagation of click events to items higher up in the DOM tree.
//
// # Note:
// This is a desired behaviour in MANY locations, since we have buttons nested within buttons.
// When the nested button is pressed, the click event that triggered it should (in most scenarios) NOT trigger its parent button!
export function noProp(
  f?: (event: React.MouseEvent) => void
): React.MouseEventHandler {
  return (event) => {
    event.stopPropagation();
    f?.(event);
  };
}

/**
 * Extracts the file extension from a filename and returns it in uppercase.
 * Returns an empty string if no valid extension is found.
 */
export function getFileExtension(fileName: string): string {
  const idx = fileName.lastIndexOf(".");
  if (idx === -1) return "";
  const ext = fileName.slice(idx + 1).toLowerCase();
  if (ext === "txt") return "PLAINTEXT";
  return ext.toUpperCase();
}

/**
 * Centralized list of image file extensions (lowercase, no leading dots)
 */
export const IMAGE_EXTENSIONS = [
  "png",
  "jpg",
  "jpeg",
  "gif",
  "webp",
  "svg",
  "bmp",
] as const;

export type ImageExtension = (typeof IMAGE_EXTENSIONS)[number];

/**
 * Checks whether a provided extension string corresponds to an image extension.
 * Accepts values with any casing and without a leading dot.
 */
export function isImageExtension(
  extension: string | null | undefined
): boolean {
  if (!extension) {
    return false;
  }
  const normalized = extension.toLowerCase();
  return (IMAGE_EXTENSIONS as readonly string[]).includes(normalized);
}

/**
 * Formats bytes to human-readable file size.
 */
export function formatBytes(
  bytes: number | undefined,
  decimals: number = 2
): string {
  if (bytes == null) return "Unknown";
  if (bytes === 0) return "0 Bytes";

  const k = 1024;
  const dm = decimals < 0 ? 0 : decimals;
  const sizes = ["Bytes", "KB", "MB", "GB", "TB"];

  let unitIndex = Math.floor(Math.log(bytes) / Math.log(k));
  if (unitIndex < 0) unitIndex = 0;
  if (unitIndex >= sizes.length) unitIndex = sizes.length - 1;
  return (
    parseFloat((bytes / Math.pow(k, unitIndex)).toFixed(dm)) +
    " " +
    sizes[unitIndex]
  );
}

/**
 * Checks if a filename represents an image file based on its extension.
 */
export function isImageFile(fileName: string | null | undefined): boolean {
  if (!fileName) return false;
  const lowerFileName = String(fileName).toLowerCase();
  return IMAGE_EXTENSIONS.some((ext) => lowerFileName.endsWith(`.${ext}`));
}

/**
 * Typical code/config file extensions (lowercase, no leading dots)
 */
export const CODE_EXTENSIONS = [
  "ts",
  "tsx",
  "js",
  "jsx",
  "mjs",
  "cjs",
  "py",
  "pyw",
  "java",
  "kt",
  "kts",
  "c",
  "h",
  "cpp",
  "cc",
  "cxx",
  "hpp",
  "cs",
  "go",
  "rs",
  "rb",
  "php",
  "swift",
  "scala",
  "r",
  "sql",
  "sh",
  "bash",
  "zsh",
  "yaml",
  "yml",
  "json",
  "xml",
  "html",
  "htm",
  "css",
  "scss",
  "sass",
  "less",
  "lua",
  "pl",
  "vue",
  "svelte",
  "m",
  "mm",
  "md",
  "markdown",
] as const;

/**
 * Checks if a filename represents a code/config file based on its extension.
 */
export function isCodeFile(fileName: string | null | undefined): boolean {
  if (!fileName) return false;
  const lowerFileName = String(fileName).toLowerCase();
  return CODE_EXTENSIONS.some((ext) => lowerFileName.endsWith(`.${ext}`));
}

/**
 * Returns the icon component for a file based on its name/path.
 * Used for file tree and preview tab icons.
 */
export function getFileIcon(
  fileName: string | null | undefined
): ComponentType<IconProps> {
  if (!fileName) return SvgFileText;
  if (isImageFile(fileName)) return SvgImage;
  if (/\.pptx$/i.test(fileName)) return SvgFileChartPie;
  if (/\.pdf$/i.test(fileName)) return SvgFileText;
  if (isCodeFile(fileName)) return SvgFileBraces;
  return SvgFileText;
}

/**
 * Checks if a collection of files contains any non-image files.
 * Useful for determining whether image previews should be compact.
 */
export function hasNonImageFiles(
  files: Array<{ name?: string | null }>
): boolean {
  return files.some((file) => !isImageFile(file.name));
}

/**
 * Merges multiple refs into a single callback ref.
 * Useful when a component needs both an internal ref and a forwarded ref.
 */
export function mergeRefs<T>(
  ...refs: (React.Ref<T> | undefined)[]
): React.RefCallback<T> {
  return (node: T | null) => {
    refs.forEach((ref) => {
      if (typeof ref === "function") {
        ref(node);
      } else if (ref) {
        (ref as React.MutableRefObject<T | null>).current = node;
      }
    });
  };
}


================================================
FILE: web/src/lib/utilsSS.ts
================================================
import { cookies } from "next/headers";
import { HOST_URL, INTERNAL_URL } from "./constants";
import { processCookies } from "@/lib/userSS";

export function buildClientUrl(path: string) {
  if (path.startsWith("/")) {
    return `${HOST_URL}${path}`;
  }
  return `${HOST_URL}/${path}`;
}

export function buildUrl(path: string) {
  if (path.startsWith("/")) {
    return `${INTERNAL_URL}${path}`;
  }
  return `${INTERNAL_URL}/${path}`;
}

export class UrlBuilder {
  private url: URL;

  constructor(baseUrl: string) {
    try {
      this.url = new URL(baseUrl);
    } catch {
      // Handle relative URLs by prepending a base
      this.url = new URL(baseUrl, "http://placeholder.com");
    }
  }

  addParam(key: string, value: string | number | boolean): UrlBuilder {
    this.url.searchParams.set(key, String(value));
    return this;
  }

  addParams(params: Record<string, string | number | boolean>): UrlBuilder {
    Object.entries(params).forEach(([key, value]) => {
      this.url.searchParams.set(key, String(value));
    });
    return this;
  }

  toString(): string {
    // Extract just the path and query parts for relative URLs
    if (this.url.origin === "http://placeholder.com") {
      return `${this.url.pathname}${this.url.search}`;
    }
    return this.url.toString();
  }

  static fromInternalUrl(path: string): UrlBuilder {
    return new UrlBuilder(buildUrl(path));
  }

  static fromClientUrl(path: string): UrlBuilder {
    return new UrlBuilder(buildClientUrl(path));
  }
}

export async function fetchSS(url: string, options?: RequestInit) {
  const cookieString = processCookies(await cookies());

  const init: RequestInit = {
    credentials: "include",
    cache: "no-store",
    ...options,
    headers: {
      ...options?.headers,
      cookie: cookieString,
    },
  };

  return fetch(buildUrl(url), init);
}


================================================
FILE: web/src/lib/version.ts
================================================
import { buildUrl } from "./utilsSS";

// Maybe improve type-safety by creating a 'VersionType' instead of generic string
export const getBackendVersion = async (): Promise<string | null> => {
  try {
    const res = await fetch(buildUrl("/version"));
    if (!res.ok) {
      //throw new Error("Failed to fetch data");
      return null;
    }

    const data: { backend_version: string } = await res.json();
    return data.backend_version as string;
  } catch (e) {
    console.log(`Error fetching backend version info: ${e}`);
    return null;
  }
};

// Frontend?
export const getWebVersion = (): string | null => {
  return process.env.ONYX_VERSION || "dev";
};


================================================
FILE: web/src/providers/AppBackgroundProvider.tsx
================================================
"use client";

import React, { createContext, useContext, useMemo } from "react";
import { useUser } from "@/providers/UserProvider";
import {
  CHAT_BACKGROUND_NONE,
  getBackgroundById,
  ChatBackgroundOption,
} from "@/lib/constants/chatBackgrounds";

interface AppBackgroundContextType {
  /** The full background option object, or undefined if none/invalid */
  appBackground: ChatBackgroundOption | undefined;
  /** The URL of the background image, or null if no background is set */
  appBackgroundUrl: string | null;
  /** Whether a background is currently active */
  hasBackground: boolean;
}

const AppBackgroundContext = createContext<
  AppBackgroundContextType | undefined
>(undefined);

export function AppBackgroundProvider({
  children,
}: {
  children: React.ReactNode;
}) {
  const { user } = useUser();

  const value = useMemo(() => {
    const chatBackgroundId = user?.preferences?.chat_background;
    const appBackground = getBackgroundById(chatBackgroundId ?? null);
    const hasBackground =
      !!appBackground && appBackground.src !== CHAT_BACKGROUND_NONE;
    const appBackgroundUrl = hasBackground ? appBackground.src : null;

    return {
      appBackground,
      appBackgroundUrl,
      hasBackground,
    };
  }, [user?.preferences?.chat_background]);

  return (
    <AppBackgroundContext.Provider value={value}>
      {children}
    </AppBackgroundContext.Provider>
  );
}

export function useAppBackground() {
  const context = useContext(AppBackgroundContext);
  if (context === undefined) {
    throw new Error(
      "useAppBackground must be used within an AppBackgroundProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/providers/AppProvider.tsx
================================================
/**
 * AppProvider - Root Provider Composition
 *
 * This component serves as a centralized wrapper that composes all of the
 * application's context providers into a single component. It is rendered
 * at the root layout level (`app/layout.tsx`) and provides global state
 * and functionality to the entire application.
 *
 * All data is fetched client-side by individual providers via SWR hooks,
 * eliminating server-side data fetching from the root layout and preventing
 * RSC prefetch amplification.
 *
 * ## Provider Hierarchy (outermost to innermost)
 *
 * 1. **SettingsProvider** - Application settings and feature flags
 * 2. **UserProvider** - Current user authentication and profile
 * 3. **AppBackgroundProvider** - App background image/URL based on user preferences
 * 4. **ProviderContextProvider** - LLM provider configuration
 * 5. **ModalProvider** - Global modal state management
 * 6. **AppSidebarProvider** - Sidebar open/closed state
 * 7. **QueryControllerProvider** - Search/Chat mode + query lifecycle
 */
"use client";

import { UserProvider } from "@/providers/UserProvider";
import { ProviderContextProvider } from "@/components/chat/ProviderContext";
import { SettingsProvider } from "@/providers/SettingsProvider";
import { ModalProvider } from "@/components/context/ModalContext";
import { AppSidebarProvider } from "@/providers/AppSidebarProvider";
import { AppBackgroundProvider } from "@/providers/AppBackgroundProvider";
import { QueryControllerProvider } from "@/providers/QueryControllerProvider";
import ToastProvider from "@/providers/ToastProvider";

interface AppProviderProps {
  children: React.ReactNode;
}

export default function AppProvider({ children }: AppProviderProps) {
  return (
    <SettingsProvider>
      <UserProvider>
        <AppBackgroundProvider>
          <ProviderContextProvider>
            <ModalProvider>
              <AppSidebarProvider>
                <QueryControllerProvider>
                  <ToastProvider>{children}</ToastProvider>
                </QueryControllerProvider>
              </AppSidebarProvider>
            </ModalProvider>
          </ProviderContextProvider>
        </AppBackgroundProvider>
      </UserProvider>
    </SettingsProvider>
  );
}


================================================
FILE: web/src/providers/AppSidebarProvider.tsx
================================================
"use client";

import {
  createContext,
  useContext,
  useState,
  ReactNode,
  Dispatch,
  SetStateAction,
  useEffect,
} from "react";
import Cookies from "js-cookie";
import { SIDEBAR_TOGGLED_COOKIE_NAME } from "@/components/resizable/constants";

function setFoldedCookie(folded: boolean) {
  const foldedAsString = folded.toString();
  Cookies.set(SIDEBAR_TOGGLED_COOKIE_NAME, foldedAsString, { expires: 365 });
  if (typeof window !== "undefined") {
    localStorage.setItem(SIDEBAR_TOGGLED_COOKIE_NAME, foldedAsString);
  }
}

export interface AppSidebarProviderProps {
  children: ReactNode;
}

export function AppSidebarProvider({ children }: AppSidebarProviderProps) {
  const [folded, setFoldedInternal] = useState(false);

  useEffect(() => {
    const stored =
      Cookies.get(SIDEBAR_TOGGLED_COOKIE_NAME) ??
      localStorage.getItem(SIDEBAR_TOGGLED_COOKIE_NAME);
    if (stored === "true") {
      setFoldedInternal(true);
    }
  }, []);

  const setFolded: Dispatch<SetStateAction<boolean>> = (value) => {
    setFoldedInternal((prev) => {
      const newState = typeof value === "function" ? value(prev) : value;
      setFoldedCookie(newState);
      return newState;
    });
  };

  useEffect(() => {
    function handleKeyDown(event: KeyboardEvent) {
      const isMac = navigator.userAgent.toLowerCase().includes("mac");
      const isModifierPressed = isMac ? event.metaKey : event.ctrlKey;
      if (!isModifierPressed || event.key !== "e") return;

      event.preventDefault();
      setFolded((prev) => !prev);
    }

    document.addEventListener("keydown", handleKeyDown);
    return () => {
      document.removeEventListener("keydown", handleKeyDown);
    };
  }, []);

  return (
    <AppSidebarContext.Provider
      value={{
        folded,
        setFolded,
      }}
    >
      {children}
    </AppSidebarContext.Provider>
  );
}

export interface AppSidebarContextType {
  folded: boolean;
  setFolded: Dispatch<SetStateAction<boolean>>;
}

const AppSidebarContext = createContext<AppSidebarContextType | undefined>(
  undefined
);

export function useAppSidebarContext() {
  const context = useContext(AppSidebarContext);
  if (context === undefined) {
    throw new Error(
      "useAppSidebarContext must be used within an AppSidebarProvider"
    );
  }
  return context;
}


================================================
FILE: web/src/providers/CustomAnalyticsScript.tsx
================================================
"use client";

import { useEffect, useRef } from "react";
import { useSettingsContext } from "@/providers/SettingsProvider";

export default function CustomAnalyticsScript() {
  const { customAnalyticsScript } = useSettingsContext();
  const injectedRef = useRef(false);

  useEffect(() => {
    if (!customAnalyticsScript || injectedRef.current) return;
    injectedRef.current = true;

    const script = document.createElement("script");
    script.type = "text/javascript";
    script.textContent = customAnalyticsScript;
    document.head.appendChild(script);
  }, [customAnalyticsScript]);

  return null;
}


================================================
FILE: web/src/providers/DynamicMetadata.tsx
================================================
"use client";

import { useEffect, useMemo } from "react";
import { useSettingsContext } from "@/providers/SettingsProvider";

export default function DynamicMetadata() {
  const { enterpriseSettings } = useSettingsContext();

  useEffect(() => {
    const title = enterpriseSettings?.application_name || "Onyx";
    if (document.title !== title) {
      document.title = title;
    }
  }, [enterpriseSettings]);

  // Cache-buster so the favicon re-fetches after an admin uploads a new logo.
  const cacheBuster = useMemo(
    () => Date.now(),
    // eslint-disable-next-line react-hooks/exhaustive-deps
    [enterpriseSettings]
  );

  const favicon = enterpriseSettings?.use_custom_logo
    ? `/api/enterprise-settings/logo?v=${cacheBuster}`
    : "/onyx.ico";

  return <link rel="icon" href={favicon} />;
}


================================================
FILE: web/src/providers/ProductGatingWrapper.tsx
================================================
"use client";

import { ApplicationStatus } from "@/interfaces/settings";
import { useSettingsContext } from "@/providers/SettingsProvider";
import GatedContentWrapper from "@/components/GatedContentWrapper";

export default function ProductGatingWrapper({
  children,
}: {
  children: React.ReactNode;
}) {
  const { settings, settingsLoading } = useSettingsContext();
  const status = settings.application_status;

  if (settingsLoading) return null;

  if (
    status === ApplicationStatus.GATED_ACCESS ||
    status === ApplicationStatus.SEAT_LIMIT_EXCEEDED
  ) {
    return <GatedContentWrapper>{children}</GatedContentWrapper>;
  }

  return children;
}


================================================
FILE: web/src/providers/ProjectsContext.tsx
================================================
"use client";

import {
  createContext,
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useState,
  useRef,
  ReactNode,
  Dispatch,
  SetStateAction,
} from "react";
import useSWR from "swr";
import { errorHandlingFetcher, skipRetryOnAuthError } from "@/lib/fetcher";
import type {
  CategorizedFiles,
  Project,
  ProjectFile,
  UserFileDeleteResult,
} from "@/app/app/projects/projectsService";
import {
  fetchProjects as svcFetchProjects,
  createProject as svcCreateProject,
  uploadFiles as svcUploadFiles,
  getRecentFiles as svcGetRecentFiles,
  getFilesInProject as svcGetFilesInProject,
  getProject as svcGetProject,
  getProjectInstructions as svcGetProjectInstructions,
  upsertProjectInstructions as svcUpsertProjectInstructions,
  getProjectDetails as svcGetProjectDetails,
  ProjectDetails,
  renameProject as svcRenameProject,
  deleteProject as svcDeleteProject,
  deleteUserFile as svcDeleteUserFile,
  getUserFileStatuses as svcGetUserFileStatuses,
  unlinkFileFromProject as svcUnlinkFileFromProject,
  linkFileToProject as svcLinkFileToProject,
  UserFileStatus,
} from "@/app/app/projects/projectsService";
import { useSearchParams } from "next/navigation";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { useAppRouter } from "@/hooks/appNavigation";
import { ChatFileType } from "@/app/app/interfaces";
import { toast } from "@/hooks/useToast";
import { useProjects } from "@/lib/hooks/useProjects";
import { SettingsContext } from "@/providers/SettingsProvider";

export type { Project, ProjectFile } from "@/app/app/projects/projectsService";

// Helper to generate unique temp IDs
const generateTempId = () => {
  try {
    return `temp_${crypto.randomUUID()}`;
  } catch {
    // Extremely unlikely fallback
    return `temp_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
};

// Create optimistic file from File object
const createOptimisticFile = (
  file: File,
  projectId: number | null = null
): ProjectFile => {
  const tempId = generateTempId();
  return {
    id: tempId, // Use temp ID as the actual ID initially
    file_id: tempId,
    name: file.name,
    project_id: projectId,
    user_id: null,
    created_at: new Date().toISOString(),
    status: UserFileStatus.UPLOADING,
    file_type: file.type,
    last_accessed_at: new Date().toISOString(),
    chat_file_type: ChatFileType.DOCUMENT,
    token_count: null,
    chunk_count: null,
    temp_id: tempId, // Store temp_id for mapping later
  };
};

function buildFileKey(file: File): string {
  const namePrefix = (file.name ?? "").slice(0, 50);
  return `${file.size}|${namePrefix}`;
}

interface ProjectsContextType {
  projects: Project[];
  recentFiles: ProjectFile[];
  currentProjectDetails: ProjectDetails | null;
  currentProjectId: number | null;
  currentMessageFiles: ProjectFile[];
  beginUpload: (
    files: File[],
    projectId?: number | null,
    onSuccess?: (uploaded: CategorizedFiles) => void,
    onFailure?: (failedTempIds: string[]) => void
  ) => Promise<ProjectFile[]>;
  allRecentFiles: ProjectFile[];
  allCurrentProjectFiles: ProjectFile[];
  isLoadingProjectDetails: boolean;
  setCurrentMessageFiles: Dispatch<SetStateAction<ProjectFile[]>>;
  upsertInstructions: (instructions: string) => Promise<void>;
  fetchProjects: () => Promise<Project[]>;
  createProject: (name: string) => Promise<Project>;
  renameProject: (projectId: number, name: string) => Promise<Project>;
  deleteProject: (projectId: number) => Promise<void>;
  uploadFiles: (
    files: File[],
    projectId?: number | null
  ) => Promise<CategorizedFiles>;
  getRecentFiles: () => Promise<ProjectFile[]>;
  getFilesInProject: (projectId: number) => Promise<ProjectFile[]>;
  refreshCurrentProjectDetails: () => Promise<void>;
  refreshRecentFiles: () => Promise<void>;
  deleteUserFile: (fileId: string) => Promise<UserFileDeleteResult>;
  unlinkFileFromProject: (projectId: number, fileId: string) => Promise<void>;
  linkFileToProject?: (projectId: number, file: ProjectFile) => void;
  lastFailedFiles: ProjectFile[];
  clearLastFailedFiles: () => void;
}

const ProjectsContext = createContext<ProjectsContextType | undefined>(
  undefined
);

interface ProjectsProviderProps {
  children: ReactNode;
}

export function ProjectsProvider({ children }: ProjectsProviderProps) {
  // Use SWR hook for projects list - no more SSR initial data
  const { projects, refreshProjects } = useProjects();
  const [recentFiles, setRecentFiles] = useState<ProjectFile[]>([]);
  const [currentProjectDetails, setCurrentProjectDetails] =
    useState<ProjectDetails | null>(null);
  const searchParams = useSearchParams();
  const currentProjectIdRaw = searchParams.get(SEARCH_PARAM_NAMES.PROJECT_ID);
  const currentProjectId = currentProjectIdRaw
    ? Number.parseInt(currentProjectIdRaw)
    : null;
  const [currentMessageFiles, setCurrentMessageFiles] = useState<ProjectFile[]>(
    []
  );
  const pollIntervalRef = useRef<number | null>(null);
  const isPollingRef = useRef<boolean>(false);
  const [lastFailedFiles, setLastFailedFiles] = useState<ProjectFile[]>([]);
  const [trackedUploadIds, setTrackedUploadIds] = useState<Set<string>>(
    new Set()
  );
  const [allRecentFiles, setAllRecentFiles] = useState<ProjectFile[]>([]);
  const [allCurrentProjectFiles, setAllCurrentProjectFiles] = useState<
    ProjectFile[]
  >([]);
  const [isLoadingProjectDetails, setIsLoadingProjectDetails] = useState(false);
  const projectToUploadFilesMapRef = useRef<Map<number, ProjectFile[]>>(
    new Map()
  );
  const route = useAppRouter();
  const settingsContext = useContext(SettingsContext);

  // SWR-backed fetch for recent files. Deduplicates across all mounts and
  // handles React StrictMode double-invocation without firing duplicate requests.
  const { data: recentFilesData, mutate: mutateRecentFiles } = useSWR<
    ProjectFile[]
  >("/api/user/files/recent", errorHandlingFetcher, {
    revalidateOnFocus: false,
    dedupingInterval: 60_000,
    onErrorRetry: skipRetryOnAuthError,
    onError: (err) =>
      console.error("[ProjectsContext] recent files fetch failed:", err),
  });
  // Track whether allRecentFiles has been seeded from the initial server fetch.
  // Subsequent updates come through the merge effect below, not a full reset.
  const hasInitializedAllRecentFilesRef = useRef(false);

  // Use SWR's mutate to refresh projects - returns the new data
  const fetchProjects = useCallback(async (): Promise<Project[]> => {
    try {
      const result = await refreshProjects();
      return result ?? [];
    } catch (err) {
      return [];
    }
  }, [refreshProjects]);

  // Load full details for current project
  const refreshCurrentProjectDetails = useCallback(async () => {
    if (currentProjectId) {
      setIsLoadingProjectDetails(true);
      try {
        const details = await svcGetProjectDetails(currentProjectId);
        await fetchProjects();
        setCurrentProjectDetails(details);
        setAllCurrentProjectFiles(details.files || []);
        if (projectToUploadFilesMapRef.current.has(currentProjectId)) {
          setAllCurrentProjectFiles((prev) => [
            ...prev,
            ...(projectToUploadFilesMapRef.current.get(currentProjectId) || []),
          ]);
        }
      } finally {
        setIsLoadingProjectDetails(false);
      }
    }
  }, [
    fetchProjects,
    currentProjectId,
    setCurrentProjectDetails,
    projectToUploadFilesMapRef,
  ]);

  const upsertInstructions = useCallback(
    async (instructions: string) => {
      if (!currentProjectId) {
        throw new Error("No project selected");
      }
      await svcUpsertProjectInstructions(currentProjectId, instructions);
      await refreshCurrentProjectDetails();
    },
    [currentProjectId, refreshCurrentProjectDetails]
  );

  const createProject = useCallback(
    async (name: string): Promise<Project> => {
      try {
        const project: Project = await svcCreateProject(name);
        // Navigate to the newly created project's page
        route({ projectId: project.id });
        // Refresh list to keep order consistent with backend
        await fetchProjects();
        return project;
      } catch (err) {
        const message =
          err instanceof Error ? err.message : "Failed to create project";
        throw err;
      }
    },
    [fetchProjects, route]
  );

  const renameProject = useCallback(
    async (projectId: number, name: string): Promise<Project> => {
      // Optimistically update project details UI if this is the current project
      if (currentProjectId === projectId) {
        setCurrentProjectDetails((prev) =>
          prev ? { ...prev, project: { ...prev.project, name } } : prev
        );
      }

      try {
        const updated = await svcRenameProject(projectId, name);
        // Refresh to get canonical state from server (SWR handles projects list)
        await fetchProjects();
        if (currentProjectId === projectId) {
          await refreshCurrentProjectDetails();
        }
        return updated;
      } catch (err) {
        // Refresh to restore on failure
        await fetchProjects();
        if (currentProjectId === projectId) {
          await refreshCurrentProjectDetails();
        }
        const message =
          err instanceof Error ? err.message : "Failed to rename project";
        throw err;
      }
    },
    [fetchProjects, currentProjectId, refreshCurrentProjectDetails]
  );

  const deleteProject = useCallback(
    async (projectId: number): Promise<void> => {
      try {
        await svcDeleteProject(projectId);
        await fetchProjects();
        if (currentProjectId === projectId) {
          setCurrentProjectDetails(null);
          setAllCurrentProjectFiles([]);
          projectToUploadFilesMapRef.current.delete(projectId);
          route();
        }
      } catch (err) {
        throw err;
      }
    },
    [fetchProjects, currentProjectId, projectToUploadFilesMapRef, route]
  );

  const getRecentFiles = useCallback(async (): Promise<ProjectFile[]> => {
    try {
      const data: ProjectFile[] = await svcGetRecentFiles();
      return data;
    } catch (err) {
      const message =
        err instanceof Error ? err.message : "Failed to fetch recent files";
      return [];
    }
  }, []);

  const refreshRecentFiles = useCallback(async () => {
    await mutateRecentFiles();
  }, [mutateRecentFiles]);

  const getTempIdMap = (files: File[], optimisticFiles: ProjectFile[]) => {
    const tempIdMap = new Map<string, string>();
    for (const f of files) {
      const tempId = optimisticFiles.find((o) => o.name === f.name)?.temp_id;
      if (tempId) {
        tempIdMap.set(buildFileKey(f), tempId);
      }
    }
    return tempIdMap;
  };

  const removeOptimisticFilesByTempIds = useCallback(
    (optimisticTempIds: Set<string>, projectId?: number | null) => {
      // Remove from recent optimistic list
      setAllRecentFiles((prev) =>
        prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))
      );

      // Remove from current message files if present
      setCurrentMessageFiles((prev) =>
        prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))
      );

      // Remove from project optimistic list
      if (projectId) {
        setAllCurrentProjectFiles((prev) =>
          prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))
        );

        // Clear the tracked optimistic files for this project
        let projectIdToFiles: ProjectFile[] =
          projectToUploadFilesMapRef.current.get(projectId) || [];
        projectIdToFiles = projectIdToFiles.filter(
          (f: ProjectFile) => !f.temp_id || !optimisticTempIds.has(f.temp_id)
        );
        projectToUploadFilesMapRef.current.set(projectId, projectIdToFiles);
      }
    },
    [projectToUploadFilesMapRef]
  );

  const beginUpload = useCallback(
    async (
      files: File[],
      projectId?: number | null,
      onSuccess?: (uploaded: CategorizedFiles) => void,
      onFailure?: (failedTempIds: string[]) => void
    ): Promise<ProjectFile[]> => {
      const rawMax = settingsContext?.settings?.user_file_max_upload_size_mb;

      const oversizedFiles =
        rawMax && rawMax > 0
          ? files.filter((file) => file.size > rawMax * 1024 * 1024)
          : [];
      const validFiles =
        rawMax && rawMax > 0
          ? files.filter((file) => file.size <= rawMax * 1024 * 1024)
          : files;

      if (oversizedFiles.length > 0) {
        const skippedNames = oversizedFiles.map((file) => file.name).join(", ");
        toast.warning(
          `Skipped ${oversizedFiles.length} oversized file(s) (>${rawMax} MB): ${skippedNames}`
        );
      }

      if (validFiles.length === 0) {
        onFailure?.([]);
        return [];
      }

      const optimisticFiles = validFiles.map((f) =>
        createOptimisticFile(f, projectId)
      );
      const tempIdMap = getTempIdMap(validFiles, optimisticFiles);
      setAllRecentFiles((prev) => [...optimisticFiles, ...prev]);
      if (projectId) {
        setAllCurrentProjectFiles((prev) => [...optimisticFiles, ...prev]);
        projectToUploadFilesMapRef.current.set(projectId, optimisticFiles);
      }
      svcUploadFiles(validFiles, projectId, tempIdMap)
        .then((uploaded) => {
          const uploadedFiles = uploaded.user_files || [];
          const tempIdToUploadedFileMap = new Map(
            uploadedFiles.map((f) => [f.temp_id, f])
          );

          setAllRecentFiles((prev) =>
            prev.map((f) => {
              if (f.temp_id) {
                const u = tempIdToUploadedFileMap.get(f.temp_id);
                return u ? { ...f, ...u } : f;
              }
              return f;
            })
          );
          setCurrentMessageFiles((prev) =>
            prev.map((f) => {
              if (f.temp_id) {
                const u = tempIdToUploadedFileMap.get(f.temp_id);
                return u ? { ...f, ...u } : f;
              }
              return f;
            })
          );
          if (projectId) {
            setAllCurrentProjectFiles((prev) =>
              prev.map((f) => {
                if (f.temp_id) {
                  const u = tempIdToUploadedFileMap.get(f.temp_id);
                  return u ? { ...f, ...u } : f;
                }
                return f;
              })
            );
            projectToUploadFilesMapRef.current.set(projectId, []);
          }
          const rejected_files = uploaded.rejected_files || [];

          if (rejected_files.length > 0) {
            const uniqueReasons = new Set(
              rejected_files.map((rejected_file) => rejected_file.reason)
            );
            const detailsParts = Array.from(uniqueReasons);

            toast.warning(
              `Some files were not uploaded. ${detailsParts.join(" | ")}`
            );

            const failedNameSet = new Set<string>(
              rejected_files.map((file) => file.file_name)
            );
            const failedTempIds = Array.from(
              new Set(
                optimisticFiles
                  .filter((f) => f.temp_id && failedNameSet.has(f.name))
                  .map((f) => f.temp_id as string)
              )
            );
            removeOptimisticFilesByTempIds(new Set(failedTempIds), projectId);
            if (failedTempIds.length > 0) {
              onFailure?.(failedTempIds);
            }
          }
          if (uploadedFiles.length > 0) {
            setTrackedUploadIds((prev) => {
              const next = new Set(prev);
              for (const f of uploadedFiles) next.add(f.id);
              return next;
            });
          }
          onSuccess?.(uploaded);
        })
        .catch((err) => {
          // Roll back optimistic inserts on failure
          const optimisticTempIds = new Set(
            optimisticFiles
              .map((f) => f.temp_id)
              .filter((id): id is string => Boolean(id))
          );

          removeOptimisticFilesByTempIds(optimisticTempIds, projectId);

          toast.error("Failed to upload files");

          onFailure?.(Array.from(optimisticTempIds));
        })
        .finally(() => {
          if (projectId && currentProjectId === projectId) {
            refreshCurrentProjectDetails();
          }
          refreshRecentFiles();
        });
      return optimisticFiles;
    },
    [
      currentProjectId,
      refreshCurrentProjectDetails,
      refreshRecentFiles,
      removeOptimisticFilesByTempIds,
      settingsContext,
    ]
  );

  const uploadFiles = useCallback(
    async (
      files: File[],
      projectId?: number | null
    ): Promise<CategorizedFiles> => {
      try {
        const uploaded: CategorizedFiles = await svcUploadFiles(
          files,
          projectId
        );
        const uploadedFiles = uploaded.user_files || [];
        // Track these uploaded file IDs for targeted polling
        if (uploadedFiles.length > 0) {
          setTrackedUploadIds((prev) => {
            const next = new Set(prev);
            for (const f of uploadedFiles) next.add(f.id);
            return next;
          });
        }

        // Refresh canonical sources instead of manual merges
        if (projectId && currentProjectId === projectId) {
          await refreshCurrentProjectDetails();
        }
        await refreshRecentFiles();
        return uploaded;
      } catch (err) {
        throw err;
      }
    },
    [currentProjectId, refreshCurrentProjectDetails, refreshRecentFiles]
  );

  const getFilesInProject = useCallback(
    async (projectId: number): Promise<ProjectFile[]> => {
      try {
        const data: ProjectFile[] = await svcGetFilesInProject(projectId);
        return data;
      } catch (err) {
        const message =
          err instanceof Error ? err.message : "Failed to fetch project files";
        return [];
      }
    },
    []
  );

  // Sync SWR-fetched recent files into local state. On first arrival, seed
  // allRecentFiles as well; subsequent updates only touch recentFiles so the
  // merge effect below can non-destructively apply them to allRecentFiles.
  useEffect(() => {
    if (!recentFilesData) return;
    setRecentFiles(recentFilesData);
    if (!hasInitializedAllRecentFilesRef.current) {
      setAllRecentFiles(recentFilesData);
      hasInitializedAllRecentFilesRef.current = true;
    }
  }, [recentFilesData]);

  useEffect(() => {
    setAllRecentFiles((prev) =>
      prev.map((f) => {
        const newFile = recentFiles.find((f2) => f2.id === f.id);
        return newFile ? { ...f, ...newFile } : f;
      })
    );
  }, [recentFiles]);

  // Clear project details when switching projects to show skeleton
  useEffect(() => {
    setCurrentProjectDetails(null);
    setAllCurrentProjectFiles([]);
  }, [currentProjectId]);

  useEffect(() => {
    if (currentProjectId) {
      refreshCurrentProjectDetails();
    }
  }, [currentProjectId, refreshCurrentProjectDetails]);

  // Targeted polling for tracked uploaded files only
  useEffect(() => {
    const ids = Array.from(trackedUploadIds);
    const shouldPoll = ids.length > 0;

    const poll = async () => {
      if (isPollingRef.current) return;
      isPollingRef.current = true;
      try {
        const statuses = await svcGetUserFileStatuses(ids);
        if (!statuses || statuses.length === 0) return;

        // Build maps for quick lookup
        const statusById = new Map(statuses.map((f) => [f.id, f]));

        // Update currentMessageFiles inline based on polled statuses
        setCurrentMessageFiles((prev) => {
          let changed = false;
          const next: ProjectFile[] = [];
          const newlyFailedLocal: ProjectFile[] = [];
          for (const f of prev) {
            const latest = statusById.get(f.id);
            if (latest) {
              const latestStatus = String(latest.status).toLowerCase();
              if (latestStatus === "failed") {
                if (String(f.status).toLowerCase() !== "failed") {
                  newlyFailedLocal.push(latest);
                }
                changed = true;
                continue;
              }
              if (
                latest.status !== f.status ||
                latest.name !== f.name ||
                latest.file_type !== f.file_type
              ) {
                next.push({ ...f, ...latest } as ProjectFile);
                changed = true;
                continue;
              }
            }
            next.push(f);
          }
          if (newlyFailedLocal.length > 0) {
            setLastFailedFiles(newlyFailedLocal);
          }
          return changed || next.length !== prev.length ? next : prev;
        });

        // Update currentProjectDetails.files with latest statuses
        setCurrentProjectDetails((prev) => {
          if (!prev || !prev.files || prev.files.length === 0) return prev;
          let changed = false;
          const nextFiles = prev.files.map((f) => {
            const latest = statusById.get(f.id);
            if (latest) {
              if (
                latest.status !== f.status ||
                latest.name !== f.name ||
                latest.file_type !== f.file_type
              ) {
                changed = true;
                return { ...f, ...latest } as ProjectFile;
              }
            }
            return f;
          });
          return changed
            ? ({ ...prev, files: nextFiles } as ProjectDetails)
            : prev;
        });

        // Update recent files list inline as well
        setRecentFiles((prev) => {
          if (prev.length === 0) return prev;
          let changed = false;
          const map = new Map(prev.map((f) => [f.id, f]));
          for (const latest of statuses) {
            const id = latest.id;
            if (map.has(id)) {
              const prevVal = map.get(id)!;
              if (
                latest.status !== prevVal.status ||
                latest.name !== prevVal.name ||
                latest.file_type !== prevVal.file_type
              ) {
                map.set(id, latest);
                changed = true;
              }
            }
          }
          return changed ? Array.from(map.values()) : prev;
        });

        // Remove completed/skipped/failed from tracking
        const remaining = new Set(trackedUploadIds);
        const newlyFailed: ProjectFile[] = [];
        for (const f of statuses) {
          const s = String(f.status).toLowerCase();
          if (s === "completed" || s === "skipped") {
            remaining.delete(f.id);
          } else if (s === "failed") {
            remaining.delete(f.id);
            newlyFailed.push(f);
          }
        }
        if (newlyFailed.length > 0) {
          setLastFailedFiles(newlyFailed);
        }
        const trackingChanged = remaining.size !== trackedUploadIds.size;
        if (trackingChanged) {
          setTrackedUploadIds(remaining);
        }

        // If all tracked uploads finished (completed or failed), do a single refresh
        if (remaining.size === 0) {
          if (currentProjectId) {
            await refreshCurrentProjectDetails();
          }
          await refreshRecentFiles();
        }
      } finally {
        isPollingRef.current = false;
      }
    };

    if (shouldPoll && pollIntervalRef.current === null) {
      // Kick once immediately, then start interval
      poll();
      pollIntervalRef.current = window.setInterval(poll, 3000);
    }

    if (!shouldPoll && pollIntervalRef.current !== null) {
      window.clearInterval(pollIntervalRef.current);
      pollIntervalRef.current = null;
    }

    return () => {
      if (pollIntervalRef.current !== null) {
        window.clearInterval(pollIntervalRef.current);
        pollIntervalRef.current = null;
      }
    };
  }, [
    trackedUploadIds,
    currentProjectId,
    refreshCurrentProjectDetails,
    refreshRecentFiles,
  ]);

  const value: ProjectsContextType = useMemo(
    () => ({
      projects,
      recentFiles,
      currentProjectDetails,
      currentProjectId,
      currentMessageFiles,
      allRecentFiles,
      allCurrentProjectFiles,
      isLoadingProjectDetails,
      beginUpload,
      setCurrentMessageFiles,
      upsertInstructions,
      fetchProjects,
      createProject,
      renameProject,
      deleteProject,
      uploadFiles,
      getRecentFiles,
      getFilesInProject,
      refreshCurrentProjectDetails,
      refreshRecentFiles,
      lastFailedFiles,
      clearLastFailedFiles: () => setLastFailedFiles([]),
      deleteUserFile: async (fileId: string) => {
        const result = await svcDeleteUserFile(fileId);
        // If no associations, backend enqueues deletion and status moves to DELETING; refresh lists
        if (!result.has_associations) {
          if (currentProjectId) {
            await refreshCurrentProjectDetails();
          }
          await refreshRecentFiles();
        }
        return result;
      },
      unlinkFileFromProject: async (projectId: number, fileId: string) => {
        const file = allCurrentProjectFiles.find((f) => f.id === fileId);
        if (!file) return;
        setAllCurrentProjectFiles((prev) =>
          prev.filter((f) => f.id !== file.id)
        );
        svcUnlinkFileFromProject(projectId, file.id).then(async (result) => {
          if (result.ok) {
            if (currentProjectId === projectId) {
              await refreshCurrentProjectDetails();
            }
            await refreshRecentFiles();
          } else {
            if (currentProjectId === projectId) {
              setAllCurrentProjectFiles((prev) => [file, ...prev]);
            }
          }
        });
      },
      linkFileToProject: async (projectId: number, file: ProjectFile) => {
        const existing = allCurrentProjectFiles.find((f) => f.id === file.id);
        if (existing) return;
        setAllCurrentProjectFiles((prev) => [file, ...prev]);
        svcLinkFileToProject(projectId, file.id).then(async (result) => {
          if (result.ok) {
            if (currentProjectId === projectId) {
              await refreshCurrentProjectDetails();
            }
            await refreshRecentFiles();
          } else {
            if (currentProjectId === projectId) {
              setAllCurrentProjectFiles((prev) =>
                prev.filter((f) => f.id !== file.id)
              );
            }
          }
        });
      },
    }),
    [
      projects,
      recentFiles,
      currentProjectDetails,
      currentProjectId,
      currentMessageFiles,
      allRecentFiles,
      allCurrentProjectFiles,
      isLoadingProjectDetails,
      beginUpload,
      setCurrentMessageFiles,
      upsertInstructions,
      fetchProjects,
      createProject,
      renameProject,
      deleteProject,
      uploadFiles,
      getRecentFiles,
      getFilesInProject,
      refreshCurrentProjectDetails,
      refreshRecentFiles,
      lastFailedFiles,
    ]
  );

  return (
    <ProjectsContext.Provider value={value}>
      {children}
    </ProjectsContext.Provider>
  );
}

export function useProjectsContext(): ProjectsContextType {
  const ctx = useContext(ProjectsContext);
  if (!ctx) {
    throw new Error(
      "useProjectsContext must be used within a ProjectsProvider"
    );
  }
  return ctx;
}


================================================
FILE: web/src/providers/QueryControllerProvider.tsx
================================================
"use client";

import { createContext, useContext } from "react";
import { eeGated } from "@/ce";
import { QueryControllerProvider as EEQueryControllerProvider } from "@/ee/providers/QueryControllerProvider";
import { SearchDocWithContent, BaseFilters } from "@/lib/search/interfaces";

export type AppMode = "auto" | "search" | "chat";

export type QueryState =
  | { phase: "idle"; appMode: AppMode }
  | { phase: "classifying" }
  | { phase: "searching" }
  | { phase: "search-results" }
  | { phase: "chat" };

export interface QueryControllerValue {
  /** Single state variable encoding both the query lifecycle phase and (when idle) the user's mode selection. */
  state: QueryState;
  /** Update the app mode. Only takes effect when idle. No-op in CE or when search is unavailable. */
  setAppMode: (mode: AppMode) => void;
  /** Search results (empty if chat or not yet searched) */
  searchResults: SearchDocWithContent[];
  /** Document IDs selected by the LLM as most relevant */
  llmSelectedDocIds: string[] | null;
  /** User-facing error message from the last search or classification request, null when idle */
  error: string | null;
  /** Submit a query - routes to search or chat based on app mode */
  submit: (
    query: string,
    onChat: (query: string) => void,
    filters?: BaseFilters
  ) => Promise<void>;
  /** Re-run the current search query with updated server-side filters */
  refineSearch: (filters: BaseFilters) => Promise<void>;
  /** Reset all state to initial values */
  reset: () => void;
}

export const QueryControllerContext = createContext<QueryControllerValue>({
  state: { phase: "idle", appMode: "chat" },
  setAppMode: () => undefined,
  searchResults: [],
  llmSelectedDocIds: null,
  error: null,
  submit: async (_q, onChat) => {
    onChat(_q);
  },
  refineSearch: async () => undefined,
  reset: () => undefined,
});

export function useQueryController(): QueryControllerValue {
  return useContext(QueryControllerContext);
}

export const QueryControllerProvider = eeGated(EEQueryControllerProvider);


================================================
FILE: web/src/providers/SWRConfigProvider.tsx
================================================
"use client";

import { SWRConfig } from "swr";
import { skipRetryOnAuthError } from "@/lib/fetcher";

export default function SWRConfigProvider({
  children,
}: {
  children: React.ReactNode;
}) {
  return (
    <SWRConfig value={{ onErrorRetry: skipRetryOnAuthError }}>
      {children}
    </SWRConfig>
  );
}


================================================
FILE: web/src/providers/SettingsProvider.tsx
================================================
"use client";

import { CombinedSettings } from "@/interfaces/settings";
import {
  createContext,
  useContext,
  useEffect,
  useState,
  useMemo,
  JSX,
} from "react";
import useCCPairs from "@/hooks/useCCPairs";
import {
  useSettings,
  useEnterpriseSettings,
  useCustomAnalyticsScript,
} from "@/hooks/useSettings";
import { HOST_URL, NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import CloudError from "@/components/errorPages/CloudErrorPage";
import ErrorPage from "@/components/errorPages/ErrorPage";
import { FetchError } from "@/lib/fetcher";

export function SettingsProvider({
  children,
}: {
  children: React.ReactNode | JSX.Element;
}) {
  const {
    settings,
    isLoading: coreSettingsLoading,
    error: settingsError,
  } = useSettings();

  // Once core settings load, check if the backend reports EE as enabled.
  // This handles deployments where NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES is
  // unset but LICENSE_ENFORCEMENT_ENABLED defaults to true on the server.
  const eeEnabledRuntime =
    !coreSettingsLoading &&
    !settingsError &&
    settings.ee_features_enabled !== false;

  const {
    enterpriseSettings,
    isLoading: enterpriseSettingsLoading,
    error: enterpriseSettingsError,
  } = useEnterpriseSettings(eeEnabledRuntime);
  const customAnalyticsScript = useCustomAnalyticsScript(eeEnabledRuntime);

  const [isMobile, setIsMobile] = useState<boolean | undefined>();
  const settingsLoading = coreSettingsLoading || enterpriseSettingsLoading;
  const vectorDbEnabled =
    !coreSettingsLoading &&
    !settingsError &&
    settings.vector_db_enabled !== false;
  const { ccPairs } = useCCPairs(vectorDbEnabled);

  useEffect(() => {
    const checkMobile = () => {
      setIsMobile(window.innerWidth < 768);
    };

    checkMobile();
    window.addEventListener("resize", checkMobile);
    return () => window.removeEventListener("resize", checkMobile);
  }, []);

  /**
   * NOTE (@raunakab):
   * Whether search mode is actually available to users.
   *
   * Prefer `isSearchModeAvailable` over `settings.search_ui_enabled`.
   * The raw setting only captures the admin's *intent*. This derived value
   * also checks runtime prerequisites (connectors must exist) so that
   * consumers don't need to independently verify availability.
   */
  const isSearchModeAvailable = useMemo(
    () => settings.search_ui_enabled !== false && ccPairs.length > 0,
    [settings.search_ui_enabled, ccPairs.length]
  );

  const combinedSettings: CombinedSettings = useMemo(
    () => ({
      settings,
      enterpriseSettings,
      customAnalyticsScript,
      webVersion: settings.version ?? null,
      webDomain: HOST_URL,
      isMobile,
      isSearchModeAvailable,
      settingsLoading,
    }),
    [
      settings,
      enterpriseSettings,
      customAnalyticsScript,
      isMobile,
      isSearchModeAvailable,
      settingsLoading,
    ]
  );

  // Auth errors (401/403) are expected for unauthenticated users (e.g. login
  // page). Fall through with default settings so the app can render normally.
  const isAuthError = (err: Error | undefined) =>
    err instanceof FetchError && (err.status === 401 || err.status === 403);

  const hasFatalError =
    (settingsError && !isAuthError(settingsError)) ||
    (enterpriseSettingsError && !isAuthError(enterpriseSettingsError));

  if (hasFatalError) {
    return NEXT_PUBLIC_CLOUD_ENABLED ? <CloudError /> : <ErrorPage />;
  }

  return (
    <SettingsContext.Provider value={combinedSettings}>
      {children}
    </SettingsContext.Provider>
  );
}

export const SettingsContext = createContext<CombinedSettings | null>(null);

export function useSettingsContext() {
  const context = useContext(SettingsContext);
  if (context === null) {
    throw new Error(
      "useSettingsContext must be used within a SettingsProvider"
    );
  }
  return context;
}

export function useVectorDbEnabled(): boolean {
  const settings = useSettingsContext();
  return settings.settings.vector_db_enabled !== false;
}


================================================
FILE: web/src/providers/ToastProvider.tsx
================================================
"use client";

import { useCallback, useSyncExternalStore } from "react";
import { cn } from "@/lib/utils";
import Message from "@/refresh-components/messages/Message";
import { NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK } from "@/lib/constants";
import { toast, toastStore, MAX_VISIBLE_TOASTS } from "@/hooks/useToast";
import type { Toast, ToastLevel } from "@/hooks/useToast";

const ANIMATION_DURATION = 200; // matches tailwind fade-out-scale (0.2s)
const MAX_TOAST_MESSAGE_LENGTH = 150;

function levelProps(level: ToastLevel): Record<string, boolean> {
  switch (level) {
    case "success":
      return { success: true };
    case "error":
      return { error: true };
    case "warning":
      return { warning: true };
    case "info":
      return { info: true };
    default:
      return { default: true };
  }
}

function buildDescription(t: Toast): string | undefined {
  const parts: string[] = [];
  if (t.description) parts.push(t.description);
  if (t.level === "error" && NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK) {
    parts.push(
      "Need help? Join our community at https://discord.gg/4NA5SbzrWb for support!"
    );
  }
  return parts.length > 0 ? parts.join(" ") : undefined;
}

function ToastContainer() {
  const allToasts = useSyncExternalStore(
    toastStore.subscribe,
    toastStore.getSnapshot,
    toastStore.getSnapshot
  );

  const visible = allToasts.slice(-MAX_VISIBLE_TOASTS);

  const handleClose = useCallback((id: string) => {
    toast._markLeaving(id);
    setTimeout(() => {
      toast.dismiss(id);
    }, ANIMATION_DURATION);
  }, []);

  if (visible.length === 0) return null;

  return (
    <div
      data-testid="toast-container"
      className={cn(
        "fixed bottom-4 right-4 z-[10000]",
        "flex flex-col gap-2 items-end",
        "max-w-[420px]"
      )}
    >
      {visible.map((t) => {
        const text =
          t.message.length > MAX_TOAST_MESSAGE_LENGTH
            ? t.message.slice(0, MAX_TOAST_MESSAGE_LENGTH) + "…"
            : t.message;
        return (
          <div
            key={t.id}
            className={cn(
              t.leaving ? "animate-fade-out-scale" : "animate-fade-in-scale"
            )}
          >
            <Message
              flash
              medium
              {...levelProps(t.level ?? "info")}
              text={text}
              description={buildDescription(t)}
              close={t.dismissible}
              onClose={() => handleClose(t.id)}
              actions={t.actionLabel ? t.actionLabel : undefined}
              onAction={t.onAction}
            />
          </div>
        );
      })}
    </div>
  );
}

interface ToastProviderProps {
  children: React.ReactNode;
}

export default function ToastProvider({ children }: ToastProviderProps) {
  return (
    <>
      {children}
      <ToastContainer />
    </>
  );
}


================================================
FILE: web/src/providers/UserProvider.tsx
================================================
"use client";

import React, {
  createContext,
  useCallback,
  useContext,
  useMemo,
  useState,
  useEffect,
  useRef,
} from "react";
import {
  User,
  UserPersonalization,
  UserRole,
  ThemePreference,
} from "@/lib/types";
import { usePostHog } from "posthog-js/react";
import { SettingsContext } from "@/providers/SettingsProvider";
import { useTokenRefresh } from "@/hooks/useTokenRefresh";
import { useCurrentUser } from "@/hooks/useCurrentUser";
import {
  useAuthTypeMetadata,
  AuthTypeMetadata,
} from "@/hooks/useAuthTypeMetadata";
import { updateUserPersonalization as persistPersonalization } from "@/lib/userSettings";
import { useTheme } from "next-themes";

interface UserContextType {
  user: User | null;
  isAdmin: boolean;
  isCurator: boolean;
  refreshUser: () => Promise<void>;
  isCloudSuperuser: boolean;
  authTypeMetadata: AuthTypeMetadata;
  updateUserAutoScroll: (autoScroll: boolean) => Promise<void>;
  updateUserShortcuts: (enabled: boolean) => Promise<void>;
  toggleAgentPinnedStatus: (
    currentPinnedAgentIDs: number[],
    agentId: number,
    isPinned: boolean
  ) => Promise<boolean>;
  updateUserTemperatureOverrideEnabled: (enabled: boolean) => Promise<void>;
  updateUserPersonalization: (
    personalization: UserPersonalization
  ) => Promise<void>;
  updateUserThemePreference: (
    themePreference: ThemePreference
  ) => Promise<void>;
  updateUserChatBackground: (chatBackground: string | null) => Promise<void>;
  updateUserDefaultModel: (defaultModel: string | null) => Promise<void>;
  updateUserDefaultAppMode: (mode: "CHAT" | "SEARCH") => Promise<void>;
  updateUserVoiceSettings: (settings: {
    auto_send?: boolean;
    auto_playback?: boolean;
    playback_speed?: number;
  }) => Promise<void>;
}

const UserContext = createContext<UserContextType | undefined>(undefined);

export function UserProvider({ children }: { children: React.ReactNode }) {
  const { user: fetchedUser, mutateUser } = useCurrentUser();
  const { authTypeMetadata } = useAuthTypeMetadata();
  const updatedSettings = useContext(SettingsContext);
  const posthog = usePostHog();

  // For auto_scroll and temperature_override_enabled:
  // - If user has a preference set, use that
  // - Otherwise, use the workspace setting if available
  const mergeUserPreferences = useCallback(
    (currentUser: User | null): User | null => {
      if (!currentUser) return null;
      return {
        ...currentUser,
        preferences: {
          ...currentUser.preferences,
          auto_scroll:
            currentUser.preferences?.auto_scroll ??
            updatedSettings?.settings?.auto_scroll ??
            false,
          temperature_override_enabled:
            currentUser.preferences?.temperature_override_enabled ??
            updatedSettings?.settings?.temperature_override_enabled ??
            false,
        },
      };
    },
    [updatedSettings]
  );

  const [upToDateUser, setUpToDateUser] = useState<User | null>(null);

  useEffect(() => {
    setUpToDateUser(mergeUserPreferences(fetchedUser ?? null));
  }, [fetchedUser, mergeUserPreferences]);

  useEffect(() => {
    if (!posthog) return;

    if (fetchedUser?.id) {
      const identifyData: Record<string, any> = {
        email: fetchedUser.email,
      };
      if (fetchedUser.team_name) {
        identifyData.team_name = fetchedUser.team_name;
      }
      posthog.identify(fetchedUser.id, identifyData);
    } else {
      posthog.reset();
    }
  }, [posthog, fetchedUser]);

  // Use the custom token refresh hook — on refresh failure, revalidate via SWR
  // so the result goes through mergeUserPreferences
  const onRefreshFail = useCallback(async () => {
    await mutateUser();
  }, [mutateUser]);
  useTokenRefresh(upToDateUser, authTypeMetadata, onRefreshFail);

  // Sync user's theme preference from DB to next-themes on load
  const { setTheme, theme } = useTheme();
  const hasSyncedThemeRef = useRef(false);

  useEffect(() => {
    // Only sync once per session
    if (hasSyncedThemeRef.current) return;

    // Wait for next-themes to initialize
    if (!theme) return;

    // Wait for user data to load
    if (!upToDateUser?.id) return;

    // Only sync if user has a saved preference
    const savedTheme = upToDateUser?.preferences?.theme_preference;
    if (!savedTheme) return;

    // Sync DB theme to localStorage
    setTheme(savedTheme);
    hasSyncedThemeRef.current = true;
  }, [
    upToDateUser?.id,
    upToDateUser?.preferences?.theme_preference,
    theme,
    setTheme,
  ]);

  const updateUserTemperatureOverrideEnabled = async (enabled: boolean) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              temperature_override_enabled: enabled,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch(
        `/api/temperature-override-enabled?temperature_override_enabled=${enabled}`,
        {
          method: "PATCH",
          headers: {
            "Content-Type": "application/json",
          },
        }
      );

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update user temperature override setting");
      }
    } catch (error) {
      console.error("Error updating user temperature override setting:", error);
      throw error;
    }
  };

  const updateUserShortcuts = async (enabled: boolean) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              shortcut_enabled: enabled,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch(
        `/api/shortcut-enabled?shortcut_enabled=${enabled}`,
        {
          method: "PATCH",
          headers: {
            "Content-Type": "application/json",
          },
        }
      );

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update user shortcut setting");
      }
    } catch (error) {
      console.error("Error updating user shortcut setting:", error);
      throw error;
    }
  };

  const updateUserAutoScroll = async (autoScroll: boolean) => {
    try {
      const response = await fetch("/api/auto-scroll", {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ auto_scroll: autoScroll }),
      });
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              auto_scroll: autoScroll,
            },
          };
        }
        return prevUser;
      });

      if (!response.ok) {
        throw new Error("Failed to update auto-scroll setting");
      }
    } catch (error) {
      console.error("Error updating auto-scroll setting:", error);
      throw error;
    }
  };

  const updateUserPersonalization = async (
    personalization: UserPersonalization
  ) => {
    try {
      setUpToDateUser((prevUser) => {
        if (!prevUser) {
          return prevUser;
        }

        return {
          ...prevUser,
          personalization,
        };
      });

      const response = await persistPersonalization(personalization);

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update personalization settings");
      }

      await refreshUser();
    } catch (error) {
      console.error("Error updating personalization settings:", error);
      throw error;
    }
  };

  const toggleAgentPinnedStatus = async (
    currentPinnedAgentIDs: number[],
    agentId: number,
    isPinned: boolean
  ) => {
    setUpToDateUser((prevUser) => {
      if (!prevUser) return prevUser;
      return {
        ...prevUser,
        preferences: {
          ...prevUser.preferences,
          pinned_assistants: isPinned
            ? [...currentPinnedAgentIDs, agentId]
            : currentPinnedAgentIDs.filter((id) => id !== agentId),
        },
      };
    });

    let updatedPinnedAgentsIds = isPinned
      ? [...currentPinnedAgentIDs, agentId]
      : currentPinnedAgentIDs.filter((id) => id !== agentId);
    try {
      const response = await fetch(`/api/user/pinned-assistants`, {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          ordered_assistant_ids: updatedPinnedAgentsIds,
        }),
      });

      if (!response.ok) {
        throw new Error("Failed to update pinned assistants");
      }

      await refreshUser();
      return true;
    } catch (error) {
      console.error("Error updating pinned assistants:", error);
      return false;
    }
  };

  const updateUserThemePreference = async (
    themePreference: ThemePreference
  ) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              theme_preference: themePreference,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch(`/api/user/theme-preference`, {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ theme_preference: themePreference }),
      });

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update theme preference");
      }
    } catch (error) {
      console.error("Error updating theme preference:", error);
      throw error;
    }
  };

  const updateUserChatBackground = async (chatBackground: string | null) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              chat_background: chatBackground,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch(`/api/user/chat-background`, {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ chat_background: chatBackground }),
      });

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update chat background");
      }
    } catch (error) {
      console.error("Error updating chat background:", error);
      throw error;
    }
  };

  const updateUserDefaultModel = async (defaultModel: string | null) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              default_model: defaultModel,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch(`/api/user/default-model`, {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ default_model: defaultModel }),
      });

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update default model");
      }
    } catch (error) {
      console.error("Error updating default model:", error);
      throw error;
    }
  };

  const updateUserDefaultAppMode = async (mode: "CHAT" | "SEARCH") => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              default_app_mode: mode,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch("/api/user/default-app-mode", {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ default_app_mode: mode }),
      });

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update default app mode");
      }
    } catch (error) {
      console.error("Error updating default app mode:", error);
      throw error;
    }
  };

  const updateUserVoiceSettings = async (settings: {
    auto_send?: boolean;
    auto_playback?: boolean;
    playback_speed?: number;
  }) => {
    try {
      setUpToDateUser((prevUser) => {
        if (prevUser) {
          return {
            ...prevUser,
            preferences: {
              ...prevUser.preferences,
              voice_auto_send:
                settings.auto_send ?? prevUser.preferences.voice_auto_send,
              voice_auto_playback:
                settings.auto_playback ??
                prevUser.preferences.voice_auto_playback,
              voice_playback_speed:
                settings.playback_speed ??
                prevUser.preferences.voice_playback_speed,
            },
          };
        }
        return prevUser;
      });

      const response = await fetch("/api/voice/settings", {
        method: "PATCH",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(settings),
      });

      if (!response.ok) {
        await refreshUser();
        throw new Error("Failed to update voice settings");
      }
    } catch (error) {
      console.error("Error updating voice settings:", error);
      throw error;
    }
  };

  const refreshUser = async () => {
    await mutateUser();
  };

  return (
    <UserContext.Provider
      value={{
        user: upToDateUser,
        refreshUser,
        authTypeMetadata,
        updateUserAutoScroll,
        updateUserShortcuts,
        updateUserTemperatureOverrideEnabled,
        updateUserPersonalization,
        updateUserThemePreference,
        updateUserChatBackground,
        updateUserDefaultModel,
        updateUserDefaultAppMode,
        updateUserVoiceSettings,
        toggleAgentPinnedStatus,
        isAdmin: upToDateUser?.role === UserRole.ADMIN,
        // Curator status applies for either global or basic curator
        isCurator:
          upToDateUser?.role === UserRole.CURATOR ||
          upToDateUser?.role === UserRole.GLOBAL_CURATOR,
        isCloudSuperuser: upToDateUser?.is_cloud_superuser ?? false,
      }}
    >
      {children}
    </UserContext.Provider>
  );
}

export function useUser() {
  const context = useContext(UserContext);
  if (context === undefined) {
    throw new Error("useUser must be used within a UserProvider");
  }
  return context;
}


================================================
FILE: web/src/providers/VoiceModeProvider.tsx
================================================
"use client";

import React, {
  createContext,
  useContext,
  useState,
  useCallback,
  useRef,
  useEffect,
} from "react";
import { useUser } from "@/providers/UserProvider";
import { useVoiceStatus } from "@/hooks/useVoiceStatus";
import { INTERNAL_URL, IS_DEV } from "@/lib/constants";

// --- TTS Configuration Constants ---

/** WebSocket path for TTS streaming (backend-direct, used in dev) */
const TTS_WS_PATH = "/voice/synthesize/stream";

/** WebSocket path for TTS streaming (proxied, used in production) */
const TTS_WS_PATH_PROXIED = "/api/voice/synthesize/stream";

/** API endpoint to fetch a short-lived WebSocket auth token */
const WS_TOKEN_ENDPOINT = "/api/voice/ws-token";

/** Delay before starting audio playback to buffer initial chunks (ms) */
const AUDIO_START_DELAY_MS = 100;

/** Interval for checking if audio playback has ended (ms) */
const END_CHECK_INTERVAL_MS = 200;

/** Delay before retrying WebSocket end signal (ms) */
const WS_END_RETRY_DELAY_MS = 100;

/** Delay before checking finalizeStream readiness (ms) */
const FINALIZE_RETRY_DELAY_MS = 50;

/** Fast-start timer: how long to wait before sending first TTS chunk (ms) */
const FAST_START_DELAY_MS = 200;

/** Flush timer: how long to wait after punctuation before flushing (ms) */
const FLUSH_DELAY_MS = 250;

/** Safety timeout for TTS loading — resets state if generation stalls (ms) */
const TTS_LOADING_TIMEOUT_MS = 60_000;

/** Hard safety timeout for entire TTS playback session (ms).
 *  Prevents stuck audio from blocking the UI indefinitely. */
const TTS_SESSION_TIMEOUT_MS = 5 * 60 * 1000;

/** Characters revealed per second when audio duration is unknown */
const BASE_CHARS_PER_SECOND = 15;

/** How far ahead (in seconds) text reveal leads audio playback */
const REVEAL_LEAD_SECONDS = 0.28;

/** Max characters to reveal per animation frame (smooths catch-up) */
const MAX_CATCHUP_CHARS_PER_FRAME = 8;

interface VoiceModeContextType {
  /** Whether TTS audio is currently playing */
  isTTSPlaying: boolean;
  /** Whether manual read-aloud playback is currently speaking */
  isManualTTSPlaying: boolean;
  /** Whether TTS is loading/generating audio */
  isTTSLoading: boolean;
  /** Text that has been spoken so far (for synced display) */
  spokenText: string;
  /** Node id of the assistant message currently being spoken */
  activeMessageNodeId: number | null;
  /** Stream text for TTS - speaks sentences as they complete */
  streamTTS: (
    text: string,
    isComplete?: boolean,
    messageNodeId?: number
  ) => void;
  /** Stop TTS playback */
  stopTTS: (options?: { manual?: boolean }) => void;
  /** Increments when TTS is manually stopped by the user */
  manualStopCount: number;
  /** Reset state for new message */
  resetTTS: () => void;
  /** Audio playback progress (0-1) based on currentTime vs estimated duration */
  audioProgress: number;
  /** Number of clean characters to reveal based on audio progress */
  revealedCharCount: number;
  /** Whether audio sync is active for progressive text reveal */
  isAudioSyncActive: boolean;
  /** Whether auto-playback is enabled in user preferences */
  autoPlayback: boolean;
  /** True after text is queued for autoplay but before audio starts playing */
  isAwaitingAutoPlaybackStart: boolean;
  /** Whether TTS audio is muted */
  isTTSMuted: boolean;
  /** Toggle TTS mute state */
  toggleTTSMute: () => void;
  /** Set manual read-aloud speaking state for shared UI (e.g., waveform) */
  setManualTTSPlaying: (playing: boolean) => void;
  /** Register manual read-aloud mute handler so shared mute controls affect it */
  registerManualTTSMuteHandler: (
    handler: ((muted: boolean) => void) | null
  ) => void;
}

const VoiceModeContext = createContext<VoiceModeContextType | null>(null);

/**
 * Clean text for TTS - remove markdown formatting
 */
function cleanTextForTTS(text: string): string {
  return text
    .replace(/\*\*/g, "") // Remove bold markers
    .replace(/\*/g, "") // Remove italic markers
    .replace(/`{1,3}/g, "") // Remove code markers
    .replace(/#{1,6}\s*/g, "") // Remove headers
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Convert links to just text
    .replace(/\n+/g, " ") // Replace newlines with spaces
    .replace(/\s+/g, " ") // Normalize whitespace
    .trim();
}

/**
 * Find the next natural chunk boundary in text.
 * Prefers sentence endings for natural speech rhythm.
 */
function findChunkBoundary(text: string): number {
  // Look for sentence endings (. ! ?) - these are natural speech breaks
  const sentenceRegex = /[.!?](?:\s|$)/g;
  let match;
  let lastSentenceEnd = -1;

  while ((match = sentenceRegex.exec(text)) !== null) {
    const endPos = match.index + 1;
    if (endPos >= 10) {
      lastSentenceEnd = endPos;
      if (endPos >= 30) return endPos;
    }
  }

  if (lastSentenceEnd > 0) return lastSentenceEnd;

  // Only break at clauses for very long text (150+ chars)
  if (text.length >= 150) {
    const clauseRegex = /[,;:]\s/g;
    while ((match = clauseRegex.exec(text)) !== null) {
      const endPos = match.index + 1;
      if (endPos >= 70) return endPos;
    }
  }

  // Break at word boundary for extremely long text (200+ chars)
  if (text.length >= 200) {
    const spaceIndex = text.lastIndexOf(" ", 120);
    if (spaceIndex > 80) return spaceIndex;
  }

  return -1;
}

export function VoiceModeProvider({ children }: { children: React.ReactNode }) {
  const { user } = useUser();
  const { ttsEnabled } = useVoiceStatus();
  const autoPlayback =
    (user?.preferences?.voice_auto_playback ?? false) && ttsEnabled;
  const playbackSpeed = user?.preferences?.voice_playback_speed ?? 1.0;

  const [isTTSPlaying, setIsTTSPlaying] = useState(false);
  const [isManualTTSPlaying, setIsManualTTSPlaying] = useState(false);
  const [isTTSLoading, setIsTTSLoading] = useState(false);
  const [spokenText, setSpokenText] = useState("");
  const [activeMessageNodeId, setActiveMessageNodeId] = useState<number | null>(
    null
  );
  const [isAwaitingAutoPlaybackStart, setIsAwaitingAutoPlaybackStart] =
    useState(false);
  const [manualStopCount, setManualStopCount] = useState(0);
  const [isTTSMuted, setIsTTSMuted] = useState(false);
  const manualTTSMuteHandlerRef = useRef<((muted: boolean) => void) | null>(
    null
  );

  // Audio progress tracking for progressive text reveal
  const [audioProgress, setAudioProgress] = useState(0);
  const [totalSpokenCharCount, setTotalSpokenCharCount] = useState(0);
  const [revealedCharCount, setRevealedCharCount] = useState(0);

  // WebSocket and audio state
  const wsRef = useRef<WebSocket | null>(null);
  const mediaSourceRef = useRef<MediaSource | null>(null);
  const sourceBufferRef = useRef<SourceBuffer | null>(null);
  const audioElementRef = useRef<HTMLAudioElement | null>(null);
  const audioUrlRef = useRef<string | null>(null);
  const pendingChunksRef = useRef<Uint8Array[]>([]);
  const isAppendingRef = useRef(false);
  const isPlayingRef = useRef(false);
  const hasStartedPlaybackRef = useRef(false);

  // Audio progress tracking refs
  const totalBytesReceivedRef = useRef(0);
  const animationFrameRef = useRef<number | null>(null);
  const lastRevealedCharCountRef = useRef(0);

  // Text tracking
  const committedPositionRef = useRef(0);
  const lastRawTextRef = useRef("");
  const pendingTextRef = useRef<string[]>([]);
  const isConnectingRef = useRef(false);

  // Timers
  const flushTimerRef = useRef<NodeJS.Timeout | null>(null);
  const fastStartTimerRef = useRef<NodeJS.Timeout | null>(null);
  const loadingTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const endCheckIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const sessionTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const hasSpokenFirstChunkRef = useRef(false);
  const hasSignaledEndRef = useRef(false);
  const streamEndedRef = useRef(false);

  // Process next chunk from the pending queue
  const processNextChunk = useCallback(() => {
    if (
      isAppendingRef.current ||
      pendingChunksRef.current.length === 0 ||
      !sourceBufferRef.current ||
      sourceBufferRef.current.updating
    ) {
      return;
    }

    const chunk = pendingChunksRef.current.shift();
    if (chunk) {
      isAppendingRef.current = true;
      try {
        const buffer = chunk.buffer.slice(
          chunk.byteOffset,
          chunk.byteOffset + chunk.byteLength
        ) as ArrayBuffer;
        sourceBufferRef.current.appendBuffer(buffer);
      } catch {
        isAppendingRef.current = false;
        processNextChunk();
      }
    }
  }, []);

  // Finalize the media stream when done
  const finalizeStream = useCallback(() => {
    if (pendingChunksRef.current.length > 0 || isAppendingRef.current) {
      setTimeout(() => finalizeStream(), FINALIZE_RETRY_DELAY_MS);
      return;
    }

    streamEndedRef.current = true;

    // Don't call endOfStream if no audio was received - it causes errors
    if (totalBytesReceivedRef.current === 0) {
      return;
    }

    if (
      mediaSourceRef.current &&
      mediaSourceRef.current.readyState === "open" &&
      sourceBufferRef.current &&
      !sourceBufferRef.current.updating
    ) {
      try {
        mediaSourceRef.current.endOfStream();
      } catch {
        // Ignore endOfStream errors
      }
    }

    // Clear any existing end check interval
    if (endCheckIntervalRef.current) {
      clearInterval(endCheckIntervalRef.current);
      endCheckIntervalRef.current = null;
    }

    // More aggressive end detection: check every 200ms if audio has ended
    // This handles cases where onended event doesn't fire with MediaSource
    endCheckIntervalRef.current = setInterval(() => {
      const audioEl = audioElementRef.current;

      // If audio element is gone or stream was reset, clean up
      if (!audioEl || !streamEndedRef.current) {
        if (endCheckIntervalRef.current) {
          clearInterval(endCheckIntervalRef.current);
          endCheckIntervalRef.current = null;
        }
        return;
      }

      // Only check audio.ended - don't use duration comparison as it's unreliable
      // with MediaSource streaming (duration updates as chunks arrive)
      const hasEnded = audioEl.ended;

      if (hasEnded && isPlayingRef.current) {
        isPlayingRef.current = false;
        setIsTTSPlaying(false);
        setActiveMessageNodeId(null);
        setIsAwaitingAutoPlaybackStart(false);
        if (endCheckIntervalRef.current) {
          clearInterval(endCheckIntervalRef.current);
          endCheckIntervalRef.current = null;
        }
      }
    }, END_CHECK_INTERVAL_MS);

    // No fixed timeout fallback here.
    // Long responses can legitimately continue playing well past 10s after stream end.
    // We rely on onended / interval end detection instead.
  }, []);

  // Initialize MediaSource for streaming audio
  const initMediaSource = useCallback(async () => {
    // Check if MediaSource is supported
    if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
      return false;
    }

    // Clean up any existing MediaSource before creating a new one
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current);
      audioUrlRef.current = null;
    }
    if (audioElementRef.current) {
      audioElementRef.current.pause();
      audioElementRef.current.src = "";
      audioElementRef.current = null;
    }
    if (
      mediaSourceRef.current &&
      mediaSourceRef.current.readyState === "open"
    ) {
      try {
        if (sourceBufferRef.current) {
          mediaSourceRef.current.removeSourceBuffer(sourceBufferRef.current);
        }
        mediaSourceRef.current.endOfStream();
      } catch {
        // Ignore cleanup errors
      }
    }
    mediaSourceRef.current = null;
    sourceBufferRef.current = null;

    // Create MediaSource and audio element
    mediaSourceRef.current = new MediaSource();
    audioElementRef.current = new Audio();
    audioUrlRef.current = URL.createObjectURL(mediaSourceRef.current);
    audioElementRef.current.src = audioUrlRef.current;

    audioElementRef.current.onplay = () => {
      if (!isPlayingRef.current) {
        isPlayingRef.current = true;
        setIsTTSPlaying(true);
        setIsAwaitingAutoPlaybackStart(false);
      }
    };

    audioElementRef.current.onended = () => {
      isPlayingRef.current = false;
      setIsTTSPlaying(false);
      setActiveMessageNodeId(null);
      setIsAwaitingAutoPlaybackStart(false);
    };

    audioElementRef.current.onerror = () => {
      const audioEl = audioElementRef.current;
      const mediaError = audioEl?.error;

      // Ignore spurious errors with no actual error code (happens during cleanup)
      if (!mediaError || mediaError.code === undefined) {
        return;
      }

      isPlayingRef.current = false;
      setIsTTSPlaying(false);
      setActiveMessageNodeId(null);
      setIsAwaitingAutoPlaybackStart(false);
    };

    // Wait for MediaSource to be ready
    await new Promise<void>((resolve, reject) => {
      if (!mediaSourceRef.current) {
        reject(new Error("MediaSource not initialized"));
        return;
      }

      mediaSourceRef.current.onsourceopen = () => {
        try {
          sourceBufferRef.current =
            mediaSourceRef.current!.addSourceBuffer("audio/mpeg");
          sourceBufferRef.current.mode = "sequence";

          sourceBufferRef.current.onupdateend = () => {
            isAppendingRef.current = false;
            processNextChunk();
          };

          resolve();
        } catch (err) {
          reject(err);
        }
      };

      mediaSourceRef.current.onsourceclose = () => {
        if (mediaSourceRef.current?.readyState === "closed") {
          reject(new Error("MediaSource closed unexpectedly"));
        }
      };
    });

    return true;
  }, [processNextChunk]);

  // Handle incoming audio data from WebSocket
  const handleAudioData = useCallback(
    async (data: ArrayBuffer) => {
      // Track total bytes for duration estimation
      totalBytesReceivedRef.current += data.byteLength;

      // If we are receiving audio bytes, playback startup is no longer pending.
      // This avoids UI getting stuck in "thinking" when onplay is delayed.
      setIsAwaitingAutoPlaybackStart(false);

      pendingChunksRef.current.push(new Uint8Array(data));
      processNextChunk();

      // Start playback after first chunk
      if (!hasStartedPlaybackRef.current && audioElementRef.current) {
        // Small delay to buffer a bit before starting
        setTimeout(() => {
          const audioEl = audioElementRef.current;
          if (!audioEl || hasStartedPlaybackRef.current) {
            return;
          }

          audioEl
            .play()
            .then(() => {
              hasStartedPlaybackRef.current = true;
            })
            .catch(() => {
              // Keep hasStartedPlaybackRef as false so we retry on next audio chunk.
            });
        }, AUDIO_START_DELAY_MS);
      }
    },
    [processNextChunk]
  );

  // Get WebSocket URL for TTS with authentication token
  const getWebSocketUrl = useCallback(async () => {
    // Fetch short-lived WS token
    const tokenResponse = await fetch(WS_TOKEN_ENDPOINT, {
      method: "POST",
      credentials: "include",
    });
    if (!tokenResponse.ok) {
      throw new Error("Failed to get WebSocket authentication token");
    }
    const { token } = await tokenResponse.json();

    // In development, the Next.js dev server (port 3000) does not proxy
    // WebSocket connections, so we connect directly to the backend (port 8080).
    // In production, the reverse proxy handles the /api prefix routing.
    const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;
    const path = IS_DEV ? TTS_WS_PATH : TTS_WS_PATH_PROXIED;
    // Auth: the token query param is validated server-side by
    // current_user_from_websocket (single-use, 60s TTL, same checks as HTTP auth).
    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;
  }, []);

  // Connect to WebSocket TTS
  const connectWebSocket = useCallback(async () => {
    // Skip if already connected, connecting, or in the process of connecting
    if (
      wsRef.current?.readyState === WebSocket.OPEN ||
      wsRef.current?.readyState === WebSocket.CONNECTING ||
      isConnectingRef.current
    ) {
      return;
    }

    // Set connecting flag to prevent concurrent connection attempts
    isConnectingRef.current = true;

    try {
      // Initialize MediaSource first
      const initialized = await initMediaSource();
      if (!initialized) {
        isConnectingRef.current = false;
        return;
      }

      // Get WebSocket URL with auth token
      const wsUrl = await getWebSocketUrl();

      const ws = new WebSocket(wsUrl);

      ws.onopen = () => {
        isConnectingRef.current = false;
        // Send initial config
        ws.send(
          JSON.stringify({
            type: "config",
            speed: playbackSpeed,
          })
        );

        // Send any pending text
        for (const text of pendingTextRef.current) {
          ws.send(JSON.stringify({ type: "synthesize", text }));
        }
        pendingTextRef.current = [];
      };

      ws.onmessage = async (event) => {
        if (event.data instanceof Blob) {
          const arrayBuffer = await event.data.arrayBuffer();
          handleAudioData(arrayBuffer);
        } else if (typeof event.data === "string") {
          try {
            const msg = JSON.parse(event.data);
            if (msg.type === "audio_done") {
              if (loadingTimeoutRef.current) {
                clearTimeout(loadingTimeoutRef.current);
                loadingTimeoutRef.current = null;
              }
              setIsTTSLoading(false);
              finalizeStream();
            }
          } catch {
            // Ignore parse errors
          }
        }
      };

      ws.onerror = () => {
        isConnectingRef.current = false;
        setIsTTSLoading(false);
        setIsAwaitingAutoPlaybackStart(false);
      };

      ws.onclose = () => {
        wsRef.current = null;
        isConnectingRef.current = false;
        setIsTTSLoading(false);
        setIsAwaitingAutoPlaybackStart(false);
        finalizeStream();
      };

      wsRef.current = ws;
    } catch {
      isConnectingRef.current = false;
    }
  }, [
    playbackSpeed,
    handleAudioData,
    getWebSocketUrl,
    initMediaSource,
    finalizeStream,
  ]);

  const stopTTS = useCallback((options?: { manual?: boolean }) => {
    // Clear timers
    if (flushTimerRef.current) {
      clearTimeout(flushTimerRef.current);
      flushTimerRef.current = null;
    }
    if (fastStartTimerRef.current) {
      clearTimeout(fastStartTimerRef.current);
      fastStartTimerRef.current = null;
    }
    if (loadingTimeoutRef.current) {
      clearTimeout(loadingTimeoutRef.current);
      loadingTimeoutRef.current = null;
    }
    if (endCheckIntervalRef.current) {
      clearInterval(endCheckIntervalRef.current);
      endCheckIntervalRef.current = null;
    }
    if (sessionTimeoutRef.current) {
      clearTimeout(sessionTimeoutRef.current);
      sessionTimeoutRef.current = null;
    }

    // Revoke blob URL to prevent memory leak
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current);
      audioUrlRef.current = null;
    }

    // Stop audio element
    if (audioElementRef.current) {
      audioElementRef.current.pause();
      audioElementRef.current.src = "";
      audioElementRef.current = null;
    }

    // Cleanup MediaSource
    if (
      mediaSourceRef.current &&
      mediaSourceRef.current.readyState === "open"
    ) {
      try {
        if (sourceBufferRef.current) {
          mediaSourceRef.current.removeSourceBuffer(sourceBufferRef.current);
        }
        mediaSourceRef.current.endOfStream();
      } catch {
        // Ignore cleanup errors
      }
    }

    mediaSourceRef.current = null;
    sourceBufferRef.current = null;
    pendingChunksRef.current = [];
    isAppendingRef.current = false;
    hasStartedPlaybackRef.current = false;
    pendingTextRef.current = [];
    isPlayingRef.current = false;
    hasSignaledEndRef.current = false;
    isConnectingRef.current = false;
    streamEndedRef.current = false;

    // Close WebSocket
    if (wsRef.current) {
      try {
        wsRef.current.send(JSON.stringify({ type: "end" }));
        wsRef.current.close();
      } catch {
        // Ignore
      }
      wsRef.current = null;
    }

    setIsTTSPlaying(false);
    setIsTTSLoading(false);
    setIsAwaitingAutoPlaybackStart(false);
    if (options?.manual) {
      setManualStopCount((count) => count + 1);
    }
  }, []);

  // Send text to TTS via WebSocket
  const sendTextToTTS = useCallback(
    (text: string) => {
      if (!text.trim()) return;

      setIsTTSLoading(true);
      setIsAwaitingAutoPlaybackStart(true);
      setSpokenText((prev) => (prev ? prev + " " + text : text));

      // Track character count for progressive text reveal
      // Note: text is already cleaned (from cleanTextForTTS) when called from streamTTS
      setTotalSpokenCharCount((prev) => prev + text.length);

      // Set a timeout to reset loading state if TTS doesn't complete
      if (loadingTimeoutRef.current) {
        clearTimeout(loadingTimeoutRef.current);
      }
      loadingTimeoutRef.current = setTimeout(() => {
        setIsTTSLoading(false);
        setIsTTSPlaying(false);
      }, TTS_LOADING_TIMEOUT_MS);

      // Hard safety timeout: if the entire TTS session hasn't finished in 5 minutes,
      // force cleanup to prevent the UI from being stuck indefinitely.
      if (!sessionTimeoutRef.current) {
        sessionTimeoutRef.current = setTimeout(() => {
          sessionTimeoutRef.current = null;
          stopTTS();
        }, TTS_SESSION_TIMEOUT_MS);
      }

      if (wsRef.current?.readyState === WebSocket.OPEN) {
        wsRef.current.send(JSON.stringify({ type: "synthesize", text }));
      } else {
        pendingTextRef.current.push(text);
        connectWebSocket();
      }
    },
    [connectWebSocket, stopTTS]
  );

  const streamTTS = useCallback(
    (text: string, isComplete: boolean = false, messageNodeId?: number) => {
      if (!autoPlayback) {
        return;
      }

      if (typeof messageNodeId === "number") {
        setActiveMessageNodeId((prev) =>
          prev === messageNodeId ? prev : messageNodeId
        );
      }

      // Skip if text hasn't changed
      if (text === lastRawTextRef.current && !isComplete) return;
      lastRawTextRef.current = text;

      // Clear pending timers
      if (flushTimerRef.current) {
        clearTimeout(flushTimerRef.current);
        flushTimerRef.current = null;
      }
      if (fastStartTimerRef.current) {
        clearTimeout(fastStartTimerRef.current);
        fastStartTimerRef.current = null;
      }

      // Clean the full text
      const cleanedText = cleanTextForTTS(text);
      const uncommittedText = cleanedText.slice(committedPositionRef.current);

      // On completion, we must still signal "end" even if there's no new text.
      // Otherwise ElevenLabs waits for more input and eventually times out.
      if (uncommittedText.length === 0) {
        if (isComplete && !hasSignaledEndRef.current) {
          hasSignaledEndRef.current = true;

          if (wsRef.current?.readyState === WebSocket.OPEN) {
            wsRef.current.send(JSON.stringify({ type: "end" }));
          } else {
            const sendEnd = () => {
              if (wsRef.current?.readyState === WebSocket.OPEN) {
                if (pendingTextRef.current.length === 0) {
                  wsRef.current.send(JSON.stringify({ type: "end" }));
                } else {
                  setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
                }
              } else if (wsRef.current?.readyState === WebSocket.CONNECTING) {
                setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
              }
            };
            setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
          }
        }
        return;
      }

      // Find chunk boundaries and send immediately
      let remaining = uncommittedText;
      let offset = 0;

      while (remaining.length > 0) {
        const boundaryIndex = findChunkBoundary(remaining);

        if (boundaryIndex > 0) {
          const chunkText = remaining.slice(0, boundaryIndex).trim();
          if (chunkText.length > 0) {
            sendTextToTTS(chunkText);
            hasSpokenFirstChunkRef.current = true;
          }
          offset += boundaryIndex;
          remaining = remaining.slice(boundaryIndex).trim();
        } else {
          break;
        }
      }

      committedPositionRef.current += offset;

      // Handle remaining text when stream is complete
      if (isComplete && remaining.trim().length > 0) {
        sendTextToTTS(remaining.trim());
        committedPositionRef.current = cleanedText.length;
        hasSpokenFirstChunkRef.current = true;
      }

      // When streaming is complete, signal end to flush remaining audio
      if (isComplete && !hasSignaledEndRef.current) {
        hasSignaledEndRef.current = true;

        if (wsRef.current?.readyState === WebSocket.OPEN) {
          wsRef.current.send(JSON.stringify({ type: "end" }));
        } else {
          const sendEnd = () => {
            if (wsRef.current?.readyState === WebSocket.OPEN) {
              if (pendingTextRef.current.length === 0) {
                wsRef.current.send(JSON.stringify({ type: "end" }));
              } else {
                setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
              }
            } else if (wsRef.current?.readyState === WebSocket.CONNECTING) {
              setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
            }
          };
          setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);
        }
      }

      const currentUncommitted = cleanedText
        .slice(committedPositionRef.current)
        .trim();

      // Fast start: send the first TTS chunk as soon as we have enough text (20+ chars)
      // without waiting for a full sentence boundary. This reduces perceived latency —
      // the user hears audio begin within ~200ms of the first text arriving, rather than
      // waiting for the LLM to produce a complete sentence.
      if (
        !hasSpokenFirstChunkRef.current &&
        currentUncommitted.length >= 20 &&
        !isComplete
      ) {
        fastStartTimerRef.current = setTimeout(() => {
          if (hasSpokenFirstChunkRef.current) return;

          const nowCleaned = cleanTextForTTS(lastRawTextRef.current);
          const nowUncommitted = nowCleaned
            .slice(committedPositionRef.current)
            .trim();

          if (nowUncommitted.length >= 20) {
            // Find a reasonable break point
            let breakPoint = nowUncommitted.length;
            const spaceIdx = nowUncommitted.lastIndexOf(" ", 50);
            if (spaceIdx >= 15) breakPoint = spaceIdx;

            const chunk = nowUncommitted.slice(0, breakPoint).trim();
            if (chunk.length > 0) {
              sendTextToTTS(chunk);
              committedPositionRef.current += breakPoint;
              hasSpokenFirstChunkRef.current = true;
            }
          }
        }, FAST_START_DELAY_MS);
      }

      // Flush timer for text ending with punctuation
      if (
        currentUncommitted.length > 0 &&
        !isComplete &&
        /[.!?]$/.test(currentUncommitted)
      ) {
        flushTimerRef.current = setTimeout(() => {
          const nowCleaned = cleanTextForTTS(lastRawTextRef.current);
          const nowUncommitted = nowCleaned
            .slice(committedPositionRef.current)
            .trim();

          if (nowUncommitted.length > 0) {
            sendTextToTTS(nowUncommitted);
            committedPositionRef.current = nowCleaned.length;
            hasSpokenFirstChunkRef.current = true;
          }
        }, FLUSH_DELAY_MS);
      }
    },
    [autoPlayback, sendTextToTTS]
  );

  const resetTTS = useCallback(() => {
    stopTTS();
    if (sessionTimeoutRef.current) {
      clearTimeout(sessionTimeoutRef.current);
      sessionTimeoutRef.current = null;
    }
    committedPositionRef.current = 0;
    lastRawTextRef.current = "";
    hasSpokenFirstChunkRef.current = false;
    hasSignaledEndRef.current = false;
    setSpokenText("");
    setActiveMessageNodeId(null);
    setIsAwaitingAutoPlaybackStart(false);
    setIsTTSMuted(false);
    setIsManualTTSPlaying(false);

    // Reset audio progress tracking
    totalBytesReceivedRef.current = 0;
    setAudioProgress(0);
    setTotalSpokenCharCount(0);
    setRevealedCharCount(0);
    lastRevealedCharCountRef.current = 0;

    // Cancel animation frame if running
    if (animationFrameRef.current) {
      cancelAnimationFrame(animationFrameRef.current);
      animationFrameRef.current = null;
    }
  }, [stopTTS]);

  // Toggle TTS mute state
  const toggleTTSMute = useCallback(() => {
    setIsTTSMuted((prev) => {
      const newMuted = !prev;
      if (audioElementRef.current) {
        audioElementRef.current.muted = newMuted;
      }
      manualTTSMuteHandlerRef.current?.(newMuted);
      return newMuted;
    });
  }, []);

  const registerManualTTSMuteHandler = useCallback(
    (handler: ((muted: boolean) => void) | null) => {
      manualTTSMuteHandlerRef.current = handler;
      if (handler) {
        handler(isTTSMuted);
      }
    },
    [isTTSMuted]
  );

  // Animation loop to track audio playback progress for progressive text reveal
  useEffect(() => {
    if (!isTTSPlaying || !audioElementRef.current) {
      return;
    }

    const updateProgress = () => {
      const audio = audioElementRef.current;
      if (!audio) return;

      // Use playback position + a small lead.
      const effectiveSeconds = Math.max(
        audio.currentTime + REVEAL_LEAD_SECONDS,
        0
      );
      const hasDuration = Number.isFinite(audio.duration) && audio.duration > 0;
      const rawTargetChars = hasDuration
        ? Math.floor(
            Math.min(effectiveSeconds / audio.duration, 1) *
              totalSpokenCharCount
          )
        : Math.floor(effectiveSeconds * BASE_CHARS_PER_SECOND * playbackSpeed);
      const targetChars = Math.max(
        0,
        Math.min(rawTargetChars, totalSpokenCharCount)
      );

      // Smooth catch-up to avoid sudden end-of-response jumps.
      const prevChars = lastRevealedCharCountRef.current;
      const nextChars =
        targetChars > prevChars + MAX_CATCHUP_CHARS_PER_FRAME
          ? prevChars + MAX_CATCHUP_CHARS_PER_FRAME
          : targetChars;
      lastRevealedCharCountRef.current = nextChars;
      setRevealedCharCount(nextChars);

      // Calculate progress as ratio of chars revealed to total
      let progress = 0;
      if (totalSpokenCharCount > 0) {
        progress = Math.min(nextChars / totalSpokenCharCount, 1);
      }

      setAudioProgress(progress);

      if (isTTSPlaying) {
        animationFrameRef.current = requestAnimationFrame(updateProgress);
      }
    };

    animationFrameRef.current = requestAnimationFrame(updateProgress);

    return () => {
      if (animationFrameRef.current) {
        cancelAnimationFrame(animationFrameRef.current);
        animationFrameRef.current = null;
      }
    };
  }, [isTTSPlaying, totalSpokenCharCount]);

  // Reset TTS state when voice auto-playback is disabled
  // This prevents the mic button from being stuck disabled
  const prevAutoPlaybackRef = useRef(autoPlayback);
  useEffect(() => {
    if (prevAutoPlaybackRef.current && !autoPlayback) {
      // Auto-playback was just disabled, clean up TTS state
      resetTTS();
    }
    prevAutoPlaybackRef.current = autoPlayback;
  }, [autoPlayback, resetTTS]);

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      if (flushTimerRef.current) clearTimeout(flushTimerRef.current);
      if (fastStartTimerRef.current) clearTimeout(fastStartTimerRef.current);
      if (loadingTimeoutRef.current) clearTimeout(loadingTimeoutRef.current);
      if (endCheckIntervalRef.current)
        clearInterval(endCheckIntervalRef.current);
      if (animationFrameRef.current)
        cancelAnimationFrame(animationFrameRef.current);
      if (sessionTimeoutRef.current) clearTimeout(sessionTimeoutRef.current);
      if (audioUrlRef.current) {
        URL.revokeObjectURL(audioUrlRef.current);
      }
      if (wsRef.current) {
        try {
          wsRef.current.close();
        } catch (err) {
          // WebSocket may already be closed or in CLOSING state — non-critical
          console.warn("Failed to close TTS WebSocket during cleanup:", err);
        }
      }
      if (audioElementRef.current) {
        try {
          audioElementRef.current.pause();
          audioElementRef.current.src = "";
        } catch {
          // Ignore
        }
      }
      if (
        mediaSourceRef.current &&
        mediaSourceRef.current.readyState === "open"
      ) {
        try {
          mediaSourceRef.current.endOfStream();
        } catch {
          // Ignore
        }
      }
    };
  }, []);

  const isAudioSyncActive = autoPlayback && (isTTSPlaying || isTTSLoading);

  return (
    <VoiceModeContext.Provider
      value={{
        isTTSPlaying,
        isManualTTSPlaying,
        isTTSLoading,
        spokenText,
        activeMessageNodeId,
        streamTTS,
        stopTTS,
        manualStopCount,
        resetTTS,
        audioProgress,
        revealedCharCount,
        isAudioSyncActive,
        autoPlayback,
        isAwaitingAutoPlaybackStart,
        isTTSMuted,
        toggleTTSMute,
        setManualTTSPlaying: setIsManualTTSPlaying,
        registerManualTTSMuteHandler,
      }}
    >
      {children}
    </VoiceModeContext.Provider>
  );
}

export function useVoiceMode(): VoiceModeContextType {
  const context = useContext(VoiceModeContext);
  if (!context) {
    throw new Error("useVoiceMode must be used within VoiceModeProvider");
  }
  return context;
}


================================================
FILE: web/src/providers/__tests__/ProjectsContext.test.tsx
================================================
import React, { PropsWithChildren } from "react";
import { act, renderHook } from "@testing-library/react";
import {
  ProjectsProvider,
  useProjectsContext,
} from "@/providers/ProjectsContext";
import { SettingsContext } from "@/providers/SettingsProvider";
import { CombinedSettings } from "@/interfaces/settings";
import type { ProjectFile } from "@/app/app/projects/projectsService";

const mockUploadFiles = jest.fn();
const mockGetRecentFiles = jest.fn();
const mockToastWarning = jest.fn();

jest.mock("next/navigation", () => ({
  useSearchParams: () => ({
    get: () => null,
  }),
}));

jest.mock("@/hooks/appNavigation", () => ({
  useAppRouter: () => jest.fn(),
}));

jest.mock("@/lib/hooks/useProjects", () => ({
  useProjects: () => ({
    projects: [],
    refreshProjects: jest.fn().mockResolvedValue([]),
  }),
}));

jest.mock("@/hooks/useToast", () => ({
  toast: {
    warning: (...args: unknown[]) => mockToastWarning(...args),
    error: jest.fn(),
    success: jest.fn(),
  },
}));

jest.mock("@/app/app/projects/projectsService", () => {
  const actual = jest.requireActual("@/app/app/projects/projectsService");
  return {
    ...actual,
    fetchProjects: jest.fn().mockResolvedValue([]),
    createProject: jest.fn(),
    uploadFiles: (...args: unknown[]) => mockUploadFiles(...args),
    getRecentFiles: (...args: unknown[]) => mockGetRecentFiles(...args),
    getFilesInProject: jest.fn().mockResolvedValue([]),
    getProject: jest.fn(),
    getProjectInstructions: jest.fn(),
    upsertProjectInstructions: jest.fn(),
    getProjectDetails: jest.fn(),
    renameProject: jest.fn(),
    deleteProject: jest.fn(),
    deleteUserFile: jest.fn(),
    getUserFileStatuses: jest.fn().mockResolvedValue([]),
    unlinkFileFromProject: jest.fn(),
    linkFileToProject: jest.fn(),
  };
});

const settingsValue: CombinedSettings = {
  settings: {
    user_file_max_upload_size_mb: 1,
  } as CombinedSettings["settings"],
  enterpriseSettings: null,
  customAnalyticsScript: null,
  webVersion: null,
  webDomain: null,
  isSearchModeAvailable: true,
  settingsLoading: false,
};

const wrapper = ({ children }: PropsWithChildren) => (
  <SettingsContext.Provider value={settingsValue}>
    <ProjectsProvider>{children}</ProjectsProvider>
  </SettingsContext.Provider>
);

describe("ProjectsContext beginUpload size precheck", () => {
  beforeEach(() => {
    mockUploadFiles.mockReset();
    mockGetRecentFiles.mockReset();
    mockToastWarning.mockReset();

    mockUploadFiles.mockResolvedValue({
      user_files: [],
      rejected_files: [],
    });
    mockGetRecentFiles.mockResolvedValue([]);
  });

  it("only sends valid files to the upload API when oversized files are present", async () => {
    const { result } = renderHook(() => useProjectsContext(), { wrapper });

    const valid = new File(["small"], "small.txt", { type: "text/plain" });
    const oversized = new File([new Uint8Array(2 * 1024 * 1024)], "big.txt", {
      type: "text/plain",
    });

    let optimisticFiles: ProjectFile[] = [];
    await act(async () => {
      optimisticFiles = await result.current.beginUpload(
        [valid, oversized],
        null
      );
    });

    expect(mockUploadFiles).toHaveBeenCalledTimes(1);
    const [uploadedFiles] = mockUploadFiles.mock.calls[0];
    expect((uploadedFiles as File[]).map((f) => f.name)).toEqual(["small.txt"]);
    expect(optimisticFiles.map((f) => f.name)).toEqual(["small.txt"]);
    expect(mockToastWarning).toHaveBeenCalledTimes(1);
  });

  it("uploads all files when none are oversized", async () => {
    const { result } = renderHook(() => useProjectsContext(), { wrapper });

    const first = new File(["small"], "first.txt", { type: "text/plain" });
    const second = new File(["small"], "second.txt", { type: "text/plain" });

    let optimisticFiles: ProjectFile[] = [];
    await act(async () => {
      optimisticFiles = await result.current.beginUpload([first, second], null);
    });

    expect(mockUploadFiles).toHaveBeenCalledTimes(1);
    const [uploadedFiles] = mockUploadFiles.mock.calls[0];
    expect((uploadedFiles as File[]).map((f) => f.name)).toEqual([
      "first.txt",
      "second.txt",
    ]);
    expect(mockToastWarning).not.toHaveBeenCalled();
    expect(optimisticFiles.map((f) => f.name)).toEqual([
      "first.txt",
      "second.txt",
    ]);
  });

  it("does not call upload API when all files are oversized", async () => {
    const { result } = renderHook(() => useProjectsContext(), { wrapper });

    const oversized = new File(
      [new Uint8Array(2 * 1024 * 1024)],
      "too-big.txt",
      { type: "text/plain" }
    );
    const onSuccess = jest.fn();
    const onFailure = jest.fn();

    let optimisticFiles: ProjectFile[] = [];
    await act(async () => {
      optimisticFiles = await result.current.beginUpload(
        [oversized],
        null,
        onSuccess,
        onFailure
      );
    });

    expect(mockUploadFiles).not.toHaveBeenCalled();
    expect(optimisticFiles).toEqual([]);
    expect(mockToastWarning).toHaveBeenCalledTimes(1);
    expect(onSuccess).not.toHaveBeenCalled();
    expect(onFailure).toHaveBeenCalledWith([]);
  });
});


================================================
FILE: web/src/proxy.ts
================================================
import { NextResponse } from "next/server";
import type { NextRequest } from "next/server";
import {
  AuthType,
  SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED,
  SERVER_SIDE_ONLY__AUTH_TYPE,
} from "./lib/constants";

// Authentication cookie names (matches backend constants)
const FASTAPI_USERS_AUTH_COOKIE_NAME = "fastapiusersauth";
const ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user";

// Protected route prefixes (require authentication)
const PROTECTED_ROUTES = ["/app", "/admin", "/agents", "/connector"];

// Public route prefixes (no authentication required)
const PUBLIC_ROUTES = ["/auth", "/anonymous", "/_next", "/api"];

// NOTE: have to have the "/:path*" here since NextJS doesn't allow any real JS to
// be run before the config is defined e.g. if we try and do a .map it will complain
export const config = {
  matcher: [
    // Auth-protected routes (for middleware auth check)
    "/app/:path*",
    "/admin/:path*",
    "/agents/:path*",
    "/connector/:path*",

    // Enterprise Edition routes (for /ee rewriting)
    // These are ONLY the EE-specific routes that should be rewritten
    "/admin/groups/:path*",
    "/admin/performance/usage/:path*",
    "/admin/performance/query-history/:path*",
    "/admin/theme/:path*",
    "/admin/performance/custom-analytics/:path*",
    "/admin/standard-answer/:path*",
    "/agents/stats/:path*",

    // Cloud only
    "/admin/billing/:path*",
  ],
};

// Enterprise Edition specific routes (ONLY these get /ee rewriting)
const EE_ROUTES = [
  "/admin/groups",
  "/admin/performance/usage",
  "/admin/performance/query-history",
  "/admin/theme",
  "/admin/performance/custom-analytics",
  "/admin/standard-answer",
  "/agents/stats",
];

export async function proxy(request: NextRequest) {
  const pathname = request.nextUrl.pathname;

  // Auth Check: Fast-fail at edge if no cookie (defense in depth)
  // Note: Layouts still do full verification (token validity, roles, etc.)
  const isProtectedRoute = PROTECTED_ROUTES.some((route) =>
    pathname.startsWith(route)
  );
  const isPublicRoute = PUBLIC_ROUTES.some((route) =>
    pathname.startsWith(route)
  );

  if (isProtectedRoute && !isPublicRoute) {
    const authCookie = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME);
    const anonymousCookie = request.cookies.get(ANONYMOUS_USER_COOKIE_NAME);

    // Allow access if user has either a regular auth cookie or anonymous user cookie
    if (!authCookie && !anonymousCookie) {
      const loginUrl = new URL("/auth/login", request.url);
      // Preserve full URL including query params and hash for deep linking
      const fullPath = pathname + request.nextUrl.search + request.nextUrl.hash;
      loginUrl.searchParams.set("next", fullPath);
      return NextResponse.redirect(loginUrl);
    }
  }

  // Enterprise Edition: Rewrite EE-specific routes to /ee prefix
  if (SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {
    if (EE_ROUTES.some((route) => pathname.startsWith(route))) {
      const newUrl = new URL(`/ee${pathname}`, request.url);
      return NextResponse.rewrite(newUrl);
    }
  }

  return NextResponse.next();
}


================================================
FILE: web/src/refresh-components/Attachment.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Attachment from "./Attachment";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Attachment> = {
  title: "refresh-components/Attachment",
  component: Attachment,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Attachment>;

export const Default: Story = {
  args: {
    fileName: "quarterly-report.pdf",
  },
};

export const WithOpenAction: Story = {
  args: {
    fileName: "meeting-notes.docx",
    open: () => alert("Opening document"),
  },
};

export const LongFileName: Story = {
  args: {
    fileName:
      "very-long-document-name-that-might-overflow-the-container-2024-Q4-final-draft.pdf",
  },
};


================================================
FILE: web/src/refresh-components/Attachment.tsx
================================================
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { SvgFileText, SvgMaximize2 } from "@opal/icons";
export interface AttachmentsProps {
  fileName: string;
  open?: () => void;
}

export default function Attachments({ fileName, open }: AttachmentsProps) {
  return (
    <div className="flex items-center border bg-background-tint-00 rounded-12 p-1 gap-1">
      <div className="p-2 bg-background-tint-01 rounded-08">
        <SvgFileText className="w-[1.25rem] h-[1.25rem] stroke-text-02" />
      </div>
      <div className="flex flex-col px-2">
        <Text as="p" secondaryAction>
          {fileName}
        </Text>
        <Text as="p" secondaryBody text03>
          Document
        </Text>
      </div>

      {open && (
        <Button
          aria-label="Expand document"
          onClick={open}
          icon={SvgMaximize2}
          prominence="tertiary"
          size="sm"
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/Calendar.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import Calendar from "./Calendar";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { DateRange } from "react-day-picker";

const meta: Meta<typeof Calendar> = {
  title: "refresh-components/Calendar",
  component: Calendar,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Calendar>;

// ---------------------------------------------------------------------------
// Single selection
// ---------------------------------------------------------------------------

function SingleSelectDemo() {
  const [selected, setSelected] = React.useState<Date | undefined>(new Date());
  return <Calendar mode="single" selected={selected} onSelect={setSelected} />;
}

export const SingleSelect: Story = {
  render: () => <SingleSelectDemo />,
};

// ---------------------------------------------------------------------------
// Range selection
// ---------------------------------------------------------------------------

function RangeSelectDemo() {
  const [range, setRange] = React.useState<DateRange | undefined>({
    from: new Date(2025, 2, 10),
    to: new Date(2025, 2, 20),
  });
  return <Calendar mode="range" selected={range} onSelect={setRange} />;
}

export const RangeSelect: Story = {
  render: () => <RangeSelectDemo />,
};

// ---------------------------------------------------------------------------
// Without outside days
// ---------------------------------------------------------------------------

export const NoOutsideDays: Story = {
  args: {
    mode: "single",
    showOutsideDays: false,
  },
};


================================================
FILE: web/src/refresh-components/Calendar.tsx
================================================
"use client";

import React from "react";
import { DayButton, DayPicker, getDefaultClassNames } from "react-day-picker";
import { cn } from "@/lib/utils";
import { Button as OpalButton } from "@opal/components";
import { SvgChevronDown, SvgChevronLeft, SvgChevronRight } from "@opal/icons";
import Button from "@/refresh-components/buttons/Button";

function CalendarDayButton({
  className,
  day,
  modifiers,
  ...props
}: React.ComponentProps<typeof DayButton>) {
  const ref = React.useRef<HTMLButtonElement>(null);
  React.useEffect(() => {
    if (modifiers.focused) ref.current?.focus();
  }, [modifiers.focused]);

  return (
    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
    <Button
      ref={ref}
      tertiary
      className="w-full"
      transient={modifiers.selected}
      data-day={day.date.toLocaleDateString()}
      data-selected-single={
        modifiers.selected &&
        !modifiers.range_start &&
        !modifiers.range_end &&
        !modifiers.range_middle
      }
      data-range-start={modifiers.range_start}
      data-range-end={modifiers.range_end}
      data-range-middle={modifiers.range_middle}
      {...props}
    />
  );
}

export default function Calendar({
  className,
  classNames,
  showOutsideDays = true,
  captionLayout = "label",
  formatters,
  components,
  ...props
}: React.ComponentProps<typeof DayPicker>) {
  const defaultClassNames = getDefaultClassNames();

  return (
    <DayPicker
      showOutsideDays={showOutsideDays}
      className={cn(
        "group/calendar p-0 [--cell-size:2rem] [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent",
        String.raw`rtl:**:[.rdp-button\_next>svg]:rotate-180`,
        String.raw`rtl:**:[.rdp-button\_previous>svg]:rotate-180`,
        className
      )}
      captionLayout={captionLayout}
      formatters={{
        formatMonthDropdown: (date) =>
          date.toLocaleString("default", { month: "short" }),
        ...formatters,
      }}
      classNames={{
        root: cn("w-fit", defaultClassNames.root),
        months: cn(
          "relative flex flex-col gap-4 md:flex-row",
          defaultClassNames.months
        ),
        month: cn("flex w-full flex-col gap-4", defaultClassNames.month),
        nav: cn(
          "absolute inset-x-0 top-0 flex w-full items-center justify-between gap-1",
          defaultClassNames.nav
        ),
        button_previous: cn(
          "h-[--cell-size] w-[--cell-size] select-none p-0 aria-disabled:opacity-50",
          defaultClassNames.button_previous
        ),
        button_next: cn(
          "h-[--cell-size] w-[--cell-size] select-none p-0 aria-disabled:opacity-50",
          defaultClassNames.button_next
        ),
        month_caption: cn(
          "flex h-[--cell-size] w-full items-center justify-center px-[--cell-size]",
          defaultClassNames.month_caption
        ),
        dropdowns: cn(
          "flex h-[--cell-size] w-full items-center justify-center gap-1.5 text-sm font-medium",
          defaultClassNames.dropdowns
        ),
        dropdown_root: cn(
          "has-focus:border-border-05 border-border-03 shadow-xs has-focus:ring-border-05/50 has-focus:ring-[3px] relative rounded-md border",
          defaultClassNames.dropdown_root
        ),
        dropdown: cn(
          "bg-background-neutral-00 absolute inset-0 opacity-0",
          defaultClassNames.dropdown
        ),
        caption_label: cn(
          "select-none font-medium",
          captionLayout === "label"
            ? "text-sm"
            : "[&>svg]:text-text-03 flex h-8 items-center gap-1 rounded-md pl-2 pr-1 text-sm [&>svg]:size-3.5",
          defaultClassNames.caption_label
        ),
        table: "w-full border-collapse",
        weekdays: cn("flex", defaultClassNames.weekdays),
        weekday: cn(
          "text-text-02 flex-1 select-none font-secondary-mono pb-2",
          defaultClassNames.weekday
        ),
        week: cn("flex w-full", defaultClassNames.week),
        day: cn(
          "group/day relative h-full w-full select-none",
          defaultClassNames.day
        ),
        // week_number_header: cn(defaultClassNames.week_number_header),
        // week_number: cn(defaultClassNames.week_number),
        // range_start: cn(defaultClassNames.range_start),
        // range_middle: cn(defaultClassNames.range_middle),
        // range_end: cn(defaultClassNames.range_end),
        // today: cn(defaultClassNames.today),
        // outside: cn(defaultClassNames.outside),
        // disabled: cn(defaultClassNames.disabled),
        // hidden: cn(defaultClassNames.hidden),
        ...classNames,
      }}
      components={{
        Root: ({ className, rootRef, ...props }) => {
          return (
            <div
              data-slot="calendar"
              ref={rootRef}
              className={cn(className)}
              {...props}
            />
          );
        },
        Chevron: ({ className, orientation, size: _size, ...props }) => {
          if (orientation === "left")
            return (
              <OpalButton
                icon={SvgChevronLeft}
                prominence="tertiary"
                {...props}
              />
            );
          if (orientation === "right")
            return (
              <OpalButton
                icon={SvgChevronRight}
                prominence="tertiary"
                {...props}
              />
            );
          return (
            <OpalButton
              icon={SvgChevronDown}
              prominence="tertiary"
              {...props}
            />
          );
        },
        DayButton: CalendarDayButton,
        WeekNumber: ({ children, ...props }) => {
          return (
            <td {...props}>
              <div className="flex size-[--cell-size] items-center justify-center text-center">
                {children}
              </div>
            </td>
          );
        },
        ...components,
      }}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/CharacterCount.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import CharacterCount from "./CharacterCount";

const meta: Meta<typeof CharacterCount> = {
  title: "refresh-components/CharacterCount",
  component: CharacterCount,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof CharacterCount>;

export const UnderLimit: Story = {
  args: {
    value: "Hello world",
    limit: 100,
  },
};

export const NearLimit: Story = {
  args: {
    value: "A".repeat(95),
    limit: 100,
  },
};

export const AtLimit: Story = {
  args: {
    value: "A".repeat(100),
    limit: 100,
  },
};

export const Empty: Story = {
  args: {
    value: "",
    limit: 256,
  },
};


================================================
FILE: web/src/refresh-components/CharacterCount.tsx
================================================
import Text from "@/refresh-components/texts/Text";
export interface CharacterCountProps {
  value: string;
  limit: number;
}
export default function CharacterCount({ value, limit }: CharacterCountProps) {
  const length = value?.length || 0;
  return (
    <Text text03 secondaryBody>
      ({length}/{limit} characters)
    </Text>
  );
}


================================================
FILE: web/src/refresh-components/Chip.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Chip from "./Chip";
import { SvgUser } from "@opal/icons";

const meta: Meta<typeof Chip> = {
  title: "refresh-components/Chip",
  component: Chip,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Chip>;

export const Default: Story = {
  args: {
    children: "Tag Name",
  },
};

export const WithIcon: Story = {
  args: {
    children: "John Doe",
    icon: SvgUser,
  },
};

export const Removable: Story = {
  args: {
    children: "Removable Tag",
    onRemove: () => alert("Removed!"),
  },
};

export const WithIconAndRemove: Story = {
  args: {
    children: "Jane Smith",
    icon: SvgUser,
    onRemove: () => alert("Removed!"),
  },
};


================================================
FILE: web/src/refresh-components/Chip.tsx
================================================
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { SvgX } from "@opal/icons";
import { Button } from "@opal/components";
import type { IconProps } from "@opal/types";

export interface ChipProps {
  children?: string;
  icon?: React.FunctionComponent<IconProps>;
  /** Icon rendered after the label (e.g. a warning indicator) */
  rightIcon?: React.FunctionComponent<IconProps>;
  onRemove?: () => void;
  smallLabel?: boolean;
  /** When true, applies warning-coloured styling to the right icon. */
  error?: boolean;
}

/**
 * A simple chip/tag component for displaying metadata.
 * Supports an optional remove button via the `onRemove` prop.
 *
 * @example
 * ```tsx
 * <Chip>Tag Name</Chip>
 * <Chip icon={SvgUser}>John Doe</Chip>
 * <Chip onRemove={() => removeTag(id)}>Removable</Chip>
 * ```
 */
export default function Chip({
  children,
  icon: Icon,
  rightIcon: RightIcon,
  onRemove,
  smallLabel = true,
  error = false,
}: ChipProps) {
  return (
    <div
      className={cn(
        "flex items-center gap-1 px-1.5 py-0.5 rounded-08",
        "bg-background-tint-02"
      )}
    >
      {Icon && <Icon size={12} className="text-text-03" />}
      {children && (
        <Text figureSmallLabel={smallLabel} text03>
          {children}
        </Text>
      )}
      {RightIcon && (
        <RightIcon
          size={14}
          className={cn(error ? "text-status-warning-05" : "text-text-03")}
        />
      )}
      {onRemove && (
        <Button
          onClick={(e) => {
            e.stopPropagation();
            onRemove();
          }}
          prominence="tertiary"
          icon={SvgX}
          size="xs"
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/Code.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import Code from "./Code";

const meta: Meta<typeof Code> = {
  title: "refresh-components/Code",
  component: Code,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Code>;

export const Default: Story = {
  args: {
    children: `const greeting = "Hello, world!";\nconsole.log(greeting);`,
  },
};

export const WithoutCopyButton: Story = {
  args: {
    children: `npm install @onyx/sdk`,
    showCopyButton: false,
  },
};

export const MultiLine: Story = {
  args: {
    children: `function fibonacci(n: number): number {
  if (n <= 1) return n;
  return fibonacci(n - 1) + fibonacci(n - 2);
}

console.log(fibonacci(10));`,
  },
};


================================================
FILE: web/src/refresh-components/Code.tsx
================================================
import { WithoutStyles } from "@/types";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";

interface CodeProps extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {
  children: string;
  showCopyButton?: boolean;
}

export default function Code({
  children,
  showCopyButton = true,
  ...props
}: CodeProps) {
  return (
    <div className="relative code-wrapper">
      <code className="code-block" {...props}>
        {children}
      </code>
      {showCopyButton && (
        <div className="code-copy-button">
          <CopyIconButton getCopyText={() => children} />
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/Collapsible.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import {
  Collapsible,
  CollapsibleTrigger,
  CollapsibleContent,
} from "./Collapsible";

const meta: Meta<typeof Collapsible> = {
  title: "refresh-components/Collapsible",
  component: Collapsible,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof Collapsible>;

export const Default: Story = {
  render: () => (
    <Collapsible defaultOpen={false}>
      <CollapsibleTrigger asChild>
        <button className="p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left">
          Click to toggle
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        <div className="p-4 border border-border-01 rounded-08 mt-2">
          This content can be expanded and collapsed with a smooth animation.
        </div>
      </CollapsibleContent>
    </Collapsible>
  ),
};

export const DefaultOpen: Story = {
  render: () => (
    <Collapsible defaultOpen>
      <CollapsibleTrigger asChild>
        <button className="p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left">
          Already open — click to close
        </button>
      </CollapsibleTrigger>
      <CollapsibleContent>
        <div className="p-4 border border-border-01 rounded-08 mt-2">
          This section starts open by default.
        </div>
      </CollapsibleContent>
    </Collapsible>
  ),
};

function ControlledDemo() {
  const [open, setOpen] = React.useState(false);
  return (
    <div style={{ width: 320 }}>
      <Collapsible open={open} onOpenChange={setOpen}>
        <CollapsibleTrigger asChild>
          <button className="p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left">
            {open ? "Close" : "Open"} (controlled)
          </button>
        </CollapsibleTrigger>
        <CollapsibleContent>
          <div className="p-4 border border-border-01 rounded-08 mt-2">
            Controlled collapsible content. Current state:{" "}
            {open ? "open" : "closed"}.
          </div>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}

export const Controlled: Story = {
  render: () => <ControlledDemo />,
};

export const MultipleCollapsibles: Story = {
  render: () => (
    <div className="flex flex-col gap-2" style={{ width: 320 }}>
      {["Section A", "Section B", "Section C"].map((title) => (
        <Collapsible key={title}>
          <CollapsibleTrigger asChild>
            <button className="p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left">
              {title}
            </button>
          </CollapsibleTrigger>
          <CollapsibleContent>
            <div className="p-4 border border-border-01 rounded-08 mt-1">
              Content for {title}
            </div>
          </CollapsibleContent>
        </Collapsible>
      ))}
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/Collapsible.tsx
================================================
/**
 * Collapsible Components
 *
 * A set of components for creating expandable/collapsible sections.
 * Built on Radix UI Collapsible primitives with custom animations.
 *
 * Components:
 * - Collapsible: Root container that manages open/closed state
 * - CollapsibleTrigger: Interactive element that toggles the collapsible
 * - CollapsibleContent: Content area that expands/collapses with animation
 *
 * @example
 * ```tsx
 * import { Collapsible, CollapsibleTrigger, CollapsibleContent } from "@/refresh-components/Collapsible";
 *
 * // Basic usage
 * <Collapsible>
 *   <CollapsibleTrigger>
 *     <button>Toggle Content</button>
 *   </CollapsibleTrigger>
 *   <CollapsibleContent>
 *     <div className="p-4">
 *       Your collapsible content here
 *     </div>
 *   </CollapsibleContent>
 * </Collapsible>
 *
 * // Controlled state
 * const [isOpen, setIsOpen] = useState(false);
 * <Collapsible open={isOpen} onOpenChange={setIsOpen}>
 *   <CollapsibleTrigger asChild>
 *     <button>{isOpen ? "Close" : "Open"}</button>
 *   </CollapsibleTrigger>
 *   <CollapsibleContent>
 *     <div>Content</div>
 *   </CollapsibleContent>
 * </Collapsible>
 * ```
 */

"use client";

import { cn } from "@/lib/utils";
import * as CollapsiblePrimitive from "@radix-ui/react-collapsible";
import * as React from "react";

/**
 * Collapsible Root Component
 *
 * The root container for a collapsible section. Manages the open/closed state
 * and provides context to trigger and content components.
 *
 * This is a re-export of Radix UI's Collapsible.Root component.
 *
 * @see https://www.radix-ui.com/primitives/docs/components/collapsible
 */
const Collapsible = CollapsiblePrimitive.Root;

/**
 * Collapsible Trigger Component
 *
 * The interactive element that controls the open/closed state of the collapsible.
 * Typically wraps a button or other clickable element.
 *
 * Supports the `asChild` prop to merge props with a child element instead of
 * rendering a default button.
 *
 * This is a re-export of Radix UI's CollapsibleTrigger component.
 *
 * @see https://www.radix-ui.com/primitives/docs/components/collapsible
 */
const CollapsibleTrigger = CollapsiblePrimitive.CollapsibleTrigger;

/**
 * Collapsible Content Component
 *
 * The expandable/collapsible content area. Automatically animates when
 * opening and closing based on the collapsible state.
 *
 * Features:
 * - Smooth slide-down animation when opening (animate-collapsible-down)
 * - Smooth slide-up animation when closing (animate-collapsible-up)
 * - Overflow hidden to prevent content bleeding during animation
 * - Supports custom className for additional styling
 *
 * Built on Radix UI's CollapsibleContent with custom animations.
 *
 * @see https://www.radix-ui.com/primitives/docs/components/collapsible
 */
const CollapsibleContent = React.forwardRef<
  React.ElementRef<typeof CollapsiblePrimitive.CollapsibleContent>,
  React.ComponentPropsWithoutRef<typeof CollapsiblePrimitive.CollapsibleContent>
>(({ className, ...props }, ref) => (
  <CollapsiblePrimitive.CollapsibleContent
    ref={ref}
    className={cn(
      "overflow-hidden data-[state=open]:animate-collapsible-down data-[state=closed]:animate-collapsible-up",
      className
    )}
    {...props}
  />
));
CollapsibleContent.displayName = "CollapsibleContent";

export { Collapsible, CollapsibleContent, CollapsibleTrigger };


================================================
FILE: web/src/refresh-components/ColorSwatch.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ColorSwatch from "./ColorSwatch";

const meta: Meta<typeof ColorSwatch> = {
  title: "refresh-components/ColorSwatch",
  component: ColorSwatch,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof ColorSwatch>;

export const Light: Story = {
  args: {
    light: true,
  },
};

export const Dark: Story = {
  args: {
    dark: true,
  },
};

export const SideBySide: Story = {
  render: () => (
    <div className="flex gap-4 items-center">
      <ColorSwatch light />
      <ColorSwatch dark />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/ColorSwatch.tsx
================================================
import "@/app/css/color-swatch.css";

/**
 * A small color swatch chip component that displays a visual preview of light or dark color modes.
 * Shows "Aa" text sample with appropriate background and text colors.
 *
 * @param light - If true, displays light mode swatch with light background and dark text
 * @param dark - If true, displays dark mode swatch with dark background and light text
 *
 * @example
 * <ColorSwatch light />
 * <ColorSwatch dark />
 */
export interface ColorSwatchProps {
  /** Display light mode variant */
  light?: boolean;
  /** Display dark mode variant */
  dark?: boolean;
}

export default function ColorSwatch({ light, dark }: ColorSwatchProps) {
  const mode = light ? "light" : dark ? "dark" : "light";

  return (
    <div className="color-swatch" data-state={mode}>
      <div className="rounded-full h-[0.3rem] w-[0.3rem] bg-action-link-05" />
      <span className="color-swatch__text">Aa</span>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/ConnectionProviderIcon.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ConnectionProviderIcon from "./ConnectionProviderIcon";
import { SvgSettings, SvgStar } from "@opal/icons";

const meta: Meta<typeof ConnectionProviderIcon> = {
  title: "refresh-components/ConnectionProviderIcon",
  component: ConnectionProviderIcon,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof ConnectionProviderIcon>;

export const WithSettingsIcon: Story = {
  args: {
    icon: <SvgSettings className="w-5 h-5 stroke-text-04" />,
  },
};

export const WithStarIcon: Story = {
  args: {
    icon: <SvgStar className="w-5 h-5 stroke-text-04" />,
  },
};

export const WithCustomEmoji: Story = {
  args: {
    icon: <span className="text-lg">📄</span>,
  },
};


================================================
FILE: web/src/refresh-components/ConnectionProviderIcon.tsx
================================================
import React, { memo } from "react";
import { SvgArrowExchange, SvgOnyxLogo } from "@opal/icons";

type ConnectionProviderIconProps = {
  icon: React.ReactNode;
};

const ConnectionProviderIcon = memo(({ icon }: ConnectionProviderIconProps) => {
  return (
    <div className="flex items-center gap-1">
      <div className="w-7 h-7 flex items-center justify-center">{icon}</div>
      <div className="w-4 h-4 flex items-center justify-center">
        <SvgArrowExchange className="w-3 h-3 stroke-text-04" />
      </div>
      <div className="w-7 h-7 flex items-center justify-center">
        <SvgOnyxLogo size={24} className="fill-text-04" />
      </div>
    </div>
  );
});

ConnectionProviderIcon.displayName = "ConnectionProviderIcon";

export default ConnectionProviderIcon;


================================================
FILE: web/src/refresh-components/Divider.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import Divider from "./Divider";
import { SvgSettings } from "@opal/icons";

const meta: Meta<typeof Divider> = {
  title: "refresh-components/Divider",
  component: Divider,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Divider>;

export const SimpleLine: Story = {
  args: {},
};

export const WithTitle: Story = {
  args: {
    showTitle: true,
    text: "Section Title",
  },
};

export const WithTitleAndDescription: Story = {
  args: {
    showTitle: true,
    text: "Advanced Settings",
    description: "Configure additional options for this section.",
    showDescription: true,
  },
};

export const WithInfoText: Story = {
  args: {
    showTitle: true,
    text: "Items",
    infoText: "3 items",
    showInfo: true,
  },
};

function FoldableDividerDemo() {
  const [expanded, setExpanded] = React.useState(false);
  return (
    <div style={{ width: 400 }}>
      <Divider
        showTitle
        text="Click to toggle"
        foldable
        expanded={expanded}
        onClick={() => setExpanded(!expanded)}
      />
      {expanded && (
        <div style={{ padding: 12 }}>Expanded content goes here.</div>
      )}
    </div>
  );
}

export const Foldable: Story = {
  render: () => <FoldableDividerDemo />,
};

export const WithIcon: Story = {
  args: {
    showTitle: true,
    text: "Settings",
    icon: SvgSettings,
  },
};

export const Highlighted: Story = {
  args: {
    showTitle: true,
    text: "Active Section",
    foldable: true,
    expanded: false,
    isHighlighted: true,
  },
};

export const NoDividerLine: Story = {
  args: {
    showTitle: true,
    text: "No Lines",
    dividerLine: false,
  },
};


================================================
FILE: web/src/refresh-components/Divider.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import { SvgChevronRight, SvgChevronDown, SvgInfoSmall } from "@opal/icons";
import Text from "@/refresh-components/texts/Text";
import type { IconProps } from "@opal/types";
import Truncated from "./texts/Truncated";

export interface DividerProps
  extends Omit<React.HTMLAttributes<HTMLDivElement>, "title"> {
  /** Ref to the root element */
  ref?: React.Ref<HTMLDivElement>;
  /** Show title content instead of simple line */
  showTitle?: boolean;
  /** Title text */
  text?: string;
  /** Description text below title */
  description?: string;
  /** Show description */
  showDescription?: boolean;
  /** Enable foldable/collapsible behavior */
  foldable?: boolean;
  /** Controlled expanded state */
  expanded?: boolean;
  /** Callback when expanded changes */
  onClick?: () => void;
  /** Leading icon */
  icon?: React.FunctionComponent<IconProps>;
  /** Show info icon */
  showInfo?: boolean;
  /** Info text on right side */
  infoText?: string;
  /** Apply highlighted (hover) state styling */
  isHighlighted?: boolean;
  /** Show horizontal divider lines (default: true) */
  dividerLine?: boolean;
}

/**
 * Divider Component
 *
 * A flexible divider component that supports two modes:
 * 1. Simple horizontal line divider
 * 2. Title divider with optional foldable/collapsible behavior, icons, and multiple interactive states
 *
 * @example
 * ```tsx
 * // Simple horizontal line divider
 * <Divider />
 *
 * // Title divider
 * <Divider showTitle text="Section Title" />
 *
 * // Title divider with icon
 * <Divider showTitle text="Settings" icon={SvgSettings} />
 *
 * // Foldable divider (collapsed)
 * <Divider showTitle text="Details" foldable expanded={false} onClick={setExpanded} />
 *
 * // Foldable divider (expanded)
 * <Divider showTitle text="Details" foldable expanded onClick={setExpanded} />
 *
 * // With info icon and text
 * <Divider showTitle text="Section" showInfo infoText="3 items" />
 *
 * // With description
 * <Divider showTitle text="Title" description="Optional description" showDescription />
 * ```
 */
export default function Divider({
  ref,
  showTitle,
  text = "Title",
  description,
  showDescription,
  foldable,
  expanded,
  onClick,
  icon: Icon,
  showInfo,
  infoText,
  isHighlighted,
  dividerLine = true,
  className,
  ...props
}: DividerProps) {
  const handleClick = () => {
    if (foldable && onClick) {
      onClick();
    }
  };

  // Simple horizontal line divider
  if (!showTitle) {
    return (
      <div
        ref={ref}
        role="separator"
        className={cn("w-full py-1", className)}
        {...props}
      >
        <div className="h-px w-full bg-border-01" />
      </div>
    );
  }

  // Title divider with optional features
  return (
    <div
      ref={ref}
      role={foldable ? "button" : "separator"}
      aria-expanded={foldable ? expanded : undefined}
      tabIndex={foldable ? 0 : undefined}
      data-selected={isHighlighted ? "true" : undefined}
      onClick={foldable ? handleClick : undefined}
      onKeyDown={
        foldable
          ? (e) => {
              if (e.key === "Enter" || e.key === " ") {
                e.preventDefault();
                handleClick();
              }
            }
          : undefined
      }
      className={cn(
        "w-full mt-1 py-0.5 rounded-08",
        foldable && "group/divider cursor-pointer",
        foldable && !expanded && "hover:bg-background-tint-02",
        foldable && !expanded && isHighlighted && "bg-background-tint-02",
        foldable &&
          expanded &&
          "bg-background-tint-01 hover:bg-background-tint-02",
        className
      )}
      {...props}
    >
      {/* Title line */}
      <div
        className={cn(
          "flex items-center py-1",
          !dividerLine && (foldable ? "pl-1.5" : "px-2"),
          dividerLine && !foldable && "pl-1.5"
        )}
      >
        {/* Left divider line (only for foldable dividers) */}
        {dividerLine && foldable && (
          <div className={cn("h-px bg-border-01 w-1.5")} />
        )}

        {/* Content container */}
        <div className="flex items-center gap-0.5 px-0.5">
          {/* Icon container */}
          {Icon && (
            <div className="flex items-center justify-center size-5 p-0.5">
              <Icon
                className={cn(
                  "size-4 stroke-text-03",
                  foldable && "group-hover/divider:stroke-text-04",
                  foldable && expanded && "stroke-text-04",
                  foldable && isHighlighted && "stroke-text-04"
                )}
              />
            </div>
          )}

          {/* Title text */}
          <Text
            secondaryBody
            className={cn(
              "leading-4 truncate",
              !foldable && "text-text-03",
              foldable &&
                !expanded &&
                "text-text-03 group-hover/divider:text-text-04",
              foldable && expanded && "text-text-04",
              foldable && isHighlighted && "text-text-04"
            )}
          >
            {text}
          </Text>

          {/* Info icon */}
          {showInfo && (
            <div className="flex items-center justify-center size-5 p-0.5">
              <SvgInfoSmall
                className={cn(
                  "size-3 stroke-text-03",
                  foldable && "group-hover/divider:stroke-text-04",
                  foldable && expanded && "stroke-text-04",
                  foldable && isHighlighted && "stroke-text-04"
                )}
              />
            </div>
          )}
        </div>

        {/* Center divider line (flex-1 to fill remaining space) */}
        <div className={cn("flex-1", dividerLine && "h-px bg-border-01")} />

        {/* Info text on right side */}
        {infoText && (
          <>
            <Text
              secondaryBody
              className={cn(
                "leading-4 px-0.5",
                !foldable && "text-text-03",
                foldable &&
                  !expanded &&
                  "text-text-03 group-hover/divider:text-text-04",
                foldable && expanded && "text-text-04",
                foldable && isHighlighted && "text-text-04"
              )}
            >
              {infoText}
            </Text>
            {/* Right divider line after info text */}
            {dividerLine && (
              <div
                className={cn("h-px bg-border-01", foldable ? "w-1.5" : "w-2")}
              />
            )}
          </>
        )}

        {/* Chevron button for foldable */}
        {foldable && (
          <div className="flex items-center justify-center size-6">
            {expanded ? (
              <SvgChevronDown
                className={cn(
                  "size-4 stroke-text-03",
                  "group-hover/divider:stroke-text-04",
                  expanded && "stroke-text-04",
                  isHighlighted && "stroke-text-04"
                )}
              />
            ) : (
              <SvgChevronRight
                className={cn(
                  "size-4 stroke-text-03",
                  "group-hover/divider:stroke-text-04",
                  isHighlighted && "stroke-text-04"
                )}
              />
            )}
          </div>
        )}
      </div>

      {/* Description line */}
      {showDescription && description && (
        <div className="flex items-center py-1 pl-2">
          <Truncated secondaryBody text03>
            {description}
          </Truncated>
        </div>
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/EmptyMessage.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import EmptyMessage from "./EmptyMessage";
import { SvgFileText, SvgUsers } from "@opal/icons";

const meta: Meta<typeof EmptyMessage> = {
  title: "refresh-components/messages/EmptyMessage",
  component: EmptyMessage,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof EmptyMessage>;

export const Default: Story = {
  args: {
    title: "No items found",
  },
};

export const WithDescription: Story = {
  args: {
    title: "No connectors configured",
    description:
      "Set up a connector to start indexing documents from your data sources.",
  },
};

export const WithCustomIcon: Story = {
  args: {
    icon: SvgFileText,
    title: "No documents available",
    description: "Upload documents or connect a data source to get started.",
  },
};

export const UsersEmpty: Story = {
  args: {
    icon: SvgUsers,
    title: "No users in this group",
    description: "Add users to this group to grant them access.",
  },
};


================================================
FILE: web/src/refresh-components/EmptyMessage.tsx
================================================
/**
 * EmptyMessage - A component for displaying empty state messages
 *
 * Displays a translucent card with an icon and message text to indicate
 * when no data or content is available.
 *
 * Features:
 * - Translucent card background with dashed border
 * - Horizontal layout with icon on left, text on right
 * - 0.5rem gap between icon and text
 * - Accepts string children for the message text
 * - Customizable icon
 *
 * @example
 * ```tsx
 * import EmptyMessage from "@/refresh-components/EmptyMessage";
 * import { SvgActivity } from "@opal/icons";
 *
 * // Basic usage
 * <EmptyMessage icon={SvgActivity}>
 *   No connectors set up for your organization.
 * </EmptyMessage>
 *
 * // With different icon
 * <EmptyMessage icon={SvgFileText}>
 *   No documents available.
 * </EmptyMessage>
 * ```
 */

import { SvgEmpty } from "@opal/icons";
import Card from "@/refresh-components/cards/Card";
import Text from "@/refresh-components/texts/Text";
import { Content } from "@opal/layouts";
import { IconProps } from "@opal/types";

export interface EmptyMessageProps {
  icon?: React.FunctionComponent<IconProps>;
  title: string;
  description?: string;
}

export default function EmptyMessage({
  icon: Icon = SvgEmpty,
  title,
  description,
}: EmptyMessageProps) {
  return (
    <Card variant="tertiary">
      <Content
        icon={Icon}
        title={title}
        sizePreset="main-ui"
        variant="body"
        prominence="muted"
      />
      {description && (
        <Text secondaryBody text03>
          {description}
        </Text>
      )}
    </Card>
  );
}


================================================
FILE: web/src/refresh-components/EnabledCount.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import EnabledCount from "./EnabledCount";

const meta: Meta<typeof EnabledCount> = {
  title: "refresh-components/EnabledCount",
  component: EnabledCount,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof EnabledCount>;

export const Default: Story = {
  args: {
    enabledCount: 5,
    totalCount: 12,
  },
};

export const WithName: Story = {
  args: {
    name: "connector",
    enabledCount: 3,
    totalCount: 10,
  },
};

export const AllEnabled: Story = {
  args: {
    name: "source",
    enabledCount: 8,
    totalCount: 8,
  },
};

export const NoneEnabled: Story = {
  args: {
    name: "item",
    enabledCount: 0,
    totalCount: 15,
  },
};

export const SingleItem: Story = {
  args: {
    name: "document",
    enabledCount: 1,
    totalCount: 1,
  },
};


================================================
FILE: web/src/refresh-components/EnabledCount.tsx
================================================
"use client";

import { memo } from "react";
import Text from "@/refresh-components/texts/Text";

interface EnabledCountProps {
  name?: string;
  enabledCount: number;
  totalCount: number;
}

const EnabledCount = memo(
  ({ name, enabledCount, totalCount }: EnabledCountProps) => {
    return (
      <Text text03 mainUiBody>
        <Text mainUiBody className="text-action-link-05">
          {enabledCount}
        </Text>
        {` of ${totalCount} ${name ?? ""}${
          name && totalCount !== 1 ? "s" : ""
        }`}
      </Text>
    );
  }
);
EnabledCount.displayName = "EnabledCount";

export default EnabledCount;


================================================
FILE: web/src/refresh-components/FadingEdgeContainer.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import FadingEdgeContainer from "./FadingEdgeContainer";

const meta: Meta<typeof FadingEdgeContainer> = {
  title: "refresh-components/FadingEdgeContainer",
  component: FadingEdgeContainer,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof FadingEdgeContainer>;

const sampleItems = Array.from({ length: 20 }, (_, i) => (
  <div key={i} className="p-2 border-b border-border-01">
    Item {i + 1}
  </div>
));

export const BottomFade: Story = {
  args: {
    direction: "bottom",
    className: "max-h-[200px] overflow-y-auto",
    children: sampleItems,
  },
};

export const TopFade: Story = {
  args: {
    direction: "top",
    className: "max-h-[200px] overflow-y-auto",
    children: sampleItems,
  },
};

export const CustomFadeHeight: Story = {
  args: {
    direction: "bottom",
    className: "max-h-[200px] overflow-y-auto",
    fadeClassName: "h-16",
    children: sampleItems,
  },
};


================================================
FILE: web/src/refresh-components/FadingEdgeContainer.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";

interface FadingEdgeContainerProps {
  /** Classes applied to the inner scrollable container */
  className?: string;
  /** Classes to customize the fade gradient (e.g., height, color) */
  fadeClassName?: string;
  children: React.ReactNode;
  /** Which edge to show the fade on */
  direction?: "top" | "bottom";
}

/**
 * A container that adds a gradient fade overlay at the top or bottom edge.
 *
 * Use this component to wrap scrollable content where you want to visually
 * indicate that more content exists beyond the visible area. The fade stays
 * fixed relative to the container bounds, not the scroll content.
 *
 * @example
 * // Bottom fade for a scrollable list
 * <FadingEdgeContainer
 *   direction="bottom"
 *   className="max-h-[300px] overflow-y-auto"
 * >
 *   {items.map(item => <Item key={item.id} />)}
 * </FadingEdgeContainer>
 *
 * @example
 * // Top fade with custom fade styling
 * <FadingEdgeContainer
 *   direction="top"
 *   className="max-h-[200px] overflow-y-auto"
 *   fadeClassName="h-12"
 * >
 *   {content}
 * </FadingEdgeContainer>
 */
const FadingEdgeContainer: React.FC<FadingEdgeContainerProps> = ({
  className,
  fadeClassName,
  children,
  direction = "top",
}) => {
  const isTop = direction === "top";

  return (
    <div className="relative">
      <div className={className}>{children}</div>
      <div
        className={cn(
          "absolute inset-x-0 h-8 pointer-events-none z-10",
          isTop
            ? "top-0 bg-gradient-to-b from-background to-transparent"
            : "bottom-0 bg-gradient-to-t from-background to-transparent",
          fadeClassName
        )}
      />
    </div>
  );
};

export default FadingEdgeContainer;


================================================
FILE: web/src/refresh-components/FrostedDiv.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import FrostedDiv from "./FrostedDiv";

const meta: Meta<typeof FrostedDiv> = {
  title: "refresh-components/FrostedDiv",
  component: FrostedDiv,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
  decorators: [
    (Story) => (
      <div
        className="p-12"
        style={{
          background:
            "linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%)",
        }}
      >
        <Story />
      </div>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof FrostedDiv>;

export const Default: Story = {
  args: {
    className: "p-4",
    children: (
      <span className="text-text-04 font-main-ui-action">
        Frosted glass content
      </span>
    ),
  },
};

export const CustomBlur: Story = {
  args: {
    blur: "30px",
    backdropBlur: "10px",
    className: "p-6",
    children: (
      <span className="text-text-04 font-main-ui-action">
        Heavy blur effect
      </span>
    ),
  },
};

export const CustomBorderRadius: Story = {
  args: {
    borderRadius: "0.5rem",
    className: "p-4",
    children: (
      <span className="text-text-04 font-main-ui-action">Rounded corners</span>
    ),
  },
};


================================================
FILE: web/src/refresh-components/FrostedDiv.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";

export interface FrostedDivProps extends React.HTMLAttributes<HTMLDivElement> {
  /**
   * Background color for the frost effect.
   * Defaults to a semi-transparent white
   */
  backgroundColor?: string;

  /**
   * Blur amount for the frosted glass effect (filter blur).
   * Defaults to "20px"
   */
  blur?: string;

  /**
   * Backdrop blur for the glass effect.
   * Defaults to "6px"
   */
  backdropBlur?: string;

  /**
   * Border radius for the frost effect.
   * Defaults to "1rem" (16px)
   */
  borderRadius?: string;

  /**
   * Additional classes for the frost overlay element itself
   */
  overlayClassName?: string;
}

/**
 * FrostedDiv - A wrapper that adds a frosted glass bloom effect behind its children
 *
 * This component wraps content and adds a frosted glass effect behind it.
 * The wrapper adds `relative` positioning - pass layout classes via `className`.
 *
 * @example
 * ```tsx
 * <FrostedDiv>
 *   <Button>Click me</Button>
 * </FrostedDiv>
 * ```
 *
 * @example
 * // Custom blur intensity and layout
 * <FrostedDiv blur="30px" className="flex items-center gap-2 p-2">
 *   <Button>One</Button>
 *   <Button>Two</Button>
 * </FrostedDiv>
 */
export default function FrostedDiv({
  backgroundColor = "var(--frost-overlay)",
  blur = "20px",
  backdropBlur = "6px",
  borderRadius = "1rem",
  overlayClassName,
  className,
  style,
  children,
  ...props
}: FrostedDivProps) {
  return (
    <div className="relative">
      {/* Frost effect overlay - positioned behind content with bloom extending outward */}
      <div
        className={cn("absolute pointer-events-none", overlayClassName)}
        style={{
          borderRadius,
          background: backgroundColor,
          filter: `blur(${blur})`,
          backdropFilter: `blur(${backdropBlur})`,
        }}
      />
      {/* Content rendered above the frost effect */}
      <div className={cn("relative", className)} style={style} {...props}>
        {children}
      </div>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/InlineExternalLink.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InlineExternalLink from "./InlineExternalLink";

const meta: Meta<typeof InlineExternalLink> = {
  title: "refresh-components/InlineExternalLink",
  component: InlineExternalLink,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof InlineExternalLink>;

export const Default: Story = {
  args: {
    href: "https://docs.onyx.app",
    children: "Onyx Documentation",
  },
};

export const CustomClassName: Story = {
  args: {
    href: "https://github.com/onyx-dot-app/onyx",
    children: "GitHub Repository",
    className: "text-action-link-05 underline hover:opacity-80",
  },
};

export const InContext: Story = {
  render: () => (
    <p className="font-main-content-body text-text-04">
      For more information, visit the{" "}
      <InlineExternalLink href="https://docs.onyx.app">
        official documentation
      </InlineExternalLink>{" "}
      or check out the{" "}
      <InlineExternalLink href="https://github.com/onyx-dot-app/onyx">
        source code
      </InlineExternalLink>
      .
    </p>
  ),
};


================================================
FILE: web/src/refresh-components/InlineExternalLink.tsx
================================================
import { ReactNode } from "react";

export interface InlineExternalLinkProps {
  href: string;
  children: ReactNode;
  className?: string;
}

export default function InlineExternalLink({
  href,
  children,
  className,
}: InlineExternalLinkProps) {
  return (
    <a
      href={href}
      target="_blank"
      rel="noopener noreferrer"
      className={className ?? "underline"}
    >
      {children}
    </a>
  );
}


================================================
FILE: web/src/refresh-components/Logo.tsx
================================================
"use client";

import { useSettingsContext } from "@/providers/SettingsProvider";
import {
  DEFAULT_LOGO_SIZE_PX,
  NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED,
} from "@/lib/constants";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import { useMemo } from "react";
import { SvgOnyxLogo, SvgOnyxLogoTyped } from "@opal/icons";

export interface LogoProps {
  folded?: boolean;
  size?: number;
  className?: string;
}

export default function Logo({ folded, size, className }: LogoProps) {
  const resolvedSize = size ?? DEFAULT_LOGO_SIZE_PX;
  const settings = useSettingsContext();
  const logoDisplayStyle = settings.enterpriseSettings?.logo_display_style;
  const applicationName = settings.enterpriseSettings?.application_name;

  // Cache-buster: the logo URL never changes (/api/enterprise-settings/logo)
  // so the browser serves the in-memory cached image even after an admin
  // uploads a new one. Generating a fresh timestamp each time enterprise
  // settings are revalidated by SWR appends a unique query param to force
  // the browser to re-fetch the image.
  const logoBuster = useMemo(
    () => Date.now(),
    // eslint-disable-next-line react-hooks/exhaustive-deps
    [settings.enterpriseSettings]
  );

  const logo = settings.enterpriseSettings?.use_custom_logo ? (
    <div
      className={cn(
        "aspect-square rounded-full overflow-hidden relative flex-shrink-0",
        className
      )}
      style={{ height: resolvedSize }}
    >
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        alt="Logo"
        src={`/api/enterprise-settings/logo?v=${logoBuster}`}
        className="object-cover object-center w-full h-full"
      />
    </div>
  ) : (
    <SvgOnyxLogo
      size={resolvedSize}
      className={cn("flex-shrink-0", className)}
    />
  );

  const renderNameAndPoweredBy = (opts: {
    includeLogo: boolean;
    includeName: boolean;
  }) => {
    return (
      <div className="flex min-w-0 gap-2">
        {opts.includeLogo && logo}
        {!folded && (
          /* H3 text is 4px larger (28px) than the Logo icon (24px), so negative margin hack. */
          <div className="flex flex-1 flex-col -mt-0.5">
            {opts.includeName && (
              <Truncated headingH3>{applicationName}</Truncated>
            )}
            {!NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED && (
              <Text
                secondaryBody
                text03
                className={"line-clamp-1 truncate"}
                nowrap
              >
                Powered by Onyx
              </Text>
            )}
          </div>
        )}
      </div>
    );
  };

  // Handle "logo_only" display style
  if (logoDisplayStyle === "logo_only") {
    return renderNameAndPoweredBy({ includeLogo: true, includeName: false });
  }

  // Handle "name_only" display style
  if (logoDisplayStyle === "name_only") {
    return renderNameAndPoweredBy({ includeLogo: false, includeName: true });
  }

  // Handle "logo_and_name" or default behavior
  return applicationName ? (
    renderNameAndPoweredBy({ includeLogo: true, includeName: true })
  ) : folded ? (
    <SvgOnyxLogo
      size={resolvedSize}
      className={cn("flex-shrink-0", className)}
    />
  ) : (
    <SvgOnyxLogoTyped size={resolvedSize} className={className} />
  );
}


================================================
FILE: web/src/refresh-components/Modal.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import Modal from "./Modal";
import { Button } from "@opal/components";
import { SvgInfoSmall } from "@opal/icons";

const meta: Meta<typeof Modal> = {
  title: "refresh-components/Modal",
  component: Modal,
  tags: ["autodocs"],
  parameters: {
    layout: "fullscreen",
  },
};

export default meta;
type Story = StoryObj<typeof Modal>;

function ModalDemo() {
  const [open, setOpen] = React.useState(false);
  return (
    <div style={{ padding: 32 }}>
      <Button onClick={() => setOpen(true)}>Open Modal</Button>
      <Modal open={open} onOpenChange={setOpen}>
        <Modal.Content width="sm" height="fit">
          <Modal.Header
            icon={SvgInfoSmall}
            title="Example Modal"
            description="This is a demo modal with header, body, and footer."
            onClose={() => setOpen(false)}
          />
          <Modal.Body>
            <div style={{ padding: 16 }}>
              Some body content goes here. You can put forms, text, or anything
              else inside the modal body.
            </div>
          </Modal.Body>
          <Modal.Footer>
            <Button
              variant="default"
              prominence="secondary"
              onClick={() => setOpen(false)}
            >
              Cancel
            </Button>
            <Button
              variant="action"
              prominence="primary"
              onClick={() => setOpen(false)}
            >
              Confirm
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </div>
  );
}

export const Default: Story = {
  render: () => <ModalDemo />,
};

function LargeModalDemo() {
  const [open, setOpen] = React.useState(false);
  return (
    <div style={{ padding: 32 }}>
      <Button onClick={() => setOpen(true)}>Open Large Modal</Button>
      <Modal open={open} onOpenChange={setOpen}>
        <Modal.Content width="full" height="full">
          <Modal.Header
            icon={SvgInfoSmall}
            title="Large Modal"
            description="A large modal with full height."
            onClose={() => setOpen(false)}
          />
          <Modal.Body>
            <div style={{ padding: 16 }}>
              {Array.from({ length: 20 }, (_, i) => (
                <p key={i} style={{ marginBottom: 12 }}>
                  Paragraph {i + 1}: Lorem ipsum dolor sit amet, consectetur
                  adipiscing elit. Sed do eiusmod tempor incididunt ut labore et
                  dolore magna aliqua.
                </p>
              ))}
            </div>
          </Modal.Body>
          <Modal.Footer>
            <Button
              variant="default"
              prominence="secondary"
              onClick={() => setOpen(false)}
            >
              Close
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </div>
  );
}

export const Large: Story = {
  render: () => <LargeModalDemo />,
};

function GrayBackgroundDemo() {
  const [open, setOpen] = React.useState(false);
  return (
    <div style={{ padding: 32 }}>
      <Button onClick={() => setOpen(true)}>Open Gray Modal</Button>
      <Modal open={open} onOpenChange={setOpen}>
        <Modal.Content width="sm" height="fit" background="gray">
          <Modal.Header
            icon={SvgInfoSmall}
            title="Gray Background"
            description="This modal uses background='gray' for a tinted card."
            onClose={() => setOpen(false)}
          />
          <Modal.Body>
            <div style={{ padding: 16 }}>
              The modal card background uses the tinted color variant.
            </div>
          </Modal.Body>
          <Modal.Footer>
            <Button
              variant="default"
              prominence="secondary"
              onClick={() => setOpen(false)}
            >
              Close
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </div>
  );
}

export const GrayBackground: Story = {
  render: () => <GrayBackgroundDemo />,
};

function NoOverlayDemo() {
  const [open, setOpen] = React.useState(false);
  return (
    <div style={{ padding: 32 }}>
      <Button onClick={() => setOpen(true)}>Open Without Overlay</Button>
      <Modal open={open} onOpenChange={setOpen}>
        <Modal.Content width="sm" height="fit" skipOverlay>
          <Modal.Header
            icon={SvgInfoSmall}
            title="No Overlay"
            description="This modal skips the backdrop overlay."
            onClose={() => setOpen(false)}
          />
          <Modal.Body>
            <div style={{ padding: 16 }}>
              The page behind remains fully visible with no blur or mask.
            </div>
          </Modal.Body>
          <Modal.Footer>
            <Button
              variant="default"
              prominence="secondary"
              onClick={() => setOpen(false)}
            >
              Close
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    </div>
  );
}

export const NoOverlay: Story = {
  render: () => <NoOverlayDemo />,
};


================================================
FILE: web/src/refresh-components/Modal.tsx
================================================
"use client";

import React from "react";
import * as DialogPrimitive from "@radix-ui/react-dialog";
import { cn } from "@/lib/utils";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Button } from "@opal/components";
import { Content } from "@opal/layouts";
import { toPlainString } from "@opal/components/text/InlineMarkdown";
import { SvgX } from "@opal/icons";
import { WithoutStyles } from "@/types";
import { Section, SectionProps } from "@/layouts/general-layouts";
import useContainerCenter from "@/hooks/useContainerCenter";

/**
 * Modal Root Component
 *
 * Wrapper around Radix Dialog.Root for managing modal state.
 *
 * @example
 * ```tsx
 * <Modal open={isOpen} onOpenChange={setIsOpen}>
 *   <Modal.Content>
 *     {/* Modal content *\/}
 *   </Modal.Content>
 * </Modal>
 * ```
 */
const ModalRoot = DialogPrimitive.Root;

/**
 * Modal Overlay Component
 *
 * Backdrop overlay that appears behind the modal.
 *
 * @example
 * ```tsx
 * <Modal.Overlay />
 * ```
 */
const ModalOverlay = React.forwardRef<
  React.ComponentRef<typeof DialogPrimitive.Overlay>,
  WithoutStyles<React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>>
>(({ ...props }, ref) => (
  <DialogPrimitive.Overlay
    ref={ref}
    className={cn(
      "fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none",
      "data-[state=open]:animate-in data-[state=closed]:animate-out",
      "data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0"
    )}
    {...props}
  />
));
ModalOverlay.displayName = DialogPrimitive.Overlay.displayName;

/**
 * Modal Context for managing close button ref, warning state, and height variant
 */
interface ModalContextValue {
  closeButtonRef: React.RefObject<HTMLDivElement | null>;
  hasAttemptedClose: boolean;
  setHasAttemptedClose: (value: boolean) => void;
  height: keyof typeof heightClasses;
  hasDescription: boolean;
  setHasDescription: (value: boolean) => void;
}

const ModalContext = React.createContext<ModalContextValue | null>(null);

const useModalContext = () => {
  const context = React.useContext(ModalContext);
  if (!context) {
    throw new Error("Modal compound components must be used within Modal");
  }
  return context;
};

const widthClasses = {
  full: "w-[80dvw]",
  xl: "w-[60rem]",
  lg: "w-[50rem]",
  md: "w-[40rem]",
  sm: "w-[30rem]",
};

const heightClasses = {
  fit: "h-fit",
  sm: "max-h-[30rem] overflow-y-auto",
  lg: "max-h-[calc(100dvh-4rem)] overflow-y-auto",
  full: "h-[80dvh] overflow-y-auto",
};

/**
 * Modal Content Component
 *
 * Main modal container with default styling.
 *
 * @example
 * ```tsx
 * // Using width and height props
 * <Modal.Content width="full" height="full">
 *   {/* Full modal: w-[80dvw] h-[80dvh] *\/}
 * </Modal.Content>
 *
 * <Modal.Content width="xl" height="fit">
 *   {/* XL modal: w-[60rem] h-fit *\/}
 * </Modal.Content>
 *
 * <Modal.Content width="sm" height="sm">
 *   {/* Small modal: w-[30rem] max-h-[30rem] *\/}
 * </Modal.Content>
 *
 * <Modal.Content width="sm" height="lg">
 *   {/* Tall modal: w-[30rem] max-h-[calc(100dvh-4rem)] *\/}
 * </Modal.Content>
 * ```
 */
export interface ModalContentProps
  extends WithoutStyles<
    React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>
  > {
  width?: keyof typeof widthClasses;
  height?: keyof typeof heightClasses;
  /** Vertical placement of the modal. `"center"` (default) centers in the
   *  viewport/container. `"top"` pins the modal near the top of the viewport,
   *  matching the position used by CommandMenu. */
  position?: "center" | "top";
  preventAccidentalClose?: boolean;
  skipOverlay?: boolean;
  background?: "default" | "gray";
  /** Content rendered below the modal card, floating with gap-4 (1rem) separation.
   *  Stays inside DialogPrimitive.Content for proper focus management. */
  bottomSlot?: React.ReactNode;
}
const ModalContent = React.forwardRef<
  React.ComponentRef<typeof DialogPrimitive.Content>,
  ModalContentProps
>(
  (
    {
      children,
      width = "xl",
      height = "fit",
      position = "center",
      preventAccidentalClose = true,
      skipOverlay = false,
      background = "default",
      bottomSlot,
      ...props
    },
    ref
  ) => {
    const closeButtonRef = React.useRef<HTMLDivElement>(null);
    const [hasAttemptedClose, setHasAttemptedClose] = React.useState(false);
    const [hasDescription, setHasDescription] = React.useState(false);
    const hasUserTypedRef = React.useRef(false);

    // Reset state when modal closes or opens
    const resetState = React.useCallback(() => {
      setHasAttemptedClose(false);
      hasUserTypedRef.current = false;
    }, []);

    // Handle input events to detect typing
    const handleInput = React.useCallback((e: Event) => {
      // Early exit if already detected typing (performance optimization)
      if (hasUserTypedRef.current) {
        return;
      }

      // Only trust events triggered by actual user interaction
      if (!e.isTrusted) {
        return;
      }

      const target = e.target as HTMLElement;

      // Only handle input and textarea elements
      if (
        !(
          target instanceof HTMLInputElement ||
          target instanceof HTMLTextAreaElement
        )
      ) {
        return;
      }

      // Skip non-text inputs
      if (
        target.type === "hidden" ||
        target.type === "submit" ||
        target.type === "button" ||
        target.type === "checkbox" ||
        target.type === "radio"
      ) {
        return;
      }
      // Mark that user has typed something
      hasUserTypedRef.current = true;
    }, []);

    // Keep track of the container node for cleanup
    const containerNodeRef = React.useRef<HTMLDivElement | null>(null);

    // Callback ref to attach event listener when element mounts
    const contentRef = React.useCallback(
      (node: HTMLDivElement | null) => {
        // Cleanup previous listener if exists
        if (containerNodeRef.current) {
          containerNodeRef.current.removeEventListener(
            "input",
            handleInput,
            true
          );
        }

        // Attach new listener if node exists
        if (node) {
          node.addEventListener("input", handleInput, true);
          containerNodeRef.current = node;
        } else {
          containerNodeRef.current = null;
        }
      },
      [handleInput]
    );

    // Check if user has typed anything
    const hasModifiedInputs = React.useCallback(() => {
      return hasUserTypedRef.current;
    }, []);

    // Handle escape key and outside clicks
    const handleInteractOutside = React.useCallback(
      (e: Event) => {
        // If preventAccidentalClose is disabled, always allow immediate close
        if (!preventAccidentalClose) {
          setHasAttemptedClose(false);
          return;
        }

        // If preventAccidentalClose is enabled, check if user has modified inputs
        if (hasModifiedInputs()) {
          if (!hasAttemptedClose) {
            // First attempt: prevent close and focus the close button
            e.preventDefault();
            setHasAttemptedClose(true);
            setTimeout(() => {
              closeButtonRef.current?.focus();
            }, 0);
          } else {
            // Second attempt: allow close
            setHasAttemptedClose(false);
          }
        } else {
          // No modified inputs: allow immediate close
          setHasAttemptedClose(false);
        }
      },
      [preventAccidentalClose, hasModifiedInputs, hasAttemptedClose]
    );

    const handleRef = (node: HTMLDivElement | null) => {
      // Handle forwarded ref
      if (typeof ref === "function") {
        ref(node);
      } else if (ref) {
        ref.current = node;
      }
      // Handle content ref with event listener
      contentRef(node);
    };

    const { centerX, centerY, hasContainerCenter } = useContainerCenter();

    const isTop = position === "top";

    const animationClasses = cn(
      "data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0",
      "data-[state=open]:zoom-in-95 data-[state=closed]:zoom-out-95",
      !isTop &&
        "data-[state=open]:slide-in-from-top-1/2 data-[state=closed]:slide-out-to-top-1/2",
      "duration-200"
    );

    const containerStyle: React.CSSProperties | undefined =
      hasContainerCenter && !isTop
        ? ({
            left: centerX,
            top: centerY,
            "--tw-enter-translate-x": "-50%",
            "--tw-exit-translate-x": "-50%",
            "--tw-enter-translate-y": "-50%",
            "--tw-exit-translate-y": "-50%",
          } as React.CSSProperties)
        : hasContainerCenter && isTop
          ? ({
              left: centerX,
              "--tw-enter-translate-x": "-50%",
              "--tw-exit-translate-x": "-50%",
            } as React.CSSProperties)
          : undefined;

    const positionClasses = cn(
      "fixed -translate-x-1/2",
      isTop
        ? cn("top-[72px]", !hasContainerCenter && "left-1/2")
        : cn("-translate-y-1/2", !hasContainerCenter && "left-1/2 top-1/2")
    );

    const dialogEventHandlers = {
      onOpenAutoFocus: (e: Event) => {
        resetState();
        props.onOpenAutoFocus?.(e);
      },
      onCloseAutoFocus: (e: Event) => {
        resetState();
        props.onCloseAutoFocus?.(e);
      },
      onEscapeKeyDown: handleInteractOutside,
      onPointerDownOutside: handleInteractOutside,
      ...(!hasDescription && { "aria-describedby": undefined }),
      ...props,
    };

    const cardClasses = cn(
      "overflow-hidden",
      background === "gray" ? "bg-background-tint-01" : "bg-background-tint-00",
      "border rounded-16 shadow-2xl",
      "flex flex-col",
      heightClasses[height]
    );

    return (
      <ModalContext.Provider
        value={{
          closeButtonRef,
          hasAttemptedClose,
          setHasAttemptedClose,
          height,
          hasDescription,
          setHasDescription,
        }}
      >
        <DialogPrimitive.Portal>
          {!skipOverlay && <ModalOverlay />}
          {bottomSlot ? (
            // With bottomSlot: use asChild to wrap card + slot in a flex column
            <DialogPrimitive.Content
              asChild
              ref={handleRef}
              {...dialogEventHandlers}
            >
              <div
                style={containerStyle}
                className={cn(
                  positionClasses,
                  "z-modal",
                  "flex flex-col gap-4 items-center",
                  "max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-2rem)]",
                  animationClasses,
                  widthClasses[width]
                )}
              >
                <div className={cn(cardClasses, "w-full min-h-0")}>
                  {children}
                </div>
                <div className="w-full flex-shrink-0">{bottomSlot}</div>
              </div>
            </DialogPrimitive.Content>
          ) : (
            // Without bottomSlot: original single-element rendering
            <DialogPrimitive.Content
              ref={handleRef}
              style={containerStyle}
              className={cn(
                positionClasses,
                "overflow-hidden",
                "z-modal",
                background === "gray"
                  ? "bg-background-tint-01"
                  : "bg-background-tint-00",
                "border rounded-16 shadow-2xl",
                "flex flex-col",
                "max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-2rem)]",
                animationClasses,
                widthClasses[width],
                heightClasses[height]
              )}
              {...dialogEventHandlers}
            >
              {children}
            </DialogPrimitive.Content>
          )}
        </DialogPrimitive.Portal>
      </ModalContext.Provider>
    );
  }
);
ModalContent.displayName = DialogPrimitive.Content.displayName;

/**
 * Modal Header Component
 *
 * Container for header content with optional bottom shadow. All header visuals
 * (icon, title, description, close button) are now controlled via this single
 * component using props, so no additional subcomponents are required.
 *
 * When `icon` is omitted the header renders a minimal variant: just the
 * title + description with the close button inline to the right of the title.
 * This is JUST to be used for preview windows
 *
 * @example
 * ```tsx
 * <Modal.Header icon={SvgWarning} title="Confirm Action" description="Are you sure?" />
 *
 * // Minimal variant (no icon)
 * <Modal.Header title="Confirm Action" description="Are you sure?" />
 *
 * // With custom content
 * // Children render below the provided title/description stack.
 * <Modal.Header icon={SvgFile} title="Select Files">
 *   <InputTypeIn placeholder="Search..." />
 * </Modal.Header>
 * ```
 */
interface ModalHeaderProps extends Omit<WithoutStyles<SectionProps>, "title"> {
  icon?: IconFunctionComponent;
  moreIcon1?: IconFunctionComponent;
  moreIcon2?: IconFunctionComponent;
  title: string | RichStr;
  description?: string | RichStr;
  onClose?: () => void;
}
const ModalHeader = React.forwardRef<HTMLDivElement, ModalHeaderProps>(
  (
    {
      icon,
      moreIcon1,
      moreIcon2,
      title,
      description,
      onClose,
      children,
      ...props
    },
    ref
  ) => {
    const { closeButtonRef, setHasDescription } = useModalContext();

    React.useLayoutEffect(() => {
      setHasDescription(!!description);
    }, [description, setHasDescription]);

    const closeButton = onClose && (
      <div
        tabIndex={-1}
        ref={closeButtonRef as React.RefObject<HTMLDivElement>}
        className="outline-none"
      >
        <DialogPrimitive.Close asChild>
          <Button
            icon={SvgX}
            prominence="tertiary"
            size="sm"
            onClick={onClose}
          />
        </DialogPrimitive.Close>
      </div>
    );

    return (
      <Section
        ref={ref}
        padding={0.5}
        alignItems="start"
        height="fit"
        {...props}
      >
        <Section
          flexDirection="row"
          justifyContent="between"
          alignItems="start"
          gap={0}
          padding={0.5}
        >
          <div className="relative w-full">
            {/* Close button is absolutely positioned because:
               1. Figma mocks place it overlapping the top-right of the content area
               2. Using ContentAction with rightChildren causes the description
                  to wrap to the second line early due to the button reserving space */}
            <div className="absolute top-0 right-0">{closeButton}</div>
            <DialogPrimitive.Title asChild>
              <div>
                <Content
                  icon={icon}
                  moreIcon1={moreIcon1}
                  moreIcon2={moreIcon2}
                  title={title}
                  description={description}
                  sizePreset="section"
                  variant="heading"
                />
                {description && (
                  <DialogPrimitive.Description className="hidden">
                    {toPlainString(description)}
                  </DialogPrimitive.Description>
                )}
              </div>
            </DialogPrimitive.Title>
          </div>
        </Section>
        {children}
      </Section>
    );
  }
);
ModalHeader.displayName = "ModalHeader";

/**
 * Modal Body Component
 *
 * Content area for the main modal content.
 *
 * @example
 * ```tsx
 * <Modal.Body>
 *   {/* Content *\/}
 * </Modal.Body>
 * ```
 */
interface ModalBodyProps extends WithoutStyles<SectionProps> {
  twoTone?: boolean;
}
const ModalBody = React.forwardRef<HTMLDivElement, ModalBodyProps>(
  ({ twoTone = true, children, ...props }, ref) => {
    return (
      <div
        ref={ref}
        className={cn(
          twoTone && "bg-background-tint-01",
          "flex-auto min-h-0 overflow-y-auto w-full"
        )}
      >
        <Section
          height="auto"
          padding={1}
          gap={1}
          alignItems="start"
          {...props}
        >
          {children}
        </Section>
      </div>
    );
  }
);
ModalBody.displayName = "ModalBody";

/**
 * Modal Footer Component
 *
 * Footer section for actions/buttons.
 *
 * @example
 * ```tsx
 * // Right-aligned buttons
 * <Modal.Footer>
 *   <Button secondary>Cancel</Button>
 *   <Button primary>Confirm</Button>
 * </Modal.Footer>
 * ```
 */
const ModalFooter = React.forwardRef<
  HTMLDivElement,
  WithoutStyles<SectionProps>
>(({ ...props }, ref) => {
  return (
    <Section
      ref={ref}
      flexDirection="row"
      justifyContent="end"
      gap={0.5}
      padding={1}
      height="fit"
      {...props}
    />
  );
});
ModalFooter.displayName = "ModalFooter";

export default Object.assign(ModalRoot, {
  Content: ModalContent,
  Header: ModalHeader,
  Body: ModalBody,
  Footer: ModalFooter,
});

// ============================================================================
// Common Layouts
// ============================================================================

export interface BasicModalFooterProps {
  left?: React.ReactNode;
  cancel?: React.ReactNode;
  submit?: React.ReactNode;
}

export function BasicModalFooter({
  left,
  cancel,
  submit,
}: BasicModalFooterProps) {
  return (
    <>
      {left && <Section alignItems="start">{left}</Section>}
      {(cancel || submit) && (
        <Section flexDirection="row" justifyContent="end" gap={0.5}>
          {cancel}
          {submit}
        </Section>
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-components/OverflowDiv.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import OverflowDiv from "./OverflowDiv";

const meta: Meta<typeof OverflowDiv> = {
  title: "refresh-components/OverflowDiv",
  component: OverflowDiv,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof OverflowDiv>;

const sampleItems = Array.from({ length: 25 }, (_, i) => (
  <div key={i} className="p-2 border-b border-border-01">
    Sidebar item {i + 1}
  </div>
));

export const Default: Story = {
  args: {
    style: { width: 260, height: 300 },
    children: sampleItems,
  },
};

export const MaskDisabled: Story = {
  args: {
    disableMask: true,
    style: { width: 260, height: 300 },
    children: sampleItems,
  },
};

export const CustomHeight: Story = {
  args: {
    height: "4rem",
    style: { width: 260, height: 300 },
    children: sampleItems,
  },
};


================================================
FILE: web/src/refresh-components/OverflowDiv.tsx
================================================
"use client";

import React, { useRef, useEffect, useLayoutEffect } from "react";
import { usePathname } from "next/navigation";
import { cn } from "@/lib/utils";

export interface VerticalShadowScrollerProps
  extends React.HtmlHTMLAttributes<HTMLDivElement> {
  // Mask related
  disableMask?: boolean;
  backgroundColor?: string;
  height?: string;
  /**
   * Unique identifier for this scroll container to enable scroll position persistence across navigation.
   *
   * When provided, the scroll position will be saved to a global Map and restored when the pathname changes
   * (e.g., navigating between admin pages). This prevents the sidebar from jumping to the top when clicking links.
   *
   * If not provided, scroll position will NOT be saved/restored (opt-out of scroll persistence).
   *
   * @example scrollKey="admin-sidebar"
   */
  scrollKey?: string;
}

const SCROLL_POSITION_PREFIX = "onyx-scroll-";

export default function OverflowDiv({
  disableMask,
  backgroundColor = "var(--background-tint-02)",
  height: minHeight = "2rem",
  scrollKey,

  className,
  ...rest
}: VerticalShadowScrollerProps) {
  const scrollRef = useRef<HTMLDivElement>(null);
  const pathname = usePathname();

  // Save scroll position on every scroll event (only if scrollKey is provided)
  useEffect(() => {
    if (!scrollKey) return; // Opt-out: no scroll persistence if scrollKey not provided

    const scrollElement = scrollRef.current;
    if (!scrollElement) return;

    const storageKey = `${SCROLL_POSITION_PREFIX}${scrollKey}`;
    const handleScroll = () => {
      sessionStorage.setItem(storageKey, scrollElement.scrollTop.toString());
    };

    scrollElement.addEventListener("scroll", handleScroll, { passive: true });
    return () => scrollElement.removeEventListener("scroll", handleScroll);
  }, [scrollKey]);

  // Restore scroll position immediately after pathname changes (before paint)
  useLayoutEffect(() => {
    if (!scrollKey) return; // Opt-out: no scroll restoration if scrollKey not provided

    const scrollElement = scrollRef.current;
    if (!scrollElement) return;

    const storageKey = `${SCROLL_POSITION_PREFIX}${scrollKey}`;
    const savedPosition = parseInt(
      sessionStorage.getItem(storageKey) || "0",
      10
    );
    scrollElement.scrollTop = savedPosition;
  }, [pathname, scrollKey]);

  return (
    <div className="relative flex-1 min-h-0 overflow-y-hidden flex flex-col">
      <div
        ref={scrollRef}
        className="flex-1 min-h-0 overflow-y-auto flex flex-col"
      >
        <div className={cn("flex-1 flex flex-col", className)} {...rest} />
        <div style={{ minHeight }} />
      </div>
      {!disableMask && (
        <div
          className="absolute bottom-0 left-0 right-0 h-[1rem] z-[20] pointer-events-none"
          style={{
            background: `linear-gradient(to bottom, transparent, ${backgroundColor})`,
          }}
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/Popover.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import Popover from "./Popover";
import { Button } from "@opal/components";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Popover> = {
  title: "refresh-components/Popover",
  component: Popover,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Popover>;

export const Default: Story = {
  render: () => (
    <Popover>
      <Popover.Trigger asChild>
        <Button>Open Popover</Button>
      </Popover.Trigger>
      <Popover.Content>
        <div style={{ padding: 8 }}>
          <p>Popover content goes here.</p>
        </div>
      </Popover.Content>
    </Popover>
  ),
};

export const WidthVariants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 16 }}>
      {(["fit", "md", "lg", "xl"] as const).map((width) => (
        <Popover key={width}>
          <Popover.Trigger asChild>
            <Button prominence="secondary">{width}</Button>
          </Popover.Trigger>
          <Popover.Content width={width}>
            <div style={{ padding: 8 }}>
              <p>Width: {width}</p>
            </div>
          </Popover.Content>
        </Popover>
      ))}
    </div>
  ),
};

export const WithMenu: Story = {
  render: () => (
    <Popover>
      <Popover.Trigger asChild>
        <Button>Options</Button>
      </Popover.Trigger>
      <Popover.Content width="lg">
        <Popover.Menu>
          <Popover.Close asChild>
            <Button prominence="tertiary" width="full">
              Edit
            </Button>
          </Popover.Close>
          <Popover.Close asChild>
            <Button prominence="tertiary" width="full">
              Duplicate
            </Button>
          </Popover.Close>
          {null}
          <Popover.Close asChild>
            <Button variant="danger" prominence="tertiary" width="full">
              Delete
            </Button>
          </Popover.Close>
        </Popover.Menu>
      </Popover.Content>
    </Popover>
  ),
};


================================================
FILE: web/src/refresh-components/Popover.tsx
================================================
"use client";

import React from "react";
import * as PopoverPrimitive from "@radix-ui/react-popover";
import { cn } from "@/lib/utils";
import Separator from "@/refresh-components/Separator";
import ShadowDiv from "@/refresh-components/ShadowDiv";
import { WithoutStyles } from "@/types";
import { Section } from "@/layouts/general-layouts";

/**
 * Popover Root Component
 *
 * Wrapper around Radix Popover.Root for managing popover state.
 *
 * @example
 * ```tsx
 * <Popover open={isOpen} onOpenChange={setIsOpen}>
 *   <Popover.Trigger>
 *     <button>Open</button>
 *   </Popover.Trigger>
 *   <Popover.Content>
 *     {/* Popover content *\/}
 *   </Popover.Content>
 * </Popover>
 * ```
 */
const PopoverRoot = PopoverPrimitive.Root;

/**
 * Popover Trigger Component
 *
 * Button or element that triggers the popover to open.
 *
 * @example
 * ```tsx
 * <Popover.Trigger asChild>
 *   <button>Click me</button>
 * </Popover.Trigger>
 * ```
 */
const PopoverTrigger = PopoverPrimitive.Trigger;

/**
 * Popover Anchor Component
 *
 * An optional element to position the popover relative to.
 *
 * @example
 * ```tsx
 * <Popover>
 *   <Popover.Anchor asChild>
 *     <div>Anchor element</div>
 *   </Popover.Anchor>
 *   <Popover.Trigger>
 *     <button>Click me</button>
 *   </Popover.Trigger>
 *   <Popover.Content>
 *     {/* This will be positioned relative to the anchor *\/}
 *   </Popover.Content>
 * </Popover>
 * ```
 */
const PopoverAnchor = PopoverPrimitive.Anchor;

/**
 * Popover Close Component
 *
 * Element that closes the popover when clicked.
 *
 * @example
 * ```tsx
 * <Popover.Close asChild>
 *   <button>Close</button>
 * </Popover.Close>
 * ```
 */
const PopoverClose = PopoverPrimitive.Close;

/**
 * Popover Content Component
 *
 * The main popover container with default styling.
 *
 * Widths:
 * - `fit`: Fits content width (default)
 * - `md`: Medium width (12rem)
 * - `lg`: Large width (15rem)
 * - `xl`: Extra large width (18rem)
 *
 * @param width - Width of the popover. Default: "fit"
 *
 * @example
 * ```tsx
 * <Popover.Content align="start" sideOffset={8}>
 *   <div>Popover content here</div>
 * </Popover.Content>
 *
 * // Medium width
 * <Popover.Content width="md">
 *   <div>Medium width content</div>
 * </Popover.Content>
 *
 * // Extra large width
 * <Popover.Content width="xl">
 *   <div>Extra large width content</div>
 * </Popover.Content>
 * ```
 */
type PopoverWidths = "fit" | "sm" | "md" | "lg" | "xl" | "trigger";
const widthClasses: Record<PopoverWidths, string> = {
  fit: "w-fit",
  sm: "w-[10rem]",
  md: "w-[12rem]",
  lg: "w-[15rem]",
  xl: "w-[18rem]",
  trigger: "w-[var(--radix-popover-trigger-width)]",
};
interface PopoverContentProps
  extends WithoutStyles<
    React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content>
  > {
  width?: PopoverWidths;
  /** Portal container. Set to a DOM element to render inside it (e.g. inside a modal). */
  container?: HTMLElement | null;
  ref?: React.Ref<React.ComponentRef<typeof PopoverPrimitive.Content>>;
}
function PopoverContent({
  width = "fit",
  container,
  align = "center",
  sideOffset = 4,
  ref,
  ...props
}: PopoverContentProps) {
  return (
    <PopoverPrimitive.Portal container={container}>
      <PopoverPrimitive.Content
        ref={ref}
        align={align}
        sideOffset={sideOffset}
        collisionPadding={8}
        className={cn(
          "bg-background-neutral-00 p-1 z-popover rounded-12 border shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
          "flex flex-col",
          "max-h-[var(--radix-popover-content-available-height)]",
          "overflow-hidden",
          widthClasses[width]
        )}
        {...props}
      />
    </PopoverPrimitive.Portal>
  );
}

export default Object.assign(PopoverRoot, {
  Trigger: PopoverTrigger,
  Anchor: PopoverAnchor,
  Content: PopoverContent,
  Close: PopoverClose,
  Menu: PopoverMenu,
});

// ============================================================================
// Common Layouts
// ============================================================================

function SeparatorHelper() {
  return <Separator className="py-0 px-2" />;
}

/**
 * Popover Menu Component
 *
 * Converts a list of React nodes into a vertical menu with automatic separator handling.
 *
 * @remarks
 * - Treats `null` values as separator lines
 * - Filters out `undefined` and `false` values
 * - Removes separators at the beginning and end of the list
 *
 * @example
 * ```tsx
 * <Popover>
 *   <Popover.Trigger asChild>
 *     <button>Options</button>
 *   </Popover.Trigger>
 *   <Popover.Content>
 *     <Popover.Menu>
 *       <MenuItem>Option 1</MenuItem>
 *       <MenuItem>Option 2</MenuItem>
 *       {null}  {/* Separator line *\/}
 *       <MenuItem>Option 3</MenuItem>
 *     </Popover.Menu>
 *   </Popover.Content>
 * </Popover>
 *
 * // With footer
 * <Popover.Menu
 *   footer={<Button>Apply</Button>}
 * >
 *   <MenuItem>Item 1</MenuItem>
 *   <MenuItem>Item 2</MenuItem>
 * </Popover.Menu>
 * ```
 */
export interface PopoverMenuProps {
  children?: React.ReactNode[];
  footer?: React.ReactNode;

  // Ref for the scrollable container (useful for programmatic scrolling)
  scrollContainerRef?: React.RefObject<HTMLDivElement | null>;
}
export function PopoverMenu({
  children,
  footer,
  scrollContainerRef,
}: PopoverMenuProps) {
  if (!children) return null;

  const definedChildren = children.filter(
    (child) => child !== undefined && child !== false
  );
  const filteredChildren = definedChildren.filter((child, index) => {
    if (child !== null) return true;
    return index !== 0 && index !== definedChildren.length - 1;
  });

  return (
    <Section alignItems="stretch" height="auto" className="flex-1 min-h-0">
      <ShadowDiv
        scrollContainerRef={scrollContainerRef}
        className="flex flex-col gap-1 max-h-[20rem] w-full"
      >
        {filteredChildren.map((child, index) => (
          <div key={index}>
            {child === undefined ? (
              <></>
            ) : child === null ? (
              // Render `null`s as separator lines
              <SeparatorHelper />
            ) : (
              child
            )}
          </div>
        ))}
      </ShadowDiv>
      {footer && (
        <>
          <SeparatorHelper />
          {footer}
        </>
      )}
    </Section>
  );
}


================================================
FILE: web/src/refresh-components/PreviewImage.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import PreviewImage from "./PreviewImage";

const meta: Meta<typeof PreviewImage> = {
  title: "refresh-components/PreviewImage",
  component: PreviewImage,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof PreviewImage>;

export const Default: Story = {
  args: {
    src: "https://placehold.co/400x300/EEE/31343C?text=Preview+Image",
    alt: "Sample preview image",
  },
};

export const WithCustomClass: Story = {
  args: {
    src: "https://placehold.co/200x200/EEE/31343C?text=Square",
    alt: "Square preview",
    className: "w-[200px] h-[200px] rounded-12",
  },
};

export const Landscape: Story = {
  args: {
    src: "https://placehold.co/600x200/EEE/31343C?text=Landscape",
    alt: "Landscape preview",
    className: "max-w-[400px]",
  },
};


================================================
FILE: web/src/refresh-components/PreviewImage.tsx
================================================
import { cn } from "@/lib/utils";

interface PreviewImageProps {
  src: string;
  alt: string;
  className?: string;
}

export default function PreviewImage({
  src,
  alt,
  className,
}: PreviewImageProps) {
  return (
    <img
      src={src}
      alt={alt}
      className={cn("object-contain object-center", className)}
    />
  );
}


================================================
FILE: web/src/refresh-components/ScrollIndicatorDiv.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ScrollIndicatorDiv from "./ScrollIndicatorDiv";

const meta: Meta<typeof ScrollIndicatorDiv> = {
  title: "refresh-components/ScrollIndicatorDiv",
  component: ScrollIndicatorDiv,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof ScrollIndicatorDiv>;

const sampleItems = Array.from({ length: 30 }, (_, i) => (
  <div key={i} className="p-2 border-b border-border-01">
    Scrollable item {i + 1}
  </div>
));

export const GradientVariant: Story = {
  args: {
    variant: "gradient",
    style: { width: 300, height: 250 },
    children: sampleItems,
  },
};

export const ShadowVariant: Story = {
  args: {
    variant: "shadow",
    style: { width: 300, height: 250 },
    children: sampleItems,
  },
};

export const DisabledIndicators: Story = {
  args: {
    disableIndicators: true,
    style: { width: 300, height: 250 },
    children: sampleItems,
  },
};

export const WithBottomSpacing: Story = {
  args: {
    variant: "gradient",
    bottomSpacing: "2rem",
    style: { width: 300, height: 250 },
    children: sampleItems,
  },
};


================================================
FILE: web/src/refresh-components/ScrollIndicatorDiv.tsx
================================================
"use client";

import React, { useEffect, useRef, useState, useCallback } from "react";
import { cn } from "@/lib/utils";

// Throttle interval for scroll events (~60fps)
const SCROLL_THROTTLE_MS = 16;

/**
 * A scrollable container that shows gradient or shadow indicators when
 * content overflows above or below the visible area.
 *
 * HEIGHT CONSTRAINT REQUIREMENT
 *
 * This component relies on its inner scroll container having a smaller
 * clientHeight than its scrollHeight. For that to happen, the entire
 * ancestor chain must constrain height via flex sizing (flex-1 min-h-0),
 * NOT via percentage heights (h-full).
 *
 * height: 100% resolves to "auto" when the containing block's height is
 * determined by flex layout (flex-auto, flex-1) rather than an explicit
 * height property — this is per the CSS spec. When that happens, the
 * container grows to fit its content and scrollHeight === clientHeight,
 * making scroll indicators invisible.
 *
 * Correct pattern: every ancestor up to the nearest fixed-height boundary
 * must form an unbroken flex column chain using "flex-1 min-h-0":
 *
 *   fixed-height-ancestor  (e.g. h-[500px])
 *     flex flex-col flex-1 min-h-0   <-- use flex-1, NOT h-full
 *       ScrollIndicatorDiv
 *         ...tall content...
 *
 * Common mistakes:
 *  - Using h-full instead of flex-1 min-h-0 anywhere in the chain.
 *  - Placing this inside a parent with overflow-y: auto (e.g. Modal.Body),
 *    which becomes the scroll container instead of this component's inner div.
 */
export interface ScrollIndicatorDivProps
  extends React.HTMLAttributes<HTMLDivElement> {
  // Mask/Shadow options
  disableIndicators?: boolean;
  disableTopIndicator?: boolean;
  disableBottomIndicator?: boolean;
  backgroundColor?: string;
  indicatorHeight?: string;

  // Choose between gradient mask or box shadow
  variant?: "gradient" | "shadow";

  // Optional spacing at bottom (defaults to none)
  bottomSpacing?: string;
}

export default function ScrollIndicatorDiv({
  disableIndicators = false,
  disableTopIndicator = false,
  disableBottomIndicator = false,
  backgroundColor = "var(--background-tint-02)",
  indicatorHeight = "3rem",
  variant = "gradient",
  bottomSpacing,

  className,
  children,
  ...rest
}: ScrollIndicatorDivProps) {
  const scrollContainerRef = useRef<HTMLDivElement>(null);
  const [showTopIndicator, setShowTopIndicator] = useState(false);
  const [showBottomIndicator, setShowBottomIndicator] = useState(false);
  const throttleTimeoutRef = useRef<number | null>(null);
  const isThrottledRef = useRef(false);

  const updateScrollIndicators = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return;

    const { scrollTop, scrollHeight, clientHeight } = container;
    const isScrollable = scrollHeight > clientHeight;

    // Show top indicator if scrolled down from top
    setShowTopIndicator(isScrollable && scrollTop > 0);

    // Show bottom indicator if not scrolled to bottom
    // Add small threshold (1px) to account for rounding errors
    setShowBottomIndicator(
      isScrollable && scrollTop < scrollHeight - clientHeight - 1
    );
  }, []);

  // Throttled scroll handler for better performance
  const handleScroll = useCallback(() => {
    if (isThrottledRef.current) return;

    isThrottledRef.current = true;
    updateScrollIndicators();

    throttleTimeoutRef.current = window.setTimeout(() => {
      isThrottledRef.current = false;
      updateScrollIndicators();
    }, SCROLL_THROTTLE_MS);
  }, [updateScrollIndicators]);

  useEffect(() => {
    const container = scrollContainerRef.current;
    if (!container) return;

    // Initial check
    updateScrollIndicators();

    // Update on scroll (throttled)
    container.addEventListener("scroll", handleScroll, { passive: true });

    // Update when the container itself resizes
    const resizeObserver = new ResizeObserver(updateScrollIndicators);
    resizeObserver.observe(container);

    // Update when descendants change (e.g. syntax highlighting mutates the
    // DOM after initial render, which changes scrollHeight without firing
    // resize or scroll events on the container).
    const mutationObserver = new MutationObserver(handleScroll);

    return () => {
      container.removeEventListener("scroll", handleScroll);
      resizeObserver.disconnect();
      mutationObserver.disconnect();
      if (throttleTimeoutRef.current) {
        clearTimeout(throttleTimeoutRef.current);
      }
    };
  }, [updateScrollIndicators, handleScroll]);

  // Update when children change
  useEffect(() => {
    updateScrollIndicators();
  }, [children]);

  const getIndicatorStyle = (direction: "top" | "bottom") => {
    if (variant === "shadow") {
      return {
        height: "2px",
        backgroundColor: backgroundColor,
        boxShadow:
          direction === "top"
            ? "0 -2px 12px 0 var(--shadow-02), 0 0 4px 1px var(--shadow-02)"
            : "0 4px 24px 0 var(--shadow-02), 0 2px 8px 2px var(--shadow-02)",
      };
    }

    // Gradient variant - use full indicator height
    return {
      height: indicatorHeight,
      background:
        direction === "top"
          ? `linear-gradient(to top, transparent, ${backgroundColor})`
          : `linear-gradient(to bottom, transparent, ${backgroundColor})`,
    };
  };

  return (
    <div className="relative flex-1 min-h-0 overflow-y-hidden flex flex-col w-full">
      {/* Top indicator */}
      {!disableIndicators && !disableTopIndicator && showTopIndicator && (
        <div
          className="absolute top-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200"
          style={getIndicatorStyle("top")}
        />
      )}

      {/* Scrollable content */}
      <div
        ref={scrollContainerRef}
        className={cn(
          "flex-1 min-h-0 overflow-y-auto flex flex-col",
          className
        )}
        {...rest}
      >
        {children}
        {bottomSpacing && <div style={{ minHeight: bottomSpacing }} />}
      </div>

      {/* Bottom indicator */}
      {!disableIndicators && !disableBottomIndicator && showBottomIndicator && (
        <div
          className="absolute bottom-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200"
          style={getIndicatorStyle("bottom")}
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/Separator.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Separator from "./Separator";

const meta: Meta<typeof Separator> = {
  title: "refresh-components/Separator",
  component: Separator,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Separator>;

export const Horizontal: Story = {
  decorators: [
    (Story) => (
      <div style={{ width: 400 }}>
        <div>Content above</div>
        <Story />
        <div>Content below</div>
      </div>
    ),
  ],
};

export const Vertical: Story = {
  args: {
    orientation: "vertical",
  },
  decorators: [
    (Story) => (
      <div style={{ display: "flex", alignItems: "center", height: 60 }}>
        <span>Left</span>
        <Story />
        <span>Right</span>
      </div>
    ),
  ],
};

export const NoPadding: Story = {
  args: {
    noPadding: true,
  },
  decorators: [
    (Story) => (
      <div style={{ width: 400 }}>
        <div>No padding above</div>
        <Story />
        <div>No padding below</div>
      </div>
    ),
  ],
};


================================================
FILE: web/src/refresh-components/Separator.tsx
================================================
"use client";

import React from "react";
import * as SeparatorPrimitive from "@radix-ui/react-separator";
import { cn } from "@/lib/utils";

export interface SeparatorProps
  extends React.ComponentPropsWithoutRef<typeof SeparatorPrimitive.Root> {
  noPadding?: boolean;
  /** Custom horizontal padding in rem. Overrides the default padding. */
  paddingXRem?: number;
  /** Custom vertical padding in rem. Overrides the default padding. */
  paddingYRem?: number;
}

/**
 * Separator Component
 *
 * A visual divider that separates content either horizontally or vertically.
 * Built on Radix UI's Separator primitive.
 *
 * @example
 * ```tsx
 * // Horizontal separator (default)
 * <Separator />
 *
 * // Vertical separator
 * <Separator orientation="vertical" />
 *
 * // With custom className
 * <Separator className="my-8" />
 *
 * // Non-decorative (announced by screen readers)
 * <Separator decorative={false} />
 * ```
 */
const Separator = React.forwardRef(
  (
    {
      noPadding,
      paddingXRem,
      paddingYRem,
      className,
      orientation = "horizontal",
      decorative = true,
      ...props
    }: SeparatorProps,
    ref: React.ForwardedRef<React.ComponentRef<typeof SeparatorPrimitive.Root>>
  ) => {
    const isHorizontal = orientation === "horizontal";

    return (
      <div
        style={{
          ...(paddingXRem != null
            ? {
                paddingLeft: `${paddingXRem}rem`,
                paddingRight: `${paddingXRem}rem`,
              }
            : {}),
          ...(paddingYRem != null
            ? {
                paddingTop: `${paddingYRem}rem`,
                paddingBottom: `${paddingYRem}rem`,
              }
            : {}),
        }}
        className={cn(
          isHorizontal ? "w-full" : "h-full",
          paddingXRem == null && !noPadding && (isHorizontal ? "py-4" : "px-4"),
          className
        )}
      >
        <SeparatorPrimitive.Root
          ref={ref}
          decorative={decorative}
          orientation={orientation}
          className={cn(
            "bg-border-01",
            isHorizontal ? "h-[1px] w-full" : "h-full w-[1px]"
          )}
          {...props}
        />
      </div>
    );
  }
);
Separator.displayName = SeparatorPrimitive.Root.displayName;

export default Separator;


================================================
FILE: web/src/refresh-components/ShadowDiv.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ShadowDiv from "./ShadowDiv";

const meta: Meta<typeof ShadowDiv> = {
  title: "refresh-components/ShadowDiv",
  component: ShadowDiv,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof ShadowDiv>;

const sampleItems = Array.from({ length: 30 }, (_, i) => (
  <div key={i} className="p-2 border-b border-border-01">
    Scrollable item {i + 1}
  </div>
));

export const Default: Story = {
  args: {
    className: "max-h-[250px]",
    style: { width: 300 },
    children: sampleItems,
  },
};

export const BottomOnly: Story = {
  args: {
    bottomOnly: true,
    className: "max-h-[250px]",
    style: { width: 300 },
    children: sampleItems,
  },
};

export const TopOnly: Story = {
  args: {
    topOnly: true,
    className: "max-h-[250px]",
    style: { width: 300 },
    children: sampleItems,
  },
};

export const CustomShadowHeight: Story = {
  args: {
    shadowHeight: "3rem",
    className: "max-h-[250px]",
    style: { width: 300 },
    children: sampleItems,
  },
};


================================================
FILE: web/src/refresh-components/ShadowDiv.tsx
================================================
"use client";

import React, { useState, useEffect, useCallback } from "react";
import { cn } from "@/lib/utils";

export interface ShadowDivProps extends React.HTMLAttributes<HTMLDivElement> {
  /**
   * Background color to use for the shadow gradients.
   * Defaults to --background-neutral-00
   */
  backgroundColor?: string;

  /**
   * Height of the shadow gradients.
   * Defaults to 1.5rem (24px)
   */
  shadowHeight?: string;

  /**
   * Ref for the scrollable container (useful for programmatic scrolling)
   */
  scrollContainerRef?: React.RefObject<HTMLDivElement | null>;

  /**
   * Show only bottom shadow (similar to OverflowDiv behavior)
   */
  bottomOnly?: boolean;

  /**
   * Show only top shadow
   */
  topOnly?: boolean;
}

/**
 * ShadowDiv - A scrollable container with automatic top/bottom shadow indicators
 *
 * This component wraps content in a scrollable div and automatically displays
 * gradient shadows at the top and/or bottom to indicate there's more content
 * to scroll in those directions.
 *
 * @example
 * ```tsx
 * <ShadowDiv className="max-h-[20rem]">
 *   <div>Long content...</div>
 *   <div>More content...</div>
 * </ShadowDiv>
 * ```
 *
 * @example
 * // Only show bottom shadow
 * <ShadowDiv bottomOnly className="max-h-[20rem]">
 *   <div>Content...</div>
 * </ShadowDiv>
 */
export default function ShadowDiv({
  backgroundColor = "var(--background-neutral-00)",
  shadowHeight = "1.5rem",
  scrollContainerRef,
  bottomOnly = false,
  topOnly = false,
  className,
  children,
  ...props
}: ShadowDivProps) {
  const [showTopShadow, setShowTopShadow] = useState(false);
  const [showBottomShadow, setShowBottomShadow] = useState(false);
  const internalRef = React.useRef<HTMLDivElement>(null);
  const containerRef = scrollContainerRef || internalRef;

  const checkScroll = useCallback(() => {
    const container = containerRef.current;
    if (!container) return;

    // Show top shadow if scrolled down
    if (!bottomOnly) {
      setShowTopShadow(container.scrollTop > 1);
    }

    // Show bottom shadow if there's more content to scroll down
    if (!topOnly) {
      const hasMoreBelow =
        container.scrollHeight - container.scrollTop - container.clientHeight >
        1;
      setShowBottomShadow(hasMoreBelow);
    }
  }, [containerRef, bottomOnly, topOnly]);

  useEffect(() => {
    const container = containerRef.current;
    if (!container) return;

    // Check initial state
    checkScroll();

    container.addEventListener("scroll", checkScroll);
    // Also check on resize in case content changes
    const resizeObserver = new ResizeObserver(checkScroll);
    resizeObserver.observe(container);

    return () => {
      container.removeEventListener("scroll", checkScroll);
      resizeObserver.disconnect();
    };
  }, [containerRef, checkScroll]);

  return (
    <div className="relative min-h-0 flex flex-col">
      <div
        ref={containerRef}
        className={cn("overflow-y-auto", className)}
        {...props}
      >
        {children}
      </div>

      {/* Top scroll shadow indicator */}
      {!bottomOnly && (
        <div
          className={cn(
            "absolute top-0 left-0 right-0 pointer-events-none transition-opacity duration-150",
            showTopShadow ? "opacity-100" : "opacity-0"
          )}
          style={{
            height: shadowHeight,
            background: `linear-gradient(to bottom, ${backgroundColor}, transparent)`,
          }}
        />
      )}

      {/* Bottom scroll shadow indicator */}
      {!topOnly && (
        <div
          className={cn(
            "absolute bottom-0 left-0 right-0 pointer-events-none transition-opacity duration-150",
            showBottomShadow ? "opacity-100" : "opacity-0"
          )}
          style={{
            height: shadowHeight,
            background: `linear-gradient(to top, ${backgroundColor}, transparent)`,
          }}
        />
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/SimpleCollapsible.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import SimpleCollapsible from "./SimpleCollapsible";

const meta: Meta<typeof SimpleCollapsible> = {
  title: "refresh-components/SimpleCollapsible",
  component: SimpleCollapsible,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof SimpleCollapsible>;

export const DefaultOpen: Story = {
  render: () => (
    <SimpleCollapsible>
      <SimpleCollapsible.Header
        title="Section Title"
        description="This section is open by default."
      />
      <SimpleCollapsible.Content>
        <div>Here is some collapsible content that starts expanded.</div>
      </SimpleCollapsible.Content>
    </SimpleCollapsible>
  ),
};

export const DefaultClosed: Story = {
  render: () => (
    <SimpleCollapsible defaultOpen={false}>
      <SimpleCollapsible.Header
        title="Initially Closed"
        description="Click the button to expand this section."
      />
      <SimpleCollapsible.Content>
        <div>This content was hidden until you clicked expand.</div>
      </SimpleCollapsible.Content>
    </SimpleCollapsible>
  ),
};

export const TitleOnly: Story = {
  render: () => (
    <SimpleCollapsible>
      <SimpleCollapsible.Header title="No Description" />
      <SimpleCollapsible.Content>
        <div>Content with a header that has no description.</div>
      </SimpleCollapsible.Content>
    </SimpleCollapsible>
  ),
};


================================================
FILE: web/src/refresh-components/SimpleCollapsible.tsx
================================================
/**
 * SimpleCollapsible - A collapsible container component
 *
 * Provides an expandable/collapsible section with a header and content area.
 * Supports both controlled and uncontrolled modes.
 *
 * @example
 * ```tsx
 * import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";
 *
 * // Basic usage
 * <SimpleCollapsible>
 *   <SimpleCollapsible.Header
 *     title="Section Title"
 *     description="Optional description"
 *   />
 *   <SimpleCollapsible.Content>
 *     <div>Content goes here</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 *
 * // Controlled state
 * const [open, setOpen] = useState(true);
 * <SimpleCollapsible open={open} onOpenChange={setOpen}>
 *   <SimpleCollapsible.Header title="Controlled Section" />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 *
 * // Default closed
 * <SimpleCollapsible defaultOpen={false}>
 *   <SimpleCollapsible.Header title="Initially Closed" />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 * ```
 */

"use client";

import * as React from "react";
import { useBoundingBox } from "@/hooks/useBoundingBox";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/refresh-components/Collapsible";
import { Button } from "@opal/components";
import { Content } from "@opal/layouts";
import { SvgFold, SvgExpand } from "@opal/icons";
import { WithoutStyles } from "@/types";

// Context for sharing state between compound components
interface SimpleCollapsibleContextValue {
  open: boolean;
}
const SimpleCollapsibleContext =
  React.createContext<SimpleCollapsibleContextValue | null>(null);
function useSimpleCollapsible() {
  const context = React.useContext(SimpleCollapsibleContext);
  if (!context) {
    throw new Error(
      "SimpleCollapsible compound components must be used within SimpleCollapsible"
    );
  }
  return context;
}

/**
 * SimpleCollapsible Root Component
 *
 * A collapsible container with a header and expandable content area.
 * Built on Radix UI Collapsible primitives.
 *
 * @example
 * ```tsx
 * <SimpleCollapsible>
 *   <SimpleCollapsible.Header title="Settings" description="Configure your preferences" />
 *   <SimpleCollapsible.Content>
 *     <div>Content here</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 *
 * // Controlled state
 * <SimpleCollapsible open={isOpen} onOpenChange={setIsOpen}>
 *   <SimpleCollapsible.Header title="Controlled" />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 *
 * // Default closed
 * <SimpleCollapsible defaultOpen={false}>
 *   <SimpleCollapsible.Header title="Initially Closed" />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 * ```
 */
interface SimpleCollapsibleRootProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  /** Controlled open state - when provided, component becomes controlled */
  open?: boolean;
  /** Default open state for uncontrolled mode (defaults to true) */
  defaultOpen?: boolean;
  /** Callback fired when the open state changes */
  onOpenChange?: (open: boolean) => void;
}
const Root = React.forwardRef<HTMLDivElement, SimpleCollapsibleRootProps>(
  (
    {
      children,
      open: controlledOpen,
      defaultOpen = true,
      onOpenChange,
      ...props
    },
    ref
  ) => {
    const [internalOpen, setInternalOpen] = React.useState(defaultOpen);

    const isControlled = controlledOpen !== undefined;
    const open = isControlled ? controlledOpen : internalOpen;

    const handleOpenChange = React.useCallback(
      (newOpen: boolean) => {
        onOpenChange?.(newOpen);
        if (!isControlled) {
          setInternalOpen(newOpen);
        }
      },
      [isControlled, onOpenChange]
    );

    return (
      <SimpleCollapsibleContext.Provider value={{ open }}>
        <Collapsible
          ref={ref}
          open={open}
          onOpenChange={handleOpenChange}
          className="flex flex-col flex-1 self-stretch"
          {...props}
        >
          {children}
        </Collapsible>
      </SimpleCollapsibleContext.Provider>
    );
  }
);
Root.displayName = "SimpleCollapsible";

/**
 * SimpleCollapsible Header Component
 *
 * A pre-styled header component for the collapsible trigger.
 * Displays a title and optional description.
 *
 * @example
 * ```tsx
 * <SimpleCollapsible>
 *   <SimpleCollapsible.Header
 *     title="Advanced Settings"
 *     description="Configure advanced options"
 *   />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 *
 * // Title only
 * <SimpleCollapsible>
 *   <SimpleCollapsible.Header title="Quick Settings" />
 *   <SimpleCollapsible.Content>
 *     <div>Content</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 * ```
 */
interface SimpleCollapsibleHeaderProps
  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
  /** The main heading text displayed in emphasized style */
  title: string;
  /** Optional secondary description text displayed below the title */
  description?: string;
}
const Header = React.forwardRef<HTMLDivElement, SimpleCollapsibleHeaderProps>(
  ({ title, description, ...props }, ref) => {
    const { open } = useSimpleCollapsible();
    const { ref: boundingRef, inside } = useBoundingBox();

    return (
      <CollapsibleTrigger asChild>
        <div
          ref={ref}
          className="flex flex-row items-center justify-between gap-4 cursor-pointer select-none"
          {...props}
        >
          <div ref={boundingRef} className="w-full">
            <Content
              title={title}
              description={description}
              sizePreset="main-content"
              variant="section"
            />
          </div>
          <Button
            icon={open ? SvgFold : SvgExpand}
            prominence="tertiary"
            size="sm"
            interaction={inside ? "hover" : "rest"}
            tooltip={open ? "Fold" : "Expand"}
          />
        </div>
      </CollapsibleTrigger>
    );
  }
);
Header.displayName = "SimpleCollapsible.Header";

/**
 * SimpleCollapsible Content Component
 *
 * Container for the collapsible content area.
 *
 * @example
 * ```tsx
 * <SimpleCollapsible>
 *   <SimpleCollapsible.Header title="Settings" />
 *   <SimpleCollapsible.Content>
 *     <div>Your content here</div>
 *   </SimpleCollapsible.Content>
 * </SimpleCollapsible>
 * ```
 */
const ContentPanel = React.forwardRef<
  HTMLDivElement,
  WithoutStyles<React.HTMLAttributes<HTMLDivElement>>
>(({ children, ...props }, ref) => {
  return (
    <CollapsibleContent>
      <div ref={ref} className="pt-4" {...props}>
        {children}
      </div>
    </CollapsibleContent>
  );
});
ContentPanel.displayName = "SimpleCollapsible.Content";

export default Object.assign(Root, {
  Header,
  Content: ContentPanel,
});


================================================
FILE: web/src/refresh-components/SimplePopover.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SimplePopover from "./SimplePopover";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";

const meta: Meta<typeof SimplePopover> = {
  title: "refresh-components/modals/SimplePopover",
  component: SimplePopover,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof SimplePopover>;

export const Default: Story = {
  args: {
    trigger: <Button>Open Popover</Button>,
    children: (
      <div style={{ padding: 16 }}>
        <Text mainUiBody text04>
          Popover content goes here.
        </Text>
      </div>
    ),
  },
};

export const WithRenderPropTrigger: Story = {
  args: {
    trigger: (open: boolean) => (
      <Button>{`${open ? "Close" : "Open"} Popover`}</Button>
    ),
    children: (
      <div style={{ padding: 16 }}>
        <Text mainUiBody text04>
          The trigger updates its label based on open state.
        </Text>
      </div>
    ),
  },
};


================================================
FILE: web/src/refresh-components/SimplePopover.tsx
================================================
"use client";

import React, { useState } from "react";
import Popover from "@/refresh-components/Popover";

export interface SimplePopoverProps
  extends React.ComponentPropsWithoutRef<typeof Popover.Content> {
  onOpenChange?: (open: boolean) => void;
  trigger: React.ReactNode | ((open: boolean) => React.ReactNode);
}

export default function SimplePopover({
  trigger,
  onOpenChange,
  ...rest
}: SimplePopoverProps) {
  const [open, setOpen] = useState(false);

  function handleOnOpenChange(state: boolean) {
    setOpen(state);
    onOpenChange?.(state);
  }

  return (
    <Popover open={open} onOpenChange={handleOnOpenChange}>
      <Popover.Trigger asChild>
        <div>{typeof trigger === "function" ? trigger(open) : trigger}</div>
      </Popover.Trigger>
      <Popover.Content align="start" side="top" width="md" {...rest} />
    </Popover>
  );
}


================================================
FILE: web/src/refresh-components/SimpleTabs.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SimpleTabs from "./SimpleTabs";

const meta: Meta<typeof SimpleTabs> = {
  title: "refresh-components/SimpleTabs",
  component: SimpleTabs,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof SimpleTabs>;

export const Default: Story = {
  args: {
    tabs: {
      overview: {
        name: "Overview",
        content: <div style={{ padding: 16 }}>Overview content goes here.</div>,
      },
      settings: {
        name: "Settings",
        content: <div style={{ padding: 16 }}>Settings content goes here.</div>,
      },
      activity: {
        name: "Activity",
        content: <div style={{ padding: 16 }}>Activity content goes here.</div>,
      },
    },
    defaultValue: "overview",
  },
};

export const TwoTabs: Story = {
  args: {
    tabs: {
      users: {
        name: "Users",
        content: (
          <div style={{ padding: 16 }}>User management panel content.</div>
        ),
      },
      groups: {
        name: "Groups",
        content: (
          <div style={{ padding: 16 }}>Group management panel content.</div>
        ),
      },
    },
    defaultValue: "users",
  },
};

export const WithDisabledTab: Story = {
  args: {
    tabs: {
      active: {
        name: "Active",
        content: <div style={{ padding: 16 }}>This tab is active.</div>,
      },
      disabled: {
        name: "Disabled",
        content: <div style={{ padding: 16 }}>You should not see this.</div>,
        disabled: true,
      },
      another: {
        name: "Another",
        content: <div style={{ padding: 16 }}>Another tab content.</div>,
      },
    },
    defaultValue: "active",
  },
};


================================================
FILE: web/src/refresh-components/SimpleTabs.tsx
================================================
"use client";

import React from "react";
import Tabs from "./Tabs";
import { IconProps } from "@opal/types";

/**
 * Tab Definition
 *
 * Defines a single tab with its trigger label and content.
 */
export interface TabDefinition {
  /** Display name for the tab trigger */
  name: string;
  /** Content to render when this tab is active */
  content: React.ReactNode;
  /** Optional icon to display in the tab trigger */
  icon?: React.FunctionComponent<IconProps>;
  /** Optional tooltip text to display on hover */
  tooltip?: string;
  /** Optional tooltip side */
  tooltipSide?: "top" | "bottom" | "left" | "right";
  /** Whether the tab is disabled */
  disabled?: boolean;
}

/**
 * Simple Tabs Props
 */
export interface SimpleTabsProps {
  /** Record of tab definitions, where the key is the tab value */
  tabs: Record<string, TabDefinition>;
  /** The tab value that should be active by default */
  defaultValue?: string;
  /** The controlled active tab value */
  value?: string;
  /** Callback when the active tab changes */
  onValueChange?: (value: string) => void;
}

/**
 * SimpleTabs Component
 *
 * A simplified API for creating tabs when you don't need granular control.
 * For complex use cases, use the base Tabs component with Tabs.List, Tabs.Trigger, and Tabs.Content.
 *
 * @example
 * ```tsx
 * const UserComponent = () => {
 *   const [count, setCount] = useState(0);
 *   return <div>User tab with state: {count}</div>;
 * };
 *
 * const AdminComponent = () => {
 *   return <div>Admin content</div>;
 * };
 *
 * <SimpleTabs
 *   tabs={{
 *     user: {
 *       name: "Users",
 *       content: <UserComponent />,
 *       icon: SvgUser,
 *       tooltip: "Manage users"
 *     },
 *     admin: {
 *       name: "Admin",
 *       content: <AdminComponent />,
 *       icon: SvgSettings
 *     }
 *   }}
 *   defaultValue="user"
 * />
 * ```
 *
 * @remarks
 * - This is a convenience wrapper around the base Tabs component
 * - For complex layouts or custom styling, use Tabs.List, Tabs.Trigger, and Tabs.Content directly
 * - Tab keys become the tab values, so they should be stable and URL-friendly
 * - Content components can use React hooks and maintain their own state
 */
export default function SimpleTabs({
  tabs,
  defaultValue,
  value,
  onValueChange,
}: SimpleTabsProps) {
  const tabEntries = Object.entries(tabs);

  // Use the first tab as default if none specified
  const effectiveDefaultValue = defaultValue ?? tabEntries[0]?.[0];

  return (
    <Tabs
      defaultValue={effectiveDefaultValue}
      value={value}
      onValueChange={onValueChange}
    >
      <Tabs.List>
        {tabEntries.map(([key, tab]) => (
          <Tabs.Trigger
            key={key}
            value={key}
            icon={tab.icon}
            tooltip={tab.tooltip}
            tooltipSide={tab.tooltipSide}
            disabled={tab.disabled}
          >
            {tab.name}
          </Tabs.Trigger>
        ))}
      </Tabs.List>

      {tabEntries.map(([key, tab]) => (
        <Tabs.Content key={key} value={key}>
          {tab.content}
        </Tabs.Content>
      ))}
    </Tabs>
  );
}

/**
 * Helper function to generate tab definitions with type safety
 *
 * This is optional but provides better autocomplete and type checking when defining tabs.
 *
 * @example
 * ```tsx
 * const pageTabs = SimpleTabs.generateTabs({
 *   userTab: {
 *     name: "Some name",
 *     content: <SomeComponent />
 *   },
 *   anothaOne: {
 *     name: "DJ Khalid",
 *     content: <AnothaOne />
 *   }
 * });
 *
 * <SimpleTabs tabs={pageTabs} />
 * ```
 */
SimpleTabs.generateTabs = <T extends Record<string, TabDefinition>>(
  tabs: T
): T => tabs;


================================================
FILE: web/src/refresh-components/SimpleTooltip.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SimpleTooltip from "./SimpleTooltip";

const meta: Meta<typeof SimpleTooltip> = {
  title: "refresh-components/SimpleTooltip",
  component: SimpleTooltip,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof SimpleTooltip>;

export const Default: Story = {
  args: {
    tooltip: "This is a tooltip",
    children: <button>Hover me</button>,
  },
};

export const SideVariants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 24, padding: 48 }}>
      {(["top", "right", "bottom", "left"] as const).map((side) => (
        <SimpleTooltip key={side} tooltip={`Tooltip on ${side}`} side={side}>
          <button>{side}</button>
        </SimpleTooltip>
      ))}
    </div>
  ),
};

export const Disabled: Story = {
  args: {
    tooltip: "You won't see this",
    disabled: true,
    children: <button>Tooltip disabled</button>,
  },
};

export const StringChild: Story = {
  render: () => (
    <SimpleTooltip>
      <span>String child auto-tooltips itself</span>
    </SimpleTooltip>
  ),
};


================================================
FILE: web/src/refresh-components/SimpleTooltip.tsx
================================================
/**
 * SimpleTooltip - A wrapper component for easily adding tooltips to elements.
 *
 * IMPORTANT: Children must be ref-compatible (either a DOM element or a component
 * that uses forwardRef). This is required because TooltipTrigger uses `asChild`
 * which needs to attach a ref to the child element for positioning.
 *
 * Valid children:
 * - DOM elements: <div>, <button>, <span>, etc.
 * - forwardRef components: Components wrapped with React.forwardRef()
 *
 * Invalid children (will cause errors or warnings):
 * - Fragments: <>{content}</>
 * - Regular function components that don't forward refs
 * - Multiple children
 *
 * @example
 * // Valid - DOM element
 * <SimpleTooltip tooltip="Hello">
 *   <button>Hover me</button>
 * </SimpleTooltip>
 *
 * // Valid - forwardRef component
 * <SimpleTooltip tooltip="Card tooltip">
 *   <Card>Content</Card>
 * </SimpleTooltip>
 *
 * // Invalid - will cause React warning
 * <SimpleTooltip tooltip="Won't work">
 *   <NonForwardRefComponent />
 * </SimpleTooltip>
 */

"use client";

import React from "react";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import Text from "@/refresh-components/texts/Text";

export interface SimpleTooltipProps
  extends React.ComponentPropsWithoutRef<typeof TooltipContent> {
  disabled?: boolean;
  tooltip?: React.ReactNode;
  children?: React.ReactNode;
  delayDuration?: number;
}

export default function SimpleTooltip({
  disabled = false,
  tooltip,
  className,
  children,
  side = "right",
  delayDuration,
  ...rest
}: SimpleTooltipProps) {
  // Determine hover content based on the logic:
  // 1. If tooltip is defined, use tooltip
  // 2. If tooltip is undefined and children is a string, use children
  // 3. Otherwise, no tooltip
  const hoverContent =
    tooltip ?? (typeof children === "string" ? children : undefined);

  // If no hover content, just render children without tooltip
  if (!hoverContent) return children;

  // Check if tooltip is a string to wrap in Text component, otherwise render as-is
  const tooltipContent =
    typeof hoverContent === "string" ? (
      <Text as="p" textLight05>
        {hoverContent}
      </Text>
    ) : (
      hoverContent
    );

  return (
    <TooltipProvider delayDuration={delayDuration}>
      <Tooltip>
        <TooltipTrigger asChild>{children}</TooltipTrigger>
        {!disabled && (
          <TooltipContent side={side} className={className} {...rest}>
            {tooltipContent}
          </TooltipContent>
        )}
      </Tooltip>
    </TooltipProvider>
  );
}


================================================
FILE: web/src/refresh-components/Spacer.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Spacer from "./Spacer";

const meta: Meta<typeof Spacer> = {
  title: "refresh-components/Spacer",
  component: Spacer,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof Spacer>;

export const VerticalDefault: Story = {
  render: () => (
    <div className="flex flex-col items-start">
      <div className="p-2 bg-background-tint-03">Above</div>
      <Spacer />
      <div className="p-2 bg-background-tint-03">Below (1rem gap)</div>
    </div>
  ),
};

export const VerticalCustomRem: Story = {
  render: () => (
    <div className="flex flex-col items-start">
      <div className="p-2 bg-background-tint-03">Above</div>
      <Spacer vertical rem={3} />
      <div className="p-2 bg-background-tint-03">Below (3rem gap)</div>
    </div>
  ),
};

export const Horizontal: Story = {
  render: () => (
    <div className="flex flex-row items-center">
      <div className="p-2 bg-background-tint-03">Left</div>
      <Spacer horizontal rem={2} />
      <div className="p-2 bg-background-tint-03">Right (2rem gap)</div>
    </div>
  ),
};

export const PixelBased: Story = {
  render: () => (
    <div className="flex flex-col items-start">
      <div className="p-2 bg-background-tint-03">Above</div>
      <Spacer pixels={48} />
      <div className="p-2 bg-background-tint-03">Below (48px gap)</div>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/Spacer.tsx
================================================
type DirectionProps = {
  vertical?: boolean;
  horizontal?: boolean;
};

export type SpacerProps = DirectionProps &
  ({ rem?: number; pixels?: never } | { pixels: number; rem?: never });

export default function Spacer({
  vertical,
  horizontal,
  rem = 1,
  pixels,
}: SpacerProps) {
  const isVertical = vertical ? true : horizontal ? false : true;
  const size = pixels !== undefined ? `${pixels}px` : `${rem}rem`;

  return (
    <div
      style={{
        height: isVertical ? size : undefined,
        width: !isVertical ? size : undefined,
      }}
    />
  );
}


================================================
FILE: web/src/refresh-components/Tabs.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Tabs from "./Tabs";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { SvgSettings, SvgStar, SvgRefreshCw } from "@opal/icons";

const meta: Meta<typeof Tabs> = {
  title: "refresh-components/Tabs",
  component: Tabs,
  tags: ["autodocs"],
  parameters: {
    layout: "padded",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Tabs>;

// ---------------------------------------------------------------------------
// Contained variant (default)
// ---------------------------------------------------------------------------

export const Contained: Story = {
  render: () => (
    <Tabs defaultValue="overview">
      <Tabs.List variant="contained">
        <Tabs.Trigger value="overview">Overview</Tabs.Trigger>
        <Tabs.Trigger value="details">Details</Tabs.Trigger>
        <Tabs.Trigger value="settings">Settings</Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="overview">Overview tab content</Tabs.Content>
      <Tabs.Content value="details">Details tab content</Tabs.Content>
      <Tabs.Content value="settings">Settings tab content</Tabs.Content>
    </Tabs>
  ),
};

// ---------------------------------------------------------------------------
// Pill variant
// ---------------------------------------------------------------------------

export const Pill: Story = {
  render: () => (
    <Tabs defaultValue="all">
      <Tabs.List variant="pill">
        <Tabs.Trigger value="all">All</Tabs.Trigger>
        <Tabs.Trigger value="active">Active</Tabs.Trigger>
        <Tabs.Trigger value="archived">Archived</Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="all">All items</Tabs.Content>
      <Tabs.Content value="active">Active items</Tabs.Content>
      <Tabs.Content value="archived">Archived items</Tabs.Content>
    </Tabs>
  ),
};

// ---------------------------------------------------------------------------
// With icons
// ---------------------------------------------------------------------------

export const WithIcons: Story = {
  render: () => (
    <Tabs defaultValue="general">
      <Tabs.List variant="contained">
        <Tabs.Trigger value="general" icon={SvgSettings}>
          General
        </Tabs.Trigger>
        <Tabs.Trigger value="favorites" icon={SvgStar}>
          Favorites
        </Tabs.Trigger>
        <Tabs.Trigger value="sync" icon={SvgRefreshCw}>
          Sync
        </Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="general">General settings</Tabs.Content>
      <Tabs.Content value="favorites">Your favorites</Tabs.Content>
      <Tabs.Content value="sync">Sync configuration</Tabs.Content>
    </Tabs>
  ),
};

// ---------------------------------------------------------------------------
// Pill with right content
// ---------------------------------------------------------------------------

export const PillWithRightContent: Story = {
  render: () => (
    <Tabs defaultValue="users">
      <Tabs.List
        variant="pill"
        rightContent={
          <button className="px-3 py-1 text-sm bg-background-tint-03 rounded-08">
            Add New
          </button>
        }
      >
        <Tabs.Trigger value="users">Users</Tabs.Trigger>
        <Tabs.Trigger value="groups">Groups</Tabs.Trigger>
        <Tabs.Trigger value="roles">Roles</Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="users">Users list</Tabs.Content>
      <Tabs.Content value="groups">Groups list</Tabs.Content>
      <Tabs.Content value="roles">Roles list</Tabs.Content>
    </Tabs>
  ),
};

// ---------------------------------------------------------------------------
// With disabled and tooltip
// ---------------------------------------------------------------------------

export const WithDisabledTab: Story = {
  render: () => (
    <Tabs defaultValue="active">
      <Tabs.List variant="contained">
        <Tabs.Trigger value="active">Active</Tabs.Trigger>
        <Tabs.Trigger value="pending" disabled tooltip="Coming soon">
          Pending
        </Tabs.Trigger>
        <Tabs.Trigger value="completed">Completed</Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="active">Active tasks</Tabs.Content>
      <Tabs.Content value="completed">Completed tasks</Tabs.Content>
    </Tabs>
  ),
};

// ---------------------------------------------------------------------------
// Loading state
// ---------------------------------------------------------------------------

export const LoadingTab: Story = {
  render: () => (
    <Tabs defaultValue="data">
      <Tabs.List variant="pill">
        <Tabs.Trigger value="data" isLoading>
          Loading Data
        </Tabs.Trigger>
        <Tabs.Trigger value="ready">Ready</Tabs.Trigger>
      </Tabs.List>
      <Tabs.Content value="data">Data is loading...</Tabs.Content>
      <Tabs.Content value="ready">Ready content</Tabs.Content>
    </Tabs>
  ),
};


================================================
FILE: web/src/refresh-components/Tabs.tsx
================================================
"use client";

import React, {
  useRef,
  useState,
  useEffect,
  useMemo,
  useCallback,
} from "react";
import * as TabsPrimitive from "@radix-ui/react-tabs";
import { cn, mergeRefs } from "@/lib/utils";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { WithoutStyles } from "@/types";
import { Section, SectionProps } from "@/layouts/general-layouts";
import { IconProps } from "@opal/types";
import { SvgChevronLeft, SvgChevronRight } from "@opal/icons";
import Text from "./texts/Text";
import { Button } from "@opal/components";

/* =============================================================================
   CONTEXT
   ============================================================================= */

interface TabsContextValue {
  variant: "contained" | "pill";
}

const TabsContext = React.createContext<TabsContextValue | undefined>(
  undefined
);

const useTabsContext = () => {
  const context = React.useContext(TabsContext);
  return context; // Returns undefined if used outside Tabs.List (allows explicit override)
};

/**
 * TABS COMPONENT VARIANTS
 *
 * Contained (default):
 * ┌─────────────────────────────────────────────────┐
 * │ ┌──────────┐ ╔══════════╗ ┌──────────┐          │
 * │ │   Tab 1  │ ║  Tab 2   ║ │   Tab 3  │          │  ← gray background
 * │ └──────────┘ ╚══════════╝ └──────────┘          │
 * └─────────────────────────────────────────────────┘
 *                 ↑ active tab (white bg, shadow)
 *
 * Pill:
 *    Tab 1      Tab 2      Tab 3          [Action]
 *              ╔═════╗
 *              ║     ║                        ↑ optional rightContent
 * ─────────────╨═════╨─────────────────────────────
 *              ↑ sliding indicator under active tab
 *
 * @example
 * <Tabs defaultValue="tab1">
 *   <Tabs.List variant="pill">
 *     <Tabs.Trigger value="tab1">Overview</Tabs.Trigger>
 *     <Tabs.Trigger value="tab2">Details</Tabs.Trigger>
 *   </Tabs.List>
 *   <Tabs.Content value="tab1">Overview content</Tabs.Content>
 *   <Tabs.Content value="tab2">Details content</Tabs.Content>
 * </Tabs>
 */

/* =============================================================================
   VARIANT STYLES
   Centralized styling definitions for tabs variants.
   ============================================================================= */

/** Style classes for TabsList variants */
const listVariants = {
  contained: "grid w-full rounded-08 bg-background-tint-03",
  pill: "relative flex w-full items-center pb-[5px] bg-background-tint-00 overflow-hidden",
} as const;

/** Base style classes for TabsTrigger variants */
const triggerBaseStyles = {
  contained: "p-2 gap-2",
  pill: "p-1 font-secondary-action transition-all duration-200 ease-out",
} as const;

/** Icon style classes for TabsTrigger variants */
const iconVariants = {
  contained: "stroke-text-03",
  pill: "stroke-current",
} as const;

/* =============================================================================
   CONSTANTS
   ============================================================================= */

/** Pixel tolerance for detecting scroll boundaries (accounts for rounding) */
const SCROLL_TOLERANCE_PX = 1;

/** Pixel amount to scroll when clicking scroll arrows */
const SCROLL_AMOUNT_PX = 200;

/* =============================================================================
   HOOKS
   ============================================================================= */

/** Style properties for the pill indicator position */
interface IndicatorStyle {
  left: number;
  width: number;
  opacity: number;
}

/**
 * Hook to track and animate a sliding indicator under the active tab.
 *
 * Uses MutationObserver to detect when the active tab changes (via data-state
 * attribute updates from Radix UI) and calculates the indicator position.
 *
 * @param listRef - Ref to the TabsList container element
 * @param enabled - Whether indicator tracking is enabled (only true for pill variant)
 * @returns Style object with left, width, and opacity for the indicator element
 */
function usePillIndicator(
  listRef: React.RefObject<HTMLElement | null>,
  enabled: boolean,
  scrollContainerRef?: React.RefObject<HTMLElement | null>
): { style: IndicatorStyle; isScrolling: boolean } {
  const [style, setStyle] = useState<IndicatorStyle>({
    left: 0,
    width: 0,
    opacity: 0,
  });
  const [isScrolling, setIsScrolling] = useState(false);
  const scrollTimeoutRef = useRef<NodeJS.Timeout | null>(null);

  useEffect(() => {
    if (!enabled) return;

    const list = listRef.current;
    if (!list) return;

    const updateIndicator = () => {
      const activeTab = list.querySelector<HTMLElement>(
        '[data-state="active"]'
      );
      if (activeTab) {
        const listRect = list.getBoundingClientRect();
        const tabRect = activeTab.getBoundingClientRect();
        setStyle({
          left: tabRect.left - listRect.left,
          width: tabRect.width,
          opacity: 1,
        });
      }
    };

    const handleScroll = () => {
      setIsScrolling(true);
      updateIndicator();

      // Clear existing timeout
      if (scrollTimeoutRef.current) {
        clearTimeout(scrollTimeoutRef.current);
      }
      // Reset scrolling state after scroll ends
      scrollTimeoutRef.current = setTimeout(() => {
        setIsScrolling(false);
      }, 150);
    };

    updateIndicator();

    // Watch for size changes on ANY tab (sibling size changes affect active tab position)
    const resizeObserver = new ResizeObserver(() => updateIndicator());
    list.querySelectorAll<HTMLElement>('[role="tab"]').forEach((tab) => {
      resizeObserver.observe(tab);
    });

    // Watch for data-state changes (tab switches)
    const mutationObserver = new MutationObserver(() => updateIndicator());
    mutationObserver.observe(list, {
      attributes: true,
      subtree: true,
      attributeFilter: ["data-state"],
    });

    // Listen for scroll events on scroll container
    const scrollContainer = scrollContainerRef?.current;
    if (scrollContainer) {
      scrollContainer.addEventListener("scroll", handleScroll);
    }

    return () => {
      mutationObserver.disconnect();
      resizeObserver.disconnect();
      if (scrollContainer) {
        scrollContainer.removeEventListener("scroll", handleScroll);
      }
      if (scrollTimeoutRef.current) {
        clearTimeout(scrollTimeoutRef.current);
      }
    };
  }, [enabled, listRef, scrollContainerRef]);

  return { style, isScrolling };
}

/** State for horizontal scroll arrows */
interface ScrollState {
  canScrollLeft: boolean;
  canScrollRight: boolean;
  scrollLeft: () => void;
  scrollRight: () => void;
}

/**
 * Hook to manage horizontal scrolling with arrow navigation.
 *
 * Tracks scroll position and overflow state of a container, providing
 * scroll functions and boolean flags for arrow visibility.
 *
 * @param containerRef - Ref to the scrollable container element
 * @param enabled - Whether scroll tracking is enabled
 * @returns Object with canScrollLeft, canScrollRight, and scroll functions
 */
function useHorizontalScroll(
  containerRef: React.RefObject<HTMLElement | null>,
  enabled: boolean
): ScrollState {
  const [canScrollLeft, setCanScrollLeft] = useState(false);
  const [canScrollRight, setCanScrollRight] = useState(false);

  const updateScrollState = useCallback(() => {
    const container = containerRef.current;
    if (!container) return;

    const { scrollLeft, scrollWidth, clientWidth } = container;
    setCanScrollLeft(scrollLeft > 0);
    setCanScrollRight(
      scrollLeft + clientWidth < scrollWidth - SCROLL_TOLERANCE_PX
    );
  }, [containerRef]);

  useEffect(() => {
    if (!enabled) return;

    const container = containerRef.current;
    if (!container) return;

    // Delay initial measurement until after layout
    const rafId = requestAnimationFrame(() => {
      updateScrollState();
    });

    container.addEventListener("scroll", updateScrollState);

    const resizeObserver = new ResizeObserver(updateScrollState);
    resizeObserver.observe(container);

    // Also observe children for size changes
    Array.from(container.children).forEach((child) => {
      resizeObserver.observe(child);
    });

    return () => {
      cancelAnimationFrame(rafId);
      container.removeEventListener("scroll", updateScrollState);
      resizeObserver.disconnect();
    };
  }, [enabled, containerRef, updateScrollState]);

  const scrollLeft = useCallback(() => {
    containerRef.current?.scrollBy({
      left: -SCROLL_AMOUNT_PX,
      behavior: "smooth",
    });
  }, [containerRef]);

  const scrollRight = useCallback(() => {
    containerRef.current?.scrollBy({
      left: SCROLL_AMOUNT_PX,
      behavior: "smooth",
    });
  }, [containerRef]);

  return { canScrollLeft, canScrollRight, scrollLeft, scrollRight };
}

/* =============================================================================
   SUB-COMPONENTS
   ============================================================================= */

/**
 * Renders the bottom line and sliding indicator for the pill variant.
 * The indicator animates smoothly when switching between tabs.
 *
 * @param style - Position and opacity for the sliding indicator
 * @param rightOffset - Distance from the right edge where the border line should stop (for rightContent)
 */
function PillIndicator({
  style,
  rightOffset = 0,
}: {
  style: IndicatorStyle;
  rightOffset?: number;
}) {
  return (
    <>
      <div
        className="absolute bottom-0 left-0 h-px bg-border-02 pointer-events-none"
        style={{ right: rightOffset }}
      />
      <div
        className="absolute bottom-0 h-[2px] bg-background-tint-inverted-03 z-10 pointer-events-none transition-all duration-200 ease-out"
        style={{
          left: style.left,
          width: style.width,
          opacity: style.opacity,
        }}
      />
    </>
  );
}

/* =============================================================================
   MAIN COMPONENTS
   ============================================================================= */

/**
 * Tabs Root Component
 *
 * Container for tab navigation and content. Manages the active tab state.
 * Supports both controlled and uncontrolled modes.
 *
 * @param defaultValue - The tab value that should be active by default (uncontrolled mode)
 * @param value - The controlled active tab value
 * @param onValueChange - Callback fired when the active tab changes
 */
const TabsRoot = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.Root>,
  WithoutStyles<React.ComponentPropsWithoutRef<typeof TabsPrimitive.Root>>
>(({ ...props }, ref) => (
  <TabsPrimitive.Root ref={ref} className="w-full" {...props} />
));
TabsRoot.displayName = TabsPrimitive.Root.displayName;

/* -------------------------------------------------------------------------- */

/**
 * Tabs List Props
 */
interface TabsListProps
  extends Omit<
    React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>,
    "style"
  > {
  /**
   * Visual variant of the tabs list.
   *
   * - `contained` (default): Rounded background with equal-width tabs in a grid.
   *   Best for primary navigation where tabs should fill available space.
   *
   * - `pill`: Transparent background with a sliding underline indicator.
   *   Best for secondary navigation or filter-style tabs with flexible widths.
   */
  variant?: "contained" | "pill";

  /**
   * Content to render on the right side of the tab list.
   * Only applies to the `pill` variant (ignored for `contained`).
   *
   * @example
   * ```tsx
   * <Tabs.List variant="pill" rightContent={<Button size="sm">Add New</Button>}>
   *   <Tabs.Trigger value="all">All</Tabs.Trigger>
   *   <Tabs.Trigger value="active">Active</Tabs.Trigger>
   * </Tabs.List>
   * ```
   */
  rightContent?: React.ReactNode;

  /**
   * Enable horizontal scroll arrows when tabs overflow.
   * Only applies to the `pill` variant.
   * @default false
   */
  enableScrollArrows?: boolean;
}

/**
 * Tabs List Component
 *
 * Container for tab triggers. Renders as a horizontal list with automatic
 * keyboard navigation (arrow keys, Home/End) and accessibility attributes.
 *
 * @remarks
 * - **Contained**: Uses CSS Grid for equal-width tabs with rounded background
 * - **Pill**: Uses Flexbox for content-width tabs with animated bottom indicator
 * - The `variant` prop is automatically propagated to child `Tabs.Trigger` components via context
 */
const TabsList = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.List>,
  TabsListProps
>(
  (
    {
      variant = "contained",
      rightContent,
      enableScrollArrows = false,
      children,
      className,
      ...props
    },
    ref
  ) => {
    const listRef = useRef<HTMLDivElement>(null);
    const tabsContainerRef = useRef<HTMLDivElement>(null);
    const scrollArrowsRef = useRef<HTMLDivElement>(null);
    const rightContentRef = useRef<HTMLDivElement>(null);
    const [rightOffset, setRightOffset] = useState(0);
    const isPill = variant === "pill";
    const { style: indicatorStyle } = usePillIndicator(
      listRef,
      isPill,
      enableScrollArrows ? tabsContainerRef : undefined
    );
    const contextValue = useMemo(() => ({ variant }), [variant]);
    const {
      canScrollLeft,
      canScrollRight,
      scrollLeft: handleScrollLeft,
      scrollRight: handleScrollRight,
    } = useHorizontalScroll(tabsContainerRef, isPill && enableScrollArrows);

    const showScrollArrows =
      isPill && enableScrollArrows && (canScrollLeft || canScrollRight);

    // Track right content and scroll arrows width to offset the border line
    useEffect(() => {
      if (!isPill) {
        setRightOffset(0);
        return;
      }

      const updateWidth = () => {
        let totalWidth = 0;

        // Add scroll arrows width if visible
        if (scrollArrowsRef.current) {
          totalWidth += scrollArrowsRef.current.offsetWidth;
        }

        // Add right content width if present
        if (rightContentRef.current) {
          totalWidth += rightContentRef.current.offsetWidth;
        }

        setRightOffset(totalWidth);
      };

      updateWidth();

      const resizeObserver = new ResizeObserver(updateWidth);
      if (scrollArrowsRef.current)
        resizeObserver.observe(scrollArrowsRef.current);
      if (rightContentRef.current)
        resizeObserver.observe(rightContentRef.current);

      return () => resizeObserver.disconnect();
    }, [isPill, rightContent, showScrollArrows]);

    return (
      <TabsPrimitive.List
        ref={mergeRefs(listRef, ref)}
        className={cn(listVariants[variant], className)}
        style={
          variant === "contained"
            ? {
                gridTemplateColumns: `repeat(${React.Children.count(
                  children
                )}, 1fr)`,
              }
            : undefined
        }
        {...props}
      >
        <TabsContext.Provider value={contextValue}>
          {isPill ? (
            enableScrollArrows ? (
              <div
                ref={tabsContainerRef}
                className="flex items-center gap-2 overflow-x-auto scrollbar-hide flex-1 min-w-0"
                style={{ scrollbarWidth: "none", msOverflowStyle: "none" }}
              >
                {children}
              </div>
            ) : (
              <div className="flex items-center gap-2 pt-1">{children}</div>
            )
          ) : (
            children
          )}

          {showScrollArrows && (
            <div
              ref={scrollArrowsRef}
              className="flex items-center gap-1 pl-2 flex-shrink-0"
            >
              <Button
                disabled={!canScrollLeft}
                prominence="tertiary"
                size="sm"
                icon={SvgChevronLeft}
                onClick={handleScrollLeft}
                tooltip="Scroll tabs left"
              />
              <Button
                disabled={!canScrollRight}
                prominence="tertiary"
                size="sm"
                icon={SvgChevronRight}
                onClick={handleScrollRight}
                tooltip="Scroll tabs right"
              />
            </div>
          )}

          {isPill && rightContent && (
            <div ref={rightContentRef} className="ml-auto flex-shrink-0">
              {rightContent}
            </div>
          )}

          {isPill && (
            <PillIndicator style={indicatorStyle} rightOffset={rightOffset} />
          )}
        </TabsContext.Provider>
      </TabsPrimitive.List>
    );
  }
);
TabsList.displayName = TabsPrimitive.List.displayName;

/* -------------------------------------------------------------------------- */

/**
 * Tabs Trigger Props
 */
interface TabsTriggerProps
  extends WithoutStyles<
    Omit<
      React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>,
      "children"
    >
  > {
  /**
   * Visual variant of the tab trigger.
   * Automatically inherited from the parent `Tabs.List` variant via context.
   * Can be explicitly set to override the inherited value.
   *
   * - `contained` (default): White background with shadow when active
   * - `pill`: Dark pill background when active, transparent when inactive
   */
  variant?: "contained" | "pill";

  /** Optional tooltip text to display on hover */
  tooltip?: string;

  /** Side where tooltip appears. @default "top" */
  tooltipSide?: "top" | "bottom" | "left" | "right";

  /** Optional icon component to render before the label */
  icon?: React.FunctionComponent<IconProps>;

  /** Tab label - can be string or ReactNode for custom content */
  children?: React.ReactNode;

  /** Show loading spinner after label */
  isLoading?: boolean;
}

/**
 * Tabs Trigger Component
 *
 * Individual tab button that switches the active tab when clicked.
 * Supports icons, tooltips, loading states, and disabled state.
 *
 * @remarks
 * - **Contained active**: White background with subtle shadow
 * - **Pill active**: Dark inverted background
 * - Tooltips work on disabled triggers via wrapper span technique
 * - Loading spinner appears after the label text
 */
const TabsTrigger = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.Trigger>,
  TabsTriggerProps
>(
  (
    {
      variant: variantProp,
      tooltip,
      tooltipSide = "top",
      icon: Icon,
      children,
      disabled,
      isLoading,
      ...props
    },
    ref
  ) => {
    const context = useTabsContext();
    const variant = variantProp ?? context?.variant ?? "contained";

    const inner = (
      <>
        {Icon && (
          <div className="p-0.5">
            <Icon size={14} className={cn(iconVariants[variant])} />
          </div>
        )}
        {typeof children === "string" ? (
          <div className="px-0.5">
            <Text>{children}</Text>
          </div>
        ) : (
          children
        )}
        {isLoading && (
          <span
            className="inline-block w-3 h-3 border-2 border-current border-t-transparent rounded-full animate-spin ml-1"
            aria-label="Loading"
          />
        )}
      </>
    );

    const trigger = (
      <TabsPrimitive.Trigger
        ref={ref}
        disabled={disabled}
        className={cn(
          "inline-flex items-center justify-center whitespace-nowrap rounded-08",
          triggerBaseStyles[variant],
          variant === "contained" && [
            "data-[state=active]:bg-background-neutral-00",
            "data-[state=active]:text-text-04",
            "data-[state=active]:shadow-01",
            "data-[state=active]:border",
            "data-[state=active]:border-border-01",
          ],
          variant === "pill" && [
            "data-[state=active]:bg-background-tint-inverted-03",
            "data-[state=active]:text-text-inverted-05",
          ],
          variant === "contained" && [
            "data-[state=inactive]:text-text-03",
            "data-[state=inactive]:bg-transparent",
            "data-[state=inactive]:border",
            "data-[state=inactive]:border-transparent",
          ],
          variant === "pill" && [
            "data-[state=inactive]:bg-background-tint-00",
            "data-[state=inactive]:text-text-03",
          ]
        )}
        {...props}
      >
        {tooltip && !disabled ? (
          <SimpleTooltip tooltip={tooltip} side={tooltipSide}>
            <span className="inline-flex items-center gap-inherit">
              {inner}
            </span>
          </SimpleTooltip>
        ) : (
          inner
        )}
      </TabsPrimitive.Trigger>
    );

    // Disabled native buttons don't emit pointer/focus events, so tooltips
    // inside them won't trigger. Wrap the entire trigger with a neutral span
    // only when disabled so layout stays unchanged for the enabled case.
    if (tooltip && disabled) {
      return (
        <SimpleTooltip tooltip={tooltip} side={tooltipSide}>
          <span className="flex-1 inline-flex align-middle justify-center">
            {trigger}
          </span>
        </SimpleTooltip>
      );
    }

    return trigger;
  }
);
TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;

/* -------------------------------------------------------------------------- */

/**
 * Tabs Content Component
 *
 * Container for the content associated with each tab.
 * Only the content for the active tab is rendered and visible.
 *
 * @param value - The tab value this content is associated with (must match a Tabs.Trigger value)
 */
const TabsContent = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.Content>,
  SectionProps & { value: string }
>(({ children, value, ...props }, ref) => (
  <TabsPrimitive.Content
    ref={ref}
    value={value}
    className="pt-4 focus:outline-none focus:border-theme-primary-05 w-full"
  >
    <Section padding={0} {...props}>
      {children}
    </Section>
  </TabsPrimitive.Content>
));
TabsContent.displayName = TabsPrimitive.Content.displayName;

/* =============================================================================
   EXPORTS
   ============================================================================= */

export default Object.assign(TabsRoot, {
  List: TabsList,
  Trigger: TabsTrigger,
  Content: TabsContent,
});


================================================
FILE: web/src/refresh-components/TextSeparator.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import TextSeparator from "./TextSeparator";

const meta: Meta<typeof TextSeparator> = {
  title: "refresh-components/TextSeparator",
  component: TextSeparator,
  tags: ["autodocs"],
  parameters: {
    layout: "padded",
  },
};

export default meta;
type Story = StoryObj<typeof TextSeparator>;

export const TextOnly: Story = {
  args: {
    text: "Older messages",
  },
};

export const WithCount: Story = {
  args: {
    text: "results",
    count: 42,
  },
};

export const InContext: Story = {
  render: () => (
    <div className="flex flex-col gap-2" style={{ width: 400 }}>
      <div className="p-2 bg-background-tint-01 rounded-08">Message 1</div>
      <div className="p-2 bg-background-tint-01 rounded-08">Message 2</div>
      <TextSeparator text="older messages" count={15} />
      <div className="p-2 bg-background-tint-01 rounded-08">Message 3</div>
      <div className="p-2 bg-background-tint-01 rounded-08">Message 4</div>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/TextSeparator.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";

export interface TextSeparatorProps {
  count?: number;
  text: string;
  className?: string;
}

export default function TextSeparator({
  count,
  text,
  className,
}: TextSeparatorProps) {
  return (
    <div
      className={cn("flex flex-row items-center w-full gap-2 px-4", className)}
    >
      <div className="flex-1 h-px bg-border" />
      <div className="flex flex-row items-center gap-1 flex-shrink-0">
        {count !== undefined && (
          <Text as="p" secondaryBody text03>
            {count}
          </Text>
        )}
        <Text as="p" secondaryBody text03>
          {text}
        </Text>
      </div>
      <div className="flex-1 h-px bg-border" />
    </div>
  );
}


================================================
FILE: web/src/refresh-components/avatars/AgentAvatar.tsx
================================================
"use client";

import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { buildImgUrl } from "@/app/app/components/files/images/utils";
import { OnyxIcon } from "@/components/icons/icons";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { DEFAULT_AVATAR_SIZE_PX, DEFAULT_AGENT_ID } from "@/lib/constants";
import CustomAgentAvatar from "@/refresh-components/avatars/CustomAgentAvatar";
import Image from "next/image";

export interface AgentAvatarProps {
  agent: MinimalPersonaSnapshot;
  size?: number;
}

export default function AgentAvatar({
  agent,
  size = DEFAULT_AVATAR_SIZE_PX,
  ...props
}: AgentAvatarProps) {
  const settings = useSettingsContext();

  if (agent.id === DEFAULT_AGENT_ID) {
    return settings.enterpriseSettings?.use_custom_logo ? (
      <div
        className="aspect-square rounded-full overflow-hidden relative"
        style={{ height: size, width: size }}
      >
        <Image
          alt="Logo"
          src="/api/enterprise-settings/logo"
          fill
          className="object-cover object-center"
          sizes={`${size}px`}
        />
      </div>
    ) : (
      <OnyxIcon size={size} className="shrink-0" />
    );
  }

  return (
    <CustomAgentAvatar
      name={agent.name}
      src={
        agent.uploaded_image_id
          ? buildImgUrl(agent.uploaded_image_id)
          : undefined
      }
      iconName={agent.icon_name}
      size={size}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/avatars/CustomAgentAvatar.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import CustomAgentAvatar from "./CustomAgentAvatar";

const meta: Meta<typeof CustomAgentAvatar> = {
  title: "refresh-components/Avatars/CustomAgentAvatar",
  component: CustomAgentAvatar,
  tags: ["autodocs"],
  parameters: {
    layout: "centered",
  },
};

export default meta;
type Story = StoryObj<typeof CustomAgentAvatar>;

// ---------------------------------------------------------------------------
// Default — falls back to letter from name
// ---------------------------------------------------------------------------

export const WithName: Story = {
  args: {
    name: "Research Assistant",
    size: 40,
  },
};

// ---------------------------------------------------------------------------
// Icon variants
// ---------------------------------------------------------------------------

export const WithIconSearch: Story = {
  args: {
    name: "Search Agent",
    iconName: "Search",
    size: 40,
  },
};

export const WithIconTerminal: Story = {
  args: {
    name: "Code Agent",
    iconName: "Terminal",
    size: 40,
  },
};

export const WithIconPen: Story = {
  args: {
    name: "Writer Agent",
    iconName: "Pen",
    size: 40,
  },
};

export const WithIconBarChart: Story = {
  args: {
    name: "Analytics Agent",
    iconName: "BarChart",
    size: 40,
  },
};

// ---------------------------------------------------------------------------
// Fallback — no name, no icon
// ---------------------------------------------------------------------------

export const NoNameNoIcon: Story = {
  args: {
    size: 40,
  },
};

// ---------------------------------------------------------------------------
// Sizes
// ---------------------------------------------------------------------------

export const Small: Story = {
  args: {
    name: "Tiny",
    iconName: "Info",
    size: 24,
  },
};

export const Large: Story = {
  args: {
    name: "Big Agent",
    iconName: "BooksStack",
    size: 64,
  },
};


================================================
FILE: web/src/refresh-components/avatars/CustomAgentAvatar.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import Image from "next/image";
import { DEFAULT_AVATAR_SIZE_PX } from "@/lib/constants";
import {
  SvgActivitySmall,
  SvgAudioEqSmall,
  SvgBarChartSmall,
  SvgBooksLineSmall,
  SvgBooksStackSmall,
  SvgCheckSmall,
  SvgClockHandsSmall,
  SvgFileSmall,
  SvgHashSmall,
  SvgImageSmall,
  SvgInfoSmall,
  SvgMusicSmall,
  SvgOnyxOctagon,
  SvgPenSmall,
  SvgQuestionMarkSmall,
  SvgSearchSmall,
  SvgSlidersSmall,
  SvgTerminalSmall,
  SvgTextLinesSmall,
  SvgTwoLineSmall,
} from "@opal/icons";

interface IconConfig {
  Icon: React.FunctionComponent<IconProps>;
  className?: string;
}

export const agentAvatarIconMap: Record<string, IconConfig> = {
  Info: { Icon: SvgInfoSmall, className: "stroke-theme-primary-05" },
  QuestionMark: {
    Icon: SvgQuestionMarkSmall,
    className: "stroke-theme-primary-05",
  },

  // blue
  TextLines: { Icon: SvgTextLinesSmall, className: "stroke-theme-blue-05" },
  Pen: { Icon: SvgPenSmall, className: "stroke-theme-blue-05" },
  ClockHands: { Icon: SvgClockHandsSmall, className: "stroke-theme-blue-05" },
  Hash: { Icon: SvgHashSmall, className: "stroke-theme-blue-05" },

  // green
  Search: { Icon: SvgSearchSmall, className: "stroke-theme-green-05" },
  Check: { Icon: SvgCheckSmall, className: "stroke-theme-green-05" },
  BarChart: { Icon: SvgBarChartSmall, className: "stroke-theme-green-05" },
  Activity: { Icon: SvgActivitySmall, className: "stroke-theme-green-05" },

  // purple
  File: { Icon: SvgFileSmall, className: "stroke-theme-purple-05" },
  Image: { Icon: SvgImageSmall, className: "stroke-theme-purple-05" },
  BooksStack: { Icon: SvgBooksStackSmall, className: "stroke-theme-purple-05" },
  BooksLine: { Icon: SvgBooksLineSmall, className: "stroke-theme-purple-05" },

  // orange
  Terminal: { Icon: SvgTerminalSmall, className: "stroke-theme-orange-04" },
  Sliders: { Icon: SvgSlidersSmall, className: "stroke-theme-orange-04" },

  // amber
  AudioEq: { Icon: SvgAudioEqSmall, className: "stroke-theme-amber-04" },
  Music: { Icon: SvgMusicSmall, className: "stroke-theme-amber-04" },
};

interface SvgOctagonWrapperProps {
  size: number;
  children: React.ReactNode;
}

function SvgOctagonWrapper({ size, children }: SvgOctagonWrapperProps) {
  return (
    <div className="relative flex flex-col items-center justify-center">
      <div className="absolute inset-0 flex items-center justify-center">
        {children}
      </div>
      <SvgOnyxOctagon className="stroke-text-04" height={size} width={size} />
    </div>
  );
}

export interface CustomAgentAvatarProps {
  name?: string;
  src?: string;
  iconName?: string;

  size?: number;
}

export default function CustomAgentAvatar({
  name,
  src,
  iconName,

  size = DEFAULT_AVATAR_SIZE_PX,
}: CustomAgentAvatarProps) {
  if (src) {
    return (
      <div
        className="aspect-square rounded-full overflow-hidden relative"
        style={{ height: size, width: size }}
      >
        <Image
          alt={name || "Agent avatar"}
          src={src}
          fill
          className="object-cover object-center"
          sizes={`${size}px`}
        />
      </div>
    );
  }

  const iconConfig = iconName && agentAvatarIconMap[iconName];
  if (iconConfig) {
    const { Icon, className } = iconConfig;
    const multiplier = 0.7;
    return (
      <SvgOctagonWrapper size={size}>
        <Icon
          className={cn("stroke-text-04", className)}
          style={{ width: size * multiplier, height: size * multiplier }}
        />
      </SvgOctagonWrapper>
    );
  }

  // Display first letter of name if available, otherwise fall back to two-line-small icon
  const trimmedName = name?.trim();
  const firstLetter =
    trimmedName && trimmedName.length > 0
      ? trimmedName[0]!.toUpperCase()
      : undefined;
  const validFirstLetter = !!firstLetter && /^[a-zA-Z]$/.test(firstLetter);
  if (validFirstLetter) {
    return (
      <SvgOctagonWrapper size={size}>
        <Text style={{ fontSize: size * 0.5 }}>{firstLetter}</Text>
      </SvgOctagonWrapper>
    );
  }

  return (
    <SvgOctagonWrapper size={size}>
      <SvgTwoLineSmall
        className="stroke-text-04"
        style={{ width: size * 0.8, height: size * 0.8 }}
      />
    </SvgOctagonWrapper>
  );
}


================================================
FILE: web/src/refresh-components/avatars/UserAvatar.tsx
================================================
import { SvgUser } from "@opal/icons";
import { DEFAULT_AVATAR_SIZE_PX } from "@/lib/constants";
import { getUserEmail, getUserInitials } from "@/lib/user";
import Text from "@/refresh-components/texts/Text";
import type { User } from "@/lib/types";

export interface UserAvatarProps {
  user: User | null;
  size?: number;
}

export default function UserAvatar({
  user,
  size = DEFAULT_AVATAR_SIZE_PX,
}: UserAvatarProps) {
  const userEmail = getUserEmail(user);
  const userInitials = getUserInitials(
    user?.personalization?.name ?? null,
    userEmail
  );

  if (!userInitials) {
    return (
      <div
        role="img"
        aria-label={`${userEmail} avatar`}
        className="flex items-center justify-center rounded-full bg-background-tint-01"
        style={{ width: size, height: size }}
      >
        <SvgUser size={size * 0.55} className="stroke-text-03" aria-hidden />
      </div>
    );
  }

  return (
    <div
      role="img"
      aria-label={`${userEmail} avatar`}
      className="flex items-center justify-center rounded-full bg-background-neutral-inverted-00"
      style={{ width: size, height: size }}
    >
      <Text
        inverted
        secondaryAction
        text05
        className="select-none"
        style={{ fontSize: size * 0.4 }}
      >
        {userInitials}
      </Text>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/buttons/AttachmentButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import AttachmentButton from "./AttachmentButton";
import { SvgTextLines, SvgTrash, SvgFiles } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof AttachmentButton> = {
  title: "refresh-components/buttons/AttachmentButton",
  component: AttachmentButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof AttachmentButton>;

export const Default: Story = {
  args: {
    icon: SvgTextLines,
    children: "Project Proposal",
    description: "document.pdf",
    rightText: "2.4 MB",
  },
};

export const Selected: Story = {
  args: {
    icon: SvgTextLines,
    children: "Project Proposal",
    description: "document.pdf",
    rightText: "2.4 MB",
    selected: true,
  },
};

export const Processing: Story = {
  args: {
    icon: SvgTextLines,
    children: "Project Proposal",
    description: "Uploading...",
    rightText: "45%",
    processing: true,
  },
};

export const WithViewButton: Story = {
  args: {
    icon: SvgTextLines,
    children: "Project Proposal",
    description: "document.pdf",
    rightText: "2.4 MB",
    onView: () => {},
  },
};

export const WithActionButton: Story = {
  args: {
    icon: SvgTextLines,
    children: "Project Proposal",
    description: "document.pdf",
    rightText: "2.4 MB",
    actionIcon: SvgTrash,
    onAction: () => {},
  },
};

export const FileList: Story = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 4 }}>
      <AttachmentButton
        icon={SvgTextLines}
        description="proposal.pdf"
        rightText="2.4 MB"
        onView={() => {}}
      >
        Project Proposal
      </AttachmentButton>
      <AttachmentButton
        icon={SvgFiles}
        description="report.xlsx"
        rightText="1.1 MB"
        selected
      >
        Quarterly Report
      </AttachmentButton>
      <AttachmentButton
        icon={SvgTextLines}
        description="Uploading..."
        rightText="72%"
        processing
      >
        Meeting Notes
      </AttachmentButton>
      <AttachmentButton
        icon={SvgFiles}
        description="readme.md"
        rightText="4 KB"
        actionIcon={SvgTrash}
        onAction={() => {}}
      >
        README
      </AttachmentButton>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/AttachmentButton.tsx
================================================
/**
 * AttachmentButton - A button component for displaying file attachments or similar items
 *
 * Displays an attachment item with an icon, title, description, metadata text,
 * and optional action buttons. Commonly used for file lists, attachment pickers,
 * and similar UI patterns where items can be viewed or acted upon.
 *
 * Features:
 * - Three visual states: default, selected (shows checkbox), processing
 * - Left icon that changes to checkbox when selected
 * - Truncated title and description text
 * - Right-aligned metadata text (e.g., file size, date)
 * - Optional view button (external link icon) that appears on hover
 * - Optional action button (custom icon) that appears on hover
 * - Full-width button with hover states
 * - Prevents event bubbling for nested action buttons
 *
 * @example
 * ```tsx
 * import AttachmentButton from "@/refresh-components/buttons/AttachmentButton";
 * import { SvgFileText, SvgTrash } from "@opal/icons";
 *
 * // Basic attachment
 * <AttachmentButton
 *   icon={SvgFileText}
 *   description="document.pdf"
 *   rightText="2.4 MB"
 * >
 *   Project Proposal
 * </AttachmentButton>
 *
 * // Selected state with view button
 * <AttachmentButton
 *   icon={SvgFileText}
 *   selected
 *   description="document.pdf"
 *   rightText="2.4 MB"
 *   onView={() => window.open('/view/doc')}
 * >
 *   Project Proposal
 * </AttachmentButton>
 *
 * // With action button (delete)
 * <AttachmentButton
 *   icon={SvgFileText}
 *   description="document.pdf"
 *   rightText="2.4 MB"
 *   actionIcon={SvgTrash}
 *   onAction={() => handleDelete()}
 * >
 *   Project Proposal
 * </AttachmentButton>
 *
 * // Processing state
 * <AttachmentButton
 *   icon={SvgFileText}
 *   processing
 *   description="Uploading..."
 *   rightText="45%"
 * >
 *   Project Proposal
 * </AttachmentButton>
 * ```
 */

import React from "react";
import { noProp } from "@/lib/utils";
import Truncated from "@/refresh-components/texts/Truncated";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import type { IconProps } from "@opal/types";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { SvgExternalLink } from "@opal/icons";
import { WithoutStyles } from "@/types";

export interface AttachmentProps
  extends WithoutStyles<React.ButtonHTMLAttributes<HTMLButtonElement>> {
  selected?: boolean;
  processing?: boolean;

  icon: React.FunctionComponent<IconProps>;
  children: string;
  description?: string;
  rightText?: string;
  onView?: () => void;

  // Action button: An optional secondary action button that appears on hover.
  // Commonly used for actions like delete, download, or remove.
  // Both `actionIcon` and `onAction` must be provided for the button to appear.
  actionIcon?: React.FunctionComponent<IconProps>;
  onAction?: () => void;
}

export default function AttachmentButton({
  selected,
  processing,
  icon: Icon,
  children,
  description,
  rightText,
  onView,
  actionIcon,
  onAction,
  ...props
}: AttachmentProps) {
  const state = selected ? "selected" : processing ? "processing" : "default";

  return (
    <button
      type="button"
      className="attachment-button"
      data-state={state}
      {...props}
    >
      <div className="attachment-button__content">
        <div className="attachment-button__icon-wrapper">
          {selected ? (
            <Checkbox checked />
          ) : (
            <Icon className="attachment-button__icon" />
          )}
        </div>
        <div className="attachment-button__text-container">
          <div className="attachment-button__title-row">
            <div className="attachment-button__title-wrapper">
              <Truncated mainUiMuted text04 nowrap>
                {children}
              </Truncated>
            </div>
            {onView && (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <IconButton
                icon={SvgExternalLink}
                onClick={noProp(onView)}
                internal
                className="attachment-button__view-button"
              />
            )}
          </div>
          {description && (
            <Truncated secondaryBody text03 className="w-full">
              {description}
            </Truncated>
          )}
        </div>
      </div>

      <div className="attachment-button__actions">
        {rightText && (
          <Text as="p" secondaryBody text03>
            {rightText}
          </Text>
        )}
        {actionIcon && onAction && (
          <div className="attachment-button__action-button">
            <Button
              icon={actionIcon}
              onClick={noProp(onAction)}
              prominence="tertiary"
              size="sm"
            />
          </div>
        )}
      </div>
    </button>
  );
}


================================================
FILE: web/src/refresh-components/buttons/BackButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import BackButton from "./BackButton";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof BackButton> = {
  title: "refresh-components/buttons/BackButton",
  component: BackButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof BackButton>;

export const Default: Story = {};

export const WithBehaviorOverride: Story = {
  args: {
    behaviorOverride: () => {
      console.log("Custom back behavior");
    },
  },
};


================================================
FILE: web/src/refresh-components/buttons/BackButton.tsx
================================================
"use client";

import { useRouter } from "next/navigation";
import type { Route } from "next";
import { Button } from "@opal/components";
import { SvgArrowLeft } from "@opal/icons";

export interface BackButtonProps {
  behaviorOverride?: () => void;
  routerOverride?: string;
}

export default function BackButton({
  behaviorOverride,
  routerOverride,
}: BackButtonProps) {
  const router = useRouter();

  return (
    <Button
      icon={SvgArrowLeft}
      prominence="tertiary"
      onClick={() => {
        if (behaviorOverride) {
          behaviorOverride();
        } else if (routerOverride) {
          router.push(routerOverride as Route);
        } else {
          router.back();
        }
      }}
    >
      Back
    </Button>
  );
}


================================================
FILE: web/src/refresh-components/buttons/Button.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Button from "./Button";
import { SvgPlus, SvgArrowRight } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Button> = {
  title: "refresh-components/buttons/Button",
  component: Button,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Button>;

export const Default: Story = {
  args: {
    children: "Button",
  },
};

export const Variants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <Button main>Main</Button>
      <Button action>Action</Button>
      <Button danger>Danger</Button>
    </div>
  ),
};

export const Prominences: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <Button primary>Primary</Button>
      <Button secondary>Secondary</Button>
      <Button tertiary>Tertiary</Button>
    </div>
  ),
};

export const WithIcons: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <Button leftIcon={SvgPlus}>With Left Icon</Button>
      <Button rightIcon={SvgArrowRight}>With Right Icon</Button>
    </div>
  ),
};

export const Small: Story = {
  args: {
    size: "md",
    children: "Small Button",
  },
};

export const Disabled: Story = {
  args: {
    disabled: true,
    children: "Disabled",
  },
};

export const AsLink: Story = {
  args: {
    href: "https://example.com",
    children: "Link Button",
  },
};


================================================
FILE: web/src/refresh-components/buttons/Button.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import Link from "next/link";
import type { Route } from "next";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";

export interface ButtonProps
  extends React.ButtonHTMLAttributes<HTMLButtonElement> {
  // Button variants:
  main?: boolean;
  action?: boolean;
  danger?: boolean;

  // Button subvariants:
  primary?: boolean;
  secondary?: boolean;
  tertiary?: boolean;
  internal?: boolean;

  // Button states:
  transient?: boolean;

  // Button sizes:
  size?: "lg" | "md";

  // Icons:
  leftIcon?: React.FunctionComponent<IconProps>;
  rightIcon?: React.FunctionComponent<IconProps>;

  href?: string;
  target?: string;
}

const BUTTON_SIZE_CLASS_MAP = {
  lg: {
    button: "p-2 rounded-12 gap-1.5",
    content: {
      left: "pr-1",
      right: "pl-1",
      none: "",
    },
  },
  md: {
    button: "p-1 rounded-08 gap-0",
    content: {
      left: "pr-1 py-0.5",
      right: "pl-1 py-0.5",
      none: "py-0.5",
    },
  },
} as const;

const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
  (
    {
      main,
      action,
      danger,

      primary,
      secondary,
      tertiary,
      internal,

      disabled,
      transient,
      size = "lg",

      leftIcon: LeftIcon,
      rightIcon: RightIcon,

      href,
      target,
      children,
      className,
      ...props
    },
    ref
  ) => {
    if (LeftIcon && RightIcon)
      throw new Error(
        "The left and right icons cannot be both specified at the same time"
      );

    const variant = main
      ? "main"
      : action
        ? "action"
        : danger
          ? "danger"
          : "main";
    const subvariant = primary
      ? "primary"
      : secondary
        ? "secondary"
        : tertiary
          ? "tertiary"
          : internal
            ? "internal"
            : "primary";

    const buttonClass = `button-${variant}-${subvariant}`;
    const textClass = `button-${variant}-${subvariant}-text`;
    const iconClass = `button-${variant}-${subvariant}-icon`;
    const iconPlacement = LeftIcon ? "left" : RightIcon ? "right" : "none";
    const sizeClasses = BUTTON_SIZE_CLASS_MAP[size];
    const textSizeProps =
      size === "md"
        ? { secondaryAction: true as const }
        : { mainUiBody: true as const };

    const content = (
      <button
        ref={ref}
        className={cn(
          "h-fit w-fit flex flex-row items-center justify-center",
          sizeClasses.button,
          buttonClass,
          className
        )}
        disabled={disabled}
        data-state={transient ? "transient" : undefined}
        type="button"
        {...props}
      >
        {LeftIcon && (
          <div className="w-[1rem] h-[1rem] flex flex-col items-center justify-center">
            <LeftIcon className={cn("w-[1rem] h-[1rem]", iconClass)} />
          </div>
        )}
        {/* Buttons may conditionally pass text as children (e.g. responsive
            breakpoints), so skip content padding when children is empty. */}
        {children !== "" && (
          <div
            className={cn("leading-none", sizeClasses.content[iconPlacement])}
          >
            {typeof children === "string" ? (
              <Text
                {...textSizeProps}
                className={cn("whitespace-nowrap", textClass)}
              >
                {children}
              </Text>
            ) : (
              children
            )}
          </div>
        )}
        {RightIcon && (
          <div className="w-[1rem] h-[1rem]">
            <RightIcon className={cn("w-[1rem] h-[1rem]", iconClass)} />
          </div>
        )}
      </button>
    );

    if (!href) return content;
    return (
      <Link
        href={href as Route}
        target={target}
        rel={target === "_blank" ? "noopener noreferrer" : undefined}
      >
        {content}
      </Link>
    );
  }
);
Button.displayName = "Button";

export default Button;


================================================
FILE: web/src/refresh-components/buttons/ButtonRenaming.stories.tsx
================================================
import React from "react";
import type { Meta, StoryObj } from "@storybook/react";
import ButtonRenaming from "./ButtonRenaming";

const noop = () => {};

const meta: Meta<typeof ButtonRenaming> = {
  title: "refresh-components/buttons/ButtonRenaming",
  component: ButtonRenaming,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <div
        style={{
          width: 260,
          padding: 8,
          background: "var(--background-neutral-01)",
          borderRadius: 8,
        }}
      >
        <Story />
      </div>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof ButtonRenaming>;

export const Default: Story = {
  args: {
    initialName: "My Chat Session",
    onRename: async () => {},
    onClose: noop,
  },
};

export const EmptyName: Story = {
  args: {
    initialName: null,
    onRename: async () => {},
    onClose: noop,
  },
};

export const LongName: Story = {
  args: {
    initialName: "This is a very long chat session name that should overflow",
    onRename: async () => {},
    onClose: noop,
  },
};


================================================
FILE: web/src/refresh-components/buttons/ButtonRenaming.tsx
================================================
"use client";

import React, { useState } from "react";
import { handleEnterPress, useEscapePress } from "@/lib/typingUtils";
import { UNNAMED_CHAT } from "@/lib/constants";
import { cn } from "@/lib/utils";

interface ButtonRenamingProps {
  initialName: string | null;
  onRename: (newName: string) => Promise<void>;
  onClose: () => void;
  className?: string;
}

export default function ButtonRenaming({
  initialName,
  onRename,
  onClose,
  className,
}: ButtonRenamingProps) {
  const [renamingValue, setRenamingValue] = useState(
    initialName || UNNAMED_CHAT
  );

  useEscapePress(onClose, true);

  async function submitRename() {
    const newName = renamingValue.trim();
    if (newName === "" || newName === initialName) {
      onClose();
      return;
    }

    // Close immediately for instant feedback
    onClose();

    // Proceed with the rename operation after closing
    try {
      await onRename(newName);
    } catch (error) {
      console.error("Failed to rename:", error);
    }
  }

  return (
    <input
      onBlur={onClose}
      value={renamingValue}
      className={cn(
        "bg-transparent outline-none w-full resize-none overflow-x-hidden overflow-y-hidden whitespace-nowrap no-scrollbar font-main-content-body",
        className
      )}
      onChange={(event) => setRenamingValue(event.target.value)}
      onKeyDown={handleEnterPress(() => submitRename())}
      autoFocus
    />
  );
}


================================================
FILE: web/src/refresh-components/buttons/CopyIconButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import CopyIconButton from "./CopyIconButton";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof CopyIconButton> = {
  title: "refresh-components/buttons/CopyIconButton",
  component: CopyIconButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof CopyIconButton>;

export const Default: Story = {
  args: {
    getCopyText: () => "Copied text!",
  },
};

export const WithTooltip: Story = {
  args: {
    getCopyText: () => "Copied text!",
    tooltip: "Copy to clipboard",
  },
};


================================================
FILE: web/src/refresh-components/buttons/CopyIconButton.tsx
================================================
"use client";

import { useEffect, useRef, useState } from "react";
import { Button, ButtonProps } from "@opal/components";
import { SvgAlertTriangle, SvgCheck, SvgCopy } from "@opal/icons";

type CopyState = "idle" | "copied" | "error";

/** Omit that distributes over unions, preserving discriminated-union branches. */
type DistributiveOmit<T, K extends PropertyKey> = T extends unknown
  ? Omit<T, K>
  : never;

export type CopyIconButtonProps = DistributiveOmit<
  ButtonProps,
  "variant" | "icon" | "onClick"
> & {
  // Function that returns the text to copy to clipboard
  getCopyText: () => string;
  // Optional function to get HTML content for rich copy
  getHtmlContent?: () => string;
};

export default function CopyIconButton({
  getCopyText,
  getHtmlContent,
  tooltip,
  prominence = "tertiary",
  ...iconButtonProps
}: CopyIconButtonProps) {
  const [copyState, setCopyState] = useState<CopyState>("idle");
  const copyTimeoutRef = useRef<NodeJS.Timeout | null>(null);

  async function handleCopy() {
    const text = getCopyText();

    // Clear existing timeout if any
    if (copyTimeoutRef.current) {
      clearTimeout(copyTimeoutRef.current);
    }

    try {
      // Check if Clipboard API is available
      if (!navigator.clipboard) {
        throw new Error("Clipboard API not available");
      }

      // If HTML content getter is provided, copy both HTML and plain text
      if (getHtmlContent) {
        const htmlContent = getHtmlContent();
        const clipboardItem = new ClipboardItem({
          "text/html": new Blob([htmlContent], { type: "text/html" }),
          "text/plain": new Blob([text], { type: "text/plain" }),
        });
        await navigator.clipboard.write([clipboardItem]);
      }
      // Default: plain text only
      else {
        await navigator.clipboard.writeText(text);
      }

      // Show "copied" state
      setCopyState("copied");
    } catch (err) {
      console.error("Failed to copy:", err);

      // Show "error" state
      setCopyState("error");
    }

    // Reset to normal state after 3 seconds
    copyTimeoutRef.current = setTimeout(() => {
      setCopyState("idle");
    }, 3000);
  }

  // Clean up timeout on unmount
  useEffect(() => {
    return () => {
      if (copyTimeoutRef.current) {
        clearTimeout(copyTimeoutRef.current);
      }
    };
  }, []);

  function getIcon() {
    switch (copyState) {
      case "copied":
        return SvgCheck;
      case "error":
        return SvgAlertTriangle;
      case "idle":
      default:
        return SvgCopy;
    }
  }

  function getTooltip() {
    switch (copyState) {
      case "copied":
        return "Copied!";
      case "error":
        return "Failed to copy";
      case "idle":
      default:
        return tooltip || "Copy";
    }
  }

  // Assertion is safe: CopyIconButton always supplies icon + onClick,
  // satisfying Button's content union. Spread may override prominence.
  const buttonProps = {
    prominence,
    ...iconButtonProps,
    icon: getIcon(),
    onClick: handleCopy,
    tooltip: getTooltip(),
  } as ButtonProps;

  return <Button {...buttonProps} />;
}


================================================
FILE: web/src/refresh-components/buttons/CreateButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import CreateButton from "./CreateButton";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof CreateButton> = {
  title: "refresh-components/buttons/CreateButton",
  component: CreateButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof CreateButton>;

export const Default: Story = {};

export const CustomLabel: Story = {
  args: {
    children: "New Document",
  },
};

export const RightIcon: Story = {
  args: {
    rightIcon: true,
    children: "Add Item",
  },
};

export const Disabled: Story = {
  args: {
    disabled: true,
  },
};

export const AllVariants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <CreateButton />
      <CreateButton>New Document</CreateButton>
      <CreateButton rightIcon>Add Item</CreateButton>
      <CreateButton disabled />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/CreateButton.tsx
================================================
"use client";

import Button, { ButtonProps } from "@/refresh-components/buttons/Button";
import { WithoutStyles } from "@/types";
import { SvgPlusCircle } from "@opal/icons";

export interface CreateButtonProps
  extends Omit<WithoutStyles<ButtonProps>, "leftIcon" | "rightIcon"> {
  rightIcon?: boolean;
}

export default function CreateButton({
  rightIcon,
  children,
  ...props
}: CreateButtonProps) {
  return (
    <Button
      secondary
      leftIcon={rightIcon ? undefined : SvgPlusCircle}
      rightIcon={rightIcon ? SvgPlusCircle : undefined}
      {...props}
    >
      {children ?? "Create"}
    </Button>
  );
}


================================================
FILE: web/src/refresh-components/buttons/IconButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import IconButton from "./IconButton";
import { SvgSettings, SvgPlus, SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof IconButton> = {
  title: "refresh-components/buttons/IconButton",
  component: IconButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof IconButton>;

export const Default: Story = {
  args: {
    icon: SvgSettings,
  },
};

export const Variants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <IconButton main icon={SvgSettings} />
      <IconButton action icon={SvgPlus} />
      <IconButton danger icon={SvgX} />
    </div>
  ),
};

export const Small: Story = {
  args: {
    icon: SvgSettings,
    small: true,
  },
};

export const WithTooltip: Story = {
  args: {
    icon: SvgSettings,
    tooltip: "Settings",
  },
};

export const Disabled: Story = {
  args: {
    icon: SvgSettings,
    disabled: true,
  },
};


================================================
FILE: web/src/refresh-components/buttons/IconButton.tsx
================================================
"use client";

import React, { useMemo } from "react";
import type { IconProps } from "@opal/types";
import { cn } from "@/lib/utils";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";

const buttonClasses = (transient: boolean | undefined) =>
  ({
    main: {
      primary: {
        enabled: [
          "bg-theme-primary-05",
          "hover:bg-theme-primary-04",
          transient && "bg-theme-primary-04",
          "active:bg-theme-primary-06",
        ],
        disabled: ["bg-background-neutral-04"],
      },
      secondary: {
        enabled: [
          "bg-background-tint-02",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-background-neutral-03"],
      },
      tertiary: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
      internal: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
      small: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
    },
    action: {
      primary: {
        enabled: [
          "bg-action-link-05",
          "hover:bg-action-link-04",
          transient && "bg-action-link-04",
          "active:bg-action-link-06",
        ],
        disabled: ["bg-action-link-02"],
      },
      secondary: {
        enabled: [
          "bg-background-tint-02",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-background-neutral-02"],
      },
      tertiary: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-background-neutral-02"],
      },
      internal: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
      small: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
    },
    danger: {
      primary: {
        enabled: [
          "bg-action-danger-05",
          "hover:bg-action-danger-04",
          transient && "bg-action-danger-04",
          "active:bg-action-danger-06",
        ],
        disabled: ["bg-action-danger-02"],
      },
      secondary: {
        enabled: [
          "bg-background-tint-02",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-background-neutral-02"],
      },
      tertiary: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-02",
          transient && "bg-background-tint-02",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-background-neutral-02"],
      },
      internal: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
      small: {
        enabled: [
          "bg-transparent",
          "hover:bg-background-tint-00",
          transient && "bg-background-tint-00",
          "active:bg-background-tint-00",
        ],
        disabled: ["bg-transparent"],
      },
    },
  }) as const;

const iconClasses = (transient: boolean | undefined) =>
  ({
    main: {
      primary: {
        enabled: ["stroke-text-inverted-05"],
        disabled: ["stroke-text-inverted-05"],
      },
      secondary: {
        enabled: [
          "stroke-text-03",
          "group-hover/IconButton:stroke-text-04",
          transient && "stroke-text-04",
          "group-active/IconButton:stroke-text-05",
        ],
        disabled: ["stroke-text-01"],
      },
      tertiary: {
        enabled: [
          "stroke-text-03",
          "group-hover/IconButton:stroke-text-04",
          transient && "stroke-text-04",
          "group-active/IconButton:stroke-text-05",
        ],
        disabled: ["stroke-text-01"],
      },
      internal: {
        enabled: [
          "stroke-text-02",
          "group-hover/IconButton:stroke-text-04",
          transient && "stroke-text-04",
          "group-active/IconButton:stroke-text-05",
        ],
        disabled: ["stroke-text-01"],
      },
      small: {
        enabled: [
          "stroke-text-02",
          "group-hover/IconButton:stroke-text-04",
          transient && "stroke-text-04",
          "group-active/IconButton:stroke-text-05",
        ],
        disabled: ["stroke-text-01"],
      },
    },
    action: {
      primary: {
        enabled: ["stroke-text-light-05"],
        disabled: ["stroke-text-01"],
      },
      secondary: {
        enabled: [
          "stroke-action-link-05",
          "group-hover/IconButton:stroke-action-link-05",
          transient && "stroke-action-link-05",
          "group-active/IconButton:stroke-action-link-06",
        ],
        disabled: ["stroke-action-link-02"],
      },
      tertiary: {
        enabled: [
          "stroke-action-link-05",
          "group-hover/IconButton:stroke-action-link-05",
          transient && "stroke-action-link-05",
          "group-active/IconButton:stroke-action-link-06",
        ],
        disabled: ["stroke-action-link-02"],
      },
      internal: {
        enabled: [
          "stroke-action-link-05",
          "group-hover/IconButton:stroke-action-link-05",
          transient && "stroke-action-link-05",
          "group-active/IconButton:stroke-action-link-06",
        ],
        disabled: ["stroke-action-link-02"],
      },
      small: {
        enabled: [
          "stroke-action-link-05",
          "group-hover/IconButton:stroke-action-link-05",
          transient && "stroke-action-link-05",
          "group-active/IconButton:stroke-action-link-06",
        ],
        disabled: ["stroke-action-link-02"],
      },
    },
    danger: {
      primary: {
        enabled: ["stroke-text-light-05"],
        disabled: ["stroke-text-01"],
      },
      secondary: {
        enabled: [
          "stroke-action-danger-05",
          "group-hover/IconButton:stroke-action-danger-05",
          transient && "stroke-action-danger-05",
          "group-active/IconButton:stroke-action-danger-06",
        ],
        disabled: ["stroke-action-danger-02"],
      },
      tertiary: {
        enabled: [
          "stroke-action-danger-05",
          "group-hover/IconButton:stroke-action-danger-05",
          transient && "stroke-action-danger-05",
          "group-active/IconButton:stroke-action-danger-06",
        ],
        disabled: ["stroke-action-danger-02"],
      },
      internal: {
        enabled: [
          "stroke-action-danger-05",
          "group-hover/IconButton:stroke-action-danger-05",
          transient && "stroke-action-danger-05",
          "group-active/IconButton:stroke-action-danger-06",
        ],
        disabled: ["stroke-action-danger-02"],
      },
      small: {
        enabled: [
          "stroke-action-danger-05",
          "group-hover/IconButton:stroke-action-danger-05",
          transient && "stroke-action-danger-05",
          "group-active/IconButton:stroke-action-danger-06",
        ],
        disabled: ["stroke-action-danger-02"],
      },
    },
  }) as const;

export interface IconButtonProps
  extends React.ButtonHTMLAttributes<HTMLButtonElement> {
  // Top level button variants
  main?: boolean;
  action?: boolean;
  danger?: boolean;

  // Button sub-variants
  primary?: boolean;
  secondary?: boolean;
  tertiary?: boolean;
  internal?: boolean;

  // Button size
  small?: boolean;

  // Button states
  transient?: boolean;
  disabled?: boolean;

  // Button properties
  onHover?: (isHovering: boolean) => void;
  onClick?: React.MouseEventHandler<HTMLButtonElement>;
  icon: React.FunctionComponent<IconProps>;
  tooltip?: string;
  toolTipPosition?: "top" | "bottom" | "left" | "right";
  tooltipSize?: "sm" | "md" | "lg";
  /** Additional className to apply to the icon element */
  iconClassName?: string;
}

export default function IconButton({
  main,
  action,
  danger,

  primary,
  secondary,
  tertiary,
  internal,
  small,

  transient,
  disabled,

  onHover,
  onClick,
  icon: Icon,
  className,
  iconClassName,
  tooltip,
  toolTipPosition = "top",
  tooltipSize = "lg",
  ...props
}: IconButtonProps) {
  const variant = main
    ? "main"
    : action
      ? "action"
      : danger
        ? "danger"
        : "main";
  const subvariant = primary
    ? "primary"
    : secondary
      ? "secondary"
      : tertiary
        ? "tertiary"
        : internal
          ? "internal"
          : small
            ? "small"
            : "primary";
  const abled = disabled ? "disabled" : "enabled";

  const buttonClass = useMemo(
    () => buttonClasses(transient)[variant][subvariant][abled],
    [transient, variant, subvariant, abled]
  );
  const iconClass = useMemo(
    () => iconClasses(transient)[variant][subvariant][abled],
    [transient, variant, subvariant, abled]
  );

  const buttonElement = (
    <button
      type="button"
      className={cn(
        "flex items-center justify-center h-fit w-fit group/IconButton",
        small || internal ? "p-1" : "p-2",
        disabled && "cursor-not-allowed",
        small || internal ? "rounded-08" : "rounded-12",
        buttonClass,
        className
      )}
      onClick={disabled ? undefined : onClick}
      onMouseEnter={(e) => {
        props.onMouseEnter?.(e);
        if (!disabled) onHover?.(true);
      }}
      onMouseLeave={(e) => {
        props.onMouseLeave?.(e);
        if (!disabled) onHover?.(false);
      }}
      disabled={disabled}
      {...props}
    >
      <Icon
        className={cn(
          small ? "h-[0.75rem] w-[0.75rem]" : "h-[1rem] w-[1rem]",
          iconClass,
          iconClassName
        )}
      />
    </button>
  );

  if (!tooltip) return buttonElement;

  return (
    <SimpleTooltip side={toolTipPosition} size={tooltipSize} tooltip={tooltip}>
      {buttonElement}
    </SimpleTooltip>
  );
}


================================================
FILE: web/src/refresh-components/buttons/LineItem.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import LineItem from "./LineItem";
import {
  SvgUser,
  SvgSettings,
  SvgTrash,
  SvgFolder,
  SvgCheck,
  SvgSearch,
} from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import Text from "@/refresh-components/texts/Text";

const meta: Meta<typeof LineItem> = {
  title: "refresh-components/buttons/LineItem",
  component: LineItem,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 300 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof LineItem>;

export const Default: Story = {
  args: {
    icon: SvgUser,
    children: "Profile Settings",
  },
};

export const WithDescription: Story = {
  args: {
    icon: SvgSettings,
    children: "Settings",
    description: "Manage your account settings",
  },
};

export const Selected: Story = {
  args: {
    icon: SvgCheck,
    children: "Active Item",
    selected: true,
  },
};

export const SelectedEmphasized: Story = {
  args: {
    icon: SvgFolder,
    children: "Selected Folder",
    selected: true,
    emphasized: true,
  },
};

export const Danger: Story = {
  args: {
    icon: SvgTrash,
    children: "Delete Account",
    danger: true,
  },
};

export const Action: Story = {
  args: {
    icon: SvgSearch,
    children: "Search Results",
    action: true,
  },
};

export const Muted: Story = {
  args: {
    icon: SvgFolder,
    children: "Secondary Item",
    muted: true,
  },
};

export const Strikethrough: Story = {
  args: {
    icon: SvgFolder,
    children: "Archived Feature",
    strikethrough: true,
  },
};

export const WithRightChildren: Story = {
  args: {
    icon: SvgSettings,
    children: "Keyboard Shortcuts",
    rightChildren: (
      <Text as="p" secondaryBody text03>
        Cmd+K
      </Text>
    ),
  },
};

export const MenuExample: Story = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 2 }}>
      <LineItem icon={SvgUser}>Profile</LineItem>
      <LineItem icon={SvgSettings} description="Manage your preferences">
        Settings
      </LineItem>
      <LineItem icon={SvgFolder} selected emphasized>
        Documents
      </LineItem>
      <LineItem icon={SvgSearch} action>
        Search
      </LineItem>
      <LineItem icon={SvgFolder} muted>
        Archived
      </LineItem>
      <LineItem icon={SvgTrash} danger>
        Delete
      </LineItem>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/LineItem.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import Truncated from "@/refresh-components/texts/Truncated";
import Link from "next/link";
import type { Route } from "next";
import { Section } from "@/layouts/general-layouts";
import { WithoutStyles } from "@/types";

const buttonClassNames = {
  main: {
    normal: "line-item-button-main",
    emphasized: "line-item-button-main-emphasized",
  },
  strikethrough: {
    normal: "line-item-button-strikethrough",
    emphasized: "line-item-button-strikethrough-emphasized",
  },
  disabled: {
    normal: "line-item-button-disabled",
    emphasized: "line-item-button-disabled-emphasized",
  },
  danger: {
    normal: "line-item-button-danger",
    emphasized: "line-item-button-danger-emphasized",
  },
  action: {
    normal: "line-item-button-action",
    emphasized: "line-item-button-action-emphasized",
  },
  muted: {
    normal: "line-item-button-muted",
    emphasized: "line-item-button-muted-emphasized",
  },
  skeleton: {
    normal: "line-item-button-skeleton",
    emphasized: "line-item-button-skeleton-emphasized",
  },
} as const;

const textClassNames = {
  main: "line-item-text-main",
  strikethrough: "line-item-text-strikethrough",
  disabled: "line-item-text-disabled",
  danger: "line-item-text-danger",
  action: "line-item-text-action",
  muted: "line-item-text-muted",
  skeleton: "line-item-text-skeleton",
} as const;

const iconClassNames = {
  main: "line-item-icon-main",
  strikethrough: "line-item-icon-strikethrough",
  disabled: "line-item-icon-disabled",
  danger: "line-item-icon-danger",
  action: "line-item-icon-action",
  muted: "line-item-icon-muted",
  skeleton: "line-item-icon-skeleton",
} as const;

export interface LineItemProps
  extends Omit<
    WithoutStyles<React.HTMLAttributes<HTMLDivElement>>,
    "children"
  > {
  /**
   * Whether the row should behave like a standalone interactive button.
   * Set to false when nested inside another interactive primitive
   * (e.g. Radix Select.Item) to avoid nested focus targets.
   */
  interactive?: boolean;
  // line-item variants
  strikethrough?: boolean;
  disabled?: boolean;
  danger?: boolean;
  action?: boolean;
  muted?: boolean;
  skeleton?: boolean;

  // modifier (makes the background more pronounced when selected).
  emphasized?: boolean;

  selected?: boolean;
  icon?: React.FunctionComponent<IconProps>;
  description?: string;
  rightChildren?: React.ReactNode;
  href?: string;
  rel?: string;
  target?: string;
  ref?: React.Ref<HTMLDivElement>;
  children?: React.ReactNode;
}

/**
 * LineItem Component
 *
 * A versatile menu item button component designed for use in dropdowns, sidebars, and menus.
 * Supports icons, descriptions, and multiple visual states.
 *
 * @example
 * ```tsx
 * // Basic usage
 * <LineItem icon={SvgUser}>Profile Settings</LineItem>
 *
 * // With selection state
 * <LineItem icon={SvgCheck} selected>Active Item</LineItem>
 *
 * // With emphasis (highlighted background)
 * <LineItem icon={SvgFolder} selected emphasized>
 *   Selected Folder
 * </LineItem>
 *
 * // Danger variant
 * <LineItem icon={SvgTrash} danger>Delete Account</LineItem>
 *
 * // With description
 * <LineItem icon={SvgSettings} description="Manage your account settings">
 *   Settings
 * </LineItem>
 *
 * // With right content
 * <LineItem icon={SvgKey} rightChildren={<Text as="p" text03>⌘K</Text>}>
 *   Keyboard Shortcuts
 * </LineItem>
 *
 * // As a link
 * <LineItem icon={SvgHome} href="/dashboard">Dashboard</LineItem>
 *
 * // Strikethrough (disabled/deprecated items)
 * <LineItem icon={SvgArchive} strikethrough>
 *   Archived Feature
 * </LineItem>
 *
 * // Muted variant (less prominent items)
 * <LineItem icon={SvgFolder} muted>
 *   Secondary Item
 * </LineItem>
 * ```
 *
 * @remarks
 * - Variants are mutually exclusive: only one of `strikethrough`, `danger`, `action`, `muted`, or `skeleton` should be used
 * - The `selected` prop modifies text/icon colors for `main` and `danger` variants
 * - The `emphasized` prop adds background colors when combined with `selected`
 * - The component automatically adds a `data-selected="true"` attribute for custom styling
 */
export default function LineItem({
  interactive = true,
  selected,
  strikethrough,
  disabled,
  danger,
  action,
  muted,
  skeleton,
  emphasized,
  icon: Icon,
  description,
  children,
  rightChildren,
  href,
  rel,
  target,
  ref,
  ...props
}: LineItemProps) {
  // Determine variant (mutually exclusive, with priority order: strikethrough > disabled > danger > action > muted > main)
  const variant = strikethrough
    ? "strikethrough"
    : disabled
      ? "disabled"
      : danger
        ? "danger"
        : action
          ? "action"
          : muted
            ? "muted"
            : skeleton
              ? "skeleton"
              : "main";

  const emphasisKey = emphasized ? "emphasized" : "normal";

  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
    if (disabled) {
      e.preventDefault();
      e.stopPropagation();
      return;
    }
    props.onClick?.(e);
  };

  const handleKeyDown = (e: React.KeyboardEvent<HTMLDivElement>) => {
    if (!interactive) {
      props.onKeyDown?.(e);
      return;
    }

    if (e.key === "Enter") {
      e.preventDefault();
      if (!disabled) {
        (e.currentTarget as HTMLDivElement).click();
      }
    } else if (e.key === " ") {
      e.preventDefault();
    }
    props.onKeyDown?.(e);
  };

  const handleKeyUp = (e: React.KeyboardEvent<HTMLDivElement>) => {
    if (!interactive) {
      props.onKeyUp?.(e);
      return;
    }

    if (e.key === " ") {
      e.preventDefault();
      if (!disabled) {
        (e.currentTarget as HTMLDivElement).click();
      }
    }
    props.onKeyUp?.(e);
  };

  const content = (
    <div
      ref={ref}
      role={interactive ? "button" : undefined}
      tabIndex={interactive ? 0 : undefined}
      aria-disabled={disabled || undefined}
      className={cn(
        "flex flex-row w-full items-start p-2 rounded-08 group/LineItem gap-2",
        !!(children && description) ? "items-start" : "items-center",
        buttonClassNames[variant][emphasisKey]
      )}
      data-selected={selected}
      {...props}
      onClick={handleClick}
      onKeyDown={handleKeyDown}
      onKeyUp={handleKeyUp}
    >
      {Icon && (
        <div
          className={cn(
            "flex flex-col justify-center items-center h-[1rem] min-w-[1rem]",
            !!(children && description) && "mt-0.5"
          )}
        >
          <Icon className={cn("h-[1rem] w-[1rem]", iconClassNames[variant])} />
        </div>
      )}
      <Section alignItems="start" gap={0}>
        {children ? (
          <>
            <Section flexDirection="row" gap={0.5}>
              <Truncated
                mainUiMuted
                className={cn("text-left w-full", textClassNames[variant])}
              >
                {children}
              </Truncated>
              {rightChildren && (
                <Section alignItems="end" width="fit">
                  {rightChildren}
                </Section>
              )}
            </Section>
            {description && (
              <Truncated secondaryBody text03 className="text-left w-full">
                {description}
              </Truncated>
            )}
          </>
        ) : description ? (
          <Section flexDirection="row" gap={0.5}>
            <Truncated secondaryBody text03 className="text-left w-full">
              {description}
            </Truncated>
            {rightChildren && (
              <Section alignItems="end" width="fit">
                {rightChildren}
              </Section>
            )}
          </Section>
        ) : null}
      </Section>
    </div>
  );

  if (!href) return content;
  return (
    <Link href={href as Route} rel={rel} target={target}>
      {content}
    </Link>
  );
}


================================================
FILE: web/src/refresh-components/buttons/SelectButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SelectButton from "./SelectButton";
import { SvgFilter, SvgSettings } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof SelectButton> = {
  title: "refresh-components/buttons/SelectButton",
  component: SelectButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof SelectButton>;

export const Default: Story = {
  args: {
    children: "Select Option",
  },
};

export const MainVariant: Story = {
  args: {
    main: true,
    children: "Main Select",
    leftIcon: SvgFilter,
  },
};

export const ActionVariant: Story = {
  args: {
    action: true,
    children: "Action Select",
    leftIcon: SvgSettings,
  },
};

export const Engaged: Story = {
  args: {
    action: true,
    engaged: true,
    children: "Engaged",
    leftIcon: SvgSettings,
  },
};

export const WithChevron: Story = {
  args: {
    main: true,
    children: "Dropdown",
    leftIcon: SvgFilter,
    rightChevronIcon: true,
  },
};

export const Transient: Story = {
  args: {
    main: true,
    transient: true,
    children: "Transient",
    leftIcon: SvgFilter,
    rightChevronIcon: true,
  },
};

export const Folded: Story = {
  args: {
    main: true,
    folded: true,
    children: "Folded Label",
    leftIcon: SvgFilter,
  },
};

export const FoldedAction: Story = {
  args: {
    action: true,
    folded: true,
    children: "Set as Default",
    rightIcon: SvgSettings,
  },
};

export const Disabled: Story = {
  args: {
    main: true,
    disabled: true,
    children: "Disabled",
    leftIcon: SvgFilter,
  },
};

export const ActionDisabled: Story = {
  args: {
    action: true,
    disabled: true,
    children: "Disabled Action",
    leftIcon: SvgSettings,
  },
};

export const AllStates: Story = {
  render: () => (
    <div
      style={{
        display: "flex",
        gap: 16,
        alignItems: "center",
        flexWrap: "wrap",
      }}
    >
      <SelectButton main leftIcon={SvgFilter}>
        Main
      </SelectButton>
      <SelectButton action leftIcon={SvgSettings}>
        Action
      </SelectButton>
      <SelectButton action engaged leftIcon={SvgSettings}>
        Engaged
      </SelectButton>
      <SelectButton main transient leftIcon={SvgFilter} rightChevronIcon>
        Transient
      </SelectButton>
      <SelectButton main disabled leftIcon={SvgFilter}>
        Disabled
      </SelectButton>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/SelectButton.tsx
================================================
"use client";

import React, { useState, useMemo } from "react";
import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import { SvgChevronDownSmall } from "@opal/icons";
import { useContentSize } from "@/hooks/useContentSize";

const MARGIN = 5;

const baseClassNames = (engaged?: boolean, transient?: boolean) =>
  ({
    main: {
      enabled: [
        "bg-transparent",
        "hover:bg-background-tint-02",
        transient && "bg-background-tint-02",
        "active:bg-background-tint-00",
      ],
      disabled: ["bg-background-neutral-02"],
    },
    action: {
      enabled: [
        engaged ? "bg-action-link-01" : "bg-transparent",
        engaged ? "hover:bg-action-link-01" : "hover:bg-background-tint-02",
        "active:bg-background-tint-00",
      ],
      disabled: ["bg-background-neutral-02"],
    },
  }) as const;

const iconClassNames = (engaged?: boolean, transient?: boolean) =>
  ({
    main: {
      enabled: [
        "stroke-text-03",
        "group-hover/SelectButton:stroke-text-04",
        transient && "stroke-text-04",
        "group-active/SelectButton:stroke-text-05",
      ],
      disabled: ["stroke-text-02"],
    },
    action: {
      enabled: [
        engaged ? "stroke-action-link-05" : "stroke-text-03",
        engaged
          ? "group-hover/SelectButton:stroke-action-link-05"
          : "group-hover/SelectButton:stroke-text-04",
        engaged
          ? "group-active/SelectButton:stroke-action-link-06"
          : "group-active/SelectButton:stroke-text-05",
      ],
      disabled: ["stroke-action-link-03"],
    },
  }) as const;

const textClassNames = (engaged?: boolean, transient?: boolean) =>
  ({
    main: {
      enabled: [
        "text-text-03",
        "group-hover/SelectButton:text-text-04",
        transient && "text-text-04",
        "group-active/SelectButton:text-text-05",
      ],
      disabled: ["text-text-01"],
    },
    action: {
      enabled: [
        engaged ? "text-action-link-05" : "text-text-03",
        engaged
          ? "group-hover/SelectButton:text-action-link-05"
          : "group-hover/SelectButton:text-text-04",
        engaged
          ? "group-active/SelectButton:text-action-link-06"
          : "group-active/SelectButton:text-text-05",
      ],
      disabled: ["stroke-action-link-03"],
    },
  }) as const;

export interface SelectButtonProps {
  // Button variants
  main?: boolean;
  action?: boolean;

  // Button states
  transient?: boolean;
  engaged?: boolean;
  disabled?: boolean;
  folded?: boolean;

  // Content
  children: string;
  leftIcon?: React.FunctionComponent<IconProps>;
  rightIcon?: React.FunctionComponent<IconProps>;
  rightChevronIcon?: boolean;
  onClick?: () => void;
  className?: string;
}

export default function SelectButton({
  main,
  action,

  transient,
  engaged,
  disabled,
  folded,

  children,
  leftIcon: LeftIcon,
  rightIcon: RightIcon,
  rightChevronIcon,
  onClick,
  className,
}: SelectButtonProps) {
  const hasRightIcon = !!RightIcon;
  const hasLeftIcon = !!LeftIcon;
  const variant = main ? "main" : action ? "action" : "main";
  const state = disabled ? "disabled" : "enabled";

  // Refs and state for measuring foldedContent width
  const [hovered, setHovered] = useState<boolean>(false);

  // Memoize class name invocations
  const baseClasses = useMemo(
    () => baseClassNames(engaged, transient)[variant][state],
    [engaged, transient, variant, state]
  );
  const iconClasses = useMemo(
    () => iconClassNames(engaged, transient)[variant][state],
    [engaged, transient, variant, state]
  );
  const textClasses = useMemo(
    () => textClassNames(engaged, transient)[variant][state],
    [engaged, transient, variant, state]
  );

  const content = useMemo(
    () => (
      <div className="flex flex-row items-center justify-center">
        <Text as="p" className={cn("whitespace-nowrap", textClasses)}>
          {children}
        </Text>

        {rightChevronIcon && (
          <SvgChevronDownSmall
            className={cn(
              "w-[1rem] h-[1rem] transition-all duration-300 ease-in-out",
              iconClasses,
              transient && "-rotate-180"
            )}
          />
        )}
      </div>
    ),
    [textClasses, iconClasses, rightChevronIcon, children, transient]
  );
  const [measureRef, { width: foldedContentWidth }] = useContentSize([content]);

  return (
    <>
      {/* Hidden element for measuring the natural width of the content */}
      <div
        ref={measureRef}
        className="flex items-center w-auto h-fit absolute -left-[9999rem] opacity-0 pointer-events-none"
      >
        {content}
      </div>

      <button
        className={cn(
          baseClasses,
          "group/SelectButton flex items-center px-2 py-2 rounded-12 h-fit w-fit",
          className
        )}
        onClick={disabled ? undefined : onClick}
        disabled={disabled}
        onMouseEnter={() => setHovered(true)}
        onMouseOver={() => setHovered(true)}
        onMouseLeave={() => setHovered(false)}
      >
        {/* Left icon */}
        {hasLeftIcon && LeftIcon && (
          <LeftIcon className={cn("w-[1rem] h-[1rem]", iconClasses)} />
        )}

        {/* Animation component */}
        <div
          className={cn(
            "flex items-center transition-all duration-300 ease-in-out overflow-hidden",
            folded
              ? engaged || transient || hovered
                ? "opacity-100"
                : "opacity-0"
              : "opacity-100"
          )}
          style={{
            width: folded
              ? engaged || transient || hovered
                ? `${foldedContentWidth}px`
                : "0px"
              : `${foldedContentWidth}px`,
            margin: folded
              ? engaged || transient || hovered
                ? hasRightIcon
                  ? `0px ${MARGIN}px 0px 0px`
                  : `0px 0px 0px ${MARGIN}px`
                : "0px"
              : hasRightIcon
                ? `0px ${MARGIN}px 0px 0px`
                : `0px 0px 0px ${MARGIN}px`,
          }}
        >
          {content}
        </div>

        {/* Right icon */}
        {hasRightIcon && RightIcon && (
          <RightIcon className={cn("w-[1rem] h-[1rem]", iconClasses)} />
        )}
      </button>
    </>
  );
}


================================================
FILE: web/src/refresh-components/buttons/SquareButton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SquareButton from "./SquareButton";
import { SvgPlus, SvgSettings, SvgSearch, SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof SquareButton> = {
  title: "refresh-components/buttons/SquareButton",
  component: SquareButton,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof SquareButton>;

export const Default: Story = {
  args: {
    icon: SvgPlus,
  },
};

export const Transient: Story = {
  args: {
    icon: SvgSettings,
    transient: true,
  },
};

export const Disabled: Story = {
  args: {
    icon: SvgPlus,
    disabled: true,
  },
};

export const AllVariants: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 12, alignItems: "center" }}>
      <SquareButton icon={SvgPlus} />
      <SquareButton icon={SvgSettings} transient />
      <SquareButton icon={SvgSearch} />
      <SquareButton icon={SvgX} />
      <SquareButton icon={SvgPlus} disabled />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/SquareButton.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";

export interface SquareButtonProps
  extends Omit<React.ComponentPropsWithoutRef<"button">, "children"> {
  transient?: boolean;
  icon: React.FunctionComponent<IconProps>;
}

const SquareButton = React.forwardRef<HTMLButtonElement, SquareButtonProps>(
  ({ transient = false, icon: Icon, className, ...props }, ref) => {
    return (
      <button
        ref={ref}
        type="button"
        data-state={transient ? "transient" : "normal"}
        className={cn("square-button rounded-08", className)}
        {...props}
      >
        <Icon className="h-5 w-5" />
      </button>
    );
  }
);
SquareButton.displayName = "SquareButton";

export default SquareButton;


================================================
FILE: web/src/refresh-components/buttons/Tag.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Tag from "./Tag";
import { SvgFilter, SvgUser, SvgFolder } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Tag> = {
  title: "refresh-components/buttons/Tag",
  component: Tag,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Tag>;

export const Default: Story = {
  args: {
    label: "Label",
  },
};

export const DisplayVariant: Story = {
  args: {
    label: "Display Tag",
    variant: "display",
  },
};

export const EditableVariant: Story = {
  args: {
    label: "Editable Tag",
    variant: "editable",
  },
};

export const WithIcon: Story = {
  args: {
    label: "With Icon",
    icon: SvgFilter,
  },
};

export const Removable: Story = {
  args: {
    label: "Removable",
    variant: "editable",
    onRemove: () => {},
  },
};

export const Clickable: Story = {
  args: {
    label: "Click Me",
    onClick: () => {},
  },
};

export const WithIconAndRemove: Story = {
  args: {
    label: "Filter: Active",
    variant: "editable",
    icon: SvgFilter,
    onRemove: () => {},
  },
};

export const TagGroup: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 8, flexWrap: "wrap" }}>
      <Tag label="React" variant="display" />
      <Tag label="TypeScript" variant="display" icon={SvgFolder} />
      <Tag
        label="Active Filter"
        variant="editable"
        icon={SvgFilter}
        onRemove={() => {}}
      />
      <Tag
        label="John Doe"
        variant="editable"
        icon={SvgUser}
        onRemove={() => {}}
      />
      <Tag label="Clickable" onClick={() => {}} />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/buttons/Tag.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { SvgX } from "@opal/icons";
import type { IconProps } from "@opal/types";

const variantStyles = {
  display: {
    container: "flex items-center p-1",
    icon: "size-4 stroke-text-03",
    text: { secondaryBody: true, text03: true },
  },
  editable: {
    container: "flex items-center gap-1 px-2 py-1",
    icon: "size-3 stroke-text-03",
    text: { mainUiBody: true, text04: true },
  },
} as const;

export interface TagProps {
  label: string;
  variant?: "display" | "editable";
  icon?: React.FunctionComponent<IconProps>;
  onRemove?: () => void;
  onClick?: () => void;
  className?: string;
  ref?: React.Ref<HTMLDivElement>;
}

export default function Tag({
  label,
  variant = "display",
  icon: Icon,
  onRemove,
  onClick,
  className,
  ref,
}: TagProps) {
  const styles = variantStyles[variant];

  return (
    <div
      ref={ref}
      className={cn(
        styles.container,
        "rounded-08",
        "bg-background-tint-02 hover:bg-background-tint-03",
        "focus-visible:shadow-[0_0_0_2px_var(--background-tint-04)]",
        "outline-none transition-colors",
        onClick || variant === "display" ? "cursor-pointer" : undefined,
        className
      )}
      onClick={onClick}
      role={onClick ? "button" : undefined}
      tabIndex={onClick ? 0 : undefined}
      onKeyDown={
        onClick
          ? (e) => {
              if (e.key === "Enter" || e.key === " ") {
                e.preventDefault();
                onClick();
              }
            }
          : undefined
      }
    >
      {Icon && <Icon className={styles.icon} />}
      <Text {...styles.text}>{label}</Text>
      {onRemove && (
        <button
          type="button"
          onClick={(e) => {
            e.stopPropagation();
            onRemove();
          }}
          className="p-0.5 stroke-text-02 hover:stroke-text-03"
          aria-label={`Remove ${label} filter`}
        >
          <SvgX className="size-3" />
        </button>
      )}
    </div>
  );
}


================================================
FILE: web/src/refresh-components/buttons/source-tag/SourceTag.tsx
================================================
"use client";

import {
  memo,
  useState,
  useMemo,
  useCallback,
  useRef,
  useLayoutEffect,
} from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { SourceIcon } from "@/components/SourceIcon";
import { WebResultIcon } from "@/components/WebResultIcon";
import { ValidSources } from "@/lib/types";
import SourceTagDetailsCard, {
  SourceInfo,
} from "@/refresh-components/buttons/source-tag/SourceTagDetailsCard";

export type { SourceInfo };

// Variant-specific styles
const sizeClasses = {
  inlineCitation: {
    container: "rounded-04 p-0.5 gap-0.5",
  },
  tag: {
    container: "rounded-08 p-1 gap-1",
  },
  button: {
    container: "rounded-08 h-[2.25rem] min-w-[2.25rem] p-2 gap-1",
  },
} as const;

/**
 * Hook to detect if text content is truncated within its container.
 *
 * Compares scrollWidth vs clientWidth to determine if CSS truncation is active.
 * Re-checks on window resize and when the text content changes.
 *
 * @param text - The text content to monitor for truncation
 * @returns Object containing:
 *   - `isTruncated`: Whether the text is currently truncated
 *   - `textRef`: Ref to attach to the text container element
 *
 * @example
 * ```tsx
 * const { isTruncated, textRef } = useIsTruncated(displayName);
 * return (
 *   <span ref={textRef} className="truncate">
 *     {displayName}
 *   </span>
 * );
 * ```
 */
function useIsTruncated(text: string) {
  const [isTruncated, setIsTruncated] = useState(false);
  const textRef = useRef<HTMLSpanElement>(null);

  useLayoutEffect(() => {
    function checkTruncation() {
      if (textRef.current) {
        setIsTruncated(
          textRef.current.scrollWidth > textRef.current.clientWidth
        );
      }
    }

    const timeoutId = setTimeout(checkTruncation, 0);
    window.addEventListener("resize", checkTruncation);

    return () => {
      clearTimeout(timeoutId);
      window.removeEventListener("resize", checkTruncation);
    };
  }, [text]);

  return { isTruncated, textRef };
}

/**
 * Generates a unique key for a source based on its icon type.
 *
 * Used to deduplicate sources with identical icons when displaying stacked icons.
 *
 * @param source - The source info object
 * @returns A unique string key based on:
 *   - Custom icon name if `source.icon` exists
 *   - Hostname from URL for web sources
 *   - Source type string for other sources
 */
const getIconKey = (source: SourceInfo): string => {
  if (source.icon) return source.icon.name || "custom";
  if (source.sourceType === ValidSources.Web && source.sourceUrl) {
    try {
      return new URL(source.sourceUrl).hostname;
    } catch {
      return source.sourceUrl;
    }
  }
  return source.sourceType;
};

/**
 * Renders the appropriate icon for a source based on its type and properties.
 *
 * Icon selection priority:
 * 1. Custom icon component (`source.icon`) - rendered directly
 * 2. Web source with URL - renders favicon via `WebResultIcon`
 * 3. Default - renders standard `SourceIcon` for the source type
 *
 * @param props.source - The source info containing icon, sourceType, and optional sourceUrl
 */
const SourceIconRenderer = ({ source }: { source: SourceInfo }) => {
  if (source.icon) {
    return <source.icon size={12} />;
  }
  if (source.sourceType === ValidSources.Web && source.sourceUrl) {
    return <WebResultIcon url={source.sourceUrl} size={12} />;
  }
  return <SourceIcon sourceType={source.sourceType} iconSize={12} />;
};

/**
 * Props for the IconStack sub-component.
 */
interface IconStackProps {
  sources: SourceInfo[];
  isQuery?: boolean;
  isOpen: boolean;
  showDetailsCard: boolean;
}

/**
 * Renders a horizontal stack of up to 3 source icons with overlapping layout.
 *
 * Icons are displayed with negative spacing to create a stacked/overlapping effect.
 * Each icon has a border that changes color based on open/hover states.
 *
 * @param props.sources - Array of sources to display (max 3 shown)
 * @param props.isQuery - When true, removes icon background
 * @param props.isOpen - Whether the details card is currently open
 * @param props.showDetailsCard - Whether hover interactions are enabled
 */
const IconStack = ({
  sources,
  isQuery,
  isOpen,
  showDetailsCard,
}: IconStackProps) => (
  <div className="flex items-center -space-x-1.5">
    {sources.slice(0, 3).map((source, index) => (
      <div
        key={source.id ?? `source-${index}`}
        className={cn(
          "relative flex items-center justify-center p-0.5 rounded-04",
          !isQuery && "bg-background-tint-00",
          "border transition-colors duration-150",
          isOpen
            ? "border-background-tint-inverted-03"
            : "border-background-tint-02",
          !showDetailsCard &&
            !isQuery &&
            "group-hover:border-background-tint-inverted-03"
        )}
        style={{ zIndex: sources.length - index }}
      >
        <SourceIconRenderer source={source} />
      </div>
    ))}
  </div>
);

/**
 * Shared text styling props passed to Text and Truncated components.
 * Computed based on `inlineCitation` and `isOpen` state.
 */
interface TextStyleProps {
  figureSmallValue?: boolean;
  secondaryBody?: boolean;
  text05?: boolean;
  text03?: boolean;
  text04?: boolean;
  inverted?: boolean;
}

/**
 * Props for the QueryText sub-component.
 */
interface QueryTextProps {
  expanded: boolean;
  displayName: string;
  tooltipText?: string;
  isTruncated: boolean;
  textRef: React.RefObject<HTMLSpanElement | null>;
  textStyleProps: TextStyleProps;
}

/**
 * Renders query text with two display modes based on expansion state.
 *
 * **Collapsed mode** (default):
 * - Text is truncated at 10rem with CSS overflow
 * - Shows tooltip with full text when truncated
 * - Clicking expands to full width
 *
 * **Expanded mode**:
 * - Text displays at full width using `Truncated` component
 * - Provides its own overflow handling with tooltip
 *
 * @param props.expanded - Whether text is in expanded (full-width) mode
 * @param props.displayName - The text content to display
 * @param props.tooltipText - Custom tooltip text (defaults to displayName)
 * @param props.isTruncated - Whether the collapsed text is currently truncated
 * @param props.textRef - Ref for measuring text truncation in collapsed mode
 * @param props.textStyleProps - Shared text styling props (colors, typography)
 */
const QueryText = ({
  expanded,
  displayName,
  tooltipText,
  isTruncated,
  textRef,
  textStyleProps,
}: QueryTextProps) => {
  if (expanded) {
    return (
      <Truncated
        {...textStyleProps}
        className="max-w-full transition-colors duration-150"
      >
        {displayName}
      </Truncated>
    );
  }

  return (
    <TooltipProvider delayDuration={300}>
      <Tooltip>
        <TooltipTrigger asChild>
          <span ref={textRef} className="max-w-[10rem] truncate block">
            <Text
              as="span"
              {...textStyleProps}
              className="transition-colors duration-150"
            >
              {displayName}
            </Text>
          </span>
        </TooltipTrigger>
        {isTruncated && (
          <TooltipContent
            side="top"
            className="max-w-[400px] break-words whitespace-normal"
          >
            <Text as="p" textLight05>
              {tooltipText ?? displayName}
            </Text>
          </TooltipContent>
        )}
      </Tooltip>
    </TooltipProvider>
  );
};

/**
 * Props for the SourceTag component.
 */
export interface SourceTagProps {
  /** Sizing variant: "inlineCitation" for compact in-text use, "button" for interactive contexts, "tag" (default) for standard display */
  variant?: "inlineCitation" | "tag" | "button";

  /** Display name shown on the tag (e.g., "Google Drive", "Business Insider") */
  displayName: string;

  /** URL to display below name (for site type - shows domain) */
  displayUrl?: string;

  /** Array of sources for navigation in details card */
  sources: SourceInfo[];

  /** Callback when a source is clicked in the details card */
  onSourceClick?: () => void;

  /** Whether to show the details card on hover (defaults to true) */
  showDetailsCard?: boolean;

  /** Additional CSS classes */
  className?: string;

  /** When true, removes icon background and wraps displayName with Truncated */
  isQuery?: boolean;

  /** When true, hides icon, removes background, shows bg-background-tint-02 on hover */
  isMore?: boolean;

  /** When true, no details card, no background, tint-02 on hover */
  toggleSource?: boolean;

  /** Tooltip text shown when query is truncated (defaults to displayName) */
  tooltipText?: string;
}

/**
 * A tag component for displaying source citations with multiple display modes.
 *
 * ## Display Modes
 *
 * **Standard Tag** (default):
 * - Shows stacked source icons + display name
 * - Hovering opens a details card with source navigation
 *
 * **Inline Citation** (`variant="inlineCitation"`):
 * - Compact size for use within text content
 * - Shows "+N" count for multiple sources
 *
 * **Query Mode** (`isQuery`):
 * - No icon background, text-only appearance
 * - Truncated text expands on click
 * - Shows tooltip when truncated
 *
 * **More Mode** (`isMore`):
 * - Hides icons, shows only text
 * - No default background, shows tint on hover
 *
 * **Toggle Source** (`toggleSource`):
 * - No details card on hover
 * - No default background, shows tint on hover
 *
 * @example
 * ```tsx
 * // Standard tag with details card
 * <SourceTag
 *   displayName="Google Drive"
 *   sources={[{ sourceType: ValidSources.GoogleDrive, ... }]}
 * />
 *
 * // Inline citation within text
 * <SourceTag
 *   variant="inlineCitation"
 *   displayName="Source 1"
 *   sources={multipleSources}
 * />
 *
 * // Query mode for search queries
 * <SourceTag
 *   isQuery
 *   displayName="What is the meaning of life?"
 *   sources={[]}
 * />
 * ```
 */
const SourceTagInner = ({
  variant = "tag",
  displayName,
  displayUrl,
  sources,
  onSourceClick,
  showDetailsCard = true,
  className,
  isQuery,
  isMore,
  toggleSource,
  tooltipText,
}: SourceTagProps) => {
  const inlineCitation = variant === "inlineCitation";

  const [currentIndex, setCurrentIndex] = useState(0);
  const [isOpen, setIsOpen] = useState(false);
  const [expanded, setExpanded] = useState(false);
  const { isTruncated, textRef } = useIsTruncated(displayName);

  const uniqueSources = useMemo(
    () =>
      sources.filter(
        (source, index, arr) =>
          arr.findIndex((s) => getIconKey(s) === getIconKey(source)) === index
      ),
    [sources]
  );

  const extraCount = sources.length - 1;

  const size = variant;
  const styles = sizeClasses[size];

  // Shared text styling props
  const textStyleProps = useMemo<TextStyleProps>(
    () => ({
      figureSmallValue: inlineCitation,
      secondaryBody: !inlineCitation,
      text05: isOpen,
      text03: !isOpen && inlineCitation,
      text04: !isOpen && !inlineCitation,
      inverted: isOpen,
    }),
    [inlineCitation, isOpen]
  );

  // Cursor class based on mode and state
  const cursorClass = useMemo(() => {
    if (!isQuery) return "cursor-pointer";
    if (!isTruncated || expanded) return "cursor-default";
    return "cursor-pointer";
  }, [isQuery, isTruncated, expanded]);

  // Background class based on mode and state
  const backgroundClass = useMemo(() => {
    if (isOpen) return "bg-background-tint-inverted-03";
    if (isMore || toggleSource) return "hover:bg-background-tint-02";
    if (!showDetailsCard && !isQuery)
      return "bg-background-tint-02 hover:bg-background-tint-inverted-03";
    return "bg-background-tint-02";
  }, [isOpen, isMore, toggleSource, showDetailsCard, isQuery]);

  const handlePrev = useCallback(() => {
    setCurrentIndex((prev) => Math.max(0, prev - 1));
  }, []);

  const handleNext = useCallback(() => {
    setCurrentIndex((prev) => Math.min(sources.length - 1, prev + 1));
  }, [sources.length]);

  // Reset to first source when tooltip closes
  const handleOpenChange = useCallback((open: boolean) => {
    setIsOpen(open);
    if (!open) {
      setCurrentIndex(0);
    }
  }, []);

  const handleClick = useCallback(() => {
    // Only expand if truncated
    if (isQuery && !expanded && isTruncated) {
      setExpanded(true);
    }
    onSourceClick?.();
  }, [isQuery, expanded, isTruncated, onSourceClick]);

  const buttonContent = (
    <button
      type="button"
      className={cn(
        "group inline-flex items-center transition-all duration-150",
        "appearance-none border-none",
        backgroundClass,
        styles.container,
        isQuery && "gap-0",
        isQuery && expanded && "w-fit",
        cursorClass,
        className
      )}
      onClick={handleClick}
    >
      {/* Stacked icons container - only for tag variant */}
      {!inlineCitation && !isMore && (
        <IconStack
          sources={uniqueSources}
          isQuery={isQuery}
          isOpen={isOpen}
          showDetailsCard={showDetailsCard}
        />
      )}

      <div
        className={cn(
          "flex items-baseline",
          !inlineCitation && "pr-0.5",
          isQuery && expanded && "w-fit"
        )}
      >
        {isQuery ? (
          <QueryText
            expanded={expanded}
            displayName={displayName}
            tooltipText={tooltipText}
            isTruncated={isTruncated}
            textRef={textRef}
            textStyleProps={textStyleProps}
          />
        ) : (
          <Text
            {...textStyleProps}
            className={cn(
              "max-w-[10rem] truncate transition-colors duration-150",
              !showDetailsCard &&
                !isQuery &&
                "group-hover:text-text-inverted-05"
            )}
          >
            {displayName}
          </Text>
        )}

        {/* Count - for inline citation */}
        {inlineCitation && sources.length > 1 && (
          <Text
            figureSmallValue
            text05={isOpen}
            text03={!isOpen}
            inverted={isOpen}
            className={cn(
              "transition-colors duration-150",
              !showDetailsCard &&
                !isQuery &&
                "group-hover:text-text-inverted-05"
            )}
          >
            +{extraCount}
          </Text>
        )}

        {/* URL - for tag variant */}
        {!inlineCitation && displayUrl && (
          <Text
            figureSmallValue
            text05={isOpen}
            text02={!isOpen}
            inverted={isOpen}
            className={cn(
              "max-w-[10rem] truncate transition-colors duration-150",
              !showDetailsCard &&
                !isQuery &&
                "group-hover:text-text-inverted-05"
            )}
          >
            {displayUrl}
          </Text>
        )}
      </div>
    </button>
  );

  if (!showDetailsCard || toggleSource) {
    return buttonContent;
  }

  return (
    <TooltipProvider delayDuration={50}>
      <Tooltip open={isOpen} onOpenChange={handleOpenChange}>
        <TooltipTrigger asChild>{buttonContent}</TooltipTrigger>
        <TooltipContent
          side="bottom"
          align="start"
          sideOffset={4}
          className="bg-transparent p-0 shadow-none border-none"
        >
          <SourceTagDetailsCard
            sources={sources}
            currentIndex={currentIndex}
            onPrev={handlePrev}
            onNext={handleNext}
          />
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
};

/**
 * Memoized SourceTag component for displaying source citations.
 *
 * @see {@link SourceTagInner} for full documentation and examples.
 */
const SourceTag = memo(SourceTagInner);
export default SourceTag;


================================================
FILE: web/src/refresh-components/buttons/source-tag/SourceTagDetailsCard.tsx
================================================
"use client";

import React, { memo } from "react";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import {
  SvgArrowLeft,
  SvgArrowRight,
  SvgUser,
  SvgQuestionMarkSmall,
} from "@opal/icons";
import { SourceIcon } from "@/components/SourceIcon";
import { WebResultIcon } from "@/components/WebResultIcon";
import { ValidSources } from "@/lib/types";
import { timeAgo } from "@/lib/time";
import { IconProps } from "@/components/icons/icons";
import { SubQuestionDetail } from "@/app/app/interfaces";

export interface SourceInfo {
  id: string;
  title: string;
  sourceType: ValidSources;
  sourceUrl?: string;
  description?: string;
  metadata?: {
    author?: string;
    date?: string | Date;
    tags?: string[];
  };
  icon?: React.FunctionComponent<IconProps>;
  // Support for questions
  isQuestion?: boolean;
  questionData?: SubQuestionDetail;
}

interface SourceTagDetailsCardProps {
  sources: SourceInfo[];
  currentIndex: number;
  onPrev: () => void;
  onNext: () => void;
}

interface MetadataChipProps {
  icon?: React.FunctionComponent<IconProps>;
  text: string;
}

const MetadataChip = memo(function MetadataChip({
  icon: Icon,
  text,
}: MetadataChipProps) {
  return (
    <div className="flex items-center gap-0 bg-background-tint-02 rounded-08 p-1">
      {Icon && (
        <div className="flex items-center justify-center p-0.5 w-4 h-4">
          <Icon className="w-3 h-3 stroke-text-03" />
        </div>
      )}

      <Text secondaryBody text03 className="px-0.5 max-w-[10rem] truncate">
        {text}
      </Text>
    </div>
  );
});

const SourceTagDetailsCardInner = ({
  sources,
  currentIndex,
  onPrev,
  onNext,
}: SourceTagDetailsCardProps) => {
  const currentSource = sources[currentIndex];
  if (!currentSource) return null;

  const showNavigation = sources.length > 1;
  const isFirst = currentIndex === 0;
  const isLast = currentIndex === sources.length - 1;
  const isWebSource = currentSource.sourceType === "web";
  const isQuestion = currentSource.isQuestion;
  const relativeDate = timeAgo(
    currentSource.metadata?.date instanceof Date
      ? currentSource.metadata.date.toISOString()
      : currentSource.metadata?.date
  );

  return (
    <div className="w-[17.5rem] bg-background-neutral-00 border border-border-01 rounded-12 shadow-01 overflow-hidden">
      {/* Navigation header - only shown for multiple sources */}
      {showNavigation && (
        <div className="flex items-center justify-between p-2 bg-background-tint-01 border-b border-border-01">
          <div className="flex items-center gap-1">
            <Button
              disabled={isFirst}
              prominence="internal"
              icon={SvgArrowLeft}
              onClick={onPrev}
              size="sm"
            />
            <Button
              disabled={isLast}
              prominence="internal"
              icon={SvgArrowRight}
              onClick={onNext}
              size="sm"
            />
          </div>
          <Text secondaryBody text03 className="px-1">
            {currentIndex + 1}/{sources.length}
          </Text>
        </div>
      )}

      <div className="p-1 flex flex-col gap-1">
        {/* Header with icon and title */}
        <div className="flex items-start gap-1 p-0.5 min-h-[1.75rem] w-full text-left hover:bg-background-tint-01 rounded-08 transition-colors">
          <div className="flex items-center justify-center p-0.5 shrink-0 w-5 h-5">
            {isQuestion ? (
              <SvgQuestionMarkSmall size={16} className="text-text-03" />
            ) : currentSource.icon ? (
              <currentSource.icon size={16} />
            ) : isWebSource && currentSource.sourceUrl ? (
              <WebResultIcon url={currentSource.sourceUrl} size={16} />
            ) : (
              <SourceIcon
                sourceType={
                  currentSource.sourceType === "web"
                    ? ValidSources.Web
                    : currentSource.sourceType
                }
                iconSize={16}
              />
            )}
          </div>
          <div className="flex-1 min-w-0 px-0.5">
            <Text
              mainUiAction
              text04
              className="truncate w-full block leading-5"
            >
              {currentSource.title}
            </Text>
          </div>
        </div>

        {/* Metadata row */}
        {(currentSource.metadata?.author ||
          currentSource.metadata?.tags?.length ||
          relativeDate) && (
          <div className="flex flex-row items-center gap-2 ">
            <div className="flex flex-wrap gap-1 items-center">
              {currentSource.metadata?.author && (
                <MetadataChip
                  icon={SvgUser}
                  text={currentSource.metadata.author}
                />
              )}
              {currentSource.metadata?.tags
                ?.slice(0, 2)
                .map((tag) => <MetadataChip key={tag} text={tag} />)}
              {relativeDate && (
                <Text secondaryBody text02>
                  {relativeDate}
                </Text>
              )}
            </div>
          </div>
        )}

        {/* Description */}
        {currentSource.description && (
          <div className="px-1.5 pb-1">
            <Text secondaryBody text03 as="span" className="line-clamp-4">
              {currentSource.description}
            </Text>
          </div>
        )}
      </div>
    </div>
  );
};

const SourceTagDetailsCard = memo(SourceTagDetailsCardInner);
export default SourceTagDetailsCard;


================================================
FILE: web/src/refresh-components/buttons/source-tag/index.ts
================================================
export {
  default as SourceTag,
  type SourceTagProps,
  type SourceInfo,
} from "./SourceTag";
export { default as SourceTagDetailsCard } from "./SourceTagDetailsCard";


================================================
FILE: web/src/refresh-components/buttons/source-tag/sourceTagUtils.ts
================================================
import { OnyxDocument } from "@/lib/search/interfaces";
import { SubQuestionDetail } from "@/app/app/interfaces";
import { StreamingCitation } from "@/app/app/services/streamingModels";
import { ValidSources } from "@/lib/types";
import { getSourceDisplayName } from "@/lib/sources";
import { SourceInfo } from "./SourceTagDetailsCard";

const MAX_TITLE_LENGTH = 40;

function truncateText(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;
  return str.slice(0, maxLength) + "...";
}

/**
 * Convert an OnyxDocument to a SourceInfo object for use with SourceTag
 */
export function documentToSourceInfo(doc: OnyxDocument): SourceInfo {
  const sourceType = doc.source_type as ValidSources;

  return {
    id: doc.document_id,
    title: doc.semantic_identifier || "Unknown",
    sourceType,
    sourceUrl: doc.link,
    description: doc.blurb,
    metadata: doc.updated_at
      ? {
          date: doc.updated_at,
        }
      : undefined,
  };
}

/**
 * Convert a SubQuestionDetail to a SourceInfo object for use with SourceTag
 */
export function questionToSourceInfo(
  question: SubQuestionDetail,
  index: number
): SourceInfo {
  return {
    id: `question-${question.level}-${question.level_question_num}`,
    title: truncateText(question.question, MAX_TITLE_LENGTH),
    sourceType: ValidSources.NotApplicable,
    description: question.answer,
    isQuestion: true,
    questionData: question,
  };
}

/**
 * Convert an array of citations and document map to SourceInfo array
 * Used for end-of-message Sources tag
 */
export function citationsToSourceInfoArray(
  citations: StreamingCitation[],
  documentMap: Map<string, OnyxDocument>
): SourceInfo[] {
  const sources: SourceInfo[] = [];
  const seenDocIds = new Set<string>();

  for (const citation of citations) {
    if (seenDocIds.has(citation.document_id)) continue;

    const doc = documentMap.get(citation.document_id);
    if (doc) {
      seenDocIds.add(citation.document_id);
      sources.push(documentToSourceInfo(doc));
    }
  }

  // Fallback: if no citations but we have documents, use first few documents
  if (sources.length === 0 && documentMap.size > 0) {
    const entries = Array.from(documentMap.entries());
    for (const [, doc] of entries) {
      sources.push(documentToSourceInfo(doc));
      if (sources.length >= 3) break;
    }
  }

  return sources;
}

/**
 * Get a display name for a source, used for inline citations
 */
export function getDisplayNameForSource(doc: OnyxDocument): string {
  const sourceType = doc.source_type as ValidSources;

  if (sourceType === ValidSources.Web || doc.is_internet) {
    return truncateText(doc.semantic_identifier || "", MAX_TITLE_LENGTH);
  }

  return (
    getSourceDisplayName(sourceType) ||
    truncateText(doc.semantic_identifier || "", MAX_TITLE_LENGTH)
  );
}


================================================
FILE: web/src/refresh-components/cards/Card.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Card from "./Card";
import Text from "@/refresh-components/texts/Text";

const meta: Meta<typeof Card> = {
  title: "refresh-components/cards/Card",
  component: Card,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Card>;

export const Primary: Story = {
  args: {
    variant: "primary",
    children: (
      <>
        <Text as="p" mainUiAction text05>
          Card Title
        </Text>
        <Text as="p" secondaryBody text03>
          This is a primary card with some content inside.
        </Text>
      </>
    ),
  },
};

export const Secondary: Story = {
  args: {
    variant: "secondary",
    children: (
      <>
        <Text as="p" mainUiAction text05>
          Secondary Card
        </Text>
        <Text as="p" secondaryBody text03>
          Less prominent content or nested cards.
        </Text>
      </>
    ),
  },
};

export const Tertiary: Story = {
  args: {
    variant: "tertiary",
    children: (
      <Text as="p" secondaryBody text03>
        Dashed border for placeholder or empty states.
      </Text>
    ),
  },
};

export const Disabled: Story = {
  args: {
    variant: "disabled",
    children: (
      <>
        <Text as="p" mainUiAction text05>
          Disabled Card
        </Text>
        <Text as="p" secondaryBody text03>
          This content is unavailable.
        </Text>
      </>
    ),
  },
};

export const Borderless: Story = {
  args: {
    variant: "borderless",
    children: (
      <>
        <Text as="p" mainUiAction text05>
          Borderless Card
        </Text>
        <Text as="p" secondaryBody text03>
          No border, solid background.
        </Text>
      </>
    ),
  },
};

export const AllVariants: Story = {
  render: () => (
    <div
      style={{
        display: "flex",
        flexDirection: "column",
        gap: 16,
        maxWidth: 400,
      }}
    >
      <Card variant="primary">
        <Text as="p" mainUiAction text05>
          Primary
        </Text>
        <Text as="p" secondaryBody text03>
          Default card style
        </Text>
      </Card>
      <Card variant="secondary">
        <Text as="p" mainUiAction text05>
          Secondary
        </Text>
        <Text as="p" secondaryBody text03>
          Transparent background
        </Text>
      </Card>
      <Card variant="tertiary">
        <Text as="p" mainUiAction text05>
          Tertiary
        </Text>
        <Text as="p" secondaryBody text03>
          Dashed border
        </Text>
      </Card>
      <Card variant="disabled">
        <Text as="p" mainUiAction text05>
          Disabled
        </Text>
        <Text as="p" secondaryBody text03>
          Dimmed / unavailable
        </Text>
      </Card>
      <Card variant="borderless">
        <Text as="p" mainUiAction text05>
          Borderless
        </Text>
        <Text as="p" secondaryBody text03>
          No border
        </Text>
      </Card>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/cards/Card.tsx
================================================
/**
 * Card - A styled container component
 *
 * Provides a consistent card-style container with background, padding, border, and rounded corners.
 * Uses a vertical flex layout with automatic gap spacing between children.
 *
 * Features:
 * - Padding: 1rem by default (configurable)
 * - Flex column layout with 1rem gap
 * - Rounded-16 corners
 * - Accepts optional className for custom styling
 * - Accepts all standard div HTML attributes except style (enforced by WithoutStyles)
 *
 * Variants:
 * - `primary`: Solid background with border. The default, most prominent card style.
 * - `secondary`: Transparent background with border. Use for less prominent content or nested cards.
 * - `tertiary`: Transparent background with dashed border. Use for placeholder or empty states.
 * - `disabled`: Dimmed primary style with reduced opacity. Indicates unavailable or locked content.
 * - `borderless`: Solid background without border. Use when cards are visually grouped or in tight layouts.
 *
 * @example
 * ```tsx
 * import { Card } from "@/refresh-components/cards";
 *
 * // Basic usage (primary variant)
 * <Card>
 *   <h2>Card Title</h2>
 *   <p>Card content goes here</p>
 * </Card>
 *
 * // Secondary variant for nested content
 * <Card variant="secondary">
 *   <div>Less prominent content</div>
 * </Card>
 *
 * // Tertiary variant for empty states
 * <Card variant="tertiary">
 *   <div>No items yet</div>
 * </Card>
 * ```
 */

import { Section, SectionProps } from "@/layouts/general-layouts";
import { cn } from "@/lib/utils";

type CardVariant =
  // The main card variant.
  | "primary"
  // A background-colorless card variant.
  | "secondary"
  // A background-colorless card variant with a dashed border.
  | "tertiary"
  // A dimmed version of the primary variant (indicates that this card is unavailable).
  | "disabled"
  // A borderless version of the primary variant.
  | "borderless";

export interface CardProps extends SectionProps {
  // variants
  variant?: CardVariant;
  // Optional className to apply custom styles
  className?: string;

  ref?: React.Ref<HTMLDivElement>;
}

export default function Card({
  variant = "primary",
  padding = 1,
  className,
  ref,
  ...props
}: CardProps) {
  const dataProps: Record<string, unknown> = {};
  const sectionProps: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(props)) {
    if (key.startsWith("data-")) {
      dataProps[key] = value;
    } else {
      sectionProps[key] = value;
    }
  }

  return (
    <div
      ref={ref}
      className={cn("card", className)}
      data-variant={variant}
      {...dataProps}
    >
      <Section
        alignItems="start"
        padding={padding}
        height="fit"
        {...sectionProps}
      />
    </div>
  );
}


================================================
FILE: web/src/refresh-components/cards/index.ts
================================================
export { default as Card } from "./Card";
export type { CardProps } from "./Card";


================================================
FILE: web/src/refresh-components/commandmenu/CommandMenu.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { useState } from "react";
import CommandMenu from "./CommandMenu";
import {
  SvgFileText,
  SvgUsers,
  SvgSettings,
  SvgPlus,
  SvgSearch,
  SvgArrowRight,
} from "@opal/icons";

const meta: Meta<typeof CommandMenu> = {
  title: "refresh-components/modals/CommandMenu",
  component: CommandMenu,
  tags: ["autodocs"],
  parameters: {
    layout: "fullscreen",
  },
};

export default meta;
type Story = StoryObj<typeof CommandMenu>;

export const Default: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    const [search, setSearch] = useState("");

    return (
      <>
        <button onClick={() => setOpen(true)}>Open Command Menu</button>
        <CommandMenu open={open} onOpenChange={setOpen}>
          <CommandMenu.Content>
            <CommandMenu.Header
              placeholder="Type a command or search..."
              value={search}
              onValueChange={setSearch}
              onClose={() => setOpen(false)}
            />
            <CommandMenu.List emptyMessage="No results found.">
              <CommandMenu.Item
                value="documents"
                icon={SvgFileText}
                onSelect={() => alert("Documents")}
              >
                Search Documents
              </CommandMenu.Item>
              <CommandMenu.Item
                value="people"
                icon={SvgUsers}
                onSelect={() => alert("People")}
              >
                Find People
              </CommandMenu.Item>
              <CommandMenu.Item
                value="settings"
                icon={SvgSettings}
                onSelect={() => alert("Settings")}
              >
                Open Settings
              </CommandMenu.Item>
              <CommandMenu.Action
                value="new-chat"
                icon={SvgPlus}
                shortcut="⌘N"
                onSelect={() => alert("New chat")}
              >
                New Chat
              </CommandMenu.Action>
            </CommandMenu.List>
            <CommandMenu.Footer
              leftActions={
                <>
                  <CommandMenu.FooterAction
                    icon={SvgArrowRight}
                    label="Select"
                  />
                  <CommandMenu.FooterAction icon={SvgSearch} label="Search" />
                </>
              }
            />
          </CommandMenu.Content>
        </CommandMenu>
      </>
    );
  },
};

export const WithFilters: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    const [search, setSearch] = useState("");

    return (
      <>
        <button onClick={() => setOpen(true)}>Open Command Menu</button>
        <CommandMenu open={open} onOpenChange={setOpen}>
          <CommandMenu.Content>
            <CommandMenu.Header
              placeholder="Search within filter..."
              value={search}
              onValueChange={setSearch}
              onClose={() => setOpen(false)}
              filters={[{ id: "docs", label: "Documents", icon: SvgFileText }]}
              onFilterRemove={(id) => alert(`Remove filter: ${id}`)}
            />
            <CommandMenu.List>
              <CommandMenu.Filter
                value="filter-docs"
                icon={SvgFileText}
                isApplied
              >
                Documents
              </CommandMenu.Filter>
              <CommandMenu.Item value="doc-1" onSelect={() => {}}>
                Q3 Financial Report
              </CommandMenu.Item>
              <CommandMenu.Item value="doc-2" onSelect={() => {}}>
                Engineering Roadmap 2025
              </CommandMenu.Item>
              <CommandMenu.Item value="doc-3" onSelect={() => {}}>
                Onboarding Guide
              </CommandMenu.Item>
            </CommandMenu.List>
          </CommandMenu.Content>
        </CommandMenu>
      </>
    );
  },
};

export const EmptyState: Story = {
  render: () => {
    const [open, setOpen] = useState(true);

    return (
      <>
        <button onClick={() => setOpen(true)}>Open Command Menu</button>
        <CommandMenu open={open} onOpenChange={setOpen}>
          <CommandMenu.Content>
            <CommandMenu.Header
              placeholder="Search..."
              onClose={() => setOpen(false)}
            />
            <CommandMenu.List emptyMessage="No commands match your search.">
              <div />
            </CommandMenu.List>
          </CommandMenu.Content>
        </CommandMenu>
      </>
    );
  },
};


================================================
FILE: web/src/refresh-components/commandmenu/CommandMenu.test.tsx
================================================
import React, { useState } from "react";
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
import "@testing-library/jest-dom";
import userEvent from "@testing-library/user-event";
import CommandMenu, {
  useCommandMenuContext,
} from "@/refresh-components/commandmenu/CommandMenu";

// Mock Radix Dialog portal to render inline for testing
jest.mock("@radix-ui/react-dialog", () => {
  const actual = jest.requireActual("@radix-ui/react-dialog");
  return {
    ...actual,
    Portal: ({ children }: { children: React.ReactNode }) => <>{children}</>,
  };
});

// Mock scrollIntoView which is not available in jsdom
Element.prototype.scrollIntoView = jest.fn();

// Mock requestAnimationFrame for highlight updates
const originalRAF = global.requestAnimationFrame;
beforeAll(() => {
  global.requestAnimationFrame = (cb: FrameRequestCallback) => {
    cb(0);
    return 0;
  };
});
afterAll(() => {
  global.requestAnimationFrame = originalRAF;
});

function setupUser() {
  return userEvent.setup({ delay: null });
}

/**
 * Test wrapper for CommandMenu compound component
 */
function TestCommandMenu({
  open = true,
  onOpenChange = jest.fn(),
  includeFilter = false,
  defaultHighlightAction = true,
}: {
  open?: boolean;
  onOpenChange?: (open: boolean) => void;
  includeFilter?: boolean;
  defaultHighlightAction?: boolean;
}) {
  const [selected, setSelected] = useState<string | null>(null);

  return (
    <CommandMenu open={open} onOpenChange={onOpenChange}>
      <CommandMenu.Content>
        <CommandMenu.Header placeholder="Search..." />
        <CommandMenu.List>
          <CommandMenu.Action
            value="action-1"
            onSelect={() => setSelected("action-1")}
            defaultHighlight={defaultHighlightAction}
          >
            Action 1
          </CommandMenu.Action>
          {includeFilter && (
            <CommandMenu.Filter value="filter-1" onSelect={() => {}}>
              Filter 1
            </CommandMenu.Filter>
          )}
          <CommandMenu.Item
            value="item-1"
            onSelect={() => setSelected("item-1")}
          >
            Item 1
          </CommandMenu.Item>
          <CommandMenu.Item
            value="item-2"
            onSelect={() => setSelected("item-2")}
          >
            Item 2
          </CommandMenu.Item>
        </CommandMenu.List>
        <CommandMenu.Footer
          leftActions={
            <CommandMenu.FooterAction
              icon={() => <span>Icon</span>}
              label="Select"
            />
          }
        />
      </CommandMenu.Content>
      {selected && <div data-testid="selected">{selected}</div>}
    </CommandMenu>
  );
}

/**
 * Minimal test wrapper for context hook testing
 */
function ContextTestComponent() {
  const context = useCommandMenuContext();
  return (
    <div>
      <div data-testid="highlighted-value">
        {context.highlightedValue ?? "none"}
      </div>
      <div data-testid="highlighted-type">
        {context.highlightedItemType ?? "none"}
      </div>
      <div data-testid="is-keyboard-nav">
        {context.isKeyboardNav ? "true" : "false"}
      </div>
    </div>
  );
}

function TestCommandMenuWithContext({
  open = true,
  onOpenChange = jest.fn(),
}: {
  open?: boolean;
  onOpenChange?: (open: boolean) => void;
}) {
  return (
    <CommandMenu open={open} onOpenChange={onOpenChange}>
      <CommandMenu.Content>
        <CommandMenu.Header placeholder="Search..." />
        <CommandMenu.List>
          <CommandMenu.Action value="action-1" onSelect={() => {}}>
            Action 1
          </CommandMenu.Action>
          <CommandMenu.Item value="item-1" onSelect={() => {}}>
            Item 1
          </CommandMenu.Item>
        </CommandMenu.List>
        <ContextTestComponent />
      </CommandMenu.Content>
    </CommandMenu>
  );
}

describe("CommandMenu", () => {
  describe("Rendering", () => {
    test("renders children when open", () => {
      render(<TestCommandMenu open={true} />);
      expect(screen.getByPlaceholderText("Search...")).toBeInTheDocument();
      // Use getAllByText since Truncated component creates visible + hidden measurement elements
      expect(screen.getAllByText("Action 1").length).toBeGreaterThan(0);
      expect(screen.getAllByText("Item 1").length).toBeGreaterThan(0);
      expect(screen.getAllByText("Item 2").length).toBeGreaterThan(0);
    });

    test("does not render content when closed", () => {
      render(<TestCommandMenu open={false} />);
      expect(
        screen.queryByPlaceholderText("Search...")
      ).not.toBeInTheDocument();
      expect(screen.queryByText("Action 1")).not.toBeInTheDocument();
    });

    test("renders header with placeholder text", () => {
      render(<TestCommandMenu open={true} />);
      const input = screen.getByPlaceholderText("Search...");
      expect(input).toBeInTheDocument();
      expect(input).toHaveFocus();
    });

    test("renders filter items", () => {
      render(<TestCommandMenu open={true} includeFilter={true} />);
      expect(screen.getByText("Filter 1")).toBeInTheDocument();
    });

    test("renders action items", () => {
      render(<TestCommandMenu open={true} />);
      // Use getAllByText since Truncated component creates visible + hidden measurement elements
      expect(screen.getAllByText("Action 1").length).toBeGreaterThan(0);
      // Verify the item is registered
      expect(
        document.querySelector('[data-command-item="action-1"]')
      ).toBeInTheDocument();
    });

    test("renders regular items", () => {
      render(<TestCommandMenu open={true} />);
      // Use getAllByText since Truncated component creates visible + hidden measurement elements
      expect(screen.getAllByText("Item 1").length).toBeGreaterThan(0);
      expect(screen.getAllByText("Item 2").length).toBeGreaterThan(0);
      // Verify the items are registered
      expect(
        document.querySelector('[data-command-item="item-1"]')
      ).toBeInTheDocument();
      expect(
        document.querySelector('[data-command-item="item-2"]')
      ).toBeInTheDocument();
    });

    test("renders footer with actions", () => {
      render(<TestCommandMenu open={true} />);
      expect(screen.getByText("Select")).toBeInTheDocument();
    });
  });

  describe("Keyboard Navigation", () => {
    test("ArrowDown highlights next item", async () => {
      const user = setupUser();
      render(<TestCommandMenuWithContext open={true} />);

      // Wait for initial highlight
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });

      await user.keyboard("{ArrowDown}");

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });
    });

    test("ArrowUp highlights previous item", async () => {
      const user = setupUser();
      render(<TestCommandMenuWithContext open={true} />);

      // Move down first
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });

      await user.keyboard("{ArrowDown}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });

      await user.keyboard("{ArrowUp}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });

    test("ArrowDown wraps to first item at end", async () => {
      const user = setupUser();
      render(<TestCommandMenuWithContext open={true} />);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });

      // Move through all items
      await user.keyboard("{ArrowDown}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });

      // Should wrap back to action-1
      await user.keyboard("{ArrowDown}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });

    test("ArrowUp wraps to last item at start", async () => {
      const user = setupUser();
      render(<TestCommandMenuWithContext open={true} />);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });

      // Going up from first should wrap to last
      await user.keyboard("{ArrowUp}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });
    });

    test("Enter selects highlighted item", async () => {
      const user = setupUser();
      const onOpenChange = jest.fn();
      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);

      // Wait for initial highlight and then press Enter
      await waitFor(() => {
        const items = document.querySelectorAll("[data-command-item]");
        expect(items.length).toBeGreaterThan(0);
      });

      await user.keyboard("{ArrowDown}"); // Move to item-1
      await user.keyboard("{Enter}");

      // Menu should close after selecting a non-filter item
      await waitFor(() => {
        expect(onOpenChange).toHaveBeenCalledWith(false);
      });
    });

    test("Escape closes menu", async () => {
      const user = setupUser();
      const onOpenChange = jest.fn();
      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);

      await user.keyboard("{Escape}");

      expect(onOpenChange).toHaveBeenCalledWith(false);
    });

    test("Enter on filter does not close menu", async () => {
      const user = setupUser();
      const onOpenChange = jest.fn();
      render(
        <TestCommandMenu
          open={true}
          onOpenChange={onOpenChange}
          includeFilter={true}
        />
      );

      // Navigate to filter
      await waitFor(() => {
        const items = document.querySelectorAll("[data-command-item]");
        expect(items.length).toBeGreaterThan(0);
      });

      await user.keyboard("{ArrowDown}"); // Move to filter-1
      await user.keyboard("{Enter}");

      // Menu should NOT close after selecting a filter
      await waitFor(() => {
        // Give it time to potentially call onOpenChange incorrectly
        return new Promise((r) => setTimeout(r, 100));
      });

      // onOpenChange should not have been called with false for filter selection
      const closeCalls = onOpenChange.mock.calls.filter(
        (call) => call[0] === false
      );
      expect(closeCalls.length).toBe(0);
    });
  });

  describe("Mouse Interaction", () => {
    test("Mouse hover highlights item", async () => {
      render(<TestCommandMenuWithContext open={true} />);

      // Use data-command-item selector directly
      const itemContainer = document.querySelector(
        '[data-command-item="item-1"]'
      );
      expect(itemContainer).toBeInTheDocument();

      // The LineItem component has a button inside that handles click events
      const button = itemContainer!.querySelector('[role="button"]');
      expect(button).toBeInTheDocument();
      fireEvent.mouseEnter(button!);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });
    });

    test("Click selects item", async () => {
      const user = setupUser();
      const onOpenChange = jest.fn();
      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);

      // Use data-command-item selector to find the clickable item container
      const itemContainer = document.querySelector(
        '[data-command-item="item-1"]'
      );
      expect(itemContainer).toBeInTheDocument();

      // The LineItem component has a button inside that handles click events
      const button = itemContainer!.querySelector('[role="button"]');
      expect(button).toBeInTheDocument();
      await user.click(button!);

      await waitFor(() => {
        expect(onOpenChange).toHaveBeenCalledWith(false);
      });
    });

    test("Click on filter does not close menu", async () => {
      const user = setupUser();
      const onOpenChange = jest.fn();
      render(
        <TestCommandMenu
          open={true}
          onOpenChange={onOpenChange}
          includeFilter={true}
        />
      );

      // Use data-command-item selector directly
      const filterContainer = document.querySelector(
        '[data-command-item="filter-1"]'
      );
      expect(filterContainer).toBeInTheDocument();
      await user.click(filterContainer!);

      // Give it time to potentially call onOpenChange incorrectly
      await waitFor(() => {
        return new Promise((r) => setTimeout(r, 100));
      });

      // onOpenChange should not have been called with false for filter click
      const closeCalls = onOpenChange.mock.calls.filter(
        (call) => call[0] === false
      );
      expect(closeCalls.length).toBe(0);
    });
  });

  describe("Item Registration", () => {
    test("Items with defaultHighlight=false are skipped in initial highlight", async () => {
      render(<TestCommandMenuWithContext open={true} />);

      // The first selectable item (action-1) should be highlighted initially
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });

    test("First selectable item is highlighted on open", async () => {
      render(<TestCommandMenuWithContext open={true} />);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });

    test("Non-default-highlight action is skipped for initial highlight", async () => {
      // Render with defaultHighlightAction=false, so action-1 should be skipped
      render(<TestCommandMenu open={true} defaultHighlightAction={false} />);

      // The item-1 should be highlighted instead (first item with defaultHighlight=true)
      await waitFor(() => {
        const highlightedItems = document.querySelectorAll(
          '[aria-selected="true"]'
        );
        expect(highlightedItems.length).toBeGreaterThan(0);
        // Check that the highlighted item is item-1, not action-1
        const highlightedValues = Array.from(highlightedItems).map((el) =>
          el.getAttribute("data-command-item")
        );
        expect(highlightedValues).toContain("item-1");
      });
    });
  });

  describe("Context Hook", () => {
    test("useCommandMenuContext provides correct highlighted value", async () => {
      render(<TestCommandMenuWithContext open={true} />);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });

    test("useCommandMenuContext provides correct highlighted item type", async () => {
      render(<TestCommandMenuWithContext open={true} />);

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-type")).toHaveTextContent(
          "action"
        );
      });

      // Navigate to regular item
      const user = setupUser();
      await user.keyboard("{ArrowDown}");

      await waitFor(() => {
        expect(screen.getByTestId("highlighted-type")).toHaveTextContent(
          "item"
        );
      });
    });

    test("useCommandMenuContext throws when used outside CommandMenu", () => {
      // Suppress console.error for this test since we expect an error
      const consoleSpy = jest
        .spyOn(console, "error")
        .mockImplementation(() => {});

      expect(() => {
        render(<ContextTestComponent />);
      }).toThrow(
        "CommandMenu compound components must be used within CommandMenu"
      );

      consoleSpy.mockRestore();
    });

    test("isKeyboardNav is true after keyboard navigation", async () => {
      const user = setupUser();
      render(<TestCommandMenuWithContext open={true} />);

      // Initially should not be keyboard nav
      expect(screen.getByTestId("is-keyboard-nav")).toHaveTextContent("false");

      await user.keyboard("{ArrowDown}");

      await waitFor(() => {
        expect(screen.getByTestId("is-keyboard-nav")).toHaveTextContent("true");
      });
    });
  });

  describe("Menu State Reset", () => {
    test("highlight resets when menu closes and reopens", async () => {
      const { rerender } = render(<TestCommandMenuWithContext open={true} />);

      // Wait for initial highlight
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });

      // Navigate to item-1
      const user = setupUser();
      await user.keyboard("{ArrowDown}");
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "item-1"
        );
      });

      // Close menu
      rerender(<TestCommandMenuWithContext open={false} />);

      // Reopen menu
      rerender(<TestCommandMenuWithContext open={true} />);

      // Should reset to first item
      await waitFor(() => {
        expect(screen.getByTestId("highlighted-value")).toHaveTextContent(
          "action-1"
        );
      });
    });
  });

  describe("Header Input Behavior", () => {
    test("typing in input does not trigger keyboard navigation", async () => {
      const user = setupUser();
      const onValueChange = jest.fn();

      render(
        <CommandMenu open={true} onOpenChange={() => {}}>
          <CommandMenu.Content>
            <CommandMenu.Header
              placeholder="Search..."
              value=""
              onValueChange={onValueChange}
            />
            <CommandMenu.List>
              <CommandMenu.Item value="item-1" onSelect={() => {}}>
                Item 1
              </CommandMenu.Item>
            </CommandMenu.List>
          </CommandMenu.Content>
        </CommandMenu>
      );

      const input = screen.getByPlaceholderText("Search...");
      await user.type(input, "test");

      expect(onValueChange).toHaveBeenCalledWith("t");
      expect(onValueChange).toHaveBeenCalledWith("e");
      expect(onValueChange).toHaveBeenCalledWith("s");
      expect(onValueChange).toHaveBeenCalledWith("t");
    });
  });
});


================================================
FILE: web/src/refresh-components/commandmenu/CommandMenu.tsx
================================================
"use client";

import React, {
  createContext,
  useContext,
  useEffect,
  useCallback,
  useRef,
  useMemo,
} from "react";
import * as DialogPrimitive from "@radix-ui/react-dialog";
import * as VisuallyHidden from "@radix-ui/react-visually-hidden";
import useContainerCenter from "@/hooks/useContainerCenter";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import LineItem from "@/refresh-components/buttons/LineItem";
import Tag from "@/refresh-components/buttons/Tag";
import { Button } from "@opal/components";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import Divider from "@/refresh-components/Divider";
import { Section } from "@/layouts/general-layouts";
import { SvgSearch, SvgX } from "@opal/icons";
import type {
  CommandMenuProps,
  CommandMenuContentProps,
  CommandMenuHeaderProps,
  CommandMenuListProps,
  CommandMenuFilterProps,
  CommandMenuItemProps,
  CommandMenuActionProps,
  CommandMenuFooterProps,
  CommandMenuFooterActionProps,
  CommandMenuContextValue,
} from "./types";

// =============================================================================
// Context
// =============================================================================

const CommandMenuContext = createContext<CommandMenuContextValue | null>(null);

function useCommandMenuContext() {
  const context = useContext(CommandMenuContext);
  if (!context) {
    throw new Error(
      "CommandMenu compound components must be used within CommandMenu"
    );
  }
  return context;
}

// =============================================================================
// CommandMenu Root
// =============================================================================

/**
 * Gets ordered items by querying DOM for data-command-item elements.
 * Safe to call in event handlers (after DOM is committed).
 */
function getOrderedItems(): string[] {
  const container = document.querySelector("[data-command-menu-list]");
  if (!container) return [];
  const elements = container.querySelectorAll("[data-command-item]");
  return Array.from(elements)
    .map((el) => el.getAttribute("data-command-item"))
    .filter((v): v is string => v !== null);
}

/**
 * CommandMenu Root Component
 *
 * Wrapper around Radix Dialog.Root for managing command menu state.
 * Centralizes all keyboard/selection logic - items only render and report mouse events.
 *
 * @example
 * ```tsx
 * <CommandMenu open={isOpen} onOpenChange={setIsOpen}>
 *   <CommandMenu.Content>
 *     <CommandMenu.Header placeholder="Search..." />
 *     <CommandMenu.List>
 *       <CommandMenu.Item value="1">Item 1</CommandMenu.Item>
 *     </CommandMenu.List>
 *     <CommandMenu.Footer />
 *   </CommandMenu.Content>
 * </CommandMenu>
 * ```
 */
function CommandMenuRoot({ open, onOpenChange, children }: CommandMenuProps) {
  const [highlightedValue, setHighlightedValue] = React.useState<string | null>(
    null
  );
  const [isKeyboardNav, setIsKeyboardNav] = React.useState(false);
  const [itemsRevision, setItemsRevision] = React.useState(0);

  // Centralized callback registry - items register their onSelect callback, type, and defaultHighlight
  const itemCallbacks = useRef<
    Map<
      string,
      {
        callback: () => void;
        type: "filter" | "item" | "action";
        defaultHighlight: boolean;
      }
    >
  >(new Map());

  // Track previous itemsRevision to detect when items actually change
  const prevItemsRevisionRef = useRef(itemsRevision);

  // Reset state when menu closes
  useEffect(() => {
    if (!open) {
      setHighlightedValue(null);
      setIsKeyboardNav(false);
      itemCallbacks.current.clear();
    }
  }, [open]);

  // Ensure valid highlight when menu is open and items change
  useEffect(() => {
    if (open) {
      const frame = requestAnimationFrame(() => {
        const items = getOrderedItems();
        const currentEntry = highlightedValue
          ? itemCallbacks.current.get(highlightedValue)
          : null;

        const itemsChanged = prevItemsRevisionRef.current !== itemsRevision;
        prevItemsRevisionRef.current = itemsRevision;

        // Re-evaluate if:
        // 1. No highlight set
        // 2. Current highlight is not in DOM
        // 3. Items changed AND current highlight has defaultHighlight=false
        const shouldReselect =
          !highlightedValue ||
          !items.includes(highlightedValue) ||
          (itemsChanged && currentEntry?.defaultHighlight === false);

        if (shouldReselect) {
          // Find first item eligible for default highlight
          const defaultItem = items.find((value) => {
            const entry = itemCallbacks.current.get(value);
            return entry?.defaultHighlight !== false;
          });
          // Use default item if found, otherwise fall back to first item
          const targetItem = defaultItem || items[0];
          setHighlightedValue(targetItem || null);
        }
      });
      return () => cancelAnimationFrame(frame);
    }
  }, [open, highlightedValue, itemsRevision]);

  // Registration functions (items call on mount)
  const registerItem = useCallback(
    (
      value: string,
      onSelect: () => void,
      type: "filter" | "item" | "action" = "item",
      defaultHighlight: boolean = true
    ) => {
      if (
        process.env.NODE_ENV === "development" &&
        itemCallbacks.current.has(value)
      ) {
        console.warn(
          `[CommandMenu] Duplicate value "${value}" registered. ` +
            `Values must be unique across all Filter, Item, and Action components.`
        );
      }
      itemCallbacks.current.set(value, {
        callback: onSelect,
        type,
        defaultHighlight,
      });
      setItemsRevision((r) => r + 1);
    },
    []
  );

  const unregisterItem = useCallback((value: string) => {
    itemCallbacks.current.delete(value);
    setItemsRevision((r) => r + 1);
  }, []);

  // Shared mouse handlers (items call on events)
  const onItemMouseEnter = useCallback(
    (value: string) => {
      if (!isKeyboardNav) {
        setHighlightedValue(value);
      }
    },
    [isKeyboardNav]
  );

  const onItemMouseMove = useCallback(
    (value: string) => {
      if (isKeyboardNav) {
        setIsKeyboardNav(false);
      }
      if (highlightedValue !== value) {
        setHighlightedValue(value);
      }
    },
    [isKeyboardNav, highlightedValue]
  );

  const onItemClick = useCallback(
    (value: string) => {
      const entry = itemCallbacks.current.get(value);
      entry?.callback();
      if (entry?.type !== "filter") {
        onOpenChange(false);
      }
    },
    [onOpenChange]
  );

  const onListMouseLeave = useCallback(() => {
    if (!isKeyboardNav) {
      setHighlightedValue(null);
    }
  }, [isKeyboardNav]);

  // Compute the type of the currently highlighted item
  const highlightedItemType = useMemo(() => {
    if (!highlightedValue) return null;
    return itemCallbacks.current.get(highlightedValue)?.type ?? null;
  }, [highlightedValue]);

  // Keyboard handler - centralized for all keys including Enter
  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent) => {
      switch (e.key) {
        case "ArrowDown": {
          e.preventDefault();
          setIsKeyboardNav(true);
          const items = getOrderedItems();
          if (items.length === 0) return;

          const currentIndex = highlightedValue
            ? items.indexOf(highlightedValue)
            : -1;
          const nextIndex =
            currentIndex < items.length - 1 ? currentIndex + 1 : 0;
          const nextItem = items[nextIndex];
          if (nextItem !== undefined) {
            setHighlightedValue(nextItem);
          }
          break;
        }
        case "ArrowUp": {
          e.preventDefault();
          setIsKeyboardNav(true);
          const items = getOrderedItems();
          if (items.length === 0) return;

          const currentIndex = highlightedValue
            ? items.indexOf(highlightedValue)
            : 0;
          const prevIndex =
            currentIndex > 0 ? currentIndex - 1 : items.length - 1;
          const prevItem = items[prevIndex];
          if (prevItem !== undefined) {
            setHighlightedValue(prevItem);
          }
          break;
        }
        case "Enter": {
          e.preventDefault();
          e.stopPropagation();
          if (highlightedValue) {
            const entry = itemCallbacks.current.get(highlightedValue);
            entry?.callback();
            if (entry?.type !== "filter") {
              onOpenChange(false);
            }
          }
          break;
        }
        case "Escape": {
          e.preventDefault();
          onOpenChange(false);
          break;
        }
      }
    },
    [highlightedValue, onOpenChange]
  );

  // Scroll highlighted item into view on keyboard nav
  // Uses manual scroll calculation instead of scrollIntoView to only scroll
  // the list container, not the modal or other ancestors
  useEffect(() => {
    if (isKeyboardNav && highlightedValue) {
      const container = document.querySelector("[data-command-menu-list]");
      // Use safe attribute matching instead of direct selector interpolation
      // to prevent CSS selector injection
      const el = Array.from(
        container?.querySelectorAll("[data-command-item]") ?? []
      ).find((e) => e.getAttribute("data-command-item") === highlightedValue);

      if (container && el instanceof HTMLElement) {
        const containerRect = container.getBoundingClientRect();
        const elRect = el.getBoundingClientRect();

        const scrollMargin = 60;
        if (elRect.top < containerRect.top + scrollMargin) {
          container.scrollTop -= containerRect.top + scrollMargin - elRect.top;
        } else if (elRect.bottom > containerRect.bottom) {
          container.scrollTop += elRect.bottom - containerRect.bottom;
        }
      }
    }
  }, [highlightedValue, isKeyboardNav]);

  const contextValue = useMemo<CommandMenuContextValue>(
    () => ({
      highlightedValue,
      highlightedItemType,
      isKeyboardNav,
      registerItem,
      unregisterItem,
      onItemMouseEnter,
      onItemMouseMove,
      onItemClick,
      onListMouseLeave,
      handleKeyDown,
    }),
    [
      highlightedValue,
      highlightedItemType,
      isKeyboardNav,
      registerItem,
      unregisterItem,
      onItemMouseEnter,
      onItemMouseMove,
      onItemClick,
      onListMouseLeave,
      handleKeyDown,
    ]
  );

  return (
    <CommandMenuContext.Provider value={contextValue}>
      <DialogPrimitive.Root open={open} onOpenChange={onOpenChange}>
        {children}
      </DialogPrimitive.Root>
    </CommandMenuContext.Provider>
  );
}

// =============================================================================
// CommandMenu Content
// =============================================================================

/**
 * CommandMenu Content Component
 *
 * Modal container with overlay, sizing, and animations.
 * Keyboard handling is centralized in Root and accessed via context.
 */
const CommandMenuContent = React.forwardRef<
  React.ComponentRef<typeof DialogPrimitive.Content>,
  CommandMenuContentProps
>(({ children }, ref) => {
  const { handleKeyDown } = useCommandMenuContext();
  const { centerX, hasContainerCenter } = useContainerCenter();

  return (
    <DialogPrimitive.Portal>
      {/* Overlay - fixed to full viewport, hidden from assistive technology */}
      <DialogPrimitive.Overlay
        aria-hidden="true"
        className={cn(
          "fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none",
          "data-[state=open]:animate-in data-[state=closed]:animate-out",
          "data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0"
        )}
      />
      {/* Content - centered within the main container when available,
          otherwise falls back to viewport centering */}
      <DialogPrimitive.Content
        ref={ref}
        onKeyDown={handleKeyDown}
        style={
          hasContainerCenter
            ? ({
                left: centerX,
                "--tw-enter-translate-x": "-50%",
                "--tw-exit-translate-x": "-50%",
              } as React.CSSProperties)
            : undefined
        }
        className={cn(
          "fixed top-[72px]",
          hasContainerCenter ? "-translate-x-1/2" : "inset-x-0 mx-auto",
          "z-modal",
          "bg-background-tint-00 border rounded-16 shadow-2xl outline-none",
          "flex flex-col overflow-hidden",
          "max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-144px)]",
          "data-[state=open]:animate-in data-[state=closed]:animate-out",
          "data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0",
          "data-[state=open]:slide-in-from-bottom data-[state=open]:slide-in-from-left-0",
          "data-[state=closed]:slide-out-to-bottom data-[state=closed]:slide-out-to-left-0",
          "duration-200",
          "w-[32rem]",
          "min-h-[15rem]"
        )}
      >
        <VisuallyHidden.Root asChild>
          <DialogPrimitive.Title>Command Menu</DialogPrimitive.Title>
        </VisuallyHidden.Root>
        {children}
      </DialogPrimitive.Content>
    </DialogPrimitive.Portal>
  );
});
CommandMenuContent.displayName = "CommandMenuContent";

// =============================================================================
// CommandMenu Header
// =============================================================================

/**
 * CommandMenu Header Component
 *
 * Contains filter tags and search input.
 * Arrow keys preventDefault at input level (to stop cursor movement) then bubble to Content.
 */
function CommandMenuHeader({
  placeholder = "Search...",
  filters = [],
  value = "",
  onValueChange,
  onFilterRemove,
  onClose,
  onEmptyBackspace,
}: CommandMenuHeaderProps) {
  // Prevent default for arrow/enter keys so they don't move cursor or submit forms
  // The actual handling happens in Root's centralized handler via event bubbling
  const handleInputKeyDown = useCallback(
    (e: React.KeyboardEvent<HTMLInputElement>) => {
      if (e.key === "ArrowDown" || e.key === "ArrowUp" || e.key === "Enter") {
        e.preventDefault();
      }
      // Handle backspace on empty input for navigation
      if (e.key === "Backspace" && !value) {
        onEmptyBackspace?.();
      }
    },
    [value, onEmptyBackspace]
  );

  return (
    <div className="flex-shrink-0">
      {/* Top row: Search icon, filters, close button */}
      <div className="px-3 pt-3 flex flex-row justify-between items-center">
        <Section
          flexDirection="row"
          justifyContent="start"
          gap={0.5}
          width="fit"
        >
          {/* Standalone search icon */}
          <SvgSearch className="w-6 h-6 stroke-text-04" />
          {filters.map((filter) => (
            <Tag
              variant="editable"
              key={filter.id}
              label={filter.label}
              icon={filter.icon}
              onRemove={
                onFilterRemove ? () => onFilterRemove(filter.id) : undefined
              }
            />
          ))}
        </Section>
        {onClose && (
          <DialogPrimitive.Close asChild>
            <Button
              icon={SvgX}
              prominence="tertiary"
              size="sm"
              onClick={onClose}
              aria-label="Close menu"
            />
          </DialogPrimitive.Close>
        )}
      </div>
      {/* Search input - arrow/enter keys bubble up to Content for centralized handling */}
      <div className="px-2 pb-2 pt-0.5">
        <InputTypeIn
          placeholder={placeholder}
          value={value}
          onChange={(e) => onValueChange?.(e.target.value)}
          onKeyDown={handleInputKeyDown}
          autoFocus
          className="w-full !bg-transparent !border-transparent [&:is(:hover,:active,:focus,:focus-within)]:!bg-background-neutral-00 [&:is(:hover)]:!border-border-01 [&:is(:focus,:focus-within)]:!shadow-none"
          showClearButton={false}
        />
      </div>
    </div>
  );
}

// =============================================================================
// CommandMenu List
// =============================================================================

/**
 * CommandMenu List Component
 *
 * Scrollable container for menu items with scroll shadow indicators.
 * Uses ScrollIndicatorDiv for automatic scroll shadows.
 */
function CommandMenuList({ children, emptyMessage }: CommandMenuListProps) {
  const { isKeyboardNav, onListMouseLeave } = useCommandMenuContext();
  const childCount = React.Children.count(children);

  if (childCount === 0 && emptyMessage) {
    return (
      <div
        className="bg-background-tint-01 p-4"
        role="status"
        aria-live="polite"
      >
        <Text secondaryBody text03>
          {emptyMessage}
        </Text>
      </div>
    );
  }

  return (
    <ScrollIndicatorDiv
      role="listbox"
      aria-label="Command menu options"
      className="p-1 gap-1 max-h-[60vh] bg-background-tint-01"
      backgroundColor="var(--background-tint-01)"
      data-command-menu-list
      data-keyboard-nav={isKeyboardNav ? "true" : undefined}
      variant="shadow"
      onMouseLeave={onListMouseLeave}
    >
      {children}
    </ScrollIndicatorDiv>
  );
}

// =============================================================================
// CommandMenu Filter
// =============================================================================

/**
 * CommandMenu Filter Component
 *
 * When `isApplied` is true, renders as a non-interactive group label.
 * Otherwise, renders as a selectable filter with a chevron indicator.
 * Dumb component - registers callback on mount, renders based on context state.
 */
function CommandMenuFilter({
  value,
  children,
  icon,
  isApplied,
  onSelect,
}: CommandMenuFilterProps) {
  const {
    highlightedValue,
    registerItem,
    unregisterItem,
    onItemMouseEnter,
    onItemMouseMove,
    onItemClick,
  } = useCommandMenuContext();

  // Register callback on mount - NO keyboard listener needed
  useEffect(() => {
    if (!isApplied && onSelect) {
      registerItem(value, () => onSelect(), "filter");
      return () => unregisterItem(value);
    }
  }, [value, isApplied, onSelect, registerItem, unregisterItem]);

  // When filter is applied, show as group label (non-interactive)
  if (isApplied) {
    return (
      <Divider
        showTitle
        text={children as string}
        icon={icon}
        dividerLine={false}
      />
    );
  }

  const isHighlighted = value === highlightedValue;

  // Selectable filter - uses LineItem, delegates all events to context
  return (
    <div data-command-item={value} role="option" aria-selected={isHighlighted}>
      <Divider
        showTitle
        text={children as string}
        icon={icon}
        foldable
        isHighlighted={isHighlighted}
        onClick={() => onItemClick(value)}
        onMouseEnter={() => onItemMouseEnter(value)}
        onMouseMove={() => onItemMouseMove(value)}
        dividerLine={false}
      />
    </div>
  );
}

// =============================================================================
// CommandMenu Item
// =============================================================================

/**
 * CommandMenu Item Component
 *
 * Dumb component - registers callback on mount, renders based on context state.
 * Use rightContent for timestamps, badges, etc.
 */
function CommandMenuItem({
  value,
  icon,
  rightContent,
  onSelect,
  children,
}: CommandMenuItemProps) {
  const {
    highlightedValue,
    registerItem,
    unregisterItem,
    onItemMouseEnter,
    onItemMouseMove,
    onItemClick,
  } = useCommandMenuContext();

  // Register callback on mount - NO keyboard listener needed
  useEffect(() => {
    registerItem(value, () => onSelect?.(value));
    return () => unregisterItem(value);
  }, [value, onSelect, registerItem, unregisterItem]);

  const isHighlighted = value === highlightedValue;

  // Resolve rightContent - supports both static ReactNode and render function
  const resolvedRightContent =
    typeof rightContent === "function"
      ? rightContent({ isHighlighted })
      : rightContent;

  return (
    <div data-command-item={value} role="option" aria-selected={isHighlighted}>
      <LineItem
        muted
        icon={icon}
        rightChildren={resolvedRightContent}
        emphasized={isHighlighted}
        selected={isHighlighted}
        onClick={() => onItemClick(value)}
        onMouseEnter={() => onItemMouseEnter(value)}
        onMouseMove={() => onItemMouseMove(value)}
      >
        {children}
      </LineItem>
    </div>
  );
}

// =============================================================================
// CommandMenu Action
// =============================================================================

/**
 * CommandMenu Action Component
 *
 * Dumb component - registers callback on mount, renders based on context state.
 * Uses LineItem with action variant for visual distinction.
 */
function CommandMenuAction({
  value,
  icon,
  shortcut,
  onSelect,
  children,
  defaultHighlight = true,
}: CommandMenuActionProps) {
  const {
    highlightedValue,
    registerItem,
    unregisterItem,
    onItemMouseEnter,
    onItemMouseMove,
    onItemClick,
  } = useCommandMenuContext();

  // Register callback on mount - NO keyboard listener needed
  useEffect(() => {
    registerItem(value, () => onSelect?.(value), "action", defaultHighlight);
    return () => unregisterItem(value);
  }, [value, onSelect, defaultHighlight, registerItem, unregisterItem]);

  const isHighlighted = value === highlightedValue;

  return (
    <div data-command-item={value} role="option" aria-selected={isHighlighted}>
      <LineItem
        action
        icon={icon}
        rightChildren={
          shortcut ? (
            <Text figureKeystroke text02>
              {shortcut}
            </Text>
          ) : undefined
        }
        emphasized={isHighlighted}
        selected={isHighlighted}
        onClick={() => onItemClick(value)}
        onMouseEnter={() => onItemMouseEnter(value)}
        onMouseMove={() => onItemMouseMove(value)}
      >
        {children}
      </LineItem>
    </div>
  );
}

// =============================================================================
// CommandMenu Footer
// =============================================================================

/**
 * CommandMenu Footer Component
 *
 * Footer section with keyboard hint actions.
 */
function CommandMenuFooter({ leftActions }: CommandMenuFooterProps) {
  return (
    <div className="flex-shrink-0">
      <Section
        flexDirection="row"
        justifyContent="start"
        gap={1}
        padding={0.75}
      >
        {leftActions}
      </Section>
    </div>
  );
}

// =============================================================================
// CommandMenu Footer Action
// =============================================================================

/**
 * CommandMenu Footer Action Component
 *
 * Display-only visual hint showing a keyboard shortcut.
 */
function CommandMenuFooterAction({
  icon: Icon,
  label,
}: CommandMenuFooterActionProps) {
  return (
    <div className="flex items-center gap-1" aria-label={label}>
      <Icon
        className="w-[0.875rem] h-[0.875rem] stroke-text-02"
        aria-hidden="true"
      />
      <Text mainUiBody text03>
        {label}
      </Text>
    </div>
  );
}

// =============================================================================
// Export Compound Component
// =============================================================================

export { useCommandMenuContext };

export default Object.assign(CommandMenuRoot, {
  Content: CommandMenuContent,
  Header: CommandMenuHeader,
  List: CommandMenuList,
  Filter: CommandMenuFilter,
  Item: CommandMenuItem,
  Action: CommandMenuAction,
  Footer: CommandMenuFooter,
  FooterAction: CommandMenuFooterAction,
});


================================================
FILE: web/src/refresh-components/commandmenu/types.ts
================================================
import type { IconProps } from "@opal/types";

// =============================================================================
// Filter Object (for header display)
// =============================================================================

/**
 * Filter object for CommandMenu header
 */
export interface CommandMenuFilter {
  id: string;
  label: string;
  icon?: React.FunctionComponent<IconProps>;
}

/**
 * Props for CommandMenu root component
 */
export interface CommandMenuProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  children: React.ReactNode;
}

/**
 * Props for CommandMenu content (modal container)
 */
export interface CommandMenuContentProps {
  children: React.ReactNode;
}

/**
 * Props for CommandMenu header with search and filters
 */
export interface CommandMenuHeaderProps {
  placeholder?: string;
  filters?: CommandMenuFilter[];
  value?: string;
  onValueChange?: (value: string) => void;
  onFilterRemove?: (filterId: string) => void;
  onClose?: () => void;
  onEmptyBackspace?: () => void;
}

/**
 * Props for CommandMenu list container
 */
export interface CommandMenuListProps {
  children: React.ReactNode;
  emptyMessage?: string;
}

/**
 * Props for CommandMenu filter (selectable or as applied group label)
 */
export interface CommandMenuFilterProps {
  /**
   * Unique identifier for this item within the CommandMenu.
   * Must be unique across all Filter, Item, and Action components.
   * Used for keyboard navigation, selection callbacks, and highlight state.
   */
  value: string;
  children: string;
  icon?: React.FunctionComponent<IconProps>;
  isApplied?: boolean; // When true, renders as non-interactive group label
  onSelect?: () => void;
}

/**
 * Props for CommandMenu item
 */
export interface CommandMenuItemProps {
  /**
   * Unique identifier for this item within the CommandMenu.
   * Must be unique across all Filter, Item, and Action components.
   * Used for keyboard navigation, selection callbacks, and highlight state.
   */
  value: string;
  icon?: React.FunctionComponent<IconProps>;
  rightContent?:
    | React.ReactNode
    | ((params: { isHighlighted: boolean }) => React.ReactNode); // For timestamps, badges, etc.
  onSelect?: (value: string) => void;
  children: React.ReactNode;
}

/**
 * Props for CommandMenu action (quick actions with keyboard shortcuts)
 */
export interface CommandMenuActionProps {
  /**
   * Unique identifier for this item within the CommandMenu.
   * Must be unique across all Filter, Item, and Action components.
   * Used for keyboard navigation, selection callbacks, and highlight state.
   */
  value: string;
  icon?: React.FunctionComponent<IconProps>;
  shortcut?: string; // Keyboard shortcut like "⌘N", "⌘P"
  onSelect?: (value: string) => void;
  children: React.ReactNode;
  /**
   * Whether this action should be considered for initial highlight.
   * Default: true. Set false to skip this item when determining initial highlight.
   * Arrow key navigation still includes all items regardless of this setting.
   */
  defaultHighlight?: boolean;
}

/**
 * Props for CommandMenu footer
 */
export interface CommandMenuFooterProps {
  leftActions?: React.ReactNode;
}

/**
 * Props for CommandMenu footer action hint
 */
export interface CommandMenuFooterActionProps {
  icon: React.FunctionComponent<IconProps>;
  label: string;
}

/**
 * Context value for CommandMenu keyboard navigation
 * Uses centralized control with callback registry - items are "dumb" renderers
 */
export interface CommandMenuContextValue {
  // State
  highlightedValue: string | null;
  highlightedItemType: "filter" | "item" | "action" | null;
  isKeyboardNav: boolean;

  // Registration (items call on mount with their callback)
  registerItem: (
    value: string,
    onSelect: () => void,
    type?: "filter" | "item" | "action",
    defaultHighlight?: boolean
  ) => void;
  unregisterItem: (value: string) => void;

  // Mouse interaction (items call on events - centralized in root)
  onItemMouseEnter: (value: string) => void;
  onItemMouseMove: (value: string) => void;
  onItemClick: (value: string) => void;
  onListMouseLeave: () => void;

  // Keyboard handler (Content attaches this to DialogPrimitive.Content)
  handleKeyDown: (e: React.KeyboardEvent) => void;
}


================================================
FILE: web/src/refresh-components/contexts/ModalContext.tsx
================================================
"use client";

import React, { createContext, useContext, useState, useCallback } from "react";

const ModalContext = createContext<ModalInterface | null>(null);

export interface ProviderProps {
  children?: React.ReactNode;
}

export interface ModalCreationInterface {
  isOpen: boolean;
  toggle: (state: boolean) => void;
  Provider: React.FunctionComponent<ProviderProps>;
}

export function useCreateModal(): ModalCreationInterface {
  const [isOpen, setIsOpen] = useState(false);

  const toggle = useCallback(
    (state: boolean) => {
      setIsOpen(state);
    },
    [setIsOpen]
  );

  const Provider: React.FunctionComponent<ProviderProps> = useCallback(
    ({ children }: ProviderProps) => {
      if (!isOpen) return null;

      return (
        <ModalContext.Provider value={{ isOpen, toggle }}>
          {children}
        </ModalContext.Provider>
      );
    },
    [isOpen, toggle]
  );

  return { isOpen, toggle, Provider };
}

export interface ModalInterface {
  isOpen: boolean;
  toggle: (state: boolean) => void;
}

export function useModal(): ModalInterface {
  const context = useContext(ModalContext);

  if (!context) {
    throw new Error(
      "useModal must be used within the `Modal` field returned by `useCreateModal`"
    );
  }

  return context;
}

export function useModalClose(onClose?: () => void): (() => void) | undefined {
  const context = useContext(ModalContext);

  return context
    ? () => {
        context.toggle(false);
        onClose?.();
      }
    : onClose;
}


================================================
FILE: web/src/refresh-components/form/CheckboxField.tsx
================================================
"use client";

import { useField } from "formik";
import Checkbox, { CheckboxProps } from "@/refresh-components/inputs/Checkbox";
import { useOnChangeValue } from "@/hooks/formHooks";

interface CheckboxFieldProps extends Omit<CheckboxProps, "checked"> {
  name: string;
}

export default function UnlabeledCheckboxField({
  name,
  onCheckedChange,
  ...props
}: CheckboxFieldProps) {
  const [field] = useField<boolean>({ name, type: "checkbox" });
  const onChange = useOnChangeValue(name, onCheckedChange);

  return (
    <Checkbox checked={field.value} onCheckedChange={onChange} {...props} />
  );
}


================================================
FILE: web/src/refresh-components/form/FieldContext.tsx
================================================
"use client";

import { createContext, useContext } from "react";
import { FieldContextType } from "./types";

export const FieldContext = createContext<FieldContextType | undefined>(
  undefined
);

export const useFieldContext = () => {
  const context = useContext(FieldContext);
  if (context === undefined) {
    throw new Error(
      "useFieldContext must be used within a FieldContextProvider"
    );
  }
  return context;
};


================================================
FILE: web/src/refresh-components/form/FormField.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { FormField } from "./FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";

const meta: Meta<typeof FormField> = {
  title: "refresh-components/form/FormField",
  component: FormField,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof FormField>;

export const Default: Story = {
  render: () => (
    <FormField state="idle" name="email">
      <FormField.Label>Email Address</FormField.Label>
      <FormField.Description>
        We will never share your email with anyone.
      </FormField.Description>
      <FormField.Control>
        <InputTypeIn placeholder="you@example.com" />
      </FormField.Control>
    </FormField>
  ),
};

export const Required: Story = {
  render: () => (
    <FormField state="idle" name="name" required>
      <FormField.Label required>Full Name</FormField.Label>
      <FormField.Control>
        <InputTypeIn placeholder="Jane Doe" />
      </FormField.Control>
    </FormField>
  ),
};

export const Optional: Story = {
  render: () => (
    <FormField state="idle" name="nickname">
      <FormField.Label optional>Nickname</FormField.Label>
      <FormField.Control>
        <InputTypeIn placeholder="Optional nickname" />
      </FormField.Control>
    </FormField>
  ),
};

export const ErrorState: Story = {
  render: () => (
    <FormField state="error" name="username">
      <FormField.Label>Username</FormField.Label>
      <FormField.Control>
        <InputTypeIn placeholder="Choose a username" variant="error" />
      </FormField.Control>
      <FormField.Message
        messages={{ error: "This username is already taken." }}
      />
    </FormField>
  ),
};

export const SuccessState: Story = {
  render: () => (
    <FormField state="success" name="username">
      <FormField.Label>Username</FormField.Label>
      <FormField.Control>
        <InputTypeIn placeholder="Choose a username" />
      </FormField.Control>
      <FormField.Message messages={{ success: "Username is available!" }} />
    </FormField>
  ),
};

export const WithAPIMessage: Story = {
  render: () => (
    <FormField state="idle" name="domain">
      <FormField.Label>Custom Domain</FormField.Label>
      <FormField.Control>
        <InputTypeIn placeholder="your-domain.com" />
      </FormField.Control>
      <FormField.APIMessage
        state="loading"
        messages={{ loading: "Verifying DNS records..." }}
      />
    </FormField>
  ),
};


================================================
FILE: web/src/refresh-components/form/FormField.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import { FieldContext } from "./FieldContext";
import {
  ControlProps,
  DescriptionProps,
  FieldContextType,
  FormFieldRootProps,
  LabelProps,
  MessageProps,
  APIMessageProps,
} from "./types";
import React, { useId, useMemo } from "react";
import { useFieldContext } from "./FieldContext";
import { Slot } from "@radix-ui/react-slot";
import Text from "../texts/Text";
import { FieldMessage } from "../messages/FieldMessage";

export const FormFieldRoot: React.FC<FormFieldRootProps> = ({
  id,
  name,
  state = "idle",
  required,
  className,
  children,
  ...props
}) => {
  const reactId = useId();
  const baseId = id ?? `field_${reactId}`;

  const describedByIds = useMemo(() => {
    return [`${baseId}-desc`, `${baseId}-msg`, `${baseId}-api-msg`];
  }, [baseId]);

  const contextValue: FieldContextType = {
    baseId,
    name,
    required,
    state,
    describedByIds,
  };

  return (
    <FieldContext.Provider value={contextValue}>
      <div
        id={baseId}
        className={cn("flex flex-col gap-y-1", className)}
        {...props}
      >
        {children}
      </div>
    </FieldContext.Provider>
  );
};

export const FormFieldLabel: React.FC<LabelProps> = ({
  leftIcon,
  rightIcon,
  optional,
  required,
  rightAction,
  className,
  children,
  ...props
}) => {
  const { baseId } = useFieldContext();
  return (
    <label
      id={`${baseId}-label`}
      htmlFor={`${baseId}-control`}
      className={cn(
        "ml-0.5 text-text-04 font-main-ui-action flex flex-row items-center gap-1",
        className
      )}
      {...props}
    >
      {leftIcon && <span className="flex items-center">{leftIcon}</span>}
      {children}
      {required ? (
        <Text as="p" text03 mainUiMuted className="mx-0.5">
          {"(Required)"}
        </Text>
      ) : optional ? (
        <Text as="p" text03 mainUiMuted className="mx-0.5">
          {"(Optional)"}
        </Text>
      ) : null}
      {rightIcon && <span className="flex items-center">{rightIcon}</span>}
      {rightAction && (
        <span className="ml-auto flex items-center">{rightAction}</span>
      )}
    </label>
  );
};

export const FormFieldControl: React.FC<ControlProps> = ({
  asChild,
  children,
}) => {
  const { baseId, state, describedByIds, required } = useFieldContext();

  const ariaAttributes = {
    id: `${baseId}-control`,
    "aria-invalid": state === "error",
    "aria-describedby": describedByIds?.join(" "),
    "aria-required": required,
  };

  if (asChild) {
    return <Slot {...ariaAttributes}>{children}</Slot>;
  }

  if (React.isValidElement(children)) {
    return React.cloneElement(children, {
      ...ariaAttributes,
      ...(children.props as any),
    });
  }

  return <>{children}</>;
};

export const FormFieldDescription: React.FC<DescriptionProps> = ({
  className,
  children,
  ...props
}) => {
  const { baseId } = useFieldContext();
  const content = children;
  if (!content) return null;
  return (
    <Text
      as="p"
      id={`${baseId}-desc`}
      text03
      secondaryBody
      className={cn("ml-0.5", className)}
      {...props}
    >
      {content}
    </Text>
  );
};

export const FormFieldMessage: React.FC<MessageProps> = ({
  className,
  messages,
  render,
}) => {
  const { baseId, state } = useFieldContext();
  let tempState = state;
  let content = messages?.[tempState];
  // If the state is success and there is no content, set the state to idle and use the idle message
  if (tempState === "success" && !content) {
    tempState = "idle";
    content = messages?.idle;
  }
  return content ? (
    <FieldMessage variant={tempState} className={className}>
      <FieldMessage.Content id={`${baseId}-msg`}>
        {content}
      </FieldMessage.Content>
    </FieldMessage>
  ) : null;
};

export const FormAPIFieldMessage: React.FC<APIMessageProps> = ({
  className,
  messages,
  state = "loading",
}) => {
  const { baseId } = useFieldContext();
  const content = messages?.[state];
  return content ? (
    <FieldMessage variant={state} className={className}>
      <FieldMessage.Content id={`${baseId}-api-msg`}>
        {content}
      </FieldMessage.Content>
    </FieldMessage>
  ) : null;
};

export const FormField = Object.assign(FormFieldRoot, {
  Label: FormFieldLabel,
  Control: FormFieldControl,
  Description: FormFieldDescription,
  Message: FormFieldMessage,
  APIMessage: FormAPIFieldMessage,
});


================================================
FILE: web/src/refresh-components/form/FormikField.tsx
================================================
"use client";

import {
  useField,
  FieldInputProps,
  FieldHelperProps,
  FieldMetaProps,
} from "formik";
import { FormFieldState } from "./types";
import React, { useMemo, memo } from "react";

export type FormikFieldProps<T = any> = {
  name: string;
  render: (
    field: FieldInputProps<T>,
    helper: FieldHelperProps<T>,
    meta: FieldMetaProps<T>,
    status: FormFieldState
  ) => React.ReactElement;
};

function FormikFieldComponent<T>({ name, render }: FormikFieldProps<T>) {
  const [field, meta, helper] = useField<T>(name);

  const state = useMemo(
    (): FormFieldState =>
      meta.touched ? (meta.error ? "error" : "success") : "idle",
    [meta.touched, meta.error]
  );

  return render(field, helper, meta, state);
}

export const FormikField = memo(
  FormikFieldComponent
) as typeof FormikFieldComponent;


================================================
FILE: web/src/refresh-components/form/FormikFields.stories.tsx
================================================
/**
 * Stories for Formik-connected form field components.
 *
 * All these components call `useField` from Formik internally, so every story
 * wraps the component in a minimal `<Formik>` provider. The forms are
 * non-submitting; they exist purely to demonstrate the field UI.
 *
 * Components covered:
 * - CheckboxField (unlabeled, from CheckboxField.tsx)
 * - LabeledCheckboxField (from LabeledCheckboxField.tsx)
 * - SwitchField
 * - InputTypeInField
 * - InputTextAreaField
 * - InputSelectField
 * - InputDatePickerField
 * - PasswordInputTypeInField
 */

import type { Meta, StoryObj } from "@storybook/react";
import { Formik, Form } from "formik";
import React from "react";

import UnlabeledCheckboxField from "./CheckboxField";
import { CheckboxField as LabeledCheckboxField } from "./LabeledCheckboxField";
import SwitchField from "./SwitchField";
import InputTypeInField from "./InputTypeInField";
import InputTextAreaField from "./InputTextAreaField";
import InputSelectField from "./InputSelectField";
import InputDatePickerField from "./InputDatePickerField";
import PasswordInputTypeInField from "./PasswordInputTypeInField";
import InputSelect from "@/refresh-components/inputs/InputSelect";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Minimal Formik wrapper that never submits. */
function FormikWrapper({
  initialValues,
  children,
}: {
  initialValues: Record<string, unknown>;
  children: React.ReactNode;
}) {
  return (
    <Formik initialValues={initialValues} onSubmit={() => {}}>
      <Form
        style={{
          display: "flex",
          flexDirection: "column",
          gap: 16,
          maxWidth: 400,
        }}
      >
        {children}
      </Form>
    </Formik>
  );
}

// ---------------------------------------------------------------------------
// Meta (we use a dummy component since this file covers multiple components)
// ---------------------------------------------------------------------------

const meta: Meta = {
  title: "refresh-components/form/FormikFields",
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj;

// ---------------------------------------------------------------------------
// CheckboxField (unlabeled)
// ---------------------------------------------------------------------------

export const Checkbox: Story = {
  name: "CheckboxField (unlabeled)",
  render: () => (
    <FormikWrapper initialValues={{ agree: false }}>
      <UnlabeledCheckboxField name="agree" />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// LabeledCheckboxField
// ---------------------------------------------------------------------------

export const LabeledCheckbox: Story = {
  name: "LabeledCheckboxField",
  render: () => (
    <FormikWrapper initialValues={{ terms: false }}>
      <LabeledCheckboxField
        name="terms"
        label="I agree to the terms and conditions"
        sublabel="You must accept before continuing."
      />
    </FormikWrapper>
  ),
};

export const LabeledCheckboxWithTooltip: Story = {
  name: "LabeledCheckboxField with tooltip",
  render: () => (
    <FormikWrapper initialValues={{ newsletter: true }}>
      <LabeledCheckboxField
        name="newsletter"
        label="Subscribe to newsletter"
        tooltip="We send at most one email per week."
      />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// SwitchField
// ---------------------------------------------------------------------------

export const Switch: Story = {
  name: "SwitchField",
  render: () => (
    <FormikWrapper initialValues={{ notifications: true }}>
      <label htmlFor="notifications" style={{ fontWeight: 500 }}>
        Enable notifications
      </label>
      <SwitchField name="notifications" />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// InputTypeInField
// ---------------------------------------------------------------------------

export const TextInput: Story = {
  name: "InputTypeInField",
  render: () => (
    <FormikWrapper initialValues={{ username: "" }}>
      <InputTypeInField name="username" placeholder="Enter your username" />
    </FormikWrapper>
  ),
};

export const TextInputDisabled: Story = {
  name: "InputTypeInField (disabled)",
  render: () => (
    <FormikWrapper initialValues={{ locked: "read-only value" }}>
      <InputTypeInField
        name="locked"
        placeholder="Disabled"
        variant="disabled"
      />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// InputTextAreaField
// ---------------------------------------------------------------------------

export const TextArea: Story = {
  name: "InputTextAreaField",
  render: () => (
    <FormikWrapper initialValues={{ bio: "" }}>
      <InputTextAreaField name="bio" placeholder="Tell us about yourself..." />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// InputSelectField
// ---------------------------------------------------------------------------

export const Select: Story = {
  name: "InputSelectField",
  render: () => (
    <FormikWrapper initialValues={{ role: "" }}>
      <InputSelectField name="role">
        <InputSelect.Trigger placeholder="Select a role" />
        <InputSelect.Content>
          <InputSelect.Item value="admin">Admin</InputSelect.Item>
          <InputSelect.Item value="editor">Editor</InputSelect.Item>
          <InputSelect.Item value="viewer">Viewer</InputSelect.Item>
        </InputSelect.Content>
      </InputSelectField>
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// InputDatePickerField
// ---------------------------------------------------------------------------

export const DatePicker: Story = {
  name: "InputDatePickerField",
  render: () => (
    <FormikWrapper initialValues={{ startDate: null }}>
      <InputDatePickerField name="startDate" />
    </FormikWrapper>
  ),
};

// ---------------------------------------------------------------------------
// PasswordInputTypeInField
// ---------------------------------------------------------------------------

export const PasswordInput: Story = {
  name: "PasswordInputTypeInField",
  render: () => (
    <FormikWrapper initialValues={{ apiKey: "" }}>
      <PasswordInputTypeInField name="apiKey" placeholder="sk-..." />
    </FormikWrapper>
  ),
};

export const PasswordInputNoLabel: Story = {
  name: "PasswordInputTypeInField (no label)",
  render: () => (
    <FormikWrapper initialValues={{ secret: "" }}>
      <PasswordInputTypeInField name="secret" placeholder="Enter secret" />
    </FormikWrapper>
  ),
};


================================================
FILE: web/src/refresh-components/form/InputDatePickerField.tsx
================================================
"use client";

import { useField } from "formik";
import InputDatePicker, {
  InputDatePickerProps,
} from "@/refresh-components/inputs/InputDatePicker";
import { useOnChangeValue } from "@/hooks/formHooks";

interface InputDatePickerFieldProps
  extends Omit<InputDatePickerProps, "selectedDate" | "setSelectedDate"> {
  name: string;
  setSelectedDate?: (date: Date | null) => void;
}

export default function InputDatePickerField({
  name,
  setSelectedDate,
  ...props
}: InputDatePickerFieldProps) {
  const [field] = useField<Date | null>(name);
  const onChange = useOnChangeValue(name, setSelectedDate);

  return (
    <InputDatePicker
      name={name}
      selectedDate={field.value}
      setSelectedDate={onChange}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/form/InputSelectField.tsx
================================================
"use client";

import { useField } from "formik";
import InputSelect, {
  InputSelectRootProps,
} from "@/refresh-components/inputs/InputSelect";
import { useOnChangeValue } from "@/hooks/formHooks";

export interface InputSelectFieldProps
  extends Omit<InputSelectRootProps, "value"> {
  name: string;
}

export default function InputSelectField({
  name,
  children,
  onValueChange,
  ...selectProps
}: InputSelectFieldProps) {
  const [field, meta] = useField(name);
  const onChange = useOnChangeValue(name, onValueChange);
  const hasError = meta.touched && meta.error;

  return (
    <InputSelect
      name={name}
      value={field.value}
      onValueChange={onChange}
      error={!!hasError}
      {...selectProps}
    >
      {children}
    </InputSelect>
  );
}


================================================
FILE: web/src/refresh-components/form/InputTextAreaField.tsx
================================================
"use client";

import { useField } from "formik";
import InputTextArea, {
  InputTextAreaProps,
} from "@/refresh-components/inputs/InputTextArea";
import { useOnChangeEvent, useOnBlurEvent } from "@/hooks/formHooks";

export interface InputTextAreaFieldProps
  extends Omit<InputTextAreaProps, "value"> {
  name: string;
}

export default function InputTextAreaField({
  name,
  onChange: onChangeProp,
  onBlur: onBlurProp,
  ...textareaProps
}: InputTextAreaFieldProps) {
  const [field, meta] = useField(name);
  const onChange = useOnChangeEvent(name, onChangeProp);
  const onBlur = useOnBlurEvent(name, onBlurProp);
  const hasError = meta.touched && meta.error;
  const isNonEditable =
    textareaProps.variant === "disabled" ||
    textareaProps.variant === "readOnly";

  return (
    <InputTextArea
      {...textareaProps}
      id={name}
      name={name}
      value={field.value ?? ""}
      onChange={onChange}
      onBlur={onBlur}
      variant={
        isNonEditable
          ? textareaProps.variant
          : hasError
            ? "error"
            : textareaProps.variant
      }
    />
  );
}


================================================
FILE: web/src/refresh-components/form/InputTypeInElementField.tsx
================================================
"use client";

import { useField } from "formik";
import InputTypeIn, {
  InputTypeInProps,
} from "@/refresh-components/inputs/InputTypeIn";
import { Button } from "@opal/components";
import { SvgMinusCircle } from "@opal/icons";
import { useOnChangeEvent, useOnBlurEvent } from "@/hooks/formHooks";
import { Section } from "@/layouts/general-layouts";

export interface InputTypeInElementFieldProps
  extends Omit<InputTypeInProps, "value" | "onClear"> {
  name: string;
  onRemove?: () => void;
}

// This component should be used inside of a list in `formik`'s "Form" context.
export default function InputTypeInElementField({
  name,
  onRemove,
  onChange: onChangeProp,
  onBlur: onBlurProp,
  ...inputProps
}: InputTypeInElementFieldProps) {
  const [field, meta] = useField(name);
  const onChange = useOnChangeEvent(name, onChangeProp);
  const onBlur = useOnBlurEvent(name, onBlurProp);
  const hasError = meta.touched && meta.error;
  const isEmpty = !field.value || field.value.trim() === "";
  const isNonEditable =
    inputProps.variant === "disabled" || inputProps.variant === "readOnly";

  return (
    <Section flexDirection="row" gap={0.25}>
      {/* Input */}
      <InputTypeIn
        {...inputProps}
        id={name}
        name={name}
        value={field.value ?? ""}
        onChange={onChange}
        onBlur={onBlur}
        variant={
          isNonEditable
            ? inputProps.variant
            : hasError
              ? "error"
              : inputProps.variant
        }
        showClearButton={false}
      />
      <Button
        disabled={!onRemove || isEmpty}
        icon={SvgMinusCircle}
        prominence="tertiary"
        onClick={onRemove}
        tooltip="Remove"
      />
    </Section>
  );
}


================================================
FILE: web/src/refresh-components/form/InputTypeInField.tsx
================================================
"use client";

import { useField } from "formik";
import InputTypeIn, {
  InputTypeInProps,
} from "@/refresh-components/inputs/InputTypeIn";
import { useOnChangeEvent, useOnBlurEvent } from "@/hooks/formHooks";

export interface InputTypeInFieldProps
  extends Omit<InputTypeInProps, "value" | "onClear"> {
  name: string;
}

export default function InputTypeInField({
  name,
  onChange: onChangeProp,
  onBlur: onBlurProp,
  ...inputProps
}: InputTypeInFieldProps) {
  const [field, meta, helpers] = useField(name);
  const onChange = useOnChangeEvent(name, onChangeProp);
  const onBlur = useOnBlurEvent(name, onBlurProp);
  const hasError = meta.touched && meta.error;
  const isNonEditable =
    inputProps.variant === "disabled" || inputProps.variant === "readOnly";

  return (
    <InputTypeIn
      {...inputProps}
      id={name}
      name={name}
      value={field.value ?? ""}
      onChange={onChange}
      onBlur={onBlur}
      onClear={() => {
        helpers.setValue("");
      }}
      variant={
        isNonEditable
          ? inputProps.variant
          : hasError
            ? "error"
            : inputProps.variant
      }
    />
  );
}


================================================
FILE: web/src/refresh-components/form/Label.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Label from "./Label";

const meta: Meta<typeof Label> = {
  title: "refresh-components/form/Label",
  component: Label,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Label>;

export const Default: Story = {
  args: {
    children: "Email Address",
    name: "email",
  },
};

export const Disabled: Story = {
  args: {
    children: "Disabled Label",
    name: "disabled-input",
    disabled: true,
  },
};

export const NonInteractive: Story = {
  args: {
    children: "Non-Interactive Label",
    name: "readonly-input",
    nonInteractive: true,
  },
};


================================================
FILE: web/src/refresh-components/form/Label.tsx
================================================
"use client";

import { cn } from "@/lib/utils";
import { WithoutStyles } from "@/types";

/**
 * Label - A form label component
 *
 * Renders a label element that associates with a form input via the `name` prop.
 *
 * @example
 * ```tsx
 * import Label from "@/refresh-components/form/Label";
 *
 * <Label name="email">
 *   Email Address
 * </Label>
 * ```
 */

interface LabelProps
  extends WithoutStyles<
    // The `htmlFor` prop is instead renamed to `name?: string`.
    Omit<React.LabelHTMLAttributes<HTMLLabelElement>, "htmlFor">
  > {
  /** The name/id of the form element this label is associated with */
  name?: string;
  /** Whether the associated input is disabled */
  disabled?: boolean;
  nonInteractive?: boolean;
  ref?: React.Ref<HTMLLabelElement>;
}

export default function Label({
  name,
  disabled,
  nonInteractive,
  ref,
  ...props
}: LabelProps) {
  return (
    <label
      ref={ref}
      data-non-interactive={nonInteractive ? "true" : undefined}
      className={cn(
        "flex-1 self-stretch",
        "peer-disabled:cursor-not-allowed data-[non-interactive=true]:cursor-default",
        disabled
          ? "cursor-not-allowed"
          : nonInteractive
            ? undefined
            : "cursor-pointer"
      )}
      htmlFor={name}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/form/LabeledCheckboxField.tsx
================================================
"use client";

import React from "react";
import { useField } from "formik";
import { cn } from "@/lib/utils";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import Checkbox from "@/refresh-components/inputs/Checkbox";

interface CheckboxFieldProps {
  name: string;
  label: string;
  labelClassName?: string;
  sublabel?: string;
  size?: "sm" | "md" | "lg";
  tooltip?: string;
  onChange?: (checked: boolean) => void;
  disabled?: boolean;
}

export const CheckboxField: React.FC<CheckboxFieldProps> = ({
  name,
  label,
  onChange,
  sublabel,
  size = "md",
  tooltip,
  labelClassName,
  disabled,
  ...props
}) => {
  const [field, , helpers] = useField<boolean>({ name, type: "checkbox" });

  const sizeClasses = {
    sm: "h-2 w-2",
    md: "h-3 w-3",
    lg: "h-4 w-4",
  };

  const handleClick = (e: React.MouseEvent<HTMLLabelElement>) => {
    e.preventDefault();
    const next = !field.value;
    helpers.setValue(next);
    onChange?.(next);
  };

  const labelId = `${name}-label`;

  const checkboxContent = (
    <div className="flex w-fit items-start space-x-2">
      <Checkbox
        id={name}
        aria-labelledby={labelId}
        checked={field.value}
        onCheckedChange={(checked) => {
          helpers.setValue(Boolean(checked));
          onChange?.(Boolean(checked));
        }}
        className={cn(sizeClasses[size])}
        disabled={disabled}
        {...props}
      />
      <div className="flex flex-col">
        <label
          id={labelId}
          htmlFor={name}
          className="flex flex-col cursor-pointer"
          onClick={handleClick}
        >
          <span
            className={cn(
              "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70",
              labelClassName
            )}
          >
            {label}
          </span>
          {sublabel && (
            <span className="text-sm text-muted-foreground mt-1">
              {sublabel}
            </span>
          )}
        </label>
      </div>
    </div>
  );

  return (
    <SimpleTooltip tooltip={tooltip} side="top" sideOffset={25}>
      {checkboxContent}
    </SimpleTooltip>
  );
};

export default CheckboxField;


================================================
FILE: web/src/refresh-components/form/PasswordInputTypeInField.tsx
================================================
"use client";

import { useField } from "formik";
import PasswordInputTypeIn, {
  PasswordInputTypeInProps,
} from "@/refresh-components/inputs/PasswordInputTypeIn";
import { useOnChangeEvent, useOnBlurEvent } from "@/hooks/formHooks";

export interface PasswordInputTypeInFieldProps
  extends Omit<PasswordInputTypeInProps, "value"> {
  name: string;
}

export default function PasswordInputTypeInField({
  name,
  onChange: onChangeProp,
  onBlur: onBlurProp,
  ...inputProps
}: PasswordInputTypeInFieldProps) {
  const [field, meta] = useField(name);
  const onChange = useOnChangeEvent(name, onChangeProp);
  const onBlur = useOnBlurEvent(name, onBlurProp);
  const hasError = meta.touched && meta.error;
  const showError = hasError && !inputProps.disabled;

  return (
    <PasswordInputTypeIn
      {...inputProps}
      id={name}
      name={name}
      value={field.value ?? ""}
      onChange={onChange}
      onBlur={onBlur}
      error={showError ? true : inputProps.error}
    />
  );
}


================================================
FILE: web/src/refresh-components/form/SwitchField.tsx
================================================
"use client";

import { useField } from "formik";
import Switch, { SwitchProps } from "@/refresh-components/inputs/Switch";
import { useOnChangeValue } from "@/hooks/formHooks";

interface SwitchFieldProps extends Omit<SwitchProps, "checked"> {
  name: string;
}

export default function SwitchField({
  name,
  onCheckedChange,
  ...props
}: SwitchFieldProps) {
  const [field] = useField<boolean>({ name, type: "checkbox" });
  const onChange = useOnChangeValue(name, onCheckedChange);

  return (
    <Switch
      id={name}
      name={name}
      checked={field.value}
      onCheckedChange={onChange}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/form/types.ts
================================================
import type React from "react";
export type FormFieldState = "idle" | "success" | "error";
export type APIFormFieldState = FormFieldState | "loading";

export interface FieldContextType {
  baseId: string;
  name?: string;
  required?: boolean;
  state: FormFieldState;
  describedByIds: string[];
}

export type FormFieldRootProps = React.HTMLAttributes<HTMLDivElement> & {
  name?: string;
  state?: FormFieldState;
  required?: boolean;
  id?: string;
};

export type LabelProps = React.HTMLAttributes<HTMLLabelElement> & {
  leftIcon?: React.ReactNode;
  rightIcon?: React.ReactNode;
  optional?: boolean;
  required?: boolean;
  rightAction?: React.ReactNode;
};

export type ControlProps = React.PropsWithChildren<{
  asChild?: boolean;
}>;

export type DescriptionProps = React.HTMLAttributes<HTMLParagraphElement>;
export type MessageByState = Partial<
  Record<FormFieldState, string | React.ReactNode>
>;
export type APIMessageByState = Partial<
  Record<FormFieldState | "loading", string>
>;

export type MessageProps = React.HTMLAttributes<HTMLDivElement> & {
  messages?: MessageByState;
  render?: (state: FormFieldState) => React.ReactNode;
};

export type APIMessageProps = React.HTMLAttributes<HTMLDivElement> & {
  state?: APIFormFieldState;
  messages?: APIMessageByState;
};


================================================
FILE: web/src/refresh-components/inputs/Checkbox.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Checkbox from "./Checkbox";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Checkbox> = {
  title: "refresh-components/inputs/Checkbox",
  component: Checkbox,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Checkbox>;

export const Default: Story = {
  args: {},
};

export const Checked: Story = {
  args: {
    checked: true,
  },
};

export const Indeterminate: Story = {
  args: {
    indeterminate: true,
  },
};

export const WithLabel: Story = {
  render: () => (
    <div style={{ display: "flex", alignItems: "center", gap: 8 }}>
      <Checkbox id="terms" />
      <label htmlFor="terms" style={{ cursor: "pointer" }}>
        Accept terms and conditions
      </label>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/inputs/Checkbox.test.tsx
================================================
import React from "react";
import { render, screen, fireEvent } from "@testing-library/react";
import "@testing-library/jest-dom";
import Checkbox from "./Checkbox";

describe("Checkbox", () => {
  describe("Rendering", () => {
    test("renders unchecked by default", () => {
      const { container } = render(<Checkbox />);
      const checkbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "false");
      expect(input).not.toBeChecked();
    });

    test("renders checked when checked prop is true", () => {
      const { container } = render(
        <Checkbox checked={true} onCheckedChange={() => {}} />
      );
      const checkbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "true");
      expect(input).toBeChecked();
    });

    test("renders unchecked when checked prop is false", () => {
      const { container } = render(
        <Checkbox checked={false} onCheckedChange={() => {}} />
      );
      const checkbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "false");
      expect(input).not.toBeChecked();
    });

    test("renders with defaultChecked", () => {
      const { container } = render(<Checkbox defaultChecked={true} />);
      const checkbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "true");
      expect(input).toBeChecked();
    });

    test("applies custom className", () => {
      const { container } = render(<Checkbox className="custom-class" />);
      const visualCheckbox = container.querySelector(".custom-class");
      expect(visualCheckbox).toBeInTheDocument();
    });
  });

  describe("Controlled mode", () => {
    test("calls onCheckedChange when clicked", () => {
      const handleChange = jest.fn();
      render(<Checkbox checked={false} onCheckedChange={handleChange} />);
      const visualCheckbox = screen.getByRole("checkbox");
      fireEvent.click(visualCheckbox);
      expect(handleChange).toHaveBeenCalledWith(true);
    });

    test("does not change state when controlled", () => {
      const handleChange = jest.fn();
      const { container } = render(
        <Checkbox checked={false} onCheckedChange={handleChange} />
      );
      const visualCheckbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      fireEvent.click(visualCheckbox);
      expect(input).not.toBeChecked(); // Should not change without parent updating prop
    });

    test("updates when checked prop changes", () => {
      const { rerender, container } = render(
        <Checkbox checked={false} onCheckedChange={() => {}} />
      );
      let checkbox = screen.getByRole("checkbox");
      let input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "false");
      expect(input).not.toBeChecked();

      rerender(<Checkbox checked={true} onCheckedChange={() => {}} />);
      checkbox = screen.getByRole("checkbox");
      input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "true");
      expect(input).toBeChecked();
    });
  });

  describe("Uncontrolled mode", () => {
    test("toggles state and calls onCheckedChange when clicked", () => {
      const handleChange = jest.fn();
      const { container } = render(<Checkbox onCheckedChange={handleChange} />);
      const visualCheckbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;

      expect(visualCheckbox).toHaveAttribute("aria-checked", "false");
      expect(input).not.toBeChecked();
      fireEvent.click(visualCheckbox);
      expect(visualCheckbox).toHaveAttribute("aria-checked", "true");
      expect(input).toBeChecked();
      expect(handleChange).toHaveBeenCalledWith(true);

      fireEvent.click(visualCheckbox);
      expect(visualCheckbox).toHaveAttribute("aria-checked", "false");
      expect(input).not.toBeChecked();
      expect(handleChange).toHaveBeenCalledWith(false);
    });
  });

  describe("Indeterminate state", () => {
    test("sets correct aria-checked values for all states", () => {
      const { rerender, container } = render(
        <Checkbox checked={false} onCheckedChange={() => {}} />
      );
      let checkbox = screen.getByRole("checkbox");
      expect(checkbox).toHaveAttribute("aria-checked", "false");

      rerender(<Checkbox checked={true} onCheckedChange={() => {}} />);
      checkbox = screen.getByRole("checkbox");
      expect(checkbox).toHaveAttribute("aria-checked", "true");

      rerender(<Checkbox indeterminate={true} />);
      checkbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;
      expect(checkbox).toHaveAttribute("aria-checked", "mixed");
      expect(input.indeterminate).toBe(true);
    });
  });

  describe("Disabled state", () => {
    test("sets disabled attribute and prevents interaction", () => {
      const handleChange = jest.fn();
      const { container } = render(
        <Checkbox disabled={true} onCheckedChange={handleChange} />
      );
      const visualCheckbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;

      expect(input).toBeDisabled();
      expect(input).not.toBeChecked();

      fireEvent.click(visualCheckbox);
      expect(input).not.toBeChecked();
      expect(handleChange).not.toHaveBeenCalled();
    });
  });

  describe("Keyboard interaction", () => {
    test("toggles when spacebar is pressed on visual checkbox", () => {
      const { container } = render(<Checkbox />);
      const visualCheckbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;

      visualCheckbox.focus();
      expect(input).not.toBeChecked();

      fireEvent.keyDown(visualCheckbox, { key: " ", code: "Space" });
      expect(input).toBeChecked();
    });

    test("toggles when Enter is pressed on visual checkbox", () => {
      const { container } = render(<Checkbox />);
      const visualCheckbox = screen.getByRole("checkbox");
      const input = container.querySelector(
        'input[type="checkbox"]'
      ) as HTMLInputElement;

      visualCheckbox.focus();
      expect(input).not.toBeChecked();

      fireEvent.keyDown(visualCheckbox, { key: "Enter", code: "Enter" });
      expect(input).toBeChecked();
    });
  });

  describe("onChange handler", () => {
    test("calls onChange when provided", () => {
      const handleChange = jest.fn();
      render(<Checkbox onChange={handleChange} />);
      const checkbox = screen.getByRole("checkbox");

      fireEvent.click(checkbox);
      expect(handleChange).toHaveBeenCalled();
    });

    test("calls both onChange and onCheckedChange", () => {
      const handleChange = jest.fn();
      const handleCheckedChange = jest.fn();
      render(
        <Checkbox
          onChange={handleChange}
          onCheckedChange={handleCheckedChange}
        />
      );
      const checkbox = screen.getByRole("checkbox");

      fireEvent.click(checkbox);
      expect(handleChange).toHaveBeenCalled();
      expect(handleCheckedChange).toHaveBeenCalledWith(true);
    });
  });

  describe("Ref forwarding", () => {
    test("forwards ref to input element", () => {
      const ref = React.createRef<HTMLInputElement>();
      render(<Checkbox ref={ref} />);
      expect(ref.current).toBeInstanceOf(HTMLInputElement);
      expect(ref.current?.type).toBe("checkbox");
    });
  });

  describe("Accessibility", () => {
    test("has role checkbox", () => {
      render(<Checkbox />);
      const checkbox = screen.getByRole("checkbox");
      expect(checkbox).toBeInTheDocument();
    });

    test("supports aria-label", () => {
      render(<Checkbox aria-label="Accept terms" />);
      const checkbox = screen.getByRole("checkbox");
      expect(checkbox).toHaveAttribute("aria-label", "Accept terms");
    });

    test("supports aria-labelledby", () => {
      render(
        <div>
          <span id="checkbox-label">Accept terms</span>
          <Checkbox aria-labelledby="checkbox-label" />
        </div>
      );
      const checkbox = screen.getByRole("checkbox");
      expect(checkbox).toHaveAttribute("aria-labelledby", "checkbox-label");
    });
  });
});


================================================
FILE: web/src/refresh-components/inputs/Checkbox.tsx
================================================
"use client";

import React, { useEffect, useRef, useState } from "react";
import { cn } from "@/lib/utils";
import { SvgCheck, SvgMinus } from "@opal/icons";
const getRootClasses = (checked: boolean, indeterminate: boolean) => ({
  main:
    checked || indeterminate
      ? [
          "bg-action-link-05",
          "hover:bg-action-link-04",
          "focus-visible:border-border-05",
          "focus-visible:focus-shadow",
        ]
      : [
          "bg-background-neutral-00",
          "border",
          "border-border-02",
          "hover:border-border-03",
          "focus-visible:border-border-05",
          "focus-visible:focus-shadow",
        ],
  disabled:
    checked || indeterminate
      ? ["bg-background-neutral-04"]
      : ["bg-background-neutral-03", "border", "border-border-02"],
});

export interface CheckboxProps
  extends Omit<React.ComponentPropsWithoutRef<"input">, "type" | "size"> {
  checked?: boolean;
  defaultChecked?: boolean;
  onCheckedChange?: (checked: boolean) => void;
  indeterminate?: boolean;
}

function CheckboxInner(
  {
    checked: controlledChecked,
    defaultChecked,
    onCheckedChange,
    indeterminate = false,
    disabled,
    className,
    onChange,
    id,
    name,
    "aria-label": ariaLabel,
    "aria-labelledby": ariaLabelledby,
    "aria-describedby": ariaDescribedby,
    ...props
  }: CheckboxProps,
  ref: React.ForwardedRef<HTMLInputElement>
) {
  const [uncontrolledChecked, setUncontrolledChecked] = useState(
    defaultChecked ?? false
  );
  const inputRef = useRef<HTMLInputElement>(null);

  // Merge refs
  useEffect(() => {
    if (ref) {
      if (typeof ref === "function") {
        ref(inputRef.current);
      } else {
        ref.current = inputRef.current;
      }
    }

    // Cleanup: clear ref on unmount
    return () => {
      if (ref) {
        if (typeof ref === "function") {
          ref(null);
        } else {
          ref.current = null;
        }
      }
    };
  }, [ref]);

  const isControlled = controlledChecked !== undefined;
  const checked = isControlled ? controlledChecked : uncontrolledChecked;

  // Set indeterminate state on the DOM element
  useEffect(() => {
    if (inputRef.current) {
      inputRef.current.indeterminate = indeterminate;
    }
  }, [indeterminate]);

  function handleChange(event: React.ChangeEvent<HTMLInputElement>) {
    if (disabled) return;

    const newChecked = event.target.checked;

    if (!isControlled) setUncontrolledChecked(newChecked);
    onChange?.(event);
    onCheckedChange?.(newChecked);
  }

  const variant = disabled ? "disabled" : "main";
  const rootClasses = getRootClasses(checked, indeterminate);

  return (
    <div className="relative inline-flex shrink-0">
      {/*
        Dual-element pattern for custom checkbox:
        1. Hidden input: Maintains form state, enables form submission, supports indeterminate property
        2. Visible div: Provides custom styling, handles user interaction, accessible via role="checkbox"
        The div's click handler triggers the input's native click, preserving standard checkbox behavior.
      */}
      <input
        ref={inputRef}
        id={id}
        type="checkbox"
        role="presentation"
        className="sr-only peer"
        checked={checked}
        disabled={disabled}
        onChange={handleChange}
        name={name}
        {...props}
      />
      <div
        role="checkbox"
        aria-checked={indeterminate ? "mixed" : checked}
        aria-label={ariaLabel}
        aria-labelledby={ariaLabelledby}
        aria-describedby={ariaDescribedby}
        tabIndex={disabled ? -1 : 0}
        className={cn(
          "flex h-4 w-4 shrink-0 items-center justify-center rounded-04 transition-colors",
          disabled ? "cursor-not-allowed" : "cursor-pointer",
          rootClasses[variant],
          className
        )}
        onClick={(e) => {
          if (!disabled && inputRef.current) {
            inputRef.current.click();
            e.preventDefault();
          }
        }}
        onKeyDown={(e) => {
          if (
            !disabled &&
            inputRef.current &&
            (e.key === " " || e.key === "Enter")
          ) {
            e.preventDefault();
            inputRef.current.click();
          }
        }}
      >
        {(checked || indeterminate) && (
          <div>
            {indeterminate ? (
              <SvgMinus className="h-3 w-3 stroke-text-light-05" />
            ) : (
              <SvgCheck className="h-3 w-3 stroke-text-light-05" />
            )}
          </div>
        )}
      </div>
    </div>
  );
}

const Checkbox = React.forwardRef(CheckboxInner);
Checkbox.displayName = "Checkbox";
export default Checkbox;


================================================
FILE: web/src/refresh-components/inputs/InputAvatar.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InputAvatar from "./InputAvatar";
import * as AvatarPrimitive from "@radix-ui/react-avatar";

const meta: Meta<typeof InputAvatar> = {
  title: "refresh-components/inputs/InputAvatar",
  component: InputAvatar,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <div
        style={{
          width: 320,
          display: "flex",
          justifyContent: "center",
          padding: 24,
        }}
      >
        <Story />
      </div>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputAvatar>;

export const WithImage: Story = {
  render: () => (
    <InputAvatar>
      <AvatarPrimitive.Image
        src="https://picsum.photos/80"
        alt="User avatar"
        className="h-full w-full object-cover"
      />
      <AvatarPrimitive.Fallback className="flex h-full w-full items-center justify-center bg-background-tint-02 text-text-03 text-sm font-medium">
        AB
      </AvatarPrimitive.Fallback>
    </InputAvatar>
  ),
};

export const WithFallback: Story = {
  render: () => (
    <InputAvatar>
      <AvatarPrimitive.Fallback className="flex h-full w-full items-center justify-center bg-background-tint-02 text-text-03 text-sm font-medium">
        JD
      </AvatarPrimitive.Fallback>
    </InputAvatar>
  ),
};

export const Empty: Story = {
  render: () => (
    <InputAvatar>
      <AvatarPrimitive.Fallback className="flex h-full w-full items-center justify-center bg-background-tint-02 text-text-04 text-xs">
        ?
      </AvatarPrimitive.Fallback>
    </InputAvatar>
  ),
};


================================================
FILE: web/src/refresh-components/inputs/InputAvatar.tsx
================================================
"use client";

import * as React from "react";
import * as AvatarPrimitive from "@radix-ui/react-avatar";
import { cn } from "@/lib/utils";
import { wrapperClasses } from "@/refresh-components/inputs/styles";

const InputAvatar = React.forwardRef<
  React.ElementRef<typeof AvatarPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof AvatarPrimitive.Root>
>(({ className, ...props }, ref) => (
  <AvatarPrimitive.Root
    ref={ref}
    className={cn(
      "relative flex h-10 w-10 shrink-0 overflow-hidden rounded-full",
      wrapperClasses.primary,
      className
    )}
    {...props}
  />
));
InputAvatar.displayName = AvatarPrimitive.Root.displayName;

export default InputAvatar;


================================================
FILE: web/src/refresh-components/inputs/InputChipField.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputChipField from "./InputChipField";
import type { ChipItem } from "./InputChipField";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputChipField> = {
  title: "refresh-components/inputs/InputChipField",
  component: InputChipField,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputChipField>;

export const Default: Story = {
  render: function DefaultStory() {
    const [chips, setChips] = React.useState<ChipItem[]>([]);
    const [value, setValue] = React.useState("");

    return (
      <InputChipField
        chips={chips}
        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}
        onAdd={(label) => {
          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);
          setValue("");
        }}
        value={value}
        onChange={setValue}
        placeholder="Type and press Enter..."
      />
    );
  },
};

export const WithChips: Story = {
  render: function WithChipsStory() {
    const [chips, setChips] = React.useState<ChipItem[]>([
      { id: "1", label: "React" },
      { id: "2", label: "TypeScript" },
      { id: "3", label: "Tailwind" },
    ]);
    const [value, setValue] = React.useState("");

    return (
      <InputChipField
        chips={chips}
        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}
        onAdd={(label) => {
          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);
          setValue("");
        }}
        value={value}
        onChange={setValue}
        placeholder="Add tags..."
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <InputChipField
      chips={[
        { id: "1", label: "Locked" },
        { id: "2", label: "Tag" },
      ]}
      onRemoveChip={() => {}}
      onAdd={() => {}}
      value=""
      onChange={() => {}}
      placeholder="Disabled"
      disabled
    />
  ),
};

export const ErrorVariant: Story = {
  render: function ErrorStory() {
    const [chips, setChips] = React.useState<ChipItem[]>([
      { id: "1", label: "Invalid" },
    ]);
    const [value, setValue] = React.useState("");

    return (
      <InputChipField
        chips={chips}
        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}
        onAdd={(label) => {
          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);
          setValue("");
        }}
        value={value}
        onChange={setValue}
        placeholder="Add labels..."
        variant="error"
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputChipField.tsx
================================================
"use client";

import * as React from "react";
import { cn } from "@/lib/utils";
import Chip from "@/refresh-components/Chip";
import {
  innerClasses,
  textClasses,
  Variants,
  wrapperClasses,
} from "@/refresh-components/inputs/styles";
import { SvgAlertTriangle } from "@opal/icons";
import type { IconProps } from "@opal/types";

export interface ChipItem {
  id: string;
  label: string;
  /** When true the chip shows a warning icon */
  error?: boolean;
}

export interface InputChipFieldProps {
  chips: ChipItem[];
  onRemoveChip: (id: string) => void;
  onAdd: (value: string) => void;

  value: string;
  onChange: (value: string) => void;

  placeholder?: string;
  disabled?: boolean;
  variant?: Variants;
  icon?: React.FunctionComponent<IconProps>;
  className?: string;
  /** "inline" renders chips and input in one row; "stacked" puts chips above the input */
  layout?: "inline" | "stacked";
}

/**
 * A tag/chip input field that renders chips inline alongside a text input.
 *
 * Pressing Enter adds a chip via `onAdd`. Pressing Backspace on an empty
 * input removes the last chip. Each chip has a remove button.
 *
 * @example
 * ```tsx
 * <InputChipField
 *   chips={[{ id: "1", label: "Search" }]}
 *   onRemoveChip={(id) => remove(id)}
 *   onAdd={(value) => add(value)}
 *   value={inputValue}
 *   onChange={setInputValue}
 *   placeholder="Add labels..."
 *   icon={SvgTag}
 * />
 * ```
 */
function InputChipField({
  chips,
  onRemoveChip,
  onAdd,
  value,
  onChange,
  placeholder,
  disabled = false,
  variant = "primary",
  icon: Icon,
  className,
  layout = "inline",
}: InputChipFieldProps) {
  const inputRef = React.useRef<HTMLInputElement>(null);

  function handleKeyDown(e: React.KeyboardEvent<HTMLInputElement>) {
    if (disabled) {
      return;
    }

    if (e.key === "Enter") {
      e.preventDefault();
      e.stopPropagation();
      const trimmed = value.trim();
      if (trimmed) {
        onAdd(trimmed);
      }
    }
    if (e.key === "Backspace" && value === "") {
      const lastChip = chips[chips.length - 1];
      if (lastChip) {
        onRemoveChip(lastChip.id);
      }
    }
  }

  const chipElements =
    chips.length > 0
      ? chips.map((chip) => (
          <Chip
            key={chip.id}
            onRemove={disabled ? undefined : () => onRemoveChip(chip.id)}
            rightIcon={chip.error ? SvgAlertTriangle : undefined}
            error={chip.error}
            smallLabel={layout === "stacked"}
          >
            {chip.label}
          </Chip>
        ))
      : null;

  const inputElement = (
    <>
      {Icon && <Icon size={16} className="text-text-04 shrink-0" />}
      <input
        ref={inputRef}
        type="text"
        disabled={disabled}
        value={value}
        onChange={(e) => onChange(e.target.value)}
        onKeyDown={handleKeyDown}
        placeholder={placeholder}
        className={cn(
          "flex-1 min-w-[80px] h-[1.5rem] bg-transparent p-0.5 focus:outline-none",
          innerClasses[variant],
          textClasses[variant]
        )}
      />
    </>
  );

  return (
    <div
      className={cn(
        "flex p-1.5 rounded-08 cursor-text w-full",
        layout === "stacked"
          ? "flex-col gap-1"
          : "flex-row flex-wrap items-center gap-1",
        wrapperClasses[variant],
        className
      )}
      onClick={() => inputRef.current?.focus()}
    >
      {layout === "stacked" ? (
        <>
          {chipElements && (
            <div className="flex flex-row items-center flex-wrap gap-1">
              {chipElements}
            </div>
          )}
          <div className="flex flex-row items-center gap-1">{inputElement}</div>
        </>
      ) : (
        <>
          {chipElements}
          {inputElement}
        </>
      )}
    </div>
  );
}

export default InputChipField;


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/InputComboBox.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputComboBox from "./InputComboBox";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputComboBox> = {
  title: "refresh-components/inputs/InputComboBox",
  component: InputComboBox,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputComboBox>;

const fruitOptions = [
  { value: "apple", label: "Apple" },
  { value: "banana", label: "Banana" },
  { value: "cherry", label: "Cherry" },
  { value: "dragonfruit", label: "Dragonfruit" },
  { value: "elderberry", label: "Elderberry" },
];

export const Default: Story = {
  render: function DefaultStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputComboBox
        placeholder="Type or select..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
        options={fruitOptions}
      />
    );
  },
};

export const InputModeNoOptions: Story = {
  render: function InputModeStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputComboBox
        placeholder="Type anything..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
      />
    );
  },
};

export const StrictMode: Story = {
  render: function StrictStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputComboBox
        placeholder="Select a fruit (strict)"
        value={value}
        onChange={(e) => setValue(e.target.value)}
        options={fruitOptions}
        strict
      />
    );
  },
};

export const WithPreselectedValue: Story = {
  render: function PreselectedStory() {
    const [value, setValue] = React.useState("cherry");
    return (
      <InputComboBox
        placeholder="Select a fruit"
        value={value}
        onChange={(e) => setValue(e.target.value)}
        onValueChange={setValue}
        options={fruitOptions}
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <InputComboBox
      placeholder="Cannot interact"
      value="banana"
      options={fruitOptions}
      disabled
    />
  ),
};

export const WithSearchIcon: Story = {
  render: function SearchIconStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputComboBox
        placeholder="Search fruits..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
        options={fruitOptions}
        leftSearchIcon
      />
    );
  },
};

export const ErrorState: Story = {
  render: function ErrorStory() {
    const [value, setValue] = React.useState("invalid-value");
    return (
      <InputComboBox
        placeholder="Select a fruit"
        value={value}
        onChange={(e) => setValue(e.target.value)}
        options={fruitOptions}
        isError
      />
    );
  },
};

export const WithOtherOptions: Story = {
  render: function OtherOptionsStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputComboBox
        placeholder="Search or select..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
        options={fruitOptions}
        showOtherOptions
        separatorLabel="Other fruits"
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/InputComboBox.test.tsx
================================================
import React from "react";
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
import "@testing-library/jest-dom";
import userEvent from "@testing-library/user-event";
import InputComboBox from "./InputComboBox";

// Mock createPortal for dropdown rendering
jest.mock("react-dom", () => ({
  ...jest.requireActual("react-dom"),
  createPortal: (node: React.ReactNode) => node,
}));

// Mock scrollIntoView which is not available in jsdom
Element.prototype.scrollIntoView = jest.fn();

const mockOptions = [
  { value: "apple", label: "Apple" },
  { value: "banana", label: "Banana" },
  { value: "cherry", label: "Cherry" },
];

const mockOptionsWithDescriptions = [
  { value: "apple", label: "Apple", description: "A red fruit" },
  { value: "banana", label: "Banana", description: "A yellow fruit" },
];

function setupUser() {
  return userEvent.setup({ delay: null });
}

describe("InputComboBox", () => {
  describe("Rendering", () => {
    test("renders with placeholder", () => {
      render(
        <InputComboBox
          placeholder="Select an option"
          value=""
          options={mockOptions}
        />
      );
      const input = screen.getByPlaceholderText("Select an option");
      expect(input).toBeInTheDocument();
    });

    test("renders with initial value", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="apple"
          options={mockOptions}
        />
      );
      const input = screen.getByDisplayValue("Apple");
      expect(input).toBeInTheDocument();
    });

    test("renders without options (input mode)", () => {
      render(<InputComboBox placeholder="Type here" value="" options={[]} />);
      const input = screen.getByPlaceholderText("Type here");
      expect(input).toBeInTheDocument();
    });

    test("renders disabled state", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          disabled
        />
      );
      const input = screen.getByPlaceholderText("Select");
      expect(input).toBeDisabled();
    });

    test("renders with options that have descriptions", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptionsWithDescriptions}
        />
      );
      const input = screen.getByPlaceholderText("Select");
      fireEvent.focus(input);
      expect(screen.getByText("A red fruit")).toBeInTheDocument();
    });
  });

  describe("Dropdown Behavior", () => {
    test("opens dropdown on focus when options exist", () => {
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");
      fireEvent.focus(input);
      expect(screen.getByRole("listbox")).toBeInTheDocument();
    });

    test("does not open dropdown on focus when no options", () => {
      render(<InputComboBox placeholder="Select" value="" options={[]} />);
      const input = screen.getByPlaceholderText("Select");
      fireEvent.focus(input);
      expect(screen.queryByRole("listbox")).not.toBeInTheDocument();
    });

    test("closes dropdown on escape", async () => {
      const user = setupUser();
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      expect(screen.getByRole("listbox")).toBeInTheDocument();

      await user.keyboard("{Escape}");
      expect(screen.queryByRole("listbox")).not.toBeInTheDocument();
    });

    test("shows all options on focus when a value is already selected", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="apple"
          options={mockOptions}
        />
      );
      const input = screen.getByDisplayValue("Apple");
      fireEvent.focus(input);

      const options = screen.getAllByRole("option");
      expect(options.length).toBe(3);
    });

    test("closes dropdown on tab", async () => {
      const user = setupUser();
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      expect(screen.getByRole("listbox")).toBeInTheDocument();

      await user.tab();
      expect(screen.queryByRole("listbox")).not.toBeInTheDocument();
    });
  });

  describe("Keyboard Navigation", () => {
    test("ArrowDown opens dropdown and highlights first option", async () => {
      const user = setupUser();
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      await user.keyboard("{ArrowDown}");

      const listbox = screen.getByRole("listbox");
      expect(listbox).toBeInTheDocument();
    });

    test("ArrowUp moves highlight up through options", async () => {
      const user = setupUser();
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      await user.keyboard("{ArrowDown}");
      await user.keyboard("{ArrowDown}");
      await user.keyboard("{ArrowUp}");

      // Highlight should have moved
      expect(screen.getByRole("listbox")).toBeInTheDocument();
    });

    test("Enter selects highlighted option", async () => {
      const handleValueChange = jest.fn();
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          onValueChange={handleValueChange}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      await user.keyboard("{ArrowDown}");
      await user.keyboard("{Enter}");

      expect(handleValueChange).toHaveBeenCalledWith("apple");
    });
  });

  describe("Filtering", () => {
    test("filters options based on input value", async () => {
      const user = setupUser();
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "app");

      // In non-strict mode, searching shows:
      // 1) a create option for the current input and
      // 2) matched options.
      const options = screen.getAllByRole("option");
      expect(options.length).toBe(2);
      expect(screen.getByLabelText('Create "app"')).toBeInTheDocument();
      expect(
        options.some((option) => option.textContent?.includes("Apple"))
      ).toBe(true);
      expect(screen.queryByText("Banana")).not.toBeInTheDocument();
    });

    test("shows 'No options found' when no matches and strict mode", async () => {
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          strict
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "xyz");

      expect(screen.getByText("No options found")).toBeInTheDocument();
    });

    test("shows separator between matched and unmatched options when enabled", async () => {
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          separatorLabel="Other fruits"
          showOtherOptions
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "app");

      expect(screen.getByText("Other fruits")).toBeInTheDocument();
    });
  });

  describe("Selection", () => {
    test("clicking option selects it and closes dropdown", async () => {
      const handleChange = jest.fn();
      const handleValueChange = jest.fn();
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          onChange={handleChange}
          onValueChange={handleValueChange}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      const option = screen.getByText("Banana");
      await user.click(option);

      expect(handleChange).toHaveBeenCalled();
      expect(handleValueChange).toHaveBeenCalledWith("banana");
      expect(screen.queryByRole("listbox")).not.toBeInTheDocument();
    });

    test("displays label instead of value when closed", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="apple"
          options={mockOptions}
        />
      );
      // Should show "Apple" (label) not "apple" (value)
      expect(screen.getByDisplayValue("Apple")).toBeInTheDocument();
    });
  });

  describe("Strict Mode", () => {
    test("strict=true shows error when value not in options", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="invalid"
          options={mockOptions}
          strict
        />
      );
      expect(
        screen.getByText("Please select a valid option from the list")
      ).toBeInTheDocument();
    });

    test("strict=false allows custom values", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="custom-value"
          options={mockOptions}
          strict={false}
        />
      );
      expect(
        screen.queryByText("Please select a valid option from the list")
      ).not.toBeInTheDocument();
    });

    test("strict=false shows create option when no matches", async () => {
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          strict={false}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "newvalue");

      // Should show the create option with the typed value
      expect(screen.getByText("newvalue")).toBeInTheDocument();
    });
  });

  describe("External Error State", () => {
    test("shows error styling when isError is true", () => {
      const { container } = render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          isError
        />
      );
      // The input should have error styling applied
      expect(container.querySelector("input")).toBeInTheDocument();
    });

    test("does not show internal error when isError is provided", () => {
      render(
        <InputComboBox
          placeholder="Select"
          value="invalid"
          options={mockOptions}
          strict
          isError={false}
        />
      );
      // Internal validation error should not show when isError is explicitly false
      expect(
        screen.queryByText("Please select a valid option from the list")
      ).not.toBeInTheDocument();
    });
  });

  describe("Accessibility", () => {
    test("has correct ARIA attributes", () => {
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByRole("combobox");
      expect(input).toHaveAttribute("aria-autocomplete", "list");
      expect(input).toHaveAttribute("aria-expanded", "false");
    });

    test("aria-expanded is true when dropdown is open", () => {
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByRole("combobox");
      fireEvent.focus(input);
      expect(input).toHaveAttribute("aria-expanded", "true");
    });

    test("options have role option", () => {
      render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");
      fireEvent.focus(input);

      const options = screen.getAllByRole("option");
      expect(options.length).toBe(3);
    });

    test("listbox has correct aria-label", () => {
      render(
        <InputComboBox
          placeholder="Select a fruit"
          value=""
          options={mockOptions}
        />
      );
      const input = screen.getByPlaceholderText("Select a fruit");
      fireEvent.focus(input);

      const listbox = screen.getByRole("listbox");
      expect(listbox).toHaveAttribute("aria-label", "Select a fruit");
    });
  });

  describe("Text Highlighting", () => {
    test("matching text is highlighted in option labels", async () => {
      const user = setupUser();
      const { container } = render(
        <InputComboBox placeholder="Select" value="" options={mockOptions} />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "app");

      // Look for the bold/highlighted text
      const boldText = container.querySelector(".font-semibold");
      expect(boldText).toBeInTheDocument();
      expect(boldText?.textContent).toBe("App");
    });
  });

  describe("onChange vs onValueChange", () => {
    test("onChange is called on every keystroke", async () => {
      const handleChange = jest.fn();
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          onChange={handleChange}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "abc");

      expect(handleChange).toHaveBeenCalledTimes(3);
    });

    test("onValueChange is only called on option select", async () => {
      const handleChange = jest.fn();
      const handleValueChange = jest.fn();
      const user = setupUser();
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={mockOptions}
          onChange={handleChange}
          onValueChange={handleValueChange}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.type(input, "app");
      expect(handleValueChange).not.toHaveBeenCalled();

      // Get the Apple option by role and click it
      const options = screen.getAllByRole("option");
      const appleOption = options.find((opt) => opt.textContent === "Apple");
      expect(appleOption).toBeDefined();
      await user.click(appleOption!);
      expect(handleValueChange).toHaveBeenCalledWith("apple");
    });
  });

  describe("Disabled Options", () => {
    test("disabled options cannot be selected", async () => {
      const handleValueChange = jest.fn();
      const user = setupUser();
      const optionsWithDisabled = [
        { value: "apple", label: "Apple" },
        { value: "banana", label: "Banana", disabled: true },
      ];
      render(
        <InputComboBox
          placeholder="Select"
          value=""
          options={optionsWithDisabled}
          onValueChange={handleValueChange}
        />
      );
      const input = screen.getByPlaceholderText("Select");

      await user.click(input);
      const disabledOption = screen.getByText("Banana");
      await user.click(disabledOption);

      expect(handleValueChange).not.toHaveBeenCalled();
    });
  });
});


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/InputComboBox.tsx
================================================
"use client";

/**
 * InputComboBox - A flexible combo box component that combines input and select functionality
 *
 * Features:
 * - Dual mode: Acts as input when no options, acts as filterable select with options
 * - Automatic filtering based on user input
 * - Strict/non-strict mode: Controls whether only option values are allowed
 * - Built-in validation with inline error display
 * - Full accessibility with ARIA support
 * - Integrates with FormField and form libraries
 * - Based on InputTypeIn with dropdown functionality
 * - **InputSelect API compatible**: Can be used as a drop-in replacement for InputSelect
 *
 * @example Basic Usage - Input Mode (no options)
 * ```tsx
 * const [value, setValue] = useState("");
 *
 * <InputComboBox
 *   placeholder="Enter or select"
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 * />
 * ```
 *
 * @example Select Mode with Filtering
 * ```tsx
 * const options = [
 *   { value: "apple", label: "Apple" },
 *   { value: "banana", label: "Banana" },
 * ];
 *
 * <InputComboBox
 *   placeholder="Select fruit"
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 *   options={options}
 *   strict={true}
 * />
 * ```
 *
 * @example InputSelect-compatible API (drop-in replacement)
 * ```tsx
 * // Works exactly like InputSelect but with filtering capability
 * // onValueChange is only called when user selects from dropdown
 * <InputComboBox
 *   value={model}
 *   onValueChange={(value) => {
 *     setModel(value);
 *     testApiKey(value); // Only called when option is selected
 *   }}
 *   options={modelOptions}
 *   placeholder="Select model"
 *   isError={!!error}
 *   rightSection={<RefreshButton />}
 * />
 * ```
 *
 * @example With FormField Integration
 * ```tsx
 * <FormField state={error ? "error" : "idle"}>
 *   <FormField.Label>Country</FormField.Label>
 *   <FormField.Control asChild>
 *     <InputComboBox
 *       placeholder="Select or type country"
 *       value={country}
 *       onChange={(e) => setCountry(e.target.value)}
 *       options={countryOptions}
 *       strict={false}
 *       onValidationError={setError}
 *     />
 *   </FormField.Control>
 * </FormField>
 * ```
 */

import React, {
  useCallback,
  useContext,
  useMemo,
  useRef,
  useId,
  useEffect,
} from "react";
import {
  useFloating,
  autoUpdate,
  flip,
  offset,
  shift,
  size,
} from "@floating-ui/react-dom";
import { cn, noProp } from "@/lib/utils";
import InputTypeIn from "../InputTypeIn";
import { FieldContext } from "../../form/FieldContext";
import { Button } from "@opal/components";
import { FieldMessage } from "../../messages/FieldMessage";

// Hooks
import {
  useComboBoxState,
  useComboBoxKeyboard,
  useOptionFiltering,
} from "./hooks";
import { useClickOutside } from "@/hooks/useClickOutside";
import { useValidation } from "./utils/validation";
import { buildAriaAttributes } from "./utils/aria";

// Components
import { ComboBoxDropdown } from "./components/ComboBoxDropdown";

// Types
import { InputComboBoxProps, ComboBoxOption } from "./types";
import { SvgChevronDown, SvgChevronUp } from "@opal/icons";
import { WithoutStyles } from "@/types";

const InputComboBox = ({
  value,
  onChange,
  onValueChange,
  options = [],
  strict = false,
  disabled = false,
  placeholder,
  isError: externalIsError,
  onValidationError,
  name,
  leftSearchIcon = false,
  rightSection,
  separatorLabel = "Other options",
  showAddPrefix = false,
  showOtherOptions = false,
  ...rest
}: WithoutStyles<InputComboBoxProps>) => {
  const inputRef = useRef<HTMLInputElement>(null);
  const dropdownRef = useRef<HTMLDivElement>(null);
  const fieldContext = useContext(FieldContext);

  const hasOptions = options.length > 0;

  //State Management Hook
  const {
    isOpen,
    setIsOpen,
    inputValue,
    setInputValue,
    highlightedIndex,
    setHighlightedIndex,
    isKeyboardNav,
    setIsKeyboardNav,
  } = useComboBoxState({ value, options });

  // Filtering Hook
  const { matchedOptions, unmatchedOptions, hasSearchTerm } =
    useOptionFiltering({ options, inputValue });
  const visibleUnmatchedOptions =
    hasSearchTerm && showOtherOptions ? unmatchedOptions : [];

  // Whether to show the create option (always show when typing in non-strict mode)
  const showCreateOption = !strict && hasSearchTerm && inputValue.trim() !== "";

  // Combined list for keyboard navigation (includes create option when shown)
  // Only show matched options when searching (hide unmatched)
  const allVisibleOptions = useMemo(() => {
    const baseOptions = [...matchedOptions, ...visibleUnmatchedOptions];
    if (showCreateOption) {
      // Prepend a synthetic option for the "create new" item
      return [{ value: inputValue, label: inputValue }, ...baseOptions];
    }
    return baseOptions;
  }, [matchedOptions, visibleUnmatchedOptions, showCreateOption, inputValue]);

  // Floating UI for dropdown positioning
  const { refs, floatingStyles } = useFloating({
    open: isOpen,
    placement: "bottom-start",
    middleware: [
      offset(4),
      flip(),
      shift({ padding: 8 }),
      size({
        apply({ rects, elements }) {
          Object.assign(elements.floating.style, {
            width: `${rects.reference.width}px`,
          });
        },
      }),
    ],
    whileElementsMounted: autoUpdate,
  });

  // Check if an option is an exact match
  const isExactMatch = useCallback(
    (option: ComboBoxOption) => {
      const currentValue = (inputValue || value || "").trim().toLowerCase();
      if (!currentValue) return false;

      return (
        option.value.toLowerCase() === currentValue ||
        option.label.toLowerCase() === currentValue
      );
    },
    [inputValue, value]
  );

  // Validation Logic
  const { isValid, errorMessage } = useValidation({
    value,
    options,
    strict,
    externalIsError,
    onValidationError,
  });

  // Sync highlightedIndex with exact match when typing (not keyboard nav)
  useEffect(() => {
    // Skip if keyboard navigating or dropdown closed
    if (isKeyboardNav || !isOpen) return;
    if (!inputValue.trim()) return;

    const exactMatchIndex = allVisibleOptions.findIndex(
      (opt) =>
        opt.value.toLowerCase() === inputValue.trim().toLowerCase() ||
        opt.label.toLowerCase() === inputValue.trim().toLowerCase()
    );

    if (exactMatchIndex >= 0) {
      setHighlightedIndex(exactMatchIndex);
    }
  }, [
    inputValue,
    allVisibleOptions,
    isKeyboardNav,
    isOpen,
    setHighlightedIndex,
  ]);

  // Event Handlers
  const handleInputChange = useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      const newValue = e.target.value;
      setInputValue(newValue);

      // Only call onChange while typing (for controlled input behavior)
      // onValueChange is only called when selecting from dropdown
      onChange?.(e);

      // Open dropdown when user starts typing and there are options
      if (hasOptions && !isOpen) {
        setIsOpen(true);
      }

      // Auto-highlight first match when typing
      setHighlightedIndex(0);
      setIsKeyboardNav(false); // Reset keyboard navigation mode when typing
    },
    [
      onChange,
      hasOptions,
      isOpen,
      setInputValue,
      setIsOpen,
      setHighlightedIndex,
      setIsKeyboardNav,
    ]
  );

  const handleOptionSelect = useCallback(
    (option: ComboBoxOption) => {
      if (option.disabled) return;

      setInputValue(option.value);

      // Support both onChange (event) and onValueChange (value) patterns
      if (onChange) {
        const syntheticEvent = {
          target: { value: option.value },
          currentTarget: { value: option.value },
          type: "change",
          bubbles: true,
          cancelable: true,
        } as React.ChangeEvent<HTMLInputElement>;
        onChange(syntheticEvent);
      }

      onValueChange?.(option.value);

      setIsOpen(false);
      inputRef.current?.focus();
    },
    [onChange, onValueChange, setInputValue, setIsOpen]
  );

  // Keyboard Navigation Hook
  const { handleKeyDown } = useComboBoxKeyboard({
    isOpen,
    setIsOpen,
    highlightedIndex,
    setHighlightedIndex,
    setIsKeyboardNav,
    allVisibleOptions,
    onSelect: handleOptionSelect,
    hasOptions,
  });

  // Click Outside Hook
  useClickOutside<HTMLElement>(
    [
      inputRef as React.RefObject<HTMLElement>,
      dropdownRef as React.RefObject<HTMLElement>,
    ],
    useCallback(() => {
      setIsOpen(false);
      setIsKeyboardNav(false);
    }, [setIsOpen, setIsKeyboardNav]),
    isOpen
  );

  const handleFocus = useCallback(() => {
    if (hasOptions) {
      setInputValue("");
      setIsOpen(true);
      setHighlightedIndex(-1);
      setIsKeyboardNav(false);
    }
  }, [
    hasOptions,
    setInputValue,
    setIsOpen,
    setHighlightedIndex,
    setIsKeyboardNav,
  ]);

  const toggleDropdown = useCallback(() => {
    if (!disabled && hasOptions) {
      setIsOpen((prev) => {
        const newOpen = !prev;
        if (newOpen) {
          setInputValue("");
          setHighlightedIndex(-1);
        }
        return newOpen;
      });
      inputRef.current?.focus();
    }
  }, [disabled, hasOptions, setIsOpen, setInputValue, setHighlightedIndex]);

  const autoId = useId();
  const fieldId = fieldContext?.baseId || name || `combo-box-${autoId}`;

  // ARIA Attributes Builder
  const ariaProps = buildAriaAttributes({
    hasOptions,
    isOpen,
    isValid,
    highlightedIndex,
    fieldId,
    allVisibleOptions,
    placeholder,
  });

  // Get display label for the current value
  const displayLabel = useMemo(() => {
    // If dropdown is open, show what user is typing
    if (isOpen) return inputValue;

    // When closed, show the matched option label or the value
    if (!value || !hasOptions) return inputValue;
    const option = options.find((opt) => opt.value === value);
    return option ? option.label : inputValue;
  }, [isOpen, inputValue, value, options, hasOptions]);

  return (
    <div ref={refs.setReference} className="relative w-full">
      <>
        <InputTypeIn
          ref={inputRef}
          placeholder={placeholder}
          value={displayLabel}
          onChange={handleInputChange}
          onFocus={handleFocus}
          onKeyDown={handleKeyDown}
          variant={disabled ? "disabled" : !isValid ? "error" : undefined}
          leftSearchIcon={leftSearchIcon}
          showClearButton={false}
          rightSection={
            <>
              {rightSection && (
                <div
                  className="flex items-center"
                  onPointerDown={(e) => {
                    e.stopPropagation();
                  }}
                  onClick={(e) => {
                    e.stopPropagation();
                  }}
                >
                  {rightSection}
                </div>
              )}
              {hasOptions && (
                <Button
                  disabled={disabled}
                  prominence="tertiary"
                  size="sm"
                  onClick={noProp(toggleDropdown)}
                  icon={isOpen ? SvgChevronUp : SvgChevronDown}
                  aria-label={isOpen ? "Close dropdown" : "Open dropdown"}
                  tabIndex={-1}
                  type="button"
                />
              )}
            </>
          }
          {...ariaProps}
          {...rest}
        />

        {/* Dropdown - Rendered in Portal */}
        <ComboBoxDropdown
          ref={dropdownRef}
          isOpen={isOpen}
          disabled={disabled}
          floatingStyles={floatingStyles}
          setFloatingRef={refs.setFloating}
          fieldId={fieldId}
          placeholder={placeholder}
          matchedOptions={matchedOptions}
          unmatchedOptions={visibleUnmatchedOptions}
          hasSearchTerm={hasSearchTerm}
          separatorLabel={separatorLabel}
          value={value}
          highlightedIndex={highlightedIndex}
          onSelect={handleOptionSelect}
          onMouseEnter={(index) => {
            setIsKeyboardNav(false);
            setHighlightedIndex(index);
          }}
          onMouseMove={() => {
            if (isKeyboardNav) {
              setIsKeyboardNav(false);
            }
          }}
          isExactMatch={isExactMatch}
          inputValue={inputValue}
          allowCreate={!strict}
          showCreateOption={showCreateOption}
          showAddPrefix={showAddPrefix}
        />
      </>

      {/* Error message - only show internal error messages when not using external isError */}
      {!isValid && errorMessage && externalIsError === undefined && (
        <FieldMessage variant="error" className="ml-0.5 mt-1">
          <FieldMessage.Content
            id={`${fieldId}-error`}
            role="alert"
            className="ml-0.5"
          >
            {errorMessage}
          </FieldMessage.Content>
        </FieldMessage>
      )}
    </div>
  );
};

export default InputComboBox;


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/components/ComboBoxDropdown.tsx
================================================
import React, { useEffect, forwardRef } from "react";
import { createPortal } from "react-dom";
import { cn } from "@/lib/utils";
import { OptionsList } from "./OptionsList";
import { ComboBoxOption } from "../types";

interface ComboBoxDropdownProps {
  isOpen: boolean;
  disabled: boolean;
  floatingStyles: React.CSSProperties;
  setFloatingRef: (node: HTMLDivElement | null) => void;
  fieldId: string;
  placeholder: string;
  matchedOptions: ComboBoxOption[];
  unmatchedOptions: ComboBoxOption[];
  hasSearchTerm: boolean;
  separatorLabel: string;
  value: string;
  highlightedIndex: number;
  onSelect: (option: ComboBoxOption) => void;
  onMouseEnter: (index: number) => void;
  onMouseMove: () => void;
  isExactMatch: (option: ComboBoxOption) => boolean;
  /** Current input value for creating new option */
  inputValue: string;
  /** Whether to show create option when no exact match */
  allowCreate: boolean;
  /** Whether to show create option (pre-computed by parent) */
  showCreateOption: boolean;
  /** Show "Add" prefix in create option */
  showAddPrefix: boolean;
}

/**
 * Renders the dropdown menu in a portal
 * Handles scroll-into-view for highlighted options
 */
export const ComboBoxDropdown = forwardRef<
  HTMLDivElement,
  ComboBoxDropdownProps
>(
  (
    {
      isOpen,
      disabled,
      floatingStyles,
      setFloatingRef,
      fieldId,
      placeholder,
      matchedOptions,
      unmatchedOptions,
      hasSearchTerm,
      separatorLabel,
      value,
      highlightedIndex,
      onSelect,
      onMouseEnter,
      onMouseMove,
      isExactMatch,
      inputValue,
      allowCreate,
      showCreateOption,
      showAddPrefix,
    },
    ref
  ) => {
    // Scroll highlighted option into view
    useEffect(() => {
      if (
        isOpen &&
        ref &&
        typeof ref !== "function" &&
        ref.current &&
        highlightedIndex >= 0
      ) {
        const highlightedElement = ref.current.querySelector(
          `[data-index="${highlightedIndex}"]`
        );
        if (highlightedElement) {
          highlightedElement.scrollIntoView({
            block: "nearest",
            behavior: "instant",
          });
        }
      }
    }, [highlightedIndex, isOpen, ref]);

    if (!isOpen || disabled || typeof document === "undefined") {
      return null;
    }

    return createPortal(
      <div
        ref={(node) => {
          // Handle both the forwarded ref and the floating ref
          setFloatingRef(node);
          if (typeof ref === "function") {
            ref(node);
          } else if (ref) {
            ref.current = node;
          }
        }}
        id={`${fieldId}-listbox`}
        role="listbox"
        aria-label={placeholder}
        className={cn(
          "z-[10000] bg-background-neutral-00 border border-border-02 rounded-12 shadow-02 max-h-60 overflow-y-auto overflow-x-hidden p-1 pointer-events-auto touch-auto"
        )}
        style={{
          ...floatingStyles,
          // Ensure the dropdown can scroll independently
          overscrollBehavior: "contain",
        }}
        onWheel={(e) => {
          // Prevent event from bubbling to prevent any parent scroll blocking
          e.stopPropagation();
        }}
        onTouchMove={(e) => {
          // Prevent event from bubbling for touch devices
          e.stopPropagation();
        }}
      >
        <OptionsList
          matchedOptions={matchedOptions}
          unmatchedOptions={unmatchedOptions}
          hasSearchTerm={hasSearchTerm}
          separatorLabel={separatorLabel}
          value={value}
          highlightedIndex={highlightedIndex}
          fieldId={fieldId}
          onSelect={onSelect}
          onMouseEnter={onMouseEnter}
          onMouseMove={onMouseMove}
          isExactMatch={isExactMatch}
          inputValue={inputValue}
          allowCreate={allowCreate}
          showCreateOption={showCreateOption}
          showAddPrefix={showAddPrefix}
        />
      </div>,
      document.body
    );
  }
);

ComboBoxDropdown.displayName = "ComboBoxDropdown";


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/components/OptionItem.tsx
================================================
import React from "react";
import { cn } from "@/lib/utils";
import { ComboBoxOption } from "../types";
import { sanitizeOptionId } from "../utils/aria";

interface OptionItemProps {
  option: ComboBoxOption;
  index: number;
  fieldId: string;
  isHighlighted: boolean;
  isSelected: boolean;
  isExact: boolean;
  onSelect: (option: ComboBoxOption) => void;
  onMouseEnter: (index: number) => void;
  onMouseMove: () => void;
  /** Search term to highlight in the label */
  searchTerm: string;
}

/**
 * Escapes special regex characters in a string
 */
const escapeRegex = (str: string) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Highlights matching text within a string
 */
const highlightMatch = (text: string, searchTerm: string): React.ReactNode => {
  if (!searchTerm.trim()) return text;

  const regex = new RegExp(`(${escapeRegex(searchTerm)})`, "gi");
  const parts = text.split(regex);

  if (parts.length === 1) return text;

  return parts.map((part, i) =>
    part.toLowerCase() === searchTerm.toLowerCase() ? (
      <span key={i} className="font-semibold">
        {part}
      </span>
    ) : (
      part
    )
  );
};

/**
 * Renders a single option item in the dropdown
 * Memoized to prevent unnecessary re-renders
 */
export const OptionItem = React.memo(
  ({
    option,
    index,
    fieldId,
    isHighlighted,
    isSelected,
    isExact,
    onSelect,
    onMouseEnter,
    onMouseMove,
    searchTerm,
  }: OptionItemProps) => {
    return (
      <div
        id={`${fieldId}-option-${sanitizeOptionId(option.value)}`}
        data-index={index}
        role="option"
        aria-selected={isSelected}
        aria-disabled={option.disabled}
        onClick={(e) => {
          e.stopPropagation();
          onSelect(option);
        }}
        onMouseDown={(e) => {
          e.preventDefault();
        }}
        onMouseEnter={() => onMouseEnter(index)}
        onMouseMove={onMouseMove}
        className={cn(
          "px-3 py-2 cursor-pointer transition-colors",
          "flex flex-col rounded-08",
          isExact && "bg-action-link-01",
          !isExact && isHighlighted && "bg-background-tint-02",
          !isExact && isSelected && "bg-background-tint-02",
          option.disabled &&
            "opacity-50 cursor-not-allowed bg-background-neutral-02",
          !option.disabled && !isExact && "hover:bg-background-tint-02"
        )}
      >
        <span
          className={cn(
            "font-main-ui-action",
            isExact && "text-action-link-05 font-medium",
            !isExact && "text-text-04",
            !isExact && isSelected && "font-medium"
          )}
        >
          {highlightMatch(option.label, searchTerm)}
        </span>
        {option.description && (
          <span
            className={cn(
              "mt-0.5 font-secondary-body",
              isExact ? "text-action-link-04" : "text-text-03"
            )}
          >
            {option.description}
          </span>
        )}
      </div>
    );
  }
);

OptionItem.displayName = "OptionItem";


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/components/OptionsList.tsx
================================================
import React from "react";
import Text from "@/refresh-components/texts/Text";
import { OptionItem } from "./OptionItem";
import { ComboBoxOption } from "../types";
import { cn } from "@/lib/utils";
import { SvgPlus } from "@opal/icons";
import { sanitizeOptionId } from "../utils/aria";

interface OptionsListProps {
  matchedOptions: ComboBoxOption[];
  unmatchedOptions: ComboBoxOption[];
  hasSearchTerm: boolean;
  separatorLabel: string;
  value: string;
  highlightedIndex: number;
  fieldId: string;
  onSelect: (option: ComboBoxOption) => void;
  onMouseEnter: (index: number) => void;
  onMouseMove: () => void;
  isExactMatch: (option: ComboBoxOption) => boolean;
  /** Current input value for creating new option */
  inputValue: string;
  /** Whether to show create option when no exact match */
  allowCreate: boolean;
  /** Whether to show create option (pre-computed by parent) */
  showCreateOption: boolean;
  /** Show "Add" prefix in create option */
  showAddPrefix: boolean;
}

/**
 * Renders the list of options with matched/unmatched sections
 * Includes separator between sections when filtering
 */
export const OptionsList: React.FC<OptionsListProps> = ({
  matchedOptions,
  unmatchedOptions,
  hasSearchTerm,
  separatorLabel,
  value,
  highlightedIndex,
  fieldId,
  onSelect,
  onMouseEnter,
  onMouseMove,
  isExactMatch,
  inputValue,
  allowCreate,
  showCreateOption,
  showAddPrefix,
}) => {
  // Index offset for other options when create option is shown
  const indexOffset = showCreateOption ? 1 : 0;

  if (
    matchedOptions.length === 0 &&
    unmatchedOptions.length === 0 &&
    !showCreateOption
  ) {
    return (
      <div className="px-3 py-2 text-text-02 font-secondary-body">
        No options found
      </div>
    );
  }

  return (
    <>
      {/* Create New Option */}
      {showCreateOption && (
        <div
          id={`${fieldId}-option-${sanitizeOptionId(inputValue)}`}
          data-index={0}
          role="option"
          aria-selected={false}
          aria-label={`${showAddPrefix ? "Add" : "Create"} "${inputValue}"`}
          onClick={(e) => {
            e.stopPropagation();
            onSelect({ value: inputValue, label: inputValue });
          }}
          onMouseDown={(e) => {
            e.preventDefault();
          }}
          onMouseEnter={() => onMouseEnter(0)}
          onMouseMove={onMouseMove}
          className={cn(
            "cursor-pointer transition-colors",
            "flex items-center justify-between rounded-08",
            highlightedIndex === 0 && "bg-background-tint-02",
            "hover:bg-background-tint-02",
            showAddPrefix ? "px-1.5 py-1.5" : "px-3 py-2"
          )}
        >
          <span
            className={cn(
              "font-main-ui-action truncate min-w-0",
              showAddPrefix ? "px-1" : ""
            )}
          >
            {showAddPrefix ? (
              <>
                <span className="text-text-03">Add</span>
                <span className="text-text-04">{` ${inputValue}`}</span>
              </>
            ) : (
              <span className="text-text-04">{inputValue}</span>
            )}
          </span>
          <SvgPlus
            className={cn(
              "w-4 h-4 flex-shrink-0",
              showAddPrefix ? "text-text-04 mx-1" : "text-text-03 ml-2"
            )}
          />
        </div>
      )}

      {/* Separator - show when there are options to display */}
      {separatorLabel &&
        (matchedOptions.length > 0 ||
          (!hasSearchTerm && unmatchedOptions.length > 0)) && (
          <div className="px-3 py-1">
            <Text as="p" text03 secondaryBody>
              {separatorLabel}
            </Text>
          </div>
        )}

      {/* Matched/Filtered Options */}
      {matchedOptions.map((option, idx) => {
        const globalIndex = idx + indexOffset;
        // Only highlight first exact match, not all matches
        const isExact = idx === 0 && isExactMatch(option);
        return (
          <OptionItem
            key={option.value}
            option={option}
            index={globalIndex}
            fieldId={fieldId}
            isHighlighted={globalIndex === highlightedIndex}
            isSelected={value === option.value}
            isExact={isExact}
            onSelect={onSelect}
            onMouseEnter={onMouseEnter}
            onMouseMove={onMouseMove}
            searchTerm={inputValue}
          />
        );
      })}

      {/* Unmatched Options - only show when NOT searching */}
      {!hasSearchTerm &&
        unmatchedOptions.map((option, idx) => {
          const globalIndex = matchedOptions.length + idx + indexOffset;
          const isExact = isExactMatch(option);
          return (
            <OptionItem
              key={option.value}
              option={option}
              index={globalIndex}
              fieldId={fieldId}
              isHighlighted={globalIndex === highlightedIndex}
              isSelected={value === option.value}
              isExact={isExact}
              onSelect={onSelect}
              onMouseEnter={onMouseEnter}
              onMouseMove={onMouseMove}
              searchTerm={inputValue}
            />
          );
        })}
    </>
  );
};


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/hooks.ts
================================================
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
import { ComboBoxOption } from "./types";

// =============================================================================
// HOOK: useComboBoxState
// =============================================================================

interface UseComboBoxStateProps {
  value: string;
  options: ComboBoxOption[];
}

/**
 * Manages the internal state of the ComboBox component
 * Handles state synchronization between external value prop and internal input state
 */
export function useComboBoxState({ value, options }: UseComboBoxStateProps) {
  const [isOpen, setIsOpen] = useState(false);
  const [inputValue, setInputValue] = useState(value);
  const [highlightedIndex, setHighlightedIndex] = useState(-1);
  const [isKeyboardNav, setIsKeyboardNav] = useState(false);
  const prevIsOpenRef = useRef(false);

  // Sync inputValue with the external value prop.
  // When the dropdown is closed, always reflect the controlled value.
  // When the dropdown is open, only sync if the *value prop itself* changes
  // (e.g. parent programmatically updates it), not when inputValue changes
  // (e.g. user clears the field on focus to browse all options).
  useEffect(() => {
    if (!isOpen) {
      setInputValue(value);
    }
  }, [value, isOpen]);

  useEffect(() => {
    if (isOpen) {
      const isExactOptionMatch = options.some((opt) => opt.value === value);
      if (isExactOptionMatch) {
        setInputValue(value);
      }
    }
    // Only react to value prop changes while open, not inputValue changes
  }, [value]);

  // Reset highlight and keyboard nav when closing dropdown
  useEffect(() => {
    if (!isOpen) {
      setHighlightedIndex(-1);
      setIsKeyboardNav(false);
    }
  }, [isOpen]);

  return {
    isOpen,
    setIsOpen,
    inputValue,
    setInputValue,
    highlightedIndex,
    setHighlightedIndex,
    isKeyboardNav,
    setIsKeyboardNav,
  };
}

// =============================================================================
// HOOK: useComboBoxKeyboard
// =============================================================================

interface UseComboBoxKeyboardProps {
  isOpen: boolean;
  setIsOpen: (open: boolean) => void;
  highlightedIndex: number;
  setHighlightedIndex: (index: number | ((prev: number) => number)) => void;
  setIsKeyboardNav: (isKeyboard: boolean) => void;
  allVisibleOptions: ComboBoxOption[];
  onSelect: (option: ComboBoxOption) => void;
  hasOptions: boolean;
}

/**
 * Manages keyboard navigation for the ComboBox
 * Handles arrow keys, Enter, Escape, and Tab
 */
export function useComboBoxKeyboard({
  isOpen,
  setIsOpen,
  highlightedIndex,
  setHighlightedIndex,
  setIsKeyboardNav,
  allVisibleOptions,
  onSelect,
  hasOptions,
}: UseComboBoxKeyboardProps) {
  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent<HTMLInputElement>) => {
      if (!hasOptions) return;

      switch (e.key) {
        case "ArrowDown":
          e.preventDefault();
          setIsKeyboardNav(true); // Mark as keyboard navigation
          if (!isOpen) {
            setIsOpen(true);
            setHighlightedIndex(0);
          } else {
            setHighlightedIndex((prev) => {
              // If no item highlighted yet (-1), start at 0
              if (prev === -1) return 0;
              // Otherwise move down if not at end
              return prev < allVisibleOptions.length - 1 ? prev + 1 : prev;
            });
          }
          break;
        case "ArrowUp":
          e.preventDefault();
          setIsKeyboardNav(true); // Mark as keyboard navigation
          if (isOpen) {
            setHighlightedIndex((prev) => {
              // If at first item or no highlight, don't go further up
              if (prev <= 0) return -1;
              return prev - 1;
            });
          }
          break;
        case "Enter":
          // Always prevent default and stop propagation when dropdown is open
          // to avoid bubbling to parent forms
          if (isOpen) {
            e.preventDefault();
            e.stopPropagation();
            if (highlightedIndex >= 0) {
              const option = allVisibleOptions[highlightedIndex];
              if (option) {
                onSelect(option);
              }
            }
          }
          break;
        case "Escape":
          e.preventDefault();
          setIsOpen(false);
          setIsKeyboardNav(false);
          break;
        case "Tab":
          setIsOpen(false);
          setIsKeyboardNav(false);
          break;
      }
    },
    [
      hasOptions,
      isOpen,
      allVisibleOptions,
      highlightedIndex,
      onSelect,
      setIsOpen,
      setHighlightedIndex,
      setIsKeyboardNav,
    ]
  );

  return { handleKeyDown };
}

// =============================================================================
// HOOK: useOptionFiltering
// =============================================================================

interface UseOptionFilteringProps {
  options: ComboBoxOption[];
  inputValue: string;
}

interface FilterResult {
  matchedOptions: ComboBoxOption[];
  unmatchedOptions: ComboBoxOption[];
  hasSearchTerm: boolean;
}

/**
 * Filters options based on input value
 * Splits options into matched and unmatched for better UX
 */
export function useOptionFiltering({
  options,
  inputValue,
}: UseOptionFilteringProps): FilterResult {
  return useMemo(() => {
    if (!options.length) {
      return { matchedOptions: [], unmatchedOptions: [], hasSearchTerm: false };
    }

    if (!inputValue || !inputValue.trim()) {
      return {
        matchedOptions: options,
        unmatchedOptions: [],
        hasSearchTerm: false,
      };
    }

    const searchTerm = inputValue.toLowerCase().trim();
    const matched: ComboBoxOption[] = [];
    const unmatched: ComboBoxOption[] = [];

    options.forEach((option) => {
      const matchesLabel = option.label.toLowerCase().includes(searchTerm);
      const matchesValue = option.value.toLowerCase().includes(searchTerm);

      if (matchesLabel || matchesValue) {
        matched.push(option);
      } else {
        unmatched.push(option);
      }
    });

    return {
      matchedOptions: matched,
      unmatchedOptions: unmatched,
      hasSearchTerm: true,
    };
  }, [options, inputValue]);
}


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/index.ts
================================================
export { default } from "./InputComboBox";
export type { InputComboBoxProps, ComboBoxOption } from "./types";


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/types.ts
================================================
export type ComboBoxOption = {
  value: string;
  label: string;
  description?: string;
  disabled?: boolean;
};

export interface InputComboBoxProps
  extends Omit<
    React.InputHTMLAttributes<HTMLInputElement>,
    "onChange" | "value"
  > {
  /** Current value */
  value: string;
  /** Change handler (React event style) - Called on every keystroke */
  onChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;
  /** Change handler (direct value style, for InputSelect compatibility) - Only called when option is selected from dropdown */
  onValueChange?: (value: string) => void;
  /** Array of options for select mode */
  options?: ComboBoxOption[];
  /**
   * Strict mode:
   * - true: Only option values allowed (if options exist)
   * - false: User can type anything
   */
  strict?: boolean;
  /** Disabled state */
  disabled?: boolean;
  /** Placeholder text */
  placeholder: string;
  /** External error state (for InputSelect compatibility) - overrides internal validation */
  isError?: boolean;
  /** Callback to handle validation errors - integrates with form libraries */
  onValidationError?: (errorMessage: string | null) => void;
  /** Optional name for the field (for accessibility) */
  name?: string;
  /** Left search icon */
  leftSearchIcon?: boolean;
  /** Right section for custom UI elements (e.g., refresh button) */
  rightSection?: React.ReactNode;
  /** Label for the separator between matched and unmatched options */
  separatorLabel?: string;
  /** Show "Add" prefix in create option (e.g., "Add [value]") */
  showAddPrefix?: boolean;
  /**
   * When true, keep non-matching options visible under a separator while searching.
   * Defaults to false so search results are strictly filtered.
   */
  showOtherOptions?: boolean;
}


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/utils/aria.ts
================================================
import { ComboBoxOption } from "../types";

/**
 * Sanitizes a value for use in HTML element IDs.
 * Encodes characters that are invalid in IDs (spaces, special chars).
 */
export function sanitizeOptionId(value: string): string {
  return `option-${encodeURIComponent(value)}`;
}

interface BuildAriaAttributesProps {
  hasOptions: boolean;
  isOpen: boolean;
  isValid: boolean;
  highlightedIndex: number;
  fieldId: string;
  allVisibleOptions: ComboBoxOption[];
  placeholder: string;
}

/**
 * Builds ARIA attributes for accessibility
 * Ensures proper screen reader support
 */
export function buildAriaAttributes({
  hasOptions,
  isOpen,
  isValid,
  highlightedIndex,
  fieldId,
  allVisibleOptions,
  placeholder,
}: BuildAriaAttributesProps) {
  const activeOption =
    hasOptions && isOpen && highlightedIndex >= 0
      ? allVisibleOptions[highlightedIndex]
      : undefined;

  return {
    "aria-label": placeholder,
    "aria-invalid": !isValid,
    "aria-describedby": !isValid ? `${fieldId}-error` : undefined,
    "aria-expanded": hasOptions ? isOpen : undefined,
    "aria-haspopup": hasOptions ? ("listbox" as const) : undefined,
    "aria-controls": hasOptions ? `${fieldId}-listbox` : undefined,
    "aria-activedescendant": activeOption
      ? `${fieldId}-option-${sanitizeOptionId(activeOption.value)}`
      : undefined,
    "aria-autocomplete": hasOptions ? ("list" as const) : undefined,
    role: hasOptions ? ("combobox" as const) : undefined,
  };
}


================================================
FILE: web/src/refresh-components/inputs/InputComboBox/utils/validation.ts
================================================
import { useMemo, useEffect } from "react";
import { ComboBoxOption } from "../types";

interface UseValidationProps {
  value: string;
  options: ComboBoxOption[];
  strict: boolean;
  externalIsError?: boolean;
  onValidationError?: (errorMessage: string | null) => void;
}

interface ValidationResult {
  isValid: boolean;
  errorMessage: string | null;
}

/**
 * Handles validation logic for the ComboBox
 * Supports both external error state and internal strict mode validation
 * external error state has precedence over internal validation.When we have external error, internal error is
 * not displayed we need to display external error separately
 */
export function useValidation({
  value,
  options,
  strict,
  externalIsError,
  onValidationError,
}: UseValidationProps): ValidationResult {
  const hasOptions = options.length > 0;

  // Validation logic - use external error if provided, otherwise use internal validation
  const { isValid, errorMessage } = useMemo(() => {
    // If external error is provided, use it
    if (externalIsError !== undefined) {
      return { isValid: !externalIsError, errorMessage: null };
    }

    // Otherwise use internal validation
    if (!strict || !hasOptions || !value) {
      return { isValid: true, errorMessage: null };
    }

    // In strict mode with options, value must be one of the option values
    const isValidOption = options.some((opt) => opt.value === value);

    if (!isValidOption) {
      return {
        isValid: false,
        errorMessage: "Please select a valid option from the list",
      };
    }

    return { isValid: true, errorMessage: null };
  }, [externalIsError, strict, hasOptions, value, options]);

  // Notify parent of error state
  useEffect(() => {
    onValidationError?.(errorMessage);
  }, [errorMessage, onValidationError]);

  return { isValid, errorMessage };
}


================================================
FILE: web/src/refresh-components/inputs/InputDatePicker.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputDatePicker from "./InputDatePicker";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputDatePicker> = {
  title: "refresh-components/inputs/InputDatePicker",
  component: InputDatePicker,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputDatePicker>;

export const Default: Story = {
  render: function DefaultStory() {
    const [date, setDate] = React.useState<Date | null>(null);
    return <InputDatePicker selectedDate={date} setSelectedDate={setDate} />;
  },
};

export const WithSelectedDate: Story = {
  render: function SelectedDateStory() {
    const [date, setDate] = React.useState<Date | null>(new Date(2025, 0, 15));
    return <InputDatePicker selectedDate={date} setSelectedDate={setDate} />;
  },
};

export const CustomStartYear: Story = {
  render: function CustomStartYearStory() {
    const [date, setDate] = React.useState<Date | null>(null);
    return (
      <InputDatePicker
        selectedDate={date}
        setSelectedDate={setDate}
        startYear={2020}
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <InputDatePicker
      selectedDate={new Date()}
      setSelectedDate={() => {}}
      disabled
    />
  ),
};


================================================
FILE: web/src/refresh-components/inputs/InputDatePicker.tsx
================================================
"use client";

import { Button } from "@opal/components";
import { isAfterDate, normalizeDate } from "@/lib/dateUtils";
import Calendar from "@/refresh-components/Calendar";
import Popover from "@/refresh-components/Popover";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { useMemo, useState } from "react";
import { SvgCalendar } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";

export interface InputDatePickerProps {
  name?: string;
  selectedDate: Date | null;
  setSelectedDate: (date: Date | null) => void;
  startYear?: number;
  disabled?: boolean;
  maxDate?: Date;
}

function extractYear(date: Date | null): number {
  return (date ?? new Date()).getFullYear();
}

function clampToMaxDate(date: Date, maxDate?: Date): Date {
  if (!maxDate || !isAfterDate(date, maxDate)) {
    return date;
  }

  return normalizeDate(maxDate);
}

export default function InputDatePicker({
  name,
  selectedDate,
  setSelectedDate,
  startYear = 1970,
  disabled = false,
  maxDate,
}: InputDatePickerProps) {
  const validStartYear = Math.max(startYear, 1970);
  const normalizedMaxDate = useMemo(
    () => (maxDate ? normalizeDate(maxDate) : undefined),
    [maxDate]
  );
  const currYear = Math.max(
    validStartYear,
    extractYear(normalizedMaxDate ?? new Date())
  );
  const years = useMemo(
    () =>
      Array(currYear - validStartYear + 1)
        .fill(currYear)
        .map((year, index) => year - index),
    [currYear, validStartYear]
  );
  const [open, setOpen] = useState(false);
  const [displayedMonth, setDisplayedMonth] = useState<Date>(
    clampToMaxDate(
      selectedDate ?? normalizedMaxDate ?? new Date(),
      normalizedMaxDate
    )
  );

  function handleDateSelection(date: Date) {
    setSelectedDate(date);
    setDisplayedMonth(date);
    setOpen(false);
  }

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <Popover.Trigger asChild id={name} name={name}>
        <Button disabled={disabled} prominence="secondary" icon={SvgCalendar}>
          {selectedDate ? selectedDate.toLocaleDateString() : "Select Date"}
        </Button>
      </Popover.Trigger>
      <Popover.Content>
        <Section padding={0.25}>
          <Section flexDirection="row" gap={0.5}>
            <InputSelect
              value={`${extractYear(displayedMonth)}`}
              onValueChange={(value) => {
                const year = parseInt(value);
                setDisplayedMonth(new Date(year, 0));
              }}
            >
              <InputSelect.Trigger />
              <InputSelect.Content>
                {years.map((year) => (
                  <InputSelect.Item key={year} value={`${year}`}>
                    {`${year}`}
                  </InputSelect.Item>
                ))}
              </InputSelect.Content>
            </InputSelect>
            <Button
              onClick={() => {
                const now = normalizedMaxDate ?? new Date();
                setSelectedDate(now);
                setDisplayedMonth(now);
                setOpen(false);
              }}
            >
              Today
            </Button>
          </Section>
          <Calendar
            mode="single"
            selected={selectedDate ?? undefined}
            onSelect={(date) => {
              if (date) {
                handleDateSelection(date);
              }
            }}
            month={displayedMonth}
            onMonthChange={setDisplayedMonth}
            disabled={
              normalizedMaxDate ? [{ after: normalizedMaxDate }] : undefined
            }
            startMonth={new Date(validStartYear, 0)}
            endMonth={normalizedMaxDate ?? new Date()}
            showOutsideDays={false}
          />
        </Section>
      </Popover.Content>
    </Popover>
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputFile.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputFile from "./InputFile";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputFile> = {
  title: "refresh-components/inputs/InputFile",
  component: InputFile,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputFile>;

export const Default: Story = {
  render: function DefaultStory() {
    const [, setValue] = React.useState("");
    return (
      <InputFile placeholder="Paste or attach a file..." setValue={setValue} />
    );
  },
};

export const WithAcceptFilter: Story = {
  render: function AcceptFilterStory() {
    const [, setValue] = React.useState("");
    return (
      <InputFile
        placeholder="JSON files only..."
        setValue={setValue}
        accept="application/json,.json"
      />
    );
  },
};

export const WithMaxSize: Story = {
  render: function MaxSizeStory() {
    const [, setValue] = React.useState("");
    return (
      <InputFile
        placeholder="Max 100KB..."
        setValue={setValue}
        maxSizeKb={100}
        onFileSizeExceeded={({ file, maxSizeKb }) =>
          alert(`${file.name} exceeds ${maxSizeKb}KB limit`)
        }
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <InputFile placeholder="Cannot upload" setValue={() => {}} disabled />
  ),
};

export const ErrorState: Story = {
  render: function ErrorStory() {
    const [, setValue] = React.useState("");
    return (
      <InputFile placeholder="Required file..." setValue={setValue} error />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputFile.tsx
================================================
"use client";

import React, { useRef, useState } from "react";
import InputTypeIn, {
  InputTypeInProps,
} from "@/refresh-components/inputs/InputTypeIn";
import { Button } from "@opal/components";
import { noProp } from "@/lib/utils";
import { SvgPaperclip } from "@opal/icons";

export interface InputFileProps
  extends Omit<
    InputTypeInProps,
    "type" | "rightSection" | "value" | "onChange" | "readOnly" | "onClear"
  > {
  /**
   * Whether the input is disabled.
   */
  disabled?: boolean;
  /**
   * Whether the input has an error.
   */
  error?: boolean;
  // Receives the extracted file content (text) or pasted value
  setValue: (value: string) => void;
  // Called when a value is committed via file selection or paste (not on each keystroke)
  onValueSet?: (value: string, source: "file" | "paste") => void;
  // HTML accept attribute e.g. "application/json" or ".txt,.md"
  accept?: string;
  // Maximum allowed file size in kilobytes. If exceeded, file is rejected.
  maxSizeKb?: number;
  // Optional callback when the selected file exceeds max size
  onFileSizeExceeded?: (args: { file: File; maxSizeKb: number }) => void;
}

export default function InputFile({
  setValue,
  onValueSet,
  accept,
  maxSizeKb,
  onFileSizeExceeded,
  disabled,
  error,
  variant,
  placeholder,
  className,
  ...rest
}: InputFileProps) {
  const [displayValue, setDisplayValue] = useState<string>("");
  const [selectedFileName, setSelectedFileName] = useState<string | null>(null);
  const [isFileMode, setIsFileMode] = useState<boolean>(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

  // Derive disabled/readOnly state from either the boolean props or the variant
  const isDisabled = disabled || variant === "disabled";
  const isReadOnly = variant === "readOnly";
  const isNonEditable = isDisabled || isReadOnly;

  function openFilePicker() {
    if (isNonEditable) return;
    fileInputRef.current?.click();
  }

  function handleFileChange(e: React.ChangeEvent<HTMLInputElement>) {
    const file = e.target.files?.[0];
    if (!file) return;

    // Enforce file size limit if provided
    if (typeof maxSizeKb === "number" && maxSizeKb >= 0) {
      const maxBytes = maxSizeKb * 1024;
      if (file.size > maxBytes) {
        onFileSizeExceeded?.({ file, maxSizeKb });
        // Reset file input to allow re-selecting the same file
        e.target.value = "";
        return;
      }
    }

    const reader = new FileReader();
    reader.onload = () => {
      const textContent =
        typeof reader.result === "string" ? reader.result : "";
      setValue(textContent);
      setSelectedFileName(file.name);
      setDisplayValue(file.name);
      setIsFileMode(true);
      onValueSet?.(textContent, "file");
    };
    reader.onerror = () => {
      // Reset state on error
      setSelectedFileName(null);
      setDisplayValue("");
      setIsFileMode(false);
      setValue("");
    };
    reader.readAsText(file);
    // clear the input value to allow re-selecting the same file if needed
    e.target.value = "";
  }

  function handleClear() {
    setSelectedFileName(null);
    setDisplayValue("");
    setIsFileMode(false);
    setValue("");
  }

  function handleChangeWhenTyping(e: React.ChangeEvent<HTMLInputElement>) {
    if (isFileMode) return; // ignore typing when file-mode is active
    const next = e.target.value;
    setDisplayValue(next);
    setValue(next);
  }

  function handlePaste(e: React.ClipboardEvent<HTMLInputElement>) {
    // Don't allow paste when non-editable
    if (isNonEditable) return;
    // Switch to editable mode and use pasted text as the value
    const pastedText = e.clipboardData.getData("text");
    if (!pastedText) return;
    e.preventDefault();
    setIsFileMode(false);
    setSelectedFileName(null);
    setDisplayValue(pastedText);
    setValue(pastedText);
    onValueSet?.(pastedText, "paste");
  }

  const rightSection = (
    <Button
      disabled={isNonEditable}
      icon={SvgPaperclip}
      onClick={noProp(openFilePicker)}
      type="button"
      prominence="tertiary"
      size="sm"
      aria-label="Attach file"
    />
  );

  return (
    <>
      <input
        ref={fileInputRef}
        type="file"
        accept={accept}
        onChange={handleFileChange}
        aria-hidden
        className="hidden"
        tabIndex={-1}
        disabled={isNonEditable}
      />
      <InputTypeIn
        {...rest}
        className={className}
        placeholder={placeholder}
        variant={isDisabled ? "disabled" : error ? "error" : variant}
        value={displayValue}
        onChange={handleChangeWhenTyping}
        onPaste={handlePaste}
        onClear={handleClear}
        readOnly={isFileMode || isReadOnly}
        rightSection={rightSection}
      />
    </>
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputImage.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputImage from "./InputImage";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputImage> = {
  title: "refresh-components/inputs/InputImage",
  component: InputImage,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div
          style={{
            width: 320,
            display: "flex",
            justifyContent: "center",
            padding: 24,
          }}
        >
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputImage>;

export const Empty: Story = {
  args: {
    onDrop: () => {},
  },
};

export const WithImage: Story = {
  args: {
    src: "https://picsum.photos/200",
    alt: "Sample image",
    onEdit: () => {},
    onRemove: () => {},
  },
};

export const Disabled: Story = {
  args: {
    disabled: true,
    onDrop: () => {},
  },
};

export const DisabledWithImage: Story = {
  args: {
    src: "https://picsum.photos/200",
    alt: "Cannot edit",
    disabled: true,
  },
};

export const CustomSize: Story = {
  args: {
    size: 80,
    onDrop: () => {},
  },
};

export const LargeSize: Story = {
  args: {
    src: "https://picsum.photos/300",
    alt: "Large avatar",
    size: 160,
    onEdit: () => {},
    onRemove: () => {},
  },
};

export const NoEditOverlay: Story = {
  args: {
    src: "https://picsum.photos/200",
    alt: "No overlay",
    showEditOverlay: false,
    onEdit: () => {},
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputImage.tsx
================================================
"use client";

import { cn, noProp } from "@/lib/utils";
import { SvgPlus, SvgX } from "@opal/icons";
import { Hoverable } from "@opal/core";
import IconButton from "@/refresh-components/buttons/IconButton";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import Text from "@/refresh-components/texts/Text";
import { useImageDropzone } from "@/hooks/useImageDropzone";

type ImageState = "empty" | "withImage" | "dragActive";
type AbledState = "enabled" | "disabled";

const inputImageClasses = {
  container: {
    empty: {
      enabled: [
        "bg-background-neutral-00",
        "hover:bg-background-tint-02",
        "active:bg-background-tint-00",
        "focus-visible:bg-background-tint-01",
        "focus-visible:hover:bg-background-tint-02",
        "border-dashed",
        "hover:border-solid",
        "active:border-solid",
        "border-border-01",
        "hover:border-border-03",
        "active:border-border-05",
        "focus-visible:border-border-05",
      ],
      disabled: [
        "bg-background-neutral-00",
        "border-dashed",
        "border-border-01",
        "opacity-50",
        "cursor-not-allowed",
      ],
    },
    withImage: {
      enabled: [
        "bg-background-neutral-00",
        "hover:bg-background-tint-02",
        "active:bg-background-tint-00",
        "focus-visible:bg-background-tint-01",
        "focus-visible:hover:bg-background-tint-02",
        "border-solid",
        "border-border-01",
        "hover:border-border-03",
        "active:border-border-05",
        "focus-visible:border-border-05",
      ],
      disabled: [
        "bg-background-neutral-00",
        "border-solid",
        "border-border-01",
        "opacity-50",
        "cursor-not-allowed",
      ],
    },
    dragActive: {
      enabled: [
        "bg-background-neutral-00",
        "border-solid",
        "border-2",
        "border-action-link-05",
      ],
      disabled: [
        "bg-background-neutral-00",
        "border-solid",
        "border-2",
        "border-action-link-05",
        "opacity-50",
        "cursor-not-allowed",
      ],
    },
  },
  placeholder: {
    empty: {
      enabled: [
        "stroke-text-02",
        "group-hover:stroke-text-03",
        "group-active:stroke-text-04",
        "group-focus-visible:stroke-text-02",
        "group-focus-visible:group-hover:stroke-text-03",
      ],
      disabled: ["stroke-text-01"],
    },
    withImage: {
      enabled: [],
      disabled: [],
    },
    dragActive: {
      enabled: ["stroke-action-link-05"],
      disabled: ["stroke-action-link-05"],
    },
  },
} as const;

export interface InputImageProps {
  // State control
  disabled?: boolean;

  // Image source
  src?: string;
  alt?: string;

  // Callbacks
  onEdit?: () => void;
  onRemove?: () => void;
  /** Callback when image is dropped onto the component */
  onDrop?: (file: File) => void;
  /** Callback when file is rejected */
  onDropRejected?: (reason: string) => void;

  /** Whether to show the edit overlay on hover (default: true) */
  showEditOverlay?: boolean;

  // Size control
  size?: number;

  className?: string;
}

export default function InputImage({
  disabled = false,
  src,
  alt = "Image",
  onEdit,
  onRemove,
  onDrop,
  onDropRejected,
  showEditOverlay = true,
  size = 120,
  className,
}: InputImageProps) {
  const isInteractive = !disabled && (onEdit || onDrop);
  const hasImage = !!src;

  const { isDragActive, getRootProps, getInputProps, openFilePicker } =
    useImageDropzone({
      onImageAccepted: (file) => {
        onDrop?.(file);
      },
      onImageRejected: (rejections) => {
        const firstRejection = rejections[0];
        const reason = firstRejection?.errors[0]?.message || "File rejected";
        onDropRejected?.(reason);
      },
      disabled: disabled || !onDrop,
    });

  const handleClick = () => {
    if (disabled) return;
    if (onEdit) {
      onEdit();
    } else if (onDrop) {
      openFilePicker();
    }
  };

  // Derive states once
  const imageState: ImageState = isDragActive
    ? "dragActive"
    : hasImage
      ? "withImage"
      : "empty";
  const abled: AbledState = disabled ? "disabled" : "enabled";

  // Single lookup pattern for all classes
  const containerClass = inputImageClasses.container[imageState][abled];
  const placeholderClass = inputImageClasses.placeholder[imageState][abled];

  const dropzoneProps = onDrop ? getRootProps() : {};

  return (
    <Hoverable.Root group="inputImage" widthVariant="fit">
      <div
        className={cn("relative", className)}
        style={{ width: size, height: size }}
        {...dropzoneProps}
      >
        {/* Hidden input for file selection */}
        {onDrop && <input {...getInputProps()} />}

        {/* Main container */}
        <button
          type="button"
          onClick={handleClick}
          disabled={disabled}
          className={cn(
            "group relative w-full h-full rounded-full overflow-hidden",
            "border flex items-center justify-center",
            "transition-all duration-150",
            containerClass
          )}
          aria-label={
            isInteractive
              ? hasImage
                ? "Edit image"
                : "Upload image"
              : undefined
          }
        >
          {/* Content */}
          {hasImage ? (
            <img
              src={src}
              alt={alt}
              className="absolute inset-0 w-full h-full object-cover pointer-events-none"
            />
          ) : (
            <SvgPlus
              className={cn("w-6 h-6", placeholderClass, "pointer-events-none")}
            />
          )}

          {/* Drag overlay indicator */}
          {isDragActive && (
            <div className="absolute inset-0 bg-action-link-05/10 flex items-center justify-center rounded-full pointer-events-none">
              <SvgPlus className="w-8 h-8 stroke-action-link-05" />
            </div>
          )}

          {/* Edit overlay - shows on hover/focus when image is uploaded */}
          {showEditOverlay && isInteractive && hasImage && !isDragActive && (
            <div className="absolute bottom-0 left-0 right-0 pointer-events-none">
              <Hoverable.Item group="inputImage" variant="opacity-on-hover">
                <div
                  className={cn(
                    "flex items-center justify-center",
                    "pb-2.5 pt-1.5",
                    "backdrop-blur-sm bg-mask-01",
                    "pointer-events-none"
                  )}
                >
                  <div className="pointer-events-auto">
                    <SimpleTooltip tooltip="Edit" side="top">
                      <div
                        className={cn(
                          "flex items-center justify-center",
                          "px-1 py-0.5 rounded-08"
                        )}
                      >
                        <Text
                          className="text-text-03 font-secondary-action"
                          style={{ fontSize: "12px", lineHeight: "16px" }}
                        >
                          Edit
                        </Text>
                      </div>
                    </SimpleTooltip>
                  </div>
                </div>
              </Hoverable.Item>
            </div>
          )}
        </button>

        {/* Remove button - top left corner (only when image is uploaded) */}
        {isInteractive && hasImage && onRemove && (
          <div className="absolute top-1 left-1">
            <Hoverable.Item group="inputImage" variant="opacity-on-hover">
              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <IconButton
                icon={SvgX}
                onClick={noProp(onRemove)}
                type="button"
                primary
                className="!w-5 !h-5 !p-0.5 !rounded-04"
                aria-label="Remove image"
              />
            </Hoverable.Item>
          </div>
        )}
      </div>
    </Hoverable.Root>
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputKeyValue.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import KeyValueInput from "./InputKeyValue";
import type { KeyValue } from "./InputKeyValue";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof KeyValueInput> = {
  title: "refresh-components/inputs/InputKeyValue",
  component: KeyValueInput,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof KeyValueInput>;

export const Default: Story = {
  render: function DefaultStory() {
    const [items, setItems] = React.useState<KeyValue[]>([
      { key: "", value: "" },
    ]);
    return (
      <KeyValueInput
        keyTitle="Key"
        valueTitle="Value"
        items={items}
        onChange={setItems}
      />
    );
  },
};

export const WithValues: Story = {
  render: function WithValuesStory() {
    const [items, setItems] = React.useState<KeyValue[]>([
      { key: "API_KEY", value: "sk-abc123" },
      { key: "BASE_URL", value: "https://api.example.com" },
    ]);
    return (
      <KeyValueInput
        keyTitle="Variable Name"
        valueTitle="Value"
        items={items}
        onChange={setItems}
      />
    );
  },
};

export const FixedLineMode: Story = {
  render: function FixedLineStory() {
    const [items, setItems] = React.useState<KeyValue[]>([
      { key: "Content-Type", value: "application/json" },
    ]);
    return (
      <KeyValueInput
        keyTitle="Header"
        valueTitle="Value"
        items={items}
        onChange={setItems}
        mode="fixed-line"
      />
    );
  },
};

export const KeyWideLayout: Story = {
  render: function KeyWideStory() {
    const [items, setItems] = React.useState<KeyValue[]>([
      { key: "Authorization", value: "Bearer token" },
    ]);
    return (
      <KeyValueInput
        keyTitle="Header"
        valueTitle="Value"
        items={items}
        onChange={setItems}
        layout="key-wide"
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <KeyValueInput
      keyTitle="Key"
      valueTitle="Value"
      items={[{ key: "LOCKED", value: "cannot-edit" }]}
      onChange={() => {}}
      disabled
    />
  ),
};

export const EmptyLineMode: Story = {
  render: function EmptyStory() {
    const [items, setItems] = React.useState<KeyValue[]>([]);
    return (
      <KeyValueInput
        keyTitle="Key"
        valueTitle="Value"
        items={items}
        onChange={setItems}
        mode="line"
      />
    );
  },
};

export const CustomAddLabel: Story = {
  render: function CustomLabelStory() {
    const [items, setItems] = React.useState<KeyValue[]>([
      { key: "", value: "" },
    ]);
    return (
      <KeyValueInput
        keyTitle="Name"
        valueTitle="Endpoint"
        items={items}
        onChange={setItems}
        addButtonLabel="Add Endpoint"
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputKeyValue.tsx
================================================
"use client";

/**
 * KeyValueInput - A comprehensive key-value pair input component
 *
 * Features:
 * - Two modes: 'line' (can remove all) and 'fixed-line' (minimum 1 item)
 * - Built-in validation for duplicate keys and empty keys
 * - Full accessibility with ARIA support
 * - Integrates with Formik, FormField, and custom form libraries
 * - Inline error display with danger-colored borders
 *
 * @example Basic Usage
 * ```tsx
 * const [items, setItems] = useState([{ key: "API_KEY", value: "value" }]);
 *
 * <KeyValueInput
 *   keyTitle="Variable Name"
 *   valueTitle="Value"
 *   items={items}
 *   onChange={setItems}
 *   mode="line"
 * />
 * ```
 *
 * @example With Formik Integration
 * ```tsx
 * <Formik initialValues={{ envVars: [] }}>
 *   {({ values, setFieldValue, setFieldError }) => (
 *     <FormField state={errors.envVars ? "error" : "idle"}>
 *       <FormField.Label>Environment Variables</FormField.Label>
 *       <FormField.Control asChild>
 *         <KeyValueInput
 *           keyTitle="Variable Name"
 *           valueTitle="Value"
 *           items={values.envVars}
 *           onChange={(items) => setFieldValue("envVars", items)}
 *           onValidationError={(error) => {
 *             if (error) {
 *               setFieldError("envVars", error);
 *             } else {
 *               setFieldError("envVars", undefined);
 *             }
 *           }}
 *         />
 *       </FormField.Control>
 *     </FormField>
 *   )}
 * </Formik>
 * ```
 *
 * @example With Local Error State
 * ```tsx
 * const [error, setError] = useState<string | null>(null);
 *
 * <FormField state={error ? "error" : "idle"}>
 *   <FormField.Label>Headers</FormField.Label>
 *   <FormField.Control asChild>
 *     <KeyValueInput
 *       keyTitle="Header"
 *       valueTitle="Value"
 *       items={headers}
 *       onChange={setHeaders}
 *       onValidationError={setError}
 *     />
 *   </FormField.Control>
 * </FormField>
 * ```
 */

import React, {
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useId,
  useRef,
} from "react";
import { cn } from "@/lib/utils";
import InputTypeIn from "./InputTypeIn";
import { Button, EmptyMessageCard } from "@opal/components";
import type { WithoutStyles } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import { FieldContext } from "../form/FieldContext";
import { FieldMessage } from "../messages/FieldMessage";
import { SvgMinusCircle, SvgPlusCircle } from "@opal/icons";

export type KeyValue = { key: string; value: string };

type KeyValueError = {
  key?: string;
  value?: string;
};

/*
 * CSS Grid is used instead of flexbox so that the key column, value column,
 * and remove button stay perfectly aligned across every row — including the
 * header titles. With flex + width restrictions each row is laid out
 * independently, so columns can drift when content (e.g. validation errors)
 * causes one cell to grow. Grid's shared column tracks prevent that.
 */
const GRID_COLS = {
  equal: "grid-cols-[1fr_1fr_2.25rem]",
  "key-wide": "grid-cols-[3fr_2fr_2.25rem]",
} as const;

interface KeyValueInputItemProps {
  item: KeyValue;
  onChange: (next: KeyValue) => void;
  disabled?: boolean;
  onRemove: () => void;
  keyPlaceholder?: string;
  valuePlaceholder?: string;
  error?: KeyValueError;
  canRemove: boolean;
  index: number;
  fieldId: string;
}

function KeyValueInputItem({
  item,
  onChange,
  disabled,
  onRemove,
  keyPlaceholder,
  valuePlaceholder,
  error,
  canRemove,
  index,
  fieldId,
}: KeyValueInputItemProps) {
  return (
    <>
      <div className="flex flex-col gap-y-0.5">
        <InputTypeIn
          placeholder={keyPlaceholder || "Key"}
          value={item.key}
          onChange={(e) => onChange({ ...item, key: e.target.value })}
          aria-label={`${keyPlaceholder || "Key"} ${index + 1}`}
          aria-invalid={!!error?.key}
          aria-describedby={
            error?.key ? `${fieldId}-key-error-${index}` : undefined
          }
          variant={disabled ? "disabled" : undefined}
          showClearButton={false}
        />
        {error?.key && (
          <FieldMessage variant="error" className="ml-0.5">
            <FieldMessage.Content
              id={`${fieldId}-key-error-${index}`}
              role="alert"
              className="ml-0.5"
            >
              {error.key}
            </FieldMessage.Content>
          </FieldMessage>
        )}
      </div>
      <div className="flex flex-col gap-y-0.5">
        <InputTypeIn
          placeholder={valuePlaceholder || "Value"}
          value={item.value}
          onChange={(e) => onChange({ ...item, value: e.target.value })}
          aria-label={`${valuePlaceholder || "Value"} ${index + 1}`}
          aria-invalid={!!error?.value}
          aria-describedby={
            error?.value ? `${fieldId}-value-error-${index}` : undefined
          }
          variant={disabled ? "disabled" : undefined}
          showClearButton={false}
        />
        {error?.value && (
          <FieldMessage variant="error" className="ml-0.5">
            <FieldMessage.Content
              id={`${fieldId}-value-error-${index}`}
              role="alert"
              className="ml-0.5"
            >
              {error.value}
            </FieldMessage.Content>
          </FieldMessage>
        )}
      </div>
      <Button
        disabled={disabled || !canRemove}
        prominence="tertiary"
        icon={SvgMinusCircle}
        onClick={onRemove}
        aria-label={`Remove ${keyPlaceholder || "key-value"} pair ${index + 1}`}
      />
    </>
  );
}

export interface KeyValueInputProps
  extends WithoutStyles<
    Omit<React.HTMLAttributes<HTMLDivElement>, "onChange">
  > {
  /** Title for the key column */
  keyTitle?: string;
  /** Title for the value column */
  valueTitle?: string;
  /** Array of key-value pairs */
  items: KeyValue[];
  /** Callback when items change */
  onChange: (nextItems: KeyValue[]) => void;
  /** Custom add handler */
  onAdd?: () => void;
  /** Custom remove handler */
  onRemove?: (index: number) => void;
  /** Disabled state */
  disabled?: boolean;
  /** Mode: 'line' allows removing all items, 'fixed-line' requires at least one item */
  mode?: "line" | "fixed-line";
  /** Layout: 'equal' - both inputs same width, 'key-wide' - key input is wider (60/40 split) */
  layout?: "equal" | "key-wide";
  /** Callback when validation state changes */
  onValidationChange?: (isValid: boolean, errors: KeyValueError[]) => void;
  /** Callback to handle validation errors - integrates with Formik or custom error handling. Called with error message when invalid, null when valid */
  onValidationError?: (errorMessage: string | null) => void;
  /** Optional custom validator for the key field. Return { isValid, message } */
  onKeyValidate?: (
    key: string,
    index: number,
    item: KeyValue,
    items: KeyValue[]
  ) => { isValid: boolean; message?: string };
  /** Optional custom validator for the value field. Return { isValid, message } */
  onValueValidate?: (
    value: string,
    index: number,
    item: KeyValue,
    items: KeyValue[]
  ) => { isValid: boolean; message?: string };
  /** Whether to validate for duplicate keys */
  validateDuplicateKeys?: boolean;
  /** Whether to validate for empty keys */
  validateEmptyKeys?: boolean;
  /** Optional name for the field (for accessibility) */
  name?: string;
  /** Custom label for the add button (defaults to "Add Line") */
  addButtonLabel?: string;
}

export default function KeyValueInput({
  keyTitle = "Key",
  valueTitle = "Value",
  items = [],
  onChange,
  onAdd,
  onRemove,
  disabled = false,
  mode = "line",
  layout = "equal",
  onValidationChange,
  onValidationError,
  onKeyValidate,
  onValueValidate,
  validateDuplicateKeys = true,
  validateEmptyKeys = true,
  name,
  addButtonLabel = "Add Line",
  ...rest
}: KeyValueInputProps) {
  // Try to get field context if used within FormField (safe access)
  const fieldContext = useContext(FieldContext);

  // Validation logic
  const errors = useMemo((): KeyValueError[] => {
    if (!items || items.length === 0) return [];

    const errorsList: KeyValueError[] = items.map(() => ({}));
    const keyCount = new Map<string, number[]>();

    items.forEach((item, index) => {
      // Validate empty keys - only if value is filled (user is actively working on this row)
      if (
        validateEmptyKeys &&
        item.key.trim() === "" &&
        item.value.trim() !== ""
      ) {
        const error = errorsList[index];
        if (error) {
          error.key = "Key cannot be empty";
        }
      }

      // Track key occurrences for duplicate validation
      if (item.key.trim() !== "") {
        const existing = keyCount.get(item.key) || [];
        existing.push(index);
        keyCount.set(item.key, existing);
      }

      // Custom key validation
      if (onKeyValidate) {
        const result = onKeyValidate(item.key, index, item, items);
        if (result && result.isValid === false) {
          const error = errorsList[index];
          if (error) {
            error.key = result.message || "Invalid key";
          }
        }
      }

      // Custom value validation
      if (onValueValidate) {
        const result = onValueValidate(item.value, index, item, items);
        if (result && result.isValid === false) {
          const error = errorsList[index];
          if (error) {
            error.value = result.message || "Invalid value";
          }
        }
      }
    });

    // Validate duplicate keys
    if (validateDuplicateKeys) {
      keyCount.forEach((indices, key) => {
        if (indices.length > 1) {
          indices.forEach((index) => {
            const error = errorsList[index];
            if (error) {
              error.key = "Duplicate key";
            }
          });
        }
      });
    }

    return errorsList;
  }, [
    items,
    validateDuplicateKeys,
    validateEmptyKeys,
    onKeyValidate,
    onValueValidate,
  ]);

  const isValid = useMemo(() => {
    return errors.every((error) => !error.key && !error.value);
  }, [errors]);

  const hasAnyError = useMemo(() => {
    return errors.some((error) => error.key || error.value);
  }, [errors]);

  // Generate error message for external form libraries (Formik, etc.)
  const errorMessage = useMemo(() => {
    if (!hasAnyError) return null;

    const errorCount = errors.filter((e) => e.key || e.value).length;
    const duplicateCount = errors.filter(
      (e) => e.key === "Duplicate key"
    ).length;
    const emptyCount = errors.filter(
      (e) => e.key === "Key cannot be empty"
    ).length;

    if (duplicateCount > 0) {
      return `${duplicateCount} duplicate ${
        duplicateCount === 1 ? "key" : "keys"
      } found`;
    } else if (emptyCount > 0) {
      return `${emptyCount} empty ${emptyCount === 1 ? "key" : "keys"} found`;
    }
    return `${errorCount} validation ${
      errorCount === 1 ? "error" : "errors"
    } found`;
  }, [hasAnyError, errors]);

  // Notify parent of validation changes
  const onValidationChangeRef = useRef(onValidationChange);
  const onValidationErrorRef = useRef(onValidationError);

  useEffect(() => {
    onValidationChangeRef.current = onValidationChange;
  }, [onValidationChange]);

  useEffect(() => {
    onValidationErrorRef.current = onValidationError;
  }, [onValidationError]);

  useEffect(() => {
    onValidationChangeRef.current?.(isValid, errors);
  }, [isValid, errors]);

  // Notify parent of error state for form library integration
  useEffect(() => {
    onValidationErrorRef.current?.(errorMessage);
  }, [errorMessage]);

  const canRemoveItems = mode === "line" || items.length > 1;

  const handleAdd = useCallback(() => {
    if (onAdd) {
      onAdd();
      return;
    }
    onChange([...(items || []), { key: "", value: "" }]);
  }, [onAdd, onChange, items]);

  const handleRemove = useCallback(
    (index: number) => {
      if (!canRemoveItems && items.length === 1) return;

      if (onRemove) {
        onRemove(index);
        return;
      }
      const next = (items || []).filter((_, i) => i !== index);
      onChange(next);
    },
    [canRemoveItems, items, onRemove, onChange]
  );

  const handleItemChange = useCallback(
    (index: number, nextItem: KeyValue) => {
      const next = [...(items || [])];
      next[index] = nextItem;
      onChange(next);
    },
    [items, onChange]
  );

  // Initialize with at least one item for fixed-line mode
  useEffect(() => {
    if (mode === "fixed-line" && (!items || items.length === 0)) {
      onChange([{ key: "", value: "" }]);
    }
  }, [mode]); // Only run on mode change

  const autoId = useId();
  const fieldId = fieldContext?.baseId || name || `key-value-input-${autoId}`;
  const gridCols = GRID_COLS[layout];

  return (
    <div
      className="w-full flex flex-col gap-y-2"
      role="group"
      aria-label={`${keyTitle} and ${valueTitle} pairs`}
      {...rest}
    >
      {items && items.length > 0 ? (
        <div className={cn("grid items-start gap-1", gridCols)}>
          {/*
            # NOTE (@raunakab)
            We add this space below the "title"-row to add some breathing room between the titles and the key-value items.
            Since we're using a `grid` template, the padding below *one* item in a row applies additional height to *all* items in that row.
          */}
          <div className="pb-1">
            <Text mainUiAction>{keyTitle}</Text>
          </div>
          <Text mainUiAction>{valueTitle}</Text>
          <div aria-hidden />

          {items.map((item, index) => (
            <KeyValueInputItem
              key={index}
              item={item}
              onChange={(next) => handleItemChange(index, next)}
              disabled={disabled}
              onRemove={() => handleRemove(index)}
              keyPlaceholder={keyTitle}
              valuePlaceholder={valueTitle}
              error={errors[index]}
              canRemove={canRemoveItems}
              index={index}
              fieldId={fieldId}
            />
          ))}
        </div>
      ) : (
        <EmptyMessageCard title="No items added yet." />
      )}

      <Button
        disabled={disabled}
        prominence="secondary"
        onClick={handleAdd}
        icon={SvgPlusCircle}
        aria-label={`Add ${keyTitle} and ${valueTitle} pair`}
        type="button"
      >
        {addButtonLabel}
      </Button>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputNumber.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InputNumber from "./InputNumber";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputNumber> = {
  title: "refresh-components/inputs/InputNumber",
  component: InputNumber,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 200 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputNumber>;

export const Default: Story = {
  args: {
    value: 5,
    onChange: () => {},
  },
};

export const WithMinMax: Story = {
  args: {
    value: 50,
    onChange: () => {},
    min: 0,
    max: 100,
  },
};

export const WithReset: Story = {
  args: {
    value: 42,
    onChange: () => {},
    showReset: true,
    defaultValue: 10,
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputNumber.tsx
================================================
"use client";

import * as React from "react";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import {
  Variants,
  wrapperClasses,
  innerClasses,
  textClasses,
} from "@/refresh-components/inputs/styles";
import { SvgChevronUp, SvgChevronDown, SvgRevert } from "@opal/icons";

/**
 * InputNumber Component
 *
 * A number input with increment/decrement stepper buttons and optional reset.
 *
 * @example
 * ```tsx
 * // Basic usage
 * <InputNumber
 *   value={count}
 *   onChange={setCount}
 *   min={0}
 *   max={100}
 * />
 *
 * // With reset button
 * <InputNumber
 *   value={count}
 *   onChange={setCount}
 *   defaultValue={10}
 *   showReset
 * />
 *
 * // With step
 * <InputNumber
 *   value={count}
 *   onChange={setCount}
 *   step={5}
 * />
 * ```
 */
export interface InputNumberProps {
  value: number | null;
  onChange: (value: number | null) => void;
  min?: number;
  max?: number;
  step?: number;
  defaultValue?: number;
  showReset?: boolean;
  variant?: Variants;
  disabled?: boolean;
  className?: string;
  placeholder?: string;
}

export default function InputNumber({
  value,
  onChange,
  min,
  max,
  step = 1,
  defaultValue,
  showReset = false,
  variant = "primary",
  disabled = false,
  className,
  placeholder,
}: InputNumberProps) {
  const inputRef = React.useRef<HTMLInputElement | null>(null);
  const [inputValue, setInputValue] = React.useState(
    value === null ? "" : String(value)
  );
  const isDisabled = disabled || variant === "disabled";

  // Sync input value when external value changes (e.g., from stepper buttons or reset)
  React.useEffect(() => {
    setInputValue(value === null ? "" : String(value));
  }, [value]);

  const effectiveValue = value ?? 0;
  const canIncrement = max === undefined || effectiveValue < max;
  const canDecrement =
    value !== null && (min === undefined || effectiveValue > min);
  const canReset =
    showReset && defaultValue !== undefined && value !== defaultValue;

  const handleIncrement = () => {
    if (canIncrement) {
      const newValue = effectiveValue + step;
      onChange(max !== undefined ? Math.min(newValue, max) : newValue);
    }
  };

  const handleDecrement = () => {
    if (canDecrement) {
      const newValue = effectiveValue - step;
      onChange(min !== undefined ? Math.max(newValue, min) : newValue);
    }
  };

  const handleReset = () => {
    if (defaultValue !== undefined) {
      onChange(defaultValue);
    }
  };

  const handleBlur = () => {
    // On blur, if empty, keep as null so placeholder shows
    if (inputValue.trim() === "") {
      onChange(null);
    } else {
      setInputValue(value === null ? "" : String(value));
    }
  };

  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const rawValue = e.target.value;

    // Only allow digits (and empty string)
    if (rawValue !== "" && !/^\d+$/.test(rawValue)) {
      return;
    }

    setInputValue(rawValue);

    // Allow empty input while typing (fallback applied on blur)
    if (rawValue === "") {
      return;
    }

    const val = parseInt(rawValue, 10);
    let newValue = val;
    if (min !== undefined) newValue = Math.max(newValue, min);
    if (max !== undefined) newValue = Math.min(newValue, max);
    onChange(newValue);
  };

  return (
    <div
      className={cn(
        "flex flex-row items-center justify-between w-full h-fit pr-1.5 pl-1.5 rounded-08",
        wrapperClasses[variant],
        className
      )}
      onClick={() => inputRef.current?.focus()}
    >
      <input
        ref={inputRef}
        type="text"
        inputMode="numeric"
        pattern="[0-9]*"
        disabled={isDisabled}
        value={inputValue}
        placeholder={placeholder}
        onChange={handleInputChange}
        onBlur={handleBlur}
        className={cn(
          "w-full h-[1.5rem] bg-transparent p-0.5 focus:outline-none",
          innerClasses[variant],
          textClasses[variant]
        )}
      />

      <div className="flex flex-row items-center gap-1">
        {showReset && (
          <Button
            disabled={!canReset || isDisabled}
            icon={SvgRevert}
            onClick={handleReset}
            prominence="tertiary"
          />
        )}
        <div className="flex flex-col">
          <button
            type="button"
            onClick={handleIncrement}
            disabled={!canIncrement || isDisabled}
            className="p-0.5 text-text-03 hover:text-text-04 disabled:text-text-02 disabled:cursor-not-allowed transition-colors"
          >
            <SvgChevronUp size={14} />
          </button>
          <button
            type="button"
            onClick={handleDecrement}
            disabled={!canDecrement || isDisabled}
            className="p-0.5 text-text-03 hover:text-text-04 disabled:text-text-02 disabled:cursor-not-allowed transition-colors"
          >
            <SvgChevronDown size={14} />
          </button>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputSearch.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputSearch from "./InputSearch";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputSearch> = {
  title: "refresh-components/inputs/InputSearch",
  component: InputSearch,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputSearch>;

export const Default: Story = {
  render: function DefaultStory() {
    const [value, setValue] = React.useState("");
    return (
      <InputSearch
        placeholder="Search..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
      />
    );
  },
};

export const WithValue: Story = {
  render: function WithValueStory() {
    const [value, setValue] = React.useState("Search Value");
    return (
      <InputSearch
        placeholder="Search..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
      />
    );
  },
};

export const Disabled: Story = {
  render: function DisabledStory() {
    return (
      <InputSearch
        placeholder="Search..."
        value=""
        onChange={() => {}}
        disabled
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputSearch.tsx
================================================
"use client";

import * as React from "react";
import { cn } from "@/lib/utils";
import InputTypeIn, {
  InputTypeInProps,
} from "@/refresh-components/inputs/InputTypeIn";

/**
 * InputSearch Component
 *
 * A subtle search input that follows the "Subtle Input Styles" spec:
 * no border by default, border appears on hover/focus/active.
 *
 * @example
 * ```tsx
 * // Basic usage
 * <InputSearch
 *   placeholder="Search..."
 *   value={search}
 *   onChange={(e) => setSearch(e.target.value)}
 * />
 *
 * // Disabled state
 * <InputSearch
 *   disabled
 *   placeholder="Search..."
 *   value=""
 *   onChange={() => {}}
 * />
 * ```
 */
export interface InputSearchProps
  extends Omit<InputTypeInProps, "variant" | "leftSearchIcon"> {
  /**
   * Ref to the underlying input element.
   */
  ref?: React.Ref<HTMLInputElement>;
  /**
   * Whether the input is disabled.
   */
  disabled?: boolean;
}

export default function InputSearch({
  ref,
  disabled,
  className,
  ...props
}: InputSearchProps) {
  return (
    <InputTypeIn
      ref={ref}
      variant={disabled ? "disabled" : "internal"}
      leftSearchIcon
      className={cn(
        "[&_input]:font-main-ui-muted [&_input]:text-text-02 [&_input]:placeholder:text-text-02",
        !disabled && [
          "border border-transparent",
          "hover:border-border-03",
          "active:border-border-05",
          "focus-within:shadow-[0px_0px_0px_2px_var(--background-tint-04)]",
          "focus-within:hover:border-border-03",
        ],
        className
      )}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/inputs/InputSelect.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import InputSelect from "./InputSelect";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputSelect> = {
  title: "refresh-components/inputs/InputSelect",
  component: InputSelect,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputSelect>;

export const Default: Story = {
  render: () => (
    <InputSelect defaultValue="option1">
      <InputSelect.Trigger placeholder="Select an option" />
      <InputSelect.Content>
        <InputSelect.Item value="option1">Option 1</InputSelect.Item>
        <InputSelect.Item value="option2">Option 2</InputSelect.Item>
        <InputSelect.Item value="option3">Option 3</InputSelect.Item>
      </InputSelect.Content>
    </InputSelect>
  ),
};

export const WithPlaceholder: Story = {
  render: () => (
    <InputSelect>
      <InputSelect.Trigger placeholder="Choose a fruit..." />
      <InputSelect.Content>
        <InputSelect.Item value="apple">Apple</InputSelect.Item>
        <InputSelect.Item value="banana">Banana</InputSelect.Item>
        <InputSelect.Item value="cherry">Cherry</InputSelect.Item>
      </InputSelect.Content>
    </InputSelect>
  ),
};

export const Controlled: Story = {
  render: function ControlledStory() {
    const [value, setValue] = React.useState("b");
    return (
      <InputSelect value={value} onValueChange={setValue}>
        <InputSelect.Trigger placeholder="Select..." />
        <InputSelect.Content>
          <InputSelect.Item value="a">Alpha</InputSelect.Item>
          <InputSelect.Item value="b">Bravo</InputSelect.Item>
          <InputSelect.Item value="c">Charlie</InputSelect.Item>
        </InputSelect.Content>
      </InputSelect>
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <InputSelect defaultValue="option1" disabled>
      <InputSelect.Trigger placeholder="Select an option" />
      <InputSelect.Content>
        <InputSelect.Item value="option1">Option 1</InputSelect.Item>
        <InputSelect.Item value="option2">Option 2</InputSelect.Item>
      </InputSelect.Content>
    </InputSelect>
  ),
};

export const ErrorState: Story = {
  render: () => (
    <InputSelect error>
      <InputSelect.Trigger placeholder="Required field" />
      <InputSelect.Content>
        <InputSelect.Item value="x">X</InputSelect.Item>
        <InputSelect.Item value="y">Y</InputSelect.Item>
      </InputSelect.Content>
    </InputSelect>
  ),
};

export const WithGroups: Story = {
  render: () => (
    <InputSelect defaultValue="gpt4o">
      <InputSelect.Trigger placeholder="Choose a model..." />
      <InputSelect.Content>
        <InputSelect.Group>
          <InputSelect.Label>OpenAI</InputSelect.Label>
          <InputSelect.Item value="gpt4o">GPT-4o</InputSelect.Item>
          <InputSelect.Item value="gpt4o-mini">GPT-4o Mini</InputSelect.Item>
        </InputSelect.Group>
        <InputSelect.Separator />
        <InputSelect.Group>
          <InputSelect.Label>Anthropic</InputSelect.Label>
          <InputSelect.Item value="opus">Claude Opus</InputSelect.Item>
          <InputSelect.Item value="sonnet">Claude Sonnet</InputSelect.Item>
        </InputSelect.Group>
      </InputSelect.Content>
    </InputSelect>
  ),
};

export const WithDescription: Story = {
  render: () => (
    <InputSelect>
      <InputSelect.Trigger placeholder="Select a plan..." />
      <InputSelect.Content>
        <InputSelect.Item value="free" description="Up to 5 users">
          Free
        </InputSelect.Item>
        <InputSelect.Item value="pro" description="Unlimited users">
          Pro
        </InputSelect.Item>
        <InputSelect.Item value="enterprise" description="Custom limits">
          Enterprise
        </InputSelect.Item>
      </InputSelect.Content>
    </InputSelect>
  ),
};


================================================
FILE: web/src/refresh-components/inputs/InputSelect.tsx
================================================
"use client";

import * as React from "react";
import * as SelectPrimitive from "@radix-ui/react-select";
import { cn } from "@/lib/utils";
import LineItem, { LineItemProps } from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
import type { IconProps } from "@opal/types";
import {
  iconClasses,
  textClasses,
  Variants,
  wrapperClasses,
} from "@/refresh-components/inputs/styles";
import Truncated from "@/refresh-components/texts/Truncated";
import { SvgChevronDownSmall } from "@opal/icons";
import Separator, { SeparatorProps } from "@/refresh-components/Separator";
import { WithoutStyles } from "@/types";

// ============================================================================
// Context
// ============================================================================

interface SelectedItemDisplay {
  childrenRef: React.MutableRefObject<React.ReactNode>;
  iconRef: React.MutableRefObject<
    React.FunctionComponent<IconProps> | undefined
  >;
}

interface InputSelectContextValue {
  variant: Variants;
  currentValue?: string;
  disabled?: boolean;
  selectedItemDisplay: SelectedItemDisplay | null;
  setSelectedItemDisplay: (display: SelectedItemDisplay | null) => void;
}

const InputSelectContext = React.createContext<InputSelectContextValue | null>(
  null
);

const useInputSelectContext = () => {
  const context = React.useContext(InputSelectContext);
  if (!context) {
    throw new Error(
      "InputSelect compound components must be used within InputSelect"
    );
  }
  return context;
};

// ============================================================================
// InputSelect Root
// ============================================================================

/**
 * InputSelect Root Component
 *
 * A styled select/dropdown component built on Radix UI Select primitives.
 * Provides full control over trigger and content rendering.
 *
 * @example
 * ```tsx
 * <InputSelect defaultValue="option1">
 *   <InputSelect.Trigger placeholder="Select an option" />
 *   <InputSelect.Content>
 *     <InputSelect.Item value="option1">Option 1</InputSelect.Item>
 *     <InputSelect.Item value="option2">Option 2</InputSelect.Item>
 *   </InputSelect.Content>
 * </InputSelect>
 *
 * // Controlled
 * <InputSelect value={value} onValueChange={setValue}>
 *   <InputSelect.Trigger placeholder="Select..." />
 *   <InputSelect.Content>
 *     <InputSelect.Item value="a">A</InputSelect.Item>
 *   </InputSelect.Content>
 * </InputSelect>
 *
 * // With error state
 * <InputSelect error>
 *   <InputSelect.Trigger placeholder="Required field" />
 *   <InputSelect.Content>
 *     <InputSelect.Item value="x">X</InputSelect.Item>
 *   </InputSelect.Content>
 * </InputSelect>
 * ```
 */
interface InputSelectRootProps
  extends WithoutStyles<
    React.ComponentPropsWithoutRef<typeof SelectPrimitive.Root>
  > {
  /** Whether to show error styling */
  error?: boolean;
  /** Whether the select is disabled */
  disabled?: boolean;
  children: React.ReactNode;
  ref?: React.Ref<HTMLDivElement>;
}
function InputSelectRoot({
  disabled,
  error,
  value,
  defaultValue,
  onValueChange,
  children,
  ref,
  ...props
}: InputSelectRootProps) {
  const variant: Variants = disabled ? "disabled" : error ? "error" : "primary";

  // Support both controlled and uncontrolled modes
  const isControlled = value !== undefined;
  const [internalValue, setInternalValue] = React.useState<string | undefined>(
    defaultValue
  );
  const currentValue = isControlled ? value : internalValue;

  React.useEffect(() => {
    if (isControlled) return;
    setInternalValue(defaultValue);
  }, [defaultValue, isControlled]);

  const handleValueChange = React.useCallback(
    (nextValue: string) => {
      onValueChange?.(nextValue);

      if (isControlled) return;
      setInternalValue(nextValue);
    },
    [isControlled, onValueChange]
  );

  // Store the selected item's display data (children/icon refs)
  // Only the currently selected item registers itself
  const [selectedItemDisplay, setSelectedItemDisplay] =
    React.useState<SelectedItemDisplay | null>(null);

  React.useEffect(() => {
    if (!currentValue) setSelectedItemDisplay(null);
  }, [currentValue]);

  const contextValue = React.useMemo<InputSelectContextValue>(
    () => ({
      variant,
      currentValue,
      disabled,
      selectedItemDisplay,
      setSelectedItemDisplay,
    }),
    [variant, currentValue, disabled, selectedItemDisplay]
  );

  return (
    <div className="w-full min-w-[var(--block-width-form-input-min)] relative">
      <InputSelectContext.Provider value={contextValue}>
        <SelectPrimitive.Root
          {...(isControlled ? { value: currentValue } : { defaultValue })}
          onValueChange={handleValueChange}
          disabled={disabled}
          {...props}
        >
          <div ref={ref} className="w-full">
            {children}
          </div>
        </SelectPrimitive.Root>
      </InputSelectContext.Provider>
    </div>
  );
}

// ============================================================================
// InputSelect Trigger
// ============================================================================

/**
 * InputSelect Trigger Component
 *
 * The clickable trigger that opens the dropdown.
 *
 * @example
 * ```tsx
 * // With placeholder
 * <InputSelect.Trigger placeholder="Select..." />
 *
 * // With right section
 * <InputSelect.Trigger placeholder="Select..." rightSection={<Badge>New</Badge>} />
 * ```
 */
interface InputSelectTriggerProps
  extends WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Trigger>> {
  /** Placeholder when no value selected */
  placeholder?: React.ReactNode;
  /** Content to render on the right side of the trigger */
  rightSection?: React.ReactNode;
}
function InputSelectTrigger({
  placeholder,
  rightSection,
  children,
  ref,
  ...props
}: InputSelectTriggerProps) {
  const { variant, selectedItemDisplay } = useInputSelectContext();

  // Don't memoize - we need to read the latest ref values on every render
  let displayContent: React.ReactNode;

  if (!selectedItemDisplay) {
    displayContent = placeholder ? (
      typeof placeholder === "string" ? (
        <Text as="p" text03>
          {placeholder}
        </Text>
      ) : (
        placeholder
      )
    ) : (
      <Text as="p" text03>
        Select an option
      </Text>
    );
  } else {
    const Icon = selectedItemDisplay.iconRef.current;
    displayContent = (
      <div className="flex flex-row items-center gap-2 flex-1 w-full">
        {Icon && <Icon className={cn("h-4 w-4", iconClasses[variant])} />}
        <Truncated className={cn(textClasses[variant])}>
          {selectedItemDisplay.childrenRef.current}
        </Truncated>
      </div>
    );
  }

  return (
    <SelectPrimitive.Trigger
      ref={ref}
      className={cn(
        "group/InputSelect flex w-full items-center justify-between p-1.5 rounded-08 focus:outline-none",
        wrapperClasses[variant],
        variant === "primary" && "data-[state=open]:border-border-05"
      )}
      {...props}
    >
      <div className="flex flex-row items-center justify-between w-full p-0.5 gap-1">
        {children ?? displayContent}

        <div className="flex flex-row items-center gap-1">
          {rightSection}

          <SelectPrimitive.Icon asChild>
            <SvgChevronDownSmall
              className={cn(
                "h-4 w-4 transition-transform",
                iconClasses[variant],
                "group-data-[state=open]/InputSelect:-rotate-180"
              )}
            />
          </SelectPrimitive.Icon>
        </div>
      </div>
    </SelectPrimitive.Trigger>
  );
}

// ============================================================================
// InputSelect Content
// ============================================================================

/**
 * InputSelect Content Component
 *
 * The dropdown content container with animations and styling.
 *
 * @example
 * ```tsx
 * <InputSelect.Content>
 *   <InputSelect.Item value="1">Item 1</InputSelect.Item>
 *   <InputSelect.Item value="2">Item 2</InputSelect.Item>
 * </InputSelect.Content>
 * ```
 */
function InputSelectContent({
  children,
  ref,
  ...props
}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Content>>) {
  return (
    <SelectPrimitive.Portal>
      <SelectPrimitive.Content
        ref={ref}
        className={cn(
          "z-popover w-[var(--radix-select-trigger-width)] max-h-72 overflow-auto rounded-12 border bg-background-neutral-00 p-1",
          "data-[state=open]:animate-in data-[state=closed]:animate-out",
          "data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0",
          "data-[state=open]:zoom-in-95 data-[state=closed]:zoom-out-95"
        )}
        sideOffset={4}
        position="popper"
        onMouseDown={(e) => {
          e.stopPropagation();
          e.preventDefault();
        }}
        {...props}
      >
        <SelectPrimitive.Viewport className="flex flex-col gap-1">
          {children}
        </SelectPrimitive.Viewport>
      </SelectPrimitive.Content>
    </SelectPrimitive.Portal>
  );
}

// ============================================================================
// InputSelect Item
// ============================================================================

/**
 * InputSelect Item Component
 *
 * Individual selectable option within the dropdown.
 *
 * @example
 * ```tsx
 * <InputSelect.Item value="option1" icon={SvgIcon}>
 *   Option 1
 * </InputSelect.Item>
 *
 * <InputSelect.Item value="option2" description="Additional info">
 *   Option 2
 * </InputSelect.Item>
 * ```
 */
interface InputSelectItemProps
  extends WithoutStyles<Omit<LineItemProps, "heavyForced" | "ref">> {
  /** Unique value for this option */
  value: string;
  /** Optional callback when item is selected */
  onClick?: (event: React.SyntheticEvent) => void;
  ref?: React.Ref<React.ComponentRef<typeof SelectPrimitive.Item>>;
}
function InputSelectItem({
  value,
  children,
  description,
  onClick,
  icon,
  ref,
  ...props
}: InputSelectItemProps) {
  const { currentValue, setSelectedItemDisplay } = useInputSelectContext();
  const isSelected = value === currentValue;

  // Use refs to hold latest children/icon - these are passed to the context
  // so the trigger always reads current values without needing re-registration
  const childrenRef = React.useRef(children);
  const iconRef = React.useRef(icon);
  childrenRef.current = children;
  iconRef.current = icon;

  // Only the selected item registers its display data
  React.useEffect(() => {
    if (!isSelected) return;
    setSelectedItemDisplay({ childrenRef, iconRef });

    // Clean up functions only need to return for items which are selected.
    return () => setSelectedItemDisplay(null);
  }, [isSelected]);

  return (
    <SelectPrimitive.Item
      ref={ref}
      value={value}
      className="outline-none focus:outline-none rounded-08 data-[highlighted]:bg-background-tint-02"
      onSelect={onClick}
    >
      {/* Hidden ItemText for Radix to track selection */}
      <span className="hidden">
        <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
      </span>

      <LineItem
        {...props}
        icon={icon}
        selected={isSelected}
        emphasized
        description={description}
        interactive={false}
      >
        {children}
      </LineItem>
    </SelectPrimitive.Item>
  );
}

// ============================================================================
// InputSelect Group
// ============================================================================

/**
 * InputSelect Group Component
 *
 * Groups related items together with an optional label.
 *
 * @example
 * ```tsx
 * <InputSelect.Group>
 *   <InputSelect.Label>Fruits</InputSelect.Label>
 *   <InputSelect.Item value="apple">Apple</InputSelect.Item>
 *   <InputSelect.Item value="banana">Banana</InputSelect.Item>
 * </InputSelect.Group>
 * ```
 */
function InputSelectGroup({
  ref,
  ...props
}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Group>>) {
  return <SelectPrimitive.Group ref={ref} {...props} />;
}

// ============================================================================
// InputSelect Label
// ============================================================================

/**
 * InputSelect Label Component
 *
 * A label for a group of items.
 *
 * @example
 * ```tsx
 * <InputSelect.Label>Category Name</InputSelect.Label>
 * ```
 */
function InputSelectLabel({
  ref,
  ...props
}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Label>>) {
  return (
    <SelectPrimitive.Label
      ref={ref}
      className="px-2 py-1.5 text-xs font-medium text-text-03 uppercase tracking-wide"
      {...props}
    />
  );
}

// ============================================================================
// InputSelect Separator
// ============================================================================

/**
 * InputSelect Separator Component
 *
 * A visual divider between items in the dropdown.
 * Uses the app's standard Separator component with appropriate defaults for dropdown menus.
 *
 * @example
 * ```tsx
 * <InputSelect.Content>
 *   <InputSelect.Item value="1">Option 1</InputSelect.Item>
 *   <InputSelect.Separator />
 *   <InputSelect.Item value="2">Option 2</InputSelect.Item>
 * </InputSelect.Content>
 * ```
 */
function InputSelectSeparator({
  noPadding = true,
  ref,
  ...props
}: WithoutStyles<SeparatorProps> & {
  ref?: React.Ref<React.ComponentRef<typeof Separator>>;
}) {
  return (
    <Separator
      ref={ref}
      noPadding={noPadding}
      className="px-2 py-1"
      {...props}
    />
  );
}

// ============================================================================
// Exports
// ============================================================================

/**
 * InputSelect - A styled select/dropdown component
 *
 * @example
 * ```tsx
 * import InputSelect from "@/refresh-components/inputs/InputSelect";
 *
 * <InputSelect defaultValue="1">
 *   <InputSelect.Trigger placeholder="Choose..." />
 *   <InputSelect.Content>
 *     <InputSelect.Item value="1">Option 1</InputSelect.Item>
 *     <InputSelect.Item value="2">Option 2</InputSelect.Item>
 *   </InputSelect.Content>
 * </InputSelect>
 *
 * // With groups
 * <InputSelect defaultValue="1">
 *   <InputSelect.Trigger placeholder="Choose a model..." />
 *   <InputSelect.Content>
 *     <InputSelect.Group>
 *       <InputSelect.Label>OpenAI</InputSelect.Label>
 *       <InputSelect.Item value="1">GPT-4o Mini</InputSelect.Item>
 *       <InputSelect.Item value="2">GPT-4o</InputSelect.Item>
 *     </InputSelect.Group>
 *     <InputSelect.Group>
 *       <InputSelect.Label>Anthropic</InputSelect.Label>
 *       <InputSelect.Item value="3">Claude Opus 4.5</InputSelect.Item>
 *       <InputSelect.Item value="4">Claude Sonnet 4.5</InputSelect.Item>
 *     </InputSelect.Group>
 *   </InputSelect.Content>
 * </InputSelect>
 * ```
 */
export default Object.assign(InputSelectRoot, {
  Trigger: InputSelectTrigger,
  Content: InputSelectContent,
  Item: InputSelectItem,
  Group: InputSelectGroup,
  Label: InputSelectLabel,
  Separator: InputSelectSeparator,
});

export {
  type InputSelectRootProps,
  type InputSelectTriggerProps,
  type InputSelectItemProps,
};


================================================
FILE: web/src/refresh-components/inputs/InputTextArea.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InputTextArea from "./InputTextArea";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputTextArea> = {
  title: "refresh-components/inputs/InputTextArea",
  component: InputTextArea,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputTextArea>;

export const Default: Story = {
  args: {
    placeholder: "Enter a description...",
  },
};

export const AutoResize: Story = {
  args: {
    autoResize: true,
    placeholder: "This textarea grows as you type...",
  },
};

export const WithMaxRows: Story = {
  args: {
    autoResize: true,
    maxRows: 5,
    placeholder: "Grows up to 5 rows...",
  },
};

export const Error: Story = {
  args: {
    variant: "error",
    value: "Invalid content",
    placeholder: "Enter a description...",
  },
};

export const Disabled: Story = {
  args: {
    variant: "disabled",
    value: "Cannot edit this textarea",
  },
};

export const ReadOnly: Story = {
  args: {
    variant: "readOnly",
    value: "This content is read-only and cannot be modified.",
  },
};

export const NonResizable: Story = {
  args: {
    resizable: false,
    placeholder: "This textarea cannot be resized...",
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputTextArea.tsx
================================================
"use client";

import * as React from "react";
import { cn, mergeRefs } from "@/lib/utils";
import {
  innerClasses,
  textClasses,
  Variants,
  wrapperClasses,
} from "@/refresh-components/inputs/styles";

/**
 * InputTextArea Component
 *
 * A styled textarea component with support for various states and auto-resize.
 *
 * @example
 * ```tsx
 * // Basic usage
 * <InputTextArea
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 *   placeholder="Enter description..."
 * />
 *
 * // With error state
 * <InputTextArea
 *   variant="error"
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 * />
 *
 * // Disabled state
 * <InputTextArea variant="disabled" value="Cannot edit" />
 *
 * // Read-only state (non-editable, minimal styling)
 * <InputTextArea variant="readOnly" value="Read-only value" />
 *
 * // Custom rows
 * <InputTextArea
 *   rows={8}
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 * />
 *
 * // Internal styling (no border)
 * <InputTextArea variant="internal" value={value} onChange={handleChange} />
 * ```
 */
export interface InputTextAreaProps
  extends Omit<React.TextareaHTMLAttributes<HTMLTextAreaElement>, "disabled"> {
  variant?: Variants;
  autoResize?: boolean;
  maxRows?: number;
  resizable?: boolean;
  rightSection?: React.ReactNode;
}
const InputTextArea = React.forwardRef<HTMLTextAreaElement, InputTextAreaProps>(
  (
    {
      variant = "primary",
      className,
      rows = 4,
      readOnly,
      autoResize = false,
      maxRows,
      resizable = true,
      rightSection,
      ...props
    },
    ref
  ) => {
    const disabled = variant === "disabled";
    const isReadOnlyVariant = variant === "readOnly";
    const isReadOnly = isReadOnlyVariant || readOnly;

    const internalRef = React.useRef<HTMLTextAreaElement | null>(null);
    const cachedLineHeight = React.useRef<number | null>(null);

    const adjustHeight = React.useCallback(() => {
      const textarea = internalRef.current;
      if (!textarea || !autoResize) return;

      if (cachedLineHeight.current === null) {
        cachedLineHeight.current =
          parseFloat(getComputedStyle(textarea).lineHeight) || 20;
      }
      const lineHeight = cachedLineHeight.current;

      // Reset to auto so scrollHeight reflects actual content
      textarea.style.height = "auto";
      textarea.style.overflowY = "hidden";

      const minHeight = rows * lineHeight;
      const maxHeight = maxRows ? maxRows * lineHeight : Infinity;

      const contentHeight = textarea.scrollHeight;
      const clampedHeight = Math.min(
        Math.max(contentHeight, minHeight),
        maxHeight
      );

      textarea.style.height = `${clampedHeight}px`;
      textarea.style.overflowY = contentHeight > maxHeight ? "auto" : "hidden";
    }, [autoResize, rows, maxRows]);

    React.useEffect(() => {
      adjustHeight();
    }, [adjustHeight, props.value]);

    const resizeClass = autoResize || !resizable ? "resize-none" : "resize-y";

    return (
      <div
        className={cn(
          wrapperClasses[variant],
          "flex flex-row items-start justify-between w-full h-fit p-1.5 rounded-08 relative",
          !isReadOnlyVariant && "bg-background-neutral-00",
          className
        )}
      >
        <textarea
          ref={mergeRefs(internalRef, ref)}
          disabled={disabled}
          readOnly={isReadOnly}
          className={cn(
            "w-full min-w-0 flex-1 min-h-[3rem] bg-transparent focus:outline-none p-0.5",
            resizeClass,
            innerClasses[variant],
            textClasses[variant]
          )}
          rows={rows}
          {...props}
        />
        {rightSection && (
          <div className="shrink-0 self-start -my-1 -mr-1 font-sans text-base">
            {rightSection}
          </div>
        )}
      </div>
    );
  }
);
InputTextArea.displayName = "InputTextArea";

export default InputTextArea;


================================================
FILE: web/src/refresh-components/inputs/InputTypeIn.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InputTypeIn from "./InputTypeIn";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof InputTypeIn> = {
  title: "refresh-components/inputs/InputTypeIn",
  component: InputTypeIn,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof InputTypeIn>;

export const Default: Story = {
  args: {
    placeholder: "Enter text...",
  },
};

export const WithPrefix: Story = {
  args: {
    prefixText: "https://",
    placeholder: "example.com",
  },
};

export const WithSearchIcon: Story = {
  args: {
    leftSearchIcon: true,
    placeholder: "Search...",
  },
};

export const WithClearButton: Story = {
  args: {
    showClearButton: true,
    value: "Some text to clear",
    onChange: () => {},
  },
};

export const Disabled: Story = {
  args: {
    variant: "disabled",
    value: "Cannot edit",
  },
};

export const Error: Story = {
  args: {
    variant: "error",
    value: "Invalid input",
    placeholder: "Enter text...",
  },
};

export const ReadOnly: Story = {
  args: {
    variant: "readOnly",
    value: "Read-only value",
  },
};


================================================
FILE: web/src/refresh-components/inputs/InputTypeIn.tsx
================================================
"use client";

import * as React from "react";
import { cn, noProp } from "@/lib/utils";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Button } from "@opal/components";
import {
  innerClasses,
  textClasses,
  Variants,
  wrapperClasses,
} from "@/refresh-components/inputs/styles";
import { SvgSearch, SvgX } from "@opal/icons";

/**
 * InputTypeIn Component
 *
 * A styled text input component with support for search icon, clear button,
 * and custom right section content.
 *
 * @example
 * ```tsx
 * // Basic usage
 * <InputTypeIn
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 *   placeholder="Enter text..."
 * />
 *
 * // With search icon
 * <InputTypeIn
 *   leftSearchIcon
 *   value={search}
 *   onChange={(e) => setSearch(e.target.value)}
 *   placeholder="Search..."
 * />
 *
 * // With error state
 * <InputTypeIn
 *   variant="error"
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 * />
 *
 * // Disabled state
 * <InputTypeIn variant="disabled" value="Cannot edit" />
 *
 * // Read-only state (non-editable, minimal styling)
 * <InputTypeIn variant="readOnly" value="Read-only value" />
 *
 * // With custom right section
 * <InputTypeIn
 *   value={password}
 *   onChange={(e) => setPassword(e.target.value)}
 *   type={showPassword ? "text" : "password"}
 *   rightSection={<Button icon={SvgEye} onClick={togglePassword}/>}
 * />
 *
 * // Without clear button
 * <InputTypeIn
 *   showClearButton={false}
 *   value={value}
 *   onChange={(e) => setValue(e.target.value)}
 * />
 * ```
 */
export interface InputTypeInProps
  extends Omit<React.InputHTMLAttributes<HTMLInputElement>, "disabled"> {
  variant?: Variants;

  prefixText?: string;
  leftSearchIcon?: boolean;
  rightSection?: React.ReactNode;
  showClearButton?: boolean;
  onClear?: () => void;
}
const InputTypeIn = React.forwardRef<HTMLInputElement, InputTypeInProps>(
  (
    {
      variant = "primary",
      prefixText,
      leftSearchIcon,
      rightSection,
      showClearButton = true,
      onClear,
      className,
      value,
      onChange,
      readOnly,
      ...props
    },
    ref
  ) => {
    const localInputRef = React.useRef<HTMLInputElement | null>(null);
    const disabled = variant === "disabled";
    const isReadOnlyVariant = variant === "readOnly";
    const isReadOnly = isReadOnlyVariant || readOnly;

    // Combine forwarded ref with local ref
    const setInputRef = React.useCallback(
      (node: HTMLInputElement | null) => {
        localInputRef.current = node;
        if (typeof ref === "function") {
          ref(node);
        } else if (ref) {
          (ref as React.MutableRefObject<HTMLInputElement | null>).current =
            node;
        }
      },
      [ref]
    );

    const handleClear = React.useCallback(() => {
      if (onClear) {
        onClear();
        return;
      }

      onChange?.({
        target: { value: "" },
        currentTarget: { value: "" },
        type: "change",
        bubbles: true,
        cancelable: true,
      } as React.ChangeEvent<HTMLInputElement>);
    }, [onClear, onChange]);

    return (
      <div
        className={cn(
          "flex flex-row items-center justify-between flex-1 h-fit p-1.5 rounded-08 relative w-full",
          wrapperClasses[variant],
          className
        )}
        onClick={() => {
          localInputRef.current?.focus();
        }}
      >
        {leftSearchIcon && (
          <div className="pr-2 pl-1">
            <div className="pl-[2px]">
              <SvgSearch className="w-[1rem] h-[1rem] stroke-text-02" />
            </div>
          </div>
        )}

        {prefixText && (
          <span className="select-none pointer-events-none text-text-02 pl-0.5">
            {prefixText}
          </span>
        )}

        <input
          ref={setInputRef}
          type="text"
          disabled={disabled}
          readOnly={isReadOnly}
          value={value}
          onChange={onChange}
          className={cn(
            "w-full h-[1.5rem] bg-transparent p-0.5 focus:outline-none",
            innerClasses[variant],
            textClasses[variant]
          )}
          {...props}
        />

        {showClearButton && !disabled && !isReadOnly && (
          // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
          <IconButton
            icon={SvgX}
            disabled={disabled}
            onClick={noProp(handleClear)}
            type="button"
            internal
            className={value ? "" : "invisible"}
          />
        )}

        {rightSection}
      </div>
    );
  }
);
InputTypeIn.displayName = "InputTypeIn";

export default InputTypeIn;


================================================
FILE: web/src/refresh-components/inputs/ListFieldInput.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import { ListFieldInput } from "./ListFieldInput";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof ListFieldInput> = {
  title: "refresh-components/inputs/ListFieldInput",
  component: ListFieldInput,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 400 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof ListFieldInput>;

export const Default: Story = {
  render: function DefaultStory() {
    const [values, setValues] = React.useState<string[]>([]);
    return (
      <ListFieldInput
        values={values}
        onChange={setValues}
        placeholder="Type and press Enter..."
      />
    );
  },
};

export const WithValues: Story = {
  render: function WithValuesStory() {
    const [values, setValues] = React.useState([
      "admin@example.com",
      "user@example.com",
      "dev@example.com",
    ]);
    return (
      <ListFieldInput
        values={values}
        onChange={setValues}
        placeholder="Add email..."
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <ListFieldInput
      values={["locked-item"]}
      onChange={() => {}}
      placeholder="Cannot edit"
      disabled
    />
  ),
};

export const ErrorState: Story = {
  render: function ErrorStory() {
    const [values, setValues] = React.useState(["invalid"]);
    return (
      <ListFieldInput
        values={values}
        onChange={setValues}
        placeholder="Add value..."
        error
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/ListFieldInput.tsx
================================================
import { useState, KeyboardEvent } from "react";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Button from "@/refresh-components/buttons/Button";
import { SvgX } from "@opal/icons";
interface ListFieldInputProps {
  values: string[];
  onChange: (values: string[]) => void;
  placeholder?: string;
  disabled?: boolean;
  error?: boolean;
}

/**
 * ListFieldInput is a component that allows the user to input a list of values by typing and pressing Enter.
 * It displays the values in a list of chips, and allows the user to add and remove values.

 * @param values - The array of values to display in the input field.
 * @param onChange - The function to call when the value changes.
 * @param placeholder - The placeholder text to display in the input field.
 * @param disabled - Whether the input field is disabled.
 **/
export function ListFieldInput({
  values,
  onChange,
  placeholder = "",
  disabled = false,
  error = false,
}: ListFieldInputProps) {
  const [inputValue, setInputValue] = useState("");

  const handleKeyDown = (e: KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Enter" && inputValue.trim()) {
      e.preventDefault();
      const trimmedValue = inputValue.trim();

      // Avoid duplicates
      if (!values.includes(trimmedValue)) {
        onChange([...values, trimmedValue]);
      }

      setInputValue("");
    }
  };

  const removeValue = (indexToRemove: number) => {
    onChange(values.filter((_, index) => index !== indexToRemove));
  };

  return (
    <div className="flex flex-col w-full space-y-2 mb-4">
      <InputTypeIn
        placeholder={placeholder}
        value={inputValue}
        variant={disabled ? "disabled" : error ? "error" : undefined}
        onChange={(e) => setInputValue(e.target.value)}
        onKeyDown={handleKeyDown}
      />

      <div className="mt-3">
        <div className="flex flex-wrap gap-1.5">
          {values.map((value, index) => (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <Button
              key={index}
              internal
              secondary
              type="button"
              aria-label={`Remove ${value}`}
              onClick={() => removeValue(index)}
              rightIcon={SvgX}
              className="rounded h-8"
            >
              {value}
            </Button>
          ))}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/inputs/PasswordInputTypeIn.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import React from "react";
import PasswordInputTypeIn from "./PasswordInputTypeIn";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof PasswordInputTypeIn> = {
  title: "refresh-components/inputs/PasswordInputTypeIn",
  component: PasswordInputTypeIn,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ width: 320 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof PasswordInputTypeIn>;

export const Default: Story = {
  render: function DefaultStory() {
    const [value, setValue] = React.useState("");
    return (
      <PasswordInputTypeIn
        placeholder="Enter password..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
      />
    );
  },
};

export const WithValue: Story = {
  render: function WithValueStory() {
    const [value, setValue] = React.useState("supersecret123");
    return (
      <PasswordInputTypeIn
        placeholder="Enter password..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
      />
    );
  },
};

export const NonRevealable: Story = {
  render: function NonRevealableStory() {
    const [value, setValue] = React.useState("stored-secret-value");
    return (
      <PasswordInputTypeIn
        placeholder="Stored secret"
        value={value}
        onChange={(e) => setValue(e.target.value)}
        isNonRevealable
      />
    );
  },
};

export const Disabled: Story = {
  render: () => (
    <PasswordInputTypeIn
      placeholder="Cannot edit"
      value="disabled-password"
      onChange={() => {}}
      disabled
    />
  ),
};

export const ErrorState: Story = {
  render: function ErrorStory() {
    const [value, setValue] = React.useState("bad");
    return (
      <PasswordInputTypeIn
        placeholder="Enter password..."
        value={value}
        onChange={(e) => setValue(e.target.value)}
        error
      />
    );
  },
};


================================================
FILE: web/src/refresh-components/inputs/PasswordInputTypeIn.test.ts
================================================
import { computeMaskedInputChange } from "./PasswordInputTypeIn";

describe("computeMaskedInputChange", () => {
  const MASK = "∗"; // ASTERISK OPERATOR (U+2217)

  test("handles typing at any position", () => {
    // Typing "x" in middle of "abcd" -> "abxcd"
    const result = computeMaskedInputChange(
      MASK.repeat(2) + "x" + MASK.repeat(2),
      "abcd",
      3,
      { start: 2, end: 2 }
    );
    expect(result).toEqual({ newValue: "abxcd", cursorPosition: 3 });
  });

  test("handles deletion", () => {
    // Delete at position 1 of "abcd" -> "acd"
    const result = computeMaskedInputChange(MASK.repeat(3), "abcd", 1, {
      start: 1,
      end: 1,
    });
    expect(result).toEqual({ newValue: "acd", cursorPosition: 1 });
  });

  test("handles selection replacement", () => {
    // Select "bc" in "abcd", type "xyz" -> "axyzd"
    const result = computeMaskedInputChange(MASK + "xyz" + MASK, "abcd", 4, {
      start: 1,
      end: 3,
    });
    expect(result).toEqual({ newValue: "axyzd", cursorPosition: 4 });
  });

  test("handles clearing the field", () => {
    const result = computeMaskedInputChange("", "password", 0, {
      start: 0,
      end: 8,
    });
    expect(result).toEqual({ newValue: "", cursorPosition: 0 });
  });

  test("preserves mask character in user input", () => {
    // Pasting "∗∗" to replace "bc" in "abcd" -> "a∗∗d"
    const result = computeMaskedInputChange(
      MASK.repeat(4), // display shows 4 masks
      "abcd",
      3,
      { start: 1, end: 3 }
    );
    expect(result).toEqual({ newValue: "a∗∗d", cursorPosition: 3 });
  });
});


================================================
FILE: web/src/refresh-components/inputs/PasswordInputTypeIn.tsx
================================================
"use client";

import * as React from "react";
import InputTypeIn, {
  InputTypeInProps,
} from "@/refresh-components/inputs/InputTypeIn";
import { Button } from "@opal/components";
import { noProp } from "@/lib/utils";
import { SvgEye, SvgEyeClosed } from "@opal/icons";

/**
 * Custom mask character for password display.
 *
 * We use ASTERISK OPERATOR (U+2217) instead of the browser's native password
 * masking (typically bullet •) to follow our design guidelines. This requires
 * custom change handling logic to track the real value while displaying masks.
 */
const MASK_CHARACTER = "∗";

// Backend placeholder pattern - indicates a stored value that can't be revealed
const BACKEND_PLACEHOLDER_PATTERN = /^•+$/; // All bullet characters (U+2022)

/**
 * Check if a value is a backend placeholder (all bullet characters).
 * The backend sends this to indicate a stored secret exists without revealing it.
 */
function isBackendPlaceholder(value: string): boolean {
  return !!value && BACKEND_PLACEHOLDER_PATTERN.test(value);
}

export interface SelectionRange {
  start: number;
  end: number;
}

export interface MaskedInputChangeResult {
  newValue: string;
  cursorPosition: number;
}

/**
 * Computes the real value from a masked input change event.
 *
 * Since we display mask characters (∗) instead of the actual password,
 * we need to reverse-engineer what the user typed/deleted by comparing
 * the new display value with the previous real value and selection state.
 *
 * @param newDisplayValue - The new value from the input (mix of masks and typed chars)
 * @param previousValue - The actual password value before the change
 * @param cursorPosition - Current cursor position after the change
 * @param previousSelection - Selection range before the change occurred
 * @returns The computed real value and where to place the cursor
 */
export function computeMaskedInputChange(
  newDisplayValue: string,
  previousValue: string,
  cursorPosition: number,
  previousSelection: SelectionRange
): MaskedInputChangeResult {
  const oldLength = previousValue.length;
  const newLength = newDisplayValue.length;
  const hadSelection = previousSelection.end > previousSelection.start;

  // Field was cleared
  if (newLength === 0) {
    return { newValue: "", cursorPosition: 0 };
  }

  // Text was selected and replaced/deleted
  if (hadSelection) {
    const selectionLength = previousSelection.end - previousSelection.start;
    const insertedLength = newLength - oldLength + selectionLength;

    // Extract inserted characters from their position in the display value
    const insertedChars = newDisplayValue.slice(
      previousSelection.start,
      previousSelection.start + insertedLength
    );

    const newValue =
      previousValue.slice(0, previousSelection.start) +
      insertedChars +
      previousValue.slice(previousSelection.end);

    return {
      newValue,
      cursorPosition: previousSelection.start + insertedChars.length,
    };
  }

  // Characters were added (typed or pasted) without selection
  if (newLength > oldLength) {
    const charsAdded = newLength - oldLength;
    const insertPos = cursorPosition - charsAdded;
    const addedChars = newDisplayValue.slice(insertPos, cursorPosition);

    return {
      newValue:
        previousValue.slice(0, insertPos) +
        addedChars +
        previousValue.slice(insertPos),
      cursorPosition,
    };
  }

  // Characters were deleted without selection
  if (newLength < oldLength) {
    const charsDeleted = oldLength - newLength;
    const deleteEnd = cursorPosition + charsDeleted;

    return {
      newValue:
        previousValue.slice(0, cursorPosition) + previousValue.slice(deleteEnd),
      cursorPosition,
    };
  }

  // Same length without selection - no change
  return { newValue: previousValue, cursorPosition };
}

export interface PasswordInputTypeInProps
  extends Omit<
    InputTypeInProps,
    "type" | "rightSection" | "leftSearchIcon" | "variant"
  > {
  /**
   * Ref to the input element.
   */
  ref?: React.Ref<HTMLInputElement>;
  /**
   * Whether the input is disabled.
   */
  disabled?: boolean;
  /**
   * Whether the input has an error.
   */
  error?: boolean;
  /**
   * When true, the reveal toggle is disabled.
   * Use this when displaying a stored/masked value from the backend
   * that cannot actually be revealed.
   * The input remains editable so users can type a new value.
   */
  isNonRevealable?: boolean;
}

/**
 * PasswordInputTypeIn Component
 *
 * A password input with custom mask character (∗) and reveal/hide toggle.
 * Built on top of InputTypeIn for consistency.
 *
 * Features:
 * - Custom mask character (∗) instead of browser default
 * - Show/hide toggle button only visible when input has value or is focused
 * - When revealed, the toggle icon uses action style (more prominent)
 * - When hidden, the toggle icon uses internal style (muted)
 * - Optional `isNonRevealable` prop to disable reveal (for stored backend values)
 */
export default function PasswordInputTypeIn({
  ref,
  isNonRevealable = false,
  value,
  onChange,
  onFocus,
  onBlur,
  disabled,
  error,
  showClearButton = false,
  ...props
}: PasswordInputTypeInProps) {
  const [isPasswordVisible, setIsPasswordVisible] = React.useState(false);
  const [isFocused, setIsFocused] = React.useState(false);
  const containerRef = React.useRef<HTMLDivElement>(null);

  // Track selection range before changes occur
  const selectionRef = React.useRef<{ start: number; end: number }>({
    start: 0,
    end: 0,
  });

  const realValue = String(value || "");
  const hasValue = realValue.length > 0;
  const effectiveNonRevealable =
    isNonRevealable || isBackendPlaceholder(realValue);
  const isHidden = !isPasswordVisible || effectiveNonRevealable;

  const getDisplayValue = (): string => {
    if (isHidden) {
      return MASK_CHARACTER.repeat(realValue.length);
    }
    return realValue;
  };

  const handleContainerFocus = React.useCallback(() => {
    setIsFocused(true);
  }, []);

  const handleContainerBlur = React.useCallback(
    (e: React.FocusEvent<HTMLDivElement>) => {
      if (containerRef.current?.contains(e.relatedTarget as Node)) {
        return;
      }
      setIsFocused(false);
    },
    []
  );

  const handleFocus = React.useCallback(
    (e: React.FocusEvent<HTMLInputElement>) => {
      onFocus?.(e);
    },
    [onFocus]
  );

  const handleBlur = React.useCallback(
    (e: React.FocusEvent<HTMLInputElement>) => {
      onBlur?.(e);
    },
    [onBlur]
  );

  // Track selection before any change occurs (used by both onSelect and onKeyDown)
  const captureSelection = React.useCallback(
    (e: React.SyntheticEvent<HTMLInputElement>) => {
      const target = e.target as HTMLInputElement;
      selectionRef.current = {
        start: target.selectionStart ?? 0,
        end: target.selectionEnd ?? 0,
      };
    },
    []
  );

  const handleChange = React.useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      // When visible, pass through directly - no masking needed
      if (!isHidden) {
        onChange?.(e);
        return;
      }

      const input = e.target;
      const cursorPos = input.selectionStart ?? input.value.length;

      // Compute the real value from the masked input change
      const result = computeMaskedInputChange(
        input.value,
        realValue,
        cursorPos,
        selectionRef.current
      );

      // Restore cursor position after React re-renders with new masked value
      requestAnimationFrame(() => {
        if (input && document.activeElement === input) {
          input.setSelectionRange(result.cursorPosition, result.cursorPosition);
        }
      });

      // Create synthetic event for Formik compatibility
      const syntheticEvent = {
        target: { name: input.name, value: result.newValue, type: "text" },
        currentTarget: {
          name: input.name,
          value: result.newValue,
          type: "text",
        },
        type: "change",
        persist: () => {},
      } as unknown as React.ChangeEvent<HTMLInputElement>;

      onChange?.(syntheticEvent);
    },
    [isHidden, realValue, onChange]
  );

  const showToggleButton = hasValue || isFocused;
  const isRevealed = isPasswordVisible && !effectiveNonRevealable;
  const toggleLabel = effectiveNonRevealable
    ? "Value cannot be revealed"
    : isPasswordVisible
      ? "Hide password"
      : "Show password";

  return (
    <div
      ref={containerRef}
      className="contents"
      onFocus={handleContainerFocus}
      onBlur={handleContainerBlur}
    >
      <InputTypeIn
        ref={ref}
        value={getDisplayValue()}
        onChange={handleChange}
        onFocus={handleFocus}
        onBlur={handleBlur}
        onSelect={captureSelection}
        onKeyDown={captureSelection}
        variant={disabled ? "disabled" : error ? "error" : undefined}
        showClearButton={showClearButton}
        autoComplete="off"
        data-ph-no-capture
        rightSection={
          showToggleButton ? (
            <Button
              disabled={disabled || effectiveNonRevealable}
              icon={isRevealed ? SvgEye : SvgEyeClosed}
              onClick={noProp(() => setIsPasswordVisible((v) => !v))}
              type="button"
              variant={isRevealed ? "action" : undefined}
              prominence="tertiary"
              size="sm"
              tooltipSide="left"
              tooltip={toggleLabel}
              aria-label={toggleLabel}
            />
          ) : undefined
        }
        {...props}
      />
    </div>
  );
}


================================================
FILE: web/src/refresh-components/inputs/Switch.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Switch from "./Switch";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof Switch> = {
  title: "refresh-components/inputs/Switch",
  component: Switch,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof Switch>;

export const Default: Story = {
  args: {},
};

export const Checked: Story = {
  args: {
    checked: true,
  },
};

export const Disabled: Story = {
  args: {
    disabled: true,
  },
};


================================================
FILE: web/src/refresh-components/inputs/Switch.tsx
================================================
"use client";

import React, { useState } from "react";
import { cn } from "@/lib/utils";
import { WithoutStyles } from "@/types";

export interface SwitchProps
  extends WithoutStyles<
    Omit<React.ComponentPropsWithoutRef<"button">, "onChange">
  > {
  // Switch variants
  disabled?: boolean;

  checked?: boolean;
  defaultChecked?: boolean;
  onCheckedChange?: (checked: boolean) => void;
}

const Switch = React.forwardRef<HTMLButtonElement, SwitchProps>(
  (
    {
      disabled,

      checked: controlledChecked,
      defaultChecked,
      onCheckedChange,

      onClick,
      ...props
    },
    ref
  ) => {
    const [uncontrolledChecked, setUncontrolledChecked] = useState(
      defaultChecked ?? false
    );

    const isControlled = controlledChecked !== undefined;
    const checked = isControlled ? controlledChecked : uncontrolledChecked;

    function handleClick(event: React.MouseEvent<HTMLButtonElement>) {
      if (disabled) return;

      const newChecked = !checked;

      if (!isControlled) setUncontrolledChecked(newChecked);
      onClick?.(event);
      onCheckedChange?.(newChecked);
    }

    return (
      <button
        ref={ref}
        type="button"
        role="switch"
        aria-checked={checked}
        className={cn(
          "peer inline-flex h-[1.125rem] w-[2rem] shrink-0 cursor-pointer items-center rounded-full transition-colors focus-visible:outline-none",
          disabled
            ? checked
              ? "switch-disabled-checked"
              : "switch-disabled"
            : checked
              ? "switch-normal-checked"
              : "switch-normal"
        )}
        disabled={disabled}
        onClick={handleClick}
        {...props}
      >
        <span
          className={cn(
            "pointer-events-none block h-[0.875rem] w-[0.875rem] rounded-full ring-0 transition-transform",
            checked ? "translate-x-[15px]" : "translate-x-[1px]",
            disabled ? "switch-thumb-disabled" : "switch-thumb"
          )}
        />
      </button>
    );
  }
);
Switch.displayName = "Switch";

export default Switch;


================================================
FILE: web/src/refresh-components/inputs/styles.ts
================================================
export type Variants =
  | "primary"
  | "internal"
  | "error"
  | "disabled"
  | "readOnly";

type ClassNamesMap = Record<Variants, string | null>;

export const MIN_WIDTH_CLASS = "min-w-[14rem]";

export const wrapperClasses: ClassNamesMap = {
  primary: "input-normal",
  internal: null,
  error: "input-error",
  disabled: "input-disabled",
  readOnly: "bg-transparent border rounded-08",
} as const;

export const innerClasses: ClassNamesMap = {
  primary:
    "text-text-04 placeholder:!font-secondary-body placeholder:text-text-02",
  internal: null,
  error: null,
  disabled: "text-text-02",
  readOnly: null,
} as const;

export const iconClasses: ClassNamesMap = {
  primary: "stroke-text-03",
  internal: "stroke-text-03",
  error: "stroke-text-03",
  disabled: "stroke-text-01",
  readOnly: "stroke-text-01",
} as const;

export const textClasses: ClassNamesMap = {
  primary: "text-text-04",
  internal: "text-text-04",
  error: "text-text-04",
  disabled: "text-text-01",
  readOnly: "text-text-01",
} as const;


================================================
FILE: web/src/refresh-components/layouts/ConfirmationModalLayout.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { useState } from "react";
import ConfirmationModalLayout from "./ConfirmationModalLayout";
import { SvgAlertTriangle, SvgTrash, SvgCheckCircle } from "@opal/icons";
import { Button } from "@opal/components";

const meta: Meta<typeof ConfirmationModalLayout> = {
  title: "refresh-components/modals/ConfirmationModalLayout",
  component: ConfirmationModalLayout,
  tags: ["autodocs"],
  parameters: {
    layout: "fullscreen",
  },
};

export default meta;
type Story = StoryObj<typeof ConfirmationModalLayout>;

/**
 * NOTE: ConfirmationModalLayout calls `useModalClose` internally, which reads
 * from ModalContext. Outside of that context, it falls back to the `onClose`
 * prop, so these stories work without wrapping in a ModalContext provider.
 */

export const DeleteConfirmation: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    return (
      <>
        <button onClick={() => setOpen(true)}>Open Modal</button>
        {open && (
          <ConfirmationModalLayout
            icon={SvgTrash}
            title="Delete Item"
            description="Are you sure you want to delete this item? This action cannot be undone."
            submit={
              <Button variant="danger" onClick={() => setOpen(false)}>
                Delete
              </Button>
            }
            onClose={() => setOpen(false)}
          />
        )}
      </>
    );
  },
};

export const WarningConfirmation: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    return (
      <>
        <button onClick={() => setOpen(true)}>Open Modal</button>
        {open && (
          <ConfirmationModalLayout
            icon={SvgAlertTriangle}
            title="Proceed with Caution"
            description="This operation will affect all users in the organization."
            submit={<Button onClick={() => setOpen(false)}>Confirm</Button>}
            onClose={() => setOpen(false)}
          />
        )}
      </>
    );
  },
};

export const WithChildren: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    return (
      <>
        <button onClick={() => setOpen(true)}>Open Modal</button>
        {open && (
          <ConfirmationModalLayout
            icon={SvgCheckCircle}
            title="Review Changes"
            description="Please review the following changes before confirming."
            submit={<Button onClick={() => setOpen(false)}>Approve</Button>}
            onClose={() => setOpen(false)}
          >
            <ul style={{ listStyle: "disc", paddingLeft: 20 }}>
              <li>Updated email notification settings</li>
              <li>Changed default connector timeout to 30s</li>
              <li>Enabled automatic document syncing</li>
            </ul>
          </ConfirmationModalLayout>
        )}
      </>
    );
  },
};

export const HiddenCancel: Story = {
  render: () => {
    const [open, setOpen] = useState(true);
    return (
      <>
        <button onClick={() => setOpen(true)}>Open Modal</button>
        {open && (
          <ConfirmationModalLayout
            icon={SvgCheckCircle}
            title="Welcome!"
            description="Thanks for signing up. Let's get you started."
            hideCancel
            submit={<Button onClick={() => setOpen(false)}>Get Started</Button>}
            onClose={() => setOpen(false)}
          />
        )}
      </>
    );
  },
};


================================================
FILE: web/src/refresh-components/layouts/ConfirmationModalLayout.tsx
================================================
"use client";

import React from "react";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Modal from "@/refresh-components/Modal";
import { useModalClose } from "../contexts/ModalContext";

export interface ConfirmationModalProps {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  description?: string;
  children?: React.ReactNode;

  submit: React.ReactNode;
  hideCancel?: boolean;
  onClose?: () => void;
  /** If false, removes the gray background from the body. Defaults to true. */
  twoTone?: boolean;
}

export default function ConfirmationModalLayout({
  icon,
  title,
  description,
  children,

  submit,
  hideCancel,
  onClose: externalOnClose,
  twoTone = true,
}: ConfirmationModalProps) {
  const onClose = useModalClose(externalOnClose);

  return (
    <Modal open onOpenChange={(isOpen) => !isOpen && onClose?.()}>
      <Modal.Content width="sm">
        <Modal.Header
          icon={icon}
          title={title}
          description={description}
          onClose={onClose}
        />
        <Modal.Body twoTone={twoTone}>
          {typeof children === "string" ? (
            <Text as="p" text03>
              {children}
            </Text>
          ) : (
            children
          )}
        </Modal.Body>
        <Modal.Footer>
          {!hideCancel && (
            <Button prominence="secondary" onClick={onClose}>
              Cancel
            </Button>
          )}
          {submit}
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/refresh-components/loaders/SimpleLoader.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SimpleLoader from "./SimpleLoader";

const meta: Meta<typeof SimpleLoader> = {
  title: "refresh-components/loaders/SimpleLoader",
  component: SimpleLoader,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof SimpleLoader>;

export const Default: Story = {
  args: {},
};

export const Large: Story = {
  args: {
    className: "h-8 w-8",
  },
};

export const CustomColor: Story = {
  args: {
    className: "h-6 w-6 stroke-text-05",
  },
};


================================================
FILE: web/src/refresh-components/loaders/SimpleLoader.tsx
================================================
import type { IconProps } from "@opal/types";
import { cn } from "@/lib/utils";
import { SvgLoader } from "@opal/icons";

export default function SimpleLoader({ className, ...props }: IconProps) {
  return (
    <SvgLoader
      className={cn("h-[1rem] w-[1rem] animate-spin", className)}
      {...props}
    />
  );
}


================================================
FILE: web/src/refresh-components/messages/FieldMessage.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import { FieldMessage } from "./FieldMessage";

const meta: Meta<typeof FieldMessage> = {
  title: "refresh-components/messages/FieldMessage",
  component: FieldMessage,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof FieldMessage>;

export const Error: Story = {
  args: {
    variant: "error",
    children: (
      <FieldMessage.Content>This field is required.</FieldMessage.Content>
    ),
  },
};

export const Success: Story = {
  args: {
    variant: "success",
    children: (
      <FieldMessage.Content>Username is available!</FieldMessage.Content>
    ),
  },
};

export const Warning: Story = {
  args: {
    variant: "warning",
    children: (
      <FieldMessage.Content>This action cannot be undone.</FieldMessage.Content>
    ),
  },
};

export const Loading: Story = {
  args: {
    variant: "loading",
    children: (
      <FieldMessage.Content>Checking availability...</FieldMessage.Content>
    ),
  },
};

export const Info: Story = {
  args: {
    variant: "info",
    children: (
      <FieldMessage.Content>
        Passwords must be at least 8 characters.
      </FieldMessage.Content>
    ),
  },
};

export const Idle: Story = {
  args: {
    variant: "idle",
    children: (
      <FieldMessage.Content>Enter your email address.</FieldMessage.Content>
    ),
  },
};

export const AllVariants: Story = {
  name: "All Variants",
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 12 }}>
      <FieldMessage variant="error">
        <FieldMessage.Content>Error message</FieldMessage.Content>
      </FieldMessage>
      <FieldMessage variant="success">
        <FieldMessage.Content>Success message</FieldMessage.Content>
      </FieldMessage>
      <FieldMessage variant="warning">
        <FieldMessage.Content>Warning message</FieldMessage.Content>
      </FieldMessage>
      <FieldMessage variant="loading">
        <FieldMessage.Content>Loading message</FieldMessage.Content>
      </FieldMessage>
      <FieldMessage variant="info">
        <FieldMessage.Content>Info message</FieldMessage.Content>
      </FieldMessage>
      <FieldMessage variant="idle">
        <FieldMessage.Content>Idle message</FieldMessage.Content>
      </FieldMessage>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/messages/FieldMessage.tsx
================================================
import { cn } from "@/lib/utils";
import React from "react";
import Text from "../texts/Text";
import {
  SvgAlertCircle,
  SvgCheckCircle,
  SvgLoader,
  SvgXOctagon,
} from "@opal/icons";
type MessageVariant =
  | "error"
  | "success"
  | "loading"
  | "warning"
  | "info"
  | "idle";

const iconMap: Record<MessageVariant, React.ReactNode> = {
  error: <SvgXOctagon className="h-3 w-3 stroke-status-error-05" />,
  success: <SvgCheckCircle className="h-3 w-3 stroke-status-success-05" />,
  loading: <SvgLoader className="h-3 w-3 stroke-text-02 animate-spin" />,
  warning: <SvgAlertCircle className="h-3 w-3 stroke-status-warning-05" />,
  info: <SvgAlertCircle className="h-3 w-3 stroke-text-03" />,
  idle: null,
};

interface FieldMessageRootProps extends React.HTMLAttributes<HTMLDivElement> {
  variant: MessageVariant;
  children: React.ReactNode;
}

const FieldMessageRoot: React.FC<FieldMessageRootProps> = ({
  variant,
  className,
  children,
  ...props
}) => {
  const icon = iconMap[variant];

  return (
    <div
      className={cn("flex flex-row items-center gap-x-0.5", className)}
      {...props}
    >
      {icon !== null && (
        <div className="w-4 h-4 flex items-center justify-center">{icon}</div>
      )}
      {children}
    </div>
  );
};

interface FieldMessageContentProps
  extends React.HTMLAttributes<HTMLParagraphElement> {
  children: React.ReactNode;
}

const FieldMessageContent: React.FC<FieldMessageContentProps> = ({
  className,
  children,
  ...props
}) => {
  return (
    <Text
      as="p"
      text03
      secondaryBody
      className={cn("ml-0.5", className)}
      {...props}
    >
      {children}
    </Text>
  );
};

export const FieldMessage = Object.assign(FieldMessageRoot, {
  Content: FieldMessageContent,
});


================================================
FILE: web/src/refresh-components/messages/InfoBlock.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import InfoBlock from "./InfoBlock";
import { SvgAlertCircle, SvgCheckCircle, SvgSettings } from "@opal/icons";

const meta: Meta<typeof InfoBlock> = {
  title: "refresh-components/messages/InfoBlock",
  component: InfoBlock,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof InfoBlock>;

export const Default: Story = {
  args: {
    icon: SvgAlertCircle,
    title: "Important Notice",
    description: "This is a description providing additional context.",
  },
};

export const TitleOnly: Story = {
  args: {
    icon: SvgCheckCircle,
    title: "All systems operational",
  },
};

export const WithCustomIcon: Story = {
  args: {
    icon: SvgSettings,
    title: "Configuration Required",
    description: "Please update your settings before continuing.",
  },
};

export const LongContent: Story = {
  args: {
    icon: SvgAlertCircle,
    title:
      "This is a very long title that should get truncated when it exceeds the available width",
    description:
      "And this is a very long description that provides detailed context about the situation at hand and should also truncate gracefully.",
  },
};


================================================
FILE: web/src/refresh-components/messages/InfoBlock.tsx
================================================
"use client";

import React, { memo } from "react";
import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import Truncated from "@/refresh-components/texts/Truncated";

export interface InfoBlockProps extends React.HTMLAttributes<HTMLDivElement> {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  description?: string;
  iconClassName?: string;
}

const InfoBlockInner = React.forwardRef<HTMLDivElement, InfoBlockProps>(
  (
    { icon: Icon, title, description, iconClassName, className, ...props },
    ref
  ) => {
    return (
      <div
        ref={ref}
        className={cn("flex flex-row items-start gap-1", className)}
        {...props}
      >
        {/* Icon Container */}
        <div className="flex items-center justify-center p-0.5 size-5 shrink-0">
          <Icon className={cn("size-4 stroke-text-02", iconClassName)} />
        </div>

        {/* Text Content */}
        <div className="flex flex-col flex-1 items-start min-w-0">
          <Truncated mainUiAction text04>
            {title}
          </Truncated>
          {description && (
            <Truncated secondaryBody text03>
              {description}
            </Truncated>
          )}
        </div>
      </div>
    );
  }
);
const InfoBlock = memo(InfoBlockInner);
InfoBlock.displayName = "InfoBlock";

export default InfoBlock;


================================================
FILE: web/src/refresh-components/messages/Message.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Message from "./Message";

const meta: Meta<typeof Message> = {
  title: "refresh-components/messages/Message",
  component: Message,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Message>;

export const Default: Story = {
  args: {
    text: "This is a default message.",
  },
};

export const FlashInfo: Story = {
  args: {
    flash: true,
    info: true,
    text: "Your changes have been saved.",
    description: "The settings will take effect immediately.",
  },
};

export const FlashSuccess: Story = {
  args: {
    flash: true,
    success: true,
    text: "Operation completed successfully!",
  },
};

export const FlashWarning: Story = {
  args: {
    flash: true,
    warning: true,
    text: "Your session is about to expire.",
    description: "Please save your work before the session ends.",
  },
};

export const FlashError: Story = {
  args: {
    flash: true,
    error: true,
    text: "Something went wrong.",
    description: "Please try again or contact support.",
  },
};

export const StaticInfo: Story = {
  args: {
    static: true,
    info: true,
    text: "This is informational.",
    description: "Here is some extra context.",
  },
};

export const StaticSuccess: Story = {
  args: {
    static: true,
    success: true,
    text: "All checks passed.",
  },
};

export const StaticWarning: Story = {
  args: {
    static: true,
    warning: true,
    text: "Proceed with caution.",
  },
};

export const StaticError: Story = {
  args: {
    static: true,
    error: true,
    text: "Failed to load resource.",
  },
};

export const MediumSize: Story = {
  args: {
    flash: true,
    info: true,
    medium: true,
    text: "Medium sized message.",
    description: "Compact layout for tight spaces.",
  },
};

export const WithAction: Story = {
  args: {
    flash: true,
    warning: true,
    text: "Unsaved changes detected.",
    actions: "Undo",
    onAction: () => alert("Action clicked"),
  },
};

export const WithoutIcon: Story = {
  args: {
    flash: true,
    info: true,
    icon: false,
    text: "Message without an icon.",
  },
};

export const WithoutCloseButton: Story = {
  args: {
    flash: true,
    success: true,
    close: false,
    text: "This message cannot be dismissed.",
  },
};

export const AllLevels: Story = {
  name: "All Levels (Flash / Large)",
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
      <Message flash default text="Default flash message" />
      <Message flash info text="Info flash message" />
      <Message flash success text="Success flash message" />
      <Message flash warning text="Warning flash message" />
      <Message flash error text="Error flash message" />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/messages/Message.tsx
================================================
"use client";

import React, { useMemo } from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import {
  SvgAlertCircle,
  SvgAlertTriangle,
  SvgCheckCircle,
  SvgX,
  SvgXOctagon,
} from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";

const containerClasses = {
  flash: {
    default: {
      large: [
        "bg-background-neutral-00",
        "shadow-02",
        "rounded-16",
        "w-[40rem]",
      ],
      medium: [
        "bg-background-neutral-00",
        "shadow-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    info: {
      large: [
        "bg-status-info-00",
        "border",
        "border-status-info-05",
        "rounded-16",
        "w-[40rem]",
      ],
      medium: [
        "bg-status-info-00",
        "border",
        "border-status-info-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    success: {
      large: [
        "bg-status-success-00",
        "border",
        "border-status-success-05",
        "rounded-16",
        "w-[40rem]",
      ],
      medium: [
        "bg-status-success-00",
        "border",
        "border-status-success-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    warning: {
      large: [
        "bg-status-warning-00",
        "border",
        "border-status-warning-05",
        "rounded-16",
        "w-[40rem]",
      ],
      medium: [
        "bg-status-warning-00",
        "border",
        "border-status-warning-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    error: {
      large: [
        "bg-status-error-00",
        "border",
        "border-status-error-05",
        "rounded-16",
        "w-[40rem]",
      ],
      medium: [
        "bg-status-error-00",
        "border",
        "border-status-error-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
  },
  static: {
    default: {
      large: [
        "bg-background-tint-01",
        "border",
        "border-border-01",
        "rounded-16",
        "w-[19.375rem]",
      ],
      medium: [
        "bg-background-tint-01",
        "border",
        "border-border-01",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    info: {
      large: [
        "bg-status-info-00",
        "border",
        "border-status-info-02",
        "rounded-16",
        "w-[19.375rem]",
      ],
      medium: [
        "bg-status-info-00",
        "border",
        "border-status-info-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    success: {
      large: [
        "bg-status-success-00",
        "border",
        "border-status-success-02",
        "rounded-16",
        "w-[19.375rem]",
      ],
      medium: [
        "bg-status-success-00",
        "border",
        "border-status-success-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    warning: {
      large: [
        "bg-status-warning-00",
        "border",
        "border-status-warning-02",
        "rounded-16",
        "w-[19.375rem]",
      ],
      medium: [
        "bg-status-warning-00",
        "border",
        "border-status-warning-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
    error: {
      large: [
        "bg-status-error-00",
        "border",
        "border-status-error-02",
        "rounded-16",
        "w-[19.375rem]",
      ],
      medium: [
        "bg-status-error-00",
        "border",
        "border-status-error-02",
        "rounded-12",
        "w-[19.375rem]",
      ],
    },
  },
} as const;

const iconClasses = {
  default: "stroke-text-03",
  info: "stroke-status-info-05",
  success: "stroke-status-success-05",
  warning: "stroke-status-warning-05",
  error: "stroke-status-error-05",
} as const;

const textClasses = {
  flash: {
    text: "font-main-ui-action text-text-04",
    description: "font-secondary-body text-text-02",
  },
  static: {
    text: "font-main-ui-body text-text-04",
    description: "font-secondary-body text-text-02",
  },
} as const;

export interface MessageProps extends React.HTMLAttributes<HTMLDivElement> {
  // Type variants:
  flash?: boolean;
  static?: boolean;

  // Level variants:
  default?: boolean;
  info?: boolean;
  success?: boolean;
  warning?: boolean;
  error?: boolean;

  // Size variants:
  large?: boolean;
  medium?: boolean;

  // Content:
  text: string;
  description?: string;

  // Features:
  icon?: boolean;
  iconComponent?: IconFunctionComponent;
  actions?: boolean | string;
  close?: boolean;

  // Action button customization:
  actionIcon?: IconFunctionComponent;
  actionPrimary?: boolean;

  // Callbacks:
  onClose?: () => void;
  onAction?: () => void;
}

function MessageInner(
  {
    flash,
    static: staticProp,

    default: defaultProp,
    info,
    success,
    warning,
    error,

    large,
    medium,

    text,
    description,

    icon = true,
    iconComponent,
    actions,
    close = true,

    actionIcon,
    actionPrimary,

    onClose,
    onAction,

    className,
    ...props
  }: MessageProps,
  ref: React.ForwardedRef<HTMLDivElement>
) {
  const type = flash ? "flash" : staticProp ? "static" : "flash";
  const level = info
    ? "info"
    : success
      ? "success"
      : warning
        ? "warning"
        : error
          ? "error"
          : defaultProp
            ? "default"
            : "default";
  const size = large ? "large" : medium ? "medium" : "large";

  const containerClass = useMemo(
    () => containerClasses[type][level][size],
    [type, level, size]
  );

  const iconClass = useMemo(() => iconClasses[level], [level]);

  const textClass = useMemo(() => textClasses[type].text, [type]);
  const descriptionClass = useMemo(() => textClasses[type].description, [type]);

  const IconComponent = iconComponent
    ? iconComponent
    : level === "success"
      ? SvgCheckCircle
      : level === "warning"
        ? SvgAlertTriangle
        : level === "error"
          ? SvgXOctagon
          : SvgAlertCircle;

  const contentPadding = size === "large" ? "p-2" : "p-1";
  const closeButtonSize =
    size === "large" ? "size-[2.25rem]" : "size-[1.75rem]";

  return (
    <div
      ref={ref}
      className={cn(
        "flex flex-row items-start gap-1 p-1",
        containerClass,
        className
      )}
      {...props}
    >
      {/* Content Container */}
      <div
        className={cn(
          "flex flex-1 flex-row items-start gap-1 min-w-0",
          contentPadding
        )}
      >
        {/* Icon Container */}
        {icon && (
          <div className="flex items-center justify-center p-0.5 size-[1.25rem] shrink-0">
            <IconComponent className={cn("size-[1rem]", iconClass)} />
          </div>
        )}

        {/* Text Content */}
        <div className="flex flex-col flex-1 items-start min-w-0 px-0.5">
          <Text as="p" className={cn("w-full", textClass)}>
            {text}
          </Text>
          {description && (
            <Text as="p" className={cn("w-full", descriptionClass)}>
              {description}
            </Text>
          )}
        </div>
      </div>

      {/* Actions */}
      {actions && (
        <div className="flex items-center justify-end shrink-0 self-center pr-2">
          <Button
            prominence={actionPrimary ? "primary" : "secondary"}
            icon={actionIcon}
            onClick={onAction}
            size={size === "large" ? "lg" : "md"}
          >
            {typeof actions === "string" ? actions : "Cancel"}
          </Button>
        </div>
      )}

      {/* Close Container */}
      {close && (
        <div className="flex items-center justify-center shrink-0">
          <div className={cn("flex items-start", closeButtonSize)}>
            <Button
              prominence="internal"
              icon={SvgX}
              onClick={onClose}
              aria-label="Close"
              size={size === "large" ? "lg" : "sm"}
            />
          </div>
        </div>
      )}
    </div>
  );
}

const Message = React.forwardRef<HTMLDivElement, MessageProps>(MessageInner);
Message.displayName = "Message";

export default Message;


================================================
FILE: web/src/refresh-components/modals/MemoriesModal.tsx
================================================
"use client";

import { Fragment, useState, useRef, useEffect, useCallback } from "react";
import Modal from "@/refresh-components/Modal";
import { Section } from "@/layouts/general-layouts";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import CharacterCount from "@/refresh-components/CharacterCount";
import Separator from "@/refresh-components/Separator";
import TextSeparator from "@/refresh-components/TextSeparator";
import { toast } from "@/hooks/useToast";
import { useModalClose } from "@/refresh-components/contexts/ModalContext";
import { SvgAddLines, SvgMinusCircle, SvgPlusCircle } from "@opal/icons";
import {
  useMemoryManager,
  MAX_MEMORY_LENGTH,
  MAX_MEMORY_COUNT,
  LocalMemory,
} from "@/hooks/useMemoryManager";
import { cn } from "@/lib/utils";
import { useUser } from "@/providers/UserProvider";
import useUserPersonalization from "@/hooks/useUserPersonalization";
import type { MemoryItem } from "@/lib/types";

interface MemoryItemProps {
  memory: LocalMemory;
  originalIndex: number;
  onUpdate: (index: number, value: string) => void;
  onBlur: (index: number) => void;
  onRemove: (index: number) => void;
  shouldFocus?: boolean;
  onFocused?: () => void;
  shouldHighlight?: boolean;
  onHighlighted?: () => void;
}

function MemoryItem({
  memory,
  originalIndex,
  onUpdate,
  onBlur,
  onRemove,
  shouldFocus,
  onFocused,
  shouldHighlight,
  onHighlighted,
}: MemoryItemProps) {
  const [isFocused, setIsFocused] = useState(false);
  const [isHighlighting, setIsHighlighting] = useState(false);
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const wrapperRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    if (shouldFocus && textareaRef.current) {
      const el = textareaRef.current;
      el.focus();
      el.selectionStart = el.selectionEnd = el.value.length;
      onFocused?.();
    }
  }, [shouldFocus, onFocused]);

  useEffect(() => {
    if (!shouldHighlight) return;

    wrapperRef.current?.scrollIntoView({
      block: "start",
      behavior: "smooth",
    });
    setIsHighlighting(true);

    const timer = setTimeout(() => {
      setIsHighlighting(false);
      onHighlighted?.();
    }, 1000);

    return () => clearTimeout(timer);
  }, [shouldHighlight, onHighlighted]);

  return (
    <div
      ref={wrapperRef}
      className={cn(
        "rounded-08 w-full p-0.5 border border-transparent",
        "transition-colors ",
        isHighlighting &&
          "bg-action-link-01 hover:bg-action-link-01 border-action-link-05 duration-700"
      )}
    >
      <Section gap={0.25} alignItems="start">
        <Section flexDirection="row" alignItems="start" gap={0.5}>
          <InputTextArea
            ref={textareaRef}
            placeholder="Type or paste in a personal note or memory"
            value={memory.content}
            onChange={(e) => onUpdate(originalIndex, e.target.value)}
            onFocus={() => setIsFocused(true)}
            onBlur={() => {
              setIsFocused(false);
              void onBlur(originalIndex);
            }}
            onKeyDown={(e) => {
              if (
                e.key === "Enter" &&
                !e.shiftKey &&
                !e.nativeEvent.isComposing
              ) {
                e.preventDefault();
                textareaRef.current?.blur();
              }
            }}
            rows={1}
            autoResize
            maxRows={3}
            maxLength={MAX_MEMORY_LENGTH}
            resizable={false}
            className="bg-background-tint-01 hover:bg-background-tint-00 focus-within:bg-background-tint-00"
          />
          <Button
            disabled={!memory.content.trim() && memory.isNew}
            prominence="tertiary"
            icon={SvgMinusCircle}
            onClick={() => void onRemove(originalIndex)}
            aria-label="Remove Line"
            tooltip="Remove Line"
          />
        </Section>
        <div
          className={isFocused ? "visible" : "invisible h-0 overflow-hidden"}
        >
          <CharacterCount value={memory.content} limit={MAX_MEMORY_LENGTH} />
        </div>
      </Section>
    </div>
  );
}

function resolveTargetMemoryId(
  targetMemoryId: number | null | undefined,
  targetIndex: number | null | undefined,
  memories: MemoryItem[]
): number | null {
  if (targetMemoryId != null) return targetMemoryId;

  if (targetIndex != null && memories.length > 0) {
    // Backend index is ASC (oldest-first), frontend displays DESC (newest-first)
    const descIdx = memories.length - 1 - targetIndex;
    return memories[descIdx]?.id ?? null;
  }

  return null;
}

interface MemoriesModalProps {
  memories?: MemoryItem[];
  onSaveMemories?: (memories: MemoryItem[]) => Promise<boolean>;
  onClose?: () => void;
  initialTargetMemoryId?: number | null;
  initialTargetIndex?: number | null;
  highlightOnOpen?: boolean;
  focusNewLine?: boolean;
}

export default function MemoriesModal({
  memories: memoriesProp,
  onSaveMemories: onSaveMemoriesProp,
  onClose,
  initialTargetMemoryId,
  initialTargetIndex,
  highlightOnOpen = false,
  focusNewLine = false,
}: MemoriesModalProps) {
  const close = useModalClose(onClose);
  const [focusMemoryId, setFocusMemoryId] = useState<number | null>(null);

  // Self-fetching: when no props provided, fetch from UserProvider
  const { user, refreshUser, updateUserPersonalization } = useUser();
  const { handleSavePersonalization } = useUserPersonalization(
    user,
    updateUserPersonalization,
    {
      onSuccess: () => toast.success("Preferences saved"),
      onError: () => toast.error("Failed to save preferences"),
    }
  );

  useEffect(() => {
    if (memoriesProp === undefined) {
      void refreshUser();
    }
    // Only run on mount
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  const internalSaveMemories = useCallback(
    async (newMemories: MemoryItem[]): Promise<boolean> => {
      const result = await handleSavePersonalization(
        { memories: newMemories },
        true
      );
      return !!result;
    },
    [handleSavePersonalization]
  );

  const effectiveMemories =
    memoriesProp ?? user?.personalization?.memories ?? [];
  const effectiveSave = onSaveMemoriesProp ?? internalSaveMemories;

  // Drives scroll-into-view + highlight when opening from a FileTile click
  const [highlightMemoryId, setHighlightMemoryId] = useState<number | null>(
    null
  );

  useEffect(() => {
    const targetId = resolveTargetMemoryId(
      initialTargetMemoryId,
      initialTargetIndex,
      effectiveMemories
    );
    if (targetId == null) return;

    setFocusMemoryId(targetId);
    if (highlightOnOpen) {
      setHighlightMemoryId(targetId);
    }
  }, [initialTargetMemoryId, initialTargetIndex]);

  const {
    searchQuery,
    setSearchQuery,
    filteredMemories,
    totalLineCount,
    canAddMemory,
    handleAddMemory,
    handleUpdateMemory,
    handleRemoveMemory,
    handleBlurMemory,
  } = useMemoryManager({
    memories: effectiveMemories,
    onSaveMemories: effectiveSave,
    onNotify: (message, type) => toast[type](message),
  });

  // Always start with an empty card; optionally focus it (View/Add button)
  const hasAddedEmptyRef = useRef(false);
  useEffect(() => {
    if (hasAddedEmptyRef.current) return;
    hasAddedEmptyRef.current = true;

    const id = handleAddMemory();
    if (id !== null && focusNewLine) {
      setFocusMemoryId(id);
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  const onAddLine = () => {
    const id = handleAddMemory();
    if (id !== null) {
      setFocusMemoryId(id);
    }
  };

  return (
    <Modal open onOpenChange={(open) => !open && close?.()}>
      <Modal.Content width="sm" height="lg" position="top">
        <Modal.Header
          icon={SvgAddLines}
          title="Memory"
          description="Let Onyx reference these stored notes and memories in chats."
          onClose={close}
        >
          <Section flexDirection="row" gap={0.5}>
            <InputTypeIn
              placeholder="Search..."
              value={searchQuery}
              onChange={(e) => setSearchQuery(e.target.value)}
              leftSearchIcon
              showClearButton={false}
              className="w-full !bg-transparent !border-transparent [&:is(:hover,:active,:focus,:focus-within)]:!bg-background-neutral-00 [&:is(:hover)]:!border-border-01 [&:is(:focus,:focus-within)]:!shadow-none"
            />
            <Button
              disabled={!canAddMemory}
              prominence="tertiary"
              onClick={onAddLine}
              rightIcon={SvgPlusCircle}
              title={
                !canAddMemory
                  ? `Maximum of ${MAX_MEMORY_COUNT} memories reached`
                  : undefined
              }
            >
              Add Line
            </Button>
          </Section>
        </Modal.Header>

        <Modal.Body padding={0.5}>
          {filteredMemories.length === 0 ? (
            <Section alignItems="center" padding={2}>
              <Text secondaryBody text03>
                {searchQuery.trim()
                  ? "No memories match your search."
                  : 'No memories yet. Click "Add Line" to get started.'}
              </Text>
            </Section>
          ) : (
            <Section gap={0.5}>
              {filteredMemories.map(({ memory, originalIndex }) => (
                <Fragment key={memory.id}>
                  <MemoryItem
                    memory={memory}
                    originalIndex={originalIndex}
                    onUpdate={handleUpdateMemory}
                    onBlur={handleBlurMemory}
                    onRemove={handleRemoveMemory}
                    shouldFocus={memory.id === focusMemoryId}
                    onFocused={() => setFocusMemoryId(null)}
                    shouldHighlight={memory.id === highlightMemoryId}
                    onHighlighted={() => {
                      setHighlightMemoryId(null);
                    }}
                  />
                  {memory.isNew && <Separator noPadding />}
                </Fragment>
              ))}
            </Section>
          )}
          <TextSeparator
            count={totalLineCount}
            text={totalLineCount === 1 ? "Line" : "Lines"}
          />
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/refresh-components/popovers/ActionsPopover/ActionLineItem.tsx
================================================
"use client";

import React from "react";
import { SEARCH_TOOL_ID } from "@/app/app/components/tools/constants";
import { ToolSnapshot } from "@/lib/tools/interfaces";
import { getIconForAction } from "@/app/app/services/actionUtils";
import { ToolAuthStatus } from "@/lib/hooks/useToolOAuthStatus";
import LineItem from "@/refresh-components/buttons/LineItem";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Button } from "@opal/components";
import { cn, noProp } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import { SvgChevronRight, SvgKey, SvgSettings, SvgSlash } from "@opal/icons";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import EnabledCount from "@/refresh-components/EnabledCount";
import { Section } from "@/layouts/general-layouts";

export interface ActionItemProps {
  tool?: ToolSnapshot;
  Icon?: React.FunctionComponent<IconProps>;
  label?: string;
  disabled: boolean;
  isForced: boolean;
  isUnavailable?: boolean;
  tooltip?: string;
  showAdminConfigure?: boolean;
  adminConfigureHref?: string;
  adminConfigureTooltip?: string;
  onToggle: () => void;
  onForceToggle: () => void;
  onSourceManagementOpen?: () => void;
  hasNoConnectors?: boolean;
  toolAuthStatus?: ToolAuthStatus;
  onOAuthAuthenticate?: () => void;
  onClose?: () => void;
  // Source counts for internal search tool
  sourceCounts?: { enabled: number; total: number };
}

export default function ActionLineItem({
  tool,
  Icon: ProvidedIcon,
  label: providedLabel,
  disabled,
  isForced,
  isUnavailable = false,
  tooltip,
  showAdminConfigure = false,
  adminConfigureHref,
  adminConfigureTooltip = "Configure",
  onToggle,
  onForceToggle,
  onSourceManagementOpen,
  hasNoConnectors = false,
  toolAuthStatus,
  onOAuthAuthenticate,
  onClose,
  sourceCounts,
}: ActionItemProps) {
  const router = useRouter();
  const { currentProjectId } = useProjectsContext();

  const Icon = tool ? getIconForAction(tool) : ProvidedIcon!;
  const toolName = tool?.name || providedLabel || "";

  let label = tool ? tool.display_name || tool.name : providedLabel!;
  if (!!currentProjectId && tool?.in_code_tool_id === SEARCH_TOOL_ID) {
    label = "Project Search";
  }

  const isSearchToolWithNoConnectors =
    !currentProjectId &&
    tool?.in_code_tool_id === SEARCH_TOOL_ID &&
    hasNoConnectors;

  const isSearchToolAndNotInProject =
    tool?.in_code_tool_id === SEARCH_TOOL_ID && !currentProjectId;

  // Show source count when: internal search is pinned, has some (but not all) sources enabled
  const shouldShowSourceCount =
    isSearchToolAndNotInProject &&
    !isSearchToolWithNoConnectors &&
    isForced &&
    sourceCounts &&
    sourceCounts.enabled > 0 &&
    sourceCounts.enabled < sourceCounts.total;

  const tooltipText = tooltip || tool?.description;

  return (
    <SimpleTooltip tooltip={tooltipText} className="max-w-[30rem]">
      <LineItem
        data-testid={`tool-option-${toolName}`}
        onClick={() => {
          if (isUnavailable) {
            onForceToggle();
            return;
          }
          if (disabled) onToggle();
          onForceToggle();
          if (isSearchToolAndNotInProject && !isForced)
            onSourceManagementOpen?.();
          else onClose?.();
        }}
        selected={isForced}
        disabled={isSearchToolWithNoConnectors || (isUnavailable && !isForced)}
        muted={isUnavailable && isForced}
        strikethrough={disabled}
        icon={Icon}
        rightChildren={
          <Section gap={0.25} flexDirection="row">
            {!isUnavailable && tool?.oauth_config_id && toolAuthStatus && (
              <Button
                icon={SvgKey}
                prominence="secondary"
                size="sm"
                onClick={noProp(() => {
                  if (
                    !toolAuthStatus.hasToken ||
                    toolAuthStatus.isTokenExpired
                  ) {
                    onOAuthAuthenticate?.();
                  }
                })}
              />
            )}

            {!isSearchToolWithNoConnectors && !isUnavailable && (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <IconButton
                icon={SvgSlash}
                onClick={noProp(onToggle)}
                internal
                aria-label={disabled ? "Enable" : "Disable"}
                className={cn(
                  !disabled && "invisible group-hover/LineItem:visible",
                  // Hide when showing source count (it has its own hover behavior)
                  shouldShowSourceCount && "!hidden"
                )}
                tooltip={disabled ? "Enable" : "Disable"}
              />
            )}

            {isUnavailable && showAdminConfigure && adminConfigureHref && (
              <Button
                icon={SvgSettings}
                onClick={noProp(() => {
                  router.push(adminConfigureHref as Route);
                  onClose?.();
                })}
                prominence="tertiary"
                size="sm"
                tooltip={adminConfigureTooltip}
              />
            )}

            {/* Source count for internal search - show when some but not all sources selected AND tool is pinned */}
            {shouldShowSourceCount && (
              <span className="relative flex items-center whitespace-nowrap">
                {/* Show count normally, disable icon on hover - both in same space */}
                <span className="group-hover/LineItem:invisible">
                  <EnabledCount
                    enabledCount={sourceCounts.enabled}
                    totalCount={sourceCounts.total}
                  />
                </span>
                <span className="absolute inset-0 flex items-center justify-center invisible group-hover/LineItem:visible">
                  <Button
                    icon={SvgSlash}
                    onClick={noProp(onToggle)}
                    prominence="tertiary"
                    size="sm"
                    tooltip={disabled ? "Enable" : "Disable"}
                  />
                </span>
              </span>
            )}

            {isSearchToolAndNotInProject && (
              <Button
                aria-label={
                  isSearchToolWithNoConnectors
                    ? "Add Connectors"
                    : "Configure Connectors"
                }
                icon={
                  isSearchToolWithNoConnectors ? SvgSettings : SvgChevronRight
                }
                onClick={noProp(() => {
                  if (isSearchToolWithNoConnectors)
                    router.push("/admin/add-connector");
                  else onSourceManagementOpen?.();
                })}
                prominence="tertiary"
                size="sm"
                tooltip={
                  isSearchToolWithNoConnectors
                    ? "Add Connectors"
                    : "Configure Connectors"
                }
              />
            )}
          </Section>
        }
      >
        {label}
      </LineItem>
    </SimpleTooltip>
  );
}


================================================
FILE: web/src/refresh-components/popovers/ActionsPopover/MCPLineItem.tsx
================================================
"use client";

import React from "react";
import {
  MCPAuthenticationType,
  MCPAuthenticationPerformer,
  ToolSnapshot,
} from "@/lib/tools/interfaces";
import LineItem from "@/refresh-components/buttons/LineItem";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { cn, noProp } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import {
  SvgCheck,
  SvgChevronRight,
  SvgKey,
  SvgLock,
  SvgServer,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { Button } from "@opal/components";
import EnabledCount from "@/refresh-components/EnabledCount";

export interface MCPServer {
  id: number;
  name: string;
  owner_email: string;
  server_url: string;
  auth_type: MCPAuthenticationType;
  auth_performer: MCPAuthenticationPerformer;
  is_authenticated: boolean;
  user_authenticated?: boolean;
  auth_template?: any;
  user_credentials?: Record<string, string>;
}

export interface MCPLineItemProps {
  server: MCPServer;
  isActive: boolean;
  onSelect: () => void;
  onAuthenticate: () => void;
  tools: ToolSnapshot[];
  enabledTools: ToolSnapshot[];
  isAuthenticated: boolean;
  isLoading: boolean;
}

export default function MCPLineItem({
  server,
  isActive,
  onSelect,
  onAuthenticate,
  tools,
  enabledTools,
  isAuthenticated,
  isLoading,
}: MCPLineItemProps) {
  const showAuthTrigger =
    server.auth_performer === MCPAuthenticationPerformer.PER_USER &&
    server.auth_type !== MCPAuthenticationType.NONE;

  const canClickIntoServer = isAuthenticated && tools.length > 0;
  const showInlineReauth = showAuthTrigger && canClickIntoServer;
  const showReauthButton = showAuthTrigger && !showInlineReauth;

  function getServerIcon(): React.FunctionComponent<IconProps> {
    if (isLoading) return SimpleLoader;
    if (isAuthenticated) {
      return (({ className }) => (
        <SvgCheck className={cn(className, "stroke-status-success-05")} />
      )) as React.FunctionComponent<IconProps>;
    }
    if (server.auth_type === MCPAuthenticationType.NONE) return SvgServer;
    if (server.auth_performer === MCPAuthenticationPerformer.PER_USER) {
      return (({ className }) => (
        <SvgKey className={cn(className, "stroke-status-warning-05")} />
      )) as React.FunctionComponent<IconProps>;
    }
    return (({ className }) => (
      <SvgLock className={cn(className, "stroke-status-error-05")} />
    )) as React.FunctionComponent<IconProps>;
  }

  const handleClick = noProp(() => {
    if (canClickIntoServer) {
      onSelect();
      return;
    }
    if (showAuthTrigger) {
      onAuthenticate();
    }
  });

  const allToolsDisabled = enabledTools.length === 0 && tools.length > 0;

  return (
    <LineItem
      data-mcp-server-id={server.id}
      data-mcp-server-name={server.name}
      icon={getServerIcon()}
      onClick={handleClick}
      strikethrough={allToolsDisabled}
      selected={isActive}
      rightChildren={
        <Section gap={0.25} flexDirection="row">
          {isAuthenticated &&
            tools.length > 0 &&
            enabledTools.length > 0 &&
            tools.length !== enabledTools.length && (
              <EnabledCount
                enabledCount={enabledTools.length}
                totalCount={tools.length}
              />
            )}
          {canClickIntoServer && (
            <Button
              icon={SvgChevronRight}
              prominence="tertiary"
              size="sm"
              onClick={onSelect}
            />
          )}
          {showReauthButton && (
            <Button
              icon={SvgKey}
              prominence="tertiary"
              size="sm"
              onClick={onAuthenticate}
            />
          )}
        </Section>
      }
    >
      {server.name}
    </LineItem>
  );
}


================================================
FILE: web/src/refresh-components/popovers/ActionsPopover/SwitchList.tsx
================================================
"use client";

import React, { useMemo, useState } from "react";
import { Button } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { PopoverMenu } from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import type { IconProps } from "@opal/types";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import Switch from "@/refresh-components/inputs/Switch";
import { SvgChevronLeft, SvgPlug, SvgUnplug } from "@opal/icons";

export interface SwitchListItem {
  id: string;
  label: string;
  description?: string;
  leading?: React.ReactNode;
  isEnabled: boolean;
  onToggle: () => void;
  disabled?: boolean;
  disabledTooltip?: string;
}

export interface SwitchListProps {
  items: SwitchListItem[];
  searchPlaceholder: string;
  allDisabled: boolean;
  onDisableAll: () => void;
  onEnableAll: () => void;
  disableAllLabel: string;
  enableAllLabel: string;
  onBack: () => void;
  footer?: React.ReactNode;
}

export default function SwitchList({
  items,
  searchPlaceholder,
  allDisabled,
  onDisableAll,
  onEnableAll,
  onBack,
  footer,
}: SwitchListProps) {
  const [searchTerm, setSearchTerm] = useState("");
  const filteredItems = useMemo(() => {
    if (!searchTerm) return items;
    const searchLower = searchTerm.toLowerCase();
    return items.filter((item) => {
      return (
        item.label.toLowerCase().includes(searchLower) ||
        (item.description &&
          item.description.toLowerCase().includes(searchLower))
      );
    });
  }, [items, searchTerm]);

  return (
    <PopoverMenu footer={footer}>
      {[
        <div className="flex items-center gap-1" key="search">
          <Button
            icon={SvgChevronLeft}
            prominence="tertiary"
            size="sm"
            aria-label="Back"
            onClick={() => {
              setSearchTerm("");
              onBack();
            }}
          />
          <InputTypeIn
            variant="internal"
            placeholder={searchPlaceholder}
            value={searchTerm}
            onChange={(e) => setSearchTerm(e.target.value)}
            autoFocus
          />
        </div>,

        <LineItem
          key="enable-disable-all"
          icon={allDisabled ? SvgPlug : SvgUnplug}
          onClick={allDisabled ? onEnableAll : onDisableAll}
        >
          {allDisabled ? "Enable All" : "Disable All"}
        </LineItem>,

        ...filteredItems.map((item) => {
          const tooltip = item.disabled
            ? item.disabledTooltip
            : item.description;
          return (
            <SimpleTooltip
              key={item.id}
              tooltip={tooltip}
              className="max-w-[30rem]"
            >
              <LineItem
                icon={
                  item.leading
                    ? ((() =>
                        item.leading) as React.FunctionComponent<IconProps>)
                    : undefined
                }
                rightChildren={
                  <Switch
                    checked={item.isEnabled}
                    onCheckedChange={item.onToggle}
                    aria-label={`Toggle ${item.label}`}
                    disabled={item.disabled}
                  />
                }
              >
                {item.label}
              </LineItem>
            </SimpleTooltip>
          );
        }),
      ]}
    </PopoverMenu>
  );
}


================================================
FILE: web/src/refresh-components/popovers/ActionsPopover/index.tsx
================================================
"use client";

import {
  FILE_READER_TOOL_ID,
  IMAGE_GENERATION_TOOL_ID,
  PYTHON_TOOL_ID,
  SEARCH_TOOL_ID,
  WEB_SEARCH_TOOL_ID,
} from "@/app/app/components/tools/constants";
import { useState, useEffect, useMemo, useCallback, useRef } from "react";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import SwitchList, {
  SwitchListItem,
} from "@/refresh-components/popovers/ActionsPopover/SwitchList";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import {
  MCPAuthenticationType,
  MCPAuthenticationPerformer,
  ToolSnapshot,
} from "@/lib/tools/interfaces";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import useAgentPreferences from "@/hooks/useAgentPreferences";
import { useUser } from "@/providers/UserProvider";
import { FilterManager, useSourcePreferences } from "@/lib/hooks";
import { listSourceMetadata } from "@/lib/sources";
import MCPApiKeyModal from "@/components/chat/MCPApiKeyModal";
import { ValidSources } from "@/lib/types";
import { SourceMetadata } from "@/lib/search/interfaces";
import { SourceIcon } from "@/components/SourceIcon";
import { useAvailableTools } from "@/hooks/useAvailableTools";
import useCCPairs from "@/hooks/useCCPairs";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { useToolOAuthStatus } from "@/lib/hooks/useToolOAuthStatus";
import LineItem from "@/refresh-components/buttons/LineItem";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import ActionLineItem from "@/refresh-components/popovers/ActionsPopover/ActionLineItem";
import MCPLineItem, {
  MCPServer,
} from "@/refresh-components/popovers/ActionsPopover/MCPLineItem";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { SvgActions, SvgChevronRight, SvgKey, SvgSliders } from "@opal/icons";
import { Button } from "@opal/components";

function buildTooltipMessage(
  actionDescription: string,
  isConfigured: boolean,
  canManageAction: boolean
) {
  const _CONFIGURE_MESSAGE = "Press the settings cog to enable.";
  const _USER_NOT_ADMIN_MESSAGE = "Ask an admin to configure.";

  if (isConfigured) {
    return actionDescription;
  }

  if (canManageAction) {
    return actionDescription + " " + _CONFIGURE_MESSAGE;
  }

  return actionDescription + " " + _USER_NOT_ADMIN_MESSAGE;
}

const TOOL_DESCRIPTIONS: Record<string, string> = {
  [SEARCH_TOOL_ID]: "Search through connected knowledge to inform the answer.",
  [IMAGE_GENERATION_TOOL_ID]: "Generate images based on a prompt.",
  [WEB_SEARCH_TOOL_ID]: "Search the web for up-to-date information.",
  [PYTHON_TOOL_ID]: "Execute code for complex analysis.",
};

const DEFAULT_TOOL_DESCRIPTION = "This action is not configured yet.";

function getToolTooltip(
  tool: ToolSnapshot,
  isConfigured: boolean,
  canManageAction: boolean
): string {
  const description =
    (tool.in_code_tool_id && TOOL_DESCRIPTIONS[tool.in_code_tool_id]) ||
    tool.description ||
    DEFAULT_TOOL_DESCRIPTION;
  return buildTooltipMessage(description, isConfigured, canManageAction);
}

const ADMIN_CONFIG_LINKS: Record<string, { href: string; tooltip: string }> = {
  [IMAGE_GENERATION_TOOL_ID]: {
    href: "/admin/configuration/image-generation",
    tooltip: "Configure Image Generation",
  },
  [WEB_SEARCH_TOOL_ID]: {
    href: "/admin/configuration/web-search",
    tooltip: "Configure Web Search",
  },
  [PYTHON_TOOL_ID]: {
    href: "/admin/configuration/code-interpreter",
    tooltip: "Configure Code Interpreter",
  },
  KnowledgeGraphTool: {
    href: "/admin/kg",
    tooltip: "Configure Knowledge Graph",
  },
};

const OPENAPI_ADMIN_CONFIG = {
  href: "/admin/actions/open-api",
  tooltip: "Manage OpenAPI Actions",
};

const getAdminConfigureInfo = (
  tool: ToolSnapshot
): { href: string; tooltip: string } | null => {
  if (tool.in_code_tool_id && ADMIN_CONFIG_LINKS[tool.in_code_tool_id]) {
    return ADMIN_CONFIG_LINKS[tool.in_code_tool_id] ?? null;
  }

  if (!tool.in_code_tool_id && !tool.mcp_server_id) {
    return OPENAPI_ADMIN_CONFIG;
  }

  return null;
};

// Get source metadata for configured sources - deduplicated by source type
function getConfiguredSources(
  availableSources: ValidSources[]
): Array<SourceMetadata & { originalName: string; uniqueKey: string }> {
  const allSources = listSourceMetadata();

  const seenSources = new Set<string>();
  const configuredSources: Array<
    SourceMetadata & { originalName: string; uniqueKey: string }
  > = [];

  availableSources.forEach((sourceName) => {
    // Handle federated connectors by removing the federated_ prefix
    const cleanName = sourceName.replace("federated_", "");
    // Skip if we've already seen this source type
    if (seenSources.has(cleanName)) return;
    seenSources.add(cleanName);
    const source = allSources.find(
      (source) => source.internalName === cleanName
    );
    if (source) {
      configuredSources.push({
        ...source,
        originalName: sourceName,
        uniqueKey: cleanName,
      });
    }
  });
  return configuredSources;
}

type SecondaryViewState =
  | { type: "sources" }
  | { type: "mcp"; serverId: number };

export interface ActionsPopoverProps {
  selectedAgent: MinimalPersonaSnapshot;
  filterManager: FilterManager;
  availableSources?: ValidSources[];
  disabled?: boolean;
}

export default function ActionsPopover({
  selectedAgent,
  filterManager,
  availableSources = [],
  disabled = false,
}: ActionsPopoverProps) {
  const [open, setOpen] = useState(false);
  const [secondaryView, setSecondaryView] = useState<SecondaryViewState | null>(
    null
  );
  const [searchTerm, setSearchTerm] = useState("");
  // const [showFadeMask, setShowFadeMask] = useState(false);
  // const [showTopShadow, setShowTopShadow] = useState(false);
  const { selectedSources, setSelectedSources } = filterManager;
  const [mcpServers, setMcpServers] = useState<MCPServer[]>([]);
  const { llmProviders, isLoading: isLLMLoading } = useLLMProviders(
    selectedAgent.id
  );
  const hasAnyProvider = !isLLMLoading && (llmProviders?.length ?? 0) > 0;

  // Use the OAuth hook
  const { getToolAuthStatus, authenticateTool } = useToolOAuthStatus(
    selectedAgent.id
  );

  const {
    sourcesInitialized,
    enableSources,
    enableAllSources: baseEnableAllSources,
    disableAllSources: baseDisableAllSources,
    toggleSource: baseToggleSource,
    isSourceEnabled,
  } = useSourcePreferences({
    availableSources,
    selectedSources,
    setSelectedSources,
  });

  // Store previously enabled sources when search tool is disabled
  const previouslyEnabledSourcesRef = useRef<SourceMetadata[]>([]);

  const isDefaultAgent = selectedAgent.id === 0;

  // Check if the search tool is explicitly enabled on this persona (admin enabled "Use Knowledge")
  const hasSearchTool = selectedAgent.tools.some(
    (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID
  );

  // Get sources the agent has access to via document sets, hierarchy nodes, and attached documents
  // Default agent has access to all sources
  const agentAccessibleSources = useMemo(() => {
    if (isDefaultAgent) {
      return null; // null means "all accessible"
    }

    const sourceSet = new Set<string>();

    // Add sources from document sets
    selectedAgent.document_sets.forEach((docSet) => {
      // Check cc_pair_summaries (regular connectors)
      docSet.cc_pair_summaries?.forEach((ccPair) => {
        // Normalize by removing federated_ prefix
        const normalized = ccPair.source.replace("federated_", "");
        sourceSet.add(normalized);
      });

      // Check federated_connector_summaries (federated connectors)
      docSet.federated_connector_summaries?.forEach((fedConnector) => {
        // Normalize by removing federated_ prefix
        const normalized = fedConnector.source.replace("federated_", "");
        sourceSet.add(normalized);
      });
    });

    // Add sources from hierarchy nodes and attached documents (via knowledge_sources)
    selectedAgent.knowledge_sources?.forEach((source) => {
      // Normalize by removing federated_ prefix
      const normalized = source.replace("federated_", "");
      sourceSet.add(normalized);
    });

    // If agent has search tool but no specific sources, it can search everything
    if (sourceSet.size === 0 && hasSearchTool) {
      return null;
    }

    return sourceSet;
  }, [
    isDefaultAgent,
    selectedAgent.document_sets,
    selectedAgent.knowledge_sources,
    hasSearchTool,
  ]);

  // Check if non-default agent has no knowledge sources (Internal Search should be disabled)
  // Knowledge sources include document sets, hierarchy nodes, and attached documents
  // If the search tool is present, the admin intentionally enabled knowledge search
  const hasNoKnowledgeSources =
    !isDefaultAgent &&
    !hasSearchTool &&
    selectedAgent.document_sets.length === 0 &&
    (selectedAgent.hierarchy_node_count ?? 0) === 0 &&
    (selectedAgent.attached_document_count ?? 0) === 0;

  // Store MCP server auth/loading state (tools are part of selectedAgent.tools)
  const [mcpServerData, setMcpServerData] = useState<{
    [serverId: number]: {
      isAuthenticated: boolean;
      isLoading: boolean;
    };
  }>({});

  const [mcpApiKeyModal, setMcpApiKeyModal] = useState<{
    isOpen: boolean;
    serverId: number | null;
    serverName: string;
    authTemplate?: any;
    onSuccess?: () => void;
    isAuthenticated?: boolean;
    existingCredentials?: Record<string, string>;
  }>({
    isOpen: false,
    serverId: null,
    serverName: "",
    authTemplate: undefined,
    onSuccess: undefined,
    isAuthenticated: false,
  });

  // Get the agent preference for this assistant
  const { agentPreferences, setSpecificAgentPreferences } =
    useAgentPreferences();
  const { forcedToolIds, setForcedToolIds } = useForcedTools();

  // Reset state when assistant changes
  useEffect(() => {
    setForcedToolIds([]);
  }, [selectedAgent.id, setForcedToolIds]);

  const { isAdmin, isCurator } = useUser();
  const vectorDbEnabled = useVectorDbEnabled();

  const { tools: availableTools } = useAvailableTools();
  const { ccPairs } = useCCPairs(vectorDbEnabled);
  const { currentProjectId, allCurrentProjectFiles } = useProjectsContext();
  const availableToolIdSet = new Set(availableTools.map((tool) => tool.id));

  // Check if there are any connectors available
  const hasNoConnectors = ccPairs.length === 0;

  const agentPreference = agentPreferences?.[selectedAgent.id];
  const disabledToolIds = agentPreference?.disabled_tool_ids || [];
  const toggleToolForCurrentAgent = (toolId: number) => {
    const disabled = disabledToolIds.includes(toolId);
    setSpecificAgentPreferences(selectedAgent.id, {
      disabled_tool_ids: disabled
        ? disabledToolIds.filter((id) => id !== toolId)
        : [...disabledToolIds, toolId],
    });

    // If we're disabling a tool that is currently forced, remove it from forced tools
    if (!disabled && forcedToolIds.includes(toolId)) {
      setForcedToolIds(forcedToolIds.filter((id) => id !== toolId));
    }
  };

  const toggleForcedTool = (toolId: number) => {
    if (forcedToolIds.includes(toolId)) {
      // If clicking on already forced tool, unforce it
      setForcedToolIds([]);
    } else {
      // If clicking on a new tool, replace any existing forced tools with just this one
      setForcedToolIds([toolId]);
    }
  };

  // Get internal search tool reference for auto-pin logic
  const internalSearchTool = useMemo(
    () =>
      selectedAgent.tools.find(
        (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID && !tool.mcp_server_id
      ),
    [selectedAgent.tools]
  );

  // Handle explicit force toggle from ActionLineItem
  const handleForceToggleWithTracking = useCallback(
    (toolId: number, wasForced: boolean) => {
      // If pinning internal search, enable all accessible sources
      if (
        !wasForced &&
        internalSearchTool &&
        toolId === internalSearchTool.id
      ) {
        const sources = getConfiguredSources(availableSources);
        const accessibleSources = sources.filter(
          (s) =>
            agentAccessibleSources === null ||
            agentAccessibleSources.has(s.uniqueKey)
        );
        setSelectedSources(accessibleSources);
      }
      toggleForcedTool(toolId);
    },
    [
      toggleForcedTool,
      internalSearchTool,
      availableSources,
      agentAccessibleSources,
      setSelectedSources,
    ]
  );

  // Wrapped source functions that auto-pin internal search when sources change
  const enableAllSources = useCallback(() => {
    // Only enable sources the agent has access to
    const allConfiguredSources = getConfiguredSources(availableSources);
    const accessibleSources = allConfiguredSources.filter(
      (s) =>
        agentAccessibleSources === null ||
        agentAccessibleSources.has(s.uniqueKey)
    );
    setSelectedSources(accessibleSources);

    if (internalSearchTool) {
      setForcedToolIds([internalSearchTool.id]);
    }
  }, [
    agentAccessibleSources,
    availableSources,
    setSelectedSources,
    internalSearchTool,
    setForcedToolIds,
  ]);

  const disableAllSources = useCallback(() => {
    baseDisableAllSources();
    const willUnpin =
      internalSearchTool && forcedToolIds.includes(internalSearchTool.id);
    if (willUnpin) {
      setForcedToolIds([]);
    }
  }, [
    baseDisableAllSources,
    internalSearchTool,
    forcedToolIds,
    setForcedToolIds,
  ]);

  const toggleSource = useCallback(
    (sourceUniqueKey: string) => {
      const wasEnabled = isSourceEnabled(sourceUniqueKey);
      baseToggleSource(sourceUniqueKey);

      const configuredSources = getConfiguredSources(availableSources);

      if (internalSearchTool) {
        if (!wasEnabled) {
          // Enabling a source - auto-pin internal search
          setForcedToolIds([internalSearchTool.id]);
        } else {
          // Disabling a source - check if all sources will be disabled
          const remainingEnabled = configuredSources.filter(
            (s) =>
              s.uniqueKey !== sourceUniqueKey && isSourceEnabled(s.uniqueKey)
          );
          if (
            remainingEnabled.length === 0 &&
            forcedToolIds.includes(internalSearchTool.id)
          ) {
            // All sources disabled - unpin
            setForcedToolIds([]);
          }
        }
      }
    },
    [
      baseToggleSource,
      internalSearchTool,
      isSourceEnabled,
      availableSources,
      forcedToolIds,
      setForcedToolIds,
    ]
  );

  // Filter out MCP tools from the main list (they have mcp_server_id)
  // Also filter out internal search tool for basic users when there are no connectors
  // Also filter out tools that are not chat-selectable (e.g., OpenURL)
  const displayTools = selectedAgent.tools.filter((tool) => {
    // Filter out MCP tools
    if (tool.mcp_server_id) return false;

    // Filter out tools that are not chat-selectable (visibility set by backend)
    if (!tool.chat_selectable) return false;

    // Always hide File Reader from the actions popover
    if (tool.in_code_tool_id === FILE_READER_TOOL_ID) return false;

    // Special handling for Project Search
    // Ensure Project Search is hidden if no files exist
    if (tool.in_code_tool_id === SEARCH_TOOL_ID && !!currentProjectId) {
      if (!allCurrentProjectFiles || allCurrentProjectFiles.length === 0) {
        return false;
      }
      // If files exist, show it (even if backend thinks it's strictly unavailable due to no connectors)
      return true;
    }

    // Advertise to admin/curator users that they can connect an internal search tool
    // even if it's not available or has no connectors
    if (tool.in_code_tool_id === SEARCH_TOOL_ID && (isAdmin || isCurator)) {
      return true;
    }

    // Filter out internal search tool for non-admin/curator users when there are no connectors
    if (
      tool.in_code_tool_id === SEARCH_TOOL_ID &&
      hasNoConnectors &&
      !isAdmin &&
      !isCurator
    ) {
      return false;
    }

    return true;
  });

  const searchToolId =
    displayTools.find((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)?.id ??
    null;

  // Fetch MCP servers for the agent on mount
  useEffect(() => {
    if (selectedAgent == null || selectedAgent.id == null || !hasAnyProvider)
      return;

    const abortController = new AbortController();

    const fetchMCPServers = async () => {
      try {
        const response = await fetch(
          `/api/mcp/servers/persona/${selectedAgent.id}`,
          {
            signal: abortController.signal,
          }
        );
        if (response.ok) {
          const data = await response.json();
          const servers = data.mcp_servers || [];
          setMcpServers(servers);
          // Seed auth/loading state based on response
          setMcpServerData((prev) => {
            const next = { ...prev } as any;
            servers.forEach((s: any) => {
              next[s.id as number] = {
                isAuthenticated: !!s.user_authenticated || !!s.is_authenticated,
                isLoading: false,
              };
            });
            return next;
          });
        }
      } catch (error) {
        if (abortController.signal.aborted) {
          return;
        }
        console.error("Error fetching MCP servers:", error);
      }
    };

    fetchMCPServers();

    return () => {
      abortController.abort();
    };
  }, [selectedAgent?.id, hasAnyProvider]);

  // No separate MCP tool loading; tools already exist in selectedAgent.tools

  // Handle MCP authentication
  const handleMCPAuthenticate = async (
    serverId: number,
    authType: MCPAuthenticationType
  ) => {
    if (authType === MCPAuthenticationType.OAUTH) {
      const updateLoadingState = (loading: boolean) => {
        setMcpServerData((prev) => {
          const previous = prev[serverId] ?? {
            isAuthenticated: false,
            isLoading: false,
          };
          return {
            ...prev,
            [serverId]: {
              ...previous,
              isLoading: loading,
            },
          };
        });
      };

      updateLoadingState(true);
      try {
        const response = await fetch("/api/mcp/oauth/connect", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            server_id: serverId,
            return_path: window.location.pathname + window.location.search,
            include_resource_param: true,
          }),
        });

        if (response.ok) {
          const { oauth_url } = await response.json();
          window.location.href = oauth_url;
        } else {
          updateLoadingState(false);
        }
      } catch (error) {
        console.error("Error initiating OAuth:", error);
        updateLoadingState(false);
      }
    }
  };

  const handleMCPApiKeySubmit = async (serverId: number, apiKey: string) => {
    try {
      const response = await fetch("/api/mcp/user-credentials", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          server_id: serverId,
          credentials: { api_key: apiKey },
          transport: "streamable-http",
        }),
      });

      if (!response.ok) {
        const errorData = await response.json().catch(() => ({}));
        const errorMessage = errorData.detail || "Failed to save API key";
        throw new Error(errorMessage);
      }
    } catch (error) {
      console.error("Error saving API key:", error);
      throw error;
    }
  };

  const handleMCPCredentialsSubmit = async (
    serverId: number,
    credentials: Record<string, string>
  ) => {
    try {
      const response = await fetch("/api/mcp/user-credentials", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          server_id: serverId,
          credentials: credentials,
          transport: "streamable-http",
        }),
      });

      if (!response.ok) {
        const errorData = await response.json().catch(() => ({}));
        const errorMessage = errorData.detail || "Failed to save credentials";
        throw new Error(errorMessage);
      }
    } catch (error) {
      console.error("Error saving credentials:", error);
      throw error;
    }
  };

  const handleServerAuthentication = (server: MCPServer) => {
    const authType = server.auth_type;
    const performer = server.auth_performer;

    if (
      authType === MCPAuthenticationType.NONE ||
      performer === MCPAuthenticationPerformer.ADMIN
    ) {
      return;
    }

    if (authType === MCPAuthenticationType.OAUTH) {
      handleMCPAuthenticate(server.id, MCPAuthenticationType.OAUTH);
    } else if (authType === MCPAuthenticationType.API_TOKEN) {
      setMcpApiKeyModal({
        isOpen: true,
        serverId: server.id,
        serverName: server.name,
        authTemplate: server.auth_template,
        onSuccess: () => {
          // Update the authentication state after successful credential submission
          setMcpServerData((prev) => ({
            ...prev,
            [server.id]: {
              ...prev[server.id],
              isAuthenticated: true,
              isLoading: false,
            },
          }));
        },
        isAuthenticated: server.user_authenticated,
        existingCredentials: server.user_credentials,
      });
    }
  };

  // Filter tools based on search term
  const filteredTools = displayTools.filter((tool) => {
    if (!searchTerm) return true;
    const searchLower = searchTerm.toLowerCase();
    return (
      tool.display_name?.toLowerCase().includes(searchLower) ||
      tool.name.toLowerCase().includes(searchLower) ||
      tool.description?.toLowerCase().includes(searchLower)
    );
  });

  // Filter MCP servers based on search term
  const filteredMCPServers = mcpServers.filter((server) => {
    if (!searchTerm) return true;
    const searchLower = searchTerm.toLowerCase();
    return server.name.toLowerCase().includes(searchLower);
  });

  const selectedMcpServerId =
    secondaryView?.type === "mcp" ? secondaryView.serverId : null;
  const selectedMcpServer = selectedMcpServerId
    ? mcpServers.find((server) => server.id === selectedMcpServerId)
    : undefined;
  const selectedMcpTools =
    selectedMcpServerId !== null
      ? selectedAgent.tools.filter(
          (t) => t.mcp_server_id === Number(selectedMcpServerId)
        )
      : [];
  const selectedMcpServerData = selectedMcpServer
    ? mcpServerData[selectedMcpServer.id]
    : undefined;
  const isActiveServerAuthenticated =
    selectedMcpServerData?.isAuthenticated ??
    !!(
      selectedMcpServer?.user_authenticated ||
      selectedMcpServer?.is_authenticated
    );
  const showActiveReauthRow =
    !!selectedMcpServer &&
    selectedMcpTools.length > 0 &&
    selectedMcpServer.auth_performer === MCPAuthenticationPerformer.PER_USER &&
    selectedMcpServer.auth_type !== MCPAuthenticationType.NONE &&
    isActiveServerAuthenticated;

  const mcpToggleItems: SwitchListItem[] = selectedMcpTools.map((tool) => ({
    id: tool.id.toString(),
    label: tool.display_name || tool.name,
    description: tool.description,
    isEnabled: !disabledToolIds.includes(tool.id),
    onToggle: () => toggleToolForCurrentAgent(tool.id),
  }));

  const mcpAllDisabled = selectedMcpTools.every((tool) =>
    disabledToolIds.includes(tool.id)
  );

  const disableAllToolsForSelectedServer = () => {
    if (!selectedMcpServer) return;
    const serverToolIds = selectedMcpTools.map((tool) => tool.id);
    const merged = Array.from(new Set([...disabledToolIds, ...serverToolIds]));
    setSpecificAgentPreferences(selectedAgent.id, {
      disabled_tool_ids: merged,
    });
    setForcedToolIds(forcedToolIds.filter((id) => !serverToolIds.includes(id)));
  };

  const enableAllToolsForSelectedServer = () => {
    if (!selectedMcpServer) return;
    const serverToolIdSet = new Set(selectedMcpTools.map((tool) => tool.id));
    setSpecificAgentPreferences(selectedAgent.id, {
      disabled_tool_ids: disabledToolIds.filter(
        (id) => !serverToolIdSet.has(id)
      ),
    });
  };

  const handleFooterReauthClick = () => {
    if (selectedMcpServer) {
      handleServerAuthentication(selectedMcpServer);
    }
  };

  const handleOpenChange = (newOpen: boolean) => {
    setOpen(newOpen);
    if (newOpen) {
      setSecondaryView(null);
      setSearchTerm("");
    }
  };

  const mcpFooter = showActiveReauthRow ? (
    <LineItem
      onClick={handleFooterReauthClick}
      icon={selectedMcpServerData?.isLoading ? SimpleLoader : SvgKey}
      rightChildren={
        <Button icon={SvgChevronRight} prominence="tertiary" size="sm" />
      }
    >
      Re-Authenticate
    </LineItem>
  ) : undefined;

  const configuredSources = getConfiguredSources(availableSources);

  const numSourcesEnabled = configuredSources.filter((source) =>
    isSourceEnabled(source.uniqueKey)
  ).length;
  const searchToolDisabled =
    searchToolId !== null && disabledToolIds.includes(searchToolId);

  // Sync search tool state with sources on mount/when states change
  useEffect(() => {
    if (searchToolId === null || !sourcesInitialized) return;

    const hasEnabledSources = numSourcesEnabled > 0;
    if (hasEnabledSources && searchToolDisabled) {
      // Sources are enabled but search tool is disabled - enable it
      toggleToolForCurrentAgent(searchToolId);
    } else if (!hasEnabledSources && !searchToolDisabled) {
      // No sources enabled but search tool is enabled - disable it
      toggleToolForCurrentAgent(searchToolId);
    }
  }, [
    searchToolId,
    numSourcesEnabled,
    searchToolDisabled,
    sourcesInitialized,
    toggleToolForCurrentAgent,
  ]);

  // Set search tool to a specific enabled/disabled state (only toggles if needed)
  const setSearchToolEnabled = (enabled: boolean) => {
    if (searchToolId === null) return;

    if (enabled && searchToolDisabled) {
      toggleToolForCurrentAgent(searchToolId);
    } else if (!enabled && !searchToolDisabled) {
      toggleToolForCurrentAgent(searchToolId);
    }
  };

  const handleSourceToggle = (sourceUniqueKey: string) => {
    const willEnable = !isSourceEnabled(sourceUniqueKey);
    const newEnabledCount = numSourcesEnabled + (willEnable ? 1 : -1);

    toggleSource(sourceUniqueKey);
    setSearchToolEnabled(newEnabledCount > 0);
  };

  const handleDisableAllSources = () => {
    disableAllSources();
    setSearchToolEnabled(false);
  };

  const handleEnableAllSources = () => {
    enableAllSources();
    setSearchToolEnabled(true);
  };

  const handleToggleTool = (toolId: number) => {
    const wasDisabled = disabledToolIds.includes(toolId);
    toggleToolForCurrentAgent(toolId);

    if (toolId === searchToolId) {
      if (wasDisabled) {
        // Enabling - restore previous sources or enable all (persisted to localStorage)
        const previous = previouslyEnabledSourcesRef.current;
        if (previous.length > 0) {
          enableSources(previous);
        } else {
          baseEnableAllSources();
        }
        previouslyEnabledSourcesRef.current = [];
      } else {
        // Disabling - store current sources then disable all (persisted to localStorage)
        previouslyEnabledSourcesRef.current = [...selectedSources];
        baseDisableAllSources();
      }
    }
  };

  // Only show sources the agent has access to
  const accessibleConfiguredSources = configuredSources.filter(
    (source) =>
      agentAccessibleSources === null ||
      agentAccessibleSources.has(source.uniqueKey)
  );

  const sourceToggleItems: SwitchListItem[] = accessibleConfiguredSources.map(
    (source) => ({
      id: source.uniqueKey,
      label: source.displayName,
      leading: <SourceIcon sourceType={source.internalName} iconSize={16} />,
      isEnabled: isSourceEnabled(source.uniqueKey),
      onToggle: () => handleSourceToggle(source.uniqueKey),
    })
  );

  const allSourcesDisabled = configuredSources.every(
    (source) => !isSourceEnabled(source.uniqueKey)
  );

  // Count enabled sources for display (only accessible sources)
  const enabledSourceCount = accessibleConfiguredSources.filter((source) =>
    isSourceEnabled(source.uniqueKey)
  ).length;
  const totalSourceCount = accessibleConfiguredSources.length;

  const primaryView = (
    <PopoverMenu>
      {[
        <InputTypeIn
          key="search"
          placeholder="Search Actions"
          value={searchTerm}
          onChange={(event) => setSearchTerm(event.target.value)}
          autoFocus
          variant="internal"
        />,

        // Actions
        ...filteredTools.map((tool) =>
          (() => {
            const isToolAvailable = availableToolIdSet.has(tool.id);
            const isUnavailable =
              !isToolAvailable && tool.in_code_tool_id !== SEARCH_TOOL_ID;
            const canAdminConfigure = isAdmin || isCurator;
            const adminConfigureInfo =
              isUnavailable && canAdminConfigure
                ? getAdminConfigureInfo(tool)
                : null;
            return (
              <ActionLineItem
                key={tool.id}
                tool={tool}
                disabled={disabledToolIds.includes(tool.id)}
                isForced={forcedToolIds.includes(tool.id)}
                isUnavailable={isUnavailable}
                tooltip={getToolTooltip(
                  tool,
                  isToolAvailable,
                  canAdminConfigure
                )}
                showAdminConfigure={!!adminConfigureInfo}
                adminConfigureHref={adminConfigureInfo?.href}
                adminConfigureTooltip={adminConfigureInfo?.tooltip}
                onToggle={() => handleToggleTool(tool.id)}
                onForceToggle={() =>
                  handleForceToggleWithTracking(
                    tool.id,
                    forcedToolIds.includes(tool.id)
                  )
                }
                onSourceManagementOpen={() =>
                  setSecondaryView({ type: "sources" })
                }
                hasNoConnectors={hasNoConnectors}
                toolAuthStatus={getToolAuthStatus(tool)}
                onOAuthAuthenticate={() => authenticateTool(tool)}
                onClose={() => setOpen(false)}
                sourceCounts={{
                  enabled: enabledSourceCount,
                  total: totalSourceCount,
                }}
              />
            );
          })()
        ),

        // MCP Servers
        ...filteredMCPServers.map((server) => {
          const serverData = mcpServerData[server.id] || {
            isAuthenticated:
              !!server.user_authenticated || !!server.is_authenticated,
            isLoading: false,
          };

          // Tools for this server come from assistant.tools
          const serverTools = selectedAgent.tools.filter(
            (t) => t.mcp_server_id === Number(server.id)
          );
          const enabledTools = serverTools.filter(
            (t) => !disabledToolIds.includes(t.id)
          );

          return (
            <MCPLineItem
              key={server.id}
              server={server}
              isActive={selectedMcpServerId === server.id}
              tools={serverTools}
              enabledTools={enabledTools}
              isAuthenticated={serverData.isAuthenticated}
              isLoading={serverData.isLoading}
              onSelect={() =>
                setSecondaryView({
                  type: "mcp",
                  serverId: server.id,
                })
              }
              onAuthenticate={() => handleServerAuthentication(server)}
            />
          );
        }),

        null,

        (isAdmin || isCurator) && (
          <LineItem href="/admin/actions" icon={SvgActions} key="more-actions">
            More Actions
          </LineItem>
        ),
      ]}
    </PopoverMenu>
  );

  const toolsView = (
    <SwitchList
      items={sourceToggleItems}
      searchPlaceholder="Search Filters"
      allDisabled={allSourcesDisabled}
      onDisableAll={handleDisableAllSources}
      onEnableAll={handleEnableAllSources}
      disableAllLabel="Disable All Sources"
      enableAllLabel="Enable All Sources"
      onBack={() => setSecondaryView(null)}
    />
  );

  const mcpView = (
    <SwitchList
      items={mcpToggleItems}
      searchPlaceholder={`Search ${selectedMcpServer?.name ?? "server"} tools`}
      allDisabled={mcpAllDisabled}
      onDisableAll={disableAllToolsForSelectedServer}
      onEnableAll={enableAllToolsForSelectedServer}
      disableAllLabel="Disable All Tools"
      enableAllLabel="Enable All Tools"
      onBack={() => setSecondaryView(null)}
      footer={mcpFooter}
    />
  );

  // If no tools or MCP servers are available, don't render the component
  if (displayTools.length === 0 && mcpServers.length === 0) return null;

  return (
    <>
      <Popover open={open} onOpenChange={handleOpenChange}>
        <Popover.Trigger asChild>
          <div data-testid="action-management-toggle">
            <Button
              disabled={disabled}
              icon={SvgSliders}
              interaction={open ? "hover" : "rest"}
              prominence="tertiary"
              tooltip="Manage Actions"
            />
          </div>
        </Popover.Trigger>
        <Popover.Content side="bottom" align="start" width="lg">
          <div data-testid="tool-options">
            {secondaryView
              ? secondaryView.type === "mcp"
                ? mcpView
                : toolsView
              : primaryView}
          </div>
        </Popover.Content>
      </Popover>

      {/* MCP API Key Modal */}
      {mcpApiKeyModal.isOpen && (
        <MCPApiKeyModal
          isOpen={mcpApiKeyModal.isOpen}
          onClose={() =>
            setMcpApiKeyModal({
              isOpen: false,
              serverId: null,
              serverName: "",
              authTemplate: undefined,
              onSuccess: undefined,
              isAuthenticated: false,
              existingCredentials: undefined,
            })
          }
          serverName={mcpApiKeyModal.serverName}
          serverId={mcpApiKeyModal.serverId ?? 0}
          authTemplate={mcpApiKeyModal.authTemplate}
          onSubmit={handleMCPApiKeySubmit}
          onSubmitCredentials={handleMCPCredentialsSubmit}
          onSuccess={mcpApiKeyModal.onSuccess}
          isAuthenticated={mcpApiKeyModal.isAuthenticated}
          existingCredentials={mcpApiKeyModal.existingCredentials}
        />
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-components/popovers/FilePickerPopover.tsx
================================================
"use client";

import React, { useEffect, useMemo, useRef, useState } from "react";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { cn, noProp } from "@/lib/utils";
import UserFilesModal from "@/components/modals/UserFilesModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import {
  ProjectFile,
  UserFileStatus,
} from "@/app/app/projects/projectsService";
import LineItem from "@/refresh-components/buttons/LineItem";
import IconButton from "@/refresh-components/buttons/IconButton";
import { toast } from "@/hooks/useToast";
import { useProjectsContext } from "@/providers/ProjectsContext";
import Text from "@/refresh-components/texts/Text";
import { MAX_FILES_TO_SHOW } from "@/lib/constants";
import { isImageFile } from "@/lib/utils";
import {
  SvgExternalLink,
  SvgFileText,
  SvgImage,
  SvgLoader,
  SvgMoreHorizontal,
  SvgPaperclip,
} from "@opal/icons";
const getFileExtension = (fileName: string): string => {
  const idx = fileName.lastIndexOf(".");
  if (idx === -1) return "";
  const ext = fileName.slice(idx + 1).toLowerCase();
  if (ext === "txt") return "PLAINTEXT";
  return ext.toUpperCase();
};

interface FileLineItemProps {
  projectFile: ProjectFile;
  onPickRecent: (file: ProjectFile) => void;
  onFileClick: (file: ProjectFile) => void;
}

function FileLineItem({
  projectFile,
  onPickRecent,
  onFileClick,
}: FileLineItemProps) {
  const showLoader = useMemo(
    () =>
      String(projectFile.status) === UserFileStatus.PROCESSING ||
      String(projectFile.status) === UserFileStatus.UPLOADING ||
      String(projectFile.status) === UserFileStatus.DELETING,
    [projectFile.status]
  );

  const disableActionButton = useMemo(
    () =>
      String(projectFile.status) === UserFileStatus.UPLOADING ||
      String(projectFile.status) === UserFileStatus.DELETING,
    [projectFile.status]
  );

  return (
    <LineItem
      key={projectFile.id}
      onClick={noProp(() => onPickRecent(projectFile))}
      icon={
        showLoader
          ? ({ className }) => (
              <SvgLoader className={cn(className, "animate-spin")} />
            )
          : isImageFile(projectFile.name)
            ? SvgImage
            : SvgFileText
      }
      rightChildren={
        <div className="h-[1rem] flex flex-col justify-center">
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <IconButton
            icon={SvgExternalLink}
            onClick={noProp(() => onFileClick(projectFile))}
            tooltip="View File"
            disabled={disableActionButton}
            internal
            className="hidden group-hover/LineItem:flex"
          />
          <Text
            as="p"
            className="flex group-hover/LineItem:hidden"
            secondaryBody
            text03
          >
            {getFileExtension(projectFile.name)}
          </Text>
        </div>
      }
    >
      {projectFile.name}
    </LineItem>
  );
}

interface FilePickerPopoverContentsProps {
  recentFiles: ProjectFile[];
  onPickRecent: (file: ProjectFile) => void;
  onFileClick: (file: ProjectFile) => void;
  triggerUploadPicker: () => void;
  openRecentFilesModal: () => void;
}

function FilePickerPopoverContents({
  recentFiles,
  onPickRecent,
  onFileClick,
  triggerUploadPicker,
  openRecentFilesModal,
}: FilePickerPopoverContentsProps) {
  // These are the "quick" files that we show. Essentially "speed dial", but for files.
  // The rest of the files will be hidden behind the "All Recent Files" button, should there be more files left to show!
  const hasFiles = recentFiles.length > 0;
  const shouldShowMoreFilesButton = recentFiles.length > MAX_FILES_TO_SHOW;
  const quickAccessFiles = recentFiles.slice(0, MAX_FILES_TO_SHOW);

  return (
    <PopoverMenu>
      {[
        // Action button to upload more files
        <LineItem
          key="upload-files"
          icon={SvgPaperclip}
          description="Upload a file from your device"
          onClick={triggerUploadPicker}
        >
          Upload Files
        </LineItem>,

        // Separator
        null,

        // Title
        hasFiles && (
          <div key="recent-files" className="pt-1">
            <Text as="p" text02 secondaryBody className="py-1 px-3">
              Recent Files
            </Text>
          </div>
        ),

        // Quick access files
        ...quickAccessFiles.map((projectFile) => (
          <FileLineItem
            key={projectFile.id}
            projectFile={projectFile}
            onPickRecent={onPickRecent}
            onFileClick={onFileClick}
          />
        )),

        // Rest of the files
        shouldShowMoreFilesButton && (
          <LineItem icon={SvgMoreHorizontal} onClick={openRecentFilesModal}>
            All Recent Files
          </LineItem>
        ),
      ]}
    </PopoverMenu>
  );
}

export interface FilePickerPopoverProps {
  onPickRecent?: (file: ProjectFile) => void;
  onUnpickRecent?: (file: ProjectFile) => void;
  onFileClick?: (file: ProjectFile) => void;
  handleUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  trigger?: React.ReactNode | ((open: boolean) => React.ReactNode);
  selectedFileIds?: string[];
}

export default function FilePickerPopover({
  onPickRecent,
  onUnpickRecent,
  onFileClick,
  handleUploadChange,
  trigger,
  selectedFileIds,
}: FilePickerPopoverProps) {
  const { allRecentFiles } = useProjectsContext();
  const fileInputRef = useRef<HTMLInputElement | null>(null);
  const recentFilesModal = useCreateModal();
  const [open, setOpen] = useState(false);
  // Snapshot of recent files to avoid re-arranging when the modal is open
  const [recentFilesSnapshot, setRecentFilesSnapshot] = useState<ProjectFile[]>(
    []
  );
  const { deleteUserFile, setCurrentMessageFiles } = useProjectsContext();
  const [deletedFileIds, setDeletedFileIds] = useState<string[]>([]);

  const triggerUploadPicker = () => fileInputRef.current?.click();

  useEffect(() => {
    setRecentFilesSnapshot(
      allRecentFiles.slice().filter((f) => !deletedFileIds.includes(f.id))
    );
  }, [allRecentFiles]);

  const handleDeleteFile = (file: ProjectFile) => {
    const lastStatus = file.status;
    setRecentFilesSnapshot((prev) =>
      prev.map((f) =>
        f.id === file.id ? { ...f, status: UserFileStatus.DELETING } : f
      )
    );
    deleteUserFile(file.id)
      .then((result) => {
        if (!result.has_associations) {
          toast.success("File deleted successfully");
          setCurrentMessageFiles((prev) =>
            prev.filter((f) => f.id !== file.id)
          );
          setDeletedFileIds((prev) => [...prev, file.id]);
          setRecentFilesSnapshot((prev) => prev.filter((f) => f.id != file.id));
        } else {
          setRecentFilesSnapshot((prev) =>
            prev.map((f) =>
              f.id === file.id ? { ...f, status: lastStatus } : f
            )
          );
          let projects = result.project_names.join(", ");
          let assistants = result.assistant_names.join(", ");
          let message = "Cannot delete file. It is associated with";
          if (projects) {
            message += ` projects: ${projects}`;
          }
          if (projects && assistants) {
            message += " and ";
          }
          if (assistants) {
            message += `assistants: ${assistants}`;
          }

          toast.error(message);
        }
      })
      .catch((error) => {
        // Revert status and show error if the delete request fails
        setRecentFilesSnapshot((prev) =>
          prev.map((f) => (f.id === file.id ? { ...f, status: lastStatus } : f))
        );
        toast.error("Failed to delete file. Please try again.");
        // Useful for debugging; safe in client components
        console.error("Failed to delete file", error);
      });
  };

  return (
    <>
      <input
        ref={fileInputRef}
        type="file"
        className="hidden"
        multiple
        onChange={handleUploadChange}
        accept={"*/*"}
      />

      <recentFilesModal.Provider>
        <UserFilesModal
          title="Recent Files"
          description="Upload files or pick from your recent files."
          recentFiles={recentFilesSnapshot}
          onPickRecent={(file) => {
            onPickRecent && onPickRecent(file);
          }}
          onUnpickRecent={(file) => {
            onUnpickRecent && onUnpickRecent(file);
          }}
          handleUploadChange={handleUploadChange}
          onView={onFileClick}
          selectedFileIds={selectedFileIds}
          onDelete={handleDeleteFile}
        />
      </recentFilesModal.Provider>

      <Popover open={open} onOpenChange={setOpen}>
        <Popover.Trigger asChild>
          {typeof trigger === "function" ? trigger(open) : trigger}
        </Popover.Trigger>
        <Popover.Content align="start" side="bottom" width="lg">
          <FilePickerPopoverContents
            recentFiles={recentFilesSnapshot}
            onPickRecent={(file) => {
              onPickRecent && onPickRecent(file);
              setOpen(false);
            }}
            onFileClick={(file) => {
              onFileClick && onFileClick(file);
              setOpen(false);
            }}
            triggerUploadPicker={() => {
              triggerUploadPicker();
              setOpen(false);
            }}
            openRecentFilesModal={() => {
              recentFilesModal.toggle(true);
              // Close the small popover when opening the dialog
              setOpen(false);
            }}
          />
        </Popover.Content>
      </Popover>
    </>
  );
}


================================================
FILE: web/src/refresh-components/popovers/LLMPopover.test.tsx
================================================
import { buildLlmOptions, groupLlmOptions } from "./LLMPopover";
import { LLMOption } from "./interfaces";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import { makeProvider } from "@tests/setup/llmProviderTestUtils";

describe("LLMPopover helpers", () => {
  test("deduplicates identical provider+model combinations across provider entries", () => {
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        name: "OpenAI A",
        provider: "openai",
        model_configurations: [
          {
            name: "shared-model",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        name: "OpenAI B",
        provider: "openai",
        model_configurations: [
          {
            name: "shared-model",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
      makeProvider({
        name: "Anthropic A",
        provider: "anthropic",
        model_configurations: [
          {
            name: "shared-model",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    const options = buildLlmOptions(providers);
    const sharedModelOptions = options.filter(
      (o) => o.modelName === "shared-model"
    );

    expect(sharedModelOptions).toHaveLength(2);
    expect(sharedModelOptions.map((o) => o.provider).sort()).toEqual([
      "anthropic",
      "openai",
    ]);
  });

  test("includes currently selected hidden model in options", () => {
    const providers: LLMProviderDescriptor[] = [
      makeProvider({
        name: "OpenAI A",
        provider: "openai",
        model_configurations: [
          {
            name: "hidden-selected-model",
            is_visible: false,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    ];

    const options = buildLlmOptions(providers, "hidden-selected-model");
    expect(options.map((o) => o.modelName)).toContain("hidden-selected-model");
  });

  test("groups aggregator options by provider/vendor and sorts by display name", () => {
    const options: LLMOption[] = [
      {
        name: "Bedrock Provider",
        provider: "bedrock",
        providerDisplayName: "Amazon Bedrock",
        modelName: "claude-3-5-sonnet",
        displayName: "Claude 3.5 Sonnet",
        vendor: "anthropic",
      },
      {
        name: "OpenAI Provider",
        provider: "openai",
        providerDisplayName: "ChatGPT (OpenAI)",
        modelName: "gpt-4o-mini",
        displayName: "GPT-4o Mini",
        vendor: null,
      },
    ];

    const grouped = groupLlmOptions(options);

    expect(grouped.map((group) => group.key)).toEqual([
      "bedrock/anthropic",
      "openai",
    ]);
    expect(grouped[0]?.displayName).toBe("Amazon Bedrock/Anthropic");
    expect(grouped[1]?.displayName).toBe("ChatGPT (OpenAI)");
    expect(grouped[0]?.options).toHaveLength(1);
    expect(grouped[1]?.options).toHaveLength(1);
  });
});


================================================
FILE: web/src/refresh-components/popovers/LLMPopover.tsx
================================================
"use client";

import { useState, useEffect, useCallback, useMemo, useRef } from "react";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { LlmDescriptor, LlmManager } from "@/lib/hooks";
import { structureValue } from "@/lib/llmConfig/utils";
import {
  getProviderIcon,
  AGGREGATOR_PROVIDERS,
} from "@/app/admin/configuration/llm/utils";
import { LLMProviderDescriptor } from "@/interfaces/llm";
import { Slider } from "@/components/ui/slider";
import { useUser } from "@/providers/UserProvider";
import LineItem from "@/refresh-components/buttons/LineItem";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import {
  Accordion,
  AccordionContent,
  AccordionItem,
  AccordionTrigger,
} from "@/components/ui/accordion";
import {
  SvgCheck,
  SvgChevronDown,
  SvgChevronRight,
  SvgRefreshCw,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { OpenButton } from "@opal/components";
import { LLMOption, LLMOptionGroup } from "./interfaces";

export interface LLMPopoverProps {
  llmManager: LlmManager;
  requiresImageInput?: boolean;
  foldable?: boolean;
  onSelect?: (value: string) => void;
  currentModelName?: string;
  disabled?: boolean;
}

export function buildLlmOptions(
  llmProviders: LLMProviderDescriptor[] | undefined,
  currentModelName?: string
): LLMOption[] {
  if (!llmProviders) {
    return [];
  }

  // Track seen combinations of provider + exact model name to avoid true duplicates
  // (same model appearing from multiple LLM provider configs with same provider type)
  const seenKeys = new Set<string>();
  const options: LLMOption[] = [];

  llmProviders.forEach((llmProvider) => {
    llmProvider.model_configurations
      .filter(
        (modelConfiguration) =>
          modelConfiguration.is_visible ||
          modelConfiguration.name === currentModelName
      )
      .forEach((modelConfiguration) => {
        // Deduplicate by exact provider + model name combination
        const key = `${llmProvider.provider}:${modelConfiguration.name}`;
        if (seenKeys.has(key)) {
          return;
        }
        seenKeys.add(key);

        options.push({
          name: llmProvider.name,
          provider: llmProvider.provider,
          providerDisplayName:
            llmProvider.provider_display_name || llmProvider.provider,
          modelName: modelConfiguration.name,
          displayName:
            modelConfiguration.display_name || modelConfiguration.name,
          vendor: modelConfiguration.vendor || null,
          maxInputTokens: modelConfiguration.max_input_tokens,
          region: modelConfiguration.region || null,
          version: modelConfiguration.version || null,
          supportsReasoning: modelConfiguration.supports_reasoning || false,
          supportsImageInput: modelConfiguration.supports_image_input || false,
        });
      });
  });

  return options;
}

export function groupLlmOptions(
  filteredOptions: LLMOption[]
): LLMOptionGroup[] {
  const groups = new Map<string, Omit<LLMOptionGroup, "key">>();

  filteredOptions.forEach((option) => {
    const provider = option.provider.toLowerCase();
    const isAggregator = AGGREGATOR_PROVIDERS.has(provider);
    const groupKey =
      isAggregator && option.vendor
        ? `${provider}/${option.vendor.toLowerCase()}`
        : provider;

    if (!groups.has(groupKey)) {
      let displayName: string;

      if (isAggregator && option.vendor) {
        const vendorDisplayName =
          option.vendor.charAt(0).toUpperCase() + option.vendor.slice(1);
        displayName = `${option.providerDisplayName}/${vendorDisplayName}`;
      } else {
        displayName = option.providerDisplayName;
      }

      groups.set(groupKey, {
        displayName,
        options: [],
        Icon: getProviderIcon(provider),
      });
    }

    groups.get(groupKey)!.options.push(option);
  });

  const sortedKeys = Array.from(groups.keys()).sort((a, b) =>
    groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
  );

  return sortedKeys.map((key) => {
    const group = groups.get(key)!;
    return {
      key,
      displayName: group.displayName,
      options: group.options,
      Icon: group.Icon,
    };
  });
}

export default function LLMPopover({
  llmManager,
  requiresImageInput,
  foldable,
  onSelect,
  currentModelName,
  disabled = false,
}: LLMPopoverProps) {
  const llmProviders = llmManager.llmProviders;
  const isLoadingProviders = llmManager.isLoadingProviders;

  const [open, setOpen] = useState(false);
  const [searchQuery, setSearchQuery] = useState("");
  const { user } = useUser();

  const [localTemperature, setLocalTemperature] = useState(
    llmManager.temperature ?? 0.5
  );

  useEffect(() => {
    setLocalTemperature(llmManager.temperature ?? 0.5);
  }, [llmManager.temperature]);

  const searchInputRef = useRef<HTMLInputElement>(null);
  const scrollContainerRef = useRef<HTMLDivElement>(null);
  const selectedItemRef = useRef<HTMLDivElement>(null);

  const handleGlobalTemperatureChange = useCallback((value: number[]) => {
    const value_0 = value[0];
    if (value_0 !== undefined) {
      setLocalTemperature(value_0);
    }
  }, []);

  const handleGlobalTemperatureCommit = useCallback(
    (value: number[]) => {
      const value_0 = value[0];
      if (value_0 !== undefined) {
        llmManager.updateTemperature(value_0);
      }
    },
    [llmManager]
  );

  const llmOptions = useMemo(
    () => buildLlmOptions(llmProviders, currentModelName),
    [llmProviders, currentModelName]
  );

  // Filter options by vision capability (when images are uploaded) and search query
  const filteredOptions = useMemo(() => {
    let result = llmOptions;
    if (requiresImageInput) {
      result = result.filter((opt) => opt.supportsImageInput);
    }
    if (searchQuery.trim()) {
      const query = searchQuery.toLowerCase();
      result = result.filter(
        (opt) =>
          opt.displayName.toLowerCase().includes(query) ||
          opt.modelName.toLowerCase().includes(query) ||
          (opt.vendor && opt.vendor.toLowerCase().includes(query))
      );
    }
    return result;
  }, [llmOptions, searchQuery, requiresImageInput]);

  // Group options by provider using backend-provided display names and ordering
  // For aggregator providers (bedrock, openrouter, vertex_ai), flatten to "Provider/Vendor" format
  const groupedOptions = useMemo(
    () => groupLlmOptions(filteredOptions),
    [filteredOptions]
  );

  // Get display name for the model to show in the button
  // Use currentModelName prop if provided (e.g., for regenerate showing the model used),
  // otherwise fall back to the globally selected model
  const currentLlmDisplayName = useMemo(() => {
    // Only use currentModelName if it's a non-empty string
    const currentModel =
      currentModelName && currentModelName.trim()
        ? currentModelName
        : llmManager.currentLlm.modelName;
    if (!llmProviders) return currentModel;

    for (const provider of llmProviders) {
      const config = provider.model_configurations.find(
        (m) => m.name === currentModel
      );
      if (config) {
        return config.display_name || config.name;
      }
    }
    return currentModel;
  }, [llmProviders, currentModelName, llmManager.currentLlm.modelName]);

  // Determine which group the current model belongs to (for auto-expand)
  const currentGroupKey = useMemo(() => {
    const currentModel = llmManager.currentLlm.modelName;
    const currentProvider = llmManager.currentLlm.provider;
    // Match by both modelName AND provider to handle same model name across providers
    const option = llmOptions.find(
      (o) => o.modelName === currentModel && o.provider === currentProvider
    );
    if (!option) return "openai";

    const provider = option.provider.toLowerCase();
    const isAggregator = AGGREGATOR_PROVIDERS.has(provider);

    if (isAggregator && option.vendor) {
      return `${provider}/${option.vendor.toLowerCase()}`;
    }
    return provider;
  }, [
    llmOptions,
    llmManager.currentLlm.modelName,
    llmManager.currentLlm.provider,
  ]);

  // Track expanded groups - initialize with current model's group
  const [expandedGroups, setExpandedGroups] = useState<string[]>([
    currentGroupKey,
  ]);

  // Reset state when popover closes/opens
  useEffect(() => {
    if (!open) {
      setSearchQuery("");
    } else {
      // Reset expanded groups to only show the selected model's group
      setExpandedGroups([currentGroupKey]);
    }
  }, [open, currentGroupKey]);

  // Auto-scroll to selected model when popover opens
  useEffect(() => {
    if (open) {
      // Small delay to let accordion content render
      const timer = setTimeout(() => {
        selectedItemRef.current?.scrollIntoView({
          behavior: "instant",
          block: "center",
        });
      }, 50);
      return () => clearTimeout(timer);
    }
  }, [open]);

  const isSearching = searchQuery.trim().length > 0;

  // Compute final expanded groups
  const effectiveExpandedGroups = useMemo(() => {
    if (isSearching) {
      // Force expand all when searching
      return groupedOptions.map((g) => g.key);
    }
    return expandedGroups;
  }, [isSearching, groupedOptions, expandedGroups]);

  // Handler for accordion changes
  const handleAccordionChange = (value: string[]) => {
    // Only update state when not searching (force-expanding)
    if (!isSearching) {
      setExpandedGroups(value);
    }
  };

  const handleSelectModel = (option: LLMOption) => {
    llmManager.updateCurrentLlm({
      modelName: option.modelName,
      provider: option.provider,
      name: option.name,
    } as LlmDescriptor);
    onSelect?.(structureValue(option.name, option.provider, option.modelName));
    setOpen(false);
  };

  const renderModelItem = (option: LLMOption) => {
    const isSelected =
      option.modelName === llmManager.currentLlm.modelName &&
      option.provider === llmManager.currentLlm.provider;

    const capabilities: string[] = [];
    if (option.supportsReasoning) {
      capabilities.push("Reasoning");
    }
    if (option.supportsImageInput) {
      capabilities.push("Vision");
    }
    const description =
      capabilities.length > 0 ? capabilities.join(", ") : undefined;

    return (
      <div
        key={`${option.name}-${option.modelName}`}
        ref={isSelected ? selectedItemRef : undefined}
      >
        <LineItem
          selected={isSelected}
          description={description}
          onClick={() => handleSelectModel(option)}
          rightChildren={
            isSelected ? (
              <SvgCheck className="h-4 w-4 stroke-action-link-05 shrink-0" />
            ) : null
          }
        >
          {option.displayName}
        </LineItem>
      </div>
    );
  };

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <div data-testid="llm-popover-trigger">
        <Popover.Trigger asChild disabled={disabled}>
          <OpenButton
            disabled={disabled}
            icon={
              foldable
                ? SvgRefreshCw
                : getProviderIcon(
                    llmManager.currentLlm.provider,
                    llmManager.currentLlm.modelName
                  )
            }
            foldable={foldable}
          >
            {currentLlmDisplayName}
          </OpenButton>
        </Popover.Trigger>
      </div>

      <Popover.Content side="top" align="end" width="xl">
        <Section gap={0.5}>
          {/* Search Input */}
          <InputTypeIn
            ref={searchInputRef}
            leftSearchIcon
            variant="internal"
            value={searchQuery}
            onChange={(e) => setSearchQuery(e.target.value)}
            placeholder="Search models..."
          />

          {/* Model List with Vendor Groups */}
          <PopoverMenu scrollContainerRef={scrollContainerRef}>
            {isLoadingProviders
              ? [
                  <div key="loading" className="flex items-center gap-2 py-3">
                    <SimpleLoader />
                    <Text secondaryBody text03>
                      Loading models...
                    </Text>
                  </div>,
                ]
              : groupedOptions.length === 0
                ? [
                    <div key="empty" className="py-3">
                      <Text secondaryBody text03>
                        No models found
                      </Text>
                    </div>,
                  ]
                : groupedOptions.length === 1
                  ? // Single provider - show models directly without accordion
                    [
                      <div
                        key="single-provider"
                        className="flex flex-col gap-1"
                      >
                        {groupedOptions[0]!.options.map(renderModelItem)}
                      </div>,
                    ]
                  : // Multiple providers - show accordion with groups
                    [
                      <Accordion
                        key="accordion"
                        type="multiple"
                        value={effectiveExpandedGroups}
                        onValueChange={handleAccordionChange}
                        className="w-full flex flex-col"
                      >
                        {groupedOptions.map((group) => {
                          const isExpanded = effectiveExpandedGroups.includes(
                            group.key
                          );
                          return (
                            <AccordionItem
                              key={group.key}
                              value={group.key}
                              className="border-none pt-1"
                            >
                              {/* Group Header */}
                              <AccordionTrigger className="flex items-center rounded-08 hover:no-underline hover:bg-background-tint-02 group [&>svg]:hidden w-full py-1">
                                <div className="flex items-center gap-1 shrink-0">
                                  <div className="flex items-center justify-center size-5 shrink-0">
                                    <group.Icon size={16} />
                                  </div>
                                  <Text
                                    secondaryBody
                                    text03
                                    nowrap
                                    className="px-0.5"
                                  >
                                    {group.displayName}
                                  </Text>
                                </div>
                                <div className="flex-1" />
                                <div className="flex items-center justify-center size-6 shrink-0">
                                  {isExpanded ? (
                                    <SvgChevronDown className="h-4 w-4 stroke-text-04 shrink-0" />
                                  ) : (
                                    <SvgChevronRight className="h-4 w-4 stroke-text-04 shrink-0" />
                                  )}
                                </div>
                              </AccordionTrigger>

                              {/* Model Items - full width highlight */}
                              <AccordionContent className="pb-0 pt-0">
                                <div className="flex flex-col gap-1">
                                  {group.options.map(renderModelItem)}
                                </div>
                              </AccordionContent>
                            </AccordionItem>
                          );
                        })}
                      </Accordion>,
                    ]}
          </PopoverMenu>

          {/* Global Temperature Slider (shown if enabled in user prefs) */}
          {user?.preferences?.temperature_override_enabled && (
            <>
              <div className="border-t border-border-02 mx-2" />
              <div className="flex flex-col w-full py-2 gap-2">
                <Slider
                  value={[localTemperature]}
                  max={llmManager.maxTemperature}
                  min={0}
                  step={0.01}
                  onValueChange={handleGlobalTemperatureChange}
                  onValueCommit={handleGlobalTemperatureCommit}
                  className="w-full"
                />
                <div className="flex flex-row items-center justify-between">
                  <Text secondaryBody text03>
                    Temperature (creativity)
                  </Text>
                  <Text secondaryBody text03>
                    {localTemperature.toFixed(1)}
                  </Text>
                </div>
              </div>
            </>
          )}
        </Section>
      </Popover.Content>
    </Popover>
  );
}


================================================
FILE: web/src/refresh-components/popovers/interfaces.ts
================================================
import { IconProps } from "@/components/icons/icons";
import { FunctionComponent } from "react";

export interface LLMOption {
  name: string;
  provider: string;
  providerDisplayName: string;
  modelName: string;
  displayName: string;
  description?: string;
  vendor: string | null;
  maxInputTokens?: number | null;
  region?: string | null;
  version?: string | null;
  supportsReasoning?: boolean;
  supportsImageInput?: boolean;
}

export interface LLMOptionGroup {
  key: string;
  displayName: string;
  options: LLMOption[];
  Icon: FunctionComponent<IconProps>;
}


================================================
FILE: web/src/refresh-components/skeletons/ChatSessionSkeleton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ChatSessionSkeleton from "./ChatSessionSkeleton";

const meta: Meta<typeof ChatSessionSkeleton> = {
  title: "refresh-components/Skeletons/ChatSessionSkeleton",
  component: ChatSessionSkeleton,
  tags: ["autodocs"],
  parameters: {
    layout: "padded",
  },
};

export default meta;
type Story = StoryObj<typeof ChatSessionSkeleton>;

export const Default: Story = {};

export const Multiple: Story = {
  render: () => (
    <div className="flex flex-col gap-1" style={{ width: 300 }}>
      <ChatSessionSkeleton />
      <ChatSessionSkeleton />
      <ChatSessionSkeleton />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/skeletons/ChatSessionSkeleton.tsx
================================================
export default function ChatSessionSkeleton() {
  return (
    <div className="w-full rounded-08 py-2 p-1.5">
      <div className="flex gap-3 min-w-0 w-full">
        <div className="flex h-full w-fit pt-1 pl-1">
          <div className="h-4 w-4 rounded-full bg-background-tint-02 animate-pulse" />
        </div>
        <div className="flex flex-col w-full gap-1">
          <div className="h-5 w-2/3 rounded bg-background-tint-02 animate-pulse" />
          <div className="h-4 w-1/2 rounded bg-background-tint-02 animate-pulse" />
        </div>
      </div>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/skeletons/SidebarTabSkeleton.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import SidebarTabSkeleton from "./SidebarTabSkeleton";

const meta: Meta<typeof SidebarTabSkeleton> = {
  title: "refresh-components/Skeletons/SidebarTabSkeleton",
  component: SidebarTabSkeleton,
  tags: ["autodocs"],
  parameters: {
    layout: "padded",
  },
};

export default meta;
type Story = StoryObj<typeof SidebarTabSkeleton>;

export const Default: Story = {};

export const NarrowText: Story = {
  args: {
    textWidth: "w-1/3",
  },
};

export const WideText: Story = {
  args: {
    textWidth: "w-full",
  },
};

export const Multiple: Story = {
  render: () => (
    <div className="flex flex-col gap-1" style={{ width: 260 }}>
      <SidebarTabSkeleton textWidth="w-3/4" />
      <SidebarTabSkeleton textWidth="w-1/2" />
      <SidebarTabSkeleton textWidth="w-2/3" />
      <SidebarTabSkeleton textWidth="w-1/3" />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/skeletons/SidebarTabSkeleton.tsx
================================================
import { cn } from "@/lib/utils";

interface SidebarTabSkeletonProps {
  textWidth?: string;
}

export default function SidebarTabSkeleton({
  textWidth = "w-2/3",
}: SidebarTabSkeletonProps) {
  return (
    <div className="w-full rounded-08 p-1.5">
      <div className="h-[1.5rem] flex flex-row items-center px-1 py-0.5">
        <div
          className={cn(
            "h-3 rounded bg-background-tint-04 animate-pulse",
            textWidth
          )}
        />
      </div>
    </div>
  );
}


================================================
FILE: web/src/refresh-components/texts/ExpandableTextDisplay.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import ExpandableTextDisplay from "./ExpandableTextDisplay";

const meta: Meta<typeof ExpandableTextDisplay> = {
  title: "refresh-components/texts/ExpandableTextDisplay",
  component: ExpandableTextDisplay,
  tags: ["autodocs"],
  parameters: {
    layout: "padded",
  },
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <Story />
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof ExpandableTextDisplay>;

const shortContent =
  "This is a short piece of content that fits within the default line clamp.";

const longContent = Array.from(
  { length: 30 },
  (_, i) =>
    `Line ${i + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`
).join("\n");

export const ShortContent: Story = {
  args: {
    title: "Short Content",
    content: shortContent,
  },
};

export const LongContent: Story = {
  args: {
    title: "Log Output",
    content: longContent,
  },
};

export const CustomMaxLines: Story = {
  args: {
    title: "Compact View",
    content: longContent,
    maxLines: 3,
  },
};

export const WithSubtitle: Story = {
  args: {
    title: "Build Log",
    content: longContent,
    subtitle: "2.4 KB - 30 lines",
  },
};

export const StreamingMode: Story = {
  args: {
    title: "Live Output",
    content: longContent,
    isStreaming: true,
    maxLines: 5,
  },
};

export const WithCustomRenderer: Story = {
  args: {
    title: "Formatted Content",
    content:
      "# Hello World\n\nThis is **bold** and this is *italic*.\n\n- Item 1\n- Item 2\n- Item 3",
    renderContent: (content: string) => (
      <pre
        style={{
          whiteSpace: "pre-wrap",
          fontFamily: "monospace",
          fontSize: 13,
        }}
      >
        {content}
      </pre>
    ),
  },
};


================================================
FILE: web/src/refresh-components/texts/ExpandableTextDisplay.tsx
================================================
"use client";

import { useState, useMemo, useRef, useEffect, useLayoutEffect } from "react";
import * as DialogPrimitive from "@radix-ui/react-dialog";
import Modal from "@/refresh-components/Modal";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import Text from "@/refresh-components/texts/Text";
import { SvgDownload, SvgMaximize2, SvgX } from "@opal/icons";
import { Button } from "@opal/components";
import { cn } from "@/lib/utils";

export interface ExpandableTextDisplayProps {
  /** Title shown in header and modal */
  title: string;
  /** The full text content to display (used in modal and for copy/download) */
  content: string;
  /** Optional content to display in collapsed view (e.g., for streaming animation). Falls back to `content`. */
  displayContent?: string;
  /** Subtitle text (e.g., file size). If not provided, calculates from content */
  subtitle?: string;
  /** Maximum lines to show in collapsed state (1-10). Values outside this range default to 8. */
  maxLines?: 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10;
  /** Additional className for the container */
  className?: string;
  /** Optional custom renderer for content (e.g., markdown). Falls back to plain text.
   * @param content - The text content to render
   * @param isExpanded - Whether the content is being rendered in expanded (modal) view
   */
  renderContent?: (content: string, isExpanded: boolean) => React.ReactNode;
  /** When true, shows last N lines with top-truncation (ellipsis at top) instead of bottom-truncation */
  isStreaming?: boolean;
}

/** Calculate content size in human-readable format */
function getContentSize(text: string): string {
  const bytes = new Blob([text]).size;
  if (bytes < 1024) return `${bytes} Bytes`;
  return `${(bytes / 1024).toFixed(2)} KB`;
}

/** Count lines in text */
function getLineCount(text: string): number {
  return text.split("\n").length;
}

/** Extract the last N lines from text for streaming display.
 * When truncated, returns (maxLines - 1) lines to leave room for ellipsis.
 */
function getLastLines(
  text: string,
  maxLines: number
): { lines: string; hasTruncation: boolean } {
  const allLines = text.split("\n");
  if (allLines.length <= maxLines) {
    return { lines: text, hasTruncation: false };
  }
  // Reserve one line for ellipsis, show last (maxLines - 1) content lines
  const linesToShow = maxLines - 1;
  if (linesToShow <= 0) {
    return { lines: "", hasTruncation: true };
  }
  return {
    lines: allLines.slice(-linesToShow).join("\n"),
    hasTruncation: true,
  };
}

/** Download content as a .txt file */
function downloadAsTxt(content: string, filename: string) {
  const blob = new Blob([content], { type: "text/plain" });
  const url = URL.createObjectURL(blob);
  try {
    const a = document.createElement("a");
    a.href = url;
    a.download = `${filename}.txt`;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
  } finally {
    URL.revokeObjectURL(url);
  }
}

/** Block-level HTML tags used by the snap algorithm to recurse into containers. */
const CONTAINER_TAGS = new Set([
  "UL",
  "OL",
  "LI",
  "BLOCKQUOTE",
  "DIV",
  "DL",
  "DD",
  "TABLE",
  "TBODY",
  "THEAD",
  "TR",
  "TH",
  "TD",
  "SECTION",
  "DETAILS",
  "PRE",
  "FIGURE",
  "FIGCAPTION",
  "ARTICLE",
  "ASIDE",
  "HEADER",
  "FOOTER",
  "MAIN",
  "NAV",
]);

export default function ExpandableTextDisplay({
  title,
  content,
  displayContent,
  subtitle,
  maxLines = 8,
  className,
  renderContent,
  isStreaming = false,
}: ExpandableTextDisplayProps) {
  const [isModalOpen, setIsModalOpen] = useState(false);
  const [isTruncated, setIsTruncated] = useState(false);
  const scrollRef = useRef<HTMLDivElement>(null);
  const contentInnerRef = useRef<HTMLDivElement>(null);

  const lineCount = useMemo(() => getLineCount(content), [content]);
  const contentSize = useMemo(() => getContentSize(content), [content]);
  const displaySubtitle = subtitle ?? contentSize;

  // Truncation detection (read-only, doesn't need to block paint)
  useEffect(() => {
    if (renderContent && scrollRef.current) {
      setIsTruncated(
        scrollRef.current.scrollHeight > scrollRef.current.clientHeight
      );
    } else if (isStreaming) {
      const textToCheck = displayContent ?? content;
      setIsTruncated(getLineCount(textToCheck) > maxLines);
    } else if (scrollRef.current) {
      setIsTruncated(
        scrollRef.current.scrollHeight > scrollRef.current.clientHeight
      );
    }
  }, [isStreaming, renderContent, content, displayContent, maxLines]);

  // Shift content upward during streaming for renderContent mode,
  // snapping to element boundaries so blocks are never partially clipped.
  // Must block paint to avoid flicker.
  useLayoutEffect(() => {
    if (
      !isStreaming ||
      !renderContent ||
      !scrollRef.current ||
      !contentInnerRef.current
    ) {
      return;
    }

    const containerHeight = scrollRef.current.clientHeight;
    const contentHeight = contentInnerRef.current.scrollHeight;
    let overflow = Math.max(0, contentHeight - containerHeight);

    if (overflow > 0) {
      let blockParent: Element = contentInnerRef.current;
      while (
        blockParent.children.length === 1 &&
        blockParent.children[0]!.children.length > 0
      ) {
        blockParent = blockParent.children[0]!;
      }

      contentInnerRef.current.style.transform = "translateY(0)";
      const refTop = contentInnerRef.current.getBoundingClientRect().top;

      let snapParent: Element = blockParent;
      let snap = overflow;
      while (true) {
        let found = false;
        for (let i = 0; i < snapParent.children.length; i++) {
          const child = snapParent.children[i] as HTMLElement;
          const rect = child.getBoundingClientRect();
          const top = rect.top - refTop;
          const bottom = top + rect.height;
          if (top < snap && snap < bottom) {
            if (
              child.children.length > 0 &&
              CONTAINER_TAGS.has(child.tagName)
            ) {
              snapParent = child;
              found = true;
              break;
            }
            snap = bottom;
            found = true;
            break;
          }
        }
        if (!found) break;
        if (snap !== overflow) break;
      }
      overflow = snap;
    }

    contentInnerRef.current.style.transform =
      overflow > 0 ? `translateY(-${overflow}px)` : "translateY(0)";
  }, [isStreaming, renderContent, content, displayContent, maxLines]);

  const handleDownload = () => {
    const sanitizedTitle = title.replace(/[^a-z0-9]/gi, "_").toLowerCase();
    downloadAsTxt(content, sanitizedTitle);
  };

  // Map maxLines to Tailwind line-clamp classes (fallback to 8 for invalid runtime values)
  const lineClampClass =
    {
      1: "line-clamp-1",
      2: "line-clamp-2",
      3: "line-clamp-3",
      4: "line-clamp-4",
      5: "line-clamp-5",
      6: "line-clamp-6",
      7: "line-clamp-7",
      8: "line-clamp-8",
      9: "line-clamp-9",
      10: "line-clamp-10",
    }[maxLines] ?? "line-clamp-8";

  // Single container for renderContent mode (both streaming and static)
  // Keeps scrollRef alive across the streaming → static transition
  const renderContentWithRef = () => {
    const textToDisplay = displayContent ?? content;

    if (isStreaming) {
      // During streaming: use max-height with overflow-hidden and CSS transform to shift
      // content upward, showing the latest content from the bottom without scroll jitter.
      // Line height is approximately 1.5rem (24px) for body text.
      // We show a top ellipsis indicator when content is truncated.
      return (
        <div>
          {isTruncated && (
            <Text as="p" text03 mainUiMuted className="!my-0">
              …
            </Text>
          )}
          <div
            ref={scrollRef}
            className="overflow-hidden"
            style={{ maxHeight: `calc(${maxLines} * 1.5rem)` }}
          >
            <div ref={contentInnerRef}>
              {renderContent!(textToDisplay, false)}
            </div>
          </div>
        </div>
      );
    }

    // Static mode: use CSS line-clamp for bottom truncation
    return (
      <div ref={scrollRef} className={cn("overflow-hidden", lineClampClass)}>
        {renderContent!(textToDisplay, false)}
      </div>
    );
  };

  // Render plain text streaming (top-truncation with last N lines)
  const renderPlainTextStreaming = () => {
    const textToDisplay = displayContent ?? content;
    const { lines, hasTruncation } = getLastLines(textToDisplay, maxLines);

    return (
      <div ref={scrollRef} className="overflow-hidden">
        {hasTruncation && (
          <Text as="span" mainUiMuted text03>
            …{"\n"}
          </Text>
        )}
        <Text as="p" mainUiMuted text03 className="whitespace-pre-wrap">
          {lines}
        </Text>
      </div>
    );
  };

  // Render plain text static (CSS line-clamp + scroll-based truncation detection)
  const renderPlainTextStatic = () => (
    <div ref={scrollRef} className={cn("overflow-hidden", lineClampClass)}>
      <Text as="span" mainUiMuted text03 className="whitespace-pre-wrap">
        {displayContent ?? content}
      </Text>
    </div>
  );

  return (
    <>
      {/* Collapsed View */}
      <div className={cn("w-full flex", className)}>
        <div className="flex-1 min-w-0">
          {renderContent
            ? renderContentWithRef()
            : isStreaming
              ? renderPlainTextStreaming()
              : renderPlainTextStatic()}
        </div>

        {/* Expand button - only show when content is truncated */}

        <div className="flex justify-end self-end mt-1 w-8">
          {isTruncated && (
            <Button
              prominence="tertiary"
              size="sm"
              icon={SvgMaximize2}
              tooltip="View Full Text"
              onClick={() => setIsModalOpen(true)}
            />
          )}
        </div>
      </div>

      {/* Expanded Modal */}
      <Modal open={isModalOpen} onOpenChange={setIsModalOpen}>
        <Modal.Content height="lg" width="lg" preventAccidentalClose={false}>
          {/* Header */}
          <div className="flex items-start justify-between px-4 py-3">
            <div className="flex flex-col">
              <DialogPrimitive.Title asChild>
                <Text as="span" text04 headingH3>
                  {title}
                </Text>
              </DialogPrimitive.Title>
              <DialogPrimitive.Description asChild>
                <Text as="span" text03 secondaryBody>
                  {displaySubtitle}
                </Text>
              </DialogPrimitive.Description>
            </div>
            <DialogPrimitive.Close asChild>
              <Button
                icon={SvgX}
                prominence="tertiary"
                size="sm"
                onClick={() => setIsModalOpen(false)}
              />
            </DialogPrimitive.Close>
          </div>

          {/* Body */}
          <Modal.Body>
            {renderContent ? (
              renderContent(content, true)
            ) : (
              <Text as="p" mainUiMuted text03 className="whitespace-pre-wrap">
                {content}
              </Text>
            )}
          </Modal.Body>

          {/* Footer */}
          <div className="flex items-center justify-between p-2 bg-background-tint-01">
            <div className="px-2">
              <Text as="span" mainUiMuted text03>
                {lineCount} {lineCount === 1 ? "line" : "lines"}
              </Text>
            </div>
            <div className="flex items-center gap-1 bg-background-tint-00 p-1 rounded-12">
              <CopyIconButton
                prominence="tertiary"
                size="sm"
                getCopyText={() => content}
                tooltip="Copy"
              />
              <Button
                prominence="tertiary"
                size="sm"
                icon={SvgDownload}
                tooltip="Download"
                onClick={handleDownload}
              />
            </div>
          </div>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/refresh-components/texts/Text.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Text from "./Text";

const meta: Meta<typeof Text> = {
  title: "refresh-components/texts/Text",
  component: Text,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Text>;

export const Default: Story = {
  args: {
    children: "Hello, this is some default text.",
  },
};

export const Colors: Story = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 8 }}>
      <Text text01 mainUiBody>
        text01 — Primary text color
      </Text>
      <Text text02 mainUiBody>
        text02 — Secondary text color
      </Text>
      <Text text03 mainUiBody>
        text03 — Tertiary text color
      </Text>
      <Text text04 mainUiBody>
        text04 — Quaternary text color
      </Text>
      <Text text05 mainUiBody>
        text05 — Quinary text color
      </Text>
    </div>
  ),
};

export const Typography: Story = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 12 }}>
      <Text headingH2>Heading H2</Text>
      <Text mainContentBody>Main Content Body</Text>
      <Text mainUiBody>Main UI Body</Text>
      <Text secondaryBody>Secondary Body</Text>
    </div>
  ),
};

export const Emphasis: Story = {
  render: () => (
    <div style={{ display: "flex", flexDirection: "column", gap: 8 }}>
      <Text mainContentEmphasis>Main Content Emphasis</Text>
      <Text mainUiAction>Main UI Action</Text>
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/texts/Text.tsx
================================================
import type { HTMLAttributes } from "react";

import { cn } from "@/lib/utils";

const fonts = {
  // Heading
  headingH1: "font-heading-h1",
  headingH2: "font-heading-h2",
  headingH3: "font-heading-h3",
  headingH3Muted: "font-heading-h3-muted",

  // Main Content
  mainContentBody: "font-main-content-body",
  mainContentMuted: "font-main-content-muted",
  mainContentEmphasis: "font-main-content-emphasis",
  mainContentMono: "font-main-content-mono",

  // Main UI
  mainUiBody: "font-main-ui-body",
  mainUiMuted: "font-main-ui-muted",
  mainUiAction: "font-main-ui-action",
  mainUiMono: "font-main-ui-mono",

  // Secondary
  secondaryBody: "font-secondary-body",
  secondaryAction: "font-secondary-action",
  secondaryMono: "font-secondary-mono",
  secondaryMonoLabel: "font-secondary-mono-label",

  // Figure
  figureSmallLabel: "font-figure-small-label",
  figureSmallValue: "font-figure-small-value",
  figureKeystroke: "font-figure-keystroke",
};

const colors = {
  text05: "text-text-05",
  text04: "text-text-04",
  text03: "text-text-03",
  text02: "text-text-02",
  text01: "text-text-01",
  textLight03: "text-text-light-03",
  textLight05: "text-text-light-05",
  textDark03: "text-text-dark-03",
  textDark05: "text-text-dark-05",

  inverted: {
    text05: "text-text-inverted-05",
    text04: "text-text-inverted-04",
    text03: "text-text-inverted-03",
    text02: "text-text-inverted-02",
    text01: "text-text-inverted-01",
    textLight03: "text-text-light-03",
    textLight05: "text-text-light-05",
    textDark03: "text-text-dark-03",
    textDark05: "text-text-dark-05",
  },
};

export interface TextProps extends Omit<HTMLAttributes<HTMLElement>, "as"> {
  nowrap?: boolean;

  // Fonts
  headingH1?: boolean;
  headingH2?: boolean;
  headingH3?: boolean;
  headingH3Muted?: boolean;
  mainContentBody?: boolean;
  mainContentMuted?: boolean;
  mainContentEmphasis?: boolean;
  mainContentMono?: boolean;
  mainUiBody?: boolean;
  mainUiMuted?: boolean;
  mainUiAction?: boolean;
  mainUiMono?: boolean;
  secondaryBody?: boolean;
  secondaryAction?: boolean;
  secondaryMono?: boolean;
  secondaryMonoLabel?: boolean;
  figureSmallLabel?: boolean;
  figureSmallValue?: boolean;
  figureKeystroke?: boolean;

  // Colors
  text05?: boolean;
  text04?: boolean;
  text03?: boolean;
  text02?: boolean;
  text01?: boolean;
  inverted?: boolean;
  textLight03?: boolean;
  textLight05?: boolean;
  textDark03?: boolean;
  textDark05?: boolean;

  // Tag type override
  as?: "p" | "span" | "li";
}

export default function Text({
  nowrap,
  headingH1,
  headingH2,
  headingH3,
  headingH3Muted,
  mainContentBody,
  mainContentMuted,
  mainContentEmphasis,
  mainContentMono,
  mainUiBody,
  mainUiMuted,
  mainUiAction,
  mainUiMono,
  secondaryBody,
  secondaryAction,
  secondaryMono,
  secondaryMonoLabel,
  figureSmallLabel,
  figureSmallValue,
  figureKeystroke,
  text05,
  text04,
  text03,
  text02,
  text01,
  inverted,
  textLight03,
  textLight05,
  textDark03,
  textDark05,
  children,
  className,
  as,
  ...rest
}: TextProps) {
  const font = headingH1
    ? "headingH1"
    : headingH2
      ? "headingH2"
      : headingH3
        ? "headingH3"
        : headingH3Muted
          ? "headingH3Muted"
          : mainContentBody
            ? "mainContentBody"
            : mainContentMuted
              ? "mainContentMuted"
              : mainContentEmphasis
                ? "mainContentEmphasis"
                : mainContentMono
                  ? "mainContentMono"
                  : mainUiBody
                    ? "mainUiBody"
                    : mainUiMuted
                      ? "mainUiMuted"
                      : mainUiAction
                        ? "mainUiAction"
                        : mainUiMono
                          ? "mainUiMono"
                          : secondaryBody
                            ? "secondaryBody"
                            : secondaryAction
                              ? "secondaryAction"
                              : secondaryMono
                                ? "secondaryMono"
                                : secondaryMonoLabel
                                  ? "secondaryMonoLabel"
                                  : figureSmallLabel
                                    ? "figureSmallLabel"
                                    : figureSmallValue
                                      ? "figureSmallValue"
                                      : figureKeystroke
                                        ? "figureKeystroke"
                                        : "mainUiBody";

  const color = text01
    ? "text01"
    : text02
      ? "text02"
      : text03
        ? "text03"
        : text04
          ? "text04"
          : text05
            ? "text05"
            : textLight03
              ? "textLight03"
              : textLight05
                ? "textLight05"
                : textDark03
                  ? "textDark03"
                  : textDark05
                    ? "textDark05"
                    : "text05";

  const Tag = as ?? "span";

  return (
    <Tag
      {...rest}
      className={cn(
        fonts[font],
        inverted ? colors.inverted[color] : colors[color],
        nowrap && "whitespace-nowrap",
        className
      )}
    >
      {children}
    </Tag>
  );
}


================================================
FILE: web/src/refresh-components/texts/Truncated.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import Truncated from "./Truncated";

const meta: Meta<typeof Truncated> = {
  title: "refresh-components/texts/Truncated",
  component: Truncated,
  tags: ["autodocs"],
};

export default meta;
type Story = StoryObj<typeof Truncated>;

export const ShortText: Story = {
  args: {
    children: "Short text that fits.",
    mainUiBody: true,
    text04: true,
  },
  decorators: [
    (Story) => (
      <div style={{ width: 300 }}>
        <Story />
      </div>
    ),
  ],
};

export const LongText: Story = {
  args: {
    children:
      "This is a very long piece of text that will definitely get truncated because it exceeds the width of the container and should show a tooltip on hover.",
    mainUiBody: true,
    text04: true,
  },
  decorators: [
    (Story) => (
      <div style={{ width: 200 }}>
        <Story />
      </div>
    ),
  ],
};

export const TooltipDisabled: Story = {
  args: {
    children:
      "Long text but tooltip is disabled so it won't appear even when truncated.",
    mainUiBody: true,
    text03: true,
    disable: true,
  },
  decorators: [
    (Story) => (
      <div style={{ width: 200 }}>
        <Story />
      </div>
    ),
  ],
};

export const CustomTooltipSide: Story = {
  args: {
    children:
      "Hover to see the tooltip appear on the right side instead of the default top.",
    mainUiBody: true,
    text04: true,
    side: "right",
  },
  decorators: [
    (Story) => (
      <div style={{ width: 200, paddingTop: 40 }}>
        <Story />
      </div>
    ),
  ],
};


================================================
FILE: web/src/refresh-components/texts/Truncated.tsx
================================================
"use client";

import React, { useState, useRef, useCallback, useLayoutEffect } from "react";
import { TextProps } from "@/refresh-components/texts/Text";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
  TooltipProvider,
} from "@/components/ui/tooltip";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";

/**
 * Hook to detect if text is truncated by comparing visible width vs full width
 */
function useTruncated(children: React.ReactNode) {
  const [isTruncated, setIsTruncated] = useState(false);
  const visibleRef = useRef<HTMLDivElement>(null);
  const hiddenRef = useRef<HTMLDivElement>(null);

  useLayoutEffect(() => {
    function checkTruncation() {
      if (visibleRef.current && hiddenRef.current) {
        const visibleWidth = visibleRef.current.offsetWidth;
        const fullTextWidth = hiddenRef.current.offsetWidth;
        setIsTruncated(fullTextWidth > visibleWidth);
      }
    }

    // Use a small delay to ensure DOM is ready
    const timeoutId = setTimeout(checkTruncation, 0);

    window.addEventListener("resize", checkTruncation);
    return () => {
      clearTimeout(timeoutId);
      window.removeEventListener("resize", checkTruncation);
    };
  }, [children]);

  return { isTruncated, visibleRef, hiddenRef };
}

export interface TruncatedProps extends TextProps {
  side?: "top" | "right" | "bottom" | "left";
  sideOffset?: number;
  disable?: boolean;
}

/**
 * Renders passed in text on a single line. If text is truncated,
 * shows a tooltip on hover with the full text.
 */
export default function Truncated({
  side = "top",
  sideOffset,
  disable,
  children,
  className,
  ...rest
}: TruncatedProps) {
  const { isTruncated, visibleRef, hiddenRef } = useTruncated(children);

  const text = (
    <Text
      as="p"
      className={cn("line-clamp-1 break-all text-left", className)}
      {...rest}
    >
      {children}
    </Text>
  );

  const showTooltip = !disable && isTruncated;

  // Radix's composeEventHandlers skips its internal handler when
  // event.defaultPrevented is true. When there is nothing to show we
  // block onPointerMove so the inner Tooltip never starts its open-delay
  // timer and therefore never dispatches the global "tooltip.open" custom
  // event that would close any *outer* tooltip wrapping this component.
  const blockPointerWhenInert = useCallback(
    (e: React.PointerEvent) => {
      if (!showTooltip) e.preventDefault();
    },
    [showTooltip]
  );

  return (
    <>
      <TooltipProvider>
        <Tooltip>
          <div
            ref={visibleRef}
            className="flex-grow overflow-hidden text-left w-full"
          >
            <TooltipTrigger asChild>
              <div onPointerMove={blockPointerWhenInert}>{text}</div>
            </TooltipTrigger>
          </div>

          {showTooltip && (
            <TooltipContent
              side={side}
              sideOffset={sideOffset}
              className="max-w-[400px] break-words whitespace-normal"
            >
              {typeof children === "string" ? (
                <Text as="p" textLight05>
                  {children}
                </Text>
              ) : (
                children
              )}
            </TooltipContent>
          )}
        </Tooltip>
      </TooltipProvider>

      {/*
        Hide offscreen to measure full text width

        # Note

        The placement of this `div` *after* the above `TooltipProvider` is *VERY* important to our tests!
        If the bottom `div` were placed first, any tests that try locating the string that the `Truncated` component is trying to render would find the bottom div first.
        This can break expectations (since it's supposed to be hidden in the first place).

        All in all, keep the below `div` *below* the above `TooltipProvider`.

        - @raunakab
      */}
      <div
        ref={hiddenRef}
        className="fixed left-[-9999px] top-[0rem] whitespace-nowrap pointer-events-none opacity-0"
        aria-hidden="true"
      >
        {text}
      </div>
    </>
  );
}


================================================
FILE: web/src/refresh-components/tiles/ButtonTile.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import ButtonTile from "./ButtonTile";
import { SvgArrowRight, SvgPlus, SvgSettings, SvgSearch } from "@opal/icons";

const meta: Meta<typeof ButtonTile> = {
  title: "refresh-components/tiles/ButtonTile",
  component: ButtonTile,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <div style={{ maxWidth: 300 }}>
        <Story />
      </div>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof ButtonTile>;

export const Default: Story = {
  args: {
    title: "Create New",
    description: "Start from scratch",
    icon: SvgArrowRight,
    onClick: () => {},
  },
};

export const TitleOnly: Story = {
  args: {
    title: "Quick Action",
    icon: SvgPlus,
    onClick: () => {},
  },
};

export const DescriptionOnly: Story = {
  args: {
    description: "Click to configure settings",
    icon: SvgSettings,
    onClick: () => {},
  },
};

export const NoIcon: Story = {
  args: {
    title: "Simple Tile",
    description: "Without an icon",
    onClick: () => {},
  },
};

export const Disabled: Story = {
  args: {
    title: "Unavailable",
    description: "This feature is not enabled",
    icon: SvgSettings,
    disabled: true,
  },
};

export const TileGrid: Story = {
  render: () => (
    <div
      style={{
        display: "grid",
        gridTemplateColumns: "1fr 1fr",
        gap: 8,
        maxWidth: 500,
      }}
    >
      <ButtonTile
        title="Search"
        description="Find documents"
        icon={SvgSearch}
        onClick={() => {}}
      />
      <ButtonTile
        title="Create"
        description="New document"
        icon={SvgPlus}
        onClick={() => {}}
      />
      <ButtonTile
        title="Settings"
        description="Configure"
        icon={SvgSettings}
        onClick={() => {}}
      />
      <ButtonTile
        title="Disabled"
        description="Not available"
        icon={SvgArrowRight}
        disabled
      />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/tiles/ButtonTile.tsx
================================================
import type { FunctionComponent } from "react";

import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { Interactive } from "@opal/core";
import type { IconProps } from "@opal/types";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface ButtonTileProps {
  title?: string;
  description?: string;
  icon?: FunctionComponent<IconProps>;
  onClick?: () => void;
  disabled?: boolean;
}

// ---------------------------------------------------------------------------
// ButtonTile
// ---------------------------------------------------------------------------

export default function ButtonTile({
  title,
  description,
  icon,
  onClick,
  disabled,
}: ButtonTileProps) {
  const Icon = icon;

  return (
    <Interactive.Stateless
      variant="default"
      prominence="secondary"
      group="group/Tile"
      disabled={disabled}
      onClick={onClick}
    >
      <div className={cn("rounded-08 p-1.5", "flex flex-row gap-2")}>
        {(title || description) && (
          <div className="min-w-0 flex flex-col px-0.5">
            {title && (
              <Text
                secondaryAction
                text02={disabled}
                text04={!disabled}
                className="truncate"
              >
                {title}
              </Text>
            )}
            {description && (
              <Text secondaryBody text02={disabled} text03={!disabled}>
                {description}
              </Text>
            )}
          </div>
        )}

        {Icon && (
          <div className="flex items-start justify-center">
            <Icon
              size={16}
              className={cn(
                disabled
                  ? "stroke-text-01"
                  : "stroke-text-03 group-hover/Tile:stroke-text-04"
              )}
            />
          </div>
        )}
      </div>
    </Interactive.Stateless>
  );
}


================================================
FILE: web/src/refresh-components/tiles/FileTile.stories.tsx
================================================
import type { Meta, StoryObj } from "@storybook/react";
import FileTile from "./FileTile";
import { SvgTextLines, SvgFiles } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";

const meta: Meta<typeof FileTile> = {
  title: "refresh-components/tiles/FileTile",
  component: FileTile,
  tags: ["autodocs"],
  decorators: [
    (Story) => (
      <TooltipPrimitive.Provider>
        <div style={{ maxWidth: 300 }}>
          <Story />
        </div>
      </TooltipPrimitive.Provider>
    ),
  ],
};

export default meta;
type Story = StoryObj<typeof FileTile>;

export const Default: Story = {
  args: {
    title: "document.pdf",
    description: "Project proposal document",
    icon: SvgTextLines,
  },
};

export const WithOpen: Story = {
  args: {
    title: "report.xlsx",
    description: "Quarterly report",
    icon: SvgFiles,
    onOpen: () => {},
  },
};

export const WithRemove: Story = {
  args: {
    title: "notes.md",
    description: "Meeting notes",
    icon: SvgTextLines,
    onRemove: () => {},
  },
};

export const Processing: Story = {
  args: {
    title: "uploading.pdf",
    description: "Processing...",
    icon: SvgTextLines,
    state: "processing",
  },
};

export const Disabled: Story = {
  args: {
    title: "locked.pdf",
    description: "Access denied",
    icon: SvgFiles,
    state: "disabled",
  },
};

export const TitleOnly: Story = {
  args: {
    title: "image.png",
    icon: SvgFiles,
  },
};

export const DefaultIcon: Story = {
  args: {
    title: "unknown-file",
    description: "Uses default text lines icon",
  },
};

export const FileList: Story = {
  render: () => (
    <div style={{ display: "flex", gap: 8, flexWrap: "wrap" }}>
      <FileTile
        title="proposal.pdf"
        description="2.4 MB"
        icon={SvgTextLines}
        onOpen={() => {}}
        onRemove={() => {}}
      />
      <FileTile
        title="report.xlsx"
        description="1.1 MB"
        icon={SvgFiles}
        onOpen={() => {}}
      />
      <FileTile
        title="uploading.doc"
        description="Processing..."
        icon={SvgTextLines}
        state="processing"
      />
      <FileTile
        title="locked.pdf"
        description="No access"
        icon={SvgFiles}
        state="disabled"
      />
    </div>
  ),
};


================================================
FILE: web/src/refresh-components/tiles/FileTile.tsx
================================================
import type { FunctionComponent } from "react";

import { cn, noProp } from "@/lib/utils";
import { SvgMaximize2, SvgTextLines, SvgX } from "@opal/icons";
import type { IconProps } from "@opal/types";
import { Hoverable } from "@opal/core";
import IconButton from "../buttons/IconButton";
import Text from "../texts/Text";
import Truncated from "../texts/Truncated";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

export type FileTileState = "default" | "processing" | "disabled";

interface FileTileProps {
  title?: string;
  description?: string;
  icon?: FunctionComponent<IconProps>;
  onRemove?: () => void;
  onOpen?: () => void;
  state?: FileTileState;
}

// ---------------------------------------------------------------------------
// RemoveButton (internal)
// ---------------------------------------------------------------------------

interface RemoveButtonProps {
  onRemove: () => void;
}

function RemoveButton({ onRemove }: RemoveButtonProps) {
  return (
    <div
      className={cn(
        "absolute -left-1 -top-1 z-10",
        "pointer-events-none focus-within:pointer-events-auto"
      )}
    >
      <Hoverable.Item group="fileTile" variant="opacity-on-hover">
        <button
          type="button"
          onClick={(e) => {
            e.stopPropagation();
            onRemove();
          }}
          title="Remove"
          aria-label="Remove"
          className={cn(
            "h-4 w-4",
            "flex items-center justify-center",
            "rounded-full bg-theme-primary-05 text-text-inverted-05",
            "pointer-events-auto"
          )}
        >
          <SvgX size={10} />
        </button>
      </Hoverable.Item>
    </div>
  );
}

// ---------------------------------------------------------------------------
// FileTile
// ---------------------------------------------------------------------------

export default function FileTile({
  title,
  description,
  icon,
  onRemove,
  onOpen,
  state = "default",
}: FileTileProps) {
  const Icon = icon ?? SvgTextLines;
  const isMuted = state === "processing" || state === "disabled";

  return (
    <Hoverable.Root group="fileTile" widthVariant="fit">
      <div
        onClick={onOpen && state !== "disabled" ? () => onOpen() : undefined}
        className={cn(
          "relative min-w-[7.5rem] max-w-[15rem] h-full",
          "border rounded-12 p-1",
          "flex flex-row items-center",
          "transition-colors duration-150",
          // Outer container bg + border per state
          isMuted
            ? "bg-background-neutral-02 border-border-01"
            : "bg-background-tint-00 border-border-01",
          // Hover overrides (disabled gets none)
          state !== "disabled" && "hover:border-border-02",
          state === "default" && "hover:bg-background-tint-02",
          // Clickable cursor when onOpen is provided and not disabled
          onOpen && state !== "disabled" && "cursor-pointer"
        )}
      >
        {onRemove && <RemoveButton onRemove={onRemove} />}

        <div
          className={cn(
            "shrink-0 h-9 w-9 rounded-08",
            "flex items-center justify-center",
            isMuted ? "bg-background-neutral-03" : "bg-background-tint-01"
          )}
        >
          <Icon
            size={16}
            className={cn(isMuted ? "stroke-text-01" : "stroke-text-02")}
          />
        </div>

        {(title || description || onOpen) && (
          <div className="min-w-0 flex pl-1 w-full justify-between h-full">
            {isMuted ? (
              <div className="flex flex-col min-w-0">
                {title && (
                  <Truncated
                    secondaryAction
                    text02
                    className={cn(
                      "truncate",
                      state === "processing" && "hover:text-text-03"
                    )}
                  >
                    {title}
                  </Truncated>
                )}
                {description && (
                  <Text
                    secondaryBody
                    text02
                    className={cn(
                      "line-clamp-2",
                      state === "processing" && "hover:text-text-03"
                    )}
                  >
                    {description}
                  </Text>
                )}
              </div>
            ) : (
              <div className="flex flex-col min-w-0">
                {title && (
                  <Truncated secondaryAction text04 className="truncate">
                    {title}
                  </Truncated>
                )}
                {description && (
                  <Text secondaryBody text03 className="line-clamp-2">
                    {description}
                  </Text>
                )}
              </div>
            )}
            {onOpen && (
              <div className="h-full">
                <IconButton
                  small
                  icon={SvgMaximize2}
                  onClick={noProp(onOpen)}
                />
              </div>
            )}
          </div>
        )}
      </div>
    </Hoverable.Root>
  );
}


================================================
FILE: web/src/refresh-pages/AgentEditorPage.tsx
================================================
"use client";

import { useState, useRef, useEffect, useCallback } from "react";
import { useRouter } from "next/navigation";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import * as GeneralLayouts from "@/layouts/general-layouts";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { Hoverable } from "@opal/core";
import { FullPersona } from "@/app/admin/agents/interfaces";
import { buildImgUrl } from "@/app/app/components/files/images/utils";
import { Formik, Form, FieldArray } from "formik";
import * as Yup from "yup";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";
import InputTypeInElementField from "@/refresh-components/form/InputTypeInElementField";
import InputDatePickerField from "@/refresh-components/form/InputDatePickerField";
import Message from "@/refresh-components/messages/Message";
import Separator from "@/refresh-components/Separator";
import * as InputLayouts from "@/layouts/input-layouts";
import { useFormikContext } from "formik";
import LLMSelector from "@/components/llm/LLMSelector";
import { parseLlmDescriptor, structureValue } from "@/lib/llmConfig/utils";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import {
  STARTER_MESSAGES_EXAMPLES,
  MAX_CHARACTERS_STARTER_MESSAGE,
  MAX_CHARACTERS_AGENT_DESCRIPTION,
} from "@/lib/constants";
import {
  IMAGE_GENERATION_TOOL_ID,
  WEB_SEARCH_TOOL_ID,
  PYTHON_TOOL_ID,
  SEARCH_TOOL_ID,
  OPEN_URL_TOOL_ID,
} from "@/app/app/components/tools/constants";
import Text from "@/refresh-components/texts/Text";
import { Card } from "@/refresh-components/cards";
import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";
import SwitchField from "@/refresh-components/form/SwitchField";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { useDocumentSets } from "@/app/admin/documents/sets/hooks";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { toast } from "@/hooks/useToast";
import UserFilesModal from "@/components/modals/UserFilesModal";
import {
  ProjectFile,
  UserFileStatus,
} from "@/app/app/projects/projectsService";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import {
  SvgActions,
  SvgExpand,
  SvgFold,
  SvgImage,
  SvgLock,
  SvgOnyxOctagon,
  SvgSliders,
  SvgUsers,
  SvgTrash,
} from "@opal/icons";
import CustomAgentAvatar, {
  agentAvatarIconMap,
} from "@/refresh-components/avatars/CustomAgentAvatar";
import InputAvatar from "@/refresh-components/inputs/InputAvatar";
import SquareButton from "@/refresh-components/buttons/SquareButton";
import { useAgents } from "@/hooks/useAgents";
import {
  createPersona,
  updatePersona,
  PersonaUpsertParameters,
} from "@/app/admin/agents/lib";
import useMcpServersForAgentEditor from "@/hooks/useMcpServersForAgentEditor";
import useOpenApiTools from "@/hooks/useOpenApiTools";
import { useAvailableTools } from "@/hooks/useAvailableTools";
import * as ActionsLayouts from "@/layouts/actions-layouts";
import * as ExpandableCard from "@/layouts/expandable-card-layouts";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { MCPServer, MCPTool, ToolSnapshot } from "@/lib/tools/interfaces";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import useFilter from "@/hooks/useFilter";
import EnabledCount from "@/refresh-components/EnabledCount";
import { useAppRouter } from "@/hooks/appNavigation";
import { isDateInFuture } from "@/lib/dateUtils";
import {
  deleteAgent,
  updateAgentFeaturedStatus,
  updateAgentSharedStatus,
} from "@/lib/agents";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import ShareAgentModal from "@/sections/modals/ShareAgentModal";
import AgentKnowledgePane from "@/sections/knowledge/AgentKnowledgePane";
import { ValidSources } from "@/lib/types";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";
import { useUser } from "@/providers/UserProvider";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";

interface AgentIconEditorProps {
  existingAgent?: FullPersona | null;
}

function FormWarningsEffect() {
  const { values, setStatus } = useFormikContext<{
    web_search: boolean;
    open_url: boolean;
  }>();

  useEffect(() => {
    const warnings: Record<string, string> = {};
    if (values.web_search && !values.open_url) {
      warnings.open_url =
        "Web Search without the ability to open URLs can lead to significantly worse web based results.";
    }
    setStatus({ warnings });
  }, [values.web_search, values.open_url, setStatus]);

  return null;
}

function AgentIconEditor({ existingAgent }: AgentIconEditorProps) {
  const { values, setFieldValue } = useFormikContext<{
    name: string;
    icon_name: string | null;
    uploaded_image_id: string | null;
    remove_image: boolean | null;
  }>();
  const [uploadedImagePreview, setUploadedImagePreview] = useState<
    string | null
  >(null);
  const [popoverOpen, setPopoverOpen] = useState(false);
  const fileInputRef = useRef<HTMLInputElement | null>(null);

  async function handleImageUpload(e: React.ChangeEvent<HTMLInputElement>) {
    const file = e.target.files?.[0];
    if (!file) return;

    // Clear previous preview to free memory
    setUploadedImagePreview(null);

    // Clear selected icon and remove_image flag when uploading an image
    setFieldValue("icon_name", null);
    setFieldValue("remove_image", false);

    // Show preview immediately
    const reader = new FileReader();
    reader.onloadend = () => {
      setUploadedImagePreview(reader.result as string);
    };
    reader.readAsDataURL(file);

    // Upload the file
    try {
      const formData = new FormData();
      formData.append("file", file);
      const response = await fetch("/api/admin/persona/upload-image", {
        method: "POST",
        body: formData,
      });

      if (!response.ok) {
        console.error("Failed to upload image");
        setUploadedImagePreview(null);
        return;
      }

      const { file_id } = await response.json();
      setFieldValue("uploaded_image_id", file_id);
      setPopoverOpen(false);
    } catch (error) {
      console.error("Upload error:", error);
      setUploadedImagePreview(null);
    }
  }

  const imageSrc = uploadedImagePreview
    ? uploadedImagePreview
    : values.uploaded_image_id
      ? buildImgUrl(values.uploaded_image_id)
      : values.icon_name
        ? undefined
        : values.remove_image
          ? undefined
          : existingAgent?.uploaded_image_id
            ? buildImgUrl(existingAgent.uploaded_image_id)
            : undefined;

  function handleIconClick(iconName: string | null) {
    setFieldValue("icon_name", iconName);
    setFieldValue("uploaded_image_id", null);
    setFieldValue("remove_image", true);
    setUploadedImagePreview(null);
    setPopoverOpen(false);

    // Reset the file input so the same file can be uploaded again later
    if (fileInputRef.current) {
      fileInputRef.current.value = "";
    }
  }

  return (
    <>
      <input
        ref={fileInputRef}
        type="file"
        accept="image/*"
        onChange={handleImageUpload}
        className="hidden"
      />

      <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
        <Popover.Trigger asChild>
          <Hoverable.Root group="inputAvatar" widthVariant="fit">
            <InputAvatar className="relative flex flex-col items-center justify-center h-[7.5rem] w-[7.5rem]">
              {/* We take the `InputAvatar`'s height/width (in REM) and multiply it by 16 (the REM -> px conversion factor). */}
              <CustomAgentAvatar
                size={imageSrc ? 7.5 * 16 : 40}
                src={imageSrc}
                iconName={values.icon_name ?? undefined}
                name={values.name}
              />
              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <div className="absolute bottom-0 left-1/2 -translate-x-1/2 mb-2">
                <Hoverable.Item group="inputAvatar" variant="opacity-on-hover">
                  <Button className="h-[1.75rem]" secondary>
                    Edit
                  </Button>
                </Hoverable.Item>
              </div>
            </InputAvatar>
          </Hoverable.Root>
        </Popover.Trigger>
        <Popover.Content>
          <PopoverMenu>
            {[
              <LineItem
                key="upload-image"
                icon={SvgImage}
                onClick={() => fileInputRef.current?.click()}
                emphasized
              >
                Upload Image
              </LineItem>,
              null,
              <div className="grid grid-cols-4 gap-1">
                <SquareButton
                  key="default-icon"
                  icon={() => (
                    <CustomAgentAvatar name={values.name} size={30} />
                  )}
                  onClick={() => handleIconClick(null)}
                  transient={!imageSrc && values.icon_name === null}
                />
                {Object.keys(agentAvatarIconMap).map((iconName) => (
                  <SquareButton
                    key={iconName}
                    onClick={() => handleIconClick(iconName)}
                    icon={() => (
                      <CustomAgentAvatar iconName={iconName} size={30} />
                    )}
                    transient={values.icon_name === iconName}
                  />
                ))}
              </div>,
            ]}
          </PopoverMenu>
        </Popover.Content>
      </Popover>
    </>
  );
}

interface OpenApiToolCardProps {
  tool: ToolSnapshot;
}

function OpenApiToolCard({ tool }: OpenApiToolCardProps) {
  const toolFieldName = `openapi_tool_${tool.id}`;

  return (
    <ExpandableCard.Root defaultFolded>
      <ActionsLayouts.Header
        title={tool.display_name || tool.name}
        description={tool.description}
        icon={SvgActions}
        rightChildren={<SwitchField name={toolFieldName} />}
      />
    </ExpandableCard.Root>
  );
}

interface MCPServerCardProps {
  server: MCPServer;
  tools: MCPTool[];
  isLoading: boolean;
}

function MCPServerCard({
  server,
  tools: enabledTools,
  isLoading,
}: MCPServerCardProps) {
  const [isFolded, setIsFolded] = useState(false);
  const { values, setFieldValue, getFieldMeta } = useFormikContext<any>();
  const serverFieldName = `mcp_server_${server.id}`;
  const isServerEnabled = values[serverFieldName]?.enabled ?? false;
  const {
    query,
    setQuery,
    filtered: filteredTools,
  } = useFilter(enabledTools, (tool) => `${tool.name} ${tool.description}`);

  // Calculate enabled and total tool counts
  const enabledCount = enabledTools.filter((tool) => {
    const toolFieldValue = values[serverFieldName]?.[`tool_${tool.id}`];
    return toolFieldValue === true;
  }).length;

  return (
    <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
      <ActionsLayouts.Header
        title={server.name}
        description={server.description}
        icon={getActionIcon(server.server_url, server.name)}
        rightChildren={
          <GeneralLayouts.Section
            flexDirection="row"
            gap={0.5}
            alignItems="start"
          >
            <EnabledCount
              enabledCount={enabledCount}
              totalCount={enabledTools.length}
            />
            <SwitchField
              name={`${serverFieldName}.enabled`}
              onCheckedChange={(checked) => {
                enabledTools.forEach((tool) => {
                  setFieldValue(`${serverFieldName}.tool_${tool.id}`, checked);
                });
                if (!checked) return;
                setIsFolded(false);
              }}
            />
          </GeneralLayouts.Section>
        }
      >
        <GeneralLayouts.Section flexDirection="row" gap={0.5}>
          <InputTypeIn
            placeholder="Search tools..."
            variant="internal"
            leftSearchIcon
            value={query}
            onChange={(e) => setQuery(e.target.value)}
          />
          {enabledTools.length > 0 && (
            <OpalButton
              prominence="internal"
              rightIcon={isFolded ? SvgExpand : SvgFold}
              onClick={() => setIsFolded((prev) => !prev)}
            >
              {isFolded ? "Expand" : "Fold"}
            </OpalButton>
          )}
        </GeneralLayouts.Section>
      </ActionsLayouts.Header>
      {isLoading ? (
        <ActionsLayouts.Content>
          <GeneralLayouts.Section padding={1}>
            <SimpleLoader />
          </GeneralLayouts.Section>
        </ActionsLayouts.Content>
      ) : (
        enabledTools.length > 0 &&
        filteredTools.length > 0 && (
          <ActionsLayouts.Content>
            {filteredTools.map((tool) => (
              <ActionsLayouts.Tool
                key={tool.id}
                name={`${serverFieldName}.tool_${tool.id}`}
                title={tool.name}
                description={tool.description}
                icon={tool.icon ?? SvgSliders}
                disabled={
                  !tool.isAvailable ||
                  !getFieldMeta<boolean>(`${serverFieldName}.enabled`).value
                }
                rightChildren={
                  <SwitchField
                    name={`${serverFieldName}.tool_${tool.id}`}
                    disabled={!isServerEnabled}
                  />
                }
              />
            ))}
          </ActionsLayouts.Content>
        )
      )}
    </ExpandableCard.Root>
  );
}

function StarterMessages() {
  const max_starters = STARTER_MESSAGES_EXAMPLES.length;

  const { values } = useFormikContext<{
    starter_messages: string[];
  }>();

  const starters = values.starter_messages || [];

  // Count how many non-empty starters we have
  const filledStarters = starters.filter((s) => s).length;
  const canAddMore = filledStarters < max_starters;

  // Show at least 1, or all filled ones, or filled + 1 empty (up to max)
  const visibleCount = Math.min(
    max_starters,
    Math.max(
      1,
      filledStarters === 0 ? 1 : filledStarters + (canAddMore ? 1 : 0)
    )
  );

  return (
    <FieldArray name="starter_messages">
      {(arrayHelpers) => (
        <GeneralLayouts.Section gap={0.5}>
          {Array.from({ length: visibleCount }, (_, i) => (
            <InputTypeInElementField
              key={`starter_messages.${i}`}
              name={`starter_messages.${i}`}
              placeholder={
                STARTER_MESSAGES_EXAMPLES[i] ||
                "Enter a conversation starter..."
              }
              onRemove={() => arrayHelpers.remove(i)}
            />
          ))}
        </GeneralLayouts.Section>
      )}
    </FieldArray>
  );
}

export interface AgentEditorPageProps {
  agent?: FullPersona;
  refreshAgent?: () => void;
}

export default function AgentEditorPage({
  agent: existingAgent,
  refreshAgent,
}: AgentEditorPageProps) {
  const router = useRouter();
  const appRouter = useAppRouter();
  const { refresh: refreshAgents } = useAgents();
  const shareAgentModal = useCreateModal();
  const deleteAgentModal = useCreateModal();
  const { isAdmin, isCurator } = useUser();
  const canUpdateFeaturedStatus = isAdmin || isCurator;
  const vectorDbEnabled = useVectorDbEnabled();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  // LLM Model Selection
  const getCurrentLlm = useCallback(
    (values: any, llmProviders: any) =>
      values.llm_model_version_override && values.llm_model_provider_override
        ? (() => {
            const provider = llmProviders?.find(
              (p: any) => p.name === values.llm_model_provider_override
            );
            return structureValue(
              values.llm_model_provider_override,
              provider?.provider || "",
              values.llm_model_version_override
            );
          })()
        : null,
    []
  );

  const onLlmSelect = useCallback(
    (selected: string | null, setFieldValue: any) => {
      if (selected === null) {
        setFieldValue("llm_model_version_override", null);
        setFieldValue("llm_model_provider_override", null);
      } else {
        const { modelName, name } = parseLlmDescriptor(selected);
        if (modelName && name) {
          setFieldValue("llm_model_version_override", modelName);
          setFieldValue("llm_model_provider_override", name);
        }
      }
    },
    []
  );

  // Hooks for Knowledge section
  const { allRecentFiles, beginUpload } = useProjectsContext();
  const { data: documentSets } = useDocumentSets();
  const userFilesModal = useCreateModal();
  const [presentingDocument, setPresentingDocument] = useState<{
    document_id: string;
    semantic_identifier: string;
  } | null>(null);

  const { mcpData, isLoading: isMcpLoading } = useMcpServersForAgentEditor();
  const { openApiTools: openApiToolsRaw, isLoading: isOpenApiLoading } =
    useOpenApiTools();
  const { llmProviders } = useLLMProviders(existingAgent?.id);
  const mcpServers = mcpData?.mcp_servers ?? [];
  const openApiTools = openApiToolsRaw ?? [];

  // Check if the *BUILT-IN* tools are available.
  // The built-in tools are:
  // - image-gen
  // - web-search
  // - code-interpreter
  const { tools: availableTools, isLoading: isToolsLoading } =
    useAvailableTools();
  const searchTool = availableTools?.find(
    (t) => t.in_code_tool_id === SEARCH_TOOL_ID
  );
  const imageGenTool = availableTools?.find(
    (t) => t.in_code_tool_id === IMAGE_GENERATION_TOOL_ID
  );
  const webSearchTool = availableTools?.find(
    (t) => t.in_code_tool_id === WEB_SEARCH_TOOL_ID
  );
  const openURLTool = availableTools?.find(
    (t) => t.in_code_tool_id === OPEN_URL_TOOL_ID
  );
  const codeInterpreterTool = availableTools?.find(
    (t) => t.in_code_tool_id === PYTHON_TOOL_ID
  );
  const isImageGenerationAvailable = !!imageGenTool;
  const imageGenerationDisabledTooltip = isImageGenerationAvailable
    ? undefined
    : "Image generation requires a configured model. If you have access, set one up under Settings > Image Generation, or ask an admin.";

  // Group MCP server tools from availableTools by server ID
  const mcpServersWithTools = mcpServers.map((server) => {
    const serverTools: MCPTool[] = (availableTools || [])
      .filter((tool) => tool.mcp_server_id === server.id)
      .map((tool) => ({
        id: tool.id.toString(),
        icon: getActionIcon(server.server_url, server.name),
        name: tool.display_name || tool.name,
        description: tool.description,
        isAvailable: true,
        isEnabled: tool.enabled,
      }));

    return { server, tools: serverTools, isLoading: false };
  });

  const initialValues = {
    // General
    icon_name: existingAgent?.icon_name ?? null,
    uploaded_image_id: existingAgent?.uploaded_image_id ?? null,
    remove_image: false,
    name: existingAgent?.name ?? "",
    description: existingAgent?.description ?? "",

    // Prompts
    instructions: existingAgent?.system_prompt ?? "",
    starter_messages: Array.from(
      { length: STARTER_MESSAGES_EXAMPLES.length },
      (_, i) => existingAgent?.starter_messages?.[i]?.message ?? ""
    ),

    // Knowledge - enabled if agent has any knowledge sources attached
    enable_knowledge:
      (existingAgent?.document_sets?.length ?? 0) > 0 ||
      (existingAgent?.hierarchy_nodes?.length ?? 0) > 0 ||
      (existingAgent?.attached_documents?.length ?? 0) > 0 ||
      (existingAgent?.user_file_ids?.length ?? 0) > 0,
    document_set_ids: existingAgent?.document_sets?.map((ds) => ds.id) ?? [],
    // Individual document IDs from hierarchy browsing
    document_ids: existingAgent?.attached_documents?.map((doc) => doc.id) ?? [],
    // Hierarchy node IDs (folders/spaces/channels) for scoped search
    hierarchy_node_ids:
      existingAgent?.hierarchy_nodes?.map((node) => node.id) ?? [],
    user_file_ids: existingAgent?.user_file_ids ?? [],
    // Selected sources for the new knowledge UI - derived from document sets
    selected_sources: [] as ValidSources[],

    // Advanced
    llm_model_provider_override:
      existingAgent?.llm_model_provider_override ?? null,
    llm_model_version_override:
      existingAgent?.llm_model_version_override ?? null,
    knowledge_cutoff_date: existingAgent?.search_start_date
      ? new Date(existingAgent.search_start_date)
      : null,
    replace_base_system_prompt:
      existingAgent?.replace_base_system_prompt ?? false,
    reminders: existingAgent?.task_prompt ?? "",
    // For new agents, default to false for optional tools to avoid
    // "Tool not available" errors when the tool isn't configured.
    // For existing agents, preserve the current tool configuration.
    image_generation:
      !!imageGenTool &&
      (existingAgent?.tools?.some(
        (tool) => tool.in_code_tool_id === IMAGE_GENERATION_TOOL_ID
      ) ??
        false),
    web_search:
      !!webSearchTool &&
      (existingAgent?.tools?.some(
        (tool) => tool.in_code_tool_id === WEB_SEARCH_TOOL_ID
      ) ??
        false),
    open_url:
      !!openURLTool &&
      (existingAgent?.tools?.some(
        (tool) => tool.in_code_tool_id === OPEN_URL_TOOL_ID
      ) ??
        false),
    code_interpreter:
      !!codeInterpreterTool &&
      (existingAgent?.tools?.some(
        (tool) => tool.in_code_tool_id === PYTHON_TOOL_ID
      ) ??
        false),
    // MCP servers - dynamically add fields for each server with nested tool fields
    ...Object.fromEntries(
      mcpServersWithTools.map(({ server, tools }) => {
        // Find all tools from existingAgent that belong to this MCP server
        const serverToolsFromAgent =
          existingAgent?.tools?.filter(
            (tool) => tool.mcp_server_id === server.id
          ) ?? [];

        // Build the tool field object with tool_{id} for ALL available tools
        const toolFields: Record<string, boolean> = {};
        tools.forEach((tool) => {
          // Set to true if this tool was enabled in existingAgent, false otherwise
          toolFields[`tool_${tool.id}`] = serverToolsFromAgent.some(
            (t) => t.id === Number(tool.id)
          );
        });

        return [
          `mcp_server_${server.id}`,
          {
            enabled: serverToolsFromAgent.length > 0, // Server is enabled if it has any tools
            ...toolFields, // Add individual tool states for ALL tools
          },
        ];
      })
    ),

    // OpenAPI tools - add a boolean field for each tool
    ...Object.fromEntries(
      openApiTools.map((openApiTool) => [
        `openapi_tool_${openApiTool.id}`,
        existingAgent?.tools?.some((t) => t.id === openApiTool.id) ?? false,
      ])
    ),

    // Sharing
    shared_user_ids: existingAgent?.users?.map((user) => user.id) ?? [],
    shared_group_ids: existingAgent?.groups ?? [],
    is_public: existingAgent?.is_public ?? false,
    label_ids: existingAgent?.labels?.map((l) => l.id) ?? [],
    is_featured: existingAgent?.is_featured ?? false,
  };

  const validationSchema = Yup.object().shape({
    // General
    icon_name: Yup.string().nullable(),
    remove_image: Yup.boolean().optional(),
    uploaded_image_id: Yup.string().nullable(),
    name: Yup.string().required("Agent name is required."),
    description: Yup.string()
      .max(
        MAX_CHARACTERS_AGENT_DESCRIPTION,
        `Description must be ${MAX_CHARACTERS_AGENT_DESCRIPTION} characters or less`
      )
      .optional(),

    // Prompts
    instructions: Yup.string().optional(),
    starter_messages: Yup.array().of(
      Yup.string().max(
        MAX_CHARACTERS_STARTER_MESSAGE,
        `Conversation starter must be ${MAX_CHARACTERS_STARTER_MESSAGE} characters or less`
      )
    ),

    // Knowledge
    enable_knowledge: Yup.boolean(),
    document_set_ids: Yup.array().of(Yup.number()),
    document_ids: Yup.array().of(Yup.string()),
    hierarchy_node_ids: Yup.array().of(Yup.number()),
    user_file_ids: Yup.array().of(Yup.string()),
    selected_sources: Yup.array().of(Yup.string()),

    // Advanced
    llm_model_provider_override: Yup.string().nullable().optional(),
    llm_model_version_override: Yup.string().nullable().optional(),
    knowledge_cutoff_date: Yup.date()
      .nullable()
      .optional()
      .test(
        "knowledge-cutoff-date-not-in-future",
        "Knowledge cutoff date must be today or earlier.",
        (value) => !value || !isDateInFuture(value)
      ),
    replace_base_system_prompt: Yup.boolean(),
    reminders: Yup.string().optional(),

    // MCP servers - dynamically add validation for each server with nested tool validation
    ...Object.fromEntries(
      mcpServers.map((server) => [
        `mcp_server_${server.id}`,
        Yup.object(), // Allow any nested tool fields as booleans
      ])
    ),

    // OpenAPI tools - add boolean validation for each tool
    ...Object.fromEntries(
      openApiTools.map((openApiTool) => [
        `openapi_tool_${openApiTool.id}`,
        Yup.boolean(),
      ])
    ),
  });

  async function handleSubmit(values: typeof initialValues) {
    try {
      // Map conversation starters
      const starterMessages = values.starter_messages
        .filter((message: string) => message.trim() !== "")
        .map((message: string) => ({
          message: message,
          name: message,
        }));

      // Send null instead of empty array if no starter messages
      const finalStarterMessages =
        starterMessages.length > 0 ? starterMessages : null;

      // Always look up tools in availableTools to ensure we can find all tools

      const toolIds = [];
      if (values.enable_knowledge) {
        if (vectorDbEnabled && searchTool) {
          toolIds.push(searchTool.id);
        }
      }
      if (values.image_generation && imageGenTool) {
        toolIds.push(imageGenTool.id);
      }
      if (values.web_search && webSearchTool) {
        toolIds.push(webSearchTool.id);
      }
      if (values.open_url && openURLTool) {
        toolIds.push(openURLTool.id);
      }
      if (values.code_interpreter && codeInterpreterTool) {
        toolIds.push(codeInterpreterTool.id);
      }

      // Collect enabled MCP tool IDs
      mcpServers.forEach((server) => {
        const serverFieldName = `mcp_server_${server.id}`;
        const serverData = (values as any)[serverFieldName];

        if (
          serverData &&
          typeof serverData === "object" &&
          serverData.enabled
        ) {
          // Server is enabled, collect all enabled tools
          Object.keys(serverData).forEach((key) => {
            if (key.startsWith("tool_") && serverData[key] === true) {
              // Extract tool ID from key (e.g., "tool_123" -> 123)
              const toolId = parseInt(key.replace("tool_", ""), 10);
              if (!isNaN(toolId)) {
                toolIds.push(toolId);
              }
            }
          });
        }
      });

      // Collect enabled OpenAPI tool IDs
      openApiTools.forEach((openApiTool) => {
        const toolFieldName = `openapi_tool_${openApiTool.id}`;
        if ((values as any)[toolFieldName] === true) {
          toolIds.push(openApiTool.id);
        }
      });

      // Build submission data
      const submissionData: PersonaUpsertParameters = {
        name: values.name,
        description: values.description,
        document_set_ids: values.enable_knowledge
          ? values.document_set_ids
          : [],
        is_public: values.is_public,
        llm_model_provider_override: values.llm_model_provider_override || null,
        llm_model_version_override: values.llm_model_version_override || null,
        starter_messages: finalStarterMessages,
        users: values.shared_user_ids,
        groups: values.shared_group_ids,
        tool_ids: toolIds,
        // uploaded_image: null, // Already uploaded separately
        remove_image: values.remove_image ?? false,
        uploaded_image_id: values.uploaded_image_id,
        icon_name: values.icon_name,
        search_start_date: values.knowledge_cutoff_date || null,
        label_ids: values.label_ids,
        is_featured: values.is_featured,
        // display_priority: ...,

        user_file_ids: values.enable_knowledge ? values.user_file_ids : [],
        hierarchy_node_ids: values.enable_knowledge
          ? values.hierarchy_node_ids
          : [],
        document_ids: values.enable_knowledge ? values.document_ids : [],

        system_prompt: values.instructions,
        replace_base_system_prompt: values.replace_base_system_prompt,
        task_prompt: values.reminders || "",
        datetime_aware: false,
      };

      // Call API
      let personaResponse;
      if (!!existingAgent) {
        personaResponse = await updatePersona(existingAgent.id, submissionData);
      } else {
        personaResponse = await createPersona(submissionData);
      }

      // Handle response
      if (!personaResponse || !personaResponse.ok) {
        const error = personaResponse
          ? await personaResponse.text()
          : "No response received";
        toast.error(
          `Failed to ${existingAgent ? "update" : "create"} agent - ${error}`
        );
        return;
      }

      // Success
      const agent = await personaResponse.json();
      toast.success(
        `Agent "${agent.name}" ${
          existingAgent ? "updated" : "created"
        } successfully`
      );

      // Refresh agents list and the specific agent
      await refreshAgents();
      if (refreshAgent) {
        refreshAgent();
      }

      // Immediately start a chat with this agent.
      appRouter({ agentId: agent.id });
    } catch (error) {
      console.error("Submit error:", error);
      toast.error(`An error occurred: ${error}`);
    }
  }

  // Delete agent handler
  async function handleDeleteAgent() {
    if (!existingAgent) return;

    const error = await deleteAgent(existingAgent.id);

    if (error) {
      toast.error(`Failed to delete agent: ${error}`);
    } else {
      toast.success("Agent deleted successfully");

      deleteAgentModal.toggle(false);
      await refreshAgents();
      router.push("/app/agents");
    }
  }

  // FilePickerPopover callbacks for Knowledge section
  function handlePickRecentFile(
    file: ProjectFile,
    currentFileIds: string[],
    setFieldValue: (field: string, value: unknown) => void
  ) {
    if (!currentFileIds.includes(file.id)) {
      setFieldValue("user_file_ids", [...currentFileIds, file.id]);
    }
  }

  function handleUnpickRecentFile(
    file: ProjectFile,
    currentFileIds: string[],
    setFieldValue: (field: string, value: unknown) => void
  ) {
    setFieldValue(
      "user_file_ids",
      currentFileIds.filter((id) => id !== file.id)
    );
  }

  function handleFileClick(file: ProjectFile) {
    setPresentingDocument({
      document_id: `project_file__${file.file_id}`,
      semantic_identifier: file.name,
    });
  }

  async function handleUploadChange(
    e: React.ChangeEvent<HTMLInputElement>,
    currentFileIds: string[],
    setFieldValue: (field: string, value: unknown) => void
  ) {
    const files = e.target.files;
    if (!files || files.length === 0) return;
    try {
      let selectedIds = [...(currentFileIds || [])];
      const optimistic = await beginUpload(
        Array.from(files),
        null,
        (result) => {
          const uploadedFiles = result.user_files || [];
          if (uploadedFiles.length === 0) return;
          const tempToFinal = new Map(
            uploadedFiles
              .filter((f) => f.temp_id)
              .map((f) => [f.temp_id as string, f.id])
          );
          const replaced = (selectedIds || []).map(
            (id: string) => tempToFinal.get(id) ?? id
          );
          selectedIds = replaced;
          setFieldValue("user_file_ids", replaced);
        }
      );
      if (optimistic) {
        const optimisticIds = optimistic.map((f) => f.id);
        selectedIds = [...selectedIds, ...optimisticIds];
        setFieldValue("user_file_ids", selectedIds);
      }
    } catch (error) {
      console.error("Upload error:", error);
    }
  }

  // Wait for async tool data before rendering the form. Formik captures
  // initialValues on mount — if tools haven't loaded yet, the initial values
  // won't include MCP tool fields. Later, toggling those fields would make
  // the form permanently dirty since they have no baseline to compare against.
  if (isToolsLoading || isMcpLoading || isOpenApiLoading) {
    return null;
  }

  return (
    <>
      <div
        data-testid="AgentsEditorPage/container"
        aria-label="Agents Editor Page"
        className="h-full w-full"
      >
        <Formik
          initialValues={initialValues}
          validationSchema={validationSchema}
          onSubmit={handleSubmit}
          validateOnChange
          validateOnBlur
          validateOnMount
          initialTouched={{
            description:
              initialValues.description.length >
              MAX_CHARACTERS_AGENT_DESCRIPTION,
            starter_messages: initialValues.starter_messages.map(
              (msg) => msg.length > MAX_CHARACTERS_STARTER_MESSAGE
            ) as unknown as boolean,
          }}
          initialStatus={{ warnings: {} }}
        >
          {({ isSubmitting, isValid, dirty, values, setFieldValue }) => {
            const fileStatusMap = new Map(
              allRecentFiles.map((f) => [f.id, f.status])
            );

            const hasUploadingFiles = values.user_file_ids.some(
              (fileId: string) => {
                const status = fileStatusMap.get(fileId);
                if (status === undefined) {
                  return fileId.startsWith("temp_");
                }
                return status === UserFileStatus.UPLOADING;
              }
            );

            const hasProcessingFiles = values.user_file_ids.some(
              (fileId: string) =>
                fileStatusMap.get(fileId) === UserFileStatus.PROCESSING
            );
            const isShared =
              values.is_public ||
              values.shared_user_ids.length > 0 ||
              values.shared_group_ids.length > 0;

            return (
              <>
                <FormWarningsEffect />

                <userFilesModal.Provider>
                  <UserFilesModal
                    title="User Files"
                    description="All files selected for this agent"
                    recentFiles={values.user_file_ids
                      .map((userFileId: string) => {
                        const rf = allRecentFiles.find(
                          (f) => f.id === userFileId
                        );
                        if (rf) return rf;
                        return {
                          id: userFileId,
                          name: `File ${userFileId.slice(0, 8)}`,
                          status: UserFileStatus.COMPLETED,
                          file_id: userFileId,
                          created_at: new Date().toISOString(),
                          project_id: null,
                          user_id: null,
                          file_type: "",
                          last_accessed_at: new Date().toISOString(),
                          chat_file_type: "file" as const,
                        } as unknown as ProjectFile;
                      })
                      .filter((f): f is ProjectFile => f !== null)}
                    selectedFileIds={values.user_file_ids}
                    onPickRecent={(file: ProjectFile) => {
                      if (!values.user_file_ids.includes(file.id)) {
                        setFieldValue("user_file_ids", [
                          ...values.user_file_ids,
                          file.id,
                        ]);
                      }
                    }}
                    onUnpickRecent={(file: ProjectFile) => {
                      setFieldValue(
                        "user_file_ids",
                        values.user_file_ids.filter((id) => id !== file.id)
                      );
                    }}
                    onView={(file: ProjectFile) => {
                      setPresentingDocument({
                        document_id: `project_file__${file.file_id}`,
                        semantic_identifier: file.name,
                      });
                    }}
                  />
                </userFilesModal.Provider>

                <shareAgentModal.Provider>
                  <ShareAgentModal
                    agentId={existingAgent?.id}
                    userIds={values.shared_user_ids}
                    groupIds={values.shared_group_ids}
                    isPublic={values.is_public}
                    isFeatured={values.is_featured}
                    labelIds={values.label_ids}
                    onShare={async (
                      userIds,
                      groupIds,
                      isPublic,
                      isFeatured,
                      labelIds
                    ) => {
                      if (!existingAgent) {
                        // New agents are not persisted until the main Create action.
                        setFieldValue("shared_user_ids", userIds);
                        setFieldValue("shared_group_ids", groupIds);
                        setFieldValue("is_public", isPublic);
                        setFieldValue("is_featured", isFeatured);
                        setFieldValue("label_ids", labelIds);
                        shareAgentModal.toggle(false);
                        return;
                      }

                      const applySharingFields = () => {
                        setFieldValue("shared_user_ids", userIds);
                        setFieldValue("shared_group_ids", groupIds);
                        setFieldValue("is_public", isPublic);
                        setFieldValue("label_ids", labelIds);
                      };

                      const refreshSharedUi = async () => {
                        try {
                          await refreshAgents();
                          refreshAgent?.();
                        } catch (error) {
                          console.error(
                            "Refresh failed after successful share:",
                            error
                          );
                          toast.error(
                            "Agent sharing was saved, but failed to refresh. Please reload."
                          );
                        }
                      };

                      let shareError: string | null;
                      try {
                        shareError = await updateAgentSharedStatus(
                          existingAgent.id,
                          userIds,
                          groupIds,
                          isPublic,
                          isPaidEnterpriseFeaturesEnabled,
                          labelIds
                        );
                      } catch (error) {
                        console.error(
                          "Share agent mutation failed unexpectedly:",
                          error
                        );
                        toast.error("Failed to share agent. Please try again.");
                        return;
                      }

                      if (shareError) {
                        toast.error(`Failed to share agent: ${shareError}`);
                        return;
                      }

                      if (canUpdateFeaturedStatus) {
                        let featuredError: string | null;
                        try {
                          featuredError = await updateAgentFeaturedStatus(
                            existingAgent.id,
                            isFeatured
                          );
                        } catch (error) {
                          console.error(
                            "Featured mutation failed unexpectedly:",
                            error
                          );
                          // Share succeeded; sync form and UI before returning.
                          applySharingFields();
                          await refreshSharedUi();
                          toast.error(
                            "Failed to update featured status. Please try again."
                          );
                          return;
                        }

                        if (featuredError) {
                          // Share succeeded, featured failed: keep modal open for retry.
                          applySharingFields();
                          await refreshSharedUi();
                          toast.error(
                            `Failed to update featured status: ${featuredError}`
                          );
                          return;
                        }

                        applySharingFields();
                        setFieldValue("is_featured", isFeatured);
                        shareAgentModal.toggle(false);
                        await refreshSharedUi();
                        return;
                      }

                      applySharingFields();
                      shareAgentModal.toggle(false);
                      await refreshSharedUi();
                    }}
                  />
                </shareAgentModal.Provider>
                <deleteAgentModal.Provider>
                  {deleteAgentModal.isOpen && (
                    <ConfirmationModalLayout
                      icon={SvgTrash}
                      title="Delete Agent"
                      submit={
                        <OpalButton
                          variant="danger"
                          onClick={handleDeleteAgent}
                        >
                          Delete Agent
                        </OpalButton>
                      }
                      onClose={() => deleteAgentModal.toggle(false)}
                    >
                      <GeneralLayouts.Section alignItems="start" gap={0.5}>
                        <Text>
                          Anyone using this agent will no longer be able to
                          access it. Deletion cannot be undone.
                        </Text>
                        <Text>Are you sure you want to delete this agent?</Text>
                      </GeneralLayouts.Section>
                    </ConfirmationModalLayout>
                  )}
                </deleteAgentModal.Provider>

                <Form className="h-full w-full">
                  <SettingsLayouts.Root>
                    <SettingsLayouts.Header
                      icon={SvgOnyxOctagon}
                      title={existingAgent ? "Edit Agent" : "Create Agent"}
                      rightChildren={
                        <div className="flex gap-2">
                          <OpalButton
                            prominence="secondary"
                            type="button"
                            onClick={() => router.back()}
                          >
                            Cancel
                          </OpalButton>
                          <SimpleTooltip
                            tooltip={
                              isSubmitting
                                ? "Saving changes..."
                                : !isValid
                                  ? "Please fix the errors in the form before saving."
                                  : !dirty
                                    ? "No changes have been made."
                                    : hasUploadingFiles
                                      ? "Please wait for files to finish uploading."
                                      : undefined
                            }
                            side="bottom"
                          >
                            <OpalButton
                              disabled={
                                isSubmitting ||
                                !isValid ||
                                !dirty ||
                                hasUploadingFiles
                              }
                              type="submit"
                            >
                              {existingAgent ? "Save" : "Create"}
                            </OpalButton>
                          </SimpleTooltip>
                        </div>
                      }
                      backButton
                      separator
                    />

                    {/* Agent Form Content */}
                    <SettingsLayouts.Body>
                      <GeneralLayouts.Section
                        flexDirection="row"
                        gap={2.5}
                        alignItems="start"
                      >
                        <GeneralLayouts.Section>
                          <InputLayouts.Vertical name="name" title="Name">
                            <InputTypeInField
                              name="name"
                              placeholder="Name your agent"
                            />
                          </InputLayouts.Vertical>

                          <InputLayouts.Vertical
                            name="description"
                            title="Description"
                            suffix="optional"
                          >
                            <InputTextAreaField
                              name="description"
                              placeholder="What does this agent do?"
                            />
                          </InputLayouts.Vertical>
                        </GeneralLayouts.Section>

                        <GeneralLayouts.Section width="fit">
                          <InputLayouts.Vertical
                            name="agent_avatar"
                            title="Agent Avatar"
                          >
                            <AgentIconEditor existingAgent={existingAgent} />
                          </InputLayouts.Vertical>
                        </GeneralLayouts.Section>
                      </GeneralLayouts.Section>

                      <Separator noPadding />

                      <GeneralLayouts.Section>
                        <InputLayouts.Vertical
                          name="instructions"
                          title="Instructions"
                          suffix="optional"
                          description="Add instructions to tailor the response for this agent."
                        >
                          <InputTextAreaField
                            name="instructions"
                            placeholder="Think step by step and show reasoning for complex problems. Use specific examples. Emphasize action items, and leave blanks for the human to fill in when you have unknown. Use a polite enthusiastic tone."
                          />
                        </InputLayouts.Vertical>

                        <InputLayouts.Vertical
                          name="starter_messages"
                          title="Conversation Starters"
                          description="Example messages that help users understand what this agent can do and how to interact with it effectively."
                          suffix="optional"
                        >
                          <StarterMessages />
                        </InputLayouts.Vertical>
                      </GeneralLayouts.Section>

                      <Separator noPadding />

                      <AgentKnowledgePane
                        enableKnowledge={values.enable_knowledge}
                        onEnableKnowledgeChange={(enabled) =>
                          setFieldValue("enable_knowledge", enabled)
                        }
                        selectedSources={values.selected_sources}
                        onSourcesChange={(sources) =>
                          setFieldValue("selected_sources", sources)
                        }
                        documentSets={documentSets ?? []}
                        selectedDocumentSetIds={values.document_set_ids}
                        onDocumentSetIdsChange={(ids) =>
                          setFieldValue("document_set_ids", ids)
                        }
                        selectedDocumentIds={values.document_ids}
                        onDocumentIdsChange={(ids) =>
                          setFieldValue("document_ids", ids)
                        }
                        selectedFolderIds={values.hierarchy_node_ids}
                        onFolderIdsChange={(ids) =>
                          setFieldValue("hierarchy_node_ids", ids)
                        }
                        selectedFileIds={values.user_file_ids}
                        onFileIdsChange={(ids) =>
                          setFieldValue("user_file_ids", ids)
                        }
                        allRecentFiles={allRecentFiles}
                        onFileClick={handleFileClick}
                        onUploadChange={(e) =>
                          handleUploadChange(
                            e,
                            values.user_file_ids,
                            setFieldValue
                          )
                        }
                        hasProcessingFiles={hasProcessingFiles}
                        initialAttachedDocuments={
                          existingAgent?.attached_documents
                        }
                        initialHierarchyNodes={existingAgent?.hierarchy_nodes}
                        vectorDbEnabled={vectorDbEnabled}
                      />

                      <Separator noPadding />

                      <SimpleCollapsible>
                        <SimpleCollapsible.Header
                          title="Actions"
                          description="Tools and capabilities available for this agent to use."
                        />
                        <SimpleCollapsible.Content>
                          <GeneralLayouts.Section gap={0.5}>
                            <SimpleTooltip
                              tooltip={imageGenerationDisabledTooltip}
                              side="top"
                            >
                              <Card
                                variant={
                                  isImageGenerationAvailable
                                    ? undefined
                                    : "disabled"
                                }
                              >
                                <InputLayouts.Horizontal
                                  name="image_generation"
                                  title="Image Generation"
                                  description="Generate and manipulate images using AI-powered tools."
                                  disabled={!isImageGenerationAvailable}
                                >
                                  <SwitchField
                                    name="image_generation"
                                    disabled={!isImageGenerationAvailable}
                                  />
                                </InputLayouts.Horizontal>
                              </Card>
                            </SimpleTooltip>

                            <Card
                              variant={!!webSearchTool ? undefined : "disabled"}
                            >
                              <InputLayouts.Horizontal
                                name="web_search"
                                title="Web Search"
                                description="Search the web for real-time information and up-to-date results."
                                disabled={!webSearchTool}
                              >
                                <SwitchField
                                  name="web_search"
                                  disabled={!webSearchTool}
                                />
                              </InputLayouts.Horizontal>
                            </Card>

                            <Card
                              variant={!!openURLTool ? undefined : "disabled"}
                            >
                              <InputLayouts.Horizontal
                                name="open_url"
                                title="Open URL"
                                description="Fetch and read content from web URLs."
                                disabled={!openURLTool}
                              >
                                <SwitchField
                                  name="open_url"
                                  disabled={!openURLTool}
                                />
                              </InputLayouts.Horizontal>
                            </Card>

                            <Card
                              variant={
                                !!codeInterpreterTool ? undefined : "disabled"
                              }
                            >
                              <InputLayouts.Horizontal
                                name="code_interpreter"
                                title="Code Interpreter"
                                description="Generate and run code."
                                disabled={!codeInterpreterTool}
                              >
                                <SwitchField
                                  name="code_interpreter"
                                  disabled={!codeInterpreterTool}
                                />
                              </InputLayouts.Horizontal>
                            </Card>

                            {/* Tools */}
                            <>
                              {/* render the separator if there is at least one mcp-server or open-api-tool */}
                              {(mcpServers.length > 0 ||
                                openApiTools.length > 0) && (
                                <Separator noPadding className="py-1" />
                              )}

                              {/* MCP tools */}
                              {mcpServersWithTools.length > 0 && (
                                <GeneralLayouts.Section gap={0.5}>
                                  {mcpServersWithTools.map(
                                    ({ server, tools, isLoading }) => (
                                      <MCPServerCard
                                        key={server.id}
                                        server={server}
                                        tools={tools}
                                        isLoading={isLoading}
                                      />
                                    )
                                  )}
                                </GeneralLayouts.Section>
                              )}

                              {/* OpenAPI tools */}
                              {openApiTools.length > 0 && (
                                <GeneralLayouts.Section gap={0.5}>
                                  {openApiTools.map((tool) => (
                                    <OpenApiToolCard
                                      key={tool.id}
                                      tool={tool}
                                    />
                                  ))}
                                </GeneralLayouts.Section>
                              )}
                            </>
                          </GeneralLayouts.Section>
                        </SimpleCollapsible.Content>
                      </SimpleCollapsible>

                      <Separator noPadding />

                      <SimpleCollapsible>
                        <SimpleCollapsible.Header
                          title="Advanced Options"
                          description="Fine-tune agent prompts and knowledge."
                        />
                        <SimpleCollapsible.Content>
                          <GeneralLayouts.Section>
                            <Card>
                              <InputLayouts.Horizontal
                                title="Share This Agent"
                                description="with other users, groups, or everyone in your organization."
                                center
                              >
                                <OpalButton
                                  prominence="secondary"
                                  icon={isShared ? SvgUsers : SvgLock}
                                  onClick={() => shareAgentModal.toggle(true)}
                                >
                                  Share
                                </OpalButton>
                              </InputLayouts.Horizontal>
                              {canUpdateFeaturedStatus && (
                                <>
                                  <InputLayouts.Horizontal
                                    name="is_featured"
                                    title="Feature This Agent"
                                    description="Show this agent at the top of the explore agents list and automatically pin it to the sidebar for new users with access."
                                  >
                                    <SwitchField name="is_featured" />
                                  </InputLayouts.Horizontal>
                                  {values.is_featured && !isShared && (
                                    <Message
                                      static
                                      close={false}
                                      className="w-full"
                                      text="This agent is private to you and will only be featured for yourself."
                                    />
                                  )}
                                </>
                              )}
                            </Card>

                            <Card>
                              <InputLayouts.Horizontal
                                name="llm_model"
                                title="Default Model"
                                description="This model will be used by Onyx by default in your chats."
                              >
                                <LLMSelector
                                  name="llm_model"
                                  llmProviders={llmProviders ?? []}
                                  currentLlm={getCurrentLlm(
                                    values,
                                    llmProviders
                                  )}
                                  onSelect={(selected) =>
                                    onLlmSelect(selected, setFieldValue)
                                  }
                                />
                              </InputLayouts.Horizontal>
                              <InputLayouts.Horizontal
                                name="knowledge_cutoff_date"
                                title="Knowledge Cutoff Date"
                                suffix="optional"
                                description="Documents with a last-updated date prior to this will be ignored."
                              >
                                <InputDatePickerField
                                  name="knowledge_cutoff_date"
                                  maxDate={new Date()}
                                />
                              </InputLayouts.Horizontal>
                              <InputLayouts.Horizontal
                                name="replace_base_system_prompt"
                                title="Overwrite System Prompt"
                                suffix="(Not Recommended)"
                                description='Remove the base system prompt which includes useful instructions (e.g. "You can use Markdown tables"). This may affect response quality.'
                              >
                                <SwitchField name="replace_base_system_prompt" />
                              </InputLayouts.Horizontal>
                            </Card>

                            <GeneralLayouts.Section gap={0.25}>
                              <InputLayouts.Vertical
                                name="reminders"
                                title="Reminders"
                                suffix="optional"
                              >
                                <InputTextAreaField
                                  name="reminders"
                                  placeholder="Remember, I want you to always format your response as a numbered list."
                                />
                              </InputLayouts.Vertical>
                              <Text text03 secondaryBody>
                                Append a brief reminder to the prompt messages.
                                Use this to remind the agent if you find that it
                                tends to forget certain instructions as the chat
                                progresses. This should be brief and not
                                interfere with the user messages.
                              </Text>
                            </GeneralLayouts.Section>
                          </GeneralLayouts.Section>
                        </SimpleCollapsible.Content>
                      </SimpleCollapsible>

                      {existingAgent && (
                        <>
                          <Separator noPadding />

                          <Card>
                            <InputLayouts.Horizontal
                              title="Delete This Agent"
                              description="Anyone using this agent will no longer be able to access it."
                              center
                            >
                              <OpalButton
                                variant="danger"
                                prominence="secondary"
                                onClick={() => deleteAgentModal.toggle(true)}
                              >
                                Delete Agent
                              </OpalButton>
                            </InputLayouts.Horizontal>
                          </Card>
                        </>
                      )}
                    </SettingsLayouts.Body>
                  </SettingsLayouts.Root>
                </Form>
              </>
            );
          }}
        </Formik>
      </div>
    </>
  );
}


================================================
FILE: web/src/refresh-pages/AgentsNavigationPage.tsx
================================================
"use client";

import { useMemo, useState, useRef, useEffect } from "react";
import AgentCard from "@/sections/cards/AgentCard";
import { useUser } from "@/providers/UserProvider";
import { checkUserOwnsAgent as checkUserOwnsAgent } from "@/lib/agents";
import { useAgents } from "@/hooks/useAgents";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import TextSeparator from "@/refresh-components/TextSeparator";
import Tabs from "@/refresh-components/Tabs";
import { FilterButton } from "@opal/components";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import { Button } from "@opal/components";
import {
  SEARCH_TOOL_ID,
  IMAGE_GENERATION_TOOL_ID,
  OPEN_URL_TOOL_ID,
  OPEN_URL_TOOL_NAME,
  WEB_SEARCH_TOOL_ID,
  SYSTEM_TOOL_ICONS,
} from "@/app/app/components/tools/constants";
import {
  SvgActions,
  SvgCheck,
  SvgOnyxOctagon,
  SvgPlus,
  SvgUser,
} from "@opal/icons";
import useOnMount from "@/hooks/useOnMount";

interface AgentsSectionProps {
  title: string;
  description?: string;
  agents: MinimalPersonaSnapshot[];
}

function AgentsSection({ title, description, agents }: AgentsSectionProps) {
  if (agents.length === 0) return null;

  return (
    <div className="flex flex-col gap-4">
      <div>
        <Text as="p" headingH3>
          {title}
        </Text>
        <Text as="p" secondaryBody text03>
          {description}
        </Text>
      </div>
      <div className="w-full grid grid-cols-1 md:grid-cols-2 gap-2">
        {agents
          .sort((a, b) => b.id - a.id)
          .map((agent) => (
            <AgentCard key={agent.id} agent={agent} />
          ))}
      </div>
    </div>
  );
}

export default function AgentsNavigationPage() {
  const { agents } = useAgents();
  const [creatorFilterOpen, setCreatorFilterOpen] = useState(false);
  const [actionsFilterOpen, setActionsFilterOpen] = useState(false);
  const { user } = useUser();
  const [searchQuery, setSearchQuery] = useState("");
  const [activeTab, setActiveTab] = useState<"all" | "your">("all");
  const [selectedCreatorIds, setSelectedCreatorIds] = useState<Set<string>>(
    new Set()
  );
  const [selectedActionIds, setSelectedActionIds] = useState<Set<number>>(
    new Set()
  );
  const [selectedMcpServerIds, setSelectedMcpServerIds] = useState<Set<number>>(
    new Set()
  );
  const [creatorSearchQuery, setCreatorSearchQuery] = useState("");
  const [actionsSearchQuery, setActionsSearchQuery] = useState("");
  const [mcpServersMap, setMcpServersMap] = useState<
    Map<number, { id: number; name: string }>
  >(new Map());
  const searchInputRef = useRef<HTMLInputElement>(null);

  useOnMount(() => {
    // Focus the search input when the page loads
    searchInputRef.current?.focus();
  });

  // Fetch all MCP servers used by agents
  useEffect(() => {
    const fetchMCPServers = async () => {
      const serverIds = new Set<number>();
      agents.forEach((agent) => {
        agent.tools.forEach((tool) => {
          if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {
            serverIds.add(tool.mcp_server_id);
          }
        });
      });

      if (serverIds.size === 0) return;

      const serversMap = new Map<number, { id: number; name: string }>();

      // Fetch server data for each unique server ID
      for (const serverId of Array.from(serverIds)) {
        try {
          // We need to fetch from an agent that has this server
          const agentWithServer = agents.find((agent) =>
            agent.tools.some((tool) => tool.mcp_server_id === serverId)
          );

          if (agentWithServer) {
            const response = await fetch(
              `/api/mcp/servers/persona/${agentWithServer.id}`
            );
            if (response.ok) {
              const data = await response.json();
              const server = data.mcp_servers?.find(
                (s: any) => s.id === serverId
              );
              if (server) {
                serversMap.set(serverId, { id: server.id, name: server.name });
              }
            }
          }
        } catch (error) {
          console.error(`Error fetching MCP server ${serverId}:`, error);
        }
      }

      setMcpServersMap(serversMap);
    };

    fetchMCPServers();
  }, [agents]);

  const uniqueCreators = useMemo(() => {
    const creatorsMap = new Map<string, { id: string; email: string }>();
    agents.forEach((agent) => {
      if (agent.owner) {
        creatorsMap.set(agent.owner.id, agent.owner);
      }
    });

    let creators = Array.from(creatorsMap.values()).sort((a, b) =>
      a.email.localeCompare(b.email)
    );

    // Add current user if not in the list, and put them first
    if (user) {
      const hasCurrentUser = creators.some((c) => c.id === user.id);

      if (!hasCurrentUser) {
        creators = [{ id: user.id, email: user.email }, ...creators];
      } else {
        // Sort to put current user first
        creators = creators.sort((a, b) => {
          if (a.id === user.id) return -1;
          if (b.id === user.id) return 1;
          return 0;
        });
      }
    }

    return creators;
  }, [agents, user]);

  const filteredCreators = useMemo(() => {
    if (!creatorSearchQuery) return uniqueCreators;

    return uniqueCreators.filter((creator) =>
      creator.email.toLowerCase().includes(creatorSearchQuery.toLowerCase())
    );
  }, [uniqueCreators, creatorSearchQuery]);

  const uniqueActions = useMemo(() => {
    const actionsMap = new Map<
      number,
      {
        id: number;
        name: string;
        display_name: string;
        mcp_server_id?: number | null;
      }
    >();
    agents.forEach((agent) => {
      agent.tools.forEach((tool) => {
        if (
          tool.in_code_tool_id === OPEN_URL_TOOL_ID ||
          tool.name === OPEN_URL_TOOL_ID ||
          tool.name === OPEN_URL_TOOL_NAME
        ) {
          return;
        }
        actionsMap.set(tool.id, {
          id: tool.id,
          name: tool.name,
          display_name: tool.display_name,
          mcp_server_id: tool.mcp_server_id,
        });
      });
    });

    const systemToolIds = [
      SEARCH_TOOL_ID,
      IMAGE_GENERATION_TOOL_ID,
      WEB_SEARCH_TOOL_ID,
    ];

    const allActions = Array.from(actionsMap.values());
    const systemTools = allActions.filter((action) =>
      systemToolIds.includes(action.name)
    );
    const otherTools = allActions.filter(
      (action) => !systemToolIds.includes(action.name)
    );

    // Sort each group by display name
    systemTools.sort((a, b) => a.display_name.localeCompare(b.display_name));
    otherTools.sort((a, b) => a.display_name.localeCompare(b.display_name));

    // Group ALL tools by mcp_server_id (both system and other)
    const mcpGroupsMap = new Map<number, typeof allActions>();
    const nonMcpSystemTools: typeof systemTools = [];
    const nonMcpOtherTools: typeof otherTools = [];

    // Group system tools by MCP server
    systemTools.forEach((tool) => {
      if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {
        const group = mcpGroupsMap.get(tool.mcp_server_id) || [];
        group.push(tool);
        mcpGroupsMap.set(tool.mcp_server_id, group);
      } else {
        nonMcpSystemTools.push(tool);
      }
    });

    // Group other tools by MCP server
    otherTools.forEach((tool) => {
      if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {
        const group = mcpGroupsMap.get(tool.mcp_server_id) || [];
        group.push(tool);
        mcpGroupsMap.set(tool.mcp_server_id, group);
      } else {
        nonMcpOtherTools.push(tool);
      }
    });

    // Create grouped action items
    type ActionItem =
      | {
          type: "tool";
          id: number;
          name: string;
          display_name: string;
          mcp_server_id?: number | null;
        }
      | {
          type: "mcp_group";
          mcp_server_id: number;
          server_name: string;
          tools: Array<{ id: number; name: string; display_name: string }>;
        };

    const mcpGroupItems: ActionItem[] = Array.from(mcpGroupsMap.entries()).map(
      ([serverId, tools]) => {
        const serverInfo = mcpServersMap.get(serverId);
        return {
          type: "mcp_group" as const,
          mcp_server_id: serverId,
          server_name: serverInfo?.name || `MCP Server ${serverId}`,
          tools: tools.map((t) => ({
            id: t.id,
            name: t.name,
            display_name: t.display_name,
          })),
        };
      }
    );

    const nonMcpSystemToolItems: ActionItem[] = nonMcpSystemTools.map(
      (tool) => ({ type: "tool" as const, ...tool })
    );
    const nonMcpOtherToolItems: ActionItem[] = nonMcpOtherTools.map((tool) => ({
      type: "tool" as const,
      ...tool,
    }));

    // Return non-MCP system tools first, then MCP groups, then non-MCP other tools
    return [
      ...nonMcpSystemToolItems,
      ...mcpGroupItems,
      ...nonMcpOtherToolItems,
    ];
  }, [agents, mcpServersMap]);

  const filteredActions = useMemo(() => {
    if (!actionsSearchQuery) return uniqueActions;

    const query = actionsSearchQuery.toLowerCase();
    return uniqueActions.filter((action) => {
      if (action.type === "tool") {
        return action.display_name.toLowerCase().includes(query);
      } else {
        // For MCP groups, search through all tool names in the group
        return action.tools.some((tool) =>
          tool.display_name.toLowerCase().includes(query)
        );
      }
    });
  }, [uniqueActions, actionsSearchQuery]);

  const memoizedCurrentlyVisibleAgents = useMemo(() => {
    return agents.filter((agent) => {
      const nameMatches = agent.name
        .toLowerCase()
        .includes(searchQuery.toLowerCase());
      const labelMatches = agent.labels?.some((label) =>
        label.name.toLowerCase().includes(searchQuery.toLowerCase())
      );

      const mineFilter =
        activeTab === "your" ? checkUserOwnsAgent(user, agent) : true;
      const isNotUnifiedAgent = agent.id !== 0;

      const creatorFilter =
        selectedCreatorIds.size === 0 ||
        (agent.owner && selectedCreatorIds.has(agent.owner.id));

      const actionsFilter =
        (selectedActionIds.size === 0 && selectedMcpServerIds.size === 0) ||
        agent.tools.some(
          (tool) =>
            selectedActionIds.has(tool.id) ||
            (tool.mcp_server_id !== null &&
              tool.mcp_server_id !== undefined &&
              selectedMcpServerIds.has(tool.mcp_server_id))
        );

      return (
        (nameMatches || labelMatches) &&
        mineFilter &&
        isNotUnifiedAgent &&
        creatorFilter &&
        actionsFilter
      );
    });
  }, [
    agents,
    searchQuery,
    activeTab,
    user,
    selectedCreatorIds,
    selectedActionIds,
    selectedMcpServerIds,
  ]);

  const featuredAgents = [
    ...memoizedCurrentlyVisibleAgents.filter((agent) => agent.is_featured),
  ];
  const allAgents = memoizedCurrentlyVisibleAgents.filter(
    (agent) => !agent.is_featured
  );

  const agentCount = featuredAgents.length + allAgents.length;

  const creatorFilterButtonText = useMemo(() => {
    if (selectedCreatorIds.size === 0) {
      return "Everyone";
    } else if (selectedCreatorIds.size === 1) {
      const selectedId = Array.from(selectedCreatorIds)[0];
      const creator = uniqueCreators.find((c) => c.id === selectedId);
      return `By ${creator?.email}` || "Everyone";
    } else {
      return `${selectedCreatorIds.size} people`;
    }
  }, [selectedCreatorIds, uniqueCreators]);

  const actionsFilterButtonText = useMemo(() => {
    const totalSelected = selectedActionIds.size + selectedMcpServerIds.size;

    if (totalSelected === 0) {
      return "All Actions";
    } else if (totalSelected === 1) {
      // Check if it's a single tool
      if (selectedActionIds.size === 1) {
        const selectedId = Array.from(selectedActionIds)[0];
        for (const action of uniqueActions) {
          if (action.type === "tool" && action.id === selectedId) {
            return action.display_name;
          }
        }
      }

      // Check if it's a single MCP server
      if (selectedMcpServerIds.size === 1) {
        const selectedServerId = Array.from(selectedMcpServerIds)[0];
        for (const action of uniqueActions) {
          if (
            action.type === "mcp_group" &&
            action.mcp_server_id === selectedServerId
          ) {
            return action.server_name;
          }
        }
      }

      return "All Actions";
    } else {
      return `${totalSelected} selected`;
    }
  }, [selectedActionIds, selectedMcpServerIds, uniqueActions]);

  return (
    <SettingsLayouts.Root
      data-testid="AgentsPage/container"
      aria-label="Agents Page"
    >
      <SettingsLayouts.Header
        icon={SvgOnyxOctagon}
        title="Agents"
        description="Customize AI behavior and knowledge for you and your team's use cases."
        rightChildren={
          <Button
            href="/app/agents/create"
            icon={SvgPlus}
            aria-label="AgentsPage/new-agent-button"
          >
            New Agent
          </Button>
        }
      >
        <div className="flex flex-col gap-2">
          <div className="flex flex-row items-center gap-2">
            <div className="flex-[2]">
              <InputTypeIn
                ref={searchInputRef}
                placeholder="Search agents..."
                value={searchQuery}
                onChange={(event) => setSearchQuery(event.target.value)}
                leftSearchIcon
              />
            </div>
            <div className="flex-1">
              <Tabs
                value={activeTab}
                onValueChange={(value) => setActiveTab(value as "all" | "your")}
              >
                <Tabs.List>
                  <Tabs.Trigger value="all">All Agents</Tabs.Trigger>
                  <Tabs.Trigger value="your">Your Agents</Tabs.Trigger>
                </Tabs.List>
              </Tabs>
            </div>
          </div>
          <div className="flex flex-row gap-2">
            <Popover
              open={creatorFilterOpen}
              onOpenChange={setCreatorFilterOpen}
            >
              <Popover.Trigger asChild>
                <FilterButton
                  icon={SvgUser}
                  active={selectedCreatorIds.size > 0}
                  onClear={() => setSelectedCreatorIds(new Set())}
                >
                  {creatorFilterButtonText}
                </FilterButton>
              </Popover.Trigger>
              <Popover.Content align="start">
                <PopoverMenu>
                  {[
                    <InputTypeIn
                      key="created-by"
                      placeholder="Created by..."
                      variant="internal"
                      leftSearchIcon
                      value={creatorSearchQuery}
                      onChange={(e) => setCreatorSearchQuery(e.target.value)}
                    />,
                    ...filteredCreators.flatMap((creator, index) => {
                      const isSelected = selectedCreatorIds.has(creator.id);
                      const isCurrentUser = user && creator.id === user.id;

                      // Check if we need to add a separator after this item
                      const nextCreator = filteredCreators[index + 1];
                      const nextIsCurrentUser =
                        user && nextCreator && nextCreator.id === user.id;
                      const needsSeparator =
                        isCurrentUser && nextCreator && !nextIsCurrentUser;

                      // Determine icon: Check if selected, User icon if current user, otherwise no icon
                      const icon = isCurrentUser
                        ? SvgUser
                        : isSelected
                          ? SvgCheck
                          : () => null;

                      const lineItem = (
                        <LineItem
                          key={creator.id}
                          icon={icon}
                          selected={isSelected}
                          emphasized
                          onClick={() => {
                            setSelectedCreatorIds((prev) => {
                              const newSet = new Set(prev);
                              if (newSet.has(creator.id)) {
                                newSet.delete(creator.id);
                              } else {
                                newSet.add(creator.id);
                              }
                              return newSet;
                            });
                          }}
                        >
                          {creator.email}
                        </LineItem>
                      );

                      // Return the line item, and optionally a separator
                      return needsSeparator ? [lineItem, null] : [lineItem];
                    }),
                  ]}
                </PopoverMenu>
              </Popover.Content>
            </Popover>
            <Popover
              open={actionsFilterOpen}
              onOpenChange={setActionsFilterOpen}
            >
              <Popover.Trigger asChild>
                <FilterButton
                  icon={SvgActions}
                  active={
                    selectedActionIds.size > 0 || selectedMcpServerIds.size > 0
                  }
                  onClear={() => {
                    setSelectedActionIds(new Set());
                    setSelectedMcpServerIds(new Set());
                  }}
                >
                  {actionsFilterButtonText}
                </FilterButton>
              </Popover.Trigger>
              <Popover.Content align="start">
                <PopoverMenu>
                  {[
                    <InputTypeIn
                      key="actions"
                      placeholder="Filter actions..."
                      variant="internal"
                      leftSearchIcon
                      value={actionsSearchQuery}
                      onChange={(e) => setActionsSearchQuery(e.target.value)}
                    />,
                    ...filteredActions.flatMap((action, index) => {
                      if (action.type === "tool") {
                        const isSelected = selectedActionIds.has(action.id);
                        const systemIcon = SYSTEM_TOOL_ICONS[action.name];
                        const isSystemTool = !!systemIcon;

                        // Check if we need to add a separator after this item
                        const nextAction = filteredActions[index + 1];
                        const nextIsSystemTool =
                          nextAction && nextAction.type === "tool"
                            ? !!SYSTEM_TOOL_ICONS[nextAction.name]
                            : false;
                        const needsSeparator =
                          isSystemTool && nextAction && !nextIsSystemTool;

                        // Determine icon: system icon if available, otherwise Actions icon
                        const icon = systemIcon ? systemIcon : SvgActions;

                        const lineItem = (
                          <LineItem
                            key={action.id}
                            icon={icon}
                            selected={isSelected}
                            emphasized
                            onClick={() => {
                              setSelectedActionIds((prev) => {
                                const newSet = new Set(prev);
                                if (newSet.has(action.id)) {
                                  newSet.delete(action.id);
                                } else {
                                  newSet.add(action.id);
                                }
                                return newSet;
                              });
                            }}
                          >
                            {action.display_name}
                          </LineItem>
                        );

                        return needsSeparator ? [lineItem, null] : [lineItem];
                      } else {
                        // MCP Group - render only the server name, not individual tools
                        const groupKey = `mcp-group-${action.mcp_server_id}`;
                        const isSelected = selectedMcpServerIds.has(
                          action.mcp_server_id
                        );

                        const lineItem = (
                          <LineItem
                            key={groupKey}
                            icon={SvgActions}
                            selected={isSelected}
                            emphasized
                            onClick={() => {
                              setSelectedMcpServerIds((prev) => {
                                const newSet = new Set(prev);
                                if (newSet.has(action.mcp_server_id)) {
                                  newSet.delete(action.mcp_server_id);
                                } else {
                                  newSet.add(action.mcp_server_id);
                                }
                                return newSet;
                              });
                            }}
                          >
                            {action.server_name}
                          </LineItem>
                        );

                        return [lineItem];
                      }
                    }),
                  ]}
                </PopoverMenu>
              </Popover.Content>
            </Popover>
          </div>
        </div>
      </SettingsLayouts.Header>

      {/* Agents List */}
      <SettingsLayouts.Body>
        {agentCount === 0 ? (
          <Text
            as="p"
            className="w-full h-full flex flex-col items-center justify-center py-12"
            text03
          >
            No Agents found
          </Text>
        ) : (
          <>
            <AgentsSection
              title="Featured Agents"
              description="Curated by your team"
              agents={featuredAgents}
            />
            <AgentsSection title="All Agents" agents={allAgents} />
            <TextSeparator
              count={agentCount}
              text={agentCount === 1 ? "Agent" : "Agents"}
            />
          </>
        )}
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/AppPage.tsx
================================================
"use client";

import { redirect, useRouter, useSearchParams } from "next/navigation";
import { personaIncludesRetrieval } from "@/app/app/services/lib";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast, useToastFromQuery } from "@/hooks/useToast";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import { Section } from "@/layouts/general-layouts";
import { useFederatedConnectors, useFilters, useLlmManager } from "@/lib/hooks";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import OnyxInitializingLoader from "@/components/OnyxInitializingLoader";
import { OnyxDocument, MinimalOnyxDocument } from "@/lib/search/interfaces";
import {
  useSettingsContext,
  useVectorDbEnabled,
} from "@/providers/SettingsProvider";
import Dropzone from "react-dropzone";
import AppInputBar, { AppInputBarHandle } from "@/sections/input/AppInputBar";
import useChatSessions from "@/hooks/useChatSessions";
import useCCPairs from "@/hooks/useCCPairs";
import useTags from "@/hooks/useTags";
import { useDocumentSets } from "@/lib/hooks/useDocumentSets";
import { useAgents } from "@/hooks/useAgents";
import { AppPopup } from "@/app/app/components/AppPopup";
import { useUser } from "@/providers/UserProvider";
import NoAgentModal from "@/components/modals/NoAgentModal";
import PreviewModal from "@/sections/modals/PreviewModal";
import Modal from "@/refresh-components/Modal";
import { useSendMessageToParent } from "@/lib/extension/utils";
import { SUBMIT_MESSAGE_TYPES } from "@/lib/extension/constants";
import { getSourceMetadata } from "@/lib/sources";
import { SourceMetadata } from "@/lib/search/interfaces";
import { FederatedConnectorDetail, UserRole, ValidSources } from "@/lib/types";
import DocumentsSidebar from "@/sections/document-sidebar/DocumentsSidebar";
import useChatController from "@/hooks/useChatController";
import useAgentController from "@/hooks/useAgentController";
import useChatSessionController from "@/hooks/useChatSessionController";
import useDeepResearchToggle from "@/hooks/useDeepResearchToggle";
import useIsDefaultAgent from "@/hooks/useIsDefaultAgent";
import AgentDescription from "@/app/app/components/AgentDescription";
import {
  useChatSessionStore,
  useCurrentMessageHistory,
} from "@/app/app/stores/useChatSessionStore";
import {
  useCurrentChatState,
  useIsReady,
  useDocumentSidebarVisible,
} from "@/app/app/stores/useChatSessionStore";
import FederatedOAuthModal from "@/components/chat/FederatedOAuthModal";
import ChatScrollContainer, {
  ChatScrollContainerHandle,
} from "@/sections/chat/ChatScrollContainer";
import ProjectContextPanel from "@/app/app/components/projects/ProjectContextPanel";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { getProjectTokenCount } from "@/app/app/projects/projectsService";
import ProjectChatSessionList from "@/app/app/components/projects/ProjectChatSessionList";
import { cn } from "@/lib/utils";
import Suggestions from "@/sections/Suggestions";
import OnboardingFlow from "@/sections/onboarding/OnboardingFlow";
import { OnboardingStep } from "@/interfaces/onboarding";
import { useShowOnboarding } from "@/hooks/useShowOnboarding";
import * as AppLayouts from "@/layouts/app-layouts";
import { SvgChevronDown, SvgFileText } from "@opal/icons";
import { Button } from "@opal/components";
import { IllustrationContent } from "@opal/layouts";
import SvgNotFound from "@opal/illustrations/not-found";
import SvgNoAccess from "@opal/illustrations/no-access";
import Spacer from "@/refresh-components/Spacer";
import useAppFocus from "@/hooks/useAppFocus";
import { useQueryController } from "@/providers/QueryControllerProvider";
import WelcomeMessage from "@/app/app/components/WelcomeMessage";
import ChatUI from "@/sections/chat/ChatUI";
import { eeGated } from "@/ce";
import EESearchUI from "@/ee/sections/SearchUI";
const SearchUI = eeGated(EESearchUI);
import { motion, AnimatePresence } from "motion/react";

interface FadeProps {
  show: boolean;
  children?: React.ReactNode;
  className?: string;
}

function Fade({ show, children, className }: FadeProps) {
  return (
    <AnimatePresence>
      {show && (
        <motion.div
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          exit={{ opacity: 0 }}
          transition={{ duration: 0.15 }}
          className={className}
        >
          {children}
        </motion.div>
      )}
    </AnimatePresence>
  );
}

export interface ChatPageProps {
  firstMessage?: string;
}

export default function AppPage({ firstMessage }: ChatPageProps) {
  // Performance tracking
  // Keeping this here in case we need to track down slow renders in the future
  // const renderCount = useRef(0);
  // renderCount.current++;
  // const renderStartTime = performance.now();

  // useEffect(() => {
  //   const renderTime = performance.now() - renderStartTime;
  //   if (renderTime > 10) {
  //     console.log(
  //       `[ChatPage] Slow render #${renderCount.current}: ${renderTime.toFixed(
  //         2
  //       )}ms`
  //     );
  //   }
  // });

  const router = useRouter();
  const appFocus = useAppFocus();

  useToastFromQuery({
    oauth_connected: {
      message: "Authentication successful",
      type: "success",
    },
  });
  const searchParams = useSearchParams();

  // Use SWR hooks for data fetching
  const {
    chatSessions,
    refreshChatSessions,
    currentChatSession,
    currentChatSessionId,
    isLoading: isLoadingChatSessions,
  } = useChatSessions();
  // handle redirect if chat page is disabled
  // NOTE: this must be done here, in a client component since
  // settings are passed in via Context and therefore aren't
  // available in server-side components
  const settings = useSettingsContext();
  const vectorDbEnabled = useVectorDbEnabled();
  const { ccPairs } = useCCPairs(vectorDbEnabled);
  const { tags } = useTags();
  const { documentSets } = useDocumentSets();
  const {
    currentMessageFiles,
    setCurrentMessageFiles,
    currentProjectId,
    currentProjectDetails,
    lastFailedFiles,
    clearLastFailedFiles,
  } = useProjectsContext();

  // When changing from project chat to main chat (or vice-versa), clear forced tools
  const { setForcedToolIds } = useForcedTools();
  useEffect(() => {
    setForcedToolIds([]);
  }, [currentProjectId, setForcedToolIds]);

  const isInitialLoad = useRef(true);

  const { agents, isLoading: isLoadingAgents } = useAgents();

  // Also fetch federated connectors for the sources list
  const { data: federatedConnectorsData } = useFederatedConnectors();

  const { user } = useUser();

  function processSearchParamsAndSubmitMessage(searchParamsString: string) {
    const newSearchParams = new URLSearchParams(searchParamsString);
    const message = newSearchParams?.get("user-prompt");

    filterManager.buildFiltersFromQueryString(
      newSearchParams.toString(),
      sources,
      documentSets.map((ds) => ds.name),
      tags
    );

    newSearchParams.delete(SEARCH_PARAM_NAMES.SEND_ON_LOAD);

    router.replace(`?${newSearchParams.toString()}`, { scroll: false });

    // If there's a message, submit it
    if (message) {
      onSubmit({
        message,
        currentMessageFiles,
        deepResearch: deepResearchEnabledForCurrentWorkflow,
      });
    }
  }

  const { selectedAgent, setSelectedAgentFromId, liveAgent } =
    useAgentController({
      selectedChatSession: currentChatSession,
      onAgentSelect: () => {
        // Only remove project context if user explicitly selected an agent
        // (i.e., agentId is present). Avoid clearing project when agentId was removed.
        const newSearchParams = new URLSearchParams(
          searchParams?.toString() || ""
        );
        if (newSearchParams.has(SEARCH_PARAM_NAMES.PERSONA_ID)) {
          newSearchParams.delete(SEARCH_PARAM_NAMES.PROJECT_ID);
          router.replace(`?${newSearchParams.toString()}`, { scroll: false });
        }
      },
    });

  const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({
    chatSessionId: currentChatSessionId,
    agentId: selectedAgent?.id,
  });
  const deepResearchEnabledForCurrentWorkflow =
    currentProjectId === null && deepResearchEnabled;

  const [presentingDocument, setPresentingDocument] =
    useState<MinimalOnyxDocument | null>(null);

  const llmManager = useLlmManager(currentChatSession ?? undefined, liveAgent);

  const {
    showOnboarding,
    onboardingDismissed,
    onboardingState,
    onboardingActions,
    llmDescriptors,
    isLoadingOnboarding,
    finishOnboarding,
    hideOnboarding,
  } = useShowOnboarding({
    liveAgent,
    isLoadingChatSessions,
    chatSessionsCount: chatSessions.length,
    userId: user?.id,
  });

  const noAgents = liveAgent === null || liveAgent === undefined;

  const availableSources: ValidSources[] = useMemo(() => {
    return ccPairs.map((ccPair) => ccPair.source);
  }, [ccPairs]);

  const sources: SourceMetadata[] = useMemo(() => {
    const uniqueSources = Array.from(new Set(availableSources));
    const regularSources = uniqueSources.map((source) =>
      getSourceMetadata(source)
    );

    // Add federated connectors as sources
    const federatedSources =
      federatedConnectorsData?.map((connector: FederatedConnectorDetail) => {
        return getSourceMetadata(connector.source);
      }) || [];

    // Combine sources and deduplicate based on internalName
    const allSources = [...regularSources, ...federatedSources];
    const deduplicatedSources = allSources.reduce((acc, source) => {
      const existing = acc.find((s) => s.internalName === source.internalName);
      if (!existing) {
        acc.push(source);
      }
      return acc;
    }, [] as SourceMetadata[]);

    return deduplicatedSources;
  }, [availableSources, federatedConnectorsData]);

  // Show toast if any files failed in ProjectsContext reconciliation
  useEffect(() => {
    if (lastFailedFiles && lastFailedFiles.length > 0) {
      const names = lastFailedFiles.map((f) => f.name).join(", ");
      toast.error(
        lastFailedFiles.length === 1
          ? `File failed and was removed: ${names}`
          : `Files failed and were removed: ${names}`
      );
      clearLastFailedFiles();
    }
  }, [lastFailedFiles, clearLastFailedFiles]);

  const chatInputBarRef = useRef<AppInputBarHandle>(null);

  const filterManager = useFilters();

  const isDefaultAgent = useIsDefaultAgent({
    liveAgent,
    existingChatSessionId: currentChatSessionId,
    selectedChatSession: currentChatSession ?? undefined,
    settings,
  });

  const scrollContainerRef = useRef<ChatScrollContainerHandle>(null);
  const [showScrollButton, setShowScrollButton] = useState(false);

  // Reset scroll button when session changes
  useEffect(() => {
    setShowScrollButton(false);
  }, [currentChatSessionId]);

  const handleScrollToBottom = useCallback(() => {
    scrollContainerRef.current?.scrollToBottom();
  }, []);

  const resetInputBar = useCallback(() => {
    chatInputBarRef.current?.reset();
    setCurrentMessageFiles([]);
  }, [setCurrentMessageFiles]);

  // Add refs needed by useChatSessionController
  const chatSessionIdRef = useRef<string | null>(currentChatSessionId);
  const loadedIdSessionRef = useRef<string | null>(currentChatSessionId);
  const submitOnLoadPerformed = useRef<boolean>(false);

  function loadNewPageLogic(event: MessageEvent) {
    if (event.data.type === SUBMIT_MESSAGE_TYPES.PAGE_CHANGE) {
      try {
        const url = new URL(event.data.href);
        processSearchParamsAndSubmitMessage(url.searchParams.toString());
      } catch (error) {
        console.error("Error parsing URL:", error);
      }
    }
  }

  // Equivalent to `loadNewPageLogic`
  useEffect(() => {
    if (searchParams?.get(SEARCH_PARAM_NAMES.SEND_ON_LOAD)) {
      processSearchParamsAndSubmitMessage(searchParams.toString());
    }
  }, [searchParams, router]);

  useEffect(() => {
    window.addEventListener("message", loadNewPageLogic);

    return () => {
      window.removeEventListener("message", loadNewPageLogic);
    };
  }, []);

  const [selectedDocuments, setSelectedDocuments] = useState<OnyxDocument[]>(
    []
  );

  // Access chat state directly from the store
  const currentChatState = useCurrentChatState();
  const isReady = useIsReady();
  const documentSidebarVisible = useDocumentSidebarVisible();
  const updateCurrentDocumentSidebarVisible = useChatSessionStore(
    (state) => state.updateCurrentDocumentSidebarVisible
  );
  const messageHistory = useCurrentMessageHistory();

  // Determine anchor: second-to-last message (last user message before current response)
  const anchorMessage = messageHistory.at(-2) ?? messageHistory[0];
  const anchorNodeId = anchorMessage?.nodeId;
  const anchorSelector = anchorNodeId ? `#message-${anchorNodeId}` : undefined;

  // Auto-scroll preference from user settings
  const autoScrollEnabled = user?.preferences?.auto_scroll !== false;
  const isStreaming = currentChatState === "streaming";

  const {
    onSubmit,
    stopGenerating,
    handleMessageSpecificFileUpload,
    availableContextTokens,
  } = useChatController({
    filterManager,
    llmManager,
    availableAgents: agents,
    liveAgent,
    existingChatSessionId: currentChatSessionId,
    selectedDocuments,
    searchParams,
    resetInputBar,
    setSelectedAgentFromId,
  });

  const {
    onMessageSelection,
    currentSessionFileTokenCount,
    sessionFetchError,
  } = useChatSessionController({
    existingChatSessionId: currentChatSessionId,
    searchParams,
    filterManager,
    firstMessage,
    setSelectedAgentFromId,
    setSelectedDocuments,
    setCurrentMessageFiles,
    chatSessionIdRef,
    loadedIdSessionRef,
    chatInputBarRef,
    isInitialLoad,
    submitOnLoadPerformed,
    refreshChatSessions,
    onSubmit,
  });

  useSendMessageToParent();

  const retrievalEnabled = useMemo(() => {
    if (liveAgent) {
      return personaIncludesRetrieval(liveAgent);
    }
    return false;
  }, [liveAgent]);

  useEffect(() => {
    if (
      (!personaIncludesRetrieval &&
        (!selectedDocuments || selectedDocuments.length === 0) &&
        documentSidebarVisible) ||
      !currentChatSessionId
    ) {
      updateCurrentDocumentSidebarVisible(false);
    }
  }, [currentChatSessionId]);

  const handleResubmitLastMessage = useCallback(() => {
    // Grab the last user-type message
    const lastUserMsg = messageHistory
      .slice()
      .reverse()
      .find((m) => m.type === "user");
    if (!lastUserMsg) {
      toast.error("No previously-submitted user message found.");
      return;
    }

    // We call onSubmit, passing a `messageOverride`
    onSubmit({
      message: lastUserMsg.message,
      currentMessageFiles: currentMessageFiles,
      deepResearch: deepResearchEnabledForCurrentWorkflow,
      messageIdToResend: lastUserMsg.messageId,
    });
  }, [
    messageHistory,
    onSubmit,
    currentMessageFiles,
    deepResearchEnabledForCurrentWorkflow,
  ]);

  const toggleDocumentSidebar = useCallback(() => {
    if (!documentSidebarVisible) {
      updateCurrentDocumentSidebarVisible(true);
    } else {
      updateCurrentDocumentSidebarVisible(false);
    }
  }, [documentSidebarVisible, updateCurrentDocumentSidebarVisible]);

  if (!user) {
    redirect("/auth/login");
  }

  const onChat = useCallback(
    (message: string) => {
      onSubmit({
        message,
        currentMessageFiles,
        deepResearch: deepResearchEnabledForCurrentWorkflow,
      });
      if (showOnboarding || !onboardingDismissed) {
        finishOnboarding();
      }
    },
    [
      onSubmit,
      currentMessageFiles,
      deepResearchEnabledForCurrentWorkflow,
      showOnboarding,
      onboardingDismissed,
      finishOnboarding,
    ]
  );
  const { submit: submitQuery, state, setAppMode } = useQueryController();

  const defaultAppMode =
    (user?.preferences?.default_app_mode?.toLowerCase() as "chat" | "search") ??
    "chat";

  const isNewSession = appFocus.isNewSession();

  const isSearch =
    state.phase === "searching" || state.phase === "search-results";

  // 1. Reset the app-mode back to the user's default when navigating back to the "New Sessions" tab.
  // 2. If we're navigating away from the "New Session" tab after performing a search, we reset the app-input-bar.
  useEffect(() => {
    if (isNewSession) setAppMode(defaultAppMode);
    if (!isNewSession && isSearch) resetInputBar();
  }, [isNewSession, defaultAppMode, isSearch, resetInputBar, setAppMode]);

  const handleSearchDocumentClick = useCallback(
    (doc: MinimalOnyxDocument) => setPresentingDocument(doc),
    []
  );

  const handleAppInputBarSubmit = useCallback(
    async (message: string) => {
      // If we're in an existing chat session, always use chat mode
      // (appMode only applies to new sessions)
      if (currentChatSessionId) {
        onSubmit({
          message,
          currentMessageFiles,
          deepResearch: deepResearchEnabledForCurrentWorkflow,
        });
        if (showOnboarding || !onboardingDismissed) {
          finishOnboarding();
        }
        return;
      }

      // For new sessions, let the query controller handle routing.
      // resetInputBar is called inside useChatController.onSubmit for chat-routed queries.
      // For search-routed queries, the input bar is intentionally kept
      // so the user can see and refine their search query.
      await submitQuery(message, onChat);
    },
    [
      currentChatSessionId,
      submitQuery,
      onChat,
      onSubmit,
      currentMessageFiles,
      deepResearchEnabledForCurrentWorkflow,
      showOnboarding,
      onboardingDismissed,
      finishOnboarding,
    ]
  );

  // Memoized callbacks for DocumentsSidebar
  const handleMobileDocumentSidebarClose = useCallback(() => {
    updateCurrentDocumentSidebarVisible(false);
  }, [updateCurrentDocumentSidebarVisible]);

  const handleDesktopDocumentSidebarClose = useCallback(() => {
    setTimeout(() => updateCurrentDocumentSidebarVisible(false), 300);
  }, [updateCurrentDocumentSidebarVisible]);

  const desktopDocumentSidebar =
    retrievalEnabled && !settings.isMobile ? (
      <div
        className={cn(
          "flex-shrink-0 overflow-hidden transition-all duration-300 ease-in-out",
          documentSidebarVisible ? "w-[25rem]" : "w-[0rem]"
        )}
      >
        <div className="h-full w-[25rem]">
          <DocumentsSidebar
            setPresentingDocument={setPresentingDocument}
            modal={false}
            closeSidebar={handleDesktopDocumentSidebarClose}
            selectedDocuments={selectedDocuments}
          />
        </div>
      </div>
    ) : null;

  // When no chat session exists but a project is selected, fetch the
  // total tokens for the project's files so upload UX can compare
  // against available context similar to session-based flows.
  const [projectContextTokenCount, setProjectContextTokenCount] = useState(0);
  // Fetch project-level token count when no chat session exists.
  // Note: useEffect cannot be async, so we define an inner async function (run)
  // and invoke it. The `cancelled` guard prevents setting state after the
  // component unmounts or when the dependencies change and a newer effect run
  // supersedes an older in-flight request.
  useEffect(() => {
    let cancelled = false;
    async function run() {
      if (!currentChatSessionId && currentProjectId !== null) {
        try {
          const total = await getProjectTokenCount(currentProjectId);
          if (!cancelled) setProjectContextTokenCount(total || 0);
        } catch {
          if (!cancelled) setProjectContextTokenCount(0);
        }
      } else {
        setProjectContextTokenCount(0);
      }
    }
    run();
    return () => {
      cancelled = true;
    };
  }, [currentChatSessionId, currentProjectId, currentProjectDetails?.files]);

  // handle error case where no assistants are available
  // Only show this after agents have loaded to prevent flash during initial load
  if (noAgents && !isLoadingAgents) {
    return <NoAgentModal />;
  }

  const hasStarterMessages = (liveAgent?.starter_messages?.length ?? 0) > 0;

  const gridStyle = {
    gridTemplateColumns: "1fr",
    gridTemplateRows: isSearch
      ? "0fr auto 1fr"
      : appFocus.isChat()
        ? "1fr auto 0fr"
        : appFocus.isProject()
          ? "auto auto 1fr"
          : "1fr auto 1fr",
  };

  if (!isReady) return <OnyxInitializingLoader />;

  return (
    <>
      <AppPopup />

      {retrievalEnabled && documentSidebarVisible && settings.isMobile && (
        <div className="md:hidden">
          <Modal
            open
            onOpenChange={() => updateCurrentDocumentSidebarVisible(false)}
          >
            <Modal.Content>
              <Modal.Header
                icon={SvgFileText}
                title="Sources"
                onClose={() => updateCurrentDocumentSidebarVisible(false)}
              />
              <Modal.Body>
                {/* IMPORTANT: this is a memoized component, and it's very important
                for performance reasons that this stays true. MAKE SURE that all function
                props are wrapped in useCallback. */}
                <DocumentsSidebar
                  setPresentingDocument={setPresentingDocument}
                  modal
                  closeSidebar={handleMobileDocumentSidebarClose}
                  selectedDocuments={selectedDocuments}
                />
              </Modal.Body>
            </Modal.Content>
          </Modal>
        </div>
      )}

      {presentingDocument && (
        <PreviewModal
          presentingDocument={presentingDocument}
          onClose={() => setPresentingDocument(null)}
        />
      )}

      <FederatedOAuthModal />

      <AppLayouts.Root enableBackground={!appFocus.isProject()}>
        <Dropzone
          onDrop={(acceptedFiles) =>
            handleMessageSpecificFileUpload(acceptedFiles)
          }
          noClick
        >
          {({ getRootProps }) => (
            <div
              className="h-full w-full flex flex-col items-center outline-none relative"
              {...getRootProps({ tabIndex: -1 })}
            >
              {/* Main content grid — 3 rows, animated */}
              <div
                className="flex-1 w-full grid min-h-0 transition-[grid-template-rows] duration-150 ease-in-out"
                style={gridStyle}
              >
                {/* ── Top row: ChatUI / WelcomeMessage / ProjectUI ── */}
                <div className="row-start-1 min-h-0 overflow-hidden flex flex-col items-center">
                  {/* ChatUI */}
                  <Fade
                    show={
                      appFocus.isChat() &&
                      !!currentChatSessionId &&
                      !!liveAgent &&
                      !sessionFetchError
                    }
                    className="h-full w-full flex flex-col items-center"
                  >
                    <ChatScrollContainer
                      ref={scrollContainerRef}
                      sessionId={currentChatSessionId!}
                      anchorSelector={anchorSelector}
                      autoScroll={autoScrollEnabled}
                      isStreaming={isStreaming}
                      onScrollButtonVisibilityChange={setShowScrollButton}
                    >
                      <ChatUI
                        liveAgent={liveAgent!}
                        llmManager={llmManager}
                        deepResearchEnabled={
                          deepResearchEnabledForCurrentWorkflow
                        }
                        currentMessageFiles={currentMessageFiles}
                        setPresentingDocument={setPresentingDocument}
                        onSubmit={onSubmit}
                        onMessageSelection={onMessageSelection}
                        stopGenerating={stopGenerating}
                        onResubmit={handleResubmitLastMessage}
                        anchorNodeId={anchorNodeId}
                      />
                    </ChatScrollContainer>
                  </Fade>

                  {/* Session fetch error (404 / 403) */}
                  <Fade
                    show={appFocus.isChat() && sessionFetchError !== null}
                    className="h-full w-full flex flex-col items-center justify-center"
                  >
                    {sessionFetchError && (
                      <Section
                        flexDirection="column"
                        alignItems="center"
                        gap={1}
                      >
                        <IllustrationContent
                          illustration={
                            sessionFetchError.type === "access_denied"
                              ? SvgNoAccess
                              : SvgNotFound
                          }
                          title={
                            sessionFetchError.type === "not_found"
                              ? "Chat not found"
                              : sessionFetchError.type === "access_denied"
                                ? "Access denied"
                                : "Something went wrong"
                          }
                          description={
                            sessionFetchError.type === "not_found"
                              ? "This chat session doesn't exist or has been deleted."
                              : sessionFetchError.type === "access_denied"
                                ? "You don't have permission to view this chat session."
                                : sessionFetchError.detail
                          }
                        />
                        <Button href="/app" prominence="secondary">
                          Start a new chat
                        </Button>
                      </Section>
                    )}
                  </Fade>

                  {/* ProjectUI */}
                  {appFocus.isProject() && (
                    <div className="w-full max-h-[50vh] overflow-y-auto overscroll-y-none">
                      <ProjectContextPanel
                        projectTokenCount={projectContextTokenCount}
                        availableContextTokens={availableContextTokens}
                        setPresentingDocument={setPresentingDocument}
                      />
                    </div>
                  )}

                  {/* WelcomeMessageUI */}
                  <Fade
                    show={
                      (appFocus.isNewSession() || appFocus.isAgent()) &&
                      (state.phase === "idle" || state.phase === "classifying")
                    }
                    className="w-full flex-1 flex flex-col items-center justify-end"
                  >
                    <WelcomeMessage
                      agent={liveAgent}
                      isDefaultAgent={isDefaultAgent}
                    />
                    <Spacer rem={1.5} />
                  </Fade>
                </div>

                {/* ── Middle-center: AppInputBar ── */}
                <div
                  className={cn(
                    "row-start-2 flex flex-col items-center px-4",
                    sessionFetchError && "hidden"
                  )}
                >
                  <div className="relative w-full max-w-[var(--app-page-main-content-width)] flex flex-col">
                    {/* Scroll to bottom button - positioned absolutely above AppInputBar */}
                    {appFocus.isChat() && showScrollButton && (
                      <div className="absolute top-[-3.5rem] self-center">
                        <Button
                          icon={SvgChevronDown}
                          onClick={handleScrollToBottom}
                          aria-label="Scroll to bottom"
                          prominence="secondary"
                        />
                      </div>
                    )}

                    {/* OnboardingUI */}
                    {(appFocus.isNewSession() || appFocus.isAgent()) &&
                      (state.phase === "idle" ||
                        state.phase === "classifying") &&
                      (showOnboarding || !user?.personalization?.name) &&
                      !onboardingDismissed && (
                        <OnboardingFlow
                          showOnboarding={showOnboarding}
                          handleHideOnboarding={hideOnboarding}
                          handleFinishOnboarding={finishOnboarding}
                          state={onboardingState}
                          actions={onboardingActions}
                          llmDescriptors={llmDescriptors}
                        />
                      )}

                    {/*
                      # Note (@raunakab)

                      `shadow-01` on AppInputBar extends ~14px below the element
                      (2px offset + 12px blur). Because the content area in `Root`
                      (app-layouts.tsx) uses `overflow-auto`, shadows that exceed
                      the container bounds are clipped.

                      The animated spacer divs above and below the AppInputBar
                      provide 14px of breathing room so the shadow renders fully.
                      They transition between h-0 and h-[14px] depending on whether
                      the classification is "search" (spacer above) or "chat"
                      (spacer below).

                      There is a corresponding note inside `app-layouts.tsx`
                      (Footer) that explains why the Footer removes its top
                      padding during chat to compensate for this extra space.
                    */}
                    <div>
                      <div
                        className={cn(
                          "transition-all duration-150 ease-in-out overflow-hidden",
                          isSearch ? "h-[14px]" : "h-0"
                        )}
                      />
                      <AppInputBar
                        ref={chatInputBarRef}
                        deepResearchEnabled={
                          deepResearchEnabledForCurrentWorkflow
                        }
                        toggleDeepResearch={toggleDeepResearch}
                        filterManager={filterManager}
                        llmManager={llmManager}
                        initialMessage={
                          searchParams?.get(SEARCH_PARAM_NAMES.USER_PROMPT) ||
                          ""
                        }
                        stopGenerating={stopGenerating}
                        onSubmit={handleAppInputBarSubmit}
                        chatState={currentChatState}
                        currentSessionFileTokenCount={
                          currentChatSessionId
                            ? currentSessionFileTokenCount
                            : projectContextTokenCount
                        }
                        availableContextTokens={availableContextTokens}
                        selectedAgent={selectedAgent || liveAgent}
                        handleFileUpload={handleMessageSpecificFileUpload}
                        setPresentingDocument={setPresentingDocument}
                        // Intentionally enabled during name-only onboarding (showOnboarding=false)
                        // since LLM providers are already configured and the user can chat.
                        disabled={
                          (!llmManager.isLoadingProviders &&
                            llmManager.hasAnyProvider === false) ||
                          (showOnboarding &&
                            !isLoadingOnboarding &&
                            onboardingState.currentStep !==
                              OnboardingStep.Complete)
                        }
                      />
                      <div
                        className={cn(
                          "transition-all duration-150 ease-in-out overflow-hidden",
                          appFocus.isChat() ? "h-[14px]" : "h-0"
                        )}
                      />
                    </div>
                  </div>
                </div>

                {/* ── Bottom: SearchResults + SourceFilter / Suggestions / ProjectChatList ── */}
                <div className="row-start-3 min-h-0 overflow-hidden flex flex-col items-center w-full px-4">
                  {/* Agent description below input */}
                  {(appFocus.isNewSession() || appFocus.isAgent()) &&
                    !isDefaultAgent && (
                      <>
                        <Spacer rem={1} />
                        <AgentDescription agent={liveAgent} />
                        <Spacer rem={1.5} />
                      </>
                    )}
                  {/* ProjectChatSessionList */}
                  {appFocus.isProject() && (
                    <div className="w-full max-w-[var(--app-page-main-content-width)] h-full overflow-y-auto overscroll-y-none mx-auto">
                      <ProjectChatSessionList />
                    </div>
                  )}

                  {/* SuggestionsUI */}
                  <Fade
                    show={
                      (appFocus.isNewSession() || appFocus.isAgent()) &&
                      hasStarterMessages
                    }
                    className="h-full flex-1 w-full max-w-[var(--app-page-main-content-width)]"
                  >
                    <Spacer rem={0.5} />
                    <Suggestions onSubmit={onSubmit} />
                  </Fade>

                  {/* SearchUI */}
                  <Fade
                    show={isSearch}
                    className="h-full flex-1 w-full max-w-[var(--app-page-main-content-width)] px-1 flex flex-col"
                  >
                    <Spacer rem={0.75} />
                    <SearchUI onDocumentClick={handleSearchDocumentClick} />
                  </Fade>
                </div>
              </div>
            </div>
          )}
        </Dropzone>
      </AppLayouts.Root>

      {desktopDocumentSidebar}
    </>
  );
}


================================================
FILE: web/src/refresh-pages/SettingsPage.tsx
================================================
"use client";

import { useRef, useCallback, useEffect, useState } from "react";
import { usePathname, useRouter } from "next/navigation";
import * as InputLayouts from "@/layouts/input-layouts";
import { Section, AttachmentItemLayout } from "@/layouts/general-layouts";
import { Content, ContentAction } from "@opal/layouts";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import {
  SvgArrowExchange,
  SvgKey,
  SvgLock,
  SvgMinusCircle,
  SvgTrash,
  SvgUnplug,
} from "@opal/icons";
import { getSourceMetadata } from "@/lib/sources";
import Card from "@/refresh-components/cards/Card";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import InputTextArea from "@/refresh-components/inputs/InputTextArea";
import Switch from "@/refresh-components/inputs/Switch";
import { useUser } from "@/providers/UserProvider";
import { useTheme } from "next-themes";
import { MemoryItem, ThemePreference } from "@/lib/types";
import useUserPersonalization from "@/hooks/useUserPersonalization";
import { toast } from "@/hooks/useToast";
import LLMPopover from "@/refresh-components/popovers/LLMPopover";
import { deleteAllChatSessions } from "@/app/app/services/lib";
import { useAuthType, useLlmManager } from "@/lib/hooks";
import useChatSessions from "@/hooks/useChatSessions";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useFilter from "@/hooks/useFilter";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { Button } from "@opal/components";
import useFederatedOAuthStatus from "@/hooks/useFederatedOAuthStatus";
import useCCPairs from "@/hooks/useCCPairs";
import { ValidSources } from "@/lib/types";
import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";
import Separator from "@/refresh-components/Separator";
import Text from "@/refresh-components/texts/Text";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import Code from "@/refresh-components/Code";
import CharacterCount from "@/refresh-components/CharacterCount";
import { InputPrompt } from "@/app/app/interfaces";
import usePromptShortcuts from "@/hooks/usePromptShortcuts";
import ColorSwatch from "@/refresh-components/ColorSwatch";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import Memories from "@/sections/settings/Memories";
import { FederatedConnectorOAuthStatus } from "@/components/chat/FederatedOAuthModal";
import {
  CHAT_BACKGROUND_OPTIONS,
  CHAT_BACKGROUND_NONE,
} from "@/lib/constants/chatBackgrounds";
import { SvgCheck } from "@opal/icons";
import { cn } from "@/lib/utils";
import { Interactive } from "@opal/core";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useSettingsContext } from "@/providers/SettingsProvider";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { useCloudSubscription } from "@/hooks/useCloudSubscription";

interface PAT {
  id: number;
  name: string;
  token_display: string;
  created_at: string;
  expires_at: string | null;
  last_used_at: string | null;
}

interface CreatedTokenState {
  id: number;
  token: string;
  name: string;
}

interface PATModalProps {
  isCreating: boolean;
  newTokenName: string;
  setNewTokenName: (name: string) => void;
  expirationDays: string;
  setExpirationDays: (days: string) => void;
  onClose: () => void;
  onCreate: () => void;
  createdToken: CreatedTokenState | null;
}

function PATModal({
  isCreating,
  newTokenName,
  setNewTokenName,
  expirationDays,
  setExpirationDays,
  onClose,
  onCreate,
  createdToken,
}: PATModalProps) {
  return (
    <ConfirmationModalLayout
      icon={SvgKey}
      title="Create Access Token"
      description="All API requests using this token will inherit your access permissions and be attributed to you as an individual."
      onClose={onClose}
      submit={
        !!createdToken?.token ? (
          <Button onClick={onClose}>Done</Button>
        ) : (
          <Button
            disabled={isCreating || !newTokenName.trim()}
            onClick={onCreate}
          >
            {isCreating ? "Creating Token..." : "Create Token"}
          </Button>
        )
      }
      hideCancel={!!createdToken}
    >
      <Section gap={1}>
        {/* Token Creation*/}
        {!!createdToken?.token ? (
          <InputLayouts.Vertical title="Token Value">
            <Code>{createdToken.token}</Code>
          </InputLayouts.Vertical>
        ) : (
          <>
            <InputLayouts.Vertical title="Token Name">
              <InputTypeIn
                placeholder="Name your token"
                value={newTokenName}
                onChange={(e) => setNewTokenName(e.target.value)}
                variant={isCreating ? "disabled" : undefined}
                autoComplete="new-password"
              />
            </InputLayouts.Vertical>
            <InputLayouts.Vertical
              title="Expires in"
              subDescription={
                expirationDays === "null"
                  ? undefined
                  : (() => {
                      const expiryDate = new Date();
                      expiryDate.setUTCDate(
                        expiryDate.getUTCDate() + parseInt(expirationDays)
                      );
                      expiryDate.setUTCHours(23, 59, 59, 999);
                      return `This token will expire at: ${expiryDate
                        .toISOString()
                        .replace("T", " ")
                        .replace(".999Z", " UTC")}`;
                    })()
              }
            >
              <InputSelect
                value={expirationDays}
                onValueChange={setExpirationDays}
                disabled={isCreating}
              >
                <InputSelect.Trigger placeholder="Select expiration" />
                <InputSelect.Content>
                  <InputSelect.Item value="7">7 days</InputSelect.Item>
                  <InputSelect.Item value="30">30 days</InputSelect.Item>
                  <InputSelect.Item value="365">365 days</InputSelect.Item>
                  <InputSelect.Item value="null">
                    No expiration
                  </InputSelect.Item>
                </InputSelect.Content>
              </InputSelect>
            </InputLayouts.Vertical>
          </>
        )}
      </Section>
    </ConfirmationModalLayout>
  );
}

function GeneralSettings() {
  const {
    user,
    updateUserPersonalization,
    updateUserThemePreference,
    updateUserChatBackground,
  } = useUser();
  const { theme, setTheme, systemTheme } = useTheme();
  const { refreshChatSessions } = useChatSessions();
  const router = useRouter();
  const pathname = usePathname();
  const [isDeleting, setIsDeleting] = useState(false);
  const [showDeleteConfirmation, setShowDeleteConfirmation] = useState(false);

  const {
    personalizationValues,
    updatePersonalizationField,
    handleSavePersonalization,
  } = useUserPersonalization(user, updateUserPersonalization, {
    onSuccess: () => toast.success("Personalization updated successfully"),
    onError: () => toast.error("Failed to update personalization"),
  });

  // Track initial values to detect changes
  const initialNameRef = useRef(personalizationValues.name);
  const initialRoleRef = useRef(personalizationValues.role);

  // Update refs when personalization values change from external source
  useEffect(() => {
    initialNameRef.current = personalizationValues.name;
    initialRoleRef.current = personalizationValues.role;
  }, [user?.personalization]);

  const handleDeleteAllChats = useCallback(async () => {
    setIsDeleting(true);
    try {
      const response = await deleteAllChatSessions();
      if (response.ok) {
        toast.success("All your chat sessions have been deleted.");
        await refreshChatSessions();
        setShowDeleteConfirmation(false);
      } else {
        throw new Error("Failed to delete all chat sessions");
      }
    } catch (error) {
      toast.error("Failed to delete all chat sessions");
    } finally {
      setIsDeleting(false);
    }
  }, [pathname, router, refreshChatSessions]);

  return (
    <>
      {showDeleteConfirmation && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title="Delete All Chats"
          onClose={() => setShowDeleteConfirmation(false)}
          submit={
            <Button
              disabled={isDeleting}
              variant="danger"
              onClick={() => {
                void handleDeleteAllChats();
              }}
            >
              {isDeleting ? "Deleting..." : "Delete"}
            </Button>
          }
        >
          <Section gap={0.5} alignItems="start">
            <Text>
              All your chat sessions and history will be permanently deleted.
              Deletion cannot be undone.
            </Text>
            <Text>Are you sure you want to delete all chats?</Text>
          </Section>
        </ConfirmationModalLayout>
      )}

      <Section gap={2}>
        <Section gap={0.75}>
          <Content
            title="Profile"
            sizePreset="main-content"
            variant="section"
            widthVariant="full"
          />
          <Card>
            <InputLayouts.Horizontal
              title="Full Name"
              description="We'll display this name in the app."
              center
            >
              <InputTypeIn
                placeholder="Your name"
                value={personalizationValues.name}
                onChange={(e) =>
                  updatePersonalizationField("name", e.target.value)
                }
                onKeyDown={(e) => {
                  if (e.key === "Enter") {
                    e.currentTarget.blur();
                  }
                }}
                onBlur={() => {
                  // Only save if the value has changed
                  if (personalizationValues.name !== initialNameRef.current) {
                    void handleSavePersonalization();
                    initialNameRef.current = personalizationValues.name;
                  }
                }}
              />
            </InputLayouts.Horizontal>
            <InputLayouts.Horizontal
              title="Work Role"
              description="Share your role to better tailor responses."
              center
            >
              <InputTypeIn
                placeholder="Your role"
                value={personalizationValues.role}
                onChange={(e) =>
                  updatePersonalizationField("role", e.target.value)
                }
                onKeyDown={(e) => {
                  if (e.key === "Enter") {
                    e.currentTarget.blur();
                  }
                }}
                onBlur={() => {
                  // Only save if the value has changed
                  if (personalizationValues.role !== initialRoleRef.current) {
                    void handleSavePersonalization();
                    initialRoleRef.current = personalizationValues.role;
                  }
                }}
              />
            </InputLayouts.Horizontal>
          </Card>
        </Section>

        <Section gap={0.75}>
          <Content
            title="Appearance"
            sizePreset="main-content"
            variant="section"
            widthVariant="full"
          />
          <Card>
            <InputLayouts.Horizontal
              title="Color Mode"
              description="Select your preferred color mode for the UI."
              center
            >
              <InputSelect
                value={theme}
                onValueChange={(value) => {
                  setTheme(value);
                  updateUserThemePreference(value as ThemePreference);
                }}
              >
                <InputSelect.Trigger />
                <InputSelect.Content>
                  <InputSelect.Item
                    value={ThemePreference.SYSTEM}
                    icon={() => (
                      <ColorSwatch
                        light={systemTheme === "light"}
                        dark={systemTheme === "dark"}
                      />
                    )}
                    description={
                      systemTheme
                        ? systemTheme.charAt(0).toUpperCase() +
                          systemTheme.slice(1)
                        : undefined
                    }
                  >
                    Auto
                  </InputSelect.Item>
                  <InputSelect.Separator />
                  <InputSelect.Item
                    value={ThemePreference.LIGHT}
                    icon={() => <ColorSwatch light />}
                  >
                    Light
                  </InputSelect.Item>
                  <InputSelect.Item
                    value={ThemePreference.DARK}
                    icon={() => <ColorSwatch dark />}
                  >
                    Dark
                  </InputSelect.Item>
                </InputSelect.Content>
              </InputSelect>
            </InputLayouts.Horizontal>
            <InputLayouts.Vertical title="Chat Background">
              <div className="flex flex-wrap gap-2">
                {CHAT_BACKGROUND_OPTIONS.map((bg) => {
                  const currentBackgroundId =
                    user?.preferences?.chat_background ?? "none";
                  const isSelected = currentBackgroundId === bg.id;
                  const isNone = bg.src === CHAT_BACKGROUND_NONE;

                  return (
                    <button
                      key={bg.id}
                      onClick={() =>
                        updateUserChatBackground(
                          bg.id === CHAT_BACKGROUND_NONE ? null : bg.id
                        )
                      }
                      className="relative overflow-hidden rounded-lg transition-all w-[90px] h-[68px] cursor-pointer border-none p-0 bg-transparent group"
                      title={bg.label}
                      aria-label={`${bg.label} background${
                        isSelected ? " (selected)" : ""
                      }`}
                    >
                      {isNone ? (
                        <div className="absolute inset-0 bg-background flex items-center justify-center">
                          <span className="text-xs text-text-02">None</span>
                        </div>
                      ) : (
                        <div
                          className="absolute inset-0 bg-cover bg-center transition-transform duration-300 group-hover:scale-105"
                          style={{ backgroundImage: `url(${bg.thumbnail})` }}
                        />
                      )}
                      <div
                        className={cn(
                          "absolute inset-0 transition-all rounded-lg",
                          isSelected
                            ? "ring-2 ring-inset ring-theme-primary-05"
                            : "ring-1 ring-inset ring-border-02 group-hover:ring-border-03"
                        )}
                      />
                      {isSelected && (
                        <div className="absolute top-1.5 right-1.5 w-4 h-4 rounded-full bg-theme-primary-05 flex items-center justify-center">
                          <SvgCheck className="w-2.5 h-2.5 stroke-text-inverted-05" />
                        </div>
                      )}
                    </button>
                  );
                })}
              </div>
            </InputLayouts.Vertical>
          </Card>
        </Section>

        <Separator noPadding />

        <Section gap={0.75}>
          <Content
            title="Danger Zone"
            sizePreset="main-content"
            variant="section"
            widthVariant="full"
          />
          <Card>
            <InputLayouts.Horizontal
              title="Delete All Chats"
              description="Permanently delete all your chat sessions."
              center
            >
              <Button
                variant="danger"
                prominence="secondary"
                onClick={() => setShowDeleteConfirmation(true)}
                icon={SvgTrash}
                interaction={showDeleteConfirmation ? "hover" : "rest"}
              >
                Delete All Chats
              </Button>
            </InputLayouts.Horizontal>
          </Card>
        </Section>
      </Section>
    </>
  );
}

interface LocalShortcut extends InputPrompt {
  isNew: boolean;
}

function PromptShortcuts() {
  const { promptShortcuts, isLoading, error, refresh } = usePromptShortcuts();
  const [shortcuts, setShortcuts] = useState<LocalShortcut[]>([]);
  const [isInitialLoad, setIsInitialLoad] = useState(true);

  // Initialize shortcuts when input prompts are loaded
  useEffect(() => {
    if (isLoading || error) return;

    // Convert InputPrompt[] to LocalShortcut[] with isNew: false for existing items
    // Sort by id to maintain stable ordering when editing
    const existingShortcuts: LocalShortcut[] = promptShortcuts
      .map((shortcut) => ({
        ...shortcut,
        isNew: false,
      }))
      .sort((a, b) => a.id - b.id);

    // Always ensure there's at least one empty row
    setShortcuts([
      ...existingShortcuts,
      {
        id: Date.now(),
        prompt: "",
        content: "",
        active: true,
        is_public: false,
        isNew: true,
      },
    ]);
    setIsInitialLoad(false);
  }, [promptShortcuts, isLoading, error]);

  // Show error popup if fetch fails
  useEffect(() => {
    if (!error) return;
    toast.error("Failed to load shortcuts");
  }, [error]);

  const handleUpdateShortcut = useCallback(
    (index: number, field: "prompt" | "content", value: string) => {
      setShortcuts((prev) => {
        const next = prev.map((shortcut, i) =>
          i === index ? { ...shortcut, [field]: value } : shortcut
        );

        const isEmptyNew = (s: LocalShortcut) =>
          s.isNew && !s.prompt.trim() && !s.content.trim();

        const emptyCount = next.filter(isEmptyNew).length;

        if (emptyCount === 0) {
          return [
            ...next,
            {
              id: Date.now(),
              prompt: "",
              content: "",
              active: true,
              is_public: false,
              isNew: true,
            },
          ];
        }

        if (emptyCount > 1) {
          const userRow = next[index];
          const userRowEmpty = userRow !== undefined && isEmptyNew(userRow);
          let keepIndex = -1;
          if (userRowEmpty) {
            keepIndex = index;
          } else {
            for (let i = next.length - 1; i >= 0; i--) {
              const row = next[i];
              if (row !== undefined && isEmptyNew(row)) {
                keepIndex = i;
                break;
              }
            }
          }
          return next.filter((s, i) => !isEmptyNew(s) || i === keepIndex);
        }

        return next;
      });
    },
    []
  );

  const handleRemoveShortcut = useCallback(
    async (index: number) => {
      const shortcut = shortcuts[index];
      if (!shortcut) return;

      // If it's a new shortcut, just remove from state
      if (shortcut.isNew) {
        setShortcuts((prev) => prev.filter((_, i) => i !== index));
        return;
      }

      // Otherwise, delete from backend
      try {
        const response = await fetch(`/api/input_prompt/${shortcut.id}`, {
          method: "DELETE",
        });

        if (response.ok) {
          setShortcuts((prev) => prev.filter((_, i) => i !== index));
          await refresh();
          toast.success("Shortcut deleted");
        } else {
          throw new Error("Failed to delete shortcut");
        }
      } catch (error) {
        toast.error("Failed to delete shortcut");
      }
    },
    [shortcuts, refresh]
  );

  const handleSaveShortcut = useCallback(
    async (index: number) => {
      const shortcut = shortcuts[index];
      if (!shortcut || !shortcut.prompt.trim() || !shortcut.content.trim()) {
        toast.error("Both shortcut and expansion are required");
        return;
      }

      try {
        if (shortcut.isNew) {
          // Create new shortcut
          const response = await fetch("/api/input_prompt", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              prompt: shortcut.prompt,
              content: shortcut.content,
              active: true,
              is_public: false,
            }),
          });

          if (response.ok) {
            await refresh();
            toast.success("Shortcut created");
          } else {
            throw new Error("Failed to create shortcut");
          }
        } else {
          // Update existing shortcut
          const response = await fetch(`/api/input_prompt/${shortcut.id}`, {
            method: "PATCH",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              prompt: shortcut.prompt,
              content: shortcut.content,
              active: true,
              is_public: false,
            }),
          });

          if (response.ok) {
            await refresh();
            toast.success("Shortcut updated");
          } else {
            throw new Error("Failed to update shortcut");
          }
        }
      } catch (error) {
        toast.error("Failed to save shortcut");
      }
    },
    [shortcuts, refresh]
  );

  const handleBlurShortcut = useCallback(
    async (index: number) => {
      const shortcut = shortcuts[index];
      if (!shortcut) return;

      const hasPrompt = shortcut.prompt.trim();
      const hasContent = shortcut.content.trim();

      // Both fields are filled - save/update the shortcut
      if (hasPrompt && hasContent) {
        await handleSaveShortcut(index);
      }
      // For existing shortcuts with incomplete fields, error state will be shown in UI
      // User must use the delete button to remove them
    },
    [shortcuts, handleSaveShortcut]
  );

  return (
    <>
      {shortcuts.length > 0 && (
        <Section gap={0.75}>
          {shortcuts.map((shortcut, index) => {
            const isEmpty = !shortcut.prompt.trim() && !shortcut.content.trim();
            const isExisting = !shortcut.isNew;
            const hasPrompt = shortcut.prompt.trim();
            const hasContent = shortcut.content.trim();

            // Show error for existing shortcuts with incomplete fields
            // (either one field empty or both fields empty)
            const showPromptError = isExisting && !hasPrompt;
            const showContentError = isExisting && !hasContent;

            return (
              <div
                key={shortcut.id}
                className="w-full grid grid-cols-[1fr_min-content] gap-x-1 gap-y-1"
              >
                <InputTypeIn
                  prefixText="/"
                  placeholder="Summarize"
                  value={shortcut.prompt}
                  onChange={(e) =>
                    handleUpdateShortcut(index, "prompt", e.target.value)
                  }
                  onBlur={
                    shortcut.is_public
                      ? undefined
                      : () => void handleBlurShortcut(index)
                  }
                  variant={
                    shortcut.is_public
                      ? "readOnly"
                      : showPromptError
                        ? "error"
                        : undefined
                  }
                />
                <Section>
                  <Button
                    disabled={(shortcut.isNew && isEmpty) || shortcut.is_public}
                    icon={SvgMinusCircle}
                    onClick={() => void handleRemoveShortcut(index)}
                    prominence="tertiary"
                    aria-label="Remove shortcut"
                    tooltip={
                      shortcut.is_public
                        ? "Cannot delete public prompt-shortcuts."
                        : undefined
                    }
                  />
                </Section>
                <InputTextArea
                  placeholder="Provide a concise 1–2 sentence summary of the following:"
                  value={shortcut.content}
                  onChange={(e) =>
                    handleUpdateShortcut(index, "content", e.target.value)
                  }
                  onBlur={
                    shortcut.is_public
                      ? undefined
                      : () => void handleBlurShortcut(index)
                  }
                  variant={
                    shortcut.is_public
                      ? "readOnly"
                      : showContentError
                        ? "error"
                        : undefined
                  }
                  rows={3}
                />
                <div />
              </div>
            );
          })}
        </Section>
      )}
    </>
  );
}

function ChatPreferencesSettings() {
  const {
    user,
    updateUserPersonalization,
    updateUserAutoScroll,
    updateUserShortcuts,
    updateUserDefaultModel,
    updateUserDefaultAppMode,
    updateUserVoiceSettings,
  } = useUser();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const settings = useSettingsContext();
  const { isSearchModeAvailable: searchUiEnabled } = settings;
  const llmManager = useLlmManager();

  const {
    personalizationValues,
    toggleUseMemories,
    toggleEnableMemoryTool,
    updateUserPreferences,
    handleSavePersonalization,
  } = useUserPersonalization(user, updateUserPersonalization, {
    onSuccess: () => toast.success("Preferences saved"),
    onError: () => toast.error("Failed to save preferences"),
  });
  const [draftVoicePlaybackSpeed, setDraftVoicePlaybackSpeed] = useState(
    user?.preferences.voice_playback_speed ?? 1
  );

  useEffect(() => {
    setDraftVoicePlaybackSpeed(user?.preferences.voice_playback_speed ?? 1);
  }, [user?.preferences.voice_playback_speed]);

  const saveVoiceSettings = useCallback(
    async (settings: {
      auto_send?: boolean;
      auto_playback?: boolean;
      playback_speed?: number;
    }) => {
      try {
        await updateUserVoiceSettings(settings);
        toast.success("Preferences saved");
      } catch {
        toast.error("Failed to save preferences");
      }
    },
    [updateUserVoiceSettings]
  );

  const commitVoicePlaybackSpeed = useCallback(() => {
    const currentSpeed = user?.preferences.voice_playback_speed ?? 1;
    if (Math.abs(currentSpeed - draftVoicePlaybackSpeed) < 0.001) {
      return;
    }
    void saveVoiceSettings({
      playback_speed: draftVoicePlaybackSpeed,
    });
  }, [
    draftVoicePlaybackSpeed,
    saveVoiceSettings,
    user?.preferences.voice_playback_speed,
  ]);

  // Wrapper to save memories and return success/failure
  const handleSaveMemories = useCallback(
    async (newMemories: MemoryItem[]): Promise<boolean> => {
      const result = await handleSavePersonalization(
        { memories: newMemories },
        true
      );
      return !!result;
    },
    [handleSavePersonalization]
  );

  return (
    <Section gap={2}>
      <Section gap={0.75}>
        <Content
          title="Chats"
          sizePreset="main-content"
          variant="section"
          widthVariant="full"
        />
        <Card>
          <InputLayouts.Horizontal
            title="Default Model"
            description="This model will be used by Onyx by default in your chats."
          >
            <LLMPopover
              llmManager={llmManager}
              onSelect={(selected) => {
                void updateUserDefaultModel(selected);
              }}
            />
          </InputLayouts.Horizontal>

          <InputLayouts.Horizontal
            title="Chat Auto-scroll"
            description="Automatically scroll to new content as chat generates response."
          >
            <Switch
              checked={user?.preferences.auto_scroll}
              onCheckedChange={(checked) => {
                updateUserAutoScroll(checked);
              }}
            />
          </InputLayouts.Horizontal>

          {isPaidEnterpriseFeaturesEnabled && (
            <SimpleTooltip
              tooltip={
                searchUiEnabled
                  ? undefined
                  : "Search UI is disabled and can only be enabled by an admin."
              }
              side="top"
            >
              <InputLayouts.Horizontal
                title="Default App Mode"
                description="Choose whether new sessions start in Search or Chat mode."
                center
                disabled={!searchUiEnabled}
              >
                <InputSelect
                  value={user?.preferences.default_app_mode ?? "CHAT"}
                  onValueChange={(value) => {
                    void updateUserDefaultAppMode(value as "CHAT" | "SEARCH");
                  }}
                  disabled={!searchUiEnabled}
                >
                  <InputSelect.Trigger />
                  <InputSelect.Content>
                    <InputSelect.Item value="CHAT">Chat</InputSelect.Item>
                    <InputSelect.Item value="SEARCH">Search</InputSelect.Item>
                  </InputSelect.Content>
                </InputSelect>
              </InputLayouts.Horizontal>
            </SimpleTooltip>
          )}
        </Card>
      </Section>

      <Section gap={0.75}>
        <InputLayouts.Vertical
          title="Personal Preferences"
          description="Provide your custom preferences in natural language."
        >
          <InputTextArea
            placeholder="Describe how you want the system to behave and the tone it should use."
            value={personalizationValues.user_preferences}
            onChange={(e) => updateUserPreferences(e.target.value)}
            onBlur={() => void handleSavePersonalization()}
            rows={4}
            maxRows={10}
            autoResize
            maxLength={500}
          />
          <CharacterCount
            value={personalizationValues.user_preferences || ""}
            limit={500}
          />
        </InputLayouts.Vertical>
        <Content
          title="Memory"
          sizePreset="main-content"
          variant="section"
          widthVariant="full"
        />
        <Card>
          <InputLayouts.Horizontal
            title="Reference Stored Memories"
            description="Let Onyx reference stored memories in chats."
          >
            <Switch
              checked={personalizationValues.use_memories}
              onCheckedChange={(checked) => {
                toggleUseMemories(checked);
                void handleSavePersonalization({ use_memories: checked });
              }}
            />
          </InputLayouts.Horizontal>
          <InputLayouts.Horizontal
            title="Update Memories"
            description="Let Onyx generate and update stored memories."
          >
            <Switch
              checked={personalizationValues.enable_memory_tool}
              onCheckedChange={(checked) => {
                toggleEnableMemoryTool(checked);
                void handleSavePersonalization({
                  enable_memory_tool: checked,
                });
              }}
            />
          </InputLayouts.Horizontal>

          {(personalizationValues.use_memories ||
            personalizationValues.enable_memory_tool ||
            personalizationValues.memories.length > 0) && (
            <Memories
              memories={personalizationValues.memories}
              onSaveMemories={handleSaveMemories}
            />
          )}
        </Card>
      </Section>

      <Section gap={0.75}>
        <Content
          title="Prompt Shortcuts"
          sizePreset="main-content"
          variant="section"
          widthVariant="full"
        />
        <Card>
          <InputLayouts.Horizontal
            title="Use Prompt Shortcuts"
            description="Enable shortcuts to quickly insert common prompts."
          >
            <Switch
              checked={user?.preferences?.shortcut_enabled}
              onCheckedChange={(checked) => {
                updateUserShortcuts(checked);
              }}
            />
          </InputLayouts.Horizontal>

          {user?.preferences?.shortcut_enabled && <PromptShortcuts />}
        </Card>
      </Section>

      <Section gap={0.75}>
        <Content
          title="Voice"
          sizePreset="main-content"
          variant="section"
          widthVariant="full"
        />
        <Card>
          <InputLayouts.Horizontal
            title="Auto-Send on Pause"
            description="Automatically send voice input when you stop speaking."
          >
            <Switch
              checked={user?.preferences.voice_auto_send ?? false}
              onCheckedChange={(checked) => {
                void saveVoiceSettings({ auto_send: checked });
              }}
            />
          </InputLayouts.Horizontal>

          <InputLayouts.Horizontal
            title="Auto-Playback"
            description="Automatically play voice responses."
          >
            <Switch
              checked={user?.preferences.voice_auto_playback ?? false}
              onCheckedChange={(checked) => {
                void saveVoiceSettings({ auto_playback: checked });
              }}
            />
          </InputLayouts.Horizontal>

          <InputLayouts.Horizontal
            title="Playback Speed"
            description="Adjust the speed of voice playback."
          >
            <div className="flex items-center gap-3">
              <input
                type="range"
                min="0.5"
                max="2"
                step="0.1"
                value={draftVoicePlaybackSpeed}
                onChange={(e) => {
                  setDraftVoicePlaybackSpeed(parseFloat(e.target.value));
                }}
                onMouseUp={commitVoicePlaybackSpeed}
                onTouchEnd={commitVoicePlaybackSpeed}
                onKeyUp={(e) => {
                  if (e.key === "ArrowLeft" || e.key === "ArrowRight") {
                    commitVoicePlaybackSpeed();
                  }
                }}
                className="w-24 h-2 rounded-lg appearance-none cursor-pointer bg-background-neutral-02"
              />
              <span className="text-sm text-text-02 w-10">
                {draftVoicePlaybackSpeed.toFixed(1)}x
              </span>
            </div>
          </InputLayouts.Horizontal>
        </Card>
      </Section>
    </Section>
  );
}

function AccountsAccessSettings() {
  const { user, authTypeMetadata } = useUser();
  const authType = useAuthType();
  const [showPasswordModal, setShowPasswordModal] = useState(false);

  const passwordValidationSchema = Yup.object().shape({
    currentPassword: Yup.string().required("Current password is required"),
    newPassword: Yup.string()
      .min(
        authTypeMetadata.passwordMinLength,
        `Password must be at least ${authTypeMetadata.passwordMinLength} characters`
      )
      .required("New password is required"),
    confirmPassword: Yup.string()
      .oneOf([Yup.ref("newPassword")], "Passwords do not match")
      .required("Please confirm your new password"),
  });

  // PAT state
  const [showCreateModal, setShowCreateModal] = useState(false);
  const [isCreating, setIsCreating] = useState(false);
  const [newTokenName, setNewTokenName] = useState("");
  const [expirationDays, setExpirationDays] = useState<string>("30");
  const [newlyCreatedToken, setNewlyCreatedToken] =
    useState<CreatedTokenState | null>(null);
  const [tokenToDelete, setTokenToDelete] = useState<PAT | null>(null);

  const canCreateTokens = useCloudSubscription();

  const showPasswordSection = Boolean(user?.password_configured);
  const showTokensSection = authType !== null;

  // Fetch PATs with SWR
  const {
    data: pats = [],
    mutate,
    error,
    isLoading,
  } = useSWR<PAT[]>(
    showTokensSection ? SWR_KEYS.userPats : null,
    errorHandlingFetcher,
    {
      revalidateOnFocus: true,
      dedupingInterval: 2000,
      fallbackData: [],
    }
  );

  // Use filter hook for searching tokens
  const {
    query,
    setQuery,
    filtered: filteredPats,
  } = useFilter(pats, (pat) => `${pat.name} ${pat.token_display}`);

  // Show error popup if SWR fetch fails
  useEffect(() => {
    if (error) {
      toast.error("Failed to load tokens");
    }
  }, [error]);

  const createPAT = useCallback(async () => {
    if (!newTokenName.trim()) {
      toast.error("Token name is required");
      return;
    }

    setIsCreating(true);
    try {
      const response = await fetch("/api/user/pats", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          name: newTokenName,
          expiration_days:
            expirationDays === "null" ? null : parseInt(expirationDays),
        }),
      });

      if (response.ok) {
        const data = await response.json();
        // Store the newly created token - modal will switch to display view
        setNewlyCreatedToken({
          id: data.id,
          token: data.token,
          name: newTokenName,
        });
        toast.success("Token created successfully");
        // Revalidate the token list
        await mutate();
      } else {
        const errorData = await response.json();
        toast.error(errorData.detail || "Failed to create token");
      }
    } catch (error) {
      toast.error("Network error creating token");
    } finally {
      setIsCreating(false);
    }
  }, [newTokenName, expirationDays, mutate]);

  const deletePAT = useCallback(
    async (patId: number) => {
      try {
        const response = await fetch(`/api/user/pats/${patId}`, {
          method: "DELETE",
        });

        if (response.ok) {
          // Clear the newly created token if it's the one being deleted
          if (newlyCreatedToken?.id === patId) {
            setNewlyCreatedToken(null);
          }
          await mutate();
          toast.success("Token deleted successfully");
          setTokenToDelete(null);
        } else {
          toast.error("Failed to delete token");
        }
      } catch (error) {
        toast.error("Network error deleting token");
      }
    },
    [newlyCreatedToken, mutate]
  );

  const handleChangePassword = useCallback(
    async (values: {
      currentPassword: string;
      newPassword: string;
      confirmPassword: string;
    }) => {
      try {
        const response = await fetch("/api/password/change-password", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            old_password: values.currentPassword,
            new_password: values.newPassword,
          }),
        });

        if (response.ok) {
          toast.success("Password updated successfully");
          setShowPasswordModal(false);
        } else {
          const errorData = await response.json();
          toast.error(errorData.detail || "Failed to change password");
        }
      } catch (error) {
        toast.error("An error occurred while changing the password");
      }
    },
    []
  );

  return (
    <>
      {showCreateModal && (
        <PATModal
          isCreating={isCreating}
          newTokenName={newTokenName}
          setNewTokenName={setNewTokenName}
          expirationDays={expirationDays}
          setExpirationDays={setExpirationDays}
          onClose={() => {
            setShowCreateModal(false);
            setNewTokenName("");
            setExpirationDays("30");
            setNewlyCreatedToken(null);
          }}
          onCreate={createPAT}
          createdToken={newlyCreatedToken}
        />
      )}

      {tokenToDelete && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title="Revoke Access Token"
          onClose={() => setTokenToDelete(null)}
          submit={
            <Button
              variant="danger"
              onClick={() => deletePAT(tokenToDelete.id)}
            >
              Revoke
            </Button>
          }
        >
          <Section gap={0.5} alignItems="start">
            <Text>
              Any application using the token{" "}
              <Text className="!font-bold">{tokenToDelete.name}</Text>{" "}
              <Text secondaryMono>({tokenToDelete.token_display})</Text> will
              lose access to Onyx. This action cannot be undone.
            </Text>
            <Text>Are you sure you want to revoke this token?</Text>
          </Section>
        </ConfirmationModalLayout>
      )}

      {showPasswordModal && (
        <Formik
          initialValues={{
            currentPassword: "",
            newPassword: "",
            confirmPassword: "",
          }}
          validationSchema={passwordValidationSchema}
          validateOnChange={true}
          validateOnBlur={true}
          onSubmit={() => undefined}
        >
          {({
            values,
            handleChange,
            handleBlur,
            isSubmitting,
            dirty,
            isValid,
            errors,
            touched,
            setSubmitting,
          }) => (
            <Form>
              <ConfirmationModalLayout
                icon={SvgLock}
                title="Change Password"
                submit={
                  <Button
                    disabled={isSubmitting || !dirty || !isValid}
                    onClick={async () => {
                      setSubmitting(true);
                      try {
                        await handleChangePassword(values);
                      } finally {
                        setSubmitting(false);
                      }
                    }}
                  >
                    {isSubmitting ? "Updating..." : "Update"}
                  </Button>
                }
                onClose={() => {
                  setShowPasswordModal(false);
                }}
              >
                <Section gap={1}>
                  <Section gap={0.25} alignItems="start">
                    <InputLayouts.Vertical
                      name="currentPassword"
                      title="Current Password"
                    >
                      <PasswordInputTypeIn
                        name="currentPassword"
                        value={values.currentPassword}
                        onChange={handleChange}
                        onBlur={handleBlur}
                        error={
                          touched.currentPassword && !!errors.currentPassword
                        }
                      />
                    </InputLayouts.Vertical>
                  </Section>
                  <Section gap={0.25} alignItems="start">
                    <InputLayouts.Vertical
                      name="newPassword"
                      title="New Password"
                    >
                      <PasswordInputTypeIn
                        name="newPassword"
                        value={values.newPassword}
                        onChange={handleChange}
                        onBlur={handleBlur}
                        error={touched.newPassword && !!errors.newPassword}
                      />
                    </InputLayouts.Vertical>
                  </Section>
                  <Section gap={0.25} alignItems="start">
                    <InputLayouts.Vertical
                      name="confirmPassword"
                      title="Confirm New Password"
                    >
                      <PasswordInputTypeIn
                        name="confirmPassword"
                        value={values.confirmPassword}
                        onChange={handleChange}
                        onBlur={handleBlur}
                        error={
                          touched.confirmPassword && !!errors.confirmPassword
                        }
                      />
                    </InputLayouts.Vertical>
                  </Section>
                </Section>
              </ConfirmationModalLayout>
            </Form>
          )}
        </Formik>
      )}

      <Section gap={2}>
        <Section gap={0.75}>
          <Content
            title="Accounts"
            sizePreset="main-content"
            variant="section"
            widthVariant="full"
          />
          <Card>
            <InputLayouts.Horizontal
              title="Email"
              description="Your account email address."
              center
              nonInteractive
            >
              <Text>{user?.email ?? "anonymous"}</Text>
            </InputLayouts.Horizontal>

            {showPasswordSection && (
              <InputLayouts.Horizontal
                title="Password"
                description="Update your account password."
                center
              >
                <Button
                  prominence="secondary"
                  icon={SvgLock}
                  onClick={() => setShowPasswordModal(true)}
                  interaction={showPasswordModal ? "hover" : "rest"}
                >
                  Change Password
                </Button>
              </InputLayouts.Horizontal>
            )}
          </Card>
        </Section>

        {showTokensSection && (
          <Section gap={0.75}>
            <Content
              title="Access Tokens"
              sizePreset="main-content"
              variant="section"
              widthVariant="full"
            />
            {canCreateTokens ? (
              <Card padding={0.25}>
                <Section gap={0}>
                  <Section flexDirection="row" padding={0.25} gap={0.5}>
                    {pats.length === 0 ? (
                      <Section padding={0.5} alignItems="start">
                        <Text text03 secondaryBody>
                          {isLoading
                            ? "Loading tokens..."
                            : "No access tokens created."}
                        </Text>
                      </Section>
                    ) : (
                      <InputTypeIn
                        placeholder="Search..."
                        value={query}
                        onChange={(e) => setQuery(e.target.value)}
                        leftSearchIcon
                        variant="internal"
                      />
                    )}
                    <CreateButton
                      onClick={() => setShowCreateModal(true)}
                      secondary={false}
                      internal
                      transient={showCreateModal}
                      rightIcon
                    >
                      New Access Token
                    </CreateButton>
                  </Section>

                  <Section gap={0.25}>
                    {filteredPats.map((pat) => {
                      const now = new Date();
                      const createdDate = new Date(pat.created_at);
                      const daysSinceCreation = Math.floor(
                        (now.getTime() - createdDate.getTime()) /
                          (1000 * 60 * 60 * 24)
                      );

                      let expiryText = "Never expires";
                      if (pat.expires_at) {
                        const expiresDate = new Date(pat.expires_at);
                        const daysUntilExpiry = Math.ceil(
                          (expiresDate.getTime() - now.getTime()) /
                            (1000 * 60 * 60 * 24)
                        );
                        expiryText = `Expires in ${daysUntilExpiry} day${
                          daysUntilExpiry === 1 ? "" : "s"
                        }`;
                      }

                      const middleText = `Created ${daysSinceCreation} day${
                        daysSinceCreation === 1 ? "" : "s"
                      } ago - ${expiryText}`;

                      return (
                        <Interactive.Container
                          key={pat.id}
                          heightVariant="fit"
                          widthVariant="full"
                        >
                          <div className="w-full bg-background-tint-01">
                            <AttachmentItemLayout
                              icon={SvgKey}
                              title={pat.name}
                              description={pat.token_display}
                              middleText={middleText}
                              rightChildren={
                                <Button
                                  icon={SvgTrash}
                                  onClick={() => setTokenToDelete(pat)}
                                  prominence="tertiary"
                                  size="sm"
                                  aria-label={`Delete token ${pat.name}`}
                                />
                              }
                            />
                          </div>
                        </Interactive.Container>
                      );
                    })}
                  </Section>
                </Section>
              </Card>
            ) : (
              <Card>
                <Section flexDirection="row" justifyContent="between">
                  <Text text03 secondaryBody>
                    Access tokens require an active paid subscription.
                  </Text>
                  <Button prominence="secondary" href="/admin/billing">
                    Upgrade Plan
                  </Button>
                </Section>
              </Card>
            )}
          </Section>
        )}
      </Section>
    </>
  );
}

interface IndexedConnectorCardProps {
  source: ValidSources;
  isActive: boolean;
}

function IndexedConnectorCard({ source, isActive }: IndexedConnectorCardProps) {
  const sourceMetadata = getSourceMetadata(source);

  return (
    <Card>
      <Content
        icon={sourceMetadata.icon}
        title={sourceMetadata.displayName}
        description={isActive ? "Connected" : "Paused"}
        sizePreset="main-content"
        variant="section"
      />
    </Card>
  );
}

interface FederatedConnectorCardProps {
  connector: FederatedConnectorOAuthStatus;
  onDisconnectSuccess: () => void;
}

function FederatedConnectorCard({
  connector,
  onDisconnectSuccess,
}: FederatedConnectorCardProps) {
  const [isDisconnecting, setIsDisconnecting] = useState(false);
  const [showDisconnectConfirmation, setShowDisconnectConfirmation] =
    useState(false);
  const sourceMetadata = getSourceMetadata(connector.source as ValidSources);

  const handleDisconnect = useCallback(async () => {
    setIsDisconnecting(true);
    try {
      const response = await fetch(
        `/api/federated/${connector.federated_connector_id}/oauth`,
        { method: "DELETE" }
      );

      if (response.ok) {
        toast.success("Disconnected successfully");
        setShowDisconnectConfirmation(false);
        onDisconnectSuccess();
      } else {
        throw new Error("Failed to disconnect");
      }
    } catch (error) {
      toast.error("Failed to disconnect");
    } finally {
      setIsDisconnecting(false);
    }
  }, [connector.federated_connector_id, onDisconnectSuccess]);

  return (
    <>
      {showDisconnectConfirmation && (
        <ConfirmationModalLayout
          icon={SvgUnplug}
          title={`Disconnect ${sourceMetadata.displayName}`}
          onClose={() => setShowDisconnectConfirmation(false)}
          submit={
            <Button
              disabled={isDisconnecting}
              variant="danger"
              onClick={() => void handleDisconnect()}
            >
              {isDisconnecting ? "Disconnecting..." : "Disconnect"}
            </Button>
          }
        >
          <Section gap={0.5} alignItems="start">
            <Text>
              Onyx will no longer be able to access or search content from your{" "}
              <Text className="!font-bold">{sourceMetadata.displayName}</Text>{" "}
              account.
            </Text>
            <Text>
              You can still continue existing sessions referencing{" "}
              {sourceMetadata.displayName} content.
            </Text>
          </Section>
        </ConfirmationModalLayout>
      )}

      <Card padding={0.5}>
        <ContentAction
          icon={sourceMetadata.icon}
          title={sourceMetadata.displayName}
          description={
            connector.has_oauth_token ? "Connected" : "Not connected"
          }
          sizePreset="main-content"
          variant="section"
          paddingVariant="sm"
          rightChildren={
            connector.has_oauth_token ? (
              <Button
                disabled={isDisconnecting}
                icon={SvgUnplug}
                prominence="tertiary"
                size="sm"
                onClick={() => setShowDisconnectConfirmation(true)}
              />
            ) : connector.authorize_url ? (
              <Button
                prominence="internal"
                href={connector.authorize_url}
                target="_blank"
                rightIcon={SvgArrowExchange}
              >
                Connect
              </Button>
            ) : undefined
          }
        />
      </Card>
    </>
  );
}

function ConnectorsSettings() {
  const {
    connectors: federatedConnectors,
    refetch: refetchFederatedConnectors,
  } = useFederatedOAuthStatus();
  const { ccPairs } = useCCPairs();

  const ACTIVE_STATUSES: ConnectorCredentialPairStatus[] = [
    ConnectorCredentialPairStatus.ACTIVE,
    ConnectorCredentialPairStatus.SCHEDULED,
    ConnectorCredentialPairStatus.INITIAL_INDEXING,
  ];

  // Group indexed connectors by source
  const groupedConnectors = ccPairs.reduce(
    (acc, ccPair) => {
      if (!acc[ccPair.source]) {
        acc[ccPair.source] = {
          source: ccPair.source,
          hasActiveConnector: false,
        };
      }
      if (ACTIVE_STATUSES.includes(ccPair.status)) {
        acc[ccPair.source]!.hasActiveConnector = true;
      }
      return acc;
    },
    {} as Record<
      string,
      {
        source: ValidSources;
        hasActiveConnector: boolean;
      }
    >
  );

  const hasConnectors =
    Object.keys(groupedConnectors).length > 0 || federatedConnectors.length > 0;

  return (
    <Section gap={2}>
      <Section gap={0.75} justifyContent="start">
        <Content
          title="Connectors"
          sizePreset="main-content"
          variant="section"
          widthVariant="full"
        />
        {hasConnectors ? (
          <>
            {/* Indexed Connectors */}
            {Object.values(groupedConnectors).map((connector) => (
              <IndexedConnectorCard
                key={connector.source}
                source={connector.source}
                isActive={connector.hasActiveConnector}
              />
            ))}

            {/* Federated Connectors */}
            {federatedConnectors.map((connector) => (
              <FederatedConnectorCard
                key={connector.federated_connector_id}
                connector={connector}
                onDisconnectSuccess={() => refetchFederatedConnectors?.()}
              />
            ))}
          </>
        ) : (
          <EmptyMessage title="No connectors set up for your organization." />
        )}
      </Section>
    </Section>
  );
}

export {
  GeneralSettings,
  ChatPreferencesSettings,
  AccountsAccessSettings,
  ConnectorsSettings,
};


================================================
FILE: web/src/refresh-pages/admin/AgentsPage/AgentRowActions.tsx
================================================
"use client";

import { useCallback, useState } from "react";
import { Button } from "@opal/components";
// TODO(@raunakab): migrate to Opal LineItemButton once it supports danger variant
import LineItem from "@/refresh-components/buttons/LineItem";
import { cn } from "@opal/utils";
import {
  SvgMoreHorizontal,
  SvgEdit,
  SvgEye,
  SvgEyeOff,
  SvgStar,
  SvgStarOff,
  SvgShare,
  SvgBarChart,
  SvgTrash,
} from "@opal/icons";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import Text from "@/refresh-components/texts/Text";
import { toast } from "@/hooks/useToast";
import { useRouter } from "next/navigation";
import {
  deleteAgent,
  toggleAgentFeatured,
  toggleAgentListed,
} from "@/refresh-pages/admin/AgentsPage/svc";
import type { AgentRow } from "@/refresh-pages/admin/AgentsPage/interfaces";
import type { Route } from "next";
import ShareAgentModal from "@/sections/modals/ShareAgentModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { useAgent } from "@/hooks/useAgents";
import {
  updateAgentSharedStatus,
  updateAgentFeaturedStatus,
} from "@/lib/agents";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useUser } from "@/providers/UserProvider";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface AgentRowActionsProps {
  agent: AgentRow;
  onMutate: () => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function AgentRowActions({
  agent,
  onMutate,
}: AgentRowActionsProps) {
  const router = useRouter();
  const { isAdmin, isCurator } = useUser();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const canUpdateFeaturedStatus = isAdmin || isCurator;
  const { agent: fullAgent, refresh: refreshAgent } = useAgent(agent.id);
  const shareModal = useCreateModal();

  const [popoverOpen, setPopoverOpen] = useState(false);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [deleteOpen, setDeleteOpen] = useState(false);
  const [featuredOpen, setFeaturedOpen] = useState(false);
  const [unlistOpen, setUnlistOpen] = useState(false);

  async function handleAction(action: () => Promise<void>, close: () => void) {
    setIsSubmitting(true);
    try {
      await action();
      onMutate();
      toast.success(`${agent.name} updated successfully.`);
      close();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : "An error occurred");
    } finally {
      setIsSubmitting(false);
    }
  }

  const handleShare = useCallback(
    async (
      userIds: string[],
      groupIds: number[],
      isPublic: boolean,
      isFeatured: boolean,
      labelIds: number[]
    ) => {
      const shareError = await updateAgentSharedStatus(
        agent.id,
        userIds,
        groupIds,
        isPublic,
        isPaidEnterpriseFeaturesEnabled,
        labelIds
      );

      if (shareError) {
        toast.error(`Failed to share agent: ${shareError}`);
        return;
      }

      if (canUpdateFeaturedStatus) {
        const featuredError = await updateAgentFeaturedStatus(
          agent.id,
          isFeatured
        );
        if (featuredError) {
          toast.error(`Failed to update featured status: ${featuredError}`);
          refreshAgent();
          return;
        }
      }

      refreshAgent();
      onMutate();
      shareModal.toggle(false);
    },
    [
      agent.id,
      isPaidEnterpriseFeaturesEnabled,
      canUpdateFeaturedStatus,
      refreshAgent,
      onMutate,
    ]
  );

  return (
    <>
      <shareModal.Provider>
        <ShareAgentModal
          agentId={agent.id}
          userIds={fullAgent?.users?.map((u) => u.id) ?? []}
          groupIds={fullAgent?.groups ?? []}
          isPublic={fullAgent?.is_public ?? false}
          isFeatured={fullAgent?.is_featured ?? false}
          labelIds={fullAgent?.labels?.map((l) => l.id) ?? []}
          onShare={handleShare}
        />
      </shareModal.Provider>

      <div className="flex items-center gap-0.5">
        {/* TODO(@raunakab): abstract a more standardized way of doing this
            opacity-on-hover animation. Making Hoverable more extensible
            (e.g. supporting table row groups) would let us use it here
            instead of raw Tailwind group-hover. */}
        {!agent.builtin_persona && (
          <div className="opacity-0 group-hover/row:opacity-100 transition-opacity">
            <Button
              prominence="tertiary"
              icon={SvgEdit}
              tooltip="Edit Agent"
              onClick={() =>
                router.push(
                  `/app/agents/edit/${
                    agent.id
                  }?u=${Date.now()}&admin=true` as Route
                )
              }
            />
          </div>
        )}
        {!agent.is_listed ? (
          <Button
            prominence="tertiary"
            icon={SvgEyeOff}
            tooltip="Re-list Agent"
            onClick={() =>
              handleAction(
                () => toggleAgentListed(agent.id, agent.is_listed),
                () => {}
              )
            }
          />
        ) : (
          <div
            className={cn(
              !agent.is_featured &&
                "opacity-0 group-hover/row:opacity-100 transition-opacity"
            )}
          >
            <Button
              prominence="tertiary"
              icon={SvgStar}
              interaction={featuredOpen ? "hover" : "rest"}
              tooltip={
                agent.is_featured ? "Remove Featured" : "Set as Featured"
              }
              onClick={() => {
                setPopoverOpen(false);
                setFeaturedOpen(true);
              }}
            />
          </div>
        )}

        {/* Overflow menu */}
        <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
          <div
            className={cn(
              !popoverOpen &&
                "opacity-0 group-hover/row:opacity-100 transition-opacity"
            )}
          >
            <Popover.Trigger asChild>
              <Button prominence="tertiary" icon={SvgMoreHorizontal} />
            </Popover.Trigger>
          </div>
          <Popover.Content align="end" width="sm">
            <PopoverMenu>
              {[
                <LineItem
                  key="visibility"
                  icon={agent.is_listed ? SvgEyeOff : SvgEye}
                  onClick={() => {
                    setPopoverOpen(false);
                    if (agent.is_listed) {
                      setUnlistOpen(true);
                    } else {
                      handleAction(
                        () => toggleAgentListed(agent.id, agent.is_listed),
                        () => {}
                      );
                    }
                  }}
                >
                  {agent.is_listed ? "Unlist Agent" : "List Agent"}
                </LineItem>,
                <LineItem
                  key="share"
                  icon={SvgShare}
                  onClick={() => {
                    setPopoverOpen(false);
                    shareModal.toggle(true);
                  }}
                >
                  Share
                </LineItem>,
                isPaidEnterpriseFeaturesEnabled ? (
                  <LineItem
                    key="stats"
                    icon={SvgBarChart}
                    onClick={() => {
                      setPopoverOpen(false);
                      router.push(`/ee/agents/stats/${agent.id}` as Route);
                    }}
                  >
                    Stats
                  </LineItem>
                ) : undefined,
                !agent.builtin_persona ? null : undefined,
                !agent.builtin_persona ? (
                  <LineItem
                    key="delete"
                    icon={SvgTrash}
                    danger
                    onClick={() => {
                      setPopoverOpen(false);
                      setDeleteOpen(true);
                    }}
                  >
                    Delete
                  </LineItem>
                ) : undefined,
              ]}
            </PopoverMenu>
          </Popover.Content>
        </Popover>
      </div>

      {deleteOpen && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title="Delete Agent"
          onClose={isSubmitting ? undefined : () => setDeleteOpen(false)}
          submit={
            <Button
              disabled={isSubmitting}
              variant="danger"
              onClick={() => {
                handleAction(
                  () => deleteAgent(agent.id),
                  () => setDeleteOpen(false)
                );
              }}
            >
              Delete
            </Button>
          }
        >
          <Text as="p" text03>
            Are you sure you want to delete{" "}
            <Text as="span" text05>
              {agent.name}
            </Text>
            ? This action cannot be undone.
          </Text>
        </ConfirmationModalLayout>
      )}

      {featuredOpen && (
        <ConfirmationModalLayout
          icon={agent.is_featured ? SvgStarOff : SvgStar}
          title={
            agent.is_featured
              ? `Remove ${agent.name} from Featured`
              : `Feature ${agent.name}`
          }
          onClose={isSubmitting ? undefined : () => setFeaturedOpen(false)}
          submit={
            <Button
              disabled={isSubmitting}
              onClick={() => {
                handleAction(
                  () => toggleAgentFeatured(agent.id, agent.is_featured),
                  () => setFeaturedOpen(false)
                );
              }}
            >
              {agent.is_featured ? "Unfeature" : "Feature"}
            </Button>
          }
        >
          <div className="flex flex-col gap-2">
            <Text as="p" text03>
              {agent.is_featured
                ? `This will remove ${agent.name} from the featured section on top of the explore agents list. New users will no longer see it pinned to their sidebar, but existing pins are unaffected.`
                : "Featured agents appear at the top of the explore agents list and are automatically pinned to the sidebar for new users with access. Use this to highlight recommended agents across your organization."}
            </Text>
            <Text as="p" text03>
              This does not change who can access this agent.
            </Text>
          </div>
        </ConfirmationModalLayout>
      )}

      {unlistOpen && (
        <ConfirmationModalLayout
          icon={SvgEyeOff}
          title={`Unlist ${agent.name}`}
          onClose={isSubmitting ? undefined : () => setUnlistOpen(false)}
          submit={
            <Button
              disabled={isSubmitting}
              onClick={() => {
                handleAction(
                  () => toggleAgentListed(agent.id, agent.is_listed),
                  () => setUnlistOpen(false)
                );
              }}
            >
              Unlist
            </Button>
          }
        >
          <div className="flex flex-col gap-2">
            <Text as="p" text03>
              Unlisted agents don&apos;t appear in the explore agents list but
              remain accessible via direct link, and to users who have
              previously used or pinned them.
            </Text>
            <Text as="p" text03>
              This does not change who can access this agent.
            </Text>
          </div>
        </ConfirmationModalLayout>
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-pages/admin/AgentsPage/AgentsTable.tsx
================================================
"use client";

import { useMemo, useState } from "react";
import { Table, createTableColumns } from "@opal/components";
import { Content, IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import type { MinimalUserSnapshot } from "@/lib/types";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import type { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { useAdminPersonas } from "@/hooks/useAdminPersonas";
import { toast } from "@/hooks/useToast";
import AgentRowActions from "@/refresh-pages/admin/AgentsPage/AgentRowActions";
import { updateAgentDisplayPriorities } from "@/refresh-pages/admin/AgentsPage/svc";
import type { AgentRow } from "@/refresh-pages/admin/AgentsPage/interfaces";
import type { Persona } from "@/app/admin/agents/interfaces";
import { SvgUser } from "@opal/icons";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

function toAgentRow(persona: Persona): AgentRow {
  return {
    id: persona.id,
    name: persona.name,
    description: persona.description,
    is_public: persona.is_public,
    is_listed: persona.is_listed,
    is_featured: persona.is_featured,
    builtin_persona: persona.builtin_persona,
    display_priority: persona.display_priority,
    owner: persona.owner,
    groups: persona.groups,
    users: persona.users,
    uploaded_image_id: persona.uploaded_image_id,
    icon_name: persona.icon_name,
  };
}

// ---------------------------------------------------------------------------
// Column renderers
// ---------------------------------------------------------------------------

function renderCreatedByColumn(
  _value: MinimalUserSnapshot | null,
  row: AgentRow
) {
  return (
    <Content
      sizePreset="main-ui"
      variant="section"
      icon={SvgUser}
      title={row.builtin_persona ? "System" : row.owner?.email ?? "\u2014"}
    />
  );
}

function getAccessTitle(row: AgentRow): string {
  if (row.is_public) return "Public";
  if (row.groups.length > 0 || row.users.length > 0) return "Shared";
  return "Private";
}

function renderAccessColumn(_isPublic: boolean, row: AgentRow) {
  return (
    <Content
      sizePreset="main-ui"
      variant="section"
      title={getAccessTitle(row)}
      description={
        !row.is_listed ? "Unlisted" : row.is_featured ? "Featured" : undefined
      }
    />
  );
}

// ---------------------------------------------------------------------------
// Columns
// ---------------------------------------------------------------------------

const tc = createTableColumns<AgentRow>();

function buildColumns(onMutate: () => void) {
  return [
    tc.qualifier({
      content: "icon",
      background: true,
      getContent: (row) => (props) => (
        <AgentAvatar
          agent={row as unknown as MinimalPersonaSnapshot}
          size={props.size}
        />
      ),
    }),
    tc.column("name", {
      header: "Name",
      weight: 25,
      cell: (value) => (
        <Text as="span" mainUiBody text05>
          {value}
        </Text>
      ),
    }),
    tc.column("description", {
      header: "Description",
      weight: 35,
      cell: (value) => (
        <Text as="span" mainUiBody text03>
          {value || "\u2014"}
        </Text>
      ),
    }),
    tc.column("owner", {
      header: "Created By",
      weight: 20,
      cell: renderCreatedByColumn,
    }),
    tc.column("is_public", {
      header: "Access",
      weight: 12,
      cell: renderAccessColumn,
    }),
    tc.actions({
      cell: (row) => <AgentRowActions agent={row} onMutate={onMutate} />,
    }),
  ];
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

const PAGE_SIZE = 10;

export default function AgentsTable() {
  const [searchTerm, setSearchTerm] = useState("");

  const { personas, isLoading, error, refresh } = useAdminPersonas();

  const columns = useMemo(() => buildColumns(refresh), [refresh]);

  const agentRows: AgentRow[] = useMemo(
    () => personas.filter((p) => !p.builtin_persona).map(toAgentRow),
    [personas]
  );

  const handleReorder = async (
    _orderedIds: string[],
    changedOrders: Record<string, number>
  ) => {
    try {
      await updateAgentDisplayPriorities(changedOrders);
      refresh();
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to update agent order"
      );
      refresh();
    }
  };

  if (isLoading) {
    return (
      <div className="flex justify-center py-12">
        <SimpleLoader className="h-6 w-6" />
      </div>
    );
  }

  if (error) {
    console.error("Failed to load agents:", error);
    return (
      <Text as="p" secondaryBody text03>
        Failed to load agents. Please try refreshing the page.
      </Text>
    );
  }

  return (
    <div className="flex flex-col gap-3">
      <InputTypeIn
        value={searchTerm}
        onChange={(e) => setSearchTerm(e.target.value)}
        placeholder="Search agents..."
        leftSearchIcon
      />
      <Table
        data={agentRows}
        columns={columns}
        getRowId={(row) => String(row.id)}
        pageSize={PAGE_SIZE}
        searchTerm={searchTerm}
        draggable={{
          onReorder: handleReorder,
        }}
        emptyState={
          <IllustrationContent
            illustration={SvgNoResult}
            title="No agents found"
            description="No agents match the current search."
          />
        }
        footer={{}}
      />
    </div>
  );
}


================================================
FILE: web/src/refresh-pages/admin/AgentsPage/interfaces.ts
================================================
import type { MinimalUserSnapshot } from "@/lib/types";

export interface AgentRow {
  id: number;
  name: string;
  description: string;
  is_public: boolean;
  is_listed: boolean;
  is_featured: boolean;
  builtin_persona: boolean;
  display_priority: number | null;
  owner: MinimalUserSnapshot | null;
  groups: number[];
  users: MinimalUserSnapshot[];
  uploaded_image_id?: string;
  icon_name?: string;
}


================================================
FILE: web/src/refresh-pages/admin/AgentsPage/svc.ts
================================================
async function parseErrorDetail(
  res: Response,
  fallback: string
): Promise<string> {
  try {
    const body = await res.json();
    return body?.detail ?? fallback;
  } catch (err) {
    console.error("Failed to parse error response:", err);
    return fallback;
  }
}

export async function deleteAgent(agentId: number): Promise<void> {
  const res = await fetch(`/api/persona/${agentId}`, {
    method: "DELETE",
    credentials: "include",
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to delete agent"));
  }
}

export async function toggleAgentFeatured(
  agentId: number,
  currentlyFeatured: boolean
): Promise<void> {
  const res = await fetch(`/api/admin/persona/${agentId}/featured`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ is_featured: !currentlyFeatured }),
    credentials: "include",
  });
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to toggle featured status")
    );
  }
}

export async function toggleAgentListed(
  agentId: number,
  currentlyListed: boolean
): Promise<void> {
  const res = await fetch(`/api/admin/persona/${agentId}/listed`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ is_listed: !currentlyListed }),
    credentials: "include",
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to toggle visibility"));
  }
}

export async function updateAgentDisplayPriorities(
  displayPriorityMap: Record<string, number>
): Promise<void> {
  const res = await fetch("/api/admin/agents/display-priorities", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ display_priority_map: displayPriorityMap }),
  });
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to update agent order")
    );
  }
}


================================================
FILE: web/src/refresh-pages/admin/AgentsPage.tsx
================================================
"use client";

import { SvgOnyxOctagon, SvgPlus } from "@opal/icons";
import { Button } from "@opal/components";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import Link from "next/link";

import AgentsTable from "./AgentsPage/AgentsTable";

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function AgentsPage() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        title="Agents"
        description="Customize AI behavior and knowledge with agents. Manage agents in your organization."
        icon={SvgOnyxOctagon}
        rightChildren={
          <Button href="/app/agents/create?admin=true" icon={SvgPlus}>
            New Agent
          </Button>
        }
      />
      <SettingsLayouts.Body>
        <AgentsTable />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ChatPreferencesPage.tsx
================================================
"use client";

import { markdown } from "@opal/utils";
import React, { useCallback, useRef, useState } from "react";
import { useRouter } from "next/navigation";
import { Formik, Form, useFormikContext } from "formik";
import useSWR, { mutate } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { errorHandlingFetcher } from "@/lib/fetcher";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import Separator from "@/refresh-components/Separator";
import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import SwitchField from "@/refresh-components/form/SwitchField";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";
import InputSelectField from "@/refresh-components/form/InputSelectField";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import {
  SvgAddLines,
  SvgActions,
  SvgExpand,
  SvgFold,
  SvgExternalLink,
  SvgAlertCircle,
  SvgRefreshCw,
} from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Content } from "@opal/layouts";
import {
  useSettingsContext,
  useVectorDbEnabled,
} from "@/providers/SettingsProvider";
import useCCPairs from "@/hooks/useCCPairs";
import { getSourceMetadata } from "@/lib/sources";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import { Settings } from "@/interfaces/settings";
import { toast } from "@/hooks/useToast";
import { useAvailableTools } from "@/hooks/useAvailableTools";
import {
  SEARCH_TOOL_ID,
  IMAGE_GENERATION_TOOL_ID,
  WEB_SEARCH_TOOL_ID,
  PYTHON_TOOL_ID,
  OPEN_URL_TOOL_ID,
} from "@/app/app/components/tools/constants";
import { Button, Text, Card as OpalCard } from "@opal/components";
import Modal from "@/refresh-components/Modal";
import Switch from "@/refresh-components/inputs/Switch";
import useMcpServersForAgentEditor from "@/hooks/useMcpServersForAgentEditor";
import useOpenApiTools from "@/hooks/useOpenApiTools";
import * as ExpandableCard from "@/layouts/expandable-card-layouts";
import * as ActionsLayouts from "@/layouts/actions-layouts";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { Disabled, Hoverable } from "@opal/core";
import IconButton from "@/refresh-components/buttons/IconButton";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import useFilter from "@/hooks/useFilter";
import { MCPServer } from "@/lib/tools/interfaces";
import type { IconProps } from "@opal/types";

const route = ADMIN_ROUTES.CHAT_PREFERENCES;

interface DefaultAgentConfiguration {
  tool_ids: number[];
  system_prompt: string | null;
  default_system_prompt: string;
}

interface ChatPreferencesFormValues {
  // Features
  search_ui_enabled: boolean;
  deep_research_enabled: boolean;
  auto_scroll: boolean;

  // Team context
  company_name: string;
  company_description: string;

  // Advanced
  maximum_chat_retention_days: string;
  anonymous_user_enabled: boolean;
  disable_default_assistant: boolean;

  // File limits
  user_file_max_upload_size_mb: string;
  file_token_count_threshold_k: string;
}

interface MCPServerCardTool {
  id: number;
  icon: React.FunctionComponent<IconProps>;
  name: string;
  description: string;
}

interface MCPServerCardProps {
  server: MCPServer;
  tools: MCPServerCardTool[];
  isToolEnabled: (toolDbId: number) => boolean;
  onToggleTool: (toolDbId: number, enabled: boolean) => void;
  onToggleTools: (toolDbIds: number[], enabled: boolean) => void;
}

function MCPServerCard({
  server,
  tools,
  isToolEnabled,
  onToggleTool,
  onToggleTools,
}: MCPServerCardProps) {
  const [isFolded, setIsFolded] = useState(true);
  const {
    query,
    setQuery,
    filtered: filteredTools,
  } = useFilter(tools, (tool) => `${tool.name} ${tool.description}`);

  const allToolIds = tools.map((t) => t.id);
  const serverEnabled =
    tools.length > 0 && tools.some((t) => isToolEnabled(t.id));
  const needsAuth = !server.is_authenticated;
  const authTooltip = needsAuth
    ? "Authenticate this MCP server before enabling its tools."
    : undefined;

  return (
    <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
      <ActionsLayouts.Header
        title={server.name}
        description={server.description}
        icon={getActionIcon(server.server_url, server.name)}
        rightChildren={
          <SimpleTooltip tooltip={authTooltip} side="top">
            <Switch
              checked={serverEnabled}
              onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}
              disabled={needsAuth}
            />
          </SimpleTooltip>
        }
      >
        {tools.length > 0 && (
          <Section flexDirection="row" gap={0.5}>
            <InputTypeIn
              placeholder="Search tools..."
              variant="internal"
              leftSearchIcon
              value={query}
              onChange={(e) => setQuery(e.target.value)}
            />
            <Button
              rightIcon={isFolded ? SvgExpand : SvgFold}
              onClick={() => setIsFolded((prev) => !prev)}
              prominence="internal"
              size="lg"
            >
              {isFolded ? "Expand" : "Fold"}
            </Button>
          </Section>
        )}
      </ActionsLayouts.Header>
      {tools.length > 0 && filteredTools.length > 0 && (
        <ActionsLayouts.Content>
          <div className="flex flex-col gap-2">
            {filteredTools.map((tool) => (
              <ActionsLayouts.Tool
                key={tool.id}
                title={tool.name}
                description={tool.description}
                icon={tool.icon}
                rightChildren={
                  <SimpleTooltip tooltip={authTooltip} side="top">
                    <Switch
                      checked={isToolEnabled(tool.id)}
                      onCheckedChange={(checked) =>
                        onToggleTool(tool.id, checked)
                      }
                      disabled={needsAuth}
                    />
                  </SimpleTooltip>
                }
              />
            ))}
          </div>
        </ActionsLayouts.Content>
      )}
    </ExpandableCard.Root>
  );
}

type FileLimitFieldName =
  | "user_file_max_upload_size_mb"
  | "file_token_count_threshold_k";

interface NumericLimitFieldProps {
  name: FileLimitFieldName;
  defaultValue: string;
  saveSettings: (updates: Partial<Settings>) => Promise<void>;
  maxValue?: number;
  allowZero?: boolean;
}

function NumericLimitField({
  name,
  defaultValue,
  saveSettings,
  maxValue,
  allowZero = false,
}: NumericLimitFieldProps) {
  const { values, setFieldValue } =
    useFormikContext<ChatPreferencesFormValues>();
  const initialValue = useRef(values[name]);
  const restoringRef = useRef(false);
  const value = values[name];

  const parsed = parseInt(value, 10);
  const isOverMax =
    maxValue !== undefined && !isNaN(parsed) && parsed > maxValue;

  const handleRestore = () => {
    restoringRef.current = true;
    initialValue.current = defaultValue;
    void setFieldValue(name, defaultValue);
    void saveSettings({ [name]: parseInt(defaultValue, 10) });
  };

  const handleBlur = () => {
    // The restore button triggers a blur — skip since handleRestore already saved.
    if (restoringRef.current) {
      restoringRef.current = false;
      return;
    }

    const parsed = parseInt(value, 10);
    const isValid = !isNaN(parsed) && (allowZero ? parsed >= 0 : parsed > 0);

    // Revert invalid input (empty, NaN, negative).
    if (!isValid) {
      if (allowZero) {
        // Empty/invalid means "no limit" — persist 0 and clear the field.
        void setFieldValue(name, "");
        void saveSettings({ [name]: 0 });
        initialValue.current = "";
      } else {
        void setFieldValue(name, initialValue.current);
      }
      return;
    }

    // Block save when the value exceeds the hard ceiling.
    if (maxValue !== undefined && parsed > maxValue) {
      return;
    }

    // For allowZero fields, 0 means "no limit" — clear the display
    // so the "No limit" placeholder is visible, but still persist 0.
    if (allowZero && parsed === 0) {
      void setFieldValue(name, "");
      if (initialValue.current !== "") {
        void saveSettings({ [name]: 0 });
        initialValue.current = "";
      }
      return;
    }

    const normalizedDisplay = String(parsed);

    // Update the display to the canonical form (e.g. strip leading zeros).
    if (value !== normalizedDisplay) {
      void setFieldValue(name, normalizedDisplay);
    }

    // Persist only when the value actually changed.
    if (normalizedDisplay !== initialValue.current) {
      void saveSettings({ [name]: parsed });
      initialValue.current = normalizedDisplay;
    }
  };

  return (
    <Hoverable.Root group="numericLimit" widthVariant="full">
      <InputTypeInField
        name={name}
        inputMode="numeric"
        showClearButton={false}
        pattern="[0-9]*"
        placeholder={allowZero ? "No limit" : `Default: ${defaultValue}`}
        variant={isOverMax ? "error" : undefined}
        rightSection={
          (value || "") !== defaultValue ? (
            <Hoverable.Item group="numericLimit" variant="opacity-on-hover">
              <IconButton
                icon={SvgRefreshCw}
                tooltip="Restore default"
                internal
                type="button"
                onClick={handleRestore}
              />
            </Hoverable.Item>
          ) : undefined
        }
        onBlur={handleBlur}
      />
    </Hoverable.Root>
  );
}

interface FileSizeLimitFieldsProps {
  saveSettings: (updates: Partial<Settings>) => Promise<void>;
  defaultUploadSizeMb: string;
  defaultTokenThresholdK: string;
  maxAllowedUploadSizeMb?: number;
}

function FileSizeLimitFields({
  saveSettings,
  defaultUploadSizeMb,
  defaultTokenThresholdK,
  maxAllowedUploadSizeMb,
}: FileSizeLimitFieldsProps) {
  return (
    <div className="flex gap-4 w-full items-start">
      <div className="flex-1">
        <InputLayouts.Vertical
          title="File Size Limit (MB)"
          subDescription={
            maxAllowedUploadSizeMb
              ? `Max: ${maxAllowedUploadSizeMb} MB`
              : undefined
          }
          nonInteractive
        >
          <NumericLimitField
            name="user_file_max_upload_size_mb"
            defaultValue={defaultUploadSizeMb}
            saveSettings={saveSettings}
            maxValue={maxAllowedUploadSizeMb}
          />
        </InputLayouts.Vertical>
      </div>
      <div className="flex-1">
        <InputLayouts.Vertical
          title="File Token Limit (thousand tokens)"
          nonInteractive
        >
          <NumericLimitField
            name="file_token_count_threshold_k"
            defaultValue={defaultTokenThresholdK}
            saveSettings={saveSettings}
            allowZero
          />
        </InputLayouts.Vertical>
      </div>
    </div>
  );
}

/**
 * Inner form component that uses useFormikContext to access values
 * and create save handlers for settings fields.
 */
function ChatPreferencesForm() {
  const router = useRouter();
  const settings = useSettingsContext();
  const { values } = useFormikContext<ChatPreferencesFormValues>();

  // Track initial text values to avoid unnecessary saves on blur
  const initialCompanyName = useRef(values.company_name);
  const initialCompanyDescription = useRef(values.company_description);

  // Tools availability
  const { tools: availableTools } = useAvailableTools();
  const vectorDbEnabled = useVectorDbEnabled();

  const searchTool = availableTools.find(
    (t) => t.in_code_tool_id === SEARCH_TOOL_ID
  );
  const imageGenTool = availableTools.find(
    (t) => t.in_code_tool_id === IMAGE_GENERATION_TOOL_ID
  );
  const webSearchTool = availableTools.find(
    (t) => t.in_code_tool_id === WEB_SEARCH_TOOL_ID
  );
  const openURLTool = availableTools.find(
    (t) => t.in_code_tool_id === OPEN_URL_TOOL_ID
  );
  const codeInterpreterTool = availableTools.find(
    (t) => t.in_code_tool_id === PYTHON_TOOL_ID
  );

  // Connectors
  const { ccPairs } = useCCPairs();
  const uniqueSources = Array.from(new Set(ccPairs.map((p) => p.source)));

  // MCP servers and OpenAPI tools
  const { mcpData } = useMcpServersForAgentEditor();
  const { openApiTools: openApiToolsRaw } = useOpenApiTools();
  const mcpServers = mcpData?.mcp_servers ?? [];
  const openApiTools = openApiToolsRaw ?? [];

  const mcpServersWithTools = mcpServers.map((server) => ({
    server,
    tools: availableTools
      .filter((tool) => tool.mcp_server_id === server.id)
      .map((tool) => ({
        id: tool.id,
        icon: getActionIcon(server.server_url, server.name),
        name: tool.display_name || tool.name,
        description: tool.description,
      })),
  }));

  // Default agent configuration (system prompt)
  const { data: defaultAgentConfig, mutate: mutateDefaultAgent } =
    useSWR<DefaultAgentConfiguration>(
      SWR_KEYS.defaultAssistantConfig,
      errorHandlingFetcher
    );

  const enabledToolIds = defaultAgentConfig?.tool_ids ?? [];

  const isToolEnabled = useCallback(
    (toolDbId: number) => enabledToolIds.includes(toolDbId),
    [enabledToolIds]
  );

  const saveToolIds = useCallback(
    async (newToolIds: number[]) => {
      // Optimistic update so subsequent toggles read fresh state
      const optimisticData = defaultAgentConfig
        ? { ...defaultAgentConfig, tool_ids: newToolIds }
        : undefined;
      try {
        await mutateDefaultAgent(
          async () => {
            const response = await fetch("/api/admin/default-assistant", {
              method: "PATCH",
              headers: { "Content-Type": "application/json" },
              body: JSON.stringify({ tool_ids: newToolIds }),
            });
            if (!response.ok) {
              const errorMsg = (await response.json()).detail;
              throw new Error(errorMsg);
            }
            return optimisticData;
          },
          { optimisticData, revalidate: true }
        );
        toast.success("Tools updated");
      } catch {
        toast.error("Failed to update tools");
      }
    },
    [defaultAgentConfig, mutateDefaultAgent]
  );

  const toggleTool = useCallback(
    (toolDbId: number, enabled: boolean) => {
      const newToolIds = enabled
        ? [...enabledToolIds, toolDbId]
        : enabledToolIds.filter((id) => id !== toolDbId);
      void saveToolIds(newToolIds);
    },
    [enabledToolIds, saveToolIds]
  );

  const toggleTools = useCallback(
    (toolDbIds: number[], enabled: boolean) => {
      const idsSet = new Set(toolDbIds);
      const withoutIds = enabledToolIds.filter((id) => !idsSet.has(id));
      const newToolIds = enabled ? [...withoutIds, ...toolDbIds] : withoutIds;
      void saveToolIds(newToolIds);
    },
    [enabledToolIds, saveToolIds]
  );

  // System prompt modal state
  const [systemPromptModalOpen, setSystemPromptModalOpen] = useState(false);

  const saveSettings = useCallback(
    async (updates: Partial<Settings>) => {
      const currentSettings = settings?.settings;
      if (!currentSettings) return;

      const newSettings = { ...currentSettings, ...updates };

      try {
        const response = await fetch("/api/admin/settings", {
          method: "PUT",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(newSettings),
        });

        if (!response.ok) {
          const errorMsg = (await response.json()).detail;
          throw new Error(errorMsg);
        }

        router.refresh();
        await mutate(SWR_KEYS.settings);
        toast.success("Settings updated");
      } catch (error) {
        toast.error("Failed to update settings");
      }
    },
    [settings, router]
  );

  return (
    <>
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description="Organization-wide chat settings and defaults. Users can override some of these in their personal settings."
          separator
        />

        <SettingsLayouts.Body>
          {/* Team Context */}
          <Section gap={1}>
            <InputLayouts.Vertical
              title="Team Name"
              subDescription="This is added to all chat sessions as additional context to provide a richer/customized experience."
            >
              <InputTypeInField
                name="company_name"
                placeholder="Enter team name"
                onBlur={() => {
                  if (values.company_name !== initialCompanyName.current) {
                    void saveSettings({
                      company_name: values.company_name || null,
                    });
                    initialCompanyName.current = values.company_name;
                  }
                }}
              />
            </InputLayouts.Vertical>

            <InputLayouts.Vertical
              title="Team Context"
              subDescription="Users can also provide additional individual context in their personal settings."
            >
              <InputTextAreaField
                name="company_description"
                placeholder="Describe your team and how Onyx should behave."
                rows={4}
                maxRows={10}
                autoResize
                onBlur={() => {
                  if (
                    values.company_description !==
                    initialCompanyDescription.current
                  ) {
                    void saveSettings({
                      company_description: values.company_description || null,
                    });
                    initialCompanyDescription.current =
                      values.company_description;
                  }
                }}
              />
            </InputLayouts.Vertical>
          </Section>

          <InputLayouts.Horizontal
            title="System Prompt"
            description="Base prompt for all chats, agents, and projects. Modify with caution: Significant changes may degrade response quality."
          >
            <Button
              prominence="tertiary"
              icon={SvgAddLines}
              onClick={() => setSystemPromptModalOpen(true)}
            >
              Modify Prompt
            </Button>
          </InputLayouts.Horizontal>

          <Separator noPadding />

          {/* Features */}
          <Section gap={0.75}>
            <Content
              title="Features"
              sizePreset="main-content"
              variant="section"
            />
            <Card>
              <SimpleTooltip
                tooltip={
                  uniqueSources.length === 0
                    ? "Set up connectors to use Search Mode"
                    : undefined
                }
                side="top"
              >
                <Disabled disabled={uniqueSources.length === 0} allowClick>
                  <div className="w-full">
                    <InputLayouts.Horizontal
                      title="Search Mode"
                      description="UI mode for quick document search across your organization."
                      disabled={uniqueSources.length === 0}
                    >
                      <SwitchField
                        name="search_ui_enabled"
                        onCheckedChange={(checked) => {
                          void saveSettings({ search_ui_enabled: checked });
                        }}
                        disabled={uniqueSources.length === 0}
                      />
                    </InputLayouts.Horizontal>
                  </div>
                </Disabled>
              </SimpleTooltip>
              <InputLayouts.Horizontal
                title="Deep Research"
                description="Agentic research system that works across the web and connected sources. Uses significantly more tokens per query."
              >
                <SwitchField
                  name="deep_research_enabled"
                  onCheckedChange={(checked) => {
                    void saveSettings({ deep_research_enabled: checked });
                  }}
                />
              </InputLayouts.Horizontal>
              <InputLayouts.Horizontal
                title="Chat Auto-Scroll"
                description="Automatically scroll to new content as chat generates response. Users can override this in their personal settings."
              >
                <SwitchField
                  name="auto_scroll"
                  onCheckedChange={(checked) => {
                    void saveSettings({ auto_scroll: checked });
                  }}
                />
              </InputLayouts.Horizontal>
            </Card>
          </Section>

          <Separator noPadding />

          <Disabled disabled={values.disable_default_assistant}>
            <div>
              <Section gap={1.5}>
                {/* Connectors */}
                <Section gap={0.75}>
                  <Content
                    title="Connectors"
                    sizePreset="main-content"
                    variant="section"
                  />

                  <Section
                    flexDirection="row"
                    justifyContent="between"
                    alignItems="center"
                    gap={0.25}
                  >
                    {uniqueSources.length === 0 ? (
                      <EmptyMessage title="No connectors set up" />
                    ) : (
                      <>
                        <Section
                          flexDirection="row"
                          justifyContent="start"
                          alignItems="center"
                          gap={0.25}
                        >
                          {uniqueSources.slice(0, 3).map((source) => {
                            const meta = getSourceMetadata(source);
                            return (
                              <Card
                                key={source}
                                padding={0.75}
                                className="w-[10rem]"
                              >
                                <Content
                                  icon={meta.icon}
                                  title={meta.displayName}
                                  sizePreset="main-ui"
                                />
                              </Card>
                            );
                          })}
                        </Section>

                        <Button
                          href="/admin/indexing/status"
                          prominence="tertiary"
                          rightIcon={SvgExternalLink}
                        >
                          Manage All
                        </Button>
                      </>
                    )}
                  </Section>
                </Section>

                {/* Actions & Tools */}
                <SimpleCollapsible>
                  <SimpleCollapsible.Header
                    title="Actions & Tools"
                    description="Tools and capabilities available for chat to use. This does not apply to agents."
                  />
                  <SimpleCollapsible.Content>
                    <Section gap={0.5}>
                      {vectorDbEnabled && searchTool && (
                        <Card>
                          <InputLayouts.Horizontal
                            title="Internal Search"
                            description="Search through your organization's connected knowledge base and documents."
                          >
                            <Switch
                              checked={isToolEnabled(searchTool.id)}
                              onCheckedChange={(checked) =>
                                void toggleTool(searchTool.id, checked)
                              }
                            />
                          </InputLayouts.Horizontal>
                        </Card>
                      )}

                      <SimpleTooltip
                        tooltip={
                          imageGenTool
                            ? undefined
                            : "Image generation requires a configured model. Set one up under Configuration > Image Generation, or ask an admin."
                        }
                        side="top"
                      >
                        <Card variant={imageGenTool ? undefined : "disabled"}>
                          <InputLayouts.Horizontal
                            title="Image Generation"
                            description="Generate and manipulate images using AI-powered tools."
                            disabled={!imageGenTool}
                          >
                            <Switch
                              checked={
                                imageGenTool
                                  ? isToolEnabled(imageGenTool.id)
                                  : false
                              }
                              onCheckedChange={(checked) =>
                                imageGenTool &&
                                void toggleTool(imageGenTool.id, checked)
                              }
                              disabled={!imageGenTool}
                            />
                          </InputLayouts.Horizontal>
                        </Card>
                      </SimpleTooltip>

                      <Card variant={webSearchTool ? undefined : "disabled"}>
                        <InputLayouts.Horizontal
                          title="Web Search"
                          description="Search the web for real-time information and up-to-date results."
                          disabled={!webSearchTool}
                        >
                          <Switch
                            checked={
                              webSearchTool
                                ? isToolEnabled(webSearchTool.id)
                                : false
                            }
                            onCheckedChange={(checked) =>
                              webSearchTool &&
                              void toggleTool(webSearchTool.id, checked)
                            }
                            disabled={!webSearchTool}
                          />
                        </InputLayouts.Horizontal>
                      </Card>

                      <Card variant={openURLTool ? undefined : "disabled"}>
                        <InputLayouts.Horizontal
                          title="Open URL"
                          description="Fetch and read content from web URLs."
                          disabled={!openURLTool}
                        >
                          <Switch
                            checked={
                              openURLTool
                                ? isToolEnabled(openURLTool.id)
                                : false
                            }
                            onCheckedChange={(checked) =>
                              openURLTool &&
                              void toggleTool(openURLTool.id, checked)
                            }
                            disabled={!openURLTool}
                          />
                        </InputLayouts.Horizontal>
                      </Card>

                      <Card
                        variant={codeInterpreterTool ? undefined : "disabled"}
                      >
                        <InputLayouts.Horizontal
                          title="Code Interpreter"
                          description="Generate and run code."
                          disabled={!codeInterpreterTool}
                        >
                          <Switch
                            checked={
                              codeInterpreterTool
                                ? isToolEnabled(codeInterpreterTool.id)
                                : false
                            }
                            onCheckedChange={(checked) =>
                              codeInterpreterTool &&
                              void toggleTool(codeInterpreterTool.id, checked)
                            }
                            disabled={!codeInterpreterTool}
                          />
                        </InputLayouts.Horizontal>
                      </Card>
                    </Section>

                    {/* Separator between built-in tools and MCP/OpenAPI tools */}
                    {(mcpServersWithTools.length > 0 ||
                      openApiTools.length > 0) && (
                      <Separator noPadding className="py-3" />
                    )}

                    {/* MCP Servers & OpenAPI Tools */}
                    <Section gap={0.5}>
                      {mcpServersWithTools.map(({ server, tools }) => (
                        <MCPServerCard
                          key={server.id}
                          server={server}
                          tools={tools}
                          isToolEnabled={isToolEnabled}
                          onToggleTool={toggleTool}
                          onToggleTools={toggleTools}
                        />
                      ))}
                      {openApiTools.map((tool) => (
                        <ExpandableCard.Root key={tool.id} defaultFolded>
                          <ActionsLayouts.Header
                            title={tool.display_name || tool.name}
                            description={tool.description}
                            icon={SvgActions}
                            rightChildren={
                              <Switch
                                checked={isToolEnabled(tool.id)}
                                onCheckedChange={(checked) =>
                                  toggleTool(tool.id, checked)
                                }
                              />
                            }
                          />
                        </ExpandableCard.Root>
                      ))}
                    </Section>
                  </SimpleCollapsible.Content>
                </SimpleCollapsible>
              </Section>
            </div>
          </Disabled>

          <Separator noPadding />

          {/* Advanced Options */}
          <SimpleCollapsible defaultOpen={false}>
            <SimpleCollapsible.Header title="Advanced Options" />
            <SimpleCollapsible.Content>
              <Section gap={1}>
                <Card>
                  <InputLayouts.Horizontal
                    title="Keep Chat History"
                    description="Specify how long Onyx should retain chats in your organization."
                  >
                    <InputSelectField
                      name="maximum_chat_retention_days"
                      onValueChange={(value) => {
                        void saveSettings({
                          maximum_chat_retention_days:
                            value === "forever" ? null : parseInt(value, 10),
                        });
                      }}
                    >
                      <InputSelect.Trigger />
                      <InputSelect.Content>
                        <InputSelect.Item value="forever">
                          Forever
                        </InputSelect.Item>
                        <InputSelect.Item value="7">7 days</InputSelect.Item>
                        <InputSelect.Item value="30">30 days</InputSelect.Item>
                        <InputSelect.Item value="90">90 days</InputSelect.Item>
                        <InputSelect.Item value="365">
                          365 days
                        </InputSelect.Item>
                      </InputSelect.Content>
                    </InputSelectField>
                  </InputLayouts.Horizontal>
                </Card>

                <Card>
                  <InputLayouts.Vertical
                    title="File Attachment Size Limit"
                    description="Files attached in chats and projects must fit within both limits to be accepted. Larger files increase latency, memory usage, and token costs."
                  >
                    <FileSizeLimitFields
                      saveSettings={saveSettings}
                      defaultUploadSizeMb={
                        settings?.settings.default_user_file_max_upload_size_mb?.toString() ??
                        "100"
                      }
                      defaultTokenThresholdK={
                        settings?.settings.default_file_token_count_threshold_k?.toString() ??
                        "200"
                      }
                      maxAllowedUploadSizeMb={
                        settings?.settings.max_allowed_upload_size_mb
                      }
                    />
                  </InputLayouts.Vertical>
                </Card>

                <Card>
                  <InputLayouts.Horizontal
                    title="Allow Anonymous Users"
                    description="Allow anyone to start chats without logging in. They do not see any other chats and cannot create agents or update settings."
                  >
                    <SwitchField
                      name="anonymous_user_enabled"
                      onCheckedChange={(checked) => {
                        void saveSettings({ anonymous_user_enabled: checked });
                      }}
                    />
                  </InputLayouts.Horizontal>

                  <InputLayouts.Horizontal
                    title="Always Start with an Agent"
                    description="This removes the default chat. Users will always start in an agent, and new chats will be created in their last active agent. Set featured agents to help new users get started."
                  >
                    <SwitchField
                      name="disable_default_assistant"
                      onCheckedChange={(checked) => {
                        void saveSettings({
                          disable_default_assistant: checked,
                        });
                      }}
                    />
                  </InputLayouts.Horizontal>
                </Card>
              </Section>
            </SimpleCollapsible.Content>
          </SimpleCollapsible>
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>

      <Modal
        open={systemPromptModalOpen}
        onOpenChange={setSystemPromptModalOpen}
      >
        <Modal.Content width="xl" height="fit">
          <Formik
            initialValues={{
              system_prompt:
                defaultAgentConfig?.system_prompt ??
                defaultAgentConfig?.default_system_prompt ??
                "",
            }}
            onSubmit={async ({ system_prompt }) => {
              try {
                const response = await fetch("/api/admin/default-assistant", {
                  method: "PATCH",
                  headers: { "Content-Type": "application/json" },
                  body: JSON.stringify({ system_prompt }),
                });
                if (!response.ok) {
                  const errorMsg = (await response.json()).detail;
                  throw new Error(errorMsg);
                }
                await mutateDefaultAgent();
                setSystemPromptModalOpen(false);
                toast.success("System prompt updated");
              } catch {
                toast.error("Failed to update system prompt");
              }
            }}
          >
            {({ dirty, isSubmitting, submitForm }) => (
              <Form>
                <Modal.Header
                  icon={SvgAddLines}
                  title="System Prompt"
                  description="This base prompt is prepended to all chats, agents, and projects."
                  onClose={() => setSystemPromptModalOpen(false)}
                />
                <Modal.Body>
                  <Section gap={0.25} alignItems="start">
                    <InputTextAreaField
                      name="system_prompt"
                      placeholder="Enter your system prompt..."
                      rows={8}
                      maxRows={20}
                      autoResize
                    />
                    <Text font="secondary-body" color="text-03">
                      {markdown(
                        "You can use the following placeholders in your prompt:\n`{{CURRENT_DATETIME}}` - Current date and day of the week in a human-readable format.\n`{{CITATION_GUIDANCE}}` - Instructions for providing citations when facts are retrieved from search tools.\nOnly included when search tools are used."
                      )}
                    </Text>
                  </Section>
                  <OpalCard background="none" border="solid" padding="sm">
                    <Content
                      sizePreset="main-ui"
                      icon={SvgAlertCircle}
                      title="Modify with caution."
                      description="System prompt affects all chats, agents, and projects. Significant changes may degrade response quality."
                    />
                  </OpalCard>
                </Modal.Body>
                <Modal.Footer>
                  <Button
                    prominence="secondary"
                    onClick={() => setSystemPromptModalOpen(false)}
                  >
                    Cancel
                  </Button>
                  <Button
                    prominence="primary"
                    onClick={submitForm}
                    disabled={!dirty || isSubmitting}
                  >
                    Save
                  </Button>
                </Modal.Footer>
              </Form>
            )}
          </Formik>
        </Modal.Content>
      </Modal>
    </>
  );
}

export default function ChatPreferencesPage() {
  const settings = useSettingsContext();

  const initialValues: ChatPreferencesFormValues = {
    // Features
    search_ui_enabled: settings.settings.search_ui_enabled ?? false,
    deep_research_enabled: settings.settings.deep_research_enabled ?? true,
    auto_scroll: settings.settings.auto_scroll ?? false,

    // Team context
    company_name: settings.settings.company_name ?? "",
    company_description: settings.settings.company_description ?? "",

    // Advanced
    maximum_chat_retention_days:
      settings.settings.maximum_chat_retention_days?.toString() ?? "forever",
    anonymous_user_enabled: settings.settings.anonymous_user_enabled ?? false,
    disable_default_assistant:
      settings.settings.disable_default_assistant ?? false,

    // File limits — for upload size: 0/null means "use default";
    // for token threshold: null means "use default", 0 means "no limit".
    user_file_max_upload_size_mb:
      (settings.settings.user_file_max_upload_size_mb ?? 0) <= 0
        ? settings.settings.default_user_file_max_upload_size_mb?.toString() ??
          "100"
        : settings.settings.user_file_max_upload_size_mb!.toString(),
    file_token_count_threshold_k:
      settings.settings.file_token_count_threshold_k == null
        ? settings.settings.default_file_token_count_threshold_k?.toString() ??
          "200"
        : settings.settings.file_token_count_threshold_k === 0
          ? ""
          : settings.settings.file_token_count_threshold_k.toString(),
  };

  return (
    <Formik
      initialValues={initialValues}
      onSubmit={() => {}}
      enableReinitialize
    >
      <Form className="h-full w-full">
        <ChatPreferencesForm />
      </Form>
    </Formik>
  );
}


================================================
FILE: web/src/refresh-pages/admin/CodeInterpreterPage/index.tsx
================================================
"use client";

import { useState } from "react";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import {
  SvgArrowExchange,
  SvgCheckCircle,
  SvgRefreshCw,
  SvgTerminal,
  SvgUnplug,
  SvgXOctagon,
} from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Section } from "@/layouts/general-layouts";
import { Button, SelectCard } from "@opal/components";
import { CardHeaderLayout } from "@opal/layouts";
import { Disabled, Hoverable } from "@opal/core";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import useCodeInterpreter from "@/hooks/useCodeInterpreter";
import { updateCodeInterpreter } from "@/refresh-pages/admin/CodeInterpreterPage/svc";
import { toast } from "@/hooks/useToast";

const route = ADMIN_ROUTES.CODE_INTERPRETER;

// ---------------------------------------------------------------------------
// Sub-components
// ---------------------------------------------------------------------------

function CheckingStatus() {
  return (
    <Section
      flexDirection="row"
      justifyContent="end"
      alignItems="center"
      gap={0.25}
      padding={0.5}
    >
      <Text mainUiAction text03>
        Checking...
      </Text>
      <SimpleLoader />
    </Section>
  );
}

interface ConnectionStatusProps {
  healthy: boolean;
  isLoading: boolean;
}

function ConnectionStatus({ healthy, isLoading }: ConnectionStatusProps) {
  if (isLoading) {
    return <CheckingStatus />;
  }

  const label = healthy ? "Connected" : "Connection Lost";
  const Icon = healthy ? SvgCheckCircle : SvgXOctagon;
  const iconColor = healthy ? "text-status-success-05" : "text-status-error-05";

  return (
    <Section
      flexDirection="row"
      justifyContent="end"
      alignItems="center"
      gap={0.25}
      padding={0.5}
    >
      <Text mainUiAction text03>
        {label}
      </Text>
      <Icon size={16} className={iconColor} />
    </Section>
  );
}

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function CodeInterpreterPage() {
  const { isHealthy, isEnabled, isLoading, refetch } = useCodeInterpreter();
  const [showDisconnectModal, setShowDisconnectModal] = useState(false);
  const [isReconnecting, setIsReconnecting] = useState(false);

  async function handleToggle(enabled: boolean) {
    const action = enabled ? "reconnect" : "disconnect";
    setIsReconnecting(enabled);
    try {
      const response = await updateCodeInterpreter({ enabled });
      if (!response.ok) {
        toast.error(`Failed to ${action} Code Interpreter`);
        return;
      }
      setShowDisconnectModal(false);
      refetch();
    } finally {
      setIsReconnecting(false);
    }
  }

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Safe and sandboxed Python runtime available to your LLM. See docs for more details."
        separator
      />

      <SettingsLayouts.Body>
        {isEnabled || isLoading ? (
          <Hoverable.Root group="code-interpreter/Card">
            <SelectCard state="filled" padding="sm" rounding="lg">
              <CardHeaderLayout
                sizePreset="main-ui"
                variant="section"
                icon={SvgTerminal}
                title="Code Interpreter"
                description="Built-in Python runtime"
                rightChildren={
                  <ConnectionStatus healthy={isHealthy} isLoading={isLoading} />
                }
                bottomRightChildren={
                  <Section
                    flexDirection="row"
                    justifyContent="end"
                    alignItems="center"
                    gap={0.25}
                    padding={0.25}
                  >
                    <Disabled disabled={isLoading}>
                      <Hoverable.Item group="code-interpreter/Card">
                        <Button
                          prominence="tertiary"
                          size="sm"
                          icon={SvgUnplug}
                          onClick={() => setShowDisconnectModal(true)}
                          tooltip="Disconnect"
                        />
                      </Hoverable.Item>
                    </Disabled>
                    <Button
                      disabled={isLoading}
                      prominence="tertiary"
                      size="sm"
                      icon={SvgRefreshCw}
                      onClick={refetch}
                      tooltip="Refresh"
                    />
                  </Section>
                }
              />
            </SelectCard>
          </Hoverable.Root>
        ) : (
          <SelectCard
            state="empty"
            padding="sm"
            rounding="lg"
            onClick={() => handleToggle(true)}
          >
            <CardHeaderLayout
              sizePreset="main-ui"
              variant="section"
              icon={SvgTerminal}
              title="Code Interpreter (Disconnected)"
              description="Built-in Python runtime"
              rightChildren={
                <Section flexDirection="row" alignItems="center" padding={0.5}>
                  {isReconnecting ? (
                    <CheckingStatus />
                  ) : (
                    <Button
                      prominence="tertiary"
                      rightIcon={SvgArrowExchange}
                      onClick={(e) => {
                        e.stopPropagation();
                        handleToggle(true);
                      }}
                    >
                      Reconnect
                    </Button>
                  )}
                </Section>
              }
            />
          </SelectCard>
        )}
      </SettingsLayouts.Body>

      {showDisconnectModal && (
        <ConfirmationModalLayout
          icon={SvgUnplug}
          title="Disconnect Code Interpreter"
          onClose={() => setShowDisconnectModal(false)}
          submit={
            <Button variant="danger" onClick={() => handleToggle(false)}>
              Disconnect
            </Button>
          }
        >
          <Text as="p" text03>
            All running sessions connected to{" "}
            <Text as="span" mainContentEmphasis text03>
              Code Interpreter
            </Text>{" "}
            will stop working. Note that this will not remove any data from your
            runtime. You can reconnect to this runtime later if needed.
          </Text>
        </ConfirmationModalLayout>
      )}
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/CodeInterpreterPage/svc.ts
================================================
const UPDATE_ENDPOINT = "/api/admin/code-interpreter";

interface CodeInterpreterUpdateRequest {
  enabled: boolean;
}

export async function updateCodeInterpreter(
  request: CodeInterpreterUpdateRequest
): Promise<Response> {
  return fetch(UPDATE_ENDPOINT, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
}


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/CreateGroupPage.tsx
================================================
"use client";

import { useMemo, useState } from "react";
import { useRouter } from "next/navigation";
import useSWR from "swr";
import { Table, Button } from "@opal/components";
import { IllustrationContent } from "@opal/layouts";
import { SvgUsers } from "@opal/icons";
import SvgNoResult from "@opal/illustrations/no-result";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Section } from "@/layouts/general-layouts";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import Separator from "@/refresh-components/Separator";
import { toast } from "@/hooks/useToast";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useAdminUsers from "@/hooks/useAdminUsers";
import { SWR_KEYS } from "@/lib/swr-keys";
import type { ApiKeyDescriptor, MemberRow } from "./interfaces";
import {
  createGroup,
  updateAgentGroupSharing,
  updateDocSetGroupSharing,
  saveTokenLimits,
} from "./svc";
import { apiKeyToMemberRow, memberTableColumns, PAGE_SIZE } from "./shared";
import SharedGroupResources from "@/refresh-pages/admin/GroupsPage/SharedGroupResources";
import TokenLimitSection from "./TokenLimitSection";
import type { TokenLimit } from "./TokenLimitSection";

function CreateGroupPage() {
  const router = useRouter();
  const [groupName, setGroupName] = useState("");
  const [selectedUserIds, setSelectedUserIds] = useState<string[]>([]);
  const [searchTerm, setSearchTerm] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [selectedCcPairIds, setSelectedCcPairIds] = useState<number[]>([]);
  const [selectedDocSetIds, setSelectedDocSetIds] = useState<number[]>([]);
  const [selectedAgentIds, setSelectedAgentIds] = useState<number[]>([]);
  const [tokenLimits, setTokenLimits] = useState<TokenLimit[]>([
    { tokenBudget: null, periodHours: null },
  ]);

  const { users, isLoading: usersLoading, error: usersError } = useAdminUsers();

  const {
    data: apiKeys,
    isLoading: apiKeysLoading,
    error: apiKeysError,
  } = useSWR<ApiKeyDescriptor[]>(SWR_KEYS.adminApiKeys, errorHandlingFetcher);

  const isLoading = usersLoading || apiKeysLoading;
  const error = usersError ?? apiKeysError;

  const allRows: MemberRow[] = useMemo(() => {
    const activeUsers = users.filter((u) => u.is_active);
    const serviceAccountRows = (apiKeys ?? []).map(apiKeyToMemberRow);
    return [...activeUsers, ...serviceAccountRows];
  }, [users, apiKeys]);

  async function handleCreate() {
    const trimmed = groupName.trim();
    if (!trimmed) {
      toast.error("Group name is required");
      return;
    }

    setIsSubmitting(true);
    try {
      const groupId = await createGroup(
        trimmed,
        selectedUserIds,
        selectedCcPairIds
      );
      await updateAgentGroupSharing(groupId, [], selectedAgentIds);
      await updateDocSetGroupSharing(groupId, [], selectedDocSetIds);
      await saveTokenLimits(groupId, tokenLimits, []);
      toast.success(`Group "${trimmed}" created`);
      router.push("/admin/groups");
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "Failed to create group");
    } finally {
      setIsSubmitting(false);
    }
  }

  const headerActions = (
    <Section flexDirection="row" gap={0.5} width="auto" height="auto">
      <Button
        prominence="secondary"
        onClick={() => router.push("/admin/groups")}
      >
        Cancel
      </Button>
      <Button
        onClick={handleCreate}
        disabled={!groupName.trim() || isSubmitting}
      >
        Create
      </Button>
    </Section>
  );

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={SvgUsers}
        title="Create Group"
        separator
        rightChildren={headerActions}
      />

      <SettingsLayouts.Body>
        {/* Group Name */}
        <Section
          gap={0.5}
          height="auto"
          alignItems="stretch"
          justifyContent="start"
        >
          <Text mainUiBody text04>
            Group Name
          </Text>
          <InputTypeIn
            placeholder="Name your group"
            value={groupName}
            onChange={(e) => setGroupName(e.target.value)}
          />
        </Section>

        <Separator noPadding />

        {/* Members table */}
        {isLoading && <SimpleLoader />}

        {error && (
          <Text as="p" secondaryBody text03>
            Failed to load users.
          </Text>
        )}

        {!isLoading && !error && (
          <Section
            gap={0.75}
            height="auto"
            alignItems="stretch"
            justifyContent="start"
          >
            <InputTypeIn
              value={searchTerm}
              onChange={(e) => setSearchTerm(e.target.value)}
              placeholder="Search users and accounts..."
              leftSearchIcon
            />
            <Table
              data={allRows}
              columns={memberTableColumns}
              getRowId={(row) => row.id ?? row.email}
              pageSize={PAGE_SIZE}
              searchTerm={searchTerm}
              selectionBehavior="multi-select"
              onSelectionChange={setSelectedUserIds}
              footer={{}}
              emptyState={
                <IllustrationContent
                  illustration={SvgNoResult}
                  title="No users found"
                  description="No users match your search."
                />
              }
            />
          </Section>
        )}
        <SharedGroupResources
          selectedCcPairIds={selectedCcPairIds}
          onCcPairIdsChange={setSelectedCcPairIds}
          selectedDocSetIds={selectedDocSetIds}
          onDocSetIdsChange={setSelectedDocSetIds}
          selectedAgentIds={selectedAgentIds}
          onAgentIdsChange={setSelectedAgentIds}
        />

        <TokenLimitSection
          limits={tokenLimits}
          onLimitsChange={setTokenLimits}
        />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}

export default CreateGroupPage;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/EditGroupPage.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { useRouter } from "next/navigation";
import useSWR, { useSWRConfig } from "swr";
import { Table, Button } from "@opal/components";
import { IllustrationContent } from "@opal/layouts";
import { SvgUsers, SvgTrash, SvgMinusCircle, SvgPlusCircle } from "@opal/icons";
import IconButton from "@/refresh-components/buttons/IconButton";
import Card from "@/refresh-components/cards/Card";
import * as InputLayouts from "@/layouts/input-layouts";
import SvgNoResult from "@opal/illustrations/no-result";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Section } from "@/layouts/general-layouts";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import Separator from "@/refresh-components/Separator";
import { toast } from "@/hooks/useToast";
import { errorHandlingFetcher } from "@/lib/fetcher";
import useAdminUsers from "@/hooks/useAdminUsers";
import type { UserGroup } from "@/lib/types";
import type {
  ApiKeyDescriptor,
  MemberRow,
  TokenRateLimitDisplay,
} from "./interfaces";
import {
  apiKeyToMemberRow,
  baseColumns,
  memberTableColumns,
  tc,
  PAGE_SIZE,
} from "./shared";
import {
  renameGroup,
  updateGroup,
  deleteGroup,
  updateAgentGroupSharing,
  updateDocSetGroupSharing,
  saveTokenLimits,
} from "./svc";
import { SWR_KEYS } from "@/lib/swr-keys";
import SharedGroupResources from "@/refresh-pages/admin/GroupsPage/SharedGroupResources";
import TokenLimitSection from "./TokenLimitSection";
import type { TokenLimit } from "./TokenLimitSection";

const addModeColumns = memberTableColumns;

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

interface EditGroupPageProps {
  groupId: number;
}

function EditGroupPage({ groupId }: EditGroupPageProps) {
  const router = useRouter();
  const { mutate } = useSWRConfig();

  // Fetch the group data — poll every 5s while syncing so the UI updates
  // automatically when the backend finishes processing the previous edit.
  const {
    data: groups,
    isLoading: groupLoading,
    error: groupError,
  } = useSWR<UserGroup[]>(SWR_KEYS.adminUserGroups, errorHandlingFetcher, {
    refreshInterval: (latestData) => {
      const g = latestData?.find((g) => g.id === groupId);
      return g && !g.is_up_to_date ? 5000 : 0;
    },
  });

  const group = useMemo(
    () => groups?.find((g) => g.id === groupId) ?? null,
    [groups, groupId]
  );

  const isSyncing = group != null && !group.is_up_to_date;

  // Fetch token rate limits for this group
  const { data: tokenRateLimits, isLoading: tokenLimitsLoading } = useSWR<
    TokenRateLimitDisplay[]
  >(SWR_KEYS.userGroupTokenRateLimit(groupId), errorHandlingFetcher);

  // Form state
  const [groupName, setGroupName] = useState("");
  const [selectedUserIds, setSelectedUserIds] = useState<string[]>([]);
  const [searchTerm, setSearchTerm] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);
  const isSubmittingRef = useRef(false);
  const [selectedCcPairIds, setSelectedCcPairIds] = useState<number[]>([]);
  const [selectedDocSetIds, setSelectedDocSetIds] = useState<number[]>([]);
  const [selectedAgentIds, setSelectedAgentIds] = useState<number[]>([]);
  const [tokenLimits, setTokenLimits] = useState<TokenLimit[]>([
    { tokenBudget: null, periodHours: null },
  ]);
  const [showDeleteModal, setShowDeleteModal] = useState(false);
  const [isDeleting, setIsDeleting] = useState(false);
  const [initialized, setInitialized] = useState(false);
  const [isAddingMembers, setIsAddingMembers] = useState(false);
  const initialAgentIdsRef = useRef<number[]>([]);
  const initialDocSetIdsRef = useRef<number[]>([]);

  // Users and API keys
  const { users, isLoading: usersLoading, error: usersError } = useAdminUsers();

  const {
    data: apiKeys,
    isLoading: apiKeysLoading,
    error: apiKeysError,
  } = useSWR<ApiKeyDescriptor[]>(SWR_KEYS.adminApiKeys, errorHandlingFetcher);

  const isLoading =
    groupLoading || usersLoading || apiKeysLoading || tokenLimitsLoading;
  const error = groupError ?? usersError ?? apiKeysError;

  // Pre-populate form when group data loads
  useEffect(() => {
    if (group && !initialized) {
      setGroupName(group.name);
      setSelectedUserIds(group.users.map((u) => u.id));
      setSelectedCcPairIds(group.cc_pairs.map((cc) => cc.id));
      const docSetIds = group.document_sets.map((ds) => ds.id);
      setSelectedDocSetIds(docSetIds);
      initialDocSetIdsRef.current = docSetIds;
      const agentIds = group.personas.map((p) => p.id);
      setSelectedAgentIds(agentIds);
      initialAgentIdsRef.current = agentIds;
      setInitialized(true);
    }
  }, [group, initialized]);

  // Pre-populate token limits when fetched
  useEffect(() => {
    if (tokenRateLimits && tokenRateLimits.length > 0) {
      setTokenLimits(
        tokenRateLimits.map((trl) => ({
          tokenBudget: trl.token_budget,
          periodHours: trl.period_hours,
        }))
      );
    }
  }, [tokenRateLimits]);

  const allRows = useMemo(() => {
    const activeUsers = users.filter((u) => u.is_active);
    const serviceAccountRows = (apiKeys ?? []).map(apiKeyToMemberRow);
    return [...activeUsers, ...serviceAccountRows];
  }, [users, apiKeys]);

  const memberRows = useMemo(() => {
    const selected = new Set(selectedUserIds);
    return allRows.filter((r) => selected.has(r.id ?? r.email));
  }, [allRows, selectedUserIds]);

  const currentRowSelection = useMemo(() => {
    const sel: Record<string, boolean> = {};
    for (const id of selectedUserIds) sel[id] = true;
    return sel;
  }, [selectedUserIds]);

  const handleRemoveMember = useCallback((userId: string) => {
    setSelectedUserIds((prev) => prev.filter((id) => id !== userId));
  }, []);

  const memberColumns = useMemo(
    () => [
      ...baseColumns,
      tc.actions({
        showSorting: false,
        showColumnVisibility: false,
        cell: (row: MemberRow) => (
          <IconButton
            icon={SvgMinusCircle}
            tertiary
            onClick={(e) => {
              e.stopPropagation();
              handleRemoveMember(row.id ?? row.email);
            }}
          />
        ),
      }),
    ],
    [handleRemoveMember]
  );

  // IDs of members not visible in the add-mode table (e.g. inactive users).
  // We preserve these so they aren't silently removed when the table fires
  // onSelectionChange with only the visible rows.
  const hiddenMemberIds = useMemo(() => {
    const visibleIds = new Set(allRows.map((r) => r.id ?? r.email));
    return selectedUserIds.filter((id) => !visibleIds.has(id));
  }, [allRows, selectedUserIds]);

  // Guard onSelectionChange: ignore updates until the form is fully initialized.
  // Without this, TanStack fires onSelectionChange before all rows are loaded,
  // which overwrites selectedUserIds with a partial set.
  const handleSelectionChange = useCallback(
    (ids: string[]) => {
      if (!initialized) return;
      setSelectedUserIds([...ids, ...hiddenMemberIds]);
    },
    [initialized, hiddenMemberIds]
  );

  async function handleSave() {
    if (isSubmittingRef.current) return;

    const trimmed = groupName.trim();
    if (!trimmed) {
      toast.error("Group name is required");
      return;
    }

    // Re-fetch group to check sync status before saving
    const freshGroups = await fetch(SWR_KEYS.adminUserGroups).then((r) =>
      r.json()
    );
    const freshGroup = freshGroups.find((g: UserGroup) => g.id === groupId);
    if (freshGroup && !freshGroup.is_up_to_date) {
      toast.error(
        "This group is currently syncing. Please wait a moment and try again."
      );
      return;
    }

    isSubmittingRef.current = true;
    setIsSubmitting(true);
    try {
      // Rename if name changed
      if (group && trimmed !== group.name) {
        await renameGroup(group.id, trimmed);
      }

      // Update members and cc_pairs
      await updateGroup(groupId, selectedUserIds, selectedCcPairIds);

      // Update agent sharing (add/remove this group from changed agents)
      await updateAgentGroupSharing(
        groupId,
        initialAgentIdsRef.current,
        selectedAgentIds
      );

      // Update document set sharing (add/remove this group from changed doc sets)
      await updateDocSetGroupSharing(
        groupId,
        initialDocSetIdsRef.current,
        selectedDocSetIds
      );

      // Save token rate limits (create/update/delete)
      await saveTokenLimits(groupId, tokenLimits, tokenRateLimits ?? []);

      // Update refs so subsequent saves diff correctly
      initialAgentIdsRef.current = selectedAgentIds;
      initialDocSetIdsRef.current = selectedDocSetIds;

      mutate(SWR_KEYS.adminUserGroups);
      mutate(SWR_KEYS.userGroupTokenRateLimit(groupId));
      toast.success(`Group "${trimmed}" updated`);
      router.push("/admin/groups");
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "Failed to update group");
    } finally {
      isSubmittingRef.current = false;
      setIsSubmitting(false);
    }
  }

  async function handleDelete() {
    setIsDeleting(true);
    try {
      await deleteGroup(groupId);
      mutate(SWR_KEYS.adminUserGroups);
      toast.success(`Group "${group?.name}" deleted`);
      router.push("/admin/groups");
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "Failed to delete group");
    } finally {
      setIsDeleting(false);
      setShowDeleteModal(false);
    }
  }

  // 404 state
  if (!isLoading && !error && !group) {
    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={SvgUsers}
          title="Group Not Found"
          separator
        />
        <SettingsLayouts.Body>
          <IllustrationContent
            illustration={SvgNoResult}
            title="Group not found"
            description="This group doesn't exist or may have been deleted."
          />
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  const headerActions = (
    <Section flexDirection="row" gap={0.5} width="auto" height="auto">
      <Button
        prominence="secondary"
        onClick={() => router.push("/admin/groups")}
      >
        Cancel
      </Button>
      <Button
        onClick={handleSave}
        disabled={!groupName.trim() || isSubmitting || isSyncing}
        tooltip={
          isSyncing
            ? "Document embeddings are being updated due to recent changes to this group."
            : undefined
        }
      >
        {isSubmitting ? "Saving..." : isSyncing ? "Syncing..." : "Save Changes"}
      </Button>
    </Section>
  );

  return (
    <>
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={SvgUsers}
          title="Edit Group"
          separator
          rightChildren={headerActions}
        />

        <SettingsLayouts.Body>
          {isLoading && <SimpleLoader />}

          {error && (
            <Text as="p" secondaryBody text03>
              Failed to load group data.
            </Text>
          )}

          {!isLoading && !error && group && (
            <>
              {/* Group Name */}
              <Section
                gap={0.5}
                height="auto"
                alignItems="stretch"
                justifyContent="start"
              >
                <Text mainUiBody text04>
                  Group Name
                </Text>
                <InputTypeIn
                  placeholder="Name your group"
                  value={groupName}
                  onChange={(e) => setGroupName(e.target.value)}
                />
              </Section>

              <Separator noPadding />

              {/* Members table */}
              <Section
                gap={0.75}
                height="auto"
                alignItems="stretch"
                justifyContent="start"
              >
                <Section
                  flexDirection="row"
                  gap={0.5}
                  height="auto"
                  alignItems="center"
                  justifyContent="start"
                >
                  <InputTypeIn
                    value={searchTerm}
                    onChange={(e) => setSearchTerm(e.target.value)}
                    placeholder={
                      isAddingMembers
                        ? "Search users and accounts..."
                        : "Search members..."
                    }
                    leftSearchIcon
                    className="flex-1"
                  />
                  {isAddingMembers ? (
                    <Button
                      prominence="secondary"
                      onClick={() => setIsAddingMembers(false)}
                    >
                      Done
                    </Button>
                  ) : (
                    <Button
                      prominence="tertiary"
                      icon={SvgPlusCircle}
                      onClick={() => setIsAddingMembers(true)}
                    >
                      Add
                    </Button>
                  )}
                </Section>

                {isAddingMembers ? (
                  <Table
                    key="add-members"
                    data={allRows as MemberRow[]}
                    columns={addModeColumns}
                    getRowId={(row) => row.id ?? row.email}
                    pageSize={PAGE_SIZE}
                    searchTerm={searchTerm}
                    selectionBehavior="multi-select"
                    initialRowSelection={currentRowSelection}
                    onSelectionChange={handleSelectionChange}
                    footer={{}}
                    emptyState={
                      <IllustrationContent
                        illustration={SvgNoResult}
                        title="No users found"
                        description="No users match your search."
                      />
                    }
                  />
                ) : (
                  <Table
                    data={memberRows}
                    columns={memberColumns}
                    getRowId={(row) => row.id ?? row.email}
                    pageSize={PAGE_SIZE}
                    searchTerm={searchTerm}
                    footer={{}}
                    emptyState={
                      <IllustrationContent
                        illustration={SvgNoResult}
                        title="No members"
                        description="Add members to this group."
                      />
                    }
                  />
                )}
              </Section>

              <SharedGroupResources
                selectedCcPairIds={selectedCcPairIds}
                onCcPairIdsChange={setSelectedCcPairIds}
                selectedDocSetIds={selectedDocSetIds}
                onDocSetIdsChange={setSelectedDocSetIds}
                selectedAgentIds={selectedAgentIds}
                onAgentIdsChange={setSelectedAgentIds}
              />

              <TokenLimitSection
                limits={tokenLimits}
                onLimitsChange={setTokenLimits}
              />

              {/* Delete This Group */}
              <Card>
                <InputLayouts.Horizontal
                  title="Delete This Group"
                  description="Members will lose access to any resources shared with this group."
                  center
                  nonInteractive
                >
                  <Button
                    variant="danger"
                    prominence="secondary"
                    icon={SvgTrash}
                    onClick={() => setShowDeleteModal(true)}
                  >
                    Delete Group
                  </Button>
                </InputLayouts.Horizontal>
              </Card>
            </>
          )}
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>

      {showDeleteModal && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title="Delete Group"
          onClose={() => setShowDeleteModal(false)}
          submit={
            <Button
              variant="danger"
              onClick={handleDelete}
              disabled={isDeleting}
            >
              {isDeleting ? "Deleting..." : "Delete"}
            </Button>
          }
        >
          <Text as="p" text03>
            Members of group{" "}
            <Text as="span" text05>
              {group?.name}
            </Text>{" "}
            will lose access to any resources shared with this group, unless
            they have been granted access directly. Deletion cannot be undone.
          </Text>
        </ConfirmationModalLayout>
      )}
    </>
  );
}

export default EditGroupPage;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/GroupCard.tsx
================================================
"use client";

import type { Route } from "next";
import { useRouter } from "next/navigation";
import type { UserGroup } from "@/lib/types";
import { SvgChevronRight, SvgUserManage, SvgUsers } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import {
  isBuiltInGroup,
  buildGroupDescription,
  formatMemberCount,
} from "./utils";
import { renameGroup } from "./svc";
import { toast } from "@/hooks/useToast";
import { useSWRConfig } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";

interface GroupCardProps {
  group: UserGroup;
}

function GroupCard({ group }: GroupCardProps) {
  const router = useRouter();
  const { mutate } = useSWRConfig();
  const builtIn = isBuiltInGroup(group);
  const isAdmin = group.name === "Admin";
  const isBasic = group.name === "Basic";
  const isSyncing = !group.is_up_to_date;

  async function handleRename(newName: string) {
    try {
      await renameGroup(group.id, newName);
      mutate(SWR_KEYS.adminUserGroups);
      toast.success(`Group renamed to "${newName}"`);
    } catch (e) {
      console.error("Failed to rename group:", e);
      toast.error(e instanceof Error ? e.message : "Failed to rename group");
    }
  }

  return (
    <Card padding={0.5} data-card>
      <ContentAction
        icon={isAdmin ? SvgUserManage : SvgUsers}
        title={group.name}
        description={buildGroupDescription(group)}
        sizePreset="main-content"
        variant="section"
        tag={isBasic ? { title: "Default" } : undefined}
        editable={!builtIn && !isSyncing}
        onTitleChange={!builtIn && !isSyncing ? handleRename : undefined}
        rightChildren={
          <Section flexDirection="row" alignItems="start" gap={0}>
            <div className="py-1">
              <Text mainUiBody text03>
                {formatMemberCount(
                  group.users.filter((u) => u.is_active).length
                )}
              </Text>
            </div>
            <Button
              icon={SvgChevronRight}
              prominence="tertiary"
              tooltip="View group"
              aria-label="View group"
              onClick={() => router.push(`/admin/groups/${group.id}` as Route)}
            />
          </Section>
        }
      />
    </Card>
  );
}

export default GroupCard;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/GroupsList.tsx
================================================
"use client";

import { useMemo } from "react";
import type { UserGroup } from "@/lib/types";
import Separator from "@/refresh-components/Separator";
import GroupCard from "./GroupCard";
import { isBuiltInGroup } from "./utils";
import { Section } from "@/layouts/general-layouts";
import { IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";

interface GroupsListProps {
  groups: UserGroup[];
  searchQuery: string;
}

function GroupsList({ groups, searchQuery }: GroupsListProps) {
  const filtered = useMemo(() => {
    if (!searchQuery.trim()) return groups;
    const q = searchQuery.toLowerCase();
    return groups.filter((g) => g.name.toLowerCase().includes(q));
  }, [groups, searchQuery]);

  if (filtered.length === 0) {
    return (
      <IllustrationContent
        illustration={SvgNoResult}
        title="No groups found"
        description={`No groups matching "${searchQuery}"`}
      />
    );
  }

  const builtInGroups = filtered.filter(isBuiltInGroup);
  const customGroups = filtered.filter((g) => !isBuiltInGroup(g));

  return (
    <Section flexDirection="column" gap={0.5}>
      {builtInGroups.map((group) => (
        <GroupCard key={group.id} group={group} />
      ))}

      {builtInGroups.length > 0 && customGroups.length > 0 && (
        <Separator paddingYRem={0.5} />
      )}

      {customGroups.map((group) => (
        <GroupCard key={group.id} group={group} />
      ))}
    </Section>
  );
}

export default GroupsList;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourceContent.tsx
================================================
"use client";

import type { ReactNode } from "react";
import { SvgX } from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import { Content } from "@opal/layouts";
import IconButton from "@/refresh-components/buttons/IconButton";

interface ResourceContentProps {
  /** SVG icon for connectors/doc sets. */
  icon?: IconFunctionComponent;
  /** Custom ReactNode icon (e.g. AgentAvatar). Takes priority over `icon`. */
  leftContent?: ReactNode;
  title: string;
  description?: string;
  /** Inline info rendered after description (e.g. source icon stack). */
  infoContent?: ReactNode;
  onRemove: () => void;
}

function ResourceContent({
  icon,
  leftContent,
  title,
  description,
  infoContent,
  onRemove,
}: ResourceContentProps) {
  return (
    <div className="flex flex-1 gap-0.5 items-start p-1.5 rounded-08 bg-background-tint-01 min-w-[240px] max-w-[302px]">
      <div className="flex flex-1 gap-1 p-0.5 items-center min-w-0">
        {leftContent ? (
          <>
            {leftContent}
            <div className="flex-1 min-w-0">
              <Content
                title={title}
                description={description}
                sizePreset="main-ui"
                variant="section"
              />
            </div>
          </>
        ) : (
          <div className="flex-1 min-w-0">
            <Content
              icon={icon}
              title={title}
              description={description}
              sizePreset="main-ui"
              variant="section"
            />
          </div>
        )}
      </div>
      {infoContent}
      <IconButton small icon={SvgX} onClick={onRemove} className="shrink-0" />
    </div>
  );
}

export default ResourceContent;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourcePopover.tsx
================================================
"use client";

import { useState } from "react";
import { SvgEmpty } from "@opal/icons";
import { Content } from "@opal/layouts";
import { Section } from "@/layouts/general-layouts";
import Popover from "@/refresh-components/Popover";
import Separator from "@/refresh-components/Separator";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import type { ResourcePopoverProps } from "@/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces";

function ResourcePopover({
  placeholder,
  searchValue,
  onSearchChange,
  sections,
}: ResourcePopoverProps) {
  const [open, setOpen] = useState(false);

  const totalItems = sections.reduce((sum, s) => sum + s.items.length, 0);

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <Popover.Anchor>
        <InputTypeIn
          placeholder={placeholder}
          value={searchValue}
          onChange={(e) => {
            onSearchChange(e.target.value);
            if (!open) setOpen(true);
          }}
          onFocus={() => setOpen(true)}
        />
      </Popover.Anchor>
      <Popover.Content
        width="trigger"
        align="start"
        sideOffset={4}
        onOpenAutoFocus={(e) => e.preventDefault()}
      >
        <div className="flex flex-col gap-1 max-h-64 overflow-y-auto">
          {totalItems === 0 ? (
            <div className="px-3 py-3">
              <Content
                icon={SvgEmpty}
                title="No results found"
                sizePreset="secondary"
                variant="section"
              />
            </div>
          ) : (
            sections.map(
              (section, idx) =>
                section.items.length > 0 && (
                  <div key={section.label ?? `section-${idx}`}>
                    {section.label && (
                      <Section
                        flexDirection="row"
                        gap={0.25}
                        padding={0}
                        height="auto"
                        alignItems="center"
                        justifyContent="start"
                        className="px-2 pt-2 pb-1"
                      >
                        <Text secondaryBody text03 className="shrink-0">
                          {section.label}
                        </Text>
                        <Separator noPadding className="flex-1" />
                      </Section>
                    )}
                    <Section
                      gap={0.25}
                      alignItems="stretch"
                      justifyContent="start"
                    >
                      {section.items.map((item) => (
                        <div
                          key={item.key}
                          className={cn(
                            "rounded-08 cursor-pointer",
                            item.disabled
                              ? "bg-background-tint-02"
                              : "hover:bg-background-tint-02 transition-colors"
                          )}
                          onClick={() => {
                            item.onSelect();
                          }}
                        >
                          {item.render(!!item.disabled)}
                        </div>
                      ))}
                    </Section>
                  </div>
                )
            )
          )}
        </div>
      </Popover.Content>
    </Popover>
  );
}

export default ResourcePopover;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/index.tsx
================================================
"use client";

import { useState, useMemo } from "react";
import { SvgEmpty, SvgFiles, SvgXOctagon } from "@opal/icons";
import { Content } from "@opal/layouts";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import LineItem from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
import Separator from "@/refresh-components/Separator";
import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { useConnectorStatus } from "@/lib/hooks";
import { useDocumentSets } from "@/lib/hooks/useDocumentSets";
import { useAgents } from "@/hooks/useAgents";
import { getSourceMetadata } from "@/lib/sources";
import type { ValidSources } from "@/lib/types";
import ResourceContent from "@/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourceContent";
import ResourcePopover from "@/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourcePopover";
import type { PopoverSection } from "@/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces";

interface SharedGroupResourcesProps {
  selectedCcPairIds: number[];
  onCcPairIdsChange: (ids: number[]) => void;
  selectedDocSetIds: number[];
  onDocSetIdsChange: (ids: number[]) => void;
  selectedAgentIds: number[];
  onAgentIdsChange: (ids: number[]) => void;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

function SharedBadge() {
  return (
    <Text as="span" secondaryBody text03>
      Shared
    </Text>
  );
}

interface SourceIconStackProps {
  sources: { source: ValidSources }[];
}

function SourceIconStack({ sources }: SourceIconStackProps) {
  if (sources.length === 0) return null;

  const unique = Array.from(
    new Map(sources.map((s) => [s.source, s])).values()
  ).slice(0, 3);

  return (
    <Section
      flexDirection="row"
      alignItems="center"
      width="auto"
      height="auto"
      gap={0}
      className="shrink-0 p-0.5"
    >
      {unique.map((s, i) => {
        const Icon = getSourceMetadata(s.source).icon;
        return (
          <div
            key={s.source}
            className="flex items-center justify-center size-4 rounded-04 bg-background-tint-00 border border-border-01 overflow-hidden [&_img]:!size-4 [&_img]:!m-0 [&_svg]:size-4"
            style={{ zIndex: unique.length - i, marginLeft: i > 0 ? -6 : 0 }}
          >
            <Icon />
          </div>
        );
      })}
    </Section>
  );
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

function SharedGroupResources({
  selectedCcPairIds,
  onCcPairIdsChange,
  selectedDocSetIds,
  onDocSetIdsChange,
  selectedAgentIds,
  onAgentIdsChange,
}: SharedGroupResourcesProps) {
  const [connectorSearch, setConnectorSearch] = useState("");
  const [agentSearch, setAgentSearch] = useState("");

  const { data: connectors = [] } = useConnectorStatus();
  const { documentSets } = useDocumentSets();
  const { agents } = useAgents();

  // --- Derived data ---

  const selectedCcPairSet = useMemo(
    () => new Set(selectedCcPairIds),
    [selectedCcPairIds]
  );
  const selectedDocSetSet = useMemo(
    () => new Set(selectedDocSetIds),
    [selectedDocSetIds]
  );
  const selectedAgentSet = useMemo(
    () => new Set(selectedAgentIds),
    [selectedAgentIds]
  );

  const selectedPairs = useMemo(
    () => connectors.filter((p) => selectedCcPairSet.has(p.cc_pair_id)),
    [connectors, selectedCcPairSet]
  );
  const selectedDocSets = useMemo(
    () => documentSets.filter((ds) => selectedDocSetSet.has(ds.id)),
    [documentSets, selectedDocSetSet]
  );
  const selectedAgentObjects = useMemo(
    () => agents.filter((a) => selectedAgentSet.has(a.id)),
    [agents, selectedAgentSet]
  );

  // --- Popover sections ---

  const connectorDocSetSections: PopoverSection[] = useMemo(() => {
    const q = connectorSearch.toLowerCase();

    const connectorItems = connectors
      .filter((p) => !q || (p.name ?? "").toLowerCase().includes(q))
      .map((p) => {
        const isSelected = selectedCcPairSet.has(p.cc_pair_id);
        return {
          key: `c-${p.cc_pair_id}`,
          disabled: isSelected,
          onSelect: () =>
            isSelected
              ? onCcPairIdsChange(
                  selectedCcPairIds.filter((id) => id !== p.cc_pair_id)
                )
              : onCcPairIdsChange([...selectedCcPairIds, p.cc_pair_id]),
          render: (dimmed: boolean) => (
            <LineItem
              interactive={!dimmed}
              muted={dimmed}
              icon={getSourceMetadata(p.connector.source).icon}
              rightChildren={
                p.groups.length > 0 || dimmed ? <SharedBadge /> : undefined
              }
            >
              {p.name ?? `Connector #${p.cc_pair_id}`}
            </LineItem>
          ),
        };
      });

    const docSetItems = documentSets
      .filter((ds) => !q || ds.name.toLowerCase().includes(q))
      .map((ds) => {
        const isSelected = selectedDocSetSet.has(ds.id);
        return {
          key: `d-${ds.id}`,
          disabled: isSelected,
          onSelect: () =>
            isSelected
              ? onDocSetIdsChange(
                  selectedDocSetIds.filter((id) => id !== ds.id)
                )
              : onDocSetIdsChange([...selectedDocSetIds, ds.id]),
          render: (dimmed: boolean) => (
            <LineItem
              interactive={!dimmed}
              muted={dimmed}
              icon={SvgFiles}
              rightChildren={
                ds.groups.length > 0 || dimmed ? <SharedBadge /> : undefined
              }
            >
              {ds.name}
            </LineItem>
          ),
        };
      });

    return [
      ...(connectorItems.length > 0
        ? [{ label: "Connectors", items: connectorItems }]
        : []),
      ...(docSetItems.length > 0
        ? [{ label: "Document Sets", items: docSetItems }]
        : []),
    ];
  }, [
    connectors,
    documentSets,
    connectorSearch,
    selectedCcPairSet,
    selectedDocSetSet,
    selectedCcPairIds,
    selectedDocSetIds,
    onCcPairIdsChange,
    onDocSetIdsChange,
  ]);

  const agentSections: PopoverSection[] = useMemo(() => {
    const q = agentSearch.toLowerCase();

    const items = agents
      .filter((a) => !q || a.name.toLowerCase().includes(q))
      .map((a) => {
        const isSelected = selectedAgentSet.has(a.id);
        return {
          key: `a-${a.id}`,
          disabled: isSelected,
          onSelect: () =>
            isSelected
              ? onAgentIdsChange(selectedAgentIds.filter((id) => id !== a.id))
              : onAgentIdsChange([...selectedAgentIds, a.id]),
          render: (dimmed: boolean) => (
            <LineItem
              interactive={!dimmed}
              muted={dimmed}
              icon={(_props) => <AgentAvatar agent={a} size={16} />}
              description="agent"
              rightChildren={
                !a.is_public || dimmed ? <SharedBadge /> : undefined
              }
            >
              {a.name}
            </LineItem>
          ),
        };
      });

    return items.length > 0 ? [{ items }] : [];
  }, [
    agents,
    agentSearch,
    selectedAgentSet,
    selectedAgentIds,
    onAgentIdsChange,
  ]);

  // --- Handlers ---

  function removeConnector(id: number) {
    onCcPairIdsChange(selectedCcPairIds.filter((cid) => cid !== id));
  }

  function removeDocSet(id: number) {
    onDocSetIdsChange(selectedDocSetIds.filter((did) => did !== id));
  }

  function removeAgent(id: number) {
    onAgentIdsChange(selectedAgentIds.filter((aid) => aid !== id));
  }

  const hasSelectedResources =
    selectedPairs.length > 0 || selectedDocSets.length > 0;

  return (
    <SimpleCollapsible>
      <SimpleCollapsible.Header
        title="Shared with This Group"
        description="Share connectors, document sets, agents with members of this group."
      />
      <SimpleCollapsible.Content>
        <Card>
          <Section
            gap={1}
            height="auto"
            alignItems="stretch"
            justifyContent="start"
            width="full"
          >
            {/* Connectors & Document Sets */}
            <Section
              gap={0.5}
              height="auto"
              alignItems="stretch"
              justifyContent="start"
            >
              <Section
                gap={0.25}
                height="auto"
                alignItems="stretch"
                justifyContent="start"
              >
                <Text mainUiAction text04>
                  Connectors & Document Sets
                </Text>
                <ResourcePopover
                  placeholder="Add connectors, document sets"
                  searchValue={connectorSearch}
                  onSearchChange={setConnectorSearch}
                  sections={connectorDocSetSections}
                />
              </Section>
              {hasSelectedResources ? (
                <Section
                  flexDirection="row"
                  wrap
                  gap={0.25}
                  height="auto"
                  alignItems="start"
                  justifyContent="start"
                >
                  {selectedPairs.map((pair) => (
                    <ResourceContent
                      key={`c-${pair.cc_pair_id}`}
                      icon={getSourceMetadata(pair.connector.source).icon}
                      title={pair.name ?? `Connector #${pair.cc_pair_id}`}
                      description="Connector"
                      onRemove={() => removeConnector(pair.cc_pair_id)}
                    />
                  ))}
                  {selectedDocSets.map((ds) => (
                    <ResourceContent
                      key={`d-${ds.id}`}
                      icon={SvgFiles}
                      title={ds.name}
                      description="Document Set"
                      infoContent={
                        <SourceIconStack sources={ds.cc_pair_summaries} />
                      }
                      onRemove={() => removeDocSet(ds.id)}
                    />
                  ))}
                </Section>
              ) : (
                <Content
                  icon={SvgEmpty}
                  title="No connectors or document sets added"
                  description="Add connectors or document set to share with this group."
                  sizePreset="secondary"
                  variant="section"
                />
              )}
            </Section>

            <Separator noPadding />

            {/* Agents */}
            <Section
              gap={0.5}
              height="auto"
              alignItems="stretch"
              justifyContent="start"
            >
              <Section
                gap={0.25}
                height="auto"
                alignItems="stretch"
                justifyContent="start"
              >
                <Text mainUiAction text04>
                  Agents
                </Text>
                <ResourcePopover
                  placeholder="Add agents"
                  searchValue={agentSearch}
                  onSearchChange={setAgentSearch}
                  sections={agentSections}
                />
              </Section>
              {selectedAgentObjects.length > 0 ? (
                <Section
                  flexDirection="row"
                  wrap
                  gap={0.25}
                  height="auto"
                  alignItems="start"
                  justifyContent="start"
                >
                  {selectedAgentObjects.map((agent) => (
                    <ResourceContent
                      key={agent.id}
                      leftContent={
                        <div className="flex items-center justify-center shrink-0 size-5 p-0.5 rounded-04">
                          <AgentAvatar agent={agent} size={16} />
                        </div>
                      }
                      title={agent.name}
                      description="agent"
                      onRemove={() => removeAgent(agent.id)}
                    />
                  ))}
                </Section>
              ) : (
                <Content
                  icon={SvgXOctagon}
                  title="No agents added"
                  description="Add agents to share with this group."
                  sizePreset="secondary"
                  variant="section"
                />
              )}
            </Section>
          </Section>
        </Card>
      </SimpleCollapsible.Content>
    </SimpleCollapsible>
  );
}

export default SharedGroupResources;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces.ts
================================================
export interface PopoverItem {
  key: string;
  render: (disabled: boolean) => React.ReactNode;
  onSelect: () => void;
  /** When true, the item is already selected — shown dimmed with bg-tint-02. */
  disabled?: boolean;
}

export interface PopoverSection {
  label?: string;
  items: PopoverItem[];
}

export interface ResourcePopoverProps {
  placeholder: string;
  searchValue: string;
  onSearchChange: (value: string) => void;
  sections: PopoverSection[];
}


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/TokenLimitSection.tsx
================================================
"use client";

import { useRef } from "react";
import { SvgPlusCircle, SvgMinusCircle } from "@opal/icons";
import { Button } from "@opal/components";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import InputNumber from "@/refresh-components/inputs/InputNumber";
import Text from "@/refresh-components/texts/Text";
import IconButton from "@/refresh-components/buttons/IconButton";
import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

export interface TokenLimit {
  tokenBudget: number | null;
  periodHours: number | null;
}

interface TokenLimitSectionProps {
  limits: TokenLimit[];
  onLimitsChange: (limits: TokenLimit[]) => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

function TokenLimitSection({ limits, onLimitsChange }: TokenLimitSectionProps) {
  const nextKeyRef = useRef(limits.length);
  const keysRef = useRef<number[]>(limits.map((_, i) => i));

  // Sync keys if the parent provides a different number of limits externally
  // (e.g. loaded from server after initial mount).
  if (keysRef.current.length < limits.length) {
    while (keysRef.current.length < limits.length) {
      keysRef.current.push(nextKeyRef.current++);
    }
  } else if (keysRef.current.length > limits.length) {
    keysRef.current = keysRef.current.slice(0, limits.length);
  }

  function addLimit() {
    const emptyIndex = limits.findIndex(
      (l) => l.tokenBudget === null && l.periodHours === null
    );
    if (emptyIndex !== -1) return;
    const key = nextKeyRef.current++;
    keysRef.current = [...keysRef.current, key];
    onLimitsChange([...limits, { tokenBudget: null, periodHours: null }]);
  }

  function removeLimit(index: number) {
    keysRef.current = keysRef.current.filter((_, i) => i !== index);
    onLimitsChange(limits.filter((_, i) => i !== index));
  }

  function updateLimit(
    index: number,
    field: keyof TokenLimit,
    value: number | null
  ) {
    onLimitsChange(
      limits.map((l, i) => (i === index ? { ...l, [field]: value } : l))
    );
  }

  return (
    <SimpleCollapsible>
      <SimpleCollapsible.Header
        title="Token Rate Limit"
        description="Limit number of tokens this group can use within a given time period."
      />
      <SimpleCollapsible.Content>
        <Card>
          <Section
            gap={0.5}
            height="auto"
            alignItems="stretch"
            justifyContent="start"
            width="full"
          >
            {/* Column headers */}
            <div className="flex flex-wrap items-center gap-1 pr-[40px]">
              <div className="flex-1 flex items-center min-w-[160px]">
                <Text mainUiAction text04>
                  Token Limit
                </Text>
                <Text mainUiMuted text03 className="ml-0.5">
                  (thousand tokens)
                </Text>
              </div>
              <div className="flex-1 flex items-center min-w-[160px]">
                <Text mainUiAction text04>
                  Time Window
                </Text>
                <Text mainUiMuted text03 className="ml-0.5">
                  (hours)
                </Text>
              </div>
            </div>

            {/* Limit rows */}
            {limits.map((limit, i) => (
              <div key={keysRef.current[i]} className="flex items-center gap-1">
                <div className="flex-1">
                  <InputNumber
                    value={limit.tokenBudget}
                    onChange={(v) => updateLimit(i, "tokenBudget", v)}
                    min={0}
                    placeholder="Token limit in thousands"
                  />
                </div>
                <div className="flex-1">
                  <InputNumber
                    value={limit.periodHours}
                    onChange={(v) => updateLimit(i, "periodHours", v)}
                    min={1}
                    placeholder="24"
                  />
                </div>
                <IconButton
                  small
                  icon={SvgMinusCircle}
                  onClick={() => removeLimit(i)}
                />
              </div>
            ))}

            {/* Add button */}
            <Button
              icon={SvgPlusCircle}
              prominence="secondary"
              size="md"
              onClick={addLimit}
            >
              Add Limit
            </Button>
          </Section>
        </Card>
      </SimpleCollapsible.Content>
    </SimpleCollapsible>
  );
}

export default TokenLimitSection;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/index.tsx
================================================
"use client";

import type { Route } from "next";
import { useState } from "react";
import { useRouter } from "next/navigation";
import useSWR from "swr";
import { SvgUsers } from "@opal/icons";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { errorHandlingFetcher } from "@/lib/fetcher";
import type { UserGroup } from "@/lib/types";
import { SWR_KEYS } from "@/lib/swr-keys";
import GroupsList from "./GroupsList";
import AdminListHeader from "@/sections/admin/AdminListHeader";
import { IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";

function GroupsPage() {
  const router = useRouter();
  const [searchQuery, setSearchQuery] = useState("");

  const {
    data: groups,
    error,
    isLoading,
  } = useSWR<UserGroup[]>(SWR_KEYS.adminUserGroups, errorHandlingFetcher);

  return (
    <SettingsLayouts.Root>
      <div data-testid="groups-page-heading">
        <SettingsLayouts.Header icon={SvgUsers} title="Groups" separator />
      </div>

      <SettingsLayouts.Body>
        <AdminListHeader
          hasItems={!isLoading && !error && (groups?.length ?? 0) > 0}
          searchQuery={searchQuery}
          onSearchQueryChange={setSearchQuery}
          placeholder="Search groups..."
          emptyStateText="Create groups to organize users and manage access."
          onAction={() => router.push("/admin/groups/create" as Route)}
          actionLabel="New Group"
        />

        {isLoading && <SimpleLoader />}

        {error && (
          <IllustrationContent
            illustration={SvgNoResult}
            title="Failed to load groups."
            description="Please check the console for more details."
          />
        )}

        {!isLoading && !error && groups && (
          <GroupsList groups={groups} searchQuery={searchQuery} />
        )}
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}

export default GroupsPage;


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/interfaces.ts
================================================
import type { UserRole } from "@/lib/types";
import type { UserRow } from "@/refresh-pages/admin/UsersPage/interfaces";

export interface ApiKeyDescriptor {
  api_key_id: number;
  api_key_display: string;
  api_key_name: string | null;
  api_key_role: UserRole;
  user_id: string;
}

/** Extends UserRow with an optional API key display for service accounts. */
export interface MemberRow extends UserRow {
  api_key_display?: string;
}

export interface TokenRateLimitDisplay {
  token_id: number;
  enabled: boolean;
  token_budget: number;
  period_hours: number;
}


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/shared.tsx
================================================
import { createTableColumns } from "@opal/components";
import { Content } from "@opal/layouts";
import { SvgUser, SvgUserManage, SvgGlobe, SvgSlack } from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import { UserRole, UserStatus, USER_ROLE_LABELS } from "@/lib/types";
import type { ApiKeyDescriptor, MemberRow } from "./interfaces";

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

export const PAGE_SIZE = 10;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

export function apiKeyToMemberRow(key: ApiKeyDescriptor): MemberRow {
  return {
    id: key.user_id,
    email: "Service Account",
    role: key.api_key_role,
    status: UserStatus.ACTIVE,
    is_active: true,
    is_scim_synced: false,
    personal_name: key.api_key_name ?? "Unnamed Key",
    created_at: null,
    updated_at: null,
    groups: [],
    api_key_display: key.api_key_display,
  };
}

// ---------------------------------------------------------------------------
// Role icon mapping (mirrors UsersPage/UserRoleCell)
// ---------------------------------------------------------------------------

const ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {
  [UserRole.ADMIN]: SvgUserManage,
  [UserRole.GLOBAL_CURATOR]: SvgGlobe,
  [UserRole.SLACK_USER]: SvgSlack,
};

// ---------------------------------------------------------------------------
// Column renderers
// ---------------------------------------------------------------------------

function renderNameColumn(email: string, row: MemberRow) {
  return (
    <Content
      sizePreset="main-ui"
      variant="section"
      title={row.personal_name ?? email}
      description={row.personal_name ? email : undefined}
    />
  );
}

function renderAccountTypeColumn(_value: unknown, row: MemberRow) {
  const Icon = (row.role && ROLE_ICONS[row.role]) || SvgUser;
  return (
    <div className="flex flex-row items-center gap-1">
      <Icon className="w-4 h-4 text-text-03" />
      <Text as="span" mainUiBody text03>
        {row.role ? USER_ROLE_LABELS[row.role] ?? row.role : "\u2014"}
      </Text>
    </div>
  );
}

// ---------------------------------------------------------------------------
// Columns
// ---------------------------------------------------------------------------

export const tc = createTableColumns<MemberRow>();

export const baseColumns = [
  tc.qualifier(),
  tc.column("email", {
    header: "Name",
    weight: 25,
    cell: renderNameColumn,
  }),
  tc.column("api_key_display", {
    header: "",
    weight: 15,
    enableSorting: false,
    cell: (value) =>
      value ? (
        <Text as="span" secondaryBody text03>
          {value}
        </Text>
      ) : null,
  }),
  tc.column("role", {
    header: "Account Type",
    weight: 15,
    cell: renderAccountTypeColumn,
  }),
];

export const memberTableColumns = [
  ...baseColumns,
  tc.actions({ showSorting: false }),
];


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/svc.ts
================================================
/** API helpers for the Groups pages. */

import { SWR_KEYS } from "@/lib/swr-keys";

const USER_GROUP_URL = SWR_KEYS.adminUserGroups;

async function renameGroup(groupId: number, newName: string): Promise<void> {
  const res = await fetch(`${USER_GROUP_URL}/rename`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ id: groupId, name: newName }),
  });
  if (!res.ok) {
    const detail = await res.json().catch(() => null);
    throw new Error(
      detail?.detail ?? `Failed to rename group: ${res.statusText}`
    );
  }
}

async function createGroup(
  name: string,
  userIds: string[],
  ccPairIds: number[] = []
): Promise<number> {
  const res = await fetch(USER_GROUP_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      name,
      user_ids: userIds,
      cc_pair_ids: ccPairIds,
    }),
  });
  if (!res.ok) {
    const detail = await res.json().catch(() => null);
    throw new Error(
      detail?.detail ?? `Failed to create group: ${res.statusText}`
    );
  }
  const group = await res.json();
  return group.id;
}

async function updateGroup(
  groupId: number,
  userIds: string[],
  ccPairIds: number[]
): Promise<void> {
  const res = await fetch(`${USER_GROUP_URL}/${groupId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      user_ids: userIds,
      cc_pair_ids: ccPairIds,
    }),
  });
  if (!res.ok) {
    const detail = await res.json().catch(() => null);
    throw new Error(
      detail?.detail ?? `Failed to update group: ${res.statusText}`
    );
  }
}

async function deleteGroup(groupId: number): Promise<void> {
  const res = await fetch(`${USER_GROUP_URL}/${groupId}`, {
    method: "DELETE",
  });
  if (!res.ok) {
    const detail = await res.json().catch(() => null);
    throw new Error(
      detail?.detail ?? `Failed to delete group: ${res.statusText}`
    );
  }
}

// ---------------------------------------------------------------------------
// Agent (persona) sharing — managed from the persona side
// ---------------------------------------------------------------------------

async function updateAgentGroupSharing(
  groupId: number,
  initialAgentIds: number[],
  currentAgentIds: number[]
): Promise<void> {
  const initialSet = new Set(initialAgentIds);
  const currentSet = new Set(currentAgentIds);

  const added_agent_ids = currentAgentIds.filter((id) => !initialSet.has(id));
  const removed_agent_ids = initialAgentIds.filter((id) => !currentSet.has(id));

  if (added_agent_ids.length === 0 && removed_agent_ids.length === 0) return;

  const res = await fetch(`${USER_GROUP_URL}/${groupId}/agents`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ added_agent_ids, removed_agent_ids }),
  });
  if (!res.ok) {
    const detail = await res.json().catch(() => null);
    throw new Error(
      detail?.detail ?? `Failed to update agent sharing: ${res.statusText}`
    );
  }
}

// ---------------------------------------------------------------------------
// Document set sharing — managed from the document set side
// ---------------------------------------------------------------------------

interface DocumentSetSummary {
  id: number;
  description: string;
  cc_pair_summaries: { id: number }[];
  federated_connector_summaries: { id: number }[];
  is_public: boolean;
  users: string[];
  groups: number[];
}

async function updateDocSetGroupSharing(
  groupId: number,
  initialDocSetIds: number[],
  currentDocSetIds: number[]
): Promise<void> {
  const initialSet = new Set(initialDocSetIds);
  const currentSet = new Set(currentDocSetIds);

  const added = currentDocSetIds.filter((id) => !initialSet.has(id));
  const removed = initialDocSetIds.filter((id) => !currentSet.has(id));

  if (added.length === 0 && removed.length === 0) return;

  // Fetch all document sets to get their current state
  const allRes = await fetch("/api/manage/document-set");
  if (!allRes.ok) {
    throw new Error("Failed to fetch document sets");
  }
  const allDocSets: DocumentSetSummary[] = await allRes.json();
  const docSetMap = new Map(allDocSets.map((ds) => [ds.id, ds]));

  for (const dsId of added) {
    const ds = docSetMap.get(dsId);
    if (!ds) {
      throw new Error(`Document set ${dsId} not found`);
    }
    const updatedGroups = ds.groups.includes(groupId)
      ? ds.groups
      : [...ds.groups, groupId];
    const res = await fetch("/api/manage/admin/document-set", {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        id: ds.id,
        description: ds.description,
        cc_pair_ids: ds.cc_pair_summaries.map((cc) => cc.id),
        federated_connectors: ds.federated_connector_summaries.map((fc) => ({
          federated_connector_id: fc.id,
        })),
        is_public: ds.is_public,
        users: ds.users,
        groups: updatedGroups,
      }),
    });
    if (!res.ok) {
      throw new Error(`Failed to add group to document set ${dsId}`);
    }
  }

  for (const dsId of removed) {
    const ds = docSetMap.get(dsId);
    if (!ds) {
      throw new Error(`Document set ${dsId} not found`);
    }
    const updatedGroups = ds.groups.filter((id) => id !== groupId);
    const res = await fetch("/api/manage/admin/document-set", {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        id: ds.id,
        description: ds.description,
        cc_pair_ids: ds.cc_pair_summaries.map((cc) => cc.id),
        federated_connectors: ds.federated_connector_summaries.map((fc) => ({
          federated_connector_id: fc.id,
        })),
        is_public: ds.is_public,
        users: ds.users,
        groups: updatedGroups,
      }),
    });
    if (!res.ok) {
      throw new Error(`Failed to remove group from document set ${dsId}`);
    }
  }
}

// ---------------------------------------------------------------------------
// Token rate limits — create / update / delete
// ---------------------------------------------------------------------------

interface TokenLimitPayload {
  tokenBudget: number | null;
  periodHours: number | null;
}

interface ExistingTokenLimit {
  token_id: number;
  enabled: boolean;
  token_budget: number;
  period_hours: number;
}

async function saveTokenLimits(
  groupId: number,
  limits: TokenLimitPayload[],
  existing: ExistingTokenLimit[]
): Promise<void> {
  // Filter to only valid (non-null) limits
  const validLimits = limits.filter(
    (l): l is { tokenBudget: number; periodHours: number } =>
      l.tokenBudget != null && l.periodHours != null
  );

  // Update existing limits (match by index position)
  const toUpdate = Math.min(validLimits.length, existing.length);
  for (let i = 0; i < toUpdate; i++) {
    const limit = validLimits[i]!;
    const existingLimit = existing[i]!;
    const updateRes = await fetch(
      `/api/admin/token-rate-limits/rate-limit/${existingLimit.token_id}`,
      {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          enabled: existingLimit.enabled,
          token_budget: limit.tokenBudget,
          period_hours: limit.periodHours,
        }),
      }
    );
    if (!updateRes.ok) {
      throw new Error(
        `Failed to update token rate limit ${existingLimit.token_id}`
      );
    }
  }

  // Create new limits beyond existing count
  for (let i = toUpdate; i < validLimits.length; i++) {
    const limit = validLimits[i]!;
    const createRes = await fetch(
      `/api/admin/token-rate-limits/user-group/${groupId}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          enabled: true,
          token_budget: limit.tokenBudget,
          period_hours: limit.periodHours,
        }),
      }
    );
    if (!createRes.ok) {
      throw new Error("Failed to create token rate limit");
    }
  }

  // Delete excess existing limits
  for (let i = toUpdate; i < existing.length; i++) {
    const existingLimit = existing[i]!;
    const deleteRes = await fetch(
      `/api/admin/token-rate-limits/rate-limit/${existingLimit.token_id}`,
      { method: "DELETE" }
    );
    if (!deleteRes.ok) {
      throw new Error(
        `Failed to delete token rate limit ${existingLimit.token_id}`
      );
    }
  }
}

export {
  renameGroup,
  createGroup,
  updateGroup,
  deleteGroup,
  updateAgentGroupSharing,
  updateDocSetGroupSharing,
  saveTokenLimits,
};


================================================
FILE: web/src/refresh-pages/admin/GroupsPage/utils.ts
================================================
import type { UserGroup } from "@/lib/types";

/** Whether this group is a system default group (Admin, Basic). */
export function isBuiltInGroup(group: UserGroup): boolean {
  return group.is_default;
}

/** Human-readable description for built-in groups. */
const BUILT_IN_DESCRIPTIONS: Record<string, string> = {
  Basic: "Default group for all users with basic permissions.",
  Admin: "Built-in admin group with full access to manage all permissions.",
};

/**
 * Build the description line(s) shown beneath the group name.
 *
 * Built-in groups use a fixed label.
 * Custom groups list resource counts ("3 connectors · 2 document sets · 2 agents")
 * or fall back to "No private connectors / document sets / agents".
 */
export function buildGroupDescription(group: UserGroup): string {
  if (isBuiltInGroup(group)) {
    return BUILT_IN_DESCRIPTIONS[group.name] ?? "";
  }

  const parts: string[] = [];
  if (group.cc_pairs.length > 0) {
    parts.push(
      `${group.cc_pairs.length} connector${
        group.cc_pairs.length !== 1 ? "s" : ""
      }`
    );
  }
  if (group.document_sets.length > 0) {
    parts.push(
      `${group.document_sets.length} document set${
        group.document_sets.length !== 1 ? "s" : ""
      }`
    );
  }
  if (group.personas.length > 0) {
    parts.push(
      `${group.personas.length} agent${group.personas.length !== 1 ? "s" : ""}`
    );
  }

  return parts.length > 0
    ? parts.join(" · ")
    : "No private connectors / document sets / agents";
}

/** Format the member count badge, e.g. "306 Members" or "1 Member". */
export function formatMemberCount(count: number): string {
  return `${count} ${count === 1 ? "Member" : "Members"}`;
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/ImageGenerationContent.tsx
================================================
"use client";

import { useState, useMemo, useEffect } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { toast } from "@/hooks/useToast";
import { Section } from "@/layouts/general-layouts";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { LLMProviderResponse, LLMProviderView } from "@/interfaces/llm";
import {
  IMAGE_PROVIDER_GROUPS,
  ImageProvider,
} from "@/refresh-pages/admin/ImageGenerationPage/constants";
import {
  ImageGenerationConfigView,
  setDefaultImageGenerationConfig,
  unsetDefaultImageGenerationConfig,
  deleteImageGenerationConfig,
} from "@/refresh-pages/admin/ImageGenerationPage/svc";
import { ProviderIcon } from "@/app/admin/configuration/llm/ProviderIcon";
import Message from "@/refresh-components/messages/Message";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { Button, SelectCard, Text } from "@opal/components";
import { Content, CardHeaderLayout } from "@opal/layouts";
import { Hoverable } from "@opal/core";
import {
  SvgArrowExchange,
  SvgArrowRightCircle,
  SvgCheckSquare,
  SvgSettings,
  SvgSlash,
  SvgUnplug,
} from "@opal/icons";
import { markdown } from "@opal/utils";
import { getImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms";

const NO_DEFAULT_VALUE = "__none__";

const STATUS_TO_STATE = {
  disconnected: "empty",
  connected: "filled",
  selected: "selected",
} as const;

export default function ImageGenerationContent() {
  const {
    data: llmProviderResponse,
    error: llmError,
    mutate: refetchProviders,
  } = useSWR<LLMProviderResponse<LLMProviderView>>(
    SWR_KEYS.llmProvidersWithImageGen,
    errorHandlingFetcher
  );
  const llmProviders = llmProviderResponse?.providers ?? [];

  const {
    data: configs = [],
    error: configError,
    mutate: refetchConfigs,
  } = useSWR<ImageGenerationConfigView[]>(
    SWR_KEYS.imageGenConfig,
    errorHandlingFetcher
  );

  const modal = useCreateModal();
  const [activeProvider, setActiveProvider] = useState<ImageProvider | null>(
    null
  );
  const [editConfig, setEditConfig] =
    useState<ImageGenerationConfigView | null>(null);
  const [disconnectProvider, setDisconnectProvider] =
    useState<ImageProvider | null>(null);
  const [replacementProviderId, setReplacementProviderId] = useState<
    string | null
  >(null);

  const connectedProviderIds = useMemo(() => {
    return new Set(configs.map((c) => c.image_provider_id));
  }, [configs]);

  const defaultConfig = useMemo(() => {
    return configs.find((c) => c.is_default);
  }, [configs]);

  const getStatus = (
    provider: ImageProvider
  ): "disconnected" | "connected" | "selected" => {
    if (defaultConfig?.image_provider_id === provider.image_provider_id)
      return "selected";
    if (connectedProviderIds.has(provider.image_provider_id))
      return "connected";
    return "disconnected";
  };

  const handleConnect = (provider: ImageProvider) => {
    setEditConfig(null);
    setActiveProvider(provider);
    modal.toggle(true);
  };

  const handleSelect = async (provider: ImageProvider) => {
    const config = configs.find(
      (c) => c.image_provider_id === provider.image_provider_id
    );
    if (config) {
      try {
        await setDefaultImageGenerationConfig(config.image_provider_id);
        toast.success(`${provider.title} set as default`);
        refetchConfigs();
      } catch (error) {
        toast.error(
          error instanceof Error ? error.message : "Failed to set default"
        );
      }
    }
  };

  const handleDeselect = async (provider: ImageProvider) => {
    const config = configs.find(
      (c) => c.image_provider_id === provider.image_provider_id
    );
    if (config) {
      try {
        await unsetDefaultImageGenerationConfig(config.image_provider_id);
        toast.success(`${provider.title} deselected`);
        refetchConfigs();
      } catch (error) {
        toast.error(
          error instanceof Error ? error.message : "Failed to deselect"
        );
      }
    }
  };

  const handleEdit = (provider: ImageProvider) => {
    const config = configs.find(
      (c) => c.image_provider_id === provider.image_provider_id
    );
    setEditConfig(config || null);
    setActiveProvider(provider);
    modal.toggle(true);
  };

  const handleDisconnect = async () => {
    if (!disconnectProvider) return;
    try {
      // If a replacement was selected (not "No Default"), activate it first
      if (replacementProviderId && replacementProviderId !== NO_DEFAULT_VALUE) {
        await setDefaultImageGenerationConfig(replacementProviderId);
      }

      await deleteImageGenerationConfig(disconnectProvider.image_provider_id);
      toast.success(`${disconnectProvider.title} disconnected`);
      refetchConfigs();
      refetchProviders();
    } catch (error) {
      console.error("Failed to disconnect image generation provider:", error);
      toast.error(
        error instanceof Error ? error.message : "Failed to disconnect"
      );
    } finally {
      setDisconnectProvider(null);
      setReplacementProviderId(null);
    }
  };

  const handleModalSuccess = () => {
    toast.success("Provider configured successfully");
    setEditConfig(null);
    refetchConfigs();
    refetchProviders();
  };

  if (llmError || configError) {
    return (
      <div className="text-error">
        Failed to load configuration. Please refresh the page.
      </div>
    );
  }

  // Compute replacement options when disconnecting an active provider
  const isDisconnectingDefault =
    disconnectProvider &&
    defaultConfig?.image_provider_id === disconnectProvider.image_provider_id;

  // Group connected replacement models by provider (excluding the model being disconnected)
  const replacementGroups = useMemo(() => {
    if (!disconnectProvider) return [];
    return IMAGE_PROVIDER_GROUPS.map((group) => ({
      ...group,
      providers: group.providers.filter(
        (p) =>
          p.image_provider_id !== disconnectProvider.image_provider_id &&
          connectedProviderIds.has(p.image_provider_id)
      ),
    })).filter((g) => g.providers.length > 0);
  }, [disconnectProvider, connectedProviderIds]);

  const needsReplacement = !!isDisconnectingDefault;
  const hasReplacements = replacementGroups.length > 0;

  // Auto-select first replacement when modal opens
  useEffect(() => {
    if (needsReplacement && !replacementProviderId && hasReplacements) {
      const firstGroup = replacementGroups[0];
      const firstModel = firstGroup?.providers[0];
      if (firstModel) setReplacementProviderId(firstModel.image_provider_id);
    }
  }, [disconnectProvider]); // eslint-disable-line react-hooks/exhaustive-deps

  return (
    <>
      <div className="flex flex-col gap-4">
        <Content
          title="Image Generation Model"
          description="Select a model to generate images in chat."
          sizePreset="main-content"
          variant="section"
        />

        {connectedProviderIds.size === 0 && (
          <Message
            info
            static
            large
            close={false}
            text="Connect an image generation model to use in chat."
            className="w-full"
          />
        )}

        {/* Provider Groups */}
        {IMAGE_PROVIDER_GROUPS.map((group) => (
          <div key={group.name} className="flex flex-col gap-2">
            <Content title={group.name} sizePreset="secondary" variant="body" />
            {group.providers.map((provider) => {
              const status = getStatus(provider);
              const isDisconnected = status === "disconnected";
              const isConnected = status === "connected";
              const isSelected = status === "selected";

              return (
                <Hoverable.Root
                  key={provider.image_provider_id}
                  group="image-gen/ProviderCard"
                >
                  <SelectCard
                    state={STATUS_TO_STATE[status]}
                    padding="sm"
                    rounding="lg"
                    aria-label={`image-gen-provider-${provider.image_provider_id}`}
                    onClick={
                      isDisconnected
                        ? () => handleConnect(provider)
                        : isSelected
                          ? () => handleDeselect(provider)
                          : undefined
                    }
                  >
                    <CardHeaderLayout
                      sizePreset="main-ui"
                      variant="section"
                      icon={() => (
                        <ProviderIcon
                          provider={provider.provider_name}
                          size={16}
                        />
                      )}
                      title={provider.title}
                      description={provider.description}
                      rightChildren={
                        isDisconnected ? (
                          <Button
                            prominence="tertiary"
                            rightIcon={SvgArrowExchange}
                            onClick={(e) => {
                              e.stopPropagation();
                              handleConnect(provider);
                            }}
                          >
                            Connect
                          </Button>
                        ) : isConnected ? (
                          <Button
                            prominence="tertiary"
                            rightIcon={SvgArrowRightCircle}
                            onClick={(e) => {
                              e.stopPropagation();
                              handleSelect(provider);
                            }}
                          >
                            Set as Default
                          </Button>
                        ) : isSelected ? (
                          <div className="p-2">
                            <Content
                              title="Current Default"
                              sizePreset="main-ui"
                              variant="section"
                              icon={SvgCheckSquare}
                            />
                          </div>
                        ) : undefined
                      }
                      bottomRightChildren={
                        !isDisconnected ? (
                          <div className="flex flex-row px-1 pb-1">
                            <Hoverable.Item group="image-gen/ProviderCard">
                              <Button
                                icon={SvgUnplug}
                                tooltip="Disconnect"
                                aria-label={`Disconnect ${provider.title}`}
                                prominence="tertiary"
                                onClick={(e) => {
                                  e.stopPropagation();
                                  setDisconnectProvider(provider);
                                }}
                                size="md"
                              />
                            </Hoverable.Item>
                            <Button
                              icon={SvgSettings}
                              tooltip="Edit"
                              aria-label={`Edit ${provider.title}`}
                              prominence="tertiary"
                              onClick={(e) => {
                                e.stopPropagation();
                                handleEdit(provider);
                              }}
                              size="md"
                            />
                          </div>
                        ) : undefined
                      }
                    />
                  </SelectCard>
                </Hoverable.Root>
              );
            })}
          </div>
        ))}
      </div>

      {disconnectProvider && (
        <ConfirmationModalLayout
          icon={SvgUnplug}
          title={`Disconnect ${disconnectProvider.title}`}
          description="This will remove the stored credentials for this provider."
          onClose={() => {
            setDisconnectProvider(null);
            setReplacementProviderId(null);
          }}
          submit={
            <Button
              variant="danger"
              onClick={() => void handleDisconnect()}
              disabled={
                needsReplacement && hasReplacements && !replacementProviderId
              }
            >
              Disconnect
            </Button>
          }
        >
          {needsReplacement ? (
            hasReplacements ? (
              <Section alignItems="start">
                <Text as="p" color="text-03">
                  {markdown(
                    `**${disconnectProvider.title}** is currently the default image generation model. Session history will be preserved.`
                  )}
                </Text>
                <Section alignItems="start" gap={0.25}>
                  <Text as="p" color="text-04">
                    Set New Default
                  </Text>
                  <InputSelect
                    value={replacementProviderId ?? undefined}
                    onValueChange={(v) => setReplacementProviderId(v)}
                  >
                    <InputSelect.Trigger placeholder="Select a replacement model" />
                    <InputSelect.Content>
                      {replacementGroups.map((group) => (
                        <InputSelect.Group key={group.name}>
                          <InputSelect.Label>{group.name}</InputSelect.Label>
                          {group.providers.map((p) => (
                            <InputSelect.Item
                              key={p.image_provider_id}
                              value={p.image_provider_id}
                              icon={() => (
                                <ProviderIcon
                                  provider={p.provider_name}
                                  size={16}
                                />
                              )}
                            >
                              {p.title}
                            </InputSelect.Item>
                          ))}
                        </InputSelect.Group>
                      ))}
                      <InputSelect.Separator />
                      <InputSelect.Item
                        value={NO_DEFAULT_VALUE}
                        icon={SvgSlash}
                      >
                        <span>
                          <b>No Default</b>
                          <span className="text-text-03">
                            {" "}
                            (Disable Image Generation)
                          </span>
                        </span>
                      </InputSelect.Item>
                    </InputSelect.Content>
                  </InputSelect>
                </Section>
              </Section>
            ) : (
              <>
                <Text as="p" color="text-03">
                  {markdown(
                    `**${disconnectProvider.title}** is currently the default image generation model.`
                  )}
                </Text>
                <Text as="p" color="text-03">
                  Connect another provider to continue using image generation.
                </Text>
              </>
            )
          ) : (
            <>
              <Text as="p" color="text-03">
                {markdown(
                  `**${disconnectProvider.title}** models will no longer be used to generate images.`
                )}
              </Text>
              <Text as="p" color="text-03">
                Session history will be preserved.
              </Text>
            </>
          )}
        </ConfirmationModalLayout>
      )}

      {activeProvider && (
        <modal.Provider>
          {getImageGenForm({
            modal: modal,
            imageProvider: activeProvider,
            existingProviders: llmProviders,
            existingConfig: editConfig || undefined,
            onSuccess: handleModalSuccess,
          })}
        </modal.Provider>
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/constants.ts
================================================
export interface ImageProvider {
  image_provider_id: string; // Static unique key for UI-DB mapping
  model_name: string; // Actual model name for LLM API
  provider_name: string;
  title: string;
  description: string;
}

export interface ProviderGroup {
  name: string;
  providers: ImageProvider[];
}

export const IMAGE_PROVIDER_GROUPS: ProviderGroup[] = [
  {
    name: "OpenAI",
    providers: [
      {
        image_provider_id: "openai_gpt_image_1_5",
        model_name: "gpt-image-1.5",
        provider_name: "openai",
        title: "GPT Image 1.5",
        description:
          "OpenAI's latest Image Generation model with the highest prompt fidelity.",
      },
      {
        image_provider_id: "openai_gpt_image_1",
        model_name: "gpt-image-1",
        provider_name: "openai",
        title: "GPT Image 1",
        description:
          "A capable image generation model from OpenAI with strong prompt adherence.",
      },
      {
        image_provider_id: "openai_dalle_3",
        model_name: "dall-e-3",
        provider_name: "openai",
        title: "DALL-E 3",
        description:
          "OpenAI image generation model capable of generating rich and expressive images.",
      },
    ],
  },
  {
    name: "Azure OpenAI",
    providers: [
      {
        image_provider_id: "azure_gpt_image_1_5",
        model_name: "", // Extracted from deployment in target URI
        provider_name: "azure",
        title: "Azure OpenAI GPT Image 1.5",
        description:
          "GPT Image 1.5 image generation model hosted on Microsoft Azure.",
      },
      {
        image_provider_id: "azure_gpt_image_1",
        model_name: "", // Extracted from deployment in target URI
        provider_name: "azure",
        title: "Azure OpenAI GPT Image 1",
        description:
          "GPT Image 1 image generation model hosted on Microsoft Azure.",
      },
      {
        image_provider_id: "azure_dalle_3",
        model_name: "", // Extracted from deployment in target URI
        provider_name: "azure",
        title: "Azure OpenAI DALL-E 3",
        description:
          "DALL-E 3 image generation model hosted on Microsoft Azure.",
      },
    ],
  },
  {
    name: "Google Cloud Vertex AI",
    providers: [
      {
        image_provider_id: "gemini-2.5-flash-image",
        model_name: "gemini-2.5-flash-image",
        provider_name: "vertex_ai",
        title: "Gemini 2.5 Flash Image",
        description:
          "Gemini 2.5 Flash Image (Nano Banana) model is designed for speed and efficiency.",
      },
      {
        image_provider_id: "gemini-3-pro-image-preview",
        model_name: "gemini-3-pro-image-preview",
        provider_name: "vertex_ai",
        title: "Gemini 3 Pro Image Preview",
        description:
          "Gemini 3 Pro Image Preview (Nano Banana Pro) is designed for professional asset production.",
      },
    ],
  },
];


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm.tsx
================================================
"use client";

import React from "react";
import * as Yup from "yup";
import { FormikField } from "@/refresh-components/form/FormikField";
import { FormField } from "@/refresh-components/form/FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { ImageGenFormWrapper } from "@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper";
import {
  ImageGenFormBaseProps,
  ImageGenFormChildProps,
  ImageGenSubmitPayload,
} from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
import { ImageGenerationCredentials } from "@/refresh-pages/admin/ImageGenerationPage/svc";
import { ImageProvider } from "@/refresh-pages/admin/ImageGenerationPage/constants";
import {
  parseAzureTargetUri,
  isValidAzureTargetUri,
} from "@/lib/azureTargetUri";

// Azure form values - target URI and API key
interface AzureFormValues {
  target_uri: string;
  api_key: string;
}

const initialValues: AzureFormValues = {
  target_uri: "",
  api_key: "",
};

const validationSchema = Yup.object().shape({
  target_uri: Yup.string()
    .required("Target URI is required")
    .test(
      "valid-target-uri",
      "Target URI must be a valid URL with api-version and deployment name",
      (value) => (value ? isValidAzureTargetUri(value) : false)
    ),
  api_key: Yup.string().required("API Key is required"),
});

function AzureFormFields(props: ImageGenFormChildProps<AzureFormValues>) {
  const {
    formikProps,
    apiStatus,
    showApiMessage,
    errorMessage,
    disabled,
    isLoadingCredentials,
    apiKeyOptions,
    resetApiState,
    imageProvider,
  } = props;

  return (
    <>
      {/* Target URI field */}
      <FormikField<string>
        name="target_uri"
        render={(field, helper, meta, state) => (
          <FormField name="target_uri" state={state} className="w-full">
            <FormField.Label>Target URI</FormField.Label>
            <FormField.Control>
              <InputTypeIn
                {...field}
                placeholder="https://your-resource.cognitiveservices.azure.com/openai/deployments/deployment-name/images/generations?api-version=2025-01-01-preview"
                showClearButton={false}
                variant={disabled ? "disabled" : undefined}
              />
            </FormField.Control>
            <FormField.Message
              messages={{
                idle: (
                  <>
                    Paste your endpoint target URI from{" "}
                    <a
                      href="https://oai.azure.com"
                      target="_blank"
                      rel="noopener noreferrer"
                      className="underline"
                    >
                      Azure OpenAI
                    </a>{" "}
                    (including API endpoint base, deployment name, and API
                    version).
                  </>
                ),
                error: meta.error,
              }}
            />
          </FormField>
        )}
      />

      {/* API Key field */}
      <FormikField<string>
        name="api_key"
        render={(field, helper, meta, state) => (
          <FormField
            name="api_key"
            state={apiStatus === "error" ? "error" : state}
            className="w-full"
          >
            <FormField.Label>API Key</FormField.Label>
            <FormField.Control>
              {apiKeyOptions.length > 0 ? (
                <InputComboBox
                  value={field.value}
                  onChange={(e) => {
                    helper.setValue(e.target.value);
                    resetApiState();
                  }}
                  onValueChange={(value) => {
                    helper.setValue(value);
                    resetApiState();
                  }}
                  onBlur={field.onBlur}
                  options={apiKeyOptions}
                  placeholder={
                    isLoadingCredentials
                      ? "Loading..."
                      : "Enter new API key or select existing provider"
                  }
                  disabled={disabled || !formikProps.values.target_uri?.trim()}
                  isError={apiStatus === "error"}
                />
              ) : (
                <PasswordInputTypeIn
                  {...field}
                  onChange={(e) => {
                    field.onChange(e);
                    resetApiState();
                  }}
                  placeholder={
                    isLoadingCredentials ? "Loading..." : "Enter your API key"
                  }
                  showClearButton={false}
                  disabled={disabled || !formikProps.values.target_uri?.trim()}
                  error={apiStatus === "error"}
                />
              )}
            </FormField.Control>
            {showApiMessage ? (
              <FormField.APIMessage
                state={apiStatus}
                messages={{
                  loading: `Testing API key with ${imageProvider.title}...`,
                  success: "API key is valid. Configuration saved.",
                  error: errorMessage || "Invalid API key",
                }}
              />
            ) : (
              <FormField.Message
                messages={{
                  idle: (
                    <>
                      {"Paste your "}
                      <a
                        href="https://oai.azure.com"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="underline"
                      >
                        API key
                      </a>
                      {" from Azure OpenAI to access your models."}
                    </>
                  ),
                  error: meta.error,
                }}
              />
            )}
          </FormField>
        )}
      />
    </>
  );
}

function getInitialValuesFromCredentials(
  credentials: ImageGenerationCredentials,
  imageProvider: ImageProvider
): Partial<AzureFormValues> {
  // Reconstruct target_uri from credentials
  let targetUri = "";
  if (credentials.api_base && credentials.api_version) {
    const deployment = credentials.deployment_name || imageProvider.model_name;
    targetUri = `${credentials.api_base}/openai/deployments/${deployment}/images/generations?api-version=${credentials.api_version}`;
  }

  return {
    api_key: credentials.api_key || "",
    target_uri: targetUri,
  };
}

function transformValues(
  values: AzureFormValues,
  imageProvider: ImageProvider
): ImageGenSubmitPayload {
  // Parse target_uri to extract api_base, api_version, deployment_name
  let apiBase: string | undefined;
  let apiVersion: string | undefined;
  let deploymentName: string | undefined;
  let modelName = imageProvider.model_name;

  if (values.target_uri) {
    try {
      const parsed = parseAzureTargetUri(values.target_uri);
      apiBase = parsed.url.origin;
      apiVersion = parsed.apiVersion;
      deploymentName = parsed.deploymentName || undefined;
      // For Azure, use deployment name as model name
      modelName = deploymentName || imageProvider.model_name;
    } catch (error) {
      console.error("Failed to parse target_uri:", error);
    }
  }

  return {
    modelName,
    imageProviderId: imageProvider.image_provider_id,
    provider: "azure",
    apiKey: values.api_key,
    apiBase,
    apiVersion,
    deploymentName,
  };
}

export function AzureImageGenForm(props: ImageGenFormBaseProps) {
  const { imageProvider, existingConfig } = props;

  return (
    <ImageGenFormWrapper<AzureFormValues>
      {...props}
      title={
        existingConfig
          ? `Edit ${imageProvider.title}`
          : `Connect ${imageProvider.title}`
      }
      description={imageProvider.description}
      initialValues={initialValues}
      validationSchema={validationSchema}
      getInitialValuesFromCredentials={getInitialValuesFromCredentials}
      transformValues={(values) => transformValues(values, imageProvider)}
    >
      {(childProps) => <AzureFormFields {...childProps} />}
    </ImageGenFormWrapper>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper.tsx
================================================
"use client";

import React, { useState, useMemo, useEffect } from "react";
import { Form, Formik, FormikProps } from "formik";
import ProviderModal from "@/components/modals/ProviderModal";
import { ProviderIcon } from "@/app/admin/configuration/llm/ProviderIcon";
import ConnectionProviderIcon from "@/refresh-components/ConnectionProviderIcon";
import {
  testImageGenerationApiKey,
  createImageGenerationConfig,
  updateImageGenerationConfig,
  fetchImageGenerationCredentials,
} from "@/refresh-pages/admin/ImageGenerationPage/svc";
import { APIFormFieldState } from "@/refresh-components/form/types";
import {
  ImageGenFormWrapperProps,
  ImageGenFormChildProps,
  ImageGenSubmitPayload,
  FormValues,
} from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
import { toast } from "@/hooks/useToast";

export function ImageGenFormWrapper<T extends FormValues>({
  modal,
  imageProvider,
  existingProviders,
  existingConfig,
  onSuccess,
  title,
  description,
  initialValues,
  validationSchema,
  children,
  transformValues,
  getInitialValuesFromCredentials,
}: ImageGenFormWrapperProps<T>) {
  // State management
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [apiStatus, setApiStatus] = useState<APIFormFieldState>("idle");
  const [showApiMessage, setShowApiMessage] = useState(false);
  const [errorMessage, setErrorMessage] = useState("");
  const [isLoadingCredentials, setIsLoadingCredentials] = useState(false);

  // Track merged initial values with fetched credentials
  const [mergedInitialValues, setMergedInitialValues] =
    useState<T>(initialValues);

  const isEditMode = !!existingConfig;

  // Compute API key options from existing providers matching this image provider
  // API keys from LLM providers are already masked by backend (first 4 + **** + last 4)
  const apiKeyOptions = useMemo(() => {
    return existingProviders
      .filter((p) => p.provider === imageProvider.provider_name)
      .map((provider) => ({
        value: `existing:${provider.id}:${provider.name}`,
        label: provider.api_key || "****",
      }));
  }, [existingProviders, imageProvider.provider_name]);

  // Fetch credentials when modal opens in edit mode
  useEffect(() => {
    if (existingConfig && modal.isOpen) {
      setIsLoadingCredentials(true);
      fetchImageGenerationCredentials(existingConfig.image_provider_id)
        .then((creds) => {
          if (getInitialValuesFromCredentials) {
            const credValues = getInitialValuesFromCredentials(
              creds,
              imageProvider
            );
            setMergedInitialValues((prev) => ({ ...prev, ...credValues }));
          }
        })
        .catch((err) => {
          console.error("Failed to fetch credentials:", err);
        })
        .finally(() => {
          setIsLoadingCredentials(false);
        });
    } else if (!modal.isOpen) {
      // Reset when modal closes
      setMergedInitialValues(initialValues);
      setApiStatus("idle");
      setShowApiMessage(false);
      setErrorMessage("");
    }
  }, [
    existingConfig,
    modal.isOpen,
    getInitialValuesFromCredentials,
    imageProvider,
    initialValues,
  ]);

  // Close modal after successful connection (1 second delay)
  useEffect(() => {
    if (apiStatus === "success" && !isSubmitting) {
      const timer = setTimeout(() => {
        onSuccess();
        modal.toggle(false);
      }, 1000);
      return () => clearTimeout(timer);
    }
  }, [apiStatus, isSubmitting, modal, onSuccess]);

  const resetApiState = () => {
    if (showApiMessage) {
      setShowApiMessage(false);
      setApiStatus("idle");
      setErrorMessage("");
    }
  };

  const handleSubmit = async (values: T) => {
    setIsSubmitting(true);
    setShowApiMessage(true);
    setApiStatus("loading");

    try {
      // Get the submit payload from transformValues or use defaults
      const payload: ImageGenSubmitPayload = transformValues
        ? transformValues(values)
        : {
            modelName: imageProvider.model_name,
            imageProviderId: imageProvider.image_provider_id,
            provider: imageProvider.provider_name,
            apiKey: (values as Record<string, unknown>).api_key as
              | string
              | undefined,
          };

      // Check if user selected existing provider (clone mode)
      const apiKeyValue = (values as Record<string, unknown>).api_key as string;
      const isCloneMode = apiKeyValue?.startsWith("existing:");

      if (isCloneMode) {
        const parts = apiKeyValue.split(":");
        const providerIdStr = parts[1];
        if (!providerIdStr) {
          throw new Error("Invalid provider selection");
        }
        const providerId = parseInt(providerIdStr, 10);

        // Test API key from existing provider before creating config
        const result = await testImageGenerationApiKey(payload.modelName, {
          sourceLlmProviderId: providerId,
          apiBase: payload.apiBase,
          apiVersion: payload.apiVersion,
          deploymentName: payload.deploymentName,
          customConfig: payload.customConfig,
        });

        if (!result.ok) {
          setApiStatus("error");
          setErrorMessage(result.errorMessage || "API key validation failed");
          setIsSubmitting(false);
          return;
        }

        // Test passed - now create/update config
        if (isEditMode && existingConfig) {
          await updateImageGenerationConfig(existingConfig.image_provider_id, {
            modelName: payload.modelName,
            sourceLlmProviderId: providerId,
            apiBase: payload.apiBase,
            apiVersion: payload.apiVersion,
            deploymentName: payload.deploymentName,
            customConfig: payload.customConfig,
          });
        } else {
          await createImageGenerationConfig({
            imageProviderId: payload.imageProviderId,
            modelName: payload.modelName,
            sourceLlmProviderId: providerId,
            apiBase: payload.apiBase,
            apiVersion: payload.apiVersion,
            deploymentName: payload.deploymentName,
            customConfig: payload.customConfig,
            isDefault: true,
          });
        }
      } else {
        // New credentials mode - check if API key was changed from masked value
        // A masked key contains "****", so if present, user hasn't entered a new key
        const apiKeyChanged = !apiKeyValue?.includes("****");

        // Test the API key first (only if changed or creating new config)
        if (apiKeyChanged) {
          const result = await testImageGenerationApiKey(payload.modelName, {
            provider: payload.provider,
            apiKey: payload.apiKey,
            apiBase: payload.apiBase,
            apiVersion: payload.apiVersion,
            deploymentName: payload.deploymentName,
            customConfig: payload.customConfig,
          });

          if (!result.ok) {
            setApiStatus("error");
            setErrorMessage(result.errorMessage || "API key validation failed");
            setIsSubmitting(false);
            return;
          }
        }

        // Create or update config
        if (isEditMode && existingConfig) {
          await updateImageGenerationConfig(existingConfig.image_provider_id, {
            modelName: payload.modelName,
            provider: payload.provider,
            apiKey: payload.apiKey,
            apiBase: payload.apiBase,
            apiVersion: payload.apiVersion,
            deploymentName: payload.deploymentName,
            customConfig: payload.customConfig,
            apiKeyChanged,
          });
        } else {
          await createImageGenerationConfig({
            imageProviderId: payload.imageProviderId,
            modelName: payload.modelName,
            provider: payload.provider,
            apiKey: payload.apiKey,
            apiBase: payload.apiBase,
            apiVersion: payload.apiVersion,
            deploymentName: payload.deploymentName,
            customConfig: payload.customConfig,
            isDefault: true,
          });
        }
      }

      setApiStatus("success");
      setErrorMessage("");
      setIsSubmitting(false);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Unknown error occurred";
      setApiStatus("error");
      setErrorMessage(message);
      toast.error(message);
      setIsSubmitting(false);
    }
  };

  const icon = () => (
    <ConnectionProviderIcon
      icon={<ProviderIcon provider={imageProvider.provider_name} size={24} />}
    />
  );

  // Create child props
  const createChildProps = (
    formikProps: FormikProps<T>
  ): ImageGenFormChildProps<T> => ({
    formikProps,
    apiStatus,
    setApiStatus,
    showApiMessage,
    setShowApiMessage,
    errorMessage,
    setErrorMessage,
    isSubmitting,
    disabled: isSubmitting || isLoadingCredentials,
    isEditMode,
    isLoadingCredentials,
    apiKeyOptions,
    resetApiState,
    imageProvider,
  });

  return (
    <Formik<T>
      initialValues={mergedInitialValues}
      onSubmit={handleSubmit}
      validationSchema={validationSchema}
      enableReinitialize
    >
      {(formikProps) => {
        const childProps = createChildProps(formikProps);

        return (
          <ProviderModal
            open={modal.isOpen}
            onOpenChange={modal.toggle}
            title={title}
            description={description}
            icon={icon}
            onSubmit={formikProps.submitForm}
            submitDisabled={
              !formikProps.isValid ||
              (!isEditMode && !formikProps.dirty) ||
              isSubmitting
            }
            isSubmitting={isSubmitting}
          >
            <Form className="flex flex-col gap-0 bg-background-tint-01 w-full">
              <div className="flex flex-col gap-4 w-full">
                {children(childProps)}
              </div>
            </Form>
          </ProviderModal>
        );
      }}
    </Formik>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm.tsx
================================================
"use client";

import React from "react";
import * as Yup from "yup";
import { FormikField } from "@/refresh-components/form/FormikField";
import { FormField } from "@/refresh-components/form/FormField";
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { ImageGenFormWrapper } from "@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper";
import {
  ImageGenFormBaseProps,
  ImageGenFormChildProps,
  ImageGenSubmitPayload,
} from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
import { ImageGenerationCredentials } from "@/refresh-pages/admin/ImageGenerationPage/svc";
import { ImageProvider } from "@/refresh-pages/admin/ImageGenerationPage/constants";

// OpenAI form values - just API key
interface OpenAIFormValues {
  api_key: string;
}

const initialValues: OpenAIFormValues = {
  api_key: "",
};

const validationSchema = Yup.object().shape({
  api_key: Yup.string().required("API Key is required"),
});

function OpenAIFormFields(props: ImageGenFormChildProps<OpenAIFormValues>) {
  const {
    apiStatus,
    showApiMessage,
    errorMessage,
    disabled,
    isLoadingCredentials,
    apiKeyOptions,
    resetApiState,
    imageProvider,
  } = props;

  return (
    <FormikField<string>
      name="api_key"
      render={(field, helper, meta, state) => (
        <FormField
          name="api_key"
          state={apiStatus === "error" ? "error" : state}
          className="w-full"
        >
          <FormField.Label>API Key</FormField.Label>
          <FormField.Control>
            {apiKeyOptions.length > 0 ? (
              <InputComboBox
                value={field.value}
                onChange={(e) => {
                  helper.setValue(e.target.value);
                  resetApiState();
                }}
                onValueChange={(value) => {
                  helper.setValue(value);
                  resetApiState();
                }}
                onBlur={field.onBlur}
                options={apiKeyOptions}
                placeholder={
                  isLoadingCredentials
                    ? "Loading..."
                    : "Enter new API key or select existing provider"
                }
                disabled={disabled}
                isError={apiStatus === "error"}
              />
            ) : (
              <PasswordInputTypeIn
                {...field}
                onChange={(e) => {
                  field.onChange(e);
                  resetApiState();
                }}
                placeholder={
                  isLoadingCredentials ? "Loading..." : "Enter your API key"
                }
                showClearButton={false}
                disabled={disabled}
                error={apiStatus === "error"}
              />
            )}
          </FormField.Control>
          {showApiMessage ? (
            <FormField.APIMessage
              state={apiStatus}
              messages={{
                loading: `Testing API key with ${imageProvider.title}...`,
                success: "API key is valid. Configuration saved.",
                error: errorMessage || "Invalid API key",
              }}
            />
          ) : (
            <FormField.Message
              messages={{
                idle: "Enter a new API key or select an existing provider.",
                error: meta.error,
              }}
            />
          )}
        </FormField>
      )}
    />
  );
}

function getInitialValuesFromCredentials(
  credentials: ImageGenerationCredentials,
  _imageProvider: ImageProvider
): Partial<OpenAIFormValues> {
  return {
    api_key: credentials.api_key || "",
  };
}

function transformValues(
  values: OpenAIFormValues,
  imageProvider: ImageProvider
): ImageGenSubmitPayload {
  return {
    modelName: imageProvider.model_name,
    imageProviderId: imageProvider.image_provider_id,
    provider: "openai",
    apiKey: values.api_key,
  };
}

export function OpenAIImageGenForm(props: ImageGenFormBaseProps) {
  const { imageProvider, existingConfig } = props;

  return (
    <ImageGenFormWrapper<OpenAIFormValues>
      {...props}
      title={
        existingConfig
          ? `Edit ${imageProvider.title}`
          : `Connect ${imageProvider.title}`
      }
      description={imageProvider.description}
      initialValues={initialValues}
      validationSchema={validationSchema}
      getInitialValuesFromCredentials={getInitialValuesFromCredentials}
      transformValues={(values) => transformValues(values, imageProvider)}
    >
      {(childProps) => <OpenAIFormFields {...childProps} />}
    </ImageGenFormWrapper>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/VertexImageGenForm.tsx
================================================
"use client";

import * as Yup from "yup";
import { FormikField } from "@/refresh-components/form/FormikField";
import { FormField } from "@/refresh-components/form/FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputFile from "@/refresh-components/inputs/InputFile";
import InlineExternalLink from "@/refresh-components/InlineExternalLink";
import { ImageGenFormWrapper } from "@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper";
import {
  ImageGenFormBaseProps,
  ImageGenFormChildProps,
  ImageGenSubmitPayload,
} from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
import { ImageProvider } from "@/refresh-pages/admin/ImageGenerationPage/constants";
import { ImageGenerationCredentials } from "@/refresh-pages/admin/ImageGenerationPage/svc";

const VERTEXAI_PROVIDER_NAME = "vertex_ai";
const VERTEXAI_DEFAULT_LOCATION = "global";

// Vertex form values
interface VertexImageGenFormValues {
  custom_config: {
    vertex_credentials: string;
    vertex_location: string;
  };
}

const initialValues: VertexImageGenFormValues = {
  custom_config: {
    vertex_credentials: "",
    vertex_location: VERTEXAI_DEFAULT_LOCATION,
  },
};

const validationSchema = Yup.object().shape({
  custom_config: Yup.object().shape({
    vertex_credentials: Yup.string().required("Credentials file is required"),
    vertex_location: Yup.string().required("Location is required"),
  }),
});

function getInitialValuesFromCredentials(
  credentials: ImageGenerationCredentials,
  _imageProvider: ImageProvider
): Partial<VertexImageGenFormValues> {
  return {
    custom_config: {
      vertex_credentials: credentials.custom_config?.vertex_credentials || "",
      vertex_location:
        credentials.custom_config?.vertex_location || VERTEXAI_DEFAULT_LOCATION,
    },
  };
}

function transformValues(
  values: VertexImageGenFormValues,
  imageProvider: ImageProvider
): ImageGenSubmitPayload {
  return {
    modelName: imageProvider.model_name,
    imageProviderId: imageProvider.image_provider_id,
    provider: VERTEXAI_PROVIDER_NAME,
    customConfig: {
      vertex_credentials: values.custom_config.vertex_credentials,
      vertex_location: values.custom_config.vertex_location,
    },
  };
}

function VertexFormFields(
  props: ImageGenFormChildProps<VertexImageGenFormValues>
) {
  const { apiStatus, showApiMessage, errorMessage, disabled, imageProvider } =
    props;

  return (
    <>
      {/* Credentials File field */}
      <FormikField<string>
        name="custom_config.vertex_credentials"
        render={(field, helper, meta, state) => (
          <FormField
            name="custom_config.vertex_credentials"
            state={apiStatus === "error" ? "error" : state}
            className="w-full"
          >
            <FormField.Label>Credentials File</FormField.Label>
            <FormField.Control>
              <InputFile
                setValue={(value) => helper.setValue(value)}
                error={apiStatus === "error"}
                onBlur={field.onBlur}
                showClearButton={true}
                disabled={disabled}
                accept="application/json"
                placeholder="Upload or paste your credentials"
              />
            </FormField.Control>
            {showApiMessage ? (
              <FormField.APIMessage
                state={apiStatus}
                messages={{
                  loading: `Testing credentials with ${imageProvider.title}...`,
                  success: "Credentials valid. Configuration saved.",
                  error: errorMessage || "Invalid credentials",
                }}
              />
            ) : (
              <FormField.Message
                messages={{
                  idle: (
                    <>
                      {"Upload or paste your "}
                      <InlineExternalLink href="https://console.cloud.google.com/projectselector2/iam-admin/serviceaccounts?supportedpurview=project">
                        service account credentials
                      </InlineExternalLink>
                      {" from Google Cloud."}
                    </>
                  ),
                  error: meta.error,
                }}
              />
            )}
          </FormField>
        )}
      />

      {/* Location field */}
      <FormikField<string>
        name="custom_config.vertex_location"
        render={(field, helper, meta, state) => (
          <FormField
            name="custom_config.vertex_location"
            state={state}
            className="w-full"
          >
            <FormField.Label>Location</FormField.Label>
            <FormField.Control>
              <InputTypeIn
                value={field.value}
                onChange={(e) => helper.setValue(e.target.value)}
                onBlur={field.onBlur}
                placeholder="global"
                showClearButton={false}
                variant={disabled ? "disabled" : undefined}
              />
            </FormField.Control>
            <FormField.Message
              messages={{
                idle: (
                  <>
                    {"The Google Cloud region for your Vertex AI models. See "}
                    <InlineExternalLink href="https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations">
                      Google&apos;s documentation
                    </InlineExternalLink>
                    {" for available regions."}
                  </>
                ),
                error: meta.error,
              }}
            />
          </FormField>
        )}
      />
    </>
  );
}

export function VertexImageGenForm(props: ImageGenFormBaseProps) {
  const { imageProvider, existingConfig } = props;

  return (
    <ImageGenFormWrapper<VertexImageGenFormValues>
      {...props}
      title={
        existingConfig
          ? `Edit ${imageProvider.title}`
          : `Connect ${imageProvider.title}`
      }
      description={imageProvider.description}
      initialValues={initialValues}
      validationSchema={validationSchema}
      getInitialValuesFromCredentials={getInitialValuesFromCredentials}
      transformValues={(values) => transformValues(values, imageProvider)}
    >
      {(childProps) => <VertexFormFields {...childProps} />}
    </ImageGenFormWrapper>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/getImageGenForm.tsx
================================================
import React from "react";
import { ImageGenFormBaseProps } from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
import { OpenAIImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm";
import { AzureImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm";
import { VertexImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/VertexImageGenForm";

/**
 * Factory function that routes to the correct provider-specific form
 * based on the imageProvider.provider_name.
 */
export function getImageGenForm(props: ImageGenFormBaseProps): React.ReactNode {
  const providerName = props.imageProvider.provider_name;

  switch (providerName) {
    case "openai":
      return <OpenAIImageGenForm {...props} />;
    case "azure":
      return <AzureImageGenForm {...props} />;
    case "vertex_ai":
      return <VertexImageGenForm {...props} />;
    default:
      // Fallback to OpenAI form for unknown providers
      console.warn(
        `Unknown image provider: ${providerName}, falling back to OpenAI form`
      );
      return <OpenAIImageGenForm {...props} />;
  }
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/index.ts
================================================
export * from "@/refresh-pages/admin/ImageGenerationPage/forms/types";
export { ImageGenFormWrapper } from "@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper";
export { OpenAIImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm";
export { AzureImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm";
export { getImageGenForm } from "@/refresh-pages/admin/ImageGenerationPage/forms/getImageGenForm";


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/forms/types.ts
================================================
import { FormikProps } from "formik";
import { ImageProvider } from "@/refresh-pages/admin/ImageGenerationPage/constants";
import { LLMProviderView } from "@/interfaces/llm";
import {
  ImageGenerationConfigView,
  ImageGenerationCredentials,
} from "@/refresh-pages/admin/ImageGenerationPage/svc";
import { ModalCreationInterface } from "@/refresh-components/contexts/ModalContext";
import { APIFormFieldState } from "@/refresh-components/form/types";

// Base props for all image generation forms
export interface ImageGenFormBaseProps {
  modal: ModalCreationInterface;
  imageProvider: ImageProvider;
  existingProviders: LLMProviderView[];
  existingConfig?: ImageGenerationConfigView;
  onSuccess: () => void;
}

// Base type for form values - allows any object structure
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type FormValues = Record<string, any>;

// Props for the generic wrapper component
export interface ImageGenFormWrapperProps<T extends FormValues>
  extends ImageGenFormBaseProps {
  title: string;
  description: string;
  initialValues: T;
  validationSchema: unknown;
  children: (props: ImageGenFormChildProps<T>) => React.ReactNode;
  transformValues?: (values: T) => ImageGenSubmitPayload;
  getInitialValuesFromCredentials?: (
    credentials: ImageGenerationCredentials,
    imageProvider: ImageProvider
  ) => Partial<T>;
}

// Props passed to form field children
export interface ImageGenFormChildProps<T extends FormValues> {
  formikProps: FormikProps<T>;
  apiStatus: APIFormFieldState;
  setApiStatus: (status: APIFormFieldState) => void;
  showApiMessage: boolean;
  setShowApiMessage: (show: boolean) => void;
  errorMessage: string;
  setErrorMessage: (message: string) => void;
  isSubmitting: boolean;
  disabled: boolean;
  isEditMode: boolean;
  isLoadingCredentials: boolean;
  apiKeyOptions: { value: string; label: string }[];
  resetApiState: () => void;
  imageProvider: ImageProvider;
}

// Payload for submitting image generation config
export interface ImageGenSubmitPayload {
  modelName: string;
  imageProviderId: string;
  isDefault?: boolean;

  // Clone mode - reuse credentials from existing LLM provider
  sourceLlmProviderId?: number;

  // New credentials mode
  provider?: string;
  apiKey?: string;
  apiBase?: string;
  apiVersion?: string;
  deploymentName?: string;
  customConfig?: Record<string, string>;
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/index.tsx
================================================
"use client";

import * as SettingsLayouts from "@/layouts/settings-layouts";
import ImageGenerationContent from "@/refresh-pages/admin/ImageGenerationPage/ImageGenerationContent";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

const route = ADMIN_ROUTES.IMAGE_GENERATION;

export default function ImageGenerationPage() {
  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description="Settings for in-chat image generation."
        separator
      />
      <SettingsLayouts.Body>
        <ImageGenerationContent />
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ImageGenerationPage/svc.ts
================================================
/**
 * Image Generation Configuration Service
 * API functions for managing image generation configurations
 */

// Types
export interface ImageGenerationConfigView {
  image_provider_id: string; // Primary key
  model_configuration_id: number;
  model_name: string;
  llm_provider_id: number;
  llm_provider_name: string;
  is_default: boolean;
}

export interface TestApiKeyResult {
  ok: boolean;
  errorMessage?: string;
}

export interface ImageGenerationCredentials {
  api_key: string | null;
  api_base: string | null;
  api_version: string | null;
  deployment_name: string | null;
  custom_config: Record<string, string> | null;
}

// Creation options - either clone from existing provider or use new credentials
export interface ImageGenerationConfigCreateOptions {
  imageProviderId: string;
  modelName: string;
  isDefault?: boolean;

  // Option 1: Clone mode - use credentials from existing provider
  sourceLlmProviderId?: number;

  // Option 2: New credentials mode
  provider?: string;
  apiKey?: string;
  apiBase?: string;
  apiVersion?: string;
  deploymentName?: string;
  customConfig?: Record<string, string>;
}

// API Endpoints
const IMAGE_GEN_CONFIG_URL = "/api/admin/image-generation/config";
const IMAGE_GEN_TEST_URL = "/api/admin/image-generation/test";

/**
 * Test API key for image generation provider
 *
 * Two modes:
 * 1. Direct: provider + apiKey provided
 * 2. From existing provider: sourceLlmProviderId provided (backend fetches API key)
 */
export async function testImageGenerationApiKey(
  modelName: string,
  options: {
    // Option 1: Direct API key
    provider?: string;
    apiKey?: string;
    // Option 2: Use existing provider
    sourceLlmProviderId?: number;
    // Additional fields
    apiBase?: string;
    apiVersion?: string;
    deploymentName?: string;
    customConfig?: Record<string, string>;
  }
): Promise<TestApiKeyResult> {
  try {
    const response = await fetch(IMAGE_GEN_TEST_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model_name: modelName,
        provider: options.provider || null,
        api_key: options.apiKey || null,
        source_llm_provider_id: options.sourceLlmProviderId || null,
        api_base: options.apiBase || null,
        api_version: options.apiVersion || null,
        deployment_name: options.deploymentName || null,
        custom_config: options.customConfig || null,
      }),
    });

    if (!response.ok) {
      const error = await response.json();
      return {
        ok: false,
        errorMessage: error.detail || "API key validation failed",
      };
    }

    return { ok: true };
  } catch (error) {
    return {
      ok: false,
      errorMessage:
        error instanceof Error ? error.message : "An error occurred",
    };
  }
}

/**
 * Fetch all image generation configurations
 */
export async function fetchImageGenerationConfigs(): Promise<
  ImageGenerationConfigView[]
> {
  const response = await fetch(IMAGE_GEN_CONFIG_URL);
  if (!response.ok) {
    throw new Error("Failed to fetch image generation configs");
  }
  return response.json();
}

/**
 * Fetch credentials for an image generation config (for edit mode)
 */
export async function fetchImageGenerationCredentials(
  imageProviderId: string
): Promise<ImageGenerationCredentials> {
  const response = await fetch(
    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/credentials`
  );
  if (!response.ok) {
    throw new Error("Failed to fetch credentials");
  }
  return response.json();
}

/**
 * Create image generation configuration
 * Backend creates new LLM provider + model config + image config
 */
export async function createImageGenerationConfig(
  options: ImageGenerationConfigCreateOptions
): Promise<ImageGenerationConfigView> {
  const response = await fetch(IMAGE_GEN_CONFIG_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      image_provider_id: options.imageProviderId,
      model_name: options.modelName,
      is_default: options.isDefault ?? false,
      // Clone mode
      source_llm_provider_id: options.sourceLlmProviderId,
      // New credentials mode
      provider: options.provider,
      api_key: options.apiKey,
      api_base: options.apiBase,
      api_version: options.apiVersion,
      deployment_name: options.deploymentName,
      custom_config: options.customConfig,
    }),
  });

  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to create config");
  }

  return response.json();
}

// Update options - same structure but without isDefault
export interface ImageGenerationConfigUpdateOptions {
  modelName: string;

  // Option 1: Clone mode - use credentials from existing provider
  sourceLlmProviderId?: number;

  // Option 2: New credentials mode
  provider?: string;
  apiKey?: string;
  apiBase?: string;
  apiVersion?: string;
  deploymentName?: string;
  customConfig?: Record<string, string>;

  // If true, apiKey was changed by user; if false, backend preserves existing key
  apiKeyChanged?: boolean;
}

/**
 * Update image generation configuration
 * Backend deletes old LLM provider and creates new one
 */
export async function updateImageGenerationConfig(
  imageProviderId: string,
  options: ImageGenerationConfigUpdateOptions
): Promise<ImageGenerationConfigView> {
  const response = await fetch(`${IMAGE_GEN_CONFIG_URL}/${imageProviderId}`, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model_name: options.modelName,
      // Clone mode
      source_llm_provider_id: options.sourceLlmProviderId,
      // New credentials mode
      provider: options.provider,
      api_key: options.apiKey,
      api_base: options.apiBase,
      api_version: options.apiVersion,
      deployment_name: options.deploymentName,
      custom_config: options.customConfig,
      // If false, backend preserves existing API key
      api_key_changed: options.apiKeyChanged ?? false,
    }),
  });

  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to update config");
  }

  return response.json();
}

/**
 * Set image generation config as default
 */
export async function setDefaultImageGenerationConfig(
  imageProviderId: string
): Promise<void> {
  const response = await fetch(
    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/default`,
    {
      method: "POST",
    }
  );

  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to set default");
  }
}

/**
 * Unset image generation config as default
 */
export async function unsetDefaultImageGenerationConfig(
  imageProviderId: string
): Promise<void> {
  const response = await fetch(
    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/default`,
    {
      method: "DELETE",
    }
  );

  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to unset default");
  }
}

/**
 * Delete image generation configuration
 */
export async function deleteImageGenerationConfig(
  imageProviderId: string
): Promise<void> {
  const response = await fetch(`${IMAGE_GEN_CONFIG_URL}/${imageProviderId}`, {
    method: "DELETE",
  });

  if (!response.ok) {
    const error = await response.json();
    throw new Error(error.detail || "Failed to delete config");
  }
}


================================================
FILE: web/src/refresh-pages/admin/LLMConfigurationPage.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { toast } from "@/hooks/useToast";
import {
  useAdminLLMProviders,
  useWellKnownLLMProviders,
} from "@/hooks/useLLMProviders";
import { ThreeDotsLoader } from "@/components/Loading";
import { Content, CardHeaderLayout } from "@opal/layouts";
import { Button, SelectCard } from "@opal/components";
import { Hoverable } from "@opal/core";
import { SvgArrowExchange, SvgSettings, SvgTrash } from "@opal/icons";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import * as GeneralLayouts from "@/layouts/general-layouts";
import {
  getProviderDisplayName,
  getProviderIcon,
  getProviderProductName,
} from "@/lib/llmConfig/providers";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import { deleteLlmProvider, setDefaultLlmModel } from "@/lib/llmConfig/svc";
import Text from "@/refresh-components/texts/Text";
import { Horizontal as HorizontalInput } from "@/layouts/input-layouts";
import Card from "@/refresh-components/cards/Card";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import Message from "@/refresh-components/messages/Message";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import Separator from "@/refresh-components/Separator";
import {
  LLMProviderView,
  WellKnownLLMProviderDescriptor,
} from "@/interfaces/llm";
import { getModalForExistingProvider } from "@/sections/modals/llmConfig/getModal";
import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
import AzureModal from "@/sections/modals/llmConfig/AzureModal";
import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
import CustomModal from "@/sections/modals/llmConfig/CustomModal";
import LMStudioForm from "@/sections/modals/llmConfig/LMStudioForm";
import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
import BifrostModal from "@/sections/modals/llmConfig/BifrostModal";
import { Section } from "@/layouts/general-layouts";

const route = ADMIN_ROUTES.LLM_MODELS;

// ============================================================================
// Provider form mapping (keyed by provider name from the API)
// ============================================================================

// Client-side ordering for the "Add Provider" cards. The backend may return
// wellKnownLLMProviders in an arbitrary order, so we sort explicitly here.
const PROVIDER_DISPLAY_ORDER: string[] = [
  "openai",
  "anthropic",
  "vertex_ai",
  "bedrock",
  "azure",
  "litellm_proxy",
  "ollama_chat",
  "openrouter",
  "lm_studio",
  "bifrost",
];

const PROVIDER_MODAL_MAP: Record<
  string,
  (
    shouldMarkAsDefault: boolean,
    open: boolean,
    onOpenChange: (open: boolean) => void
  ) => React.ReactNode
> = {
  openai: (d, open, onOpenChange) => (
    <OpenAIModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  anthropic: (d, open, onOpenChange) => (
    <AnthropicModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  ollama_chat: (d, open, onOpenChange) => (
    <OllamaModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  azure: (d, open, onOpenChange) => (
    <AzureModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  bedrock: (d, open, onOpenChange) => (
    <BedrockModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  vertex_ai: (d, open, onOpenChange) => (
    <VertexAIModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  openrouter: (d, open, onOpenChange) => (
    <OpenRouterModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  lm_studio: (d, open, onOpenChange) => (
    <LMStudioForm
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  litellm_proxy: (d, open, onOpenChange) => (
    <LiteLLMProxyModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
  bifrost: (d, open, onOpenChange) => (
    <BifrostModal
      shouldMarkAsDefault={d}
      open={open}
      onOpenChange={onOpenChange}
    />
  ),
};

// ============================================================================
// ExistingProviderCard — card for configured (existing) providers
// ============================================================================

interface ExistingProviderCardProps {
  provider: LLMProviderView;
  isDefault: boolean;
  isLastProvider: boolean;
  defaultModelName?: string;
}

function ExistingProviderCard({
  provider,
  isDefault,
  isLastProvider,
  defaultModelName,
}: ExistingProviderCardProps) {
  const { mutate } = useSWRConfig();
  const [isOpen, setIsOpen] = useState(false);
  const deleteModal = useCreateModal();

  const handleDelete = async () => {
    try {
      await deleteLlmProvider(provider.id);
      await refreshLlmProviderCaches(mutate);
      deleteModal.toggle(false);
      toast.success("Provider deleted successfully!");
    } catch (e) {
      const message = e instanceof Error ? e.message : "Unknown error";
      toast.error(`Failed to delete provider: ${message}`);
    }
  };

  return (
    <>
      {deleteModal.isOpen && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title={`Delete ${provider.name}`}
          onClose={() => deleteModal.toggle(false)}
          submit={
            <Button variant="danger" onClick={handleDelete}>
              Delete
            </Button>
          }
        >
          <Section alignItems="start" gap={0.5}>
            <Text text03>
              All LLM models from provider <b>{provider.name}</b> will be
              removed and unavailable for future chats. Chat history will be
              preserved.
            </Text>
            {isLastProvider && (
              <Text text03>
                Connect another provider to continue using chats.
              </Text>
            )}
          </Section>
        </ConfirmationModalLayout>
      )}

      <Hoverable.Root group="ExistingProviderCard">
        <SelectCard
          state="filled"
          padding="sm"
          rounding="lg"
          onClick={() => setIsOpen(true)}
        >
          <CardHeaderLayout
            icon={getProviderIcon(provider.provider)}
            title={provider.name}
            description={getProviderDisplayName(provider.provider)}
            sizePreset="main-ui"
            variant="section"
            tag={isDefault ? { title: "Default", color: "blue" } : undefined}
            rightChildren={
              <div className="flex flex-row">
                <Hoverable.Item
                  group="ExistingProviderCard"
                  variant="opacity-on-hover"
                >
                  <Button
                    icon={SvgTrash}
                    prominence="tertiary"
                    aria-label={`Delete ${provider.name}`}
                    onClick={(e) => {
                      e.stopPropagation();
                      deleteModal.toggle(true);
                    }}
                  />
                </Hoverable.Item>
                <Button
                  icon={SvgSettings}
                  prominence="tertiary"
                  aria-label={`Edit ${provider.name}`}
                  onClick={(e) => {
                    e.stopPropagation();
                    setIsOpen(true);
                  }}
                />
              </div>
            }
          />
          {getModalForExistingProvider(
            provider,
            isOpen,
            setIsOpen,
            defaultModelName
          )}
        </SelectCard>
      </Hoverable.Root>
    </>
  );
}

// ============================================================================
// NewProviderCard — card for the "Add Provider" list
// ============================================================================

interface NewProviderCardProps {
  provider: WellKnownLLMProviderDescriptor;
  isFirstProvider: boolean;
  formFn: (
    shouldMarkAsDefault: boolean,
    open: boolean,
    onOpenChange: (open: boolean) => void
  ) => React.ReactNode;
}

function NewProviderCard({
  provider,
  isFirstProvider,
  formFn,
}: NewProviderCardProps) {
  const [isOpen, setIsOpen] = useState(false);

  return (
    <SelectCard
      state="empty"
      padding="sm"
      rounding="lg"
      onClick={() => setIsOpen(true)}
    >
      <CardHeaderLayout
        icon={getProviderIcon(provider.name)}
        title={getProviderProductName(provider.name)}
        description={getProviderDisplayName(provider.name)}
        sizePreset="main-ui"
        variant="section"
        rightChildren={
          <Button
            rightIcon={SvgArrowExchange}
            prominence="tertiary"
            onClick={(e) => {
              e.stopPropagation();
              setIsOpen(true);
            }}
          >
            Connect
          </Button>
        }
      />
      {formFn(isFirstProvider, isOpen, setIsOpen)}
    </SelectCard>
  );
}

// ============================================================================
// NewCustomProviderCard — card for adding a custom LLM provider
// ============================================================================

interface NewCustomProviderCardProps {
  isFirstProvider: boolean;
}

function NewCustomProviderCard({
  isFirstProvider,
}: NewCustomProviderCardProps) {
  const [isOpen, setIsOpen] = useState(false);

  return (
    <SelectCard
      state="empty"
      padding="sm"
      rounding="lg"
      onClick={() => setIsOpen(true)}
    >
      <CardHeaderLayout
        icon={getProviderIcon("custom")}
        title={getProviderProductName("custom")}
        description={getProviderDisplayName("custom")}
        sizePreset="main-ui"
        variant="section"
        rightChildren={
          <Button
            rightIcon={SvgArrowExchange}
            prominence="tertiary"
            onClick={(e) => {
              e.stopPropagation();
              setIsOpen(true);
            }}
          >
            Set Up
          </Button>
        }
      />
      <CustomModal
        shouldMarkAsDefault={isFirstProvider}
        open={isOpen}
        onOpenChange={setIsOpen}
      />
    </SelectCard>
  );
}

// ============================================================================
// LLMConfigurationPage — main page component
// ============================================================================

export default function LLMConfigurationPage() {
  const { mutate } = useSWRConfig();
  const { llmProviders: existingLlmProviders, defaultText } =
    useAdminLLMProviders();
  const { wellKnownLLMProviders } = useWellKnownLLMProviders();

  if (!existingLlmProviders) {
    return <ThreeDotsLoader />;
  }

  const hasProviders = existingLlmProviders.length > 0;
  const isFirstProvider = !hasProviders;

  // Pre-sort providers so the default appears first
  const sortedProviders = [...existingLlmProviders].sort((a, b) => {
    const aIsDefault = defaultText?.provider_id === a.id;
    const bIsDefault = defaultText?.provider_id === b.id;
    if (aIsDefault && !bIsDefault) return -1;
    if (!aIsDefault && bIsDefault) return 1;
    return 0;
  });

  // Pre-filter to providers that have at least one visible model
  const providersWithVisibleModels = existingLlmProviders
    .map((provider) => ({
      provider,
      visibleModels: provider.model_configurations.filter((m) => m.is_visible),
    }))
    .filter(({ visibleModels }) => visibleModels.length > 0);

  // Default model logic — use the global default from the API response
  const currentDefaultValue = defaultText
    ? `${defaultText.provider_id}:${defaultText.model_name}`
    : undefined;

  async function handleDefaultModelChange(compositeValue: string) {
    const separatorIndex = compositeValue.indexOf(":");
    const providerId = Number(compositeValue.slice(0, separatorIndex));
    const modelName = compositeValue.slice(separatorIndex + 1);

    try {
      await setDefaultLlmModel(providerId, modelName);
      await refreshLlmProviderCaches(mutate);
      toast.success("Default model updated successfully!");
    } catch (e) {
      const message = e instanceof Error ? e.message : "Unknown error";
      toast.error(`Failed to set default model: ${message}`);
    }
  }

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />

      <SettingsLayouts.Body>
        {hasProviders ? (
          <Card>
            <HorizontalInput
              title="Default Model"
              description="This model will be used by Onyx by default in your chats."
              nonInteractive
              center
            >
              <InputSelect
                value={currentDefaultValue}
                onValueChange={handleDefaultModelChange}
              >
                <InputSelect.Trigger placeholder="Select a default model" />
                <InputSelect.Content>
                  {providersWithVisibleModels.map(
                    ({ provider, visibleModels }) => (
                      <InputSelect.Group key={provider.id}>
                        <InputSelect.Label>{provider.name}</InputSelect.Label>
                        {visibleModels.map((model) => (
                          <InputSelect.Item
                            key={`${provider.id}:${model.name}`}
                            value={`${provider.id}:${model.name}`}
                          >
                            {model.display_name || model.name}
                          </InputSelect.Item>
                        ))}
                      </InputSelect.Group>
                    )
                  )}
                </InputSelect.Content>
              </InputSelect>
            </HorizontalInput>
          </Card>
        ) : (
          <Message
            info
            large
            icon
            close={false}
            text="Set up an LLM provider to start chatting."
            className="w-full"
          />
        )}

        {/* ── Available Providers (only when providers exist) ── */}
        {hasProviders && (
          <>
            <GeneralLayouts.Section
              gap={0.75}
              height="fit"
              alignItems="stretch"
              justifyContent="start"
            >
              <Content
                title="Available Providers"
                sizePreset="main-content"
                variant="section"
              />

              <div className="flex flex-col gap-2">
                {sortedProviders.map((provider) => (
                  <ExistingProviderCard
                    key={provider.id}
                    provider={provider}
                    isDefault={defaultText?.provider_id === provider.id}
                    isLastProvider={sortedProviders.length === 1}
                    defaultModelName={
                      defaultText?.provider_id === provider.id
                        ? defaultText.model_name
                        : undefined
                    }
                  />
                ))}
              </div>
            </GeneralLayouts.Section>

            <Separator noPadding />
          </>
        )}

        {/* ── Add Provider (always visible) ── */}
        <GeneralLayouts.Section
          gap={0.75}
          height="fit"
          alignItems="stretch"
          justifyContent="start"
        >
          <Content
            title="Add Provider"
            description="Onyx supports both popular providers and self-hosted models."
            sizePreset="main-content"
            variant="section"
          />

          <div className="grid grid-cols-2 gap-2">
            {[...(wellKnownLLMProviders ?? [])]
              .sort((a, b) => {
                const aIndex = PROVIDER_DISPLAY_ORDER.indexOf(a.name);
                const bIndex = PROVIDER_DISPLAY_ORDER.indexOf(b.name);
                return (
                  (aIndex === -1 ? Infinity : aIndex) -
                  (bIndex === -1 ? Infinity : bIndex)
                );
              })
              .map((provider) => {
                const formFn = PROVIDER_MODAL_MAP[provider.name];
                if (!formFn) {
                  toast.error(
                    `No modal mapping for provider "${provider.name}".`
                  );
                  return null;
                }
                return (
                  <NewProviderCard
                    key={provider.name}
                    provider={provider}
                    isFirstProvider={isFirstProvider}
                    formFn={formFn}
                  />
                );
              })}
            <NewCustomProviderCard isFirstProvider={isFirstProvider} />
          </div>
        </GeneralLayouts.Section>
      </SettingsLayouts.Body>
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ServiceAccountsPage/ApiKeyFormModal.tsx
================================================
"use client";

import { Form, Formik } from "formik";
import { toast } from "@/hooks/useToast";
import {
  createApiKey,
  updateApiKey,
} from "@/refresh-pages/admin/ServiceAccountsPage/svc";
import type { APIKey } from "@/refresh-pages/admin/ServiceAccountsPage/interfaces";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { FormikField } from "@/refresh-components/form/FormikField";
import { Vertical as VerticalInput } from "@/layouts/input-layouts";
import { USER_ROLE_LABELS, UserRole } from "@/lib/types";
import { SvgKey, SvgLock, SvgUser, SvgUserManage } from "@opal/icons";

interface ApiKeyFormModalProps {
  onClose: () => void;
  onCreateApiKey: (apiKey: APIKey) => void;
  apiKey?: APIKey;
}

export default function ApiKeyFormModal({
  onClose,
  onCreateApiKey,
  apiKey,
}: ApiKeyFormModalProps) {
  const isUpdate = apiKey !== undefined;

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="sm" height="lg">
        <Modal.Header
          icon={SvgKey}
          title={isUpdate ? "Update Service Account" : "Create Service Account"}
          description={
            isUpdate
              ? undefined
              : "Use service account API key to programmatically access Onyx API with user-level permissions. You can modify the account details later."
          }
          onClose={onClose}
        />
        <Formik
          initialValues={{
            name: apiKey?.api_key_name || "",
            role: apiKey?.api_key_role || UserRole.BASIC.toString(),
          }}
          onSubmit={async (values, formikHelpers) => {
            formikHelpers.setSubmitting(true);

            const payload = {
              ...values,
              role: values.role as UserRole,
            };

            try {
              let response;
              if (isUpdate) {
                response = await updateApiKey(apiKey.api_key_id, payload);
              } else {
                response = await createApiKey(payload);
              }
              if (response.ok) {
                toast.success(
                  isUpdate
                    ? "Successfully updated service account!"
                    : "Successfully created service account!"
                );
                if (!isUpdate) {
                  onCreateApiKey(await response.json());
                }
                onClose();
              } else {
                const responseJson = await response.json();
                const errorMsg = responseJson.detail || responseJson.message;
                toast.error(
                  isUpdate
                    ? `Error updating service account - ${errorMsg}`
                    : `Error creating service account - ${errorMsg}`
                );
              }
            } catch (e) {
              toast.error(
                e instanceof Error ? e.message : "An unexpected error occurred."
              );
            } finally {
              formikHelpers.setSubmitting(false);
            }
          }}
        >
          {({ isSubmitting, values }) => (
            <Form className="w-full overflow-visible">
              <Modal.Body>
                <VerticalInput
                  name="name"
                  title="Name"
                  nonInteractive
                  sizePreset="main-ui"
                >
                  <FormikField<string>
                    name="name"
                    render={(field, helper) => (
                      <InputTypeIn
                        {...field}
                        placeholder="Enter a name"
                        onClear={() => helper.setValue("")}
                        showClearButton={false}
                      />
                    )}
                  />
                </VerticalInput>

                <VerticalInput
                  name="role"
                  title="Account Permissions"
                  nonInteractive
                  sizePreset="main-ui"
                >
                  <FormikField<string>
                    name="role"
                    render={(field, helper) => (
                      <InputSelect
                        value={field.value}
                        onValueChange={(value) => helper.setValue(value)}
                      >
                        <InputSelect.Trigger placeholder="Select permissions" />
                        <InputSelect.Content>
                          <InputSelect.Item
                            value={UserRole.ADMIN.toString()}
                            icon={SvgUserManage}
                            description="Unrestricted admin access to all endpoints."
                          >
                            {USER_ROLE_LABELS[UserRole.ADMIN]}
                          </InputSelect.Item>
                          <InputSelect.Item
                            value={UserRole.BASIC.toString()}
                            icon={SvgUser}
                            description="Standard user-level access to non-admin endpoints."
                          >
                            {USER_ROLE_LABELS[UserRole.BASIC]}
                          </InputSelect.Item>
                          <InputSelect.Item
                            value={UserRole.LIMITED.toString()}
                            icon={SvgLock}
                            description="For agents: chat posting and read-only access to other endpoints."
                          >
                            {USER_ROLE_LABELS[UserRole.LIMITED]}
                          </InputSelect.Item>
                        </InputSelect.Content>
                      </InputSelect>
                    )}
                  />
                </VerticalInput>
              </Modal.Body>

              <Modal.Footer>
                <Button prominence="secondary" type="button" onClick={onClose}>
                  Cancel
                </Button>
                <Button
                  disabled={isSubmitting || !values.name.trim()}
                  type="submit"
                >
                  {isUpdate ? "Update" : "Create Account"}
                </Button>
              </Modal.Footer>
            </Form>
          )}
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ServiceAccountsPage/index.tsx
================================================
"use client";

import { useMemo, useState } from "react";
import useSWR, { mutate } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { toast } from "@/hooks/useToast";
import { Button, Text } from "@opal/components";
import { Content, IllustrationContent } from "@opal/layouts";
import SvgNoResult from "@opal/illustrations/no-result";
import {
  SvgDownload,
  SvgKey,
  SvgLock,
  SvgMoreHorizontal,
  SvgRefreshCw,
  SvgTrash,
  SvgUser,
  SvgUserEdit,
  SvgUserKey,
  SvgUserManage,
} from "@opal/icons";
import { USER_ROLE_LABELS, UserRole } from "@/lib/types";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import AdminListHeader from "@/sections/admin/AdminListHeader";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import Code from "@/refresh-components/Code";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { markdown } from "@opal/utils";
import Message from "@/refresh-components/messages/Message";

import { useBillingInformation } from "@/hooks/useBillingInformation";
import { BillingStatus, hasActiveSubscription } from "@/lib/billing/interfaces";
import {
  deleteApiKey,
  regenerateApiKey,
  updateApiKey,
} from "@/refresh-pages/admin/ServiceAccountsPage/svc";
import type { APIKey } from "@/refresh-pages/admin/ServiceAccountsPage/interfaces";
import { DISCORD_SERVICE_API_KEY_NAME } from "@/refresh-pages/admin/ServiceAccountsPage/interfaces";
import ApiKeyFormModal from "@/refresh-pages/admin/ServiceAccountsPage/ApiKeyFormModal";
import { Table } from "@opal/components";
import { createTableColumns } from "@opal/components/table/columns";
import { Section } from "@/layouts/general-layouts";

const API_KEY_SWR_KEY = SWR_KEYS.adminApiKeys;
const route = ADMIN_ROUTES.API_KEYS;

const tc = createTableColumns<APIKey>();

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function ServiceAccountsPage() {
  const {
    data: apiKeys,
    isLoading,
    error,
  } = useSWR<APIKey[]>(API_KEY_SWR_KEY, errorHandlingFetcher);

  const { data: billingData } = useBillingInformation();
  const isTrialing =
    billingData !== undefined &&
    hasActiveSubscription(billingData) &&
    billingData.status === BillingStatus.TRIALING;

  const [fullApiKey, setFullApiKey] = useState<string | null>(null);
  const [showCreateUpdateForm, setShowCreateUpdateForm] = useState(false);
  const [selectedApiKey, setSelectedApiKey] = useState<APIKey | undefined>();
  const [search, setSearch] = useState("");
  const [regenerateTarget, setRegenerateTarget] = useState<APIKey | null>(null);
  const [deleteTarget, setDeleteTarget] = useState<APIKey | null>(null);

  const visibleApiKeys = (apiKeys ?? []).filter(
    (key) => key.api_key_name !== DISCORD_SERVICE_API_KEY_NAME
  );

  const filteredApiKeys = visibleApiKeys.filter(
    (key) =>
      !search ||
      (key.api_key_name ?? "").toLowerCase().includes(search.toLowerCase()) ||
      key.api_key_display.toLowerCase().includes(search.toLowerCase())
  );

  const handleRoleChange = async (apiKey: APIKey, newRole: UserRole) => {
    try {
      const response = await updateApiKey(apiKey.api_key_id, {
        name: apiKey.api_key_name ?? undefined,
        role: newRole,
      });
      if (!response.ok) {
        const errorMsg = await response.text();
        toast.error(`Failed to update role: ${errorMsg}`);
        return;
      }
      mutate(API_KEY_SWR_KEY);
      toast.success("Role updated.");
    } catch {
      toast.error("Failed to update role.");
    }
  };

  const handleRegenerate = async (apiKey: APIKey) => {
    try {
      const response = await regenerateApiKey(apiKey);
      if (!response.ok) {
        const errorMsg = await response.text();
        toast.error(`Failed to regenerate API Key: ${errorMsg}`);
        return;
      }
      const newKey = (await response.json()) as APIKey;
      setFullApiKey(newKey.api_key);
      mutate(API_KEY_SWR_KEY);
    } catch (e) {
      toast.error(
        e instanceof Error ? e.message : "Failed to regenerate API Key."
      );
    }
  };

  const handleDelete = async (apiKey: APIKey) => {
    try {
      const response = await deleteApiKey(apiKey.api_key_id);
      if (!response.ok) {
        const errorMsg = await response.text();
        toast.error(`Failed to delete API Key: ${errorMsg}`);
        return;
      }
      mutate(API_KEY_SWR_KEY);
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "Failed to delete API Key.");
    }
  };

  const columns = useMemo(
    () => [
      tc.qualifier({
        content: "icon",
        getContent: () => SvgUserKey,
      }),
      tc.column("api_key_name", {
        header: "Name",
        weight: 25,
        cell: (value) => (
          <Content
            title={value || "Unnamed"}
            sizePreset="main-ui"
            variant="body"
          />
        ),
      }),
      tc.column("api_key_display", {
        header: "API Key",
        weight: 30,
        cell: (value) => (
          <Text font="secondary-mono" color="text-03">
            {value}
          </Text>
        ),
      }),
      tc.displayColumn({
        id: "account_type",
        header: "Account Type",
        width: { weight: 25, minWidth: 160 },
        cell: (row) => (
          <InputSelect
            value={row.api_key_role}
            onValueChange={(value) => handleRoleChange(row, value as UserRole)}
          >
            <InputSelect.Trigger />
            <InputSelect.Content>
              <InputSelect.Item
                value={UserRole.ADMIN.toString()}
                icon={SvgUserManage}
                description="Unrestricted admin access to all endpoints."
              >
                {USER_ROLE_LABELS[UserRole.ADMIN]}
              </InputSelect.Item>
              <InputSelect.Item
                value={UserRole.BASIC.toString()}
                icon={SvgUser}
                description="Standard user-level access to non-admin endpoints."
              >
                {USER_ROLE_LABELS[UserRole.BASIC]}
              </InputSelect.Item>
              <InputSelect.Item
                value={UserRole.LIMITED.toString()}
                icon={SvgLock}
                description="For agents: chat posting and read-only access to other endpoints."
              >
                {USER_ROLE_LABELS[UserRole.LIMITED]}
              </InputSelect.Item>
            </InputSelect.Content>
          </InputSelect>
        ),
      }),
      tc.actions({
        cell: (row) => (
          <div className="flex flex-row gap-1">
            <Button
              icon={SvgRefreshCw}
              prominence="tertiary"
              tooltip="Regenerate"
              onClick={() => setRegenerateTarget(row)}
            />
            <Popover>
              <Popover.Trigger asChild>
                <Button
                  icon={SvgMoreHorizontal}
                  prominence="tertiary"
                  tooltip="More"
                />
              </Popover.Trigger>
              <Popover.Content side="bottom" align="end" width="md">
                <PopoverMenu>
                  <LineItem
                    icon={SvgUserEdit}
                    onClick={() => {
                      setSelectedApiKey(row);
                      setShowCreateUpdateForm(true);
                    }}
                  >
                    Edit Account
                  </LineItem>
                  <LineItem
                    icon={SvgTrash}
                    danger
                    onClick={() => setDeleteTarget(row)}
                  >
                    Delete Account
                  </LineItem>
                </PopoverMenu>
              </Popover.Content>
            </Popover>
          </div>
        ),
      }),
    ],
    [] // eslint-disable-line react-hooks/exhaustive-deps
  );

  if (error) {
    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          title={route.title}
          icon={route.icon}
          description="Use service accounts to programmatically access Onyx API."
          separator
        />
        <SettingsLayouts.Body>
          <IllustrationContent
            illustration={SvgNoResult}
            title="Failed to load service accounts."
            description="Please check the console for more details."
          />
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  if (isLoading) {
    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          title={route.title}
          icon={route.icon}
          description="Use service accounts to programmatically access Onyx API."
          separator
        />
        <SettingsLayouts.Body>
          <SimpleLoader />
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  const hasKeys = visibleApiKeys.length > 0;

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        title={route.title}
        icon={route.icon}
        description="Use service accounts to programmatically access Onyx API."
        separator
      />

      <SettingsLayouts.Body>
        {isTrialing && (
          <Message
            static
            warning
            close={false}
            className="w-full"
            text="Upgrade to a paid plan to create API keys."
            description="Trial accounts do not include API key access — purchase a paid subscription to unlock this feature."
          />
        )}

        <div className="flex flex-col">
          <AdminListHeader
            hasItems={hasKeys}
            searchQuery={search}
            onSearchQueryChange={setSearch}
            placeholder="Search service accounts..."
            emptyStateText="Create service account API keys with user-level access."
            onAction={() => {
              setSelectedApiKey(undefined);
              setShowCreateUpdateForm(true);
            }}
            actionLabel="New Service Account"
          />

          {hasKeys && (
            <Table
              data={filteredApiKeys}
              getRowId={(row) => String(row.api_key_id)}
              columns={columns}
              searchTerm={search}
            />
          )}
        </div>
      </SettingsLayouts.Body>

      <Modal open={!!fullApiKey}>
        <Modal.Content width="sm" height="sm">
          <Modal.Header
            title="Service Account API Key"
            icon={SvgKey}
            onClose={() => setFullApiKey(null)}
            description="Save this key before continuing. It won't be shown again."
          />
          <Modal.Body>
            <Code showCopyButton={false}>{fullApiKey ?? ""}</Code>
          </Modal.Body>
          <Modal.Footer>
            <BasicModalFooter
              left={
                <Button
                  prominence="secondary"
                  icon={SvgDownload}
                  onClick={() => {
                    if (!fullApiKey) return;
                    const blob = new Blob([fullApiKey], {
                      type: "text/plain",
                    });
                    const url = URL.createObjectURL(blob);
                    const a = document.createElement("a");
                    a.href = url;
                    a.download = "onyx-api-key.txt";
                    a.click();
                    URL.revokeObjectURL(url);
                  }}
                >
                  Download
                </Button>
              }
              submit={
                // TODO(@raunakab): Create an opalified copy-button and replace it here
                <Button
                  onClick={() => {
                    if (fullApiKey) {
                      navigator.clipboard.writeText(fullApiKey);
                      toast.success("API key copied to clipboard.");
                    }
                  }}
                >
                  Copy API Key
                </Button>
              }
            />
          </Modal.Footer>
        </Modal.Content>
      </Modal>

      {showCreateUpdateForm && (
        <ApiKeyFormModal
          onCreateApiKey={(apiKey) => {
            setFullApiKey(apiKey.api_key);
          }}
          onClose={() => {
            setShowCreateUpdateForm(false);
            setSelectedApiKey(undefined);
            mutate(API_KEY_SWR_KEY);
          }}
          apiKey={selectedApiKey}
        />
      )}

      {regenerateTarget && (
        <ConfirmationModalLayout
          icon={SvgRefreshCw}
          title="Regenerate API Key"
          onClose={() => setRegenerateTarget(null)}
          submit={
            <Button
              variant="danger"
              onClick={async () => {
                const target = regenerateTarget;
                setRegenerateTarget(null);
                await handleRegenerate(target);
              }}
            >
              Regenerate Key
            </Button>
          }
        >
          <Text as="p" color="text-03">
            {markdown(
              `Your current API key *${
                regenerateTarget.api_key_name || "Unnamed"
              }* (\`${
                regenerateTarget.api_key_display
              }\`) will be revoked and a new key will be generated. You will need to update any applications using this key with the new one.`
            )}
          </Text>
        </ConfirmationModalLayout>
      )}

      {deleteTarget && (
        <ConfirmationModalLayout
          icon={SvgTrash}
          title="Delete Account"
          onClose={() => setDeleteTarget(null)}
          submit={
            <Button
              variant="danger"
              onClick={async () => {
                await handleDelete(deleteTarget);
                setDeleteTarget(null);
              }}
            >
              Delete
            </Button>
          }
        >
          <Section alignItems="start" gap={0.5}>
            <Text as="p" color="text-03">
              {markdown(
                `Any application using the API key of account *${
                  deleteTarget.api_key_name || "Unnamed"
                }* (\`${
                  deleteTarget.api_key_display
                }\`) will lose access to Onyx.`
              )}
            </Text>
            <Text as="p" color="text-03">
              Deletion cannot be undone.
            </Text>
          </Section>
        </ConfirmationModalLayout>
      )}
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/ServiceAccountsPage/interfaces.ts
================================================
import { UserRole } from "@/lib/types";

export const DISCORD_SERVICE_API_KEY_NAME = "discord-bot-service";

export interface APIKey {
  api_key_id: number;
  api_key_display: string;
  api_key: string | null;
  api_key_name: string | null;
  api_key_role: UserRole;
  user_id: string;
}

export interface APIKeyArgs {
  name?: string;
  role: UserRole;
}


================================================
FILE: web/src/refresh-pages/admin/ServiceAccountsPage/svc.ts
================================================
import type {
  APIKeyArgs,
  APIKey,
} from "@/refresh-pages/admin/ServiceAccountsPage/interfaces";

const API_KEY_URL = "/api/admin/api-key";

export async function createApiKey(args: APIKeyArgs): Promise<Response> {
  return fetch(API_KEY_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(args),
  });
}

export async function regenerateApiKey(apiKey: APIKey): Promise<Response> {
  return fetch(`${API_KEY_URL}/${apiKey.api_key_id}/regenerate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  });
}

export async function updateApiKey(
  apiKeyId: number,
  args: APIKeyArgs
): Promise<Response> {
  return fetch(`${API_KEY_URL}/${apiKeyId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(args),
  });
}

export async function deleteApiKey(apiKeyId: number): Promise<Response> {
  return fetch(`${API_KEY_URL}/${apiKeyId}`, {
    method: "DELETE",
  });
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/EditUserModal.tsx
================================================
"use client";

import { useState, useMemo, useCallback } from "react";
import { Button } from "@opal/components";
import { SvgUsers, SvgUser, SvgLogOut, SvgCheck } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import Modal from "@/refresh-components/Modal";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import Separator from "@/refresh-components/Separator";
import ShadowDiv from "@/refresh-components/ShadowDiv";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { Section } from "@/layouts/general-layouts";
import { toast } from "@/hooks/useToast";
import { UserRole, USER_ROLE_LABELS } from "@/lib/types";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import useGroups from "@/hooks/useGroups";
import { addUserToGroup, removeUserFromGroup, setUserRole } from "./svc";
import type { UserRow } from "./interfaces";
import { cn } from "../../../lib/utils";

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const ASSIGNABLE_ROLES: UserRole[] = [
  UserRole.ADMIN,
  UserRole.GLOBAL_CURATOR,
  UserRole.BASIC,
];

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface EditUserModalProps {
  user: UserRow & { id: string };
  onClose: () => void;
  onMutate: () => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function EditUserModal({
  user,
  onClose,
  onMutate,
}: EditUserModalProps) {
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const { data: allGroups, isLoading: groupsLoading } = useGroups();
  const [searchTerm, setSearchTerm] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [popoverOpen, setPopoverOpen] = useState(false);
  const [selectedRole, setSelectedRole] = useState<UserRole | "">(
    user.role ?? ""
  );

  const initialMemberGroupIds = useMemo(
    () => new Set(user.groups.map((g) => g.id)),
    [user.groups]
  );
  const [memberGroupIds, setMemberGroupIds] = useState<Set<number>>(
    () => new Set(initialMemberGroupIds)
  );

  // Dropdown shows all groups filtered by search term
  const dropdownGroups = useMemo(() => {
    if (!allGroups) return [];
    if (searchTerm.length === 0) return allGroups;
    const lower = searchTerm.toLowerCase();
    return allGroups.filter((g) => g.name.toLowerCase().includes(lower));
  }, [allGroups, searchTerm]);

  // Joined groups shown in the modal body
  const joinedGroups = useMemo(() => {
    if (!allGroups) return [];
    return allGroups.filter((g) => memberGroupIds.has(g.id));
  }, [allGroups, memberGroupIds]);

  const hasGroupChanges = useMemo(() => {
    if (memberGroupIds.size !== initialMemberGroupIds.size) return true;
    return Array.from(memberGroupIds).some(
      (id) => !initialMemberGroupIds.has(id)
    );
  }, [memberGroupIds, initialMemberGroupIds]);

  const visibleRoles = isPaidEnterpriseFeaturesEnabled
    ? ASSIGNABLE_ROLES
    : ASSIGNABLE_ROLES.filter((r) => r !== UserRole.GLOBAL_CURATOR);

  const hasRoleChange =
    user.role !== null && selectedRole !== "" && selectedRole !== user.role;
  const hasChanges = hasGroupChanges || hasRoleChange;

  const toggleGroup = (groupId: number) => {
    setMemberGroupIds((prev) => {
      const next = new Set(prev);
      if (next.has(groupId)) {
        next.delete(groupId);
      } else {
        next.add(groupId);
      }
      return next;
    });
  };

  const handleSave = async () => {
    setIsSubmitting(true);
    try {
      const toAdd = Array.from(memberGroupIds).filter(
        (id) => !initialMemberGroupIds.has(id)
      );
      const toRemove = Array.from(initialMemberGroupIds).filter(
        (id) => !memberGroupIds.has(id)
      );

      if (user.id) {
        for (const groupId of toAdd) {
          await addUserToGroup(groupId, user.id);
        }
        for (const groupId of toRemove) {
          const group = allGroups?.find((g) => g.id === groupId);
          if (group) {
            const currentUserIds = group.users.map((u) => u.id);
            const ccPairIds = group.cc_pairs.map((cc) => cc.id);
            await removeUserFromGroup(
              groupId,
              currentUserIds,
              user.id,
              ccPairIds
            );
          }
        }
      }

      if (
        user.role !== null &&
        selectedRole !== "" &&
        selectedRole !== user.role
      ) {
        await setUserRole(user.email, selectedRole);
      }

      onMutate();
      toast.success("User updated");
      onClose();
    } catch (err) {
      onMutate(); // refresh to show partially-applied state
      toast.error(err instanceof Error ? err.message : "An error occurred");
    } finally {
      setIsSubmitting(false);
    }
  };

  const displayName = user.personal_name ?? user.email;
  const [contentEl, setContentEl] = useState<HTMLDivElement | null>(null);
  const contentRef = useCallback((node: HTMLDivElement | null) => {
    setContentEl(node);
  }, []);

  return (
    <Modal
      open
      onOpenChange={(isOpen) => !isOpen && !isSubmitting && onClose()}
    >
      <Modal.Content width="sm" ref={contentRef}>
        <Modal.Header
          icon={SvgUsers}
          title="Edit User's Groups & Roles"
          description={
            user.personal_name
              ? `${user.personal_name} (${user.email})`
              : user.email
          }
          onClose={isSubmitting ? undefined : onClose}
        />
        <Modal.Body twoTone>
          <Section padding={0} height="auto" alignItems="stretch">
            <Section
              gap={0.5}
              padding={0.25}
              height={joinedGroups.length === 0 && !popoverOpen ? "auto" : 14.5}
              alignItems="stretch"
              justifyContent="start"
              className="bg-background-tint-02 rounded-08"
            >
              <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
                <Popover.Trigger asChild>
                  {/* asChild merges trigger props onto this div instead of rendering a <button>.
                     Without it, the trigger <button> would nest around InputTypeIn's
                     internal IconButton <button>, causing a hydration error. */}
                  <div>
                    <InputTypeIn
                      value={searchTerm}
                      onChange={(e) => setSearchTerm(e.target.value)}
                      placeholder="Search groups to join..."
                      leftSearchIcon
                    />
                  </div>
                </Popover.Trigger>
                <Popover.Content
                  width="trigger"
                  align="start"
                  container={contentEl}
                >
                  {groupsLoading ? (
                    <LineItem skeleton description="Loading groups...">
                      Loading...
                    </LineItem>
                  ) : dropdownGroups.length === 0 ? (
                    <LineItem
                      skeleton
                      description="Try a different search term."
                    >
                      No groups found
                    </LineItem>
                  ) : (
                    <ShadowDiv
                      shadowHeight="0.75rem"
                      className={cn(
                        "flex flex-col gap-1 max-h-[15rem] rounded-08"
                      )}
                    >
                      {dropdownGroups.map((group) => {
                        const isMember = memberGroupIds.has(group.id);
                        return (
                          <LineItem
                            key={group.id}
                            icon={isMember ? SvgCheck : SvgUsers}
                            description={`${group.users.length} ${
                              group.users.length === 1 ? "user" : "users"
                            }`}
                            selected={isMember}
                            emphasized={isMember}
                            onClick={() => toggleGroup(group.id)}
                          >
                            {group.name}
                          </LineItem>
                        );
                      })}
                    </ShadowDiv>
                  )}
                </Popover.Content>
              </Popover>

              <ShadowDiv
                className={cn(" max-h-[11rem] flex flex-col gap-1 rounded-08")}
                shadowHeight="0.75rem"
              >
                {joinedGroups.length === 0 ? (
                  <LineItem
                    icon={SvgUsers}
                    skeleton
                    interactive={false}
                    description={`${displayName} is not in any groups.`}
                  >
                    No groups found
                  </LineItem>
                ) : (
                  joinedGroups.map((group) => (
                    <div
                      key={group.id}
                      className="bg-background-tint-01 rounded-08"
                    >
                      <LineItem
                        key={group.id}
                        icon={SvgUsers}
                        description={`${group.users.length} ${
                          group.users.length === 1 ? "user" : "users"
                        }`}
                        rightChildren={
                          <SimpleTooltip
                            tooltip="Remove from group"
                            side="left"
                          >
                            <SvgLogOut height={16} width={16} />
                          </SimpleTooltip>
                        }
                        onClick={() => toggleGroup(group.id)}
                      >
                        {group.name}
                      </LineItem>
                    </div>
                  ))
                )}
              </ShadowDiv>
            </Section>
            {user.role && (
              <>
                <Separator noPadding />

                <ContentAction
                  title="User Role"
                  description="This controls their general permissions."
                  sizePreset="main-ui"
                  variant="section"
                  paddingVariant="fit"
                  rightChildren={
                    <InputSelect
                      value={selectedRole}
                      onValueChange={(v) => setSelectedRole(v as UserRole)}
                    >
                      <InputSelect.Trigger />
                      <InputSelect.Content>
                        {user.role && !visibleRoles.includes(user.role) && (
                          <InputSelect.Item
                            key={user.role}
                            value={user.role}
                            icon={SvgUser}
                          >
                            {USER_ROLE_LABELS[user.role]}
                          </InputSelect.Item>
                        )}
                        {visibleRoles.map((role) => (
                          <InputSelect.Item
                            key={role}
                            value={role}
                            icon={SvgUser}
                          >
                            {USER_ROLE_LABELS[role]}
                          </InputSelect.Item>
                        ))}
                      </InputSelect.Content>
                    </InputSelect>
                  }
                />
              </>
            )}
          </Section>
        </Modal.Body>

        <Modal.Footer>
          <Button
            prominence="secondary"
            onClick={isSubmitting ? undefined : onClose}
          >
            Cancel
          </Button>
          <Button disabled={isSubmitting || !hasChanges} onClick={handleSave}>
            Save Changes
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/GroupsCell.tsx
================================================
"use client";

import {
  useState,
  useRef,
  useLayoutEffect,
  useCallback,
  useEffect,
} from "react";
import { Hoverable } from "@opal/core";
import { SvgEdit } from "@opal/icons";
import { Button, Tag } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import EditUserModal from "./EditUserModal";
import type { UserRow, UserGroupInfo } from "./interfaces";

interface GroupsCellProps {
  groups: UserGroupInfo[];
  user: UserRow;
  onMutate: () => void;
}

/**
 * Measures how many Tag pills fit in the container, accounting for a "+N"
 * overflow counter when not all tags are visible. Uses a two-phase render:
 * first renders all tags (clipped by overflow:hidden) for measurement, then
 * re-renders with only the visible subset + "+N".
 *
 * Hovering the cell shows a tooltip with ALL groups. Clicking opens the
 * edit groups modal.
 */
export default function GroupsCell({
  groups,
  user,
  onMutate,
}: GroupsCellProps) {
  const [showModal, setShowModal] = useState(false);
  const [visibleCount, setVisibleCount] = useState<number | null>(null);
  const containerRef = useRef<HTMLDivElement>(null);

  const computeVisibleCount = useCallback(() => {
    const container = containerRef.current;
    if (!container || groups.length <= 1) {
      setVisibleCount(groups.length);
      return;
    }

    const tags = container.querySelectorAll<HTMLElement>("[data-group-tag]");
    if (tags.length === 0) return;

    const containerWidth = container.clientWidth;
    const gap = 4; // gap-1
    const counterWidth = 32; // "+N" Tag approximate width

    let used = 0;
    let count = 0;

    for (let i = 0; i < tags.length; i++) {
      const tagWidth = tags[i]!.offsetWidth;
      const gapBefore = count > 0 ? gap : 0;
      const hasMore = i < tags.length - 1;
      const reserve = hasMore ? gap + counterWidth : 0;

      if (used + gapBefore + tagWidth + reserve <= containerWidth) {
        used += gapBefore + tagWidth;
        count++;
      } else {
        break;
      }
    }

    setVisibleCount(Math.max(1, count));
  }, [groups]);

  // Reset to measurement phase when groups change
  useLayoutEffect(() => {
    setVisibleCount(null);
  }, [groups]);

  // Measure after the "show all" render
  useLayoutEffect(() => {
    if (visibleCount !== null) return;
    computeVisibleCount();
  }, [visibleCount, computeVisibleCount]);

  // Re-measure when the container width changes (e.g. window resize).
  // Track width so height-only changes (from the measurement cycle toggling
  // visible tags) don't cause an infinite render loop.
  const lastWidthRef = useRef(0);

  useEffect(() => {
    const node = containerRef.current;
    if (!node) return;

    const observer = new ResizeObserver((entries) => {
      const width = entries[0]?.contentRect.width ?? 0;
      if (Math.abs(width - lastWidthRef.current) < 1) return;
      lastWidthRef.current = width;
      setVisibleCount(null);
    });
    observer.observe(node);

    return () => observer.disconnect();
  }, [groups]);

  const isMeasuring = visibleCount === null;
  const effectiveVisible = visibleCount ?? groups.length;
  const overflowCount = groups.length - effectiveVisible;
  const hasOverflow = !isMeasuring && overflowCount > 0;

  const allGroupsTooltip = (
    <div className="flex flex-wrap gap-1 max-w-[14rem]">
      {groups.map((g) => (
        <div key={g.id} className="max-w-[10rem]">
          <Tag title={g.name} size="md" />
        </div>
      ))}
    </div>
  );

  const tagsContent = (
    <>
      {(isMeasuring ? groups : groups.slice(0, effectiveVisible)).map((g) => (
        <div key={g.id} data-group-tag className="flex-shrink-0">
          <Tag title={g.name} size="md" />
        </div>
      ))}
      {hasOverflow && (
        <div className="flex-shrink-0">
          <Tag title={`+${overflowCount}`} size="md" />
        </div>
      )}
    </>
  );

  return (
    <>
      <Hoverable.Root group="tags">
        <div
          className={`relative flex justify-between items-center w-full min-w-0 ${
            user.id ? "cursor-pointer" : ""
          }`}
          onClick={user.id ? () => setShowModal(true) : undefined}
        >
          {groups.length === 0 ? (
            <div
              ref={containerRef}
              className="flex items-center gap-1 overflow-hidden flex-nowrap min-w-0 -mr-7"
            >
              <Text as="span" secondaryBody text03>
                —
              </Text>
            </div>
          ) : (
            <SimpleTooltip
              side="bottom"
              align="start"
              tooltip={allGroupsTooltip}
              disabled={!hasOverflow}
              className="bg-background-neutral-01 shadow-sm"
              delayDuration={200}
            >
              <div
                ref={containerRef}
                className="flex items-center gap-1 overflow-hidden flex-nowrap min-w-0 -mr-7"
              >
                {tagsContent}
              </div>
            </SimpleTooltip>
          )}
          {user.id && (
            <Hoverable.Item group="tags" variant="opacity-on-hover">
              <Button
                icon={SvgEdit}
                prominence="tertiary"
                tooltip="Edit"
                tooltipSide="left"
                onClick={(e) => {
                  e.stopPropagation();
                  setShowModal(true);
                }}
              />
            </Hoverable.Item>
          )}
        </div>
      </Hoverable.Root>
      {showModal && user.id != null && (
        <EditUserModal
          user={{ ...user, id: user.id }}
          onClose={() => setShowModal(false)}
          onMutate={onMutate}
        />
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/InviteUsersModal.tsx
================================================
"use client";

import { useState, useCallback } from "react";
import { Button } from "@opal/components";
import { SvgUsers, SvgAlertTriangle } from "@opal/icons";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import InputChipField from "@/refresh-components/inputs/InputChipField";
import type { ChipItem } from "@/refresh-components/inputs/InputChipField";
import Text from "@/refresh-components/texts/Text";
import { toast } from "@/hooks/useToast";
import { inviteUsers } from "./svc";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

interface InviteUsersModalProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function InviteUsersModal({
  open,
  onOpenChange,
}: InviteUsersModalProps) {
  const [chips, setChips] = useState<ChipItem[]>([]);
  const [inputValue, setInputValue] = useState("");
  const [isSubmitting, setIsSubmitting] = useState(false);

  /** Parse a comma-separated string into de-duped ChipItems */
  function parseEmails(value: string, existing: ChipItem[]): ChipItem[] {
    const entries = value
      .split(",")
      .map((e) => e.trim().toLowerCase())
      .filter(Boolean);

    const newChips: ChipItem[] = [];
    for (const email of entries) {
      const alreadyAdded =
        existing.some((c) => c.label === email) ||
        newChips.some((c) => c.label === email);
      if (!alreadyAdded) {
        newChips.push({
          id: email,
          label: email,
          error: !EMAIL_REGEX.test(email),
        });
      }
    }
    return newChips;
  }

  function addEmail(value: string) {
    const newChips = parseEmails(value, chips);
    if (newChips.length > 0) {
      setChips((prev) => [...prev, ...newChips]);
    }
    setInputValue("");
  }

  function removeChip(id: string) {
    setChips((prev) => prev.filter((c) => c.id !== id));
  }

  const handleClose = useCallback(() => {
    onOpenChange(false);
    // Reset state after close animation
    setTimeout(() => {
      setChips([]);
      setInputValue("");
      setIsSubmitting(false);
    }, 200);
  }, [onOpenChange]);

  /** Intercept backdrop/ESC closes so state is always reset */
  const handleOpenChange = useCallback(
    (next: boolean) => {
      if (!next) {
        if (!isSubmitting) handleClose();
      } else {
        onOpenChange(next);
      }
    },
    [handleClose, isSubmitting, onOpenChange]
  );

  async function handleInvite() {
    // Flush any pending text in the input into chips synchronously
    const pending = inputValue.trim();
    const allChips = pending
      ? [...chips, ...parseEmails(pending, chips)]
      : chips;

    if (pending) {
      setChips(allChips);
      setInputValue("");
    }

    const validEmails = allChips.filter((c) => !c.error).map((c) => c.label);

    if (validEmails.length === 0) {
      toast.error("Please add at least one valid email address");
      return;
    }

    setIsSubmitting(true);
    try {
      await inviteUsers(validEmails);
      toast.success(
        `Invited ${validEmails.length} user${validEmails.length > 1 ? "s" : ""}`
      );
      handleClose();
    } catch (err) {
      toast.error(
        err instanceof Error ? err.message : "Failed to invite users"
      );
    } finally {
      setIsSubmitting(false);
    }
  }

  return (
    <Modal open={open} onOpenChange={handleOpenChange}>
      <Modal.Content width="sm" height="fit">
        <Modal.Header
          icon={SvgUsers}
          title="Invite Users"
          onClose={isSubmitting ? undefined : handleClose}
        />

        <Modal.Body>
          <InputChipField
            chips={chips}
            onRemoveChip={removeChip}
            onAdd={addEmail}
            value={inputValue}
            onChange={setInputValue}
            placeholder="Add an email and press enter"
            layout="stacked"
          />
          {chips.some((c) => c.error) && (
            <div className="flex items-center gap-1 pt-1">
              <SvgAlertTriangle
                size={14}
                className="text-status-warning-05 shrink-0"
              />
              <Text secondaryBody text03>
                Some email addresses are invalid and will be skipped.
              </Text>
            </div>
          )}
        </Modal.Body>

        <Modal.Footer>
          <BasicModalFooter
            cancel={
              <Button
                disabled={isSubmitting}
                prominence="tertiary"
                onClick={handleClose}
              >
                Cancel
              </Button>
            }
            submit={
              <Button
                disabled={
                  isSubmitting ||
                  chips.length === 0 ||
                  chips.every((c) => c.error)
                }
                onClick={handleInvite}
              >
                Invite
              </Button>
            }
          />
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UserActionModals.tsx
================================================
"use client";

import { useState } from "react";
import { Button } from "@opal/components";
import { SvgUserPlus, SvgUserX, SvgXCircle, SvgKey } from "@opal/icons";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import Text from "@/refresh-components/texts/Text";
import { toast } from "@/hooks/useToast";
import {
  deactivateUser,
  activateUser,
  deleteUser,
  cancelInvite,
  resetPassword,
} from "./svc";

// ---------------------------------------------------------------------------
// Shared helper
// ---------------------------------------------------------------------------

async function runAction(
  action: () => Promise<void>,
  successMessage: string,
  onDone: () => void,
  setIsSubmitting: (v: boolean) => void
) {
  setIsSubmitting(true);
  try {
    await action();
    onDone();
    toast.success(successMessage);
  } catch (err) {
    toast.error(err instanceof Error ? err.message : "An error occurred");
  } finally {
    setIsSubmitting(false);
  }
}

// ---------------------------------------------------------------------------
// Cancel Invite Modal
// ---------------------------------------------------------------------------

interface CancelInviteModalProps {
  email: string;
  onClose: () => void;
  onMutate: () => void;
}

export function CancelInviteModal({
  email,
  onClose,
  onMutate,
}: CancelInviteModalProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);

  return (
    <ConfirmationModalLayout
      icon={(props) => (
        <SvgUserX {...props} className="text-action-danger-05" />
      )}
      title="Cancel Invite"
      onClose={isSubmitting ? undefined : onClose}
      submit={
        <Button
          disabled={isSubmitting}
          variant="danger"
          onClick={() =>
            runAction(
              () => cancelInvite(email),
              "Invite cancelled",
              () => {
                onMutate();
                onClose();
              },
              setIsSubmitting
            )
          }
        >
          Cancel Invite
        </Button>
      }
    >
      <Text as="p" text03>
        <Text as="span" text05>
          {email}
        </Text>{" "}
        will no longer be able to join Onyx with this invite.
      </Text>
    </ConfirmationModalLayout>
  );
}

// ---------------------------------------------------------------------------
// Deactivate User Modal
// ---------------------------------------------------------------------------

interface DeactivateUserModalProps {
  email: string;
  onClose: () => void;
  onMutate: () => void;
}

export function DeactivateUserModal({
  email,
  onClose,
  onMutate,
}: DeactivateUserModalProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);

  return (
    <ConfirmationModalLayout
      icon={(props) => (
        <SvgUserX {...props} className="text-action-danger-05" />
      )}
      title="Deactivate User"
      onClose={isSubmitting ? undefined : onClose}
      submit={
        <Button
          disabled={isSubmitting}
          variant="danger"
          onClick={() =>
            runAction(
              () => deactivateUser(email),
              "User deactivated",
              () => {
                onMutate();
                onClose();
              },
              setIsSubmitting
            )
          }
        >
          Deactivate
        </Button>
      }
    >
      <Text as="p" text03>
        <Text as="span" text05>
          {email}
        </Text>{" "}
        will immediately lose access to Onyx. Their sessions and agents will be
        preserved. Their license seat will be freed. You can reactivate this
        account later.
      </Text>
    </ConfirmationModalLayout>
  );
}

// ---------------------------------------------------------------------------
// Activate User Modal
// ---------------------------------------------------------------------------

interface ActivateUserModalProps {
  email: string;
  onClose: () => void;
  onMutate: () => void;
}

export function ActivateUserModal({
  email,
  onClose,
  onMutate,
}: ActivateUserModalProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);

  return (
    <ConfirmationModalLayout
      icon={SvgUserPlus}
      title="Activate User"
      onClose={isSubmitting ? undefined : onClose}
      submit={
        <Button
          disabled={isSubmitting}
          onClick={() =>
            runAction(
              () => activateUser(email),
              "User activated",
              () => {
                onMutate();
                onClose();
              },
              setIsSubmitting
            )
          }
        >
          Activate
        </Button>
      }
    >
      <Text as="p" text03>
        <Text as="span" text05>
          {email}
        </Text>{" "}
        will regain access to Onyx.
      </Text>
    </ConfirmationModalLayout>
  );
}

// ---------------------------------------------------------------------------
// Delete User Modal
// ---------------------------------------------------------------------------

interface DeleteUserModalProps {
  email: string;
  onClose: () => void;
  onMutate: () => void;
}

export function DeleteUserModal({
  email,
  onClose,
  onMutate,
}: DeleteUserModalProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);

  return (
    <ConfirmationModalLayout
      icon={(props) => (
        <SvgUserX {...props} className="text-action-danger-05" />
      )}
      title="Delete User"
      onClose={isSubmitting ? undefined : onClose}
      submit={
        <Button
          disabled={isSubmitting}
          variant="danger"
          onClick={() =>
            runAction(
              () => deleteUser(email),
              "User deleted",
              () => {
                onMutate();
                onClose();
              },
              setIsSubmitting
            )
          }
        >
          Delete
        </Button>
      }
    >
      <Text as="p" text03>
        <Text as="span" text05>
          {email}
        </Text>{" "}
        will be permanently removed from Onyx. All of their session history will
        be deleted. Deletion cannot be undone.
      </Text>
    </ConfirmationModalLayout>
  );
}

// ---------------------------------------------------------------------------
// Reset Password Modal
// ---------------------------------------------------------------------------

interface ResetPasswordModalProps {
  email: string;
  onClose: () => void;
}

export function ResetPasswordModal({
  email,
  onClose,
}: ResetPasswordModalProps) {
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [newPassword, setNewPassword] = useState<string | null>(null);

  const handleClose = () => {
    onClose();
    setNewPassword(null);
  };

  return (
    <ConfirmationModalLayout
      icon={SvgKey}
      title={newPassword ? "Password Reset" : "Reset Password"}
      onClose={isSubmitting ? undefined : handleClose}
      submit={
        newPassword ? (
          <Button onClick={handleClose}>Done</Button>
        ) : (
          <Button
            disabled={isSubmitting}
            variant="danger"
            onClick={async () => {
              setIsSubmitting(true);
              try {
                const result = await resetPassword(email);
                setNewPassword(result.new_password);
              } catch (err) {
                toast.error(
                  err instanceof Error
                    ? err.message
                    : "Failed to reset password"
                );
              } finally {
                setIsSubmitting(false);
              }
            }}
          >
            Reset Password
          </Button>
        )
      }
    >
      {newPassword ? (
        <div className="flex flex-col gap-2">
          <Text as="p" text03>
            The password for{" "}
            <Text as="span" text05>
              {email}
            </Text>{" "}
            has been reset. Copy the new password below — it will not be shown
            again.
          </Text>
          <code className="rounded-sm bg-background-neutral-02 px-3 py-2 text-sm select-all">
            {newPassword}
          </code>
        </div>
      ) : (
        <Text as="p" text03>
          This will generate a new random password for{" "}
          <Text as="span" text05>
            {email}
          </Text>
          . Their current password will stop working immediately.
        </Text>
      )}
    </ConfirmationModalLayout>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UserFilters.tsx
================================================
"use client";

import { useState } from "react";
import {
  SvgCheck,
  SvgSlack,
  SvgUser,
  SvgUserManage,
  SvgUsers,
} from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import { FilterButton } from "@opal/components";
import Popover from "@/refresh-components/Popover";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import LineItem from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
import ShadowDiv from "@/refresh-components/ShadowDiv";
import {
  UserRole,
  UserStatus,
  USER_ROLE_LABELS,
  USER_STATUS_LABELS,
} from "@/lib/types";
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import type { GroupOption, StatusFilter, StatusCountMap } from "./interfaces";

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const VISIBLE_FILTER_ROLES: UserRole[] = [
  UserRole.ADMIN,
  UserRole.GLOBAL_CURATOR,
  UserRole.BASIC,
  UserRole.SLACK_USER,
];

const FILTERABLE_ROLES = VISIBLE_FILTER_ROLES.map(
  (role) => [role, USER_ROLE_LABELS[role]] as [UserRole, string]
);

const FILTERABLE_STATUSES = (
  Object.entries(USER_STATUS_LABELS) as [UserStatus, string][]
).filter(
  ([value]) => value !== UserStatus.REQUESTED || NEXT_PUBLIC_CLOUD_ENABLED
);

const ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {
  [UserRole.ADMIN]: SvgUserManage,
  [UserRole.SLACK_USER]: SvgSlack,
};

/** Map UserStatus enum values to the keys returned by the counts endpoint. */
const STATUS_COUNT_KEY: Record<UserStatus, keyof StatusCountMap> = {
  [UserStatus.ACTIVE]: "active",
  [UserStatus.INACTIVE]: "inactive",
  [UserStatus.INVITED]: "invited",
  [UserStatus.REQUESTED]: "requested",
};

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

function CountBadge({ count }: { count: number | undefined }) {
  return (
    <Text as="span" secondaryBody text03>
      {count ?? 0}
    </Text>
  );
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

interface UserFiltersProps {
  selectedRoles: UserRole[];
  onRolesChange: (roles: UserRole[]) => void;
  selectedGroups: number[];
  onGroupsChange: (groupIds: number[]) => void;
  groups: GroupOption[];
  selectedStatuses: StatusFilter;
  onStatusesChange: (statuses: StatusFilter) => void;
  roleCounts: Record<string, number>;
  statusCounts: StatusCountMap;
}

export default function UserFilters({
  selectedRoles,
  onRolesChange,
  selectedGroups,
  onGroupsChange,
  groups,
  selectedStatuses,
  onStatusesChange,
  roleCounts,
  statusCounts,
}: UserFiltersProps) {
  const hasRoleFilter = selectedRoles.length > 0;
  const hasGroupFilter = selectedGroups.length > 0;
  const hasStatusFilter = selectedStatuses.length > 0;
  const [groupSearch, setGroupSearch] = useState("");
  const [groupPopoverOpen, setGroupPopoverOpen] = useState(false);

  const toggleRole = (role: UserRole) => {
    if (selectedRoles.includes(role)) {
      onRolesChange(selectedRoles.filter((r) => r !== role));
    } else {
      onRolesChange([...selectedRoles, role]);
    }
  };

  const toggleGroup = (groupId: number) => {
    if (selectedGroups.includes(groupId)) {
      onGroupsChange(selectedGroups.filter((id) => id !== groupId));
    } else {
      onGroupsChange([...selectedGroups, groupId]);
    }
  };

  const toggleStatus = (status: UserStatus) => {
    if (selectedStatuses.includes(status)) {
      onStatusesChange(selectedStatuses.filter((s) => s !== status));
    } else {
      onStatusesChange([...selectedStatuses, status]);
    }
  };

  const roleLabel = hasRoleFilter
    ? FILTERABLE_ROLES.filter(([role]) => selectedRoles.includes(role))
        .map(([, label]) => label)
        .slice(0, 2)
        .join(", ") +
      (selectedRoles.length > 2 ? `, +${selectedRoles.length - 2}` : "")
    : "All Account Types";

  const groupLabel = hasGroupFilter
    ? groups
        .filter((g) => selectedGroups.includes(g.id))
        .map((g) => g.name)
        .slice(0, 2)
        .join(", ") +
      (selectedGroups.length > 2 ? `, +${selectedGroups.length - 2}` : "")
    : "All Groups";

  const statusLabel = hasStatusFilter
    ? FILTERABLE_STATUSES.filter(([status]) =>
        selectedStatuses.includes(status)
      )
        .map(([, label]) => label)
        .slice(0, 2)
        .join(", ") +
      (selectedStatuses.length > 2 ? `, +${selectedStatuses.length - 2}` : "")
    : "All Status";

  const filteredGroups = groupSearch
    ? groups.filter((g) =>
        g.name.toLowerCase().includes(groupSearch.toLowerCase())
      )
    : groups;

  return (
    <div className="flex gap-2">
      {/* Role filter */}
      <Popover>
        <Popover.Trigger asChild>
          <FilterButton
            aria-label="Filter by role"
            icon={SvgUsers}
            active={hasRoleFilter}
            onClear={() => onRolesChange([])}
          >
            {roleLabel}
          </FilterButton>
        </Popover.Trigger>
        <Popover.Content align="start">
          <div className="flex flex-col gap-1 p-1 min-w-[200px]">
            <LineItem
              icon={!hasRoleFilter ? SvgCheck : SvgUsers}
              selected={!hasRoleFilter}
              emphasized={!hasRoleFilter}
              onClick={() => onRolesChange([])}
            >
              All Account Types
            </LineItem>
            {FILTERABLE_ROLES.map(([role, label]) => {
              const isSelected = selectedRoles.includes(role);
              const roleIcon = ROLE_ICONS[role] ?? SvgUser;
              return (
                <LineItem
                  key={role}
                  icon={isSelected ? SvgCheck : roleIcon}
                  selected={isSelected}
                  emphasized={isSelected}
                  onClick={() => toggleRole(role)}
                  rightChildren={<CountBadge count={roleCounts[role]} />}
                >
                  {label}
                </LineItem>
              );
            })}
          </div>
        </Popover.Content>
      </Popover>

      {/* Groups filter */}
      <Popover
        open={groupPopoverOpen}
        onOpenChange={(open) => {
          setGroupPopoverOpen(open);
          if (!open) setGroupSearch("");
        }}
      >
        <Popover.Trigger asChild>
          <FilterButton
            aria-label="Filter by group"
            icon={SvgUsers}
            active={hasGroupFilter}
            onClear={() => onGroupsChange([])}
          >
            {groupLabel}
          </FilterButton>
        </Popover.Trigger>
        <Popover.Content align="start">
          <div className="flex flex-col gap-1 p-1 min-w-[200px]">
            <InputTypeIn
              value={groupSearch}
              onChange={(e) => setGroupSearch(e.target.value)}
              placeholder="Search groups..."
              leftSearchIcon
              variant="internal"
            />
            <LineItem
              icon={!hasGroupFilter ? SvgCheck : SvgUsers}
              selected={!hasGroupFilter}
              emphasized={!hasGroupFilter}
              onClick={() => onGroupsChange([])}
            >
              All Groups
            </LineItem>
            <ShadowDiv className="flex flex-col gap-1 max-h-[240px]">
              {filteredGroups.map((group) => {
                const isSelected = selectedGroups.includes(group.id);
                return (
                  <LineItem
                    key={group.id}
                    icon={isSelected ? SvgCheck : SvgUsers}
                    selected={isSelected}
                    emphasized={isSelected}
                    onClick={() => toggleGroup(group.id)}
                    rightChildren={<CountBadge count={group.memberCount} />}
                  >
                    {group.name}
                  </LineItem>
                );
              })}
              {filteredGroups.length === 0 && (
                <Text as="span" secondaryBody text03 className="px-2 py-1.5">
                  No groups found
                </Text>
              )}
            </ShadowDiv>
          </div>
        </Popover.Content>
      </Popover>

      {/* Status filter */}
      <Popover>
        <Popover.Trigger asChild>
          <FilterButton
            aria-label="Filter by status"
            icon={SvgUsers}
            active={hasStatusFilter}
            onClear={() => onStatusesChange([])}
          >
            {statusLabel}
          </FilterButton>
        </Popover.Trigger>
        <Popover.Content align="start">
          <div className="flex flex-col gap-1 p-1 min-w-[200px]">
            <LineItem
              icon={!hasStatusFilter ? SvgCheck : SvgUser}
              selected={!hasStatusFilter}
              emphasized={!hasStatusFilter}
              onClick={() => onStatusesChange([])}
            >
              All Status
            </LineItem>
            {FILTERABLE_STATUSES.map(([status, label]) => {
              const isSelected = selectedStatuses.includes(status);
              const countKey = STATUS_COUNT_KEY[status];
              return (
                <LineItem
                  key={status}
                  icon={isSelected ? SvgCheck : SvgUser}
                  selected={isSelected}
                  emphasized={isSelected}
                  onClick={() => toggleStatus(status)}
                  rightChildren={<CountBadge count={statusCounts[countKey]} />}
                >
                  {label}
                </LineItem>
              );
            })}
          </div>
        </Popover.Content>
      </Popover>
    </div>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UserRoleCell.tsx
================================================
"use client";

import { useState, useRef } from "react";
import { UserRole, USER_ROLE_LABELS } from "@/lib/types";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { OpenButton } from "@opal/components";
import { Disabled } from "@opal/core";
import {
  SvgCheck,
  SvgGlobe,
  SvgUser,
  SvgSlack,
  SvgUserManage,
} from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import { toast } from "@/hooks/useToast";
import { setUserRole } from "./svc";
import type { UserRow } from "./interfaces";

const ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {
  [UserRole.ADMIN]: SvgUserManage,
  [UserRole.GLOBAL_CURATOR]: SvgGlobe,
  [UserRole.SLACK_USER]: SvgSlack,
};

const SELECTABLE_ROLES = [
  UserRole.ADMIN,
  UserRole.GLOBAL_CURATOR,
  UserRole.BASIC,
] as const;

interface UserRoleCellProps {
  user: UserRow;
  onMutate: () => void;
}

export default function UserRoleCell({ user, onMutate }: UserRoleCellProps) {
  const [isUpdating, setIsUpdating] = useState(false);
  const [open, setOpen] = useState(false);
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const isUpdatingRef = useRef(false);

  if (!user.role) {
    return (
      <Text as="span" secondaryBody text03>
        —
      </Text>
    );
  }

  const applyRole = async (newRole: UserRole) => {
    if (isUpdatingRef.current) return;
    isUpdatingRef.current = true;
    setIsUpdating(true);
    try {
      await setUserRole(user.email, newRole);
      toast.success("Role updated");
      onMutate();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : "Failed to update role");
      onMutate();
    } finally {
      setIsUpdating(false);
      isUpdatingRef.current = false;
    }
  };

  const handleSelect = (role: UserRole) => {
    if (role === user.role) {
      setOpen(false);
      return;
    }
    setOpen(false);
    void applyRole(role);
  };

  const currentIcon = ROLE_ICONS[user.role] ?? SvgUser;

  const visibleRoles = isPaidEnterpriseFeaturesEnabled
    ? SELECTABLE_ROLES
    : SELECTABLE_ROLES.filter((r) => r !== UserRole.GLOBAL_CURATOR);

  const roleItems = visibleRoles.map((role) => {
    const isSelected = user.role === role;
    const icon = ROLE_ICONS[role] ?? SvgUser;
    return (
      <LineItem
        key={role}
        icon={isSelected ? SvgCheck : icon}
        selected={isSelected}
        emphasized={isSelected}
        onClick={() => handleSelect(role)}
      >
        {USER_ROLE_LABELS[role]}
      </LineItem>
    );
  });

  return (
    <Disabled disabled={isUpdating}>
      <Popover open={open} onOpenChange={setOpen}>
        <Popover.Trigger asChild>
          <OpenButton
            icon={currentIcon}
            variant="select-tinted"
            width="full"
            justifyContent="between"
            roundingVariant="sm"
          >
            {USER_ROLE_LABELS[user.role]}
          </OpenButton>
        </Popover.Trigger>
        <Popover.Content align="start">
          <div className="flex flex-col gap-1 p-1 min-w-[160px]">
            {roleItems}
          </div>
        </Popover.Content>
      </Popover>
    </Disabled>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UserRowActions.tsx
================================================
"use client";

import { useState } from "react";
import { Button } from "@opal/components";
import {
  SvgMoreHorizontal,
  SvgUsers,
  SvgXCircle,
  SvgUserCheck,
  SvgUserPlus,
  SvgUserX,
  SvgKey,
} from "@opal/icons";
import { Disabled } from "@opal/core";
import LineItem from "@/refresh-components/buttons/LineItem";
import Popover from "@/refresh-components/Popover";
import Separator from "@/refresh-components/Separator";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import { UserStatus } from "@/lib/types";
import { toast } from "@/hooks/useToast";
import { approveRequest } from "./svc";
import EditUserModal from "./EditUserModal";
import {
  CancelInviteModal,
  DeactivateUserModal,
  ActivateUserModal,
  DeleteUserModal,
  ResetPasswordModal,
} from "./UserActionModals";
import type { UserRow } from "./interfaces";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

enum Modal {
  DEACTIVATE = "deactivate",
  ACTIVATE = "activate",
  DELETE = "delete",
  CANCEL_INVITE = "cancelInvite",
  EDIT_GROUPS = "editGroups",
  RESET_PASSWORD = "resetPassword",
}

interface UserRowActionsProps {
  user: UserRow;
  onMutate: () => void;
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

export default function UserRowActions({
  user,
  onMutate,
}: UserRowActionsProps) {
  const [modal, setModal] = useState<Modal | null>(null);
  const [popoverOpen, setPopoverOpen] = useState(false);

  const openModal = (type: Modal) => {
    setPopoverOpen(false);
    setModal(type);
  };

  const closeModal = () => setModal(null);

  const closeAndMutate = () => {
    setModal(null);
    onMutate();
  };

  // Status-aware action menus
  const actionButtons = (() => {
    // SCIM-managed users get limited actions — most changes would be
    // overwritten on the next IdP sync.
    if (user.is_scim_synced) {
      return (
        <>
          {user.id && (
            <LineItem
              icon={SvgUsers}
              onClick={() => openModal(Modal.EDIT_GROUPS)}
            >
              Groups &amp; Roles
            </LineItem>
          )}
          <Disabled disabled>
            <LineItem danger icon={SvgUserX}>
              Deactivate User
            </LineItem>
          </Disabled>
          <Separator paddingXRem={0.5} />
          <Text as="p" secondaryBody text03 className="px-3 py-1">
            This is a synced SCIM user managed by your identity provider.
          </Text>
        </>
      );
    }

    switch (user.status) {
      case UserStatus.INVITED:
        return (
          <LineItem
            danger
            icon={SvgXCircle}
            onClick={() => openModal(Modal.CANCEL_INVITE)}
          >
            Cancel Invite
          </LineItem>
        );

      case UserStatus.REQUESTED:
        return (
          <LineItem
            icon={SvgUserCheck}
            onClick={() => {
              setPopoverOpen(false);
              void (async () => {
                try {
                  await approveRequest(user.email);
                  onMutate();
                  toast.success("Request approved");
                } catch (err) {
                  toast.error(
                    err instanceof Error ? err.message : "An error occurred"
                  );
                }
              })();
            }}
          >
            Approve
          </LineItem>
        );

      case UserStatus.ACTIVE:
        return (
          <>
            {user.id && (
              <LineItem
                icon={SvgUsers}
                onClick={() => openModal(Modal.EDIT_GROUPS)}
              >
                Groups &amp; Roles
              </LineItem>
            )}
            <LineItem
              icon={SvgKey}
              onClick={() => openModal(Modal.RESET_PASSWORD)}
            >
              Reset Password
            </LineItem>
            <Separator paddingXRem={0.5} />
            <LineItem
              danger
              icon={SvgUserX}
              onClick={() => openModal(Modal.DEACTIVATE)}
            >
              Deactivate User
            </LineItem>
          </>
        );

      case UserStatus.INACTIVE:
        return (
          <>
            {user.id && (
              <LineItem
                icon={SvgUsers}
                onClick={() => openModal(Modal.EDIT_GROUPS)}
              >
                Groups &amp; Roles
              </LineItem>
            )}
            <LineItem
              icon={SvgKey}
              onClick={() => openModal(Modal.RESET_PASSWORD)}
            >
              Reset Password
            </LineItem>
            <Separator paddingXRem={0.5} />
            <LineItem
              icon={SvgUserPlus}
              onClick={() => openModal(Modal.ACTIVATE)}
            >
              Activate User
            </LineItem>
            <Separator paddingXRem={0.5} />
            <LineItem
              danger
              icon={SvgUserX}
              onClick={() => openModal(Modal.DELETE)}
            >
              Delete User
            </LineItem>
          </>
        );

      default: {
        const _exhaustive: never = user.status;
        return null;
      }
    }
  })();

  return (
    <>
      <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>
        <Popover.Trigger asChild>
          <Button prominence="tertiary" icon={SvgMoreHorizontal} />
        </Popover.Trigger>
        <Popover.Content align="end" width="sm">
          <Section
            gap={0.5}
            height="auto"
            alignItems="stretch"
            justifyContent="start"
          >
            {actionButtons}
          </Section>
        </Popover.Content>
      </Popover>

      {modal === Modal.EDIT_GROUPS && user.id && (
        <EditUserModal
          user={user as UserRow & { id: string }}
          onClose={closeModal}
          onMutate={onMutate}
        />
      )}

      {modal === Modal.CANCEL_INVITE && (
        <CancelInviteModal
          email={user.email}
          onClose={closeModal}
          onMutate={onMutate}
        />
      )}

      {modal === Modal.DEACTIVATE && (
        <DeactivateUserModal
          email={user.email}
          onClose={closeModal}
          onMutate={onMutate}
        />
      )}

      {modal === Modal.ACTIVATE && (
        <ActivateUserModal
          email={user.email}
          onClose={closeModal}
          onMutate={onMutate}
        />
      )}

      {modal === Modal.DELETE && (
        <DeleteUserModal
          email={user.email}
          onClose={closeModal}
          onMutate={onMutate}
        />
      )}

      {modal === Modal.RESET_PASSWORD && (
        <ResetPasswordModal email={user.email} onClose={closeModal} />
      )}
    </>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UsersSummary.tsx
================================================
import { SvgArrowUpRight, SvgFilterPlus, SvgUserSync } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import { Button } from "@opal/components";
import { Hoverable } from "@opal/core";
import { Section } from "@/layouts/general-layouts";
import Card from "@/refresh-components/cards/Card";
import IconButton from "@/refresh-components/buttons/IconButton";
import Text from "@/refresh-components/texts/Text";
import Link from "next/link";
import { ADMIN_ROUTES } from "@/lib/admin-routes";

// ---------------------------------------------------------------------------
// Stats cell — number + label + hover filter icon
// ---------------------------------------------------------------------------

type StatCellProps = {
  value: number | null;
  label: string;
  onFilter?: () => void;
};

function StatCell({ value, label, onFilter }: StatCellProps) {
  const display = value === null ? "\u2014" : value.toLocaleString();

  return (
    <Hoverable.Root group="stat" widthVariant="full">
      <div
        className={`relative flex flex-col items-start gap-0.5 w-full p-2 rounded-08 transition-colors ${
          onFilter ? "cursor-pointer hover:bg-background-tint-02" : ""
        }`}
        onClick={onFilter}
      >
        <Text as="span" mainUiAction text04>
          {display}
        </Text>
        <Text as="span" secondaryBody text03>
          {label}
        </Text>
        {onFilter && (
          <div className="absolute right-1 top-1">
            <Hoverable.Item group="stat" variant="opacity-on-hover">
              <IconButton
                tertiary
                icon={SvgFilterPlus}
                tooltip="Add Filter"
                toolTipPosition="left"
                tooltipSize="sm"
                onClick={(e) => {
                  e.stopPropagation();
                  onFilter();
                }}
              />
            </Hoverable.Item>
          </div>
        )}
      </div>
    </Hoverable.Root>
  );
}

// ---------------------------------------------------------------------------
// SCIM card
// ---------------------------------------------------------------------------

function ScimCard() {
  return (
    <Card gap={0.5} padding={0.75}>
      <ContentAction
        icon={SvgUserSync}
        title="SCIM Sync"
        description="Users are synced from your identity provider."
        sizePreset="main-ui"
        variant="section"
        paddingVariant="fit"
        rightChildren={
          <Link href={ADMIN_ROUTES.SCIM.path}>
            <Button prominence="tertiary" rightIcon={SvgArrowUpRight} size="sm">
              Manage
            </Button>
          </Link>
        }
      />
    </Card>
  );
}

// ---------------------------------------------------------------------------
// Stats bar — layout varies by SCIM status
// ---------------------------------------------------------------------------

type UsersSummaryProps = {
  activeUsers: number | null;
  pendingInvites: number | null;
  requests: number | null;
  showScim: boolean;
  onFilterActive?: () => void;
  onFilterInvites?: () => void;
  onFilterRequests?: () => void;
};

export default function UsersSummary({
  activeUsers,
  pendingInvites,
  requests,
  showScim,
  onFilterActive,
  onFilterInvites,
  onFilterRequests,
}: UsersSummaryProps) {
  const showRequests = requests !== null && requests > 0;

  const statsCard = (
    <Card padding={0.5}>
      <Section flexDirection="row" gap={0}>
        <StatCell
          value={activeUsers}
          label="active users"
          onFilter={onFilterActive}
        />
        <StatCell
          value={pendingInvites}
          label="pending invites"
          onFilter={onFilterInvites}
        />
        {showRequests && (
          <StatCell
            value={requests}
            label="requests to join"
            onFilter={onFilterRequests}
          />
        )}
      </Section>
    </Card>
  );

  if (showScim) {
    return (
      <Section
        flexDirection="row"
        justifyContent="start"
        alignItems="stretch"
        gap={0.5}
      >
        {statsCard}
        <ScimCard />
      </Section>
    );
  }

  // No SCIM — each stat gets its own card
  return (
    <Section flexDirection="row" gap={0.5}>
      <Card padding={0.5}>
        <StatCell
          value={activeUsers}
          label="active users"
          onFilter={onFilterActive}
        />
      </Card>
      <Card padding={0.5}>
        <StatCell
          value={pendingInvites}
          label="pending invites"
          onFilter={onFilterInvites}
        />
      </Card>
      {showRequests && (
        <Card padding={0.5}>
          <StatCell
            value={requests}
            label="requests to join"
            onFilter={onFilterRequests}
          />
        </Card>
      )}
    </Section>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/UsersTable.tsx
================================================
"use client";

import { useMemo, useState } from "react";
import { Table, createTableColumns } from "@opal/components";
import { Content } from "@opal/layouts";
import { Button } from "@opal/components";
import { SvgDownload } from "@opal/icons";
import SvgNoResult from "@opal/illustrations/no-result";
import { IllustrationContent } from "@opal/layouts";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { UserRole, UserStatus, USER_STATUS_LABELS } from "@/lib/types";
import { timeAgo } from "@/lib/time";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { toast } from "@/hooks/useToast";
import useAdminUsers from "@/hooks/useAdminUsers";
import useGroups from "@/hooks/useGroups";
import { downloadUsersCsv } from "./svc";
import UserFilters from "./UserFilters";
import GroupsCell from "./GroupsCell";
import UserRowActions from "./UserRowActions";
import UserRoleCell from "./UserRoleCell";
import type {
  UserRow,
  GroupOption,
  StatusFilter,
  StatusCountMap,
} from "./interfaces";
import UserAvatar from "@/refresh-components/avatars/UserAvatar";
import type { User } from "@/lib/types";

// ---------------------------------------------------------------------------
// Column renderers
// ---------------------------------------------------------------------------

function renderNameColumn(email: string, row: UserRow) {
  return (
    <Content
      sizePreset="main-ui"
      variant="section"
      title={row.personal_name ?? email}
      description={row.personal_name ? email : undefined}
    />
  );
}

function renderStatusColumn(value: UserStatus, row: UserRow) {
  return (
    <div className="flex flex-col">
      <Text as="span" mainUiBody text03>
        {USER_STATUS_LABELS[value] ?? value}
      </Text>
      {row.is_scim_synced && (
        <Text as="span" secondaryBody text03>
          SCIM synced
        </Text>
      )}
    </div>
  );
}

function renderLastUpdatedColumn(value: string | null) {
  return (
    <Text as="span" secondaryBody text03>
      {value ? timeAgo(value) ?? "\u2014" : "\u2014"}
    </Text>
  );
}

// ---------------------------------------------------------------------------
// Columns
// ---------------------------------------------------------------------------

const tc = createTableColumns<UserRow>();

function buildColumns(onMutate: () => void) {
  return [
    tc.qualifier({
      content: "icon",
      iconSize: "lg",
      getContent: (row) => {
        const user = {
          email: row.email,
          personalization: row.personal_name
            ? { name: row.personal_name }
            : undefined,
        } as User;
        return (props) => <UserAvatar user={user} size={props.size} />;
      },
    }),
    tc.column("email", {
      header: "Name",
      weight: 22,
      cell: renderNameColumn,
    }),
    tc.column("groups", {
      header: "Groups",
      weight: 24,
      enableSorting: false,
      cell: (value, row) => (
        <GroupsCell groups={value} user={row} onMutate={onMutate} />
      ),
    }),
    tc.column("role", {
      header: "Account Type",
      weight: 16,
      cell: (_value, row) => <UserRoleCell user={row} onMutate={onMutate} />,
    }),
    tc.column("status", {
      header: "Status",
      weight: 14,
      cell: renderStatusColumn,
    }),
    tc.column("updated_at", {
      header: "Last Updated",
      weight: 14,
      cell: renderLastUpdatedColumn,
    }),
    tc.actions({
      cell: (row) => <UserRowActions user={row} onMutate={onMutate} />,
    }),
  ];
}

// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------

const PAGE_SIZE = 8;

interface UsersTableProps {
  selectedStatuses: StatusFilter;
  onStatusesChange: (statuses: StatusFilter) => void;
  roleCounts: Record<string, number>;
  statusCounts: StatusCountMap;
}

export default function UsersTable({
  selectedStatuses,
  onStatusesChange,
  roleCounts,
  statusCounts,
}: UsersTableProps) {
  const [searchTerm, setSearchTerm] = useState("");
  const [selectedRoles, setSelectedRoles] = useState<UserRole[]>([]);
  const [selectedGroups, setSelectedGroups] = useState<number[]>([]);

  const { data: allGroups } = useGroups();

  const groupOptions: GroupOption[] = useMemo(
    () =>
      (allGroups ?? []).map((g) => ({
        id: g.id,
        name: g.name,
        memberCount: g.users.length,
      })),
    [allGroups]
  );

  const { users, isLoading, error, refresh } = useAdminUsers();

  const columns = useMemo(() => buildColumns(refresh), [refresh]);

  // Client-side filtering
  const filteredUsers = useMemo(() => {
    let result = users;

    if (selectedRoles.length > 0) {
      result = result.filter(
        (u) => u.role !== null && selectedRoles.includes(u.role)
      );
    }

    if (selectedStatuses.length > 0) {
      result = result.filter((u) => selectedStatuses.includes(u.status));
    }

    if (selectedGroups.length > 0) {
      result = result.filter((u) =>
        u.groups.some((g) => selectedGroups.includes(g.id))
      );
    }

    return result;
  }, [users, selectedRoles, selectedStatuses, selectedGroups]);

  if (isLoading) {
    return (
      <div className="flex justify-center py-12">
        <SimpleLoader className="h-6 w-6" />
      </div>
    );
  }

  if (error) {
    return (
      <Text as="p" secondaryBody text03>
        Failed to load users. Please try refreshing the page.
      </Text>
    );
  }

  return (
    <div className="flex flex-col gap-3">
      <InputTypeIn
        value={searchTerm}
        onChange={(e) => setSearchTerm(e.target.value)}
        placeholder="Search users..."
        leftSearchIcon
      />
      <UserFilters
        selectedRoles={selectedRoles}
        onRolesChange={setSelectedRoles}
        selectedGroups={selectedGroups}
        onGroupsChange={setSelectedGroups}
        groups={groupOptions}
        selectedStatuses={selectedStatuses}
        onStatusesChange={onStatusesChange}
        roleCounts={roleCounts}
        statusCounts={statusCounts}
      />
      <Table
        data={filteredUsers}
        columns={columns}
        getRowId={(row) => row.id ?? row.email}
        pageSize={PAGE_SIZE}
        searchTerm={searchTerm}
        emptyState={
          <IllustrationContent
            illustration={SvgNoResult}
            title="No users found"
            description="No users match the current filters."
          />
        }
        footer={{
          leftExtra: (
            <Button
              icon={SvgDownload}
              prominence="tertiary"
              size="sm"
              tooltip="Download CSV"
              aria-label="Download CSV"
              onClick={() => {
                downloadUsersCsv().catch((err) => {
                  toast.error(
                    err instanceof Error
                      ? err.message
                      : "Failed to download CSV"
                  );
                });
              }}
            />
          ),
        }}
      />
    </div>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/index.tsx
================================================
"use client";

import { useState } from "react";
import { SvgUser, SvgUserPlus } from "@opal/icons";
import { Button } from "@opal/components";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { useScimToken } from "@/hooks/useScimToken";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import useUserCounts from "@/hooks/useUserCounts";
import { UserStatus } from "@/lib/types";
import type { StatusFilter } from "./interfaces";

import UsersSummary from "./UsersSummary";
import UsersTable from "./UsersTable";
import InviteUsersModal from "./InviteUsersModal";

// ---------------------------------------------------------------------------
// Users page content
// ---------------------------------------------------------------------------

function UsersContent() {
  const isEe = usePaidEnterpriseFeaturesEnabled();

  const { data: scimToken } = useScimToken();
  const showScim = isEe && !!scimToken;

  const { activeCount, invitedCount, pendingCount, roleCounts, statusCounts } =
    useUserCounts();

  const [selectedStatuses, setSelectedStatuses] = useState<StatusFilter>([]);

  const toggleStatus = (target: UserStatus) => {
    setSelectedStatuses((prev) =>
      prev.includes(target)
        ? prev.filter((s) => s !== target)
        : [...prev, target]
    );
  };

  return (
    <>
      <UsersSummary
        activeUsers={activeCount}
        pendingInvites={invitedCount}
        requests={pendingCount}
        showScim={showScim}
        onFilterActive={() => toggleStatus(UserStatus.ACTIVE)}
        onFilterInvites={() => toggleStatus(UserStatus.INVITED)}
        onFilterRequests={() => toggleStatus(UserStatus.REQUESTED)}
      />

      <UsersTable
        selectedStatuses={selectedStatuses}
        onStatusesChange={setSelectedStatuses}
        roleCounts={roleCounts}
        statusCounts={statusCounts}
      />
    </>
  );
}

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function UsersPage() {
  const [inviteOpen, setInviteOpen] = useState(false);

  return (
    <SettingsLayouts.Root width="lg">
      <SettingsLayouts.Header
        title="Users & Requests"
        icon={SvgUser}
        rightChildren={
          <Button icon={SvgUserPlus} onClick={() => setInviteOpen(true)}>
            Invite Users
          </Button>
        }
      />
      <SettingsLayouts.Body>
        <UsersContent />
      </SettingsLayouts.Body>

      <InviteUsersModal open={inviteOpen} onOpenChange={setInviteOpen} />
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/UsersPage/interfaces.ts
================================================
import type { UserRole, UserStatus } from "@/lib/types";

export interface UserGroupInfo {
  id: number;
  name: string;
}

export interface UserRow {
  id: string | null;
  email: string;
  role: UserRole | null;
  status: UserStatus;
  is_active: boolean;
  is_scim_synced: boolean;
  personal_name: string | null;
  created_at: string | null;
  updated_at: string | null;
  groups: UserGroupInfo[];
}

export interface GroupOption {
  id: number;
  name: string;
  memberCount?: number;
}

/** Empty array = no filter (show all). */
export type StatusFilter = UserStatus[];

/** Keys match the UserStatus-derived labels used in filter badges. */
export type StatusCountMap = {
  active?: number;
  inactive?: number;
  invited?: number;
  requested?: number;
};


================================================
FILE: web/src/refresh-pages/admin/UsersPage/svc.ts
================================================
import { UserRole } from "@/lib/types";

async function parseErrorDetail(
  res: Response,
  fallback: string
): Promise<string> {
  try {
    const body = await res.json();
    return body?.detail ?? fallback;
  } catch {
    return fallback;
  }
}

export async function deactivateUser(email: string): Promise<void> {
  const res = await fetch("/api/manage/admin/deactivate-user", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to deactivate user"));
  }
}

export async function activateUser(email: string): Promise<void> {
  const res = await fetch("/api/manage/admin/activate-user", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to activate user"));
  }
}

export async function deleteUser(email: string): Promise<void> {
  const res = await fetch("/api/manage/admin/delete-user", {
    method: "DELETE",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to delete user"));
  }
}

export async function setUserRole(
  email: string,
  newRole: UserRole
): Promise<void> {
  const res = await fetch("/api/manage/set-user-role", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email, new_role: newRole }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to update user role"));
  }
}

export async function addUserToGroup(
  groupId: number,
  userId: string
): Promise<void> {
  const res = await fetch(`/api/manage/admin/user-group/${groupId}/add-users`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_ids: [userId] }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to add user to group"));
  }
}

export async function removeUserFromGroup(
  groupId: number,
  currentUserIds: string[],
  userIdToRemove: string,
  ccPairIds: number[]
): Promise<void> {
  const res = await fetch(`/api/manage/admin/user-group/${groupId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      user_ids: currentUserIds.filter((id) => id !== userIdToRemove),
      cc_pair_ids: ccPairIds,
    }),
  });
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to remove user from group")
    );
  }
}

export async function cancelInvite(email: string): Promise<void> {
  const res = await fetch("/api/manage/admin/remove-invited-user", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to cancel invite"));
  }
}

export async function approveRequest(email: string): Promise<void> {
  const res = await fetch("/api/tenants/users/invite/approve", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to approve request"));
  }
}

export async function inviteUsers(emails: string[]): Promise<void> {
  const res = await fetch("/api/manage/admin/users", {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ emails }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to invite users"));
  }
}

export async function resetPassword(
  email: string
): Promise<{ user_id: string; new_password: string }> {
  const res = await fetch("/api/password/reset_password", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ user_email: email }),
  });
  if (!res.ok) {
    throw new Error(await parseErrorDetail(res, "Failed to reset password"));
  }
  return res.json();
}

export async function downloadUsersCsv(): Promise<void> {
  const res = await fetch("/api/manage/users/download");
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to download users CSV")
    );
  }
  const blob = await res.blob();
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  a.download = `onyx_users_${ts}.csv`;
  a.click();
  URL.revokeObjectURL(url);
}


================================================
FILE: web/src/refresh-pages/admin/VoiceConfigurationPage.tsx
================================================
"use client";

import { useEffect, useMemo, useState } from "react";
import {
  AzureIcon,
  ElevenLabsIcon,
  IconProps,
  OpenAIIcon,
} from "@/components/icons/icons";
import ProviderCard from "@/sections/admin/ProviderCard";
import Message from "@/refresh-components/messages/Message";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { FetchError } from "@/lib/fetcher";
import {
  useVoiceProviders,
  VoiceProviderView,
} from "@/hooks/useVoiceProviders";
import {
  activateVoiceProvider,
  deactivateVoiceProvider,
  deleteVoiceProvider,
} from "@/lib/admin/voice/svc";
import { ThreeDotsLoader } from "@/components/Loading";
import { toast } from "@/hooks/useToast";
import { Callout } from "@/components/ui/callout";
import { Content } from "@opal/layouts";
import { SvgMicrophone, SvgSlash, SvgUnplug } from "@opal/icons";
import { Button, Text } from "@opal/components";
import { markdown } from "@opal/utils";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Section } from "@/layouts/general-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import VoiceProviderSetupModal from "@/app/admin/configuration/voice/VoiceProviderSetupModal";

interface ModelDetails {
  id: string;
  label: string;
  subtitle: string;
  providerType: string;
}

interface ProviderGroup {
  providerType: string;
  providerLabel: string;
  models: ModelDetails[];
}

// STT Models - individual cards
const STT_MODELS: ModelDetails[] = [
  {
    id: "whisper",
    label: "Whisper",
    subtitle: "OpenAI's general purpose speech recognition model.",
    providerType: "openai",
  },
  {
    id: "azure-speech-stt",
    label: "Azure Speech",
    subtitle: "Speech to text in Microsoft Foundry Tools.",
    providerType: "azure",
  },
  {
    id: "elevenlabs-stt",
    label: "ElevenAPI",
    subtitle: "ElevenLabs Speech to Text API.",
    providerType: "elevenlabs",
  },
];

// TTS Models - grouped by provider
const TTS_PROVIDER_GROUPS: ProviderGroup[] = [
  {
    providerType: "openai",
    providerLabel: "OpenAI",
    models: [
      {
        id: "tts-1",
        label: "TTS-1",
        subtitle: "OpenAI's text-to-speech model optimized for speed.",
        providerType: "openai",
      },
      {
        id: "tts-1-hd",
        label: "TTS-1 HD",
        subtitle: "OpenAI's text-to-speech model optimized for quality.",
        providerType: "openai",
      },
    ],
  },
  {
    providerType: "azure",
    providerLabel: "Azure",
    models: [
      {
        id: "azure-speech-tts",
        label: "Azure Speech",
        subtitle: "Text to speech in Microsoft Foundry Tools.",
        providerType: "azure",
      },
    ],
  },
  {
    providerType: "elevenlabs",
    providerLabel: "ElevenLabs",
    models: [
      {
        id: "elevenlabs-tts",
        label: "ElevenAPI",
        subtitle: "ElevenLabs Text to Speech API.",
        providerType: "elevenlabs",
      },
    ],
  },
];

const FallbackMicrophoneIcon = ({ size, className }: IconProps) => (
  <SvgMicrophone size={size} className={className} />
);

function getProviderIcon(
  providerType: string
): React.FunctionComponent<IconProps> {
  switch (providerType) {
    case "openai":
      return OpenAIIcon;
    case "azure":
      return AzureIcon;
    case "elevenlabs":
      return ElevenLabsIcon;
    default:
      return FallbackMicrophoneIcon;
  }
}

type ProviderMode = "stt" | "tts";

function getProviderLabel(providerType: string): string {
  switch (providerType) {
    case "openai":
      return "OpenAI";
    case "azure":
      return "Azure";
    case "elevenlabs":
      return "ElevenLabs";
    default:
      return providerType;
  }
}

const NO_DEFAULT_VALUE = "__none__";

const route = ADMIN_ROUTES.VOICE;
const pageDescription =
  "Configure speech-to-text and text-to-speech providers for voice input and spoken responses.";

interface VoiceDisconnectModalProps {
  disconnectTarget: {
    providerId: number;
    providerLabel: string;
    providerType: string;
  };
  providers: VoiceProviderView[];
  replacementProviderId: string | null;
  onReplacementChange: (id: string | null) => void;
  onClose: () => void;
  onDisconnect: () => void;
}

function VoiceDisconnectModal({
  disconnectTarget,
  providers,
  replacementProviderId,
  onReplacementChange,
  onClose,
  onDisconnect,
}: VoiceDisconnectModalProps) {
  const targetProvider = providers.find(
    (p) => p.id === disconnectTarget.providerId
  );
  const isActive =
    (targetProvider?.is_default_stt ?? false) ||
    (targetProvider?.is_default_tts ?? false);

  // Find other configured providers that could serve as replacements
  const replacementOptions = providers.filter(
    (p) => p.id !== disconnectTarget.providerId && p.has_api_key
  );

  const needsReplacement = isActive;
  const hasReplacements = replacementOptions.length > 0;

  // Auto-select first replacement when modal opens
  useEffect(() => {
    if (needsReplacement && hasReplacements && !replacementProviderId) {
      const first = replacementOptions[0];
      if (first) onReplacementChange(String(first.id));
    }
  }, []); // eslint-disable-line react-hooks/exhaustive-deps

  return (
    <ConfirmationModalLayout
      icon={SvgUnplug}
      title={`Disconnect ${disconnectTarget.providerLabel}`}
      description="Voice models"
      onClose={onClose}
      submit={
        <Button
          variant="danger"
          onClick={onDisconnect}
          disabled={
            needsReplacement && hasReplacements && !replacementProviderId
          }
        >
          Disconnect
        </Button>
      }
    >
      {needsReplacement ? (
        hasReplacements ? (
          <Section alignItems="start">
            <Text as="p" color="text-03">
              {markdown(
                `**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default. Session history will be preserved.`
              )}
            </Text>
            <Section alignItems="start" gap={0.25}>
              <Text as="p" color="text-04">
                Set New Default
              </Text>
              <InputSelect
                value={replacementProviderId ?? undefined}
                onValueChange={(v) => onReplacementChange(v)}
              >
                <InputSelect.Trigger placeholder="Select a replacement provider" />
                <InputSelect.Content>
                  {replacementOptions.map((p) => (
                    <InputSelect.Item
                      key={p.id}
                      value={String(p.id)}
                      icon={getProviderIcon(p.provider_type)}
                    >
                      {getProviderLabel(p.provider_type)}
                    </InputSelect.Item>
                  ))}
                  <InputSelect.Separator />
                  <InputSelect.Item value={NO_DEFAULT_VALUE} icon={SvgSlash}>
                    <span>
                      <b>No Default</b>
                      <span className="text-text-03"> (Disable Voice)</span>
                    </span>
                  </InputSelect.Item>
                </InputSelect.Content>
              </InputSelect>
            </Section>
          </Section>
        ) : (
          <>
            <Text as="p" color="text-03">
              {markdown(
                `**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default.`
              )}
            </Text>
            <Text as="p" color="text-03">
              Connect another provider to continue using voice.
            </Text>
          </>
        )
      ) : (
        <>
          <Text as="p" color="text-03">
            {markdown(
              `**${disconnectTarget.providerLabel}** models will no longer be available for voice.`
            )}
          </Text>
          <Text as="p" color="text-03">
            Session history will be preserved.
          </Text>
        </>
      )}
    </ConfirmationModalLayout>
  );
}

export default function VoiceConfigurationPage() {
  const [modalOpen, setModalOpen] = useState(false);
  const [selectedProvider, setSelectedProvider] = useState<string | null>(null);
  const [editingProvider, setEditingProvider] =
    useState<VoiceProviderView | null>(null);
  const [modalMode, setModalMode] = useState<ProviderMode>("stt");
  const [selectedModelId, setSelectedModelId] = useState<string | null>(null);
  const [sttActivationError, setSTTActivationError] = useState<string | null>(
    null
  );
  const [ttsActivationError, setTTSActivationError] = useState<string | null>(
    null
  );
  const [disconnectTarget, setDisconnectTarget] = useState<{
    providerId: number;
    providerLabel: string;
    providerType: string;
  } | null>(null);
  const [replacementProviderId, setReplacementProviderId] = useState<
    string | null
  >(null);

  const { providers, error, isLoading, refresh: mutate } = useVoiceProviders();

  const handleConnect = (
    providerType: string,
    mode: ProviderMode,
    modelId?: string
  ) => {
    setSelectedProvider(providerType);
    setEditingProvider(null);
    setModalMode(mode);
    setSelectedModelId(modelId ?? null);
    setModalOpen(true);
    setSTTActivationError(null);
    setTTSActivationError(null);
  };

  const handleEdit = (
    provider: VoiceProviderView,
    mode: ProviderMode,
    modelId?: string
  ) => {
    setSelectedProvider(provider.provider_type);
    setEditingProvider(provider);
    setModalMode(mode);
    setSelectedModelId(modelId ?? null);
    setModalOpen(true);
    setSTTActivationError(null);
    setTTSActivationError(null);
  };

  const handleSetDefault = async (
    providerId: number,
    mode: ProviderMode,
    modelId?: string
  ) => {
    const setError =
      mode === "stt" ? setSTTActivationError : setTTSActivationError;
    setError(null);
    try {
      const response = await activateVoiceProvider(providerId, mode, modelId);
      if (!response.ok) {
        const errorBody = await response.json().catch(() => ({}));
        throw new Error(
          typeof errorBody?.detail === "string"
            ? errorBody.detail
            : `Failed to set provider as default ${mode.toUpperCase()}.`
        );
      }
      await mutate();
    } catch (err) {
      const message =
        err instanceof Error ? err.message : "Unexpected error occurred.";
      setError(message);
    }
  };

  const handleDeactivate = async (providerId: number, mode: ProviderMode) => {
    const setError =
      mode === "stt" ? setSTTActivationError : setTTSActivationError;
    setError(null);
    try {
      const response = await deactivateVoiceProvider(providerId, mode);
      if (!response.ok) {
        const errorBody = await response.json().catch(() => ({}));
        throw new Error(
          typeof errorBody?.detail === "string"
            ? errorBody.detail
            : `Failed to deactivate ${mode.toUpperCase()} provider.`
        );
      }
      await mutate();
    } catch (err) {
      const message =
        err instanceof Error ? err.message : "Unexpected error occurred.";
      setError(message);
    }
  };

  const handleModalClose = () => {
    setModalOpen(false);
    setSelectedProvider(null);
    setEditingProvider(null);
    setSelectedModelId(null);
  };

  const handleModalSuccess = () => {
    mutate();
    handleModalClose();
  };

  const handleDisconnect = async () => {
    if (!disconnectTarget) return;
    try {
      const targetProvider = providers.find(
        (p) => p.id === disconnectTarget.providerId
      );

      // If a replacement was selected (not "No Default"), activate it for each
      // mode the disconnected provider was default for
      if (replacementProviderId && replacementProviderId !== NO_DEFAULT_VALUE) {
        const repId = Number(replacementProviderId);

        if (targetProvider?.is_default_stt) {
          const resp = await activateVoiceProvider(repId, "stt");
          if (!resp.ok) {
            const errorBody = await resp.json().catch(() => ({}));
            throw new Error(
              typeof errorBody?.detail === "string"
                ? errorBody.detail
                : "Failed to activate replacement STT provider."
            );
          }
        }

        if (targetProvider?.is_default_tts) {
          const resp = await activateVoiceProvider(repId, "tts");
          if (!resp.ok) {
            const errorBody = await resp.json().catch(() => ({}));
            throw new Error(
              typeof errorBody?.detail === "string"
                ? errorBody.detail
                : "Failed to activate replacement TTS provider."
            );
          }
        }
      }

      const response = await deleteVoiceProvider(disconnectTarget.providerId);
      if (!response.ok) {
        const errorBody = await response.json().catch(() => ({}));
        throw new Error(
          typeof errorBody?.detail === "string"
            ? errorBody.detail
            : "Failed to disconnect provider."
        );
      }
      await mutate();
      toast.success(`${disconnectTarget.providerLabel} disconnected`);
    } catch (err) {
      console.error("Failed to disconnect voice provider:", err);
      toast.error(
        err instanceof Error ? err.message : "Unexpected error occurred."
      );
    } finally {
      setDisconnectTarget(null);
      setReplacementProviderId(null);
    }
  };

  const isProviderConfigured = (provider?: VoiceProviderView): boolean => {
    return !!provider?.has_api_key;
  };

  const providersByType = useMemo(() => {
    return new Map((providers ?? []).map((p) => [p.provider_type, p] as const));
  }, [providers]);

  const hasActiveSTTProvider =
    providers?.some((p) => p.is_default_stt) ?? false;
  const hasActiveTTSProvider =
    providers?.some((p) => p.is_default_tts) ?? false;

  const getModelStatus = (
    model: ModelDetails,
    mode: ProviderMode
  ): "disconnected" | "connected" | "selected" => {
    const provider = providersByType.get(model.providerType);
    if (!provider || !isProviderConfigured(provider)) return "disconnected";

    const isActive =
      mode === "stt"
        ? provider.is_default_stt
        : provider.is_default_tts && provider.tts_model === model.id;

    if (isActive) return "selected";
    return "connected";
  };

  const renderModelSelect = (model: ModelDetails, mode: ProviderMode) => {
    const provider = providersByType.get(model.providerType);
    const status = getModelStatus(model, mode);
    const Icon = getProviderIcon(model.providerType);

    return (
      <ProviderCard
        key={`${mode}-${model.id}`}
        aria-label={`voice-${mode}-${model.id}`}
        icon={Icon}
        title={model.label}
        description={model.subtitle}
        status={status}
        onConnect={() => handleConnect(model.providerType, mode, model.id)}
        onSelect={() => {
          if (provider?.id) handleSetDefault(provider.id, mode, model.id);
        }}
        onDeselect={() => {
          if (provider?.id) handleDeactivate(provider.id, mode);
        }}
        onEdit={() => {
          if (provider) handleEdit(provider, mode, model.id);
        }}
        onDisconnect={
          status !== "disconnected" && provider
            ? () =>
                setDisconnectTarget({
                  providerId: provider.id,
                  providerLabel: getProviderLabel(model.providerType),
                  providerType: model.providerType,
                })
            : undefined
        }
      />
    );
  };

  if (error) {
    const message = error?.message || "Unable to load voice configuration.";
    const detail =
      error instanceof FetchError && typeof error.info?.detail === "string"
        ? error.info.detail
        : undefined;

    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description={pageDescription}
        />
        <SettingsLayouts.Body>
          <Callout type="danger" title="Failed to load voice settings">
            {message}
            {detail && (
              <Text as="p" font="main-content-body" color="text-03">
                {detail}
              </Text>
            )}
          </Callout>
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  if (isLoading) {
    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description={pageDescription}
        />
        <SettingsLayouts.Body>
          <ThreeDotsLoader />
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  return (
    <SettingsLayouts.Root>
      <SettingsLayouts.Header
        icon={route.icon}
        title={route.title}
        description={pageDescription}
      />
      <SettingsLayouts.Body>
        <div className="flex flex-col gap-6">
          <Content
            title="Speech to Text"
            description="Select a model to transcribe speech to text in chats."
            sizePreset="main-content"
            variant="section"
          />

          {sttActivationError && (
            <Callout type="danger" title="Unable to update STT provider">
              {sttActivationError}
            </Callout>
          )}

          {!hasActiveSTTProvider && (
            <Message
              info
              static
              large
              close={false}
              text="Connect a speech to text provider to use in chat."
              className="w-full"
            />
          )}

          <div className="flex flex-col gap-2">
            {STT_MODELS.map((model) => renderModelSelect(model, "stt"))}
          </div>
        </div>

        <div className="flex flex-col gap-6">
          <Content
            title="Text to Speech"
            description="Select a model to speak out chat responses."
            sizePreset="main-content"
            variant="section"
          />

          {ttsActivationError && (
            <Callout type="danger" title="Unable to update TTS provider">
              {ttsActivationError}
            </Callout>
          )}

          {!hasActiveTTSProvider && (
            <Message
              info
              static
              large
              close={false}
              text="Connect a text to speech provider to use in chat."
              className="w-full"
            />
          )}

          {TTS_PROVIDER_GROUPS.map((group) => (
            <div key={group.providerType} className="flex flex-col gap-2">
              <Text font="secondary-body" color="text-03">
                {group.providerLabel}
              </Text>
              <div className="flex flex-col gap-2">
                {group.models.map((model) => renderModelSelect(model, "tts"))}
              </div>
            </div>
          ))}
        </div>
      </SettingsLayouts.Body>

      {disconnectTarget && (
        <VoiceDisconnectModal
          disconnectTarget={disconnectTarget}
          providers={providers}
          replacementProviderId={replacementProviderId}
          onReplacementChange={setReplacementProviderId}
          onClose={() => {
            setDisconnectTarget(null);
            setReplacementProviderId(null);
          }}
          onDisconnect={() => void handleDisconnect()}
        />
      )}

      {modalOpen && selectedProvider && (
        <VoiceProviderSetupModal
          providerType={selectedProvider}
          existingProvider={editingProvider}
          mode={modalMode}
          defaultModelId={selectedModelId}
          onClose={handleModalClose}
          onSuccess={handleModalSuccess}
        />
      )}
    </SettingsLayouts.Root>
  );
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/WebProviderModalReducer.ts
================================================
export type WebProviderModalState = {
  /** Provider type currently being configured in the modal (null when closed). */
  providerType: string | null;

  /** Existing provider ID when editing (null for new providers). */
  existingProviderId: number | null;

  /** Raw API key input value (may be the masked placeholder). */
  apiKeyValue: string;
  /** Single provider-specific config field value (e.g. cx / base URL). */
  configValue: string;

  /** Request phase for disabling inputs/buttons. */
  phase: "idle" | "validating" | "saving";

  /**
   * UI message shown in the modal helper region.
   * - kind=error: red error message
   * - kind=status: neutral/green status message
   */
  message: { kind: "status" | "error"; text: string } | null;
};

export type WebProviderModalAction =
  | {
      type: "OPEN";
      providerType: string;
      existingProviderId: number | null;
      initialApiKeyValue: string;
      initialConfigValue: string;
    }
  | { type: "CLOSE" }
  | { type: "SET_API_KEY"; value: string }
  | { type: "SET_CONFIG_VALUE"; value: string }
  | { type: "SET_PHASE"; phase: "idle" | "validating" | "saving" }
  | { type: "SET_STATUS_MESSAGE"; text: string }
  | { type: "SET_ERROR_MESSAGE"; text: string }
  | { type: "CLEAR_MESSAGE" };

export const initialWebProviderModalState: WebProviderModalState = {
  providerType: null,
  existingProviderId: null,
  apiKeyValue: "",
  configValue: "",
  phase: "idle",
  message: null,
};

export const MASKED_API_KEY_PLACEHOLDER = "••••••••••••••••";

export function WebProviderModalReducer(
  state: WebProviderModalState,
  action: WebProviderModalAction
): WebProviderModalState {
  switch (action.type) {
    case "OPEN":
      return {
        ...state,
        providerType: action.providerType,
        existingProviderId: action.existingProviderId,
        apiKeyValue: action.initialApiKeyValue,
        configValue: action.initialConfigValue,
        phase: "idle",
        message: null,
      };
    case "CLOSE":
      return {
        ...state,
        providerType: null,
        existingProviderId: null,
        apiKeyValue: "",
        configValue: "",
        phase: "idle",
        message: null,
      };
    case "SET_API_KEY": {
      return {
        ...state,
        apiKeyValue: action.value,
      };
    }
    case "SET_CONFIG_VALUE":
      return {
        ...state,
        configValue: action.value,
      };
    case "SET_PHASE":
      return {
        ...state,
        phase: action.phase,
      };
    case "SET_STATUS_MESSAGE":
      return {
        ...state,
        message: { kind: "status", text: action.text },
      };
    case "SET_ERROR_MESSAGE":
      return {
        ...state,
        phase: "idle",
        message: { kind: "error", text: action.text },
      };
    case "CLEAR_MESSAGE":
      return {
        ...state,
        message: null,
      };
    default:
      return state;
  }
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/WebProviderSetupModal.tsx
================================================
"use client";

import { memo, useMemo, type ReactNode, type FunctionComponent } from "react";

import { FormField } from "@/refresh-components/form/FormField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";

import { SvgArrowExchange, SvgOnyxLogo } from "@opal/icons";
import type { IconProps } from "@opal/types";

export type WebProviderSetupModalProps = {
  isOpen: boolean;
  onClose: () => void;
  providerLabel: string;
  providerLogo: ReactNode;
  description: string;
  apiKeyValue: string;
  onApiKeyChange: (value: string) => void;
  /**
   * When true, the API key is a stored/masked value from the backend
   * that cannot actually be revealed. The reveal toggle will be disabled.
   */
  isStoredApiKey?: boolean;
  optionalField?: {
    label: string;
    value: string;
    onChange: (value: string) => void;
    placeholder: string;
    description?: ReactNode;
    showFirst?: boolean;
  };
  helperMessage: ReactNode;
  helperClass: string;
  isProcessing: boolean;
  canConnect: boolean;
  onConnect: () => void;
  apiKeyAutoFocus?: boolean;
  hideApiKey?: boolean;
};

export const WebProviderSetupModal = memo(
  ({
    isOpen,
    onClose,
    providerLabel,
    providerLogo,
    description,
    apiKeyValue,
    onApiKeyChange,
    isStoredApiKey = false,
    optionalField,
    helperMessage,
    helperClass,
    isProcessing,
    canConnect,
    onConnect,
    apiKeyAutoFocus = true,
    hideApiKey = false,
  }: WebProviderSetupModalProps) => {
    const LogoArrangement = useMemo(() => {
      const Component: FunctionComponent<IconProps> = () => (
        <div className="flex items-center gap-1">
          {providerLogo}
          <div className="flex items-center justify-center size-4 p-0.5 shrink-0">
            <SvgArrowExchange className="size-3 text-text-04" />
          </div>
          <div className="flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip">
            <SvgOnyxLogo size={24} className="shrink-0" />
          </div>
        </div>
      );
      return Component;
    }, [providerLogo]);

    return (
      <Modal open={isOpen} onOpenChange={(open) => !open && onClose()}>
        <Modal.Content width="sm" preventAccidentalClose>
          <Modal.Header
            icon={LogoArrangement}
            title={`Set up ${providerLabel}`}
            description={description}
            onClose={onClose}
          />
          <Modal.Body>
            {optionalField?.showFirst && (
              <FormField
                name={optionalField.label.toLowerCase().replace(/\s+/g, "_")}
                state="idle"
                className="w-full"
              >
                <FormField.Label>{optionalField.label}</FormField.Label>
                <FormField.Control asChild>
                  <InputTypeIn
                    placeholder={optionalField.placeholder}
                    value={optionalField.value}
                    onChange={(event) =>
                      optionalField.onChange(event.target.value)
                    }
                  />
                </FormField.Control>
                {optionalField.description && (
                  <FormField.Description>
                    {optionalField.description}
                  </FormField.Description>
                )}
              </FormField>
            )}

            {!hideApiKey && (
              <FormField
                name="api_key"
                state={
                  helperClass.includes("status-error") ||
                  helperClass.includes("error")
                    ? "error"
                    : helperClass.includes("green")
                      ? "success"
                      : "idle"
                }
                className="w-full"
              >
                <FormField.Label>API Key</FormField.Label>
                <FormField.Control asChild>
                  <PasswordInputTypeIn
                    data-testid="web-provider-api-key-input"
                    placeholder="Enter API key"
                    value={apiKeyValue}
                    autoFocus={apiKeyAutoFocus}
                    isNonRevealable={isStoredApiKey}
                    onFocus={(e) => {
                      if (isStoredApiKey) {
                        e.target.select();
                      }
                    }}
                    onChange={(event) => onApiKeyChange(event.target.value)}
                    showClearButton={false}
                  />
                </FormField.Control>
                {isProcessing ? (
                  <FormField.APIMessage
                    state="loading"
                    messages={{
                      loading:
                        typeof helperMessage === "string"
                          ? helperMessage
                          : "Validating API key...",
                    }}
                  />
                ) : typeof helperMessage === "string" ? (
                  <FormField.Message
                    messages={{
                      idle:
                        helperClass.includes("status-error") ||
                        helperClass.includes("error")
                          ? ""
                          : helperClass.includes("green")
                            ? ""
                            : helperMessage,
                      error:
                        helperClass.includes("status-error") ||
                        helperClass.includes("error")
                          ? helperMessage
                          : "",
                      success: helperClass.includes("green")
                        ? helperMessage
                        : "",
                    }}
                  />
                ) : (
                  <FormField.Description className={helperClass}>
                    {helperMessage}
                  </FormField.Description>
                )}
              </FormField>
            )}

            {optionalField && !optionalField.showFirst && (
              <FormField
                name={optionalField.label.toLowerCase().replace(/\s+/g, "_")}
                state={
                  hideApiKey &&
                  (helperClass.includes("status-error") ||
                    helperClass.includes("error"))
                    ? "error"
                    : "idle"
                }
                className="w-full"
              >
                <FormField.Label>{optionalField.label}</FormField.Label>
                <FormField.Control asChild>
                  <InputTypeIn
                    placeholder={optionalField.placeholder}
                    value={optionalField.value}
                    onChange={(event) =>
                      optionalField.onChange(event.target.value)
                    }
                  />
                </FormField.Control>
                {optionalField.description && (
                  <FormField.Description>
                    {optionalField.description}
                  </FormField.Description>
                )}

                {hideApiKey && (
                  <>
                    {isProcessing ? (
                      <FormField.APIMessage
                        state="loading"
                        messages={{
                          loading:
                            typeof helperMessage === "string"
                              ? helperMessage
                              : "Testing connection...",
                        }}
                      />
                    ) : typeof helperMessage === "string" ? (
                      <FormField.Message
                        messages={{
                          idle:
                            helperClass.includes("status-error") ||
                            helperClass.includes("error")
                              ? ""
                              : helperClass.includes("green")
                                ? ""
                                : "",
                          error:
                            helperClass.includes("status-error") ||
                            helperClass.includes("error")
                              ? helperMessage
                              : "",
                          success: helperClass.includes("green")
                            ? helperMessage
                            : "",
                        }}
                      />
                    ) : null}
                  </>
                )}
              </FormField>
            )}
          </Modal.Body>
          <Modal.Footer>
            <Button prominence="secondary" type="button" onClick={onClose}>
              Cancel
            </Button>
            <Button
              disabled={!canConnect || isProcessing}
              type="button"
              onClick={onConnect}
            >
              {isProcessing ? "Connecting..." : "Connect"}
            </Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    );
  }
);

WebProviderSetupModal.displayName = "WebProviderSetupModal";


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/connectProviderFlow.ts
================================================
export type ProviderTestPayload = {
  provider_type: string;
  api_key: string | null;
  use_stored_key: boolean;
  config: Record<string, string>;
};

export type ProviderUpsertPayload = {
  id: number | null;
  name: string;
  provider_type: string;
  api_key: string | null;
  api_key_changed: boolean;
  config: Record<string, string>;
  activate: boolean;
};

const WEB_SEARCH_PROVIDER_ENDPOINTS = {
  search: {
    upsertUrl: "/api/admin/web-search/search-providers",
    testUrl: "/api/admin/web-search/search-providers/test",
  },
  content: {
    upsertUrl: "/api/admin/web-search/content-providers",
    testUrl: "/api/admin/web-search/content-providers/test",
  },
} as const;

/**
 * Which web-search provider category we are configuring.
 * - `search`: search engine provider
 * - `content`: crawler/content provider used to fetch full pages
 */
export type WebProviderCategory = keyof typeof WEB_SEARCH_PROVIDER_ENDPOINTS;

export type ConnectProviderFlowArgs = {
  category: WebProviderCategory;
  providerType: string;

  existingProviderId: number | null;
  existingProviderName: string | null;
  existingProviderHasApiKey: boolean;

  displayName: string;

  providerRequiresApiKey: boolean;
  apiKeyChangedForProvider: boolean;
  apiKey: string;

  config: Record<string, string>;
  configChanged: boolean;

  onValidating: (message: string) => void;
  onSaving: (message: string) => void;
  onError: (message: string) => void;
  onClose: () => void;

  mutate: () => Promise<unknown>;
};

export async function connectProviderFlow({
  category,
  providerType,
  existingProviderId,
  existingProviderName,
  existingProviderHasApiKey,
  displayName,
  providerRequiresApiKey,
  apiKeyChangedForProvider,
  apiKey,
  config,
  configChanged,
  onValidating,
  onSaving,
  onError,
  onClose,
  mutate,
}: ConnectProviderFlowArgs): Promise<void> {
  const { testUrl, upsertUrl } = WEB_SEARCH_PROVIDER_ENDPOINTS[category];
  const isNewProvider = existingProviderId == null;
  const needsValidation =
    isNewProvider || apiKeyChangedForProvider || configChanged;
  const msg = {
    validating: "Validating configuration...",
    activating: "Activating provider...",
    validatedThenActivating: "Configuration validated. Activating provider...",
    validationFailedFallback: "Failed to validate configuration.",
    activateFailedFallback: "Failed to activate provider.",
  };

  if (providerRequiresApiKey) {
    if (isNewProvider && !apiKey) {
      return;
    }
    if (apiKeyChangedForProvider && !apiKey) {
      return;
    }
  }

  try {
    if (needsValidation) {
      onValidating(msg.validating);

      const testPayload: ProviderTestPayload = {
        provider_type: providerType,
        api_key: apiKeyChangedForProvider ? apiKey : null,
        use_stored_key:
          providerRequiresApiKey &&
          !apiKeyChangedForProvider &&
          existingProviderHasApiKey,
        config,
      };

      const testResponse = await fetch(testUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(testPayload),
      });

      if (!testResponse.ok) {
        const errorBody = await testResponse.json().catch(() => ({}));
        throw new Error(
          typeof (errorBody as any)?.detail === "string"
            ? (errorBody as any).detail
            : msg.validationFailedFallback
        );
      }

      onSaving(msg.validatedThenActivating);
    } else {
      onSaving(msg.activating);
    }

    const upsertPayload: ProviderUpsertPayload = {
      id: existingProviderId,
      name: existingProviderName ?? displayName,
      provider_type: providerType,
      api_key: apiKeyChangedForProvider ? apiKey : null,
      api_key_changed: apiKeyChangedForProvider,
      config,
      activate: true,
    };

    const upsertResponse = await fetch(upsertUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(upsertPayload),
    });

    if (!upsertResponse.ok) {
      const errorBody = await upsertResponse.json().catch(() => ({}));
      throw new Error(
        typeof (errorBody as any)?.detail === "string"
          ? (errorBody as any).detail
          : msg.activateFailedFallback
      );
    }

    await mutate();
    onClose();
  } catch (e) {
    const message =
      e instanceof Error ? e.message : "Unexpected error occurred.";
    onError(message);
  }
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/contentProviderUtils.ts
================================================
export type WebContentProviderType =
  | "firecrawl"
  | "onyx_web_crawler"
  | "exa"
  | (string & {});

export const CONTENT_PROVIDER_DETAILS: Record<
  string,
  { label: string; subtitle: string; description: string; logoSrc?: string }
> = {
  onyx_web_crawler: {
    label: "Onyx Web Crawler",
    subtitle:
      "Built-in web crawler. Works for most pages but less performant in edge cases.",
    description:
      "Onyx's built-in crawler processes URLs returned by your search engine.",
  },
  firecrawl: {
    label: "Firecrawl",
    subtitle: "Leading open-source crawler.",
    description:
      "Connect Firecrawl to fetch and summarize page content from search results.",
    logoSrc: "/firecrawl.svg",
  },
  exa: {
    label: "Exa",
    subtitle: "Exa.ai",
    description:
      "Use Exa to fetch and summarize page content from search results.",
    logoSrc: "/Exa.svg",
  },
};

/**
 * Display order for built-in providers.
 * Derived from insertion order of `CONTENT_PROVIDER_DETAILS` for a single source of truth.
 */
export const CONTENT_PROVIDER_ORDER = Object.keys(
  CONTENT_PROVIDER_DETAILS
) as WebContentProviderType[];

export type ContentProviderConfig = Record<string, string> | null | undefined;

export type ContentProviderLike =
  | {
      has_api_key: boolean;
      config: ContentProviderConfig;
    }
  | null
  | undefined;

type ContentProviderCapabilities = {
  requiresApiKey: boolean;
  requiredConfigKeys: string[];
  storedConfigAliases?: Record<string, string[]>;
};

const CONTENT_PROVIDER_CAPABILITIES: Record<
  string,
  ContentProviderCapabilities
> = {
  onyx_web_crawler: {
    requiresApiKey: false,
    requiredConfigKeys: [],
  },
  firecrawl: {
    requiresApiKey: true,
    requiredConfigKeys: ["base_url"],
    storedConfigAliases: {
      base_url: ["base_url", "api_base_url"],
    },
  },
  // exa uses default capabilities
};

const DEFAULT_CONTENT_PROVIDER_CAPABILITIES: ContentProviderCapabilities = {
  requiresApiKey: true,
  requiredConfigKeys: [],
};

function getCapabilities(
  providerType: WebContentProviderType
): ContentProviderCapabilities {
  return (
    CONTENT_PROVIDER_CAPABILITIES[providerType as string] ??
    DEFAULT_CONTENT_PROVIDER_CAPABILITIES
  );
}

function getStoredContentConfigValue(
  providerType: WebContentProviderType,
  canonicalKey: string,
  config: ContentProviderConfig
): string {
  const caps = getCapabilities(providerType);
  const aliases = caps.storedConfigAliases?.[canonicalKey] ?? [canonicalKey];

  const safeConfig = config ?? {};
  for (const key of aliases) {
    const value = safeConfig[key];
    if (typeof value === "string" && value.length > 0) {
      return value;
    }
  }
  return "";
}

export function isContentProviderConfigured(
  providerType: WebContentProviderType,
  provider: ContentProviderLike
): boolean {
  const caps = getCapabilities(providerType);

  if (caps.requiresApiKey && !(provider?.has_api_key ?? false)) {
    return false;
  }

  for (const requiredKey of caps.requiredConfigKeys) {
    const value = getStoredContentConfigValue(
      providerType,
      requiredKey,
      provider?.config
    );
    if (!value) {
      return false;
    }
  }

  return true;
}

export function getCurrentContentProviderType(
  providers: Array<{
    is_active: boolean;
    provider_type: WebContentProviderType;
  }>
): WebContentProviderType {
  return (
    providers.find((p) => p.is_active && p.provider_type !== "onyx_web_crawler")
      ?.provider_type ??
    providers.find((p) => p.is_active)?.provider_type ??
    "onyx_web_crawler"
  );
}

export function buildContentProviderConfig(
  providerType: WebContentProviderType,
  baseUrl: string
): Record<string, string> {
  const caps = getCapabilities(providerType);
  const trimmed = baseUrl.trim();
  const config: Record<string, string> = {};

  if (caps.requiredConfigKeys.length === 0 || !trimmed) {
    return config;
  }

  const requiredKey = caps.requiredConfigKeys[0];
  if (!requiredKey) {
    return config;
  }

  config[requiredKey] = trimmed;
  return config;
}

export function canConnectContentProvider(
  providerType: WebContentProviderType,
  apiKey: string,
  baseUrl: string
): boolean {
  const caps = getCapabilities(providerType);

  if (caps.requiresApiKey && apiKey.trim().length === 0) {
    return false;
  }

  if (caps.requiredConfigKeys.length > 0 && baseUrl.trim().length === 0) {
    return false;
  }

  return true;
}

export function getSingleContentConfigFieldValueForForm(
  providerType: WebContentProviderType,
  provider: ContentProviderLike,
  defaultValue = ""
): string {
  const caps = getCapabilities(providerType);
  if (caps.requiredConfigKeys.length === 0) {
    return defaultValue;
  }

  const requiredKey = caps.requiredConfigKeys[0];
  if (!requiredKey) {
    return defaultValue;
  }

  return (
    getStoredContentConfigValue(providerType, requiredKey, provider?.config) ||
    defaultValue
  );
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/index.tsx
================================================
"use client";

import Image from "next/image";
import { useEffect, useMemo, useState, useReducer } from "react";
import { InfoIcon } from "@/components/icons/icons";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import { Content, CardHeaderLayout } from "@opal/layouts";
import useSWR from "swr";
import { errorHandlingFetcher, FetchError } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import { ThreeDotsLoader } from "@/components/Loading";
import { Callout } from "@/components/ui/callout";
import { cn } from "@/lib/utils";
import { toast } from "@/hooks/useToast";
import {
  SvgArrowExchange,
  SvgArrowRightCircle,
  SvgCheckSquare,
  SvgGlobe,
  SvgOnyxLogo,
  SvgSettings,
  SvgSlash,
  SvgUnplug,
} from "@opal/icons";
import { Button, SelectCard } from "@opal/components";
import { Hoverable } from "@opal/core";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { WebProviderSetupModal } from "@/refresh-pages/admin/WebSearchPage/WebProviderSetupModal";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import {
  SEARCH_PROVIDER_DETAILS,
  SEARCH_PROVIDER_ORDER,
  getSearchProviderDisplayLabel,
  buildSearchProviderConfig,
  canConnectSearchProvider,
  getSingleConfigFieldValueForForm,
  isBuiltInSearchProviderType,
  isSearchProviderConfigured,
  searchProviderRequiresApiKey,
  type WebSearchProviderType,
} from "@/refresh-pages/admin/WebSearchPage/searchProviderUtils";
import {
  CONTENT_PROVIDER_DETAILS,
  CONTENT_PROVIDER_ORDER,
  buildContentProviderConfig,
  canConnectContentProvider,
  getSingleContentConfigFieldValueForForm,
  getCurrentContentProviderType,
  isContentProviderConfigured,
  type WebContentProviderType,
} from "@/refresh-pages/admin/WebSearchPage/contentProviderUtils";
import {
  initialWebProviderModalState,
  WebProviderModalReducer,
  MASKED_API_KEY_PLACEHOLDER,
} from "@/refresh-pages/admin/WebSearchPage/WebProviderModalReducer";
import { connectProviderFlow } from "@/refresh-pages/admin/WebSearchPage/connectProviderFlow";
import {
  activateSearchProvider,
  deactivateSearchProvider,
  activateContentProvider,
  deactivateContentProvider,
  disconnectProvider,
} from "@/refresh-pages/admin/WebSearchPage/svc";
import type {
  WebSearchProviderView,
  WebContentProviderView,
  DisconnectTargetState,
} from "@/refresh-pages/admin/WebSearchPage/interfaces";

const NO_DEFAULT_VALUE = "__none__";

const route = ADMIN_ROUTES.WEB_SEARCH;

// ---------------------------------------------------------------------------
// WebSearchDisconnectModal
// ---------------------------------------------------------------------------

function WebSearchDisconnectModal({
  disconnectTarget,
  searchProviders,
  contentProviders,
  replacementProviderId,
  onReplacementChange,
  onClose,
  onDisconnect,
}: {
  disconnectTarget: DisconnectTargetState;
  searchProviders: WebSearchProviderView[];
  contentProviders: WebContentProviderView[];
  replacementProviderId: string | null;
  onReplacementChange: (id: string | null) => void;
  onClose: () => void;
  onDisconnect: () => void;
}) {
  const isSearch = disconnectTarget.category === "search";

  // Determine if the target is currently the active/selected provider
  const isActive = isSearch
    ? searchProviders.find((p) => p.id === disconnectTarget.id)?.is_active ??
      false
    : contentProviders.find((p) => p.id === disconnectTarget.id)?.is_active ??
      false;

  // Find other configured providers as replacements
  const replacementOptions = isSearch
    ? searchProviders.filter(
        (p) => p.id !== disconnectTarget.id && p.id > 0 && p.has_api_key
      )
    : contentProviders.filter(
        (p) =>
          p.id !== disconnectTarget.id &&
          p.provider_type !== "onyx_web_crawler" &&
          p.id > 0 &&
          p.has_api_key
      );

  const needsReplacement = isActive;
  const hasReplacements = replacementOptions.length > 0;

  const getLabel = (p: { name: string; provider_type: string }) => {
    if (isSearch) {
      const details =
        SEARCH_PROVIDER_DETAILS[p.provider_type as WebSearchProviderType];
      return details?.label ?? p.name ?? p.provider_type;
    }
    const details = CONTENT_PROVIDER_DETAILS[p.provider_type];
    return details?.label ?? p.name ?? p.provider_type;
  };

  const categoryLabel = isSearch ? "search engine" : "web crawler";
  const featureLabel = isSearch ? "web search" : "web crawling";
  const disableLabel = isSearch ? "Disable Web Search" : "Disable Web Crawling";

  // Auto-select first replacement when modal opens
  useEffect(() => {
    if (needsReplacement && hasReplacements && !replacementProviderId) {
      const first = replacementOptions[0];
      if (first) onReplacementChange(String(first.id));
    }
  }, []); // eslint-disable-line react-hooks/exhaustive-deps

  return (
    <ConfirmationModalLayout
      icon={SvgUnplug}
      title={`Disconnect ${disconnectTarget.label}`}
      description="This will remove the stored credentials for this provider."
      onClose={onClose}
      submit={
        <Button
          variant="danger"
          onClick={onDisconnect}
          disabled={
            needsReplacement && hasReplacements && !replacementProviderId
          }
        >
          Disconnect
        </Button>
      }
    >
      {needsReplacement ? (
        hasReplacements ? (
          <Section alignItems="start">
            <Text as="p" text03>
              <b>{disconnectTarget.label}</b> is currently the active{" "}
              {categoryLabel}. Search history will be preserved.
            </Text>
            <Section alignItems="start" gap={0.25}>
              <Text as="p" secondaryBody text03>
                Set New Default
              </Text>
              <InputSelect
                value={replacementProviderId ?? undefined}
                onValueChange={(v) => onReplacementChange(v)}
              >
                <InputSelect.Trigger placeholder="Select a replacement provider" />
                <InputSelect.Content>
                  {replacementOptions.map((p) => (
                    <InputSelect.Item key={p.id} value={String(p.id)}>
                      {getLabel(p)}
                    </InputSelect.Item>
                  ))}
                  <InputSelect.Separator />
                  <InputSelect.Item value={NO_DEFAULT_VALUE} icon={SvgSlash}>
                    <span>
                      <b>No Default</b>
                      <span className="text-text-03"> ({disableLabel})</span>
                    </span>
                  </InputSelect.Item>
                </InputSelect.Content>
              </InputSelect>
            </Section>
          </Section>
        ) : (
          <>
            <Text as="p" text03>
              <b>{disconnectTarget.label}</b> is currently the active{" "}
              {categoryLabel}.
            </Text>
            <Text as="p" text03>
              Connect another provider to continue using {featureLabel}.
            </Text>
          </>
        )
      ) : (
        <>
          <Text as="p" text03>
            {isSearch ? "Web search" : "Web crawling"} will no longer be routed
            through <b>{disconnectTarget.label}</b>.
          </Text>
          <Text as="p" text03>
            Search history will be preserved.
          </Text>
        </>
      )}
    </ConfirmationModalLayout>
  );
}

// ---------------------------------------------------------------------------
// ProviderCard — uses SelectCard for stateful interactive provider cards
// ---------------------------------------------------------------------------

type ProviderStatus = "disconnected" | "connected" | "selected";

interface ProviderCardProps {
  icon: React.FunctionComponent<{ size?: number; className?: string }>;
  title: string;
  description: string;
  status: ProviderStatus;
  onConnect?: () => void;
  onSelect?: () => void;
  onDeselect?: () => void;
  onEdit?: () => void;
  onDisconnect?: () => void;
  selectedLabel?: string;
}

const STATUS_TO_STATE = {
  disconnected: "empty",
  connected: "filled",
  selected: "selected",
} as const;

function ProviderCard({
  icon,
  title,
  description,
  status,
  onConnect,
  onSelect,
  onDeselect,
  onEdit,
  onDisconnect,
  selectedLabel = "Current Default",
}: ProviderCardProps) {
  const isDisconnected = status === "disconnected";
  const isConnected = status === "connected";
  const isSelected = status === "selected";

  return (
    <Hoverable.Root group="web-search/ProviderCard">
      <SelectCard
        state={STATUS_TO_STATE[status]}
        padding="sm"
        rounding="lg"
        onClick={
          isDisconnected && onConnect
            ? onConnect
            : isSelected && onDeselect
              ? onDeselect
              : undefined
        }
      >
        <CardHeaderLayout
          sizePreset="main-ui"
          variant="section"
          icon={icon}
          title={title}
          description={description}
          rightChildren={
            isDisconnected && onConnect ? (
              <Button
                prominence="tertiary"
                rightIcon={SvgArrowExchange}
                onClick={(e) => {
                  e.stopPropagation();
                  onConnect();
                }}
              >
                Connect
              </Button>
            ) : isConnected && onSelect ? (
              <Button
                prominence="tertiary"
                rightIcon={SvgArrowRightCircle}
                onClick={(e) => {
                  e.stopPropagation();
                  onSelect();
                }}
              >
                Set as Default
              </Button>
            ) : isSelected ? (
              <div className="p-2">
                <Content
                  title={selectedLabel}
                  sizePreset="main-ui"
                  variant="section"
                  icon={SvgCheckSquare}
                />
              </div>
            ) : undefined
          }
          bottomRightChildren={
            !isDisconnected ? (
              <div className="flex flex-row px-1 pb-1">
                {onDisconnect && (
                  <Hoverable.Item group="web-search/ProviderCard">
                    <Button
                      icon={SvgUnplug}
                      tooltip="Disconnect"
                      aria-label={`Disconnect ${title}`}
                      prominence="tertiary"
                      onClick={(e) => {
                        e.stopPropagation();
                        onDisconnect();
                      }}
                      size="md"
                    />
                  </Hoverable.Item>
                )}
                {onEdit && (
                  <Button
                    icon={SvgSettings}
                    tooltip="Edit"
                    aria-label={`Edit ${title}`}
                    prominence="tertiary"
                    onClick={(e) => {
                      e.stopPropagation();
                      onEdit();
                    }}
                    size="md"
                  />
                )}
              </div>
            ) : undefined
          }
        />
      </SelectCard>
    </Hoverable.Root>
  );
}

// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------

export default function WebSearchPage() {
  const [searchModal, dispatchSearchModal] = useReducer(
    WebProviderModalReducer,
    initialWebProviderModalState
  );
  const [disconnectTarget, setDisconnectTarget] =
    useState<DisconnectTargetState | null>(null);
  const [replacementProviderId, setReplacementProviderId] = useState<
    string | null
  >(null);
  const [contentModal, dispatchContentModal] = useReducer(
    WebProviderModalReducer,
    initialWebProviderModalState
  );
  const [activationError, setActivationError] = useState<string | null>(null);
  const [contentActivationError, setContentActivationError] = useState<
    string | null
  >(null);
  const {
    data: searchProvidersData,
    error: searchProvidersError,
    isLoading: isLoadingSearchProviders,
    mutate: mutateSearchProviders,
  } = useSWR<WebSearchProviderView[]>(
    SWR_KEYS.webSearchSearchProviders,
    errorHandlingFetcher
  );

  const {
    data: contentProvidersData,
    error: contentProvidersError,
    isLoading: isLoadingContentProviders,
    mutate: mutateContentProviders,
  } = useSWR<WebContentProviderView[]>(
    SWR_KEYS.webSearchContentProviders,
    errorHandlingFetcher
  );

  const searchProviders = searchProvidersData ?? [];
  const contentProviders = contentProvidersData ?? [];

  const isLoading = isLoadingSearchProviders || isLoadingContentProviders;

  // Exa shares API key between search and content providers
  const exaSearchProvider = searchProviders.find(
    (p) => p.provider_type === "exa"
  );
  const exaContentProvider = contentProviders.find(
    (p) => p.provider_type === "exa"
  );
  const hasSharedExaKey =
    (exaSearchProvider?.has_api_key || exaContentProvider?.has_api_key) ??
    false;

  // Modal form state is owned by reducers

  const openSearchModal = (
    providerType: WebSearchProviderType,
    provider?: WebSearchProviderView
  ) => {
    const requiresApiKey = searchProviderRequiresApiKey(providerType);
    const hasStoredKey = provider?.has_api_key ?? false;

    // For Exa search provider, check if we can use the shared Exa key
    const isExa = providerType === "exa";
    const canUseSharedExaKey = isExa && hasSharedExaKey && !hasStoredKey;

    dispatchSearchModal({
      type: "OPEN",
      providerType,
      existingProviderId: provider?.id ?? null,
      initialApiKeyValue:
        requiresApiKey && (hasStoredKey || canUseSharedExaKey)
          ? MASKED_API_KEY_PLACEHOLDER
          : "",
      initialConfigValue: getSingleConfigFieldValueForForm(
        providerType,
        provider
      ),
    });
  };

  const openContentModal = (
    providerType: WebContentProviderType,
    provider?: WebContentProviderView
  ) => {
    const hasStoredKey = provider?.has_api_key ?? false;
    const defaultFirecrawlBaseUrl = "https://api.firecrawl.dev/v2/scrape";

    // For Exa content provider, check if we can use the shared Exa key
    const isExa = providerType === "exa";
    const canUseSharedExaKey = isExa && hasSharedExaKey && !hasStoredKey;

    dispatchContentModal({
      type: "OPEN",
      providerType,
      existingProviderId: provider?.id ?? null,
      initialApiKeyValue:
        hasStoredKey || canUseSharedExaKey ? MASKED_API_KEY_PLACEHOLDER : "",
      initialConfigValue:
        providerType === "firecrawl"
          ? getSingleContentConfigFieldValueForForm(
              providerType,
              provider,
              defaultFirecrawlBaseUrl
            )
          : "",
    });
  };

  const hasActiveSearchProvider = searchProviders.some(
    (provider) => provider.is_active
  );

  const hasConfiguredSearchProvider = searchProviders.some((provider) =>
    isSearchProviderConfigured(provider.provider_type, provider)
  );

  const combinedSearchProviders = useMemo(() => {
    const byType = new Map(
      searchProviders.map((p) => [p.provider_type, p] as const)
    );

    const ordered = SEARCH_PROVIDER_ORDER.map((providerType) => {
      const provider = byType.get(providerType);
      const details = SEARCH_PROVIDER_DETAILS[providerType];
      return {
        key: provider?.id ?? providerType,
        providerType,
        label: getSearchProviderDisplayLabel(providerType, provider?.name),
        subtitle: details.subtitle,
        logoSrc: details.logoSrc,
        provider,
      };
    });

    const additional = searchProviders
      .filter((p) => !SEARCH_PROVIDER_ORDER.includes(p.provider_type))
      .map((provider) => ({
        key: provider.id,
        providerType: provider.provider_type,
        label: getSearchProviderDisplayLabel(
          provider.provider_type,
          provider.name
        ),
        subtitle: "Custom integration",
        logoSrc: undefined,
        provider,
      }));

    return [...ordered, ...additional];
  }, [searchProviders]);

  const selectedProviderType =
    searchModal.providerType as WebSearchProviderType | null;
  const selectedContentProviderType =
    contentModal.providerType as WebContentProviderType | null;

  const providerLabel = selectedProviderType
    ? getSearchProviderDisplayLabel(selectedProviderType)
    : "";
  const searchProviderValues = useMemo(
    () => ({
      apiKey: searchModal.apiKeyValue.trim(),
      config: searchModal.configValue.trim(),
    }),
    [searchModal.apiKeyValue, searchModal.configValue]
  );
  const canConnect =
    !!selectedProviderType &&
    canConnectSearchProvider(
      selectedProviderType,
      searchProviderValues.apiKey,
      searchProviderValues.config
    );
  const contentProviderLabel = selectedContentProviderType
    ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ||
      selectedContentProviderType
    : "";
  const contentProviderValues = useMemo(
    () => ({
      apiKey: contentModal.apiKeyValue.trim(),
      config: contentModal.configValue.trim(),
    }),
    [contentModal.apiKeyValue, contentModal.configValue]
  );
  const canConnectContent =
    !!selectedContentProviderType &&
    canConnectContentProvider(
      selectedContentProviderType,
      contentProviderValues.apiKey,
      contentProviderValues.config
    );

  const renderLogo = ({
    logoSrc,
    alt,
    fallback,
    size = 16,
    containerSize,
  }: {
    logoSrc?: string;
    alt: string;
    fallback?: React.ReactNode;
    size?: number;
    containerSize?: number;
  }) => {
    const containerSizeClass =
      size === 24 || containerSize === 28 ? "size-7" : "size-5";

    return (
      <div
        className={cn(
          "flex items-center justify-center px-0.5 py-0 shrink-0 overflow-clip",
          containerSizeClass
        )}
      >
        {logoSrc ? (
          <Image src={logoSrc} alt={alt} width={size} height={size} />
        ) : fallback ? (
          fallback
        ) : (
          <SvgGlobe size={size} className="text-text-02" />
        )}
      </div>
    );
  };

  const combinedContentProviders = useMemo(() => {
    const byType = new Map(
      contentProviders.map((p) => [p.provider_type, p] as const)
    );

    // Always include our built-in providers in a stable order. If missing, inject
    // a virtual placeholder so the UI can still render/activate it.
    const ordered = CONTENT_PROVIDER_ORDER.map((providerType) => {
      const existing = byType.get(providerType);
      if (existing) return existing;

      if (providerType === "onyx_web_crawler") {
        return {
          id: -1,
          name: "Onyx Web Crawler",
          provider_type: "onyx_web_crawler",
          is_active: true,
          config: null,
          has_api_key: true,
        } satisfies WebContentProviderView;
      }

      if (providerType === "firecrawl") {
        return {
          id: -2,
          name: "Firecrawl",
          provider_type: "firecrawl",
          is_active: false,
          config: null,
          has_api_key: false,
        } satisfies WebContentProviderView;
      }

      if (providerType === "exa") {
        return {
          id: -3,
          name: "Exa",
          provider_type: "exa",
          is_active: false,
          config: null,
          has_api_key: hasSharedExaKey,
        } satisfies WebContentProviderView;
      }

      return null;
    }).filter(Boolean) as WebContentProviderView[];

    const additional = contentProviders.filter(
      (p) => !CONTENT_PROVIDER_ORDER.includes(p.provider_type)
    );

    return [...ordered, ...additional];
  }, [contentProviders, hasSharedExaKey]);

  const currentContentProviderType =
    getCurrentContentProviderType(contentProviders);

  if (searchProvidersError || contentProvidersError) {
    const message =
      searchProvidersError?.message ||
      contentProvidersError?.message ||
      "Unable to load web search configuration.";

    const detail =
      (searchProvidersError instanceof FetchError &&
      typeof searchProvidersError.info?.detail === "string"
        ? searchProvidersError.info.detail
        : undefined) ||
      (contentProvidersError instanceof FetchError &&
      typeof contentProvidersError.info?.detail === "string"
        ? contentProvidersError.info.detail
        : undefined);

    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description="Search settings for external search across the internet."
          separator
        />
        <SettingsLayouts.Body>
          <Callout type="danger" title="Failed to load web search settings">
            {message}
            {detail && (
              <Text as="p" className="mt-2 text-text-03" mainContentBody text03>
                {detail}
              </Text>
            )}
          </Callout>
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  if (isLoading) {
    return (
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description="Search settings for external search across the internet."
          separator
        />
        <SettingsLayouts.Body>
          <ThreeDotsLoader />
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>
    );
  }

  const handleSearchConnect = async () => {
    if (!selectedProviderType) {
      return;
    }

    const config = buildSearchProviderConfig(
      selectedProviderType,
      searchProviderValues.config
    );

    const existingProviderId = searchModal.existingProviderId;
    const existingProvider = existingProviderId
      ? searchProviders.find((p) => p.id === existingProviderId)
      : null;

    const providerRequiresApiKey =
      searchProviderRequiresApiKey(selectedProviderType);
    const apiKeyChangedForProvider =
      providerRequiresApiKey &&
      searchModal.apiKeyValue !== MASKED_API_KEY_PLACEHOLDER &&
      searchProviderValues.apiKey.length > 0;

    const storedConfigValue = getSingleConfigFieldValueForForm(
      selectedProviderType,
      existingProvider
    );
    const configChanged =
      Object.keys(config).length > 0 &&
      storedConfigValue !== searchProviderValues.config;

    dispatchSearchModal({ type: "SET_PHASE", phase: "saving" });
    dispatchSearchModal({ type: "CLEAR_MESSAGE" });
    setActivationError(null);

    await connectProviderFlow({
      category: "search",
      providerType: selectedProviderType,
      existingProviderId: existingProvider?.id ?? null,
      existingProviderName: existingProvider?.name ?? null,
      existingProviderHasApiKey: existingProvider?.has_api_key ?? false,
      displayName:
        SEARCH_PROVIDER_DETAILS[selectedProviderType]?.label ??
        selectedProviderType,
      providerRequiresApiKey,
      apiKeyChangedForProvider,
      apiKey: searchProviderValues.apiKey,
      config,
      configChanged,
      onValidating: (message) => (
        dispatchSearchModal({ type: "SET_PHASE", phase: "validating" }),
        dispatchSearchModal({ type: "SET_STATUS_MESSAGE", text: message })
      ),
      onSaving: (message) => (
        dispatchSearchModal({ type: "SET_PHASE", phase: "saving" }),
        dispatchSearchModal({ type: "SET_STATUS_MESSAGE", text: message })
      ),
      onError: (message) =>
        dispatchSearchModal({ type: "SET_ERROR_MESSAGE", text: message }),
      onClose: () => {
        dispatchSearchModal({ type: "CLOSE" });
      },
      mutate: async () => {
        await mutateSearchProviders();
        if (selectedProviderType === "exa") {
          await mutateContentProviders();
        }
      },
    });
  };

  const handleActivateSearchProvider = async (providerId: number) => {
    setActivationError(null);
    try {
      await activateSearchProvider(providerId);
      await mutateSearchProviders();
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Unexpected error occurred.";
      setActivationError(message);
    }
  };

  const handleDeactivateSearchProvider = async (providerId: number) => {
    setActivationError(null);
    try {
      await deactivateSearchProvider(providerId);
      await mutateSearchProviders();
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Unexpected error occurred.";
      setActivationError(message);
    }
  };

  const handleActivateContentProvider = async (
    provider: WebContentProviderView
  ) => {
    setContentActivationError(null);
    try {
      await activateContentProvider(provider);
      await mutateContentProviders();
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Unexpected error occurred.";
      setContentActivationError(message);
    }
  };

  const handleDeactivateContentProvider = async (
    providerId: number,
    providerType: string
  ) => {
    setContentActivationError(null);
    try {
      await deactivateContentProvider(providerId, providerType);
      await mutateContentProviders();
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Unexpected error occurred.";
      setContentActivationError(message);
    }
  };

  const handleContentConnect = async () => {
    if (!selectedContentProviderType) {
      return;
    }

    const config = buildContentProviderConfig(
      selectedContentProviderType,
      contentProviderValues.config
    );

    const existingProviderId = contentModal.existingProviderId;
    const existingProvider = existingProviderId
      ? contentProviders.find((p) => p.id === existingProviderId)
      : null;

    const storedBaseUrl = getSingleContentConfigFieldValueForForm(
      selectedContentProviderType,
      existingProvider,
      "https://api.firecrawl.dev/v2/scrape"
    );
    const configChanged =
      selectedContentProviderType === "firecrawl" &&
      storedBaseUrl !== contentProviderValues.config;

    dispatchContentModal({ type: "SET_PHASE", phase: "saving" });
    dispatchContentModal({ type: "CLEAR_MESSAGE" });

    const apiKeyChangedForContentProvider =
      contentModal.apiKeyValue !== MASKED_API_KEY_PLACEHOLDER &&
      contentProviderValues.apiKey.length > 0;

    await connectProviderFlow({
      category: "content",
      providerType: selectedContentProviderType,
      existingProviderId: existingProvider?.id ?? null,
      existingProviderName: existingProvider?.name ?? null,
      existingProviderHasApiKey: existingProvider?.has_api_key ?? false,
      displayName:
        CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ??
        selectedContentProviderType,
      providerRequiresApiKey: true,
      apiKeyChangedForProvider: apiKeyChangedForContentProvider,
      apiKey: contentProviderValues.apiKey,
      config,
      configChanged,
      onValidating: (message) => (
        dispatchContentModal({ type: "SET_PHASE", phase: "validating" }),
        dispatchContentModal({ type: "SET_STATUS_MESSAGE", text: message })
      ),
      onSaving: (message) => (
        dispatchContentModal({ type: "SET_PHASE", phase: "saving" }),
        dispatchContentModal({ type: "SET_STATUS_MESSAGE", text: message })
      ),
      onError: (message) =>
        dispatchContentModal({ type: "SET_ERROR_MESSAGE", text: message }),
      onClose: () => {
        dispatchContentModal({ type: "CLOSE" });
      },
      mutate: async () => {
        await mutateContentProviders();
        if (selectedContentProviderType === "exa") {
          await mutateSearchProviders();
        }
      },
    });
  };

  const getContentProviderHelperMessage = () => {
    if (contentModal.message?.kind === "error") {
      return contentModal.message.text;
    }
    if (contentModal.message?.kind === "status") {
      return contentModal.message.text;
    }
    if (
      contentModal.phase === "validating" ||
      contentModal.phase === "saving"
    ) {
      return "Validating API key...";
    }

    const providerName = selectedContentProviderType
      ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ||
        selectedContentProviderType
      : "";

    if (selectedContentProviderType === "exa") {
      return (
        <>
          Paste your{" "}
          <a
            href="https://dashboard.exa.ai/api-keys"
            target="_blank"
            rel="noopener noreferrer"
            className="underline"
          >
            API key
          </a>{" "}
          from Exa to enable crawling.
        </>
      );
    }

    return selectedContentProviderType === "firecrawl" ? (
      <>
        Paste your <span className="underline">API key</span> from Firecrawl to
        access your search engine.
      </>
    ) : (
      `Paste your API key from ${providerName} to enable crawling.`
    );
  };

  const getContentProviderHelperClass = () => {
    if (contentModal.message?.kind === "error") return "text-status-error-05";
    if (contentModal.message?.kind === "status") {
      return contentModal.message.text.toLowerCase().includes("validated")
        ? "text-green-500"
        : "text-text-03";
    }
    return "text-text-03";
  };

  const handleDisconnectProvider = async () => {
    if (!disconnectTarget) return;
    const { id, category } = disconnectTarget;

    try {
      await disconnectProvider(id, category, replacementProviderId);
      toast.success(`${disconnectTarget.label} disconnected`);
      await mutateSearchProviders();
      await mutateContentProviders();
    } catch (error) {
      console.error("Failed to disconnect web search provider:", error);
      const message =
        error instanceof Error ? error.message : "Unexpected error occurred.";
      if (category === "search") {
        setActivationError(message);
      } else {
        setContentActivationError(message);
      }
    } finally {
      setDisconnectTarget(null);
      setReplacementProviderId(null);
    }
  };

  return (
    <>
      <SettingsLayouts.Root>
        <SettingsLayouts.Header
          icon={route.icon}
          title={route.title}
          description="Search settings for external search across the internet."
          separator
        />

        <SettingsLayouts.Body>
          <div className="flex w-full flex-col gap-3">
            <Content
              title="Search Engine"
              description="External search engine API used for web search result URLs, snippets, and metadata."
              sizePreset="main-content"
              variant="section"
            />

            {activationError && (
              <Callout type="danger" title="Unable to update default provider">
                {activationError}
              </Callout>
            )}

            {!hasActiveSearchProvider && (
              <div
                className="flex items-start rounded-16 border p-1"
                style={{
                  backgroundColor: "var(--status-info-00)",
                  borderColor: "var(--status-info-02)",
                }}
              >
                <div className="flex items-start gap-1 p-2">
                  <div
                    className="flex size-5 items-center justify-center rounded-full p-0.5"
                    style={{
                      backgroundColor: "var(--status-info-01)",
                    }}
                  >
                    <div style={{ color: "var(--status-text-info-05)" }}>
                      <InfoIcon size={16} />
                    </div>
                  </div>
                  <Text as="p" className="flex-1 px-0.5" mainUiBody text04>
                    {hasConfiguredSearchProvider
                      ? "Select a search engine to enable web search."
                      : "Connect a search engine to set up web search."}
                  </Text>
                </div>
              </div>
            )}

            <div className="flex flex-col gap-2">
              {combinedSearchProviders.map(
                ({ key, providerType, label, subtitle, logoSrc, provider }) => {
                  const isConfigured = isSearchProviderConfigured(
                    providerType,
                    provider
                  );
                  const isActive = provider?.is_active ?? false;
                  const providerId = provider?.id;
                  const canOpenModal =
                    isBuiltInSearchProviderType(providerType);

                  const status: "disconnected" | "connected" | "selected" =
                    !isConfigured
                      ? "disconnected"
                      : isActive
                        ? "selected"
                        : "connected";

                  return (
                    <ProviderCard
                      key={`${key}-${providerType}`}
                      icon={() =>
                        logoSrc ? (
                          <Image
                            src={logoSrc}
                            alt={`${label} logo`}
                            width={16}
                            height={16}
                          />
                        ) : (
                          <SvgGlobe size={16} />
                        )
                      }
                      title={label}
                      description={subtitle}
                      status={status}
                      onConnect={
                        canOpenModal
                          ? () => {
                              openSearchModal(providerType, provider);
                              setActivationError(null);
                            }
                          : undefined
                      }
                      onSelect={
                        providerId
                          ? () => {
                              void handleActivateSearchProvider(providerId);
                            }
                          : undefined
                      }
                      onDeselect={
                        providerId
                          ? () => {
                              void handleDeactivateSearchProvider(providerId);
                            }
                          : undefined
                      }
                      onEdit={
                        isConfigured && canOpenModal
                          ? () => {
                              openSearchModal(
                                providerType as WebSearchProviderType,
                                provider
                              );
                            }
                          : undefined
                      }
                      onDisconnect={
                        isConfigured && provider && provider.id > 0
                          ? () =>
                              setDisconnectTarget({
                                id: provider.id,
                                label,
                                category: "search",
                                providerType,
                              })
                          : undefined
                      }
                    />
                  );
                }
              )}
            </div>
          </div>

          <div className="flex w-full flex-col gap-3">
            <Content
              title="Web Crawler"
              description="Used to read the full contents of search result pages."
              sizePreset="main-content"
              variant="section"
            />

            {contentActivationError && (
              <Callout type="danger" title="Unable to update crawler">
                {contentActivationError}
              </Callout>
            )}

            <div className="flex flex-col gap-2">
              {combinedContentProviders.map((provider) => {
                const label =
                  provider.name ||
                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.label ||
                  provider.provider_type;

                const subtitle =
                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.subtitle ||
                  provider.provider_type;

                const providerId = provider.id;
                const isConfigured = isContentProviderConfigured(
                  provider.provider_type,
                  provider
                );
                const isCurrentCrawler =
                  provider.provider_type === currentContentProviderType;

                const status: "disconnected" | "connected" | "selected" =
                  !isConfigured
                    ? "disconnected"
                    : isCurrentCrawler
                      ? "selected"
                      : "connected";

                const canActivate =
                  providerId > 0 ||
                  provider.provider_type === "onyx_web_crawler" ||
                  isConfigured;

                const contentLogoSrc =
                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.logoSrc;

                return (
                  <ProviderCard
                    key={`${provider.provider_type}-${provider.id}`}
                    icon={() =>
                      contentLogoSrc ? (
                        <Image
                          src={contentLogoSrc}
                          alt={`${label} logo`}
                          width={16}
                          height={16}
                        />
                      ) : provider.provider_type === "onyx_web_crawler" ? (
                        <SvgOnyxLogo size={16} />
                      ) : (
                        <SvgGlobe size={16} />
                      )
                    }
                    title={label}
                    description={subtitle}
                    status={status}
                    selectedLabel="Current Crawler"
                    onConnect={() => {
                      openContentModal(provider.provider_type, provider);
                      setContentActivationError(null);
                    }}
                    onSelect={
                      canActivate
                        ? () => {
                            void handleActivateContentProvider(provider);
                          }
                        : undefined
                    }
                    onDeselect={() => {
                      void handleDeactivateContentProvider(
                        providerId,
                        provider.provider_type
                      );
                    }}
                    onEdit={
                      provider.provider_type !== "onyx_web_crawler" &&
                      isConfigured
                        ? () => {
                            openContentModal(provider.provider_type, provider);
                          }
                        : undefined
                    }
                    onDisconnect={
                      provider.provider_type !== "onyx_web_crawler" &&
                      isConfigured &&
                      provider.id > 0
                        ? () =>
                            setDisconnectTarget({
                              id: provider.id,
                              label,
                              category: "content",
                              providerType: provider.provider_type,
                            })
                        : undefined
                    }
                  />
                );
              })}
            </div>
          </div>
        </SettingsLayouts.Body>
      </SettingsLayouts.Root>

      {disconnectTarget && (
        <WebSearchDisconnectModal
          disconnectTarget={disconnectTarget}
          searchProviders={searchProviders}
          contentProviders={combinedContentProviders}
          replacementProviderId={replacementProviderId}
          onReplacementChange={setReplacementProviderId}
          onClose={() => {
            setDisconnectTarget(null);
            setReplacementProviderId(null);
          }}
          onDisconnect={() => void handleDisconnectProvider()}
        />
      )}

      <WebProviderSetupModal
        isOpen={selectedProviderType !== null}
        onClose={() => {
          dispatchSearchModal({ type: "CLOSE" });
        }}
        providerLabel={providerLabel}
        providerLogo={renderLogo({
          logoSrc: selectedProviderType
            ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.logoSrc
            : undefined,
          alt: `${providerLabel} logo`,
          size: 24,
          containerSize: 28,
        })}
        description={
          selectedProviderType
            ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.helper ??
              SEARCH_PROVIDER_DETAILS[selectedProviderType]?.subtitle ??
              ""
            : ""
        }
        apiKeyValue={searchModal.apiKeyValue}
        onApiKeyChange={(value) =>
          dispatchSearchModal({ type: "SET_API_KEY", value })
        }
        isStoredApiKey={searchModal.apiKeyValue === MASKED_API_KEY_PLACEHOLDER}
        optionalField={
          selectedProviderType === "google_pse"
            ? {
                label: "Search Engine ID",
                value: searchModal.configValue,
                onChange: (value) =>
                  dispatchSearchModal({ type: "SET_CONFIG_VALUE", value }),
                placeholder: "Enter search engine ID",
                description: (
                  <>
                    Paste your{" "}
                    <a
                      href="https://programmablesearchengine.google.com/controlpanel/all"
                      target="_blank"
                      rel="noopener noreferrer"
                      className="underline"
                    >
                      search engine ID
                    </a>{" "}
                    you want to use for web search.
                  </>
                ),
              }
            : selectedProviderType === "searxng"
              ? {
                  label: "SearXNG Base URL",
                  value: searchModal.configValue,
                  onChange: (value) =>
                    dispatchSearchModal({ type: "SET_CONFIG_VALUE", value }),
                  placeholder: "https://your-searxng-instance.com",
                  description: (
                    <>
                      Paste the base URL of your{" "}
                      <a
                        href="https://docs.searxng.org/admin/installation.html"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="underline"
                      >
                        private SearXNG instance
                      </a>
                      .
                    </>
                  ),
                }
              : undefined
        }
        helperMessage={
          searchModal.message?.kind === "error" ? (
            searchModal.message.text
          ) : searchModal.phase === "validating" ||
            searchModal.phase === "saving" ? (
            "Checking connection..."
          ) : (
            <>
              Paste your{" "}
              <a
                href={
                  (selectedProviderType
                    ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.apiKeyUrl
                    : undefined) ?? "#"
                }
                target="_blank"
                rel="noopener noreferrer"
                className="underline"
              >
                API key
              </a>{" "}
              to access your search engine.
            </>
          )
        }
        helperClass={
          searchModal.message?.kind === "error"
            ? "text-status-error-05"
            : searchModal.phase === "validating" ||
                searchModal.phase === "saving"
              ? "text-text-03"
              : "text-text-03"
        }
        isProcessing={
          searchModal.phase === "validating" || searchModal.phase === "saving"
        }
        canConnect={canConnect}
        onConnect={() => {
          void handleSearchConnect();
        }}
        hideApiKey={
          !!selectedProviderType &&
          !searchProviderRequiresApiKey(selectedProviderType)
        }
      />

      <WebProviderSetupModal
        isOpen={selectedContentProviderType !== null}
        onClose={() => {
          dispatchContentModal({ type: "CLOSE" });
        }}
        providerLabel={contentProviderLabel}
        providerLogo={renderLogo({
          logoSrc: selectedContentProviderType
            ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.logoSrc
            : undefined,
          alt: `${
            contentProviderLabel || selectedContentProviderType || "provider"
          } logo`,
          fallback:
            selectedContentProviderType === "onyx_web_crawler" ? (
              <SvgOnyxLogo size={24} />
            ) : undefined,
          size: 24,
          containerSize: 28,
        })}
        description={
          selectedContentProviderType
            ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]
                ?.description ||
              CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.subtitle ||
              `Provide credentials for ${contentProviderLabel} to enable crawling.`
            : ""
        }
        apiKeyValue={contentModal.apiKeyValue}
        onApiKeyChange={(value) =>
          dispatchContentModal({ type: "SET_API_KEY", value })
        }
        isStoredApiKey={contentModal.apiKeyValue === MASKED_API_KEY_PLACEHOLDER}
        optionalField={
          selectedContentProviderType === "firecrawl"
            ? {
                label: "API Base URL",
                value: contentModal.configValue,
                onChange: (value) =>
                  dispatchContentModal({ type: "SET_CONFIG_VALUE", value }),
                placeholder: "https://",
                description: "Your Firecrawl API base URL.",
                showFirst: true,
              }
            : undefined
        }
        helperMessage={getContentProviderHelperMessage()}
        helperClass={getContentProviderHelperClass()}
        isProcessing={
          contentModal.phase === "validating" || contentModal.phase === "saving"
        }
        canConnect={canConnectContent}
        onConnect={() => {
          void handleContentConnect();
        }}
        apiKeyAutoFocus={
          !selectedContentProviderType ||
          selectedContentProviderType !== "firecrawl"
        }
      />
    </>
  );
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/interfaces.ts
================================================
import type { WebSearchProviderType } from "@/refresh-pages/admin/WebSearchPage/searchProviderUtils";
import type { WebContentProviderType } from "@/refresh-pages/admin/WebSearchPage/contentProviderUtils";

export interface WebSearchProviderView {
  id: number;
  name: string;
  provider_type: WebSearchProviderType;
  is_active: boolean;
  config: Record<string, string> | null;
  has_api_key: boolean;
}

export interface WebContentProviderView {
  id: number;
  name: string;
  provider_type: WebContentProviderType;
  is_active: boolean;
  config: Record<string, string> | null;
  has_api_key: boolean;
}

export interface DisconnectTargetState {
  id: number;
  label: string;
  category: "search" | "content";
  providerType: string;
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/searchProviderUtils.ts
================================================
export type WebSearchProviderType =
  | "google_pse"
  | "serper"
  | "exa"
  | "searxng"
  | "brave";

export const SEARCH_PROVIDER_DETAILS: Record<
  WebSearchProviderType,
  {
    label: string;
    subtitle: string;
    helper: string;
    logoSrc?: string;
    apiKeyUrl?: string;
  }
> = {
  exa: {
    label: "Exa",
    subtitle: "Exa.ai",
    helper: "Connect to Exa to set up web search.",
    logoSrc: "/Exa.svg",
    apiKeyUrl: "https://dashboard.exa.ai/api-keys",
  },
  serper: {
    label: "Serper",
    subtitle: "Serper.dev",
    helper: "Connect to Serper to set up web search.",
    logoSrc: "/Serper.svg",
    apiKeyUrl: "https://serper.dev/api-key",
  },
  brave: {
    label: "Brave",
    subtitle: "Brave Search API",
    helper: "Connect to Brave Search API to set up web search.",
    logoSrc: "/Brave.svg",
    apiKeyUrl:
      "https://api-dashboard.search.brave.com/app/documentation/web-search/get-started",
  },
  google_pse: {
    label: "Google PSE",
    subtitle: "Google",
    helper: "Connect to Google PSE to set up web search.",
    logoSrc: "/Google.svg",
    apiKeyUrl: "https://programmablesearchengine.google.com/controlpanel/all",
  },
  searxng: {
    label: "SearXNG",
    subtitle: "SearXNG",
    helper: "Connect to SearXNG to set up web search.",
    logoSrc: "/SearXNG.svg",
  },
};

/**
 * Display order for built-in providers.
 * Derived from insertion order of `SEARCH_PROVIDER_DETAILS` for a single source of truth.
 */
export const SEARCH_PROVIDER_ORDER = Object.keys(
  SEARCH_PROVIDER_DETAILS
) as WebSearchProviderType[];

export function getSearchProviderDisplayLabel(
  providerType: string,
  providerName?: string | null
): string {
  if (providerName) return providerName;
  return (
    (SEARCH_PROVIDER_DETAILS as Record<string, { label: string }>)[providerType]
      ?.label ?? providerType
  );
}

export type SearchProviderConfig = Record<string, string> | null | undefined;

export type SearchProviderLike =
  | {
      has_api_key: boolean;
      config: SearchProviderConfig;
    }
  | null
  | undefined;

type SearchProviderCapabilities = {
  requiresApiKey: boolean;
  /** Keys required in `config` to consider the provider configured / connectable. */
  requiredConfigKeys: string[];
  /**
   * Some providers historically stored config under different keys.
   * When reading stored config, we consider these aliases equivalent.
   */
  storedConfigAliases?: Record<string, string[]>;
};

const SEARCH_PROVIDER_CAPABILITIES: Record<
  WebSearchProviderType,
  SearchProviderCapabilities
> = {
  exa: {
    requiresApiKey: true,
    requiredConfigKeys: [],
  },
  serper: {
    requiresApiKey: true,
    requiredConfigKeys: [],
  },
  brave: {
    requiresApiKey: true,
    requiredConfigKeys: [],
  },
  google_pse: {
    requiresApiKey: true,
    requiredConfigKeys: ["search_engine_id"],
    storedConfigAliases: {
      search_engine_id: ["search_engine_id", "cx", "search_engine"],
    },
  },
  searxng: {
    requiresApiKey: false,
    requiredConfigKeys: ["searxng_base_url"],
    storedConfigAliases: {
      searxng_base_url: ["searxng_base_url"],
    },
  },
};

const DEFAULT_SEARCH_PROVIDER_CAPABILITIES: SearchProviderCapabilities = {
  requiresApiKey: true,
  requiredConfigKeys: [],
};

function getCapabilities(providerType: string): SearchProviderCapabilities {
  return (
    (
      SEARCH_PROVIDER_CAPABILITIES as Record<string, SearchProviderCapabilities>
    )[providerType] ?? DEFAULT_SEARCH_PROVIDER_CAPABILITIES
  );
}

export function isBuiltInSearchProviderType(
  providerType: string
): providerType is WebSearchProviderType {
  return Object.prototype.hasOwnProperty.call(
    SEARCH_PROVIDER_DETAILS,
    providerType
  );
}

export function searchProviderRequiresApiKey(providerType: string): boolean {
  return getCapabilities(providerType).requiresApiKey;
}

function getStoredConfigValue(
  providerType: string,
  canonicalKey: string,
  config: SearchProviderConfig
): string {
  const caps = getCapabilities(providerType);
  const aliases = caps.storedConfigAliases?.[canonicalKey] ?? [canonicalKey];

  const safeConfig = config ?? {};
  for (const key of aliases) {
    const value = safeConfig[key];
    if (typeof value === "string" && value.length > 0) {
      return value;
    }
  }
  return "";
}

/** True when the provider has all required credentials/config to be usable. */
export function isSearchProviderConfigured(
  providerType: string,
  provider: SearchProviderLike
): boolean {
  const caps = getCapabilities(providerType);

  if (caps.requiresApiKey && !(provider?.has_api_key ?? false)) {
    return false;
  }

  for (const requiredKey of caps.requiredConfigKeys) {
    const value = getStoredConfigValue(
      providerType,
      requiredKey,
      provider?.config
    );
    if (!value) {
      return false;
    }
  }

  return true;
}

export function canConnectSearchProvider(
  providerType: string,
  apiKey: string,
  searchEngineIdOrBaseUrl: string
): boolean {
  const caps = getCapabilities(providerType);

  if (caps.requiresApiKey && apiKey.trim().length === 0) {
    return false;
  }

  // Today, all config-driven search providers only expose a single required string field.
  if (
    caps.requiredConfigKeys.length > 0 &&
    searchEngineIdOrBaseUrl.trim().length === 0
  ) {
    return false;
  }

  return true;
}

/** Build the `config` payload to send to the backend for a provider. */
export function buildSearchProviderConfig(
  providerType: string,
  searchEngineIdOrBaseUrl: string
): Record<string, string> {
  const caps = getCapabilities(providerType);
  const value = searchEngineIdOrBaseUrl.trim();

  const config: Record<string, string> = {};
  if (!value || caps.requiredConfigKeys.length === 0) {
    return config;
  }

  // Only one required key for now.
  const requiredKey = caps.requiredConfigKeys[0];
  if (!requiredKey) {
    return config;
  }
  config[requiredKey] = value;
  return config;
}

/**
 * For providers that have a single required config field, return that stored value for form prefilling.
 */
export function getSingleConfigFieldValueForForm(
  providerType: string,
  provider: SearchProviderLike
): string {
  const caps = getCapabilities(providerType);
  if (caps.requiredConfigKeys.length === 0) {
    return "";
  }

  const requiredKey = caps.requiredConfigKeys[0];
  if (!requiredKey) {
    return "";
  }
  return getStoredConfigValue(providerType, requiredKey, provider?.config);
}


================================================
FILE: web/src/refresh-pages/admin/WebSearchPage/svc.ts
================================================
import { CONTENT_PROVIDER_DETAILS } from "@/refresh-pages/admin/WebSearchPage/contentProviderUtils";
import type { WebContentProviderView } from "@/refresh-pages/admin/WebSearchPage/interfaces";

async function parseErrorDetail(
  res: Response,
  fallback: string
): Promise<string> {
  try {
    const body = await res.json();
    return body?.detail ?? fallback;
  } catch {
    return fallback;
  }
}

export async function activateSearchProvider(
  providerId: number
): Promise<void> {
  const res = await fetch(
    `/api/admin/web-search/search-providers/${providerId}/activate`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    }
  );
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to set provider as default.")
    );
  }
}

export async function deactivateSearchProvider(
  providerId: number
): Promise<void> {
  const res = await fetch(
    `/api/admin/web-search/search-providers/${providerId}/deactivate`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    }
  );
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to deactivate provider.")
    );
  }
}

export async function activateContentProvider(
  provider: WebContentProviderView
): Promise<void> {
  if (provider.provider_type === "onyx_web_crawler") {
    const res = await fetch(
      "/api/admin/web-search/content-providers/reset-default",
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
      }
    );
    if (!res.ok) {
      throw new Error(
        await parseErrorDetail(res, "Failed to set crawler as default.")
      );
    }
  } else if (provider.id > 0) {
    const res = await fetch(
      `/api/admin/web-search/content-providers/${provider.id}/activate`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
      }
    );
    if (!res.ok) {
      throw new Error(
        await parseErrorDetail(res, "Failed to set crawler as default.")
      );
    }
  } else {
    const payload = {
      id: null,
      name:
        provider.name ||
        CONTENT_PROVIDER_DETAILS[provider.provider_type]?.label ||
        provider.provider_type,
      provider_type: provider.provider_type,
      api_key: null,
      api_key_changed: false,
      config: provider.config ?? null,
      activate: true,
    };

    const res = await fetch("/api/admin/web-search/content-providers", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    if (!res.ok) {
      throw new Error(
        await parseErrorDetail(res, "Failed to set crawler as default.")
      );
    }
  }
}

export async function deactivateContentProvider(
  providerId: number,
  providerType: string
): Promise<void> {
  const endpoint =
    providerType === "onyx_web_crawler" || providerId < 0
      ? "/api/admin/web-search/content-providers/reset-default"
      : `/api/admin/web-search/content-providers/${providerId}/deactivate`;

  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  });
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to deactivate provider.")
    );
  }
}

export async function disconnectProvider(
  id: number,
  category: "search" | "content",
  replacementProviderId: string | null
): Promise<void> {
  // If a replacement was selected (not "No Default"), activate it first
  if (replacementProviderId && replacementProviderId !== "__none__") {
    const repId = Number(replacementProviderId);
    const activateEndpoint =
      category === "search"
        ? `/api/admin/web-search/search-providers/${repId}/activate`
        : `/api/admin/web-search/content-providers/${repId}/activate`;
    const activateRes = await fetch(activateEndpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    });
    if (!activateRes.ok) {
      throw new Error(
        await parseErrorDetail(
          activateRes,
          "Failed to activate replacement provider."
        )
      );
    }
  }

  const res = await fetch(`/api/admin/web-search/${category}-providers/${id}`, {
    method: "DELETE",
  });
  if (!res.ok) {
    throw new Error(
      await parseErrorDetail(res, "Failed to disconnect provider.")
    );
  }
}


================================================
FILE: web/src/sections/AppHealthBanner.tsx
================================================
"use client";

import { errorHandlingFetcher, RedirectError } from "@/lib/fetcher";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import Modal from "@/refresh-components/Modal";
import { useCallback, useEffect, useState, useRef } from "react";
import { getSecondsUntilExpiration } from "@/lib/time";
import { refreshToken } from "@/lib/user";
import { NEXT_PUBLIC_CUSTOM_REFRESH_URL } from "@/lib/constants";
import { Button } from "@opal/components";
import { logout } from "@/lib/user";
import { usePathname, useRouter } from "next/navigation";
import { SvgAlertTriangle, SvgLogOut } from "@opal/icons";
import { Content } from "@opal/layouts";
import { useCurrentUser } from "@/hooks/useCurrentUser";
import { getExtensionContext } from "@/lib/extension/utils";

export default function AppHealthBanner() {
  const router = useRouter();
  const { error } = useSWR(SWR_KEYS.health, errorHandlingFetcher);
  const [expired, setExpired] = useState(false);
  const [showLoggedOutModal, setShowLoggedOutModal] = useState(false);
  const pathname = usePathname();
  const expirationTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const refreshIntervalRef = useRef<NodeJS.Timer | null>(null);

  const { user, mutateUser, userError } = useCurrentUser();

  // Handle 403 errors from the /api/me endpoint.
  // Skip entirely on auth pages — the user isn't logged in yet, so there's
  // nothing to "log out" of and hitting /auth/logout just creates noise.
  useEffect(() => {
    if (userError && userError.status === 403 && !pathname?.includes("/auth")) {
      logout().then(() => {
        setShowLoggedOutModal(true);
      });
    }
  }, [userError, pathname]);

  // Function to handle the "Log in" button click
  function handleLogin() {
    setShowLoggedOutModal(false);
    const { isExtension } = getExtensionContext();
    if (isExtension) {
      // In the Chrome extension, open login in a new tab so OAuth popups
      // work correctly (the extension iframe has no navigable URL origin).
      window.open(
        window.location.origin + "/auth/login",
        "_blank",
        "noopener,noreferrer"
      );
    } else {
      router.push("/auth/login");
    }
  }

  // Function to set up expiration timeout
  const setupExpirationTimeout = useCallback(
    (secondsUntilExpiration: number) => {
      // Clear any existing timeout
      if (expirationTimeoutRef.current) {
        clearTimeout(expirationTimeoutRef.current);
      }

      // Set timeout to show logout modal when session expires
      const timeUntilExpire = (secondsUntilExpiration + 10) * 1000;
      expirationTimeoutRef.current = setTimeout(() => {
        setExpired(true);

        if (!pathname?.includes("/auth")) {
          setShowLoggedOutModal(true);
        }
      }, timeUntilExpire);
    },
    [pathname]
  );

  // Clean up any timeouts/intervals when component unmounts
  useEffect(() => {
    return () => {
      if (expirationTimeoutRef.current) {
        clearTimeout(expirationTimeoutRef.current);
      }

      if (refreshIntervalRef.current) {
        clearInterval(refreshIntervalRef.current);
      }
    };
  }, []);

  // Set up token refresh logic if custom refresh URL exists
  useEffect(() => {
    if (!user) return;

    const secondsUntilExpiration = getSecondsUntilExpiration(user);
    if (secondsUntilExpiration === null) return;

    // Set up expiration timeout based on current user data
    setupExpirationTimeout(secondsUntilExpiration);

    if (NEXT_PUBLIC_CUSTOM_REFRESH_URL) {
      const refreshUrl = NEXT_PUBLIC_CUSTOM_REFRESH_URL;

      const attemptTokenRefresh = async () => {
        let retryCount = 0;
        const maxRetries = 3;

        while (retryCount < maxRetries) {
          try {
            const refreshTokenData = await refreshToken(refreshUrl);
            if (!refreshTokenData) {
              throw new Error("Failed to refresh token");
            }

            const response = await fetch(
              "/api/enterprise-settings/refresh-token",
              {
                method: "POST",
                headers: {
                  "Content-Type": "application/json",
                },
                body: JSON.stringify(refreshTokenData),
              }
            );
            if (!response.ok) {
              throw new Error(`HTTP error! status: ${response.status}`);
            }

            // Wait for backend to process the token
            await new Promise((resolve) => setTimeout(resolve, 4000));

            // Get updated user data
            const updatedUser = await mutateUser();

            if (updatedUser) {
              // Reset expiration timeout with new expiration time
              const newSecondsUntilExpiration =
                getSecondsUntilExpiration(updatedUser);
              if (newSecondsUntilExpiration !== null) {
                setupExpirationTimeout(newSecondsUntilExpiration);
                console.debug(
                  `Token refreshed, new expiration in ${newSecondsUntilExpiration} seconds`
                );
              }
            }

            break; // Success - exit the retry loop
          } catch (error) {
            console.error(
              `Error refreshing token (attempt ${
                retryCount + 1
              }/${maxRetries}):`,
              error
            );
            retryCount++;

            if (retryCount === maxRetries) {
              console.error("Max retry attempts reached");
            } else {
              // Wait before retrying (exponential backoff)
              await new Promise((resolve) =>
                setTimeout(resolve, Math.pow(2, retryCount) * 1000)
              );
            }
          }
        }
      };

      // Set up refresh interval
      const refreshInterval = 60 * 15; // 15 mins

      // Clear any existing interval
      if (refreshIntervalRef.current) {
        clearInterval(refreshIntervalRef.current);
      }

      refreshIntervalRef.current = setInterval(
        attemptTokenRefresh,
        refreshInterval * 1000
      );

      // If we're going to expire before the next refresh, kick off a refresh now
      if (secondsUntilExpiration < refreshInterval) {
        attemptTokenRefresh();
      }
    }
  }, [user, setupExpirationTimeout, mutateUser]);

  // Logged out modal
  if (showLoggedOutModal) {
    return (
      <Modal open>
        <Modal.Content width="sm" height="sm">
          <Modal.Header icon={SvgLogOut} title="You Have Been Logged Out" />
          <Modal.Body>
            <p className="text-sm">
              Your session has expired. Please log in again to continue.
            </p>
          </Modal.Body>
          <Modal.Footer>
            <Button onClick={handleLogin}>Log In</Button>
          </Modal.Footer>
        </Modal.Content>
      </Modal>
    );
  }

  if (!error && !expired) {
    return null;
  }

  if (error instanceof RedirectError || expired) {
    if (!pathname?.includes("/auth")) {
      setShowLoggedOutModal(true);
    }
    return null;
  } else {
    return (
      <div className="fixed top-0 left-0 z-[101] w-full bg-status-error-01 p-3">
        <Content
          icon={SvgAlertTriangle}
          title="The backend is currently unavailable"
          description="If this is your initial setup or you just updated your Onyx deployment, this is likely because the backend is still starting up. Give it a minute or two, and then refresh the page. If that does not work, make sure the backend is setup and/or contact an administrator."
          sizePreset="main-content"
          variant="section"
        />
      </div>
    );
  }
}


================================================
FILE: web/src/sections/Suggestions.tsx
================================================
"use client";

import { OnSubmitProps } from "@/hooks/useChatController";
import { useCurrentAgent } from "@/hooks/useAgents";
import { Interactive } from "@opal/core";
import { Content } from "@opal/layouts";

export interface SuggestionsProps {
  onSubmit: (props: OnSubmitProps) => void;
}

export default function Suggestions({ onSubmit }: SuggestionsProps) {
  const currentAgent = useCurrentAgent();

  if (
    !currentAgent ||
    !currentAgent.starter_messages ||
    currentAgent.starter_messages.length === 0
  )
    return null;

  const handleSuggestionClick = (suggestion: string) => {
    onSubmit({
      message: suggestion,
      currentMessageFiles: [],
      deepResearch: false,
    });
  };

  return (
    <div className="max-w-[var(--app-page-main-content-width)] flex flex-col w-full p-1">
      {currentAgent.starter_messages.map(({ message }, index) => (
        <Interactive.Stateless
          key={index}
          variant="default"
          prominence="tertiary"
          onClick={() => handleSuggestionClick(message)}
        >
          <Interactive.Container
            widthVariant="full"
            roundingVariant="sm"
            heightVariant="lg"
          >
            <Content
              title={message}
              sizePreset="main-ui"
              variant="body"
              widthVariant="full"
              prominence="muted"
            />
          </Interactive.Container>
        </Interactive.Stateless>
      ))}
    </div>
  );
}


================================================
FILE: web/src/sections/actions/ActionCard.tsx
================================================
"use client";

import React, { useState, useEffect, useRef } from "react";
import ActionCardHeader from "@/sections/actions/ActionCardHeader";
import ToolsSection from "@/sections/actions/ToolsSection";
import { cn } from "@/lib/utils";
import { ActionStatus } from "@/lib/tools/interfaces";
import type { IconProps } from "@opal/types";
import { SvgServer } from "@opal/icons";
import {
  ActionCardProvider,
  ActionCardContextValue,
} from "@/sections/actions/ActionCardContext";

export interface ActionCardProps {
  // Core content
  title: string;
  description: string;
  icon?: React.FunctionComponent<IconProps>;

  // Status
  status: ActionStatus;

  // Header actions (right side of header)
  actions: React.ReactNode;

  // Edit handler for header
  onEdit?: () => void;

  // Rename handler for header
  onRename?: (newName: string) => Promise<void>;

  // Expansion control (can be controlled or uncontrolled)
  initialExpanded?: boolean;
  isExpanded?: boolean;
  onExpandedChange?: (expanded: boolean) => void;

  // Search functionality
  enableSearch?: boolean;
  searchQuery?: string;
  onSearchQueryChange?: (query: string) => void;

  // Tools section actions
  onFold?: () => void;

  // Content
  children?: React.ReactNode;

  // Accessibility
  ariaLabel?: string;

  // Optional styling
  className?: string;
}

// Main Component
export default function ActionCard({
  title,
  description,
  icon,
  status,
  actions,
  onEdit,
  onRename,
  initialExpanded = false,
  isExpanded: controlledIsExpanded,
  onExpandedChange,
  enableSearch = false,
  searchQuery = "",
  onSearchQueryChange,
  onFold,
  children,
  ariaLabel,
  className,
}: ActionCardProps) {
  // Internal state for uncontrolled mode
  const [internalExpanded, setInternalExpanded] = useState(initialExpanded);

  const hasInitializedExpansion = useRef(false);
  const [isHovered, setIsHovered] = useState(false);

  // Determine if we're in controlled mode
  const isControlled = controlledIsExpanded !== undefined;
  const isExpandedActual = isControlled
    ? controlledIsExpanded
    : internalExpanded;

  // Apply initial expansion only once per component lifetime (uncontrolled mode)
  useEffect(() => {
    if (!isControlled && initialExpanded && !hasInitializedExpansion.current) {
      setInternalExpanded(true);
      hasInitializedExpansion.current = true;
    }
  }, [initialExpanded, isControlled]);

  const isConnected = status === ActionStatus.CONNECTED;
  const isDisconnected = status === ActionStatus.DISCONNECTED;

  const backgroundColor = isConnected
    ? "bg-background-tint-00"
    : isDisconnected
      ? "bg-background-neutral-02"
      : "";

  const contextValue: ActionCardContextValue = { isHovered };

  return (
    <ActionCardProvider value={contextValue}>
      <div
        className={cn(
          "w-full",
          backgroundColor,
          "border border-border-01 rounded-16",
          "transition-shadow duration-200",
          isHovered && "shadow-00",
          className
        )}
        role="article"
        aria-label={ariaLabel || `${title} action card`}
        onMouseEnter={() => setIsHovered(true)}
        onMouseLeave={() => setIsHovered(false)}
      >
        <div className="flex flex-col w-full">
          {/* Header Section */}
          <div className="flex items-start justify-between gap-2 p-3 w-full">
            <ActionCardHeader
              title={title}
              description={description}
              icon={icon || SvgServer}
              status={status}
              onEdit={onEdit}
              onRename={onRename}
            />

            {/* Action Buttons */}
            <div className="shrink-0 flex items-start">{actions}</div>
          </div>

          {/* Tools Section (Only when expanded and search is enabled) */}
          {isExpandedActual && enableSearch && (
            <ToolsSection
              onFold={onFold}
              searchQuery={searchQuery}
              onSearchQueryChange={onSearchQueryChange || (() => {})}
            />
          )}
        </div>

        {/* Content Area - Only render when expanded */}
        {isExpandedActual && children && (
          <div className="animate-in fade-in slide-in-from-top-2 duration-300 p-2 border-t border-border-01">
            {children}
          </div>
        )}
      </div>
    </ActionCardProvider>
  );
}


================================================
FILE: web/src/sections/actions/ActionCardContext.tsx
================================================
"use client";

import { createContext, ReactNode, useContext } from "react";

export interface ActionCardContextValue {
  isHovered: boolean;
}

const defaultValue: ActionCardContextValue = {
  isHovered: false,
};

const ActionCardContext = createContext<ActionCardContextValue>(defaultValue);

interface ActionCardProviderProps {
  value: ActionCardContextValue;
  children: ReactNode;
}

export function ActionCardProvider({
  value,
  children,
}: ActionCardProviderProps) {
  return (
    <ActionCardContext.Provider value={value}>
      {children}
    </ActionCardContext.Provider>
  );
}

export function useActionCardContext() {
  return useContext(ActionCardContext);
}


================================================
FILE: web/src/sections/actions/ActionCardHeader.tsx
================================================
"use client";

import React, { useState } from "react";
import { cn } from "@/lib/utils";
import { ActionStatus } from "@/lib/tools/interfaces";
import Text from "@/refresh-components/texts/Text";
import IconButton from "@/refresh-components/buttons/IconButton";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import type { IconProps } from "@opal/types";
import Truncated from "@/refresh-components/texts/Truncated";
import { SvgEdit } from "@opal/icons";
import { useActionCardContext } from "@/sections/actions/ActionCardContext";

interface ActionCardHeaderProps {
  title: string;
  description: string;
  icon: React.FunctionComponent<IconProps>;
  status: ActionStatus;
  onEdit?: () => void;
  onRename?: (newName: string) => Promise<void>;
}

function ActionCardHeader({
  title,
  description,
  icon: Icon,
  status,
  onEdit,
  onRename,
}: ActionCardHeaderProps) {
  const [isRenaming, setIsRenaming] = useState(false);
  const { isHovered } = useActionCardContext();

  const isConnected = status === ActionStatus.CONNECTED;
  const isPending = status === ActionStatus.PENDING;
  const isDisconnected = status === ActionStatus.DISCONNECTED;
  const isFetching = status === ActionStatus.FETCHING;

  const showRenameIcon = onRename && isHovered && !isRenaming;

  const handleRename = async (newName: string) => {
    if (onRename) {
      await onRename(newName);
    }
    setIsRenaming(false);
  };

  const handleRenameClick = () => {
    if (onRename) {
      setIsRenaming(true);
    }
  };

  return (
    <div className="flex gap-2 items-start flex-1 min-w-0 mr-2">
      <div
        className={cn(
          "flex items-center px-0 py-0.5 shrink-0",
          isConnected && "h-7 w-7 justify-center p-1"
        )}
      >
        <Icon size={20} className="h-5 w-5 stroke-text-04" />
      </div>

      <div className="flex flex-col items-start flex-1 min-w-0 overflow-hidden">
        <div className="flex items-center gap-1 min-w-0 w-full">
          {isRenaming ? (
            <ButtonRenaming
              initialName={title}
              onRename={handleRename}
              onClose={() => setIsRenaming(false)}
              className={cn(
                "font-main-content-emphasis",
                isConnected || isFetching
                  ? "text-text-04"
                  : isDisconnected
                    ? "text-text-03"
                    : "text-text-04"
              )}
            />
          ) : (
            <div className="min-w-0 shrink overflow-hidden">
              <Truncated
                mainContentEmphasis
                className={cn(
                  "truncate",
                  isConnected || isFetching
                    ? "text-text-04"
                    : isDisconnected
                      ? "text-text-03 line-through"
                      : "text-text-04"
                )}
              >
                {title}
              </Truncated>
            </div>
          )}
          {isPending && !isRenaming && (
            <Text
              as="p"
              mainUiMuted
              text03
              className="shrink-0 whitespace-nowrap"
            >
              (Not Authenticated)
            </Text>
          )}
          {isDisconnected && !isRenaming && (
            <Text
              as="p"
              mainUiMuted
              text02
              className="shrink-0 whitespace-nowrap"
            >
              (Disconnected)
            </Text>
          )}
          {showRenameIcon && (
            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
            <IconButton
              icon={SvgEdit}
              tooltip="Rename"
              internal
              tertiary
              onClick={handleRenameClick}
              className="h-6 w-6 opacity-70 hover:opacity-100"
              aria-label={`Rename ${title}`}
            />
          )}
        </div>

        {isConnected ? (
          <Text as="p" secondaryBody text03 className="w-full">
            {description}
          </Text>
        ) : (
          <Text as="p" secondaryBody text02 className="w-full">
            {description}
          </Text>
        )}
      </div>
    </div>
  );
}

export default ActionCardHeader;


================================================
FILE: web/src/sections/actions/Actions.tsx
================================================
"use client";
import { ActionStatus } from "@/lib/tools/interfaces";
import React from "react";
import { Button } from "@opal/components";
import {
  SvgArrowExchange,
  SvgChevronDown,
  SvgPlug,
  SvgSettings,
  SvgTrash,
  SvgUnplug,
} from "@opal/icons";
import { useActionCardContext } from "@/sections/actions/ActionCardContext";
import { cn } from "@/lib/utils";

interface ActionsProps {
  status: ActionStatus;
  serverName: string;
  onDisconnect?: () => void;
  onManage?: () => void;
  onAuthenticate?: () => void;
  onReconnect?: () => void;
  onDelete?: () => void;
  toolCount?: number;
  isToolsExpanded?: boolean;
  onToggleTools?: () => void;
}

const Actions = React.memo(
  ({
    status,
    serverName,
    onDisconnect,
    onManage,
    onAuthenticate,
    onReconnect,
    onDelete,
    toolCount,
    isToolsExpanded,
    onToggleTools,
  }: ActionsProps) => {
    const { isHovered: isParentHovered } = useActionCardContext();
    const showViewToolsButton =
      (status === ActionStatus.CONNECTED ||
        status === ActionStatus.FETCHING ||
        status === ActionStatus.DISCONNECTED) &&
      !isToolsExpanded &&
      onToggleTools;

    // Connected state
    if (status === ActionStatus.CONNECTED || status === ActionStatus.FETCHING) {
      return (
        <div className="flex flex-col gap-1 items-end">
          <div className="flex items-center">
            {onDisconnect && (
              <div
                className={cn(
                  "inline-flex transition-all duration-200 ease-out",
                  isParentHovered
                    ? "opacity-100 translate-x-0 pointer-events-auto"
                    : "opacity-0 translate-x-2 pointer-events-none"
                )}
              >
                <Button
                  icon={SvgUnplug}
                  tooltip="Disconnect Server"
                  prominence="tertiary"
                  onClick={onDisconnect}
                  aria-label={`Disconnect ${serverName} server`}
                />
              </div>
            )}
            {onManage && (
              <Button
                icon={SvgSettings}
                tooltip="Manage Server"
                prominence="tertiary"
                onClick={onManage}
                aria-label={`Manage ${serverName} server`}
              />
            )}
          </div>
          {showViewToolsButton && (
            <Button
              prominence="tertiary"
              onClick={onToggleTools}
              rightIcon={SvgChevronDown}
              aria-label={`View tools for ${serverName}`}
            >
              {status === ActionStatus.FETCHING
                ? "Fetching tools..."
                : `View ${toolCount ?? 0} tool${toolCount !== 1 ? "s" : ""}`}
            </Button>
          )}
        </div>
      );
    }

    // Pending state
    if (status === ActionStatus.PENDING) {
      return (
        <div className="flex flex-col gap-1 items-end shrink-0">
          {onAuthenticate && (
            <Button
              prominence="tertiary"
              onClick={onAuthenticate}
              rightIcon={SvgArrowExchange}
              aria-label={`Authenticate and connect to ${serverName}`}
            >
              Authenticate
            </Button>
          )}
          <div
            className={cn(
              "flex gap-1 items-center transition-opacity duration-200 ease-out",
              isParentHovered
                ? "opacity-100 pointer-events-auto"
                : "opacity-0 pointer-events-none"
            )}
          >
            {onDelete && (
              <Button
                icon={SvgTrash}
                tooltip="Delete Server"
                prominence="tertiary"
                onClick={onDelete}
                aria-label={`Delete ${serverName} server`}
              />
            )}
            {onManage && (
              <Button
                icon={SvgSettings}
                tooltip="Manage Server"
                prominence="tertiary"
                onClick={onManage}
                aria-label={`Manage ${serverName} server`}
              />
            )}
          </div>
        </div>
      );
    }

    // Disconnected state
    return (
      <div className="flex flex-col gap-1 items-end shrink-0">
        <div className="flex gap-1 items-end">
          {onReconnect && (
            <Button
              prominence="secondary"
              onClick={onReconnect}
              rightIcon={SvgPlug}
              aria-label={`Reconnect to ${serverName}`}
            >
              Reconnect
            </Button>
          )}
          {onManage && (
            <Button
              icon={SvgSettings}
              tooltip="Manage Server"
              prominence="tertiary"
              onClick={onManage}
              aria-label={`Manage ${serverName} server`}
            />
          )}
        </div>
        {showViewToolsButton && (
          <Button
            disabled
            prominence="tertiary"
            onClick={onToggleTools}
            rightIcon={SvgChevronDown}
            aria-label={`View tools for ${serverName}`}
          >
            {`View ${toolCount ?? 0} tool${toolCount !== 1 ? "s" : ""}`}
          </Button>
        )}
      </div>
    );
  }
);
Actions.displayName = "Actions";

export default Actions;


================================================
FILE: web/src/sections/actions/MCPActionCard.tsx
================================================
"use client";

import React, {
  useState,
  useMemo,
  useEffect,
  useRef,
  useCallback,
} from "react";
import ActionCard from "@/sections/actions/ActionCard";
import Actions from "@/sections/actions/Actions";
import ToolItem from "@/sections/actions/ToolItem";
import ToolsList from "@/sections/actions/ToolsList";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import {
  ActionStatus,
  ToolSnapshot,
  MCPServerStatus,
  MCPServer,
} from "@/lib/tools/interfaces";
import useServerTools from "@/hooks/useServerTools";
import { KeyedMutator } from "swr";
import type { IconProps } from "@opal/types";
import { SvgRefreshCw, SvgServer, SvgTrash } from "@opal/icons";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { timeAgo } from "@/lib/time";
import { cn } from "@/lib/utils";
import Modal from "@/refresh-components/layouts/ConfirmationModalLayout";

export interface MCPActionCardProps {
  // Server identification
  serverId: number;
  server: MCPServer;

  // Core content
  title: string;
  description: string;
  logo?: React.FunctionComponent<IconProps>;

  // Status
  status: ActionStatus;

  // Initial expanded state
  initialExpanded?: boolean;

  // Tool count (only for connected state)
  toolCount?: number;

  // Actions
  onDisconnect?: () => void;
  onManage?: () => void;
  onEdit?: () => void;
  onDelete?: () => Promise<void> | void;
  onAuthenticate?: () => void; // For pending state
  onReconnect?: () => void; // For disconnected state
  onRename?: (serverId: number, newName: string) => Promise<void>; // For renaming

  // Tool-related actions (now includes SWR mutate function for optimistic updates)
  onToolToggle?: (
    serverId: number,
    toolId: string,
    enabled: boolean,
    mutate: KeyedMutator<ToolSnapshot[]>
  ) => void;
  onRefreshTools?: (
    serverId: number,
    mutate: KeyedMutator<ToolSnapshot[]>
  ) => void;
  onUpdateToolsStatus?: (
    serverId: number,
    toolIds: number[],
    enabled: boolean,
    mutate: KeyedMutator<ToolSnapshot[]>
  ) => void;

  // Optional styling
  className?: string;
}

// Main Component
export default function MCPActionCard({
  serverId,
  server,
  title,
  description,
  logo,
  status,
  initialExpanded = false,
  toolCount,
  onDisconnect,
  onManage,
  onEdit,
  onDelete,
  onAuthenticate,
  onReconnect,
  onRename,
  onToolToggle,
  onRefreshTools,
  onUpdateToolsStatus,
  className,
}: MCPActionCardProps) {
  const [isToolsExpanded, setIsToolsExpanded] = useState(initialExpanded);
  const [searchQuery, setSearchQuery] = useState("");
  const [showOnlyEnabled, setShowOnlyEnabled] = useState(false);
  const [isToolsRefreshing, setIsToolsRefreshing] = useState(false);
  const deleteModal = useCreateModal();

  // Update expanded state when initialExpanded changes
  const hasInitializedExpansion = useRef(false);
  const previousStatus = useRef<MCPServerStatus>(server.status);
  const hasRetriedTools = useRef(false);

  // Apply initial expansion only once per component lifetime
  useEffect(() => {
    if (initialExpanded && !hasInitializedExpansion.current) {
      setIsToolsExpanded(true);
      hasInitializedExpansion.current = true;
    }
  }, [initialExpanded]);

  // Collapse tools when server becomes disconnected or awaiting auth
  useEffect(() => {
    if (
      server.status === MCPServerStatus.DISCONNECTED ||
      server.status === MCPServerStatus.AWAITING_AUTH
    ) {
      setIsToolsExpanded(false);
    }
  }, [server.status]);

  // Lazy load tools only when expanded
  const { tools, isLoading, mutate } = useServerTools(server, isToolsExpanded);

  // Retry tools fetch when server transitions from FETCHING_TOOLS to CONNECTED
  useEffect(() => {
    const statusChanged =
      previousStatus.current === MCPServerStatus.FETCHING_TOOLS &&
      server.status === MCPServerStatus.CONNECTED;

    if (statusChanged && tools.length === 0 && !hasRetriedTools.current) {
      console.log(
        "Server status changed to CONNECTED with empty tools, retrying fetch"
      );
      hasRetriedTools.current = true;
      mutate();
    }

    // Update previous status
    previousStatus.current = server.status;
  }, [server.status, tools.length, mutate]);

  const isNotAuthenticated = status === ActionStatus.PENDING;

  // Filter tools based on search query and enabled status
  const filteredTools = useMemo(() => {
    if (!tools) return [];

    let filtered = tools;

    // Filter by enabled status if showOnlyEnabled is true
    if (showOnlyEnabled) {
      filtered = filtered.filter((tool) => tool.isEnabled);
    }

    // Filter by search query
    if (searchQuery.trim()) {
      const query = searchQuery.toLowerCase();
      filtered = filtered.filter(
        (tool) =>
          tool.name.toLowerCase().includes(query) ||
          tool.description.toLowerCase().includes(query)
      );
    }

    return filtered;
  }, [tools, searchQuery, showOnlyEnabled]);

  const icon = isNotAuthenticated ? SvgServer : logo;

  const handleToggleTools = useCallback(() => {
    setIsToolsExpanded((prev) => !prev);
    if (isToolsExpanded) {
      setSearchQuery("");
    }
  }, [isToolsExpanded]);

  const handleFold = () => {
    setIsToolsExpanded(false);
    setSearchQuery("");
    setShowOnlyEnabled(false);
  };

  const handleToggleShowOnlyEnabled = () => {
    setShowOnlyEnabled((prev) => !prev);
  };

  // Build the actions component
  const actionsComponent = useMemo(
    () => (
      <Actions
        status={status}
        serverName={title}
        onDisconnect={onDisconnect}
        onManage={onManage}
        onAuthenticate={onAuthenticate}
        onReconnect={onReconnect}
        onDelete={onDelete ? () => deleteModal.toggle(true) : undefined}
        toolCount={toolCount}
        isToolsExpanded={isToolsExpanded}
        onToggleTools={handleToggleTools}
      />
    ),
    [
      deleteModal,
      handleToggleTools,
      isToolsExpanded,
      onAuthenticate,
      onDelete,
      onDisconnect,
      onManage,
      onReconnect,
      status,
      title,
      toolCount,
    ]
  );

  const handleRename = async (newName: string) => {
    if (onRename) {
      await onRename(serverId, newName);
    }
  };

  const handleRefreshTools = () => {
    setIsToolsRefreshing(true);
    onRefreshTools?.(serverId, mutate);
    setTimeout(() => {
      setIsToolsRefreshing(false);
    }, 1000);
  };

  // Left action for ToolsList footer
  const leftAction = useMemo(() => {
    const lastRefreshedText = timeAgo(server.last_refreshed_at);

    return (
      <div className="flex items-center gap-2">
        <Button
          icon={isToolsRefreshing ? SimpleLoader : SvgRefreshCw}
          prominence="internal"
          onClick={handleRefreshTools}
          tooltip="Refresh tools"
          aria-label="Refresh tools"
        />
        {lastRefreshedText && (
          <Text as="p" text03 mainUiBody className="whitespace-nowrap">
            Tools last refreshed {lastRefreshedText}
          </Text>
        )}
      </div>
    );
  }, [
    server.last_refreshed_at,
    serverId,
    mutate,
    onRefreshTools,
    isToolsRefreshing,
  ]);

  return (
    <>
      <ActionCard
        title={title}
        description={description}
        icon={icon}
        status={status}
        actions={actionsComponent}
        onEdit={onEdit}
        onRename={handleRename}
        isExpanded={isToolsExpanded}
        onExpandedChange={setIsToolsExpanded}
        enableSearch={true}
        searchQuery={searchQuery}
        onSearchQueryChange={setSearchQuery}
        onFold={handleFold}
        className={className}
        ariaLabel={`${title} MCP server card`}
      >
        <ToolsList
          isFetching={
            server.status === MCPServerStatus.FETCHING_TOOLS || isLoading
          }
          totalCount={tools.length}
          enabledCount={tools.filter((tool) => tool.isEnabled).length}
          showOnlyEnabled={showOnlyEnabled}
          onToggleShowOnlyEnabled={handleToggleShowOnlyEnabled}
          onUpdateToolsStatus={(enabled) => {
            const toolIds = tools.map((tool) => parseInt(tool.id));
            onUpdateToolsStatus?.(serverId, toolIds, enabled, mutate);
          }}
          isEmpty={filteredTools.length === 0}
          searchQuery={searchQuery}
          emptyMessage="No tools available"
          emptySearchMessage="No tools found"
          leftAction={leftAction}
        >
          {filteredTools.map((tool) => (
            <ToolItem
              key={tool.id}
              name={tool.name}
              description={tool.description}
              icon={tool.icon}
              isAvailable={tool.isAvailable}
              isEnabled={tool.isEnabled}
              onToggle={(enabled) =>
                onToolToggle?.(serverId, tool.id, enabled, mutate)
              }
              variant="mcp"
            />
          ))}
        </ToolsList>
      </ActionCard>

      {deleteModal.isOpen && (
        <Modal
          icon={({ className }) => (
            <SvgTrash className={cn(className, "stroke-action-danger-05")} />
          )}
          title="Delete MCP server"
          onClose={() => deleteModal.toggle(false)}
          submit={
            <Button
              variant="danger"
              onClick={async () => {
                if (!onDelete) return;
                try {
                  await onDelete();
                  deleteModal.toggle(false);
                } catch (error) {
                  // Keep modal open if deletion fails; caller should surface error feedback.
                  console.error("Failed to delete MCP server", error);
                }
              }}
            >
              Delete
            </Button>
          }
        >
          <div className="flex flex-col gap-4">
            <Text as="p" text03>
              All tools connected to <b>{title}</b> will be removed. Deletion is
              irreversible.
            </Text>
            <Text as="p" text03>
              Are you sure you want to delete this MCP server?
            </Text>
          </div>
        </Modal>
      )}
    </>
  );
}


================================================
FILE: web/src/sections/actions/MCPPageContent.tsx
================================================
"use client";

import { useState, useCallback, useMemo, useEffect } from "react";
import { KeyedMutator } from "swr";
import MCPActionCard from "@/sections/actions/MCPActionCard";
import AdminListHeader from "@/sections/admin/AdminListHeader";
import ActionCardSkeleton from "@/sections/actions/skeleton/ActionCardSkeleton";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import {
  ActionStatus,
  MCPServerStatus,
  MCPServer,
  ToolSnapshot,
} from "@/lib/tools/interfaces";
import { toast } from "@/hooks/useToast";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import MCPAuthenticationModal from "@/sections/actions/modals/MCPAuthenticationModal";
import AddMCPServerModal from "@/sections/actions/modals/AddMCPServerModal";
import DisconnectEntityModal from "./modals/DisconnectEntityModal";
import {
  deleteMCPServer,
  refreshMCPServerTools,
  updateToolStatus,
  updateMCPServerStatus,
  updateMCPServer,
  updateToolsStatus,
} from "@/lib/tools/mcpService";
import { useSearchParams } from "next/navigation";
import { useRouter } from "next/navigation";
import useMcpServers from "@/hooks/useMcpServers";

export default function MCPPageContent() {
  // Data fetching
  const {
    mcpData,
    isLoading: isMcpLoading,
    mutateMcpServers,
  } = useMcpServers();

  // Modal management
  const authModal = useCreateModal();
  const disconnectModal = useCreateModal();
  const manageServerModal = useCreateModal();

  // Local state
  const [activeServer, setActiveServer] = useState<MCPServer | null>(null);
  const [serverToExpand, setServerToExpand] = useState<number | null>(null);
  const [isDisconnecting, setIsDisconnecting] = useState(false);
  const [showSharedOverlay, setShowSharedOverlay] = useState(false);
  const [fetchingToolsServerIds, setFetchingToolsServerIds] = useState<
    number[]
  >([]);
  const [searchQuery, setSearchQuery] = useState("");

  const mcpServers = useMemo(
    () => (mcpData?.mcp_servers || []) as MCPServer[],
    [mcpData?.mcp_servers]
  );
  const isLoading = isMcpLoading;

  const searchParams = useSearchParams();
  const router = useRouter();

  useEffect(() => {
    const serverId = searchParams.get("server_id");
    const triggerFetch = searchParams.get("trigger_fetch");

    // Only process if we have a server_id and trigger_fetch flag
    if (
      serverId &&
      triggerFetch === "true" &&
      !fetchingToolsServerIds.includes(parseInt(serverId))
    ) {
      const serverIdInt = parseInt(serverId);

      const handleFetchingTools = async () => {
        try {
          await updateMCPServerStatus(
            serverIdInt,
            MCPServerStatus.FETCHING_TOOLS
          );

          await mutateMcpServers();

          router.replace("/admin/actions/mcp");

          // Automatically expand the tools for this server
          setServerToExpand(serverIdInt);

          await refreshMCPServerTools(serverIdInt);

          toast.success("Successfully connected and fetched tools");

          await mutateMcpServers();
        } catch (error) {
          console.error("Failed to fetch tools:", error);
          toast.error(
            `Failed to fetch tools: ${
              error instanceof Error ? error.message : "Unknown error"
            }`
          );
          await mutateMcpServers();
        }
      };

      handleFetchingTools();
    }
  }, [
    searchParams,
    router,
    fetchingToolsServerIds,
    mutateMcpServers,
    setServerToExpand,
  ]);

  // Track fetching tools server IDs
  useEffect(() => {
    if (mcpServers) {
      const fetchingIds = mcpServers
        .filter((server) => server.status === MCPServerStatus.FETCHING_TOOLS)
        .map((server) => server.id);
      setFetchingToolsServerIds(fetchingIds);
    }
  }, [mcpServers]);

  // Track if any modal is open to manage the shared overlay
  useEffect(() => {
    const anyModalOpen =
      authModal.isOpen || disconnectModal.isOpen || manageServerModal.isOpen;
    setShowSharedOverlay(anyModalOpen);
  }, [authModal.isOpen, disconnectModal.isOpen, manageServerModal.isOpen]);

  // Determine action status based on server status field
  const getActionStatusForServer = useCallback(
    (server: MCPServer): ActionStatus => {
      if (server.status === MCPServerStatus.CONNECTED) {
        return ActionStatus.CONNECTED;
      } else if (
        server.status === MCPServerStatus.AWAITING_AUTH ||
        server.status === MCPServerStatus.CREATED
      ) {
        return ActionStatus.PENDING;
      } else if (server.status === MCPServerStatus.FETCHING_TOOLS) {
        return ActionStatus.FETCHING;
      }
      return ActionStatus.DISCONNECTED;
    },
    []
  );

  // Handler callbacks
  const handleDisconnect = useCallback(
    (serverId: number) => {
      const server = mcpServers.find((s) => s.id === serverId);
      if (server) {
        setActiveServer(server);
        disconnectModal.toggle(true);
      }
    },
    [mcpServers, disconnectModal]
  );

  const handleConfirmDisconnect = useCallback(async () => {
    if (!activeServer) return;

    setIsDisconnecting(true);
    try {
      await updateMCPServerStatus(
        activeServer.id,
        MCPServerStatus.DISCONNECTED
      );

      toast.success("MCP Server disconnected successfully");

      await mutateMcpServers();
      disconnectModal.toggle(false);
      setActiveServer(null);
    } catch (error) {
      console.error("Error disconnecting server:", error);
      toast.error(
        error instanceof Error
          ? error.message
          : "Failed to disconnect MCP Server"
      );
    } finally {
      setIsDisconnecting(false);
    }
  }, [activeServer, mutateMcpServers, disconnectModal]);

  const handleConfirmDisconnectAndDelete = useCallback(async () => {
    if (!activeServer) return;

    setIsDisconnecting(true);
    try {
      await deleteMCPServer(activeServer.id);

      toast.success("MCP Server deleted successfully");

      await mutateMcpServers();
      disconnectModal.toggle(false);
      setActiveServer(null);
    } catch (error) {
      console.error("Error deleting server:", error);
      toast.error(
        error instanceof Error ? error.message : "Failed to delete MCP Server"
      );
    } finally {
      setIsDisconnecting(false);
    }
  }, [activeServer, mutateMcpServers, disconnectModal]);

  const openManageServerModal = useCallback(
    (serverId: number) => {
      const server = mcpServers.find((s) => s.id === serverId);
      if (server) {
        setActiveServer(server);
        manageServerModal.toggle(true);
      }
    },
    [mcpServers, manageServerModal]
  );

  const handleManage = useCallback(
    (serverId: number) => {
      openManageServerModal(serverId);
    },
    [openManageServerModal]
  );

  const handleEdit = useCallback(
    (serverId: number) => {
      openManageServerModal(serverId);
    },
    [openManageServerModal]
  );

  const handleDelete = useCallback(
    async (serverId: number) => {
      try {
        await deleteMCPServer(serverId);

        toast.success("MCP Server deleted successfully");

        await mutateMcpServers();
      } catch (error) {
        console.error("Error deleting server:", error);
        toast.error(
          error instanceof Error ? error.message : "Failed to delete MCP Server"
        );
      }
    },
    [mutateMcpServers]
  );

  const handleAuthenticate = useCallback(
    (serverId: number) => {
      const server = mcpServers.find((s) => s.id === serverId);
      if (server) {
        setActiveServer(server);
        authModal.toggle(true);
      }
    },
    [mcpServers, authModal]
  );

  const triggerFetchToolsInPlace = useCallback(
    async (serverId: number) => {
      if (fetchingToolsServerIds.includes(serverId)) {
        return;
      }

      try {
        // Expand tools list immediately so the user sees the skeleton
        setServerToExpand(serverId);

        await updateMCPServerStatus(serverId, MCPServerStatus.FETCHING_TOOLS);
        await mutateMcpServers();

        await refreshMCPServerTools(serverId);

        toast.success("Successfully connected and fetched tools");

        await mutateMcpServers();
      } catch (error) {
        console.error("Failed to fetch tools:", error);
        toast.error(
          `Failed to fetch tools: ${
            error instanceof Error ? error.message : "Unknown error"
          }`
        );
        await mutateMcpServers();
      }
    },
    [fetchingToolsServerIds, mutateMcpServers, setServerToExpand]
  );

  const handleReconnect = useCallback(
    async (serverId: number) => {
      try {
        await updateMCPServerStatus(serverId, MCPServerStatus.CONNECTED);

        toast.success("MCP Server reconnected successfully");

        await mutateMcpServers();
      } catch (error) {
        console.error("Error reconnecting server:", error);
        toast.error(
          error instanceof Error
            ? error.message
            : "Failed to reconnect MCP Server"
        );
      }
    },
    [mutateMcpServers]
  );

  const handleToolToggle = useCallback(
    async (
      serverId: number,
      toolId: string,
      enabled: boolean,
      mutateServerTools: KeyedMutator<ToolSnapshot[]>
    ) => {
      try {
        // Optimistically update the UI
        await mutateServerTools(
          async (currentTools) => {
            if (!currentTools) return currentTools;
            return currentTools.map((tool) =>
              tool.id.toString() === toolId ? { ...tool, enabled } : tool
            );
          },
          { revalidate: false }
        );

        await updateToolStatus(parseInt(toolId), enabled);

        // Revalidate to get fresh data from server
        await mutateServerTools();

        toast.success(`Tool ${enabled ? "enabled" : "disabled"} successfully`);
      } catch (error) {
        console.error("Error toggling tool:", error);

        // Revert on error by revalidating
        await mutateServerTools();

        toast.error(
          error instanceof Error ? error.message : "Failed to update tool"
        );
      }
    },
    []
  );

  const handleRefreshTools = useCallback(
    async (
      serverId: number,
      mutateServerTools: KeyedMutator<ToolSnapshot[]>
    ) => {
      try {
        // Refresh tools for this specific server (discovers from MCP and syncs to DB)
        await refreshMCPServerTools(serverId);

        // Update the local cache with fresh data
        await mutateServerTools();

        // Also refresh the servers list to update tool counts
        await mutateMcpServers();

        toast.success("Tools refreshed successfully");
      } catch (error) {
        console.error("Error refreshing tools:", error);
        toast.error(
          error instanceof Error ? error.message : "Failed to refresh tools"
        );
      }
    },
    [mutateMcpServers]
  );

  const handleUpdateToolsStatus = useCallback(
    async (
      serverId: number,
      toolIds: number[],
      enabled: boolean,
      mutateServerTools: KeyedMutator<ToolSnapshot[]>
    ) => {
      try {
        if (toolIds.length === 0) {
          toast.info("No tools to disable");
          return;
        }

        // Optimistically update - disable all tools in the UI
        await mutateServerTools(
          async (currentTools) => {
            if (!currentTools) return currentTools;
            return currentTools.map((tool) =>
              toolIds.includes(tool.id) ? { ...tool, enabled } : tool
            );
          },
          { revalidate: false }
        );

        const result = await updateToolsStatus(toolIds, enabled);

        // Revalidate to get fresh data from server
        await mutateServerTools();

        toast.success(
          `${result.updated_count} tool${
            result.updated_count !== 1 ? "s" : ""
          } ${enabled ? "enabled" : "disabled"} successfully`
        );
      } catch (error) {
        console.error(
          `Error ${enabled ? "enabling" : "disabling"} all tools:`,
          error
        );

        // Revert on error by revalidating
        await mutateServerTools();

        toast.error(
          error instanceof Error
            ? error.message
            : `Failed to ${enabled ? "enable" : "disable"} all tools`
        );
      }
    },
    []
  );

  const onServerCreated = useCallback(
    (server: MCPServer) => {
      setActiveServer(server);
      authModal.toggle(true);
    },
    [authModal]
  );

  const handleAddServer = useCallback(() => {
    setActiveServer(null);
    manageServerModal.toggle(true);
  }, [manageServerModal]);

  const handleRenameServer = useCallback(
    async (serverId: number, newName: string) => {
      try {
        await updateMCPServer(serverId, { name: newName });
        toast.success("MCP Server renamed successfully");
        await mutateMcpServers();
      } catch (error) {
        console.error("Error renaming server:", error);
        toast.error(
          error instanceof Error ? error.message : "Failed to rename MCP Server"
        );
        throw error; // Re-throw so ButtonRenaming can handle it
      }
    },
    [mutateMcpServers]
  );

  // Filter servers based on search query
  const filteredServers = useMemo(() => {
    if (!searchQuery.trim()) return mcpServers;

    const query = searchQuery.toLowerCase();
    return mcpServers.filter(
      (server) =>
        server.name.toLowerCase().includes(query) ||
        server.description?.toLowerCase().includes(query) ||
        server.server_url.toLowerCase().includes(query)
    );
  }, [mcpServers, searchQuery]);

  return (
    <div className="flex flex-col h-full overflow-hidden">
      {/* Shared overlay that persists across modal transitions */}
      {showSharedOverlay && (
        <div
          className="fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none data-[state=open]:animate-in data-[state=open]:fade-in-0"
          data-state="open"
          aria-hidden="true"
        />
      )}

      <div className="flex-shrink-0 mb-4">
        <AdminListHeader
          hasItems={isLoading || mcpServers.length > 0}
          searchQuery={searchQuery}
          onSearchQueryChange={setSearchQuery}
          onAction={handleAddServer}
          actionLabel="Add MCP Server"
          emptyStateText="Connect MCP server to add custom actions."
        />
      </div>

      <div className="flex-1 overflow-y-auto min-h-0">
        <div className="flex flex-col gap-4 w-full pb-4">
          {isLoading ? (
            <>
              <ActionCardSkeleton />
              <ActionCardSkeleton />
            </>
          ) : (
            filteredServers.map((server) => {
              const status = getActionStatusForServer(server);

              return (
                <MCPActionCard
                  key={server.id}
                  serverId={server.id}
                  server={server}
                  title={server.name}
                  description={server.description || server.server_url}
                  logo={getActionIcon(server.server_url, server.name)}
                  status={status}
                  toolCount={server.tool_count}
                  initialExpanded={server.id === serverToExpand}
                  onDisconnect={() => handleDisconnect(server.id)}
                  onManage={() => handleManage(server.id)}
                  onEdit={() => handleEdit(server.id)}
                  onDelete={() => handleDelete(server.id)}
                  onAuthenticate={() => handleAuthenticate(server.id)}
                  onReconnect={() => handleReconnect(server.id)}
                  onRename={handleRenameServer}
                  onToolToggle={handleToolToggle}
                  onRefreshTools={handleRefreshTools}
                  onUpdateToolsStatus={handleUpdateToolsStatus}
                />
              );
            })
          )}
        </div>
      </div>

      <authModal.Provider>
        <MCPAuthenticationModal
          mcpServer={activeServer}
          skipOverlay
          onTriggerFetchTools={triggerFetchToolsInPlace}
          mutateMcpServers={mutateMcpServers}
        />
      </authModal.Provider>

      <manageServerModal.Provider>
        <AddMCPServerModal
          skipOverlay
          activeServer={activeServer}
          setActiveServer={setActiveServer}
          disconnectModal={disconnectModal}
          manageServerModal={manageServerModal}
          onServerCreated={onServerCreated}
          handleAuthenticate={handleAuthenticate}
          mutateMcpServers={async () => {
            await mutateMcpServers();
          }}
        />
      </manageServerModal.Provider>

      <DisconnectEntityModal
        isOpen={disconnectModal.isOpen}
        onClose={() => {
          disconnectModal.toggle(false);
          setActiveServer(null);
        }}
        name={activeServer?.name ?? null}
        onConfirmDisconnect={handleConfirmDisconnect}
        onConfirmDisconnectAndDelete={handleConfirmDisconnectAndDelete}
        isDisconnecting={isDisconnecting}
        skipOverlay
      />
    </div>
  );
}


================================================
FILE: web/src/sections/actions/OpenApiActionCard.tsx
================================================
"use client";

import React, { useCallback, useEffect, useMemo, useState } from "react";
import { toast } from "@/hooks/useToast";
import ActionCard from "@/sections/actions/ActionCard";
import Actions from "@/sections/actions/Actions";
import ToolsList from "@/sections/actions/ToolsList";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { ToolSnapshot, ActionStatus, MethodSpec } from "@/lib/tools/interfaces";
import ToolItem from "@/sections/actions/ToolItem";
import { extractMethodSpecsFromDefinition } from "@/lib/tools/openApiService";
import { updateToolStatus } from "@/lib/tools/mcpService";
import { SvgServer, SvgTrash } from "@opal/icons";
import Modal from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";

export interface OpenApiActionCardProps {
  tool: ToolSnapshot;
  onAuthenticate: (tool: ToolSnapshot) => void;
  onManage?: (tool: ToolSnapshot) => void;
  onDelete?: (tool: ToolSnapshot) => Promise<void> | void;
  onRename?: (toolId: number, newName: string) => Promise<void>;
  mutateOpenApiTools: () => Promise<unknown> | void;
  onOpenDisconnectModal?: (tool: ToolSnapshot) => void;
}

export default function OpenApiActionCard({
  tool,
  onAuthenticate,
  onManage,
  onDelete,
  onRename,
  mutateOpenApiTools,
  onOpenDisconnectModal,
}: OpenApiActionCardProps) {
  const [isToolsExpanded, setIsToolsExpanded] = useState(false);
  const [searchQuery, setSearchQuery] = useState("");
  const [updatingStatus, setUpdatingStatus] = useState(false);
  const deleteModal = useCreateModal();

  const methodSpecs = useMemo<MethodSpec[]>(() => {
    try {
      return extractMethodSpecsFromDefinition(tool.definition) ?? [];
    } catch (error) {
      console.error("Failed to parse OpenAPI definition", error);
      return [];
    }
  }, [tool.definition]);

  const filteredTools = useMemo(() => {
    if (!searchQuery.trim()) return methodSpecs;

    const query = searchQuery.toLowerCase();
    return methodSpecs.filter((method) => {
      const name = method.name?.toLowerCase() ?? "";
      const summary = method.summary?.toLowerCase() ?? "";
      return name.includes(query) || summary.includes(query);
    });
  }, [methodSpecs, searchQuery]);

  const hasCustomHeaders =
    Array.isArray(tool.custom_headers) && tool.custom_headers.length > 0;
  const hasAuthConfigured =
    Boolean(tool.oauth_config_id) ||
    Boolean(tool.passthrough_auth) ||
    hasCustomHeaders;
  const isDisconnected = !tool.enabled;

  // Compute generic ActionStatus for the OpenAPI tool
  const status = isDisconnected
    ? ActionStatus.DISCONNECTED
    : hasAuthConfigured
      ? ActionStatus.CONNECTED
      : ActionStatus.PENDING;

  const handleConnectionUpdate = useCallback(
    async (shouldEnable: boolean) => {
      if (updatingStatus || tool.enabled === shouldEnable) {
        return;
      }

      try {
        setUpdatingStatus(true);
        await updateToolStatus(tool.id, shouldEnable);
        await mutateOpenApiTools();
      } catch (error) {
        console.error("Failed to update OpenAPI tool status", error);
      } finally {
        setUpdatingStatus(false);
      }
    },
    [updatingStatus, mutateOpenApiTools, tool.enabled, tool.id]
  );

  const handleToggleTools = useCallback(() => {
    setIsToolsExpanded((prev) => !prev);
    if (isToolsExpanded) {
      setSearchQuery("");
    }
  }, [isToolsExpanded]);

  useEffect(() => {
    if (isDisconnected) {
      setIsToolsExpanded(false);
    }
  }, [isDisconnected]);

  const handleFold = () => {
    setIsToolsExpanded(false);
    setSearchQuery("");
  };

  // Build the actions component
  const actionsComponent = useMemo(
    () => (
      <Actions
        status={status}
        serverName={tool.name}
        toolCount={methodSpecs.length}
        isToolsExpanded={isToolsExpanded}
        onToggleTools={methodSpecs.length ? handleToggleTools : undefined}
        onDisconnect={() => onOpenDisconnectModal?.(tool)}
        onManage={onManage ? () => onManage(tool) : undefined}
        onAuthenticate={() => {
          onAuthenticate(tool);
        }}
        onReconnect={() => handleConnectionUpdate(true)}
        onDelete={onDelete ? () => deleteModal.toggle(true) : undefined}
      />
    ),
    [
      deleteModal,
      handleConnectionUpdate,
      handleToggleTools,
      isToolsExpanded,
      methodSpecs.length,
      onAuthenticate,
      onDelete,
      onManage,
      onOpenDisconnectModal,
      status,
      tool,
    ]
  );

  const handleRename = async (newName: string) => {
    if (onRename) {
      await onRename(tool.id, newName);
    }
  };

  return (
    <>
      <ActionCard
        title={tool.name}
        description={tool.description}
        icon={SvgServer}
        status={status}
        actions={actionsComponent}
        onRename={handleRename}
        isExpanded={isToolsExpanded}
        onExpandedChange={setIsToolsExpanded}
        enableSearch={true}
        searchQuery={searchQuery}
        onSearchQueryChange={setSearchQuery}
        onFold={handleFold}
        ariaLabel={`${tool.name} OpenAPI action card`}
      >
        <ToolsList
          isEmpty={filteredTools.length === 0}
          searchQuery={searchQuery}
          emptyMessage="No actions defined for this OpenAPI schema"
          emptySearchMessage="No actions match your search"
          className="gap-2"
        >
          {filteredTools.map((method) => (
            <ToolItem
              key={`${tool.id}-${method.method}-${method.path}-${method.name}`}
              name={method.name}
              description={method.summary || "No summary provided"}
              variant="openapi"
              openApiMetadata={{
                method: method.method,
                path: method.path,
              }}
            />
          ))}
        </ToolsList>
      </ActionCard>

      {deleteModal.isOpen && onDelete && (
        <Modal
          icon={({ className }) => (
            <SvgTrash className={cn(className, "stroke-action-danger-05")} />
          )}
          title="Delete OpenAPI action"
          onClose={() => deleteModal.toggle(false)}
          submit={
            <Button
              variant="danger"
              onClick={async () => {
                await onDelete(tool);
                deleteModal.toggle(false);
              }}
            >
              Delete
            </Button>
          }
        >
          <div className="flex flex-col gap-4">
            <Text as="p" text03>
              This will permanently delete the OpenAPI action <b>{tool.name}</b>{" "}
              and its configuration.
            </Text>
            <Text as="p" text03>
              Are you sure you want to delete this OpenAPI action?
            </Text>
          </div>
        </Modal>
      )}
    </>
  );
}


================================================
FILE: web/src/sections/actions/OpenApiPageContent.tsx
================================================
"use client";

import { ToolSnapshot } from "@/lib/tools/interfaces";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import OpenAPIAuthenticationModal, {
  AuthMethod,
  OpenAPIAuthFormValues,
} from "./modals/OpenAPIAuthenticationModal";
import AddOpenAPIActionModal from "./modals/AddOpenAPIActionModal";
import AdminListHeader from "@/sections/admin/AdminListHeader";
import { toast } from "@/hooks/useToast";
import OpenApiActionCard from "./OpenApiActionCard";
import { createOAuthConfig, updateOAuthConfig } from "@/lib/oauth/api";
import { updateCustomTool, deleteCustomTool } from "@/lib/tools/openApiService";
import { updateToolStatus } from "@/lib/tools/mcpService";
import DisconnectEntityModal from "./modals/DisconnectEntityModal";
import ActionCardSkeleton from "./skeleton/ActionCardSkeleton";
import useOpenApiTools from "@/hooks/useOpenApiTools";

export default function OpenApiPageContent() {
  const {
    openApiTools,
    mutateOpenApiTools,
    isLoading: isOpenApiLoading,
  } = useOpenApiTools();
  const addOpenAPIActionModal = useCreateModal();
  const openAPIAuthModal = useCreateModal();
  const disconnectModal = useCreateModal();
  const [selectedTool, setSelectedTool] = useState<ToolSnapshot | null>(null);
  const [toolBeingEdited, setToolBeingEdited] = useState<ToolSnapshot | null>(
    null
  );
  const [toolPendingDisconnect, setToolPendingDisconnect] =
    useState<ToolSnapshot | null>(null);
  const [isDisconnecting, setIsDisconnecting] = useState(false);
  const [isDeleting, setIsDeleting] = useState(false);
  const [searchQuery, setSearchQuery] = useState("");
  const [showSharedOverlay, setShowSharedOverlay] = useState(false);

  useEffect(() => {
    const anyModalOpen =
      addOpenAPIActionModal.isOpen ||
      openAPIAuthModal.isOpen ||
      disconnectModal.isOpen;
    setShowSharedOverlay(anyModalOpen);
  }, [
    addOpenAPIActionModal.isOpen,
    openAPIAuthModal.isOpen,
    disconnectModal.isOpen,
  ]);

  const handleOpenAuthModal = useCallback(
    (tool: ToolSnapshot) => {
      setSelectedTool(tool);
      openAPIAuthModal.toggle(true);
    },
    [openAPIAuthModal]
  );

  const resetAuthModal = useCallback(() => {
    setSelectedTool(null);
    openAPIAuthModal.toggle(false);
  }, [openAPIAuthModal]);

  const handleConnect = useCallback(
    async (values: OpenAPIAuthFormValues) => {
      if (!selectedTool) {
        throw new Error("No OpenAPI action selected for authentication.");
      }

      try {
        if (values.authMethod === "oauth") {
          const parsedScopes = values.scopes
            .split(",")
            .map((scope) => scope.trim())
            .filter(Boolean);
          const trimmedClientId = values.clientId.trim();
          const trimmedClientSecret = values.clientSecret.trim();

          let oauthConfigId = selectedTool.oauth_config_id ?? null;

          if (oauthConfigId) {
            await updateOAuthConfig(oauthConfigId, {
              authorization_url: values.authorizationUrl,
              token_url: values.tokenUrl,
              scopes: parsedScopes,
              ...(trimmedClientId ? { client_id: trimmedClientId } : {}),
              ...(trimmedClientSecret
                ? { client_secret: trimmedClientSecret }
                : {}),
            });
          } else {
            const oauthConfig = await createOAuthConfig({
              name: `${selectedTool.name} OAuth`,
              authorization_url: values.authorizationUrl,
              token_url: values.tokenUrl,
              client_id: trimmedClientId,
              client_secret: trimmedClientSecret,
              scopes: parsedScopes.length ? parsedScopes : undefined,
            });
            oauthConfigId = oauthConfig.id;
          }

          const response = await updateCustomTool(selectedTool.id, {
            custom_headers: [],
            passthrough_auth: false,
            oauth_config_id: oauthConfigId,
          });

          if (response.error) {
            throw new Error(response.error);
          }

          toast.success(
            `${selectedTool.name} authentication ${
              selectedTool.oauth_config_id ? "updated" : "saved"
            } successfully.`
          );
        } else if (values.authMethod === "custom-header") {
          const customHeaders = values.headers
            .map(({ key, value }) => ({
              key: key.trim(),
              value: value.trim(),
            }))
            .filter(({ key, value }) => key && value);

          const response = await updateCustomTool(selectedTool.id, {
            custom_headers: customHeaders,
            passthrough_auth: false,
            oauth_config_id: null,
          });

          if (response.error) {
            throw new Error(response.error);
          }

          toast.success(
            `${selectedTool.name} authentication headers saved successfully.`
          );
        } else if (values.authMethod === "pt-oauth") {
          const response = await updateCustomTool(selectedTool.id, {
            passthrough_auth: true,
            oauth_config_id: null,
            custom_headers: [],
          });
          if (response.error) {
            throw new Error(response.error);
          }
          toast.success(
            `${selectedTool.name} authentication passthrough saved successfully.`
          );
        }

        await mutateOpenApiTools();
        setSelectedTool(null);
      } catch (error) {
        const message =
          error instanceof Error
            ? error.message
            : "Failed to save authentication settings.";
        toast.error(message);
        throw error;
      }
    },
    [selectedTool, mutateOpenApiTools]
  );

  const handleManageTool = useCallback(
    (tool: ToolSnapshot) => {
      setToolBeingEdited(tool);
      addOpenAPIActionModal.toggle(true);
    },
    [addOpenAPIActionModal]
  );

  const handleEditAuthenticationFromModal = useCallback(
    (tool: ToolSnapshot) => {
      setSelectedTool(tool);
      openAPIAuthModal.toggle(true);
    },
    [openAPIAuthModal]
  );

  const handleDisableTool = useCallback(
    async (tool: ToolSnapshot) => {
      try {
        await updateToolStatus(tool.id, false);

        toast.success(`${tool.name} has been disconnected.`);

        await mutateOpenApiTools();
      } catch (error) {
        const message =
          error instanceof Error
            ? error.message
            : "Failed to disconnect OpenAPI action.";
        toast.error(message);
        throw error instanceof Error
          ? error
          : new Error("Failed to disconnect OpenAPI action.");
      }
    },
    [mutateOpenApiTools]
  );

  const handleOpenDisconnectModal = useCallback(
    (tool: ToolSnapshot) => {
      setToolPendingDisconnect(tool);
      addOpenAPIActionModal.toggle(false);
      disconnectModal.toggle(true);
    },
    [disconnectModal, addOpenAPIActionModal]
  );

  const handleConfirmDisconnectFromModal = useCallback(async () => {
    if (!toolPendingDisconnect) {
      return;
    }

    try {
      setIsDisconnecting(true);
      await handleDisableTool(toolPendingDisconnect);
    } finally {
      setIsDisconnecting(false);
      disconnectModal.toggle(false);
      setToolPendingDisconnect(null);
    }
  }, [disconnectModal, handleDisableTool, toolPendingDisconnect]);

  const executeDeleteTool = useCallback(
    async (tool: ToolSnapshot) => {
      try {
        setIsDeleting(true);
        const response = await deleteCustomTool(tool.id);
        if (response.data) {
          toast.success(`${tool.name} deleted successfully.`);
          await mutateOpenApiTools();
        } else {
          throw new Error(response.error || "Failed to delete tool.");
        }
      } catch (error) {
        console.error("Failed to delete OpenAPI tool", error);
        toast.error(
          error instanceof Error
            ? error.message
            : "An unexpected error occurred while deleting the tool."
        );
        throw error;
      } finally {
        setIsDeleting(false);
      }
    },
    [mutateOpenApiTools]
  );

  const handleDeleteToolFromModal = useCallback(async () => {
    if (!toolPendingDisconnect || isDeleting) {
      return;
    }

    try {
      await executeDeleteTool(toolPendingDisconnect);
    } finally {
      disconnectModal.toggle(false);
      setToolPendingDisconnect(null);
    }
  }, [disconnectModal, executeDeleteTool, isDeleting, toolPendingDisconnect]);

  const handleDeleteTool = useCallback(
    async (tool: ToolSnapshot) => {
      if (isDeleting) return;
      await executeDeleteTool(tool);
    },
    [executeDeleteTool, isDeleting]
  );

  const handleAddAction = useCallback(() => {
    setToolBeingEdited(null);
    addOpenAPIActionModal.toggle(true);
  }, [addOpenAPIActionModal]);

  const handleAddModalClose = useCallback(() => {
    setToolBeingEdited(null);
  }, []);

  const handleRenameTool = useCallback(
    async (toolId: number, newName: string) => {
      try {
        const response = await updateCustomTool(toolId, { name: newName });
        if (response.error) {
          throw new Error(response.error);
        }
        toast.success("OpenAPI action renamed successfully");
        await mutateOpenApiTools();
      } catch (error) {
        console.error("Error renaming tool:", error);
        toast.error(
          error instanceof Error
            ? error.message
            : "Failed to rename OpenAPI action"
        );
        throw error; // Re-throw so ButtonRenaming can handle it
      }
    },
    [mutateOpenApiTools]
  );

  const authenticationModalTitle = useMemo(() => {
    if (!selectedTool) {
      return "Authenticate OpenAPI Action";
    }
    const hasExistingAuth =
      Boolean(selectedTool.oauth_config_id) ||
      Boolean(selectedTool.custom_headers?.length);
    const prefix = hasExistingAuth
      ? "Update authentication for"
      : "Authenticate";
    return `${prefix} ${selectedTool.name}`;
  }, [selectedTool]);

  const authenticationDefaultMethod = useMemo<AuthMethod>(() => {
    if (!selectedTool) {
      return "oauth";
    }
    return selectedTool.custom_headers?.length ? "custom-header" : "oauth";
  }, [selectedTool]);

  // Filter tools based on search query
  const filteredTools = useMemo(() => {
    if (!openApiTools) return [];
    if (!searchQuery.trim()) return openApiTools;

    const query = searchQuery.toLowerCase();
    return openApiTools.filter(
      (tool) =>
        tool.name.toLowerCase().includes(query) ||
        tool.description?.toLowerCase().includes(query)
    );
  }, [openApiTools, searchQuery]);

  return (
    <div className="flex flex-col h-full overflow-hidden">
      {showSharedOverlay && (
        <div
          className="fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none data-[state=open]:animate-in data-[state=open]:fade-in-0"
          data-state="open"
          aria-hidden="true"
        />
      )}

      <div className="flex-shrink-0 mb-4">
        <AdminListHeader
          hasItems={isOpenApiLoading || (openApiTools?.length ?? 0) > 0}
          searchQuery={searchQuery}
          onSearchQueryChange={setSearchQuery}
          onAction={handleAddAction}
          actionLabel="Add OpenAPI Action"
          emptyStateText="Add custom actions from OpenAPI schemas."
        />
      </div>

      <div className="flex-1 overflow-y-auto min-h-0">
        <div className="flex flex-col gap-4 w-full pb-4">
          {isOpenApiLoading ? (
            <>
              <ActionCardSkeleton />
              <ActionCardSkeleton />
            </>
          ) : (
            filteredTools.map((tool) => (
              <OpenApiActionCard
                key={tool.id}
                tool={tool}
                onAuthenticate={handleOpenAuthModal}
                onManage={handleManageTool}
                onDelete={handleDeleteTool}
                onRename={handleRenameTool}
                mutateOpenApiTools={mutateOpenApiTools}
                onOpenDisconnectModal={handleOpenDisconnectModal}
              />
            ))
          )}
        </div>
      </div>

      <addOpenAPIActionModal.Provider>
        <AddOpenAPIActionModal
          skipOverlay
          existingTool={toolBeingEdited}
          onEditAuthentication={handleEditAuthenticationFromModal}
          onDisconnectTool={(tool: ToolSnapshot) => {
            handleOpenDisconnectModal(tool);
            resetAuthModal();
          }}
          onSuccess={(tool) => {
            setSelectedTool(tool);
            openAPIAuthModal.toggle(true);
            mutateOpenApiTools();
          }}
          onUpdate={() => {
            mutateOpenApiTools();
          }}
          onClose={handleAddModalClose}
        />
      </addOpenAPIActionModal.Provider>
      <openAPIAuthModal.Provider>
        <OpenAPIAuthenticationModal
          isOpen={openAPIAuthModal.isOpen}
          skipOverlay
          onClose={resetAuthModal}
          title={authenticationModalTitle}
          entityName={selectedTool?.name ?? null}
          defaultMethod={authenticationDefaultMethod}
          oauthConfigId={selectedTool?.oauth_config_id ?? null}
          initialHeaders={selectedTool?.custom_headers ?? null}
          passthroughOAuthEnabled={selectedTool?.passthrough_auth ?? false}
          onConnect={handleConnect}
          onSkip={resetAuthModal}
        />
      </openAPIAuthModal.Provider>

      <DisconnectEntityModal
        isOpen={disconnectModal.isOpen}
        onClose={() => {
          disconnectModal.toggle(false);
          setToolPendingDisconnect(null);
        }}
        name={toolPendingDisconnect?.name ?? null}
        onConfirmDisconnect={handleConfirmDisconnectFromModal}
        onConfirmDisconnectAndDelete={handleDeleteToolFromModal}
        isDisconnecting={isDisconnecting || isDeleting}
        skipOverlay
      />
    </div>
  );
}


================================================
FILE: web/src/sections/actions/PerUserAuthConfig.tsx
================================================
"use client";

import { useEffect, useState } from "react";
import { FormField } from "@/refresh-components/form/FormField";
import InputKeyValue, {
  KeyValue,
} from "@/refresh-components/inputs/InputKeyValue";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import Separator from "@/refresh-components/Separator";
import type { MCPAuthFormValues } from "@/sections/actions/modals/MCPAuthenticationModal";
import { SvgUser } from "@opal/icons";

interface PerUserAuthConfigProps {
  values: MCPAuthFormValues;
  setFieldValue: (
    field: keyof MCPAuthFormValues | string,
    value: unknown
  ) => void;
}

export function PerUserAuthConfig({
  values,
  setFieldValue,
}: PerUserAuthConfigProps) {
  // Use draft state for KeyValue array (like in LLMConnectionFieldsCustom)
  const [headersDraft, setHeadersDraft] = useState<KeyValue[]>(
    Object.entries(values.auth_template?.headers || {}).map(([key, value]) => ({
      key,
      value: String(value),
    }))
  );

  // Initialize auth template if not exists
  useEffect(() => {
    if (!values.auth_template) {
      const initialHeaders = { Authorization: "Bearer {api_key}" };
      setFieldValue("auth_template", {
        headers: initialHeaders,
        required_fields: ["api_key"],
      });
      setHeadersDraft([{ key: "Authorization", value: "Bearer {api_key}" }]);
    }
  }, [values.auth_template, setFieldValue]);

  // Update headers from KeyValue array
  const handleHeadersChange = (items: KeyValue[]) => {
    // Update draft state first
    setHeadersDraft(items);

    // Convert KeyValue[] to Record<string, string> for form value
    const headersObject: Record<string, string> = {};
    items.forEach((item) => {
      if (item.key.trim()) {
        headersObject[item.key] = item.value;
      }
    });
    setFieldValue("auth_template.headers", headersObject);
    updateRequiredFields(headersObject);
  };

  const computeRequiredFieldsFromHeaders = (
    headers: Record<string, string>
  ): string[] => {
    const placeholderRegex = /\{([^}]+)\}/g;
    const requiredFields = new Set<string>();

    Object.values(headers).forEach((value) => {
      const matches = value.match(placeholderRegex);
      if (matches) {
        matches.forEach((match: string) => {
          const field = match.slice(1, -1);
          if (field !== "user_email") {
            // user_email is automatically provided
            requiredFields.add(field);
          }
        });
      }
    });
    return Array.from(requiredFields);
  };

  // Extract required fields from placeholders in header values
  const updateRequiredFields = (headers: Record<string, string>) => {
    const requiredFields = computeRequiredFieldsFromHeaders(headers);
    setFieldValue("auth_template.required_fields", requiredFields);
  };

  // Update user credential value
  const updateUserCredential = (field: string, value: string) => {
    const currentCreds = values.user_credentials || {};
    setFieldValue("user_credentials", {
      ...currentCreds,
      [field]: value,
    });
  };

  const requiredFields: string[] = values.auth_template?.required_fields?.length
    ? values.auth_template.required_fields
    : computeRequiredFieldsFromHeaders(values.auth_template?.headers || {});
  const userCredentials = values.user_credentials || {};

  return (
    <div className="flex flex-col gap-4 -mx-2 px-2 py-2 bg-background-tint-00 rounded-12">
      {/* Authentication Headers */}
      <FormField name="auth_template.headers" state="idle">
        <FormField.Label>Authentication Headers</FormField.Label>
        <FormField.Control asChild>
          <InputKeyValue
            keyTitle="Header Name"
            valueTitle="Header Value"
            items={headersDraft}
            onChange={handleHeadersChange}
            mode="fixed-line"
            layout="equal"
            addButtonLabel="Add Header"
          />
        </FormField.Control>
        <FormField.Description>
          Format headers for each user to fill in their individual credentials.
          Use placeholders like{" "}
          <Text text03 secondaryMono className="inline">
            {"{api_key}"}
          </Text>{" "}
          or{" "}
          <Text text03 secondaryMono className="inline">
            {"{user_email}"}
          </Text>
          . Users will be prompted to provide values for placeholders (except
          user_email).
        </FormField.Description>
      </FormField>

      {/* Only show user credentials section if there are required fields */}
      {requiredFields.length > 0 && (
        <>
          <Separator className="-my-2" />

          <div className="flex flex-col gap-4">
            <div className="flex items-start gap-1">
              <SvgUser className="w-4 h-4 stroke-text-04 mt-0.5" />
              <div className="flex flex-col gap-1">
                <Text text04 secondaryAction as="p">
                  Only for your own account
                </Text>
                <Text text03 secondaryBody as="p">
                  The following credentials will not be shared with your
                  organization.
                </Text>
              </div>
            </div>

            {/* User Credentials Fields */}
            <div className="flex flex-col gap-3">
              {requiredFields.map((field: string) => {
                const isSecretField =
                  field.toLowerCase().includes("key") ||
                  field.toLowerCase().includes("token") ||
                  field.toLowerCase().includes("secret") ||
                  field.toLowerCase().includes("password");

                return (
                  <FormField
                    key={field}
                    name={`user_credentials.${field}`}
                    state="idle"
                  >
                    <FormField.Label>
                      {field
                        .replace(/_/g, " ")
                        .replace(/\b\w/g, (l) => l.toUpperCase())}
                    </FormField.Label>
                    <FormField.Control asChild>
                      <InputTypeIn
                        name={`user_credentials.${field}`}
                        type={isSecretField ? "password" : "text"}
                        value={userCredentials[field] || ""}
                        onChange={(e) =>
                          updateUserCredential(field, e.target.value)
                        }
                        placeholder={`Enter ${field.replace(/_/g, " ")}`}
                        showClearButton={false}
                      />
                    </FormField.Control>
                  </FormField>
                );
              })}
            </div>
          </div>
        </>
      )}
    </div>
  );
}


================================================
FILE: web/src/sections/actions/ToolItem.tsx
================================================
"use client";

import React, { useMemo } from "react";
import { cn } from "@/lib/utils";
import Switch from "@/refresh-components/inputs/Switch";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import type { IconProps } from "@opal/types";
import {
  SvgAlertTriangle,
  SvgArrowLeftDot,
  SvgArrowRightDot,
  SvgCornerRightUpDot,
  SvgMinusCircle,
} from "@opal/icons";

type ToolItemVariant = "mcp" | "openapi";

interface OpenApiMetadata {
  method?: string;
  path?: string;
}

const METHOD_ICON_MAP: Record<string, React.ReactNode> = {
  GET: <SvgArrowLeftDot className="size-4 stroke-status-success-05" />,
  POST: <SvgArrowRightDot className="size-4 stroke-status-info-05" />,
  PUT: <SvgCornerRightUpDot className="size-4 stroke-status-info-05" />,
  PATCH: <SvgCornerRightUpDot className="size-4 stroke-status-warning-05" />,
  DELETE: <SvgMinusCircle className="size-4 stroke-status-error-05" />,
};
const METHOD_STYLE_MAP: Record<string, { bg: string; text: string }> = {
  GET: { bg: "bg-status-success-00", text: "text-status-success-05" },
  POST: { bg: "bg-status-info-00", text: "text-status-info-05" },
  PUT: { bg: "bg-status-info-00", text: "text-status-info-05" },
  PATCH: { bg: "bg-status-warning-00", text: "text-status-warning-05" },
  DELETE: { bg: "bg-status-error-00", text: "text-status-error-05" },
};

function getMethodStyles(method?: string) {
  if (!method) {
    return {
      label: undefined,
      bg: "bg-background-neutral-01",
      text: "text-text-03",
    };
  }

  const upperMethod = method.toUpperCase();
  const styles = METHOD_STYLE_MAP[upperMethod] ?? {
    bg: "bg-background-neutral-01",
    text: "text-text-03",
  };

  return {
    label: upperMethod,
    ...styles,
  };
}

export interface ToolItemProps {
  // Tool information
  name: string;
  description: string;
  icon?: React.FunctionComponent<IconProps>;

  // Tool state
  isAvailable?: boolean;
  isEnabled?: boolean;

  // Variant
  variant?: ToolItemVariant;
  openApiMetadata?: OpenApiMetadata;

  // Handlers
  onToggle?: (enabled: boolean) => void;

  // Optional styling
  className?: string;
}

const ToolItem: React.FC<ToolItemProps> = ({
  name,
  description,
  icon: Icon,
  isAvailable = true,
  isEnabled = true,
  variant = "mcp",
  openApiMetadata,
  onToggle,
  className,
}) => {
  const isMcpVariant = variant === "mcp";

  const unavailableStyles =
    isMcpVariant && !isAvailable
      ? "bg-background-neutral-02"
      : "bg-background-tint-00";

  const textOpacity = isMcpVariant && !isAvailable ? "opacity-50" : "";

  const {
    label: methodLabel,
    bg: methodBg,
    text: methodText,
  } = isMcpVariant
    ? { label: undefined, bg: "", text: "" }
    : getMethodStyles(openApiMetadata?.method);

  const highlightedPathContent = useMemo(() => {
    if (!openApiMetadata?.path) {
      return null;
    }

    // Example: "/repos/{owner}/{repo}" => plain spans for static segments,
    // colored spans for "{owner}" and "{repo}".
    const path = openApiMetadata.path;
    const segments: React.ReactNode[] = [];
    const paramRegex = /\{[^}]+\}/g;
    let lastIndex = 0;
    let match: RegExpExecArray | null;
    const highlightClass = methodText || "text-text-03";

    while ((match = paramRegex.exec(path)) !== null) {
      // Push plain text before the param, then the colored "{param}" segment.
      if (match.index > lastIndex) {
        segments.push(
          <span key={`text-${match.index}`}>
            {path.slice(lastIndex, match.index)}
          </span>
        );
      }

      segments.push(
        <span key={`param-${match.index}`} className={highlightClass}>
          {match[0]}
        </span>
      );

      lastIndex = paramRegex.lastIndex;
    }

    if (lastIndex < path.length) {
      segments.push(<span key="text-end">{path.slice(lastIndex)}</span>);
    }

    return segments;
  }, [openApiMetadata?.path, methodText]);

  return (
    <div
      className={cn(
        "flex items-start justify-between w-full p-2 rounded-08 border border-border-01 gap-2",
        unavailableStyles,
        className
      )}
    >
      {/* Left Section: Icon and Content */}
      <div className="flex gap-1 items-start flex-1 min-w-0 pr-2">
        {/* Icon Container */}
        {Icon ? (
          <div
            className={cn(
              "flex items-center justify-center shrink-0",
              textOpacity
            )}
          >
            <Icon size={20} className="h-5 w-5 stroke-text-04" />
          </div>
        ) : (
          <div className="flex items-center justify-center h-5 w-5">
            {METHOD_ICON_MAP[openApiMetadata?.method?.toUpperCase() ?? ""]}
          </div>
        )}

        {/* Content Container */}
        <div className="flex flex-col items-start flex-1 min-w-0">
          {/* Tool Name */}
          <div className="flex items-center w-full min-h-[20px] px-0.5">
            <Truncated
              mainUiAction
              text04
              className={cn(
                "truncate",
                textOpacity,
                !isAvailable && "line-through"
              )}
            >
              {name}
            </Truncated>
          </div>

          {/* Description */}
          <div className="px-0.5 w-full">
            <Truncated
              text03
              secondaryBody
              className={cn("whitespace-pre-wrap", textOpacity)}
            >
              {description}
            </Truncated>
          </div>
        </div>
      </div>

      {/* Right Section */}
      {isMcpVariant ? (
        <div className="flex gap-2 items-start justify-end shrink-0">
          {/* Unavailable Badge */}
          {!isAvailable && (
            <div className="flex items-center min-h-[20px] px-0 py-0.5">
              <div className="flex gap-0.5 items-center">
                <div className="flex items-center px-0.5">
                  <Text as="p" text03 secondaryBody className="text-right">
                    Tool unavailable
                  </Text>
                </div>
                <div className="flex items-center justify-center p-0.5 w-4 h-4">
                  <SvgAlertTriangle className="w-3 h-3 stroke-status-warning-05" />
                </div>
              </div>
            </div>
          )}

          {/* Switch */}
          <div className="flex items-center justify-center gap-1 h-5 px-0.5 py-0.5">
            <Switch
              checked={isEnabled}
              onCheckedChange={onToggle}
              disabled={!isAvailable}
              aria-label={`tool-toggle-${name}`}
            />
          </div>
        </div>
      ) : (
        <div className="flex flex-col items-end justify-center">
          {methodLabel && (
            <div
              className={cn("rounded-04 border border-transparent", methodBg)}
            >
              <Text
                as="p"
                figureSmallLabel
                className={cn("uppercase tracking-wide p-0.5 ", methodText)}
              >
                {methodLabel}
              </Text>
            </div>
          )}

          {openApiMetadata?.path && (
            <Truncated secondaryMono text03 className="text-right truncate">
              {highlightedPathContent}
            </Truncated>
          )}
        </div>
      )}
    </div>
  );
};

ToolItem.displayName = "ToolItem";
export default ToolItem;


================================================
FILE: web/src/sections/actions/ToolsList.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import FadingEdgeContainer from "@/refresh-components/FadingEdgeContainer";
import ToolItemSkeleton from "@/sections/actions/skeleton/ToolItemSkeleton";
import EnabledCount from "@/refresh-components/EnabledCount";
import { SvgEye, SvgXCircle } from "@opal/icons";

export interface ToolsListProps {
  // Loading state
  isFetching?: boolean;

  // Tool count for footer
  totalCount?: number;
  enabledCount?: number;
  showOnlyEnabled?: boolean;
  onToggleShowOnlyEnabled?: () => void;
  onUpdateToolsStatus?: (enabled: boolean) => void;

  // Empty state of filtered tools
  isEmpty?: boolean;
  searchQuery?: string;
  emptyMessage?: string;
  emptySearchMessage?: string;

  // Content
  children?: React.ReactNode;

  // Left action (for refresh button and last verified text)
  leftAction?: React.ReactNode;

  // Styling
  className?: string;
}

const ToolsList: React.FC<ToolsListProps> = ({
  isFetching = false,
  totalCount,
  enabledCount = 0,
  showOnlyEnabled = false,
  onToggleShowOnlyEnabled,
  onUpdateToolsStatus,
  isEmpty = false,
  searchQuery,
  emptyMessage = "No tools available",
  emptySearchMessage = "No tools found",
  children,
  leftAction,
  className,
}) => {
  const showFooter =
    totalCount !== undefined && enabledCount !== undefined && totalCount > 0;

  return (
    <>
      <FadingEdgeContainer
        direction="bottom"
        className={cn(
          "flex flex-col gap-1 items-start max-h-[30vh] overflow-y-auto",
          className
        )}
      >
        {isFetching ? (
          Array.from({ length: 5 }).map((_, index) => (
            <ToolItemSkeleton key={`skeleton-${index}`} />
          ))
        ) : isEmpty ? (
          <div className="flex items-center justify-center w-full py-8">
            <Text as="p" text03 mainUiBody>
              {searchQuery ? emptySearchMessage : emptyMessage}
            </Text>
          </div>
        ) : (
          children
        )}
      </FadingEdgeContainer>

      {/* Footer showing enabled tool count with filter toggle */}
      {showFooter && !(totalCount === 0) && !isFetching && (
        <div className="pt-2 px-2">
          <div className="flex items-center justify-between gap-2 w-full">
            {/* Left action area */}
            {leftAction}

            {/* Right action area */}
            <div className="flex items-center gap-1 ml-auto">
              {enabledCount > 0 && (
                <EnabledCount
                  enabledCount={enabledCount}
                  totalCount={totalCount}
                  name="tool"
                />
              )}
              {onToggleShowOnlyEnabled && enabledCount > 0 && (
                <Button
                  icon={SvgEye}
                  prominence="tertiary"
                  size="sm"
                  onClick={onToggleShowOnlyEnabled}
                  interaction={showOnlyEnabled ? "hover" : "rest"}
                  tooltip={
                    showOnlyEnabled ? "Show all tools" : "Show only enabled"
                  }
                  aria-label={
                    showOnlyEnabled
                      ? "Show all tools"
                      : "Show only enabled tools"
                  }
                />
              )}
              {onUpdateToolsStatus && enabledCount > 0 && (
                <Button
                  icon={SvgXCircle}
                  prominence="tertiary"
                  size="sm"
                  onClick={() => onUpdateToolsStatus(false)}
                  tooltip="Disable all tools"
                  aria-label="Disable all tools"
                />
              )}
              {onUpdateToolsStatus && enabledCount === 0 && (
                <Button
                  prominence="tertiary"
                  onClick={() => onUpdateToolsStatus(true)}
                >
                  Enable all
                </Button>
              )}
            </div>
          </div>
        </div>
      )}
    </>
  );
};
ToolsList.displayName = "ToolsList";

export default ToolsList;


================================================
FILE: web/src/sections/actions/ToolsSection.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { SvgFold } from "@opal/icons";
interface ToolsSectionProps {
  onFold?: () => void;
  searchQuery: string;
  onSearchQueryChange: (query: string) => void;
  className?: string;
}

const ToolsSection: React.FC<ToolsSectionProps> = ({
  onFold,
  searchQuery,
  onSearchQueryChange,
  className,
}) => {
  const handleSearchChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    onSearchQueryChange(e.target.value);
  };

  return (
    <div className={cn("w-full", className)}>
      <div className="flex gap-1 items-center w-full transition-all duration-300 ease-in-out px-2 pb-2">
        {/* Search Bar */}
        <div className="flex-1 min-w-[160px]">
          <InputTypeIn
            placeholder="Search tools…"
            aria-label="Search tools"
            value={searchQuery}
            onChange={handleSearchChange}
            leftSearchIcon
            showClearButton
            className="w-full"
          />
        </div>

        {/* Actions */}
        <div className="flex gap-1 items-center p-1">
          {/* Fold Button */}
          {onFold && (
            <Button prominence="tertiary" onClick={onFold} rightIcon={SvgFold}>
              Fold
            </Button>
          )}
        </div>
      </div>
    </div>
  );
};

ToolsSection.displayName = "ToolsSection";
export default ToolsSection;


================================================
FILE: web/src/sections/actions/modals/AddMCPServerModal.tsx
================================================
"use client";

import { useState } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import Modal from "@/refresh-components/Modal";
import * as InputLayouts from "@/layouts/input-layouts";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";
import { createMCPServer, updateMCPServer } from "@/lib/tools/mcpService";
import {
  MCPServerCreateRequest,
  MCPServerStatus,
  MCPServer,
} from "@/lib/tools/interfaces";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import Separator from "@/refresh-components/Separator";
import { Button } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { ModalCreationInterface } from "@/refresh-components/contexts/ModalContext";
import { SvgCheckCircle, SvgServer, SvgUnplug } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";

interface AddMCPServerModalProps {
  skipOverlay?: boolean;
  activeServer: MCPServer | null;
  setActiveServer: (server: MCPServer | null) => void;
  disconnectModal: ModalCreationInterface;
  manageServerModal: ModalCreationInterface;
  onServerCreated?: (server: MCPServer) => void;
  handleAuthenticate: (serverId: number) => void;
  mutateMcpServers?: () => Promise<void>;
}

const validationSchema = Yup.object().shape({
  name: Yup.string().required("Server name is required"),
  description: Yup.string(),
  server_url: Yup.string()
    .url("Must be a valid URL")
    .required("Server URL is required"),
});

export default function AddMCPServerModal({
  skipOverlay = false,
  activeServer,
  disconnectModal,
  manageServerModal,
  onServerCreated,
  handleAuthenticate,
  mutateMcpServers,
}: AddMCPServerModalProps) {
  const { isOpen, toggle } = useModal();
  const [isSubmitting, setIsSubmitting] = useState(false);

  // Use activeServer from props
  const server = activeServer;

  // Handler for disconnect button
  const handleDisconnectClick = () => {
    if (activeServer) {
      // Server stays the same, just toggle modals
      manageServerModal.toggle(false);
      disconnectModal.toggle(true);
    }
  };

  // Determine if we're in edit mode
  const isEditMode = !!server;

  const initialValues: MCPServerCreateRequest = {
    name: server?.name || "",
    description: server?.description || "",
    server_url: server?.server_url || "",
  };

  const handleSubmit = async (values: MCPServerCreateRequest) => {
    setIsSubmitting(true);

    try {
      if (isEditMode && server) {
        // Update existing server
        await updateMCPServer(server.id, values);
        toast.success("MCP Server updated successfully");
        await mutateMcpServers?.();
      } else {
        // Create new server
        const createdServer = await createMCPServer(values);

        toast.success("MCP Server created successfully");

        await mutateMcpServers?.();

        if (onServerCreated) {
          onServerCreated(createdServer);
        }
      }
      // Close modal. Do NOT clear `activeServer` here because this modal
      // frequently transitions to other modals (authenticate/disconnect), and
      // clearing would race those flows.
      toggle(false);
    } catch (error) {
      console.error(
        `Error ${isEditMode ? "updating" : "creating"} MCP server:`,
        error
      );
      toast.error(
        error instanceof Error
          ? error.message
          : `Failed to ${isEditMode ? "update" : "create"} MCP server`
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  // Handle modal close to clear server state
  const handleModalClose = (open: boolean) => {
    toggle(open);
  };

  return (
    <Modal open={isOpen} onOpenChange={handleModalClose}>
      <Modal.Content
        width="sm"
        height="lg"
        preventAccidentalClose={false}
        skipOverlay={skipOverlay}
      >
        <Formik
          initialValues={initialValues}
          validationSchema={validationSchema}
          onSubmit={handleSubmit}
        >
          {({ isValid, dirty }) => (
            <Form>
              <Modal.Header
                icon={SvgServer}
                title={isEditMode ? "Manage MCP Server" : "Add MCP Server"}
                description={
                  isEditMode
                    ? "Update your MCP server configuration and manage authentication."
                    : "Connect MCP (Model Context Protocol) server to add custom actions."
                }
                onClose={() => handleModalClose(false)}
              />

              <Modal.Body>
                <InputLayouts.Vertical name="name" title="Server Name">
                  <InputTypeInField
                    name="name"
                    placeholder="Name your MCP server"
                    autoFocus
                  />
                </InputLayouts.Vertical>

                <InputLayouts.Vertical
                  name="description"
                  title="Description"
                  suffix="optional"
                >
                  <InputTextAreaField
                    name="description"
                    placeholder="More details about the MCP server"
                    rows={3}
                  />
                </InputLayouts.Vertical>

                <Separator noPadding />

                <InputLayouts.Vertical
                  name="server_url"
                  title="MCP Server URL"
                  subDescription="Only connect to servers you trust. You are responsible for actions taken with this connection and keeping your tools updated."
                >
                  <InputTypeInField
                    name="server_url"
                    placeholder="https://your-mcp-server.com/mcp"
                  />
                </InputLayouts.Vertical>

                {/* Authentication Status Section - Only show in edit mode when authenticated */}
                {isEditMode &&
                  server?.is_authenticated &&
                  server?.status === MCPServerStatus.CONNECTED && (
                    <Section
                      flexDirection="row"
                      justifyContent="between"
                      alignItems="start"
                      gap={1}
                    >
                      <Section gap={0.25} alignItems="start">
                        <Section
                          flexDirection="row"
                          gap={0.5}
                          alignItems="center"
                          width="fit"
                        >
                          <SvgCheckCircle className="w-4 h-4 stroke-status-success-05" />
                          <Text>Authenticated &amp; Connected</Text>
                        </Section>
                        <Text secondaryBody text03>
                          {server.auth_type === "OAUTH"
                            ? `OAuth connected to ${server.owner}`
                            : server.auth_type === "API_TOKEN"
                              ? "API token configured"
                              : "Connected"}
                        </Text>
                      </Section>
                      <Section
                        flexDirection="row"
                        gap={0.5}
                        alignItems="center"
                        width="fit"
                      >
                        <Button
                          icon={SvgUnplug}
                          prominence="tertiary"
                          type="button"
                          tooltip="Disconnect Server"
                          onClick={handleDisconnectClick}
                        />
                        <Button
                          prominence="secondary"
                          type="button"
                          onClick={() => {
                            // Close this modal and open the auth modal for this server
                            toggle(false);
                            handleAuthenticate(server.id);
                          }}
                        >
                          Edit Configs
                        </Button>
                      </Section>
                    </Section>
                  )}
              </Modal.Body>

              <Modal.Footer>
                <Button
                  disabled={isSubmitting}
                  prominence="secondary"
                  type="button"
                  onClick={() => handleModalClose(false)}
                >
                  Cancel
                </Button>
                <Button
                  disabled={isSubmitting || !isValid || !dirty}
                  type="submit"
                >
                  {isSubmitting
                    ? isEditMode
                      ? "Saving..."
                      : "Adding..."
                    : isEditMode
                      ? "Save Changes"
                      : "Add Server"}
                </Button>
              </Modal.Footer>
            </Form>
          )}
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/actions/modals/AddOpenAPIActionModal.tsx
================================================
"use client";

import { markdown } from "@opal/utils";
import Link from "next/link";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import * as InputLayouts from "@/layouts/input-layouts";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import Separator from "@/refresh-components/Separator";
import { useCallback, useEffect, useMemo, useState } from "react";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { Button } from "@opal/components";
import { Hoverable } from "@opal/core";
import { MethodSpec, ToolSnapshot } from "@/lib/tools/interfaces";
import {
  validateToolDefinition,
  createCustomTool,
  updateCustomTool,
} from "@/lib/tools/openApiService";
import ToolItem from "@/sections/actions/ToolItem";
import debounce from "lodash/debounce";
import { DOCS_ADMINS_PATH } from "@/lib/constants";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import { Formik, Form, useFormikContext } from "formik";
import * as Yup from "yup";
import { toast } from "@/hooks/useToast";
import {
  SvgActions,
  SvgBracketCurly,
  SvgCheckCircle,
  SvgAlertCircle,
  SvgUnplug,
} from "@opal/icons";
import InfoBlock from "@/refresh-components/messages/InfoBlock";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { Section } from "@/layouts/general-layouts";
import EmptyMessage from "@/refresh-components/EmptyMessage";

interface AddOpenAPIActionModalProps {
  skipOverlay?: boolean;
  onSuccess?: (tool: ToolSnapshot) => void;
  onUpdate?: (tool: ToolSnapshot) => void;
  existingTool?: ToolSnapshot | null;
  onClose?: () => void;
  onEditAuthentication?: (tool: ToolSnapshot) => void;
  onDisconnectTool?: (tool: ToolSnapshot) => Promise<void> | void;
}

interface OpenAPIActionFormValues {
  definition: string;
}

const validationSchema = Yup.object().shape({
  definition: Yup.string().required("OpenAPI schema definition is required"),
});

function parseJsonWithTrailingCommas(jsonString: string) {
  // Regular expression to remove trailing commas before } or ]
  let cleanedJsonString = jsonString.replace(/,\s*([}\]])/g, "$1");
  // Replace True with true, False with false, and None with null
  cleanedJsonString = cleanedJsonString
    .replace(/\bTrue\b/g, "true")
    .replace(/\bFalse\b/g, "false")
    .replace(/\bNone\b/g, "null");
  // Now parse the cleaned JSON string
  return JSON.parse(cleanedJsonString);
}

function prettifyDefinition(definition: any) {
  return JSON.stringify(definition, null, 2);
}

interface FormContentProps {
  handleClose: () => void;
  existingTool: ToolSnapshot | null;
  onEditAuthentication?: (tool: ToolSnapshot) => void;
  onDisconnectTool?: (tool: ToolSnapshot) => Promise<void> | void;
}

function FormContent({
  handleClose,
  existingTool,
  onEditAuthentication,
  onDisconnectTool,
}: FormContentProps) {
  const { values, setFieldValue, setFieldError, dirty, isSubmitting } =
    useFormikContext<OpenAPIActionFormValues>();

  const [methodSpecs, setMethodSpecs] = useState<MethodSpec[] | null>(null);
  const [name, setName] = useState<string | null>(null);
  const [description, setDescription] = useState<string | undefined>(undefined);
  const [url, setUrl] = useState<string | undefined>(undefined);

  const isEditMode = Boolean(existingTool);

  const handleFormat = useCallback(() => {
    if (!values.definition.trim()) {
      return;
    }

    try {
      const formatted = prettifyDefinition(
        parseJsonWithTrailingCommas(values.definition)
      );
      setFieldValue("definition", formatted);
      setFieldError("definition", "");
    } catch {
      setFieldError("definition", "Invalid JSON format");
    }
  }, [values.definition, setFieldValue, setFieldError]);

  const validateDefinition = useCallback(
    async (
      rawDefinition: string,
      setFieldError: (field: string, message: string) => void
    ) => {
      if (!rawDefinition.trim()) {
        setMethodSpecs(null);
        setFieldError("definition", "");
        return;
      }

      try {
        const parsedDefinition = parseJsonWithTrailingCommas(rawDefinition);
        const derivedName = parsedDefinition?.info?.title;
        const derivedDescription = parsedDefinition?.info?.description;
        const derivedUrl = parsedDefinition?.servers?.[0]?.url;

        setName(derivedName);
        setDescription(derivedDescription);
        setUrl(derivedUrl);

        const response = await validateToolDefinition({
          definition: parsedDefinition,
        });

        if (response.error) {
          setMethodSpecs(null);
          setFieldError("definition", response.error);
        } else {
          setMethodSpecs(response.data ?? []);
          setFieldError("definition", "");
        }
      } catch {
        setMethodSpecs(null);
        setFieldError("definition", "Invalid JSON format");
      }
    },
    []
  );

  const debouncedValidateDefinition = useMemo(
    () => debounce(validateDefinition, 300),
    [validateDefinition]
  );

  const modalTitle = isEditMode ? "Edit OpenAPI action" : "Add OpenAPI action";
  const modalDescription = isEditMode
    ? "Update the OpenAPI schema for this action."
    : "Add OpenAPI schema to add custom actions.";
  const primaryButtonLabel = isSubmitting
    ? isEditMode
      ? "Saving..."
      : "Adding..."
    : isEditMode
      ? "Save Changes"
      : "Add Action";

  const hasOAuthConfig = Boolean(existingTool?.oauth_config_id);
  const hasCustomHeaders =
    Array.isArray(existingTool?.custom_headers) &&
    (existingTool?.custom_headers?.length ?? 0) > 0;
  const hasPassthroughAuth = Boolean(existingTool?.passthrough_auth);
  const hasAuthenticationConfigured =
    hasOAuthConfig || hasCustomHeaders || hasPassthroughAuth;
  const authenticationDescription = useMemo(() => {
    if (!existingTool) {
      return "";
    }
    if (hasOAuthConfig) {
      return existingTool.oauth_config_name
        ? `OAuth connected via ${existingTool.oauth_config_name}`
        : "OAuth authentication configured";
    }
    if (hasCustomHeaders) {
      return "Custom authentication headers configured";
    }
    if (hasPassthroughAuth) {
      return "Passthrough authentication enabled";
    }
    return "";
  }, [existingTool, hasOAuthConfig, hasCustomHeaders, hasPassthroughAuth]);

  const showAuthenticationStatus = Boolean(
    isEditMode && existingTool?.enabled && hasAuthenticationConfigured
  );

  const handleEditAuthenticationClick = useCallback(() => {
    if (!existingTool || !onEditAuthentication) {
      return;
    }
    handleClose();
    onEditAuthentication(existingTool);
  }, [existingTool, onEditAuthentication, handleClose]);

  useEffect(() => {
    if (!values.definition.trim()) {
      setMethodSpecs(null);
      setFieldError("definition", "");
      debouncedValidateDefinition.cancel();
      return () => {
        debouncedValidateDefinition.cancel();
      };
    }

    debouncedValidateDefinition(values.definition, setFieldError);

    return () => {
      debouncedValidateDefinition.cancel();
    };
  }, [
    values.definition,
    debouncedValidateDefinition,
    setFieldError,
    setMethodSpecs,
  ]);

  return (
    <Form>
      <Modal.Header
        icon={SvgActions}
        title={modalTitle}
        description={modalDescription}
        onClose={handleClose}
      />

      <Modal.Body>
        <InputLayouts.Vertical
          name="definition"
          title="OpenAPI Schema Definition"
          subDescription={markdown(
            `Specify an OpenAPI schema that defines the APIs you want to make available as part of this action. Learn more about [OpenAPI actions](${DOCS_ADMINS_PATH}/actions/openapi).`
          )}
        >
          <Hoverable.Root group="definitionField" widthVariant="full">
            <div className="relative w-full">
              {values.definition.trim() && (
                <div className="absolute z-[100000] top-2 right-2 bg-background-tint-00">
                  <Hoverable.Item
                    group="definitionField"
                    variant="opacity-on-hover"
                  >
                    <div className="flex">
                      <CopyIconButton
                        prominence="tertiary"
                        size="sm"
                        getCopyText={() => values.definition}
                        tooltip="Copy definition"
                      />
                      <Button
                        prominence="tertiary"
                        size="sm"
                        icon={SvgBracketCurly}
                        tooltip="Format definition"
                        onClick={handleFormat}
                      />
                    </div>
                  </Hoverable.Item>
                </div>
              )}
              <InputTextAreaField
                name="definition"
                rows={14}
                placeholder="Enter your OpenAPI schema here"
                className="font-main-ui-mono"
              />
            </div>
          </Hoverable.Root>
        </InputLayouts.Vertical>

        <Separator noPadding />

        {methodSpecs && methodSpecs.length > 0 ? (
          <>
            {name && (
              <InfoBlock
                icon={getActionIcon(url || "", name || "")}
                title={name}
                description={description}
              />
            )}
            {url && (
              <InfoBlock
                icon={SvgAlertCircle}
                title={url || ""}
                description="URL found in the schema. Only connect to servers you trust."
              />
            )}
            <Separator noPadding />
            <Section gap={0.5}>
              {methodSpecs.map((method) => (
                <ToolItem
                  key={`${method.method}-${method.path}-${method.name}`}
                  name={method.name}
                  description={method.summary || "No summary provided"}
                  variant="openapi"
                  openApiMetadata={{
                    method: method.method,
                    path: method.path,
                  }}
                />
              ))}
            </Section>
          </>
        ) : (
          <EmptyMessage
            title="No Actions Found"
            icon={SvgActions}
            description="Provide OpenAPI schema to preview actions here."
          />
        )}

        {showAuthenticationStatus && (
          <Section
            flexDirection="row"
            justifyContent="between"
            alignItems="start"
            gap={1}
          >
            <Section gap={0.25} alignItems="start">
              <Section
                flexDirection="row"
                gap={0.5}
                alignItems="center"
                width="fit"
              >
                <SvgCheckCircle className="w-4 h-4 stroke-status-success-05" />
                <Text>
                  {existingTool?.enabled
                    ? "Authenticated & Enabled"
                    : "Authentication configured"}
                </Text>
              </Section>
              {authenticationDescription && (
                <Text secondaryBody text03 className="pl-5">
                  {authenticationDescription}
                </Text>
              )}
            </Section>
            <Section
              flexDirection="row"
              gap={0.5}
              alignItems="center"
              width="fit"
            >
              <Button
                icon={SvgUnplug}
                prominence="tertiary"
                type="button"
                tooltip="Disable action"
                onClick={() => {
                  if (!existingTool || !onDisconnectTool) {
                    return;
                  }
                  onDisconnectTool(existingTool);
                }}
              />
              <Button
                disabled={!onEditAuthentication}
                prominence="secondary"
                type="button"
                onClick={handleEditAuthenticationClick}
              >
                Edit Configs
              </Button>
            </Section>
          </Section>
        )}
      </Modal.Body>

      <Modal.Footer>
        <Button
          disabled={isSubmitting}
          prominence="secondary"
          type="button"
          onClick={handleClose}
        >
          Cancel
        </Button>
        <Button disabled={isSubmitting || !dirty} type="submit">
          {primaryButtonLabel}
        </Button>
      </Modal.Footer>
    </Form>
  );
}

export default function AddOpenAPIActionModal({
  skipOverlay = false,
  onSuccess,
  onUpdate,
  existingTool = null,
  onClose,
  onEditAuthentication,
  onDisconnectTool,
}: AddOpenAPIActionModalProps) {
  const { isOpen, toggle } = useModal();

  const handleModalClose = useCallback(
    (open: boolean) => {
      toggle(open);
      if (!open) {
        onClose?.();
      }
    },
    [toggle, onClose]
  );

  const handleClose = useCallback(() => {
    handleModalClose(false);
  }, [handleModalClose]);

  const initialValues: OpenAPIActionFormValues = useMemo(
    () => ({
      definition: existingTool?.definition
        ? prettifyDefinition(existingTool.definition)
        : "",
    }),
    [existingTool]
  );

  const handleSubmit = async (values: OpenAPIActionFormValues) => {
    let parsedDefinition;
    try {
      parsedDefinition = parseJsonWithTrailingCommas(values.definition);
    } catch (error) {
      console.error("Error parsing OpenAPI definition:", error);
      toast.error("Invalid JSON format in OpenAPI schema definition");
      return;
    }

    const derivedName = parsedDefinition?.info?.title;
    const derivedDescription = parsedDefinition?.info?.description;

    if (existingTool) {
      try {
        const updatePayload: {
          name?: string;
          description?: string;
          definition: Record<string, any>;
          custom_headers?: { key: string; value: string }[];
          passthrough_auth?: boolean;
          oauth_config_id?: number | null;
        } = {
          definition: parsedDefinition,
          custom_headers: existingTool.custom_headers,
          passthrough_auth: existingTool.passthrough_auth,
          oauth_config_id: existingTool.oauth_config_id,
        };

        if (derivedName) {
          updatePayload.name = derivedName;
        }

        if (derivedDescription) {
          updatePayload.description = derivedDescription;
        }

        const response = await updateCustomTool(existingTool.id, updatePayload);

        if (response.error) {
          toast.error(response.error);
        } else {
          toast.success("OpenAPI action updated successfully");
          handleClose();
          if (response.data && onUpdate) {
            onUpdate(response.data);
          }
        }
      } catch (error) {
        console.error("Error updating OpenAPI action:", error);
        toast.error("Failed to update OpenAPI action");
      }
      return;
    }

    try {
      const response = await createCustomTool({
        name: derivedName,
        description: derivedDescription || undefined,
        definition: parsedDefinition,
        custom_headers: [],
        passthrough_auth: false,
      });

      if (response.error) {
        toast.error(response.error);
      } else {
        toast.success("OpenAPI action created successfully");
        handleClose();
        if (response.data && onSuccess) {
          onSuccess(response.data);
        }
      }
    } catch (error) {
      console.error("Error creating OpenAPI action:", error);
      toast.error("Failed to create OpenAPI action");
    }
  };

  return (
    <Modal open={isOpen} onOpenChange={handleModalClose}>
      <Modal.Content width="sm" height="lg" skipOverlay={skipOverlay}>
        <Formik
          initialValues={initialValues}
          validationSchema={validationSchema}
          onSubmit={handleSubmit}
          enableReinitialize
        >
          <FormContent
            handleClose={handleClose}
            existingTool={existingTool}
            onEditAuthentication={onEditAuthentication}
            onDisconnectTool={onDisconnectTool}
          />
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/actions/modals/DisconnectEntityModal.tsx
================================================
"use client";

import { useRef } from "react";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { SvgUnplug } from "@opal/icons";
interface DisconnectEntityModalProps {
  isOpen: boolean;
  onClose: () => void;
  name: string | null;
  onConfirmDisconnect: () => void;
  onConfirmDisconnectAndDelete?: () => void;
  isDisconnecting?: boolean;
  skipOverlay?: boolean;
}

export default function DisconnectEntityModal({
  isOpen,
  onClose,
  name,
  onConfirmDisconnect,
  onConfirmDisconnectAndDelete,
  isDisconnecting = false,
  skipOverlay = false,
}: DisconnectEntityModalProps) {
  const disconnectButtonRef = useRef<HTMLButtonElement>(null);

  if (!name) return null;

  return (
    <Modal
      open={isOpen}
      onOpenChange={(open) => {
        if (!open) {
          onClose();
        }
      }}
    >
      <Modal.Content
        width="sm"
        preventAccidentalClose={false}
        skipOverlay={skipOverlay}
        onOpenAutoFocus={(e) => {
          e.preventDefault();
          disconnectButtonRef.current?.focus();
        }}
      >
        <Modal.Header
          icon={({ className }) => (
            <SvgUnplug className={cn(className, "stroke-action-danger-05")} />
          )}
          title={`Disconnect ${name}`}
          onClose={onClose}
        />

        <Modal.Body>
          <Text as="p" text03 mainUiBody>
            All tools connected to {name} will stop working. You can reconnect
            to this server later if needed.
          </Text>
          <Text as="p" text03 mainUiBody>
            Are you sure you want to proceed?
          </Text>
        </Modal.Body>

        <Modal.Footer>
          <Button
            disabled={isDisconnecting}
            prominence="secondary"
            onClick={onClose}
          >
            Cancel
          </Button>
          {onConfirmDisconnectAndDelete && (
            <Button
              disabled={isDisconnecting}
              variant="danger"
              prominence="secondary"
              onClick={onConfirmDisconnectAndDelete}
            >
              Disconnect &amp; Delete
            </Button>
          )}
          <Button
            disabled={isDisconnecting}
            variant="danger"
            onClick={onConfirmDisconnect}
            ref={disconnectButtonRef}
          >
            {isDisconnecting ? "Disconnecting..." : "Disconnect"}
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/actions/modals/MCPAuthenticationModal.tsx
================================================
"use client";

import { useState, useMemo, useEffect } from "react";
import useSWR, { KeyedMutator } from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { errorHandlingFetcher } from "@/lib/fetcher";
import Modal from "@/refresh-components/Modal";
import { FormField } from "@/refresh-components/form/FormField";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { Button } from "@opal/components";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import Text from "@/refresh-components/texts/Text";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import {
  MCPAuthenticationPerformer,
  MCPAuthenticationType,
  MCPTransportType,
  MCPServerStatus,
  MCPServer,
  MCPServersResponse,
} from "@/lib/tools/interfaces";
import Separator from "@/refresh-components/Separator";
import Tabs from "@/refresh-components/Tabs";
import { PerUserAuthConfig } from "@/sections/actions/PerUserAuthConfig";
import { updateMCPServerStatus, upsertMCPServer } from "@/lib/tools/mcpService";
import Message from "@/refresh-components/messages/Message";
import { toast } from "@/hooks/useToast";
import { SvgArrowExchange } from "@opal/icons";
import { useAuthType } from "@/lib/hooks";
import { AuthType } from "@/lib/constants";

interface MCPAuthenticationModalProps {
  mcpServer: MCPServer | null;
  skipOverlay?: boolean;
  onTriggerFetchTools?: (serverId: number) => Promise<void> | void;
  mutateMcpServers: KeyedMutator<MCPServersResponse>;
}

interface MCPAuthTemplate {
  headers: Record<string, string>;
  required_fields: string[];
}

export interface MCPAuthFormValues {
  transport: MCPTransportType;
  auth_type: MCPAuthenticationType;
  auth_performer: MCPAuthenticationPerformer;
  api_token: string;
  auth_template: MCPAuthTemplate;
  user_credentials: Record<string, string>;
  oauth_client_id: string;
  oauth_client_secret: string;
}

const validationSchema = Yup.object().shape({
  transport: Yup.string()
    .oneOf([MCPTransportType.STREAMABLE_HTTP, MCPTransportType.SSE])
    .required("Transport is required"),
  auth_type: Yup.string()
    .oneOf([
      MCPAuthenticationType.NONE,
      MCPAuthenticationType.API_TOKEN,
      MCPAuthenticationType.OAUTH,
      MCPAuthenticationType.PT_OAUTH,
    ])
    .required("Authentication type is required"),
  auth_performer: Yup.string().when("auth_type", {
    is: (auth_type: string) => auth_type !== MCPAuthenticationType.NONE,
    then: (schema) =>
      schema
        .oneOf([
          MCPAuthenticationPerformer.ADMIN,
          MCPAuthenticationPerformer.PER_USER,
        ])
        .required("Authentication performer is required"),
    otherwise: (schema) => schema.notRequired(),
  }),
  api_token: Yup.string().when(["auth_type", "auth_performer"], {
    is: (auth_type: string, auth_performer: string) =>
      auth_type === MCPAuthenticationType.API_TOKEN &&
      auth_performer === MCPAuthenticationPerformer.ADMIN,
    then: (schema) => schema.required("API token is required"),
    otherwise: (schema) => schema.notRequired(),
  }),
  oauth_client_id: Yup.string().when("auth_type", {
    is: MCPAuthenticationType.OAUTH,
    then: (schema) => schema.notRequired(),
    otherwise: (schema) => schema.notRequired(),
  }),
  oauth_client_secret: Yup.string().when("auth_type", {
    is: MCPAuthenticationType.OAUTH,
    then: (schema) => schema.notRequired(),
    otherwise: (schema) => schema.notRequired(),
  }),
});

export default function MCPAuthenticationModal({
  mcpServer,
  skipOverlay = false,
  onTriggerFetchTools,
  mutateMcpServers,
}: MCPAuthenticationModalProps) {
  const { isOpen, toggle } = useModal();
  const [activeAuthTab, setActiveAuthTab] = useState<"per-user" | "admin">(
    "per-user"
  );
  const [isSubmitting, setIsSubmitting] = useState(false);

  // Check if OAuth is enabled for the Onyx instance
  const authType = useAuthType();
  const isOAuthEnabled =
    authType === AuthType.OIDC || authType === AuthType.GOOGLE_OAUTH;

  const redirectUri = useMemo(() => {
    if (typeof window === "undefined") {
      return "https://{YOUR_DOMAIN}/mcp/oauth/callback";
    }
    return `${window.location.origin}/mcp/oauth/callback`;
  }, []);

  // Get the current frontend URL for redirect URI
  const { data: fullServer } = useSWR<MCPServer>(
    mcpServer ? SWR_KEYS.adminMcpServer(mcpServer.id) : null,
    errorHandlingFetcher
  );

  // Set the initial active tab based on the server configuration
  useEffect(() => {
    if (fullServer) {
      if (
        fullServer.auth_performer === MCPAuthenticationPerformer.ADMIN ||
        fullServer.auth_type === MCPAuthenticationType.NONE
      ) {
        setActiveAuthTab("admin");
      } else {
        setActiveAuthTab("per-user");
      }
    }
  }, [fullServer]);

  // Helper function to determine transport from URL
  const getTransportFromUrl = (url: string): MCPTransportType => {
    const lowerUrl = url.toLowerCase();
    if (lowerUrl.endsWith("sse")) {
      return MCPTransportType.SSE;
    } else if (lowerUrl.endsWith("mcp")) {
      return MCPTransportType.STREAMABLE_HTTP;
    }
    // Default to STREAMABLE_HTTP
    return MCPTransportType.STREAMABLE_HTTP;
  };

  const initialValues = useMemo<MCPAuthFormValues>(() => {
    if (!fullServer) {
      return {
        transport: mcpServer?.server_url
          ? getTransportFromUrl(mcpServer.server_url)
          : MCPTransportType.STREAMABLE_HTTP,
        auth_type: MCPAuthenticationType.OAUTH,
        auth_performer: MCPAuthenticationPerformer.PER_USER,
        api_token: "",
        auth_template: {
          headers: { Authorization: "Bearer {api_key}" },
          required_fields: ["api_key"],
        },
        user_credentials: {},
        oauth_client_id: "",
        oauth_client_secret: "",
      };
    }

    return {
      transport: fullServer.server_url
        ? getTransportFromUrl(fullServer.server_url)
        : (fullServer.transport as MCPTransportType) ||
          MCPTransportType.STREAMABLE_HTTP,
      auth_type:
        (fullServer.auth_type as MCPAuthenticationType) ||
        MCPAuthenticationType.OAUTH,
      auth_performer:
        (fullServer.auth_performer as MCPAuthenticationPerformer) ||
        MCPAuthenticationPerformer.PER_USER,
      // Admin API Token
      api_token: fullServer.admin_credentials?.api_key || "",
      // OAuth Credentials
      oauth_client_id: fullServer.admin_credentials?.client_id || "",
      oauth_client_secret: fullServer.admin_credentials?.client_secret || "",
      // Auth Template
      auth_template: (fullServer.auth_template as MCPAuthTemplate) || {
        headers: { Authorization: "Bearer {api_key}" },
        required_fields: ["api_key"],
      },
      // User Credentials (substitutions)
      user_credentials:
        (fullServer.user_credentials as Record<string, string>) || {},
    };
  }, [fullServer, mcpServer?.server_url]);

  const constructServerData = (values: MCPAuthFormValues) => {
    if (!mcpServer) return null;
    const authType = values.auth_type;

    return {
      name: mcpServer.name,
      description: mcpServer.description || undefined,
      server_url: mcpServer.server_url,
      transport: values.transport,
      auth_type: values.auth_type,
      auth_performer: values.auth_performer,
      api_token:
        authType === MCPAuthenticationType.API_TOKEN &&
        values.auth_performer === MCPAuthenticationPerformer.ADMIN
          ? values.api_token
          : undefined,
      auth_template:
        values.auth_performer === MCPAuthenticationPerformer.PER_USER &&
        authType === MCPAuthenticationType.API_TOKEN
          ? values.auth_template
          : undefined,
      admin_credentials:
        values.auth_performer === MCPAuthenticationPerformer.PER_USER &&
        authType === MCPAuthenticationType.API_TOKEN
          ? values.user_credentials || {}
          : undefined,
      oauth_client_id:
        authType === MCPAuthenticationType.OAUTH
          ? values.oauth_client_id
          : undefined,
      oauth_client_secret:
        authType === MCPAuthenticationType.OAUTH
          ? values.oauth_client_secret
          : undefined,
      existing_server_id: mcpServer.id,
    };
  };

  const handleSubmit = async (values: MCPAuthFormValues) => {
    const serverData = constructServerData(values);
    if (!serverData || !mcpServer) return;

    setIsSubmitting(true);

    try {
      const authType = values.auth_type;
      // Step 1: Save the authentication configuration to the MCP server
      const { data: serverResult, error: serverError } =
        await upsertMCPServer(serverData);

      if (serverError || !serverResult) {
        throw new Error(serverError || "Failed to save server configuration");
      }

      // Step 2: Update status to AWAITING_AUTH after successful config save
      if (authType === MCPAuthenticationType.OAUTH) {
        await updateMCPServerStatus(
          mcpServer.id,
          MCPServerStatus.AWAITING_AUTH
        );
      }

      // Step 3: For OAuth, initiate the OAuth flow
      if (authType === MCPAuthenticationType.OAUTH) {
        const oauthResponse = await fetch("/api/admin/mcp/oauth/connect", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            server_id: mcpServer.id.toString(),
            oauth_client_id: values.oauth_client_id,
            oauth_client_secret: values.oauth_client_secret,
            return_path: `/admin/actions/mcp/?server_id=${mcpServer.id}&trigger_fetch=true`,
            include_resource_param: true,
          }),
        });

        if (!oauthResponse.ok) {
          const error = await oauthResponse.json();
          // Refresh server list so latest status is visible after auth failure
          await mutateMcpServers();
          toggle(false);
          throw new Error("Failed to initiate OAuth: " + error.detail);
        }

        const { oauth_url } = await oauthResponse.json();
        window.location.href = oauth_url;
      } else {
        // For non-OAuth authentication, trigger tools fetch in-place (no hard navigation)
        if (onTriggerFetchTools) {
          onTriggerFetchTools(mcpServer.id);
        } else {
          // Fallback to previous behavior if parent didn't provide handler
          window.location.href = `/admin/actions/mcp/?server_id=${mcpServer.id}&trigger_fetch=true`;
        }
        toggle(false);
      }
    } catch (error) {
      console.error("Error saving authentication:", error);
      // Ensure UI reflects latest status after any auth/config failure
      await mutateMcpServers();
      toast.error(
        error instanceof Error
          ? error.message
          : "Failed to save authentication configuration"
      );
    } finally {
      setIsSubmitting(false);
    }
  };

  return (
    <Modal open={isOpen} onOpenChange={toggle}>
      <Modal.Content width="sm" height="lg" skipOverlay={skipOverlay}>
        <Modal.Header
          icon={SvgArrowExchange}
          title={`Authenticate ${mcpServer?.name || "MCP Server"}`}
          description="Authenticate your connection to start using the MCP server."
        />

        <Formik<MCPAuthFormValues>
          initialValues={initialValues}
          validationSchema={validationSchema}
          onSubmit={handleSubmit}
          enableReinitialize
        >
          {({
            values,
            handleChange,
            setFieldValue,
            errors,
            touched,
            isValid,
            dirty,
          }) => {
            // Auto-populate transport based on URL
            useEffect(() => {
              if (mcpServer?.server_url) {
                const transport = getTransportFromUrl(mcpServer.server_url);
                setFieldValue("transport", transport);
              }
            }, [mcpServer?.server_url, setFieldValue]);

            return (
              <Form className="flex flex-col h-full">
                <Modal.Body>
                  <div className="flex flex-col gap-4 p-2">
                    {/* Authentication Type */}
                    <FormField
                      name="auth_type"
                      state={
                        errors.auth_type && touched.auth_type
                          ? "error"
                          : touched.auth_type
                            ? "success"
                            : "idle"
                      }
                    >
                      <FormField.Label>Authentication Method</FormField.Label>
                      <FormField.Control asChild>
                        <InputSelect
                          value={values.auth_type}
                          onValueChange={(value) => {
                            setFieldValue("auth_type", value);
                            // For OAuth + OAuth pass-through, we only support per-user auth
                            if (
                              value === MCPAuthenticationType.OAUTH ||
                              value === MCPAuthenticationType.PT_OAUTH
                            ) {
                              setFieldValue(
                                "auth_performer",
                                MCPAuthenticationPerformer.PER_USER
                              );
                            } else if (
                              value === MCPAuthenticationType.API_TOKEN
                            ) {
                              // Keep auth_performer in sync with the selected API token tab
                              setFieldValue(
                                "auth_performer",
                                activeAuthTab === "admin"
                                  ? MCPAuthenticationPerformer.ADMIN
                                  : MCPAuthenticationPerformer.PER_USER
                              );
                            }
                          }}
                        >
                          <InputSelect.Trigger
                            placeholder="Select method"
                            data-testid="mcp-auth-method-select"
                          />
                          <InputSelect.Content>
                            <InputSelect.Item
                              value={MCPAuthenticationType.OAUTH}
                              description="Each user need to authenticate via OAuth with their own credentials."
                            >
                              OAuth
                            </InputSelect.Item>
                            {isOAuthEnabled && (
                              <InputSelect.Item
                                value={MCPAuthenticationType.PT_OAUTH}
                                description="Forward the user's OAuth access token used to authenticate Onyx."
                              >
                                OAuth Pass-through
                              </InputSelect.Item>
                            )}
                            <InputSelect.Item
                              value={MCPAuthenticationType.API_TOKEN}
                              description="Use per-user individual API key or organization-wide shared API key."
                            >
                              API Key
                            </InputSelect.Item>
                            <InputSelect.Item
                              value={MCPAuthenticationType.NONE}
                              description="Not Recommended"
                            >
                              None
                            </InputSelect.Item>
                          </InputSelect.Content>
                        </InputSelect>
                      </FormField.Control>
                      <FormField.Message
                        messages={{
                          error: errors.auth_type,
                        }}
                      />
                    </FormField>
                    <Separator className="py-0" />
                  </div>

                  {/* OAuth Section */}
                  {values.auth_type === MCPAuthenticationType.OAUTH && (
                    <div className="flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12">
                      {/* OAuth Client ID */}
                      <FormField
                        name="oauth_client_id"
                        state={
                          errors.oauth_client_id && touched.oauth_client_id
                            ? "error"
                            : touched.oauth_client_id
                              ? "success"
                              : "idle"
                        }
                      >
                        <FormField.Label optional>Client ID</FormField.Label>
                        <FormField.Control asChild>
                          <InputTypeIn
                            name="oauth_client_id"
                            value={values.oauth_client_id}
                            onChange={handleChange}
                            placeholder=" "
                            showClearButton={false}
                          />
                        </FormField.Control>
                        <FormField.Message
                          messages={{
                            error: errors.oauth_client_id,
                          }}
                        />
                      </FormField>
                      {/* OAuth Client Secret */}
                      <FormField
                        name="oauth_client_secret"
                        state={
                          errors.oauth_client_secret &&
                          touched.oauth_client_secret
                            ? "error"
                            : touched.oauth_client_secret
                              ? "success"
                              : "idle"
                        }
                      >
                        <FormField.Label optional>
                          Client Secret
                        </FormField.Label>
                        <FormField.Control asChild>
                          <PasswordInputTypeIn
                            name="oauth_client_secret"
                            value={values.oauth_client_secret}
                            onChange={handleChange}
                            placeholder=" "
                            showClearButton={false}
                          />
                        </FormField.Control>
                        <FormField.Message
                          messages={{
                            error: errors.oauth_client_secret,
                          }}
                        />
                      </FormField>

                      {/* Info Text */}
                      <div className="flex flex-col gap-2">
                        <Text as="p" text03 secondaryBody>
                          Client ID and secret are optional if the server
                          connection supports Dynamic Client Registration (DCR).
                        </Text>
                        <Text as="p" text03 secondaryBody>
                          If your server does not support DCR, you need register
                          your Onyx instance with the server provider to obtain
                          these credentials first. Make sure to grant Onyx
                          necessary scopes/permissions for your actions.
                        </Text>

                        {/* Redirect URI */}
                        <div className="flex items-center gap-1 w-full">
                          <Text
                            as="p"
                            text03
                            secondaryBody
                            className="whitespace-nowrap"
                          >
                            Use{" "}
                            <span className="font-secondary-action">
                              redirect URI
                            </span>
                            :
                          </Text>
                          <Text
                            as="p"
                            text04
                            className="font-mono text-[12px] leading-[16px] truncate"
                          >
                            {redirectUri}
                          </Text>
                          <CopyIconButton
                            getCopyText={() => redirectUri}
                            tooltip="Copy redirect URI"
                            prominence="tertiary"
                            size="sm"
                          />
                        </div>
                      </div>
                    </div>
                  )}

                  {/* API Key Section with Tabs */}
                  {values.auth_type === MCPAuthenticationType.API_TOKEN && (
                    <div className="flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12">
                      <Tabs
                        value={activeAuthTab}
                        onValueChange={(value) => {
                          setActiveAuthTab(value as "per-user" | "admin");
                          // Update auth_performer based on tab selection
                          setFieldValue(
                            "auth_performer",
                            value === "per-user"
                              ? MCPAuthenticationPerformer.PER_USER
                              : MCPAuthenticationPerformer.ADMIN
                          );
                        }}
                      >
                        <Tabs.List>
                          <Tabs.Trigger value="per-user">
                            Individual Key (Per User)
                          </Tabs.Trigger>
                          <Tabs.Trigger value="admin">
                            Shared Key (Admin)
                          </Tabs.Trigger>
                        </Tabs.List>

                        {/* Per-user Tab Content */}
                        <Tabs.Content value="per-user">
                          <PerUserAuthConfig
                            values={values}
                            setFieldValue={setFieldValue}
                          />
                        </Tabs.Content>

                        {/* Admin Tab Content */}
                        <Tabs.Content value="admin">
                          <div className="flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12">
                            <FormField
                              name="api_token"
                              state={
                                errors.api_token && touched.api_token
                                  ? "error"
                                  : touched.api_token
                                    ? "success"
                                    : "idle"
                              }
                            >
                              <FormField.Label>API Key</FormField.Label>
                              <FormField.Control asChild>
                                <PasswordInputTypeIn
                                  name="api_token"
                                  value={values.api_token}
                                  onChange={handleChange}
                                  placeholder="Shared API key for your organization"
                                  showClearButton={false}
                                />
                              </FormField.Control>
                              <FormField.Description>
                                Do not use your personal API key. Make sure this
                                key is appropriate to share with everyone in
                                your organization.
                              </FormField.Description>
                              <FormField.Message
                                messages={{
                                  error: errors.api_token,
                                }}
                              />
                            </FormField>
                          </div>
                        </Tabs.Content>
                      </Tabs>
                    </div>
                  )}
                  {values.auth_type === MCPAuthenticationType.NONE && (
                    <Message
                      text="No authentication for this MCP server"
                      description="No authentication will be used for this connection. Make sure you trust this server. You are responsible for actions taken with this connection."
                      default
                      medium
                      static
                      className="w-full"
                      close={false}
                    />
                  )}
                  {values.auth_type === MCPAuthenticationType.PT_OAUTH && (
                    <Message
                      text="Use pass-through for services with shared identity provider."
                      description="Onyx will forward the user's OAuth access token directly to the server as an Authorization header. Make sure the server supports authentication with the same provider."
                      default
                      medium
                      static
                      className="w-full"
                      close={false}
                    />
                  )}
                </Modal.Body>

                <Modal.Footer>
                  <Button
                    prominence="tertiary"
                    type="button"
                    onClick={() => toggle(false)}
                  >
                    Cancel
                  </Button>
                  <Button
                    disabled={!isValid || isSubmitting}
                    type="submit"
                    data-testid="mcp-auth-connect-button"
                  >
                    {isSubmitting ? "Connecting..." : "Connect"}
                  </Button>
                </Modal.Footer>
              </Form>
            );
          }}
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/actions/modals/OpenAPIAuthenticationModal.tsx
================================================
"use client";

import React, { useCallback, useEffect, useMemo, useState } from "react";
import { Formik, Form, FormikHelpers } from "formik";
import * as Yup from "yup";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import PasswordInputTypeIn from "@/refresh-components/inputs/PasswordInputTypeIn";
import { FormField } from "@/refresh-components/form/FormField";
import Separator from "@/refresh-components/Separator";
import Text from "@/refresh-components/texts/Text";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import KeyValueInput, {
  KeyValue,
} from "@/refresh-components/inputs/InputKeyValue";
import { OAuthConfig } from "@/lib/tools/interfaces";
import { getOAuthConfig } from "@/lib/oauth/api";
import { SvgArrowExchange } from "@opal/icons";
import { useAuthType } from "@/lib/hooks";
import { AuthType } from "@/lib/constants";
import Message from "@/refresh-components/messages/Message";

export type AuthMethod = "oauth" | "custom-header" | "pt-oauth";

export interface OpenAPIAuthFormValues {
  authMethod: AuthMethod;
  authorizationUrl: string;
  tokenUrl: string;
  clientId: string;
  clientSecret: string;
  scopes: string;
  headers: KeyValue[];
}

interface OpenAPIAuthenticationModalProps {
  isOpen: boolean;
  onClose: () => void;
  title: string;
  description?: string;
  skipOverlay?: boolean;
  defaultMethod?: AuthMethod;
  oauthConfigId?: number | null;
  initialHeaders?: KeyValue[] | null;
  onConnect?: (values: OpenAPIAuthFormValues) => Promise<void> | void;
  onSkip?: () => void;
  entityName?: string | null;
  passthroughOAuthEnabled?: boolean;
}

const MASKED_CREDENTIAL_VALUE = "********";

const defaultValues: OpenAPIAuthFormValues = {
  authMethod: "oauth",
  authorizationUrl: "",
  tokenUrl: "",
  clientId: "",
  clientSecret: "",
  scopes: "",
  headers: [
    {
      key: "Authorization",
      value: "",
    },
  ],
};

export default function OpenAPIAuthenticationModal({
  isOpen,
  onClose,
  title,
  description = "Authenticate your connection to start using the OpenAPI actions.",
  skipOverlay = false,
  defaultMethod = "oauth",
  oauthConfigId = null,
  initialHeaders = null,
  passthroughOAuthEnabled = false,
  onConnect,
  onSkip,
  entityName = null,
}: OpenAPIAuthenticationModalProps) {
  const authType = useAuthType();
  const isOAuthEnabled =
    authType === AuthType.OIDC || authType === AuthType.GOOGLE_OAUTH;
  const [existingOAuthConfig, setExistingOAuthConfig] =
    useState<OAuthConfig | null>(null);
  const [isLoadingOAuthConfig, setIsLoadingOAuthConfig] = useState(false);
  const [oauthConfigError, setOAuthConfigError] = useState<string | null>(null);

  const isEditingOAuthConfig = Boolean(oauthConfigId);
  const hasInitialHeaders =
    Array.isArray(initialHeaders) && initialHeaders.length > 0;
  const isEditMode = isEditingOAuthConfig || hasInitialHeaders;
  const shouldDisableForm =
    isEditingOAuthConfig &&
    isLoadingOAuthConfig &&
    !existingOAuthConfig &&
    !oauthConfigError;

  const redirectUri = useMemo(() => {
    if (typeof window === "undefined") {
      return "https://{YOUR_DOMAIN}/oauth-config/callback";
    }
    return `${window.location.origin}/oauth-config/callback`;
  }, []);

  useEffect(() => {
    let isActive = true;

    if (!isOpen || !oauthConfigId) {
      setExistingOAuthConfig(null);
      setOAuthConfigError(null);
      setIsLoadingOAuthConfig(false);
      return () => {
        isActive = false;
      };
    }

    const fetchConfig = async () => {
      setIsLoadingOAuthConfig(true);
      setOAuthConfigError(null);
      try {
        const config = await getOAuthConfig(oauthConfigId);
        if (!isActive) {
          return;
        }
        setExistingOAuthConfig(config);
      } catch (error) {
        console.error("Failed to load OAuth configuration", error);
        if (isActive) {
          setExistingOAuthConfig(null);
          setOAuthConfigError(
            "Failed to load existing OAuth configuration. Re-enter the details to update it."
          );
        }
      } finally {
        if (isActive) {
          setIsLoadingOAuthConfig(false);
        }
      }
    };

    fetchConfig();

    return () => {
      isActive = false;
    };
  }, [isOpen, oauthConfigId]);

  const dynamicValidationSchema = useMemo(
    () =>
      Yup.object({
        authMethod: Yup.mixed<AuthMethod>()
          .oneOf(["oauth", "pt-oauth", "custom-header"])
          .required("Authentication method is required"),
        authorizationUrl: Yup.string()
          .url("Enter a valid URL")
          .when("authMethod", {
            is: "oauth",
            then: (schema) => schema.required("Authorization URL is required"),
            otherwise: (schema) => schema.notRequired(),
          }),
        tokenUrl: Yup.string()
          .url("Enter a valid URL")
          .when("authMethod", {
            is: "oauth",
            then: (schema) => schema.required("Token URL is required"),
            otherwise: (schema) => schema.notRequired(),
          }),
        clientId: Yup.string().when("authMethod", {
          is: "oauth",
          then: (schema) =>
            isEditingOAuthConfig
              ? schema.optional()
              : schema.required("Client ID is required"),
          otherwise: (schema) => schema.notRequired(),
        }),
        clientSecret: Yup.string().when("authMethod", {
          is: "oauth",
          then: (schema) =>
            isEditingOAuthConfig
              ? schema.optional()
              : schema.required("Client secret is required"),
          otherwise: (schema) => schema.notRequired(),
        }),
        scopes: Yup.string().notRequired(),
        headers: Yup.array().when("authMethod", {
          is: "custom-header",
          then: () =>
            Yup.array()
              .of(
                Yup.object({
                  key: Yup.string().required("Header key is required"),
                  value: Yup.string().required("Header value is required"),
                })
              )
              .min(1, "Add at least one authentication header"),
          otherwise: () =>
            Yup.array().of(
              Yup.object({
                key: Yup.string(),
                value: Yup.string(),
              })
            ),
        }),
      }),
    [isEditingOAuthConfig]
  );

  const computedInitialValues = useMemo<OpenAPIAuthFormValues>(() => {
    const baseHeaders =
      hasInitialHeaders && initialHeaders
        ? initialHeaders.map((header) => ({ ...header }))
        : defaultValues.headers.map((header) => ({ ...header }));

    if (isEditingOAuthConfig) {
      const shouldMaskCredentials = Boolean(
        existingOAuthConfig?.has_client_credentials
      );
      return {
        authMethod: "oauth",
        authorizationUrl:
          existingOAuthConfig?.authorization_url ||
          defaultValues.authorizationUrl,
        tokenUrl: existingOAuthConfig?.token_url || defaultValues.tokenUrl,
        clientId: shouldMaskCredentials ? MASKED_CREDENTIAL_VALUE : "",
        clientSecret: shouldMaskCredentials ? MASKED_CREDENTIAL_VALUE : "",
        scopes: existingOAuthConfig?.scopes?.join(", ") || "",
        headers: baseHeaders,
      };
    }

    if (hasInitialHeaders && initialHeaders) {
      return {
        ...defaultValues,
        authMethod: "custom-header",
        headers: baseHeaders,
      };
    }

    if (passthroughOAuthEnabled) {
      return {
        ...defaultValues,
        authMethod: "pt-oauth",
      };
    }

    return {
      ...defaultValues,
      authMethod: defaultMethod,
      headers: baseHeaders,
    };
  }, [
    defaultMethod,
    existingOAuthConfig,
    hasInitialHeaders,
    initialHeaders,
    isEditingOAuthConfig,
    passthroughOAuthEnabled,
  ]);

  const handleSubmit = useCallback(
    async (
      values: OpenAPIAuthFormValues,
      formikHelpers: FormikHelpers<OpenAPIAuthFormValues>
    ) => {
      if (shouldDisableForm) {
        formikHelpers.setSubmitting(false);
        return;
      }
      const sanitizeCredentials = (
        formValues: OpenAPIAuthFormValues
      ): OpenAPIAuthFormValues => {
        if (!isEditingOAuthConfig || formValues.authMethod !== "oauth") {
          return formValues;
        }

        const sanitizeValue = (value: string) =>
          value === MASKED_CREDENTIAL_VALUE ? "" : value;

        return {
          ...formValues,
          clientId: sanitizeValue(formValues.clientId),
          clientSecret: sanitizeValue(formValues.clientSecret),
        };
      };

      try {
        const sanitizedValues = sanitizeCredentials(values);
        await onConnect?.(sanitizedValues);
        onClose();
      } finally {
        formikHelpers.setSubmitting(false);
      }
    },
    [onConnect, onClose, shouldDisableForm]
  );

  const handleSkip = useCallback(() => {
    if (onSkip) {
      onSkip();
    } else {
      onClose();
    }
  }, [onSkip, onClose]);

  return (
    <Modal
      open={isOpen}
      onOpenChange={(open) => {
        if (!open) {
          onClose();
        }
      }}
    >
      <Modal.Content width="sm" height="lg" skipOverlay={skipOverlay}>
        <Modal.Header
          icon={SvgArrowExchange}
          title={title}
          description={description}
          onClose={onClose}
        />

        <Formik
          initialValues={computedInitialValues}
          validationSchema={dynamicValidationSchema}
          validateOnMount
          enableReinitialize
          onSubmit={handleSubmit}
        >
          {({
            values,
            errors,
            touched,
            handleChange,
            setFieldValue,
            setFieldError,
            isSubmitting,
            isValid,
            dirty,
          }) => (
            <Form className="flex flex-col h-full">
              <Modal.Body>
                {oauthConfigError && (
                  <div className="mb-3">
                    <Text
                      as="p"
                      mainUiBody
                      className="text-action-text-danger-05"
                    >
                      {oauthConfigError}
                    </Text>
                  </div>
                )}

                {shouldDisableForm ? (
                  <div className="flex min-h-[220px] items-center justify-center rounded-12 border border-border-01 bg-background-tint-00">
                    <Text as="p" secondaryBody text03>
                      Loading existing configuration...
                    </Text>
                  </div>
                ) : (
                  <>
                    <div className="flex flex-col gap-4 px-2 pt-2">
                      <FormField
                        name="authMethod"
                        state={
                          errors.authMethod && touched.authMethod
                            ? "error"
                            : touched.authMethod
                              ? "success"
                              : "idle"
                        }
                      >
                        <FormField.Label>Authentication Method</FormField.Label>
                        <FormField.Control asChild>
                          <InputSelect
                            value={values.authMethod}
                            onValueChange={(value) =>
                              setFieldValue("authMethod", value)
                            }
                          >
                            <InputSelect.Trigger placeholder="Select method" />
                            <InputSelect.Content>
                              <InputSelect.Item
                                value="oauth"
                                description="Each user authenticates via OAuth with their own credentials."
                              >
                                OAuth
                              </InputSelect.Item>
                              {isOAuthEnabled && (
                                <InputSelect.Item
                                  value="pt-oauth"
                                  description="Forward the user's OAuth access token used to authenticate Onyx."
                                >
                                  OAuth Pass-through
                                </InputSelect.Item>
                              )}
                              <InputSelect.Item
                                value="custom-header"
                                description="Send custom headers with every request."
                              >
                                Custom Authorization Header
                              </InputSelect.Item>
                            </InputSelect.Content>
                          </InputSelect>
                        </FormField.Control>
                        <FormField.Message
                          messages={{
                            error: errors.authMethod,
                          }}
                        />
                      </FormField>
                    </div>

                    <Separator className="py-0" />

                    {values.authMethod === "oauth" && (
                      <section className="flex flex-col gap-4 rounded-12 bg-background-tint-00 border border-border-01 p-4">
                        <FormField
                          name="authorizationUrl"
                          state={
                            errors.authorizationUrl && touched.authorizationUrl
                              ? "error"
                              : touched.authorizationUrl
                                ? "success"
                                : "idle"
                          }
                        >
                          <FormField.Label>Authorization URL</FormField.Label>
                          <FormField.Control asChild>
                            <InputTypeIn
                              name="authorizationUrl"
                              value={values.authorizationUrl}
                              onChange={handleChange}
                              placeholder="https://example.com/oauth/authorize"
                              showClearButton={false}
                            />
                          </FormField.Control>
                          <FormField.Message
                            messages={{
                              error: errors.authorizationUrl,
                            }}
                          />
                        </FormField>

                        <FormField
                          name="tokenUrl"
                          state={
                            errors.tokenUrl && touched.tokenUrl
                              ? "error"
                              : touched.tokenUrl
                                ? "success"
                                : "idle"
                          }
                        >
                          <FormField.Label>Token URL</FormField.Label>
                          <FormField.Control asChild>
                            <InputTypeIn
                              name="tokenUrl"
                              value={values.tokenUrl}
                              onChange={handleChange}
                              placeholder="https://example.com/oauth/access_token"
                              showClearButton={false}
                            />
                          </FormField.Control>
                          <FormField.Message
                            messages={{
                              error: errors.tokenUrl,
                            }}
                          />
                        </FormField>

                        <FormField
                          name="clientId"
                          state={
                            errors.clientId && touched.clientId
                              ? "error"
                              : touched.clientId
                                ? "success"
                                : "idle"
                          }
                        >
                          <FormField.Label>OAuth Client ID</FormField.Label>
                          <FormField.Control asChild>
                            <InputTypeIn
                              name="clientId"
                              value={values.clientId}
                              onChange={handleChange}
                              placeholder=" "
                              showClearButton={false}
                            />
                          </FormField.Control>
                          {isEditingOAuthConfig && (
                            <FormField.Description>
                              Leave blank to keep the current client ID.
                            </FormField.Description>
                          )}
                          <FormField.Message
                            messages={{
                              error: errors.clientId,
                            }}
                          />
                        </FormField>

                        <FormField
                          name="clientSecret"
                          state={
                            errors.clientSecret && touched.clientSecret
                              ? "error"
                              : touched.clientSecret
                                ? "success"
                                : "idle"
                          }
                        >
                          <FormField.Label>OAuth Client Secret</FormField.Label>
                          <FormField.Control asChild>
                            <PasswordInputTypeIn
                              name="clientSecret"
                              value={values.clientSecret}
                              onChange={handleChange}
                              placeholder=" "
                              showClearButton={false}
                            />
                          </FormField.Control>
                          {isEditingOAuthConfig && (
                            <FormField.Description>
                              Leave blank to keep the current client secret.
                            </FormField.Description>
                          )}
                          <FormField.Message
                            messages={{
                              error: errors.clientSecret,
                            }}
                          />
                        </FormField>

                        <FormField
                          name="scopes"
                          state={
                            errors.scopes && touched.scopes
                              ? "error"
                              : touched.scopes
                                ? "success"
                                : "idle"
                          }
                        >
                          <FormField.Label>
                            Scopes{" "}
                            <span className="text-text-03">(Optional)</span>
                          </FormField.Label>
                          <FormField.Control asChild>
                            <InputTypeIn
                              name="scopes"
                              value={values.scopes}
                              onChange={handleChange}
                              placeholder="e.g. repo, user"
                              showClearButton={false}
                            />
                          </FormField.Control>
                          <FormField.Description>
                            Comma-separated list of OAuth scopes to request.
                          </FormField.Description>
                          <FormField.Message
                            messages={{
                              error: errors.scopes,
                            }}
                          />
                        </FormField>

                        <div className="flex flex-col gap-3 rounded-12 bg-background-tint-01 p-3">
                          <Text as="p" text03 secondaryBody>
                            OAuth passthrough is only available if you enable
                            OIDC or OAuth authentication.
                          </Text>
                          <div className="flex flex-col gap-2 w-full">
                            <Text
                              as="p"
                              text03
                              secondaryBody
                              className="flex flex-wrap gap-1"
                            >
                              Use{" "}
                              <span className="font-secondary-action">
                                redirect URI
                              </span>
                              :
                            </Text>
                            <div className="flex items-center gap-2 rounded-08 border border-border-01 bg-background-tint-00 px-3 py-2">
                              <Text
                                as="p"
                                text04
                                className="font-mono text-[12px] leading-[16px] truncate flex-1"
                              >
                                {redirectUri}
                              </Text>
                              <CopyIconButton
                                getCopyText={() => redirectUri}
                                tooltip="Copy redirect URI"
                                prominence="tertiary"
                                size="sm"
                              />
                            </div>
                          </div>
                        </div>
                      </section>
                    )}
                    {values.authMethod === "custom-header" && (
                      <section className="flex flex-col gap-4 rounded-12 bg-background-tint-00 border border-border-01 p-4">
                        <div className="flex flex-col gap-2">
                          <Text as="p" mainUiAction text04>
                            Authentication Headers
                          </Text>
                          <Text as="p" secondaryBody text03>
                            Specify custom headers for all requests sent to this
                            action&apos;s API endpoint.
                          </Text>
                        </div>
                        <FormField
                          name="headers"
                          state={errors.headers ? "error" : "idle"}
                        >
                          <FormField.Control asChild>
                            <KeyValueInput
                              keyTitle="Header"
                              valueTitle="Value"
                              items={values.headers}
                              onChange={(items) =>
                                setFieldValue("headers", items)
                              }
                              addButtonLabel="Add Header"
                              onValidationError={(message) =>
                                setFieldError("headers", message || undefined)
                              }
                              layout="equal"
                            />
                          </FormField.Control>
                          <FormField.Message
                            messages={{
                              error:
                                typeof errors.headers === "string"
                                  ? errors.headers
                                  : undefined,
                            }}
                          />
                        </FormField>
                      </section>
                    )}
                    {values.authMethod === "pt-oauth" && (
                      <Message
                        text="Use pass-through for services with shared identity provider."
                        description="Onyx will forward the user's OAuth access token directly to the server as an Authorization header. Make sure the server supports authentication with the same provider."
                        default
                        medium
                        static
                        className="w-full"
                        close={false}
                      />
                    )}
                  </>
                )}
              </Modal.Body>

              <Modal.Footer>
                <Button
                  prominence="tertiary"
                  type="button"
                  onClick={handleSkip}
                >
                  Cancel
                </Button>
                <Button
                  disabled={
                    !isValid || isSubmitting || shouldDisableForm || !dirty
                  }
                  type="submit"
                >
                  {isSubmitting ? "Connecting..." : "Connect"}
                </Button>
              </Modal.Footer>
            </Form>
          )}
        </Formik>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/actions/skeleton/ActionCardSkeleton.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";

interface ActionCardSkeletonProps {
  className?: string;
}

const ActionCardSkeleton: React.FC<ActionCardSkeletonProps> = ({
  className,
}) => {
  return (
    <div
      className={cn(
        "w-full border border-border-01 rounded-16 bg-background-tint-00",
        className
      )}
      role="status"
      aria-label="Loading action card"
    >
      <div className="flex flex-col w-full">
        {/* Header Section */}
        <div className="flex items-start justify-between gap-2 p-3 w-full">
          {/* Left: Icon + Title / Description */}
          <div className="flex gap-2 items-start flex-1 min-w-0 mr-2">
            {/* Icon */}
            <div className="flex items-center px-0 py-0.5 shrink-0">
              <div className="h-7 w-7 rounded-12 bg-background-tint-02 animate-pulse" />
            </div>

            {/* Title & Description */}
            <div className="flex flex-col items-start flex-1 min-w-0 gap-2">
              <div className="h-4 w-1/3 rounded bg-background-tint-02 animate-pulse" />
              <div className="h-3 w-2/3 rounded bg-background-tint-02 animate-pulse" />
            </div>
          </div>

          {/* Right: Actions / View tools button */}
          <div className="flex flex-col gap-2 items-end shrink-0">
            {/* Top row: icon buttons / status */}
            <div className="flex items-center gap-2">
              <div className="h-8 w-8 rounded-full bg-background-tint-02 animate-pulse" />
              <div className="h-8 w-8 rounded-full bg-background-tint-02 animate-pulse" />
            </div>

            {/* View tools button placeholder */}
            <div className="h-8 w-32 rounded-full bg-background-tint-02 animate-pulse" />
          </div>
        </div>
      </div>
    </div>
  );
};

ActionCardSkeleton.displayName = "ActionCardSkeleton";

export default ActionCardSkeleton;


================================================
FILE: web/src/sections/actions/skeleton/ToolItemSkeleton.tsx
================================================
"use client";

import React from "react";
import { cn } from "@/lib/utils";

interface ToolItemSkeletonProps {
  className?: string;
}

const ToolItemSkeleton: React.FC<ToolItemSkeletonProps> = ({ className }) => {
  return (
    <div
      className={cn(
        "flex items-start justify-between w-full p-2 rounded-08 border border-border-01 bg-background-tint-00",
        className
      )}
    >
      {/* Left Section: Icon and Content */}
      <div className="flex gap-1 items-start flex-1 min-w-0 pr-2">
        {/* Icon Container Skeleton */}
        <div className="flex items-center justify-center shrink-0">
          <div className="h-5 w-5 rounded bg-background-tint-02 animate-pulse" />
        </div>

        {/* Content Container */}
        <div className="flex flex-col items-start flex-1 min-w-0 gap-1">
          {/* Tool Name Skeleton */}
          <div className="flex items-center w-full min-h-[20px] px-0.5">
            <div className="h-4 w-1/3 rounded bg-background-tint-02 animate-pulse" />
          </div>

          {/* Description Skeleton */}
          <div className="px-0.5 w-full space-y-1">
            <div className="h-3 w-full rounded bg-background-tint-02 animate-pulse" />
            <div className="h-3 w-2/3 rounded bg-background-tint-02 animate-pulse" />
          </div>
        </div>
      </div>

      {/* Right Section: Switch Skeleton */}
      <div className="flex gap-2 items-start justify-end shrink-0">
        <div className="flex items-center justify-center gap-1 h-5 px-0.5 py-0.5">
          <div className="h-5 w-9 rounded-full bg-background-tint-02 animate-pulse" />
        </div>
      </div>
    </div>
  );
};

ToolItemSkeleton.displayName = "ToolItemSkeleton";
export default ToolItemSkeleton;


================================================
FILE: web/src/sections/admin/AdminListHeader.tsx
================================================
"use client";

import { Button, Card } from "@opal/components";
import { Content } from "@opal/layouts";
import { SvgPlusCircle } from "@opal/icons";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";

interface AdminListHeaderProps {
  /** Whether items exist — controls search bar vs empty-state card. */
  hasItems: boolean;
  /** Current search query. */
  searchQuery: string;
  /** Called when the search query changes. */
  onSearchQueryChange: (query: string) => void;
  /** Search input placeholder. */
  placeholder?: string;
  /** Text shown in the empty-state card when no items exist. */
  emptyStateText: string;
  /** Called when the action button is clicked. */
  onAction: () => void;
  /** Label for the action button. */
  actionLabel: string;
}

/**
 * AdminListHeader — the top bar for simple admin list pages.
 *
 * Handles two states:
 *
 * 1. **Items exist** (`hasItems = true`): renders a search input on the left
 *    with a primary action button on the right.
 * 2. **No items** (`hasItems = false`): renders a bordered card with
 *    descriptive text on the left and the same action button on the right.
 *
 * The action button always renders with a `SvgPlusCircle` right icon.
 *
 * Used on admin pages that have a flat list of items with no advanced
 * filtering — e.g. Service Accounts, Groups, OpenAPI Actions, MCP Servers.
 *
 * @example
 * ```tsx
 * <AdminListHeader
 *   hasItems={items.length > 0}
 *   searchQuery={search}
 *   onSearchQueryChange={setSearch}
 *   placeholder="Search service accounts..."
 *   emptyStateText="Create service account API keys with user-level access."
 *   onAction={handleCreate}
 *   actionLabel="New Service Account"
 * />
 * ```
 */
export default function AdminListHeader({
  hasItems,
  searchQuery,
  onSearchQueryChange,
  placeholder = "Search...",
  emptyStateText,
  onAction,
  actionLabel,
}: AdminListHeaderProps) {
  const actionButton = (
    <Button rightIcon={SvgPlusCircle} onClick={onAction}>
      {actionLabel}
    </Button>
  );

  if (!hasItems) {
    return (
      <Card rounding="lg" border="solid">
        <div className="flex flex-row items-center justify-between gap-3">
          <Content
            title={emptyStateText}
            sizePreset="main-ui"
            variant="body"
            prominence="muted"
            widthVariant="fit"
          />
          {actionButton}
        </div>
      </Card>
    );
  }

  return (
    <div className="flex flex-row gap-3 items-center px-2 pb-3">
      <InputTypeIn
        variant="internal"
        leftSearchIcon
        placeholder={placeholder}
        value={searchQuery}
        onChange={(e) => onSearchQueryChange(e.target.value)}
        showClearButton={false}
      />
      {actionButton}
    </div>
  );
}


================================================
FILE: web/src/sections/admin/ProviderCard.tsx
================================================
"use client";

import type { IconFunctionComponent } from "@opal/types";
import { Button, SelectCard } from "@opal/components";
import { Content, CardHeaderLayout } from "@opal/layouts";
import {
  SvgArrowExchange,
  SvgArrowRightCircle,
  SvgCheckSquare,
  SvgSettings,
  SvgUnplug,
} from "@opal/icons";

/**
 * ProviderCard — a stateful card for selecting / connecting / disconnecting
 * an external service provider (LLM, search engine, voice model, etc.).
 *
 * Built on opal `SelectCard` + `CardHeaderLayout`. Maps a three-state
 * status model to the `SelectCard` state system:
 *
 * | Status         | SelectCard state | Right action           |
 * |----------------|------------------|------------------------|
 * | `disconnected` | `empty`          | "Connect" button       |
 * | `connected`    | `filled`         | "Set as Default" button|
 * | `selected`     | `selected`       | "Current Default" label|
 *
 * Bottom-right actions (Disconnect, Edit) are always visible when the
 * provider is connected or selected.
 *
 * Used on admin configuration pages: Web Search, Image Generation,
 * Voice, and LLM Configuration.
 *
 * @example
 * ```tsx
 * <ProviderCard
 *   icon={SvgGlobe}
 *   title="Exa"
 *   description="Exa.ai"
 *   status="connected"
 *   onConnect={() => openModal()}
 *   onSelect={() => setDefault(id)}
 *   onEdit={() => openEditModal()}
 *   onDisconnect={() => confirmDisconnect(id)}
 * />
 * ```
 */

type ProviderStatus = "disconnected" | "connected" | "selected";

interface ProviderCardProps {
  icon: IconFunctionComponent;
  title: string;
  description: string;
  status: ProviderStatus;
  onConnect?: () => void;
  onSelect?: () => void;
  onDeselect?: () => void;
  onEdit?: () => void;
  onDisconnect?: () => void;
  selectedLabel?: string;
  "aria-label"?: string;
}

const STATUS_TO_STATE = {
  disconnected: "empty",
  connected: "filled",
  selected: "selected",
} as const;

export default function ProviderCard({
  icon,
  title,
  description,
  status,
  onConnect,
  onSelect,
  onDeselect,
  onEdit,
  onDisconnect,
  selectedLabel = "Current Default",
  "aria-label": ariaLabel,
}: ProviderCardProps) {
  const isDisconnected = status === "disconnected";
  const isConnected = status === "connected";
  const isSelected = status === "selected";

  return (
    <SelectCard
      state={STATUS_TO_STATE[status]}
      padding="sm"
      rounding="lg"
      aria-label={ariaLabel}
      onClick={isDisconnected && onConnect ? onConnect : undefined}
    >
      <CardHeaderLayout
        sizePreset="main-ui"
        variant="section"
        icon={icon}
        title={title}
        description={description}
        rightChildren={
          isDisconnected && onConnect ? (
            <Button
              prominence="tertiary"
              rightIcon={SvgArrowExchange}
              onClick={(e) => {
                e.stopPropagation();
                onConnect();
              }}
            >
              Connect
            </Button>
          ) : isConnected && onSelect ? (
            <Button
              prominence="tertiary"
              rightIcon={SvgArrowRightCircle}
              onClick={(e) => {
                e.stopPropagation();
                onSelect();
              }}
            >
              Set as Default
            </Button>
          ) : isSelected ? (
            <div className="p-2">
              <Content
                title={selectedLabel}
                sizePreset="main-ui"
                variant="section"
                icon={SvgCheckSquare}
              />
            </div>
          ) : undefined
        }
        bottomRightChildren={
          !isDisconnected ? (
            <div className="flex flex-row px-1 pb-1">
              {onDisconnect && (
                <Button
                  icon={SvgUnplug}
                  tooltip="Disconnect"
                  aria-label={`Disconnect ${title}`}
                  prominence="tertiary"
                  onClick={(e) => {
                    e.stopPropagation();
                    onDisconnect();
                  }}
                  size="md"
                />
              )}
              {onEdit && (
                <Button
                  icon={SvgSettings}
                  tooltip="Edit"
                  aria-label={`Edit ${title}`}
                  prominence="tertiary"
                  onClick={(e) => {
                    e.stopPropagation();
                    onEdit();
                  }}
                  size="md"
                />
              )}
            </div>
          ) : undefined
        }
      />
    </SelectCard>
  );
}

export type { ProviderCardProps, ProviderStatus };


================================================
FILE: web/src/sections/cards/AgentCard.tsx
================================================
"use client";

import { useMemo, useCallback } from "react";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { Button } from "@opal/components";
import { useAppRouter } from "@/hooks/appNavigation";
import IconButton from "@/refresh-components/buttons/IconButton";
import { usePinnedAgents, useAgent } from "@/hooks/useAgents";
import { cn, noProp } from "@/lib/utils";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import {
  checkUserOwnsAgent,
  updateAgentSharedStatus,
  updateAgentFeaturedStatus,
} from "@/lib/agents";
import { useUser } from "@/providers/UserProvider";
import {
  SvgActions,
  SvgBarChart,
  SvgBubbleText,
  SvgEdit,
  SvgPin,
  SvgPinned,
  SvgShare,
  SvgUser,
} from "@opal/icons";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import ShareAgentModal from "@/sections/modals/ShareAgentModal";
import AgentViewerModal from "@/sections/modals/AgentViewerModal";
import { toast } from "@/hooks/useToast";
import { CardItemLayout } from "@/layouts/general-layouts";
import { Content } from "@opal/layouts";
import { Interactive } from "@opal/core";
import { Card } from "@/refresh-components/cards";

export interface AgentCardProps {
  agent: MinimalPersonaSnapshot;
}

export default function AgentCard({ agent }: AgentCardProps) {
  const route = useAppRouter();
  const router = useRouter();
  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();
  const pinned = useMemo(
    () => pinnedAgents.some((pinnedAgent) => pinnedAgent.id === agent.id),
    [agent.id, pinnedAgents]
  );
  const { user, isAdmin, isCurator } = useUser();
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
  const canUpdateFeaturedStatus = isAdmin || isCurator;
  const isOwnedByUser = checkUserOwnsAgent(user, agent);
  const shareAgentModal = useCreateModal();
  const agentViewerModal = useCreateModal();
  const { agent: fullAgent, refresh: refreshAgent } = useAgent(agent.id);

  // Start chat and auto-pin unpinned agents to the sidebar
  const handleStartChat = useCallback(() => {
    if (!pinned) {
      togglePinnedAgent(agent, true);
    }
    route({ agentId: agent.id });
  }, [pinned, togglePinnedAgent, agent, route]);

  const handleShare = useCallback(
    async (
      userIds: string[],
      groupIds: number[],
      isPublic: boolean,
      isFeatured: boolean,
      labelIds: number[]
    ) => {
      const shareError = await updateAgentSharedStatus(
        agent.id,
        userIds,
        groupIds,
        isPublic,
        isPaidEnterpriseFeaturesEnabled,
        labelIds
      );

      if (shareError) {
        toast.error(`Failed to share agent: ${shareError}`);
        return;
      }

      if (canUpdateFeaturedStatus) {
        const featuredError = await updateAgentFeaturedStatus(
          agent.id,
          isFeatured
        );
        if (featuredError) {
          toast.error(`Failed to update featured status: ${featuredError}`);
          refreshAgent();
          return;
        }
      }

      refreshAgent();
      shareAgentModal.toggle(false);
    },
    [
      agent.id,
      canUpdateFeaturedStatus,
      isPaidEnterpriseFeaturesEnabled,
      refreshAgent,
    ]
  );

  return (
    <>
      <shareAgentModal.Provider>
        <ShareAgentModal
          agentId={agent.id}
          userIds={fullAgent?.users?.map((u) => u.id) ?? []}
          groupIds={fullAgent?.groups ?? []}
          isPublic={fullAgent?.is_public ?? false}
          isFeatured={fullAgent?.is_featured ?? false}
          labelIds={fullAgent?.labels?.map((l) => l.id) ?? []}
          onShare={handleShare}
        />
      </shareAgentModal.Provider>

      <agentViewerModal.Provider>
        {fullAgent && <AgentViewerModal agent={fullAgent} />}
      </agentViewerModal.Provider>

      <Interactive.Simple
        onClick={() => agentViewerModal.toggle(true)}
        group="group/AgentCard"
      >
        <Card
          padding={0}
          gap={0}
          height="full"
          className="radial-00 hover:shadow-00"
        >
          <div className="flex self-stretch h-[6rem]">
            <CardItemLayout
              icon={(props) => <AgentAvatar agent={agent} {...props} />}
              title={agent.name}
              description={agent.description}
              rightChildren={
                <>
                  {isOwnedByUser && isPaidEnterpriseFeaturesEnabled && (
                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
                    <IconButton
                      icon={SvgBarChart}
                      tertiary
                      onClick={noProp(() =>
                        router.push(`/ee/agents/stats/${agent.id}` as Route)
                      )}
                      tooltip="View Agent Stats"
                      className="hidden group-hover/AgentCard:flex"
                    />
                  )}
                  {isOwnedByUser && (
                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
                    <IconButton
                      icon={SvgEdit}
                      tertiary
                      onClick={noProp(() =>
                        router.push(`/app/agents/edit/${agent.id}` as Route)
                      )}
                      tooltip="Edit Agent"
                      className="hidden group-hover/AgentCard:flex"
                    />
                  )}
                  {isOwnedByUser && (
                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
                    <IconButton
                      icon={SvgShare}
                      tertiary
                      onClick={noProp(() => shareAgentModal.toggle(true))}
                      tooltip="Share Agent"
                      className="hidden group-hover/AgentCard:flex"
                    />
                  )}
                  {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                  <IconButton
                    icon={pinned ? SvgPinned : SvgPin}
                    tertiary
                    onClick={noProp(() => togglePinnedAgent(agent, !pinned))}
                    tooltip={pinned ? "Unpin from Sidebar" : "Pin to Sidebar"}
                    className={cn(
                      !pinned && "hidden group-hover/AgentCard:flex"
                    )}
                  />
                </>
              }
            />
          </div>

          {/* Footer section - bg-background-tint-01 */}
          <div className="bg-background-tint-01 p-1 flex flex-row items-end justify-between w-full">
            {/* Left side - creator and actions */}
            <div className="flex flex-col gap-1 py-1 px-2">
              <Content
                icon={SvgUser}
                title={agent.owner?.email || "Onyx"}
                sizePreset="secondary"
                variant="body"
                prominence="muted"
              />
              <Content
                icon={SvgActions}
                title={
                  agent.tools.length > 0
                    ? `${agent.tools.length} Action${
                        agent.tools.length > 1 ? "s" : ""
                      }`
                    : "No Actions"
                }
                sizePreset="secondary"
                variant="body"
                prominence="muted"
              />
            </div>

            {/* Right side - Start Chat button */}
            <div className="p-0.5">
              <Button
                prominence="tertiary"
                rightIcon={SvgBubbleText}
                onClick={noProp(handleStartChat)}
              >
                Start Chat
              </Button>
            </div>
          </div>
        </Card>
      </Interactive.Simple>
    </>
  );
}


================================================
FILE: web/src/sections/cards/DocumentSetCard.tsx
================================================
"use client";

import { DocumentSetSummary } from "@/lib/types";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SvgFiles } from "@opal/icons";
import { Interactive } from "@opal/core";
import { AttachmentItemLayout } from "@/layouts/general-layouts";
import Spacer from "@/refresh-components/Spacer";

export interface DocumentSetCardProps {
  documentSet: DocumentSetSummary;
  isSelected?: boolean;
  onSelectToggle?: (isSelected: boolean) => void;
  disabled?: boolean;
  disabledTooltip?: string;
}

export default function DocumentSetCard({
  documentSet,
  isSelected,
  onSelectToggle,
  disabled,
  disabledTooltip,
}: DocumentSetCardProps) {
  return (
    <SimpleTooltip
      tooltip={disabled && disabledTooltip ? disabledTooltip : undefined}
      disabled={!disabled || !disabledTooltip}
    >
      <div className="max-w-[12rem]">
        <Interactive.Simple
          onClick={
            disabled || isSelected === undefined
              ? undefined
              : () => onSelectToggle?.(!isSelected)
          }
        >
          <Interactive.Container
            data-testid={`document-set-card-${documentSet.id}`}
            border
            heightVariant="fit"
          >
            <AttachmentItemLayout
              icon={SvgFiles}
              title={documentSet.name}
              description={documentSet.description}
              rightChildren={
                isSelected === undefined ? undefined : (
                  <div onClick={(e) => e.stopPropagation()}>
                    <Checkbox
                      checked={isSelected}
                      disabled={disabled}
                      onCheckedChange={
                        disabled
                          ? undefined
                          : () => onSelectToggle?.(!isSelected)
                      }
                    />
                  </div>
                )
              }
            />
            <Spacer horizontal rem={0.5} />
          </Interactive.Container>
        </Interactive.Simple>
      </div>
    </SimpleTooltip>
  );
}


================================================
FILE: web/src/sections/cards/FileCard.tsx
================================================
"use client";

import { useMemo, useState } from "react";
import type { ProjectFile } from "@/app/app/projects/projectsService";
import { UserFileStatus } from "@/app/app/projects/projectsService";
import { cn, isImageFile } from "@/lib/utils";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { SvgFileText, SvgX } from "@opal/icons";
import { Interactive, Hoverable } from "@opal/core";
import { AttachmentItemLayout } from "@/layouts/general-layouts";
import Spacer from "@/refresh-components/Spacer";

interface RemovableProps {
  onRemove?: () => void;
  children: React.ReactNode;
}

function Removable({ onRemove, children }: RemovableProps) {
  if (!onRemove) {
    return <>{children}</>;
  }

  return (
    <Hoverable.Root group="fileCard" widthVariant="fit">
      <div className="relative">
        <div
          className={cn(
            "absolute -left-2 -top-2 z-10",
            "pointer-events-none focus-within:pointer-events-auto"
          )}
        >
          <Hoverable.Item group="fileCard" variant="opacity-on-hover">
            <button
              type="button"
              onClick={(e) => {
                e.stopPropagation();
                onRemove();
              }}
              title="Remove"
              aria-label="Remove"
              className={cn(
                "h-4 w-4",
                "flex items-center justify-center",
                "rounded-04 border border-border text-[11px]",
                "bg-background-neutral-inverted-01 text-text-inverted-05 shadow-sm",
                "pointer-events-auto",
                "hover:opacity-90"
              )}
            >
              <SvgX className="h-3 w-3 stroke-text-inverted-03" />
            </button>
          </Hoverable.Item>
        </div>
        {children}
      </div>
    </Hoverable.Root>
  );
}

interface ImageFileCardProps {
  file: ProjectFile;
  imageUrl: string | null;
  removeFile?: (fileId: string) => void;
  onFileClick?: (file: ProjectFile) => void;
  isProcessing?: boolean;
  compact?: boolean;
}
function ImageFileCard({
  file,
  imageUrl,
  removeFile,
  onFileClick,
  isProcessing = false,
  compact = false,
}: ImageFileCardProps) {
  const sizeClass = compact ? "h-11 w-11" : "h-20 w-20";
  const loaderSize = compact ? "h-5 w-5" : "h-8 w-8";
  const iconSize = compact ? "h-5 w-5" : "h-8 w-8";
  const [imgError, setImgError] = useState(false);

  const doneUploading = String(file.status) !== UserFileStatus.UPLOADING;

  return (
    <Removable
      onRemove={
        removeFile && doneUploading ? () => removeFile(file.id) : undefined
      }
    >
      <div
        className={cn(
          sizeClass,
          "rounded-08 border border-border-01",
          isProcessing && "bg-background-neutral-02",
          onFileClick && !isProcessing && "cursor-pointer hover:opacity-90"
        )}
        onClick={() => {
          if (onFileClick && !isProcessing) {
            onFileClick(file);
          }
        }}
      >
        {!doneUploading || !imageUrl ? (
          <div className="h-full w-full flex items-center justify-center">
            <SimpleLoader className={loaderSize} />
          </div>
        ) : imgError ? (
          <div className="h-full w-full flex items-center justify-center">
            <SvgFileText className={iconSize} />
          </div>
        ) : (
          <img
            src={imageUrl}
            alt={file.name}
            className="h-full w-full object-cover rounded-08"
            onError={() => setImgError(true)}
          />
        )}
      </div>
    </Removable>
  );
}

export interface FileCardProps {
  file: ProjectFile;
  removeFile?: (fileId: string) => void;
  hideProcessingState?: boolean;
  onFileClick?: (file: ProjectFile) => void;
  compactImages?: boolean;
}
export function FileCard({
  file,
  removeFile,
  hideProcessingState = false,
  onFileClick,
  compactImages = false,
}: FileCardProps) {
  const typeLabel = useMemo(() => {
    const name = String(file.name || "");
    const lastDotIndex = name.lastIndexOf(".");
    if (lastDotIndex <= 0 || lastDotIndex === name.length - 1) {
      return "";
    }
    return name.slice(lastDotIndex + 1).toUpperCase();
  }, [file.name]);

  const isImage = useMemo(() => {
    return isImageFile(file.name);
  }, [file.name]);

  const imageUrl = useMemo(() => {
    if (isImage && file.file_id) {
      return `/api/chat/file/${file.file_id}`;
    }
    return null;
  }, [isImage, file.file_id]);

  const isActuallyProcessing =
    String(file.status) === UserFileStatus.UPLOADING ||
    String(file.status) === UserFileStatus.PROCESSING;

  // When hideProcessingState is true, we treat processing files as completed for display purposes
  const isProcessing = hideProcessingState ? false : isActuallyProcessing;

  const doneUploading = String(file.status) !== UserFileStatus.UPLOADING;

  // For images, always show the larger preview layout (even while processing)
  if (isImage) {
    return (
      <ImageFileCard
        file={file}
        imageUrl={imageUrl}
        removeFile={removeFile}
        onFileClick={onFileClick}
        isProcessing={isProcessing}
        compact={compactImages}
      />
    );
  }

  return (
    <Removable
      onRemove={
        removeFile && doneUploading ? () => removeFile(file.id) : undefined
      }
    >
      <div className="min-w-0 max-w-[12rem]">
        <Interactive.Container border heightVariant="fit">
          <div className="[&_.opal-content-md-title-row]:min-w-0 [&_.opal-content-md-title]:break-all">
            <AttachmentItemLayout
              icon={isProcessing ? SimpleLoader : SvgFileText}
              title={file.name}
              description={
                isProcessing
                  ? file.status === UserFileStatus.UPLOADING
                    ? "Uploading..."
                    : "Processing..."
                  : typeLabel
              }
            />
          </div>
          <Spacer horizontal rem={0.5} />
        </Interactive.Container>
      </div>
    </Removable>
  );
}

// Skeleton loading component for file cards
export function FileCardSkeleton() {
  return (
    <div className="min-w-[120px] max-w-[240px] h-11 rounded-08 bg-background-tint-02 animate-pulse" />
  );
}


================================================
FILE: web/src/sections/cards/README.md
================================================
# Cards

This directory contains feature-specific card components.

Cards are self-contained UI components that display information about a specific entity (e.g., an agent, a document set, a connector) in a visually distinct, bounded container. They typically include:

- Entity identification (name, avatar, icon)
- Summary information
- Quick actions (buttons, menus)

## Guidelines

- Each card should be focused on a single entity type
- Cards should be reusable across different pages/contexts
- Keep card-specific logic within the card component
- Use shared components from `@/refresh-components` for common UI elements


================================================
FILE: web/src/sections/chat/ChatScrollContainer.tsx
================================================
"use client";

import React, {
  ForwardedRef,
  useCallback,
  useEffect,
  useImperativeHandle,
  useRef,
  useState,
} from "react";
import { ScrollContainerProvider } from "@/components/chat/ScrollContainerContext";
import { cn } from "@/lib/utils";

// Size constants
const DEFAULT_ANCHOR_OFFSET_PX = 16; // 1rem
const DEFAULT_FADE_THRESHOLD_PX = 80; // 5rem
const DEFAULT_BUTTON_THRESHOLD_PX = 32; // 2rem

// Fade configuration
const TOP_FADE_HEIGHT = "1rem";
const BOTTOM_FADE_HEIGHT = "1rem";

export interface ScrollState {
  isAtBottom: boolean;
  hasContentAbove: boolean;
  hasContentBelow: boolean;
}

export interface ChatScrollContainerHandle {
  scrollToBottom: (behavior?: ScrollBehavior) => void;
}

export interface ChatScrollContainerProps {
  children: React.ReactNode;

  /**
   * CSS selector for the anchor element (e.g., "#message-123")
   * Used to scroll to a specific message position
   */
  anchorSelector?: string;

  /** Enable auto-scroll behavior (follow new content) */
  autoScroll?: boolean;

  /** Whether content is currently streaming (affects scroll button visibility) */
  isStreaming?: boolean;

  /** Callback when scroll button visibility should change */
  onScrollButtonVisibilityChange?: (visible: boolean) => void;

  /** Session ID - resets scroll state when changed */
  sessionId?: string;

  /** Hide the scrollbar (scroll still works, just invisible) */
  hideScrollbar?: boolean;
}

// Build a CSS mask that fades content opacity at top/bottom edges
function buildContentMask(): string {
  // Mask uses black = visible, transparent = hidden
  // Top: fades from transparent to visible over 1rem
  // Bottom: fades from visible to transparent over 1rem
  return `linear-gradient(to bottom, transparent 0%, transparent 0rem, black ${TOP_FADE_HEIGHT}, black calc(100% - ${BOTTOM_FADE_HEIGHT}), transparent 100%)`;
}

const ChatScrollContainer = React.memo(
  React.forwardRef(
    (
      {
        children,
        anchorSelector,
        autoScroll = true,
        isStreaming = false,
        onScrollButtonVisibilityChange,
        sessionId,
        hideScrollbar = false,
      }: ChatScrollContainerProps,
      ref: ForwardedRef<ChatScrollContainerHandle>
    ) => {
      const anchorOffsetPx = DEFAULT_ANCHOR_OFFSET_PX;
      const fadeThresholdPx = DEFAULT_FADE_THRESHOLD_PX;
      const buttonThresholdPx = DEFAULT_BUTTON_THRESHOLD_PX;
      const scrollContainerRef = useRef<HTMLDivElement>(null);
      const contentWrapperRef = useRef<HTMLDivElement>(null);
      const spacerHeightRef = useRef(0);
      const endDivRef = useRef<HTMLDivElement>(null);
      const scrolledForSessionRef = useRef<string | null>(null);
      const prevAnchorSelectorRef = useRef<string | null>(null);

      const [hasContentAbove, setHasContentAbove] = useState(false);
      const [hasContentBelow, setHasContentBelow] = useState(false);
      const [isAtBottom, setIsAtBottom] = useState(true);
      const isAtBottomRef = useRef(true); // Ref for use in callbacks
      const isAutoScrollingRef = useRef(false); // Prevent handleScroll from interfering during auto-scroll
      const prevScrollTopRef = useRef(0); // Track scroll position to detect scroll direction
      const [isScrollReady, setIsScrollReady] = useState(false);

      // Use refs for values that change during streaming to prevent effect re-runs
      const onScrollButtonVisibilityChangeRef = useRef(
        onScrollButtonVisibilityChange
      );
      onScrollButtonVisibilityChangeRef.current =
        onScrollButtonVisibilityChange;
      const autoScrollRef = useRef(autoScroll);
      autoScrollRef.current = autoScroll;
      const isStreamingRef = useRef(isStreaming);
      isStreamingRef.current = isStreaming;

      // Get current scroll state
      const getScrollState = useCallback((): ScrollState => {
        const container = scrollContainerRef.current;
        if (!container || !endDivRef.current) {
          return {
            isAtBottom: true,
            hasContentAbove: false,
            hasContentBelow: false,
          };
        }

        // Exclude the dynamic spacer — it's cosmetic (push-up effect) and
        // shouldn't make the system think there's real content below the viewport.
        const contentEnd =
          endDivRef.current.offsetTop - spacerHeightRef.current;
        const viewportBottom = container.scrollTop + container.clientHeight;
        const contentBelowViewport = contentEnd - viewportBottom;

        return {
          isAtBottom: contentBelowViewport <= buttonThresholdPx,
          hasContentAbove: container.scrollTop > fadeThresholdPx,
          hasContentBelow: contentBelowViewport > fadeThresholdPx,
        };
      }, [buttonThresholdPx, fadeThresholdPx]);

      // Update scroll state and notify parent about button visibility
      const updateScrollState = useCallback(() => {
        const state = getScrollState();
        setIsAtBottom(state.isAtBottom);
        isAtBottomRef.current = state.isAtBottom; // Keep ref in sync
        setHasContentAbove(state.hasContentAbove);
        setHasContentBelow(state.hasContentBelow);

        // Show button when user is not at bottom (e.g., scrolled up)
        onScrollButtonVisibilityChangeRef.current?.(!state.isAtBottom);
      }, [getScrollState]);

      // Scroll to bottom of content
      const scrollToBottom = useCallback(
        (behavior: ScrollBehavior = "smooth") => {
          const container = scrollContainerRef.current;
          if (!container || !endDivRef.current) return;

          // Mark as auto-scrolling to prevent handleScroll interference
          isAutoScrollingRef.current = true;

          // Use scrollTo instead of scrollIntoView for better cross-browser support
          const targetScrollTop =
            container.scrollHeight - container.clientHeight;
          container.scrollTo({ top: targetScrollTop, behavior });

          // Update tracking refs
          prevScrollTopRef.current = targetScrollTop;
          isAtBottomRef.current = true;

          // For smooth scrolling, keep isAutoScrollingRef true longer
          if (behavior === "smooth") {
            // Clear after animation likely completes (Safari smooth scroll is ~500ms)
            setTimeout(() => {
              isAutoScrollingRef.current = false;
              if (container) {
                prevScrollTopRef.current = container.scrollTop;
              }
              // Refresh scroll state so the scroll-to-bottom button hides
              updateScrollState();
            }, 600);
          } else {
            isAutoScrollingRef.current = false;
          }
        },
        [updateScrollState]
      );

      // Expose scrollToBottom via ref
      useImperativeHandle(ref, () => ({ scrollToBottom }), [scrollToBottom]);

      // Re-evaluate button visibility when at-bottom state changes
      useEffect(() => {
        onScrollButtonVisibilityChangeRef.current?.(!isAtBottom);
      }, [isAtBottom]);

      // Handle scroll events (user scrolls)
      const handleScroll = useCallback(() => {
        const container = scrollContainerRef.current;
        if (!container) return;

        // Skip if this scroll was triggered by auto-scroll
        if (isAutoScrollingRef.current) return;

        const currentScrollTop = container.scrollTop;
        const scrolledUp = currentScrollTop < prevScrollTopRef.current - 5; // 5px threshold to ignore micro-movements
        prevScrollTopRef.current = currentScrollTop;

        // Only update isAtBottomRef when user explicitly scrolls UP
        // This prevents content growth or programmatic scrolls from disabling auto-scroll
        if (scrolledUp) {
          updateScrollState();
        } else {
          // Still update fade overlays, but preserve isAtBottomRef
          const state = getScrollState();
          setHasContentAbove(state.hasContentAbove);
          setHasContentBelow(state.hasContentBelow);
          // Update button visibility based on actual position
          onScrollButtonVisibilityChangeRef.current?.(!state.isAtBottom);
        }
      }, [updateScrollState, getScrollState]);

      // Watch for content changes (MutationObserver + ResizeObserver)
      useEffect(() => {
        const container = scrollContainerRef.current;
        if (!container) return;

        let rafId: number | null = null;

        const onContentChange = () => {
          if (rafId) return;
          rafId = requestAnimationFrame(() => {
            rafId = null;

            // Capture whether we were at bottom BEFORE content changed
            const wasAtBottom = isAtBottomRef.current;

            // Auto-scroll: follow content if we were at bottom.
            // Skip instant auto-scroll during DynamicBottomSpacer's smooth
            // scroll to avoid competing scroll commands.
            if (
              autoScrollRef.current &&
              wasAtBottom &&
              container.dataset.smoothScrollActive !== "true"
            ) {
              // scrollToBottom handles isAutoScrollingRef and ref updates
              scrollToBottom("instant");
            }

            updateScrollState();
          });
        };

        // MutationObserver for content changes
        const mutationObserver = new MutationObserver(onContentChange);
        mutationObserver.observe(container, {
          childList: true,
          subtree: true,
          characterData: true,
        });

        // ResizeObserver for container size changes
        const resizeObserver = new ResizeObserver(onContentChange);
        resizeObserver.observe(container);

        return () => {
          mutationObserver.disconnect();
          resizeObserver.disconnect();
          if (rafId) cancelAnimationFrame(rafId);
        };
      }, [updateScrollState, scrollToBottom]);

      // Handle session changes and anchor changes
      useEffect(() => {
        const container = scrollContainerRef.current;
        if (!container) return;

        const isNewSession =
          scrolledForSessionRef.current !== null &&
          scrolledForSessionRef.current !== sessionId;
        const isNewAnchor = prevAnchorSelectorRef.current !== anchorSelector;

        // Reset on session change
        if (isNewSession) {
          scrolledForSessionRef.current = null;
          setIsScrollReady(false);
          prevScrollTopRef.current = 0;
          isAtBottomRef.current = true;
        }

        const shouldScroll =
          (scrolledForSessionRef.current !== sessionId || isNewAnchor) &&
          anchorSelector;

        if (!shouldScroll) {
          prevAnchorSelectorRef.current = anchorSelector ?? null;
          return;
        }

        const anchorElement = container.querySelector(
          anchorSelector!
        ) as HTMLElement;
        if (!anchorElement || !endDivRef.current) {
          setIsScrollReady(true);
          scrolledForSessionRef.current = sessionId ?? null;
          prevAnchorSelectorRef.current = anchorSelector ?? null;
          return;
        }

        // Determine scroll behavior
        // New session with existing content = instant, new anchor = smooth
        const isLoadingExistingContent =
          isNewSession || scrolledForSessionRef.current === null;
        const behavior: ScrollBehavior = isLoadingExistingContent
          ? "instant"
          : "smooth";

        // Defer scroll to next tick for layout to settle
        const timeoutId = setTimeout(() => {
          let targetScrollTop: number;

          // When loading an existing conversation, scroll to bottom
          // Otherwise (e.g., anchor change during conversation), scroll to anchor
          if (isLoadingExistingContent) {
            targetScrollTop = container.scrollHeight - container.clientHeight;
          } else {
            targetScrollTop = Math.max(
              0,
              anchorElement.offsetTop - anchorOffsetPx
            );
          }

          container.scrollTo({ top: targetScrollTop, behavior });

          // Update prevScrollTopRef so scroll direction is measured from new position
          prevScrollTopRef.current = targetScrollTop;

          updateScrollState();

          // Mark as "at bottom" after scrolling to bottom so auto-scroll continues
          if (isLoadingExistingContent || autoScrollRef.current) {
            isAtBottomRef.current = true;
          }

          setIsScrollReady(true);
          scrolledForSessionRef.current = sessionId ?? null;
          prevAnchorSelectorRef.current = anchorSelector ?? null;
        }, 0);

        return () => clearTimeout(timeoutId);
      }, [sessionId, anchorSelector, anchorOffsetPx, updateScrollState]);

      // Build mask to fade content opacity at edges
      const contentMask = buildContentMask();

      return (
        <div className="flex flex-col flex-1 min-h-0 w-full relative overflow-hidden mb-1">
          <div
            key={sessionId}
            ref={scrollContainerRef}
            data-testid="chat-scroll-container"
            className={cn(
              "flex flex-col flex-1 min-h-0 overflow-y-auto overflow-x-hidden",
              hideScrollbar ? "no-scrollbar" : "default-scrollbar"
            )}
            onScroll={handleScroll}
            style={{
              scrollbarGutter: "stable both-edges",
              // Apply mask to fade content opacity at edges
              maskImage: contentMask,
              WebkitMaskImage: contentMask,
            }}
          >
            <div
              ref={contentWrapperRef}
              className="w-full flex-1 flex flex-col items-center px-4"
              data-scroll-ready={isScrollReady}
              style={{
                visibility: isScrollReady ? "visible" : "hidden",
              }}
            >
              <ScrollContainerProvider
                scrollContainerRef={scrollContainerRef}
                contentWrapperRef={contentWrapperRef}
                spacerHeightRef={spacerHeightRef}
              >
                {children}
              </ScrollContainerProvider>

              {/* End marker to measure content end */}
              <div ref={endDivRef} />
            </div>
          </div>
        </div>
      );
    }
  )
);

ChatScrollContainer.displayName = "ChatScrollContainer";

export default ChatScrollContainer;


================================================
FILE: web/src/sections/chat/ChatUI.tsx
================================================
"use client";

import React, { useCallback, useMemo, useRef } from "react";
import { Message } from "@/app/app/interfaces";
import { OnyxDocument, MinimalOnyxDocument } from "@/lib/search/interfaces";
import HumanMessage from "@/app/app/message/HumanMessage";
import { ErrorBanner } from "@/app/app/message/Resubmit";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { LlmDescriptor, LlmManager } from "@/lib/hooks";
import AgentMessage from "@/app/app/message/messageComponents/AgentMessage";
import Spacer from "@/refresh-components/Spacer";
import DynamicBottomSpacer from "@/components/chat/DynamicBottomSpacer";
import {
  useCurrentMessageHistory,
  useCurrentMessageTree,
  useLoadingError,
  useUncaughtError,
} from "@/app/app/stores/useChatSessionStore";

export interface ChatUIProps {
  liveAgent: MinimalPersonaSnapshot;
  llmManager: LlmManager;
  setPresentingDocument: (doc: MinimalOnyxDocument | null) => void;
  onMessageSelection: (nodeId: number) => void;
  stopGenerating: () => void;

  // Submit handlers
  onSubmit: (args: {
    message: string;
    messageIdToResend?: number;
    currentMessageFiles: any[];
    deepResearch: boolean;
    modelOverride?: LlmDescriptor;
    regenerationRequest?: {
      messageId: number;
      parentMessage: Message;
      forceSearch?: boolean;
    };
    forceSearch?: boolean;
  }) => Promise<void>;
  deepResearchEnabled: boolean;
  currentMessageFiles: any[];

  onResubmit: () => void;

  /**
   * Node ID of the message to use as scroll anchor.
   * Used by DynamicBottomSpacer to position the push-up effect.
   */
  anchorNodeId?: number;
}

const ChatUI = React.memo(
  ({
    liveAgent,
    llmManager,
    setPresentingDocument,
    onMessageSelection,
    stopGenerating,
    onSubmit,
    deepResearchEnabled,
    currentMessageFiles,
    onResubmit,
    anchorNodeId,
  }: ChatUIProps) => {
    // Get messages and error state from store
    const messages = useCurrentMessageHistory();
    const messageTree = useCurrentMessageTree();
    const error = useUncaughtError();
    const loadError = useLoadingError();
    // Stable fallbacks to avoid changing prop identities on each render
    const emptyDocs = useMemo<OnyxDocument[]>(() => [], []);
    const emptyChildrenIds = useMemo<number[]>(() => [], []);

    // Use refs to keep callbacks stable while always using latest values
    const onSubmitRef = useRef(onSubmit);
    const deepResearchEnabledRef = useRef(deepResearchEnabled);
    const currentMessageFilesRef = useRef(currentMessageFiles);
    onSubmitRef.current = onSubmit;
    deepResearchEnabledRef.current = deepResearchEnabled;
    currentMessageFilesRef.current = currentMessageFiles;

    const createRegenerator = useCallback(
      (regenerationRequest: {
        messageId: number;
        parentMessage: Message;
        forceSearch?: boolean;
      }) => {
        return async function (modelOverride: LlmDescriptor) {
          return await onSubmitRef.current({
            message: regenerationRequest.parentMessage.message,
            currentMessageFiles: currentMessageFilesRef.current,
            deepResearch: deepResearchEnabledRef.current,
            modelOverride,
            messageIdToResend: regenerationRequest.parentMessage.messageId,
            regenerationRequest,
            forceSearch: regenerationRequest.forceSearch,
          });
        };
      },
      []
    );

    const handleEditWithMessageId = useCallback(
      (editedContent: string, msgId: number) => {
        onSubmitRef.current({
          message: editedContent,
          messageIdToResend: msgId,
          currentMessageFiles: [],
          deepResearch: deepResearchEnabledRef.current,
        });
      },
      []
    );

    return (
      <>
        <div className="flex flex-col w-full max-w-[var(--app-page-main-content-width)] h-full pt-4 pb-8 pr-1 gap-12">
          {messages.map((message, i) => {
            const messageReactComponentKey = `message-${message.nodeId}`;
            const parentMessage = message.parentNodeId
              ? messageTree?.get(message.parentNodeId)
              : null;
            if (message.type === "user") {
              const nextMessage =
                messages.length > i + 1 ? messages[i + 1] : null;

              return (
                <div
                  id={messageReactComponentKey}
                  key={messageReactComponentKey}
                >
                  <HumanMessage
                    disableSwitchingForStreaming={
                      (nextMessage && nextMessage.is_generating) || false
                    }
                    stopGenerating={stopGenerating}
                    content={message.message}
                    files={message.files}
                    messageId={message.messageId}
                    nodeId={message.nodeId}
                    onEdit={handleEditWithMessageId}
                    otherMessagesCanSwitchTo={
                      parentMessage?.childrenNodeIds ?? emptyChildrenIds
                    }
                    onMessageSelection={onMessageSelection}
                  />
                </div>
              );
            } else if (message.type === "assistant") {
              if ((error || loadError) && i === messages.length - 1) {
                return (
                  <div key={`error-${message.nodeId}`} className="p-4">
                    <ErrorBanner
                      resubmit={onResubmit}
                      error={error || loadError || ""}
                      errorCode={message.errorCode || undefined}
                      isRetryable={message.isRetryable ?? true}
                      details={message.errorDetails || undefined}
                      stackTrace={message.stackTrace || undefined}
                    />
                  </div>
                );
              }

              const previousMessage = i !== 0 ? messages[i - 1] : null;
              const chatStateData = {
                agent: liveAgent,
                docs: message.documents ?? emptyDocs,
                citations: message.citations,
                setPresentingDocument,
                overriddenModel: llmManager.currentLlm?.modelName,
                researchType: message.researchType,
              };

              return (
                <div
                  id={`message-${message.nodeId}`}
                  key={messageReactComponentKey}
                >
                  <AgentMessage
                    rawPackets={message.packets}
                    packetCount={message.packetCount}
                    chatState={chatStateData}
                    nodeId={message.nodeId}
                    messageId={message.messageId}
                    currentFeedback={message.currentFeedback}
                    llmManager={llmManager}
                    otherMessagesCanSwitchTo={
                      parentMessage?.childrenNodeIds ?? emptyChildrenIds
                    }
                    onMessageSelection={onMessageSelection}
                    onRegenerate={createRegenerator}
                    parentMessage={previousMessage}
                    processingDurationSeconds={
                      message.processingDurationSeconds
                    }
                  />
                </div>
              );
            }
            return null;
          })}

          {/* Error banner when last message is user message or error type */}
          {(((error !== null || loadError !== null) &&
            messages[messages.length - 1]?.type === "user") ||
            messages[messages.length - 1]?.type === "error") && (
            <div className="p-4">
              <ErrorBanner
                resubmit={onResubmit}
                error={error || loadError || ""}
                errorCode={
                  messages[messages.length - 1]?.errorCode || undefined
                }
                isRetryable={messages[messages.length - 1]?.isRetryable ?? true}
                details={
                  messages[messages.length - 1]?.errorDetails || undefined
                }
                stackTrace={
                  messages[messages.length - 1]?.stackTrace || undefined
                }
              />
            </div>
          )}
        </div>
        {/* Dynamic spacer for "fresh chat" effect - pushes content up when new message is sent */}
        <DynamicBottomSpacer anchorNodeId={anchorNodeId} />
      </>
    );
  }
);
ChatUI.displayName = "ChatUI";

export default ChatUI;


================================================
FILE: web/src/sections/document-sidebar/ChatDocumentDisplay.tsx
================================================
import { SourceIcon } from "@/components/SourceIcon";
import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces";
import { FiTag } from "react-icons/fi";
import { buildDocumentSummaryDisplay } from "@/components/search/DocumentDisplay";
import { DocumentUpdatedAtBadge } from "@/components/search/DocumentUpdatedAtBadge";
import { MetadataBadge } from "@/components/MetadataBadge";
import { WebResultIcon } from "@/components/WebResultIcon";
import { Dispatch, SetStateAction, useMemo } from "react";
import { openDocument } from "@/lib/search/utils";
import { ValidSources } from "@/lib/types";
import { cn } from "@/lib/utils";
import Truncated from "@/refresh-components/texts/Truncated";
import Text from "@/refresh-components/texts/Text";

interface DocumentMetadataBlockProps {
  modal?: boolean;
  document: OnyxDocument;
}

function DocumentMetadataBlock({
  modal,
  document,
}: DocumentMetadataBlockProps) {
  const MAX_METADATA_ITEMS = 3;
  const metadataEntries = Object.entries(document.metadata);

  return (
    <div className="flex items-center overflow-hidden">
      {document.updated_at && (
        <DocumentUpdatedAtBadge updatedAt={document.updated_at} modal={modal} />
      )}

      {metadataEntries.length > 0 && (
        <>
          <div className="flex items-center overflow-hidden">
            {metadataEntries
              .slice(0, MAX_METADATA_ITEMS)
              .map(([key, value], index) => (
                <MetadataBadge
                  key={index}
                  icon={FiTag}
                  value={`${key}=${value}`}
                />
              ))}
            {metadataEntries.length > MAX_METADATA_ITEMS && (
              <span className="ml-1 text-xs text-text-500">...</span>
            )}
          </div>
        </>
      )}
    </div>
  );
}

export interface ChatDocumentDisplayProps {
  document: OnyxDocument;
  modal?: boolean;
  isSelected: boolean;
  setPresentingDocument: Dispatch<SetStateAction<MinimalOnyxDocument | null>>;
}

export default function ChatDocumentDisplay({
  document,
  modal,
  isSelected,
  setPresentingDocument,
}: ChatDocumentDisplayProps) {
  const isInternet = document.is_internet;
  const title = useMemo(
    () => document.semantic_identifier || document.document_id,
    [document.semantic_identifier, document.document_id]
  );

  if (document.score === null) {
    return null;
  }

  const hasMetadata =
    document.updated_at || Object.keys(document.metadata).length > 0;

  return (
    <div
      onClick={() => openDocument(document, setPresentingDocument)}
      className={cn(
        "flex w-full flex-col p-3 gap-2 rounded-12 hover:bg-background-tint-00 cursor-pointer",
        isSelected && "bg-action-link-02"
      )}
    >
      <div className="flex items-center gap-2">
        {document.is_internet || document.source_type === ValidSources.Web ? (
          <WebResultIcon url={document.link} />
        ) : (
          <SourceIcon sourceType={document.source_type} iconSize={18} />
        )}
        <Truncated className="line-clamp-2" side="left">
          {title}
        </Truncated>
      </div>

      {hasMetadata && (
        <DocumentMetadataBlock modal={modal} document={document} />
      )}

      <Text as="p" className="line-clamp-2 text-left" secondaryBody text03>
        {buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}
      </Text>
    </div>
  );
}


================================================
FILE: web/src/sections/document-sidebar/DocumentsSidebar.tsx
================================================
"use client";

import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces";
import ChatDocumentDisplay from "@/sections/document-sidebar/ChatDocumentDisplay";
import { removeDuplicateDocs } from "@/lib/documentUtils";
import { Dispatch, SetStateAction, useMemo, memo } from "react";
import { getCitations } from "@/app/app/services/packetUtils";
import {
  useCurrentMessageTree,
  useSelectedNodeForDocDisplay,
} from "@/app/app/stores/useChatSessionStore";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { SvgSearchMenu, SvgX } from "@opal/icons";
import Separator from "@/refresh-components/Separator";

// Build an OnyxDocument from basic file info
const buildOnyxDocumentFromFile = (
  id: string,
  name?: string | null,
  appendProjectPrefix?: boolean
): OnyxDocument => {
  const document_id = appendProjectPrefix ? `project_file__${id}` : id;
  return {
    document_id,
    semantic_identifier: name || id,
    link: "",
    source_type: "file" as any,
    blurb: "",
    boost: 0,
    hidden: false,
    score: 1,
    chunk_ind: 0,
    match_highlights: [],
    metadata: {},
    updated_at: null,
    is_internet: false,
  } as any;
};

interface HeaderProps {
  children: string;
  onClose: () => void;
}

function Header({ children, onClose }: HeaderProps) {
  return (
    <div className="sticky top-0 z-sticky bg-background-tint-01">
      <div className="flex flex-row w-full items-center justify-between gap-2 py-3">
        <div className="flex items-center gap-2 w-full px-3">
          <SvgSearchMenu className="w-[1.3rem] h-[1.3rem] stroke-text-03" />
          <Text as="p" headingH3 text03>
            {children}
          </Text>
        </div>
        <Button
          icon={SvgX}
          prominence="tertiary"
          onClick={onClose}
          tooltip="Close Sidebar"
        />
      </div>
      <Separator noPadding />
    </div>
  );
}

interface ChatDocumentDisplayWrapperProps {
  children?: React.ReactNode;
}

function ChatDocumentDisplayWrapper({
  children,
}: ChatDocumentDisplayWrapperProps) {
  return (
    <div className="flex flex-col gap-1 items-center justify-center">
      {children}
    </div>
  );
}

interface DocumentsSidebarProps {
  closeSidebar: () => void;
  selectedDocuments: OnyxDocument[] | null;
  modal: boolean;
  setPresentingDocument: Dispatch<SetStateAction<MinimalOnyxDocument | null>>;
}

const DocumentsSidebar = memo(
  ({
    closeSidebar,
    modal,
    selectedDocuments,
    setPresentingDocument,
  }: DocumentsSidebarProps) => {
    const idOfMessageToDisplay = useSelectedNodeForDocDisplay();
    const currentMessageTree = useCurrentMessageTree();

    const selectedMessage = idOfMessageToDisplay
      ? currentMessageTree?.get(idOfMessageToDisplay)
      : null;

    // Get citations in order and build a set of cited document IDs
    const { citedDocumentIds, citationOrder } = useMemo(() => {
      if (!selectedMessage) {
        return {
          citedDocumentIds: new Set<string>(),
          citationOrder: new Map<string, number>(),
        };
      }

      const citedDocumentIds = new Set<string>();
      const citationOrder = new Map<string, number>();
      const citations = getCitations(selectedMessage.packets);
      citations.forEach((citation, index) => {
        citedDocumentIds.add(citation.document_id);
        // Only set the order for the first occurrence
        if (!citationOrder.has(citation.document_id)) {
          citationOrder.set(citation.document_id, index);
        }
      });
      return { citedDocumentIds, citationOrder };
    }, [idOfMessageToDisplay, selectedMessage?.packets.length]);

    // if these are missing for some reason, then nothing we can do. Just
    // don't render.
    // TODO: improve this display
    if (!selectedMessage || !currentMessageTree) return null;

    const humanMessage = selectedMessage.parentNodeId
      ? currentMessageTree.get(selectedMessage.parentNodeId)
      : null;
    const humanFileDescriptors = humanMessage?.files.filter(
      (file) => file.user_file_id !== null
    );
    const selectedDocumentIds =
      selectedDocuments?.map((document) => document.document_id) || [];
    const currentDocuments = selectedMessage.documents || null;
    const dedupedDocuments = removeDuplicateDocs(currentDocuments || []);
    const citedDocuments = dedupedDocuments
      .filter(
        (doc) =>
          doc.document_id !== null &&
          doc.document_id !== undefined &&
          citedDocumentIds.has(doc.document_id)
      )
      .sort((a, b) => {
        // Sort by citation order (order citations appeared in the answer)
        const orderA = citationOrder.get(a.document_id) ?? Infinity;
        const orderB = citationOrder.get(b.document_id) ?? Infinity;
        return orderA - orderB;
      });
    const otherDocuments = dedupedDocuments.filter(
      (doc) =>
        doc.document_id === null ||
        doc.document_id === undefined ||
        !citedDocumentIds.has(doc.document_id)
    );
    const hasCited = citedDocuments.length > 0;
    const hasOther = otherDocuments.length > 0;

    return (
      <div
        id="onyx-chat-sidebar"
        className="bg-background-tint-01 overflow-y-scroll h-full w-full border-l"
      >
        <div className="flex flex-col px-3 gap-6">
          {hasCited && (
            <div>
              <Header onClose={closeSidebar}>Cited Sources</Header>
              <ChatDocumentDisplayWrapper>
                {citedDocuments.map((document) => (
                  <ChatDocumentDisplay
                    key={document.document_id}
                    setPresentingDocument={setPresentingDocument}
                    modal={modal}
                    document={document}
                    isSelected={selectedDocumentIds.includes(
                      document.document_id
                    )}
                  />
                ))}
              </ChatDocumentDisplayWrapper>
            </div>
          )}

          {hasOther && (
            <div>
              <Header onClose={closeSidebar}>
                {citedDocuments.length > 0 ? "More" : "Found Sources"}
              </Header>
              <ChatDocumentDisplayWrapper>
                {otherDocuments.map((document) => (
                  <ChatDocumentDisplay
                    key={document.document_id}
                    setPresentingDocument={setPresentingDocument}
                    modal={modal}
                    document={document}
                    isSelected={selectedDocumentIds.includes(
                      document.document_id
                    )}
                  />
                ))}
              </ChatDocumentDisplayWrapper>
            </div>
          )}

          {humanFileDescriptors && humanFileDescriptors.length > 0 && (
            <div>
              <Header onClose={closeSidebar}>User Files</Header>
              <ChatDocumentDisplayWrapper>
                {humanFileDescriptors.map((file) => (
                  <ChatDocumentDisplay
                    key={file.id}
                    setPresentingDocument={setPresentingDocument}
                    modal={modal}
                    document={buildOnyxDocumentFromFile(
                      file.id,
                      file.name,
                      false
                    )}
                    isSelected={false}
                  />
                ))}
              </ChatDocumentDisplayWrapper>
            </div>
          )}
        </div>
      </div>
    );
  }
);
DocumentsSidebar.displayName = "DocumentsSidebar";

export default DocumentsSidebar;


================================================
FILE: web/src/sections/input/AppInputBar.tsx
================================================
"use client";

import React, {
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import LineItem from "@/refresh-components/buttons/LineItem";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import LLMPopover from "@/refresh-components/popovers/LLMPopover";
import { InputPrompt } from "@/app/app/interfaces";
import { FilterManager, LlmManager, useFederatedConnectors } from "@/lib/hooks";
import usePromptShortcuts from "@/hooks/usePromptShortcuts";
import useFilter from "@/hooks/useFilter";
import useCCPairs from "@/hooks/useCCPairs";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import { ChatState } from "@/app/app/interfaces";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import useAppFocus from "@/hooks/useAppFocus";
import { getPastedFilesIfNoText } from "@/lib/clipboard";
import { cn, isImageFile } from "@/lib/utils";
import { Disabled } from "@opal/core";
import { useUser } from "@/providers/UserProvider";
import {
  SettingsContext,
  useVectorDbEnabled,
} from "@/providers/SettingsProvider";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { FileCard } from "@/sections/cards/FileCard";
import {
  ProjectFile,
  UserFileStatus,
} from "@/app/app/projects/projectsService";
import FilePickerPopover from "@/refresh-components/popovers/FilePickerPopover";
import ActionsPopover from "@/refresh-components/popovers/ActionsPopover";
import {
  getIconForAction,
  hasSearchToolsAvailable,
} from "@/app/app/services/actionUtils";
import {
  SvgArrowUp,
  SvgGlobe,
  SvgHourglass,
  SvgMicrophone,
  SvgPlus,
  SvgPlusCircle,
  SvgSearch,
  SvgStop,
  SvgX,
} from "@opal/icons";
import { Button, SelectButton } from "@opal/components";
import Popover from "@/refresh-components/Popover";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { Section } from "@/layouts/general-layouts";
import Spacer from "@/refresh-components/Spacer";
import MicrophoneButton from "@/sections/input/MicrophoneButton";
import Waveform from "@/components/voice/Waveform";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import { useVoiceStatus } from "@/hooks/useVoiceStatus";

const MIN_INPUT_HEIGHT = 44;
const MAX_INPUT_HEIGHT = 200;

export interface AppInputBarHandle {
  reset: () => void;
  focus: () => void;
}

export interface AppInputBarProps {
  initialMessage?: string;
  stopGenerating: () => void;
  onSubmit: (message: string) => void;
  llmManager: LlmManager;
  chatState: ChatState;
  currentSessionFileTokenCount: number;
  availableContextTokens: number;

  // agents
  selectedAgent: MinimalPersonaSnapshot | undefined;

  handleFileUpload: (files: File[]) => void;
  filterManager: FilterManager;
  deepResearchEnabled: boolean;
  setPresentingDocument?: (document: MinimalOnyxDocument) => void;
  toggleDeepResearch: () => void;
  disabled: boolean;
  ref?: React.Ref<AppInputBarHandle>;
  // Side panel tab reading
  tabReadingEnabled?: boolean;
  currentTabUrl?: string | null;
  onToggleTabReading?: () => void;
}

const AppInputBar = React.memo(
  ({
    filterManager,
    initialMessage = "",
    stopGenerating,
    onSubmit,
    chatState,
    currentSessionFileTokenCount,
    availableContextTokens,
    selectedAgent,

    handleFileUpload,
    llmManager,
    deepResearchEnabled,
    toggleDeepResearch,
    setPresentingDocument,
    disabled,
    ref,
    tabReadingEnabled,
    currentTabUrl,
    onToggleTabReading,
  }: AppInputBarProps) => {
    // Internal message state - kept local to avoid parent re-renders on every keystroke
    const [message, setMessage] = useState(initialMessage);
    const [isRecording, setIsRecording] = useState(false);
    const [recordingCycleCount, setRecordingCycleCount] = useState(0);
    const [isMuted, setIsMuted] = useState(false);
    const [audioLevel, setAudioLevel] = useState(0);
    const stopRecordingRef = useRef<(() => Promise<string | null>) | null>(
      null
    );
    const setMutedRef = useRef<((muted: boolean) => void) | null>(null);
    const textAreaRef = useRef<HTMLTextAreaElement>(null);
    const textAreaWrapperRef = useRef<HTMLDivElement>(null);
    const filesWrapperRef = useRef<HTMLDivElement>(null);
    const filesContentRef = useRef<HTMLDivElement>(null);
    const containerRef = useRef<HTMLDivElement>(null);
    const { user, isAdmin } = useUser();
    const { state } = useQueryController();
    const isClassifying = state.phase === "classifying";
    const isSearchActive =
      state.phase === "searching" || state.phase === "search-results";
    const {
      stopTTS,
      isTTSPlaying,
      isManualTTSPlaying,
      isTTSLoading,
      isAwaitingAutoPlaybackStart,
      isTTSMuted,
      toggleTTSMute,
    } = useVoiceMode();
    const { sttEnabled } = useVoiceStatus();
    // Show mic button: always if STT configured, or greyed-out for admins to prompt setup
    const showMicButton = sttEnabled || isAdmin;
    const isVoicePlaybackActive =
      isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;
    const isVoicePlaybackControllable = isVoicePlaybackActive && !isRecording;
    const isTTSActuallySpeaking = isTTSPlaying || isManualTTSPlaying;
    const appFocus = useAppFocus();
    const isNewSession = appFocus.isNewSession();
    const appMode = state.phase === "idle" ? state.appMode : undefined;
    const isSearchMode =
      (isNewSession && appMode === "search") || isSearchActive;

    const handleRecordingChange = useCallback((nextIsRecording: boolean) => {
      setIsRecording((prevIsRecording) => {
        if (!prevIsRecording && nextIsRecording) {
          setRecordingCycleCount((count) => count + 1);
        }
        return nextIsRecording;
      });
    }, []);

    // Wrapper for onSubmit that stops TTS first to prevent overlapping voices
    const handleSubmit = useCallback(
      (text: string) => {
        stopTTS();
        onSubmit(text);
      },
      [stopTTS, onSubmit]
    );
    const submitMessage = useCallback(
      (text: string) => {
        if (!text.trim()) {
          return;
        }
        handleSubmit(text);
      },
      [handleSubmit]
    );

    // Expose reset and focus methods to parent via ref
    React.useImperativeHandle(ref, () => ({
      reset: () => {
        setMessage("");
      },
      focus: () => {
        textAreaRef.current?.focus();
      },
    }));

    // Sync non-empty prop changes to internal state (e.g. NRFPage reads URL params
    // after mount). Intentionally skips empty strings — clearing is handled via the
    // imperative ref.reset() method, not by passing initialMessage="".
    useEffect(() => {
      if (initialMessage) {
        setMessage(initialMessage);
      }
    }, [initialMessage]);
    const shouldShowRecordingWaveformBelow =
      isRecording &&
      !isVoicePlaybackActive &&
      (isNewSession || recordingCycleCount === 1);

    useEffect(() => {
      if (isNewSession && !initialMessage) {
        setMessage("");
      }
    }, [isNewSession, initialMessage]);

    const { forcedToolIds, setForcedToolIds } = useForcedTools();
    const { currentMessageFiles, setCurrentMessageFiles, currentProjectId } =
      useProjectsContext();

    const currentIndexingFiles = useMemo(() => {
      return currentMessageFiles.filter(
        (file) => file.status === UserFileStatus.PROCESSING
      );
    }, [currentMessageFiles]);

    const hasUploadingFiles = useMemo(() => {
      return currentMessageFiles.some(
        (file) => file.status === UserFileStatus.UPLOADING
      );
    }, [currentMessageFiles]);

    // Convert ProjectFile to MinimalOnyxDocument format for viewing
    const handleFileClick = useCallback(
      (file: ProjectFile) => {
        if (!setPresentingDocument) return;

        const documentForViewer: MinimalOnyxDocument = {
          document_id: `project_file__${file.file_id}`,
          semantic_identifier: file.name,
        };

        setPresentingDocument(documentForViewer);
      },
      [setPresentingDocument]
    );

    const handleUploadChange = useCallback(
      async (e: React.ChangeEvent<HTMLInputElement>) => {
        const files = e.target.files;
        if (!files || files.length === 0) return;
        handleFileUpload(Array.from(files));
        e.target.value = "";
      },
      [handleFileUpload]
    );

    const combinedSettings = useContext(SettingsContext);

    // TODO(@raunakab): Replace this useEffect with CSS `field-sizing: content` once
    // Firefox ships it unflagged (currently behind `layout.css.field-sizing.enabled`).
    // Auto-resize textarea based on content (chat mode only).
    // Reset to min-height first so scrollHeight reflects actual content size,
    // then clamp between min and max. This handles both growing and shrinking.
    useEffect(() => {
      const wrapper = textAreaWrapperRef.current;
      const textarea = textAreaRef.current;
      if (!wrapper || !textarea) return;

      // Reset so scrollHeight reflects actual content size
      wrapper.style.height = `${MIN_INPUT_HEIGHT}px`;

      // scrollHeight doesn't include the wrapper's padding, so add it back
      const wrapperStyle = getComputedStyle(wrapper);
      const paddingTop = parseFloat(wrapperStyle.paddingTop);
      const paddingBottom = parseFloat(wrapperStyle.paddingBottom);
      const contentHeight = textarea.scrollHeight + paddingTop + paddingBottom;

      wrapper.style.height = `${Math.min(
        Math.max(contentHeight, MIN_INPUT_HEIGHT),
        MAX_INPUT_HEIGHT
      )}px`;
    }, [message, isSearchMode]);

    // Animate attached files wrapper to its content height so CSS transitions
    // can interpolate between concrete pixel values (0px ↔ Npx).
    const showFiles = !isSearchMode && currentMessageFiles.length > 0;
    useEffect(() => {
      const wrapper = filesWrapperRef.current;
      const content = filesContentRef.current;
      if (!wrapper || !content) return;

      if (showFiles) {
        // Measure the inner content's actual height, then add padding (p-1 = 8px total)
        const PADDING = 8;
        wrapper.style.height = `${content.offsetHeight + PADDING}px`;
      } else {
        wrapper.style.height = "0px";
      }
    }, [showFiles, currentMessageFiles]);

    function handlePaste(event: React.ClipboardEvent) {
      const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
      if (pastedFiles.length > 0) {
        event.preventDefault();
        handleFileUpload(pastedFiles);
      }
    }

    const handleRemoveMessageFile = useCallback(
      (fileId: string) => {
        setCurrentMessageFiles((prev) => prev.filter((f) => f.id !== fileId));
      },
      [setCurrentMessageFiles]
    );

    const { activePromptShortcuts } = usePromptShortcuts();
    const vectorDbEnabled = useVectorDbEnabled();
    const { ccPairs, isLoading: ccPairsLoading } = useCCPairs(vectorDbEnabled);
    const { data: federatedConnectorsData, isLoading: federatedLoading } =
      useFederatedConnectors();

    // Bottom controls are hidden until all data is loaded
    const controlsLoading =
      ccPairsLoading ||
      federatedLoading ||
      !selectedAgent ||
      llmManager.isLoadingProviders;
    const [showPrompts, setShowPrompts] = useState(false);

    // Memoize availableSources to prevent unnecessary re-renders
    const memoizedAvailableSources = useMemo(
      () => [
        ...ccPairs.map((ccPair) => ccPair.source),
        ...(federatedConnectorsData?.map((connector) => connector.source) ||
          []),
      ],
      [ccPairs, federatedConnectorsData]
    );

    const [tabbingIconIndex, setTabbingIconIndex] = useState(0);

    const hidePrompts = useCallback(() => {
      setTimeout(() => {
        setShowPrompts(false);
      }, 50);
      setTabbingIconIndex(0);
    }, []);

    function updateInputPrompt(prompt: InputPrompt) {
      hidePrompts();
      setMessage(`${prompt.content}`);
    }

    const { filtered: filteredPrompts, setQuery: setPromptFilterQuery } =
      useFilter(activePromptShortcuts, (prompt) => prompt.prompt);

    // Memoize sorted prompts to avoid re-sorting on every render
    const sortedFilteredPrompts = useMemo(
      () => [...filteredPrompts].sort((a, b) => a.id - b.id),
      [filteredPrompts]
    );

    // Reset tabbingIconIndex when filtered prompts change to avoid out-of-bounds
    useEffect(() => {
      setTabbingIconIndex(0);
    }, [filteredPrompts]);

    const handlePromptInput = useCallback(
      (text: string) => {
        if (text.startsWith("/")) {
          setShowPrompts(true);
        } else {
          hidePrompts();
        }
      },
      [hidePrompts]
    );

    const handleInputChange = useCallback(
      (event: React.ChangeEvent<HTMLTextAreaElement>) => {
        const text = event.target.value;
        setMessage(text);
        handlePromptInput(text);

        const promptFilterQuery = text.startsWith("/") ? text.slice(1) : "";
        setPromptFilterQuery(promptFilterQuery);
      },
      [setMessage, handlePromptInput, setPromptFilterQuery]
    );

    // Determine if we should hide processing state based on context limits
    const hideProcessingState = useMemo(() => {
      if (currentMessageFiles.length > 0 && currentIndexingFiles.length > 0) {
        const currentFilesTokenTotal = currentMessageFiles.reduce(
          (acc, file) => acc + (file.token_count || 0),
          0
        );
        const totalTokens =
          (currentSessionFileTokenCount || 0) + currentFilesTokenTotal;
        // Hide processing state when files are within context limits
        return totalTokens < availableContextTokens;
      }
      return false;
    }, [
      currentMessageFiles,
      currentSessionFileTokenCount,
      currentIndexingFiles,
      availableContextTokens,
    ]);

    const shouldCompactImages = useMemo(() => {
      return currentMessageFiles.length > 1;
    }, [currentMessageFiles]);

    const hasImageFiles = useMemo(
      () => currentMessageFiles.some((f) => isImageFile(f.name)),
      [currentMessageFiles]
    );

    // Check if the agent has search tools available (internal search or web search)
    // AND if deep research is globally enabled in admin settings
    const showDeepResearch = useMemo(() => {
      const deepResearchGloballyEnabled =
        combinedSettings?.settings?.deep_research_enabled ?? true;
      const isProjectWorkflow = currentProjectId !== null;

      // TODO(@yuhong): Re-enable Deep Research in Projects workflow once it is fully supported.
      // https://linear.app/onyx-app/issue/ENG-3818/re-enable-deep-research-in-projects
      return (
        !isProjectWorkflow &&
        deepResearchGloballyEnabled &&
        hasSearchToolsAvailable(selectedAgent?.tools || [])
      );
    }, [
      selectedAgent?.tools,
      combinedSettings?.settings?.deep_research_enabled,
      currentProjectId,
    ]);

    function handleKeyDownForPromptShortcuts(
      e: React.KeyboardEvent<HTMLTextAreaElement>
    ) {
      if (!user?.preferences?.shortcut_enabled || !showPrompts) return;

      if (e.key === "Enter") {
        e.preventDefault();
        if (tabbingIconIndex === sortedFilteredPrompts.length) {
          // "Create a new prompt" is selected
          window.open("/app/settings/chat-preferences", "_self");
        } else {
          const selectedPrompt = sortedFilteredPrompts[tabbingIconIndex];
          if (selectedPrompt) {
            updateInputPrompt(selectedPrompt);
          }
        }
      } else if (e.key === "Tab" && e.shiftKey) {
        // Shift+Tab: cycle backward
        e.preventDefault();
        setTabbingIconIndex((prev) => Math.max(prev - 1, 0));
      } else if (e.key === "Tab") {
        // Tab: cycle forward
        e.preventDefault();
        setTabbingIconIndex((prev) =>
          Math.min(prev + 1, sortedFilteredPrompts.length)
        );
      } else if (e.key === "ArrowDown") {
        e.preventDefault();
        setTabbingIconIndex((prev) =>
          Math.min(prev + 1, sortedFilteredPrompts.length)
        );
      } else if (e.key === "ArrowUp") {
        e.preventDefault();
        setTabbingIconIndex((prev) => Math.max(prev - 1, 0));
      }
    }

    const chatControls = (
      <div
        {...(isSearchMode ? { inert: true } : {})}
        className={cn(
          "flex justify-between items-center w-full",
          isSearchMode
            ? "opacity-0 p-0 h-0 overflow-hidden pointer-events-none"
            : "opacity-100 p-1 h-[2.75rem] pointer-events-auto",
          "transition-all duration-150"
        )}
      >
        {/* Bottom left controls */}
        <div className="flex flex-row items-center">
          {/* (+) button - always visible */}
          <FilePickerPopover
            onFileClick={handleFileClick}
            onPickRecent={(file: ProjectFile) => {
              // Check if file with same ID already exists
              if (
                !currentMessageFiles.some(
                  (existingFile) => existingFile.file_id === file.file_id
                )
              ) {
                setCurrentMessageFiles((prev) => [...prev, file]);
              }
            }}
            onUnpickRecent={(file: ProjectFile) => {
              setCurrentMessageFiles((prev) =>
                prev.filter(
                  (existingFile) => existingFile.file_id !== file.file_id
                )
              );
            }}
            handleUploadChange={handleUploadChange}
            trigger={(open) => (
              <Button
                disabled={disabled}
                icon={SvgPlusCircle}
                tooltip="Attach Files"
                interaction={open ? "hover" : "rest"}
                prominence="tertiary"
              />
            )}
            selectedFileIds={currentMessageFiles.map((f) => f.id)}
          />

          {/* Controls that load in when data is ready */}
          <div
            data-testid="actions-container"
            className={cn(
              "flex flex-row items-center",
              controlsLoading && "invisible"
            )}
          >
            {selectedAgent && selectedAgent.tools.length > 0 && (
              <ActionsPopover
                selectedAgent={selectedAgent}
                filterManager={filterManager}
                availableSources={memoizedAvailableSources}
                disabled={disabled}
              />
            )}
            {onToggleTabReading ? (
              <SelectButton
                disabled={disabled}
                icon={SvgGlobe}
                onClick={onToggleTabReading}
                state={tabReadingEnabled ? "selected" : "empty"}
              >
                {tabReadingEnabled
                  ? currentTabUrl
                    ? (() => {
                        try {
                          return new URL(currentTabUrl).hostname;
                        } catch {
                          return currentTabUrl;
                        }
                      })()
                    : "Reading tab..."
                  : "Read this tab"}
              </SelectButton>
            ) : (
              showDeepResearch && (
                <SelectButton
                  disabled={disabled}
                  variant="select-light"
                  icon={SvgHourglass}
                  onClick={toggleDeepResearch}
                  state={deepResearchEnabled ? "selected" : "empty"}
                  foldable={!deepResearchEnabled}
                >
                  Deep Research
                </SelectButton>
              )
            )}

            {selectedAgent &&
              forcedToolIds.length > 0 &&
              forcedToolIds.map((toolId) => {
                const tool = selectedAgent.tools.find(
                  (tool) => tool.id === toolId
                );
                if (!tool) {
                  return null;
                }
                return (
                  <Disabled disabled={disabled} key={toolId}>
                    <SelectButton
                      variant="select-light"
                      icon={getIconForAction(tool)}
                      onClick={() => {
                        setForcedToolIds(
                          forcedToolIds.filter((id) => id !== toolId)
                        );
                      }}
                      state="selected"
                    >
                      {tool.display_name}
                    </SelectButton>
                  </Disabled>
                );
              })}
          </div>
        </div>

        {/* Bottom right controls */}
        <div className="flex flex-row items-center gap-1">
          <div
            data-testid="AppInputBar/llm-popover-trigger"
            className={cn(controlsLoading && "invisible")}
          >
            <LLMPopover
              llmManager={llmManager}
              requiresImageInput={hasImageFiles}
              disabled={disabled}
            />
          </div>
          {showMicButton &&
            (sttEnabled ? (
              <MicrophoneButton
                onTranscription={(text) => setMessage(text)}
                disabled={disabled || chatState === "streaming"}
                autoSend={user?.preferences?.voice_auto_send ?? false}
                autoListen={user?.preferences?.voice_auto_playback ?? false}
                isNewSession={isNewSession}
                chatState={chatState}
                onRecordingChange={handleRecordingChange}
                stopRecordingRef={stopRecordingRef}
                currentMessage={message}
                onRecordingStart={() => {}}
                onAutoSend={(text) => {
                  submitMessage(text);
                }}
                onMuteChange={setIsMuted}
                setMutedRef={setMutedRef}
                onAudioLevel={setAudioLevel}
              />
            ) : (
              <Button
                disabled
                icon={SvgMicrophone}
                aria-label="Set up voice"
                prominence="tertiary"
                tooltip="Voice not configured. Set up in admin settings."
              />
            ))}

          <Button
            disabled={
              (chatState === "input" &&
                !isVoicePlaybackControllable &&
                !message) ||
              hasUploadingFiles ||
              isClassifying
            }
            id="onyx-chat-input-send-button"
            icon={
              isClassifying
                ? SimpleLoader
                : chatState === "streaming" || isVoicePlaybackControllable
                  ? SvgStop
                  : SvgArrowUp
            }
            onClick={() => {
              if (chatState == "streaming") {
                stopTTS({ manual: true });
                stopGenerating();
              } else if (isVoicePlaybackControllable) {
                stopTTS({ manual: true });
              } else if (message) {
                submitMessage(message);
              }
            }}
          />
        </div>
      </div>
    );

    return (
      <Disabled disabled={disabled} allowClick>
        <div
          ref={containerRef}
          id="onyx-chat-input"
          className={cn(
            "relative w-full flex flex-col shadow-01 bg-background-neutral-00 rounded-16"
            // # Note (from @raunakab):
            //
            // `shadow-01` extends ~14px below the element (2px offset + 12px blur).
            // Because the content area in `Root` (app-layouts.tsx) uses `overflow-auto`,
            // shadows that exceed the container bounds are clipped.
            //
            // The 14px breathing room is now applied externally via animated spacer
            // divs in `AppPage.tsx` (above and below the AppInputBar) so that the
            // spacing can transition smoothly when switching between search and chat
            // modes. See the corresponding note there for details.
          )}
        >
          {/* Voice waveform overlay (positioned outside normal flow to avoid resizing input) */}
          {isTTSActuallySpeaking ? (
            <div className="absolute bottom-full mb-1 left-1 z-10">
              <Waveform
                variant="speaking"
                isActive={isTTSActuallySpeaking}
                isMuted={isTTSMuted}
                onMuteToggle={toggleTTSMute}
              />
            </div>
          ) : isRecording &&
            !isVoicePlaybackActive &&
            !shouldShowRecordingWaveformBelow ? (
            <div className="absolute bottom-full mb-1 left-1 right-1 z-10">
              <Waveform
                variant="recording"
                isActive={isRecording}
                isMuted={isMuted}
                audioLevel={audioLevel}
                onMuteToggle={() => {
                  setMutedRef.current?.(!isMuted);
                }}
              />
            </div>
          ) : null}

          {/* Attached Files */}
          <div
            ref={filesWrapperRef}
            {...(!showFiles ? { inert: true } : {})}
            className={cn(
              "transition-all duration-150",
              showFiles
                ? "opacity-100 p-1"
                : "opacity-0 p-0 overflow-hidden pointer-events-none"
            )}
          >
            <div ref={filesContentRef} className="flex flex-wrap gap-1">
              {currentMessageFiles.map((file) => (
                <FileCard
                  key={file.id}
                  file={file}
                  removeFile={handleRemoveMessageFile}
                  hideProcessingState={hideProcessingState}
                  onFileClick={handleFileClick}
                  compactImages={shouldCompactImages}
                />
              ))}
            </div>
          </div>

          <div className="flex flex-row items-center w-full">
            <Popover
              open={user?.preferences?.shortcut_enabled && showPrompts}
              onOpenChange={setShowPrompts}
            >
              <Popover.Anchor asChild>
                <div
                  ref={textAreaWrapperRef}
                  className="px-3 py-2 flex-1 flex h-[2.75rem]"
                >
                  <textarea
                    id="onyx-chat-input-textarea"
                    role="textarea"
                    ref={textAreaRef}
                    onPaste={handlePaste}
                    onKeyDownCapture={handleKeyDownForPromptShortcuts}
                    onChange={handleInputChange}
                    className={cn(
                      "p-[2px] w-full h-full outline-none bg-transparent resize-none placeholder:text-text-03 whitespace-pre-wrap break-words",
                      "overflow-y-auto"
                    )}
                    autoFocus
                    rows={1}
                    style={{ scrollbarWidth: "thin" }}
                    aria-multiline={true}
                    placeholder={
                      isRecording
                        ? "Listening..."
                        : isVoicePlaybackActive
                          ? "Onyx is speaking..."
                          : isSearchMode
                            ? "Search connected sources"
                            : "How can I help you today?"
                    }
                    value={message}
                    onKeyDown={(event) => {
                      if (
                        event.key === "Enter" &&
                        !showPrompts &&
                        !event.shiftKey &&
                        !(event.nativeEvent as any).isComposing
                      ) {
                        event.preventDefault();
                        if (
                          message &&
                          !disabled &&
                          !isClassifying &&
                          !hasUploadingFiles
                        ) {
                          submitMessage(message);
                        }
                      }
                    }}
                    suppressContentEditableWarning={true}
                    disabled={disabled}
                  />
                </div>
              </Popover.Anchor>

              <Popover.Content
                side="top"
                align="start"
                onOpenAutoFocus={(e) => e.preventDefault()}
                width="xl"
              >
                <Popover.Menu>
                  {[
                    ...sortedFilteredPrompts.map((prompt, index) => (
                      <LineItem
                        key={prompt.id}
                        selected={tabbingIconIndex === index}
                        emphasized={tabbingIconIndex === index}
                        description={prompt.content?.trim()}
                        onClick={() => updateInputPrompt(prompt)}
                      >
                        {prompt.prompt}
                      </LineItem>
                    )),
                    sortedFilteredPrompts.length > 0 ? null : undefined,
                    <LineItem
                      key="create-new"
                      href="/app/settings/chat-preferences"
                      icon={SvgPlus}
                      selected={
                        tabbingIconIndex === sortedFilteredPrompts.length
                      }
                      emphasized={
                        tabbingIconIndex === sortedFilteredPrompts.length
                      }
                    >
                      Create New Prompt
                    </LineItem>,
                  ]}
                </Popover.Menu>
              </Popover.Content>
            </Popover>

            {isSearchMode && (
              <Section flexDirection="row" width="fit" gap={0}>
                <Button
                  disabled={!message || isClassifying}
                  icon={SvgX}
                  onClick={() => setMessage("")}
                  prominence="tertiary"
                />
                <Button
                  disabled={!message || isClassifying || hasUploadingFiles}
                  id="onyx-chat-input-send-button"
                  icon={isClassifying ? SimpleLoader : SvgSearch}
                  onClick={() => {
                    if (chatState == "streaming") {
                      stopGenerating();
                    } else if (message) {
                      submitMessage(message);
                    }
                  }}
                  prominence="tertiary"
                />
                <Spacer horizontal rem={0.25} />
              </Section>
            )}
          </div>

          {chatControls}

          {/* First recording cycle waveform below input */}
          {shouldShowRecordingWaveformBelow && (
            <div className="absolute top-full mt-1 left-1 right-1 z-10">
              <Waveform
                variant="recording"
                isActive={isRecording}
                isMuted={isMuted}
                audioLevel={audioLevel}
                onMuteToggle={() => {
                  setMutedRef.current?.(!isMuted);
                }}
              />
            </div>
          )}
        </div>
      </Disabled>
    );
  }
);
AppInputBar.displayName = "AppInputBar";

export default AppInputBar;


================================================
FILE: web/src/sections/input/MicrophoneButton.tsx
================================================
"use client";

import { useCallback, useEffect, useRef } from "react";
import { Button } from "@opal/components";
import { SvgMicrophone } from "@opal/icons";
import { useVoiceRecorder } from "@/hooks/useVoiceRecorder";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import { toast } from "@/hooks/useToast";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { ChatState } from "@/app/app/interfaces";

interface MicrophoneButtonProps {
  onTranscription: (text: string) => void;
  disabled?: boolean;
  autoSend?: boolean;
  /** Called with transcribed text when autoSend is enabled */
  onAutoSend?: (text: string) => void;
  /**
   * Internal prop: auto-start listening when TTS finishes or chat response completes.
   * Tied to voice_auto_playback user preference.
   * Enables conversation flow: speak → AI responds → auto-listen again.
   * Note: autoSend is separate - it controls whether message auto-submits after recording.
   */
  autoListen?: boolean;
  /** Current chat state - used to detect when response streaming finishes */
  chatState?: ChatState;
  /** Called when recording state changes */
  onRecordingChange?: (isRecording: boolean) => void;
  /** Ref to expose stop recording function to parent */
  stopRecordingRef?: React.MutableRefObject<
    (() => Promise<string | null>) | null
  >;
  /** Called when recording starts */
  onRecordingStart?: () => void;
  /** Existing message text to prepend to transcription (append mode) */
  currentMessage?: string;
  /** Called when mute state changes */
  onMuteChange?: (isMuted: boolean) => void;
  /** Ref to expose setMuted function to parent */
  setMutedRef?: React.MutableRefObject<((muted: boolean) => void) | null>;
  /** Called with current microphone audio level (0-1) for waveform visualization */
  onAudioLevel?: (level: number) => void;
  /** Whether current chat is a new session (used to reset auto-listen arming) */
  isNewSession?: boolean;
}

function MicrophoneButton({
  onTranscription,
  disabled = false,
  autoSend = false,
  onAutoSend,
  autoListen = false,
  chatState,
  onRecordingChange,
  stopRecordingRef,
  onRecordingStart,
  currentMessage = "",
  onMuteChange,
  setMutedRef,
  onAudioLevel,
  isNewSession = false,
}: MicrophoneButtonProps) {
  const {
    isTTSPlaying,
    isTTSLoading,
    isAwaitingAutoPlaybackStart,
    manualStopCount,
  } = useVoiceMode();

  // Refs for tracking state across renders
  // Track whether TTS was actually playing audio (not just loading)
  const wasTTSActuallyPlayingRef = useRef(false);
  const manualStopRequestedRef = useRef(false);
  const lastHandledManualStopCountRef = useRef(manualStopCount);
  const autoListenCooldownTimerRef = useRef<NodeJS.Timeout | null>(null);
  const hasManualRecordStartRef = useRef(false);
  // Prevent late transcript events from repopulating input after auto-send.
  const suppressTranscriptUpdatesRef = useRef(false);
  // Snapshot of existing message text when recording starts (for append mode)
  const messagePrefixRef = useRef("");
  const currentMessageRef = useRef(currentMessage);

  useEffect(() => {
    currentMessageRef.current = currentMessage;
  }, [currentMessage]);

  // Helper to combine prefix with new transcript
  const withPrefix = useCallback((text: string) => {
    const prefix = messagePrefixRef.current;
    if (!prefix) return text;
    return prefix + (prefix.endsWith(" ") ? "" : " ") + text;
  }, []);

  // Handler for VAD (Voice Activity Detection) triggered auto-send.
  // VAD runs server-side in the STT provider and detects when the user stops speaking.
  const handleFinalTranscript = useCallback(
    (text: string) => {
      const combined = withPrefix(text);
      if (!suppressTranscriptUpdatesRef.current) {
        onTranscription(combined);
      }
      const isManualStop = manualStopRequestedRef.current;
      // Only auto-send if chat is ready for input (not streaming)
      if (!isManualStop && autoSend && onAutoSend && chatState === "input") {
        suppressTranscriptUpdatesRef.current = true;
        onAutoSend(combined);
        // Clear prefix after send to prevent stale text in next auto-listen cycle
        messagePrefixRef.current = "";
      }
    },
    [onTranscription, autoSend, onAutoSend, chatState, withPrefix]
  );

  const {
    isRecording,
    isProcessing,
    isMuted,
    error,
    liveTranscript,
    audioLevel,
    startRecording,
    stopRecording,
    setMuted,
  } = useVoiceRecorder({
    onFinalTranscript: handleFinalTranscript,
    autoStopOnSilence: autoSend,
  });

  // Expose stopRecording to parent
  useEffect(() => {
    if (stopRecordingRef) {
      stopRecordingRef.current = stopRecording;
    }
  }, [stopRecording, stopRecordingRef]);

  // Expose setMuted to parent
  useEffect(() => {
    if (setMutedRef) {
      setMutedRef.current = setMuted;
    }
  }, [setMuted, setMutedRef]);

  // Notify parent when mute state changes
  useEffect(() => {
    onMuteChange?.(isMuted);
  }, [isMuted, onMuteChange]);

  // Forward audio level to parent for waveform visualization
  useEffect(() => {
    onAudioLevel?.(audioLevel);
  }, [audioLevel, onAudioLevel]);

  // Notify parent when recording state changes
  useEffect(() => {
    onRecordingChange?.(isRecording);
  }, [isRecording, onRecordingChange]);

  // Update input with live transcript as user speaks (appending to existing text)
  useEffect(() => {
    if (
      isRecording &&
      liveTranscript &&
      !suppressTranscriptUpdatesRef.current
    ) {
      onTranscription(withPrefix(liveTranscript));
    }
  }, [isRecording, liveTranscript, onTranscription, withPrefix]);

  const handleClick = useCallback(async () => {
    if (isRecording) {
      // When recording, clicking the mic button stops recording
      manualStopRequestedRef.current = true;
      try {
        const finalTranscript = await stopRecording();
        if (finalTranscript) {
          const combined = withPrefix(finalTranscript);
          onTranscription(combined);
          if (
            autoSend &&
            onAutoSend &&
            chatState === "input" &&
            combined.trim()
          ) {
            onAutoSend(combined);
          }
        }
        messagePrefixRef.current = "";
      } finally {
        manualStopRequestedRef.current = false;
      }
    } else {
      try {
        // Snapshot existing text so transcription can append to it
        suppressTranscriptUpdatesRef.current = false;
        messagePrefixRef.current = currentMessage;
        onRecordingStart?.();
        await startRecording();
        // Arm auto-listen only after first manual mic start in this session.
        hasManualRecordStartRef.current = true;
      } catch (err) {
        console.error("Microphone access failed:", err);
        toast.error("Could not access microphone");
      }
    }
  }, [
    isRecording,
    startRecording,
    stopRecording,
    onRecordingStart,
    onTranscription,
    autoSend,
    onAutoSend,
    chatState,
    currentMessage,
    withPrefix,
  ]);

  // Auto-start listening shortly after TTS finishes (only if autoListen is enabled).
  // Small cooldown reduces playback bleed being re-captured by the microphone.
  // IMPORTANT: Only trigger auto-listen if TTS was actually playing audio,
  // not just loading. This prevents auto-listen from triggering when TTS fails.
  useEffect(() => {
    if (autoListenCooldownTimerRef.current) {
      clearTimeout(autoListenCooldownTimerRef.current);
      autoListenCooldownTimerRef.current = null;
    }

    const stoppedManually =
      manualStopCount !== lastHandledManualStopCountRef.current;

    // Only trigger auto-listen if TTS was actually playing (not just loading)
    if (
      wasTTSActuallyPlayingRef.current &&
      !isTTSPlaying &&
      !isTTSLoading &&
      !isAwaitingAutoPlaybackStart &&
      autoListen &&
      hasManualRecordStartRef.current &&
      !disabled &&
      !isRecording &&
      !stoppedManually
    ) {
      autoListenCooldownTimerRef.current = setTimeout(() => {
        autoListenCooldownTimerRef.current = null;
        if (
          !autoListen ||
          disabled ||
          isRecording ||
          isTTSPlaying ||
          isTTSLoading ||
          isAwaitingAutoPlaybackStart
        ) {
          return;
        }
        messagePrefixRef.current = currentMessageRef.current;
        startRecording().catch((err) => {
          console.error("Auto-start microphone failed:", err);
          toast.error("Could not auto-start microphone");
        });
      }, 400);
    }

    if (stoppedManually) {
      lastHandledManualStopCountRef.current = manualStopCount;
    }

    // Only track actual playback - not loading states
    // This ensures auto-listen only triggers after audio actually played
    if (isTTSPlaying) {
      wasTTSActuallyPlayingRef.current = true;
    } else if (!isTTSPlaying && !isTTSLoading && !isAwaitingAutoPlaybackStart) {
      // Reset when TTS is completely done
      wasTTSActuallyPlayingRef.current = false;
    }
  }, [
    isTTSPlaying,
    isTTSLoading,
    isAwaitingAutoPlaybackStart,
    autoListen,
    disabled,
    isRecording,
    startRecording,
    manualStopCount,
  ]);

  // New sessions must start with an explicit manual mic press.
  useEffect(() => {
    if (isNewSession) {
      hasManualRecordStartRef.current = false;
      suppressTranscriptUpdatesRef.current = false;
    }
  }, [isNewSession]);

  useEffect(() => {
    if (!isRecording) {
      suppressTranscriptUpdatesRef.current = false;
    }
  }, [isRecording]);

  useEffect(() => {
    return () => {
      if (autoListenCooldownTimerRef.current) {
        clearTimeout(autoListenCooldownTimerRef.current);
        autoListenCooldownTimerRef.current = null;
      }
    };
  }, []);

  useEffect(() => {
    if (error) {
      console.error("Voice recorder error:", error);
      toast.error(error);
    }
  }, [error]);

  // Icon: show loader when processing, otherwise mic
  const icon = isProcessing ? SimpleLoader : SvgMicrophone;

  // Disable when processing or TTS is playing (don't want to pick up TTS audio)
  const isDisabled =
    disabled ||
    isProcessing ||
    isTTSPlaying ||
    isTTSLoading ||
    isAwaitingAutoPlaybackStart;

  // Recording = darkened (primary), not recording = light (tertiary)
  const prominence = isRecording ? "primary" : "tertiary";

  return (
    <Button
      disabled={isDisabled}
      icon={icon}
      onClick={handleClick}
      aria-label={isRecording ? "Stop recording" : "Start recording"}
      prominence={prominence}
    />
  );
}

export default MicrophoneButton;


================================================
FILE: web/src/sections/input/SharedAppInputBar.tsx
================================================
"use client";

import Text from "@/refresh-components/texts/Text";
import { Button, OpenButton, SelectButton } from "@opal/components";
import { OpenAISVG } from "@/components/icons/icons";
import {
  SvgPlusCircle,
  SvgArrowUp,
  SvgSliders,
  SvgHourglass,
  SvgEditBig,
} from "@opal/icons";

export default function SharedAppInputBar() {
  return (
    <div className="relative w-full">
      <div className="w-full flex flex-col shadow-01 bg-background-neutral-00 rounded-16">
        {/* Textarea area */}
        <div className="flex flex-row items-center w-full">
          <Text text03 className="w-full px-3 pt-3 pb-2 select-none">
            How can Onyx help you today
          </Text>
        </div>

        {/* Bottom toolbar */}
        <div className="flex justify-between items-center w-full p-1 min-h-[40px]">
          {/* Left side controls */}
          <div className="flex flex-row items-center">
            <Button disabled icon={SvgPlusCircle} prominence="tertiary" />
            <Button disabled icon={SvgSliders} prominence="tertiary" />
            <SelectButton disabled icon={SvgHourglass} />
          </div>

          {/* Right side controls */}
          <div className="flex flex-row items-center gap-1">
            <OpenButton disabled icon={OpenAISVG}>
              GPT-4o
            </OpenButton>
            <Button disabled icon={SvgArrowUp} />
          </div>
        </div>
      </div>

      {/* Fade overlay */}
      <div className="absolute inset-0 rounded-16 backdrop-blur-sm bg-background-neutral-00/50" />

      {/* CTA button */}
      <div className="absolute inset-0 flex items-center justify-center">
        <Button prominence="secondary" icon={SvgEditBig} href="/app">
          Start New Session
        </Button>
      </div>
    </div>
  );
}


================================================
FILE: web/src/sections/knowledge/AgentKnowledgePane.tsx
================================================
"use client";

import React, {
  useState,
  useMemo,
  useRef,
  memo,
  useCallback,
  useEffect,
} from "react";
import * as GeneralLayouts from "@/layouts/general-layouts";
import { Content } from "@opal/layouts";
import * as TableLayouts from "@/layouts/table-layouts";
import * as InputLayouts from "@/layouts/input-layouts";
import { Card } from "@/refresh-components/cards";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import LineItem from "@/refresh-components/buttons/LineItem";
import Separator from "@/refresh-components/Separator";
import Switch from "@/refresh-components/inputs/Switch";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import {
  SvgPlusCircle,
  SvgArrowUpRight,
  SvgFiles,
  SvgFolder,
} from "@opal/icons";
import type { CCPairSummary } from "@/lib/types";
import { getSourceMetadata } from "@/lib/sources";
import { ValidSources, DocumentSetSummary } from "@/lib/types";
import useCCPairs from "@/hooks/useCCPairs";
import { ConnectedSource } from "@/lib/hierarchy/interfaces";
import { ProjectFile } from "@/app/app/projects/projectsService";
import {
  AttachedDocumentSnapshot,
  HierarchyNodeSnapshot,
} from "@/app/admin/agents/interfaces";
import { timeAgo } from "@/lib/time";
import Spacer from "@/refresh-components/Spacer";
import { Disabled } from "@opal/core";
import SourceHierarchyBrowser from "./SourceHierarchyBrowser";

// Knowledge pane view states
type KnowledgeView = "main" | "add" | "document-sets" | "sources" | "recent";

// ============================================================================
// KNOWLEDGE SIDEBAR - Left column showing all knowledge categories
// ============================================================================

interface KnowledgeSidebarProps {
  activeView: KnowledgeView;
  activeSource?: ValidSources;
  connectedSources: ConnectedSource[];
  selectedSources: ValidSources[];
  selectedDocumentSetIds: number[];
  selectedFileIds: string[];
  sourceSelectionCounts: Map<ValidSources, number>;
  onNavigateToRecent: () => void;
  onNavigateToDocumentSets: () => void;
  onNavigateToSource: (source: ValidSources) => void;
  vectorDbEnabled: boolean;
}

function KnowledgeSidebar({
  activeView,
  activeSource,
  connectedSources,
  selectedSources,
  selectedDocumentSetIds,
  selectedFileIds,
  sourceSelectionCounts,
  onNavigateToRecent,
  onNavigateToDocumentSets,
  onNavigateToSource,
  vectorDbEnabled,
}: KnowledgeSidebarProps) {
  return (
    <TableLayouts.SidebarLayout aria-label="knowledge-sidebar">
      <LineItem
        icon={SvgFiles}
        onClick={onNavigateToRecent}
        selected={activeView === "recent"}
        emphasized={activeView === "recent" || selectedFileIds.length > 0}
        aria-label="knowledge-sidebar-files"
        rightChildren={
          selectedFileIds.length > 0 ? (
            <Text mainUiAction className="text-action-link-05">
              {selectedFileIds.length}
            </Text>
          ) : undefined
        }
      >
        Your Files
      </LineItem>

      {vectorDbEnabled && (
        <>
          <LineItem
            icon={SvgFolder}
            onClick={onNavigateToDocumentSets}
            selected={activeView === "document-sets"}
            emphasized={
              activeView === "document-sets" ||
              selectedDocumentSetIds.length > 0
            }
            aria-label="knowledge-sidebar-document-sets"
            rightChildren={
              selectedDocumentSetIds.length > 0 ? (
                <Text mainUiAction className="text-action-link-05">
                  {selectedDocumentSetIds.length}
                </Text>
              ) : undefined
            }
          >
            Document Set
          </LineItem>

          <Separator noPadding />

          {connectedSources.map((connectedSource) => {
            const sourceMetadata = getSourceMetadata(connectedSource.source);
            const isSelected = selectedSources.includes(connectedSource.source);
            const isActive =
              activeView === "sources" &&
              activeSource === connectedSource.source;
            const selectionCount =
              sourceSelectionCounts.get(connectedSource.source) ?? 0;

            return (
              <LineItem
                key={connectedSource.source}
                icon={sourceMetadata.icon}
                onClick={() => onNavigateToSource(connectedSource.source)}
                selected={isActive}
                emphasized={isActive || isSelected || selectionCount > 0}
                aria-label={`knowledge-sidebar-source-${connectedSource.source}`}
                rightChildren={
                  selectionCount > 0 ? (
                    <Text mainUiAction className="text-action-link-05">
                      {selectionCount}
                    </Text>
                  ) : undefined
                }
              >
                {sourceMetadata.displayName}
              </LineItem>
            );
          })}
        </>
      )}
    </TableLayouts.SidebarLayout>
  );
}

// ============================================================================
// KNOWLEDGE TABLE - Generic table component for knowledge items
// ============================================================================

interface KnowledgeTableColumn<T> {
  key: string;
  header: string;
  sortable?: boolean;
  width?: number; // Width in rem
  render: (item: T) => React.ReactNode;
}

interface KnowledgeTableProps<T> {
  items: T[];
  columns: KnowledgeTableColumn<T>[];
  getItemId: (item: T) => string | number;
  selectedIds: (string | number)[];
  onToggleItem: (id: string | number) => void;
  searchValue?: string;
  onSearchChange?: (value: string) => void;
  searchPlaceholder?: string;
  headerActions?: React.ReactNode;
  emptyMessage?: string;
}

function KnowledgeTable<T>({
  items,
  columns,
  getItemId,
  selectedIds,
  onToggleItem,
  searchValue,
  onSearchChange,
  searchPlaceholder = "Search...",
  headerActions,
  emptyMessage = "No items available.",
  ariaLabelPrefix,
}: KnowledgeTableProps<T> & { ariaLabelPrefix?: string }) {
  return (
    <GeneralLayouts.Section gap={0} alignItems="stretch" justifyContent="start">
      {/* Header with search and actions */}
      <GeneralLayouts.Section
        flexDirection="row"
        justifyContent="start"
        alignItems="center"
        gap={0.5}
        height="auto"
      >
        {onSearchChange !== undefined && (
          <GeneralLayouts.Section height="auto">
            <InputTypeIn
              leftSearchIcon
              value={searchValue ?? ""}
              onChange={(e) => onSearchChange?.(e.target.value)}
              placeholder={searchPlaceholder}
              variant="internal"
            />
          </GeneralLayouts.Section>
        )}
        {headerActions}
      </GeneralLayouts.Section>

      <Spacer rem={0.5} />

      {/* Table header */}
      <TableLayouts.TableRow>
        <TableLayouts.CheckboxCell />
        {columns.map((column) => (
          <TableLayouts.TableCell
            key={column.key}
            flex={!column.width}
            width={column.width}
          >
            <GeneralLayouts.Section
              flexDirection="row"
              justifyContent="start"
              alignItems="center"
              gap={0.25}
              height="auto"
            >
              <Text secondaryBody text03>
                {column.header}
              </Text>
            </GeneralLayouts.Section>
          </TableLayouts.TableCell>
        ))}
      </TableLayouts.TableRow>

      <Separator noPadding />

      {/* Table body */}
      {items.length === 0 ? (
        <GeneralLayouts.Section height="auto" padding={1}>
          <Text text03 secondaryBody>
            {emptyMessage}
          </Text>
        </GeneralLayouts.Section>
      ) : (
        <GeneralLayouts.Section gap={0} alignItems="stretch" height="auto">
          {items.map((item) => {
            const id = getItemId(item);
            const isSelected = selectedIds.includes(id);

            return (
              <TableLayouts.TableRow
                key={String(id)}
                selected={isSelected}
                onClick={() => onToggleItem(id)}
                aria-label={
                  ariaLabelPrefix ? `${ariaLabelPrefix}-${id}` : undefined
                }
              >
                <TableLayouts.CheckboxCell>
                  <Checkbox
                    checked={isSelected}
                    onCheckedChange={() => onToggleItem(id)}
                  />
                </TableLayouts.CheckboxCell>
                {columns.map((column) => (
                  <TableLayouts.TableCell
                    key={column.key}
                    flex={!column.width}
                    width={column.width}
                  >
                    {column.render(item)}
                  </TableLayouts.TableCell>
                ))}
              </TableLayouts.TableRow>
            );
          })}
        </GeneralLayouts.Section>
      )}
    </GeneralLayouts.Section>
  );
}

// ============================================================================
// DOCUMENT SETS TABLE - Table content for document sets view
// ============================================================================

interface DocumentSetsTableContentProps {
  documentSets: DocumentSetSummary[];
  selectedDocumentSetIds: number[];
  onDocumentSetToggle: (documentSetId: number) => void;
}

function DocumentSetsTableContent({
  documentSets,
  selectedDocumentSetIds,
  onDocumentSetToggle,
}: DocumentSetsTableContentProps) {
  const [searchValue, setSearchValue] = useState("");

  const filteredDocumentSets = useMemo(() => {
    if (!searchValue) return documentSets;
    const lower = searchValue.toLowerCase();
    return documentSets.filter((ds) => ds.name.toLowerCase().includes(lower));
  }, [documentSets, searchValue]);

  const columns: KnowledgeTableColumn<DocumentSetSummary>[] = [
    {
      key: "name",
      header: "Name",
      sortable: true,
      render: (ds) => (
        <Content
          icon={SvgFolder}
          title={ds.name}
          sizePreset="main-ui"
          variant="section"
        />
      ),
    },
    {
      key: "sources",
      header: "Sources",
      width: 8,
      render: (ds) => (
        <TableLayouts.SourceIconsRow>
          {ds.cc_pair_summaries
            ?.slice(0, 4)
            .map((summary: CCPairSummary, idx: number) => {
              const sourceMetadata = getSourceMetadata(summary.source);
              return <sourceMetadata.icon key={idx} size={16} />;
            })}
          {(ds.cc_pair_summaries?.length ?? 0) > 4 && (
            <Text text03 secondaryBody>
              +{(ds.cc_pair_summaries?.length ?? 0) - 4}
            </Text>
          )}
        </TableLayouts.SourceIconsRow>
      ),
    },
  ];

  return (
    <KnowledgeTable
      items={filteredDocumentSets}
      columns={columns}
      getItemId={(ds) => ds.id}
      selectedIds={selectedDocumentSetIds}
      onToggleItem={(id) => onDocumentSetToggle(id as number)}
      searchValue={searchValue}
      onSearchChange={setSearchValue}
      searchPlaceholder="Search document sets..."
      emptyMessage="No document sets available."
      ariaLabelPrefix="document-set-row"
    />
  );
}

interface SourcesTableContentProps {
  source: ValidSources;
  selectedDocumentIds: string[];
  onToggleDocument: (documentId: string) => void;
  onSetDocumentIds: (ids: string[]) => void;
  selectedFolderIds: number[];
  onToggleFolder: (folderId: number) => void;
  onSetFolderIds: (ids: number[]) => void;
  onDeselectAllDocuments: () => void;
  onDeselectAllFolders: () => void;
  initialAttachedDocuments?: AttachedDocumentSnapshot[];
  onSelectionCountChange?: (source: ValidSources, count: number) => void;
}

function SourcesTableContent({
  source,
  selectedDocumentIds,
  onToggleDocument,
  onSetDocumentIds,
  selectedFolderIds,
  onToggleFolder,
  onSetFolderIds,
  onDeselectAllDocuments,
  onDeselectAllFolders,
  initialAttachedDocuments,
  onSelectionCountChange,
}: SourcesTableContentProps) {
  return (
    <GeneralLayouts.Section gap={0.5} alignItems="stretch">
      {/* Hierarchy browser */}
      <SourceHierarchyBrowser
        source={source}
        selectedDocumentIds={selectedDocumentIds}
        onToggleDocument={onToggleDocument}
        onSetDocumentIds={onSetDocumentIds}
        selectedFolderIds={selectedFolderIds}
        onToggleFolder={onToggleFolder}
        onSetFolderIds={onSetFolderIds}
        initialAttachedDocuments={initialAttachedDocuments}
        onDeselectAllDocuments={onDeselectAllDocuments}
        onDeselectAllFolders={onDeselectAllFolders}
        onSelectionCountChange={onSelectionCountChange}
      />
    </GeneralLayouts.Section>
  );
}

// ============================================================================
// RECENT FILES TABLE - Table content for user files view
// ============================================================================

interface RecentFilesTableContentProps {
  allRecentFiles: ProjectFile[];
  selectedFileIds: string[];
  onToggleFile: (fileId: string) => void;
  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  hasProcessingFiles: boolean;
}

function RecentFilesTableContent({
  allRecentFiles,
  selectedFileIds,
  onToggleFile,
  onUploadChange,
  hasProcessingFiles,
}: RecentFilesTableContentProps) {
  const [searchValue, setSearchValue] = useState("");

  const filteredFiles = useMemo(() => {
    if (!searchValue) return allRecentFiles;
    const lower = searchValue.toLowerCase();
    return allRecentFiles.filter((f) => f.name.toLowerCase().includes(lower));
  }, [allRecentFiles, searchValue]);

  const columns: KnowledgeTableColumn<ProjectFile>[] = [
    {
      key: "name",
      header: "Name",
      sortable: true,
      render: (file) => (
        <Content
          icon={SvgFiles}
          title={file.name}
          sizePreset="main-ui"
          variant="section"
        />
      ),
    },
    {
      key: "lastUpdated",
      header: "Last Updated",
      sortable: true,
      width: 8,
      render: (file) => (
        <Text text03 secondaryBody>
          {timeAgo(file.last_accessed_at || file.created_at)}
        </Text>
      ),
    },
  ];

  const fileInputRef = React.useRef<HTMLInputElement>(null);

  return (
    <GeneralLayouts.Section gap={0.5} alignItems="stretch">
      <TableLayouts.HiddenInput
        inputRef={fileInputRef}
        type="file"
        multiple
        onChange={onUploadChange}
      />

      <KnowledgeTable
        items={filteredFiles}
        columns={columns}
        getItemId={(file) => file.id}
        selectedIds={selectedFileIds}
        onToggleItem={(id) => onToggleFile(id as string)}
        searchValue={searchValue}
        onSearchChange={setSearchValue}
        searchPlaceholder="Search files..."
        ariaLabelPrefix="user-file-row"
        headerActions={
          <Button
            prominence="internal"
            icon={SvgPlusCircle}
            onClick={() => fileInputRef.current?.click()}
          >
            Add File
          </Button>
        }
        emptyMessage="No files available. Upload files to get started."
      />

      {hasProcessingFiles && (
        <GeneralLayouts.Section height="auto" alignItems="start">
          <Text as="p" text03 secondaryBody>
            Onyx is still processing your uploaded files. You can create the
            agent now, but it will not have access to all files until processing
            completes.
          </Text>
        </GeneralLayouts.Section>
      )}
    </GeneralLayouts.Section>
  );
}

// ============================================================================
// TWO-COLUMN LAYOUT - Sidebar + Table for detailed views
// ============================================================================

interface KnowledgeTwoColumnViewProps {
  activeView: KnowledgeView;
  activeSource?: ValidSources;
  connectedSources: ConnectedSource[];
  selectedSources: ValidSources[];
  selectedDocumentSetIds: number[];
  selectedFileIds: string[];
  selectedDocumentIds: string[];
  selectedFolderIds: number[];
  sourceSelectionCounts: Map<ValidSources, number>;
  documentSets: DocumentSetSummary[];
  allRecentFiles: ProjectFile[];
  onNavigateToRecent: () => void;
  onNavigateToDocumentSets: () => void;
  onNavigateToSource: (source: ValidSources) => void;
  onDocumentSetToggle: (id: number) => void;
  onSourceToggle: (source: ValidSources) => void;
  onFileToggle: (fileId: string) => void;
  onToggleDocument: (documentId: string) => void;
  onToggleFolder: (folderId: number) => void;
  onSetDocumentIds: (ids: string[]) => void;
  onSetFolderIds: (ids: number[]) => void;
  onDeselectAllDocuments: () => void;
  onDeselectAllFolders: () => void;
  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  hasProcessingFiles: boolean;
  initialAttachedDocuments?: AttachedDocumentSnapshot[];
  onSelectionCountChange: (source: ValidSources, count: number) => void;
  vectorDbEnabled: boolean;
}

const KnowledgeTwoColumnView = memo(function KnowledgeTwoColumnView({
  activeView,
  activeSource,
  connectedSources,
  selectedSources,
  selectedDocumentSetIds,
  selectedFileIds,
  selectedDocumentIds,
  selectedFolderIds,
  sourceSelectionCounts,
  documentSets,
  allRecentFiles,
  onNavigateToRecent,
  onNavigateToDocumentSets,
  onNavigateToSource,
  onDocumentSetToggle,
  onSourceToggle,
  onFileToggle,
  onToggleDocument,
  onToggleFolder,
  onSetDocumentIds,
  onSetFolderIds,
  onDeselectAllDocuments,
  onDeselectAllFolders,
  onUploadChange,
  hasProcessingFiles,
  initialAttachedDocuments,
  onSelectionCountChange,
  vectorDbEnabled,
}: KnowledgeTwoColumnViewProps) {
  return (
    <TableLayouts.TwoColumnLayout minHeight={18.75}>
      <KnowledgeSidebar
        activeView={activeView}
        activeSource={activeSource}
        connectedSources={connectedSources}
        selectedSources={selectedSources}
        selectedDocumentSetIds={selectedDocumentSetIds}
        selectedFileIds={selectedFileIds}
        sourceSelectionCounts={sourceSelectionCounts}
        onNavigateToRecent={onNavigateToRecent}
        onNavigateToDocumentSets={onNavigateToDocumentSets}
        onNavigateToSource={onNavigateToSource}
        vectorDbEnabled={vectorDbEnabled}
      />

      <TableLayouts.ContentColumn>
        {activeView === "document-sets" && (
          <DocumentSetsTableContent
            documentSets={documentSets}
            selectedDocumentSetIds={selectedDocumentSetIds}
            onDocumentSetToggle={onDocumentSetToggle}
          />
        )}
        {activeView === "sources" && activeSource && (
          <SourcesTableContent
            source={activeSource}
            selectedDocumentIds={selectedDocumentIds}
            onToggleDocument={onToggleDocument}
            onSetDocumentIds={onSetDocumentIds}
            selectedFolderIds={selectedFolderIds}
            onToggleFolder={onToggleFolder}
            onSetFolderIds={onSetFolderIds}
            onDeselectAllDocuments={onDeselectAllDocuments}
            onDeselectAllFolders={onDeselectAllFolders}
            initialAttachedDocuments={initialAttachedDocuments}
            onSelectionCountChange={onSelectionCountChange}
          />
        )}
        {activeView === "recent" && (
          <RecentFilesTableContent
            allRecentFiles={allRecentFiles}
            selectedFileIds={selectedFileIds}
            onToggleFile={onFileToggle}
            onUploadChange={onUploadChange}
            hasProcessingFiles={hasProcessingFiles}
          />
        )}
      </TableLayouts.ContentColumn>
    </TableLayouts.TwoColumnLayout>
  );
});

// ============================================================================
// KNOWLEDGE ADD VIEW - Initial pill selection view
// ============================================================================

interface KnowledgeAddViewProps {
  connectedSources: ConnectedSource[];
  onNavigateToDocumentSets: () => void;
  onNavigateToRecent: () => void;
  onNavigateToSource: (source: ValidSources) => void;
  selectedDocumentSetIds: number[];
  selectedFileIds: string[];
  selectedSources: ValidSources[];
  sourceSelectionCounts: Map<ValidSources, number>;
  vectorDbEnabled: boolean;
}

const KnowledgeAddView = memo(function KnowledgeAddView({
  connectedSources,
  onNavigateToDocumentSets,
  onNavigateToRecent,
  onNavigateToSource,
  selectedDocumentSetIds,
  selectedFileIds,
  selectedSources,
  sourceSelectionCounts,
  vectorDbEnabled,
}: KnowledgeAddViewProps) {
  return (
    <GeneralLayouts.Section
      gap={0.5}
      alignItems="start"
      height="auto"
      aria-label="knowledge-add-view"
    >
      <GeneralLayouts.Section
        flexDirection="row"
        justifyContent="start"
        gap={0.5}
        height="auto"
        wrap
      >
        {vectorDbEnabled && (
          <LineItem
            icon={SvgFolder}
            onClick={onNavigateToDocumentSets}
            emphasized={selectedDocumentSetIds.length > 0}
            aria-label="knowledge-add-document-sets"
            rightChildren={
              selectedDocumentSetIds.length > 0 ? (
                <Text mainUiAction className="text-action-link-05">
                  {selectedDocumentSetIds.length}
                </Text>
              ) : undefined
            }
          >
            Document Sets
          </LineItem>
        )}

        <LineItem
          icon={SvgFiles}
          description="Recent or new uploads"
          onClick={onNavigateToRecent}
          emphasized={selectedFileIds.length > 0}
          aria-label="knowledge-add-files"
          rightChildren={
            selectedFileIds.length > 0 ? (
              <Text mainUiAction className="text-action-link-05">
                {selectedFileIds.length}
              </Text>
            ) : undefined
          }
        >
          Your Files
        </LineItem>
      </GeneralLayouts.Section>

      {vectorDbEnabled && connectedSources.length > 0 && (
        <>
          <Text as="p" text03 secondaryBody>
            Connected Sources
          </Text>
          {connectedSources.map((connectedSource) => {
            const sourceMetadata = getSourceMetadata(connectedSource.source);
            const isSelected = selectedSources.includes(connectedSource.source);
            const selectionCount =
              sourceSelectionCounts.get(connectedSource.source) ?? 0;
            return (
              <LineItem
                key={connectedSource.source}
                icon={sourceMetadata.icon}
                onClick={() => onNavigateToSource(connectedSource.source)}
                emphasized={isSelected || selectionCount > 0}
                aria-label={`knowledge-add-source-${connectedSource.source}`}
                rightChildren={
                  selectionCount > 0 ? (
                    <Text mainUiAction className="text-action-link-05">
                      {selectionCount}
                    </Text>
                  ) : undefined
                }
              >
                {sourceMetadata.displayName}
              </LineItem>
            );
          })}
        </>
      )}
    </GeneralLayouts.Section>
  );
});

// ============================================================================
// KNOWLEDGE MAIN CONTENT - Empty state and preview
// ============================================================================

interface KnowledgeMainContentProps {
  hasAnyKnowledge: boolean;
  selectedDocumentSetIds: number[];
  selectedDocumentIds: string[];
  selectedFolderIds: number[];
  selectedFileIds: string[];
  selectedSources: ValidSources[];
  documentSets: DocumentSetSummary[];
  allRecentFiles: ProjectFile[];
  connectedSources: ConnectedSource[];
  onAddKnowledge: () => void;
  onViewEdit: () => void;
  onFileClick?: (file: ProjectFile) => void;
}

const KnowledgeMainContent = memo(function KnowledgeMainContent({
  hasAnyKnowledge,
  selectedDocumentSetIds,
  selectedDocumentIds,
  selectedFolderIds,
  selectedFileIds,
  selectedSources,
  documentSets,
  allRecentFiles,
  connectedSources,
  onAddKnowledge,
  onViewEdit,
  onFileClick,
}: KnowledgeMainContentProps) {
  if (!hasAnyKnowledge) {
    return (
      <GeneralLayouts.Section
        flexDirection="row"
        justifyContent="between"
        alignItems="center"
        height="auto"
      >
        <Text text03 secondaryBody>
          Add documents or connected sources to use for this agent.
        </Text>
        <Button
          icon={SvgPlusCircle}
          onClick={onAddKnowledge}
          prominence="tertiary"
          aria-label="knowledge-add-button"
        />
      </GeneralLayouts.Section>
    );
  }

  // Has knowledge - show preview with count
  const totalSelected =
    selectedDocumentSetIds.length +
    selectedDocumentIds.length +
    selectedFolderIds.length +
    selectedFileIds.length +
    selectedSources.length;

  return (
    <GeneralLayouts.Section
      flexDirection="row"
      justifyContent="between"
      alignItems="center"
      height="auto"
    >
      <Text as="p" text03 secondaryBody>
        {totalSelected} knowledge source{totalSelected !== 1 ? "s" : ""}{" "}
        selected
      </Text>
      <Button
        prominence="internal"
        icon={SvgArrowUpRight}
        onClick={onViewEdit}
        aria-label="knowledge-view-edit"
      >
        View / Edit
      </Button>
    </GeneralLayouts.Section>
  );
});

// ============================================================================
// MAIN COMPONENT - AgentKnowledgePane
// ============================================================================

interface AgentKnowledgePaneProps {
  enableKnowledge: boolean;
  onEnableKnowledgeChange: (enabled: boolean) => void;
  selectedSources: ValidSources[];
  onSourcesChange: (sources: ValidSources[]) => void;
  documentSets: DocumentSetSummary[];
  selectedDocumentSetIds: number[];
  onDocumentSetIdsChange: (ids: number[]) => void;
  selectedDocumentIds: string[];
  onDocumentIdsChange: (ids: string[]) => void;
  selectedFolderIds: number[];
  onFolderIdsChange: (ids: number[]) => void;
  selectedFileIds: string[];
  onFileIdsChange: (ids: string[]) => void;
  allRecentFiles: ProjectFile[];
  onFileClick?: (file: ProjectFile) => void;
  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  hasProcessingFiles: boolean;
  // Initial attached documents for existing agents (to populate selectedDocumentDetails)
  initialAttachedDocuments?: AttachedDocumentSnapshot[];
  // Initial hierarchy nodes for existing agents (to calculate per-source counts)
  initialHierarchyNodes?: HierarchyNodeSnapshot[];
  // When false, hides document sets, connected sources, and hierarchy nodes
  // (these require a vector DB). User files are still shown.
  vectorDbEnabled?: boolean;
}

export default function AgentKnowledgePane({
  enableKnowledge,
  onEnableKnowledgeChange,
  selectedSources,
  onSourcesChange,
  documentSets,
  selectedDocumentSetIds,
  onDocumentSetIdsChange,
  selectedDocumentIds,
  onDocumentIdsChange,
  selectedFolderIds,
  onFolderIdsChange,
  selectedFileIds,
  onFileIdsChange,
  allRecentFiles,
  onFileClick,
  onUploadChange,
  hasProcessingFiles,
  initialAttachedDocuments,
  initialHierarchyNodes,
  vectorDbEnabled = true,
}: AgentKnowledgePaneProps) {
  // View state
  const [view, setView] = useState<KnowledgeView>("main");
  const [activeSource, setActiveSource] = useState<ValidSources | undefined>();

  // Reset view to main when knowledge is disabled
  useEffect(() => {
    if (!enableKnowledge) {
      setView("main");
    }
  }, [enableKnowledge]);

  // Get connected sources from CC pairs
  const { ccPairs } = useCCPairs(vectorDbEnabled);
  const connectedSources: ConnectedSource[] = useMemo(() => {
    if (!ccPairs || ccPairs.length === 0) return [];
    const sourceSet = new Set<ValidSources>();
    ccPairs.forEach((pair) => sourceSet.add(pair.source));
    return Array.from(sourceSet).map((source) => ({
      source,
      connectorCount: ccPairs.filter((p) => p.source === source).length,
    }));
  }, [ccPairs]);

  // Track per-source selection counts
  // Initialized from initialHierarchyNodes and initialAttachedDocuments
  const [sourceSelectionCounts, setSourceSelectionCounts] = useState<
    Map<ValidSources, number>
  >(() => {
    const counts = new Map<ValidSources, number>();

    // Count folders from initialHierarchyNodes (which have source info)
    if (initialHierarchyNodes) {
      for (const node of initialHierarchyNodes) {
        const current = counts.get(node.source) ?? 0;
        counts.set(node.source, current + 1);
      }
    }

    // Count documents from initialAttachedDocuments (which now include source)
    if (initialAttachedDocuments) {
      for (const doc of initialAttachedDocuments) {
        if (doc.source) {
          const current = counts.get(doc.source) ?? 0;
          counts.set(doc.source, current + 1);
        }
      }
    }

    return counts;
  });

  // Handler for selection count changes from SourceHierarchyBrowser
  const handleSelectionCountChange = useCallback(
    (source: ValidSources, count: number) => {
      setSourceSelectionCounts((prev) => {
        const newCounts = new Map(prev);
        if (count === 0) {
          newCounts.delete(source);
        } else {
          newCounts.set(source, count);
        }
        return newCounts;
      });
    },
    []
  );

  // Check if any knowledge is selected
  const hasAnyKnowledge =
    selectedDocumentSetIds.length > 0 ||
    selectedDocumentIds.length > 0 ||
    selectedFolderIds.length > 0 ||
    selectedFileIds.length > 0 ||
    selectedSources.length > 0;

  // Navigation handlers - memoized to prevent unnecessary re-renders
  const handleNavigateToAdd = useCallback(() => setView("add"), []);
  const handleNavigateToMain = useCallback(() => setView("main"), []);
  const handleNavigateToDocumentSets = useCallback(
    () => setView("document-sets"),
    []
  );
  const handleNavigateToRecent = useCallback(() => setView("recent"), []);
  const handleNavigateToSource = useCallback((source: ValidSources) => {
    setActiveSource(source);
    setView("sources");
  }, []);

  // Toggle handlers - memoized to prevent unnecessary re-renders
  const handleDocumentSetToggle = useCallback(
    (documentSetId: number) => {
      const newIds = selectedDocumentSetIds.includes(documentSetId)
        ? selectedDocumentSetIds.filter((id) => id !== documentSetId)
        : [...selectedDocumentSetIds, documentSetId];
      onDocumentSetIdsChange(newIds);
    },
    [selectedDocumentSetIds, onDocumentSetIdsChange]
  );

  const handleSourceToggle = useCallback(
    (source: ValidSources) => {
      const newSources = selectedSources.includes(source)
        ? selectedSources.filter((s) => s !== source)
        : [...selectedSources, source];
      onSourcesChange(newSources);
    },
    [selectedSources, onSourcesChange]
  );

  const handleFileToggle = useCallback(
    (fileId: string) => {
      const newIds = selectedFileIds.includes(fileId)
        ? selectedFileIds.filter((id) => id !== fileId)
        : [...selectedFileIds, fileId];
      onFileIdsChange(newIds);
    },
    [selectedFileIds, onFileIdsChange]
  );

  const handleDocumentToggle = useCallback(
    (documentId: string) => {
      const newIds = selectedDocumentIds.includes(documentId)
        ? selectedDocumentIds.filter((id) => id !== documentId)
        : [...selectedDocumentIds, documentId];
      onDocumentIdsChange(newIds);
    },
    [selectedDocumentIds, onDocumentIdsChange]
  );

  const handleFolderToggle = useCallback(
    (folderId: number) => {
      const newIds = selectedFolderIds.includes(folderId)
        ? selectedFolderIds.filter((id) => id !== folderId)
        : [...selectedFolderIds, folderId];
      onFolderIdsChange(newIds);
    },
    [selectedFolderIds, onFolderIdsChange]
  );

  const handleDeselectAllDocuments = useCallback(() => {
    onDocumentIdsChange([]);
  }, [onDocumentIdsChange]);

  const handleDeselectAllFolders = useCallback(() => {
    onFolderIdsChange([]);
  }, [onFolderIdsChange]);

  // Memoized content based on view - prevents unnecessary re-renders
  const renderedContent = useMemo(() => {
    switch (view) {
      case "main":
        return (
          <KnowledgeMainContent
            hasAnyKnowledge={hasAnyKnowledge}
            selectedDocumentSetIds={selectedDocumentSetIds}
            selectedDocumentIds={selectedDocumentIds}
            selectedFolderIds={selectedFolderIds}
            selectedFileIds={selectedFileIds}
            selectedSources={selectedSources}
            documentSets={documentSets}
            allRecentFiles={allRecentFiles}
            connectedSources={connectedSources}
            onAddKnowledge={handleNavigateToAdd}
            onViewEdit={handleNavigateToAdd}
            onFileClick={onFileClick}
          />
        );

      case "add":
        return (
          <KnowledgeAddView
            connectedSources={connectedSources}
            onNavigateToDocumentSets={handleNavigateToDocumentSets}
            onNavigateToRecent={handleNavigateToRecent}
            onNavigateToSource={handleNavigateToSource}
            selectedDocumentSetIds={selectedDocumentSetIds}
            selectedFileIds={selectedFileIds}
            selectedSources={selectedSources}
            sourceSelectionCounts={sourceSelectionCounts}
            vectorDbEnabled={vectorDbEnabled}
          />
        );

      case "document-sets":
      case "sources":
      case "recent":
        return (
          <KnowledgeTwoColumnView
            activeView={view}
            activeSource={activeSource}
            connectedSources={connectedSources}
            selectedSources={selectedSources}
            selectedDocumentSetIds={selectedDocumentSetIds}
            selectedFileIds={selectedFileIds}
            selectedDocumentIds={selectedDocumentIds}
            selectedFolderIds={selectedFolderIds}
            sourceSelectionCounts={sourceSelectionCounts}
            documentSets={documentSets}
            allRecentFiles={allRecentFiles}
            onNavigateToRecent={handleNavigateToRecent}
            onNavigateToDocumentSets={handleNavigateToDocumentSets}
            onNavigateToSource={handleNavigateToSource}
            onDocumentSetToggle={handleDocumentSetToggle}
            onSourceToggle={handleSourceToggle}
            onFileToggle={handleFileToggle}
            onToggleDocument={handleDocumentToggle}
            onToggleFolder={handleFolderToggle}
            onSetDocumentIds={onDocumentIdsChange}
            onSetFolderIds={onFolderIdsChange}
            onDeselectAllDocuments={handleDeselectAllDocuments}
            onDeselectAllFolders={handleDeselectAllFolders}
            onUploadChange={onUploadChange}
            hasProcessingFiles={hasProcessingFiles}
            initialAttachedDocuments={initialAttachedDocuments}
            onSelectionCountChange={handleSelectionCountChange}
            vectorDbEnabled={vectorDbEnabled}
          />
        );

      default:
        return null;
    }
  }, [
    view,
    activeSource,
    hasAnyKnowledge,
    selectedDocumentSetIds,
    selectedDocumentIds,
    selectedFolderIds,
    selectedFileIds,
    selectedSources,
    sourceSelectionCounts,
    documentSets,
    allRecentFiles,
    connectedSources,
    hasProcessingFiles,
    initialAttachedDocuments,
    vectorDbEnabled,
    onFileClick,
    onUploadChange,
    onDocumentIdsChange,
    onFolderIdsChange,
    handleNavigateToAdd,
    handleNavigateToDocumentSets,
    handleNavigateToRecent,
    handleNavigateToSource,
    handleDocumentSetToggle,
    handleSourceToggle,
    handleFileToggle,
    handleDocumentToggle,
    handleFolderToggle,
    handleDeselectAllDocuments,
    handleDeselectAllFolders,
    handleSelectionCountChange,
  ]);

  return (
    <GeneralLayouts.Section gap={0.5} alignItems="stretch" height="auto">
      <Content
        title="Knowledge"
        description="Add specific connectors and documents for this agent to use to inform its responses."
        sizePreset="main-content"
        variant="section"
      />

      <Card>
        <GeneralLayouts.Section gap={0.5} alignItems="stretch" height="auto">
          <InputLayouts.Horizontal
            title="Use Knowledge"
            description="Let this agent reference these documents to inform its responses."
          >
            <Switch
              name="enable_knowledge"
              checked={enableKnowledge}
              onCheckedChange={onEnableKnowledgeChange}
            />
          </InputLayouts.Horizontal>

          <Disabled disabled={!enableKnowledge}>
            <GeneralLayouts.Section alignItems="stretch" height="auto">
              {renderedContent}
            </GeneralLayouts.Section>
          </Disabled>
        </GeneralLayouts.Section>
      </Card>
    </GeneralLayouts.Section>
  );
}


================================================
FILE: web/src/sections/knowledge/SourceHierarchyBrowser.tsx
================================================
"use client";

import React, {
  useState,
  useMemo,
  useEffect,
  useCallback,
  useRef,
} from "react";
import * as GeneralLayouts from "@/layouts/general-layouts";
import * as TableLayouts from "@/layouts/table-layouts";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import Separator from "@/refresh-components/Separator";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import SelectButton from "@/refresh-components/buttons/SelectButton";
import Divider from "@/refresh-components/Divider";
import {
  SvgFolder,
  SvgChevronRight,
  SvgFileText,
  SvgEye,
  SvgXCircle,
  SvgCheck,
  SvgArrowUpDown,
} from "@opal/icons";
import { getSourceMetadata } from "@/lib/sources";
import { ValidSources } from "@/lib/types";
import {
  HierarchyNodeSummary,
  DocumentSummary,
  DocumentPageCursor,
  HierarchyItem,
  HierarchyBreadcrumbProps,
  DocumentSortField,
  DocumentSortDirection,
  FolderPosition,
} from "@/lib/hierarchy/interfaces";
import {
  fetchHierarchyNodes,
  fetchHierarchyNodeDocuments,
} from "@/lib/hierarchy/svc";
import { AttachedDocumentSnapshot } from "@/app/admin/agents/interfaces";
import { timeAgo } from "@/lib/time";
import Spacer from "@/refresh-components/Spacer";

// ============================================================================
// HIERARCHY BREADCRUMB - Navigation path for folder hierarchy
// ============================================================================

function HierarchyBreadcrumb({
  source,
  path,
  onNavigateToRoot,
  onNavigateToNode,
}: HierarchyBreadcrumbProps) {
  const sourceMetadata = getSourceMetadata(source);
  const MAX_VISIBLE_SEGMENTS = 3;

  // Determine which segments to show
  const shouldCollapse = path.length > MAX_VISIBLE_SEGMENTS;
  const visiblePath = shouldCollapse
    ? path.slice(path.length - MAX_VISIBLE_SEGMENTS + 1)
    : path;
  const collapsedCount = shouldCollapse
    ? path.length - MAX_VISIBLE_SEGMENTS + 1
    : 0;

  return (
    <GeneralLayouts.Section
      flexDirection="row"
      justifyContent="start"
      alignItems="center"
      gap={0.25}
      height="auto"
    >
      {/* Root source link */}
      {path.length > 0 ? (
        <Button prominence="tertiary" onClick={onNavigateToRoot}>
          {sourceMetadata.displayName}
        </Button>
      ) : (
        <Text text03>{sourceMetadata.displayName}</Text>
      )}

      {/* Collapsed indicator */}
      {shouldCollapse && (
        <>
          <SvgChevronRight size={12} className="stroke-text-04" />
          <Text text03 secondaryBody>
            ...
          </Text>
        </>
      )}

      {/* Visible path segments */}
      {visiblePath.map((node, visibleIndex) => {
        const actualIndex = shouldCollapse
          ? collapsedCount + visibleIndex
          : visibleIndex;
        const isLast = actualIndex === path.length - 1;

        return (
          <React.Fragment key={node.id}>
            <SvgChevronRight size={12} className="stroke-text-04" />
            {isLast ? (
              <Text text03>{node.title}</Text>
            ) : (
              <Button
                prominence="tertiary"
                onClick={() => onNavigateToNode(node, actualIndex)}
              >
                {node.title}
              </Button>
            )}
          </React.Fragment>
        );
      })}
    </GeneralLayouts.Section>
  );
}

// ============================================================================
// SOURCE HIERARCHY BROWSER - Browsable folder/document hierarchy for a source
// ============================================================================

export interface SourceHierarchyBrowserProps {
  source: ValidSources;
  selectedDocumentIds: string[];
  onToggleDocument: (documentId: string) => void;
  onSetDocumentIds: (ids: string[]) => void;
  selectedFolderIds: number[];
  onToggleFolder: (folderId: number) => void;
  onSetFolderIds: (ids: number[]) => void;
  onDeselectAllDocuments: () => void;
  onDeselectAllFolders: () => void;
  initialAttachedDocuments?: AttachedDocumentSnapshot[];
  // Callback to report selection count changes for this source
  onSelectionCountChange?: (source: ValidSources, count: number) => void;
}

export default function SourceHierarchyBrowser({
  source,
  selectedDocumentIds,
  onToggleDocument,
  onSetDocumentIds,
  selectedFolderIds,
  onToggleFolder,
  onSetFolderIds,
  onDeselectAllDocuments,
  onDeselectAllFolders,
  initialAttachedDocuments,
  onSelectionCountChange,
}: SourceHierarchyBrowserProps) {
  // State for hierarchy nodes (loaded once per source)
  const [allNodes, setAllNodes] = useState<HierarchyNodeSummary[]>([]);
  const [isLoadingNodes, setIsLoadingNodes] = useState(false);
  const [nodesError, setNodesError] = useState<string | null>(null);

  // State for current navigation path
  const [path, setPath] = useState<HierarchyNodeSummary[]>([]);

  // State for documents (paginated)
  const [documents, setDocuments] = useState<DocumentSummary[]>([]);
  const [nextCursor, setNextCursor] = useState<DocumentPageCursor | null>(null);
  const [isLoadingDocuments, setIsLoadingDocuments] = useState(false);
  const [hasMoreDocuments, setHasMoreDocuments] = useState(true);

  // Search state
  const [searchValue, setSearchValue] = useState("");

  // Sort state
  const [sortField, setSortField] = useState<DocumentSortField>("last_updated");
  const [sortDirection, setSortDirection] =
    useState<DocumentSortDirection>("desc");
  const [folderPosition, setFolderPosition] =
    useState<FolderPosition>("on_top");
  const [sortDropdownOpen, setSortDropdownOpen] = useState(false);

  // View selected only filter state
  const [viewSelectedOnly, setViewSelectedOnly] = useState(false);

  // Store path before entering view selected mode so we can restore it
  const [savedPath, setSavedPath] = useState<HierarchyNodeSummary[]>([]);

  // Store selected document details (for showing all selected documents in view selected mode)
  // Note: useState (not useMemo) because this is modified independently when users select/deselect documents
  const [selectedDocumentDetails, setSelectedDocumentDetails] = useState<
    Map<string, DocumentSummary>
  >(() => new Map(initialAttachedDocuments?.map((doc) => [doc.id, doc]) ?? []));

  // Ref for scroll container
  const scrollContainerRef = useRef<HTMLDivElement>(null);

  // Get current parent node ID (null for root)
  const lastPathNode = path[path.length - 1];
  const currentParentId = lastPathNode ? lastPathNode.id : null;

  // Load hierarchy nodes when source changes
  useEffect(() => {
    const loadNodes = async () => {
      setIsLoadingNodes(true);
      setNodesError(null);
      setAllNodes([]);
      setPath([]);
      setDocuments([]);
      setNextCursor(null);
      setHasMoreDocuments(true);

      try {
        const response = await fetchHierarchyNodes(source);
        setAllNodes(response.nodes);
      } catch (error) {
        setNodesError(
          error instanceof Error ? error.message : "Failed to load folders"
        );
      } finally {
        setIsLoadingNodes(false);
      }
    };

    loadNodes();
  }, [source]);

  // Load documents when current path or sort options change
  useEffect(() => {
    const loadDocuments = async () => {
      // Skip if no nodes loaded yet (still loading hierarchy)
      if (allNodes.length === 0 && !nodesError) return;

      setIsLoadingDocuments(true);
      setDocuments([]);
      setNextCursor(null);
      setHasMoreDocuments(true);

      try {
        // We need a parent hierarchy node to fetch documents
        // For root level, we need to find the root node(s)
        const parentNodeId = currentParentId;
        if (parentNodeId === null) {
          // At root level - find root nodes (nodes with no parent)
          const rootNodes = allNodes.filter((n) => n.parent_id === null);
          if (rootNodes.length === 0) {
            setHasMoreDocuments(false);
            return;
          }
          // For now, just don't load documents at root level
          // Documents are always children of a hierarchy node
          setHasMoreDocuments(false);
          return;
        }

        const response = await fetchHierarchyNodeDocuments({
          parent_hierarchy_node_id: parentNodeId,
          cursor: null,
          sort_field: sortField,
          sort_direction: sortDirection,
          folder_position: folderPosition,
        });

        setDocuments(response.documents);
        setNextCursor(response.next_cursor);
        setHasMoreDocuments(response.next_cursor !== null);
      } catch (error) {
        console.error("Failed to load documents:", error);
      } finally {
        setIsLoadingDocuments(false);
      }
    };

    loadDocuments();
  }, [
    currentParentId,
    allNodes,
    nodesError,
    sortField,
    sortDirection,
    folderPosition,
  ]);

  // Load more documents (for infinite scroll)
  const loadMoreDocuments = useCallback(async () => {
    if (!hasMoreDocuments || isLoadingDocuments || !nextCursor) return;
    if (currentParentId === null) return;

    setIsLoadingDocuments(true);

    try {
      const response = await fetchHierarchyNodeDocuments({
        parent_hierarchy_node_id: currentParentId,
        cursor: nextCursor,
        sort_field: sortField,
        sort_direction: sortDirection,
        folder_position: folderPosition,
      });

      setDocuments((prev) => [...prev, ...response.documents]);
      setNextCursor(response.next_cursor);
      setHasMoreDocuments(response.next_cursor !== null);
    } catch (error) {
      console.error("Failed to load more documents:", error);
    } finally {
      setIsLoadingDocuments(false);
    }
  }, [
    currentParentId,
    nextCursor,
    hasMoreDocuments,
    isLoadingDocuments,
    sortField,
    sortDirection,
    folderPosition,
  ]);

  // Infinite scroll handler
  const handleScroll = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return;

    const { scrollTop, scrollHeight, clientHeight } = container;
    const scrollThreshold = 100; // Load more when within 100px of bottom

    if (scrollHeight - scrollTop - clientHeight < scrollThreshold) {
      loadMoreDocuments();
    }
  }, [loadMoreDocuments]);

  // Populate selectedDocumentDetails for any documents that are already selected
  // but don't have their details stored (e.g., when editing an existing agent)
  useEffect(() => {
    if (documents.length === 0) return;

    const missingDetails = documents.filter(
      (doc) =>
        selectedDocumentIds.includes(doc.id) &&
        !selectedDocumentDetails.has(doc.id)
    );

    if (missingDetails.length > 0) {
      setSelectedDocumentDetails((prev) => {
        const updated = new Map(prev);
        missingDetails.forEach((doc) => updated.set(doc.id, doc));
        return updated;
      });
    }
  }, [documents, selectedDocumentIds, selectedDocumentDetails]);

  // Get child folders of the current path
  const childFolders = useMemo(() => {
    return allNodes.filter((node) => node.parent_id === currentParentId);
  }, [allNodes, currentParentId]);

  // Combine folders and documents into items list
  const items: HierarchyItem[] = useMemo(() => {
    const folderItems: HierarchyItem[] = childFolders.map((node) => ({
      type: "folder",
      data: node,
    }));
    const documentItems: HierarchyItem[] = documents.map((doc) => ({
      type: "document",
      data: doc,
    }));

    // Sort folders based on the sort field and direction
    const sortedFolders = [...folderItems].sort((a, b) => {
      const aTitle = a.data.title.toLowerCase();
      const bTitle = b.data.title.toLowerCase();
      if (sortField === "name") {
        return sortDirection === "asc"
          ? aTitle.localeCompare(bTitle)
          : bTitle.localeCompare(aTitle);
      }
      // For last_updated, folders don't have timestamps, so sort by name
      return aTitle.localeCompare(bTitle);
    });

    // Handle folder position
    if (folderPosition === "on_top") {
      return [...sortedFolders, ...documentItems];
    }

    // Mixed: interleave folders with documents based on sort order
    // Since folders don't have last_modified, we treat them as coming first in the sort
    // when sorting by last_updated, or we sort them alphabetically with docs by name
    if (sortField === "name") {
      const combined = [...sortedFolders, ...documentItems];
      return combined.sort((a, b) => {
        const aTitle = a.data.title.toLowerCase();
        const bTitle = b.data.title.toLowerCase();
        return sortDirection === "asc"
          ? aTitle.localeCompare(bTitle)
          : bTitle.localeCompare(aTitle);
      });
    }

    // For last_updated with mixed, put folders at the end since they don't have timestamps
    return [...documentItems, ...sortedFolders];
  }, [childFolders, documents, sortField, sortDirection, folderPosition]);

  // Filter items by search and view selected mode
  const filteredItems = useMemo(() => {
    let result: HierarchyItem[];

    if (viewSelectedOnly) {
      // In view selected mode, show selected items from THIS source only
      // allNodes is already source-specific, so filtering against it gives us source-specific folders
      const selectedFolders: HierarchyItem[] = allNodes
        .filter((node) => selectedFolderIds.includes(node.id))
        .map((node) => ({ type: "folder" as const, data: node }));

      // Create a set of node IDs from this source to filter documents
      const nodeIdsInSource = new Set(allNodes.map((node) => node.id));

      // Only include documents whose parent belongs to this source
      const selectedDocs: HierarchyItem[] = selectedDocumentIds
        .map((docId) => selectedDocumentDetails.get(docId))
        .filter((doc): doc is DocumentSummary => doc !== undefined)
        .filter(
          (doc) => doc.parent_id !== null && nodeIdsInSource.has(doc.parent_id)
        )
        .map((doc) => ({ type: "document" as const, data: doc }));

      result = [...selectedFolders, ...selectedDocs];
    } else {
      // Normal mode: show items from current folder
      result = items;
    }

    // Filter by search
    if (searchValue) {
      const lower = searchValue.toLowerCase();
      result = result.filter((item) =>
        item.data.title.toLowerCase().includes(lower)
      );
    }

    return result;
  }, [
    items,
    searchValue,
    viewSelectedOnly,
    selectedFolderIds,
    selectedDocumentIds,
    allNodes,
    selectedDocumentDetails,
  ]);

  // Count selected items for this source only
  const currentSourceSelectedCount = useMemo(() => {
    // Folders: count how many selectedFolderIds are in allNodes (source-specific)
    const folderCount = allNodes.filter((node) =>
      selectedFolderIds.includes(node.id)
    ).length;

    // Documents: count how many selected documents have parent in this source
    const nodeIdsInSource = new Set(allNodes.map((node) => node.id));
    const docCount = selectedDocumentIds.filter((docId) => {
      const doc = selectedDocumentDetails.get(docId);
      return (
        doc && doc.parent_id !== null && nodeIdsInSource.has(doc.parent_id)
      );
    }).length;

    return folderCount + docCount;
  }, [
    allNodes,
    selectedFolderIds,
    selectedDocumentIds,
    selectedDocumentDetails,
  ]);

  // Report selection count changes to parent
  useEffect(() => {
    onSelectionCountChange?.(source, currentSourceSelectedCount);
  }, [source, currentSourceSelectedCount, onSelectionCountChange]);

  // Header checkbox state: count how many visible items are selected
  const visibleSelectedCount = useMemo(() => {
    return filteredItems.filter((item) => {
      const isFolder = item.type === "folder";
      if (isFolder) {
        return selectedFolderIds.includes(item.data.id as number);
      }
      return selectedDocumentIds.includes(item.data.id as string);
    }).length;
  }, [filteredItems, selectedFolderIds, selectedDocumentIds]);

  const allVisibleSelected =
    filteredItems.length > 0 && visibleSelectedCount === filteredItems.length;
  const someVisibleSelected =
    visibleSelectedCount > 0 && visibleSelectedCount < filteredItems.length;

  // Handler for header checkbox click
  const handleHeaderCheckboxClick = () => {
    // Get visible folders and documents
    const visibleFolders = filteredItems.filter(
      (item) => item.type === "folder"
    );
    const visibleDocs = filteredItems.filter(
      (item) => item.type === "document"
    );
    const visibleFolderIds = visibleFolders.map(
      (item) => item.data.id as number
    );
    const visibleDocumentIds = visibleDocs.map(
      (item) => item.data.id as string
    );

    if (allVisibleSelected) {
      // Deselect all visible items by removing them from the selected arrays
      const newFolderIds = selectedFolderIds.filter(
        (id) => !visibleFolderIds.includes(id)
      );
      const newDocumentIds = selectedDocumentIds.filter(
        (id) => !visibleDocumentIds.includes(id)
      );
      onSetFolderIds(newFolderIds);
      onSetDocumentIds(newDocumentIds);

      // Remove deselected documents from details map
      setSelectedDocumentDetails((prev) => {
        const updated = new Map(prev);
        visibleDocumentIds.forEach((id) => updated.delete(id));
        return updated;
      });

      // If we deselected everything, exit view selected mode
      if (newFolderIds.length === 0 && newDocumentIds.length === 0) {
        setViewSelectedOnly(false);
      }
    } else {
      // Select all visible items by adding them to the selected arrays
      const newFolderIds = [
        ...selectedFolderIds,
        ...visibleFolderIds.filter((id) => !selectedFolderIds.includes(id)),
      ];
      const newDocumentIds = [
        ...selectedDocumentIds,
        ...visibleDocumentIds.filter((id) => !selectedDocumentIds.includes(id)),
      ];
      onSetFolderIds(newFolderIds);
      onSetDocumentIds(newDocumentIds);

      // Store details for newly selected documents
      setSelectedDocumentDetails((prev) => {
        const updated = new Map(prev);
        visibleDocs.forEach((item) => {
          const docId = item.data.id as string;
          if (!prev.has(docId)) {
            updated.set(docId, item.data as DocumentSummary);
          }
        });
        return updated;
      });
    }
  };

  // Navigation handlers
  const handleNavigateToRoot = () => setPath([]);

  const handleNavigateToNode = (node: HierarchyNodeSummary, index: number) => {
    setPath((prev) => prev.slice(0, index + 1));
  };

  const handleClickIntoFolder = (folder: HierarchyNodeSummary) => {
    if (viewSelectedOnly) {
      // Exit view selected mode and navigate to the folder
      // We need to build the path to this folder from root
      const buildPathToFolder = (
        targetId: number
      ): HierarchyNodeSummary[] | null => {
        const node = allNodes.find((n) => n.id === targetId);
        if (!node) return null;
        if (node.parent_id === null) return [node];
        const parentPath = buildPathToFolder(node.parent_id);
        if (!parentPath) return null;
        return [...parentPath, node];
      };
      const pathToFolder = buildPathToFolder(folder.id);
      if (pathToFolder) {
        setPath(pathToFolder);
      } else {
        // Fallback: just set the folder as the path
        setPath([folder]);
      }
      setViewSelectedOnly(false);
    } else {
      setPath((prev) => [...prev, folder]);
    }
  };

  // Handler for deselecting all items
  const handleDeselectAll = () => {
    onDeselectAllDocuments();
    onDeselectAllFolders();
    setSelectedDocumentDetails(new Map());
    setViewSelectedOnly(false);
  };

  // Handler for toggling view selected mode
  const handleToggleViewSelected = () => {
    setViewSelectedOnly((prev) => {
      if (!prev) {
        // Entering view selected mode - save current path
        setSavedPath(path);
      } else {
        // Exiting view selected mode - restore saved path
        setPath(savedPath);
      }
      return !prev;
    });
  };

  // Handler for clicking a row (folder or document)
  const handleItemClick = (item: HierarchyItem) => {
    if (item.type === "folder") {
      onToggleFolder(item.data.id);
      return;
    }
    const docId = item.data.id;
    const isCurrentlySelected = selectedDocumentIds.includes(docId);
    if (isCurrentlySelected) {
      setSelectedDocumentDetails((prev) => {
        const updated = new Map(prev);
        updated.delete(docId);
        return updated;
      });
    } else {
      setSelectedDocumentDetails((prev) => {
        const updated = new Map(prev);
        updated.set(docId, item.data);
        return updated;
      });
    }
    onToggleDocument(docId);
  };

  // Get the icon for a hierarchy item row
  const getItemIcon = (item: HierarchyItem, isSelected: boolean) => {
    if (item.type === "folder") {
      return <SvgFolder size={16} />;
    }
    if (isSelected) {
      return <Checkbox checked={true} />;
    }
    return <SvgFileText size={16} />;
  };

  // Render loading state
  if (isLoadingNodes) {
    return (
      <GeneralLayouts.Section height="auto" padding={1}>
        <Text text03 secondaryBody>
          Loading folders...
        </Text>
      </GeneralLayouts.Section>
    );
  }

  // Render error state
  if (nodesError) {
    return (
      <GeneralLayouts.Section height="auto" padding={1}>
        <Text text03 secondaryBody>
          {nodesError}
        </Text>
      </GeneralLayouts.Section>
    );
  }

  return (
    <GeneralLayouts.Section gap={0} alignItems="stretch" justifyContent="start">
      {/* Header with search */}
      <GeneralLayouts.Section
        flexDirection="row"
        justifyContent="start"
        alignItems="center"
        gap={0.5}
        height="auto"
      >
        <GeneralLayouts.Section height="auto" width="fit">
          <InputTypeIn
            leftSearchIcon
            value={searchValue}
            onChange={(e) => setSearchValue(e.target.value)}
            placeholder="Search..."
            variant="internal"
          />
        </GeneralLayouts.Section>
      </GeneralLayouts.Section>

      {/* Breadcrumb OR "Selected items" pill - mutually exclusive */}
      {viewSelectedOnly ? (
        <>
          <Spacer rem={0.5} />
          <Button
            variant="action"
            prominence="tertiary"
            onClick={handleToggleViewSelected}
          >
            Selected items
          </Button>
        </>
      ) : (
        (path.length > 0 || allNodes.length > 0) && (
          <>
            <Spacer rem={0.5} />
            <HierarchyBreadcrumb
              source={source}
              path={path}
              onNavigateToRoot={handleNavigateToRoot}
              onNavigateToNode={handleNavigateToNode}
            />
          </>
        )
      )}

      <Spacer rem={0.5} />

      {/* Table header */}
      <TableLayouts.TableRow>
        <TableLayouts.CheckboxCell>
          {filteredItems.length > 0 && (
            <Checkbox
              checked={allVisibleSelected}
              indeterminate={someVisibleSelected}
              onCheckedChange={handleHeaderCheckboxClick}
            />
          )}
        </TableLayouts.CheckboxCell>
        <TableLayouts.TableCell flex>
          <Text secondaryBody text03>
            Name
          </Text>
        </TableLayouts.TableCell>
        <TableLayouts.TableCell width={8}>
          <Popover open={sortDropdownOpen} onOpenChange={setSortDropdownOpen}>
            <Popover.Trigger asChild>
              <div>
                <SelectButton
                  rightIcon={SvgArrowUpDown}
                  transient={sortDropdownOpen}
                  onClick={() => setSortDropdownOpen(true)}
                >
                  {sortField === "name" ? "Name" : "Last Updated"}
                </SelectButton>
              </div>
            </Popover.Trigger>
            <Popover.Content align="end" sideOffset={4} width="lg">
              <Popover.Menu>
                {/* Sort by section */}
                <Divider showTitle text="Sort by" dividerLine={false} />
                <LineItem
                  selected={sortField === "name"}
                  onClick={() => setSortField("name")}
                  rightChildren={
                    sortField === "name" ? <SvgCheck size={16} /> : undefined
                  }
                >
                  Name
                </LineItem>
                <LineItem
                  selected={sortField === "last_updated"}
                  onClick={() => setSortField("last_updated")}
                  rightChildren={
                    sortField === "last_updated" ? (
                      <SvgCheck size={16} />
                    ) : undefined
                  }
                >
                  Last Updated
                </LineItem>
                {/* Sorting Order section */}
                <Divider showTitle text="Sorting Order" dividerLine={false} />
                <LineItem
                  selected={sortDirection === "desc"}
                  onClick={() => setSortDirection("desc")}
                  rightChildren={
                    sortDirection === "desc" ? (
                      <SvgCheck size={16} />
                    ) : undefined
                  }
                >
                  {sortField === "name" ? "Z to A" : "Recent to Old"}
                </LineItem>
                <LineItem
                  selected={sortDirection === "asc"}
                  onClick={() => setSortDirection("asc")}
                  rightChildren={
                    sortDirection === "asc" ? <SvgCheck size={16} /> : undefined
                  }
                >
                  {sortField === "name" ? "A to Z" : "Old to Recent"}
                </LineItem>
                {/* Folders section */}
                <Divider showTitle text="Folders" dividerLine={false} />
                <LineItem
                  selected={folderPosition === "on_top"}
                  onClick={() => setFolderPosition("on_top")}
                  rightChildren={
                    folderPosition === "on_top" ? (
                      <SvgCheck size={16} />
                    ) : undefined
                  }
                >
                  On top
                </LineItem>
                <LineItem
                  selected={folderPosition === "mixed"}
                  onClick={() => setFolderPosition("mixed")}
                  rightChildren={
                    folderPosition === "mixed" ? (
                      <SvgCheck size={16} />
                    ) : undefined
                  }
                >
                  Mixed with Files
                </LineItem>
              </Popover.Menu>
            </Popover.Content>
          </Popover>
        </TableLayouts.TableCell>
      </TableLayouts.TableRow>

      <Separator noPadding />

      {/* Scrollable table body */}
      <div
        ref={scrollContainerRef}
        onScroll={handleScroll}
        className="overflow-y-auto max-h-[20rem]"
      >
        {filteredItems.length === 0 && !isLoadingDocuments ? (
          <GeneralLayouts.Section height="auto" padding={1}>
            <Text text03 secondaryBody>
              {path.length === 0
                ? "Select a folder to browse documents."
                : "No items in this folder."}
            </Text>
          </GeneralLayouts.Section>
        ) : (
          <GeneralLayouts.Section gap={0} alignItems="stretch" height="auto">
            {filteredItems.map((item) => {
              const isFolder = item.type === "folder";
              const id = isFolder ? `folder-${item.data.id}` : item.data.id;
              const isSelected = isFolder
                ? selectedFolderIds.includes(item.data.id as number)
                : selectedDocumentIds.includes(item.data.id as string);

              return (
                <TableLayouts.TableRow
                  key={id}
                  selected={isSelected}
                  onClick={() => handleItemClick(item)}
                >
                  <TableLayouts.CheckboxCell>
                    {getItemIcon(item, isSelected)}
                  </TableLayouts.CheckboxCell>
                  <TableLayouts.TableCell flex>
                    <GeneralLayouts.Section
                      flexDirection="row"
                      justifyContent="start"
                      alignItems="center"
                      gap={0.25}
                      height="auto"
                      width="fit"
                    >
                      <Truncated>{item.data.title}</Truncated>
                      {isFolder && (
                        <Button
                          icon={SvgChevronRight}
                          prominence="tertiary"
                          size="sm"
                          onClick={(e) => {
                            e.stopPropagation();
                            handleClickIntoFolder(
                              item.data as HierarchyNodeSummary
                            );
                          }}
                        />
                      )}
                    </GeneralLayouts.Section>
                  </TableLayouts.TableCell>
                  <TableLayouts.TableCell width={8}>
                    <Text text03 secondaryBody>
                      {isFolder
                        ? "—"
                        : timeAgo(
                            (item.data as DocumentSummary).last_modified
                          ) || "—"}
                    </Text>
                  </TableLayouts.TableCell>
                </TableLayouts.TableRow>
              );
            })}

            {/* Loading more indicator */}
            {isLoadingDocuments && documents.length > 0 && (
              <GeneralLayouts.Section height="auto" padding={0.5}>
                <Text text03 secondaryBody>
                  Loading more...
                </Text>
              </GeneralLayouts.Section>
            )}
          </GeneralLayouts.Section>
        )}
      </div>

      {/* Table footer - only show when items are selected for this source */}
      {currentSourceSelectedCount > 0 && (
        <>
          <Spacer rem={0.5} />
          <GeneralLayouts.Section
            flexDirection="row"
            justifyContent="start"
            alignItems="center"
            gap={0.5}
            height="auto"
          >
            <Text text03 secondaryBody>
              {currentSourceSelectedCount}{" "}
              {currentSourceSelectedCount === 1 ? "item" : "items"} selected
            </Text>
            <Button
              icon={SvgEye}
              variant={viewSelectedOnly ? "action" : undefined}
              prominence="tertiary"
              size={viewSelectedOnly ? undefined : "sm"}
              onClick={handleToggleViewSelected}
            />
            <Button
              icon={SvgXCircle}
              prominence="tertiary"
              size="sm"
              onClick={handleDeselectAll}
            />
          </GeneralLayouts.Section>
        </>
      )}
    </GeneralLayouts.Section>
  );
}


================================================
FILE: web/src/sections/modals/AgentViewerModal.tsx
================================================
"use client";

import { useCallback, useMemo, useState } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import { FullPersona } from "@/app/admin/agents/interfaces";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import Modal from "@/refresh-components/Modal";
import { Section } from "@/layouts/general-layouts";
import { Content, ContentAction } from "@opal/layouts";
import Text from "@/refresh-components/texts/Text";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import Separator from "@/refresh-components/Separator";
import SimpleCollapsible from "@/refresh-components/SimpleCollapsible";
import {
  SvgActions,
  SvgBubbleText,
  SvgExpand,
  SvgFold,
  SvgOrganization,
  SvgStar,
  SvgUser,
} from "@opal/icons";
import * as ExpandableCard from "@/layouts/expandable-card-layouts";
import * as ActionsLayouts from "@/layouts/actions-layouts";
import useMcpServersForAgentEditor from "@/hooks/useMcpServersForAgentEditor";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { MCPServer, ToolSnapshot } from "@/lib/tools/interfaces";
import EmptyMessage from "@/refresh-components/EmptyMessage";
import { Horizontal } from "@/layouts/input-layouts";
import Switch from "@/refresh-components/inputs/Switch";
import { Button } from "@opal/components";
import { SEARCH_PARAM_NAMES } from "@/app/app/services/searchParams";
import AppInputBar from "@/sections/input/AppInputBar";
import { useFilters, useLlmManager } from "@/lib/hooks";
import { formatMmDdYyyy } from "@/lib/dateUtils";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { FileCard } from "@/sections/cards/FileCard";
import DocumentSetCard from "@/sections/cards/DocumentSetCard";
import { getDisplayName } from "@/lib/llmConfig/utils";
import { useLLMProviders } from "@/hooks/useLLMProviders";
import { Interactive } from "@opal/core";

/**
 * Read-only MCP Server card for the viewer modal.
 * Displays the server header with its tools listed in the expandable content area.
 */
interface ViewerMCPServerCardProps {
  server: MCPServer;
  tools: ToolSnapshot[];
}

function ViewerMCPServerCard({ server, tools }: ViewerMCPServerCardProps) {
  const [folded, setFolded] = useState(false);
  const serverIcon = getActionIcon(server.server_url, server.name);

  return (
    <ExpandableCard.Root isFolded={folded} onFoldedChange={setFolded}>
      <ExpandableCard.Header>
        <div className="p-2">
          <ContentAction
            icon={serverIcon}
            title={server.name}
            description={server.description}
            sizePreset="main-ui"
            variant="section"
            rightChildren={
              <Button
                prominence="internal"
                rightIcon={folded ? SvgExpand : SvgFold}
                onClick={() => setFolded((prev) => !prev)}
              >
                {folded ? "Expand" : "Fold"}
              </Button>
            }
          />
        </div>
      </ExpandableCard.Header>
      {tools.length > 0 && (
        <ActionsLayouts.Content>
          {tools.map((tool) => (
            <Section key={tool.id} padding={0.25}>
              <Content
                title={tool.display_name}
                description={tool.description}
                sizePreset="main-ui"
                variant="section"
              />
            </Section>
          ))}
        </ActionsLayouts.Content>
      )}
    </ExpandableCard.Root>
  );
}

/**
 * Read-only OpenAPI tool card for the viewer modal.
 * Displays just the tool header (no expandable content).
 */
function ViewerOpenApiToolCard({ tool }: { tool: ToolSnapshot }) {
  return (
    <ExpandableCard.Root>
      <ExpandableCard.Header>
        <div className="p-2">
          <Content
            icon={SvgActions}
            title={tool.display_name}
            description={tool.description}
            sizePreset="main-ui"
            variant="section"
          />
        </div>
      </ExpandableCard.Header>
    </ExpandableCard.Root>
  );
}

/**
 * Floating ChatInputBar below the AgentViewerModal.
 * On submit, navigates to the agent's chat with the message pre-filled.
 */
interface AgentChatInputProps {
  agent: FullPersona;
  onSubmit: (message: string) => void;
}
function AgentChatInput({ agent, onSubmit }: AgentChatInputProps) {
  const llmManager = useLlmManager(undefined, agent);
  const filterManager = useFilters();

  return (
    <AppInputBar
      onSubmit={onSubmit}
      llmManager={llmManager}
      chatState="input"
      filterManager={filterManager}
      selectedAgent={agent}
      stopGenerating={() => {}}
      handleFileUpload={() => {}}
      currentSessionFileTokenCount={0}
      availableContextTokens={Infinity}
      deepResearchEnabled={false}
      toggleDeepResearch={() => {}}
      disabled={false}
    />
  );
}

/**
 * AgentViewerModal - A read-only view of an agent's configuration
 *
 * This modal is the view-only counterpart to `AgentEditorPage.tsx`. While
 * AgentEditorPage allows creating and editing agents with forms and inputs,
 * AgentViewerModal displays the same information in a read-only format.
 *
 * Key differences from AgentEditorPage:
 * - Modal presentation instead of full page
 * - Read-only display (no form inputs, switches, or editable fields)
 * - Static text/badges instead of form controls
 * - Designed to be opened from AgentCard when clicking on the card body
 *
 * Sections displayed (mirroring AgentEditorPage):
 * - Agent info: name, description, avatar
 * - Instructions (system prompt)
 * - Conversation starters
 * - Knowledge configuration
 * - Actions/tools
 * - Advanced options (model, sharing status)
 */
export interface AgentViewerModalProps {
  agent: FullPersona;
}
export default function AgentViewerModal({ agent }: AgentViewerModalProps) {
  const agentViewerModal = useModal();
  const router = useRouter();
  const { allRecentFiles } = useProjectsContext();
  const { llmProviders } = useLLMProviders(agent.id);

  const handleStartChat = useCallback(
    (message: string) => {
      const params = new URLSearchParams({
        [SEARCH_PARAM_NAMES.PERSONA_ID]: String(agent.id),
        [SEARCH_PARAM_NAMES.USER_PROMPT]: message,
        [SEARCH_PARAM_NAMES.SEND_ON_LOAD]: "true",
      });
      router.push(`/app?${params.toString()}` as Route);
      agentViewerModal.toggle(false);
    },
    [agent.id, router, agentViewerModal]
  );

  const hasKnowledge =
    (agent.document_sets && agent.document_sets.length > 0) ||
    (agent.hierarchy_nodes && agent.hierarchy_nodes.length > 0) ||
    (agent.user_file_ids && agent.user_file_ids.length > 0);

  // Categorize tools into MCP, OpenAPI, and built-in
  const mcpToolsByServerId = useMemo(() => {
    const map = new Map<number, ToolSnapshot[]>();
    agent.tools.forEach((tool) => {
      if (tool.mcp_server_id != null) {
        const existing = map.get(tool.mcp_server_id) || [];
        existing.push(tool);
        map.set(tool.mcp_server_id, existing);
      }
    });
    return map;
  }, [agent.tools]);

  const openApiTools = useMemo(
    () =>
      agent.tools.filter((t) => !t.in_code_tool_id && t.mcp_server_id == null),
    [agent.tools]
  );

  // Fetch MCP server metadata for display
  const { mcpData } = useMcpServersForAgentEditor();
  const mcpServers = mcpData?.mcp_servers ?? [];

  const mcpServersWithTools = useMemo(
    () =>
      mcpServers
        .filter((server) => mcpToolsByServerId.has(server.id))
        .map((server) => ({
          server,
          tools: mcpToolsByServerId.get(server.id)!,
        })),
    [mcpServers, mcpToolsByServerId]
  );

  const hasActions = mcpServersWithTools.length > 0 || openApiTools.length > 0;
  const defaultModel = getDisplayName(agent, llmProviders ?? []);

  return (
    <Modal
      open={agentViewerModal.isOpen}
      onOpenChange={agentViewerModal.toggle}
    >
      <Modal.Content
        width="lg"
        height="lg"
        bottomSlot={<AgentChatInput agent={agent} onSubmit={handleStartChat} />}
      >
        <Modal.Header
          icon={(props) => <AgentAvatar agent={agent} {...props} size={24} />}
          title={agent.name}
          onClose={() => agentViewerModal.toggle(false)}
        />

        <Modal.Body>
          {/* Metadata */}
          <Section flexDirection="row" justifyContent="start">
            {agent.is_featured && (
              <Content
                icon={SvgStar}
                title="Featured"
                sizePreset="main-ui"
                variant="body"
                widthVariant="fit"
              />
            )}
            <Content
              icon={SvgUser}
              title={agent.owner?.email ?? "Onyx"}
              sizePreset="main-ui"
              variant="body"
              prominence="muted"
              widthVariant="fit"
            />
            {agent.is_public && (
              <Content
                icon={SvgOrganization}
                title="Public to your organization"
                sizePreset="main-ui"
                variant="body"
                prominence="muted"
                widthVariant="fit"
              />
            )}
          </Section>

          {/* Description */}
          {agent.description && <Text text03>{agent.description}</Text>}

          {/* Knowledge */}
          <Separator noPadding />
          <Section gap={0.5} alignItems="start">
            <Content
              title="Knowledge"
              sizePreset="main-content"
              variant="section"
            />
            {hasKnowledge ? (
              <Section
                gap={0.5}
                flexDirection="row"
                justifyContent="start"
                wrap
                alignItems="start"
              >
                {agent.document_sets?.map((docSet) => (
                  <DocumentSetCard key={docSet.id} documentSet={docSet} />
                ))}
                {agent.user_file_ids?.map((fileId) => {
                  const file = allRecentFiles.find((f) => f.id === fileId);
                  if (!file) return null;
                  return <FileCard key={fileId} file={file} />;
                })}
              </Section>
            ) : (
              <EmptyMessage title="No Knowledge" />
            )}
          </Section>

          {/* Actions & Tools */}
          <SimpleCollapsible>
            <SimpleCollapsible.Header title="Actions & Tools" />
            <SimpleCollapsible.Content>
              {hasActions ? (
                <Section gap={0.5} alignItems="start">
                  {mcpServersWithTools.map(({ server, tools }) => (
                    <ViewerMCPServerCard
                      key={server.id}
                      server={server}
                      tools={tools}
                    />
                  ))}
                  {openApiTools.map((tool) => (
                    <ViewerOpenApiToolCard key={tool.id} tool={tool} />
                  ))}
                </Section>
              ) : (
                <EmptyMessage title="No Actions" />
              )}
            </SimpleCollapsible.Content>
          </SimpleCollapsible>

          {/* More Info (Collapsible) */}
          <Separator noPadding />
          <SimpleCollapsible>
            <SimpleCollapsible.Header title="More Info" />
            <SimpleCollapsible.Content>
              <Section gap={0.5} alignItems="start">
                {agent.system_prompt && (
                  <Content
                    title="Instructions"
                    description={agent.system_prompt}
                    sizePreset="main-ui"
                    variant="section"
                  />
                )}
                {defaultModel && (
                  <Horizontal
                    title="Default Model"
                    description="This model will be used by Onyx by default in your chats."
                    nonInteractive
                    sizePreset="main-ui"
                  >
                    <Text>{defaultModel}</Text>
                  </Horizontal>
                )}
                {agent.search_start_date && (
                  <Horizontal
                    title="Knowledge Cutoff Date"
                    description="Documents with a last-updated date prior to this will be ignored."
                    nonInteractive
                    sizePreset="main-ui"
                  >
                    <Text mainUiMono>
                      {formatMmDdYyyy(agent.search_start_date)}
                    </Text>
                  </Horizontal>
                )}
                <Horizontal
                  title="Overwrite System Prompts"
                  description='Remove the base system prompt which includes useful instructions (e.g. "You can use Markdown tables"). This may affect response quality.'
                  nonInteractive
                  sizePreset="main-ui"
                >
                  <Switch disabled checked={agent.replace_base_system_prompt} />
                </Horizontal>
              </Section>
            </SimpleCollapsible.Content>
          </SimpleCollapsible>

          {/* Prompt Reminders */}
          {agent.task_prompt && (
            <>
              <Separator noPadding />
              <Content
                title="Prompt Reminders"
                description={agent.task_prompt}
                sizePreset="main-content"
                variant="section"
              />
            </>
          )}

          {/* Conversation Starters */}
          {agent.starter_messages && agent.starter_messages.length > 0 && (
            <>
              <Separator noPadding />
              <Content
                title="Conversation Starters"
                sizePreset="main-content"
                variant="section"
              />
              <div className="grid grid-cols-2 gap-1 w-full">
                {agent.starter_messages.map((starter, index) => (
                  <Interactive.Stateless
                    key={index}
                    onClick={() => handleStartChat(starter.message)}
                    prominence="tertiary"
                  >
                    <Interactive.Container>
                      <Content
                        icon={SvgBubbleText}
                        title={starter.message}
                        sizePreset="main-ui"
                        variant="body"
                        prominence="muted"
                        widthVariant="full"
                      />
                    </Interactive.Container>
                  </Interactive.Stateless>
                ))}
              </div>
            </>
          )}
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/FeedbackModal.tsx
================================================
"use client";

import { FeedbackType } from "@/app/app/interfaces";
import { Button } from "@opal/components";
import useFeedbackController from "@/hooks/useFeedbackController";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import { SvgThumbsDown, SvgThumbsUp } from "@opal/icons";
import Modal from "@/refresh-components/Modal";
import { Formik } from "formik";
import * as Yup from "yup";
import * as InputLayouts from "@/layouts/input-layouts";
import InputTextAreaField from "@/refresh-components/form/InputTextAreaField";

export interface FeedbackModalProps {
  feedbackType: FeedbackType;
  messageId: number;
}

interface FeedbackFormValues {
  additional_feedback: string;
}

export default function FeedbackModal({
  feedbackType,
  messageId,
}: FeedbackModalProps) {
  const modal = useModal();
  const { handleFeedbackChange } = useFeedbackController();

  const initialValues: FeedbackFormValues = {
    additional_feedback: "",
  };

  const validationSchema = Yup.object({
    additional_feedback:
      feedbackType === "dislike"
        ? Yup.string().trim().required("Feedback is required")
        : Yup.string().trim(),
  });

  async function handleSubmit(values: FeedbackFormValues) {
    const feedbackText = values.additional_feedback;

    const success = await handleFeedbackChange(
      messageId,
      feedbackType,
      feedbackText,
      undefined
    );

    // Only close modal if submission was successful
    if (success) {
      modal.toggle(false);
    }
  }

  return (
    <>
      <Modal open={modal.isOpen} onOpenChange={modal.toggle}>
        <Modal.Content width="sm">
          <Modal.Header
            icon={feedbackType === "like" ? SvgThumbsUp : SvgThumbsDown}
            title="Feedback"
            onClose={() => modal.toggle(false)}
          />
          <Formik
            initialValues={initialValues}
            validationSchema={validationSchema}
            onSubmit={handleSubmit}
          >
            {({
              isSubmitting,
              handleSubmit: formikHandleSubmit,
              dirty,
              isValid,
            }) => (
              <>
                <Modal.Body>
                  <InputLayouts.Vertical
                    name="additional_feedback"
                    title="Provide Additional Details"
                    suffix={feedbackType === "like" ? "optional" : undefined}
                  >
                    <InputTextAreaField
                      name="additional_feedback"
                      placeholder={`What did you ${feedbackType} about this response?`}
                    />
                  </InputLayouts.Vertical>
                </Modal.Body>

                <Modal.Footer>
                  <Button
                    prominence="secondary"
                    onClick={() => modal.toggle(false)}
                    type="button"
                  >
                    Cancel
                  </Button>
                  <Button
                    disabled={
                      isSubmitting ||
                      (feedbackType === "dislike" && (!dirty || !isValid))
                    }
                    onClick={() => formikHandleSubmit()}
                  >
                    {isSubmitting ? "Submitting..." : "Submit"}
                  </Button>
                </Modal.Footer>
              </>
            )}
          </Formik>
        </Modal.Content>
      </Modal>
    </>
  );
}


================================================
FILE: web/src/sections/modals/NewTenantModal.tsx
================================================
"use client";

import { useState } from "react";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import { toast } from "@/hooks/useToast";
import { SvgArrowRight, SvgUsers, SvgX } from "@opal/icons";
import { logout } from "@/lib/user";
import { useUser } from "@/providers/UserProvider";
import { NewTenantInfo } from "@/lib/types";
import { useRouter } from "next/navigation";
import Text from "@/refresh-components/texts/Text";
import { ErrorTextLayout } from "@/layouts/input-layouts";

// App domain should not be hardcoded
const APP_DOMAIN = process.env.NEXT_PUBLIC_APP_DOMAIN || "onyx.app";

export interface NewTenantModalProps {
  tenantInfo: NewTenantInfo;
  isInvite?: boolean;
  onClose?: () => void;
}

export default function NewTenantModal({
  tenantInfo,
  isInvite = false,
  onClose,
}: NewTenantModalProps) {
  const router = useRouter();
  const { user } = useUser();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  async function handleJoinTenant() {
    setIsLoading(true);
    setError(null);

    try {
      if (isInvite) {
        // Accept the invitation through the API
        const response = await fetch("/api/tenants/users/invite/accept", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({ tenant_id: tenantInfo.tenant_id }),
        });

        if (!response.ok) {
          const errorData = await response.json().catch(() => ({}));
          throw new Error(
            errorData.detail ||
              errorData.message ||
              "Failed to accept invitation"
          );
        }

        toast.success("You have accepted the invitation.");
      } else {
        // For non-invite flow, just show success message
        toast.success("Processing your team join request...");
      }

      // Common logout and redirect for both flows
      await logout();
      router.push(`/auth/join?email=${encodeURIComponent(user?.email || "")}`);
      onClose?.();
    } catch (error) {
      const message =
        error instanceof Error
          ? error.message
          : "Failed to join the team. Please try again.";

      setError(message);
      toast.error(message);
    } finally {
      setIsLoading(false);
    }
  }

  async function handleRejectInvite() {
    if (!isInvite) return;

    setIsLoading(true);
    setError(null);

    try {
      // Deny the invitation through the API
      const response = await fetch("/api/tenants/users/invite/deny", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ tenant_id: tenantInfo.tenant_id }),
      });

      if (!response.ok) {
        const errorData = await response.json().catch(() => ({}));
        throw new Error(
          errorData.detail ||
            errorData.message ||
            "Failed to decline invitation"
        );
      }

      toast.info("You have declined the invitation.");
      onClose?.();
    } catch (error) {
      const message =
        error instanceof Error
          ? error.message
          : "Failed to decline the invitation. Please try again.";

      setError(message);
      toast.error(message);
    } finally {
      setIsLoading(false);
    }
  }

  const title = isInvite
    ? `You have been invited to join ${
        tenantInfo.number_of_users
      } other teammate${
        tenantInfo.number_of_users === 1 ? "" : "s"
      } of ${APP_DOMAIN}.`
    : `Your request to join ${tenantInfo.number_of_users} other users of ${APP_DOMAIN} has been approved.`;

  const description = isInvite
    ? `By accepting this invitation, you will join the existing ${APP_DOMAIN} team and lose access to your current team. Note: you will lose access to your current agents, prompts, chats, and connected sources.`
    : `To finish joining your team, please reauthenticate with ${user?.email}.`;

  return (
    <Modal open>
      <Modal.Content width="sm" height="sm" preventAccidentalClose={false}>
        <Modal.Header icon={SvgUsers} title={title} onClose={onClose} />

        <Modal.Body>
          <Text>{description}</Text>
          {error && <ErrorTextLayout>{error}</ErrorTextLayout>}
        </Modal.Body>

        <Modal.Footer>
          <BasicModalFooter
            cancel={
              isInvite ? (
                <Button
                  disabled={isLoading}
                  prominence="secondary"
                  onClick={handleRejectInvite}
                  icon={SvgX}
                >
                  Decline
                </Button>
              ) : undefined
            }
            submit={
              <Button
                disabled={isLoading}
                onClick={handleJoinTenant}
                rightIcon={SvgArrowRight}
              >
                {isLoading
                  ? isInvite
                    ? "Accepting..."
                    : "Joining..."
                  : isInvite
                    ? "Accept Invitation"
                    : "Reauthenticate"}
              </Button>
            }
          />
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/ExceptionTraceModal.tsx
================================================
import Modal from "@/refresh-components/Modal";
import { SvgAlertTriangle } from "@opal/icons";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import { CopyButton } from "@/sections/modals/PreviewModal/variants/shared";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";

interface ExceptionTraceModalProps {
  onOutsideClick: () => void;
  exceptionTrace: string;
  language?: string;
}

export default function ExceptionTraceModal({
  onOutsideClick,
  exceptionTrace,
  language = "python",
}: ExceptionTraceModalProps) {
  return (
    <Modal open onOpenChange={onOutsideClick}>
      <Modal.Content width="full" height="full">
        <Modal.Header
          icon={SvgAlertTriangle}
          title="Full Exception Trace"
          onClose={onOutsideClick}
          height="fit"
        />

        <div className="flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01">
          <CodePreview content={exceptionTrace} language={language} normalize />
        </div>

        <FloatingFooter
          right={<CopyButton getText={() => exceptionTrace} />}
          codeBackground
        />
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/FloatingFooter.tsx
================================================
import { cn } from "@/lib/utils";
import { ReactNode } from "react";

interface FloatingFooterProps {
  left?: ReactNode;
  right?: ReactNode;
  codeBackground?: boolean;
}

export default function FloatingFooter({
  left,
  right,
  codeBackground,
}: FloatingFooterProps) {
  return (
    <div
      className={cn(
        "absolute bottom-0 left-0 right-0",
        "flex items-center justify-between",
        "p-4 pointer-events-none w-full"
      )}
      style={{
        background: `linear-gradient(to top, var(--background-${
          codeBackground ? "code-01" : "tint-01"
        }) 40%, transparent)`,
      }}
    >
      {/* Left slot */}
      <div className="pointer-events-auto">{left}</div>

      {/* Right slot */}
      {right ? (
        <div className="pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg">
          {right}
        </div>
      ) : null}
    </div>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/PreviewModal.tsx
================================================
"use client";

import { useState, useEffect, useCallback, useMemo } from "react";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { Section } from "@/layouts/general-layouts";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";
import mime from "mime";
import {
  getCodeLanguage,
  getDataLanguage,
  getLanguageByMime,
} from "@/lib/languages";
import { fetchChatFile } from "@/lib/chat/svc";
import { PreviewContext } from "@/sections/modals/PreviewModal/interfaces";
import { resolveVariant } from "@/sections/modals/PreviewModal/variants";

interface PreviewModalProps {
  presentingDocument: MinimalOnyxDocument;
  onClose: () => void;
}

export default function PreviewModal({
  presentingDocument,
  onClose,
}: PreviewModalProps) {
  const [fileContent, setFileContent] = useState("");
  const [fileUrl, setFileUrl] = useState("");
  const [fileName, setFileName] = useState("");
  const [isLoading, setIsLoading] = useState(true);
  const [loadError, setLoadError] = useState<string | null>(null);
  const [mimeType, setMimeType] = useState("application/octet-stream");
  const [zoom, setZoom] = useState(100);

  const variant = useMemo(
    () => resolveVariant(presentingDocument.semantic_identifier, mimeType),
    [presentingDocument.semantic_identifier, mimeType]
  );

  const language = useMemo(
    () =>
      getCodeLanguage(presentingDocument.semantic_identifier || "") ||
      getLanguageByMime(mimeType) ||
      getDataLanguage(presentingDocument.semantic_identifier || "") ||
      "plaintext",
    [mimeType, presentingDocument.semantic_identifier]
  );

  const lineCount = useMemo(() => {
    if (!fileContent) return 0;
    return fileContent.split("\n").length;
  }, [fileContent]);

  const fileSize = useMemo(() => {
    if (!fileContent) return "";
    const bytes = new TextEncoder().encode(fileContent).length;
    if (bytes < 1024) return `${bytes} B`;
    const kb = bytes / 1024;
    if (kb < 1024) return `${kb.toFixed(2)} KB`;
    const mb = kb / 1024;
    return `${mb.toFixed(2)} MB`;
  }, [fileContent]);

  const fetchFile = useCallback(async () => {
    setIsLoading(true);
    setLoadError(null);
    setFileContent("");
    const fileIdLocal =
      presentingDocument.document_id.split("__")[1] ||
      presentingDocument.document_id;

    try {
      const response = await fetchChatFile(fileIdLocal);

      const blob = await response.blob();
      const url = window.URL.createObjectURL(blob);
      setFileUrl((prev) => {
        if (prev) window.URL.revokeObjectURL(prev);
        return url;
      });

      const originalFileName =
        presentingDocument.semantic_identifier || "document";
      setFileName(originalFileName);

      const rawContentType =
        response.headers.get("Content-Type") || "application/octet-stream";
      const resolvedMime =
        rawContentType === "application/octet-stream"
          ? mime.getType(originalFileName) ?? rawContentType
          : rawContentType;
      setMimeType(resolvedMime);

      const resolved = resolveVariant(
        presentingDocument.semantic_identifier,
        resolvedMime
      );
      if (resolved.needsTextContent) {
        setFileContent(await blob.text());
      }
    } catch {
      setLoadError("Failed to load document.");
    } finally {
      setIsLoading(false);
    }
  }, [presentingDocument]);

  useEffect(() => {
    fetchFile();
  }, [fetchFile]);

  useEffect(() => {
    return () => {
      if (fileUrl) window.URL.revokeObjectURL(fileUrl);
    };
  }, [fileUrl]);

  const handleZoomIn = useCallback(
    () => setZoom((prev) => Math.min(prev + 25, 200)),
    []
  );
  const handleZoomOut = useCallback(
    () => setZoom((prev) => Math.max(prev - 25, 25)),
    []
  );

  const ctx: PreviewContext = useMemo(
    () => ({
      fileContent,
      fileUrl,
      fileName,
      language,
      lineCount,
      fileSize,
      zoom,
      onZoomIn: handleZoomIn,
      onZoomOut: handleZoomOut,
    }),
    [
      fileContent,
      fileUrl,
      fileName,
      language,
      lineCount,
      fileSize,
      zoom,
      handleZoomIn,
      handleZoomOut,
    ]
  );

  return (
    <Modal
      open
      onOpenChange={(open) => {
        if (!open) onClose();
      }}
    >
      <Modal.Content
        width={variant.width}
        height={variant.height}
        preventAccidentalClose={false}
        onOpenAutoFocus={(e) => e.preventDefault()}
      >
        <Modal.Header
          title={fileName || "Document"}
          description={variant.headerDescription(ctx)}
          onClose={onClose}
        />

        {/* Body — uses flex-1/min-h-0/overflow-hidden (not Modal.Body)
            so that child ScrollIndicatorDivs become the actual scroll
            container instead of the body stealing it via overflow-y-auto. */}
        <div className="flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01">
          {isLoading ? (
            <Section>
              <SimpleLoader className="h-8 w-8" />
            </Section>
          ) : loadError ? (
            <Section padding={1}>
              <Text text03 mainUiBody>
                {loadError}
              </Text>
            </Section>
          ) : (
            variant.renderContent(ctx)
          )}
        </div>

        {!isLoading && !loadError && (
          <FloatingFooter
            left={variant.renderFooterLeft(ctx)}
            right={variant.renderFooterRight(ctx)}
            codeBackground={variant.codeBackground}
          />
        )}
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/index.ts
================================================
export { default } from "@/sections/modals/PreviewModal/PreviewModal";


================================================
FILE: web/src/sections/modals/PreviewModal/interfaces.ts
================================================
import React from "react";
import { ModalContentProps } from "@/refresh-components/Modal";

export interface PreviewContext {
  fileContent: string;
  fileUrl: string;
  fileName: string;
  language: string;
  lineCount: number;
  fileSize: string;
  zoom: number;
  onZoomIn: () => void;
  onZoomOut: () => void;
}

export interface PreviewVariant
  extends Required<Pick<ModalContentProps, "width" | "height">> {
  /** Return true if this variant should handle the given file. */
  matches: (semanticIdentifier: string | null, mimeType: string) => boolean;
  /** Whether the fetcher should read the blob as text. */
  needsTextContent: boolean;
  /** Whether the variant renders on a code-style background (bg-background-code-01). */
  codeBackground: boolean;
  /** String shown below the title in the modal header. */
  headerDescription: (ctx: PreviewContext) => string;
  /** Body content. */
  renderContent: (ctx: PreviewContext) => React.ReactNode;
  /** Left side of the floating footer (e.g. line count text, zoom controls). Return null for nothing. */
  renderFooterLeft: (ctx: PreviewContext) => React.ReactNode;
  /** Right side of the floating footer (e.g. copy + download buttons). */
  renderFooterRight: (ctx: PreviewContext) => React.ReactNode;
}


================================================
FILE: web/src/sections/modals/PreviewModal/variants/CodePreview.tsx
================================================
"use client";

import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import { cn } from "@/lib/utils";
import "@/app/app/message/custom-code-styles.css";

interface CodePreviewProps {
  content: string;
  language?: string | null;
  normalize?: boolean;
}

export function CodePreview({
  content,
  language,
  normalize,
}: CodePreviewProps) {
  // Wrap raw content in a fenced code block for syntax highlighting. Uses ~~~
  // instead of ``` to avoid conflicts with backticks in the content. Any literal
  // ~~~ sequences in the content are escaped so they don't accidentally close the fence.
  const markdownContent = normalize
    ? `~~~${language || ""}\n${content.replace(/~~~/g, "\\~\\~\\~")}\n~~~`
    : content;

  return (
    <ScrollIndicatorDiv
      className={cn("p-4", normalize && "bg-background-code-01")}
      backgroundColor={normalize ? "var(--background-code-01)" : undefined}
      variant="shadow"
      bottomSpacing="2rem"
      disableBottomIndicator
    >
      <MinimalMarkdown content={markdownContent} showHeader={false} />
    </ScrollIndicatorDiv>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/variants/codeVariant.tsx
================================================
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { getCodeLanguage } from "@/lib/languages";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";

export const codeVariant: PreviewVariant = {
  matches: (name) => !!getCodeLanguage(name || ""),
  width: "xl",
  height: "lg",
  needsTextContent: true,
  codeBackground: true,

  headerDescription: (ctx) =>
    ctx.fileContent
      ? `${ctx.language} - ${ctx.lineCount} ${
          ctx.lineCount === 1 ? "line" : "lines"
        } · ${ctx.fileSize}`
      : "",

  renderContent: (ctx) => (
    <CodePreview normalize content={ctx.fileContent} language={ctx.language} />
  ),

  renderFooterLeft: (ctx) => (
    <Text text03 mainUiBody className="select-none">
      {ctx.lineCount} {ctx.lineCount === 1 ? "line" : "lines"}
    </Text>
  ),

  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <CopyButton getText={() => ctx.fileContent} />
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/csvVariant.tsx
================================================
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";
import TextSeparator from "@/refresh-components/TextSeparator";

interface CsvData {
  headers: string[];
  rows: string[][];
}

function parseCsv(content: string): CsvData {
  const lines = content.split(/\r?\n/).filter((l) => l.length > 0);
  const headers = lines.length > 0 ? lines[0]?.split(",") ?? [] : [];
  const rows = lines.slice(1).map((line) => line.split(","));
  return { headers, rows };
}

export const csvVariant: PreviewVariant = {
  matches: (name, mime) =>
    mime.startsWith("text/csv") || (name || "").toLowerCase().endsWith(".csv"),
  width: "full",
  height: "full",
  needsTextContent: true,
  codeBackground: false,
  headerDescription: (ctx) => {
    if (!ctx.fileContent) return "";
    const { rows } = parseCsv(ctx.fileContent);
    return `CSV - ${rows.length} rows • ${ctx.fileSize}`;
  },

  renderContent: (ctx) => {
    if (!ctx.fileContent) return null;
    const { headers, rows } = parseCsv(ctx.fileContent);
    return (
      <Section justifyContent="start" alignItems="start" padding={1}>
        <Table>
          <TableHeader className="sticky top-0 z-sticky bg-background-tint-01">
            <TableRow noHover>
              {headers.map((h: string, i: number) => (
                <TableHead key={i}>
                  <Text as="p" className="line-clamp-2" text04 secondaryAction>
                    {h}
                  </Text>
                </TableHead>
              ))}
            </TableRow>
          </TableHeader>
          <TableBody>
            {rows.map((row: string[], rIdx: number) => (
              <TableRow key={rIdx} noHover>
                {headers.map((_: string, cIdx: number) => (
                  <TableCell
                    key={cIdx}
                    className={cn(
                      cIdx === 0 && "sticky left-0 bg-background-tint-01",
                      "py-4 px-4 whitespace-normal break-words"
                    )}
                  >
                    <Text
                      as="p"
                      {...(cIdx === 0
                        ? { text04: true, secondaryAction: true }
                        : { text03: true, secondaryBody: true })}
                    >
                      {row?.[cIdx] ?? ""}
                    </Text>
                  </TableCell>
                ))}
              </TableRow>
            ))}
          </TableBody>
        </Table>
        <TextSeparator
          count={rows.length}
          text={rows.length === 1 ? "row" : "rows"}
        />
      </Section>
    );
  },

  renderFooterLeft: (ctx) => {
    if (!ctx.fileContent) return null;
    const { headers, rows } = parseCsv(ctx.fileContent);
    return (
      <Text text03 mainUiBody className="select-none">
        {headers.length} {headers.length === 1 ? "column" : "columns"} •{" "}
        {rows.length} {rows.length === 1 ? "row" : "rows"}
      </Text>
    );
  },
  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <CopyButton getText={() => ctx.fileContent} />
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/dataVariant.tsx
================================================
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { getDataLanguage, getLanguageByMime } from "@/lib/languages";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";

function formatContent(language: string, content: string): string {
  if (language === "json") {
    try {
      return JSON.stringify(JSON.parse(content), null, 2);
    } catch {
      return content;
    }
  }
  return content;
}

export const dataVariant: PreviewVariant = {
  matches: (name, mime) =>
    !!getDataLanguage(name || "") || !!getLanguageByMime(mime),
  width: "xl",
  height: "lg",
  needsTextContent: true,
  codeBackground: true,

  headerDescription: (ctx) =>
    ctx.fileContent
      ? `${ctx.language} - ${ctx.lineCount} ${
          ctx.lineCount === 1 ? "line" : "lines"
        } · ${ctx.fileSize}`
      : "",

  renderContent: (ctx) => {
    const formatted = formatContent(ctx.language, ctx.fileContent);
    return (
      <CodePreview normalize content={formatted} language={ctx.language} />
    );
  },

  renderFooterLeft: (ctx) => (
    <Text text03 mainUiBody className="select-none">
      {ctx.lineCount} {ctx.lineCount === 1 ? "line" : "lines"}
    </Text>
  ),

  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <CopyButton getText={() => ctx.fileContent} />
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/docxVariant.tsx
================================================
"use client";

import { useState, useEffect, useRef } from "react";
import { renderAsync } from "docx-preview";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { Section } from "@/layouts/general-layouts";
import { PreviewContext } from "@/sections/modals/PreviewModal/interfaces";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";

const DOCX_MIMES = [
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/msword",
];

function isLegacyDoc(fileName: string): boolean {
  const lower = fileName.toLowerCase();
  return lower.endsWith(".doc") && !lower.endsWith(".docx");
}

interface DocxLoadResult {
  plainText: string;
  wordCount: number;
}

interface DocxPreviewProps {
  fileUrl: string;
  onLoad: (result: DocxLoadResult) => void;
}

function DocxPreview({ fileUrl, onLoad }: DocxPreviewProps) {
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const bodyRef = useRef<HTMLDivElement>(null);
  const styleRef = useRef<HTMLDivElement>(null);
  const onLoadRef = useRef(onLoad);
  onLoadRef.current = onLoad;

  useEffect(() => {
    async function loadDocument() {
      setIsLoading(true);
      setError(null);
      try {
        const response = await fetch(fileUrl);
        if (!response.ok) {
          throw new Error(`Failed to fetch document: ${response.status}`);
        }
        const buffer = await response.arrayBuffer();

        // Render the DOCX with full layout fidelity
        if (bodyRef.current && styleRef.current) {
          bodyRef.current.innerHTML = "";
          styleRef.current.innerHTML = "";

          await renderAsync(buffer, bodyRef.current, styleRef.current, {
            className: "docx",
            inWrapper: false,
            ignoreWidth: false,
            ignoreHeight: false,
            ignoreFonts: false,
            breakPages: true,
            useBase64URL: true,
            renderHeaders: true,
            renderFooters: true,
            renderFootnotes: true,
            renderEndnotes: true,
          });
        }

        // Extract plain text from the rendered DOM
        const text = bodyRef.current?.innerText ?? "";
        const words = text
          .split(/\s+/)
          .filter((w: string) => w.length > 0).length;

        onLoadRef.current({ plainText: text, wordCount: words });
      } catch {
        setError(
          "Could not preview this document. Download the file to view it."
        );
      } finally {
        setIsLoading(false);
      }
    }
    loadDocument();
  }, [fileUrl]);

  if (error) {
    return (
      <Section justifyContent="center" alignItems="center" padding={1.5}>
        <Text text03 mainUiBody>
          {error}
        </Text>
      </Section>
    );
  }

  return (
    <ScrollIndicatorDiv
      className="flex-1 min-h-0 bg-background-tint-00"
      variant="shadow"
    >
      {isLoading && (
        <Section>
          <SimpleLoader className="h-8 w-8" />
        </Section>
      )}
      {/* Style container for docx-preview generated styles */}
      <div ref={styleRef} />
      {/* Body container where docx-preview renders the document */}
      <div ref={bodyRef} className="docx-host px-32 pb-16" />
    </ScrollIndicatorDiv>
  );
}

// Store parsed result outside the variant so footer can access it
let lastDocxResult: DocxLoadResult | null = null;

export const docxVariant: PreviewVariant = {
  matches: (name, mime) => {
    if (DOCX_MIMES.some((m) => mime === m)) return true;
    const lower = (name || "").toLowerCase();
    return lower.endsWith(".docx") || lower.endsWith(".doc");
  },
  width: "full",
  height: "full",
  needsTextContent: false,
  codeBackground: false,
  headerDescription: () => {
    if (lastDocxResult) {
      const count = lastDocxResult.wordCount;
      return `Word Document • ${count.toLocaleString()} ${
        count === 1 ? "word" : "words"
      }`;
    }
    return "Word Document";
  },

  renderContent: (ctx: PreviewContext) => {
    if (isLegacyDoc(ctx.fileName)) {
      lastDocxResult = null;
      return (
        <Section justifyContent="center" alignItems="center" padding={1.5}>
          <Text text03 mainUiBody>
            Legacy .doc format cannot be previewed. Download the file to view
            it.
          </Text>
        </Section>
      );
    }
    return (
      <DocxPreview
        fileUrl={ctx.fileUrl}
        onLoad={(result) => {
          lastDocxResult = result;
        }}
      />
    );
  },

  renderFooterLeft: () => null,
  renderFooterRight: (ctx: PreviewContext) => (
    <Section flexDirection="row" width="fit">
      {lastDocxResult && (
        <CopyButton getText={() => lastDocxResult?.plainText ?? ""} />
      )}
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/imageVariant.tsx
================================================
import { Section } from "@/layouts/general-layouts";
import PreviewImage from "@/refresh-components/PreviewImage";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import {
  DownloadButton,
  ZoomControls,
} from "@/sections/modals/PreviewModal/variants/shared";

export const imageVariant: PreviewVariant = {
  matches: (_name, mime) => mime.startsWith("image/"),
  width: "full",
  height: "full",
  needsTextContent: false,
  codeBackground: false,
  headerDescription: () => "",

  renderContent: (ctx) => (
    <div
      className="flex flex-1 min-h-0 items-center justify-center p-4 transition-transform duration-300 ease-in-out"
      style={{
        transform: `scale(${ctx.zoom / 100})`,
        transformOrigin: "center",
      }}
    >
      <PreviewImage
        src={ctx.fileUrl}
        alt={ctx.fileName}
        className="max-w-full max-h-full"
      />
    </div>
  ),

  renderFooterLeft: (ctx) => (
    <ZoomControls
      zoom={ctx.zoom}
      onZoomIn={ctx.onZoomIn}
      onZoomOut={ctx.onZoomOut}
    />
  ),

  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/index.ts
================================================
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { codeVariant } from "@/sections/modals/PreviewModal/variants/codeVariant";
import { imageVariant } from "@/sections/modals/PreviewModal/variants/imageVariant";
import { pdfVariant } from "@/sections/modals/PreviewModal/variants/pdfVariant";
import { csvVariant } from "@/sections/modals/PreviewModal/variants/csvVariant";
import { markdownVariant } from "@/sections/modals/PreviewModal/variants/markdownVariant";
import { dataVariant } from "@/sections/modals/PreviewModal/variants/dataVariant";
import { textVariant } from "@/sections/modals/PreviewModal/variants/textVariant";
import { unsupportedVariant } from "@/sections/modals/PreviewModal/variants/unsupportedVariant";
import { docxVariant } from "@/sections/modals/PreviewModal/variants/docxVariant";

// Note: Order does matter for the order that filters that are hit
const PREVIEW_VARIANTS: PreviewVariant[] = [
  codeVariant,
  imageVariant,
  pdfVariant,
  csvVariant,
  markdownVariant,
  docxVariant,
  textVariant,
  dataVariant,
];

export function resolveVariant(
  semanticIdentifier: string | null,
  mimeType: string
): PreviewVariant {
  return (
    PREVIEW_VARIANTS.find((v) => v.matches(semanticIdentifier, mimeType)) ??
    unsupportedVariant
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/variants/markdownVariant.tsx
================================================
import { Section } from "@/layouts/general-layouts";
import { isMarkdownFile } from "@/lib/languages";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";

const MARKDOWN_MIMES = [
  "text/markdown",
  "text/x-markdown",
  "text/x-rst",
  "text/x-org",
];

export const markdownVariant: PreviewVariant = {
  matches: (name, mime) => {
    if (MARKDOWN_MIMES.some((m) => mime.startsWith(m))) return true;
    return isMarkdownFile(name || "");
  },
  width: "full",
  height: "full",
  needsTextContent: true,
  codeBackground: false,
  headerDescription: () => "",

  renderContent: (ctx) => (
    <CodePreview content={ctx.fileContent} language={ctx.language} />
  ),

  renderFooterLeft: () => null,

  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <CopyButton getText={() => ctx.fileContent} />
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/pdfVariant.tsx
================================================
import { Section } from "@/layouts/general-layouts";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { DownloadButton } from "@/sections/modals/PreviewModal/variants/shared";

export const pdfVariant: PreviewVariant = {
  matches: (_name, mime) => mime === "application/pdf",
  width: "full",
  height: "full",
  needsTextContent: false,
  codeBackground: false,
  headerDescription: () => "",

  renderContent: (ctx) => (
    <iframe
      src={`${ctx.fileUrl}#toolbar=0`}
      className="w-full h-full flex-1 min-h-0 border-none"
      title="PDF Viewer"
    />
  ),

  renderFooterLeft: () => null,
  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/shared.tsx
================================================
import { Button } from "@opal/components";
import { SvgDownload, SvgZoomIn, SvgZoomOut } from "@opal/icons";
import Text from "@/refresh-components/texts/Text";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { Section } from "@/layouts/general-layouts";

interface DownloadButtonProps {
  fileUrl: string;
  fileName: string;
}

export function DownloadButton({ fileUrl, fileName }: DownloadButtonProps) {
  return (
    <a href={fileUrl} download={fileName}>
      <Button
        prominence="tertiary"
        size="sm"
        icon={SvgDownload}
        tooltip="Download"
      />
    </a>
  );
}

interface CopyButtonProps {
  getText: () => string;
}

export function CopyButton({ getText }: CopyButtonProps) {
  return (
    <CopyIconButton getCopyText={getText} tooltip="Copy content" size="sm" />
  );
}

interface ZoomControlsProps {
  zoom: number;
  onZoomIn: () => void;
  onZoomOut: () => void;
}

export function ZoomControls({ zoom, onZoomIn, onZoomOut }: ZoomControlsProps) {
  return (
    <div className="rounded-12 bg-background-tint-00 p-1 shadow-lg">
      <Section flexDirection="row" width="fit">
        <Button
          prominence="tertiary"
          size="sm"
          icon={SvgZoomOut}
          onClick={onZoomOut}
          tooltip="Zoom Out"
        />
        <Text mainUiMono text03>
          {zoom}%
        </Text>
        <Button
          prominence="tertiary"
          size="sm"
          icon={SvgZoomIn}
          onClick={onZoomIn}
          tooltip="Zoom In"
        />
      </Section>
    </div>
  );
}


================================================
FILE: web/src/sections/modals/PreviewModal/variants/textVariant.tsx
================================================
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
  CopyButton,
  DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";

const TEXT_MIMES = [
  "text/plain",
  "text/x-log",
  "text/x-config",
  "text/tab-separated-values",
];

const TEXT_EXTENSIONS = [".txt", ".log", ".conf", ".tsv"];

export const textVariant: PreviewVariant = {
  matches: (name, mime) => {
    if (TEXT_MIMES.some((supportedMime) => mime.startsWith(supportedMime))) {
      return true;
    }

    const lowerName = (name || "").toLowerCase();
    return TEXT_EXTENSIONS.some((extension) => lowerName.endsWith(extension));
  },
  width: "xl",
  height: "lg",
  needsTextContent: true,
  codeBackground: true,
  headerDescription: (ctx) =>
    ctx.fileContent
      ? `${ctx.lineCount} ${ctx.lineCount === 1 ? "line" : "lines"} · ${
          ctx.fileSize
        }`
      : "",

  renderContent: (ctx) => (
    <CodePreview normalize content={ctx.fileContent} language={ctx.language} />
  ),

  renderFooterLeft: (ctx) => (
    <Text text03 mainUiBody className="select-none">
      {ctx.lineCount} {ctx.lineCount === 1 ? "line" : "lines"}
    </Text>
  ),

  renderFooterRight: (ctx) => (
    <Section flexDirection="row" width="fit">
      <CopyButton getText={() => ctx.fileContent} />
      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
    </Section>
  ),
};


================================================
FILE: web/src/sections/modals/PreviewModal/variants/unsupportedVariant.tsx
================================================
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { DownloadButton } from "@/sections/modals/PreviewModal/variants/shared";

export const unsupportedVariant: PreviewVariant = {
  matches: () => true,
  width: "xl",
  height: "full",
  needsTextContent: false,
  codeBackground: false,
  headerDescription: () => "",

  renderContent: (ctx) => (
    <div className="flex flex-col items-center justify-center flex-1 w-full min-h-0 gap-4 p-6">
      <Text as="p" text03 mainUiBody>
        This file format is not supported for preview.
      </Text>
      <a href={ctx.fileUrl} download={ctx.fileName}>
        <Button>Download File</Button>
      </a>
    </div>
  ),

  renderFooterLeft: () => null,
  renderFooterRight: (ctx) => (
    <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
  ),
};


================================================
FILE: web/src/sections/modals/ShareAgentModal.test.tsx
================================================
import React, { useEffect } from "react";
import { render, screen, waitFor } from "@tests/setup/test-utils";
import ShareAgentModal, { ShareAgentModalProps } from "./ShareAgentModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";

jest.mock("@/hooks/useShareableUsers", () => ({
  __esModule: true,
  default: jest.fn(() => ({ data: [] })),
}));

jest.mock("@/hooks/useShareableGroups", () => ({
  __esModule: true,
  default: jest.fn(() => ({ data: [] })),
}));

jest.mock("@/hooks/useAgents", () => ({
  useAgent: jest.fn(() => ({ agent: null })),
}));

jest.mock("@/lib/hooks", () => ({
  useLabels: jest.fn(() => ({
    labels: [],
    createLabel: jest.fn(),
  })),
}));

function ModalHarness(props: ShareAgentModalProps) {
  const modal = useCreateModal();

  useEffect(() => {
    modal.toggle(true);
  }, [modal]);

  return (
    <modal.Provider>
      <ShareAgentModal {...props} />
    </modal.Provider>
  );
}

function renderShareAgentModal(overrides: Partial<ShareAgentModalProps> = {}) {
  const props: ShareAgentModalProps = {
    userIds: [],
    groupIds: [],
    isPublic: false,
    isFeatured: false,
    labelIds: [],
    ...overrides,
  };

  return render(<ModalHarness {...props} />);
}

describe("ShareAgentModal", () => {
  it("defaults to Users & Groups when the agent is private", async () => {
    renderShareAgentModal({ isPublic: false });

    await waitFor(() =>
      expect(
        screen.getByRole("tab", { name: "Users & Groups" })
      ).toHaveAttribute("data-state", "active")
    );

    expect(
      screen.getByRole("tab", { name: "Your Organization" })
    ).toHaveAttribute("data-state", "inactive");
  });

  it("defaults to Your Organization when the agent is public", async () => {
    renderShareAgentModal({ isPublic: true });

    await waitFor(() =>
      expect(
        screen.getByRole("tab", { name: "Your Organization" })
      ).toHaveAttribute("data-state", "active")
    );

    expect(screen.getByRole("tab", { name: "Users & Groups" })).toHaveAttribute(
      "data-state",
      "inactive"
    );
  });
});


================================================
FILE: web/src/sections/modals/ShareAgentModal.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import {
  SvgLink,
  SvgOrganization,
  SvgShare,
  SvgTag,
  SvgUser,
  SvgUsers,
  SvgX,
} from "@opal/icons";
import InputChipField from "@/refresh-components/inputs/InputChipField";
import Message from "@/refresh-components/messages/Message";
import Tabs from "@/refresh-components/Tabs";
import { Card } from "@/refresh-components/cards";
import InputComboBox from "@/refresh-components/inputs/InputComboBox/InputComboBox";
import * as InputLayouts from "@/layouts/input-layouts";
import SwitchField from "@/refresh-components/form/SwitchField";
import LineItem from "@/refresh-components/buttons/LineItem";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import useShareableUsers from "@/hooks/useShareableUsers";
import useShareableGroups from "@/hooks/useShareableGroups";
import { useModal } from "@/refresh-components/contexts/ModalContext";
import { useUser } from "@/providers/UserProvider";
import { Formik, useFormikContext } from "formik";
import { useAgent } from "@/hooks/useAgents";
import { Button } from "@opal/components";
import { useLabels } from "@/lib/hooks";
import { PersonaLabel } from "@/app/admin/agents/interfaces";

const YOUR_ORGANIZATION_TAB = "Your Organization";
const USERS_AND_GROUPS_TAB = "Users & Groups";

// ============================================================================
// Types
// ============================================================================

interface ShareAgentFormValues {
  selectedUserIds: string[];
  selectedGroupIds: number[];
  isPublic: boolean;
  isFeatured: boolean;
  labelIds: number[];
}

// ============================================================================
// ShareAgentFormContent
// ============================================================================

interface ShareAgentFormContentProps {
  agentId?: number;
}

function ShareAgentFormContent({ agentId }: ShareAgentFormContentProps) {
  const { values, setFieldValue, handleSubmit, dirty, isSubmitting } =
    useFormikContext<ShareAgentFormValues>();
  const { data: usersData } = useShareableUsers({ includeApiKeys: true });
  const { data: groupsData } = useShareableGroups();
  const { user: currentUser, isAdmin, isCurator } = useUser();
  const { agent: fullAgent } = useAgent(agentId ?? null);
  const shareAgentModal = useModal();
  const { labels: allLabels, createLabel } = useLabels();
  const [labelInputValue, setLabelInputValue] = useState("");

  const acceptedUsers = usersData ?? [];
  const groups = groupsData ?? [];
  const canUpdateFeaturedStatus = isAdmin || isCurator;

  // Create options for InputComboBox from all accepted users and groups
  const comboBoxOptions = useMemo(() => {
    const userOptions = acceptedUsers
      .filter((user) => user.id !== currentUser?.id)
      .map((user) => ({
        value: `user-${user.id}`,
        label: user.email,
      }));

    const groupOptions = groups.map((group) => ({
      value: `group-${group.id}`,
      label: group.name,
    }));

    return [...userOptions, ...groupOptions];
  }, [acceptedUsers, groups, currentUser?.id]);

  // Compute owner and displayed users
  const ownerId = fullAgent?.owner?.id;
  const owner = ownerId
    ? acceptedUsers.find((user) => user.id === ownerId)
    : acceptedUsers.find((user) => user.id === currentUser?.id);
  const otherUsers = owner
    ? acceptedUsers.filter(
        (user) =>
          user.id !== owner.id && values.selectedUserIds.includes(user.id)
      )
    : acceptedUsers;
  const displayedUsers = [...(owner ? [owner] : []), ...otherUsers];

  // Compute displayed groups based on current form values
  const displayedGroups = groups.filter((group) =>
    values.selectedGroupIds.includes(group.id)
  );

  // Handlers
  function handleClose() {
    shareAgentModal.toggle(false);
  }

  function handleCopyLink() {
    if (!agentId) return;
    const url = `${window.location.origin}/chat?agentId=${agentId}`;
    navigator.clipboard.writeText(url);
  }

  function handleComboBoxSelect(selectedValue: string) {
    if (selectedValue.startsWith("user-")) {
      const userId = selectedValue.replace("user-", "");
      if (!values.selectedUserIds.includes(userId)) {
        setFieldValue("selectedUserIds", [...values.selectedUserIds, userId]);
      }
    } else if (selectedValue.startsWith("group-")) {
      const groupId = parseInt(selectedValue.replace("group-", ""));
      if (!values.selectedGroupIds.includes(groupId)) {
        setFieldValue("selectedGroupIds", [
          ...values.selectedGroupIds,
          groupId,
        ]);
      }
    }
  }

  function handleRemoveUser(userId: string) {
    setFieldValue(
      "selectedUserIds",
      values.selectedUserIds.filter((id) => id !== userId)
    );
  }

  function handleRemoveGroup(groupId: number) {
    setFieldValue(
      "selectedGroupIds",
      values.selectedGroupIds.filter((id) => id !== groupId)
    );
  }

  const selectedLabels: PersonaLabel[] = useMemo(() => {
    if (!allLabels) return [];
    return allLabels.filter((label) => values.labelIds.includes(label.id));
  }, [allLabels, values.labelIds]);

  function handleRemoveLabel(labelId: number) {
    setFieldValue(
      "labelIds",
      values.labelIds.filter((id) => id !== labelId)
    );
  }

  const addLabel = useCallback(
    async (name: string) => {
      const trimmed = name.trim();
      if (!trimmed) return;

      const existing = allLabels?.find(
        (l) => l.name.toLowerCase() === trimmed.toLowerCase()
      );
      if (existing) {
        if (!values.labelIds.includes(existing.id)) {
          setFieldValue("labelIds", [...values.labelIds, existing.id]);
        }
      } else {
        const newLabel = await createLabel(trimmed);
        if (newLabel) {
          setFieldValue("labelIds", [...values.labelIds, newLabel.id]);
        }
      }
      setLabelInputValue("");
    },
    [allLabels, values.labelIds, setFieldValue, createLabel]
  );

  const chipItems = useMemo(
    () =>
      selectedLabels.map((label) => ({
        id: String(label.id),
        label: label.name,
      })),
    [selectedLabels]
  );

  return (
    <Modal.Content width="sm" height="lg">
      <Modal.Header icon={SvgShare} title="Share Agent" onClose={handleClose} />

      <Modal.Body padding={0.5}>
        <Card variant="borderless" padding={0.5}>
          <Tabs
            defaultValue={
              values.isPublic ? YOUR_ORGANIZATION_TAB : USERS_AND_GROUPS_TAB
            }
          >
            <Tabs.List>
              <Tabs.Trigger icon={SvgUsers} value={USERS_AND_GROUPS_TAB}>
                {USERS_AND_GROUPS_TAB}
              </Tabs.Trigger>
              <Tabs.Trigger
                icon={SvgOrganization}
                value={YOUR_ORGANIZATION_TAB}
              >
                {YOUR_ORGANIZATION_TAB}
              </Tabs.Trigger>
            </Tabs.List>

            <Tabs.Content value={USERS_AND_GROUPS_TAB}>
              <Section gap={0.5} alignItems="start">
                <InputComboBox
                  placeholder="Add users and groups"
                  value=""
                  onChange={() => {}}
                  onValueChange={handleComboBoxSelect}
                  options={comboBoxOptions}
                  strict
                />
                {(displayedUsers.length > 0 || displayedGroups.length > 0) && (
                  <Section gap={0} alignItems="stretch">
                    {/* Shared Users */}
                    {displayedUsers.map((user) => {
                      const isOwner = fullAgent?.owner?.id === user.id;
                      const isCurrentUser = currentUser?.id === user.id;

                      return (
                        <LineItem
                          key={`user-${user.id}`}
                          icon={SvgUser}
                          description={isCurrentUser ? "You" : undefined}
                          rightChildren={
                            isOwner || (isCurrentUser && !agentId) ? (
                              // Owner will always have the agent "shared" with it.
                              // Therefore, we never render any `IconButton SvgX` to remove it.
                              //
                              // Note:
                              // This user, during creation, is assumed to be the "owner".
                              // That is why the `(isCurrentUser && !agent)` condition exists.
                              <Text secondaryBody text03>
                                Owner
                              </Text>
                            ) : (
                              // For all other cases (including for "self-unsharing"),
                              // we render an `IconButton SvgX` to remove a person from the list.
                              <Button
                                prominence="tertiary"
                                size="sm"
                                icon={SvgX}
                                onClick={() => handleRemoveUser(user.id)}
                              />
                            )
                          }
                        >
                          {user.email}
                        </LineItem>
                      );
                    })}

                    {/* Shared Groups */}
                    {displayedGroups.map((group) => (
                      <LineItem
                        key={`group-${group.id}`}
                        icon={SvgUsers}
                        rightChildren={
                          <Button
                            prominence="tertiary"
                            size="sm"
                            icon={SvgX}
                            onClick={() => handleRemoveGroup(group.id)}
                          />
                        }
                      >
                        {group.name}
                      </LineItem>
                    ))}
                  </Section>
                )}
              </Section>
              {values.isPublic && (
                <Section>
                  <Message
                    iconComponent={SvgOrganization}
                    close={false}
                    static
                    className="w-full"
                    text="This agent is public to your organization."
                    description="Everyone in your organization has access to this agent."
                  />
                </Section>
              )}
            </Tabs.Content>

            <Tabs.Content value={YOUR_ORGANIZATION_TAB} padding={0.5}>
              <Section gap={1} alignItems="stretch">
                <InputLayouts.Horizontal
                  title="Publish This Agent"
                  description="Make this agent available to everyone in your organization."
                >
                  <SwitchField name="isPublic" />
                </InputLayouts.Horizontal>

                {canUpdateFeaturedStatus && (
                  <>
                    <div className="border-t border-border-02" />

                    <InputLayouts.Horizontal
                      title="Feature This Agent"
                      description="Show this agent at the top of the explore agents list and automatically pin it to the sidebar for new users with access."
                    >
                      <SwitchField name="isFeatured" />
                    </InputLayouts.Horizontal>
                  </>
                )}

                <InputChipField
                  chips={chipItems}
                  onRemoveChip={(id) => handleRemoveLabel(Number(id))}
                  onAdd={addLabel}
                  value={labelInputValue}
                  onChange={setLabelInputValue}
                  placeholder="Add labels..."
                  icon={SvgTag}
                />
                <Text secondaryBody text04>
                  Add labels and categories to help people better discover this
                  agent.
                </Text>
              </Section>
            </Tabs.Content>
          </Tabs>
        </Card>
      </Modal.Body>

      <Modal.Footer>
        <BasicModalFooter
          left={
            agentId ? (
              <Button
                prominence="secondary"
                icon={SvgLink}
                onClick={handleCopyLink}
              >
                Copy Link
              </Button>
            ) : undefined
          }
          cancel={
            <Button
              disabled={isSubmitting}
              prominence="secondary"
              onClick={handleClose}
            >
              Cancel
            </Button>
          }
          submit={
            <Button
              disabled={!dirty || isSubmitting}
              onClick={() => handleSubmit()}
            >
              Save
            </Button>
          }
        />
      </Modal.Footer>
    </Modal.Content>
  );
}

// ============================================================================
// ShareAgentModal
// ============================================================================

export interface ShareAgentModalProps {
  agentId?: number;
  userIds: string[];
  groupIds: number[];
  isPublic: boolean;
  isFeatured: boolean;
  labelIds: number[];
  onShare?: (
    userIds: string[],
    groupIds: number[],
    isPublic: boolean,
    isFeatured: boolean,
    labelIds: number[]
  ) => Promise<void> | void;
}

export default function ShareAgentModal({
  agentId,
  userIds,
  groupIds,
  isPublic,
  isFeatured,
  labelIds,
  onShare,
}: ShareAgentModalProps) {
  const shareAgentModal = useModal();

  const initialValues = useMemo(
    (): ShareAgentFormValues => ({
      selectedUserIds: userIds,
      selectedGroupIds: groupIds,
      isPublic: isPublic,
      isFeatured: isFeatured,
      labelIds: labelIds,
    }),
    [userIds, groupIds, isPublic, isFeatured, labelIds]
  );
  const [modalInitialValues, setModalInitialValues] =
    useState<ShareAgentFormValues>(initialValues);
  const wasOpenRef = useRef(false);

  useEffect(() => {
    // Capture fresh props exactly when the modal opens, then keep them stable
    // while open so in-flight parent updates don't reset form state.
    if (shareAgentModal.isOpen && !wasOpenRef.current) {
      setModalInitialValues(initialValues);
    }
    wasOpenRef.current = shareAgentModal.isOpen;
  }, [shareAgentModal.isOpen, initialValues]);

  async function handleSubmit(values: ShareAgentFormValues) {
    await onShare?.(
      values.selectedUserIds,
      values.selectedGroupIds,
      values.isPublic,
      values.isFeatured,
      values.labelIds
    );
  }

  return (
    <Modal open={shareAgentModal.isOpen} onOpenChange={shareAgentModal.toggle}>
      <Formik
        initialValues={modalInitialValues}
        onSubmit={handleSubmit}
        enableReinitialize
      >
        <ShareAgentFormContent agentId={agentId} />
      </Formik>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/ShareChatSessionModal.tsx
================================================
"use client";

import { useState } from "react";
import { cn } from "@/lib/utils";
import { ChatSession, ChatSessionSharedStatus } from "@/app/app/interfaces";
import { toast } from "@/hooks/useToast";
import { useChatSessionStore } from "@/app/app/stores/useChatSessionStore";
import { copyAll } from "@/app/app/message/copyingUtils";
import { Section } from "@/layouts/general-layouts";
import Modal from "@/refresh-components/Modal";
import { Button } from "@opal/components";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import Text from "@/refresh-components/texts/Text";
import { SvgLink, SvgShare, SvgUsers } from "@opal/icons";
import SvgCheck from "@opal/icons/check";
import SvgLock from "@opal/icons/lock";

import type { IconProps } from "@opal/types";
import useChatSessions from "@/hooks/useChatSessions";

function buildShareLink(chatSessionId: string) {
  const baseUrl = `${window.location.protocol}//${window.location.host}`;
  return `${baseUrl}/app/shared/${chatSessionId}`;
}

async function generateShareLink(chatSessionId: string) {
  const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ sharing_status: "public" }),
  });

  if (response.ok) {
    return buildShareLink(chatSessionId);
  }
  return null;
}

async function deleteShareLink(chatSessionId: string) {
  const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ sharing_status: "private" }),
  });

  return response.ok;
}

interface PrivacyOptionProps {
  icon: React.FunctionComponent<IconProps>;
  title: string;
  description: string;
  selected: boolean;
  onClick: () => void;
  ariaLabel?: string;
}

function PrivacyOption({
  icon: Icon,
  title,
  description,
  selected,
  onClick,
  ariaLabel,
}: PrivacyOptionProps) {
  return (
    <div
      className={cn(
        "p-1.5 rounded-08 cursor-pointer ",
        selected ? "bg-background-tint-00" : "bg-transparent",
        "hover:bg-background-tint-02"
      )}
      onClick={onClick}
      aria-label={ariaLabel}
    >
      <div className="flex flex-row gap-1 items-center">
        <div className="flex w-5 p-[2px] self-stretch justify-center">
          <Icon
            size={16}
            className={cn(selected ? "stroke-text-05" : "stroke-text-03")}
          />
        </div>
        <div className="flex flex-col flex-1 px-0.5">
          <Text mainUiBody text05={selected} text03={!selected}>
            {title}
          </Text>
          <Text secondaryBody text03>
            {description}
          </Text>
        </div>
        {selected && (
          <div className="flex w-5 self-stretch justify-center">
            <SvgCheck size={16} className="stroke-action-link-05" />
          </div>
        )}
      </div>
    </div>
  );
}

interface ShareChatSessionModalProps {
  chatSession: ChatSession;
  onClose: () => void;
}

export default function ShareChatSessionModal({
  chatSession,
  onClose,
}: ShareChatSessionModalProps) {
  const isCurrentlyPublic =
    chatSession.shared_status === ChatSessionSharedStatus.Public;

  const [selectedPrivacy, setSelectedPrivacy] = useState<"private" | "public">(
    isCurrentlyPublic ? "public" : "private"
  );
  const [shareLink, setShareLink] = useState<string>(
    isCurrentlyPublic ? buildShareLink(chatSession.id) : ""
  );
  const [isLoading, setIsLoading] = useState(false);
  const updateCurrentChatSessionSharedStatus = useChatSessionStore(
    (state) => state.updateCurrentChatSessionSharedStatus
  );
  const { refreshChatSessions } = useChatSessions();

  const wantsPublic = selectedPrivacy === "public";

  const isShared = shareLink && selectedPrivacy === "public";

  let submitButtonText = "Done";
  if (wantsPublic && !isCurrentlyPublic && !shareLink) {
    submitButtonText = "Create Share Link";
  } else if (!wantsPublic && isCurrentlyPublic) {
    submitButtonText = "Make Private";
  } else if (isShared) {
    submitButtonText = "Copy Link";
  }

  async function handleSubmit() {
    setIsLoading(true);
    try {
      if (wantsPublic && !isCurrentlyPublic && !shareLink) {
        const link = await generateShareLink(chatSession.id);
        if (link) {
          setShareLink(link);
          updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Public);
          await refreshChatSessions();
          copyAll(link);
          toast.success("Share link copied to clipboard!");
        } else {
          toast.error("Failed to generate share link");
        }
      } else if (!wantsPublic && isCurrentlyPublic) {
        const success = await deleteShareLink(chatSession.id);
        if (success) {
          setShareLink("");
          updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Private);
          await refreshChatSessions();
          toast.success("Chat is now private");
          onClose();
        } else {
          toast.error("Failed to make chat private");
        }
      } else if (wantsPublic && shareLink) {
        copyAll(shareLink);
        toast.success("Share link copied to clipboard!");
      } else {
        onClose();
      }
    } catch (e) {
      console.error(e);
      toast.error("An error occurred");
    } finally {
      setIsLoading(false);
    }
  }

  return (
    <Modal open onOpenChange={(isOpen) => !isOpen && onClose()}>
      <Modal.Content width="sm">
        <Modal.Header
          icon={SvgShare}
          title={isShared ? "Chat shared" : "Share this chat"}
          description="All existing and future messages in this chat will be shared."
          onClose={onClose}
        />
        <Modal.Body twoTone>
          <Section
            justifyContent="start"
            alignItems="stretch"
            height="auto"
            gap={0.12}
          >
            <PrivacyOption
              icon={SvgLock}
              title="Private"
              description="Only you have access to this chat."
              selected={selectedPrivacy === "private"}
              onClick={() => setSelectedPrivacy("private")}
              ariaLabel="share-modal-option-private"
            />
            <PrivacyOption
              icon={SvgUsers}
              title="Your Organization"
              description="Anyone in your organization can view this chat."
              selected={selectedPrivacy === "public"}
              onClick={() => setSelectedPrivacy("public")}
              ariaLabel="share-modal-option-public"
            />
          </Section>

          {isShared && (
            <InputTypeIn
              aria-label="share-modal-link-input"
              readOnly
              value={shareLink}
              rightSection={
                <CopyIconButton
                  getCopyText={() => shareLink}
                  tooltip="Copy link"
                  size="sm"
                  aria-label="share-modal-copy-link"
                />
              }
            />
          )}
        </Modal.Body>
        <Modal.Footer>
          {!isShared && (
            <Button
              prominence="secondary"
              onClick={onClose}
              aria-label="share-modal-cancel"
            >
              Cancel
            </Button>
          )}
          <Button
            disabled={isLoading}
            onClick={handleSubmit}
            icon={isShared ? SvgLink : undefined}
            width={isShared ? "full" : undefined}
            aria-label="share-modal-submit"
          >
            {submitButtonText}
          </Button>
        </Modal.Footer>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/AnthropicModal.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { Formik } from "formik";
import { LLMProviderFormProps } from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  APIKeyField,
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";

const ANTHROPIC_PROVIDER_NAME = "anthropic";
const DEFAULT_DEFAULT_MODEL_NAME = "claude-sonnet-4-5";

export default function AnthropicModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const [isTesting, setIsTesting] = useState(false);
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    ANTHROPIC_PROVIDER_NAME
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues = isOnboarding
    ? {
        ...buildOnboardingInitialValues(),
        name: ANTHROPIC_PROVIDER_NAME,
        provider: ANTHROPIC_PROVIDER_NAME,
        api_key: "",
        default_model_name: DEFAULT_DEFAULT_MODEL_NAME,
      }
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        api_base: existingLlmProvider?.api_base ?? undefined,
        default_model_name:
          (defaultModelName &&
          modelConfigurations.some((m) => m.name === defaultModelName)
            ? defaultModelName
            : undefined) ??
          wellKnownLLMProvider?.recommended_default_model?.name ??
          DEFAULT_DEFAULT_MODEL_NAME,
        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_key: Yup.string().required("API Key is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_key: Yup.string().required("API Key is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];

          await submitOnboardingProvider({
            providerName: ANTHROPIC_PROVIDER_NAME,
            payload: {
              ...values,
              model_configurations: modelConfigsToUse,
              is_auto_mode:
                values.default_model_name === DEFAULT_DEFAULT_MODEL_NAME,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: ANTHROPIC_PROVIDER_NAME,
            values,
            initialValues,
            modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LLMConfigurationModalWrapper
          providerEndpoint={ANTHROPIC_PROVIDER_NAME}
          existingProviderName={existingLlmProvider?.name}
          onClose={onClose}
          isFormValid={formikProps.isValid}
          isDirty={formikProps.dirty}
          isTesting={isTesting}
          isSubmitting={formikProps.isSubmitting}
        >
          <APIKeyField providerName="Anthropic" />

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <DisplayNameField disabled={!!existingLlmProvider} />
            </>
          )}

          <FieldSeparator />
          {isOnboarding ? (
            <SingleDefaultModelField placeholder="E.g. claude-sonnet-4-5" />
          ) : (
            <ModelsField
              modelConfigurations={modelConfigurations}
              formikProps={formikProps}
              recommendedDefaultModel={
                wellKnownLLMProvider?.recommended_default_model ?? null
              }
              shouldShowAutoUpdateToggle={true}
            />
          )}

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <ModelsAccessField formikProps={formikProps} />
            </>
          )}
        </LLMConfigurationModalWrapper>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/AzureModal.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { Formik } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import {
  LLMProviderFormProps,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  APIKeyField,
  DisplayNameField,
  FieldSeparator,
  FieldWrapper,
  ModelsAccessField,
  ModelsField,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import {
  isValidAzureTargetUri,
  parseAzureTargetUri,
} from "@/lib/azureTargetUri";
import { toast } from "@/hooks/useToast";

const AZURE_PROVIDER_NAME = "azure";

interface AzureModalValues extends BaseLLMFormValues {
  api_key: string;
  target_uri: string;
  api_base?: string;
  api_version?: string;
  deployment_name?: string;
}

function buildTargetUri(existingLlmProvider?: LLMProviderView): string {
  if (!existingLlmProvider?.api_base || !existingLlmProvider?.api_version) {
    return "";
  }

  const deploymentName =
    existingLlmProvider.deployment_name || "your-deployment";
  return `${existingLlmProvider.api_base}/openai/deployments/${deploymentName}/chat/completions?api-version=${existingLlmProvider.api_version}`;
}

const processValues = (values: AzureModalValues): AzureModalValues => {
  let processedValues = { ...values };
  if (values.target_uri) {
    try {
      const { url, apiVersion, deploymentName } = parseAzureTargetUri(
        values.target_uri
      );
      processedValues = {
        ...processedValues,
        api_base: url.origin,
        api_version: apiVersion,
        deployment_name: deploymentName || processedValues.deployment_name,
      };
    } catch {
      toast.warning("Failed to parse target URI — using original values.");
    }
  }
  return processedValues;
};

export default function AzureModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const [isTesting, setIsTesting] = useState(false);
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(AZURE_PROVIDER_NAME);

  const [addedModels, setAddedModels] = useState<ModelConfiguration[]>([]);

  if (open === false) return null;

  const onClose = () => {
    setAddedModels([]);
    onOpenChange?.(false);
  };

  const baseModelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  // Merge base models with any user-added models (dedup by name)
  const existingNames = new Set(baseModelConfigurations.map((m) => m.name));
  const modelConfigurations = [
    ...baseModelConfigurations,
    ...addedModels.filter((m) => !existingNames.has(m.name)),
  ];

  const initialValues: AzureModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: AZURE_PROVIDER_NAME,
        provider: AZURE_PROVIDER_NAME,
        api_key: "",
        target_uri: "",
        default_model_name: "",
      } as AzureModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        target_uri: buildTargetUri(existingLlmProvider),
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_key: Yup.string().required("API Key is required"),
        target_uri: Yup.string()
          .required("Target URI is required")
          .test(
            "valid-target-uri",
            "Target URI must be a valid URL with api-version query parameter and either a deployment name in the path or /openai/responses",
            (value) => (value ? isValidAzureTargetUri(value) : false)
          ),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_key: Yup.string().required("API Key is required"),
        target_uri: Yup.string()
          .required("Target URI is required")
          .test(
            "valid-target-uri",
            "Target URI must be a valid URL with api-version query parameter and either a deployment name in the path or /openai/responses",
            (value) => (value ? isValidAzureTargetUri(value) : false)
          ),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        const processedValues = processValues(values);

        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];

          await submitOnboardingProvider({
            providerName: AZURE_PROVIDER_NAME,
            payload: {
              ...processedValues,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: AZURE_PROVIDER_NAME,
            values: processedValues,
            initialValues,
            modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LLMConfigurationModalWrapper
          providerEndpoint={AZURE_PROVIDER_NAME}
          existingProviderName={existingLlmProvider?.name}
          onClose={onClose}
          isFormValid={formikProps.isValid}
          isDirty={formikProps.dirty}
          isTesting={isTesting}
          isSubmitting={formikProps.isSubmitting}
        >
          <FieldWrapper>
            <InputLayouts.Vertical
              name="target_uri"
              title="Target URI"
              subDescription="Paste your endpoint target URI from Azure OpenAI (including API endpoint base, deployment name, and API version)."
            >
              <InputTypeInField
                name="target_uri"
                placeholder="https://your-resource.cognitiveservices.azure.com/openai/deployments/deployment-name/chat/completions?api-version=2025-01-01-preview"
              />
            </InputLayouts.Vertical>
          </FieldWrapper>

          <APIKeyField providerName="Azure" />

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <DisplayNameField disabled={!!existingLlmProvider} />
            </>
          )}

          <FieldSeparator />

          {isOnboarding ? (
            <SingleDefaultModelField placeholder="E.g. gpt-4o" />
          ) : (
            <ModelsField
              modelConfigurations={modelConfigurations}
              formikProps={formikProps}
              recommendedDefaultModel={null}
              shouldShowAutoUpdateToggle={false}
              onAddModel={(modelName) => {
                const newModel: ModelConfiguration = {
                  name: modelName,
                  is_visible: true,
                  max_input_tokens: null,
                  supports_image_input: false,
                  supports_reasoning: false,
                };
                setAddedModels((prev) => [...prev, newModel]);
                const currentSelected =
                  formikProps.values.selected_model_names ?? [];
                formikProps.setFieldValue("selected_model_names", [
                  ...currentSelected,
                  modelName,
                ]);
                if (!formikProps.values.default_model_name) {
                  formikProps.setFieldValue("default_model_name", modelName);
                }
              }}
            />
          )}

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <ModelsAccessField formikProps={formikProps} />
            </>
          )}
        </LLMConfigurationModalWrapper>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/BedrockModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputSelectField from "@/refresh-components/form/InputSelectField";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import * as InputLayouts from "@/layouts/input-layouts";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import {
  LLMProviderFormProps,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  ModelsField,
  DisplayNameField,
  FieldSeparator,
  FieldWrapper,
  ModelsAccessField,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { fetchBedrockModels } from "@/app/admin/configuration/llm/utils";
import { Card } from "@opal/components";
import { Section } from "@/layouts/general-layouts";
import { SvgAlertCircle } from "@opal/icons";
import { Content } from "@opal/layouts";
import { toast } from "@/hooks/useToast";
import useOnMount from "@/hooks/useOnMount";

const BEDROCK_PROVIDER_NAME = "bedrock";
const AWS_REGION_OPTIONS = [
  { name: "us-east-1", value: "us-east-1" },
  { name: "us-east-2", value: "us-east-2" },
  { name: "us-west-2", value: "us-west-2" },
  { name: "us-gov-east-1", value: "us-gov-east-1" },
  { name: "us-gov-west-1", value: "us-gov-west-1" },
  { name: "ap-northeast-1", value: "ap-northeast-1" },
  { name: "ap-south-1", value: "ap-south-1" },
  { name: "ap-southeast-1", value: "ap-southeast-1" },
  { name: "ap-southeast-2", value: "ap-southeast-2" },
  { name: "ap-east-1", value: "ap-east-1" },
  { name: "ca-central-1", value: "ca-central-1" },
  { name: "eu-central-1", value: "eu-central-1" },
  { name: "eu-west-2", value: "eu-west-2" },
];
const AUTH_METHOD_IAM = "iam";
const AUTH_METHOD_ACCESS_KEY = "access_key";
const AUTH_METHOD_LONG_TERM_API_KEY = "long_term_api_key";
const FIELD_AWS_REGION_NAME = "custom_config.AWS_REGION_NAME";
const FIELD_BEDROCK_AUTH_METHOD = "custom_config.BEDROCK_AUTH_METHOD";
const FIELD_AWS_ACCESS_KEY_ID = "custom_config.AWS_ACCESS_KEY_ID";
const FIELD_AWS_SECRET_ACCESS_KEY = "custom_config.AWS_SECRET_ACCESS_KEY";
const FIELD_AWS_BEARER_TOKEN_BEDROCK = "custom_config.AWS_BEARER_TOKEN_BEDROCK";

interface BedrockModalValues extends BaseLLMFormValues {
  custom_config: {
    AWS_REGION_NAME: string;
    BEDROCK_AUTH_METHOD?: string;
    AWS_ACCESS_KEY_ID?: string;
    AWS_SECRET_ACCESS_KEY?: string;
    AWS_BEARER_TOKEN_BEDROCK?: string;
  };
}

interface BedrockModalInternalsProps {
  formikProps: FormikProps<BedrockModalValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  modelConfigurations: ModelConfiguration[];
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function BedrockModalInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  modelConfigurations,
  isTesting,
  onClose,
  isOnboarding,
}: BedrockModalInternalsProps) {
  const authMethod = formikProps.values.custom_config?.BEDROCK_AUTH_METHOD;

  useEffect(() => {
    if (authMethod === AUTH_METHOD_IAM) {
      formikProps.setFieldValue(FIELD_AWS_ACCESS_KEY_ID, "");
      formikProps.setFieldValue(FIELD_AWS_SECRET_ACCESS_KEY, "");
      formikProps.setFieldValue(FIELD_AWS_BEARER_TOKEN_BEDROCK, "");
    } else if (authMethod === AUTH_METHOD_ACCESS_KEY) {
      formikProps.setFieldValue(FIELD_AWS_BEARER_TOKEN_BEDROCK, "");
    } else if (authMethod === AUTH_METHOD_LONG_TERM_API_KEY) {
      formikProps.setFieldValue(FIELD_AWS_ACCESS_KEY_ID, "");
      formikProps.setFieldValue(FIELD_AWS_SECRET_ACCESS_KEY, "");
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [authMethod]);

  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || modelConfigurations;

  const isAuthComplete =
    authMethod === AUTH_METHOD_IAM ||
    (authMethod === AUTH_METHOD_ACCESS_KEY &&
      formikProps.values.custom_config?.AWS_ACCESS_KEY_ID &&
      formikProps.values.custom_config?.AWS_SECRET_ACCESS_KEY) ||
    (authMethod === AUTH_METHOD_LONG_TERM_API_KEY &&
      formikProps.values.custom_config?.AWS_BEARER_TOKEN_BEDROCK);

  const isFetchDisabled =
    !formikProps.values.custom_config?.AWS_REGION_NAME || !isAuthComplete;

  const handleFetchModels = async () => {
    const { models, error } = await fetchBedrockModels({
      aws_region_name: formikProps.values.custom_config?.AWS_REGION_NAME ?? "",
      aws_access_key_id: formikProps.values.custom_config?.AWS_ACCESS_KEY_ID,
      aws_secret_access_key:
        formikProps.values.custom_config?.AWS_SECRET_ACCESS_KEY,
      aws_bearer_token_bedrock:
        formikProps.values.custom_config?.AWS_BEARER_TOKEN_BEDROCK,
      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
    }
    setFetchedModels(models);
  };

  // Auto-fetch models on initial load when editing an existing provider
  useOnMount(() => {
    if (existingLlmProvider && !isFetchDisabled) {
      handleFetchModels().catch((err) => {
        toast.error(
          err instanceof Error ? err.message : "Failed to fetch models"
        );
      });
    }
  });

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={BEDROCK_PROVIDER_NAME}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <FieldWrapper>
        <Section gap={1}>
          <InputLayouts.Vertical
            name={FIELD_AWS_REGION_NAME}
            title="AWS Region"
            subDescription="Region where your Amazon Bedrock models are hosted."
          >
            <InputSelectField name={FIELD_AWS_REGION_NAME}>
              <InputSelect.Trigger placeholder="Select a region" />
              <InputSelect.Content>
                {AWS_REGION_OPTIONS.map((option) => (
                  <InputSelect.Item key={option.value} value={option.value}>
                    {option.name}
                  </InputSelect.Item>
                ))}
              </InputSelect.Content>
            </InputSelectField>
          </InputLayouts.Vertical>

          <InputLayouts.Vertical
            name={FIELD_BEDROCK_AUTH_METHOD}
            title="Authentication Method"
            subDescription="Choose how Onyx should authenticate with Bedrock."
          >
            <InputSelect
              value={authMethod || AUTH_METHOD_ACCESS_KEY}
              onValueChange={(value) =>
                formikProps.setFieldValue(FIELD_BEDROCK_AUTH_METHOD, value)
              }
            >
              <InputSelect.Trigger defaultValue={AUTH_METHOD_IAM} />
              <InputSelect.Content>
                <InputSelect.Item
                  value={AUTH_METHOD_IAM}
                  description="Recommended for AWS environments"
                >
                  Environment IAM Role
                </InputSelect.Item>
                <InputSelect.Item
                  value={AUTH_METHOD_ACCESS_KEY}
                  description="For non-AWS environments"
                >
                  Access Key
                </InputSelect.Item>
                <InputSelect.Item
                  value={AUTH_METHOD_LONG_TERM_API_KEY}
                  description="For non-AWS environments"
                >
                  Long-term API Key
                </InputSelect.Item>
              </InputSelect.Content>
            </InputSelect>
          </InputLayouts.Vertical>
        </Section>
      </FieldWrapper>

      {authMethod === AUTH_METHOD_ACCESS_KEY && (
        <Card background="light" border="none" padding="sm">
          <Section gap={1}>
            <InputLayouts.Vertical
              name={FIELD_AWS_ACCESS_KEY_ID}
              title="AWS Access Key ID"
            >
              <InputTypeInField
                name={FIELD_AWS_ACCESS_KEY_ID}
                placeholder="AKIAIOSFODNN7EXAMPLE"
              />
            </InputLayouts.Vertical>
            <InputLayouts.Vertical
              name={FIELD_AWS_SECRET_ACCESS_KEY}
              title="AWS Secret Access Key"
            >
              <PasswordInputTypeInField
                name={FIELD_AWS_SECRET_ACCESS_KEY}
                placeholder="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
              />
            </InputLayouts.Vertical>
          </Section>
        </Card>
      )}

      {authMethod === AUTH_METHOD_IAM && (
        <FieldWrapper>
          <Card background="none" border="solid" padding="sm">
            <Content
              icon={SvgAlertCircle}
              title="Onyx will use the IAM role attached to the environment it’s running in to authenticate."
              variant="body"
              sizePreset="main-ui"
            />
          </Card>
        </FieldWrapper>
      )}

      {authMethod === AUTH_METHOD_LONG_TERM_API_KEY && (
        <Card background="light" border="none" padding="sm">
          <Section gap={0.5}>
            <InputLayouts.Vertical
              name={FIELD_AWS_BEARER_TOKEN_BEDROCK}
              title="Long-term API Key"
            >
              <PasswordInputTypeInField
                name={FIELD_AWS_BEARER_TOKEN_BEDROCK}
                placeholder="Your long-term API key"
              />
            </InputLayouts.Vertical>
          </Section>
        </Card>
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. us.anthropic.claude-sonnet-4-5-v1" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
          onRefetch={isFetchDisabled ? undefined : handleFetchModels}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function BedrockModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    BEDROCK_PROVIDER_NAME
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: BedrockModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: BEDROCK_PROVIDER_NAME,
        provider: BEDROCK_PROVIDER_NAME,
        default_model_name: "",
        custom_config: {
          AWS_REGION_NAME: "",
          BEDROCK_AUTH_METHOD: "access_key",
          AWS_ACCESS_KEY_ID: "",
          AWS_SECRET_ACCESS_KEY: "",
          AWS_BEARER_TOKEN_BEDROCK: "",
        },
      } as BedrockModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        custom_config: {
          AWS_REGION_NAME:
            (existingLlmProvider?.custom_config?.AWS_REGION_NAME as string) ??
            "",
          BEDROCK_AUTH_METHOD:
            (existingLlmProvider?.custom_config
              ?.BEDROCK_AUTH_METHOD as string) ?? "access_key",
          AWS_ACCESS_KEY_ID:
            (existingLlmProvider?.custom_config?.AWS_ACCESS_KEY_ID as string) ??
            "",
          AWS_SECRET_ACCESS_KEY:
            (existingLlmProvider?.custom_config
              ?.AWS_SECRET_ACCESS_KEY as string) ?? "",
          AWS_BEARER_TOKEN_BEDROCK:
            (existingLlmProvider?.custom_config
              ?.AWS_BEARER_TOKEN_BEDROCK as string) ?? "",
        },
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        default_model_name: Yup.string().required("Model name is required"),
        custom_config: Yup.object({
          AWS_REGION_NAME: Yup.string().required("AWS Region is required"),
        }),
      })
    : buildDefaultValidationSchema().shape({
        custom_config: Yup.object({
          AWS_REGION_NAME: Yup.string().required("AWS Region is required"),
        }),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        const filteredCustomConfig = Object.fromEntries(
          Object.entries(values.custom_config || {}).filter(([, v]) => v !== "")
        );

        const submitValues = {
          ...values,
          custom_config:
            Object.keys(filteredCustomConfig).length > 0
              ? filteredCustomConfig
              : undefined,
        };

        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: BEDROCK_PROVIDER_NAME,
            payload: {
              ...submitValues,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: BEDROCK_PROVIDER_NAME,
            values: submitValues,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <BedrockModalInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          modelConfigurations={modelConfigurations}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/BifrostModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { markdown } from "@opal/utils";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import {
  LLMProviderFormProps,
  LLMProviderName,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import { fetchBifrostModels } from "@/app/admin/configuration/llm/utils";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  FieldWrapper,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { toast } from "@/hooks/useToast";

const BIFROST_PROVIDER_NAME = LLMProviderName.BIFROST;
const DEFAULT_API_BASE = "";

interface BifrostModalValues extends BaseLLMFormValues {
  api_key: string;
  api_base: string;
}

interface BifrostModalInternalsProps {
  formikProps: FormikProps<BifrostModalValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  modelConfigurations: ModelConfiguration[];
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function BifrostModalInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  modelConfigurations,
  isTesting,
  onClose,
  isOnboarding,
}: BifrostModalInternalsProps) {
  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || modelConfigurations;

  const isFetchDisabled = !formikProps.values.api_base;

  const handleFetchModels = async () => {
    const { models, error } = await fetchBifrostModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key || undefined,
      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
    }
    setFetchedModels(models);
  };

  // Auto-fetch models on initial load when editing an existing provider
  useEffect(() => {
    if (existingLlmProvider && !isFetchDisabled) {
      handleFetchModels().catch((err) => {
        console.error("Failed to fetch Bifrost models:", err);
        toast.error(
          err instanceof Error ? err.message : "Failed to fetch models"
        );
      });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={LLMProviderName.BIFROST}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <FieldWrapper>
        <InputLayouts.Vertical
          name="api_base"
          title="API Base URL"
          subDescription="Paste your Bifrost gateway endpoint URL (including API version)."
        >
          <InputTypeInField
            name="api_base"
            placeholder="https://your-bifrost-gateway.com/v1"
          />
        </InputLayouts.Vertical>
      </FieldWrapper>

      <FieldWrapper>
        <InputLayouts.Vertical
          name="api_key"
          title="API Key"
          suffix="optional"
          subDescription={markdown(
            "Paste your API key from [Bifrost](https://docs.getbifrost.ai/overview) to access your models."
          )}
        >
          <PasswordInputTypeInField name="api_key" placeholder="API Key" />
        </InputLayouts.Vertical>
      </FieldWrapper>

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. anthropic/claude-sonnet-4-6" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
          onRefetch={isFetchDisabled ? undefined : handleFetchModels}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function BifrostModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    BIFROST_PROVIDER_NAME
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: BifrostModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: BIFROST_PROVIDER_NAME,
        provider: BIFROST_PROVIDER_NAME,
        api_key: "",
        api_base: DEFAULT_API_BASE,
        default_model_name: "",
      } as BifrostModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_base: Yup.string().required("API Base URL is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_base: Yup.string().required("API Base URL is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: BIFROST_PROVIDER_NAME,
            payload: {
              ...values,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: BIFROST_PROVIDER_NAME,
            values,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <BifrostModalInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          modelConfigurations={modelConfigurations}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/CustomModal.test.tsx
================================================
/**
 * Integration Test: Custom LLM Provider Configuration Workflow
 *
 * Tests the complete user journey for configuring a custom LLM provider.
 * This tests the full workflow: open modal → form fill → test config → save → set as default
 */

import { render, screen, setupUser, waitFor } from "@tests/setup/test-utils";
import CustomModal from "@/sections/modals/llmConfig/CustomModal";
import { toast } from "@/hooks/useToast";

// Mock SWR's mutate function and useSWR
const mockMutate = jest.fn();
jest.mock("swr", () => {
  const actual = jest.requireActual("swr");
  return {
    ...actual,
    useSWRConfig: () => ({ mutate: mockMutate }),
    __esModule: true,
    default: () => ({ data: undefined, error: undefined, isLoading: false }),
  };
});

// Mock toast
jest.mock("@/hooks/useToast", () => {
  const success = jest.fn();
  const error = jest.fn();
  const toastFn = Object.assign(jest.fn(), {
    success,
    error,
    info: jest.fn(),
    warning: jest.fn(),
    dismiss: jest.fn(),
    clearAll: jest.fn(),
    _markLeaving: jest.fn(),
  });
  return {
    toast: toastFn,
    useToast: () => ({
      toast: toastFn,
      dismiss: toastFn.dismiss,
      clearAll: toastFn.clearAll,
    }),
  };
});

// Mock usePaidEnterpriseFeaturesEnabled
jest.mock("@/components/settings/usePaidEnterpriseFeaturesEnabled", () => ({
  usePaidEnterpriseFeaturesEnabled: () => false,
}));

describe("Custom LLM Provider Configuration Workflow", () => {
  let fetchSpy: jest.SpyInstance;

  beforeEach(() => {
    jest.clearAllMocks();
    fetchSpy = jest.spyOn(global, "fetch");
  });

  afterEach(() => {
    fetchSpy.mockRestore();
  });

  async function fillBasicFields(
    user: ReturnType<typeof setupUser>,
    options: {
      name: string;
      provider: string;
      modelName: string;
    }
  ) {
    const nameInput = screen.getByPlaceholderText("Display Name");
    const providerInput = screen.getByPlaceholderText("Provider Name");

    await user.type(nameInput, options.name);
    await user.type(providerInput, options.provider);

    // Fill in model name (first model row)
    const modelNameInput = screen.getByPlaceholderText("Model name");
    await user.type(modelNameInput, options.modelName);
  }

  test("creates a new custom LLM provider successfully", async () => {
    const user = setupUser();

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider?is_creation=true
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        id: 1,
        name: "My Custom Provider",
        provider: "openai",
      }),
    } as Response);

    render(<CustomModal open={true} onOpenChange={() => {}} />);

    await fillBasicFields(user, {
      name: "My Custom Provider",
      provider: "openai",
      modelName: "gpt-4",
    });

    // Submit the form
    const submitButton = screen.getByRole("button", { name: /connect/i });
    await user.click(submitButton);

    // Verify test API was called first
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/test",
        expect.objectContaining({
          method: "POST",
          headers: { "Content-Type": "application/json" },
        })
      );
    });

    // Verify create API was called
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/provider?is_creation=true",
        expect.objectContaining({
          method: "PUT",
          headers: { "Content-Type": "application/json" },
        })
      );
    });

    // Verify success toast
    await waitFor(() => {
      expect(toast.success).toHaveBeenCalledWith(
        "Provider enabled successfully!"
      );
    });

    // Verify SWR cache was invalidated
    expect(mockMutate).toHaveBeenCalledWith("/api/admin/llm/provider");
    expect(mockMutate).toHaveBeenCalledWith("/api/llm/provider");

    const personaProvidersMutateCall = mockMutate.mock.calls.find(
      ([key]) => typeof key === "function"
    );
    expect(personaProvidersMutateCall).toBeDefined();

    const personaProviderFilter = personaProvidersMutateCall?.[0] as (
      key: unknown
    ) => boolean;
    expect(personaProviderFilter("/api/llm/persona/42/providers")).toBe(true);
    expect(personaProviderFilter("/api/llm/provider")).toBe(false);
  });

  test("shows error when test configuration fails", async () => {
    const user = setupUser();

    // Mock POST /api/admin/llm/test (failure)
    fetchSpy.mockResolvedValueOnce({
      ok: false,
      status: 400,
      json: async () => ({ detail: "Invalid API key" }),
    } as Response);

    render(<CustomModal open={true} onOpenChange={() => {}} />);

    await fillBasicFields(user, {
      name: "Bad Provider",
      provider: "openai",
      modelName: "gpt-4",
    });

    // Submit the form
    const submitButton = screen.getByRole("button", { name: /connect/i });
    await user.click(submitButton);

    // Verify test API was called
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/test",
        expect.objectContaining({
          method: "POST",
        })
      );
    });

    // Verify error toast is displayed with the API error message
    await waitFor(() => {
      expect(toast.error).toHaveBeenCalledWith("Invalid API key");
    });

    // Verify create API was NOT called
    expect(
      fetchSpy.mock.calls.find((call) =>
        call[0].includes("/api/admin/llm/provider")
      )
    ).toBeUndefined();
  });

  test("updates an existing LLM provider", async () => {
    const user = setupUser();

    const existingProvider = {
      id: 1,
      name: "Existing Provider",
      provider: "anthropic",
      api_key: "old-key",
      api_base: "",
      api_version: "",
      model_configurations: [
        {
          name: "claude-3-opus",
          display_name: "",
          is_visible: true,
          max_input_tokens: null,
          supports_image_input: false,
          supports_reasoning: false,
        },
      ],
      custom_config: {},
      is_public: true,
      is_auto_mode: false,
      groups: [],
      personas: [],
      deployment_name: null,
    };

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider (update, no is_creation param)
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ ...existingProvider }),
    } as Response);

    render(
      <CustomModal
        existingLlmProvider={existingProvider}
        open={true}
        onOpenChange={() => {}}
      />
    );

    // Make a change to dirty the form (Update is disabled until dirty)
    const modelInputs = screen.getAllByPlaceholderText("Model name");
    await user.type(modelInputs[0]!, "-updated");

    // Submit — button says "Update" for existing providers
    const submitButton = screen.getByRole("button", { name: /update/i });
    await user.click(submitButton);

    // Verify test was called
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/test",
        expect.any(Object)
      );
    });

    // Verify update API was called (without is_creation param)
    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/provider",
        expect.objectContaining({
          method: "PUT",
        })
      );
    });

    // Verify success message says "updated"
    await waitFor(() => {
      expect(toast.success).toHaveBeenCalledWith(
        "Provider updated successfully!"
      );
    });
  });

  test("preserves additional models when updating a provider", async () => {
    const user = setupUser();

    const existingProvider = {
      id: 7,
      name: "ArcAI",
      provider: "openai",
      api_key: "old-key",
      api_base: "https://example-openai-compatible.local/v1",
      api_version: "",
      model_configurations: [
        {
          name: "gpt-oss-20b-bw-failover",
          display_name: "",
          is_visible: true,
          max_input_tokens: null,
          supports_image_input: false,
          supports_reasoning: false,
        },
      ],
      custom_config: {},
      is_public: true,
      is_auto_mode: false,
      groups: [],
      personas: [],
      deployment_name: null,
    };

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        ...existingProvider,
        model_configurations: [
          ...existingProvider.model_configurations,
          {
            name: "nemotron",
            display_name: "",
            is_visible: true,
            max_input_tokens: null,
            supports_image_input: false,
            supports_reasoning: false,
          },
        ],
      }),
    } as Response);

    render(
      <CustomModal
        existingLlmProvider={existingProvider}
        open={true}
        onOpenChange={() => {}}
      />
    );

    // Add a new model
    const addModelButton = screen.getByRole("button", { name: /add model/i });
    await user.click(addModelButton);

    // Fill in second model name
    const modelInputs = screen.getAllByPlaceholderText("Model name");
    await user.type(modelInputs[1]!, "nemotron");

    const submitButton = screen.getByRole("button", { name: /update/i });
    await user.click(submitButton);

    await waitFor(() => {
      expect(fetchSpy).toHaveBeenCalledWith(
        "/api/admin/llm/provider",
        expect.objectContaining({
          method: "PUT",
        })
      );
    });

    const updateCall = fetchSpy.mock.calls.find(
      (call) =>
        call[0] === "/api/admin/llm/provider" &&
        call[1]?.method?.toUpperCase() === "PUT"
    );
    expect(updateCall).toBeDefined();

    const requestBody = JSON.parse(updateCall![1].body as string);
    expect(requestBody.model_configurations).toHaveLength(2);
    expect(requestBody.model_configurations).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ name: "gpt-oss-20b-bw-failover" }),
        expect.objectContaining({ name: "nemotron" }),
      ])
    );
  });

  test("sets provider as default when shouldMarkAsDefault is true", async () => {
    const user = setupUser();

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider?is_creation=true
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        id: 5,
        name: "New Default Provider",
        provider: "openai",
      }),
    } as Response);

    // Mock POST /api/admin/llm/default
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    render(
      <CustomModal
        shouldMarkAsDefault={true}
        open={true}
        onOpenChange={() => {}}
      />
    );

    await fillBasicFields(user, {
      name: "New Default Provider",
      provider: "openai",
      modelName: "gpt-4",
    });

    // Submit
    const submitButton = screen.getByRole("button", { name: /connect/i });
    await user.click(submitButton);

    // Verify set as default API was called with correct endpoint and body
    await waitFor(() => {
      const defaultCall = fetchSpy.mock.calls.find(
        ([url]) => url === "/api/admin/llm/default"
      );
      expect(defaultCall).toBeDefined();

      const [, options] = defaultCall!;
      expect(options.method).toBe("POST");
      expect(options.headers).toEqual({ "Content-Type": "application/json" });

      const body = JSON.parse(options.body);
      expect(body.provider_id).toBe(5);
      expect(body).toHaveProperty("model_name");
    });
  });

  test("shows error when provider creation fails", async () => {
    const user = setupUser();

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider?is_creation=true (failure)
    fetchSpy.mockResolvedValueOnce({
      ok: false,
      status: 500,
      json: async () => ({ detail: "Database error" }),
    } as Response);

    render(<CustomModal open={true} onOpenChange={() => {}} />);

    await fillBasicFields(user, {
      name: "Test Provider",
      provider: "openai",
      modelName: "gpt-4",
    });

    // Submit
    const submitButton = screen.getByRole("button", { name: /connect/i });
    await user.click(submitButton);

    // Verify error toast
    await waitFor(() => {
      expect(toast.error).toHaveBeenCalledWith(
        "Failed to enable provider: Database error"
      );
    });
  });

  test("adds custom configuration key-value pairs", async () => {
    const user = setupUser();

    // Mock POST /api/admin/llm/test
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    // Mock PUT /api/admin/llm/provider?is_creation=true
    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ id: 1, name: "Provider with Custom Config" }),
    } as Response);

    render(<CustomModal open={true} onOpenChange={() => {}} />);

    // Fill basic fields
    const nameInput = screen.getByPlaceholderText("Display Name");
    await user.type(nameInput, "Cloudflare Provider");

    const providerInput = screen.getByPlaceholderText("Provider Name");
    await user.type(providerInput, "cloudflare");

    // Click "Add Line" button for custom config (aria-label from KeyValueInput)
    const addLineButton = screen.getByRole("button", {
      name: /add key and value pair/i,
    });
    await user.click(addLineButton);

    // Fill in custom config key-value pair
    const keyInputs = screen.getAllByPlaceholderText("Key");
    const valueInputs = screen.getAllByPlaceholderText("Value");

    await user.type(keyInputs[0]!, "CLOUDFLARE_ACCOUNT_ID");
    await user.type(valueInputs[0]!, "my-account-id-123");

    // Fill in model name
    const modelNameInput = screen.getByPlaceholderText("Model name");
    await user.type(modelNameInput, "@cf/meta/llama-2-7b-chat-int8");

    // Submit
    const submitButton = screen.getByRole("button", { name: /connect/i });
    await user.click(submitButton);

    // Verify the custom config was included in the request
    await waitFor(() => {
      const createCall = fetchSpy.mock.calls.find((call) =>
        call[0].includes("/api/admin/llm/provider")
      );
      expect(createCall).toBeDefined();

      const requestBody = JSON.parse(createCall![1].body);
      expect(requestBody.custom_config).toEqual({
        CLOUDFLARE_ACCOUNT_ID: "my-account-id-123",
      });
    });
  });
});


================================================
FILE: web/src/sections/modals/llmConfig/CustomModal.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import { LLMProviderFormProps, ModelConfiguration } from "@/interfaces/llm";
import * as Yup from "yup";
import {
  buildDefaultInitialValues,
  buildOnboardingInitialValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  DisplayNameField,
  FieldSeparator,
  ModelsAccessField,
  LLMConfigurationModalWrapper,
  FieldWrapper,
} from "@/sections/modals/llmConfig/shared";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import KeyValueInput, {
  KeyValue,
} from "@/refresh-components/inputs/InputKeyValue";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import Text from "@/refresh-components/texts/Text";
import { Button, Card, EmptyMessageCard } from "@opal/components";
import { SvgMinusCircle, SvgPlusCircle } from "@opal/icons";
import { toast } from "@/hooks/useToast";
import { Content } from "@opal/layouts";
import { Section } from "@/layouts/general-layouts";

// ─── Model Configuration List ─────────────────────────────────────────────────

const MODEL_GRID_COLS = "grid-cols-[2fr_2fr_minmax(10rem,1fr)_1fr_2.25rem]";

type CustomModelConfiguration = Pick<
  ModelConfiguration,
  "name" | "max_input_tokens" | "supports_image_input"
> & {
  display_name: string;
};

interface ModelConfigurationItemProps {
  model: CustomModelConfiguration;
  onChange: (next: CustomModelConfiguration) => void;
  onRemove: () => void;
  canRemove: boolean;
}

function ModelConfigurationItem({
  model,
  onChange,
  onRemove,
  canRemove,
}: ModelConfigurationItemProps) {
  return (
    <>
      <InputTypeIn
        placeholder="Model name"
        value={model.name}
        onChange={(e) => onChange({ ...model, name: e.target.value })}
        showClearButton={false}
      />
      <InputTypeIn
        placeholder="Display name"
        value={model.display_name}
        onChange={(e) => onChange({ ...model, display_name: e.target.value })}
        showClearButton={false}
      />
      <InputSelect
        value={model.supports_image_input ? "text-image" : "text-only"}
        onValueChange={(value) =>
          onChange({ ...model, supports_image_input: value === "text-image" })
        }
      >
        <InputSelect.Trigger placeholder="Input type" />
        <InputSelect.Content>
          <InputSelect.Item value="text-only">Text Only</InputSelect.Item>
          <InputSelect.Item value="text-image">Text & Image</InputSelect.Item>
        </InputSelect.Content>
      </InputSelect>
      <InputTypeIn
        placeholder="Default"
        value={model.max_input_tokens?.toString() ?? ""}
        onChange={(e) =>
          onChange({
            ...model,
            max_input_tokens:
              e.target.value === "" ? null : Number(e.target.value),
          })
        }
        showClearButton={false}
        type="number"
      />
      <Button
        disabled={!canRemove}
        prominence="tertiary"
        icon={SvgMinusCircle}
        onClick={onRemove}
      />
    </>
  );
}

interface ModelConfigurationListProps {
  formikProps: FormikProps<{
    model_configurations: CustomModelConfiguration[];
  }>;
}

function ModelConfigurationList({ formikProps }: ModelConfigurationListProps) {
  const models = formikProps.values.model_configurations;

  function handleChange(index: number, next: CustomModelConfiguration) {
    const updated = [...models];
    updated[index] = next;
    formikProps.setFieldValue("model_configurations", updated);
  }

  function handleRemove(index: number) {
    formikProps.setFieldValue(
      "model_configurations",
      models.filter((_, i) => i !== index)
    );
  }

  function handleAdd() {
    formikProps.setFieldValue("model_configurations", [
      ...models,
      {
        name: "",
        display_name: "",
        max_input_tokens: null,
        supports_image_input: false,
      },
    ]);
  }

  return (
    <div className="w-full flex flex-col gap-y-2">
      {models.length > 0 ? (
        <div className={`grid items-center gap-1 ${MODEL_GRID_COLS}`}>
          <div className="pb-1">
            <Text mainUiAction>Model Name</Text>
          </div>
          <Text mainUiAction>Display Name</Text>
          <Text mainUiAction>Input Type</Text>
          <Text mainUiAction>Max Tokens</Text>
          <div aria-hidden />

          {models.map((model, index) => (
            <ModelConfigurationItem
              key={index}
              model={model}
              onChange={(next) => handleChange(index, next)}
              onRemove={() => handleRemove(index)}
              canRemove={models.length > 1}
            />
          ))}
        </div>
      ) : (
        <EmptyMessageCard title="No models added yet." padding="sm" />
      )}

      <Button
        prominence="secondary"
        icon={SvgPlusCircle}
        onClick={handleAdd}
        type="button"
      >
        Add Model
      </Button>
    </div>
  );
}

// ─── Custom Config Processing ─────────────────────────────────────────────────

function customConfigProcessing(items: KeyValue[]) {
  const customConfig: { [key: string]: string } = {};
  items.forEach(({ key, value }) => {
    customConfig[key] = value;
  });
  return customConfig;
}

export default function CustomModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
}: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const [isTesting, setIsTesting] = useState(false);
  const { mutate } = useSWRConfig();

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const initialValues = {
    ...buildDefaultInitialValues(
      existingLlmProvider,
      undefined,
      defaultModelName
    ),
    ...(isOnboarding ? buildOnboardingInitialValues() : {}),
    provider: existingLlmProvider?.provider ?? "",
    model_configurations: existingLlmProvider?.model_configurations.map(
      (mc) => ({
        name: mc.name,
        display_name: mc.display_name ?? "",
        max_input_tokens: mc.max_input_tokens ?? null,
        supports_image_input: mc.supports_image_input,
      })
    ) ?? [
      {
        name: "",
        display_name: "",
        max_input_tokens: null,
        supports_image_input: false,
      },
    ],
    custom_config_list: existingLlmProvider?.custom_config
      ? Object.entries(existingLlmProvider.custom_config).map(
          ([key, value]) => ({ key, value: String(value) })
        )
      : [],
  };

  const modelConfigurationSchema = Yup.object({
    name: Yup.string().required("Model name is required"),
    max_input_tokens: Yup.number()
      .transform((value, originalValue) =>
        originalValue === "" || originalValue === undefined ? null : value
      )
      .nullable()
      .optional(),
  });

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        provider: Yup.string().required("Provider Name is required"),
        model_configurations: Yup.array(modelConfigurationSchema),
      })
    : Yup.object().shape({
        name: Yup.string().required("Display Name is required"),
        provider: Yup.string().required("Provider Name is required"),
        model_configurations: Yup.array(modelConfigurationSchema),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        setSubmitting(true);

        const modelConfigurations = values.model_configurations
          .filter((mc) => mc.name.trim() !== "")
          .map((mc) => ({
            name: mc.name,
            display_name: mc.display_name || undefined,
            is_visible: true,
            max_input_tokens: mc.max_input_tokens ?? null,
            supports_image_input: mc.supports_image_input,
            supports_reasoning: false,
          }));

        if (modelConfigurations.length === 0) {
          toast.error("At least one model name is required");
          setSubmitting(false);
          return;
        }

        if (isOnboarding && onboardingState && onboardingActions) {
          await submitOnboardingProvider({
            providerName: values.provider,
            payload: {
              ...values,
              model_configurations: modelConfigurations,
              custom_config: customConfigProcessing(values.custom_config_list),
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: true,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          const selectedModelNames = modelConfigurations.map(
            (config) => config.name
          );

          await submitLLMProvider({
            providerName: values.provider,
            values: {
              ...values,
              selected_model_names: selectedModelNames,
              custom_config: customConfigProcessing(values.custom_config_list),
            },
            initialValues: {
              ...initialValues,
              custom_config: customConfigProcessing(
                initialValues.custom_config_list
              ),
            },
            modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LLMConfigurationModalWrapper
          providerEndpoint="custom"
          existingProviderName={existingLlmProvider?.name}
          onClose={onClose}
          isFormValid={formikProps.isValid}
          isDirty={formikProps.dirty}
          isTesting={isTesting}
          isSubmitting={formikProps.isSubmitting}
        >
          {!isOnboarding && (
            <Section gap={0}>
              <DisplayNameField disabled={!!existingLlmProvider} />

              <FieldWrapper>
                <InputLayouts.Vertical
                  name="provider"
                  title="Provider Name"
                  subDescription="Should be one of the providers listed at https://docs.litellm.ai/docs/providers."
                >
                  <InputTypeInField
                    name="provider"
                    placeholder="Provider Name"
                    variant={existingLlmProvider ? "disabled" : undefined}
                  />
                </InputLayouts.Vertical>
              </FieldWrapper>
            </Section>
          )}

          <FieldSeparator />

          <FieldWrapper>
            <Section gap={0.75}>
              <Content
                title="Provider Configs"
                description="Add properties as needed by the model provider. This is passed to LiteLLM completion() call as arguments in the environment variable. See LiteLLM documentation for more instructions."
                widthVariant="full"
                variant="section"
                sizePreset="main-content"
              />

              <KeyValueInput
                items={formikProps.values.custom_config_list}
                onChange={(items) =>
                  formikProps.setFieldValue("custom_config_list", items)
                }
                addButtonLabel="Add Line"
              />
            </Section>
          </FieldWrapper>

          <FieldSeparator />

          <Section gap={0.5}>
            <FieldWrapper>
              <Content
                title="Models"
                description="List LLM models you wish to use and their configurations for this provider. See full list of models at LiteLLM."
                variant="section"
                sizePreset="main-content"
                widthVariant="full"
              />
            </FieldWrapper>

            <Card padding="sm">
              <ModelConfigurationList formikProps={formikProps as any} />
            </Card>
          </Section>

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <ModelsAccessField formikProps={formikProps} />
            </>
          )}
        </LLMConfigurationModalWrapper>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/LMStudioForm.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useState } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import {
  LLMProviderFormProps,
  LLMProviderName,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  FieldWrapper,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { fetchModels } from "@/app/admin/configuration/llm/utils";
import debounce from "lodash/debounce";
import { toast } from "@/hooks/useToast";

const DEFAULT_API_BASE = "http://localhost:1234";

interface LMStudioFormValues extends BaseLLMFormValues {
  api_base: string;
  custom_config: {
    LM_STUDIO_API_KEY?: string;
  };
}

interface LMStudioFormInternalsProps {
  formikProps: FormikProps<LMStudioFormValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function LMStudioFormInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  isTesting,
  onClose,
  isOnboarding,
}: LMStudioFormInternalsProps) {
  const initialApiKey =
    (existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY as string) ?? "";

  const doFetchModels = useCallback(
    (apiBase: string, apiKey: string | undefined, signal: AbortSignal) => {
      fetchModels(
        LLMProviderName.LM_STUDIO,
        {
          api_base: apiBase,
          custom_config: apiKey ? { LM_STUDIO_API_KEY: apiKey } : {},
          api_key_changed: apiKey !== initialApiKey,
          name: existingLlmProvider?.name,
        },
        signal
      ).then((data) => {
        if (signal.aborted) return;
        if (data.error) {
          toast.error(data.error);
          setFetchedModels([]);
          return;
        }
        setFetchedModels(data.models);
      });
    },
    [existingLlmProvider?.name, initialApiKey, setFetchedModels]
  );

  const debouncedFetchModels = useMemo(
    () => debounce(doFetchModels, 500),
    [doFetchModels]
  );

  const apiBase = formikProps.values.api_base;
  const apiKey = formikProps.values.custom_config?.LM_STUDIO_API_KEY;

  useEffect(() => {
    if (apiBase) {
      const controller = new AbortController();
      debouncedFetchModels(apiBase, apiKey, controller.signal);
      return () => {
        debouncedFetchModels.cancel();
        controller.abort();
      };
    } else {
      setFetchedModels([]);
    }
  }, [apiBase, apiKey, debouncedFetchModels, setFetchedModels]);

  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || [];

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={LLMProviderName.LM_STUDIO}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <FieldWrapper>
        <InputLayouts.Vertical
          name="api_base"
          title="API Base URL"
          subDescription="The base URL for your LM Studio server."
        >
          <InputTypeInField
            name="api_base"
            placeholder="Your LM Studio API base URL"
          />
        </InputLayouts.Vertical>
      </FieldWrapper>

      <FieldWrapper>
        <InputLayouts.Vertical
          name="custom_config.LM_STUDIO_API_KEY"
          title="API Key"
          subDescription="Optional API key if your LM Studio server requires authentication."
          suffix="optional"
        >
          <PasswordInputTypeInField
            name="custom_config.LM_STUDIO_API_KEY"
            placeholder="API Key"
          />
        </InputLayouts.Vertical>
      </FieldWrapper>

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. llama3.1" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function LMStudioForm({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    LLMProviderName.LM_STUDIO
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: LMStudioFormValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: LLMProviderName.LM_STUDIO,
        provider: LLMProviderName.LM_STUDIO,
        api_base: DEFAULT_API_BASE,
        default_model_name: "",
        custom_config: {
          LM_STUDIO_API_KEY: "",
        },
      } as LMStudioFormValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
        custom_config: {
          LM_STUDIO_API_KEY:
            (existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY as string) ??
            "",
        },
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_base: Yup.string().required("API Base URL is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_base: Yup.string().required("API Base URL is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        const filteredCustomConfig = Object.fromEntries(
          Object.entries(values.custom_config || {}).filter(([, v]) => v !== "")
        );

        const submitValues = {
          ...values,
          custom_config:
            Object.keys(filteredCustomConfig).length > 0
              ? filteredCustomConfig
              : undefined,
        };

        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: LLMProviderName.LM_STUDIO,
            payload: {
              ...submitValues,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: LLMProviderName.LM_STUDIO,
            values: submitValues,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LMStudioFormInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/LiteLLMProxyModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import {
  LLMProviderFormProps,
  LLMProviderName,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import { fetchLiteLLMProxyModels } from "@/app/admin/configuration/llm/utils";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  APIKeyField,
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  FieldWrapper,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { toast } from "@/hooks/useToast";

const DEFAULT_API_BASE = "http://localhost:4000";

interface LiteLLMProxyModalValues extends BaseLLMFormValues {
  api_key: string;
  api_base: string;
}

interface LiteLLMProxyModalInternalsProps {
  formikProps: FormikProps<LiteLLMProxyModalValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  modelConfigurations: ModelConfiguration[];
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function LiteLLMProxyModalInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  modelConfigurations,
  isTesting,
  onClose,
  isOnboarding,
}: LiteLLMProxyModalInternalsProps) {
  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || modelConfigurations;

  const isFetchDisabled =
    !formikProps.values.api_base || !formikProps.values.api_key;

  const handleFetchModels = async () => {
    const { models, error } = await fetchLiteLLMProxyModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key,
      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
    }
    setFetchedModels(models);
  };

  // Auto-fetch models on initial load when editing an existing provider
  useEffect(() => {
    if (existingLlmProvider && !isFetchDisabled) {
      handleFetchModels().catch((err) => {
        toast.error(
          err instanceof Error ? err.message : "Failed to fetch models"
        );
      });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={LLMProviderName.LITELLM_PROXY}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <FieldWrapper>
        <InputLayouts.Vertical
          name="api_base"
          title="API Base URL"
          subDescription="The base URL for your LiteLLM Proxy server."
        >
          <InputTypeInField
            name="api_base"
            placeholder="https://your-litellm-proxy.com"
          />
        </InputLayouts.Vertical>
      </FieldWrapper>

      <APIKeyField providerName="LiteLLM Proxy" />

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. gpt-4o" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
          onRefetch={isFetchDisabled ? undefined : handleFetchModels}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function LiteLLMProxyModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    LLMProviderName.LITELLM_PROXY
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: LiteLLMProxyModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: LLMProviderName.LITELLM_PROXY,
        provider: LLMProviderName.LITELLM_PROXY,
        api_key: "",
        api_base: DEFAULT_API_BASE,
        default_model_name: "",
      } as LiteLLMProxyModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_key: Yup.string().required("API Key is required"),
        api_base: Yup.string().required("API Base URL is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_key: Yup.string().required("API Key is required"),
        api_base: Yup.string().required("API Base URL is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: LLMProviderName.LITELLM_PROXY,
            payload: {
              ...values,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: LLMProviderName.LITELLM_PROXY,
            values,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LiteLLMProxyModalInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          modelConfigurations={modelConfigurations}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/OllamaModal.tsx
================================================
"use client";

import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import {
  LLMProviderFormProps,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { fetchOllamaModels } from "@/app/admin/configuration/llm/utils";
import debounce from "lodash/debounce";
import Tabs from "@/refresh-components/Tabs";
import { Card } from "@opal/components";
import { toast } from "@/hooks/useToast";

const OLLAMA_PROVIDER_NAME = "ollama_chat";
const DEFAULT_API_BASE = "http://127.0.0.1:11434";
const TAB_SELF_HOSTED = "self-hosted";
const TAB_CLOUD = "cloud";

interface OllamaModalValues extends BaseLLMFormValues {
  api_base: string;
  custom_config: {
    OLLAMA_API_KEY?: string;
  };
}

interface OllamaModalInternalsProps {
  formikProps: FormikProps<OllamaModalValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function OllamaModalInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  isTesting,
  onClose,
  isOnboarding,
}: OllamaModalInternalsProps) {
  const isInitialMount = useRef(true);

  const doFetchModels = useCallback(
    (apiBase: string, signal: AbortSignal) => {
      fetchOllamaModels({
        api_base: apiBase,
        provider_name: existingLlmProvider?.name,
        signal,
      }).then((data) => {
        if (signal.aborted) return;
        if (data.error) {
          toast.error(data.error);
          setFetchedModels([]);
          return;
        }
        setFetchedModels(data.models);
      });
    },
    [existingLlmProvider?.name, setFetchedModels]
  );

  const debouncedFetchModels = useMemo(
    () => debounce(doFetchModels, 500),
    [doFetchModels]
  );

  // Skip the initial fetch for new providers — api_base starts with a default
  // value, which would otherwise trigger a fetch before the user has done
  // anything. Existing providers should still auto-fetch on mount.
  useEffect(() => {
    if (isInitialMount.current) {
      isInitialMount.current = false;
      if (!existingLlmProvider) return;
    }

    if (formikProps.values.api_base) {
      const controller = new AbortController();
      debouncedFetchModels(formikProps.values.api_base, controller.signal);
      return () => {
        debouncedFetchModels.cancel();
        controller.abort();
      };
    } else {
      setFetchedModels([]);
    }
  }, [
    formikProps.values.api_base,
    debouncedFetchModels,
    setFetchedModels,
    existingLlmProvider,
  ]);

  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || [];

  const hasApiKey = !!formikProps.values.custom_config?.OLLAMA_API_KEY;
  const defaultTab =
    existingLlmProvider && hasApiKey ? TAB_CLOUD : TAB_SELF_HOSTED;

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={OLLAMA_PROVIDER_NAME}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <Card background="light" border="none" padding="sm">
        <Tabs defaultValue={defaultTab}>
          <Tabs.List>
            <Tabs.Trigger value={TAB_SELF_HOSTED}>
              Self-hosted Ollama
            </Tabs.Trigger>
            <Tabs.Trigger value={TAB_CLOUD}>Ollama Cloud</Tabs.Trigger>
          </Tabs.List>
          <Tabs.Content value={TAB_SELF_HOSTED}>
            <InputLayouts.Vertical
              name="api_base"
              title="API Base URL"
              subDescription="The base URL for your Ollama instance."
            >
              <InputTypeInField
                name="api_base"
                placeholder="Your Ollama API base URL"
              />
            </InputLayouts.Vertical>
          </Tabs.Content>

          <Tabs.Content value={TAB_CLOUD}>
            <InputLayouts.Vertical
              name="custom_config.OLLAMA_API_KEY"
              title="API Key"
              subDescription="Your Ollama Cloud API key."
            >
              <PasswordInputTypeInField
                name="custom_config.OLLAMA_API_KEY"
                placeholder="API Key"
              />
            </InputLayouts.Vertical>
          </Tabs.Content>
        </Tabs>
      </Card>

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. llama3.1" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function OllamaModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } =
    useWellKnownLLMProvider(OLLAMA_PROVIDER_NAME);

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: OllamaModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: OLLAMA_PROVIDER_NAME,
        provider: OLLAMA_PROVIDER_NAME,
        api_base: DEFAULT_API_BASE,
        default_model_name: "",
        custom_config: {
          OLLAMA_API_KEY: "",
        },
      } as OllamaModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
        custom_config: {
          OLLAMA_API_KEY:
            (existingLlmProvider?.custom_config?.OLLAMA_API_KEY as string) ??
            "",
        },
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_base: Yup.string().required("API Base URL is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_base: Yup.string().required("API Base URL is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        const filteredCustomConfig = Object.fromEntries(
          Object.entries(values.custom_config || {}).filter(([, v]) => v !== "")
        );

        const submitValues = {
          ...values,
          custom_config:
            Object.keys(filteredCustomConfig).length > 0
              ? filteredCustomConfig
              : undefined,
        };

        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: OLLAMA_PROVIDER_NAME,
            payload: {
              ...submitValues,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: OLLAMA_PROVIDER_NAME,
            values: submitValues,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <OllamaModalInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/OpenAIModal.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { Formik } from "formik";
import { LLMProviderFormProps } from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  APIKeyField,
  ModelsField,
  DisplayNameField,
  FieldSeparator,
  ModelsAccessField,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";

const OPENAI_PROVIDER_NAME = "openai";
const DEFAULT_DEFAULT_MODEL_NAME = "gpt-5.2";

export default function OpenAIModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const [isTesting, setIsTesting] = useState(false);
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } =
    useWellKnownLLMProvider(OPENAI_PROVIDER_NAME);

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues = isOnboarding
    ? {
        ...buildOnboardingInitialValues(),
        name: OPENAI_PROVIDER_NAME,
        provider: OPENAI_PROVIDER_NAME,
        api_key: "",
        default_model_name: DEFAULT_DEFAULT_MODEL_NAME,
      }
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        default_model_name:
          (defaultModelName &&
          modelConfigurations.some((m) => m.name === defaultModelName)
            ? defaultModelName
            : undefined) ??
          wellKnownLLMProvider?.recommended_default_model?.name ??
          DEFAULT_DEFAULT_MODEL_NAME,
        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_key: Yup.string().required("API Key is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_key: Yup.string().required("API Key is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];

          await submitOnboardingProvider({
            providerName: OPENAI_PROVIDER_NAME,
            payload: {
              ...values,
              model_configurations: modelConfigsToUse,
              is_auto_mode:
                values.default_model_name === DEFAULT_DEFAULT_MODEL_NAME,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: OPENAI_PROVIDER_NAME,
            values,
            initialValues,
            modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LLMConfigurationModalWrapper
          providerEndpoint={OPENAI_PROVIDER_NAME}
          existingProviderName={existingLlmProvider?.name}
          onClose={onClose}
          isFormValid={formikProps.isValid}
          isDirty={formikProps.dirty}
          isTesting={isTesting}
          isSubmitting={formikProps.isSubmitting}
        >
          <APIKeyField providerName="OpenAI" />

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <DisplayNameField disabled={!!existingLlmProvider} />
            </>
          )}

          <FieldSeparator />
          {isOnboarding ? (
            <SingleDefaultModelField placeholder="E.g. gpt-5.2" />
          ) : (
            <ModelsField
              modelConfigurations={modelConfigurations}
              formikProps={formikProps}
              recommendedDefaultModel={
                wellKnownLLMProvider?.recommended_default_model ?? null
              }
              shouldShowAutoUpdateToggle={true}
            />
          )}

          {!isOnboarding && (
            <>
              <FieldSeparator />
              <ModelsAccessField formikProps={formikProps} />
            </>
          )}
        </LLMConfigurationModalWrapper>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/OpenRouterModal.tsx
================================================
"use client";

import { useState, useEffect } from "react";
import { useSWRConfig } from "swr";
import { Formik, FormikProps } from "formik";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import {
  LLMProviderFormProps,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import { fetchOpenRouterModels } from "@/app/admin/configuration/llm/utils";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  APIKeyField,
  ModelsField,
  DisplayNameField,
  ModelsAccessField,
  FieldSeparator,
  FieldWrapper,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";
import { toast } from "@/hooks/useToast";

const OPENROUTER_PROVIDER_NAME = "openrouter";
const DEFAULT_API_BASE = "https://openrouter.ai/api/v1";
interface OpenRouterModalValues extends BaseLLMFormValues {
  api_key: string;
  api_base: string;
}

interface OpenRouterModalInternalsProps {
  formikProps: FormikProps<OpenRouterModalValues>;
  existingLlmProvider: LLMProviderView | undefined;
  fetchedModels: ModelConfiguration[];
  setFetchedModels: (models: ModelConfiguration[]) => void;
  modelConfigurations: ModelConfiguration[];
  isTesting: boolean;
  onClose: () => void;
  isOnboarding: boolean;
}

function OpenRouterModalInternals({
  formikProps,
  existingLlmProvider,
  fetchedModels,
  setFetchedModels,
  modelConfigurations,
  isTesting,
  onClose,
  isOnboarding,
}: OpenRouterModalInternalsProps) {
  const currentModels =
    fetchedModels.length > 0
      ? fetchedModels
      : existingLlmProvider?.model_configurations || modelConfigurations;

  const isFetchDisabled =
    !formikProps.values.api_base || !formikProps.values.api_key;

  const handleFetchModels = async () => {
    const { models, error } = await fetchOpenRouterModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key,
      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
    }
    setFetchedModels(models);
  };

  // Auto-fetch models on initial load when editing an existing provider
  useEffect(() => {
    if (existingLlmProvider && !isFetchDisabled) {
      handleFetchModels().catch((err) => {
        toast.error(
          err instanceof Error ? err.message : "Failed to fetch models"
        );
      });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  return (
    <LLMConfigurationModalWrapper
      providerEndpoint={OPENROUTER_PROVIDER_NAME}
      existingProviderName={existingLlmProvider?.name}
      onClose={onClose}
      isFormValid={formikProps.isValid}
      isDirty={formikProps.dirty}
      isTesting={isTesting}
      isSubmitting={formikProps.isSubmitting}
    >
      <FieldWrapper>
        <InputLayouts.Vertical
          name="api_base"
          title="API Base URL"
          subDescription="Paste your OpenRouter-compatible endpoint URL or use OpenRouter API directly."
        >
          <InputTypeInField
            name="api_base"
            placeholder="Your OpenRouter base URL"
          />
        </InputLayouts.Vertical>
      </FieldWrapper>

      <APIKeyField providerName="OpenRouter" />

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <DisplayNameField disabled={!!existingLlmProvider} />
        </>
      )}

      <FieldSeparator />

      {isOnboarding ? (
        <SingleDefaultModelField placeholder="E.g. openai/gpt-4o" />
      ) : (
        <ModelsField
          modelConfigurations={currentModels}
          formikProps={formikProps}
          recommendedDefaultModel={null}
          shouldShowAutoUpdateToggle={false}
          onRefetch={isFetchDisabled ? undefined : handleFetchModels}
        />
      )}

      {!isOnboarding && (
        <>
          <FieldSeparator />
          <ModelsAccessField formikProps={formikProps} />
        </>
      )}
    </LLMConfigurationModalWrapper>
  );
}

export default function OpenRouterModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);
  const [isTesting, setIsTesting] = useState(false);
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    OPENROUTER_PROVIDER_NAME
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: OpenRouterModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: OPENROUTER_PROVIDER_NAME,
        provider: OPENROUTER_PROVIDER_NAME,
        api_key: "",
        api_base: DEFAULT_API_BASE,
        default_model_name: "",
      } as OpenRouterModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        api_key: existingLlmProvider?.api_key ?? "",
        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        api_key: Yup.string().required("API Key is required"),
        api_base: Yup.string().required("API Base URL is required"),
        default_model_name: Yup.string().required("Model name is required"),
      })
    : buildDefaultValidationSchema().shape({
        api_key: Yup.string().required("API Key is required"),
        api_base: Yup.string().required("API Base URL is required"),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            fetchedModels.length > 0 ? fetchedModels : [];

          await submitOnboardingProvider({
            providerName: OPENROUTER_PROVIDER_NAME,
            payload: {
              ...values,
              model_configurations: modelConfigsToUse,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: OPENROUTER_PROVIDER_NAME,
            values,
            initialValues,
            modelConfigurations:
              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <OpenRouterModalInternals
          formikProps={formikProps}
          existingLlmProvider={existingLlmProvider}
          fetchedModels={fetchedModels}
          setFetchedModels={setFetchedModels}
          modelConfigurations={modelConfigurations}
          isTesting={isTesting}
          onClose={onClose}
          isOnboarding={isOnboarding}
        />
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/VertexAIModal.tsx
================================================
"use client";

import { useState } from "react";
import { useSWRConfig } from "swr";
import { Formik } from "formik";
import { FileUploadFormField } from "@/components/Field";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import * as InputLayouts from "@/layouts/input-layouts";
import { LLMProviderFormProps } from "@/interfaces/llm";
import * as Yup from "yup";
import { useWellKnownLLMProvider } from "@/hooks/useLLMProviders";
import {
  buildDefaultInitialValues,
  buildDefaultValidationSchema,
  buildAvailableModelConfigurations,
  buildOnboardingInitialValues,
  BaseLLMFormValues,
} from "@/sections/modals/llmConfig/utils";
import {
  submitLLMProvider,
  submitOnboardingProvider,
} from "@/sections/modals/llmConfig/svc";
import {
  ModelsField,
  DisplayNameField,
  FieldSeparator,
  FieldWrapper,
  ModelsAccessField,
  SingleDefaultModelField,
  LLMConfigurationModalWrapper,
} from "@/sections/modals/llmConfig/shared";

const VERTEXAI_PROVIDER_NAME = "vertex_ai";
const VERTEXAI_DISPLAY_NAME = "Google Cloud Vertex AI";
const VERTEXAI_DEFAULT_MODEL = "gemini-2.5-pro";
const VERTEXAI_DEFAULT_LOCATION = "global";

interface VertexAIModalValues extends BaseLLMFormValues {
  custom_config: {
    vertex_credentials: string;
    vertex_location: string;
  };
}

export default function VertexAIModal({
  variant = "llm-configuration",
  existingLlmProvider,
  shouldMarkAsDefault,
  open,
  onOpenChange,
  defaultModelName,
  onboardingState,
  onboardingActions,
  llmDescriptor,
}: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const [isTesting, setIsTesting] = useState(false);
  const { mutate } = useSWRConfig();
  const { wellKnownLLMProvider } = useWellKnownLLMProvider(
    VERTEXAI_PROVIDER_NAME
  );

  if (open === false) return null;

  const onClose = () => onOpenChange?.(false);

  const modelConfigurations = buildAvailableModelConfigurations(
    existingLlmProvider,
    wellKnownLLMProvider ?? llmDescriptor
  );

  const initialValues: VertexAIModalValues = isOnboarding
    ? ({
        ...buildOnboardingInitialValues(),
        name: VERTEXAI_PROVIDER_NAME,
        provider: VERTEXAI_PROVIDER_NAME,
        default_model_name: VERTEXAI_DEFAULT_MODEL,
        custom_config: {
          vertex_credentials: "",
          vertex_location: VERTEXAI_DEFAULT_LOCATION,
        },
      } as VertexAIModalValues)
    : {
        ...buildDefaultInitialValues(
          existingLlmProvider,
          modelConfigurations,
          defaultModelName
        ),
        default_model_name:
          (defaultModelName &&
          modelConfigurations.some((m) => m.name === defaultModelName)
            ? defaultModelName
            : undefined) ??
          wellKnownLLMProvider?.recommended_default_model?.name ??
          VERTEXAI_DEFAULT_MODEL,
        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,
        custom_config: {
          vertex_credentials:
            (existingLlmProvider?.custom_config
              ?.vertex_credentials as string) ?? "",
          vertex_location:
            (existingLlmProvider?.custom_config?.vertex_location as string) ??
            VERTEXAI_DEFAULT_LOCATION,
        },
      };

  const validationSchema = isOnboarding
    ? Yup.object().shape({
        default_model_name: Yup.string().required("Model name is required"),
        custom_config: Yup.object({
          vertex_credentials: Yup.string().required(
            "Credentials file is required"
          ),
          vertex_location: Yup.string(),
        }),
      })
    : buildDefaultValidationSchema().shape({
        custom_config: Yup.object({
          vertex_credentials: Yup.string().required(
            "Credentials file is required"
          ),
          vertex_location: Yup.string(),
        }),
      });

  return (
    <Formik
      initialValues={initialValues}
      validationSchema={validationSchema}
      validateOnMount={true}
      onSubmit={async (values, { setSubmitting }) => {
        const filteredCustomConfig = Object.fromEntries(
          Object.entries(values.custom_config || {}).filter(
            ([key, v]) => key === "vertex_credentials" || v !== ""
          )
        );

        const submitValues = {
          ...values,
          custom_config:
            Object.keys(filteredCustomConfig).length > 0
              ? filteredCustomConfig
              : undefined,
        };

        if (isOnboarding && onboardingState && onboardingActions) {
          const modelConfigsToUse =
            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];

          await submitOnboardingProvider({
            providerName: VERTEXAI_PROVIDER_NAME,
            payload: {
              ...submitValues,
              model_configurations: modelConfigsToUse,
              is_auto_mode:
                values.default_model_name === VERTEXAI_DEFAULT_MODEL,
            },
            onboardingState,
            onboardingActions,
            isCustomProvider: false,
            onClose,
            setIsSubmitting: setSubmitting,
          });
        } else {
          await submitLLMProvider({
            providerName: VERTEXAI_PROVIDER_NAME,
            values: submitValues,
            initialValues,
            modelConfigurations,
            existingLlmProvider,
            shouldMarkAsDefault,
            setIsTesting,
            mutate,
            onClose,
            setSubmitting,
          });
        }
      }}
    >
      {(formikProps) => (
        <LLMConfigurationModalWrapper
          providerEndpoint={VERTEXAI_PROVIDER_NAME}
          providerName={VERTEXAI_DISPLAY_NAME}
          existingProviderName={existingLlmProvider?.name}
          onClose={onClose}
          isFormValid={formikProps.isValid}
          isDirty={formikProps.dirty}
          isTesting={isTesting}
          isSubmitting={formikProps.isSubmitting}
        >
          <FieldWrapper>
            <InputLayouts.Vertical
              name="custom_config.vertex_location"
              title="Google Cloud Region Name"
              subDescription="Region where your Google Vertex AI models are hosted. See full list of regions supported at Google Cloud."
            >
              <InputTypeInField
                name="custom_config.vertex_location"
                placeholder={VERTEXAI_DEFAULT_LOCATION}
              />
            </InputLayouts.Vertical>
          </FieldWrapper>

          <FieldWrapper>
            <InputLayouts.Vertical
              name="custom_config.vertex_credentials"
              title="API Key"
              subDescription="Attach your API key JSON from Google Cloud to access your models."
            >
              <FileUploadFormField
                name="custom_config.vertex_credentials"
                label=""
              />
            </InputLayouts.Vertical>
          </FieldWrapper>

          <FieldSeparator />

          {!isOnboarding && (
            <DisplayNameField disabled={!!existingLlmProvider} />
          )}

          <FieldSeparator />

          {isOnboarding ? (
            <SingleDefaultModelField placeholder="E.g. gemini-2.5-pro" />
          ) : (
            <ModelsField
              modelConfigurations={modelConfigurations}
              formikProps={formikProps}
              recommendedDefaultModel={
                wellKnownLLMProvider?.recommended_default_model ?? null
              }
              shouldShowAutoUpdateToggle={true}
            />
          )}

          {!isOnboarding && <ModelsAccessField formikProps={formikProps} />}
        </LLMConfigurationModalWrapper>
      )}
    </Formik>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/getModal.tsx
================================================
import { LLMProviderName, LLMProviderView } from "@/interfaces/llm";
import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
import AzureModal from "@/sections/modals/llmConfig/AzureModal";
import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
import CustomModal from "@/sections/modals/llmConfig/CustomModal";
import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
import LMStudioForm from "@/sections/modals/llmConfig/LMStudioForm";
import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
import BifrostModal from "@/sections/modals/llmConfig/BifrostModal";

function detectIfRealOpenAIProvider(provider: LLMProviderView) {
  return (
    provider.provider === LLMProviderName.OPENAI &&
    provider.api_key &&
    !provider.api_base &&
    Object.keys(provider.custom_config || {}).length === 0
  );
}

export function getModalForExistingProvider(
  provider: LLMProviderView,
  open?: boolean,
  onOpenChange?: (open: boolean) => void,
  defaultModelName?: string
) {
  const props = {
    existingLlmProvider: provider,
    open,
    onOpenChange,
    defaultModelName,
  };

  switch (provider.provider) {
    case LLMProviderName.OPENAI:
      // "openai" as a provider name can be used for litellm proxy / any OpenAI-compatible provider
      if (detectIfRealOpenAIProvider(provider)) {
        return <OpenAIModal {...props} />;
      } else {
        return <CustomModal {...props} />;
      }
    case LLMProviderName.ANTHROPIC:
      return <AnthropicModal {...props} />;
    case LLMProviderName.OLLAMA_CHAT:
      return <OllamaModal {...props} />;
    case LLMProviderName.AZURE:
      return <AzureModal {...props} />;
    case LLMProviderName.VERTEX_AI:
      return <VertexAIModal {...props} />;
    case LLMProviderName.BEDROCK:
      return <BedrockModal {...props} />;
    case LLMProviderName.OPENROUTER:
      return <OpenRouterModal {...props} />;
    case LLMProviderName.LM_STUDIO:
      return <LMStudioForm {...props} />;
    case LLMProviderName.LITELLM_PROXY:
      return <LiteLLMProxyModal {...props} />;
    case LLMProviderName.BIFROST:
      return <BifrostModal {...props} />;
    default:
      return <CustomModal {...props} />;
  }
}


================================================
FILE: web/src/sections/modals/llmConfig/shared.tsx
================================================
"use client";

import { ReactNode, useState } from "react";
import { Form, FormikProps } from "formik";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { useAgents } from "@/hooks/useAgents";
import { useUserGroups } from "@/lib/hooks";
import { ModelConfiguration, SimpleKnownModel } from "@/interfaces/llm";
import * as InputLayouts from "@/layouts/input-layouts";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import InputTypeInField from "@/refresh-components/form/InputTypeInField";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
import Switch from "@/refresh-components/inputs/Switch";
import Text from "@/refresh-components/texts/Text";
import { Button, LineItemButton, Tag } from "@opal/components";
import { BaseLLMFormValues } from "@/sections/modals/llmConfig/utils";
import { WithoutStyles } from "@opal/types";
import Separator from "@/refresh-components/Separator";
import { Section } from "@/layouts/general-layouts";
import { Hoverable } from "@opal/core";
import { Content } from "@opal/layouts";
import {
  SvgArrowExchange,
  SvgOnyxOctagon,
  SvgOrganization,
  SvgPlusCircle,
  SvgRefreshCw,
  SvgSparkle,
  SvgUserManage,
  SvgUsers,
  SvgX,
} from "@opal/icons";
import SvgOnyxLogo from "@opal/icons/onyx-logo";
import { Card, EmptyMessageCard } from "@opal/components";
import { ContentAction } from "@opal/layouts";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import useUsers from "@/hooks/useUsers";
import { toast } from "@/hooks/useToast";
import { UserRole } from "@/lib/types";
import Modal from "@/refresh-components/Modal";
import {
  getProviderIcon,
  getProviderDisplayName,
  getProviderProductName,
} from "@/lib/llmConfig/providers";

export function FieldSeparator() {
  return <Separator noPadding className="px-2" />;
}

export type FieldWrapperProps = WithoutStyles<
  React.HTMLAttributes<HTMLDivElement>
>;

export function FieldWrapper(props: FieldWrapperProps) {
  return <div {...props} className="p-2 w-full" />;
}

// ─── DisplayNameField ────────────────────────────────────────────────────────

export interface DisplayNameFieldProps {
  disabled?: boolean;
}

export function DisplayNameField({ disabled = false }: DisplayNameFieldProps) {
  return (
    <FieldWrapper>
      <InputLayouts.Vertical
        name="name"
        title="Display Name"
        subDescription="Used to identify this provider in the app."
      >
        <InputTypeInField
          name="name"
          placeholder="Display Name"
          variant={disabled ? "disabled" : undefined}
        />
      </InputLayouts.Vertical>
    </FieldWrapper>
  );
}

// ─── APIKeyField ─────────────────────────────────────────────────────────────

export interface APIKeyFieldProps {
  optional?: boolean;
  providerName?: string;
}

export function APIKeyField({
  optional = false,
  providerName,
}: APIKeyFieldProps) {
  return (
    <FieldWrapper>
      <InputLayouts.Vertical
        name="api_key"
        title="API Key"
        subDescription={
          providerName
            ? `Paste your API key from ${providerName} to access your models.`
            : "Paste your API key to access your models."
        }
        suffix={optional ? "optional" : undefined}
      >
        <PasswordInputTypeInField name="api_key" placeholder="API Key" />
      </InputLayouts.Vertical>
    </FieldWrapper>
  );
}

// ─── SingleDefaultModelField ─────────────────────────────────────────────────

export interface SingleDefaultModelFieldProps {
  placeholder?: string;
}

export function SingleDefaultModelField({
  placeholder = "E.g. gpt-4o",
}: SingleDefaultModelFieldProps) {
  return (
    <InputLayouts.Vertical
      name="default_model_name"
      title="Default Model"
      description="The model to use by default for this provider unless otherwise specified."
    >
      <InputTypeInField name="default_model_name" placeholder={placeholder} />
    </InputLayouts.Vertical>
  );
}

// ─── ModelsAccessField ──────────────────────────────────────────────────────

/** Prefix used to distinguish group IDs from agent IDs in the combobox. */
const GROUP_PREFIX = "group:";
const AGENT_PREFIX = "agent:";

interface ModelsAccessFieldProps<T> {
  formikProps: FormikProps<T>;
}

export function ModelsAccessField<T extends BaseLLMFormValues>({
  formikProps,
}: ModelsAccessFieldProps<T>) {
  const { agents } = useAgents();
  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();
  const { data: usersData } = useUsers({ includeApiKeys: false });
  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();

  const adminCount =
    usersData?.accepted.filter((u) => u.role === UserRole.ADMIN).length ?? 0;

  const isPublic = formikProps.values.is_public;
  const selectedGroupIds = formikProps.values.groups ?? [];
  const selectedAgentIds = formikProps.values.personas ?? [];

  // Build a flat list of combobox options from groups + agents
  const groupOptions =
    isPaidEnterpriseFeaturesEnabled && !userGroupsIsLoading && userGroups
      ? userGroups.map((g) => ({
          value: `${GROUP_PREFIX}${g.id}`,
          label: g.name,
          description: "Group",
        }))
      : [];

  const agentOptions = agents.map((a) => ({
    value: `${AGENT_PREFIX}${a.id}`,
    label: a.name,
    description: "Agent",
  }));

  // Exclude already-selected items from the dropdown
  const selectedKeys = new Set([
    ...selectedGroupIds.map((id) => `${GROUP_PREFIX}${id}`),
    ...selectedAgentIds.map((id) => `${AGENT_PREFIX}${id}`),
  ]);

  const availableOptions = [...groupOptions, ...agentOptions].filter(
    (opt) => !selectedKeys.has(opt.value)
  );

  // Resolve selected IDs back to full objects for display
  const groupById = new Map((userGroups ?? []).map((g) => [g.id, g]));
  const agentMap = new Map(agents.map((a) => [a.id, a]));

  function handleAccessChange(value: string) {
    if (value === "public") {
      formikProps.setFieldValue("is_public", true);
      formikProps.setFieldValue("groups", []);
      formikProps.setFieldValue("personas", []);
    } else {
      formikProps.setFieldValue("is_public", false);
    }
  }

  function handleSelect(compositeValue: string) {
    if (compositeValue.startsWith(GROUP_PREFIX)) {
      const id = Number(compositeValue.slice(GROUP_PREFIX.length));
      if (!selectedGroupIds.includes(id)) {
        formikProps.setFieldValue("groups", [...selectedGroupIds, id]);
      }
    } else if (compositeValue.startsWith(AGENT_PREFIX)) {
      const id = Number(compositeValue.slice(AGENT_PREFIX.length));
      if (!selectedAgentIds.includes(id)) {
        formikProps.setFieldValue("personas", [...selectedAgentIds, id]);
      }
    }
  }

  function handleRemoveGroup(id: number) {
    formikProps.setFieldValue(
      "groups",
      selectedGroupIds.filter((gid) => gid !== id)
    );
  }

  function handleRemoveAgent(id: number) {
    formikProps.setFieldValue(
      "personas",
      selectedAgentIds.filter((aid) => aid !== id)
    );
  }

  return (
    <div className="flex flex-col w-full">
      <FieldWrapper>
        <InputLayouts.Horizontal
          name="is_public"
          title="Models Access"
          description="Who can access this provider."
        >
          <InputSelect
            value={isPublic ? "public" : "private"}
            onValueChange={handleAccessChange}
          >
            <InputSelect.Trigger placeholder="Select access level" />
            <InputSelect.Content>
              <InputSelect.Item value="public" icon={SvgOrganization}>
                All Users & Agents
              </InputSelect.Item>
              <InputSelect.Item value="private" icon={SvgUsers}>
                Named Groups & Agents
              </InputSelect.Item>
            </InputSelect.Content>
          </InputSelect>
        </InputLayouts.Horizontal>
      </FieldWrapper>

      {!isPublic && (
        <Card background="light" border="none" padding="sm">
          <Section gap={0.5}>
            <InputComboBox
              placeholder="Add groups and agents"
              value=""
              onChange={() => {}}
              onValueChange={handleSelect}
              options={availableOptions}
              strict
              leftSearchIcon
            />

            <Card background="heavy" border="none" padding="sm">
              <ContentAction
                icon={SvgUserManage}
                title="Admin"
                description={`${adminCount} ${
                  adminCount === 1 ? "member" : "members"
                }`}
                sizePreset="main-ui"
                variant="section"
                rightChildren={
                  <Text secondaryBody text03>
                    Always shared
                  </Text>
                }
                paddingVariant="fit"
              />
            </Card>
            {selectedGroupIds.length > 0 && (
              <div className="grid grid-cols-2 gap-1 w-full">
                {selectedGroupIds.map((id) => {
                  const group = groupById.get(id);
                  const memberCount = group?.users.length ?? 0;
                  return (
                    <div key={`group-${id}`} className="min-w-0">
                      <Card background="heavy" border="none" padding="sm">
                        <ContentAction
                          icon={SvgUsers}
                          title={group?.name ?? `Group ${id}`}
                          description={`${memberCount} ${
                            memberCount === 1 ? "member" : "members"
                          }`}
                          sizePreset="main-ui"
                          variant="section"
                          rightChildren={
                            <Button
                              size="sm"
                              prominence="internal"
                              icon={SvgX}
                              onClick={() => handleRemoveGroup(id)}
                              type="button"
                            />
                          }
                          paddingVariant="fit"
                        />
                      </Card>
                    </div>
                  );
                })}
              </div>
            )}

            <FieldSeparator />

            {selectedAgentIds.length > 0 ? (
              <div className="grid grid-cols-2 gap-1 w-full">
                {selectedAgentIds.map((id) => {
                  const agent = agentMap.get(id);
                  return (
                    <div key={`agent-${id}`} className="min-w-0">
                      <Card background="heavy" border="none" padding="sm">
                        <ContentAction
                          icon={
                            agent
                              ? () => <AgentAvatar agent={agent} size={20} />
                              : SvgSparkle
                          }
                          title={agent?.name ?? `Agent ${id}`}
                          description="Agent"
                          sizePreset="main-ui"
                          variant="section"
                          rightChildren={
                            <Button
                              size="sm"
                              prominence="internal"
                              icon={SvgX}
                              onClick={() => handleRemoveAgent(id)}
                              type="button"
                            />
                          }
                          paddingVariant="fit"
                        />
                      </Card>
                    </div>
                  );
                })}
              </div>
            ) : (
              <div className="w-full p-2">
                <Content
                  icon={SvgOnyxOctagon}
                  title="No agents added"
                  description="This provider will not be used by any agents."
                  variant="section"
                  sizePreset="main-ui"
                />
              </div>
            )}
          </Section>
        </Card>
      )}
    </div>
  );
}

// ─── ModelsField ─────────────────────────────────────────────────────

export interface ModelsFieldProps<T> {
  formikProps: FormikProps<T>;
  modelConfigurations: ModelConfiguration[];
  recommendedDefaultModel: SimpleKnownModel | null;
  shouldShowAutoUpdateToggle: boolean;
  /** Called when the user clicks the refresh button to re-fetch models. */
  onRefetch?: () => Promise<void> | void;
  /** Called when the user adds a custom model by name. Enables the "Add Model" input. */
  onAddModel?: (modelName: string) => void;
}

export function ModelsField<T extends BaseLLMFormValues>({
  formikProps,
  modelConfigurations,
  recommendedDefaultModel,
  shouldShowAutoUpdateToggle,
  onRefetch,
  onAddModel,
}: ModelsFieldProps<T>) {
  const [newModelName, setNewModelName] = useState("");
  const isAutoMode = formikProps.values.is_auto_mode;
  const selectedModels = formikProps.values.selected_model_names ?? [];
  const defaultModel = formikProps.values.default_model_name;

  function handleCheckboxChange(modelName: string, checked: boolean) {
    // Read current values inside the handler to avoid stale closure issues
    const currentSelected = formikProps.values.selected_model_names ?? [];
    const currentDefault = formikProps.values.default_model_name;

    if (checked) {
      const newSelected = [...currentSelected, modelName];
      formikProps.setFieldValue("selected_model_names", newSelected);
      // If this is the first model, set it as default
      if (currentSelected.length === 0) {
        formikProps.setFieldValue("default_model_name", modelName);
      }
    } else {
      const newSelected = currentSelected.filter((name) => name !== modelName);
      formikProps.setFieldValue("selected_model_names", newSelected);
      // If removing the default, set the first remaining model as default
      if (currentDefault === modelName && newSelected.length > 0) {
        formikProps.setFieldValue("default_model_name", newSelected[0]);
      } else if (newSelected.length === 0) {
        formikProps.setFieldValue("default_model_name", undefined);
      }
    }
  }

  function handleSetDefault(modelName: string) {
    formikProps.setFieldValue("default_model_name", modelName);
  }

  function handleToggleAutoMode(nextIsAutoMode: boolean) {
    formikProps.setFieldValue("is_auto_mode", nextIsAutoMode);
    formikProps.setFieldValue(
      "selected_model_names",
      modelConfigurations.filter((m) => m.is_visible).map((m) => m.name)
    );
    formikProps.setFieldValue(
      "default_model_name",
      recommendedDefaultModel?.name ?? undefined
    );
  }

  const allSelected =
    modelConfigurations.length > 0 &&
    modelConfigurations.every((m) => selectedModels.includes(m.name));

  function handleToggleSelectAll() {
    if (allSelected) {
      formikProps.setFieldValue("selected_model_names", []);
      formikProps.setFieldValue("default_model_name", undefined);
    } else {
      const allNames = modelConfigurations.map((m) => m.name);
      formikProps.setFieldValue("selected_model_names", allNames);
      if (!formikProps.values.default_model_name && allNames.length > 0) {
        formikProps.setFieldValue("default_model_name", allNames[0]);
      }
    }
  }

  const visibleModels = modelConfigurations.filter((m) => m.is_visible);

  return (
    <Card background="light" border="none" padding="sm">
      <Section gap={0.5}>
        <InputLayouts.Horizontal
          title="Models"
          description="Select models to make available for this provider."
          nonInteractive
          center
        >
          <Section flexDirection="row" gap={0}>
            <Button
              disabled={isAutoMode || modelConfigurations.length === 0}
              prominence="tertiary"
              size="md"
              onClick={handleToggleSelectAll}
            >
              {allSelected ? "Unselect All" : "Select All"}
            </Button>
            {onRefetch && (
              <Button
                prominence="tertiary"
                icon={SvgRefreshCw}
                onClick={async () => {
                  try {
                    await onRefetch();
                  } catch (err) {
                    toast.error(
                      err instanceof Error
                        ? err.message
                        : "Failed to fetch models"
                    );
                  }
                }}
              />
            )}
          </Section>
        </InputLayouts.Horizontal>

        {modelConfigurations.length === 0 ? (
          <EmptyMessageCard title="No models available." padding="sm" />
        ) : (
          <Section gap={0.25}>
            {isAutoMode
              ? // Auto mode: read-only display
                visibleModels.map((model) => (
                  <Hoverable.Root
                    key={model.name}
                    group="LLMConfigurationButton"
                    widthVariant="full"
                  >
                    <LineItemButton
                      variant="section"
                      sizePreset="main-ui"
                      selectVariant="select-heavy"
                      state="selected"
                      icon={() => <Checkbox checked />}
                      title={model.display_name || model.name}
                      rightChildren={
                        model.name === defaultModel ? (
                          <Section>
                            <Tag title="Default Model" color="blue" />
                          </Section>
                        ) : undefined
                      }
                    />
                  </Hoverable.Root>
                ))
              : // Manual mode: checkbox selection
                modelConfigurations.map((modelConfiguration) => {
                  const isSelected = selectedModels.includes(
                    modelConfiguration.name
                  );
                  const isDefault = defaultModel === modelConfiguration.name;

                  return (
                    <Hoverable.Root
                      key={modelConfiguration.name}
                      group="LLMConfigurationButton"
                      widthVariant="full"
                    >
                      <LineItemButton
                        variant="section"
                        sizePreset="main-ui"
                        selectVariant="select-heavy"
                        state={isSelected ? "selected" : "empty"}
                        icon={() => <Checkbox checked={isSelected} />}
                        title={modelConfiguration.name}
                        onClick={() =>
                          handleCheckboxChange(
                            modelConfiguration.name,
                            !isSelected
                          )
                        }
                        rightChildren={
                          isSelected ? (
                            isDefault ? (
                              <Section>
                                <Tag color="blue" title="Default Model" />
                              </Section>
                            ) : (
                              <Hoverable.Item
                                group="LLMConfigurationButton"
                                variant="opacity-on-hover"
                              >
                                <Button
                                  size="sm"
                                  prominence="internal"
                                  onClick={(e) => {
                                    e.stopPropagation();
                                    handleSetDefault(modelConfiguration.name);
                                  }}
                                  type="button"
                                >
                                  Set as default
                                </Button>
                              </Hoverable.Item>
                            )
                          ) : undefined
                        }
                      />
                    </Hoverable.Root>
                  );
                })}
          </Section>
        )}

        {onAddModel && !isAutoMode && (
          <Section flexDirection="row" gap={0.5}>
            <div className="flex-1">
              <InputTypeIn
                placeholder="Enter model name"
                value={newModelName}
                onChange={(e) => setNewModelName(e.target.value)}
                onKeyDown={(e) => {
                  if (e.key === "Enter" && newModelName.trim()) {
                    e.preventDefault();
                    const trimmed = newModelName.trim();
                    if (!modelConfigurations.some((m) => m.name === trimmed)) {
                      onAddModel(trimmed);
                      setNewModelName("");
                    }
                  }
                }}
                showClearButton={false}
              />
            </div>
            <Button
              prominence="secondary"
              icon={SvgPlusCircle}
              type="button"
              disabled={
                !newModelName.trim() ||
                modelConfigurations.some((m) => m.name === newModelName.trim())
              }
              onClick={() => {
                const trimmed = newModelName.trim();
                if (
                  trimmed &&
                  !modelConfigurations.some((m) => m.name === trimmed)
                ) {
                  onAddModel(trimmed);
                  setNewModelName("");
                }
              }}
            >
              Add Model
            </Button>
          </Section>
        )}

        {shouldShowAutoUpdateToggle && (
          <InputLayouts.Horizontal
            title="Auto Update"
            description="Update the available models when new models are released."
          >
            <Switch
              checked={isAutoMode}
              onCheckedChange={handleToggleAutoMode}
            />
          </InputLayouts.Horizontal>
        )}
      </Section>
    </Card>
  );
}

// ============================================================================
// LLMConfigurationModalWrapper
// ============================================================================

interface LLMConfigurationModalWrapperProps {
  providerEndpoint: string;
  providerName?: string;
  existingProviderName?: string;
  onClose: () => void;
  isFormValid: boolean;
  isDirty?: boolean;
  isTesting?: boolean;
  isSubmitting?: boolean;
  children: ReactNode;
}

export function LLMConfigurationModalWrapper({
  providerEndpoint,
  providerName,
  existingProviderName,
  onClose,
  isFormValid,
  isDirty,
  isTesting,
  isSubmitting,
  children,
}: LLMConfigurationModalWrapperProps) {
  const busy = isTesting || isSubmitting;
  const providerIcon = getProviderIcon(providerEndpoint);
  const providerDisplayName =
    providerName ?? getProviderDisplayName(providerEndpoint);
  const providerProductName = getProviderProductName(providerEndpoint);

  const title = existingProviderName
    ? `Configure "${existingProviderName}"`
    : `Set up ${providerProductName}`;
  const description = `Connect to ${providerDisplayName} and set up your ${providerProductName} models.`;

  return (
    <Modal open onOpenChange={onClose}>
      <Modal.Content width="lg" height="lg">
        <Form className="flex flex-col h-full min-h-0">
          <Modal.Header
            icon={providerIcon}
            moreIcon1={SvgArrowExchange}
            moreIcon2={SvgOnyxLogo}
            title={title}
            description={description}
            onClose={onClose}
          />
          <Modal.Body padding={0.5} gap={0.5}>
            {children}
          </Modal.Body>
          <Modal.Footer>
            <Button prominence="secondary" onClick={onClose} type="button">
              Cancel
            </Button>
            <Button
              disabled={
                !isFormValid || busy || (!!existingProviderName && !isDirty)
              }
              type="submit"
              icon={busy ? SimpleLoader : undefined}
            >
              {existingProviderName
                ? busy
                  ? "Updating"
                  : "Update"
                : busy
                  ? "Connecting"
                  : "Connect"}
            </Button>
          </Modal.Footer>
        </Form>
      </Modal.Content>
    </Modal>
  );
}


================================================
FILE: web/src/sections/modals/llmConfig/svc.ts
================================================
import {
  LLMProviderName,
  LLMProviderView,
  ModelConfiguration,
} from "@/interfaces/llm";
import {
  LLM_ADMIN_URL,
  LLM_PROVIDERS_ADMIN_URL,
} from "@/lib/llmConfig/constants";
import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
import { toast } from "@/hooks/useToast";
import isEqual from "lodash/isEqual";
import { parseAzureTargetUri } from "@/lib/azureTargetUri";
import {
  track,
  AnalyticsEvent,
  LLMProviderConfiguredSource,
} from "@/lib/analytics";
import {
  BaseLLMFormValues,
  SubmitLLMProviderParams,
  SubmitOnboardingProviderParams,
  TestApiKeyResult,
  filterModelConfigurations,
  getAutoModeModelConfigurations,
} from "@/sections/modals/llmConfig/utils";

const submitLlmTestRequest = async (
  payload: Record<string, unknown>,
  fallbackErrorMessage: string
): Promise<TestApiKeyResult> => {
  try {
    const response = await fetch("/api/admin/llm/test", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });

    if (!response.ok) {
      const errorMsg = (await response.json()).detail;
      return { ok: false, errorMessage: errorMsg };
    }

    return { ok: true };
  } catch {
    return {
      ok: false,
      errorMessage: fallbackErrorMessage,
    };
  }
};

export const submitLLMProvider = async <T extends BaseLLMFormValues>({
  providerName,
  values,
  initialValues,
  modelConfigurations,
  existingLlmProvider,
  shouldMarkAsDefault,
  hideSuccess,
  setIsTesting,
  mutate,
  onClose,
  setSubmitting,
}: SubmitLLMProviderParams<T>): Promise<void> => {
  setSubmitting(true);

  const { selected_model_names: visibleModels, api_key, ...rest } = values;

  // In auto mode, use recommended models from descriptor
  // In manual mode, use user's selection
  let filteredModelConfigurations: ModelConfiguration[];
  let finalDefaultModelName = rest.default_model_name;

  if (values.is_auto_mode) {
    filteredModelConfigurations =
      getAutoModeModelConfigurations(modelConfigurations);

    // In auto mode, use the first recommended model as default if current default isn't in the list
    const visibleModelNames = new Set(
      filteredModelConfigurations.map((m) => m.name)
    );
    if (
      finalDefaultModelName &&
      !visibleModelNames.has(finalDefaultModelName)
    ) {
      finalDefaultModelName = filteredModelConfigurations[0]?.name ?? "";
    }
  } else {
    filteredModelConfigurations = filterModelConfigurations(
      modelConfigurations,
      visibleModels,
      rest.default_model_name as string | undefined
    );
  }

  const customConfigChanged = !isEqual(
    values.custom_config,
    initialValues.custom_config
  );

  const normalizedApiBase =
    typeof rest.api_base === "string" && rest.api_base.trim() === ""
      ? undefined
      : rest.api_base;

  const finalValues = {
    ...rest,
    api_base: normalizedApiBase,
    default_model_name: finalDefaultModelName,
    api_key,
    api_key_changed: api_key !== (initialValues.api_key as string | undefined),
    custom_config_changed: customConfigChanged,
    model_configurations: filteredModelConfigurations,
  };

  // Test the configuration
  if (!isEqual(finalValues, initialValues)) {
    setIsTesting(true);

    const response = await fetch("/api/admin/llm/test", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        provider: providerName,
        ...finalValues,
        model: finalDefaultModelName,
        id: existingLlmProvider?.id,
      }),
    });
    setIsTesting(false);

    if (!response.ok) {
      const errorMsg = (await response.json()).detail;
      toast.error(errorMsg);
      setSubmitting(false);
      return;
    }
  }

  const response = await fetch(
    `${LLM_PROVIDERS_ADMIN_URL}${
      existingLlmProvider ? "" : "?is_creation=true"
    }`,
    {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        provider: providerName,
        ...finalValues,
        id: existingLlmProvider?.id,
      }),
    }
  );

  if (!response.ok) {
    const errorMsg = (await response.json()).detail;
    const fullErrorMsg = existingLlmProvider
      ? `Failed to update provider: ${errorMsg}`
      : `Failed to enable provider: ${errorMsg}`;
    toast.error(fullErrorMsg);
    return;
  }

  if (shouldMarkAsDefault) {
    const newLlmProvider = (await response.json()) as LLMProviderView;
    const setDefaultResponse = await fetch(`${LLM_ADMIN_URL}/default`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        provider_id: newLlmProvider.id,
        model_name: finalDefaultModelName,
      }),
    });
    if (!setDefaultResponse.ok) {
      const errorMsg = (await setDefaultResponse.json()).detail;
      toast.error(`Failed to set provider as default: ${errorMsg}`);
      return;
    }
  }

  await refreshLlmProviderCaches(mutate);
  onClose();

  if (!hideSuccess) {
    const successMsg = existingLlmProvider
      ? "Provider updated successfully!"
      : "Provider enabled successfully!";
    toast.success(successMsg);
  }

  const knownProviders = new Set<string>(Object.values(LLMProviderName));
  track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {
    provider: knownProviders.has(providerName) ? providerName : "custom",
    is_creation: !existingLlmProvider,
    source: LLMProviderConfiguredSource.ADMIN_PAGE,
  });

  setSubmitting(false);
};

export const testApiKeyHelper = async (
  providerName: string,
  formValues: Record<string, unknown>,
  apiKey?: string,
  modelName?: string,
  customConfigOverride?: Record<string, unknown>
): Promise<TestApiKeyResult> => {
  let finalApiBase = formValues?.api_base;
  let finalApiVersion = formValues?.api_version;
  let finalDeploymentName = formValues?.deployment_name;

  if (providerName === "azure" && formValues?.target_uri) {
    try {
      const { url, apiVersion, deploymentName } = parseAzureTargetUri(
        formValues.target_uri as string
      );
      finalApiBase = url.origin;
      finalApiVersion = apiVersion;
      finalDeploymentName = deploymentName || "";
    } catch {
      // leave defaults so validation can surface errors upstream
    }
  }

  const payload = {
    api_key: apiKey ?? formValues?.api_key,
    api_base: finalApiBase,
    api_version: finalApiVersion,
    deployment_name: finalDeploymentName,
    provider: providerName,
    api_key_changed: true,
    custom_config_changed: true,
    custom_config: {
      ...((formValues?.custom_config as Record<string, unknown>) ?? {}),
      ...(customConfigOverride ?? {}),
    },
    model: modelName ?? (formValues?.default_model_name as string) ?? "",
  };

  return await submitLlmTestRequest(
    payload,
    "An error occurred while testing the API key."
  );
};

export const testCustomProvider = async (
  formValues: Record<string, unknown>
): Promise<TestApiKeyResult> => {
  return await submitLlmTestRequest(
    { ...formValues },
    "An error occurred while testing the custom provider."
  );
};

export const submitOnboardingProvider = async ({
  providerName,
  payload,
  onboardingState,
  onboardingActions,
  isCustomProvider,
  onClose,
  setIsSubmitting,
}: SubmitOnboardingProviderParams): Promise<void> => {
  setIsSubmitting(true);

  // Test credentials
  let result: TestApiKeyResult;
  if (isCustomProvider) {
    result = await testCustomProvider(payload);
  } else {
    result = await testApiKeyHelper(providerName, payload);
  }

  if (!result.ok) {
    toast.error(result.errorMessage);
    setIsSubmitting(false);
    return;
  }

  // Create provider
  const response = await fetch(`${LLM_PROVIDERS_ADMIN_URL}?is_creation=true`, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const errorMsg = (await response.json()).detail;
    toast.error(errorMsg);
    setIsSubmitting(false);
    return;
  }

  // Set as default if first provider
  if (
    onboardingState?.data?.llmProviders == null ||
    onboardingState.data.llmProviders.length === 0
  ) {
    try {
      const newLlmProvider = await response.json();
      if (newLlmProvider?.id != null) {
        const defaultModelName =
          (payload as Record<string, string>).default_model_name ??
          (payload as Record<string, ModelConfiguration[]>)
            .model_configurations?.[0]?.name ??
          "";

        if (defaultModelName) {
          const setDefaultResponse = await fetch(`${LLM_ADMIN_URL}/default`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              provider_id: newLlmProvider.id,
              model_name: defaultModelName,
            }),
          });
          if (!setDefaultResponse.ok) {
            const err = await setDefaultResponse.json().catch(() => ({}));
            toast.error(err?.detail ?? "Failed to set provider as default");
            setIsSubmitting(false);
            return;
          }
        }
      }
    } catch (_e) {
      toast.error("Failed to set new provider as default");
    }
  }

  track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {
    provider: isCustomProvider ? "custom" : providerName,
    is_creation: true,
    source: LLMProviderConfiguredSource.CHAT_ONBOARDING,
  });

  // Update onboarding state
  onboardingActions.updateData({
    llmProviders: [
      ...(onboardingState?.data.llmProviders ?? []),
      isCustomProvider ? "custom" : providerName,
    ],
  });
  onboardingActions.setButtonActive(true);

  setIsSubmitting(false);
  onClose();
};


================================================
FILE: web/src/sections/modals/llmConfig/utils.ts
================================================
import {
  LLMProviderView,
  ModelConfiguration,
  WellKnownLLMProviderDescriptor,
} from "@/interfaces/llm";
import * as Yup from "yup";
import { ScopedMutator } from "swr";
import { OnboardingActions, OnboardingState } from "@/interfaces/onboarding";

// Common class names for the Form component across all LLM provider forms
export const LLM_FORM_CLASS_NAME = "flex flex-col gap-y-4 items-stretch mt-6";

export const buildDefaultInitialValues = (
  existingLlmProvider?: LLMProviderView,
  modelConfigurations?: ModelConfiguration[],
  currentDefaultModelName?: string
) => {
  const defaultModelName =
    (currentDefaultModelName &&
    existingLlmProvider?.model_configurations?.some(
      (m) => m.name === currentDefaultModelName
    )
      ? currentDefaultModelName
      : undefined) ??
    existingLlmProvider?.model_configurations?.[0]?.name ??
    modelConfigurations?.[0]?.name ??
    "";

  // Auto mode must be explicitly enabled by the user
  // Default to false for new providers, preserve existing value when editing
  const isAutoMode = existingLlmProvider?.is_auto_mode ?? false;

  return {
    name: existingLlmProvider?.name || "",
    default_model_name: defaultModelName,
    is_public: existingLlmProvider?.is_public ?? true,
    is_auto_mode: isAutoMode,
    groups: existingLlmProvider?.groups ?? [],
    personas: existingLlmProvider?.personas ?? [],
    selected_model_names: existingLlmProvider
      ? existingLlmProvider.model_configurations
          .filter((modelConfiguration) => modelConfiguration.is_visible)
          .map((modelConfiguration) => modelConfiguration.name)
      : modelConfigurations
          ?.filter((modelConfiguration) => modelConfiguration.is_visible)
          .map((modelConfiguration) => modelConfiguration.name) ?? [],
  };
};

export const buildDefaultValidationSchema = () => {
  return Yup.object({
    name: Yup.string().required("Display Name is required"),
    default_model_name: Yup.string().required("Model name is required"),
    is_public: Yup.boolean().required(),
    is_auto_mode: Yup.boolean().required(),
    groups: Yup.array().of(Yup.number()),
    personas: Yup.array().of(Yup.number()),
    selected_model_names: Yup.array().of(Yup.string()),
  });
};

export const buildAvailableModelConfigurations = (
  existingLlmProvider?: LLMProviderView,
  wellKnownLLMProvider?: WellKnownLLMProviderDescriptor
): ModelConfiguration[] => {
  const existingModels = existingLlmProvider?.model_configurations ?? [];
  const wellKnownModels = wellKnownLLMProvider?.known_models ?? [];

  // Create a map to deduplicate by model name, preferring existing models
  const modelMap = new Map<string, ModelConfiguration>();

  // Add well-known models first
  wellKnownModels.forEach((model) => {
    modelMap.set(model.name, model);
  });

  // Override with existing models (they take precedence)
  existingModels.forEach((model) => {
    modelMap.set(model.name, model);
  });

  return Array.from(modelMap.values());
};

// Base form values that all provider forms share
export interface BaseLLMFormValues {
  name: string;
  api_key?: string;
  api_base?: string;
  default_model_name?: string;
  is_public: boolean;
  is_auto_mode: boolean;
  groups: number[];
  personas: number[];
  selected_model_names: string[];
  custom_config?: Record<string, string>;
}

export interface SubmitLLMProviderParams<
  T extends BaseLLMFormValues = BaseLLMFormValues,
> {
  providerName: string;
  values: T;
  initialValues: T;
  modelConfigurations: ModelConfiguration[];
  existingLlmProvider?: LLMProviderView;
  shouldMarkAsDefault?: boolean;
  hideSuccess?: boolean;
  setIsTesting: (testing: boolean) => void;
  mutate: ScopedMutator;
  onClose: () => void;
  setSubmitting: (submitting: boolean) => void;
}

export const filterModelConfigurations = (
  currentModelConfigurations: ModelConfiguration[],
  visibleModels: string[],
  defaultModelName?: string
): ModelConfiguration[] => {
  return currentModelConfigurations
    .map(
      (modelConfiguration): ModelConfiguration => ({
        name: modelConfiguration.name,
        is_visible: visibleModels.includes(modelConfiguration.name),
        max_input_tokens: modelConfiguration.max_input_tokens ?? null,
        supports_image_input: modelConfiguration.supports_image_input,
        supports_reasoning: modelConfiguration.supports_reasoning,
        display_name: modelConfiguration.display_name,
      })
    )
    .filter(
      (modelConfiguration) =>
        modelConfiguration.name === defaultModelName ||
        modelConfiguration.is_visible
    );
};

// Helper to get model configurations for auto mode
// In auto mode, we include ALL models but preserve their visibility status
// Models in the auto config are visible, others are created but not visible
export const getAutoModeModelConfigurations = (
  modelConfigurations: ModelConfiguration[]
): ModelConfiguration[] => {
  return modelConfigurations.map(
    (modelConfiguration): ModelConfiguration => ({
      name: modelConfiguration.name,
      is_visible: modelConfiguration.is_visible,
      max_input_tokens: modelConfiguration.max_input_tokens ?? null,
      supports_image_input: modelConfiguration.supports_image_input,
      supports_reasoning: modelConfiguration.supports_reasoning,
      display_name: modelConfiguration.display_name,
    })
  );
};

export type TestApiKeyResult =
  | { ok: true }
  | { ok: false; errorMessage: string };

export const getModelOptions = (
  fetchedModelConfigurations: Array<{ name: string }>
) => {
  return fetchedModelConfigurations.map((model) => ({
    label: model.name,
    value: model.name,
  }));
};

/** Initial values used by onboarding forms (flat shape, always creating new). */
export const buildOnboardingInitialValues = () => ({
  name: "",
  provider: "",
  api_key: "",
  api_base: "",
  api_version: "",
  default_model_name: "",
  model_configurations: [] as ModelConfiguration[],
  custom_config: {} as Record<string, string>,
  api_key_changed: true,
  groups: [] as number[],
  is_public: true,
  is_auto_mode: false,
  personas: [] as number[],
  selected_model_names: [] as string[],
  deployment_name: "",
  target_uri: "",
});

export interface SubmitOnboardingProviderParams {
  providerName: string;
  payload: Record<string, unknown>;
  onboardingState: OnboardingState;
  onboardingActions: OnboardingActions;
  isCustomProvider: boolean;
  onClose: () => void;
  setIsSubmitting: (submitting: boolean) => void;
}


================================================
FILE: web/src/sections/onboarding/OnboardingFlow.tsx
================================================
"use client";

import { memo } from "react";
import OnboardingHeader from "./components/OnboardingHeader";
import NameStep from "./steps/NameStep";
import LLMStep from "./steps/LLMStep";
import FinalStep from "./steps/FinalStep";
import {
  OnboardingActions,
  OnboardingState,
  OnboardingStep,
} from "@/interfaces/onboarding";
import { WellKnownLLMProviderDescriptor } from "@/interfaces/llm";
import { useUser } from "@/providers/UserProvider";
import { UserRole } from "@/lib/types";
import NonAdminStep from "./components/NonAdminStep";

type OnboardingFlowProps = {
  showOnboarding: boolean;
  handleHideOnboarding: () => void;
  handleFinishOnboarding: () => void;
  state: OnboardingState;
  actions: OnboardingActions;
  llmDescriptors: WellKnownLLMProviderDescriptor[];
};

const OnboardingFlowInner = ({
  showOnboarding,
  handleHideOnboarding,
  handleFinishOnboarding,
  state: onboardingState,
  actions: onboardingActions,
  llmDescriptors,
}: OnboardingFlowProps) => {
  const { user } = useUser();

  if (!user) return null;

  const hasStarted = onboardingState.currentStep !== OnboardingStep.Welcome;

  return user.role === UserRole.ADMIN ? (
    showOnboarding ? (
      <div
        className="flex flex-col items-center justify-center w-full max-w-[var(--app-page-main-content-width)] gap-2 mb-4"
        aria-label="onboarding-flow"
      >
        <OnboardingHeader
          state={onboardingState}
          actions={onboardingActions}
          handleHideOnboarding={handleHideOnboarding}
          handleFinishOnboarding={handleFinishOnboarding}
        />
        {hasStarted && (
          <div className="relative w-full overflow-hidden">
            <div className="flex flex-col gap-2 animate-in slide-in-from-right duration-500 ease-out">
              <NameStep state={onboardingState} actions={onboardingActions} />
              <LLMStep
                state={onboardingState}
                actions={onboardingActions}
                llmDescriptors={llmDescriptors}
                disabled={
                  onboardingState.currentStep !== OnboardingStep.LlmSetup
                }
              />
              <div
                className={
                  "transition-all duration-500 ease-out " +
                  (onboardingState.currentStep === OnboardingStep.Complete
                    ? "opacity-100 translate-x-0"
                    : "opacity-0 translate-x-full")
                }
              >
                {onboardingState.currentStep === OnboardingStep.Complete && (
                  <FinalStep />
                )}
              </div>
            </div>
          </div>
        )}
      </div>
    ) : (
      // When showOnboarding is false, the parent only renders this component
      // if the admin hasn't set their name.
      <NonAdminStep />
    )
  ) : !user.personalization?.name ? (
    <NonAdminStep />
  ) : null;
};

const OnboardingFlow = memo(OnboardingFlowInner);
export default OnboardingFlow;


================================================
FILE: web/src/sections/onboarding/__tests__/onboardingReducer.test.ts
================================================
import { onboardingReducer, initialState } from "../reducer";
import {
  OnboardingActionType,
  OnboardingStep,
  OnboardingState,
} from "@/interfaces/onboarding";

describe("onboardingReducer", () => {
  describe("initial state", () => {
    it("starts at Welcome step with default values", () => {
      expect(initialState).toEqual({
        currentStep: OnboardingStep.Welcome,
        stepIndex: 0,
        totalSteps: 3,
        data: {},
        isButtonActive: true,
        isLoading: false,
      });
    });
  });

  describe("NEXT_STEP", () => {
    it("advances Welcome -> Name", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.NEXT_STEP,
      });
      expect(result.currentStep).toBe(OnboardingStep.Name);
      expect(result.stepIndex).toBe(1);
    });

    it("advances Name -> LlmSetup", () => {
      const state: OnboardingState = {
        ...initialState,
        currentStep: OnboardingStep.Name,
        stepIndex: 1,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.NEXT_STEP,
      });
      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);
      expect(result.stepIndex).toBe(2);
    });

    it("advances LlmSetup -> Complete and sets isButtonActive to true", () => {
      const state: OnboardingState = {
        ...initialState,
        currentStep: OnboardingStep.LlmSetup,
        stepIndex: 2,
        isButtonActive: false,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.NEXT_STEP,
      });
      expect(result.currentStep).toBe(OnboardingStep.Complete);
      expect(result.stepIndex).toBe(3);
      expect(result.isButtonActive).toBe(true);
    });

    it("is a no-op when already at Complete", () => {
      const state: OnboardingState = {
        ...initialState,
        currentStep: OnboardingStep.Complete,
        stepIndex: 3,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.NEXT_STEP,
      });
      expect(result).toBe(state);
    });
  });

  describe("PREV_STEP", () => {
    it("goes Complete -> LlmSetup", () => {
      const state: OnboardingState = {
        ...initialState,
        currentStep: OnboardingStep.Complete,
        stepIndex: 3,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.PREV_STEP,
      });
      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);
      expect(result.stepIndex).toBe(2);
    });

    it("goes LlmSetup -> Name", () => {
      const state: OnboardingState = {
        ...initialState,
        currentStep: OnboardingStep.LlmSetup,
        stepIndex: 2,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.PREV_STEP,
      });
      expect(result.currentStep).toBe(OnboardingStep.Name);
      expect(result.stepIndex).toBe(1);
    });

    it("is a no-op when already at Welcome", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.PREV_STEP,
      });
      expect(result).toBe(initialState);
    });
  });

  describe("GO_TO_STEP", () => {
    it("jumps directly to any step", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.GO_TO_STEP,
        step: OnboardingStep.LlmSetup,
      });
      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);
      expect(result.stepIndex).toBe(2);
    });

    it("sets isButtonActive to true when jumping to Complete", () => {
      const state: OnboardingState = {
        ...initialState,
        isButtonActive: false,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.GO_TO_STEP,
        step: OnboardingStep.Complete,
      });
      expect(result.isButtonActive).toBe(true);
      expect(result.stepIndex).toBe(3);
    });

    it("preserves isButtonActive when jumping to non-Complete step", () => {
      const state: OnboardingState = {
        ...initialState,
        isButtonActive: false,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.GO_TO_STEP,
        step: OnboardingStep.Name,
      });
      expect(result.isButtonActive).toBe(false);
    });
  });

  describe("UPDATE_DATA", () => {
    it("merges userName into data", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.UPDATE_DATA,
        payload: { userName: "Alice" },
      });
      expect(result.data.userName).toBe("Alice");
    });

    it("merges llmProviders into data", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.UPDATE_DATA,
        payload: { llmProviders: ["openai", "anthropic"] },
      });
      expect(result.data.llmProviders).toEqual(["openai", "anthropic"]);
    });

    it("preserves existing data fields when merging new ones", () => {
      const state: OnboardingState = {
        ...initialState,
        data: { userName: "Alice" },
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.UPDATE_DATA,
        payload: { llmProviders: ["openai"] },
      });
      expect(result.data.userName).toBe("Alice");
      expect(result.data.llmProviders).toEqual(["openai"]);
    });
  });

  describe("SET_BUTTON_ACTIVE", () => {
    it("sets isButtonActive to false", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.SET_BUTTON_ACTIVE,
        isButtonActive: false,
      });
      expect(result.isButtonActive).toBe(false);
    });

    it("sets isButtonActive to true", () => {
      const state: OnboardingState = {
        ...initialState,
        isButtonActive: false,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.SET_BUTTON_ACTIVE,
        isButtonActive: true,
      });
      expect(result.isButtonActive).toBe(true);
    });
  });

  describe("SET_LOADING", () => {
    it("sets isLoading to true", () => {
      const result = onboardingReducer(initialState, {
        type: OnboardingActionType.SET_LOADING,
        isLoading: true,
      });
      expect(result.isLoading).toBe(true);
    });

    it("sets isLoading to false", () => {
      const state: OnboardingState = {
        ...initialState,
        isLoading: true,
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.SET_LOADING,
        isLoading: false,
      });
      expect(result.isLoading).toBe(false);
    });
  });

  describe("RESET", () => {
    it("returns to initial state", () => {
      const state: OnboardingState = {
        currentStep: OnboardingStep.Complete,
        stepIndex: 3,
        totalSteps: 3,
        data: { userName: "Alice", llmProviders: ["openai"] },
        isButtonActive: false,
        isLoading: true,
        error: "some error",
      };
      const result = onboardingReducer(state, {
        type: OnboardingActionType.RESET,
      });
      expect(result).toEqual(initialState);
    });
  });

  describe("unknown action", () => {
    it("returns state unchanged for unknown action type", () => {
      const result = onboardingReducer(initialState, {
        type: "UNKNOWN_ACTION" as OnboardingActionType,
      } as any);
      expect(result).toBe(initialState);
    });
  });
});


================================================
FILE: web/src/sections/onboarding/components/LLMProviderCard.tsx
================================================
"use client";

import { memo, useCallback, useState } from "react";
import Text from "@/refresh-components/texts/Text";
import Truncated from "@/refresh-components/texts/Truncated";
import IconButton from "@/refresh-components/buttons/IconButton";
import { cn, noProp } from "@/lib/utils";
import { Disabled } from "@opal/core";
import {
  SvgArrowExchange,
  SvgCheckCircle,
  SvgServer,
  SvgSettings,
} from "@opal/icons";
import { ProviderIcon } from "@/app/admin/configuration/llm/ProviderIcon";

export interface LLMProviderCardProps {
  title: string;
  subtitle: string;
  providerName?: string;
  disabled?: boolean;
  isConnected?: boolean;
  onClick: () => void;
}

function LLMProviderCardInner({
  title,
  subtitle,
  providerName,
  disabled,
  isConnected,
  onClick,
}: LLMProviderCardProps) {
  const [isHovered, setIsHovered] = useState(false);

  const handleCardClick = useCallback(() => {
    if (disabled) {
      return;
    }

    if (isConnected) {
      // If connected, redirect to admin page
      window.location.href = "/admin/configuration/llm";
      return;
    }

    // If not connected, call onClick to open the form
    onClick();
  }, [disabled, isConnected, onClick]);

  const handleSettingsClick = useCallback(
    noProp(() => (window.location.href = "/admin/configuration/llm")),
    []
  );

  return (
    <Disabled disabled={disabled} allowClick>
      <div
        role="button"
        tabIndex={0}
        onClick={handleCardClick}
        onKeyDown={(e) => {
          if (!disabled && (e.key === "Enter" || e.key === " ")) {
            e.preventDefault();
            handleCardClick();
          }
        }}
        onMouseEnter={() => setIsHovered(true)}
        onMouseLeave={() => setIsHovered(false)}
        className={cn(
          "flex justify-between h-full w-full p-1 rounded-12 border border-border-01 bg-background-neutral-01 transition-colors text-left",
          !disabled && "hover:bg-background-neutral-02 cursor-pointer"
        )}
      >
        <div className="flex gap-1 p-1 flex-1 min-w-0">
          <div className="flex items-start h-full pt-0.5">
            {providerName ? (
              <ProviderIcon provider={providerName} size={16} className="" />
            ) : (
              <SvgServer className="w-4 h-4 stroke-text-04" />
            )}
          </div>
          <div className="min-w-0 flex flex-col justify-center">
            <Text as="p" text04 mainUiAction>
              {title}
            </Text>
            <Truncated text03 secondaryBody>
              {subtitle}
            </Truncated>
          </div>
        </div>
        {isConnected ? (
          <div className="flex items-start gap-1 p-1">
            {isHovered && (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <IconButton
                internal
                icon={SvgSettings}
                disabled={disabled}
                onClick={handleSettingsClick}
                className="hover:bg-transparent"
              />
            )}
            <div className="p-1">
              <SvgCheckCircle className="w-4 h-4 stroke-status-success-05" />
            </div>
          </div>
        ) : (
          <div className="flex items-start p-1">
            <div className="flex items-center gap-0.5">
              <Text as="p" text03 secondaryAction>
                Connect
              </Text>
              <div className="p-0.5">
                <SvgArrowExchange className="w-4 h-4 stroke-text-03" />
              </div>
            </div>
          </div>
        )}
      </div>
    </Disabled>
  );
}

const LLMProviderCard = memo(LLMProviderCardInner);
export default LLMProviderCard;


================================================
FILE: web/src/sections/onboarding/components/NonAdminStep.tsx
================================================
"use client";

import React, { useRef, useState, useEffect } from "react";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { updateUserPersonalization } from "@/lib/userSettings";
import { useUser } from "@/providers/UserProvider";
import { toast } from "@/hooks/useToast";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Button } from "@opal/components";
import InputAvatar from "@/refresh-components/inputs/InputAvatar";
import { cn } from "@/lib/utils";
import { SvgCheckCircle, SvgEdit, SvgUser, SvgX } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import { Hoverable } from "@opal/core";

export default function NonAdminStep() {
  const inputRef = useRef<HTMLInputElement>(null);
  const { user, refreshUser } = useUser();
  const [name, setName] = useState("");
  const [showHeader, setShowHeader] = useState(false);
  const [isEditing, setIsEditing] = useState(true);
  const [savedName, setSavedName] = useState("");

  // Initialize name from user if available
  useEffect(() => {
    if (user?.personalization?.name && !savedName) {
      setSavedName(user.personalization.name);
      setIsEditing(false);
    }
  }, [user?.personalization?.name, savedName]);

  const containerClasses = cn(
    "flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01 mb-4"
  );

  const handleSave = () => {
    updateUserPersonalization({ name })
      .then(() => {
        setSavedName(name);
        setShowHeader(true);
        setIsEditing(false);
        // Don't call refreshUser() here — it would cause OnboardingFlow to
        // unmount this component (since user.personalization.name becomes set),
        // hiding the confirmation banner before the user sees it.
        // refreshUser() is called in handleDismissConfirmation instead.
      })
      .catch((error) => {
        toast.error("Failed to save name. Please try again.");
        console.error(error);
      });
  };

  const handleDismissConfirmation = () => {
    setShowHeader(false);
    refreshUser();
  };

  return (
    <>
      {showHeader && (
        <div
          className="flex items-center justify-between w-full min-h-11 py-1 pl-3 pr-2 bg-background-tint-00 rounded-16 shadow-01 mb-2"
          aria-label="non-admin-confirmation"
        >
          <ContentAction
            icon={({ className, ...props }) => (
              <SvgCheckCircle
                className={cn(className, "stroke-status-success-05")}
                {...props}
              />
            )}
            title="You're all set!"
            sizePreset="main-ui"
            variant="body"
            prominence="muted"
            paddingVariant="fit"
            rightChildren={
              <Button
                prominence="tertiary"
                size="sm"
                icon={SvgX}
                onClick={handleDismissConfirmation}
              />
            }
          />
        </div>
      )}
      {isEditing ? (
        <div
          className={containerClasses}
          onClick={() => inputRef.current?.focus()}
          role="group"
          aria-label="non-admin-name-prompt"
        >
          <ContentAction
            icon={SvgUser}
            title="What should Onyx call you?"
            description="We will display this name in the app."
            sizePreset="main-ui"
            variant="section"
            paddingVariant="fit"
            rightChildren={
              <div className="flex items-center justify-end gap-2">
                <InputTypeIn
                  ref={inputRef}
                  placeholder="Your name"
                  value={name || ""}
                  onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
                    setName(e.target.value)
                  }
                  onKeyDown={(e) => {
                    if (e.key === "Enter" && name && name.trim().length > 0) {
                      e.preventDefault();
                      handleSave();
                    }
                  }}
                  className="w-[26%] min-w-40"
                />
                <Button disabled={name === ""} onClick={handleSave}>
                  Save
                </Button>
              </div>
            }
          />
        </div>
      ) : (
        <Hoverable.Root group="nonAdminName" widthVariant="full">
          <div
            className={containerClasses}
            aria-label="Edit display name"
            role="button"
            tabIndex={0}
            onClick={() => {
              setIsEditing(true);
              setName(savedName);
            }}
          >
            <div className="flex items-center gap-1">
              <InputAvatar
                className={cn(
                  "flex items-center justify-center bg-background-neutral-inverted-00",
                  "w-5 h-5"
                )}
              >
                <Text as="p" inverted secondaryBody>
                  {savedName?.[0]?.toUpperCase()}
                </Text>
              </InputAvatar>
              <Text as="p" text04 mainUiAction>
                {savedName}
              </Text>
            </div>
            <div className="p-1 flex items-center gap-1">
              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
              <Hoverable.Item group="nonAdminName" variant="opacity-on-hover">
                <IconButton internal icon={SvgEdit} tooltip="Edit" />
              </Hoverable.Item>
              <SvgCheckCircle className="w-4 h-4 stroke-status-success-05" />
            </div>
          </div>
        </Hoverable.Root>
      )}
    </>
  );
}


================================================
FILE: web/src/sections/onboarding/components/OnboardingHeader.tsx
================================================
import React from "react";
import { STEP_CONFIG } from "@/sections/onboarding/constants";
import {
  OnboardingActions,
  OnboardingState,
  OnboardingStep,
} from "@/interfaces/onboarding";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import { SvgProgressCircle, SvgX } from "@opal/icons";
import { Card } from "@/refresh-components/cards";
import { Section } from "@/layouts/general-layouts";
import { ContentAction } from "@opal/layouts";

interface OnboardingHeaderProps {
  state: OnboardingState;
  actions: OnboardingActions;
  handleHideOnboarding: () => void;
  handleFinishOnboarding: () => void;
}
const OnboardingHeader = React.memo(
  ({
    state: onboardingState,
    actions: onboardingActions,
    handleHideOnboarding,
    handleFinishOnboarding,
  }: OnboardingHeaderProps) => {
    const iconPercentage =
      STEP_CONFIG[onboardingState.currentStep].iconPercentage;
    const stepButtonText = STEP_CONFIG[onboardingState.currentStep].buttonText;
    const isWelcomeStep =
      onboardingState.currentStep === OnboardingStep.Welcome;
    const isCompleteStep =
      onboardingState.currentStep === OnboardingStep.Complete;

    function handleButtonClick() {
      if (isCompleteStep) handleFinishOnboarding();
      else onboardingActions.nextStep();
    }

    return (
      <Card padding={0.5} data-label="onboarding-header">
        <ContentAction
          icon={(props) => (
            <SvgProgressCircle value={iconPercentage} {...props} />
          )}
          title={STEP_CONFIG[onboardingState.currentStep].title}
          sizePreset="main-ui"
          variant="body"
          prominence="muted"
          paddingVariant="sm"
          rightChildren={
            stepButtonText ? (
              <Section flexDirection="row">
                {!isWelcomeStep && (
                  <Text as="p" text03 mainUiBody>
                    Step {onboardingState.stepIndex} of{" "}
                    {onboardingState.totalSteps}
                  </Text>
                )}
                <Button
                  disabled={!onboardingState.isButtonActive}
                  onClick={handleButtonClick}
                >
                  {stepButtonText}
                </Button>
              </Section>
            ) : (
              <Button
                prominence="tertiary"
                size="sm"
                icon={SvgX}
                onClick={handleHideOnboarding}
              />
            )
          }
        />
      </Card>
    );
  }
);
OnboardingHeader.displayName = "OnboardingHeader";

export default OnboardingHeader;


================================================
FILE: web/src/sections/onboarding/constants.ts
================================================
import { OnboardingStep, FinalStepItemProps } from "@/interfaces/onboarding";
import { SvgGlobe, SvgImage, SvgUsers } from "@opal/icons";

type StepConfig = {
  index: number;
  title: string;
  buttonText: string;
  iconPercentage: number;
};

export const STEP_CONFIG: Record<OnboardingStep, StepConfig> = {
  [OnboardingStep.Welcome]: {
    index: 0,
    title: "Let's take a moment to get you set up.",
    buttonText: "Let's Go",
    iconPercentage: 10,
  },
  [OnboardingStep.Name]: {
    index: 1,
    title: "Let's take a moment to get you set up.",
    buttonText: "Next",
    iconPercentage: 40,
  },
  [OnboardingStep.LlmSetup]: {
    index: 2,
    title: "Almost there! Connect your models to start chatting.",
    buttonText: "Next",
    iconPercentage: 70,
  },
  [OnboardingStep.Complete]: {
    index: 3,
    title: "You're all set, review the optional settings or click Finish Setup",
    buttonText: "Finish Setup",
    iconPercentage: 100,
  },
} as const;

export const TOTAL_STEPS = 3;

export const STEP_NAVIGATION: Record<
  OnboardingStep,
  { next?: OnboardingStep; prev?: OnboardingStep }
> = {
  [OnboardingStep.Welcome]: { next: OnboardingStep.Name },
  [OnboardingStep.Name]: {
    next: OnboardingStep.LlmSetup,
    prev: OnboardingStep.Welcome,
  },
  [OnboardingStep.LlmSetup]: {
    next: OnboardingStep.Complete,
    prev: OnboardingStep.Name,
  },
  [OnboardingStep.Complete]: { prev: OnboardingStep.LlmSetup },
};

export const FINAL_SETUP_CONFIG: FinalStepItemProps[] = [
  {
    title: "Select web search provider",
    description: "Enable Onyx to search the internet for information.",
    icon: SvgGlobe,
    buttonText: "Web Search",
    buttonHref: "/admin/configuration/web-search",
  },
  {
    title: "Enable image generation",
    description: "Set up models to create images in your chats.",
    icon: SvgImage,
    buttonText: "Image Generation",
    buttonHref: "/admin/configuration/image-generation",
  },
  {
    title: "Invite your team",
    description: "Manage users and permissions for your team",
    icon: SvgUsers,
    buttonText: "Manage Users",
    buttonHref: "/admin/users",
  },
];


================================================
FILE: web/src/sections/onboarding/forms/getOnboardingForm.tsx
================================================
import React from "react";
import {
  WellKnownLLMProviderDescriptor,
  LLMProviderName,
} from "@/interfaces/llm";
import { OnboardingActions, OnboardingState } from "@/interfaces/onboarding";
import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
import AzureModal from "@/sections/modals/llmConfig/AzureModal";
import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
import CustomModal from "@/sections/modals/llmConfig/CustomModal";
import LMStudioForm from "@/sections/modals/llmConfig/LMStudioForm";
import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";

// Display info for LLM provider cards - title is the product name, displayName is the company/platform
const PROVIDER_DISPLAY_INFO: Record<
  string,
  { title: string; displayName: string }
> = {
  [LLMProviderName.OPENAI]: { title: "GPT", displayName: "OpenAI" },
  [LLMProviderName.ANTHROPIC]: { title: "Claude", displayName: "Anthropic" },
  [LLMProviderName.OLLAMA_CHAT]: { title: "Ollama", displayName: "Ollama" },
  [LLMProviderName.AZURE]: {
    title: "Azure OpenAI",
    displayName: "Microsoft Azure Cloud",
  },
  [LLMProviderName.BEDROCK]: {
    title: "Amazon Bedrock",
    displayName: "AWS",
  },
  [LLMProviderName.VERTEX_AI]: {
    title: "Gemini",
    displayName: "Google Cloud Vertex AI",
  },
  [LLMProviderName.OPENROUTER]: {
    title: "OpenRouter",
    displayName: "OpenRouter",
  },
  [LLMProviderName.LM_STUDIO]: {
    title: "LM Studio",
    displayName: "LM Studio",
  },
  [LLMProviderName.LITELLM_PROXY]: {
    title: "LiteLLM Proxy",
    displayName: "LiteLLM Proxy",
  },
};

export function getProviderDisplayInfo(providerName: string): {
  title: string;
  displayName: string;
} {
  return (
    PROVIDER_DISPLAY_INFO[providerName] ?? {
      title: providerName,
      displayName: providerName,
    }
  );
}

export interface OnboardingFormProps {
  llmDescriptor?: WellKnownLLMProviderDescriptor;
  isCustomProvider?: boolean;
  onboardingState: OnboardingState;
  onboardingActions: OnboardingActions;
  open: boolean;
  onOpenChange: (open: boolean) => void;
}

export function getOnboardingForm({
  llmDescriptor,
  isCustomProvider,
  onboardingState,
  onboardingActions,
  open,
  onOpenChange,
}: OnboardingFormProps): React.ReactNode {
  const sharedProps = {
    variant: "onboarding" as const,
    onboardingState,
    onboardingActions,
    open,
    onOpenChange,
  };

  // Handle custom provider
  if (isCustomProvider || !llmDescriptor) {
    return <CustomModal {...sharedProps} />;
  }

  const providerProps = {
    ...sharedProps,
    llmDescriptor,
  };

  switch (llmDescriptor.name) {
    case LLMProviderName.OPENAI:
      return <OpenAIModal {...providerProps} />;

    case LLMProviderName.ANTHROPIC:
      return <AnthropicModal {...providerProps} />;

    case LLMProviderName.OLLAMA_CHAT:
      return <OllamaModal {...providerProps} />;

    case LLMProviderName.AZURE:
      return <AzureModal {...providerProps} />;

    case LLMProviderName.BEDROCK:
      return <BedrockModal {...providerProps} />;

    case LLMProviderName.VERTEX_AI:
      return <VertexAIModal {...providerProps} />;

    case LLMProviderName.OPENROUTER:
      return <OpenRouterModal {...providerProps} />;

    case LLMProviderName.LM_STUDIO:
      return <LMStudioForm {...providerProps} />;

    case LLMProviderName.LITELLM_PROXY:
      return <LiteLLMProxyModal {...providerProps} />;

    default:
      return <CustomModal {...sharedProps} />;
  }
}


================================================
FILE: web/src/sections/onboarding/reducer.ts
================================================
import {
  OnboardingState,
  OnboardingAction,
  OnboardingActionType,
  OnboardingStep,
} from "@/interfaces/onboarding";
import { STEP_NAVIGATION, STEP_CONFIG, TOTAL_STEPS } from "./constants";

export const initialState: OnboardingState = {
  currentStep: OnboardingStep.Welcome,
  stepIndex: 0,
  totalSteps: TOTAL_STEPS,
  data: {},
  isButtonActive: true,
  isLoading: false,
};

export function onboardingReducer(
  state: OnboardingState,
  action: OnboardingAction
): OnboardingState {
  switch (action.type) {
    case OnboardingActionType.NEXT_STEP: {
      const nextStep = STEP_NAVIGATION[state.currentStep].next;
      if (!nextStep) return state;
      return {
        ...state,
        currentStep: nextStep,
        stepIndex: STEP_CONFIG[nextStep].index,
        isButtonActive:
          nextStep === OnboardingStep.Complete ? true : state.isButtonActive,
        error: undefined,
      };
    }

    case OnboardingActionType.PREV_STEP: {
      const prevStep = STEP_NAVIGATION[state.currentStep].prev;
      if (!prevStep) return state;

      return {
        ...state,
        currentStep: prevStep,
        stepIndex: STEP_CONFIG[prevStep].index,
        error: undefined,
      };
    }

    case OnboardingActionType.GO_TO_STEP:
      return {
        ...state,
        currentStep: action.step,
        stepIndex: STEP_CONFIG[action.step].index,
        isButtonActive:
          action.step === OnboardingStep.Complete ? true : state.isButtonActive,
        error: undefined,
      };

    case OnboardingActionType.UPDATE_DATA:
      return {
        ...state,
        data: { ...state.data, ...action.payload },
      };

    case OnboardingActionType.SET_BUTTON_ACTIVE:
      return {
        ...state,
        isButtonActive: action.isButtonActive,
      };

    case OnboardingActionType.SET_LOADING:
      return {
        ...state,
        isLoading: action.isLoading,
      };

    case OnboardingActionType.SET_ERROR:
      return {
        ...state,
        error: action.error,
      };

    case OnboardingActionType.RESET:
      return initialState;

    default:
      return state;
  }
}


================================================
FILE: web/src/sections/onboarding/steps/FinalStep.tsx
================================================
import React from "react";
import Link from "next/link";
import type { Route } from "next";
import { Button } from "@opal/components";
import { FINAL_SETUP_CONFIG } from "@/sections/onboarding/constants";
import { FinalStepItemProps } from "@/interfaces/onboarding";
import { SvgExternalLink } from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { ContentAction } from "@opal/layouts";
import { Card } from "@/refresh-components/cards";

const FinalStepItem = React.memo(
  ({
    title,
    description,
    icon: Icon,
    buttonText,
    buttonHref,
  }: FinalStepItemProps) => {
    const isExternalLink = buttonHref.startsWith("http");
    const linkProps = isExternalLink
      ? { target: "_blank", rel: "noopener noreferrer" }
      : {};

    return (
      <Card padding={0.25} variant="secondary">
        <ContentAction
          icon={Icon}
          title={title}
          description={description}
          sizePreset="main-ui"
          variant="section"
          paddingVariant="sm"
          rightChildren={
            <Link href={buttonHref as Route} {...linkProps}>
              <Button prominence="tertiary" rightIcon={SvgExternalLink}>
                {buttonText}
              </Button>
            </Link>
          }
        />
      </Card>
    );
  }
);
FinalStepItem.displayName = "FinalStepItem";

export default function FinalStep() {
  return (
    <Section gap={0.5}>
      {FINAL_SETUP_CONFIG.map((item) => (
        <FinalStepItem key={item.title} {...item} />
      ))}
    </Section>
  );
}


================================================
FILE: web/src/sections/onboarding/steps/LLMStep.tsx
================================================
"use client";

import { memo, useState, useCallback } from "react";
import Text from "@/refresh-components/texts/Text";
import { Button } from "@opal/components";
import Separator from "@/refresh-components/Separator";
import LLMProviderCard from "../components/LLMProviderCard";
import {
  OnboardingActions,
  OnboardingState,
  OnboardingStep,
} from "@/interfaces/onboarding";
import { WellKnownLLMProviderDescriptor } from "@/interfaces/llm";
import {
  getOnboardingForm,
  getProviderDisplayInfo,
} from "../forms/getOnboardingForm";
import { Disabled } from "@opal/core";
import { ProviderIcon } from "@/app/admin/configuration/llm/ProviderIcon";
import { SvgCheckCircle, SvgCpu, SvgExternalLink } from "@opal/icons";
import { ContentAction } from "@opal/layouts";

type LLMStepProps = {
  state: OnboardingState;
  actions: OnboardingActions;
  llmDescriptors: WellKnownLLMProviderDescriptor[];
  disabled?: boolean;
};

interface SelectedProvider {
  llmDescriptor?: WellKnownLLMProviderDescriptor;
  isCustomProvider: boolean;
}

const LLMProviderSkeleton = () => {
  return (
    <div className="flex justify-between h-full w-full p-1 rounded-12 border border-border-01 bg-background-neutral-01 animate-pulse">
      <div className="flex gap-1 p-1 flex-1 min-w-0">
        <div className="h-full p-0.5">
          <div className="w-4 h-4 rounded-full bg-neutral-200" />
        </div>
        <div className="min-w-0 flex-1">
          <div className="h-3 w-1/2 bg-neutral-200 rounded" />
          <div className="mt-2 h-2 w-3/4 bg-neutral-200 rounded" />
        </div>
      </div>
      <div className="h-6 w-16 bg-neutral-200 rounded" />
    </div>
  );
};

type StackedProviderIconsProps = {
  providers: string[];
};

const StackedProviderIcons = ({ providers }: StackedProviderIconsProps) => {
  if (!providers || providers.length === 0) {
    return null;
  }

  return (
    <div className="flex items-center">
      {providers.slice(0, 3).map((provider, index) => (
        <div
          key={provider}
          className="relative flex items-center justify-center w-6 h-6 rounded-04 bg-background-neutral-01 border border-border-01"
          style={{
            marginLeft: index > 0 ? "-8px" : "0",
            zIndex: providers.length - index,
          }}
        >
          <ProviderIcon provider={provider} size={16} />
        </div>
      ))}
      {providers.length > 3 && (
        <div
          className="relative flex items-center justify-center w-6 h-6 rounded-04 bg-background-neutral-01 border border-border-01"
          style={{
            marginLeft: "-8px",
            zIndex: 0,
          }}
        >
          <Text as="p" text03 secondaryBody>
            +{providers.length - 3}
          </Text>
        </div>
      )}
    </div>
  );
};

const LLMStepInner = ({
  state: onboardingState,
  actions: onboardingActions,
  llmDescriptors,
  disabled,
}: LLMStepProps) => {
  const isLoading = !llmDescriptors || llmDescriptors.length === 0;

  const [selectedProvider, setSelectedProvider] =
    useState<SelectedProvider | null>(null);
  const [isModalOpen, setIsModalOpen] = useState(false);

  const handleProviderClick = useCallback(
    (
      llmDescriptor?: WellKnownLLMProviderDescriptor,
      isCustomProvider: boolean = false
    ) => {
      setSelectedProvider({ llmDescriptor, isCustomProvider });
      setIsModalOpen(true);
    },
    []
  );

  const handleModalClose = useCallback((open: boolean) => {
    setIsModalOpen(open);
    if (!open) {
      setSelectedProvider(null);
    }
  }, []);

  if (
    onboardingState.currentStep === OnboardingStep.LlmSetup ||
    onboardingState.currentStep === OnboardingStep.Name
  ) {
    return (
      <Disabled disabled={disabled} allowClick>
        <div
          className="flex flex-col items-center justify-between w-full p-1 rounded-16 border border-border-01 bg-background-tint-00"
          aria-label="onboarding-llm-step"
        >
          <ContentAction
            icon={SvgCpu}
            title="Connect your LLM models"
            description="Onyx supports both self-hosted models and popular providers."
            sizePreset="main-ui"
            variant="section"
            paddingVariant="lg"
            rightChildren={
              <Button
                disabled={disabled}
                prominence="tertiary"
                rightIcon={SvgExternalLink}
                href="/admin/configuration/llm"
              >
                View in Admin Panel
              </Button>
            }
          />
          <Separator />
          <div className="flex flex-wrap gap-1 [&>*:last-child:nth-child(odd)]:basis-full">
            {isLoading ? (
              Array.from({ length: 8 }).map((_, idx) => (
                <div
                  key={idx}
                  className="basis-[calc(50%-theme(spacing.1)/2)] grow"
                >
                  <LLMProviderSkeleton />
                </div>
              ))
            ) : (
              <>
                {/* Render the selected provider form */}
                {selectedProvider &&
                  getOnboardingForm({
                    llmDescriptor: selectedProvider.llmDescriptor,
                    isCustomProvider: selectedProvider.isCustomProvider,
                    onboardingState,
                    onboardingActions,
                    open: isModalOpen,
                    onOpenChange: handleModalClose,
                  })}

                {/* Render provider cards */}
                {llmDescriptors.map((llmDescriptor) => {
                  const displayInfo = getProviderDisplayInfo(
                    llmDescriptor.name
                  );
                  return (
                    <div
                      key={llmDescriptor.name}
                      className="basis-[calc(50%-theme(spacing.1)/2)] grow"
                    >
                      <LLMProviderCard
                        title={displayInfo.title}
                        subtitle={displayInfo.displayName}
                        providerName={llmDescriptor.name}
                        disabled={disabled}
                        isConnected={onboardingState.data.llmProviders?.some(
                          (provider) => provider === llmDescriptor.name
                        )}
                        onClick={() =>
                          handleProviderClick(llmDescriptor, false)
                        }
                      />
                    </div>
                  );
                })}

                {/* Custom provider card */}
                <div className="basis-[calc(50%-theme(spacing.1)/2)] grow">
                  <LLMProviderCard
                    title="Custom LLM Provider"
                    subtitle="LiteLLM Compatible APIs"
                    disabled={disabled}
                    isConnected={onboardingState.data.llmProviders?.some(
                      (provider) => provider === "custom"
                    )}
                    onClick={() => handleProviderClick(undefined, true)}
                  />
                </div>
              </>
            )}
          </div>
        </div>
      </Disabled>
    );
  } else {
    return (
      <button
        type="button"
        className="flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01 opacity-50"
        onClick={() => {
          onboardingActions.setButtonActive(true);
          onboardingActions.goToStep(OnboardingStep.LlmSetup);
        }}
        aria-label="Edit LLM providers"
      >
        <div className="flex items-center gap-1">
          <StackedProviderIcons
            providers={onboardingState.data.llmProviders || []}
          />
          <Text as="p" text04 mainUiAction>
            {onboardingState.data.llmProviders?.length || 0}{" "}
            {(onboardingState.data.llmProviders?.length || 0) === 1
              ? "model"
              : "models"}{" "}
            connected
          </Text>
        </div>
        <div className="p-1">
          <SvgCheckCircle className="w-4 h-4 stroke-status-success-05" />
        </div>
      </button>
    );
  }
};

const LLMStep = memo(LLMStepInner);
export default LLMStep;


================================================
FILE: web/src/sections/onboarding/steps/NameStep.tsx
================================================
"use client";

import React, { useRef } from "react";
import Text from "@/refresh-components/texts/Text";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import {
  OnboardingState,
  OnboardingActions,
  OnboardingStep,
} from "@/interfaces/onboarding";
import InputAvatar from "@/refresh-components/inputs/InputAvatar";
import { cn } from "@/lib/utils";
import IconButton from "@/refresh-components/buttons/IconButton";
import { SvgCheckCircle, SvgEdit, SvgUser } from "@opal/icons";
import { ContentAction } from "@opal/layouts";
import { Hoverable } from "@opal/core";

export interface NameStepProps {
  state: OnboardingState;
  actions: OnboardingActions;
}

const NameStep = React.memo(
  ({ state: onboardingState, actions: onboardingActions }: NameStepProps) => {
    const { userName } = onboardingState.data;
    const { updateName, goToStep, setButtonActive, nextStep } =
      onboardingActions;

    const isActive = onboardingState.currentStep === OnboardingStep.Name;
    const containerClasses = cn(
      "flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01"
    );

    const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
      if (e.key === "Enter" && userName && userName.trim().length > 0) {
        e.preventDefault();
        nextStep();
      }
    };

    const inputRef = useRef<HTMLInputElement>(null);
    return isActive ? (
      <div
        className={containerClasses}
        onClick={() => inputRef.current?.focus()}
        role="group"
        aria-label="onboarding-name-step"
      >
        <ContentAction
          icon={SvgUser}
          title="What should Onyx call you?"
          description="We will display this name in the app."
          sizePreset="main-ui"
          variant="section"
          paddingVariant="fit"
          rightChildren={
            <InputTypeIn
              ref={inputRef}
              placeholder="Your name"
              value={userName || ""}
              onChange={(e) => updateName(e.target.value)}
              onKeyDown={handleKeyDown}
              className="max-w-60"
            />
          }
        />
      </div>
    ) : (
      <Hoverable.Root group="nameStep" widthVariant="full">
        <div
          className={containerClasses}
          onClick={() => {
            setButtonActive(true);
            goToStep(OnboardingStep.Name);
          }}
          aria-label="Edit display name"
          role="button"
          tabIndex={0}
        >
          <div
            className={cn("flex items-center gap-1", !isActive && "opacity-50")}
          >
            <InputAvatar
              className={cn(
                "flex items-center justify-center bg-background-neutral-inverted-00",
                "w-5 h-5"
              )}
            >
              <Text as="p" inverted secondaryBody>
                {userName?.[0]?.toUpperCase()}
              </Text>
            </InputAvatar>
            <Text as="p" text04 mainUiAction>
              {userName}
            </Text>
          </div>
          <div className="p-1 flex items-center gap-1">
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <Hoverable.Item group="nameStep" variant="opacity-on-hover">
              <IconButton internal icon={SvgEdit} tooltip="Edit" />
            </Hoverable.Item>
            <SvgCheckCircle
              className={cn(
                "w-4 h-4 stroke-status-success-05",
                !isActive && "opacity-50"
              )}
            />
          </div>
        </div>
      </Hoverable.Root>
    );
  }
);
NameStep.displayName = "NameStep";

export default NameStep;


================================================
FILE: web/src/sections/settings/Memories.tsx
================================================
"use client";

import { useState } from "react";
import FileTile from "@/refresh-components/tiles/FileTile";
import ButtonTile from "@/refresh-components/tiles/ButtonTile";
import { SvgAddLines, SvgFilter, SvgMenu, SvgPlusCircle } from "@opal/icons";
import MemoriesModal from "@/refresh-components/modals/MemoriesModal";
import LineItem from "@/refresh-components/buttons/LineItem";
import { Button } from "@opal/components";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { MemoryItem } from "@/lib/types";

interface MemoriesProps {
  memories: MemoryItem[];
  onSaveMemories: (memories: MemoryItem[]) => Promise<boolean>;
}

export default function Memories({ memories, onSaveMemories }: MemoriesProps) {
  const memoriesModal = useCreateModal();
  const [targetMemoryId, setTargetMemoryId] = useState<number | null>(null);

  return (
    <>
      {memories.length === 0 ? (
        <LineItem
          skeleton
          description="Add personal note or memory that Onyx should remember."
          onClick={() => {
            setTargetMemoryId(null);
            memoriesModal.toggle(true);
          }}
          rightChildren={
            <Button
              prominence="internal"
              icon={SvgPlusCircle}
              onClick={() => {
                setTargetMemoryId(null);
                memoriesModal.toggle(true);
              }}
            />
          }
        />
      ) : (
        <div className="self-stretch flex flex-row items-center justify-between gap-2">
          <div className="flex flex-row items-center gap-2">
            {memories.slice(0, 2).map((memory, index) => (
              <FileTile
                key={memory.id ?? index}
                description={memory.content}
                onOpen={() => {
                  setTargetMemoryId(memory.id);
                  memoriesModal.toggle(true);
                }}
              />
            ))}
          </div>
          <ButtonTile
            title="View/Add"
            description="All Memories"
            icon={SvgAddLines}
            onClick={() => {
              setTargetMemoryId(null);
              memoriesModal.toggle(true);
            }}
          />
        </div>
      )}

      <memoriesModal.Provider>
        <MemoriesModal
          memories={memories}
          onSaveMemories={onSaveMemories}
          initialTargetMemoryId={targetMemoryId}
          focusNewLine={targetMemoryId === null}
        />
      </memoriesModal.Provider>
    </>
  );
}


================================================
FILE: web/src/sections/sidebar/AdminSidebar.tsx
================================================
"use client";

import { useCallback } from "react";
import { usePathname } from "next/navigation";
import { useSettingsContext } from "@/providers/SettingsProvider";
import SidebarSection from "@/sections/sidebar/SidebarSection";
import SidebarWrapper from "@/sections/sidebar/SidebarWrapper";
import { useIsKGExposed } from "@/app/admin/kg/utils";
import { useCustomAnalyticsEnabled } from "@/lib/hooks/useCustomAnalyticsEnabled";
import { useUser } from "@/providers/UserProvider";
import { UserRole } from "@/lib/types";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { CombinedSettings } from "@/interfaces/settings";
import { SidebarTab } from "@opal/components";
import SidebarBody from "@/sections/sidebar/SidebarBody";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { Disabled } from "@opal/core";
import { SvgArrowUpCircle, SvgUserManage, SvgX } from "@opal/icons";
import {
  useBillingInformation,
  useLicense,
  hasActiveSubscription,
} from "@/lib/billing";
import { Content } from "@opal/layouts";
import { ADMIN_ROUTES, sidebarItem } from "@/lib/admin-routes";
import useFilter from "@/hooks/useFilter";
import { IconFunctionComponent } from "@opal/types";
import { Section } from "@/layouts/general-layouts";
import Text from "@/refresh-components/texts/Text";
import { getUserDisplayName } from "@/lib/user";
import { APP_SLOGAN } from "@/lib/constants";

const SECTIONS = {
  UNLABELED: "",
  AGENTS_AND_ACTIONS: "Agents & Actions",
  DOCUMENTS_AND_KNOWLEDGE: "Documents & Knowledge",
  INTEGRATIONS: "Integrations",
  PERMISSIONS: "Permissions",
  ORGANIZATION: "Organization",
  USAGE: "Usage",
} as const;

interface SidebarItemEntry {
  section: string;
  name: string;
  icon: IconFunctionComponent;
  link: string;
  error?: boolean;
  disabled?: boolean;
}

function buildItems(
  isCurator: boolean,
  enableCloud: boolean,
  enableEnterprise: boolean,
  settings: CombinedSettings | null,
  kgExposed: boolean,
  customAnalyticsEnabled: boolean,
  hasSubscription: boolean,
  hooksEnabled: boolean
): SidebarItemEntry[] {
  const vectorDbEnabled = settings?.settings.vector_db_enabled !== false;
  const items: SidebarItemEntry[] = [];

  const add = (section: string, route: Parameters<typeof sidebarItem>[0]) => {
    items.push({ ...sidebarItem(route), section });
  };

  const addDisabled = (
    section: string,
    route: Parameters<typeof sidebarItem>[0],
    isDisabled: boolean
  ) => {
    items.push({ ...sidebarItem(route), section, disabled: isDisabled });
  };

  // 1. No header — core configuration (admin only)
  if (!isCurator) {
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.LLM_MODELS);
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.WEB_SEARCH);
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.IMAGE_GENERATION);
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.VOICE);
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.CODE_INTERPRETER);
    add(SECTIONS.UNLABELED, ADMIN_ROUTES.CHAT_PREFERENCES);

    if (vectorDbEnabled && kgExposed) {
      add(SECTIONS.UNLABELED, ADMIN_ROUTES.KNOWLEDGE_GRAPH);
    }

    if (!enableCloud && customAnalyticsEnabled) {
      addDisabled(
        SECTIONS.UNLABELED,
        ADMIN_ROUTES.CUSTOM_ANALYTICS,
        !enableEnterprise
      );
    }
  }

  // 2. Agents & Actions
  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.AGENTS);
  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.MCP_ACTIONS);
  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.OPENAPI_ACTIONS);

  // 3. Documents & Knowledge
  if (vectorDbEnabled) {
    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.INDEXING_STATUS);
    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.ADD_CONNECTOR);
    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.DOCUMENT_SETS);
    if (!isCurator && !enableCloud) {
      items.push({
        ...sidebarItem(ADMIN_ROUTES.INDEX_SETTINGS),
        section: SECTIONS.DOCUMENTS_AND_KNOWLEDGE,
        error: settings?.settings.needs_reindexing,
      });
    }
    if (!isCurator && settings?.settings.opensearch_indexing_enabled) {
      add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.INDEX_MIGRATION);
    }
  }

  // 4. Integrations (admin only)
  if (!isCurator) {
    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.API_KEYS);
    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.SLACK_BOTS);
    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.DISCORD_BOTS);
    if (hooksEnabled) {
      add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.HOOKS);
    }
  }

  // 5. Permissions
  if (!isCurator) {
    add(SECTIONS.PERMISSIONS, ADMIN_ROUTES.USERS);
    addDisabled(SECTIONS.PERMISSIONS, ADMIN_ROUTES.GROUPS, !enableEnterprise);
    addDisabled(SECTIONS.PERMISSIONS, ADMIN_ROUTES.SCIM, !enableEnterprise);
  } else if (enableEnterprise) {
    add(SECTIONS.PERMISSIONS, ADMIN_ROUTES.GROUPS);
  }

  // 6. Organization (admin only)
  if (!isCurator) {
    if (hasSubscription) {
      add(SECTIONS.ORGANIZATION, ADMIN_ROUTES.BILLING);
    } else {
      items.push({
        section: SECTIONS.ORGANIZATION,
        name: "Upgrade Plan",
        icon: SvgArrowUpCircle,
        link: ADMIN_ROUTES.BILLING.path,
      });
    }
    add(SECTIONS.ORGANIZATION, ADMIN_ROUTES.TOKEN_RATE_LIMITS);
    addDisabled(SECTIONS.ORGANIZATION, ADMIN_ROUTES.THEME, !enableEnterprise);
  }

  // 7. Usage (admin only)
  if (!isCurator) {
    addDisabled(SECTIONS.USAGE, ADMIN_ROUTES.USAGE, !enableEnterprise);
    if (settings?.settings.query_history_type !== "disabled") {
      addDisabled(
        SECTIONS.USAGE,
        ADMIN_ROUTES.QUERY_HISTORY,
        !enableEnterprise
      );
    }
  }

  return items;
}

/** Preserve section ordering while grouping consecutive items by section. */
function groupBySection(items: SidebarItemEntry[]) {
  const groups: { section: string; items: SidebarItemEntry[] }[] = [];
  for (const item of items) {
    const last = groups[groups.length - 1];
    if (last && last.section === item.section) {
      last.items.push(item);
    } else {
      groups.push({ section: item.section, items: [item] });
    }
  }
  return groups;
}

interface AdminSidebarProps {
  enableCloudSS: boolean;
}

export default function AdminSidebar({ enableCloudSS }: AdminSidebarProps) {
  const { kgExposed } = useIsKGExposed();
  const pathname = usePathname();
  const { customAnalyticsEnabled } = useCustomAnalyticsEnabled();
  const { user } = useUser();
  const settings = useSettingsContext();
  const enableEnterprise = usePaidEnterpriseFeaturesEnabled();
  const { data: billingData, isLoading: billingLoading } =
    useBillingInformation();
  const { data: licenseData, isLoading: licenseLoading } = useLicense();
  const isCurator =
    user?.role === UserRole.CURATOR || user?.role === UserRole.GLOBAL_CURATOR;
  // Default to true while loading to avoid flashing "Upgrade Plan"
  const hasSubscriptionOrLicense =
    billingLoading || licenseLoading
      ? true
      : Boolean(
          (billingData && hasActiveSubscription(billingData)) ||
            licenseData?.has_license
        );
  const hooksEnabled =
    enableEnterprise && (settings?.settings.hooks_enabled ?? false);

  const allItems = buildItems(
    isCurator,
    enableCloudSS,
    enableEnterprise,
    settings,
    kgExposed,
    customAnalyticsEnabled,
    hasSubscriptionOrLicense,
    hooksEnabled
  );

  const itemExtractor = useCallback((item: SidebarItemEntry) => item.name, []);

  const { query, setQuery, filtered } = useFilter(allItems, itemExtractor);

  const groups = groupBySection(filtered);

  return (
    <SidebarWrapper>
      <SidebarBody
        scrollKey="admin-sidebar"
        pinnedContent={
          <div className="flex flex-col w-full">
            <SidebarTab
              icon={({ className }) => <SvgX className={className} size={16} />}
              href="/app"
              variant="sidebar-light"
            >
              Exit Admin Panel
            </SidebarTab>
            <InputTypeIn
              variant="internal"
              leftSearchIcon
              placeholder="Search..."
              value={query}
              onChange={(e) => setQuery(e.target.value)}
            />
          </div>
        }
        footer={
          <Section gap={0} height="fit" alignItems="start">
            <div className="p-[0.38rem] w-full">
              <Content
                icon={SvgUserManage}
                title={getUserDisplayName(user)}
                sizePreset="main-ui"
                variant="body"
                prominence="muted"
                widthVariant="full"
              />
            </div>
            <div className="flex flex-row gap-1 p-[0.38rem] w-full">
              <Text text03 secondaryAction>
                <a
                  className="underline"
                  href="https://onyx.app"
                  target="_blank"
                >
                  Onyx
                </a>
              </Text>
              <Text text03 secondaryBody>
                |
              </Text>
              {settings.webVersion ? (
                <Text text03 secondaryBody>
                  {settings.webVersion}
                </Text>
              ) : (
                <Text text03 secondaryBody>
                  {APP_SLOGAN}
                </Text>
              )}
            </div>
          </Section>
        }
      >
        {groups.map((group, groupIndex) => {
          const tabs = group.items.map(({ link, icon, name, disabled }) => (
            <Disabled key={link} disabled={disabled}>
              {/*
                # NOTE (@raunakab)
                We intentionally add a `div` intermediary here.
                Without it, the disabled styling that is default provided by the `Disabled` component (which we want here) would be overridden by the custom disabled styling provided by the `SidebarTab`.
                Therefore, in order to avoid that overriding, we add a layer of indirection.
              */}
              <div>
                <SidebarTab
                  disabled={disabled}
                  icon={icon}
                  href={disabled ? undefined : link}
                  selected={pathname.startsWith(link)}
                >
                  {name}
                </SidebarTab>
              </div>
            </Disabled>
          ));

          if (!group.section) {
            return <div key={groupIndex}>{tabs}</div>;
          }

          return (
            <SidebarSection key={groupIndex} title={group.section}>
              {tabs}
            </SidebarSection>
          );
        })}
      </SidebarBody>
    </SidebarWrapper>
  );
}


================================================
FILE: web/src/sections/sidebar/AgentButton.tsx
================================================
"use client";

import React, { memo } from "react";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import { usePinnedAgents, useCurrentAgent } from "@/hooks/useAgents";
import { cn, noProp } from "@/lib/utils";
import { SidebarTab } from "@opal/components";
import IconButton from "@/refresh-components/buttons/IconButton";
import { useSortable } from "@dnd-kit/sortable";
import { CSS } from "@dnd-kit/utilities";
import useOnMount from "@/hooks/useOnMount";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
import { SvgPin, SvgX } from "@opal/icons";

interface SortableItemProps {
  id: number;
  children?: React.ReactNode;
}

function SortableItem({ id, children }: SortableItemProps) {
  const isMounted = useOnMount();
  const { attributes, listeners, setNodeRef, transform, isDragging } =
    useSortable({ id });

  if (!isMounted) {
    return <div className="flex items-center group">{children}</div>;
  }

  return (
    <div
      ref={setNodeRef}
      style={{
        transform: CSS.Transform.toString(transform),
        ...(isDragging && { zIndex: 1000, position: "relative" as const }),
      }}
      {...attributes}
      {...listeners}
      className="flex items-center group"
    >
      {children}
    </div>
  );
}

export interface AgentButtonProps {
  agent: MinimalPersonaSnapshot;
}

const AgentButton = memo(({ agent }: AgentButtonProps) => {
  const currentAgent = useCurrentAgent();
  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();
  const isActuallyPinned = pinnedAgents.some((a) => a.id === agent.id);
  const isCurrentAgent = currentAgent?.id === agent.id;

  const handleClick = async () => {
    if (!isActuallyPinned) {
      await togglePinnedAgent(agent, true);
    }
  };

  return (
    <SortableItem id={agent.id}>
      <div className="flex flex-col w-full h-full">
        <SidebarTab
          key={agent.id}
          icon={() => <AgentAvatar agent={agent} />}
          href={`/app?agentId=${agent.id}`}
          onClick={handleClick}
          selected={isCurrentAgent}
          rightChildren={
            // Hide unpin button for current agent since auto-pin would immediately re-pin
            isCurrentAgent ? null : (
              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
              <IconButton
                icon={
                  SvgX /* We only show the unpin button for pinned agents */
                }
                internal
                onClick={noProp(() => togglePinnedAgent(agent, false))}
                className={cn("hidden group-hover/SidebarTab:flex")}
                tooltip={"Unpin Agent"}
              />
            )
          }
        >
          {agent.name}
        </SidebarTab>
      </div>
    </SortableItem>
  );
});
AgentButton.displayName = "AgentButton";

export default AgentButton;


================================================
FILE: web/src/sections/sidebar/AppSidebar.tsx
================================================
"use client";

import { useCallback, memo, useMemo, useState, useEffect, useRef } from "react";
import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { useRouter } from "next/navigation";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { MinimalPersonaSnapshot } from "@/app/admin/agents/interfaces";
import Text from "@/refresh-components/texts/Text";
import ChatButton from "@/sections/sidebar/ChatButton";
import AgentButton from "@/sections/sidebar/AgentButton";
import { DragEndEvent } from "@dnd-kit/core";
import {
  DndContext,
  closestCenter,
  KeyboardSensor,
  PointerSensor,
  useSensor,
  useSensors,
  pointerWithin,
} from "@dnd-kit/core";
import {
  arrayMove,
  SortableContext,
  sortableKeyboardCoordinates,
  verticalListSortingStrategy,
} from "@dnd-kit/sortable";
import { useDroppable } from "@dnd-kit/core";
import {
  restrictToFirstScrollableAncestor,
  restrictToVerticalAxis,
} from "@dnd-kit/modifiers";
import SidebarSection from "@/sections/sidebar/SidebarSection";
import useChatSessions from "@/hooks/useChatSessions";
import { useProjects } from "@/lib/hooks/useProjects";
import { useAgents, useCurrentAgent, usePinnedAgents } from "@/hooks/useAgents";
import { useAppSidebarContext } from "@/providers/AppSidebarProvider";
import ProjectFolderButton from "@/sections/sidebar/ProjectFolderButton";
import CreateProjectModal from "@/components/modals/CreateProjectModal";
import MoveCustomAgentChatModal from "@/components/modals/MoveCustomAgentChatModal";
import { useProjectsContext } from "@/providers/ProjectsContext";
import { removeChatSessionFromProject } from "@/app/app/projects/projectsService";
import type { Project } from "@/app/app/projects/projectsService";
import SidebarWrapper from "@/sections/sidebar/SidebarWrapper";
import { Button as OpalButton } from "@opal/components";
import { cn } from "@/lib/utils";
import {
  DRAG_TYPES,
  DEFAULT_PERSONA_ID,
  FEATURE_FLAGS,
  LOCAL_STORAGE_KEYS,
} from "@/sections/sidebar/constants";
import { showErrorNotification, handleMoveOperation } from "./sidebarUtils";
import { SidebarTab } from "@opal/components";
import { ChatSession } from "@/app/app/interfaces";
import SidebarBody from "@/sections/sidebar/SidebarBody";
import { useUser } from "@/providers/UserProvider";
import useAppFocus from "@/hooks/useAppFocus";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { useModalContext } from "@/components/context/ModalContext";
import useScreenSize from "@/hooks/useScreenSize";
import {
  SvgDevKit,
  SvgEditBig,
  SvgFolderPlus,
  SvgMoreHorizontal,
  SvgOnyxOctagon,
  SvgSearchMenu,
  SvgSettings,
} from "@opal/icons";
import SidebarTabSkeleton from "@/refresh-components/skeletons/SidebarTabSkeleton";
import BuildModeIntroBackground from "@/app/craft/components/IntroBackground";
import BuildModeIntroContent from "@/app/craft/components/IntroContent";
import { CRAFT_PATH } from "@/app/craft/v1/constants";
import { usePostHog } from "posthog-js/react";
import { track, AnalyticsEvent } from "@/lib/analytics";
import { motion, AnimatePresence } from "motion/react";
import { Notification, NotificationType } from "@/interfaces/settings";
import { errorHandlingFetcher } from "@/lib/fetcher";
import UserAvatarPopover from "@/sections/sidebar/UserAvatarPopover";
import ChatSearchCommandMenu from "@/sections/sidebar/ChatSearchCommandMenu";
import { useQueryController } from "@/providers/QueryControllerProvider";

// Visible-agents = pinned-agents + current-agent (if current-agent not in pinned-agents)
// OR Visible-agents = pinned-agents (if current-agent in pinned-agents)
function buildVisibleAgents(
  pinnedAgents: MinimalPersonaSnapshot[],
  currentAgent: MinimalPersonaSnapshot | null
): [MinimalPersonaSnapshot[], boolean] {
  /* NOTE: The unified agent (id = 0) is not visible in the sidebar,
  so we filter it out. */
  if (!currentAgent)
    return [pinnedAgents.filter((agent) => agent.id !== 0), false];
  const currentAgentIsPinned = pinnedAgents.some(
    (pinnedAgent) => pinnedAgent.id === currentAgent.id
  );
  const visibleAgents = (
    currentAgentIsPinned ? pinnedAgents : [...pinnedAgents, currentAgent]
  ).filter((agent) => agent.id !== 0);

  return [visibleAgents, currentAgentIsPinned];
}

const SKELETON_WIDTHS_BASE = ["w-4/5", "w-4/5", "w-3/5"];

function shuffleWidths(): string[] {
  return [...SKELETON_WIDTHS_BASE].sort(() => Math.random() - 0.5);
}

interface RecentsSectionProps {
  chatSessions: ChatSession[];
  hasMore: boolean;
  isLoadingMore: boolean;
  onLoadMore: () => void;
}

function RecentsSection({
  chatSessions,
  hasMore,
  isLoadingMore,
  onLoadMore,
}: RecentsSectionProps) {
  const { setNodeRef, isOver } = useDroppable({
    id: DRAG_TYPES.RECENTS,
    data: {
      type: DRAG_TYPES.RECENTS,
    },
  });

  // Re-shuffle skeleton widths each time loaded session count changes
  const skeletonWidths = useMemo(shuffleWidths, [chatSessions.length]);

  // Sentinel ref for IntersectionObserver-based infinite scroll
  const sentinelRef = useRef<HTMLDivElement | null>(null);
  const onLoadMoreRef = useRef(onLoadMore);
  onLoadMoreRef.current = onLoadMore;

  useEffect(() => {
    if (!hasMore || isLoadingMore) return;

    const sentinel = sentinelRef.current;
    if (!sentinel) return;

    const observer = new IntersectionObserver(
      (entries) => {
        if (entries[0]?.isIntersecting) {
          onLoadMoreRef.current();
        }
      },
      { threshold: 0 }
    );

    observer.observe(sentinel);
    return () => observer.disconnect();
  }, [hasMore, isLoadingMore]);

  return (
    <div
      ref={setNodeRef}
      className={cn(
        "transition-colors duration-200 rounded-08 h-full",
        isOver && "bg-background-tint-03"
      )}
    >
      <SidebarSection title="Recents">
        {chatSessions.length === 0 ? (
          <Text as="p" text01 className="px-3">
            Try sending a message! Your chat history will appear here.
          </Text>
        ) : (
          <>
            {chatSessions.map((chatSession) => (
              <ChatButton
                key={chatSession.id}
                chatSession={chatSession}
                draggable
              />
            ))}
            {hasMore &&
              skeletonWidths.map((width, i) => (
                <div
                  key={i}
                  ref={i === 0 ? sentinelRef : undefined}
                  className={cn(
                    "transition-opacity duration-300",
                    isLoadingMore ? "opacity-100" : "opacity-40"
                  )}
                >
                  <SidebarTabSkeleton textWidth={width} />
                </div>
              ))}
          </>
        )}
      </SidebarSection>
    </div>
  );
}

interface AppSidebarInnerProps {
  folded: boolean;
  onFoldClick: () => void;
}

const MemoizedAppSidebarInner = memo(
  ({ folded, onFoldClick }: AppSidebarInnerProps) => {
    const router = useRouter();
    const combinedSettings = useSettingsContext();
    const posthog = usePostHog();
    const { newTenantInfo, invitationInfo } = useModalContext();
    const { setAppMode, reset } = useQueryController();

    // Use SWR hooks for data fetching
    const {
      chatSessions,
      refreshChatSessions,
      isLoading: isLoadingChatSessions,
      hasMore,
      isLoadingMore,
      loadMore,
    } = useChatSessions();
    const {
      projects,
      refreshProjects,
      isLoading: isLoadingProjects,
    } = useProjects();
    const { isLoading: isLoadingAgents } = useAgents();
    const currentAgent = useCurrentAgent();
    const {
      pinnedAgents,
      updatePinnedAgents,
      isLoading: isLoadingPinnedAgents,
    } = usePinnedAgents();

    // Wait for ALL dynamic data before showing any sections
    const isLoadingDynamicContent =
      isLoadingChatSessions ||
      isLoadingProjects ||
      isLoadingAgents ||
      isLoadingPinnedAgents;

    // Still need some context for stateful operations
    const { refreshCurrentProjectDetails, currentProjectId } =
      useProjectsContext();

    // State for custom agent modal
    const [pendingMoveChatSession, setPendingMoveChatSession] =
      useState<ChatSession | null>(null);
    const [pendingMoveProjectId, setPendingMoveProjectId] = useState<
      number | null
    >(null);
    const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =
      useState(false);

    // Fetch notifications for build mode intro
    const { data: notifications, mutate: mutateNotifications } = useSWR<
      Notification[]
    >(SWR_KEYS.notifications, errorHandlingFetcher);

    // Check if Onyx Craft is enabled via settings (backed by PostHog feature flag)
    // Only explicit true enables the feature; false or undefined = disabled
    const isOnyxCraftEnabled =
      combinedSettings?.settings?.onyx_craft_enabled === true;

    // Find build_mode feature announcement notification (only if Onyx Craft is enabled)
    const buildModeNotification = isOnyxCraftEnabled
      ? notifications?.find(
          (n) =>
            n.notif_type === NotificationType.FEATURE_ANNOUNCEMENT &&
            n.additional_data?.feature === "build_mode" &&
            !n.dismissed
        )
      : undefined;

    // State for intro animation overlay
    const [showIntroAnimation, setShowIntroAnimation] = useState(false);
    // Track if auto-trigger has fired (prevents race condition during dismiss)
    const hasAutoTriggeredRef = useRef(false);

    // Auto-show intro once when there's an undismissed notification
    // Don't show if tenant/invitation modal is open (e.g., "join existing team" modal)
    // Gated by PostHog feature flag: if `craft-animation-disabled` is true (or
    // PostHog is unavailable), skip the auto-show entirely.
    const isCraftAnimationDisabled =
      posthog?.isFeatureEnabled(FEATURE_FLAGS.CRAFT_ANIMATION_DISABLED) ?? true;
    const hasTenantModal = !!(newTenantInfo || invitationInfo);
    useEffect(() => {
      if (
        isOnyxCraftEnabled &&
        buildModeNotification &&
        !hasAutoTriggeredRef.current &&
        !hasTenantModal &&
        !isCraftAnimationDisabled
      ) {
        hasAutoTriggeredRef.current = true;
        setShowIntroAnimation(true);
      }
    }, [
      buildModeNotification,
      isOnyxCraftEnabled,
      hasTenantModal,
      isCraftAnimationDisabled,
    ]);

    // Dismiss the build mode notification
    const dismissBuildModeNotification = useCallback(async () => {
      if (!buildModeNotification) return;
      try {
        await fetch(`/api/notifications/${buildModeNotification.id}/dismiss`, {
          method: "POST",
        });
        mutateNotifications();
      } catch (error) {
        console.error("Error dismissing notification:", error);
      }
    }, [buildModeNotification, mutateNotifications]);

    const [visibleAgents, currentAgentIsPinned] = useMemo(
      () => buildVisibleAgents(pinnedAgents, currentAgent),
      [pinnedAgents, currentAgent]
    );
    const visibleAgentIds = useMemo(
      () => visibleAgents.map((agent) => agent.id),
      [visibleAgents]
    );

    const sensors = useSensors(
      useSensor(PointerSensor, {
        activationConstraint: {
          distance: 8,
        },
      }),
      useSensor(KeyboardSensor, {
        coordinateGetter: sortableKeyboardCoordinates,
      })
    );

    // Handle agent drag and drop
    const handleAgentDragEnd = useCallback(
      (event: DragEndEvent) => {
        const { active, over } = event;
        if (!over) return;
        if (active.id === over.id) return;

        const activeIndex = visibleAgentIds.findIndex(
          (agentId) => agentId === active.id
        );
        const overIndex = visibleAgentIds.findIndex(
          (agentId) => agentId === over.id
        );

        let newPinnedAgents: MinimalPersonaSnapshot[];

        if (currentAgent && !currentAgentIsPinned) {
          // This is the case in which the user is dragging the UNPINNED agent and moving it to somewhere else in the list.
          // This is an indication that we WANT to pin this agent!
          if (activeIndex === visibleAgentIds.length - 1) {
            const pinnedWithCurrent = [...pinnedAgents, currentAgent];
            newPinnedAgents = arrayMove(
              pinnedWithCurrent,
              activeIndex,
              overIndex
            );
          } else {
            // Use visibleAgents to ensure the indices match with `visibleAgentIds`
            newPinnedAgents = arrayMove(visibleAgents, activeIndex, overIndex);
          }
        } else {
          // Use visibleAgents to ensure the indices match with `visibleAgentIds`
          newPinnedAgents = arrayMove(visibleAgents, activeIndex, overIndex);
        }

        updatePinnedAgents(newPinnedAgents);
      },
      [
        visibleAgentIds,
        visibleAgents,
        pinnedAgents,
        updatePinnedAgents,
        currentAgent,
        currentAgentIsPinned,
      ]
    );

    // Perform the actual move
    async function performChatMove(
      targetProjectId: number,
      chatSession: ChatSession
    ) {
      try {
        await handleMoveOperation({
          chatSession,
          targetProjectId,
          refreshChatSessions,
          refreshCurrentProjectDetails,
          fetchProjects: refreshProjects,
          currentProjectId,
        });
        const projectRefreshPromise = currentProjectId
          ? refreshCurrentProjectDetails()
          : refreshProjects();
        await Promise.all([refreshChatSessions(), projectRefreshPromise]);
      } catch (error) {
        console.error("Failed to move chat:", error);
        throw error;
      }
    }

    // Handle chat to project drag and drop
    const handleChatProjectDragEnd = useCallback(
      async (event: DragEndEvent) => {
        const { active, over } = event;
        if (!over) return;

        const activeData = active.data.current;
        const overData = over.data.current;

        if (!activeData || !overData) {
          return;
        }

        // Check if we're dragging a chat onto a project
        if (
          activeData?.type === DRAG_TYPES.CHAT &&
          overData?.type === DRAG_TYPES.PROJECT
        ) {
          const chatSession = activeData.chatSession as ChatSession;
          const targetProject = overData.project as Project;
          const sourceProjectId = activeData.projectId;

          // Don't do anything if dropping on the same project
          if (sourceProjectId === targetProject.id) {
            return;
          }

          const hideModal =
            typeof window !== "undefined" &&
            window.localStorage.getItem(
              LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL
            ) === "true";

          const isChatUsingDefaultAgent =
            chatSession.persona_id === DEFAULT_PERSONA_ID;

          if (!isChatUsingDefaultAgent && !hideModal) {
            setPendingMoveChatSession(chatSession);
            setPendingMoveProjectId(targetProject.id);
            setShowMoveCustomAgentModal(true);
            return;
          }

          try {
            await performChatMove(targetProject.id, chatSession);
          } catch (error) {
            showErrorNotification("Failed to move chat. Please try again.");
          }
        }

        // Check if we're dragging a chat from a project to the Recents section
        if (
          activeData?.type === DRAG_TYPES.CHAT &&
          overData?.type === DRAG_TYPES.RECENTS
        ) {
          const chatSession = activeData.chatSession as ChatSession;
          const sourceProjectId = activeData.projectId;

          // Only remove from project if it was in a project
          if (sourceProjectId) {
            try {
              await removeChatSessionFromProject(chatSession.id);
              const projectRefreshPromise = currentProjectId
                ? refreshCurrentProjectDetails()
                : refreshProjects();
              await Promise.all([refreshChatSessions(), projectRefreshPromise]);
            } catch (error) {
              console.error("Failed to remove chat from project:", error);
            }
          }
        }
      },
      [
        currentProjectId,
        refreshChatSessions,
        refreshCurrentProjectDetails,
        refreshProjects,
      ]
    );

    const { isAdmin, isCurator, user } = useUser();
    const activeSidebarTab = useAppFocus();
    const createProjectModal = useCreateModal();
    const defaultAppMode =
      (user?.preferences?.default_app_mode?.toLowerCase() as
        | "chat"
        | "search") ?? "chat";
    const newSessionButton = useMemo(() => {
      const href =
        combinedSettings?.settings?.disable_default_assistant && currentAgent
          ? `/app?agentId=${currentAgent.id}`
          : "/app";
      return (
        <div data-testid="AppSidebar/new-session">
          <SidebarTab
            icon={SvgEditBig}
            folded={folded}
            href={href}
            selected={activeSidebarTab.isNewSession()}
            onClick={() => {
              if (!activeSidebarTab.isNewSession()) return;
              setAppMode(defaultAppMode);
              reset();
            }}
          >
            New Session
          </SidebarTab>
        </div>
      );
    }, [
      folded,
      activeSidebarTab,
      combinedSettings,
      currentAgent,
      defaultAppMode,
    ]);

    const buildButton = useMemo(
      () => (
        <div data-testid="AppSidebar/build">
          <SidebarTab
            icon={SvgDevKit}
            folded={folded}
            href={CRAFT_PATH}
            onClick={() => track(AnalyticsEvent.CLICKED_CRAFT_IN_SIDEBAR)}
          >
            Craft
          </SidebarTab>
        </div>
      ),
      [folded, posthog]
    );

    const searchChatsButton = useMemo(
      () => (
        <ChatSearchCommandMenu
          trigger={
            <SidebarTab icon={SvgSearchMenu} folded={folded}>
              Search Chats
            </SidebarTab>
          }
        />
      ),
      [folded]
    );
    const moreAgentsButton = useMemo(
      () => (
        <div data-testid="AppSidebar/more-agents">
          <SidebarTab
            icon={
              folded || visibleAgents.length === 0
                ? SvgOnyxOctagon
                : SvgMoreHorizontal
            }
            href="/app/agents"
            folded={folded}
            selected={activeSidebarTab.isMoreAgents()}
            variant={folded ? "sidebar-heavy" : "sidebar-light"}
          >
            {visibleAgents.length === 0 ? "Explore Agents" : "More Agents"}
          </SidebarTab>
        </div>
      ),
      [folded, activeSidebarTab, visibleAgents]
    );
    const newProjectButton = useMemo(
      () => (
        <SidebarTab
          icon={SvgFolderPlus}
          onClick={() => createProjectModal.toggle(true)}
          selected={createProjectModal.isOpen}
          folded={folded}
          variant={folded ? "sidebar-heavy" : "sidebar-light"}
        >
          New Project
        </SidebarTab>
      ),
      [folded, createProjectModal.toggle, createProjectModal.isOpen]
    );
    const handleShowBuildIntro = useCallback(() => {
      setShowIntroAnimation(true);
    }, []);

    const settingsButton = useMemo(
      () => (
        <div>
          {(isAdmin || isCurator) && (
            <SidebarTab
              href={isCurator ? "/admin/agents" : "/admin/configuration/llm"}
              icon={SvgSettings}
              folded={folded}
            >
              {isAdmin ? "Admin Panel" : "Curator Panel"}
            </SidebarTab>
          )}
          <UserAvatarPopover
            folded={folded}
            onShowBuildIntro={
              isOnyxCraftEnabled ? handleShowBuildIntro : undefined
            }
          />
        </div>
      ),
      [folded, isAdmin, isCurator, handleShowBuildIntro, isOnyxCraftEnabled]
    );

    return (
      <>
        <createProjectModal.Provider>
          <CreateProjectModal />
        </createProjectModal.Provider>

        {showMoveCustomAgentModal && (
          <MoveCustomAgentChatModal
            onCancel={() => {
              setShowMoveCustomAgentModal(false);
              setPendingMoveChatSession(null);
              setPendingMoveProjectId(null);
            }}
            onConfirm={async (doNotShowAgain: boolean) => {
              if (doNotShowAgain && typeof window !== "undefined") {
                window.localStorage.setItem(
                  LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,
                  "true"
                );
              }
              const chat = pendingMoveChatSession;
              const target = pendingMoveProjectId;
              setShowMoveCustomAgentModal(false);
              setPendingMoveChatSession(null);
              setPendingMoveProjectId(null);
              if (chat && target != null) {
                try {
                  await performChatMove(target, chat);
                } catch (error) {
                  showErrorNotification(
                    "Failed to move chat. Please try again."
                  );
                }
              }
            }}
          />
        )}

        {/* Intro animation overlay */}
        <AnimatePresence>
          {showIntroAnimation && (
            <motion.div
              className="fixed inset-0 z-[9999]"
              initial={{ opacity: 0 }}
              animate={{ opacity: 1 }}
              exit={{ opacity: 0 }}
              transition={{ duration: 0.5 }}
            >
              <BuildModeIntroBackground />
              <BuildModeIntroContent
                onClose={() => {
                  setShowIntroAnimation(false);
                  dismissBuildModeNotification();
                }}
                onTryBuildMode={() => {
                  setShowIntroAnimation(false);
                  dismissBuildModeNotification();
                  router.push(CRAFT_PATH);
                }}
              />
            </motion.div>
          )}
        </AnimatePresence>

        <SidebarWrapper folded={folded} onFoldClick={onFoldClick}>
          <SidebarBody
            scrollKey="app-sidebar"
            footer={settingsButton}
            pinnedContent={
              <div className="flex flex-col">
                {newSessionButton}
                {searchChatsButton}
                {isOnyxCraftEnabled && buildButton}
                {folded && moreAgentsButton}
                {folded && newProjectButton}
              </div>
            }
          >
            {/* When folded, all nav buttons are in pinnedContent — nothing here */}
            {folded ? null : isLoadingDynamicContent ? null : (
              <>
                {/* Agents */}
                <DndContext
                  sensors={sensors}
                  collisionDetection={closestCenter}
                  onDragEnd={handleAgentDragEnd}
                >
                  <SidebarSection title="Agents">
                    <SortableContext
                      items={visibleAgentIds}
                      strategy={verticalListSortingStrategy}
                    >
                      {visibleAgents.map((visibleAgent) => (
                        <AgentButton
                          key={visibleAgent.id}
                          agent={visibleAgent}
                        />
                      ))}
                    </SortableContext>
                    {moreAgentsButton}
                  </SidebarSection>
                </DndContext>

                {/* Wrap Projects and Recents in a shared DndContext for chat-to-project drag */}
                <DndContext
                  sensors={sensors}
                  collisionDetection={pointerWithin}
                  modifiers={[
                    restrictToFirstScrollableAncestor,
                    restrictToVerticalAxis,
                  ]}
                  onDragEnd={handleChatProjectDragEnd}
                >
                  {/* Projects */}
                  <SidebarSection
                    title="Projects"
                    action={
                      <OpalButton
                        icon={SvgFolderPlus}
                        prominence="tertiary"
                        size="sm"
                        tooltip="New Project"
                        onClick={() => createProjectModal.toggle(true)}
                      />
                    }
                  >
                    {projects.map((project) => (
                      <ProjectFolderButton key={project.id} project={project} />
                    ))}
                    {projects.length === 0 && newProjectButton}
                  </SidebarSection>

                  {/* Recents */}
                  <RecentsSection
                    chatSessions={chatSessions}
                    hasMore={hasMore}
                    isLoadingMore={isLoadingMore}
                    onLoadMore={loadMore}
                  />
                </DndContext>
              </>
            )}
          </SidebarBody>
        </SidebarWrapper>
      </>
    );
  }
);
MemoizedAppSidebarInner.displayName = "AppSidebar";

export default function AppSidebar() {
  const { folded, setFolded } = useAppSidebarContext();
  const { isMobile } = useScreenSize();

  if (!isMobile)
    return (
      <MemoizedAppSidebarInner
        folded={folded}
        onFoldClick={() => setFolded((prev) => !prev)}
      />
    );

  return (
    <>
      <div
        className={cn(
          "fixed inset-y-0 left-0 z-50 transition-transform duration-200",
          folded ? "-translate-x-full" : "translate-x-0"
        )}
      >
        <MemoizedAppSidebarInner
          folded={false}
          onFoldClick={() => setFolded(true)}
        />
      </div>

      {/* Hitbox to close the sidebar if anything outside of it is touched */}
      <div
        className={cn(
          "fixed inset-0 z-40 bg-mask-03 backdrop-blur-03 transition-opacity duration-200",
          folded
            ? "opacity-0 pointer-events-none"
            : "opacity-100 pointer-events-auto"
        )}
        onClick={() => setFolded(true)}
      />
    </>
  );
}


================================================
FILE: web/src/sections/sidebar/ChatButton.tsx
================================================
"use client";

import React, { useState, memo, useMemo, useEffect } from "react";
import { useDraggable } from "@dnd-kit/core";
import useChatSessions from "@/hooks/useChatSessions";
import { deleteChatSession, renameChatSession } from "@/app/app/services/lib";
import { ChatSession } from "@/app/app/interfaces";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { cn, noProp } from "@/lib/utils";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { useAppRouter } from "@/hooks/appNavigation";
import {
  Project,
  removeChatSessionFromProject,
  createProject as createProjectService,
} from "@/app/app/projects/projectsService";
import { useProjectsContext } from "@/providers/ProjectsContext";
import MoveCustomAgentChatModal from "@/components/modals/MoveCustomAgentChatModal";
import { UNNAMED_CHAT } from "@/lib/constants";
import ShareChatSessionModal from "@/sections/modals/ShareChatSessionModal";
import { SidebarTab } from "@opal/components";
import IconButton from "@/refresh-components/buttons/IconButton";
import { Button } from "@opal/components";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import { DRAG_TYPES, LOCAL_STORAGE_KEYS } from "@/sections/sidebar/constants";
import {
  shouldShowMoveModal,
  showErrorNotification,
  handleMoveOperation,
} from "@/sections/sidebar/sidebarUtils";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import useAppFocus from "@/hooks/useAppFocus";
import LineItem from "@/refresh-components/buttons/LineItem";
import {
  SvgChevronLeft,
  SvgEdit,
  SvgFolder,
  SvgFolderIn,
  SvgFolderPlus,
  SvgMoreHorizontal,
  SvgShare,
  SvgTrash,
} from "@opal/icons";
import useOnMount from "@/hooks/useOnMount";
import { useAgents, usePinnedAgents } from "@/hooks/useAgents";

export interface PopoverSearchInputProps {
  setShowMoveOptions: (show: boolean) => void;
  onSearch: (term: string) => void;
}

export function PopoverSearchInput({
  setShowMoveOptions,
  onSearch,
}: PopoverSearchInputProps) {
  const [searchTerm, setSearchTerm] = useState("");

  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const value = e.target.value;
    setSearchTerm(value);
    onSearch(value);
  };
  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Escape") {
      setShowMoveOptions(false);
    }
  };

  const handleClickBackButton = (e: React.MouseEvent<HTMLButtonElement>) => {
    e.stopPropagation();
    setShowMoveOptions(false);
    setSearchTerm("");
  };

  return (
    <div className="flex flex-row items-center">
      <Button
        icon={SvgChevronLeft}
        onClick={handleClickBackButton}
        prominence="tertiary"
        size="sm"
      />
      <InputTypeIn
        type="text"
        value={searchTerm}
        onChange={handleChange}
        onKeyDown={handleKeyDown}
        placeholder="Search Projects"
        onClick={noProp()}
        variant="internal"
        autoFocus
      />
    </div>
  );
}

export interface ChatButtonProps {
  chatSession: ChatSession;
  project?: Project;
  draggable?: boolean;
}

const ChatButton = memo(
  ({ chatSession, project, draggable = false }: ChatButtonProps) => {
    const route = useAppRouter();
    const activeSidebarTab = useAppFocus();
    const active = useMemo(
      () =>
        activeSidebarTab.isChat() &&
        activeSidebarTab.getId() === chatSession.id,
      [activeSidebarTab, chatSession.id]
    );
    const mounted = useOnMount();
    const [displayName, setDisplayName] = useState(
      chatSession.name || UNNAMED_CHAT
    );
    const [renaming, setRenaming] = useState(false);
    const [deleteConfirmationModalOpen, setDeleteConfirmationModalOpen] =
      useState(false);
    const [showMoveOptions, setShowMoveOptions] = useState(false);
    const [showShareModal, setShowShareModal] = useState(false);
    const [searchTerm, setSearchTerm] = useState("");
    const [popoverItems, setPopoverItems] = useState<React.ReactNode[]>([]);
    const { refreshChatSessions, removeSession } = useChatSessions();
    const {
      refreshCurrentProjectDetails,
      projects,
      fetchProjects,
      currentProjectId,
      createProject,
    } = useProjectsContext();
    const { agents } = useAgents();
    const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();
    const [popoverOpen, setPopoverOpen] = useState(false);
    const [pendingMoveProjectId, setPendingMoveProjectId] = useState<
      number | null
    >(null);
    const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =
      useState(false);
    const [navigateAfterMoveProjectId, setNavigateAfterMoveProjectId] =
      useState<number | null>(null);

    // Drag and drop setup for chat sessions
    const dragId = `${DRAG_TYPES.CHAT}-${chatSession.id}`;
    const { attributes, listeners, setNodeRef, transform, isDragging } =
      useDraggable({
        id: dragId,
        data: {
          type: DRAG_TYPES.CHAT,
          chatSession,
          projectId: project?.id,
        },
        disabled: !draggable || renaming,
      });

    // Sync local name state when chatSession.name changes (e.g., after auto-naming)
    useEffect(() => {
      const newName = chatSession.name || UNNAMED_CHAT;
      const oldName = displayName;

      // Only animate if transitioning from UNNAMED_CHAT to a real name
      if (oldName === UNNAMED_CHAT && newName !== UNNAMED_CHAT && mounted) {
        // Type out the name character by character
        let currentIndex = 0;
        const typingInterval = setInterval(() => {
          currentIndex++;
          setDisplayName(newName.slice(0, currentIndex));

          if (currentIndex >= newName.length) {
            clearInterval(typingInterval);
          }
        }, 30); // 30ms per character

        return () => clearInterval(typingInterval);
      } else {
        // No animation for other changes (manual rename, initial load, etc.)
        setDisplayName(newName);
      }
    }, [chatSession.name, mounted]);

    const filteredProjects = useMemo(() => {
      if (!searchTerm) return projects;
      const term = searchTerm.toLowerCase();
      return projects.filter((project) =>
        project.name.toLowerCase().includes(term)
      );
    }, [projects, searchTerm]);

    useEffect(() => {
      if (!showMoveOptions) {
        const popoverItems = [
          <LineItem
            key="share"
            icon={SvgShare}
            onClick={noProp(() => setShowShareModal(true))}
          >
            Share
          </LineItem>,
          <LineItem
            key="rename"
            icon={SvgEdit}
            onClick={noProp(() => setRenaming(true))}
          >
            Rename
          </LineItem>,
          <LineItem
            key="move"
            icon={SvgFolderIn}
            onClick={noProp(() => setShowMoveOptions(true))}
          >
            Move to Project
          </LineItem>,
          project && (
            <LineItem
              key="remove"
              icon={SvgFolder}
              onClick={noProp(() => handleRemoveFromProject())}
            >
              {`Remove from ${project.name}`}
            </LineItem>
          ),
          null,
          <LineItem
            key="delete"
            icon={SvgTrash}
            danger
            onClick={noProp(() => setDeleteConfirmationModalOpen(true))}
          >
            Delete
          </LineItem>,
        ];
        setPopoverItems(popoverItems);
      } else {
        const availableProjects = filteredProjects.filter(
          (candidateProject) => candidateProject.id !== project?.id
        );

        const popoverItems = [
          <PopoverSearchInput
            key="search"
            setShowMoveOptions={setShowMoveOptions}
            onSearch={setSearchTerm}
          />,
          ...availableProjects.map((targetProject) => (
            <LineItem
              key={targetProject.id}
              icon={SvgFolder}
              onClick={noProp(() => handleChatMove(targetProject))}
            >
              {targetProject.name}
            </LineItem>
          )),
          // Show "Create New Project" option when no projects match the search
          ...(availableProjects.length === 0 && searchTerm.trim() !== ""
            ? [
                null,
                <LineItem
                  key="create-new"
                  icon={SvgFolderPlus}
                  onClick={noProp(() =>
                    handleCreateProjectAndMove(searchTerm.trim())
                  )}
                >
                  {`Create ${searchTerm.trim()}`}
                </LineItem>,
              ]
            : []),
        ];
        setPopoverItems(popoverItems);
      }
    }, [
      showMoveOptions,
      filteredProjects,
      refreshChatSessions,
      fetchProjects,
      currentProjectId,
      refreshCurrentProjectDetails,
      project,
      chatSession.id,
      searchTerm,
      createProject,
    ]);

    // Pin the chat's agent when clicking on the conversation
    async function handleClick() {
      const agent = agents.find((a) => a.id === chatSession.persona_id);
      if (agent) {
        const isAlreadyPinned = pinnedAgents.some((a) => a.id === agent.id);
        if (!isAlreadyPinned) {
          await togglePinnedAgent(agent, true);
        }
      }
    }

    async function handleRename(newName: string) {
      setDisplayName(newName);
      await renameChatSession(chatSession.id, newName);
      await refreshChatSessions();
    }

    async function handleChatDelete() {
      try {
        await deleteChatSession(chatSession.id);
        removeSession(chatSession.id);

        if (project) {
          await fetchProjects();
          await refreshCurrentProjectDetails();

          // Only route if the deleted chat is the currently opened chat session
          if (active) {
            route({ projectId: project.id });
          }
        }
        await refreshChatSessions();
      } catch (error) {
        console.error("Failed to delete chat:", error);
        showErrorNotification("Failed to delete chat. Please try again.");
      }
    }

    async function performMove(targetProjectId: number) {
      try {
        await handleMoveOperation({
          chatSession,
          targetProjectId,
          refreshChatSessions,
          refreshCurrentProjectDetails,
          fetchProjects,
          currentProjectId,
        });
        setShowMoveOptions(false);
        setSearchTerm("");
      } catch (error) {
        // handleMoveOperation already handles error notification
        console.error("Failed to move chat:", error);
      }
    }

    async function handleChatMove(targetProject: Project) {
      if (shouldShowMoveModal(chatSession)) {
        setPendingMoveProjectId(targetProject.id);
        setShowMoveCustomAgentModal(true);
        return;
      }
      await performMove(targetProject.id);
    }

    async function handleRemoveFromProject() {
      try {
        await removeChatSessionFromProject(chatSession.id);
        const projectRefreshPromise = currentProjectId
          ? refreshCurrentProjectDetails()
          : fetchProjects();
        await Promise.all([refreshChatSessions(), projectRefreshPromise]);
        setShowMoveOptions(false);
        setSearchTerm("");
      } catch (error) {
        console.error("Failed to remove chat from project:", error);
      }
    }

    async function handleCreateProjectAndMove(projectName: string) {
      try {
        // Create the new project using the service directly (without navigation)
        const newProject = await createProjectService(projectName);

        // Refresh projects list to include the new project
        await fetchProjects();

        // Mark that we want to navigate to this project after moving
        setNavigateAfterMoveProjectId(newProject.id);

        // Check if we should show the move modal for custom agents
        if (shouldShowMoveModal(chatSession)) {
          setPendingMoveProjectId(newProject.id);
          setShowMoveCustomAgentModal(true);
          setShowMoveOptions(false);
          setSearchTerm("");
          return;
        }

        // Move the chat to the newly created project
        await performMove(newProject.id);

        // Navigate to the new project to see the chat
        route({ projectId: newProject.id });
        setNavigateAfterMoveProjectId(null);
      } catch (error) {
        console.error("Failed to create project and move chat:", error);
        showErrorNotification("Failed to create project. Please try again.");
        setNavigateAfterMoveProjectId(null);
      }
    }

    const rightMenu = (
      <>
        <Popover.Trigger asChild onClick={noProp()}>
          <div>
            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
            <IconButton
              icon={SvgMoreHorizontal}
              className={cn(
                !popoverOpen && "hidden",
                !renaming && "group-hover/SidebarTab:flex"
              )}
              transient={popoverOpen}
              internal
            />
          </div>
        </Popover.Trigger>
        <Popover.Content side="right" align="start" width="md">
          <PopoverMenu>{popoverItems}</PopoverMenu>
        </Popover.Content>
      </>
    );

    const popover = (
      <Popover
        onOpenChange={(state) => {
          setPopoverOpen(state);
          if (!state) {
            setShowMoveOptions(false);
            setSearchTerm("");
          }
        }}
      >
        <Popover.Anchor>
          <SidebarTab
            href={isDragging ? undefined : `/app?chatId=${chatSession.id}`}
            onClick={handleClick}
            selected={active}
            rightChildren={rightMenu}
            nested={!!project}
          >
            {renaming ? (
              <ButtonRenaming
                initialName={chatSession.name}
                onRename={handleRename}
                onClose={() => setRenaming(false)}
              />
            ) : (
              displayName
            )}
          </SidebarTab>
        </Popover.Anchor>
      </Popover>
    );

    return (
      <>
        {deleteConfirmationModalOpen && (
          <ConfirmationModalLayout
            title="Delete Chat"
            icon={SvgTrash}
            onClose={() => setDeleteConfirmationModalOpen(false)}
            submit={
              <Button
                variant="danger"
                onClick={() => {
                  setDeleteConfirmationModalOpen(false);
                  handleChatDelete();
                }}
              >
                Delete
              </Button>
            }
          >
            Are you sure you want to delete this chat? This action cannot be
            undone.
          </ConfirmationModalLayout>
        )}

        {showMoveCustomAgentModal && (
          <MoveCustomAgentChatModal
            onCancel={() => {
              setShowMoveCustomAgentModal(false);
              setPendingMoveProjectId(null);
              setNavigateAfterMoveProjectId(null);
            }}
            onConfirm={async (doNotShowAgain: boolean) => {
              if (doNotShowAgain && typeof window !== "undefined") {
                window.localStorage.setItem(
                  LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,
                  "true"
                );
              }
              const target = pendingMoveProjectId;
              const shouldNavigate = navigateAfterMoveProjectId;
              setShowMoveCustomAgentModal(false);
              setPendingMoveProjectId(null);
              if (target != null) {
                await performMove(target);
                // Navigate if this was triggered by creating a new project
                if (shouldNavigate != null) {
                  route({ projectId: shouldNavigate });
                  setNavigateAfterMoveProjectId(null);
                }
              }
            }}
          />
        )}

        {showShareModal && (
          <ShareChatSessionModal
            chatSession={chatSession}
            onClose={() => setShowShareModal(false)}
          />
        )}

        {draggable ? (
          <div
            ref={setNodeRef}
            style={{
              transform: transform
                ? `translate3d(0px, ${transform.y}px, 0)`
                : undefined,
              opacity: isDragging ? 0.5 : 1,
            }}
            {...(mounted ? attributes : {})}
            {...(mounted ? listeners : {})}
          >
            {popover}
          </div>
        ) : (
          popover
        )}
      </>
    );
  }
);
ChatButton.displayName = "ChatButton";

export default ChatButton;


================================================
FILE: web/src/sections/sidebar/ChatSearchCommandMenu.tsx
================================================
"use client";

import React, { useState, useMemo, useCallback } from "react";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import CommandMenu, {
  useCommandMenuContext,
} from "@/refresh-components/commandmenu/CommandMenu";
import { useProjects } from "@/lib/hooks/useProjects";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import CreateProjectModal from "@/components/modals/CreateProjectModal";
import {
  formatDisplayTime,
  highlightMatch,
} from "@/sections/sidebar/chatSearchUtils";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { useCurrentAgent } from "@/hooks/useAgents";
import Text from "@/refresh-components/texts/Text";
import {
  useChatSearchOptimistic,
  FilterableChat,
} from "./useChatSearchOptimistic";
import {
  SvgEditBig,
  SvgFolder,
  SvgFolderPlus,
  SvgBubbleText,
  SvgArrowUpDown,
  SvgKeystroke,
} from "@opal/icons";
import TextSeparator from "@/refresh-components/TextSeparator";

/**
 * Dynamic footer that shows contextual action labels based on highlighted item type
 */
function DynamicFooter() {
  const { highlightedItemType } = useCommandMenuContext();

  // "Show all" for filters, "Open" for everything else (items, actions, or no highlight)
  const actionLabel = highlightedItemType === "filter" ? "Show all" : "Open";

  return (
    <CommandMenu.Footer
      leftActions={
        <>
          <CommandMenu.FooterAction icon={SvgArrowUpDown} label="Select" />
          <CommandMenu.FooterAction icon={SvgKeystroke} label={actionLabel} />
        </>
      }
    />
  );
}

interface ChatSearchCommandMenuProps {
  trigger: React.ReactNode;
}

interface FilterableProject {
  id: number;
  label: string;
  description: string | null;
  time: string;
}

export default function ChatSearchCommandMenu({
  trigger,
}: ChatSearchCommandMenuProps) {
  const [open, setOpen] = useState(false);
  const [searchValue, setSearchValue] = useState("");
  const [activeFilter, setActiveFilter] = useState<
    "all" | "chats" | "projects"
  >("all");
  const [initialProjectName, setInitialProjectName] = useState<
    string | undefined
  >();
  const router = useRouter();

  // Data hooks
  const { projects } = useProjects();
  const combinedSettings = useSettingsContext();
  const currentAgent = useCurrentAgent();
  const createProjectModal = useCreateModal();

  // Constants for preview limits
  const PREVIEW_CHATS_LIMIT = 4;
  const PREVIEW_PROJECTS_LIMIT = 3;

  // Determine if we should enable optimistic search (when searching or viewing chats filter)
  const shouldUseOptimisticSearch =
    searchValue.trim().length > 0 || activeFilter === "chats";

  // Use optimistic search hook for chat sessions (includes fallback from useChatSessions + useProjects)
  const {
    results: filteredChats,
    isSearching,
    hasMore,
    isLoadingMore,
    sentinelRef,
  } = useChatSearchOptimistic({
    searchQuery: searchValue,
    enabled: shouldUseOptimisticSearch,
  });

  // Transform and filter projects (sorted by latest first)
  const filteredProjects = useMemo<FilterableProject[]>(() => {
    const projectList = projects
      .map((project) => ({
        id: project.id,
        label: project.name,
        description: project.description,
        time: project.created_at,
      }))
      .sort((a, b) => new Date(b.time).getTime() - new Date(a.time).getTime());

    if (!searchValue.trim()) return projectList;

    const term = searchValue.toLowerCase();
    return projectList.filter(
      (project) =>
        project.label.toLowerCase().includes(term) ||
        project.description?.toLowerCase().includes(term)
    );
  }, [projects, searchValue]);

  // Compute displayed items based on filter state
  const displayedChats = useMemo(() => {
    if (activeFilter === "all" && !searchValue.trim()) {
      return filteredChats.slice(0, PREVIEW_CHATS_LIMIT);
    }
    return filteredChats;
  }, [filteredChats, activeFilter, searchValue]);

  const displayedProjects = useMemo(() => {
    if (activeFilter === "all" && !searchValue.trim()) {
      return filteredProjects.slice(0, PREVIEW_PROJECTS_LIMIT);
    }
    return filteredProjects;
  }, [filteredProjects, activeFilter, searchValue]);

  // Header filters for showing active filter as a chip
  const headerFilters = useMemo(() => {
    if (activeFilter === "chats") {
      return [{ id: "chats", label: "Sessions" }];
    }
    if (activeFilter === "projects") {
      return [{ id: "projects", label: "Projects" }];
    }
    return [];
  }, [activeFilter]);

  const handleFilterRemove = useCallback(() => {
    setActiveFilter("all");
  }, []);

  // Navigation handlers
  const handleNewSession = useCallback(() => {
    const href =
      combinedSettings?.settings?.disable_default_assistant && currentAgent
        ? `/app?agentId=${currentAgent.id}`
        : "/app";
    router.push(href as Route);
    setOpen(false);
  }, [router, combinedSettings, currentAgent]);

  const handleChatSelect = useCallback(
    (chatId: string) => {
      router.push(`/chat?chatId=${chatId}` as Route);
      setOpen(false);
    },
    [router]
  );

  const handleProjectSelect = useCallback(
    (projectId: number) => {
      router.push(`/chat?projectId=${projectId}` as Route);
      setOpen(false);
    },
    [router]
  );

  const handleNewProject = useCallback(
    (initialName?: string) => {
      setInitialProjectName(initialName);
      setOpen(false);
      createProjectModal.toggle(true);
    },
    [createProjectModal]
  );

  const handleOpenChange = useCallback((newOpen: boolean) => {
    setOpen(newOpen);
    if (!newOpen) {
      setSearchValue("");
      setActiveFilter("all");
    }
  }, []);

  const handleEmptyBackspace = useCallback(() => {
    if (activeFilter !== "all") {
      // Remove active filter, return to root menu
      setActiveFilter("all");
    } else {
      // No filter active, close the menu
      setOpen(false);
    }
  }, [activeFilter]);

  const hasSearchValue = searchValue.trim().length > 0;

  return (
    <>
      <div aria-label="Open chat search" onClick={() => setOpen(true)}>
        {trigger}
      </div>

      <CommandMenu open={open} onOpenChange={handleOpenChange}>
        <CommandMenu.Content>
          <CommandMenu.Header
            placeholder="Search chat sessions, projects..."
            value={searchValue}
            onValueChange={setSearchValue}
            filters={headerFilters}
            onFilterRemove={handleFilterRemove}
            onClose={() => setOpen(false)}
            onEmptyBackspace={handleEmptyBackspace}
          />

          <CommandMenu.List
            emptyMessage={
              hasSearchValue ? "No results found" : "No chats or projects yet"
            }
          >
            {/* New Session action - always visible in "all" filter, even during search */}
            {activeFilter === "all" && (
              <CommandMenu.Action
                value="new-session"
                icon={SvgEditBig}
                onSelect={handleNewSession}
                defaultHighlight={!hasSearchValue}
              >
                New Session
              </CommandMenu.Action>
            )}

            {/* Recent Sessions section - show if filter is 'all' or 'chats' */}
            {(activeFilter === "all" || activeFilter === "chats") &&
              displayedChats.length > 0 && (
                <>
                  {searchValue.trim().length === 0 && (
                    <CommandMenu.Filter
                      value="recent-sessions"
                      onSelect={() => setActiveFilter("chats")}
                      isApplied={
                        activeFilter === "chats" ||
                        filteredChats.length <= PREVIEW_CHATS_LIMIT
                      }
                    >
                      {activeFilter === "chats" ? "Recent" : "Recent Sessions"}
                    </CommandMenu.Filter>
                  )}
                  {displayedChats.map((chat) => (
                    <CommandMenu.Item
                      key={chat.id}
                      value={`chat-${chat.id}`}
                      icon={SvgBubbleText}
                      rightContent={({ isHighlighted }) =>
                        isHighlighted ? (
                          <Text figureKeystroke text02>
                            ↵
                          </Text>
                        ) : (
                          <Text secondaryBody text03>
                            {formatDisplayTime(chat.time)}
                          </Text>
                        )
                      }
                      onSelect={() => handleChatSelect(chat.id)}
                    >
                      {highlightMatch(chat.label, searchValue)}
                    </CommandMenu.Item>
                  ))}
                  {/* Infinite scroll sentinel and loading indicator for chats */}
                  {activeFilter === "chats" && hasMore && (
                    <div ref={sentinelRef} className="h-1" aria-hidden="true" />
                  )}
                  {activeFilter === "chats" &&
                    (isLoadingMore || isSearching) && (
                      <div className="flex justify-center items-center py-3">
                        <div className="h-5 w-5 animate-spin rounded-full border-2 border-solid border-text-04 border-t-text-02" />
                      </div>
                    )}
                </>
              )}

            {/* Projects section - show if filter is 'all' or 'projects' */}
            {(activeFilter === "all" || activeFilter === "projects") && (
              <>
                <CommandMenu.Filter
                  value="projects"
                  onSelect={() => setActiveFilter("projects")}
                  isApplied={
                    activeFilter === "projects" ||
                    filteredProjects.length <= PREVIEW_PROJECTS_LIMIT
                  }
                >
                  Projects
                </CommandMenu.Filter>
                {/* New Project action - shown after Projects filter when no search term */}
                {!hasSearchValue && activeFilter === "all" && (
                  <CommandMenu.Action
                    value="new-project"
                    icon={SvgFolderPlus}
                    onSelect={() => handleNewProject()}
                  >
                    New Project
                  </CommandMenu.Action>
                )}
                {displayedProjects.map((project) => (
                  <CommandMenu.Item
                    key={project.id}
                    value={`project-${project.id}`}
                    icon={SvgFolder}
                    rightContent={({ isHighlighted }) =>
                      isHighlighted ? (
                        <Text figureKeystroke text02>
                          ↵
                        </Text>
                      ) : (
                        <Text secondaryBody text03>
                          {formatDisplayTime(project.time)}
                        </Text>
                      )
                    }
                    onSelect={() => handleProjectSelect(project.id)}
                  >
                    {highlightMatch(project.label, searchValue)}
                  </CommandMenu.Item>
                ))}
              </>
            )}

            {/* Create New Project with search term - shown at bottom when searching */}
            {hasSearchValue &&
              (activeFilter === "all" || activeFilter === "projects") && (
                <CommandMenu.Action
                  value="create-project-with-name"
                  icon={SvgFolderPlus}
                  onSelect={() => handleNewProject(searchValue.trim())}
                >
                  <>
                    Create New Project "
                    <span className="text-text-05">{searchValue.trim()}</span>"
                  </>
                </CommandMenu.Action>
              )}

            {/* No more results separator - shown when no results for the active filter */}
            {((activeFilter === "chats" && displayedChats.length === 0) ||
              (activeFilter === "projects" && displayedProjects.length === 0) ||
              (activeFilter === "all" &&
                displayedChats.length === 0 &&
                displayedProjects.length === 0)) && (
              <TextSeparator text="No more results" className="mt-auto mb-2" />
            )}
          </CommandMenu.List>

          <DynamicFooter />
        </CommandMenu.Content>
      </CommandMenu>

      {/* Project creation modal */}
      <createProjectModal.Provider>
        <CreateProjectModal initialProjectName={initialProjectName} />
      </createProjectModal.Provider>
    </>
  );
}


================================================
FILE: web/src/sections/sidebar/CreateConnectorSidebar.tsx
================================================
import { useFormContext } from "@/components/context/FormContext";
import { credentialTemplates } from "@/lib/connectors/credentials";
import Text from "@/refresh-components/texts/Text";
import StepSidebar from "@/sections/sidebar/StepSidebarWrapper";
import { useUser } from "@/providers/UserProvider";
import { SvgSettings } from "@opal/icons";

export default function Sidebar() {
  const { formStep, setFormStep, connector, allowAdvanced, allowCreate } =
    useFormContext();
  const noCredential = credentialTemplates[connector] == null;

  const { isAdmin } = useUser();
  const buttonName = isAdmin ? "Admin Page" : "Curator Page";

  const settingSteps = [
    ...(!noCredential ? ["Credential"] : []),
    "Connector",
    ...(connector == "file" ? [] : ["Advanced (optional)"]),
  ];

  return (
    <StepSidebar
      buttonName={buttonName}
      buttonIcon={SvgSettings}
      buttonHref="/admin/add-connector"
    >
      <div className="relative">
        {connector != "file" && (
          <div className="absolute h-[85%] left-[6px] top-[8px] bottom-0 w-0.5 bg-background-tint-04"></div>
        )}
        {settingSteps.map((step, index) => {
          const allowed =
            (step == "Connector" && allowCreate) ||
            (step == "Advanced (optional)" && allowAdvanced) ||
            index <= formStep;

          return (
            <div
              key={index}
              className={`flex items-center mb-6 relative ${
                !allowed ? "cursor-not-allowed" : "cursor-pointer"
              }`}
              onClick={() => {
                if (allowed) {
                  setFormStep(index - (noCredential ? 1 : 0));
                }
              }}
            >
              <div className="flex-shrink-0 mr-4 z-10">
                <div
                  className={`rounded-full h-3.5 w-3.5 flex items-center justify-center ${
                    allowed ? "bg-blue-500" : "bg-background-tint-04"
                  }`}
                >
                  {formStep === index && (
                    <div className="h-2 w-2 rounded-full bg-white"></div>
                  )}
                </div>
              </div>
              <Text as="p" text04={index <= formStep} text02={index > formStep}>
                {step}
              </Text>
            </div>
          );
        })}
      </div>
    </StepSidebar>
  );
}


================================================
FILE: web/src/sections/sidebar/NotificationsPopover.tsx
================================================
"use client";

import useSWR from "swr";
import { SWR_KEYS } from "@/lib/swr-keys";
import { useRouter } from "next/navigation";
import { Route } from "next";
import { track, AnalyticsEvent } from "@/lib/analytics";
import { Notification, NotificationType } from "@/interfaces/settings";
import { errorHandlingFetcher } from "@/lib/fetcher";
import Text from "@/refresh-components/texts/Text";
import LineItem from "@/refresh-components/buttons/LineItem";
import { SvgSparkle, SvgRefreshCw, SvgX } from "@opal/icons";
import { IconProps } from "@opal/types";
import { Button } from "@opal/components";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { Section } from "@/layouts/general-layouts";
import Separator from "@/refresh-components/Separator";

function getNotificationIcon(
  notifType: string
): React.FunctionComponent<IconProps> {
  switch (notifType) {
    case NotificationType.REINDEX:
      return SvgRefreshCw;
    default:
      return SvgSparkle;
  }
}

interface NotificationsPopoverProps {
  onClose: () => void;
  onNavigate: () => void;
  onShowBuildIntro?: () => void;
}

export default function NotificationsPopover({
  onClose,
  onNavigate,
  onShowBuildIntro,
}: NotificationsPopoverProps) {
  const router = useRouter();
  const {
    data: notifications,
    mutate,
    isLoading,
  } = useSWR<Notification[]>(SWR_KEYS.notifications, errorHandlingFetcher);

  const handleNotificationClick = (notification: Notification) => {
    // Handle build_mode feature announcement specially - show intro animation
    if (
      notification.notif_type === NotificationType.FEATURE_ANNOUNCEMENT &&
      notification.additional_data?.feature === "build_mode" &&
      onShowBuildIntro
    ) {
      onNavigate();
      onShowBuildIntro();
      return;
    }

    const link = notification.additional_data?.link;
    if (!link) return;

    // Track release notes clicks
    if (notification.notif_type === NotificationType.RELEASE_NOTES) {
      track(AnalyticsEvent.RELEASE_NOTIFICATION_CLICKED, {
        version: notification.additional_data?.version,
      });
    }

    // External links open in new tab
    if (link.startsWith("http://") || link.startsWith("https://")) {
      if (!notification.dismissed) {
        handleDismiss(notification.id);
      }
      window.open(link, "_blank", "noopener,noreferrer");
      return;
    }

    // Relative links navigate internally
    onNavigate();
    router.push(link as Route);
  };

  const handleDismiss = async (
    notificationId: number,
    e?: React.MouseEvent
  ) => {
    e?.stopPropagation(); // Prevent triggering the LineItem onClick
    try {
      const response = await fetch(
        `/api/notifications/${notificationId}/dismiss`,
        {
          method: "POST",
        }
      );
      if (response.ok) {
        mutate(); // Refresh the notifications list
      }
    } catch (error) {
      console.error("Error dismissing notification:", error);
    }
  };

  return (
    <Section gap={0.5} padding={0.25}>
      <Section flexDirection="row" justifyContent="between" padding={0.5}>
        <Text headingH3>Notifications</Text>
        <Button icon={SvgX} prominence="tertiary" size="sm" onClick={onClose} />
      </Section>

      <Separator noPadding className="px-2" />

      <Section>
        {isLoading ? (
          <div className="h-48">
            <Section>
              <SimpleLoader />
            </Section>
          </div>
        ) : !notifications || notifications.length === 0 ? (
          <div className="h-48">
            <Section>
              <Text as="p" text03>
                No notifications
              </Text>
            </Section>
          </div>
        ) : (
          <div className="max-h-96 overflow-y-auto w-full">
            <Section alignItems="stretch" gap={0}>
              {notifications.map((notification) => (
                <LineItem
                  key={notification.id}
                  icon={getNotificationIcon(notification.notif_type)}
                  description={notification.description ?? undefined}
                  onClick={() => handleNotificationClick(notification)}
                  strikethrough={notification.dismissed}
                  rightChildren={
                    !notification.dismissed ? (
                      <Button
                        prominence="tertiary"
                        size="sm"
                        icon={SvgX}
                        onClick={(e) => handleDismiss(notification.id, e)}
                        tooltip="Dismiss"
                      />
                    ) : undefined
                  }
                >
                  {notification.title}
                </LineItem>
              ))}
            </Section>
          </div>
        )}
      </Section>
    </Section>
  );
}


================================================
FILE: web/src/sections/sidebar/ProjectFolderButton.tsx
================================================
"use client";

import React, { useState, memo } from "react";
import { Project, useProjectsContext } from "@/providers/ProjectsContext";
import { useDroppable } from "@dnd-kit/core";
import LineItem from "@/refresh-components/buttons/LineItem";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import ChatButton from "@/sections/sidebar/ChatButton";
import { useAppRouter } from "@/hooks/appNavigation";
import { cn, noProp } from "@/lib/utils";
import { DRAG_TYPES } from "./constants";
import { SidebarTab } from "@opal/components";
import IconButton from "@/refresh-components/buttons/IconButton";
import Truncated from "@/refresh-components/texts/Truncated";
import { Button } from "@opal/components";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import type { IconProps } from "@opal/types";
import useAppFocus from "@/hooks/useAppFocus";
import {
  SvgEdit,
  SvgFolder,
  SvgFolderOpen,
  SvgFolderPartialOpen,
  SvgMoreHorizontal,
  SvgTrash,
} from "@opal/icons";

export interface ProjectFolderButtonProps {
  project: Project;
}

const ProjectFolderButton = memo(({ project }: ProjectFolderButtonProps) => {
  const route = useAppRouter();
  const [open, setOpen] = useState(false);
  const [deleteConfirmationModalOpen, setDeleteConfirmationModalOpen] =
    useState(false);
  const { renameProject, deleteProject } = useProjectsContext();
  const [isEditing, setIsEditing] = useState(false);
  const [popoverOpen, setPopoverOpen] = useState(false);
  const [isHoveringIcon, setIsHoveringIcon] = useState(false);
  const [allowHoverEffect, setAllowHoverEffect] = useState(true);
  const activeSidebar = useAppFocus();

  // Make project droppable
  const dropId = `project-${project.id}`;
  const { setNodeRef, isOver } = useDroppable({
    id: dropId,
    data: {
      type: DRAG_TYPES.PROJECT,
      project,
    },
  });

  function getFolderIcon(): React.FunctionComponent<IconProps> {
    if (open) {
      return SvgFolderOpen;
    } else {
      return isHoveringIcon && allowHoverEffect
        ? SvgFolderPartialOpen
        : SvgFolder;
    }
  }

  function handleIconClick() {
    setOpen((prev) => !prev);
    setAllowHoverEffect(false);
  }

  function handleIconHover(hovering: boolean) {
    setIsHoveringIcon(hovering);
    // Re-enable hover effects when cursor leaves the icon
    if (!hovering) {
      setAllowHoverEffect(true);
    }
  }

  function handleTextClick() {
    route({ projectId: project.id });
  }

  async function handleRename(newName: string) {
    await renameProject(project.id, newName);
  }

  const popoverItems = [
    <LineItem
      key="rename-project"
      icon={SvgEdit}
      onClick={noProp(() => setIsEditing(true))}
    >
      Rename Project
    </LineItem>,
    null,
    <LineItem
      key="delete-project"
      icon={SvgTrash}
      onClick={noProp(() => setDeleteConfirmationModalOpen(true))}
      danger
    >
      Delete Project
    </LineItem>,
  ];

  return (
    <div
      ref={setNodeRef}
      className={cn(
        "transition-colors duration-200",
        isOver && "bg-background-tint-03 rounded-08"
      )}
    >
      {/* Confirmation Modal (only for deletion) */}
      {deleteConfirmationModalOpen && (
        <ConfirmationModalLayout
          title="Delete Project"
          icon={SvgTrash}
          onClose={() => setDeleteConfirmationModalOpen(false)}
          submit={
            <Button
              variant="danger"
              onClick={() => {
                setDeleteConfirmationModalOpen(false);
                deleteProject(project.id);
              }}
            >
              Delete
            </Button>
          }
        >
          Are you sure you want to delete this project? This action cannot be
          undone.
        </ConfirmationModalLayout>
      )}

      {/* Project Folder */}
      <Popover onOpenChange={setPopoverOpen}>
        <Popover.Anchor>
          <SidebarTab
            icon={() => (
              <Button
                onMouseEnter={() => handleIconHover(true)}
                onMouseLeave={() => handleIconHover(false)}
                icon={getFolderIcon()}
                prominence="tertiary"
                size="sm"
                onClick={noProp(handleIconClick)}
              />
            )}
            selected={
              activeSidebar.isProject() &&
              activeSidebar.getId() === String(project.id)
            }
            onClick={noProp(handleTextClick)}
            rightChildren={
              <>
                <Popover.Trigger asChild onClick={noProp()}>
                  <div>
                    {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
                    <IconButton
                      icon={SvgMoreHorizontal}
                      className={cn(
                        !popoverOpen && "hidden",
                        !isEditing && "group-hover/SidebarTab:flex"
                      )}
                      transient={popoverOpen}
                      internal
                    />
                  </div>
                </Popover.Trigger>

                <Popover.Content side="right" align="end" width="md">
                  <PopoverMenu>{popoverItems}</PopoverMenu>
                </Popover.Content>
              </>
            }
          >
            {isEditing ? (
              <ButtonRenaming
                initialName={project.name}
                onRename={handleRename}
                onClose={() => setIsEditing(false)}
              />
            ) : (
              <Truncated text03>{project.name}</Truncated>
            )}
          </SidebarTab>
        </Popover.Anchor>
      </Popover>

      {/* Project Chat-Sessions */}
      {open &&
        project.chat_sessions.map((chatSession) => (
          <ChatButton
            key={chatSession.id}
            chatSession={chatSession}
            project={project}
            draggable
          />
        ))}
    </div>
  );
});
ProjectFolderButton.displayName = "ProjectFolderButton";

export default ProjectFolderButton;


================================================
FILE: web/src/sections/sidebar/SidebarBody.tsx
================================================
"use client";

import React from "react";
import OverflowDiv from "@/refresh-components/OverflowDiv";

export interface SidebarBodyProps {
  pinnedContent?: React.ReactNode;
  children?: React.ReactNode;
  footer?: React.ReactNode;
  /**
   * Unique key to enable scroll position persistence across navigation.
   * Pass this through from parent sidebar components (e.g., "admin-sidebar", "app-sidebar").
   */
  scrollKey: string;
}

export default function SidebarBody({
  pinnedContent,
  children,
  footer,
  scrollKey,
}: SidebarBodyProps) {
  return (
    <div className="flex flex-col min-h-0 h-full gap-3">
      {pinnedContent && <div className="px-2">{pinnedContent}</div>}
      <OverflowDiv className="gap-3 px-2" scrollKey={scrollKey}>
        {children}
      </OverflowDiv>
      {footer && <div className="px-2">{footer}</div>}
    </div>
  );
}


================================================
FILE: web/src/sections/sidebar/SidebarSection.tsx
================================================
"use client";

import React from "react";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";

export interface SidebarSectionProps {
  title: string;
  children?: React.ReactNode;
  action?: React.ReactNode;
  className?: string;
}

export default function SidebarSection({
  title,
  children,
  action,
  className,
}: SidebarSectionProps) {
  return (
    <div className={cn("flex flex-col group/SidebarSection", className)}>
      <div className="pl-2 pr-1.5 py-1 sticky top-[0rem] bg-background-tint-02 z-10 flex flex-row items-center justify-between min-h-[2rem]">
        <Text as="p" secondaryBody text02>
          {title}
        </Text>
        {action && (
          <div className="flex-shrink-0 opacity-0 group-hover/SidebarSection:opacity-100 transition-opacity">
            {action}
          </div>
        )}
      </div>
      <div>{children}</div>
    </div>
  );
}


================================================
FILE: web/src/sections/sidebar/SidebarWrapper.tsx
================================================
import React, { useMemo } from "react";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import Logo from "@/refresh-components/Logo";
import { SvgSidebar } from "@opal/icons";
import { useSettingsContext } from "@/providers/SettingsProvider";

interface LogoSectionProps {
  folded?: boolean;
  onFoldClick?: () => void;
}

function LogoSection({ folded, onFoldClick }: LogoSectionProps) {
  const settings = useSettingsContext();
  const logoDisplayStyle = settings.enterpriseSettings?.logo_display_style;

  const logo = useMemo(
    () => (
      <div className="px-1">
        <Logo folded={folded} size={28} />
      </div>
    ),
    [folded]
  );
  const closeButton = useMemo(
    () => (
      <div className="px-1">
        <Button
          icon={SvgSidebar}
          prominence="tertiary"
          tooltip="Close Sidebar"
          size="md"
          onClick={onFoldClick}
        />
      </div>
    ),
    [onFoldClick]
  );

  return (
    <div className="flex flex-row justify-between items-start pt-3 px-2">
      {folded === undefined ? (
        logo
      ) : folded && logoDisplayStyle !== "name_only" ? (
        <>
          <div className="group-hover/SidebarWrapper:hidden">{logo}</div>
          <div className="hidden group-hover/SidebarWrapper:flex">
            {closeButton}
          </div>
        </>
      ) : folded ? (
        closeButton
      ) : (
        <>
          {logo}
          {closeButton}
        </>
      )}
    </div>
  );
}

export interface SidebarWrapperProps {
  folded?: boolean;
  onFoldClick?: () => void;
  children?: React.ReactNode;
}

export default function SidebarWrapper({
  folded,
  onFoldClick,
  children,
}: SidebarWrapperProps) {
  return (
    // This extra `div` wrapping needs to be present (for some reason).
    // Without, the widths of the sidebars don't properly get set to the explicitly declared widths (i.e., `4rem` folded and `15rem` unfolded).
    <div>
      <div
        className={cn(
          "h-screen flex flex-col bg-background-tint-02 py-2 gap-4 group/SidebarWrapper transition-width duration-200 ease-in-out",
          folded ? "w-[3.25rem]" : "w-[15rem]"
        )}
      >
        <LogoSection folded={folded} onFoldClick={onFoldClick} />
        {children}
      </div>
    </div>
  );
}


================================================
FILE: web/src/sections/sidebar/StepSidebarWrapper.tsx
================================================
import { ReactNode } from "react";
import type { IconProps } from "@opal/types";
import { SidebarTab } from "@opal/components";
import SidebarWrapper from "@/sections/sidebar/SidebarWrapper";

export interface StepSidebarProps {
  children: ReactNode;
  buttonName: string;
  buttonIcon: React.FunctionComponent<IconProps>;
  buttonHref: string;
}

export default function StepSidebar({
  children,
  buttonName,
  buttonIcon,
  buttonHref,
}: StepSidebarProps) {
  return (
    <SidebarWrapper>
      <div className="px-2">
        <SidebarTab icon={buttonIcon} href={buttonHref}>
          {buttonName}
        </SidebarTab>
      </div>

      <div className="h-full w-full px-4">{children}</div>
    </SidebarWrapper>
  );
}


================================================
FILE: web/src/sections/sidebar/UpsertEmbeddingSidebar.tsx
================================================
import { useEmbeddingFormContext } from "@/components/context/EmbeddingContext";
import Text from "@/refresh-components/texts/Text";
import StepSidebar from "@/sections/sidebar/StepSidebarWrapper";
import { SvgSettings } from "@opal/icons";
export default function EmbeddingSidebar() {
  const { formStep, setFormStep } = useEmbeddingFormContext();

  const settingSteps = ["Embedding Model", "Reranking Model", "Advanced"];

  return (
    <StepSidebar
      buttonName="Index Settings"
      buttonIcon={SvgSettings}
      buttonHref="/admin/configuration/search"
    >
      <div className="relative">
        <div className="absolute h-[85%] left-[6px] top-[8px] bottom-0 w-0.5 bg-background-tint-04"></div>
        {settingSteps.map((step, index) => {
          const allowed = true; // All steps are always allowed for embedding configuration

          return (
            <div
              key={index}
              className={`flex items-center mb-6 relative ${
                !allowed ? "cursor-not-allowed" : "cursor-pointer"
              }`}
              onClick={() => {
                if (allowed) {
                  setFormStep(index);
                }
              }}
            >
              <div className="flex-shrink-0 mr-4 z-10">
                <div
                  className={`rounded-full h-3.5 w-3.5 flex items-center justify-center ${
                    allowed ? "bg-blue-500" : "bg-background-tint-04"
                  }`}
                >
                  {formStep === index && (
                    <div className="h-2 w-2 rounded-full bg-white"></div>
                  )}
                </div>
              </div>
              <Text as="p" text04={index <= formStep} text02={index > formStep}>
                {step}
              </Text>
            </div>
          );
        })}
      </div>
    </StepSidebar>
  );
}


================================================
FILE: web/src/sections/sidebar/UserAvatarPopover.tsx
================================================
"use client";

import { useState } from "react";
import { LOGOUT_DISABLED } from "@/lib/constants";
import { Notification } from "@/interfaces/settings";
import useSWR, { preload } from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { SWR_KEYS } from "@/lib/swr-keys";
import { checkUserIsNoAuthUser, getUserDisplayName, logout } from "@/lib/user";
import { useUser } from "@/providers/UserProvider";
import LineItem from "@/refresh-components/buttons/LineItem";
import Popover, { PopoverMenu } from "@/refresh-components/Popover";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import { SidebarTab } from "@opal/components";
import NotificationsPopover from "@/sections/sidebar/NotificationsPopover";
import {
  SvgBell,
  SvgExternalLink,
  SvgLogOut,
  SvgUser,
  SvgNotificationBubble,
} from "@opal/icons";
import { Section } from "@/layouts/general-layouts";
import { toast } from "@/hooks/useToast";
import useAppFocus from "@/hooks/useAppFocus";
import { useVectorDbEnabled } from "@/providers/SettingsProvider";
import UserAvatar from "@/refresh-components/avatars/UserAvatar";

interface SettingsPopoverProps {
  onUserSettingsClick: () => void;
  onOpenNotifications: () => void;
}

function SettingsPopover({
  onUserSettingsClick,
  onOpenNotifications,
}: SettingsPopoverProps) {
  const { user } = useUser();
  const { data: notifications } = useSWR<Notification[]>(
    SWR_KEYS.notifications,
    errorHandlingFetcher,
    { revalidateOnFocus: false }
  );
  const router = useRouter();
  const pathname = usePathname();
  const searchParams = useSearchParams();
  const undismissedCount =
    notifications?.filter((n) => !n.dismissed).length ?? 0;
  const isAnonymousUser =
    user?.is_anonymous_user || checkUserIsNoAuthUser(user?.id ?? "");
  const showLogout = user && !isAnonymousUser && !LOGOUT_DISABLED;
  const showLogin = isAnonymousUser;

  const handleLogin = () => {
    const currentUrl = `${pathname}${
      searchParams?.toString() ? `?${searchParams.toString()}` : ""
    }`;
    const encodedRedirect = encodeURIComponent(currentUrl);
    router.push(`/auth/login?next=${encodedRedirect}`);
  };

  const handleLogout = () => {
    logout()
      .then((response) => {
        if (!response?.ok) {
          alert("Failed to logout");
          return;
        }

        const currentUrl = `${pathname}${
          searchParams?.toString() ? `?${searchParams.toString()}` : ""
        }`;

        const encodedRedirect = encodeURIComponent(currentUrl);

        router.push(
          `/auth/login?disableAutoRedirect=true&next=${encodedRedirect}`
        );
      })

      .catch(() => {
        toast.error("Failed to logout");
      });
  };

  return (
    <>
      <PopoverMenu>
        {[
          <div key="user-settings" data-testid="Settings/user-settings">
            <LineItem
              icon={SvgUser}
              href="/app/settings"
              onClick={onUserSettingsClick}
            >
              User Settings
            </LineItem>
          </div>,
          <LineItem
            key="notifications"
            icon={SvgBell}
            onClick={onOpenNotifications}
          >
            {`Notifications${
              undismissedCount > 0 ? ` (${undismissedCount})` : ""
            }`}
          </LineItem>,
          <LineItem
            key="help-faq"
            icon={SvgExternalLink}
            href="https://docs.onyx.app"
            target="_blank"
            rel="noopener noreferrer"
          >
            Help & FAQ
          </LineItem>,
          null,
          showLogin && (
            <LineItem key="log-in" icon={SvgUser} onClick={handleLogin}>
              Log in
            </LineItem>
          ),
          showLogout && (
            <LineItem
              key="log-out"
              icon={SvgLogOut}
              danger
              onClick={handleLogout}
            >
              Log out
            </LineItem>
          ),
        ]}
      </PopoverMenu>
    </>
  );
}

export interface SettingsProps {
  folded?: boolean;
  onShowBuildIntro?: () => void;
}

export default function UserAvatarPopover({
  folded,
  onShowBuildIntro,
}: SettingsProps) {
  const [popupState, setPopupState] = useState<
    "Settings" | "Notifications" | undefined
  >(undefined);
  const { user } = useUser();
  const appFocus = useAppFocus();
  const vectorDbEnabled = useVectorDbEnabled();

  // Fetch notifications for display
  // The GET endpoint also triggers a refresh if release notes are stale
  const { data: notifications } = useSWR<Notification[]>(
    SWR_KEYS.notifications,
    errorHandlingFetcher
  );

  const userDisplayName = getUserDisplayName(user);
  const undismissedCount =
    notifications?.filter((n) => !n.dismissed).length ?? 0;
  const hasNotifications = undismissedCount > 0;

  const handlePopoverOpen = (state: boolean) => {
    if (state) {
      // Prefetch user settings data when popover opens for instant modal display
      preload("/api/user/pats", errorHandlingFetcher);
      preload("/api/federated/oauth-status", errorHandlingFetcher);
      if (vectorDbEnabled) {
        preload("/api/manage/connector-status", errorHandlingFetcher);
      }
      preload("/api/llm/provider", errorHandlingFetcher);
      setPopupState("Settings");
    } else {
      setPopupState(undefined);
    }
  };

  return (
    <Popover open={!!popupState} onOpenChange={handlePopoverOpen}>
      <Popover.Trigger asChild>
        <div id="onyx-user-dropdown">
          <SidebarTab
            icon={() => (
              <div className="w-[16px] flex flex-col justify-center items-center">
                <UserAvatar user={user} size={18} />
              </div>
            )}
            rightChildren={
              hasNotifications ? (
                <Section padding={0.5}>
                  <SvgNotificationBubble size={6} />
                </Section>
              ) : undefined
            }
            type="button"
            selected={!!popupState || appFocus.isUserSettings()}
            folded={folded}
          >
            {userDisplayName}
          </SidebarTab>
        </div>
      </Popover.Trigger>

      <Popover.Content
        align="end"
        side="right"
        width={popupState === "Notifications" ? "xl" : "md"}
      >
        {popupState === "Settings" && (
          <SettingsPopover
            onUserSettingsClick={() => {
              setPopupState(undefined);
            }}
            onOpenNotifications={() => setPopupState("Notifications")}
          />
        )}
        {popupState === "Notifications" && (
          <NotificationsPopover
            onClose={() => setPopupState("Settings")}
            onNavigate={() => setPopupState(undefined)}
            onShowBuildIntro={onShowBuildIntro}
          />
        )}
      </Popover.Content>
    </Popover>
  );
}


================================================
FILE: web/src/sections/sidebar/chatSearchUtils.ts
================================================
import React from "react";

/**
 * Escapes special regex characters in a string
 */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Highlights matched portions of text based on search query.
 * Matched portions get text-05 (highlighted), non-matched stay as default.
 */
export function highlightMatch(text: string, query: string): React.ReactNode {
  if (!query.trim()) return text;

  const escapedQuery = escapeRegex(query.trim());
  const regex = new RegExp(`(${escapedQuery})`, "gi");
  const parts = text.split(regex);

  if (parts.length === 1) return text; // No matches

  return parts.map((part, i) =>
    i % 2 === 1
      ? React.createElement("span", { key: i, className: "text-text-05" }, part)
      : React.createElement(React.Fragment, { key: i }, part)
  );
}

/**
 * Formats a date string for display in the chat search menu.
 * Examples: "just now", "5 mins ago", "3 hours ago", "yesterday", "3 days ago", "October 23"
 */
export function formatDisplayTime(isoDate: string): string {
  const date = new Date(isoDate);
  const now = new Date();
  const diffMs = now.getTime() - date.getTime();

  if (diffMs < 0) {
    return "just now";
  }

  const diffMins = Math.floor(diffMs / (1000 * 60));
  const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
  const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));

  // Just now (less than 1 minute)
  if (diffMins < 1) {
    return "just now";
  }

  // X mins ago (1-59 minutes)
  if (diffMins < 60) {
    return `${diffMins} ${diffMins === 1 ? "min" : "mins"} ago`;
  }

  // X hours ago (1-23 hours)
  if (diffHours < 24) {
    return `${diffHours} ${diffHours === 1 ? "hour" : "hours"} ago`;
  }

  // Check if yesterday
  const yesterday = new Date(now);
  yesterday.setDate(yesterday.getDate() - 1);
  if (
    date.getDate() === yesterday.getDate() &&
    date.getMonth() === yesterday.getMonth() &&
    date.getFullYear() === yesterday.getFullYear()
  ) {
    return "yesterday";
  }

  // X days ago (2-7 days)
  if (diffDays <= 7) {
    return `${diffDays} ${diffDays === 1 ? "day" : "days"} ago`;
  }

  // Month Day format (e.g., "October 23")
  return date.toLocaleDateString("en-US", {
    month: "long",
    day: "numeric",
  });
}


================================================
FILE: web/src/sections/sidebar/constants.ts
================================================
export const DRAG_TYPES = {
  CHAT: "chat",
  PROJECT: "project",
  RECENTS: "recents",
} as const;

export const LOCAL_STORAGE_KEYS = {
  HIDE_MOVE_CUSTOM_AGENT_MODAL: "onyx:hideMoveCustomAgentModal",
} as const;

export const DEFAULT_PERSONA_ID = 0;

export const FEATURE_FLAGS = {
  CRAFT_ANIMATION_DISABLED: "craft-animation-disabled",
} as const;


================================================
FILE: web/src/sections/sidebar/sidebarUtils.ts
================================================
import { ChatSession } from "@/app/app/interfaces";
import { LOCAL_STORAGE_KEYS, DEFAULT_PERSONA_ID } from "./constants";
import { moveChatSession } from "@/app/app/projects/projectsService";
import { toast } from "@/hooks/useToast";

export const shouldShowMoveModal = (chatSession: ChatSession): boolean => {
  const hideModal =
    typeof window !== "undefined" &&
    window.localStorage.getItem(
      LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL
    ) === "true";

  return !hideModal && chatSession.persona_id !== DEFAULT_PERSONA_ID;
};

export const showErrorNotification = (message: string) => {
  toast.error(message);
};

export interface MoveOperationParams {
  chatSession: ChatSession;
  targetProjectId: number;
  refreshChatSessions: () => Promise<any>;
  refreshCurrentProjectDetails: () => Promise<any>;
  fetchProjects: () => Promise<any>;
  currentProjectId: number | null;
}

export const handleMoveOperation = async ({
  chatSession,
  targetProjectId,
  refreshChatSessions,
  refreshCurrentProjectDetails,
  fetchProjects,
  currentProjectId,
}: MoveOperationParams) => {
  try {
    await moveChatSession(targetProjectId, chatSession.id);
    const projectRefreshPromise = currentProjectId
      ? refreshCurrentProjectDetails()
      : fetchProjects();
    await Promise.all([refreshChatSessions(), projectRefreshPromise]);
  } catch (error) {
    console.error("Failed to perform move operation:", error);
    toast.error("Failed to move chat. Please try again.");
    throw error;
  }
};


================================================
FILE: web/src/sections/sidebar/useChatSearchOptimistic.ts
================================================
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
import useSWRInfinite from "swr/infinite";
import useChatSessions from "@/hooks/useChatSessions";
import { useProjects } from "@/lib/hooks/useProjects";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ChatSearchResponse } from "@/app/app/interfaces";
import { UNNAMED_CHAT } from "@/lib/constants";

export interface FilterableChat {
  id: string;
  label: string;
  time: string;
}

interface UseChatSearchOptimisticOptions {
  searchQuery: string;
  enabled?: boolean;
}

interface UseChatSearchOptimisticResult {
  results: FilterableChat[];
  isSearching: boolean;
  hasMore: boolean;
  fetchMore: () => Promise<void>;
  isLoadingMore: boolean;
  sentinelRef: React.RefObject<HTMLDivElement | null>;
}

const PAGE_SIZE = 20;
const DEBOUNCE_MS = 300;

// --- Helper Functions ---

function transformApiResponse(response: ChatSearchResponse): FilterableChat[] {
  const chats: FilterableChat[] = [];
  for (const group of response.groups) {
    for (const chat of group.chats) {
      chats.push({
        id: chat.id,
        label: chat.name || UNNAMED_CHAT,
        time: chat.time_created,
      });
    }
  }
  return chats;
}

function filterLocalSessions(
  sessions: FilterableChat[],
  searchQuery: string
): FilterableChat[] {
  if (!searchQuery.trim()) {
    return sessions;
  }
  const term = searchQuery.toLowerCase();
  return sessions.filter((chat) => chat.label.toLowerCase().includes(term));
}

// --- Hook ---

export function useChatSearchOptimistic(
  options: UseChatSearchOptimisticOptions
): UseChatSearchOptimisticResult {
  const { searchQuery, enabled = true } = options;

  // Debounced search query for API calls
  const [debouncedQuery, setDebouncedQuery] = useState(searchQuery);

  // Ref for infinite scroll sentinel
  const sentinelRef = useRef<HTMLDivElement | null>(null);

  // 1. Get already-cached data from existing hooks
  const { chatSessions } = useChatSessions();
  const { projects } = useProjects();

  // 2. Build combined fallback data (instant display)
  const fallbackSessions = useMemo<FilterableChat[]>(() => {
    const chatMap = new Map<string, FilterableChat>();

    // Add regular chats from useChatSessions
    for (const chat of chatSessions) {
      chatMap.set(chat.id, {
        id: chat.id,
        label: chat.name || UNNAMED_CHAT,
        time: chat.time_updated || chat.time_created,
      });
    }

    // Add project chats from useProjects
    for (const project of projects) {
      for (const chat of project.chat_sessions) {
        chatMap.set(chat.id, {
          id: chat.id,
          label: chat.name || UNNAMED_CHAT,
          time: chat.time_updated || chat.time_created,
        });
      }
    }

    // Sort by most recent
    return Array.from(chatMap.values()).sort(
      (a, b) => new Date(b.time).getTime() - new Date(a.time).getTime()
    );
  }, [chatSessions, projects]);

  // Debounce the search query
  useEffect(() => {
    const timer = setTimeout(() => setDebouncedQuery(searchQuery), DEBOUNCE_MS);
    return () => clearTimeout(timer);
  }, [searchQuery]);

  // 3. SWR key generator for infinite scroll
  const getKey = useCallback(
    (pageIndex: number, previousPageData: ChatSearchResponse | null) => {
      // Don't fetch if not enabled
      if (!enabled) return null;

      // Reached the end
      if (previousPageData && !previousPageData.has_more) return null;

      const page = pageIndex + 1;
      const params = new URLSearchParams();
      params.set("page", page.toString());
      params.set("page_size", PAGE_SIZE.toString());

      if (debouncedQuery.trim()) {
        params.set("query", debouncedQuery);
      }

      return `/api/chat/search?${params.toString()}`;
    },
    [enabled, debouncedQuery]
  );

  // 4. Use SWR for paginated data (replaces fallback after fetch)
  const { data, size, setSize, isValidating } =
    useSWRInfinite<ChatSearchResponse>(getKey, errorHandlingFetcher, {
      revalidateOnFocus: false,
      dedupingInterval: 30000,
      revalidateFirstPage: false,
      persistSize: true,
    });

  // Transform SWR data to FilterableChat[]
  const swrResults = useMemo<FilterableChat[]>(() => {
    if (!data || data.length === 0) return [];

    const allChats: FilterableChat[] = [];
    for (const page of data) {
      allChats.push(...transformApiResponse(page));
    }

    // Deduplicate by id (keep first occurrence)
    const seen = new Set<string>();
    return allChats.filter((chat) => {
      if (seen.has(chat.id)) return false;
      seen.add(chat.id);
      return true;
    });
  }, [data]);

  // Determine if we have more pages
  const hasMore = useMemo(() => {
    if (!data || data.length === 0) return true;
    const lastPage = data[data.length - 1];
    return lastPage?.has_more ?? false;
  }, [data]);

  // 5. Return fallback if no SWR data yet, otherwise return SWR data
  const results = useMemo<FilterableChat[]>(() => {
    // If SWR has data, use it (paginated, searchable)
    if (swrResults.length > 0) {
      return swrResults;
    }

    // Otherwise use fallback (already-cached data)
    // Apply local filtering if there's a search query
    if (searchQuery.trim()) {
      return filterLocalSessions(fallbackSessions, searchQuery);
    }

    return fallbackSessions;
  }, [swrResults, fallbackSessions, searchQuery]);

  // Loading states
  const isSearching = isValidating && size === 1;
  const isLoadingMore = isValidating && size > 1;

  // Fetch more results for infinite scroll
  const fetchMore = useCallback(async () => {
    if (!enabled || isValidating || !hasMore) {
      return;
    }
    await setSize(size + 1);
  }, [enabled, isValidating, hasMore, setSize, size]);

  // IntersectionObserver for infinite scroll
  useEffect(() => {
    const sentinel = sentinelRef.current;
    if (!sentinel || !enabled) return;

    const observer = new IntersectionObserver(
      (entries) => {
        const entry = entries[0];
        if (entry?.isIntersecting && hasMore && !isValidating) {
          fetchMore();
        }
      },
      {
        root: null,
        rootMargin: "100px",
        threshold: 0,
      }
    );

    observer.observe(sentinel);

    return () => {
      observer.disconnect();
    };
  }, [enabled, hasMore, isValidating, fetchMore]);

  return {
    results,
    isSearching,
    hasMore,
    fetchMore,
    isLoadingMore,
    sentinelRef,
  };
}


================================================
FILE: web/src/types.ts
================================================
/**
 * Utility type that removes style override properties from a component's props.
 *
 * This type omits `className` and `style` properties from type `T`, preventing
 * external style customization. Useful for enforcing consistent design system
 * styling and preventing arbitrary style overrides.
 *
 * @template T - The base type to remove style properties from
 *
 * @example
 * ```tsx
 * // Create a button that doesn't allow style overrides
 * interface ButtonProps extends WithoutStyles<React.ComponentProps<"button">> {
 *   variant: "primary" | "secondary";
 * }
 *
 * function Button({ variant, ...props }: ButtonProps) {
 *   // Users cannot pass className or style props
 *   return <button {...props} className={getVariantClass(variant)} />;
 * }
 *
 * // ✅ Valid
 * <Button variant="primary" onClick={handleClick} />
 *
 * // ❌ TypeScript error - className not allowed
 * <Button variant="primary" className="custom-class" />
 * ```
 */
export type WithoutStyles<T> = Omit<T, "className" | "style">;


================================================
FILE: web/tailwind-themes/tailwind.config.js
================================================
const plugin = require("tailwindcss/plugin");

/** @type {import('tailwindcss').Config} */

module.exports = {
  darkMode: "class",
  content: ["./src/**/*.{js,jsx,ts,tsx}", "./lib/opal/**/*.{js,jsx,ts,tsx}"],
  theme: {
    container: {
      center: true,
    },
    transparent: "transparent",
    current: "currentColor",
    extend: {
      lineClamp: {
        7: "7",
        8: "8",
        9: "9",
        10: "10",
      },
      transitionProperty: {
        spacing: "margin, padding",
      },
      keyframes: {
        shimmer: {
          "0%": { backgroundPosition: "100% 0" },
          "100%": { backgroundPosition: "-100% 0" },
        },
        "subtle-pulse": {
          "0%, 100%": { opacity: 0.9 },
          "50%": { opacity: 0.5 },
        },
        pulse: {
          "0%, 100%": { opacity: 0.9 },
          "50%": { opacity: 0.4 },
        },
        "fade-in-scale": {
          "0%": { opacity: "0", transform: "scale(0.95)" },
          "100%": { opacity: "1", transform: "scale(1)" },
        },
        "fade-out-scale": {
          "0%": { opacity: "1", transform: "scale(1)" },
          "100%": { opacity: "0", transform: "scale(0.95)" },
        },
        "collapsible-down": {
          from: { height: "0" },
          to: { height: "var(--radix-collapsible-content-height)" },
        },
        "collapsible-up": {
          from: { height: "var(--radix-collapsible-content-height)" },
          to: { height: "0" },
        },
      },
      animation: {
        shimmer: "shimmer 1.8s ease-out infinite",
        "fade-in-up": "fadeInUp 0.5s ease-out",
        "subtle-pulse": "subtle-pulse 2s ease-in-out infinite",
        pulse: "pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite",
        "fade-in-scale": "fade-in-scale 0.2s ease-out forwards",
        "fade-out-scale": "fade-out-scale 0.2s ease-in forwards",
        "collapsible-down": "collapsible-down 0.2s ease-out",
        "collapsible-up": "collapsible-up 0.2s ease-out",
      },
      gradientColorStops: {
        "neutral-10": "var(--neutral-10) 5%",
      },
      screens: {
        "2xl": "1420px",
        "3xl": "1700px",
        "4xl": "2000px",
        mobile: { max: "767px" },
        desktop: "768px",
        tall: { raw: "(min-height: 800px)" },
        short: { raw: "(max-height: 799px)" },
        "very-short": { raw: "(max-height: 600px)" },
      },
      fontFamily: {
        sans: ["Hanken Grotesk", "sans-serif"],
        hanken: ["Hanken Grotesk", "sans-serif"],
      },
      width: {
        "message-xs": "450px",
        "message-sm": "550px",
        "message-default": "740px",
        "searchbar-xs": "560px",
        "searchbar-sm": "660px",
        searchbar: "850px",
        "document-sidebar": "800px",
        "document-sidebar-large": "1000px",
        "searchbar-max": "60px",
      },
      maxWidth: {
        "document-sidebar": "1000px",
        "message-max": "850px",
        "content-max": "725px",
        "searchbar-max": "800px",
      },
      colors: {
        // New and updated Figma stylings
        "text-05": "var(--text-05)",
        "text-04": "var(--text-04)",
        "text-03": "var(--text-03)",
        "text-02": "var(--text-02)",
        "text-01": "var(--text-01)",
        "text-inverted-01": "var(--text-inverted-01)",
        "text-inverted-02": "var(--text-inverted-02)",
        "text-inverted-03": "var(--text-inverted-03)",
        "text-inverted-04": "var(--text-inverted-04)",
        "text-inverted-05": "var(--text-inverted-05)",
        "text-light-03": "var(--text-light-03)",
        "text-light-05": "var(--text-light-05)",
        "text-dark-03": "var(--text-dark-03)",
        "text-dark-05": "var(--text-dark-05)",
        "background-neutral-00": "var(--background-neutral-00)",
        "background-neutral-01": "var(--background-neutral-01)",
        "background-neutral-02": "var(--background-neutral-02)",
        "background-neutral-03": "var(--background-neutral-03)",
        "background-neutral-04": "var(--background-neutral-04)",
        "background-neutral-light-00": "var(--background-neutral-light-00)",
        "background-neutral-light-03": "var(--background-neutral-light-03)",
        "background-neutral-dark-03": "var(--background-neutral-dark-03)",
        "background-neutral-inverted-04":
          "var(--background-neutral-inverted-04)",
        "background-neutral-inverted-03":
          "var(--background-neutral-inverted-03)",
        "background-neutral-inverted-02":
          "var(--background-neutral-inverted-02)",
        "background-neutral-inverted-01":
          "var(--background-neutral-inverted-01)",
        "background-neutral-inverted-00":
          "var(--background-neutral-inverted-00)",
        "background-tint-00": "var(--background-tint-00)",
        "background-tint-01": "var(--background-tint-01)",
        "background-tint-02": "var(--background-tint-02)",
        "background-tint-03": "var(--background-tint-03)",
        "background-tint-04": "var(--background-tint-04)",
        "background-tint-inverted-04": "var(--background-tint-inverted-04)",
        "background-tint-inverted-03": "var(--background-tint-inverted-03)",
        "background-tint-inverted-02": "var(--background-tint-inverted-02)",
        "background-tint-inverted-01": "var(--background-tint-inverted-01)",
        "background-tint-inverted-00": "var(--background-tint-inverted-00)",
        "border-01": "var(--border-01)",
        "border-02": "var(--border-02)",
        "border-03": "var(--border-03)",
        "border-04": "var(--border-04)",
        "border-05": "var(--border-05)",
        "border-inverted-05": "var(--border-inverted-05)",
        "border-inverted-04": "var(--border-inverted-04)",
        "border-inverted-03": "var(--border-inverted-03)",
        "border-inverted-02": "var(--border-inverted-02)",
        "border-inverted-01": "var(--border-inverted-01)",
        "action-link-06": "var(--action-link-06)",
        "action-link-05": "var(--action-link-05)",
        "action-link-04": "var(--action-link-04)",
        "action-link-03": "var(--action-link-03)",
        "action-link-02": "var(--action-link-02)",
        "action-link-01": "var(--action-link-01)",
        "action-link-00": "var(--action-link-00)",
        "action-danger-06": "var(--action-danger-06)",
        "action-danger-05": "var(--action-danger-05)",
        "action-danger-04": "var(--action-danger-04)",
        "action-danger-03": "var(--action-danger-03)",
        "action-danger-02": "var(--action-danger-02)",
        "action-danger-01": "var(--action-danger-01)",
        "action-text-link-05": "var(--action-text-link-05)",
        "action-text-danger-05": "var(--action-text-danger-05)",
        "highlight-match": "var(--highlight-match)",
        "highlight-selection": "var(--highlight-selection)",
        "highlight-active": "var(--highlight-active)",
        "highlight-accent": "var(--highlight-accent)",
        "theme-primary-06": "var(--theme-primary-06)",
        "theme-primary-05": "var(--theme-primary-05)",
        "theme-primary-04": "var(--theme-primary-04)",
        "theme-gradient-05": "var(--theme-gradient-05)",
        "theme-gradient-00": "var(--theme-gradient-00)",
        "theme-red-05": "var(--theme-red-05)",
        "theme-red-04": "var(--theme-red-04)",
        "theme-red-02": "var(--theme-red-02)",
        "theme-red-01": "var(--theme-red-01)",
        "theme-orange-05": "var(--theme-orange-05)",
        "theme-orange-04": "var(--theme-orange-04)",
        "theme-orange-02": "var(--theme-orange-02)",
        "theme-orange-01": "var(--theme-orange-01)",
        "theme-amber-05": "var(--theme-amber-05)",
        "theme-amber-04": "var(--theme-amber-04)",
        "theme-amber-02": "var(--theme-amber-02)",
        "theme-amber-01": "var(--theme-amber-01)",
        "theme-yellow-05": "var(--theme-yellow-05)",
        "theme-yellow-02": "var(--theme-yellow-02)",
        "theme-yellow-01": "var(--theme-yellow-01)",
        "theme-green-05": "var(--theme-green-05)",
        "theme-green-02": "var(--theme-green-02)",
        "theme-green-01": "var(--theme-green-01)",
        "theme-lime-05": "var(--theme-lime-05)",
        "theme-lime-02": "var(--theme-lime-02)",
        "theme-lime-01": "var(--theme-lime-01)",
        "theme-cyan-05": "var(--theme-cyan-05)",
        "theme-cyan-02": "var(--theme-cyan-02)",
        "theme-cyan-01": "var(--theme-cyan-01)",
        "theme-sky-05": "var(--theme-sky-05)",
        "theme-sky-02": "var(--theme-sky-02)",
        "theme-sky-01": "var(--theme-sky-01)",
        "theme-blue-05": "var(--theme-blue-05)",
        "theme-blue-02": "var(--theme-blue-02)",
        "theme-blue-01": "var(--theme-blue-01)",
        "theme-purple-05": "var(--theme-purple-05)",
        "theme-purple-02": "var(--theme-purple-02)",
        "theme-purple-01": "var(--theme-purple-01)",
        "theme-magenta-05": "var(--theme-magenta-05)",
        "theme-magenta-02": "var(--theme-magenta-02)",
        "theme-magenta-01": "var(--theme-magenta-01)",
        "onyx-ink-100": "var(--onyx-ink-100)",
        "onyx-ink-95": "var(--onyx-ink-95)",
        "onyx-ink-90": "var(--onyx-ink-90)",
        "onyx-chrome-20": "var(--onyx-chrome-20)",
        "onyx-chrome-10": "var(--onyx-chrome-10)",
        "onyx-chrome-00": "var(--onyx-chrome-00)",
        "tint-98": "var(--tint-98)",
        "tint-95": "var(--tint-95)",
        "tint-90": "var(--tint-90)",
        "tint-85": "var(--tint-85)",
        "tint-80": "var(--tint-80)",
        "tint-60": "var(--tint-60)",
        "tint-50": "var(--tint-50)",
        "tint-40": "var(--tint-40)",
        "tint-20": "var(--tint-20)",
        "tint-10": "var(--tint-10)",
        "tint-05": "var(--tint-05)",
        "tint-02": "var(--tint-02)",
        "shadow-01": "var(--shadow-01)",
        "shadow-02": "var(--shadow-02)",
        "shadow-03": "var(--shadow-03)",
        "mask-01": "var(--mask-01)",
        "mask-02": "var(--mask-02)",
        "mask-03": "var(--mask-03)",
        "status-info-05": "var(--status-info-05)",
        "status-info-02": "var(--status-info-02)",
        "status-info-01": "var(--status-info-01)",
        "status-info-00": "var(--status-info-00)",
        "status-success-05": "var(--status-success-05)",
        "status-success-02": "var(--status-success-02)",
        "status-success-01": "var(--status-success-01)",
        "status-success-00": "var(--status-success-00)",
        "status-warning-05": "var(--status-warning-05)",
        "status-warning-02": "var(--status-warning-02)",
        "status-warning-01": "var(--status-warning-01)",
        "status-warning-00": "var(--status-warning-00)",
        "status-error-05": "var(--status-error-05)",
        "status-error-02": "var(--status-error-02)",
        "status-error-01": "var(--status-error-01)",
        "status-error-00": "var(--status-error-00)",
        "status-text-success-05": "var(--status-text-success-05)",
        "status-text-info-05": "var(--status-text-info-05)",
        "status-text-warning-05": "var(--status-text-warning-05)",
        "status-text-error-05": "var(--status-text-error-05)",

        "code-code": "var(--code-code)",
        "code-comment": "var(--code-comment)",
        "code-keyword": "var(--code-keyword)",
        "code-string": "var(--code-string)",
        "code-number": "var(--code-number)",
        "code-definition": "var(--code-definition)",
        "background-code-01": "var(--background-code-01)",

        // Shimmer colors for loading animations
        "shimmer-base": "var(--shimmer-base)",
        "shimmer-highlight": "var(--shimmer-highlight)",

        // Tailwind defaults
        background: "var(--background-tint-01)",
        foreground: "var(--background-tint-inverted-01)",
        border: "var(--border-01)",
        text: "var(--text-04)",
      },
      borderRadius: {
        "02": "var(--border-radius-02)",
        "04": "var(--border-radius-04)",
        "08": "var(--border-radius-08)",
        12: "var(--border-radius-12)",
        16: "var(--border-radius-16)",
        full: "var(--border-radius-full)",
      },
      fontSize: {
        "2xs": "0.625rem",
        "code-sm": "small",
      },
      fontWeight: {
        description: "375",
        "token-bold": "bold",
      },
      fontStyle: {
        "token-italic": "italic",
      },
      backdropBlur: {
        "01": "var(--backdrop-blur-01)",
        "02": "var(--backdrop-blur-02)",
        "03": "var(--backdrop-blur-03)",
      },
      calendar: {
        // Light mode
        "bg-selected": "var(--calendar-bg-selected)",
        "bg-outside-selected": "var(--calendar-bg-outside-selected)",
        "text-muted": "var(--calendar-text-muted)",
        "text-selected": "var(--calendar-text-selected)",
        "range-start": "var(--calendar-range-start)",
        "range-middle": "var(--calendar-range-middle)",
        "range-end": "var(--calendar-range-end)",
        "text-in-range": "var(--calendar-text-in-range)",

        // Dark mode
        "bg-selected-dark": "var(--calendar-bg-selected-dark)",
        "bg-outside-selected-dark": "var(--calendar-bg-outside-selected-dark)",
        "text-muted-dark": "var(--calendar-text-muted-dark)",
        "text-selected-dark": "var(--calendar-text-selected-dark)",
        "range-start-dark": "var(--calendar-range-start-dark)",
        "range-middle-dark": "var(--calendar-range-middle-dark)",
        "range-end-dark": "var(--calendar-range-end-dark)",
        "text-in-range-dark": "var(--calendar-text-in-range-dark)",

        // Hover effects
        "hover-bg": "var(--calendar-hover-bg)",
        "hover-bg-dark": "var(--calendar-hover-bg-dark)",
        "hover-text": "var(--calendar-hover-text)",
        "hover-text-dark": "var(--calendar-hover-text-dark)",

        // Today's date
        "today-bg": "var(--calendar-today-bg)",
        "today-bg-dark": "var(--calendar-today-bg-dark)",
        "today-text": "var(--calendar-today-text)",
        "today-text-dark": "var(--calendar-today-text-dark)",
      },
    },
  },
  safelist: [
    {
      pattern:
        /^(bg-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ["hover", "ui-selected"],
    },
    {
      pattern:
        /^(text-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ["hover", "ui-selected"],
    },
    {
      pattern:
        /^(border-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ["hover", "ui-selected"],
    },
    {
      pattern:
        /^(ring-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
    {
      pattern:
        /^(stroke-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
    {
      pattern:
        /^(fill-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
  ],
  // Note: @tailwindcss/container-queries plugin is not needed here.
  // Container queries (@container, cqw units) are native in Tailwind v4+.
  // After upgrading to Tailwind 4, this comment can be removed.
  plugins: [
    require("@tailwindcss/typography"),
    require("@headlessui/tailwindcss"),
    require("tailwindcss-animate"),
    require("@tailwindcss/container-queries"),
    plugin(({ addVariant }) => {
      addVariant("focus-within-nonactive", "&:focus-within:not(:active)");
    }),
    plugin(({ addUtilities }) => {
      addUtilities({
        ".break-anywhere": {
          "overflow-wrap": "anywhere",
        },
      });
    }),
  ],
};


================================================
FILE: web/tailwind.config.js
================================================
var merge = require("lodash/merge");
var path = require("path");
var fs = require("fs");
var { createRequire } = require("module");

// Use relative paths for imports
const baseThemes = require("./tailwind-themes/tailwind.config.js");

let customThemes = null;

// Determine which theme to load: custom theme if specified, otherwise default
const themeName = process.env.NEXT_PUBLIC_THEME || "default";
const customThemePath = path.join(
  __dirname,
  "tailwind-themes/custom",
  themeName,
  "tailwind.config.js"
);

if (fs.existsSync(customThemePath)) {
  // Use createRequire to avoid bundler static analysis without using eval
  const dynamicRequire = createRequire(__filename);
  customThemes = dynamicRequire(customThemePath);
}

/** @type {import('tailwindcss').Config} */
module.exports = customThemes ? merge(baseThemes, customThemes) : baseThemes;


================================================
FILE: web/tests/README.md
================================================
# React Integration Testing Guide

Comprehensive guide for writing integration tests in the Onyx web application using Jest and React Testing Library.

## Table of Contents

- [Running Tests](#running-tests)
- [Core Concepts](#core-concepts)
- [Writing Tests](#writing-tests)
- [Query Selectors](#query-selectors)
- [User Interactions](#user-interactions)
- [Async Operations](#async-operations)
- [Mocking](#mocking)
- [Common Patterns](#common-patterns)
- [Testing Philosophy](#testing-philosophy)
- [Troubleshooting](#troubleshooting)

## Running Tests

```bash
# Run all tests
npm test

# Run specific test file
npm test -- EmailPasswordForm.test

# Run tests matching pattern
npm test -- --testPathPattern="auth"

# Run without coverage
npm test -- --no-coverage

# Run in watch mode
npm test -- --watch

# Run with verbose output
npm test -- --verbose
```

## Core Concepts

### Test Structure

Tests are **co-located** with source files for easy discovery and maintenance:

```
src/app/auth/login/
├── EmailPasswordForm.tsx
└── EmailPasswordForm.test.tsx
```

### Test Anatomy

Every test follows this structure:

```typescript
import { render, screen, setupUser, waitFor } from "@tests/setup/test-utils";
import MyComponent from "./MyComponent";

test("descriptive test name explaining user behavior", async () => {
  // 1. Setup - Create user, mock APIs
  const user = setupUser();
  const fetchSpy = jest.spyOn(global, "fetch");

  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({ data: "value" }),
  } as Response);

  // 2. Render - Display the component
  render(<MyComponent />);

  // 3. Act - Simulate user interactions
  await user.type(screen.getByRole("textbox"), "test input");
  await user.click(screen.getByRole("button", { name: /submit/i }));

  // 4. Assert - Verify expected outcomes
  await waitFor(() => {
    expect(screen.getByText(/success/i)).toBeInTheDocument();
  });

  // 5. Cleanup - Restore mocks
  fetchSpy.mockRestore();
});
```

### setupUser() - Automatic act() Wrapping

**ALWAYS use `setupUser()` instead of `userEvent.setup()`**

```typescript
// ✅ Correct - Automatic act() wrapping
const user = setupUser();
await user.click(button);
await user.type(input, "text");

// ❌ Wrong - Manual act() required, verbose
const user = userEvent.setup();
await act(async () => {
  await user.click(button);
});
```

The `setupUser()` helper automatically wraps all user interactions in React's `act()` to prevent warnings and ensure proper state updates.

## Writing Tests

### Query Selectors

Use queries in this priority order (most accessible first):

#### 1. Role Queries (Preferred)

```typescript
// Buttons
screen.getByRole("button", { name: /submit/i });
screen.getByRole("button", { name: /cancel/i });

// Text inputs
screen.getByRole("textbox", { name: /email/i });

// Checkboxes
screen.getByRole("checkbox", { name: /remember me/i });

// Links
screen.getByRole("link", { name: /learn more/i });

// Headings
screen.getByRole("heading", { name: /welcome/i });
```

#### 2. Label Queries

```typescript
// For form inputs with labels
screen.getByLabelText(/password/i);
screen.getByLabelText(/email address/i);
```

#### 3. Placeholder Queries

```typescript
// When no label exists
screen.getByPlaceholderText(/enter email/i);
```

#### 4. Text Queries

```typescript
// For non-interactive text
screen.getByText(/welcome back/i);
screen.getByText(/error occurred/i);
```

#### Query Variants

```typescript
// getBy - Throws error if not found (immediate)
screen.getByRole("button");

// queryBy - Returns null if not found (checking absence)
expect(screen.queryByText(/error/i)).not.toBeInTheDocument();

// findBy - Returns promise, waits for element (async)
expect(await screen.findByText(/success/i)).toBeInTheDocument();

// getAllBy - Returns array of all matches
const inputs = screen.getAllByRole("textbox");
```

### Query Selectors: The Wrong Way

**❌ Avoid these anti-patterns:**

```typescript
// DON'T query by test IDs
screen.getByTestId("submit-button");

// DON'T query by class names
container.querySelector(".submit-btn");

// DON'T query by element types
container.querySelector("button");
```

## User Interactions

### Basic Interactions

```typescript
const user = setupUser();

// Click
await user.click(screen.getByRole("button", { name: /submit/i }));

// Type text
await user.type(screen.getByRole("textbox"), "test input");

// Clear and type
await user.clear(input);
await user.type(input, "new value");

// Check/uncheck checkbox
await user.click(screen.getByRole("checkbox"));

// Select from dropdown
await user.selectOptions(screen.getByRole("combobox"), "option-value");

// Upload file
const file = new File(["content"], "test.txt", { type: "text/plain" });
const input = screen.getByLabelText(/upload/i);
await user.upload(input, file);
```

### Form Interactions

```typescript
test("user can fill and submit form", async () => {
  const user = setupUser();

  render(<ContactForm />);

  await user.type(screen.getByLabelText(/name/i), "John Doe");
  await user.type(screen.getByLabelText(/email/i), "john@example.com");
  await user.type(screen.getByLabelText(/message/i), "Hello!");
  await user.click(screen.getByRole("button", { name: /send/i }));

  await waitFor(() => {
    expect(screen.getByText(/message sent/i)).toBeInTheDocument();
  });
});
```

## Async Operations

### Handling Async State Updates

**Rule**: After triggering state changes, always wait for UI updates before asserting.

#### Pattern 1: findBy Queries (Simplest)

```typescript
// Element appears after async operation
await user.click(createButton);
expect(await screen.findByRole("textbox")).toBeInTheDocument();
```

#### Pattern 2: waitFor (Complex Assertions)

```typescript
await user.click(submitButton);

await waitFor(() => {
  expect(screen.getByText("Success")).toBeInTheDocument();
  expect(screen.getByText("Count: 5")).toBeInTheDocument();
});
```

#### Pattern 3: waitForElementToBeRemoved

```typescript
await user.click(deleteButton);

await waitForElementToBeRemoved(() => screen.queryByText(/item name/i));
```

### Common Async Mistakes

```typescript
// ❌ Wrong - getBy immediately after state change
await user.click(button);
expect(screen.getByText("Updated")).toBeInTheDocument(); // May fail!

// ✅ Correct - Wait for state update
await user.click(button);
expect(await screen.findByText("Updated")).toBeInTheDocument();

// ❌ Wrong - Multiple getBy calls without waiting
await user.click(button);
expect(screen.getByText("Success")).toBeInTheDocument();
expect(screen.getByText("Data loaded")).toBeInTheDocument();

// ✅ Correct - Single waitFor with multiple assertions
await user.click(button);
await waitFor(() => {
  expect(screen.getByText("Success")).toBeInTheDocument();
  expect(screen.getByText("Data loaded")).toBeInTheDocument();
});
```

## Mocking

### Mocking fetch API

**IMPORTANT**: Always document which endpoint each mock corresponds to using comments.

```typescript
let fetchSpy: jest.SpyInstance;

beforeEach(() => {
  fetchSpy = jest.spyOn(global, "fetch");
});

afterEach(() => {
  fetchSpy.mockRestore();
});

test("fetches data successfully", async () => {
  // Mock GET /api/data
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({ data: [1, 2, 3] }),
  } as Response);

  render(<MyComponent />);

  await waitFor(() => {
    expect(fetchSpy).toHaveBeenCalledWith("/api/data");
  });
});
```

**Why comment the endpoint?** Sequential mocks can be confusing. Comments make it clear which API call each mock corresponds to, making tests easier to understand and maintain.

### Multiple API Calls

**Pattern**: Document each endpoint with a comment, then verify it was called correctly.

```typescript
test("handles multiple API calls", async () => {
  const user = setupUser();

  // Mock GET /api/items
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({ items: [] }),
  } as Response);

  // Mock POST /api/items
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({ id: 1, name: "New Item" }),
  } as Response);

  render(<MyComponent />);

  // Verify GET was called
  await waitFor(() => {
    expect(fetchSpy).toHaveBeenCalledWith("/api/items");
  });

  await user.click(screen.getByRole("button", { name: /create/i }));

  // Verify POST was called
  await waitFor(() => {
    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/items",
      expect.objectContaining({ method: "POST" })
    );
  });
});
```

**Three API calls example:**

```typescript
test("test, create, and set as default", async () => {
  const user = setupUser();

  // Mock POST /api/llm/test
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({}),
  } as Response);

  // Mock PUT /api/llm/provider?is_creation=true
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({ id: 5, name: "New Provider" }),
  } as Response);

  // Mock POST /api/llm/provider/5/default
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({}),
  } as Response);

  render(<MyForm />);

  await user.type(screen.getByLabelText(/name/i), "New Provider");
  await user.click(screen.getByRole("button", { name: /create/i }));

  // Verify all three endpoints were called
  await waitFor(() => {
    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/llm/test",
      expect.objectContaining({ method: "POST" })
    );
    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/llm/provider",
      expect.objectContaining({ method: "PUT" })
    );
    expect(fetchSpy).toHaveBeenCalledWith(
      "/api/llm/provider/5/default",
      expect.objectContaining({ method: "POST" })
    );
  });
});
```

### Verifying Request Body

```typescript
test("sends correct data", async () => {
  const user = setupUser();

  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({}),
  } as Response);

  render(<MyForm />);

  await user.type(screen.getByLabelText(/name/i), "Test");
  await user.click(screen.getByRole("button", { name: /submit/i }));

  await waitFor(() => {
    expect(fetchSpy).toHaveBeenCalled();
  });

  const callArgs = fetchSpy.mock.calls[0];
  const requestBody = JSON.parse(callArgs[1].body);

  expect(requestBody).toEqual({
    name: "Test",
    active: true,
  });
});
```

### Mocking Errors

```typescript
test("displays error message on failure", async () => {
  // Mock GET /api/data (network error)
  fetchSpy.mockRejectedValueOnce(new Error("Network error"));

  render(<MyComponent />);

  await waitFor(() => {
    expect(screen.getByText(/failed to load/i)).toBeInTheDocument();
  });
});

test("handles API error response", async () => {
  // Mock POST /api/items (server error)
  fetchSpy.mockResolvedValueOnce({
    ok: false,
    status: 500,
  } as Response);

  render(<MyComponent />);

  await waitFor(() => {
    expect(screen.getByText(/something went wrong/i)).toBeInTheDocument();
  });
});
```

### Mocking Next.js Router

```typescript
// At top of test file
jest.mock("next/navigation", () => ({
  useRouter: () => ({
    push: jest.fn(),
    back: jest.fn(),
    refresh: jest.fn(),
  }),
  usePathname: () => "/current-path",
}));
```

## Common Patterns

### Testing CRUD Operations

```typescript
describe("User Management", () => {
  test("creates new user", async () => {
    const user = setupUser();

    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ id: 1, name: "New User" }),
    } as Response);

    render(<UserForm />);

    await user.type(screen.getByLabelText(/name/i), "New User");
    await user.click(screen.getByRole("button", { name: /create/i }));

    await waitFor(() => {
      expect(screen.getByText(/user created/i)).toBeInTheDocument();
    });
  });

  test("edits existing user", async () => {
    const user = setupUser();

    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ id: 1, name: "Updated User" }),
    } as Response);

    render(<UserForm initialData={{ id: 1, name: "Old Name" }} />);

    await user.clear(screen.getByLabelText(/name/i));
    await user.type(screen.getByLabelText(/name/i), "Updated User");
    await user.click(screen.getByRole("button", { name: /save/i }));

    await waitFor(() => {
      expect(screen.getByText(/user updated/i)).toBeInTheDocument();
    });
  });

  test("deletes user", async () => {
    const user = setupUser();

    fetchSpy.mockResolvedValueOnce({
      ok: true,
      json: async () => ({}),
    } as Response);

    render(<UserList />);

    await waitFor(() => {
      expect(screen.getByText("John Doe")).toBeInTheDocument();
    });

    await user.click(screen.getByRole("button", { name: /delete/i }));

    await waitFor(() => {
      expect(screen.queryByText("John Doe")).not.toBeInTheDocument();
    });
  });
});
```

### Testing Conditional Rendering

```typescript
test("shows edit form when edit button clicked", async () => {
  const user = setupUser();

  render(<MyComponent />);

  expect(screen.queryByRole("textbox")).not.toBeInTheDocument();

  await user.click(screen.getByRole("button", { name: /edit/i }));

  expect(await screen.findByRole("textbox")).toBeInTheDocument();
});

test("toggles between states", async () => {
  const user = setupUser();

  render(<Toggle />);

  const button = screen.getByRole("button", { name: /show details/i });

  await user.click(button);
  expect(await screen.findByText(/details content/i)).toBeInTheDocument();

  await user.click(button);
  expect(screen.queryByText(/details content/i)).not.toBeInTheDocument();
});
```

### Testing Lists and Tables

```typescript
test("displays list of items", async () => {
  fetchSpy.mockResolvedValueOnce({
    ok: true,
    json: async () => ({
      items: [
        { id: 1, name: "Item 1" },
        { id: 2, name: "Item 2" },
        { id: 3, name: "Item 3" },
      ],
    }),
  } as Response);

  render(<ItemList />);

  await waitFor(() => {
    expect(screen.getByText("Item 1")).toBeInTheDocument();
    expect(screen.getByText("Item 2")).toBeInTheDocument();
    expect(screen.getByText("Item 3")).toBeInTheDocument();
  });
});

test("filters items", async () => {
  const user = setupUser();

  render(<FilterableList items={mockItems} />);

  await user.type(screen.getByRole("searchbox"), "specific");

  await waitFor(() => {
    expect(screen.getByText("Specific Item")).toBeInTheDocument();
    expect(screen.queryByText("Other Item")).not.toBeInTheDocument();
  });
});
```

### Testing Validation

```typescript
test("shows validation errors", async () => {
  const user = setupUser();

  render(<LoginForm />);

  await user.click(screen.getByRole("button", { name: /submit/i }));

  await waitFor(() => {
    expect(screen.getByText(/email is required/i)).toBeInTheDocument();
    expect(screen.getByText(/password is required/i)).toBeInTheDocument();
  });
});

test("clears validation on valid input", async () => {
  const user = setupUser();

  render(<LoginForm />);

  await user.click(screen.getByRole("button", { name: /submit/i }));

  await waitFor(() => {
    expect(screen.getByText(/email is required/i)).toBeInTheDocument();
  });

  await user.type(screen.getByLabelText(/email/i), "valid@email.com");

  await waitFor(() => {
    expect(screen.queryByText(/email is required/i)).not.toBeInTheDocument();
  });
});
```

## Testing Philosophy

### What to Test

**✅ Test user-visible behavior:**

- Forms can be filled and submitted
- Buttons trigger expected actions
- Success/error messages appear
- Navigation works correctly
- Data is displayed after loading
- Validation errors show and clear appropriately

**✅ Test integration points:**

- API calls are made with correct parameters
- Responses are handled properly
- Error states are handled
- Loading states appear

**❌ Don't test implementation details:**

- Internal state values
- Component lifecycle methods
- CSS class names
- Specific React hooks being used

### Test Naming

Write test names that describe user behavior:

```typescript
// ✅ Good - Describes what user can do
test("user can create new prompt", async () => {});
test("shows error when API call fails", async () => {});
test("filters items by search term", async () => {});

// ❌ Bad - Implementation-focused
test("handleSubmit is called", async () => {});
test("state updates correctly", async () => {});
test("renders without crashing", async () => {});
```

### Minimal Mocking

Only mock external dependencies:

```typescript
// ✅ Mock external APIs
jest.spyOn(global, "fetch");

// ✅ Mock Next.js router
jest.mock("next/navigation");

// ✅ Mock problematic packages
// (configured in tests/setup/__mocks__)

// ❌ Don't mock application code
// ❌ Don't mock component internals
// ❌ Don't mock utility functions
```

## Troubleshooting

### "Not wrapped in act()" Warning

**Solution**: Always use `setupUser()` instead of `userEvent.setup()`

```typescript
// ✅ Correct
const user = setupUser();

// ❌ Wrong
const user = userEvent.setup();
```

### "Unable to find element" Error

**Solution**: Element hasn't appeared yet, use `findBy` or `waitFor`

```typescript
// ❌ Wrong - getBy doesn't wait
await user.click(button);
expect(screen.getByText("Success")).toBeInTheDocument();

// ✅ Correct - findBy waits
await user.click(button);
expect(await screen.findByText("Success")).toBeInTheDocument();
```

### "Multiple elements found" Error

**Solution**: Be more specific with your query

```typescript
// ❌ Too broad
screen.getByRole("button");

// ✅ Specific
screen.getByRole("button", { name: /submit/i });
```

### Test Times Out

**Causes**:

1. Async operation never completes
2. Waiting for element that never appears
3. Missing mock for API call

**Solutions**:

```typescript
// Check fetch is mocked
expect(fetchSpy).toHaveBeenCalled()

// Use queryBy to check if element exists
expect(screen.queryByText("Text")).toBeInTheDocument()

// Verify mock is set up before render
fetchSpy.mockResolvedValueOnce(...)
render(<Component />)
```

## Examples

See comprehensive test examples:

- `src/app/auth/login/EmailPasswordForm.test.tsx` - Login/signup workflows, validation
- `src/app/chat/input-prompts/InputPrompts.test.tsx` - CRUD operations, conditional rendering
- `src/app/admin/configuration/llm/CustomLLMProviderUpdateForm.test.tsx` - Complex forms, multi-step workflows

## Built-in Mocks

Only essential mocks in `tests/setup/__mocks__/`:

- `UserProvider` - Removes auth requirement for tests
- `react-markdown` / `remark-gfm` - ESM compatibility

See `tests/setup/__mocks__/README.md` for details.


================================================
FILE: web/tests/e2e/admin/admin_auth.setup.ts
================================================
// dependency for all admin user tests
import { test as setup } from "@playwright/test";

setup("authenticate as admin", async ({ browser }) => {
  const context = await browser.newContext({ storageState: "admin_auth.json" });
  const page = await context.newPage();
  await page.goto("/app");
  await page.waitForURL("/app");
});


================================================
FILE: web/tests/e2e/admin/admin_oauth_redirect_uri.spec.ts
================================================
import { test, expect } from "@playwright/test";

test.use({ storageState: "admin_auth.json" });

test("Admin - OAuth Redirect - Missing Code", async ({ page }) => {
  await page.goto("/admin/connectors/slack/oauth/callback?state=xyz");

  await expect(page.locator("p.text-text-500")).toHaveText(
    "Missing authorization code."
  );
});

test("Admin - OAuth Redirect - Missing State", async ({ page }) => {
  await page.goto("/admin/connectors/slack/oauth/callback?code=123");

  await expect(page.locator("p.text-text-500")).toHaveText(
    "Missing state parameter."
  );
});

test("Admin - OAuth Redirect - Invalid Connector", async ({ page }) => {
  await page.goto(
    "/admin/connectors/invalid-connector/oauth/callback?code=123&state=xyz"
  );

  await expect(page.locator("p.text-text-500")).toHaveText(
    "invalid_connector is not a valid source type."
  );
});


================================================
FILE: web/tests/e2e/admin/admin_pages.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { THEMES, setThemeBeforeNavigation } from "@tests/e2e/utils/theme";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

test.use({ storageState: "admin_auth.json" });
test.describe.configure({ mode: "parallel" });

/**
 * Discover all navigable admin pages by collecting links from the sidebar.
 * The sidebar is rendered on every `/admin/*` page, so we visit one admin
 * route and scrape the `<a>` elements that are present for the current
 * user / feature-flag configuration.
 */
async function discoverAdminPages(page: Page): Promise<string[]> {
  await page.goto("/admin/configuration/llm");
  await page.waitForLoadState("networkidle");

  return page.evaluate(() => {
    const sidebar = document.querySelector('[class*="group/SidebarWrapper"]');
    if (!sidebar) return [];

    const hrefs = new Set<string>();
    sidebar
      .querySelectorAll<HTMLAnchorElement>('a[href^="/admin/"]')
      .forEach((a) => hrefs.add(a.getAttribute("href")!));
    return Array.from(hrefs);
  });
}

for (const theme of THEMES) {
  test(`Admin pages – ${theme} mode`, async ({ page }) => {
    await setThemeBeforeNavigation(page, theme);

    const adminHrefs = await discoverAdminPages(page);
    expect(
      adminHrefs.length,
      "Expected to discover at least one admin page from the sidebar"
    ).toBeGreaterThan(0);

    for (const href of adminHrefs) {
      const slug = href.replace(/^\/admin\//, "").replace(/\//g, "--");

      await test.step(
        slug,
        async () => {
          await page.goto(href);

          try {
            await expect(
              page.locator('[aria-label="admin-page-title"]')
            ).toBeVisible({ timeout: 10000 });
          } catch (error) {
            console.error(`Failed to find admin-page-title for "${href}"`);
            throw error;
          }

          await page.waitForLoadState("networkidle");

          await expectScreenshot(page, {
            name: `admin-${theme}-${slug}`,
            mask: [
              '[data-testid="admin-date-range-selector-button"]',
              '[data-column-id="updated_at"]',
            ],
          });
        },
        { box: true }
      );
    }
  });
}


================================================
FILE: web/tests/e2e/admin/code-interpreter/code_interpreter.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";

const CODE_INTERPRETER_URL = "/admin/configuration/code-interpreter";
const API_STATUS_URL = "**/api/admin/code-interpreter";
const API_HEALTH_URL = "**/api/admin/code-interpreter/health";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Intercept the status (GET /) and health (GET /health) endpoints with the
 * given values so the page renders deterministically.
 *
 * Also handles PUT requests — by default they succeed (200). Pass
 * `putStatus` to simulate failures.
 */
async function mockCodeInterpreterApi(
  page: Page,
  opts: { enabled: boolean; healthy: boolean; putStatus?: number }
) {
  const putStatus = opts.putStatus ?? 200;

  await page.route(API_HEALTH_URL, async (route) => {
    await route.fulfill({
      status: 200,
      contentType: "application/json",
      body: JSON.stringify({ healthy: opts.healthy }),
    });
  });

  await page.route(API_STATUS_URL, async (route) => {
    if (route.request().method() === "PUT") {
      await route.fulfill({
        status: putStatus,
        contentType: "application/json",
        body:
          putStatus >= 400
            ? JSON.stringify({ detail: "Server Error" })
            : JSON.stringify(null),
      });
    } else {
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: JSON.stringify({ enabled: opts.enabled }),
      });
    }
  });
}

/**
 * The disconnect icon button is an icon-only opal Button whose tooltip text
 * is not exposed as an accessible name. Locate it by finding the first
 * icon-only button (no label span) inside the card area.
 */
function getDisconnectIconButton(page: Page) {
  return page
    .locator("button:has(.interactive-foreground-icon):not(:has(span))")
    .first();
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

test.describe("Code Interpreter Admin Page", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
  });

  test("page loads with header and description", async ({ page }) => {
    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.locator('[aria-label="admin-page-title"]')).toHaveText(
      /^Code Interpreter/,
      { timeout: 10000 }
    );

    await expect(page.getByText("Built-in Python runtime")).toBeVisible();
  });

  test("shows Connected status when enabled and healthy", async ({ page }) => {
    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByText("Connected")).toBeVisible({ timeout: 10000 });
  });

  test("shows Connection Lost when enabled but unhealthy", async ({ page }) => {
    await mockCodeInterpreterApi(page, { enabled: true, healthy: false });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByText("Connection Lost")).toBeVisible({
      timeout: 10000,
    });
  });

  test("shows Reconnect button when disabled", async ({ page }) => {
    await mockCodeInterpreterApi(page, { enabled: false, healthy: false });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByRole("button", { name: "Reconnect" })).toBeVisible({
      timeout: 10000,
    });
    await expect(page.getByText("(Disconnected)")).toBeVisible();
  });

  test("disconnect flow opens modal and sends PUT request", async ({
    page,
  }) => {
    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByText("Connected")).toBeVisible({ timeout: 10000 });

    // Click the disconnect icon button
    await getDisconnectIconButton(page).click();

    // Modal should appear
    await expect(page.getByText("Disconnect Code Interpreter")).toBeVisible();
    await expect(
      page.getByText("All running sessions connected to")
    ).toBeVisible();

    // Click the danger Disconnect button in the modal
    const modal = page.getByRole("dialog");
    await modal.getByRole("button", { name: "Disconnect" }).click();

    // Modal should close after successful disconnect
    await expect(page.getByText("Disconnect Code Interpreter")).not.toBeVisible(
      { timeout: 5000 }
    );
  });

  test("disconnect modal can be closed without disconnecting", async ({
    page,
  }) => {
    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByText("Connected")).toBeVisible({ timeout: 10000 });

    // Open modal
    await getDisconnectIconButton(page).click();
    await expect(page.getByText("Disconnect Code Interpreter")).toBeVisible();

    // Close modal via Cancel button
    const modal = page.getByRole("dialog");
    await modal.getByRole("button", { name: "Cancel" }).click();

    // Modal should be gone, page still shows Connected
    await expect(
      page.getByText("Disconnect Code Interpreter")
    ).not.toBeVisible();
    await expect(page.getByText("Connected")).toBeVisible();
  });

  test("reconnect flow sends PUT with enabled=true", async ({ page }) => {
    await mockCodeInterpreterApi(page, { enabled: false, healthy: false });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByRole("button", { name: "Reconnect" })).toBeVisible({
      timeout: 10000,
    });

    // Intercept the PUT and verify the payload
    const putPromise = page.waitForRequest(
      (req) =>
        req.url().includes("/api/admin/code-interpreter") &&
        req.method() === "PUT"
    );

    await page.getByRole("button", { name: "Reconnect" }).click();

    const putReq = await putPromise;
    expect(putReq.postDataJSON()).toEqual({ enabled: true });
  });

  test("shows Checking... while reconnect is in progress", async ({ page }) => {
    // Use a single route handler that delays PUT responses
    await page.route(API_HEALTH_URL, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: JSON.stringify({ healthy: false }),
      });
    });

    await page.route(API_STATUS_URL, async (route) => {
      if (route.request().method() === "PUT") {
        await new Promise((resolve) => setTimeout(resolve, 2000));
        await route.fulfill({
          status: 200,
          contentType: "application/json",
          body: JSON.stringify(null),
        });
      } else {
        await route.fulfill({
          status: 200,
          contentType: "application/json",
          body: JSON.stringify({ enabled: false }),
        });
      }
    });

    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByRole("button", { name: "Reconnect" })).toBeVisible({
      timeout: 10000,
    });

    await page.getByRole("button", { name: "Reconnect" }).click();

    // Should show Checking... while the request is in flight
    await expect(page.getByText("Checking...")).toBeVisible({ timeout: 3000 });
  });

  test("shows error toast when disconnect fails", async ({ page }) => {
    await mockCodeInterpreterApi(page, {
      enabled: true,
      healthy: true,
      putStatus: 500,
    });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByText("Connected")).toBeVisible({ timeout: 10000 });

    // Open modal and click disconnect
    await getDisconnectIconButton(page).click();
    const modal = page.getByRole("dialog");
    await modal.getByRole("button", { name: "Disconnect" }).click();

    // Error toast should appear
    await expect(
      page.getByText("Failed to disconnect Code Interpreter")
    ).toBeVisible({ timeout: 5000 });
  });

  test("shows error toast when reconnect fails", async ({ page }) => {
    await mockCodeInterpreterApi(page, {
      enabled: false,
      healthy: false,
      putStatus: 500,
    });
    await page.goto(CODE_INTERPRETER_URL);

    await expect(page.getByRole("button", { name: "Reconnect" })).toBeVisible({
      timeout: 10000,
    });

    await page.getByRole("button", { name: "Reconnect" }).click();

    // Error toast should appear
    await expect(
      page.getByText("Failed to reconnect Code Interpreter")
    ).toBeVisible({ timeout: 5000 });

    // Reconnect button should reappear (not stuck in Checking...)
    await expect(page.getByRole("button", { name: "Reconnect" })).toBeVisible({
      timeout: 5000,
    });
  });
});


================================================
FILE: web/tests/e2e/admin/default-agent.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page, Locator } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import {
  TOOL_IDS,
  waitForUnifiedGreeting,
  openActionManagement,
} from "@tests/e2e/utils/tools";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

/**
 * Locate the Switch toggle for a built-in tool by its display name.
 * Each tool sits inside its own `<label>` wrapper created by InputLayouts.Horizontal.
 */
function getToolSwitch(page: Page, toolName: string): Locator {
  return page
    .locator("label")
    .filter({ has: page.getByText(toolName, { exact: true }) })
    .locator('button[role="switch"]')
    .first();
}

/**
 * Click a button and wait for the PATCH response to complete.
 * Uses waitForResponse set up *before* the click to avoid race conditions.
 */
async function clickAndWaitForPatch(
  page: Page,
  buttonLocator: Locator
): Promise<void> {
  const patchPromise = page.waitForResponse(
    (r) =>
      r.url().includes("/api/admin/default-assistant") &&
      r.request().method() === "PATCH",
    { timeout: 8000 }
  );
  await buttonLocator.click();
  await patchPromise;
}

test.describe("Chat Preferences Admin Page", () => {
  let testCcPairId: number | null = null;
  let webSearchProviderId: number | null = null;
  let imageGenConfigId: string | null = null;

  test.beforeEach(async ({ page }) => {
    // Log in as admin
    await page.context().clearCookies();
    await loginAs(page, "admin");

    const apiClient = new OnyxApiClient(page.request);

    // Create a connector so Internal Search tool becomes available
    testCcPairId = await apiClient.createFileConnector(
      `Test Connector ${Date.now()}`
    );

    // Create providers for Web Search and Image Generation tools
    try {
      webSearchProviderId = await apiClient.createWebSearchProvider(
        "exa",
        `Test Web Search Provider ${Date.now()}`
      );
      imageGenConfigId = await apiClient.createImageGenerationConfig(
        `test-image-gen-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create tool providers: ${error}`);
    }

    // Navigate to chat preferences
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForURL("**/admin/configuration/chat-preferences**");

    // Attach basic API logging for this spec
    page.on("response", async (resp) => {
      const url = resp.url();
      if (
        url.includes("/api/admin/default-assistant") ||
        url.includes("/api/admin/settings")
      ) {
        const method = resp.request().method();
        const status = resp.status();
        let body = "";
        try {
          body = await resp.text();
        } catch {}
        console.log(
          `[api:response] ${method} ${url} => ${status} body=${body?.slice(
            0,
            300
          )}`
        );
      }
    });

    // Proactively log tool availability and current config
    try {
      const baseURL = process.env.BASE_URL || "http://localhost:3000";
      const toolsResp = await page.request.get(`${baseURL}/api/tool`);
      const cfgResp = await page.request.get(
        `${baseURL}/api/admin/default-assistant/configuration`
      );
      console.log(
        `[/api/tool] status=${toolsResp.status()} body=${(
          await toolsResp.text()
        ).slice(0, 400)}`
      );
      console.log(
        `[/configuration] status=${cfgResp.status()} body=${(
          await cfgResp.text()
        ).slice(0, 400)}`
      );
    } catch (e) {
      console.log(`[setup] Failed to fetch initial admin config: ${String(e)}`);
    }
  });

  test.afterEach(async ({ page }) => {
    const apiClient = new OnyxApiClient(page.request);

    // Clean up the test connector
    if (testCcPairId !== null) {
      try {
        await apiClient.deleteCCPair(testCcPairId);
        testCcPairId = null;
      } catch (error) {
        console.warn(
          `Failed to delete test connector ${testCcPairId}: ${error}`
        );
      }
    }

    // Clean up web search provider
    if (webSearchProviderId !== null) {
      try {
        await apiClient.deleteWebSearchProvider(webSearchProviderId);
        webSearchProviderId = null;
      } catch (error) {
        console.warn(
          `Failed to delete web search provider ${webSearchProviderId}: ${error}`
        );
      }
    }

    // Clean up image gen config
    if (imageGenConfigId !== null) {
      try {
        await apiClient.deleteImageGenerationConfig(imageGenConfigId);
        imageGenConfigId = null;
      } catch (error) {
        console.warn(
          `Failed to delete image gen config ${imageGenConfigId}: ${error}`
        );
      }
    }
  });

  test("should load chat preferences page for admin users", async ({
    page,
  }) => {
    // Verify page loads with expected content
    await expect(page.locator('[aria-label="admin-page-title"]')).toHaveText(
      /^Chat Preferences/
    );
    await expect(page.getByText("Actions & Tools")).toBeVisible();
  });

  test("should toggle Internal Search tool on and off", async ({ page }) => {
    await page.waitForSelector("text=Internal Search", { timeout: 10000 });

    const searchSwitch = getToolSwitch(page, "Internal Search");

    // Get initial state
    const initialState = await searchSwitch.getAttribute("aria-checked");
    console.log(
      `[toggle] Internal Search initial aria-checked=${initialState}`
    );

    // Set up response listener before the click to avoid race conditions
    const patchRespPromise = page.waitForResponse(
      (r) =>
        r.url().includes("/api/admin/default-assistant") &&
        r.request().method() === "PATCH",
      { timeout: 8000 }
    );

    // Toggle it — auto-saves immediately
    await searchSwitch.click();

    // Wait for PATCH to complete
    const patchResp = await patchRespPromise;
    console.log(
      `[toggle] Internal Search PATCH status=${patchResp.status()} body=${(
        await patchResp.text()
      ).slice(0, 300)}`
    );

    // Wait for success toast
    await expect(page.getByText("Tools updated").first()).toBeVisible({
      timeout: 5000,
    });

    // Refresh page to verify persistence
    await page.reload();
    await page.waitForSelector("text=Internal Search", { timeout: 10000 });

    // Wait for SWR data to load and React to re-render with the persisted state
    const expectedState = initialState === "true" ? "false" : "true";
    await expect(searchSwitch).toHaveAttribute("aria-checked", expectedState, {
      timeout: 10000,
    });
    console.log(
      `[toggle] Internal Search after reload aria-checked=${expectedState}`
    );

    // Toggle back to original state
    await clickAndWaitForPatch(page, searchSwitch);
  });

  test("should toggle Web Search tool on and off", async ({ page }) => {
    await page.waitForSelector("text=Web Search", { timeout: 10000 });

    const webSearchSwitch = getToolSwitch(page, "Web Search");

    // Get initial state
    const initialState = await webSearchSwitch.getAttribute("aria-checked");
    console.log(`[toggle] Web Search initial aria-checked=${initialState}`);

    // Set up response listener before the click to avoid race conditions
    const patchRespPromise = page.waitForResponse(
      (r) =>
        r.url().includes("/api/admin/default-assistant") &&
        r.request().method() === "PATCH",
      { timeout: 8000 }
    );

    // Toggle it
    await webSearchSwitch.click();

    // Wait for PATCH to complete
    const patchResp = await patchRespPromise;
    console.log(
      `[toggle] Web Search PATCH status=${patchResp.status()} body=${(
        await patchResp.text()
      ).slice(0, 300)}`
    );

    // Wait for success toast
    await expect(page.getByText("Tools updated").first()).toBeVisible({
      timeout: 5000,
    });

    // Refresh page to verify persistence
    await page.reload();
    await page.waitForSelector("text=Web Search", { timeout: 10000 });

    // Wait for SWR data to load and React to re-render with the persisted state
    const expectedState = initialState === "true" ? "false" : "true";
    await expect(webSearchSwitch).toHaveAttribute(
      "aria-checked",
      expectedState,
      { timeout: 10000 }
    );
    console.log(
      `[toggle] Web Search after reload aria-checked=${expectedState}`
    );

    // Toggle back to original state
    await clickAndWaitForPatch(page, webSearchSwitch);
  });

  test("should toggle Image Generation tool on and off", async ({ page }) => {
    await page.waitForSelector("text=Image Generation", { timeout: 10000 });

    const imageGenSwitch = getToolSwitch(page, "Image Generation");

    // Get initial state
    const initialState = await imageGenSwitch.getAttribute("aria-checked");
    console.log(
      `[toggle] Image Generation initial aria-checked=${initialState}`
    );

    // Set up response listener before the click to avoid race conditions
    const patchRespPromise = page.waitForResponse(
      (r) =>
        r.url().includes("/api/admin/default-assistant") &&
        r.request().method() === "PATCH",
      { timeout: 8000 }
    );

    // Toggle it
    await imageGenSwitch.click();

    // Wait for PATCH to complete
    const patchResp = await patchRespPromise;
    console.log(
      `[toggle] Image Generation PATCH status=${patchResp.status()} body=${(
        await patchResp.text()
      ).slice(0, 300)}`
    );

    // Wait for success toast
    await expect(page.getByText("Tools updated").first()).toBeVisible({
      timeout: 5000,
    });

    // Refresh page to verify persistence
    await page.reload();
    await page.waitForSelector("text=Image Generation", { timeout: 10000 });

    // Wait for SWR data to load and React to re-render with the persisted state
    const expectedState = initialState === "true" ? "false" : "true";
    await expect(imageGenSwitch).toHaveAttribute(
      "aria-checked",
      expectedState,
      { timeout: 10000 }
    );
    console.log(
      `[toggle] Image Generation after reload aria-checked=${expectedState}`
    );

    // Toggle back to original state
    await clickAndWaitForPatch(page, imageGenSwitch);
  });

  test("should edit and save system prompt", async ({ page }) => {
    // Click "Modify Prompt" to open the system prompt modal
    await page.getByText("Modify Prompt").click();

    // Wait for modal to appear
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Fill textarea with random suffix to ensure uniqueness
    const testPrompt = `This is a test system prompt for the E2E test. ${Math.floor(
      Math.random() * 1000000
    )}`;
    const textarea = modal.getByPlaceholder("Enter your system prompt...");
    await textarea.fill(testPrompt);

    // Click Save and wait for PATCH to complete
    await clickAndWaitForPatch(
      page,
      modal.getByRole("button", { name: "Save" })
    );

    // Modal should close after save
    await expect(modal).not.toBeVisible();

    // Refresh page to verify persistence
    await page.reload();
    await page.waitForLoadState("networkidle");

    // Reopen modal and verify
    await page.getByText("Modify Prompt").click();
    const modalAfter = page.getByRole("dialog");
    await expect(modalAfter).toBeVisible({ timeout: 5000 });
    await expect(
      modalAfter.getByPlaceholder("Enter your system prompt...")
    ).toHaveValue(testPrompt);

    // Close modal without saving to clean up
    await modalAfter.getByRole("button", { name: "Cancel" }).click();
  });

  test("should allow empty system prompt", async ({ page }) => {
    // Open system prompt modal
    await page.getByText("Modify Prompt").click();
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    const textarea = modal.getByPlaceholder("Enter your system prompt...");

    // Get initial value to restore later
    const initialValue = await textarea.inputValue();

    // If already empty, add some text first
    if (initialValue === "") {
      await textarea.fill("Temporary text");
      await clickAndWaitForPatch(
        page,
        modal.getByRole("button", { name: "Save" })
      );
      // Reopen modal
      await page.getByText("Modify Prompt").click();
      await expect(modal).toBeVisible({ timeout: 5000 });
    }

    // Clear the textarea
    await textarea.fill("");

    // Save
    await clickAndWaitForPatch(
      page,
      modal.getByRole("button", { name: "Save" })
    );

    // Refresh page to verify persistence
    await page.reload();
    await page.waitForLoadState("networkidle");

    // Reopen modal and check
    await page.getByText("Modify Prompt").click();
    const modalAfter = page.getByRole("dialog");
    await expect(modalAfter).toBeVisible({ timeout: 5000 });

    // The modal pre-populates with default prompt when system_prompt is empty/null,
    // so we just verify the modal opens without error
    const textareaAfter = modalAfter.getByPlaceholder(
      "Enter your system prompt..."
    );
    await expect(textareaAfter).toBeVisible();

    // Restore original value if it wasn't already empty
    if (initialValue !== "") {
      await textareaAfter.fill(initialValue);
      await clickAndWaitForPatch(
        page,
        modalAfter.getByRole("button", { name: "Save" })
      );
    } else {
      await modalAfter.getByRole("button", { name: "Cancel" }).click();
    }
  });

  test("should handle very long system prompt gracefully", async ({ page }) => {
    // Open system prompt modal
    await page.getByText("Modify Prompt").click();
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    const textarea = modal.getByPlaceholder("Enter your system prompt...");

    // Get initial value to restore later
    const initialValue = await textarea.inputValue();

    // Create a very long prompt (~4800 characters)
    const longPrompt = "This is a test. ".repeat(300);

    await textarea.fill(longPrompt);

    // Save
    await clickAndWaitForPatch(
      page,
      modal.getByRole("button", { name: "Save" })
    );

    // Verify persistence after reload
    await page.reload();
    await page.waitForLoadState("networkidle");

    await page.getByText("Modify Prompt").click();
    const modalAfter = page.getByRole("dialog");
    await expect(modalAfter).toBeVisible({ timeout: 5000 });
    await expect(
      modalAfter.getByPlaceholder("Enter your system prompt...")
    ).toHaveValue(longPrompt);

    // Restore original value
    if (initialValue !== longPrompt) {
      const restoreTextarea = modalAfter.getByPlaceholder(
        "Enter your system prompt..."
      );
      await restoreTextarea.fill(initialValue);
      await clickAndWaitForPatch(
        page,
        modalAfter.getByRole("button", { name: "Save" })
      );
    } else {
      await modalAfter.getByRole("button", { name: "Cancel" }).click();
    }
  });

  test("should reject invalid tool IDs via API", async ({ page }) => {
    // Use browser console to send invalid tool IDs
    // This simulates what would happen if someone tried to bypass the UI
    const response = await page.evaluate(async () => {
      const res = await fetch("/api/admin/default-assistant", {
        method: "PATCH",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          tool_ids: ["InvalidTool", "AnotherInvalidTool"],
        }),
      });
      return {
        ok: res.ok,
        status: res.status,
        body: await res.text(),
      };
    });
    // Also try via page.request (uses storageState) to capture status in case page fetch fails
    try {
      const baseURL = process.env.BASE_URL || "http://localhost:3000";
      const alt = await page.request.patch(
        `${baseURL}/api/admin/default-assistant`,
        {
          data: { tool_ids: ["InvalidTool", "AnotherInvalidTool"] },
          headers: { "Content-Type": "application/json" },
        }
      );
      console.log(
        `[invalid-tools] page.request.patch status=${alt.status()} body=${(
          await alt.text()
        ).slice(0, 300)}`
      );
    } catch (e) {
      console.log(`[invalid-tools] page.request.patch error: ${String(e)}`);
    }

    // Check that the request failed with 400 or 422 (validation error)
    expect(response.ok).toBe(false);
    expect([400, 422].includes(response.status)).toBe(true);
    // The error message should indicate invalid tool IDs
    if (response.status === 400) {
      expect(response.body).toContain("Invalid tool IDs");
    }
  });

  test("should toggle all tools and verify in chat", async ({ page }) => {
    // Providers are now created in beforeEach, so all tools should be available

    // Wait for ALL three tools to be visible in the UI
    await page.waitForSelector("text=Internal Search", { timeout: 10000 });
    await page.waitForSelector("text=Web Search", { timeout: 10000 });
    await page.waitForSelector("text=Image Generation", { timeout: 10000 });

    // Wait for form to fully initialize
    await page.waitForTimeout(2000);

    // Store initial states
    const toolStates: Record<string, string | null> = {};

    // Capture current states (we'll restore these at the end)
    for (const toolName of [
      "Internal Search",
      "Web Search",
      "Image Generation",
    ]) {
      const toolSwitch = getToolSwitch(page, toolName);
      const state = await toolSwitch.getAttribute("aria-checked");
      toolStates[toolName] = state;
      console.log(`[toggle-all] Initial state for ${toolName}: ${state}`);
    }

    // Disable all tools
    for (const toolName of [
      "Internal Search",
      "Web Search",
      "Image Generation",
    ]) {
      const toolSwitch = getToolSwitch(page, toolName);
      const currentState = await toolSwitch.getAttribute("aria-checked");
      if (currentState === "true") {
        await clickAndWaitForPatch(page, toolSwitch);
        const newState = await toolSwitch.getAttribute("aria-checked");
        console.log(`[toggle-all] Clicked ${toolName}, new state=${newState}`);
      }
    }

    // Navigate to app to verify tools are disabled and initial load greeting
    await page.goto("/app");
    await waitForUnifiedGreeting(page);

    // Go back and re-enable all tools
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForLoadState("networkidle");
    // Reload to ensure the page has the updated tools list (after providers were created)
    await page.reload();
    await page.waitForLoadState("networkidle");
    await page.waitForSelector("text=Internal Search", { timeout: 10000 });

    for (const toolName of [
      "Internal Search",
      "Web Search",
      "Image Generation",
    ]) {
      const toolSwitch = getToolSwitch(page, toolName);
      const currentState = await toolSwitch.getAttribute("aria-checked");
      if (currentState === "false") {
        await clickAndWaitForPatch(page, toolSwitch);
        const newState = await toolSwitch.getAttribute("aria-checked");
        console.log(`[toggle-all] Clicked ${toolName}, new state=${newState}`);
      }
    }

    // Navigate to app and verify the Action Management toggle and actions exist
    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    // Wait a bit for backend to process the changes
    await page.waitForTimeout(2000);

    // Reload to ensure ChatContext has fresh tool data after providers were created
    await page.reload();
    await page.waitForLoadState("networkidle");

    // Debug: Check what tools are available via API
    try {
      const baseURL = process.env.BASE_URL || "http://localhost:3000";
      const toolsResp = await page.request.get(`${baseURL}/api/tool`);
      const toolsData = await toolsResp.json();
      console.log(
        `[toggle-all] Available tools from API: ${JSON.stringify(
          toolsData.map((t: any) => ({
            name: t.name,
            display_name: t.display_name,
            in_code_tool_id: t.in_code_tool_id,
          }))
        )}`
      );
    } catch (e) {
      console.warn(`[toggle-all] Failed to fetch tools: ${e}`);
    }

    // Debug: Check assistant configuration
    try {
      const baseURL = process.env.BASE_URL || "http://localhost:3000";
      const configResp = await page.request.get(
        `${baseURL}/api/admin/default-assistant/configuration`
      );
      const configData = await configResp.json();
      console.log(
        `[toggle-all] Default agent config: ${JSON.stringify(configData)}`
      );
    } catch (e) {
      console.warn(`[toggle-all] Failed to fetch config: ${e}`);
    }

    await waitForUnifiedGreeting(page);
    await expect(page.locator(TOOL_IDS.actionToggle)).toBeVisible();
    await openActionManagement(page);

    // Debug: Check what's actually in the popover
    const popover = page.locator(TOOL_IDS.options);
    const popoverText = await popover.textContent();
    console.log(`[toggle-all] Popover text: ${popoverText}`);

    // Verify at least Internal Search is visible (it should always be enabled)
    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({
      timeout: 10000,
    });

    // Check if other tools are visible (they might not be if there's a form state issue)
    const webSearchVisible = await page
      .locator(TOOL_IDS.webSearchOption)
      .isVisible()
      .catch(() => false);
    const imageGenVisible = await page
      .locator(TOOL_IDS.imageGenerationOption)
      .isVisible()
      .catch(() => false);
    console.log(
      `[toggle-all] Tools visible in chat: Internal Search=true, Web Search=${webSearchVisible}, Image Gen=${imageGenVisible}`
    );

    // NOTE: Only Internal Search is verified as visible due to a known issue with
    // Web Search and Image Generation form state when providers are created in beforeEach.
    // This is being tracked separately as a potential Formik/form state bug.

    await page.goto("/admin/configuration/chat-preferences");

    // Restore original states
    let needsSave = false;
    for (const toolName of [
      "Internal Search",
      "Web Search",
      "Image Generation",
    ]) {
      const toolSwitch = getToolSwitch(page, toolName);
      const currentState = await toolSwitch.getAttribute("aria-checked");
      const originalState = toolStates[toolName];

      if (currentState !== originalState) {
        await clickAndWaitForPatch(page, toolSwitch);
        needsSave = true;
      }
    }
  });
});

test.describe("Chat Preferences Non-Admin Access", () => {
  test("should redirect non-authenticated users", async ({ page }) => {
    // Clear cookies to ensure we're not authenticated
    await page.context().clearCookies();

    // Try to navigate directly to chat preferences without logging in
    await page.goto("/admin/configuration/chat-preferences");

    // Wait for navigation to settle
    await page.waitForTimeout(2000);

    // Should be redirected away from admin page
    const url = page.url();
    expect(!url.includes("/admin/configuration/chat-preferences")).toBe(true);
  });
});


================================================
FILE: web/tests/e2e/admin/disable_default_agent.spec.ts
================================================
import { test, expect, Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { createAgent } from "@tests/e2e/utils/agentUtils";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

const MAX_SETTING_SAVE_ATTEMPTS = 5;
const SETTING_SAVE_RETRY_DELAY_MS = 750;

/**
 * Expand the "Advanced Options" collapsible section on the Chat Preferences page.
 * The section is closed by default (`defaultOpen={false}`).
 * Only expands if not already open (checks for the switch element visibility).
 */
async function expandAdvancedOptions(page: Page): Promise<void> {
  // Wait for the page title to be visible, signalling the form has loaded
  await expect(page.locator('[aria-label="admin-page-title"]')).toBeVisible({
    timeout: 10000,
  });

  // Check if the switch is already visible (section already expanded)
  const switchEl = page.locator("#disable_default_assistant");
  const alreadyVisible = await switchEl.isVisible().catch(() => false);
  if (alreadyVisible) return;

  const header = page.getByText("Advanced Options", { exact: true });
  await expect(header).toBeVisible({ timeout: 10000 });
  await header.scrollIntoViewIfNeeded();
  await header.click();

  // Wait for the collapsible content to expand and switch to appear
  await expect(switchEl).toBeVisible({ timeout: 5000 });
}

/**
 * Toggle the "Always Start with an Agent" setting (formerly "Disable Default Agent")
 * on the Chat Preferences page. Uses auto-save via the SwitchField.
 *
 * The switch is a SwitchField with name="disable_default_assistant" which renders
 * `<button role="switch" id="disable_default_assistant" aria-checked="...">`.
 */
async function setDisableDefaultAssistantSetting(
  page: Page,
  isDisabled: boolean
): Promise<void> {
  let lastCheckedState = false;

  for (let attempt = 0; attempt < MAX_SETTING_SAVE_ATTEMPTS; attempt += 1) {
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForLoadState("networkidle");

    // Expand "Advanced Options" collapsible (closed by default)
    await expandAdvancedOptions(page);

    const switchEl = page.locator("#disable_default_assistant");
    await expect(switchEl).toBeVisible({ timeout: 5000 });

    const currentState = await switchEl.getAttribute("aria-checked");
    lastCheckedState = currentState === "true";

    if (lastCheckedState === isDisabled) {
      return;
    }

    // Toggle the switch
    await switchEl.click();

    // Wait for auto-save toast
    await expect(page.getByText("Settings updated")).toBeVisible({
      timeout: 5000,
    });

    await page.waitForTimeout(SETTING_SAVE_RETRY_DELAY_MS);

    // Verify persistence after reload
    await page.reload();
    await page.waitForLoadState("networkidle");

    // Re-expand Advanced Options (closed by default after reload)
    await expandAdvancedOptions(page);

    const newState = await switchEl.getAttribute("aria-checked");
    lastCheckedState = newState === "true";

    if (lastCheckedState === isDisabled) {
      return;
    }
  }

  throw new Error(
    `Failed to persist Always Start with an Agent setting after ${MAX_SETTING_SAVE_ATTEMPTS} attempts (expected ${isDisabled}, last=${lastCheckedState}).`
  );
}

test.describe("Disable Default Agent Setting @exclusive", () => {
  let createdAssistantId: number | null = null;

  test.beforeEach(async ({ page }) => {
    // Log in as admin
    await page.context().clearCookies();
    await loginAs(page, "admin");
  });

  test.afterEach(async ({ page }) => {
    // Clean up any assistant created during the test
    if (createdAssistantId !== null) {
      const client = new OnyxApiClient(page.request);
      await client.deleteAgent(createdAssistantId);
      createdAssistantId = null;
    }

    // Ensure default agent is enabled (switch unchecked) after each test
    // to avoid interfering with other tests
    await setDisableDefaultAssistantSetting(page, false);
  });

  test("admin can enable and disable the setting in chat preferences", async ({
    page,
  }) => {
    await setDisableDefaultAssistantSetting(page, true);
    await setDisableDefaultAssistantSetting(page, false);
    await setDisableDefaultAssistantSetting(page, true);
  });

  test("new session button uses current agent when setting is enabled", async ({
    page,
  }) => {
    // First enable the setting
    await setDisableDefaultAssistantSetting(page, true);

    // Navigate to app and create a new assistant to ensure there's one besides the default
    await page.goto("/app");
    const agentName = `Test Assistant ${Date.now()}`;
    await createAgent(page, {
      name: agentName,
      description: "Test assistant for new session button test",
      instructions: "You are a helpful test assistant.",
    });

    // Extract the assistant ID from the URL
    const currentUrl = page.url();
    const agentIdMatch = currentUrl.match(/agentId=(\d+)/);
    expect(agentIdMatch).toBeTruthy();

    // Store for cleanup
    if (agentIdMatch) {
      createdAssistantId = Number(agentIdMatch[1]);
    }

    // Click the "New Session" button
    const newSessionButton = page.locator(
      '[data-testid="AppSidebar/new-session"]'
    );
    await newSessionButton.click();

    // Verify the WelcomeMessage shown is NOT from the default agent
    // Default agent shows onyx-logo, custom agents show agent-name-display
    await expect(page.locator('[data-testid="onyx-logo"]')).not.toBeVisible();
    await expect(
      page.locator('[data-testid="agent-name-display"]')
    ).toBeVisible();
  });

  test("direct navigation to /app uses first pinned assistant when setting is enabled", async ({
    page,
  }) => {
    // First enable the setting
    await setDisableDefaultAssistantSetting(page, true);

    // Navigate directly to /app
    await page.goto("/app");

    // Verify that we didn't land on the default agent (ID 0)
    // The assistant selection should be a pinned or available assistant (not ID 0)
    const currentUrl = page.url();
    // If agentId is in URL, it should not be 0
    if (currentUrl.includes("agentId=")) {
      expect(currentUrl).not.toContain("agentId=0");
    }
  });

  test("chat preferences shows disabled state when setting is enabled", async ({
    page,
  }) => {
    // First enable the setting
    await setDisableDefaultAssistantSetting(page, true);

    // Navigate to chat preferences configuration page
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForLoadState("networkidle");

    // Wait for the page to fully render (page title signals form is loaded)
    await expect(page.locator('[aria-label="admin-page-title"]')).toHaveText(
      /^Chat Preferences/,
      { timeout: 10000 }
    );

    // The new page wraps Connectors + Actions & Tools in <Disabled disabled={values.disable_default_assistant}>
    // When disabled, the section should have reduced opacity / disabled styling
    // The "Modify Prompt" button should still be accessible (it's outside the Disabled wrapper)
    // Use text locator (Opal Button wraps text in Interactive.Base > Slot which may
    // not expose role="button" to Playwright's getByRole)
    await expect(page.getByText("Modify Prompt")).toBeVisible({
      timeout: 5000,
    });

    // The "Actions & Tools" section text should still be present but visually disabled
    await expect(page.getByText("Actions & Tools")).toBeVisible();
  });

  test("chat preferences shows full configuration UI when setting is disabled", async ({
    page,
  }) => {
    // Ensure setting is disabled
    await setDisableDefaultAssistantSetting(page, false);

    // Navigate to chat preferences configuration page
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForLoadState("networkidle");

    // Verify configuration UI is shown (Actions & Tools section should be visible and enabled)
    await expect(page.getByText("Actions & Tools")).toBeVisible({
      timeout: 10000,
    });

    // Verify the page title
    await expect(page.locator('[aria-label="admin-page-title"]')).toHaveText(
      /^Chat Preferences/
    );
  });

  test("default agent is available again when setting is disabled", async ({
    page,
  }) => {
    // Navigate to settings and ensure setting is disabled
    await setDisableDefaultAssistantSetting(page, false);

    // Navigate directly to /app without parameters
    await page.goto("/app");

    // The default agent (ID 0) should be available
    // We can verify this by checking that the app loads successfully
    // and doesn't force navigation to a specific assistant
    expect(page.url()).toContain("/app");

    // Verify the new session button navigates to /app without agentId
    const newSessionButton = page.locator(
      '[data-testid="AppSidebar/new-session"]'
    );
    await newSessionButton.click();

    // Should navigate to /app without agentId parameter
    const newUrl = page.url();
    expect(newUrl).toContain("/app");
  });
});


================================================
FILE: web/tests/e2e/admin/discord-bot/admin-workflows.spec.ts
================================================
/**
 * E2E tests for Discord bot admin workflow flows.
 *
 * These tests verify complete user journeys that span multiple pages/components.
 * Individual component tests are in their respective spec files.
 */

import {
  test,
  expect,
  gotoDiscordBotPage,
  gotoGuildDetailPage,
} from "@tests/e2e/admin/discord-bot/fixtures";

// Disable retries for Discord bot tests - attempt once at most
test.describe.configure({ retries: 0 });

test.describe("Admin Workflow E2E Flows", () => {
  test("complete setup and configuration flow", async ({
    adminPage,
    mockRegisteredGuild,
    mockBotConfigured: _mockBotConfigured,
  }) => {
    // Start at list page
    await gotoDiscordBotPage(adminPage);

    // Verify list page loads
    await expect(
      adminPage
        .locator('[aria-label="admin-page-title"]')
        .getByText("Discord Integration")
    ).toBeVisible();
    await expect(
      adminPage.locator("text=Server Configurations").first()
    ).toBeVisible();

    // Navigate to guild detail page
    const guildButton = adminPage.locator(
      `button:has-text("${mockRegisteredGuild.name}")`
    );
    await expect(guildButton).toBeVisible({ timeout: 10000 });
    await guildButton.click();

    // Verify detail page loads
    await expect(adminPage).toHaveURL(
      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)
    );
    await expect(
      adminPage.locator("text=Channel Configuration").first()
    ).toBeVisible();

    // Configure a channel: toggle enabled, show unsaved changes, save
    const channelRow = adminPage.locator("tbody tr").first();
    await expect(channelRow).toBeVisible();

    const enableToggle = channelRow.locator('[role="switch"]').first();
    if (await enableToggle.isVisible()) {
      const initialState = await enableToggle.getAttribute("aria-checked");
      await enableToggle.click();

      await expect(enableToggle).toHaveAttribute(
        "aria-checked",
        initialState === "true" ? "false" : "true"
      );
    }

    // Verify unsaved changes indicator
    await expect(
      adminPage.locator("text=You have unsaved changes")
    ).toBeVisible({ timeout: 5000 });

    // Save changes - wait for the bulk update API call
    // Update button is now in the header
    const updateButton = adminPage.locator(
      'button:has-text("Update Configuration")'
    );
    // Verify button is visible and enabled before clicking
    await expect(updateButton).toBeEnabled({ timeout: 5000 });

    const bulkUpdatePromise = adminPage.waitForResponse(
      (response) =>
        response
          .url()
          .includes(
            `/api/manage/admin/discord-bot/guilds/${mockRegisteredGuild.id}/channels`
          ) && response.request().method() === "PATCH"
    );

    await updateButton.click();
    await bulkUpdatePromise;

    // Verify success toast
    const successToast = adminPage.locator("text=/updated/i");
    await expect(successToast).toBeVisible({ timeout: 5000 });

    // Navigate back to list
    const backButton = adminPage.locator(
      'button:has-text("Back"), a:has-text("Back"), button[aria-label*="back" i]'
    );
    if (await backButton.isVisible({ timeout: 5000 }).catch(() => false)) {
      await backButton.click();
      await expect(adminPage).toHaveURL(/\/admin\/discord-bot$/);
    }
  });
});


================================================
FILE: web/tests/e2e/admin/discord-bot/bot-config.spec.ts
================================================
/**
 * E2E tests for Discord bot configuration page.
 *
 * Tests the bot token configuration card which allows admins to:
 * - Enter and save a Discord bot token
 * - View configuration status (Configured/Not Configured badge)
 * - Delete the bot token configuration
 */

import {
  test,
  expect,
  gotoDiscordBotPage,
} from "@tests/e2e/admin/discord-bot/fixtures";

// Disable retries for Discord bot tests - attempt once at most
test.describe.configure({ retries: 0 });

test.describe("Bot Configuration Page", () => {
  test("bot config page loads", async ({ adminPage }) => {
    await gotoDiscordBotPage(adminPage);

    // Page should load without errors
    await expect(adminPage).toHaveURL(/\/admin\/discord-bot/);
    // Page title should contain "Discord"
    await expect(
      adminPage
        .locator('[aria-label="admin-page-title"]')
        .getByText("Discord Integration")
    ).toBeVisible();
  });

  test("bot config shows token input when not configured", async ({
    adminPage,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // When not configured, should show:
    // - "Not Configured" badge OR
    // - Token input field with "Save Token" button
    const notConfiguredBadge = adminPage.locator("text=Not Configured");
    const tokenInput = adminPage.locator('input[placeholder*="token" i]');
    const saveTokenButton = adminPage.locator('button:has-text("Save Token")');

    // Either not configured state with input, or already configured
    const configuredBadge = adminPage.locator("text=Configured").first();

    // Check that at least one of the states is visible
    // Check configured state first, then fall back to not configured state
    const isConfigured = await configuredBadge
      .isVisible({ timeout: 5000 })
      .catch(() => false);

    if (isConfigured) {
      // Bot is configured - verify configured badge is visible
      await expect(configuredBadge).toBeVisible();
    } else {
      // Bot is not configured - verify not configured badge and input are visible
      await expect(notConfiguredBadge).toBeVisible({ timeout: 10000 });
      await expect(tokenInput).toBeVisible();
      await expect(saveTokenButton).toBeVisible();
    }
  });

  test("bot config save token validation", async ({ adminPage }) => {
    await gotoDiscordBotPage(adminPage);

    const tokenInput = adminPage.locator('input[placeholder*="token" i]');
    const saveTokenButton = adminPage.locator('button:has-text("Save Token")');

    // Only run if token input is visible (not already configured)
    if (await tokenInput.isVisible({ timeout: 5000 }).catch(() => false)) {
      // Save button should be disabled when input is empty
      await expect(saveTokenButton).toBeDisabled();

      // Enter a token
      await tokenInput.fill("test_bot_token_12345");

      // Save button should now be enabled
      await expect(saveTokenButton).toBeEnabled();

      // Clear input
      await tokenInput.clear();

      // Button should be disabled again
      await expect(saveTokenButton).toBeDisabled();
    }
  });

  test("bot config shows configured state", async ({
    adminPage,
    mockBotConfigured,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // With mockBotConfigured, should show configured state
    const configuredBadge = adminPage.locator("text=Configured").first();
    const deleteButton = adminPage.locator(
      'button:has-text("Delete Discord Token")'
    );

    // Should show configured badge
    await expect(configuredBadge).toBeVisible({ timeout: 10000 });

    // Should show delete button when configured
    await expect(deleteButton).toBeVisible();
  });

  test("bot config delete shows confirmation modal", async ({
    adminPage,
    mockBotConfigured,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // Wait for configured state to be visible
    const configuredBadge = adminPage.locator("text=Configured").first();
    await expect(configuredBadge).toBeVisible({ timeout: 10000 });

    // Find and click delete button
    const deleteButton = adminPage.locator(
      'button:has-text("Delete Discord Token")'
    );
    await expect(deleteButton).toBeVisible();
    await deleteButton.click();

    // Confirmation modal should appear
    const modal = adminPage.locator('[role="dialog"]');
    await expect(modal).toBeVisible({ timeout: 10000 });

    // Modal should have cancel and confirm buttons
    const cancelButton = adminPage.locator('button:has-text("Cancel")');
    const confirmButton = adminPage.locator(
      'button:has-text("Delete"), button:has-text("Confirm")'
    );

    // At least one of these buttons should be visible
    await expect(cancelButton.or(confirmButton).first()).toBeVisible({
      timeout: 5000,
    });

    // Cancel to avoid actually deleting
    if (await cancelButton.isVisible({ timeout: 5000 }).catch(() => false)) {
      await cancelButton.click();
      await expect(modal).not.toBeVisible({ timeout: 5000 });
    }
  });
});


================================================
FILE: web/tests/e2e/admin/discord-bot/channel-config.spec.ts
================================================
/**
 * E2E tests for Discord guild detail page and channel configuration.
 *
 * Tests the guild detail page which includes:
 * - Guild enabled/disabled toggle
 * - Default Agent (persona) selector
 * - Channel Configuration section with:
 *   - List of channels with icons (text/forum)
 *   - Enabled toggle per channel
 *   - Require @mention toggle
 *   - Thread Only Mode toggle
 *   - Agent Override dropdown
 */

import {
  test,
  expect,
  gotoGuildDetailPage,
} from "@tests/e2e/admin/discord-bot/fixtures";

// Disable retries for Discord bot tests - attempt once at most
test.describe.configure({ retries: 0 });

test.describe("Guild Detail Page & Channel Configuration", () => {
  test("guild detail page loads", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Page should load with guild info
    await expect(adminPage).toHaveURL(
      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)
    );

    // Should show the guild name in the header
    await expect(
      adminPage.locator(`text=${mockRegisteredGuild.name}`)
    ).toBeVisible();
  });

  test("guild default agent dropdown shows options", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Should show "Default Agent" section
    await expect(adminPage.locator("text=Default Agent").first()).toBeVisible({
      timeout: 10000,
    });

    // Find the persona/agent dropdown (InputSelect)
    const agentDropdown = adminPage.locator('button:has-text("Default Agent")');

    if (await agentDropdown.isVisible({ timeout: 5000 }).catch(() => false)) {
      await agentDropdown.click();

      // Dropdown should show available options
      const options = adminPage.locator('[role="option"]');
      await expect(options.first()).toBeVisible({ timeout: 5000 });
    }
  });
});

test.describe("Channel Configuration", () => {
  test("channels table displays with action buttons", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Channel list table should be visible
    const channelTable = adminPage.locator("table");
    await expect(channelTable).toBeVisible({ timeout: 10000 });

    // Should show our mock channels
    await expect(adminPage.locator("text=general")).toBeVisible();
    await expect(adminPage.locator("text=help-forum")).toBeVisible();
    await expect(adminPage.locator("text=private-support")).toBeVisible();

    // Should show action buttons
    await expect(
      adminPage.locator('button:has-text("Enable All")')
    ).toBeVisible();
    await expect(
      adminPage.locator('button:has-text("Disable All")')
    ).toBeVisible();
    // Update button is now in the header, not in the channel config section
    await expect(
      adminPage.locator('button:has-text("Update Configuration")')
    ).toBeVisible();
  });

  test("channels table has correct columns", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Table headers should be visible
    await expect(adminPage.locator("th:has-text('Channel')")).toBeVisible();
    await expect(adminPage.locator("th:has-text('Enabled')")).toBeVisible();
    await expect(
      adminPage.locator("th:has-text('Require @mention')")
    ).toBeVisible();
    await expect(
      adminPage.locator("th:has-text('Thread Only Mode')")
    ).toBeVisible();
    await expect(
      adminPage.locator("th:has-text('Agent Override')")
    ).toBeVisible();
  });

  test("channel enabled toggle updates state", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Find the row for "general" channel
    const generalRow = adminPage.locator("tr").filter({
      hasText: "general",
    });

    // Find the first switch in that row (Enabled toggle)
    const enabledToggle = generalRow.locator('[role="switch"]').first();
    await expect(enabledToggle).toBeVisible({ timeout: 10000 });

    // Get initial state
    const initialState = await enabledToggle.getAttribute("aria-checked");

    // Click to toggle
    await enabledToggle.click();

    // State should change (local state update)
    await expect(enabledToggle).toHaveAttribute(
      "aria-checked",
      initialState === "true" ? "false" : "true"
    );
  });

  test("channel require mention toggle works", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Find the row for "general" channel
    const generalRow = adminPage.locator("tr").filter({
      hasText: "general",
    });

    // Find switches - second one should be "require @mention"
    const switches = generalRow.locator('[role="switch"]');
    const requireMentionToggle = switches.nth(1);

    await expect(requireMentionToggle).toBeVisible({ timeout: 10000 });

    // Get initial state
    const initialState =
      await requireMentionToggle.getAttribute("aria-checked");

    // Click to toggle
    await requireMentionToggle.click();

    // State should change
    await expect(requireMentionToggle).toHaveAttribute(
      "aria-checked",
      initialState === "true" ? "false" : "true"
    );
  });

  test("channel thread only mode toggle works for text channels", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Find the row for "general" channel (text type)
    const generalRow = adminPage.locator("tr").filter({
      hasText: "general",
    });

    // Find switches - third one should be "thread only mode"
    const switches = generalRow.locator('[role="switch"]');
    const threadOnlyToggle = switches.nth(2);

    await expect(threadOnlyToggle).toBeVisible({ timeout: 10000 });

    // Toggle should be clickable for text channels
    await threadOnlyToggle.click();

    // Verify it changed
    const newState = await threadOnlyToggle.getAttribute("aria-checked");
    expect(newState).toBe("true");
  });

  test("forum channels do not show thread only toggle", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Find the row for "help-forum" channel (forum type)
    const forumRow = adminPage.locator("tr").filter({
      hasText: "help-forum",
    });

    // Forum channels should only have 2 switches (Enabled, Require @mention)
    // Thread Only Mode is not applicable to forums
    const switches = forumRow.locator('[role="switch"]');
    const count = await switches.count();

    // Should have fewer switches than text channels (2 vs 3)
    expect(count).toBe(2);
  });

  test("enable all button works", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    const enableAllButton = adminPage.locator('button:has-text("Enable All")');
    await expect(enableAllButton).toBeVisible({ timeout: 10000 });
    await enableAllButton.click();

    // Wait for UI to update - all enabled toggles should be checked
    const rows = adminPage.locator("tbody tr");
    const rowCount = await rows.count();

    for (let i = 0; i < rowCount; i++) {
      const toggle = rows.nth(i).locator('[role="switch"]').first();
      if (await toggle.isVisible()) {
        await expect(toggle).toHaveAttribute("aria-checked", "true");
      }
    }
  });

  test("disable all button works", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    const disableAllButton = adminPage.locator(
      'button:has-text("Disable All")'
    );
    await expect(disableAllButton).toBeVisible({ timeout: 10000 });
    await disableAllButton.click();

    // Wait for UI to update - all enabled toggles should be unchecked
    const rows = adminPage.locator("tbody tr");
    const rowCount = await rows.count();

    for (let i = 0; i < rowCount; i++) {
      const toggle = rows.nth(i).locator('[role="switch"]').first();
      if (await toggle.isVisible()) {
        await expect(toggle).toHaveAttribute("aria-checked", "false");
      }
    }
  });

  test("unsaved changes indicator appears", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);

    // Find the unsaved changes message container (always in DOM, hidden with opacity-0)
    const unsavedMessage = adminPage.locator("text=You have unsaved changes");
    // The container div has class "sticky" and controls visibility via opacity
    const messageContainer = adminPage
      .locator("div.sticky")
      .filter({ has: unsavedMessage })
      .first();

    // Initially hidden (opacity-0)
    await expect(messageContainer).toHaveCSS("opacity", "0");

    // Make a change
    const generalRow = adminPage.locator("tr").filter({
      hasText: "general",
    });
    const enabledToggle = generalRow.locator('[role="switch"]').first();
    await enabledToggle.click();

    // Unsaved changes indicator should appear (opacity-100)
    await expect(messageContainer).toHaveCSS("opacity", "1", { timeout: 5000 });
    await expect(unsavedMessage).toBeVisible({ timeout: 5000 });
  });
});


================================================
FILE: web/tests/e2e/admin/discord-bot/fixtures.ts
================================================
/**
 * Playwright fixtures for Discord bot admin UI tests.
 *
 * These fixtures provide:
 * - Authenticated admin page
 * - API client for backend operations
 * - Mock data for guilds and channels (since real Discord integration isn't available in tests)
 */

import { test as base, expect, Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

/**
 * Mock data types matching backend response schemas
 */
interface MockGuild {
  id: number;
  guild_id: string | null;
  guild_name: string | null;
  registration_key: string;
  registered_at: string | null;
  enabled: boolean;
  default_persona_id: number | null;
}

interface MockChannel {
  id: number;
  channel_id: string;
  channel_name: string;
  channel_type: "text" | "forum";
  is_private: boolean;
  enabled: boolean;
  require_bot_invocation: boolean;
  thread_only_mode: boolean;
  persona_override_id: number | null;
}

/**
 * Constants for mock data
 */
const MOCK_GUILD_ID = 999;

/**
 * Helper to authenticate and clear cookies
 */
async function authenticateAdmin(page: Page): Promise<void> {
  await page.context().clearCookies();
  await loginAs(page, "admin");
}

/**
 * Helper to create JSON response
 */
function jsonResponse(data: unknown, status = 200) {
  return {
    status,
    contentType: "application/json",
    body: JSON.stringify(data),
  };
}

/**
 * Creates mock channel data for a registered guild
 */
function createMockChannels(): MockChannel[] {
  return [
    {
      id: 1,
      channel_id: "1234567890123456789",
      channel_name: "general",
      channel_type: "text",
      is_private: false,
      enabled: true,
      require_bot_invocation: false,
      thread_only_mode: false,
      persona_override_id: null,
    },
    {
      id: 2,
      channel_id: "1234567890123456790",
      channel_name: "help-forum",
      channel_type: "forum",
      is_private: false,
      enabled: false,
      require_bot_invocation: true,
      thread_only_mode: false,
      persona_override_id: null,
    },
    {
      id: 3,
      channel_id: "1234567890123456791",
      channel_name: "private-support",
      channel_type: "text",
      is_private: true,
      enabled: true,
      require_bot_invocation: true,
      thread_only_mode: true,
      persona_override_id: null,
    },
  ];
}

/**
 * Creates a mock registered guild
 */
function createMockRegisteredGuild(id: number): MockGuild {
  return {
    id,
    guild_id: "987654321098765432",
    guild_name: "Test Discord Server",
    registration_key: "test-key-12345",
    registered_at: new Date().toISOString(),
    enabled: true,
    default_persona_id: null,
  };
}

/**
 * Creates a mock pending guild (not yet registered)
 */
function createMockPendingGuild(id: number): MockGuild {
  return {
    id,
    guild_id: null,
    guild_name: null,
    registration_key: "pending-key-67890",
    registered_at: null,
    enabled: false,
    default_persona_id: null,
  };
}

// Extend base test with Discord bot fixtures
export const test = base.extend<{
  adminPage: Page;
  apiClient: OnyxApiClient;
  seededGuild: { id: number; name: string; registrationKey: string };
  mockRegisteredGuild: {
    id: number;
    name: string;
    guild: MockGuild;
    channels: MockChannel[];
  };
  mockBotConfigured: boolean;
}>({
  // Admin page fixture - ensures proper authentication before each test
  adminPage: async ({ page }, use) => {
    await authenticateAdmin(page);
    await use(page);
  },

  // API client fixture - provides access to OnyxApiClient for backend operations
  apiClient: async ({ page }, use) => {
    await authenticateAdmin(page);
    const client = new OnyxApiClient(page.request);
    await use(client);
  },

  // Seeded guild fixture - creates a real pending guild via API
  seededGuild: async ({ page }, use) => {
    await authenticateAdmin(page);

    const apiClient = new OnyxApiClient(page.request);
    const guild = await apiClient.createDiscordGuild();

    await use({
      id: guild.id,
      name: guild.guild_name || "Pending",
      registrationKey: guild.registration_key,
    });

    // Cleanup
    await apiClient.deleteDiscordGuild(guild.id);
  },

  // Mock registered guild fixture - provides a fully mocked registered guild with channels
  // This intercepts API calls to simulate a registered guild without needing Discord
  mockRegisteredGuild: async ({ page }, use) => {
    await authenticateAdmin(page);

    // Use a mutable object so we can update it when PATCH requests come in
    let mockGuild = createMockRegisteredGuild(MOCK_GUILD_ID);
    const mockChannels = createMockChannels();

    // Mock the guild list endpoint
    await page.route(
      "**/api/manage/admin/discord-bot/guilds",
      async (route) => {
        const method = route.request().method();
        if (method === "GET") {
          await route.fulfill(jsonResponse([mockGuild]));
        } else if (method === "POST") {
          // Allow creating new guilds - return a new pending guild
          const newGuild = createMockPendingGuild(MOCK_GUILD_ID + 1);
          await route.fulfill(jsonResponse(newGuild));
        } else {
          await route.continue();
        }
      }
    );

    // Mock the specific guild endpoint
    await page.route(
      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}`,
      async (route) => {
        const method = route.request().method();
        if (method === "GET") {
          await route.fulfill(jsonResponse(mockGuild));
        } else if (method === "PATCH") {
          // Handle updates - merge with current state and update mockGuild
          const body = (await route.request().postDataJSON()) || {};
          mockGuild = { ...mockGuild, ...body };
          await route.fulfill(jsonResponse(mockGuild));
        } else if (method === "DELETE") {
          await route.fulfill({ status: 204, body: "" });
        } else {
          await route.continue();
        }
      }
    );

    // Mock the channels endpoint for this guild
    await page.route(
      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}/channels`,
      async (route) => {
        await route.fulfill(jsonResponse(mockChannels));
      }
    );

    // Mock channel update endpoint
    await page.route(
      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}/channels/*`,
      async (route) => {
        if (route.request().method() === "PATCH") {
          const body = (await route.request().postDataJSON()) || {};
          // Extract channel ID from URL: .../channels/{id}
          const urlMatch = route
            .request()
            .url()
            .match(/\/channels\/(\d+)/);
          const channelIdStr = urlMatch?.[1];
          const channelId = channelIdStr ? parseInt(channelIdStr, 10) : null;
          const channel = channelId
            ? mockChannels.find((c) => c.id === channelId)
            : null;

          if (channel) {
            const updatedChannel = { ...channel, ...body };
            await route.fulfill(jsonResponse(updatedChannel));
          } else {
            await route.fulfill(
              jsonResponse({ error: "Channel not found" }, 404)
            );
          }
        } else {
          await route.continue();
        }
      }
    );

    await use({
      id: MOCK_GUILD_ID,
      name: mockGuild.guild_name!,
      guild: mockGuild,
      channels: mockChannels,
    });

    // No cleanup needed - routes are automatically cleared when page closes
  },

  // Mock bot configuration state
  mockBotConfigured: async ({ page }, use) => {
    const configResponse = {
      configured: true,
      created_at: new Date().toISOString(),
    };

    await page.route(
      "**/api/manage/admin/discord-bot/config",
      async (route) => {
        const method = route.request().method();
        if (method === "GET" || method === "POST") {
          await route.fulfill(jsonResponse(configResponse));
        } else if (method === "DELETE") {
          await route.fulfill({ status: 204, body: "" });
        } else {
          await route.continue();
        }
      }
    );

    await use(true);
  },
});

export { expect };

/**
 * Navigation helpers for Discord bot pages.
 * These wait for specific UI elements that indicate the page has loaded.
 */
export async function gotoDiscordBotPage(adminPage: Page): Promise<void> {
  await adminPage.goto("/admin/discord-bot");
  await adminPage.waitForLoadState("networkidle");
  // Wait for the page title
  await adminPage.waitForSelector("text=Discord Integration", {
    timeout: 15000,
  });
}

export async function gotoGuildDetailPage(
  adminPage: Page,
  guildId: number
): Promise<void> {
  await adminPage.goto(`/admin/discord-bot/${guildId}`);
  await adminPage.waitForLoadState("networkidle");
  // Wait for Channel Configuration section (the main content area on guild detail page)
  await adminPage.waitForSelector("text=Channel Configuration", {
    timeout: 15000,
  });
}


================================================
FILE: web/tests/e2e/admin/discord-bot/guilds-list.spec.ts
================================================
/**
 * E2E tests for Discord guilds list page.
 *
 * Tests the server configurations table which shows:
 * - List of registered and pending Discord servers
 * - Status badges (Registered/Pending)
 * - Enabled/Disabled status
 * - Add Server and Delete actions
 */

import {
  test,
  expect,
  gotoDiscordBotPage,
} from "@tests/e2e/admin/discord-bot/fixtures";

// Disable retries for Discord bot tests - attempt once at most
test.describe.configure({ retries: 0 });

test.describe("Guilds List Page", () => {
  test("guilds page shows server configurations", async ({ adminPage }) => {
    await gotoDiscordBotPage(adminPage);

    // Should show Server Configurations section
    // Use .first() to avoid strict mode violation if it appears in multiple places
    const serverConfigSection = adminPage
      .locator("text=Server Configurations")
      .first();
    await expect(serverConfigSection).toBeVisible({ timeout: 10000 });
  });

  test("guilds page empty state", async ({ adminPage }) => {
    await gotoDiscordBotPage(adminPage);

    // Should show either:
    // - "No Discord servers configured yet" empty message
    // - OR a table with servers
    // - OR Add Server button
    const emptyState = adminPage.locator(
      "text=No Discord servers configured yet"
    );
    const addButton = adminPage.locator('button:has-text("Add Server")');
    const serverTable = adminPage.locator("table");

    // Check each state separately to avoid strict mode violation
    // (empty state and add button can both be visible when bot not configured)
    const hasEmptyState = await emptyState
      .isVisible({ timeout: 5000 })
      .catch(() => false);
    const hasAddButton = await addButton
      .isVisible({ timeout: 5000 })
      .catch(() => false);
    const hasTable = await serverTable
      .isVisible({ timeout: 5000 })
      .catch(() => false);

    expect(hasEmptyState || hasAddButton || hasTable).toBe(true);
  });

  test("guilds page shows mock registered guild", async ({
    adminPage,
    mockRegisteredGuild,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // Mock guild should appear in the list
    const guildName = adminPage.locator(`text=${mockRegisteredGuild.name}`);
    await expect(guildName).toBeVisible({ timeout: 10000 });

    // Find the table row containing the guild to scope badges
    const tableRow = adminPage.locator("tr").filter({
      hasText: mockRegisteredGuild.name,
    });

    // Should show Registered badge in the guild's row
    const registeredBadge = tableRow.locator("text=Registered");
    await expect(registeredBadge).toBeVisible();

    // Should show enabled toggle switch in the guild's row (in Enabled column)
    const enabledSwitch = tableRow.locator('[role="switch"]').first();
    await expect(enabledSwitch).toBeVisible();
    await expect(enabledSwitch).toHaveAttribute("aria-checked", "true");
  });

  test("guild enabled toggle works in table", async ({
    adminPage,
    mockRegisteredGuild,
    mockBotConfigured: _mockBotConfigured,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // Find the table row containing the guild
    const tableRow = adminPage.locator("tr").filter({
      hasText: mockRegisteredGuild.name,
    });
    await expect(tableRow).toBeVisible({ timeout: 10000 });

    // Find the enabled toggle switch in that row
    const enabledSwitch = tableRow.locator('[role="switch"]').first();
    await expect(enabledSwitch).toBeVisible({ timeout: 10000 });
    await expect(enabledSwitch).toHaveAttribute("aria-checked", "true");
    await expect(enabledSwitch).toBeEnabled();

    const initialState = await enabledSwitch.getAttribute("aria-checked");
    const expectedState = initialState === "true" ? "false" : "true";
    const guildUrl = `/api/manage/admin/discord-bot/guilds/${mockRegisteredGuild.id}`;
    const guildsListUrl = `/api/manage/admin/discord-bot/guilds`;

    // Set up response waiters before clicking
    const patchPromise = adminPage.waitForResponse(
      (response) =>
        response.url().includes(guildUrl) &&
        response.request().method() === "PATCH"
    );

    // refreshGuilds() calls the list endpoint, not the individual guild endpoint
    const getPromise = adminPage.waitForResponse(
      (response) =>
        response.url().includes(guildsListUrl) &&
        response.request().method() === "GET"
    );

    await enabledSwitch.click();

    // Wait for PATCH then GET (refreshGuilds) to complete
    await patchPromise;
    await getPromise;

    // Verify the toggle state changed
    await expect(enabledSwitch).toHaveAttribute("aria-checked", expectedState);
  });

  test("guilds page add server modal and copy key", async ({ adminPage }) => {
    await gotoDiscordBotPage(adminPage);

    const addButton = adminPage.locator('button:has-text("Add Server")');

    if (await addButton.isVisible({ timeout: 5000 }).catch(() => false)) {
      // Button might be disabled if bot not configured
      if (await addButton.isEnabled()) {
        await addButton.click();

        // Should show modal with registration key
        const modal = adminPage.locator('[role="dialog"]');
        await expect(modal).toBeVisible({ timeout: 10000 });

        // Modal should show "Registration Key" title
        await expect(modal.getByText("Registration Key")).toBeVisible();

        // Should show the !register command (scoped to modal)
        await expect(modal.getByText("!register")).toBeVisible();

        // Find and click copy button
        const copyButton = adminPage.locator("button").filter({
          has: adminPage.locator("svg"),
        });

        const copyButtons = await copyButton.all();
        for (const btn of copyButtons) {
          const ariaLabel = await btn.getAttribute("aria-label");
          if (ariaLabel?.toLowerCase().includes("copy")) {
            await btn.click();

            // Toast notification should appear
            const toast = adminPage.locator("text=/copied/i");
            await expect(toast).toBeVisible({ timeout: 5000 });
            break;
          }
        }
      }
    }
  });

  test("guilds page delete shows confirmation", async ({
    adminPage,
    mockRegisteredGuild,
    mockBotConfigured: _mockBotConfigured,
  }) => {
    await gotoDiscordBotPage(adminPage);

    // Wait for table to load with mock guild
    await expect(
      adminPage.locator(`text=${mockRegisteredGuild.name}`)
    ).toBeVisible({ timeout: 10000 });

    // Wait for table to be fully loaded and stable
    await adminPage.waitForLoadState("networkidle");

    // Find the table row containing the guild
    const tableRow = adminPage.locator("tr").filter({
      hasText: mockRegisteredGuild.name,
    });
    await expect(tableRow).toBeVisible({ timeout: 10000 });

    // Find delete button in that row - it's an IconButton (last button in Actions column)
    // The DeleteButton uses IconButton with tooltip="Delete" and SvgTrash icon
    const deleteButton = tableRow.locator("button").last();

    if (await deleteButton.isVisible({ timeout: 5000 }).catch(() => false)) {
      // Ensure the button is visible and scrolled into view
      await deleteButton.scrollIntoViewIfNeeded();
      await deleteButton.waitFor({ state: "visible" });

      // Wait for any animations/transitions to complete
      await adminPage.waitForTimeout(300);

      // Use force click to bypass any overlay/interception issues
      // The SettingsLayouts.Body div may be intercepting pointer events
      await deleteButton.click({ force: true });

      // Confirmation modal should appear
      const modal = adminPage.locator('[role="dialog"]');
      await expect(modal).toBeVisible({ timeout: 10000 });

      // Cancel to avoid actually deleting
      const cancelButton = adminPage.locator('button:has-text("Cancel")');
      if (await cancelButton.isVisible({ timeout: 5000 }).catch(() => false)) {
        await cancelButton.click();
        await expect(modal).not.toBeVisible({ timeout: 5000 });
      }
    }
  });

  test("guilds page navigate to guild detail", async ({
    adminPage,
    mockRegisteredGuild,
    mockBotConfigured: _mockBotConfigured,
  }) => {
    // Wait for bot config API to complete to ensure Card is enabled
    // The Card is disabled when bot is not configured
    // Set up the wait BEFORE navigation so we can catch the response
    const configResponsePromise = adminPage.waitForResponse(
      (response) =>
        response.url().includes("/api/manage/admin/discord-bot/config") &&
        response.request().method() === "GET"
    );

    await gotoDiscordBotPage(adminPage);
    await configResponsePromise;

    // Wait for table to load with mock guild
    const guildButton = adminPage.locator(
      `button:has-text("${mockRegisteredGuild.name}")`
    );
    await expect(guildButton).toBeVisible({ timeout: 10000 });

    // Ensure button is enabled (it's disabled if bot not configured or guild not registered)
    // mockBotConfigured ensures bot is configured, mockRegisteredGuild ensures guild is registered
    await expect(guildButton).toBeEnabled();

    // Click on the guild name to navigate to detail page
    await guildButton.click();

    // Should navigate to guild detail page
    await expect(adminPage).toHaveURL(
      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)
    );

    // Verify detail page loaded correctly
    // "Channel Configuration" is in a LineItemLayout in the body content, not the page title
    await expect(
      adminPage.locator("text=Channel Configuration").first()
    ).toBeVisible();
  });

  test("loading state shows loader", async ({ adminPage }) => {
    // Intercept API to delay response
    await adminPage.route(
      "**/api/manage/admin/discord-bot/**",
      async (route) => {
        await new Promise((r) => setTimeout(r, 1000));
        await route.continue();
      }
    );

    await adminPage.goto("/admin/discord-bot");

    // Should show loading indicator (ThreeDotsLoader)
    // The loader should appear while data is being fetched
    // ThreeDotsLoader uses react-loader-spinner's ThreeDots with ariaLabel="grid-loading"
    const loader = adminPage.locator('[aria-label="grid-loading"]');
    // Give it a moment to appear
    await expect(loader).toBeVisible({ timeout: 5000 });

    // Wait for page to finish loading
    await adminPage.waitForLoadState("networkidle");

    // After loading, page title should be visible
    await expect(
      adminPage
        .locator('[aria-label="admin-page-title"]')
        .getByText("Discord Integration")
    ).toBeVisible();
  });

  test("error state shows error message", async ({ adminPage }) => {
    // Intercept API to return error
    await adminPage.route("**/api/manage/admin/discord-bot/guilds", (route) => {
      route.fulfill({
        status: 500,
        contentType: "application/json",
        body: JSON.stringify({ detail: "Internal Server Error" }),
      });
    });

    await adminPage.goto("/admin/discord-bot");
    await adminPage.waitForLoadState("networkidle");

    // Should show error message from ErrorCallout
    // ErrorCallout shows both title ("Failed to load Discord servers") and detail ("Internal Server Error")
    // Use .first() to get the first matching element (the title)
    const errorMessage = adminPage.locator("text=/failed|error/i").first();
    await expect(errorMessage).toBeVisible({ timeout: 10000 });
  });
});


================================================
FILE: web/tests/e2e/admin/ee_feature_redirect.spec.ts
================================================
import { test, expect } from "@tests/e2e/fixtures/eeFeatures";

test.describe("EE Feature Redirect", () => {
  test("redirects to /chat with toast when EE features are not licensed", async ({
    page,
    eeEnabled,
  }) => {
    test.skip(eeEnabled, "Redirect only happens without Enterprise license");

    await page.goto("/admin/theme");

    await expect(page).toHaveURL(/\/chat/, { timeout: 10_000 });

    const toastContainer = page.getByTestId("toast-container");
    await expect(toastContainer).toBeVisible({ timeout: 5_000 });
    await expect(
      toastContainer.getByText(/only accessible with a paid license/i)
    ).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/admin/groups/GroupsAdminPage.ts
================================================
/**
 * Page Object Model for the Admin Groups page (/admin/groups).
 *
 * Covers the list page, create page, and edit page interactions.
 */

import { type Page, type Locator, expect } from "@playwright/test";

/** URL pattern that matches the groups data fetch. */
const GROUPS_API = /\/api\/manage\/admin\/user-group/;

export class GroupsAdminPage {
  readonly page: Page;

  constructor(page: Page) {
    this.page = page;
  }

  // ---------------------------------------------------------------------------
  // Navigation
  // ---------------------------------------------------------------------------

  async goto() {
    await this.page.goto("/admin/groups");
    await expect(this.newGroupButton).toBeVisible({ timeout: 15000 });
  }

  async gotoCreate() {
    await this.page.goto("/admin/groups/create");
    await expect(this.page.getByText("Create Group")).toBeVisible({
      timeout: 15000,
    });
  }

  async gotoEdit(groupId: number) {
    await this.page.goto(`/admin/groups/${groupId}`);
    // Wait for the form to be ready — avoids networkidle hanging due to SWR polling.
    await expect(this.groupNameInput).toBeVisible({ timeout: 15000 });
  }

  // ---------------------------------------------------------------------------
  // List page
  // ---------------------------------------------------------------------------

  /** The Groups page heading container (unique to the list page). */
  get pageHeading(): Locator {
    return this.page.getByTestId("groups-page-heading");
  }

  /** The search input on the list page. */
  get listSearchInput(): Locator {
    return this.page.getByPlaceholder("Search groups...");
  }

  /** The "New Group" button on the list page header. */
  get newGroupButton(): Locator {
    return this.page.getByRole("button", { name: "New Group" });
  }

  /** Returns all group cards on the list page. */
  get groupCards(): Locator {
    return this.page.locator("[data-card]");
  }

  /**
   * Returns a group card by name.
   * Cards use ContentAction which renders the title as text — match by content.
   */
  getGroupCard(name: string): Locator {
    return this.page.locator("[data-card]").filter({ hasText: name });
  }

  /** Click into a group's edit page from the list. */
  async openGroup(name: string) {
    const card = this.getGroupCard(name);
    await card.getByRole("button", { name: "View group" }).click();
    await expect(this.groupNameInput).toBeVisible({ timeout: 15000 });
  }

  /** Search groups on the list page. */
  async searchGroups(term: string) {
    await this.listSearchInput.fill(term);
  }

  /** Click "New Group" to navigate to the create page. */
  async clickNewGroup() {
    await this.newGroupButton.click();
    await expect(this.page.getByText("Create Group")).toBeVisible({
      timeout: 15000,
    });
  }

  // ---------------------------------------------------------------------------
  // Create page
  // ---------------------------------------------------------------------------

  /** The group name input on create/edit pages. */
  get groupNameInput(): Locator {
    return this.page.getByPlaceholder("Name your group");
  }

  /** The member search input on create/edit pages. */
  get memberSearchInput(): Locator {
    return this.page.getByPlaceholder("Search users and accounts...");
  }

  /** The "Create" button on the create page. */
  get createButton(): Locator {
    return this.page.getByRole("button", { name: "Create", exact: true });
  }

  /** The "Cancel" button on create/edit pages. */
  get cancelButton(): Locator {
    return this.page.getByRole("button", { name: "Cancel" });
  }

  /** Fill in the group name on create/edit pages. */
  async setGroupName(name: string) {
    await this.groupNameInput.fill(name);
  }

  /** Search for members in the members table. */
  async searchMembers(term: string) {
    await this.memberSearchInput.fill(term);
  }

  /** Select a member row by checking their checkbox (create page / add mode). */
  async selectMember(emailOrName: string) {
    const row = this.page.getByRole("row").filter({ hasText: emailOrName });
    const checkbox = row.getByRole("checkbox");
    await checkbox.click();
  }

  /** Submit the create form. */
  async submitCreate() {
    await this.createButton.click();
  }

  // ---------------------------------------------------------------------------
  // Edit page
  // ---------------------------------------------------------------------------

  /** The "Save Changes" button on the edit page. */
  get saveButton(): Locator {
    return this.page.getByRole("button", { name: "Save Changes" });
  }

  /** The "Add" button to enter add-members mode. */
  get addMembersButton(): Locator {
    return this.page.getByRole("button", { name: "Add", exact: true });
  }

  /** The "Done" button to exit add-members mode. */
  get doneAddingButton(): Locator {
    return this.page.getByRole("button", { name: "Done" });
  }

  /** The "Delete Group" button in the danger zone card. */
  get deleteGroupButton(): Locator {
    return this.page.getByRole("button", { name: "Delete Group" });
  }

  /** Enter add-members mode on the edit page. */
  async startAddingMembers() {
    await this.addMembersButton.click();
    await expect(this.doneAddingButton).toBeVisible();
  }

  /** Exit add-members mode. */
  async finishAddingMembers() {
    await this.doneAddingButton.click();
    await expect(this.addMembersButton).toBeVisible();
  }

  /**
   * Remove a member from the member view via the minus button.
   * Only works in member view (not add mode).
   */
  async removeMember(emailOrName: string) {
    const row = this.page.getByRole("row").filter({ hasText: emailOrName });
    // The remove button is an IconButton with SvgMinusCircle in the actions column
    await row.getByRole("button").last().click();
  }

  /** Save the edit form. */
  async submitEdit() {
    await this.saveButton.click();
  }

  // ---------------------------------------------------------------------------
  // Delete flow
  // ---------------------------------------------------------------------------

  /** Click "Delete Group" to open the confirmation modal. */
  async clickDeleteGroup() {
    await this.deleteGroupButton.click();
  }

  /** The delete confirmation modal. */
  get deleteModal(): Locator {
    return this.page.getByRole("dialog");
  }

  /** Confirm deletion in the modal. */
  async confirmDelete() {
    await this.deleteModal.getByRole("button", { name: "Delete" }).click();
  }

  /** Cancel deletion in the modal. */
  async cancelDelete() {
    // The modal close button (X icon) or clicking outside
    await this.deleteModal
      .getByRole("button")
      .filter({ hasText: /close|cancel/i })
      .first()
      .click();
  }

  // ---------------------------------------------------------------------------
  // Assertions
  // ---------------------------------------------------------------------------

  async expectToast(message: string | RegExp) {
    await expect(this.page.getByText(message)).toBeVisible({ timeout: 10000 });
  }

  /** Assert a group card exists on the list page. */
  async expectGroupVisible(name: string) {
    await expect(this.getGroupCard(name)).toBeVisible({ timeout: 10000 });
  }

  /** Assert a group card does NOT exist on the list page. */
  async expectGroupNotVisible(name: string) {
    await expect(this.getGroupCard(name)).not.toBeVisible({ timeout: 10000 });
  }

  /** Assert we navigated back to the groups list. */
  async expectOnListPage() {
    await expect(this.page).toHaveURL(/\/admin\/groups\/?$/);
    await expect(this.newGroupButton).toBeVisible();
  }

  /** Assert we are on the edit page for a specific group. */
  async expectOnEditPage(groupId: number) {
    await expect(this.page).toHaveURL(`/admin/groups/${groupId}`);
  }

  /** Wait for the groups API response after a mutation. */
  async waitForGroupsRefresh() {
    await this.page.waitForResponse(GROUPS_API);
  }
}


================================================
FILE: web/tests/e2e/admin/groups/fixtures.ts
================================================
/**
 * Playwright fixtures for Admin Groups page tests.
 *
 * Provides:
 * - Authenticated admin page
 * - OnyxApiClient for API-level setup/teardown
 * - GroupsAdminPage page object
 */

import { test as base, expect, type Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { GroupsAdminPage } from "./GroupsAdminPage";

export const test = base.extend<{
  adminPage: Page;
  api: OnyxApiClient;
  groupsPage: GroupsAdminPage;
}>({
  adminPage: async ({ page }, use) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await use(page);
  },

  api: async ({ adminPage }, use) => {
    const client = new OnyxApiClient(adminPage.request);
    await use(client);
  },

  groupsPage: async ({ adminPage }, use) => {
    const groupsPage = new GroupsAdminPage(adminPage);
    await use(groupsPage);
  },
});

export { expect };


================================================
FILE: web/tests/e2e/admin/groups/groups.spec.ts
================================================
/**
 * E2E Tests: Admin Groups Page
 *
 * Tests the full groups management page — list, create, edit, delete.
 *
 * Uses the GroupsAdminPage POM for all interactions. Groups are created via
 * OnyxApiClient for setup and cleaned up in afterAll/afterEach.
 */

import { test, expect } from "./fixtures";
import type { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import type { Browser } from "@playwright/test";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

function uniqueGroupName(prefix: string): string {
  return `e2e-${prefix}-${Date.now()}`;
}

/** Best-effort cleanup — logs failures instead of silently swallowing them. */
async function softCleanup(fn: () => Promise<unknown>): Promise<void> {
  await fn().catch((e) => console.warn("cleanup:", e));
}

/**
 * Creates an authenticated API context for beforeAll/afterAll hooks.
 */
async function withApiContext(
  browser: Browser,
  fn: (api: OnyxApiClient) => Promise<void>
): Promise<void> {
  const context = await browser.newContext({
    storageState: "admin_auth.json",
  });
  try {
    const { OnyxApiClient } = await import("@tests/e2e/utils/onyxApiClient");
    const api = new OnyxApiClient(context.request);
    await fn(api);
  } finally {
    await context.close();
  }
}

// ---------------------------------------------------------------------------
// List page
// ---------------------------------------------------------------------------

test.describe("Groups page — layout", () => {
  let adminGroupId: number;
  let basicGroupId: number;
  let layoutGroupId: number;
  const layoutGroupName = uniqueGroupName("layout");

  test.beforeAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      const groups = await api.getUserGroups();
      const adminGroup = groups.find((g) => g.name === "Admin" && g.is_default);
      const basicGroup = groups.find((g) => g.name === "Basic" && g.is_default);
      if (!adminGroup || !basicGroup) {
        throw new Error("Default Admin/Basic groups not found");
      }
      adminGroupId = adminGroup.id;
      basicGroupId = basicGroup.id;

      // Create a custom group so the list is non-empty (default groups are
      // excluded from the API response by default).
      layoutGroupId = await api.createUserGroup(layoutGroupName);
      await api.waitForGroupSync(layoutGroupId);
    });
  });

  test.afterAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      await softCleanup(() => api.deleteUserGroup(layoutGroupId));
    });
  });

  test("renders page title, search, and new group button", async ({
    groupsPage,
  }) => {
    await groupsPage.goto();

    await expect(groupsPage.pageHeading).toBeVisible();
    await expect(groupsPage.listSearchInput).toBeVisible();
    await expect(groupsPage.newGroupButton).toBeVisible();
  });

  test.skip("shows built-in groups (Admin, Basic)", async ({ groupsPage }) => {
    // TODO: Enable once default groups are shown via include_default=true
    await groupsPage.goto();

    await groupsPage.expectGroupVisible("Admin");
    await groupsPage.expectGroupVisible("Basic");
  });

  test("search filters groups by name", async ({ groupsPage, api }) => {
    const name = uniqueGroupName("search");
    const groupId = await api.createUserGroup(name);
    await api.waitForGroupSync(groupId);

    try {
      await groupsPage.goto();
      await groupsPage.expectGroupVisible(name);

      await groupsPage.searchGroups("zzz-nonexistent-zzz");
      await groupsPage.expectGroupNotVisible(name);

      await groupsPage.searchGroups(name);
      await groupsPage.expectGroupVisible(name);
    } finally {
      await softCleanup(() => api.deleteUserGroup(groupId));
    }
  });
});

// ---------------------------------------------------------------------------
// Create flow
// ---------------------------------------------------------------------------

test.describe("Groups page — create", () => {
  test("navigates to create page via New Group button", async ({
    groupsPage,
  }) => {
    await groupsPage.goto();
    await groupsPage.clickNewGroup();

    await expect(groupsPage.page).toHaveURL(/\/admin\/groups\/create/);
    await expect(groupsPage.groupNameInput).toBeVisible();
  });

  test("creates a group and redirects to list", async ({ groupsPage, api }) => {
    const name = uniqueGroupName("create");
    let groupId: number | undefined;

    try {
      await groupsPage.gotoCreate();
      await groupsPage.setGroupName(name);
      await groupsPage.submitCreate();

      await groupsPage.expectToast(`Group "${name}" created`);
      await groupsPage.expectOnListPage();

      // Find the group ID for cleanup via the authenticated page context
      const res = await groupsPage.page.request.get(
        "/api/manage/admin/user-group"
      );
      const groups = await res.json();
      const group = groups.find(
        (g: { name: string; id: number }) => g.name === name
      );
      groupId = group?.id;
    } finally {
      if (groupId !== undefined) {
        await softCleanup(() => api.deleteUserGroup(groupId!));
      }
    }
  });

  test("cancel returns to list without creating", async ({ groupsPage }) => {
    await groupsPage.gotoCreate();
    await groupsPage.setGroupName("should-not-be-created");
    await groupsPage.cancelButton.click();

    await groupsPage.expectOnListPage();
  });
});

// ---------------------------------------------------------------------------
// Edit flow
// ---------------------------------------------------------------------------

test.describe("Groups page — edit @exclusive", () => {
  let groupId: number;
  const groupName = uniqueGroupName("edit");

  test.beforeAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      groupId = await api.createUserGroup(groupName);
      await api.waitForGroupSync(groupId);
    });
  });

  test.afterAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      await softCleanup(() => api.deleteUserGroup(groupId));
    });
  });

  test("navigates to edit page from list", async ({ groupsPage }) => {
    await groupsPage.goto();
    await groupsPage.openGroup(groupName);

    await groupsPage.expectOnEditPage(groupId);
    await expect(groupsPage.saveButton).toBeVisible();
  });

  test("edit page shows group name and save/cancel buttons", async ({
    groupsPage,
  }) => {
    await groupsPage.gotoEdit(groupId);

    await expect(groupsPage.groupNameInput).toHaveValue(groupName);
    await expect(groupsPage.saveButton).toBeVisible();
    await expect(groupsPage.cancelButton).toBeVisible();
  });

  test("can toggle add-members mode", async ({ groupsPage }) => {
    await groupsPage.gotoEdit(groupId);

    await expect(groupsPage.addMembersButton).toBeVisible();
    await groupsPage.startAddingMembers();
    await expect(groupsPage.doneAddingButton).toBeVisible();
    await groupsPage.finishAddingMembers();
    await expect(groupsPage.addMembersButton).toBeVisible();
  });

  test("cancel returns to list without saving", async ({ groupsPage }) => {
    await groupsPage.gotoEdit(groupId);
    await groupsPage.cancelButton.click();

    await groupsPage.expectOnListPage();
  });
});

// ---------------------------------------------------------------------------
// Delete flow
// ---------------------------------------------------------------------------

test.describe("Groups page — delete", () => {
  test("delete group via edit page", async ({ groupsPage, api }) => {
    const name = uniqueGroupName("delete");
    const groupId = await api.createUserGroup(name);
    await api.waitForGroupSync(groupId);

    await groupsPage.gotoEdit(groupId);
    await groupsPage.clickDeleteGroup();

    // Modal should show the group name
    await expect(groupsPage.deleteModal).toBeVisible();
    await expect(groupsPage.deleteModal.getByText(name)).toBeVisible();

    await groupsPage.confirmDelete();
    await groupsPage.expectToast(`Group "${name}" deleted`);
    await groupsPage.expectOnListPage();
  });
});

// ---------------------------------------------------------------------------
// Sync status (No Vector DB)
// ---------------------------------------------------------------------------

test.describe("Groups page — sync @lite", () => {
  test.beforeAll(async ({ browser }) => {
    const context = await browser.newContext({
      storageState: "admin_auth.json",
    });
    try {
      const { OnyxApiClient } = await import("@tests/e2e/utils/onyxApiClient");
      const client = new OnyxApiClient(context.request);
      const vectorDbEnabled = await client.isVectorDbEnabled();
      test.skip(
        vectorDbEnabled,
        "Skipped: vector DB is enabled in this deployment"
      );
    } finally {
      await context.close();
    }
  });

  test("newly created group syncs immediately", async ({ groupsPage, api }) => {
    const name = uniqueGroupName("sync");
    let groupId: number | undefined;

    try {
      // Create via API and verify sync completes
      groupId = await api.createUserGroup(name);
      await api.waitForGroupSync(groupId);

      // Navigate to edit page and verify it loads without error
      await groupsPage.gotoEdit(groupId);
      await expect(groupsPage.groupNameInput).toHaveValue(name);
    } finally {
      if (groupId !== undefined) {
        await softCleanup(() => api.deleteUserGroup(groupId!));
      }
    }
  });
});


================================================
FILE: web/tests/e2e/admin/image-generation/disconnect-provider.spec.ts
================================================
import { test, expect, Page, Locator } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

const IMAGE_GENERATION_URL = "/admin/configuration/image-generation";

const FAKE_CONNECTED_CONFIG = {
  image_provider_id: "openai_dalle_3",
  model_configuration_id: 100,
  model_name: "dall-e-3",
  llm_provider_id: 100,
  llm_provider_name: "openai-dalle3",
  is_default: false,
};

const FAKE_DEFAULT_CONFIG = {
  image_provider_id: "openai_gpt_image_1",
  model_configuration_id: 101,
  model_name: "gpt-image-1",
  llm_provider_id: 101,
  llm_provider_name: "openai-gpt-image-1",
  is_default: true,
};

function getProviderCard(page: Page, providerId: string): Locator {
  return page.getByLabel(`image-gen-provider-${providerId}`, { exact: true });
}

function mainContainer(page: Page): Locator {
  return page.locator("[data-main-container]");
}

/**
 * Sets up route mocks so the page sees configured providers
 * without needing real API keys.
 */
async function mockImageGenApis(
  page: Page,
  configs: (typeof FAKE_CONNECTED_CONFIG)[]
) {
  await page.route("**/api/admin/image-generation/config", async (route) => {
    if (route.request().method() === "GET") {
      await route.fulfill({ status: 200, json: configs });
    } else {
      await route.continue();
    }
  });

  await page.route(
    "**/api/admin/llm/provider?include_image_gen=true",
    async (route) => {
      await route.fulfill({ status: 200, json: { providers: [] } });
    }
  );
}

test.describe("Image Generation Provider Disconnect", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
  });

  test("should disconnect a connected (non-default) provider", async ({
    page,
  }) => {
    const configs = [{ ...FAKE_CONNECTED_CONFIG }, { ...FAKE_DEFAULT_CONFIG }];
    await mockImageGenApis(page, configs);

    await page.goto(IMAGE_GENERATION_URL);
    await page.waitForSelector("text=Image Generation Model", {
      timeout: 20000,
    });

    const card = getProviderCard(page, "openai_dalle_3");
    await card.waitFor({ state: "visible", timeout: 10000 });

    await expectElementScreenshot(mainContainer(page), {
      name: "image-gen-disconnect-non-default-before",
    });

    // Hover to reveal disconnect button, then verify
    await card.hover();
    const disconnectButton = card.getByRole("button", {
      name: "Disconnect DALL-E 3",
    });
    await expect(disconnectButton).toBeVisible();
    await expect(disconnectButton).toBeEnabled();

    // Mock the DELETE to succeed and update the config list
    await page.route(
      "**/api/admin/image-generation/config/openai_dalle_3",
      async (route) => {
        if (route.request().method() === "DELETE") {
          // Update the GET mock to return only the default config
          await page.unroute("**/api/admin/image-generation/config");
          await page.route(
            "**/api/admin/image-generation/config",
            async (route) => {
              if (route.request().method() === "GET") {
                await route.fulfill({
                  status: 200,
                  json: [{ ...FAKE_DEFAULT_CONFIG }],
                });
              } else {
                await route.continue();
              }
            }
          );
          await route.fulfill({ status: 200, json: {} });
        } else {
          await route.continue();
        }
      }
    );

    // Click disconnect
    await disconnectButton.click();

    // Verify confirmation modal appears
    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });
    await expect(confirmDialog).toContainText("Disconnect DALL-E 3");

    await expectElementScreenshot(confirmDialog, {
      name: "image-gen-disconnect-non-default-modal",
    });

    // Click Disconnect in the confirmation modal
    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await confirmButton.click();

    // Verify the card reverts to disconnected state (shows "Connect" button)
    await expect(card.getByRole("button", { name: "Connect" })).toBeVisible({
      timeout: 10000,
    });

    await expectElementScreenshot(mainContainer(page), {
      name: "image-gen-disconnect-non-default-after",
    });
  });

  test("should show replacement dropdown when disconnecting default provider with alternatives", async ({
    page,
  }) => {
    const configs = [{ ...FAKE_CONNECTED_CONFIG }, { ...FAKE_DEFAULT_CONFIG }];
    await mockImageGenApis(page, configs);

    await page.goto(IMAGE_GENERATION_URL);
    await page.waitForSelector("text=Image Generation Model", {
      timeout: 20000,
    });

    const defaultCard = getProviderCard(page, "openai_gpt_image_1");
    await defaultCard.waitFor({ state: "visible", timeout: 10000 });

    // Hover to reveal disconnect button
    await defaultCard.hover();
    const disconnectButton = defaultCard.getByRole("button", {
      name: "Disconnect GPT Image 1",
    });
    await expect(disconnectButton).toBeVisible();
    await expect(disconnectButton).toBeEnabled();

    await disconnectButton.click();

    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });

    // Should show replacement dropdown since there's an alternative
    await expect(
      confirmDialog.getByText("Session history will be preserved")
    ).toBeVisible();

    // Disconnect button should be enabled because first replacement is auto-selected
    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await expect(confirmButton).toBeEnabled();

    await expectElementScreenshot(confirmDialog, {
      name: "image-gen-disconnect-default-with-alt-modal",
    });
  });

  test("should show connect message when disconnecting default provider with no alternatives", async ({
    page,
  }) => {
    // Only the default config — no other providers configured
    await mockImageGenApis(page, [{ ...FAKE_DEFAULT_CONFIG }]);

    await page.goto(IMAGE_GENERATION_URL);
    await page.waitForSelector("text=Image Generation Model", {
      timeout: 20000,
    });

    const defaultCard = getProviderCard(page, "openai_gpt_image_1");
    await defaultCard.waitFor({ state: "visible", timeout: 10000 });

    await defaultCard.hover();
    const disconnectButton = defaultCard.getByRole("button", {
      name: "Disconnect GPT Image 1",
    });
    await disconnectButton.click();

    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });

    // Should show message about connecting another provider
    await expect(
      confirmDialog.getByText("Connect another provider")
    ).toBeVisible();

    // Disconnect button should be enabled
    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await expect(confirmButton).toBeEnabled();

    await expectElementScreenshot(confirmDialog, {
      name: "image-gen-disconnect-no-alt-modal",
    });
  });

  test("should not show disconnect button for unconfigured providers", async ({
    page,
  }) => {
    await mockImageGenApis(page, [{ ...FAKE_DEFAULT_CONFIG }]);

    await page.goto(IMAGE_GENERATION_URL);
    await page.waitForSelector("text=Image Generation Model", {
      timeout: 20000,
    });

    // DALL-E 3 is not configured — should not have a disconnect button
    const card = getProviderCard(page, "openai_dalle_3");
    await card.waitFor({ state: "visible", timeout: 10000 });

    const disconnectButton = card.getByRole("button", {
      name: "Disconnect DALL-E 3",
    });
    await expect(disconnectButton).not.toBeVisible();

    await expectElementScreenshot(mainContainer(page), {
      name: "image-gen-disconnect-unconfigured",
    });
  });
});


================================================
FILE: web/tests/e2e/admin/image-generation/image-generation-content.spec.ts
================================================
import { test, expect, Page, Locator } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

const IMAGE_GENERATION_URL =
  "http://localhost:3000/admin/configuration/image-generation";

// Provider IDs matching constants.ts
const PROVIDERS = [
  { id: "openai_gpt_image_1_5", title: "GPT Image 1.5" },
  { id: "openai_gpt_image_1", title: "GPT Image 1" },
  { id: "openai_dalle_3", title: "DALL-E 3" },
  { id: "azure_dalle_3", title: "Azure OpenAI DALL-E 3" },
];

// Helper to find a provider card by its aria-label
function getProviderCard(page: Page, providerId: string): Locator {
  return page.getByLabel(`image-gen-provider-${providerId}`, { exact: true });
}

// Helper to open the provider connection modal
async function openProviderModal(
  page: Page,
  providerId: string
): Promise<void> {
  const card = getProviderCard(page, providerId);
  await card.waitFor({ state: "visible", timeout: 10000 });

  // Click the Connect button within the card
  const connectButton = card.getByRole("button", { name: "Connect" });
  await connectButton.waitFor({ state: "visible", timeout: 5000 });
  await connectButton.click();
}

test.describe("Image Generation Provider Configuration", () => {
  test.beforeEach(async ({ page }) => {
    // Log in as admin before each test
    await page.context().clearCookies();
    await loginAs(page, "admin");

    // Navigate to image generation config page
    await page.goto(IMAGE_GENERATION_URL);
    await page.waitForLoadState("networkidle");

    // Wait for page to fully load - look for the section heading
    await page.waitForSelector("text=Image Generation Model", {
      timeout: 20000,
    });

    console.log("[image-gen-test] Page loaded successfully");
  });

  test("should open connection modal for all image generation providers", async ({
    page,
  }) => {
    for (const provider of PROVIDERS) {
      console.log(
        `[image-gen-test] Testing modal open for provider: ${provider.title}`
      );

      // Click Connect on provider card using aria-label
      await openProviderModal(page, provider.id);

      // Verify modal opens with correct title
      // Modal title is "Connect {providerTitle}" for new connections
      const modalDialog = page.getByRole("dialog", {
        name: new RegExp(`connect ${provider.title}`, "i"),
      });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      console.log(`[image-gen-test] Modal opened for ${provider.title}`);

      // Close modal by pressing Escape
      await page.keyboard.press("Escape");
      await expect(modalDialog).not.toBeVisible({ timeout: 5000 });

      console.log(`[image-gen-test] Modal closed for ${provider.title}`);
    }

    console.log(
      "[image-gen-test] All provider modals opened and closed successfully"
    );
  });

  test.describe("OpenAI DALL-E 3 Configuration", () => {
    const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

    test.skip(!OPENAI_API_KEY, "OPENAI_API_KEY environment variable not set");

    test.afterEach(async ({ page }) => {
      // Clean up the image generation config created during the test
      const apiClient = new OnyxApiClient(page.request);
      try {
        await apiClient.deleteImageGenerationConfig("openai_dalle_3");
        console.log("[image-gen-test] Cleaned up DALL-E 3 config");
      } catch (error) {
        console.warn(
          `[image-gen-test] Failed to clean up DALL-E 3 config: ${error}`
        );
      }
    });

    test.skip("should configure DALL-E 3 with API key", async ({ page }) => {
      // Click Connect on DALL-E 3 card using aria-label
      await openProviderModal(page, "openai_dalle_3");

      // Wait for modal to open
      const modalDialog = page.getByRole("dialog", {
        name: /connect dall-e 3/i,
      });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      // Enter API key - use getByRole("combobox") to target only the input, not the listbox
      const apiKeyInput = modalDialog.getByRole("combobox");
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await apiKeyInput.clear();
      await apiKeyInput.fill(OPENAI_API_KEY!);

      // Close the dropdown by pressing Escape - it intercepts clicks on the Connect button
      await page.keyboard.press("Escape");

      // Click Connect button in modal - scope to the dialog to avoid matching other buttons
      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[image-gen-test] Clicked Connect, waiting for validation..."
      );

      // Wait for modal to close (indicates success)
      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });

      console.log(
        "[image-gen-test] Modal closed, verifying provider is configured..."
      );

      // Wait for page to update
      await page.waitForLoadState("networkidle");

      // Verify DALL-E 3 is now configured - should show "Current Default"
      const dalleCard = getProviderCard(page, "openai_dalle_3");
      await expect(
        dalleCard.getByRole("button", { name: "Current Default" })
      ).toBeVisible({ timeout: 15000 });

      console.log("[image-gen-test] DALL-E 3 configured successfully");
    });
  });
});


================================================
FILE: web/tests/e2e/admin/llm_provider_setup.spec.ts
================================================
import { expect, test } from "@playwright/test";
import type { Locator, Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

const LLM_SETUP_URL = "/admin/configuration/llm";
const BASE_URL = process.env.BASE_URL || "http://localhost:3000";
const PROVIDER_API_KEY =
  process.env.E2E_LLM_PROVIDER_API_KEY ||
  process.env.OPENAI_API_KEY ||
  "e2e-placeholder-api-key-not-used";

type AdminLLMProvider = {
  id: number;
  name: string;
  is_auto_mode: boolean;
};

type DefaultModelInfo = {
  provider_id: number;
  model_name: string;
} | null;

type ProviderModelConfig = {
  name: string;
  is_visible: boolean;
};

function uniqueName(prefix: string): string {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}

function normalizeAlphaNum(input: string): string {
  return input.toLowerCase().replace(/[^a-z0-9]/g, "");
}

function modelTokenVariants(modelName: string): string[][] {
  return modelName
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((token) => token.length > 0)
    .map((token) => {
      // Display names may shorten long numeric segments to suffixes.
      if (/^\d+$/.test(token) && token.length > 5) {
        return [token, token.slice(-5)];
      }
      return [token];
    });
}

function textMatchesModel(modelName: string, candidateText: string): boolean {
  const normalizedCandidate = normalizeAlphaNum(candidateText);
  if (!normalizedCandidate) {
    return false;
  }

  const tokenVariants = modelTokenVariants(modelName);
  return tokenVariants.every((variants) =>
    variants.some((variant) =>
      normalizedCandidate.includes(normalizeAlphaNum(variant))
    )
  );
}

async function getAdminLLMProviderResponse(page: Page) {
  const response = await page.request.get(`${BASE_URL}/api/admin/llm/provider`);
  expect(response.ok()).toBeTruthy();
  return (await response.json()) as {
    providers: AdminLLMProvider[];
    default_text: DefaultModelInfo;
    default_vision: DefaultModelInfo;
  };
}

async function listAdminLLMProviders(page: Page): Promise<AdminLLMProvider[]> {
  const data = await getAdminLLMProviderResponse(page);
  return data.providers;
}

async function getDefaultTextModel(page: Page): Promise<DefaultModelInfo> {
  const data = await getAdminLLMProviderResponse(page);
  return data.default_text ?? null;
}

async function createPublicProvider(
  page: Page,
  providerName: string,
  modelName: string = "gpt-4o"
): Promise<number> {
  return createPublicProviderWithModels(page, providerName, [
    { name: modelName, is_visible: true },
  ]);
}

async function createPublicProviderWithModels(
  page: Page,
  providerName: string,
  modelConfigurations: ProviderModelConfig[]
): Promise<number> {
  expect(modelConfigurations.length).toBeGreaterThan(0);

  const response = await page.request.put(
    `${BASE_URL}/api/admin/llm/provider?is_creation=true`,
    {
      data: {
        name: providerName,
        provider: "openai",
        api_key: PROVIDER_API_KEY,
        is_public: true,
        groups: [],
        personas: [],
        model_configurations: modelConfigurations,
      },
    }
  );
  expect(response.ok()).toBeTruthy();
  const data = (await response.json()) as { id: number };
  return data.id;
}

async function navigateToAdminLlmPageFromChat(page: Page): Promise<void> {
  await page.goto(LLM_SETUP_URL);
  await page.waitForURL("**/admin/configuration/llm**");
  await expect(page.getByLabel("admin-page-title")).toHaveText(
    /^Language Models/
  );
}

async function exitAdminToChat(page: Page): Promise<void> {
  await page.goto("/app");
  await page.waitForURL("**/app**");
  await page
    .locator("#onyx-chat-input-textarea")
    .waitFor({ state: "visible", timeout: 15000 });
}

async function isModelVisibleInChatProviders(
  page: Page,
  modelName: string
): Promise<boolean> {
  const response = await page.request.get(`${BASE_URL}/api/llm/provider`);
  expect(response.ok()).toBeTruthy();

  const data = (await response.json()) as {
    providers: {
      model_configurations: { name: string; is_visible: boolean }[];
    }[];
  };

  return data.providers.some((provider) =>
    provider.model_configurations.some(
      (model) => model.name === modelName && model.is_visible
    )
  );
}

async function expectModelVisibilityInChatProviders(
  page: Page,
  modelName: string,
  expectedVisible: boolean
): Promise<void> {
  await expect
    .poll(() => isModelVisibleInChatProviders(page, modelName), {
      timeout: 30000,
    })
    .toBe(expectedVisible);
}

async function getModelCountInChatSelector(
  page: Page,
  modelName: string
): Promise<number> {
  const dialog = page.locator('[role="dialog"]').first();

  // When used in expect.poll retries, a previous attempt may leave the
  // popover open. Ensure a clean state before toggling it.
  if (await dialog.isVisible()) {
    await page.keyboard.press("Escape");
    await dialog.waitFor({ state: "hidden", timeout: 5000 });
  }

  await page.getByTestId("AppInputBar/llm-popover-trigger").click();
  await dialog.waitFor({ state: "visible", timeout: 10000 });

  await dialog.getByPlaceholder("Search models...").fill(modelName);
  const optionButtons = dialog.getByRole("button");
  const optionTexts = await optionButtons.allTextContents();
  const uniqueOptionTexts = Array.from(
    new Set(optionTexts.map((text) => text.trim()))
  );
  const count = uniqueOptionTexts.filter((text) =>
    textMatchesModel(modelName, text)
  ).length;

  await page.keyboard.press("Escape");
  await dialog.waitFor({ state: "hidden", timeout: 10000 });

  return count;
}

async function getProviderByName(
  page: Page,
  providerName: string
): Promise<AdminLLMProvider | null> {
  const providers = await listAdminLLMProviders(page);
  return providers.find((provider) => provider.name === providerName) ?? null;
}

async function findProviderCard(
  page: Page,
  providerName: string
): Promise<Locator> {
  return page
    .locator("div.rounded-16")
    .filter({ hasText: providerName })
    .first();
}

async function openOpenAiSetupModal(page: Page): Promise<Locator> {
  const openAiCard = page
    .locator("div.rounded-16")
    .filter({ hasText: "OpenAI" })
    .filter({ has: page.getByRole("button", { name: "Connect" }) })
    .first();

  await expect(openAiCard).toBeVisible({ timeout: 10000 });
  await openAiCard.getByRole("button", { name: "Connect" }).click();

  const modal = page.getByRole("dialog", { name: /set up gpt/i });
  await expect(modal).toBeVisible({ timeout: 10000 });
  return modal;
}

async function openProviderEditModal(
  page: Page,
  providerName: string
): Promise<Locator> {
  const providerCard = await findProviderCard(page, providerName);
  await expect(providerCard).toBeVisible({ timeout: 10000 });
  await providerCard.getByRole("button", { name: /^Edit/ }).click();

  const modal = page.getByRole("dialog", { name: /configure/i });
  await expect(modal).toBeVisible({ timeout: 10000 });
  return modal;
}

test.describe("LLM Provider Setup @exclusive", () => {
  let providersToCleanup: number[] = [];

  test.beforeEach(async ({ page }) => {
    providersToCleanup = [];
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await page.goto(LLM_SETUP_URL);
    await page.waitForLoadState("networkidle");
    await expect(page.getByLabel("admin-page-title")).toHaveText(
      /^Language Models/
    );
  });

  test.afterEach(async ({ page }) => {
    const apiClient = new OnyxApiClient(page.request);
    const uniqueIds = Array.from(new Set(providersToCleanup));

    for (const providerId of uniqueIds) {
      try {
        await apiClient.deleteProvider(providerId);
      } catch (error) {
        console.warn(
          `Cleanup failed for provider ${providerId}: ${String(error)}`
        );
      }
    }
  });

  test("admin can create, edit, and delete a provider from the LLM setup page", async ({
    page,
  }) => {
    // Keep this flow deterministic without external LLM connectivity.
    await page.route("**/api/admin/llm/test", async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: JSON.stringify({ success: true }),
      });
    });

    const providerName = uniqueName("PW OpenAI Provider");
    const apiKey = PROVIDER_API_KEY;

    const setupModal = await openOpenAiSetupModal(page);
    await setupModal.getByLabel("Display Name").fill(providerName);
    await setupModal.getByLabel("API Key").fill(apiKey);

    const enableButton = setupModal.getByRole("button", { name: "Connect" });
    await expect(enableButton).toBeEnabled({ timeout: 10000 });
    await enableButton.click();
    await expect(setupModal).not.toBeVisible({ timeout: 30000 });

    await expect
      .poll(
        async () => (await getProviderByName(page, providerName))?.id ?? null
      )
      .not.toBeNull();

    const createdProvider = await getProviderByName(page, providerName);
    expect(createdProvider).not.toBeNull();
    providersToCleanup.push(createdProvider!.id);

    const editModal = await openProviderEditModal(page, providerName);
    const autoUpdateSwitch = editModal.getByRole("switch").first();
    const initialAutoModeState =
      (await autoUpdateSwitch.getAttribute("aria-checked")) === "true";
    await autoUpdateSwitch.click();

    const updateButton = editModal.getByRole("button", { name: "Update" });
    await expect(updateButton).toBeEnabled({ timeout: 10000 });
    await updateButton.click();
    await expect(editModal).not.toBeVisible({ timeout: 30000 });

    await expect
      .poll(async () => {
        const provider = await getProviderByName(page, providerName);
        return provider?.is_auto_mode;
      })
      .toBe(!initialAutoModeState);

    const providerCard = await findProviderCard(page, providerName);
    await providerCard.hover();
    await providerCard.getByRole("button", { name: /^Delete/ }).click();
    const confirmationModal = page.getByRole("dialog");
    await expect(confirmationModal).toBeVisible({ timeout: 10000 });
    await confirmationModal.getByRole("button", { name: "Delete" }).click();
    await expect(confirmationModal).not.toBeVisible({ timeout: 15000 });

    await expect
      .poll(
        async () => (await getProviderByName(page, providerName))?.id ?? null
      )
      .toBeNull();

    providersToCleanup = providersToCleanup.filter(
      (providerId) => providerId !== createdProvider!.id
    );
  });

  test("admin can switch the default model via the default model dropdown", async ({
    page,
  }) => {
    const apiClient = new OnyxApiClient(page.request);
    const initialDefault = await getDefaultTextModel(page);

    const firstProviderName = uniqueName("PW Baseline Provider");
    const secondProviderName = uniqueName("PW Target Provider");
    const firstModelName = "gpt-4o";
    const secondModelName = "gpt-4o-mini";

    const firstProviderId = await createPublicProvider(
      page,
      firstProviderName,
      firstModelName
    );
    const secondProviderId = await createPublicProvider(
      page,
      secondProviderName,
      secondModelName
    );
    providersToCleanup.push(firstProviderId, secondProviderId);

    try {
      await apiClient.setProviderAsDefault(firstProviderId, firstModelName);

      await page.reload();
      await page.waitForLoadState("networkidle");

      // Open the Default Model dropdown and select the model from the
      // second provider's group (scoped to avoid picking a same-named model
      // from another provider).
      await page.getByRole("combobox").click();
      const targetGroup = page
        .locator('[role="group"]')
        .filter({ hasText: secondProviderName });
      const defaultResponsePromise = page.waitForResponse(
        (response) =>
          response.url().includes("/api/admin/llm/default") &&
          response.request().method() === "POST"
      );
      await targetGroup.locator('[role="option"]').click();
      await defaultResponsePromise;

      // Verify the default switched to the second provider
      await expect
        .poll(async () => {
          const defaultText = await getDefaultTextModel(page);
          return defaultText?.provider_id;
        })
        .toBe(secondProviderId);
    } finally {
      if (initialDefault) {
        try {
          await apiClient.setProviderAsDefault(
            initialDefault.provider_id,
            initialDefault.model_name
          );
        } catch (error) {
          console.warn(`Failed to restore initial default: ${String(error)}`);
        }
      }
    }
  });

  test("adding a hidden model on an existing provider shows it in chat after one save", async ({
    page,
  }) => {
    await page.route("**/api/admin/llm/test", async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: JSON.stringify({ success: true }),
      });
    });

    const providerName = uniqueName("PW Provider Add Model");
    const ts = Date.now();
    const alwaysVisibleModel = `pw-visible-${ts}-base`;
    const modelToEnable = `pw-hidden-${ts}-to-enable`;

    const providerId = await createPublicProviderWithModels(
      page,
      providerName,
      [
        { name: alwaysVisibleModel, is_visible: true },
        { name: modelToEnable, is_visible: false },
      ]
    );
    providersToCleanup.push(providerId);
    await expectModelVisibilityInChatProviders(page, modelToEnable, false);

    await page.goto("/app");
    await page.waitForLoadState("networkidle");
    await page
      .locator("#onyx-chat-input-textarea")
      .waitFor({ state: "visible", timeout: 15000 });

    await expect
      .poll(() => getModelCountInChatSelector(page, modelToEnable), {
        timeout: 15000,
      })
      .toBe(0);

    await navigateToAdminLlmPageFromChat(page);

    const editModal = await openProviderEditModal(page, providerName);
    await editModal.getByText(modelToEnable, { exact: true }).click();

    const updateButton = editModal.getByRole("button", { name: "Update" });
    const providerUpdateResponsePromise = page.waitForResponse(
      (response) =>
        response.url().includes("/api/admin/llm/provider") &&
        response.request().method() === "PUT"
    );
    await expect(updateButton).toBeEnabled({ timeout: 10000 });
    await updateButton.click();
    await providerUpdateResponsePromise;
    await expect(editModal).not.toBeVisible({ timeout: 30000 });
    await expectModelVisibilityInChatProviders(page, modelToEnable, true);

    await exitAdminToChat(page);
    await expect
      .poll(() => getModelCountInChatSelector(page, modelToEnable), {
        timeout: 15000,
      })
      .toBe(1);
  });

  test("removing a visible model on an existing provider hides it in chat after one save", async ({
    page,
  }) => {
    await page.route("**/api/admin/llm/test", async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: JSON.stringify({ success: true }),
      });
    });

    const providerName = uniqueName("PW Provider Remove Model");
    const ts = Date.now();
    const alwaysVisibleModel = `pw-visible-${ts}-base`;
    const modelToDisable = `pw-visible-${ts}-to-disable`;

    const providerId = await createPublicProviderWithModels(
      page,
      providerName,
      [
        { name: alwaysVisibleModel, is_visible: true },
        { name: modelToDisable, is_visible: true },
      ]
    );
    providersToCleanup.push(providerId);
    await expectModelVisibilityInChatProviders(page, modelToDisable, true);

    await page.goto("/app");
    await page.waitForLoadState("networkidle");
    await page
      .locator("#onyx-chat-input-textarea")
      .waitFor({ state: "visible", timeout: 15000 });

    await expect
      .poll(() => getModelCountInChatSelector(page, modelToDisable), {
        timeout: 15000,
      })
      .toBe(1);

    await navigateToAdminLlmPageFromChat(page);

    const editModal = await openProviderEditModal(page, providerName);
    await editModal.getByText(modelToDisable, { exact: true }).click();

    const updateButton = editModal.getByRole("button", { name: "Update" });
    const providerUpdateResponsePromise = page.waitForResponse(
      (response) =>
        response.url().includes("/api/admin/llm/provider") &&
        response.request().method() === "PUT"
    );
    await expect(updateButton).toBeEnabled({ timeout: 10000 });
    await updateButton.click();
    await providerUpdateResponsePromise;
    await expect(editModal).not.toBeVisible({ timeout: 30000 });
    await expectModelVisibilityInChatProviders(page, modelToDisable, false);

    await exitAdminToChat(page);
    await expect
      .poll(() => getModelCountInChatSelector(page, modelToDisable), {
        timeout: 15000,
      })
      .toBe(0);
  });
});


================================================
FILE: web/tests/e2e/admin/oauth_config/test_tool_oauth.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { Page, Browser } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

// --- Locator Helper Functions ---
const getAuthorizationUrlInput = (page: Page) =>
  page.locator('input[name="authorizationUrl"]');
const getTokenUrlInput = (page: Page) => page.locator('input[name="tokenUrl"]');
const getClientIdInput = (page: Page) => page.locator('input[name="clientId"]');
const getClientSecretInput = (page: Page) =>
  page.locator('input[name="clientSecret"]');
const getScopesInput = (page: Page) => page.locator('input[name="scopes"]');
const getConnectButton = (page: Page) =>
  page.getByRole("button", { name: "Connect" });
const getDefinitionTextarea = (page: Page) =>
  page.locator('textarea[name="definition"]');
const getAddActionButton = (page: Page) =>
  page.getByRole("button", { name: "Add Action" });
const getAddOpenAPIActionButton = (page: Page) =>
  page.getByRole("button", { name: "Add OpenAPI Action" });

// Simple OpenAPI schema for testing
const SIMPLE_OPENAPI_SCHEMA = `{
  "openapi": "3.0.0",
  "info": {
    "title": "Test API",
    "version": "1.0.0",
    "description": "A test API for OAuth tool selection"
  },
  "servers": [
    {
      "url": "https://api.example.com"
    }
  ],
  "paths": {
    "/test": {
      "get": {
        "operationId": "test_operation",
        "summary": "Test operation",
        "description": "A test operation",
        "responses": {
          "200": {
            "description": "Success"
          }
        }
      }
    }
  }
}`;

let createdAssistantId: number | null = null;
let createdToolName: string | null = null;

test.afterAll(async ({ browser }: { browser: Browser }) => {
  const context = await browser.newContext({
    storageState: "admin_auth.json",
  });
  const page = await context.newPage();
  const client = new OnyxApiClient(page.request);

  // Delete the assistant first (it references the tool)
  if (createdAssistantId !== null) {
    await client.deleteAgent(createdAssistantId);
  }

  // Then delete the tool
  if (createdToolName !== null) {
    const tool = await client.findToolByName(createdToolName);
    if (tool) {
      await client.deleteCustomTool(tool.id);
    }
  }

  await context.close();
});

test("Tool OAuth Configuration: Creation, Selection, and Assistant Integration", async ({
  page,
}) => {
  await page.context().clearCookies();
  await loginAs(page, "admin");

  // --- Step 1: Navigate to OpenAPI Actions Page and Open Add Modal ---
  const toolName = `Test API ${Date.now()}`;
  const authorizationUrl = "https://github.com/login/oauth/authorize";
  const tokenUrl = "https://github.com/login/oauth/access_token";
  const clientId = "test_client_id_456";
  const clientSecret = "test_client_secret_789";
  const scopes = "repo, user";

  // Create a unique OpenAPI schema with the unique tool name
  const uniqueOpenAPISchema = SIMPLE_OPENAPI_SCHEMA.replace(
    '"title": "Test API"',
    `"title": "${toolName}"`
  );

  await page.goto("/admin/actions/open-api");
  await page.waitForLoadState("networkidle");

  // Click "Add OpenAPI Action" button to open modal
  const addOpenAPIActionButton = getAddOpenAPIActionButton(page);
  await addOpenAPIActionButton.click();

  // Wait for modal to appear
  await expect(
    page.getByRole("dialog", { name: "Add OpenAPI action" })
  ).toBeVisible({ timeout: 5000 });

  // Fill in the OpenAPI definition in the modal
  const definitionTextarea = getDefinitionTextarea(page);
  await definitionTextarea.fill(uniqueOpenAPISchema);

  // Wait for validation to complete (debounced, can take a few seconds)
  // The tool name appears in the modal after successful validation
  await expect(page.getByText(toolName)).toBeVisible({
    timeout: 15000,
  });

  // --- Step 2: Submit the OpenAPI Action Creation ---
  const addActionButton = getAddActionButton(page);
  await addActionButton.scrollIntoViewIfNeeded();
  await addActionButton.click();

  // --- Step 3: Configure OAuth in Authentication Modal ---
  // Wait for the authentication modal to appear
  await expect(page.getByText("Authentication Method")).toBeVisible({
    timeout: 5000,
  });

  // Store tool name for cleanup now that the tool is confirmed created
  createdToolName = toolName;

  // OAuth should be selected by default, fill in OAuth config details
  await getAuthorizationUrlInput(page).fill(authorizationUrl);
  await getTokenUrlInput(page).fill(tokenUrl);
  await getClientIdInput(page).fill(clientId);
  await getClientSecretInput(page).fill(clientSecret);
  await getScopesInput(page).fill(scopes);

  // Submit the authentication form
  const connectButton = getConnectButton(page);
  await connectButton.click();

  // Wait for authentication to complete and return to the actions list
  await page.waitForTimeout(2000);

  // --- Step 4: Verify Tool Was Created with OAuth Config ---
  // We should be on the OpenAPI actions page
  await page.waitForLoadState("networkidle");

  // Verify we're on the open-api page
  expect(page.url()).toContain("/admin/actions/open-api");

  // The tool should appear in the actions list - look for our unique tool name
  await expect(page.getByText(toolName, { exact: false }).first()).toBeVisible({
    timeout: 20000,
  });

  // --- Step 5: Verify OAuth Config Persists in Edit Mode ---
  // Find the action card with our tool and click the manage button
  const actionCard = page.locator(`[aria-label*="${toolName}"]`).first();
  await expect(actionCard).toBeVisible({ timeout: 5000 });

  // Click the manage button (gear icon) on the card
  const manageButton = actionCard
    .getByRole("button", { name: /manage/i })
    .or(actionCard.locator('button[aria-label*="anage"]'))
    .first();
  await manageButton.click();

  // Wait for the edit modal to appear
  const editDialog = page.getByRole("dialog", { name: "Edit OpenAPI action" });
  await expect(editDialog).toBeVisible({ timeout: 5000 });

  // Wait for the definition textarea to be visible (indicates modal is loaded)
  await expect(editDialog.locator('textarea[name="definition"]')).toBeVisible({
    timeout: 10000,
  });

  // Verify authentication status is shown (indicates OAuth is configured)
  await expect(editDialog.getByText("Authenticated & Enabled")).toBeVisible({
    timeout: 5000,
  });

  // Verify the "Edit Configs" button is visible (confirms OAuth config persists)
  const editConfigsButton = editDialog.getByRole("button", {
    name: "Edit Configs",
  });
  await expect(editConfigsButton).toBeVisible({ timeout: 5000 });

  // Close the modal
  const closeButton = page
    .locator('button[aria-label*="lose"]')
    .or(page.getByRole("button", { name: "Cancel" }))
    .first();
  await closeButton.click();

  // Wait for modal to close
  await page.waitForTimeout(500);

  // Test complete for steps 1-5! We've verified:
  // 1. OpenAPI action can be created via modal
  // 2. OAuth config is created and applied during action creation
  // 3. The tool is created and authenticated with the OAuth config
  // 4. The OAuth config persists when editing the tool

  // --- Step 6: Create Assistant and Verify Tool Availability ---
  // Navigate to the assistant creation page
  await page.goto("/app/agents/create");
  await page.waitForLoadState("networkidle");

  // Fill in basic assistant details
  const agentName = `Test Assistant ${Date.now()}`;
  const agentDescription = "Assistant with OAuth tool";
  const assistantInstructions = "Use the tool when needed";

  await page.locator('input[name="name"]').fill(agentName);
  await page.locator('textarea[name="description"]').fill(agentDescription);
  await page
    .locator('textarea[name="instructions"]')
    .fill(assistantInstructions);

  // Scroll down to the Actions section (tools are listed there)
  const actionsHeading = page.locator("text=Actions").first();
  await expect(actionsHeading).toBeVisible({ timeout: 10000 });
  await actionsHeading.scrollIntoViewIfNeeded();

  // Look for our tool in the list
  // The tool display_name is the tool name we created
  const toolLabel = page.locator(`label:has-text("${toolName}")`);
  await expect(toolLabel).toBeVisible({ timeout: 10000 });
  await toolLabel.scrollIntoViewIfNeeded();

  // Turn it on
  await toolLabel.click();

  // Submit the assistant creation form
  const createButton = page.locator('button[type="submit"]:has-text("Create")');
  await createButton.scrollIntoViewIfNeeded();
  await createButton.click();

  // Verify redirection to app page with the new assistant ID
  await page.waitForURL(/.*\/app\?agentId=\d+.*/, { timeout: 10000 });
  const assistantUrl = page.url();
  const agentIdMatch = assistantUrl.match(/agentId=(\d+)/);
  expect(agentIdMatch).toBeTruthy();

  // Store assistant ID for cleanup
  if (agentIdMatch) {
    createdAssistantId = Number(agentIdMatch[1]);
  }

  // Test complete! We've verified:
  // 5. The tool with OAuth config is available in assistant creation
  // 6. The tool can be selected and the assistant can be created successfully
});


================================================
FILE: web/tests/e2e/admin/scim/fixtures.ts
================================================
/**
 * Playwright fixtures for SCIM admin UI tests.
 *
 * Provides:
 * - Authenticated admin page
 * - Stateful mock for the SCIM token endpoint
 *   (GET starts as 404; POST creates a token and flips GET to 200)
 */

import { test as base, expect, Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import type { ScimTokenResponse } from "@/app/admin/scim/interfaces";

// ---------------------------------------------------------------------------
// Fixture control interface
// ---------------------------------------------------------------------------

interface MockTokenControl {
  /** Pre-seed the mock so GET returns an existing token (200). */
  seedToken: () => ScimTokenResponse;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

async function authenticateAdmin(page: Page): Promise<void> {
  await page.context().clearCookies();
  await loginAs(page, "admin");
}

function jsonResponse(data: unknown, status = 200) {
  return {
    status,
    contentType: "application/json",
    body: JSON.stringify(data),
  };
}

// ---------------------------------------------------------------------------
// Extended test fixture
// ---------------------------------------------------------------------------

export const test = base.extend<{
  adminPage: Page;
  mockTokenEndpoint: MockTokenControl;
}>({
  adminPage: async ({ page }, use) => {
    await authenticateAdmin(page);
    await use(page);
  },

  mockTokenEndpoint: async ({ adminPage }, use) => {
    let currentToken: ScimTokenResponse | null = null;
    let tokenCounter = 0;

    function makeToken(): { token: ScimTokenResponse; rawToken: string } {
      tokenCounter++;
      const rawToken = `scim_test_token_${tokenCounter}_${Date.now()}`;
      const token: ScimTokenResponse = {
        id: tokenCounter,
        name: "default",
        token_display: rawToken.slice(0, 16) + "...",
        is_active: true,
        created_at: new Date().toISOString(),
        last_used_at: null,
        idp_domain: null,
      };
      return { token, rawToken };
    }

    await adminPage.route(
      "**/api/admin/enterprise-settings/scim/token",
      async (route) => {
        const method = route.request().method();

        if (method === "GET") {
          if (currentToken) {
            await route.fulfill(jsonResponse(currentToken));
          } else {
            await route.fulfill(jsonResponse({ detail: "Not found" }, 404));
          }
        } else if (method === "POST") {
          const { token, rawToken } = makeToken();
          currentToken = token;
          await route.fulfill(jsonResponse({ ...token, raw_token: rawToken }));
        } else {
          await route.continue();
        }
      }
    );

    await use({
      seedToken: () => {
        const { token } = makeToken();
        currentToken = token;
        return token;
      },
    });
  },
});

export { expect };

// ---------------------------------------------------------------------------
// Navigation helper
// ---------------------------------------------------------------------------

export async function gotoScimPage(adminPage: Page): Promise<void> {
  await adminPage.goto("/admin/scim");
  await expect(adminPage.getByText("SCIM Sync")).toBeVisible({
    timeout: 15000,
  });
}


================================================
FILE: web/tests/e2e/admin/scim/scim.spec.ts
================================================
/**
 * E2E Tests: SCIM Token Management
 *
 * Tests the full lifecycle of SCIM tokens — generation, clipboard copy,
 * file download, and regeneration with confirmation.
 */

import { test, expect, gotoScimPage } from "./fixtures";

test.describe("SCIM Token Management", () => {
  test("generate token, copy, and download", async ({
    adminPage,
    mockTokenEndpoint: _mockTokenEndpoint,
  }) => {
    await gotoScimPage(adminPage);

    // No token yet — click generate
    await adminPage
      .getByRole("button", { name: "Generate SCIM Token" })
      .click();

    // Token modal opens (.first() to skip hidden Radix aria-describedby element)
    await expect(
      adminPage.getByText("Save this key before continuing").first()
    ).toBeVisible({ timeout: 10000 });

    // Grab the raw token from the textarea
    const textarea = adminPage.locator("textarea");
    await textarea.waitFor({ state: "visible" });
    const tokenValue = await textarea.inputValue();
    expect(tokenValue).toContain("scim_test_token_");

    // Copy to clipboard
    await adminPage
      .context()
      .grantPermissions(["clipboard-read", "clipboard-write"]);
    await adminPage.getByRole("button", { name: "Copy Token" }).click();
    await expect(adminPage.getByText("Token copied to clipboard")).toBeVisible({
      timeout: 5000,
    });
    const clipboardText = await adminPage.evaluate(() =>
      navigator.clipboard.readText()
    );
    expect(clipboardText).toBe(tokenValue);

    // Download
    const downloadPromise = adminPage.waitForEvent("download");
    await adminPage.getByRole("button", { name: "Download" }).click();
    const download = await downloadPromise;
    expect(download.suggestedFilename()).toMatch(/^onyx-scim-token-\d+\.txt$/);
  });

  test("regenerate token", async ({ adminPage, mockTokenEndpoint }) => {
    // Start with an existing token so the card shows "Regenerate"
    mockTokenEndpoint.seedToken();
    await gotoScimPage(adminPage);

    // Click regenerate on the card
    await adminPage.getByRole("button", { name: "Regenerate Token" }).click();

    // Confirmation modal appears
    await expect(adminPage.getByText("Regenerate SCIM Token")).toBeVisible();
    await expect(
      adminPage.getByText("Your current SCIM token will be revoked")
    ).toBeVisible();

    // Confirm via the danger button inside the dialog
    const dialog = adminPage.locator('[role="dialog"]');
    await dialog.getByRole("button", { name: "Regenerate Token" }).click();

    // Token display modal replaces the confirmation modal
    await expect(
      adminPage.getByText("Save this key before continuing").first()
    ).toBeVisible({ timeout: 10000 });

    const textarea = adminPage.locator("textarea");
    await textarea.waitFor({ state: "visible" });
    const tokenValue = await textarea.inputValue();
    expect(tokenValue).toContain("scim_test_token_");
  });
});


================================================
FILE: web/tests/e2e/admin/theme/appearance_theme_settings.spec.ts
================================================
import { test, expect } from "@tests/e2e/fixtures/eeFeatures";
import { loginAs } from "@tests/e2e/utils/auth";

test.describe("Appearance Theme Settings @exclusive", () => {
  const TEST_VALUES = {
    applicationName: `TestApp${Date.now()}`,
    greetingMessage: "Welcome to our test application",
    chatHeader: "Test Header Content",
    chatFooter: "Test Footer Disclaimer",
    noticeHeader: "Important Notice",
    noticeContent: "Please read and agree to continue",
    consentPrompt: "I agree to the terms",
  };

  test.beforeEach(async ({ page, eeEnabled }) => {
    test.skip(
      !eeEnabled,
      "Enterprise license not active — skipping theme tests"
    );

    // Fresh session — the eeEnabled fixture already logged in to check the
    // setting, so clear cookies and re-login for a clean test state.
    await page.context().clearCookies();
    await loginAs(page, "admin");

    await page.goto("/admin/theme");
    await expect(
      page.locator('[data-label="application-name-input"]')
    ).toBeVisible({ timeout: 10_000 });

    // Clear localStorage to ensure consent modal shows
    await page.evaluate(() => {
      localStorage.removeItem("allUsersInitialPopupFlowCompleted");
    });
  });

  test.afterEach(async ({ page }) => {
    // Reset settings to defaults
    await page.goto("/admin/theme");
    await page.waitForLoadState("networkidle");

    // If the form isn't visible (e.g. EE license not active, or test failed
    // before navigating here), skip cleanup — there's nothing to reset.
    const appNameInput = page.locator('[data-label="application-name-input"]');
    if (!(await appNameInput.isVisible({ timeout: 3000 }).catch(() => false))) {
      return;
    }

    // Clear form fields
    await appNameInput.clear();

    const greetingInput = page.locator('[data-label="greeting-message-input"]');
    await greetingInput.clear();

    const headerInput = page.locator('[data-label="chat-header-input"]');
    await headerInput.clear();

    const footerTextarea = page.locator('[data-label="chat-footer-textarea"]');
    await footerTextarea.clear();

    // Disable notice toggle if enabled
    const noticeToggle = page.locator(
      '[data-label="first-visit-notice-toggle"]'
    );
    const isChecked = await noticeToggle.getAttribute("aria-checked");
    if (isChecked === "true") {
      await noticeToggle.click();
      await page.waitForTimeout(300);
    }

    // Save reset
    const saveButton = page.getByRole("button", { name: "Apply Changes" });
    if (await saveButton.isEnabled()) {
      await saveButton.click();
      await page.waitForResponse(
        (r) =>
          r.url().includes("/api/admin/enterprise-settings") &&
          r.request().method() === "PUT"
      );
    }

    // Clear localStorage
    await page.evaluate(() => {
      localStorage.removeItem("allUsersInitialPopupFlowCompleted");
    });
  });

  test("admin configures branding and verifies across pages", async ({
    page,
  }) => {
    // 1. Fill in Application Name (page already navigated in beforeEach)
    const appNameInput = page.locator('[data-label="application-name-input"]');
    await appNameInput.fill(TEST_VALUES.applicationName);

    // 3. Fill in Greeting Message
    const greetingInput = page.locator('[data-label="greeting-message-input"]');
    await greetingInput.fill(TEST_VALUES.greetingMessage);

    // 4. Fill in Chat Header
    const headerInput = page.locator('[data-label="chat-header-input"]');
    await headerInput.fill(TEST_VALUES.chatHeader);

    // 5. Fill in Chat Footer
    const footerTextarea = page.locator('[data-label="chat-footer-textarea"]');
    await footerTextarea.fill(TEST_VALUES.chatFooter);

    // 6. Enable First Visit Notice
    const noticeToggle = page.locator(
      '[data-label="first-visit-notice-toggle"]'
    );
    await noticeToggle.click();

    // 7. Fill Notice Header (wait for it to be visible first)
    const noticeHeaderInput = page.locator(
      '[data-label="notice-header-input"]'
    );
    await expect(noticeHeaderInput).toBeVisible({ timeout: 5000 });
    await noticeHeaderInput.fill(TEST_VALUES.noticeHeader);

    // 8. Fill Notice Content
    const noticeContentTextarea = page.locator(
      '[data-label="notice-content-textarea"]'
    );
    await noticeContentTextarea.fill(TEST_VALUES.noticeContent);

    // 9. Enable Consent Requirement (only if not already enabled)
    const consentToggle = page.locator('[data-label="require-consent-toggle"]');
    const consentState = await consentToggle.getAttribute("aria-checked");
    if (consentState !== "true") {
      await consentToggle.click();
    }

    // 10. Fill Consent Prompt (wait for it to be visible first)
    const consentPromptTextarea = page.locator(
      '[data-label="consent-prompt-textarea"]'
    );
    await expect(consentPromptTextarea).toBeVisible({ timeout: 5000 });
    await consentPromptTextarea.fill(TEST_VALUES.consentPrompt);

    // 11. Click Apply Changes
    const saveButton = page.getByRole("button", { name: "Apply Changes" });
    await expect(saveButton).toBeEnabled();
    await saveButton.click();

    // 12. Wait for API response
    const response = await page.waitForResponse(
      (r) =>
        r.url().includes("/api/admin/enterprise-settings") &&
        r.request().method() === "PUT",
      { timeout: 10000 }
    );
    expect(response.status()).toBe(200);

    // 13. Wait for success message
    await expect(page.getByText(/successfully/i)).toBeVisible({
      timeout: 5000,
    });

    // 14. Verify admin sidebar has branding (application name)
    await expect(
      page.getByText(TEST_VALUES.applicationName).first()
    ).toBeVisible({
      timeout: 5000,
    });

    // 15. Navigate to chat page
    // Clear localStorage again right before navigation to ensure consent modal shows
    await page.evaluate(() => {
      localStorage.removeItem("allUsersInitialPopupFlowCompleted");
    });
    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    // 16. Handle consent modal
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 15000 });

    // Verify notice header and content
    await expect(
      modal.getByText(TEST_VALUES.noticeHeader).first()
    ).toBeVisible();
    await expect(
      modal.getByText(TEST_VALUES.noticeContent).first()
    ).toBeVisible();

    // Check consent checkbox
    const checkbox = modal.getByLabel("Consent checkbox");
    await checkbox.click();

    // Click Start button
    const startButton = modal.getByRole("button", { name: "Start" });
    await startButton.click();

    // Wait for modal to close
    await expect(modal).not.toBeVisible({ timeout: 5000 });

    // 17. Verify sidebar branding on chat page
    await expect(
      page.getByText(TEST_VALUES.applicationName).first()
    ).toBeVisible();

    // 18. Verify greeting message on welcome screen
    await expect(page.getByText(TEST_VALUES.greetingMessage)).toBeVisible();

    // 19. Verify chat header content
    await expect(page.getByText(TEST_VALUES.chatHeader)).toBeVisible();

    // 20. Verify chat footer content
    await expect(page.getByText(TEST_VALUES.chatFooter)).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/admin/users/UsersAdminPage.ts
================================================
/**
 * Page Object Model for the Admin Users page (/admin/users).
 *
 * Encapsulates all locators and interactions so specs remain declarative.
 */

import { type Page, type Locator, expect } from "@playwright/test";

/** URL pattern that matches the users data fetch. */
const USERS_API = /\/api\/manage\/users\/(accepted\/all|invited)/;

export class UsersAdminPage {
  readonly page: Page;

  // Top-level elements
  readonly inviteButton: Locator;
  readonly searchInput: Locator;

  // Filter buttons
  readonly accountTypesFilter: Locator;
  readonly groupsFilter: Locator;
  readonly statusFilter: Locator;

  // Table
  readonly table: Locator;
  readonly tableRows: Locator;

  // Pagination & footer
  readonly paginationSummary: Locator;
  readonly downloadCsvButton: Locator;

  constructor(page: Page) {
    this.page = page;
    this.inviteButton = page.getByRole("button", { name: "Invite Users" });
    this.searchInput = page.getByPlaceholder("Search users...");

    this.accountTypesFilter = page.getByLabel("Filter by role");
    this.groupsFilter = page.getByLabel("Filter by group");
    this.statusFilter = page.getByLabel("Filter by status");

    this.table = page.getByRole("table");
    this.tableRows = page.getByRole("table").locator("tbody tr");

    this.paginationSummary = page.getByText(/Showing \d/);
    this.downloadCsvButton = page.getByRole("button", {
      name: "Download CSV",
    });
  }

  // ---------------------------------------------------------------------------
  // Popover helper
  // ---------------------------------------------------------------------------

  /**
   * Returns a locator for the currently open popover / filter dropdown.
   * Radix Popover renders its content with `role="dialog"`. Using
   * `getByRole("dialog").first()` targets the oldest open dialog, which is
   * always the popover during row-action or filter flows (confirmation
   * modals open later and would be `.last()`).
   */
  get popover(): Locator {
    return this.page.getByRole("dialog").first();
  }

  // ---------------------------------------------------------------------------
  // Navigation
  // ---------------------------------------------------------------------------

  async goto() {
    await this.page.goto("/admin/users");
    await expect(this.page.getByText("Users & Requests")).toBeVisible({
      timeout: 15000,
    });
    // Wait for the table to finish loading (pagination summary only appears
    // after the async data fetch completes).
    await expect(this.paginationSummary).toBeVisible({ timeout: 15000 });
  }

  // ---------------------------------------------------------------------------
  // Waiting helpers
  // ---------------------------------------------------------------------------

  /** Wait for the users API response that follows a table-refreshing action. */
  private async waitForTableRefresh(): Promise<void> {
    await this.page.waitForResponse(USERS_API);
  }

  // ---------------------------------------------------------------------------
  // Search
  // ---------------------------------------------------------------------------

  async search(term: string) {
    await this.searchInput.fill(term);
  }

  async clearSearch() {
    await this.searchInput.fill("");
  }

  // ---------------------------------------------------------------------------
  // Filters
  // ---------------------------------------------------------------------------

  async openAccountTypesFilter() {
    await this.accountTypesFilter.click();
    await expect(this.popover).toBeVisible();
  }

  async selectAccountType(label: string) {
    await this.popover.getByText(label, { exact: false }).first().click();
  }

  async openStatusFilter() {
    await this.statusFilter.click();
    await expect(this.popover).toBeVisible();
  }

  async selectStatus(label: string) {
    await this.popover.getByText(label, { exact: false }).first().click();
  }

  async openGroupsFilter() {
    await this.groupsFilter.click();
    await expect(this.popover).toBeVisible();
  }

  async selectGroup(label: string) {
    await this.popover.getByText(label, { exact: false }).first().click();
  }

  async closePopover() {
    await this.page.keyboard.press("Escape");
    await expect(this.page.getByRole("dialog")).not.toBeVisible();
  }

  // ---------------------------------------------------------------------------
  // Table interactions
  // ---------------------------------------------------------------------------

  async getVisibleRowCount(): Promise<number> {
    return await this.tableRows.count();
  }

  /**
   * Returns the text content of a specific column across all visible rows.
   * Column indices: 0=Name, 1=Groups, 2=Account Type, 3=Status, 4=Last Updated.
   */
  async getColumnTexts(columnIndex: number): Promise<string[]> {
    const cells = this.tableRows.locator(`td:nth-child(${columnIndex + 2})`);
    const count = await cells.count();
    const texts: string[] = [];
    for (let i = 0; i < count; i++) {
      const text = await cells.nth(i).textContent();
      if (text) texts.push(text.trim());
    }
    return texts;
  }

  getRowByEmail(email: string): Locator {
    return this.table.getByRole("row").filter({ hasText: email });
  }

  /** Click the sort button on a column header. */
  async sortByColumn(columnName: string) {
    // Column headers are <th> elements. The sort button is a child <button>
    // that only appears on hover — hover first to reveal it.
    const header = this.table.locator("th").filter({ hasText: columnName });
    await header.hover();
    await header.locator("button").first().click();
  }

  // ---------------------------------------------------------------------------
  // Pagination
  // ---------------------------------------------------------------------------

  /** Click a numbered page button in the table footer. */
  async goToPage(pageNumber: number) {
    const footer = this.page.locator(".table-footer");
    await footer
      .getByRole("button")
      .filter({ hasText: String(pageNumber) })
      .click();
  }

  // ---------------------------------------------------------------------------
  // Row actions
  // ---------------------------------------------------------------------------

  async openRowActions(email: string) {
    const row = this.getRowByEmail(email);
    const actionsButton = row.getByRole("button").last();
    await actionsButton.click();
    await expect(this.popover).toBeVisible();
  }

  async clickRowAction(actionName: string) {
    await this.popover.getByText(actionName).first().click();
  }

  // ---------------------------------------------------------------------------
  // Confirmation modals
  // ---------------------------------------------------------------------------

  /**
   * Returns the most recently opened dialog (modal).
   * Uses `.last()` because confirmation modals are portaled after row-action
   * popovers, and a closing popover (role="dialog") may briefly remain in the
   * DOM during its exit animation.
   */
  get dialog(): Locator {
    return this.page.getByRole("dialog").last();
  }

  async confirmModalAction(buttonName: string) {
    await this.dialog.getByRole("button", { name: buttonName }).first().click();
  }

  async cancelModal() {
    await this.dialog.getByRole("button", { name: "Cancel" }).first().click();
  }

  async expectToast(message: string | RegExp) {
    await expect(this.page.getByText(message)).toBeVisible();
  }

  // ---------------------------------------------------------------------------
  // Invite modal
  // ---------------------------------------------------------------------------

  /** The email input inside the invite modal. */
  get inviteEmailInput(): Locator {
    return this.dialog.getByPlaceholder("Add an email and press enter");
  }

  async openInviteModal() {
    await this.inviteButton.click();
    await expect(this.dialog.getByText("Invite Users")).toBeVisible();
  }

  async addInviteEmail(email: string) {
    await this.inviteEmailInput.pressSequentially(email, { delay: 20 });
    await this.inviteEmailInput.press("Enter");
    // Wait for the chip to appear in the dialog
    await expect(this.dialog.getByText(email)).toBeVisible();
  }

  async submitInvite() {
    await this.dialog.getByRole("button", { name: "Invite" }).click();
  }

  // ---------------------------------------------------------------------------
  // Inline role editing (Popover + OpenButton + LineItem)
  // ---------------------------------------------------------------------------

  async openRoleDropdown(email: string) {
    const row = this.getRowByEmail(email);
    const roleButton = row
      .locator("button")
      .filter({ hasText: /Basic|Admin|Global Curator|Slack User/ });
    await roleButton.click();
    await expect(this.popover).toBeVisible();
  }

  async selectRole(roleName: string) {
    await this.popover.getByText(roleName).first().click();
    await this.waitForTableRefresh();
  }

  // ---------------------------------------------------------------------------
  // Edit groups modal
  // ---------------------------------------------------------------------------

  /**
   * Stable locator for the edit-groups modal.
   *
   * We can't use the generic `dialog` getter (`.last()`) here because the
   * groups search opens a Radix Popover (also `role="dialog"`) inside the
   * modal, which shifts what `.last()` resolves to.  Targeting by accessible
   * name keeps the reference pinned to the modal itself.
   */
  get editGroupsDialog(): Locator {
    return this.page.getByRole("dialog", { name: /Edit User/ });
  }

  /** The search input inside the edit groups modal. */
  get groupSearchInput(): Locator {
    return this.editGroupsDialog.getByPlaceholder("Search groups to join...");
  }

  async openEditGroupsModal(email: string) {
    await this.openRowActions(email);
    await this.clickRowAction("Groups");
    await expect(
      this.editGroupsDialog.getByText("Edit User's Groups & Roles")
    ).toBeVisible();
  }

  async searchGroupsInModal(term: string) {
    // Click the input first to open the popover (Radix Popover.Trigger
    // wraps the input — fill() alone bypasses the trigger's click handler).
    await this.groupSearchInput.click();
    await this.groupSearchInput.fill(term);
    // The group name appears in the popover dropdown (nested dialog).
    // Use page-level search since the popover may be portaled.
    await expect(this.page.getByText(term).first()).toBeVisible();
  }

  async toggleGroupInModal(groupName: string) {
    // LineItem renders as a <div>, not <button>.
    // The popover dropdown is a nested dialog inside the modal.
    await this.page
      .getByRole("dialog")
      .last()
      .getByText(groupName)
      .first()
      .click();
  }

  async saveGroupsModal() {
    await this.editGroupsDialog
      .getByRole("button", { name: "Save Changes" })
      .click();
  }
}


================================================
FILE: web/tests/e2e/admin/users/fixtures.ts
================================================
/**
 * Playwright fixtures for Admin Users page tests.
 *
 * Provides:
 * - Authenticated admin page
 * - OnyxApiClient for API-level setup/teardown
 * - UsersAdminPage page object
 */

import { test as base, expect, type Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { UsersAdminPage } from "./UsersAdminPage";

export const test = base.extend<{
  adminPage: Page;
  api: OnyxApiClient;
  usersPage: UsersAdminPage;
}>({
  adminPage: async ({ page }, use) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await use(page);
  },

  api: async ({ adminPage }, use) => {
    const client = new OnyxApiClient(adminPage.request);
    await use(client);
  },

  usersPage: async ({ adminPage }, use) => {
    const usersPage = new UsersAdminPage(adminPage);
    await use(usersPage);
  },
});

export { expect };


================================================
FILE: web/tests/e2e/admin/users/users.spec.ts
================================================
/**
 * E2E Tests: Admin Users Page
 *
 * Tests the full users management page — search, filters, sorting,
 * inline role editing, row actions, invite modal, and group management.
 *
 * Read-only tests (layout, search, filters, sorting, pagination) run against
 * whatever users already exist in the database (at minimum 10 from global-setup:
 * 2 admins + 8 workers). Mutation tests create their own ephemeral users.
 */

import { test, expect } from "./fixtures";
import { TEST_ADMIN_CREDENTIALS } from "@tests/e2e/constants";
import type { Browser } from "@playwright/test";
import type { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

function uniqueEmail(prefix: string): string {
  return `e2e-${prefix}-${Date.now()}@test.onyx`;
}

const TEST_PASSWORD = "TestPassword123!";

/** Best-effort cleanup — logs failures instead of silently swallowing them. */
async function softCleanup(fn: () => Promise<unknown>): Promise<void> {
  await fn().catch((e) => console.warn("cleanup:", e));
}

/**
 * Creates an authenticated API context for beforeAll/afterAll hooks.
 * Handles browser context lifecycle so callers only write the setup logic.
 */
async function withApiContext(
  browser: Browser,
  fn: (api: OnyxApiClient) => Promise<void>
): Promise<void> {
  const context = await browser.newContext({
    storageState: "admin_auth.json",
  });
  try {
    const { OnyxApiClient } = await import("@tests/e2e/utils/onyxApiClient");
    const api = new OnyxApiClient(context.request);
    await fn(api);
  } finally {
    await context.close();
  }
}

// ---------------------------------------------------------------------------
// Page load & layout
// ---------------------------------------------------------------------------

test.describe("Users page — layout", () => {
  test("renders page title, invite button, search, and stats bar", async ({
    usersPage,
  }) => {
    await usersPage.goto();

    await expect(usersPage.page.getByText("Users & Requests")).toBeVisible();
    await expect(usersPage.inviteButton).toBeVisible();
    await expect(usersPage.searchInput).toBeVisible();
    // Stats bar renders number and label as separate elements
    await expect(usersPage.page.getByText("active users")).toBeVisible();
  });

  test("table renders with correct column headers", async ({ usersPage }) => {
    await usersPage.goto();

    for (const header of [
      "Name",
      "Groups",
      "Account Type",
      "Status",
      "Last Updated",
    ]) {
      await expect(
        usersPage.table.locator("th").filter({ hasText: header })
      ).toBeVisible();
    }
  });

  test("pagination shows summary and controls", async ({ usersPage }) => {
    await usersPage.goto();

    await expect(usersPage.paginationSummary).toBeVisible();
    await expect(usersPage.paginationSummary).toContainText("Showing");
  });

  test("CSV download button is visible in footer", async ({ usersPage }) => {
    await usersPage.goto();
    await expect(usersPage.downloadCsvButton).toBeVisible();
  });
});

// ---------------------------------------------------------------------------
// Search (uses existing DB users — at least admin_user@example.com)
// ---------------------------------------------------------------------------

test.describe("Users page — search", () => {
  test("search filters table rows by email", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.search(TEST_ADMIN_CREDENTIALS.email);

    const row = usersPage.getRowByEmail(TEST_ADMIN_CREDENTIALS.email);
    await expect(row).toBeVisible();

    const rowCount = await usersPage.getVisibleRowCount();
    expect(rowCount).toBeGreaterThanOrEqual(1);
  });

  test("search with no results shows empty state", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.search("zzz-no-match-exists-xyz@nowhere.invalid");

    await expect(usersPage.page.getByText("No users found")).toBeVisible();
  });

  test("clearing search restores all results", async ({ usersPage }) => {
    await usersPage.goto();

    await usersPage.search("zzz-no-match-exists-xyz@nowhere.invalid");
    await expect(usersPage.page.getByText("No users found")).toBeVisible();

    await usersPage.clearSearch();

    await expect(usersPage.table).toBeVisible();
    const rowCount = await usersPage.getVisibleRowCount();
    expect(rowCount).toBeGreaterThan(0);
  });
});

// ---------------------------------------------------------------------------
// Filters (uses existing DB users)
// ---------------------------------------------------------------------------

test.describe("Users page — filters", () => {
  test("account types filter shows expected roles", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.openAccountTypesFilter();

    await expect(
      usersPage.popover.getByText("All Account Types").first()
    ).toBeVisible();
    await expect(usersPage.popover.getByText("Admin").first()).toBeVisible();
    await expect(usersPage.popover.getByText("Basic").first()).toBeVisible();

    await usersPage.closePopover();
  });

  test("filtering by Admin role shows only admin users", async ({
    usersPage,
  }) => {
    await usersPage.goto();
    await usersPage.openAccountTypesFilter();
    await usersPage.selectAccountType("Admin");
    await usersPage.closePopover();

    await expect(usersPage.accountTypesFilter).toContainText("Admin");

    const rowCount = await usersPage.getVisibleRowCount();
    expect(rowCount).toBeGreaterThan(0);

    // Every visible row's Account Type column must say "Admin"
    const roleTexts = await usersPage.getColumnTexts(2);
    for (const role of roleTexts) {
      expect(role).toBe("Admin");
    }
  });

  test("status filter for Active shows only active users", async ({
    usersPage,
  }) => {
    await usersPage.goto();
    await usersPage.openStatusFilter();
    await usersPage.selectStatus("Active");
    await usersPage.closePopover();

    await expect(usersPage.statusFilter).toContainText("Active");

    const rowCount = await usersPage.getVisibleRowCount();
    expect(rowCount).toBeGreaterThan(0);

    // Every visible row's Status column must say "Active"
    const statusTexts = await usersPage.getColumnTexts(3);
    for (const status of statusTexts) {
      expect(status).toBe("Active");
    }
  });

  test("resetting filter shows all users again", async ({ usersPage }) => {
    await usersPage.goto();

    await usersPage.openStatusFilter();
    await usersPage.selectStatus("Active");
    await usersPage.closePopover();
    const filteredCount = await usersPage.getVisibleRowCount();

    await usersPage.openStatusFilter();
    await usersPage.selectStatus("All Status");
    await usersPage.closePopover();
    const allCount = await usersPage.getVisibleRowCount();

    expect(allCount).toBeGreaterThanOrEqual(filteredCount);
  });
});

// ---------------------------------------------------------------------------
// Sorting (uses existing DB users)
// ---------------------------------------------------------------------------

test.describe("Users page — sorting", () => {
  test("clicking Name sort twice reverses row order", async ({ usersPage }) => {
    await usersPage.goto();

    const firstRowBefore = await usersPage.tableRows.first().textContent();

    // Click twice — first click may match default order; second guarantees reversal
    await usersPage.sortByColumn("Name");
    await usersPage.sortByColumn("Name");

    const firstRowAfter = await usersPage.tableRows.first().textContent();
    expect(firstRowAfter).not.toBe(firstRowBefore);
  });

  test("clicking Account Type sort twice reorders rows", async ({
    usersPage,
  }) => {
    await usersPage.goto();

    const rolesBefore = await usersPage.getColumnTexts(2);

    // Click twice to guarantee a different order from default
    await usersPage.sortByColumn("Account Type");
    await usersPage.sortByColumn("Account Type");

    const rolesAfter = await usersPage.getColumnTexts(2);
    expect(rolesAfter.length).toBeGreaterThan(0);
    expect(rolesAfter).not.toEqual(rolesBefore);
  });
});

// ---------------------------------------------------------------------------
// Pagination (uses existing DB users — need > 8 for multi-page)
// ---------------------------------------------------------------------------

test.describe("Users page — pagination", () => {
  test("clicking page 2 navigates to second page", async ({ usersPage }) => {
    await usersPage.goto();

    const summaryBefore = await usersPage.paginationSummary.textContent();

    // With 10+ users and page size 8, page 2 should exist
    await usersPage.goToPage(2);

    await expect(usersPage.paginationSummary).not.toHaveText(summaryBefore!);

    // Go back to page 1
    await usersPage.goToPage(1);
    await expect(usersPage.paginationSummary).toHaveText(summaryBefore!);
  });
});

// ---------------------------------------------------------------------------
// Invite users (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — invite users", () => {
  test("invite modal opens with correct structure", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.openInviteModal();

    await expect(usersPage.dialog.getByText("Invite Users")).toBeVisible();
    await expect(usersPage.inviteEmailInput).toBeVisible();

    await usersPage.cancelModal();
    await expect(usersPage.dialog).not.toBeVisible();
  });

  test("invite a user and verify Invite Pending status", async ({
    usersPage,
    api,
  }) => {
    const email = uniqueEmail("invite");

    await usersPage.goto();
    await usersPage.openInviteModal();
    await usersPage.addInviteEmail(email);
    await usersPage.submitInvite();

    await usersPage.expectToast(/Invited 1 user/);

    // Reload and search
    await usersPage.goto();
    await usersPage.search(email);

    const row = usersPage.getRowByEmail(email);
    await expect(row).toBeVisible();
    await expect(row).toContainText("Invite Pending");

    // Cleanup
    await api.cancelInvite(email);
  });

  test("invite multiple users at once", async ({ usersPage, api }) => {
    const email1 = uniqueEmail("multi1");
    const email2 = uniqueEmail("multi2");

    await usersPage.goto();
    await usersPage.openInviteModal();

    await usersPage.addInviteEmail(email1);
    await usersPage.addInviteEmail(email2);

    await usersPage.submitInvite();
    await usersPage.expectToast(/Invited 2 users/);

    // Cleanup
    await api.cancelInvite(email1);
    await api.cancelInvite(email2);
  });

  test("invite modal shows error icon for invalid emails", async ({
    usersPage,
  }) => {
    await usersPage.goto();
    await usersPage.openInviteModal();

    await usersPage.addInviteEmail("not-an-email");

    // The chip should be rendered with an error state
    await expect(usersPage.dialog.getByText("not-an-email")).toBeVisible();

    await usersPage.cancelModal();
  });
});

// ---------------------------------------------------------------------------
// Row actions — deactivate / activate (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — deactivate & activate", () => {
  let testUserEmail: string;

  test.beforeAll(async ({ browser }) => {
    testUserEmail = uniqueEmail("deact");
    await withApiContext(browser, async (api) => {
      await api.registerUser(testUserEmail, TEST_PASSWORD);
    });
  });

  test("deactivate and then reactivate a user", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.search(testUserEmail);

    const row = usersPage.getRowByEmail(testUserEmail);
    await expect(row).toBeVisible();
    await expect(row).toContainText("Active");

    // Deactivate
    await usersPage.openRowActions(testUserEmail);
    await usersPage.clickRowAction("Deactivate User");

    await expect(usersPage.dialog.getByText("Deactivate User")).toBeVisible();
    await expect(usersPage.dialog.getByText(testUserEmail)).toBeVisible();
    await expect(
      usersPage.dialog.getByText("will immediately lose access")
    ).toBeVisible();

    await usersPage.confirmModalAction("Deactivate");
    await usersPage.expectToast("User deactivated");

    // Verify Inactive
    await usersPage.goto();
    await usersPage.search(testUserEmail);
    const inactiveRow = usersPage.getRowByEmail(testUserEmail);
    await expect(inactiveRow).toContainText("Inactive");

    // Reactivate
    await usersPage.openRowActions(testUserEmail);
    await usersPage.clickRowAction("Activate User");

    await expect(usersPage.dialog.getByText("Activate User")).toBeVisible();

    await usersPage.confirmModalAction("Activate");
    await usersPage.expectToast("User activated");

    // Verify Active again
    await usersPage.goto();
    await usersPage.search(testUserEmail);
    const reactivatedRow = usersPage.getRowByEmail(testUserEmail);
    await expect(reactivatedRow).toContainText("Active");
  });

  test.afterAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      await softCleanup(() => api.deactivateUser(testUserEmail));
      await softCleanup(() => api.deleteUser(testUserEmail));
    });
  });
});

// ---------------------------------------------------------------------------
// Row actions — delete user (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — delete user", () => {
  test("delete an inactive user", async ({ usersPage, api }) => {
    const email = uniqueEmail("delete");
    await api.registerUser(email, TEST_PASSWORD);
    await api.deactivateUser(email);

    await usersPage.goto();
    await usersPage.search(email);

    const row = usersPage.getRowByEmail(email);
    await expect(row).toBeVisible();
    await expect(row).toContainText("Inactive");

    await usersPage.openRowActions(email);
    await usersPage.clickRowAction("Delete User");

    await expect(usersPage.dialog.getByText("Delete User")).toBeVisible();
    await expect(
      usersPage.dialog.getByText("will be permanently removed")
    ).toBeVisible();

    await usersPage.confirmModalAction("Delete");
    await usersPage.expectToast("User deleted");

    // User gone
    await usersPage.goto();
    await usersPage.search(email);
    await expect(usersPage.page.getByText("No users found")).toBeVisible();
  });
});

// ---------------------------------------------------------------------------
// Row actions — cancel invite (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — cancel invite", () => {
  test("cancel a pending invite", async ({ usersPage, api }) => {
    const email = uniqueEmail("cancel-inv");
    await api.inviteUsers([email]);

    await usersPage.goto();
    await usersPage.search(email);

    const row = usersPage.getRowByEmail(email);
    await expect(row).toBeVisible();
    await expect(row).toContainText("Invite Pending");

    await usersPage.openRowActions(email);
    await usersPage.clickRowAction("Cancel Invite");

    await expect(
      usersPage.dialog.getByText("Cancel Invite").first()
    ).toBeVisible();

    await usersPage.confirmModalAction("Cancel Invite");
    await usersPage.expectToast("Invite cancelled");

    // User gone
    await usersPage.goto();
    await usersPage.search(email);
    await expect(usersPage.page.getByText("No users found")).toBeVisible();
  });
});

// ---------------------------------------------------------------------------
// Inline role editing (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — inline role editing", () => {
  let testUserEmail: string;

  test.beforeAll(async ({ browser }) => {
    testUserEmail = uniqueEmail("role");
    await withApiContext(browser, async (api) => {
      await api.registerUser(testUserEmail, TEST_PASSWORD);
    });
  });

  test("change user role from Basic to Admin and back", async ({
    usersPage,
  }) => {
    await usersPage.goto();
    await usersPage.search(testUserEmail);

    const row = usersPage.getRowByEmail(testUserEmail);
    await expect(row).toBeVisible();

    // Initially Basic
    await expect(row.getByText("Basic")).toBeVisible();

    // Change to Admin
    await usersPage.openRoleDropdown(testUserEmail);
    await usersPage.selectRole("Admin");
    await expect(row.getByText("Admin")).toBeVisible();

    // Change back to Basic
    await usersPage.openRoleDropdown(testUserEmail);
    await usersPage.selectRole("Basic");
    await expect(row.getByText("Basic")).toBeVisible();
  });

  test.afterAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      await softCleanup(() => api.deactivateUser(testUserEmail));
      await softCleanup(() => api.deleteUser(testUserEmail));
    });
  });
});

// ---------------------------------------------------------------------------
// Group management (creates ephemeral data)
// ---------------------------------------------------------------------------

test.describe("Users page — group management", () => {
  let testUserEmail: string;
  let testGroupId: number;
  const groupName = `E2E-UsersTest-${Date.now()}`;

  test.beforeAll(async ({ browser }) => {
    testUserEmail = uniqueEmail("grp");
    await withApiContext(browser, async (api) => {
      await api.registerUser(testUserEmail, TEST_PASSWORD);
      testGroupId = await api.createUserGroup(groupName);
      await api.waitForGroupSync(testGroupId);
    });
  });

  test("add user to group via edit groups modal", async ({ usersPage }) => {
    await usersPage.goto();
    await usersPage.search(testUserEmail);

    const row = usersPage.getRowByEmail(testUserEmail);
    await expect(row).toBeVisible();

    await usersPage.openEditGroupsModal(testUserEmail);
    await usersPage.searchGroupsInModal(groupName);
    await usersPage.toggleGroupInModal(groupName);
    await usersPage.saveGroupsModal();
    await usersPage.expectToast("User updated");

    // Verify group shows in the row
    await usersPage.goto();
    await usersPage.search(testUserEmail);
    const rowWithGroup = usersPage.getRowByEmail(testUserEmail);
    await expect(rowWithGroup).toContainText(groupName);
  });

  test("remove user from group via edit groups modal", async ({
    usersPage,
  }) => {
    await usersPage.goto();
    await usersPage.search(testUserEmail);

    const row = usersPage.getRowByEmail(testUserEmail);
    await expect(row).toBeVisible();

    await usersPage.openEditGroupsModal(testUserEmail);

    // Group shows as joined — click to remove
    await usersPage.toggleGroupInModal(groupName);
    await usersPage.saveGroupsModal();
    await usersPage.expectToast("User updated");

    // Verify group removed
    await usersPage.goto();
    await usersPage.search(testUserEmail);
    await expect(usersPage.getRowByEmail(testUserEmail)).not.toContainText(
      groupName
    );
  });

  test.afterAll(async ({ browser }) => {
    await withApiContext(browser, async (api) => {
      await softCleanup(() => api.deleteUserGroup(testGroupId));
      await softCleanup(() => api.deactivateUser(testUserEmail));
      await softCleanup(() => api.deleteUser(testUserEmail));
    });
  });
});

// ---------------------------------------------------------------------------
// Stats bar
// ---------------------------------------------------------------------------

test.describe("Users page — stats bar", () => {
  test("stats bar shows active users count", async ({ usersPage }) => {
    await usersPage.goto();
    // Number and label are separate elements; check for the label
    await expect(usersPage.page.getByText("active users")).toBeVisible();
  });

  test("stats bar updates after inviting a user", async ({
    usersPage,
    api,
  }) => {
    const email = uniqueEmail("stats");

    await usersPage.goto();

    await usersPage.openInviteModal();
    await usersPage.addInviteEmail(email);
    await usersPage.submitInvite();
    await usersPage.expectToast(/Invited 1 user/);

    // Stats bar should reflect the new invite
    await usersPage.goto();
    await expect(usersPage.page.getByText("pending invites")).toBeVisible();

    // Cleanup
    await api.cancelInvite(email);
  });
});


================================================
FILE: web/tests/e2e/admin/voice/disconnect-provider.spec.ts
================================================
import { test, expect, Page, Locator } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

const VOICE_URL = "/admin/configuration/voice";

const FAKE_PROVIDERS = {
  openai_active_stt: {
    id: 1,
    name: "openai",
    provider_type: "openai",
    is_default_stt: true,
    is_default_tts: false,
    stt_model: "whisper",
    tts_model: null,
    default_voice: null,
    has_api_key: true,
    target_uri: null,
  },
  openai_active_both: {
    id: 1,
    name: "openai",
    provider_type: "openai",
    is_default_stt: true,
    is_default_tts: true,
    stt_model: "whisper",
    tts_model: "tts-1",
    default_voice: "alloy",
    has_api_key: true,
    target_uri: null,
  },
  openai_connected: {
    id: 1,
    name: "openai",
    provider_type: "openai",
    is_default_stt: false,
    is_default_tts: false,
    stt_model: null,
    tts_model: null,
    default_voice: null,
    has_api_key: true,
    target_uri: null,
  },
  elevenlabs_connected: {
    id: 2,
    name: "elevenlabs",
    provider_type: "elevenlabs",
    is_default_stt: false,
    is_default_tts: false,
    stt_model: null,
    tts_model: null,
    default_voice: null,
    has_api_key: true,
    target_uri: null,
  },
};

function findModelCard(page: Page, ariaLabel: string): Locator {
  return page.getByLabel(ariaLabel, { exact: true });
}

function mainContainer(page: Page): Locator {
  return page.locator("[data-main-container]");
}

async function mockVoiceApis(
  page: Page,
  providers: (typeof FAKE_PROVIDERS)[keyof typeof FAKE_PROVIDERS][]
) {
  await page.route("**/api/admin/voice/providers", async (route) => {
    if (route.request().method() === "GET") {
      await route.fulfill({ status: 200, json: providers });
    } else {
      await route.continue();
    }
  });
}

test.describe("Voice Provider Disconnect", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
  });

  test("should disconnect a non-active provider and affect both STT and TTS cards", async ({
    page,
  }) => {
    const providers = [
      { ...FAKE_PROVIDERS.openai_connected },
      { ...FAKE_PROVIDERS.elevenlabs_connected },
    ];
    await mockVoiceApis(page, providers);

    await page.goto(VOICE_URL);
    await page.waitForSelector("text=Speech to Text", { timeout: 20000 });

    const whisperCard = findModelCard(page, "voice-stt-whisper");
    await whisperCard.waitFor({ state: "visible", timeout: 10000 });

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-non-active-before",
    });

    const disconnectButton = whisperCard.getByRole("button", {
      name: "Disconnect Whisper",
    });
    await expect(disconnectButton).toBeVisible();
    await expect(disconnectButton).toBeEnabled();

    // Mock DELETE to succeed and remove OpenAI from provider list
    await page.route("**/api/admin/voice/providers/1", async (route) => {
      if (route.request().method() === "DELETE") {
        await page.unroute("**/api/admin/voice/providers");
        await page.route("**/api/admin/voice/providers", async (route) => {
          if (route.request().method() === "GET") {
            await route.fulfill({
              status: 200,
              json: [{ ...FAKE_PROVIDERS.elevenlabs_connected }],
            });
          } else {
            await route.continue();
          }
        });
        await route.fulfill({ status: 200, json: {} });
      } else {
        await route.continue();
      }
    });

    await disconnectButton.click();

    // Modal shows provider name, not model name
    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });
    await expect(confirmDialog).toContainText("Disconnect OpenAI");

    await expectElementScreenshot(confirmDialog, {
      name: "voice-disconnect-non-active-modal",
    });

    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await confirmButton.click();

    // Both STT and TTS cards for OpenAI revert to disconnected
    await expect(
      whisperCard.getByRole("button", { name: "Connect" })
    ).toBeVisible({ timeout: 10000 });

    const tts1Card = findModelCard(page, "voice-tts-tts-1");
    await expect(tts1Card.getByRole("button", { name: "Connect" })).toBeVisible(
      { timeout: 10000 }
    );

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-non-active-after",
    });
  });

  test("should show replacement dropdown when disconnecting active provider with alternatives", async ({
    page,
  }) => {
    // OpenAI is active for STT, ElevenLabs is also configured
    const providers = [
      { ...FAKE_PROVIDERS.openai_active_stt },
      { ...FAKE_PROVIDERS.elevenlabs_connected },
    ];
    await mockVoiceApis(page, providers);

    await page.goto(VOICE_URL);
    await page.waitForSelector("text=Speech to Text", { timeout: 20000 });

    const whisperCard = findModelCard(page, "voice-stt-whisper");
    await whisperCard.waitFor({ state: "visible", timeout: 10000 });

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-active-with-alt-before",
    });

    const disconnectButton = whisperCard.getByRole("button", {
      name: "Disconnect Whisper",
    });
    await disconnectButton.click();

    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });
    await expect(confirmDialog).toContainText("Disconnect OpenAI");

    // Should show replacement text and dropdown
    await expect(
      confirmDialog.getByText("Session history will be preserved")
    ).toBeVisible();

    // Disconnect button should be enabled because first replacement is auto-selected
    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await expect(confirmButton).toBeEnabled();

    await expectElementScreenshot(confirmDialog, {
      name: "voice-disconnect-active-with-alt-modal",
    });
  });

  test("should show replacement when provider is default for both STT and TTS", async ({
    page,
  }) => {
    // OpenAI is default for both modes, ElevenLabs also configured
    const providers = [
      { ...FAKE_PROVIDERS.openai_active_both },
      { ...FAKE_PROVIDERS.elevenlabs_connected },
    ];
    await mockVoiceApis(page, providers);

    await page.goto(VOICE_URL);
    await page.waitForSelector("text=Speech to Text", { timeout: 20000 });

    const whisperCard = findModelCard(page, "voice-stt-whisper");
    await whisperCard.waitFor({ state: "visible", timeout: 10000 });

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-both-modes-before",
    });

    const disconnectButton = whisperCard.getByRole("button", {
      name: "Disconnect Whisper",
    });
    await disconnectButton.click();

    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });
    await expect(confirmDialog).toContainText("Disconnect OpenAI");

    // Should mention both modes
    await expect(
      confirmDialog.getByText("speech-to-text or text-to-speech")
    ).toBeVisible();

    // Should show replacement dropdown
    await expect(
      confirmDialog.getByText("Session history will be preserved")
    ).toBeVisible();

    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await expect(confirmButton).toBeEnabled();

    await expectElementScreenshot(confirmDialog, {
      name: "voice-disconnect-both-modes-modal",
    });
  });

  test("should show connect message when disconnecting active provider with no alternatives", async ({
    page,
  }) => {
    // Only OpenAI configured, active for STT — no other providers
    const providers = [{ ...FAKE_PROVIDERS.openai_active_stt }];
    await mockVoiceApis(page, providers);

    await page.goto(VOICE_URL);
    await page.waitForSelector("text=Speech to Text", { timeout: 20000 });

    const whisperCard = findModelCard(page, "voice-stt-whisper");
    await whisperCard.waitFor({ state: "visible", timeout: 10000 });

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-no-alt-before",
    });

    const disconnectButton = whisperCard.getByRole("button", {
      name: "Disconnect Whisper",
    });
    await disconnectButton.click();

    const confirmDialog = page.getByRole("dialog");
    await expect(confirmDialog).toBeVisible({ timeout: 5000 });
    await expect(confirmDialog).toContainText("Disconnect OpenAI");

    // Should show message about connecting another provider
    await expect(
      confirmDialog.getByText("Connect another provider")
    ).toBeVisible();

    // Disconnect button should be enabled
    const confirmButton = confirmDialog.getByRole("button", {
      name: "Disconnect",
    });
    await expect(confirmButton).toBeEnabled();

    await expectElementScreenshot(confirmDialog, {
      name: "voice-disconnect-no-alt-modal",
    });
  });

  test("should not show disconnect button for unconfigured provider", async ({
    page,
  }) => {
    await mockVoiceApis(page, []);

    await page.goto(VOICE_URL);
    await page.waitForSelector("text=Speech to Text", { timeout: 20000 });

    const whisperCard = findModelCard(page, "voice-stt-whisper");
    await whisperCard.waitFor({ state: "visible", timeout: 10000 });

    const disconnectButton = whisperCard.getByRole("button", {
      name: "Disconnect Whisper",
    });
    await expect(disconnectButton).not.toBeVisible();

    await expectElementScreenshot(mainContainer(page), {
      name: "voice-disconnect-unconfigured",
    });
  });
});


================================================
FILE: web/tests/e2e/admin/web-search/disconnect-provider.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";
import {
  WEB_SEARCH_URL,
  FAKE_SEARCH_PROVIDERS,
  FAKE_CONTENT_PROVIDERS,
  findProviderCard,
  mainContainer,
  mockWebSearchApis,
} from "./svc";

test.describe("Web Search Provider Disconnect", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
  });

  test.describe("Search Engine Providers", () => {
    test("should disconnect a connected (non-active) search provider", async ({
      page,
    }) => {
      const searchProviders = [
        { ...FAKE_SEARCH_PROVIDERS.exa },
        { ...FAKE_SEARCH_PROVIDERS.brave },
      ];
      await mockWebSearchApis(page, searchProviders, []);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Search Engine", { timeout: 20000 });

      const braveCard = findProviderCard(page, "Brave");
      await braveCard.waitFor({ state: "visible", timeout: 10000 });

      await expectElementScreenshot(mainContainer(page), {
        name: "web-search-disconnect-non-active-before",
      });

      await braveCard.hover();
      const disconnectButton = braveCard.getByRole("button", {
        name: "Disconnect Brave",
      });
      await expect(disconnectButton).toBeVisible();
      await expect(disconnectButton).toBeEnabled();

      // Mock the DELETE to succeed
      await page.route(
        "**/api/admin/web-search/search-providers/2",
        async (route) => {
          if (route.request().method() === "DELETE") {
            await page.unroute("**/api/admin/web-search/search-providers");
            await page.route(
              "**/api/admin/web-search/search-providers",
              async (route) => {
                if (route.request().method() === "GET") {
                  await route.fulfill({
                    status: 200,
                    json: [{ ...FAKE_SEARCH_PROVIDERS.exa }],
                  });
                } else {
                  await route.continue();
                }
              }
            );
            await route.fulfill({ status: 200, json: {} });
          } else {
            await route.continue();
          }
        }
      );

      await disconnectButton.click();

      const confirmDialog = page.getByRole("dialog");
      await expect(confirmDialog).toBeVisible({ timeout: 5000 });
      await expect(confirmDialog).toContainText("Disconnect Brave");

      await expectElementScreenshot(confirmDialog, {
        name: "web-search-disconnect-non-active-modal",
      });

      const confirmButton = confirmDialog.getByRole("button", {
        name: "Disconnect",
      });
      await confirmButton.click();

      await expect(
        braveCard.getByRole("button", { name: "Connect" })
      ).toBeVisible({ timeout: 10000 });

      await expectElementScreenshot(mainContainer(page), {
        name: "web-search-disconnect-non-active-after",
      });
    });

    test("should show replacement dropdown when disconnecting active search provider with alternatives", async ({
      page,
    }) => {
      // Exa is active, Brave is also configured
      const searchProviders = [
        { ...FAKE_SEARCH_PROVIDERS.exa },
        { ...FAKE_SEARCH_PROVIDERS.brave },
      ];
      await mockWebSearchApis(page, searchProviders, []);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Search Engine", { timeout: 20000 });

      const exaCard = findProviderCard(page, "Exa");
      await exaCard.waitFor({ state: "visible", timeout: 10000 });

      await exaCard.hover();
      const disconnectButton = exaCard.getByRole("button", {
        name: "Disconnect Exa",
      });
      await expect(disconnectButton).toBeVisible();
      await expect(disconnectButton).toBeEnabled();

      await disconnectButton.click();

      const confirmDialog = page.getByRole("dialog");
      await expect(confirmDialog).toBeVisible({ timeout: 5000 });
      await expect(confirmDialog).toContainText("Disconnect Exa");

      // Should show replacement dropdown
      await expect(
        confirmDialog.getByText("Search history will be preserved")
      ).toBeVisible();

      // Disconnect button should be enabled because first replacement is auto-selected
      const confirmButton = confirmDialog.getByRole("button", {
        name: "Disconnect",
      });
      await expect(confirmButton).toBeEnabled();

      await expectElementScreenshot(confirmDialog, {
        name: "web-search-disconnect-active-with-alt-modal",
      });
    });

    test("should show connect message when disconnecting active search provider with no alternatives", async ({
      page,
    }) => {
      // Only Exa configured and active
      await mockWebSearchApis(page, [{ ...FAKE_SEARCH_PROVIDERS.exa }], []);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Search Engine", { timeout: 20000 });

      const exaCard = findProviderCard(page, "Exa");
      await exaCard.waitFor({ state: "visible", timeout: 10000 });

      await exaCard.hover();
      const disconnectButton = exaCard.getByRole("button", {
        name: "Disconnect Exa",
      });
      await disconnectButton.click();

      const confirmDialog = page.getByRole("dialog");
      await expect(confirmDialog).toBeVisible({ timeout: 5000 });

      // Should show message about connecting another provider
      await expect(
        confirmDialog.getByText("Connect another provider")
      ).toBeVisible();

      // Disconnect button should be enabled
      const confirmButton = confirmDialog.getByRole("button", {
        name: "Disconnect",
      });
      await expect(confirmButton).toBeEnabled();

      await expectElementScreenshot(confirmDialog, {
        name: "web-search-disconnect-no-alt-modal",
      });
    });

    test("should not show disconnect button for unconfigured search provider", async ({
      page,
    }) => {
      await mockWebSearchApis(page, [{ ...FAKE_SEARCH_PROVIDERS.exa }], []);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Search Engine", { timeout: 20000 });

      const braveCard = findProviderCard(page, "Brave");
      await braveCard.waitFor({ state: "visible", timeout: 10000 });

      const disconnectButton = braveCard.getByRole("button", {
        name: "Disconnect Brave",
      });
      await expect(disconnectButton).not.toBeVisible();

      await expectElementScreenshot(mainContainer(page), {
        name: "web-search-disconnect-unconfigured",
      });
    });
  });

  test.describe("Web Crawler (Content) Providers", () => {
    test("should disconnect a connected (non-active) content provider", async ({
      page,
    }) => {
      // Firecrawl connected but not active, Exa is active
      const contentProviders = [
        { ...FAKE_CONTENT_PROVIDERS.firecrawl, is_active: false },
        { ...FAKE_CONTENT_PROVIDERS.exa, is_active: true },
      ];
      await mockWebSearchApis(page, [], contentProviders);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Web Crawler", { timeout: 20000 });

      const firecrawlCard = findProviderCard(page, "Firecrawl");
      await firecrawlCard.waitFor({ state: "visible", timeout: 10000 });

      await firecrawlCard.hover();
      const disconnectButton = firecrawlCard.getByRole("button", {
        name: "Disconnect Firecrawl",
      });
      await expect(disconnectButton).toBeVisible();
      await expect(disconnectButton).toBeEnabled();

      // Mock the DELETE to succeed
      await page.route(
        "**/api/admin/web-search/content-providers/10",
        async (route) => {
          if (route.request().method() === "DELETE") {
            await page.unroute("**/api/admin/web-search/content-providers");
            await page.route(
              "**/api/admin/web-search/content-providers",
              async (route) => {
                if (route.request().method() === "GET") {
                  await route.fulfill({
                    status: 200,
                    json: [{ ...FAKE_CONTENT_PROVIDERS.exa, is_active: true }],
                  });
                } else {
                  await route.continue();
                }
              }
            );
            await route.fulfill({ status: 200, json: {} });
          } else {
            await route.continue();
          }
        }
      );

      await disconnectButton.click();

      const confirmDialog = page.getByRole("dialog");
      await expect(confirmDialog).toBeVisible({ timeout: 5000 });
      await expect(confirmDialog).toContainText("Disconnect Firecrawl");

      await expectElementScreenshot(confirmDialog, {
        name: "web-search-disconnect-content-non-active-modal",
      });

      const confirmButton = confirmDialog.getByRole("button", {
        name: "Disconnect",
      });
      await confirmButton.click();

      await expect(
        firecrawlCard.getByRole("button", { name: "Connect" })
      ).toBeVisible({ timeout: 10000 });
    });

    test("should show replacement dropdown when disconnecting active content provider with alternatives", async ({
      page,
    }) => {
      // Firecrawl is active, Exa is also configured
      const contentProviders = [
        { ...FAKE_CONTENT_PROVIDERS.firecrawl },
        { ...FAKE_CONTENT_PROVIDERS.exa },
      ];
      await mockWebSearchApis(page, [], contentProviders);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Web Crawler", { timeout: 20000 });

      const firecrawlCard = findProviderCard(page, "Firecrawl");
      await firecrawlCard.waitFor({ state: "visible", timeout: 10000 });

      await firecrawlCard.hover();
      const disconnectButton = firecrawlCard.getByRole("button", {
        name: "Disconnect Firecrawl",
      });
      await disconnectButton.click();

      const confirmDialog = page.getByRole("dialog");
      await expect(confirmDialog).toBeVisible({ timeout: 5000 });

      // Should show replacement dropdown
      await expect(
        confirmDialog.getByText("Search history will be preserved")
      ).toBeVisible();

      // Disconnect should be enabled because first replacement is auto-selected
      const confirmButton = confirmDialog.getByRole("button", {
        name: "Disconnect",
      });
      await expect(confirmButton).toBeEnabled();

      await expectElementScreenshot(confirmDialog, {
        name: "web-search-disconnect-content-active-with-alt-modal",
      });
    });

    test("should not show disconnect for Onyx Web Crawler (built-in)", async ({
      page,
    }) => {
      await mockWebSearchApis(page, [], []);

      await page.goto(WEB_SEARCH_URL);
      await page.waitForSelector("text=Web Crawler", { timeout: 20000 });

      const onyxCard = findProviderCard(page, "Onyx Web Crawler");
      await onyxCard.waitFor({ state: "visible", timeout: 10000 });

      const disconnectButton = onyxCard.getByRole("button", {
        name: "Disconnect Onyx Web Crawler",
      });
      await expect(disconnectButton).not.toBeVisible();
    });
  });
});


================================================
FILE: web/tests/e2e/admin/web-search/svc.ts
================================================
import type { Page, Locator } from "@playwright/test";

export const WEB_SEARCH_URL = "/admin/configuration/web-search";

export const FAKE_SEARCH_PROVIDERS = {
  exa: {
    id: 1,
    name: "Exa",
    provider_type: "exa",
    is_active: true,
    config: null,
    has_api_key: true,
  },
  brave: {
    id: 2,
    name: "Brave",
    provider_type: "brave",
    is_active: false,
    config: null,
    has_api_key: true,
  },
};

export const FAKE_CONTENT_PROVIDERS = {
  firecrawl: {
    id: 10,
    name: "Firecrawl",
    provider_type: "firecrawl",
    is_active: true,
    config: { base_url: "https://api.firecrawl.dev/v2/scrape" },
    has_api_key: true,
  },
  exa: {
    id: 11,
    name: "Exa",
    provider_type: "exa",
    is_active: false,
    config: null,
    has_api_key: true,
  },
};

export function findProviderCard(page: Page, providerLabel: string): Locator {
  return page
    .locator("div.rounded-16")
    .filter({ hasText: providerLabel })
    .first();
}

export function mainContainer(page: Page): Locator {
  return page.locator("[data-main-container]");
}

export async function openProviderModal(
  page: Page,
  providerLabel: string
): Promise<void> {
  const card = findProviderCard(page, providerLabel);
  await card.waitFor({ state: "visible", timeout: 10000 });

  // First try to find the Connect button
  const connectButton = card.getByRole("button", { name: "Connect" });
  if (await connectButton.isVisible({ timeout: 1000 }).catch(() => false)) {
    await connectButton.click();
    return;
  }

  // If no Connect button, click the Edit icon button to update credentials
  const editButton = card.getByRole("button", { name: /^Edit / });
  await editButton.waitFor({ state: "visible", timeout: 5000 });
  await editButton.click();
}

export async function mockWebSearchApis(
  page: Page,
  searchProviders: (typeof FAKE_SEARCH_PROVIDERS)[keyof typeof FAKE_SEARCH_PROVIDERS][],
  contentProviders: (typeof FAKE_CONTENT_PROVIDERS)[keyof typeof FAKE_CONTENT_PROVIDERS][]
): Promise<void> {
  await page.route(
    "**/api/admin/web-search/search-providers",
    async (route) => {
      if (route.request().method() === "GET") {
        await route.fulfill({ status: 200, json: searchProviders });
      } else {
        await route.continue();
      }
    }
  );

  await page.route(
    "**/api/admin/web-search/content-providers",
    async (route) => {
      if (route.request().method() === "GET") {
        await route.fulfill({ status: 200, json: contentProviders });
      } else {
        await route.continue();
      }
    }
  );
}


================================================
FILE: web/tests/e2e/admin/web-search/web_content_providers.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { WEB_SEARCH_URL, findProviderCard, openProviderModal } from "./svc";

test.describe("Web Content Provider Configuration", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    await page.goto(WEB_SEARCH_URL);
    await page.waitForLoadState("networkidle");

    // Wait for page to fully load
    await page.waitForSelector("text=Web Crawler", { timeout: 20000 });

    console.log("[web-content-test] Page loaded successfully");
  });

  test.describe("Firecrawl Provider", () => {
    const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;

    test.skip(
      !FIRECRAWL_API_KEY,
      "FIRECRAWL_API_KEY environment variable not set"
    );

    test("should configure Firecrawl as web crawler", async ({ page }) => {
      // Click Connect on the Firecrawl card (or key icon if already configured)
      await openProviderModal(page, "Firecrawl");

      const modalDialog = page.getByRole("dialog");
      await expect(modalDialog).toBeVisible({ timeout: 10000 });
      await expect(
        page.getByText("Set up Firecrawl", { exact: false })
      ).toBeVisible();

      // Firecrawl has a base URL field (shown first) and API key
      const baseUrlInput = page.locator('input[placeholder="https://"]');
      await baseUrlInput.waitFor({ state: "visible", timeout: 5000 });
      // Don't check value - it might have a custom value from previous config

      // Enter API key - clear first in case modal opened with masked credentials.
      const apiKeyInput = modalDialog.getByTestId("web-provider-api-key-input");
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await apiKeyInput.clear();
      await apiKeyInput.fill(FIRECRAWL_API_KEY!);

      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[web-content-test] Clicked Connect, waiting for validation..."
      );

      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });

      console.log(
        "[web-content-test] Modal closed, verifying Firecrawl is active..."
      );

      await page.waitForLoadState("networkidle");

      const firecrawlCard = findProviderCard(page, "Firecrawl");
      await expect(
        firecrawlCard.getByRole("button", { name: "Current Crawler" })
      ).toBeVisible({ timeout: 15000 });

      console.log("[web-content-test] Firecrawl configured successfully");
    });

    test("should switch back to Onyx Web Crawler from Firecrawl", async ({
      page,
    }) => {
      // First, ensure Firecrawl is configured and active
      const firecrawlCard = findProviderCard(page, "Firecrawl");
      await firecrawlCard.waitFor({ state: "visible", timeout: 10000 });

      const connectButton = firecrawlCard.getByRole("button", {
        name: "Connect",
      });
      const setDefaultButton = firecrawlCard.getByRole("button", {
        name: "Set as Default",
      });

      // Only configure if Connect button is visible (not already configured)
      if (await connectButton.isVisible()) {
        await connectButton.click();

        const modalDialog = page.getByRole("dialog");
        await expect(modalDialog).toBeVisible({ timeout: 10000 });
        await expect(
          page.getByText("Set up Firecrawl", { exact: false })
        ).toBeVisible();

        // Enter API key - clear first in case modal opened with masked credentials.
        const apiKeyInput = modalDialog.getByTestId(
          "web-provider-api-key-input"
        );
        await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
        await apiKeyInput.clear();
        await apiKeyInput.fill(FIRECRAWL_API_KEY!);

        await modalDialog
          .getByRole("button", { name: "Connect", exact: true })
          .click();
        await expect(modalDialog).not.toBeVisible({ timeout: 60000 });
        await page.waitForLoadState("networkidle");
      } else if (await setDefaultButton.isVisible()) {
        // If already configured but not active, set as default
        await setDefaultButton.click();
        await page.waitForLoadState("networkidle");
      }

      // Verify Firecrawl is now the current crawler
      const updatedFirecrawlCard = findProviderCard(page, "Firecrawl");
      await expect(
        updatedFirecrawlCard.getByRole("button", { name: "Current Crawler" })
      ).toBeVisible({ timeout: 15000 });

      console.log(
        "[web-content-test] Firecrawl configured, now switching to Onyx Web Crawler..."
      );

      // Switch to Onyx Web Crawler
      const onyxCrawlerCard = findProviderCard(page, "Onyx Web Crawler");
      await onyxCrawlerCard.waitFor({ state: "visible", timeout: 10000 });

      const onyxSetDefault = onyxCrawlerCard.getByRole("button", {
        name: "Set as Default",
      });

      if (await onyxSetDefault.isVisible()) {
        await onyxSetDefault.click();
        await page.waitForLoadState("networkidle");
      }

      await expect(
        onyxCrawlerCard.getByRole("button", { name: "Current Crawler" })
      ).toBeVisible({ timeout: 15000 });

      console.log("[web-content-test] Switched back to Onyx Web Crawler");
    });
  });
});


================================================
FILE: web/tests/e2e/admin/web-search/web_search_providers.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { WEB_SEARCH_URL, findProviderCard, openProviderModal } from "./svc";

test.describe("Web Search Provider Configuration", () => {
  test.beforeEach(async ({ page }) => {
    // Log in as admin before each test
    await page.context().clearCookies();
    await loginAs(page, "admin");

    // Navigate to web search config page
    await page.goto(WEB_SEARCH_URL);
    await page.waitForLoadState("networkidle");

    // Wait for page to fully load - look for the Search Engine section heading
    await page.waitForSelector("text=Search Engine", { timeout: 20000 });

    console.log("[web-search-test] Page loaded successfully");
  });

  test.describe("Exa Provider", () => {
    const EXA_API_KEY = process.env.EXA_API_KEY;

    test.skip(!EXA_API_KEY, "EXA_API_KEY environment variable not set");

    test.skip("should configure Exa as web search provider", async ({
      page,
    }) => {
      // Click Connect on the Exa card (or key icon if already configured)
      await openProviderModal(page, "Exa");

      // Wait for modal to open - Modal uses Radix Dialog with role="dialog"
      const modalDialog = page.getByRole("dialog", { name: /set up exa/i });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      // Enter API key - clear first in case modal opened with masked credentials
      // Note: PasswordInputTypeIn uses type="text" with custom ∗ masking per design guidelines
      const apiKeyInput = modalDialog.getByLabel(/api key/i);
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await apiKeyInput.clear();
      await apiKeyInput.fill(EXA_API_KEY!);

      // Click Connect in modal - scope to the dialog to avoid matching other Connect buttons
      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[web-search-test] Clicked Connect, waiting for validation..."
      );

      // Wait for modal to close
      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });

      console.log(
        "[web-search-test] Modal closed, verifying provider is active..."
      );

      // Wait for page to update
      await page.waitForLoadState("networkidle");

      // Verify Exa is now the current default - look for "Current Default" button in the Exa card
      const exaCard = findProviderCard(page, "Exa");
      await expect(
        exaCard.getByRole("button", { name: "Current Default" })
      ).toBeVisible({ timeout: 15000 });

      console.log("[web-search-test] Exa provider configured successfully");
    });
  });

  test.describe("Google PSE Provider", () => {
    const GOOGLE_PSE_API_KEY = process.env.GOOGLE_PSE_API_KEY;
    const GOOGLE_PSE_SEARCH_ENGINE_ID = process.env.GOOGLE_PSE_SEARCH_ENGINE_ID;

    test.skip(
      !GOOGLE_PSE_API_KEY || !GOOGLE_PSE_SEARCH_ENGINE_ID,
      "GOOGLE_PSE_API_KEY or GOOGLE_PSE_SEARCH_ENGINE_ID environment variable not set"
    );

    test("should configure Google PSE as web search provider", async ({
      page,
    }) => {
      // Click Connect on the Google PSE card
      await openProviderModal(page, "Google PSE");

      // Wait for modal to open
      const modalDialog = page.getByRole("dialog", {
        name: /set up google pse/i,
      });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      // Google PSE requires both Search Engine ID and API key
      // Enter Search Engine ID
      const searchEngineIdInput = page.locator(
        'input[placeholder="Enter search engine ID"]'
      );
      await searchEngineIdInput.waitFor({ state: "visible", timeout: 5000 });
      await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);

      // Enter API key
      const apiKeyInput = modalDialog.getByLabel(/api key/i);
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);

      // Click Connect in modal
      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[web-search-test] Clicked Connect for Google PSE, waiting for validation..."
      );

      // Wait for modal to close
      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });

      console.log(
        "[web-search-test] Modal closed, verifying Google PSE is active..."
      );

      // Wait for page to update
      await page.waitForLoadState("networkidle");

      // Verify Google PSE is now the current default
      const googleCard = findProviderCard(page, "Google PSE");
      await expect(
        googleCard.getByRole("button", { name: "Current Default" })
      ).toBeVisible({ timeout: 15000 });

      console.log(
        "[web-search-test] Google PSE provider configured successfully"
      );
    });

    test("should reconnect with stored API key using update key button", async ({
      page,
    }) => {
      // First, configure Google PSE if not already configured
      const googleCard = findProviderCard(page, "Google PSE");
      await googleCard.waitFor({ state: "visible", timeout: 10000 });

      const connectButton = googleCard.getByRole("button", { name: "Connect" });

      // Only configure if Connect button is visible (not already configured)
      if (await connectButton.isVisible()) {
        await connectButton.click();
        const setupDialog = page.getByRole("dialog", {
          name: /set up google pse/i,
        });
        await expect(setupDialog).toBeVisible({ timeout: 10000 });

        const searchEngineIdInput = page.locator(
          'input[placeholder="Enter search engine ID"]'
        );
        await searchEngineIdInput.waitFor({ state: "visible", timeout: 5000 });
        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);

        const apiKeyInput = setupDialog.getByLabel(/api key/i);
        await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);

        await setupDialog
          .getByRole("button", { name: "Connect", exact: true })
          .click();
        await expect(setupDialog).not.toBeVisible({ timeout: 30000 });
        await page.waitForLoadState("networkidle");
      }

      console.log(
        "[web-search-test] Google PSE configured, now testing update key button..."
      );

      // Now click the Edit icon button
      const updatedGoogleCard = findProviderCard(page, "Google PSE");
      const editButton = updatedGoogleCard.getByRole("button", {
        name: /^Edit /,
      });
      await expect(editButton).toBeVisible({ timeout: 10000 });
      await editButton.click();

      // Modal should open with masked API key
      const modalDialog = page.getByRole("dialog", {
        name: /set up google pse/i,
      });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      // Verify the API key input shows masked value
      // PasswordInputTypeIn displays stored values with ∗ (ASTERISK OPERATOR) per design guidelines
      const apiKeyInput = modalDialog.getByLabel(/api key/i);
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await expect(apiKeyInput).toHaveValue("∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗");

      // Immediately click Connect without changing anything
      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[web-search-test] Clicked Connect with stored key, waiting for success..."
      );

      // Wait for modal to close (success)
      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });

      console.log(
        "[web-search-test] Modal closed, verifying Google PSE is still active..."
      );

      // Wait for page to update
      await page.waitForLoadState("networkidle");

      // Verify Google PSE is still the current default
      const finalGoogleCard = findProviderCard(page, "Google PSE");
      await expect(
        finalGoogleCard.getByRole("button", { name: "Current Default" })
      ).toBeVisible({ timeout: 15000 });

      console.log(
        "[web-search-test] Successfully reconnected with stored API key"
      );
    });

    test("should fail when changing search engine ID with stored API key", async ({
      page,
    }) => {
      // First, configure Google PSE if not already configured
      const googleCard = findProviderCard(page, "Google PSE");
      await googleCard.waitFor({ state: "visible", timeout: 10000 });

      const connectButton = googleCard.getByRole("button", { name: "Connect" });

      // Only configure if Connect button is visible (not already configured)
      if (await connectButton.isVisible()) {
        await connectButton.click();
        const setupDialog = page.getByRole("dialog", {
          name: /set up google pse/i,
        });
        await expect(setupDialog).toBeVisible({ timeout: 10000 });

        const searchEngineIdInput = page.locator(
          'input[placeholder="Enter search engine ID"]'
        );
        await searchEngineIdInput.waitFor({ state: "visible", timeout: 5000 });
        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);

        const apiKeyInput = setupDialog.getByLabel(/api key/i);
        await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);

        await setupDialog
          .getByRole("button", { name: "Connect", exact: true })
          .click();
        await expect(setupDialog).not.toBeVisible({ timeout: 30000 });
        await page.waitForLoadState("networkidle");
      }

      console.log(
        "[web-search-test] Google PSE configured, now testing invalid search engine ID change..."
      );

      // Now click the Edit icon button
      const updatedGoogleCard = findProviderCard(page, "Google PSE");
      const editButton = updatedGoogleCard.getByRole("button", {
        name: /^Edit /,
      });
      await expect(editButton).toBeVisible({ timeout: 10000 });
      await editButton.click();

      // Modal should open with masked API key
      const modalDialog = page.getByRole("dialog", {
        name: /set up google pse/i,
      });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      // Change the search engine ID to an invalid value
      const searchEngineIdInput = page.locator(
        'input[placeholder="Enter search engine ID"]'
      );
      await searchEngineIdInput.waitFor({ state: "visible", timeout: 5000 });
      await searchEngineIdInput.clear();
      await searchEngineIdInput.fill("invalid-search-engine-id");

      // Do NOT change the API key - keep the masked value
      // PasswordInputTypeIn displays stored values with ∗ (ASTERISK OPERATOR) per design guidelines
      const apiKeyInput = modalDialog.getByLabel(/api key/i);
      await expect(apiKeyInput).toHaveValue("∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗");

      // Click Connect - should fail because search engine ID doesn't match the stored API key
      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      console.log(
        "[web-search-test] Clicked Connect with invalid search engine ID, waiting for error..."
      );

      // Should show error message
      await expect(page.getByText(/failed|invalid|error/i).first()).toBeVisible(
        { timeout: 20000 }
      );

      console.log(
        "[web-search-test] Error message displayed as expected for mismatched search engine ID"
      );
    });
  });

  test.describe("Brave Provider", () => {
    const BRAVE_SEARCH_API_KEY = process.env.BRAVE_SEARCH_API_KEY;

    test.skip(
      !BRAVE_SEARCH_API_KEY,
      "BRAVE_SEARCH_API_KEY environment variable not set"
    );

    test("should configure Brave as web search provider", async ({ page }) => {
      await openProviderModal(page, "Brave");

      const modalDialog = page.getByRole("dialog", { name: /set up brave/i });
      await expect(modalDialog).toBeVisible({ timeout: 10000 });

      const apiKeyInput = modalDialog.getByLabel(/api key/i);
      await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
      await apiKeyInput.clear();
      await apiKeyInput.fill(BRAVE_SEARCH_API_KEY!);

      const modalConnectButton = modalDialog.getByRole("button", {
        name: "Connect",
        exact: true,
      });
      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });
      await modalConnectButton.click();

      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });
      await page.waitForLoadState("networkidle");

      const braveCard = findProviderCard(page, "Brave");
      await expect(
        braveCard.getByRole("button", { name: "Current Default" })
      ).toBeVisible({ timeout: 15000 });
    });
  });

  test.describe("Provider Switching", () => {
    // These tests require both providers to be configured
    const EXA_API_KEY = process.env.EXA_API_KEY;
    const GOOGLE_PSE_API_KEY = process.env.GOOGLE_PSE_API_KEY;
    const GOOGLE_PSE_SEARCH_ENGINE_ID = process.env.GOOGLE_PSE_SEARCH_ENGINE_ID;

    test.skip(
      !EXA_API_KEY || !GOOGLE_PSE_API_KEY || !GOOGLE_PSE_SEARCH_ENGINE_ID,
      "Both EXA and Google PSE credentials required"
    );

    test("should switch between configured providers", async ({ page }) => {
      // First, configure Exa if needed
      const exaCard = findProviderCard(page, "Exa");
      await exaCard.waitFor({ state: "visible", timeout: 10000 });

      let connectButton = exaCard.getByRole("button", { name: "Connect" });

      // Only configure if Connect button is visible (not already configured)
      if (await connectButton.isVisible()) {
        await connectButton.click();
        const exaDialog = page.getByRole("dialog", { name: /set up exa/i });
        await expect(exaDialog).toBeVisible({ timeout: 10000 });

        const apiKeyInput = exaDialog.getByLabel(/api key/i);
        await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
        await apiKeyInput.fill(EXA_API_KEY!);

        await exaDialog
          .getByRole("button", { name: "Connect", exact: true })
          .click();
        await expect(exaDialog).not.toBeVisible({ timeout: 30000 });
        await page.waitForLoadState("networkidle");
      }

      // Configure Google PSE if needed
      const googleCard = findProviderCard(page, "Google PSE");
      await googleCard.waitFor({ state: "visible", timeout: 10000 });

      connectButton = googleCard.getByRole("button", { name: "Connect" });

      if (await connectButton.isVisible()) {
        await connectButton.click();
        const googleDialog = page.getByRole("dialog", {
          name: /set up google pse/i,
        });
        await expect(googleDialog).toBeVisible({ timeout: 10000 });

        const searchEngineIdInput = page.locator(
          'input[placeholder="Enter search engine ID"]'
        );
        await searchEngineIdInput.waitFor({ state: "visible", timeout: 5000 });
        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);

        const apiKeyInput = googleDialog.getByLabel(/api key/i);
        await apiKeyInput.waitFor({ state: "visible", timeout: 5000 });
        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);

        await googleDialog
          .getByRole("button", { name: "Connect", exact: true })
          .click();
        await expect(googleDialog).not.toBeVisible({ timeout: 30000 });
        await page.waitForLoadState("networkidle");
      }

      // Now test switching - click "Set as Default" on whichever is not current
      const exaSetDefault = exaCard.getByRole("button", {
        name: "Set as Default",
      });
      const googleSetDefault = googleCard.getByRole("button", {
        name: "Set as Default",
      });

      if (await exaSetDefault.isVisible()) {
        console.log("[web-search-test] Switching to Exa as default...");
        await exaSetDefault.click();
        await page.waitForLoadState("networkidle");
        await expect(
          exaCard.getByRole("button", { name: "Current Default" })
        ).toBeVisible({ timeout: 15000 });
        console.log("[web-search-test] Successfully switched to Exa");
      } else if (await googleSetDefault.isVisible()) {
        console.log("[web-search-test] Switching to Google PSE as default...");
        await googleSetDefault.click();
        await page.waitForLoadState("networkidle");
        await expect(
          googleCard.getByRole("button", { name: "Current Default" })
        ).toBeVisible({ timeout: 15000 });
        console.log("[web-search-test] Successfully switched to Google PSE");
      }
    });
  });

  // TODO: @jessica - add Serper provider tests
});


================================================
FILE: web/tests/e2e/agents/create_and_edit_agent.spec.ts
================================================
import { test, expect, Page, Browser } from "@playwright/test";
import { loginAs, loginAsWorkerUser } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

// --- Locator Helper Functions ---
const getNameInput = (page: Page) => page.locator('input[name="name"]');
const getDescriptionInput = (page: Page) =>
  page.locator('textarea[name="description"]');
const getInstructionsTextarea = (page: Page) =>
  page.locator('textarea[name="instructions"]');
const getReminderTextarea = (page: Page) =>
  page.locator('textarea[name="reminders"]');
const getKnowledgeToggle = (page: Page) =>
  page.locator('button[role="switch"][name="enable_knowledge"]');

// Helper function to set date using InputDatePicker (sets to today's date)
const setKnowledgeCutoffDate = async (page: Page) => {
  // Find and click the date picker button within the Knowledge Cutoff Date section
  const datePickerButton = page
    .locator('label:has-text("Knowledge Cutoff Date")')
    .locator("..")
    .locator('button:has-text("Select Date"), button:has-text("/")');

  await datePickerButton.click();

  // Wait for the popover to open
  await page.waitForSelector('[role="dialog"]', {
    state: "visible",
    timeout: 5000,
  });

  // Click the "Today" button to set to today's date
  const todayButton = page
    .locator('[role="dialog"]')
    .getByRole("button", { name: "Today" })
    .first();
  await todayButton.click();

  // The popover should close automatically after selection
  await page.waitForSelector('[role="dialog"]', {
    state: "hidden",
    timeout: 5000,
  });
};
const getStarterMessageInput = (page: Page, index: number = 0) =>
  page.locator(`input[name="starter_messages.${index}"]`);
const getCreateSubmitButton = (page: Page) =>
  page.locator('button[type="submit"]:has-text("Create")');
const getUpdateSubmitButton = (page: Page) =>
  page.locator('button[type="submit"]:has-text("Save")');

// Helper to navigate to document sets view in the new Knowledge UI
const navigateToDocumentSetsView = async (page: Page) => {
  // First, check if we need to click "View / Edit" or "Add" button to open the knowledge panel
  const viewEditButton = page.getByLabel("knowledge-view-edit");
  const addButton = page.getByLabel("knowledge-add-button");

  if (await viewEditButton.isVisible()) {
    await viewEditButton.click();
  } else if (await addButton.isVisible()) {
    await addButton.click();
  }

  // Now click on "Document Sets" in the add view or sidebar
  const documentSetsButton = page.getByLabel("knowledge-add-document-sets");
  if (await documentSetsButton.isVisible()) {
    await documentSetsButton.click();
  } else {
    // Try the sidebar version
    const sidebarDocumentSets = page.getByLabel(
      "knowledge-sidebar-document-sets"
    );
    if (await sidebarDocumentSets.isVisible()) {
      await sidebarDocumentSets.click();
    }
  }

  // Wait for the document sets table to appear
  await page.waitForTimeout(500);
};

// Helper to select a document set by ID in the new Knowledge UI
const selectDocumentSet = async (page: Page, documentSetId: number) => {
  const documentSetRow = page.getByLabel(`document-set-row-${documentSetId}`);
  await expect(documentSetRow).toBeVisible({ timeout: 5000 });
  await documentSetRow.click();
};

// Helper to navigate to files view in the new Knowledge UI
const navigateToFilesView = async (page: Page) => {
  // First, check if we need to click "View / Edit" or "Add" button to open the knowledge panel
  const viewEditButton = page.getByLabel("knowledge-view-edit");
  const addButton = page.getByLabel("knowledge-add-button");

  if (await viewEditButton.isVisible()) {
    await viewEditButton.click();
  } else if (await addButton.isVisible()) {
    await addButton.click();
  }

  // Now click on "Your Files" in the add view or sidebar
  const filesButton = page.getByLabel("knowledge-add-files");
  if (await filesButton.isVisible()) {
    await filesButton.click();
  } else {
    // Try the sidebar version
    const sidebarFiles = page.getByLabel("knowledge-sidebar-files");
    if (await sidebarFiles.isVisible()) {
      await sidebarFiles.click();
    }
  }

  // Wait for the files table to appear
  await page.waitForTimeout(500);
};

test.describe("Assistant Creation and Edit Verification", () => {
  // Configure this entire suite to run serially
  test.describe.configure({ mode: "serial" });

  test.describe("User Files Only", () => {
    let userFilesAssistantId: number | null = null;

    test.afterAll(async ({ browser }: { browser: Browser }) => {
      if (userFilesAssistantId !== null) {
        const context = await browser.newContext({
          storageState: "admin_auth.json",
        });
        const page = await context.newPage();
        const cleanupClient = new OnyxApiClient(page.request);
        await cleanupClient.deleteAgent(userFilesAssistantId);
        await context.close();
        console.log(
          "[test] Cleanup completed - deleted User Files Only assistant"
        );
      }
    });

    test("should create assistant with user files when no connectors exist @exclusive", async ({
      page,
    }, testInfo) => {
      await page.context().clearCookies();
      await loginAsWorkerUser(page, testInfo.workerIndex);

      const agentName = "E2E User Files Assistant";
      const agentDescription = "Testing user file uploads without connectors";
      const assistantInstructions = "Help users with their documents.";

      await page.goto("/app/agents/create");

      // Fill in basic assistant details
      await getNameInput(page).fill(agentName);
      await getDescriptionInput(page).fill(agentDescription);
      await getInstructionsTextarea(page).fill(assistantInstructions);

      // Enable Knowledge toggle
      const knowledgeToggle = getKnowledgeToggle(page);
      await knowledgeToggle.scrollIntoViewIfNeeded();
      await expect(knowledgeToggle).toHaveAttribute("aria-checked", "false");
      await knowledgeToggle.click();

      // Navigate to files view in the new Knowledge UI
      await navigateToFilesView(page);

      // Verify "Add File" button is visible in the new UI
      const addFileButton = page.getByRole("button", {
        name: /add file/i,
      });
      await expect(addFileButton).toBeVisible();

      // Submit the assistant creation form
      await getCreateSubmitButton(page).click();

      // Verify redirection to chat page with the new assistant
      await page.waitForURL(/.*\/app\?agentId=\d+.*/);
      const url = page.url();
      const agentIdMatch = url.match(/agentId=(\d+)/);
      expect(agentIdMatch).toBeTruthy();

      // Store assistant ID for cleanup
      if (agentIdMatch) {
        userFilesAssistantId = Number(agentIdMatch[1]);
      }

      console.log(
        `[test] Successfully created assistant without connectors: ${agentName}`
      );
    });
  });

  test.describe("With Knowledge", () => {
    let ccPairId: number;
    let documentSetId: number;
    let knowledgeAssistantId: number | null = null;

    test.afterAll(async ({ browser }: { browser: Browser }) => {
      // Cleanup using browser fixture (worker-scoped) to avoid per-test fixture limitation
      const context = await browser.newContext({
        storageState: "admin_auth.json",
      });
      const page = await context.newPage();
      const cleanupClient = new OnyxApiClient(page.request);

      if (knowledgeAssistantId !== null) {
        await cleanupClient.deleteAgent(knowledgeAssistantId);
      }
      if (ccPairId && documentSetId) {
        await cleanupClient.deleteDocumentSet(documentSetId);
        await cleanupClient.deleteCCPair(ccPairId);
      }

      await context.close();
      console.log(
        "[test] Cleanup completed - deleted assistant, connector, and document set"
      );
    });

    test("should create and edit assistant with Knowledge enabled", async ({
      page,
    }, testInfo) => {
      // Login as admin to create connector and document set (requires admin permissions)
      await page.context().clearCookies();
      await loginAs(page, "admin");

      // Create a connector and document set to enable the Knowledge toggle
      const onyxApiClient = new OnyxApiClient(page.request);
      ccPairId = await onyxApiClient.createFileConnector("Test Connector");
      documentSetId = await onyxApiClient.createDocumentSet(
        "Test Document Set",
        [ccPairId]
      );

      // Navigate to a page to ensure session is fully established
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      // Now login as a regular user to test the assistant creation
      await page.context().clearCookies();
      await loginAsWorkerUser(page, testInfo.workerIndex);

      // --- Initial Values ---
      const agentName = "Test Assistant 1";
      const agentDescription = "This is a test assistant description.";
      const assistantInstructions = "These are the test instructions.";
      const assistantReminder = "Initial reminder.";
      const assistantStarterMessage = "Initial starter message?";

      // --- Edited Values ---
      const editedAssistantName = "Edited Assistant";
      const editedAssistantDescription = "This is the edited description.";
      const editedAssistantInstructions = "These are the edited instructions.";
      const editedAssistantReminder = "Edited reminder.";
      const editedAssistantStarterMessage = "Edited starter message?";

      // Navigate to the assistant creation page
      await page.goto("/app/agents/create");

      // --- Fill in Initial Assistant Details ---
      await getNameInput(page).fill(agentName);
      await getDescriptionInput(page).fill(agentDescription);
      await getInstructionsTextarea(page).fill(assistantInstructions);

      // Reminder
      await getReminderTextarea(page).fill(assistantReminder);

      // Knowledge Cutoff Date
      await setKnowledgeCutoffDate(page);

      // Enable Knowledge toggle (should now be enabled due to connector)
      const knowledgeToggle = getKnowledgeToggle(page);
      await knowledgeToggle.scrollIntoViewIfNeeded();

      // Verify toggle is NOT disabled
      await expect(knowledgeToggle).not.toBeDisabled();
      await knowledgeToggle.click();

      // Navigate to document sets view and select the document set
      await navigateToDocumentSetsView(page);
      await selectDocumentSet(page, documentSetId);

      // Starter Message
      await getStarterMessageInput(page).fill(assistantStarterMessage);

      // Submit the creation form
      await getCreateSubmitButton(page).click();

      // Verify redirection to chat page with the new assistant ID
      await page.waitForURL(/.*\/app\?agentId=\d+.*/);
      const url = page.url();
      const agentIdMatch = url.match(/agentId=(\d+)/);
      expect(agentIdMatch).toBeTruthy();
      const agentId = agentIdMatch ? agentIdMatch[1] : null;
      expect(agentId).not.toBeNull();
      await expectScreenshot(page, {
        name: "welcome-page-with-assistant",
        hide: ["[data-testid='AppInputBar/llm-popover-trigger']"],
      });

      // Store assistant ID for cleanup
      knowledgeAssistantId = Number(agentId);

      // Navigate directly to the edit page
      await page.goto(`/app/agents/edit/${agentId}`);
      await page.waitForURL(`**/app/agents/edit/${agentId}`);

      // Verify basic fields
      await expect(getNameInput(page)).toHaveValue(agentName);
      await expect(getDescriptionInput(page)).toHaveValue(agentDescription);
      await expect(getInstructionsTextarea(page)).toHaveValue(
        assistantInstructions
      );

      // Verify advanced fields
      await expect(getReminderTextarea(page)).toHaveValue(assistantReminder);
      // Knowledge toggle should be enabled since we have a connector
      await expect(getKnowledgeToggle(page)).toHaveAttribute(
        "aria-checked",
        "true"
      );
      // Verify document set is selected by navigating to the document sets view
      await navigateToDocumentSetsView(page);
      const documentSetRow = page.getByLabel(
        `document-set-row-${documentSetId}`
      );
      await expect(documentSetRow).toBeVisible();
      // The row should have a checked checkbox (data-selected attribute)
      await expect(documentSetRow).toHaveAttribute("data-selected", "true");

      await expect(getStarterMessageInput(page)).toHaveValue(
        assistantStarterMessage
      );

      // --- Edit Assistant Details ---
      await getNameInput(page).fill(editedAssistantName);
      await getDescriptionInput(page).fill(editedAssistantDescription);
      await getInstructionsTextarea(page).fill(editedAssistantInstructions);
      await getReminderTextarea(page).fill(editedAssistantReminder);
      await setKnowledgeCutoffDate(page);
      await getStarterMessageInput(page).fill(editedAssistantStarterMessage);

      // Submit the edit form
      await getUpdateSubmitButton(page).click();

      // Verify redirection back to the chat page
      await page.waitForURL(/.*\/app\?agentId=\d+.*/);
      expect(page.url()).toContain(`agentId=${agentId}`);

      // --- Navigate to Edit Page Again and Verify Edited Values ---
      await page.goto(`/app/agents/edit/${agentId}`);
      await page.waitForURL(`**/app/agents/edit/${agentId}`);

      // Verify basic fields
      await expect(getNameInput(page)).toHaveValue(editedAssistantName);
      await expect(getDescriptionInput(page)).toHaveValue(
        editedAssistantDescription
      );
      await expect(getInstructionsTextarea(page)).toHaveValue(
        editedAssistantInstructions
      );

      // Verify advanced fields
      await expect(getReminderTextarea(page)).toHaveValue(
        editedAssistantReminder
      );
      await expect(getKnowledgeToggle(page)).toHaveAttribute(
        "aria-checked",
        "true"
      );
      // Verify document set is still selected after edit
      await navigateToDocumentSetsView(page);
      const documentSetRowAfterEdit = page.getByLabel(
        `document-set-row-${documentSetId}`
      );
      await expect(documentSetRowAfterEdit).toBeVisible();
      await expect(documentSetRowAfterEdit).toHaveAttribute(
        "data-selected",
        "true"
      );

      await expect(getStarterMessageInput(page)).toHaveValue(
        editedAssistantStarterMessage
      );

      console.log(
        `[test] Successfully tested Knowledge-enabled assistant: ${agentName}`
      );
    });
  });
});


================================================
FILE: web/tests/e2e/agents/llm_provider_rbac.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { Page } from "@playwright/test";
import { loginAsRandomUser, loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

/**
 * This test verifies that LLM Provider RBAC works correctly in the assistant editor.
 *
 * Test scenario:
 * 1. Create a restricted LLM provider (not public, assigned to specific group)
 * 2. Create a user who doesn't have access to the restricted provider
 * 3. Navigate to assistant creation page
 * 4. Verify the restricted provider doesn't appear in the LLM selector
 */

const getDefaultModelSelector = (page: Page) =>
  page
    .locator(
      'button:has-text("User Default"), button:has-text("System Default")'
    )
    .first();

const getLLMProviderOptions = async (page: Page) => {
  // Click the selector to open the dropdown
  await getDefaultModelSelector(page).click();

  // Wait for the dropdown to be visible
  await page.waitForSelector('[role="option"]', { state: "visible" });

  // Get all visible options
  const options = await page.locator('[role="option"]').allTextContents();

  // Close the dropdown by clicking elsewhere
  await page.keyboard.press("Escape");

  return options;
};

test("Restricted LLM Provider should not appear for unauthorized users", async ({
  page,
}) => {
  await page.context().clearCookies();

  // Step 1: Login as admin to create test fixtures
  await loginAs(page, "admin");
  await page.waitForLoadState("networkidle");

  // Step 2: Create a user group that will have access to the restricted provider
  const restrictedGroupName = `Restricted Group ${Date.now()}`;
  let groupId: number | null = null;
  let providerId: number | null = null;

  const client = new OnyxApiClient(page.request);

  try {
    groupId = await client.createUserGroup(restrictedGroupName);
    console.log(`Created user group with ID: ${groupId}`);

    // Step 3: Create a restricted LLM provider assigned to that group
    const restrictedProviderName = `Restricted Provider ${Date.now()}`;
    providerId = await client.createRestrictedProvider(
      restrictedProviderName,
      groupId
    );
    console.log(
      `Created restricted provider "${restrictedProviderName}" with ID: ${providerId}`
    );

    // Step 4: Logout and login as a random user (who won't be in the restricted group)
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    // Step 5: Navigate to the assistant creation page
    await page.goto("/app/agents/create");
    await page.waitForLoadState("networkidle");

    // Step 6: Scroll to the Default Model section
    const defaultModelSection = page.locator("text=Default Model").first();
    await defaultModelSection.scrollIntoViewIfNeeded();

    // Step 7: Get all available LLM provider options
    const llmOptions = await getLLMProviderOptions(page);

    // Step 8: Verify that we have some options (at least the default provider)
    expect(llmOptions.length).toBeGreaterThan(0);

    // Step 9: Verify the restricted provider does NOT appear
    const hasRestrictedProvider = llmOptions.some((option) =>
      option.includes(restrictedProviderName)
    );
    expect(hasRestrictedProvider).toBe(false);

    // Step 10: Verify that default/public providers DO appear
    const hasDefaultOption = llmOptions.some(
      (option) =>
        option.includes("Default") ||
        option.includes("GPT") ||
        option.includes("Claude")
    );
    expect(hasDefaultOption).toBe(true);

    console.log(
      `✓ Verified restricted provider "${restrictedProviderName}" does not appear for unauthorized user`
    );
  } finally {
    // Cleanup: Login as admin again to delete test fixtures
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await page.waitForLoadState("networkidle");

    if (providerId) {
      await client.deleteProvider(providerId);
      console.log(`Deleted provider with ID: ${providerId}`);
    }

    if (groupId) {
      await client.deleteUserGroup(groupId);
      console.log(`Deleted user group with ID: ${groupId}`);
    }
  }
});

test("Default Model selector shows available models", async ({ page }) => {
  await page.context().clearCookies();
  await loginAsRandomUser(page);

  // Navigate to the assistant creation page
  await page.goto("/app/agents/create");
  await page.waitForLoadState("networkidle");

  // Scroll to the Default Model section
  const defaultModelSection = page.locator("text=Default Model").first();
  await defaultModelSection.scrollIntoViewIfNeeded();

  // Open the model selector
  await getDefaultModelSelector(page).click();
  await page.waitForSelector('[role="option"]', { state: "visible" });

  // Get all options
  const options = await page.locator('[role="option"]').allTextContents();

  // Close dropdown
  await page.keyboard.press("Escape");

  // Verify we have at least the default option
  expect(options.length).toBeGreaterThan(0);

  // Verify the default/system default option exists
  const hasDefaultOption = options.some((option) =>
    option.toLowerCase().includes("default")
  );
  expect(hasDefaultOption).toBeTruthy();
});


================================================
FILE: web/tests/e2e/agents/user_file_attachment.spec.ts
================================================
import { test, expect, Page } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";

/**
 * E2E test to verify user files are properly attached to assistants.
 *
 * This test prevents a regression where user_file_ids were not being saved
 * when creating an assistant, causing uploaded files to not be associated
 * with the persona in the database.
 */

// --- Locator Helper Functions ---
const getNameInput = (page: Page) => page.locator('input[name="name"]');
const getDescriptionInput = (page: Page) =>
  page.locator('textarea[name="description"]');
const getInstructionsTextarea = (page: Page) =>
  page.locator('textarea[name="instructions"]');
const getKnowledgeToggle = (page: Page) =>
  page.locator('button[role="switch"][name="enable_knowledge"]');
const getCreateSubmitButton = (page: Page) =>
  page.locator('button[type="submit"]:has-text("Create")');

const extractAssistantIdFromCreateResponse = (
  payload: Record<string, unknown> | null
): number | null => {
  if (!payload) {
    return null;
  }
  const rawId = payload.id ?? payload.assistant_id ?? payload.persona_id;
  if (typeof rawId === "number" && Number.isFinite(rawId)) {
    return rawId;
  }
  if (typeof rawId === "string") {
    const parsed = Number(rawId);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
  }
  return null;
};

const createAgentAndGetId = async (page: Page): Promise<number> => {
  const createResponsePromise = page.waitForResponse(
    (response) => {
      if (response.request().method() !== "POST" || !response.ok()) {
        return false;
      }
      try {
        const pathname = new URL(response.url()).pathname;
        return /^\/api\/persona\/?$/.test(pathname);
      } catch {
        return false;
      }
    },
    { timeout: 30000 }
  );

  await getCreateSubmitButton(page).click();

  const createResponse = await createResponsePromise;

  await page.waitForURL(
    (url) => {
      const href = typeof url === "string" ? url : url.toString();
      return /\/app\?agentId=\d+/.test(href) || /\/app\?chatId=/.test(href);
    },
    { timeout: 20000 }
  );

  const agentIdFromUrl = page.url().match(/agentId=(\d+)/);
  if (agentIdFromUrl?.[1]) {
    return Number(agentIdFromUrl[1]);
  }

  const createPayload = (await createResponse
    .json()
    .catch(() => null)) as Record<string, unknown> | null;
  const agentIdFromResponse =
    extractAssistantIdFromCreateResponse(createPayload);
  if (agentIdFromResponse !== null) {
    return agentIdFromResponse;
  }

  throw new Error(
    `Assistant ID missing from URL (${page.url()}) and create response payload`
  );
};

// Helper to navigate to files view in the Knowledge UI
const navigateToFilesView = async (page: Page) => {
  // Check if we need to click "View / Edit" or "Add" button to open the knowledge panel
  const viewEditButton = page.getByLabel("knowledge-view-edit");
  const addButton = page.getByLabel("knowledge-add-button");

  if (await viewEditButton.isVisible()) {
    await viewEditButton.click();
  } else if (await addButton.isVisible()) {
    await addButton.click();
  }

  // Click on "Your Files" in the add view or sidebar
  const filesButton = page.getByLabel("knowledge-add-files");
  if (await filesButton.isVisible()) {
    await filesButton.click();
  } else {
    // Try the sidebar version
    const sidebarFiles = page.getByLabel("knowledge-sidebar-files");
    if (await sidebarFiles.isVisible()) {
      await sidebarFiles.click();
    }
  }

  // Wait for the files table to appear
  await page.waitForTimeout(500);
};

// Helper to upload a file through the knowledge panel
async function uploadTestFile(
  page: Page,
  fileName: string,
  content: string,
  maxRetries: number = 3
): Promise<string> {
  const buffer = Buffer.from(content, "utf-8");

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      console.log(`[test] Upload attempt ${attempt} for ${fileName}`);

      // Find the Add File button
      const addFileButton = page.getByRole("button", { name: /add file/i });
      await expect(addFileButton).toBeVisible({ timeout: 5000 });
      await expect(addFileButton).toBeEnabled({ timeout: 5000 });

      // Set up file chooser listener before clicking
      const fileChooserPromise = page.waitForEvent("filechooser", {
        timeout: 5000,
      });
      await addFileButton.click();
      const fileChooser = await fileChooserPromise;

      // Wait for upload API completion to avoid racing the UI refresh.
      const uploadResponsePromise = page.waitForResponse(
        (response) =>
          response.url().includes("/api/user/projects/file/upload") &&
          response.request().method() === "POST",
        { timeout: 15000 }
      );

      // Upload the file
      await fileChooser.setFiles({
        name: fileName,
        mimeType: "text/plain",
        buffer: buffer,
      });
      const uploadResponse = await uploadResponsePromise;
      expect(uploadResponse.ok()).toBeTruthy();

      // Wait for network to settle after upload
      await page.waitForLoadState("networkidle", { timeout: 10000 });

      // Wait a moment for the UI to update
      await page.waitForTimeout(500);

      // Wait for the uploaded file row to appear.
      const fileRow = page
        .locator('[aria-label^="user-file-row-"]')
        .filter({ hasText: fileName })
        .first();
      await expect(fileRow).toBeVisible({ timeout: 10000 });

      console.log(`[test] Successfully uploaded ${fileName}`);

      // Return the file name for verification later
      return fileName;
    } catch (error) {
      console.log(
        `[test] Upload attempt ${attempt} failed: ${
          error instanceof Error ? error.message : "unknown error"
        }`
      );
      if (attempt === maxRetries) {
        throw error;
      }
      await page.waitForTimeout(1000);
    }
  }

  throw new Error(
    `Failed to upload file ${fileName} after ${maxRetries} attempts`
  );
}

// Helper to select a file by clicking its row
async function selectFileByName(page: Page, fileName: string): Promise<void> {
  const fileNameWithoutExt = fileName.replace(".txt", "");

  // Try to find and click the row containing the file name
  // First try by aria-label
  let fileRow = page.locator(`[aria-label^="user-file-row-"]`, {
    has: page.locator(`text=${fileNameWithoutExt}`),
  });

  if ((await fileRow.count()) === 0) {
    // Fall back to finding by table-row-layout class
    fileRow = page.locator("[data-selected]", {
      has: page.locator(`text=${fileNameWithoutExt}`),
    });
  }

  if ((await fileRow.count()) === 0) {
    // Last resort: find any clickable row with the file name
    fileRow = page
      .locator("div", {
        has: page.locator(`text=${fileNameWithoutExt}`),
      })
      .filter({
        has: page.locator('[role="checkbox"], input[type="checkbox"]'),
      })
      .first();
  }

  if ((await fileRow.count()) > 0) {
    await fileRow.click();
  } else {
    // Just click on the file name text itself
    await page.locator(`text=${fileNameWithoutExt}`).first().click();
  }

  // Wait for the selection to register
  await page.waitForTimeout(300);
  console.log(`[test] Selected file: ${fileName}`);
}

test.describe("User File Attachment to Assistant", () => {
  // Run serially to avoid session conflicts between parallel workers
  test.describe.configure({ mode: "serial", retries: 1 });

  test("should persist user file attachment after creating assistant", async ({
    page,
  }: {
    page: Page;
  }) => {
    // Login as a random user (no admin needed for user files)
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    const agentName = `User File Test ${Date.now()}`;
    const agentDescription = "Testing user file persistence";
    const assistantInstructions = "Help users with their uploaded files.";
    const testFileName = `test-file-${Date.now()}.txt`;
    const testFileContent =
      "This is test content for the user file attachment test.";

    // Navigate to assistant creation page
    await page.goto("/app/agents/create");
    await page.waitForLoadState("networkidle");

    // Fill in basic assistant details
    await getNameInput(page).fill(agentName);
    await getDescriptionInput(page).fill(agentDescription);
    await getInstructionsTextarea(page).fill(assistantInstructions);

    // Enable Knowledge toggle
    const knowledgeToggle = getKnowledgeToggle(page);
    await knowledgeToggle.scrollIntoViewIfNeeded();
    await expect(knowledgeToggle).toHaveAttribute("aria-checked", "false");
    await knowledgeToggle.click();
    await expect(knowledgeToggle).toHaveAttribute("aria-checked", "true");

    // Navigate to files view in the Knowledge UI
    await navigateToFilesView(page);

    // Upload a test file - this automatically adds it to user_file_ids
    await uploadTestFile(page, testFileName, testFileContent);

    // NOTE: We do NOT call selectFileByName here because uploadTestFile
    // already adds the file to user_file_ids. Clicking again would toggle it OFF.

    // Verify file appears in the UI (use first() since file may appear in multiple places)
    const fileText = page.getByText(testFileName).first();
    await expect(fileText).toBeVisible();

    // Submit the assistant creation form and resolve assistant ID from URL or API response.
    const agentId = await createAgentAndGetId(page);

    console.log(
      `[test] Created assistant ${agentName} with ID ${agentId}, now verifying file persistence...`
    );

    // Navigate to the edit page for the assistant
    await page.goto(`/app/agents/edit/${agentId}`);
    await page.waitForURL(`**/app/agents/edit/${agentId}`);
    await page.waitForLoadState("networkidle");

    // Verify knowledge toggle is still enabled
    await expect(getKnowledgeToggle(page)).toHaveAttribute(
      "aria-checked",
      "true"
    );

    // Navigate to files view
    await navigateToFilesView(page);

    // Wait for files to load
    await page.waitForTimeout(1000);

    // Verify the uploaded file still appears and is selected
    const fileNameWithoutExt = testFileName.replace(".txt", "");
    const fileTextAfterEdit = page
      .locator(`text=${fileNameWithoutExt}`)
      .first();
    await expect(fileTextAfterEdit).toBeVisible({ timeout: 10000 });

    // Wait for UI to fully render the selection state
    await page.waitForTimeout(500);

    // Verify the file row has data-selected="true" (indicating it's attached to the assistant)
    // This confirms: user_file_ids were saved when creating the assistant,
    // and they're correctly loaded and displayed when editing
    const fileRowAfterEdit = page.locator("[data-selected='true']", {
      has: page.locator(`text=${fileNameWithoutExt}`),
    });

    await expect(fileRowAfterEdit).toBeVisible({ timeout: 5000 });

    console.log(
      `[test] Successfully verified user file ${testFileName} is persisted and selected for assistant ${agentName}`
    );
  });

  test("should persist multiple user files after editing assistant", async ({
    page,
  }: {
    page: Page;
  }) => {
    // Login as a random user
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    const agentName = `Multi-File Test ${Date.now()}`;
    const testFileName1 = `test-file-1-${Date.now()}.txt`;
    const testFileName2 = `test-file-2-${Date.now()}.txt`;
    const testFileContent = "Test content for multi-file test.";

    // Navigate to assistant creation page
    await page.goto("/app/agents/create");
    await page.waitForLoadState("networkidle");

    // Fill in basic assistant details
    await getNameInput(page).fill(agentName);
    await getDescriptionInput(page).fill("Testing multiple user files");
    await getInstructionsTextarea(page).fill("Help with multiple files.");

    // Enable Knowledge toggle
    const knowledgeToggle = getKnowledgeToggle(page);
    await knowledgeToggle.scrollIntoViewIfNeeded();
    await knowledgeToggle.click();

    // Navigate to files view
    await navigateToFilesView(page);

    // Upload first file - automatically adds to user_file_ids
    await uploadTestFile(page, testFileName1, testFileContent);

    // Upload second file - automatically adds to user_file_ids
    await uploadTestFile(page, testFileName2, testFileContent);

    // NOTE: We do NOT call selectFileByName because uploadTestFile
    // already adds files to user_file_ids. Clicking would toggle them OFF.

    // Create the assistant and resolve assistant ID from URL or API response.
    const agentId = await createAgentAndGetId(page);

    // Go to edit page
    await page.goto(`/app/agents/edit/${agentId}`);
    await page.waitForLoadState("networkidle");

    // Navigate to files view
    await navigateToFilesView(page);

    // Wait for files to load
    await page.waitForTimeout(1000);

    // Verify both files are visible and selected
    // This confirms: user_file_ids were saved when creating the assistant,
    // and they're correctly loaded and displayed when editing
    for (const fileName of [testFileName1, testFileName2]) {
      const fileNameWithoutExt = fileName.replace(".txt", "");
      const fileText = page.locator(`text=${fileNameWithoutExt}`).first();
      await expect(fileText).toBeVisible({ timeout: 10000 });

      // Verify the file is selected (data-selected="true")
      const fileRow = page.locator("[data-selected='true']", {
        has: page.locator(`text=${fileNameWithoutExt}`),
      });
      await expect(fileRow).toBeVisible({ timeout: 5000 });
    }

    console.log(
      `[test] Successfully verified multiple user files are persisted for assistant ${agentName}`
    );
  });
});


================================================
FILE: web/tests/e2e/auth/email_verification.spec.ts
================================================
/**
 * E2E Test: Email Verification Success Flow
 * Tests that the login page displays verification success message when redirected from email verification
 */
import { test, expect } from "@playwright/test";

test("Login page shows verification success message after email verification", async ({
  page,
}) => {
  // Clear cookies so we hit the login page as an unauthenticated user
  await page.context().clearCookies();

  // Navigate to login page with verified=true query param (simulating redirect from email verification)
  await page.goto("/auth/login?verified=true");
  await page.waitForLoadState("networkidle");

  // Verify the success message is visible
  await expect(
    page.getByText("Your email has been verified! Please sign in to continue.")
  ).toBeVisible();

  // Verify normal login page elements are still present
  await expect(page.getByTestId("email")).toBeVisible();
  await expect(page.getByTestId("password")).toBeVisible();
});


================================================
FILE: web/tests/e2e/auth/login.spec.ts
================================================
import { test, expect } from "@playwright/test";
import {
  TEST_ADMIN_CREDENTIALS,
  workerUserCredentials,
} from "@tests/e2e/constants";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

// These tests exercise the browser login UI.
// They clear cookies to start unauthenticated, then drive the login form.

test.describe("Login flow", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
  });

  test("Login page renders email and password fields", async ({ page }) => {
    await page.goto("/auth/login");
    await page.waitForLoadState("networkidle");

    await expect(page.getByTestId("email")).toBeVisible();
    await expect(page.getByTestId("password")).toBeVisible();
    await expect(page.getByRole("button", { name: "Sign In" })).toBeVisible();

    // Capture the login page UI
    await expectScreenshot(page, { name: "login-page-initial" });
  });

  test("User can log in with valid credentials", async ({ page }) => {
    const { email, password } = TEST_ADMIN_CREDENTIALS;

    await page.goto("/auth/login");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill(email);
    await page.getByTestId("password").fill(password);
    await page.getByRole("button", { name: "Sign In" }).click();

    await expect(page).toHaveURL(/\/app/);

    // Verify the session is valid
    const me = await page.request.get("/api/me");
    expect(me.ok()).toBe(true);
    const body = await me.json();
    expect(body.email).toBe(email);
  });

  test("Login fails with invalid password", async ({ page }) => {
    await page.goto("/auth/login");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill(workerUserCredentials(0).email);
    await page.getByTestId("password").fill("WrongPassword123!");
    await page.getByRole("button", { name: "Sign In" }).click();

    // Wait for error message to appear (use exact match to avoid duplicate selector)
    await expect(
      page.getByText("Invalid email or password", { exact: true })
    ).toBeVisible();

    // Capture the error state
    await expectScreenshot(page, { name: "login-invalid-password-error" });

    // Should stay on the login page
    await expect(page).toHaveURL(/\/auth\/login/);

    // Should not be authenticated
    const me = await page.request.get("/api/me");
    expect(me.ok()).toBe(false);
  });

  test("Login fails with non-existent user", async ({ page }) => {
    await page.goto("/auth/login");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill("nonexistent@example.com");
    await page.getByTestId("password").fill("SomePassword123!");
    await page.getByRole("button", { name: "Sign In" }).click();

    // Wait for error message to appear (use exact match to avoid duplicate selector)
    await expect(
      page.getByText("Invalid email or password", { exact: true })
    ).toBeVisible();

    // Capture the error state
    await expectScreenshot(page, { name: "login-nonexistent-user-error" });

    // Should stay on the login page
    await expect(page).toHaveURL(/\/auth\/login/);
  });
});


================================================
FILE: web/tests/e2e/auth/password_managements.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAsRandomUser, loginAs } from "@tests/e2e/utils/auth";
import {
  TEST_ADMIN2_CREDENTIALS,
  TEST_ADMIN_CREDENTIALS,
} from "@tests/e2e/constants";

// test("User changes password and logs in with new password", async ({

// Skip this test for now
test.skip("User changes password and logs in with new password", async ({
  page,
}) => {
  // Clear browser context before starting the test
  await page.context().clearCookies();
  await page.context().clearPermissions();

  const { email: uniqueEmail, password: initialPassword } =
    await loginAsRandomUser(page);
  const newPassword = "newPassword456!";

  // Navigate to user settings
  await page.click("#onyx-user-dropdown");
  await page.getByText("User Settings").click();
  await page.getByRole("button", { name: "Password" }).click();

  // Change password
  await page.getByLabel("Current Password").fill(initialPassword);
  await page.getByLabel("New Password", { exact: true }).fill(newPassword);
  await page.getByLabel("Confirm New Password").fill(newPassword);
  await page.getByRole("button", { name: "Change Password" }).click();

  // Verify password change success message
  await expect(page.getByText("Password changed successfully")).toBeVisible();

  // Log out
  await page.getByRole("button", { name: "Close modal", exact: true }).click();
  await page.click("#onyx-user-dropdown");
  await page.getByText("Log out").click();

  // Log in with new password
  await page.goto("/auth/login");
  await page.getByTestId("email").fill(uniqueEmail);
  await page.getByTestId("password").fill(newPassword);
  await page.getByRole("button", { name: "Log In" }).click();

  // Verify successful login
  await expect(page).toHaveURL("http://localhost:3000/app");
  await expect(page.getByText("Explore Agents")).toBeVisible();
});

test.use({ storageState: "admin2_auth.json" });

// Skip this test for now
test.skip("Admin resets own password and logs in with new password", async ({
  page,
}) => {
  const { email: adminEmail, password: adminPassword } =
    TEST_ADMIN2_CREDENTIALS;
  // Navigate to admin panel
  await page.goto("/admin/indexing/status");

  // Check if redirected to login page
  if (page.url().includes("/auth/login")) {
    await loginAs(page, "admin2");
  }

  // Navigate to Users page in admin panel
  await page.goto("/admin/users");

  await page.waitForTimeout(500);
  // Find the admin user and click on it
  // Log current URL
  console.log("Current URL:", page.url());
  // Log current rows
  const rows = await page.$$eval("tr", (rows) =>
    rows.map((row) => row.textContent)
  );
  console.log("Current rows:", rows);

  // Log admin email we're looking for
  console.log("Admin email:", adminEmail);

  // Attempt to find and click the row
  await page
    .getByRole("row", { name: adminEmail + " Active" })
    .getByRole("button")
    .click();

  await page.waitForTimeout(500);
  // Reset password
  await page.getByRole("button", { name: "Reset Password" }).click();
  await page.getByRole("button", { name: "Reset Password" }).click();

  // Copy the new password
  const newPasswordElement = page.getByTestId("new-password");
  const newPassword = await newPasswordElement.textContent();
  if (!newPassword) {
    throw new Error("New password not found");
  }

  // Close the modal
  await page.getByLabel("Close modal").click();

  // Log out
  await page.click("#onyx-user-dropdown");
  await page.getByText("Log out").click();

  // Log in with new password
  await page.goto("/auth/login");
  await page.getByTestId("email").fill(adminEmail);
  await page.getByTestId("password").fill(newPassword);

  await page.getByRole("button", { name: "Log In" }).click();

  // Verify successful login
  await expect(page).toHaveURL("http://localhost:3000/app");
  await expect(page.getByText("Explore Agents")).toBeVisible();
});


================================================
FILE: web/tests/e2e/auth/pat_management.spec.ts
================================================
/**
 * E2E Test: Personal Access Token (PAT) Management
 * Tests complete user flow: login → create → authenticate → delete
 */
import { test, expect } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";

test("PAT Complete Workflow", async ({ page }, testInfo) => {
  // Skip in admin project - we test with fresh user auth
  test.skip(
    testInfo.project.name === "admin",
    "Test requires clean user auth state"
  );

  await page.context().clearCookies();
  const { email } = await loginAsRandomUser(page);

  await page.goto("/app");
  await page.waitForLoadState("networkidle");

  // Click on user dropdown and open settings (same pattern as other tests)
  await page.locator("#onyx-user-dropdown").click();
  await page.getByText("User Settings").first().click();

  // Wait for settings modal to appear (first page has "Full Name" section)
  await expect(page.getByText("Full Name")).toBeVisible();

  await page
    .locator('a[href="/app/settings/accounts-access"]')
    .click({ force: true });

  // Wait for PAT page to load (button is unique to the PAT section)
  await expect(page.locator('button:has-text("New Access Token")')).toBeVisible(
    {
      timeout: 10000,
    }
  );

  await page.locator('button:has-text("New Access Token")').first().click();

  const tokenName = `E2E Test Token ${Date.now()}`;
  const nameInput = page
    .locator('input[placeholder*="Name your token"]')
    .first();
  await nameInput.fill(tokenName);

  // Click the Radix UI combobox for expiration (not a select element)
  const expirationCombobox = page.locator(
    'button[role="combobox"][aria-label*="expiration"]'
  );
  if (await expirationCombobox.isVisible()) {
    await expirationCombobox.click();
    // Wait for dropdown and select 7 days option using role=option
    await page.getByRole("option", { name: "7 days" }).click();
  }

  await page.locator('button:has-text("Create Token")').first().click();

  const tokenDisplay = page
    .locator("code")
    .filter({ hasText: "onyx_pat_" })
    .first();
  await tokenDisplay.waitFor({ state: "visible", timeout: 5000 });

  const tokenValue = await tokenDisplay.textContent();
  expect(tokenValue).toContain("onyx_pat_");

  // Grant clipboard permissions before copying
  await page.context().grantPermissions(["clipboard-read", "clipboard-write"]);

  // Copy the newly created token (button is inside .code-copy-button)
  await page.locator(".code-copy-button button").click();

  // Wait a moment for clipboard to be written and verify
  await page.waitForTimeout(500);
  const clipboardText = await page.evaluate(() =>
    navigator.clipboard.readText()
  );
  expect(clipboardText).toBe(tokenValue);

  await page.locator('button:has-text("Done")').first().click();
  await expect(page.getByText(tokenName).first()).toBeVisible({
    timeout: 5000,
  });

  // Test the PAT token works by making an API request in a new context (no session cookies)
  const testContext = await page.context().browser()!.newContext();
  const apiResponse = await testContext.request.get(
    "http://localhost:3000/api/me",
    {
      headers: {
        Authorization: `Bearer ${tokenValue}`,
      },
    }
  );
  expect(apiResponse.ok()).toBeTruthy();
  const userData = await apiResponse.json();
  expect(userData.email).toBe(email);
  await testContext.close();

  // Find and click the delete button using the aria-label with token name
  const deleteButton = page.locator(
    `button[aria-label="Delete token ${tokenName}"]`
  );
  await deleteButton.click();

  const confirmButton = page.locator('button:has-text("Revoke")').first();
  await confirmButton.waitFor({ state: "visible", timeout: 3000 });
  await confirmButton.click();

  // Wait for the modal to close (it contains the token name in its text)
  await expect(confirmButton).not.toBeVisible({ timeout: 3000 });

  // Now verify the token is no longer in the list
  await expect(page.locator(`p:text-is("${tokenName}")`)).not.toBeVisible({
    timeout: 5000,
  });

  // Create a new context without cookies to test the revoked token
  const newContext = await page.context().browser()!.newContext();
  const revokedApiResponse = await newContext.request.get(
    "http://localhost:3000/api/me",
    {
      headers: {
        Authorization: `Bearer ${tokenValue}`,
      },
    }
  );
  await newContext.close();
  // Revoked tokens return 403 Forbidden (as per backend tests)
  expect(revokedApiResponse.status()).toBe(403);
});

test("PAT Multiple Tokens Management", async ({ page }, testInfo) => {
  // Skip in admin project - we test with fresh user auth
  test.skip(
    testInfo.project.name === "admin",
    "Test requires clean user auth state"
  );

  await page.context().clearCookies();
  await loginAsRandomUser(page);

  await page.goto("/app");
  await page.waitForLoadState("networkidle");

  // Click on user dropdown and open settings (same pattern as other tests)
  await page.locator("#onyx-user-dropdown").click();
  await page.getByText("User Settings").first().click();

  // Wait for settings modal to appear (first page has "Full Name" section)
  await expect(page.getByText("Full Name")).toBeVisible();

  await page
    .locator('a[href="/app/settings/accounts-access"]')
    .click({ force: true });

  // Wait for PAT page to load (button is unique to the PAT section)
  await expect(page.locator('button:has-text("New Access Token")')).toBeVisible(
    {
      timeout: 10000,
    }
  );

  const tokens = [
    { name: `Token 1 - ${Date.now()}`, expiration: "7 days" },
    { name: `Token 2 - ${Date.now() + 1}`, expiration: "30 days" },
    { name: `Token 3 - ${Date.now() + 2}`, expiration: "No expiration" },
  ];

  for (const token of tokens) {
    // Click "New Access Token" button to open the modal
    await page.locator('button:has-text("New Access Token")').first().click();

    // Fill in the token name
    const nameInput = page
      .locator('input[placeholder*="Name your token"]')
      .first();
    await nameInput.fill(token.name);

    // Click the Radix UI combobox for expiration (not a select element)
    const expirationCombobox = page.locator(
      'button[role="combobox"][aria-label*="expiration"]'
    );
    if (await expirationCombobox.isVisible()) {
      await expirationCombobox.click();
      // Wait for dropdown and select the option using role=option
      await page.getByRole("option", { name: token.expiration }).click();
    }

    // Create the token
    await page.locator('button:has-text("Create Token")').first().click();

    // Wait for token to be created (code block with token appears)
    await page
      .locator("code")
      .filter({ hasText: "onyx_pat_" })
      .first()
      .waitFor({ state: "visible", timeout: 5000 });

    // Close the modal by clicking "Done"
    await page.locator('button:has-text("Done")').first().click();

    // Wait for token to appear in the list
    await expect(page.getByText(token.name).first()).toBeVisible({
      timeout: 5000,
    });
  }

  // Verify all tokens are visible in the list
  for (const token of tokens) {
    await expect(page.getByText(token.name).first()).toBeVisible();
  }

  // Delete the second token using its aria-label
  const deleteButton = page.locator(
    `button[aria-label="Delete token ${tokens[1]!.name}"]`
  );
  await deleteButton.click();

  // Click "Revoke" to confirm deletion
  const confirmButton = page.locator('button:has-text("Revoke")').first();
  await confirmButton.waitFor({ state: "visible", timeout: 3000 });
  await confirmButton.click();

  // Wait for the modal to close
  await expect(confirmButton).not.toBeVisible({ timeout: 3000 });

  // Now verify the deleted token is no longer in the list
  await expect(page.getByText(tokens[1]!.name)).not.toBeVisible({
    timeout: 5000,
  });

  // Verify the other two tokens are still visible
  await expect(page.getByText(tokens[0]!.name).first()).toBeVisible();
  await expect(page.getByText(tokens[2]!.name).first()).toBeVisible();
});


================================================
FILE: web/tests/e2e/auth/signup.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

// These tests exercise the signup (user registration) flow.
// They clear cookies to start unauthenticated, then drive the signup form.

test.describe("Signup flow", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
  });

  test("Signup page renders correctly", async ({ page }) => {
    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    // Verify form elements are present
    await expect(page.getByTestId("email")).toBeVisible();
    await expect(page.getByTestId("password")).toBeVisible();
    await expect(
      page.getByRole("button", { name: "Create account" })
    ).toBeVisible();

    // Capture the initial signup page
    await expectScreenshot(page, { name: "signup-page-initial" });
  });

  test("User can create a new account", async ({ page }) => {
    // Generate a unique email for this test
    const uniqueEmail = `testuser_${Date.now()}@example.com`;
    const password = "NewUserPassword123!";

    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill(uniqueEmail);
    await page.getByTestId("password").fill(password);
    await page.getByRole("button", { name: "Create account" }).click();

    // Should redirect to the app page after successful signup
    await expect(page).toHaveURL(/\/app/, { timeout: 10000 });

    // Verify the session is valid and user is logged in
    const me = await page.request.get("/api/me");
    expect(me.ok()).toBe(true);
    const body = await me.json();
    expect(body.email).toBe(uniqueEmail);
  });

  test("Signup fails with weak password", async ({ page }) => {
    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill("newuser@example.com");
    await page.getByTestId("password").fill("weak"); // Too short

    // Trigger validation by blurring the password field
    await page.getByTestId("password").blur();

    // Wait for validation error to appear
    await expect(
      page.getByText(/must be at least 8 characters/i)
    ).toBeVisible();

    // Verify submit button is disabled
    await expect(
      page.getByRole("button", { name: "Create account" })
    ).toBeDisabled();

    // Capture the validation error state
    await expectScreenshot(page, { name: "signup-weak-password-error" });

    // Should stay on the signup page
    await expect(page).toHaveURL(/\/auth\/signup/);
  });

  test("Signup fails with existing email", async ({ page }) => {
    // Use an email that already exists (from global-setup)
    const existingEmail = "admin_user@example.com";

    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill(existingEmail);
    await page.getByTestId("password").fill("SomePassword123!");
    await page.getByRole("button", { name: "Create account" }).click();

    // Wait for error message to appear
    await expect(
      page.getByText("An account already exists with the specified email.", {
        exact: true,
      })
    ).toBeVisible();

    // Capture the error state
    await expectScreenshot(page, { name: "signup-existing-email-error" });

    // Should stay on the signup page
    await expect(page).toHaveURL(/\/auth\/signup/);

    // Should not be authenticated as the existing user
    const me = await page.request.get("/api/me");
    expect(me.ok()).toBe(false);
  });

  test("Signup fails with invalid email format", async ({ page }) => {
    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill("notavalidemail");
    await page.getByTestId("password").fill("ValidPassword123!");

    // Trigger validation by blurring the email field
    await page.getByTestId("email").blur();

    // Verify submit button is disabled
    await expect(
      page.getByRole("button", { name: "Create account" })
    ).toBeDisabled();

    // Capture the validation error state
    await expectScreenshot(page, { name: "signup-invalid-email-error" });

    // Should stay on the signup page
    await expect(page).toHaveURL(/\/auth\/signup/);
  });

  test("Signup fails with disposable email address", async ({ page }) => {
    // Use a disposable email domain from the fallback list
    const disposableEmail = `testuser_${Date.now()}@mailinator.com`;

    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    await page.getByTestId("email").fill(disposableEmail);
    await page.getByTestId("password").fill("ValidPassword123!");
    await page.getByRole("button", { name: "Create account" }).click();

    // Wait for error message to appear
    await expect(
      page.getByText("Disposable email addresses are not allowed").first()
    ).toBeVisible();

    // Capture the error state with hidden email to avoid non-deterministic diffs
    await expectScreenshot(page, {
      name: "signup-disposable-email-error",
      mask: ["[data-testid='email']"],
    });

    // Should stay on the signup page
    await expect(page).toHaveURL(/\/auth\/signup/);

    // Should not be authenticated
    const me = await page.request.get("/api/me");
    expect(me.ok()).toBe(false);
  });

  test("Login link navigates to login page", async ({ page }) => {
    await page.goto("/auth/signup");
    await page.waitForLoadState("networkidle");

    // Find and click the login link
    const loginLink = page.getByRole("link", { name: /sign in/i });
    await expect(loginLink).toBeVisible();
    await loginLink.click();

    // Should navigate to login page
    await expect(page).toHaveURL(/\/auth\/login/);
  });
});


================================================
FILE: web/tests/e2e/chat/actions_popover.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import {
  TOOL_IDS,
  openActionManagement,
  openSourceManagement,
  toggleToolDisabled,
  getSourceToggle,
} from "@tests/e2e/utils/tools";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

const LOCAL_STORAGE_KEY = "selectedInternalSearchSources";

test.describe("ActionsPopover Tool Toggles", () => {
  test.describe.configure({ mode: "serial" });

  let ccPairId: number | null = null;
  let webSearchProviderId: number | null = null;
  let imageGenConfigId: string | null = null;

  test.beforeAll(async ({ browser }) => {
    const ctx = await browser.newContext({ storageState: "admin_auth.json" });
    const page = await ctx.newPage();
    await page.goto("http://localhost:3000/app");
    await page.waitForLoadState("networkidle");

    const apiClient = new OnyxApiClient(page.request);

    // Create a file connector so internal search tool is available
    ccPairId = await apiClient.createFileConnector(
      `actions-popover-test-${Date.now()}`
    );

    // Create providers for web search and image generation (best-effort)
    try {
      webSearchProviderId = await apiClient.createWebSearchProvider(
        "exa",
        `actions-popover-web-search-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create web search provider: ${error}`);
    }

    try {
      imageGenConfigId = await apiClient.createImageGenerationConfig(
        `actions-popover-image-gen-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create image gen config: ${error}`);
    }

    // Ensure all tools are enabled on the default agent
    const toolsResp = await page.request.get("/api/tool");
    const allTools = await toolsResp.json();
    const toolIdsByCodeId: Record<string, number> = {};
    allTools.forEach((t: any) => {
      if (t.in_code_tool_id) toolIdsByCodeId[t.in_code_tool_id] = t.id;
    });

    const configResp = await page.request.get(
      "/api/admin/default-assistant/configuration"
    );
    const currentConfig = await configResp.json();

    const desiredToolIds = [
      toolIdsByCodeId["SearchTool"],
      toolIdsByCodeId["WebSearchTool"],
      toolIdsByCodeId["ImageGenerationTool"],
    ].filter(Boolean);

    const uniqueToolIds = Array.from(
      new Set([...(currentConfig.tool_ids || []), ...desiredToolIds])
    );

    await page.request.patch("/api/admin/default-assistant", {
      data: { tool_ids: uniqueToolIds },
    });

    await ctx.close();
  });

  test.afterAll(async ({ browser }) => {
    const ctx = await browser.newContext({ storageState: "admin_auth.json" });
    const page = await ctx.newPage();
    await page.goto("http://localhost:3000/app");
    await page.waitForLoadState("networkidle");

    const apiClient = new OnyxApiClient(page.request);

    if (ccPairId !== null) {
      try {
        await apiClient.deleteCCPair(ccPairId);
      } catch (error) {
        console.warn(`Cleanup: failed to delete connector: ${error}`);
      }
    }
    if (webSearchProviderId !== null) {
      try {
        await apiClient.deleteWebSearchProvider(webSearchProviderId);
      } catch (error) {
        console.warn(`Cleanup: failed to delete web search provider: ${error}`);
      }
    }
    if (imageGenConfigId !== null) {
      try {
        await apiClient.deleteImageGenerationConfig(imageGenConfigId);
      } catch (error) {
        console.warn(`Cleanup: failed to delete image gen config: ${error}`);
      }
    }

    await ctx.close();
  });

  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
    // Clear source preferences for a clean slate
    await page.evaluate(
      (key) => localStorage.removeItem(key),
      LOCAL_STORAGE_KEY
    );
  });

  test("should show internal search and other tools in popover", async ({
    page,
  }) => {
    await openActionManagement(page);

    // Internal search must be visible (connector was created in beforeAll)
    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({
      timeout: 10000,
    });

    // Soft-check other tools (depend on provider setup success)
    const webVisible = await page
      .locator(TOOL_IDS.webSearchOption)
      .isVisible()
      .catch(() => false);
    const imgVisible = await page
      .locator(TOOL_IDS.imageGenerationOption)
      .isVisible()
      .catch(() => false);
    console.log(`[tools] web_search=${webVisible}, image_gen=${imgVisible}`);
  });

  test("source preferences should persist to localStorage and survive reload", async ({
    page,
  }) => {
    await openActionManagement(page);
    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({
      timeout: 10000,
    });
    await openSourceManagement(page);

    // Find the first source switch
    const switches = page.locator('[role="switch"]');
    await expect(switches.first()).toBeVisible({ timeout: 5000 });

    const firstSwitch = switches.first();
    const ariaLabel = await firstSwitch.getAttribute("aria-label");
    const sourceName = ariaLabel?.replace("Toggle ", "") || "";
    expect(sourceName).toBeTruthy();

    // Ensure it's enabled, then disable it
    if ((await firstSwitch.getAttribute("aria-checked")) === "false") {
      await firstSwitch.click();
      await expect(firstSwitch).toHaveAttribute("aria-checked", "true");
    }
    await firstSwitch.click();
    await expect(firstSwitch).toHaveAttribute("aria-checked", "false");

    // Verify localStorage was updated
    const stored = await page.evaluate(
      (key) => localStorage.getItem(key),
      LOCAL_STORAGE_KEY
    );
    expect(stored).toBeTruthy();
    expect(JSON.parse(stored!).sourcePreferences).toBeDefined();

    // Reload and verify persistence
    await page.reload();
    await page.waitForLoadState("networkidle");

    await openActionManagement(page);
    await openSourceManagement(page);

    const sourceToggle = getSourceToggle(page, sourceName);
    await expect(sourceToggle).toHaveAttribute("aria-checked", "false", {
      timeout: 10000,
    });
  });

  test("disabling search tool clears sources, re-enabling restores them", async ({
    page,
  }) => {
    await openActionManagement(page);
    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({
      timeout: 10000,
    });

    // Open source management and count enabled sources
    await openSourceManagement(page);
    const switches = page.locator('[role="switch"]');
    await expect(switches.first()).toBeVisible({ timeout: 5000 });

    const totalSources = await switches.count();
    let enabledBefore = 0;
    for (let i = 0; i < totalSources; i++) {
      if ((await switches.nth(i).getAttribute("aria-checked")) === "true") {
        enabledBefore++;
      }
    }
    expect(enabledBefore).toBeGreaterThan(0);

    // Go back to primary view
    await page.locator('button[aria-label="Back"]').click();
    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible();

    // Disable the search tool
    await toggleToolDisabled(page, TOOL_IDS.searchOption);

    // Verify localStorage was written (the fix being tested)
    const stored = await page.evaluate(
      (key) => localStorage.getItem(key),
      LOCAL_STORAGE_KEY
    );
    expect(stored).toBeTruthy();

    // Re-enable the search tool
    await toggleToolDisabled(page, TOOL_IDS.searchOption);

    // Verify sources were restored
    await openSourceManagement(page);
    const switchesAfter = page.locator('[role="switch"]');
    const totalAfter = await switchesAfter.count();
    let enabledAfter = 0;
    for (let i = 0; i < totalAfter; i++) {
      if (
        (await switchesAfter.nth(i).getAttribute("aria-checked")) === "true"
      ) {
        enabledAfter++;
      }
    }
    expect(enabledAfter).toBe(enabledBefore);
  });

  test("tool enabled and disabled states both persist across reload", async ({
    page,
  }) => {
    await openActionManagement(page);
    const searchOption = page.locator(TOOL_IDS.searchOption);
    await expect(searchOption).toBeVisible({ timeout: 10000 });

    // The slash button says "Disable" when the tool is enabled
    await searchOption.hover();
    const slashButton = searchOption.locator(
      'button[aria-label="Disable"], button[aria-label="Enable"]'
    );
    await expect(slashButton.first()).toHaveAttribute("aria-label", "Disable");

    // Reload — enabled state should persist
    await page.reload();
    await page.waitForLoadState("networkidle");
    await openActionManagement(page);
    await page.locator(TOOL_IDS.searchOption).hover();
    await expect(
      page
        .locator(TOOL_IDS.searchOption)
        .locator('button[aria-label="Disable"], button[aria-label="Enable"]')
        .first()
    ).toHaveAttribute("aria-label", "Disable");

    // Disable the search tool
    await toggleToolDisabled(page, TOOL_IDS.searchOption);

    // Verify it's now disabled (slash button says "Enable")
    await page.locator(TOOL_IDS.searchOption).hover();
    await expect(
      page
        .locator(TOOL_IDS.searchOption)
        .locator('button[aria-label="Disable"], button[aria-label="Enable"]')
        .first()
    ).toHaveAttribute("aria-label", "Enable");

    // Reload — disabled state should also persist (saved to DB)
    await page.reload();
    await page.waitForLoadState("networkidle");
    await openActionManagement(page);
    await page.locator(TOOL_IDS.searchOption).hover();
    await expect(
      page
        .locator(TOOL_IDS.searchOption)
        .locator('button[aria-label="Disable"], button[aria-label="Enable"]')
        .first()
    ).toHaveAttribute("aria-label", "Enable");

    // Re-enable the tool for cleanup (serial tests follow)
    await toggleToolDisabled(page, TOOL_IDS.searchOption);
  });
});


================================================
FILE: web/tests/e2e/chat/chat-search-command-menu.spec.ts
================================================
import { test, expect, Page, Locator } from "@playwright/test";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { loginAsWorkerUser } from "@tests/e2e/utils/auth";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

// Test data storage
const TEST_PREFIX = "E2E-CMD";
let chatSessionIds: string[] = [];
let projectIds: number[] = [];

/**
 * Helper to get the command menu dialog locator (using the content wrapper)
 */
function getCommandMenuContent(page: Page): Locator {
  return page.locator('[role="dialog"]:has([data-command-menu-list])');
}

/**
 * Helper to open the command menu and return a scoped locator
 */
async function openCommandMenu(page: Page): Promise<Locator> {
  await page.getByLabel("Open chat search").click();
  const dialog = getCommandMenuContent(page);
  await expect(
    dialog.getByPlaceholder("Search chat sessions, projects...")
  ).toBeVisible();
  return dialog;
}

test.describe("Chat Search Command Menu", () => {
  test.beforeAll(async ({ browser }, workerInfo) => {
    const context = await browser.newContext();
    const page = await context.newPage();
    await loginAsWorkerUser(page, workerInfo.workerIndex);
    const client = new OnyxApiClient(page.request);

    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    for (let i = 1; i <= 5; i++) {
      const id = await client.createChatSession(`${TEST_PREFIX} Chat ${i}`);
      chatSessionIds.push(id);
    }

    for (let i = 1; i <= 4; i++) {
      const id = await client.createProject(`${TEST_PREFIX} Project ${i}`);
      projectIds.push(id);
    }

    await context.close();
  });

  test.afterAll(async ({ browser }, workerInfo) => {
    const context = await browser.newContext();
    const page = await context.newPage();
    await loginAsWorkerUser(page, workerInfo.workerIndex);
    const client = new OnyxApiClient(page.request);

    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    for (const id of chatSessionIds) {
      await client.deleteChatSession(id);
    }
    for (const id of projectIds) {
      await client.deleteProject(id);
    }

    await context.close();
  });

  test.beforeEach(async ({ page }, testInfo) => {
    await page.context().clearCookies();
    await loginAsWorkerUser(page, testInfo.workerIndex);
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  // -- Opening --

  test("Opens with search input, New Session action, and correct positioning", async ({
    page,
  }) => {
    const dialog = await openCommandMenu(page);

    await expect(
      dialog.getByPlaceholder("Search chat sessions, projects...")
    ).toBeFocused();
    await expect(
      dialog.locator('[data-command-item="new-session"]')
    ).toBeVisible();

    await expectScreenshot(page, { name: "command-menu-default-open" });
  });

  // -- Preview limits --

  test("Shows at most 4 chats and 3 projects in preview", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const chatCount = await dialog
      .locator('[data-command-item^="chat-"]')
      .count();
    expect(chatCount).toBeLessThanOrEqual(4);

    const projectCount = await dialog
      .locator('[data-command-item^="project-"]')
      .count();
    expect(projectCount).toBeLessThanOrEqual(3);
  });

  test('Shows "Recent Sessions", "Projects" filters and "New Project" action', async ({
    page,
  }) => {
    const dialog = await openCommandMenu(page);

    await expect(
      dialog.locator('[data-command-item="recent-sessions"]')
    ).toBeVisible();
    await expect(
      dialog.locator('[data-command-item="projects"]')
    ).toBeVisible();
    await expect(
      dialog.locator('[data-command-item="new-project"]')
    ).toBeVisible();
  });

  // -- Filter expansion --

  test('"Recent Sessions" filter expands to show all 5 chats', async ({
    page,
  }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="recent-sessions"]').click();

    await page.waitForTimeout(500);

    for (let i = 1; i <= 5; i++) {
      await expect(
        dialog.locator(`[data-command-item="chat-${chatSessionIds[i - 1]}"]`)
      ).toBeVisible();
    }

    await expect(dialog.getByText("Sessions")).toBeVisible();
  });

  test('"Projects" filter expands to show all 4 projects', async ({ page }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="projects"]').click();

    await page.waitForTimeout(500);

    for (let i = 1; i <= 4; i++) {
      await expect(
        dialog.locator(`[data-command-item="project-${projectIds[i - 1]}"]`)
      ).toBeVisible();
    }

    await expectScreenshot(page, { name: "command-menu-projects-filter" });
  });

  test("Filter chip X removes filter and returns to all", async ({ page }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="recent-sessions"]').click();
    await expect(dialog.getByText("Sessions")).toBeVisible();

    await dialog.locator('button[aria-label="Remove Sessions filter"]').click();

    await expect(
      dialog.locator('[data-command-item="new-session"]')
    ).toBeVisible();
  });

  test("Backspace on empty input removes active filter", async ({ page }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="recent-sessions"]').click();
    await expect(dialog.getByText("Sessions")).toBeVisible();

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.focus();
    await page.keyboard.press("Backspace");

    await expect(
      dialog.locator('[data-command-item="new-session"]')
    ).toBeVisible();
  });

  test("Backspace on empty input with no filter closes menu", async ({
    page,
  }) => {
    await openCommandMenu(page);
    await page.keyboard.press("Backspace");
    await expect(getCommandMenuContent(page)).not.toBeVisible();
  });

  // -- Search --

  test("Search finds matching chat session", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill(`${TEST_PREFIX} Chat 3`);
    await page.waitForTimeout(500);

    await expect(
      dialog.locator(`[data-command-item="chat-${chatSessionIds[2]}"]`)
    ).toBeVisible();

    await expectScreenshot(page, { name: "command-menu-search-results" });
  });

  test("Search finds matching project", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill(`${TEST_PREFIX} Project 2`);
    await page.waitForTimeout(500);

    await expect(
      dialog.locator(`[data-command-item="project-${projectIds[1]}"]`)
    ).toBeVisible();
  });

  test('Search shows "Create New Project" action with typed name', async ({
    page,
  }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill("my custom project name");

    await expect(
      dialog.locator('[data-command-item="create-project-with-name"]')
    ).toBeVisible();
  });

  test("Search with no results shows empty state", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill("xyz123nonexistent9999");
    await page.waitForTimeout(500);

    const noResults = dialog.getByText("No results found");
    const noMore = dialog.getByText("No more results");
    await expect(noResults.or(noMore)).toBeVisible();

    await expectScreenshot(page, { name: "command-menu-no-results" });
  });

  // -- Navigation --

  test('"New Session" navigates to /app', async ({ page }) => {
    // Start from /chat so navigation is observable
    await page.goto("/chat");
    await page.waitForLoadState("networkidle");

    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="new-session"]').click();

    await page.waitForURL(/\/app/);
    expect(page.url()).toContain("/app");
  });

  test("Clicking a chat session navigates to its URL", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill(`${TEST_PREFIX} Chat 1`);
    await page.waitForTimeout(500);

    await dialog
      .locator(`[data-command-item="chat-${chatSessionIds[0]}"]`)
      .click();

    await page.waitForURL(/chatId=/);
    expect(page.url()).toContain(`chatId=${chatSessionIds[0]}`);
  });

  test("Clicking a project navigates to its URL", async ({ page }) => {
    const dialog = await openCommandMenu(page);

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill(`${TEST_PREFIX} Project 1`);
    await page.waitForTimeout(500);

    await dialog
      .locator(`[data-command-item="project-${projectIds[0]}"]`)
      .click();

    await page.waitForURL(/projectId=/);
    expect(page.url()).toContain(`projectId=${projectIds[0]}`);
  });

  test('"New Project" opens create project modal', async ({ page }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="new-project"]').click();
    await expect(page.getByText("Create New Project")).toBeVisible();
  });

  // -- Menu state --

  test("Menu closes after selecting an item", async ({ page }) => {
    const dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="new-session"]').click();
    await expect(getCommandMenuContent(page)).not.toBeVisible();
  });

  test("Escape closes menu", async ({ page }) => {
    await openCommandMenu(page);
    await page.keyboard.press("Escape");
    await expect(getCommandMenuContent(page)).not.toBeVisible();
  });

  test("Menu state resets when reopened", async ({ page }) => {
    let dialog = await openCommandMenu(page);
    await dialog.locator('[data-command-item="recent-sessions"]').click();
    await expect(dialog.getByText("Sessions")).toBeVisible();

    const input = dialog.getByPlaceholder("Search chat sessions, projects...");
    await input.fill("test query");

    await page.keyboard.press("Escape");
    await expect(getCommandMenuContent(page)).not.toBeVisible();

    dialog = await openCommandMenu(page);

    await expect(
      dialog.getByPlaceholder("Search chat sessions, projects...")
    ).toHaveValue("");
    await expect(
      dialog.locator('[data-command-item="new-session"]')
    ).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/chat/chat_message_rendering.spec.ts
================================================
import { expect, Page, test } from "@playwright/test";
import { loginAsWorkerUser } from "@tests/e2e/utils/auth";
import { sendMessage } from "@tests/e2e/utils/chatActions";
import { THEMES, setThemeBeforeNavigation } from "@tests/e2e/utils/theme";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

const SHORT_USER_MESSAGE = "What is Onyx?";

const LONG_WORD_USER_MESSAGE =
  "Please look into this issue: __________________________________________ and also this token: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA and this URL: https://example.com/a/very/long/path/that/keeps/going/and/going/and/going/without/any/breaks/whatsoever/to/test/overflow";

const LONG_USER_MESSAGE = `I've been evaluating several enterprise search and AI platforms for our organization, and I have a number of detailed questions about Onyx that I'd like to understand before we make a decision.

First, can you explain how Onyx handles document indexing across multiple data sources? We currently use Confluence, Google Drive, Slack, and GitHub, and we need to ensure that all of these can be indexed simultaneously without performance degradation.

Second, I'm interested in understanding the security model. Specifically, how does Onyx handle document-level permissions when syncing from sources that have their own ACL systems? Does it respect the original source permissions, or does it create its own permission layer?

Third, we have a requirement for real-time or near-real-time indexing. What is the typical latency between a document being updated in a source system and it becoming searchable in Onyx?

Finally, could you walk me through the architecture of the AI chat system? How does it decide which documents to reference when answering a question, and how does it handle cases where the retrieved documents might contain conflicting information?`;

const SHORT_AI_RESPONSE =
  "Onyx is an open-source AI-powered enterprise search platform that connects to your company's documents, apps, and people.";

const LONG_AI_RESPONSE = `Onyx is an open-source Gen-AI and Enterprise Search platform designed to connect to your company's documents, applications, and people. Let me address each of your questions in detail.

## Document Indexing

Onyx uses a **connector-based architecture** where each data source has a dedicated connector. These connectors run as background workers and can index simultaneously without interfering with each other. The supported connectors include:

- **Confluence** — Full page and space indexing with attachment support
- **Google Drive** — File and folder indexing with shared drive support
- **Slack** — Channel message indexing with thread support
- **GitHub** — Repository, issue, and pull request indexing

Each connector runs on its own schedule and can be configured independently for polling frequency.

## Security Model

Onyx implements a **document-level permission system** that syncs with source ACLs. When documents are indexed, their permissions are preserved:

\`\`\`
Source Permission → Onyx ACL Sync → Query-time Filtering
\`\`\`

This means that when a user searches, they only see documents they have access to in the original source system. The permission sync runs periodically to stay up to date.

## Indexing Latency

The typical indexing latency depends on your configuration:

1. **Polling mode**: Documents are picked up on the next polling cycle (configurable, default 10 minutes)
2. **Webhook mode**: Near real-time, typically under 30 seconds
3. **Manual trigger**: Immediate indexing on demand

## AI Chat Architecture

The chat system uses a **Retrieval-Augmented Generation (RAG)** pipeline:

1. User query is analyzed and expanded
2. Relevant documents are retrieved from the vector database (Vespa)
3. Documents are ranked and filtered by relevance and permissions
4. The LLM generates a response grounded in the retrieved documents
5. Citations are attached to specific claims in the response

When documents contain conflicting information, the system presents the most relevant and recent information first, and includes citations so users can verify the source material themselves.`;

const MARKDOWN_AI_RESPONSE = `Here's a quick overview with various formatting:

### Key Features

| Feature | Status | Notes |
|---------|--------|-------|
| Enterprise Search | ✅ Available | Full-text and semantic |
| AI Chat | ✅ Available | Multi-model support |
| Connectors | ✅ Available | 30+ integrations |
| Permissions | ✅ Available | Source ACL sync |

### Code Example

\`\`\`python
from onyx import OnyxClient

client = OnyxClient(api_key="your-key")
results = client.search("quarterly revenue report")

for doc in results:
    print(f"{doc.title}: {doc.score:.2f}")
\`\`\`

> **Note**: Onyx supports both cloud and self-hosted deployments. The self-hosted option gives you full control over your data.

Key benefits include:

- **Privacy**: Your data stays within your infrastructure
- **Flexibility**: Connect any data source via custom connectors
- **Extensibility**: Open-source codebase with active community`;

const LATEX_AI_RESPONSE = `Here is a mix of math and plain text:

Inline math should render cleanly: \\(E = mc^2\\).

Display math should render on its own line:
\\[
\\int_0^1 x^2 \\, dx = \\frac{1}{3}
\\]

This currency value should stay plain text: $100.

And this LaTeX source should remain a code block:
\`\`\`latex
\\int_0^1 x^2 \\, dx = \\frac{1}{3}
\`\`\``;

interface MockDocument {
  document_id: string;
  semantic_identifier: string;
  link: string;
  source_type: string;
  blurb: string;
  is_internet: boolean;
}

interface SearchMockOptions {
  content: string;
  queries: string[];
  documents: MockDocument[];
  /** Maps citation number -> document_id */
  citations: Record<number, string>;
  isInternetSearch?: boolean;
}

let turnCounter = 0;

function buildMockStream(content: string): string {
  turnCounter += 1;
  const userMessageId = turnCounter * 100 + 1;
  const agentMessageId = turnCounter * 100 + 2;

  const packets = [
    {
      user_message_id: userMessageId,
      reserved_assistant_message_id: agentMessageId,
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: {
        type: "message_start",
        id: `mock-${agentMessageId}`,
        content,
        final_documents: null,
      },
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: { type: "stop", stop_reason: "finished" },
    },
    {
      message_id: agentMessageId,
      citations: {},
      files: [],
    },
  ];

  return `${packets.map((p) => JSON.stringify(p)).join("\n")}\n`;
}

function buildMockSearchStream(options: SearchMockOptions): string {
  turnCounter += 1;
  const userMessageId = turnCounter * 100 + 1;
  const agentMessageId = turnCounter * 100 + 2;

  const fullDocs = options.documents.map((doc) => ({
    ...doc,
    boost: 0,
    hidden: false,
    score: 0.95,
    chunk_ind: 0,
    match_highlights: [],
    metadata: {},
    updated_at: null,
  }));

  // Turn 0: search tool
  // Turn 1: answer + citations
  const packets: Record<string, unknown>[] = [
    {
      user_message_id: userMessageId,
      reserved_assistant_message_id: agentMessageId,
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: {
        type: "search_tool_start",
        ...(options.isInternetSearch !== undefined && {
          is_internet_search: options.isInternetSearch,
        }),
      },
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: { type: "search_tool_queries_delta", queries: options.queries },
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: { type: "search_tool_documents_delta", documents: fullDocs },
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: { type: "section_end" },
    },
    {
      placement: { turn_index: 1, tab_index: 0 },
      obj: {
        type: "message_start",
        id: `mock-${agentMessageId}`,
        content: options.content,
        final_documents: fullDocs,
      },
    },
    ...Object.entries(options.citations).map(([num, docId]) => ({
      placement: { turn_index: 1, tab_index: 0 },
      obj: {
        type: "citation_info",
        citation_number: Number(num),
        document_id: docId,
      },
    })),
    {
      placement: { turn_index: 1, tab_index: 0 },
      obj: { type: "stop", stop_reason: "finished" },
    },
    {
      message_id: agentMessageId,
      citations: options.citations,
      files: [],
    },
  ];

  return `${packets.map((p) => JSON.stringify(p)).join("\n")}\n`;
}

async function openChat(page: Page): Promise<void> {
  await page.goto("/app");
  await page.waitForLoadState("networkidle");
  await page.waitForSelector("#onyx-chat-input-textarea", { timeout: 15000 });
}

async function mockChatEndpoint(
  page: Page,
  responseContent: string
): Promise<void> {
  await page.route("**/api/chat/send-chat-message", async (route) => {
    await route.fulfill({
      status: 200,
      contentType: "text/plain",
      body: buildMockStream(responseContent),
    });
  });
}

async function mockChatEndpointSequence(
  page: Page,
  responses: string[]
): Promise<void> {
  let callIndex = 0;
  await page.route("**/api/chat/send-chat-message", async (route) => {
    const content =
      responses[Math.min(callIndex, responses.length - 1)] ??
      responses[responses.length - 1]!;
    callIndex += 1;
    await route.fulfill({
      status: 200,
      contentType: "text/plain",
      body: buildMockStream(content),
    });
  });
}

async function scrollChatTo(
  page: Page,
  position: "top" | "bottom"
): Promise<void> {
  const scrollContainer = page.getByTestId("chat-scroll-container");
  await scrollContainer.evaluate(async (el, pos) => {
    el.scrollTo({ top: pos === "top" ? 0 : el.scrollHeight });
    await new Promise<void>((r) => requestAnimationFrame(() => r()));
  }, position);
}

async function screenshotChatContainer(
  page: Page,
  name: string
): Promise<void> {
  const container = page.locator("[data-main-container]");
  await expect(container).toBeVisible();
  await scrollChatTo(page, "bottom");
  await expectElementScreenshot(container, { name });
}

/**
 * Captures two screenshots of the chat container for long-content tests:
 * one scrolled to the top and one scrolled to the bottom. Both are captured
 * for the current theme, ensuring consistent scroll positions regardless of
 * whether the page was just navigated to (top) or just finished streaming (bottom).
 */
async function screenshotChatContainerTopAndBottom(
  page: Page,
  name: string
): Promise<void> {
  const container = page.locator("[data-main-container]");
  await expect(container).toBeVisible();

  await scrollChatTo(page, "top");
  await expectElementScreenshot(container, { name: `${name}-top` });

  await scrollChatTo(page, "bottom");
  await expectElementScreenshot(container, { name: `${name}-bottom` });
}

for (const theme of THEMES) {
  test.describe(`Chat Message Rendering (${theme} mode)`, () => {
    test.beforeEach(async ({ page }, testInfo) => {
      turnCounter = 0;
      await page.context().clearCookies();
      await setThemeBeforeNavigation(page, theme);
      await loginAsWorkerUser(page, testInfo.workerIndex);
    });

    test.describe("Short Messages", () => {
      test("short user message with short AI response renders correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, SHORT_AI_RESPONSE);

        await sendMessage(page, SHORT_USER_MESSAGE);

        const userMessage = page.locator("#onyx-human-message").first();
        await expect(userMessage).toContainText(SHORT_USER_MESSAGE);

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("open-source AI-powered");

        await screenshotChatContainer(
          page,
          `chat-short-message-short-response-${theme}`
        );
      });
    });

    test.describe("Long Messages", () => {
      test("long user message renders without truncation", async ({ page }) => {
        await openChat(page);
        await mockChatEndpoint(page, SHORT_AI_RESPONSE);

        await sendMessage(page, LONG_USER_MESSAGE);

        const userMessage = page.locator("#onyx-human-message").first();
        await expect(userMessage).toContainText("document indexing");
        await expect(userMessage).toContainText("security model");
        await expect(userMessage).toContainText("real-time or near-real-time");
        await expect(userMessage).toContainText("architecture of the AI chat");

        await screenshotChatContainer(
          page,
          `chat-long-user-message-short-response-${theme}`
        );
      });

      test("long AI response with markdown renders correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, LONG_AI_RESPONSE);

        await sendMessage(page, SHORT_USER_MESSAGE);

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("Document Indexing");
        await expect(aiMessage).toContainText("Security Model");
        await expect(aiMessage).toContainText("Indexing Latency");
        await expect(aiMessage).toContainText("AI Chat Architecture");

        await screenshotChatContainerTopAndBottom(
          page,
          `chat-short-message-long-response-${theme}`
        );
      });

      test("user message with very long words wraps without overflowing", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, SHORT_AI_RESPONSE);

        await sendMessage(page, LONG_WORD_USER_MESSAGE);

        const userMessage = page.locator("#onyx-human-message").first();
        await expect(userMessage).toContainText("__________");

        await screenshotChatContainer(
          page,
          `chat-long-word-user-message-${theme}`
        );

        // Assert the message bubble does not overflow horizontally.
        const overflows = await userMessage.evaluate((el) => {
          const bubble = el.querySelector<HTMLElement>(
            ".whitespace-break-spaces"
          );
          if (!bubble)
            throw new Error(
              "Expected human message bubble (.whitespace-break-spaces) to exist"
            );
          return bubble.scrollWidth > bubble.offsetWidth;
        });
        expect(overflows).toBe(false);
      });

      test("long user message with long AI response renders correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, LONG_AI_RESPONSE);

        await sendMessage(page, LONG_USER_MESSAGE);

        const userMessage = page.locator("#onyx-human-message").first();
        await expect(userMessage).toContainText("document indexing");

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("Retrieval-Augmented Generation");

        await screenshotChatContainerTopAndBottom(
          page,
          `chat-long-message-long-response-${theme}`
        );
      });
    });

    test.describe("Markdown and Code Rendering", () => {
      test("AI response with tables and code blocks renders correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, MARKDOWN_AI_RESPONSE);

        await sendMessage(page, "Give me an overview of Onyx features");

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("Key Features");
        await expect(aiMessage).toContainText("OnyxClient");
        await expect(aiMessage).toContainText("Privacy");

        await screenshotChatContainer(
          page,
          `chat-markdown-code-response-${theme}`
        );
      });

      test("AI response with LaTeX math renders correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, LATEX_AI_RESPONSE);

        await sendMessage(page, "Show me inline and block math");

        const aiMessage = page.getByTestId("onyx-ai-message").first();

        await screenshotChatContainer(
          page,
          `chat-latex-math-response-${theme}`
        );

        await expect(aiMessage).toContainText("Inline math should render");
        await expect(aiMessage).toContainText(
          "This currency value should stay plain text: $100."
        );
        await expect(aiMessage.locator(".katex")).toHaveCount(2);
        await expect(aiMessage.locator(".katex-display")).toBeVisible();
        await expect(aiMessage.getByRole("code")).toContainText(
          "\\int_0^1 x^2 \\, dx = \\frac{1}{3}"
        );
      });
    });

    test.describe("Multi-Turn Conversation", () => {
      test("multi-turn conversation renders all messages correctly", async ({
        page,
      }) => {
        await openChat(page);

        const responses = [
          SHORT_AI_RESPONSE,
          "Yes, Onyx supports over 30 data source connectors including Confluence, Google Drive, Slack, GitHub, Jira, Notion, and many more.",
          "To get started, you can deploy Onyx using Docker Compose with a single command. The setup takes about 5 minutes.",
        ];

        await mockChatEndpointSequence(page, responses);

        await sendMessage(page, SHORT_USER_MESSAGE);
        await expect(page.getByTestId("onyx-ai-message").first()).toContainText(
          "open-source AI-powered"
        );

        await sendMessage(page, "What connectors does it support?");
        await expect(page.getByTestId("onyx-ai-message")).toHaveCount(2, {
          timeout: 30000,
        });

        await sendMessage(page, "How do I get started?");
        await expect(page.getByTestId("onyx-ai-message")).toHaveCount(3, {
          timeout: 30000,
        });

        const userMessages = page.locator("#onyx-human-message");
        await expect(userMessages).toHaveCount(3);

        await screenshotChatContainerTopAndBottom(
          page,
          `chat-multi-turn-conversation-${theme}`
        );
      });

      test("multi-turn with mixed message lengths renders correctly", async ({
        page,
      }) => {
        await openChat(page);

        const responses = [LONG_AI_RESPONSE, SHORT_AI_RESPONSE];

        await mockChatEndpointSequence(page, responses);

        await sendMessage(page, LONG_USER_MESSAGE);
        await expect(page.getByTestId("onyx-ai-message").first()).toContainText(
          "Document Indexing"
        );

        await sendMessage(page, SHORT_USER_MESSAGE);
        await expect(page.getByTestId("onyx-ai-message")).toHaveCount(2, {
          timeout: 30000,
        });

        await screenshotChatContainerTopAndBottom(
          page,
          `chat-multi-turn-mixed-lengths-${theme}`
        );
      });
    });

    test.describe("Web Search with Citations", () => {
      const TOOLBAR_BUTTONS = [
        "AgentMessage/copy-button",
        "AgentMessage/like-button",
        "AgentMessage/dislike-button",
      ] as const;

      async function screenshotToolbarButtonHoverStates(
        page: Page,
        namePrefix: string
      ): Promise<void> {
        const aiMessage = page.getByTestId("onyx-ai-message").first();
        const toolbar = aiMessage.getByTestId("AgentMessage/toolbar");
        await expect(toolbar).toBeVisible({ timeout: 10000 });

        await toolbar.scrollIntoViewIfNeeded();
        await page.evaluate(
          () => new Promise<void>((r) => requestAnimationFrame(() => r()))
        );

        for (const buttonTestId of TOOLBAR_BUTTONS) {
          const button = aiMessage.getByTestId(buttonTestId);
          await button.hover();
          const buttonSlug = buttonTestId.split("/")[1];
          await expectElementScreenshot(toolbar, {
            name: `${namePrefix}-toolbar-${buttonSlug}-hover-${theme}`,
          });
        }

        // Sources tag is located by role+name since SourceTag has no testid.
        const sourcesButton = toolbar.getByRole("button", { name: "Sources" });
        if (await sourcesButton.isVisible()) {
          await sourcesButton.hover();
          await expectElementScreenshot(toolbar, {
            name: `${namePrefix}-toolbar-sources-hover-${theme}`,
          });
        }

        // LLMPopover trigger is only rendered when the regenerate action is
        // available (requires onRegenerate + parentMessage + llmManager props).
        const llmTrigger = aiMessage.getByTestId("llm-popover-trigger");
        if (await llmTrigger.isVisible()) {
          await llmTrigger.hover();
          await expectElementScreenshot(toolbar, {
            name: `${namePrefix}-toolbar-llm-popover-hover-${theme}`,
          });
        }
      }

      const WEB_SEARCH_DOCUMENTS: MockDocument[] = [
        {
          document_id: "web-doc-1",
          semantic_identifier: "Onyx Documentation - Getting Started",
          link: "https://docs.onyx.app/getting-started",
          source_type: "web",
          blurb:
            "Onyx is an open-source enterprise search and AI platform. Deploy in minutes with Docker Compose.",
          is_internet: true,
        },
        {
          document_id: "web-doc-2",
          semantic_identifier: "Onyx GitHub Repository",
          link: "https://github.com/onyx-dot-app/onyx",
          source_type: "web",
          blurb:
            "Open-source Gen-AI platform with 30+ connectors. MIT licensed community edition.",
          is_internet: true,
        },
        {
          document_id: "web-doc-3",
          semantic_identifier: "Enterprise Search Comparison 2025",
          link: "https://example.com/enterprise-search-comparison",
          source_type: "web",
          blurb:
            "Comparing top enterprise search platforms including Onyx, Glean, and Coveo.",
          is_internet: true,
        },
      ];

      const WEB_SEARCH_RESPONSE = `Based on my web search, here's what I found about Onyx:

Onyx is an open-source enterprise search and AI platform that can be deployed in minutes using Docker Compose [[D1]](https://docs.onyx.app/getting-started). The project is hosted on GitHub and is MIT licensed for the community edition, with over 30 connectors available [[D2]](https://github.com/onyx-dot-app/onyx).

In comparisons with other enterprise search platforms, Onyx stands out for its open-source nature and self-hosted deployment option [[D3]](https://example.com/enterprise-search-comparison). Unlike proprietary alternatives, you maintain full control over your data and infrastructure.

Key advantages include:

- **Self-hosted**: Deploy on your own infrastructure
- **Open source**: Full visibility into the codebase [[D2]](https://github.com/onyx-dot-app/onyx)
- **Quick setup**: Get running in under 5 minutes [[D1]](https://docs.onyx.app/getting-started)
- **Extensible**: 30+ pre-built connectors with custom connector support`;

      test("web search response with citations renders correctly", async ({
        page,
      }) => {
        await openChat(page);

        await page.route("**/api/chat/send-chat-message", async (route) => {
          await route.fulfill({
            status: 200,
            contentType: "text/plain",
            body: buildMockSearchStream({
              content: WEB_SEARCH_RESPONSE,
              queries: ["Onyx enterprise search platform overview"],
              documents: WEB_SEARCH_DOCUMENTS,
              citations: {
                1: "web-doc-1",
                2: "web-doc-2",
                3: "web-doc-3",
              },
              isInternetSearch: true,
            }),
          });
        });

        await sendMessage(page, "Search the web for information about Onyx");

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("open-source enterprise search");
        await expect(aiMessage).toContainText("Docker Compose");
        await expect(aiMessage).toContainText("MIT licensed");

        await screenshotChatContainer(
          page,
          `chat-web-search-with-citations-${theme}`
        );

        await screenshotToolbarButtonHoverStates(page, "chat-web-search");
      });

      test("internal document search response renders correctly", async ({
        page,
      }) => {
        const internalDocs: MockDocument[] = [
          {
            document_id: "confluence-doc-1",
            semantic_identifier: "Q3 2025 Engineering Roadmap",
            link: "https://company.atlassian.net/wiki/spaces/ENG/pages/123",
            source_type: "confluence",
            blurb:
              "Engineering priorities for Q3 include platform stability, new connector integrations, and performance improvements.",
            is_internet: false,
          },
          {
            document_id: "gdrive-doc-1",
            semantic_identifier: "Platform Architecture Overview.pdf",
            link: "https://drive.google.com/file/d/abc123",
            source_type: "google_drive",
            blurb:
              "Onyx platform architecture document covering microservices, data flow, and deployment topology.",
            is_internet: false,
          },
        ];

        const internalResponse = `Based on your company's internal documents, here is the engineering roadmap:

The Q3 2025 priorities focus on three main areas [[D1]](https://company.atlassian.net/wiki/spaces/ENG/pages/123):

1. **Platform stability** — Improving error handling and retry mechanisms across all connectors
2. **New integrations** — Adding support for ServiceNow and Zendesk connectors
3. **Performance** — Optimizing vector search latency and reducing indexing time

The platform architecture document provides additional context on how these improvements fit into the overall system design [[D2]](https://drive.google.com/file/d/abc123). The microservices architecture allows each component to be scaled independently.`;

        await openChat(page);

        await page.route("**/api/chat/send-chat-message", async (route) => {
          await route.fulfill({
            status: 200,
            contentType: "text/plain",
            body: buildMockSearchStream({
              content: internalResponse,
              queries: ["Q3 engineering roadmap priorities"],
              documents: internalDocs,
              citations: {
                1: "confluence-doc-1",
                2: "gdrive-doc-1",
              },
              isInternetSearch: false,
            }),
          });
        });

        await sendMessage(page, "What are our engineering priorities for Q3?");

        const aiMessage = page.getByTestId("onyx-ai-message").first();
        await expect(aiMessage).toContainText("Platform stability");
        await expect(aiMessage).toContainText("New integrations");
        await expect(aiMessage).toContainText("Performance");

        await screenshotChatContainer(
          page,
          `chat-internal-search-with-citations-${theme}`
        );

        await screenshotToolbarButtonHoverStates(page, "chat-internal-search");
      });
    });

    test.describe("Header Levels", () => {
      const HEADINGS_RESPONSE = `# Getting Started

This is the introductory paragraph.

## Installing the \`onyx-sdk\`

Follow these steps to install the SDK.

### Configuration Options

Some details about configuration.

#### The \`max_results\` Parameter

Set \`max_results\` to limit the number of returned documents.`;

      test("h1 through h4 headings with inline code render correctly", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, HEADINGS_RESPONSE);

        await sendMessage(page, "Show me all heading levels");

        const aiMessage = page.getByTestId("onyx-ai-message").first();

        await expect(aiMessage.locator("h1")).toContainText("Getting Started");
        await expect(aiMessage.locator("h2")).toContainText("Installing the");
        await expect(
          aiMessage.locator("h2").locator('[data-testid="code-block"]')
        ).toContainText("onyx-sdk");
        await expect(aiMessage.locator("h3")).toContainText(
          "Configuration Options"
        );
        await expect(aiMessage.locator("h4")).toContainText("Parameter");
        await expect(
          aiMessage.locator("h4").locator('[data-testid="code-block"]')
        ).toContainText("max_results");

        await expect(aiMessage.locator("h1")).toHaveCount(1);
        await expect(aiMessage.locator("h2")).toHaveCount(1);
        await expect(aiMessage.locator("h3")).toHaveCount(1);
        await expect(aiMessage.locator("h4")).toHaveCount(1);

        await screenshotChatContainer(
          page,
          `chat-heading-levels-h1-h4-${theme}`
        );
      });
    });

    test.describe("Message Interaction States", () => {
      test("hovering over user message shows action buttons", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, SHORT_AI_RESPONSE);

        await sendMessage(page, SHORT_USER_MESSAGE);

        const userMessage = page.locator("#onyx-human-message").first();
        await userMessage.hover();

        const editButton = userMessage.getByTestId("HumanMessage/edit-button");
        await expect(editButton).toBeVisible({ timeout: 5000 });

        await screenshotChatContainer(
          page,
          `chat-user-message-hover-state-${theme}`
        );
      });

      test("AI message toolbar is visible after response completes", async ({
        page,
      }) => {
        await openChat(page);
        await mockChatEndpoint(page, SHORT_AI_RESPONSE);

        await sendMessage(page, SHORT_USER_MESSAGE);

        const aiMessage = page.getByTestId("onyx-ai-message").first();

        const copyButton = aiMessage.getByTestId("AgentMessage/copy-button");
        const likeButton = aiMessage.getByTestId("AgentMessage/like-button");
        const dislikeButton = aiMessage.getByTestId(
          "AgentMessage/dislike-button"
        );

        await expect(copyButton).toBeVisible({ timeout: 10000 });
        await expect(likeButton).toBeVisible();
        await expect(dislikeButton).toBeVisible();

        await screenshotChatContainer(
          page,
          `chat-ai-message-with-toolbar-${theme}`
        );
      });
    });
  });
}


================================================
FILE: web/tests/e2e/chat/chat_session_not_found.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { THEMES, setThemeBeforeNavigation } from "@tests/e2e/utils/theme";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

const NON_EXISTENT_CHAT_ID = "00000000-0000-0000-0000-000000000000";

for (const theme of THEMES) {
  test.describe(`Chat session not found (${theme} mode)`, () => {
    test.beforeEach(async ({ page }) => {
      await setThemeBeforeNavigation(page, theme);
    });

    test("should show 404 page for a non-existent chat session", async ({
      page,
    }) => {
      await page.goto(`/app?chatId=${NON_EXISTENT_CHAT_ID}`);

      await expect(page.getByText("Chat not found")).toBeVisible({
        timeout: 10000,
      });
      await expect(
        page.getByText("This chat session doesn't exist or has been deleted.")
      ).toBeVisible();
      await expect(
        page.getByRole("link", { name: "Start a new chat" })
      ).toBeVisible();

      // Sidebar should still be visible
      await expect(page.getByTestId("AppSidebar/new-session")).toBeVisible();

      const container = page.locator("[data-main-container]");
      await expect(container).toBeVisible();
      await expectElementScreenshot(container, {
        name: `chat-session-not-found-${theme}`,
      });
    });

    test("should navigate to /app when clicking Start a new chat", async ({
      page,
    }) => {
      await page.goto(`/app?chatId=${NON_EXISTENT_CHAT_ID}`);

      await expect(page.getByText("Chat not found")).toBeVisible({
        timeout: 10000,
      });

      await page.getByRole("link", { name: "Start a new chat" }).click();
      await page.waitForLoadState("networkidle");

      await expect(page).toHaveURL("/app");
      await expect(page.getByText("Chat not found")).toBeHidden();
    });
  });
}


================================================
FILE: web/tests/e2e/chat/current_agent.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { dragElementAbove, dragElementBelow } from "@tests/e2e/utils/dragUtils";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";
import { createAgent, pinAgentByName } from "@tests/e2e/utils/agentUtils";

// TODO (chris): figure out why this test is flakey
test.skip("Assistant Drag and Drop", async ({ page }) => {
  await page.context().clearCookies();
  await loginAsRandomUser(page);

  // Navigate to the chat page
  await page.goto("/app");

  // Ensure at least two assistants exist for drag-and-drop
  const ts = Date.now();
  const nameA = `E2E Assistant A ${ts}`;
  const nameB = `E2E Assistant B ${ts}`;
  const nameC = `E2E Assistant C ${ts}`;
  await createAgent(page, {
    name: nameA,
    description: "E2E-created assistant A",
    instructions: "Assistant A instructions",
  });
  await pinAgentByName(page, nameA);
  await expect(
    page.locator('[data-testid^="assistant-["]').filter({ hasText: nameA })
  ).toBeVisible();

  await createAgent(page, {
    name: nameB,
    description: "E2E-created assistant B",
    instructions: "Assistant B instructions",
  });
  await pinAgentByName(page, nameB);
  await expect(
    page.locator('[data-testid^="assistant-["]').filter({ hasText: nameB })
  ).toBeVisible();

  await createAgent(page, {
    name: nameC,
    description: "E2E-created assistant C",
    instructions: "Assistant C instructions",
  });
  await pinAgentByName(page, nameC);
  await expect(
    page.locator('[data-testid^="assistant-["]').filter({ hasText: nameC })
  ).toBeVisible();

  // Helper function to get the current order of assistants
  const getAssistantOrder = async () => {
    const assistants = await page.$$('[data-testid^="assistant-["]');
    const names = await Promise.all(
      assistants.map(async (assistant) => {
        const nameEl = await assistant.$("span.line-clamp-1");
        const txt = nameEl ? await nameEl.textContent() : null;
        return (txt || "").trim();
      })
    );
    return names;
  };

  // Get the initial order
  const initialOrder = await getAssistantOrder();

  // Drag second assistant above first
  const secondAssistant = page.locator('[data-testid^="assistant-["]').nth(1);
  const firstAssistant = page.locator('[data-testid^="assistant-["]').nth(0);

  await dragElementAbove(secondAssistant, firstAssistant, page);

  // Check new order
  // wait a second to make sure that the order has been applied
  await page.waitForTimeout(500);
  const orderAfterDragUp = await getAssistantOrder();
  expect(orderAfterDragUp[0]).toBe(initialOrder[1]);
  expect(orderAfterDragUp[1]).toBe(initialOrder[0]);

  // Drag last assistant to second position
  const assistants = page.locator('[data-testid^="assistant-["]');
  const lastIndex = (await assistants.count()) - 1;
  const lastAssistant = assistants.nth(lastIndex);
  const secondPosition = assistants.nth(1);

  await page.waitForTimeout(3000);
  await dragElementBelow(lastAssistant, secondPosition, page);

  // Check new order
  // wait a second to make sure that the order has been applied
  await page.waitForTimeout(500);
  const orderAfterDragDown = await getAssistantOrder();
  expect(orderAfterDragDown[1]).toBe(initialOrder[lastIndex]);

  // Refresh and verify order
  await page.reload();
  const orderAfterRefresh = await getAssistantOrder();
  expect(orderAfterRefresh).toEqual(orderAfterDragDown);
});


================================================
FILE: web/tests/e2e/chat/default_agent.spec.ts
================================================
import { GREETING_MESSAGES } from "@/lib/chat/greetingMessages";
import { test, expect } from "@playwright/test";
import { loginAsRandomUser, loginAs } from "@tests/e2e/utils/auth";
import {
  sendMessage,
  startNewChat,
  verifyAgentIsChosen,
  verifyDefaultAgentIsChosen,
} from "@tests/e2e/utils/chatActions";
import {
  TOOL_IDS,
  openActionManagement,
  waitForUnifiedGreeting,
} from "@tests/e2e/utils/tools";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

// Tool-related test selectors now imported from shared utils

test.describe("Default Agent Tests", () => {
  let imageGenConfigId: string | null = null;

  test.beforeAll(async ({ browser }) => {
    // Create image generation config as admin so ImageGenerationTool becomes available
    // This is needed because the Create Agent form enables Image Generation by default
    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    await adminPage.goto("http://localhost:3000/app");
    await adminPage.waitForLoadState("networkidle");

    const apiClient = new OnyxApiClient(adminPage.request);
    try {
      imageGenConfigId = await apiClient.createImageGenerationConfig(
        `test-default-assistant-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create image generation config: ${error}`);
    }

    await adminContext.close();
  });

  test.afterAll(async ({ browser }) => {
    // Cleanup the image generation config
    if (imageGenConfigId) {
      const adminContext = await browser.newContext({
        storageState: "admin_auth.json",
      });
      const adminPage = await adminContext.newPage();
      await adminPage.goto("http://localhost:3000/app");
      await adminPage.waitForLoadState("networkidle");

      const apiClient = new OnyxApiClient(adminPage.request);
      await apiClient.deleteImageGenerationConfig(imageGenConfigId);

      await adminContext.close();
    }
  });

  test.beforeEach(async ({ page }) => {
    // Clear cookies and log in as a random user
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    // Navigate to the chat page
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  test.describe("Greeting Message Display", () => {
    test("should display greeting message when opening new chat with default agent", async ({
      page,
    }) => {
      // Look for greeting message - should be one from the predefined list
      const greeting = await waitForUnifiedGreeting(page);
      expect(GREETING_MESSAGES).toContain(greeting.trim());
    });

    test("greeting message should remain consistent during session", async ({
      page,
    }) => {
      // Get initial greeting
      const initialGreeting = await waitForUnifiedGreeting(page);

      // Reload the page
      await page.reload();
      await page.waitForLoadState("networkidle");

      // Get greeting after reload
      const greetingAfterReload = await waitForUnifiedGreeting(page);

      // Both greetings should be valid but might differ after reload
      expect(GREETING_MESSAGES).toContain(initialGreeting?.trim());
      expect(GREETING_MESSAGES).toContain(greetingAfterReload?.trim());
    });

    test("greeting should only appear for default agent", async ({ page }) => {
      // First verify greeting appears for default agent
      const greetingElement = await page.waitForSelector(
        '[data-testid="onyx-logo"]',
        { timeout: 5000 }
      );
      expect(greetingElement).toBeTruthy();

      // Create a custom agent to test non-default behavior
      await page.getByTestId("AppSidebar/more-agents").click();
      await page.getByLabel("AgentsPage/new-agent-button").click();
      await page
        .locator('input[name="name"]')
        .waitFor({ state: "visible", timeout: 10000 });
      await page.locator('input[name="name"]').fill("Custom Test Agent");
      await page
        .locator('textarea[name="description"]')
        .fill("Test Description");
      await page
        .locator('textarea[name="instructions"]')
        .fill("Test Instructions");
      await page.getByRole("button", { name: "Create" }).click();

      // Wait for agent to be created and selected
      await verifyAgentIsChosen(page, "Custom Test Agent");

      // Greeting should NOT appear for custom agent
      const customGreeting = await page.$('[data-testid="onyx-logo"]');
      expect(customGreeting).toBeNull();
    });
  });

  test.describe("Default Agent Branding", () => {
    test("should display Onyx logo for default agent", async ({ page }) => {
      // Look for Onyx logo
      const logoElement = await page.waitForSelector(
        '[data-testid="onyx-logo"]',
        { timeout: 5000 }
      );
      expect(logoElement).toBeTruthy();

      // Should NOT show agent name for default agent
      const agentNameElement = await page.$(
        '[data-testid="agent-name-display"]'
      );
      expect(agentNameElement).toBeNull();
    });

    test("custom agents should show name and icon instead of logo", async ({
      page,
    }) => {
      // Create a custom agent
      await page.getByTestId("AppSidebar/more-agents").click();
      await page.getByLabel("AgentsPage/new-agent-button").click();
      await page
        .locator('input[name="name"]')
        .waitFor({ state: "visible", timeout: 10000 });
      await page.locator('input[name="name"]').fill("Custom Agent");
      await page
        .locator('textarea[name="description"]')
        .fill("Test Description");
      await page
        .locator('textarea[name="instructions"]')
        .fill("Test Instructions");
      await page.getByRole("button", { name: "Create" }).click();

      // Wait for agent to be created and selected
      await verifyAgentIsChosen(page, "Custom Agent");

      // Should show agent name and icon, not Onyx logo
      const agentNameElement = await page.waitForSelector(
        '[data-testid="agent-name-display"]',
        { timeout: 5000 }
      );
      const nameText = await agentNameElement.textContent();
      expect(nameText).toContain("Custom Agent");

      // Onyx logo should NOT be shown
      const logoElement = await page.$('[data-testid="onyx-logo"]');
      expect(logoElement).toBeNull();
    });
  });

  test.describe("Starter Messages", () => {
    test("default agent should NOT have starter messages", async ({ page }) => {
      // Check that starter messages container does not exist for default agent
      const starterMessagesContainer = await page.$(
        '[data-testid="starter-messages"]'
      );
      expect(starterMessagesContainer).toBeNull();

      // Verify no starter message buttons exist
      const starterButtons = await page.$$('[data-testid^="starter-message-"]');
      expect(starterButtons.length).toBe(0);
    });

    test("custom agents should display starter messages", async ({ page }) => {
      // Create a custom agent with starter messages
      await page.getByTestId("AppSidebar/more-agents").click();
      await page.getByLabel("AgentsPage/new-agent-button").click();
      await page
        .locator('input[name="name"]')
        .waitFor({ state: "visible", timeout: 10000 });
      await page.locator('input[name="name"]').fill("Test Agent with Starters");
      await page
        .locator('textarea[name="description"]')
        .fill("Test Description");
      await page
        .locator('textarea[name="instructions"]')
        .fill("Test Instructions");

      // Add starter messages (if the UI supports it)
      // For now, we'll create without starter messages and check the behavior
      await page.getByRole("button", { name: "Create" }).click();

      // Wait for assistant to be created and selected
      await verifyAgentIsChosen(page, "Test Agent with Starters");

      // Starter messages container might exist but be empty for custom agents
      const starterMessagesContainer = await page.$(
        '[data-testid="starter-messages"]'
      );
      // It's okay if it exists but has no messages, or doesn't exist at all
      if (starterMessagesContainer) {
        const starterButtons = await page.$$(
          '[data-testid^="starter-message-"]'
        );
        // Custom agent without configured starter messages should have none
        expect(starterButtons.length).toBe(0);
      }
    });
  });

  test.describe("Agent Selection", () => {
    test("default agent should be selected for new chats", async ({ page }) => {
      // Verify the input placeholder indicates default agent (Onyx)
      await verifyDefaultAgentIsChosen(page);
    });

    test("default agent should NOT appear in agent selector", async ({
      page,
    }) => {
      // Open agent selector
      await page.getByTestId("AppSidebar/more-agents").click();

      // Wait for modal or assistant list to appear
      // The selector might be in a modal or dropdown.
      await page
        .getByLabel("AgentsPage/new-agent-button")
        .waitFor({ state: "visible", timeout: 5000 });

      // Look for default agent by name - it should NOT be there
      const assistantElements = await page.$$('[data-testid^="agent-"]');
      const assistantTexts = await Promise.all(
        assistantElements.map((el) => el.textContent())
      );

      // Check that the default agent is not in the list
      const hasDefaultAssistant = assistantTexts.some(
        (text) =>
          text?.includes("Assistant") &&
          !text?.includes("Test") &&
          !text?.includes("Custom")
      );
      expect(hasDefaultAssistant).toBe(false);

      // Close the modal/selector
      await page.keyboard.press("Escape");
    });

    test("should be able to switch from default to custom agent", async ({
      page,
    }) => {
      // Create a custom agent
      await page.getByTestId("AppSidebar/more-agents").click();
      await page.getByLabel("AgentsPage/new-agent-button").click();
      await page
        .locator('input[name="name"]')
        .waitFor({ state: "visible", timeout: 10000 });
      await page.locator('input[name="name"]').fill("Switch Test Agent");
      await page
        .locator('textarea[name="description"]')
        .fill("Test Description");
      await page
        .locator('textarea[name="instructions"]')
        .fill("Test Instructions");
      await page.getByRole("button", { name: "Create" }).click();

      // Verify switched to custom agent
      await verifyAgentIsChosen(page, "Switch Test Agent");

      // Start new chat to go back to default
      await startNewChat(page);

      // Should be back to default agent
      await verifyDefaultAgentIsChosen(page);
    });
  });

  test.describe("Action Management Toggle", () => {
    let imageGenConfigId: string | null = null;

    test.beforeAll(async ({ browser }) => {
      // Create image generation config as admin so ImageGenerationTool becomes available
      // Use saved admin auth state instead of logging in again
      const adminContext = await browser.newContext({
        storageState: "admin_auth.json",
      });
      const adminPage = await adminContext.newPage();
      await adminPage.goto("http://localhost:3000/app");
      await adminPage.waitForLoadState("networkidle");

      const apiClient = new OnyxApiClient(adminPage.request);
      try {
        imageGenConfigId = await apiClient.createImageGenerationConfig(
          `test-action-toggle-${Date.now()}`
        );
      } catch (error) {
        console.warn(`Failed to create image generation config: ${error}`);
      }

      await adminContext.close();
    });

    test.afterAll(async ({ browser }) => {
      // Cleanup the image generation config
      if (imageGenConfigId) {
        const adminContext = await browser.newContext({
          storageState: "admin_auth.json",
        });
        const adminPage = await adminContext.newPage();
        await adminPage.goto("http://localhost:3000/app");
        await adminPage.waitForLoadState("networkidle");

        const apiClient = new OnyxApiClient(adminPage.request);
        await apiClient.deleteImageGenerationConfig(imageGenConfigId);

        await adminContext.close();
      }
    });

    test("should display action management toggle", async ({ page }) => {
      // Look for action management toggle button
      const actionToggle = await page.waitForSelector(TOOL_IDS.actionToggle, {
        timeout: 5000,
      });
      expect(actionToggle).toBeTruthy();
    });

    test("should show web-search + image-generation tools options when clicked", async ({
      page,
    }) => {
      // This test requires admin permissions to create web search provider
      // Note: Image generation config is already created by beforeAll
      await page.context().clearCookies();
      await loginAs(page, "admin");
      await page.goto("/app");
      await page.waitForLoadState("domcontentloaded");

      const apiClient = new OnyxApiClient(page.request);
      let webSearchProviderId: number | null = null;

      try {
        // Set up a web search provider so the tool is available
        webSearchProviderId = await apiClient.createWebSearchProvider(
          "exa",
          `Test Web Search Provider ${Date.now()}`
        );
      } catch (error) {
        console.warn(
          `Failed to create web search provider for test: ${error}. Test may fail.`
        );
      }

      // Enable the tools in default agent config via API
      // Get current tools to find their IDs
      const toolsListResp = await page.request.get(
        "http://localhost:3000/api/tool"
      );
      const allTools = await toolsListResp.json();
      const toolIdsByCodeId: { [key: string]: number } = {};
      allTools.forEach((tool: any) => {
        if (tool.in_code_tool_id) {
          toolIdsByCodeId[tool.in_code_tool_id] = tool.id;
        }
      });

      // Get current config
      const currentConfigResp = await page.request.get(
        "http://localhost:3000/api/admin/default-assistant/configuration"
      );
      const currentConfig = await currentConfigResp.json();

      // Add Web Search and Image Generation tool IDs
      const toolIdsToEnable = [
        ...(currentConfig.tool_ids || []),
        toolIdsByCodeId["WebSearchTool"],
        toolIdsByCodeId["ImageGenerationTool"],
      ].filter((id) => id !== undefined);

      // Deduplicate
      const uniqueToolIds = Array.from(new Set(toolIdsToEnable));

      // Update config via API
      await page.request.patch(
        "http://localhost:3000/api/admin/default-assistant",
        {
          data: { tool_ids: uniqueToolIds },
        }
      );

      console.log(`[test] Enabled tools via API: ${uniqueToolIds}`);

      // Go back to chat
      await page.goto("/app");
      await page.waitForLoadState("domcontentloaded");

      // Will NOT show the `internal-search` option since that will be excluded when there are no connectors connected.
      // (Since we removed pre-seeded docs, we will have NO connectors connected on a fresh install; therefore, `internal-search` will not be available.)
      await openActionManagement(page);
      await expect(page.locator(TOOL_IDS.webSearchOption)).toBeVisible({
        timeout: 10000,
      });
      await expect(page.locator(TOOL_IDS.imageGenerationOption)).toBeVisible({
        timeout: 10000,
      });

      // Clean up web search provider only (image gen config is managed by beforeAll/afterAll)
      if (webSearchProviderId !== null) {
        try {
          await apiClient.deleteWebSearchProvider(webSearchProviderId);
        } catch (error) {
          console.warn(
            `Failed to delete web search provider ${webSearchProviderId}: ${error}`
          );
        }
      }
    });

    test("should be able to toggle tools on and off", async ({ page }) => {
      // Click action management toggle
      await page.click(TOOL_IDS.actionToggle);

      // Wait for tool options
      await page.waitForSelector(TOOL_IDS.options, {
        timeout: 5000,
      });

      // Find a checkbox/toggle within the image-generation tool option
      const imageGenerationToolOption = await page.$(
        TOOL_IDS.imageGenerationOption
      );
      expect(imageGenerationToolOption).toBeTruthy();

      // Look for a checkbox or switch within the tool option
      const imageGenerationToggle = await imageGenerationToolOption?.$(
        TOOL_IDS.toggleInput
      );

      if (imageGenerationToggle) {
        const initialState = await imageGenerationToggle.isChecked();
        await imageGenerationToggle.click();

        // Verify state changed
        const newState = await imageGenerationToggle.isChecked();
        expect(newState).toBe(!initialState);

        // Toggle it back
        await imageGenerationToggle.click();
        const finalState = await imageGenerationToggle.isChecked();
        expect(finalState).toBe(initialState);
      } else {
        // If no toggle found, just click the option itself
        await imageGenerationToolOption?.click();
        // Check if the option has some visual state change
        // This is a fallback behavior if toggles work differently
      }
    });

    test("tool toggle state should persist across page refresh", async ({
      page,
    }) => {
      // Click action management toggle
      await page.click(TOOL_IDS.actionToggle);

      // Wait for tool options
      await page.waitForSelector(TOOL_IDS.options, {
        timeout: 5000,
      });

      // Find the internet image-generation tool option and its toggle
      const imageGenerationToolOption = await page.$(
        TOOL_IDS.imageGenerationOption
      );
      expect(imageGenerationToolOption).toBeTruthy();

      const imageGenerationToggle = await imageGenerationToolOption?.$(
        TOOL_IDS.toggleInput
      );

      let toggledState = false;
      if (imageGenerationToggle) {
        await imageGenerationToggle.click();
        toggledState = await imageGenerationToggle.isChecked();
      } else {
        // Click the option itself if no toggle found
        await imageGenerationToolOption?.click();
        // Assume toggled if clicked
        toggledState = true;
      }

      // Reload page
      await page.reload();
      await page.waitForLoadState("networkidle");

      // Open action management again
      await page.click(TOOL_IDS.actionToggle);
      await page.waitForSelector(TOOL_IDS.options, {
        timeout: 5000,
      });

      // Check if state persisted
      const imageGenerationToolOptionAfterReload = await page.$(
        TOOL_IDS.imageGenerationOption
      );
      const imageGenerationToggleAfterReload =
        await imageGenerationToolOptionAfterReload?.$(TOOL_IDS.toggleInput);

      if (imageGenerationToggleAfterReload) {
        const stateAfterReload =
          await imageGenerationToggleAfterReload.isChecked();
        expect(stateAfterReload).toBe(toggledState);
      }
    });
  });
});

test.describe("End-to-End Default Agent Flow", () => {
  let imageGenConfigId: string | null = null;

  test.beforeAll(async ({ browser }) => {
    // Create image generation config as admin so ImageGenerationTool becomes available
    // Use saved admin auth state instead of logging in again
    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    await adminPage.goto("http://localhost:3000/app");
    await adminPage.waitForLoadState("networkidle");

    const apiClient = new OnyxApiClient(adminPage.request);
    try {
      imageGenConfigId = await apiClient.createImageGenerationConfig(
        `test-e2e-journey-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create image generation config: ${error}`);
    }

    await adminContext.close();
  });

  test.afterAll(async ({ browser }) => {
    // Cleanup the image generation config
    if (imageGenConfigId) {
      const adminContext = await browser.newContext({
        storageState: "admin_auth.json",
      });
      const adminPage = await adminContext.newPage();
      await adminPage.goto("http://localhost:3000/app");
      await adminPage.waitForLoadState("networkidle");

      const apiClient = new OnyxApiClient(adminPage.request);
      await apiClient.deleteImageGenerationConfig(imageGenConfigId);

      await adminContext.close();
    }
  });

  test("complete user journey with default agent", async ({ page }) => {
    // Clear cookies and log in as a random user
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    // Navigate to the chat page
    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    // Verify greeting message appears
    await expect(page.locator('[data-testid="onyx-logo"]')).toBeVisible();

    // Verify Onyx logo is displayed
    await expect(page.locator('[data-testid="onyx-logo"]')).toBeVisible();

    // Send a message using the chat input
    await sendMessage(page, "Hello, can you help me?");

    // Open action management and verify tools
    await openActionManagement(page);

    // Close action management
    await page.keyboard.press("Escape");

    // Start a new chat
    await startNewChat(page);

    // Verify we're back to default agent with greeting
    await expect(page.locator('[data-testid="onyx-logo"]')).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/chat/default_app_mode.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

test.describe("Default App Mode", () => {
  test("loads persisted Search mode after refresh", async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    // Arrange
    const apiClient = new OnyxApiClient(page.request);
    const ccPairId = await apiClient.createFileConnector(
      "Default App Mode Test Connector"
    );
    await apiClient.setDefaultAppMode("SEARCH");

    try {
      // Act
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      // Assert
      const appModeButton = page.getByLabel("Change app mode");
      await appModeButton.waitFor({ state: "visible", timeout: 10000 });
      await expect(appModeButton).toHaveText(/Search/);
    } finally {
      await apiClient.setDefaultAppMode("CHAT");
      await apiClient.deleteCCPair(ccPairId);
    }
  });
});


================================================
FILE: web/tests/e2e/chat/file_preview_modal.spec.ts
================================================
import { test, expect, Page } from "@playwright/test";
import { loginAsRandomUser } from "../utils/auth";
import * as fs from "fs";
import * as path from "path";

/**
 * Builds a newline-delimited JSON stream body matching the packet
 * format that useChatController expects:
 *
 * 1. MessageResponseIDInfo — identifies the user/assistant messages
 * 2. Packet-wrapped streaming objects ({placement, obj}) — the actual content
 * 3. BackendMessage — the final completed message
 *
 * Each line is a raw JSON object parsed by handleSSEStream.
 */
function buildMockStream(messageContent: string): string {
  const packets = [
    // 1. Message ID info — tells the frontend the message IDs
    JSON.stringify({
      user_message_id: 1,
      reserved_assistant_message_id: 2,
    }),
    // 2. Streaming content packets wrapped in {placement, obj}
    JSON.stringify({
      placement: { turn_index: 0 },
      obj: {
        type: "message_start",
        id: "mock-message-id",
        content: "",
        final_documents: null,
      },
    }),
    JSON.stringify({
      placement: { turn_index: 0 },
      obj: {
        type: "message_delta",
        content: messageContent,
      },
    }),
    JSON.stringify({
      placement: { turn_index: 0 },
      obj: {
        type: "message_end",
      },
    }),
    JSON.stringify({
      placement: { turn_index: 0 },
      obj: {
        type: "stop",
        stop_reason: "finished",
      },
    }),
    // 3. Final BackendMessage — the completed message record
    JSON.stringify({
      message_id: 2,
      message_type: "assistant",
      research_type: null,
      parent_message: 1,
      latest_child_message: null,
      message: messageContent,
      rephrased_query: null,
      context_docs: null,
      time_sent: new Date().toISOString(),
      citations: {},
      files: [],
      tool_call: null,
      overridden_model: null,
    }),
  ];
  return packets.join("\n") + "\n";
}

/**
 * Sends a message while intercepting the backend response with
 * a controlled mock stream. Returns once the AI message renders.
 */
async function sendMessageWithMockResponse(
  page: Page,
  userMessage: string,
  mockResponseContent: string
) {
  const existingMessageCount = await page
    .locator('[data-testid="onyx-ai-message"]')
    .count();

  // Intercept the send-chat-message endpoint and return our mock stream
  await page.route("**/api/chat/send-chat-message", async (route) => {
    await route.fulfill({
      status: 200,
      contentType: "application/json",
      body: buildMockStream(mockResponseContent),
    });
  });

  await page.locator("#onyx-chat-input-textarea").click();
  await page.locator("#onyx-chat-input-textarea").fill(userMessage);
  await page.locator("#onyx-chat-input-send-button").click();

  // Wait for the AI message to appear
  await expect(page.locator('[data-testid="onyx-ai-message"]')).toHaveCount(
    existingMessageCount + 1,
    { timeout: 30000 }
  );

  // Unroute so future requests go through normally
  await page.unroute("**/api/chat/send-chat-message");
}

const MOCK_FILE_ID = "00000000-0000-0000-0000-000000000001";

test.describe("File preview modal from chat file links", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAsRandomUser(page);
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  test("clicking a text file link opens the TextViewModal", async ({
    page,
  }) => {
    const mockContent = `Here is your file: [notes.txt](/api/chat/file/${MOCK_FILE_ID})`;

    // Mock the file endpoint to return text content
    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "text/plain",
        body: "Hello from the mock file!",
      });
    });

    await sendMessageWithMockResponse(page, "Give me the file", mockContent);

    // Find the link in the AI message and click it
    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const fileLink = aiMessage.locator("a").filter({ hasText: "notes.txt" });
    await expect(fileLink).toBeVisible({ timeout: 5000 });
    await fileLink.click();

    // Verify the modal opens
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Verify the file name is shown in the header
    await expect(modal.getByText("notes.txt")).toBeVisible();

    // Verify the download link exists
    await expect(modal.locator("a[download]")).toBeVisible();

    // Verify the file content is rendered
    await expect(modal.getByText("Hello from the mock file!")).toBeVisible();
  });

  test("clicking a code file link opens the PreviewModal with syntax highlighting", async ({
    page,
  }) => {
    const mockContent = `Here is your script: [app.py](/api/chat/file/${MOCK_FILE_ID})`;
    const pythonCode = 'def hello():\n    print("Hello, world!")';

    // Mock the file endpoint to return Python code
    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/octet-stream",
        body: pythonCode,
      });
    });

    await sendMessageWithMockResponse(page, "Give me the script", mockContent);

    // Find the link in the AI message and click it
    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const fileLink = aiMessage.locator("a").filter({ hasText: "app.py" });
    await expect(fileLink).toBeVisible({ timeout: 5000 });
    await fileLink.click();

    // Verify the PreviewModal opens
    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Verify the file name is shown in the header
    await expect(modal.getByText("app.py")).toBeVisible();

    // Verify the header description shows language and line info
    await expect(
      modal
        .locator("div")
        .filter({ hasText: /python/i })
        .first()
    ).toBeVisible();
    await expect(
      modal
        .locator("div")
        .filter({ hasText: /2 lines/ })
        .first()
    ).toBeVisible();

    // Verify the code content is rendered
    await expect(modal.getByText("Hello, world!")).toBeVisible();

    // Verify the download icon button exists (tooltip-only, no visible text)
    const downloadButton = modal.locator("button").last();
    await expect(downloadButton).toBeVisible();

    // Hover to verify the download tooltip appears
    await downloadButton.hover();
    await expect(page.getByText("Download")).toBeVisible({ timeout: 3000 });
  });

  test("download button triggers file download", async ({ page }) => {
    const mockContent = `Here: [data.csv](/api/chat/file/${MOCK_FILE_ID})`;

    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "text/csv",
        body: "name,age\nAlice,30\nBob,25",
      });
    });

    await sendMessageWithMockResponse(page, "Give me the csv", mockContent);

    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const fileLink = aiMessage.locator("a").filter({ hasText: "data.csv" });
    await expect(fileLink).toBeVisible({ timeout: 5000 });
    await fileLink.click();

    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Click the download link and verify a download starts
    const downloadPromise = page.waitForEvent("download");
    await modal.locator("a[download]").last().click();
    const download = await downloadPromise;

    expect(download.suggestedFilename()).toContain("data.csv");
  });

  test("clicking a .docx file link opens the preview modal and renders content", async ({
    page,
  }) => {
    const mockContent = `Here is your document: [report.docx](/api/chat/file/${MOCK_FILE_ID})`;

    // Serve a real .docx fixture so docx-preview can parse it
    const docxBuffer = fs.readFileSync(
      path.join(__dirname, "../fixtures/three_images.docx")
    );

    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType:
          "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        body: docxBuffer,
      });
    });

    await sendMessageWithMockResponse(
      page,
      "Give me the document",
      mockContent
    );

    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const fileLink = aiMessage.locator("a").filter({ hasText: "report.docx" });
    await expect(fileLink).toBeVisible({ timeout: 5000 });
    await fileLink.click();

    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Verify the file name is shown in the header
    await expect(modal.getByText("report.docx")).toBeVisible();

    // Verify the header describes it as a Word Document
    await expect(
      modal
        .locator("div")
        .filter({ hasText: /Word Document/ })
        .first()
    ).toBeVisible();

    // Verify docx-preview rendered content into the body container
    await expect(modal.locator(".docx-host")).toBeVisible({ timeout: 10000 });

    // Verify the download button exists
    await expect(modal.locator("a[download]")).toBeVisible();
  });

  test("clicking a legacy .doc file link shows unsupported message", async ({
    page,
  }) => {
    const mockContent = `Here is your document: [old_report.doc](/api/chat/file/${MOCK_FILE_ID})`;

    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: "application/msword",
        body: "fake binary content",
      });
    });

    await sendMessageWithMockResponse(
      page,
      "Give me the old document",
      mockContent
    );

    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const fileLink = aiMessage
      .locator("a")
      .filter({ hasText: "old_report.doc" });
    await expect(fileLink).toBeVisible({ timeout: 5000 });
    await fileLink.click();

    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Verify the file name is shown
    await expect(modal.getByText("old_report.doc")).toBeVisible();

    // Verify the legacy .doc message is shown
    await expect(
      modal.getByText(/Legacy .doc format cannot be previewed/)
    ).toBeVisible();

    // Verify download button is still available
    await expect(modal.locator("a[download]")).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/chat/input_focus_retention.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAsWorkerUser } from "@tests/e2e/utils/auth";

test.describe(`Chat Input Focus Retention`, () => {
  test.beforeEach(async ({ page }, testInfo) => {
    await page.context().clearCookies();
    await loginAsWorkerUser(page, testInfo.workerIndex);
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  test("clicking empty space retains focus on chat input", async ({ page }) => {
    const textarea = page.locator("#onyx-chat-input-textarea");
    await textarea.waitFor({ state: "visible", timeout: 10000 });

    // Focus the textarea and type something
    await textarea.focus();
    await textarea.fill("test message");
    await expect(textarea).toBeFocused();

    // Click on the main container's empty space (top-left corner)
    const container = page.locator("[data-main-container]");
    await container.click({ position: { x: 10, y: 10 } });

    // Focus should remain on the textarea
    await expect(textarea).toBeFocused();
  });

  test("clicking interactive elements still moves focus away", async ({
    page,
  }) => {
    const textarea = page.locator("#onyx-chat-input-textarea");
    await textarea.waitFor({ state: "visible", timeout: 10000 });

    // Focus the textarea
    await textarea.focus();
    await expect(textarea).toBeFocused();

    // Click on an interactive element inside the container
    const button = page.locator("[data-main-container] button").first();
    await button.waitFor({ state: "visible", timeout: 5000 });
    await button.click();

    // Focus should have moved away from the textarea
    await expect(textarea).not.toBeFocused();
  });
});


================================================
FILE: web/tests/e2e/chat/live_agent.spec.ts
================================================
import { test } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";
import {
  sendMessage,
  startNewChat,
  verifyAgentIsChosen,
  verifyDefaultAgentIsChosen,
} from "@tests/e2e/utils/chatActions";

test("Chat workflow", async ({ page }) => {
  // Clear cookies and log in as a random user
  await page.context().clearCookies();
  // Use waitForSelector for robustness instead of expect().toBeVisible()
  // await page.waitForSelector(
  //   `//div[@aria-label="Agents Modal"]//*[contains(text(), "${agentName}") and not(contains(@class, 'invisible'))]`,
  //   { state: "visible", timeout: 10000 }
  // );
  await loginAsRandomUser(page);

  // Navigate to the chat page
  await page.goto("/app");
  await page.waitForLoadState("networkidle");

  // Test interaction with the Default agent
  await sendMessage(page, "Hi");

  // Start a new chat session
  await startNewChat(page);

  // Verify the presence of the expected text
  await verifyDefaultAgentIsChosen(page);

  // Test creation of a new assistant
  await page.getByTestId("AppSidebar/more-agents").click();
  await page.getByLabel("AgentsPage/new-agent-button").click();
  await page.locator('input[name="name"]').click();
  await page.locator('input[name="name"]').fill("Test Assistant");
  await page.locator('textarea[name="description"]').click();
  await page
    .locator('textarea[name="description"]')
    .fill("Test Assistant Description");
  await page.locator('textarea[name="instructions"]').click();
  await page
    .locator('textarea[name="instructions"]')
    .fill("Test Assistant Instructions");
  await page.getByRole("button", { name: "Create" }).click();

  // Verify the successful creation of the new assistant
  await verifyAgentIsChosen(page, "Test Assistant");

  // Start another new chat session
  await startNewChat(page);
  await page.waitForLoadState("networkidle");

  // Verify the presence of the default agent text
  await verifyDefaultAgentIsChosen(page);
});


================================================
FILE: web/tests/e2e/chat/llm_ordering.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { verifyCurrentModel } from "@tests/e2e/utils/chatActions";
import { ensureImageGenerationEnabled } from "@tests/e2e/utils/agentUtils";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

test.describe("LLM Ordering", () => {
  let imageGenConfigId: string | null = null;

  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    const apiClient = new OnyxApiClient(page.request);

    // Create image generation config so the checkbox appears
    try {
      imageGenConfigId = await apiClient.createImageGenerationConfig(
        `test-image-gen-${Date.now()}`
      );
    } catch (error) {
      console.warn(`Failed to create image generation config: ${error}`);
    }
  });

  test.afterEach(async ({ page }) => {
    const apiClient = new OnyxApiClient(page.request);

    if (imageGenConfigId !== null) {
      try {
        await apiClient.deleteImageGenerationConfig(imageGenConfigId);
        imageGenConfigId = null;
      } catch (error) {
        console.warn(`Failed to delete image gen config: ${error}`);
      }
    }
  });

  test("Non-image-generation model visibility in chat input bar", async ({
    page,
  }) => {
    await ensureImageGenerationEnabled(page);

    await page.goto("/app");
    await page.waitForSelector("#onyx-chat-input-textarea", { timeout: 10000 });

    const trigger = page.getByTestId("llm-popover-trigger");
    const originalTriggerText = (await trigger.textContent())?.trim() ?? "";

    await trigger.click();
    await page.waitForSelector('[role="dialog"]', { timeout: 5000 });

    const dialog = page.locator('[role="dialog"]');
    const allModelItems = dialog.locator("[data-selected]");
    await expect(allModelItems.first()).toBeVisible({ timeout: 5000 });

    const count = await allModelItems.count();
    expect(count).toBeGreaterThan(0);

    // Pick the first non-selected model so the trigger text changes after click
    const nonSelectedItem = dialog.locator('[data-selected="false"]').first();
    const hasNonSelected = (await nonSelectedItem.count()) > 0;
    const targetItem = hasNonSelected ? nonSelectedItem : allModelItems.first();

    await expect(targetItem).toBeVisible();
    await targetItem.click();

    // Verify the popover closed and the trigger updated
    await expect(dialog).toBeHidden();

    if (hasNonSelected) {
      const updatedTriggerText = (await trigger.textContent())?.trim() ?? "";
      expect(updatedTriggerText).not.toBe(originalTriggerText);
    }
  });
});


================================================
FILE: web/tests/e2e/chat/llm_runtime_selection.spec.ts
================================================
import { expect, Page, test } from "@playwright/test";
import { loginAs, loginAsWorkerUser } from "@tests/e2e/utils/auth";
import {
  selectModelFromInputPopover,
  sendMessage,
  startNewChat,
  verifyCurrentModel,
} from "@tests/e2e/utils/chatActions";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

type SendChatMessagePayload = {
  llm_override?: {
    model_provider?: string | null;
    model_version?: string | null;
    temperature?: number | null;
  } | null;
};

function uniqueName(prefix: string): string {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}

async function openChat(page: Page): Promise<void> {
  await page.goto("/app");
  await page.waitForLoadState("networkidle");
  await page.waitForSelector("#onyx-chat-input-textarea", { timeout: 15000 });
}

async function loginWithCleanCookies(
  page: Page,
  user: "admin" | number
): Promise<void> {
  await page.context().clearCookies();
  if (typeof user === "number") {
    await loginAsWorkerUser(page, user);
  } else {
    await loginAs(page, user);
  }
}

async function createLlmProvider(
  page: Page,
  params: {
    name: string;
    provider: string;
    defaultModelName: string;
    isPublic: boolean;
    groupIds?: number[];
  }
): Promise<number> {
  const response = await page.request.put(
    "/api/admin/llm/provider?is_creation=true",
    {
      data: {
        name: params.name,
        provider: params.provider,
        api_key: "e2e-placeholder-api-key-not-used",
        default_model_name: params.defaultModelName,
        is_public: params.isPublic,
        groups: params.groupIds ?? [],
        personas: [],
        model_configurations: [
          {
            name: params.defaultModelName,
            is_visible: true,
          },
        ],
      },
    }
  );

  expect(response.ok()).toBeTruthy();
  const data = (await response.json()) as { id: number };
  return data.id;
}

async function sendMessageAndCapturePayload(
  page: Page,
  message: string
): Promise<SendChatMessagePayload> {
  const requestPromise = page.waitForRequest(
    (request) =>
      request.url().includes("/api/chat/send-chat-message") &&
      request.method() === "POST"
  );

  await sendMessage(page, message);

  const request = await requestPromise;
  return request.postDataJSON() as SendChatMessagePayload;
}

type LlmProviderBasics = {
  name: string;
  model_configurations: Array<{ name: string }>;
};

async function listUserLlmProviders(page: Page): Promise<LlmProviderBasics[]> {
  const response = await page.request.get("/api/llm/provider");
  expect(response.ok()).toBeTruthy();
  const data = (await response.json()) as {
    providers: LlmProviderBasics[];
  };
  return data.providers;
}

async function waitForModelOnProvider(
  page: Page,
  modelName: string,
  providerNames: string[]
): Promise<void> {
  await expect
    .poll(
      async () => {
        const providers = await listUserLlmProviders(page);
        return providerNames.every((providerName) =>
          providers.some(
            (provider) =>
              provider.name === providerName &&
              provider.model_configurations.some(
                (modelConfig) => modelConfig.name === modelName
              )
          )
        );
      },
      { timeout: 30000 }
    )
    .toBeTruthy();
}

function buildMockStreamResponse(turn: number): string {
  const userMessageId = turn * 100 + 1;
  const agentMessageId = turn * 100 + 2;

  const packets = [
    {
      user_message_id: userMessageId,
      reserved_assistant_message_id: agentMessageId,
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: {
        type: "message_start",
        id: `mock-${agentMessageId}`,
        content: "Mock response for provider collision assertion.",
        final_documents: null,
      },
    },
    {
      placement: { turn_index: 0, tab_index: 0 },
      obj: { type: "stop", stop_reason: "finished" },
    },
    {
      message_id: agentMessageId,
      citations: {},
      files: [],
    },
  ];

  return `${packets.map((packet) => JSON.stringify(packet)).join("\n")}\n`;
}

test.describe("LLM Runtime Selection", () => {
  let providersToCleanup: number[] = [];
  let groupsToCleanup: number[] = [];

  test.beforeEach(async ({ page }, testInfo) => {
    providersToCleanup = [];
    groupsToCleanup = [];
    await loginWithCleanCookies(page, testInfo.workerIndex);
  });

  test.afterEach(async ({ page }) => {
    await loginWithCleanCookies(page, "admin");

    const client = new OnyxApiClient(page.request);
    const providerIds = Array.from(new Set(providersToCleanup));
    const groupIds = Array.from(new Set(groupsToCleanup));

    for (const providerId of providerIds) {
      try {
        await client.deleteProvider(providerId);
      } catch (error) {
        console.warn(
          `Cleanup failed for provider ${providerId}: ${String(error)}`
        );
      }
    }

    for (const groupId of groupIds) {
      try {
        await client.deleteUserGroup(groupId);
      } catch (error) {
        console.warn(`Cleanup failed for group ${groupId}: ${String(error)}`);
      }
    }
  });

  test("model selection persists across refresh and subsequent messages in the same chat", async ({
    page,
  }, testInfo) => {
    await loginWithCleanCookies(page, "admin");

    const persistenceProviderName = uniqueName("PW Runtime Persist Provider");
    const persistenceModelName = `persist-runtime-model-${Date.now()}`;
    const persistenceProviderId = await createLlmProvider(page, {
      name: persistenceProviderName,
      provider: "openai",
      defaultModelName: persistenceModelName,
      isPublic: true,
    });
    providersToCleanup.push(persistenceProviderId);
    await waitForModelOnProvider(page, persistenceModelName, [
      persistenceProviderName,
    ]);

    await loginWithCleanCookies(page, testInfo.workerIndex);
    await openChat(page);

    let turn = 0;
    await page.route("**/api/chat/send-chat-message", async (route) => {
      turn += 1;
      await route.fulfill({
        status: 200,
        contentType: "text/plain",
        body: buildMockStreamResponse(turn),
      });
    });

    const selectedModelDisplay = await selectModelFromInputPopover(page, [
      persistenceModelName,
    ]);
    await verifyCurrentModel(page, selectedModelDisplay);

    const firstPayload = await sendMessageAndCapturePayload(
      page,
      "First persistence check message."
    );
    const firstModelVersion = firstPayload.llm_override?.model_version;
    const firstModelProvider = firstPayload.llm_override?.model_provider;

    expect(firstModelVersion).toBeTruthy();
    expect(firstModelProvider).toBeTruthy();
    expect(firstModelProvider).toBe(persistenceProviderName);
    expect(page.url()).toContain("chatId=");

    await page.reload();
    await page.waitForLoadState("networkidle");
    await page.waitForSelector("#onyx-chat-input-textarea", { timeout: 15000 });

    await verifyCurrentModel(page, selectedModelDisplay);

    const secondPayload = await sendMessageAndCapturePayload(
      page,
      "Second persistence check after refresh."
    );

    expect(secondPayload.llm_override?.model_version).toBe(firstModelVersion);
    expect(secondPayload.llm_override?.model_provider).toBe(firstModelProvider);
  });

  test("regenerate with alternate model preserves version history semantics", async ({
    page,
  }) => {
    await openChat(page);

    let turn = 0;
    await page.route("**/api/chat/send-chat-message", async (route) => {
      turn += 1;
      await route.fulfill({
        status: 200,
        contentType: "text/plain",
        body: buildMockStreamResponse(turn),
      });
    });

    // Keep this aligned with the existing stable regenerate flow test.
    const initialModelDisplay = await selectModelFromInputPopover(page, [
      "GPT-4.1",
      "GPT-4o Mini",
      "GPT-4o",
    ]);
    await verifyCurrentModel(page, initialModelDisplay);

    const initialPayload = await sendMessageAndCapturePayload(
      page,
      "Generate a short sentence for regeneration."
    );
    const initialModelVersion = initialPayload.llm_override?.model_version;

    const aiMessage = page.locator('[data-testid="onyx-ai-message"]').first();
    await aiMessage.hover();

    const regenerateControl = aiMessage.getByTestId("AgentMessage/regenerate");
    await regenerateControl.click();
    await page.waitForSelector('[role="dialog"]', {
      state: "visible",
      timeout: 10000,
    });

    const regenerateDialog = page.locator('[role="dialog"]');
    const alternateModelOption = regenerateDialog
      .locator('[data-selected="false"]')
      .first();

    test.skip(
      (await regenerateDialog.locator('[data-selected="false"]').count()) === 0,
      "Regenerate model picker requires at least two runtime model options"
    );

    const regenerateRequestPromise = page.waitForRequest(
      (request) =>
        request.url().includes("/api/chat/send-chat-message") &&
        request.method() === "POST"
    );

    await expect(alternateModelOption).toBeVisible({ timeout: 15000 });
    await alternateModelOption.click();

    const regeneratePayload = (await regenerateRequestPromise.then((request) =>
      request.postDataJSON()
    )) as SendChatMessagePayload;

    await page.waitForSelector('[data-testid="AgentMessage/regenerate"]', {
      state: "visible",
      timeout: 20000,
    });

    const messageSwitcher = page
      .getByTestId("MessageSwitcher/container")
      .first();
    await expect(messageSwitcher).toBeVisible({ timeout: 10000 });
    await expect(messageSwitcher).toContainText("2/2");

    await messageSwitcher
      .locator("..")
      .locator("svg")
      .first()
      .locator("..")
      .click();
    await expect(messageSwitcher).toContainText("1/2");

    await messageSwitcher
      .locator("..")
      .locator("svg")
      .last()
      .locator("..")
      .click();
    await expect(messageSwitcher).toContainText("2/2");

    expect(regeneratePayload.llm_override?.model_version).toBeTruthy();
    expect(regeneratePayload.llm_override?.model_provider).toBeTruthy();
    expect(regeneratePayload.llm_override?.model_version).not.toBe(
      initialModelVersion
    );
  });

  test("same model name across providers resolves to provider-specific runtime payloads", async ({
    page,
  }, testInfo) => {
    await loginWithCleanCookies(page, "admin");

    const sharedModelName = `shared-runtime-model-${Date.now()}`;
    const openAiProviderName = uniqueName("PW Runtime OpenAI");
    const anthropicProviderName = uniqueName("PW Runtime Anthropic");

    const openAiProviderId = await createLlmProvider(page, {
      name: openAiProviderName,
      provider: "openai",
      defaultModelName: sharedModelName,
      isPublic: true,
    });
    const anthropicProviderId = await createLlmProvider(page, {
      name: anthropicProviderName,
      provider: "anthropic",
      defaultModelName: sharedModelName,
      isPublic: true,
    });

    providersToCleanup.push(openAiProviderId, anthropicProviderId);

    await waitForModelOnProvider(page, sharedModelName, [
      openAiProviderName,
      anthropicProviderName,
    ]);

    await loginWithCleanCookies(page, testInfo.workerIndex);

    const capturedPayloads: SendChatMessagePayload[] = [];
    let turn = 0;

    await page.route("**/api/chat/send-chat-message", async (route) => {
      turn += 1;
      capturedPayloads.push(
        route.request().postDataJSON() as SendChatMessagePayload
      );
      await route.fulfill({
        status: 200,
        contentType: "text/plain",
        body: buildMockStreamResponse(turn),
      });
    });

    await openChat(page);

    await page.getByTestId("AppInputBar/llm-popover-trigger").click();
    await page.waitForSelector('[role="dialog"]', { state: "visible" });
    const dialog = page.locator('[role="dialog"]');
    await dialog.getByPlaceholder("Search models...").fill(sharedModelName);

    const sharedModelOptions = dialog.locator("[data-selected]");
    await expect(sharedModelOptions).toHaveCount(2);
    const openAiModelOption = dialog
      .getByRole("region", { name: /openai/i })
      .locator("[data-selected]")
      .first();
    await expect(openAiModelOption).toBeVisible();
    await openAiModelOption.click();
    await page.waitForSelector('[role="dialog"]', { state: "hidden" });

    await sendMessage(page, "Collision payload check one.");
    await expect.poll(() => capturedPayloads.length).toBe(1);

    // Use a new session so runtime selection is not overwritten by the previous
    // chat session's persisted model override.
    await startNewChat(page);
    await page.waitForSelector("#onyx-chat-input-textarea", { timeout: 15000 });

    await page.getByTestId("AppInputBar/llm-popover-trigger").click();
    await page.waitForSelector('[role="dialog"]', { state: "visible" });
    const secondDialog = page.locator('[role="dialog"]');
    await secondDialog
      .getByPlaceholder("Search models...")
      .fill(sharedModelName);

    const secondSharedModelOptions = secondDialog.locator("[data-selected]");
    await expect(secondSharedModelOptions).toHaveCount(2);
    const anthropicModelOption = secondDialog
      .getByRole("region", { name: /anthropic/i })
      .locator("[data-selected]")
      .first();
    await expect(anthropicModelOption).toBeVisible();
    await anthropicModelOption.click();
    await page.waitForSelector('[role="dialog"]', { state: "hidden" });

    await page.getByTestId("AppInputBar/llm-popover-trigger").click();
    await page.waitForSelector('[role="dialog"]', { state: "visible" });
    const verifyDialog = page.locator('[role="dialog"]');
    const selectedAnthropicOption = verifyDialog
      .getByRole("region", { name: /anthropic/i })
      .locator('[data-selected="true"]');
    await expect(selectedAnthropicOption).toHaveCount(1);
    await page.keyboard.press("Escape");
    await page.waitForSelector('[role="dialog"]', { state: "hidden" });

    await sendMessage(page, "Collision payload check two.");
    await expect.poll(() => capturedPayloads.length).toBe(2);

    for (const payload of capturedPayloads) {
      expect(payload.llm_override?.model_version).toBe(sharedModelName);
      expect(payload.llm_override?.model_provider).toBeTruthy();
    }

    const providersUsed = capturedPayloads.map(
      (payload) => payload.llm_override?.model_provider
    );

    expect(new Set(providersUsed)).toEqual(
      new Set([openAiProviderName, anthropicProviderName])
    );
  });

  test("restricted provider model is unavailable to unauthorized runtime user selection", async ({
    page,
  }, testInfo) => {
    await loginWithCleanCookies(page, "admin");

    const client = new OnyxApiClient(page.request);
    const restrictedGroupName = uniqueName("PW Runtime Restricted Group");
    const restrictedModelName = `restricted-runtime-model-${Date.now()}`;
    const restrictedProviderName = uniqueName("PW Runtime Restricted Provider");

    let groupId: number;
    try {
      groupId = await client.createUserGroup(restrictedGroupName);
    } catch (error) {
      const errorText = String(error);
      const requiresEnterpriseLicense =
        errorText.includes("enterprise_license_required") ||
        errorText.includes("This feature requires an Enterprise license");
      test.skip(
        requiresEnterpriseLicense,
        "Restricted provider test requires Enterprise license-enabled environment"
      );
      throw error;
    }
    groupsToCleanup.push(groupId);

    const restrictedProviderId = await createLlmProvider(page, {
      name: restrictedProviderName,
      provider: "openai",
      defaultModelName: restrictedModelName,
      isPublic: false,
      groupIds: [groupId],
    });
    providersToCleanup.push(restrictedProviderId);

    await loginWithCleanCookies(page, testInfo.workerIndex);
    await openChat(page);

    await page.getByTestId("AppInputBar/llm-popover-trigger").click();
    await page.waitForSelector('[role="dialog"]', { state: "visible" });

    const dialog = page.locator('[role="dialog"]');
    await dialog.getByPlaceholder("Search models...").fill(restrictedModelName);

    const restrictedModelOption = dialog
      .locator("[data-selected]")
      .filter({ hasText: restrictedModelName });

    await expect(restrictedModelOption).toHaveCount(0);
    await expect(dialog.getByText("No models found")).toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/chat/message_edit_regenerate.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";
import { sendMessage, switchModel } from "@tests/e2e/utils/chatActions";

test.describe("Message Edit and Regenerate Tests", () => {
  test.beforeEach(async ({ page }) => {
    // Clear cookies and log in as a random user
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    // Navigate to the chat page
    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  test("Complete message editing functionality", async ({ page }) => {
    // Send initial message
    await sendMessage(page, "What is 2+2?");

    // Test cancel editing
    let userMessage = page.locator("#onyx-human-message").first();
    await userMessage.hover();
    let editButton = userMessage
      .locator('[data-testid="HumanMessage/edit-button"]')
      .first();
    await editButton.click();

    let textarea = userMessage.locator("textarea");
    await textarea.fill("This edit will be cancelled");

    const cancelButton = userMessage.locator('button:has-text("Cancel")');
    await cancelButton.click();

    // Verify original message is preserved
    let messageContent = await userMessage.textContent();
    expect(messageContent).toContain("What is 2+2?");
    expect(messageContent).not.toContain("This edit will be cancelled");

    // Edit the message for real
    await userMessage.hover();
    editButton = userMessage
      .locator('[data-testid="HumanMessage/edit-button"]')
      .first();
    await editButton.click();

    textarea = userMessage.locator("textarea");
    await textarea.fill("What is 3+3?");

    let submitButton = userMessage.locator('button:has-text("Submit")');
    await submitButton.click();

    // Wait for the new AI response to complete
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "detached",
    });
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "visible",
      timeout: 30000,
    });

    // Verify edited message is displayed
    messageContent = await page
      .locator("#onyx-human-message")
      .first()
      .textContent();
    expect(messageContent).toContain("What is 3+3?");

    // Verify version switcher appears and shows 2/2
    let messageSwitcher = page.getByTestId("MessageSwitcher/container").first();
    await expect(messageSwitcher).toBeVisible();
    await expect(messageSwitcher).toContainText("2/2");

    // Edit again to create a third version
    userMessage = page.locator("#onyx-human-message").first();
    await userMessage.hover();
    editButton = userMessage
      .locator('[data-testid="HumanMessage/edit-button"]')
      .first();
    await editButton.click();

    textarea = userMessage.locator("textarea");
    await textarea.fill("What is 4+4?");

    submitButton = userMessage.locator('button:has-text("Submit")');
    await submitButton.click();

    // Wait for the new AI response to complete
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "detached",
    });
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "visible",
      timeout: 30000,
    });

    // Verify navigation between versions
    // Find the switcher showing "3 / 3"
    let switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible();
    await expect(switcherSpan).toContainText("3/3");

    // Navigate to previous version - click the first svg icon's parent (left chevron)
    await switcherSpan
      .locator("..")
      .locator("svg")
      .first()
      .locator("..")
      .click();

    // Check we're now at "2 / 3"
    switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible({ timeout: 5000 });
    await expect(switcherSpan).toContainText("2/3");

    // Navigate to first version - re-find the button each time
    await switcherSpan
      .locator("..")
      .locator("svg")
      .first()
      .locator("..")
      .click();

    // Check we're now at "1 / 3"
    switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible({ timeout: 5000 });
    await expect(switcherSpan).toContainText("1/3");

    // Navigate forward using next button - click the last svg icon's parent (right chevron)
    await switcherSpan
      .locator("..")
      .locator("svg")
      .last()
      .locator("..")
      .click();

    // Check we're back at "2 / 3"
    switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible({ timeout: 5000 });
    await expect(switcherSpan).toContainText("2/3");
  });

  test("Message regeneration with model selection", async ({ page }) => {
    // make sure we're using something other than GPT-4o Mini, otherwise the below
    // will fail since we need to switch to a different model for the test
    await switchModel(page, "GPT-4.1");

    // Send initial message
    await sendMessage(page, "hi! Respond with no more than a sentence");

    // Capture the original AI response text (just the message content, not buttons/switcher)
    const aiMessage = page.locator('[data-testid="onyx-ai-message"]').first();
    // Target the actual message content div (the one with select-text class)
    const messageContent = aiMessage.locator(".select-text").first();
    const originalResponseText = await messageContent.textContent();

    // Hover over AI message to show regenerate button
    await aiMessage.hover();

    // Click regenerate button using its data-testid
    const regenerateButton = aiMessage.getByTestId("AgentMessage/regenerate");
    await regenerateButton.click();

    // Wait for dropdown to appear and select GPT-4o Mini
    await page.waitForSelector('[role="dialog"]', { state: "visible" });

    // Look for the GPT-4o Mini option in the dropdown
    const gpt4oMiniOption = page
      .locator('[role="dialog"]')
      .getByText("GPT-4o Mini", { exact: true })
      .first();
    await gpt4oMiniOption.click();

    // Wait for regeneration to complete by waiting for feedback buttons to appear
    // The feedback buttons (copy, like, dislike, regenerate) appear when streaming is complete
    await page.waitForSelector('[data-testid="AgentMessage/regenerate"]', {
      state: "visible",
      timeout: 15000,
    });

    // Verify version switcher appears showing "2 / 2"
    const messageSwitcher = page
      .getByTestId("MessageSwitcher/container")
      .first();
    await expect(messageSwitcher).toBeVisible({ timeout: 5000 });
    await expect(messageSwitcher).toContainText("2/2");

    // Navigate to previous version
    await messageSwitcher
      .locator("..")
      .locator("svg")
      .first()
      .locator("..")
      .click();

    // Verify we're at "1 / 2"
    let switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible({ timeout: 5000 });
    await expect(switcherSpan).toContainText("1/2");

    // Verify we're back to the original response
    const firstVersionText = await messageContent.textContent();
    expect(firstVersionText).toBe(originalResponseText);

    // Navigate back to regenerated version
    await switcherSpan
      .locator("..")
      .locator("svg")
      .last()
      .locator("..")
      .click();

    // Verify we're back at "2 / 2"
    switcherSpan = page.getByTestId("MessageSwitcher/container").first();
    await expect(switcherSpan).toBeVisible({ timeout: 5000 });
    await expect(switcherSpan).toContainText("2/2");
  });

  test("Message editing with files", async ({ page }) => {
    const testFileName = `test-edit-${Date.now()}.txt`;
    const testFileContent = "This is a test file for editing with attachments.";
    const buffer = Buffer.from(testFileContent, "utf-8");

    // Trigger the native file dialog by clicking the hidden file input,
    // then intercept it with the filechooser event (same pattern as
    // user_file_attachment.spec.ts).
    const fileInput = page.locator('input[type="file"]').first();
    const fileChooserPromise = page.waitForEvent("filechooser");
    await fileInput.dispatchEvent("click");
    const fileChooser = await fileChooserPromise;

    const uploadResponsePromise = page.waitForResponse(
      (response) =>
        response.url().includes("/api/user/projects/file/upload") &&
        response.request().method() === "POST"
    );

    await fileChooser.setFiles({
      name: testFileName,
      mimeType: "text/plain",
      buffer: buffer,
    });

    const uploadResponse = await uploadResponsePromise;
    expect(uploadResponse.ok()).toBeTruthy();

    // Wait for upload processing to complete and file card to render
    await page.waitForLoadState("networkidle", { timeout: 10000 });
    await expect(page.getByText(testFileName).first()).toBeVisible({
      timeout: 10000,
    });

    // Send a message with the file attached using the shared utility
    await sendMessage(page, "Summarize this file");

    // Verify the file is displayed in the sent human message
    const humanMessage = page.locator("#onyx-human-message").first();

    // Verify message text is displayed
    const messageContent = await humanMessage.textContent();
    expect(messageContent).toContain("Summarize this file");

    // Hover and click the edit button
    await humanMessage.hover();
    const editButton = humanMessage
      .locator('[data-testid="HumanMessage/edit-button"]')
      .first();
    await expect(editButton).toBeVisible();
    await editButton.click();

    // Edit the message text
    const textarea = humanMessage.locator("textarea");
    await textarea.fill("What does this file contain?");

    // Submit the edit
    const submitButton = humanMessage.locator('button:has-text("Submit")');
    await submitButton.click();

    // Wait for the new AI response to complete
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "detached",
    });
    await page.waitForSelector('[data-testid="AgentMessage/copy-button"]', {
      state: "visible",
      timeout: 30000,
    });

    // Verify the edited message text is displayed
    const editedHumanMessage = page.locator("#onyx-human-message").first();
    const editedMessageContent = await editedHumanMessage.textContent();
    expect(editedMessageContent).toContain("What does this file contain?");
    expect(editedMessageContent).not.toContain("Summarize this file");

    // Verify the file is still attached after editing
    const editedFileDisplay = editedHumanMessage.locator("#onyx-file");
    await expect(editedFileDisplay).toBeVisible();
    await expect(editedFileDisplay.getByText(testFileName)).toBeVisible();

    // Verify the version switcher shows 2/2 (original + edited)
    const messageSwitcher = page
      .getByTestId("MessageSwitcher/container")
      .first();
    await expect(messageSwitcher).toBeVisible();
    await expect(messageSwitcher).toContainText("2/2");
  });
});


================================================
FILE: web/tests/e2e/chat/message_feedback.spec.ts
================================================
import { test, expect } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";
import { sendMessage } from "@tests/e2e/utils/chatActions";

test.describe("Message feedback thumbs controls", () => {
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    await page.goto("/app");
    await page.waitForLoadState("networkidle");
  });

  test("allows submitting and clearing thumbs up/down feedback", async ({
    page,
  }) => {
    const createFeedbackRequests: {
      is_positive: boolean;
      chat_message_id: number;
      feedback_text?: string;
      predefined_feedback?: string;
    }[] = [];
    const removeFeedbackRequests: {
      url: string;
      query: Record<string, string>;
    }[] = [];

    await page.route(
      "**/api/chat/create-chat-message-feedback",
      async (route) => {
        const body = JSON.parse(route.request().postData() ?? "{}");
        createFeedbackRequests.push(body);
        await route.fulfill({
          status: 200,
          contentType: "application/json",
          body: "{}",
        });
      }
    );

    await page.route(
      "**/api/chat/remove-chat-message-feedback?*",
      async (route) => {
        const url = new URL(route.request().url());
        removeFeedbackRequests.push({
          url: route.request().url(),
          query: Object.fromEntries(url.searchParams.entries()),
        });
        await route.fulfill({
          status: 200,
          contentType: "application/json",
          body: "{}",
        });
      }
    );

    await sendMessage(page, "Share a short fun fact.");

    const aiMessage = page.getByTestId("onyx-ai-message").last();
    const likeButton = aiMessage.getByTestId("AgentMessage/like-button");
    const dislikeButton = aiMessage.getByTestId("AgentMessage/dislike-button");

    await expect(likeButton).toBeVisible({ timeout: 15000 });
    await expect(dislikeButton).toBeVisible();

    // Thumbs up opens the feedback modal with optional feedback
    await likeButton.click();
    const modalTitle = page.getByText("Feedback").first();
    await expect(modalTitle).toBeVisible({ timeout: 5000 });

    // Submit without entering feedback (optional for thumbs up)
    const submitButton = page.getByRole("button", { name: "Submit" });
    await expect(submitButton).toBeEnabled({ timeout: 2000 });

    await Promise.all([
      page.waitForRequest("**/api/chat/create-chat-message-feedback"),
      submitButton.click(),
    ]);

    expect(createFeedbackRequests).toHaveLength(1);
    const likedRequest = createFeedbackRequests[0];
    expect(likedRequest?.is_positive).toBe(true);
    expect(likedRequest?.chat_message_id).toBeTruthy();
    expect(likedRequest?.feedback_text).toBeFalsy();

    await expect(modalTitle).toBeHidden({ timeout: 5000 });

    // Clicking thumbs up again removes the feedback
    await Promise.all([
      page.waitForRequest("**/api/chat/remove-chat-message-feedback?*"),
      likeButton.click(),
    ]);
    expect(removeFeedbackRequests).toHaveLength(1);
    expect(removeFeedbackRequests[0]?.query.chat_message_id).toBe(
      String(likedRequest?.chat_message_id)
    );

    // Thumbs down opens the feedback modal with mandatory feedback
    await dislikeButton.click();
    await expect(modalTitle).toBeVisible({ timeout: 5000 });

    // Verify submit button is disabled without feedback
    const submitButtonDislike = page.getByRole("button", { name: "Submit" });
    await expect(submitButtonDislike).toBeDisabled();

    // Enter feedback (mandatory for thumbs down)
    const feedbackInput = page.getByPlaceholder(
      /What did you .* about this response\?/i
    );
    await feedbackInput.fill("Response missed some details.");

    // Submit button should now be enabled
    await expect(submitButtonDislike).toBeEnabled();

    await Promise.all([
      page.waitForRequest("**/api/chat/create-chat-message-feedback"),
      submitButtonDislike.click(),
    ]);

    expect(createFeedbackRequests).toHaveLength(2);
    const dislikedRequest = createFeedbackRequests[1];
    expect(dislikedRequest?.is_positive).toBe(false);
    expect(dislikedRequest?.feedback_text).toContain("missed some details");
    expect(dislikedRequest?.chat_message_id).toBe(
      likedRequest?.chat_message_id
    );

    await expect(modalTitle).toBeHidden({ timeout: 5000 });
  });
});


================================================
FILE: web/tests/e2e/chat/project_files_visual_regression.spec.ts
================================================
import { expect, test, type Locator, type Page } from "@playwright/test";
import { loginAsWorkerUser } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

const PROJECT_NAME = "E2E-PROJECT-FILES-VISUAL";
const ATTACHMENT_ITEM_TITLE_TEST_ID = "attachment-item-title";
const ATTACHMENT_ITEM_ICON_WRAPPER_TEST_ID = "attachment-item-icon-wrapper";
const LONG_FILE_NAME =
  "CSE_202_Final_Project_Solution_Regression_Check_Long_Name.txt";
const FILE_CONTENT = "Visual regression test content for long filename cards.";

let projectId: number | null = null;

type Geometry = {
  elementLeft: number;
  elementRight: number;
  elementTop: number;
  elementBottom: number;
  cardLeft: number;
  cardRight: number;
  cardTop: number;
  cardBottom: number;
};

function getFilesSection(page: Page): Locator {
  return page
    .locator("div")
    .filter({ has: page.getByRole("button", { name: "Add Files" }) })
    .filter({ hasText: "Chats in this project can access these files." })
    .first();
}

async function uploadFileToProject(
  page: Page,
  targetProjectId: number,
  fileName: string,
  content: string
): Promise<void> {
  const response = await page.request.post("/api/user/projects/file/upload", {
    multipart: {
      project_id: String(targetProjectId),
      files: {
        name: fileName,
        mimeType: "text/plain",
        buffer: Buffer.from(content, "utf-8"),
      },
    },
  });

  expect(response.ok()).toBeTruthy();
}

async function getElementGeometryInCard(
  element: Locator
): Promise<Geometry | null> {
  return element.evaluate((targetEl) => {
    let cardEl: HTMLElement | null = targetEl.parentElement;

    while (cardEl) {
      const style = window.getComputedStyle(cardEl);
      const hasBorder =
        parseFloat(style.borderTopWidth) > 0 ||
        parseFloat(style.borderLeftWidth) > 0;
      const hasRadius = parseFloat(style.borderTopLeftRadius) > 0;

      if (hasBorder && hasRadius) {
        break;
      }
      cardEl = cardEl.parentElement;
    }

    if (!cardEl) {
      return null;
    }

    const elementRect = targetEl.getBoundingClientRect();
    const cardRect = cardEl.getBoundingClientRect();

    return {
      elementLeft: elementRect.left,
      elementRight: elementRect.right,
      elementTop: elementRect.top,
      elementBottom: elementRect.bottom,
      cardLeft: cardRect.left,
      cardRight: cardRect.right,
      cardTop: cardRect.top,
      cardBottom: cardRect.bottom,
    };
  });
}

function expectGeometryWithinCard(geometry: Geometry | null): void {
  expect(geometry).not.toBeNull();
  expect(geometry!.elementLeft).toBeGreaterThanOrEqual(geometry!.cardLeft - 1);
  expect(geometry!.elementRight).toBeLessThanOrEqual(geometry!.cardRight + 1);
  expect(geometry!.elementTop).toBeGreaterThanOrEqual(geometry!.cardTop - 1);
  expect(geometry!.elementBottom).toBeLessThanOrEqual(geometry!.cardBottom + 1);
}

test.describe("Project Files visual regression", () => {
  test.beforeAll(async ({ browser }, workerInfo) => {
    const context = await browser.newContext();
    const page = await context.newPage();

    await loginAsWorkerUser(page, workerInfo.workerIndex);
    const client = new OnyxApiClient(page.request);

    projectId = await client.createProject(PROJECT_NAME);
    await uploadFileToProject(page, projectId, LONG_FILE_NAME, FILE_CONTENT);

    await context.close();
  });

  test.afterAll(async ({ browser }, workerInfo) => {
    if (!projectId) {
      return;
    }

    const context = await browser.newContext();
    const page = await context.newPage();

    await loginAsWorkerUser(page, workerInfo.workerIndex);
    const client = new OnyxApiClient(page.request);
    await client.deleteProject(projectId);

    await context.close();
  });

  test.beforeEach(async ({ page }, workerInfo) => {
    if (projectId === null) {
      throw new Error(
        "Project setup failed in beforeAll; cannot run visual regression test"
      );
    }

    await page.context().clearCookies();
    await loginAsWorkerUser(page, workerInfo.workerIndex);
    await page.goto(`/app?projectId=${projectId}`);
    await page.waitForLoadState("networkidle");
    await expect(
      page.getByText("Chats in this project can access these files.")
    ).toBeVisible();
  });

  test("long underscore filename stays visually contained in file card", async ({
    page,
  }) => {
    const filesSection = getFilesSection(page);
    await expect(filesSection).toBeVisible();

    const fileTitle = filesSection
      .locator(`[data-testid="${ATTACHMENT_ITEM_TITLE_TEST_ID}"]`)
      .filter({ hasText: LONG_FILE_NAME })
      .first();
    await expect(fileTitle).toBeVisible();

    // Wait for deterministic post-processing state before geometry checks/screenshot.
    await expect(fileTitle).not.toContainText("Processing...", {
      timeout: 30_000,
    });
    await expect(fileTitle).not.toContainText("Uploading...", {
      timeout: 30_000,
    });
    await expect(fileTitle).toContainText("TXT", { timeout: 30_000 });

    const iconWrapper = filesSection
      .locator(`[data-testid="${ATTACHMENT_ITEM_ICON_WRAPPER_TEST_ID}"]`)
      .first();
    await expect(iconWrapper).toBeVisible();

    const container = page.locator("[data-main-container]");
    await expect(container).toBeVisible();
    await expectElementScreenshot(container, {
      name: "project-files-long-underscore-filename",
    });

    const iconGeometry = await getElementGeometryInCard(iconWrapper);
    const titleGeometry = await getElementGeometryInCard(fileTitle);
    expectGeometryWithinCard(iconGeometry);
    expectGeometryWithinCard(titleGeometry);
  });
});


================================================
FILE: web/tests/e2e/chat/scroll_behavior.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAsRandomUser } from "@tests/e2e/utils/auth";
import { sendMessage, startNewChat } from "@tests/e2e/utils/chatActions";

/**
 * Helper to toggle auto-scroll setting via the settings panel
 */
async function setAutoScroll(page: Page, enabled: boolean) {
  // Open user dropdown menu (same pattern as other tests)
  await page.locator("#onyx-user-dropdown").click();
  await page.getByText("User Settings").first().click();
  // Wait for dialog to appear
  await page.waitForSelector('[role="dialog"]', { state: "visible" });

  // Navigate to Chat Preferences tab
  await page
    .locator('a[href="/app/settings/chat-preferences"]')
    .click({ force: true });

  // Find the auto-scroll switch by locating the label text and then finding
  // the switch within the same container
  const autoScrollSwitch = page
    .locator("label")
    .filter({ hasText: "Chat Auto-scroll" })
    .locator('button[role="switch"]');

  await autoScrollSwitch.waitFor({ state: "visible" });

  const isCurrentlyChecked =
    (await autoScrollSwitch.getAttribute("aria-checked")) === "true";

  if (isCurrentlyChecked !== enabled) {
    await autoScrollSwitch.click();
    // Wait for the switch state to update
    const expectedState = enabled ? "true" : "false";
    await expect(autoScrollSwitch).toHaveAttribute(
      "aria-checked",
      expectedState
    );
  }

  await page.locator('a[href="/app"]').click({ force: true });
}

/**
 * Helper to get the scroll container element
 */
function getScrollContainer(page: Page) {
  // The scroll container is the div with overflow-y-auto inside ChatUI
  return page.locator(".overflow-y-auto").first();
}

test.describe("Chat Scroll Behavior", () => {
  // Configure this suite to run serially to resepect auto-scroll settings
  test.describe.configure({ mode: "serial" });

  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAsRandomUser(page);
    await page.goto("/app");
    const nameInput = page.getByPlaceholder("Your name");
    await nameInput.waitFor();
    await nameInput.fill("Playwright Tester");
    await page.getByText("Save").click();
    await Promise.all([
      // Wait for sidebar navigation to be visible to indicate page is loaded
      page.getByText("Agents").first().waitFor(),
      page.getByText("Projects").first().waitFor(),
    ]);
  });

  // TODO(Nik): https://linear.app/onyx-app/issue/ENG-3422/playwright-tests-for-scroll-behavior
  test.skip("Opening existing conversation positions correctly", async ({
    page,
  }) => {
    // Turn off auto-scroll
    await setAutoScroll(page, false);

    // Create a conversation with multiple messages
    await sendMessage(
      page,
      "Message 1: Creating some content to enable scrolling"
    );
    await sendMessage(page, "Message 2: More content for the scroll test");

    // Reload page to simulate opening an existing conversation
    await page.reload();
    await Promise.all([
      // Wait for sidebar navigation to be visible to indicate page is loaded
      page.getByText("Agents").first().waitFor(),
      page.getByText("Projects").first().waitFor(),
    ]);

    // Wait for scroll positioning to complete (content becomes visible)
    await page
      .locator('[data-scroll-ready="true"]')
      .waitFor({ timeout: 30000 });

    // Wait for the user messages to be visible
    const lastUserMessage = page.locator("#onyx-human-message").last();
    await lastUserMessage.waitFor({ state: "visible", timeout: 30000 });

    // Verify the last user message is positioned near the top of the viewport
    const isPositionedCorrectly = await lastUserMessage.evaluate(
      (el: HTMLElement) => {
        const scrollContainer = el.closest(".overflow-y-auto");
        if (!scrollContainer) return false;

        const containerRect = scrollContainer.getBoundingClientRect();
        const elementRect = el.getBoundingClientRect();

        // Check if element is near the top of the container (within 100px)
        return elementRect.top - containerRect.top < 100;
      }
    );

    expect(isPositionedCorrectly).toBe(true);
  });

  test("Auto-scroll ON: scrolls to bottom on new message", async ({ page }) => {
    // Ensure auto-scroll is ON (default)
    await setAutoScroll(page, true);

    // Send a message
    await sendMessage(page, "Hello, this is a test message");

    // Send another message to create some content
    await sendMessage(page, "Another message to test scrolling behavior");

    // The scroll container should be scrolled to bottom
    const scrollContainer = getScrollContainer(page);
    const isAtBottom = await scrollContainer.evaluate((el: HTMLElement) => {
      return Math.abs(el.scrollHeight - el.scrollTop - el.clientHeight) < 10;
    });

    expect(isAtBottom).toBe(true);
  });
});

/**
 * Tests for the Dynamic Bottom Spacer feature.
 *
 * The DynamicBottomSpacer creates a "fresh chat" effect where new messages
 * appear at the top of the viewport (below the header), giving each exchange
 * a clean slate appearance while preserving scroll-up access to history.
 */
test.describe("Dynamic Bottom Spacer - Fresh Chat Effect", () => {
  test.describe.configure({ mode: "serial" });

  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAsRandomUser(page);
    await page.goto("/app");
    const nameInput = page.getByPlaceholder("Your name");
    await nameInput.waitFor();
    await nameInput.fill("Playwright Tester");
    await page.getByText("Save").click();
    await Promise.all([
      page.getByText("Agents").first().waitFor(),
      page.getByText("Projects").first().waitFor(),
    ]);
  });

  /**
   * Helper to get the position of an element relative to scroll container
   */
  async function getElementPositionInContainer(
    page: Page,
    elementLocator: ReturnType<Page["locator"]>
  ) {
    return elementLocator.evaluate((el: HTMLElement) => {
      const scrollContainer = el.closest(".overflow-y-auto");
      if (!scrollContainer) return null;

      const containerRect = scrollContainer.getBoundingClientRect();
      const elementRect = el.getBoundingClientRect();

      return {
        topOffset: elementRect.top - containerRect.top,
        containerHeight: containerRect.height,
        elementTop: elementRect.top,
        containerTop: containerRect.top,
      };
    });
  }

  test("Follow-up message appears near top of viewport (fresh chat effect)", async ({
    page,
  }) => {
    // First, create some conversation history
    await sendMessage(
      page,
      "This is the first message to establish conversation history"
    );

    // Send a follow-up message - this should trigger the fresh chat effect
    await sendMessage(
      page,
      "This follow-up message should appear near the top of the viewport"
    );

    // Get the last user message (the follow-up)
    const lastUserMessage = page.locator("#onyx-human-message").last();
    await lastUserMessage.waitFor({ state: "visible" });

    // Check that the follow-up message is positioned near the top of the container
    // (within ~150px to account for sticky header and some padding)
    await expect
      .poll(
        async () => {
          const position = await getElementPositionInContainer(
            page,
            lastUserMessage
          );
          return position?.topOffset ?? Number.POSITIVE_INFINITY;
        },
        { timeout: 5000 }
      )
      .toBeLessThan(150);
  });

  test("Dynamic spacer element exists and has correct attributes", async ({
    page,
  }) => {
    // Send a message to start a conversation
    await sendMessage(page, "Test message to initialize chat");

    // Send a follow-up to trigger the spacer
    await sendMessage(page, "Follow-up message");

    // Verify the dynamic spacer element exists with correct attributes
    const spacer = page.locator('[data-dynamic-spacer="true"]');
    await expect(spacer).toBeVisible({ timeout: 10000 });
    await expect(spacer).toHaveAttribute("aria-hidden", "true");
  });

  test("User can scroll up to see previous messages after fresh chat effect", async ({
    page,
  }) => {
    // Create conversation history
    await sendMessage(page, "First message in the conversation");
    await sendMessage(page, "Second message in the conversation");

    // Send a follow-up (triggers fresh chat effect)
    await sendMessage(page, "Third message - should be at top");

    // Now scroll up to verify previous messages are accessible
    const scrollContainer = getScrollContainer(page);
    await scrollContainer.evaluate((el: HTMLElement) => {
      el.scrollTo({ top: 0, behavior: "instant" });
    });

    // Wait for scroll to complete
    await expect
      .poll(() => scrollContainer.evaluate((el: HTMLElement) => el.scrollTop), {
        timeout: 5000,
      })
      .toBeLessThanOrEqual(1);

    // Verify the first message is now visible
    const firstUserMessage = page.locator("#onyx-human-message").first();
    await expect(firstUserMessage).toBeVisible();

    // Verify the first message content
    await expect(firstUserMessage).toContainText("First message");
  });

  test("Scroll container remains at bottom after AI response completes", async ({
    page,
  }) => {
    // Send a message
    await sendMessage(page, "Please respond with a short message");

    // After AI response completes, verify we're still at the bottom
    const scrollContainer = getScrollContainer(page);
    const isAtBottom = await scrollContainer.evaluate((el: HTMLElement) => {
      // Allow a small tolerance (10px) for rounding
      return Math.abs(el.scrollHeight - el.scrollTop - el.clientHeight) < 10;
    });

    expect(isAtBottom).toBe(true);
  });
});


================================================
FILE: web/tests/e2e/chat/share_chat.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAsRandomUser } from "../utils/auth";
import { expectElementScreenshot } from "../utils/visualRegression";

async function sendMessageAndWaitForChat(page: Page, message: string) {
  await page.locator("#onyx-chat-input-textarea").click();
  await page.locator("#onyx-chat-input-textarea").fill(message);
  await page.locator("#onyx-chat-input-send-button").click();

  await page.waitForFunction(
    () => window.location.href.includes("chatId="),
    null,
    { timeout: 15000 }
  );

  await expect(page.locator('[aria-label="share-chat-button"]')).toBeVisible({
    timeout: 10000,
  });
}

async function openShareModal(page: Page) {
  await page.locator('[aria-label="share-chat-button"]').click();
  await expect(page.getByRole("dialog")).toBeVisible({ timeout: 5000 });
}

test.describe("Share Chat Session Modal", () => {
  test.describe.configure({ mode: "serial" });

  let page: Page;

  test.beforeAll(async ({ browser }) => {
    page = await browser.newPage();
    await loginAsRandomUser(page);
    await sendMessageAndWaitForChat(page, "Hello for share test");
  });

  test.afterAll(async () => {
    await page.close();
  });

  test("shows Private selected by default", async () => {
    await openShareModal(page);

    const dialog = page.getByRole("dialog");
    await expect(dialog).toBeVisible();

    const privateOption = dialog.locator(
      '[aria-label="share-modal-option-private"]'
    );
    await expect(privateOption.locator("svg").last()).toBeVisible();

    const submitButton = dialog.locator('[aria-label="share-modal-submit"]');
    await expect(submitButton).toHaveText("Done");

    const cancelButton = dialog.locator('[aria-label="share-modal-cancel"]');
    await expect(cancelButton).toBeVisible();

    await expectElementScreenshot(dialog, {
      name: "share-modal-default-private",
    });

    await page.keyboard.press("Escape");
    await expect(dialog).toBeHidden({ timeout: 5000 });
  });

  test("selecting Your Organization changes submit text", async () => {
    await openShareModal(page);

    const dialog = page.getByRole("dialog");

    await dialog.locator('[aria-label="share-modal-option-public"]').click();

    const submitButton = dialog.locator('[aria-label="share-modal-submit"]');
    await expect(submitButton).toHaveText("Create Share Link");

    const cancelButton = dialog.locator('[aria-label="share-modal-cancel"]');
    await expect(cancelButton).toBeVisible();

    await expectElementScreenshot(dialog, {
      name: "share-modal-public-selected",
    });

    await page.keyboard.press("Escape");
    await expect(dialog).toBeHidden({ timeout: 5000 });
  });

  test("Cancel closes modal without API calls", async () => {
    let patchCallCount = 0;
    await page.route("**/api/chat/chat-session/*", async (route) => {
      if (route.request().method() === "PATCH") {
        patchCallCount++;
      }
      await route.continue();
    });

    await openShareModal(page);

    const dialog = page.getByRole("dialog");
    const cancelButton = dialog.locator('[aria-label="share-modal-cancel"]');
    await cancelButton.click();

    await expect(dialog).toBeHidden({ timeout: 5000 });
    expect(patchCallCount).toBe(0);

    await page.unrouteAll({ behavior: "ignoreErrors" });
  });

  test("X button closes modal without API calls", async () => {
    let patchCallCount = 0;
    await page.route("**/api/chat/chat-session/*", async (route) => {
      if (route.request().method() === "PATCH") {
        patchCallCount++;
      }
      await route.continue();
    });

    await openShareModal(page);

    const dialog = page.getByRole("dialog");
    const closeButton = dialog.locator('div[tabindex="-1"] button');
    await closeButton.click();

    await expect(dialog).toBeHidden({ timeout: 5000 });
    expect(patchCallCount).toBe(0);

    await page.unrouteAll({ behavior: "ignoreErrors" });
  });

  test("creating a share link calls API and shows link", async () => {
    await openShareModal(page);

    const dialog = page.getByRole("dialog");

    let patchBody: Record<string, unknown> | null = null;
    await page.route("**/api/chat/chat-session/*", async (route) => {
      if (route.request().method() === "PATCH") {
        patchBody = JSON.parse(route.request().postData() ?? "{}");
        await route.continue();
      } else {
        await route.continue();
      }
    });

    await dialog.locator('[aria-label="share-modal-option-public"]').click();
    const submitButton = dialog.locator('[aria-label="share-modal-submit"]');
    await submitButton.click();

    await page.waitForResponse(
      (r) =>
        r.url().includes("/api/chat/chat-session/") &&
        r.request().method() === "PATCH",
      { timeout: 10000 }
    );

    expect(patchBody).toEqual({ sharing_status: "public" });

    const linkInput = dialog.locator('[aria-label="share-modal-link-input"]');
    await expect(linkInput).toHaveValue(/\/app\/shared\//, { timeout: 5000 });

    await expect(submitButton).toHaveText("Copy Link");
    await expect(dialog.getByText("Chat shared")).toBeVisible();
    await expect(
      dialog.locator('[aria-label="share-modal-cancel"]')
    ).toBeHidden();

    await expectElementScreenshot(dialog, {
      name: "share-modal-link-created",
      mask: ['[aria-label="share-modal-link-input"]'],
    });

    await page.unrouteAll({ behavior: "ignoreErrors" });

    // Wait for the toast to confirm SWR data has been refreshed
    // before closing, so the next test sees up-to-date shared_status
    await expect(
      page.getByText("Share link copied to clipboard!").first()
    ).toBeVisible({ timeout: 5000 });

    await page.keyboard.press("Escape");
    await expect(dialog).toBeHidden({ timeout: 5000 });
  });

  test("Copy Link triggers clipboard copy", async () => {
    await openShareModal(page);

    const dialog = page.getByRole("dialog");

    await expect(
      dialog.locator('[aria-label="share-modal-link-input"]')
    ).toBeVisible({ timeout: 5000 });

    const submitButton = dialog.locator('[aria-label="share-modal-submit"]');
    await expect(submitButton).toHaveText("Copy Link");

    await submitButton.click();

    await expect(
      page.getByText("Share link copied to clipboard!").first()
    ).toBeVisible({ timeout: 5000 });

    await page.keyboard.press("Escape");
    await expect(dialog).toBeHidden({ timeout: 5000 });
  });

  test("making chat private again calls API and closes modal", async () => {
    let patchBody: Record<string, unknown> | null = null;
    await page.route("**/api/chat/chat-session/*", async (route) => {
      if (route.request().method() === "PATCH") {
        patchBody = JSON.parse(route.request().postData() ?? "{}");
        await route.continue();
      } else {
        await route.continue();
      }
    });

    await openShareModal(page);

    const dialog = page.getByRole("dialog");
    const submitButton = dialog.locator('[aria-label="share-modal-submit"]');

    await dialog.locator('[aria-label="share-modal-option-private"]').click();

    await expect(submitButton).toHaveText("Make Private");

    await submitButton.click();

    await page.waitForResponse(
      (r) =>
        r.url().includes("/api/chat/chat-session/") &&
        r.request().method() === "PATCH",
      { timeout: 10000 }
    );

    expect(patchBody).toEqual({ sharing_status: "private" });

    await expect(dialog).toBeHidden({ timeout: 5000 });

    await expect(page.getByText("Chat is now private")).toBeVisible({
      timeout: 5000,
    });

    await page.unrouteAll({ behavior: "ignoreErrors" });
  });
});


================================================
FILE: web/tests/e2e/chat/welcome_page.spec.ts
================================================
import { test, expect } from "@playwright/test";
import {
  expectScreenshot,
  expectElementScreenshot,
} from "@tests/e2e/utils/visualRegression";
import { GREETING_MESSAGES } from "@/lib/chat/greetingMessages";
import { loginAs } from "@tests/e2e/utils/auth";

test.describe.configure({ mode: "parallel" });

const THEMES = ["light", "dark"] as const;

for (const theme of THEMES) {
  test.describe(`Welcome page — /app (${theme} mode)`, () => {
    test.beforeEach(async ({ page }) => {
      // Always log in before each test to ensure a valid session.
      await loginAs(page, "admin");

      // Inject theme into localStorage so next-themes picks it up immediately.
      await page.addInitScript((t: string) => {
        localStorage.setItem("theme", t);
      }, theme);

      await page.goto("/app");
      await page.waitForLoadState("networkidle");
    });

    // ── Full-page screenshot ──────────────────────────────────────────

    test("full page visual snapshot", async ({ page }) => {
      // Wait for the welcome greeting to ensure the page has fully rendered
      await page
        .getByTestId("chat-intro")
        .waitFor({ state: "visible", timeout: 10000 });

      await expectScreenshot(page, {
        name: `welcome-${theme}-full-page`,
        hide: ['[data-testid="onyx-logo"]'], // greeting text is random, hide to prevent size variation
      });
    });

    // ── Input bar element screenshot ──────────────────────────────────

    test("input bar element snapshot", async ({ page }) => {
      const inputBar = page.locator("#onyx-chat-input");
      await inputBar.waitFor({ state: "visible", timeout: 10000 });

      await expectElementScreenshot(inputBar, {
        name: `welcome-${theme}-input-bar`,
      });
    });

    // ── Sidebar element screenshot ────────────────────────────────────

    test("sidebar element snapshot", async ({ page }) => {
      // SidebarWrapper renders a div with `group/SidebarWrapper` Tailwind
      // group class — this is the most stable identifier for the sidebar
      // container element.
      const sidebar = page.locator(".group\\/SidebarWrapper");
      await sidebar.waitFor({ state: "visible", timeout: 10000 });

      await expectElementScreenshot(sidebar, {
        name: `welcome-${theme}-sidebar`,
      });
    });

    // ── Content assertions ────────────────────────────────────────────

    test("displays greeting from default agent", async ({ page }) => {
      const greetingContainer = page.getByTestId("onyx-logo");
      await greetingContainer.waitFor({ state: "visible", timeout: 10000 });

      const text = await greetingContainer.textContent();
      expect(GREETING_MESSAGES).toContain(text?.trim());
    });

    test("chat input is visible and focusable", async ({ page }) => {
      const textarea = page.locator("#onyx-chat-input-textarea");
      await expect(textarea).toBeVisible({ timeout: 10000 });

      await textarea.click();
      await expect(textarea).toBeFocused();
    });

    test("new session button is visible in the sidebar", async ({ page }) => {
      const newSessionBtn = page.getByTestId("AppSidebar/new-session");
      await expect(newSessionBtn).toBeVisible({ timeout: 10000 });
    });

    test.skip("send button is visible in the input bar", async ({ page }) => {
      const sendButton = page.locator("#onyx-chat-input-send-button");
      await expect(sendButton).toBeVisible({ timeout: 10000 });

      await expectElementScreenshot(sendButton, {
        name: `welcome-${theme}-send-button`,
      });
    });
  });
}


================================================
FILE: web/tests/e2e/connectors/federated_slack.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAs, loginAsRandomUser } from "@tests/e2e/utils/auth";

test.use({ storageState: "admin_auth.json" });

const SLACK_CLIENT_ID = process.env.SLACK_CLIENT_ID;
const SLACK_CLIENT_SECRET = process.env.SLACK_CLIENT_SECRET;

async function createFederatedSlackConnector(page: Page) {
  // Navigate to add connector page
  await page.goto("/admin/add-connector");
  await page.waitForLoadState("networkidle");

  // Click on Slack connector tile (specifically the one with "Logo Slack" text, not "Slack Bots")
  await page.getByRole("link", { name: "Logo Slack" }).first().click();
  await page.waitForLoadState("networkidle");

  if (!SLACK_CLIENT_ID || !SLACK_CLIENT_SECRET) {
    throw new Error("SLACK_CLIENT_ID and SLACK_CLIENT_SECRET must be set");
  }

  // Fill in the client ID and client secret
  await page.getByLabel(/client id/i).fill(SLACK_CLIENT_ID);
  await page.getByLabel(/client secret/i).fill(SLACK_CLIENT_SECRET);

  // Submit the form to create or update the federated connector
  const createOrUpdateButton = await page.getByRole("button", {
    name: /create|update/i,
  });
  await createOrUpdateButton.click();

  // Wait for success message or redirect
  await page.waitForTimeout(2000);
}

async function navigateToUserSettings(page: Page) {
  // Wait for any existing modals to close
  await page.waitForTimeout(1000);

  // Wait for potential modal backdrop to disappear
  await page
    .waitForSelector(".fixed.inset-0.bg-neutral-950\\/50", {
      state: "detached",
      timeout: 5000,
    })
    .catch(() => {});

  // Click on user dropdown/settings button
  await page.locator("#onyx-user-dropdown").click();

  // Click on settings option
  await page.getByText("User Settings").click();

  // Wait for settings modal to appear
  await expect(page.locator("h2", { hasText: "User Settings" })).toBeVisible();
}

async function openConnectorsTab(page: Page) {
  // Click on the Connectors tab in user settings
  await page.getByRole("button", { name: "Connectors" }).click();

  // Wait for connectors section to be visible
  // Allow multiple instances of "Connected Services" to be visible
  const connectedServicesLocators = page.getByText("Connected Services");
  await expect(connectedServicesLocators.first()).toBeVisible();
}

/**
 * Cleanup function to delete the federated Slack connector from the admin panel
 * This ensures test isolation by removing any test data created during the test
 */
async function deleteFederatedSlackConnector(page: Page) {
  // Navigate to admin indexing status page
  await page.goto("/admin/indexing/status");
  await page.waitForLoadState("networkidle");

  // Expand the Slack section first (summary row toggles open on click)
  const slackSummaryRow = page.locator("tr").filter({
    has: page.locator("text=/^\\s*Slack\\s*$/i"),
  });
  if ((await slackSummaryRow.count()) > 0) {
    await slackSummaryRow.first().click();
    // Wait a moment for rows to render
    await page.waitForTimeout(500);
  }

  // Look for the Slack federated connector row inside the expanded section
  // The federated connectors have a "Federated Access" badge
  const slackRow = page.locator("tr", { hasText: /federated access/i });

  // Check if the connector exists
  const rowCount = await slackRow.count();
  if (rowCount === 0) {
    // No federated Slack connector found, nothing to delete
    console.log("No federated Slack connector found to delete");
    return;
  }

  // Click on the row to navigate to the detail page
  await slackRow.first().click();
  await page.waitForLoadState("networkidle");

  // Look for and click the delete button
  // Open the Manage menu and click Delete
  const manageButton = page.getByRole("button", { name: /manage/i });
  await manageButton
    .waitFor({ state: "visible", timeout: 5000 })
    .catch(() => {});
  if (!(await manageButton.isVisible().catch(() => false))) {
    console.log("Manage button not visible; skipping delete");
    return;
  }
  await manageButton.click();
  // Wait for the dropdown menu to appear and settle (Radix animation)
  await page
    .getByRole("menu")
    .waitFor({ state: "visible", timeout: 3000 })
    .catch(() => {});
  await page.waitForTimeout(150);

  page.once("dialog", (dialog) => dialog.accept());
  const deleteMenuItem = page.getByRole("menuitem", { name: /^Delete$/ });
  await expect(deleteMenuItem).toBeVisible({ timeout: 5000 });
  await deleteMenuItem.click({ force: true });
  // Wait for deletion to complete and redirect
  await page.waitForURL("**/admin/indexing/status*", { timeout: 15000 });
  await page.waitForLoadState("networkidle");
}

// Causes other tests to fail for some reason???
// TODO (chris): fix this test
test.skip("Federated Slack Connector - Create, OAuth Modal, and User Settings Flow", async ({
  page,
}) => {
  try {
    // Setup: Clear cookies and log in as admin
    await page.context().clearCookies();
    await loginAs(page, "admin");

    // Create a federated Slack connector in admin panel
    await createFederatedSlackConnector(page);

    // Log in as a random user
    await page.context().clearCookies();
    await loginAsRandomUser(page);

    // Navigate back to main page and verify OAuth modal appears
    await page.goto("/app");
    await page.waitForLoadState("networkidle");

    // Check if the OAuth modal appears
    await expect(
      page.getByText(/improve answer quality by letting/i)
    ).toBeVisible({ timeout: 10000 });
    await expect(page.getByText(/slack/i)).toBeVisible();

    // Decline the OAuth connection
    await page.getByRole("button", { name: "Skip for now" }).click();

    // Wait for modal to disappear
    await expect(
      page.getByText(/improve answer quality by letting/i)
    ).not.toBeVisible();

    // Go to user settings and verify the connector appears
    await navigateToUserSettings(page);
    await openConnectorsTab(page);

    // Verify Slack connector appears in the federated connectors section
    await expect(page.getByText("Federated Connectors")).toBeVisible();
    await expect(page.getByText("Slack")).toBeVisible();
    await expect(page.getByText("Not connected")).toBeVisible();

    // Verify there's a Connect button available
    await expect(
      page.locator("button", { hasText: /^Connect$/ })
    ).toBeVisible();
  } finally {
    // Cleanup: Delete the federated Slack connector
    // Log back in as admin to delete the connector
    await page.context().clearCookies();
    await loginAs(page, "admin");
    await deleteFederatedSlackConnector(page);
  }
});


================================================
FILE: web/tests/e2e/connectors/inlineFileManagement.spec.ts
================================================
import { test, expect, Page } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

/** Upload a file through the inline manager, retrying on transient failures. */
async function uploadTestFile(
  page: Page,
  fileName: string,
  content: string,
  maxRetries: number = 3
): Promise<void> {
  const buffer = Buffer.from(content, "utf-8");

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const addFilesButton = page.getByRole("button", { name: /add files/i });
      await expect(addFilesButton).toBeVisible({ timeout: 5000 });
      await expect(addFilesButton).toBeEnabled({ timeout: 5000 });

      const fileChooserPromise = page.waitForEvent("filechooser", {
        timeout: 5000,
      });
      await addFilesButton.click();
      const fileChooser = await fileChooserPromise;
      await fileChooser.setFiles({
        name: fileName,
        mimeType: "text/plain",
        buffer: buffer,
      });
      await expect(page.getByText(fileName)).toBeVisible({ timeout: 5000 });
      return;
    } catch (error) {
      if (attempt === maxRetries) {
        throw error;
      }
      await page.waitForTimeout(1000);
    }
  }
}

test.describe("InlineFileManagement", () => {
  test.describe.configure({ retries: 2 });

  let testCcPairId: number | null = null;

  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    const apiClient = new OnyxApiClient(page.request);
    testCcPairId = await apiClient.createFileConnector(
      `Test File Connector ${Date.now()}`
    );
  });

  test.afterEach(async ({ page }) => {
    const apiClient = new OnyxApiClient(page.request);

    if (testCcPairId !== null) {
      try {
        await apiClient.deleteCCPair(testCcPairId);
        testCcPairId = null;
      } catch (error) {
        console.warn(
          `Failed to delete test connector ${testCcPairId}: ${error}`
        );
      }
    }
  });

  test("should display files section on connector page", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await expect(page.getByText(/Files \(/)).toBeVisible({ timeout: 10000 });
    await expect(page.getByRole("button", { name: /edit/i })).toBeVisible();
  });

  test("should enter and exit edit mode", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await page.getByRole("button", { name: /edit/i }).click();
    await expect(page.getByRole("button", { name: /cancel/i })).toBeVisible();
    await expect(
      page.getByRole("button", { name: /save changes/i })
    ).toBeVisible();
    await expect(
      page.getByRole("button", { name: /add files/i })
    ).toBeVisible();
    await page.getByRole("button", { name: /cancel/i }).click();
    await expect(page.getByRole("button", { name: /edit/i })).toBeVisible();
  });

  test("should add files and show them as pending", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await page.getByRole("button", { name: /edit/i }).click();
    await page.waitForTimeout(500);
    await uploadTestFile(
      page,
      "test-document.txt",
      "This is a test document content"
    );
    await expect(page.getByText("New")).toBeVisible();
    const saveButton = page.getByRole("button", { name: /save changes/i });
    await expect(saveButton).toBeEnabled();
  });

  test("should remove pending file before saving", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await page.getByRole("button", { name: /edit/i }).click();
    await page.waitForTimeout(500);
    await uploadTestFile(
      page,
      "file-to-remove.txt",
      "This file will be removed"
    );
    const newFileRow = page.locator("tr", { hasText: "file-to-remove.txt" });
    await newFileRow.locator('button[title="Remove file"]').click();
    await expect(page.getByText("file-to-remove.txt")).not.toBeVisible();
  });

  test("should show confirmation modal when saving", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await page.getByRole("button", { name: /edit/i }).click();
    await page.waitForTimeout(500);
    await uploadTestFile(
      page,
      "confirm-test.txt",
      "Test content for confirmation modal"
    );
    await page.getByRole("button", { name: /save changes/i }).click();
    const modalDialog = page.getByRole("dialog", {
      name: /confirm file changes/i,
    });
    await expect(modalDialog).toBeVisible({ timeout: 5000 });
    await expect(
      modalDialog.getByText(/1 file\(s\) will be added/)
    ).toBeVisible();
    await expect(
      modalDialog.getByRole("button", { name: /confirm & save/i })
    ).toBeVisible();
    await page.keyboard.press("Escape");
    await expect(modalDialog).not.toBeVisible();
    await expect(
      page.getByRole("button", { name: /save changes/i })
    ).toBeVisible();
  });

  test("should cancel edit mode and discard changes", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");

    await page.getByRole("button", { name: /edit/i }).click();
    await page.waitForTimeout(500);
    await uploadTestFile(
      page,
      "discard-test.txt",
      "This file should be discarded"
    );
    await page.getByRole("button", { name: /cancel/i }).click();
    await expect(page.getByRole("button", { name: /edit/i })).toBeVisible();
    await expect(page.getByText("discard-test.txt")).not.toBeVisible();
  });
});


================================================
FILE: web/tests/e2e/constants.ts
================================================
export const TEST_ADMIN_CREDENTIALS = {
  email: "admin_user@example.com",
  password: "TestPassword123!",
};

export const TEST_ADMIN2_CREDENTIALS = {
  email: "admin2_user@example.com",
  password: "TestPassword123!",
};

/**
 * Number of distinct worker users provisioned during global setup.
 * Must be >= the max concurrent workers in playwright.config.ts.
 * Playwright's workerIndex can exceed this (retries spawn new workers
 * with incrementing indices), so callers should use modulo:
 *   workerIndex % WORKER_USER_POOL_SIZE
 */
export const WORKER_USER_POOL_SIZE = 8;

export function workerUserCredentials(workerIndex: number): {
  email: string;
  password: string;
} {
  return {
    email: `worker${workerIndex}@example.com`,
    password: "WorkerPassword123!",
  };
}


================================================
FILE: web/tests/e2e/fixtures/eeFeatures.ts
================================================
/**
 * Playwright fixture that detects EE (Enterprise Edition) license state.
 *
 * Usage:
 * ```ts
 * import { test, expect } from "@tests/e2e/fixtures/eeFeatures";
 *
 * test("my EE-gated test", async ({ page, eeEnabled }) => {
 *   test.skip(!eeEnabled, "Requires active Enterprise license");
 *   // ... rest of test
 * });
 * ```
 *
 * The fixture:
 * - Authenticates as admin
 * - Fetches /api/settings to check ee_features_enabled
 * - Provides a boolean to the test BEFORE any navigation happens
 *
 * This lets tests call test.skip() synchronously at the top, which is the
 * correct Playwright pattern — never navigate then decide to skip.
 */

import { test as base, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";

export const test = base.extend<{
  /** Whether EE features are enabled (valid enterprise license). */
  eeEnabled: boolean;
}>({
  eeEnabled: async ({ page }, use) => {
    await loginAs(page, "admin");
    const res = await page.request.get("/api/settings");
    if (!res.ok()) {
      // Fail open — if we can't determine, assume EE is not enabled
      await use(false);
      return;
    }
    const settings = await res.json();
    await use(settings.ee_features_enabled === true);
  },
});

export { expect };


================================================
FILE: web/tests/e2e/fixtures/llmProvider.ts
================================================
/**
 * Playwright fixture that ensures a public LLM provider is available.
 *
 * Usage:
 * ```ts
 * // Import from this file instead of @playwright/test
 * import { test, expect } from "@tests/e2e/fixtures/llmProvider";
 *
 * test("my test that needs an LLM provider", async ({ page, llmProviderId }) => {
 *   // llmProviderId is the ID of the provider that was created (or null if
 *   // one already existed). The fixture handles cleanup automatically.
 * });
 * ```
 *
 * The fixture:
 * - Authenticates as admin
 * - Creates a public LLM provider if none exists
 * - Provides the created provider ID to the test
 * - Cleans up the provider after all tests in the file complete
 */

import { test as base, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

export const test = base.extend<{
  /**
   * The ID of the public LLM provider created by this fixture, or `null`
   * if a public provider already existed.
   */
  llmProviderId: number | null;
}>({
  llmProviderId: async ({ page }, use) => {
    // Authenticate as admin to be able to create/list providers
    await page.context().clearCookies();
    await loginAs(page, "admin");

    const client = new OnyxApiClient(page.request);
    const createdId = await client.ensurePublicProvider();
    await use(createdId);

    // Cleanup: only delete if we created one
    if (createdId !== null) {
      // Re-authenticate in case the test changed the session
      await page.context().clearCookies();
      await loginAs(page, "admin");
      await client.deleteProvider(createdId);
    }
  },
});

export { expect };


================================================
FILE: web/tests/e2e/global-setup.ts
================================================
import { FullConfig, request } from "@playwright/test";
import {
  TEST_ADMIN_CREDENTIALS,
  TEST_ADMIN2_CREDENTIALS,
  WORKER_USER_POOL_SIZE,
  workerUserCredentials,
} from "@tests/e2e/constants";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";

const PREFLIGHT_TIMEOUT_MS = 60_000;
const PREFLIGHT_POLL_INTERVAL_MS = 2_000;
const PREFLIGHT_WARN_AFTER_MS = 15_000;

/**
 * Poll the health endpoint until the server is ready or we time out.
 * Fails fast with a clear error so developers don't see cryptic browser errors.
 */
async function waitForServer(baseURL: string): Promise<void> {
  const healthURL = baseURL;
  const deadline = Date.now() + PREFLIGHT_TIMEOUT_MS;
  const startTime = Date.now();
  let warned = false;

  console.log(`[global-setup] Waiting for server at ${healthURL} ...`);

  while (Date.now() < deadline) {
    try {
      const res = await fetch(healthURL);
      if (res.ok) {
        console.log("[global-setup] Server is ready.");
        return;
      }
      console.log(`[global-setup] Server returned ${res.status}, retrying ...`);
    } catch {
      // Connection refused / DNS error — server not up yet.
    }

    if (!warned && Date.now() - startTime >= PREFLIGHT_WARN_AFTER_MS) {
      warned = true;
      console.warn(
        `[global-setup] ⚠ Still waiting for server after ${
          PREFLIGHT_WARN_AFTER_MS / 1000
        }s.\n` +
          `  Please verify that both the backend and frontend are running.\n` +
          `  You can start them with: ods compose dev`
      );
    }

    await new Promise((r) => setTimeout(r, PREFLIGHT_POLL_INTERVAL_MS));
  }

  throw new Error(
    `Onyx is not running at ${baseURL}. ` +
      `Timed out after ${
        PREFLIGHT_TIMEOUT_MS / 1000
      }s waiting for ${healthURL} to return 200. ` +
      `Make sure the backend and frontend are running (e.g. \`ods compose dev\`).`
  );
}

/**
 * Register a user via the backend API. Idempotent — silently succeeds if the
 * user already exists (HTTP 400 with "REGISTER_USER_ALREADY_EXISTS").
 */
async function ensureUserExists(
  apiBase: string,
  email: string,
  password: string
): Promise<void> {
  const ctx = await request.newContext({ baseURL: apiBase });
  try {
    const res = await ctx.post("/api/auth/register", {
      data: { email, username: email, password },
    });

    if (res.ok()) {
      console.log(`[global-setup] Registered user ${email}`);
    } else {
      const body = await res.text();
      // "REGISTER_USER_ALREADY_EXISTS" is the standard FastAPI-Users error code
      if (
        res.status() === 400 &&
        body.includes("REGISTER_USER_ALREADY_EXISTS")
      ) {
        console.log(`[global-setup] User ${email} already exists, skipping.`);
      } else {
        console.warn(
          `[global-setup] Unexpected response registering ${email}: ${res.status()} ${body}`
        );
      }
    }
  } finally {
    await ctx.dispose();
  }
}

/**
 * Log in via the API and save the resulting cookies as a Playwright storage
 * state file.  No browser is needed — this uses Playwright's lightweight
 * request context, which is much faster and produces no console noise.
 */
async function apiLoginAndSaveState(
  baseURL: string,
  email: string,
  password: string,
  storageStatePath: string
): Promise<void> {
  const ctx = await request.newContext({ baseURL });
  try {
    const res = await ctx.post("/api/auth/login", {
      form: { username: email, password },
    });
    if (!res.ok()) {
      const body = await res.text();
      throw new Error(
        `[global-setup] Login failed for ${email}: ${res.status()} ${body}`
      );
    }
    await ctx.storageState({ path: storageStatePath });
  } finally {
    await ctx.dispose();
  }
}

/**
 * Promote a user to admin via the manage API.
 * Requires an authenticated context (admin storage state).
 */
async function promoteToAdmin(
  baseURL: string,
  adminStorageState: string,
  email: string
): Promise<void> {
  const ctx = await request.newContext({
    baseURL,
    storageState: adminStorageState,
  });
  try {
    const res = await ctx.patch("/api/manage/set-user-role", {
      data: {
        user_email: email,
        new_role: "admin",
      },
    });
    if (res.ok()) {
      console.log(`[global-setup] Promoted ${email} to admin`);
    } else if (res.status() === 403) {
      throw new Error(
        `[global-setup] Cannot promote ${email} — the primary admin account ` +
          `(${TEST_ADMIN_CREDENTIALS.email}) does not have the admin role.\n\n` +
          `This usually happens when running tests against a non-fresh database ` +
          `where another user was registered first.\n\n` +
          `To fix this, either:\n` +
          `  1. Promote the user manually: ${baseURL}/admin/users\n` +
          `  2. Reset to a seeded database: ods db restore --fetch-seeded\n`
      );
    } else {
      const body = await res.text();
      console.warn(
        `[global-setup] Failed to promote ${email}: ${res.status()} ${body}`
      );
    }
  } finally {
    await ctx.dispose();
  }
}

async function globalSetup(config: FullConfig) {
  // Get baseURL from config, fallback to localhost:3000
  const baseURL = config.projects[0]?.use?.baseURL || "http://localhost:3000";

  // ── Preflight check ──────────────────────────────────────────────────
  await waitForServer(baseURL);

  // ── Provision test users via API ─────────────────────────────────────
  // The first user registered becomes the admin automatically.
  // Order matters: admin first, then admin2, then worker users.
  await ensureUserExists(
    baseURL,
    TEST_ADMIN_CREDENTIALS.email,
    TEST_ADMIN_CREDENTIALS.password
  );
  await ensureUserExists(
    baseURL,
    TEST_ADMIN2_CREDENTIALS.email,
    TEST_ADMIN2_CREDENTIALS.password
  );

  for (let i = 0; i < WORKER_USER_POOL_SIZE; i++) {
    const { email, password } = workerUserCredentials(i);
    await ensureUserExists(baseURL, email, password);
  }

  // ── Login via API and save storage state ───────────────────────────
  await apiLoginAndSaveState(
    baseURL,
    TEST_ADMIN_CREDENTIALS.email,
    TEST_ADMIN_CREDENTIALS.password,
    "admin_auth.json"
  );

  // Promote admin2 now that we have an admin session
  await promoteToAdmin(
    baseURL,
    "admin_auth.json",
    TEST_ADMIN2_CREDENTIALS.email
  );

  await apiLoginAndSaveState(
    baseURL,
    TEST_ADMIN2_CREDENTIALS.email,
    TEST_ADMIN2_CREDENTIALS.password,
    "admin2_auth.json"
  );

  for (let i = 0; i < WORKER_USER_POOL_SIZE; i++) {
    const { email, password } = workerUserCredentials(i);
    const storageStatePath = `worker${i}_auth.json`;
    await apiLoginAndSaveState(baseURL, email, password, storageStatePath);

    const workerCtx = await request.newContext({
      baseURL,
      storageState: storageStatePath,
    });
    try {
      const res = await workerCtx.patch("/api/user/personalization", {
        data: { name: "worker" },
      });
      if (!res.ok()) {
        console.warn(
          `[global-setup] Failed to set display name for ${email}: ${res.status()}`
        );
      }
    } finally {
      await workerCtx.dispose();
    }
  }

  // ── Ensure a public LLM provider exists ───────────────────────────
  // Many tests depend on a default LLM being configured (file uploads,
  // assistant creation, etc.).  Re-use the admin session we just saved.
  const adminCtx = await request.newContext({
    baseURL,
    storageState: "admin_auth.json",
  });
  try {
    const client = new OnyxApiClient(adminCtx, baseURL);
    await client.ensurePublicProvider();
  } finally {
    await adminCtx.dispose();
  }
}

export default globalSetup;


================================================
FILE: web/tests/e2e/mcp/default-agent-mcp.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAs, apiLogin } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import {
  startMcpApiKeyServer,
  McpServerProcess,
} from "@tests/e2e/utils/mcpServer";
import {
  getPacketObjectsByType,
  sendMessageAndCaptureStreamPackets,
} from "@tests/e2e/utils/chatStream";

const API_KEY = process.env.MCP_API_KEY || "test-api-key-12345";
const DEFAULT_PORT = Number(process.env.MCP_API_KEY_TEST_PORT || "8005");
const MCP_API_KEY_TEST_URL = process.env.MCP_API_KEY_TEST_URL;
const MCP_ASSERTED_TOOL_NAME = "tool_0";

async function scrollToBottom(page: Page): Promise<void> {
  try {
    await page.evaluate(() => {
      window.scrollTo(0, document.body.scrollHeight);
    });
    await page.waitForTimeout(200);
  } catch {
    // ignore scrolling failures
  }
}

async function ensureOnboardingComplete(page: Page): Promise<void> {
  await page.evaluate(async () => {
    try {
      await fetch("/api/user/personalization", {
        method: "PATCH",
        headers: { "Content-Type": "application/json" },
        credentials: "include",
        body: JSON.stringify({ name: "Playwright User" }),
      });
    } catch {
      // ignore personalization failures
    }
  });

  await page.reload();
  await page.waitForLoadState("networkidle");
}

const getToolName = (packetObject: Record<string, unknown>): string | null => {
  const value = packetObject.tool_name;
  return typeof value === "string" ? value : null;
};

function getToolPacketCounts(
  packets: Record<string, unknown>[],
  toolName: string
): { start: number; delta: number; debug: number } {
  const start = getPacketObjectsByType(packets, "custom_tool_start").filter(
    (packetObject) => getToolName(packetObject) === toolName
  ).length;
  const delta = getPacketObjectsByType(packets, "custom_tool_delta").filter(
    (packetObject) => getToolName(packetObject) === toolName
  ).length;
  const debug = getPacketObjectsByType(packets, "tool_call_debug").filter(
    (packetObject) => getToolName(packetObject) === toolName
  ).length;

  return { start, delta, debug };
}

async function fetchMcpToolIdByName(
  page: Page,
  serverId: number,
  toolName: string
): Promise<number> {
  const response = await page.request.get(
    `/api/admin/mcp/server/${serverId}/db-tools`
  );
  expect(response.ok()).toBeTruthy();
  const data = (await response.json()) as {
    tools?: Array<{ id: number; name: string }>;
  };
  const matchedTool = data.tools?.find((tool) => tool.name === toolName);
  expect(matchedTool?.id).toBeTruthy();
  return matchedTool!.id;
}

test.describe("Default Agent MCP Integration", () => {
  test.describe.configure({ mode: "serial" });

  let serverProcess: McpServerProcess | null = null;
  let serverId: number | null = null;
  let serverName: string;
  let serverUrl: string;
  let basicUserEmail: string;
  let basicUserPassword: string;
  let createdProviderId: number | null = null;
  let assertedToolId: number | null = null;

  test.beforeAll(async ({ browser }) => {
    // Use dockerized server if URL is provided, otherwise start local server
    if (MCP_API_KEY_TEST_URL) {
      serverUrl = MCP_API_KEY_TEST_URL;
      console.log(
        `[test-setup] Using dockerized MCP API key server at ${serverUrl}`
      );
    } else {
      // Start the MCP API key server locally
      serverProcess = await startMcpApiKeyServer({
        port: DEFAULT_PORT,
        apiKey: API_KEY,
      });
      serverUrl = `http://${serverProcess.address.host}:${serverProcess.address.port}/mcp`;
      console.log(
        `[test-setup] MCP API key server started locally at ${serverUrl}`
      );
    }

    serverName = `PW API Key Server ${Date.now()}`;

    // Setup as admin
    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    const adminClient = new OnyxApiClient(adminPage.request);

    // Ensure a public LLM provider exists
    createdProviderId = await adminClient.ensurePublicProvider();

    // Clean up any existing servers with the same URL
    try {
      const existingServers = await adminClient.listMcpServers();
      for (const server of existingServers) {
        if (server.server_url === serverUrl) {
          await adminClient.deleteMcpServer(server.id);
        }
      }
    } catch (error) {
      console.warn("Failed to cleanup existing MCP servers", error);
    }

    // Create a basic user for testing
    basicUserEmail = `pw-basic-user-${Date.now()}@example.com`;
    basicUserPassword = "BasicUserPass123!";
    await adminClient.registerUser(basicUserEmail, basicUserPassword);

    await adminContext.close();
  });

  test.afterAll(async ({ browser }) => {
    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    const adminClient = new OnyxApiClient(adminPage.request);

    if (createdProviderId !== null) {
      await adminClient.deleteProvider(createdProviderId);
    }

    if (serverId) {
      await adminClient.deleteMcpServer(serverId);
    }

    await adminContext.close();

    // Only stop the server if we started it locally
    if (serverProcess) {
      await serverProcess.stop();
    }
  });

  test("Admin configures API key MCP server and adds tools to default agent", async ({
    page,
  }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    console.log(`[test] Starting with server name: ${serverName}`);

    // Navigate to MCP actions page
    await page.goto("/admin/actions/mcp");
    await page.waitForURL("**/admin/actions/mcp**");
    console.log(`[test] Navigated to MCP actions page`);

    // Click "Add MCP Server" button to open modal
    await page.getByRole("button", { name: /Add MCP Server/i }).click();
    await page.waitForTimeout(500); // Wait for modal to appear
    console.log(`[test] Opened Add MCP Server modal`);

    // Fill basic server info in AddMCPServerModal
    await page.locator("input#name").fill(serverName);
    await page.locator("textarea#description").fill("Test API key MCP server");
    await page.locator("input#server_url").fill(serverUrl);
    console.log(`[test] Filled basic server details`);

    // Submit the modal to create server
    const createServerResponsePromise = page.waitForResponse((resp) => {
      try {
        const url = new URL(resp.url());
        return (
          url.pathname === "/api/admin/mcp/server" &&
          resp.request().method() === "POST" &&
          resp.ok()
        );
      } catch {
        return false;
      }
    });
    await page.getByRole("button", { name: "Add Server" }).click();
    const createServerResponse = await createServerResponsePromise;
    const createdServer = (await createServerResponse.json()) as {
      id?: number;
    };
    expect(createdServer.id).toBeTruthy();
    serverId = Number(createdServer.id);
    expect(serverId).toBeGreaterThan(0);
    console.log(`[test] Created MCP server with id: ${serverId}`);
    await page.waitForTimeout(1000); // Wait for modal to close and auth modal to open
    console.log(`[test] Created MCP server, auth modal should open`);

    // MCPAuthenticationModal should now be open - configure API Key authentication
    await page.waitForTimeout(500); // Ensure modal is fully rendered

    // Select API Key as authentication method
    const authMethodSelect = page.getByTestId("mcp-auth-method-select");
    await authMethodSelect.click();
    await page.getByRole("option", { name: "API Key" }).click();
    console.log(`[test] Selected API Key authentication method`);

    await page.waitForTimeout(500); // Wait for tabs to appear

    // The modal now shows tabs - select "Shared Key (Admin)" tab
    const adminTab = page.getByRole("tab", { name: /Shared Key.*Admin/i });
    await expect(adminTab).toBeVisible({ timeout: 5000 });
    await adminTab.click();
    await page.waitForTimeout(300);
    console.log(`[test] Selected Shared Key (Admin) tab`);

    // Wait for API token field to appear and fill it
    const apiTokenInput = page.locator('input[name="api_token"]');
    await expect(apiTokenInput).toBeVisible({ timeout: 10000 });
    await apiTokenInput.click(); // Focus the field first
    await apiTokenInput.fill(API_KEY);
    console.log(`[test] Filled API key`);

    // Click Connect button to submit authentication
    const connectButton = page.getByTestId("mcp-auth-connect-button");
    await expect(connectButton).toBeVisible({ timeout: 5000 });
    await connectButton.click();
    console.log(`[test] Clicked Connect button`);

    // Wait for the tools to be fetched
    await page.waitForTimeout(1000);
    console.log(`[test] Tools fetched successfully`);

    // Verify server card is visible
    await expect(
      page.getByText(serverName, { exact: false }).first()
    ).toBeVisible({ timeout: 20000 });
    console.log(`[test] Verified server card is visible`);

    // Click the refresh button to fetch/refresh tools
    const refreshButton = page.getByRole("button", { name: "Refresh tools" });
    await expect(refreshButton).toBeVisible({ timeout: 5000 });
    await refreshButton.click();
    console.log(`[test] Clicked refresh tools button`);

    // Wait for tools to load - "No tools available" should disappear
    await expect(page.getByText("No tools available")).not.toBeVisible({
      timeout: 15000,
    });
    console.log(`[test] Tools loaded successfully`);

    assertedToolId = await fetchMcpToolIdByName(
      page,
      serverId,
      MCP_ASSERTED_TOOL_NAME
    );
    console.log(
      `[test] Resolved ${MCP_ASSERTED_TOOL_NAME} to tool ID ${assertedToolId}`
    );

    // Disable multiple tools (tool_0, tool_1, tool_2, tool_3)
    const toolIds = ["tool_11", "tool_12", "tool_13", "tool_14"];
    let disabledToolsCount = 0;

    for (const toolId of toolIds) {
      const toolToggle = page.getByLabel(`tool-toggle-${toolId}`).first();

      // Check if the tool exists
      const isVisible = await toolToggle
        .isVisible({ timeout: 2000 })
        .catch(() => false);

      if (!isVisible) {
        console.log(`[test] Tool ${toolId} not found, skipping`);
        continue;
      }

      console.log(`[test] Found tool: ${toolId}`);

      // Disable if currently enabled (tools are enabled by default)
      const state = await toolToggle.getAttribute("aria-checked");
      if (state === "true") {
        await toolToggle.click();
        await expect(toolToggle).toHaveAttribute("aria-checked", "false", {
          timeout: 5000,
        });
        disabledToolsCount++;
        console.log(`[test] Disabled tool: ${toolId}`);
      } else {
        console.log(`[test] Tool ${toolId} already disabled`);
      }
    }

    console.log(
      `[test] Successfully disabled ${disabledToolsCount} tools via UI`
    );
  });

  test("Admin adds MCP tools to default agent via chat preferences page", async ({
    page,
  }) => {
    test.skip(!serverId, "MCP server must be created first");

    await page.context().clearCookies();
    await loginAs(page, "admin");
    console.log(`[test] Logged in as admin for chat preferences config`);

    // Navigate to chat preferences page
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForURL("**/admin/configuration/chat-preferences**");
    console.log(`[test] Navigated to chat preferences page`);

    // Wait for page to load
    await expect(page.locator('[aria-label="admin-page-title"]')).toBeVisible({
      timeout: 10000,
    });
    console.log(`[test] Page loaded`);

    // Scroll to the Actions & Tools section (open by default)
    await scrollToBottom(page);

    // Find the MCP server card by name text
    // The server name appears inside a label within the ActionsLayouts.Header
    const serverLabel = page
      .locator("label")
      .filter({ has: page.getByText(serverName, { exact: true }) });
    await expect(serverLabel.first()).toBeVisible({ timeout: 10000 });
    console.log(`[test] MCP server card found for server: ${serverName}`);

    // Scroll server card into view
    await serverLabel.first().scrollIntoViewIfNeeded();

    // The server-level Switch in the header toggles ALL tools
    const serverSwitch = serverLabel
      .first()
      .locator('button[role="switch"]')
      .first();
    await expect(serverSwitch).toBeVisible({ timeout: 5000 });

    // Enable all tools by toggling the server switch ON
    const serverState = await serverSwitch.getAttribute("aria-checked");
    if (serverState !== "true") {
      await serverSwitch.click();
      // Auto-save triggers immediately
      await expect(page.getByText("Tools updated").first()).toBeVisible({
        timeout: 10000,
      });
    }
    console.log(`[test] MCP tools successfully added to default agent`);
  });

  test("Basic user can see and toggle MCP tools in default agent", async ({
    page,
  }) => {
    test.skip(!serverId, "MCP server must be configured first");
    test.skip(!basicUserEmail, "Basic user must be created first");

    await page.context().clearCookies();
    await apiLogin(page, basicUserEmail, basicUserPassword);
    console.log(`[test] Logged in as basic user: ${basicUserEmail}`);

    // Navigate to chat (which uses default agent for new users)
    await page.goto("/app");
    await page.waitForURL("**/app**");
    await ensureOnboardingComplete(page);
    console.log(`[test] Navigated to chat page`);

    // Open actions popover
    const actionsButton = page.getByTestId("action-management-toggle");
    await expect(actionsButton).toBeVisible({ timeout: 10000 });
    await actionsButton.click();
    console.log(`[test] Opened actions popover`);

    // Wait for popover to open
    const popover = page.locator('[data-testid="tool-options"]');
    await expect(popover).toBeVisible({ timeout: 5000 });

    // Find the MCP server in the list
    const serverLineItem = popover
      .locator(".group\\/LineItem")
      .filter({ hasText: serverName });
    await expect(serverLineItem).toBeVisible({ timeout: 10000 });
    console.log(`[test] Found MCP server: ${serverName}`);

    // Click to open the server's tool list
    await serverLineItem.click();
    await page.waitForTimeout(500);
    console.log(`[test] Clicked on MCP server to view tools`);

    // Verify we're in the tool list view (should have Enable/Disable All)
    await expect(
      popover.getByText(/(Enable|Disable) All/i).first()
    ).toBeVisible({ timeout: 5000 });
    console.log(`[test] Tool list view loaded`);

    // Find a specific tool (tool_0)
    const toolLineItem = popover
      .locator(".group\\/LineItem")
      .filter({ hasText: /^tool_0/ })
      .first();
    await expect(toolLineItem).toBeVisible({ timeout: 5000 });
    console.log(`[test] Found tool: tool_0`);

    // Find the toggle switch for the tool
    const toolToggle = toolLineItem.locator('[role="switch"]');
    await expect(toolToggle).toBeVisible({ timeout: 5000 });
    console.log(`[test] Tool toggle is visible`);

    // Get initial state and toggle
    const initialState = await toolToggle.getAttribute("aria-checked");
    console.log(`[test] Initial toggle state: ${initialState}`);
    await toolToggle.click();
    await page.waitForTimeout(300);

    // Wait for state to change
    const expectedState = initialState === "true" ? "false" : "true";
    await expect(toolToggle).toHaveAttribute("aria-checked", expectedState, {
      timeout: 5000,
    });
    console.log(`[test] Toggle state changed to: ${expectedState}`);

    // Toggle back
    await toolToggle.click();
    await page.waitForTimeout(300);
    await expect(toolToggle).toHaveAttribute("aria-checked", initialState!, {
      timeout: 5000,
    });
    console.log(`[test] Toggled back to original state: ${initialState}`);

    // Test "Disable All" functionality
    const disableAllButton = popover.getByText(/Disable All/i).first();
    const hasDisableAll = await disableAllButton.isVisible();
    console.log(`[test] Disable All button visible: ${hasDisableAll}`);

    if (hasDisableAll) {
      await disableAllButton.click();
      await page.waitForTimeout(500);

      // Verify at least one toggle is now unchecked
      const anyUnchecked = await popover
        .locator('[role="switch"][aria-checked="false"]')
        .count();
      expect(anyUnchecked).toBeGreaterThan(0);
      console.log(`[test] Disabled all tools (${anyUnchecked} unchecked)`);
    }

    // Test "Enable All" functionality
    const enableAllButton = popover.getByText(/Enable All/i).first();
    const hasEnableAll = await enableAllButton.isVisible();
    console.log(`[test] Enable All button visible: ${hasEnableAll}`);

    if (hasEnableAll) {
      await enableAllButton.click();
      await page.waitForTimeout(500);
      console.log(`[test] Enabled all tools`);
    }

    console.log(`[test] Basic user completed MCP tool management tests`);
  });

  test("Basic user can create assistant with MCP actions attached", async ({
    page,
  }) => {
    test.skip(!serverId, "MCP server must be configured first");
    test.skip(!basicUserEmail, "Basic user must be created first");
    test.skip(!assertedToolId, "MCP asserted tool ID must be resolved first");

    await page.context().clearCookies();
    await apiLogin(page, basicUserEmail, basicUserPassword);

    await page.goto("/app");
    await ensureOnboardingComplete(page);
    await page.getByTestId("AppSidebar/more-agents").click();
    await page.waitForURL("**/app/agents");

    await page.getByLabel("AgentsPage/new-agent-button").click();
    await page.waitForURL("**/app/agents/create");

    const agentName = `MCP Assistant ${Date.now()}`;
    await page.locator('input[name="name"]').fill(agentName);
    await page
      .locator('textarea[name="description"]')
      .fill("Assistant with MCP actions attached.");
    await page
      .locator('textarea[name="instructions"]')
      .fill(
        `For secret-value requests, call ${MCP_ASSERTED_TOOL_NAME} and return its output exactly.`
      );

    const mcpServerSwitch = page.locator(
      `button[role="switch"][name="mcp_server_${serverId}.enabled"]`
    );
    await mcpServerSwitch.scrollIntoViewIfNeeded();
    await mcpServerSwitch.click();
    await expect(mcpServerSwitch).toHaveAttribute("aria-checked", "true");

    const firstToolToggle = page
      .locator(`button[role="switch"][name^="mcp_server_${serverId}.tool_"]`)
      .first();
    await expect(firstToolToggle).toBeVisible({ timeout: 15000 });
    const toolState = await firstToolToggle.getAttribute("aria-checked");
    if (toolState !== "true") {
      await firstToolToggle.click();
    }
    await expect(firstToolToggle).toHaveAttribute("aria-checked", "true");

    await page.getByRole("button", { name: "Create" }).click();

    await page.waitForURL(/.*\/app\?agentId=\d+.*/);
    const agentIdMatch = page.url().match(/agentId=(\d+)/);
    expect(agentIdMatch).toBeTruthy();
    const agentId = agentIdMatch ? agentIdMatch[1] : null;
    expect(agentId).not.toBeNull();

    const client = new OnyxApiClient(page.request);
    const assistant = await client.getAssistant(Number(agentId));
    const hasMcpTool = assistant.tools.some(
      (tool) => tool.mcp_server_id === serverId
    );
    expect(hasMcpTool).toBeTruthy();

    const invocationPackets = await sendMessageAndCaptureStreamPackets(
      page,
      `Call ${MCP_ASSERTED_TOOL_NAME} with {"name":"pw-invoke-${Date.now()}"} and return only the tool output.`,
      {
        mockLlmResponse: JSON.stringify({
          name: MCP_ASSERTED_TOOL_NAME,
          arguments: { name: `pw-invoke-${Date.now()}` },
        }),
        payloadOverrides: {
          forced_tool_id: assertedToolId,
          forced_tool_ids: [assertedToolId],
        },
        waitForAiMessage: false,
      }
    );
    const invocationCounts = getToolPacketCounts(
      invocationPackets,
      MCP_ASSERTED_TOOL_NAME
    );
    expect(invocationCounts.start).toBeGreaterThan(0);
    expect(invocationCounts.delta).toBeGreaterThan(0);
    expect(invocationCounts.debug).toBeGreaterThan(0);

    const actionsButton = page.getByTestId("action-management-toggle");
    await expect(actionsButton).toBeVisible({ timeout: 10000 });
    await actionsButton.click();

    const popover = page.locator('[data-testid="tool-options"]');
    await expect(popover).toBeVisible({ timeout: 5000 });

    const serverLineItem = popover
      .locator(".group\\/LineItem")
      .filter({ hasText: serverName })
      .first();
    await expect(serverLineItem).toBeVisible({ timeout: 10000 });
    await serverLineItem.click();

    const toolSearchInput = popover
      .getByPlaceholder(/Search .* tools/i)
      .first();
    await expect(toolSearchInput).toBeVisible({ timeout: 10000 });
    await toolSearchInput.fill(MCP_ASSERTED_TOOL_NAME);

    const toolToggle = popover.getByLabel(`Toggle ${MCP_ASSERTED_TOOL_NAME}`);
    await expect(toolToggle).toBeVisible({ timeout: 10000 });
    const isToolToggleUnchecked = async () => {
      const dataState = await toolToggle.getAttribute("data-state");
      if (typeof dataState === "string") {
        return dataState === "unchecked";
      }
      return (await toolToggle.getAttribute("aria-checked")) === "false";
    };
    if (!(await isToolToggleUnchecked())) {
      await toolToggle.click();
    }
    await expect
      .poll(isToolToggleUnchecked, {
        timeout: 5000,
      })
      .toBe(true);

    await page.keyboard.press("Escape").catch(() => {});

    const disabledPackets = await sendMessageAndCaptureStreamPackets(
      page,
      `Call ${MCP_ASSERTED_TOOL_NAME} with {"name":"pw-disabled-${Date.now()}"} and return only the tool output.`,
      {
        mockLlmResponse: JSON.stringify({
          name: MCP_ASSERTED_TOOL_NAME,
          arguments: { name: `pw-disabled-${Date.now()}` },
        }),
        payloadOverrides: {
          forced_tool_id: assertedToolId,
          forced_tool_ids: [assertedToolId],
        },
        waitForAiMessage: false,
      }
    );
    const disabledCounts = getToolPacketCounts(
      disabledPackets,
      MCP_ASSERTED_TOOL_NAME
    );
    expect(disabledCounts.start).toBe(0);
    expect(disabledCounts.delta).toBe(0);
    expect(disabledCounts.debug).toBe(0);
  });

  test("Admin can modify MCP tools in default agent", async ({ page }) => {
    test.skip(!serverId, "MCP server must be configured first");

    await page.context().clearCookies();
    await loginAs(page, "admin");
    console.log(`[test] Testing tool modification`);

    // Navigate to chat preferences page
    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForURL("**/admin/configuration/chat-preferences**");

    // Scroll to Actions & Tools section
    await scrollToBottom(page);

    // Find the MCP server card by name
    const serverLabel = page
      .locator("label")
      .filter({ has: page.getByText(serverName, { exact: true }) });
    await expect(serverLabel.first()).toBeVisible({ timeout: 10000 });
    await serverLabel.first().scrollIntoViewIfNeeded();

    // Click "Expand" to reveal individual tools
    const expandButton = page.getByRole("button", { name: "Expand" }).first();
    const isExpandVisible = await expandButton.isVisible().catch(() => false);
    if (isExpandVisible) {
      await expandButton.click();
      await page.waitForTimeout(300);
      console.log(`[test] Expanded MCP server card`);
    }

    // Find a specific tool by name inside the expanded card content
    // Individual tools are rendered as ActionsLayouts.Tool with their own Card > Label
    const toolLabel = page
      .locator("label")
      .filter({ has: page.getByText("tool_0", { exact: true }) });
    const firstToolSwitch = toolLabel
      .first()
      .locator('button[role="switch"]')
      .first();

    await expect(firstToolSwitch).toBeVisible({ timeout: 5000 });
    await firstToolSwitch.scrollIntoViewIfNeeded();

    // Get initial state and toggle
    const initialChecked = await firstToolSwitch.getAttribute("aria-checked");
    console.log(`[test] Initial tool state: ${initialChecked}`);
    await firstToolSwitch.click();

    // Wait for auto-save toast
    await expect(page.getByText("Tools updated").first()).toBeVisible({
      timeout: 10000,
    });
    console.log(`[test] Save successful`);

    // Reload and verify persistence
    await page.reload();
    await page.waitForURL("**/admin/configuration/chat-preferences**");
    await scrollToBottom(page);

    // Re-find the server card
    const serverLabelAfter = page
      .locator("label")
      .filter({ has: page.getByText(serverName, { exact: true }) });
    await expect(serverLabelAfter.first()).toBeVisible({ timeout: 10000 });
    await serverLabelAfter.first().scrollIntoViewIfNeeded();

    // Re-expand the card
    const expandButtonAfter = page
      .getByRole("button", { name: "Expand" })
      .first();
    const isExpandVisibleAfter = await expandButtonAfter
      .isVisible()
      .catch(() => false);
    if (isExpandVisibleAfter) {
      await expandButtonAfter.click();
      await page.waitForTimeout(300);
    }

    // Verify the tool state persisted
    const toolLabelAfter = page
      .locator("label")
      .filter({ has: page.getByText("tool_0", { exact: true }) });
    const firstToolSwitchAfter = toolLabelAfter
      .first()
      .locator('button[role="switch"]')
      .first();
    await expect(firstToolSwitchAfter).toBeVisible({ timeout: 5000 });
    const finalChecked =
      await firstToolSwitchAfter.getAttribute("aria-checked");
    console.log(`[test] Final tool state: ${finalChecked}`);
    expect(finalChecked).not.toEqual(initialChecked);
  });

  test("Instructions persist when saving via chat preferences", async ({
    page,
  }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");

    await page.goto("/admin/configuration/chat-preferences");
    await page.waitForURL("**/admin/configuration/chat-preferences**");

    // Click "Modify Prompt" to open the system prompt modal
    const modifyButton = page.getByText("Modify Prompt");
    await expect(modifyButton).toBeVisible({ timeout: 5000 });
    await modifyButton.click();

    const modal = page.getByRole("dialog");
    await expect(modal).toBeVisible({ timeout: 5000 });

    // Fill instructions in the modal textarea
    const testInstructions = `Test instructions for MCP - ${Date.now()}`;
    const textarea = modal.getByPlaceholder("Enter your system prompt...");
    await textarea.fill(testInstructions);
    console.log(`[test] Filled instructions`);

    // Click Save in the modal footer
    await modal.getByRole("button", { name: "Save" }).click();

    await expect(page.getByText("System prompt updated")).toBeVisible({
      timeout: 10000,
    });
    console.log(`[test] Instructions saved successfully`);

    // Modal should close
    await expect(modal).not.toBeVisible();

    // Reload and verify — wait for all data to load before opening modal
    // (the modal reads system_prompt from SWR state at click time, so data must be ready)
    await page.reload();
    await page.waitForLoadState("networkidle");
    await page.waitForURL("**/admin/configuration/chat-preferences**");

    // Reopen modal and check persisted value
    const modifyButtonAfter = page.getByText("Modify Prompt");
    await expect(modifyButtonAfter).toBeVisible({ timeout: 5000 });
    await modifyButtonAfter.click();

    const modalAfter = page.getByRole("dialog");
    await expect(modalAfter).toBeVisible({ timeout: 5000 });
    await expect(
      modalAfter.getByPlaceholder("Enter your system prompt...")
    ).toHaveValue(testInstructions);

    console.log(`[test] Instructions persisted correctly`);

    // Close modal
    await modalAfter.getByRole("button", { name: "Cancel" }).click();
  });

  test("MCP tools appear in basic user's chat actions after being added to default agent", async ({
    page,
  }) => {
    test.skip(!serverId, "MCP server must be configured first");
    test.skip(!basicUserEmail, "Basic user must be created first");

    await page.context().clearCookies();
    await apiLogin(page, basicUserEmail, basicUserPassword);
    console.log(`[test] Logged in as basic user to verify tool visibility`);

    // Navigate to chat
    await page.goto("/app");
    await page.waitForURL("**/app**");
    console.log(`[test] Navigated to chat`);

    // Open actions popover
    const actionsButton = page.getByTestId("action-management-toggle");
    await expect(actionsButton).toBeVisible({ timeout: 10000 });
    await actionsButton.click();
    console.log(`[test] Opened actions popover`);

    // Wait for popover
    const popover = page.locator('[data-testid="tool-options"]');
    await expect(popover).toBeVisible({ timeout: 5000 });

    // Verify MCP server appears in the actions list
    const serverLineItem = popover
      .locator(".group\\/LineItem")
      .filter({ hasText: serverName });
    await expect(serverLineItem).toBeVisible({ timeout: 10000 });
    console.log(`[test] Found MCP server in actions list`);

    // Click to see tools
    await serverLineItem.click();
    await page.waitForTimeout(500);
    console.log(`[test] Clicked server to view tools`);

    // Verify tools are present
    const toolsList = popover.locator('[role="switch"]');
    const toolCount = await toolsList.count();
    expect(toolCount).toBeGreaterThan(0);

    console.log(
      `[test] Basic user can see ${toolCount} MCP tools from default agent`
    );
  });
});


================================================
FILE: web/tests/e2e/mcp/mcp_oauth_flow.spec.ts
================================================
import { test, expect } from "@playwright/test";
import type { Page, Browser, Locator } from "@playwright/test";
import { loginAs, loginAsWorkerUser, apiLogin } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import {
  startMcpOauthServer,
  McpServerProcess,
} from "@tests/e2e/utils/mcpServer";
import { TEST_ADMIN_CREDENTIALS } from "@tests/e2e/constants";
import { logPageState } from "@tests/e2e/utils/pageStateLogger";
import {
  getPacketObjectsByType,
  sendMessageAndCaptureStreamPackets,
} from "@tests/e2e/utils/chatStream";

const REQUIRED_ENV_VARS = [
  "MCP_OAUTH_CLIENT_ID",
  "MCP_OAUTH_CLIENT_SECRET",
  "MCP_OAUTH_ISSUER",
  "MCP_OAUTH_JWKS_URI",
  "MCP_OAUTH_USERNAME",
  "MCP_OAUTH_PASSWORD",
];

const missingEnvVars = REQUIRED_ENV_VARS.filter(
  (envVar) => !process.env[envVar]
);

if (missingEnvVars.length > 0) {
  throw new Error(
    `Missing required environment variables for MCP OAuth tests: ${missingEnvVars.join(
      ", "
    )}`
  );
}

const DEFAULT_MCP_SERVER_URL =
  process.env.MCP_TEST_SERVER_URL || "http://127.0.0.1:8004/mcp";
let runtimeMcpServerUrl = DEFAULT_MCP_SERVER_URL;
const CLIENT_ID = process.env.MCP_OAUTH_CLIENT_ID!;
const CLIENT_SECRET = process.env.MCP_OAUTH_CLIENT_SECRET!;
const IDP_USERNAME = process.env.MCP_OAUTH_USERNAME!;
const IDP_PASSWORD = process.env.MCP_OAUTH_PASSWORD!;
const APP_BASE_URL = process.env.MCP_TEST_APP_BASE || "http://localhost:3000";
const APP_HOST = new URL(APP_BASE_URL).host;
const IDP_HOST = new URL(process.env.MCP_OAUTH_ISSUER!).host;
const QUICK_CONFIRM_CONNECTED_TIMEOUT_MS = Number(
  process.env.MCP_OAUTH_QUICK_CONFIRM_TIMEOUT_MS || 2000
);
const POST_CLICK_URL_CHANGE_WAIT_MS = Number(
  process.env.MCP_OAUTH_POST_CLICK_URL_CHANGE_WAIT_MS || 5000
);
const MCP_OAUTH_FLOW_TEST_TIMEOUT_MS = Number(
  process.env.MCP_OAUTH_TEST_TIMEOUT_MS || 300_000
);

type Credentials = {
  email: string;
  password: string;
};

type FlowArtifacts = {
  serverId: number;
  serverName: string;
  agentId: number;
  agentName: string;
  toolName: string;
  toolId: number | null;
};

type StepLogger = (message: string) => void;

const DEFAULT_USERNAME_SELECTORS = [
  'input[name="identifier"]',
  "#identifier-input",
  'input[name="username"]',
  "#okta-signin-username",
  "#idp-discovery-username",
  'input[id="idp-discovery-username"]',
  'input[name="email"]',
  'input[type="email"]',
  "#username",
  'input[name="user"]',
];

const DEFAULT_PASSWORD_SELECTORS = [
  'input[name="credentials.passcode"]',
  'input[name="password"]',
  "#okta-signin-password",
  'input[type="password"]',
  "#password",
];

const DEFAULT_SUBMIT_SELECTORS = [
  'button[type="submit"]',
  'input[type="submit"]',
  'button:has-text("Sign in")',
  'button:has-text("Log in")',
  'button:has-text("Continue")',
  'button:has-text("Verify")',
];

const DEFAULT_NEXT_SELECTORS = [
  'button:has-text("Next")',
  'button:has-text("Continue")',
  'input[type="submit"][value="Next"]',
];

const DEFAULT_CONSENT_SELECTORS = [
  'button:has-text("Allow")',
  'button:has-text("Authorize")',
  'button:has-text("Accept")',
  'button:has-text("Grant")',
];

const TOOL_NAMES = {
  admin: "tool_0",
  curator: "tool_1",
};

const SPEC_START_MS = Date.now();

function parseSelectorList(
  value: string | undefined,
  defaults: string[]
): string[] {
  if (!value) return defaults;
  return value
    .split(",")
    .map((selector) => selector.trim())
    .filter(Boolean);
}

function buildMcpServerUrl(baseUrl: string): string {
  const trimmed = baseUrl.replace(/\/+$/, "");
  return trimmed.endsWith("/mcp") ? trimmed : `${trimmed}/mcp`;
}

const logOauthEvent = (page: Page | null, message: string) => {
  const location = page ? ` url=${page.url()}` : "";
  console.log(`[mcp-oauth-test] ${message}${location}`);
};

const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

async function clickAndWaitForPossibleUrlChange(
  page: Page,
  clickAction: () => Promise<void>,
  context: string
) {
  const startingUrl = page.url();
  const urlChangePromise = page
    .waitForURL(
      (url) => {
        const href = typeof url === "string" ? url : url.toString();
        return href !== startingUrl;
      },
      { timeout: POST_CLICK_URL_CHANGE_WAIT_MS }
    )
    .then(() => true)
    .catch(() => false);

  await clickAction();
  const changed = await urlChangePromise;
  if (changed) {
    logOauthEvent(page, `${context}: observed URL change after click`);
  } else {
    logOauthEvent(
      page,
      `${context}: no immediate URL change; continuing OAuth flow`
    );
  }
}

function createStepLogger(testName: string) {
  const start = Date.now();
  return (message: string) => {
    const elapsed = ((Date.now() - start) / 1000).toFixed(1);
    console.log(`[mcp-oauth-step][${testName}] ${message} (+${elapsed}s)`);
  };
}

const getToolName = (packetObject: Record<string, unknown>): string | null => {
  const value = packetObject.tool_name;
  return typeof value === "string" ? value : null;
};

async function verifyToolInvocationFromChat(
  page: Page,
  toolName: string,
  contextLabel: string,
  forcedToolId?: number | null
) {
  const prompt = [
    `Call the MCP tool "${toolName}" now.`,
    `Pass {"name":"playwright-${Date.now()}"} as the arguments.`,
    "Return the exact tool output.",
  ].join(" ");

  const packets = await sendMessageAndCaptureStreamPackets(page, prompt, {
    mockLlmResponse: JSON.stringify({
      name: toolName,
      arguments: { name: `playwright-${Date.now()}` },
    }),
    payloadOverrides:
      forcedToolId != null
        ? {
            forced_tool_id: forcedToolId,
            forced_tool_ids: [forcedToolId],
          }
        : undefined,
    waitForAiMessage: false,
  });
  const startPackets = getPacketObjectsByType(
    packets,
    "custom_tool_start"
  ).filter((packetObject) => getToolName(packetObject) === toolName);
  const deltaPackets = getPacketObjectsByType(
    packets,
    "custom_tool_delta"
  ).filter((packetObject) => getToolName(packetObject) === toolName);
  const debugPackets = getPacketObjectsByType(
    packets,
    "tool_call_debug"
  ).filter((packetObject) => getToolName(packetObject) === toolName);

  expect(startPackets.length).toBeGreaterThan(0);
  expect(deltaPackets.length).toBeGreaterThan(0);
  expect(debugPackets.length).toBeGreaterThan(0);

  console.log(
    `[mcp-oauth-test] ${contextLabel}: tool invocation packets received for ${toolName}`
  );
}

async function fetchMcpToolIdByName(
  page: Page,
  serverId: number,
  toolName: string,
  timeoutMs: number = 15_000
): Promise<number | null> {
  const start = Date.now();
  let visibleToolNames: string[] = [];

  while (Date.now() - start < timeoutMs) {
    const response = await page.request.get(
      `/api/admin/mcp/server/${serverId}/db-tools`
    );
    if (!response.ok()) {
      await page.waitForTimeout(500);
      continue;
    }

    const data = (await response.json()) as {
      tools?: Array<Record<string, unknown>>;
    };
    const tools = Array.isArray(data.tools) ? data.tools : [];
    visibleToolNames = tools
      .map((tool) => {
        const value =
          tool.name ??
          tool.display_name ??
          tool.in_code_tool_id ??
          tool.displayName;
        return typeof value === "string" ? value : "";
      })
      .filter(Boolean);

    const matchedTool = tools.find((tool) => {
      const candidates = [
        tool.name,
        tool.display_name,
        tool.in_code_tool_id,
        tool.displayName,
      ].filter((value): value is string => typeof value === "string");
      return candidates.includes(toolName);
    });
    if (matchedTool) {
      const id = matchedTool.id;
      if (typeof id === "number") {
        return id;
      }
      if (typeof id === "string") {
        const parsed = Number(id);
        if (!Number.isNaN(parsed)) {
          return parsed;
        }
      }
    }

    await page.waitForTimeout(500);
  }

  console.warn(
    `[mcp-oauth-test] Could not resolve tool id for ${toolName} on server ${serverId}. Visible tools: ${visibleToolNames.join(
      ", "
    )}`
  );
  return null;
}

async function logoutSession(page: Page, contextLabel: string) {
  try {
    const response = await page.request.post(`${APP_BASE_URL}/api/auth/logout`);
    const status = response.status();
    if (!response.ok() && status !== 401) {
      const body = await response.text();
      console.warn(
        `[mcp-oauth-test] ${contextLabel}: Logout returned ${status} - ${body}`
      );
    } else {
      console.log(
        `[mcp-oauth-test] ${contextLabel}: Logout request completed with status ${status}`
      );
    }
  } catch (error) {
    console.warn(
      `[mcp-oauth-test] ${contextLabel}: Logout request failed - ${String(
        error
      )}`
    );
  }
}

async function verifySessionUser(
  page: Page,
  expected: { email: string; role: string },
  contextLabel: string
) {
  const response = await page.request.get(`${APP_BASE_URL}/api/me`);
  const status = response.status();
  expect(response.ok()).toBeTruthy();
  const data = await response.json();
  expect(data.email).toBe(expected.email);
  expect(data.role).toBe(expected.role);
  console.log(
    `[mcp-oauth-test] ${contextLabel}: Verified session user ${data.email} (${data.role}) via /api/me (status ${status})`
  );
}

async function logPageStateWithTag(page: Page, context: string) {
  const elapsed = ((Date.now() - SPEC_START_MS) / 1000).toFixed(1);
  await logPageState(page, `${context} (+${elapsed}s)`, "[mcp-oauth-debug]");
}

async function fillFirstVisible(
  page: Page,
  selectors: string[],
  value: string
): Promise<boolean> {
  for (const selector of selectors) {
    const locator = page.locator(selector).first();
    const count = await locator.count();
    if (count === 0) {
      logOauthEvent(page, `Selector ${selector} not found`);
      continue;
    }
    logOauthEvent(page, `Filling first visible selector: ${selector}`);
    let isVisible = await locator.isVisible().catch(() => false);
    logOauthEvent(page, `Selector ${selector} is visible: ${isVisible}`);
    if (!isVisible) {
      logOauthEvent(
        page,
        `Selector ${selector} is not visible, waiting for it to be visible`
      );
      try {
        await locator.waitFor({ state: "visible", timeout: 500 });
        isVisible = true;
      } catch {
        continue;
      }
    }
    if (!isVisible) {
      continue;
    }
    const existing = await locator
      .inputValue()
      .catch(() => "")
      .then((val) => val ?? "");
    if (existing !== value) {
      await locator.fill(value);
    }
    return true;
  }
  return false;
}

async function clickFirstVisible(
  page: Page,
  selectors: string[],
  options: { optional?: boolean } = {}
): Promise<boolean> {
  for (const selector of selectors) {
    const locator = page.locator(selector).first();
    const count = await locator.count();
    if (count === 0) continue;
    let isVisible = await locator.isVisible().catch(() => false);
    if (!isVisible) {
      try {
        await locator.waitFor({ state: "visible", timeout: 500 });
        isVisible = true;
      } catch {
        continue;
      }
    }
    try {
      await locator.click();
      return true;
    } catch (err) {
      if (!options.optional) {
        throw err;
      }
    }
  }
  return false;
}

async function waitForAnySelector(
  page: Page,
  selectors: string[],
  options: { timeout?: number } = {}
): Promise<boolean> {
  const timeout = options.timeout ?? 5000;
  const deadline = Date.now() + timeout;
  while (Date.now() < deadline) {
    for (const selector of selectors) {
      const locator = page.locator(selector).first();
      if ((await locator.count()) === 0) {
        continue;
      }
      try {
        if (await locator.isVisible()) {
          return true;
        }
      } catch {
        continue;
      }
    }
    await page.waitForTimeout(50);
  }
  return false;
}

async function scrollToBottom(page: Page): Promise<void> {
  try {
    await page.evaluate(() => {
      const section = document.querySelector(
        '[data-testid="available-tools-section"]'
      );
      if (section && "scrollIntoView" in section) {
        section.scrollIntoView({ behavior: "instant", block: "end" });
      } else {
        window.scrollTo(0, document.body.scrollHeight);
      }
    });
    await page.waitForTimeout(200);
  } catch {
    // ignore scrolling failures in test environment
  }
}

const isOnHost = (url: string, host: string): boolean => {
  try {
    return new URL(url).host === host;
  } catch {
    return false;
  }
};

const isOnAppHost = (url: string): boolean => isOnHost(url, APP_HOST);
const isOnIdpHost = (url: string): boolean => isOnHost(url, IDP_HOST);

async function performIdpLogin(page: Page): Promise<void> {
  const usernameSelectors = parseSelectorList(
    process.env.MCP_OAUTH_TEST_USERNAME_SELECTOR,
    DEFAULT_USERNAME_SELECTORS
  );
  const passwordSelectors = parseSelectorList(
    process.env.MCP_OAUTH_TEST_PASSWORD_SELECTOR,
    DEFAULT_PASSWORD_SELECTORS
  );
  const submitSelectors = parseSelectorList(
    process.env.MCP_OAUTH_TEST_SUBMIT_SELECTOR,
    DEFAULT_SUBMIT_SELECTORS
  );
  const nextSelectors = parseSelectorList(
    process.env.MCP_OAUTH_TEST_NEXT_SELECTOR,
    DEFAULT_NEXT_SELECTORS
  );
  const consentSelectors = parseSelectorList(
    process.env.MCP_OAUTH_TEST_CONSENT_SELECTOR,
    DEFAULT_CONSENT_SELECTORS
  );
  const passwordSelectorString = passwordSelectors.join(",");

  await page
    .waitForLoadState("domcontentloaded", { timeout: 1000 })
    .catch(() => {});

  logOauthEvent(page, "Attempting IdP login");
  await waitForAnySelector(page, usernameSelectors, { timeout: 1000 });
  logOauthEvent(page, `Username selectors: ${usernameSelectors.join(", ")}`);
  const usernameFilled = await fillFirstVisible(
    page,
    usernameSelectors,
    IDP_USERNAME
  );
  if (usernameFilled) {
    logOauthEvent(page, "Filled username");
    await clickFirstVisible(page, nextSelectors, { optional: true });
    await waitForAnySelector(page, passwordSelectors, { timeout: 2000 });
  }

  const submitPasswordAttempt = async (attemptLabel: string) => {
    const passwordReady = await waitForAnySelector(page, passwordSelectors, {
      timeout: 8000,
    });
    if (!passwordReady) {
      await logPageStateWithTag(
        page,
        `Password input did not appear during ${attemptLabel}`
      );
      return false;
    }
    const filled = await fillFirstVisible(
      page,
      passwordSelectors,
      IDP_PASSWORD
    );
    if (!filled) {
      await logPageStateWithTag(
        page,
        `Unable to find password input during ${attemptLabel}`
      );
      return false;
    }
    logOauthEvent(page, `Filled password (${attemptLabel})`);
    const clickedSubmit = await clickFirstVisible(page, submitSelectors, {
      optional: true,
    });
    if (!clickedSubmit) {
      // As a fallback, press Enter in the password field
      const passwordLocator = page.locator(passwordSelectorString).first();
      if ((await passwordLocator.count()) > 0) {
        await passwordLocator.press("Enter").catch(() => {});
      } else {
        await page.keyboard.press("Enter").catch(() => {});
      }
    }
    logOauthEvent(page, `Submitted IdP credentials (${attemptLabel})`);
    await page
      .waitForLoadState("domcontentloaded", { timeout: 15000 })
      .catch(() => {});
    await page.waitForTimeout(300);
    return true;
  };

  const hasVisiblePasswordField = async (): Promise<boolean> => {
    const locator = page.locator(passwordSelectorString);
    const count = await locator.count();
    for (let i = 0; i < count; i++) {
      try {
        if (await locator.nth(i).isVisible()) {
          return true;
        }
      } catch {
        continue;
      }
    }
    return false;
  };

  await submitPasswordAttempt("initial");

  const MAX_PASSWORD_RETRIES = 3;
  for (let retry = 1; retry <= MAX_PASSWORD_RETRIES; retry++) {
    await page.waitForTimeout(250);
    if (!isOnIdpHost(page.url())) {
      break;
    }
    if (!(await hasVisiblePasswordField())) {
      break;
    }
    logOauthEvent(page, `Password challenge still visible (retry ${retry})`);
    const success = await submitPasswordAttempt(`retry ${retry}`);
    if (!success) {
      break;
    }
  }

  await clickFirstVisible(page, consentSelectors, { optional: true });
  logOauthEvent(page, "Handled consent prompt if present");
  await page
    .waitForLoadState("networkidle", { timeout: 10000 })
    .catch(() => {});
}

async function completeOauthFlow(
  page: Page,
  options: {
    expectReturnPathContains: string;
    confirmConnected?: () => Promise<void>;
    scrollToBottomOnReturn?: boolean;
  }
): Promise<void> {
  logOauthEvent(
    page,
    `Completing OAuth flow with options: ${JSON.stringify(options)}`
  );
  const returnSubstring = options.expectReturnPathContains;
  const matchesExpectedReturnPath = (url: string) => {
    if (!isOnAppHost(url)) {
      return false;
    }
    if (url.includes(returnSubstring)) {
      return true;
    }
    // Re-auth flows can return to a chat session URL instead of agentId URL.
    if (
      returnSubstring.includes("/app?agentId=") &&
      url.includes("/app?chatId=")
    ) {
      return true;
    }
    return false;
  };

  logOauthEvent(page, `Current page URL: ${page.url()}`);

  const waitForUrlOrRedirect = async (
    description: string,
    timeout: number,
    predicate: (url: string) => boolean
  ) => {
    const waitStart = Date.now();
    const current = page.url();
    if (predicate(current)) {
      logOauthEvent(
        page,
        `${description} already satisfied (elapsed ${Date.now() - waitStart}ms)`
      );
      return;
    }
    logOauthEvent(page, `Waiting for ${description} (timeout ${timeout}ms)`);
    try {
      await page.waitForURL(
        (url) => {
          const href = typeof url === "string" ? url : url.toString();
          try {
            return predicate(href);
          } catch (err) {
            logOauthEvent(
              null,
              `Predicate threw while waiting for ${description}: ${String(err)}`
            );
            return false;
          }
        },
        { timeout }
      );
      logOauthEvent(
        page,
        `${description} satisfied after ${Date.now() - waitStart}ms`
      );
    } catch (error) {
      // If the predicate became true after the timeout (e.g., navigation finished
      // just before the rejection), treat it as success.
      if (predicate(page.url())) {
        logOauthEvent(
          page,
          `${description} satisfied (after timeout) in ${
            Date.now() - waitStart
          }ms`
        );
        return;
      }
      await logPageStateWithTag(page, `Timeout waiting for ${description}`);
      throw error;
    }
  };

  const tryConfirmConnected = async (
    suppressErrors: boolean
  ): Promise<boolean> => {
    if (!options.confirmConnected) {
      return false;
    }
    if (page.isClosed()) {
      const message = "Page closed before confirmConnected check";
      if (suppressErrors) {
        logOauthEvent(null, message);
        return false;
      }
      throw new Error(message);
    }
    if (!isOnAppHost(page.url())) {
      const message = `confirmConnected requested while not on app host (url=${page.url()})`;
      if (suppressErrors) {
        logOauthEvent(page, message);
        return false;
      }
      throw new Error(message);
    }
    const confirmPromise = options
      .confirmConnected()
      .then(() => ({ status: "success" as const }))
      .catch((error) => ({ status: "error" as const, error }));
    if (suppressErrors) {
      const result = await Promise.race([
        confirmPromise,
        delay(QUICK_CONFIRM_CONNECTED_TIMEOUT_MS).then(() => ({
          status: "timeout" as const,
        })),
      ]);
      if (result.status === "success") {
        return true;
      }
      if (result.status === "error") {
        logOauthEvent(page, "confirmConnected check failed, continuing");
        return false;
      }
      logOauthEvent(
        page,
        `confirmConnected quick check timed out after ${QUICK_CONFIRM_CONNECTED_TIMEOUT_MS}ms`
      );
      return false;
    }
    const finalResult = await confirmPromise;
    if (finalResult.status === "success") {
      return true;
    }
    throw finalResult.error;
  };

  if (
    matchesExpectedReturnPath(page.url()) &&
    (await tryConfirmConnected(true))
  ) {
    return;
  }

  if (isOnAppHost(page.url()) && !page.url().includes("/mcp/oauth/callback")) {
    logOauthEvent(page, "Waiting for redirect away from app host");
    await waitForUrlOrRedirect("IdP redirect", 10000, (url) => {
      const parsed = new URL(url);
      return (
        parsed.host !== APP_HOST ||
        parsed.pathname.includes("/mcp/oauth/callback")
      );
    });
  }

  if (!isOnAppHost(page.url())) {
    logOauthEvent(page, "Starting IdP login step");
    await performIdpLogin(page);
  } else if (!page.url().includes("/mcp/oauth/callback")) {
    logOauthEvent(page, "Still on app host, waiting for OAuth callback");
    await waitForUrlOrRedirect(
      "OAuth callback",
      60000,
      (url) =>
        url.includes("/mcp/oauth/callback") || matchesExpectedReturnPath(url)
    );
  }

  if (!page.url().includes("/mcp/oauth/callback")) {
    logOauthEvent(page, "Waiting for OAuth callback redirect");
    await waitForUrlOrRedirect(
      "OAuth callback",
      60000,
      (url) =>
        url.includes("/mcp/oauth/callback") || matchesExpectedReturnPath(url)
    );
  }

  const waitForReturnStart = Date.now();
  await page
    .waitForLoadState("domcontentloaded", { timeout: 5000 })
    .catch(() => {});
  logOauthEvent(
    page,
    `Initial post-return load wait completed in ${
      Date.now() - waitForReturnStart
    }ms`
  );

  await waitForUrlOrRedirect(`return path ${returnSubstring}`, 60000, (url) =>
    matchesExpectedReturnPath(url)
  );
  const returnLoadStart = Date.now();
  await page
    .waitForLoadState("domcontentloaded", { timeout: 5000 })
    .catch(() => {});
  logOauthEvent(
    page,
    `Post-return domcontentloaded wait finished in ${
      Date.now() - returnLoadStart
    }ms`
  );
  if (!matchesExpectedReturnPath(page.url())) {
    throw new Error(
      `Redirected but final URL (${page.url()}) does not contain expected substring ${returnSubstring}`
    );
  }
  logOauthEvent(page, `Returned to ${returnSubstring}`);

  if (options.scrollToBottomOnReturn) {
    await scrollToBottom(page);
  }

  await tryConfirmConnected(false);
}

async function selectMcpTools(page: Page, serverId: number) {
  // Find the server toggle switch by its name attribute
  const toggleButton = page.locator(
    `button[role="switch"][name="mcp_server_${serverId}.enabled"]`
  );
  const toggleExists = await toggleButton.count();
  if (toggleExists === 0) {
    throw new Error(
      `MCP server section ${serverId} not found in assistant form`
    );
  }

  // Check if the server is enabled (switch is checked)
  const isEnabled = await toggleButton.getAttribute("aria-checked");
  if (isEnabled !== "true") {
    await toggleButton.click();
  }

  // Individual tools are automatically enabled when the server switch is turned on
  // The new AgentEditorPage enables all tools when the server is enabled
}

const escapeRegex = (value: string): string =>
  value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

const ACTION_POPOVER_SELECTOR = '[data-testid="tool-options"]';
const LINE_ITEM_SELECTOR = ".group\\/LineItem";

async function ensureActionPopoverInPrimaryView(page: Page) {
  const popover = page.locator(ACTION_POPOVER_SELECTOR);
  const isVisible = await popover.isVisible().catch(() => false);
  if (!isVisible) {
    return;
  }

  const serverRows = page.locator("[data-mcp-server-name]");
  if ((await serverRows.count()) > 0) {
    return;
  }

  const backButton = popover.getByRole("button", { name: /Back/i }).first();
  if ((await backButton.count()) === 0) {
    return;
  }
  await backButton.click().catch(() => {});
  await page.waitForTimeout(200);
}

async function waitForMcpSecondaryView(page: Page) {
  const toggleControls = page
    .locator(ACTION_POPOVER_SELECTOR)
    .locator(LINE_ITEM_SELECTOR)
    .filter({ hasText: /(Enable|Disable) All/i })
    .first();
  await toggleControls
    .waitFor({ state: "visible", timeout: 5000 })
    .catch(() => {});
}

async function findMcpToolLineItemButton(
  page: Page,
  toolName: string,
  timeoutMs = 5000
): Promise<Locator | null> {
  const deadline = Date.now() + timeoutMs;
  const toolRegex = new RegExp(escapeRegex(toolName), "i");

  while (Date.now() < deadline) {
    const lineItem = page
      .locator(
        `${ACTION_POPOVER_SELECTOR} [data-testid^="tool-option-"] ${LINE_ITEM_SELECTOR}, ` +
          `${ACTION_POPOVER_SELECTOR} ${LINE_ITEM_SELECTOR}`
      )
      .filter({ hasText: toolRegex })
      .first();
    if ((await lineItem.count()) > 0) {
      return lineItem;
    }
    await page.waitForTimeout(200);
  }

  return null;
}

async function logActionPopoverHtml(page: Page, context: string) {
  try {
    const popover = page.locator(ACTION_POPOVER_SELECTOR);
    if ((await popover.count()) === 0) {
      console.log(
        `[mcp-oauth-debug] ${context} action-popover-html="<unavailable>" reason=popover-missing`
      );
      return;
    }
    const isVisible = await popover.isVisible().catch(() => false);
    if (!isVisible) {
      console.log(
        `[mcp-oauth-debug] ${context} action-popover-html="<unavailable>" reason=popover-hidden`
      );
      return;
    }
    const html = await popover.evaluate((node) => node.innerHTML || "");
    const snippet = html.replace(/\s+/g, " ").slice(0, 2000);
    console.log(
      `[mcp-oauth-debug] ${context} action-popover-html=${JSON.stringify(
        snippet
      )}`
    );
  } catch (error) {
    console.log(
      `[mcp-oauth-debug] ${context} action-popover-html="<unavailable>" reason=${String(
        error
      )}`
    );
  }
}

async function closeActionsPopover(page: Page) {
  if (page.isClosed()) {
    return;
  }

  const popover = page.locator(ACTION_POPOVER_SELECTOR);
  if ((await popover.count()) === 0) {
    return;
  }
  const isVisible = await popover.isVisible().catch(() => false);
  if (!isVisible) {
    return;
  }

  const backButton = popover.getByRole("button", { name: /Back/i }).first();
  if ((await backButton.count()) > 0) {
    await backButton.click().catch(() => {});
    await page.waitForTimeout(200).catch(() => {});
  }

  if (!page.isClosed()) {
    await page.keyboard.press("Escape").catch(() => {});
  }
}

async function openActionsPopover(page: Page) {
  const popover = page.locator(ACTION_POPOVER_SELECTOR);
  const isVisible = await popover.isVisible().catch(() => false);
  if (!isVisible) {
    await page.locator('[data-testid="action-management-toggle"]').click();
    await popover.waitFor({ state: "visible", timeout: 10000 });
  }
  await ensureActionPopoverInPrimaryView(page);
}

async function restoreAssistantContext(page: Page, agentId: number) {
  const assistantPath = `/app?agentId=${agentId}`;
  logOauthEvent(
    page,
    `Restoring assistant context for agentId=${agentId} (current url=${page.url()})`
  );

  // Clear chat-focused URL state first, then explicitly reselect assistant.
  await page.goto(`${APP_BASE_URL}/app`, { waitUntil: "domcontentloaded" });
  await page
    .waitForLoadState("networkidle", { timeout: 10000 })
    .catch(() => {});

  const assistantLink = page.locator(`a[href*="agentId=${agentId}"]`).first();
  if ((await assistantLink.count()) > 0) {
    await clickAndWaitForPossibleUrlChange(
      page,
      () => assistantLink.click(),
      `Restore assistant ${agentId} from sidebar`
    );
  } else {
    await page.goto(`${APP_BASE_URL}${assistantPath}`, {
      waitUntil: "domcontentloaded",
    });
  }

  await page
    .waitForLoadState("networkidle", { timeout: 10000 })
    .catch(() => {});
  logOauthEvent(page, `Assistant context restore landed on ${page.url()}`);
}

function getServerRowLocator(page: Page, serverName: string) {
  const labelRegex = new RegExp(escapeRegex(serverName));
  return page
    .locator(
      `${ACTION_POPOVER_SELECTOR} [data-mcp-server-name] ${LINE_ITEM_SELECTOR}, ` +
        `${ACTION_POPOVER_SELECTOR} ${LINE_ITEM_SELECTOR}`
    )
    .filter({ hasText: labelRegex })
    .first();
}

async function collectActionPopoverEntries(page: Page): Promise<string[]> {
  const locator = page
    .locator(ACTION_POPOVER_SELECTOR)
    .locator(
      `[data-mcp-server-name] ${LINE_ITEM_SELECTOR}, ` +
        `[data-testid^="tool-option-"] ${LINE_ITEM_SELECTOR}, ` +
        `${LINE_ITEM_SELECTOR}`
    );
  try {
    return await locator.evaluateAll((nodes) =>
      nodes
        .map((node) =>
          (node.textContent || "")
            .replace(/\s+/g, " ")
            .replace(/\u00a0/g, " ")
            .trim()
        )
        .filter(Boolean)
    );
  } catch {
    return [];
  }
}

async function waitForServerRow(
  page: Page,
  serverName: string,
  timeoutMs: number = 10_000
): Promise<Locator | null> {
  await page
    .locator(ACTION_POPOVER_SELECTOR)
    .waitFor({ state: "visible", timeout: 5000 })
    .catch(() => {});

  const locator = getServerRowLocator(page, serverName);
  const pollInterval = 100;
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    if ((await locator.count()) > 0) {
      return locator;
    }
    await page.waitForTimeout(pollInterval);
  }

  return null;
}

async function clickServerRowAndWaitForPossibleUrlChangeWithRetry(
  page: Page,
  serverName: string,
  actionName: string,
  timeoutMs: number = 15_000
): Promise<boolean> {
  let serverLocator: Locator | null = await waitForServerRow(
    page,
    serverName,
    timeoutMs
  );
  if (!serverLocator) {
    return false;
  }

  for (let attempt = 0; attempt < 5; attempt++) {
    if (!serverLocator) {
      const refreshedServerLocator = await waitForServerRow(
        page,
        serverName,
        5000
      );
      if (!refreshedServerLocator) {
        continue;
      }
      serverLocator = refreshedServerLocator;
    }
    const locatorToClick = serverLocator;
    try {
      await clickAndWaitForPossibleUrlChange(
        page,
        () => locatorToClick.click({ force: true, timeout: 3000 }),
        actionName
      );
      return true;
    } catch {
      if (attempt === 4) {
        break;
      }
      await page.waitForTimeout(150);
      await ensureActionPopoverInPrimaryView(page);
      const refreshedServerLocator = await waitForServerRow(
        page,
        serverName,
        5000
      );
      if (refreshedServerLocator) {
        serverLocator = refreshedServerLocator;
      }
    }
  }

  return false;
}

async function ensureToolOptionVisible(
  page: Page,
  toolName: string,
  serverName: string
) {
  await page
    .waitForSelector(ACTION_POPOVER_SELECTOR, {
      state: "visible",
      timeout: 5000,
    })
    .catch(() => {});

  let toolOption = page
    .getByTestId(`tool-option-${toolName}`)
    .locator(LINE_ITEM_SELECTOR)
    .first();
  if ((await toolOption.count()) > 0) {
    return toolOption;
  }

  await ensureActionPopoverInPrimaryView(page);
  let serverLocator = await waitForServerRow(page, serverName, 10_000);
  if (!serverLocator) {
    const entries = await collectActionPopoverEntries(page);
    await logPageStateWithTag(
      page,
      `MCP server row ${serverName} not found while forcing tool ${toolName}. Visible entries: ${JSON.stringify(
        entries
      )}`
    );
    throw new Error(`Unable to locate MCP server row for ${serverName}`);
  }

  let serverClicked = false;
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      await serverLocator.click({ force: true, timeout: 3000 });
      serverClicked = true;
      break;
    } catch (error) {
      if (attempt === 2) {
        throw error;
      }
      await page.waitForTimeout(150);
      await ensureActionPopoverInPrimaryView(page);
      const refreshedServerLocator = await waitForServerRow(
        page,
        serverName,
        5000
      );
      if (refreshedServerLocator) {
        serverLocator = refreshedServerLocator;
      }
    }
  }
  if (!serverClicked) {
    throw new Error(`Unable to click MCP server row for ${serverName}`);
  }

  await waitForMcpSecondaryView(page);

  for (let attempt = 0; attempt < 3; attempt++) {
    const mcpToolButton = await findMcpToolLineItemButton(
      page,
      toolName,
      10000
    );
    if (mcpToolButton) {
      const isVisible = await mcpToolButton.isVisible().catch(() => false);
      if (isVisible) {
        return mcpToolButton;
      }
    }
    if (attempt < 2) {
      await closeActionsPopover(page);
      await openActionsPopover(page);
      await ensureActionPopoverInPrimaryView(page);
      const refreshedServerLocator = await waitForServerRow(
        page,
        serverName,
        7000
      );
      if (!refreshedServerLocator) {
        break;
      }
      await refreshedServerLocator.click({ force: true, timeout: 3000 });
      await waitForMcpSecondaryView(page);
    }
  }

  await logPageStateWithTag(
    page,
    `Tool option ${toolName} still missing after selecting MCP server ${serverName}`
  );
  await logActionPopoverHtml(
    page,
    `Tool option ${toolName} missing after selecting ${serverName}`
  );
  throw new Error(
    `Tool option ${toolName} not available after selecting server ${serverName}`
  );
}

async function verifyMcpToolRowVisible(
  page: Page,
  serverName: string,
  toolName: string
) {
  await openActionsPopover(page);
  const toolButton = await ensureToolOptionVisible(page, toolName, serverName);
  await expect(toolButton).toBeVisible({ timeout: 5000 });
  await closeActionsPopover(page);
}

async function ensureMcpToolEnabledInActions(
  page: Page,
  serverName: string,
  toolName: string
) {
  await openActionsPopover(page);
  const toolButton = await ensureToolOptionVisible(page, toolName, serverName);
  await expect(toolButton).toBeVisible({ timeout: 5000 });

  let toolToggle = toolButton.getByRole("switch").first();
  if ((await toolToggle.count()) === 0) {
    toolToggle = page.getByLabel(`Toggle ${toolName}`).first();
  }
  await expect(toolToggle).toBeVisible({ timeout: 5000 });

  const isToggleChecked = async () => {
    const dataState = await toolToggle.getAttribute("data-state");
    if (typeof dataState === "string") {
      return dataState === "checked";
    }
    return (await toolToggle.getAttribute("aria-checked")) === "true";
  };

  if (!(await isToggleChecked())) {
    await toolToggle.click();
  }
  await expect.poll(isToggleChecked, { timeout: 5000 }).toBe(true);
  await closeActionsPopover(page);
}

async function reauthenticateFromChat(
  page: Page,
  serverName: string,
  returnSubstring: string
) {
  await openActionsPopover(page);
  const beforeClickUrl = page.url();
  const clickedServerRow =
    await clickServerRowAndWaitForPossibleUrlChangeWithRetry(
      page,
      serverName,
      "Re-authenticate server row click",
      15_000
    );
  if (!clickedServerRow) {
    const entries = await collectActionPopoverEntries(page);
    await logPageStateWithTag(
      page,
      `reauthenticateFromChat could not click ${serverName}; visible entries: ${JSON.stringify(
        entries
      )}`
    );
    throw new Error(
      `Unable to click MCP server row ${serverName} while reauthenticating`
    );
  }

  // Some MCP rows trigger OAuth directly instead of showing a footer action.
  if (page.url() !== beforeClickUrl || !isOnAppHost(page.url())) {
    await completeOauthFlow(page, {
      expectReturnPathContains: returnSubstring,
    });
    return;
  }

  await waitForMcpSecondaryView(page);
  const reauthItem = page.getByText("Re-Authenticate").first();
  let reauthVisible = await reauthItem.isVisible().catch(() => false);
  if (!reauthVisible) {
    // Popover state can rerender; retry selection once before failing.
    await closeActionsPopover(page);
    await openActionsPopover(page);
    const retryBeforeClickUrl = page.url();
    const clickedRetry =
      await clickServerRowAndWaitForPossibleUrlChangeWithRetry(
        page,
        serverName,
        "Re-authenticate server row click retry",
        10_000
      );
    if (!clickedRetry) {
      const entries = await collectActionPopoverEntries(page);
      await logPageStateWithTag(
        page,
        `reauthenticateFromChat retry could not click ${serverName}; visible entries: ${JSON.stringify(
          entries
        )}`
      );
      throw new Error(
        `Unable to click MCP server row ${serverName} on reauth retry`
      );
    }

    if (page.url() !== retryBeforeClickUrl || !isOnAppHost(page.url())) {
      await completeOauthFlow(page, {
        expectReturnPathContains: returnSubstring,
      });
      return;
    }

    await waitForMcpSecondaryView(page);
    reauthVisible = await reauthItem.isVisible().catch(() => false);
  }

  await expect(reauthItem).toBeVisible({ timeout: 15000 });
  await clickAndWaitForPossibleUrlChange(
    page,
    () => reauthItem.click(),
    "Re-authenticate click"
  );
  await completeOauthFlow(page, {
    expectReturnPathContains: returnSubstring,
  });
}

async function ensureServerVisibleInActions(
  page: Page,
  serverName: string,
  options?: {
    agentId?: number;
  }
) {
  for (let attempt = 0; attempt < 2; attempt++) {
    await page.keyboard.press("Escape").catch(() => {});
    await openActionsPopover(page);
    const locatorToUse = await waitForServerRow(page, serverName, 15_000);

    if (locatorToUse) {
      await expect(locatorToUse).toBeVisible({ timeout: 15000 });
      await page.keyboard.press("Escape").catch(() => {});
      return;
    }

    const entries = await collectActionPopoverEntries(page);
    await logPageStateWithTag(
      page,
      `ensureServerVisibleInActions could not find ${serverName}; visible entries: ${JSON.stringify(
        entries
      )}`
    );
    await page.keyboard.press("Escape").catch(() => {});

    if (attempt === 0 && options?.agentId) {
      logOauthEvent(
        page,
        `Server ${serverName} missing in actions, retrying after restoring assistant ${options.agentId} context`
      );
      await restoreAssistantContext(page, options.agentId);
      continue;
    }

    throw new Error(`Server ${serverName} not visible in actions popover`);
  }
}

async function waitForUserRecord(
  client: OnyxApiClient,
  email: string,
  timeoutMs: number = 10_000
) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const record = await client.getUserByEmail(email);
    if (record) {
      return record;
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  throw new Error(`Timed out waiting for user record ${email}`);
}

async function waitForAssistantByName(
  client: OnyxApiClient,
  agentName: string,
  timeoutMs: number = 20_000
) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const assistant = await client.findAgentByName(agentName, {
      getEditable: true,
    });
    if (assistant) {
      return assistant;
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  throw new Error(`Timed out waiting for assistant ${agentName}`);
}

async function waitForAssistantTools(
  client: OnyxApiClient,
  agentName: string,
  requiredToolNames: string[],
  timeoutMs: number = 30_000
) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const assistant = await client.findAgentByName(agentName, {
      getEditable: true,
    });
    if (
      assistant &&
      Array.isArray(assistant.tools) &&
      requiredToolNames.every((name) =>
        assistant.tools.some(
          (tool: any) =>
            tool?.name === name ||
            tool?.in_code_tool_id === name ||
            tool?.display_name === name
        )
      )
    ) {
      return assistant;
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  throw new Error(
    `Timed out waiting for assistant ${agentName} to include tools: ${requiredToolNames.join(
      ", "
    )}`
  );
}

async function mockEmptyOauthStatus(page: Page): Promise<void> {
  await page.route("**/api/mcp/oauth/status*", (route) =>
    route.fulfill({
      status: 200,
      contentType: "application/json",
      body: JSON.stringify({ statuses: [] }),
    })
  );
}

function getNumericQueryParam(
  urlString: string,
  paramName: string
): number | null {
  try {
    const value = new URL(urlString).searchParams.get(paramName);
    if (!value) {
      return null;
    }
    const parsed = Number(value);
    return Number.isNaN(parsed) ? null : parsed;
  } catch {
    return null;
  }
}

async function configureOauthServerAndEnableTool(
  page: Page,
  options: {
    serverName: string;
    serverDescription: string;
    serverUrl: string;
    toolName: string;
    connectContext: string;
    logStep: StepLogger;
  }
): Promise<number> {
  const { serverName, serverDescription, serverUrl, toolName, connectContext } =
    options;

  await page.goto("/admin/actions/mcp");
  await page.waitForURL("**/admin/actions/mcp**", { timeout: 15000 });
  options.logStep("Opened MCP actions page");

  await page.getByRole("button", { name: /Add MCP Server/i }).click();
  await expect(page.locator("input#name")).toBeVisible({ timeout: 10000 });
  options.logStep("Opened Add MCP Server modal");

  await page.locator("input#name").fill(serverName);
  await page.locator("textarea#description").fill(serverDescription);
  await page.locator("input#server_url").fill(serverUrl);
  options.logStep(`Filled server URL: ${serverUrl}`);

  await page.getByRole("button", { name: "Add Server" }).click();
  await expect(page.getByTestId("mcp-auth-method-select")).toBeVisible({
    timeout: 10000,
  });
  options.logStep("Created MCP server, auth modal opened");

  const authMethodSelect = page.getByTestId("mcp-auth-method-select");
  await authMethodSelect.click();
  await page.getByRole("option", { name: "OAuth" }).click();
  options.logStep("Selected OAuth authentication method");

  await page.locator('input[name="oauth_client_id"]').fill(CLIENT_ID);
  await page.locator('input[name="oauth_client_secret"]').fill(CLIENT_SECRET);
  options.logStep("Filled OAuth credentials");

  const connectButton = page.getByTestId("mcp-auth-connect-button");
  await clickAndWaitForPossibleUrlChange(
    page,
    () => connectButton.click(),
    connectContext
  );
  options.logStep("Triggered OAuth connection");

  let serverId: number | null = null;
  await completeOauthFlow(page, {
    expectReturnPathContains: "/admin/actions/mcp",
    confirmConnected: async () => {
      serverId = getNumericQueryParam(page.url(), "server_id");
      if (serverId === null) {
        throw new Error("Missing or invalid server_id in OAuth return URL");
      }
      await expect(
        page.getByText(serverName, { exact: false }).first()
      ).toBeVisible({ timeout: 15000 });
    },
    scrollToBottomOnReturn: false,
  });
  options.logStep("Completed OAuth flow for MCP server");

  if (serverId === null) {
    serverId = getNumericQueryParam(page.url(), "server_id");
  }
  if (serverId === null) {
    throw new Error("Expected numeric server_id in URL after OAuth flow");
  }

  await expect(
    page.getByText(serverName, { exact: false }).first()
  ).toBeVisible({
    timeout: 20000,
  });
  const toolToggles = page.getByLabel(`tool-toggle-${toolName}`);
  await expect(toolToggles.first()).toBeVisible({ timeout: 20000 });
  options.logStep("Verified server card and tool toggles are visible");

  const toggleCount = await toolToggles.count();
  options.logStep(`Found ${toggleCount} instance(s) of ${toolName}`);
  for (let i = 0; i < toggleCount; i++) {
    const toggle = toolToggles.nth(i);
    const isEnabled = await toggle.getAttribute("aria-checked");
    if (isEnabled !== "true") {
      await toggle.click();
      await expect(toggle).toHaveAttribute("aria-checked", "true", {
        timeout: 5000,
      });
      options.logStep(`Enabled tool instance ${i + 1}: ${toolName}`);
    }
  }
  options.logStep("Tools auto-fetched and enabled via UI");

  return serverId;
}

async function openAssistantEditor(
  page: Page,
  options: {
    logStep: StepLogger;
    onLoginRedirect?: () => Promise<void>;
  }
): Promise<void> {
  const assistantEditorUrl = `${APP_BASE_URL}/app/agents/create?admin=true`;
  let assistantPageLoaded = false;

  for (let attempt = 0; attempt < 2 && !assistantPageLoaded; attempt++) {
    await page.goto(assistantEditorUrl);
    try {
      await page.waitForURL("**/app/agents/create**", {
        timeout: 15000,
      });
      assistantPageLoaded = true;
    } catch (error) {
      const currentUrl = page.url();
      if (currentUrl.includes("/app/agents/create")) {
        assistantPageLoaded = true;
        break;
      }
      if (currentUrl.includes("/app?from=login") && options.onLoginRedirect) {
        await options.onLoginRedirect();
        continue;
      }
      await logPageStateWithTag(
        page,
        "Timed out waiting for /app/agents/create"
      );
      throw error;
    }
  }

  if (!assistantPageLoaded) {
    throw new Error("Unable to navigate to /app/agents/create");
  }
  options.logStep("Assistant editor loaded");
}

async function createAgentAndWaitForTool(
  page: Page,
  options: {
    apiClient: OnyxApiClient;
    agentName: string;
    instructions: string;
    description: string;
    serverId: number;
    toolName: string;
    logStep: StepLogger;
  }
): Promise<number> {
  const {
    apiClient,
    agentName,
    instructions,
    description,
    serverId,
    toolName,
    logStep,
  } = options;

  await page.locator('input[name="name"]').fill(agentName);
  await page.locator('textarea[name="instructions"]').fill(instructions);
  await page.locator('textarea[name="description"]').fill(description);
  await selectMcpTools(page, serverId);

  await page.getByRole("button", { name: "Create" }).click();
  await page.waitForURL(
    (url) => {
      const href = typeof url === "string" ? url : url.toString();
      return /\/app\?agentId=\d+/.test(href) || href.includes("/admin/agents");
    },
    { timeout: 20000 }
  );

  let agentId = getNumericQueryParam(page.url(), "agentId");
  if (agentId === null) {
    const assistantRecord = await waitForAssistantByName(apiClient, agentName);
    agentId = assistantRecord.id;
    await page.goto(`/app?agentId=${agentId}`);
    await page.waitForURL(/\/app\?agentId=\d+/, { timeout: 20000 });
  }
  if (agentId === null) {
    throw new Error("Assistant ID could not be determined");
  }
  logStep(`Assistant created with id ${agentId}`);

  await waitForAssistantTools(apiClient, agentName, [toolName]);
  logStep("Confirmed assistant tools are available");
  return agentId;
}

test.describe("MCP OAuth flows", () => {
  test.describe.configure({ mode: "serial" });
  test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);

  let serverProcess: McpServerProcess | null = null;
  let adminArtifacts: FlowArtifacts | null = null;
  let curatorArtifacts: FlowArtifacts | null = null;
  let curatorCredentials: Credentials | null = null;
  let curatorTwoCredentials: Credentials | null = null;
  let curatorGroupId: number | null = null;
  let curatorTwoGroupId: number | null = null;

  test.beforeAll(async ({ browser }, workerInfo) => {
    if (workerInfo.project.name !== "admin") {
      return;
    }

    if (!process.env.MCP_TEST_SERVER_URL) {
      const basePort = Number(process.env.MCP_TEST_SERVER_PORT || "8004");
      const allocatedPort = basePort + workerInfo.workerIndex;
      serverProcess = await startMcpOauthServer({
        port: allocatedPort,
        bindHost: process.env.MCP_TEST_SERVER_BIND_HOST,
        publicHost: process.env.MCP_TEST_SERVER_PUBLIC_HOST,
      });
      const explicitPublicUrl = process.env.MCP_TEST_SERVER_PUBLIC_URL;
      if (explicitPublicUrl) {
        runtimeMcpServerUrl = buildMcpServerUrl(explicitPublicUrl);
      } else {
        const { host: publicHost, port } = serverProcess.address;
        runtimeMcpServerUrl = buildMcpServerUrl(`http://${publicHost}:${port}`);
      }
    } else {
      runtimeMcpServerUrl = buildMcpServerUrl(process.env.MCP_TEST_SERVER_URL);
    }

    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    const adminClient = new OnyxApiClient(adminPage.request);
    try {
      const existingServers = await adminClient.listMcpServers();
      for (const server of existingServers) {
        if (server.server_url === runtimeMcpServerUrl) {
          await adminClient.deleteMcpServer(server.id);
        }
      }
    } catch (error) {
      console.warn("Failed to cleanup existing MCP servers", error);
    }

    const basePassword = "TestPassword123!";
    curatorCredentials = {
      email: `pw-curator-${Date.now()}@example.com`,
      password: basePassword,
    };
    await adminClient.registerUser(
      curatorCredentials.email,
      curatorCredentials.password
    );
    const curatorRecord = await waitForUserRecord(
      adminClient,
      curatorCredentials.email
    );
    curatorGroupId = await adminClient.createUserGroup(
      `Playwright Curator Group ${Date.now()}`,
      [curatorRecord.id]
    );
    await adminClient.setCuratorStatus(
      String(curatorGroupId),
      curatorRecord.id,
      true
    );
    curatorTwoCredentials = {
      email: `pw-curator-${Date.now()}-b@example.com`,
      password: basePassword,
    };
    await adminClient.registerUser(
      curatorTwoCredentials.email,
      curatorTwoCredentials.password
    );
    const curatorTwoRecord = await waitForUserRecord(
      adminClient,
      curatorTwoCredentials.email
    );
    curatorTwoGroupId = await adminClient.createUserGroup(
      `Playwright Curator Group ${Date.now()}-2`,
      [curatorTwoRecord.id]
    );
    await adminClient.setCuratorStatus(
      String(curatorTwoGroupId),
      curatorTwoRecord.id,
      true
    );

    await adminContext.close();
  });

  test.afterAll(async ({ browser }, workerInfo) => {
    if (workerInfo.project.name !== "admin") {
      return;
    }

    if (serverProcess) {
      await serverProcess.stop();
    }

    const adminContext = await browser.newContext({
      storageState: "admin_auth.json",
    });
    const adminPage = await adminContext.newPage();
    const adminClient = new OnyxApiClient(adminPage.request);

    if (adminArtifacts?.agentId) {
      await adminClient.deleteAgent(adminArtifacts.agentId);
    }
    if (adminArtifacts?.serverId) {
      await adminClient.deleteMcpServer(adminArtifacts.serverId);
    }

    if (curatorArtifacts?.agentId) {
      await adminClient.deleteAgent(curatorArtifacts.agentId);
    }
    if (curatorArtifacts?.serverId) {
      await adminClient.deleteMcpServer(curatorArtifacts.serverId);
    }

    if (curatorGroupId) {
      await adminClient.deleteUserGroup(curatorGroupId);
    }
    if (curatorTwoGroupId) {
      await adminClient.deleteUserGroup(curatorTwoGroupId);
    }

    await adminContext.close();
  });

  test("Admin can configure OAuth MCP server and use tools end-to-end", async ({
    page,
  }, testInfo) => {
    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);
    const logStep = createStepLogger("AdminFlow");
    test.skip(
      testInfo.project.name !== "admin",
      "MCP OAuth flows run only in admin project"
    );
    logStep("Starting admin MCP OAuth flow");

    await mockEmptyOauthStatus(page);

    await page.context().clearCookies();
    logStep("Cleared cookies");
    await loginAs(page, "admin");
    await verifySessionUser(
      page,
      { email: TEST_ADMIN_CREDENTIALS.email, role: "admin" },
      "AdminFlow primary login"
    );
    const adminApiClient = new OnyxApiClient(page.request);
    logStep("Logged in as admin");

    const serverName = `PW MCP Admin ${Date.now()}`;
    const agentName = `PW Admin Assistant ${Date.now()}`;

    const serverId = await configureOauthServerAndEnableTool(page, {
      serverName,
      serverDescription: "Playwright MCP OAuth server (admin)",
      serverUrl: runtimeMcpServerUrl,
      toolName: TOOL_NAMES.admin,
      connectContext: "Admin connect click",
      logStep,
    });

    await openAssistantEditor(page, {
      logStep,
      onLoginRedirect: async () => {
        await loginAs(page, "admin");
        await verifySessionUser(
          page,
          { email: TEST_ADMIN_CREDENTIALS.email, role: "admin" },
          "AdminFlow assistant editor relogin"
        );
      },
    });

    const agentId = await createAgentAndWaitForTool(page, {
      apiClient: adminApiClient,
      agentName,
      instructions: "Assist with MCP OAuth testing.",
      description: "Playwright admin MCP assistant.",
      serverId,
      toolName: TOOL_NAMES.admin,
      logStep,
    });
    const createdAgent = await adminApiClient.getAssistant(agentId);
    expect(createdAgent.is_public).toBe(false);
    logStep("Verified newly created agent is private by default");
    const adminToolId = await fetchMcpToolIdByName(
      page,
      serverId,
      TOOL_NAMES.admin
    );

    await ensureServerVisibleInActions(page, serverName, { agentId });
    await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.admin);
    await ensureMcpToolEnabledInActions(page, serverName, TOOL_NAMES.admin);
    logStep("Verified admin MCP tool row visible before reauth");
    await verifyToolInvocationFromChat(
      page,
      TOOL_NAMES.admin,
      "AdminFlow pre-reauth",
      adminToolId
    );
    logStep("Verified admin MCP tool invocation before reauth");

    await reauthenticateFromChat(page, serverName, `/app?agentId=${agentId}`);
    await ensureServerVisibleInActions(page, serverName, { agentId });
    await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.admin);
    await ensureMcpToolEnabledInActions(page, serverName, TOOL_NAMES.admin);
    logStep("Verified admin MCP tool row visible after reauth");
    await verifyToolInvocationFromChat(
      page,
      TOOL_NAMES.admin,
      "AdminFlow post-reauth",
      adminToolId
    );
    logStep("Verified admin MCP tool invocation after reauth");

    // Verify server card still shows the server and tools
    await page.goto("/admin/actions/mcp");
    await page.waitForURL("**/admin/actions/mcp**", { timeout: 15000 });
    await expect(
      page.getByText(serverName, { exact: false }).first()
    ).toBeVisible({ timeout: 15000 });
    logStep("Verified MCP server card is still visible on actions page");

    await adminApiClient.updateAgentSharing(agentId, {
      isPublic: true,
      userIds: createdAgent.users.map((user) => user.id),
      groupIds: createdAgent.groups,
    });
    logStep("Published agent explicitly for end-user MCP flow");

    adminArtifacts = {
      serverId,
      serverName,
      agentId,
      agentName,
      toolName: TOOL_NAMES.admin,
      toolId: adminToolId,
    };
  });

  test("Curator flow with access isolation", async ({
    page,
    browser,
  }, testInfo) => {
    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);
    const logStep = createStepLogger("CuratorFlow");
    test.skip(
      testInfo.project.name !== "admin",
      "MCP OAuth flows run only in admin project"
    );
    logStep("Starting curator MCP OAuth flow");
    await mockEmptyOauthStatus(page);

    if (!curatorCredentials || !curatorTwoCredentials) {
      test.skip(true, "Curator credentials were not initialized");
    }

    await page.context().clearCookies();
    logStep("Cleared cookies");
    await apiLogin(
      page,
      curatorCredentials!.email,
      curatorCredentials!.password
    );
    await verifySessionUser(
      page,
      { email: curatorCredentials!.email, role: "curator" },
      "CuratorFlow primary login"
    );
    logStep("Logged in as curator");
    const curatorApiClient = new OnyxApiClient(page.request);

    const serverName = `PW MCP Curator ${Date.now()}`;
    const agentName = `PW Curator Assistant ${Date.now()}`;

    let curatorServerProcess: McpServerProcess | null = null;
    let curatorRuntimeMcpServerUrl = runtimeMcpServerUrl;

    try {
      if (!process.env.MCP_TEST_SERVER_URL) {
        const basePort =
          (serverProcess?.address.port ??
            Number(process.env.MCP_TEST_SERVER_PORT || "8004")) + 1;
        curatorServerProcess = await startMcpOauthServer({ port: basePort });
        const { host, port } = curatorServerProcess.address;
        curatorRuntimeMcpServerUrl = `http://${host}:${port}/mcp`;
      }

      const serverId = await configureOauthServerAndEnableTool(page, {
        serverName,
        serverDescription: "Playwright MCP OAuth server (curator)",
        serverUrl: curatorRuntimeMcpServerUrl,
        toolName: TOOL_NAMES.curator,
        connectContext: "Curator connect click",
        logStep,
      });

      await openAssistantEditor(page, { logStep });

      const agentId = await createAgentAndWaitForTool(page, {
        apiClient: curatorApiClient,
        agentName,
        instructions: "Curator MCP OAuth assistant.",
        description: "Playwright curator MCP assistant.",
        serverId,
        toolName: TOOL_NAMES.curator,
        logStep,
      });

      await ensureServerVisibleInActions(page, serverName, { agentId });
      await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.curator);
      logStep("Verified curator MCP tool row visible before reauth");

      await reauthenticateFromChat(page, serverName, `/app?agentId=${agentId}`);
      await ensureServerVisibleInActions(page, serverName, { agentId });
      await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.curator);
      logStep("Verified curator MCP tool row visible after reauth");

      curatorArtifacts = {
        serverId,
        serverName,
        agentId,
        agentName,
        toolName: TOOL_NAMES.curator,
        toolId: null,
      };

      // Verify isolation: second curator must not be able to edit first curator's server
      const curatorTwoContext = await browser.newContext();
      const curatorTwoPage = await curatorTwoContext.newPage();
      await logoutSession(
        curatorTwoPage,
        "CuratorFlow secondary pre-login logout"
      );
      await apiLogin(
        curatorTwoPage,
        curatorTwoCredentials!.email,
        curatorTwoCredentials!.password
      );
      await verifySessionUser(
        curatorTwoPage,
        { email: curatorTwoCredentials!.email, role: "curator" },
        "CuratorFlow secondary login"
      );
      await curatorTwoPage.goto("/admin/actions/mcp");
      const serverLocator = curatorTwoPage.getByText(serverName, {
        exact: false,
      });
      await expect(serverLocator).not.toHaveCount(0, { timeout: 15000 });

      const editResponse = await curatorTwoPage.request.get(
        `${APP_BASE_URL}/api/admin/mcp/servers/${serverId}`
      );
      expect(editResponse.status()).toBe(403);
      await curatorTwoContext.close();
    } finally {
      await curatorServerProcess?.stop().catch(() => {});
    }
  });

  test("End user can authenticate and invoke MCP tools via chat", async ({
    page,
  }, testInfo) => {
    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);
    const logStep = createStepLogger("UserFlow");
    test.skip(
      testInfo.project.name !== "admin",
      "MCP OAuth flows run only in admin project"
    );
    logStep("Starting end-user MCP OAuth flow");
    await mockEmptyOauthStatus(page);

    test.skip(!adminArtifacts, "Admin flow must complete before user test");

    await page.context().clearCookies();
    logStep("Cleared cookies");
    await loginAsWorkerUser(page, testInfo.workerIndex);
    logStep("Logged in as worker user");

    const agentId = adminArtifacts!.agentId;
    const serverName = adminArtifacts!.serverName;
    const toolName = adminArtifacts!.toolName;

    await page.goto(`/app?agentId=${agentId}`, {
      waitUntil: "load",
    });
    await ensureServerVisibleInActions(page, serverName, { agentId });
    logStep("Opened chat as user and ensured server visible");

    await openActionsPopover(page);
    const serverLineItem = await waitForServerRow(page, serverName, 15_000);
    if (!serverLineItem) {
      const entries = await collectActionPopoverEntries(page);
      await logPageStateWithTag(
        page,
        `UserFlow reauth locate failed for ${serverName}; visible entries: ${JSON.stringify(
          entries
        )}`
      );
      throw new Error(
        `Unable to locate MCP server row ${serverName} for user reauth`
      );
    }
    await expect(serverLineItem).toBeVisible({ timeout: 15000 });

    const clickedServerRow =
      await clickServerRowAndWaitForPossibleUrlChangeWithRetry(
        page,
        serverName,
        "End-user reauth click",
        15_000
      );
    if (!clickedServerRow) {
      const entries = await collectActionPopoverEntries(page);
      await logPageStateWithTag(
        page,
        `UserFlow reauth click failed for ${serverName}; visible entries: ${JSON.stringify(
          entries
        )}`
      );
      throw new Error(
        `Unable to click MCP server row ${serverName} for user reauth`
      );
    }

    await completeOauthFlow(page, {
      expectReturnPathContains: `/app?agentId=${agentId}`,
    });
    logStep("Completed user OAuth reauthentication");

    await ensureServerVisibleInActions(page, serverName, { agentId });
    await verifyMcpToolRowVisible(page, serverName, toolName);
    await ensureMcpToolEnabledInActions(page, serverName, toolName);
    logStep("Verified user MCP tool row visible after reauth");
    await verifyToolInvocationFromChat(
      page,
      toolName,
      "UserFlow post-reauth",
      adminArtifacts!.toolId
    );
    logStep("Verified user MCP tool invocation after reauth");
  });
});


================================================
FILE: web/tests/e2e/onboarding/onboarding_flow.spec.ts
================================================
import { expect, test } from "@playwright/test";
import type { Page } from "@playwright/test";
import { loginAs, loginAsRandomUser, apiLogin } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";

/**
 * Onboarding Flow E2E Tests
 *
 * Tests the 4 main user scenarios:
 * 1. Admin WITHOUT LLM providers -> Full onboarding, chat disabled
 * 2. Admin WITH LLM providers -> No full onboarding, chat enabled
 * 3. Non-admin WITHOUT LLM providers -> NonAdminStep name prompt, chat disabled
 * 4. Non-admin WITH LLM providers -> NonAdminStep name prompt, chat enabled
 *
 * Marked @exclusive because scenarios 1 & 3 delete all LLM providers.
 */

async function deleteAllProviders(client: OnyxApiClient): Promise<void> {
  const providers = await client.listLlmProviders();
  for (const provider of providers) {
    try {
      await client.deleteProvider(provider.id, { force: true });
    } catch (error) {
      console.warn(
        `Failed to delete provider ${provider.id}: ${String(error)}`
      );
    }
  }
}

async function createFreshAdmin(
  page: Page
): Promise<{ email: string; password: string }> {
  // First, log in as the existing admin so we can promote the new user
  await page.context().clearCookies();
  const { email, password } = await loginAsRandomUser(page);

  // Now promote the new user to admin via the existing admin
  await page.context().clearCookies();
  await loginAs(page, "admin");
  const adminClient = new OnyxApiClient(page.request);
  await adminClient.setUserRole(email, "admin");

  // Log back in as the new admin
  await page.context().clearCookies();
  await apiLogin(page, email, password);

  return { email, password };
}

async function createFreshUser(
  page: Page
): Promise<{ email: string; password: string }> {
  await page.context().clearCookies();
  return await loginAsRandomUser(page);
}

test.describe("Onboarding Flow @exclusive", () => {
  test.describe("Scenario 1: Admin WITHOUT LLM providers", () => {
    test.beforeEach(async ({ page }) => {
      // Delete all providers first (as existing admin)
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await deleteAllProviders(adminClient);

      // Create a fresh admin user (no chat history)
      await createFreshAdmin(page);
    });

    test.afterEach(async ({ page }) => {
      // Restore providers
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await adminClient.ensurePublicProvider();
    });

    test("shows full onboarding flow with Welcome step", async ({ page }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      const onboardingFlow = page.locator('[aria-label="onboarding-flow"]');
      await expect(onboardingFlow).toBeVisible({ timeout: 15000 });

      const header = page.locator('[data-label="onboarding-header"]');
      await expect(header).toBeVisible();
      await expect(
        header.getByRole("button", { name: "Let's Go" })
      ).toBeVisible();

      await expectElementScreenshot(header, {
        name: "onboarding-welcome-step",
      });
    });

    test("chat input bar is disabled during onboarding", async ({ page }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      await expect(page.locator('[aria-label="onboarding-flow"]')).toBeVisible({
        timeout: 15000,
      });

      const chatInput = page.locator("#onyx-chat-input");
      await expect(chatInput).toHaveAttribute("aria-disabled", "true");

      await expectElementScreenshot(chatInput, {
        name: "onboarding-chat-disabled",
      });
    });

    test("can progress through onboarding steps", async ({ page }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      const header = page.locator('[data-label="onboarding-header"]');
      await expect(header).toBeVisible({ timeout: 15000 });
      await header.getByRole("button", { name: "Let's Go" }).click();

      const nameStep = page.locator('[aria-label="onboarding-name-step"]');
      await expect(nameStep).toBeVisible({ timeout: 10000 });
      await nameStep.getByPlaceholder("Your name").fill("Test Admin");

      await expectElementScreenshot(nameStep, {
        name: "onboarding-name-step",
      });

      const nextButton = header.getByRole("button", { name: "Next" });
      await expect(nextButton).toBeEnabled({ timeout: 10000 });
      await nextButton.click();

      const llmStep = page.locator('[aria-label="onboarding-llm-step"]');
      await expect(llmStep).toBeVisible({ timeout: 10000 });

      await expectElementScreenshot(llmStep, {
        name: "onboarding-llm-step",
      });
    });
  });

  test.describe("Scenario 2: Admin WITH LLM providers", () => {
    test.beforeEach(async ({ page }) => {
      // Ensure provider exists
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await adminClient.ensurePublicProvider();

      // Create a fresh admin user
      await createFreshAdmin(page);
    });

    test("does not show full onboarding flow", async ({ page }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      await expect(
        page.locator('[aria-label="onboarding-flow"]')
      ).not.toBeVisible({ timeout: 5000 });
    });

    test("shows name prompt (NonAdminStep) when name not set", async ({
      page,
    }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      const namePrompt = page.locator('[aria-label="non-admin-name-prompt"]');
      await expect(namePrompt).toBeVisible({ timeout: 15000 });
      await expect(
        namePrompt.getByRole("button", { name: "Save" })
      ).toBeVisible();

      await expectElementScreenshot(namePrompt, {
        name: "onboarding-admin-name-prompt",
      });
    });

    test("chat input bar is enabled", async ({ page }) => {
      await page.goto("/app");
      await page.waitForLoadState("networkidle");

      await expect(page.locator("#onyx-chat-input")).toBeVisible({
        timeout: 15000,
      });

      const chatInput = page.locator("#onyx-chat-input");
      await expect(chatInput).not.toHaveAttribute("aria-disabled", "true");
    });
  });

  test.describe("Scenario 3: Non-admin WITHOUT LLM providers", () => {
    test.beforeEach(async ({ page }) => {
      // Delete all providers (as existing admin)
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await deleteAllProviders(adminClient);

      // Create a fresh non-admin user
      await createFreshUser(page);
    });

    test.afterEach(async ({ page }) => {
      // Restore providers
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await adminClient.ensurePublicProvider();
    });

    test("shows NonAdminStep name prompt", async ({ page }) => {
      // loginAsRandomUser already navigates to /app
      const namePrompt = page.locator('[aria-label="non-admin-name-prompt"]');
      await expect(namePrompt).toBeVisible({ timeout: 15000 });
      await expect(
        namePrompt.getByRole("button", { name: "Save" })
      ).toBeVisible();

      await expectElementScreenshot(namePrompt, {
        name: "onboarding-nonadmin-name-prompt",
      });
    });

    test("does NOT show full onboarding flow", async ({ page }) => {
      await expect(
        page.locator('[aria-label="onboarding-flow"]')
      ).not.toBeVisible({ timeout: 5000 });
      await expect(
        page.locator('[aria-label="onboarding-llm-step"]')
      ).not.toBeVisible();
    });

    test("chat input bar is disabled", async ({ page }) => {
      await expect(page.locator("#onyx-chat-input")).toBeVisible({
        timeout: 15000,
      });

      const chatInput = page.locator("#onyx-chat-input");
      await expect(chatInput).toHaveAttribute("aria-disabled", "true");
    });

    test("can save name and see confirmation", async ({ page }) => {
      const namePrompt = page.locator('[aria-label="non-admin-name-prompt"]');
      await expect(namePrompt).toBeVisible({ timeout: 15000 });

      await namePrompt.getByPlaceholder("Your name").fill("Test User");
      await namePrompt.getByRole("button", { name: "Save" }).click();

      const confirmation = page.locator(
        '[aria-label="non-admin-confirmation"]'
      );
      await expect(confirmation).toBeVisible({ timeout: 10000 });

      await expectElementScreenshot(confirmation, {
        name: "onboarding-nonadmin-confirmation",
      });
    });
  });

  test.describe("Scenario 4: Non-admin WITH LLM providers", () => {
    test.beforeEach(async ({ page }) => {
      // Ensure provider exists
      await page.context().clearCookies();
      await loginAs(page, "admin");
      const adminClient = new OnyxApiClient(page.request);
      await adminClient.ensurePublicProvider();

      // Create a fresh non-admin user
      await createFreshUser(page);
    });

    test("shows name prompt when name not set", async ({ page }) => {
      // loginAsRandomUser already navigates to /app
      const namePrompt = page.locator('[aria-label="non-admin-name-prompt"]');
      await expect(namePrompt).toBeVisible({ timeout: 15000 });
    });

    test("chat input bar is enabled", async ({ page }) => {
      await expect(page.locator("#onyx-chat-input")).toBeVisible({
        timeout: 15000,
      });

      const chatInput = page.locator("#onyx-chat-input");
      await expect(chatInput).not.toHaveAttribute("aria-disabled", "true");
    });

    test("after setting name, shows confirmation then no onboarding UI", async ({
      page,
    }) => {
      const namePrompt = page.locator('[aria-label="non-admin-name-prompt"]');
      await expect(namePrompt).toBeVisible({ timeout: 15000 });

      await namePrompt.getByPlaceholder("Your name").fill("E2E User");
      await namePrompt.getByRole("button", { name: "Save" }).click();

      const confirmation = page.locator(
        '[aria-label="non-admin-confirmation"]'
      );
      await expect(confirmation).toBeVisible({ timeout: 10000 });

      await expectElementScreenshot(confirmation, {
        name: "onboarding-nonadmin-with-llm-confirmation",
      });

      await confirmation.getByRole("button").first().click();
      await expect(namePrompt).not.toBeVisible({ timeout: 5000 });
      await expect(confirmation).not.toBeVisible();
    });
  });
});


================================================
FILE: web/tests/e2e/settings/settings_pages.spec.ts
================================================
import { expect, test } from "@playwright/test";
import { THEMES, setThemeBeforeNavigation } from "@tests/e2e/utils/theme";
import { expectScreenshot } from "@tests/e2e/utils/visualRegression";

test.use({ storageState: "admin_auth.json" });

/** Maps each settings slug to the header title shown on that page. */
const SLUG_TO_HEADER: Record<string, string> = {
  general: "Profile",
  "chat-preferences": "Chats",
  "accounts-access": "Accounts",
  connectors: "Connectors",
};

for (const theme of THEMES) {
  test.describe(`Settings pages (${theme} mode)`, () => {
    test.beforeEach(async ({ page }) => {
      await setThemeBeforeNavigation(page, theme);
    });

    test("should screenshot each settings tab", async ({ page }) => {
      await page.goto("/app/settings/general");
      await page
        .getByTestId("settings-left-tab-navigation")
        .waitFor({ state: "visible" });

      const nav = page.getByTestId("settings-left-tab-navigation");
      const tabs = nav.locator("a");
      await expect(tabs.first()).toBeVisible({ timeout: 10_000 });
      const count = await tabs.count();

      for (let i = 0; i < count; i++) {
        const tab = tabs.nth(i);
        const href = await tab.getAttribute("href");
        const slug = href ? href.replace("/app/settings/", "") : `tab-${i}`;

        await tab.click();

        const expectedHeader = SLUG_TO_HEADER[slug];
        if (expectedHeader) {
          await expect(
            page
              .locator(".opal-content-md-header")
              .filter({ hasText: expectedHeader })
          ).toBeVisible({ timeout: 10_000 });
        } else {
          await page.waitForLoadState("networkidle");
        }

        await expectScreenshot(page, {
          name: `settings-${theme}-${slug}`,
        });
      }
    });
  });
}


================================================
FILE: web/tests/e2e/utils/agentUtils.ts
================================================
import { Page } from "@playwright/test";
import { expect } from "@playwright/test";
import { verifyAgentIsChosen } from "./chatActions";

export type AgentParams = {
  name: string;
  description?: string;
  instructions?: string; // system_prompt
};

// Create an assistant via the UI from the app page and wait until it is active
export async function createAgent(page: Page, params: AgentParams) {
  const { name, description = "", instructions = "Test Instructions" } = params;

  // Navigate to creation flow
  // We assume we're on /app; if not, go there first
  if (!page.url().includes("/app")) {
    await page.goto("/app");
  }

  // Open Assistants modal/list
  await page.getByTestId("AppSidebar/more-agents").click();
  await page.getByLabel("AgentsPage/new-agent-button").click();

  // Fill required fields
  await page.locator('input[name="name"]').fill(name);
  if (description) {
    await page.locator('textarea[name="description"]').fill(description);
  }
  await page.locator('textarea[name="instructions"]').fill(instructions);

  // Submit create
  await page.getByRole("button", { name: "Create" }).click();

  // Verify it is selected in chat (placeholder contains assistant name)
  await verifyAgentIsChosen(page, name);
}

// Pin an assistant by its visible name in the sidebar list.
// If already pinned, this will leave it pinned (no-op).
export async function pinAgentByName(
  page: Page,
  agentName: string
): Promise<void> {
  const row = page
    .locator('[data-testid^="assistant-["]')
    .filter({ hasText: agentName })
    .first();

  await row.waitFor({ state: "visible", timeout: 10000 });
  await row.hover();

  const button = row.locator("button").first();
  await button.hover();

  // Tooltip indicates pin vs unpin; use it if available
  const pinTooltip = page.getByText("Pin this assistant to the sidebar");
  const unpinTooltip = page.getByText("Unpin this assistant from the sidebar");

  try {
    await expect(pinTooltip.or(unpinTooltip)).toBeVisible({ timeout: 2000 });
  } catch {
    // Tooltip may fail to appear in CI; continue optimistically
  }

  if (await pinTooltip.isVisible().catch(() => false)) {
    await button.click();
    await page.waitForTimeout(300);
  }
}

/**
 * Ensures the Image Generation tool is enabled in the default agent configuration.
 * If it's not enabled, it will toggle it on.
 *
 * Navigates to the Chat Preferences page and toggles the Image Generation switch
 * inside the "Actions & Tools" collapsible section (open by default).
 */
export async function ensureImageGenerationEnabled(page: Page): Promise<void> {
  // Navigate to the chat preferences page
  await page.goto("/admin/configuration/chat-preferences");
  await page.waitForLoadState("networkidle");

  // The "Actions & Tools" collapsible is open by default.
  // Find the Image Generation tool switch via its label container.
  const imageGenSwitch = page
    .locator("label")
    .filter({ has: page.getByText("Image Generation", { exact: true }) })
    .locator('button[role="switch"]')
    .first();

  await expect(imageGenSwitch).toBeVisible({ timeout: 10000 });

  // Check if it's already enabled
  const currentState = await imageGenSwitch.getAttribute("aria-checked");

  if (currentState !== "true") {
    // Toggle it on — auto-saves immediately via PATCH /api/admin/default-assistant
    await imageGenSwitch.click();

    // Wait for the auto-save toast to confirm success
    await expect(page.getByText("Tools updated").first()).toBeVisible({
      timeout: 5000,
    });

    // Verify it's now enabled
    const newState = await imageGenSwitch.getAttribute("aria-checked");
    if (newState !== "true") {
      throw new Error("Failed to enable Image Generation tool");
    }
  }
}


================================================
FILE: web/tests/e2e/utils/auth.ts
================================================
import type { Page } from "@playwright/test";
import {
  TEST_ADMIN2_CREDENTIALS,
  TEST_ADMIN_CREDENTIALS,
  WORKER_USER_POOL_SIZE,
  workerUserCredentials,
} from "@tests/e2e/constants";

/**
 * Log in via the API and set cookies on the page's browser context.
 * Much faster than navigating through the login UI.
 */
export async function apiLogin(
  page: Page,
  email: string,
  password: string
): Promise<void> {
  const res = await page.request.post("/api/auth/login", {
    form: { username: email, password },
  });
  if (!res.ok()) {
    const body = await res.text();
    throw new Error(`API login failed for ${email}: ${res.status()} ${body}`);
  }
}

// Logs in a known test user (admin or admin2) via the API.
// Users must already be provisioned (see global-setup.ts).
export async function loginAs(
  page: Page,
  userType: "admin" | "admin2"
): Promise<void> {
  const { email, password } =
    userType === "admin" ? TEST_ADMIN_CREDENTIALS : TEST_ADMIN2_CREDENTIALS;

  await apiLogin(page, email, password);
}

/**
 * Log in as a worker-specific user for test isolation.
 * Uses modulo to map any workerIndex (which can exceed the pool size due to
 * retries spawning new workers) back to a provisioned user. This is safe
 * because retries never run in parallel with the original attempt.
 */
export async function loginAsWorkerUser(
  page: Page,
  workerIndex: number
): Promise<void> {
  const { email, password } = workerUserCredentials(
    workerIndex % WORKER_USER_POOL_SIZE
  );
  await apiLogin(page, email, password);
}

// Generate a random email and password for throwaway test users.
const generateRandomCredentials = () => {
  const randomString = Math.random().toString(36).substring(2, 10);
  const specialChars = "!@#$%^&*()_+{}[]|:;<>,.?~";
  const randomSpecialChar =
    specialChars[Math.floor(Math.random() * specialChars.length)];
  const randomUpperCase = String.fromCharCode(
    65 + Math.floor(Math.random() * 26)
  );
  const randomNumber = Math.floor(Math.random() * 10);

  return {
    email: `test_${randomString}@example.com`,
    password: `P@ssw0rd_${randomUpperCase}${randomSpecialChar}${randomNumber}${randomString}`,
  };
};

// Register and log in as a new random user via the API.
export async function loginAsRandomUser(page: Page): Promise<{
  email: string;
  password: string;
}> {
  const { email, password } = generateRandomCredentials();

  const registerRes = await page.request.post("/api/auth/register", {
    data: { email, username: email, password },
  });
  if (!registerRes.ok()) {
    const body = await registerRes.text();
    throw new Error(
      `Failed to register random user ${email}: ${registerRes.status()} ${body}`
    );
  }

  await apiLogin(page, email, password);

  // Navigate to the app so the page is ready for test interactions
  await page.goto("/app?new_team=true");
  await page.waitForLoadState("networkidle");

  return { email, password };
}


================================================
FILE: web/tests/e2e/utils/chatActions.ts
================================================
import { Page } from "@playwright/test";
import { expect } from "@playwright/test";

export async function verifyDefaultAgentIsChosen(page: Page) {
  await expect(page.getByTestId("onyx-logo")).toBeVisible({ timeout: 5000 });
}

export async function verifyAgentIsChosen(
  page: Page,
  agentName: string,
  timeout: number = 5000
) {
  await expect(
    page.getByTestId("agent-name-display").getByText(agentName)
  ).toBeVisible({ timeout });
}

export async function navigateToAgentInHistorySidebar(
  page: Page,
  testId: string,
  agentName: string
) {
  await page.getByTestId(`assistant-${testId}`).click();
  try {
    await verifyAgentIsChosen(page, agentName);
  } catch (error) {
    console.error("Error in navigateToAgentInHistorySidebar:", error);
    const pageText = await page.textContent("body");
    console.log("Page text:", pageText);
    throw error;
  }
}

export async function sendMessage(page: Page, message: string) {
  // Count existing AI messages before sending
  const existingMessageCount = await page
    .locator('[data-testid="onyx-ai-message"]')
    .count();

  await page.locator("#onyx-chat-input-textarea").click();
  await page.locator("#onyx-chat-input-textarea").fill(message);
  await page.locator("#onyx-chat-input-send-button").click();

  // Wait for a NEW AI message to appear (count should increase)
  await expect(page.locator('[data-testid="onyx-ai-message"]')).toHaveCount(
    existingMessageCount + 1,
    { timeout: 30000 }
  );

  // Wait for up to 10 seconds for the URL to contain 'chatId='
  await page.waitForFunction(
    () => window.location.href.includes("chatId="),
    null,
    { timeout: 10000 }
  );
}

export async function verifyCurrentModel(page: Page, modelName: string) {
  const text = await page
    .getByTestId("AppInputBar/llm-popover-trigger")
    .textContent();
  expect(text).toContain(modelName);
}

export async function selectModelFromInputPopover(
  page: Page,
  preferredModels: string[]
): Promise<string> {
  const currentModelText =
    (
      await page.getByTestId("AppInputBar/llm-popover-trigger").textContent()
    )?.trim() ?? "";

  await page.getByTestId("AppInputBar/llm-popover-trigger").click();
  await page.waitForSelector('[role="dialog"]', {
    state: "visible",
    timeout: 10000,
  });

  const dialog = page.locator('[role="dialog"]');
  const searchInput = dialog.getByPlaceholder("Search models...");

  for (const modelName of preferredModels) {
    await searchInput.fill(modelName);
    const modelOptions = dialog.locator("[data-selected]");
    const nonSelectedOptions = dialog.locator('[data-selected="false"]');

    if ((await modelOptions.count()) > 0) {
      const candidate =
        (await nonSelectedOptions.count()) > 0
          ? nonSelectedOptions.first()
          : modelOptions.first();

      await candidate.click();
      await page.waitForSelector('[role="dialog"]', { state: "hidden" });
      const selectedText =
        (
          await page
            .getByTestId("AppInputBar/llm-popover-trigger")
            .textContent()
        )?.trim() ?? "";
      if (!selectedText) {
        throw new Error(
          "Failed to read selected model text from input trigger"
        );
      }
      return selectedText;
    }
  }

  // Reset search so fallback sees all available models.
  await searchInput.fill("");

  const nonSelectedOptions = dialog.locator('[data-selected="false"]');
  if ((await nonSelectedOptions.count()) > 0) {
    const fallback = nonSelectedOptions.first();
    await expect(fallback).toBeVisible();
    await fallback.click();
    await page.waitForSelector('[role="dialog"]', { state: "hidden" });

    const selectedText =
      (
        await page.getByTestId("AppInputBar/llm-popover-trigger").textContent()
      )?.trim() ?? "";
    if (!selectedText) {
      throw new Error("Failed to read selected model text from input trigger");
    }
    return selectedText;
  }

  await page.keyboard.press("Escape").catch(() => {});
  await page
    .waitForSelector('[role="dialog"]', { state: "hidden", timeout: 5000 })
    .catch(() => {});

  if (currentModelText) {
    return currentModelText;
  }

  throw new Error("Unable to select a model from input popover");
}

export async function switchModel(page: Page, modelName: string) {
  await page.getByTestId("AppInputBar/llm-popover-trigger").click();

  // Wait for the popover to open
  await page.waitForSelector('[role="dialog"]', { state: "visible" });

  const modelButton = page
    .locator('[role="dialog"]')
    .locator('[role="button"]')
    .filter({ hasText: modelName })
    .first();

  await modelButton.click();

  // Wait for the popover to close
  await page.waitForSelector('[role="dialog"]', { state: "hidden" });
}

export async function startNewChat(page: Page) {
  await page.getByTestId("AppSidebar/new-session").click();
  await expect(page.getByTestId("chat-intro")).toBeVisible();
}


================================================
FILE: web/tests/e2e/utils/chatStream.ts
================================================
import { expect, Page, Route } from "@playwright/test";
import { sendMessage } from "./chatActions";

export type ChatStreamObject = Record<string, unknown> & {
  type?: string;
};

export type ChatStreamPacket = Record<string, unknown> & {
  obj?: ChatStreamObject;
};

function parseStreamLine(rawLine: string): ChatStreamPacket | null {
  const trimmed = rawLine.trim();
  if (!trimmed) {
    return null;
  }

  const withoutPrefix = trimmed.startsWith("data:")
    ? trimmed.slice("data:".length).trim()
    : trimmed;
  if (!withoutPrefix || withoutPrefix === "[DONE]") {
    return null;
  }

  try {
    return JSON.parse(withoutPrefix) as ChatStreamPacket;
  } catch {
    return null;
  }
}

export function parseChatStreamBody(body: string): ChatStreamPacket[] {
  return body
    .split("\n")
    .map(parseStreamLine)
    .filter((packet): packet is ChatStreamPacket => packet !== null);
}

export function getPacketObjectsByType(
  packets: ChatStreamPacket[],
  packetType: string
): ChatStreamObject[] {
  return packets
    .map((packet) => packet.obj)
    .filter(
      (obj): obj is ChatStreamObject =>
        !!obj && typeof obj.type === "string" && obj.type === packetType
    );
}

export async function sendMessageAndCaptureStreamPackets(
  page: Page,
  message: string,
  options?: {
    mockLlmResponse?: string;
    payloadOverrides?: Record<string, unknown>;
    waitForAiMessage?: boolean;
  }
): Promise<ChatStreamPacket[]> {
  const requestUrlPattern = "**/api/chat/send-chat-message";
  const mockLlmResponse = options?.mockLlmResponse;
  const payloadOverrides = options?.payloadOverrides;
  const waitForAiMessage = options?.waitForAiMessage ?? true;
  const routeHandler = async (route: Route) => {
    if (!mockLlmResponse && !payloadOverrides) {
      await route.continue();
      return;
    }

    const request = route.request();
    const payload = request.postDataJSON() as Record<string, unknown>;
    if (payloadOverrides) {
      Object.assign(payload, payloadOverrides);
    }
    if (mockLlmResponse) {
      payload.mock_llm_response = mockLlmResponse;
    }

    await route.continue({
      postData: JSON.stringify(payload),
      headers: {
        ...request.headers(),
        "content-type": "application/json",
      },
    });
  };

  await page.route(requestUrlPattern, routeHandler);

  const responsePromise = page.waitForResponse((response) => {
    if (
      response.request().method() !== "POST" ||
      !response.url().includes("/api/chat/send-chat-message")
    ) {
      return false;
    }

    const requestBody = response.request().postData();
    if (!requestBody) {
      return true;
    }

    try {
      const payload = JSON.parse(requestBody) as Record<string, unknown>;
      return payload.message === message;
    } catch {
      return true;
    }
  });

  try {
    if (waitForAiMessage) {
      await sendMessage(page, message);
    } else {
      await page.locator("#onyx-chat-input-textarea").click();
      await page.locator("#onyx-chat-input-textarea").fill(message);
      await page.locator("#onyx-chat-input-send-button").click();
      await page
        .waitForFunction(() => window.location.href.includes("chatId="), null, {
          timeout: 10000,
        })
        .catch(() => {});
    }

    const response = await responsePromise;
    expect(response.ok()).toBeTruthy();
    const body = await response.text();
    return parseChatStreamBody(body);
  } finally {
    await page.unroute(requestUrlPattern, routeHandler);
  }
}


================================================
FILE: web/tests/e2e/utils/dragUtils.ts
================================================
import { Locator, Page } from "@playwright/test";

/**
 * Drag "source" above (higher Y) "target" by using mouse events.
 * Positions the cursor on the lower half of source, then moves to the top half of the target.
 */
export async function dragElementAbove(
  sourceLocator: Locator,
  targetLocator: Locator,
  page: Page
) {
  // Get bounding boxes
  const sourceBB = await sourceLocator.boundingBox();
  const targetBB = await targetLocator.boundingBox();
  if (!sourceBB || !targetBB) {
    throw new Error("Source/target bounding boxes not found.");
  }

  // Move over source, press mouse down
  await page.mouse.move(
    sourceBB.x + sourceBB.width / 2,
    sourceBB.y + sourceBB.height * 0.75 // Move to 3/4 down the source element
  );
  await page.mouse.down();

  // Move to a point slightly above the target's center
  await page.mouse.move(
    targetBB.x + targetBB.width / 2,
    targetBB.y + targetBB.height * 0.1, // Move to 1/10 down the target element
    { steps: 20 } // Increase steps for smoother drag
  );
  await page.mouse.up();

  // Increase wait time for DnD transitions
  await page.waitForTimeout(200);
}

/**
 * Drag "source" below (higher Y → lower Y) "target" using mouse events.
 */
export async function dragElementBelow(
  sourceLocator: Locator,
  targetLocator: Locator,
  page: Page
) {
  // Get bounding boxes
  const sourceBB = await targetLocator.boundingBox();
  const targetBB = await sourceLocator.boundingBox();
  if (!sourceBB || !targetBB) {
    throw new Error("Source/target bounding boxes not found.");
  }

  // Move over source, press mouse down
  await page.mouse.move(
    sourceBB.x + sourceBB.width / 2,
    sourceBB.y + sourceBB.height * 0.25 // Move to 1/4 down the source element
  );
  await page.mouse.down();

  // Move to a point well below the target's bottom edge
  await page.mouse.move(
    targetBB.x + targetBB.width / 2,
    targetBB.y + targetBB.height + 50, // Move 50 pixels below the target element
    { steps: 50 } // Keep the same number of steps for smooth drag
  );

  // Hold for a moment to ensure the drag is registered
  await page.waitForTimeout(500);

  await page.mouse.up();

  // Wait for DnD transitions and potential animations
  await page.waitForTimeout(1000);
}


================================================
FILE: web/tests/e2e/utils/mcpServer.ts
================================================
import { spawn, ChildProcessWithoutNullStreams } from "child_process";
import path from "path";
import net from "net";
import fs from "fs";

interface StartServerOptions {
  bindHost?: string;
  publicHost?: string;
  port?: number;
  pythonBinary?: string;
  scriptPath?: string;
  readyTimeoutMs?: number;
}

const DEFAULT_BIND_HOST =
  process.env.MCP_TEST_SERVER_BIND_HOST ||
  process.env.MCP_TEST_SERVER_HOST ||
  "127.0.0.1";
const DEFAULT_PUBLIC_HOST =
  process.env.MCP_TEST_SERVER_PUBLIC_HOST || DEFAULT_BIND_HOST;
const DEFAULT_PORT = Number(process.env.MCP_TEST_SERVER_PORT || "8004");
const READY_TIMEOUT_MS = 25_000;

export class McpServerProcess {
  private process: ChildProcessWithoutNullStreams;
  private bindHost: string;
  private publicHost: string;
  private port: number;
  private stopped = false;

  constructor(
    proc: ChildProcessWithoutNullStreams,
    bindHost: string,
    publicHost: string,
    port: number
  ) {
    this.process = proc;
    this.bindHost = bindHost;
    this.publicHost = publicHost;
    this.port = port;
  }

  get address(): { host: string; port: number } {
    return { host: this.publicHost, port: this.port };
  }

  get bindAddress(): { host: string; port: number } {
    return { host: this.bindHost, port: this.port };
  }

  async stop(signal: NodeJS.Signals = "SIGTERM"): Promise<void> {
    if (this.stopped) return;
    this.stopped = true;
    return new Promise((resolve) => {
      const timeout = setTimeout(() => {
        if (!this.process.killed) {
          this.process.kill("SIGKILL");
        }
        resolve();
      }, 5_000);

      this.process.once("exit", () => {
        clearTimeout(timeout);
        resolve();
      });

      this.process.kill(signal);
    });
  }
}

function waitForPort(
  host: string,
  port: number,
  proc: ChildProcessWithoutNullStreams,
  timeoutMs: number
): Promise<void> {
  return new Promise((resolve, reject) => {
    const start = Date.now();

    const connectHost =
      host === "0.0.0.0" || host === "::" ? "127.0.0.1" : host;

    const check = () => {
      if (proc.exitCode !== null) {
        reject(
          new Error(
            `MCP server process exited with code ${proc.exitCode ?? "unknown"}`
          )
        );
        return;
      }

      const socket = net.createConnection({ host: connectHost, port });

      socket.once("connect", () => {
        socket.destroy();
        resolve();
      });

      socket.once("error", () => {
        socket.destroy();
        if (Date.now() - start >= timeoutMs) {
          reject(
            new Error(
              `Timed out waiting for MCP OAuth test server to listen on ${host}:${port}`
            )
          );
        } else {
          setTimeout(check, 250);
        }
      });
    };

    check();
  });
}

export async function startMcpOauthServer(
  options: StartServerOptions = {}
): Promise<McpServerProcess> {
  const bindHost = options.bindHost || DEFAULT_BIND_HOST;
  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;
  const port = options.port ?? DEFAULT_PORT;
  const pythonBinary = options.pythonBinary || "python3";
  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;

  const scriptPath =
    options.scriptPath ||
    path.resolve(
      __dirname,
      "../../../..",
      "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py"
    );
  const scriptDir = path.dirname(scriptPath);

  const proc = spawn(pythonBinary, [scriptPath, port.toString()], {
    cwd: scriptDir,
    stdio: ["pipe", "pipe", "pipe"],
    env: {
      ...process.env,
      MCP_SERVER_PORT: port.toString(),
      MCP_SERVER_HOST: bindHost,
      MCP_SERVER_PUBLIC_HOST: publicHost,
    },
  });

  proc.stdout.on("data", (chunk) => {
    const message = chunk.toString();
    console.log(`[mcp-oauth-server] ${message.trimEnd()}`);
  });
  proc.stderr.on("data", (chunk) => {
    const message = chunk.toString();
    console.error(`[mcp-oauth-server:stderr] ${message.trimEnd()}`);
  });

  proc.on("error", (err) => {
    console.error("[mcp-oauth-server] failed to start", err);
  });

  await waitForPort(bindHost, port, proc, readyTimeout);

  return new McpServerProcess(proc, bindHost, publicHost, port);
}

export async function startMcpApiKeyServer(
  options: StartServerOptions & { apiKey?: string } = {}
): Promise<McpServerProcess> {
  const bindHost = options.bindHost || DEFAULT_BIND_HOST;
  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;
  const port = options.port ?? DEFAULT_PORT;
  const pythonBinary = options.pythonBinary || "python3";
  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;
  const apiKey = options.apiKey || "test-api-key-12345";

  const scriptPath =
    options.scriptPath ||
    path.resolve(
      __dirname,
      "../../../..",
      "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py"
    );
  const scriptDir = path.dirname(scriptPath);

  const proc = spawn(pythonBinary, [scriptPath, apiKey, port.toString()], {
    cwd: scriptDir,
    stdio: ["pipe", "pipe", "pipe"],
    env: {
      ...process.env,
      MCP_SERVER_PORT: port.toString(),
      MCP_SERVER_HOST: bindHost,
      MCP_SERVER_PUBLIC_HOST: publicHost,
    },
  });

  proc.stdout.on("data", (chunk) => {
    const message = chunk.toString();
    console.log(`[mcp-api-key-server] ${message.trimEnd()}`);
  });
  proc.stderr.on("data", (chunk) => {
    const message = chunk.toString();
    console.error(`[mcp-api-key-server:stderr] ${message.trimEnd()}`);
  });

  proc.on("error", (err) => {
    console.error("[mcp-api-key-server] failed to start", err);
  });

  await waitForPort(bindHost, port, proc, readyTimeout);

  return new McpServerProcess(proc, bindHost, publicHost, port);
}

/**
 * Start the MCP Google OAuth Pass-Through test server.
 *
 * This server validates Google OAuth tokens that are passed through from Onyx.
 * It calls Google's tokeninfo endpoint to verify the token is valid.
 *
 * For testing pass-through OAuth scenarios where Onyx forwards the user's
 * Google OAuth access token to an MCP server.
 */
export async function startMcpGoogleOAuthServer(
  options: StartServerOptions & { requiredScopes?: string[] } = {}
): Promise<McpServerProcess> {
  const bindHost = options.bindHost || DEFAULT_BIND_HOST;
  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;
  const port = options.port ?? 8006; // Default to 8006 to not conflict with other MCP servers
  const pythonBinary = options.pythonBinary || "python3";
  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;
  const requiredScopes = options.requiredScopes || [];

  const scriptPath =
    options.scriptPath ||
    path.resolve(
      __dirname,
      "../../../..",
      "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_google_oauth.py"
    );
  const scriptDir = path.dirname(scriptPath);

  const proc = spawn(pythonBinary, [scriptPath, port.toString()], {
    cwd: scriptDir,
    stdio: ["pipe", "pipe", "pipe"],
    env: {
      ...process.env,
      MCP_SERVER_PORT: port.toString(),
      MCP_SERVER_HOST: bindHost,
      MCP_SERVER_PUBLIC_HOST: publicHost,
      MCP_GOOGLE_REQUIRED_SCOPES: requiredScopes.join(","),
    },
  });

  proc.stdout.on("data", (chunk) => {
    const message = chunk.toString();
    console.log(`[mcp-google-oauth-server] ${message.trimEnd()}`);
  });
  proc.stderr.on("data", (chunk) => {
    const message = chunk.toString();
    console.error(`[mcp-google-oauth-server:stderr] ${message.trimEnd()}`);
  });

  proc.on("error", (err) => {
    console.error("[mcp-google-oauth-server] failed to start", err);
  });

  await waitForPort(bindHost, port, proc, readyTimeout);

  return new McpServerProcess(proc, bindHost, publicHost, port);
}


================================================
FILE: web/tests/e2e/utils/onyxApiClient.ts
================================================
import { APIRequestContext, expect, APIResponse } from "@playwright/test";

const E2E_LLM_PROVIDER_API_KEY =
  process.env.E2E_LLM_PROVIDER_API_KEY ||
  process.env.OPENAI_API_KEY ||
  "e2e-placeholder-api-key-not-used";

const E2E_WEB_SEARCH_API_KEY =
  process.env.E2E_WEB_SEARCH_API_KEY ||
  process.env.EXA_API_KEY ||
  process.env.BRAVE_SEARCH_API_KEY ||
  process.env.SERPER_API_KEY ||
  "e2e-placeholder-web-search-key";

const E2E_IMAGE_GEN_API_KEY =
  process.env.E2E_IMAGE_GEN_API_KEY ||
  process.env.OPENAI_API_KEY ||
  E2E_LLM_PROVIDER_API_KEY;

/**
 * API Client for Onyx backend operations in E2E tests.
 *
 * Provides a type-safe, abstracted interface for interacting with the Onyx backend API.
 * All methods handle authentication via the Playwright page context and include automatic
 * error handling, logging, and polling for asynchronous operations.
 *
 * **Available Endpoints:**
 *
 * **Connectors:**
 * - `createFileConnector(name)` - Creates a file connector with mock credentials
 * - `deleteCCPair(ccPairId)` - Deletes a connector-credential pair (with polling until complete)
 *
 * **Document Sets:**
 * - `createDocumentSet(name, ccPairIds)` - Creates a document set from connector pairs
 * - `deleteDocumentSet(id)` - Deletes a document set (with polling until complete)
 *
 * **LLM Providers:**
 * - `listLlmProviders()` - Lists LLM providers (admin endpoint, includes is_public)
 * - `ensurePublicProvider(name?)` - Idempotently creates a public default LLM provider
 * - `createRestrictedProvider(name, groupId)` - Creates a restricted LLM provider assigned to a group
 * - `setProviderAsDefault(id)` - Sets an LLM provider as the default for chat
 * - `deleteProvider(id)` - Deletes an LLM provider
 *
 * **User Groups:**
 * - `getUserGroups()` - Lists all user groups (including default system groups)
 * - `createUserGroup(name)` - Creates a user group
 * - `deleteUserGroup(id)` - Deletes a user group
 *
 * **Tool Providers:**
 * - `createWebSearchProvider(type, name)` - Creates and activates a web search provider
 * - `deleteWebSearchProvider(id)` - Deletes a web search provider
 * - `createImageGenerationConfig(id, model, provider, isDefault)` - Creates an image generation config (enables image gen tool)
 * - `deleteImageGenerationConfig(id)` - Deletes an image generation config
 *
 * **Chat Sessions:**
 * - `createChatSession(description, personaId?)` - Creates a chat session with a description
 * - `deleteChatSession(chatId)` - Deletes a chat session
 *
 * **Projects:**
 * - `createProject(name)` - Creates a project with a name
 * - `deleteProject(projectId)` - Deletes a project
 *
 * **Usage Example:**
 * ```typescript
 * // From a test with a Page:
 * const client = new OnyxApiClient(page.request);
 *
 * // From global-setup with a standalone context (pass baseURL explicitly):
 * const ctx = await request.newContext({ baseURL, storageState: "admin_auth.json" });
 * const client = new OnyxApiClient(ctx, baseURL);
 * ```
 *
 * @param request - Playwright APIRequestContext with authenticated session
 *                  (e.g. `page.request`, `context.request`, or `request.newContext()`)
 * @param baseUrl - Optional base URL override (e.g. `http://localhost:3000`).
 *                  Defaults to `process.env.BASE_URL` or `http://localhost:3000`.
 *                  Pass this when the Playwright-configured baseURL differs from
 *                  the env var (e.g. in `global-setup.ts` where the config value
 *                  is authoritative).
 */
export class OnyxApiClient {
  private baseUrl: string;

  constructor(
    private request: APIRequestContext,
    baseUrl?: string
  ) {
    this.baseUrl = `${
      baseUrl ?? process.env.BASE_URL ?? "http://localhost:3000"
    }/api`;
  }

  /**
   * Generic GET request to the API.
   *
   * @param endpoint - API endpoint path (e.g., "/manage/document-set/123")
   * @returns The API response
   */
  private async get(endpoint: string): Promise<APIResponse> {
    return await this.request.get(`${this.baseUrl}${endpoint}`);
  }

  /**
   * Generic POST request to the API.
   *
   * @param endpoint - API endpoint path (e.g., "/manage/admin/document-set")
   * @param data - Optional request body data
   * @returns The API response
   */
  private async post(endpoint: string, data?: any): Promise<APIResponse> {
    return await this.request.post(`${this.baseUrl}${endpoint}`, {
      data,
    });
  }

  /**
   * Generic DELETE request to the API.
   *
   * @param endpoint - API endpoint path (e.g., "/manage/admin/document-set/123")
   * @returns The API response
   */
  private async delete(endpoint: string): Promise<APIResponse> {
    return await this.request.delete(`${this.baseUrl}${endpoint}`);
  }

  /**
   * Generic PUT request to the API.
   *
   * @param endpoint - API endpoint path (e.g., "/manage/admin/cc-pair/123/status")
   * @param data - Optional request body data
   * @returns The API response
   */
  private async put(endpoint: string, data?: any): Promise<APIResponse> {
    return await this.request.put(`${this.baseUrl}${endpoint}`, {
      data,
    });
  }

  /**
   * Handle API response - parse JSON and handle errors.
   *
   * @param response - The API response to handle
   * @param errorMessage - Error message prefix to use if request failed
   * @returns Parsed JSON response data
   * @throws Error if the response is not ok
   */
  private async handleResponse<T>(
    response: APIResponse,
    errorMessage: string
  ): Promise<T> {
    if (!response.ok()) {
      const errorText = await response.text();
      throw new Error(`${errorMessage}: ${response.status()} - ${errorText}`);
    }
    return await response.json();
  }

  /**
   * Handle API response with logging on error (non-throwing).
   * Used for cleanup operations where we want to log errors but not fail the test.
   *
   * @param response - The API response to handle
   * @param errorMessage - Error message prefix to use if request failed
   * @returns true if response was ok, false otherwise
   */
  private async handleResponseSoft(
    response: APIResponse,
    errorMessage: string
  ): Promise<boolean> {
    if (!response.ok()) {
      const errorText = await response.text();
      console.error(
        `[OnyxApiClient] ${errorMessage}: ${response.status()} - ${errorText}`
      );
      return false;
    }
    return true;
  }

  /**
   * Wait for a resource to be deleted by polling until 404.
   * Uses Playwright's expect.poll() with automatic retry and exponential backoff.
   * We poll here because the deletion endpoint is asynchronous (kicks off a celery task)
   * and we want to wait for it to complete.
   *
   * @param endpoint - API endpoint to poll (e.g., "/manage/document-set/123")
   * @param resourceType - Human-readable resource type for error messages (e.g., "Document set")
   * @param resourceId - The resource ID for error messages
   * @param timeout - Maximum time to wait in milliseconds (default: 30000)
   * @returns Promise that resolves when resource returns 404, or rejects on timeout
   */
  private async waitForDeletion(
    endpoint: string,
    resourceType: string,
    resourceId: number | string,
    timeout: number = 30000
  ): Promise<void> {
    await expect
      .poll(
        async () => {
          const checkResponse = await this.get(endpoint);
          return checkResponse.status();
        },
        {
          message: `${resourceType} ${resourceId} was not deleted`,
          timeout,
        }
      )
      .toBe(404);
  }

  /**
   * Log an action with consistent formatting.
   *
   * @param message - The message to log (will be prefixed with "[OnyxApiClient]")
   */
  private log(message: string): void {
    console.log(`[OnyxApiClient] ${message}`);
  }

  /**
   * Checks whether the vector database is enabled in this deployment.
   *
   * @returns true if vector DB is enabled, false if DISABLE_VECTOR_DB is set
   */
  async isVectorDbEnabled(): Promise<boolean> {
    const response = await this.get("/settings");
    const data = await this.handleResponse<{ vector_db_enabled: boolean }>(
      response,
      "Failed to fetch settings"
    );
    return data.vector_db_enabled;
  }

  /**
   * Creates a simple file connector with mock credentials.
   * This enables the Knowledge toggle in assistant creation.
   *
   * @param connectorName - Name for the connector (defaults to "Test File Connector")
   * @param accessType - Access type for the connector (defaults to "public")
   * @returns The connector-credential pair ID (ccPairId)
   * @throws Error if the connector creation fails
   */
  async createFileConnector(
    connectorName: string = "Test File Connector",
    accessType: "public" | "private" = "public"
  ): Promise<number> {
    const response = await this.post(
      "/manage/admin/connector-with-mock-credential",
      {
        name: connectorName,
        source: "file",
        input_type: "load_state",
        connector_specific_config: {
          file_locations: [],
        },
        refresh_freq: null,
        prune_freq: null,
        indexing_start: null,
        access_type: accessType,
        groups: [],
      }
    );

    const responseData = await this.handleResponse<{ data: number }>(
      response,
      "Failed to create connector"
    );

    const ccPairId = responseData.data;
    this.log(
      `Created file connector: ${connectorName} (CC Pair ID: ${ccPairId})`
    );

    // Pause the connector immediately to prevent indexing during tests
    await this.pauseConnector(ccPairId);

    return ccPairId;
  }

  /**
   * Pauses a connector-credential pair to prevent indexing.
   *
   * @param ccPairId - The connector-credential pair ID to pause
   * @throws Error if the pause operation fails
   */
  async pauseConnector(ccPairId: number): Promise<void> {
    const response = await this.put(
      `/manage/admin/cc-pair/${ccPairId}/status`,
      {
        status: "PAUSED",
      }
    );

    await this.handleResponse(response, "Failed to pause connector");
    this.log(`Paused connector CC Pair ID: ${ccPairId}`);
  }

  /**
   * Creates a document set from connector-credential pairs.
   *
   * @param documentSetName - Name for the document set
   * @param ccPairIds - Array of connector-credential pair IDs to include in the set
   * @returns The document set ID
   * @throws Error if the document set creation fails
   */
  async createDocumentSet(
    documentSetName: string,
    ccPairIds: number[]
  ): Promise<number> {
    const response = await this.post("/manage/admin/document-set", {
      name: documentSetName,
      description: `Test document set: ${documentSetName}`,
      cc_pair_ids: ccPairIds,
      is_public: true,
      users: [],
      groups: [],
      federated_connectors: [],
    });

    const documentSetId = await this.handleResponse<number>(
      response,
      "Failed to create document set"
    );

    this.log(`Created document set: ${documentSetName} (ID: ${documentSetId})`);
    return documentSetId;
  }

  /**
   * Deletes a document set and waits for deletion to complete.
   * Uses polling to verify the deletion was successful (waits for 404 response).
   *
   * @param documentSetId - The document set ID to delete
   * @returns Promise that resolves when deletion is confirmed, or rejects on timeout
   */
  async deleteDocumentSet(documentSetId: number): Promise<void> {
    const response = await this.delete(
      `/manage/admin/document-set/${documentSetId}`
    );

    if (
      !(await this.handleResponseSoft(
        response,
        `Failed to delete document set ${documentSetId}`
      ))
    ) {
      return;
    }

    this.log(`Initiated deletion for document set: ${documentSetId}`);
    await this.waitForDeletion(
      `/manage/document-set/${documentSetId}`,
      "Document set",
      documentSetId
    );
    this.log(`Document set ${documentSetId} deletion confirmed`);
  }

  /**
   * Deletes a connector-credential pair and waits for deletion to complete.
   * Fetches the CC pair details to get connector/credential IDs, then initiates deletion
   * and polls until the deletion is confirmed (waits for 404 response).
   *
   * @param ccPairId - The connector-credential pair ID to delete
   * @returns Promise that resolves when deletion is confirmed, or rejects on timeout
   */
  async deleteCCPair(ccPairId: number): Promise<void> {
    // Get CC pair details to extract connector_id and credential_id
    const getResponse = await this.get(`/manage/admin/cc-pair/${ccPairId}`);

    if (
      !(await this.handleResponseSoft(
        getResponse,
        `Failed to get CC pair ${ccPairId} details`
      ))
    ) {
      return;
    }

    const ccPairInfo = await getResponse.json();
    const {
      connector: { id: connectorId },
      credential: { id: credentialId },
    } = ccPairInfo;

    // Delete using the deletion-attempt endpoint
    const deleteResponse = await this.post("/manage/admin/deletion-attempt", {
      connector_id: connectorId,
      credential_id: credentialId,
    });

    if (
      !(await this.handleResponseSoft(
        deleteResponse,
        `Failed to delete CC pair ${ccPairId}`
      ))
    ) {
      return;
    }

    this.log(
      `Initiated deletion for CC pair: ${ccPairId} (connector: ${connectorId}, credential: ${credentialId})`
    );
    await this.waitForDeletion(
      `/manage/admin/cc-pair/${ccPairId}`,
      "CC pair",
      ccPairId
    );
    this.log(`CC pair ${ccPairId} deletion confirmed`);
  }

  /**
   * Creates a restricted LLM provider assigned to a specific user group.
   *
   * @param providerName - Name for the provider
   * @param groupId - The user group ID that should have access to this provider
   * @returns The provider ID
   * @throws Error if the provider creation fails
   */
  async createRestrictedProvider(
    providerName: string,
    groupId: number
  ): Promise<number> {
    const response = await this.request.put(
      `${this.baseUrl}/admin/llm/provider?is_creation=true`,
      {
        data: {
          name: providerName,
          provider: "openai",
          api_key: E2E_LLM_PROVIDER_API_KEY,
          default_model_name: "gpt-4o",
          is_public: false,
          groups: [groupId],
          personas: [],
        },
      }
    );

    const responseData = await this.handleResponse<{ id: number }>(
      response,
      "Failed to create restricted provider"
    );

    this.log(
      `Created restricted LLM provider: ${providerName} (ID: ${responseData.id}, Group: ${groupId})`
    );
    return responseData.id;
  }

  /**
   * Lists LLM providers visible to the admin (includes `is_public`).
   *
   * @returns Array of LLM providers with id and is_public fields
   */
  async listLlmProviders(): Promise<
    Array<{
      id: number;
      is_public?: boolean;
    }>
  > {
    const response = await this.get("/admin/llm/provider");
    const data = await this.handleResponse<{
      providers: Array<{ id: number; is_public?: boolean }>;
    }>(response, "Failed to list LLM providers");
    return data.providers;
  }

  /**
   * Ensure at least one public LLM provider exists and is set as default.
   *
   * Idempotent — returns `null` if a public provider already exists,
   * or the new provider ID if one was created.
   *
   * @param providerName - Name for the provider (default: "PW Default Provider")
   * @returns The provider ID if one was created, or `null` if already present
   */
  async ensurePublicProvider(
    providerName: string = "PW Default Provider"
  ): Promise<number | null> {
    const providers = await this.listLlmProviders();
    const hasPublic = providers.some((p) => p.is_public);

    if (hasPublic) {
      return null;
    }

    const defaultModelName = "gpt-4o";
    const response = await this.request.put(
      `${this.baseUrl}/admin/llm/provider?is_creation=true`,
      {
        data: {
          name: providerName,
          provider: "openai",
          api_key: E2E_LLM_PROVIDER_API_KEY,
          is_public: true,
          groups: [],
          personas: [],
          model_configurations: [{ name: defaultModelName, is_visible: true }],
        },
      }
    );

    const responseData = await this.handleResponse<{ id: number }>(
      response,
      "Failed to create public provider"
    );

    // Set as default so get_default_llm() works (needed for tokenization, etc.)
    await this.setProviderAsDefault(responseData.id, defaultModelName);

    this.log(
      `Created public LLM provider: ${providerName} (ID: ${responseData.id})`
    );
    return responseData.id;
  }

  /**
   * Sets an LLM provider + model as the default for chat.
   *
   * @param providerId - The provider ID to set as default
   * @param modelName - The model name to set as default
   */
  async setProviderAsDefault(
    providerId: number,
    modelName: string
  ): Promise<void> {
    const response = await this.post("/admin/llm/default", {
      provider_id: providerId,
      model_name: modelName,
    });

    await this.handleResponseSoft(
      response,
      `Failed to set provider ${providerId} as default`
    );

    this.log(`Set LLM provider ${providerId} as default`);
  }

  /**
   * Deletes an LLM provider.
   *
   * @param providerId - The provider ID to delete
   */
  async deleteProvider(
    providerId: number,
    { force = false }: { force?: boolean } = {}
  ): Promise<void> {
    const query = force ? "?force=true" : "";
    const response = await this.delete(
      `/admin/llm/provider/${providerId}${query}`
    );

    await this.handleResponseSoft(
      response,
      `Failed to delete provider ${providerId}`
    );

    this.log(`Deleted LLM provider: ${providerId}`);
  }

  /**
   * Creates a user group.
   *
   * @param groupName - Name for the user group
   * @param userIds - Optional list of user IDs to add to the group
   * @param ccPairIds - Optional list of connector-credential pair IDs to associate
   * @returns The user group ID
   * @throws Error if the user group creation fails
   */
  async createUserGroup(
    groupName: string,
    userIds: string[] = [],
    ccPairIds: number[] = []
  ): Promise<number> {
    const response = await this.post("/manage/admin/user-group", {
      name: groupName,
      user_ids: userIds,
      cc_pair_ids: ccPairIds,
    });

    const responseData = await this.handleResponse<{ id: number }>(
      response,
      "Failed to create user group"
    );

    this.log(`Created user group: ${groupName} (ID: ${responseData.id})`);
    return responseData.id;
  }

  /**
   * Polls until a user group has finished syncing (is_up_to_date === true).
   * Newly created groups start syncing immediately; many mutation endpoints
   * reject requests while the group is still syncing.
   */
  async waitForGroupSync(
    groupId: number,
    timeout: number = 30000
  ): Promise<void> {
    await expect
      .poll(
        async () => {
          const res = await this.get("/manage/admin/user-group");
          const groups = await res.json();
          const group = groups.find(
            (g: { id: number; is_up_to_date: boolean }) => g.id === groupId
          );
          return group?.is_up_to_date ?? false;
        },
        {
          message: `User group ${groupId} did not finish syncing`,
          timeout,
        }
      )
      .toBe(true);
    this.log(`User group ${groupId} finished syncing`);
  }

  /**
   * Deletes a user group.
   *
   * @param groupId - The user group ID to delete
   */
  async deleteUserGroup(groupId: number): Promise<void> {
    const response = await this.delete(`/manage/admin/user-group/${groupId}`);

    await this.handleResponseSoft(
      response,
      `Failed to delete user group ${groupId}`
    );

    this.log(`Deleted user group: ${groupId}`);
  }

  /**
   * Lists all user groups.
   */
  async getUserGroups(): Promise<
    Array<{ id: number; name: string; is_default: boolean }>
  > {
    const response = await this.get(
      "/manage/admin/user-group?include_default=true"
    );
    return response.json();
  }

  async setUserRole(
    email: string,
    role: "admin" | "curator" | "global_curator" | "basic",
    explicitOverride = false
  ): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/set-user-role`,
      {
        data: {
          user_email: email,
          new_role: role,
          explicit_override: explicitOverride,
        },
      }
    );
    await this.handleResponse(response, `Failed to set user role for ${email}`);
    this.log(`Updated role for ${email} to ${role}`);
  }

  async deleteMcpServer(serverId: number): Promise<boolean> {
    const response = await this.request.delete(
      `${this.baseUrl}/admin/mcp/server/${serverId}`
    );
    const success = await this.handleResponseSoft(
      response,
      `Failed to delete MCP server ${serverId}`
    );
    if (success) {
      this.log(`Deleted MCP server ${serverId}`);
    }
    return success;
  }

  async deleteCustomTool(toolId: number): Promise<boolean> {
    const response = await this.request.delete(
      `${this.baseUrl}/admin/tool/custom/${toolId}`
    );
    const success = await this.handleResponseSoft(
      response,
      `Failed to delete custom tool ${toolId}`
    );
    if (success) {
      this.log(`Deleted custom tool ${toolId}`);
    }
    return success;
  }

  async listOpenApiTools(): Promise<
    Array<{ id: number; name: string; description: string }>
  > {
    const response = await this.get("/tool/openapi");
    return await this.handleResponse(response, "Failed to list OpenAPI tools");
  }

  async findToolByName(
    name: string
  ): Promise<{ id: number; name: string; description: string } | null> {
    const tools = await this.listOpenApiTools();
    return tools.find((tool) => tool.name === name) ?? null;
  }

  async deleteAgent(agentId: number): Promise<boolean> {
    const response = await this.request.delete(
      `${this.baseUrl}/persona/${agentId}`
    );
    const success = await this.handleResponseSoft(
      response,
      `Failed to delete assistant ${agentId}`
    );
    if (success) {
      this.log(`Deleted assistant ${agentId}`);
    }
    return success;
  }

  async getAssistant(agentId: number): Promise<{
    id: number;
    is_public: boolean;
    users: Array<{ id: string }>;
    groups: number[];
    tools: Array<{ id: number; mcp_server_id?: number | null }>;
  }> {
    const response = await this.get(`/persona/${agentId}`);
    return await this.handleResponse(
      response,
      `Failed to fetch assistant ${agentId}`
    );
  }

  async updateAgentSharing(
    agentId: number,
    options: {
      userIds?: string[];
      groupIds?: number[];
      isPublic?: boolean;
      labelIds?: number[];
    }
  ): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/persona/${agentId}/share`,
      {
        data: {
          user_ids: options.userIds,
          group_ids: options.groupIds,
          is_public: options.isPublic,
          label_ids: options.labelIds,
        },
      }
    );
    await this.handleResponse(
      response,
      `Failed to update sharing for assistant ${agentId}`
    );
    this.log(
      `Updated assistant sharing: ${agentId} (is_public=${String(
        options.isPublic
      )})`
    );
  }

  async listMcpServers(): Promise<any[]> {
    const response = await this.get(`/admin/mcp/servers`);
    const data = await this.handleResponse<{ mcp_servers: any[] }>(
      response,
      "Failed to list MCP servers"
    );
    return data.mcp_servers;
  }

  async listAgents(options?: {
    includeDeleted?: boolean;
    getEditable?: boolean;
  }): Promise<any[]> {
    const params = new URLSearchParams();
    if (options?.includeDeleted) {
      params.set("include_deleted", "true");
    }
    if (options?.getEditable ?? true) {
      params.set("get_editable", "true");
    }
    const query = params.toString();
    const response = await this.get(
      `/admin/persona${query ? `?${query}` : ""}`
    );
    return await this.handleResponse<any[]>(
      response,
      "Failed to list assistants"
    );
  }

  async findAgentByName(
    name: string,
    options?: { includeDeleted?: boolean; getEditable?: boolean }
  ): Promise<any | null> {
    const assistants = await this.listAgents(options);
    return assistants.find((assistant) => assistant.name === name) ?? null;
  }

  async registerUser(email: string, password: string): Promise<{ id: string }> {
    const response = await this.request.post(`${this.baseUrl}/auth/register`, {
      data: {
        email,
        username: email,
        password,
      },
    });
    const data = await this.handleResponse<{ id: string }>(
      response,
      `Failed to register user ${email}`
    );
    return data;
  }

  async getUserByEmail(email: string): Promise<{
    id: string;
    email: string;
    role: string;
  } | null> {
    const response = await this.request.get(
      `${this.baseUrl}/manage/users/accepted`,
      {
        params: {
          q: email,
          page_size: 1,
        },
      }
    );
    const data = await this.handleResponse<{ items: any[] }>(
      response,
      `Failed to fetch user ${email}`
    );
    const [user] = data.items;
    return user
      ? {
          id: user.id,
          email: user.email,
          role: user.role,
        }
      : null;
  }

  async setCuratorStatus(
    userGroupId: string,
    userId: string,
    isCurator: boolean = true
  ): Promise<void> {
    const response = await this.request.post(
      `${this.baseUrl}/manage/admin/user-group/${userGroupId}/set-curator`,
      {
        data: {
          user_id: userId,
          is_curator: isCurator,
        },
      }
    );
    await this.handleResponse(
      response,
      `Failed to update curator status for ${userId}`
    );
  }

  /**
   * Create and activate a web search provider for testing.
   * Uses env-backed keys when available and falls back to a placeholder key.
   *
   * @param providerType - Type of provider: "exa", "brave", "serper", "google_pse", "searxng"
   * @param name - Optional name for the provider (defaults to "Test Provider")
   * @returns The created provider ID
   */
  async createWebSearchProvider(
    providerType: "exa" | "brave" | "serper" | "google_pse" | "searxng" = "exa",
    name: string = "Test Provider"
  ): Promise<number> {
    const config: Record<string, string> = {};
    if (providerType === "google_pse") {
      config.search_engine_id = "test-engine-id";
    }
    if (providerType === "searxng") {
      config.searxng_base_url = "https://test-searxng.example.com";
    }

    const response = await this.post("/admin/web-search/search-providers", {
      name,
      provider_type: providerType,
      api_key: E2E_WEB_SEARCH_API_KEY,
      api_key_changed: true,
      config: Object.keys(config).length > 0 ? config : undefined,
      activate: true,
    });

    const data = await this.handleResponse<{ id: number }>(
      response,
      `Failed to create web search provider ${providerType}`
    );
    return data.id;
  }

  /**
   * Delete a web search provider.
   *
   * @param providerId - ID of the provider to delete
   */
  async deleteWebSearchProvider(providerId: number): Promise<void> {
    const response = await this.delete(
      `/admin/web-search/search-providers/${providerId}`
    );
    if (!response.ok()) {
      const errorText = await response.text();
      console.warn(
        `Failed to delete web search provider ${providerId}: ${response.status()} - ${errorText}`
      );
    }
  }

  /**
   * Creates an image generation configuration for testing.
   * This enables the image generation tool in assistants.
   *
   * API: POST /api/admin/image-generation/config
   * Schema (ImageGenerationConfigCreate):
   *   - image_provider_id: string (required) - unique key
   *   - model_name: string (required) - e.g., "dall-e-3"
   *   - provider: string - e.g., "openai"
   *   - api_key: string
   *   - is_default: boolean
   *
   * @param imageProviderId - Unique identifier for the image generation config
   * @param modelName - Model name (defaults to "dall-e-3")
   * @param provider - Provider name (defaults to "openai")
   * @param isDefault - Whether this should be the default config (defaults to true)
   * @returns The image_provider_id
   */
  async createImageGenerationConfig(
    imageProviderId: string,
    modelName: string = "dall-e-3",
    provider: string = "openai",
    isDefault: boolean = true
  ): Promise<string> {
    const response = await this.post("/admin/image-generation/config", {
      image_provider_id: imageProviderId,
      model_name: modelName,
      provider: provider,
      api_key: E2E_IMAGE_GEN_API_KEY,
      is_default: isDefault,
    });

    await this.handleResponse(
      response,
      "Failed to create image generation config"
    );

    this.log(`Created image generation config: ${imageProviderId}`);
    return imageProviderId;
  }

  /**
   * Deletes an image generation configuration.
   *
   * @param imageProviderId - The image_provider_id to delete
   */
  async deleteImageGenerationConfig(imageProviderId: string): Promise<void> {
    const response = await this.delete(
      `/admin/image-generation/config/${imageProviderId}`
    );

    await this.handleResponseSoft(
      response,
      `Failed to delete image generation config ${imageProviderId}`
    );

    this.log(`Deleted image generation config: ${imageProviderId}`);
  }

  // === Discord Bot Methods ===

  /**
   * Creates a Discord guild configuration.
   * Returns the guild config with registration key (shown once).
   *
   * @returns The created guild config with id and registration_key
   */
  async createDiscordGuild(): Promise<{
    id: number;
    registration_key: string;
    guild_name: string | null;
  }> {
    const response = await this.post("/manage/admin/discord-bot/guilds");

    const guild = await this.handleResponse<{
      id: number;
      registration_key: string;
      guild_name: string | null;
    }>(response, "Failed to create Discord guild config");

    this.log(
      `Created Discord guild config: id=${guild.id}, registration_key=${guild.registration_key}`
    );
    return guild;
  }

  /**
   * Lists all Discord guild configurations.
   *
   * @returns Array of guild configs
   */
  async listDiscordGuilds(): Promise<
    Array<{
      id: number;
      guild_id: string | null;
      guild_name: string | null;
      enabled: boolean;
    }>
  > {
    const response = await this.get("/manage/admin/discord-bot/guilds");
    return await this.handleResponse(response, "Failed to list Discord guilds");
  }

  /**
   * Gets a specific Discord guild configuration.
   *
   * @param guildId - The internal guild config ID
   * @returns The guild config or null if not found
   */
  async getDiscordGuild(guildId: number): Promise<{
    id: number;
    guild_id: string | null;
    guild_name: string | null;
    enabled: boolean;
    default_persona_id: number | null;
  } | null> {
    const response = await this.get(
      `/manage/admin/discord-bot/guilds/${guildId}`
    );
    if (response.status() === 404) {
      return null;
    }
    return await this.handleResponse(
      response,
      `Failed to get Discord guild ${guildId}`
    );
  }

  /**
   * Updates a Discord guild configuration.
   *
   * @param guildId - The internal guild config ID
   * @param updates - The fields to update
   * @returns The updated guild config
   */
  async updateDiscordGuild(
    guildId: number,
    updates: { enabled?: boolean; default_persona_id?: number | null }
  ): Promise<{
    id: number;
    guild_id: string | null;
    guild_name: string | null;
    enabled: boolean;
  }> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/admin/discord-bot/guilds/${guildId}`,
      { data: updates }
    );
    return await this.handleResponse(
      response,
      `Failed to update Discord guild ${guildId}`
    );
  }

  /**
   * Deletes a Discord guild configuration.
   *
   * @param guildId - The internal guild config ID
   */
  async deleteDiscordGuild(guildId: number): Promise<void> {
    const response = await this.delete(
      `/manage/admin/discord-bot/guilds/${guildId}`
    );

    await this.handleResponseSoft(
      response,
      `Failed to delete Discord guild ${guildId}`
    );

    this.log(`Deleted Discord guild config: ${guildId}`);
  }

  /**
   * Lists channels for a Discord guild configuration.
   *
   * @param guildConfigId - The internal guild config ID
   * @returns Array of channel configs
   */
  async listDiscordChannels(guildConfigId: number): Promise<
    Array<{
      id: number;
      channel_id: string;
      channel_name: string;
      channel_type: string;
      enabled: boolean;
    }>
  > {
    const response = await this.get(
      `/manage/admin/discord-bot/guilds/${guildConfigId}/channels`
    );
    return await this.handleResponse(
      response,
      `Failed to list channels for guild ${guildConfigId}`
    );
  }

  /**
   * Updates a Discord channel configuration.
   *
   * @param guildConfigId - The internal guild config ID
   * @param channelConfigId - The internal channel config ID
   * @param updates - The fields to update
   * @returns The updated channel config
   */
  async updateDiscordChannel(
    guildConfigId: number,
    channelConfigId: number,
    updates: {
      enabled?: boolean;
      thread_only_mode?: boolean;
      require_bot_invocation?: boolean;
      persona_override_id?: number | null;
    }
  ): Promise<{
    id: number;
    channel_id: string;
    channel_name: string;
    enabled: boolean;
  }> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/admin/discord-bot/guilds/${guildConfigId}/channels/${channelConfigId}`,
      { data: updates }
    );
    return await this.handleResponse(
      response,
      `Failed to update channel ${channelConfigId}`
    );
  }

  // === User Management Methods ===

  async deactivateUser(email: string): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/admin/deactivate-user`,
      { data: { user_email: email } }
    );
    await this.handleResponse(response, `Failed to deactivate user ${email}`);
    this.log(`Deactivated user: ${email}`);
  }

  async activateUser(email: string): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/admin/activate-user`,
      { data: { user_email: email } }
    );
    await this.handleResponse(response, `Failed to activate user ${email}`);
    this.log(`Activated user: ${email}`);
  }

  async deleteUser(email: string): Promise<void> {
    const response = await this.request.delete(
      `${this.baseUrl}/manage/admin/delete-user`,
      { data: { user_email: email } }
    );
    await this.handleResponse(response, `Failed to delete user ${email}`);
    this.log(`Deleted user: ${email}`);
  }

  async cancelInvite(email: string): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/manage/admin/remove-invited-user`,
      { data: { user_email: email } }
    );
    await this.handleResponse(response, `Failed to cancel invite for ${email}`);
    this.log(`Cancelled invite for: ${email}`);
  }

  async inviteUsers(emails: string[]): Promise<void> {
    const response = await this.put("/manage/admin/users", { emails });
    await this.handleResponse(response, `Failed to invite users`);
    this.log(`Invited users: ${emails.join(", ")}`);
  }

  async setPersonalName(name: string): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/user/personalization`,
      { data: { name } }
    );
    await this.handleResponse(
      response,
      `Failed to set personal name to ${name}`
    );
    this.log(`Set personal name: ${name}`);
  }

  // === Chat Session Methods ===

  /**
   * Creates a chat session with a specific description.
   *
   * @param description - The description/title for the chat session
   * @param personaId - The persona/assistant ID to use (defaults to 0)
   * @returns The chat session ID
   * @throws Error if the chat session creation fails
   */
  async createChatSession(
    description: string,
    personaId: number = 0
  ): Promise<string> {
    const response = await this.post("/chat/create-chat-session", {
      persona_id: personaId,
      description,
    });
    const data = await this.handleResponse<{ chat_session_id: string }>(
      response,
      "Failed to create chat session"
    );
    this.log(
      `Created chat session: ${description} (ID: ${data.chat_session_id})`
    );
    return data.chat_session_id;
  }

  /**
   * Deletes a chat session.
   *
   * @param chatId - The chat session ID to delete
   */
  async deleteChatSession(chatId: string): Promise<void> {
    const response = await this.delete(`/chat/delete-chat-session/${chatId}`);
    await this.handleResponseSoft(
      response,
      `Failed to delete chat session ${chatId}`
    );
    this.log(`Deleted chat session: ${chatId}`);
  }

  // === Project Methods ===

  /**
   * Creates a project with a specific name.
   *
   * @param name - The name for the project
   * @returns The project ID
   * @throws Error if the project creation fails
   */
  async createProject(name: string): Promise<number> {
    const response = await this.post(
      `/user/projects/create?name=${encodeURIComponent(name)}`
    );
    const data = await this.handleResponse<{ id: number }>(
      response,
      "Failed to create project"
    );
    this.log(`Created project: ${name} (ID: ${data.id})`);
    return data.id;
  }

  /**
   * Deletes a project.
   *
   * @param projectId - The project ID to delete
   */
  async deleteProject(projectId: number): Promise<void> {
    const response = await this.delete(`/user/projects/${projectId}`);
    await this.handleResponseSoft(
      response,
      `Failed to delete project ${projectId}`
    );
    this.log(`Deleted project: ${projectId}`);
  }

  /**
   * Sets the current user's default app mode preference.
   *
   * @param mode - The default mode to persist ("CHAT" or "SEARCH")
   */
  async setDefaultAppMode(mode: "CHAT" | "SEARCH"): Promise<void> {
    const response = await this.request.patch(
      `${this.baseUrl}/user/default-app-mode`,
      {
        data: { default_app_mode: mode },
      }
    );
    await this.handleResponse(
      response,
      `Failed to set default app mode to ${mode}`
    );
    this.log(`Set default app mode: ${mode}`);
  }
}


================================================
FILE: web/tests/e2e/utils/pageStateLogger.ts
================================================
import { Page } from "@playwright/test";

const RUN_START_MS = Date.now();

/**
 * Captures a lightweight snapshot of the current page state to aid debugging of flaky flows.
 * Limits the amount of text collected so logs remain readable in CI.
 */
export async function logPageState(
  page: Page,
  context: string,
  logTag = "[e2e-page-debug]"
) {
  const sinceStartMs = Date.now() - RUN_START_MS;
  const snapshot: Record<string, unknown> = {
    context,
    timestamp: new Date().toISOString(),
    elapsedMs: sinceStartMs,
    elapsedSeconds: Number((sinceStartMs / 1000).toFixed(3)),
  };

  if (page.isClosed()) {
    snapshot.url = "<page-closed>";
    snapshot.title = "<unavailable>";
    snapshot.readyState = "<page-closed>";
    snapshot.bodySnippet = "<unavailable>";
    snapshot.visibleButtons = "<unavailable>";
    snapshot.visibleInputs = "<unavailable>";
    snapshot.note = "page was already closed before dump";
    console.log(`${logTag} ${JSON.stringify(snapshot)}`);
    return;
  }

  snapshot.url = page.url();

  try {
    snapshot.title = await page.title();
  } catch {
    snapshot.title = "<unavailable>";
  }

  try {
    snapshot.readyState = await page.evaluate(
      () => document.readyState ?? "<unknown>"
    );
  } catch {
    snapshot.readyState = "<unknown>";
  }

  try {
    const bodyText = await page.evaluate(() => document.body?.innerText ?? "");
    snapshot.bodySnippet = bodyText.trim().replace(/\s+/g, " ").slice(0, 500);
  } catch {
    snapshot.bodySnippet = "<unavailable>";
  }

  try {
    snapshot.visibleButtons = await page.evaluate(() =>
      Array.from(document.querySelectorAll("button"))
        .slice(0, 5)
        .map((btn) => ({
          text: btn.innerText,
          disabled: (btn as HTMLButtonElement).disabled,
          dataTestId: btn.getAttribute("data-testid"),
        }))
    );
  } catch {
    snapshot.visibleButtons = "<unavailable>";
  }

  try {
    snapshot.visibleInputs = await page.evaluate(() =>
      Array.from(
        document.querySelectorAll<HTMLInputElement | HTMLTextAreaElement>(
          "input, textarea"
        )
      )
        .slice(0, 5)
        .map((input) => ({
          name: input.name,
          type: input instanceof HTMLInputElement ? input.type : "textarea",
          value: input.value,
          dataTestId: input.getAttribute("data-testid"),
        }))
    );
  } catch {
    snapshot.visibleInputs = "<unavailable>";
  }

  console.log(`${logTag} ${JSON.stringify(snapshot)}`);
}


================================================
FILE: web/tests/e2e/utils/theme.ts
================================================
import type { Page } from "@playwright/test";

export const THEMES = ["light", "dark"] as const;
export type Theme = (typeof THEMES)[number];

/**
 * Injects the given theme into localStorage via `addInitScript` so that
 * `next-themes` applies it on first render. Call this in `beforeEach`
 * **before** any `page.goto()`.
 */
export async function setThemeBeforeNavigation(
  page: Page,
  theme: Theme
): Promise<void> {
  await page.addInitScript((t: string) => {
    localStorage.setItem("theme", t);
  }, theme);
}


================================================
FILE: web/tests/e2e/utils/tools.ts
================================================
// Shared test utilities for tool/action management and greetings

import { Page } from "@playwright/test";

export const TOOL_IDS = {
  actionToggle: '[data-testid="action-management-toggle"]',
  options: '[data-testid="tool-options"]',
  // These IDs are derived from tool.name in the app
  searchOption: '[data-testid="tool-option-internal_search"]',
  webSearchOption: '[data-testid="tool-option-web_search"]',
  imageGenerationOption: '[data-testid="tool-option-generate_image"]',
  // Generic toggle selector used inside tool options
  toggleInput: 'input[type="checkbox"], input[type="radio"], [role="switch"]',
} as const;

export { GREETING_MESSAGES } from "../../../src/lib/chat/greetingMessages";

// Wait for the unified assistant greeting and return its text
export async function waitForUnifiedGreeting(page: Page): Promise<string> {
  const el = await page.waitForSelector('[data-testid="onyx-logo"]', {
    timeout: 5000,
  });
  const text = (await el.textContent())?.trim() || "";
  return text;
}

// Ensure the Action Management popover is open
export async function openActionManagement(page: Page): Promise<void> {
  const actionToggle = page.locator(TOOL_IDS.actionToggle);
  await actionToggle.waitFor();
  await actionToggle.click();
  await page.locator(TOOL_IDS.options).waitFor();
}

// Check presence of the Action Management toggle
export async function isActionTogglePresent(page: Page): Promise<boolean> {
  const el = await page.$(TOOL_IDS.actionToggle);
  return !!el;
}

/**
 * Click the disable/enable (slash) button on a tool line item.
 * The button is hidden until hover; we hover first, then force-click
 * using aria-label which matches the button's current state.
 */
export async function toggleToolDisabled(
  page: Page,
  toolSelector: string
): Promise<void> {
  const toolOption = page.locator(toolSelector);
  await toolOption.hover();
  const slashButton = toolOption.locator(
    'button[aria-label="Disable"], button[aria-label="Enable"]'
  );
  await slashButton.first().click({ force: true });
}

/**
 * Open the source management secondary view for the internal search tool.
 * Assumes the ActionsPopover is already open.
 */
export async function openSourceManagement(page: Page): Promise<void> {
  const searchOption = page.locator(TOOL_IDS.searchOption);
  await searchOption
    .locator('button[aria-label="Configure Connectors"]')
    .click();
  // Wait for the source list Back button (indicates secondary view is open)
  await page.locator('button[aria-label="Back"]').waitFor({ timeout: 5000 });
}

/**
 * Get a source toggle Switch in the source management view by display name.
 */
export function getSourceToggle(page: Page, sourceName: string) {
  return page.locator(`[aria-label="Toggle ${sourceName}"]`);
}


================================================
FILE: web/tests/e2e/utils/visualRegression.ts
================================================
import type { Locator, Page, PageScreenshotOptions } from "@playwright/test";
import { expect } from "@playwright/test";

/**
 * Whether visual regression assertions are enabled.
 *
 * When `VISUAL_REGRESSION=true` is set, `expectScreenshot()` calls
 * `toHaveScreenshot()` which will fail if the screenshot differs from the
 * stored baseline.
 *
 * When disabled (the default), screenshots are still captured and saved but
 * mismatches do NOT fail the test — this lets CI collect screenshots for later
 * review without gating on them.
 */
const VISUAL_REGRESSION_ENABLED =
  process.env.VISUAL_REGRESSION?.toLowerCase() === "true";

/**
 * Default selectors to mask across all screenshots so that dynamic content
 * (timestamps, avatars, etc.) doesn't cause spurious diffs.
 */
const DEFAULT_MASK_SELECTORS: string[] = [
  // Add selectors for dynamic content that should be masked, e.g.:
  // '[data-testid="timestamp"]',
  // '[data-testid="user-avatar"]',
];

/**
 * Default selectors to hide (visibility: hidden) across all screenshots.
 * These elements are overlays or ephemeral UI that would cause spurious diffs.
 */
const DEFAULT_HIDE_SELECTORS: string[] = [
  '[data-testid="toast-container"]',
  // TODO: Remove once it loads consistently.
  '[data-testid="actions-container"]',
];

interface ScreenshotOptions {
  /**
   * Name for the screenshot file. If omitted, Playwright auto-generates one
   * from the test title.
   */
  name?: string;

  /**
   * Additional CSS selectors to mask (on top of the defaults).
   * Masked areas are replaced with a pink box so they don't cause diffs.
   */
  mask?: string[];

  /**
   * CSS selectors for elements to hide (visibility: hidden) before taking
   * the screenshot. This removes elements from the visual output while
   * preserving their layout space, preventing size-related inconsistencies.
   */
  hide?: string[];

  /**
   * If true, capture the full scrollable page instead of just the viewport.
   * Defaults to false.
   */
  fullPage?: boolean;

  /**
   * Override the max diff pixel ratio for this specific screenshot.
   */
  maxDiffPixelRatio?: number;

  /**
   * Override the per-channel threshold for this specific screenshot.
   */
  threshold?: number;

  /**
   * Additional Playwright screenshot options.
   */
  screenshotOptions?: PageScreenshotOptions;
}

interface ElementScreenshotOptions {
  /**
   * Name for the screenshot file. If omitted, Playwright auto-generates one
   * from the test title.
   */
  name?: string;

  /**
   * Additional CSS selectors to mask (on top of the defaults).
   * The selectors are resolved relative to the page the locator belongs to.
   */
  mask?: string[];

  /**
   * CSS selectors for elements to hide (visibility: hidden) before taking
   * the screenshot. This removes elements from the visual output while
   * preserving their layout space, preventing size-related inconsistencies.
   */
  hide?: string[];

  /**
   * Override the max diff pixel ratio for this specific screenshot.
   */
  maxDiffPixelRatio?: number;

  /**
   * Override the per-channel threshold for this specific screenshot.
   */
  threshold?: number;
}

/**
 * Wait for all running CSS animations and transitions on the page to finish
 * before proceeding.  This prevents screenshot tests from being non-deterministic
 * when animated elements (e.g. slide-in cards) are still mid-flight.
 *
 * The implementation:
 *   1. Yields one animation frame so that any pending animations have a chance
 *      to register with the Web Animations API.
 *   2. Calls `Promise.allSettled` on every active animation's `.finished`
 *      promise so we wait for completion (or cancellation) of all of them.
 */
export async function waitForAnimations(page: Page): Promise<void> {
  await page.evaluate(async () => {
    // Allow any freshly-scheduled animations to start
    await new Promise<void>((resolve) =>
      requestAnimationFrame(() => resolve())
    );
    // Wait for every currently-registered animation to finish (or be cancelled)
    const animations = document
      .getAnimations()
      .filter(
        (animation) => animation.effect?.getTiming().iterations !== Infinity
      );
    await Promise.allSettled(animations.map((animation) => animation.finished));
  });
}

/**
 * Wait for every **visible** `<img>` on the page to finish loading (or error).
 *
 * This prevents screenshot flakiness caused by images that have been added to
 * the DOM but haven't been decoded yet — `networkidle` only guarantees that
 * fewer than 2 connections are in flight, not that every image is painted.
 *
 * Only images that are actually visible and in (or near) the viewport are
 * waited on. Hidden images (e.g. the `dark:hidden` / `hidden dark:block`
 * alternates created by `createLogoIcon`) and offscreen lazy-loaded images
 * are skipped so they don't force a needless timeout.
 *
 * Times out after `timeoutMs` (default 5 000 ms) so a single broken image
 * doesn't block the entire test forever.
 */
export async function waitForImages(
  page: Page,
  timeoutMs: number = 5_000
): Promise<void> {
  await page.evaluate(async (timeout) => {
    const images = Array.from(document.querySelectorAll("img")).filter(
      (img) => {
        // Skip images hidden via CSS (display:none, visibility:hidden, etc.)
        // This covers createLogoIcon's dark-mode alternates.
        const style = getComputedStyle(img);
        if (
          style.display === "none" ||
          style.visibility === "hidden" ||
          style.opacity === "0"
        ) {
          return false;
        }

        // Skip images that have no layout box (zero size or detached).
        const rect = img.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) return false;

        // Skip images far below the viewport (lazy-loaded, not yet needed).
        if (rect.top > window.innerHeight * 2) return false;

        return true;
      }
    );

    await Promise.race([
      Promise.allSettled(
        images.map((img) => {
          if (img.complete) return Promise.resolve();
          return new Promise<void>((resolve) => {
            img.addEventListener("load", () => resolve(), { once: true });
            img.addEventListener("error", () => resolve(), { once: true });
          });
        })
      ),
      new Promise<void>((resolve) => setTimeout(resolve, timeout)),
    ]);
  }, timeoutMs);
}

/**
 * Take a screenshot and optionally assert it matches the stored baseline.
 *
 * Behavior depends on the `VISUAL_REGRESSION` environment variable:
 * - `VISUAL_REGRESSION=true`  → assert via `toHaveScreenshot()` (fails on diff)
 * - Otherwise                 → capture and save the screenshot for review only
 *
 * Usage:
 * ```ts
 * import { expectScreenshot } from "@tests/e2e/utils/visualRegression";
 *
 * test("admin page looks right", async ({ page }) => {
 *   await page.goto("/admin/settings");
 *   await expectScreenshot(page, { name: "admin-settings" });
 * });
 * ```
 */
export async function expectScreenshot(
  page: Page,
  options: ScreenshotOptions = {}
): Promise<void> {
  const {
    name,
    mask = [],
    hide = [],
    fullPage = false,
    maxDiffPixelRatio,
    threshold,
  } = options;

  // Merge default hide selectors with per-call selectors
  const allHideSelectors = [...DEFAULT_HIDE_SELECTORS, ...hide];

  // Hide elements by setting visibility: hidden
  let styleHandle;
  if (allHideSelectors.length > 0) {
    styleHandle = await page.addStyleTag({
      content: allHideSelectors
        .map(
          (selector) =>
            `${selector} { visibility: hidden !important; opacity: 0 !important; pointer-events: none !important; }`
        )
        .join("\n"),
    });
  }

  try {
    // Combine default masks with per-call masks
    const allMaskSelectors = [...DEFAULT_MASK_SELECTORS, ...mask];
    const maskLocators = allMaskSelectors.map((selector) =>
      page.locator(selector)
    );

    // Wait for images to finish loading / decoding so that logo icons
    // and other <img> elements are fully painted before the screenshot.
    await waitForImages(page);

    // Wait for any in-flight CSS animations / transitions to settle so that
    // screenshots are deterministic (e.g. slide-in card animations on the
    // onboarding flow).
    await waitForAnimations(page);

    // Build the screenshot name array (Playwright expects string[])
    const nameArg = name ? [name + ".png"] : undefined;

    if (VISUAL_REGRESSION_ENABLED) {
      // Assert mode — fail the test if the screenshot differs from baseline
      const screenshotOpts = {
        fullPage,
        mask: maskLocators.length > 0 ? maskLocators : undefined,
        ...(maxDiffPixelRatio !== undefined && { maxDiffPixelRatio }),
        ...(threshold !== undefined && { threshold }),
      };

      if (nameArg) {
        await expect(page).toHaveScreenshot(nameArg, screenshotOpts);
      } else {
        await expect(page).toHaveScreenshot(screenshotOpts);
      }
    } else {
      // Capture-only mode — save the screenshot without asserting
      const screenshotPath = name
        ? `output/screenshots/${name}.png`
        : undefined;
      await page.screenshot({
        path: screenshotPath,
        fullPage,
        mask: maskLocators.length > 0 ? maskLocators : undefined,
        ...options.screenshotOptions,
      });
    }
  } finally {
    // Remove the injected style tag to avoid affecting subsequent screenshots/assertions
    if (styleHandle) {
      await styleHandle.evaluate((el: HTMLStyleElement) => el.remove());
    }
  }
}

/**
 * Take a screenshot of a specific element and optionally assert it matches
 * the stored baseline.
 *
 * Works like {@link expectScreenshot} but scopes the screenshot to a single
 * `Locator` instead of the full page.
 *
 * Usage:
 * ```ts
 * import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";
 *
 * test("sidebar looks right", async ({ page }) => {
 *   await page.goto("/app");
 *   const sidebar = page.getByTestId("AppSidebar/new-session");
 *   await expectElementScreenshot(sidebar, { name: "sidebar-new-session" });
 * });
 * ```
 */
export async function expectElementScreenshot(
  locator: Locator,
  options: ElementScreenshotOptions = {}
): Promise<void> {
  const { name, mask = [], hide = [], maxDiffPixelRatio, threshold } = options;

  const page = locator.page();

  // Merge default hide selectors with per-call selectors
  const allHideSelectors = [...DEFAULT_HIDE_SELECTORS, ...hide];

  // Hide elements by setting visibility: hidden
  let styleHandle;
  if (allHideSelectors.length > 0) {
    styleHandle = await page.addStyleTag({
      content: allHideSelectors
        .map(
          (selector) =>
            `${selector} { visibility: hidden !important; opacity: 0 !important; pointer-events: none !important; }`
        )
        .join("\n"),
    });
  }

  try {
    // Combine default masks with per-call masks
    const allMaskSelectors = [...DEFAULT_MASK_SELECTORS, ...mask];
    const maskLocators = allMaskSelectors.map((selector) =>
      page.locator(selector)
    );

    // Wait for images to finish loading / decoding.
    await waitForImages(page);

    // Wait for any in-flight CSS animations / transitions to settle so that
    // element screenshots are deterministic (same reasoning as expectScreenshot).
    await waitForAnimations(page);

    // Build the screenshot name array (Playwright expects string[])
    const nameArg = name ? [name + ".png"] : undefined;

    if (VISUAL_REGRESSION_ENABLED) {
      const screenshotOpts = {
        mask: maskLocators.length > 0 ? maskLocators : undefined,
        ...(maxDiffPixelRatio !== undefined && { maxDiffPixelRatio }),
        ...(threshold !== undefined && { threshold }),
      };

      if (nameArg) {
        await expect(locator).toHaveScreenshot(nameArg, screenshotOpts);
      } else {
        await expect(locator).toHaveScreenshot(screenshotOpts);
      }
    } else {
      // Capture-only mode — save the screenshot without asserting
      const screenshotPath = name
        ? `output/screenshots/${name}.png`
        : undefined;
      await locator.screenshot({
        path: screenshotPath,
        mask: maskLocators.length > 0 ? maskLocators : undefined,
      });
    }
  } finally {
    // Remove the injected style tag to avoid affecting subsequent screenshots/assertions
    if (styleHandle) {
      await styleHandle.evaluate((el: HTMLStyleElement) => el.remove());
    }
  }
}


================================================
FILE: web/tests/setup/fileMock.js
================================================
// Mock for static image imports used by Next.js Image component
// Returns an object compatible with Next.js static import structure
module.exports = {
  src: "/test-image.png",
  height: 100,
  width: 100,
  blurDataURL: "data:image/png;base64,test",
};


================================================
FILE: web/tests/setup/jest.setup.ts
================================================
import "@testing-library/jest-dom";
import { TextEncoder, TextDecoder } from "util";

// Tell React 18+ this is a test environment where act() is available
// This suppresses "not configured to support act(...)" warnings
// @ts-ignore
globalThis.IS_REACT_ACT_ENVIRONMENT = true;

// Polyfill TextEncoder/TextDecoder (required for some libraries)
global.TextEncoder = TextEncoder as any;
global.TextDecoder = TextDecoder as any;

// Only set up browser-specific mocks if we're in a jsdom environment
if (typeof window !== "undefined") {
  // Polyfill fetch for jsdom
  // @ts-ignore
  import("whatwg-fetch");

  // Mock BroadcastChannel for JSDOM
  global.BroadcastChannel = class BroadcastChannel {
    constructor(public name: string) {}
    postMessage() {}
    close() {}
    addEventListener() {}
    removeEventListener() {}
    dispatchEvent() {
      return true;
    }
  } as any;

  // Mock window.matchMedia for responsive components
  Object.defineProperty(window, "matchMedia", {
    writable: true,
    value: jest.fn().mockImplementation((query) => ({
      matches: false,
      media: query,
      onchange: null,
      addListener: jest.fn(), // deprecated
      removeListener: jest.fn(), // deprecated
      addEventListener: jest.fn(),
      removeEventListener: jest.fn(),
      dispatchEvent: jest.fn(),
    })),
  });

  // Mock IntersectionObserver
  global.IntersectionObserver = class IntersectionObserver {
    constructor() {}
    disconnect() {}
    observe() {}
    takeRecords() {
      return [];
    }
    unobserve() {}
  } as any;

  // Mock ResizeObserver
  global.ResizeObserver = class ResizeObserver {
    constructor() {}
    disconnect() {}
    observe() {}
    unobserve() {}
  } as any;

  // Mock window.scrollTo
  global.scrollTo = jest.fn();
}

// Suppress specific known console errors that are not actionable in tests.
// This pattern is recommended for handling third-party library warnings:
// https://github.com/testing-library/user-event/issues/1114#issuecomment-1876164351
//
// Radix UI's compose-refs package triggers state updates during component unmount
// which causes React to emit "not configured to support act" warnings. This happens
// because the updates occur in React's commit phase, outside of any act() boundary.
// The IS_REACT_ACT_ENVIRONMENT flag doesn't help because jsdom's globalThis is set
// up before our setup file runs.
const SUPPRESSED_ERRORS = [
  "The current testing environment is not configured to support act",
] as const;

const originalError = console.error;
console.error = (...args: any[]) => {
  if (
    typeof args[0] === "string" &&
    SUPPRESSED_ERRORS.some((error) => args[0].includes(error))
  ) {
    return;
  }
  originalError.call(console, ...args);
};


================================================
FILE: web/tests/setup/llmProviderTestUtils.ts
================================================
import { LLMProviderDescriptor } from "@/interfaces/llm";

export function makeProvider(
  overrides: Partial<LLMProviderDescriptor>
): LLMProviderDescriptor {
  return {
    id: overrides.id ?? 1,
    name: overrides.name ?? "Provider",
    provider: overrides.provider ?? "openai",
    provider_display_name: overrides.provider_display_name ?? "Provider",
    model_configurations: overrides.model_configurations ?? [],
    ...overrides,
  };
}


================================================
FILE: web/tests/setup/mocks/README.md
================================================
# Test Mocks Directory

This directory contains mock implementations used in Jest tests.

## Mocking Strategy

**Use `transformIgnorePatterns` for ES Module packages** instead of mocking them.

### Two Approaches:

| Approach                     | Use When                                                      | Examples                                                             |
| ---------------------------- | ------------------------------------------------------------- | -------------------------------------------------------------------- |
| **transformIgnorePatterns**  | All ESM packages                                              | `@radix-ui`, `@headlessui`, `react-markdown`, `remark-*`, `rehype-*` |
| **moduleNameMapper (mocks)** | Non-executable assets/files, or components with complex setup | CSS files, images, UserProvider                                      |

### Why Use transformIgnorePatterns:

Modern npm packages ship as ES Modules (ESM) by default. Jest runs in a Node environment that expects CommonJS. The `transformIgnorePatterns` configuration tells Jest which packages in `node_modules` to transform from ESM to CommonJS.

**Benefits:**

- Tests run against real package code, not mocks
- No need to maintain mock implementations
- Catches real bugs in how we use dependencies

**Trade-off:**

- Tests run slower (transformation takes time, especially for markdown packages)

## When to Add to transformIgnorePatterns

**Add packages to the `transformIgnorePatterns` array in `jest.config.js` when:**

### ✅ Add to transformIgnorePatterns:

1. **SyntaxError: Unexpected token 'export'**

   ```
   Error: SyntaxError: Unexpected token 'export'
   at node_modules/package-name/index.js:1
   ```

   → Package uses ES Modules and needs transformation

2. **Package ships as ESM**

   - Check `package.json`: `"type": "module"` or `"exports"` field
   - Files use `export`/`import` syntax
   - Common in modern packages (markdown, UI libraries)

3. **Works fine when transformed**
   - Package has no complex dependencies
   - No browser-specific APIs or native modules
   - Just needs ESM → CommonJS conversion

### How to Add:

1. Open `web/jest.config.js`
2. Find the `transformIgnorePatterns` array
3. Add package name to the appropriate category:

```javascript
transformIgnorePatterns: [
  "/node_modules/(?!(" +
    [
      // ... existing packages ...

      // Add your package here (grouped by category)
      "your-package-name",
      "another-package",

      // Use regex patterns for related packages
      "package-.*",  // All packages starting with "package-"
    ].join("|") +
    ")/)",
],
```

**Example:** Adding `remark-directive`:

```javascript
// Markdown & Syntax Highlighting
"react-markdown",
"remark-gfm",
"remark-math",
"remark-directive",  // ← Add here
"remark-parse",
```

## When to Add Mocks to This Directory

**Only mock things that CANNOT be executed in tests.**

### ✅ DO Mock:

1. **CSS/Style Files**

   - Already handled by `cssMock.js`
   - Cannot be executed in Node environment
   - Examples: `.css`, `.scss`, `.sass`, `.less`

2. **Static Assets**

   - Already handled by `fileMock.js`
   - Binary files that can't be imported
   - Examples: images, fonts, videos

3. **Components with Complex External Dependencies**
   - Components that require browser APIs not available in jsdom
   - Components with difficult-to-setup external dependencies
   - Example: `UserProvider.tsx` (already mocked)

### ❌ DON'T Mock:

1. **ES Module Packages**

   - ALWAYS use `transformIgnorePatterns` instead
   - Even complex packages like `react-markdown` with deep ESM dependency trees
   - Add the package (and any dependencies that fail) to `transformIgnorePatterns`

2. **Your Own Code**

   - Test real implementations
   - Mocking defeats the purpose of testing

3. **Packages That Work in Jest**
   - Most packages work fine in Jest
   - No need to add them anywhere

## Current Mocks

This directory contains **necessary mocks**:

```
mocks/
├── components/
│   └── UserProvider.tsx        # Component with complex dependencies
├── cssMock.js                  # All CSS/style files
└── README.md                   # This file
```

**Note:** `fileMock.js` is in `tests/setup/` (not in `mocks/`) for historical reasons.

## How to Add a New Mock

### Step 1: Determine if You Really Need a Mock

**Try `transformIgnorePatterns` first!** Only create a mock if:

- Asset/file cannot be executed (CSS, images)
- Component has complex external dependencies
- Package absolutely cannot work when transformed

### Step 2: Create the Mock File

**For Components:**

```typescript
// mocks/components/ComponentName.tsx
import React from 'react';

export default function ComponentName({ children }: { children?: React.ReactNode }) {
  return <div data-testid="mock-component-name">{children}</div>;
}
```

**For CSS/Assets:** (Already handled - no need to create)

### Step 3: Register in jest.config.js

Add to `moduleNameMapper`:

```javascript
moduleNameMapper: {
  // Before path aliases!
  "^@/components/ComponentName$":
    "<rootDir>/tests/setup/mocks/components/ComponentName.tsx",

  // Path aliases come last
  "^@/(.*)$": "<rootDir>/src/$1",
}
```

### Step 4: Verify Tests Pass

```bash
npm test
```

## Decision Tree

```
Need to use a package in tests?
         ↓
Does it cause "SyntaxError: Unexpected token 'export'"?
         ↓
      YES → Try adding to transformIgnorePatterns first ✅
         ↓
      Does it still fail after transformation?
         ↓
      YES → Create mock (complex ESM structure) ⚠️
         |
      NO → Transformation worked! ✅
         |
Is it CSS/static asset?
         ↓
      YES → Already mocked (cssMock.js/fileMock.js) ✅
         |
      NO → Can the package be executed in Node/jsdom?
         ↓
      YES → Use it directly (no mock needed) ✅
         |
      NO → Is it a component with complex dependencies?
         ↓
      YES → Create mock in mocks/components/ ⚠️
         |
      NO → You probably don't need a mock! ✅
```

## Examples

### ✅ Example 1: ESM Package

**Problem:** `@tiptap/react` causes `SyntaxError: Unexpected token 'export'`

**Solution:** Add to `transformIgnorePatterns` in `jest.config.js`

```javascript
transformIgnorePatterns: [
  "/node_modules/(?!(" +
    [
      // ...
      "@tiptap/react",  // ← Add here
      "@tiptap/core",
      // ...
    ].join("|") +
    ")/)",
],
```

**If you get more errors:** Keep adding the failing packages until tests pass. The package may have ESM dependencies that also need transformation.

### ✅ Example 2: Complex ESM Package with Dependencies

**Problem:** `react-markdown` causes SyntaxError, then after fixing it, `devlop` fails, then `hast-util-to-jsx-runtime` fails...

**Solution:** Keep adding packages to transformIgnorePatterns:

```javascript
[
  "react-markdown",
  "remark-.*", // All remark packages
  "rehype-.*", // All rehype packages
  "hast-.*", // All hast packages
  "devlop",
  "hastscript",
  // ... and so on
];
```

**Pro tip:** Use wildcard patterns like `"remark-.*"` to match all packages with that prefix.

### ✅ Example 3: Static Asset (Already Handled)

**Problem:** Importing CSS causes error

**Solution:** Already handled! `cssMock.js` catches all CSS imports.

### ✅ Example 4: Component Mock (Rare Case)

**Problem:** `AuthProvider` requires complex auth setup

**Solution:**

```typescript
// mocks/components/AuthProvider.tsx
import React from 'react';

export default function AuthProvider({ children }: { children?: React.ReactNode }) {
  return <div data-testid="mock-auth-provider">{children}</div>;
}
```

```javascript
// jest.config.js
"^@/components/auth/AuthProvider$":
  "<rootDir>/tests/setup/mocks/components/AuthProvider.tsx",
```

## Troubleshooting

### "SyntaxError: Unexpected token 'export'"

**Fix:** Add the package to `transformIgnorePatterns` in `jest.config.js`

**If it happens again:** The package likely has ESM dependencies. Keep adding failing packages to the list until tests pass.

### "Cannot find module 'package-name'"

**Check:**

1. Is package installed? `npm ls package-name`
2. Is path in `jest.config.js` correct?
3. Did you add to `transformIgnorePatterns` if it's ESM?

### Tests slow after adding to transformIgnorePatterns

**This is expected.** Transformation takes time, especially for packages with deep dependency trees like `react-markdown`.

**Example:** The markdown tests take ~23 seconds vs ~1 second without markdown packages.

**Why this is worth it:**

- Tests run against real code, catching real bugs
- No mock maintenance burden
- More confidence in test results

**If tests are too slow:**

1. Use `jest --maxWorkers=50%` to parallelize (already configured)
2. Run specific test files during development: `npm test -- --testPathPattern=MyComponent`
3. Let CI run the full suite

### Package still fails after adding to transformIgnorePatterns

**Rare, but possible issues:**

1. Package requires browser APIs → Mock it or use jsdom
2. Package has native dependencies → May need different approach
3. TypeScript type errors → Check tsconfig `allowJs: true` in jest.config.js transform options

## Testing Philosophy

**The Goal:** Write tests that are reliable and test YOUR code with REAL dependencies.

- ✅ **Transform ESM packages** - Always use `transformIgnorePatterns` for npm packages
- ✅ **Mock only non-executable things** - CSS, images, videos (things Node.js can't execute)
- ✅ **Test real code** - More confidence, catches real bugs, no mock maintenance
- ❌ **Don't mock packages** - Even if they have complex dependency trees
- ⚠️ **Accept slower tests** - Transformation takes time, but correctness > speed

## Additional Resources

- [Jest transformIgnorePatterns Documentation](https://jestjs.io/docs/configuration#transformignorepatterns-arraystring)
- [ES Modules in Jest](https://jestjs.io/docs/ecmascript-modules)
- [Testing Library Best Practices](https://testing-library.com/docs/guiding-principles/)


================================================
FILE: web/tests/setup/mocks/components/UserProvider.tsx
================================================
/**
 * Mock for @/components/user/UserProvider
 *
 * Why this mock exists:
 * The real UserProvider requires complex props (authTypeMetadata, settings, user)
 * that are not relevant for most component integration tests. This mock provides
 * a simple useUser() hook with safe default values.
 *
 * Usage:
 * Automatically applied via jest.config.js moduleNameMapper.
 * Any component that imports from "@/components/user/UserProvider" will get this mock.
 *
 * To customize user values in a specific test:
 * You would need to either:
 * 1. Pass props to the real UserProvider (requires disabling this mock for that test)
 * 2. Extend this mock to accept custom values via a setup function
 */
import React, { createContext, useContext } from "react";

interface UserContextType {
  user: any;
  isAdmin: boolean;
  isCurator: boolean;
  refreshUser: () => Promise<void>;
  isCloudSuperuser: boolean;
  updateUserAutoScroll: (autoScroll: boolean) => Promise<void>;
  updateUserShortcuts: (enabled: boolean) => Promise<void>;
  toggleAgentPinnedStatus: (
    currentPinnedAgentIDs: number[],
    agentId: number,
    isPinned: boolean
  ) => Promise<boolean>;
  updateUserTemperatureOverrideEnabled: (enabled: boolean) => Promise<void>;
  updateUserPersonalization: (personalization: any) => Promise<void>;
}

const mockUserContext: UserContextType = {
  user: null,
  isAdmin: false,
  isCurator: false,
  refreshUser: async () => {},
  isCloudSuperuser: false,
  updateUserAutoScroll: async () => {},
  updateUserShortcuts: async () => {},
  toggleAgentPinnedStatus: async () => true,
  updateUserTemperatureOverrideEnabled: async () => {},
  updateUserPersonalization: async () => {},
};

const UserContext = createContext<UserContextType | undefined>(mockUserContext);

export function useUser() {
  const context = useContext(UserContext);
  if (context === undefined) {
    throw new Error("useUser must be used within a UserProvider");
  }
  return context;
}

export function UserProvider({ children }: { children: React.ReactNode }) {
  return (
    <UserContext.Provider value={mockUserContext}>
      {children}
    </UserContext.Provider>
  );
}


================================================
FILE: web/tests/setup/mocks/cssMock.js
================================================
// Mock for CSS imports
module.exports = {};


================================================
FILE: web/tests/setup/test-utils.tsx
================================================
import React, { ReactElement } from "react";
import { render, RenderOptions } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { SWRConfig } from "swr";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
export { makeProvider } from "./llmProviderTestUtils";

/**
 * Custom render function that wraps components with common providers
 * used throughout the Onyx application.
 */

interface AllProvidersProps {
  children: React.ReactNode;
  swrConfig?: Record<string, any>;
}

/**
 * Wrapper component that provides all necessary context providers for tests.
 * Customize this as needed when you discover more global providers in the app.
 */
function AllTheProviders({ children, swrConfig = {} }: AllProvidersProps) {
  return (
    <SWRConfig
      value={{
        // Disable deduping in tests to ensure each test gets fresh data
        dedupingInterval: 0,
        // Use a Map instead of cache to avoid state leaking between tests
        provider: () => new Map(),
        // Disable error retries in tests for faster failures
        shouldRetryOnError: false,
        // Merge any custom SWR config passed from tests
        ...swrConfig,
      }}
    >
      <TooltipPrimitive.Provider>{children}</TooltipPrimitive.Provider>
    </SWRConfig>
  );
}

interface CustomRenderOptions extends Omit<RenderOptions, "wrapper"> {
  swrConfig?: Record<string, any>;
}

/**
 * Custom render function that wraps the component with all providers.
 * Use this instead of @testing-library/react's render in your tests.
 *
 * @example
 * import { render, screen } from '@tests/setup/test-utils';
 *
 * test('renders component', () => {
 *   render(<MyComponent />);
 *   expect(screen.getByText('Hello')).toBeInTheDocument();
 * });
 *
 * @example
 * // With custom SWR config to mock API responses
 * render(<MyComponent />, {
 *   swrConfig: {
 *     fallback: {
 *       '/api/credentials': mockCredentials,
 *     },
 *   },
 * });
 */
const customRender = (
  ui: ReactElement,
  { swrConfig, ...options }: CustomRenderOptions = {}
) => {
  const Wrapper = ({ children }: { children: React.ReactNode }) => (
    <AllTheProviders swrConfig={swrConfig}>{children}</AllTheProviders>
  );

  return render(ui, { wrapper: Wrapper, ...options });
};

// Re-export everything from @testing-library/react
export * from "@testing-library/react";
export { userEvent };

// Override render with our custom render
export { customRender as render };

/**
 * Setup userEvent with optimized configuration for testing.
 * All user interactions are automatically wrapped in act() to prevent warnings.
 * Use this helper instead of userEvent.setup() directly.
 *
 * @example
 * const user = setupUser();
 * await user.click(button);
 * await user.type(input, "text");
 */
export function setupUser(options = {}) {
  const baseUser = userEvent.setup({
    // Configure for React 18 to reduce act warnings
    delay: null, // Instant typing - batches state updates better
    ...options,
  });

  // Wrap all user-event methods in act() to prevent act warnings. We add this here
  // to prevent all callsites from needing to import and wrap user events in act()
  return new Proxy(baseUser, {
    get(target, prop) {
      const value = target[prop as keyof typeof target];

      // Only wrap methods (functions), not properties
      if (typeof value === "function") {
        return async (...args: any[]) => {
          const { act } = await import("@testing-library/react");
          return act(async () => {
            return (value as Function).apply(target, args);
          });
        };
      }

      return value;
    },
  });
}


================================================
FILE: web/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "es5",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "ESNext",
    "moduleResolution": "node",
    "noUncheckedIndexedAccess": true,
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "react-jsx",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./src/*"],
      "@tests/*": ["./tests/*"],
      "@public/*": ["./public/*"],
      "@opal/*": ["./lib/opal/src/*"],
      "@opal/types/*": ["./lib/opal/src/types/*"]
    }
  },
  "include": [
    "next-env.d.ts",
    "**/*.ts",
    "**/*.tsx",
    ".next/types/**/*.ts",
    ".next/dev/types/**/*.ts"
  ],
  "exclude": ["node_modules", "lib/opal"]
}


================================================
FILE: web/tsconfig.types.json
================================================
{
  "extends": "./tsconfig.json",
  "compilerOptions": {
    "paths": {
      "@/*": ["./src/*"],
      "@tests/*": ["./tests/*"],
      "@public/*": ["./public/*"],
      "@opal/*": ["./lib/opal/src/*"]
    }
  },
  "include": [
    "next-env.d.ts",
    "src/**/*",
    "tests/**/*",
    ".next/types/**/*.ts",
    ".next/dev/types/**/*.ts",
    "types/**/*.d.ts"
  ],
  "exclude": ["node_modules", "lib/opal"]
}


================================================
FILE: web/types/assets.d.ts
================================================
declare module "*.svg" {
  const src: string;
  export default src;
}

declare module "*.png" {
  const src: string;
  export default src;
}

declare module "*.jpg" {
  const src: string;
  export default src;
}

declare module "*.jpeg" {
  const src: string;
  export default src;
}

declare module "*.gif" {
  const src: string;
  export default src;
}

declare module "*.webp" {
  const src: string;
  export default src;
}


================================================
FILE: web/types/favicon-fetch.d.ts
================================================
declare module "favicon-fetch" {
  type FetchFaviconArg = string | { uri: string };
  const fetchFavicon: (input: FetchFaviconArg) => Promise<string | undefined>;
  export default fetchFavicon;
}


================================================
FILE: widget/.gitignore
================================================
.vite/
dist/

================================================
FILE: widget/README.md
================================================
# Onyx Chat Widget

An embeddable, lightweight chat widget that brings AI-powered conversations to any website. Built with [Lit](https://lit.dev/) web components for maximum compatibility and minimal bundle size.

## Security Note

⚠️ **Always use a limited-scope API key for the widget.** The API key is visible in client-side code, so it should have restricted permissions and rate limits. Never use admin or full-access keys.

## Features

- 🚀 **Lightweight** - ~100-150kb gzipped bundle
- 🎨 **Fully Customizable** - Colors, branding, and styling
- 📱 **Responsive** - Desktop popup, mobile fullscreen
- 🔒 **Shadow DOM Isolation** - No style conflicts with your site
- 💬 **Real-time Streaming** - Server-sent events (SSE) for fast responses
- 🌐 **Two Deployment Modes** - Cloud CDN or self-hosted
- ♿ **Markdown Support** - Rich text formatting in responses
- 💾 **Session Persistence** - Conversations survive page reloads
- 🎯 **Two Display Modes** - Floating launcher or inline embed

## Quick Start

### Cloud Deployment (Recommended)

Add these two lines to your website:

```html
<!-- Load the widget -->
<script type="module" src="https://cdn.onyx.app/widget/1.0/dist/onyx-widget.js"></script>

<!-- Configure and display -->
<onyx-chat-widget
  backend-url="https://cloud.onyx.app/api"
  api-key="your_api_key_here"
  mode="launcher"
>
</onyx-chat-widget>
```

That's it! The widget will appear as a floating button in the bottom-right corner.

## How It Works

### Architecture Overview

```
┌─────────────────────────────────────────┐
│         Customer Website                │
│  ┌───────────────────────────────────┐  │
│  │  <onyx-chat-widget>               │  │
│  │  (Web Component)                  │  │
│  │  ┌─────────────────────────────┐  │  │
│  │  │    Shadow DOM               │  │  │
│  │  │  • Isolated styles          │  │  │
│  │  │  • UI components            │  │  │
│  │  │  • Message history          │  │  │
│  │  └─────────────────────────────┘  │  │
│  └───────────────────────────────────┘  │
└──────────────┬──────────────────────────┘
               │ API Calls (SSE)
               ▼
┌──────────────────────────────────────────┐
│         Onyx Backend                     │
│  • POST /api/chat/create-chat-session    │
│  • POST /api/chat/send-chat-message      │
│  • Streams responses via SSE             │
└──────────────────────────────────────────┘
```

### Technology Stack

- **Frontend Framework**: [Lit](https://lit.dev/) - Lightweight web components
- **Markdown Rendering**: [marked.js](https://marked.js.org/)
- **Build Tool**: [Vite](https://vitejs.dev/)
- **Styling**: CSS-in-JS with Shadow DOM isolation
- **API Communication**: Fetch API with SSE (Server-Sent Events)

### Component Structure

```
<onyx-chat-widget>
  └─ Shadow DOM
      ├─ Launcher Button (mode="launcher" only)
      └─ Chat Container
          ├─ Header
          │   ├─ Logo/Avatar
          │   ├─ Agent Name
          │   └─ Actions (Reset, Close)
          ├─ Disclaimer
          ├─ Messages
          │   ├─ User Messages
          │   ├─ Assistant Messages (with markdown)
          │   └─ Typing Indicator
          └─ Input Area
              ├─ Text Input
              ├─ Send Button
              └─ "Powered by Onyx" Footer
```

## Configuration Options

### Required Attributes

| Attribute     | Type   | Description                                                          |
| ------------- | ------ | -------------------------------------------------------------------- |
| `backend-url` | string | Your Onyx backend API URL (or set `VITE_WIDGET_BACKEND_URL` in .env) |
| `api-key`     | string | API key for authentication (or set `VITE_WIDGET_API_KEY` in .env)    |

**Note**: For cloud deployment, these must be provided as HTML attributes. For self-hosted deployment, they can be set in `.env` file during build and will be baked into the bundle.

### Optional Attributes

| Attribute          | Type   | Default       | Description                              |
| ------------------ | ------ | ------------- | ---------------------------------------- |
| `agent-id`         | number | `undefined`   | Specific agent/persona to use            |
| `agent-name`       | string | `"Assistant"` | Display name in header                   |
| `logo`             | string | Onyx logo     | URL to custom logo image                 |
| `primary-color`    | string  | `#1c1c1c`     | Primary brand color (buttons, accents)   |
| `background-color` | string  | `#e9e9e9`     | Widget background color                  |
| `text-color`       | string  | `#000000bf`   | Text color (75% opacity black)           |
| `mode`             | string  | `"launcher"`  | Display mode: `"launcher"` or `"inline"` |
| `include-citations`| boolean | `false`       | Include citation markers in responses    |

**Note**: These attributes must be provided as HTML attributes. Only `backend-url` and `api-key` can optionally be set via environment variables for self-hosted builds.

### Configuration Examples

**Basic Setup:**

```html
<onyx-chat-widget backend-url="https://cloud.onyx.app/api" api-key="on_abc123">
</onyx-chat-widget>
```

**Full Customization:**

```html
<onyx-chat-widget
  backend-url="https://cloud.onyx.app/api"
  api-key="on_abc123"
  agent-id="42"
  agent-name="Support Bot"
  logo="https://yoursite.com/logo.png"
  primary-color="#FF6B35"
  background-color="#FFFFFF"
  text-color="#1A1A1A"
  mode="launcher"
>
</onyx-chat-widget>
```

**Inline Mode (Embedded):**

```html
<div style="width: 400px; height: 600px;">
  <onyx-chat-widget
    backend-url="https://cloud.onyx.app/api"
    api-key="on_abc123"
    mode="inline"
  >
  </onyx-chat-widget>
</div>
```

## Display Modes

### Launcher Mode (Default)

A floating button appears in the bottom-right corner. Clicking it opens a chat popup.

- **Desktop**: 400x600px popup above the button
- **Mobile (<768px)**: Full-screen overlay

```html
<onyx-chat-widget mode="launcher"></onyx-chat-widget>
```

### Inline Mode

The widget is embedded directly in your page layout. Perfect for dedicated support pages.

```html
<div class="chat-container">
  <onyx-chat-widget mode="inline"></onyx-chat-widget>
</div>
```

**CSS Tip**: The widget will fill its container's dimensions in inline mode.

## Development

### Prerequisites

- Node.js 18+ and npm
- Access to Onyx backend API

### Setup

```bash
# Navigate to widget directory
cd widget/

# Install dependencies
npm install

# Copy example env file (for self-hosted builds)
cp .env.example .env
```

### Development Server

```bash
npm run dev
```

Opens at `http://localhost:5173` with hot module replacement.

### Build Commands

```bash
# Cloud deployment (no config baked in)
npm run build:cloud

# Self-hosted deployment (config from .env)
npm run build:self-hosted

# Standard build (same as cloud)
npm run build
```

### Project Structure

```
widget/
├── src/
│   ├── index.ts                 # Entry point
│   ├── widget.ts                # Main component
│   ├── config/
│   │   ├── config.ts            # Configuration resolver
│   │   └── build-config.ts      # Build-time config injection
│   ├── services/
│   │   ├── api-service.ts       # API client (SSE streaming)
│   │   └── stream-parser.ts     # SSE packet processor
│   ├── types/
│   │   ├── api-types.ts         # Backend packet types
│   │   └── widget-types.ts      # Widget configuration types
│   ├── styles/
│   │   ├── theme.ts             # Design tokens
│   │   ├── colors.ts            # Color system
│   │   └── widget-styles.ts     # Component styles
│   ├── utils/
│   │   └── storage.ts           # Session persistence
│   └── assets/
│       └── logo.ts              # Default Onyx logo (base64)
├── dist/                        # Build output
├── index.html
├── package.json
├── vite.config.ts
└── tsconfig.json
```

### Key Files

- **[src/widget.ts](src/widget.ts)** - Main Lit component with all UI logic
- **[src/services/api-service.ts](src/services/api-service.ts)** - Handles API calls and SSE streaming
- **[src/styles/widget-styles.ts](src/styles/widget-styles.ts)** - All CSS styles
- **[vite.config.ts](vite.config.ts)** - Build configuration (cloud vs self-hosted)

## API Integration

### Backend Endpoints Used

The widget communicates with these Onyx backend endpoints:

#### 1. Create Chat Session

```
POST /chat/create-chat-session
Content-Type: application/json
Authorization: Bearer YOUR_API_KEY

{
  "persona_id": 42  // Optional agent ID
}

Response:
{
  "chat_session_id": "uuid-here"
}
```

#### 2. Send Message (SSE Streaming)

```
POST /chat/send-chat-message
Content-Type: application/json
Authorization: Bearer YOUR_API_KEY

{
  "message": "User's question",
  "chat_session_id": "uuid-here",
  "parent_message_id": 123,  // null for first message
  "origin": "widget",
  "include_citations": false
}

Response: Server-Sent Events stream
{"type": "message_start"}
{"type": "message_delta", "content": "Hello"}
{"type": "message_delta", "content": " world!"}
{"type": "stop"}
```

## Deployment

### Self-Hosted Deployment

1. **Create `.env` file:**

   ```bash
   VITE_WIDGET_BACKEND_URL=https://your-backend.com
   VITE_WIDGET_API_KEY=your_api_key
   ```

2. **Build with config baked in:**

   ```bash
   npm run build:self-hosted
   ```

3. **Deploy `dist/onyx-widget.js` to your server**

4. **Customer embed:**
   ```html
   <script type="module" src="https://your-cdn.com/onyx-widget.js"></script>
   <onyx-chat-widget
     agent-id="1"
     agent-name="Support"
     logo="https://path-to-your-logo.com/"
   >
   </onyx-chat-widget>
   ```

## Customization

### Styling

The widget uses CSS custom properties (CSS variables) for theming. All styles are scoped within Shadow DOM to prevent conflicts.

**Default Colors (aligned with web/src/app/css/colors.css):**

```css
--theme-primary-05: #1c1c1c; /* Buttons, accents (onyx-ink-95) */
--theme-primary-06: #000000; /* Hover state (onyx-ink-100) */
--background-neutral-00: #ffffff; /* Widget background (grey-00) */
--background-neutral-03: #e6e6e6; /* Background hover (grey-10) */
--text-04: #000000bf; /* Text (alpha-grey-100-75) */
--text-light-05: #ffffff; /* White text on dark (grey-00) */
--border-01: #00000033; /* Borders (alpha-grey-100-20) */
```

**Override via attributes:**

```html
<onyx-chat-widget
  primary-color="#FF6B35"
  background-color="#FFFFFF"
  text-color="#1A1A1A"
>
</onyx-chat-widget>
```

## Browser Support

- ✅ Chrome/Edge 90+ (Chromium)
- ✅ Firefox 90+
- ✅ Safari 15+
- ✅ Mobile Safari (iOS 15+)
- ✅ Mobile Chrome (Android)

**Requirements:**

- ES Modules support
- Custom Elements v1
- Shadow DOM v1
- Fetch API with SSE

## Performance

- **Bundle Size**: ~100-150kb gzipped
- **Initial Load**: Shadow DOM renders immediately
- **Message Latency**: Real-time SSE streaming (<100ms first token)
- **Session Persistence**: sessionStorage (auto-save on each message)


================================================
FILE: widget/index.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Onyx Chat Widget - Development</title>
    <style>
      * {
        box-sizing: border-box;
      }

      body {
        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto",
          "Helvetica", "Arial", sans-serif;
        margin: 0;
        padding: 0;
        background: #f5f5f5;
        color: #1a1a1a;
        line-height: 1.6;
      }

      .header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 60px 20px;
        text-align: center;
      }

      .header h1 {
        margin: 0 0 10px 0;
        font-size: 2.5em;
        font-weight: 700;
      }

      .header p {
        margin: 0;
        font-size: 1.2em;
        opacity: 0.9;
      }

      .container {
        max-width: 1200px;
        margin: 0 auto;
        padding: 40px 20px;
      }

      .section {
        background: white;
        padding: 40px;
        border-radius: 12px;
        box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
        margin-bottom: 40px;
      }

      .section h2 {
        margin-top: 0;
        color: #667eea;
        font-size: 1.8em;
        margin-bottom: 20px;
      }

      .section p {
        color: #666;
        margin-bottom: 20px;
      }

      .code-block {
        background: #f8f9fa;
        border: 1px solid #e1e4e8;
        border-radius: 6px;
        padding: 16px;
        font-family: "Monaco", "Courier New", monospace;
        font-size: 14px;
        overflow-x: auto;
        margin: 20px 0;
      }

      .demo-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
        gap: 30px;
        margin-top: 30px;
      }

      .demo-card {
        background: #f8f9fa;
        border: 2px solid #e1e4e8;
        border-radius: 8px;
        padding: 20px;
      }

      .demo-card h3 {
        margin-top: 0;
        color: #1a1a1a;
        font-size: 1.2em;
      }

      .demo-card p {
        color: #666;
        font-size: 0.95em;
        margin-bottom: 15px;
      }

      .compact-demo {
        width: 100%;
        max-width: 600px;
        margin: 20px auto 0;
      }

      .badge {
        display: inline-block;
        padding: 4px 12px;
        background: #667eea;
        color: white;
        border-radius: 12px;
        font-size: 0.85em;
        font-weight: 600;
        margin-bottom: 10px;
      }

      .info-box {
        background: #e3f2fd;
        border-left: 4px solid #2196f3;
        padding: 16px 20px;
        margin: 20px 0;
        border-radius: 4px;
      }

      .info-box strong {
        color: #1565c0;
      }

      .feature-list {
        list-style: none;
        padding: 0;
      }

      .feature-list li {
        padding: 8px 0;
        padding-left: 24px;
        position: relative;
      }

      .feature-list li:before {
        content: "✓";
        position: absolute;
        left: 0;
        color: #667eea;
        font-weight: bold;
      }
    </style>
  </head>
  <body>
    <div class="header">
      <h1>🤖 Onyx Chat Widget</h1>
      <p>Embeddable AI-powered chat for your website</p>
    </div>

    <div class="container">
      <!-- Introduction Section -->
      <div class="section">
        <h2>Development Environment</h2>
        <p>
          This page demonstrates the Onyx Chat Widget in development mode. The
          widget is loaded from <code>src/index.ts</code> via Vite with hot
          module replacement.
        </p>

        <div class="info-box">
          <strong>Configuration:</strong> Widget settings are loaded from the
          <code>.env</code> file. Create one from <code>.env.example</code> to
          configure backend URL, API key, and other options.
        </div>

        <ul class="feature-list">
          <li>Real-time streaming responses via SSE</li>
          <li>Markdown rendering for rich content</li>
          <li>Session persistence across page reloads</li>
          <li>Shadow DOM isolation (no style conflicts)</li>
          <li>Responsive design (desktop & mobile)</li>
          <li>Two display modes: Launcher & Inline</li>
        </ul>
      </div>

      <!-- Launcher Mode Section -->
      <div class="section">
        <span class="badge">Launcher Mode</span>
        <h2>Floating Chat Button</h2>
        <p>
          Look at the bottom-right corner of this page! A floating chat button
          will appear there. Click it to open the chat popup. This is the
          default mode for most website integrations.
        </p>

        <div class="code-block">
          &lt;onyx-chat-widget mode="launcher"&gt;&lt;/onyx-chat-widget&gt;
        </div>

        <div class="demo-grid">
          <div class="demo-card">
            <h3>Desktop View</h3>
            <p>Opens as a 400×600px popup above the button</p>
          </div>
          <div class="demo-card">
            <h3>Mobile View</h3>
            <p>Expands to full-screen overlay (&lt;768px)</p>
          </div>
        </div>
      </div>

      <!-- Compact Inline Mode Section -->
      <div class="section">
        <span class="badge">Compact Mode</span>
        <h2>Compact Inline (No Messages)</h2>
        <p>
          When using inline mode without any messages, the widget displays in a
          compact form - just a search-like input bar. Once you send a message,
          it expands to show the full chat interface.
        </p>

        <div class="code-block">
          &lt;onyx-chat-widget mode="inline"&gt;&lt;/onyx-chat-widget&gt;
          &lt;!-- Automatically compact when no messages --&gt;
        </div>

        <div class="compact-demo">
          <onyx-chat-widget mode="inline"></onyx-chat-widget>
        </div>
      </div>

      <!-- Configuration Section -->
      <div class="section">
        <h2>Configuration Options</h2>
        <p>
          The widget can be customized via HTML attributes or environment
          variables (for self-hosted builds). Below are some common
          configuration examples:
        </p>

        <h3>Basic Setup</h3>
        <div class="code-block">
          &lt;onyx-chat-widget backend-url="https://api.onyx.app"
          api-key="your_api_key_here"&gt; &lt;/onyx-chat-widget&gt;
        </div>

        <h3>Custom Branding</h3>
        <div class="code-block">
          &lt;onyx-chat-widget backend-url="https://api.onyx.app"
          api-key="your_api_key_here" agent-id="42" agent-name="Support Bot"
          logo="https://yoursite.com/logo.png" primary-color="#FF6B35"
          background-color="#FFFFFF" text-color="#1A1A1A"&gt;
          &lt;/onyx-chat-widget&gt;
        </div>

        <div class="info-box">
          <strong>Note:</strong> For cloud deployments, configuration must be
          provided via HTML attributes. For self-hosted builds, you can bake
          configuration into the bundle using environment variables.
        </div>
      </div>
    </div>

    <!-- Launcher widget instance -->
    <onyx-chat-widget
      backend-url="http://localhost:8080"
      api-key="your-api-key-here"
      mode="launcher"
    ></onyx-chat-widget>

    <!-- Load widget source via Vite for local development -->
    <script type="module" src="./dist/onyx-widget.js"></script>

    <!-- Load widget source via CDN for production -->
    <!-- <script type="module" src="https://cdn.onyx.app/widget/1.0/dist/onyx-widget.js"></script> -->
  </body>
</html>


================================================
FILE: widget/package.json
================================================
{
  "name": "onyx-chat-widget",
  "version": "1.0.0",
  "description": "Embeddable chat widget for Onyx",
  "type": "module",
  "main": "dist/onyx-widget.js",
  "types": "dist/types/index.d.ts",
  "files": [
    "dist"
  ],
  "scripts": {
    "dev": "vite",
    "build": "vite build",
    "build:cloud": "vite build --mode production",
    "build:self-hosted": "vite build --mode self-hosted",
    "preview": "vite preview",
    "type-check": "tsc --noEmit"
  },
  "dependencies": {
    "dompurify": "^3.3.2",
    "lit": "^3.1.0",
    "marked": "^12.0.0",
    "terser": "^5.46.1"
  },
  "devDependencies": {
    "@types/dompurify": "^3.0.0",
    "@types/node": "^20.0.0",
    "typescript": "^5.3.0",
    "vite": "^7.3.1"
  }
}


================================================
FILE: widget/src/assets/logo.ts
================================================
// Default Onyx logo as base64 data URL
export const DEFAULT_LOGO =
  "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAGQCAYAAACAvzbMAAAACXBIWXMAAG66AABuugHW3rEXAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAABnYSURBVHgB7d3NleTGlYbhV+JmtJM8CHlAehDazW56LGjIAlIWdMmCpixA0YKmdrNDedD0AOmBtJxZcTKIKrKqOqsKmYmfuMD7nPMd8ZAUKWUk7s3ATUSCpFPyMd19GiRJesUfj/numP6Yn5+l/LkGSZIeKY3jwzH/4svGcaqRfDwmIUnarcTQDMY0jlNpsZFI0q5khtnGzxOlvf9nSpI2KjNt43iezzgnkaTNeG0wPlfKv6u5/3dLkoI5ZzA+ZyNpcU4iSSEkrhuMzxUbiSRVKjPvfGOqfMKBuyRVIROjcZy6vdUgSVrUw3yjFOHaG8XYRpKQJM2mhsH4nI2kxUYiSZP6mjoH43OlxUYiSVfJxJxvTJUOB+6SdJZ37LtxPE+PA3dJetGW5xtTN5KEJMnGcUF6nJNI2rHMUARtHNelxUYiaScyzjfmSIcDd0kb1WDjWCI9DtwlbYDzjfUbSUKSArFx1JOyBi02EkmVywzFKkJh3WNabCSSKpNxvhEpHcPDmpK0mgYbR+T0OHCXtCDnG9tsJN/i7S1JM7Fx7CMtNhJJE8k4GN9rIynH6EvS2TLON8zwHmiQpDeU21QNNg7zZXpsJJJOcL5hxqZneK8kJO2ajcNckxYbibQ7+ZhPxCpWpt60eBKwtHkZ5xtmvnQ4J5E2pdym+g4bh1kuPTYSKTTnG2bt9Dhwl0JJ2DhMfWmxkUjVynibytSfFgfuUjUyNg4TLx3OSaRVPAzGPxOraBjzPD02krB+hyIpjaMcu/3d/R9LW3E45sdj/nH/x5Imko75iINxs4+0OHCXrpZxvmH2mxYH7tLZMjYOYx5SZn0Nkl70MBjviXVxG7NUemwkVXGIvj4H49J5DsfcHfN3HLhrpxIOxo25Ni0O3LUjGecbxkyd8vMEGWmjMjYOY+ZOj3OSxTgDmdfDb4yXGUdC0lIODDOS8nDiv9EsbCDzcDAu1eGAA/fZ2ECm9fUx7xl2HTYOqS632EhUoYzzDWOipMOBuyqQsXEYEzU9Dtyv4i2sy5TbU35tUNqGn475Cw7bz/Z7dInyRitvuL8yDOgkxXN3zH8f8w02j4u4A5lGZtgKv0dS7e4Yhul36Co2kGmlY26wkUi1KTuM8kzID9g4JmMDmUfitx1JQtJaSuMov3L4Pd6mmpwNZH7NMR+wkUhLOjDsNmwc2oQGv/JrzNzp8NuR2rDM8ERslAvSmAjpsHEszltY60k4cJeuUW5NldtUtwzPcmhhNpD1JRy4S+dwMC6d0OBvohvzUno84Vp6U4MDd2Me0uF8QzpbOR7+llgXuzFTpcPGIV0tYSMx+8i/GGYbCVXPIXos6Zh3+BO52h4H49KCGhy4m/jpcTAurabsSDpiFQ1jOpxvSNVIOCcx9afDxiFVKzE0kjKMjFJUzLbjYFwKJuGcxKyb0jhucL4hhdZgIzHL5TMOxqXNyThwN/Olw/mGNsRPQKclHLib6dJh43iJNSiwDr0mMTSSnlgFy6yfh/lGQq/5jEJqGN7oGb0l4ZzEjIuD8fEahtfsHQqnZ1i8Dp2jwUZivkzH8N6wcYxXdh8Pr50CyTx982d0rowDd+N841KZp69jRmF84suLQJdJOHDfY8o1lNGlWrAGRZQ4fUF8ja6RcOC+9TjfmEbi9OubUfVaTi/e92gKCeckNg69puX069yiqiVev0i8QKbVYCOJnO5+DTWdhDUorJbXL5gbNIfMl3MnU3fjyGgOH7AGhVQ6e8/ri+cngHklHLjXnBYbx9x6rEEhNYy7iL5Dc0s4cK8lzjeW0zBuTW5QdXrGLV6PllKKVoONxMaxDz3WoJAazru4Mlpaw29P5pr50uFgfA0N1qCwzi1MHVpLxjnJXI0jo7WU1//c9VIFMpddcBmtKWEjmSItvpfXlrEGhVUuoEsWr0M1SAwPefbUVZhrjvONurRYg0JKXHchJlSTBhuJjSOWxHVr6hFLK2q5bvE83qRODZ4E/Dgd/qZErVqsQSElrr8wfainbpl9z0k6vE9es4Q1KKyPTHOR3qDaJfbTSEpBabFxRPABa1BYPdNdsH4CiCExXGxTrX1tjeMG34uR9FiDQmqY9uL1eJN4GrbRSHpsHBE1TPs+uEGLKRfd1BexYmqIOXDv8DZVZD3WoJAa5rmgM4osE2NO0uF7LboGa1BY5QKc68JWfIn6Gkm5x12+rul3/reh1AprUECZeS/0jLYisf7A3cH49mSsQWG1zLt4HdqihmUbSfl3lS9m2Di2p8UaFFJimYs/oa1qmHfgXv7ZGW1VwhoUVssyi+fRAttXZhG32Dh0nhZrUEiJZRauxId69iNxeSN5GIwntAcJa1BYH1hu8Upu0J4khplFz7iL+wYv8L0p7w9rUFBjLmw/AWgKDaffb+XPORjfL2tQUA3LLtxDPN5k38rx6R3ON2QNCq1nncX7jCStV4N6dJWGdRbuIRlJe9ZgDQqrY93F65C0Z9agoDLrLpyfAKR9K88LWYOCaqlj8Tok7ZE1KKhEHQv3kISkPUlYg970e+r0gbo0SNqT2mqQX+kdKVFX5y/xoR5pPxLDNW8NekONO5D31KcsnJ8ApH3I1FesrUEj9dTV+av+BCBpctagkWrbgTTUO7AuC/cOSVvWUHcNatCLau38D+mQtGW11yCPWHpBQ90L95CMpC0qdxisQUF1xFi8DklbZA0KqpYjA8YmI2lLEtagsFpiLd4nJG1JS6wa1KFfJGIt3EP8Sq+0DYlYtechiZXV8DXeD8TkQz3SNkStQQ07l4jV8R/HBwul+BL1f3W32hq09g4k8oN5Hi0gxZeJe9r27mtQT6yOf+oTgKS4emLVnKp2IWvuQBri/86GRwtIcTVYg8LqidXpX0qHpIh6YtWal7K7402iHBkwNhlJkWRi1Rhr0CMdsRbnrXRIiqRcsxFqizXomUSshRmbjKQIErFqS7U1aI0h+ge26T2SIthqDfqWhf2OZSWGwdVW/emYfyOpVglr0GS+YlkfGU7e3ar/O+YOSbWyBgWVGB56qfX+4RTxeBOpXontfHW3ihq05Awks/3i6vEmUr0y8R8cfMuiNWjJGUjP9hevKPcf/4Sk2uypBv2ZBWYhS+1AGvaxcIXHm0j1adhXDYp8UO0XSuev/d7hlOmQVJOeWDXEGnQvE+uFnyoZSTXIxKod1qBHOmK96FOlQ1INyrUYoWZYg55JxHrBp05G0poSsWpGqBo09xD9A/v2HklrsgbNaM6v8Sa2fWTAWB5vIq0jYQ0qZqtBcx5lsvUjA8byeBNpHdagQbgalNjf1+ZeisebSMsr11xPrFoRrgbNNQPJ7Oehnbd4vIm0vPIgXULFbDVorhlIj4v3mMebSMuyBj01Sw2aYwfS4MI9t7mjBaSKNViDngtzxFLp/LXfE1wjHZKW0BOrNliD7mVivaBLJyNpTplYNcEa9EhHrBdz6XRImtMnYtUEa9C9RKwXcq1kJM0hEasWhK9BUw7RP6Ax3iNpDtagcSarQVN9jTfhkQFjLfZrYdKOJKxB55jkeJOpjjLxyIDx/gOPN5GmZg06TzU1KOHX5s6Nx5tI0+qJVQM2UYOmmIFkfGjnXB5vIk2nwRp0rklq0BQzkB4X7xIHhlmIpOtYgy5z9fEm1+5AGly4SyX8Sq90rQZr0KVWP96kdP7a7/XVnA5J1/hMrGveGnQvE/MFqy0ZSZfIxLrWN1eDrrmF9S2awgckXeI9msLiNSgRq8PWHr+/Lp0nEesarz2ZC1y6A/mAptQg6RzWoGm95wKXfI034ZEBU/N4E2m8hDVoahfVoEt2IHb+6flgoTSe89fpXVSDzt2BlH9J+dpcQlNzFyKNU3YfCU3t7Bp07g6k/K53QnMI85vF0ooarEFzOXsXcu4OxM4/rwMebyK9xho0r7OONzlnB9Lgws0t4YOF0ksarEFzK7uQd8zAIwOWSYekU8q1EeEatgY9k4GfzWLJSHosE+sa3kUNGnsL6z1a0gckPWYNWtaoGjRmiJ7woZ01lGH6AUkJa9Aavjnmp9f+hjE7ED8Nr8MHC6WBNWgdzVt/w1s7kISdfy0+WChZg9b0Zg16awfikQHr8XgTydnHmt6sQW/tQErnT2gt7kK0d9agdb1ag17bgTS4cGvzeBPtWYM1aG2v7kJe24HY+etwwONNtE/WoDoceKEGvbQDaXDhapHwwULtT4M1qBaJF2rQSw3EwVVd/Bqj9sYaVJfRNSgT65F7jxaQtiUT69rcbQ06tQN5j2r0AWkfrEF1+qIGPR+iJ3xop2Yeb6KtS1iDavakBn317C9+POZrVKvS8P8HabusQXV7UoMe70ASdv7a+WChtixhDardkxr0eAbifcf6ebyJtswaVL8nNejxDqR0/oRq5y5EW2UNiuHXGvSwA2lw4aLweBNtUYM1KIpfa9DDDsTOH0v5kZdvkLbDGhTL4Zg/lx1IgwsXTfmWSkbahgZrUDTpmFwaiIOrmHywUFthDYrpw5iftJUk6QulgfwdRfQD0jZYg2L6wSF6TAf8jRBty2d8Aj2SA/dD9OIfKBI/sWlr3FHH8ksNetiBlO/19vf/qbodcPeh7bEGxXHgvgY97EDKk4XuQmLwk5q2yBoUx6816PFRJn4CiMEj3bVV1qAYfq1Bj7/GWz4B+Om2brfYPLRd1qD63fKoBvmDUrG4+9DWJaxBNXtSg54/SFj+wh2q0S02D23fAWtQrW55VoNOPYnuV0Tr5NZee2ENqtPoGtQd87OpJh3SvpT3fIRrcy/5zAkvnYXlJ4C6uPvQ3liD6nLyK9a/e+W/0OPxJjU44IOD2idrUB0OvFCDXjuN10+9dfCTmPbKBwvr8GINem0H4kM96zvg7kP7ZQ1a34FXatBrOxCPFlifu0DtmTVofT++9hdf24EUfgJYlw8Oau+sQet6tQa99YuEHi2wnltsHpI1aD23vFGD3tqBFOVHXj6jpbn7kAYJjzdZw5s1aMxvov+ERwss7Rabh/TggDVoaf9kwhqUifXUZPRkJD2WiXUNW4Oe6YCfzezpkHRKuTYiXMPR0zPSmFtYD/w63TIcGEqn+VDtMka/zmOG6I/9C79ON6cDPjgovaZ8Ok5oLgfOqEFfcZ4/4P35Of2N4UsLkk4rH3r/E83lrBp07g7Eh3rmc8Ddh/QWa9B8Dsd8w/DszSjnzEDAowXm9COS3mINms8dZzSP4twdSOEngHn44KA0jjVoHmfXoHN3IEXpUP9EU7rF5iGN5fEm07tlwRqUifW95tqTkHSOcsRSpGvcGvRMR+wXrJa0SLpER6xrvdZ0rCAT88WqLRlJl8jEutatQc90xHzBaomnHEvX6Yh1zdeWnitcMkR/zEHWdfw6onQdjze5zlWv3yVf433O400uc8AHB6UplE/RCZ3rwJU16NodSOGn6Mv4yUmahndCLlNFDSq7j7ILiXC/r6b7ju7apGlYgy6rQYkrTbED8WiB891x5pEBkl5kDTrfHRM8ODjFDKR4+ASgcTy2RJqWx5ucZ5IaNMUOpPBogfFusXlIU/OIpfFuqbAGZWLdA1wrCUlzyMSqBdagZzpivZBLxyPbpXl1xKoJS6ejYplYL+bSyUiaUyZWTbAGPdMR6wVdKj2SltARqzaErUFTDdEfc5h+mg8OSsvwK72nTV6Dpvoa73Meb/LUAY8tkZZkDXrqwAw16Cvm8Qe83//Y3475CUlLsQY9FaoGebTA0/uOCUlLsgYtUIPmmIEUHi3wmzt8cFBamjXoN3fMVIPmmoEUHm8y8NgSaR3WoMFsNWiuHUjh8SYeWyKtyRoUvAZlYt0rnDoJSWvKxKoZ1qBnOmK94FOlQ1INyrUYoWZYg07IxHrRp0pGUg0ysWrHVHnHRnTEeuGvTY+kmnTEqiEhatCcQ/TH9jbI8tgSqS7WoBnM+TXex/b0a2EHPLZEqtFejjc5sFANmusok+f+l/0cLeCxJVKdrEGB7eFogR6/uivVag81qPz/SyxkqRlIsYejBe7wwUGpVnuoQeVXTw8sZKkZyIOtHy3gsSVS3axBE1pyB1KUTwBb/V3wW2weUu22fLzJLTuoQZlY9xTHJiEpgkys2mINeqYj1sK8lQ5JkZRrNkJtsQadkIm1OG8lIymSTKwa81YaduYzsRbopfRIiqgjVq2prgYtPUR/bCuDLI8tkWKyBgW2hYd6eiRFZg26wpo7kC081OPuQ4oteg3a9e++R96FLHpkgKRZRL8TkljRmjuQIvIuZNEjAyTNInINusUa9EsHjdTxq+j8kiZTdiGRao816JmOWAvXImlLPmENCisTa/ESkrYkE6sGfY2e6IixcB2StsgaFFgmxuK9Q9IWZWLUoAad1FP3wvVI2rIOa9Boa3+N97nav07ng4PSttV+vIk16BU1P9RTVeeXNAtr0Blq24HU/FCPnV/avppr0FZ/SXFStX4CSEjaA2vQSLXtQIoaf7P4Fo8MkPaixl3ILdag0RJ2fknrSViDQuuoY+FaJO2RNSiwTB2L55EB0j5l6qhBGV2kY92F65C0Z9agwDLrLl6DpD3LWINC61ln4XokCT5jDTqpxq/xPrfW1+l8cHDfMsPtg5IG7dlajxVYgyawxkM91Xd+zaK8177j9CfO8p5o0B5Zg4K7YdnF+w7tSSkQHxhXJPr7vzehPblh2Rp0gyaz9CeAhPYgMb5xnEqL75W9sAYF9z3LLFyLti4z7dczW/yu/h5YgwJLLLN4CW1VZt7v9Zd/doO2KmENCq1coHMuXIu25mEw3rPMxf8zDty3rMMaFFZm3sXLaCvOGYzP2Ug+4ifKLclYg0LrmGfhOrQFiaFo1/Z7Di02kq0otcIaFFRmnsVrUGSZek5PfauRZBRZxhoUWs+0C9ejqDIxGsfzlIcVGxRVjzUorBumXbwGRbLGYHyu9Azvvz+iSMr7zxoU1JQP9fQoihoG43M2khbnJFFYg4K7YZrFu0G1S9Q5GJ8rLTaSCG6YZr0/osVN9QkgoVplYs43psonHLjXzBoU3LVHC7SoRpl9N47n6fEeea2sQYGV3yu/ZvESqsXDfKMUy5/Nq40koVokrEGhdVy2cC2qwZYH43M2khaLTy06rEFhZS5bvIzWVHaPexqMz5UWG8naMtag0DrOW7gOrSXjfGOOdFiQ1lRe/3PW6zOqRua8xWvQ0jI2jiXS4/t7DRlrUGhjb4X0aCnON9ZvJAktpbzmY9dGlblh3OI1aG42jroaSYuNZAljjzdpUHXGPNTTozllhmJl46gzLTaSOY2tQZ57VqkbXl+8GzSHjPONSOlw4D6XG3iziatSb30CSGhK77BxRE6Pt1OmZg0K7hbs/DNyvrHdRpLQFF463qRF1cucXryErmHj2H7K2rZ4rVzrpSOWEgqh4+nCtehSmeH1q7HgmfnSYsG7RsfT1/NHFEbm6eJldK6M8w0zvAfKrEvnyTx9HTMKpbzxy8J5ZMB5Gmwc5sv0OHA/V8dvr52CKZ+ayuI16C3ON8zY9DhwHyszvGYNCsndx+tsHObSOHAfp0Nh+cTnaRkH42a6tAzfPNKXrEHajIzzDTNfOrxdI21Og43DLJceG4kUmvMNs3b6Y77FOYkUho3D1JgWG4lUrXzMJ2IVFbO/tPhQnVSNjPMNEy8dzkmkVZTbVA02DhM/PTaSsH6HIimNowwlv8Pvl2tbDgw/xfDD/R9LmkjCwbjZT1ocuEtXy3ibyuw3LQ7cpbNlbBzGPKTDOYn0qjLTKLONcgBklAvbmCXTYyOpikP09TkYl85zwIG7di4d8xEH48ZckxYH7tqRjPMNY6ZOiwN3bVjGxmHM3CkzxAZpAx4G4z2xLkJjoqfHRjI7h+jzcDAu1eFwzI/H/AMH7pOzgUwrMTSOBhuHVJvbY/6OjUSVyTjfMCZKys8eZKSVZWwcxkRNj3OSq3gL63IdfoqRtuCnY/5yzL/RWX6PLlXecH9lePNJiueO4Rr+BpvHRdyBTCMzbIXfI6l2dwzD9Dt0FRvItNIxN9hIpBrdMpyfdYcmYQOZR2J4BuS/8JweaU3l1lR5BuR7vE2lgBp8Et2YpVMOKb3B57Fm5Q5kOQ3Dra2MpLncMew4fkTaoMxwLzbKJzljIqTDD2eLcweynoQDd+kaZaZRdhoOxldiA1lf4rfbWwlJb3EwLp3Q4MDdmJfS42BcelODZ2wZ85AO5xvS2TIO3M1+02HjkK6WsJGYfaQ8v1FmG1+j6jlEjyXhwF3b5GBcWlCDA3cTPz3+9LO0mgYH7iZeOpxvSNUo94xviVVEzP7SYeOQqpWwkZi68jAYT0gKITHcW+6JVWzMduKJuNIGNNhIzHLpcTAubc47HLib+dLhfEPavIRzEjNdOmwc0u4khkZS7lVHKVamjjgYl/SLhHMSMy4OxiW9qMFGYr7MZxyMSxop48DdON+QdIWEA3cbhyRdITE0kp5YhdCMz8N8IyFJM0g4J9lq43C+IWkxDTaSyOlwMC5pZRkH7tEaR0aSKpJw4F5zPmHjkFS5hAP3WuJ8Q1JICeckNg5JulKDjWSJdPevtY1D0uZkhnvxUQpypMaRkaQdSDhwnyItNg5JO5Vw4H5unG9I0iOlGDbYSGwcknSFBhvJ43T3r4kkaaTMvuckHc43JOkqiX01khYbhyRNKjH8DndPrIYwJs43JGkhDdtoJDYOSVpJw/B73ZGaRkmHg3FJqkImxpykw/mGJFUpUV8jKbepWmwckhRCYpgt9KzbOMr/BucbkhRUw7KNxMYhSRvTMO9P75Z/9jskSZuVmXZO0uF8Q5J2JXF5Iym3qcpDjV8jSdqtxPiBu/MNSdJJDacbSflzN9g4JElvaBhmG843pBf8P8QugRRrUGSGAAAAAElFTkSuQmCC";


================================================
FILE: widget/src/config/config.ts
================================================
import { WidgetConfig } from "@/types/widget-types";

/**
 * Resolve widget configuration from attributes and environment variables
 * Priority: attributes > environment variables > defaults
 */
export function resolveConfig(attributes: Partial<WidgetConfig>): WidgetConfig {
  const config = {
    backendUrl:
      attributes.backendUrl || import.meta.env.VITE_WIDGET_BACKEND_URL || "",
    apiKey: attributes.apiKey || import.meta.env.VITE_WIDGET_API_KEY || "",
    agentId: attributes.agentId,
    primaryColor: attributes.primaryColor,
    backgroundColor: attributes.backgroundColor,
    textColor: attributes.textColor,
    agentName: attributes.agentName || "Assistant",
    logo: attributes.logo,
    mode: attributes.mode || "launcher",
    includeCitations: attributes.includeCitations ?? false,
  };

  if (!config.backendUrl || !config.apiKey) {
    throw new Error(
      "backendUrl and apiKey are required for the widget to function",
    );
  }

  return config;
}


================================================
FILE: widget/src/index.ts
================================================
/**
 * Onyx Chat Widget - Entry Point
 * Exports the main web component
 */

import { OnyxChatWidget } from "./widget";

// Define the custom element
if (
  typeof customElements !== "undefined" &&
  !customElements.get("onyx-chat-widget")
) {
  customElements.define("onyx-chat-widget", OnyxChatWidget);
}

// Export for use in other modules
export { OnyxChatWidget };
export * from "./types/api-types";
export * from "./types/widget-types";


================================================
FILE: widget/src/services/api-service.ts
================================================
/**
 * API Service - Handles all communication with Onyx backend
 */

import {
  Packet,
  CreateSessionRequest,
  CreateSessionResponse,
  SendMessageRequest,
} from "@/types/api-types";

export class ApiService {
  private maxRetries = 3;
  private retryDelay = 1000;

  constructor(
    private backendUrl: string,
    private apiKey: string,
  ) {}

  /**
   * Create a new chat session
   */
  async createChatSession(agentId?: number): Promise<string> {
    const request: CreateSessionRequest = {};
    if (agentId !== undefined) {
      request.persona_id = agentId;
    }

    const response = await this.fetchWithRetry(
      `${this.backendUrl}/chat/create-chat-session`,
      {
        method: "POST",
        headers: this.getHeaders(),
        body: JSON.stringify(request),
      },
    );

    if (!response.ok) {
      throw new Error(
        `Failed to create session: ${response.status} ${response.statusText}`,
      );
    }

    const data = (await response.json()) as CreateSessionResponse;
    return data.chat_session_id;
  }

  /**
   * Stream a message to the chat
   * Returns an async generator of packets
   */
  async *streamMessage(params: {
    message: string;
    chatSessionId: string;
    parentMessageId?: number | null;
    signal?: AbortSignal;
    includeCitations?: boolean;
  }): AsyncGenerator<Packet, void, unknown> {
    const request: SendMessageRequest = {
      message: params.message,
      chat_session_id: params.chatSessionId,
      parent_message_id: params.parentMessageId ?? null,
      origin: "widget",
      include_citations: params.includeCitations ?? false,
    };

    const response = await this.fetchWithRetry(
      `${this.backendUrl}/chat/send-chat-message`,
      {
        method: "POST",
        headers: this.getHeaders(),
        body: JSON.stringify(request),
        signal: params.signal,
      },
    );

    if (!response.ok) {
      throw new Error(
        `Failed to send message: ${response.status} ${response.statusText}`,
      );
    }

    // Parse SSE stream
    yield* this.parseSSEStream(response);
  }

  /**
   * Parse Server-Sent Events stream
   * Backend returns newline-delimited JSON packets
   */
  private async *parseSSEStream(
    response: Response,
  ): AsyncGenerator<Packet, void, unknown> {
    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error("Response body is not readable");
    }

    const decoder = new TextDecoder();
    let buffer = "";

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer

        for (const line of lines) {
          if (line.trim()) {
            try {
              const rawData = JSON.parse(line);

              // Check if this is a MessageResponseIDInfo (not wrapped in Packet)
              if (
                "user_message_id" in rawData &&
                "reserved_assistant_message_id" in rawData
              ) {
                // Wrap it in a Packet structure for consistent handling
                const packet: Packet = {
                  obj: rawData as any,
                };
                yield packet;
              } else {
                // Regular packet with placement and obj
                yield rawData as Packet;
              }
            } catch (e) {
              // Fail fast on malformed packets - don't hide backend issues
              throw new Error(
                `Failed to parse SSE packet: ${line}. Error: ${e}`,
              );
            }
          }
        }
      }

      // Process any remaining data in buffer
      if (buffer.trim()) {
        try {
          const rawData = JSON.parse(buffer);

          // Check if this is a MessageResponseIDInfo (not wrapped in Packet)
          if (
            "user_message_id" in rawData &&
            "reserved_assistant_message_id" in rawData
          ) {
            const packet: Packet = {
              obj: rawData as any,
            };
            yield packet;
          } else {
            yield rawData as Packet;
          }
        } catch (e) {
          // Fail fast on malformed final buffer packets
          throw new Error(
            `Failed to parse final packet: ${buffer}. Error: ${e}`,
          );
        }
      }
    } finally {
      reader.releaseLock();
    }
  }

  /**
   * Fetch with retry logic for network failures and 5xx errors
   */
  private async fetchWithRetry(
    url: string,
    options: RequestInit,
    retries = 0,
  ): Promise<Response> {
    try {
      const response = await fetch(url, options);

      // Retry on 5xx or 429 errors
      if (!response.ok && retries < this.maxRetries) {
        if (response.status >= 500 || response.status === 429) {
          const delay = this.retryDelay * Math.pow(2, retries);
          await new Promise((resolve) => setTimeout(resolve, delay));
          return this.fetchWithRetry(url, options, retries + 1);
        }
      }

      return response;
    } catch (error) {
      // Don't retry if the request was aborted by the caller
      if (error instanceof Error && error.name === "AbortError") {
        throw error;
      }

      // Retry on network errors
      if (retries < this.maxRetries) {
        const delay = this.retryDelay * Math.pow(2, retries);
        await new Promise((resolve) => setTimeout(resolve, delay));
        return this.fetchWithRetry(url, options, retries + 1);
      }
      throw error;
    }
  }

  /**
   * Get common headers for API requests
   */
  private getHeaders(): Record<string, string> {
    return {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
    };
  }
}


================================================
FILE: widget/src/services/stream-parser.ts
================================================
/**
 * Stream Parser - Processes SSE packets and updates state
 */

import { Packet, Message, SearchDocument } from "@/types/api-types";
import { ChatMessage } from "@/types/widget-types";

export interface ParsedMessage {
  message: ChatMessage;
  isComplete: boolean;
}

export interface MessageIDs {
  userMessageId: number | null;
  assistantMessageId: number;
}

/**
 * Process a single packet from the SSE stream
 * Returns the current message being built and any state updates
 */
export function processPacket(
  packet: Packet,
  currentMessage: ChatMessage | null,
): {
  message: ChatMessage | null;
  citation?: { citation_number: number; document_id: string };
  documents?: SearchDocument[];
  status?: string;
  messageIds?: MessageIDs;
} {
  // Safety check - throw on malformed packets to fail fast
  if (!packet || !packet.obj) {
    throw new Error("Received malformed packet: packet.obj is missing");
  }

  const obj = packet.obj;

  // Handle MessageResponseIDInfo (doesn't have a type field)
  if ("reserved_assistant_message_id" in obj && "user_message_id" in obj) {
    return {
      message: currentMessage,
      messageIds: {
        userMessageId: obj.user_message_id,
        assistantMessageId: obj.reserved_assistant_message_id,
      },
    };
  }

  // Type guard - ensure obj has a type field
  if (!("type" in obj)) {
    throw new Error("Packet missing type field");
  }

  switch (obj.type) {
    case "message_start":
      // Start of a new assistant response
      return {
        message: {
          id: `msg-${Date.now()}`,
          role: "assistant",
          content: "",
          timestamp: Date.now(),
          isStreaming: true,
        },
        status: "", // Clear status when response starts
      };

    case "message_delta":
      // Append to current message
      if (currentMessage && currentMessage.role === "assistant") {
        return {
          message: {
            ...currentMessage,
            content: currentMessage.content + (obj.content || ""),
          },
          // No status update - let the message speak for itself
        };
      }
      return { message: currentMessage };

    case "citation_info":
      // Handle individual citation info packet
      return {
        message: currentMessage,
        citation: {
          citation_number: obj.citation_number,
          document_id: obj.document_id,
        },
      };

    case "search_tool_start":
      // Tool is starting - check if it's internet search
      return {
        message: currentMessage,
        status: obj.is_internet_search
          ? "Searching the web..."
          : "Searching internally...",
      };

    case "search_tool_queries_delta":
      // Queries being generated
      return {
        message: currentMessage,
        status: "Generating search queries...",
      };

    case "search_tool_documents_delta":
      // Search results coming in — capture document metadata for citation resolution
      return {
        message: currentMessage,
        documents: obj.documents,
        status: "Reading documents...",
      };

    case "open_url_start":
      return {
        message: currentMessage,
        status: "Opening URLs...",
      };

    case "open_url_urls":
      return {
        message: currentMessage,
        status: "Fetching web pages...",
      };

    case "open_url_documents":
      // Capture documents from URL fetching for citation resolution
      return {
        message: currentMessage,
        documents: obj.documents,
        status: "Processing web content...",
      };

    case "image_generation_start":
      return {
        message: currentMessage,
        status: "Generating image...",
      };

    case "image_generation_heartbeat":
      return {
        message: currentMessage,
        status: "Generating image...",
      };

    case "python_tool_start":
      return {
        message: currentMessage,
        status: "Running Python code...",
      };

    case "python_tool_delta":
      return {
        message: currentMessage,
        status: "Running Python code...",
      };

    case "custom_tool_start":
      return {
        message: currentMessage,
        status: "Running custom tool...",
      };

    case "reasoning_start":
      return {
        message: currentMessage,
        status: "Thinking...",
      };

    case "reasoning_delta":
      return {
        message: currentMessage,
        status: "Thinking...",
      };

    case "deep_research_plan_start":
      return {
        message: currentMessage,
        status: "Planning research...",
      };

    case "research_agent_start":
      return {
        message: currentMessage,
        status: "Researching...",
      };

    case "intermediate_report_start":
      return {
        message: currentMessage,
        status: "Generating report...",
      };

    case "stop":
    case "overall_stop":
      // End of stream - mark message as complete
      if (currentMessage) {
        return {
          message: {
            ...currentMessage,
            isStreaming: false,
          },
        };
      }
      return { message: currentMessage };

    case "error":
      // Error occurred during streaming - throw to fail fast
      throw new Error(`Stream error: ${obj.exception}`);

    default:
      // Unknown packet type
      return { message: currentMessage };
  }
}

/**
 * Convert API Message type to widget ChatMessage
 */
export function convertMessage(msg: Message): ChatMessage {
  return {
    id: msg.id,
    role: msg.role,
    content: msg.content,
    timestamp: msg.timestamp,
    isStreaming: msg.isStreaming,
  };
}

/**
 * Check if a packet is the final packet in a stream
 */
export function isStreamComplete(packet: Packet): boolean {
  return "type" in packet.obj && packet.obj.type === "overall_stop";
}

/**
 * Check if a packet is an error
 */
export function isStreamError(packet: Packet): boolean {
  return "type" in packet.obj && packet.obj.type === "error";
}


================================================
FILE: widget/src/styles/colors.ts
================================================
import { css } from "lit";

export const colors = css`
  :host {
    /* Base Colors - Aligned with web/src/app/css/colors.css */
    --grey-100: #000000;
    --grey-10: #e6e6e6;
    --grey-00: #ffffff;
    --alpha-grey-100-75: #000000bf;
    --alpha-grey-100-20: #00000033;

    /* Onyx Brand Colors */
    --onyx-ink-100: #000000;
    --onyx-ink-95: #1c1c1c;

    /* Theme / Primary - Configurable via env vars */
    --theme-primary-06: var(--onyx-ink-100);
    --theme-primary-05: var(--onyx-ink-95);

    /* Background / Neutral */
    --background-neutral-00: var(--grey-00);
    --background-neutral-03: var(--grey-10);

    /* Text */
    --text-04: var(--alpha-grey-100-75);
    --text-light-05: var(--grey-00);

    /* Border */
    --border-01: var(--alpha-grey-100-20);

    /* Shadow */
    --shadow-02: 0px 2px 12px rgba(0, 0, 0, 0.1);

    /* Status / Error */
    --status-error-01: #fee;
    --status-error-05: #c00;
  }
`;


================================================
FILE: widget/src/styles/theme.ts
================================================
import { css } from "lit";
import { colors } from "./colors";

/**
 * Onyx Design System - Theme
 * Typography, spacing, and layout tokens from Figma
 */
export const theme = css`
  ${colors}

  :host {
    /* Typography - Hanken Grotesk */
    --onyx-font-family: "Hanken Grotesk", -apple-system, BlinkMacSystemFont,
      "Segoe UI", sans-serif;
    --onyx-font-family-mono: "DM Mono", "Monaco", "Menlo", monospace;

    /* Font Sizes */
    --onyx-font-size-small: 10px;
    --onyx-font-size-secondary: 12px;
    --onyx-font-size-sm: 13px;
    --onyx-font-size-main: 14px;
    --onyx-font-size-label: 16px;

    /* Line Heights */
    --onyx-line-height-small: 12px;
    --onyx-line-height-secondary: 16px;
    --onyx-line-height-main: 20px;
    --onyx-line-height-label: 24px;
    --onyx-line-height-section: 28px;
    --onyx-line-height-headline: 36px;

    /* Font Weights */
    --onyx-weight-regular: 400;
    --onyx-weight-medium: 500;
    --onyx-weight-semibold: 600;

    /* Content Heights */
    --onyx-height-content-secondary: 12px;
    --onyx-height-content-main: 16px;
    --onyx-height-content-label: 18px;
    --onyx-height-content-section: 24px;

    /* Border Radius - from Figma */
    --onyx-radius-04: 4px;
    --onyx-radius-08: 8px;
    --onyx-radius-12: 12px;
    --onyx-radius-16: 16px;
    --onyx-radius-round: 1000px;

    /* Spacing - Block */
    --onyx-space-block-1x: 4px;
    --onyx-space-block-2x: 8px;
    --onyx-space-block-3x: 12px;
    --onyx-space-block-4x: 16px;
    --onyx-space-block-6x: 24px;

    /* Spacing - Inline */
    --onyx-space-inline-0: 0px;
    --onyx-space-inline-0_5x: 2px;
    --onyx-space-inline-1x: 4px;

    /* Legacy spacing aliases (for compatibility) */
    --onyx-space-2xs: var(--onyx-space-block-1x);
    --onyx-space-xs: var(--onyx-space-block-2x);
    --onyx-space-sm: var(--onyx-space-block-3x);
    --onyx-space-md: var(--onyx-space-block-4x);
    --onyx-space-lg: var(--onyx-space-block-6x);

    /* Padding */
    --onyx-padding-icon-0: 0px;
    --onyx-padding-icon-0_5x: 2px;
    --onyx-padding-text-0_5x: 2px;
    --onyx-padding-text-1x: 4px;

    /* Icon Weights (stroke-width) */
    --onyx-icon-weight-secondary: 1px;
    --onyx-icon-weight-main: 1.5px;
    --onyx-icon-weight-section: 2px;

    /* Z-index */
    --onyx-z-launcher: 9999;
    --onyx-z-widget: 10000;

    /* Transitions */
    --onyx-transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
    --onyx-transition-base: 200ms cubic-bezier(0.4, 0, 0.2, 1);
  }

  * {
    box-sizing: border-box;
  }
`;


================================================
FILE: widget/src/styles/widget-styles.ts
================================================
import { css } from "lit";

/**
 * Onyx Chat Widget - Component Styles
 * All styling for the main widget component
 */
export const widgetStyles = css`
  :host {
    display: block;
    font-family: var(--onyx-font-family);
  }

  .launcher {
    position: fixed;
    background: var(--background-neutral-00);
    bottom: 20px;
    right: 20px;
    width: 56px;
    height: 56px;
    border-radius: 50%;
    color: var(--text-light-05);
    border: none;
    cursor: pointer;
    box-shadow: var(--shadow-02);
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: var(--onyx-z-launcher);
    transition:
      transform 200ms cubic-bezier(0.4, 0, 0.2, 1),
      box-shadow 200ms cubic-bezier(0.4, 0, 0.2, 1),
      background 200ms cubic-bezier(0.4, 0, 0.2, 1);
  }

  .launcher img {
    filter: drop-shadow(0px 1px 2px rgba(255, 255, 255, 0.3));
  }

  .launcher:hover {
    transform: translateY(-2px);
    background: var(--background-neutral-03);
    box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.2);
  }

  .launcher:active {
    transform: translateY(0px);
    box-shadow: var(--shadow-02);
  }

  .container {
    position: fixed;
    bottom: 20px;
    right: 20px;
    width: 400px;
    height: 600px;
    background: var(--background-neutral-00);
    border-radius: var(--onyx-radius-16);
    box-shadow: var(--shadow-02);
    display: flex;
    flex-direction: column;
    overflow: hidden;
    z-index: var(--onyx-z-widget);
    border: 1px solid var(--border-01);
    animation: fadeInSlideUp 300ms cubic-bezier(0.4, 0, 0.2, 1) forwards;
    opacity: 0;
    transform: translateY(20px);
  }

  @keyframes fadeInSlideUp {
    to {
      opacity: 1;
      transform: translateY(0);
    }
  }

  .container.inline {
    position: static;
    width: 100%;
    height: 100%;
    border-radius: var(--onyx-radius-08);
    animation: none;
    opacity: 1;
    transform: none;
  }

  .container.inline.compact {
    background: transparent;
    border: none;
    box-shadow: none;
    border-radius: var(--onyx-radius-16);
  }

  @media (max-width: 768px) {
    .container:not(.inline) {
      position: fixed;
      inset: 0;
      width: 100vw;
      height: 100vh;
      border-radius: 0;
      bottom: 0;
      right: 0;
    }
  }

  .header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: var(--onyx-space-md);
    background: var(--background-neutral-00);
    color: var(--text-04);
    border-bottom: 1px solid var(--border-01);
  }

  .header-left {
    display: flex;
    align-items: center;
    gap: var(--onyx-space-sm);
  }

  .header-right {
    display: flex;
    align-items: center;
    gap: var(--onyx-space-xs);
  }

  .avatar {
    width: 32px;
    height: 32px;
    border-radius: 50%;
    background: var(--background-neutral-00);
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 18px;
  }

  .header-title {
    font-weight: 600;
    font-size: var(--onyx-font-size-label);
    line-height: var(--onyx-line-height-label);
    color: var(--text-04);
  }

  .icon-button {
    background: none;
    border: none;
    color: var(--text-04);
    cursor: pointer;
    padding: var(--onyx-space-xs);
    border-radius: var(--onyx-radius-08);
    display: flex;
    align-items: center;
    justify-content: center;
    transition:
      background var(--onyx-transition-fast),
      color var(--onyx-transition-fast);
    font-size: 18px;
    width: 32px;
    height: 32px;
  }

  .icon-button:hover {
    background: var(--background-neutral-00);
    color: var(--text-04);
  }

  .messages {
    flex: 1;
    overflow-y: auto;
    padding: var(--onyx-space-md);
    display: flex;
    flex-direction: column;
    gap: var(--onyx-space-md);
    background: var(--background-neutral-00);
  }

  .message {
    display: flex;
    flex-direction: column;
    gap: var(--onyx-space-xs);
  }

  .message.user {
    align-items: flex-end;
  }

  .message.assistant {
    align-items: flex-start;
  }

  .message-bubble {
    max-width: 85%;
    padding: var(--onyx-space-sm) var(--onyx-space-md);
    border-radius: var(--onyx-radius-12);
    word-wrap: break-word;
    font-size: var(--onyx-font-size-main);
    line-height: var(--onyx-line-height-main);
  }

  .message.user .message-bubble {
    background: var(--onyx-user-message-bg);
    color: var(--text-04);
    border: 1px solid var(--border-01);
  }

  .message.assistant .message-bubble {
    background: var(--onyx-assistant-message-bg);
    color: var(--text-04);
    border: 1px solid var(--border-01);
  }

  /* Markdown styles */
  .message-bubble :first-child {
    margin-top: 0;
  }

  .message-bubble :last-child {
    margin-bottom: 0;
  }

  .message-bubble p {
    margin: 0.5em 0;
  }

  .message-bubble code {
    background: rgba(0, 0, 0, 0.08);
    padding: 2px 4px;
    border-radius: 3px;
    font-family: "Monaco", "Courier New", monospace;
    font-size: 0.9em;
  }

  .message-bubble pre {
    background: rgba(0, 0, 0, 0.08);
    padding: var(--onyx-space-sm);
    border-radius: var(--onyx-radius-sm);
    overflow-x: auto;
    margin: 0.5em 0;
  }

  .message-bubble pre code {
    background: none;
    padding: 0;
  }

  .message-bubble ul,
  .message-bubble ol {
    margin: 0.5em 0;
    padding-left: 1.5em;
  }

  .message-bubble li {
    margin: 0.25em 0;
  }

  .message-bubble a {
    color: var(--theme-primary-05);
    text-decoration: underline;
  }

  .message-bubble a:hover {
    text-decoration: none;
  }

  .message-bubble h1,
  .message-bubble h2,
  .message-bubble h3,
  .message-bubble h4,
  .message-bubble h5,
  .message-bubble h6 {
    margin: 0.5em 0 0.25em 0;
    font-weight: 600;
  }

  .message-bubble h1 {
    font-size: 1.5em;
  }
  .message-bubble h2 {
    font-size: 1.3em;
  }
  .message-bubble h3 {
    font-size: 1.1em;
  }

  .message-bubble blockquote {
    border-left: 3px solid var(--border-01);
    margin: 0.5em 0;
    padding-left: var(--onyx-space-md);
    color: var(--text-04);
  }

  .message-bubble strong {
    font-weight: 600;
  }

  .message-bubble em {
    font-style: italic;
  }

  .message-bubble hr {
    border: none;
    border-top: 1px solid var(--border-01);
    margin: 0.5em 0;
  }

  .status-container {
    display: flex;
    align-items: center;
    gap: var(--onyx-space-sm);
  }

  .typing-indicator {
    display: flex;
    gap: 4px;
  }

  .typing-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: var(--text-04);
    animation: typing 1.4s infinite;
  }

  .typing-dot:nth-child(2) {
    animation-delay: 0.2s;
  }

  .typing-dot:nth-child(3) {
    animation-delay: 0.4s;
  }

  @keyframes typing {
    0%,
    60%,
    100% {
      opacity: 0.3;
      transform: translateY(0);
    }
    30% {
      opacity: 1;
      transform: translateY(-4px);
    }
  }

  .status-text {
    color: var(--text-04);
    font-size: var(--onyx-font-size-sm);
    font-style: italic;
  }

  .input-wrapper {
    border-top: 1px solid var(--border-01);
    background: var(--background-neutral-00);
  }

  .input-container {
    padding: var(--onyx-space-md) var(--onyx-space-md) 4px;
    display: flex;
    align-items: center;
    gap: var(--onyx-space-xs);
  }

  .input {
    flex: 1;
    min-width: 0;
    padding: var(--onyx-space-xs) var(--onyx-space-sm);
    border: 1px solid var(--theme-primary-05);
    border-radius: var(--onyx-radius-08);
    font-size: var(--onyx-font-size-main);
    line-height: var(--onyx-line-height-main);
    outline: none;
    font-family: var(--onyx-font-family);
    background: var(--background-neutral-00);
    color: var(--text-04);
    transition:
      border-color var(--onyx-transition-fast),
      box-shadow var(--onyx-transition-fast);
    height: 36px;
  }

  .input:focus {
    border-color: var(--theme-primary-05);
    outline: 2px solid var(--theme-primary-05);
    outline-offset: -2px;
  }

  .powered-by {
    font-size: 10px;
    color: var(--text-04);
    opacity: 0.5;
    text-align: center;
    padding: 0 var(--onyx-space-md) var(--onyx-space-xs);
  }

  .powered-by a {
    color: var(--text-04);
    text-decoration: none;
    transition: opacity var(--onyx-transition-fast);
  }

  .powered-by a:hover {
    opacity: 0.8;
    text-decoration: underline;
  }

  .send-button {
    background: var(--theme-primary-05);
    border: none;
    color: var(--text-light-05);
    cursor: pointer;
    padding: var(--onyx-space-sm);
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    transition:
      background var(--onyx-transition-fast),
      transform var(--onyx-transition-fast);
    flex-shrink: 0;
    width: 36px;
    height: 36px;
  }

  .send-button svg {
    width: 18px;
    height: 18px;
  }

  .send-button:hover:not(:disabled) {
    background: var(--theme-primary-06);
    transform: scale(1.05);
  }

  .send-button:active:not(:disabled) {
    transform: scale(0.95);
  }

  .send-button:disabled {
    opacity: 0.5;
    cursor: not-allowed;
  }

  .disclaimer {
    padding: var(--onyx-space-xs) var(--onyx-space-md);
    background: var(--background-neutral-00);
    color: var(--text-04);
    font-size: 11px;
    line-height: 1.3;
    text-align: center;
    border-bottom: 1px solid var(--border-01);
  }

  .error {
    padding: var(--onyx-space-md);
    background: var(--status-error-01);
    color: var(--status-error-05);
    border-radius: var(--onyx-radius-08);
    margin: var(--onyx-space-md);
    font-size: var(--onyx-font-size-main);
  }

  /* Compact inline mode (no messages) */
  .container.compact {
    height: auto;
    min-height: unset;
    border: none;
    box-shadow: none;
    background: transparent;
  }

  .compact-input-container {
    display: flex;
    align-items: center;
    gap: var(--onyx-space-sm);
    padding: var(--onyx-space-md);
    background: var(--background-neutral-00);
    border-radius: var(--onyx-radius-16);
    border: 1px solid var(--border-01);
    box-shadow: var(--shadow-02);
    transition:
      border-color var(--onyx-transition-base),
      box-shadow var(--onyx-transition-base);
  }

  .compact-input-container:focus-within {
    border-color: var(--text-04);
    box-shadow:
      var(--shadow-02),
      0 0 0 3px var(--background-neutral-00);
  }

  .compact-avatar {
    width: 40px;
    height: 40px;
    border-radius: 50%;
    background: var(--background-neutral-00);
    display: flex;
    align-items: center;
    justify-content: center;
    flex-shrink: 0;
    color: var(--text-light-05);
    box-shadow: 0px 2px 8px rgba(0, 0, 0, 0.1);
  }

  .compact-input {
    flex: 1;
    min-width: 0;
    padding: var(--onyx-space-sm);
    border: none;
    font-size: var(--onyx-font-size-label);
    line-height: var(--onyx-line-height-label);
    outline: none;
    font-family: var(--onyx-font-family);
    background: transparent;
    color: var(--text-04);
    font-weight: 500;
  }

  .compact-input::placeholder {
    color: var(--text-04);
    font-weight: 400;
  }

  /* Inline citation superscripts */
  .message-bubble sup {
    font-size: 0.65em;
    color: var(--theme-primary-05);
    font-weight: 700;
    opacity: 0.5;
    cursor: default;
    letter-spacing: -0.02em;
  }

  /* Citation source row */
  .citation-list {
    display: flex;
    flex-wrap: wrap;
    align-items: stretch;
    gap: 6px;
    margin-top: 10px;
  }

  .citation-badge {
    display: inline-flex;
    align-items: center;
    gap: 5px;
    font-size: 12px;
    font-weight: 500;
    padding: 4px 10px 4px 8px;
    border-radius: var(--onyx-radius-08);
    background: var(--background-neutral-00);
    color: var(--text-04);
    text-decoration: none;
    cursor: pointer;
    border: 1px solid var(--border-01);
    transition:
      border-color 150ms ease,
      background 150ms ease;
    line-height: 1.2;
    font-family: var(--onyx-font-family);
  }

  .citation-badge .citation-num {
    font-size: 11px;
    font-weight: 600;
    color: var(--text-04);
    opacity: 0.45;
    flex-shrink: 0;
  }

  .citation-badge .citation-title {
    overflow: hidden;
    text-overflow: ellipsis;
    white-space: nowrap;
    max-width: 180px;
    font-size: 11px;
    opacity: 0.8;
    text-decoration: none;
  }

  a.citation-badge,
  a.citation-badge:visited,
  a.citation-badge:active,
  a.citation-badge:hover {
    text-decoration: none !important;
  }

  a.citation-badge:hover {
    border-color: var(--theme-primary-05);
    background: var(--background-neutral-03);
  }

  span.citation-badge {
    cursor: default;
  }

  .citation-more {
    display: inline-flex;
    align-items: center;
    font-size: 11px;
    font-weight: 500;
    padding: 4px 10px;
    border-radius: var(--onyx-radius-08);
    background: none;
    color: var(--text-04);
    opacity: 0.6;
    border: 1px dashed var(--border-01);
    cursor: pointer;
    font-family: var(--onyx-font-family);
    transition:
      opacity 150ms ease,
      border-color 150ms ease;
  }

  .citation-more:hover {
    opacity: 1;
    border-color: var(--theme-primary-05);
  }

  .citation-list.expanded .citation-more {
    display: none;
  }

  .citation-overflow {
    display: none;
    flex-wrap: wrap;
    gap: 6px;
    width: 100%;
  }

  .citation-list.expanded .citation-overflow {
    display: flex;
  }
`;


================================================
FILE: widget/src/types/api-types.ts
================================================
/**
 * API Types - Mirror backend streaming_models.py packet structure
 */

export interface Packet {
  placement?: Record<string, any>;
  obj: PacketType;
}

export type PacketType =
  | MessageResponseIDInfo
  | MessageStart
  | MessageDelta
  | CitationInfo
  | SearchToolStart
  | SearchToolQueriesDelta
  | SearchToolDocumentsDelta
  | OpenUrlStart
  | OpenUrlUrls
  | OpenUrlDocuments
  | ImageGenerationStart
  | ImageGenerationHeartbeat
  | PythonToolStart
  | PythonToolDelta
  | CustomToolStart
  | ReasoningStart
  | ReasoningDelta
  | DeepResearchPlanStart
  | ResearchAgentStart
  | IntermediateReportStart
  | Stop
  | OverallStop
  | ErrorPacket;

export interface MessageResponseIDInfo {
  type?: "message_response_id_info"; // Optional for backend compatibility
  user_message_id: number | null;
  reserved_assistant_message_id: number;
}

export interface MessageStart {
  type: "message_start";
}

export interface MessageDelta {
  type: "message_delta";
  content: string;
}

export interface CitationInfo {
  type: "citation_info";
  citation_number: number;
  document_id: string;
}

export interface ResolvedCitation {
  citation_number: number;
  document_id: string;
  semantic_identifier?: string;
  link?: string;
}

export interface SearchToolStart {
  type: "search_tool_start";
  is_internet_search?: boolean;
}

export interface SearchToolQueriesDelta {
  type: "search_tool_queries_delta";
  queries: string[];
}

export interface SearchToolDocumentsDelta {
  type: "search_tool_documents_delta";
  documents: SearchDocument[];
}

export interface SearchDocument {
  document_id: string;
  semantic_identifier: string;
  title: string;
  link?: string;
}

export interface OpenUrlStart {
  type: "open_url_start";
}

export interface OpenUrlUrls {
  type: "open_url_urls";
  urls: string[];
}

export interface OpenUrlDocuments {
  type: "open_url_documents";
  documents: SearchDocument[];
}

export interface ImageGenerationStart {
  type: "image_generation_start";
}

export interface ImageGenerationHeartbeat {
  type: "image_generation_heartbeat";
}

export interface PythonToolStart {
  type: "python_tool_start";
}

export interface PythonToolDelta {
  type: "python_tool_delta";
  code?: string;
}

export interface CustomToolStart {
  type: "custom_tool_start";
}

export interface ReasoningStart {
  type: "reasoning_start";
}

export interface ReasoningDelta {
  type: "reasoning_delta";
  reasoning: string;
}

export interface DeepResearchPlanStart {
  type: "deep_research_plan_start";
}

export interface ResearchAgentStart {
  type: "research_agent_start";
}

export interface IntermediateReportStart {
  type: "intermediate_report_start";
}

export interface Stop {
  type: "stop";
}

export interface OverallStop {
  type: "overall_stop";
}

export interface ErrorPacket {
  type: "error";
  exception: string;
}

export interface Message {
  id: string;
  role: "user" | "assistant";
  content: string;
  timestamp: number;
  isStreaming?: boolean;
  citations?: ResolvedCitation[];
}

export interface ChatSession {
  id: string;
  created_at: string;
  updated_at: string;
  messages: Message[];
}

export interface SendMessageRequest {
  message: string;
  chat_session_id?: string;
  parent_message_id?: number | null;
  origin?: string;
  include_citations?: boolean;
}

export interface CreateSessionRequest {
  persona_id?: number;
}

export interface CreateSessionResponse {
  chat_session_id: string;
}


================================================
FILE: widget/src/types/widget-types.ts
================================================
/**
 * Widget-specific types
 */

import { ResolvedCitation } from "@/types/api-types";

export interface WidgetConfig {
  // Required
  backendUrl: string;
  apiKey: string;

  // Optional - Assistant
  agentId?: number;
  agentName?: string;
  logo?: string;

  // Optional - Customization
  primaryColor?: string;
  backgroundColor?: string;
  textColor?: string;

  // Optional - Display
  mode?: "launcher" | "inline";

  // Optional - Citations
  includeCitations?: boolean;
}

export interface ChatState {
  sessionId?: string;
  messages: ChatMessage[];
  isLoading: boolean;
  error?: string;
}

export interface ChatMessage {
  id: string | number; // string for temporary local IDs, number for backend IDs
  role: "user" | "assistant";
  content: string;
  timestamp: number;
  isStreaming?: boolean;
  citations?: ResolvedCitation[];
}


================================================
FILE: widget/src/utils/storage.ts
================================================
/**
 * Session storage utilities
 */

import { ChatMessage } from "@/types/widget-types";

const SESSION_KEY = "onyx-widget-session";
const SESSION_TTL = 24 * 60 * 60 * 1000; // 24 hours

export interface StoredSession {
  sessionId: string;
  messages: ChatMessage[];
  timestamp: number;
}

/**
 * Save session to sessionStorage
 */
export function saveSession(sessionId: string, messages: ChatMessage[]): void {
  try {
    const session: StoredSession = {
      sessionId,
      messages,
      timestamp: Date.now(),
    };
    sessionStorage.setItem(SESSION_KEY, JSON.stringify(session));
  } catch (e) {
    console.warn("Failed to save session:", e);
  }
}

/**
 * Load session from sessionStorage
 * Returns null if session doesn't exist or has expired
 */
export function loadSession(): StoredSession | null {
  try {
    const data = sessionStorage.getItem(SESSION_KEY);
    if (!data) return null;

    const session: StoredSession = JSON.parse(data);

    // Check if session has expired
    if (Date.now() - session.timestamp > SESSION_TTL) {
      clearSession();
      return null;
    }

    return session;
  } catch (e) {
    console.warn("Failed to load session:", e);
    return null;
  }
}

/**
 * Clear session from sessionStorage
 */
export function clearSession(): void {
  try {
    sessionStorage.removeItem(SESSION_KEY);
  } catch (e) {
    console.warn("Failed to clear session:", e);
  }
}

/**
 * Check if a session exists
 */
export function hasSession(): boolean {
  try {
    const data = sessionStorage.getItem(SESSION_KEY);
    if (!data) return false;

    const session: StoredSession = JSON.parse(data);

    // Check if session has expired
    if (Date.now() - session.timestamp > SESSION_TTL) {
      clearSession();
      return false;
    }

    return true;
  } catch (e) {
    return false;
  }
}


================================================
FILE: widget/src/widget.ts
================================================
/**
 * Onyx Chat Widget - Main Component
 * Orchestrates launcher/inline modes and manages widget lifecycle
 */

import { LitElement, html, TemplateResult } from "lit";
import { customElement, property, state } from "lit/decorators.js";
import { unsafeHTML } from "lit/directives/unsafe-html.js";
import { marked } from "marked";
import DOMPurify from "dompurify";
import { WidgetConfig, ChatMessage } from "./types/widget-types";
import { SearchDocument, ResolvedCitation } from "./types/api-types";
import { resolveConfig } from "./config/config";
import { theme } from "./styles/theme";
import { widgetStyles } from "./styles/widget-styles";
import { ApiService } from "./services/api-service";
import { processPacket } from "./services/stream-parser";
import { saveSession, loadSession, clearSession } from "./utils/storage";
import { DEFAULT_LOGO } from "./assets/logo";

@customElement("onyx-chat-widget")
export class OnyxChatWidget extends LitElement {
  static styles = [theme, widgetStyles];

  // Configuration attributes
  @property({ attribute: "backend-url" }) backendUrl?: string;
  @property({ attribute: "api-key" }) apiKey?: string;
  @property({ attribute: "agent-id", type: Number }) agentId?: number;
  @property({ attribute: "primary-color" }) primaryColor?: string;
  @property({ attribute: "background-color" }) backgroundColor?: string;
  @property({ attribute: "text-color" }) textColor?: string;
  @property({ attribute: "agent-name" }) agentName?: string;
  @property({ attribute: "logo" }) logo?: string;
  @property() mode?: "launcher" | "inline";
  @property({ attribute: "include-citations", type: Boolean })
  includeCitations?: boolean;

  // Internal state
  @state() private isOpen = false;
  @state() private chatSessionId?: string;
  @state() private messages: ChatMessage[] = [];
  @state() private isLoading = false;
  @state() private isStreaming = false;
  @state() private streamingStatus = ""; // e.g., "Searching the web...", "Generating response..."
  @state() private error?: string;
  @state() private inputValue = "";

  private config!: WidgetConfig;
  private apiService!: ApiService;
  private abortController?: AbortController;
  // Citation state — plain fields (not @state) since Map mutations don't trigger Lit re-renders
  private documentMap = new Map<string, SearchDocument>();
  private citationMap = new Map<number, string>();

  constructor() {
    super();
    // Configure marked options
    marked.setOptions({
      breaks: true, // Convert \n to <br>
      gfm: true, // GitHub Flavored Markdown
    });
  }

  updated(changedProperties: Map<string, any>) {
    super.updated(changedProperties);

    // Auto-scroll when messages change or streaming status changes
    if (
      changedProperties.has("messages") ||
      changedProperties.has("isStreaming")
    ) {
      this.scrollToBottom();
    }
  }

  private scrollToBottom() {
    // Use requestAnimationFrame to ensure DOM is updated
    requestAnimationFrame(() => {
      const messagesContainer = this.shadowRoot?.querySelector(".messages");
      if (messagesContainer) {
        messagesContainer.scrollTop = messagesContainer.scrollHeight;
      }
    });
  }

  connectedCallback() {
    super.connectedCallback();

    // Resolve configuration
    this.config = resolveConfig({
      backendUrl: this.backendUrl,
      apiKey: this.apiKey,
      agentId: this.agentId,
      primaryColor: this.primaryColor,
      backgroundColor: this.backgroundColor,
      textColor: this.textColor,
      agentName: this.agentName,
      logo: this.logo,
      mode: this.mode,
      includeCitations: this.includeCitations,
    });

    // Apply custom colors
    this.applyCustomColors();

    // Initialize API service
    this.apiService = new ApiService(
      this.config.backendUrl,
      this.config.apiKey,
    );

    // Load persisted session
    const stored = loadSession();
    if (stored) {
      this.chatSessionId = stored.sessionId;
      this.messages = stored.messages;
    }

    // Auto-open if inline mode
    if (this.config.mode === "inline") {
      this.isOpen = true;
    }
  }

  private applyCustomColors() {
    // Primary color (buttons, accents)
    if (this.config.primaryColor) {
      this.style.setProperty("--theme-primary-05", this.config.primaryColor);
      this.style.setProperty(
        "--theme-primary-06",
        this.adjustBrightness(this.config.primaryColor, -10),
      );
    }

    // Background color
    if (this.config.backgroundColor) {
      this.style.setProperty(
        "--background-neutral-00",
        this.config.backgroundColor,
      );
      this.style.setProperty(
        "--background-neutral-03",
        this.adjustBrightness(this.config.backgroundColor, -10),
      );
    }

    // Text color
    if (this.config.textColor) {
      this.style.setProperty("--text-04", this.config.textColor);
    }
  }

  private adjustBrightness(color: string, percent: number): string {
    const num = parseInt(color.replace("#", ""), 16);
    const amt = Math.round(2.55 * percent);
    const R = (num >> 16) + amt;
    const G = ((num >> 8) & 0x00ff) + amt;
    const B = (num & 0x0000ff) + amt;
    return (
      "#" +
      (
        0x1000000 +
        (R < 255 ? (R < 1 ? 0 : R) : 255) * 0x10000 +
        (G < 255 ? (G < 1 ? 0 : G) : 255) * 0x100 +
        (B < 255 ? (B < 1 ? 0 : B) : 255)
      )
        .toString(16)
        .slice(1)
    );
  }

  /**
   * Public API: Reset conversation
   */
  public resetConversation() {
    // Abort any active streaming request first
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = undefined;
    }

    this.messages = [];
    this.chatSessionId = undefined;
    this.error = undefined;
    this.inputValue = "";
    this.isStreaming = false;
    this.isLoading = false;
    this.streamingStatus = "";
    this.documentMap.clear();
    this.citationMap.clear();
    clearSession();
  }

  /**
   * Render markdown content safely.
   * Strips [[n]](url) citation links before markdown parsing so they render
   * as plain [n] text references. Citation badges are rendered separately.
   * Renumbers citations to sequential display numbers (1, 2, 3...).
   */
  private renderMarkdown(content: string, citations?: ResolvedCitation[]) {
    try {
      let stripped = content;
      if (this.config.includeCitations) {
        if (citations?.length) {
          // Build a map from backend citation number → sequential display number
          const displayMap = new Map<number, number>();
          citations.forEach((c, i) => displayMap.set(c.citation_number, i + 1));

          // Replace [[n]](url) with superscript-style display number
          stripped = stripped.replace(
            /\[\[(\d+)\]\]\([^)]*\)/g,
            (_match, num) => {
              const displayNum = displayMap.get(Number(num));
              return displayNum ? `<sup>[${displayNum}]</sup>` : "";
            },
          );
        } else {
          // Still streaming or no citations resolved yet — strip raw links
          stripped = stripped.replace(/\[\[(\d+)\]\]\([^)]*\)/g, "");
        }
      }
      const htmlContent = marked.parse(stripped, { async: false }) as string;
      const sanitizedHTML = DOMPurify.sanitize(htmlContent, {
        ADD_TAGS: ["sup"],
      });
      return unsafeHTML(sanitizedHTML);
    } catch (err) {
      console.error("Failed to parse markdown:", err);
      return content; // Fallback to plain text
    }
  }

  private static readonly CITATIONS_COLLAPSED_COUNT = 1;

  /**
   * Render a single citation badge.
   */
  private renderCitationBadge(
    c: ResolvedCitation,
    displayNum: number,
  ): TemplateResult {
    const title = c.semantic_identifier || "Source";
    const safeHref =
      c.link && /^https?:\/\//i.test(c.link) ? c.link : undefined;
    return safeHref
      ? html`<a
          class="citation-badge"
          href=${safeHref}
          target="_blank"
          rel="noopener noreferrer"
          title=${title}
          ><span class="citation-num">${displayNum}</span
          ><span class="citation-title">${title}</span></a
        >`
      : html`<span class="citation-badge" title=${title}
          ><span class="citation-num">${displayNum}</span
          ><span class="citation-title">${title}</span></span
        >`;
  }

  /**
   * Toggle expanded state for a citation list.
   */
  private toggleCitationExpand(e: Event): void {
    const container = (e.target as HTMLElement).closest(".citation-list");
    if (container) {
      container.classList.toggle("expanded");
    }
  }

  /**
   * Render citation badges for a message.
   * Shows first 3 inline, collapses the rest behind a "+N more" toggle.
   */
  private renderCitations(
    citations?: ResolvedCitation[],
  ): string | TemplateResult {
    if (!citations?.length) return "";
    const limit = OnyxChatWidget.CITATIONS_COLLAPSED_COUNT;
    const visible = citations.slice(0, limit);
    const overflow = citations.slice(limit);

    return html`
      <div class="citation-list">
        ${visible.map((c, i) => this.renderCitationBadge(c, i + 1))}
        ${overflow.length > 0
          ? html`
              <button class="citation-more" @click=${this.toggleCitationExpand}>
                +${overflow.length} more
              </button>
              <div class="citation-overflow">
                ${overflow.map((c, i) =>
                  this.renderCitationBadge(c, limit + i + 1),
                )}
              </div>
            `
          : ""}
      </div>
    `;
  }

  private toggleOpen() {
    this.isOpen = !this.isOpen;
  }

  private close() {
    if (this.config.mode === "launcher") {
      this.isOpen = false;
    }
  }

  private handleInput(e: InputEvent) {
    this.inputValue = (e.target as HTMLInputElement).value;
  }

  private handleKeyDown(e: KeyboardEvent) {
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      this.sendMessage();
    }
  }

  private async sendMessage() {
    const message = this.inputValue.trim();
    if (!message || this.isLoading || this.isStreaming) return;

    // Clear input immediately
    this.inputValue = "";

    // Add user message
    const userMessage: ChatMessage = {
      id: `msg-${Date.now()}`,
      role: "user",
      content: message,
      timestamp: Date.now(),
    };
    this.messages = [...this.messages, userMessage];

    try {
      this.isStreaming = true;
      this.error = undefined;

      // Create session if needed
      if (!this.chatSessionId) {
        this.isLoading = true;
        this.chatSessionId = await this.apiService.createChatSession(
          this.config.agentId,
        );
        this.isLoading = false;
      }

      // Get parent message ID (last assistant message with a numeric ID from backend)
      const parentMessage = [...this.messages]
        .reverse()
        .find((m) => m.role === "assistant" && typeof m.id === "number");
      const parentMessageId =
        parentMessage && typeof parentMessage.id === "number"
          ? parentMessage.id
          : null;

      // Stream response
      this.abortController = new AbortController();
      let currentMessage: ChatMessage | null = null;
      let assistantMessageId: number | null = null;

      for await (const packet of this.apiService.streamMessage({
        message,
        chatSessionId: this.chatSessionId,
        parentMessageId,
        signal: this.abortController.signal,
        includeCitations: this.config.includeCitations,
      })) {
        const result = processPacket(packet, currentMessage);

        // Capture message IDs from backend and update local messages
        if (result.messageIds) {
          // Update user message ID if we got one
          if (result.messageIds.userMessageId !== null) {
            const userMsgIndex = this.messages.findIndex(
              (m) => m.id === userMessage.id,
            );
            if (userMsgIndex >= 0) {
              // Create new array to trigger reactivity
              const updatedMessage = {
                ...this.messages[userMsgIndex],
                id: result.messageIds.userMessageId,
              };
              this.messages = [
                ...this.messages.slice(0, userMsgIndex),
                updatedMessage,
                ...this.messages.slice(userMsgIndex + 1),
              ];
            }
          }
          // Store assistant message ID to apply when message is created
          assistantMessageId = result.messageIds.assistantMessageId;
        }

        // Update status if provided
        if (result.status !== undefined) {
          this.streamingStatus = result.status;
        }

        // Accumulate document metadata for citation resolution
        if (result.documents) {
          for (const doc of result.documents) {
            this.documentMap.set(doc.document_id, doc);
          }
        }

        // Accumulate citation mappings for the current message
        if (result.citation) {
          this.citationMap.set(
            result.citation.citation_number,
            result.citation.document_id,
          );
        }

        if (result.message) {
          // Reset per-message citation state when a new message starts
          if (
            result.message.isStreaming &&
            result.message.content === "" &&
            currentMessage === null
          ) {
            this.citationMap.clear();
          }

          currentMessage = result.message;

          // Apply the backend message ID if we have it and message doesn't have a numeric ID yet
          if (
            assistantMessageId !== null &&
            typeof currentMessage.id !== "number"
          ) {
            currentMessage.id = assistantMessageId;
          }

          // When message is complete, resolve citations and attach to message
          if (!currentMessage.isStreaming && this.citationMap.size > 0) {
            const resolved: ResolvedCitation[] = [];
            for (const [citNum, docId] of this.citationMap) {
              const doc = this.documentMap.get(docId);
              resolved.push({
                citation_number: citNum,
                document_id: docId,
                semantic_identifier: doc?.semantic_identifier,
                link: doc?.link ?? undefined,
              });
            }
            resolved.sort((a, b) => a.citation_number - b.citation_number);
            currentMessage = { ...currentMessage, citations: resolved };
          }

          // Update or add message
          const existingIndex = this.messages.findIndex(
            (m) => m.id === currentMessage?.id,
          );
          if (existingIndex >= 0) {
            this.messages = [
              ...this.messages.slice(0, existingIndex),
              currentMessage,
              ...this.messages.slice(existingIndex + 1),
            ];
          } else {
            this.messages = [...this.messages, currentMessage];
          }

          // Clear streaming state and persist when message is complete
          if (!currentMessage.isStreaming) {
            this.isStreaming = false;
            this.streamingStatus = "";
            saveSession(this.chatSessionId, this.messages);
          }
        }
      }
    } catch (err: any) {
      console.error("Failed to send message:", err);
      if (err.name !== "AbortError") {
        this.error = err.message || "Failed to send message";
      }
    } finally {
      this.isStreaming = false;
      this.isLoading = false;
      this.streamingStatus = "";
      this.abortController = undefined;
    }
  }

  render() {
    const showContainer = this.config.mode === "inline" || this.isOpen;
    const hasMessages = this.messages.length > 0 || this.isStreaming;
    const isCompactInline = this.config.mode === "inline" && !hasMessages;

    return html`
      ${this.config.mode === "launcher"
        ? html`
            <button
              class="launcher"
              @click=${this.toggleOpen}
              title="Open chat"
            >
              <img
                src="${this.config.logo || DEFAULT_LOGO}"
                alt="Logo"
                style="width: 32px; height: 32px; object-fit: contain;"
              />
            </button>
          `
        : ""}
      ${showContainer
        ? html`
            <div
              class="container ${this.config.mode === "inline"
                ? "inline"
                : ""} ${isCompactInline ? "compact" : ""}"
            >
              ${isCompactInline
                ? this.renderCompactInput()
                : html`
                    ${this.renderHeader()} ${this.renderMessages()}
                    ${this.renderInput()}
                  `}
            </div>
          `
        : ""}
    `;
  }

  private renderHeader() {
    return html`
      <div class="header">
        <div class="header-left">
          <div class="avatar">
            <img
              src="${this.config.logo || DEFAULT_LOGO}"
              alt="Logo"
              style="width: 100%; height: 100%; object-fit: contain;"
            />
          </div>
          <div class="header-title">
            ${this.config.agentName || "Assistant"}
          </div>
        </div>
        <div class="header-right">
          <button
            class="icon-button"
            @click=${this.resetConversation}
            title="Reset conversation"
          >
            <svg
              width="16"
              height="16"
              viewBox="0 0 16 16"
              fill="none"
              stroke="currentColor"
            >
              <path
                d="M14.448 3.10983V6.77746M14.448 6.77746H10.7803M14.448 6.77746L11.6117 4.11231C10.9547 3.45502 10.142 2.97486 9.24923 2.71664C8.35651 2.45842 7.41292 2.43055 6.50651 2.63564C5.6001 2.84072 4.76042 3.27208 4.06581 3.88945C3.3712 4.50683 2.84431 5.2901 2.53429 6.16618M1 12.8902V9.22254M1 9.22254H4.66763M1 9.22254L3.8363 11.8877C4.49326 12.545 5.30603 13.0251 6.19875 13.2834C7.09147 13.5416 8.03506 13.5694 8.94147 13.3644C9.84787 13.1593 10.6876 12.7279 11.3822 12.1105C12.0768 11.4932 12.6037 10.7099 12.9137 9.83381"
                stroke-width="1.5"
                stroke-linecap="round"
                stroke-linejoin="round"
              />
            </svg>
          </button>
          ${this.config.mode === "launcher"
            ? html`
                <button class="icon-button" @click=${this.close} title="Close">
                  <svg
                    width="16"
                    height="16"
                    viewBox="0 0 28 28"
                    fill="none"
                    stroke="currentColor"
                  >
                    <path
                      d="M21 7L7 21M7 7L21 21"
                      stroke-width="2"
                      stroke-linejoin="round"
                    />
                  </svg>
                </button>
              `
            : ""}
        </div>
      </div>
    `;
  }

  private renderMessages() {
    // Check if there's a streaming message with content
    const hasStreamingContent = this.messages.some(
      (m) => m.role === "assistant" && m.isStreaming && m.content.length > 0,
    );
    // Show ellipsis only when: streaming AND (has status text OR no content yet)
    const showEllipsis =
      this.isStreaming && (this.streamingStatus || !hasStreamingContent);

    return html`
      <div class="disclaimer">
        Responses are generated by AI and may be inaccurate
      </div>
      <div class="messages">
        ${this.error ? html` <div class="error">${this.error}</div> ` : ""}
        ${this.messages.map(
          (msg) => html`
            <div class="message ${msg.role}">
              <div class="message-bubble">
                ${msg.role === "assistant"
                  ? html`${this.renderMarkdown(
                      msg.content,
                      msg.citations,
                    )}${this.renderCitations(msg.citations)}`
                  : msg.content}
              </div>
            </div>
          `,
        )}
        ${showEllipsis
          ? html`
              <div class="message assistant">
                <div class="message-bubble">
                  <div class="status-container">
                    <div class="typing-indicator">
                      <div class="typing-dot"></div>
                      <div class="typing-dot"></div>
                      <div class="typing-dot"></div>
                    </div>
                    ${this.streamingStatus
                      ? html`
                          <span class="status-text"
                            >${this.streamingStatus}</span
                          >
                        `
                      : ""}
                  </div>
                </div>
              </div>
            `
          : ""}
      </div>
    `;
  }

  private renderInput() {
    return html`
      <div class="input-wrapper">
        <div class="input-container">
          <input
            class="input"
            type="text"
            .value=${this.inputValue}
            @input=${this.handleInput}
            @keydown=${this.handleKeyDown}
            placeholder="Type your message..."
            ?disabled=${this.isLoading || this.isStreaming}
          />
          <button
            class="send-button"
            @click=${this.sendMessage}
            ?disabled=${!this.inputValue.trim() ||
            this.isLoading ||
            this.isStreaming}
            title="Send message"
          >
            <svg
              width="20"
              height="20"
              viewBox="0 0 18 18"
              fill="none"
              stroke="currentColor"
            >
              <path
                d="M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665"
                stroke-width="2"
                stroke-linecap="round"
                stroke-linejoin="round"
              />
            </svg>
          </button>
        </div>
        <div class="powered-by">
          Powered by
          <a
            href="https://onyx.app"
            target="_blank"
            rel="noopener noreferrer"
            style="text-decoration: underline;"
            >Onyx</a
          >
        </div>
      </div>
    `;
  }

  private renderCompactInput() {
    return html`
      <div class="compact-input-container">
        <div class="compact-avatar">
          <img
            src="${this.config.logo || DEFAULT_LOGO}"
            alt="Logo"
            style="width: 100%; height: 100%; object-fit: contain;"
          />
        </div>
        <input
          class="compact-input"
          type="text"
          .value=${this.inputValue}
          @input=${this.handleInput}
          @keydown=${this.handleKeyDown}
          placeholder="Ask ${this.config.agentName || "Assistant"} anything..."
          ?disabled=${this.isLoading || this.isStreaming}
        />
        <button
          class="send-button"
          @click=${this.sendMessage}
          ?disabled=${!this.inputValue.trim() ||
          this.isLoading ||
          this.isStreaming}
          title="Send message"
        >
          <svg
            width="18"
            height="18"
            viewBox="0 0 18 18"
            fill="none"
            stroke="currentColor"
          >
            <path
              d="M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665"
              stroke-width="2"
              stroke-linecap="round"
              stroke-linejoin="round"
            />
          </svg>
        </button>
      </div>
    `;
  }
}

declare global {
  interface HTMLElementTagNameMap {
    "onyx-chat-widget": OnyxChatWidget;
  }
}


================================================
FILE: widget/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ES2020",
    "useDefineForClassFields": false,
    "experimentalDecorators": true,
    "lib": [
      "ES2020",
      "DOM",
      "DOM.Iterable"
    ],
    "types": ["vite/client"],
    "module": "ESNext",
    "skipLibCheck": true,
    "esModuleInterop": true,
    "allowSyntheticDefaultImports": true,

    /* Bundler mode */
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "resolveJsonModule": true,
    "isolatedModules": true,
    "noEmit": true,

    /* Linting */
    "strict": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noFallthroughCasesInSwitch": true,
    "baseUrl": ".",
    "paths": {
      "@/*": [
        "src/*"
      ]
    }
  },
  "include": [
    "src"
  ]
}


================================================
FILE: widget/vite.config.ts
================================================
import { defineConfig, loadEnv } from "vite";
import { resolve } from "path";

export default defineConfig(({ mode }) => {
  const isSelfHosted = mode === "self-hosted";

  // Load env file based on mode
  const env = loadEnv(mode, process.cwd(), "");

  return {
    resolve: {
      alias: {
        "@": resolve(__dirname, "./src"),
      },
    },
    build: {
      lib: {
        entry: resolve(__dirname, "src/index.ts"),
        name: "OnyxWidget",
        fileName: "onyx-widget",
        formats: ["es"],
      },
      rollupOptions: {
        output: {
          inlineDynamicImports: true,
        },
      },
      sourcemap: false,
      minify: "terser",
      terserOptions: {
        compress: {
          drop_console: true,
        },
      },
    },
    define: isSelfHosted
      ? {
          "import.meta.env.VITE_WIDGET_BACKEND_URL": JSON.stringify(
            env.VITE_WIDGET_BACKEND_URL,
          ),
          "import.meta.env.VITE_WIDGET_API_KEY": JSON.stringify(
            env.VITE_WIDGET_API_KEY,
          ),
        }
      : {},
  };
});